From 6e53c1bd12019c63715987c0a76c79e868a05401 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 28 May 2016 20:40:33 +0200 Subject: [PATCH 001/912] Change render target output from PAL8 to BGRA8 --- src/basictypes.h | 6 + src/f_wipe.cpp | 17 +- src/m_misc.cpp | 2 + src/r_draw.cpp | 1106 ++++++++++++++++++++++++++++++-- src/r_draw.h | 12 +- src/r_drawt.cpp | 397 ++++++++++-- src/r_main.cpp | 8 +- src/r_main.h | 28 + src/r_plane.cpp | 24 +- src/r_segs.cpp | 107 ++- src/r_swrenderer.cpp | 4 + src/r_things.cpp | 73 ++- src/textures/canvastexture.cpp | 5 + src/v_draw.cpp | 45 +- src/v_video.cpp | 52 +- src/v_video.h | 14 +- src/win32/fb_d3d9.cpp | 38 +- src/win32/fb_ddraw.cpp | 10 +- src/win32/win32iface.h | 4 +- src/win32/win32video.cpp | 13 + 20 files changed, 1785 insertions(+), 180 deletions(-) diff --git a/src/basictypes.h b/src/basictypes.h index ff2cd972e6..45e33a4a73 100644 --- a/src/basictypes.h +++ b/src/basictypes.h @@ -66,6 +66,12 @@ union QWORD_UNION typedef SDWORD fixed_t; typedef DWORD dsfixed_t; // fixedpt used by span drawer +#ifndef PALETTEOUTPUT +typedef uint32_t canvas_pixel_t; +#else +typedef BYTE canvas_pixel_t; +#endif + #define FIXED_MAX (signed)(0x7fffffff) #define FIXED_MIN (signed)(0x80000000) diff --git a/src/f_wipe.cpp b/src/f_wipe.cpp index a3ceb8d508..c6f20cadbd 100644 --- a/src/f_wipe.cpp +++ b/src/f_wipe.cpp @@ -77,8 +77,10 @@ bool wipe_initMelt (int ticks) { int i, r; +#ifdef PALETTEOUTPUT // copy start screen to main screen screen->DrawBlock (0, 0, SCREENWIDTH, SCREENHEIGHT, (BYTE *)wipe_scr_start); +#endif // makes this wipe faster (in theory) // to have stuff in column-major format @@ -271,7 +273,8 @@ bool wipe_doBurn (int ticks) // Draw the screen int xstep, ystep, firex, firey; int x, y; - BYTE *to, *fromold, *fromnew; + canvas_pixel_t *to; + BYTE *fromold, *fromnew; const int SHIFT = 16; xstep = (FIREWIDTH << SHIFT) / SCREENWIDTH; @@ -298,6 +301,9 @@ bool wipe_doBurn (int ticks) } else { +#ifndef PALETTEOUTPUT + // TO DO: 
RGB32k.All +#else int bglevel = 64-fglevel; DWORD *fg2rgb = Col2RGB8[fglevel]; DWORD *bg2rgb = Col2RGB8[bglevel]; @@ -305,6 +311,7 @@ bool wipe_doBurn (int ticks) DWORD bg = bg2rgb[fromold[x]]; fg = (fg+bg) | 0x1f07c1f; to[x] = RGB32k.All[fg & (fg>>15)]; +#endif done = false; } } @@ -335,7 +342,9 @@ bool wipe_doFade (int ticks) fade += ticks * 2; if (fade > 64) { +#ifdef PALETTEOUTPUT screen->DrawBlock (0, 0, SCREENWIDTH, SCREENHEIGHT, (BYTE *)wipe_scr_end); +#endif return true; } else @@ -346,7 +355,7 @@ bool wipe_doFade (int ticks) DWORD *bg2rgb = Col2RGB8[bglevel]; BYTE *fromnew = (BYTE *)wipe_scr_end; BYTE *fromold = (BYTE *)wipe_scr_start; - BYTE *to = screen->GetBuffer(); + canvas_pixel_t *to = screen->GetBuffer(); for (y = 0; y < SCREENHEIGHT; y++) { @@ -387,7 +396,9 @@ bool wipe_StartScreen (int type) if (CurrentWipeType) { wipe_scr_start = new short[SCREENWIDTH * SCREENHEIGHT / 2]; +#ifdef PALETTEOUTPUT screen->GetBlock (0, 0, SCREENWIDTH, SCREENHEIGHT, (BYTE *)wipe_scr_start); +#endif return true; } return false; @@ -398,8 +409,10 @@ void wipe_EndScreen (void) if (CurrentWipeType) { wipe_scr_end = new short[SCREENWIDTH * SCREENHEIGHT / 2]; +#ifdef PALETTEOUTPUT screen->GetBlock (0, 0, SCREENWIDTH, SCREENHEIGHT, (BYTE *)wipe_scr_end); screen->DrawBlock (0, 0, SCREENWIDTH, SCREENHEIGHT, (BYTE *)wipe_scr_start); // restore start scr. 
+#endif // Initialize the wipe (*wipes[(CurrentWipeType-1)*3])(0); } diff --git a/src/m_misc.cpp b/src/m_misc.cpp index 87f61f2539..79416c31d8 100644 --- a/src/m_misc.cpp +++ b/src/m_misc.cpp @@ -655,6 +655,7 @@ static bool FindFreeName (FString &fullname, const char *extension) void M_ScreenShot (const char *filename) { +#ifdef PALETTEOUTPUT FILE *file; FString autoname; bool writepcx = (stricmp (screenshot_type, "pcx") == 0); // PNG is the default @@ -743,6 +744,7 @@ void M_ScreenShot (const char *filename) Printf ("Could not create screenshot.\n"); } } +#endif } CCMD (screenshot) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 80b91ed2d4..0449100085 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -42,6 +42,9 @@ #include "gi.h" #include "stats.h" #include "x86.h" +#ifndef NO_SSE +#include +#endif #undef RANGECHECK @@ -61,7 +64,7 @@ extern int ST_Y; BYTE* viewimage; extern "C" { int ylookup[MAXHEIGHT]; -BYTE *dc_destorg; +canvas_pixel_t *dc_destorg; } int scaledviewwidth; @@ -90,6 +93,7 @@ extern "C" { int dc_pitch=0xABadCafe; // [RH] Distance between rows lighttable_t* dc_colormap; +fixed_t dc_light; int dc_x; int dc_yl; int dc_yh; @@ -103,12 +107,13 @@ DWORD *dc_destblend; // blending lookups // first pixel in a column (possibly virtual) const BYTE* dc_source; -BYTE* dc_dest; +canvas_pixel_t* dc_dest; int dc_count; DWORD vplce[4]; DWORD vince[4]; BYTE* palookupoffse[4]; +fixed_t palookuplight[4]; const BYTE* bufplce[4]; // just for profiling @@ -180,7 +185,7 @@ void R_InitShadeMaps() void R_DrawColumnP_C (void) { int count; - BYTE* dest; + canvas_pixel_t* dest; fixed_t frac; fixed_t fracstep; @@ -193,6 +198,10 @@ void R_DrawColumnP_C (void) // Framebuffer destination address. dest = dc_dest; +#ifndef PALETTEOUTPUT + uint32_t light = calc_light_multiplier(dc_light); +#endif + // Determine scaling, // which is the only mapping to be done. 
fracstep = dc_iscale; @@ -212,7 +221,11 @@ void R_DrawColumnP_C (void) { // Re-map color indices from wall texture column // using a lighting/special effects LUT. - *dest = colormap[source[frac>>FRACBITS]]; +#ifndef PALETTEOUTPUT + *dest = shade_pal_index(colormap[source[frac>>FRACBITS]], light); +#else + *dest = colormap[source[frac >> FRACBITS]]; +#endif dest += pitch; frac += fracstep; @@ -226,7 +239,7 @@ void R_DrawColumnP_C (void) void R_FillColumnP (void) { int count; - BYTE* dest; + canvas_pixel_t* dest; count = dc_count; @@ -235,13 +248,21 @@ void R_FillColumnP (void) dest = dc_dest; +#ifndef PALETTEOUTPUT + uint32_t light = calc_light_multiplier(dc_light); +#endif + { int pitch = dc_pitch; BYTE color = dc_color; do { +#ifndef PALETTEOUTPUT + *dest = shade_pal_index(color, light); +#else *dest = color; +#endif dest += pitch; } while (--count); } @@ -250,19 +271,39 @@ void R_FillColumnP (void) void R_FillAddColumn (void) { int count; - BYTE *dest; + canvas_pixel_t *dest; count = dc_count; if (count <= 0) return; dest = dc_dest; + int pitch = dc_pitch; + +#ifndef PALETTEOUTPUT + uint32_t fg_red = (dc_srccolor >> 12) & 0xf8; + uint32_t fg_green = (dc_srccolor >> 2) & 0xf8; + uint32_t fg_blue = (dc_srccolor << 3) & 0xf8; + + do + { + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = (fg_red + bg_red + 1) / 2; + uint32_t green = (fg_green + bg_green + 1) / 2; + uint32_t blue = (fg_blue + bg_blue + 1) / 2; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += pitch; + } while (--count); +#else DWORD *bg2rgb; DWORD fg; bg2rgb = dc_destblend; fg = dc_srccolor; - int pitch = dc_pitch; do { @@ -271,25 +312,45 @@ void R_FillAddColumn (void) *dest = RGB32k.All[bg & (bg>>15)]; dest += pitch; } while (--count); - +#endif } void R_FillAddClampColumn (void) { int count; - BYTE *dest; + canvas_pixel_t *dest; count = dc_count; if (count <= 0) return; dest = dc_dest; + int 
pitch = dc_pitch; + +#ifndef PALETTEOUTPUT + uint32_t fg_red = (dc_srccolor >> 12) & 0xf8; + uint32_t fg_green = (dc_srccolor >> 2) & 0xf8; + uint32_t fg_blue = (dc_srccolor << 3) & 0xf8; + + do + { + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(fg_red + bg_red, 0, 255); + uint32_t green = clamp(fg_green + bg_green, 0, 255); + uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += pitch; + } while (--count); +#else DWORD *bg2rgb; DWORD fg; bg2rgb = dc_destblend; fg = dc_srccolor; - int pitch = dc_pitch; do { @@ -304,25 +365,45 @@ void R_FillAddClampColumn (void) *dest = RGB32k.All[a & (a>>15)]; dest += pitch; } while (--count); - +#endif } void R_FillSubClampColumn (void) { int count; - BYTE *dest; + canvas_pixel_t *dest; count = dc_count; if (count <= 0) return; dest = dc_dest; + int pitch = dc_pitch; + +#ifndef PALETTEOUTPUT + uint32_t fg_red = (dc_srccolor >> 12) & 0xf8; + uint32_t fg_green = (dc_srccolor >> 2) & 0xf8; + uint32_t fg_blue = (dc_srccolor << 3) & 0xf8; + + do + { + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(256 - fg_red + bg_red, 256, 256 + 255) - 255; + uint32_t green = clamp(256 - fg_green + bg_green, 256, 256 + 255) - 255; + uint32_t blue = clamp(256 - fg_blue + bg_blue, 256, 256 + 255) - 255; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += pitch; + } while (--count); +#else DWORD *bg2rgb; DWORD fg; bg2rgb = dc_destblend; fg = dc_srccolor | 0x40100400; - int pitch = dc_pitch; do { @@ -336,25 +417,45 @@ void R_FillSubClampColumn (void) *dest = RGB32k.All[a & (a>>15)]; dest += pitch; } while (--count); - +#endif } void R_FillRevSubClampColumn (void) { int count; - BYTE *dest; + canvas_pixel_t *dest; count = dc_count; if (count <= 0) return; dest = dc_dest; + int 
pitch = dc_pitch; + +#ifndef PALETTEOUTPUT + uint32_t fg_red = (dc_srccolor >> 12) & 0xf8; + uint32_t fg_green = (dc_srccolor >> 2) & 0xf8; + uint32_t fg_blue = (dc_srccolor << 3) & 0xf8; + + do + { + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(256 + fg_red - bg_red, 256, 256 + 255) - 255; + uint32_t green = clamp(256 + fg_green - bg_green, 256, 256 + 255) - 255; + uint32_t blue = clamp(256 + fg_blue - bg_blue, 256, 256 + 255) - 255; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += pitch; + } while (--count); +#else DWORD *bg2rgb; DWORD fg; bg2rgb = dc_destblend; fg = dc_srccolor; - int pitch = dc_pitch; do { @@ -368,7 +469,7 @@ void R_FillRevSubClampColumn (void) *dest = RGB32k.All[a & (a>>15)]; dest += pitch; } while (--count); - +#endif } // @@ -421,7 +522,7 @@ void R_InitFuzzTable (int fuzzoff) void R_DrawFuzzColumnP_C (void) { int count; - BYTE *dest; + canvas_pixel_t *dest; // Adjust borders. Low... if (dc_yl == 0) @@ -441,6 +542,85 @@ void R_DrawFuzzColumnP_C (void) dest = ylookup[dc_yl] + dc_x + dc_destorg; +#ifndef PALETTEOUTPUT + + // Note: this implementation assumes this function is only used for the pinky shadow effect (i.e. no other fancy colormap than black) + // I'm not sure if this is really always the case or not. + + { + // [RH] Make local copies of global vars to try and improve + // the optimizations made by the compiler. + int pitch = dc_pitch; + int fuzz = fuzzpos; + int cnt; + + // [RH] Split this into three separate loops to minimize + // the number of times fuzzpos needs to be clamped. 
+ if (fuzz) + { + cnt = MIN(FUZZTABLE - fuzz, count); + count -= cnt; + do + { + uint32_t bg = dest[fuzzoffset[fuzz++]]; + uint32_t bg_red = (bg >> 16) & 0xff; + uint32_t bg_green = (bg >> 8) & 0xff; + uint32_t bg_blue = (bg) & 0xff; + + uint32_t red = bg_red * 3 / 4; + uint32_t green = bg_green * 3 / 4; + uint32_t blue = bg_blue * 3 / 4; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += pitch; + } while (--cnt); + } + if (fuzz == FUZZTABLE || count > 0) + { + while (count >= FUZZTABLE) + { + fuzz = 0; + cnt = FUZZTABLE; + count -= FUZZTABLE; + do + { + uint32_t bg = dest[fuzzoffset[fuzz++]]; + uint32_t bg_red = (bg >> 16) & 0xff; + uint32_t bg_green = (bg >> 8) & 0xff; + uint32_t bg_blue = (bg) & 0xff; + + uint32_t red = bg_red * 3 / 4; + uint32_t green = bg_green * 3 / 4; + uint32_t blue = bg_blue * 3 / 4; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += pitch; + } while (--cnt); + } + fuzz = 0; + if (count > 0) + { + do + { + uint32_t bg = dest[fuzzoffset[fuzz++]]; + uint32_t bg_red = (bg >> 16) & 0xff; + uint32_t bg_green = (bg >> 8) & 0xff; + uint32_t bg_blue = (bg) & 0xff; + + uint32_t red = bg_red * 3 / 4; + uint32_t green = bg_green * 3 / 4; + uint32_t blue = bg_blue * 3 / 4; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += pitch; + } while (--count); + } + } + fuzzpos = fuzz; + } + +#else + // colormap #6 is used for shading (of 0-31, a bit brighter than average) { // [RH] Make local copies of global vars to try and improve @@ -487,6 +667,7 @@ void R_DrawFuzzColumnP_C (void) } fuzzpos = fuzz; } +#endif } #endif @@ -539,7 +720,7 @@ algorithm that uses RGB tables. 
void R_DrawAddColumnP_C (void) { int count; - BYTE *dest; + canvas_pixel_t *dest; fixed_t frac; fixed_t fracstep; @@ -552,6 +733,34 @@ void R_DrawAddColumnP_C (void) fracstep = dc_iscale; frac = dc_texturefrac; +#ifndef PALETTEOUTPUT + { + const BYTE *source = dc_source; + int pitch = dc_pitch; + BYTE *colormap = dc_colormap; + + do + { + uint32_t fg = shade_pal_index(colormap[source[frac >> FRACBITS]], 0); + + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(fg_red + bg_red, 0, 255); + uint32_t green = clamp(fg_green + bg_green, 0, 255); + uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += pitch; + frac += fracstep; + } while (--count); + } +#else { DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; @@ -572,6 +781,7 @@ void R_DrawAddColumnP_C (void) frac += fracstep; } while (--count); } +#endif } // @@ -585,7 +795,7 @@ void R_DrawAddColumnP_C (void) void R_DrawTranslatedColumnP_C (void) { int count; - BYTE* dest; + canvas_pixel_t* dest; fixed_t frac; fixed_t fracstep; @@ -593,6 +803,10 @@ void R_DrawTranslatedColumnP_C (void) if (count <= 0) return; +#ifndef PALETTEOUTPUT + uint32_t light = calc_light_multiplier(dc_light); +#endif + dest = dc_dest; fracstep = dc_iscale; @@ -607,7 +821,11 @@ void R_DrawTranslatedColumnP_C (void) do { +#ifndef PALETTEOUTPUT + *dest = shade_pal_index(colormap[translation[source[frac >> FRACBITS]]], light); +#else *dest = colormap[translation[source[frac>>FRACBITS]]]; +#endif dest += pitch; frac += fracstep; @@ -619,7 +837,7 @@ void R_DrawTranslatedColumnP_C (void) void R_DrawTlatedAddColumnP_C (void) { int count; - BYTE *dest; + canvas_pixel_t *dest; fixed_t frac; fixed_t fracstep; @@ -627,11 +845,44 @@ void R_DrawTlatedAddColumnP_C (void) if (count <= 
0) return; +#ifndef PALETTEOUTPUT + uint32_t light = calc_light_multiplier(dc_light); +#endif + dest = dc_dest; fracstep = dc_iscale; frac = dc_texturefrac; +#ifndef PALETTEOUTPUT + { + BYTE *translation = dc_translation; + BYTE *colormap = dc_colormap; + const BYTE *source = dc_source; + int pitch = dc_pitch; + + do + { + uint32_t fg = shade_pal_index(colormap[translation[source[frac >> FRACBITS]]], light); + + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(fg_red + bg_red, 0, 255); + uint32_t green = clamp(fg_green + bg_green, 0, 255); + uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += pitch; + frac += fracstep; + } while (--count); + } +#else { DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; @@ -647,12 +898,13 @@ void R_DrawTlatedAddColumnP_C (void) fg = fg2rgb[fg]; bg = bg2rgb[bg]; - fg = (fg+bg) | 0x1f07c1f; - *dest = RGB32k.All[fg & (fg>>15)]; + fg = (fg + bg) | 0x1f07c1f; + *dest = RGB32k.All[fg & (fg >> 15)]; dest += pitch; frac += fracstep; } while (--count); } +#endif } // Draw a column whose "color" values are actually translucency @@ -660,7 +912,7 @@ void R_DrawTlatedAddColumnP_C (void) void R_DrawShadedColumnP_C (void) { int count; - BYTE *dest; + canvas_pixel_t *dest; fixed_t frac, fracstep; count = dc_count; @@ -673,6 +925,36 @@ void R_DrawShadedColumnP_C (void) fracstep = dc_iscale; frac = dc_texturefrac; +#ifndef PALETTEOUTPUT + uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(dc_light)); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + { + const BYTE *source = dc_source; + BYTE *colormap = dc_colormap; + int pitch = dc_pitch; + + do + { + DWORD alpha = clamp(colormap[source[frac >> 
FRACBITS]], 0, 64); + DWORD inv_alpha = 64 - alpha; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = (fg_red * alpha + bg_red * inv_alpha) / 64; + uint32_t green = (fg_green * alpha + bg_green * inv_alpha) / 64; + uint32_t blue = (fg_blue * alpha + bg_blue * inv_alpha) / 64; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += pitch; + frac += fracstep; + } while (--count); + } +#else { const BYTE *source = dc_source; BYTE *colormap = dc_colormap; @@ -690,13 +972,14 @@ void R_DrawShadedColumnP_C (void) frac += fracstep; } while (--count); } +#endif } // Add source to destination, clamping it to white void R_DrawAddClampColumnP_C () { int count; - BYTE *dest; + canvas_pixel_t *dest; fixed_t frac; fixed_t fracstep; @@ -709,6 +992,34 @@ void R_DrawAddClampColumnP_C () fracstep = dc_iscale; frac = dc_texturefrac; +#ifndef PALETTEOUTPUT + { + const BYTE *source = dc_source; + BYTE *colormap = dc_colormap; + int pitch = dc_pitch; + uint32_t light = calc_light_multiplier(dc_light); + + do + { + uint32_t fg = shade_pal_index(colormap[source[frac >> FRACBITS]], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(fg_red + bg_red, 0, 255); + uint32_t green = clamp(fg_green + bg_green, 0, 255); + uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += pitch; + frac += fracstep; + } while (--count); + } +#else { BYTE *colormap = dc_colormap; const BYTE *source = dc_source; @@ -731,13 +1042,14 @@ void R_DrawAddClampColumnP_C () frac += fracstep; } while (--count); } +#endif } // Add translated source to destination, clamping it to white void R_DrawAddClampTranslatedColumnP_C () { int count; - 
BYTE *dest; + canvas_pixel_t *dest; fixed_t frac; fixed_t fracstep; @@ -750,6 +1062,35 @@ void R_DrawAddClampTranslatedColumnP_C () fracstep = dc_iscale; frac = dc_texturefrac; +#ifndef PALETTEOUTPUT + { + BYTE *translation = dc_translation; + BYTE *colormap = dc_colormap; + const BYTE *source = dc_source; + int pitch = dc_pitch; + uint32_t light = calc_light_multiplier(dc_light); + + do + { + uint32_t fg = shade_pal_index(colormap[translation[source[frac >> FRACBITS]]], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(fg_red + bg_red, 0, 255); + uint32_t green = clamp(fg_green + bg_green, 0, 255); + uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += pitch; + frac += fracstep; + } while (--count); + } +#else { BYTE *translation = dc_translation; BYTE *colormap = dc_colormap; @@ -773,13 +1114,14 @@ void R_DrawAddClampTranslatedColumnP_C () frac += fracstep; } while (--count); } +#endif } // Subtract destination from source, clamping it to black void R_DrawSubClampColumnP_C () { int count; - BYTE *dest; + canvas_pixel_t *dest; fixed_t frac; fixed_t fracstep; @@ -792,6 +1134,34 @@ void R_DrawSubClampColumnP_C () fracstep = dc_iscale; frac = dc_texturefrac; +#ifndef PALETTEOUTPUT + { + BYTE *colormap = dc_colormap; + const BYTE *source = dc_source; + int pitch = dc_pitch; + uint32_t light = calc_light_multiplier(dc_light); + + do + { + uint32_t fg = shade_pal_index(colormap[source[frac >> FRACBITS]], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(256 - fg_red + bg_red, 
256, 256 + 255) - 256; + uint32_t green = clamp(256 - fg_green + bg_green, 256, 256 + 255) - 256; + uint32_t blue = clamp(256 - fg_blue + bg_blue, 256, 256 + 255) - 256; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += pitch; + frac += fracstep; + } while (--count); + } +#else { BYTE *colormap = dc_colormap; const BYTE *source = dc_source; @@ -813,13 +1183,14 @@ void R_DrawSubClampColumnP_C () frac += fracstep; } while (--count); } +#endif } // Subtract destination from source, clamping it to black void R_DrawSubClampTranslatedColumnP_C () { int count; - BYTE *dest; + canvas_pixel_t *dest; fixed_t frac; fixed_t fracstep; @@ -832,6 +1203,35 @@ void R_DrawSubClampTranslatedColumnP_C () fracstep = dc_iscale; frac = dc_texturefrac; +#ifndef PALETTEOUTPUT + { + BYTE *translation = dc_translation; + BYTE *colormap = dc_colormap; + const BYTE *source = dc_source; + int pitch = dc_pitch; + uint32_t light = calc_light_multiplier(dc_light); + + do + { + uint32_t fg = shade_pal_index(colormap[translation[source[frac >> FRACBITS]]], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(256 - fg_red + bg_red, 256, 256 + 255) - 256; + uint32_t green = clamp(256 - fg_green + bg_green, 256, 256 + 255) - 256; + uint32_t blue = clamp(256 - fg_blue + bg_blue, 256, 256 + 255) - 256; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += pitch; + frac += fracstep; + } while (--count); + } +#else { BYTE *translation = dc_translation; BYTE *colormap = dc_colormap; @@ -854,13 +1254,14 @@ void R_DrawSubClampTranslatedColumnP_C () frac += fracstep; } while (--count); } +#endif } // Subtract source from destination, clamping it to black void R_DrawRevSubClampColumnP_C () { int count; - BYTE *dest; + canvas_pixel_t *dest; fixed_t frac; fixed_t 
fracstep; @@ -873,6 +1274,34 @@ void R_DrawRevSubClampColumnP_C () fracstep = dc_iscale; frac = dc_texturefrac; +#ifndef PALETTEOUTPUT + { + BYTE *colormap = dc_colormap; + const BYTE *source = dc_source; + int pitch = dc_pitch; + uint32_t light = calc_light_multiplier(dc_light); + + do + { + uint32_t fg = shade_pal_index(colormap[source[frac >> FRACBITS]], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(256 + fg_red - bg_red, 256, 256 + 255) - 256; + uint32_t green = clamp(256 + fg_green - bg_green, 256, 256 + 255) - 256; + uint32_t blue = clamp(256 + fg_blue - bg_blue, 256, 256 + 255) - 256; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += pitch; + frac += fracstep; + } while (--count); + } +#else { BYTE *colormap = dc_colormap; const BYTE *source = dc_source; @@ -894,13 +1323,14 @@ void R_DrawRevSubClampColumnP_C () frac += fracstep; } while (--count); } +#endif } // Subtract source from destination, clamping it to black void R_DrawRevSubClampTranslatedColumnP_C () { int count; - BYTE *dest; + canvas_pixel_t *dest; fixed_t frac; fixed_t fracstep; @@ -913,6 +1343,35 @@ void R_DrawRevSubClampTranslatedColumnP_C () fracstep = dc_iscale; frac = dc_texturefrac; +#ifndef PALETTEOUTPUT + { + BYTE *translation = dc_translation; + BYTE *colormap = dc_colormap; + const BYTE *source = dc_source; + int pitch = dc_pitch; + uint32_t light = calc_light_multiplier(dc_light); + + do + { + uint32_t fg = shade_pal_index(colormap[translation[source[frac >> FRACBITS]]], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(256 + fg_red 
- bg_red, 256, 256 + 255) - 256; + uint32_t green = clamp(256 + fg_green - bg_green, 256, 256 + 255) - 256; + uint32_t blue = clamp(256 + fg_blue - bg_blue, 256, 256 + 255) - 256; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += pitch; + frac += fracstep; + } while (--count); + } +#else { BYTE *translation = dc_translation; BYTE *colormap = dc_colormap; @@ -935,6 +1394,7 @@ void R_DrawRevSubClampTranslatedColumnP_C () frac += fracstep; } while (--count); } +#endif } @@ -967,6 +1427,7 @@ int ds_x1; int ds_x2; lighttable_t* ds_colormap; +//dsfixed_t ds_light; dsfixed_t ds_xfrac; dsfixed_t ds_yfrac; @@ -1019,6 +1480,7 @@ void R_SetSpanSource(const BYTE *pixels) void R_SetSpanColormap(BYTE *colormap) { ds_colormap = colormap; + ds_light = 0; #ifdef X86_ASM if (ds_colormap != ds_curcolormap) { @@ -1062,7 +1524,7 @@ void R_DrawSpanP_C (void) dsfixed_t yfrac; dsfixed_t xstep; dsfixed_t ystep; - BYTE* dest; + canvas_pixel_t* dest; const BYTE* source = ds_source; const BYTE* colormap = ds_colormap; int count; @@ -1087,9 +1549,64 @@ void R_DrawSpanP_C (void) xstep = ds_xstep; ystep = ds_ystep; +#ifndef PALETTEOUTPUT + uint32_t light = calc_light_multiplier(ds_light); +#endif + if (ds_xbits == 6 && ds_ybits == 6) { // 64x64 is the most common case by far, so special case it. + +#ifndef PALETTEOUTPUT +#ifndef NO_SSE + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + uint32_t *palette = (uint32_t*)GPalette.BaseColors; + + int sse_count = count / 4; + count -= sse_count * 4; + while (sse_count--) + { + // Current texture index in u,v. 
+ spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p0 = colormap[source[spot]]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p1 = colormap[source[spot]]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p2 = colormap[source[spot]]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p3 = colormap[source[spot]]; + xfrac += xstep; + yfrac += ystep; + + // Lookup pixel from flat texture tile, + // re-index using light/colormap. + __m128i fg = _mm_set_epi32(palette[p0], palette[p1], palette[p2], palette[p3]); + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + fg_hi = _mm_mullo_epi16(fg_hi, mlight); + fg_hi = _mm_srli_epi16(fg_hi, 8); + fg_lo = _mm_mullo_epi16(fg_lo, mlight); + fg_lo = _mm_srli_epi16(fg_lo, 8); + fg = _mm_packus_epi16(fg_hi, fg_lo); + _mm_storeu_si128((__m128i*)dest, fg); + + // Next step in u,v. + dest += 4; + } + if (count == 0) + return; +#endif +#endif + do { // Current texture index in u,v. @@ -1097,7 +1614,11 @@ void R_DrawSpanP_C (void) // Lookup pixel from flat texture tile, // re-index using light/colormap. +#ifndef PALETTEOUTPUT + *dest++ = shade_pal_index(colormap[source[spot]], light); +#else *dest++ = colormap[source[spot]]; +#endif // Next step in u,v. xfrac += xstep; @@ -1117,7 +1638,11 @@ void R_DrawSpanP_C (void) // Lookup pixel from flat texture tile, // re-index using light/colormap. +#ifndef PALETTEOUTPUT + *dest++ = shade_pal_index(colormap[source[spot]], light); +#else *dest++ = colormap[source[spot]]; +#endif // Next step in u,v. 
xfrac += xstep; @@ -1133,12 +1658,16 @@ void R_DrawSpanMaskedP_C (void) dsfixed_t yfrac; dsfixed_t xstep; dsfixed_t ystep; - BYTE* dest; + canvas_pixel_t* dest; const BYTE* source = ds_source; const BYTE* colormap = ds_colormap; int count; int spot; +#ifndef PALETTEOUTPUT + uint32_t light = calc_light_multiplier(ds_light); +#endif + xfrac = ds_xfrac; yfrac = ds_yfrac; @@ -1160,7 +1689,11 @@ void R_DrawSpanMaskedP_C (void) texdata = source[spot]; if (texdata != 0) { +#ifndef PALETTEOUTPUT + *dest = shade_pal_index(colormap[texdata], light); +#else *dest = colormap[texdata]; +#endif } dest++; xfrac += xstep; @@ -1180,7 +1713,11 @@ void R_DrawSpanMaskedP_C (void) texdata = source[spot]; if (texdata != 0) { +#ifndef PALETTEOUTPUT + *dest = shade_pal_index(colormap[texdata], light); +#else *dest = colormap[texdata]; +#endif } dest++; xfrac += xstep; @@ -1196,7 +1733,7 @@ void R_DrawSpanTranslucentP_C (void) dsfixed_t yfrac; dsfixed_t xstep; dsfixed_t ystep; - BYTE* dest; + canvas_pixel_t* dest; const BYTE* source = ds_source; const BYTE* colormap = ds_colormap; int count; @@ -1214,9 +1751,35 @@ void R_DrawSpanTranslucentP_C (void) xstep = ds_xstep; ystep = ds_ystep; + uint32_t light = calc_light_multiplier(ds_light); + if (ds_xbits == 6 && ds_ybits == 6) { // 64x64 is the most common case by far, so special case it. 
+#ifndef PALETTEOUTPUT + do + { + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + + uint32_t fg = shade_pal_index(colormap[source[spot]], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = (fg) & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = (fg_red + bg_red + 1) / 2; + uint32_t green = (fg_green + bg_green + 1) / 2; + uint32_t blue = (fg_blue + bg_blue + 1) / 2; + + *dest++ = 0xff000000 | (red << 16) | (green << 8) | blue; + + xfrac += xstep; + yfrac += ystep; + } while (--count); +#else do { spot = ((xfrac>>(32-6-6))&(63*64)) + (yfrac>>(32-6)); @@ -1229,9 +1792,37 @@ void R_DrawSpanTranslucentP_C (void) xfrac += xstep; yfrac += ystep; } while (--count); +#endif } else { +#ifndef PALETTEOUTPUT + BYTE yshift = 32 - ds_ybits; + BYTE xshift = yshift - ds_xbits; + int xmask = ((1 << ds_xbits) - 1) << ds_ybits; + do + { + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + + uint32_t fg = shade_pal_index(colormap[source[spot]], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = (fg) & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = (fg_red + bg_red + 1) / 2; + uint32_t green = (fg_green + bg_green + 1) / 2; + uint32_t blue = (fg_blue + bg_blue + 1) / 2; + + *dest++ = 0xff000000 | (red << 16) | (green << 8) | blue; + + xfrac += xstep; + yfrac += ystep; + } while (--count); +#else BYTE yshift = 32 - ds_ybits; BYTE xshift = yshift - ds_xbits; int xmask = ((1 << ds_xbits) - 1) << ds_ybits; @@ -1247,6 +1838,7 @@ void R_DrawSpanTranslucentP_C (void) xfrac += xstep; yfrac += ystep; } while (--count); +#endif } } @@ -1256,7 +1848,7 @@ void R_DrawSpanMaskedTranslucentP_C (void) dsfixed_t yfrac; dsfixed_t xstep; dsfixed_t ystep; - BYTE* 
dest; + canvas_pixel_t* dest; const BYTE* source = ds_source; const BYTE* colormap = ds_colormap; int count; @@ -1264,6 +1856,8 @@ void R_DrawSpanMaskedTranslucentP_C (void) DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; + uint32_t light = calc_light_multiplier(ds_light); + xfrac = ds_xfrac; yfrac = ds_yfrac; @@ -1285,12 +1879,29 @@ void R_DrawSpanMaskedTranslucentP_C (void) texdata = source[spot]; if (texdata != 0) { +#ifndef PALETTEOUTPUT + uint32_t fg = shade_pal_index(colormap[texdata], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = (fg) & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = (fg_red + bg_red + 1) / 2; + uint32_t green = (fg_green + bg_green + 1) / 2; + uint32_t blue = (fg_blue + bg_blue + 1) / 2; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; +#else DWORD fg = colormap[texdata]; DWORD bg = *dest; fg = fg2rgb[fg]; bg = bg2rgb[bg]; fg = (fg+bg) | 0x1f07c1f; *dest = RGB32k.All[fg & (fg>>15)]; +#endif } dest++; xfrac += xstep; @@ -1310,12 +1921,29 @@ void R_DrawSpanMaskedTranslucentP_C (void) texdata = source[spot]; if (texdata != 0) { +#ifndef PALETTEOUTPUT + uint32_t fg = shade_pal_index(colormap[texdata], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = (fg) & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = (fg_red + bg_red + 1) / 2; + uint32_t green = (fg_green + bg_green + 1) / 2; + uint32_t blue = (fg_blue + bg_blue + 1) / 2; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; +#else DWORD fg = colormap[texdata]; DWORD bg = *dest; fg = fg2rgb[fg]; bg = bg2rgb[bg]; fg = (fg+bg) | 0x1f07c1f; *dest = RGB32k.All[fg & (fg>>15)]; +#endif } dest++; xfrac += xstep; @@ -1330,7 +1958,7 @@ void 
R_DrawSpanAddClampP_C (void) dsfixed_t yfrac; dsfixed_t xstep; dsfixed_t ystep; - BYTE* dest; + canvas_pixel_t* dest; const BYTE* source = ds_source; const BYTE* colormap = ds_colormap; int count; @@ -1338,6 +1966,8 @@ void R_DrawSpanAddClampP_C (void) DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; + uint32_t light = calc_light_multiplier(ds_light); + xfrac = ds_xfrac; yfrac = ds_yfrac; @@ -1354,6 +1984,23 @@ void R_DrawSpanAddClampP_C (void) do { spot = ((xfrac>>(32-6-6))&(63*64)) + (yfrac>>(32-6)); + +#ifndef PALETTEOUTPUT + uint32_t fg = shade_pal_index(colormap[source[spot]], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = (fg) & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(fg_red + bg_red, 0, 255); + uint32_t green = clamp(fg_green + bg_green, 0, 255); + uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + + *dest++ = 0xff000000 | (red << 16) | (green << 8) | blue; +#else DWORD a = fg2rgb[colormap[source[spot]]] + bg2rgb[*dest]; DWORD b = a; @@ -1363,6 +2010,8 @@ void R_DrawSpanAddClampP_C (void) b = b - (b >> 5); a |= b; *dest++ = RGB32k.All[a & (a>>15)]; +#endif + xfrac += xstep; yfrac += ystep; } while (--count); @@ -1375,6 +2024,23 @@ void R_DrawSpanAddClampP_C (void) do { spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + +#ifndef PALETTEOUTPUT + uint32_t fg = shade_pal_index(colormap[source[spot]], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = (fg) & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(fg_red + bg_red, 0, 255); + uint32_t green = clamp(fg_green + bg_green, 0, 255); + uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + + *dest++ = 0xff000000 | (red << 16) | (green << 8) | blue; +#else DWORD a 
= fg2rgb[colormap[source[spot]]] + bg2rgb[*dest]; DWORD b = a; @@ -1384,6 +2050,8 @@ void R_DrawSpanAddClampP_C (void) b = b - (b >> 5); a |= b; *dest++ = RGB32k.All[a & (a>>15)]; +#endif + xfrac += xstep; yfrac += ystep; } while (--count); @@ -1396,7 +2064,7 @@ void R_DrawSpanMaskedAddClampP_C (void) dsfixed_t yfrac; dsfixed_t xstep; dsfixed_t ystep; - BYTE* dest; + canvas_pixel_t* dest; const BYTE* source = ds_source; const BYTE* colormap = ds_colormap; int count; @@ -1404,6 +2072,8 @@ void R_DrawSpanMaskedAddClampP_C (void) DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; + uint32_t light = calc_light_multiplier(ds_light); + xfrac = ds_xfrac; yfrac = ds_yfrac; @@ -1425,6 +2095,22 @@ void R_DrawSpanMaskedAddClampP_C (void) texdata = source[spot]; if (texdata != 0) { +#ifndef PALETTEOUTPUT + uint32_t fg = shade_pal_index(colormap[texdata], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = (fg) & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = (fg_red + bg_red + 1) / 2; + uint32_t green = (fg_green + bg_green + 1) / 2; + uint32_t blue = (fg_blue + bg_blue + 1) / 2; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; +#else DWORD a = fg2rgb[colormap[texdata]] + bg2rgb[*dest]; DWORD b = a; @@ -1434,6 +2120,7 @@ void R_DrawSpanMaskedAddClampP_C (void) b = b - (b >> 5); a |= b; *dest = RGB32k.All[a & (a>>15)]; +#endif } dest++; xfrac += xstep; @@ -1453,6 +2140,22 @@ void R_DrawSpanMaskedAddClampP_C (void) texdata = source[spot]; if (texdata != 0) { +#ifndef PALETTEOUTPUT + uint32_t fg = shade_pal_index(colormap[texdata], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = (fg) & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = (fg_red + 
bg_red + 1) / 2; + uint32_t green = (fg_green + bg_green + 1) / 2; + uint32_t blue = (fg_blue + bg_blue + 1) / 2; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; +#else DWORD a = fg2rgb[colormap[texdata]] + bg2rgb[*dest]; DWORD b = a; @@ -1462,6 +2165,7 @@ void R_DrawSpanMaskedAddClampP_C (void) b = b - (b >> 5); a |= b; *dest = RGB32k.All[a & (a>>15)]; +#endif } dest++; xfrac += xstep; @@ -1473,7 +2177,16 @@ void R_DrawSpanMaskedAddClampP_C (void) // [RH] Just fill a span with a color void R_FillSpan (void) { - memset (ylookup[ds_y] + ds_x1 + dc_destorg, ds_color, ds_x2 - ds_x1 + 1); +#ifndef PALETTEOUTPUT + canvas_pixel_t *dest = ylookup[ds_y] + ds_x1 + dc_destorg; + int count = (ds_x2 - ds_x1 + 1); + uint32_t light = calc_light_multiplier(ds_light); + uint32_t color = shade_pal_index(ds_color, light); + for (int i = 0; i < count; i++) + dest[i] = color; +#else + memset (ylookup[ds_y] + ds_x1 + dc_destorg, ds_color, (ds_x2 - ds_x1 + 1) * sizeof(canvas_pixel_t)); +#endif } // Draw a voxel slab @@ -1492,7 +2205,7 @@ extern "C" void R_SetupDrawSlabC(const BYTE *colormap) slabcolormap = colormap; } -extern "C" void R_DrawSlabC(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, BYTE *p) +extern "C" void R_DrawSlabC(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, canvas_pixel_t *p) { int x; const BYTE *colormap = slabcolormap; @@ -1666,13 +2379,21 @@ DWORD vlinec1 () BYTE *colormap = dc_colormap; int count = dc_count; const BYTE *source = dc_source; - BYTE *dest = dc_dest; + canvas_pixel_t *dest = dc_dest; int bits = vlinebits; int pitch = dc_pitch; +#ifndef PALETTEOUTPUT + uint32_t light = calc_light_multiplier(dc_light); +#endif + do { - *dest = colormap[source[frac>>bits]]; +#ifndef PALETTEOUTPUT + *dest = shade_pal_index(colormap[source[frac>>bits]], light); +#else + *dest = colormap[source[frac >> bits]]; +#endif frac += fracstep; dest += pitch; } while (--count); @@ -1682,19 +2403,83 @@ DWORD vlinec1 () void vlinec4 () { - BYTE *dest = 
dc_dest; + canvas_pixel_t *dest = dc_dest; int count = dc_count; int bits = vlinebits; DWORD place; +#ifndef PALETTEOUTPUT + uint32_t light0 = calc_light_multiplier(palookuplight[0]); + uint32_t light1 = calc_light_multiplier(palookuplight[1]); + uint32_t light2 = calc_light_multiplier(palookuplight[2]); + uint32_t light3 = calc_light_multiplier(palookuplight[3]); +#ifndef NO_SSE + __m128i mlight_hi = _mm_set_epi16(256, light0, light0, light0, 256, light1, light1, light1); + __m128i mlight_lo = _mm_set_epi16(256, light2, light2, light2, 256, light3, light3, light3); + uint32_t *palette = (uint32_t*)GPalette.BaseColors; + DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; + DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; +#endif +#endif + do { +#ifndef PALETTEOUTPUT +#ifndef NO_SSE + + DWORD place0 = local_vplce[0]; + DWORD place1 = local_vplce[1]; + DWORD place2 = local_vplce[2]; + DWORD place3 = local_vplce[3]; + + BYTE p0 = palookupoffse[0][bufplce[0][place0 >> bits]]; + BYTE p1 = palookupoffse[1][bufplce[1][place1 >> bits]]; + BYTE p2 = palookupoffse[2][bufplce[2][place2 >> bits]]; + BYTE p3 = palookupoffse[3][bufplce[3][place3 >> bits]]; + + local_vplce[0] = place0 + local_vince[0]; + local_vplce[1] = place1 + local_vince[1]; + local_vplce[2] = place2 + local_vince[2]; + local_vplce[3] = place3 + local_vince[3]; + + __m128i fg = _mm_set_epi32(palette[p0], palette[p1], palette[p2], palette[p3]); + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + fg_hi = _mm_mullo_epi16(fg_hi, mlight_hi); + fg_hi = _mm_srli_epi16(fg_hi, 8); + fg_lo = _mm_mullo_epi16(fg_lo, mlight_lo); + fg_lo = _mm_srli_epi16(fg_lo, 8); + fg = _mm_packus_epi16(fg_hi, fg_lo); + _mm_storeu_si128((__m128i*)dest, fg); + +#else + dest[0] = shade_pal_index(palookupoffse[0][bufplce[0][(place = vplce[0]) >> bits]], light0); vplce[0] = place + vince[0]; + dest[1] = 
shade_pal_index(palookupoffse[1][bufplce[1][(place = vplce[1]) >> bits]], light1); vplce[1] = place + vince[1]; + dest[2] = shade_pal_index(palookupoffse[2][bufplce[2][(place = vplce[2]) >> bits]], light2); vplce[2] = place + vince[2]; + dest[3] = shade_pal_index(palookupoffse[3][bufplce[3][(place = vplce[3]) >> bits]], light3); vplce[3] = place + vince[3]; +#endif +#else dest[0] = palookupoffse[0][bufplce[0][(place=vplce[0])>>bits]]; vplce[0] = place+vince[0]; dest[1] = palookupoffse[1][bufplce[1][(place=vplce[1])>>bits]]; vplce[1] = place+vince[1]; dest[2] = palookupoffse[2][bufplce[2][(place=vplce[2])>>bits]]; vplce[2] = place+vince[2]; dest[3] = palookupoffse[3][bufplce[3][(place=vplce[3])>>bits]]; vplce[3] = place+vince[3]; +#endif dest += dc_pitch; } while (--count); + +#ifndef PALETTEOUTPUT +#ifndef NO_SSE + // Is this needed? Global variables makes it tricky to know.. + vplce[0] = local_vplce[0]; + vplce[1] = local_vplce[1]; + vplce[2] = local_vplce[2]; + vplce[3] = local_vplce[3]; + vince[0] = local_vince[0]; + vince[1] = local_vince[1]; + vince[2] = local_vince[2]; + vince[3] = local_vince[3]; +#endif +#endif } #endif @@ -1717,16 +2502,24 @@ DWORD mvlinec1 () BYTE *colormap = dc_colormap; int count = dc_count; const BYTE *source = dc_source; - BYTE *dest = dc_dest; + canvas_pixel_t *dest = dc_dest; int bits = mvlinebits; int pitch = dc_pitch; +#ifndef PALETTEOUTPUT + uint32_t light = calc_light_multiplier(dc_light); +#endif + do { BYTE pix = source[frac>>bits]; if (pix != 0) { +#ifndef PALETTEOUTPUT + *dest = shade_pal_index(colormap[pix], light); +#else *dest = colormap[pix]; +#endif } frac += fracstep; dest += pitch; @@ -1737,19 +2530,33 @@ DWORD mvlinec1 () void mvlinec4 () { - BYTE *dest = dc_dest; + canvas_pixel_t *dest = dc_dest; int count = dc_count; int bits = mvlinebits; DWORD place; +#ifndef PALETTEOUTPUT + uint32_t light0 = calc_light_multiplier(palookuplight[0]); + uint32_t light1 = calc_light_multiplier(palookuplight[1]); + uint32_t light2 = 
calc_light_multiplier(palookuplight[2]); + uint32_t light3 = calc_light_multiplier(palookuplight[3]); +#endif + do { BYTE pix; +#ifndef PALETTEOUTPUT + pix = bufplce[0][(place = vplce[0]) >> bits]; if (pix) dest[0] = shade_pal_index(palookupoffse[0][pix], light0); vplce[0] = place + vince[0]; + pix = bufplce[1][(place = vplce[1]) >> bits]; if (pix) dest[1] = shade_pal_index(palookupoffse[1][pix], light1); vplce[1] = place + vince[1]; + pix = bufplce[2][(place = vplce[2]) >> bits]; if (pix) dest[2] = shade_pal_index(palookupoffse[2][pix], light2); vplce[2] = place + vince[2]; + pix = bufplce[3][(place = vplce[3]) >> bits]; if (pix) dest[3] = shade_pal_index(palookupoffse[3][pix], light3); vplce[3] = place + vince[3]; +#else pix = bufplce[0][(place=vplce[0])>>bits]; if(pix) dest[0] = palookupoffse[0][pix]; vplce[0] = place+vince[0]; pix = bufplce[1][(place=vplce[1])>>bits]; if(pix) dest[1] = palookupoffse[1][pix]; vplce[1] = place+vince[1]; pix = bufplce[2][(place=vplce[2])>>bits]; if(pix) dest[2] = palookupoffse[2][pix]; vplce[2] = place+vince[2]; pix = bufplce[3][(place=vplce[3])>>bits]; if(pix) dest[3] = palookupoffse[3][pix]; vplce[3] = place+vince[3]; +#endif dest += dc_pitch; } while (--count); } @@ -1763,7 +2570,11 @@ extern int wallshade; static void R_DrawFogBoundarySection (int y, int y2, int x1) { BYTE *colormap = dc_colormap; - BYTE *dest = ylookup[y] + dc_destorg; + canvas_pixel_t *dest = ylookup[y] + dc_destorg; + +#ifndef PALETTEOUTPUT + uint32_t light = calc_light_multiplier(dc_light); +#endif for (; y < y2; ++y) { @@ -1771,7 +2582,11 @@ static void R_DrawFogBoundarySection (int y, int y2, int x1) int x = x1; do { +#ifndef PALETTEOUTPUT + dest[x] = shade_pal_index(colormap[dest[x]], light); +#else dest[x] = colormap[dest[x]]; +#endif } while (++x <= x2); dest += dc_pitch; } @@ -1781,10 +2596,19 @@ static void R_DrawFogBoundaryLine (int y, int x) { int x2 = spanend[y]; BYTE *colormap = dc_colormap; - BYTE *dest = ylookup[y] + dc_destorg; + 
canvas_pixel_t *dest = ylookup[y] + dc_destorg; + +#ifndef PALETTEOUTPUT + uint32_t light = calc_light_multiplier(dc_light); +#endif + do { +#ifndef PALETTEOUTPUT + dest[x] = shade_pal_index(colormap[dest[x]], light); +#else dest[x] = colormap[dest[x]]; +#endif } while (++x <= x2); } @@ -1809,6 +2633,7 @@ void R_DrawFogBoundary (int x1, int x2, short *uclip, short *dclip) } dc_colormap = basecolormapdata + (rcolormap << COLORMAPSHIFT); + dc_light = 0; for (--x; x >= x1; --x) { @@ -1834,6 +2659,7 @@ void R_DrawFogBoundary (int x1, int x2, short *uclip, short *dclip) } rcolormap = lcolormap; dc_colormap = basecolormapdata + (lcolormap << COLORMAPSHIFT); + dc_light = 0; } else { @@ -1891,15 +2717,37 @@ fixed_t tmvline1_add () BYTE *colormap = dc_colormap; int count = dc_count; const BYTE *source = dc_source; - BYTE *dest = dc_dest; + canvas_pixel_t *dest = dc_dest; int bits = tmvlinebits; int pitch = dc_pitch; DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; + uint32_t light = calc_light_multiplier(dc_light); + do { +#ifndef PALETTEOUTPUT + BYTE pix = source[frac >> bits]; + if (pix != 0) + { + uint32_t fg = shade_pal_index(colormap[pix], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(fg_red + bg_red, 0, 255); + uint32_t green = clamp(fg_green + bg_green, 0, 255); + uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + } +#else BYTE pix = source[frac>>bits]; if (pix != 0) { @@ -1908,6 +2756,7 @@ fixed_t tmvline1_add () fg = (fg+bg) | 0x1f07c1f; *dest = RGB32k.All[fg & (fg>>15)]; } +#endif frac += fracstep; dest += pitch; } while (--count); @@ -1917,13 +2766,19 @@ fixed_t tmvline1_add () void tmvline4_add () { - BYTE *dest = dc_dest; + canvas_pixel_t *dest = dc_dest; int count = 
dc_count; int bits = tmvlinebits; DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; + uint32_t light[4]; + light[0] = calc_light_multiplier(palookuplight[0]); + light[1] = calc_light_multiplier(palookuplight[1]); + light[2] = calc_light_multiplier(palookuplight[2]); + light[3] = calc_light_multiplier(palookuplight[3]); + do { for (int i = 0; i < 4; ++i) @@ -1931,10 +2786,27 @@ void tmvline4_add () BYTE pix = bufplce[i][vplce[i] >> bits]; if (pix != 0) { +#ifndef PALETTEOUTPUT + uint32_t fg = shade_pal_index(palookupoffse[i][pix], light[i]); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(fg_red + bg_red, 0, 255); + uint32_t green = clamp(fg_green + bg_green, 0, 255); + uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + + dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; +#else DWORD fg = fg2rgb[palookupoffse[i][pix]]; DWORD bg = bg2rgb[dest[i]]; fg = (fg+bg) | 0x1f07c1f; dest[i] = RGB32k.All[fg & (fg>>15)]; +#endif } vplce[i] += vince[i]; } @@ -1949,18 +2821,36 @@ fixed_t tmvline1_addclamp () BYTE *colormap = dc_colormap; int count = dc_count; const BYTE *source = dc_source; - BYTE *dest = dc_dest; + canvas_pixel_t *dest = dc_dest; int bits = tmvlinebits; int pitch = dc_pitch; DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; + uint32_t light = calc_light_multiplier(dc_light); + do { BYTE pix = source[frac>>bits]; if (pix != 0) { +#ifndef PALETTEOUTPUT + uint32_t fg = shade_pal_index(colormap[pix], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(fg_red + bg_red, 0, 255); + uint32_t green = clamp(fg_green + 
bg_green, 0, 255); + uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; +#else DWORD a = fg2rgb[colormap[pix]] + bg2rgb[*dest]; DWORD b = a; @@ -1970,6 +2860,7 @@ fixed_t tmvline1_addclamp () b = b - (b >> 5); a |= b; *dest = RGB32k.All[a & (a>>15)]; +#endif } frac += fracstep; dest += pitch; @@ -1980,13 +2871,19 @@ fixed_t tmvline1_addclamp () void tmvline4_addclamp () { - BYTE *dest = dc_dest; + canvas_pixel_t *dest = dc_dest; int count = dc_count; int bits = tmvlinebits; DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; + uint32_t light[4]; + light[0] = calc_light_multiplier(palookuplight[0]); + light[1] = calc_light_multiplier(palookuplight[1]); + light[2] = calc_light_multiplier(palookuplight[2]); + light[3] = calc_light_multiplier(palookuplight[3]); + do { for (int i = 0; i < 4; ++i) @@ -1994,6 +2891,22 @@ void tmvline4_addclamp () BYTE pix = bufplce[i][vplce[i] >> bits]; if (pix != 0) { +#ifndef PALETTEOUTPUT + uint32_t fg = shade_pal_index(palookupoffse[i][pix], light[i]); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (dest[i] >> 16) & 0xff; + uint32_t bg_green = (dest[i] >> 8) & 0xff; + uint32_t bg_blue = (dest[i]) & 0xff; + + uint32_t red = clamp(fg_red + bg_red, 0, 255); + uint32_t green = clamp(fg_green + bg_green, 0, 255); + uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + + dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; +#else DWORD a = fg2rgb[palookupoffse[i][pix]] + bg2rgb[dest[i]]; DWORD b = a; @@ -2003,6 +2916,7 @@ void tmvline4_addclamp () b = b - (b >> 5); a |= b; dest[i] = RGB32k.All[a & (a>>15)]; +#endif } vplce[i] += vince[i]; } @@ -2017,18 +2931,36 @@ fixed_t tmvline1_subclamp () BYTE *colormap = dc_colormap; int count = dc_count; const BYTE *source = dc_source; - BYTE *dest = dc_dest; + canvas_pixel_t *dest = dc_dest; int bits = tmvlinebits; int pitch = dc_pitch; DWORD 
*fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; + uint32_t light = calc_light_multiplier(dc_light); + do { BYTE pix = source[frac>>bits]; if (pix != 0) { +#ifndef PALETTEOUTPUT + uint32_t fg = shade_pal_index(colormap[pix], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(256 - fg_red + bg_red, 256, 256 + 255) - 256; + uint32_t green = clamp(256 - fg_green + bg_green, 256, 256 + 255) - 256; + uint32_t blue = clamp(256 - fg_blue + bg_blue, 256, 256 + 255) - 256; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; +#else DWORD a = (fg2rgb[colormap[pix]] | 0x40100400) - bg2rgb[*dest]; DWORD b = a; @@ -2037,6 +2969,7 @@ fixed_t tmvline1_subclamp () a &= b; a |= 0x01f07c1f; *dest = RGB32k.All[a & (a>>15)]; +#endif } frac += fracstep; dest += pitch; @@ -2047,13 +2980,19 @@ fixed_t tmvline1_subclamp () void tmvline4_subclamp () { - BYTE *dest = dc_dest; + canvas_pixel_t *dest = dc_dest; int count = dc_count; int bits = tmvlinebits; DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; + uint32_t light[4]; + light[0] = calc_light_multiplier(palookuplight[0]); + light[1] = calc_light_multiplier(palookuplight[1]); + light[2] = calc_light_multiplier(palookuplight[2]); + light[3] = calc_light_multiplier(palookuplight[3]); + do { for (int i = 0; i < 4; ++i) @@ -2061,6 +3000,22 @@ void tmvline4_subclamp () BYTE pix = bufplce[i][vplce[i] >> bits]; if (pix != 0) { +#ifndef PALETTEOUTPUT + uint32_t fg = shade_pal_index(palookupoffse[i][pix], light[i]); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (dest[i] >> 16) & 0xff; + uint32_t bg_green = (dest[i] >> 8) & 0xff; + uint32_t bg_blue = (dest[i]) & 0xff; + + uint32_t red = clamp(256 - fg_red + bg_red, 256, 256 + 
255) - 256; + uint32_t green = clamp(256 - fg_green + bg_green, 256, 256 + 255) - 256; + uint32_t blue = clamp(256 - fg_blue + bg_blue, 256, 256 + 255) - 256; + + dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; +#else DWORD a = (fg2rgb[palookupoffse[i][pix]] | 0x40100400) - bg2rgb[dest[i]]; DWORD b = a; @@ -2069,6 +3024,7 @@ void tmvline4_subclamp () a &= b; a |= 0x01f07c1f; dest[i] = RGB32k.All[a & (a>>15)]; +#endif } vplce[i] += vince[i]; } @@ -2083,18 +3039,36 @@ fixed_t tmvline1_revsubclamp () BYTE *colormap = dc_colormap; int count = dc_count; const BYTE *source = dc_source; - BYTE *dest = dc_dest; + canvas_pixel_t *dest = dc_dest; int bits = tmvlinebits; int pitch = dc_pitch; DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; + uint32_t light = calc_light_multiplier(dc_light); + do { BYTE pix = source[frac>>bits]; if (pix != 0) { +#ifndef PALETTEOUTPUT + uint32_t fg = shade_pal_index(colormap[pix], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(256 + fg_red - bg_red, 256, 256 + 255) - 256; + uint32_t green = clamp(256 + fg_green - bg_green, 256, 256 + 255) - 256; + uint32_t blue = clamp(256 + fg_blue - bg_blue, 256, 256 + 255) - 256; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; +#else DWORD a = (bg2rgb[*dest] | 0x40100400) - fg2rgb[colormap[pix]]; DWORD b = a; @@ -2103,6 +3077,7 @@ fixed_t tmvline1_revsubclamp () a &= b; a |= 0x01f07c1f; *dest = RGB32k.All[a & (a>>15)]; +#endif } frac += fracstep; dest += pitch; @@ -2113,13 +3088,19 @@ fixed_t tmvline1_revsubclamp () void tmvline4_revsubclamp () { - BYTE *dest = dc_dest; + canvas_pixel_t *dest = dc_dest; int count = dc_count; int bits = tmvlinebits; DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; + uint32_t light[4]; + light[0] = 
calc_light_multiplier(palookuplight[0]); + light[1] = calc_light_multiplier(palookuplight[1]); + light[2] = calc_light_multiplier(palookuplight[2]); + light[3] = calc_light_multiplier(palookuplight[3]); + do { for (int i = 0; i < 4; ++i) @@ -2127,6 +3108,22 @@ void tmvline4_revsubclamp () BYTE pix = bufplce[i][vplce[i] >> bits]; if (pix != 0) { +#ifndef PALETTEOUTPUT + uint32_t fg = shade_pal_index(palookupoffse[i][pix], light[i]); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (dest[i] >> 16) & 0xff; + uint32_t bg_green = (dest[i] >> 8) & 0xff; + uint32_t bg_blue = (dest[i]) & 0xff; + + uint32_t red = clamp(256 + fg_red - bg_red, 256, 256 + 255) - 256; + uint32_t green = clamp(256 + fg_green - bg_green, 256, 256 + 255) - 256; + uint32_t blue = clamp(256 + fg_blue - bg_blue, 256, 256 + 255) - 256; + + dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; +#else DWORD a = (bg2rgb[dest[i]] | 0x40100400) - fg2rgb[palookupoffse[i][pix]]; DWORD b = a; @@ -2135,6 +3132,7 @@ void tmvline4_revsubclamp () a &= b; a |= 0x01f07c1f; dest[i] = RGB32k.All[a & (a>>15)]; +#endif } vplce[i] += vince[i]; } @@ -2418,6 +3416,7 @@ ESPSResult R_SetPatchStyle (FRenderStyle style, fixed_t alpha, int translation, { dc_colormap += fixedlightlev; } + dc_light = 0; return r_columnmethod ? 
DoDraw1 : DoDraw0; } @@ -2443,6 +3442,7 @@ ESPSResult R_SetPatchStyle (FRenderStyle style, fixed_t alpha, int translation, dc_srccolor = ((((r*x)>>4)<<20) | ((g*x)>>4) | ((((b)*x)>>4)<<10)) & 0x3feffbff; hcolfunc_pre = R_FillColumnHorizP; dc_colormap = identitymap; + dc_light = 0; } if (!R_SetBlendFunc (style.BlendOp, fglevel, bglevel, style.Flags)) diff --git a/src/r_draw.h b/src/r_draw.h index cb2f68f335..6f7a91154b 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -30,6 +30,7 @@ extern "C" int ylookup[MAXHEIGHT]; extern "C" int dc_pitch; // [RH] Distance between rows extern "C" lighttable_t*dc_colormap; +extern "C" fixed_t dc_light; extern "C" int dc_x; extern "C" int dc_yl; extern "C" int dc_yh; @@ -44,16 +45,17 @@ extern "C" DWORD *dc_destblend; // first pixel in a column extern "C" const BYTE* dc_source; -extern "C" BYTE *dc_dest, *dc_destorg; +extern "C" canvas_pixel_t *dc_dest, *dc_destorg; extern "C" int dc_count; extern "C" DWORD vplce[4]; extern "C" DWORD vince[4]; extern "C" BYTE* palookupoffse[4]; +extern "C" fixed_t palookuplight[4]; extern "C" const BYTE* bufplce[4]; // [RH] Temporary buffer for column drawing -extern "C" BYTE *dc_temp; +extern "C" canvas_pixel_t *dc_temp; extern "C" unsigned int dc_tspans[4][MAXHEIGHT]; extern "C" unsigned int *dc_ctspan[4]; extern "C" unsigned int horizspans[4]; @@ -184,7 +186,7 @@ extern void (*rt_map4cols)(int sx, int yl, int yh); void rt_draw4cols (int sx); // [RH] Preps the temporary horizontal buffer. 
-void rt_initcols (BYTE *buffer=NULL); +void rt_initcols (canvas_pixel_t *buffer=NULL); void R_DrawFogBoundary (int x1, int x2, short *uclip, short *dclip); @@ -231,13 +233,15 @@ void R_FillSpan (void); #endif extern "C" void R_SetupDrawSlab(const BYTE *colormap); -extern "C" void R_DrawSlab(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, BYTE *p); +extern "C" void R_DrawSlab(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, canvas_pixel_t *p); extern "C" int ds_y; extern "C" int ds_x1; extern "C" int ds_x2; extern "C" lighttable_t* ds_colormap; +//extern "C" dsfixed_t ds_light; +#define ds_light dc_light extern "C" dsfixed_t ds_xfrac; extern "C" dsfixed_t ds_yfrac; diff --git a/src/r_drawt.cpp b/src/r_drawt.cpp index e8faff0ceb..f5fc027b57 100644 --- a/src/r_drawt.cpp +++ b/src/r_drawt.cpp @@ -57,8 +57,8 @@ // dc_ctspan is advanced while drawing into dc_temp. // horizspan is advanced up to dc_ctspan when drawing from dc_temp to the screen. -BYTE dc_tempbuff[MAXHEIGHT*4]; -BYTE *dc_temp; +canvas_pixel_t dc_tempbuff[MAXHEIGHT*4]; +canvas_pixel_t *dc_temp; unsigned int dc_tspans[4][MAXHEIGHT]; unsigned int *dc_ctspan[4]; unsigned int *horizspan[4]; @@ -73,8 +73,8 @@ extern "C" void R_SetupAddClampCol(); // Copies one span at hx to the screen at sx. void rt_copy1col_c (int hx, int sx, int yl, int yh) { - BYTE *source; - BYTE *dest; + canvas_pixel_t *source; + canvas_pixel_t *dest; int count; int pitch; @@ -114,6 +114,13 @@ void rt_copy1col_c (int hx, int sx, int yl, int yh) // Copies all four spans to the screen starting at sx. 
void rt_copy4cols_c (int sx, int yl, int yh) { +#ifndef PALETTEOUTPUT + // To do: we could do this with SSE using __m128i + rt_copy1col_c(0, sx, yl, yh); + rt_copy1col_c(1, sx + 1, yl, yh); + rt_copy1col_c(2, sx + 2, yl, yh); + rt_copy1col_c(3, sx + 3, yl, yh); +#else int *source; int *dest; int count; @@ -142,14 +149,15 @@ void rt_copy4cols_c (int sx, int yl, int yh) source += 8/sizeof(int); dest += pitch*2; } while (--count); +#endif } // Maps one span at hx to the screen at sx. void rt_map1col_c (int hx, int sx, int yl, int yh) { BYTE *colormap; - BYTE *source; - BYTE *dest; + canvas_pixel_t *source; + canvas_pixel_t *dest; int count; int pitch; @@ -158,13 +166,21 @@ void rt_map1col_c (int hx, int sx, int yl, int yh) return; count++; +#ifndef PALETTEOUTPUT + uint32_t light = calc_light_multiplier(dc_light); +#endif + colormap = dc_colormap; dest = ylookup[yl] + sx + dc_destorg; source = &dc_temp[yl*4 + hx]; pitch = dc_pitch; if (count & 1) { +#ifndef PALETTEOUTPUT + *dest = shade_pal_index(colormap[*source], light); +#else *dest = colormap[*source]; +#endif source += 4; dest += pitch; } @@ -172,8 +188,13 @@ void rt_map1col_c (int hx, int sx, int yl, int yh) return; do { +#ifndef PALETTEOUTPUT + dest[0] = shade_pal_index(colormap[source[0]], light); + dest[pitch] = shade_pal_index(colormap[source[4]], light); +#else dest[0] = colormap[source[0]]; dest[pitch] = colormap[source[4]]; +#endif source += 8; dest += pitch*2; } while (--count); @@ -183,8 +204,8 @@ void rt_map1col_c (int hx, int sx, int yl, int yh) void rt_map4cols_c (int sx, int yl, int yh) { BYTE *colormap; - BYTE *source; - BYTE *dest; + canvas_pixel_t *source; + canvas_pixel_t *dest; int count; int pitch; @@ -193,16 +214,27 @@ void rt_map4cols_c (int sx, int yl, int yh) return; count++; +#ifndef PALETTEOUTPUT + uint32_t light = calc_light_multiplier(dc_light); +#endif + colormap = dc_colormap; dest = ylookup[yl] + sx + dc_destorg; source = &dc_temp[yl*4]; pitch = dc_pitch; if (count & 1) { +#ifndef 
PALETTEOUTPUT + dest[0] = shade_pal_index(colormap[source[0]], light); + dest[1] = shade_pal_index(colormap[source[1]], light); + dest[2] = shade_pal_index(colormap[source[2]], light); + dest[3] = shade_pal_index(colormap[source[3]], light); +#else dest[0] = colormap[source[0]]; dest[1] = colormap[source[1]]; dest[2] = colormap[source[2]]; dest[3] = colormap[source[3]]; +#endif source += 4; dest += pitch; } @@ -210,6 +242,16 @@ void rt_map4cols_c (int sx, int yl, int yh) return; do { +#ifndef PALETTEOUTPUT + dest[0] = shade_pal_index(colormap[source[0]], light); + dest[1] = shade_pal_index(colormap[source[1]], light); + dest[2] = shade_pal_index(colormap[source[2]], light); + dest[3] = shade_pal_index(colormap[source[3]], light); + dest[pitch] = shade_pal_index(colormap[source[4]], light); + dest[pitch + 1] = shade_pal_index(colormap[source[5]], light); + dest[pitch + 2] = shade_pal_index(colormap[source[6]], light); + dest[pitch + 3] = shade_pal_index(colormap[source[7]], light); +#else dest[0] = colormap[source[0]]; dest[1] = colormap[source[1]]; dest[2] = colormap[source[2]]; @@ -218,6 +260,7 @@ void rt_map4cols_c (int sx, int yl, int yh) dest[pitch+1] = colormap[source[5]]; dest[pitch+2] = colormap[source[6]]; dest[pitch+3] = colormap[source[7]]; +#endif source += 8; dest += pitch*2; } while (--count); @@ -227,7 +270,7 @@ void rt_map4cols_c (int sx, int yl, int yh) void rt_Translate1col(const BYTE *translation, int hx, int yl, int yh) { int count = yh - yl + 1; - BYTE *source = &dc_temp[yl*4 + hx]; + canvas_pixel_t *source = &dc_temp[yl*4 + hx]; // Things we do to hit the compiler's optimizer with a clue bat: // 1. 
Parallelism is explicitly spelled out by using a separate @@ -274,7 +317,7 @@ void rt_Translate1col(const BYTE *translation, int hx, int yl, int yh) void rt_Translate4cols(const BYTE *translation, int yl, int yh) { int count = yh - yl + 1; - BYTE *source = &dc_temp[yl*4]; + canvas_pixel_t *source = &dc_temp[yl*4]; int c0, c1; BYTE b0, b1; @@ -330,8 +373,8 @@ void rt_tlate4cols (int sx, int yl, int yh) void rt_add1col (int hx, int sx, int yl, int yh) { BYTE *colormap; - BYTE *source; - BYTE *dest; + canvas_pixel_t *source; + canvas_pixel_t *dest; int count; int pitch; @@ -340,13 +383,36 @@ void rt_add1col (int hx, int sx, int yl, int yh) return; count++; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; dest = ylookup[yl] + sx + dc_destorg; source = &dc_temp[yl*4 + hx]; pitch = dc_pitch; colormap = dc_colormap; +#ifndef PALETTEOUTPUT + uint32_t light = calc_light_multiplier(dc_light); + + do { + uint32_t fg = shade_pal_index(colormap[*source], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(fg_red + bg_red, 0, 255); + uint32_t green = clamp(fg_green + bg_green, 0, 255); + uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + + source += 4; + dest += pitch; + } while (--count); +#else + DWORD *fg2rgb = dc_srcblend; + DWORD *bg2rgb = dc_destblend; do { DWORD fg = colormap[*source]; DWORD bg = *dest; @@ -358,14 +424,15 @@ void rt_add1col (int hx, int sx, int yl, int yh) source += 4; dest += pitch; } while (--count); +#endif } // Adds all four spans to the screen starting at sx without clamping. 
void rt_add4cols_c (int sx, int yl, int yh) { BYTE *colormap; - BYTE *source; - BYTE *dest; + canvas_pixel_t *source; + canvas_pixel_t *dest; int count; int pitch; @@ -374,13 +441,40 @@ void rt_add4cols_c (int sx, int yl, int yh) return; count++; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; dest = ylookup[yl] + sx + dc_destorg; source = &dc_temp[yl*4]; pitch = dc_pitch; colormap = dc_colormap; +#ifndef PALETTEOUTPUT + uint32_t light = calc_light_multiplier(dc_light); + + do { + for (int i = 0; i < 4; i++) + { + uint32_t fg = shade_pal_index(colormap[source[i]], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (dest[i] >> 16) & 0xff; + uint32_t bg_green = (dest[i] >> 8) & 0xff; + uint32_t bg_blue = (dest[i]) & 0xff; + + uint32_t red = clamp(fg_red + bg_red, 0, 255); + uint32_t green = clamp(fg_green + bg_green, 0, 255); + uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + + dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; + } + + source += 4; + dest += pitch; + } while (--count); +#else + DWORD *fg2rgb = dc_srcblend; + DWORD *bg2rgb = dc_destblend; + do { DWORD fg = colormap[source[0]]; DWORD bg = dest[0]; @@ -414,6 +508,7 @@ void rt_add4cols_c (int sx, int yl, int yh) source += 4; dest += pitch; } while (--count); +#endif } // Translates and adds one span at hx to the screen at sx without clamping. @@ -433,10 +528,9 @@ void rt_tlateadd4cols (int sx, int yl, int yh) // Shades one span at hx to the screen at sx. 
void rt_shaded1col (int hx, int sx, int yl, int yh) { - DWORD *fgstart; BYTE *colormap; - BYTE *source; - BYTE *dest; + canvas_pixel_t *source; + canvas_pixel_t *dest; int count; int pitch; @@ -445,12 +539,37 @@ void rt_shaded1col (int hx, int sx, int yl, int yh) return; count++; - fgstart = &Col2RGB8[0][dc_color]; colormap = dc_colormap; dest = ylookup[yl] + sx + dc_destorg; source = &dc_temp[yl*4 + hx]; pitch = dc_pitch; +#ifndef PALETTEOUTPUT + uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(0)); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + do { + uint32_t alpha = colormap[*source]; + uint32_t inv_alpha = 64 - alpha; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = (fg_red * alpha + bg_red * inv_alpha) / 64; + uint32_t green = (fg_green * alpha + bg_green * inv_alpha) / 64; + uint32_t blue = (fg_blue * alpha + bg_blue * inv_alpha) / 64; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + source += 4; + dest += pitch; + } while (--count); +#else + DWORD *fgstart; + fgstart = &Col2RGB8[0][dc_color]; + do { DWORD val = colormap[*source]; DWORD fg = fgstart[val<<8]; @@ -459,15 +578,15 @@ void rt_shaded1col (int hx, int sx, int yl, int yh) source += 4; dest += pitch; } while (--count); +#endif } // Shades all four spans to the screen starting at sx. 
void rt_shaded4cols_c (int sx, int yl, int yh) { - DWORD *fgstart; BYTE *colormap; - BYTE *source; - BYTE *dest; + canvas_pixel_t *source; + canvas_pixel_t *dest; int count; int pitch; @@ -476,12 +595,40 @@ void rt_shaded4cols_c (int sx, int yl, int yh) return; count++; - fgstart = &Col2RGB8[0][dc_color]; colormap = dc_colormap; dest = ylookup[yl] + sx + dc_destorg; source = &dc_temp[yl*4]; pitch = dc_pitch; +#ifndef PALETTEOUTPUT + uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(0)); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + do { + for (int i = 0; i < 4; i++) + { + uint32_t alpha = colormap[source[i]]; + uint32_t inv_alpha = 64 - alpha; + + uint32_t bg_red = (dest[i] >> 16) & 0xff; + uint32_t bg_green = (dest[i] >> 8) & 0xff; + uint32_t bg_blue = (dest[i]) & 0xff; + + uint32_t red = (fg_red * alpha + bg_red * inv_alpha) / 64; + uint32_t green = (fg_green * alpha + bg_green * inv_alpha) / 64; + uint32_t blue = (fg_blue * alpha + bg_blue * inv_alpha) / 64; + + dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; + } + source += 4; + dest += pitch; + } while (--count); +#else + DWORD *fgstart; + fgstart = &Col2RGB8[0][dc_color]; + do { DWORD val; @@ -504,14 +651,15 @@ void rt_shaded4cols_c (int sx, int yl, int yh) source += 4; dest += pitch; } while (--count); +#endif } // Adds one span at hx to the screen at sx with clamping. 
void rt_addclamp1col (int hx, int sx, int yl, int yh) { BYTE *colormap; - BYTE *source; - BYTE *dest; + canvas_pixel_t *source; + canvas_pixel_t *dest; int count; int pitch; @@ -520,13 +668,36 @@ void rt_addclamp1col (int hx, int sx, int yl, int yh) return; count++; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; dest = ylookup[yl] + sx + dc_destorg; source = &dc_temp[yl*4 + hx]; pitch = dc_pitch; colormap = dc_colormap; +#ifndef PALETTEOUTPUT + uint32_t light = calc_light_multiplier(dc_light); + + do { + uint32_t fg = shade_pal_index(colormap[*source], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(fg_red + bg_red, 0, 255); + uint32_t green = clamp(fg_green + bg_green, 0, 255); + uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + source += 4; + dest += pitch; + } while (--count); +#else + DWORD *fg2rgb = dc_srcblend; + DWORD *bg2rgb = dc_destblend; + do { DWORD a = fg2rgb[colormap[*source]] + bg2rgb[*dest]; DWORD b = a; @@ -540,14 +711,15 @@ void rt_addclamp1col (int hx, int sx, int yl, int yh) source += 4; dest += pitch; } while (--count); +#endif } // Adds all four spans to the screen starting at sx with clamping. 
void rt_addclamp4cols_c (int sx, int yl, int yh) { BYTE *colormap; - BYTE *source; - BYTE *dest; + canvas_pixel_t *source; + canvas_pixel_t *dest; int count; int pitch; @@ -556,13 +728,39 @@ void rt_addclamp4cols_c (int sx, int yl, int yh) return; count++; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; dest = ylookup[yl] + sx + dc_destorg; source = &dc_temp[yl*4]; pitch = dc_pitch; colormap = dc_colormap; +#ifndef PALETTEOUTPUT + uint32_t light = calc_light_multiplier(dc_light); + + do { + for (int i = 0; i < 4; i++) + { + uint32_t fg = shade_pal_index(colormap[source[i]], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (dest[i] >> 16) & 0xff; + uint32_t bg_green = (dest[i] >> 8) & 0xff; + uint32_t bg_blue = (dest[i]) & 0xff; + + uint32_t red = clamp(fg_red + bg_red, 0, 255); + uint32_t green = clamp(fg_green + bg_green, 0, 255); + uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + + dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; + } + source += 4; + dest += pitch; + } while (--count); +#else + DWORD *fg2rgb = dc_srcblend; + DWORD *bg2rgb = dc_destblend; + do { DWORD a = fg2rgb[colormap[source[0]]] + bg2rgb[dest[0]]; DWORD b = a; @@ -604,6 +802,7 @@ void rt_addclamp4cols_c (int sx, int yl, int yh) source += 4; dest += pitch; } while (--count); +#endif } // Translates and adds one span at hx to the screen at sx with clamping. 
@@ -624,8 +823,8 @@ void rt_tlateaddclamp4cols (int sx, int yl, int yh) void rt_subclamp1col (int hx, int sx, int yl, int yh) { BYTE *colormap; - BYTE *source; - BYTE *dest; + canvas_pixel_t *source; + canvas_pixel_t *dest; int count; int pitch; @@ -634,13 +833,35 @@ void rt_subclamp1col (int hx, int sx, int yl, int yh) return; count++; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; dest = ylookup[yl] + sx + dc_destorg; source = &dc_temp[yl*4 + hx]; pitch = dc_pitch; colormap = dc_colormap; +#ifndef PALETTEOUTPUT + uint32_t light = calc_light_multiplier(dc_light); + + do { + uint32_t fg = shade_pal_index(colormap[*source], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(256 - fg_red + bg_red, 256, 256 + 255) - 256; + uint32_t green = clamp(256 - fg_green + bg_green, 256, 256 + 255) - 256; + uint32_t blue = clamp(256 - fg_blue + bg_blue, 256, 256 + 255) - 256; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + source += 4; + dest += pitch; + } while (--count); +#else + DWORD *fg2rgb = dc_srcblend; + DWORD *bg2rgb = dc_destblend; do { DWORD a = (fg2rgb[colormap[*source]] | 0x40100400) - bg2rgb[*dest]; DWORD b = a; @@ -653,14 +874,15 @@ void rt_subclamp1col (int hx, int sx, int yl, int yh) source += 4; dest += pitch; } while (--count); +#endif } // Subtracts all four spans to the screen starting at sx with clamping. 
void rt_subclamp4cols (int sx, int yl, int yh) { BYTE *colormap; - BYTE *source; - BYTE *dest; + canvas_pixel_t *source; + canvas_pixel_t *dest; int count; int pitch; @@ -669,13 +891,39 @@ void rt_subclamp4cols (int sx, int yl, int yh) return; count++; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; dest = ylookup[yl] + sx + dc_destorg; source = &dc_temp[yl*4]; pitch = dc_pitch; colormap = dc_colormap; +#ifndef PALETTEOUTPUT + uint32_t light = calc_light_multiplier(dc_light); + + do { + for (int i = 0; i < 4; i++) + { + uint32_t fg = shade_pal_index(colormap[source[i]], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (dest[i] >> 16) & 0xff; + uint32_t bg_green = (dest[i] >> 8) & 0xff; + uint32_t bg_blue = (dest[i]) & 0xff; + + uint32_t red = clamp(256 - fg_red + bg_red, 256, 256 + 255) - 256; + uint32_t green = clamp(256 - fg_green + bg_green, 256, 256 + 255) - 256; + uint32_t blue = clamp(256 - fg_blue + bg_blue, 256, 256 + 255) - 256; + + dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; + } + + source += 4; + dest += pitch; + } while (--count); +#else + DWORD *fg2rgb = dc_srcblend; + DWORD *bg2rgb = dc_destblend; do { DWORD a = (fg2rgb[colormap[source[0]]] | 0x40100400) - bg2rgb[dest[0]]; DWORD b = a; @@ -713,6 +961,7 @@ void rt_subclamp4cols (int sx, int yl, int yh) source += 4; dest += pitch; } while (--count); +#endif } // Translates and subtracts one span at hx to the screen at sx with clamping. 
@@ -733,8 +982,8 @@ void rt_tlatesubclamp4cols (int sx, int yl, int yh) void rt_revsubclamp1col (int hx, int sx, int yl, int yh) { BYTE *colormap; - BYTE *source; - BYTE *dest; + canvas_pixel_t *source; + canvas_pixel_t *dest; int count; int pitch; @@ -750,6 +999,28 @@ void rt_revsubclamp1col (int hx, int sx, int yl, int yh) pitch = dc_pitch; colormap = dc_colormap; +#ifndef PALETTEOUTPUT + uint32_t light = calc_light_multiplier(dc_light); + + do { + uint32_t fg = shade_pal_index(colormap[*source], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(256 + fg_red - bg_red, 256, 256 + 255) - 256; + uint32_t green = clamp(256 + fg_green - bg_green, 256, 256 + 255) - 256; + uint32_t blue = clamp(256 + fg_blue - bg_blue, 256, 256 + 255) - 256; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + source += 4; + dest += pitch; + } while (--count); +#else do { DWORD a = (bg2rgb[*dest] | 0x40100400) - fg2rgb[colormap[*source]]; DWORD b = a; @@ -762,14 +1033,15 @@ void rt_revsubclamp1col (int hx, int sx, int yl, int yh) source += 4; dest += pitch; } while (--count); +#endif } // Subtracts all four spans from the screen starting at sx with clamping. 
void rt_revsubclamp4cols (int sx, int yl, int yh) { BYTE *colormap; - BYTE *source; - BYTE *dest; + canvas_pixel_t *source; + canvas_pixel_t *dest; int count; int pitch; @@ -785,6 +1057,32 @@ void rt_revsubclamp4cols (int sx, int yl, int yh) pitch = dc_pitch; colormap = dc_colormap; +#ifndef PALETTEOUTPUT + uint32_t light = calc_light_multiplier(dc_light); + + do { + for (int i = 0; i < 4; i++) + { + uint32_t fg = shade_pal_index(colormap[source[i]], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (dest[i] >> 16) & 0xff; + uint32_t bg_green = (dest[i] >> 8) & 0xff; + uint32_t bg_blue = (dest[i]) & 0xff; + + uint32_t red = clamp(256 + fg_red - bg_red, 256, 256 + 255) - 256; + uint32_t green = clamp(256 + fg_green - bg_green, 256, 256 + 255) - 256; + uint32_t blue = clamp(256 + fg_blue - bg_blue, 256, 256 + 255) - 256; + + dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; + } + + source += 4; + dest += pitch; + } while (--count); +#else do { DWORD a = (bg2rgb[dest[0]] | 0x40100400) - fg2rgb[colormap[source[0]]]; DWORD b = a; @@ -822,6 +1120,7 @@ void rt_revsubclamp4cols (int sx, int yl, int yh) source += 4; dest += pitch; } while (--count); +#endif } // Translates and subtracts one span at hx from the screen at sx with clamping. @@ -1002,7 +1301,7 @@ void rt_draw4cols (int sx) // Before each pass through a rendering loop that uses these routines, // call this function to set up the span pointers. 
-void rt_initcols (BYTE *buff) +void rt_initcols (canvas_pixel_t *buff) { int y; @@ -1016,7 +1315,7 @@ void rt_initcols (BYTE *buff) void R_DrawColumnHorizP_C (void) { int count = dc_count; - BYTE *dest; + canvas_pixel_t *dest; fixed_t fracstep; fixed_t frac; @@ -1077,7 +1376,7 @@ void R_FillColumnHorizP (void) { int count = dc_count; BYTE color = dc_color; - BYTE *dest; + canvas_pixel_t *dest; if (count <= 0) return; diff --git a/src/r_main.cpp b/src/r_main.cpp index ce4841a2e7..04e7989814 100644 --- a/src/r_main.cpp +++ b/src/r_main.cpp @@ -578,7 +578,7 @@ void R_HighlightPortal (PortalDrawseg* pds) BYTE color = (BYTE)BestColor((DWORD *)GPalette.BaseColors, 255, 0, 0, 0, 255); - BYTE* pixels = RenderTarget->GetBuffer(); + canvas_pixel_t* pixels = RenderTarget->GetBuffer(); // top edge for (int x = pds->x1; x < pds->x2; x++) { @@ -623,7 +623,7 @@ void R_EnterPortal (PortalDrawseg* pds, int depth) int Ytop = pds->ceilingclip[x-pds->x1]; int Ybottom = pds->floorclip[x-pds->x1]; - BYTE *dest = RenderTarget->GetBuffer() + x + Ytop * spacing; + canvas_pixel_t *dest = RenderTarget->GetBuffer() + x + Ytop * spacing; for (int y = Ytop; y <= Ybottom; y++) { @@ -794,10 +794,10 @@ void R_EnterPortal (PortalDrawseg* pds, int depth) void R_SetupBuffer () { - static BYTE *lastbuff = NULL; + static canvas_pixel_t *lastbuff = NULL; int pitch = RenderTarget->GetPitch(); - BYTE *lineptr = RenderTarget->GetBuffer() + viewwindowy*pitch + viewwindowx; + canvas_pixel_t *lineptr = RenderTarget->GetBuffer() + viewwindowy*pitch + viewwindowx; if (dc_pitch != pitch || lineptr != lastbuff) { diff --git a/src/r_main.h b/src/r_main.h index 24103393d4..37a41a7631 100644 --- a/src/r_main.h +++ b/src/r_main.h @@ -82,6 +82,34 @@ extern bool r_dontmaplines; // Change R_CalcTiltedLighting() when this changes. 
#define GETPALOOKUP(vis,shade) (clamp (((shade)-FLOAT2FIXED(MIN(MAXLIGHTVIS,double(vis))))>>FRACBITS, 0, NUMCOLORMAPS-1)) +// Calculate the light multiplier for ds_light +// This is used instead of GETPALOOKUP when ds_colormap+dc_colormap is set to the base colormap +#define LIGHTSCALE(vis,shade) ((shade)-FLOAT2FIXED(MIN(MAXLIGHTVIS,double(vis)))) + +#ifndef PALETTEOUTPUT + +// calculates the light constant passed to the shade_pal_index function +inline uint32_t calc_light_multiplier(dsfixed_t light) +{ + // the 0.70 multiplier shouldn't be needed - maybe the palette shades in doom weren't linear? + return (uint32_t)clamp((1.0 - FIXED2DBL(light) / MAXLIGHTVIS * 0.70) * 256 + 0.5, 0.0, 256.0); +} + +// Calculates a ARGB8 color for the given palette index and light multiplier +inline uint32_t shade_pal_index(uint32_t index, uint32_t light) +{ + const PalEntry &color = GPalette.BaseColors[index]; + uint32_t red = color.r; + uint32_t green = color.g; + uint32_t blue = color.b; + red = red * light / 256; + green = green * light / 256; + blue = blue * light / 256; + return 0xff000000 | (red << 16) | (green << 8) | blue; +} + +#endif + extern double GlobVis; void R_SetVisibility(double visibility); diff --git a/src/r_plane.cpp b/src/r_plane.cpp index d749319e3e..b385302e5b 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -227,8 +227,14 @@ void R_MapPlane (int y, int x1) if (plane_shade) { // Determine lighting based on the span's distance from the viewer. 
+#ifndef PALETTEOUTPUT + ds_colormap = basecolormap->Maps; + ds_light = LIGHTSCALE(GlobVis * fabs(CenterY - y), planeshade); +#else ds_colormap = basecolormap->Maps + (GETPALOOKUP ( GlobVis * fabs(CenterY - y), planeshade) << COLORMAPSHIFT); + ds_light = 0; +#endif } #ifdef X86_ASM @@ -360,7 +366,7 @@ void R_MapTiltedPlane (int y, int x1) int x2 = spanend[y]; int width = x2 - x1; double iz, uz, vz; - BYTE *fb; + canvas_pixel_t *fb; DWORD u, v; int i; @@ -393,6 +399,7 @@ void R_MapTiltedPlane (int y, int x1) u = SQWORD(uz*z) + pviewx; v = SQWORD(vz*z) + pviewy; ds_colormap = tiltlighting[i]; + ds_light = 0; fb[i++] = ds_colormap[ds_source[(v >> vshift) | ((u >> ushift) & umask)]]; iz += plane_sz[0]; uz += plane_su[0]; @@ -486,7 +493,16 @@ void R_MapTiltedPlane (int y, int x1) void R_MapColoredPlane (int y, int x1) { - memset (ylookup[y] + x1 + dc_destorg, ds_color, spanend[y] - x1 + 1); +#ifndef PALETTEOUTPUT + canvas_pixel_t *dest = ylookup[y] + x1 + dc_destorg; + int count = (spanend[y] - x1 + 1); + uint32_t light = calc_light_multiplier(ds_light); + uint32_t color = shade_pal_index(ds_color, light); + for (int i = 0; i < count; i++) + dest[i] = color; +#else + memset (ylookup[y] + x1 + dc_destorg, ds_color, (spanend[y] - x1 + 1) * sizeof(canvas_pixel_t)); +#endif } //========================================================================== @@ -1462,11 +1478,13 @@ void R_DrawSkyPlane (visplane_t *pl) if (fixedcolormap) { dc_colormap = fixedcolormap; + dc_light = 0; } else { fakefixed = true; fixedcolormap = dc_colormap = NormalLight.Maps; + dc_light = 0; } R_DrawSky (pl); @@ -1547,6 +1565,7 @@ void R_DrawNormalPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t planeheight = fabs(pl->height.Zat0() - ViewPos.Z); GlobVis = r_FloorVisibility / planeheight; + ds_light = 0; if (fixedlightlev >= 0) ds_colormap = basecolormap->Maps + fixedlightlev, plane_shade = false; else if (fixedcolormap) @@ -1707,6 +1726,7 @@ void R_DrawTiltedPlane (visplane_t *pl, 
double _xscale, double _yscale, fixed_t if (pl->height.fC() > 0) planelightfloat = -planelightfloat; + ds_light = 0; if (fixedlightlev >= 0) ds_colormap = basecolormap->Maps + fixedlightlev, plane_shade = false; else if (fixedcolormap) diff --git a/src/r_segs.cpp b/src/r_segs.cpp index 4eb3cb440c..1cdb785558 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -178,6 +178,7 @@ static void BlastMaskedColumn (void (*blastfunc)(const BYTE *pixels, const FText if (fixedcolormap == NULL && fixedlightlev < 0) { dc_colormap = basecolormap->Maps + (GETPALOOKUP (rw_light, wallshade) << COLORMAPSHIFT); + dc_light = 0; } dc_iscale = xs_Fix<16>::ToFix(MaskedSWall[dc_x] * MaskedScaleY); @@ -316,6 +317,7 @@ void R_RenderMaskedSegRange (drawseg_t *ds, int x1, int x2) dc_colormap = basecolormap->Maps + fixedlightlev; else if (fixedcolormap != NULL) dc_colormap = fixedcolormap; + dc_light = 0; // find positioning texheight = tex->GetScaledHeightDouble(); @@ -633,6 +635,7 @@ void R_RenderFakeWall(drawseg_t *ds, int x1, int x2, F3DFloor *rover) dc_colormap = basecolormap->Maps + fixedlightlev; else if (fixedcolormap != NULL) dc_colormap = fixedcolormap; + dc_light = 0; WallC.sz1 = ds->sz1; WallC.sz2 = ds->sz2; @@ -1066,10 +1069,11 @@ void R_RenderFakeWallRange (drawseg_t *ds, int x1, int x2) } // prevlineasm1 is like vlineasm1 but skips the loop if only drawing one pixel -inline fixed_t prevline1 (fixed_t vince, BYTE *colormap, int count, fixed_t vplce, const BYTE *bufplce, BYTE *dest) +inline fixed_t prevline1 (fixed_t vince, BYTE *colormap, fixed_t light, int count, fixed_t vplce, const BYTE *bufplce, canvas_pixel_t *dest) { dc_iscale = vince; dc_colormap = colormap; + dc_light = light; dc_count = count; dc_texturefrac = vplce; dc_source = bufplce; @@ -1117,6 +1121,10 @@ void wallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *l palookupoffse[1] = dc_colormap; palookupoffse[2] = dc_colormap; palookupoffse[3] = dc_colormap; + palookuplight[0] = 0; + 
palookuplight[1] = 0; + palookuplight[2] = 0; + palookuplight[3] = 0; } for(; (x < x2) && (x & 3); ++x) @@ -1130,7 +1138,13 @@ void wallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *l if (!fixed) { // calculate lighting +#ifndef PALETTEOUTPUT + dc_colormap = basecolormapdata; + dc_light = LIGHTSCALE(light, wallshade); +#else dc_colormap = basecolormapdata + (GETPALOOKUP (light, wallshade) << COLORMAPSHIFT); + dc_light = 0; +#endif } dc_source = getcol (rw_pic, (lwal[x] + xoffset) >> FRACBITS); @@ -1170,7 +1184,13 @@ void wallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *l for (z = 0; z < 4; ++z) { light += rw_lightstep; - palookupoffse[z] = basecolormapdata + (GETPALOOKUP (light, wallshade) << COLORMAPSHIFT); +#ifndef PALETTEOUTPUT + palookupoffse[z] = basecolormapdata; + palookuplight[z] = LIGHTSCALE(light, wallshade); +#else + palookupoffse[z] = basecolormapdata + (GETPALOOKUP(12/*light*/, wallshade) << COLORMAPSHIFT); + palookuplight[z] = 0; +#endif } } @@ -1183,7 +1203,7 @@ void wallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *l { if (!(bad & 1)) { - prevline1(vince[z],palookupoffse[z],y2ve[z]-y1ve[z],vplce[z],bufplce[z],ylookup[y1ve[z]]+x+z+dc_destorg); + prevline1(vince[z],palookupoffse[z],palookuplight[z],y2ve[z]-y1ve[z],vplce[z],bufplce[z],ylookup[y1ve[z]]+x+z+dc_destorg); } bad >>= 1; } @@ -1194,7 +1214,7 @@ void wallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *l { if (u4 > y1ve[z]) { - vplce[z] = prevline1(vince[z],palookupoffse[z],u4-y1ve[z],vplce[z],bufplce[z],ylookup[y1ve[z]]+x+z+dc_destorg); + vplce[z] = prevline1(vince[z],palookupoffse[z], palookuplight[z],u4-y1ve[z],vplce[z],bufplce[z],ylookup[y1ve[z]]+x+z+dc_destorg); } } @@ -1205,12 +1225,12 @@ void wallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *l dovline4(); } - BYTE *i = x+ylookup[d4]+dc_destorg; + canvas_pixel_t *i = x+ylookup[d4]+dc_destorg; for (z = 0; z < 4; ++z) { if 
(y2ve[z] > d4) { - prevline1(vince[z],palookupoffse[0],y2ve[z]-d4,vplce[z],bufplce[z],i+z); + prevline1(vince[z],palookupoffse[0],palookuplight[0],y2ve[z]-d4,vplce[z],bufplce[z],i+z); } } } @@ -1225,7 +1245,13 @@ void wallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *l if (!fixed) { // calculate lighting +#ifndef PALETTEOUTPUT + dc_colormap = basecolormapdata; + dc_light = LIGHTSCALE(light, wallshade); +#else dc_colormap = basecolormapdata + (GETPALOOKUP (light, wallshade) << COLORMAPSHIFT); + dc_light = 0; +#endif } dc_source = getcol (rw_pic, (lwal[x] + xoffset) >> FRACBITS); @@ -1416,10 +1442,11 @@ static void wallscan_np2_ds(drawseg_t *ds, int x1, int x2, short *uwal, short *d } } -inline fixed_t mvline1 (fixed_t vince, BYTE *colormap, int count, fixed_t vplce, const BYTE *bufplce, BYTE *dest) +inline fixed_t mvline1 (fixed_t vince, BYTE *colormap, int count, fixed_t vplce, const BYTE *bufplce, canvas_pixel_t *dest) { dc_iscale = vince; dc_colormap = colormap; + dc_light = 0; dc_count = count; dc_texturefrac = vplce; dc_source = bufplce; @@ -1431,7 +1458,7 @@ void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_ double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x)) { int x, fracbits; - BYTE *p; + canvas_pixel_t *p; int y1ve[4], y2ve[4], u4, d4, startx, dax, z; char bad; float light = rw_light - rw_lightstep; @@ -1471,7 +1498,7 @@ void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_ palookupoffse[3] = dc_colormap; } - for(; (x < x2) && ((size_t)p & 3); ++x, ++p) + for(; (x < x2) && (((size_t)p/sizeof(canvas_pixel_t)) & 3); ++x, ++p) { light += rw_lightstep; y1ve[0] = uwal[x];//max(uwal[x],umost[x]); @@ -1481,6 +1508,7 @@ void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_ if (!fixed) { // calculate lighting dc_colormap = basecolormapdata + (GETPALOOKUP (light, wallshade) << COLORMAPSHIFT); + dc_light = 0; } dc_source = getcol (rw_pic, (lwal[x] + 
xoffset) >> FRACBITS); @@ -1553,7 +1581,7 @@ void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_ domvline4(); } - BYTE *i = p+ylookup[d4]; + canvas_pixel_t *i = p+ylookup[d4]; for (z = 0; z < 4; ++z) { if (y2ve[z] > d4) @@ -1572,6 +1600,7 @@ void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_ if (!fixed) { // calculate lighting dc_colormap = basecolormapdata + (GETPALOOKUP (light, wallshade) << COLORMAPSHIFT); + dc_light = 0; } dc_source = getcol (rw_pic, (lwal[x] + xoffset) >> FRACBITS); @@ -1589,10 +1618,11 @@ void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_ NetUpdate (); } -inline void preptmvline1 (fixed_t vince, BYTE *colormap, int count, fixed_t vplce, const BYTE *bufplce, BYTE *dest) +inline void preptmvline1 (fixed_t vince, BYTE *colormap, fixed_t light, int count, fixed_t vplce, const BYTE *bufplce, canvas_pixel_t *dest) { dc_iscale = vince; dc_colormap = colormap; + dc_light = light; dc_count = count; dc_texturefrac = vplce; dc_source = bufplce; @@ -1605,7 +1635,7 @@ void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, f fixed_t (*tmvline1)(); void (*tmvline4)(); int x, fracbits; - BYTE *p; + canvas_pixel_t *p; int y1ve[4], y2ve[4], u4, d4, startx, dax, z; char bad; float light = rw_light - rw_lightstep; @@ -1645,9 +1675,13 @@ void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, f palookupoffse[1] = dc_colormap; palookupoffse[2] = dc_colormap; palookupoffse[3] = dc_colormap; + palookuplight[0] = 0; + palookuplight[1] = 0; + palookuplight[2] = 0; + palookuplight[3] = 0; } - for(; (x < x2) && ((size_t)p & 3); ++x, ++p) + for(; (x < x2) && (((size_t)p / sizeof(canvas_pixel_t)) & 3); ++x, ++p) { light += rw_lightstep; y1ve[0] = uwal[x];//max(uwal[x],umost[x]); @@ -1656,7 +1690,13 @@ void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, f if (!fixed) { // calculate lighting +#ifndef PALETTEOUTPUT 
+ dc_colormap = basecolormapdata; + dc_light = LIGHTSCALE(light, wallshade); +#else dc_colormap = basecolormapdata + (GETPALOOKUP (light, wallshade) << COLORMAPSHIFT); + dc_light = 0; +#endif } dc_source = getcol (rw_pic, (lwal[x] + xoffset) >> FRACBITS); @@ -1694,7 +1734,12 @@ void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, f for (z = 0; z < 4; ++z) { light += rw_lightstep; +#ifndef PALETTEOUTPUT + palookupoffse[z] = basecolormapdata; + palookuplight[z] = LIGHTSCALE(light, wallshade); +#else palookupoffse[z] = basecolormapdata + (GETPALOOKUP (light, wallshade) << COLORMAPSHIFT); +#endif } } @@ -1707,7 +1752,7 @@ void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, f { if (!(bad & 1)) { - preptmvline1(vince[z],palookupoffse[z],y2ve[z]-y1ve[z],vplce[z],bufplce[z],ylookup[y1ve[z]]+p+z); + preptmvline1(vince[z],palookupoffse[z],palookuplight[z],y2ve[z]-y1ve[z],vplce[z],bufplce[z],ylookup[y1ve[z]]+p+z); tmvline1(); } bad >>= 1; @@ -1719,7 +1764,7 @@ void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, f { if (u4 > y1ve[z]) { - preptmvline1(vince[z],palookupoffse[z],u4-y1ve[z],vplce[z],bufplce[z],ylookup[y1ve[z]]+p+z); + preptmvline1(vince[z],palookupoffse[z],palookuplight[z],u4-y1ve[z],vplce[z],bufplce[z],ylookup[y1ve[z]]+p+z); vplce[z] = tmvline1(); } } @@ -1731,12 +1776,12 @@ void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, f tmvline4(); } - BYTE *i = p+ylookup[d4]; + canvas_pixel_t *i = p+ylookup[d4]; for (z = 0; z < 4; ++z) { if (y2ve[z] > d4) { - preptmvline1(vince[z],palookupoffse[0],y2ve[z]-d4,vplce[z],bufplce[z],i+z); + preptmvline1(vince[z],palookupoffse[0],palookuplight[0],y2ve[z]-d4,vplce[z],bufplce[z],i+z); tmvline1(); } } @@ -1750,7 +1795,13 @@ void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, f if (!fixed) { // calculate lighting - dc_colormap = basecolormapdata + (GETPALOOKUP (light, wallshade) << COLORMAPSHIFT); 
+#ifndef PALETTEOUTPUT + dc_colormap = basecolormapdata; + dc_light = LIGHTSCALE(light, wallshade); +#else + dc_colormap = basecolormapdata + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT); + dc_light = 0; +#endif } dc_source = getcol (rw_pic, (lwal[x] + xoffset) >> FRACBITS); @@ -1791,6 +1842,7 @@ void R_RenderSegLoop () dc_colormap = basecolormap->Maps + fixedlightlev; else if (fixedcolormap != NULL) dc_colormap = fixedcolormap; + dc_light = 0; // clip wall to the floor and ceiling for (x = x1; x < x2; ++x) @@ -3194,6 +3246,7 @@ static void R_RenderDecal (side_t *wall, DBaseDecal *decal, drawseg_t *clipper, dc_colormap = usecolormap->Maps; else calclighting = true; + dc_light = 0; // Draw it if (decal->RenderFlags & RF_YFLIP) @@ -3242,7 +3295,13 @@ static void R_RenderDecal (side_t *wall, DBaseDecal *decal, drawseg_t *clipper, { if (calclighting) { // calculate lighting +#ifndef PALETTEOUTPUT + dc_colormap = usecolormap->Maps; + dc_light = LIGHTSCALE(rw_light, wallshade); +#else dc_colormap = usecolormap->Maps + (GETPALOOKUP (rw_light, wallshade) << COLORMAPSHIFT); + dc_light = 0; +#endif } R_WallSpriteColumn (R_DrawMaskedColumn); dc_x++; @@ -3252,7 +3311,13 @@ static void R_RenderDecal (side_t *wall, DBaseDecal *decal, drawseg_t *clipper, { if (calclighting) { // calculate lighting +#ifndef PALETTEOUTPUT + dc_colormap = usecolormap->Maps; + dc_light = LIGHTSCALE(rw_light, wallshade); +#else dc_colormap = usecolormap->Maps + (GETPALOOKUP (rw_light, wallshade) << COLORMAPSHIFT); + dc_light = 0; +#endif } rt_initcols(); for (int zz = 4; zz; --zz) @@ -3267,7 +3332,13 @@ static void R_RenderDecal (side_t *wall, DBaseDecal *decal, drawseg_t *clipper, { if (calclighting) { // calculate lighting +#ifndef PALETTEOUTPUT + dc_colormap = usecolormap->Maps; + dc_light = LIGHTSCALE(rw_light, wallshade); +#else dc_colormap = usecolormap->Maps + (GETPALOOKUP (rw_light, wallshade) << COLORMAPSHIFT); + dc_light = 0; +#endif } R_WallSpriteColumn (R_DrawMaskedColumn); dc_x++; 
diff --git a/src/r_swrenderer.cpp b/src/r_swrenderer.cpp index 07edf25e97..433007acb1 100644 --- a/src/r_swrenderer.cpp +++ b/src/r_swrenderer.cpp @@ -182,6 +182,7 @@ void FSoftwareRenderer::RemapVoxels() void FSoftwareRenderer::WriteSavePic (player_t *player, FILE *file, int width, int height) { +#ifdef PALETTEOUTPUT DCanvas *pic = new DSimpleCanvas (width, height); PalEntry palette[256]; @@ -195,6 +196,7 @@ void FSoftwareRenderer::WriteSavePic (player_t *player, FILE *file, int width, i pic->Destroy(); pic->ObjectFlags |= OF_YesReallyDelete; delete pic; +#endif } //=========================================================================== @@ -311,6 +313,7 @@ void FSoftwareRenderer::CopyStackedViewParameters() void FSoftwareRenderer::RenderTextureView (FCanvasTexture *tex, AActor *viewpoint, int fov) { +#ifdef PALETTEOUTPUT BYTE *Pixels = const_cast(tex->GetPixels()); DSimpleCanvas *Canvas = tex->GetCanvas(); @@ -334,6 +337,7 @@ void FSoftwareRenderer::RenderTextureView (FCanvasTexture *tex, AActor *viewpoin tex->SetUpdated(); fixedcolormap = savecolormap; realfixedcolormap = savecm; +#endif } //========================================================================== diff --git a/src/r_things.cpp b/src/r_things.cpp index 427e61b065..0e55b45f9d 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -132,7 +132,7 @@ EXTERN_CVAR (Bool, r_drawvoxels) // int OffscreenBufferWidth, OffscreenBufferHeight; -BYTE *OffscreenColorBuffer; +canvas_pixel_t *OffscreenColorBuffer; FCoverageBuffer *OffscreenCoverageBuffer; // @@ -408,6 +408,7 @@ void R_DrawVisSprite (vissprite_t *vis) fixed_t centeryfrac = FLOAT2FIXED(CenterY); dc_colormap = vis->Style.colormap; + dc_light = 0; mode = R_SetPatchStyle (vis->Style.RenderStyle, vis->Style.Alpha, vis->Translation, vis->FillColor); @@ -544,6 +545,7 @@ void R_DrawWallSprite(vissprite_t *spr) dc_colormap = usecolormap->Maps; else calclighting = true; + dc_light = 0; // Draw it WallSpriteTile = spr->pic; @@ -592,7 +594,13 @@ void 
R_DrawWallSprite(vissprite_t *spr) { if (calclighting) { // calculate lighting +#ifndef PALETTEOUTPUT + dc_colormap = usecolormap->Maps; + dc_light = LIGHTSCALE(rw_light, shade); +#else dc_colormap = usecolormap->Maps + (GETPALOOKUP (rw_light, shade) << COLORMAPSHIFT); + dc_light = FLOAT2FIXED(MAXLIGHTVIS); +#endif } if (!R_ClipSpriteColumnWithPortals(spr)) R_WallSpriteColumn(R_DrawMaskedColumn); @@ -603,7 +611,13 @@ void R_DrawWallSprite(vissprite_t *spr) { if (calclighting) { // calculate lighting +#ifndef PALETTEOUTPUT + dc_colormap = usecolormap->Maps; + dc_light = LIGHTSCALE(rw_light, shade); +#else dc_colormap = usecolormap->Maps + (GETPALOOKUP (rw_light, shade) << COLORMAPSHIFT); + dc_light = FLOAT2FIXED(MAXLIGHTVIS); +#endif } rt_initcols(); for (int zz = 4; zz; --zz) @@ -619,7 +633,13 @@ void R_DrawWallSprite(vissprite_t *spr) { if (calclighting) { // calculate lighting +#ifndef PALETTEOUTPUT + dc_colormap = usecolormap->Maps; + dc_light = LIGHTSCALE(rw_light, shade); +#else dc_colormap = usecolormap->Maps + (GETPALOOKUP (rw_light, shade) << COLORMAPSHIFT); + dc_light = FLOAT2FIXED(MAXLIGHTVIS); +#endif } if (!R_ClipSpriteColumnWithPortals(spr)) R_WallSpriteColumn(R_DrawMaskedColumn); @@ -654,6 +674,7 @@ void R_DrawVisVoxel(vissprite_t *spr, int minslabz, int maxslabz, short *cliptop // Do setup for blending. 
dc_colormap = spr->Style.colormap; + dc_light = 0; mode = R_SetPatchStyle(spr->Style.RenderStyle, spr->Style.Alpha, spr->Translation, spr->FillColor); if (mode == DontDraw) @@ -2598,10 +2619,8 @@ static void R_DrawMaskedSegsBehindParticle (const vissprite_t *vis) void R_DrawParticle (vissprite_t *vis) { - DWORD *bg2rgb; int spacing; - BYTE *dest; - DWORD fg; + canvas_pixel_t *dest; BYTE color = vis->Style.colormap[vis->startfrac]; int yl = vis->y1; int ycount = vis->y2 - yl + 1; @@ -2610,6 +2629,47 @@ void R_DrawParticle (vissprite_t *vis) R_DrawMaskedSegsBehindParticle (vis); +#ifndef PALETTEOUTPUT + uint32_t fg = shade_pal_index(color, calc_light_multiplier(0)); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + // vis->renderflags holds translucency level (0-255) + fixed_t fglevel = ((vis->renderflags + 1) << 8) & ~0x3ff; + uint32_t alpha = fglevel * 256 / FRACUNIT; + uint32_t inv_alpha = 256 - alpha; + + fg_red *= alpha; + fg_green *= alpha; + fg_blue *= alpha; + + spacing = RenderTarget->GetPitch(); + + for (int x = x1; x < (x1 + countbase); x++) + { + dc_x = x; + if (R_ClipSpriteColumnWithPortals(vis)) + continue; + dest = ylookup[yl] + x + dc_destorg; + for (int y = 0; y < ycount; y++) + { + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + // Weight the existing pixel by inv_alpha so the fg and bg weights sum to 256 and no channel can exceed 255. + uint32_t red = (fg_red + bg_red * inv_alpha) / 256; + uint32_t green = (fg_green + bg_green * inv_alpha) / 256; + uint32_t blue = (fg_blue + bg_blue * inv_alpha) / 256; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += spacing; + } + } +#else + DWORD *bg2rgb; + DWORD fg; + // vis->renderflags holds translucency level (0-255) { fixed_t fglevel, bglevel; @@ -2659,6 +2719,7 @@ void R_DrawParticle (vissprite_t *vis) dest += spacing; } } +#endif } extern double BaseYaspectMul;; @@ -3189,12 +3250,12 @@ void R_CheckOffscreenBuffer(int width, int height, bool spansonly) { if
(OffscreenColorBuffer == NULL) { - OffscreenColorBuffer = new BYTE[width * height]; + OffscreenColorBuffer = new canvas_pixel_t[width * height]; } else if (OffscreenBufferWidth != width || OffscreenBufferHeight != height) { delete[] OffscreenColorBuffer; - OffscreenColorBuffer = new BYTE[width * height]; + OffscreenColorBuffer = new canvas_pixel_t[width * height]; } } OffscreenBufferWidth = width; diff --git a/src/textures/canvastexture.cpp b/src/textures/canvastexture.cpp index 062c3af1d3..7388c13060 100644 --- a/src/textures/canvastexture.cpp +++ b/src/textures/canvastexture.cpp @@ -106,6 +106,10 @@ void FCanvasTexture::MakeTexture () Canvas = new DSimpleCanvas (Width, Height); Canvas->Lock (); GC::AddSoftRoot(Canvas); +#ifndef PALETTEOUTPUT + Pixels = new BYTE[Width*Height]; + bPixelsAllocated = true; +#else if (Width != Height || Width != Canvas->GetPitch()) { Pixels = new BYTE[Width*Height]; @@ -116,6 +120,7 @@ void FCanvasTexture::MakeTexture () Pixels = Canvas->GetBuffer(); bPixelsAllocated = false; } +#endif // Draw a special "unrendered" initial texture into the buffer. 
memset (Pixels, 0, Width*Height/2); memset (Pixels+Width*Height/2, 255, Width*Height/2); diff --git a/src/v_draw.cpp b/src/v_draw.cpp index c7b62b0a66..fd14b5e0a3 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -166,16 +166,18 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) if (translation != NULL) { dc_colormap = (lighttable_t *)translation; + dc_light = 0; } else { dc_colormap = identitymap; + dc_light = 0; } fixedcolormap = dc_colormap; ESPSResult mode = R_SetPatchStyle (parms.style, parms.Alpha, 0, parms.fillcolor); - BYTE *destorgsave = dc_destorg; + canvas_pixel_t *destorgsave = dc_destorg; dc_destorg = screen->GetBuffer(); if (dc_destorg == NULL) { @@ -1015,13 +1017,32 @@ void DCanvas::PUTTRANSDOT (int xx, int yy, int basecolor, int level) oldyyshifted = yy * GetPitch(); } - BYTE *spot = GetBuffer() + oldyyshifted + xx; +#ifndef PALETTEOUTPUT + canvas_pixel_t *spot = GetBuffer() + oldyyshifted + xx; + + uint32_t fg = shade_pal_index(basecolor, calc_light_multiplier(0)); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*spot >> 16) & 0xff; + uint32_t bg_green = (*spot >> 8) & 0xff; + uint32_t bg_blue = (*spot) & 0xff; + // Weight fg/bg by level, mirroring the palette path's Col2RGB8[63-level] / Col2RGB8[1+level] (assumes 0 <= level <= 63 - TODO confirm). + uint32_t red = (fg_red * (63 - level) + bg_red * (1 + level)) / 64; + uint32_t green = (fg_green * (63 - level) + bg_green * (1 + level)) / 64; + uint32_t blue = (fg_blue * (63 - level) + bg_blue * (1 + level)) / 64; + + *spot = 0xff000000 | (red << 16) | (green << 8) | blue; +#else + canvas_pixel_t *spot = GetBuffer() + oldyyshifted + xx; DWORD *bg2rgb = Col2RGB8[1+level]; DWORD *fg2rgb = Col2RGB8[63-level]; DWORD fg = fg2rgb[basecolor]; DWORD bg = bg2rgb[*spot]; bg = (fg+bg) | 0x1f07c1f; *spot = RGB32k.All[bg&(bg>>15)]; +#endif } void DCanvas::DrawLine(int x0, int y0, int x1, int y1, int palColor, uint32 realcolor) @@ -1069,7 +1090,7 @@ void DCanvas::DrawLine(int x0, int y0, int x1, int y1, int palColor, uint32 real } else if (deltaX == 0) { // vertical line - BYTE *spot = GetBuffer() + y0*GetPitch() + x0; +
canvas_pixel_t *spot = GetBuffer() + y0*GetPitch() + x0; int pitch = GetPitch (); do { @@ -1079,7 +1100,7 @@ void DCanvas::DrawLine(int x0, int y0, int x1, int y1, int palColor, uint32 real } else if (deltaX == deltaY) { // diagonal line. - BYTE *spot = GetBuffer() + y0*GetPitch() + x0; + canvas_pixel_t *spot = GetBuffer() + y0*GetPitch() + x0; int advance = GetPitch() + xDir; do { @@ -1205,7 +1226,7 @@ void DCanvas::DrawPixel(int x, int y, int palColor, uint32 realcolor) void DCanvas::Clear (int left, int top, int right, int bottom, int palcolor, uint32 color) { int x, y; - BYTE *dest; + canvas_pixel_t *dest; if (left == right || top == bottom) { @@ -1426,11 +1447,11 @@ void DCanvas::FillSimplePoly(FTexture *tex, FVector2 *points, int npoints, // V_DrawBlock // Draw a linear block of pixels into the view buffer. // -void DCanvas::DrawBlock (int x, int y, int _width, int _height, const BYTE *src) const +void DCanvas::DrawBlock (int x, int y, int _width, int _height, const canvas_pixel_t *src) const { int srcpitch = _width; int destpitch; - BYTE *dest; + canvas_pixel_t *dest; if (ClipBox (x, y, _width, _height, src, srcpitch)) { @@ -1442,7 +1463,7 @@ void DCanvas::DrawBlock (int x, int y, int _width, int _height, const BYTE *src) do { - memcpy (dest, src, _width); + memcpy (dest, src, _width * sizeof(canvas_pixel_t)); src += srcpitch; dest += destpitch; } while (--_height); @@ -1452,9 +1473,9 @@ void DCanvas::DrawBlock (int x, int y, int _width, int _height, const BYTE *src) // V_GetBlock // Gets a linear block of pixels from the view buffer. 
// -void DCanvas::GetBlock (int x, int y, int _width, int _height, BYTE *dest) const +void DCanvas::GetBlock (int x, int y, int _width, int _height, canvas_pixel_t *dest) const { - const BYTE *src; + const canvas_pixel_t *src; #ifdef RANGECHECK if (x<0 @@ -1470,14 +1491,14 @@ void DCanvas::GetBlock (int x, int y, int _width, int _height, BYTE *dest) const while (_height--) { - memcpy (dest, src, _width); + memcpy (dest, src, _width * sizeof(canvas_pixel_t)); src += Pitch; dest += _width; } } // Returns true if the box was completely clipped. False otherwise. -bool DCanvas::ClipBox (int &x, int &y, int &w, int &h, const BYTE *&src, const int srcpitch) const +bool DCanvas::ClipBox (int &x, int &y, int &w, int &h, const canvas_pixel_t *&src, const int srcpitch) const { if (x >= Width || y >= Height || x+w <= 0 || y+h <= 0) { // Completely clipped off screen diff --git a/src/v_video.cpp b/src/v_video.cpp index 01a73950b2..b6a626753d 100644 --- a/src/v_video.cpp +++ b/src/v_video.cpp @@ -343,10 +343,8 @@ void DCanvas::Dim (PalEntry color, float damount, int x1, int y1, int w, int h) if (damount == 0.f) return; - DWORD *bg2rgb; - DWORD fg; int gap; - BYTE *spot; + canvas_pixel_t *spot; int x, y; if (x1 >= Width || y1 >= Height) @@ -366,6 +364,43 @@ void DCanvas::Dim (PalEntry color, float damount, int x1, int y1, int w, int h) return; } + spot = Buffer + x1 + y1*Pitch; + gap = Pitch - w; + +#ifndef PALETTEOUTPUT + uint32_t fg = color.d; + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t alpha = (uint32_t)clamp(damount * 256 + 0.5f, 0.0f, 256.0f); + uint32_t inv_alpha = 256 - alpha; + + fg_red *= alpha; + fg_green *= alpha; + fg_blue *= alpha; + + for (y = h; y != 0; y--) + { + for (x = w; x != 0; x--) + { + uint32_t bg_red = (*spot >> 16) & 0xff; + uint32_t bg_green = (*spot >> 8) & 0xff; + uint32_t bg_blue = (*spot) & 0xff; + + uint32_t red = (fg_red + bg_red * inv_alpha) / 256; + uint32_t green = 
(fg_green + bg_green * inv_alpha) / 256; + uint32_t blue = (fg_blue + bg_blue * inv_alpha) / 256; + + *spot = 0xff000000 | (red << 16) | (green << 8) | blue; + spot++; + } + spot += gap; + } +#else + DWORD *bg2rgb; + DWORD fg; + { int amount; @@ -377,8 +412,6 @@ void DCanvas::Dim (PalEntry color, float damount, int x1, int y1, int w, int h) (((color.b * amount) >> 4) << 10); } - spot = Buffer + x1 + y1*Pitch; - gap = Pitch - w; for (y = h; y != 0; y--) { for (x = w; x != 0; x--) @@ -392,6 +425,7 @@ void DCanvas::Dim (PalEntry color, float damount, int x1, int y1, int w, int h) } spot += gap; } +#endif } //========================================================================== @@ -403,7 +437,7 @@ void DCanvas::Dim (PalEntry color, float damount, int x1, int y1, int w, int h) // //========================================================================== -void DCanvas::GetScreenshotBuffer(const BYTE *&buffer, int &pitch, ESSType &color_type) +void DCanvas::GetScreenshotBuffer(const canvas_pixel_t *&buffer, int &pitch, ESSType &color_type) { Lock(true); buffer = GetBuffer(); @@ -759,8 +793,8 @@ DSimpleCanvas::DSimpleCanvas (int width, int height) Pitch = width + MAX(0, CPU.DataL1LineSize - 8); } } - MemBuffer = new BYTE[Pitch * height]; - memset (MemBuffer, 0, Pitch * height); + MemBuffer = new canvas_pixel_t[Pitch * height]; + memset (MemBuffer, 0, Pitch * height * sizeof(canvas_pixel_t)); } //========================================================================== @@ -879,7 +913,7 @@ void DFrameBuffer::DrawRateStuff () { int i = I_GetTime(false); int tics = i - LastTic; - BYTE *buffer = GetBuffer(); + canvas_pixel_t *buffer = GetBuffer(); LastTic = i; if (tics > 20) tics = 20; diff --git a/src/v_video.h b/src/v_video.h index fa1ce83df0..27c09ee36f 100644 --- a/src/v_video.h +++ b/src/v_video.h @@ -189,7 +189,7 @@ public: virtual ~DCanvas (); // Member variable access - inline BYTE *GetBuffer () const { return Buffer; } + inline canvas_pixel_t *GetBuffer () 
const { return Buffer; } inline int GetWidth () const { return Width; } inline int GetHeight () const { return Height; } inline int GetPitch () const { return Pitch; } @@ -202,10 +202,10 @@ public: virtual bool IsLocked () { return Buffer != NULL; } // Returns true if the surface is locked // Draw a linear block of pixels into the canvas - virtual void DrawBlock (int x, int y, int width, int height, const BYTE *src) const; + virtual void DrawBlock (int x, int y, int width, int height, const canvas_pixel_t *src) const; // Reads a linear block of pixels into the view buffer. - virtual void GetBlock (int x, int y, int width, int height, BYTE *dest) const; + virtual void GetBlock (int x, int y, int width, int height, canvas_pixel_t *dest) const; // Dim the entire canvas for the menus virtual void Dim (PalEntry color = 0); @@ -237,7 +237,7 @@ public: // Retrieves a buffer containing image data for a screenshot. // Hint: Pitch can be negative for upside-down images, in which case buffer // points to the last row in the buffer, which will be the first row output. - virtual void GetScreenshotBuffer(const BYTE *&buffer, int &pitch, ESSType &color_type); + virtual void GetScreenshotBuffer(const canvas_pixel_t *&buffer, int &pitch, ESSType &color_type); // Releases the screenshot buffer. 
virtual void ReleaseScreenshotBuffer(); @@ -262,13 +262,13 @@ public: void DrawChar (FFont *font, int normalcolor, int x, int y, BYTE character, int tag_first, ...); protected: - BYTE *Buffer; + canvas_pixel_t *Buffer; int Width; int Height; int Pitch; int LockCount; - bool ClipBox (int &left, int &top, int &width, int &height, const BYTE *&src, const int srcpitch) const; + bool ClipBox (int &left, int &top, int &width, int &height, const canvas_pixel_t *&src, const int srcpitch) const; void DrawTextureV(FTexture *img, double x, double y, uint32 tag, va_list tags) = delete; virtual void DrawTextureParms(FTexture *img, DrawParms &parms); bool ParseDrawTextureTags (FTexture *img, double x, double y, uint32 tag, va_list tags, DrawParms *parms, bool fortext) const; @@ -297,7 +297,7 @@ public: void Unlock (); protected: - BYTE *MemBuffer; + canvas_pixel_t *MemBuffer; DSimpleCanvas() {} }; diff --git a/src/win32/fb_d3d9.cpp b/src/win32/fb_d3d9.cpp index efdced151c..14a78d4cd6 100644 --- a/src/win32/fb_d3d9.cpp +++ b/src/win32/fb_d3d9.cpp @@ -765,14 +765,20 @@ void D3DFB::KillNativeTexs() bool D3DFB::CreateFBTexture () { - if (FAILED(D3DDevice->CreateTexture(Width, Height, 1, D3DUSAGE_DYNAMIC, D3DFMT_L8, D3DPOOL_DEFAULT, &FBTexture, NULL))) +#ifndef PALETTEOUTPUT + D3DFORMAT FBFormat = D3DFMT_A8R8G8B8; +#else + D3DFORMAT FBFormat = D3DFMT_L8; +#endif + + if (FAILED(D3DDevice->CreateTexture(Width, Height, 1, D3DUSAGE_DYNAMIC, FBFormat, D3DPOOL_DEFAULT, &FBTexture, NULL))) { int pow2width, pow2height, i; for (i = 1; i < Width; i <<= 1) {} pow2width = i; for (i = 1; i < Height; i <<= 1) {} pow2height = i; - if (FAILED(D3DDevice->CreateTexture(pow2width, pow2height, 1, D3DUSAGE_DYNAMIC, D3DFMT_L8, D3DPOOL_DEFAULT, &FBTexture, NULL))) + if (FAILED(D3DDevice->CreateTexture(pow2width, pow2height, 1, D3DUSAGE_DYNAMIC, FBFormat, D3DPOOL_DEFAULT, &FBTexture, NULL))) { return false; } @@ -1304,18 +1310,18 @@ void D3DFB::Draw3DPart(bool copy3d) SUCCEEDED(FBTexture->LockRect (0, 
&lockrect, NULL, D3DLOCK_DISCARD))) || SUCCEEDED(FBTexture->LockRect (0, &lockrect, &texrect, 0))) { - if (lockrect.Pitch == Pitch && Pitch == Width) + if (lockrect.Pitch == Pitch * sizeof(canvas_pixel_t) && Pitch == Width) { - memcpy (lockrect.pBits, MemBuffer, Width * Height); + memcpy (lockrect.pBits, MemBuffer, Width * Height * sizeof(canvas_pixel_t)); } else { - BYTE *dest = (BYTE *)lockrect.pBits; - BYTE *src = MemBuffer; + canvas_pixel_t *dest = (canvas_pixel_t *)lockrect.pBits; + canvas_pixel_t *src = MemBuffer; for (int y = 0; y < Height; y++) { - memcpy (dest, src, Width); - dest += lockrect.Pitch; + memcpy (dest, src, Width * sizeof(canvas_pixel_t)); + dest = reinterpret_cast<canvas_pixel_t *>(reinterpret_cast<BYTE *>(dest) + lockrect.Pitch); src += Pitch; } } @@ -1349,7 +1355,11 @@ void D3DFB::Draw3DPart(bool copy3d) memset(Constant, 0, sizeof(Constant)); SetAlphaBlend(D3DBLENDOP(0)); EnableAlphaTest(FALSE); +#ifndef PALETTEOUTPUT + SetPixelShader(Shaders[SHADER_NormalColor]); +#else SetPixelShader(Shaders[SHADER_NormalColorPal]); +#endif if (copy3d) { FBVERTEX verts[4]; @@ -1367,7 +1377,11 @@ void D3DFB::Draw3DPart(bool copy3d) realfixedcolormap->ColorizeStart[1]/2, realfixedcolormap->ColorizeStart[2]/2, 0); color1 = D3DCOLOR_COLORVALUE(realfixedcolormap->ColorizeEnd[0]/2, realfixedcolormap->ColorizeEnd[1]/2, realfixedcolormap->ColorizeEnd[2]/2, 1); +#ifndef PALETTEOUTPUT + SetPixelShader(Shaders[SHADER_SpecialColormap]); +#else SetPixelShader(Shaders[SHADER_SpecialColormapPal]); +#endif } } else @@ -1378,7 +1392,11 @@ void D3DFB::Draw3DPart(bool copy3d) CalcFullscreenCoords(verts, Accel2D, false, color0, color1); D3DDevice->DrawPrimitiveUP(D3DPT_TRIANGLEFAN, 2, verts, sizeof(FBVERTEX)); } +#ifndef PALETTEOUTPUT + SetPixelShader(Shaders[SHADER_NormalColor]); +#else SetPixelShader(Shaders[SHADER_NormalColorPal]); +#endif } //========================================================================== @@ -1707,7 +1725,7 @@ void D3DFB::SetBlendingRect(int x1, int y1, int x2, int y2)
// //========================================================================== -void D3DFB::GetScreenshotBuffer(const BYTE *&buffer, int &pitch, ESSType &color_type) +void D3DFB::GetScreenshotBuffer(const canvas_pixel_t *&buffer, int &pitch, ESSType &color_type) { D3DLOCKED_RECT lrect; @@ -1733,7 +1751,7 @@ void D3DFB::GetScreenshotBuffer(const BYTE *&buffer, int &pitch, ESSType &color_ } else { - buffer = (const BYTE *)lrect.pBits; + buffer = (const canvas_pixel_t *)lrect.pBits; pitch = lrect.Pitch; color_type = SS_BGRA; } diff --git a/src/win32/fb_ddraw.cpp b/src/win32/fb_ddraw.cpp index 7cc6037865..9be571f985 100644 --- a/src/win32/fb_ddraw.cpp +++ b/src/win32/fb_ddraw.cpp @@ -32,7 +32,6 @@ ** */ - // HEADER FILES ------------------------------------------------------------ #define DIRECTDRAW_VERSION 0x0300 @@ -61,7 +60,9 @@ // TYPES ------------------------------------------------------------------- +#ifdef USE_OBSOLETE_DDRAW IMPLEMENT_CLASS(DDrawFB) +#endif // EXTERNAL FUNCTION PROTOTYPES -------------------------------------------- @@ -119,6 +120,8 @@ cycle_t BlitCycles; // CODE -------------------------------------------------------------------- +#ifdef USE_OBSOLETE_DDRAW + DDrawFB::DDrawFB (int width, int height, bool fullscreen) : BaseWinFB (width, height) { @@ -996,8 +999,8 @@ DDrawFB::LockSurfRes DDrawFB::LockSurf (LPRECT lockrect, LPDIRECTDRAWSURFACE toL LOG1 ("Final result after restoration attempts: %08lx\n", hr); return NoGood; } - Buffer = (BYTE *)desc.lpSurface; - Pitch = desc.lPitch; + Buffer = (canvas_pixel_t *)desc.lpSurface; + Pitch = desc.lPitch / sizeof(canvas_pixel_t); BufferingNow = false; return wasLost ? 
GoodWasLost : Good; } @@ -1327,6 +1330,7 @@ void DDrawFB::Blank () PrimarySurf->Blt (NULL, NULL, NULL, DDBLT_COLORFILL, &blitFX); } } +#endif ADD_STAT (blit) { diff --git a/src/win32/win32iface.h b/src/win32/win32iface.h index 9b2754eae1..73a2c6966e 100644 --- a/src/win32/win32iface.h +++ b/src/win32/win32iface.h @@ -142,6 +142,7 @@ protected: BaseWinFB() {} }; +#ifdef USE_OBSOLETE_DDRAW class DDrawFB : public BaseWinFB { DECLARE_CLASS(DDrawFB, BaseWinFB) @@ -223,6 +224,7 @@ private: DDrawFB() {} }; +#endif class D3DFB : public BaseWinFB { @@ -250,7 +252,7 @@ public: bool PaintToWindow (); void SetVSync (bool vsync); void NewRefreshRate(); - void GetScreenshotBuffer(const BYTE *&buffer, int &pitch, ESSType &color_type); + void GetScreenshotBuffer(const canvas_pixel_t *&buffer, int &pitch, ESSType &color_type); void ReleaseScreenshotBuffer(); void SetBlendingRect (int x1, int y1, int x2, int y2); bool Begin2D (bool copy3d); diff --git a/src/win32/win32video.cpp b/src/win32/win32video.cpp index 29bb905fbe..3f3645d0bf 100644 --- a/src/win32/win32video.cpp +++ b/src/win32/win32video.cpp @@ -221,8 +221,15 @@ bool Win32Video::InitD3D9 () // Enumerate available display modes. FreeModes (); +#ifndef PALETTEOUTPUT // To do: remove this again (AddD3DModes fails when there are too many modes available for videomenu to display) + AddMode(1920, 1080, 8, 1440, 0); // 1080p + AddMode(1920*2, 1080*2, 8, 1440, 0); // 4k + AddMode(2560, 1440, 8, 1440, 0); // 27" classic + AddMode(2560*2, 1440*2, 8, 1440*2, 0); // 5k +#else AddD3DModes (m_Adapter, D3DFMT_X8R8G8B8); AddD3DModes (m_Adapter, D3DFMT_R5G6B5); +#endif if (Args->CheckParm ("-2")) { // Force all modes to be pixel-doubled. 
ScaleModes (1); @@ -660,6 +667,10 @@ DFrameBuffer *Win32Video::CreateFrameBuffer (int width, int height, bool fullscr flashAmount = 0; } +#ifndef USE_OBSOLETE_DDRAW + fb = new D3DFB(m_Adapter, width, height, fullscreen); + LOG1("New fb created @ %p\n", fb); +#else if (D3D != NULL) { fb = new D3DFB (m_Adapter, width, height, fullscreen); @@ -668,6 +679,7 @@ DFrameBuffer *Win32Video::CreateFrameBuffer (int width, int height, bool fullscr { fb = new DDrawFB (width, height, fullscreen); } + LOG1 ("New fb created @ %p\n", fb); // If we could not create the framebuffer, try again with slightly @@ -729,6 +741,7 @@ DFrameBuffer *Win32Video::CreateFrameBuffer (int width, int height, bool fullscr fb = static_cast(CreateFrameBuffer (width, height, fullscreen, NULL)); } retry = 0; +#endif fb->SetFlash (flashColor, flashAmount); return fb; From 8aabc26cd94018238ed606b81b3d49fabbe429fd Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 30 May 2016 05:52:15 +0200 Subject: [PATCH 002/912] Created standalone rgba drawing functions --- src/CMakeLists.txt | 1 + src/r_draw.cpp | 2470 ++++++++++++++++++++++++++++++------------ src/r_draw.h | 165 ++- src/r_drawt.cpp | 332 +----- src/r_drawt_rgba.cpp | 883 +++++++++++++++ src/r_main.cpp | 4 +- src/r_plane.cpp | 11 +- src/r_plane.h | 4 + src/r_segs.cpp | 4 +- src/r_things.cpp | 97 +- src/r_things.h | 5 +- src/v_draw.cpp | 2 +- 12 files changed, 2896 insertions(+), 1082 deletions(-) create mode 100644 src/r_drawt_rgba.cpp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 84d6f06b93..c90756b5d7 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -883,6 +883,7 @@ set( FASTMATH_PCH_SOURCES r_bsp.cpp r_draw.cpp r_drawt.cpp + r_drawt_rgba.cpp r_main.cpp r_plane.cpp r_segs.cpp diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 0449100085..d2b694f05f 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -38,6 +38,7 @@ #include "r_data/r_translate.h" #include "v_palette.h" #include "r_data/colormaps.h" +#include "r_plane.h" 
#include "gi.h" #include "stats.h" @@ -73,6 +74,19 @@ int scaledviewwidth; // screen depth and asm/no asm. void (*R_DrawColumnHoriz)(void); void (*R_DrawColumn)(void); +void (*R_FillColumn)(void); +void (*R_FillAddColumn)(void); +void (*R_FillAddClampColumn)(void); +void (*R_FillSubClampColumn)(void); +void (*R_FillRevSubClampColumn)(void); +void (*R_DrawAddColumn)(void); +void (*R_DrawTlatedAddColumn)(void); +void (*R_DrawAddClampColumn)(void); +void (*R_DrawAddClampTranslatedColumn)(void); +void (*R_DrawSubClampColumn)(void); +void (*R_DrawSubClampTranslatedColumn)(void); +void (*R_DrawRevSubClampColumn)(void); +void (*R_DrawRevSubClampTranslatedColumn)(void); void (*R_DrawFuzzColumn)(void); void (*R_DrawTranslatedColumn)(void); void (*R_DrawShadedColumn)(void); @@ -82,7 +96,44 @@ void (*R_DrawSpanTranslucent)(void); void (*R_DrawSpanMaskedTranslucent)(void); void (*R_DrawSpanAddClamp)(void); void (*R_DrawSpanMaskedAddClamp)(void); -void (*rt_map4cols)(int,int,int); +void (*R_FillSpan)(void); +void (*R_FillColumnHoriz)(void); +void (*R_DrawFogBoundary)(int x1, int x2, short *uclip, short *dclip); +void (*R_MapColoredPlane)(int y, int x1); +void (*R_DrawParticle)(vissprite_t *); +fixed_t (*tmvline1_add)(); +void (*tmvline4_add)(); +fixed_t (*tmvline1_addclamp)(); +void (*tmvline4_addclamp)(); +fixed_t (*tmvline1_subclamp)(); +void (*tmvline4_subclamp)(); +fixed_t (*tmvline1_revsubclamp)(); +void (*tmvline4_revsubclamp)(); +void (*rt_copy1col)(int hx, int sx, int yl, int yh); +void (*rt_copy4cols)(int sx, int yl, int yh); +void (*rt_shaded1col)(int hx, int sx, int yl, int yh); +void (*rt_shaded4cols)(int sx, int yl, int yh); +void (*rt_map1col)(int hx, int sx, int yl, int yh); +void (*rt_add1col)(int hx, int sx, int yl, int yh); +void (*rt_addclamp1col)(int hx, int sx, int yl, int yh); +void (*rt_subclamp1col)(int hx, int sx, int yl, int yh); +void (*rt_revsubclamp1col)(int hx, int sx, int yl, int yh); +void (*rt_tlate1col)(int hx, int sx, int yl, int yh); +void 
(*rt_tlateadd1col)(int hx, int sx, int yl, int yh); +void (*rt_tlateaddclamp1col)(int hx, int sx, int yl, int yh); +void (*rt_tlatesubclamp1col)(int hx, int sx, int yl, int yh); +void (*rt_tlaterevsubclamp1col)(int hx, int sx, int yl, int yh); +void (*rt_map4cols)(int sx, int yl, int yh); +void (*rt_add4cols)(int sx, int yl, int yh); +void (*rt_addclamp4cols)(int sx, int yl, int yh); +void (*rt_subclamp4cols)(int sx, int yl, int yh); +void (*rt_revsubclamp4cols)(int sx, int yl, int yh); +void (*rt_tlate4cols)(int sx, int yl, int yh); +void (*rt_tlateadd4cols)(int sx, int yl, int yh); +void (*rt_tlateaddclamp4cols)(int sx, int yl, int yh); +void (*rt_tlatesubclamp4cols)(int sx, int yl, int yh); +void (*rt_tlaterevsubclamp4cols)(int sx, int yl, int yh); +void (*rt_initcols)(canvas_pixel_t *buffer); // // R_DrawColumn @@ -198,10 +249,6 @@ void R_DrawColumnP_C (void) // Framebuffer destination address. dest = dc_dest; -#ifndef PALETTEOUTPUT - uint32_t light = calc_light_multiplier(dc_light); -#endif - // Determine scaling, // which is the only mapping to be done. fracstep = dc_iscale; @@ -221,11 +268,7 @@ void R_DrawColumnP_C (void) { // Re-map color indices from wall texture column // using a lighting/special effects LUT. -#ifndef PALETTEOUTPUT - *dest = shade_pal_index(colormap[source[frac>>FRACBITS]], light); -#else *dest = colormap[source[frac >> FRACBITS]]; -#endif dest += pitch; frac += fracstep; @@ -235,8 +278,78 @@ void R_DrawColumnP_C (void) } #endif +void R_DrawColumnP_RGBA_C() +{ + int count; + canvas_pixel_t* dest; + fixed_t frac; + fixed_t fracstep; + + count = dc_count; + + // Zero length, column does not exceed a pixel. + if (count <= 0) + return; + + // Framebuffer destination address. + dest = dc_dest; + + uint32_t light = calc_light_multiplier(dc_light); + + // Determine scaling, + // which is the only mapping to be done. 
+ fracstep = dc_iscale; + frac = dc_texturefrac; + + { + // [RH] Get local copies of these variables so that the compiler + // has a better chance of optimizing this well. + BYTE *colormap = dc_colormap; + const BYTE *source = dc_source; + int pitch = dc_pitch; + + // Inner loop that does the actual texture mapping, + // e.g. a DDA-lile scaling. + // This is as fast as it gets. + do + { + // Re-map color indices from wall texture column + // using a lighting/special effects LUT. + *dest = shade_pal_index(colormap[source[frac >> FRACBITS]], light); + + dest += pitch; + frac += fracstep; + + } while (--count); + } +} + // [RH] Just fills a column with a color -void R_FillColumnP (void) +void R_FillColumnP_C (void) +{ + int count; + canvas_pixel_t* dest; + + count = dc_count; + + if (count <= 0) + return; + + dest = dc_dest; + + { + int pitch = dc_pitch; + BYTE color = dc_color; + + do + { + *dest = color; + dest += pitch; + } while (--count); + } +} + +void R_FillColumnP_RGBA() { int count; canvas_pixel_t* dest; @@ -248,9 +361,7 @@ void R_FillColumnP (void) dest = dc_dest; -#ifndef PALETTEOUTPUT uint32_t light = calc_light_multiplier(dc_light); -#endif { int pitch = dc_pitch; @@ -258,17 +369,40 @@ void R_FillColumnP (void) do { -#ifndef PALETTEOUTPUT *dest = shade_pal_index(color, light); -#else - *dest = color; -#endif dest += pitch; } while (--count); } } -void R_FillAddColumn (void) +void R_FillAddColumn_C (void) +{ + int count; + canvas_pixel_t *dest; + + count = dc_count; + if (count <= 0) + return; + + dest = dc_dest; + int pitch = dc_pitch; + + DWORD *bg2rgb; + DWORD fg; + + bg2rgb = dc_destblend; + fg = dc_srccolor; + + do + { + DWORD bg; + bg = (fg + bg2rgb[*dest]) | 0x1f07c1f; + *dest = RGB32k.All[bg & (bg>>15)]; + dest += pitch; + } while (--count); +} + +void R_FillAddColumn_RGBA_C() { int count; canvas_pixel_t *dest; @@ -280,7 +414,6 @@ void R_FillAddColumn (void) dest = dc_dest; int pitch = dc_pitch; -#ifndef PALETTEOUTPUT uint32_t fg_red = (dc_srccolor 
>> 12) & 0xf8; uint32_t fg_green = (dc_srccolor >> 2) & 0xf8; uint32_t fg_blue = (dc_srccolor << 3) & 0xf8; @@ -298,24 +431,9 @@ void R_FillAddColumn (void) *dest = 0xff000000 | (red << 16) | (green << 8) | blue; dest += pitch; } while (--count); -#else - DWORD *bg2rgb; - DWORD fg; - - bg2rgb = dc_destblend; - fg = dc_srccolor; - - do - { - DWORD bg; - bg = (fg + bg2rgb[*dest]) | 0x1f07c1f; - *dest = RGB32k.All[bg & (bg>>15)]; - dest += pitch; - } while (--count); -#endif } -void R_FillAddClampColumn (void) +void R_FillAddClampColumn_C (void) { int count; canvas_pixel_t *dest; @@ -327,25 +445,6 @@ void R_FillAddClampColumn (void) dest = dc_dest; int pitch = dc_pitch; -#ifndef PALETTEOUTPUT - uint32_t fg_red = (dc_srccolor >> 12) & 0xf8; - uint32_t fg_green = (dc_srccolor >> 2) & 0xf8; - uint32_t fg_blue = (dc_srccolor << 3) & 0xf8; - - do - { - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp(fg_red + bg_red, 0, 255); - uint32_t green = clamp(fg_green + bg_green, 0, 255); - uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - } while (--count); -#else DWORD *bg2rgb; DWORD fg; @@ -365,10 +464,9 @@ void R_FillAddClampColumn (void) *dest = RGB32k.All[a & (a>>15)]; dest += pitch; } while (--count); -#endif } -void R_FillSubClampColumn (void) +void R_FillAddClampColumn_RGBA() { int count; canvas_pixel_t *dest; @@ -380,7 +478,6 @@ void R_FillSubClampColumn (void) dest = dc_dest; int pitch = dc_pitch; -#ifndef PALETTEOUTPUT uint32_t fg_red = (dc_srccolor >> 12) & 0xf8; uint32_t fg_green = (dc_srccolor >> 2) & 0xf8; uint32_t fg_blue = (dc_srccolor << 3) & 0xf8; @@ -391,14 +488,27 @@ void R_FillSubClampColumn (void) uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - uint32_t red = clamp(256 - fg_red + bg_red, 256, 256 + 255) - 255; - uint32_t green = clamp(256 - fg_green + 
bg_green, 256, 256 + 255) - 255; - uint32_t blue = clamp(256 - fg_blue + bg_blue, 256, 256 + 255) - 255; + uint32_t red = clamp(fg_red + bg_red, 0, 255); + uint32_t green = clamp(fg_green + bg_green, 0, 255); + uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); *dest = 0xff000000 | (red << 16) | (green << 8) | blue; dest += pitch; } while (--count); -#else +} + +void R_FillSubClampColumn_C (void) +{ + int count; + canvas_pixel_t *dest; + + count = dc_count; + if (count <= 0) + return; + + dest = dc_dest; + int pitch = dc_pitch; + DWORD *bg2rgb; DWORD fg; @@ -417,10 +527,9 @@ void R_FillSubClampColumn (void) *dest = RGB32k.All[a & (a>>15)]; dest += pitch; } while (--count); -#endif } -void R_FillRevSubClampColumn (void) +void R_FillSubClampColumn_RGBA() { int count; canvas_pixel_t *dest; @@ -432,7 +541,6 @@ void R_FillRevSubClampColumn (void) dest = dc_dest; int pitch = dc_pitch; -#ifndef PALETTEOUTPUT uint32_t fg_red = (dc_srccolor >> 12) & 0xf8; uint32_t fg_green = (dc_srccolor >> 2) & 0xf8; uint32_t fg_blue = (dc_srccolor << 3) & 0xf8; @@ -443,14 +551,27 @@ void R_FillRevSubClampColumn (void) uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - uint32_t red = clamp(256 + fg_red - bg_red, 256, 256 + 255) - 255; - uint32_t green = clamp(256 + fg_green - bg_green, 256, 256 + 255) - 255; - uint32_t blue = clamp(256 + fg_blue - bg_blue, 256, 256 + 255) - 255; + uint32_t red = clamp(256 - fg_red + bg_red, 256, 256 + 255) - 256; + uint32_t green = clamp(256 - fg_green + bg_green, 256, 256 + 255) - 256; + uint32_t blue = clamp(256 - fg_blue + bg_blue, 256, 256 + 255) - 256; *dest = 0xff000000 | (red << 16) | (green << 8) | blue; dest += pitch; } while (--count); -#else +} + +void R_FillRevSubClampColumn_C (void) +{ + int count; + canvas_pixel_t *dest; + + count = dc_count; + if (count <= 0) + return; + + dest = dc_dest; + int pitch = dc_pitch; + DWORD *bg2rgb; DWORD fg; @@ -469,7 +590,37 @@ void R_FillRevSubClampColumn (void) *dest = RGB32k.All[a
& (a>>15)]; dest += pitch; } while (--count); -#endif +} + +void R_FillRevSubClampColumn_RGBA() +{ + int count; + canvas_pixel_t *dest; + + count = dc_count; + if (count <= 0) + return; + + dest = dc_dest; + int pitch = dc_pitch; + + uint32_t fg_red = (dc_srccolor >> 12) & 0xf8; + uint32_t fg_green = (dc_srccolor >> 2) & 0xf8; + uint32_t fg_blue = (dc_srccolor << 3) & 0xf8; + + do + { + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + // Bias by 256 so the unsigned subtraction cannot wrap, clamp, then remove the same 256 bias to get 0..255. + uint32_t red = clamp(256 + fg_red - bg_red, 256, 256 + 255) - 256; + uint32_t green = clamp(256 + fg_green - bg_green, 256, 256 + 255) - 256; + uint32_t blue = clamp(256 + fg_blue - bg_blue, 256, 256 + 255) - 256; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += pitch; + } while (--count); } // @@ -542,7 +693,77 @@ void R_DrawFuzzColumnP_C (void) dest = ylookup[dc_yl] + dc_x + dc_destorg; -#ifndef PALETTEOUTPUT + // colormap #6 is used for shading (of 0-31, a bit brighter than average) + { + // [RH] Make local copies of global vars to try and improve + // the optimizations made by the compiler. + int pitch = dc_pitch; + int fuzz = fuzzpos; + int cnt; + BYTE *map = &NormalLight.Maps[6*256]; + + // [RH] Split this into three separate loops to minimize + // the number of times fuzzpos needs to be clamped. + if (fuzz) + { + cnt = MIN(FUZZTABLE-fuzz,count); + count -= cnt; + do + { + *dest = map[dest[fuzzoffset[fuzz++]]]; + dest += pitch; + } while (--cnt); + } + if (fuzz == FUZZTABLE || count > 0) + { + while (count >= FUZZTABLE) + { + fuzz = 0; + cnt = FUZZTABLE; + count -= FUZZTABLE; + do + { + *dest = map[dest[fuzzoffset[fuzz++]]]; + dest += pitch; + } while (--cnt); + } + fuzz = 0; + if (count > 0) + { + do + { + *dest = map[dest[fuzzoffset[fuzz++]]]; + dest += pitch; + } while (--count); + } + } + fuzzpos = fuzz; + } +} +#endif + +void R_DrawFuzzColumnP_RGBA_C() +{ + int count; + canvas_pixel_t *dest; + + // Adjust borders. Low...
+ if (dc_yl == 0) + dc_yl = 1; + + // .. and high. + if (dc_yh > fuzzviewheight) + dc_yh = fuzzviewheight; + + count = dc_yh - dc_yl; + + // Zero length. + if (count < 0) + return; + + count++; + + dest = ylookup[dc_yl] + dc_x + dc_destorg; // Note: this implementation assumes this function is only used for the pinky shadow effect (i.e. no other fancy colormap than black) // I'm not sure if this is really always the case or not. @@ -618,58 +839,7 @@ void R_DrawFuzzColumnP_C (void) } fuzzpos = fuzz; } - -#else - - // colormap #6 is used for shading (of 0-31, a bit brighter than average) - { - // [RH] Make local copies of global vars to try and improve - // the optimizations made by the compiler. - int pitch = dc_pitch; - int fuzz = fuzzpos; - int cnt; - BYTE *map = &NormalLight.Maps[6*256]; - - // [RH] Split this into three separate loops to minimize - // the number of times fuzzpos needs to be clamped. - if (fuzz) - { - cnt = MIN(FUZZTABLE-fuzz,count); - count -= cnt; - do - { - *dest = map[dest[fuzzoffset[fuzz++]]]; - dest += pitch; - } while (--cnt); - } - if (fuzz == FUZZTABLE || count > 0) - { - while (count >= FUZZTABLE) - { - fuzz = 0; - cnt = FUZZTABLE; - count -= FUZZTABLE; - do - { - *dest = map[dest[fuzzoffset[fuzz++]]]; - dest += pitch; - } while (--cnt); - } - fuzz = 0; - if (count > 0) - { - do - { - *dest = map[dest[fuzzoffset[fuzz++]]]; - dest += pitch; - } while (--count); - } - } - fuzzpos = fuzz; - } -#endif -} -#endif +} // // R_DrawTranlucentColumn @@ -733,7 +903,44 @@ void R_DrawAddColumnP_C (void) fracstep = dc_iscale; frac = dc_texturefrac; -#ifndef PALETTEOUTPUT + { + DWORD *fg2rgb = dc_srcblend; + DWORD *bg2rgb = dc_destblend; + BYTE *colormap = dc_colormap; + const BYTE *source = dc_source; + int pitch = dc_pitch; + + do + { + DWORD fg = colormap[source[frac>>FRACBITS]]; + DWORD bg = *dest; + + fg = fg2rgb[fg]; + bg = bg2rgb[bg]; + fg = (fg+bg) | 0x1f07c1f; + *dest = RGB32k.All[fg & (fg>>15)]; + dest += pitch; + frac += fracstep; + } while 
(--count); + } +} + +void R_DrawAddColumnP_RGBA_C() +{ + int count; + canvas_pixel_t *dest; + fixed_t frac; + fixed_t fracstep; + + count = dc_count; + if (count <= 0) + return; + + dest = dc_dest; + + fracstep = dc_iscale; + frac = dc_texturefrac; + { const BYTE *source = dc_source; int pitch = dc_pitch; @@ -760,28 +967,6 @@ void R_DrawAddColumnP_C (void) frac += fracstep; } while (--count); } -#else - { - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - BYTE *colormap = dc_colormap; - const BYTE *source = dc_source; - int pitch = dc_pitch; - - do - { - DWORD fg = colormap[source[frac>>FRACBITS]]; - DWORD bg = *dest; - - fg = fg2rgb[fg]; - bg = bg2rgb[bg]; - fg = (fg+bg) | 0x1f07c1f; - *dest = RGB32k.All[fg & (fg>>15)]; - dest += pitch; - frac += fracstep; - } while (--count); - } -#endif } // @@ -803,9 +988,39 @@ void R_DrawTranslatedColumnP_C (void) if (count <= 0) return; -#ifndef PALETTEOUTPUT + dest = dc_dest; + + fracstep = dc_iscale; + frac = dc_texturefrac; + + { + // [RH] Local copies of global vars to improve compiler optimizations + BYTE *colormap = dc_colormap; + BYTE *translation = dc_translation; + const BYTE *source = dc_source; + int pitch = dc_pitch; + + do + { + *dest = colormap[translation[source[frac>>FRACBITS]]]; + dest += pitch; + frac += fracstep; + } while (--count); + } +} + +void R_DrawTranslatedColumnP_RGBA_C() +{ + int count; + canvas_pixel_t* dest; + fixed_t frac; + fixed_t fracstep; + + count = dc_count; + if (count <= 0) + return; + uint32_t light = calc_light_multiplier(dc_light); -#endif dest = dc_dest; @@ -821,20 +1036,54 @@ void R_DrawTranslatedColumnP_C (void) do { -#ifndef PALETTEOUTPUT *dest = shade_pal_index(colormap[translation[source[frac >> FRACBITS]]], light); -#else - *dest = colormap[translation[source[frac>>FRACBITS]]]; -#endif dest += pitch; - frac += fracstep; } while (--count); } } // Draw a column that is both translated and translucent -void R_DrawTlatedAddColumnP_C (void) +void 
R_DrawTlatedAddColumnP_C() +{ + int count; + canvas_pixel_t *dest; + fixed_t frac; + fixed_t fracstep; + + count = dc_count; + if (count <= 0) + return; + + dest = dc_dest; + + fracstep = dc_iscale; + frac = dc_texturefrac; + + { + DWORD *fg2rgb = dc_srcblend; + DWORD *bg2rgb = dc_destblend; + BYTE *translation = dc_translation; + BYTE *colormap = dc_colormap; + const BYTE *source = dc_source; + int pitch = dc_pitch; + + do + { + DWORD fg = colormap[translation[source[frac>>FRACBITS]]]; + DWORD bg = *dest; + + fg = fg2rgb[fg]; + bg = bg2rgb[bg]; + fg = (fg + bg) | 0x1f07c1f; + *dest = RGB32k.All[fg & (fg >> 15)]; + dest += pitch; + frac += fracstep; + } while (--count); + } +} + +void R_DrawTlatedAddColumnP_RGBA_C() { int count; canvas_pixel_t *dest; @@ -845,16 +1094,13 @@ void R_DrawTlatedAddColumnP_C (void) if (count <= 0) return; -#ifndef PALETTEOUTPUT uint32_t light = calc_light_multiplier(dc_light); -#endif dest = dc_dest; fracstep = dc_iscale; frac = dc_texturefrac; -#ifndef PALETTEOUTPUT { BYTE *translation = dc_translation; BYTE *colormap = dc_colormap; @@ -882,29 +1128,6 @@ void R_DrawTlatedAddColumnP_C (void) frac += fracstep; } while (--count); } -#else - { - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - BYTE *translation = dc_translation; - BYTE *colormap = dc_colormap; - const BYTE *source = dc_source; - int pitch = dc_pitch; - - do - { - DWORD fg = colormap[translation[source[frac>>FRACBITS]]]; - DWORD bg = *dest; - - fg = fg2rgb[fg]; - bg = bg2rgb[bg]; - fg = (fg + bg) | 0x1f07c1f; - *dest = RGB32k.All[fg & (fg >> 15)]; - dest += pitch; - frac += fracstep; - } while (--count); - } -#endif } // Draw a column whose "color" values are actually translucency @@ -925,7 +1148,41 @@ void R_DrawShadedColumnP_C (void) fracstep = dc_iscale; frac = dc_texturefrac; -#ifndef PALETTEOUTPUT + { + const BYTE *source = dc_source; + BYTE *colormap = dc_colormap; + int pitch = dc_pitch; + DWORD *fgstart = &Col2RGB8[0][dc_color]; + + do + { + DWORD val = 
colormap[source[frac>>FRACBITS]]; + DWORD fg = fgstart[val<<8]; + val = (Col2RGB8[64-val][*dest] + fg) | 0x1f07c1f; + *dest = RGB32k.All[val & (val>>15)]; + + dest += pitch; + frac += fracstep; + } while (--count); + } +} + +void R_DrawShadedColumnP_RGBA_C() +{ + int count; + canvas_pixel_t *dest; + fixed_t frac, fracstep; + + count = dc_count; + + if (count <= 0) + return; + + dest = dc_dest; + + fracstep = dc_iscale; + frac = dc_texturefrac; + uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(dc_light)); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; @@ -954,26 +1211,7 @@ void R_DrawShadedColumnP_C (void) frac += fracstep; } while (--count); } -#else - { - const BYTE *source = dc_source; - BYTE *colormap = dc_colormap; - int pitch = dc_pitch; - DWORD *fgstart = &Col2RGB8[0][dc_color]; - - do - { - DWORD val = colormap[source[frac>>FRACBITS]]; - DWORD fg = fgstart[val<<8]; - val = (Col2RGB8[64-val][*dest] + fg) | 0x1f07c1f; - *dest = RGB32k.All[val & (val>>15)]; - - dest += pitch; - frac += fracstep; - } while (--count); - } -#endif -} +} // Add source to destination, clamping it to white void R_DrawAddClampColumnP_C () @@ -992,7 +1230,6 @@ void R_DrawAddClampColumnP_C () fracstep = dc_iscale; frac = dc_texturefrac; -#ifndef PALETTEOUTPUT { const BYTE *source = dc_source; BYTE *colormap = dc_colormap; @@ -1019,30 +1256,50 @@ void R_DrawAddClampColumnP_C () frac += fracstep; } while (--count); } -#else +} + +void R_DrawAddClampColumnP_RGBA_C() +{ + int count; + canvas_pixel_t *dest; + fixed_t frac; + fixed_t fracstep; + + count = dc_count; + if (count <= 0) + return; + + dest = dc_dest; + + fracstep = dc_iscale; + frac = dc_texturefrac; + { - BYTE *colormap = dc_colormap; const BYTE *source = dc_source; + BYTE *colormap = dc_colormap; int pitch = dc_pitch; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; + uint32_t light = calc_light_multiplier(dc_light); do { - DWORD a = fg2rgb[colormap[source[frac>>FRACBITS]]] 
+ bg2rgb[*dest]; - DWORD b = a; + uint32_t fg = shade_pal_index(colormap[source[frac >> FRACBITS]], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - *dest = RGB32k.All[a & (a>>15)]; + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(fg_red + bg_red, 0, 255); + uint32_t green = clamp(fg_green + bg_green, 0, 255); + uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; dest += pitch; frac += fracstep; } while (--count); } -#endif } // Add translated source to destination, clamping it to white @@ -1062,35 +1319,6 @@ void R_DrawAddClampTranslatedColumnP_C () fracstep = dc_iscale; frac = dc_texturefrac; -#ifndef PALETTEOUTPUT - { - BYTE *translation = dc_translation; - BYTE *colormap = dc_colormap; - const BYTE *source = dc_source; - int pitch = dc_pitch; - uint32_t light = calc_light_multiplier(dc_light); - - do - { - uint32_t fg = shade_pal_index(colormap[translation[source[frac >> FRACBITS]]], light); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp(fg_red + bg_red, 0, 255); - uint32_t green = clamp(fg_green + bg_green, 0, 255); - uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - frac += fracstep; - } while (--count); - } -#else { BYTE *translation = dc_translation; BYTE *colormap = dc_colormap; @@ -1114,7 +1342,51 @@ void R_DrawAddClampTranslatedColumnP_C () frac += fracstep; } while (--count); } -#endif +} + +void R_DrawAddClampTranslatedColumnP_RGBA_C() +{ + int count; + 
canvas_pixel_t *dest; + fixed_t frac; + fixed_t fracstep; + + count = dc_count; + if (count <= 0) + return; + + dest = dc_dest; + + fracstep = dc_iscale; + frac = dc_texturefrac; + + { + BYTE *translation = dc_translation; + BYTE *colormap = dc_colormap; + const BYTE *source = dc_source; + int pitch = dc_pitch; + uint32_t light = calc_light_multiplier(dc_light); + + do + { + uint32_t fg = shade_pal_index(colormap[translation[source[frac >> FRACBITS]]], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(fg_red + bg_red, 0, 255); + uint32_t green = clamp(fg_green + bg_green, 0, 255); + uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += pitch; + frac += fracstep; + } while (--count); + } } // Subtract destination from source, clamping it to black @@ -1134,7 +1406,45 @@ void R_DrawSubClampColumnP_C () fracstep = dc_iscale; frac = dc_texturefrac; -#ifndef PALETTEOUTPUT + { + BYTE *colormap = dc_colormap; + const BYTE *source = dc_source; + int pitch = dc_pitch; + DWORD *fg2rgb = dc_srcblend; + DWORD *bg2rgb = dc_destblend; + + do + { + DWORD a = (fg2rgb[colormap[source[frac>>FRACBITS]]] | 0x40100400) - bg2rgb[*dest]; + DWORD b = a; + + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + *dest = RGB32k.All[a & (a>>15)]; + dest += pitch; + frac += fracstep; + } while (--count); + } +} + +void R_DrawSubClampColumnP_RGBA_C() +{ + int count; + canvas_pixel_t *dest; + fixed_t frac; + fixed_t fracstep; + + count = dc_count; + if (count <= 0) + return; + + dest = dc_dest; + + fracstep = dc_iscale; + frac = dc_texturefrac; + { BYTE *colormap = dc_colormap; const BYTE *source = dc_source; @@ -1161,29 +1471,6 @@ void R_DrawSubClampColumnP_C () frac += fracstep; } while (--count); } 
-#else - { - BYTE *colormap = dc_colormap; - const BYTE *source = dc_source; - int pitch = dc_pitch; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - - do - { - DWORD a = (fg2rgb[colormap[source[frac>>FRACBITS]]] | 0x40100400) - bg2rgb[*dest]; - DWORD b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - *dest = RGB32k.All[a & (a>>15)]; - dest += pitch; - frac += fracstep; - } while (--count); - } -#endif } // Subtract destination from source, clamping it to black @@ -1203,35 +1490,6 @@ void R_DrawSubClampTranslatedColumnP_C () fracstep = dc_iscale; frac = dc_texturefrac; -#ifndef PALETTEOUTPUT - { - BYTE *translation = dc_translation; - BYTE *colormap = dc_colormap; - const BYTE *source = dc_source; - int pitch = dc_pitch; - uint32_t light = calc_light_multiplier(dc_light); - - do - { - uint32_t fg = shade_pal_index(colormap[translation[source[frac >> FRACBITS]]], light); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp(256 - fg_red + bg_red, 256, 256 + 255) - 256; - uint32_t green = clamp(256 - fg_green + bg_green, 256, 256 + 255) - 256; - uint32_t blue = clamp(256 - fg_blue + bg_blue, 256, 256 + 255) - 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - frac += fracstep; - } while (--count); - } -#else { BYTE *translation = dc_translation; BYTE *colormap = dc_colormap; @@ -1254,7 +1512,51 @@ void R_DrawSubClampTranslatedColumnP_C () frac += fracstep; } while (--count); } -#endif +} + +void R_DrawSubClampTranslatedColumnP_RGBA_C() +{ + int count; + canvas_pixel_t *dest; + fixed_t frac; + fixed_t fracstep; + + count = dc_count; + if (count <= 0) + return; + + dest = dc_dest; + + fracstep = dc_iscale; + frac = dc_texturefrac; + + { + BYTE *translation = dc_translation; + BYTE *colormap 
= dc_colormap; + const BYTE *source = dc_source; + int pitch = dc_pitch; + uint32_t light = calc_light_multiplier(dc_light); + + do + { + uint32_t fg = shade_pal_index(colormap[translation[source[frac >> FRACBITS]]], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(256 - fg_red + bg_red, 256, 256 + 255) - 256; + uint32_t green = clamp(256 - fg_green + bg_green, 256, 256 + 255) - 256; + uint32_t blue = clamp(256 - fg_blue + bg_blue, 256, 256 + 255) - 256; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += pitch; + frac += fracstep; + } while (--count); + } } // Subtract source from destination, clamping it to black @@ -1274,7 +1576,45 @@ void R_DrawRevSubClampColumnP_C () fracstep = dc_iscale; frac = dc_texturefrac; -#ifndef PALETTEOUTPUT + { + BYTE *colormap = dc_colormap; + const BYTE *source = dc_source; + int pitch = dc_pitch; + DWORD *fg2rgb = dc_srcblend; + DWORD *bg2rgb = dc_destblend; + + do + { + DWORD a = (bg2rgb[*dest] | 0x40100400) - fg2rgb[colormap[source[frac>>FRACBITS]]]; + DWORD b = a; + + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + *dest = RGB32k.All[a & (a>>15)]; + dest += pitch; + frac += fracstep; + } while (--count); + } +} + +void R_DrawRevSubClampColumnP_RGBA_C() +{ + int count; + canvas_pixel_t *dest; + fixed_t frac; + fixed_t fracstep; + + count = dc_count; + if (count <= 0) + return; + + dest = dc_dest; + + fracstep = dc_iscale; + frac = dc_texturefrac; + { BYTE *colormap = dc_colormap; const BYTE *source = dc_source; @@ -1301,29 +1641,6 @@ void R_DrawRevSubClampColumnP_C () frac += fracstep; } while (--count); } -#else - { - BYTE *colormap = dc_colormap; - const BYTE *source = dc_source; - int pitch = dc_pitch; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - - do - 
{ - DWORD a = (bg2rgb[*dest] | 0x40100400) - fg2rgb[colormap[source[frac>>FRACBITS]]]; - DWORD b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - *dest = RGB32k.All[a & (a>>15)]; - dest += pitch; - frac += fracstep; - } while (--count); - } -#endif } // Subtract source from destination, clamping it to black @@ -1343,7 +1660,46 @@ void R_DrawRevSubClampTranslatedColumnP_C () fracstep = dc_iscale; frac = dc_texturefrac; -#ifndef PALETTEOUTPUT + { + BYTE *translation = dc_translation; + BYTE *colormap = dc_colormap; + const BYTE *source = dc_source; + int pitch = dc_pitch; + DWORD *fg2rgb = dc_srcblend; + DWORD *bg2rgb = dc_destblend; + + do + { + DWORD a = (bg2rgb[*dest] | 0x40100400) - fg2rgb[colormap[translation[source[frac>>FRACBITS]]]]; + DWORD b = a; + + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + *dest = RGB32k.All[(a>>15) & a]; + dest += pitch; + frac += fracstep; + } while (--count); + } +} + +void R_DrawRevSubClampTranslatedColumnP_RGBA_C() +{ + int count; + canvas_pixel_t *dest; + fixed_t frac; + fixed_t fracstep; + + count = dc_count; + if (count <= 0) + return; + + dest = dc_dest; + + fracstep = dc_iscale; + frac = dc_texturefrac; + { BYTE *translation = dc_translation; BYTE *colormap = dc_colormap; @@ -1371,34 +1727,9 @@ void R_DrawRevSubClampTranslatedColumnP_C () frac += fracstep; } while (--count); } -#else - { - BYTE *translation = dc_translation; - BYTE *colormap = dc_colormap; - const BYTE *source = dc_source; - int pitch = dc_pitch; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - - do - { - DWORD a = (bg2rgb[*dest] | 0x40100400) - fg2rgb[colormap[translation[source[frac>>FRACBITS]]]]; - DWORD b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - *dest = RGB32k.All[(a>>15) & a]; - dest += pitch; - frac += fracstep; - } while (--count); - } -#endif } - // // R_DrawSpan // With DOOM style restrictions on view orientation, @@ -1549,15 +1880,84 @@ void 
R_DrawSpanP_C (void) xstep = ds_xstep; ystep = ds_ystep; -#ifndef PALETTEOUTPUT - uint32_t light = calc_light_multiplier(ds_light); + if (ds_xbits == 6 && ds_ybits == 6) + { + // 64x64 is the most common case by far, so special case it. + + do + { + // Current texture index in u,v. + spot = ((xfrac>>(32-6-6))&(63*64)) + (yfrac>>(32-6)); + + // Lookup pixel from flat texture tile, + // re-index using light/colormap. + *dest++ = colormap[source[spot]]; + + // Next step in u,v. + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + else + { + BYTE yshift = 32 - ds_ybits; + BYTE xshift = yshift - ds_xbits; + int xmask = ((1 << ds_xbits) - 1) << ds_ybits; + + do + { + // Current texture index in u,v. + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + + // Lookup pixel from flat texture tile, + // re-index using light/colormap. + *dest++ = colormap[source[spot]]; + + // Next step in u,v. + xfrac += xstep; + yfrac += ystep; + } while (--count); + } +} #endif +void R_DrawSpanP_RGBA_C() +{ + dsfixed_t xfrac; + dsfixed_t yfrac; + dsfixed_t xstep; + dsfixed_t ystep; + canvas_pixel_t* dest; + const BYTE* source = ds_source; + const BYTE* colormap = ds_colormap; + int count; + int spot; + +#ifdef RANGECHECK + if (ds_x2 < ds_x1 || ds_x1 < 0 + || ds_x2 >= screen->width || ds_y > screen->height) + { + I_Error("R_DrawSpan: %i to %i at %i", ds_x1, ds_x2, ds_y); + } + // dscount++; +#endif + + xfrac = ds_xfrac; + yfrac = ds_yfrac; + + dest = ylookup[ds_y] + ds_x1 + dc_destorg; + + count = ds_x2 - ds_x1 + 1; + + xstep = ds_xstep; + ystep = ds_ystep; + + uint32_t light = calc_light_multiplier(ds_light); + if (ds_xbits == 6 && ds_ybits == 6) { // 64x64 is the most common case by far, so special case it. 
-#ifndef PALETTEOUTPUT #ifndef NO_SSE __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); uint32_t *palette = (uint32_t*)GPalette.BaseColors; @@ -1589,14 +1989,14 @@ void R_DrawSpanP_C (void) // Lookup pixel from flat texture tile, // re-index using light/colormap. - __m128i fg = _mm_set_epi32(palette[p0], palette[p1], palette[p2], palette[p3]); + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); fg_hi = _mm_mullo_epi16(fg_hi, mlight); fg_hi = _mm_srli_epi16(fg_hi, 8); fg_lo = _mm_mullo_epi16(fg_lo, mlight); fg_lo = _mm_srli_epi16(fg_lo, 8); - fg = _mm_packus_epi16(fg_hi, fg_lo); + fg = _mm_packus_epi16(fg_lo, fg_hi); _mm_storeu_si128((__m128i*)dest, fg); // Next step in u,v. @@ -1604,21 +2004,16 @@ void R_DrawSpanP_C (void) } if (count == 0) return; -#endif #endif do { // Current texture index in u,v. - spot = ((xfrac>>(32-6-6))&(63*64)) + (yfrac>>(32-6)); + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); // Lookup pixel from flat texture tile, // re-index using light/colormap. -#ifndef PALETTEOUTPUT *dest++ = shade_pal_index(colormap[source[spot]], light); -#else - *dest++ = colormap[source[spot]]; -#endif // Next step in u,v. xfrac += xstep; @@ -1638,11 +2033,7 @@ void R_DrawSpanP_C (void) // Lookup pixel from flat texture tile, // re-index using light/colormap. -#ifndef PALETTEOUTPUT *dest++ = shade_pal_index(colormap[source[spot]], light); -#else - *dest++ = colormap[source[spot]]; -#endif // Next step in u,v. 
xfrac += xstep; @@ -1651,6 +2042,8 @@ void R_DrawSpanP_C (void) } } +#ifndef X86_ASM + // [RH] Draw a span with holes void R_DrawSpanMaskedP_C (void) { @@ -1664,10 +2057,6 @@ void R_DrawSpanMaskedP_C (void) int count; int spot; -#ifndef PALETTEOUTPUT - uint32_t light = calc_light_multiplier(ds_light); -#endif - xfrac = ds_xfrac; yfrac = ds_yfrac; @@ -1689,11 +2078,7 @@ void R_DrawSpanMaskedP_C (void) texdata = source[spot]; if (texdata != 0) { -#ifndef PALETTEOUTPUT - *dest = shade_pal_index(colormap[texdata], light); -#else *dest = colormap[texdata]; -#endif } dest++; xfrac += xstep; @@ -1713,11 +2098,7 @@ void R_DrawSpanMaskedP_C (void) texdata = source[spot]; if (texdata != 0) { -#ifndef PALETTEOUTPUT - *dest = shade_pal_index(colormap[texdata], light); -#else *dest = colormap[texdata]; -#endif } dest++; xfrac += xstep; @@ -1727,6 +2108,71 @@ void R_DrawSpanMaskedP_C (void) } #endif +void R_DrawSpanMaskedP_RGBA_C() +{ + dsfixed_t xfrac; + dsfixed_t yfrac; + dsfixed_t xstep; + dsfixed_t ystep; + canvas_pixel_t* dest; + const BYTE* source = ds_source; + const BYTE* colormap = ds_colormap; + int count; + int spot; + + uint32_t light = calc_light_multiplier(ds_light); + + xfrac = ds_xfrac; + yfrac = ds_yfrac; + + dest = ylookup[ds_y] + ds_x1 + dc_destorg; + + count = ds_x2 - ds_x1 + 1; + + xstep = ds_xstep; + ystep = ds_ystep; + + if (ds_xbits == 6 && ds_ybits == 6) + { + // 64x64 is the most common case by far, so special case it. 
+ do + { + BYTE texdata; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + texdata = source[spot]; + if (texdata != 0) + { + *dest = shade_pal_index(colormap[texdata], light); + } + dest++; + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + else + { + BYTE yshift = 32 - ds_ybits; + BYTE xshift = yshift - ds_xbits; + int xmask = ((1 << ds_xbits) - 1) << ds_ybits; + do + { + BYTE texdata; + + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + texdata = source[spot]; + if (texdata != 0) + { + *dest = shade_pal_index(colormap[texdata], light); + } + dest++; + xfrac += xstep; + yfrac += ystep; + } while (--count); + } +} + + void R_DrawSpanTranslucentP_C (void) { dsfixed_t xfrac; @@ -1756,7 +2202,68 @@ void R_DrawSpanTranslucentP_C (void) if (ds_xbits == 6 && ds_ybits == 6) { // 64x64 is the most common case by far, so special case it. -#ifndef PALETTEOUTPUT + do + { + spot = ((xfrac>>(32-6-6))&(63*64)) + (yfrac>>(32-6)); + DWORD fg = colormap[source[spot]]; + DWORD bg = *dest; + fg = fg2rgb[fg]; + bg = bg2rgb[bg]; + fg = (fg+bg) | 0x1f07c1f; + *dest++ = RGB32k.All[fg & (fg>>15)]; + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + else + { + BYTE yshift = 32 - ds_ybits; + BYTE xshift = yshift - ds_xbits; + int xmask = ((1 << ds_xbits) - 1) << ds_ybits; + do + { + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + DWORD fg = colormap[source[spot]]; + DWORD bg = *dest; + fg = fg2rgb[fg]; + bg = bg2rgb[bg]; + fg = (fg+bg) | 0x1f07c1f; + *dest++ = RGB32k.All[fg & (fg>>15)]; + xfrac += xstep; + yfrac += ystep; + } while (--count); + } +} + +void R_DrawSpanTranslucentP_RGBA_C() +{ + dsfixed_t xfrac; + dsfixed_t yfrac; + dsfixed_t xstep; + dsfixed_t ystep; + canvas_pixel_t* dest; + const BYTE* source = ds_source; + const BYTE* colormap = ds_colormap; + int count; + int spot; + DWORD *fg2rgb = dc_srcblend; + DWORD *bg2rgb = dc_destblend; + + xfrac = ds_xfrac; + yfrac = ds_yfrac; + + dest = ylookup[ds_y] + ds_x1 + 
dc_destorg; + + count = ds_x2 - ds_x1 + 1; + + xstep = ds_xstep; + ystep = ds_ystep; + + uint32_t light = calc_light_multiplier(ds_light); + + if (ds_xbits == 6 && ds_ybits == 6) + { + // 64x64 is the most common case by far, so special case it. do { spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); @@ -1779,24 +2286,9 @@ void R_DrawSpanTranslucentP_C (void) xfrac += xstep; yfrac += ystep; } while (--count); -#else - do - { - spot = ((xfrac>>(32-6-6))&(63*64)) + (yfrac>>(32-6)); - DWORD fg = colormap[source[spot]]; - DWORD bg = *dest; - fg = fg2rgb[fg]; - bg = bg2rgb[bg]; - fg = (fg+bg) | 0x1f07c1f; - *dest++ = RGB32k.All[fg & (fg>>15)]; - xfrac += xstep; - yfrac += ystep; - } while (--count); -#endif } else { -#ifndef PALETTEOUTPUT BYTE yshift = 32 - ds_ybits; BYTE xshift = yshift - ds_xbits; int xmask = ((1 << ds_xbits) - 1) << ds_ybits; @@ -1822,23 +2314,6 @@ void R_DrawSpanTranslucentP_C (void) xfrac += xstep; yfrac += ystep; } while (--count); -#else - BYTE yshift = 32 - ds_ybits; - BYTE xshift = yshift - ds_xbits; - int xmask = ((1 << ds_xbits) - 1) << ds_ybits; - do - { - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - DWORD fg = colormap[source[spot]]; - DWORD bg = *dest; - fg = fg2rgb[fg]; - bg = bg2rgb[bg]; - fg = (fg+bg) | 0x1f07c1f; - *dest++ = RGB32k.All[fg & (fg>>15)]; - xfrac += xstep; - yfrac += ystep; - } while (--count); -#endif } } @@ -1879,29 +2354,12 @@ void R_DrawSpanMaskedTranslucentP_C (void) texdata = source[spot]; if (texdata != 0) { -#ifndef PALETTEOUTPUT - uint32_t fg = shade_pal_index(colormap[texdata], light); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = (fg) & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = (fg_red + bg_red + 1) / 2; - uint32_t green = (fg_green + bg_green + 1) / 2; - uint32_t blue = (fg_blue + bg_blue + 1) / 2; - - *dest = 0xff000000 | 
(red << 16) | (green << 8) | blue; -#else DWORD fg = colormap[texdata]; DWORD bg = *dest; fg = fg2rgb[fg]; bg = bg2rgb[bg]; fg = (fg+bg) | 0x1f07c1f; *dest = RGB32k.All[fg & (fg>>15)]; -#endif } dest++; xfrac += xstep; @@ -1921,29 +2379,12 @@ void R_DrawSpanMaskedTranslucentP_C (void) texdata = source[spot]; if (texdata != 0) { -#ifndef PALETTEOUTPUT - uint32_t fg = shade_pal_index(colormap[texdata], light); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = (fg) & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = (fg_red + bg_red + 1) / 2; - uint32_t green = (fg_green + bg_green + 1) / 2; - uint32_t blue = (fg_blue + bg_blue + 1) / 2; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; -#else DWORD fg = colormap[texdata]; DWORD bg = *dest; fg = fg2rgb[fg]; bg = bg2rgb[bg]; fg = (fg+bg) | 0x1f07c1f; *dest = RGB32k.All[fg & (fg>>15)]; -#endif } dest++; xfrac += xstep; @@ -1952,7 +2393,7 @@ void R_DrawSpanMaskedTranslucentP_C (void) } } -void R_DrawSpanAddClampP_C (void) +void R_DrawSpanMaskedTranslucentP_RGBA_C() { dsfixed_t xfrac; dsfixed_t yfrac; @@ -1978,6 +2419,96 @@ void R_DrawSpanAddClampP_C (void) xstep = ds_xstep; ystep = ds_ystep; + if (ds_xbits == 6 && ds_ybits == 6) + { + // 64x64 is the most common case by far, so special case it. 
+ do + { + BYTE texdata; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + texdata = source[spot]; + if (texdata != 0) + { + uint32_t fg = shade_pal_index(colormap[texdata], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = (fg) & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = (fg_red + bg_red + 1) / 2; + uint32_t green = (fg_green + bg_green + 1) / 2; + uint32_t blue = (fg_blue + bg_blue + 1) / 2; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + } + dest++; + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + else + { + BYTE yshift = 32 - ds_ybits; + BYTE xshift = yshift - ds_xbits; + int xmask = ((1 << ds_xbits) - 1) << ds_ybits; + do + { + BYTE texdata; + + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + texdata = source[spot]; + if (texdata != 0) + { + uint32_t fg = shade_pal_index(colormap[texdata], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = (fg) & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = (fg_red + bg_red + 1) / 2; + uint32_t green = (fg_green + bg_green + 1) / 2; + uint32_t blue = (fg_blue + bg_blue + 1) / 2; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + } + dest++; + xfrac += xstep; + yfrac += ystep; + } while (--count); + } +} + +void R_DrawSpanAddClampP_C (void) +{ + dsfixed_t xfrac; + dsfixed_t yfrac; + dsfixed_t xstep; + dsfixed_t ystep; + canvas_pixel_t* dest; + const BYTE* source = ds_source; + const BYTE* colormap = ds_colormap; + int count; + int spot; + DWORD *fg2rgb = dc_srcblend; + DWORD *bg2rgb = dc_destblend; + + xfrac = ds_xfrac; + yfrac = ds_yfrac; + + dest = ylookup[ds_y] + ds_x1 + dc_destorg; + + count = ds_x2 - ds_x1 + 1; + + xstep = ds_xstep; 
+ ystep = ds_ystep; + if (ds_xbits == 6 && ds_ybits == 6) { // 64x64 is the most common case by far, so special case it. @@ -1985,22 +2516,6 @@ void R_DrawSpanAddClampP_C (void) { spot = ((xfrac>>(32-6-6))&(63*64)) + (yfrac>>(32-6)); -#ifndef PALETTEOUTPUT - uint32_t fg = shade_pal_index(colormap[source[spot]], light); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = (fg) & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp(fg_red + bg_red, 0, 255); - uint32_t green = clamp(fg_green + bg_green, 0, 255); - uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); - - *dest++ = 0xff000000 | (red << 16) | (green << 8) | blue; -#else DWORD a = fg2rgb[colormap[source[spot]]] + bg2rgb[*dest]; DWORD b = a; @@ -2010,7 +2525,6 @@ void R_DrawSpanAddClampP_C (void) b = b - (b >> 5); a |= b; *dest++ = RGB32k.All[a & (a>>15)]; -#endif xfrac += xstep; yfrac += ystep; @@ -2025,7 +2539,55 @@ void R_DrawSpanAddClampP_C (void) { spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); -#ifndef PALETTEOUTPUT + DWORD a = fg2rgb[colormap[source[spot]]] + bg2rgb[*dest]; + DWORD b = a; + + a |= 0x01f07c1f; + b &= 0x40100400; + a &= 0x3fffffff; + b = b - (b >> 5); + a |= b; + *dest++ = RGB32k.All[a & (a>>15)]; + + xfrac += xstep; + yfrac += ystep; + } while (--count); + } +} + +void R_DrawSpanAddClampP_RGBA_C() +{ + dsfixed_t xfrac; + dsfixed_t yfrac; + dsfixed_t xstep; + dsfixed_t ystep; + canvas_pixel_t* dest; + const BYTE* source = ds_source; + const BYTE* colormap = ds_colormap; + int count; + int spot; + DWORD *fg2rgb = dc_srcblend; + DWORD *bg2rgb = dc_destblend; + + uint32_t light = calc_light_multiplier(ds_light); + + xfrac = ds_xfrac; + yfrac = ds_yfrac; + + dest = ylookup[ds_y] + ds_x1 + dc_destorg; + + count = ds_x2 - ds_x1 + 1; + + xstep = ds_xstep; + ystep = ds_ystep; + + if (ds_xbits == 6 && ds_ybits == 6) + { + // 64x64 is the 
most common case by far, so special case it. + do + { + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t fg = shade_pal_index(colormap[source[spot]], light); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; @@ -2040,17 +2602,34 @@ void R_DrawSpanAddClampP_C (void) uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); *dest++ = 0xff000000 | (red << 16) | (green << 8) | blue; -#else - DWORD a = fg2rgb[colormap[source[spot]]] + bg2rgb[*dest]; - DWORD b = a; - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - *dest++ = RGB32k.All[a & (a>>15)]; -#endif + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + else + { + BYTE yshift = 32 - ds_ybits; + BYTE xshift = yshift - ds_xbits; + int xmask = ((1 << ds_xbits) - 1) << ds_ybits; + do + { + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + + uint32_t fg = shade_pal_index(colormap[source[spot]], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = (fg) & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(fg_red + bg_red, 0, 255); + uint32_t green = clamp(fg_green + bg_green, 0, 255); + uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + + *dest++ = 0xff000000 | (red << 16) | (green << 8) | blue; xfrac += xstep; yfrac += ystep; @@ -2095,22 +2674,6 @@ void R_DrawSpanMaskedAddClampP_C (void) texdata = source[spot]; if (texdata != 0) { -#ifndef PALETTEOUTPUT - uint32_t fg = shade_pal_index(colormap[texdata], light); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = (fg) & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = (fg_red + bg_red + 1) / 2; - uint32_t green = (fg_green + bg_green + 1) / 2; - uint32_t blue = (fg_blue 
+ bg_blue + 1) / 2; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; -#else DWORD a = fg2rgb[colormap[texdata]] + bg2rgb[*dest]; DWORD b = a; @@ -2120,7 +2683,6 @@ void R_DrawSpanMaskedAddClampP_C (void) b = b - (b >> 5); a |= b; *dest = RGB32k.All[a & (a>>15)]; -#endif } dest++; xfrac += xstep; @@ -2140,7 +2702,60 @@ void R_DrawSpanMaskedAddClampP_C (void) texdata = source[spot]; if (texdata != 0) { -#ifndef PALETTEOUTPUT + DWORD a = fg2rgb[colormap[texdata]] + bg2rgb[*dest]; + DWORD b = a; + + a |= 0x01f07c1f; + b &= 0x40100400; + a &= 0x3fffffff; + b = b - (b >> 5); + a |= b; + *dest = RGB32k.All[a & (a>>15)]; + } + dest++; + xfrac += xstep; + yfrac += ystep; + } while (--count); + } +} + +void R_DrawSpanMaskedAddClampP_RGBA_C() +{ + dsfixed_t xfrac; + dsfixed_t yfrac; + dsfixed_t xstep; + dsfixed_t ystep; + canvas_pixel_t* dest; + const BYTE* source = ds_source; + const BYTE* colormap = ds_colormap; + int count; + int spot; + DWORD *fg2rgb = dc_srcblend; + DWORD *bg2rgb = dc_destblend; + + uint32_t light = calc_light_multiplier(ds_light); + + xfrac = ds_xfrac; + yfrac = ds_yfrac; + + dest = ylookup[ds_y] + ds_x1 + dc_destorg; + + count = ds_x2 - ds_x1 + 1; + + xstep = ds_xstep; + ystep = ds_ystep; + + if (ds_xbits == 6 && ds_ybits == 6) + { + // 64x64 is the most common case by far, so special case it. 
+ do + { + BYTE texdata; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + texdata = source[spot]; + if (texdata != 0) + { uint32_t fg = shade_pal_index(colormap[texdata], light); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; @@ -2155,17 +2770,39 @@ void R_DrawSpanMaskedAddClampP_C (void) uint32_t blue = (fg_blue + bg_blue + 1) / 2; *dest = 0xff000000 | (red << 16) | (green << 8) | blue; -#else - DWORD a = fg2rgb[colormap[texdata]] + bg2rgb[*dest]; - DWORD b = a; + } + dest++; + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + else + { + BYTE yshift = 32 - ds_ybits; + BYTE xshift = yshift - ds_xbits; + int xmask = ((1 << ds_xbits) - 1) << ds_ybits; + do + { + BYTE texdata; - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - *dest = RGB32k.All[a & (a>>15)]; -#endif + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + texdata = source[spot]; + if (texdata != 0) + { + uint32_t fg = shade_pal_index(colormap[texdata], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = (fg) & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = (fg_red + bg_red + 1) / 2; + uint32_t green = (fg_green + bg_green + 1) / 2; + uint32_t blue = (fg_blue + bg_blue + 1) / 2; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; } dest++; xfrac += xstep; @@ -2175,18 +2812,19 @@ void R_DrawSpanMaskedAddClampP_C (void) } // [RH] Just fill a span with a color -void R_FillSpan (void) +void R_FillSpan_C (void) +{ + memset (ylookup[ds_y] + ds_x1 + dc_destorg, ds_color, (ds_x2 - ds_x1 + 1)); +} + +void R_FillSpan_RGBA() { -#ifndef PALETTEOUTPUT canvas_pixel_t *dest = ylookup[ds_y] + ds_x1 + dc_destorg; int count = (ds_x2 - ds_x1 + 1); uint32_t light = calc_light_multiplier(ds_light); uint32_t color = shade_pal_index(ds_color, light); for (int 
i = 0; i < count; i++) dest[i] = color; -#else - memset (ylookup[ds_y] + ds_x1 + dc_destorg, ds_color, (ds_x2 - ds_x1 + 1) * sizeof(canvas_pixel_t)); -#endif } // Draw a voxel slab @@ -2383,17 +3021,33 @@ DWORD vlinec1 () int bits = vlinebits; int pitch = dc_pitch; -#ifndef PALETTEOUTPUT - uint32_t light = calc_light_multiplier(dc_light); + do + { + *dest = colormap[source[frac >> bits]]; + frac += fracstep; + dest += pitch; + } while (--count); + + return frac; +} #endif +DWORD vlinec1_RGBA() +{ + DWORD fracstep = dc_iscale; + DWORD frac = dc_texturefrac; + BYTE *colormap = dc_colormap; + int count = dc_count; + const BYTE *source = dc_source; + canvas_pixel_t *dest = dc_dest; + int bits = vlinebits; + int pitch = dc_pitch; + + uint32_t light = calc_light_multiplier(dc_light); + do { -#ifndef PALETTEOUTPUT - *dest = shade_pal_index(colormap[source[frac>>bits]], light); -#else - *dest = colormap[source[frac >> bits]]; -#endif + *dest = shade_pal_index(colormap[source[frac >> bits]], light); frac += fracstep; dest += pitch; } while (--count); @@ -2401,6 +3055,7 @@ DWORD vlinec1 () return frac; } +#if !defined(X86_ASM) void vlinec4 () { canvas_pixel_t *dest = dc_dest; @@ -2408,23 +3063,37 @@ void vlinec4 () int bits = vlinebits; DWORD place; -#ifndef PALETTEOUTPUT + do + { + dest[0] = palookupoffse[0][bufplce[0][(place=vplce[0])>>bits]]; vplce[0] = place+vince[0]; + dest[1] = palookupoffse[1][bufplce[1][(place=vplce[1])>>bits]]; vplce[1] = place+vince[1]; + dest[2] = palookupoffse[2][bufplce[2][(place=vplce[2])>>bits]]; vplce[2] = place+vince[2]; + dest[3] = palookupoffse[3][bufplce[3][(place=vplce[3])>>bits]]; vplce[3] = place+vince[3]; + dest += dc_pitch; + } while (--count); +} +#endif + +void vlinec4_RGBA() +{ + canvas_pixel_t *dest = dc_dest; + int count = dc_count; + int bits = vlinebits; + uint32_t light0 = calc_light_multiplier(palookuplight[0]); uint32_t light1 = calc_light_multiplier(palookuplight[1]); uint32_t light2 = 
calc_light_multiplier(palookuplight[2]); uint32_t light3 = calc_light_multiplier(palookuplight[3]); #ifndef NO_SSE - __m128i mlight_hi = _mm_set_epi16(256, light0, light0, light0, 256, light1, light1, light1); - __m128i mlight_lo = _mm_set_epi16(256, light2, light2, light2, 256, light3, light3, light3); + __m128i mlight_hi = _mm_set_epi16(256, light1, light1, light1, 256, light0, light0, light0); + __m128i mlight_lo = _mm_set_epi16(256, light3, light3, light3, 256, light2, light2, light2); uint32_t *palette = (uint32_t*)GPalette.BaseColors; DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; -#endif #endif do { -#ifndef PALETTEOUTPUT #ifndef NO_SSE DWORD place0 = local_vplce[0]; @@ -2442,14 +3111,14 @@ void vlinec4 () local_vplce[2] = place2 + local_vince[2]; local_vplce[3] = place3 + local_vince[3]; - __m128i fg = _mm_set_epi32(palette[p0], palette[p1], palette[p2], palette[p3]); + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); fg_hi = _mm_mullo_epi16(fg_hi, mlight_hi); fg_hi = _mm_srli_epi16(fg_hi, 8); fg_lo = _mm_mullo_epi16(fg_lo, mlight_lo); fg_lo = _mm_srli_epi16(fg_lo, 8); - fg = _mm_packus_epi16(fg_hi, fg_lo); + fg = _mm_packus_epi16(fg_lo, fg_hi); _mm_storeu_si128((__m128i*)dest, fg); #else @@ -2457,17 +3126,10 @@ void vlinec4 () dest[1] = shade_pal_index(palookupoffse[1][bufplce[1][(place = vplce[1]) >> bits]], light1); vplce[1] = place + vince[1]; dest[2] = shade_pal_index(palookupoffse[2][bufplce[2][(place = vplce[2]) >> bits]], light2); vplce[2] = place + vince[2]; dest[3] = shade_pal_index(palookupoffse[3][bufplce[3][(place = vplce[3]) >> bits]], light3); vplce[3] = place + vince[3]; -#endif -#else - dest[0] = palookupoffse[0][bufplce[0][(place=vplce[0])>>bits]]; vplce[0] = place+vince[0]; - dest[1] = 
palookupoffse[1][bufplce[1][(place=vplce[1])>>bits]]; vplce[1] = place+vince[1]; - dest[2] = palookupoffse[2][bufplce[2][(place=vplce[2])>>bits]]; vplce[2] = place+vince[2]; - dest[3] = palookupoffse[3][bufplce[3][(place=vplce[3])>>bits]]; vplce[3] = place+vince[3]; #endif dest += dc_pitch; } while (--count); -#ifndef PALETTEOUTPUT #ifndef NO_SSE // Is this needed? Global variables makes it tricky to know.. vplce[0] = local_vplce[0]; @@ -2479,9 +3141,7 @@ void vlinec4 () vince[2] = local_vince[2]; vince[3] = local_vince[3]; #endif -#endif } -#endif void setupmvline (int fracbits) { @@ -2506,20 +3166,40 @@ DWORD mvlinec1 () int bits = mvlinebits; int pitch = dc_pitch; -#ifndef PALETTEOUTPUT - uint32_t light = calc_light_multiplier(dc_light); -#endif - do { BYTE pix = source[frac>>bits]; if (pix != 0) { -#ifndef PALETTEOUTPUT - *dest = shade_pal_index(colormap[pix], light); -#else *dest = colormap[pix]; + } + frac += fracstep; + dest += pitch; + } while (--count); + + return frac; +} #endif + +DWORD mvlinec1_RGBA() +{ + DWORD fracstep = dc_iscale; + DWORD frac = dc_texturefrac; + BYTE *colormap = dc_colormap; + int count = dc_count; + const BYTE *source = dc_source; + canvas_pixel_t *dest = dc_dest; + int bits = mvlinebits; + int pitch = dc_pitch; + + uint32_t light = calc_light_multiplier(dc_light); + + do + { + BYTE pix = source[frac >> bits]; + if (pix != 0) + { + *dest = shade_pal_index(colormap[pix], light); } frac += fracstep; dest += pitch; @@ -2528,6 +3208,7 @@ DWORD mvlinec1 () return frac; } +#if !defined(X86_ASM) void mvlinec4 () { canvas_pixel_t *dest = dc_dest; @@ -2535,33 +3216,42 @@ void mvlinec4 () int bits = mvlinebits; DWORD place; -#ifndef PALETTEOUTPUT - uint32_t light0 = calc_light_multiplier(palookuplight[0]); - uint32_t light1 = calc_light_multiplier(palookuplight[1]); - uint32_t light2 = calc_light_multiplier(palookuplight[2]); - uint32_t light3 = calc_light_multiplier(palookuplight[3]); -#endif - do { BYTE pix; - -#ifndef PALETTEOUTPUT - pix 
= bufplce[0][(place = vplce[0]) >> bits]; if (pix) dest[0] = shade_pal_index(palookupoffse[0][pix], light0); vplce[0] = place + vince[0]; - pix = bufplce[1][(place = vplce[1]) >> bits]; if (pix) dest[1] = shade_pal_index(palookupoffse[1][pix], light1); vplce[1] = place + vince[1]; - pix = bufplce[2][(place = vplce[2]) >> bits]; if (pix) dest[2] = shade_pal_index(palookupoffse[2][pix], light2); vplce[2] = place + vince[2]; - pix = bufplce[3][(place = vplce[3]) >> bits]; if (pix) dest[3] = shade_pal_index(palookupoffse[3][pix], light3); vplce[3] = place + vince[3]; -#else pix = bufplce[0][(place=vplce[0])>>bits]; if(pix) dest[0] = palookupoffse[0][pix]; vplce[0] = place+vince[0]; pix = bufplce[1][(place=vplce[1])>>bits]; if(pix) dest[1] = palookupoffse[1][pix]; vplce[1] = place+vince[1]; pix = bufplce[2][(place=vplce[2])>>bits]; if(pix) dest[2] = palookupoffse[2][pix]; vplce[2] = place+vince[2]; pix = bufplce[3][(place=vplce[3])>>bits]; if(pix) dest[3] = palookupoffse[3][pix]; vplce[3] = place+vince[3]; -#endif dest += dc_pitch; } while (--count); } #endif +void mvlinec4_RGBA() +{ + canvas_pixel_t *dest = dc_dest; + int count = dc_count; + int bits = mvlinebits; + DWORD place; + + uint32_t light0 = calc_light_multiplier(palookuplight[0]); + uint32_t light1 = calc_light_multiplier(palookuplight[1]); + uint32_t light2 = calc_light_multiplier(palookuplight[2]); + uint32_t light3 = calc_light_multiplier(palookuplight[3]); + + do + { + BYTE pix; + pix = bufplce[0][(place = vplce[0]) >> bits]; if (pix) dest[0] = shade_pal_index(palookupoffse[0][pix], light0); vplce[0] = place + vince[0]; + pix = bufplce[1][(place = vplce[1]) >> bits]; if (pix) dest[1] = shade_pal_index(palookupoffse[1][pix], light1); vplce[1] = place + vince[1]; + pix = bufplce[2][(place = vplce[2]) >> bits]; if (pix) dest[2] = shade_pal_index(palookupoffse[2][pix], light2); vplce[2] = place + vince[2]; + pix = bufplce[3][(place = vplce[3]) >> bits]; if (pix) dest[3] = 
shade_pal_index(palookupoffse[3][pix], light3); vplce[3] = place + vince[3]; + dest += dc_pitch; + } while (--count); +} + + extern "C" short spanend[MAXHEIGHT]; extern float rw_light; extern float rw_lightstep; @@ -2572,21 +3262,13 @@ static void R_DrawFogBoundarySection (int y, int y2, int x1) BYTE *colormap = dc_colormap; canvas_pixel_t *dest = ylookup[y] + dc_destorg; -#ifndef PALETTEOUTPUT - uint32_t light = calc_light_multiplier(dc_light); -#endif - for (; y < y2; ++y) { int x2 = spanend[y]; int x = x1; do { -#ifndef PALETTEOUTPUT - dest[x] = shade_pal_index(colormap[dest[x]], light); -#else dest[x] = colormap[dest[x]]; -#endif } while (++x <= x2); dest += dc_pitch; } @@ -2598,21 +3280,13 @@ static void R_DrawFogBoundaryLine (int y, int x) BYTE *colormap = dc_colormap; canvas_pixel_t *dest = ylookup[y] + dc_destorg; -#ifndef PALETTEOUTPUT - uint32_t light = calc_light_multiplier(dc_light); -#endif - do { -#ifndef PALETTEOUTPUT - dest[x] = shade_pal_index(colormap[dest[x]], light); -#else dest[x] = colormap[dest[x]]; -#endif } while (++x <= x2); } -void R_DrawFogBoundary (int x1, int x2, short *uclip, short *dclip) +void R_DrawFogBoundary_C (int x1, int x2, short *uclip, short *dclip) { // This is essentially the same as R_MapVisPlane but with an extra step // to create new horizontal spans whenever the light changes enough that @@ -2703,6 +3377,133 @@ void R_DrawFogBoundary (int x1, int x2, short *uclip, short *dclip) } } +static void R_DrawFogBoundarySection_RGBA(int y, int y2, int x1) +{ + BYTE *colormap = dc_colormap; + canvas_pixel_t *dest = ylookup[y] + dc_destorg; + + uint32_t light = calc_light_multiplier(dc_light); + + for (; y < y2; ++y) + { + int x2 = spanend[y]; + int x = x1; + do + { + dest[x] = shade_pal_index(colormap[dest[x]], light); + } while (++x <= x2); + dest += dc_pitch; + } +} + +static void R_DrawFogBoundaryLine_RGBA(int y, int x) +{ + int x2 = spanend[y]; + BYTE *colormap = dc_colormap; + canvas_pixel_t *dest = ylookup[y] + dc_destorg; 
+ + uint32_t light = calc_light_multiplier(dc_light); + + do + { + dest[x] = shade_pal_index(colormap[dest[x]], light); + } while (++x <= x2); +} + +void R_DrawFogBoundary_RGBA(int x1, int x2, short *uclip, short *dclip) +{ + // To do: we do not need to create new spans when using rgba output - instead we should calculate light on a per pixel basis + + // This is essentially the same as R_MapVisPlane but with an extra step + // to create new horizontal spans whenever the light changes enough that + // we need to use a new colormap. + + double lightstep = rw_lightstep; + double light = rw_light + rw_lightstep*(x2 - x1 - 1); + int x = x2 - 1; + int t2 = uclip[x]; + int b2 = dclip[x]; + int rcolormap = GETPALOOKUP(light, wallshade); + int lcolormap; + BYTE *basecolormapdata = basecolormap->Maps; + + if (b2 > t2) + { + clearbufshort(spanend + t2, b2 - t2, x); + } + + dc_colormap = basecolormapdata + (rcolormap << COLORMAPSHIFT); + dc_light = 0; + + for (--x; x >= x1; --x) + { + int t1 = uclip[x]; + int b1 = dclip[x]; + const int xr = x + 1; + int stop; + + light -= rw_lightstep; + lcolormap = GETPALOOKUP(light, wallshade); + if (lcolormap != rcolormap) + { + if (t2 < b2 && rcolormap != 0) + { // Colormap 0 is always the identity map, so rendering it is + // just a waste of time. 
+ R_DrawFogBoundarySection_RGBA(t2, b2, xr); + } + if (t1 < t2) t2 = t1; + if (b1 > b2) b2 = b1; + if (t2 < b2) + { + clearbufshort(spanend + t2, b2 - t2, x); + } + rcolormap = lcolormap; + dc_colormap = basecolormapdata + (lcolormap << COLORMAPSHIFT); + dc_light = 0; + } + else + { + if (dc_colormap != basecolormapdata) + { + stop = MIN(t1, b2); + while (t2 < stop) + { + R_DrawFogBoundaryLine_RGBA(t2++, xr); + } + stop = MAX(b1, t2); + while (b2 > stop) + { + R_DrawFogBoundaryLine_RGBA(--b2, xr); + } + } + else + { + t2 = MAX(t2, MIN(t1, b2)); + b2 = MIN(b2, MAX(b1, t2)); + } + + stop = MIN(t2, b1); + while (t1 < stop) + { + spanend[t1++] = x; + } + stop = MAX(b2, t2); + while (b1 > stop) + { + spanend[--b1] = x; + } + } + + t2 = uclip[x]; + b2 = dclip[x]; + } + if (t2 < b2 && rcolormap != 0) + { + R_DrawFogBoundarySection_RGBA(t2, b2, x1); + } +} + + int tmvlinebits; void setuptmvline (int bits) @@ -2710,7 +3511,40 @@ void setuptmvline (int bits) tmvlinebits = bits; } -fixed_t tmvline1_add () +fixed_t tmvline1_add_C () +{ + DWORD fracstep = dc_iscale; + DWORD frac = dc_texturefrac; + BYTE *colormap = dc_colormap; + int count = dc_count; + const BYTE *source = dc_source; + canvas_pixel_t *dest = dc_dest; + int bits = tmvlinebits; + int pitch = dc_pitch; + + DWORD *fg2rgb = dc_srcblend; + DWORD *bg2rgb = dc_destblend; + + uint32_t light = calc_light_multiplier(dc_light); + + do + { + BYTE pix = source[frac>>bits]; + if (pix != 0) + { + DWORD fg = fg2rgb[colormap[pix]]; + DWORD bg = bg2rgb[*dest]; + fg = (fg+bg) | 0x1f07c1f; + *dest = RGB32k.All[fg & (fg>>15)]; + } + frac += fracstep; + dest += pitch; + } while (--count); + + return frac; +} + +fixed_t tmvline1_add_RGBA() { DWORD fracstep = dc_iscale; DWORD frac = dc_texturefrac; @@ -2728,7 +3562,6 @@ fixed_t tmvline1_add () do { -#ifndef PALETTEOUTPUT BYTE pix = source[frac >> bits]; if (pix != 0) { @@ -2747,16 +3580,6 @@ fixed_t tmvline1_add () *dest = 0xff000000 | (red << 16) | (green << 8) | blue; } -#else - 
BYTE pix = source[frac>>bits]; - if (pix != 0) - { - DWORD fg = fg2rgb[colormap[pix]]; - DWORD bg = bg2rgb[*dest]; - fg = (fg+bg) | 0x1f07c1f; - *dest = RGB32k.All[fg & (fg>>15)]; - } -#endif frac += fracstep; dest += pitch; } while (--count); @@ -2764,7 +3587,40 @@ fixed_t tmvline1_add () return frac; } -void tmvline4_add () +void tmvline4_add_C () +{ + canvas_pixel_t *dest = dc_dest; + int count = dc_count; + int bits = tmvlinebits; + + DWORD *fg2rgb = dc_srcblend; + DWORD *bg2rgb = dc_destblend; + + uint32_t light[4]; + light[0] = calc_light_multiplier(palookuplight[0]); + light[1] = calc_light_multiplier(palookuplight[1]); + light[2] = calc_light_multiplier(palookuplight[2]); + light[3] = calc_light_multiplier(palookuplight[3]); + + do + { + for (int i = 0; i < 4; ++i) + { + BYTE pix = bufplce[i][vplce[i] >> bits]; + if (pix != 0) + { + DWORD fg = fg2rgb[palookupoffse[i][pix]]; + DWORD bg = bg2rgb[dest[i]]; + fg = (fg+bg) | 0x1f07c1f; + dest[i] = RGB32k.All[fg & (fg>>15)]; + } + vplce[i] += vince[i]; + } + dest += dc_pitch; + } while (--count); +} + +void tmvline4_add_RGBA() { canvas_pixel_t *dest = dc_dest; int count = dc_count; @@ -2786,7 +3642,6 @@ void tmvline4_add () BYTE pix = bufplce[i][vplce[i] >> bits]; if (pix != 0) { -#ifndef PALETTEOUTPUT uint32_t fg = shade_pal_index(palookupoffse[i][pix], light[i]); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; @@ -2801,12 +3656,6 @@ void tmvline4_add () uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; -#else - DWORD fg = fg2rgb[palookupoffse[i][pix]]; - DWORD bg = bg2rgb[dest[i]]; - fg = (fg+bg) | 0x1f07c1f; - dest[i] = RGB32k.All[fg & (fg>>15)]; -#endif } vplce[i] += vince[i]; } @@ -2814,7 +3663,7 @@ void tmvline4_add () } while (--count); } -fixed_t tmvline1_addclamp () +fixed_t tmvline1_addclamp_C () { DWORD fracstep = dc_iscale; DWORD frac = dc_texturefrac; @@ -2835,7 +3684,44 @@ fixed_t tmvline1_addclamp () BYTE pix = 
source[frac>>bits]; if (pix != 0) { -#ifndef PALETTEOUTPUT + DWORD a = fg2rgb[colormap[pix]] + bg2rgb[*dest]; + DWORD b = a; + + a |= 0x01f07c1f; + b &= 0x40100400; + a &= 0x3fffffff; + b = b - (b >> 5); + a |= b; + *dest = RGB32k.All[a & (a>>15)]; + } + frac += fracstep; + dest += pitch; + } while (--count); + + return frac; +} + +fixed_t tmvline1_addclamp_RGBA() +{ + DWORD fracstep = dc_iscale; + DWORD frac = dc_texturefrac; + BYTE *colormap = dc_colormap; + int count = dc_count; + const BYTE *source = dc_source; + canvas_pixel_t *dest = dc_dest; + int bits = tmvlinebits; + int pitch = dc_pitch; + + DWORD *fg2rgb = dc_srcblend; + DWORD *bg2rgb = dc_destblend; + + uint32_t light = calc_light_multiplier(dc_light); + + do + { + BYTE pix = source[frac >> bits]; + if (pix != 0) + { uint32_t fg = shade_pal_index(colormap[pix], light); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; @@ -2850,17 +3736,6 @@ fixed_t tmvline1_addclamp () uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); *dest = 0xff000000 | (red << 16) | (green << 8) | blue; -#else - DWORD a = fg2rgb[colormap[pix]] + bg2rgb[*dest]; - DWORD b = a; - - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - *dest = RGB32k.All[a & (a>>15)]; -#endif } frac += fracstep; dest += pitch; @@ -2869,7 +3744,7 @@ fixed_t tmvline1_addclamp () return frac; } -void tmvline4_addclamp () +void tmvline4_addclamp_C () { canvas_pixel_t *dest = dc_dest; int count = dc_count; @@ -2878,6 +3753,35 @@ void tmvline4_addclamp () DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; + do + { + for (int i = 0; i < 4; ++i) + { + BYTE pix = bufplce[i][vplce[i] >> bits]; + if (pix != 0) + { + DWORD a = fg2rgb[palookupoffse[i][pix]] + bg2rgb[dest[i]]; + DWORD b = a; + + a |= 0x01f07c1f; + b &= 0x40100400; + a &= 0x3fffffff; + b = b - (b >> 5); + a |= b; + dest[i] = RGB32k.All[a & (a>>15)]; + } + vplce[i] += vince[i]; + } + dest += dc_pitch; + } while (--count); +} + +void 
tmvline4_addclamp_RGBA() +{ + canvas_pixel_t *dest = dc_dest; + int count = dc_count; + int bits = tmvlinebits; + uint32_t light[4]; light[0] = calc_light_multiplier(palookuplight[0]); light[1] = calc_light_multiplier(palookuplight[1]); @@ -2891,7 +3795,6 @@ void tmvline4_addclamp () BYTE pix = bufplce[i][vplce[i] >> bits]; if (pix != 0) { -#ifndef PALETTEOUTPUT uint32_t fg = shade_pal_index(palookupoffse[i][pix], light[i]); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; @@ -2906,17 +3809,6 @@ void tmvline4_addclamp () uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; -#else - DWORD a = fg2rgb[palookupoffse[i][pix]] + bg2rgb[dest[i]]; - DWORD b = a; - - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - dest[i] = RGB32k.All[a & (a>>15)]; -#endif } vplce[i] += vince[i]; } @@ -2924,7 +3816,7 @@ void tmvline4_addclamp () } while (--count); } -fixed_t tmvline1_subclamp () +fixed_t tmvline1_subclamp_C () { DWORD fracstep = dc_iscale; DWORD frac = dc_texturefrac; @@ -2938,14 +3830,45 @@ fixed_t tmvline1_subclamp () DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; - uint32_t light = calc_light_multiplier(dc_light); - do { BYTE pix = source[frac>>bits]; if (pix != 0) { -#ifndef PALETTEOUTPUT + DWORD a = (fg2rgb[colormap[pix]] | 0x40100400) - bg2rgb[*dest]; + DWORD b = a; + + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + *dest = RGB32k.All[a & (a>>15)]; + } + frac += fracstep; + dest += pitch; + } while (--count); + + return frac; +} + +fixed_t tmvline1_subclamp_RGBA() +{ + DWORD fracstep = dc_iscale; + DWORD frac = dc_texturefrac; + BYTE *colormap = dc_colormap; + int count = dc_count; + const BYTE *source = dc_source; + canvas_pixel_t *dest = dc_dest; + int bits = tmvlinebits; + int pitch = dc_pitch; + + uint32_t light = calc_light_multiplier(dc_light); + + do + { + BYTE pix = source[frac >> bits]; + if (pix != 0) + { 
uint32_t fg = shade_pal_index(colormap[pix], light); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; @@ -2960,16 +3883,6 @@ fixed_t tmvline1_subclamp () uint32_t blue = clamp(256 - fg_blue + bg_blue, 256, 256 + 255) - 256; *dest = 0xff000000 | (red << 16) | (green << 8) | blue; -#else - DWORD a = (fg2rgb[colormap[pix]] | 0x40100400) - bg2rgb[*dest]; - DWORD b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - *dest = RGB32k.All[a & (a>>15)]; -#endif } frac += fracstep; dest += pitch; @@ -2978,7 +3891,7 @@ fixed_t tmvline1_subclamp () return frac; } -void tmvline4_subclamp () +void tmvline4_subclamp_C () { canvas_pixel_t *dest = dc_dest; int count = dc_count; @@ -2987,6 +3900,34 @@ void tmvline4_subclamp () DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; + do + { + for (int i = 0; i < 4; ++i) + { + BYTE pix = bufplce[i][vplce[i] >> bits]; + if (pix != 0) + { + DWORD a = (fg2rgb[palookupoffse[i][pix]] | 0x40100400) - bg2rgb[dest[i]]; + DWORD b = a; + + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + dest[i] = RGB32k.All[a & (a>>15)]; + } + vplce[i] += vince[i]; + } + dest += dc_pitch; + } while (--count); +} + +void tmvline4_subclamp_RGBA() +{ + canvas_pixel_t *dest = dc_dest; + int count = dc_count; + int bits = tmvlinebits; + uint32_t light[4]; light[0] = calc_light_multiplier(palookuplight[0]); light[1] = calc_light_multiplier(palookuplight[1]); @@ -3000,7 +3941,6 @@ void tmvline4_subclamp () BYTE pix = bufplce[i][vplce[i] >> bits]; if (pix != 0) { -#ifndef PALETTEOUTPUT uint32_t fg = shade_pal_index(palookupoffse[i][pix], light[i]); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; @@ -3015,16 +3955,6 @@ void tmvline4_subclamp () uint32_t blue = clamp(256 - fg_blue + bg_blue, 256, 256 + 255) - 256; dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; -#else - DWORD a = (fg2rgb[palookupoffse[i][pix]] | 0x40100400) - bg2rgb[dest[i]]; - DWORD b = a; - - 
b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - dest[i] = RGB32k.All[a & (a>>15)]; -#endif } vplce[i] += vince[i]; } @@ -3032,7 +3962,7 @@ void tmvline4_subclamp () } while (--count); } -fixed_t tmvline1_revsubclamp () +fixed_t tmvline1_revsubclamp_C () { DWORD fracstep = dc_iscale; DWORD frac = dc_texturefrac; @@ -3046,14 +3976,45 @@ fixed_t tmvline1_revsubclamp () DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; - uint32_t light = calc_light_multiplier(dc_light); - do { BYTE pix = source[frac>>bits]; if (pix != 0) { -#ifndef PALETTEOUTPUT + DWORD a = (bg2rgb[*dest] | 0x40100400) - fg2rgb[colormap[pix]]; + DWORD b = a; + + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + *dest = RGB32k.All[a & (a>>15)]; + } + frac += fracstep; + dest += pitch; + } while (--count); + + return frac; +} + +fixed_t tmvline1_revsubclamp_RGBA() +{ + DWORD fracstep = dc_iscale; + DWORD frac = dc_texturefrac; + BYTE *colormap = dc_colormap; + int count = dc_count; + const BYTE *source = dc_source; + canvas_pixel_t *dest = dc_dest; + int bits = tmvlinebits; + int pitch = dc_pitch; + + uint32_t light = calc_light_multiplier(dc_light); + + do + { + BYTE pix = source[frac >> bits]; + if (pix != 0) + { uint32_t fg = shade_pal_index(colormap[pix], light); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; @@ -3068,16 +4029,6 @@ fixed_t tmvline1_revsubclamp () uint32_t blue = clamp(256 + fg_blue - bg_blue, 256, 256 + 255) - 256; *dest = 0xff000000 | (red << 16) | (green << 8) | blue; -#else - DWORD a = (bg2rgb[*dest] | 0x40100400) - fg2rgb[colormap[pix]]; - DWORD b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - *dest = RGB32k.All[a & (a>>15)]; -#endif } frac += fracstep; dest += pitch; @@ -3086,7 +4037,38 @@ fixed_t tmvline1_revsubclamp () return frac; } -void tmvline4_revsubclamp () +void tmvline4_revsubclamp_C () +{ + canvas_pixel_t *dest = dc_dest; + int count = dc_count; + int bits = 
tmvlinebits; + + DWORD *fg2rgb = dc_srcblend; + DWORD *bg2rgb = dc_destblend; + + do + { + for (int i = 0; i < 4; ++i) + { + BYTE pix = bufplce[i][vplce[i] >> bits]; + if (pix != 0) + { + DWORD a = (bg2rgb[dest[i]] | 0x40100400) - fg2rgb[palookupoffse[i][pix]]; + DWORD b = a; + + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + dest[i] = RGB32k.All[a & (a>>15)]; + } + vplce[i] += vince[i]; + } + dest += dc_pitch; + } while (--count); +} + +void tmvline4_revsubclamp_RGBA() { canvas_pixel_t *dest = dc_dest; int count = dc_count; @@ -3108,7 +4090,6 @@ void tmvline4_revsubclamp () BYTE pix = bufplce[i][vplce[i] >> bits]; if (pix != 0) { -#ifndef PALETTEOUTPUT uint32_t fg = shade_pal_index(palookupoffse[i][pix], light[i]); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; @@ -3123,16 +4104,6 @@ void tmvline4_revsubclamp () uint32_t blue = clamp(256 + fg_blue - bg_blue, 256, 256 + 255) - 256; dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; -#else - DWORD a = (bg2rgb[dest[i]] | 0x40100400) - fg2rgb[palookupoffse[i][pix]]; - DWORD b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - dest[i] = RGB32k.All[a & (a>>15)]; -#endif } vplce[i] += vince[i]; } @@ -3164,6 +4135,85 @@ const BYTE *R_GetColumn (FTexture *tex, int col) // [RH] Initialize the column drawer pointers void R_InitColumnDrawers () { +#ifndef PALETTEOUTPUT + + R_DrawColumnHoriz = R_DrawColumnHorizP_RGBA_C; + R_DrawColumn = R_DrawColumnP_RGBA_C; + R_DrawFuzzColumn = R_DrawFuzzColumnP_RGBA_C; + R_DrawTranslatedColumn = R_DrawTranslatedColumnP_RGBA_C; + R_DrawShadedColumn = R_DrawShadedColumnP_RGBA_C; + R_DrawSpan = R_DrawSpanP_RGBA_C; + R_DrawSpanMasked = R_DrawSpanMaskedP_RGBA_C; + rt_map4cols = rt_map4cols_RGBA_c; + + R_DrawSpanTranslucent = R_DrawSpanTranslucentP_RGBA_C; + R_DrawSpanMaskedTranslucent = R_DrawSpanMaskedTranslucentP_RGBA_C; + R_DrawSpanAddClamp = R_DrawSpanAddClampP_RGBA_C; + R_DrawSpanMaskedAddClamp = 
R_DrawSpanMaskedAddClampP_RGBA_C; + R_FillColumn = R_FillColumnP_RGBA; + R_FillAddColumn = R_FillAddColumn_RGBA_C; + R_FillAddClampColumn = R_FillAddClampColumn_RGBA; + R_FillSubClampColumn = R_FillSubClampColumn_RGBA; + R_FillRevSubClampColumn = R_FillRevSubClampColumn_RGBA; + R_DrawAddColumn = R_DrawAddColumnP_RGBA_C; + R_DrawTlatedAddColumn = R_DrawTlatedAddColumnP_RGBA_C; + R_DrawAddClampColumn = R_DrawAddClampColumnP_RGBA_C; + R_DrawAddClampTranslatedColumn = R_DrawAddClampTranslatedColumnP_RGBA_C; + R_DrawSubClampColumn = R_DrawSubClampColumnP_RGBA_C; + R_DrawSubClampTranslatedColumn = R_DrawSubClampTranslatedColumnP_RGBA_C; + R_DrawRevSubClampColumn = R_DrawRevSubClampColumnP_RGBA_C; + R_DrawRevSubClampTranslatedColumn = R_DrawRevSubClampTranslatedColumnP_RGBA_C; + R_FillSpan = R_FillSpan_RGBA; + R_DrawFogBoundary = R_DrawFogBoundary_RGBA; + R_FillColumnHoriz = R_FillColumnHorizP_RGBA_C; + + R_DrawFogBoundary = R_DrawFogBoundary_RGBA; + R_MapColoredPlane = R_MapColoredPlane_RGBA; + R_DrawParticle = R_DrawParticle_RGBA; + + tmvline1_add = tmvline1_add_RGBA; + tmvline4_add = tmvline4_add_RGBA; + tmvline1_addclamp = tmvline1_addclamp_RGBA; + tmvline4_addclamp = tmvline4_addclamp_RGBA; + tmvline1_subclamp = tmvline1_subclamp_RGBA; + tmvline4_subclamp = tmvline4_subclamp_RGBA; + tmvline1_revsubclamp = tmvline1_revsubclamp_RGBA; + tmvline4_revsubclamp = tmvline4_revsubclamp_RGBA; + + rt_copy1col = rt_copy1col_RGBA_c; + rt_copy4cols = rt_copy4cols_RGBA_c; + rt_map1col = rt_map1col_RGBA_c; + rt_shaded4cols = rt_shaded4cols_RGBA_c; + rt_add4cols = rt_add4cols_RGBA_c; + rt_addclamp4cols = rt_addclamp4cols_RGBA_c; + rt_shaded1col = rt_shaded1col_RGBA_c; + rt_add1col = rt_add1col_RGBA_c; + rt_addclamp1col = rt_addclamp1col_RGBA_c; + rt_subclamp1col = rt_subclamp1col_RGBA_c; + rt_revsubclamp1col = rt_revsubclamp1col_RGBA_c; + rt_tlate1col = rt_tlate1col_RGBA_c; + rt_tlateadd1col = rt_tlateadd1col_RGBA_c; + rt_tlateaddclamp1col = rt_tlateaddclamp1col_RGBA_c; + 
rt_tlatesubclamp1col = rt_tlatesubclamp1col_RGBA_c; + rt_tlaterevsubclamp1col = rt_tlaterevsubclamp1col_RGBA_c; + rt_map4cols = rt_map4cols_RGBA_c; + rt_subclamp4cols = rt_subclamp4cols_RGBA_c; + rt_revsubclamp4cols = rt_revsubclamp4cols_RGBA_c; + rt_tlate4cols = rt_tlate4cols_RGBA_c; + rt_tlateadd4cols = rt_tlateadd4cols_RGBA_c; + rt_tlateaddclamp4cols = rt_tlateaddclamp4cols_RGBA_c; + rt_tlatesubclamp4cols = rt_tlatesubclamp4cols_RGBA_c; + rt_tlaterevsubclamp4cols = rt_tlaterevsubclamp4cols_RGBA_c; + rt_initcols = rt_initcols_rgba; + + dovline1 = vlinec1_RGBA; + doprevline1 = vlinec1_RGBA; + dovline4 = vlinec4_RGBA; + domvline1 = mvlinec1_RGBA; + domvline4 = mvlinec4_RGBA; + +#else + #ifdef X86_ASM R_DrawColumn = R_DrawColumnP_ASM; R_DrawColumnHoriz = R_DrawColumnHorizP_ASM; @@ -3194,6 +4244,72 @@ void R_InitColumnDrawers () R_DrawSpanMaskedTranslucent = R_DrawSpanMaskedTranslucentP_C; R_DrawSpanAddClamp = R_DrawSpanAddClampP_C; R_DrawSpanMaskedAddClamp = R_DrawSpanMaskedAddClampP_C; + R_FillColumn = R_FillColumnP_C; + R_FillAddColumn = R_FillAddColumn_C; + R_FillAddClampColumn = R_FillAddClampColumn_C; + R_FillSubClampColumn = R_FillSubClampColumn_C; + R_FillRevSubClampColumn = R_FillRevSubClampColumn_C; + R_DrawAddColumn = R_DrawAddColumnP_C; + R_DrawTlatedAddColumn = R_DrawTlatedAddColumnP_C; + R_DrawAddClampColumn = R_DrawAddClampColumnP_C; + R_DrawAddClampTranslatedColumn = R_DrawAddClampTranslatedColumnP_C; + R_DrawSubClampColumn = R_DrawSubClampColumnP_C; + R_DrawSubClampTranslatedColumn = R_DrawSubClampTranslatedColumnP_C; + R_DrawRevSubClampColumn = R_DrawRevSubClampColumnP_C; + R_DrawRevSubClampTranslatedColumn = R_DrawRevSubClampTranslatedColumnP_C; + R_FillSpan = R_FillSpan_C; + R_DrawFogBoundary = R_DrawFogBoundary_C; + R_FillColumnHoriz = R_FillColumnHorizP_C; + + R_DrawFogBoundary = R_DrawFogBoundary_C; + R_MapColoredPlane = R_MapColoredPlane_C; + R_DrawParticle = R_DrawParticle_C; + + tmvline1_add = tmvline1_add_C; + tmvline4_add = tmvline4_add_C; 
+ tmvline1_addclamp = tmvline1_addclamp_C; + tmvline4_addclamp = tmvline4_addclamp_C; + tmvline1_subclamp = tmvline1_subclamp_C; + tmvline4_subclamp = tmvline4_subclamp_C; + tmvline1_revsubclamp = tmvline1_revsubclamp_C; + tmvline4_revsubclamp = tmvline4_revsubclamp_C; + +#ifdef X86_ASM + rt_copy1col = rt_copy1col_asm; + rt_copy4cols = rt_copy4cols_asm; + rt_map1col = rt_map1col_asm; + rt_shaded4cols = rt_shaded4cols_asm; + rt_add4cols = rt_add4cols_asm; + rt_addclamp4cols = rt_addclamp4cols_asm; +#else + rt_copy1col = rt_copy1col_c; + rt_copy4cols = rt_copy4cols_c; + rt_map1col = rt_map1col_c; + rt_shaded4cols = rt_shaded4cols_c; + rt_add4cols = rt_add4cols_c; + rt_addclamp4cols = rt_addclamp4cols_c; +#endif + rt_shaded1col = rt_shaded1col_c; + rt_add1col = rt_add1col_c; + rt_addclamp1col = rt_addclamp1col_c; + rt_subclamp1col = rt_subclamp1col_c; + rt_revsubclamp1col = rt_revsubclamp1col_c; + rt_tlate1col = rt_tlate1col_c; + rt_tlateadd1col = rt_tlateadd1col_c; + rt_tlateaddclamp1col = rt_tlateaddclamp1col_c; + rt_tlatesubclamp1col = rt_tlatesubclamp1col_c; + rt_tlaterevsubclamp1col = rt_tlaterevsubclamp1col_c; + rt_map4cols = rt_map4cols_c; + rt_subclamp4cols = rt_subclamp4cols_c; + rt_revsubclamp4cols = rt_revsubclamp4cols_c; + rt_tlate4cols = rt_tlate4cols_c; + rt_tlateadd4cols = rt_tlateadd4cols_c; + rt_tlateaddclamp4cols = rt_tlateaddclamp4cols_c; + rt_tlatesubclamp4cols = rt_tlatesubclamp4cols_c; + rt_tlaterevsubclamp4cols = rt_tlaterevsubclamp4cols_c; + rt_initcols = rt_initcols_pal; + +#endif } // [RH] Choose column drawers in a single place @@ -3211,7 +4327,7 @@ static bool R_SetBlendFunc (int op, fixed_t fglevel, fixed_t bglevel, int flags) { if (flags & STYLEF_ColorIsFixed) { - colfunc = R_FillColumnP; + colfunc = R_FillColumn; hcolfunc_post1 = rt_copy1col; hcolfunc_post4 = rt_copy4cols; } @@ -3261,13 +4377,13 @@ static bool R_SetBlendFunc (int op, fixed_t fglevel, fixed_t bglevel, int flags) } else if (dc_translation == NULL) { - colfunc = 
R_DrawAddColumnP_C; + colfunc = R_DrawAddColumn; hcolfunc_post1 = rt_add1col; hcolfunc_post4 = rt_add4cols; } else { - colfunc = R_DrawTlatedAddColumnP_C; + colfunc = R_DrawTlatedAddColumn; hcolfunc_post1 = rt_tlateadd1col; hcolfunc_post4 = rt_tlateadd4cols; } @@ -3282,13 +4398,13 @@ static bool R_SetBlendFunc (int op, fixed_t fglevel, fixed_t bglevel, int flags) } else if (dc_translation == NULL) { - colfunc = R_DrawAddClampColumnP_C; + colfunc = R_DrawAddClampColumn; hcolfunc_post1 = rt_addclamp1col; hcolfunc_post4 = rt_addclamp4cols; } else { - colfunc = R_DrawAddClampTranslatedColumnP_C; + colfunc = R_DrawAddClampTranslatedColumn; hcolfunc_post1 = rt_tlateaddclamp1col; hcolfunc_post4 = rt_tlateaddclamp4cols; } @@ -3304,13 +4420,13 @@ static bool R_SetBlendFunc (int op, fixed_t fglevel, fixed_t bglevel, int flags) } else if (dc_translation == NULL) { - colfunc = R_DrawSubClampColumnP_C; + colfunc = R_DrawSubClampColumn; hcolfunc_post1 = rt_subclamp1col; hcolfunc_post4 = rt_subclamp4cols; } else { - colfunc = R_DrawSubClampTranslatedColumnP_C; + colfunc = R_DrawSubClampTranslatedColumn; hcolfunc_post1 = rt_tlatesubclamp1col; hcolfunc_post4 = rt_tlatesubclamp4cols; } @@ -3329,13 +4445,13 @@ static bool R_SetBlendFunc (int op, fixed_t fglevel, fixed_t bglevel, int flags) } else if (dc_translation == NULL) { - colfunc = R_DrawRevSubClampColumnP_C; + colfunc = R_DrawRevSubClampColumn; hcolfunc_post1 = rt_revsubclamp1col; hcolfunc_post4 = rt_revsubclamp4cols; } else { - colfunc = R_DrawRevSubClampTranslatedColumnP_C; + colfunc = R_DrawRevSubClampTranslatedColumn; hcolfunc_post1 = rt_tlaterevsubclamp1col; hcolfunc_post4 = rt_tlaterevsubclamp4cols; } @@ -3440,7 +4556,7 @@ ESPSResult R_SetPatchStyle (FRenderStyle style, fixed_t alpha, int translation, // dc_srccolor is used by the R_Fill* routines. It is premultiplied // with the alpha. 
dc_srccolor = ((((r*x)>>4)<<20) | ((g*x)>>4) | ((((b)*x)>>4)<<10)) & 0x3feffbff; - hcolfunc_pre = R_FillColumnHorizP; + hcolfunc_pre = R_FillColumnHoriz; dc_colormap = identitymap; dc_light = 0; } @@ -3459,25 +4575,25 @@ void R_FinishSetPatchStyle () bool R_GetTransMaskDrawers (fixed_t (**tmvline1)(), void (**tmvline4)()) { - if (colfunc == R_DrawAddColumnP_C) + if (colfunc == R_DrawAddColumn) { *tmvline1 = tmvline1_add; *tmvline4 = tmvline4_add; return true; } - if (colfunc == R_DrawAddClampColumnP_C) + if (colfunc == R_DrawAddClampColumn) { *tmvline1 = tmvline1_addclamp; *tmvline4 = tmvline4_addclamp; return true; } - if (colfunc == R_DrawSubClampColumnP_C) + if (colfunc == R_DrawSubClampColumn) { *tmvline1 = tmvline1_subclamp; *tmvline4 = tmvline4_subclamp; return true; } - if (colfunc == R_DrawRevSubClampColumnP_C) + if (colfunc == R_DrawRevSubClampColumn) { *tmvline1 = tmvline1_revsubclamp; *tmvline4 = tmvline4_revsubclamp; diff --git a/src/r_draw.h b/src/r_draw.h index 6f7a91154b..17698c3609 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -127,33 +127,33 @@ extern "C" void rt_copy1col_c (int hx, int sx, int yl, int yh); void rt_copy4cols_c (int sx, int yl, int yh); -void rt_shaded1col (int hx, int sx, int yl, int yh); +void rt_shaded1col_c (int hx, int sx, int yl, int yh); void rt_shaded4cols_c (int sx, int yl, int yh); void rt_shaded4cols_asm (int sx, int yl, int yh); void rt_map1col_c (int hx, int sx, int yl, int yh); -void rt_add1col (int hx, int sx, int yl, int yh); -void rt_addclamp1col (int hx, int sx, int yl, int yh); -void rt_subclamp1col (int hx, int sx, int yl, int yh); -void rt_revsubclamp1col (int hx, int sx, int yl, int yh); +void rt_add1col_c (int hx, int sx, int yl, int yh); +void rt_addclamp1col_c (int hx, int sx, int yl, int yh); +void rt_subclamp1col_c (int hx, int sx, int yl, int yh); +void rt_revsubclamp1col_c (int hx, int sx, int yl, int yh); -void rt_tlate1col (int hx, int sx, int yl, int yh); -void rt_tlateadd1col (int hx, int sx, int 
yl, int yh); -void rt_tlateaddclamp1col (int hx, int sx, int yl, int yh); -void rt_tlatesubclamp1col (int hx, int sx, int yl, int yh); -void rt_tlaterevsubclamp1col (int hx, int sx, int yl, int yh); +void rt_tlate1col_c (int hx, int sx, int yl, int yh); +void rt_tlateadd1col_c (int hx, int sx, int yl, int yh); +void rt_tlateaddclamp1col_c (int hx, int sx, int yl, int yh); +void rt_tlatesubclamp1col_c (int hx, int sx, int yl, int yh); +void rt_tlaterevsubclamp1col_c (int hx, int sx, int yl, int yh); void rt_map4cols_c (int sx, int yl, int yh); void rt_add4cols_c (int sx, int yl, int yh); void rt_addclamp4cols_c (int sx, int yl, int yh); -void rt_subclamp4cols (int sx, int yl, int yh); -void rt_revsubclamp4cols (int sx, int yl, int yh); +void rt_subclamp4cols_c (int sx, int yl, int yh); +void rt_revsubclamp4cols_c (int sx, int yl, int yh); -void rt_tlate4cols (int sx, int yl, int yh); -void rt_tlateadd4cols (int sx, int yl, int yh); -void rt_tlateaddclamp4cols (int sx, int yl, int yh); -void rt_tlatesubclamp4cols (int sx, int yl, int yh); -void rt_tlaterevsubclamp4cols (int sx, int yl, int yh); +void rt_tlate4cols_c (int sx, int yl, int yh); +void rt_tlateadd4cols_c (int sx, int yl, int yh); +void rt_tlateaddclamp4cols_c (int sx, int yl, int yh); +void rt_tlatesubclamp4cols_c (int sx, int yl, int yh); +void rt_tlaterevsubclamp4cols_c (int sx, int yl, int yh); void rt_copy1col_asm (int hx, int sx, int yl, int yh); void rt_map1col_asm (int hx, int sx, int yl, int yh); @@ -163,32 +163,83 @@ void rt_map4cols_asm1 (int sx, int yl, int yh); void rt_map4cols_asm2 (int sx, int yl, int yh); void rt_add4cols_asm (int sx, int yl, int yh); void rt_addclamp4cols_asm (int sx, int yl, int yh); + +/// + +void rt_copy1col_RGBA_c (int hx, int sx, int yl, int yh); +void rt_copy4cols_RGBA_c (int sx, int yl, int yh); + +void rt_shaded1col_RGBA_c (int hx, int sx, int yl, int yh); +void rt_shaded4cols_RGBA_c (int sx, int yl, int yh); + +void rt_map1col_RGBA_c (int hx, int sx, int yl, int 
yh); +void rt_add1col_RGBA_c (int hx, int sx, int yl, int yh); +void rt_addclamp1col_RGBA_c (int hx, int sx, int yl, int yh); +void rt_subclamp1col_RGBA_c (int hx, int sx, int yl, int yh); +void rt_revsubclamp1col_RGBA_c (int hx, int sx, int yl, int yh); + +void rt_tlate1col_RGBA_c (int hx, int sx, int yl, int yh); +void rt_tlateadd1col_RGBA_c (int hx, int sx, int yl, int yh); +void rt_tlateaddclamp1col_RGBA_c (int hx, int sx, int yl, int yh); +void rt_tlatesubclamp1col_RGBA_c (int hx, int sx, int yl, int yh); +void rt_tlaterevsubclamp1col_RGBA_c (int hx, int sx, int yl, int yh); + +void rt_map4cols_RGBA_c (int sx, int yl, int yh); +void rt_add4cols_RGBA_c (int sx, int yl, int yh); +void rt_addclamp4cols_RGBA_c (int sx, int yl, int yh); +void rt_subclamp4cols_RGBA_c (int sx, int yl, int yh); +void rt_revsubclamp4cols_RGBA_c (int sx, int yl, int yh); + +void rt_tlate4cols_RGBA_c (int sx, int yl, int yh); +void rt_tlateadd4cols_RGBA_c (int sx, int yl, int yh); +void rt_tlateaddclamp4cols_RGBA_c (int sx, int yl, int yh); +void rt_tlatesubclamp4cols_RGBA_c (int sx, int yl, int yh); +void rt_tlaterevsubclamp4cols_RGBA_c (int sx, int yl, int yh); + } -extern void (*rt_map4cols)(int sx, int yl, int yh); +extern void (*rt_copy1col)(int hx, int sx, int yl, int yh); +extern void (*rt_copy4cols)(int sx, int yl, int yh); -#ifdef X86_ASM -#define rt_copy1col rt_copy1col_asm -#define rt_copy4cols rt_copy4cols_asm -#define rt_map1col rt_map1col_asm -#define rt_shaded4cols rt_shaded4cols_asm -#define rt_add4cols rt_add4cols_asm -#define rt_addclamp4cols rt_addclamp4cols_asm -#else -#define rt_copy1col rt_copy1col_c -#define rt_copy4cols rt_copy4cols_c -#define rt_map1col rt_map1col_c -#define rt_shaded4cols rt_shaded4cols_c -#define rt_add4cols rt_add4cols_c -#define rt_addclamp4cols rt_addclamp4cols_c -#endif +extern void (*rt_shaded1col)(int hx, int sx, int yl, int yh); +extern void (*rt_shaded4cols)(int sx, int yl, int yh); + +extern void (*rt_map1col)(int hx, int sx, int yl, 
int yh); +extern void (*rt_add1col)(int hx, int sx, int yl, int yh); +extern void (*rt_addclamp1col)(int hx, int sx, int yl, int yh); +extern void (*rt_subclamp1col)(int hx, int sx, int yl, int yh); +extern void (*rt_revsubclamp1col)(int hx, int sx, int yl, int yh); + +extern void (*rt_tlate1col)(int hx, int sx, int yl, int yh); +extern void (*rt_tlateadd1col)(int hx, int sx, int yl, int yh); +extern void (*rt_tlateaddclamp1col)(int hx, int sx, int yl, int yh); +extern void (*rt_tlatesubclamp1col)(int hx, int sx, int yl, int yh); +extern void (*rt_tlaterevsubclamp1col)(int hx, int sx, int yl, int yh); + +extern void (*rt_map4cols)(int sx, int yl, int yh); +extern void (*rt_add4cols)(int sx, int yl, int yh); +extern void (*rt_addclamp4cols)(int sx, int yl, int yh); +extern void (*rt_subclamp4cols)(int sx, int yl, int yh); +extern void (*rt_revsubclamp4cols)(int sx, int yl, int yh); + +extern void (*rt_tlate4cols)(int sx, int yl, int yh); +extern void (*rt_tlateadd4cols)(int sx, int yl, int yh); +extern void (*rt_tlateaddclamp4cols)(int sx, int yl, int yh); +extern void (*rt_tlatesubclamp4cols)(int sx, int yl, int yh); +extern void (*rt_tlaterevsubclamp4cols)(int sx, int yl, int yh); + +extern void (*rt_initcols)(canvas_pixel_t *buffer); void rt_draw4cols (int sx); // [RH] Preps the temporary horizontal buffer. 
-void rt_initcols (canvas_pixel_t *buffer=NULL); +void rt_initcols_pal (canvas_pixel_t *buffer); +void rt_initcols_rgba (canvas_pixel_t *buffer); -void R_DrawFogBoundary (int x1, int x2, short *uclip, short *dclip); + +extern void (*R_DrawFogBoundary)(int x1, int x2, short *uclip, short *dclip); + +void R_DrawFogBoundary_C (int x1, int x2, short *uclip, short *dclip); #ifdef X86_ASM @@ -212,6 +263,14 @@ void R_DrawShadedColumnP_C (void); void R_DrawSpanP_C (void); void R_DrawSpanMaskedP_C (void); +void R_DrawColumnHorizP_RGBA_C (void); +void R_DrawColumnP_RGBA_C (void); +void R_DrawFuzzColumnP_RGBA_C (void); +void R_DrawTranslatedColumnP_RGBA_C (void); +void R_DrawShadedColumnP_RGBA_C (void); +void R_DrawSpanP_RGBA_C (void); +void R_DrawSpanMaskedP_RGBA_C (void); + #endif void R_DrawSpanTranslucentP_C (void); @@ -220,9 +279,30 @@ void R_DrawSpanMaskedTranslucentP_C (void); void R_DrawTlatedLucentColumnP_C (void); #define R_DrawTlatedLucentColumn R_DrawTlatedLucentColumnP_C -void R_FillColumnP (void); -void R_FillColumnHorizP (void); -void R_FillSpan (void); +extern void(*R_FillColumn)(void); +extern void(*R_FillAddColumn)(void); +extern void(*R_FillAddClampColumn)(void); +extern void(*R_FillSubClampColumn)(void); +extern void(*R_FillRevSubClampColumn)(void); +extern void(*R_DrawAddColumn)(void); +extern void(*R_DrawTlatedAddColumn)(void); +extern void(*R_DrawAddClampColumn)(void); +extern void(*R_DrawAddClampTranslatedColumn)(void); +extern void(*R_DrawSubClampColumn)(void); +extern void(*R_DrawSubClampTranslatedColumn)(void); +extern void(*R_DrawRevSubClampColumn)(void); +extern void(*R_DrawRevSubClampTranslatedColumn)(void); + +extern void(*R_FillSpan)(void); +extern void(*R_FillColumnHoriz)(void); + +void R_FillColumnP_C (void); + +void R_FillColumnHorizP_C (void); +void R_FillSpan_C (void); + +void R_FillColumnHorizP_RGBA_C(void); +void R_FillSpan_RGBA_C(void); #ifdef X86_ASM #define R_SetupDrawSlab R_SetupDrawSlabA @@ -282,6 +362,15 @@ inline ESPSResult 
R_SetPatchStyle(FRenderStyle style, float alpha, int translati // style was STYLE_Shade void R_FinishSetPatchStyle (); +extern fixed_t(*tmvline1_add)(); +extern void(*tmvline4_add)(); +extern fixed_t(*tmvline1_addclamp)(); +extern void(*tmvline4_addclamp)(); +extern fixed_t(*tmvline1_subclamp)(); +extern void(*tmvline4_subclamp)(); +extern fixed_t(*tmvline1_revsubclamp)(); +extern void(*tmvline4_revsubclamp)(); + // transmaskwallscan calls this to find out what column drawers to use bool R_GetTransMaskDrawers (fixed_t (**tmvline1)(), void (**tmvline4)()); diff --git a/src/r_drawt.cpp b/src/r_drawt.cpp index f5fc027b57..9520f59b37 100644 --- a/src/r_drawt.cpp +++ b/src/r_drawt.cpp @@ -114,13 +114,6 @@ void rt_copy1col_c (int hx, int sx, int yl, int yh) // Copies all four spans to the screen starting at sx. void rt_copy4cols_c (int sx, int yl, int yh) { -#ifndef PALETTEOUTPUT - // To do: we could do this with SSE using __m128i - rt_copy1col_c(0, sx, yl, yh); - rt_copy1col_c(1, sx + 1, yl, yh); - rt_copy1col_c(2, sx + 2, yl, yh); - rt_copy1col_c(3, sx + 3, yl, yh); -#else int *source; int *dest; int count; @@ -149,7 +142,6 @@ void rt_copy4cols_c (int sx, int yl, int yh) source += 8/sizeof(int); dest += pitch*2; } while (--count); -#endif } // Maps one span at hx to the screen at sx. 
@@ -166,21 +158,13 @@ void rt_map1col_c (int hx, int sx, int yl, int yh) return; count++; -#ifndef PALETTEOUTPUT - uint32_t light = calc_light_multiplier(dc_light); -#endif - colormap = dc_colormap; dest = ylookup[yl] + sx + dc_destorg; source = &dc_temp[yl*4 + hx]; pitch = dc_pitch; if (count & 1) { -#ifndef PALETTEOUTPUT - *dest = shade_pal_index(colormap[*source], light); -#else *dest = colormap[*source]; -#endif source += 4; dest += pitch; } @@ -188,13 +172,8 @@ void rt_map1col_c (int hx, int sx, int yl, int yh) return; do { -#ifndef PALETTEOUTPUT - dest[0] = shade_pal_index(colormap[source[0]], light); - dest[pitch] = shade_pal_index(colormap[source[4]], light); -#else dest[0] = colormap[source[0]]; dest[pitch] = colormap[source[4]]; -#endif source += 8; dest += pitch*2; } while (--count); @@ -214,27 +193,16 @@ void rt_map4cols_c (int sx, int yl, int yh) return; count++; -#ifndef PALETTEOUTPUT - uint32_t light = calc_light_multiplier(dc_light); -#endif - colormap = dc_colormap; dest = ylookup[yl] + sx + dc_destorg; source = &dc_temp[yl*4]; pitch = dc_pitch; if (count & 1) { -#ifndef PALETTEOUTPUT - dest[0] = shade_pal_index(colormap[source[0]], light); - dest[1] = shade_pal_index(colormap[source[1]], light); - dest[2] = shade_pal_index(colormap[source[2]], light); - dest[3] = shade_pal_index(colormap[source[3]], light); -#else dest[0] = colormap[source[0]]; dest[1] = colormap[source[1]]; dest[2] = colormap[source[2]]; dest[3] = colormap[source[3]]; -#endif source += 4; dest += pitch; } @@ -242,16 +210,6 @@ void rt_map4cols_c (int sx, int yl, int yh) return; do { -#ifndef PALETTEOUTPUT - dest[0] = shade_pal_index(colormap[source[0]], light); - dest[1] = shade_pal_index(colormap[source[1]], light); - dest[2] = shade_pal_index(colormap[source[2]], light); - dest[3] = shade_pal_index(colormap[source[3]], light); - dest[pitch] = shade_pal_index(colormap[source[4]], light); - dest[pitch + 1] = shade_pal_index(colormap[source[5]], light); - dest[pitch + 2] = 
shade_pal_index(colormap[source[6]], light); - dest[pitch + 3] = shade_pal_index(colormap[source[7]], light); -#else dest[0] = colormap[source[0]]; dest[1] = colormap[source[1]]; dest[2] = colormap[source[2]]; @@ -260,7 +218,6 @@ void rt_map4cols_c (int sx, int yl, int yh) dest[pitch+1] = colormap[source[5]]; dest[pitch+2] = colormap[source[6]]; dest[pitch+3] = colormap[source[7]]; -#endif source += 8; dest += pitch*2; } while (--count); @@ -356,21 +313,21 @@ void rt_Translate4cols(const BYTE *translation, int yl, int yh) } // Translates one span at hx to the screen at sx. -void rt_tlate1col (int hx, int sx, int yl, int yh) +void rt_tlate1col_c (int hx, int sx, int yl, int yh) { rt_Translate1col(dc_translation, hx, yl, yh); rt_map1col(hx, sx, yl, yh); } // Translates all four spans to the screen starting at sx. -void rt_tlate4cols (int sx, int yl, int yh) +void rt_tlate4cols_c (int sx, int yl, int yh) { rt_Translate4cols(dc_translation, yl, yh); rt_map4cols(sx, yl, yh); } // Adds one span at hx to the screen at sx without clamping. 
-void rt_add1col (int hx, int sx, int yl, int yh) +void rt_add1col_c (int hx, int sx, int yl, int yh) { BYTE *colormap; canvas_pixel_t *source; @@ -388,29 +345,6 @@ void rt_add1col (int hx, int sx, int yl, int yh) pitch = dc_pitch; colormap = dc_colormap; -#ifndef PALETTEOUTPUT - uint32_t light = calc_light_multiplier(dc_light); - - do { - uint32_t fg = shade_pal_index(colormap[*source], light); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp(fg_red + bg_red, 0, 255); - uint32_t green = clamp(fg_green + bg_green, 0, 255); - uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - - source += 4; - dest += pitch; - } while (--count); -#else DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; do { @@ -424,7 +358,6 @@ void rt_add1col (int hx, int sx, int yl, int yh) source += 4; dest += pitch; } while (--count); -#endif } // Adds all four spans to the screen starting at sx without clamping. 
@@ -446,32 +379,6 @@ void rt_add4cols_c (int sx, int yl, int yh) pitch = dc_pitch; colormap = dc_colormap; -#ifndef PALETTEOUTPUT - uint32_t light = calc_light_multiplier(dc_light); - - do { - for (int i = 0; i < 4; i++) - { - uint32_t fg = shade_pal_index(colormap[source[i]], light); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (dest[i] >> 16) & 0xff; - uint32_t bg_green = (dest[i] >> 8) & 0xff; - uint32_t bg_blue = (dest[i]) & 0xff; - - uint32_t red = clamp(fg_red + bg_red, 0, 255); - uint32_t green = clamp(fg_green + bg_green, 0, 255); - uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); - - dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - } - - source += 4; - dest += pitch; - } while (--count); -#else DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; @@ -508,25 +415,24 @@ void rt_add4cols_c (int sx, int yl, int yh) source += 4; dest += pitch; } while (--count); -#endif } // Translates and adds one span at hx to the screen at sx without clamping. -void rt_tlateadd1col (int hx, int sx, int yl, int yh) +void rt_tlateadd1col_c (int hx, int sx, int yl, int yh) { rt_Translate1col(dc_translation, hx, yl, yh); rt_add1col(hx, sx, yl, yh); } // Translates and adds all four spans to the screen starting at sx without clamping. -void rt_tlateadd4cols (int sx, int yl, int yh) +void rt_tlateadd4cols_c (int sx, int yl, int yh) { rt_Translate4cols(dc_translation, yl, yh); rt_add4cols(sx, yl, yh); } // Shades one span at hx to the screen at sx. 
-void rt_shaded1col (int hx, int sx, int yl, int yh) +void rt_shaded1col_c (int hx, int sx, int yl, int yh) { BYTE *colormap; canvas_pixel_t *source; @@ -544,29 +450,6 @@ void rt_shaded1col (int hx, int sx, int yl, int yh) source = &dc_temp[yl*4 + hx]; pitch = dc_pitch; -#ifndef PALETTEOUTPUT - uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(0)); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - do { - uint32_t alpha = colormap[*source]; - uint32_t inv_alpha = 64 - alpha; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = (fg_red * alpha + bg_red * inv_alpha) / 64; - uint32_t green = (fg_green * alpha + bg_green * inv_alpha) / 64; - uint32_t blue = (fg_blue * alpha + bg_blue * inv_alpha) / 64; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - source += 4; - dest += pitch; - } while (--count); -#else DWORD *fgstart; fgstart = &Col2RGB8[0][dc_color]; @@ -578,7 +461,6 @@ void rt_shaded1col (int hx, int sx, int yl, int yh) source += 4; dest += pitch; } while (--count); -#endif } // Shades all four spans to the screen starting at sx. 
@@ -600,32 +482,6 @@ void rt_shaded4cols_c (int sx, int yl, int yh) source = &dc_temp[yl*4]; pitch = dc_pitch; -#ifndef PALETTEOUTPUT - uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(0)); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - do { - for (int i = 0; i < 4; i++) - { - uint32_t alpha = colormap[source[i]]; - uint32_t inv_alpha = 64 - alpha; - - uint32_t bg_red = (dest[i] >> 16) & 0xff; - uint32_t bg_green = (dest[i] >> 8) & 0xff; - uint32_t bg_blue = (dest[i]) & 0xff; - - uint32_t red = (fg_red * alpha + bg_red * inv_alpha) / 64; - uint32_t green = (fg_green * alpha + bg_green * inv_alpha) / 64; - uint32_t blue = (fg_blue * alpha + bg_blue * inv_alpha) / 64; - - dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - } - source += 4; - dest += pitch; - } while (--count); -#else DWORD *fgstart; fgstart = &Col2RGB8[0][dc_color]; @@ -651,11 +507,10 @@ void rt_shaded4cols_c (int sx, int yl, int yh) source += 4; dest += pitch; } while (--count); -#endif } // Adds one span at hx to the screen at sx with clamping. 
-void rt_addclamp1col (int hx, int sx, int yl, int yh) +void rt_addclamp1col_c (int hx, int sx, int yl, int yh) { BYTE *colormap; canvas_pixel_t *source; @@ -673,28 +528,6 @@ void rt_addclamp1col (int hx, int sx, int yl, int yh) pitch = dc_pitch; colormap = dc_colormap; -#ifndef PALETTEOUTPUT - uint32_t light = calc_light_multiplier(dc_light); - - do { - uint32_t fg = shade_pal_index(colormap[*source], light); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp(fg_red + bg_red, 0, 255); - uint32_t green = clamp(fg_green + bg_green, 0, 255); - uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - source += 4; - dest += pitch; - } while (--count); -#else DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; @@ -711,7 +544,6 @@ void rt_addclamp1col (int hx, int sx, int yl, int yh) source += 4; dest += pitch; } while (--count); -#endif } // Adds all four spans to the screen starting at sx with clamping. 
@@ -733,31 +565,6 @@ void rt_addclamp4cols_c (int sx, int yl, int yh) pitch = dc_pitch; colormap = dc_colormap; -#ifndef PALETTEOUTPUT - uint32_t light = calc_light_multiplier(dc_light); - - do { - for (int i = 0; i < 4; i++) - { - uint32_t fg = shade_pal_index(colormap[source[i]], light); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (dest[i] >> 16) & 0xff; - uint32_t bg_green = (dest[i] >> 8) & 0xff; - uint32_t bg_blue = (dest[i]) & 0xff; - - uint32_t red = clamp(fg_red + bg_red, 0, 255); - uint32_t green = clamp(fg_green + bg_green, 0, 255); - uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); - - dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - } - source += 4; - dest += pitch; - } while (--count); -#else DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; @@ -802,25 +609,24 @@ void rt_addclamp4cols_c (int sx, int yl, int yh) source += 4; dest += pitch; } while (--count); -#endif } // Translates and adds one span at hx to the screen at sx with clamping. -void rt_tlateaddclamp1col (int hx, int sx, int yl, int yh) +void rt_tlateaddclamp1col_c (int hx, int sx, int yl, int yh) { rt_Translate1col(dc_translation, hx, yl, yh); rt_addclamp1col(hx, sx, yl, yh); } // Translates and adds all four spans to the screen starting at sx with clamping. -void rt_tlateaddclamp4cols (int sx, int yl, int yh) +void rt_tlateaddclamp4cols_c (int sx, int yl, int yh) { rt_Translate4cols(dc_translation, yl, yh); rt_addclamp4cols(sx, yl, yh); } // Subtracts one span at hx to the screen at sx with clamping. 
-void rt_subclamp1col (int hx, int sx, int yl, int yh) +void rt_subclamp1col_c (int hx, int sx, int yl, int yh) { BYTE *colormap; canvas_pixel_t *source; @@ -838,28 +644,6 @@ void rt_subclamp1col (int hx, int sx, int yl, int yh) pitch = dc_pitch; colormap = dc_colormap; -#ifndef PALETTEOUTPUT - uint32_t light = calc_light_multiplier(dc_light); - - do { - uint32_t fg = shade_pal_index(colormap[*source], light); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp(256 - fg_red + bg_red, 256, 256 + 255) - 256; - uint32_t green = clamp(256 - fg_green + bg_green, 256, 256 + 255) - 256; - uint32_t blue = clamp(256 - fg_blue + bg_blue, 256, 256 + 255) - 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - source += 4; - dest += pitch; - } while (--count); -#else DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; do { @@ -874,11 +658,10 @@ void rt_subclamp1col (int hx, int sx, int yl, int yh) source += 4; dest += pitch; } while (--count); -#endif } // Subtracts all four spans to the screen starting at sx with clamping. 
-void rt_subclamp4cols (int sx, int yl, int yh) +void rt_subclamp4cols_c (int sx, int yl, int yh) { BYTE *colormap; canvas_pixel_t *source; @@ -896,32 +679,6 @@ void rt_subclamp4cols (int sx, int yl, int yh) pitch = dc_pitch; colormap = dc_colormap; -#ifndef PALETTEOUTPUT - uint32_t light = calc_light_multiplier(dc_light); - - do { - for (int i = 0; i < 4; i++) - { - uint32_t fg = shade_pal_index(colormap[source[i]], light); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (dest[i] >> 16) & 0xff; - uint32_t bg_green = (dest[i] >> 8) & 0xff; - uint32_t bg_blue = (dest[i]) & 0xff; - - uint32_t red = clamp(256 - fg_red + bg_red, 256, 256 + 255) - 256; - uint32_t green = clamp(256 - fg_green + bg_green, 256, 256 + 255) - 256; - uint32_t blue = clamp(256 - fg_blue + bg_blue, 256, 256 + 255) - 256; - - dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - } - - source += 4; - dest += pitch; - } while (--count); -#else DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; do { @@ -961,25 +718,24 @@ void rt_subclamp4cols (int sx, int yl, int yh) source += 4; dest += pitch; } while (--count); -#endif } // Translates and subtracts one span at hx to the screen at sx with clamping. -void rt_tlatesubclamp1col (int hx, int sx, int yl, int yh) +void rt_tlatesubclamp1col_c (int hx, int sx, int yl, int yh) { rt_Translate1col(dc_translation, hx, yl, yh); rt_subclamp1col(hx, sx, yl, yh); } // Translates and subtracts all four spans to the screen starting at sx with clamping. -void rt_tlatesubclamp4cols (int sx, int yl, int yh) +void rt_tlatesubclamp4cols_c (int sx, int yl, int yh) { rt_Translate4cols(dc_translation, yl, yh); rt_subclamp4cols(sx, yl, yh); } // Subtracts one span at hx from the screen at sx with clamping. 
-void rt_revsubclamp1col (int hx, int sx, int yl, int yh) +void rt_revsubclamp1col_c (int hx, int sx, int yl, int yh) { BYTE *colormap; canvas_pixel_t *source; @@ -999,28 +755,6 @@ void rt_revsubclamp1col (int hx, int sx, int yl, int yh) pitch = dc_pitch; colormap = dc_colormap; -#ifndef PALETTEOUTPUT - uint32_t light = calc_light_multiplier(dc_light); - - do { - uint32_t fg = shade_pal_index(colormap[*source], light); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp(256 + fg_red - bg_red, 256, 256 + 255) - 256; - uint32_t green = clamp(256 + fg_green - bg_green, 256, 256 + 255) - 256; - uint32_t blue = clamp(256 + fg_blue - bg_blue, 256, 256 + 255) - 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - source += 4; - dest += pitch; - } while (--count); -#else do { DWORD a = (bg2rgb[*dest] | 0x40100400) - fg2rgb[colormap[*source]]; DWORD b = a; @@ -1033,11 +767,10 @@ void rt_revsubclamp1col (int hx, int sx, int yl, int yh) source += 4; dest += pitch; } while (--count); -#endif } // Subtracts all four spans from the screen starting at sx with clamping. 
-void rt_revsubclamp4cols (int sx, int yl, int yh) +void rt_revsubclamp4cols_c (int sx, int yl, int yh) { BYTE *colormap; canvas_pixel_t *source; @@ -1057,32 +790,6 @@ void rt_revsubclamp4cols (int sx, int yl, int yh) pitch = dc_pitch; colormap = dc_colormap; -#ifndef PALETTEOUTPUT - uint32_t light = calc_light_multiplier(dc_light); - - do { - for (int i = 0; i < 4; i++) - { - uint32_t fg = shade_pal_index(colormap[source[i]], light); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (dest[i] >> 16) & 0xff; - uint32_t bg_green = (dest[i] >> 8) & 0xff; - uint32_t bg_blue = (dest[i]) & 0xff; - - uint32_t red = clamp(256 + fg_red - bg_red, 256, 256 + 255) - 256; - uint32_t green = clamp(256 + fg_green - bg_green, 256, 256 + 255) - 256; - uint32_t blue = clamp(256 + fg_blue - bg_blue, 256, 256 + 255) - 256; - - dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - } - - source += 4; - dest += pitch; - } while (--count); -#else do { DWORD a = (bg2rgb[dest[0]] | 0x40100400) - fg2rgb[colormap[source[0]]]; DWORD b = a; @@ -1120,18 +827,17 @@ void rt_revsubclamp4cols (int sx, int yl, int yh) source += 4; dest += pitch; } while (--count); -#endif } // Translates and subtracts one span at hx from the screen at sx with clamping. -void rt_tlaterevsubclamp1col (int hx, int sx, int yl, int yh) +void rt_tlaterevsubclamp1col_c (int hx, int sx, int yl, int yh) { rt_Translate1col(dc_translation, hx, yl, yh); rt_revsubclamp1col(hx, sx, yl, yh); } // Translates and subtracts all four spans from the screen starting at sx with clamping. -void rt_tlaterevsubclamp4cols (int sx, int yl, int yh) +void rt_tlaterevsubclamp4cols_c (int sx, int yl, int yh) { rt_Translate4cols(dc_translation, yl, yh); rt_revsubclamp4cols(sx, yl, yh); @@ -1301,7 +1007,7 @@ void rt_draw4cols (int sx) // Before each pass through a rendering loop that uses these routines, // call this function to set up the span pointers. 
-void rt_initcols (canvas_pixel_t *buff) +void rt_initcols_pal (canvas_pixel_t *buff) { int y; @@ -1372,7 +1078,7 @@ void R_DrawColumnHorizP_C (void) } // [RH] Just fills a column with a given color -void R_FillColumnHorizP (void) +void R_FillColumnHorizP_C (void) { int count = dc_count; BYTE color = dc_color; diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp new file mode 100644 index 0000000000..e8111be8fb --- /dev/null +++ b/src/r_drawt_rgba.cpp @@ -0,0 +1,883 @@ +/* +** r_drawt_rgba.cpp +** Faster column drawers for modern processors, true color edition +** +**--------------------------------------------------------------------------- +** Copyright 1998-2006 Randy Heit +** All rights reserved. +** +** Redistribution and use in source and binary forms, with or without +** modification, are permitted provided that the following conditions +** are met: +** +** 1. Redistributions of source code must retain the above copyright +** notice, this list of conditions and the following disclaimer. +** 2. Redistributions in binary form must reproduce the above copyright +** notice, this list of conditions and the following disclaimer in the +** documentation and/or other materials provided with the distribution. +** 3. The name of the author may not be used to endorse or promote products +** derived from this software without specific prior written permission. +** +** THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +** IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +** OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+** IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +** INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +** NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +** DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +** THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +** (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +** THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**--------------------------------------------------------------------------- +** +** True color versions of the similar functions in r_drawt.cpp +** Please see r_drawt.cpp for a description of the globals used. +*/ + +#include "templates.h" +#include "doomtype.h" +#include "doomdef.h" +#include "r_defs.h" +#include "r_draw.h" +#include "r_main.h" +#include "r_things.h" +#include "v_video.h" + +canvas_pixel_t dc_temp_rgbabuff_rgba[MAXHEIGHT*4]; +canvas_pixel_t *dc_temp_rgba; + +// Defined in r_draw_t.cpp: +extern unsigned int dc_tspans[4][MAXHEIGHT]; +extern unsigned int *dc_ctspan[4]; +extern unsigned int *horizspan[4]; + +// Copies one span at hx to the screen at sx. +void rt_copy1col_RGBA_c (int hx, int sx, int yl, int yh) +{ + canvas_pixel_t *source; + canvas_pixel_t *dest; + int count; + int pitch; + + count = yh-yl; + if (count < 0) + return; + count++; + + dest = ylookup[yl] + sx + dc_destorg; + source = &dc_temp_rgba[yl*4 + hx]; + pitch = dc_pitch; + + if (count & 1) { + *dest = *source; + source += 4; + dest += pitch; + } + if (count & 2) { + dest[0] = source[0]; + dest[pitch] = source[4]; + source += 8; + dest += pitch*2; + } + if (!(count >>= 2)) + return; + + do { + dest[0] = source[0]; + dest[pitch] = source[4]; + dest[pitch*2] = source[8]; + dest[pitch*3] = source[12]; + source += 16; + dest += pitch*4; + } while (--count); +} + +// Copies all four spans to the screen starting at sx. 
+void rt_copy4cols_RGBA_c (int sx, int yl, int yh) +{ + // To do: we could do this with SSE using __m128i + rt_copy1col_RGBA_c(0, sx, yl, yh); + rt_copy1col_RGBA_c(1, sx + 1, yl, yh); + rt_copy1col_RGBA_c(2, sx + 2, yl, yh); + rt_copy1col_RGBA_c(3, sx + 3, yl, yh); +} + +// Maps one span at hx to the screen at sx. +void rt_map1col_RGBA_c (int hx, int sx, int yl, int yh) +{ + BYTE *colormap; + canvas_pixel_t *source; + canvas_pixel_t *dest; + int count; + int pitch; + + count = yh-yl; + if (count < 0) + return; + count++; + + uint32_t light = calc_light_multiplier(dc_light); + + colormap = dc_colormap; + dest = ylookup[yl] + sx + dc_destorg; + source = &dc_temp_rgba[yl*4 + hx]; + pitch = dc_pitch; + + if (count & 1) { + *dest = shade_pal_index(colormap[*source], light); + source += 4; + dest += pitch; + } + if (!(count >>= 1)) + return; + + do { + dest[0] = shade_pal_index(colormap[source[0]], light); + dest[pitch] = shade_pal_index(colormap[source[4]], light); + source += 8; + dest += pitch*2; + } while (--count); +} + +// Maps all four spans to the screen starting at sx. 
+void rt_map4cols_RGBA_c (int sx, int yl, int yh) +{ + BYTE *colormap; + canvas_pixel_t *source; + canvas_pixel_t *dest; + int count; + int pitch; + + count = yh-yl; + if (count < 0) + return; + count++; + + uint32_t light = calc_light_multiplier(dc_light); + + colormap = dc_colormap; + dest = ylookup[yl] + sx + dc_destorg; + source = &dc_temp_rgba[yl*4]; + pitch = dc_pitch; + + if (count & 1) { + dest[0] = shade_pal_index(colormap[source[0]], light); + dest[1] = shade_pal_index(colormap[source[1]], light); + dest[2] = shade_pal_index(colormap[source[2]], light); + dest[3] = shade_pal_index(colormap[source[3]], light); + source += 4; + dest += pitch; + } + if (!(count >>= 1)) + return; + + do { + dest[0] = shade_pal_index(colormap[source[0]], light); + dest[1] = shade_pal_index(colormap[source[1]], light); + dest[2] = shade_pal_index(colormap[source[2]], light); + dest[3] = shade_pal_index(colormap[source[3]], light); + dest[pitch] = shade_pal_index(colormap[source[4]], light); + dest[pitch + 1] = shade_pal_index(colormap[source[5]], light); + dest[pitch + 2] = shade_pal_index(colormap[source[6]], light); + dest[pitch + 3] = shade_pal_index(colormap[source[7]], light); + source += 8; + dest += pitch*2; + } while (--count); +} + +void rt_Translate1col_RGBA_c(const BYTE *translation, int hx, int yl, int yh) +{ + int count = yh - yl + 1; + canvas_pixel_t *source = &dc_temp_rgba[yl*4 + hx]; + + // Things we do to hit the compiler's optimizer with a clue bat: + // 1. Parallelism is explicitly spelled out by using a separate + // C instruction for each assembly instruction. GCC lets me + // have four temporaries, but VC++ spills to the stack with + // more than two. Two is probably optimal, anyway. + // 2. The results of the translation lookups are explicitly + // stored in byte-sized variables. This causes the VC++ code + // to use byte mov instructions in most cases; for apparently + // random reasons, it will use movzx for some places. 
GCC + // ignores this and uses movzx always. + + // Do 8 rows at a time. + for (int count8 = count >> 3; count8; --count8) + { + int c0, c1; + BYTE b0, b1; + + c0 = source[0]; c1 = source[4]; + b0 = translation[c0]; b1 = translation[c1]; + source[0] = b0; source[4] = b1; + + c0 = source[8]; c1 = source[12]; + b0 = translation[c0]; b1 = translation[c1]; + source[8] = b0; source[12] = b1; + + c0 = source[16]; c1 = source[20]; + b0 = translation[c0]; b1 = translation[c1]; + source[16] = b0; source[20] = b1; + + c0 = source[24]; c1 = source[28]; + b0 = translation[c0]; b1 = translation[c1]; + source[24] = b0; source[28] = b1; + + source += 32; + } + // Finish by doing 1 row at a time. + for (count &= 7; count; --count, source += 4) + { + source[0] = translation[source[0]]; + } +} + +void rt_Translate4cols_RGBA_c(const BYTE *translation, int yl, int yh) +{ + int count = yh - yl + 1; + canvas_pixel_t *source = &dc_temp_rgba[yl*4]; + int c0, c1; + BYTE b0, b1; + + // Do 2 rows at a time. + for (int count8 = count >> 1; count8; --count8) + { + c0 = source[0]; c1 = source[1]; + b0 = translation[c0]; b1 = translation[c1]; + source[0] = b0; source[1] = b1; + + c0 = source[2]; c1 = source[3]; + b0 = translation[c0]; b1 = translation[c1]; + source[2] = b0; source[3] = b1; + + c0 = source[4]; c1 = source[5]; + b0 = translation[c0]; b1 = translation[c1]; + source[4] = b0; source[5] = b1; + + c0 = source[6]; c1 = source[7]; + b0 = translation[c0]; b1 = translation[c1]; + source[6] = b0; source[7] = b1; + + source += 8; + } + // Do the final row if count was odd. + if (count & 1) + { + c0 = source[0]; c1 = source[1]; + b0 = translation[c0]; b1 = translation[c1]; + source[0] = b0; source[1] = b1; + + c0 = source[2]; c1 = source[3]; + b0 = translation[c0]; b1 = translation[c1]; + source[2] = b0; source[3] = b1; + } +} + +// Translates one span at hx to the screen at sx. 
+void rt_tlate1col_RGBA_c (int hx, int sx, int yl, int yh) +{ + rt_Translate1col_RGBA_c(dc_translation, hx, yl, yh); + rt_map1col(hx, sx, yl, yh); +} + +// Translates all four spans to the screen starting at sx. +void rt_tlate4cols_RGBA_c (int sx, int yl, int yh) +{ + rt_Translate4cols_RGBA_c(dc_translation, yl, yh); + rt_map4cols(sx, yl, yh); +} + +// Adds one span at hx to the screen at sx without clamping. +void rt_add1col_RGBA_c (int hx, int sx, int yl, int yh) +{ + BYTE *colormap; + canvas_pixel_t *source; + canvas_pixel_t *dest; + int count; + int pitch; + + count = yh-yl; + if (count < 0) + return; + count++; + + dest = ylookup[yl] + sx + dc_destorg; + source = &dc_temp_rgba[yl*4 + hx]; + pitch = dc_pitch; + colormap = dc_colormap; + + uint32_t light = calc_light_multiplier(dc_light); + + do { + uint32_t fg = shade_pal_index(colormap[*source], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(fg_red + bg_red, 0, 255); + uint32_t green = clamp(fg_green + bg_green, 0, 255); + uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + + source += 4; + dest += pitch; + } while (--count); +} + +// Adds all four spans to the screen starting at sx without clamping. 
+void rt_add4cols_RGBA_c (int sx, int yl, int yh) +{ + BYTE *colormap; + canvas_pixel_t *source; + canvas_pixel_t *dest; + int count; + int pitch; + + count = yh-yl; + if (count < 0) + return; + count++; + + dest = ylookup[yl] + sx + dc_destorg; + source = &dc_temp_rgba[yl*4]; + pitch = dc_pitch; + colormap = dc_colormap; + + uint32_t light = calc_light_multiplier(dc_light); + + do { + for (int i = 0; i < 4; i++) + { + uint32_t fg = shade_pal_index(colormap[source[i]], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (dest[i] >> 16) & 0xff; + uint32_t bg_green = (dest[i] >> 8) & 0xff; + uint32_t bg_blue = (dest[i]) & 0xff; + + uint32_t red = clamp(fg_red + bg_red, 0, 255); + uint32_t green = clamp(fg_green + bg_green, 0, 255); + uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + + dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; + } + + source += 4; + dest += pitch; + } while (--count); +} + +// Translates and adds one span at hx to the screen at sx without clamping. +void rt_tlateadd1col_RGBA_c (int hx, int sx, int yl, int yh) +{ + rt_Translate1col_RGBA_c(dc_translation, hx, yl, yh); + rt_add1col(hx, sx, yl, yh); +} + +// Translates and adds all four spans to the screen starting at sx without clamping. +void rt_tlateadd4cols_RGBA_c(int sx, int yl, int yh) +{ + rt_Translate4cols_RGBA_c(dc_translation, yl, yh); + rt_add4cols(sx, yl, yh); +} + +// Shades one span at hx to the screen at sx. 
+void rt_shaded1col_RGBA_c (int hx, int sx, int yl, int yh) +{ + BYTE *colormap; + canvas_pixel_t *source; + canvas_pixel_t *dest; + int count; + int pitch; + + count = yh-yl; + if (count < 0) + return; + count++; + + colormap = dc_colormap; + dest = ylookup[yl] + sx + dc_destorg; + source = &dc_temp_rgba[yl*4 + hx]; + pitch = dc_pitch; + + uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(0)); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + do { + uint32_t alpha = colormap[*source]; + uint32_t inv_alpha = 64 - alpha; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = (fg_red * alpha + bg_red * inv_alpha) / 64; + uint32_t green = (fg_green * alpha + bg_green * inv_alpha) / 64; + uint32_t blue = (fg_blue * alpha + bg_blue * inv_alpha) / 64; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + source += 4; + dest += pitch; + } while (--count); +} + +// Shades all four spans to the screen starting at sx. 
+void rt_shaded4cols_RGBA_c (int sx, int yl, int yh) +{ + BYTE *colormap; + canvas_pixel_t *source; + canvas_pixel_t *dest; + int count; + int pitch; + + count = yh-yl; + if (count < 0) + return; + count++; + + colormap = dc_colormap; + dest = ylookup[yl] + sx + dc_destorg; + source = &dc_temp_rgba[yl*4]; + pitch = dc_pitch; + + uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(0)); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + do { + for (int i = 0; i < 4; i++) + { + uint32_t alpha = colormap[source[i]]; + uint32_t inv_alpha = 64 - alpha; + + uint32_t bg_red = (dest[i] >> 16) & 0xff; + uint32_t bg_green = (dest[i] >> 8) & 0xff; + uint32_t bg_blue = (dest[i]) & 0xff; + + uint32_t red = (fg_red * alpha + bg_red * inv_alpha) / 64; + uint32_t green = (fg_green * alpha + bg_green * inv_alpha) / 64; + uint32_t blue = (fg_blue * alpha + bg_blue * inv_alpha) / 64; + + dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; + } + source += 4; + dest += pitch; + } while (--count); +} + +// Adds one span at hx to the screen at sx with clamping. 
+void rt_addclamp1col_RGBA_c (int hx, int sx, int yl, int yh) +{ + BYTE *colormap; + canvas_pixel_t *source; + canvas_pixel_t *dest; + int count; + int pitch; + + count = yh-yl; + if (count < 0) + return; + count++; + + dest = ylookup[yl] + sx + dc_destorg; + source = &dc_temp_rgba[yl*4 + hx]; + pitch = dc_pitch; + colormap = dc_colormap; + + uint32_t light = calc_light_multiplier(dc_light); + + do { + uint32_t fg = shade_pal_index(colormap[*source], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(fg_red + bg_red, 0, 255); + uint32_t green = clamp(fg_green + bg_green, 0, 255); + uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + source += 4; + dest += pitch; + } while (--count); +} + +// Adds all four spans to the screen starting at sx with clamping. 
+void rt_addclamp4cols_RGBA_c (int sx, int yl, int yh) +{ + BYTE *colormap; + canvas_pixel_t *source; + canvas_pixel_t *dest; + int count; + int pitch; + + count = yh-yl; + if (count < 0) + return; + count++; + + dest = ylookup[yl] + sx + dc_destorg; + source = &dc_temp_rgba[yl*4]; + pitch = dc_pitch; + colormap = dc_colormap; + + uint32_t light = calc_light_multiplier(dc_light); + + do { + for (int i = 0; i < 4; i++) + { + uint32_t fg = shade_pal_index(colormap[source[i]], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (dest[i] >> 16) & 0xff; + uint32_t bg_green = (dest[i] >> 8) & 0xff; + uint32_t bg_blue = (dest[i]) & 0xff; + + uint32_t red = clamp(fg_red + bg_red, 0, 255); + uint32_t green = clamp(fg_green + bg_green, 0, 255); + uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + + dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; + } + source += 4; + dest += pitch; + } while (--count); +} + +// Translates and adds one span at hx to the screen at sx with clamping. +void rt_tlateaddclamp1col_RGBA_c (int hx, int sx, int yl, int yh) +{ + rt_Translate1col_RGBA_c(dc_translation, hx, yl, yh); + rt_addclamp1col_RGBA_c(hx, sx, yl, yh); +} + +// Translates and adds all four spans to the screen starting at sx with clamping. +void rt_tlateaddclamp4cols_RGBA_c (int sx, int yl, int yh) +{ + rt_Translate4cols_RGBA_c(dc_translation, yl, yh); + rt_addclamp4cols(sx, yl, yh); +} + +// Subtracts one span at hx to the screen at sx with clamping. 
+void rt_subclamp1col_RGBA_c (int hx, int sx, int yl, int yh) +{ + BYTE *colormap; + canvas_pixel_t *source; + canvas_pixel_t *dest; + int count; + int pitch; + + count = yh-yl; + if (count < 0) + return; + count++; + + dest = ylookup[yl] + sx + dc_destorg; + source = &dc_temp_rgba[yl*4 + hx]; + pitch = dc_pitch; + colormap = dc_colormap; + + uint32_t light = calc_light_multiplier(dc_light); + + do { + uint32_t fg = shade_pal_index(colormap[*source], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(256 - fg_red + bg_red, 256, 256 + 255) - 256; + uint32_t green = clamp(256 - fg_green + bg_green, 256, 256 + 255) - 256; + uint32_t blue = clamp(256 - fg_blue + bg_blue, 256, 256 + 255) - 256; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + source += 4; + dest += pitch; + } while (--count); +} + +// Subtracts all four spans to the screen starting at sx with clamping. 
+void rt_subclamp4cols_RGBA_c (int sx, int yl, int yh) +{ + BYTE *colormap; + canvas_pixel_t *source; + canvas_pixel_t *dest; + int count; + int pitch; + + count = yh-yl; + if (count < 0) + return; + count++; + + dest = ylookup[yl] + sx + dc_destorg; + source = &dc_temp_rgba[yl*4]; + pitch = dc_pitch; + colormap = dc_colormap; + + uint32_t light = calc_light_multiplier(dc_light); + + do { + for (int i = 0; i < 4; i++) + { + uint32_t fg = shade_pal_index(colormap[source[i]], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (dest[i] >> 16) & 0xff; + uint32_t bg_green = (dest[i] >> 8) & 0xff; + uint32_t bg_blue = (dest[i]) & 0xff; + + uint32_t red = clamp(256 - fg_red + bg_red, 256, 256 + 255) - 256; + uint32_t green = clamp(256 - fg_green + bg_green, 256, 256 + 255) - 256; + uint32_t blue = clamp(256 - fg_blue + bg_blue, 256, 256 + 255) - 256; + + dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; + } + + source += 4; + dest += pitch; + } while (--count); +} + +// Translates and subtracts one span at hx to the screen at sx with clamping. +void rt_tlatesubclamp1col_RGBA_c (int hx, int sx, int yl, int yh) +{ + rt_Translate1col_RGBA_c(dc_translation, hx, yl, yh); + rt_subclamp1col_RGBA_c(hx, sx, yl, yh); +} + +// Translates and subtracts all four spans to the screen starting at sx with clamping. +void rt_tlatesubclamp4cols_RGBA_c (int sx, int yl, int yh) +{ + rt_Translate4cols_RGBA_c(dc_translation, yl, yh); + rt_subclamp4cols_RGBA_c(sx, yl, yh); +} + +// Subtracts one span at hx from the screen at sx with clamping. 
+void rt_revsubclamp1col_RGBA_c (int hx, int sx, int yl, int yh) +{ + BYTE *colormap; + canvas_pixel_t *source; + canvas_pixel_t *dest; + int count; + int pitch; + + count = yh-yl; + if (count < 0) + return; + count++; + + DWORD *fg2rgb = dc_srcblend; + DWORD *bg2rgb = dc_destblend; + dest = ylookup[yl] + sx + dc_destorg; + source = &dc_temp_rgba[yl*4 + hx]; + pitch = dc_pitch; + colormap = dc_colormap; + + uint32_t light = calc_light_multiplier(dc_light); + + do { + uint32_t fg = shade_pal_index(colormap[*source], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(256 + fg_red - bg_red, 256, 256 + 255) - 256; + uint32_t green = clamp(256 + fg_green - bg_green, 256, 256 + 255) - 256; + uint32_t blue = clamp(256 + fg_blue - bg_blue, 256, 256 + 255) - 256; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + source += 4; + dest += pitch; + } while (--count); +} + +// Subtracts all four spans from the screen starting at sx with clamping. 
+void rt_revsubclamp4cols_RGBA_c (int sx, int yl, int yh) +{ + BYTE *colormap; + canvas_pixel_t *source; + canvas_pixel_t *dest; + int count; + int pitch; + + count = yh-yl; + if (count < 0) + return; + count++; + + DWORD *fg2rgb = dc_srcblend; + DWORD *bg2rgb = dc_destblend; + dest = ylookup[yl] + sx + dc_destorg; + source = &dc_temp_rgba[yl*4]; + pitch = dc_pitch; + colormap = dc_colormap; + + uint32_t light = calc_light_multiplier(dc_light); + + do { + for (int i = 0; i < 4; i++) + { + uint32_t fg = shade_pal_index(colormap[source[i]], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (dest[i] >> 16) & 0xff; + uint32_t bg_green = (dest[i] >> 8) & 0xff; + uint32_t bg_blue = (dest[i]) & 0xff; + + uint32_t red = clamp(256 + fg_red - bg_red, 256, 256 + 255) - 256; + uint32_t green = clamp(256 + fg_green - bg_green, 256, 256 + 255) - 256; + uint32_t blue = clamp(256 + fg_blue - bg_blue, 256, 256 + 255) - 256; + + dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; + } + + source += 4; + dest += pitch; + } while (--count); +} + +// Translates and subtracts one span at hx from the screen at sx with clamping. +void rt_tlaterevsubclamp1col_RGBA_c (int hx, int sx, int yl, int yh) +{ + rt_Translate1col_RGBA_c(dc_translation, hx, yl, yh); + rt_revsubclamp1col_RGBA_c(hx, sx, yl, yh); +} + +// Translates and subtracts all four spans from the screen starting at sx with clamping. +void rt_tlaterevsubclamp4cols_RGBA_c (int sx, int yl, int yh) +{ + rt_Translate4cols_RGBA_c(dc_translation, yl, yh); + rt_revsubclamp4cols_RGBA_c(sx, yl, yh); +} + +// Before each pass through a rendering loop that uses these routines, +// call this function to set up the span pointers. +void rt_initcols_rgba (canvas_pixel_t *buff) +{ + int y; + + dc_temp_rgba = buff == NULL ? 
dc_temp_rgbabuff_rgba : buff; + for (y = 3; y >= 0; y--) + horizspan[y] = dc_ctspan[y] = &dc_tspans[y][0]; +} + +// Stretches a column into a temporary buffer which is later +// drawn to the screen along with up to three other columns. +void R_DrawColumnHorizP_RGBA_C (void) +{ + int count = dc_count; + canvas_pixel_t *dest; + fixed_t fracstep; + fixed_t frac; + + if (count <= 0) + return; + + { + int x = dc_x & 3; + unsigned int **span; + + span = &dc_ctspan[x]; + (*span)[0] = dc_yl; + (*span)[1] = dc_yh; + *span += 2; + dest = &dc_temp_rgba[x + 4*dc_yl]; + } + fracstep = dc_iscale; + frac = dc_texturefrac; + + { + const BYTE *source = dc_source; + + if (count & 1) { + *dest = source[frac>>FRACBITS]; dest += 4; frac += fracstep; + } + if (count & 2) { + dest[0] = source[frac>>FRACBITS]; frac += fracstep; + dest[4] = source[frac>>FRACBITS]; frac += fracstep; + dest += 8; + } + if (count & 4) { + dest[0] = source[frac>>FRACBITS]; frac += fracstep; + dest[4] = source[frac>>FRACBITS]; frac += fracstep; + dest[8] = source[frac>>FRACBITS]; frac += fracstep; + dest[12]= source[frac>>FRACBITS]; frac += fracstep; + dest += 16; + } + count >>= 3; + if (!count) return; + + do + { + dest[0] = source[frac>>FRACBITS]; frac += fracstep; + dest[4] = source[frac>>FRACBITS]; frac += fracstep; + dest[8] = source[frac>>FRACBITS]; frac += fracstep; + dest[12]= source[frac>>FRACBITS]; frac += fracstep; + dest[16]= source[frac>>FRACBITS]; frac += fracstep; + dest[20]= source[frac>>FRACBITS]; frac += fracstep; + dest[24]= source[frac>>FRACBITS]; frac += fracstep; + dest[28]= source[frac>>FRACBITS]; frac += fracstep; + dest += 32; + } while (--count); + } +} + +// [RH] Just fills a column with a given color +void R_FillColumnHorizP_RGBA_C (void) +{ + int count = dc_count; + BYTE color = dc_color; + canvas_pixel_t *dest; + + if (count <= 0) + return; + + { + int x = dc_x & 3; + unsigned int **span = &dc_ctspan[x]; + + (*span)[0] = dc_yl; + (*span)[1] = dc_yh; + *span += 2; + dest = 
&dc_temp_rgba[x + 4*dc_yl]; + } + + if (count & 1) { + *dest = color; + dest += 4; + } + if (!(count >>= 1)) + return; + do { + dest[0] = color; dest[4] = color; + dest += 8; + } while (--count); +} diff --git a/src/r_main.cpp b/src/r_main.cpp index 04e7989814..b7723d07d6 100644 --- a/src/r_main.cpp +++ b/src/r_main.cpp @@ -847,10 +847,10 @@ void R_RenderActorView (AActor *actor, bool dontmaplines) // [RH] Show off segs if r_drawflat is 1 if (r_drawflat) { - hcolfunc_pre = R_FillColumnHorizP; + hcolfunc_pre = R_FillColumnHoriz; hcolfunc_post1 = rt_copy1col; hcolfunc_post4 = rt_copy4cols; - colfunc = R_FillColumnP; + colfunc = R_FillColumn; spanfunc = R_FillSpan; } else diff --git a/src/r_plane.cpp b/src/r_plane.cpp index b385302e5b..c8258a1ba0 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -491,18 +491,19 @@ void R_MapTiltedPlane (int y, int x1) // //========================================================================== -void R_MapColoredPlane (int y, int x1) +void R_MapColoredPlane_C (int y, int x1) +{ + memset (ylookup[y] + x1 + dc_destorg, ds_color, (spanend[y] - x1 + 1)); +} + +void R_MapColoredPlane_RGBA(int y, int x1) { -#ifndef PALETTEOUTPUT canvas_pixel_t *dest = ylookup[y] + x1 + dc_destorg; int count = (spanend[y] - x1 + 1); uint32_t light = calc_light_multiplier(ds_light); uint32_t color = shade_pal_index(ds_color, light); for (int i = 0; i < count; i++) dest[i] = color; -#else - memset (ylookup[y] + x1 + dc_destorg, ds_color, (spanend[y] - x1 + 1) * sizeof(canvas_pixel_t)); -#endif } //========================================================================== diff --git a/src/r_plane.h b/src/r_plane.h index d4db3dc09c..ac63501e30 100644 --- a/src/r_plane.h +++ b/src/r_plane.h @@ -93,6 +93,10 @@ void R_DrawNormalPlane (visplane_t *pl, double xscale, double yscale, fixed_t al void R_DrawTiltedPlane (visplane_t *pl, double xscale, double yscale, fixed_t alpha, bool additive, bool masked); void R_MapVisPlane (visplane_t *pl, void (*mapfunc)(int 
y, int x1)); +extern void(*R_MapColoredPlane)(int y, int x1); +void R_MapColoredPlane_C(int y, int x1); +void R_MapColoredPlane_RGBA(int y, int x1); + visplane_t *R_FindPlane ( const secplane_t &height, FTextureID picnum, diff --git a/src/r_segs.cpp b/src/r_segs.cpp index 1cdb785558..fb27a99de7 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -463,7 +463,7 @@ void R_RenderMaskedSegRange (drawseg_t *ds, int x1, int x2) while (dc_x < stop) { - rt_initcols(); + rt_initcols(nullptr); BlastMaskedColumn (R_DrawMaskedColumnHoriz, tex); dc_x++; BlastMaskedColumn (R_DrawMaskedColumnHoriz, tex); dc_x++; BlastMaskedColumn (R_DrawMaskedColumnHoriz, tex); dc_x++; @@ -3319,7 +3319,7 @@ static void R_RenderDecal (side_t *wall, DBaseDecal *decal, drawseg_t *clipper, dc_light = 0; #endif } - rt_initcols(); + rt_initcols(nullptr); for (int zz = 4; zz; --zz) { R_WallSpriteColumn (R_DrawMaskedColumnHoriz); diff --git a/src/r_things.cpp b/src/r_things.cpp index 0e55b45f9d..a6f6aea287 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -470,7 +470,7 @@ void R_DrawVisSprite (vissprite_t *vis) while (dc_x < stop4) { - rt_initcols(); + rt_initcols(nullptr); for (int zz = 4; zz; --zz) { pixels = tex->GetColumn (frac >> FRACBITS, &spans); @@ -619,7 +619,7 @@ void R_DrawWallSprite(vissprite_t *spr) dc_light = FLOAT2FIXED(MAXLIGHTVIS); #endif } - rt_initcols(); + rt_initcols(nullptr); for (int zz = 4; zz; --zz) { if (!R_ClipSpriteColumnWithPortals(spr)) @@ -681,7 +681,7 @@ void R_DrawVisVoxel(vissprite_t *spr, int minslabz, int maxslabz, short *cliptop { return; } - if (colfunc == fuzzcolfunc || colfunc == R_FillColumnP) + if (colfunc == fuzzcolfunc || colfunc == R_FillColumn) { flags = DVF_OFFSCREEN | DVF_SPANSONLY; } @@ -2617,7 +2617,7 @@ static void R_DrawMaskedSegsBehindParticle (const vissprite_t *vis) } } -void R_DrawParticle (vissprite_t *vis) +void R_DrawParticle_C (vissprite_t *vis) { int spacing; canvas_pixel_t *dest; @@ -2629,44 +2629,6 @@ void R_DrawParticle (vissprite_t 
*vis) R_DrawMaskedSegsBehindParticle (vis); -#ifndef PALETTEOUTPUT - uint32_t fg = shade_pal_index(color, calc_light_multiplier(0)); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - // vis->renderflags holds translucency level (0-255) - fixed_t fglevel = ((vis->renderflags + 1) << 8) & ~0x3ff; - uint32_t alpha = fglevel * 256 / FRACUNIT; - uint32_t inv_alpha = 256 - alpha; - - fg_red *= alpha; - fg_green *= alpha; - fg_blue *= alpha; - - spacing = RenderTarget->GetPitch(); - - for (int x = x1; x < (x1 + countbase); x++) - { - dc_x = x; - if (R_ClipSpriteColumnWithPortals(vis)) - continue; - dest = ylookup[yl] + x + dc_destorg; - for (int y = 0; y < ycount; y++) - { - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = (fg_red + bg_red * alpha) / 256; - uint32_t green = (fg_green + bg_green * alpha) / 256; - uint32_t blue = (fg_blue + bg_blue * alpha) / 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += spacing; - } - } -#else DWORD *bg2rgb; DWORD fg; @@ -2719,7 +2681,56 @@ void R_DrawParticle (vissprite_t *vis) dest += spacing; } } -#endif +} + +void R_DrawParticle_RGBA(vissprite_t *vis) +{ + int spacing; + canvas_pixel_t *dest; + BYTE color = vis->Style.colormap[vis->startfrac]; + int yl = vis->y1; + int ycount = vis->y2 - yl + 1; + int x1 = vis->x1; + int countbase = vis->x2 - x1; + + R_DrawMaskedSegsBehindParticle(vis); + + uint32_t fg = shade_pal_index(color, calc_light_multiplier(0)); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + // vis->renderflags holds translucency level (0-255) + fixed_t fglevel = ((vis->renderflags + 1) << 8) & ~0x3ff; + uint32_t alpha = fglevel * 256 / FRACUNIT; + uint32_t inv_alpha = 256 - alpha; + + fg_red *= alpha; + fg_green *= alpha; + fg_blue *= alpha; + + spacing = 
RenderTarget->GetPitch(); + + for (int x = x1; x < (x1 + countbase); x++) + { + dc_x = x; + if (R_ClipSpriteColumnWithPortals(vis)) + continue; + dest = ylookup[yl] + x + dc_destorg; + for (int y = 0; y < ycount; y++) + { + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = (fg_red + bg_red * alpha) / 256; + uint32_t green = (fg_green + bg_green * alpha) / 256; + uint32_t blue = (fg_blue + bg_blue * alpha) / 256; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += spacing; + } + } } extern double BaseYaspectMul;; diff --git a/src/r_things.h b/src/r_things.h index 1cf9b02007..057b7cfe2a 100644 --- a/src/r_things.h +++ b/src/r_things.h @@ -97,7 +97,10 @@ struct vissprite_t struct particle_t; -void R_DrawParticle (vissprite_t *); +extern void(*R_DrawParticle)(vissprite_t *); +void R_DrawParticle_C (vissprite_t *); +void R_DrawParticle_RGBA (vissprite_t *); + void R_ProjectParticle (particle_t *, const sector_t *sector, int shade, int fakeside); extern int MaxVisSprites; diff --git a/src/v_draw.cpp b/src/v_draw.cpp index fd14b5e0a3..8853fc9479 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -300,7 +300,7 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) while (dc_x < stop4) { - rt_initcols(); + rt_initcols(nullptr); for (int zz = 4; zz; --zz) { pixels = img->GetColumn(frac >> FRACBITS, spanptr); From 7080180d478ae7158e4f2b8d1821089105764c8f Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 30 May 2016 13:32:24 +0200 Subject: [PATCH 003/912] Added menu option for toggling true color output on and off --- src/r_draw.cpp | 327 ++++++++++++++++++------------------- src/r_main.cpp | 1 + src/win32/fb_d3d9.cpp | 79 +++++---- src/win32/win32iface.h | 1 + wadsrc/static/language.enu | 1 + wadsrc/static/menudef.txt | 1 + 6 files changed, 216 insertions(+), 194 deletions(-) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index d2b694f05f..83a4472f39 
100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -178,7 +178,7 @@ FDynamicColormap ShadeFakeColormap[16]; BYTE identitymap[256]; EXTERN_CVAR (Int, r_columnmethod) - +EXTERN_CVAR (Bool, r_swtruecolor) void R_InitShadeMaps() { @@ -4135,181 +4135,180 @@ const BYTE *R_GetColumn (FTexture *tex, int col) // [RH] Initialize the column drawer pointers void R_InitColumnDrawers () { -#ifndef PALETTEOUTPUT - - R_DrawColumnHoriz = R_DrawColumnHorizP_RGBA_C; - R_DrawColumn = R_DrawColumnP_RGBA_C; - R_DrawFuzzColumn = R_DrawFuzzColumnP_RGBA_C; - R_DrawTranslatedColumn = R_DrawTranslatedColumnP_RGBA_C; - R_DrawShadedColumn = R_DrawShadedColumnP_RGBA_C; - R_DrawSpan = R_DrawSpanP_RGBA_C; - R_DrawSpanMasked = R_DrawSpanMaskedP_RGBA_C; - rt_map4cols = rt_map4cols_RGBA_c; - - R_DrawSpanTranslucent = R_DrawSpanTranslucentP_RGBA_C; - R_DrawSpanMaskedTranslucent = R_DrawSpanMaskedTranslucentP_RGBA_C; - R_DrawSpanAddClamp = R_DrawSpanAddClampP_RGBA_C; - R_DrawSpanMaskedAddClamp = R_DrawSpanMaskedAddClampP_RGBA_C; - R_FillColumn = R_FillColumnP_RGBA; - R_FillAddColumn = R_FillAddColumn_RGBA_C; - R_FillAddClampColumn = R_FillAddClampColumn_RGBA; - R_FillSubClampColumn = R_FillSubClampColumn_RGBA; - R_FillRevSubClampColumn = R_FillRevSubClampColumn_RGBA; - R_DrawAddColumn = R_DrawAddColumnP_RGBA_C; - R_DrawTlatedAddColumn = R_DrawTlatedAddColumnP_RGBA_C; - R_DrawAddClampColumn = R_DrawAddClampColumnP_RGBA_C; - R_DrawAddClampTranslatedColumn = R_DrawAddClampTranslatedColumnP_RGBA_C; - R_DrawSubClampColumn = R_DrawSubClampColumnP_RGBA_C; - R_DrawSubClampTranslatedColumn = R_DrawSubClampTranslatedColumnP_RGBA_C; - R_DrawRevSubClampColumn = R_DrawRevSubClampColumnP_RGBA_C; - R_DrawRevSubClampTranslatedColumn = R_DrawRevSubClampTranslatedColumnP_RGBA_C; - R_FillSpan = R_FillSpan_RGBA; - R_DrawFogBoundary = R_DrawFogBoundary_RGBA; - R_FillColumnHoriz = R_FillColumnHorizP_RGBA_C; - - R_DrawFogBoundary = R_DrawFogBoundary_RGBA; - R_MapColoredPlane = R_MapColoredPlane_RGBA; - R_DrawParticle = 
R_DrawParticle_RGBA; - - tmvline1_add = tmvline1_add_RGBA; - tmvline4_add = tmvline4_add_RGBA; - tmvline1_addclamp = tmvline1_addclamp_RGBA; - tmvline4_addclamp = tmvline4_addclamp_RGBA; - tmvline1_subclamp = tmvline1_subclamp_RGBA; - tmvline4_subclamp = tmvline4_subclamp_RGBA; - tmvline1_revsubclamp = tmvline1_revsubclamp_RGBA; - tmvline4_revsubclamp = tmvline4_revsubclamp_RGBA; - - rt_copy1col = rt_copy1col_RGBA_c; - rt_copy4cols = rt_copy4cols_RGBA_c; - rt_map1col = rt_map1col_RGBA_c; - rt_shaded4cols = rt_shaded4cols_RGBA_c; - rt_add4cols = rt_add4cols_RGBA_c; - rt_addclamp4cols = rt_addclamp4cols_RGBA_c; - rt_shaded1col = rt_shaded1col_RGBA_c; - rt_add1col = rt_add1col_RGBA_c; - rt_addclamp1col = rt_addclamp1col_RGBA_c; - rt_subclamp1col = rt_subclamp1col_RGBA_c; - rt_revsubclamp1col = rt_revsubclamp1col_RGBA_c; - rt_tlate1col = rt_tlate1col_RGBA_c; - rt_tlateadd1col = rt_tlateadd1col_RGBA_c; - rt_tlateaddclamp1col = rt_tlateaddclamp1col_RGBA_c; - rt_tlatesubclamp1col = rt_tlatesubclamp1col_RGBA_c; - rt_tlaterevsubclamp1col = rt_tlaterevsubclamp1col_RGBA_c; - rt_map4cols = rt_map4cols_RGBA_c; - rt_subclamp4cols = rt_subclamp4cols_RGBA_c; - rt_revsubclamp4cols = rt_revsubclamp4cols_RGBA_c; - rt_tlate4cols = rt_tlate4cols_RGBA_c; - rt_tlateadd4cols = rt_tlateadd4cols_RGBA_c; - rt_tlateaddclamp4cols = rt_tlateaddclamp4cols_RGBA_c; - rt_tlatesubclamp4cols = rt_tlatesubclamp4cols_RGBA_c; - rt_tlaterevsubclamp4cols = rt_tlaterevsubclamp4cols_RGBA_c; - rt_initcols = rt_initcols_rgba; - - dovline1 = vlinec1_RGBA; - doprevline1 = vlinec1_RGBA; - dovline4 = vlinec4_RGBA; - domvline1 = mvlinec1_RGBA; - domvline4 = mvlinec4_RGBA; - -#else - -#ifdef X86_ASM - R_DrawColumn = R_DrawColumnP_ASM; - R_DrawColumnHoriz = R_DrawColumnHorizP_ASM; - R_DrawFuzzColumn = R_DrawFuzzColumnP_ASM; - R_DrawTranslatedColumn = R_DrawTranslatedColumnP_C; - R_DrawShadedColumn = R_DrawShadedColumnP_C; - R_DrawSpan = R_DrawSpanP_ASM; - R_DrawSpanMasked = R_DrawSpanMaskedP_ASM; - if (CPU.Family <= 
5) + if (r_swtruecolor) { - rt_map4cols = rt_map4cols_asm2; + R_DrawColumnHoriz = R_DrawColumnHorizP_RGBA_C; + R_DrawColumn = R_DrawColumnP_RGBA_C; + R_DrawFuzzColumn = R_DrawFuzzColumnP_RGBA_C; + R_DrawTranslatedColumn = R_DrawTranslatedColumnP_RGBA_C; + R_DrawShadedColumn = R_DrawShadedColumnP_RGBA_C; + R_DrawSpan = R_DrawSpanP_RGBA_C; + R_DrawSpanMasked = R_DrawSpanMaskedP_RGBA_C; + rt_map4cols = rt_map4cols_RGBA_c; + + R_DrawSpanTranslucent = R_DrawSpanTranslucentP_RGBA_C; + R_DrawSpanMaskedTranslucent = R_DrawSpanMaskedTranslucentP_RGBA_C; + R_DrawSpanAddClamp = R_DrawSpanAddClampP_RGBA_C; + R_DrawSpanMaskedAddClamp = R_DrawSpanMaskedAddClampP_RGBA_C; + R_FillColumn = R_FillColumnP_RGBA; + R_FillAddColumn = R_FillAddColumn_RGBA_C; + R_FillAddClampColumn = R_FillAddClampColumn_RGBA; + R_FillSubClampColumn = R_FillSubClampColumn_RGBA; + R_FillRevSubClampColumn = R_FillRevSubClampColumn_RGBA; + R_DrawAddColumn = R_DrawAddColumnP_RGBA_C; + R_DrawTlatedAddColumn = R_DrawTlatedAddColumnP_RGBA_C; + R_DrawAddClampColumn = R_DrawAddClampColumnP_RGBA_C; + R_DrawAddClampTranslatedColumn = R_DrawAddClampTranslatedColumnP_RGBA_C; + R_DrawSubClampColumn = R_DrawSubClampColumnP_RGBA_C; + R_DrawSubClampTranslatedColumn = R_DrawSubClampTranslatedColumnP_RGBA_C; + R_DrawRevSubClampColumn = R_DrawRevSubClampColumnP_RGBA_C; + R_DrawRevSubClampTranslatedColumn = R_DrawRevSubClampTranslatedColumnP_RGBA_C; + R_FillSpan = R_FillSpan_RGBA; + R_DrawFogBoundary = R_DrawFogBoundary_RGBA; + R_FillColumnHoriz = R_FillColumnHorizP_RGBA_C; + + R_DrawFogBoundary = R_DrawFogBoundary_RGBA; + R_MapColoredPlane = R_MapColoredPlane_RGBA; + R_DrawParticle = R_DrawParticle_RGBA; + + tmvline1_add = tmvline1_add_RGBA; + tmvline4_add = tmvline4_add_RGBA; + tmvline1_addclamp = tmvline1_addclamp_RGBA; + tmvline4_addclamp = tmvline4_addclamp_RGBA; + tmvline1_subclamp = tmvline1_subclamp_RGBA; + tmvline4_subclamp = tmvline4_subclamp_RGBA; + tmvline1_revsubclamp = tmvline1_revsubclamp_RGBA; + 
tmvline4_revsubclamp = tmvline4_revsubclamp_RGBA; + + rt_copy1col = rt_copy1col_RGBA_c; + rt_copy4cols = rt_copy4cols_RGBA_c; + rt_map1col = rt_map1col_RGBA_c; + rt_shaded4cols = rt_shaded4cols_RGBA_c; + rt_add4cols = rt_add4cols_RGBA_c; + rt_addclamp4cols = rt_addclamp4cols_RGBA_c; + rt_shaded1col = rt_shaded1col_RGBA_c; + rt_add1col = rt_add1col_RGBA_c; + rt_addclamp1col = rt_addclamp1col_RGBA_c; + rt_subclamp1col = rt_subclamp1col_RGBA_c; + rt_revsubclamp1col = rt_revsubclamp1col_RGBA_c; + rt_tlate1col = rt_tlate1col_RGBA_c; + rt_tlateadd1col = rt_tlateadd1col_RGBA_c; + rt_tlateaddclamp1col = rt_tlateaddclamp1col_RGBA_c; + rt_tlatesubclamp1col = rt_tlatesubclamp1col_RGBA_c; + rt_tlaterevsubclamp1col = rt_tlaterevsubclamp1col_RGBA_c; + rt_map4cols = rt_map4cols_RGBA_c; + rt_subclamp4cols = rt_subclamp4cols_RGBA_c; + rt_revsubclamp4cols = rt_revsubclamp4cols_RGBA_c; + rt_tlate4cols = rt_tlate4cols_RGBA_c; + rt_tlateadd4cols = rt_tlateadd4cols_RGBA_c; + rt_tlateaddclamp4cols = rt_tlateaddclamp4cols_RGBA_c; + rt_tlatesubclamp4cols = rt_tlatesubclamp4cols_RGBA_c; + rt_tlaterevsubclamp4cols = rt_tlaterevsubclamp4cols_RGBA_c; + rt_initcols = rt_initcols_rgba; + + dovline1 = vlinec1_RGBA; + doprevline1 = vlinec1_RGBA; + dovline4 = vlinec4_RGBA; + domvline1 = mvlinec1_RGBA; + domvline4 = mvlinec4_RGBA; } else { - rt_map4cols = rt_map4cols_asm1; - } +#ifdef X86_ASM + R_DrawColumn = R_DrawColumnP_ASM; + R_DrawColumnHoriz = R_DrawColumnHorizP_ASM; + R_DrawFuzzColumn = R_DrawFuzzColumnP_ASM; + R_DrawTranslatedColumn = R_DrawTranslatedColumnP_C; + R_DrawShadedColumn = R_DrawShadedColumnP_C; + R_DrawSpan = R_DrawSpanP_ASM; + R_DrawSpanMasked = R_DrawSpanMaskedP_ASM; + if (CPU.Family <= 5) + { + rt_map4cols = rt_map4cols_asm2; + } + else + { + rt_map4cols = rt_map4cols_asm1; + } #else - R_DrawColumnHoriz = R_DrawColumnHorizP_C; - R_DrawColumn = R_DrawColumnP_C; - R_DrawFuzzColumn = R_DrawFuzzColumnP_C; - R_DrawTranslatedColumn = R_DrawTranslatedColumnP_C; - R_DrawShadedColumn = 
R_DrawShadedColumnP_C; - R_DrawSpan = R_DrawSpanP_C; - R_DrawSpanMasked = R_DrawSpanMaskedP_C; - rt_map4cols = rt_map4cols_c; + R_DrawColumnHoriz = R_DrawColumnHorizP_C; + R_DrawColumn = R_DrawColumnP_C; + R_DrawFuzzColumn = R_DrawFuzzColumnP_C; + R_DrawTranslatedColumn = R_DrawTranslatedColumnP_C; + R_DrawShadedColumn = R_DrawShadedColumnP_C; + R_DrawSpan = R_DrawSpanP_C; + R_DrawSpanMasked = R_DrawSpanMaskedP_C; + rt_map4cols = rt_map4cols_c; #endif - R_DrawSpanTranslucent = R_DrawSpanTranslucentP_C; - R_DrawSpanMaskedTranslucent = R_DrawSpanMaskedTranslucentP_C; - R_DrawSpanAddClamp = R_DrawSpanAddClampP_C; - R_DrawSpanMaskedAddClamp = R_DrawSpanMaskedAddClampP_C; - R_FillColumn = R_FillColumnP_C; - R_FillAddColumn = R_FillAddColumn_C; - R_FillAddClampColumn = R_FillAddClampColumn_C; - R_FillSubClampColumn = R_FillSubClampColumn_C; - R_FillRevSubClampColumn = R_FillRevSubClampColumn_C; - R_DrawAddColumn = R_DrawAddColumnP_C; - R_DrawTlatedAddColumn = R_DrawTlatedAddColumnP_C; - R_DrawAddClampColumn = R_DrawAddClampColumnP_C; - R_DrawAddClampTranslatedColumn = R_DrawAddClampTranslatedColumnP_C; - R_DrawSubClampColumn = R_DrawSubClampColumnP_C; - R_DrawSubClampTranslatedColumn = R_DrawSubClampTranslatedColumnP_C; - R_DrawRevSubClampColumn = R_DrawRevSubClampColumnP_C; - R_DrawRevSubClampTranslatedColumn = R_DrawRevSubClampTranslatedColumnP_C; - R_FillSpan = R_FillSpan_C; - R_DrawFogBoundary = R_DrawFogBoundary_C; - R_FillColumnHoriz = R_FillColumnHorizP_C; + R_DrawSpanTranslucent = R_DrawSpanTranslucentP_C; + R_DrawSpanMaskedTranslucent = R_DrawSpanMaskedTranslucentP_C; + R_DrawSpanAddClamp = R_DrawSpanAddClampP_C; + R_DrawSpanMaskedAddClamp = R_DrawSpanMaskedAddClampP_C; + R_FillColumn = R_FillColumnP_C; + R_FillAddColumn = R_FillAddColumn_C; + R_FillAddClampColumn = R_FillAddClampColumn_C; + R_FillSubClampColumn = R_FillSubClampColumn_C; + R_FillRevSubClampColumn = R_FillRevSubClampColumn_C; + R_DrawAddColumn = R_DrawAddColumnP_C; + R_DrawTlatedAddColumn = 
R_DrawTlatedAddColumnP_C; + R_DrawAddClampColumn = R_DrawAddClampColumnP_C; + R_DrawAddClampTranslatedColumn = R_DrawAddClampTranslatedColumnP_C; + R_DrawSubClampColumn = R_DrawSubClampColumnP_C; + R_DrawSubClampTranslatedColumn = R_DrawSubClampTranslatedColumnP_C; + R_DrawRevSubClampColumn = R_DrawRevSubClampColumnP_C; + R_DrawRevSubClampTranslatedColumn = R_DrawRevSubClampTranslatedColumnP_C; + R_FillSpan = R_FillSpan_C; + R_DrawFogBoundary = R_DrawFogBoundary_C; + R_FillColumnHoriz = R_FillColumnHorizP_C; - R_DrawFogBoundary = R_DrawFogBoundary_C; - R_MapColoredPlane = R_MapColoredPlane_C; - R_DrawParticle = R_DrawParticle_C; + R_DrawFogBoundary = R_DrawFogBoundary_C; + R_MapColoredPlane = R_MapColoredPlane_C; + R_DrawParticle = R_DrawParticle_C; - tmvline1_add = tmvline1_add_C; - tmvline4_add = tmvline4_add_C; - tmvline1_addclamp = tmvline1_addclamp_C; - tmvline4_addclamp = tmvline4_addclamp_C; - tmvline1_subclamp = tmvline1_subclamp_C; - tmvline4_subclamp = tmvline4_subclamp_C; - tmvline1_revsubclamp = tmvline1_revsubclamp_C; - tmvline4_revsubclamp = tmvline4_revsubclamp_C; + tmvline1_add = tmvline1_add_C; + tmvline4_add = tmvline4_add_C; + tmvline1_addclamp = tmvline1_addclamp_C; + tmvline4_addclamp = tmvline4_addclamp_C; + tmvline1_subclamp = tmvline1_subclamp_C; + tmvline4_subclamp = tmvline4_subclamp_C; + tmvline1_revsubclamp = tmvline1_revsubclamp_C; + tmvline4_revsubclamp = tmvline4_revsubclamp_C; #ifdef X86_ASM - rt_copy1col = rt_copy1col_asm; - rt_copy4cols = rt_copy4cols_asm; - rt_map1col = rt_map1col_asm; - rt_shaded4cols = rt_shaded4cols_asm; - rt_add4cols = rt_add4cols_asm; - rt_addclamp4cols = rt_addclamp4cols_asm; + rt_copy1col = rt_copy1col_asm; + rt_copy4cols = rt_copy4cols_asm; + rt_map1col = rt_map1col_asm; + rt_shaded4cols = rt_shaded4cols_asm; + rt_add4cols = rt_add4cols_asm; + rt_addclamp4cols = rt_addclamp4cols_asm; #else - rt_copy1col = rt_copy1col_c; - rt_copy4cols = rt_copy4cols_c; - rt_map1col = rt_map1col_c; - rt_shaded4cols = 
rt_shaded4cols_c; - rt_add4cols = rt_add4cols_c; - rt_addclamp4cols = rt_addclamp4cols_c; -#endif - rt_shaded1col = rt_shaded1col_c; - rt_add1col = rt_add1col_c; - rt_addclamp1col = rt_addclamp1col_c; - rt_subclamp1col = rt_subclamp1col_c; - rt_revsubclamp1col = rt_revsubclamp1col_c; - rt_tlate1col = rt_tlate1col_c; - rt_tlateadd1col = rt_tlateadd1col_c; - rt_tlateaddclamp1col = rt_tlateaddclamp1col_c; - rt_tlatesubclamp1col = rt_tlatesubclamp1col_c; - rt_tlaterevsubclamp1col = rt_tlaterevsubclamp1col_c; - rt_map4cols = rt_map4cols_c; - rt_subclamp4cols = rt_subclamp4cols_c; - rt_revsubclamp4cols = rt_revsubclamp4cols_c; - rt_tlate4cols = rt_tlate4cols_c; - rt_tlateadd4cols = rt_tlateadd4cols_c; - rt_tlateaddclamp4cols = rt_tlateaddclamp4cols_c; - rt_tlatesubclamp4cols = rt_tlatesubclamp4cols_c; - rt_tlaterevsubclamp4cols = rt_tlaterevsubclamp4cols_c; - rt_initcols = rt_initcols_pal; - + rt_copy1col = rt_copy1col_c; + rt_copy4cols = rt_copy4cols_c; + rt_map1col = rt_map1col_c; + rt_shaded4cols = rt_shaded4cols_c; + rt_add4cols = rt_add4cols_c; + rt_addclamp4cols = rt_addclamp4cols_c; #endif + rt_shaded1col = rt_shaded1col_c; + rt_add1col = rt_add1col_c; + rt_addclamp1col = rt_addclamp1col_c; + rt_subclamp1col = rt_subclamp1col_c; + rt_revsubclamp1col = rt_revsubclamp1col_c; + rt_tlate1col = rt_tlate1col_c; + rt_tlateadd1col = rt_tlateadd1col_c; + rt_tlateaddclamp1col = rt_tlateaddclamp1col_c; + rt_tlatesubclamp1col = rt_tlatesubclamp1col_c; + rt_tlaterevsubclamp1col = rt_tlaterevsubclamp1col_c; + rt_map4cols = rt_map4cols_c; + rt_subclamp4cols = rt_subclamp4cols_c; + rt_revsubclamp4cols = rt_revsubclamp4cols_c; + rt_tlate4cols = rt_tlate4cols_c; + rt_tlateadd4cols = rt_tlateadd4cols_c; + rt_tlateaddclamp4cols = rt_tlateaddclamp4cols_c; + rt_tlatesubclamp4cols = rt_tlatesubclamp4cols_c; + rt_tlaterevsubclamp4cols = rt_tlaterevsubclamp4cols_c; + rt_initcols = rt_initcols_pal; + } } // [RH] Choose column drawers in a single place diff --git a/src/r_main.cpp 
b/src/r_main.cpp index b7723d07d6..d85cd62a07 100644 --- a/src/r_main.cpp +++ b/src/r_main.cpp @@ -103,6 +103,7 @@ bool r_dontmaplines; CVAR (String, r_viewsize, "", CVAR_NOSET) CVAR (Bool, r_shadercolormaps, true, CVAR_ARCHIVE) +CVAR (Bool, r_swtruecolor, false, CVAR_ARCHIVE) double r_BaseVisibility; double r_WallVisibility; diff --git a/src/win32/fb_d3d9.cpp b/src/win32/fb_d3d9.cpp index 14a78d4cd6..0cc9045ee3 100644 --- a/src/win32/fb_d3d9.cpp +++ b/src/win32/fb_d3d9.cpp @@ -187,6 +187,7 @@ EXTERN_CVAR (Float, Gamma) EXTERN_CVAR (Bool, vid_vsync) EXTERN_CVAR (Float, transsouls) EXTERN_CVAR (Int, vid_refreshrate) +EXTERN_CVAR (Bool, r_swtruecolor) extern IDirect3D9 *D3D; @@ -765,11 +766,7 @@ void D3DFB::KillNativeTexs() bool D3DFB::CreateFBTexture () { -#ifndef PALETTEOUTPUT - D3DFORMAT FBFormat = D3DFMT_A8R8G8B8; -#else - D3DFORMAT FBFormat = D3DFMT_L8; -#endif + FBFormat = r_swtruecolor ? D3DFMT_A8R8G8B8 : D3DFMT_L8; if (FAILED(D3DDevice->CreateTexture(Width, Height, 1, D3DUSAGE_DYNAMIC, FBFormat, D3DPOOL_DEFAULT, &FBTexture, NULL))) { @@ -1310,20 +1307,45 @@ void D3DFB::Draw3DPart(bool copy3d) SUCCEEDED(FBTexture->LockRect (0, &lockrect, NULL, D3DLOCK_DISCARD))) || SUCCEEDED(FBTexture->LockRect (0, &lockrect, &texrect, 0))) { - if (lockrect.Pitch == Pitch * sizeof(canvas_pixel_t) && Pitch == Width) + if (r_swtruecolor && FBFormat == D3DFMT_A8R8G8B8) { - memcpy (lockrect.pBits, MemBuffer, Width * Height * sizeof(canvas_pixel_t)); + if (lockrect.Pitch == Pitch * sizeof(uint32_t) && Pitch == Width) + { + memcpy(lockrect.pBits, MemBuffer, Width * Height * sizeof(uint32_t)); + } + else + { + uint32_t *dest = (uint32_t *)lockrect.pBits; + uint32_t *src = MemBuffer; + for (int y = 0; y < Height; y++) + { + memcpy(dest, src, Width * sizeof(uint32_t)); + dest = reinterpret_cast(reinterpret_cast(dest) + lockrect.Pitch); + src += Pitch; + } + } + } + else if (!r_swtruecolor && FBFormat == D3DFMT_L8) + { + if (lockrect.Pitch == Pitch && Pitch == Width) + { + 
memcpy(lockrect.pBits, MemBuffer, Width * Height); + } + else + { + BYTE *dest = (BYTE *)lockrect.pBits; + BYTE *src = (BYTE *)MemBuffer; + for (int y = 0; y < Height; y++) + { + memcpy(dest, src, Width); + dest = reinterpret_cast(reinterpret_cast(dest) + lockrect.Pitch); + src += Pitch; + } + } } else { - canvas_pixel_t *dest = (canvas_pixel_t *)lockrect.pBits; - canvas_pixel_t *src = MemBuffer; - for (int y = 0; y < Height; y++) - { - memcpy (dest, src, Width * sizeof(canvas_pixel_t)); - dest = reinterpret_cast(reinterpret_cast(dest) + lockrect.Pitch); - src += Pitch; - } + memset(lockrect.pBits, 0, lockrect.Pitch * Height); } FBTexture->UnlockRect (0); } @@ -1355,11 +1377,10 @@ void D3DFB::Draw3DPart(bool copy3d) memset(Constant, 0, sizeof(Constant)); SetAlphaBlend(D3DBLENDOP(0)); EnableAlphaTest(FALSE); -#ifndef PALETTEOUTPUT - SetPixelShader(Shaders[SHADER_NormalColor]); -#else - SetPixelShader(Shaders[SHADER_NormalColorPal]); -#endif + if (r_swtruecolor) + SetPixelShader(Shaders[SHADER_NormalColor]); + else + SetPixelShader(Shaders[SHADER_NormalColorPal]); if (copy3d) { FBVERTEX verts[4]; @@ -1377,11 +1398,10 @@ void D3DFB::Draw3DPart(bool copy3d) realfixedcolormap->ColorizeStart[1]/2, realfixedcolormap->ColorizeStart[2]/2, 0); color1 = D3DCOLOR_COLORVALUE(realfixedcolormap->ColorizeEnd[0]/2, realfixedcolormap->ColorizeEnd[1]/2, realfixedcolormap->ColorizeEnd[2]/2, 1); -#ifndef PALETTEOUTPUT - SetPixelShader(Shaders[SHADER_SpecialColormap]); -#else - SetPixelShader(Shaders[SHADER_SpecialColormapPal]); -#endif + if (r_swtruecolor) + SetPixelShader(Shaders[SHADER_SpecialColormap]); + else + SetPixelShader(Shaders[SHADER_SpecialColormapPal]); } } else @@ -1392,11 +1412,10 @@ void D3DFB::Draw3DPart(bool copy3d) CalcFullscreenCoords(verts, Accel2D, false, color0, color1); D3DDevice->DrawPrimitiveUP(D3DPT_TRIANGLEFAN, 2, verts, sizeof(FBVERTEX)); } -#ifndef PALETTEOUTPUT - SetPixelShader(Shaders[SHADER_NormalColor]); -#else - 
SetPixelShader(Shaders[SHADER_NormalColorPal]); -#endif + if (r_swtruecolor) + SetPixelShader(Shaders[SHADER_NormalColor]); + else + SetPixelShader(Shaders[SHADER_NormalColorPal]); } //========================================================================== diff --git a/src/win32/win32iface.h b/src/win32/win32iface.h index 73a2c6966e..d26765100c 100644 --- a/src/win32/win32iface.h +++ b/src/win32/win32iface.h @@ -424,6 +424,7 @@ private: bool NeedPalUpdate; bool NeedGammaUpdate; int FBWidth, FBHeight; + D3DFORMAT FBFormat; bool VSync; RECT BlendingRect; int In2D; diff --git a/wadsrc/static/language.enu b/wadsrc/static/language.enu index 62761a417c..001172185a 100644 --- a/wadsrc/static/language.enu +++ b/wadsrc/static/language.enu @@ -1780,6 +1780,7 @@ DSPLYMNU_BRIGHTNESS = "Brightness"; DSPLYMNU_VSYNC = "Vertical Sync"; DSPLYMNU_CAPFPS = "Rendering Interpolation"; DSPLYMNU_COLUMNMETHOD = "Column render mode"; +DSPLYMNU_TRUECOLOR = "True color output"; DSPLYMNU_WIPETYPE = "Screen wipe style"; DSPLYMNU_SHOWENDOOM = "Show ENDOOM screen"; DSPLYMNU_PALLETEHACK = "DirectDraw palette hack"; // Not used diff --git a/wadsrc/static/menudef.txt b/wadsrc/static/menudef.txt index ff395ff2a4..93e33ce799 100644 --- a/wadsrc/static/menudef.txt +++ b/wadsrc/static/menudef.txt @@ -661,6 +661,7 @@ OptionMenu "VideoOptions" Option "$DSPLYMNU_VSYNC", "vid_vsync", "OnOff" Option "$DSPLYMNU_CAPFPS", "cl_capfps", "OffOn" Option "$DSPLYMNU_COLUMNMETHOD", "r_columnmethod", "ColumnMethods" + Option "$DSPLYMNU_TRUECOLOR", "r_swtruecolor", "OnOff" StaticText " " Option "$DSPLYMNU_WIPETYPE", "wipetype", "Wipes" From 20b7743ec39088186e49142146d40c43e0cccae5 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 31 May 2016 01:49:39 +0200 Subject: [PATCH 004/912] Added R_SetColorMapLight and R_SetDSColorMapLight --- src/f_wipe.cpp | 37 +++++++------ src/r_draw.cpp | 27 ++++++++++ src/r_draw.h | 6 +++ src/r_main.h | 4 -- src/r_plane.cpp | 9 +--- src/r_segs.cpp | 90 
++++++++++--------------------- src/r_things.cpp | 24 ++------- src/textures/canvastexture.cpp | 9 ++-- src/v_draw.cpp | 49 +++++++++-------- src/v_video.cpp | 98 ++++++++++++++++++---------------- 10 files changed, 166 insertions(+), 187 deletions(-) diff --git a/src/f_wipe.cpp b/src/f_wipe.cpp index c6f20cadbd..a86f93fc4d 100644 --- a/src/f_wipe.cpp +++ b/src/f_wipe.cpp @@ -33,6 +33,8 @@ // SCREEN WIPE PACKAGE // +EXTERN_CVAR(Bool, r_swtruecolor) + static int CurrentWipeType; static short *wipe_scr_start; @@ -77,10 +79,8 @@ bool wipe_initMelt (int ticks) { int i, r; -#ifdef PALETTEOUTPUT // copy start screen to main screen - screen->DrawBlock (0, 0, SCREENWIDTH, SCREENHEIGHT, (BYTE *)wipe_scr_start); -#endif + screen->DrawBlock(0, 0, SCREENWIDTH, SCREENHEIGHT, (canvas_pixel_t *)wipe_scr_start); // makes this wipe faster (in theory) // to have stuff in column-major format @@ -301,9 +301,6 @@ bool wipe_doBurn (int ticks) } else { -#ifndef PALETTEOUTPUT - // TO DO: RGB32k.All -#else int bglevel = 64-fglevel; DWORD *fg2rgb = Col2RGB8[fglevel]; DWORD *bg2rgb = Col2RGB8[bglevel]; @@ -311,7 +308,6 @@ bool wipe_doBurn (int ticks) DWORD bg = bg2rgb[fromold[x]]; fg = (fg+bg) | 0x1f07c1f; to[x] = RGB32k.All[fg & (fg>>15)]; -#endif done = false; } } @@ -342,9 +338,7 @@ bool wipe_doFade (int ticks) fade += ticks * 2; if (fade > 64) { -#ifdef PALETTEOUTPUT - screen->DrawBlock (0, 0, SCREENWIDTH, SCREENHEIGHT, (BYTE *)wipe_scr_end); -#endif + screen->DrawBlock (0, 0, SCREENWIDTH, SCREENHEIGHT, (canvas_pixel_t *)wipe_scr_end); return true; } else @@ -391,14 +385,15 @@ static bool (*wipes[])(int) = // Returns true if the wipe should be performed. 
bool wipe_StartScreen (int type) { + if (r_swtruecolor) + return false; + CurrentWipeType = clamp(type, 0, wipe_NUMWIPES - 1); if (CurrentWipeType) { wipe_scr_start = new short[SCREENWIDTH * SCREENHEIGHT / 2]; -#ifdef PALETTEOUTPUT - screen->GetBlock (0, 0, SCREENWIDTH, SCREENHEIGHT, (BYTE *)wipe_scr_start); -#endif + screen->GetBlock (0, 0, SCREENWIDTH, SCREENHEIGHT, (canvas_pixel_t *)wipe_scr_start); return true; } return false; @@ -406,13 +401,15 @@ bool wipe_StartScreen (int type) void wipe_EndScreen (void) { + if (r_swtruecolor) + return; + if (CurrentWipeType) { wipe_scr_end = new short[SCREENWIDTH * SCREENHEIGHT / 2]; -#ifdef PALETTEOUTPUT - screen->GetBlock (0, 0, SCREENWIDTH, SCREENHEIGHT, (BYTE *)wipe_scr_end); - screen->DrawBlock (0, 0, SCREENWIDTH, SCREENHEIGHT, (BYTE *)wipe_scr_start); // restore start scr. -#endif + screen->GetBlock (0, 0, SCREENWIDTH, SCREENHEIGHT, (canvas_pixel_t *)wipe_scr_end); + screen->DrawBlock (0, 0, SCREENWIDTH, SCREENHEIGHT, (canvas_pixel_t *)wipe_scr_start); // restore start scr. 
+ // Initialize the wipe (*wipes[(CurrentWipeType-1)*3])(0); } @@ -423,6 +420,9 @@ bool wipe_ScreenWipe (int ticks) { bool rc; + if (r_swtruecolor) + return true; + if (CurrentWipeType == wipe_None) return true; @@ -436,6 +436,9 @@ bool wipe_ScreenWipe (int ticks) // Final things for the wipe void wipe_Cleanup() { + if (r_swtruecolor) + return; + if (wipe_scr_start != NULL) { delete[] wipe_scr_start; diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 83a4472f39..cd34a71b4a 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -4601,3 +4601,30 @@ bool R_GetTransMaskDrawers (fixed_t (**tmvline1)(), void (**tmvline4)()) return false; } +void R_SetColorMapLight(BYTE *basecolormapdata, float light, int shade) +{ + if (r_swtruecolor) + { + dc_colormap = basecolormapdata; + dc_light = LIGHTSCALE(light, shade); + } + else + { + dc_colormap = basecolormapdata + (GETPALOOKUP(light, shade) << COLORMAPSHIFT); + dc_light = 0; + } +} + +void R_SetDSColorMapLight(BYTE *basecolormapdata, float light, int shade) +{ + if (r_swtruecolor) + { + ds_colormap = basecolormapdata; + ds_light = LIGHTSCALE(light, shade); + } + else + { + ds_colormap = basecolormapdata + (GETPALOOKUP(light, shade) << COLORMAPSHIFT); + ds_light = 0; + } +} diff --git a/src/r_draw.h b/src/r_draw.h index 17698c3609..db109dbee5 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -386,4 +386,10 @@ void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_ // transmaskwallscan is like maskwallscan, but it can also blend to the background void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int col)=R_GetColumn); +// Sets dc_colormap and dc_light to their appropriate values depending on the output format (pal vs true color) +void R_SetColorMapLight(BYTE *base_colormap, float light, int shade); + +// Same as R_SetColorMapLight, but for ds_colormap and ds_light +void R_SetDSColorMapLight(BYTE *base_colormap, 
float light, int shade); + #endif diff --git a/src/r_main.h b/src/r_main.h index 37a41a7631..c1034ea3eb 100644 --- a/src/r_main.h +++ b/src/r_main.h @@ -86,8 +86,6 @@ extern bool r_dontmaplines; // This is used instead of GETPALOOKUP when ds_colormap+dc_colormap is set to the base colormap #define LIGHTSCALE(vis,shade) ((shade)-FLOAT2FIXED(MIN(MAXLIGHTVIS,double(vis)))) -#ifndef PALETTEOUTPUT - // calculates the light constant passed to the shade_pal_index function inline uint32_t calc_light_multiplier(dsfixed_t light) { @@ -108,8 +106,6 @@ inline uint32_t shade_pal_index(uint32_t index, uint32_t light) return 0xff000000 | (red << 16) | (green << 8) | blue; } -#endif - extern double GlobVis; void R_SetVisibility(double visibility); diff --git a/src/r_plane.cpp b/src/r_plane.cpp index c8258a1ba0..8d0c882ba4 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -227,14 +227,7 @@ void R_MapPlane (int y, int x1) if (plane_shade) { // Determine lighting based on the span's distance from the viewer. 
-#ifndef PALETTEOUTPUT - ds_colormap = basecolormap->Maps; - ds_light = LIGHTSCALE(GlobVis * fabs(CenterY - y), planeshade); -#else - ds_colormap = basecolormap->Maps + (GETPALOOKUP ( - GlobVis * fabs(CenterY - y), planeshade) << COLORMAPSHIFT); - ds_light = 0; -#endif + R_SetDSColorMapLight(basecolormap->Maps, GlobVis * fabs(CenterY - y), planeshade); } #ifdef X86_ASM diff --git a/src/r_segs.cpp b/src/r_segs.cpp index fb27a99de7..548cd994f3 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -61,6 +61,8 @@ CVAR(Bool, r_np2, true, 0) //CVAR (Int, ty, 8, 0) //CVAR (Int, tx, 8, 0) +EXTERN_CVAR(Bool, r_swtruecolor) + #define HEIGHTBITS 12 #define HEIGHTSHIFT (FRACBITS-HEIGHTBITS) @@ -1138,13 +1140,7 @@ void wallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *l if (!fixed) { // calculate lighting -#ifndef PALETTEOUTPUT - dc_colormap = basecolormapdata; - dc_light = LIGHTSCALE(light, wallshade); -#else - dc_colormap = basecolormapdata + (GETPALOOKUP (light, wallshade) << COLORMAPSHIFT); - dc_light = 0; -#endif + R_SetColorMapLight(basecolormapdata, light, wallshade); } dc_source = getcol (rw_pic, (lwal[x] + xoffset) >> FRACBITS); @@ -1184,13 +1180,16 @@ void wallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *l for (z = 0; z < 4; ++z) { light += rw_lightstep; -#ifndef PALETTEOUTPUT - palookupoffse[z] = basecolormapdata; - palookuplight[z] = LIGHTSCALE(light, wallshade); -#else - palookupoffse[z] = basecolormapdata + (GETPALOOKUP(12/*light*/, wallshade) << COLORMAPSHIFT); - palookuplight[z] = 0; -#endif + if (r_swtruecolor) + { + palookupoffse[z] = basecolormapdata; + palookuplight[z] = LIGHTSCALE(light, wallshade); + } + else + { + palookupoffse[z] = basecolormapdata + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT); + palookuplight[z] = 0; + } } } @@ -1245,13 +1244,7 @@ void wallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *l if (!fixed) { // calculate lighting -#ifndef PALETTEOUTPUT - dc_colormap = 
basecolormapdata; - dc_light = LIGHTSCALE(light, wallshade); -#else - dc_colormap = basecolormapdata + (GETPALOOKUP (light, wallshade) << COLORMAPSHIFT); - dc_light = 0; -#endif + R_SetColorMapLight(basecolormapdata, light, wallshade); } dc_source = getcol (rw_pic, (lwal[x] + xoffset) >> FRACBITS); @@ -1690,13 +1683,7 @@ void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, f if (!fixed) { // calculate lighting -#ifndef PALETTEOUTPUT - dc_colormap = basecolormapdata; - dc_light = LIGHTSCALE(light, wallshade); -#else - dc_colormap = basecolormapdata + (GETPALOOKUP (light, wallshade) << COLORMAPSHIFT); - dc_light = 0; -#endif + R_SetColorMapLight(basecolormapdata, light, wallshade); } dc_source = getcol (rw_pic, (lwal[x] + xoffset) >> FRACBITS); @@ -1734,12 +1721,15 @@ void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, f for (z = 0; z < 4; ++z) { light += rw_lightstep; -#ifndef PALETTEOUTPUT - palookupoffse[z] = basecolormapdata; - palookuplight[z] = LIGHTSCALE(light, wallshade); -#else - palookupoffse[z] = basecolormapdata + (GETPALOOKUP (light, wallshade) << COLORMAPSHIFT); -#endif + if (r_swtruecolor) + { + palookupoffse[z] = basecolormapdata; + palookuplight[z] = LIGHTSCALE(light, wallshade); + } + else + { + palookupoffse[z] = basecolormapdata + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT); + } } } @@ -1795,13 +1785,7 @@ void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, f if (!fixed) { // calculate lighting -#ifndef PALETTEOUTPUT - dc_colormap = basecolormapdata; - dc_light = LIGHTSCALE(light, wallshade); -#else - dc_colormap = basecolormapdata + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT); - dc_light = 0; -#endif + R_SetColorMapLight(basecolormapdata, light, wallshade); } dc_source = getcol (rw_pic, (lwal[x] + xoffset) >> FRACBITS); @@ -3295,13 +3279,7 @@ static void R_RenderDecal (side_t *wall, DBaseDecal *decal, drawseg_t *clipper, { if (calclighting) { // calculate 
lighting -#ifndef PALETTEOUTPUT - dc_colormap = usecolormap->Maps; - dc_light = LIGHTSCALE(rw_light, wallshade); -#else - dc_colormap = usecolormap->Maps + (GETPALOOKUP (rw_light, wallshade) << COLORMAPSHIFT); - dc_light = 0; -#endif + R_SetColorMapLight(usecolormap->Maps, rw_light, wallshade); } R_WallSpriteColumn (R_DrawMaskedColumn); dc_x++; @@ -3311,13 +3289,7 @@ static void R_RenderDecal (side_t *wall, DBaseDecal *decal, drawseg_t *clipper, { if (calclighting) { // calculate lighting -#ifndef PALETTEOUTPUT - dc_colormap = usecolormap->Maps; - dc_light = LIGHTSCALE(rw_light, wallshade); -#else - dc_colormap = usecolormap->Maps + (GETPALOOKUP (rw_light, wallshade) << COLORMAPSHIFT); - dc_light = 0; -#endif + R_SetColorMapLight(usecolormap->Maps, rw_light, wallshade); } rt_initcols(nullptr); for (int zz = 4; zz; --zz) @@ -3332,13 +3304,7 @@ static void R_RenderDecal (side_t *wall, DBaseDecal *decal, drawseg_t *clipper, { if (calclighting) { // calculate lighting -#ifndef PALETTEOUTPUT - dc_colormap = usecolormap->Maps; - dc_light = LIGHTSCALE(rw_light, wallshade); -#else - dc_colormap = usecolormap->Maps + (GETPALOOKUP (rw_light, wallshade) << COLORMAPSHIFT); - dc_light = 0; -#endif + R_SetColorMapLight(usecolormap->Maps, rw_light, wallshade); } R_WallSpriteColumn (R_DrawMaskedColumn); dc_x++; diff --git a/src/r_things.cpp b/src/r_things.cpp index a6f6aea287..22538bd406 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -594,13 +594,7 @@ void R_DrawWallSprite(vissprite_t *spr) { if (calclighting) { // calculate lighting -#ifndef PALETTEOUTPUT - dc_colormap = usecolormap->Maps; - dc_light = LIGHTSCALE(rw_light, shade); -#else - dc_colormap = usecolormap->Maps + (GETPALOOKUP (rw_light, shade) << COLORMAPSHIFT); - dc_light = FLOAT2FIXED(MAXLIGHTVIS); -#endif + R_SetColorMapLight(usecolormap->Maps, rw_light, shade); } if (!R_ClipSpriteColumnWithPortals(spr)) R_WallSpriteColumn(R_DrawMaskedColumn); @@ -611,13 +605,7 @@ void R_DrawWallSprite(vissprite_t *spr) { if 
(calclighting) { // calculate lighting -#ifndef PALETTEOUTPUT - dc_colormap = usecolormap->Maps; - dc_light = LIGHTSCALE(rw_light, shade); -#else - dc_colormap = usecolormap->Maps + (GETPALOOKUP (rw_light, shade) << COLORMAPSHIFT); - dc_light = FLOAT2FIXED(MAXLIGHTVIS); -#endif + R_SetColorMapLight(usecolormap->Maps, rw_light, shade); } rt_initcols(nullptr); for (int zz = 4; zz; --zz) @@ -633,13 +621,7 @@ void R_DrawWallSprite(vissprite_t *spr) { if (calclighting) { // calculate lighting -#ifndef PALETTEOUTPUT - dc_colormap = usecolormap->Maps; - dc_light = LIGHTSCALE(rw_light, shade); -#else - dc_colormap = usecolormap->Maps + (GETPALOOKUP (rw_light, shade) << COLORMAPSHIFT); - dc_light = FLOAT2FIXED(MAXLIGHTVIS); -#endif + R_SetColorMapLight(usecolormap->Maps, rw_light, shade); } if (!R_ClipSpriteColumnWithPortals(spr)) R_WallSpriteColumn(R_DrawMaskedColumn); diff --git a/src/textures/canvastexture.cpp b/src/textures/canvastexture.cpp index 7388c13060..d1f70439f4 100644 --- a/src/textures/canvastexture.cpp +++ b/src/textures/canvastexture.cpp @@ -106,10 +106,7 @@ void FCanvasTexture::MakeTexture () Canvas = new DSimpleCanvas (Width, Height); Canvas->Lock (); GC::AddSoftRoot(Canvas); -#ifndef PALETTEOUTPUT - Pixels = new BYTE[Width*Height]; - bPixelsAllocated = true; -#else + if (Width != Height || Width != Canvas->GetPitch()) { Pixels = new BYTE[Width*Height]; @@ -117,10 +114,10 @@ void FCanvasTexture::MakeTexture () } else { - Pixels = Canvas->GetBuffer(); + Pixels = (BYTE*)Canvas->GetBuffer(); bPixelsAllocated = false; } -#endif + // Draw a special "unrendered" initial texture into the buffer. 
memset (Pixels, 0, Width*Height/2); memset (Pixels+Width*Height/2, 255, Width*Height/2); diff --git a/src/v_draw.cpp b/src/v_draw.cpp index 8853fc9479..984375f255 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -77,6 +77,8 @@ extern "C" short spanend[MAXHEIGHT]; CVAR (Bool, hud_scale, false, CVAR_ARCHIVE); +EXTERN_CVAR(Bool, r_swtruecolor) + // For routines that take RGB colors, cache the previous lookup in case there // are several repetitions with the same color. static int LastPal = -1; @@ -1017,32 +1019,35 @@ void DCanvas::PUTTRANSDOT (int xx, int yy, int basecolor, int level) oldyyshifted = yy * GetPitch(); } -#ifndef PALETTEOUTPUT - canvas_pixel_t *spot = GetBuffer() + oldyyshifted + xx; + if (r_swtruecolor) + { + canvas_pixel_t *spot = GetBuffer() + oldyyshifted + xx; - uint32_t fg = shade_pal_index(basecolor, calc_light_multiplier(0)); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; + uint32_t fg = shade_pal_index(basecolor, calc_light_multiplier(0)); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; - uint32_t bg_red = (*spot >> 16) & 0xff; - uint32_t bg_green = (*spot >> 8) & 0xff; - uint32_t bg_blue = (*spot) & 0xff; + uint32_t bg_red = (*spot >> 16) & 0xff; + uint32_t bg_green = (*spot >> 8) & 0xff; + uint32_t bg_blue = (*spot) & 0xff; - uint32_t red = (fg_red + bg_red + 1) / 2; - uint32_t green = (fg_green + bg_green + 1) / 2; - uint32_t blue = (fg_blue + bg_blue + 1) / 2; + uint32_t red = (fg_red + bg_red + 1) / 2; + uint32_t green = (fg_green + bg_green + 1) / 2; + uint32_t blue = (fg_blue + bg_blue + 1) / 2; - *spot = 0xff000000 | (red << 16) | (green << 8) | blue; -#else - canvas_pixel_t *spot = GetBuffer() + oldyyshifted + xx; - DWORD *bg2rgb = Col2RGB8[1+level]; - DWORD *fg2rgb = Col2RGB8[63-level]; - DWORD fg = fg2rgb[basecolor]; - DWORD bg = bg2rgb[*spot]; - bg = (fg+bg) | 0x1f07c1f; - *spot = RGB32k.All[bg&(bg>>15)]; 
-#endif + *spot = 0xff000000 | (red << 16) | (green << 8) | blue; + } + else + { + canvas_pixel_t *spot = GetBuffer() + oldyyshifted + xx; + DWORD *bg2rgb = Col2RGB8[1+level]; + DWORD *fg2rgb = Col2RGB8[63-level]; + DWORD fg = fg2rgb[basecolor]; + DWORD bg = bg2rgb[*spot]; + bg = (fg+bg) | 0x1f07c1f; + *spot = RGB32k.All[bg&(bg>>15)]; + } } void DCanvas::DrawLine(int x0, int y0, int x1, int y1, int palColor, uint32 realcolor) diff --git a/src/v_video.cpp b/src/v_video.cpp index b6a626753d..2fb46e88ae 100644 --- a/src/v_video.cpp +++ b/src/v_video.cpp @@ -65,6 +65,7 @@ #include "menu/menu.h" #include "r_data/voxels.h" +EXTERN_CVAR(Bool, r_swtruecolor) FRenderer *Renderer; @@ -367,65 +368,68 @@ void DCanvas::Dim (PalEntry color, float damount, int x1, int y1, int w, int h) spot = Buffer + x1 + y1*Pitch; gap = Pitch - w; -#ifndef PALETTEOUTPUT - uint32_t fg = color.d; - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t alpha = (uint32_t)clamp(damount * 256 + 0.5f, 0.0f, 256.0f); - uint32_t inv_alpha = 256 - alpha; - - fg_red *= alpha; - fg_green *= alpha; - fg_blue *= alpha; - - for (y = h; y != 0; y--) + if (r_swtruecolor) { - for (x = w; x != 0; x--) + uint32_t fg = color.d; + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t alpha = (uint32_t)clamp(damount * 256 + 0.5f, 0.0f, 256.0f); + uint32_t inv_alpha = 256 - alpha; + + fg_red *= alpha; + fg_green *= alpha; + fg_blue *= alpha; + + for (y = h; y != 0; y--) { - uint32_t bg_red = (*spot >> 16) & 0xff; - uint32_t bg_green = (*spot >> 8) & 0xff; - uint32_t bg_blue = (*spot) & 0xff; + for (x = w; x != 0; x--) + { + uint32_t bg_red = (*spot >> 16) & 0xff; + uint32_t bg_green = (*spot >> 8) & 0xff; + uint32_t bg_blue = (*spot) & 0xff; - uint32_t red = (fg_red + bg_red * inv_alpha) / 256; - uint32_t green = (fg_green + bg_green * inv_alpha) / 256; - uint32_t blue = (fg_blue + 
bg_blue * inv_alpha) / 256; + uint32_t red = (fg_red + bg_red * inv_alpha) / 256; + uint32_t green = (fg_green + bg_green * inv_alpha) / 256; + uint32_t blue = (fg_blue + bg_blue * inv_alpha) / 256; - *spot = 0xff000000 | (red << 16) | (green << 8) | blue; - spot++; + *spot = 0xff000000 | (red << 16) | (green << 8) | blue; + spot++; + } + spot += gap; } - spot += gap; } -#else - DWORD *bg2rgb; - DWORD fg; - + else { - int amount; + DWORD *bg2rgb; + DWORD fg; - amount = (int)(damount * 64); - bg2rgb = Col2RGB8[64-amount]; - - fg = (((color.r * amount) >> 4) << 20) | - ((color.g * amount) >> 4) | - (((color.b * amount) >> 4) << 10); - } - - for (y = h; y != 0; y--) - { - for (x = w; x != 0; x--) { - DWORD bg; + int amount; - bg = bg2rgb[(*spot)&0xff]; - bg = (fg+bg) | 0x1f07c1f; - *spot = RGB32k.All[bg&(bg>>15)]; - spot++; + amount = (int)(damount * 64); + bg2rgb = Col2RGB8[64-amount]; + + fg = (((color.r * amount) >> 4) << 20) | + ((color.g * amount) >> 4) | + (((color.b * amount) >> 4) << 10); + } + + for (y = h; y != 0; y--) + { + for (x = w; x != 0; x--) + { + DWORD bg; + + bg = bg2rgb[(*spot)&0xff]; + bg = (fg+bg) | 0x1f07c1f; + *spot = RGB32k.All[bg&(bg>>15)]; + spot++; + } + spot += gap; } - spot += gap; } -#endif } //========================================================================== From 045bad1b5287d75f2c9f3d84e4a6cc2975499c18 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 31 May 2016 05:31:32 +0200 Subject: [PATCH 005/912] Removed the need for the pixel_canvas_t typedef --- src/basictypes.h | 6 -- src/f_wipe.cpp | 14 +-- src/m_misc.cpp | 2 - src/r_draw.cpp | 234 ++++++++++++++++++++--------------------- src/r_draw.h | 12 +-- src/r_drawt.cpp | 71 +++++++------ src/r_drawt_rgba.cpp | 94 ++++++++--------- src/r_main.cpp | 34 ++++-- src/r_plane.cpp | 130 ++++++++++++++++++++++- src/r_plane.h | 4 + src/r_segs.cpp | 93 ++++++++++------ src/r_things.cpp | 23 ++-- src/v_draw.cpp | 112 +++++++++++++++----- src/v_video.cpp | 35 ++++-- src/v_video.h 
| 14 +-- src/win32/fb_d3d9.cpp | 6 +- src/win32/fb_ddraw.cpp | 4 +- src/win32/win32iface.h | 2 +- 18 files changed, 561 insertions(+), 329 deletions(-) diff --git a/src/basictypes.h b/src/basictypes.h index 45e33a4a73..ff2cd972e6 100644 --- a/src/basictypes.h +++ b/src/basictypes.h @@ -66,12 +66,6 @@ union QWORD_UNION typedef SDWORD fixed_t; typedef DWORD dsfixed_t; // fixedpt used by span drawer -#ifndef PALETTEOUTPUT -typedef uint32_t canvas_pixel_t; -#else -typedef BYTE canvas_pixel_t; -#endif - #define FIXED_MAX (signed)(0x7fffffff) #define FIXED_MIN (signed)(0x80000000) diff --git a/src/f_wipe.cpp b/src/f_wipe.cpp index a86f93fc4d..7e1ec678e8 100644 --- a/src/f_wipe.cpp +++ b/src/f_wipe.cpp @@ -80,7 +80,7 @@ bool wipe_initMelt (int ticks) int i, r; // copy start screen to main screen - screen->DrawBlock(0, 0, SCREENWIDTH, SCREENHEIGHT, (canvas_pixel_t *)wipe_scr_start); + screen->DrawBlock(0, 0, SCREENWIDTH, SCREENHEIGHT, (BYTE *)wipe_scr_start); // makes this wipe faster (in theory) // to have stuff in column-major format @@ -273,7 +273,7 @@ bool wipe_doBurn (int ticks) // Draw the screen int xstep, ystep, firex, firey; int x, y; - canvas_pixel_t *to; + BYTE *to; BYTE *fromold, *fromnew; const int SHIFT = 16; @@ -338,7 +338,7 @@ bool wipe_doFade (int ticks) fade += ticks * 2; if (fade > 64) { - screen->DrawBlock (0, 0, SCREENWIDTH, SCREENHEIGHT, (canvas_pixel_t *)wipe_scr_end); + screen->DrawBlock (0, 0, SCREENWIDTH, SCREENHEIGHT, (BYTE *)wipe_scr_end); return true; } else @@ -349,7 +349,7 @@ bool wipe_doFade (int ticks) DWORD *bg2rgb = Col2RGB8[bglevel]; BYTE *fromnew = (BYTE *)wipe_scr_end; BYTE *fromold = (BYTE *)wipe_scr_start; - canvas_pixel_t *to = screen->GetBuffer(); + BYTE *to = screen->GetBuffer(); for (y = 0; y < SCREENHEIGHT; y++) { @@ -393,7 +393,7 @@ bool wipe_StartScreen (int type) if (CurrentWipeType) { wipe_scr_start = new short[SCREENWIDTH * SCREENHEIGHT / 2]; - screen->GetBlock (0, 0, SCREENWIDTH, SCREENHEIGHT, (canvas_pixel_t 
*)wipe_scr_start); + screen->GetBlock (0, 0, SCREENWIDTH, SCREENHEIGHT, (BYTE *)wipe_scr_start); return true; } return false; @@ -407,8 +407,8 @@ void wipe_EndScreen (void) if (CurrentWipeType) { wipe_scr_end = new short[SCREENWIDTH * SCREENHEIGHT / 2]; - screen->GetBlock (0, 0, SCREENWIDTH, SCREENHEIGHT, (canvas_pixel_t *)wipe_scr_end); - screen->DrawBlock (0, 0, SCREENWIDTH, SCREENHEIGHT, (canvas_pixel_t *)wipe_scr_start); // restore start scr. + screen->GetBlock (0, 0, SCREENWIDTH, SCREENHEIGHT, (BYTE *)wipe_scr_end); + screen->DrawBlock (0, 0, SCREENWIDTH, SCREENHEIGHT, (BYTE *)wipe_scr_start); // restore start scr. // Initialize the wipe (*wipes[(CurrentWipeType-1)*3])(0); diff --git a/src/m_misc.cpp b/src/m_misc.cpp index 79416c31d8..87f61f2539 100644 --- a/src/m_misc.cpp +++ b/src/m_misc.cpp @@ -655,7 +655,6 @@ static bool FindFreeName (FString &fullname, const char *extension) void M_ScreenShot (const char *filename) { -#ifdef PALETTEOUTPUT FILE *file; FString autoname; bool writepcx = (stricmp (screenshot_type, "pcx") == 0); // PNG is the default @@ -744,7 +743,6 @@ void M_ScreenShot (const char *filename) Printf ("Could not create screenshot.\n"); } } -#endif } CCMD (screenshot) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index cd34a71b4a..f939406bb5 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -65,7 +65,7 @@ extern int ST_Y; BYTE* viewimage; extern "C" { int ylookup[MAXHEIGHT]; -canvas_pixel_t *dc_destorg; +BYTE* dc_destorg; } int scaledviewwidth; @@ -99,6 +99,7 @@ void (*R_DrawSpanMaskedAddClamp)(void); void (*R_FillSpan)(void); void (*R_FillColumnHoriz)(void); void (*R_DrawFogBoundary)(int x1, int x2, short *uclip, short *dclip); +void (*R_MapTiltedPlane)(int y, int x1); void (*R_MapColoredPlane)(int y, int x1); void (*R_DrawParticle)(vissprite_t *); fixed_t (*tmvline1_add)(); @@ -133,7 +134,7 @@ void (*rt_tlateadd4cols)(int sx, int yl, int yh); void (*rt_tlateaddclamp4cols)(int sx, int yl, int yh); void (*rt_tlatesubclamp4cols)(int sx, int yl, 
int yh); void (*rt_tlaterevsubclamp4cols)(int sx, int yl, int yh); -void (*rt_initcols)(canvas_pixel_t *buffer); +void (*rt_initcols)(BYTE *buffer); // // R_DrawColumn @@ -158,7 +159,7 @@ DWORD *dc_destblend; // blending lookups // first pixel in a column (possibly virtual) const BYTE* dc_source; -canvas_pixel_t* dc_dest; +BYTE* dc_dest; int dc_count; DWORD vplce[4]; @@ -236,7 +237,7 @@ void R_InitShadeMaps() void R_DrawColumnP_C (void) { int count; - canvas_pixel_t* dest; + BYTE* dest; fixed_t frac; fixed_t fracstep; @@ -281,7 +282,7 @@ void R_DrawColumnP_C (void) void R_DrawColumnP_RGBA_C() { int count; - canvas_pixel_t* dest; + uint32_t* dest; fixed_t frac; fixed_t fracstep; @@ -292,7 +293,7 @@ void R_DrawColumnP_RGBA_C() return; // Framebuffer destination address. - dest = dc_dest; + dest = (uint32_t*)dc_dest; uint32_t light = calc_light_multiplier(dc_light); @@ -328,7 +329,7 @@ void R_DrawColumnP_RGBA_C() void R_FillColumnP_C (void) { int count; - canvas_pixel_t* dest; + BYTE* dest; count = dc_count; @@ -352,14 +353,14 @@ void R_FillColumnP_C (void) void R_FillColumnP_RGBA() { int count; - canvas_pixel_t* dest; + uint32_t* dest; count = dc_count; if (count <= 0) return; - dest = dc_dest; + dest = (uint32_t*)dc_dest; uint32_t light = calc_light_multiplier(dc_light); @@ -378,7 +379,7 @@ void R_FillColumnP_RGBA() void R_FillAddColumn_C (void) { int count; - canvas_pixel_t *dest; + BYTE *dest; count = dc_count; if (count <= 0) @@ -405,13 +406,13 @@ void R_FillAddColumn_C (void) void R_FillAddColumn_RGBA_C() { int count; - canvas_pixel_t *dest; + uint32_t *dest; count = dc_count; if (count <= 0) return; - dest = dc_dest; + dest = (uint32_t*)dc_dest; int pitch = dc_pitch; uint32_t fg_red = (dc_srccolor >> 12) & 0xf8; @@ -436,7 +437,7 @@ void R_FillAddColumn_RGBA_C() void R_FillAddClampColumn_C (void) { int count; - canvas_pixel_t *dest; + BYTE *dest; count = dc_count; if (count <= 0) @@ -469,13 +470,13 @@ void R_FillAddClampColumn_C (void) void 
R_FillAddClampColumn_RGBA() { int count; - canvas_pixel_t *dest; + uint32_t *dest; count = dc_count; if (count <= 0) return; - dest = dc_dest; + dest = (uint32_t*)dc_dest; int pitch = dc_pitch; uint32_t fg_red = (dc_srccolor >> 12) & 0xf8; @@ -500,7 +501,7 @@ void R_FillAddClampColumn_RGBA() void R_FillSubClampColumn_C (void) { int count; - canvas_pixel_t *dest; + BYTE *dest; count = dc_count; if (count <= 0) @@ -532,13 +533,13 @@ void R_FillSubClampColumn_C (void) void R_FillSubClampColumn_RGBA() { int count; - canvas_pixel_t *dest; + uint32_t *dest; count = dc_count; if (count <= 0) return; - dest = dc_dest; + dest = (uint32_t*)dc_dest; int pitch = dc_pitch; uint32_t fg_red = (dc_srccolor >> 12) & 0xf8; @@ -563,7 +564,7 @@ void R_FillSubClampColumn_RGBA() void R_FillRevSubClampColumn_C (void) { int count; - canvas_pixel_t *dest; + BYTE *dest; count = dc_count; if (count <= 0) @@ -595,13 +596,13 @@ void R_FillRevSubClampColumn_C (void) void R_FillRevSubClampColumn_RGBA() { int count; - canvas_pixel_t *dest; + uint32_t *dest; count = dc_count; if (count <= 0) return; - dest = dc_dest; + dest = (uint32_t*)dc_dest; int pitch = dc_pitch; uint32_t fg_red = (dc_srccolor >> 12) & 0xf8; @@ -673,7 +674,7 @@ void R_InitFuzzTable (int fuzzoff) void R_DrawFuzzColumnP_C (void) { int count; - canvas_pixel_t *dest; + BYTE *dest; // Adjust borders. Low... if (dc_yl == 0) @@ -745,7 +746,7 @@ void R_DrawFuzzColumnP_C (void) void R_DrawFuzzColumnP_RGBA_C() { int count; - canvas_pixel_t *dest; + uint32_t *dest; // Adjust borders. Low... if (dc_yl == 0) @@ -763,7 +764,7 @@ void R_DrawFuzzColumnP_RGBA_C() count++; - dest = ylookup[dc_yl] + dc_x + dc_destorg; + dest = ylookup[dc_yl] + dc_x + (uint32_t*)dc_destorg; // Note: this implementation assumes this function is only used for the pinky shadow effect (i.e. no other fancy colormap than black) // I'm not sure if this is really always the case or not. @@ -890,7 +891,7 @@ algorithm that uses RGB tables. 
void R_DrawAddColumnP_C (void) { int count; - canvas_pixel_t *dest; + BYTE *dest; fixed_t frac; fixed_t fracstep; @@ -928,7 +929,7 @@ void R_DrawAddColumnP_C (void) void R_DrawAddColumnP_RGBA_C() { int count; - canvas_pixel_t *dest; + uint32_t *dest; fixed_t frac; fixed_t fracstep; @@ -936,7 +937,7 @@ void R_DrawAddColumnP_RGBA_C() if (count <= 0) return; - dest = dc_dest; + dest = (uint32_t*)dc_dest; fracstep = dc_iscale; frac = dc_texturefrac; @@ -980,7 +981,7 @@ void R_DrawAddColumnP_RGBA_C() void R_DrawTranslatedColumnP_C (void) { int count; - canvas_pixel_t* dest; + BYTE* dest; fixed_t frac; fixed_t fracstep; @@ -1012,7 +1013,7 @@ void R_DrawTranslatedColumnP_C (void) void R_DrawTranslatedColumnP_RGBA_C() { int count; - canvas_pixel_t* dest; + uint32_t* dest; fixed_t frac; fixed_t fracstep; @@ -1022,7 +1023,7 @@ void R_DrawTranslatedColumnP_RGBA_C() uint32_t light = calc_light_multiplier(dc_light); - dest = dc_dest; + dest = (uint32_t*)dc_dest; fracstep = dc_iscale; frac = dc_texturefrac; @@ -1047,7 +1048,7 @@ void R_DrawTranslatedColumnP_RGBA_C() void R_DrawTlatedAddColumnP_C() { int count; - canvas_pixel_t *dest; + BYTE *dest; fixed_t frac; fixed_t fracstep; @@ -1086,7 +1087,7 @@ void R_DrawTlatedAddColumnP_C() void R_DrawTlatedAddColumnP_RGBA_C() { int count; - canvas_pixel_t *dest; + uint32_t *dest; fixed_t frac; fixed_t fracstep; @@ -1096,7 +1097,7 @@ void R_DrawTlatedAddColumnP_RGBA_C() uint32_t light = calc_light_multiplier(dc_light); - dest = dc_dest; + dest = (uint32_t*)dc_dest; fracstep = dc_iscale; frac = dc_texturefrac; @@ -1135,7 +1136,7 @@ void R_DrawTlatedAddColumnP_RGBA_C() void R_DrawShadedColumnP_C (void) { int count; - canvas_pixel_t *dest; + BYTE *dest; fixed_t frac, fracstep; count = dc_count; @@ -1170,7 +1171,7 @@ void R_DrawShadedColumnP_C (void) void R_DrawShadedColumnP_RGBA_C() { int count; - canvas_pixel_t *dest; + uint32_t *dest; fixed_t frac, fracstep; count = dc_count; @@ -1178,7 +1179,7 @@ void R_DrawShadedColumnP_RGBA_C() if 
(count <= 0) return; - dest = dc_dest; + dest = (uint32_t*)dc_dest; fracstep = dc_iscale; frac = dc_texturefrac; @@ -1217,7 +1218,7 @@ void R_DrawShadedColumnP_RGBA_C() void R_DrawAddClampColumnP_C () { int count; - canvas_pixel_t *dest; + BYTE *dest; fixed_t frac; fixed_t fracstep; @@ -1234,24 +1235,20 @@ void R_DrawAddClampColumnP_C () const BYTE *source = dc_source; BYTE *colormap = dc_colormap; int pitch = dc_pitch; - uint32_t light = calc_light_multiplier(dc_light); + DWORD *fg2rgb = dc_srcblend; + DWORD *bg2rgb = dc_destblend; do { - uint32_t fg = shade_pal_index(colormap[source[frac >> FRACBITS]], light); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; + DWORD a = fg2rgb[colormap[source[frac >> FRACBITS]]] + bg2rgb[*dest]; + DWORD b = a; - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp(fg_red + bg_red, 0, 255); - uint32_t green = clamp(fg_green + bg_green, 0, 255); - uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + a |= 0x01f07c1f; + b &= 0x40100400; + a &= 0x3fffffff; + b = b - (b >> 5); + a |= b; + *dest = RGB32k.All[a & (a >> 15)]; dest += pitch; frac += fracstep; } while (--count); @@ -1261,7 +1258,7 @@ void R_DrawAddClampColumnP_C () void R_DrawAddClampColumnP_RGBA_C() { int count; - canvas_pixel_t *dest; + uint32_t *dest; fixed_t frac; fixed_t fracstep; @@ -1269,7 +1266,7 @@ void R_DrawAddClampColumnP_RGBA_C() if (count <= 0) return; - dest = dc_dest; + dest = (uint32_t*)dc_dest; fracstep = dc_iscale; frac = dc_texturefrac; @@ -1306,7 +1303,7 @@ void R_DrawAddClampColumnP_RGBA_C() void R_DrawAddClampTranslatedColumnP_C () { int count; - canvas_pixel_t *dest; + BYTE *dest; fixed_t frac; fixed_t fracstep; @@ -1347,7 +1344,7 @@ void R_DrawAddClampTranslatedColumnP_C () void R_DrawAddClampTranslatedColumnP_RGBA_C() { int count; - 
canvas_pixel_t *dest; + uint32_t *dest; fixed_t frac; fixed_t fracstep; @@ -1355,7 +1352,7 @@ void R_DrawAddClampTranslatedColumnP_RGBA_C() if (count <= 0) return; - dest = dc_dest; + dest = (uint32_t*)dc_dest; fracstep = dc_iscale; frac = dc_texturefrac; @@ -1393,7 +1390,7 @@ void R_DrawAddClampTranslatedColumnP_RGBA_C() void R_DrawSubClampColumnP_C () { int count; - canvas_pixel_t *dest; + BYTE *dest; fixed_t frac; fixed_t fracstep; @@ -1432,7 +1429,7 @@ void R_DrawSubClampColumnP_C () void R_DrawSubClampColumnP_RGBA_C() { int count; - canvas_pixel_t *dest; + uint32_t *dest; fixed_t frac; fixed_t fracstep; @@ -1440,7 +1437,7 @@ void R_DrawSubClampColumnP_RGBA_C() if (count <= 0) return; - dest = dc_dest; + dest = (uint32_t*)dc_dest; fracstep = dc_iscale; frac = dc_texturefrac; @@ -1477,7 +1474,7 @@ void R_DrawSubClampColumnP_RGBA_C() void R_DrawSubClampTranslatedColumnP_C () { int count; - canvas_pixel_t *dest; + BYTE *dest; fixed_t frac; fixed_t fracstep; @@ -1517,7 +1514,7 @@ void R_DrawSubClampTranslatedColumnP_C () void R_DrawSubClampTranslatedColumnP_RGBA_C() { int count; - canvas_pixel_t *dest; + uint32_t *dest; fixed_t frac; fixed_t fracstep; @@ -1525,7 +1522,7 @@ void R_DrawSubClampTranslatedColumnP_RGBA_C() if (count <= 0) return; - dest = dc_dest; + dest = (uint32_t*)dc_dest; fracstep = dc_iscale; frac = dc_texturefrac; @@ -1563,7 +1560,7 @@ void R_DrawSubClampTranslatedColumnP_RGBA_C() void R_DrawRevSubClampColumnP_C () { int count; - canvas_pixel_t *dest; + BYTE *dest; fixed_t frac; fixed_t fracstep; @@ -1602,7 +1599,7 @@ void R_DrawRevSubClampColumnP_C () void R_DrawRevSubClampColumnP_RGBA_C() { int count; - canvas_pixel_t *dest; + uint32_t *dest; fixed_t frac; fixed_t fracstep; @@ -1610,7 +1607,7 @@ void R_DrawRevSubClampColumnP_RGBA_C() if (count <= 0) return; - dest = dc_dest; + dest = (uint32_t*)dc_dest; fracstep = dc_iscale; frac = dc_texturefrac; @@ -1647,7 +1644,7 @@ void R_DrawRevSubClampColumnP_RGBA_C() void 
R_DrawRevSubClampTranslatedColumnP_C () { int count; - canvas_pixel_t *dest; + BYTE *dest; fixed_t frac; fixed_t fracstep; @@ -1687,7 +1684,7 @@ void R_DrawRevSubClampTranslatedColumnP_C () void R_DrawRevSubClampTranslatedColumnP_RGBA_C() { int count; - canvas_pixel_t *dest; + uint32_t *dest; fixed_t frac; fixed_t fracstep; @@ -1695,7 +1692,7 @@ void R_DrawRevSubClampTranslatedColumnP_RGBA_C() if (count <= 0) return; - dest = dc_dest; + dest = (uint32_t*)dc_dest; fracstep = dc_iscale; frac = dc_texturefrac; @@ -1855,7 +1852,7 @@ void R_DrawSpanP_C (void) dsfixed_t yfrac; dsfixed_t xstep; dsfixed_t ystep; - canvas_pixel_t* dest; + BYTE* dest; const BYTE* source = ds_source; const BYTE* colormap = ds_colormap; int count; @@ -1927,7 +1924,7 @@ void R_DrawSpanP_RGBA_C() dsfixed_t yfrac; dsfixed_t xstep; dsfixed_t ystep; - canvas_pixel_t* dest; + uint32_t* dest; const BYTE* source = ds_source; const BYTE* colormap = ds_colormap; int count; @@ -1945,7 +1942,7 @@ void R_DrawSpanP_RGBA_C() xfrac = ds_xfrac; yfrac = ds_yfrac; - dest = ylookup[ds_y] + ds_x1 + dc_destorg; + dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; count = ds_x2 - ds_x1 + 1; @@ -2051,7 +2048,7 @@ void R_DrawSpanMaskedP_C (void) dsfixed_t yfrac; dsfixed_t xstep; dsfixed_t ystep; - canvas_pixel_t* dest; + BYTE* dest; const BYTE* source = ds_source; const BYTE* colormap = ds_colormap; int count; @@ -2114,7 +2111,7 @@ void R_DrawSpanMaskedP_RGBA_C() dsfixed_t yfrac; dsfixed_t xstep; dsfixed_t ystep; - canvas_pixel_t* dest; + uint32_t* dest; const BYTE* source = ds_source; const BYTE* colormap = ds_colormap; int count; @@ -2125,7 +2122,7 @@ void R_DrawSpanMaskedP_RGBA_C() xfrac = ds_xfrac; yfrac = ds_yfrac; - dest = ylookup[ds_y] + ds_x1 + dc_destorg; + dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; count = ds_x2 - ds_x1 + 1; @@ -2179,7 +2176,7 @@ void R_DrawSpanTranslucentP_C (void) dsfixed_t yfrac; dsfixed_t xstep; dsfixed_t ystep; - canvas_pixel_t* dest; + BYTE* dest; const BYTE* source = 
ds_source; const BYTE* colormap = ds_colormap; int count; @@ -2241,7 +2238,7 @@ void R_DrawSpanTranslucentP_RGBA_C() dsfixed_t yfrac; dsfixed_t xstep; dsfixed_t ystep; - canvas_pixel_t* dest; + uint32_t* dest; const BYTE* source = ds_source; const BYTE* colormap = ds_colormap; int count; @@ -2252,7 +2249,7 @@ void R_DrawSpanTranslucentP_RGBA_C() xfrac = ds_xfrac; yfrac = ds_yfrac; - dest = ylookup[ds_y] + ds_x1 + dc_destorg; + dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; count = ds_x2 - ds_x1 + 1; @@ -2323,7 +2320,7 @@ void R_DrawSpanMaskedTranslucentP_C (void) dsfixed_t yfrac; dsfixed_t xstep; dsfixed_t ystep; - canvas_pixel_t* dest; + BYTE* dest; const BYTE* source = ds_source; const BYTE* colormap = ds_colormap; int count; @@ -2399,7 +2396,7 @@ void R_DrawSpanMaskedTranslucentP_RGBA_C() dsfixed_t yfrac; dsfixed_t xstep; dsfixed_t ystep; - canvas_pixel_t* dest; + uint32_t* dest; const BYTE* source = ds_source; const BYTE* colormap = ds_colormap; int count; @@ -2412,7 +2409,7 @@ void R_DrawSpanMaskedTranslucentP_RGBA_C() xfrac = ds_xfrac; yfrac = ds_yfrac; - dest = ylookup[ds_y] + ds_x1 + dc_destorg; + dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; count = ds_x2 - ds_x1 + 1; @@ -2491,7 +2488,7 @@ void R_DrawSpanAddClampP_C (void) dsfixed_t yfrac; dsfixed_t xstep; dsfixed_t ystep; - canvas_pixel_t* dest; + BYTE* dest; const BYTE* source = ds_source; const BYTE* colormap = ds_colormap; int count; @@ -2561,7 +2558,7 @@ void R_DrawSpanAddClampP_RGBA_C() dsfixed_t yfrac; dsfixed_t xstep; dsfixed_t ystep; - canvas_pixel_t* dest; + uint32_t* dest; const BYTE* source = ds_source; const BYTE* colormap = ds_colormap; int count; @@ -2574,7 +2571,7 @@ void R_DrawSpanAddClampP_RGBA_C() xfrac = ds_xfrac; yfrac = ds_yfrac; - dest = ylookup[ds_y] + ds_x1 + dc_destorg; + dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; count = ds_x2 - ds_x1 + 1; @@ -2643,7 +2640,7 @@ void R_DrawSpanMaskedAddClampP_C (void) dsfixed_t yfrac; dsfixed_t xstep; dsfixed_t ystep; - 
canvas_pixel_t* dest; + BYTE* dest; const BYTE* source = ds_source; const BYTE* colormap = ds_colormap; int count; @@ -2725,7 +2722,7 @@ void R_DrawSpanMaskedAddClampP_RGBA_C() dsfixed_t yfrac; dsfixed_t xstep; dsfixed_t ystep; - canvas_pixel_t* dest; + uint32_t* dest; const BYTE* source = ds_source; const BYTE* colormap = ds_colormap; int count; @@ -2738,7 +2735,7 @@ void R_DrawSpanMaskedAddClampP_RGBA_C() xfrac = ds_xfrac; yfrac = ds_yfrac; - dest = ylookup[ds_y] + ds_x1 + dc_destorg; + dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; count = ds_x2 - ds_x1 + 1; @@ -2819,7 +2816,7 @@ void R_FillSpan_C (void) void R_FillSpan_RGBA() { - canvas_pixel_t *dest = ylookup[ds_y] + ds_x1 + dc_destorg; + uint32_t *dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; int count = (ds_x2 - ds_x1 + 1); uint32_t light = calc_light_multiplier(ds_light); uint32_t color = shade_pal_index(ds_color, light); @@ -2843,7 +2840,7 @@ extern "C" void R_SetupDrawSlabC(const BYTE *colormap) slabcolormap = colormap; } -extern "C" void R_DrawSlabC(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, canvas_pixel_t *p) +extern "C" void R_DrawSlabC(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, BYTE *p) { int x; const BYTE *colormap = slabcolormap; @@ -3017,7 +3014,7 @@ DWORD vlinec1 () BYTE *colormap = dc_colormap; int count = dc_count; const BYTE *source = dc_source; - canvas_pixel_t *dest = dc_dest; + BYTE *dest = dc_dest; int bits = vlinebits; int pitch = dc_pitch; @@ -3039,7 +3036,7 @@ DWORD vlinec1_RGBA() BYTE *colormap = dc_colormap; int count = dc_count; const BYTE *source = dc_source; - canvas_pixel_t *dest = dc_dest; + uint32_t *dest = (uint32_t*)dc_dest; int bits = vlinebits; int pitch = dc_pitch; @@ -3058,7 +3055,7 @@ DWORD vlinec1_RGBA() #if !defined(X86_ASM) void vlinec4 () { - canvas_pixel_t *dest = dc_dest; + BYTE *dest = dc_dest; int count = dc_count; int bits = vlinebits; DWORD place; @@ -3076,7 +3073,7 @@ void vlinec4 () void vlinec4_RGBA() { - 
canvas_pixel_t *dest = dc_dest; + uint32_t *dest = (uint32_t*)dc_dest; int count = dc_count; int bits = vlinebits; @@ -3162,7 +3159,7 @@ DWORD mvlinec1 () BYTE *colormap = dc_colormap; int count = dc_count; const BYTE *source = dc_source; - canvas_pixel_t *dest = dc_dest; + BYTE *dest = dc_dest; int bits = mvlinebits; int pitch = dc_pitch; @@ -3188,7 +3185,7 @@ DWORD mvlinec1_RGBA() BYTE *colormap = dc_colormap; int count = dc_count; const BYTE *source = dc_source; - canvas_pixel_t *dest = dc_dest; + uint32_t *dest = (uint32_t*)dc_dest; int bits = mvlinebits; int pitch = dc_pitch; @@ -3211,7 +3208,7 @@ DWORD mvlinec1_RGBA() #if !defined(X86_ASM) void mvlinec4 () { - canvas_pixel_t *dest = dc_dest; + BYTE *dest = dc_dest; int count = dc_count; int bits = mvlinebits; DWORD place; @@ -3230,7 +3227,7 @@ void mvlinec4 () void mvlinec4_RGBA() { - canvas_pixel_t *dest = dc_dest; + uint32_t *dest = (uint32_t*)dc_dest; int count = dc_count; int bits = mvlinebits; DWORD place; @@ -3260,7 +3257,7 @@ extern int wallshade; static void R_DrawFogBoundarySection (int y, int y2, int x1) { BYTE *colormap = dc_colormap; - canvas_pixel_t *dest = ylookup[y] + dc_destorg; + BYTE *dest = ylookup[y] + dc_destorg; for (; y < y2; ++y) { @@ -3278,7 +3275,7 @@ static void R_DrawFogBoundaryLine (int y, int x) { int x2 = spanend[y]; BYTE *colormap = dc_colormap; - canvas_pixel_t *dest = ylookup[y] + dc_destorg; + BYTE *dest = ylookup[y] + dc_destorg; do { @@ -3380,7 +3377,7 @@ void R_DrawFogBoundary_C (int x1, int x2, short *uclip, short *dclip) static void R_DrawFogBoundarySection_RGBA(int y, int y2, int x1) { BYTE *colormap = dc_colormap; - canvas_pixel_t *dest = ylookup[y] + dc_destorg; + uint32_t *dest = ylookup[y] + (uint32_t*)dc_destorg; uint32_t light = calc_light_multiplier(dc_light); @@ -3400,7 +3397,7 @@ static void R_DrawFogBoundaryLine_RGBA(int y, int x) { int x2 = spanend[y]; BYTE *colormap = dc_colormap; - canvas_pixel_t *dest = ylookup[y] + dc_destorg; + uint32_t *dest = 
ylookup[y] + (uint32_t*)dc_destorg; uint32_t light = calc_light_multiplier(dc_light); @@ -3518,7 +3515,7 @@ fixed_t tmvline1_add_C () BYTE *colormap = dc_colormap; int count = dc_count; const BYTE *source = dc_source; - canvas_pixel_t *dest = dc_dest; + BYTE *dest = dc_dest; int bits = tmvlinebits; int pitch = dc_pitch; @@ -3551,7 +3548,7 @@ fixed_t tmvline1_add_RGBA() BYTE *colormap = dc_colormap; int count = dc_count; const BYTE *source = dc_source; - canvas_pixel_t *dest = dc_dest; + uint32_t *dest = (uint32_t*)dc_dest; int bits = tmvlinebits; int pitch = dc_pitch; @@ -3589,7 +3586,7 @@ fixed_t tmvline1_add_RGBA() void tmvline4_add_C () { - canvas_pixel_t *dest = dc_dest; + BYTE *dest = dc_dest; int count = dc_count; int bits = tmvlinebits; @@ -3622,13 +3619,10 @@ void tmvline4_add_C () void tmvline4_add_RGBA() { - canvas_pixel_t *dest = dc_dest; + uint32_t *dest = (uint32_t*)dc_dest; int count = dc_count; int bits = tmvlinebits; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - uint32_t light[4]; light[0] = calc_light_multiplier(palookuplight[0]); light[1] = calc_light_multiplier(palookuplight[1]); @@ -3670,7 +3664,7 @@ fixed_t tmvline1_addclamp_C () BYTE *colormap = dc_colormap; int count = dc_count; const BYTE *source = dc_source; - canvas_pixel_t *dest = dc_dest; + BYTE *dest = dc_dest; int bits = tmvlinebits; int pitch = dc_pitch; @@ -3708,7 +3702,7 @@ fixed_t tmvline1_addclamp_RGBA() BYTE *colormap = dc_colormap; int count = dc_count; const BYTE *source = dc_source; - canvas_pixel_t *dest = dc_dest; + uint32_t *dest = (uint32_t*)dc_dest; int bits = tmvlinebits; int pitch = dc_pitch; @@ -3746,7 +3740,7 @@ fixed_t tmvline1_addclamp_RGBA() void tmvline4_addclamp_C () { - canvas_pixel_t *dest = dc_dest; + BYTE *dest = dc_dest; int count = dc_count; int bits = tmvlinebits; @@ -3778,7 +3772,7 @@ void tmvline4_addclamp_C () void tmvline4_addclamp_RGBA() { - canvas_pixel_t *dest = dc_dest; + uint32_t *dest = (uint32_t*)dc_dest; int count = dc_count; 
int bits = tmvlinebits; @@ -3823,7 +3817,7 @@ fixed_t tmvline1_subclamp_C () BYTE *colormap = dc_colormap; int count = dc_count; const BYTE *source = dc_source; - canvas_pixel_t *dest = dc_dest; + BYTE *dest = dc_dest; int bits = tmvlinebits; int pitch = dc_pitch; @@ -3858,7 +3852,7 @@ fixed_t tmvline1_subclamp_RGBA() BYTE *colormap = dc_colormap; int count = dc_count; const BYTE *source = dc_source; - canvas_pixel_t *dest = dc_dest; + uint32_t *dest = (uint32_t*)dc_dest; int bits = tmvlinebits; int pitch = dc_pitch; @@ -3893,7 +3887,7 @@ fixed_t tmvline1_subclamp_RGBA() void tmvline4_subclamp_C () { - canvas_pixel_t *dest = dc_dest; + BYTE *dest = dc_dest; int count = dc_count; int bits = tmvlinebits; @@ -3924,7 +3918,7 @@ void tmvline4_subclamp_C () void tmvline4_subclamp_RGBA() { - canvas_pixel_t *dest = dc_dest; + uint32_t *dest = (uint32_t*)dc_dest; int count = dc_count; int bits = tmvlinebits; @@ -3969,7 +3963,7 @@ fixed_t tmvline1_revsubclamp_C () BYTE *colormap = dc_colormap; int count = dc_count; const BYTE *source = dc_source; - canvas_pixel_t *dest = dc_dest; + BYTE *dest = dc_dest; int bits = tmvlinebits; int pitch = dc_pitch; @@ -4004,7 +3998,7 @@ fixed_t tmvline1_revsubclamp_RGBA() BYTE *colormap = dc_colormap; int count = dc_count; const BYTE *source = dc_source; - canvas_pixel_t *dest = dc_dest; + uint32_t *dest = (uint32_t*)dc_dest; int bits = tmvlinebits; int pitch = dc_pitch; @@ -4039,7 +4033,7 @@ fixed_t tmvline1_revsubclamp_RGBA() void tmvline4_revsubclamp_C () { - canvas_pixel_t *dest = dc_dest; + BYTE *dest = dc_dest; int count = dc_count; int bits = tmvlinebits; @@ -4070,7 +4064,7 @@ void tmvline4_revsubclamp_C () void tmvline4_revsubclamp_RGBA() { - canvas_pixel_t *dest = dc_dest; + uint32_t *dest = (uint32_t*)dc_dest; int count = dc_count; int bits = tmvlinebits; @@ -4168,6 +4162,7 @@ void R_InitColumnDrawers () R_FillColumnHoriz = R_FillColumnHorizP_RGBA_C; R_DrawFogBoundary = R_DrawFogBoundary_RGBA; + R_MapTiltedPlane = 
R_MapColoredPlane_RGBA; R_MapColoredPlane = R_MapColoredPlane_RGBA; R_DrawParticle = R_DrawParticle_RGBA; @@ -4262,6 +4257,7 @@ void R_InitColumnDrawers () R_FillColumnHoriz = R_FillColumnHorizP_C; R_DrawFogBoundary = R_DrawFogBoundary_C; + R_MapTiltedPlane = R_MapColoredPlane_C; R_MapColoredPlane = R_MapColoredPlane_C; R_DrawParticle = R_DrawParticle_C; diff --git a/src/r_draw.h b/src/r_draw.h index db109dbee5..2348914b6e 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -45,7 +45,7 @@ extern "C" DWORD *dc_destblend; // first pixel in a column extern "C" const BYTE* dc_source; -extern "C" canvas_pixel_t *dc_dest, *dc_destorg; +extern "C" BYTE* dc_dest, *dc_destorg; extern "C" int dc_count; extern "C" DWORD vplce[4]; @@ -55,7 +55,7 @@ extern "C" fixed_t palookuplight[4]; extern "C" const BYTE* bufplce[4]; // [RH] Temporary buffer for column drawing -extern "C" canvas_pixel_t *dc_temp; +extern "C" BYTE *dc_temp; extern "C" unsigned int dc_tspans[4][MAXHEIGHT]; extern "C" unsigned int *dc_ctspan[4]; extern "C" unsigned int horizspans[4]; @@ -228,13 +228,13 @@ extern void (*rt_tlateaddclamp4cols)(int sx, int yl, int yh); extern void (*rt_tlatesubclamp4cols)(int sx, int yl, int yh); extern void (*rt_tlaterevsubclamp4cols)(int sx, int yl, int yh); -extern void (*rt_initcols)(canvas_pixel_t *buffer); +extern void (*rt_initcols)(BYTE *buffer); void rt_draw4cols (int sx); // [RH] Preps the temporary horizontal buffer. 
-void rt_initcols_pal (canvas_pixel_t *buffer); -void rt_initcols_rgba (canvas_pixel_t *buffer); +void rt_initcols_pal (BYTE *buffer); +void rt_initcols_rgba (BYTE *buffer); extern void (*R_DrawFogBoundary)(int x1, int x2, short *uclip, short *dclip); @@ -313,7 +313,7 @@ void R_FillSpan_RGBA_C(void); #endif extern "C" void R_SetupDrawSlab(const BYTE *colormap); -extern "C" void R_DrawSlab(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, canvas_pixel_t *p); +extern "C" void R_DrawSlab(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, BYTE *p); extern "C" int ds_y; extern "C" int ds_x1; diff --git a/src/r_drawt.cpp b/src/r_drawt.cpp index 9520f59b37..485ed7ab31 100644 --- a/src/r_drawt.cpp +++ b/src/r_drawt.cpp @@ -47,6 +47,8 @@ #include "r_things.h" #include "v_video.h" +EXTERN_CVAR(Bool, r_swtruecolor) + // I should have commented this stuff better. // // dc_temp is the buffer R_DrawColumnHoriz writes into. @@ -57,8 +59,8 @@ // dc_ctspan is advanced while drawing into dc_temp. // horizspan is advanced up to dc_ctspan when drawing from dc_temp to the screen. -canvas_pixel_t dc_tempbuff[MAXHEIGHT*4]; -canvas_pixel_t *dc_temp; +BYTE dc_tempbuff[MAXHEIGHT*4]; +BYTE *dc_temp; unsigned int dc_tspans[4][MAXHEIGHT]; unsigned int *dc_ctspan[4]; unsigned int *horizspan[4]; @@ -73,8 +75,8 @@ extern "C" void R_SetupAddClampCol(); // Copies one span at hx to the screen at sx. 
void rt_copy1col_c (int hx, int sx, int yl, int yh) { - canvas_pixel_t *source; - canvas_pixel_t *dest; + BYTE *source; + BYTE *dest; int count; int pitch; @@ -148,8 +150,8 @@ void rt_copy4cols_c (int sx, int yl, int yh) void rt_map1col_c (int hx, int sx, int yl, int yh) { BYTE *colormap; - canvas_pixel_t *source; - canvas_pixel_t *dest; + BYTE *source; + BYTE *dest; int count; int pitch; @@ -183,8 +185,8 @@ void rt_map1col_c (int hx, int sx, int yl, int yh) void rt_map4cols_c (int sx, int yl, int yh) { BYTE *colormap; - canvas_pixel_t *source; - canvas_pixel_t *dest; + BYTE *source; + BYTE *dest; int count; int pitch; @@ -227,7 +229,7 @@ void rt_map4cols_c (int sx, int yl, int yh) void rt_Translate1col(const BYTE *translation, int hx, int yl, int yh) { int count = yh - yl + 1; - canvas_pixel_t *source = &dc_temp[yl*4 + hx]; + BYTE *source = &dc_temp[yl*4 + hx]; // Things we do to hit the compiler's optimizer with a clue bat: // 1. Parallelism is explicitly spelled out by using a separate @@ -274,7 +276,7 @@ void rt_Translate1col(const BYTE *translation, int hx, int yl, int yh) void rt_Translate4cols(const BYTE *translation, int yl, int yh) { int count = yh - yl + 1; - canvas_pixel_t *source = &dc_temp[yl*4]; + BYTE *source = &dc_temp[yl*4]; int c0, c1; BYTE b0, b1; @@ -330,8 +332,8 @@ void rt_tlate4cols_c (int sx, int yl, int yh) void rt_add1col_c (int hx, int sx, int yl, int yh) { BYTE *colormap; - canvas_pixel_t *source; - canvas_pixel_t *dest; + BYTE *source; + BYTE *dest; int count; int pitch; @@ -364,8 +366,8 @@ void rt_add1col_c (int hx, int sx, int yl, int yh) void rt_add4cols_c (int sx, int yl, int yh) { BYTE *colormap; - canvas_pixel_t *source; - canvas_pixel_t *dest; + BYTE *source; + BYTE *dest; int count; int pitch; @@ -435,8 +437,8 @@ void rt_tlateadd4cols_c (int sx, int yl, int yh) void rt_shaded1col_c (int hx, int sx, int yl, int yh) { BYTE *colormap; - canvas_pixel_t *source; - canvas_pixel_t *dest; + BYTE *source; + BYTE *dest; int count; int 
pitch; @@ -467,8 +469,8 @@ void rt_shaded1col_c (int hx, int sx, int yl, int yh) void rt_shaded4cols_c (int sx, int yl, int yh) { BYTE *colormap; - canvas_pixel_t *source; - canvas_pixel_t *dest; + BYTE *source; + BYTE *dest; int count; int pitch; @@ -513,8 +515,8 @@ void rt_shaded4cols_c (int sx, int yl, int yh) void rt_addclamp1col_c (int hx, int sx, int yl, int yh) { BYTE *colormap; - canvas_pixel_t *source; - canvas_pixel_t *dest; + BYTE *source; + BYTE *dest; int count; int pitch; @@ -550,8 +552,8 @@ void rt_addclamp1col_c (int hx, int sx, int yl, int yh) void rt_addclamp4cols_c (int sx, int yl, int yh) { BYTE *colormap; - canvas_pixel_t *source; - canvas_pixel_t *dest; + BYTE *source; + BYTE *dest; int count; int pitch; @@ -629,8 +631,8 @@ void rt_tlateaddclamp4cols_c (int sx, int yl, int yh) void rt_subclamp1col_c (int hx, int sx, int yl, int yh) { BYTE *colormap; - canvas_pixel_t *source; - canvas_pixel_t *dest; + BYTE *source; + BYTE *dest; int count; int pitch; @@ -664,8 +666,8 @@ void rt_subclamp1col_c (int hx, int sx, int yl, int yh) void rt_subclamp4cols_c (int sx, int yl, int yh) { BYTE *colormap; - canvas_pixel_t *source; - canvas_pixel_t *dest; + BYTE *source; + BYTE *dest; int count; int pitch; @@ -738,8 +740,8 @@ void rt_tlatesubclamp4cols_c (int sx, int yl, int yh) void rt_revsubclamp1col_c (int hx, int sx, int yl, int yh) { BYTE *colormap; - canvas_pixel_t *source; - canvas_pixel_t *dest; + BYTE *source; + BYTE *dest; int count; int pitch; @@ -773,8 +775,8 @@ void rt_revsubclamp1col_c (int hx, int sx, int yl, int yh) void rt_revsubclamp4cols_c (int sx, int yl, int yh) { BYTE *colormap; - canvas_pixel_t *source; - canvas_pixel_t *dest; + BYTE *source; + BYTE *dest; int count; int pitch; @@ -1007,7 +1009,7 @@ void rt_draw4cols (int sx) // Before each pass through a rendering loop that uses these routines, // call this function to set up the span pointers. 
-void rt_initcols_pal (canvas_pixel_t *buff) +void rt_initcols_pal (BYTE *buff) { int y; @@ -1021,7 +1023,7 @@ void rt_initcols_pal (canvas_pixel_t *buff) void R_DrawColumnHorizP_C (void) { int count = dc_count; - canvas_pixel_t *dest; + BYTE *dest; fixed_t fracstep; fixed_t frac; @@ -1082,7 +1084,7 @@ void R_FillColumnHorizP_C (void) { int count = dc_count; BYTE color = dc_color; - canvas_pixel_t *dest; + BYTE *dest; if (count <= 0) return; @@ -1113,6 +1115,7 @@ void R_FillColumnHorizP_C (void) void R_DrawMaskedColumnHoriz (const BYTE *column, const FTexture::Span *span) { + int pixelsize = r_swtruecolor ? 4 : 1; const fixed_t texturemid = FLOAT2FIXED(dc_texturemid); while (span->Length != 0) { @@ -1182,7 +1185,7 @@ void R_DrawMaskedColumnHoriz (const BYTE *column, const FTexture::Span *span) } } dc_source = column + top; - dc_dest = ylookup[dc_yl] + dc_x + dc_destorg; + dc_dest = (ylookup[dc_yl] + dc_x) * pixelsize + dc_destorg; dc_count = dc_yh - dc_yl + 1; hcolfunc_pre (); } diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp index e8111be8fb..872cb4b89e 100644 --- a/src/r_drawt_rgba.cpp +++ b/src/r_drawt_rgba.cpp @@ -43,8 +43,8 @@ #include "r_things.h" #include "v_video.h" -canvas_pixel_t dc_temp_rgbabuff_rgba[MAXHEIGHT*4]; -canvas_pixel_t *dc_temp_rgba; +uint32_t dc_temp_rgbabuff_rgba[MAXHEIGHT*4]; +uint32_t *dc_temp_rgba; // Defined in r_draw_t.cpp: extern unsigned int dc_tspans[4][MAXHEIGHT]; @@ -54,8 +54,8 @@ extern unsigned int *horizspan[4]; // Copies one span at hx to the screen at sx. 
void rt_copy1col_RGBA_c (int hx, int sx, int yl, int yh) { - canvas_pixel_t *source; - canvas_pixel_t *dest; + uint32_t *source; + uint32_t *dest; int count; int pitch; @@ -64,7 +64,7 @@ void rt_copy1col_RGBA_c (int hx, int sx, int yl, int yh) return; count++; - dest = ylookup[yl] + sx + dc_destorg; + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl*4 + hx]; pitch = dc_pitch; @@ -106,8 +106,8 @@ void rt_copy4cols_RGBA_c (int sx, int yl, int yh) void rt_map1col_RGBA_c (int hx, int sx, int yl, int yh) { BYTE *colormap; - canvas_pixel_t *source; - canvas_pixel_t *dest; + uint32_t *source; + uint32_t *dest; int count; int pitch; @@ -119,7 +119,7 @@ void rt_map1col_RGBA_c (int hx, int sx, int yl, int yh) uint32_t light = calc_light_multiplier(dc_light); colormap = dc_colormap; - dest = ylookup[yl] + sx + dc_destorg; + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl*4 + hx]; pitch = dc_pitch; @@ -143,8 +143,8 @@ void rt_map1col_RGBA_c (int hx, int sx, int yl, int yh) void rt_map4cols_RGBA_c (int sx, int yl, int yh) { BYTE *colormap; - canvas_pixel_t *source; - canvas_pixel_t *dest; + uint32_t *source; + uint32_t *dest; int count; int pitch; @@ -156,7 +156,7 @@ void rt_map4cols_RGBA_c (int sx, int yl, int yh) uint32_t light = calc_light_multiplier(dc_light); colormap = dc_colormap; - dest = ylookup[yl] + sx + dc_destorg; + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl*4]; pitch = dc_pitch; @@ -188,7 +188,7 @@ void rt_map4cols_RGBA_c (int sx, int yl, int yh) void rt_Translate1col_RGBA_c(const BYTE *translation, int hx, int yl, int yh) { int count = yh - yl + 1; - canvas_pixel_t *source = &dc_temp_rgba[yl*4 + hx]; + uint32_t *source = &dc_temp_rgba[yl*4 + hx]; // Things we do to hit the compiler's optimizer with a clue bat: // 1. 
Parallelism is explicitly spelled out by using a separate @@ -235,7 +235,7 @@ void rt_Translate1col_RGBA_c(const BYTE *translation, int hx, int yl, int yh) void rt_Translate4cols_RGBA_c(const BYTE *translation, int yl, int yh) { int count = yh - yl + 1; - canvas_pixel_t *source = &dc_temp_rgba[yl*4]; + uint32_t *source = &dc_temp_rgba[yl*4]; int c0, c1; BYTE b0, b1; @@ -291,8 +291,8 @@ void rt_tlate4cols_RGBA_c (int sx, int yl, int yh) void rt_add1col_RGBA_c (int hx, int sx, int yl, int yh) { BYTE *colormap; - canvas_pixel_t *source; - canvas_pixel_t *dest; + uint32_t *source; + uint32_t *dest; int count; int pitch; @@ -301,7 +301,7 @@ void rt_add1col_RGBA_c (int hx, int sx, int yl, int yh) return; count++; - dest = ylookup[yl] + sx + dc_destorg; + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl*4 + hx]; pitch = dc_pitch; colormap = dc_colormap; @@ -333,8 +333,8 @@ void rt_add1col_RGBA_c (int hx, int sx, int yl, int yh) void rt_add4cols_RGBA_c (int sx, int yl, int yh) { BYTE *colormap; - canvas_pixel_t *source; - canvas_pixel_t *dest; + uint32_t *source; + uint32_t *dest; int count; int pitch; @@ -343,7 +343,7 @@ void rt_add4cols_RGBA_c (int sx, int yl, int yh) return; count++; - dest = ylookup[yl] + sx + dc_destorg; + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl*4]; pitch = dc_pitch; colormap = dc_colormap; @@ -392,8 +392,8 @@ void rt_tlateadd4cols_RGBA_c(int sx, int yl, int yh) void rt_shaded1col_RGBA_c (int hx, int sx, int yl, int yh) { BYTE *colormap; - canvas_pixel_t *source; - canvas_pixel_t *dest; + uint32_t *source; + uint32_t *dest; int count; int pitch; @@ -403,7 +403,7 @@ void rt_shaded1col_RGBA_c (int hx, int sx, int yl, int yh) count++; colormap = dc_colormap; - dest = ylookup[yl] + sx + dc_destorg; + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl*4 + hx]; pitch = dc_pitch; @@ -434,8 +434,8 @@ void rt_shaded1col_RGBA_c (int hx, int sx, int yl, int yh) void 
rt_shaded4cols_RGBA_c (int sx, int yl, int yh) { BYTE *colormap; - canvas_pixel_t *source; - canvas_pixel_t *dest; + uint32_t *source; + uint32_t *dest; int count; int pitch; @@ -445,7 +445,7 @@ void rt_shaded4cols_RGBA_c (int sx, int yl, int yh) count++; colormap = dc_colormap; - dest = ylookup[yl] + sx + dc_destorg; + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl*4]; pitch = dc_pitch; @@ -479,8 +479,8 @@ void rt_shaded4cols_RGBA_c (int sx, int yl, int yh) void rt_addclamp1col_RGBA_c (int hx, int sx, int yl, int yh) { BYTE *colormap; - canvas_pixel_t *source; - canvas_pixel_t *dest; + uint32_t *source; + uint32_t *dest; int count; int pitch; @@ -489,7 +489,7 @@ void rt_addclamp1col_RGBA_c (int hx, int sx, int yl, int yh) return; count++; - dest = ylookup[yl] + sx + dc_destorg; + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl*4 + hx]; pitch = dc_pitch; colormap = dc_colormap; @@ -520,8 +520,8 @@ void rt_addclamp1col_RGBA_c (int hx, int sx, int yl, int yh) void rt_addclamp4cols_RGBA_c (int sx, int yl, int yh) { BYTE *colormap; - canvas_pixel_t *source; - canvas_pixel_t *dest; + uint32_t *source; + uint32_t *dest; int count; int pitch; @@ -530,7 +530,7 @@ void rt_addclamp4cols_RGBA_c (int sx, int yl, int yh) return; count++; - dest = ylookup[yl] + sx + dc_destorg; + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl*4]; pitch = dc_pitch; colormap = dc_colormap; @@ -578,8 +578,8 @@ void rt_tlateaddclamp4cols_RGBA_c (int sx, int yl, int yh) void rt_subclamp1col_RGBA_c (int hx, int sx, int yl, int yh) { BYTE *colormap; - canvas_pixel_t *source; - canvas_pixel_t *dest; + uint32_t *source; + uint32_t *dest; int count; int pitch; @@ -588,7 +588,7 @@ void rt_subclamp1col_RGBA_c (int hx, int sx, int yl, int yh) return; count++; - dest = ylookup[yl] + sx + dc_destorg; + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl*4 + hx]; pitch = dc_pitch; colormap = dc_colormap; @@ 
-619,8 +619,8 @@ void rt_subclamp1col_RGBA_c (int hx, int sx, int yl, int yh) void rt_subclamp4cols_RGBA_c (int sx, int yl, int yh) { BYTE *colormap; - canvas_pixel_t *source; - canvas_pixel_t *dest; + uint32_t *source; + uint32_t *dest; int count; int pitch; @@ -629,7 +629,7 @@ void rt_subclamp4cols_RGBA_c (int sx, int yl, int yh) return; count++; - dest = ylookup[yl] + sx + dc_destorg; + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl*4]; pitch = dc_pitch; colormap = dc_colormap; @@ -678,8 +678,8 @@ void rt_tlatesubclamp4cols_RGBA_c (int sx, int yl, int yh) void rt_revsubclamp1col_RGBA_c (int hx, int sx, int yl, int yh) { BYTE *colormap; - canvas_pixel_t *source; - canvas_pixel_t *dest; + uint32_t *source; + uint32_t *dest; int count; int pitch; @@ -690,7 +690,7 @@ void rt_revsubclamp1col_RGBA_c (int hx, int sx, int yl, int yh) DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; - dest = ylookup[yl] + sx + dc_destorg; + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl*4 + hx]; pitch = dc_pitch; colormap = dc_colormap; @@ -721,8 +721,8 @@ void rt_revsubclamp1col_RGBA_c (int hx, int sx, int yl, int yh) void rt_revsubclamp4cols_RGBA_c (int sx, int yl, int yh) { BYTE *colormap; - canvas_pixel_t *source; - canvas_pixel_t *dest; + uint32_t *source; + uint32_t *dest; int count; int pitch; @@ -733,7 +733,7 @@ void rt_revsubclamp4cols_RGBA_c (int sx, int yl, int yh) DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; - dest = ylookup[yl] + sx + dc_destorg; + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl*4]; pitch = dc_pitch; colormap = dc_colormap; @@ -780,11 +780,11 @@ void rt_tlaterevsubclamp4cols_RGBA_c (int sx, int yl, int yh) // Before each pass through a rendering loop that uses these routines, // call this function to set up the span pointers. -void rt_initcols_rgba (canvas_pixel_t *buff) +void rt_initcols_rgba (BYTE *buff) { int y; - dc_temp_rgba = buff == NULL ? 
dc_temp_rgbabuff_rgba : buff; + dc_temp_rgba = buff == NULL ? dc_temp_rgbabuff_rgba : (uint32_t*)buff; for (y = 3; y >= 0; y--) horizspan[y] = dc_ctspan[y] = &dc_tspans[y][0]; } @@ -794,7 +794,7 @@ void rt_initcols_rgba (canvas_pixel_t *buff) void R_DrawColumnHorizP_RGBA_C (void) { int count = dc_count; - canvas_pixel_t *dest; + uint32_t *dest; fixed_t fracstep; fixed_t frac; @@ -855,7 +855,7 @@ void R_FillColumnHorizP_RGBA_C (void) { int count = dc_count; BYTE color = dc_color; - canvas_pixel_t *dest; + uint32_t *dest; if (count <= 0) return; diff --git a/src/r_main.cpp b/src/r_main.cpp index d85cd62a07..9dc61eea32 100644 --- a/src/r_main.cpp +++ b/src/r_main.cpp @@ -577,9 +577,12 @@ void R_HighlightPortal (PortalDrawseg* pds) // [ZZ] NO OVERFLOW CHECKS HERE // I believe it won't break. if it does, blame me. :( + if (r_swtruecolor) // Assuming this is just a debug function + return; + BYTE color = (BYTE)BestColor((DWORD *)GPalette.BaseColors, 255, 0, 0, 0, 255); - canvas_pixel_t* pixels = RenderTarget->GetBuffer(); + BYTE* pixels = RenderTarget->GetBuffer(); // top edge for (int x = pds->x1; x < pds->x2; x++) { @@ -624,12 +627,26 @@ void R_EnterPortal (PortalDrawseg* pds, int depth) int Ytop = pds->ceilingclip[x-pds->x1]; int Ybottom = pds->floorclip[x-pds->x1]; - canvas_pixel_t *dest = RenderTarget->GetBuffer() + x + Ytop * spacing; - - for (int y = Ytop; y <= Ybottom; y++) + if (r_swtruecolor) { - *dest = color; - dest += spacing; + uint32_t *dest = (uint32_t*)RenderTarget->GetBuffer() + x + Ytop * spacing; + + uint32_t c = GPalette.BaseColors[color].d; + for (int y = Ytop; y <= Ybottom; y++) + { + *dest = c; + dest += spacing; + } + } + else + { + BYTE *dest = RenderTarget->GetBuffer() + x + Ytop * spacing; + + for (int y = Ytop; y <= Ybottom; y++) + { + *dest = color; + dest += spacing; + } } } @@ -795,10 +812,11 @@ void R_EnterPortal (PortalDrawseg* pds, int depth) void R_SetupBuffer () { - static canvas_pixel_t *lastbuff = NULL; + static BYTE *lastbuff = 
NULL; int pitch = RenderTarget->GetPitch(); - canvas_pixel_t *lineptr = RenderTarget->GetBuffer() + viewwindowy*pitch + viewwindowx; + int pixelsize = r_swtruecolor ? 4 : 1; + BYTE *lineptr = RenderTarget->GetBuffer() + (viewwindowy*pitch + viewwindowx) * pixelsize; if (dc_pitch != pitch || lineptr != lastbuff) { diff --git a/src/r_plane.cpp b/src/r_plane.cpp index 8d0c882ba4..a71590c9d4 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -354,12 +354,12 @@ void R_CalcTiltedLighting (double lval, double lend, int width) // //========================================================================== -void R_MapTiltedPlane (int y, int x1) +void R_MapTiltedPlane_C (int y, int x1) { int x2 = spanend[y]; int width = x2 - x1; double iz, uz, vz; - canvas_pixel_t *fb; + BYTE *fb; DWORD u, v; int i; @@ -478,6 +478,130 @@ void R_MapTiltedPlane (int y, int x1) #endif } +void R_MapTiltedPlane_RGBA (int y, int x1) +{ + int x2 = spanend[y]; + int width = x2 - x1; + double iz, uz, vz; + uint32_t *fb; + DWORD u, v; + int i; + + iz = plane_sz[2] + plane_sz[1]*(centery-y) + plane_sz[0]*(x1-centerx); + + // Lighting is simple. It's just linear interpolation from start to end + if (plane_shade) + { + uz = (iz + plane_sz[0]*width) * planelightfloat; + vz = iz * planelightfloat; + R_CalcTiltedLighting (vz, uz, width); + } + + uz = plane_su[2] + plane_su[1]*(centery-y) + plane_su[0]*(x1-centerx); + vz = plane_sv[2] + plane_sv[1]*(centery-y) + plane_sv[0]*(x1-centerx); + + fb = ylookup[y] + x1 + (uint32_t*)dc_destorg; + + BYTE vshift = 32 - ds_ybits; + BYTE ushift = vshift - ds_xbits; + int umask = ((1 << ds_xbits) - 1) << ds_ybits; + +#if 0 // The "perfect" reference version of this routine. Pretty slow. + // Use it only to see how things are supposed to look. 
+ i = 0; + do + { + double z = 1.f/iz; + + u = SQWORD(uz*z) + pviewx; + v = SQWORD(vz*z) + pviewy; + ds_colormap = tiltlighting[i]; + ds_light = 0; + fb[i++] = ds_colormap[ds_source[(v >> vshift) | ((u >> ushift) & umask)]]; + iz += plane_sz[0]; + uz += plane_su[0]; + vz += plane_sv[0]; + } while (--width >= 0); +#else +//#define SPANSIZE 32 +//#define INVSPAN 0.03125f +//#define SPANSIZE 8 +//#define INVSPAN 0.125f +#define SPANSIZE 16 +#define INVSPAN 0.0625f + + double startz = 1.f/iz; + double startu = uz*startz; + double startv = vz*startz; + double izstep, uzstep, vzstep; + + izstep = plane_sz[0] * SPANSIZE; + uzstep = plane_su[0] * SPANSIZE; + vzstep = plane_sv[0] * SPANSIZE; + x1 = 0; + width++; + + while (width >= SPANSIZE) + { + iz += izstep; + uz += uzstep; + vz += vzstep; + + double endz = 1.f/iz; + double endu = uz*endz; + double endv = vz*endz; + DWORD stepu = SQWORD((endu - startu) * INVSPAN); + DWORD stepv = SQWORD((endv - startv) * INVSPAN); + u = SQWORD(startu) + pviewx; + v = SQWORD(startv) + pviewy; + + for (i = SPANSIZE-1; i >= 0; i--) + { + fb[x1] = *(tiltlighting[x1] + ds_source[(v >> vshift) | ((u >> ushift) & umask)]); + x1++; + u += stepu; + v += stepv; + } + startu = endu; + startv = endv; + width -= SPANSIZE; + } + if (width > 0) + { + if (width == 1) + { + u = SQWORD(startu); + v = SQWORD(startv); + fb[x1] = *(tiltlighting[x1] + ds_source[(v >> vshift) | ((u >> ushift) & umask)]); + } + else + { + double left = width; + iz += plane_sz[0] * left; + uz += plane_su[0] * left; + vz += plane_sv[0] * left; + + double endz = 1.f/iz; + double endu = uz*endz; + double endv = vz*endz; + left = 1.f/left; + DWORD stepu = SQWORD((endu - startu) * left); + DWORD stepv = SQWORD((endv - startv) * left); + u = SQWORD(startu) + pviewx; + v = SQWORD(startv) + pviewy; + + for (; width != 0; width--) + { + fb[x1] = *(tiltlighting[x1] + ds_source[(v >> vshift) | ((u >> ushift) & umask)]); + x1++; + u += stepu; + v += stepv; + } + } + } +#endif +} + 
//========================================================================== // // R_MapColoredPlane @@ -491,7 +615,7 @@ void R_MapColoredPlane_C (int y, int x1) void R_MapColoredPlane_RGBA(int y, int x1) { - canvas_pixel_t *dest = ylookup[y] + x1 + dc_destorg; + uint32_t *dest = ylookup[y] + x1 + (uint32_t*)dc_destorg; int count = (spanend[y] - x1 + 1); uint32_t light = calc_light_multiplier(ds_light); uint32_t color = shade_pal_index(ds_color, light); diff --git a/src/r_plane.h b/src/r_plane.h index ac63501e30..7505ac9957 100644 --- a/src/r_plane.h +++ b/src/r_plane.h @@ -94,6 +94,10 @@ void R_DrawTiltedPlane (visplane_t *pl, double xscale, double yscale, fixed_t al void R_MapVisPlane (visplane_t *pl, void (*mapfunc)(int y, int x1)); extern void(*R_MapColoredPlane)(int y, int x1); +extern void(*R_MapTiltedPlane)(int y, int x1); + +void R_MapTiltedPlane_C(int y, int x1); +void R_MapTiltedPlane_RGBA(int y, int x); void R_MapColoredPlane_C(int y, int x1); void R_MapColoredPlane_RGBA(int y, int x1); diff --git a/src/r_segs.cpp b/src/r_segs.cpp index 548cd994f3..8c71f0fb73 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -1071,7 +1071,7 @@ void R_RenderFakeWallRange (drawseg_t *ds, int x1, int x2) } // prevlineasm1 is like vlineasm1 but skips the loop if only drawing one pixel -inline fixed_t prevline1 (fixed_t vince, BYTE *colormap, fixed_t light, int count, fixed_t vplce, const BYTE *bufplce, canvas_pixel_t *dest) +inline fixed_t prevline1 (fixed_t vince, BYTE *colormap, fixed_t light, int count, fixed_t vplce, const BYTE *bufplce, BYTE *dest) { dc_iscale = vince; dc_colormap = colormap; @@ -1107,6 +1107,8 @@ void wallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *l //extern cycle_t WallScanCycles; //clock (WallScanCycles); + int pixelsize = r_swtruecolor ? 
4 : 1; + rw_pic->GetHeight(); // Make sure texture size is loaded fracbits = 32 - rw_pic->HeightBits; setupvline(fracbits); @@ -1144,7 +1146,7 @@ void wallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *l } dc_source = getcol (rw_pic, (lwal[x] + xoffset) >> FRACBITS); - dc_dest = ylookup[y1ve[0]] + x + dc_destorg; + dc_dest = (ylookup[y1ve[0]] + x)*pixelsize + dc_destorg; dc_count = y2ve[0] - y1ve[0]; iscale = swal[x] * yrepeat; dc_iscale = xs_ToFixed(fracbits, iscale); @@ -1202,7 +1204,7 @@ void wallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *l { if (!(bad & 1)) { - prevline1(vince[z],palookupoffse[z],palookuplight[z],y2ve[z]-y1ve[z],vplce[z],bufplce[z],ylookup[y1ve[z]]+x+z+dc_destorg); + prevline1(vince[z],palookupoffse[z],palookuplight[z],y2ve[z]-y1ve[z],vplce[z],bufplce[z],(ylookup[y1ve[z]]+x+z)*pixelsize+dc_destorg); } bad >>= 1; } @@ -1213,23 +1215,23 @@ void wallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *l { if (u4 > y1ve[z]) { - vplce[z] = prevline1(vince[z],palookupoffse[z], palookuplight[z],u4-y1ve[z],vplce[z],bufplce[z],ylookup[y1ve[z]]+x+z+dc_destorg); + vplce[z] = prevline1(vince[z],palookupoffse[z], palookuplight[z],u4-y1ve[z],vplce[z],bufplce[z],(ylookup[y1ve[z]]+x+z)*pixelsize+dc_destorg); } } if (d4 > u4) { dc_count = d4-u4; - dc_dest = ylookup[u4]+x+dc_destorg; + dc_dest = (ylookup[u4]+x)*pixelsize+dc_destorg; dovline4(); } - canvas_pixel_t *i = x+ylookup[d4]+dc_destorg; + BYTE *i = (x+ylookup[d4])*pixelsize+dc_destorg; for (z = 0; z < 4; ++z) { if (y2ve[z] > d4) { - prevline1(vince[z],palookupoffse[0],palookuplight[0],y2ve[z]-d4,vplce[z],bufplce[z],i+z); + prevline1(vince[z],palookupoffse[0],palookuplight[0],y2ve[z]-d4,vplce[z],bufplce[z],i+z*pixelsize); } } } @@ -1248,7 +1250,7 @@ void wallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *l } dc_source = getcol (rw_pic, (lwal[x] + xoffset) >> FRACBITS); - dc_dest = ylookup[y1ve[0]] + x + dc_destorg; + 
dc_dest = (ylookup[y1ve[0]] + x) * pixelsize + dc_destorg; dc_count = y2ve[0] - y1ve[0]; iscale = swal[x] * yrepeat; dc_iscale = xs_ToFixed(fracbits, iscale); @@ -1435,7 +1437,7 @@ static void wallscan_np2_ds(drawseg_t *ds, int x1, int x2, short *uwal, short *d } } -inline fixed_t mvline1 (fixed_t vince, BYTE *colormap, int count, fixed_t vplce, const BYTE *bufplce, canvas_pixel_t *dest) +inline fixed_t mvline1 (fixed_t vince, BYTE *colormap, int count, fixed_t vplce, const BYTE *bufplce, BYTE *dest) { dc_iscale = vince; dc_colormap = colormap; @@ -1451,7 +1453,8 @@ void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_ double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x)) { int x, fracbits; - canvas_pixel_t *p; + BYTE *pixel; + int pixelsize, pixelshift; int y1ve[4], y2ve[4], u4, d4, startx, dax, z; char bad; float light = rw_light - rw_lightstep; @@ -1473,6 +1476,9 @@ void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_ //extern cycle_t WallScanCycles; //clock (WallScanCycles); + pixelsize = r_swtruecolor ? 4 : 1; + pixelshift = r_swtruecolor ? 
2 : 0; + rw_pic->GetHeight(); // Make sure texture size is loaded fracbits = 32- rw_pic->HeightBits; setupmvline(fracbits); @@ -1480,7 +1486,7 @@ void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_ basecolormapdata = basecolormap->Maps; x = startx = x1; - p = x + dc_destorg; + pixel = x * pixelsize + dc_destorg; bool fixed = (fixedcolormap != NULL || fixedlightlev >= 0); if (fixed) @@ -1489,9 +1495,13 @@ void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_ palookupoffse[1] = dc_colormap; palookupoffse[2] = dc_colormap; palookupoffse[3] = dc_colormap; + palookuplight[0] = 0; + palookuplight[1] = 0; + palookuplight[2] = 0; + palookuplight[3] = 0; } - for(; (x < x2) && (((size_t)p/sizeof(canvas_pixel_t)) & 3); ++x, ++p) + for(; (x < x2) && (((size_t)pixel >> pixelshift) & 3); ++x, pixel += pixelsize) { light += rw_lightstep; y1ve[0] = uwal[x];//max(uwal[x],umost[x]); @@ -1505,7 +1515,7 @@ void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_ } dc_source = getcol (rw_pic, (lwal[x] + xoffset) >> FRACBITS); - dc_dest = ylookup[y1ve[0]] + p; + dc_dest = ylookup[y1ve[0]] * pixelsize + pixel; dc_count = y2ve[0] - y1ve[0]; iscale = swal[x] * yrepeat; dc_iscale = xs_ToFixed(fracbits, iscale); @@ -1514,7 +1524,7 @@ void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_ domvline1(); } - for(; x < x2-3; x += 4, p+= 4) + for(; x < x2-3; x += 4, pixel += 4 * pixelsize) { bad = 0; for (z = 3, dax = x+3; z >= 0; --z, --dax) @@ -1539,7 +1549,16 @@ void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_ for (z = 0; z < 4; ++z) { light += rw_lightstep; - palookupoffse[z] = basecolormapdata + (GETPALOOKUP (light, wallshade) << COLORMAPSHIFT); + if (r_swtruecolor) + { + palookupoffse[z] = basecolormapdata; + palookuplight[z] = LIGHTSCALE(light, wallshade); + } + else + { + palookupoffse[z] = basecolormapdata + (GETPALOOKUP(light, wallshade) << 
COLORMAPSHIFT); + palookuplight[z] = 0; + } } } @@ -1552,7 +1571,7 @@ void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_ { if (!(bad & 1)) { - mvline1(vince[z],palookupoffse[z],y2ve[z]-y1ve[z],vplce[z],bufplce[z],ylookup[y1ve[z]]+p+z); + mvline1(vince[z],palookupoffse[z],y2ve[z]-y1ve[z],vplce[z],bufplce[z],(ylookup[y1ve[z]]+z)*pixelsize+pixel); } bad >>= 1; } @@ -1563,27 +1582,27 @@ void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_ { if (u4 > y1ve[z]) { - vplce[z] = mvline1(vince[z],palookupoffse[z],u4-y1ve[z],vplce[z],bufplce[z],ylookup[y1ve[z]]+p+z); + vplce[z] = mvline1(vince[z],palookupoffse[z],u4-y1ve[z],vplce[z],bufplce[z],(ylookup[y1ve[z]]+z)*pixelsize+pixel); } } if (d4 > u4) { dc_count = d4-u4; - dc_dest = ylookup[u4]+p; + dc_dest = ylookup[u4]*pixelsize+pixel; domvline4(); } - canvas_pixel_t *i = p+ylookup[d4]; + BYTE *i = pixel+ylookup[d4]*pixelsize; for (z = 0; z < 4; ++z) { if (y2ve[z] > d4) { - mvline1(vince[z],palookupoffse[0],y2ve[z]-d4,vplce[z],bufplce[z],i+z); + mvline1(vince[z],palookupoffse[0],y2ve[z]-d4,vplce[z],bufplce[z],i+z*pixelsize); } } } - for(; x < x2; ++x, ++p) + for(; x < x2; ++x, pixel += pixelsize) { light += rw_lightstep; y1ve[0] = uwal[x]; @@ -1597,7 +1616,7 @@ void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_ } dc_source = getcol (rw_pic, (lwal[x] + xoffset) >> FRACBITS); - dc_dest = ylookup[y1ve[0]] + p; + dc_dest = ylookup[y1ve[0]]*pixelsize + pixel; dc_count = y2ve[0] - y1ve[0]; iscale = swal[x] * yrepeat; dc_iscale = xs_ToFixed(fracbits, iscale); @@ -1611,7 +1630,7 @@ void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_ NetUpdate (); } -inline void preptmvline1 (fixed_t vince, BYTE *colormap, fixed_t light, int count, fixed_t vplce, const BYTE *bufplce, canvas_pixel_t *dest) +inline void preptmvline1 (fixed_t vince, BYTE *colormap, fixed_t light, int count, fixed_t vplce, const BYTE *bufplce, BYTE *dest) { 
dc_iscale = vince; dc_colormap = colormap; @@ -1628,7 +1647,8 @@ void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, f fixed_t (*tmvline1)(); void (*tmvline4)(); int x, fracbits; - canvas_pixel_t *p; + BYTE *pixel; + int pixelsize, pixelshift; int y1ve[4], y2ve[4], u4, d4, startx, dax, z; char bad; float light = rw_light - rw_lightstep; @@ -1651,6 +1671,9 @@ void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, f //extern cycle_t WallScanCycles; //clock (WallScanCycles); + pixelsize = r_swtruecolor ? 4 : 1; + pixelshift = r_swtruecolor ? 2 : 0; + rw_pic->GetHeight(); // Make sure texture size is loaded fracbits = 32 - rw_pic->HeightBits; setuptmvline(fracbits); @@ -1659,7 +1682,7 @@ void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, f fixed_t centeryfrac = FLOAT2FIXED(CenterY); x = startx = x1; - p = x + dc_destorg; + pixel = x * pixelsize + dc_destorg; bool fixed = (fixedcolormap != NULL || fixedlightlev >= 0); if (fixed) @@ -1674,7 +1697,7 @@ void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, f palookuplight[3] = 0; } - for(; (x < x2) && (((size_t)p / sizeof(canvas_pixel_t)) & 3); ++x, ++p) + for(; (x < x2) && (((size_t)pixel >> pixelshift) & 3); ++x, pixel += pixelsize) { light += rw_lightstep; y1ve[0] = uwal[x];//max(uwal[x],umost[x]); @@ -1687,7 +1710,7 @@ void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, f } dc_source = getcol (rw_pic, (lwal[x] + xoffset) >> FRACBITS); - dc_dest = ylookup[y1ve[0]] + p; + dc_dest = ylookup[y1ve[0]] * pixelsize + pixel; dc_count = y2ve[0] - y1ve[0]; iscale = swal[x] * yrepeat; dc_iscale = xs_ToFixed(fracbits, iscale); @@ -1696,7 +1719,7 @@ void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, f tmvline1(); } - for(; x < x2-3; x += 4, p+= 4) + for(; x < x2-3; x += 4, pixel += 4 * pixelsize) { bad = 0; for (z = 3, dax = x+3; z >= 0; --z, --dax) @@ -1742,7 +1765,7 @@ void 
transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, f { if (!(bad & 1)) { - preptmvline1(vince[z],palookupoffse[z],palookuplight[z],y2ve[z]-y1ve[z],vplce[z],bufplce[z],ylookup[y1ve[z]]+p+z); + preptmvline1(vince[z],palookupoffse[z],palookuplight[z],y2ve[z]-y1ve[z],vplce[z],bufplce[z],(ylookup[y1ve[z]]+z)*pixelsize+pixel); tmvline1(); } bad >>= 1; @@ -1754,7 +1777,7 @@ void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, f { if (u4 > y1ve[z]) { - preptmvline1(vince[z],palookupoffse[z],palookuplight[z],u4-y1ve[z],vplce[z],bufplce[z],ylookup[y1ve[z]]+p+z); + preptmvline1(vince[z],palookupoffse[z],palookuplight[z],u4-y1ve[z],vplce[z],bufplce[z],(ylookup[y1ve[z]]+z)*pixelsize+pixel); vplce[z] = tmvline1(); } } @@ -1762,21 +1785,21 @@ void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, f if (d4 > u4) { dc_count = d4-u4; - dc_dest = ylookup[u4]+p; + dc_dest = ylookup[u4]*pixelsize+pixel; tmvline4(); } - canvas_pixel_t *i = p+ylookup[d4]; + BYTE *i = pixel+ylookup[d4]*pixelsize; for (z = 0; z < 4; ++z) { if (y2ve[z] > d4) { - preptmvline1(vince[z],palookupoffse[0],palookuplight[0],y2ve[z]-d4,vplce[z],bufplce[z],i+z); + preptmvline1(vince[z],palookupoffse[0],palookuplight[0],y2ve[z]-d4,vplce[z],bufplce[z],i+z*pixelsize); tmvline1(); } } } - for(; x < x2; ++x, ++p) + for(; x < x2; ++x, pixel += pixelsize) { light += rw_lightstep; y1ve[0] = uwal[x]; @@ -1789,7 +1812,7 @@ void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, f } dc_source = getcol (rw_pic, (lwal[x] + xoffset) >> FRACBITS); - dc_dest = ylookup[y1ve[0]] + p; + dc_dest = ylookup[y1ve[0]] * pixelsize + pixel; dc_count = y2ve[0] - y1ve[0]; iscale = swal[x] * yrepeat; dc_iscale = xs_ToFixed(fracbits, iscale); diff --git a/src/r_things.cpp b/src/r_things.cpp index 22538bd406..2abcc0e12f 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -98,6 +98,7 @@ EXTERN_CVAR (Bool, st_scale) EXTERN_CVAR(Bool, 
r_shadercolormaps) EXTERN_CVAR(Int, r_drawfuzz) EXTERN_CVAR(Bool, r_deathcamera); +EXTERN_CVAR(Bool, r_swtruecolor) // // Sprite rotation 0 is facing the viewer, @@ -132,7 +133,7 @@ EXTERN_CVAR (Bool, r_drawvoxels) // int OffscreenBufferWidth, OffscreenBufferHeight; -canvas_pixel_t *OffscreenColorBuffer; +BYTE *OffscreenColorBuffer; FCoverageBuffer *OffscreenCoverageBuffer; // @@ -244,6 +245,7 @@ bool sprflipvert; void R_DrawMaskedColumn (const BYTE *column, const FTexture::Span *span) { + int pixelsize = r_swtruecolor ? 4 : 1; const fixed_t centeryfrac = FLOAT2FIXED(CenterY); const fixed_t texturemid = FLOAT2FIXED(dc_texturemid); while (span->Length != 0) @@ -314,7 +316,7 @@ void R_DrawMaskedColumn (const BYTE *column, const FTexture::Span *span) } } dc_source = column + top; - dc_dest = ylookup[dc_yl] + dc_x + dc_destorg; + dc_dest = (ylookup[dc_yl] + dc_x) * pixelsize + dc_destorg; dc_count = dc_yh - dc_yl + 1; colfunc (); } @@ -688,6 +690,7 @@ void R_DrawVisVoxel(vissprite_t *spr, int minslabz, int maxslabz, short *cliptop // Blend the voxel, if that's what we need to do. if ((flags & ~DVF_MIRRORED) != 0) { + int pixelsize = r_swtruecolor ? 
4 : 1; for (int x = 0; x < viewwidth; ++x) { if (!(flags & DVF_SPANSONLY) && (x & 3) == 0) @@ -702,7 +705,7 @@ void R_DrawVisVoxel(vissprite_t *spr, int minslabz, int maxslabz, short *cliptop dc_yl = span->Start; dc_yh = span->Stop - 1; dc_count = span->Stop - span->Start; - dc_dest = ylookup[span->Start] + x + dc_destorg; + dc_dest = (ylookup[span->Start] + x) * pixelsize + dc_destorg; colfunc(); } else @@ -2602,7 +2605,7 @@ static void R_DrawMaskedSegsBehindParticle (const vissprite_t *vis) void R_DrawParticle_C (vissprite_t *vis) { int spacing; - canvas_pixel_t *dest; + BYTE *dest; BYTE color = vis->Style.colormap[vis->startfrac]; int yl = vis->y1; int ycount = vis->y2 - yl + 1; @@ -2668,7 +2671,7 @@ void R_DrawParticle_C (vissprite_t *vis) void R_DrawParticle_RGBA(vissprite_t *vis) { int spacing; - canvas_pixel_t *dest; + uint32_t *dest; BYTE color = vis->Style.colormap[vis->startfrac]; int yl = vis->y1; int ycount = vis->y2 - yl + 1; @@ -2698,7 +2701,7 @@ void R_DrawParticle_RGBA(vissprite_t *vis) dc_x = x; if (R_ClipSpriteColumnWithPortals(vis)) continue; - dest = ylookup[yl] + x + dc_destorg; + dest = ylookup[yl] + x + (uint32_t*)dc_destorg; for (int y = 0; y < ycount; y++) { uint32_t bg_red = (*dest >> 16) & 0xff; @@ -2759,6 +2762,8 @@ void R_DrawVoxel(const FVector3 &globalpos, FAngle viewangle, R_SetupDrawSlab(colormap); + int pixelsize = r_swtruecolor ? 4 : 1; + // Select mip level i = abs(DMulScale6(dasprx - globalposx, cosang, daspry - globalposy, sinang)); i = DivScale6(i, MIN(daxscale, dayscale)); @@ -3012,7 +3017,7 @@ void R_DrawVoxel(const FVector3 &globalpos, FAngle viewangle, if (!(flags & DVF_OFFSCREEN)) { // Draw directly to the screen. 
- R_DrawSlab(xxr - xxl, yplc[xxl], z2 - z1, yinc, col, ylookup[z1] + lxt + xxl + dc_destorg); + R_DrawSlab(xxr - xxl, yplc[xxl], z2 - z1, yinc, col, (ylookup[z1] + lxt + xxl) * pixelsize + dc_destorg); } else { @@ -3243,12 +3248,12 @@ void R_CheckOffscreenBuffer(int width, int height, bool spansonly) { if (OffscreenColorBuffer == NULL) { - OffscreenColorBuffer = new canvas_pixel_t[width * height]; + OffscreenColorBuffer = new BYTE[width * height * 4]; } else if (OffscreenBufferWidth != width || OffscreenBufferHeight != height) { delete[] OffscreenColorBuffer; - OffscreenColorBuffer = new canvas_pixel_t[width * height]; + OffscreenColorBuffer = new BYTE[width * height * 4]; } } OffscreenBufferWidth = width; diff --git a/src/v_draw.cpp b/src/v_draw.cpp index 984375f255..ed6571ad36 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -179,7 +179,7 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) fixedcolormap = dc_colormap; ESPSResult mode = R_SetPatchStyle (parms.style, parms.Alpha, 0, parms.fillcolor); - canvas_pixel_t *destorgsave = dc_destorg; + BYTE *destorgsave = dc_destorg; dc_destorg = screen->GetBuffer(); if (dc_destorg == NULL) { @@ -1021,7 +1021,7 @@ void DCanvas::PUTTRANSDOT (int xx, int yy, int basecolor, int level) if (r_swtruecolor) { - canvas_pixel_t *spot = GetBuffer() + oldyyshifted + xx; + uint32_t *spot = (uint32_t*)GetBuffer() + oldyyshifted + xx; uint32_t fg = shade_pal_index(basecolor, calc_light_multiplier(0)); uint32_t fg_red = (fg >> 16) & 0xff; @@ -1040,7 +1040,7 @@ void DCanvas::PUTTRANSDOT (int xx, int yy, int basecolor, int level) } else { - canvas_pixel_t *spot = GetBuffer() + oldyyshifted + xx; + BYTE *spot = GetBuffer() + oldyyshifted + xx; DWORD *bg2rgb = Col2RGB8[1+level]; DWORD *fg2rgb = Col2RGB8[63-level]; DWORD fg = fg2rgb[basecolor]; @@ -1091,27 +1091,62 @@ void DCanvas::DrawLine(int x0, int y0, int x1, int y1, int palColor, uint32 real { swapvalues (x0, x1); } - memset (GetBuffer() + y0*GetPitch() + x0, 
palColor, deltaX+1); + if (r_swtruecolor) + { + uint32_t *spot = (uint32_t*)GetBuffer() + y0*GetPitch() + x0; + for (int i = 0; i <= deltaX; i++) + spot[i] = palColor; + } + else + { + memset (GetBuffer() + y0*GetPitch() + x0, palColor, deltaX+1); + } } else if (deltaX == 0) { // vertical line - canvas_pixel_t *spot = GetBuffer() + y0*GetPitch() + x0; - int pitch = GetPitch (); - do + if (r_swtruecolor) { - *spot = palColor; - spot += pitch; - } while (--deltaY != 0); + uint32_t *spot = (uint32_t*)GetBuffer() + y0*GetPitch() + x0; + int pitch = GetPitch(); + do + { + *spot = palColor; + spot += pitch; + } while (--deltaY != 0); + } + else + { + BYTE *spot = GetBuffer() + y0*GetPitch() + x0; + int pitch = GetPitch(); + do + { + *spot = palColor; + spot += pitch; + } while (--deltaY != 0); + } } else if (deltaX == deltaY) { // diagonal line. - canvas_pixel_t *spot = GetBuffer() + y0*GetPitch() + x0; - int advance = GetPitch() + xDir; - do + if (r_swtruecolor) { - *spot = palColor; - spot += advance; - } while (--deltaY != 0); + uint32_t *spot = (uint32_t*)GetBuffer() + y0*GetPitch() + x0; + int advance = GetPitch() + xDir; + do + { + *spot = palColor; + spot += advance; + } while (--deltaY != 0); + } + else + { + BYTE *spot = GetBuffer() + y0*GetPitch() + x0; + int advance = GetPitch() + xDir; + do + { + *spot = palColor; + spot += advance; + } while (--deltaY != 0); + } } else { @@ -1231,7 +1266,6 @@ void DCanvas::DrawPixel(int x, int y, int palColor, uint32 realcolor) void DCanvas::Clear (int left, int top, int right, int bottom, int palcolor, uint32 color) { int x, y; - canvas_pixel_t *dest; if (left == right || top == bottom) { @@ -1261,12 +1295,26 @@ void DCanvas::Clear (int left, int top, int right, int bottom, int palcolor, uin palcolor = PalFromRGB(color); } - dest = Buffer + top * Pitch + left; - x = right - left; - for (y = top; y < bottom; y++) + if (r_swtruecolor) { - memset(dest, palcolor, x); - dest += Pitch; + uint32_t *dest = (uint32_t*)Buffer + top * 
Pitch + left; + x = right - left; + for (y = top; y < bottom; y++) + { + for (int i = 0; i < x; i++) + dest[i] = palcolor; + dest += Pitch; + } + } + else + { + BYTE *dest = Buffer + top * Pitch + left; + x = right - left; + for (y = top; y < bottom; y++) + { + memset(dest, palcolor, x); + dest += Pitch; + } } } @@ -1452,11 +1500,14 @@ void DCanvas::FillSimplePoly(FTexture *tex, FVector2 *points, int npoints, // V_DrawBlock // Draw a linear block of pixels into the view buffer. // -void DCanvas::DrawBlock (int x, int y, int _width, int _height, const canvas_pixel_t *src) const +void DCanvas::DrawBlock (int x, int y, int _width, int _height, const BYTE *src) const { + if (r_swtruecolor) + return; + int srcpitch = _width; int destpitch; - canvas_pixel_t *dest; + BYTE *dest; if (ClipBox (x, y, _width, _height, src, srcpitch)) { @@ -1468,7 +1519,7 @@ void DCanvas::DrawBlock (int x, int y, int _width, int _height, const canvas_pix do { - memcpy (dest, src, _width * sizeof(canvas_pixel_t)); + memcpy (dest, src, _width); src += srcpitch; dest += destpitch; } while (--_height); @@ -1478,9 +1529,12 @@ void DCanvas::DrawBlock (int x, int y, int _width, int _height, const canvas_pix // V_GetBlock // Gets a linear block of pixels from the view buffer. // -void DCanvas::GetBlock (int x, int y, int _width, int _height, canvas_pixel_t *dest) const +void DCanvas::GetBlock (int x, int y, int _width, int _height, BYTE *dest) const { - const canvas_pixel_t *src; + if (r_swtruecolor) + return; + + const BYTE *src; #ifdef RANGECHECK if (x<0 @@ -1496,14 +1550,14 @@ void DCanvas::GetBlock (int x, int y, int _width, int _height, canvas_pixel_t *d while (_height--) { - memcpy (dest, src, _width * sizeof(canvas_pixel_t)); + memcpy (dest, src, _width); src += Pitch; dest += _width; } } // Returns true if the box was completely clipped. False otherwise. 
-bool DCanvas::ClipBox (int &x, int &y, int &w, int &h, const canvas_pixel_t *&src, const int srcpitch) const +bool DCanvas::ClipBox (int &x, int &y, int &w, int &h, const BYTE *&src, const int srcpitch) const { if (x >= Width || y >= Height || x+w <= 0 || y+h <= 0) { // Completely clipped off screen diff --git a/src/v_video.cpp b/src/v_video.cpp index 2fb46e88ae..01043b8bc9 100644 --- a/src/v_video.cpp +++ b/src/v_video.cpp @@ -345,7 +345,6 @@ void DCanvas::Dim (PalEntry color, float damount, int x1, int y1, int w, int h) return; int gap; - canvas_pixel_t *spot; int x, y; if (x1 >= Width || y1 >= Height) @@ -365,11 +364,12 @@ void DCanvas::Dim (PalEntry color, float damount, int x1, int y1, int w, int h) return; } - spot = Buffer + x1 + y1*Pitch; gap = Pitch - w; if (r_swtruecolor) { + uint32_t *spot = (uint32_t*)Buffer + x1 + y1*Pitch; + uint32_t fg = color.d; uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; @@ -402,6 +402,8 @@ void DCanvas::Dim (PalEntry color, float damount, int x1, int y1, int w, int h) } else { + BYTE *spot = Buffer + x1 + y1*Pitch; + DWORD *bg2rgb; DWORD fg; @@ -441,12 +443,12 @@ void DCanvas::Dim (PalEntry color, float damount, int x1, int y1, int w, int h) // //========================================================================== -void DCanvas::GetScreenshotBuffer(const canvas_pixel_t *&buffer, int &pitch, ESSType &color_type) +void DCanvas::GetScreenshotBuffer(const BYTE *&buffer, int &pitch, ESSType &color_type) { Lock(true); buffer = GetBuffer(); pitch = GetPitch(); - color_type = SS_PAL; + color_type = r_swtruecolor ? 
SS_BGRA : SS_PAL; } //========================================================================== @@ -797,8 +799,8 @@ DSimpleCanvas::DSimpleCanvas (int width, int height) Pitch = width + MAX(0, CPU.DataL1LineSize - 8); } } - MemBuffer = new canvas_pixel_t[Pitch * height]; - memset (MemBuffer, 0, Pitch * height * sizeof(canvas_pixel_t)); + MemBuffer = new BYTE[Pitch * height * 4]; + memset (MemBuffer, 0, Pitch * height * 4); } //========================================================================== @@ -917,7 +919,7 @@ void DFrameBuffer::DrawRateStuff () { int i = I_GetTime(false); int tics = i - LastTic; - canvas_pixel_t *buffer = GetBuffer(); + BYTE *buffer = GetBuffer(); LastTic = i; if (tics > 20) tics = 20; @@ -925,10 +927,21 @@ void DFrameBuffer::DrawRateStuff () // Buffer can be NULL if we're doing hardware accelerated 2D if (buffer != NULL) { - buffer += (GetHeight()-1) * GetPitch(); - - for (i = 0; i < tics*2; i += 2) buffer[i] = 0xff; - for ( ; i < 20*2; i += 2) buffer[i] = 0x00; + if (r_swtruecolor) + { + uint32_t *buffer32 = (uint32_t*)buffer; + buffer32 += (GetHeight() - 1) * GetPitch(); + + for (i = 0; i < tics * 2; i += 2) buffer32[i] = 0xffffffff; + for (; i < 20 * 2; i += 2) buffer32[i] = 0xff000000; + } + else + { + buffer += (GetHeight() - 1) * GetPitch(); + + for (i = 0; i < tics * 2; i += 2) buffer[i] = 0xff; + for (; i < 20 * 2; i += 2) buffer[i] = 0x00; + } } else { diff --git a/src/v_video.h b/src/v_video.h index 27c09ee36f..fa1ce83df0 100644 --- a/src/v_video.h +++ b/src/v_video.h @@ -189,7 +189,7 @@ public: virtual ~DCanvas (); // Member variable access - inline canvas_pixel_t *GetBuffer () const { return Buffer; } + inline BYTE *GetBuffer () const { return Buffer; } inline int GetWidth () const { return Width; } inline int GetHeight () const { return Height; } inline int GetPitch () const { return Pitch; } @@ -202,10 +202,10 @@ public: virtual bool IsLocked () { return Buffer != NULL; } // Returns true if the surface is locked // Draw a 
linear block of pixels into the canvas - virtual void DrawBlock (int x, int y, int width, int height, const canvas_pixel_t *src) const; + virtual void DrawBlock (int x, int y, int width, int height, const BYTE *src) const; // Reads a linear block of pixels into the view buffer. - virtual void GetBlock (int x, int y, int width, int height, canvas_pixel_t *dest) const; + virtual void GetBlock (int x, int y, int width, int height, BYTE *dest) const; // Dim the entire canvas for the menus virtual void Dim (PalEntry color = 0); @@ -237,7 +237,7 @@ public: // Retrieves a buffer containing image data for a screenshot. // Hint: Pitch can be negative for upside-down images, in which case buffer // points to the last row in the buffer, which will be the first row output. - virtual void GetScreenshotBuffer(const canvas_pixel_t *&buffer, int &pitch, ESSType &color_type); + virtual void GetScreenshotBuffer(const BYTE *&buffer, int &pitch, ESSType &color_type); // Releases the screenshot buffer. virtual void ReleaseScreenshotBuffer(); @@ -262,13 +262,13 @@ public: void DrawChar (FFont *font, int normalcolor, int x, int y, BYTE character, int tag_first, ...); protected: - canvas_pixel_t *Buffer; + BYTE *Buffer; int Width; int Height; int Pitch; int LockCount; - bool ClipBox (int &left, int &top, int &width, int &height, const canvas_pixel_t *&src, const int srcpitch) const; + bool ClipBox (int &left, int &top, int &width, int &height, const BYTE *&src, const int srcpitch) const; void DrawTextureV(FTexture *img, double x, double y, uint32 tag, va_list tags) = delete; virtual void DrawTextureParms(FTexture *img, DrawParms &parms); bool ParseDrawTextureTags (FTexture *img, double x, double y, uint32 tag, va_list tags, DrawParms *parms, bool fortext) const; @@ -297,7 +297,7 @@ public: void Unlock (); protected: - canvas_pixel_t *MemBuffer; + BYTE *MemBuffer; DSimpleCanvas() {} }; diff --git a/src/win32/fb_d3d9.cpp b/src/win32/fb_d3d9.cpp index 0cc9045ee3..0cd847b978 100644 --- 
a/src/win32/fb_d3d9.cpp +++ b/src/win32/fb_d3d9.cpp @@ -1316,7 +1316,7 @@ void D3DFB::Draw3DPart(bool copy3d) else { uint32_t *dest = (uint32_t *)lockrect.pBits; - uint32_t *src = MemBuffer; + uint32_t *src = (uint32_t*)MemBuffer; for (int y = 0; y < Height; y++) { memcpy(dest, src, Width * sizeof(uint32_t)); @@ -1744,7 +1744,7 @@ void D3DFB::SetBlendingRect(int x1, int y1, int x2, int y2) // //========================================================================== -void D3DFB::GetScreenshotBuffer(const canvas_pixel_t *&buffer, int &pitch, ESSType &color_type) +void D3DFB::GetScreenshotBuffer(const BYTE *&buffer, int &pitch, ESSType &color_type) { D3DLOCKED_RECT lrect; @@ -1770,7 +1770,7 @@ void D3DFB::GetScreenshotBuffer(const canvas_pixel_t *&buffer, int &pitch, ESSTy } else { - buffer = (const canvas_pixel_t *)lrect.pBits; + buffer = (const BYTE *)lrect.pBits; pitch = lrect.Pitch; color_type = SS_BGRA; } diff --git a/src/win32/fb_ddraw.cpp b/src/win32/fb_ddraw.cpp index 9be571f985..fbdf035a3a 100644 --- a/src/win32/fb_ddraw.cpp +++ b/src/win32/fb_ddraw.cpp @@ -999,8 +999,8 @@ DDrawFB::LockSurfRes DDrawFB::LockSurf (LPRECT lockrect, LPDIRECTDRAWSURFACE toL LOG1 ("Final result after restoration attempts: %08lx\n", hr); return NoGood; } - Buffer = (canvas_pixel_t *)desc.lpSurface; - Pitch = desc.lPitch / sizeof(canvas_pixel_t); + Buffer = (BYTE *)desc.lpSurface; + Pitch = desc.lPitch; BufferingNow = false; return wasLost ? 
GoodWasLost : Good; } diff --git a/src/win32/win32iface.h b/src/win32/win32iface.h index d26765100c..0b3333d639 100644 --- a/src/win32/win32iface.h +++ b/src/win32/win32iface.h @@ -252,7 +252,7 @@ public: bool PaintToWindow (); void SetVSync (bool vsync); void NewRefreshRate(); - void GetScreenshotBuffer(const canvas_pixel_t *&buffer, int &pitch, ESSType &color_type); + void GetScreenshotBuffer(const BYTE *&buffer, int &pitch, ESSType &color_type); void ReleaseScreenshotBuffer(); void SetBlendingRect (int x1, int y1, int x2, int y2); bool Begin2D (bool copy3d); From 05220a713320b0b14525baad647f8ace577e19ee Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 31 May 2016 09:36:18 +0200 Subject: [PATCH 006/912] Added IsBgra() to DCanvas Changed SWRender output format to be decided by IsBgra() --- src/f_wipe.cpp | 10 ++++----- src/posix/cocoa/i_video.mm | 10 +++++---- src/posix/hardware.h | 2 +- src/posix/sdl/hardware.cpp | 5 ++++- src/posix/sdl/sdlvideo.cpp | 6 +++--- src/posix/sdl/sdlvideo.h | 2 +- src/r_draw.cpp | 39 +++++++++++++++++++++++++++++++++- src/r_drawt.cpp | 2 -- src/r_main.cpp | 28 ++++++++++++++---------- src/r_main.h | 2 ++ src/r_segs.cpp | 2 -- src/r_swrenderer.cpp | 12 ++++++----- src/r_things.cpp | 1 - src/textures/canvastexture.cpp | 2 +- src/v_draw.cpp | 16 ++++++-------- src/v_video.cpp | 26 +++++++++++------------ src/v_video.h | 8 ++++--- src/win32/fb_d3d9.cpp | 17 +++++++-------- src/win32/fb_ddraw.cpp | 7 +----- src/win32/hardware.cpp | 5 ++++- src/win32/hardware.h | 2 +- src/win32/win32iface.h | 8 +++---- src/win32/win32video.cpp | 21 ++++++++++-------- wadsrc/static/menudef.txt | 2 +- 24 files changed, 139 insertions(+), 96 deletions(-) diff --git a/src/f_wipe.cpp b/src/f_wipe.cpp index 7e1ec678e8..84b6036e48 100644 --- a/src/f_wipe.cpp +++ b/src/f_wipe.cpp @@ -33,8 +33,6 @@ // SCREEN WIPE PACKAGE // -EXTERN_CVAR(Bool, r_swtruecolor) - static int CurrentWipeType; static short *wipe_scr_start; @@ -385,7 +383,7 @@ static bool 
(*wipes[])(int) = // Returns true if the wipe should be performed. bool wipe_StartScreen (int type) { - if (r_swtruecolor) + if (screen->IsBgra()) return false; CurrentWipeType = clamp(type, 0, wipe_NUMWIPES - 1); @@ -401,7 +399,7 @@ bool wipe_StartScreen (int type) void wipe_EndScreen (void) { - if (r_swtruecolor) + if (screen->IsBgra()) return; if (CurrentWipeType) @@ -420,7 +418,7 @@ bool wipe_ScreenWipe (int ticks) { bool rc; - if (r_swtruecolor) + if (screen->IsBgra()) return true; if (CurrentWipeType == wipe_None) @@ -436,7 +434,7 @@ bool wipe_ScreenWipe (int ticks) // Final things for the wipe void wipe_Cleanup() { - if (r_swtruecolor) + if (screen->IsBgra()) return; if (wipe_scr_start != NULL) diff --git a/src/posix/cocoa/i_video.mm b/src/posix/cocoa/i_video.mm index 5e073daf3f..c97460a022 100644 --- a/src/posix/cocoa/i_video.mm +++ b/src/posix/cocoa/i_video.mm @@ -96,6 +96,8 @@ EXTERN_CVAR(Bool, ticker ) EXTERN_CVAR(Bool, vid_vsync) EXTERN_CVAR(Bool, vid_hidpi) +CVAR(Bool, swtruecolor, false, CVAR_ARCHIVE) + CUSTOM_CVAR(Bool, fullscreen, false, CVAR_ARCHIVE | CVAR_GLOBALCONFIG) { extern int NewWidth, NewHeight, NewBits, DisplayBits; @@ -199,7 +201,7 @@ public: virtual EDisplayType GetDisplayType() { return DISPLAY_Both; } virtual void SetWindowedScale(float scale); - virtual DFrameBuffer* CreateFrameBuffer(int width, int height, bool fs, DFrameBuffer* old); + virtual DFrameBuffer* CreateFrameBuffer(int width, int height, bool bgra, bool fs, DFrameBuffer* old); virtual void StartModeIterator(int bits, bool fullscreen); virtual bool NextMode(int* width, int* height, bool* letterbox); @@ -518,7 +520,7 @@ bool CocoaVideo::NextMode(int* const width, int* const height, bool* const lette return false; } -DFrameBuffer* CocoaVideo::CreateFrameBuffer(const int width, const int height, const bool fullscreen, DFrameBuffer* const old) +DFrameBuffer* CocoaVideo::CreateFrameBuffer(const int width, const int height, const bool bgra, const bool fullscreen, DFrameBuffer* 
const old) { PalEntry flashColor = 0; int flashAmount = 0; @@ -762,7 +764,7 @@ CocoaVideo* CocoaVideo::GetInstance() CocoaFrameBuffer::CocoaFrameBuffer(int width, int height, bool fullscreen) -: DFrameBuffer(width, height) +: DFrameBuffer(width, height, false) , m_needPaletteUpdate(false) , m_gamma(0.0f) , m_needGammaUpdate(false) @@ -1064,7 +1066,7 @@ void I_CreateRenderer() DFrameBuffer* I_SetMode(int &width, int &height, DFrameBuffer* old) { - return Video->CreateFrameBuffer(width, height, fullscreen, old); + return Video->CreateFrameBuffer(width, height, swtruecolor, fullscreen, old); } bool I_CheckResolution(const int width, const int height, const int bits) diff --git a/src/posix/hardware.h b/src/posix/hardware.h index 618941fe59..3c06cb6c6d 100644 --- a/src/posix/hardware.h +++ b/src/posix/hardware.h @@ -74,7 +74,7 @@ class IVideo virtual EDisplayType GetDisplayType () = 0; virtual void SetWindowedScale (float scale) = 0; - virtual DFrameBuffer *CreateFrameBuffer (int width, int height, bool fs, DFrameBuffer *old) = 0; + virtual DFrameBuffer *CreateFrameBuffer (int width, int height, bool bgra, bool fs, DFrameBuffer *old) = 0; virtual void StartModeIterator (int bits, bool fs) = 0; virtual bool NextMode (int *width, int *height, bool *letterbox) = 0; diff --git a/src/posix/sdl/hardware.cpp b/src/posix/sdl/hardware.cpp index 6142eb1d8e..52bca35e7f 100644 --- a/src/posix/sdl/hardware.cpp +++ b/src/posix/sdl/hardware.cpp @@ -51,6 +51,7 @@ EXTERN_CVAR (Bool, ticker) EXTERN_CVAR (Bool, fullscreen) +EXTERN_CVAR (Bool, swtruecolor) EXTERN_CVAR (Float, vid_winscale) IVideo *Video; @@ -128,7 +129,7 @@ DFrameBuffer *I_SetMode (int &width, int &height, DFrameBuffer *old) fs = fullscreen; break; } - DFrameBuffer *res = Video->CreateFrameBuffer (width, height, fs, old); + DFrameBuffer *res = Video->CreateFrameBuffer (width, height, swtruecolor, fs, old); /* Right now, CreateFrameBuffer cannot return NULL if (res == NULL) @@ -280,6 +281,8 @@ CUSTOM_CVAR (Int, vid_maxfps, 
200, CVAR_ARCHIVE | CVAR_GLOBALCONFIG) } } +CVAR (Bool, swtruecolor, false, CVAR_ARCHIVE) + extern int NewWidth, NewHeight, NewBits, DisplayBits; CUSTOM_CVAR (Bool, fullscreen, false, CVAR_ARCHIVE|CVAR_GLOBALCONFIG) diff --git a/src/posix/sdl/sdlvideo.cpp b/src/posix/sdl/sdlvideo.cpp index 04c3a3f2ef..b050097be6 100644 --- a/src/posix/sdl/sdlvideo.cpp +++ b/src/posix/sdl/sdlvideo.cpp @@ -257,7 +257,7 @@ bool SDLVideo::NextMode (int *width, int *height, bool *letterbox) return false; } -DFrameBuffer *SDLVideo::CreateFrameBuffer (int width, int height, bool fullscreen, DFrameBuffer *old) +DFrameBuffer *SDLVideo::CreateFrameBuffer (int width, int height, bool bgra, bool fullscreen, DFrameBuffer *old) { static int retry = 0; static int owidth, oheight; @@ -335,7 +335,7 @@ DFrameBuffer *SDLVideo::CreateFrameBuffer (int width, int height, bool fullscree } ++retry; - fb = static_cast(CreateFrameBuffer (width, height, fullscreen, NULL)); + fb = static_cast(CreateFrameBuffer (width, height, bgra, fullscreen, NULL)); } retry = 0; @@ -351,7 +351,7 @@ void SDLVideo::SetWindowedScale (float scale) // FrameBuffer implementation ----------------------------------------------- SDLFB::SDLFB (int width, int height, bool fullscreen, SDL_Window *oldwin) - : DFrameBuffer (width, height) + : DFrameBuffer (width, height, false) { int i; diff --git a/src/posix/sdl/sdlvideo.h b/src/posix/sdl/sdlvideo.h index 072167b5a2..385733bc15 100644 --- a/src/posix/sdl/sdlvideo.h +++ b/src/posix/sdl/sdlvideo.h @@ -10,7 +10,7 @@ class SDLVideo : public IVideo EDisplayType GetDisplayType () { return DISPLAY_Both; } void SetWindowedScale (float scale); - DFrameBuffer *CreateFrameBuffer (int width, int height, bool fs, DFrameBuffer *old); + DFrameBuffer *CreateFrameBuffer (int width, int height, bool bgra, bool fs, DFrameBuffer *old); void StartModeIterator (int bits, bool fs); bool NextMode (int *width, int *height, bool *letterbox); diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 
f939406bb5..82169ec6fc 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -179,7 +179,6 @@ FDynamicColormap ShadeFakeColormap[16]; BYTE identitymap[256]; EXTERN_CVAR (Int, r_columnmethod) -EXTERN_CVAR (Bool, r_swtruecolor) void R_InitShadeMaps() { @@ -4129,6 +4128,14 @@ const BYTE *R_GetColumn (FTexture *tex, int col) // [RH] Initialize the column drawer pointers void R_InitColumnDrawers () { + // Save a copy when switching to true color mode as the assembly palette drawers might change them + static bool pointers_saved = false; + static DWORD(*dovline1_saved)(); + static DWORD(*doprevline1_saved)(); + static DWORD(*domvline1_saved)(); + static void(*dovline4_saved)(); + static void(*domvline4_saved)(); + if (r_swtruecolor) { R_DrawColumnHoriz = R_DrawColumnHorizP_RGBA_C; @@ -4201,6 +4208,16 @@ void R_InitColumnDrawers () rt_tlaterevsubclamp4cols = rt_tlaterevsubclamp4cols_RGBA_c; rt_initcols = rt_initcols_rgba; + if (!pointers_saved) + { + pointers_saved = true; + dovline1_saved = dovline1; + doprevline1_saved = doprevline1; + domvline1_saved = domvline1; + dovline4_saved = dovline4; + domvline4_saved = domvline4; + } + dovline1 = vlinec1_RGBA; doprevline1 = vlinec1_RGBA; dovline4 = vlinec4_RGBA; @@ -4304,7 +4321,27 @@ void R_InitColumnDrawers () rt_tlatesubclamp4cols = rt_tlatesubclamp4cols_c; rt_tlaterevsubclamp4cols = rt_tlaterevsubclamp4cols_c; rt_initcols = rt_initcols_pal; + + if (pointers_saved) + { + pointers_saved = false; + dovline1 = dovline1_saved; + doprevline1 = doprevline1_saved; + domvline1 = domvline1_saved; + dovline4 = dovline4_saved; + domvline4 = domvline4_saved; + } } + + colfunc = basecolfunc = R_DrawColumn; + fuzzcolfunc = R_DrawFuzzColumn; + transcolfunc = R_DrawTranslatedColumn; + spanfunc = R_DrawSpan; + + // [RH] Horizontal column drawers + hcolfunc_pre = R_DrawColumnHoriz; + hcolfunc_post1 = rt_map1col; + hcolfunc_post4 = rt_map4cols; } // [RH] Choose column drawers in a single place diff --git a/src/r_drawt.cpp b/src/r_drawt.cpp 
index 485ed7ab31..e47590c72a 100644 --- a/src/r_drawt.cpp +++ b/src/r_drawt.cpp @@ -47,8 +47,6 @@ #include "r_things.h" #include "v_video.h" -EXTERN_CVAR(Bool, r_swtruecolor) - // I should have commented this stuff better. // // dc_temp is the buffer R_DrawColumnHoriz writes into. diff --git a/src/r_main.cpp b/src/r_main.cpp index 9dc61eea32..aec8310d52 100644 --- a/src/r_main.cpp +++ b/src/r_main.cpp @@ -103,7 +103,8 @@ bool r_dontmaplines; CVAR (String, r_viewsize, "", CVAR_NOSET) CVAR (Bool, r_shadercolormaps, true, CVAR_ARCHIVE) -CVAR (Bool, r_swtruecolor, false, CVAR_ARCHIVE) + +bool r_swtruecolor; double r_BaseVisibility; double r_WallVisibility; @@ -398,16 +399,6 @@ void R_InitRenderer() R_InitPlanes (); R_InitShadeMaps(); R_InitColumnDrawers (); - - colfunc = basecolfunc = R_DrawColumn; - fuzzcolfunc = R_DrawFuzzColumn; - transcolfunc = R_DrawTranslatedColumn; - spanfunc = R_DrawSpan; - - // [RH] Horizontal column drawers - hcolfunc_pre = R_DrawColumnHoriz; - hcolfunc_post1 = rt_map1col; - hcolfunc_post4 = rt_map4cols; } //========================================================================== @@ -962,6 +953,13 @@ void R_RenderViewToCanvas (AActor *actor, DCanvas *canvas, int x, int y, int width, int height, bool dontmaplines) { const bool savedviewactive = viewactive; + const bool savedoutputformat = r_swtruecolor; + + if (r_swtruecolor != canvas->IsBgra()) + { + r_swtruecolor = canvas->IsBgra(); + R_InitColumnDrawers(); + } viewwidth = width; RenderTarget = canvas; @@ -980,7 +978,15 @@ void R_RenderViewToCanvas (AActor *actor, DCanvas *canvas, screen->Lock (true); R_SetupBuffer (); screen->Unlock (); + viewactive = savedviewactive; + r_swtruecolor = savedoutputformat; + + if (r_swtruecolor != canvas->IsBgra()) + { + r_swtruecolor = canvas->IsBgra(); + R_InitColumnDrawers(); + } } //========================================================================== diff --git a/src/r_main.h b/src/r_main.h index c1034ea3eb..765635e5da 100644 --- a/src/r_main.h 
+++ b/src/r_main.h @@ -106,6 +106,8 @@ inline uint32_t shade_pal_index(uint32_t index, uint32_t light) return 0xff000000 | (red << 16) | (green << 8) | blue; } +extern bool r_swtruecolor; + extern double GlobVis; void R_SetVisibility(double visibility); diff --git a/src/r_segs.cpp b/src/r_segs.cpp index 8c71f0fb73..cab97adfca 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -61,8 +61,6 @@ CVAR(Bool, r_np2, true, 0) //CVAR (Int, ty, 8, 0) //CVAR (Int, tx, 8, 0) -EXTERN_CVAR(Bool, r_swtruecolor) - #define HEIGHTBITS 12 #define HEIGHTSHIFT (FRACBITS-HEIGHTBITS) diff --git a/src/r_swrenderer.cpp b/src/r_swrenderer.cpp index 433007acb1..15e2fda8f0 100644 --- a/src/r_swrenderer.cpp +++ b/src/r_swrenderer.cpp @@ -155,6 +155,12 @@ void FSoftwareRenderer::Precache(BYTE *texhitlist, TMap &act void FSoftwareRenderer::RenderView(player_t *player) { + if (r_swtruecolor != screen->IsBgra()) + { + r_swtruecolor = screen->IsBgra(); + R_InitColumnDrawers(); + } + R_RenderActorView (player->mo); // [RH] Let cameras draw onto textures that were visible this frame. 
FCanvasTextureInfo::UpdateAll (); @@ -182,8 +188,7 @@ void FSoftwareRenderer::RemapVoxels() void FSoftwareRenderer::WriteSavePic (player_t *player, FILE *file, int width, int height) { -#ifdef PALETTEOUTPUT - DCanvas *pic = new DSimpleCanvas (width, height); + DCanvas *pic = new DSimpleCanvas (width, height, false); PalEntry palette[256]; // Take a snapshot of the player's view @@ -196,7 +201,6 @@ void FSoftwareRenderer::WriteSavePic (player_t *player, FILE *file, int width, i pic->Destroy(); pic->ObjectFlags |= OF_YesReallyDelete; delete pic; -#endif } //=========================================================================== @@ -313,7 +317,6 @@ void FSoftwareRenderer::CopyStackedViewParameters() void FSoftwareRenderer::RenderTextureView (FCanvasTexture *tex, AActor *viewpoint, int fov) { -#ifdef PALETTEOUTPUT BYTE *Pixels = const_cast(tex->GetPixels()); DSimpleCanvas *Canvas = tex->GetCanvas(); @@ -337,7 +340,6 @@ void FSoftwareRenderer::RenderTextureView (FCanvasTexture *tex, AActor *viewpoin tex->SetUpdated(); fixedcolormap = savecolormap; realfixedcolormap = savecm; -#endif } //========================================================================== diff --git a/src/r_things.cpp b/src/r_things.cpp index 2abcc0e12f..f52c80376c 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -98,7 +98,6 @@ EXTERN_CVAR (Bool, st_scale) EXTERN_CVAR(Bool, r_shadercolormaps) EXTERN_CVAR(Int, r_drawfuzz) EXTERN_CVAR(Bool, r_deathcamera); -EXTERN_CVAR(Bool, r_swtruecolor) // // Sprite rotation 0 is facing the viewer, diff --git a/src/textures/canvastexture.cpp b/src/textures/canvastexture.cpp index d1f70439f4..7242149a40 100644 --- a/src/textures/canvastexture.cpp +++ b/src/textures/canvastexture.cpp @@ -103,7 +103,7 @@ const BYTE *FCanvasTexture::GetPixels () void FCanvasTexture::MakeTexture () { - Canvas = new DSimpleCanvas (Width, Height); + Canvas = new DSimpleCanvas (Width, Height, false); Canvas->Lock (); GC::AddSoftRoot(Canvas); diff --git a/src/v_draw.cpp 
b/src/v_draw.cpp index ed6571ad36..b4f1ad4b53 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -77,8 +77,6 @@ extern "C" short spanend[MAXHEIGHT]; CVAR (Bool, hud_scale, false, CVAR_ARCHIVE); -EXTERN_CVAR(Bool, r_swtruecolor) - // For routines that take RGB colors, cache the previous lookup in case there // are several repetitions with the same color. static int LastPal = -1; @@ -1019,7 +1017,7 @@ void DCanvas::PUTTRANSDOT (int xx, int yy, int basecolor, int level) oldyyshifted = yy * GetPitch(); } - if (r_swtruecolor) + if (IsBgra()) { uint32_t *spot = (uint32_t*)GetBuffer() + oldyyshifted + xx; @@ -1091,7 +1089,7 @@ void DCanvas::DrawLine(int x0, int y0, int x1, int y1, int palColor, uint32 real { swapvalues (x0, x1); } - if (r_swtruecolor) + if (IsBgra()) { uint32_t *spot = (uint32_t*)GetBuffer() + y0*GetPitch() + x0; for (int i = 0; i <= deltaX; i++) @@ -1104,7 +1102,7 @@ void DCanvas::DrawLine(int x0, int y0, int x1, int y1, int palColor, uint32 real } else if (deltaX == 0) { // vertical line - if (r_swtruecolor) + if (IsBgra()) { uint32_t *spot = (uint32_t*)GetBuffer() + y0*GetPitch() + x0; int pitch = GetPitch(); @@ -1127,7 +1125,7 @@ void DCanvas::DrawLine(int x0, int y0, int x1, int y1, int palColor, uint32 real } else if (deltaX == deltaY) { // diagonal line. 
- if (r_swtruecolor) + if (IsBgra()) { uint32_t *spot = (uint32_t*)GetBuffer() + y0*GetPitch() + x0; int advance = GetPitch() + xDir; @@ -1295,7 +1293,7 @@ void DCanvas::Clear (int left, int top, int right, int bottom, int palcolor, uin palcolor = PalFromRGB(color); } - if (r_swtruecolor) + if (IsBgra()) { uint32_t *dest = (uint32_t*)Buffer + top * Pitch + left; x = right - left; @@ -1502,7 +1500,7 @@ void DCanvas::FillSimplePoly(FTexture *tex, FVector2 *points, int npoints, // void DCanvas::DrawBlock (int x, int y, int _width, int _height, const BYTE *src) const { - if (r_swtruecolor) + if (IsBgra()) return; int srcpitch = _width; @@ -1531,7 +1529,7 @@ void DCanvas::DrawBlock (int x, int y, int _width, int _height, const BYTE *src) // void DCanvas::GetBlock (int x, int y, int _width, int _height, BYTE *dest) const { - if (r_swtruecolor) + if (IsBgra()) return; const BYTE *src; diff --git a/src/v_video.cpp b/src/v_video.cpp index 01043b8bc9..bc99edbf10 100644 --- a/src/v_video.cpp +++ b/src/v_video.cpp @@ -65,8 +65,6 @@ #include "menu/menu.h" #include "r_data/voxels.h" -EXTERN_CVAR(Bool, r_swtruecolor) - FRenderer *Renderer; IMPLEMENT_ABSTRACT_CLASS (DCanvas) @@ -83,7 +81,7 @@ class DDummyFrameBuffer : public DFrameBuffer DECLARE_CLASS (DDummyFrameBuffer, DFrameBuffer); public: DDummyFrameBuffer (int width, int height) - : DFrameBuffer (0, 0) + : DFrameBuffer (0, 0, false) { Width = width; Height = height; @@ -208,13 +206,14 @@ DCanvas *DCanvas::CanvasChain = NULL; // //========================================================================== -DCanvas::DCanvas (int _width, int _height) +DCanvas::DCanvas (int _width, int _height, bool _bgra) { // Init member vars Buffer = NULL; LockCount = 0; Width = _width; Height = _height; + Bgra = _bgra; // Add to list of active canvases Next = CanvasChain; @@ -366,7 +365,7 @@ void DCanvas::Dim (PalEntry color, float damount, int x1, int y1, int w, int h) gap = Pitch - w; - if (r_swtruecolor) + if (IsBgra()) { uint32_t *spot = 
(uint32_t*)Buffer + x1 + y1*Pitch; @@ -448,7 +447,7 @@ void DCanvas::GetScreenshotBuffer(const BYTE *&buffer, int &pitch, ESSType &colo Lock(true); buffer = GetBuffer(); pitch = GetPitch(); - color_type = r_swtruecolor ? SS_BGRA : SS_PAL; + color_type = IsBgra() ? SS_BGRA : SS_PAL; } //========================================================================== @@ -761,8 +760,8 @@ void DCanvas::CalcGamma (float gamma, BYTE gammalookup[256]) // //========================================================================== -DSimpleCanvas::DSimpleCanvas (int width, int height) - : DCanvas (width, height) +DSimpleCanvas::DSimpleCanvas (int width, int height, bool bgra) + : DCanvas (width, height, bgra) { // Making the pitch a power of 2 is very bad for performance // Try to maximize the number of cache lines that can be filled @@ -799,8 +798,9 @@ DSimpleCanvas::DSimpleCanvas (int width, int height) Pitch = width + MAX(0, CPU.DataL1LineSize - 8); } } - MemBuffer = new BYTE[Pitch * height * 4]; - memset (MemBuffer, 0, Pitch * height * 4); + int bytes_per_pixel = bgra ? 
4 : 1; + MemBuffer = new BYTE[Pitch * height * bytes_per_pixel]; + memset (MemBuffer, 0, Pitch * height * bytes_per_pixel); } //========================================================================== @@ -869,8 +869,8 @@ void DSimpleCanvas::Unlock () // //========================================================================== -DFrameBuffer::DFrameBuffer (int width, int height) - : DSimpleCanvas (width, height) +DFrameBuffer::DFrameBuffer (int width, int height, bool bgra) + : DSimpleCanvas (width, height, bgra) { LastMS = LastSec = FrameCount = LastCount = LastTic = 0; Accel2D = false; @@ -927,7 +927,7 @@ void DFrameBuffer::DrawRateStuff () // Buffer can be NULL if we're doing hardware accelerated 2D if (buffer != NULL) { - if (r_swtruecolor) + if (IsBgra()) { uint32_t *buffer32 = (uint32_t*)buffer; buffer32 += (GetHeight() - 1) * GetPitch(); diff --git a/src/v_video.h b/src/v_video.h index fa1ce83df0..120beff9a9 100644 --- a/src/v_video.h +++ b/src/v_video.h @@ -185,7 +185,7 @@ class DCanvas : public DObject { DECLARE_ABSTRACT_CLASS (DCanvas, DObject) public: - DCanvas (int width, int height); + DCanvas (int width, int height, bool bgra); virtual ~DCanvas (); // Member variable access @@ -193,6 +193,7 @@ public: inline int GetWidth () const { return Width; } inline int GetHeight () const { return Height; } inline int GetPitch () const { return Pitch; } + inline bool IsBgra() const { return Bgra; } virtual bool IsValid (); @@ -267,6 +268,7 @@ protected: int Height; int Pitch; int LockCount; + bool Bgra; bool ClipBox (int &left, int &top, int &width, int &height, const BYTE *&src, const int srcpitch) const; void DrawTextureV(FTexture *img, double x, double y, uint32 tag, va_list tags) = delete; @@ -289,7 +291,7 @@ class DSimpleCanvas : public DCanvas { DECLARE_CLASS (DSimpleCanvas, DCanvas) public: - DSimpleCanvas (int width, int height); + DSimpleCanvas (int width, int height, bool bgra); ~DSimpleCanvas (); bool IsValid (); @@ -327,7 +329,7 @@ class 
DFrameBuffer : public DSimpleCanvas { DECLARE_ABSTRACT_CLASS (DFrameBuffer, DSimpleCanvas) public: - DFrameBuffer (int width, int height); + DFrameBuffer (int width, int height, bool bgra); // Force the surface to use buffered output if true is passed. virtual bool Lock (bool buffered) = 0; diff --git a/src/win32/fb_d3d9.cpp b/src/win32/fb_d3d9.cpp index 0cd847b978..fd84e3bbb7 100644 --- a/src/win32/fb_d3d9.cpp +++ b/src/win32/fb_d3d9.cpp @@ -187,7 +187,6 @@ EXTERN_CVAR (Float, Gamma) EXTERN_CVAR (Bool, vid_vsync) EXTERN_CVAR (Float, transsouls) EXTERN_CVAR (Int, vid_refreshrate) -EXTERN_CVAR (Bool, r_swtruecolor) extern IDirect3D9 *D3D; @@ -243,8 +242,8 @@ CVAR(Bool, vid_hwaalines, true, CVAR_ARCHIVE|CVAR_GLOBALCONFIG) // //========================================================================== -D3DFB::D3DFB (UINT adapter, int width, int height, bool fullscreen) - : BaseWinFB (width, height) +D3DFB::D3DFB (UINT adapter, int width, int height, bool bgra, bool fullscreen) + : BaseWinFB (width, height, bgra) { D3DPRESENT_PARAMETERS d3dpp; @@ -766,7 +765,7 @@ void D3DFB::KillNativeTexs() bool D3DFB::CreateFBTexture () { - FBFormat = r_swtruecolor ? D3DFMT_A8R8G8B8 : D3DFMT_L8; + FBFormat = IsBgra() ? 
D3DFMT_A8R8G8B8 : D3DFMT_L8; if (FAILED(D3DDevice->CreateTexture(Width, Height, 1, D3DUSAGE_DYNAMIC, FBFormat, D3DPOOL_DEFAULT, &FBTexture, NULL))) { @@ -1307,7 +1306,7 @@ void D3DFB::Draw3DPart(bool copy3d) SUCCEEDED(FBTexture->LockRect (0, &lockrect, NULL, D3DLOCK_DISCARD))) || SUCCEEDED(FBTexture->LockRect (0, &lockrect, &texrect, 0))) { - if (r_swtruecolor && FBFormat == D3DFMT_A8R8G8B8) + if (IsBgra() && FBFormat == D3DFMT_A8R8G8B8) { if (lockrect.Pitch == Pitch * sizeof(uint32_t) && Pitch == Width) { @@ -1325,7 +1324,7 @@ void D3DFB::Draw3DPart(bool copy3d) } } } - else if (!r_swtruecolor && FBFormat == D3DFMT_L8) + else if (!IsBgra() && FBFormat == D3DFMT_L8) { if (lockrect.Pitch == Pitch && Pitch == Width) { @@ -1377,7 +1376,7 @@ void D3DFB::Draw3DPart(bool copy3d) memset(Constant, 0, sizeof(Constant)); SetAlphaBlend(D3DBLENDOP(0)); EnableAlphaTest(FALSE); - if (r_swtruecolor) + if (IsBgra()) SetPixelShader(Shaders[SHADER_NormalColor]); else SetPixelShader(Shaders[SHADER_NormalColorPal]); @@ -1398,7 +1397,7 @@ void D3DFB::Draw3DPart(bool copy3d) realfixedcolormap->ColorizeStart[1]/2, realfixedcolormap->ColorizeStart[2]/2, 0); color1 = D3DCOLOR_COLORVALUE(realfixedcolormap->ColorizeEnd[0]/2, realfixedcolormap->ColorizeEnd[1]/2, realfixedcolormap->ColorizeEnd[2]/2, 1); - if (r_swtruecolor) + if (IsBgra()) SetPixelShader(Shaders[SHADER_SpecialColormap]); else SetPixelShader(Shaders[SHADER_SpecialColormapPal]); @@ -1412,7 +1411,7 @@ void D3DFB::Draw3DPart(bool copy3d) CalcFullscreenCoords(verts, Accel2D, false, color0, color1); D3DDevice->DrawPrimitiveUP(D3DPT_TRIANGLEFAN, 2, verts, sizeof(FBVERTEX)); } - if (r_swtruecolor) + if (IsBgra()) SetPixelShader(Shaders[SHADER_NormalColor]); else SetPixelShader(Shaders[SHADER_NormalColorPal]); diff --git a/src/win32/fb_ddraw.cpp b/src/win32/fb_ddraw.cpp index fbdf035a3a..5637e96956 100644 --- a/src/win32/fb_ddraw.cpp +++ b/src/win32/fb_ddraw.cpp @@ -60,9 +60,7 @@ // TYPES 
------------------------------------------------------------------- -#ifdef USE_OBSOLETE_DDRAW IMPLEMENT_CLASS(DDrawFB) -#endif // EXTERNAL FUNCTION PROTOTYPES -------------------------------------------- @@ -120,10 +118,8 @@ cycle_t BlitCycles; // CODE -------------------------------------------------------------------- -#ifdef USE_OBSOLETE_DDRAW - DDrawFB::DDrawFB (int width, int height, bool fullscreen) - : BaseWinFB (width, height) + : BaseWinFB (width, height, false) { int i; @@ -1330,7 +1326,6 @@ void DDrawFB::Blank () PrimarySurf->Blt (NULL, NULL, NULL, DDBLT_COLORFILL, &blitFX); } } -#endif ADD_STAT (blit) { diff --git a/src/win32/hardware.cpp b/src/win32/hardware.cpp index 8cc7705569..8856924c00 100644 --- a/src/win32/hardware.cpp +++ b/src/win32/hardware.cpp @@ -51,6 +51,7 @@ EXTERN_CVAR (Bool, ticker) EXTERN_CVAR (Bool, fullscreen) +EXTERN_CVAR (Bool, swtruecolor) EXTERN_CVAR (Float, vid_winscale) CVAR(Int, win_x, -1, CVAR_ARCHIVE | CVAR_GLOBALCONFIG) @@ -146,7 +147,7 @@ DFrameBuffer *I_SetMode (int &width, int &height, DFrameBuffer *old) } break; } - DFrameBuffer *res = Video->CreateFrameBuffer (width, height, fs, old); + DFrameBuffer *res = Video->CreateFrameBuffer (width, height, swtruecolor, fs, old); /* Right now, CreateFrameBuffer cannot return NULL if (res == NULL) @@ -310,6 +311,8 @@ void I_RestoreWindowedPos () MoveWindow (Window, winx, winy, winw, winh, TRUE); } +CVAR (Bool, swtruecolor, false, CVAR_ARCHIVE) + extern int NewWidth, NewHeight, NewBits, DisplayBits; CUSTOM_CVAR (Bool, fullscreen, true, CVAR_ARCHIVE|CVAR_GLOBALCONFIG|CVAR_NOINITCALL) diff --git a/src/win32/hardware.h b/src/win32/hardware.h index b2bafef322..184eeccf54 100644 --- a/src/win32/hardware.h +++ b/src/win32/hardware.h @@ -45,7 +45,7 @@ class IVideo virtual EDisplayType GetDisplayType () = 0; virtual void SetWindowedScale (float scale) = 0; - virtual DFrameBuffer *CreateFrameBuffer (int width, int height, bool fs, DFrameBuffer *old) = 0; + virtual DFrameBuffer 
*CreateFrameBuffer (int width, int height, bool bgra, bool fs, DFrameBuffer *old) = 0; virtual void StartModeIterator (int bits, bool fs) = 0; virtual bool NextMode (int *width, int *height, bool *letterbox) = 0; diff --git a/src/win32/win32iface.h b/src/win32/win32iface.h index 0b3333d639..d30475eb37 100644 --- a/src/win32/win32iface.h +++ b/src/win32/win32iface.h @@ -70,7 +70,7 @@ class Win32Video : public IVideo EDisplayType GetDisplayType () { return DISPLAY_Both; } void SetWindowedScale (float scale); - DFrameBuffer *CreateFrameBuffer (int width, int height, bool fs, DFrameBuffer *old); + DFrameBuffer *CreateFrameBuffer (int width, int height, bool bgra, bool fs, DFrameBuffer *old); void StartModeIterator (int bits, bool fs); bool NextMode (int *width, int *height, bool *letterbox); @@ -121,7 +121,7 @@ class BaseWinFB : public DFrameBuffer { DECLARE_ABSTRACT_CLASS(BaseWinFB, DFrameBuffer) public: - BaseWinFB (int width, int height) : DFrameBuffer (width, height), Windowed (true) {} + BaseWinFB (int width, int height, bool bgra) : DFrameBuffer (width, height, bgra), Windowed (true) {} bool IsFullscreen () { return !Windowed; } virtual void Blank () = 0; @@ -142,7 +142,6 @@ protected: BaseWinFB() {} }; -#ifdef USE_OBSOLETE_DDRAW class DDrawFB : public BaseWinFB { DECLARE_CLASS(DDrawFB, BaseWinFB) @@ -224,13 +223,12 @@ private: DDrawFB() {} }; -#endif class D3DFB : public BaseWinFB { DECLARE_CLASS(D3DFB, BaseWinFB) public: - D3DFB (UINT adapter, int width, int height, bool fullscreen); + D3DFB (UINT adapter, int width, int height, bool bgra, bool fullscreen); ~D3DFB (); bool IsValid (); diff --git a/src/win32/win32video.cpp b/src/win32/win32video.cpp index 3f3645d0bf..5b2d5ef208 100644 --- a/src/win32/win32video.cpp +++ b/src/win32/win32video.cpp @@ -222,6 +222,13 @@ bool Win32Video::InitD3D9 () // Enumerate available display modes. 
FreeModes (); #ifndef PALETTEOUTPUT // To do: remove this again (AddD3DModes fails when there are too many modes available for videomenu to display) + + AddMode(320, 200, 8, 200, 0); + AddMode(320, 240, 8, 240, 0); + AddMode(640, 480, 8, 480, 0); + AddMode(800, 600, 8, 600, 0); + AddMode(1024, 768, 8, 768, 0); + AddMode(1920, 1080, 8, 1440, 0); // 1080p AddMode(1920*2, 1080*2, 8, 1440, 0); // 4k AddMode(2560, 1440, 8, 1440, 0); // 27" classic @@ -636,7 +643,7 @@ bool Win32Video::NextMode (int *width, int *height, bool *letterbox) return false; } -DFrameBuffer *Win32Video::CreateFrameBuffer (int width, int height, bool fullscreen, DFrameBuffer *old) +DFrameBuffer *Win32Video::CreateFrameBuffer (int width, int height, bool bgra, bool fullscreen, DFrameBuffer *old) { static int retry = 0; static int owidth, oheight; @@ -652,7 +659,8 @@ DFrameBuffer *Win32Video::CreateFrameBuffer (int width, int height, bool fullscr BaseWinFB *fb = static_cast (old); if (fb->Width == width && fb->Height == height && - fb->Windowed == !fullscreen) + fb->Windowed == !fullscreen && + fb->Bgra == bgra) { return old; } @@ -667,13 +675,9 @@ DFrameBuffer *Win32Video::CreateFrameBuffer (int width, int height, bool fullscr flashAmount = 0; } -#ifndef USE_OBSOLETE_DDRAW - fb = new D3DFB(m_Adapter, width, height, fullscreen); - LOG1("New fb created @ %p\n", fb); -#else if (D3D != NULL) { - fb = new D3DFB (m_Adapter, width, height, fullscreen); + fb = new D3DFB (m_Adapter, width, height, bgra, fullscreen); } else { @@ -738,10 +742,9 @@ DFrameBuffer *Win32Video::CreateFrameBuffer (int width, int height, bool fullscr } ++retry; - fb = static_cast(CreateFrameBuffer (width, height, fullscreen, NULL)); + fb = static_cast(CreateFrameBuffer (width, height, bgra, fullscreen, NULL)); } retry = 0; -#endif fb->SetFlash (flashColor, flashAmount); return fb; diff --git a/wadsrc/static/menudef.txt b/wadsrc/static/menudef.txt index 93e33ce799..3c712de967 100644 --- a/wadsrc/static/menudef.txt +++ 
b/wadsrc/static/menudef.txt @@ -661,7 +661,7 @@ OptionMenu "VideoOptions" Option "$DSPLYMNU_VSYNC", "vid_vsync", "OnOff" Option "$DSPLYMNU_CAPFPS", "cl_capfps", "OffOn" Option "$DSPLYMNU_COLUMNMETHOD", "r_columnmethod", "ColumnMethods" - Option "$DSPLYMNU_TRUECOLOR", "r_swtruecolor", "OnOff" + Option "$DSPLYMNU_TRUECOLOR", "swtruecolor", "OnOff" StaticText " " Option "$DSPLYMNU_WIPETYPE", "wipetype", "Wipes" From e929eec80f688f2afbd4a27ade847282aad9622d Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 1 Jun 2016 05:28:14 +0200 Subject: [PATCH 007/912] Make x86 asm aware of swtruecolor --- src/doomtype.h | 5 + src/r_draw.cpp | 243 +++++++++++++++++++++++++++++++++------ src/r_draw.h | 1 + src/r_drawt.cpp | 25 ++-- src/r_plane.cpp | 41 ++++--- src/win32/win32video.cpp | 4 +- 6 files changed, 256 insertions(+), 63 deletions(-) diff --git a/src/doomtype.h b/src/doomtype.h index 39c59751d3..9fca870d33 100644 --- a/src/doomtype.h +++ b/src/doomtype.h @@ -99,6 +99,11 @@ typedef TMap FClassMap; #endif +// Only use SSE intrinsics on Intel architecture +#if !defined(_M_IX86) && !defined(__i386__) && !defined(_M_X64) && !defined(__amd64__) +#define NO_SSE +#endif + #if defined(_MSC_VER) #define NOVTABLE __declspec(novtable) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 82169ec6fc..d7b7409739 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -1789,7 +1789,7 @@ void R_SetSpanSource(const BYTE *pixels) { ds_source = pixels; #ifdef X86_ASM - if (ds_cursource != ds_source) + if (!r_swtruecolor && ds_cursource != ds_source) { R_SetSpanSource_ASM(pixels); } @@ -1809,7 +1809,7 @@ void R_SetSpanColormap(BYTE *colormap) ds_colormap = colormap; ds_light = 0; #ifdef X86_ASM - if (ds_colormap != ds_curcolormap) + if (!r_swtruecolor && ds_colormap != ds_curcolormap) { R_SetSpanColormap_ASM (ds_colormap); } @@ -1838,7 +1838,8 @@ void R_SetupSpanBits(FTexture *tex) ds_ybits--; } #ifdef X86_ASM - R_SetSpanSize_ASM (ds_xbits, ds_ybits); + if (!r_swtruecolor) + R_SetSpanSize_ASM 
(ds_xbits, ds_ybits); #endif } @@ -1954,7 +1955,80 @@ void R_DrawSpanP_RGBA_C() { // 64x64 is the most common case by far, so special case it. + do + { + // Current texture index in u,v. + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + + // Lookup pixel from flat texture tile, + // re-index using light/colormap. + *dest++ = shade_pal_index(colormap[source[spot]], light); + + // Next step in u,v. + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + else + { + BYTE yshift = 32 - ds_ybits; + BYTE xshift = yshift - ds_xbits; + int xmask = ((1 << ds_xbits) - 1) << ds_ybits; + + do + { + // Current texture index in u,v. + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + + // Lookup pixel from flat texture tile, + // re-index using light/colormap. + *dest++ = shade_pal_index(colormap[source[spot]], light); + + // Next step in u,v. + xfrac += xstep; + yfrac += ystep; + } while (--count); + } +} + #ifndef NO_SSE +void R_DrawSpanP_RGBA_SSE() +{ + dsfixed_t xfrac; + dsfixed_t yfrac; + dsfixed_t xstep; + dsfixed_t ystep; + uint32_t* dest; + const BYTE* source = ds_source; + const BYTE* colormap = ds_colormap; + int count; + int spot; + +#ifdef RANGECHECK + if (ds_x2 < ds_x1 || ds_x1 < 0 + || ds_x2 >= screen->width || ds_y > screen->height) + { + I_Error("R_DrawSpan: %i to %i at %i", ds_x1, ds_x2, ds_y); + } + // dscount++; +#endif + + xfrac = ds_xfrac; + yfrac = ds_yfrac; + + dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; + + count = ds_x2 - ds_x1 + 1; + + xstep = ds_xstep; + ystep = ds_ystep; + + uint32_t light = calc_light_multiplier(ds_light); + + if (ds_xbits == 6 && ds_ybits == 6) + { + // 64x64 is the most common case by far, so special case it. 
+ __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); uint32_t *palette = (uint32_t*)GPalette.BaseColors; @@ -2000,7 +2074,6 @@ void R_DrawSpanP_RGBA_C() } if (count == 0) return; -#endif do { @@ -2037,6 +2110,7 @@ void R_DrawSpanP_RGBA_C() } while (--count); } } +#endif #ifndef X86_ASM @@ -2971,6 +3045,12 @@ void (*domvline4)() = mvlineasm4; void setupvline (int fracbits) { + if (r_swtruecolor) + { + vlinebits = fracbits; + return; + } + #ifdef X86_ASM if (CPU.Family <= 5) { @@ -3075,23 +3155,43 @@ void vlinec4_RGBA() uint32_t *dest = (uint32_t*)dc_dest; int count = dc_count; int bits = vlinebits; + DWORD place; uint32_t light0 = calc_light_multiplier(palookuplight[0]); uint32_t light1 = calc_light_multiplier(palookuplight[1]); uint32_t light2 = calc_light_multiplier(palookuplight[2]); uint32_t light3 = calc_light_multiplier(palookuplight[3]); + + do + { + dest[0] = shade_pal_index(palookupoffse[0][bufplce[0][(place = vplce[0]) >> bits]], light0); vplce[0] = place + vince[0]; + dest[1] = shade_pal_index(palookupoffse[1][bufplce[1][(place = vplce[1]) >> bits]], light1); vplce[1] = place + vince[1]; + dest[2] = shade_pal_index(palookupoffse[2][bufplce[2][(place = vplce[2]) >> bits]], light2); vplce[2] = place + vince[2]; + dest[3] = shade_pal_index(palookupoffse[3][bufplce[3][(place = vplce[3]) >> bits]], light3); vplce[3] = place + vince[3]; + dest += dc_pitch; + } while (--count); +} + #ifndef NO_SSE +void vlinec4_RGBA_SSE() +{ + uint32_t *dest = (uint32_t*)dc_dest; + int count = dc_count; + int bits = vlinebits; + + uint32_t light0 = calc_light_multiplier(palookuplight[0]); + uint32_t light1 = calc_light_multiplier(palookuplight[1]); + uint32_t light2 = calc_light_multiplier(palookuplight[2]); + uint32_t light3 = calc_light_multiplier(palookuplight[3]); + __m128i mlight_hi = _mm_set_epi16(256, light1, light1, light1, 256, light0, light0, light0); __m128i mlight_lo = _mm_set_epi16(256, light3, light3, light3, 256, light2, light2, 
light2); uint32_t *palette = (uint32_t*)GPalette.BaseColors; DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; -#endif do { -#ifndef NO_SSE - DWORD place0 = local_vplce[0]; DWORD place1 = local_vplce[1]; DWORD place2 = local_vplce[2]; @@ -3116,17 +3216,9 @@ void vlinec4_RGBA() fg_lo = _mm_srli_epi16(fg_lo, 8); fg = _mm_packus_epi16(fg_lo, fg_hi); _mm_storeu_si128((__m128i*)dest, fg); - -#else - dest[0] = shade_pal_index(palookupoffse[0][bufplce[0][(place = vplce[0]) >> bits]], light0); vplce[0] = place + vince[0]; - dest[1] = shade_pal_index(palookupoffse[1][bufplce[1][(place = vplce[1]) >> bits]], light1); vplce[1] = place + vince[1]; - dest[2] = shade_pal_index(palookupoffse[2][bufplce[2][(place = vplce[2]) >> bits]], light2); vplce[2] = place + vince[2]; - dest[3] = shade_pal_index(palookupoffse[3][bufplce[3][(place = vplce[3]) >> bits]], light3); vplce[3] = place + vince[3]; -#endif dest += dc_pitch; } while (--count); -#ifndef NO_SSE // Is this needed? Global variables makes it tricky to know.. 
vplce[0] = local_vplce[0]; vplce[1] = local_vplce[1]; @@ -3136,18 +3228,25 @@ void vlinec4_RGBA() vince[1] = local_vince[1]; vince[2] = local_vince[2]; vince[3] = local_vince[3]; -#endif } +#endif void setupmvline (int fracbits) { + if (!r_swtruecolor) + { #if defined(X86_ASM) - setupmvlineasm (fracbits); - domvline1 = mvlineasm1; - domvline4 = mvlineasm4; + setupmvlineasm(fracbits); + domvline1 = mvlineasm1; + domvline4 = mvlineasm4; #else - mvlinebits = fracbits; + mvlinebits = fracbits; #endif + } + else + { + mvlinebits = fracbits; + } } #if !defined(X86_ASM) @@ -3247,6 +3346,73 @@ void mvlinec4_RGBA() } while (--count); } +#ifndef NO_SSE +void mvlinec4_RGBA_SSE() +{ + uint32_t *dest = (uint32_t*)dc_dest; + int count = dc_count; + int bits = vlinebits; + + uint32_t light0 = calc_light_multiplier(palookuplight[0]); + uint32_t light1 = calc_light_multiplier(palookuplight[1]); + uint32_t light2 = calc_light_multiplier(palookuplight[2]); + uint32_t light3 = calc_light_multiplier(palookuplight[3]); + + __m128i mlight_hi = _mm_set_epi16(256, light1, light1, light1, 256, light0, light0, light0); + __m128i mlight_lo = _mm_set_epi16(256, light3, light3, light3, 256, light2, light2, light2); + uint32_t *palette = (uint32_t*)GPalette.BaseColors; + DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; + DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; + + do + { + DWORD place0 = local_vplce[0]; + DWORD place1 = local_vplce[1]; + DWORD place2 = local_vplce[2]; + DWORD place3 = local_vplce[3]; + + BYTE pix0 = bufplce[0][place0 >> bits]; + BYTE pix1 = bufplce[1][place1 >> bits]; + BYTE pix2 = bufplce[2][place2 >> bits]; + BYTE pix3 = bufplce[3][place3 >> bits]; + + // movemask = !(pix == 0) + __m128i movemask = _mm_xor_si128(_mm_cmpeq_epi32(_mm_set_epi32(pix3, pix2, pix1, pix0), _mm_setzero_si128()), _mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128())); + + BYTE p0 = palookupoffse[0][pix0]; + BYTE p1 = palookupoffse[1][pix1]; + BYTE p2 = 
palookupoffse[2][pix2]; + BYTE p3 = palookupoffse[3][pix3]; + + local_vplce[0] = place0 + local_vince[0]; + local_vplce[1] = place1 + local_vince[1]; + local_vplce[2] = place2 + local_vince[2]; + local_vplce[3] = place3 + local_vince[3]; + + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + fg_hi = _mm_mullo_epi16(fg_hi, mlight_hi); + fg_hi = _mm_srli_epi16(fg_hi, 8); + fg_lo = _mm_mullo_epi16(fg_lo, mlight_lo); + fg_lo = _mm_srli_epi16(fg_lo, 8); + fg = _mm_packus_epi16(fg_lo, fg_hi); + _mm_maskmoveu_si128(fg, movemask, (char*)dest); + dest += dc_pitch; + } while (--count); + + // Is this needed? Global variables makes it tricky to know.. + vplce[0] = local_vplce[0]; + vplce[1] = local_vplce[1]; + vplce[2] = local_vplce[2]; + vplce[3] = local_vplce[3]; + vince[0] = local_vince[0]; + vince[1] = local_vince[1]; + vince[2] = local_vince[2]; + vince[3] = local_vince[3]; +} +#endif + extern "C" short spanend[MAXHEIGHT]; extern float rw_light; @@ -4138,14 +4304,28 @@ void R_InitColumnDrawers () if (r_swtruecolor) { + if (!pointers_saved) + { + pointers_saved = true; + dovline1_saved = dovline1; + doprevline1_saved = doprevline1; + domvline1_saved = domvline1; + dovline4_saved = dovline4; + domvline4_saved = domvline4; + } + R_DrawColumnHoriz = R_DrawColumnHorizP_RGBA_C; R_DrawColumn = R_DrawColumnP_RGBA_C; R_DrawFuzzColumn = R_DrawFuzzColumnP_RGBA_C; R_DrawTranslatedColumn = R_DrawTranslatedColumnP_RGBA_C; R_DrawShadedColumn = R_DrawShadedColumnP_RGBA_C; - R_DrawSpan = R_DrawSpanP_RGBA_C; R_DrawSpanMasked = R_DrawSpanMaskedP_RGBA_C; rt_map4cols = rt_map4cols_RGBA_c; +#ifndef NO_SSE + R_DrawSpan = R_DrawSpanP_RGBA_SSE; +#else + R_DrawSpan = R_DrawSpanP_RGBA_C; +#endif R_DrawSpanTranslucent = R_DrawSpanTranslucentP_RGBA_C; R_DrawSpanMaskedTranslucent = R_DrawSpanMaskedTranslucentP_RGBA_C; @@ -4208,21 +4388,18 @@ void 
R_InitColumnDrawers () rt_tlaterevsubclamp4cols = rt_tlaterevsubclamp4cols_RGBA_c; rt_initcols = rt_initcols_rgba; - if (!pointers_saved) - { - pointers_saved = true; - dovline1_saved = dovline1; - doprevline1_saved = doprevline1; - domvline1_saved = domvline1; - dovline4_saved = dovline4; - domvline4_saved = domvline4; - } - dovline1 = vlinec1_RGBA; doprevline1 = vlinec1_RGBA; - dovline4 = vlinec4_RGBA; domvline1 = mvlinec1_RGBA; - domvline4 = mvlinec4_RGBA; + +#ifndef NO_SSE + dovline4 = vlinec4_RGBA_SSE; + domvline4 = mvlinec4_RGBA_SSE; +#else + dovline4 = vlinec4_RGBA; + domvline4 = mvlinec4_RGBA; +#endif + } else { diff --git a/src/r_draw.h b/src/r_draw.h index 2348914b6e..d5007c885c 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -269,6 +269,7 @@ void R_DrawFuzzColumnP_RGBA_C (void); void R_DrawTranslatedColumnP_RGBA_C (void); void R_DrawShadedColumnP_RGBA_C (void); void R_DrawSpanP_RGBA_C (void); +void R_DrawSpanP_RGBA_SSE (void); void R_DrawSpanMaskedP_RGBA_C (void); #endif diff --git a/src/r_drawt.cpp b/src/r_drawt.cpp index e47590c72a..ca6862ed60 100644 --- a/src/r_drawt.cpp +++ b/src/r_drawt.cpp @@ -860,18 +860,21 @@ void rt_draw4cols (int sx) } #ifdef X86_ASM - // Setup assembly routines for changed colormaps or other parameters. - if (hcolfunc_post4 == rt_shaded4cols) + if (!r_swtruecolor) { - R_SetupShadedCol(); - } - else if (hcolfunc_post4 == rt_addclamp4cols || hcolfunc_post4 == rt_tlateaddclamp4cols) - { - R_SetupAddClampCol(); - } - else if (hcolfunc_post4 == rt_add4cols || hcolfunc_post4 == rt_tlateadd4cols) - { - R_SetupAddCol(); + // Setup assembly routines for changed colormaps or other parameters. 
+ if (hcolfunc_post4 == rt_shaded4cols) + { + R_SetupShadedCol(); + } + else if (hcolfunc_post4 == rt_addclamp4cols || hcolfunc_post4 == rt_tlateaddclamp4cols) + { + R_SetupAddClampCol(); + } + else if (hcolfunc_post4 == rt_add4cols || hcolfunc_post4 == rt_tlateadd4cols) + { + R_SetupAddCol(); + } } #endif diff --git a/src/r_plane.cpp b/src/r_plane.cpp index a71590c9d4..40e14c0200 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -231,7 +231,7 @@ void R_MapPlane (int y, int x1) } #ifdef X86_ASM - if (ds_colormap != ds_curcolormap) + if (!r_swtruecolor && ds_colormap != ds_curcolormap) R_SetSpanColormap_ASM (ds_colormap); #endif @@ -1620,7 +1620,7 @@ void R_DrawSkyPlane (visplane_t *pl) void R_DrawNormalPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t alpha, bool additive, bool masked) { #ifdef X86_ASM - if (ds_source != ds_cursource) + if (!r_swtruecolor && ds_source != ds_cursource) { R_SetSpanSource_ASM (ds_source); } @@ -1747,7 +1747,7 @@ void R_DrawNormalPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t // //========================================================================== -void R_DrawTiltedPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t alpha, bool additive, bool masked) +void R_DrawTiltedPlane(visplane_t *pl, double _xscale, double _yscale, fixed_t alpha, bool additive, bool masked) { static const float ifloatpow2[16] = { @@ -1782,7 +1782,7 @@ void R_DrawTiltedPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t // p is the texture origin in view space // Don't add in the offsets at this stage, because doing so can result in // errors if the flat is rotated. 
- ang = M_PI*3/2 - ViewAngle.Radians(); + ang = M_PI * 3 / 2 - ViewAngle.Radians(); cosine = cos(ang), sine = sin(ang); p[0] = ViewPos.X * cosine - ViewPos.Y * sine; p[2] = ViewPos.X * sine + ViewPos.Y * cosine; @@ -1793,25 +1793,25 @@ void R_DrawTiltedPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t cosine = cos(ang), sine = sin(ang); m[0] = yscale * cosine; m[2] = yscale * sine; -// m[1] = pl->height.ZatPointF (0, iyscale) - pl->height.ZatPointF (0,0)); -// VectorScale2 (m, 64.f/VectorLength(m)); + // m[1] = pl->height.ZatPointF (0, iyscale) - pl->height.ZatPointF (0,0)); + // VectorScale2 (m, 64.f/VectorLength(m)); - // n is the u direction vector in view space + // n is the u direction vector in view space #if 0 //let's use the sin/cosine we already know instead of computing new ones - ang += M_PI/2 - n[0] = -xscale * cos(ang); + ang += M_PI / 2 + n[0] = -xscale * cos(ang); n[2] = -xscale * sin(ang); #else n[0] = xscale * sine; n[2] = -xscale * cosine; #endif -// n[1] = pl->height.ZatPointF (ixscale, 0) - pl->height.ZatPointF (0,0)); -// VectorScale2 (n, 64.f/VectorLength(n)); + // n[1] = pl->height.ZatPointF (ixscale, 0) - pl->height.ZatPointF (0,0)); + // VectorScale2 (n, 64.f/VectorLength(n)); - // This code keeps the texture coordinates constant across the x,y plane no matter - // how much you slope the surface. Use the commented-out code above instead to keep - // the textures a constant size across the surface's plane instead. + // This code keeps the texture coordinates constant across the x,y plane no matter + // how much you slope the surface. Use the commented-out code above instead to keep + // the textures a constant size across the surface's plane instead. 
cosine = cos(planeang), sine = sin(planeang); m[1] = pl->height.ZatPoint(ViewPos.X + yscale * sine, ViewPos.Y + yscale * cosine) - zeroheight; n[1] = pl->height.ZatPoint(ViewPos.X - xscale * cosine, ViewPos.Y + xscale * sine) - zeroheight; @@ -1861,9 +1861,16 @@ void R_DrawTiltedPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t } #if defined(X86_ASM) - if (ds_source != ds_curtiltedsource) - R_SetTiltedSpanSource_ASM (ds_source); - R_MapVisPlane (pl, R_DrawTiltedPlane_ASM); + if (!r_swtruecolor) + { + if (ds_source != ds_curtiltedsource) + R_SetTiltedSpanSource_ASM(ds_source); + R_MapVisPlane(pl, R_DrawTiltedPlane_ASM); + } + else + { + R_MapVisPlane(pl, R_MapTiltedPlane); + } #else R_MapVisPlane (pl, R_MapTiltedPlane); #endif diff --git a/src/win32/win32video.cpp b/src/win32/win32video.cpp index 5b2d5ef208..a180a35ea2 100644 --- a/src/win32/win32video.cpp +++ b/src/win32/win32video.cpp @@ -229,8 +229,8 @@ bool Win32Video::InitD3D9 () AddMode(800, 600, 8, 600, 0); AddMode(1024, 768, 8, 768, 0); - AddMode(1920, 1080, 8, 1440, 0); // 1080p - AddMode(1920*2, 1080*2, 8, 1440, 0); // 4k + AddMode(1920, 1080, 8, 1080, 0); // 1080p + AddMode(1920*2, 1080*2, 8, 1080*2, 0); // 4k AddMode(2560, 1440, 8, 1440, 0); // 27" classic AddMode(2560*2, 1440*2, 8, 1440*2, 0); // 5k #else From b9d7a98aeceac8987917db03c9eecce50b1b4abd Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 1 Jun 2016 06:02:37 +0200 Subject: [PATCH 008/912] Change swtruecolor cvar to take effect immediately --- src/posix/cocoa/i_video.mm | 11 ++++++++++- src/posix/sdl/hardware.cpp | 12 ++++++++++-- src/win32/hardware.cpp | 12 ++++++++++-- src/win32/win32video.cpp | 14 -------------- 4 files changed, 30 insertions(+), 19 deletions(-) diff --git a/src/posix/cocoa/i_video.mm b/src/posix/cocoa/i_video.mm index c97460a022..c2eb58c6d9 100644 --- a/src/posix/cocoa/i_video.mm +++ b/src/posix/cocoa/i_video.mm @@ -96,7 +96,16 @@ EXTERN_CVAR(Bool, ticker ) EXTERN_CVAR(Bool, vid_vsync) 
EXTERN_CVAR(Bool, vid_hidpi) -CVAR(Bool, swtruecolor, false, CVAR_ARCHIVE) +CUSTOM_CVAR(Bool, swtruecolor, false, CVAR_ARCHIVE | CVAR_GLOBALCONFIG | CVAR_NOINITCALL) +{ + // Strictly speaking this doesn't require a mode switch, but it is the easiest + // way to force a CreateFramebuffer call without a lot of refactoring. + extern int NewWidth, NewHeight, NewBits, DisplayBits; + NewWidth = screen->GetWidth(); + NewHeight = screen->GetHeight(); + NewBits = DisplayBits; + setmodeneeded = true; +} CUSTOM_CVAR(Bool, fullscreen, false, CVAR_ARCHIVE | CVAR_GLOBALCONFIG) { diff --git a/src/posix/sdl/hardware.cpp b/src/posix/sdl/hardware.cpp index 52bca35e7f..9de4d03a49 100644 --- a/src/posix/sdl/hardware.cpp +++ b/src/posix/sdl/hardware.cpp @@ -281,10 +281,18 @@ CUSTOM_CVAR (Int, vid_maxfps, 200, CVAR_ARCHIVE | CVAR_GLOBALCONFIG) } } -CVAR (Bool, swtruecolor, false, CVAR_ARCHIVE) - extern int NewWidth, NewHeight, NewBits, DisplayBits; +CUSTOM_CVAR(Bool, swtruecolor, false, CVAR_ARCHIVE|CVAR_GLOBALCONFIG|CVAR_NOINITCALL) +{ + // Strictly speaking this doesn't require a mode switch, but it is the easiest + // way to force a CreateFramebuffer call without a lot of refactoring. 
+ NewWidth = screen->GetWidth(); + NewHeight = screen->GetHeight(); + NewBits = DisplayBits; + setmodeneeded = true; +} + CUSTOM_CVAR (Bool, fullscreen, false, CVAR_ARCHIVE|CVAR_GLOBALCONFIG) { NewWidth = screen->GetWidth(); diff --git a/src/win32/hardware.cpp b/src/win32/hardware.cpp index 8856924c00..49c970457b 100644 --- a/src/win32/hardware.cpp +++ b/src/win32/hardware.cpp @@ -311,10 +311,18 @@ void I_RestoreWindowedPos () MoveWindow (Window, winx, winy, winw, winh, TRUE); } -CVAR (Bool, swtruecolor, false, CVAR_ARCHIVE) - extern int NewWidth, NewHeight, NewBits, DisplayBits; +CUSTOM_CVAR(Bool, swtruecolor, false, CVAR_ARCHIVE|CVAR_GLOBALCONFIG|CVAR_NOINITCALL) +{ + // Strictly speaking this doesn't require a mode switch, but it is the easiest + // way to force a CreateFramebuffer call without a lot of refactoring. + NewWidth = screen->GetWidth(); + NewHeight = screen->GetHeight(); + NewBits = DisplayBits; + setmodeneeded = true; +} + CUSTOM_CVAR (Bool, fullscreen, true, CVAR_ARCHIVE|CVAR_GLOBALCONFIG|CVAR_NOINITCALL) { NewWidth = screen->GetWidth(); diff --git a/src/win32/win32video.cpp b/src/win32/win32video.cpp index a180a35ea2..74b10ef073 100644 --- a/src/win32/win32video.cpp +++ b/src/win32/win32video.cpp @@ -221,22 +221,8 @@ bool Win32Video::InitD3D9 () // Enumerate available display modes. FreeModes (); -#ifndef PALETTEOUTPUT // To do: remove this again (AddD3DModes fails when there are too many modes available for videomenu to display) - - AddMode(320, 200, 8, 200, 0); - AddMode(320, 240, 8, 240, 0); - AddMode(640, 480, 8, 480, 0); - AddMode(800, 600, 8, 600, 0); - AddMode(1024, 768, 8, 768, 0); - - AddMode(1920, 1080, 8, 1080, 0); // 1080p - AddMode(1920*2, 1080*2, 8, 1080*2, 0); // 4k - AddMode(2560, 1440, 8, 1440, 0); // 27" classic - AddMode(2560*2, 1440*2, 8, 1440*2, 0); // 5k -#else AddD3DModes (m_Adapter, D3DFMT_X8R8G8B8); AddD3DModes (m_Adapter, D3DFMT_R5G6B5); -#endif if (Args->CheckParm ("-2")) { // Force all modes to be pixel-doubled. 
ScaleModes (1); From 4f635983fcf791b52535b9c597bdc4e823fef635 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 1 Jun 2016 08:54:39 +0200 Subject: [PATCH 009/912] Add bgra support to OS X target --- src/posix/cocoa/i_video.mm | 30 +++++++++++++++++++++--------- src/r_main.cpp | 1 - src/r_swrenderer.cpp | 1 + src/v_draw.cpp | 6 ++++++ 4 files changed, 28 insertions(+), 10 deletions(-) diff --git a/src/posix/cocoa/i_video.mm b/src/posix/cocoa/i_video.mm index c2eb58c6d9..425fe58873 100644 --- a/src/posix/cocoa/i_video.mm +++ b/src/posix/cocoa/i_video.mm @@ -249,7 +249,7 @@ private: class CocoaFrameBuffer : public DFrameBuffer { public: - CocoaFrameBuffer(int width, int height, bool fullscreen); + CocoaFrameBuffer(int width, int height, bool bgra, bool fullscreen); ~CocoaFrameBuffer(); virtual bool Lock(bool buffer); @@ -536,7 +536,7 @@ DFrameBuffer* CocoaVideo::CreateFrameBuffer(const int width, const int height, c if (NULL != old) { - if (width == m_width && height == m_height) + if (width == m_width && height == m_height && bgra == old->IsBgra()) { SetMode(width, height, fullscreen, vid_hidpi); return old; @@ -553,7 +553,7 @@ DFrameBuffer* CocoaVideo::CreateFrameBuffer(const int width, const int height, c delete old; } - CocoaFrameBuffer* fb = new CocoaFrameBuffer(width, height, fullscreen); + CocoaFrameBuffer* fb = new CocoaFrameBuffer(width, height, bgra, fullscreen); fb->SetFlash(flashColor, flashAmount); SetMode(width, height, fullscreen, vid_hidpi); @@ -772,8 +772,8 @@ CocoaVideo* CocoaVideo::GetInstance() } -CocoaFrameBuffer::CocoaFrameBuffer(int width, int height, bool fullscreen) -: DFrameBuffer(width, height, false) +CocoaFrameBuffer::CocoaFrameBuffer(int width, int height, bool bgra, bool fullscreen) +: DFrameBuffer(width, height, bgra) , m_needPaletteUpdate(false) , m_gamma(0.0f) , m_needGammaUpdate(false) @@ -867,8 +867,18 @@ void CocoaFrameBuffer::Update() FlipCycles.Reset(); BlitCycles.Clock(); - GPfx.Convert(MemBuffer, Pitch, m_pixelBuffer, 
Width * BYTES_PER_PIXEL, - Width, Height, FRACUNIT, FRACUNIT, 0, 0); + if (IsBgra()) + { + for (int y = 0; y < Height; y++) + { + memcpy((uint32_t*)m_pixelBuffer + y * Width, (uint32_t*)MemBuffer + y * Pitch, Width * BYTES_PER_PIXEL); + } + } + else + { + GPfx.Convert(MemBuffer, Pitch, m_pixelBuffer, Width * BYTES_PER_PIXEL, + Width, Height, FRACUNIT, FRACUNIT, 0, 0); + } FlipCycles.Clock(); Flip(); @@ -1000,8 +1010,10 @@ void CocoaFrameBuffer::Flip() static const GLenum format = GL_ABGR_EXT; #endif // __LITTLE_ENDIAN__ - glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, GL_RGBA8, - Width, Height, 0, format, GL_UNSIGNED_BYTE, m_pixelBuffer); + if (IsBgra()) + glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, GL_RGBA8, Width, Height, 0, GL_BGRA_EXT, GL_UNSIGNED_BYTE, m_pixelBuffer); + else + glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, GL_RGBA8, Width, Height, 0, format, GL_UNSIGNED_BYTE, m_pixelBuffer); glBegin(GL_QUADS); glColor4f(1.0f, 1.0f, 1.0f, 1.0f); diff --git a/src/r_main.cpp b/src/r_main.cpp index aec8310d52..aaf8fc532a 100644 --- a/src/r_main.cpp +++ b/src/r_main.cpp @@ -984,7 +984,6 @@ void R_RenderViewToCanvas (AActor *actor, DCanvas *canvas, if (r_swtruecolor != canvas->IsBgra()) { - r_swtruecolor = canvas->IsBgra(); R_InitColumnDrawers(); } } diff --git a/src/r_swrenderer.cpp b/src/r_swrenderer.cpp index 15e2fda8f0..ee6ac5fedb 100644 --- a/src/r_swrenderer.cpp +++ b/src/r_swrenderer.cpp @@ -58,6 +58,7 @@ void R_InitRenderer(); void FSoftwareRenderer::Init() { + r_swtruecolor = screen->IsBgra(); R_InitRenderer(); } diff --git a/src/v_draw.cpp b/src/v_draw.cpp index b4f1ad4b53..57fac3cda5 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -129,6 +129,12 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) static short bottomclipper[MAXWIDTH], topclipper[MAXWIDTH]; const BYTE *translation = NULL; + if (r_swtruecolor != IsBgra()) + { + r_swtruecolor = IsBgra(); + R_InitColumnDrawers(); + } + if (parms.masked) { spanptr = &spans; From 
47f32d03cd2d0b8966361b49acb5d0b6de40b94b Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 2 Jun 2016 14:49:03 +0200 Subject: [PATCH 010/912] Fixed some light and blending functions for the true color mode --- src/r_draw.cpp | 283 +++++++++++++++++++++++++------------------ src/r_draw.h | 2 + src/r_drawt_rgba.cpp | 76 +++++++----- src/r_main.h | 13 +- src/r_plane.cpp | 48 ++++++-- src/r_segs.cpp | 41 +++---- src/r_things.cpp | 13 +- src/v_draw.cpp | 6 +- 8 files changed, 284 insertions(+), 198 deletions(-) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index d7b7409739..5a314e6400 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -155,6 +155,8 @@ int dc_color; // [RH] Color for column filler DWORD dc_srccolor; DWORD *dc_srcblend; // [RH] Source and destination DWORD *dc_destblend; // blending lookups +fixed_t dc_srcalpha; // Alpha value used by dc_srcblend +fixed_t dc_destalpha; // Alpha value used by dc_destblend // first pixel in a column (possibly virtual) const BYTE* dc_source; @@ -414,9 +416,10 @@ void R_FillAddColumn_RGBA_C() dest = (uint32_t*)dc_dest; int pitch = dc_pitch; - uint32_t fg_red = (dc_srccolor >> 12) & 0xf8; - uint32_t fg_green = (dc_srccolor >> 2) & 0xf8; - uint32_t fg_blue = (dc_srccolor << 3) & 0xf8; + uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(0)); + uint32_t fg_red = (fg >> 24) & 0xff; + uint32_t fg_green = (fg >> 16) & 0xff; + uint32_t fg_blue = fg & 0xff; do { @@ -478,9 +481,10 @@ void R_FillAddClampColumn_RGBA() dest = (uint32_t*)dc_dest; int pitch = dc_pitch; - uint32_t fg_red = (dc_srccolor >> 12) & 0xf8; - uint32_t fg_green = (dc_srccolor >> 2) & 0xf8; - uint32_t fg_blue = (dc_srccolor << 3) & 0xf8; + uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(0)); + uint32_t fg_red = (fg >> 24) & 0xff; + uint32_t fg_green = (fg >> 16) & 0xff; + uint32_t fg_blue = fg & 0xff; do { @@ -541,9 +545,10 @@ void R_FillSubClampColumn_RGBA() dest = (uint32_t*)dc_dest; int pitch = dc_pitch; - uint32_t fg_red = 
(dc_srccolor >> 12) & 0xf8; - uint32_t fg_green = (dc_srccolor >> 2) & 0xf8; - uint32_t fg_blue = (dc_srccolor << 3) & 0xf8; + uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(0)); + uint32_t fg_red = (fg >> 24) & 0xff; + uint32_t fg_green = (fg >> 16) & 0xff; + uint32_t fg_blue = fg & 0xff; do { @@ -604,9 +609,10 @@ void R_FillRevSubClampColumn_RGBA() dest = (uint32_t*)dc_dest; int pitch = dc_pitch; - uint32_t fg_red = (dc_srccolor >> 12) & 0xf8; - uint32_t fg_green = (dc_srccolor >> 2) & 0xf8; - uint32_t fg_blue = (dc_srccolor << 3) & 0xf8; + uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(0)); + uint32_t fg_red = (fg >> 24) & 0xff; + uint32_t fg_green = (fg >> 16) & 0xff; + uint32_t fg_blue = fg & 0xff; do { @@ -946,6 +952,9 @@ void R_DrawAddColumnP_RGBA_C() int pitch = dc_pitch; BYTE *colormap = dc_colormap; + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + do { uint32_t fg = shade_pal_index(colormap[source[frac >> FRACBITS]], 0); @@ -958,9 +967,9 @@ void R_DrawAddColumnP_RGBA_C() uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - uint32_t red = clamp(fg_red + bg_red, 0, 255); - uint32_t green = clamp(fg_green + bg_green, 0, 255); - uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); *dest = 0xff000000 | (red << 16) | (green << 8) | blue; dest += pitch; @@ -1107,6 +1116,9 @@ void R_DrawTlatedAddColumnP_RGBA_C() const BYTE *source = dc_source; int pitch = dc_pitch; + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + do { uint32_t fg = shade_pal_index(colormap[translation[source[frac >> FRACBITS]]], light); @@ -1119,9 +1131,9 @@ void 
R_DrawTlatedAddColumnP_RGBA_C() uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - uint32_t red = clamp(fg_red + bg_red, 0, 255); - uint32_t green = clamp(fg_green + bg_green, 0, 255); - uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); *dest = 0xff000000 | (red << 16) | (green << 8) | blue; dest += pitch; @@ -1276,6 +1288,9 @@ void R_DrawAddClampColumnP_RGBA_C() int pitch = dc_pitch; uint32_t light = calc_light_multiplier(dc_light); + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + do { uint32_t fg = shade_pal_index(colormap[source[frac >> FRACBITS]], light); @@ -1287,9 +1302,9 @@ void R_DrawAddClampColumnP_RGBA_C() uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - uint32_t red = clamp(fg_red + bg_red, 0, 255); - uint32_t green = clamp(fg_green + bg_green, 0, 255); - uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); *dest = 0xff000000 | (red << 16) | (green << 8) | blue; dest += pitch; @@ -1363,6 +1378,9 @@ void R_DrawAddClampTranslatedColumnP_RGBA_C() int pitch = dc_pitch; uint32_t light = calc_light_multiplier(dc_light); + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + do { uint32_t fg = shade_pal_index(colormap[translation[source[frac >> FRACBITS]]], light); @@ -1374,9 +1392,9 @@ void R_DrawAddClampTranslatedColumnP_RGBA_C() uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - 
uint32_t red = clamp(fg_red + bg_red, 0, 255); - uint32_t green = clamp(fg_green + bg_green, 0, 255); - uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); *dest = 0xff000000 | (red << 16) | (green << 8) | blue; dest += pitch; @@ -1447,6 +1465,9 @@ void R_DrawSubClampColumnP_RGBA_C() int pitch = dc_pitch; uint32_t light = calc_light_multiplier(dc_light); + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + do { uint32_t fg = shade_pal_index(colormap[source[frac >> FRACBITS]], light); @@ -1458,9 +1479,9 @@ void R_DrawSubClampColumnP_RGBA_C() uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - uint32_t red = clamp(256 - fg_red + bg_red, 256, 256 + 255) - 256; - uint32_t green = clamp(256 - fg_green + bg_green, 256, 256 + 255) - 256; - uint32_t blue = clamp(256 - fg_blue + bg_blue, 256, 256 + 255) - 256; + uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; *dest = 0xff000000 | (red << 16) | (green << 8) | blue; dest += pitch; @@ -1533,6 +1554,9 @@ void R_DrawSubClampTranslatedColumnP_RGBA_C() int pitch = dc_pitch; uint32_t light = calc_light_multiplier(dc_light); + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + do { uint32_t fg = shade_pal_index(colormap[translation[source[frac >> FRACBITS]]], light); @@ -1544,9 +1568,9 @@ void R_DrawSubClampTranslatedColumnP_RGBA_C() uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = 
(*dest) & 0xff; - uint32_t red = clamp(256 - fg_red + bg_red, 256, 256 + 255) - 256; - uint32_t green = clamp(256 - fg_green + bg_green, 256, 256 + 255) - 256; - uint32_t blue = clamp(256 - fg_blue + bg_blue, 256, 256 + 255) - 256; + uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; *dest = 0xff000000 | (red << 16) | (green << 8) | blue; dest += pitch; @@ -1616,6 +1640,8 @@ void R_DrawRevSubClampColumnP_RGBA_C() const BYTE *source = dc_source; int pitch = dc_pitch; uint32_t light = calc_light_multiplier(dc_light); + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); do { @@ -1628,9 +1654,9 @@ void R_DrawRevSubClampColumnP_RGBA_C() uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - uint32_t red = clamp(256 + fg_red - bg_red, 256, 256 + 255) - 256; - uint32_t green = clamp(256 + fg_green - bg_green, 256, 256 + 255) - 256; - uint32_t blue = clamp(256 + fg_blue - bg_blue, 256, 256 + 255) - 256; + uint32_t red = clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; *dest = 0xff000000 | (red << 16) | (green << 8) | blue; dest += pitch; @@ -1703,6 +1729,9 @@ void R_DrawRevSubClampTranslatedColumnP_RGBA_C() int pitch = dc_pitch; uint32_t light = calc_light_multiplier(dc_light); + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + do { uint32_t fg = shade_pal_index(colormap[translation[source[frac >> FRACBITS]]], light); @@ -1714,9 
+1743,9 @@ void R_DrawRevSubClampTranslatedColumnP_RGBA_C() uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - uint32_t red = clamp(256 + fg_red - bg_red, 256, 256 + 255) - 256; - uint32_t green = clamp(256 + fg_green - bg_green, 256, 256 + 255) - 256; - uint32_t blue = clamp(256 + fg_blue - bg_blue, 256, 256 + 255) - 256; + uint32_t red = clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; *dest = 0xff000000 | (red << 16) | (green << 8) | blue; dest += pitch; @@ -1806,8 +1835,7 @@ void R_SetSpanSource(const BYTE *pixels) void R_SetSpanColormap(BYTE *colormap) { - ds_colormap = colormap; - ds_light = 0; + R_SetDSColorMapLight(colormap, 0, 0); #ifdef X86_ASM if (!r_swtruecolor && ds_colormap != ds_curcolormap) { @@ -2316,8 +2344,6 @@ void R_DrawSpanTranslucentP_RGBA_C() const BYTE* colormap = ds_colormap; int count; int spot; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; xfrac = ds_xfrac; yfrac = ds_yfrac; @@ -2331,6 +2357,9 @@ void R_DrawSpanTranslucentP_RGBA_C() uint32_t light = calc_light_multiplier(ds_light); + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + if (ds_xbits == 6 && ds_ybits == 6) { // 64x64 is the most common case by far, so special case it. 
@@ -2347,9 +2376,9 @@ void R_DrawSpanTranslucentP_RGBA_C() uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - uint32_t red = (fg_red + bg_red + 1) / 2; - uint32_t green = (fg_green + bg_green + 1) / 2; - uint32_t blue = (fg_blue + bg_blue + 1) / 2; + uint32_t red = (fg_red * fg_alpha + bg_red * bg_alpha) / 256; + uint32_t green = (fg_green * fg_alpha + bg_green * bg_alpha) / 256; + uint32_t blue = (fg_blue * fg_alpha + bg_blue * bg_alpha) / 256; *dest++ = 0xff000000 | (red << 16) | (green << 8) | blue; @@ -2375,9 +2404,9 @@ void R_DrawSpanTranslucentP_RGBA_C() uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - uint32_t red = (fg_red + bg_red + 1) / 2; - uint32_t green = (fg_green + bg_green + 1) / 2; - uint32_t blue = (fg_blue + bg_blue + 1) / 2; + uint32_t red = (fg_red * fg_alpha + bg_red * bg_alpha) / 256; + uint32_t green = (fg_green * fg_alpha + bg_green * bg_alpha) / 256; + uint32_t blue = (fg_blue * fg_alpha + bg_blue * bg_alpha) / 256; *dest++ = 0xff000000 | (red << 16) | (green << 8) | blue; @@ -2474,11 +2503,12 @@ void R_DrawSpanMaskedTranslucentP_RGBA_C() const BYTE* colormap = ds_colormap; int count; int spot; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; uint32_t light = calc_light_multiplier(ds_light); + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + xfrac = ds_xfrac; yfrac = ds_yfrac; @@ -2509,9 +2539,9 @@ void R_DrawSpanMaskedTranslucentP_RGBA_C() uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - uint32_t red = (fg_red + bg_red + 1) / 2; - uint32_t green = (fg_green + bg_green + 1) / 2; - uint32_t blue = (fg_blue + bg_blue + 1) / 2; + uint32_t red = (fg_red * fg_alpha + bg_red * bg_alpha) / 256; + uint32_t green = (fg_green * fg_alpha + bg_green * bg_alpha) / 256; + uint32_t blue = (fg_blue * fg_alpha + bg_blue * bg_alpha) / 256; *dest = 0xff000000 | (red << 16) | (green << 8) | blue; } @@ 
-2542,9 +2572,9 @@ void R_DrawSpanMaskedTranslucentP_RGBA_C() uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - uint32_t red = (fg_red + bg_red + 1) / 2; - uint32_t green = (fg_green + bg_green + 1) / 2; - uint32_t blue = (fg_blue + bg_blue + 1) / 2; + uint32_t red = (fg_red * fg_alpha + bg_red * bg_alpha) / 256; + uint32_t green = (fg_green * fg_alpha + bg_green * bg_alpha) / 256; + uint32_t blue = (fg_blue * fg_alpha + bg_blue * bg_alpha) / 256; *dest = 0xff000000 | (red << 16) | (green << 8) | blue; } @@ -2636,11 +2666,12 @@ void R_DrawSpanAddClampP_RGBA_C() const BYTE* colormap = ds_colormap; int count; int spot; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; uint32_t light = calc_light_multiplier(ds_light); + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + xfrac = ds_xfrac; yfrac = ds_yfrac; @@ -2667,9 +2698,9 @@ void R_DrawSpanAddClampP_RGBA_C() uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - uint32_t red = clamp(fg_red + bg_red, 0, 255); - uint32_t green = clamp(fg_green + bg_green, 0, 255); - uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); *dest++ = 0xff000000 | (red << 16) | (green << 8) | blue; @@ -2695,9 +2726,9 @@ void R_DrawSpanAddClampP_RGBA_C() uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - uint32_t red = clamp(fg_red + bg_red, 0, 255); - uint32_t green = clamp(fg_green + bg_green, 0, 255); - uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha 
+ bg_blue * bg_alpha) / 256, 0, 255); *dest++ = 0xff000000 | (red << 16) | (green << 8) | blue; @@ -2800,11 +2831,12 @@ void R_DrawSpanMaskedAddClampP_RGBA_C() const BYTE* colormap = ds_colormap; int count; int spot; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; uint32_t light = calc_light_multiplier(ds_light); + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + xfrac = ds_xfrac; yfrac = ds_yfrac; @@ -2835,9 +2867,9 @@ void R_DrawSpanMaskedAddClampP_RGBA_C() uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - uint32_t red = (fg_red + bg_red + 1) / 2; - uint32_t green = (fg_green + bg_green + 1) / 2; - uint32_t blue = (fg_blue + bg_blue + 1) / 2; + uint32_t red = (fg_red * fg_alpha + bg_red * bg_alpha) / 256; + uint32_t green = (fg_green * fg_alpha + bg_green * bg_alpha) / 256; + uint32_t blue = (fg_blue * fg_alpha + bg_blue * bg_alpha) / 256; *dest = 0xff000000 | (red << 16) | (green << 8) | blue; } @@ -2868,9 +2900,9 @@ void R_DrawSpanMaskedAddClampP_RGBA_C() uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - uint32_t red = (fg_red + bg_red + 1) / 2; - uint32_t green = (fg_green + bg_green + 1) / 2; - uint32_t blue = (fg_blue + bg_blue + 1) / 2; + uint32_t red = (fg_red * fg_alpha + bg_red * bg_alpha) / 256; + uint32_t green = (fg_green * fg_alpha + bg_green * bg_alpha) / 256; + uint32_t blue = (fg_blue * fg_alpha + bg_blue * bg_alpha) / 256; *dest = 0xff000000 | (red << 16) | (green << 8) | blue; } @@ -3468,8 +3500,7 @@ void R_DrawFogBoundary_C (int x1, int x2, short *uclip, short *dclip) clearbufshort (spanend+t2, b2-t2, x); } - dc_colormap = basecolormapdata + (rcolormap << COLORMAPSHIFT); - dc_light = 0; + R_SetColorMapLight(basecolormapdata, (float)light, wallshade); for (--x; x >= x1; --x) { @@ -3494,8 +3525,7 @@ void R_DrawFogBoundary_C (int x1, int x2, short *uclip, short *dclip) clearbufshort (spanend+t2, b2-t2, x); } rcolormap = 
lcolormap; - dc_colormap = basecolormapdata + (lcolormap << COLORMAPSHIFT); - dc_light = 0; + R_SetColorMapLight(basecolormapdata, (float)light, wallshade); } else { @@ -3594,8 +3624,7 @@ void R_DrawFogBoundary_RGBA(int x1, int x2, short *uclip, short *dclip) clearbufshort(spanend + t2, b2 - t2, x); } - dc_colormap = basecolormapdata + (rcolormap << COLORMAPSHIFT); - dc_light = 0; + R_SetColorMapLight(basecolormapdata, (float)light, wallshade); for (--x; x >= x1; --x) { @@ -3620,8 +3649,7 @@ void R_DrawFogBoundary_RGBA(int x1, int x2, short *uclip, short *dclip) clearbufshort(spanend + t2, b2 - t2, x); } rcolormap = lcolormap; - dc_colormap = basecolormapdata + (lcolormap << COLORMAPSHIFT); - dc_light = 0; + R_SetColorMapLight(basecolormapdata, (float)light, wallshade); } else { @@ -3717,11 +3745,11 @@ fixed_t tmvline1_add_RGBA() int bits = tmvlinebits; int pitch = dc_pitch; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - uint32_t light = calc_light_multiplier(dc_light); + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + do { BYTE pix = source[frac >> bits]; @@ -3736,9 +3764,9 @@ fixed_t tmvline1_add_RGBA() uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - uint32_t red = clamp(fg_red + bg_red, 0, 255); - uint32_t green = clamp(fg_green + bg_green, 0, 255); - uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); *dest = 0xff000000 | (red << 16) | (green << 8) | blue; } @@ -3794,6 +3822,9 @@ void tmvline4_add_RGBA() light[2] = calc_light_multiplier(palookuplight[2]); light[3] = calc_light_multiplier(palookuplight[3]); + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + do 
{ for (int i = 0; i < 4; ++i) @@ -3810,9 +3841,9 @@ void tmvline4_add_RGBA() uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - uint32_t red = clamp(fg_red + bg_red, 0, 255); - uint32_t green = clamp(fg_green + bg_green, 0, 255); - uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; } @@ -3871,11 +3902,11 @@ fixed_t tmvline1_addclamp_RGBA() int bits = tmvlinebits; int pitch = dc_pitch; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - uint32_t light = calc_light_multiplier(dc_light); + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + do { BYTE pix = source[frac >> bits]; @@ -3890,9 +3921,9 @@ fixed_t tmvline1_addclamp_RGBA() uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - uint32_t red = clamp(fg_red + bg_red, 0, 255); - uint32_t green = clamp(fg_green + bg_green, 0, 255); - uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); *dest = 0xff000000 | (red << 16) | (green << 8) | blue; } @@ -3947,6 +3978,9 @@ void tmvline4_addclamp_RGBA() light[2] = calc_light_multiplier(palookuplight[2]); light[3] = calc_light_multiplier(palookuplight[3]); + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + do { for (int i = 0; i < 4; ++i) @@ -3963,9 +3997,9 @@ void tmvline4_addclamp_RGBA() uint32_t bg_green = (dest[i] >> 8) & 0xff; uint32_t bg_blue = (dest[i]) & 
0xff; - uint32_t red = clamp(fg_red + bg_red, 0, 255); - uint32_t green = clamp(fg_green + bg_green, 0, 255); - uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; } @@ -4023,6 +4057,9 @@ fixed_t tmvline1_subclamp_RGBA() uint32_t light = calc_light_multiplier(dc_light); + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + do { BYTE pix = source[frac >> bits]; @@ -4037,9 +4074,9 @@ fixed_t tmvline1_subclamp_RGBA() uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - uint32_t red = clamp(256 - fg_red + bg_red, 256, 256 + 255) - 256; - uint32_t green = clamp(256 - fg_green + bg_green, 256, 256 + 255) - 256; - uint32_t blue = clamp(256 - fg_blue + bg_blue, 256, 256 + 255) - 256; + uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; *dest = 0xff000000 | (red << 16) | (green << 8) | blue; } @@ -4093,6 +4130,9 @@ void tmvline4_subclamp_RGBA() light[2] = calc_light_multiplier(palookuplight[2]); light[3] = calc_light_multiplier(palookuplight[3]); + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + do { for (int i = 0; i < 4; ++i) @@ -4109,9 +4149,9 @@ void tmvline4_subclamp_RGBA() uint32_t bg_green = (dest[i] >> 8) & 0xff; uint32_t bg_blue = (dest[i]) & 0xff; - uint32_t red = clamp(256 - fg_red + bg_red, 256, 256 + 255) - 256; - uint32_t green = clamp(256 - fg_green + bg_green, 256, 
256 + 255) - 256; - uint32_t blue = clamp(256 - fg_blue + bg_blue, 256, 256 + 255) - 256; + uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; } @@ -4169,6 +4209,9 @@ fixed_t tmvline1_revsubclamp_RGBA() uint32_t light = calc_light_multiplier(dc_light); + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + do { BYTE pix = source[frac >> bits]; @@ -4183,9 +4226,9 @@ fixed_t tmvline1_revsubclamp_RGBA() uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - uint32_t red = clamp(256 + fg_red - bg_red, 256, 256 + 255) - 256; - uint32_t green = clamp(256 + fg_green - bg_green, 256, 256 + 255) - 256; - uint32_t blue = clamp(256 + fg_blue - bg_blue, 256, 256 + 255) - 256; + uint32_t red = clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; *dest = 0xff000000 | (red << 16) | (green << 8) | blue; } @@ -4233,15 +4276,15 @@ void tmvline4_revsubclamp_RGBA() int count = dc_count; int bits = tmvlinebits; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - uint32_t light[4]; light[0] = calc_light_multiplier(palookuplight[0]); light[1] = calc_light_multiplier(palookuplight[1]); light[2] = calc_light_multiplier(palookuplight[2]); light[3] = calc_light_multiplier(palookuplight[3]); + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + do { for (int i = 0; i < 4; ++i) @@ -4258,9 
+4301,9 @@ void tmvline4_revsubclamp_RGBA() uint32_t bg_green = (dest[i] >> 8) & 0xff; uint32_t bg_blue = (dest[i]) & 0xff; - uint32_t red = clamp(256 + fg_red - bg_red, 256, 256 + 255) - 256; - uint32_t green = clamp(256 + fg_green - bg_green, 256, 256 + 255) - 256; - uint32_t blue = clamp(256 + fg_blue - bg_blue, 256, 256 + 255) - 256; + uint32_t red = clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; } @@ -4558,16 +4601,22 @@ static bool R_SetBlendFunc (int op, fixed_t fglevel, fixed_t bglevel, int flags) { dc_srcblend = Col2RGB8_Inverse[fglevel>>10]; dc_destblend = Col2RGB8_LessPrecision[bglevel>>10]; + dc_srcalpha = fglevel; + dc_destalpha = bglevel; } else if (op == STYLEOP_Add && fglevel + bglevel <= FRACUNIT) { dc_srcblend = Col2RGB8[fglevel>>10]; dc_destblend = Col2RGB8[bglevel>>10]; + dc_srcalpha = fglevel; + dc_destalpha = bglevel; } else { dc_srcblend = Col2RGB8_LessPrecision[fglevel>>10]; dc_destblend = Col2RGB8_LessPrecision[bglevel>>10]; + dc_srcalpha = fglevel; + dc_destalpha = bglevel; } switch (op) { @@ -4736,12 +4785,15 @@ ESPSResult R_SetPatchStyle (FRenderStyle style, fixed_t alpha, int translation, hcolfunc_post1 = rt_shaded1col; hcolfunc_post4 = rt_shaded4cols; dc_color = fixedcolormap ? fixedcolormap[APART(color)] : basecolormap->Maps[APART(color)]; - dc_colormap = (basecolormap = &ShadeFakeColormap[16-alpha])->Maps; + lighttable_t *colormap = (basecolormap = &ShadeFakeColormap[16-alpha])->Maps; if (fixedlightlev >= 0 && fixedcolormap == NULL) { - dc_colormap += fixedlightlev; + R_SetColorMapLight(colormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); + } + else + { + R_SetColorMapLight(colormap, 0, 0); } - dc_light = 0; return r_columnmethod ? 
DoDraw1 : DoDraw0; } @@ -4766,8 +4818,7 @@ ESPSResult R_SetPatchStyle (FRenderStyle style, fixed_t alpha, int translation, // with the alpha. dc_srccolor = ((((r*x)>>4)<<20) | ((g*x)>>4) | ((((b)*x)>>4)<<10)) & 0x3feffbff; hcolfunc_pre = R_FillColumnHoriz; - dc_colormap = identitymap; - dc_light = 0; + R_SetColorMapLight(identitymap, 0, 0); } if (!R_SetBlendFunc (style.BlendOp, fglevel, bglevel, style.Flags)) diff --git a/src/r_draw.h b/src/r_draw.h index d5007c885c..f60b2299eb 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -41,6 +41,8 @@ extern "C" int dc_color; // [RH] For flat colors (no texturing) extern "C" DWORD dc_srccolor; extern "C" DWORD *dc_srcblend; extern "C" DWORD *dc_destblend; +extern "C" fixed_t dc_srcalpha; +extern "C" fixed_t dc_destalpha; // first pixel in a column extern "C" const BYTE* dc_source; diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp index 872cb4b89e..1725b80e4d 100644 --- a/src/r_drawt_rgba.cpp +++ b/src/r_drawt_rgba.cpp @@ -308,6 +308,9 @@ void rt_add1col_RGBA_c (int hx, int sx, int yl, int yh) uint32_t light = calc_light_multiplier(dc_light); + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + do { uint32_t fg = shade_pal_index(colormap[*source], light); uint32_t fg_red = (fg >> 16) & 0xff; @@ -318,9 +321,9 @@ void rt_add1col_RGBA_c (int hx, int sx, int yl, int yh) uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - uint32_t red = clamp(fg_red + bg_red, 0, 255); - uint32_t green = clamp(fg_green + bg_green, 0, 255); - uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); *dest = 0xff000000 | (red << 16) | (green << 8) | blue; @@ -350,6 +353,9 @@ void rt_add4cols_RGBA_c (int sx, int yl, int yh) uint32_t 
light = calc_light_multiplier(dc_light); + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + do { for (int i = 0; i < 4; i++) { @@ -362,9 +368,9 @@ void rt_add4cols_RGBA_c (int sx, int yl, int yh) uint32_t bg_green = (dest[i] >> 8) & 0xff; uint32_t bg_blue = (dest[i]) & 0xff; - uint32_t red = clamp(fg_red + bg_red, 0, 255); - uint32_t green = clamp(fg_green + bg_green, 0, 255); - uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; } @@ -496,6 +502,9 @@ void rt_addclamp1col_RGBA_c (int hx, int sx, int yl, int yh) uint32_t light = calc_light_multiplier(dc_light); + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + do { uint32_t fg = shade_pal_index(colormap[*source], light); uint32_t fg_red = (fg >> 16) & 0xff; @@ -506,9 +515,9 @@ void rt_addclamp1col_RGBA_c (int hx, int sx, int yl, int yh) uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - uint32_t red = clamp(fg_red + bg_red, 0, 255); - uint32_t green = clamp(fg_green + bg_green, 0, 255); - uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); *dest = 0xff000000 | (red << 16) | (green << 8) | blue; source += 4; @@ -537,6 +546,9 @@ void rt_addclamp4cols_RGBA_c (int sx, int yl, int yh) uint32_t light = calc_light_multiplier(dc_light); + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + do 
{ for (int i = 0; i < 4; i++) { @@ -549,9 +561,9 @@ void rt_addclamp4cols_RGBA_c (int sx, int yl, int yh) uint32_t bg_green = (dest[i] >> 8) & 0xff; uint32_t bg_blue = (dest[i]) & 0xff; - uint32_t red = clamp(fg_red + bg_red, 0, 255); - uint32_t green = clamp(fg_green + bg_green, 0, 255); - uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; } @@ -595,6 +607,9 @@ void rt_subclamp1col_RGBA_c (int hx, int sx, int yl, int yh) uint32_t light = calc_light_multiplier(dc_light); + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + do { uint32_t fg = shade_pal_index(colormap[*source], light); uint32_t fg_red = (fg >> 16) & 0xff; @@ -605,9 +620,9 @@ void rt_subclamp1col_RGBA_c (int hx, int sx, int yl, int yh) uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - uint32_t red = clamp(256 - fg_red + bg_red, 256, 256 + 255) - 256; - uint32_t green = clamp(256 - fg_green + bg_green, 256, 256 + 255) - 256; - uint32_t blue = clamp(256 - fg_blue + bg_blue, 256, 256 + 255) - 256; + uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; *dest = 0xff000000 | (red << 16) | (green << 8) | blue; source += 4; @@ -636,6 +651,9 @@ void rt_subclamp4cols_RGBA_c (int sx, int yl, int yh) uint32_t light = calc_light_multiplier(dc_light); + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + do { for (int i = 
0; i < 4; i++) { @@ -648,9 +666,9 @@ void rt_subclamp4cols_RGBA_c (int sx, int yl, int yh) uint32_t bg_green = (dest[i] >> 8) & 0xff; uint32_t bg_blue = (dest[i]) & 0xff; - uint32_t red = clamp(256 - fg_red + bg_red, 256, 256 + 255) - 256; - uint32_t green = clamp(256 - fg_green + bg_green, 256, 256 + 255) - 256; - uint32_t blue = clamp(256 - fg_blue + bg_blue, 256, 256 + 255) - 256; + uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; } @@ -688,8 +706,6 @@ void rt_revsubclamp1col_RGBA_c (int hx, int sx, int yl, int yh) return; count++; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl*4 + hx]; pitch = dc_pitch; @@ -697,6 +713,9 @@ void rt_revsubclamp1col_RGBA_c (int hx, int sx, int yl, int yh) uint32_t light = calc_light_multiplier(dc_light); + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + do { uint32_t fg = shade_pal_index(colormap[*source], light); uint32_t fg_red = (fg >> 16) & 0xff; @@ -707,9 +726,9 @@ void rt_revsubclamp1col_RGBA_c (int hx, int sx, int yl, int yh) uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - uint32_t red = clamp(256 + fg_red - bg_red, 256, 256 + 255) - 256; - uint32_t green = clamp(256 + fg_green - bg_green, 256, 256 + 255) - 256; - uint32_t blue = clamp(256 + fg_blue - bg_blue, 256, 256 + 255) - 256; + uint32_t red = clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 + 
fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; *dest = 0xff000000 | (red << 16) | (green << 8) | blue; source += 4; @@ -731,8 +750,6 @@ void rt_revsubclamp4cols_RGBA_c (int sx, int yl, int yh) return; count++; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl*4]; pitch = dc_pitch; @@ -740,6 +757,9 @@ void rt_revsubclamp4cols_RGBA_c (int sx, int yl, int yh) uint32_t light = calc_light_multiplier(dc_light); + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + do { for (int i = 0; i < 4; i++) { @@ -752,9 +772,9 @@ void rt_revsubclamp4cols_RGBA_c (int sx, int yl, int yh) uint32_t bg_green = (dest[i] >> 8) & 0xff; uint32_t bg_blue = (dest[i]) & 0xff; - uint32_t red = clamp(256 + fg_red - bg_red, 256, 256 + 255) - 256; - uint32_t green = clamp(256 + fg_green - bg_green, 256, 256 + 255) - 256; - uint32_t blue = clamp(256 + fg_blue - bg_blue, 256, 256 + 255) - 256; + uint32_t red = clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; } diff --git a/src/r_main.h b/src/r_main.h index 765635e5da..e8be3c1a30 100644 --- a/src/r_main.h +++ b/src/r_main.h @@ -82,15 +82,18 @@ extern bool r_dontmaplines; // Change R_CalcTiltedLighting() when this changes. 
#define GETPALOOKUP(vis,shade) (clamp (((shade)-FLOAT2FIXED(MIN(MAXLIGHTVIS,double(vis))))>>FRACBITS, 0, NUMCOLORMAPS-1)) -// Calculate the light multiplier for ds_light -// This is used instead of GETPALOOKUP when ds_colormap+dc_colormap is set to the base colormap -#define LIGHTSCALE(vis,shade) ((shade)-FLOAT2FIXED(MIN(MAXLIGHTVIS,double(vis)))) +// Calculate the light multiplier for dc_light/ds_light +// This is used instead of GETPALOOKUP when ds_colormap/dc_colormap is set to the base colormap +// Returns a value between 0 and 1 in fixed point +#define LIGHTSCALE(vis,shade) FLOAT2FIXED(clamp((FIXED2DBL(shade) - (MIN(MAXLIGHTVIS,double(vis)))) / NUMCOLORMAPS, 0.0, (NUMCOLORMAPS-1)/(double)NUMCOLORMAPS)) + +// Converts fixedlightlev into a shade value +#define FIXEDLIGHT2SHADE(lightlev) (((lightlev) >> COLORMAPSHIFT) << FRACBITS) // calculates the light constant passed to the shade_pal_index function inline uint32_t calc_light_multiplier(dsfixed_t light) { - // the 0.70 multiplier shouldn't be needed - maybe the palette shades in doom weren't linear? 
- return (uint32_t)clamp((1.0 - FIXED2DBL(light) / MAXLIGHTVIS * 0.70) * 256 + 0.5, 0.0, 256.0); + return 256 - (light >> (FRACBITS - 8)); } // Calculates a ARGB8 color for the given palette index and light multiplier diff --git a/src/r_plane.cpp b/src/r_plane.cpp index 40e14c0200..9805ab2006 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -391,8 +391,7 @@ void R_MapTiltedPlane_C (int y, int x1) u = SQWORD(uz*z) + pviewx; v = SQWORD(vz*z) + pviewy; - ds_colormap = tiltlighting[i]; - ds_light = 0; + R_SetDSColorMapLight(tiltlighting[i], 0, 0); fb[i++] = ds_colormap[ds_source[(v >> vshift) | ((u >> ushift) & umask)]]; iz += plane_sz[0]; uz += plane_su[0]; @@ -515,8 +514,7 @@ void R_MapTiltedPlane_RGBA (int y, int x1) u = SQWORD(uz*z) + pviewx; v = SQWORD(vz*z) + pviewy; - ds_colormap = tiltlighting[i]; - ds_light = 0; + R_SetDSColorMapLight(tiltlighting[i], 0, 0); fb[i++] = ds_colormap[ds_source[(v >> vshift) | ((u >> ushift) & umask)]]; iz += plane_sz[0]; uz += plane_su[0]; @@ -1595,14 +1593,13 @@ void R_DrawSkyPlane (visplane_t *pl) bool fakefixed = false; if (fixedcolormap) { - dc_colormap = fixedcolormap; - dc_light = 0; + R_SetColorMapLight(fixedcolormap, 0, 0); } else { fakefixed = true; - fixedcolormap = dc_colormap = NormalLight.Maps; - dc_light = 0; + fixedcolormap = NormalLight.Maps; + R_SetColorMapLight(fixedcolormap, 0, 0); } R_DrawSky (pl); @@ -1685,11 +1682,19 @@ void R_DrawNormalPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t GlobVis = r_FloorVisibility / planeheight; ds_light = 0; if (fixedlightlev >= 0) - ds_colormap = basecolormap->Maps + fixedlightlev, plane_shade = false; + { + R_SetDSColorMapLight(basecolormap->Maps, 0, FIXEDLIGHT2SHADE(fixedlightlev)); + plane_shade = false; + } else if (fixedcolormap) - ds_colormap = fixedcolormap, plane_shade = false; + { + R_SetDSColorMapLight(fixedcolormap, 0, 0); + plane_shade = false; + } else + { plane_shade = true; + } if (spanfunc != R_FillSpan) { @@ -1702,12 +1707,16 @@ void 
R_DrawNormalPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t spanfunc = R_DrawSpanMaskedTranslucent; dc_srcblend = Col2RGB8[alpha>>10]; dc_destblend = Col2RGB8[(OPAQUE-alpha)>>10]; + dc_srcalpha = alpha; + dc_destalpha = OPAQUE - alpha; } else { spanfunc = R_DrawSpanMaskedAddClamp; dc_srcblend = Col2RGB8_LessPrecision[alpha>>10]; dc_destblend = Col2RGB8_LessPrecision[FRACUNIT>>10]; + dc_srcalpha = alpha; + dc_destalpha = OPAQUE - alpha; } } else @@ -1724,12 +1733,16 @@ void R_DrawNormalPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t spanfunc = R_DrawSpanTranslucent; dc_srcblend = Col2RGB8[alpha>>10]; dc_destblend = Col2RGB8[(OPAQUE-alpha)>>10]; + dc_srcalpha = alpha; + dc_destalpha = OPAQUE - alpha; } else { spanfunc = R_DrawSpanAddClamp; dc_srcblend = Col2RGB8_LessPrecision[alpha>>10]; dc_destblend = Col2RGB8_LessPrecision[FRACUNIT>>10]; + dc_srcalpha = alpha; + dc_destalpha = OPAQUE - alpha; } } else @@ -1846,11 +1859,20 @@ void R_DrawTiltedPlane(visplane_t *pl, double _xscale, double _yscale, fixed_t a ds_light = 0; if (fixedlightlev >= 0) - ds_colormap = basecolormap->Maps + fixedlightlev, plane_shade = false; + { + R_SetDSColorMapLight(basecolormap->Maps, 0, FIXEDLIGHT2SHADE(fixedlightlev)); + plane_shade = false; + } else if (fixedcolormap) - ds_colormap = fixedcolormap, plane_shade = false; + { + R_SetDSColorMapLight(fixedcolormap, 0, 0); + plane_shade = false; + } else - ds_colormap = basecolormap->Maps, plane_shade = true; + { + R_SetDSColorMapLight(basecolormap->Maps, 0, 0); + plane_shade = true; + } if (!plane_shade) { diff --git a/src/r_segs.cpp b/src/r_segs.cpp index cab97adfca..43590247e8 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -177,8 +177,7 @@ static void BlastMaskedColumn (void (*blastfunc)(const BYTE *pixels, const FText // calculate lighting if (fixedcolormap == NULL && fixedlightlev < 0) { - dc_colormap = basecolormap->Maps + (GETPALOOKUP (rw_light, wallshade) << COLORMAPSHIFT); - dc_light = 0; + 
R_SetColorMapLight(basecolormap->Maps, rw_light, wallshade); } dc_iscale = xs_Fix<16>::ToFix(MaskedSWall[dc_x] * MaskedScaleY); @@ -314,10 +313,9 @@ void R_RenderMaskedSegRange (drawseg_t *ds, int x1, int x2) rw_scalestep = ds->iscalestep; if (fixedlightlev >= 0) - dc_colormap = basecolormap->Maps + fixedlightlev; + R_SetColorMapLight(basecolormap->Maps, 0, FIXEDLIGHT2SHADE(fixedlightlev)); else if (fixedcolormap != NULL) - dc_colormap = fixedcolormap; - dc_light = 0; + R_SetColorMapLight(fixedcolormap, 0, 0); // find positioning texheight = tex->GetScaledHeightDouble(); @@ -632,10 +630,9 @@ void R_RenderFakeWall(drawseg_t *ds, int x1, int x2, F3DFloor *rover) } if (fixedlightlev >= 0) - dc_colormap = basecolormap->Maps + fixedlightlev; + R_SetColorMapLight(basecolormap->Maps, 0, FIXEDLIGHT2SHADE(fixedlightlev)); else if (fixedcolormap != NULL) - dc_colormap = fixedcolormap; - dc_light = 0; + R_SetColorMapLight(fixedcolormap, 0, 0); WallC.sz1 = ds->sz1; WallC.sz2 = ds->sz2; @@ -1435,11 +1432,11 @@ static void wallscan_np2_ds(drawseg_t *ds, int x1, int x2, short *uwal, short *d } } -inline fixed_t mvline1 (fixed_t vince, BYTE *colormap, int count, fixed_t vplce, const BYTE *bufplce, BYTE *dest) +inline fixed_t mvline1 (fixed_t vince, BYTE *colormap, fixed_t light, int count, fixed_t vplce, const BYTE *bufplce, BYTE *dest) { dc_iscale = vince; dc_colormap = colormap; - dc_light = 0; + dc_light = light; dc_count = count; dc_texturefrac = vplce; dc_source = bufplce; @@ -1508,8 +1505,7 @@ void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_ if (!fixed) { // calculate lighting - dc_colormap = basecolormapdata + (GETPALOOKUP (light, wallshade) << COLORMAPSHIFT); - dc_light = 0; + R_SetColorMapLight(basecolormapdata, light, wallshade); } dc_source = getcol (rw_pic, (lwal[x] + xoffset) >> FRACBITS); @@ -1569,7 +1565,7 @@ void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_ { if (!(bad & 1)) { - 
mvline1(vince[z],palookupoffse[z],y2ve[z]-y1ve[z],vplce[z],bufplce[z],(ylookup[y1ve[z]]+z)*pixelsize+pixel); + mvline1(vince[z],palookupoffse[z],palookuplight[z],y2ve[z]-y1ve[z],vplce[z],bufplce[z],(ylookup[y1ve[z]]+z)*pixelsize+pixel); } bad >>= 1; } @@ -1580,7 +1576,7 @@ void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_ { if (u4 > y1ve[z]) { - vplce[z] = mvline1(vince[z],palookupoffse[z],u4-y1ve[z],vplce[z],bufplce[z],(ylookup[y1ve[z]]+z)*pixelsize+pixel); + vplce[z] = mvline1(vince[z],palookupoffse[z],palookuplight[z],u4-y1ve[z],vplce[z],bufplce[z],(ylookup[y1ve[z]]+z)*pixelsize+pixel); } } @@ -1596,7 +1592,7 @@ void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_ { if (y2ve[z] > d4) { - mvline1(vince[z],palookupoffse[0],y2ve[z]-d4,vplce[z],bufplce[z],i+z*pixelsize); + mvline1(vince[z],palookupoffse[0],palookuplight[0],y2ve[z]-d4,vplce[z],bufplce[z],i+z*pixelsize); } } } @@ -1609,8 +1605,7 @@ void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_ if (!fixed) { // calculate lighting - dc_colormap = basecolormapdata + (GETPALOOKUP (light, wallshade) << COLORMAPSHIFT); - dc_light = 0; + R_SetColorMapLight(basecolormapdata, light, wallshade); } dc_source = getcol (rw_pic, (lwal[x] + xoffset) >> FRACBITS); @@ -1844,10 +1839,9 @@ void R_RenderSegLoop () fixed_t xoffset = rw_offset; if (fixedlightlev >= 0) - dc_colormap = basecolormap->Maps + fixedlightlev; + R_SetColorMapLight(basecolormap->Maps, 0, FIXEDLIGHT2SHADE(fixedlightlev)); else if (fixedcolormap != NULL) - dc_colormap = fixedcolormap; - dc_light = 0; + R_SetColorMapLight(fixedcolormap, 0, 0); // clip wall to the floor and ceiling for (x = x1; x < x2; ++x) @@ -3244,14 +3238,13 @@ static void R_RenderDecal (side_t *wall, DBaseDecal *decal, drawseg_t *clipper, rw_light = rw_lightleft + (x1 - WallC.sx1) * rw_lightstep; if (fixedlightlev >= 0) - dc_colormap = usecolormap->Maps + fixedlightlev; + 
R_SetColorMapLight(usecolormap->Maps, 0, FIXEDLIGHT2SHADE(fixedlightlev)); else if (fixedcolormap != NULL) - dc_colormap = fixedcolormap; + R_SetColorMapLight(fixedcolormap, 0, 0); else if (!foggy && (decal->RenderFlags & RF_FULLBRIGHT)) - dc_colormap = usecolormap->Maps; + R_SetColorMapLight(usecolormap->Maps, 0, 0); else calclighting = true; - dc_light = 0; // Draw it if (decal->RenderFlags & RF_YFLIP) diff --git a/src/r_things.cpp b/src/r_things.cpp index f52c80376c..98557817de 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -408,8 +408,7 @@ void R_DrawVisSprite (vissprite_t *vis) } fixed_t centeryfrac = FLOAT2FIXED(CenterY); - dc_colormap = vis->Style.colormap; - dc_light = 0; + R_SetColorMapLight(vis->Style.colormap, 0, 0); mode = R_SetPatchStyle (vis->Style.RenderStyle, vis->Style.Alpha, vis->Translation, vis->FillColor); @@ -539,14 +538,13 @@ void R_DrawWallSprite(vissprite_t *spr) rw_lightstep = float((GlobVis / spr->wallc.sz2 - rw_lightleft) / (spr->wallc.sx2 - spr->wallc.sx1)); rw_light = rw_lightleft + (x1 - spr->wallc.sx1) * rw_lightstep; if (fixedlightlev >= 0) - dc_colormap = usecolormap->Maps + fixedlightlev; + R_SetColorMapLight(usecolormap->Maps, 0, FIXEDLIGHT2SHADE(fixedlightlev)); else if (fixedcolormap != NULL) - dc_colormap = fixedcolormap; + R_SetColorMapLight(fixedcolormap, 0, 0); else if (!foggy && (spr->renderflags & RF_FULLBRIGHT)) - dc_colormap = usecolormap->Maps; + R_SetColorMapLight(usecolormap->Maps, 0, 0); else calclighting = true; - dc_light = 0; // Draw it WallSpriteTile = spr->pic; @@ -656,8 +654,7 @@ void R_DrawVisVoxel(vissprite_t *spr, int minslabz, int maxslabz, short *cliptop int flags = 0; // Do setup for blending. 
- dc_colormap = spr->Style.colormap; - dc_light = 0; + R_SetColorMapLight(spr->Style.colormap, 0, 0); mode = R_SetPatchStyle(spr->Style.RenderStyle, spr->Style.Alpha, spr->Translation, spr->FillColor); if (mode == DontDraw) diff --git a/src/v_draw.cpp b/src/v_draw.cpp index 57fac3cda5..ff0427b345 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -171,13 +171,11 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) if (translation != NULL) { - dc_colormap = (lighttable_t *)translation; - dc_light = 0; + R_SetColorMapLight((lighttable_t *)translation, 0, 0); } else { - dc_colormap = identitymap; - dc_light = 0; + R_SetColorMapLight(identitymap, 0, 0); } fixedcolormap = dc_colormap; From 41537a50ab9f9aeb5f07e121ed8d1396dd7d261a Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 2 Jun 2016 16:52:41 +0200 Subject: [PATCH 011/912] Fix true color light calculation bug for decals --- src/r_draw.cpp | 8 ++++---- src/r_drawt_rgba.cpp | 4 ++-- src/r_things.cpp | 8 ++++---- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 5a314e6400..c190c1e73e 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -416,7 +416,7 @@ void R_FillAddColumn_RGBA_C() dest = (uint32_t*)dc_dest; int pitch = dc_pitch; - uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(0)); + uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(dc_light)); uint32_t fg_red = (fg >> 24) & 0xff; uint32_t fg_green = (fg >> 16) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -481,7 +481,7 @@ void R_FillAddClampColumn_RGBA() dest = (uint32_t*)dc_dest; int pitch = dc_pitch; - uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(0)); + uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(dc_light)); uint32_t fg_red = (fg >> 24) & 0xff; uint32_t fg_green = (fg >> 16) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -545,7 +545,7 @@ void R_FillSubClampColumn_RGBA() dest = (uint32_t*)dc_dest; int pitch = dc_pitch; - uint32_t fg = 
shade_pal_index(dc_color, calc_light_multiplier(0)); + uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(dc_light)); uint32_t fg_red = (fg >> 24) & 0xff; uint32_t fg_green = (fg >> 16) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -609,7 +609,7 @@ void R_FillRevSubClampColumn_RGBA() dest = (uint32_t*)dc_dest; int pitch = dc_pitch; - uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(0)); + uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(dc_light)); uint32_t fg_red = (fg >> 24) & 0xff; uint32_t fg_green = (fg >> 16) & 0xff; uint32_t fg_blue = fg & 0xff; diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp index 1725b80e4d..60520783d7 100644 --- a/src/r_drawt_rgba.cpp +++ b/src/r_drawt_rgba.cpp @@ -413,7 +413,7 @@ void rt_shaded1col_RGBA_c (int hx, int sx, int yl, int yh) source = &dc_temp_rgba[yl*4 + hx]; pitch = dc_pitch; - uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(0)); + uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(dc_light)); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -455,7 +455,7 @@ void rt_shaded4cols_RGBA_c (int sx, int yl, int yh) source = &dc_temp_rgba[yl*4]; pitch = dc_pitch; - uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(0)); + uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(dc_light)); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; diff --git a/src/r_things.cpp b/src/r_things.cpp index 98557817de..3fcefe0381 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -416,7 +416,7 @@ void R_DrawVisSprite (vissprite_t *vis) { // For shaded sprites, R_SetPatchStyle sets a dc_colormap to an alpha table, but // it is the brightest one. We need to get back to the proper light level for // this sprite. 
- dc_colormap += vis->ColormapNum << COLORMAPSHIFT; + R_SetColorMapLight(dc_colormap, 0, vis->ColormapNum << FRACBITS); } if (mode != DontDraw) @@ -2704,9 +2704,9 @@ void R_DrawParticle_RGBA(vissprite_t *vis) uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - uint32_t red = (fg_red + bg_red * alpha) / 256; - uint32_t green = (fg_green + bg_green * alpha) / 256; - uint32_t blue = (fg_blue + bg_blue * alpha) / 256; + uint32_t red = (fg_red + bg_red * inv_alpha) / 256; + uint32_t green = (fg_green + bg_green * inv_alpha) / 256; + uint32_t blue = (fg_blue + bg_blue * inv_alpha) / 256; *dest = 0xff000000 | (red << 16) | (green << 8) | blue; dest += spacing; From 672b80898b720c03ea10367259cc7b524cc4bead Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 2 Jun 2016 19:26:27 +0200 Subject: [PATCH 012/912] Moved ColormapNum to visstyle_t and changed colormap to BaseColormap --- src/g_shared/a_artifacts.cpp | 3 +- src/r_defs.h | 3 +- src/r_things.cpp | 115 ++++++++++++++++++++--------------- src/r_things.h | 1 - 4 files changed, 70 insertions(+), 52 deletions(-) diff --git a/src/g_shared/a_artifacts.cpp b/src/g_shared/a_artifacts.cpp index d36cdfe650..777d6824ad 100644 --- a/src/g_shared/a_artifacts.cpp +++ b/src/g_shared/a_artifacts.cpp @@ -737,7 +737,8 @@ int APowerInvisibility::AlterWeaponSprite (visstyle_t *vis) if ((vis->Alpha < 0.25f && special1 > 0) || (vis->Alpha == 0)) { vis->Alpha = clamp((1.f - float(Strength/100)), 0.f, 1.f); - vis->colormap = SpecialColormaps[INVERSECOLORMAP].Colormap; + vis->BaseColormap = SpecialColormaps[INVERSECOLORMAP].Colormap; + vis->ColormapNum = 0; } return -1; // This item is valid so another one shouldn't reset the translucency } diff --git a/src/r_defs.h b/src/r_defs.h index f27ac27168..8a247a5c07 100644 --- a/src/r_defs.h +++ b/src/r_defs.h @@ -1401,7 +1401,8 @@ typedef BYTE lighttable_t; // This could be wider for >8 bit display. 
// This encapsulates the fields of vissprite_t that can be altered by AlterWeaponSprite struct visstyle_t { - lighttable_t *colormap; + int ColormapNum; // Which colormap is rendered + lighttable_t *BaseColormap; // Base colormap used together with ColormapNum float Alpha; FRenderStyle RenderStyle; }; diff --git a/src/r_things.cpp b/src/r_things.cpp index 3fcefe0381..6f80381482 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -408,7 +408,7 @@ void R_DrawVisSprite (vissprite_t *vis) } fixed_t centeryfrac = FLOAT2FIXED(CenterY); - R_SetColorMapLight(vis->Style.colormap, 0, 0); + R_SetColorMapLight(vis->Style.BaseColormap, 0, vis->Style.ColormapNum << FRACBITS); mode = R_SetPatchStyle (vis->Style.RenderStyle, vis->Style.Alpha, vis->Translation, vis->FillColor); @@ -416,7 +416,7 @@ void R_DrawVisSprite (vissprite_t *vis) { // For shaded sprites, R_SetPatchStyle sets a dc_colormap to an alpha table, but // it is the brightest one. We need to get back to the proper light level for // this sprite. - R_SetColorMapLight(dc_colormap, 0, vis->ColormapNum << FRACBITS); + R_SetColorMapLight(dc_colormap, 0, vis->Style.ColormapNum << FRACBITS); } if (mode != DontDraw) @@ -654,7 +654,7 @@ void R_DrawVisVoxel(vissprite_t *spr, int minslabz, int maxslabz, short *cliptop int flags = 0; // Do setup for blending. - R_SetColorMapLight(spr->Style.colormap, 0, 0); + R_SetColorMapLight(spr->Style.BaseColormap, 0, spr->Style.ColormapNum << FRACBITS); mode = R_SetPatchStyle(spr->Style.RenderStyle, spr->Style.Alpha, spr->Translation, spr->FillColor); if (mode == DontDraw) @@ -680,7 +680,7 @@ void R_DrawVisVoxel(vissprite_t *spr, int minslabz, int maxslabz, short *cliptop // Render the voxel, either directly to the screen or offscreen. 
R_DrawVoxel(spr->pa.vpos, spr->pa.vang, spr->gpos, spr->Angle, - spr->xscale, FLOAT2FIXED(spr->yscale), spr->voxel, spr->Style.colormap, cliptop, clipbot, + spr->xscale, FLOAT2FIXED(spr->yscale), spr->voxel, spr->Style.BaseColormap + (spr->Style.ColormapNum << COLORMAPSHIFT), cliptop, clipbot, minslabz, maxslabz, flags); // Blend the voxel, if that's what we need to do. @@ -1058,7 +1058,7 @@ void R_ProjectSprite (AActor *thing, int fakeside, F3DFloor *fakefloor, F3DFloor vis->Style.Alpha = float(thing->Alpha); vis->fakefloor = fakefloor; vis->fakeceiling = fakeceiling; - vis->ColormapNum = 0; + vis->Style.ColormapNum = 0; vis->bInMirror = MirrorFlags & RF_XFLIP; vis->bSplitSprite = false; @@ -1110,7 +1110,8 @@ void R_ProjectSprite (AActor *thing, int fakeside, F3DFloor *fakefloor, F3DFloor // get light level if (fixedcolormap != NULL) { // fixed map - vis->Style.colormap = fixedcolormap; + vis->Style.BaseColormap = fixedcolormap; + vis->Style.ColormapNum = 0; } else { @@ -1120,17 +1121,19 @@ void R_ProjectSprite (AActor *thing, int fakeside, F3DFloor *fakefloor, F3DFloor } if (fixedlightlev >= 0) { - vis->Style.colormap = mybasecolormap->Maps + fixedlightlev; + vis->Style.BaseColormap = mybasecolormap->Maps; + vis->Style.ColormapNum = fixedlightlev >> COLORMAPSHIFT; } else if (!foggy && ((renderflags & RF_FULLBRIGHT) || (thing->flags5 & MF5_BRIGHT))) { // full bright - vis->Style.colormap = mybasecolormap->Maps; + vis->Style.BaseColormap = mybasecolormap->Maps; + vis->Style.ColormapNum = 0; } else { // diminished light - vis->ColormapNum = GETPALOOKUP( + vis->Style.ColormapNum = GETPALOOKUP( r_SpriteVisibility / MAX(tz, MINZ), spriteshade); - vis->Style.colormap = mybasecolormap->Maps + (vis->ColormapNum << COLORMAPSHIFT); + vis->Style.BaseColormap = mybasecolormap->Maps; } } } @@ -1199,14 +1202,13 @@ static void R_ProjectWallSprite(AActor *thing, const DVector3 &pos, FTextureID p vis->Style.Alpha = float(thing->Alpha); vis->fakefloor = NULL; vis->fakeceiling = 
NULL; - vis->ColormapNum = 0; vis->bInMirror = MirrorFlags & RF_XFLIP; vis->pic = pic; vis->bIsVoxel = false; vis->bWallSprite = true; - vis->ColormapNum = GETPALOOKUP( + vis->Style.ColormapNum = GETPALOOKUP( r_SpriteVisibility / MAX(tz, MINZ), spriteshade); - vis->Style.colormap = basecolormap->Maps + (vis->ColormapNum << COLORMAPSHIFT); + vis->Style.BaseColormap = basecolormap->Maps; vis->wallc = wallc; } @@ -1376,7 +1378,7 @@ void R_DrawPSprite (pspdef_t* psp, int pspnum, AActor *owner, double sx, double vis->yscale = float(pspriteyscale / tex->Scale.Y); vis->Translation = 0; // [RH] Use default colors vis->pic = tex; - vis->ColormapNum = 0; + vis->Style.ColormapNum = 0; if (flip) { @@ -1426,7 +1428,8 @@ void R_DrawPSprite (pspdef_t* psp, int pspnum, AActor *owner, double sx, double if (realfixedcolormap != NULL) { // fixed color - vis->Style.colormap = realfixedcolormap->Colormap; + vis->Style.BaseColormap = realfixedcolormap->Colormap; + vis->Style.ColormapNum = 0; } else { @@ -1436,35 +1439,39 @@ void R_DrawPSprite (pspdef_t* psp, int pspnum, AActor *owner, double sx, double } if (fixedlightlev >= 0) { - vis->Style.colormap = mybasecolormap->Maps + fixedlightlev; + vis->Style.BaseColormap = mybasecolormap->Maps; + vis->Style.ColormapNum = fixedlightlev >> COLORMAPSHIFT; } else if (!foggy && psp->state->GetFullbright()) { // full bright - vis->Style.colormap = mybasecolormap->Maps; // [RH] use basecolormap + vis->Style.BaseColormap = mybasecolormap->Maps; // [RH] use basecolormap + vis->Style.ColormapNum = 0; } else { // local light - vis->Style.colormap = mybasecolormap->Maps + (GETPALOOKUP (0, spriteshade) << COLORMAPSHIFT); + vis->Style.BaseColormap = mybasecolormap->Maps; + vis->Style.ColormapNum = GETPALOOKUP(0, spriteshade); } } if (camera->Inventory != NULL) { - lighttable_t *oldcolormap = vis->Style.colormap; + BYTE oldcolormapnum = vis->Style.ColormapNum; + lighttable_t *oldcolormap = vis->Style.BaseColormap; camera->Inventory->AlterWeaponSprite 
(&vis->Style); - if (vis->Style.colormap != oldcolormap) + if (vis->Style.BaseColormap != oldcolormap || vis->Style.ColormapNum != oldcolormapnum) { // The colormap has changed. Is it one we can easily identify? // If not, then don't bother trying to identify it for // hardware accelerated drawing. - if (vis->Style.colormap < SpecialColormaps[0].Colormap || - vis->Style.colormap > SpecialColormaps.Last().Colormap) + if (vis->Style.BaseColormap < SpecialColormaps[0].Colormap || + vis->Style.BaseColormap > SpecialColormaps.Last().Colormap) { noaccel = true; } // Has the basecolormap changed? If so, we can't hardware accelerate it, // since we don't know what it is anymore. - else if (vis->Style.colormap < mybasecolormap->Maps || - vis->Style.colormap >= mybasecolormap->Maps + NUMCOLORMAPS*256) + else if (vis->Style.BaseColormap < mybasecolormap->Maps || + vis->Style.BaseColormap >= mybasecolormap->Maps + NUMCOLORMAPS*256) { noaccel = true; } @@ -1472,8 +1479,8 @@ void R_DrawPSprite (pspdef_t* psp, int pspnum, AActor *owner, double sx, double } // If we're drawing with a special colormap, but shaders for them are disabled, do // not accelerate. 
- if (!r_shadercolormaps && (vis->Style.colormap >= SpecialColormaps[0].Colormap && - vis->Style.colormap <= SpecialColormaps.Last().Colormap)) + if (!r_shadercolormaps && (vis->Style.BaseColormap >= SpecialColormaps[0].Colormap && + vis->Style.BaseColormap <= SpecialColormaps.Last().Colormap)) { noaccel = true; } @@ -1495,7 +1502,8 @@ void R_DrawPSprite (pspdef_t* psp, int pspnum, AActor *owner, double sx, double else { colormap_to_use = basecolormap; - vis->Style.colormap = basecolormap->Maps; + vis->Style.BaseColormap = basecolormap->Maps; + vis->Style.ColormapNum = 0; vis->Style.RenderStyle = STYLE_Normal; } @@ -1641,18 +1649,18 @@ void R_DrawRemainingPlayerSprites() FColormapStyle colormapstyle; bool usecolormapstyle = false; - if (vis->Style.colormap >= SpecialColormaps[0].Colormap && - vis->Style.colormap < SpecialColormaps[SpecialColormaps.Size()].Colormap) + if (vis->Style.BaseColormap >= SpecialColormaps[0].Colormap && + vis->Style.BaseColormap < SpecialColormaps[SpecialColormaps.Size()].Colormap) { // Yuck! There needs to be a better way to store colormaps in the vissprite... 
:( - ptrdiff_t specialmap = (vis->Style.colormap - SpecialColormaps[0].Colormap) / sizeof(FSpecialColormap); + ptrdiff_t specialmap = (vis->Style.BaseColormap - SpecialColormaps[0].Colormap) / sizeof(FSpecialColormap) + vis->Style.ColormapNum; special = &SpecialColormaps[specialmap]; } else if (colormap->Color == PalEntry(255,255,255) && colormap->Desaturate == 0) { overlay = colormap->Fade; - overlay.a = BYTE(((vis->Style.colormap - colormap->Maps) >> 8) * 255 / NUMCOLORMAPS); + overlay.a = BYTE(vis->Style.ColormapNum * 255 / NUMCOLORMAPS); } else { @@ -1660,7 +1668,7 @@ void R_DrawRemainingPlayerSprites() colormapstyle.Color = colormap->Color; colormapstyle.Fade = colormap->Fade; colormapstyle.Desaturate = colormap->Desaturate; - colormapstyle.FadeLevel = ((vis->Style.colormap - colormap->Maps) >> 8) / float(NUMCOLORMAPS); + colormapstyle.FadeLevel = vis->Style.ColormapNum / float(NUMCOLORMAPS); } screen->DrawTexture(vis->pic, viewwindowx + VisPSpritesX1[i], @@ -1904,7 +1912,8 @@ void R_DrawSprite (vissprite_t *spr) int r1, r2; short topclip, botclip; short *clip1, *clip2; - lighttable_t *colormap = spr->Style.colormap; + lighttable_t *colormap = spr->Style.BaseColormap; + int colormapnum = spr->Style.ColormapNum; F3DFloor *rover; FDynamicColormap *mybasecolormap; @@ -2001,17 +2010,19 @@ void R_DrawSprite (vissprite_t *spr) } if (fixedlightlev >= 0) { - spr->Style.colormap = mybasecolormap->Maps + fixedlightlev; + spr->Style.BaseColormap = mybasecolormap->Maps; + spr->Style.ColormapNum = fixedlightlev >> COLORMAPSHIFT; } else if (!foggy && (spr->renderflags & RF_FULLBRIGHT)) { // full bright - spr->Style.colormap = mybasecolormap->Maps; + spr->Style.BaseColormap = mybasecolormap->Maps; + spr->Style.ColormapNum = 0; } else { // diminished light spriteshade = LIGHT2SHADE(sec->lightlevel + r_actualextralight); - spr->Style.colormap = mybasecolormap->Maps + (GETPALOOKUP ( - r_SpriteVisibility / MAX(MINZ, (double)spr->depth), spriteshade) << COLORMAPSHIFT); + 
spr->Style.BaseColormap = mybasecolormap->Maps; + spr->Style.ColormapNum = GETPALOOKUP(r_SpriteVisibility / MAX(MINZ, (double)spr->depth), spriteshade); } } } @@ -2159,7 +2170,8 @@ void R_DrawSprite (vissprite_t *spr) if (topclip >= botclip) { - spr->Style.colormap = colormap; + spr->Style.BaseColormap = colormap; + spr->Style.ColormapNum = colormapnum; return; } @@ -2289,7 +2301,8 @@ void R_DrawSprite (vissprite_t *spr) } if (i == x2) { - spr->Style.colormap = colormap; + spr->Style.BaseColormap = colormap; + spr->Style.ColormapNum = colormapnum; return; } } @@ -2307,7 +2320,8 @@ void R_DrawSprite (vissprite_t *spr) int maxvoxely = spr->gzb > hzb ? INT_MAX : xs_RoundToInt((spr->gzt - hzb) / spr->yscale); R_DrawVisVoxel(spr, minvoxely, maxvoxely, cliptop, clipbot); } - spr->Style.colormap = colormap; + spr->Style.BaseColormap = colormap; + spr->Style.ColormapNum = colormapnum; } // kg3D: @@ -2551,25 +2565,28 @@ void R_ProjectParticle (particle_t *particle, const sector_t *sector, int shade, vis->renderflags = particle->trans; vis->FakeFlatStat = fakeside; vis->floorclip = 0; - vis->ColormapNum = 0; + vis->Style.ColormapNum = 0; if (fixedlightlev >= 0) { - vis->Style.colormap = map + fixedlightlev; + vis->Style.BaseColormap = map; + vis->Style.ColormapNum = fixedlightlev >> COLORMAPSHIFT; } else if (fixedcolormap) { - vis->Style.colormap = fixedcolormap; + vis->Style.BaseColormap = fixedcolormap; + vis->Style.ColormapNum = 0; } else if (particle->bright) { - vis->Style.colormap = map; + vis->Style.BaseColormap = map; + vis->Style.ColormapNum = 0; } else { // Particles are slightly more visible than regular sprites. 
- vis->ColormapNum = GETPALOOKUP(tiz * r_SpriteVisibility * 0.5, shade); - vis->Style.colormap = map + (vis->ColormapNum << COLORMAPSHIFT); + vis->Style.ColormapNum = GETPALOOKUP(tiz * r_SpriteVisibility * 0.5, shade); + vis->Style.BaseColormap = map; } } @@ -2602,7 +2619,7 @@ void R_DrawParticle_C (vissprite_t *vis) { int spacing; BYTE *dest; - BYTE color = vis->Style.colormap[vis->startfrac]; + BYTE color = vis->Style.BaseColormap[(vis->Style.ColormapNum << COLORMAPSHIFT) + vis->startfrac]; int yl = vis->y1; int ycount = vis->y2 - yl + 1; int x1 = vis->x1; @@ -2668,7 +2685,7 @@ void R_DrawParticle_RGBA(vissprite_t *vis) { int spacing; uint32_t *dest; - BYTE color = vis->Style.colormap[vis->startfrac]; + BYTE color = vis->Style.BaseColormap[vis->startfrac]; int yl = vis->y1; int ycount = vis->y2 - yl + 1; int x1 = vis->x1; @@ -2676,7 +2693,7 @@ void R_DrawParticle_RGBA(vissprite_t *vis) R_DrawMaskedSegsBehindParticle(vis); - uint32_t fg = shade_pal_index(color, calc_light_multiplier(0)); + uint32_t fg = shade_pal_index(color, calc_light_multiplier(LIGHTSCALE(0, vis->Style.ColormapNum << FRACBITS))); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; diff --git a/src/r_things.h b/src/r_things.h index 057b7cfe2a..785729b098 100644 --- a/src/r_things.h +++ b/src/r_things.h @@ -86,7 +86,6 @@ struct vissprite_t BYTE bSplitSprite:1; // [RH] Sprite was split by a drawseg BYTE bInMirror:1; // [RH] Sprite is "inside" a mirror BYTE FakeFlatStat; // [RH] which side of fake/floor ceiling sprite is on - BYTE ColormapNum; // Which colormap is rendered (needed for shaded drawer) short renderflags; DWORD Translation; // [RH] for color translation visstyle_t Style; From 02a39ef4576204463474a71beef359848cac032d Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 2 Jun 2016 20:05:08 +0200 Subject: [PATCH 013/912] Added bgra support to SDL target --- src/posix/sdl/sdlvideo.cpp | 46 ++++++++++++++++++++++++++++---------- 1 
file changed, 34 insertions(+), 12 deletions(-) diff --git a/src/posix/sdl/sdlvideo.cpp b/src/posix/sdl/sdlvideo.cpp index b050097be6..26121aa711 100644 --- a/src/posix/sdl/sdlvideo.cpp +++ b/src/posix/sdl/sdlvideo.cpp @@ -28,7 +28,7 @@ class SDLFB : public DFrameBuffer { DECLARE_CLASS(SDLFB, DFrameBuffer) public: - SDLFB (int width, int height, bool fullscreen, SDL_Window *oldwin); + SDLFB (int width, int height, bool bgra, bool fullscreen, SDL_Window *oldwin); ~SDLFB (); bool Lock (bool buffer); @@ -271,7 +271,8 @@ DFrameBuffer *SDLVideo::CreateFrameBuffer (int width, int height, bool bgra, boo { // Reuse the old framebuffer if its attributes are the same SDLFB *fb = static_cast (old); if (fb->Width == width && - fb->Height == height) + fb->Height == height && + fb->Bgra == bgra) { bool fsnow = (SDL_GetWindowFlags (fb->Screen) & SDL_WINDOW_FULLSCREEN_DESKTOP) != 0; @@ -296,7 +297,7 @@ DFrameBuffer *SDLVideo::CreateFrameBuffer (int width, int height, bool bgra, boo flashAmount = 0; } - SDLFB *fb = new SDLFB (width, height, fullscreen, oldwin); + SDLFB *fb = new SDLFB (width, height, bgra, fullscreen, oldwin); // If we could not create the framebuffer, try again with slightly // different parameters in this order: @@ -350,8 +351,8 @@ void SDLVideo::SetWindowedScale (float scale) // FrameBuffer implementation ----------------------------------------------- -SDLFB::SDLFB (int width, int height, bool fullscreen, SDL_Window *oldwin) - : DFrameBuffer (width, height, false) +SDLFB::SDLFB (int width, int height, bool bgra, bool fullscreen, SDL_Window *oldwin) + : DFrameBuffer (width, height, bgra) { int i; @@ -494,7 +495,21 @@ void SDLFB::Update () pitch = Surface->pitch; } - if (NotPaletted) + if (Bgra) + { + if (pitch == Pitch * 4) + { + memcpy(pixels, MemBuffer, Width*Height*4); + } + else + { + for (int y = 0; y < Height; ++y) + { + memcpy((BYTE *)pixels + y*pitch, MemBuffer + y*Pitch*4, Width*4); + } + } + } + else if (NotPaletted) { GPfx.Convert (MemBuffer, Pitch, 
pixels, pitch, Width, Height, @@ -674,13 +689,20 @@ void SDLFB::ResetSDLRenderer () SDL_SetRenderDrawColor(Renderer, 0, 0, 0, 255); Uint32 fmt; - switch(vid_displaybits) + if (Bgra) { - default: fmt = SDL_PIXELFORMAT_ARGB8888; break; - case 30: fmt = SDL_PIXELFORMAT_ARGB2101010; break; - case 24: fmt = SDL_PIXELFORMAT_RGB888; break; - case 16: fmt = SDL_PIXELFORMAT_RGB565; break; - case 15: fmt = SDL_PIXELFORMAT_ARGB1555; break; + fmt = SDL_PIXELFORMAT_ARGB8888; + } + else + { + switch (vid_displaybits) + { + default: fmt = SDL_PIXELFORMAT_ARGB8888; break; + case 30: fmt = SDL_PIXELFORMAT_ARGB2101010; break; + case 24: fmt = SDL_PIXELFORMAT_RGB888; break; + case 16: fmt = SDL_PIXELFORMAT_RGB565; break; + case 15: fmt = SDL_PIXELFORMAT_ARGB1555; break; + } } Texture = SDL_CreateTexture (Renderer, fmt, SDL_TEXTUREACCESS_STREAMING, Width, Height); From 7142faf41d8cd250a19cd86af9becd0ae2d79e32 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 2 Jun 2016 21:39:44 +0200 Subject: [PATCH 014/912] Minor compile error fixes when X86_ASM is defined --- src/r_draw.cpp | 6 +++--- src/r_draw.h | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index c190c1e73e..2c2c67ad6c 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -3025,9 +3025,11 @@ extern "C" void R_DrawSlabC(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *v // wallscan stuff, in C +static int vlinebits; +static int mvlinebits; + #ifndef X86_ASM static DWORD vlinec1 (); -static int vlinebits; DWORD (*dovline1)() = vlinec1; DWORD (*doprevline1)() = vlinec1; @@ -3043,7 +3045,6 @@ void (*dovline4)() = vlinec4; static DWORD mvlinec1(); static void mvlinec4(); -static int mvlinebits; DWORD (*domvline1)() = mvlinec1; void (*domvline4)() = mvlinec4; @@ -4532,7 +4533,6 @@ void R_InitColumnDrawers () rt_tlateaddclamp1col = rt_tlateaddclamp1col_c; rt_tlatesubclamp1col = rt_tlatesubclamp1col_c; rt_tlaterevsubclamp1col = rt_tlaterevsubclamp1col_c; - rt_map4cols = 
rt_map4cols_c; rt_subclamp4cols = rt_subclamp4cols_c; rt_revsubclamp4cols = rt_revsubclamp4cols_c; rt_tlate4cols = rt_tlate4cols_c; diff --git a/src/r_draw.h b/src/r_draw.h index f60b2299eb..fcaedff478 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -265,6 +265,8 @@ void R_DrawShadedColumnP_C (void); void R_DrawSpanP_C (void); void R_DrawSpanMaskedP_C (void); +#endif + void R_DrawColumnHorizP_RGBA_C (void); void R_DrawColumnP_RGBA_C (void); void R_DrawFuzzColumnP_RGBA_C (void); @@ -274,8 +276,6 @@ void R_DrawSpanP_RGBA_C (void); void R_DrawSpanP_RGBA_SSE (void); void R_DrawSpanMaskedP_RGBA_C (void); -#endif - void R_DrawSpanTranslucentP_C (void); void R_DrawSpanMaskedTranslucentP_C (void); From 6160675e080355f35206af2164306a29be3af4be Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 3 Jun 2016 12:42:08 +0200 Subject: [PATCH 015/912] Added a few more SSE drawers --- src/r_draw.cpp | 17 ++- src/r_draw.h | 4 + src/r_drawt_rgba.cpp | 273 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 289 insertions(+), 5 deletions(-) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 2c2c67ad6c..aed4bbeea0 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -4364,7 +4364,6 @@ void R_InitColumnDrawers () R_DrawTranslatedColumn = R_DrawTranslatedColumnP_RGBA_C; R_DrawShadedColumn = R_DrawShadedColumnP_RGBA_C; R_DrawSpanMasked = R_DrawSpanMaskedP_RGBA_C; - rt_map4cols = rt_map4cols_RGBA_c; #ifndef NO_SSE R_DrawSpan = R_DrawSpanP_RGBA_SSE; #else @@ -4409,9 +4408,6 @@ void R_InitColumnDrawers () rt_copy1col = rt_copy1col_RGBA_c; rt_copy4cols = rt_copy4cols_RGBA_c; rt_map1col = rt_map1col_RGBA_c; - rt_shaded4cols = rt_shaded4cols_RGBA_c; - rt_add4cols = rt_add4cols_RGBA_c; - rt_addclamp4cols = rt_addclamp4cols_RGBA_c; rt_shaded1col = rt_shaded1col_RGBA_c; rt_add1col = rt_add1col_RGBA_c; rt_addclamp1col = rt_addclamp1col_RGBA_c; @@ -4422,7 +4418,6 @@ void R_InitColumnDrawers () rt_tlateaddclamp1col = rt_tlateaddclamp1col_RGBA_c; rt_tlatesubclamp1col = 
rt_tlatesubclamp1col_RGBA_c; rt_tlaterevsubclamp1col = rt_tlaterevsubclamp1col_RGBA_c; - rt_map4cols = rt_map4cols_RGBA_c; rt_subclamp4cols = rt_subclamp4cols_RGBA_c; rt_revsubclamp4cols = rt_revsubclamp4cols_RGBA_c; rt_tlate4cols = rt_tlate4cols_RGBA_c; @@ -4432,6 +4427,18 @@ void R_InitColumnDrawers () rt_tlaterevsubclamp4cols = rt_tlaterevsubclamp4cols_RGBA_c; rt_initcols = rt_initcols_rgba; +#ifndef NO_SSE + rt_map4cols = rt_map4cols_RGBA_SSE; + rt_add4cols = rt_add4cols_RGBA_SSE; + rt_addclamp4cols = rt_addclamp4cols_RGBA_SSE; + rt_shaded4cols = rt_shaded4cols_RGBA_SSE; +#else + rt_map4cols = rt_map4cols_RGBA_c; + rt_add4cols = rt_add4cols_RGBA_c; + rt_addclamp4cols = rt_addclamp4cols_RGBA_c; + rt_shaded4cols = rt_shaded4cols_RGBA_c; +#endif + dovline1 = vlinec1_RGBA; doprevline1 = vlinec1_RGBA; domvline1 = mvlinec1_RGBA; diff --git a/src/r_draw.h b/src/r_draw.h index fcaedff478..27a985dcb4 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -173,6 +173,7 @@ void rt_copy4cols_RGBA_c (int sx, int yl, int yh); void rt_shaded1col_RGBA_c (int hx, int sx, int yl, int yh); void rt_shaded4cols_RGBA_c (int sx, int yl, int yh); +void rt_shaded4cols_RGBA_SSE (int sx, int yl, int yh); void rt_map1col_RGBA_c (int hx, int sx, int yl, int yh); void rt_add1col_RGBA_c (int hx, int sx, int yl, int yh); @@ -187,8 +188,11 @@ void rt_tlatesubclamp1col_RGBA_c (int hx, int sx, int yl, int yh); void rt_tlaterevsubclamp1col_RGBA_c (int hx, int sx, int yl, int yh); void rt_map4cols_RGBA_c (int sx, int yl, int yh); +void rt_map4cols_RGBA_SSE (int sx, int yl, int yh); void rt_add4cols_RGBA_c (int sx, int yl, int yh); +void rt_add4cols_RGBA_SSE (int sx, int yl, int yh); void rt_addclamp4cols_RGBA_c (int sx, int yl, int yh); +void rt_addclamp4cols_RGBA_SSE (int sx, int yl, int yh); void rt_subclamp4cols_RGBA_c (int sx, int yl, int yh); void rt_revsubclamp4cols_RGBA_c (int sx, int yl, int yh); diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp index 60520783d7..d390fc54d8 100644 --- 
a/src/r_drawt_rgba.cpp +++ b/src/r_drawt_rgba.cpp @@ -42,6 +42,9 @@ #include "r_main.h" #include "r_things.h" #include "v_video.h" +#ifndef NO_SSE +#include +#endif uint32_t dc_temp_rgbabuff_rgba[MAXHEIGHT*4]; uint32_t *dc_temp_rgba; @@ -185,6 +188,98 @@ void rt_map4cols_RGBA_c (int sx, int yl, int yh) } while (--count); } +// Maps all four spans to the screen starting at sx. +void rt_map4cols_RGBA_SSE(int sx, int yl, int yh) +{ + BYTE *colormap; + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + + count = yh - yl; + if (count < 0) + return; + count++; + + uint32_t light = calc_light_multiplier(dc_light); + uint32_t *palette = (uint32_t*)GPalette.BaseColors; + + colormap = dc_colormap; + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; + source = &dc_temp_rgba[yl * 4]; + pitch = dc_pitch; + + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + + if (count & 1) { + uint32_t p0 = colormap[source[0]]; + uint32_t p1 = colormap[source[1]]; + uint32_t p2 = colormap[source[2]]; + uint32_t p3 = colormap[source[3]]; + + // shade_pal_index: + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + fg_hi = _mm_mullo_epi16(fg_hi, mlight); + fg_hi = _mm_srli_epi16(fg_hi, 8); + fg_lo = _mm_mullo_epi16(fg_lo, mlight); + fg_lo = _mm_srli_epi16(fg_lo, 8); + + fg = _mm_packus_epi16(fg_lo, fg_hi); + _mm_storeu_si128((__m128i*)dest, fg); + + source += 4; + dest += pitch; + } + if (!(count >>= 1)) + return; + + do { + // shade_pal_index 0-3 + { + uint32_t p0 = colormap[source[0]]; + uint32_t p1 = colormap[source[1]]; + uint32_t p2 = colormap[source[2]]; + uint32_t p3 = colormap[source[3]]; + + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, 
_mm_setzero_si128()); + fg_hi = _mm_mullo_epi16(fg_hi, mlight); + fg_hi = _mm_srli_epi16(fg_hi, 8); + fg_lo = _mm_mullo_epi16(fg_lo, mlight); + fg_lo = _mm_srli_epi16(fg_lo, 8); + + fg = _mm_packus_epi16(fg_lo, fg_hi); + _mm_storeu_si128((__m128i*)dest, fg); + } + + // shade_pal_index 4-7 (pitch) + { + uint32_t p0 = colormap[source[4]]; + uint32_t p1 = colormap[source[5]]; + uint32_t p2 = colormap[source[6]]; + uint32_t p3 = colormap[source[7]]; + + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + fg_hi = _mm_mullo_epi16(fg_hi, mlight); + fg_hi = _mm_srli_epi16(fg_hi, 8); + fg_lo = _mm_mullo_epi16(fg_lo, mlight); + fg_lo = _mm_srli_epi16(fg_lo, 8); + + fg = _mm_packus_epi16(fg_lo, fg_hi); + _mm_storeu_si128((__m128i*)(dest + pitch), fg); + } + + source += 8; + dest += pitch * 2; + } while (--count); +} + void rt_Translate1col_RGBA_c(const BYTE *translation, int hx, int yl, int yh) { int count = yh - yl + 1; @@ -380,6 +475,69 @@ void rt_add4cols_RGBA_c (int sx, int yl, int yh) } while (--count); } +// Adds all four spans to the screen starting at sx without clamping. 
+#ifndef NO_SSE +void rt_add4cols_RGBA_SSE(int sx, int yl, int yh) +{ + BYTE *colormap; + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + + count = yh - yl; + if (count < 0) + return; + count++; + + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; + source = &dc_temp_rgba[yl * 4]; + pitch = dc_pitch; + colormap = dc_colormap; + + uint32_t light = calc_light_multiplier(dc_light); + uint32_t *palette = (uint32_t*)GPalette.BaseColors; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); + __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); + + do { + uint32_t p0 = colormap[source[0]]; + uint32_t p1 = colormap[source[1]]; + uint32_t p2 = colormap[source[2]]; + uint32_t p3 = colormap[source[3]]; + + // shade_pal_index: + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + fg_hi = _mm_mullo_epi16(fg_hi, mlight); + fg_hi = _mm_srli_epi16(fg_hi, 8); + fg_lo = _mm_mullo_epi16(fg_lo, mlight); + fg_lo = _mm_srli_epi16(fg_lo, 8); + + // unpack bg: + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + // (fg_red * fg_alpha + bg_red * bg_alpha) / 256: + __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); + __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); + + __m128i color = _mm_packus_epi16(color_lo, color_hi); + 
_mm_storeu_si128((__m128i*)dest, color); + + source += 4; + dest += pitch; + } while (--count); +} +#endif + // Translates and adds one span at hx to the screen at sx without clamping. void rt_tlateadd1col_RGBA_c (int hx, int sx, int yl, int yh) { @@ -481,6 +639,58 @@ void rt_shaded4cols_RGBA_c (int sx, int yl, int yh) } while (--count); } +// Shades all four spans to the screen starting at sx. +#ifndef NO_SSE +void rt_shaded4cols_RGBA_SSE(int sx, int yl, int yh) +{ + BYTE *colormap; + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + + count = yh - yl; + if (count < 0) + return; + count++; + + colormap = dc_colormap; + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; + source = &dc_temp_rgba[yl * 4]; + pitch = dc_pitch; + + __m128i fg = _mm_unpackhi_epi8(_mm_set1_epi32(shade_pal_index(dc_color, calc_light_multiplier(dc_light))), _mm_setzero_si128()); + __m128i alpha_one = _mm_set1_epi16(64); + + do { + uint32_t p0 = colormap[source[0]]; + uint32_t p1 = colormap[source[1]]; + uint32_t p2 = colormap[source[2]]; + uint32_t p3 = colormap[source[3]]; + + __m128i alpha_hi = _mm_set_epi16(64, p3, p3, p3, 64, p2, p2, p2); + __m128i alpha_lo = _mm_set_epi16(64, p1, p1, p1, 64, p0, p0, p0); + __m128i inv_alpha_hi = _mm_subs_epu16(alpha_one, alpha_hi); + __m128i inv_alpha_lo = _mm_subs_epu16(alpha_one, alpha_lo); + + // unpack bg: + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + // (fg_red * alpha + bg_red * inv_alpha) / 64: + __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg, alpha_hi), _mm_mullo_epi16(bg_hi, inv_alpha_hi)), 6); + __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg, alpha_lo), _mm_mullo_epi16(bg_lo, inv_alpha_lo)), 6); + + __m128i color = _mm_packus_epi16(color_lo, color_hi); + _mm_storeu_si128((__m128i*)dest, color); + + source += 4; + dest += pitch; + } while (--count); +} 
+#endif + // Adds one span at hx to the screen at sx with clamping. void rt_addclamp1col_RGBA_c (int hx, int sx, int yl, int yh) { @@ -572,6 +782,69 @@ void rt_addclamp4cols_RGBA_c (int sx, int yl, int yh) } while (--count); } +// Adds all four spans to the screen starting at sx with clamping. +#ifndef NO_SSE +void rt_addclamp4cols_RGBA_SSE(int sx, int yl, int yh) +{ + BYTE *colormap; + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + + count = yh - yl; + if (count < 0) + return; + count++; + + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; + source = &dc_temp_rgba[yl * 4]; + pitch = dc_pitch; + colormap = dc_colormap; + + uint32_t light = calc_light_multiplier(dc_light); + uint32_t *palette = (uint32_t*)GPalette.BaseColors; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); + __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); + + do { + uint32_t p0 = colormap[source[0]]; + uint32_t p1 = colormap[source[1]]; + uint32_t p2 = colormap[source[2]]; + uint32_t p3 = colormap[source[3]]; + + // shade_pal_index: + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + fg_hi = _mm_mullo_epi16(fg_hi, mlight); + fg_hi = _mm_srli_epi16(fg_hi, 8); + fg_lo = _mm_mullo_epi16(fg_lo, mlight); + fg_lo = _mm_srli_epi16(fg_lo, 8); + + // unpack bg: + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + // (fg_red * fg_alpha + bg_red * bg_alpha) / 256: + __m128i color_hi = 
_mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); + __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); + + __m128i color = _mm_packus_epi16(color_lo, color_hi); + _mm_storeu_si128((__m128i*)dest, color); + + source += 4; + dest += pitch; + } while (--count); +} +#endif + // Translates and adds one span at hx to the screen at sx with clamping. void rt_tlateaddclamp1col_RGBA_c (int hx, int sx, int yl, int yh) { From 373b59b94fa93b78527c50a0af9aea84e09a569b Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 3 Jun 2016 14:06:44 +0200 Subject: [PATCH 016/912] Fix dovline4 being a define on X64_ASM --- src/r_draw.cpp | 2 +- src/r_draw.h | 5 ----- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index aed4bbeea0..ccaa864e6f 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -3036,8 +3036,8 @@ DWORD (*doprevline1)() = vlinec1; #ifdef X64_ASM extern "C" void vlinetallasm4(); -#define dovline4 vlinetallasm4 extern "C" void setupvlinetallasm (int); +void (*dovline4)() = vlinetallasm4; #else static void vlinec4 (); void (*dovline4)() = vlinec4; diff --git a/src/r_draw.h b/src/r_draw.h index 27a985dcb4..2eefff9bd5 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -71,12 +71,7 @@ extern void (*R_DrawColumn)(void); extern DWORD (*dovline1) (); extern DWORD (*doprevline1) (); -#ifdef X64_ASM -#define dovline4 vlinetallasm4 -extern "C" void vlinetallasm4(); -#else extern void (*dovline4) (); -#endif extern void setupvline (int); extern DWORD (*domvline1) (); From af02bafdeb4a96e091f6ff8608d3d82278bf7c3e Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 3 Jun 2016 22:57:36 +0200 Subject: [PATCH 017/912] Fixed missing some columns in transparency rendering --- src/r_draw.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 
ccaa864e6f..ec0645fd2e 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -952,12 +952,14 @@ void R_DrawAddColumnP_RGBA_C() int pitch = dc_pitch; BYTE *colormap = dc_colormap; + uint32_t light = calc_light_multiplier(dc_light); + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); do { - uint32_t fg = shade_pal_index(colormap[source[frac >> FRACBITS]], 0); + uint32_t fg = shade_pal_index(colormap[source[frac >> FRACBITS]], light); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; From 0c8c9e0aeace39987a44183ff16670e5ea967007 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 5 Jun 2016 14:08:03 +0200 Subject: [PATCH 018/912] Added FDynamicColormap support to true color mode --- src/g_level.cpp | 2 +- src/g_shared/a_artifacts.cpp | 2 +- src/r_data/colormaps.cpp | 20 +- src/r_data/colormaps.h | 25 +- src/r_defs.h | 3 +- src/r_draw.cpp | 592 ++++++++++++++++++++++++----------- src/r_draw.h | 19 +- src/r_drawt_rgba.cpp | 427 +++++++++++++++---------- src/r_main.cpp | 8 +- src/r_main.h | 141 ++++++++- src/r_plane.cpp | 12 +- src/r_segs.cpp | 45 ++- src/r_swrenderer.cpp | 2 +- src/r_things.cpp | 79 +++-- src/r_utility.cpp | 4 +- src/v_draw.cpp | 13 +- 16 files changed, 937 insertions(+), 457 deletions(-) diff --git a/src/g_level.cpp b/src/g_level.cpp index 141932c225..d27747ccb0 100644 --- a/src/g_level.cpp +++ b/src/g_level.cpp @@ -1307,7 +1307,7 @@ void G_InitLevelLocals () level_info_t *info; BaseBlendA = 0.0f; // Remove underwater blend effect, if any - NormalLight.Maps = realcolormaps; + NormalLight.Maps = realcolormaps.Maps; // [BB] Instead of just setting the color, we also have to reset Desaturate and build the lights. 
NormalLight.ChangeColor (PalEntry (255, 255, 255), 0); diff --git a/src/g_shared/a_artifacts.cpp b/src/g_shared/a_artifacts.cpp index 777d6824ad..305260ebff 100644 --- a/src/g_shared/a_artifacts.cpp +++ b/src/g_shared/a_artifacts.cpp @@ -737,7 +737,7 @@ int APowerInvisibility::AlterWeaponSprite (visstyle_t *vis) if ((vis->Alpha < 0.25f && special1 > 0) || (vis->Alpha == 0)) { vis->Alpha = clamp((1.f - float(Strength/100)), 0.f, 1.f); - vis->BaseColormap = SpecialColormaps[INVERSECOLORMAP].Colormap; + vis->BaseColormap = &SpecialColormaps[INVERSECOLORMAP]; vis->ColormapNum = 0; } return -1; // This item is valid so another one shouldn't reset the translucency diff --git a/src/r_data/colormaps.cpp b/src/r_data/colormaps.cpp index b463424634..ffaaa38ac6 100644 --- a/src/r_data/colormaps.cpp +++ b/src/r_data/colormaps.cpp @@ -71,7 +71,7 @@ struct FakeCmap }; TArray fakecmaps; -BYTE *realcolormaps; +FColormap realcolormaps; size_t numfakecmaps; @@ -408,7 +408,7 @@ void R_SetDefaultColormap (const char *name) foo.Color = 0xFFFFFF; foo.Fade = 0; - foo.Maps = realcolormaps; + foo.Maps = realcolormaps.Maps; foo.Desaturate = 0; foo.Next = NULL; foo.BuildLights (); @@ -430,7 +430,7 @@ void R_SetDefaultColormap (const char *name) remap[0] = 0; for (i = 0; i < NUMCOLORMAPS; ++i) { - BYTE *map2 = &realcolormaps[i*256]; + BYTE *map2 = &realcolormaps.Maps[i*256]; lumpr.Read (map, 256); for (j = 0; j < 256; ++j) { @@ -454,11 +454,7 @@ void R_DeinitColormaps () { SpecialColormaps.Clear(); fakecmaps.Clear(); - if (realcolormaps != NULL) - { - delete[] realcolormaps; - realcolormaps = NULL; - } + delete[] realcolormaps.Maps; FreeSpecialLights(); } @@ -501,7 +497,7 @@ void R_InitColormaps () } } } - realcolormaps = new BYTE[256*NUMCOLORMAPS*fakecmaps.Size()]; + realcolormaps.Maps = new BYTE[256*NUMCOLORMAPS*fakecmaps.Size()]; R_SetDefaultColormap ("COLORMAP"); if (fakecmaps.Size() > 1) @@ -523,7 +519,7 @@ void R_InitColormaps () { int k, r, g, b; FWadLump lump = Wads.OpenLumpNum 
(fakecmaps[j].lump); - BYTE *const map = realcolormaps + NUMCOLORMAPS*256*j; + BYTE *const map = realcolormaps.Maps + NUMCOLORMAPS*256*j; for (k = 0; k < NUMCOLORMAPS; ++k) { @@ -550,8 +546,8 @@ void R_InitColormaps () } NormalLight.Color = PalEntry (255, 255, 255); NormalLight.Fade = 0; - NormalLight.Maps = realcolormaps; - NormalLightHasFixedLights = R_CheckForFixedLights(realcolormaps); + NormalLight.Maps = realcolormaps.Maps; + NormalLightHasFixedLights = R_CheckForFixedLights(realcolormaps.Maps); numfakecmaps = fakecmaps.Size(); // build default special maps (e.g. invulnerability) diff --git a/src/r_data/colormaps.h b/src/r_data/colormaps.h index 0764191a3c..bda6a5ea4f 100644 --- a/src/r_data/colormaps.h +++ b/src/r_data/colormaps.h @@ -1,18 +1,26 @@ #ifndef __RES_CMAP_H #define __RES_CMAP_H +struct FColormap; + void R_InitColormaps (); void R_DeinitColormaps (); DWORD R_ColormapNumForName(const char *name); // killough 4/4/98 void R_SetDefaultColormap (const char *name); // [RH] change normal fadetable DWORD R_BlendForColormap (DWORD map); // [RH] return calculated blend for a colormap -extern BYTE *realcolormaps; // [RH] make the colormaps externally visible +extern FColormap realcolormaps; // [RH] make the colormaps externally visible extern size_t numfakecmaps; +struct FColormap +{ + BYTE *Maps = nullptr; + PalEntry Color = 0xffffffff; + PalEntry Fade = 0xff000000; + int Desaturate = 0; +}; - -struct FDynamicColormap +struct FDynamicColormap : FColormap { void ChangeFade (PalEntry fadecolor); void ChangeColor (PalEntry lightcolor, int desaturate); @@ -20,10 +28,6 @@ struct FDynamicColormap void BuildLights (); static void RebuildAllLights(); - BYTE *Maps; - PalEntry Color; - PalEntry Fade; - int Desaturate; FDynamicColormap *Next; }; @@ -43,8 +47,13 @@ enum }; -struct FSpecialColormap +struct FSpecialColormap : FColormap { + FSpecialColormap() + { + Maps = Colormap; + } + float ColorizeStart[3]; float ColorizeEnd[3]; BYTE Colormap[256]; diff --git 
a/src/r_defs.h b/src/r_defs.h index 8a247a5c07..c0f8786649 100644 --- a/src/r_defs.h +++ b/src/r_defs.h @@ -1397,12 +1397,13 @@ struct FMiniBSP // typedef BYTE lighttable_t; // This could be wider for >8 bit display. +struct FColormap; // This encapsulates the fields of vissprite_t that can be altered by AlterWeaponSprite struct visstyle_t { int ColormapNum; // Which colormap is rendered - lighttable_t *BaseColormap; // Base colormap used together with ColormapNum + FColormap *BaseColormap; // Base colormap used together with ColormapNum float Alpha; FRenderStyle RenderStyle; }; diff --git a/src/r_draw.cpp b/src/r_draw.cpp index ec0645fd2e..2e21c70380 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -145,6 +145,8 @@ extern "C" { int dc_pitch=0xABadCafe; // [RH] Distance between rows lighttable_t* dc_colormap; +FColormap *dc_fcolormap; +ShadeConstants dc_shade_constants; fixed_t dc_light; int dc_x; int dc_yl; @@ -179,6 +181,7 @@ BYTE *dc_translation; BYTE shadetables[NUMCOLORMAPS*16*256]; FDynamicColormap ShadeFakeColormap[16]; BYTE identitymap[256]; +FDynamicColormap identitycolormap; EXTERN_CVAR (Int, r_columnmethod) @@ -219,6 +222,10 @@ void R_InitShadeMaps() { identitymap[i] = i; } + identitycolormap.Color = ~0u; + identitycolormap.Desaturate = 0; + identitycolormap.Next = NULL; + identitycolormap.Maps = identitymap; } /************************************/ @@ -297,6 +304,7 @@ void R_DrawColumnP_RGBA_C() dest = (uint32_t*)dc_dest; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; // Determine scaling, // which is the only mapping to be done. @@ -315,9 +323,7 @@ void R_DrawColumnP_RGBA_C() // This is as fast as it gets. do { - // Re-map color indices from wall texture column - // using a lighting/special effects LUT. 
- *dest = shade_pal_index(colormap[source[frac >> FRACBITS]], light); + *dest = shade_pal_index(source[frac >> FRACBITS], light, shade_constants); dest += pitch; frac += fracstep; @@ -371,7 +377,7 @@ void R_FillColumnP_RGBA() do { - *dest = shade_pal_index(color, light); + *dest = shade_pal_index_simple(color, light); dest += pitch; } while (--count); } @@ -416,7 +422,7 @@ void R_FillAddColumn_RGBA_C() dest = (uint32_t*)dc_dest; int pitch = dc_pitch; - uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(dc_light)); + uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); uint32_t fg_red = (fg >> 24) & 0xff; uint32_t fg_green = (fg >> 16) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -481,7 +487,7 @@ void R_FillAddClampColumn_RGBA() dest = (uint32_t*)dc_dest; int pitch = dc_pitch; - uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(dc_light)); + uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); uint32_t fg_red = (fg >> 24) & 0xff; uint32_t fg_green = (fg >> 16) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -545,7 +551,7 @@ void R_FillSubClampColumn_RGBA() dest = (uint32_t*)dc_dest; int pitch = dc_pitch; - uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(dc_light)); + uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); uint32_t fg_red = (fg >> 24) & 0xff; uint32_t fg_green = (fg >> 16) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -609,7 +615,7 @@ void R_FillRevSubClampColumn_RGBA() dest = (uint32_t*)dc_dest; int pitch = dc_pitch; - uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(dc_light)); + uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); uint32_t fg_red = (fg >> 24) & 0xff; uint32_t fg_green = (fg >> 16) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -953,13 +959,14 @@ void R_DrawAddColumnP_RGBA_C() BYTE *colormap = dc_colormap; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = 
dc_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); do { - uint32_t fg = shade_pal_index(colormap[source[frac >> FRACBITS]], light); + uint32_t fg = shade_pal_index(source[frac >> FRACBITS], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; @@ -1032,6 +1039,7 @@ void R_DrawTranslatedColumnP_RGBA_C() return; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; dest = (uint32_t*)dc_dest; @@ -1040,14 +1048,13 @@ void R_DrawTranslatedColumnP_RGBA_C() { // [RH] Local copies of global vars to improve compiler optimizations - BYTE *colormap = dc_colormap; BYTE *translation = dc_translation; const BYTE *source = dc_source; int pitch = dc_pitch; do { - *dest = shade_pal_index(colormap[translation[source[frac >> FRACBITS]]], light); + *dest = shade_pal_index(translation[source[frac >> FRACBITS]], light, shade_constants); dest += pitch; frac += fracstep; } while (--count); @@ -1106,6 +1113,7 @@ void R_DrawTlatedAddColumnP_RGBA_C() return; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; dest = (uint32_t*)dc_dest; @@ -1114,7 +1122,6 @@ void R_DrawTlatedAddColumnP_RGBA_C() { BYTE *translation = dc_translation; - BYTE *colormap = dc_colormap; const BYTE *source = dc_source; int pitch = dc_pitch; @@ -1123,7 +1130,7 @@ void R_DrawTlatedAddColumnP_RGBA_C() do { - uint32_t fg = shade_pal_index(colormap[translation[source[frac >> FRACBITS]]], light); + uint32_t fg = shade_pal_index(translation[source[frac >> FRACBITS]], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; @@ -1197,7 +1204,7 @@ void R_DrawShadedColumnP_RGBA_C() fracstep = dc_iscale; frac = dc_texturefrac; - uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(dc_light)); + uint32_t fg = shade_pal_index_simple(dc_color, 
calc_light_multiplier(dc_light)); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -1286,16 +1293,16 @@ void R_DrawAddClampColumnP_RGBA_C() { const BYTE *source = dc_source; - BYTE *colormap = dc_colormap; int pitch = dc_pitch; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); do { - uint32_t fg = shade_pal_index(colormap[source[frac >> FRACBITS]], light); + uint32_t fg = shade_pal_index(source[frac >> FRACBITS], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -1375,17 +1382,17 @@ void R_DrawAddClampTranslatedColumnP_RGBA_C() { BYTE *translation = dc_translation; - BYTE *colormap = dc_colormap; const BYTE *source = dc_source; int pitch = dc_pitch; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); do { - uint32_t fg = shade_pal_index(colormap[translation[source[frac >> FRACBITS]]], light); + uint32_t fg = shade_pal_index(translation[source[frac >> FRACBITS]], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -1462,17 +1469,17 @@ void R_DrawSubClampColumnP_RGBA_C() frac = dc_texturefrac; { - BYTE *colormap = dc_colormap; const BYTE *source = dc_source; int pitch = dc_pitch; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); do { - uint32_t fg = shade_pal_index(colormap[source[frac >> FRACBITS]], light); + uint32_t fg = shade_pal_index(source[frac >> FRACBITS], light, 
shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -1551,17 +1558,17 @@ void R_DrawSubClampTranslatedColumnP_RGBA_C() { BYTE *translation = dc_translation; - BYTE *colormap = dc_colormap; const BYTE *source = dc_source; int pitch = dc_pitch; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); do { - uint32_t fg = shade_pal_index(colormap[translation[source[frac >> FRACBITS]]], light); + uint32_t fg = shade_pal_index(translation[source[frac >> FRACBITS]], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -1638,16 +1645,16 @@ void R_DrawRevSubClampColumnP_RGBA_C() frac = dc_texturefrac; { - BYTE *colormap = dc_colormap; const BYTE *source = dc_source; int pitch = dc_pitch; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); do { - uint32_t fg = shade_pal_index(colormap[source[frac >> FRACBITS]], light); + uint32_t fg = shade_pal_index(source[frac >> FRACBITS], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -1726,17 +1733,17 @@ void R_DrawRevSubClampTranslatedColumnP_RGBA_C() { BYTE *translation = dc_translation; - BYTE *colormap = dc_colormap; const BYTE *source = dc_source; int pitch = dc_pitch; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); do { - uint32_t fg = shade_pal_index(colormap[translation[source[frac >> FRACBITS]]], light); + uint32_t fg = 
shade_pal_index(translation[source[frac >> FRACBITS]], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -1784,8 +1791,10 @@ int ds_y; int ds_x1; int ds_x2; +FColormap* ds_fcolormap; lighttable_t* ds_colormap; -//dsfixed_t ds_light; +ShadeConstants ds_shade_constants; +dsfixed_t ds_light; dsfixed_t ds_xfrac; dsfixed_t ds_yfrac; @@ -1835,9 +1844,9 @@ void R_SetSpanSource(const BYTE *pixels) // //========================================================================== -void R_SetSpanColormap(BYTE *colormap) +void R_SetSpanColormap(FDynamicColormap *colormap, int shade) { - R_SetDSColorMapLight(colormap, 0, 0); + R_SetDSColorMapLight(colormap, 0, shade); #ifdef X86_ASM if (!r_swtruecolor && ds_colormap != ds_curcolormap) { @@ -1956,7 +1965,6 @@ void R_DrawSpanP_RGBA_C() dsfixed_t ystep; uint32_t* dest; const BYTE* source = ds_source; - const BYTE* colormap = ds_colormap; int count; int spot; @@ -1980,6 +1988,7 @@ void R_DrawSpanP_RGBA_C() ystep = ds_ystep; uint32_t light = calc_light_multiplier(ds_light); + ShadeConstants shade_constants = ds_shade_constants; if (ds_xbits == 6 && ds_ybits == 6) { @@ -1990,9 +1999,8 @@ void R_DrawSpanP_RGBA_C() // Current texture index in u,v. spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - // Lookup pixel from flat texture tile, - // re-index using light/colormap. - *dest++ = shade_pal_index(colormap[source[spot]], light); + // Lookup pixel from flat texture tile + *dest++ = shade_pal_index(source[spot], light, shade_constants); // Next step in u,v. xfrac += xstep; @@ -2010,9 +2018,8 @@ void R_DrawSpanP_RGBA_C() // Current texture index in u,v. spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - // Lookup pixel from flat texture tile, - // re-index using light/colormap. 
- *dest++ = shade_pal_index(colormap[source[spot]], light); + // Lookup pixel from flat texture tile + *dest++ = shade_pal_index(source[spot], light, shade_constants); // Next step in u,v. xfrac += xstep; @@ -2030,7 +2037,6 @@ void R_DrawSpanP_RGBA_SSE() dsfixed_t ystep; uint32_t* dest; const BYTE* source = ds_source; - const BYTE* colormap = ds_colormap; int count; int spot; @@ -2054,54 +2060,92 @@ void R_DrawSpanP_RGBA_SSE() ystep = ds_ystep; uint32_t light = calc_light_multiplier(ds_light); + ShadeConstants shade_constants = ds_shade_constants; if (ds_xbits == 6 && ds_ybits == 6) { // 64x64 is the most common case by far, so special case it. - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); uint32_t *palette = (uint32_t*)GPalette.BaseColors; int sse_count = count / 4; count -= sse_count * 4; - while (sse_count--) + + if (shade_constants.simple_shade) { - // Current texture index in u,v. - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p0 = colormap[source[spot]]; - xfrac += xstep; - yfrac += ystep; + SSE_SHADE_SIMPLE_INIT(light); - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p1 = colormap[source[spot]]; - xfrac += xstep; - yfrac += ystep; + while (sse_count--) + { + // Current texture index in u,v. 
+ spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p0 = source[spot]; + xfrac += xstep; + yfrac += ystep; - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p2 = colormap[source[spot]]; - xfrac += xstep; - yfrac += ystep; + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p1 = source[spot]; + xfrac += xstep; + yfrac += ystep; - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p3 = colormap[source[spot]]; - xfrac += xstep; - yfrac += ystep; + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p2 = source[spot]; + xfrac += xstep; + yfrac += ystep; - // Lookup pixel from flat texture tile, - // re-index using light/colormap. - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - fg_hi = _mm_mullo_epi16(fg_hi, mlight); - fg_hi = _mm_srli_epi16(fg_hi, 8); - fg_lo = _mm_mullo_epi16(fg_lo, mlight); - fg_lo = _mm_srli_epi16(fg_lo, 8); - fg = _mm_packus_epi16(fg_lo, fg_hi); - _mm_storeu_si128((__m128i*)dest, fg); + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p3 = source[spot]; + xfrac += xstep; + yfrac += ystep; - // Next step in u,v. - dest += 4; + // Lookup pixel from flat texture tile, + // re-index using light/colormap. + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE_SIMPLE(fg); + _mm_storeu_si128((__m128i*)dest, fg); + + // Next step in u,v. + dest += 4; + } } + else + { + SSE_SHADE_INIT(light, shade_constants); + + while (sse_count--) + { + // Current texture index in u,v. 
+ spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p0 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p1 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p2 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p3 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + // Lookup pixel from flat texture tile, + // re-index using light/colormap. + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE(fg, shade_constants); + _mm_storeu_si128((__m128i*)dest, fg); + + // Next step in u,v. + dest += 4; + } + } + if (count == 0) return; @@ -2110,9 +2154,8 @@ void R_DrawSpanP_RGBA_SSE() // Current texture index in u,v. spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - // Lookup pixel from flat texture tile, - // re-index using light/colormap. - *dest++ = shade_pal_index(colormap[source[spot]], light); + // Lookup pixel from flat texture tile + *dest++ = shade_pal_index(source[spot], light, shade_constants); // Next step in u,v. xfrac += xstep; @@ -2130,9 +2173,8 @@ void R_DrawSpanP_RGBA_SSE() // Current texture index in u,v. spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - // Lookup pixel from flat texture tile, - // re-index using light/colormap. - *dest++ = shade_pal_index(colormap[source[spot]], light); + // Lookup pixel from flat texture tile + *dest++ = shade_pal_index(source[spot], light, shade_constants); // Next step in u,v. 
xfrac += xstep; @@ -2221,6 +2263,7 @@ void R_DrawSpanMaskedP_RGBA_C() int spot; uint32_t light = calc_light_multiplier(ds_light); + ShadeConstants shade_constants = ds_shade_constants; xfrac = ds_xfrac; yfrac = ds_yfrac; @@ -2243,7 +2286,7 @@ void R_DrawSpanMaskedP_RGBA_C() texdata = source[spot]; if (texdata != 0) { - *dest = shade_pal_index(colormap[texdata], light); + *dest = shade_pal_index(texdata, light, shade_constants); } dest++; xfrac += xstep; @@ -2263,7 +2306,7 @@ void R_DrawSpanMaskedP_RGBA_C() texdata = source[spot]; if (texdata != 0) { - *dest = shade_pal_index(colormap[texdata], light); + *dest = shade_pal_index(texdata, light, shade_constants); } dest++; xfrac += xstep; @@ -2343,7 +2386,6 @@ void R_DrawSpanTranslucentP_RGBA_C() dsfixed_t ystep; uint32_t* dest; const BYTE* source = ds_source; - const BYTE* colormap = ds_colormap; int count; int spot; @@ -2358,6 +2400,7 @@ void R_DrawSpanTranslucentP_RGBA_C() ystep = ds_ystep; uint32_t light = calc_light_multiplier(ds_light); + ShadeConstants shade_constants = ds_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); @@ -2369,7 +2412,7 @@ void R_DrawSpanTranslucentP_RGBA_C() { spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t fg = shade_pal_index(colormap[source[spot]], light); + uint32_t fg = shade_pal_index(source[spot], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = (fg) & 0xff; @@ -2397,7 +2440,7 @@ void R_DrawSpanTranslucentP_RGBA_C() { spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - uint32_t fg = shade_pal_index(colormap[source[spot]], light); + uint32_t fg = shade_pal_index(source[spot], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = (fg) & 0xff; @@ -2502,11 +2545,11 @@ void R_DrawSpanMaskedTranslucentP_RGBA_C() dsfixed_t ystep; uint32_t* dest; const 
BYTE* source = ds_source; - const BYTE* colormap = ds_colormap; int count; int spot; uint32_t light = calc_light_multiplier(ds_light); + ShadeConstants shade_constants = ds_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); @@ -2532,7 +2575,7 @@ void R_DrawSpanMaskedTranslucentP_RGBA_C() texdata = source[spot]; if (texdata != 0) { - uint32_t fg = shade_pal_index(colormap[texdata], light); + uint32_t fg = shade_pal_index(texdata, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = (fg) & 0xff; @@ -2565,7 +2608,7 @@ void R_DrawSpanMaskedTranslucentP_RGBA_C() texdata = source[spot]; if (texdata != 0) { - uint32_t fg = shade_pal_index(colormap[texdata], light); + uint32_t fg = shade_pal_index(texdata, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = (fg) & 0xff; @@ -2665,11 +2708,11 @@ void R_DrawSpanAddClampP_RGBA_C() dsfixed_t ystep; uint32_t* dest; const BYTE* source = ds_source; - const BYTE* colormap = ds_colormap; int count; int spot; uint32_t light = calc_light_multiplier(ds_light); + ShadeConstants shade_constants = ds_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); @@ -2691,7 +2734,7 @@ void R_DrawSpanAddClampP_RGBA_C() { spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t fg = shade_pal_index(colormap[source[spot]], light); + uint32_t fg = shade_pal_index(source[spot], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = (fg) & 0xff; @@ -2719,7 +2762,7 @@ void R_DrawSpanAddClampP_RGBA_C() { spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - uint32_t fg = shade_pal_index(colormap[source[spot]], light); + uint32_t fg = shade_pal_index(source[spot], light, shade_constants); uint32_t fg_red = 
(fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = (fg) & 0xff; @@ -2830,11 +2873,11 @@ void R_DrawSpanMaskedAddClampP_RGBA_C() dsfixed_t ystep; uint32_t* dest; const BYTE* source = ds_source; - const BYTE* colormap = ds_colormap; int count; int spot; uint32_t light = calc_light_multiplier(ds_light); + ShadeConstants shade_constants = ds_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); @@ -2860,7 +2903,7 @@ void R_DrawSpanMaskedAddClampP_RGBA_C() texdata = source[spot]; if (texdata != 0) { - uint32_t fg = shade_pal_index(colormap[texdata], light); + uint32_t fg = shade_pal_index(texdata, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = (fg) & 0xff; @@ -2893,7 +2936,7 @@ void R_DrawSpanMaskedAddClampP_RGBA_C() texdata = source[spot]; if (texdata != 0) { - uint32_t fg = shade_pal_index(colormap[texdata], light); + uint32_t fg = shade_pal_index(texdata, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = (fg) & 0xff; @@ -2926,7 +2969,7 @@ void R_FillSpan_RGBA() uint32_t *dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; int count = (ds_x2 - ds_x1 + 1); uint32_t light = calc_light_multiplier(ds_light); - uint32_t color = shade_pal_index(ds_color, light); + uint32_t color = shade_pal_index_simple(ds_color, light); for (int i = 0; i < count; i++) dest[i] = color; } @@ -3147,7 +3190,6 @@ DWORD vlinec1_RGBA() { DWORD fracstep = dc_iscale; DWORD frac = dc_texturefrac; - BYTE *colormap = dc_colormap; int count = dc_count; const BYTE *source = dc_source; uint32_t *dest = (uint32_t*)dc_dest; @@ -3155,10 +3197,11 @@ DWORD vlinec1_RGBA() int pitch = dc_pitch; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; do { - *dest = shade_pal_index(colormap[source[frac >> bits]], light); + *dest = 
shade_pal_index(source[frac >> bits], light, shade_constants); frac += fracstep; dest += pitch; } while (--count); @@ -3197,12 +3240,14 @@ void vlinec4_RGBA() uint32_t light2 = calc_light_multiplier(palookuplight[2]); uint32_t light3 = calc_light_multiplier(palookuplight[3]); + ShadeConstants shade_constants = dc_shade_constants; + do { - dest[0] = shade_pal_index(palookupoffse[0][bufplce[0][(place = vplce[0]) >> bits]], light0); vplce[0] = place + vince[0]; - dest[1] = shade_pal_index(palookupoffse[1][bufplce[1][(place = vplce[1]) >> bits]], light1); vplce[1] = place + vince[1]; - dest[2] = shade_pal_index(palookupoffse[2][bufplce[2][(place = vplce[2]) >> bits]], light2); vplce[2] = place + vince[2]; - dest[3] = shade_pal_index(palookupoffse[3][bufplce[3][(place = vplce[3]) >> bits]], light3); vplce[3] = place + vince[3]; + dest[0] = shade_pal_index(bufplce[0][(place = vplce[0]) >> bits], light0, shade_constants); vplce[0] = place + vince[0]; + dest[1] = shade_pal_index(bufplce[1][(place = vplce[1]) >> bits], light1, shade_constants); vplce[1] = place + vince[1]; + dest[2] = shade_pal_index(bufplce[2][(place = vplce[2]) >> bits], light2, shade_constants); vplce[2] = place + vince[2]; + dest[3] = shade_pal_index(bufplce[3][(place = vplce[3]) >> bits], light3, shade_constants); vplce[3] = place + vince[3]; dest += dc_pitch; } while (--count); } @@ -3219,40 +3264,64 @@ void vlinec4_RGBA_SSE() uint32_t light2 = calc_light_multiplier(palookuplight[2]); uint32_t light3 = calc_light_multiplier(palookuplight[3]); - __m128i mlight_hi = _mm_set_epi16(256, light1, light1, light1, 256, light0, light0, light0); - __m128i mlight_lo = _mm_set_epi16(256, light3, light3, light3, 256, light2, light2, light2); + ShadeConstants shade_constants = dc_shade_constants; + uint32_t *palette = (uint32_t*)GPalette.BaseColors; DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - do + if 
(shade_constants.simple_shade) { - DWORD place0 = local_vplce[0]; - DWORD place1 = local_vplce[1]; - DWORD place2 = local_vplce[2]; - DWORD place3 = local_vplce[3]; + SSE_SHADE_SIMPLE_INIT4(light3, light2, light1, light0); + do + { + DWORD place0 = local_vplce[0]; + DWORD place1 = local_vplce[1]; + DWORD place2 = local_vplce[2]; + DWORD place3 = local_vplce[3]; - BYTE p0 = palookupoffse[0][bufplce[0][place0 >> bits]]; - BYTE p1 = palookupoffse[1][bufplce[1][place1 >> bits]]; - BYTE p2 = palookupoffse[2][bufplce[2][place2 >> bits]]; - BYTE p3 = palookupoffse[3][bufplce[3][place3 >> bits]]; + BYTE p0 = bufplce[0][place0 >> bits]; + BYTE p1 = bufplce[1][place1 >> bits]; + BYTE p2 = bufplce[2][place2 >> bits]; + BYTE p3 = bufplce[3][place3 >> bits]; - local_vplce[0] = place0 + local_vince[0]; - local_vplce[1] = place1 + local_vince[1]; - local_vplce[2] = place2 + local_vince[2]; - local_vplce[3] = place3 + local_vince[3]; + local_vplce[0] = place0 + local_vince[0]; + local_vplce[1] = place1 + local_vince[1]; + local_vplce[2] = place2 + local_vince[2]; + local_vplce[3] = place3 + local_vince[3]; - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - fg_hi = _mm_mullo_epi16(fg_hi, mlight_hi); - fg_hi = _mm_srli_epi16(fg_hi, 8); - fg_lo = _mm_mullo_epi16(fg_lo, mlight_lo); - fg_lo = _mm_srli_epi16(fg_lo, 8); - fg = _mm_packus_epi16(fg_lo, fg_hi); - _mm_storeu_si128((__m128i*)dest, fg); - dest += dc_pitch; - } while (--count); + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE_SIMPLE(fg); + _mm_storeu_si128((__m128i*)dest, fg); + dest += dc_pitch; + } while (--count); + } + else + { + SSE_SHADE_INIT4(light3, light2, light1, light0, shade_constants); + do + { + DWORD place0 = local_vplce[0]; + DWORD place1 = local_vplce[1]; + DWORD place2 = local_vplce[2]; + DWORD place3 = 
local_vplce[3]; + + BYTE p0 = bufplce[0][place0 >> bits]; + BYTE p1 = bufplce[1][place1 >> bits]; + BYTE p2 = bufplce[2][place2 >> bits]; + BYTE p3 = bufplce[3][place3 >> bits]; + + local_vplce[0] = place0 + local_vince[0]; + local_vplce[1] = place1 + local_vince[1]; + local_vplce[2] = place2 + local_vince[2]; + local_vplce[3] = place3 + local_vince[3]; + + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE(fg, shade_constants); + _mm_storeu_si128((__m128i*)dest, fg); + dest += dc_pitch; + } while (--count); + } // Is this needed? Global variables makes it tricky to know.. vplce[0] = local_vplce[0]; @@ -3323,13 +3392,14 @@ DWORD mvlinec1_RGBA() int pitch = dc_pitch; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; do { BYTE pix = source[frac >> bits]; if (pix != 0) { - *dest = shade_pal_index(colormap[pix], light); + *dest = shade_pal_index(pix, light, shade_constants); } frac += fracstep; dest += pitch; @@ -3370,13 +3440,15 @@ void mvlinec4_RGBA() uint32_t light2 = calc_light_multiplier(palookuplight[2]); uint32_t light3 = calc_light_multiplier(palookuplight[3]); + ShadeConstants shade_constants = dc_shade_constants; + do { BYTE pix; - pix = bufplce[0][(place = vplce[0]) >> bits]; if (pix) dest[0] = shade_pal_index(palookupoffse[0][pix], light0); vplce[0] = place + vince[0]; - pix = bufplce[1][(place = vplce[1]) >> bits]; if (pix) dest[1] = shade_pal_index(palookupoffse[1][pix], light1); vplce[1] = place + vince[1]; - pix = bufplce[2][(place = vplce[2]) >> bits]; if (pix) dest[2] = shade_pal_index(palookupoffse[2][pix], light2); vplce[2] = place + vince[2]; - pix = bufplce[3][(place = vplce[3]) >> bits]; if (pix) dest[3] = shade_pal_index(palookupoffse[3][pix], light3); vplce[3] = place + vince[3]; + pix = bufplce[0][(place = vplce[0]) >> bits]; if (pix) dest[0] = shade_pal_index(pix, light0, shade_constants); vplce[0] = place + vince[0]; + pix = bufplce[1][(place = 
vplce[1]) >> bits]; if (pix) dest[1] = shade_pal_index(pix, light1, shade_constants); vplce[1] = place + vince[1]; + pix = bufplce[2][(place = vplce[2]) >> bits]; if (pix) dest[2] = shade_pal_index(pix, light2, shade_constants); vplce[2] = place + vince[2]; + pix = bufplce[3][(place = vplce[3]) >> bits]; if (pix) dest[3] = shade_pal_index(pix, light3, shade_constants); vplce[3] = place + vince[3]; dest += dc_pitch; } while (--count); } @@ -3393,48 +3465,70 @@ void mvlinec4_RGBA_SSE() uint32_t light2 = calc_light_multiplier(palookuplight[2]); uint32_t light3 = calc_light_multiplier(palookuplight[3]); - __m128i mlight_hi = _mm_set_epi16(256, light1, light1, light1, 256, light0, light0, light0); - __m128i mlight_lo = _mm_set_epi16(256, light3, light3, light3, 256, light2, light2, light2); + ShadeConstants shade_constants = dc_shade_constants; + uint32_t *palette = (uint32_t*)GPalette.BaseColors; DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - do + if (shade_constants.simple_shade) { - DWORD place0 = local_vplce[0]; - DWORD place1 = local_vplce[1]; - DWORD place2 = local_vplce[2]; - DWORD place3 = local_vplce[3]; + SSE_SHADE_SIMPLE_INIT4(light3, light2, light1, light0); + do + { + DWORD place0 = local_vplce[0]; + DWORD place1 = local_vplce[1]; + DWORD place2 = local_vplce[2]; + DWORD place3 = local_vplce[3]; - BYTE pix0 = bufplce[0][place0 >> bits]; - BYTE pix1 = bufplce[1][place1 >> bits]; - BYTE pix2 = bufplce[2][place2 >> bits]; - BYTE pix3 = bufplce[3][place3 >> bits]; + BYTE pix0 = bufplce[0][place0 >> bits]; + BYTE pix1 = bufplce[1][place1 >> bits]; + BYTE pix2 = bufplce[2][place2 >> bits]; + BYTE pix3 = bufplce[3][place3 >> bits]; - // movemask = !(pix == 0) - __m128i movemask = _mm_xor_si128(_mm_cmpeq_epi32(_mm_set_epi32(pix3, pix2, pix1, pix0), _mm_setzero_si128()), _mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128())); + // movemask = !(pix == 0) + __m128i movemask = 
_mm_xor_si128(_mm_cmpeq_epi32(_mm_set_epi32(pix3, pix2, pix1, pix0), _mm_setzero_si128()), _mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128())); - BYTE p0 = palookupoffse[0][pix0]; - BYTE p1 = palookupoffse[1][pix1]; - BYTE p2 = palookupoffse[2][pix2]; - BYTE p3 = palookupoffse[3][pix3]; + local_vplce[0] = place0 + local_vince[0]; + local_vplce[1] = place1 + local_vince[1]; + local_vplce[2] = place2 + local_vince[2]; + local_vplce[3] = place3 + local_vince[3]; - local_vplce[0] = place0 + local_vince[0]; - local_vplce[1] = place1 + local_vince[1]; - local_vplce[2] = place2 + local_vince[2]; - local_vplce[3] = place3 + local_vince[3]; + __m128i fg = _mm_set_epi32(palette[pix3], palette[pix2], palette[pix1], palette[pix0]); + SSE_SHADE_SIMPLE(fg); + _mm_maskmoveu_si128(fg, movemask, (char*)dest); + dest += dc_pitch; + } while (--count); + } + else + { + SSE_SHADE_INIT4(light3, light2, light1, light0, shade_constants); + do + { + DWORD place0 = local_vplce[0]; + DWORD place1 = local_vplce[1]; + DWORD place2 = local_vplce[2]; + DWORD place3 = local_vplce[3]; - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - fg_hi = _mm_mullo_epi16(fg_hi, mlight_hi); - fg_hi = _mm_srli_epi16(fg_hi, 8); - fg_lo = _mm_mullo_epi16(fg_lo, mlight_lo); - fg_lo = _mm_srli_epi16(fg_lo, 8); - fg = _mm_packus_epi16(fg_lo, fg_hi); - _mm_maskmoveu_si128(fg, movemask, (char*)dest); - dest += dc_pitch; - } while (--count); + BYTE pix0 = bufplce[0][place0 >> bits]; + BYTE pix1 = bufplce[1][place1 >> bits]; + BYTE pix2 = bufplce[2][place2 >> bits]; + BYTE pix3 = bufplce[3][place3 >> bits]; + + // movemask = !(pix == 0) + __m128i movemask = _mm_xor_si128(_mm_cmpeq_epi32(_mm_set_epi32(pix3, pix2, pix1, pix0), _mm_setzero_si128()), _mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128())); + + local_vplce[0] = place0 + local_vince[0]; + 
local_vplce[1] = place1 + local_vince[1]; + local_vplce[2] = place2 + local_vince[2]; + local_vplce[3] = place3 + local_vince[3]; + + __m128i fg = _mm_set_epi32(palette[pix3], palette[pix2], palette[pix1], palette[pix0]); + SSE_SHADE(fg, shade_constants); + _mm_maskmoveu_si128(fg, movemask, (char*)dest); + dest += dc_pitch; + } while (--count); + } // Is this needed? Global variables makes it tricky to know.. vplce[0] = local_vplce[0]; @@ -3503,7 +3597,7 @@ void R_DrawFogBoundary_C (int x1, int x2, short *uclip, short *dclip) clearbufshort (spanend+t2, b2-t2, x); } - R_SetColorMapLight(basecolormapdata, (float)light, wallshade); + R_SetColorMapLight(basecolormap, (float)light, wallshade); for (--x; x >= x1; --x) { @@ -3528,7 +3622,7 @@ void R_DrawFogBoundary_C (int x1, int x2, short *uclip, short *dclip) clearbufshort (spanend+t2, b2-t2, x); } rcolormap = lcolormap; - R_SetColorMapLight(basecolormapdata, (float)light, wallshade); + R_SetColorMapLight(basecolormap, (float)light, wallshade); } else { @@ -3578,6 +3672,7 @@ static void R_DrawFogBoundarySection_RGBA(int y, int y2, int x1) uint32_t *dest = ylookup[y] + (uint32_t*)dc_destorg; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants constants = dc_shade_constants; for (; y < y2; ++y) { @@ -3585,7 +3680,37 @@ static void R_DrawFogBoundarySection_RGBA(int y, int y2, int x1) int x = x1; do { - dest[x] = shade_pal_index(colormap[dest[x]], light); + uint32_t red = (dest[x] >> 16) & 0xff; + uint32_t green = (dest[x] >> 8) & 0xff; + uint32_t blue = dest[x] & 0xff; + + if (constants.simple_shade) + { + red = red * light / 256; + green = green * light / 256; + blue = blue * light / 256; + } + else + { + uint32_t inv_light = 256 - light; + uint32_t inv_desaturate = 256 - constants.desaturate; + + uint32_t intensity = ((red * 77 + green * 143 + blue * 37) >> 8) * constants.desaturate; + + red = (red * inv_desaturate + intensity) / 256; + green = (green * inv_desaturate + intensity) / 256; + blue = (blue * 
inv_desaturate + intensity) / 256; + + red = (constants.fade_red * inv_light + red * light) / 256; + green = (constants.fade_green * inv_light + green * light) / 256; + blue = (constants.fade_blue * inv_light + blue * light) / 256; + + red = (red * constants.light_red) / 256; + green = (green * constants.light_green) / 256; + blue = (blue * constants.light_blue) / 256; + } + + dest[x] = 0xff000000 | (red << 16) | (green << 8) | blue; } while (++x <= x2); dest += dc_pitch; } @@ -3598,10 +3723,41 @@ static void R_DrawFogBoundaryLine_RGBA(int y, int x) uint32_t *dest = ylookup[y] + (uint32_t*)dc_destorg; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants constants = dc_shade_constants; do { - dest[x] = shade_pal_index(colormap[dest[x]], light); + uint32_t red = (dest[x] >> 16) & 0xff; + uint32_t green = (dest[x] >> 8) & 0xff; + uint32_t blue = dest[x] & 0xff; + + if (constants.simple_shade) + { + red = red * light / 256; + green = green * light / 256; + blue = blue * light / 256; + } + else + { + uint32_t inv_light = 256 - light; + uint32_t inv_desaturate = 256 - constants.desaturate; + + uint32_t intensity = ((red * 77 + green * 143 + blue * 37) >> 8) * constants.desaturate; + + red = (red * inv_desaturate + intensity) / 256; + green = (green * inv_desaturate + intensity) / 256; + blue = (blue * inv_desaturate + intensity) / 256; + + red = (constants.fade_red * inv_light + red * light) / 256; + green = (constants.fade_green * inv_light + green * light) / 256; + blue = (constants.fade_blue * inv_light + blue * light) / 256; + + red = (red * constants.light_red) / 256; + green = (green * constants.light_green) / 256; + blue = (blue * constants.light_blue) / 256; + } + + dest[x] = 0xff000000 | (red << 16) | (green << 8) | blue; } while (++x <= x2); } @@ -3627,7 +3783,9 @@ void R_DrawFogBoundary_RGBA(int x1, int x2, short *uclip, short *dclip) clearbufshort(spanend + t2, b2 - t2, x); } - R_SetColorMapLight(basecolormapdata, (float)light, wallshade); + 
R_SetColorMapLight(basecolormap, (float)light, wallshade); + + BYTE *fake_dc_colormap = basecolormap->Maps + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT); for (--x; x >= x1; --x) { @@ -3652,11 +3810,12 @@ void R_DrawFogBoundary_RGBA(int x1, int x2, short *uclip, short *dclip) clearbufshort(spanend + t2, b2 - t2, x); } rcolormap = lcolormap; - R_SetColorMapLight(basecolormapdata, (float)light, wallshade); + R_SetColorMapLight(basecolormap, (float)light, wallshade); + fake_dc_colormap = basecolormap->Maps + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT); } else { - if (dc_colormap != basecolormapdata) + if (fake_dc_colormap != basecolormapdata) { stop = MIN(t1, b2); while (t2 < stop) @@ -3741,7 +3900,6 @@ fixed_t tmvline1_add_RGBA() { DWORD fracstep = dc_iscale; DWORD frac = dc_texturefrac; - BYTE *colormap = dc_colormap; int count = dc_count; const BYTE *source = dc_source; uint32_t *dest = (uint32_t*)dc_dest; @@ -3749,6 +3907,7 @@ fixed_t tmvline1_add_RGBA() int pitch = dc_pitch; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); @@ -3758,7 +3917,7 @@ fixed_t tmvline1_add_RGBA() BYTE pix = source[frac >> bits]; if (pix != 0) { - uint32_t fg = shade_pal_index(colormap[pix], light); + uint32_t fg = shade_pal_index(pix, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -3825,6 +3984,8 @@ void tmvline4_add_RGBA() light[2] = calc_light_multiplier(palookuplight[2]); light[3] = calc_light_multiplier(palookuplight[3]); + ShadeConstants shade_constants = dc_shade_constants; + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); @@ -3835,7 +3996,7 @@ void tmvline4_add_RGBA() BYTE pix = bufplce[i][vplce[i] >> bits]; if (pix != 0) { - uint32_t fg = shade_pal_index(palookupoffse[i][pix], 
light[i]); + uint32_t fg = shade_pal_index(pix, light[i], shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -3898,7 +4059,6 @@ fixed_t tmvline1_addclamp_RGBA() { DWORD fracstep = dc_iscale; DWORD frac = dc_texturefrac; - BYTE *colormap = dc_colormap; int count = dc_count; const BYTE *source = dc_source; uint32_t *dest = (uint32_t*)dc_dest; @@ -3906,6 +4066,7 @@ fixed_t tmvline1_addclamp_RGBA() int pitch = dc_pitch; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); @@ -3915,7 +4076,7 @@ fixed_t tmvline1_addclamp_RGBA() BYTE pix = source[frac >> bits]; if (pix != 0) { - uint32_t fg = shade_pal_index(colormap[pix], light); + uint32_t fg = shade_pal_index(pix, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -3981,6 +4142,8 @@ void tmvline4_addclamp_RGBA() light[2] = calc_light_multiplier(palookuplight[2]); light[3] = calc_light_multiplier(palookuplight[3]); + ShadeConstants shade_constants = dc_shade_constants; + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); @@ -3991,7 +4154,7 @@ void tmvline4_addclamp_RGBA() BYTE pix = bufplce[i][vplce[i] >> bits]; if (pix != 0) { - uint32_t fg = shade_pal_index(palookupoffse[i][pix], light[i]); + uint32_t fg = shade_pal_index(pix, light[i], shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -4059,6 +4222,7 @@ fixed_t tmvline1_subclamp_RGBA() int pitch = dc_pitch; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); @@ -4068,7 +4232,7 @@ 
fixed_t tmvline1_subclamp_RGBA() BYTE pix = source[frac >> bits]; if (pix != 0) { - uint32_t fg = shade_pal_index(colormap[pix], light); + uint32_t fg = shade_pal_index(pix, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -4133,6 +4297,8 @@ void tmvline4_subclamp_RGBA() light[2] = calc_light_multiplier(palookuplight[2]); light[3] = calc_light_multiplier(palookuplight[3]); + ShadeConstants shade_constants = dc_shade_constants; + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); @@ -4143,7 +4309,7 @@ void tmvline4_subclamp_RGBA() BYTE pix = bufplce[i][vplce[i] >> bits]; if (pix != 0) { - uint32_t fg = shade_pal_index(palookupoffse[i][pix], light[i]); + uint32_t fg = shade_pal_index(pix, light[i], shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -4211,6 +4377,7 @@ fixed_t tmvline1_revsubclamp_RGBA() int pitch = dc_pitch; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); @@ -4220,7 +4387,7 @@ fixed_t tmvline1_revsubclamp_RGBA() BYTE pix = source[frac >> bits]; if (pix != 0) { - uint32_t fg = shade_pal_index(colormap[pix], light); + uint32_t fg = shade_pal_index(pix, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -4285,6 +4452,8 @@ void tmvline4_revsubclamp_RGBA() light[2] = calc_light_multiplier(palookuplight[2]); light[3] = calc_light_multiplier(palookuplight[3]); + ShadeConstants shade_constants = dc_shade_constants; + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); @@ -4295,7 +4464,7 @@ void tmvline4_revsubclamp_RGBA() BYTE pix = bufplce[i][vplce[i] >> bits]; if 
(pix != 0) { - uint32_t fg = shade_pal_index(palookupoffse[i][pix], light[i]); + uint32_t fg = shade_pal_index(pix, light[i], shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -4793,15 +4962,15 @@ ESPSResult R_SetPatchStyle (FRenderStyle style, fixed_t alpha, int translation, colfunc = R_DrawShadedColumn; hcolfunc_post1 = rt_shaded1col; hcolfunc_post4 = rt_shaded4cols; - dc_color = fixedcolormap ? fixedcolormap[APART(color)] : basecolormap->Maps[APART(color)]; - lighttable_t *colormap = (basecolormap = &ShadeFakeColormap[16-alpha])->Maps; + dc_color = fixedcolormap ? fixedcolormap->Maps[APART(color)] : basecolormap->Maps[APART(color)]; + basecolormap = &ShadeFakeColormap[16-alpha]; if (fixedlightlev >= 0 && fixedcolormap == NULL) { - R_SetColorMapLight(colormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); + R_SetColorMapLight(basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); } else { - R_SetColorMapLight(colormap, 0, 0); + R_SetColorMapLight(basecolormap, 0, 0); } return r_columnmethod ? DoDraw1 : DoDraw0; } @@ -4827,7 +4996,7 @@ ESPSResult R_SetPatchStyle (FRenderStyle style, fixed_t alpha, int translation, // with the alpha. 
dc_srccolor = ((((r*x)>>4)<<20) | ((g*x)>>4) | ((((b)*x)>>4)<<10)) & 0x3feffbff; hcolfunc_pre = R_FillColumnHoriz; - R_SetColorMapLight(identitymap, 0, 0); + R_SetColorMapLight(&identitycolormap, 0, 0); } if (!R_SetBlendFunc (style.BlendOp, fglevel, bglevel, style.Flags)) @@ -4871,30 +5040,77 @@ bool R_GetTransMaskDrawers (fixed_t (**tmvline1)(), void (**tmvline4)()) return false; } -void R_SetColorMapLight(BYTE *basecolormapdata, float light, int shade) +void R_SetTranslationMap(lighttable_t *translation) { + dc_fcolormap = nullptr; + dc_shade_constants.light_red = 256; + dc_shade_constants.light_green = 256; + dc_shade_constants.light_blue = 256; + dc_shade_constants.light_alpha = 256; + dc_shade_constants.fade_red = 0; + dc_shade_constants.fade_green = 0; + dc_shade_constants.fade_blue = 0; + dc_shade_constants.fade_alpha = 256; + dc_shade_constants.desaturate = 0; + dc_shade_constants.simple_shade = true; if (r_swtruecolor) { - dc_colormap = basecolormapdata; - dc_light = LIGHTSCALE(light, shade); + dc_colormap = translation; + dc_light = 0; } else { - dc_colormap = basecolormapdata + (GETPALOOKUP(light, shade) << COLORMAPSHIFT); + dc_colormap = translation; dc_light = 0; } } -void R_SetDSColorMapLight(BYTE *basecolormapdata, float light, int shade) +void R_SetColorMapLight(FColormap *base_colormap, float light, int shade) { + dc_fcolormap = base_colormap; + dc_shade_constants.light_red = dc_fcolormap->Color.r * 256 / 255; + dc_shade_constants.light_green = dc_fcolormap->Color.g * 256 / 255; + dc_shade_constants.light_blue = dc_fcolormap->Color.b * 256 / 255; + dc_shade_constants.light_alpha = dc_fcolormap->Color.a * 256 / 255; + dc_shade_constants.fade_red = dc_fcolormap->Fade.r; + dc_shade_constants.fade_green = dc_fcolormap->Fade.g; + dc_shade_constants.fade_blue = dc_fcolormap->Fade.b; + dc_shade_constants.fade_alpha = dc_fcolormap->Fade.a; + dc_shade_constants.desaturate = MIN(std::abs(dc_fcolormap->Desaturate), 255) * 255 / 256; + 
dc_shade_constants.simple_shade = (dc_fcolormap->Color.d == 0x00ffffff && dc_fcolormap->Fade.d == 0x00000000 && dc_fcolormap->Desaturate == 0); if (r_swtruecolor) { - ds_colormap = basecolormapdata; + dc_colormap = base_colormap->Maps; + dc_light = LIGHTSCALE(light, shade); + } + else + { + dc_colormap = base_colormap->Maps + (GETPALOOKUP(light, shade) << COLORMAPSHIFT); + dc_light = 0; + } +} + +void R_SetDSColorMapLight(FColormap *base_colormap, float light, int shade) +{ + ds_fcolormap = base_colormap; + ds_shade_constants.light_red = ds_fcolormap->Color.r * 256 / 255; + ds_shade_constants.light_green = ds_fcolormap->Color.g * 256 / 255; + ds_shade_constants.light_blue = ds_fcolormap->Color.b * 256 / 255; + ds_shade_constants.light_alpha = ds_fcolormap->Color.a * 256 / 255; + ds_shade_constants.fade_red = ds_fcolormap->Fade.r; + ds_shade_constants.fade_green = ds_fcolormap->Fade.g; + ds_shade_constants.fade_blue = ds_fcolormap->Fade.b; + ds_shade_constants.fade_alpha = ds_fcolormap->Fade.a; + ds_shade_constants.desaturate = MIN(std::abs(ds_fcolormap->Desaturate), 255) * 255 / 256; + ds_shade_constants.simple_shade = (ds_fcolormap->Color.d == 0x00ffffff && ds_fcolormap->Fade.d == 0x00000000 && ds_fcolormap->Desaturate == 0); + if (r_swtruecolor) + { + ds_colormap = base_colormap->Maps; ds_light = LIGHTSCALE(light, shade); } else { - ds_colormap = basecolormapdata + (GETPALOOKUP(light, shade) << COLORMAPSHIFT); + ds_colormap = base_colormap->Maps + (GETPALOOKUP(light, shade) << COLORMAPSHIFT); ds_light = 0; } } diff --git a/src/r_draw.h b/src/r_draw.h index 2eefff9bd5..cc3b109355 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -25,11 +25,16 @@ #include "r_defs.h" +struct FColormap; +struct ShadeConstants; + extern "C" int ylookup[MAXHEIGHT]; extern "C" int dc_pitch; // [RH] Distance between rows extern "C" lighttable_t*dc_colormap; +extern "C" FColormap *dc_fcolormap; +extern "C" ShadeConstants dc_shade_constants; extern "C" fixed_t dc_light; extern "C" int dc_x; 
extern "C" int dc_yl; @@ -93,7 +98,7 @@ extern void (*R_DrawTranslatedColumn)(void); // Span drawing for rows, floor/ceiling. No Spectre effect needed. extern void (*R_DrawSpan)(void); void R_SetupSpanBits(FTexture *tex); -void R_SetSpanColormap(BYTE *colormap); +void R_SetSpanColormap(FDynamicColormap *colormap, int shade); void R_SetSpanSource(const BYTE *pixels); // Span drawing for masked textures. @@ -321,9 +326,10 @@ extern "C" int ds_y; extern "C" int ds_x1; extern "C" int ds_x2; +extern "C" FColormap* ds_fcolormap; extern "C" lighttable_t* ds_colormap; -//extern "C" dsfixed_t ds_light; -#define ds_light dc_light +extern "C" ShadeConstants ds_shade_constants; +extern "C" dsfixed_t ds_light; extern "C" dsfixed_t ds_xfrac; extern "C" dsfixed_t ds_yfrac; @@ -341,6 +347,7 @@ extern "C" int ds_color; // [RH] For flat color (no texturing) extern BYTE shadetables[/*NUMCOLORMAPS*16*256*/]; extern FDynamicColormap ShadeFakeColormap[16]; extern BYTE identitymap[256]; +extern FDynamicColormap identitycolormap; extern BYTE *dc_translation; // [RH] Added for muliresolution support @@ -389,9 +396,11 @@ void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_ void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int col)=R_GetColumn); // Sets dc_colormap and dc_light to their appropriate values depending on the output format (pal vs true color) -void R_SetColorMapLight(BYTE *base_colormap, float light, int shade); +void R_SetColorMapLight(FColormap *base_colormap, float light, int shade); // Same as R_SetColorMapLight, but for ds_colormap and ds_light -void R_SetDSColorMapLight(BYTE *base_colormap, float light, int shade); +void R_SetDSColorMapLight(FColormap *base_colormap, float light, int shade); + +void R_SetTranslationMap(lighttable_t *translation); #endif diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp index d390fc54d8..ff5c0d82f1 100644 --- 
a/src/r_drawt_rgba.cpp +++ b/src/r_drawt_rgba.cpp @@ -108,7 +108,6 @@ void rt_copy4cols_RGBA_c (int sx, int yl, int yh) // Maps one span at hx to the screen at sx. void rt_map1col_RGBA_c (int hx, int sx, int yl, int yh) { - BYTE *colormap; uint32_t *source; uint32_t *dest; int count; @@ -120,14 +119,14 @@ void rt_map1col_RGBA_c (int hx, int sx, int yl, int yh) count++; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; - colormap = dc_colormap; dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl*4 + hx]; pitch = dc_pitch; if (count & 1) { - *dest = shade_pal_index(colormap[*source], light); + *dest = shade_pal_index(*source, light, shade_constants); source += 4; dest += pitch; } @@ -135,8 +134,8 @@ void rt_map1col_RGBA_c (int hx, int sx, int yl, int yh) return; do { - dest[0] = shade_pal_index(colormap[source[0]], light); - dest[pitch] = shade_pal_index(colormap[source[4]], light); + dest[0] = shade_pal_index(source[0], light, shade_constants); + dest[pitch] = shade_pal_index(source[4], light, shade_constants); source += 8; dest += pitch*2; } while (--count); @@ -145,7 +144,6 @@ void rt_map1col_RGBA_c (int hx, int sx, int yl, int yh) // Maps all four spans to the screen starting at sx. 
void rt_map4cols_RGBA_c (int sx, int yl, int yh) { - BYTE *colormap; uint32_t *source; uint32_t *dest; int count; @@ -157,17 +155,17 @@ void rt_map4cols_RGBA_c (int sx, int yl, int yh) count++; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; - colormap = dc_colormap; dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl*4]; pitch = dc_pitch; if (count & 1) { - dest[0] = shade_pal_index(colormap[source[0]], light); - dest[1] = shade_pal_index(colormap[source[1]], light); - dest[2] = shade_pal_index(colormap[source[2]], light); - dest[3] = shade_pal_index(colormap[source[3]], light); + dest[0] = shade_pal_index(source[0], light, shade_constants); + dest[1] = shade_pal_index(source[1], light, shade_constants); + dest[2] = shade_pal_index(source[2], light, shade_constants); + dest[3] = shade_pal_index(source[3], light, shade_constants); source += 4; dest += pitch; } @@ -175,14 +173,14 @@ void rt_map4cols_RGBA_c (int sx, int yl, int yh) return; do { - dest[0] = shade_pal_index(colormap[source[0]], light); - dest[1] = shade_pal_index(colormap[source[1]], light); - dest[2] = shade_pal_index(colormap[source[2]], light); - dest[3] = shade_pal_index(colormap[source[3]], light); - dest[pitch] = shade_pal_index(colormap[source[4]], light); - dest[pitch + 1] = shade_pal_index(colormap[source[5]], light); - dest[pitch + 2] = shade_pal_index(colormap[source[6]], light); - dest[pitch + 3] = shade_pal_index(colormap[source[7]], light); + dest[0] = shade_pal_index(source[0], light, shade_constants); + dest[1] = shade_pal_index(source[1], light, shade_constants); + dest[2] = shade_pal_index(source[2], light, shade_constants); + dest[3] = shade_pal_index(source[3], light, shade_constants); + dest[pitch] = shade_pal_index(source[4], light, shade_constants); + dest[pitch + 1] = shade_pal_index(source[5], light, shade_constants); + dest[pitch + 2] = shade_pal_index(source[6], light, shade_constants); + 
dest[pitch + 3] = shade_pal_index(source[7], light, shade_constants); source += 8; dest += pitch*2; } while (--count); @@ -191,7 +189,6 @@ void rt_map4cols_RGBA_c (int sx, int yl, int yh) // Maps all four spans to the screen starting at sx. void rt_map4cols_RGBA_SSE(int sx, int yl, int yh) { - BYTE *colormap; uint32_t *source; uint32_t *dest; int count; @@ -202,82 +199,114 @@ void rt_map4cols_RGBA_SSE(int sx, int yl, int yh) return; count++; + ShadeConstants shade_constants = dc_shade_constants; uint32_t light = calc_light_multiplier(dc_light); uint32_t *palette = (uint32_t*)GPalette.BaseColors; - colormap = dc_colormap; dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl * 4]; pitch = dc_pitch; - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + if (shade_constants.simple_shade) + { + SSE_SHADE_SIMPLE_INIT(light); - if (count & 1) { - uint32_t p0 = colormap[source[0]]; - uint32_t p1 = colormap[source[1]]; - uint32_t p2 = colormap[source[2]]; - uint32_t p3 = colormap[source[3]]; - - // shade_pal_index: - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - fg_hi = _mm_mullo_epi16(fg_hi, mlight); - fg_hi = _mm_srli_epi16(fg_hi, 8); - fg_lo = _mm_mullo_epi16(fg_lo, mlight); - fg_lo = _mm_srli_epi16(fg_lo, 8); - - fg = _mm_packus_epi16(fg_lo, fg_hi); - _mm_storeu_si128((__m128i*)dest, fg); - - source += 4; - dest += pitch; - } - if (!(count >>= 1)) - return; - - do { - // shade_pal_index 0-3 - { - uint32_t p0 = colormap[source[0]]; - uint32_t p1 = colormap[source[1]]; - uint32_t p2 = colormap[source[2]]; - uint32_t p3 = colormap[source[3]]; + if (count & 1) { + uint32_t p0 = source[0]; + uint32_t p1 = source[1]; + uint32_t p2 = source[2]; + uint32_t p3 = source[3]; + // shade_pal_index: __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], 
palette[p0]); - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - fg_hi = _mm_mullo_epi16(fg_hi, mlight); - fg_hi = _mm_srli_epi16(fg_hi, 8); - fg_lo = _mm_mullo_epi16(fg_lo, mlight); - fg_lo = _mm_srli_epi16(fg_lo, 8); - - fg = _mm_packus_epi16(fg_lo, fg_hi); + SSE_SHADE_SIMPLE(fg); _mm_storeu_si128((__m128i*)dest, fg); + + source += 4; + dest += pitch; } + if (!(count >>= 1)) + return; - // shade_pal_index 4-7 (pitch) - { - uint32_t p0 = colormap[source[4]]; - uint32_t p1 = colormap[source[5]]; - uint32_t p2 = colormap[source[6]]; - uint32_t p3 = colormap[source[7]]; + do { + // shade_pal_index 0-3 + { + uint32_t p0 = source[0]; + uint32_t p1 = source[1]; + uint32_t p2 = source[2]; + uint32_t p3 = source[3]; + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE_SIMPLE(fg); + _mm_storeu_si128((__m128i*)dest, fg); + } + + // shade_pal_index 4-7 (pitch) + { + uint32_t p0 = source[4]; + uint32_t p1 = source[5]; + uint32_t p2 = source[6]; + uint32_t p3 = source[7]; + + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE_SIMPLE(fg); + _mm_storeu_si128((__m128i*)(dest + pitch), fg); + } + + source += 8; + dest += pitch * 2; + } while (--count); + } + else + { + SSE_SHADE_INIT(light, shade_constants); + + if (count & 1) { + uint32_t p0 = source[0]; + uint32_t p1 = source[1]; + uint32_t p2 = source[2]; + uint32_t p3 = source[3]; + + // shade_pal_index: __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - fg_hi = _mm_mullo_epi16(fg_hi, mlight); - fg_hi = _mm_srli_epi16(fg_hi, 8); - fg_lo = _mm_mullo_epi16(fg_lo, mlight); - fg_lo = _mm_srli_epi16(fg_lo, 8); + SSE_SHADE(fg, shade_constants); + _mm_storeu_si128((__m128i*)dest, fg); - fg = _mm_packus_epi16(fg_lo, fg_hi); - 
_mm_storeu_si128((__m128i*)(dest + pitch), fg); + source += 4; + dest += pitch; } + if (!(count >>= 1)) + return; - source += 8; - dest += pitch * 2; - } while (--count); + do { + // shade_pal_index 0-3 + { + uint32_t p0 = source[0]; + uint32_t p1 = source[1]; + uint32_t p2 = source[2]; + uint32_t p3 = source[3]; + + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE(fg, shade_constants); + _mm_storeu_si128((__m128i*)dest, fg); + } + + // shade_pal_index 4-7 (pitch) + { + uint32_t p0 = source[4]; + uint32_t p1 = source[5]; + uint32_t p2 = source[6]; + uint32_t p3 = source[7]; + + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE(fg, shade_constants); + _mm_storeu_si128((__m128i*)(dest + pitch), fg); + } + + source += 8; + dest += pitch * 2; + } while (--count); + } } void rt_Translate1col_RGBA_c(const BYTE *translation, int hx, int yl, int yh) @@ -385,7 +414,6 @@ void rt_tlate4cols_RGBA_c (int sx, int yl, int yh) // Adds one span at hx to the screen at sx without clamping. void rt_add1col_RGBA_c (int hx, int sx, int yl, int yh) { - BYTE *colormap; uint32_t *source; uint32_t *dest; int count; @@ -399,15 +427,15 @@ void rt_add1col_RGBA_c (int hx, int sx, int yl, int yh) dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl*4 + hx]; pitch = dc_pitch; - colormap = dc_colormap; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); do { - uint32_t fg = shade_pal_index(colormap[*source], light); + uint32_t fg = shade_pal_index(*source, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -430,7 +458,6 @@ void rt_add1col_RGBA_c (int hx, int sx, int yl, int yh) // Adds all four spans to the screen starting at sx without clamping. 
void rt_add4cols_RGBA_c (int sx, int yl, int yh) { - BYTE *colormap; uint32_t *source; uint32_t *dest; int count; @@ -444,9 +471,9 @@ void rt_add4cols_RGBA_c (int sx, int yl, int yh) dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl*4]; pitch = dc_pitch; - colormap = dc_colormap; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); @@ -454,7 +481,7 @@ void rt_add4cols_RGBA_c (int sx, int yl, int yh) do { for (int i = 0; i < 4; i++) { - uint32_t fg = shade_pal_index(colormap[source[i]], light); + uint32_t fg = shade_pal_index(source[i], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -479,7 +506,6 @@ void rt_add4cols_RGBA_c (int sx, int yl, int yh) #ifndef NO_SSE void rt_add4cols_RGBA_SSE(int sx, int yl, int yh) { - BYTE *colormap; uint32_t *source; uint32_t *dest; int count; @@ -493,7 +519,6 @@ void rt_add4cols_RGBA_SSE(int sx, int yl, int yh) dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl * 4]; pitch = dc_pitch; - colormap = dc_colormap; uint32_t light = calc_light_multiplier(dc_light); uint32_t *palette = (uint32_t*)GPalette.BaseColors; @@ -501,40 +526,80 @@ void rt_add4cols_RGBA_SSE(int sx, int yl, int yh) uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); - __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); + ShadeConstants shade_constants = dc_shade_constants; - do { - uint32_t p0 = colormap[source[0]]; - uint32_t p1 = colormap[source[1]]; - uint32_t p2 = colormap[source[2]]; - 
uint32_t p3 = colormap[source[3]]; + if (shade_constants.simple_shade) + { + SSE_SHADE_SIMPLE_INIT(light); - // shade_pal_index: - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - fg_hi = _mm_mullo_epi16(fg_hi, mlight); - fg_hi = _mm_srli_epi16(fg_hi, 8); - fg_lo = _mm_mullo_epi16(fg_lo, mlight); - fg_lo = _mm_srli_epi16(fg_lo, 8); + __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); + __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); - // unpack bg: - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + do { + uint32_t p0 = source[0]; + uint32_t p1 = source[1]; + uint32_t p2 = source[2]; + uint32_t p3 = source[3]; - // (fg_red * fg_alpha + bg_red * bg_alpha) / 256: - __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); - __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); + // shade_pal_index: + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE_SIMPLE(fg); - __m128i color = _mm_packus_epi16(color_lo, color_hi); - _mm_storeu_si128((__m128i*)dest, color); + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - source += 4; - dest += pitch; - } while (--count); + // unpack bg: + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + // (fg_red * fg_alpha + bg_red * bg_alpha) / 256: + __m128i color_hi = 
_mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); + __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); + + __m128i color = _mm_packus_epi16(color_lo, color_hi); + _mm_storeu_si128((__m128i*)dest, color); + + source += 4; + dest += pitch; + } while (--count); + } + else + { + SSE_SHADE_INIT(light, shade_constants); + + __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); + __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); + + do { + uint32_t p0 = source[0]; + uint32_t p1 = source[1]; + uint32_t p2 = source[2]; + uint32_t p3 = source[3]; + + // shade_pal_index: + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE(fg, shade_constants); + + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + + // unpack bg: + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + // (fg_red * fg_alpha + bg_red * bg_alpha) / 256: + __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); + __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); + + __m128i color = _mm_packus_epi16(color_lo, color_hi); + _mm_storeu_si128((__m128i*)dest, color); + + source += 4; + dest += pitch; + } while (--count); + } } #endif @@ -571,7 +636,7 @@ void rt_shaded1col_RGBA_c (int hx, int sx, int yl, int yh) source = &dc_temp_rgba[yl*4 + hx]; pitch = dc_pitch; - uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(dc_light)); + uint32_t fg = shade_pal_index_simple(dc_color, 
calc_light_multiplier(dc_light)); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -613,7 +678,7 @@ void rt_shaded4cols_RGBA_c (int sx, int yl, int yh) source = &dc_temp_rgba[yl*4]; pitch = dc_pitch; - uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(dc_light)); + uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -659,7 +724,7 @@ void rt_shaded4cols_RGBA_SSE(int sx, int yl, int yh) source = &dc_temp_rgba[yl * 4]; pitch = dc_pitch; - __m128i fg = _mm_unpackhi_epi8(_mm_set1_epi32(shade_pal_index(dc_color, calc_light_multiplier(dc_light))), _mm_setzero_si128()); + __m128i fg = _mm_unpackhi_epi8(_mm_set1_epi32(shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light))), _mm_setzero_si128()); __m128i alpha_one = _mm_set1_epi16(64); do { @@ -694,7 +759,6 @@ void rt_shaded4cols_RGBA_SSE(int sx, int yl, int yh) // Adds one span at hx to the screen at sx with clamping. 
void rt_addclamp1col_RGBA_c (int hx, int sx, int yl, int yh) { - BYTE *colormap; uint32_t *source; uint32_t *dest; int count; @@ -708,15 +772,15 @@ void rt_addclamp1col_RGBA_c (int hx, int sx, int yl, int yh) dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl*4 + hx]; pitch = dc_pitch; - colormap = dc_colormap; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); do { - uint32_t fg = shade_pal_index(colormap[*source], light); + uint32_t fg = shade_pal_index(*source, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -738,7 +802,6 @@ void rt_addclamp1col_RGBA_c (int hx, int sx, int yl, int yh) // Adds all four spans to the screen starting at sx with clamping. void rt_addclamp4cols_RGBA_c (int sx, int yl, int yh) { - BYTE *colormap; uint32_t *source; uint32_t *dest; int count; @@ -752,9 +815,9 @@ void rt_addclamp4cols_RGBA_c (int sx, int yl, int yh) dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl*4]; pitch = dc_pitch; - colormap = dc_colormap; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); @@ -762,7 +825,7 @@ void rt_addclamp4cols_RGBA_c (int sx, int yl, int yh) do { for (int i = 0; i < 4; i++) { - uint32_t fg = shade_pal_index(colormap[source[i]], light); + uint32_t fg = shade_pal_index(source[i], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -786,7 +849,6 @@ void rt_addclamp4cols_RGBA_c (int sx, int yl, int yh) #ifndef NO_SSE void rt_addclamp4cols_RGBA_SSE(int sx, int yl, int yh) { - BYTE *colormap; uint32_t *source; uint32_t *dest; int 
count; @@ -800,7 +862,6 @@ void rt_addclamp4cols_RGBA_SSE(int sx, int yl, int yh) dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl * 4]; pitch = dc_pitch; - colormap = dc_colormap; uint32_t light = calc_light_multiplier(dc_light); uint32_t *palette = (uint32_t*)GPalette.BaseColors; @@ -808,40 +869,80 @@ void rt_addclamp4cols_RGBA_SSE(int sx, int yl, int yh) uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); - __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); + ShadeConstants shade_constants = dc_shade_constants; - do { - uint32_t p0 = colormap[source[0]]; - uint32_t p1 = colormap[source[1]]; - uint32_t p2 = colormap[source[2]]; - uint32_t p3 = colormap[source[3]]; + if (shade_constants.simple_shade) + { + SSE_SHADE_SIMPLE_INIT(light); - // shade_pal_index: - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - fg_hi = _mm_mullo_epi16(fg_hi, mlight); - fg_hi = _mm_srli_epi16(fg_hi, 8); - fg_lo = _mm_mullo_epi16(fg_lo, mlight); - fg_lo = _mm_srli_epi16(fg_lo, 8); + __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); + __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); - // unpack bg: - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + do { + uint32_t p0 = source[0]; + uint32_t p1 = source[1]; + uint32_t p2 = source[2]; + uint32_t p3 = source[3]; - // (fg_red * 
fg_alpha + bg_red * bg_alpha) / 256: - __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); - __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); + // shade_pal_index: + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE_SIMPLE(fg); - __m128i color = _mm_packus_epi16(color_lo, color_hi); - _mm_storeu_si128((__m128i*)dest, color); + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - source += 4; - dest += pitch; - } while (--count); + // unpack bg: + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + // (fg_red * fg_alpha + bg_red * bg_alpha) / 256: + __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); + __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); + + __m128i color = _mm_packus_epi16(color_lo, color_hi); + _mm_storeu_si128((__m128i*)dest, color); + + source += 4; + dest += pitch; + } while (--count); + } + else + { + SSE_SHADE_INIT(light, shade_constants); + + __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); + __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); + + do { + uint32_t p0 = source[0]; + uint32_t p1 = source[1]; + uint32_t p2 = source[2]; + uint32_t p3 = source[3]; + + // shade_pal_index: + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE(fg, shade_constants); + + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, 
_mm_setzero_si128()); + + // unpack bg: + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + // (fg_red * fg_alpha + bg_red * bg_alpha) / 256: + __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); + __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); + + __m128i color = _mm_packus_epi16(color_lo, color_hi); + _mm_storeu_si128((__m128i*)dest, color); + + source += 4; + dest += pitch; + } while (--count); + } } #endif @@ -862,7 +963,6 @@ void rt_tlateaddclamp4cols_RGBA_c (int sx, int yl, int yh) // Subtracts one span at hx to the screen at sx with clamping. void rt_subclamp1col_RGBA_c (int hx, int sx, int yl, int yh) { - BYTE *colormap; uint32_t *source; uint32_t *dest; int count; @@ -876,15 +976,15 @@ void rt_subclamp1col_RGBA_c (int hx, int sx, int yl, int yh) dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl*4 + hx]; pitch = dc_pitch; - colormap = dc_colormap; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); do { - uint32_t fg = shade_pal_index(colormap[*source], light); + uint32_t fg = shade_pal_index(*source, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -906,7 +1006,6 @@ void rt_subclamp1col_RGBA_c (int hx, int sx, int yl, int yh) // Subtracts all four spans to the screen starting at sx with clamping. 
void rt_subclamp4cols_RGBA_c (int sx, int yl, int yh) { - BYTE *colormap; uint32_t *source; uint32_t *dest; int count; @@ -920,9 +1019,9 @@ void rt_subclamp4cols_RGBA_c (int sx, int yl, int yh) dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl*4]; pitch = dc_pitch; - colormap = dc_colormap; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); @@ -930,7 +1029,7 @@ void rt_subclamp4cols_RGBA_c (int sx, int yl, int yh) do { for (int i = 0; i < 4; i++) { - uint32_t fg = shade_pal_index(colormap[source[i]], light); + uint32_t fg = shade_pal_index(source[i], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -968,7 +1067,6 @@ void rt_tlatesubclamp4cols_RGBA_c (int sx, int yl, int yh) // Subtracts one span at hx from the screen at sx with clamping. void rt_revsubclamp1col_RGBA_c (int hx, int sx, int yl, int yh) { - BYTE *colormap; uint32_t *source; uint32_t *dest; int count; @@ -982,15 +1080,15 @@ void rt_revsubclamp1col_RGBA_c (int hx, int sx, int yl, int yh) dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl*4 + hx]; pitch = dc_pitch; - colormap = dc_colormap; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); do { - uint32_t fg = shade_pal_index(colormap[*source], light); + uint32_t fg = shade_pal_index(*source, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -1012,7 +1110,6 @@ void rt_revsubclamp1col_RGBA_c (int hx, int sx, int yl, int yh) // Subtracts all four spans from the screen starting at sx with clamping. 
void rt_revsubclamp4cols_RGBA_c (int sx, int yl, int yh) { - BYTE *colormap; uint32_t *source; uint32_t *dest; int count; @@ -1026,9 +1123,9 @@ void rt_revsubclamp4cols_RGBA_c (int sx, int yl, int yh) dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl*4]; pitch = dc_pitch; - colormap = dc_colormap; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); @@ -1036,7 +1133,7 @@ void rt_revsubclamp4cols_RGBA_c (int sx, int yl, int yh) do { for (int i = 0; i < 4; i++) { - uint32_t fg = shade_pal_index(colormap[source[i]], light); + uint32_t fg = shade_pal_index(source[i], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; diff --git a/src/r_main.cpp b/src/r_main.cpp index aaf8fc532a..a795f80167 100644 --- a/src/r_main.cpp +++ b/src/r_main.cpp @@ -119,7 +119,7 @@ double FocalLengthX; double FocalLengthY; FDynamicColormap*basecolormap; // [RH] colormap currently drawing with int fixedlightlev; -lighttable_t *fixedcolormap; +FColormap *fixedcolormap; FSpecialColormap *realfixedcolormap; double WallTMapScale2; @@ -464,11 +464,11 @@ void R_SetupColormap(player_t *player) // Render everything fullbright. The copy to video memory will // apply the special colormap, so it won't be restricted to the // palette. 
- fixedcolormap = realcolormaps; + fixedcolormap = &realcolormaps; } else { - fixedcolormap = SpecialColormaps[player->fixedcolormap].Colormap; + fixedcolormap = &SpecialColormaps[player->fixedcolormap]; } } else if (player->fixedlightlevel >= 0 && player->fixedlightlevel < NUMCOLORMAPS) @@ -479,7 +479,7 @@ void R_SetupColormap(player_t *player) // [RH] Inverse light for shooting the Sigil if (fixedcolormap == NULL && extralight == INT_MIN) { - fixedcolormap = SpecialColormaps[INVERSECOLORMAP].Colormap; + fixedcolormap = &SpecialColormaps[INVERSECOLORMAP]; extralight = 0; } } diff --git a/src/r_main.h b/src/r_main.h index e8be3c1a30..0db704df1d 100644 --- a/src/r_main.h +++ b/src/r_main.h @@ -90,25 +90,162 @@ extern bool r_dontmaplines; // Converts fixedlightlev into a shade value #define FIXEDLIGHT2SHADE(lightlev) (((lightlev) >> COLORMAPSHIFT) << FRACBITS) +struct ShadeConstants +{ + uint16_t light_alpha; + uint16_t light_red; + uint16_t light_green; + uint16_t light_blue; + uint16_t fade_alpha; + uint16_t fade_red; + uint16_t fade_green; + uint16_t fade_blue; + uint16_t desaturate; + bool simple_shade; +}; + // calculates the light constant passed to the shade_pal_index function inline uint32_t calc_light_multiplier(dsfixed_t light) { return 256 - (light >> (FRACBITS - 8)); } +// Give the compiler a strong hint we want these functions inlined: +#ifndef FORCEINLINE +#if defined(_MSC_VER) +#define FORCEINLINE __forceinline +#elif defined(__GNUC__) +#define FORCEINLINE __attribute__((always_inline)) +#else +#define FORCEINLINE inline +#endif +#endif + // Calculates a ARGB8 color for the given palette index and light multiplier -inline uint32_t shade_pal_index(uint32_t index, uint32_t light) +FORCEINLINE uint32_t shade_pal_index_simple(uint32_t index, uint32_t light) { const PalEntry &color = GPalette.BaseColors[index]; uint32_t red = color.r; uint32_t green = color.g; uint32_t blue = color.b; + red = red * light / 256; green = green * light / 256; blue = blue * 
light / 256; + return 0xff000000 | (red << 16) | (green << 8) | blue; } +// Calculates a ARGB8 color for the given palette index, light multiplier and dynamic colormap +FORCEINLINE uint32_t shade_pal_index(uint32_t index, uint32_t light, const ShadeConstants &constants) +{ + const PalEntry &color = GPalette.BaseColors[index]; + uint32_t red = color.r; + uint32_t green = color.g; + uint32_t blue = color.b; + if (constants.simple_shade) + { + red = red * light / 256; + green = green * light / 256; + blue = blue * light / 256; + } + else + { + uint32_t inv_light = 256 - light; + uint32_t inv_desaturate = 256 - constants.desaturate; + + uint32_t intensity = ((red * 77 + green * 143 + blue * 37) >> 8) * constants.desaturate; + + red = (red * inv_desaturate + intensity) / 256; + green = (green * inv_desaturate + intensity) / 256; + blue = (blue * inv_desaturate + intensity) / 256; + + red = (constants.fade_red * inv_light + red * light) / 256; + green = (constants.fade_green * inv_light + green * light) / 256; + blue = (constants.fade_blue * inv_light + blue * light) / 256; + + red = (red * constants.light_red) / 256; + green = (green * constants.light_green) / 256; + blue = (blue * constants.light_blue) / 256; + } + return 0xff000000 | (red << 16) | (green << 8) | blue; +} + +// Calculate constants for a simple shade +#define SSE_SHADE_SIMPLE_INIT(light) \ + __m128i mlight_hi = _mm_set_epi16(256, light, light, light, 256, light, light, light); \ + __m128i mlight_lo = mlight_hi; + +// Calculate constants for a simple shade with different light levels for each pixel +#define SSE_SHADE_SIMPLE_INIT4(light3, light2, light1, light0) \ + __m128i mlight_hi = _mm_set_epi16(256, light1, light1, light1, 256, light0, light0, light0); \ + __m128i mlight_lo = _mm_set_epi16(256, light3, light3, light3, 256, light2, light2, light2); + +// Simple shade 4 pixels +#define SSE_SHADE_SIMPLE(fg) { \ + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); \ + __m128i fg_lo = 
_mm_unpacklo_epi8(fg, _mm_setzero_si128()); \ + fg_hi = _mm_mullo_epi16(fg_hi, mlight_hi); \ + fg_hi = _mm_srli_epi16(fg_hi, 8); \ + fg_lo = _mm_mullo_epi16(fg_lo, mlight_lo); \ + fg_lo = _mm_srli_epi16(fg_lo, 8); \ + fg = _mm_packus_epi16(fg_lo, fg_hi); \ +} + +// Calculate constants for a complex shade +#define SSE_SHADE_INIT(light, shade_constants) \ + __m128i mlight_hi = _mm_set_epi16(256, light, light, light, 256, light, light, light); \ + __m128i mlight_lo = mlight_hi; \ + __m128i color = _mm_set_epi16( \ + shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, \ + shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); \ + __m128i fade = _mm_set_epi16( \ + shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, \ + shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); \ + __m128i fade_amount_hi = _mm_mullo_epi16(fade, _mm_subs_epu16(_mm_set1_epi16(256), mlight_hi)); \ + __m128i fade_amount_lo = fade_amount_hi; \ + __m128i inv_desaturate = _mm_set1_epi16(256 - shade_constants.desaturate); \ + +// Calculate constants for a complex shade with different light levels for each pixel +#define SSE_SHADE_INIT4(light3, light2, light1, light0, shade_constants) \ + __m128i mlight_hi = _mm_set_epi16(256, light1, light1, light1, 256, light0, light0, light0); \ + __m128i mlight_lo = _mm_set_epi16(256, light3, light3, light3, 256, light2, light2, light2); \ + __m128i color = _mm_set_epi16( \ + shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, \ + shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); \ + __m128i fade = _mm_set_epi16( \ + shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, 
shade_constants.fade_blue, \ + shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); \ + __m128i fade_amount_hi = _mm_mullo_epi16(fade, _mm_subs_epu16(_mm_set1_epi16(256), mlight_hi)); \ + __m128i fade_amount_lo = _mm_mullo_epi16(fade, _mm_subs_epu16(_mm_set1_epi16(256), mlight_lo)); \ + __m128i inv_desaturate = _mm_set1_epi16(256 - shade_constants.desaturate); \ + +// Complex shade 4 pixels +#define SSE_SHADE(fg, shade_constants) { \ + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); \ + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); \ + \ + __m128i intensity_hi = _mm_mullo_epi16(fg_hi, _mm_set_epi16(0, 77, 143, 37, 0, 77, 143, 37)); \ + uint16_t intensity_hi0 = ((intensity_hi.m128i_u16[2] + intensity_hi.m128i_u16[1] + intensity_hi.m128i_u16[0]) >> 8) * shade_constants.desaturate; \ + uint16_t intensity_hi1 = ((intensity_hi.m128i_u16[6] + intensity_hi.m128i_u16[5] + intensity_hi.m128i_u16[4]) >> 8) * shade_constants.desaturate; \ + intensity_hi = _mm_set_epi16(intensity_hi1, intensity_hi1, intensity_hi1, intensity_hi1, intensity_hi0, intensity_hi0, intensity_hi0, intensity_hi0); \ + \ + fg_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, inv_desaturate), intensity_hi), 8); \ + fg_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mlight_hi), fade_amount_hi), 8); \ + fg_hi = _mm_srli_epi16(_mm_mullo_epi16(fg_hi, color), 8); \ + \ + __m128i intensity_lo = _mm_mullo_epi16(fg_lo, _mm_set_epi16(0, 77, 143, 37, 0, 77, 143, 37)); \ + uint16_t intensity_lo0 = ((intensity_lo.m128i_u16[2] + intensity_lo.m128i_u16[1] + intensity_lo.m128i_u16[0]) >> 8) * shade_constants.desaturate; \ + uint16_t intensity_lo1 = ((intensity_lo.m128i_u16[6] + intensity_lo.m128i_u16[5] + intensity_lo.m128i_u16[4]) >> 8) * shade_constants.desaturate; \ + intensity_lo = _mm_set_epi16(intensity_lo1, intensity_lo1, intensity_lo1, intensity_lo1, intensity_lo0, intensity_lo0, intensity_lo0, intensity_lo0); \ 
+ \ + fg_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, inv_desaturate), intensity_lo), 8); \ + fg_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mlight_lo), fade_amount_lo), 8); \ + fg_lo = _mm_srli_epi16(_mm_mullo_epi16(fg_lo, color), 8); \ + \ + fg = _mm_packus_epi16(fg_lo, fg_hi); \ +} + extern bool r_swtruecolor; extern double GlobVis; @@ -125,7 +262,7 @@ extern double r_SpriteVisibility; extern int r_actualextralight; extern bool foggy; extern int fixedlightlev; -extern lighttable_t* fixedcolormap; +extern FColormap* fixedcolormap; extern FSpecialColormap*realfixedcolormap; diff --git a/src/r_plane.cpp b/src/r_plane.cpp index 9805ab2006..26d579d6d2 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -227,7 +227,7 @@ void R_MapPlane (int y, int x1) if (plane_shade) { // Determine lighting based on the span's distance from the viewer. - R_SetDSColorMapLight(basecolormap->Maps, GlobVis * fabs(CenterY - y), planeshade); + R_SetDSColorMapLight(basecolormap, GlobVis * fabs(CenterY - y), planeshade); } #ifdef X86_ASM @@ -616,7 +616,7 @@ void R_MapColoredPlane_RGBA(int y, int x1) uint32_t *dest = ylookup[y] + x1 + (uint32_t*)dc_destorg; int count = (spanend[y] - x1 + 1); uint32_t light = calc_light_multiplier(ds_light); - uint32_t color = shade_pal_index(ds_color, light); + uint32_t color = shade_pal_index_simple(ds_color, light); for (int i = 0; i < count; i++) dest[i] = color; } @@ -1598,7 +1598,7 @@ void R_DrawSkyPlane (visplane_t *pl) else { fakefixed = true; - fixedcolormap = NormalLight.Maps; + fixedcolormap = &NormalLight; R_SetColorMapLight(fixedcolormap, 0, 0); } @@ -1683,7 +1683,7 @@ void R_DrawNormalPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t ds_light = 0; if (fixedlightlev >= 0) { - R_SetDSColorMapLight(basecolormap->Maps, 0, FIXEDLIGHT2SHADE(fixedlightlev)); + R_SetDSColorMapLight(basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); plane_shade = false; } else if (fixedcolormap) @@ -1860,7 +1860,7 @@ void 
R_DrawTiltedPlane(visplane_t *pl, double _xscale, double _yscale, fixed_t a ds_light = 0; if (fixedlightlev >= 0) { - R_SetDSColorMapLight(basecolormap->Maps, 0, FIXEDLIGHT2SHADE(fixedlightlev)); + R_SetDSColorMapLight(basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); plane_shade = false; } else if (fixedcolormap) @@ -1870,7 +1870,7 @@ void R_DrawTiltedPlane(visplane_t *pl, double _xscale, double _yscale, fixed_t a } else { - R_SetDSColorMapLight(basecolormap->Maps, 0, 0); + R_SetDSColorMapLight(basecolormap, 0, 0); plane_shade = true; } diff --git a/src/r_segs.cpp b/src/r_segs.cpp index 43590247e8..bd2c7d22bb 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -177,7 +177,7 @@ static void BlastMaskedColumn (void (*blastfunc)(const BYTE *pixels, const FText // calculate lighting if (fixedcolormap == NULL && fixedlightlev < 0) { - R_SetColorMapLight(basecolormap->Maps, rw_light, wallshade); + R_SetColorMapLight(basecolormap, rw_light, wallshade); } dc_iscale = xs_Fix<16>::ToFix(MaskedSWall[dc_x] * MaskedScaleY); @@ -313,7 +313,7 @@ void R_RenderMaskedSegRange (drawseg_t *ds, int x1, int x2) rw_scalestep = ds->iscalestep; if (fixedlightlev >= 0) - R_SetColorMapLight(basecolormap->Maps, 0, FIXEDLIGHT2SHADE(fixedlightlev)); + R_SetColorMapLight(basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); else if (fixedcolormap != NULL) R_SetColorMapLight(fixedcolormap, 0, 0); @@ -630,7 +630,7 @@ void R_RenderFakeWall(drawseg_t *ds, int x1, int x2, F3DFloor *rover) } if (fixedlightlev >= 0) - R_SetColorMapLight(basecolormap->Maps, 0, FIXEDLIGHT2SHADE(fixedlightlev)); + R_SetColorMapLight(basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); else if (fixedcolormap != NULL) R_SetColorMapLight(fixedcolormap, 0, 0); @@ -1126,6 +1126,11 @@ void wallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *l palookuplight[3] = 0; } + if (fixedcolormap) + R_SetColorMapLight(fixedcolormap, 0, 0); + else + R_SetColorMapLight(basecolormap, 0, 0); + for(; (x < x2) && (x & 3); ++x) 
{ light += rw_lightstep; @@ -1137,7 +1142,7 @@ void wallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *l if (!fixed) { // calculate lighting - R_SetColorMapLight(basecolormapdata, light, wallshade); + R_SetColorMapLight(basecolormap, light, wallshade); } dc_source = getcol (rw_pic, (lwal[x] + xoffset) >> FRACBITS); @@ -1241,7 +1246,7 @@ void wallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *l if (!fixed) { // calculate lighting - R_SetColorMapLight(basecolormapdata, light, wallshade); + R_SetColorMapLight(basecolormap, light, wallshade); } dc_source = getcol (rw_pic, (lwal[x] + xoffset) >> FRACBITS); @@ -1496,6 +1501,11 @@ void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_ palookuplight[3] = 0; } + if (fixedcolormap) + R_SetColorMapLight(fixedcolormap, 0, 0); + else + R_SetColorMapLight(basecolormap, 0, 0); + for(; (x < x2) && (((size_t)pixel >> pixelshift) & 3); ++x, pixel += pixelsize) { light += rw_lightstep; @@ -1505,7 +1515,7 @@ void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_ if (!fixed) { // calculate lighting - R_SetColorMapLight(basecolormapdata, light, wallshade); + R_SetColorMapLight(basecolormap, light, wallshade); } dc_source = getcol (rw_pic, (lwal[x] + xoffset) >> FRACBITS); @@ -1605,7 +1615,7 @@ void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_ if (!fixed) { // calculate lighting - R_SetColorMapLight(basecolormapdata, light, wallshade); + R_SetColorMapLight(basecolormap, light, wallshade); } dc_source = getcol (rw_pic, (lwal[x] + xoffset) >> FRACBITS); @@ -1690,6 +1700,11 @@ void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, f palookuplight[3] = 0; } + if (fixedcolormap) + R_SetColorMapLight(fixedcolormap, 0, 0); + else + R_SetColorMapLight(basecolormap, 0, 0); + for(; (x < x2) && (((size_t)pixel >> pixelshift) & 3); ++x, pixel += pixelsize) { light += rw_lightstep; @@ -1699,7 +1714,7 
@@ void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, f if (!fixed) { // calculate lighting - R_SetColorMapLight(basecolormapdata, light, wallshade); + R_SetColorMapLight(basecolormap, light, wallshade); } dc_source = getcol (rw_pic, (lwal[x] + xoffset) >> FRACBITS); @@ -1801,7 +1816,7 @@ void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, f if (!fixed) { // calculate lighting - R_SetColorMapLight(basecolormapdata, light, wallshade); + R_SetColorMapLight(basecolormap, light, wallshade); } dc_source = getcol (rw_pic, (lwal[x] + xoffset) >> FRACBITS); @@ -1839,7 +1854,7 @@ void R_RenderSegLoop () fixed_t xoffset = rw_offset; if (fixedlightlev >= 0) - R_SetColorMapLight(basecolormap->Maps, 0, FIXEDLIGHT2SHADE(fixedlightlev)); + R_SetColorMapLight(basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); else if (fixedcolormap != NULL) R_SetColorMapLight(fixedcolormap, 0, 0); @@ -3238,11 +3253,11 @@ static void R_RenderDecal (side_t *wall, DBaseDecal *decal, drawseg_t *clipper, rw_light = rw_lightleft + (x1 - WallC.sx1) * rw_lightstep; if (fixedlightlev >= 0) - R_SetColorMapLight(usecolormap->Maps, 0, FIXEDLIGHT2SHADE(fixedlightlev)); + R_SetColorMapLight(usecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); else if (fixedcolormap != NULL) R_SetColorMapLight(fixedcolormap, 0, 0); else if (!foggy && (decal->RenderFlags & RF_FULLBRIGHT)) - R_SetColorMapLight(usecolormap->Maps, 0, 0); + R_SetColorMapLight(usecolormap, 0, 0); else calclighting = true; @@ -3293,7 +3308,7 @@ static void R_RenderDecal (side_t *wall, DBaseDecal *decal, drawseg_t *clipper, { if (calclighting) { // calculate lighting - R_SetColorMapLight(usecolormap->Maps, rw_light, wallshade); + R_SetColorMapLight(usecolormap, rw_light, wallshade); } R_WallSpriteColumn (R_DrawMaskedColumn); dc_x++; @@ -3303,7 +3318,7 @@ static void R_RenderDecal (side_t *wall, DBaseDecal *decal, drawseg_t *clipper, { if (calclighting) { // calculate lighting - 
R_SetColorMapLight(usecolormap->Maps, rw_light, wallshade); + R_SetColorMapLight(usecolormap, rw_light, wallshade); } rt_initcols(nullptr); for (int zz = 4; zz; --zz) @@ -3318,7 +3333,7 @@ static void R_RenderDecal (side_t *wall, DBaseDecal *decal, drawseg_t *clipper, { if (calclighting) { // calculate lighting - R_SetColorMapLight(usecolormap->Maps, rw_light, wallshade); + R_SetColorMapLight(usecolormap, rw_light, wallshade); } R_WallSpriteColumn (R_DrawMaskedColumn); dc_x++; diff --git a/src/r_swrenderer.cpp b/src/r_swrenderer.cpp index ee6ac5fedb..645741a2a5 100644 --- a/src/r_swrenderer.cpp +++ b/src/r_swrenderer.cpp @@ -323,7 +323,7 @@ void FSoftwareRenderer::RenderTextureView (FCanvasTexture *tex, AActor *viewpoin // curse Doom's overuse of global variables in the renderer. // These get clobbered by rendering to a camera texture but they need to be preserved so the final rendering can be done with the correct palette. - unsigned char *savecolormap = fixedcolormap; + FColormap *savecolormap = fixedcolormap; FSpecialColormap *savecm = realfixedcolormap; DAngle savedfov = FieldOfView; diff --git a/src/r_things.cpp b/src/r_things.cpp index 6f80381482..c132cc2fd6 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -416,7 +416,7 @@ void R_DrawVisSprite (vissprite_t *vis) { // For shaded sprites, R_SetPatchStyle sets a dc_colormap to an alpha table, but // it is the brightest one. We need to get back to the proper light level for // this sprite. 
- R_SetColorMapLight(dc_colormap, 0, vis->Style.ColormapNum << FRACBITS); + R_SetColorMapLight(dc_fcolormap, 0, vis->Style.ColormapNum << FRACBITS); } if (mode != DontDraw) @@ -538,11 +538,11 @@ void R_DrawWallSprite(vissprite_t *spr) rw_lightstep = float((GlobVis / spr->wallc.sz2 - rw_lightleft) / (spr->wallc.sx2 - spr->wallc.sx1)); rw_light = rw_lightleft + (x1 - spr->wallc.sx1) * rw_lightstep; if (fixedlightlev >= 0) - R_SetColorMapLight(usecolormap->Maps, 0, FIXEDLIGHT2SHADE(fixedlightlev)); + R_SetColorMapLight(usecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); else if (fixedcolormap != NULL) R_SetColorMapLight(fixedcolormap, 0, 0); else if (!foggy && (spr->renderflags & RF_FULLBRIGHT)) - R_SetColorMapLight(usecolormap->Maps, 0, 0); + R_SetColorMapLight(usecolormap, 0, 0); else calclighting = true; @@ -593,7 +593,7 @@ void R_DrawWallSprite(vissprite_t *spr) { if (calclighting) { // calculate lighting - R_SetColorMapLight(usecolormap->Maps, rw_light, shade); + R_SetColorMapLight(usecolormap, rw_light, shade); } if (!R_ClipSpriteColumnWithPortals(spr)) R_WallSpriteColumn(R_DrawMaskedColumn); @@ -604,7 +604,7 @@ void R_DrawWallSprite(vissprite_t *spr) { if (calclighting) { // calculate lighting - R_SetColorMapLight(usecolormap->Maps, rw_light, shade); + R_SetColorMapLight(usecolormap, rw_light, shade); } rt_initcols(nullptr); for (int zz = 4; zz; --zz) @@ -620,7 +620,7 @@ void R_DrawWallSprite(vissprite_t *spr) { if (calclighting) { // calculate lighting - R_SetColorMapLight(usecolormap->Maps, rw_light, shade); + R_SetColorMapLight(usecolormap, rw_light, shade); } if (!R_ClipSpriteColumnWithPortals(spr)) R_WallSpriteColumn(R_DrawMaskedColumn); @@ -680,7 +680,7 @@ void R_DrawVisVoxel(vissprite_t *spr, int minslabz, int maxslabz, short *cliptop // Render the voxel, either directly to the screen or offscreen. 
R_DrawVoxel(spr->pa.vpos, spr->pa.vang, spr->gpos, spr->Angle, - spr->xscale, FLOAT2FIXED(spr->yscale), spr->voxel, spr->Style.BaseColormap + (spr->Style.ColormapNum << COLORMAPSHIFT), cliptop, clipbot, + spr->xscale, FLOAT2FIXED(spr->yscale), spr->voxel, spr->Style.BaseColormap->Maps + (spr->Style.ColormapNum << COLORMAPSHIFT), cliptop, clipbot, minslabz, maxslabz, flags); // Blend the voxel, if that's what we need to do. @@ -1121,19 +1121,19 @@ void R_ProjectSprite (AActor *thing, int fakeside, F3DFloor *fakefloor, F3DFloor } if (fixedlightlev >= 0) { - vis->Style.BaseColormap = mybasecolormap->Maps; + vis->Style.BaseColormap = mybasecolormap; vis->Style.ColormapNum = fixedlightlev >> COLORMAPSHIFT; } else if (!foggy && ((renderflags & RF_FULLBRIGHT) || (thing->flags5 & MF5_BRIGHT))) { // full bright - vis->Style.BaseColormap = mybasecolormap->Maps; + vis->Style.BaseColormap = mybasecolormap; vis->Style.ColormapNum = 0; } else { // diminished light vis->Style.ColormapNum = GETPALOOKUP( r_SpriteVisibility / MAX(tz, MINZ), spriteshade); - vis->Style.BaseColormap = mybasecolormap->Maps; + vis->Style.BaseColormap = mybasecolormap; } } } @@ -1208,7 +1208,7 @@ static void R_ProjectWallSprite(AActor *thing, const DVector3 &pos, FTextureID p vis->bWallSprite = true; vis->Style.ColormapNum = GETPALOOKUP( r_SpriteVisibility / MAX(tz, MINZ), spriteshade); - vis->Style.BaseColormap = basecolormap->Maps; + vis->Style.BaseColormap = basecolormap; vis->wallc = wallc; } @@ -1428,7 +1428,7 @@ void R_DrawPSprite (pspdef_t* psp, int pspnum, AActor *owner, double sx, double if (realfixedcolormap != NULL) { // fixed color - vis->Style.BaseColormap = realfixedcolormap->Colormap; + vis->Style.BaseColormap = realfixedcolormap; vis->Style.ColormapNum = 0; } else @@ -1439,39 +1439,38 @@ void R_DrawPSprite (pspdef_t* psp, int pspnum, AActor *owner, double sx, double } if (fixedlightlev >= 0) { - vis->Style.BaseColormap = mybasecolormap->Maps; + vis->Style.BaseColormap = mybasecolormap; 
vis->Style.ColormapNum = fixedlightlev >> COLORMAPSHIFT; } else if (!foggy && psp->state->GetFullbright()) { // full bright - vis->Style.BaseColormap = mybasecolormap->Maps; // [RH] use basecolormap + vis->Style.BaseColormap = mybasecolormap; // [RH] use basecolormap vis->Style.ColormapNum = 0; } else { // local light - vis->Style.BaseColormap = mybasecolormap->Maps; + vis->Style.BaseColormap = mybasecolormap; vis->Style.ColormapNum = GETPALOOKUP(0, spriteshade); } } if (camera->Inventory != NULL) { BYTE oldcolormapnum = vis->Style.ColormapNum; - lighttable_t *oldcolormap = vis->Style.BaseColormap; + FColormap *oldcolormap = vis->Style.BaseColormap; camera->Inventory->AlterWeaponSprite (&vis->Style); if (vis->Style.BaseColormap != oldcolormap || vis->Style.ColormapNum != oldcolormapnum) { // The colormap has changed. Is it one we can easily identify? // If not, then don't bother trying to identify it for // hardware accelerated drawing. - if (vis->Style.BaseColormap < SpecialColormaps[0].Colormap || - vis->Style.BaseColormap > SpecialColormaps.Last().Colormap) + if (vis->Style.BaseColormap < &SpecialColormaps[0] || + vis->Style.BaseColormap > &SpecialColormaps.Last()) { noaccel = true; } // Has the basecolormap changed? If so, we can't hardware accelerate it, // since we don't know what it is anymore. - else if (vis->Style.BaseColormap < mybasecolormap->Maps || - vis->Style.BaseColormap >= mybasecolormap->Maps + NUMCOLORMAPS*256) + else if (vis->Style.BaseColormap != mybasecolormap) { noaccel = true; } @@ -1479,13 +1478,13 @@ void R_DrawPSprite (pspdef_t* psp, int pspnum, AActor *owner, double sx, double } // If we're drawing with a special colormap, but shaders for them are disabled, do // not accelerate. 
- if (!r_shadercolormaps && (vis->Style.BaseColormap >= SpecialColormaps[0].Colormap && - vis->Style.BaseColormap <= SpecialColormaps.Last().Colormap)) + if (!r_shadercolormaps && (vis->Style.BaseColormap >= &SpecialColormaps[0] && + vis->Style.BaseColormap <= &SpecialColormaps.Last())) { noaccel = true; } // If drawing with a BOOM colormap, disable acceleration. - if (mybasecolormap == &NormalLight && NormalLight.Maps != realcolormaps) + if (mybasecolormap == &NormalLight && NormalLight.Maps != realcolormaps.Maps) { noaccel = true; } @@ -1502,7 +1501,7 @@ void R_DrawPSprite (pspdef_t* psp, int pspnum, AActor *owner, double sx, double else { colormap_to_use = basecolormap; - vis->Style.BaseColormap = basecolormap->Maps; + vis->Style.BaseColormap = basecolormap; vis->Style.ColormapNum = 0; vis->Style.RenderStyle = STYLE_Normal; } @@ -1649,12 +1648,10 @@ void R_DrawRemainingPlayerSprites() FColormapStyle colormapstyle; bool usecolormapstyle = false; - if (vis->Style.BaseColormap >= SpecialColormaps[0].Colormap && - vis->Style.BaseColormap < SpecialColormaps[SpecialColormaps.Size()].Colormap) + if (vis->Style.BaseColormap >= &SpecialColormaps[0] && + vis->Style.BaseColormap < &SpecialColormaps[SpecialColormaps.Size()]) { - // Yuck! There needs to be a better way to store colormaps in the vissprite... 
:( - ptrdiff_t specialmap = (vis->Style.BaseColormap - SpecialColormaps[0].Colormap) / sizeof(FSpecialColormap) + vis->Style.ColormapNum; - special = &SpecialColormaps[specialmap]; + special = static_cast<FSpecialColormap*>(vis->Style.BaseColormap); } else if (colormap->Color == PalEntry(255,255,255) && colormap->Desaturate == 0) @@ -1912,7 +1909,7 @@ void R_DrawSprite (vissprite_t *spr) int r1, r2; short topclip, botclip; short *clip1, *clip2; - lighttable_t *colormap = spr->Style.BaseColormap; + FColormap *colormap = spr->Style.BaseColormap; int colormapnum = spr->Style.ColormapNum; F3DFloor *rover; FDynamicColormap *mybasecolormap; @@ -2010,18 +2007,18 @@ void R_DrawSprite (vissprite_t *spr) } if (fixedlightlev >= 0) { - spr->Style.BaseColormap = mybasecolormap->Maps; + spr->Style.BaseColormap = mybasecolormap; spr->Style.ColormapNum = fixedlightlev >> COLORMAPSHIFT; } else if (!foggy && (spr->renderflags & RF_FULLBRIGHT)) { // full bright - spr->Style.BaseColormap = mybasecolormap->Maps; + spr->Style.BaseColormap = mybasecolormap; spr->Style.ColormapNum = 0; } else { // diminished light spriteshade = LIGHT2SHADE(sec->lightlevel + r_actualextralight); - spr->Style.BaseColormap = mybasecolormap->Maps; + spr->Style.BaseColormap = mybasecolormap; spr->Style.ColormapNum = GETPALOOKUP(r_SpriteVisibility / MAX(MINZ, (double)spr->depth), spriteshade); } } @@ -2438,7 +2435,7 @@ void R_ProjectParticle (particle_t *particle, const sector_t *sector, int shade, int x1, x2, y1, y2; vissprite_t* vis; sector_t* heightsec = NULL; - BYTE* map; + FColormap* map; // [ZZ] Particle not visible through the portal plane if (CurrentPortal && !!P_PointOnLineSide(particle->Pos, CurrentPortal->dst)) @@ -2511,7 +2508,7 @@ void R_ProjectParticle (particle_t *particle, const sector_t *sector, int shade, botplane = &heightsec->ceilingplane; toppic = sector->GetTexture(sector_t::ceiling); botpic = heightsec->GetTexture(sector_t::ceiling); - map = heightsec->ColorMap->Maps; + map = heightsec->ColorMap; } else if
(fakeside == FAKED_BelowFloor) { @@ -2519,7 +2516,7 @@ void R_ProjectParticle (particle_t *particle, const sector_t *sector, int shade, botplane = &sector->floorplane; toppic = heightsec->GetTexture(sector_t::floor); botpic = sector->GetTexture(sector_t::floor); - map = heightsec->ColorMap->Maps; + map = heightsec->ColorMap; } else { @@ -2527,7 +2524,7 @@ void R_ProjectParticle (particle_t *particle, const sector_t *sector, int shade, botplane = &heightsec->floorplane; toppic = heightsec->GetTexture(sector_t::ceiling); botpic = heightsec->GetTexture(sector_t::floor); - map = sector->ColorMap->Maps; + map = sector->ColorMap; } } else @@ -2536,7 +2533,7 @@ void R_ProjectParticle (particle_t *particle, const sector_t *sector, int shade, botplane = &sector->floorplane; toppic = sector->GetTexture(sector_t::ceiling); botpic = sector->GetTexture(sector_t::floor); - map = sector->ColorMap->Maps; + map = sector->ColorMap; } if (botpic != skyflatnum && particle->Pos.Z < botplane->ZatPoint (particle->Pos)) @@ -2619,7 +2616,7 @@ void R_DrawParticle_C (vissprite_t *vis) { int spacing; BYTE *dest; - BYTE color = vis->Style.BaseColormap[(vis->Style.ColormapNum << COLORMAPSHIFT) + vis->startfrac]; + BYTE color = vis->Style.BaseColormap->Maps[(vis->Style.ColormapNum << COLORMAPSHIFT) + vis->startfrac]; int yl = vis->y1; int ycount = vis->y2 - yl + 1; int x1 = vis->x1; @@ -2685,7 +2682,7 @@ void R_DrawParticle_RGBA(vissprite_t *vis) { int spacing; uint32_t *dest; - BYTE color = vis->Style.BaseColormap[vis->startfrac]; + BYTE color = vis->Style.BaseColormap->Maps[vis->startfrac]; int yl = vis->y1; int ycount = vis->y2 - yl + 1; int x1 = vis->x1; @@ -2693,7 +2690,7 @@ void R_DrawParticle_RGBA(vissprite_t *vis) R_DrawMaskedSegsBehindParticle(vis); - uint32_t fg = shade_pal_index(color, calc_light_multiplier(LIGHTSCALE(0, vis->Style.ColormapNum << FRACBITS))); + uint32_t fg = shade_pal_index_simple(color, calc_light_multiplier(LIGHTSCALE(0, vis->Style.ColormapNum << FRACBITS))); uint32_t
fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; diff --git a/src/r_utility.cpp b/src/r_utility.cpp index 71d3f23769..efc901ca5e 100644 --- a/src/r_utility.cpp +++ b/src/r_utility.cpp @@ -889,11 +889,11 @@ void R_SetupFrame (AActor *actor) BaseBlendG = GPART(newblend); BaseBlendB = BPART(newblend); BaseBlendA = APART(newblend) / 255.f; - NormalLight.Maps = realcolormaps; + NormalLight.Maps = realcolormaps.Maps; } else { - NormalLight.Maps = realcolormaps + NUMCOLORMAPS*256*newblend; + NormalLight.Maps = realcolormaps.Maps + NUMCOLORMAPS*256*newblend; BaseBlendR = BaseBlendG = BaseBlendB = 0; BaseBlendA = 0.f; } diff --git a/src/v_draw.cpp b/src/v_draw.cpp index ff0427b345..c2dbf31c55 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -171,14 +171,14 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) if (translation != NULL) { - R_SetColorMapLight((lighttable_t *)translation, 0, 0); + R_SetTranslationMap((lighttable_t *)translation); } else { - R_SetColorMapLight(identitymap, 0, 0); + R_SetTranslationMap(identitymap); } - fixedcolormap = dc_colormap; + fixedcolormap = dc_fcolormap; ESPSResult mode = R_SetPatchStyle (parms.style, parms.Alpha, 0, parms.fillcolor); BYTE *destorgsave = dc_destorg; @@ -1025,7 +1025,7 @@ void DCanvas::PUTTRANSDOT (int xx, int yy, int basecolor, int level) { uint32_t *spot = (uint32_t*)GetBuffer() + oldyyshifted + xx; - uint32_t fg = shade_pal_index(basecolor, calc_light_multiplier(0)); + uint32_t fg = shade_pal_index_simple(basecolor, calc_light_multiplier(0)); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -1394,7 +1394,10 @@ void DCanvas::FillSimplePoly(FTexture *tex, FVector2 *points, int npoints, // Setup constant texture mapping parameters. R_SetupSpanBits(tex); - R_SetSpanColormap(colormap != NULL ? 
&colormap->Maps[clamp(shade >> FRACBITS, 0, NUMCOLORMAPS-1) * 256] : identitymap); + if (colormap) + R_SetSpanColormap(colormap, clamp(shade >> FRACBITS, 0, NUMCOLORMAPS - 1)); + else + R_SetSpanColormap(&identitycolormap, 0); R_SetSpanSource(tex->GetPixels()); scalex = double(1u << (32 - ds_xbits)) / scalex; scaley = double(1u << (32 - ds_ybits)) / scaley; From c058ab9cc9ac17a23a86964387e9f138359935f1 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 5 Jun 2016 17:34:51 +0200 Subject: [PATCH 019/912] Fixed non-standard __m128i usage in SSE_SHADE --- src/r_main.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/r_main.h b/src/r_main.h index 0db704df1d..5266fb52c4 100644 --- a/src/r_main.h +++ b/src/r_main.h @@ -226,8 +226,8 @@ FORCEINLINE uint32_t shade_pal_index(uint32_t index, uint32_t light, const Shade __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); \ \ __m128i intensity_hi = _mm_mullo_epi16(fg_hi, _mm_set_epi16(0, 77, 143, 37, 0, 77, 143, 37)); \ - uint16_t intensity_hi0 = ((intensity_hi.m128i_u16[2] + intensity_hi.m128i_u16[1] + intensity_hi.m128i_u16[0]) >> 8) * shade_constants.desaturate; \ - uint16_t intensity_hi1 = ((intensity_hi.m128i_u16[6] + intensity_hi.m128i_u16[5] + intensity_hi.m128i_u16[4]) >> 8) * shade_constants.desaturate; \ + uint16_t intensity_hi0 = ((_mm_extract_epi16(intensity_hi, 2) + _mm_extract_epi16(intensity_hi, 1) + _mm_extract_epi16(intensity_hi, 0)) >> 8) * shade_constants.desaturate; \ + uint16_t intensity_hi1 = ((_mm_extract_epi16(intensity_hi, 6) + _mm_extract_epi16(intensity_hi, 5) + _mm_extract_epi16(intensity_hi, 4)) >> 8) * shade_constants.desaturate; \ intensity_hi = _mm_set_epi16(intensity_hi1, intensity_hi1, intensity_hi1, intensity_hi1, intensity_hi0, intensity_hi0, intensity_hi0, intensity_hi0); \ \ fg_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, inv_desaturate), intensity_hi), 8); \ @@ -235,8 +235,8 @@ FORCEINLINE uint32_t shade_pal_index(uint32_t index, 
uint32_t light, const Shade fg_hi = _mm_srli_epi16(_mm_mullo_epi16(fg_hi, color), 8); \ \ __m128i intensity_lo = _mm_mullo_epi16(fg_lo, _mm_set_epi16(0, 77, 143, 37, 0, 77, 143, 37)); \ - uint16_t intensity_lo0 = ((intensity_lo.m128i_u16[2] + intensity_lo.m128i_u16[1] + intensity_lo.m128i_u16[0]) >> 8) * shade_constants.desaturate; \ - uint16_t intensity_lo1 = ((intensity_lo.m128i_u16[6] + intensity_lo.m128i_u16[5] + intensity_lo.m128i_u16[4]) >> 8) * shade_constants.desaturate; \ + uint16_t intensity_lo0 = ((_mm_extract_epi16(intensity_lo, 2) + _mm_extract_epi16(intensity_lo, 1) + _mm_extract_epi16(intensity_lo, 0)) >> 8) * shade_constants.desaturate; \ + uint16_t intensity_lo1 = ((_mm_extract_epi16(intensity_lo, 6) + _mm_extract_epi16(intensity_lo, 5) + _mm_extract_epi16(intensity_lo, 4)) >> 8) * shade_constants.desaturate; \ intensity_lo = _mm_set_epi16(intensity_lo1, intensity_lo1, intensity_lo1, intensity_lo1, intensity_lo0, intensity_lo0, intensity_lo0, intensity_lo0); \ \ fg_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, inv_desaturate), intensity_lo), 8); \ From c176d38b7e8992f006a99950a249848ba0a0f039 Mon Sep 17 00:00:00 2001 From: "alexey.lysiuk" Date: Sun, 5 Jun 2016 19:41:08 +0300 Subject: [PATCH 020/912] Fixed compilation with Clang --- src/r_draw.cpp | 4 ++-- src/r_main.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 2e21c70380..e809342e95 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -5076,7 +5076,7 @@ void R_SetColorMapLight(FColormap *base_colormap, float light, int shade) dc_shade_constants.fade_green = dc_fcolormap->Fade.g; dc_shade_constants.fade_blue = dc_fcolormap->Fade.b; dc_shade_constants.fade_alpha = dc_fcolormap->Fade.a; - dc_shade_constants.desaturate = MIN(std::abs(dc_fcolormap->Desaturate), 255) * 255 / 256; + dc_shade_constants.desaturate = MIN(abs(dc_fcolormap->Desaturate), 255) * 255 / 256; dc_shade_constants.simple_shade = (dc_fcolormap->Color.d == 
0x00ffffff && dc_fcolormap->Fade.d == 0x00000000 && dc_fcolormap->Desaturate == 0); if (r_swtruecolor) { @@ -5101,7 +5101,7 @@ void R_SetDSColorMapLight(FColormap *base_colormap, float light, int shade) ds_shade_constants.fade_green = ds_fcolormap->Fade.g; ds_shade_constants.fade_blue = ds_fcolormap->Fade.b; ds_shade_constants.fade_alpha = ds_fcolormap->Fade.a; - ds_shade_constants.desaturate = MIN(std::abs(ds_fcolormap->Desaturate), 255) * 255 / 256; + ds_shade_constants.desaturate = MIN(abs(ds_fcolormap->Desaturate), 255) * 255 / 256; ds_shade_constants.simple_shade = (ds_fcolormap->Color.d == 0x00ffffff && ds_fcolormap->Fade.d == 0x00000000 && ds_fcolormap->Desaturate == 0); if (r_swtruecolor) { diff --git a/src/r_main.h b/src/r_main.h index 5266fb52c4..5d4ff11748 100644 --- a/src/r_main.h +++ b/src/r_main.h @@ -115,7 +115,7 @@ inline uint32_t calc_light_multiplier(dsfixed_t light) #if defined(_MSC_VER) #define FORCEINLINE __forceinline #elif defined(__GNUC__) -#define FORCEINLINE __attribute__((always_inline)) +#define FORCEINLINE __attribute__((always_inline)) inline #else #define FORCEINLINE inline #endif From c5fcfb664f210996eadc22e20c036b50b4064abb Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 7 Jun 2016 00:55:52 +0200 Subject: [PATCH 021/912] Changed rgba renderer to use a command queue system for its drawers --- src/CMakeLists.txt | 1 + src/r_draw.cpp | 2273 +-------------------------- src/r_draw.h | 95 +- src/r_draw_rgba.cpp | 3492 ++++++++++++++++++++++++++++++++++++++++++ src/r_drawt.cpp | 8 + src/r_drawt_rgba.cpp | 2633 ++++++++++++++++++------------- src/r_main.cpp | 2 + src/r_swrenderer.cpp | 1 + src/r_things.cpp | 5 +- 9 files changed, 5189 insertions(+), 3321 deletions(-) create mode 100644 src/r_draw_rgba.cpp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index c90756b5d7..75cf27cadb 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -882,6 +882,7 @@ set( FASTMATH_PCH_SOURCES r_3dfloors.cpp r_bsp.cpp r_draw.cpp + 
r_draw_rgba.cpp r_drawt.cpp r_drawt_rgba.cpp r_main.cpp diff --git a/src/r_draw.cpp b/src/r_draw.cpp index e809342e95..984a74f3f6 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -43,9 +43,6 @@ #include "gi.h" #include "stats.h" #include "x86.h" -#ifndef NO_SSE -#include -#endif #undef RANGECHECK @@ -135,6 +132,7 @@ void (*rt_tlateaddclamp4cols)(int sx, int yl, int yh); void (*rt_tlatesubclamp4cols)(int sx, int yl, int yh); void (*rt_tlaterevsubclamp4cols)(int sx, int yl, int yh); void (*rt_initcols)(BYTE *buffer); +void (*rt_span_coverage)(int x, int start, int stop); // // R_DrawColumn @@ -287,51 +285,6 @@ void R_DrawColumnP_C (void) } #endif -void R_DrawColumnP_RGBA_C() -{ - int count; - uint32_t* dest; - fixed_t frac; - fixed_t fracstep; - - count = dc_count; - - // Zero length, column does not exceed a pixel. - if (count <= 0) - return; - - // Framebuffer destination address. - dest = (uint32_t*)dc_dest; - - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - // Determine scaling, - // which is the only mapping to be done. - fracstep = dc_iscale; - frac = dc_texturefrac; - - { - // [RH] Get local copies of these variables so that the compiler - // has a better chance of optimizing this well. - BYTE *colormap = dc_colormap; - const BYTE *source = dc_source; - int pitch = dc_pitch; - - // Inner loop that does the actual texture mapping, - // e.g. a DDA-lile scaling. - // This is as fast as it gets. 
- do - { - *dest = shade_pal_index(source[frac >> FRACBITS], light, shade_constants); - - dest += pitch; - frac += fracstep; - - } while (--count); - } -} - // [RH] Just fills a column with a color void R_FillColumnP_C (void) { @@ -357,32 +310,6 @@ void R_FillColumnP_C (void) } } -void R_FillColumnP_RGBA() -{ - int count; - uint32_t* dest; - - count = dc_count; - - if (count <= 0) - return; - - dest = (uint32_t*)dc_dest; - - uint32_t light = calc_light_multiplier(dc_light); - - { - int pitch = dc_pitch; - BYTE color = dc_color; - - do - { - *dest = shade_pal_index_simple(color, light); - dest += pitch; - } while (--count); - } -} - void R_FillAddColumn_C (void) { int count; @@ -410,38 +337,6 @@ void R_FillAddColumn_C (void) } while (--count); } -void R_FillAddColumn_RGBA_C() -{ - int count; - uint32_t *dest; - - count = dc_count; - if (count <= 0) - return; - - dest = (uint32_t*)dc_dest; - int pitch = dc_pitch; - - uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); - uint32_t fg_red = (fg >> 24) & 0xff; - uint32_t fg_green = (fg >> 16) & 0xff; - uint32_t fg_blue = fg & 0xff; - - do - { - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = (fg_red + bg_red + 1) / 2; - uint32_t green = (fg_green + bg_green + 1) / 2; - uint32_t blue = (fg_blue + bg_blue + 1) / 2; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - } while (--count); -} - void R_FillAddClampColumn_C (void) { int count; @@ -475,38 +370,6 @@ void R_FillAddClampColumn_C (void) } while (--count); } -void R_FillAddClampColumn_RGBA() -{ - int count; - uint32_t *dest; - - count = dc_count; - if (count <= 0) - return; - - dest = (uint32_t*)dc_dest; - int pitch = dc_pitch; - - uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); - uint32_t fg_red = (fg >> 24) & 0xff; - uint32_t fg_green = (fg >> 16) & 0xff; - uint32_t fg_blue = fg & 0xff; - - 
do - { - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp(fg_red + bg_red, 0, 255); - uint32_t green = clamp(fg_green + bg_green, 0, 255); - uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - } while (--count); -} - void R_FillSubClampColumn_C (void) { int count; @@ -539,38 +402,6 @@ void R_FillSubClampColumn_C (void) } while (--count); } -void R_FillSubClampColumn_RGBA() -{ - int count; - uint32_t *dest; - - count = dc_count; - if (count <= 0) - return; - - dest = (uint32_t*)dc_dest; - int pitch = dc_pitch; - - uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); - uint32_t fg_red = (fg >> 24) & 0xff; - uint32_t fg_green = (fg >> 16) & 0xff; - uint32_t fg_blue = fg & 0xff; - - do - { - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp(256 - fg_red + bg_red, 256, 256 + 255) - 255; - uint32_t green = clamp(256 - fg_green + bg_green, 256, 256 + 255) - 255; - uint32_t blue = clamp(256 - fg_blue + bg_blue, 256, 256 + 255) - 255; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - } while (--count); -} - void R_FillRevSubClampColumn_C (void) { int count; @@ -603,42 +434,9 @@ void R_FillRevSubClampColumn_C (void) } while (--count); } -void R_FillRevSubClampColumn_RGBA() -{ - int count; - uint32_t *dest; - - count = dc_count; - if (count <= 0) - return; - - dest = (uint32_t*)dc_dest; - int pitch = dc_pitch; - - uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); - uint32_t fg_red = (fg >> 24) & 0xff; - uint32_t fg_green = (fg >> 16) & 0xff; - uint32_t fg_blue = fg & 0xff; - - do - { - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = 
clamp(256 + fg_red - bg_red, 256, 256 + 255) - 255; - uint32_t green = clamp(256 + fg_green - bg_green, 256, 256 + 255) - 255; - uint32_t blue = clamp(256 + fg_blue - bg_blue, 256, 256 + 255) - 255; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - } while (--count); -} - // // Spectre/Invisibility. // -#define FUZZTABLE 50 extern "C" { @@ -754,105 +552,6 @@ void R_DrawFuzzColumnP_C (void) } #endif -void R_DrawFuzzColumnP_RGBA_C() -{ - int count; - uint32_t *dest; - - // Adjust borders. Low... - if (dc_yl == 0) - dc_yl = 1; - - // .. and high. - if (dc_yh > fuzzviewheight) - dc_yh = fuzzviewheight; - - count = dc_yh - dc_yl; - - // Zero length. - if (count < 0) - return; - - count++; - - dest = ylookup[dc_yl] + dc_x + (uint32_t*)dc_destorg; - - // Note: this implementation assumes this function is only used for the pinky shadow effect (i.e. no other fancy colormap than black) - // I'm not sure if this is really always the case or not. - - { - // [RH] Make local copies of global vars to try and improve - // the optimizations made by the compiler. - int pitch = dc_pitch; - int fuzz = fuzzpos; - int cnt; - - // [RH] Split this into three separate loops to minimize - // the number of times fuzzpos needs to be clamped. 
- if (fuzz) - { - cnt = MIN(FUZZTABLE - fuzz, count); - count -= cnt; - do - { - uint32_t bg = dest[fuzzoffset[fuzz++]]; - uint32_t bg_red = (bg >> 16) & 0xff; - uint32_t bg_green = (bg >> 8) & 0xff; - uint32_t bg_blue = (bg) & 0xff; - - uint32_t red = bg_red * 3 / 4; - uint32_t green = bg_green * 3 / 4; - uint32_t blue = bg_blue * 3 / 4; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - } while (--cnt); - } - if (fuzz == FUZZTABLE || count > 0) - { - while (count >= FUZZTABLE) - { - fuzz = 0; - cnt = FUZZTABLE; - count -= FUZZTABLE; - do - { - uint32_t bg = dest[fuzzoffset[fuzz++]]; - uint32_t bg_red = (bg >> 16) & 0xff; - uint32_t bg_green = (bg >> 8) & 0xff; - uint32_t bg_blue = (bg) & 0xff; - - uint32_t red = bg_red * 3 / 4; - uint32_t green = bg_green * 3 / 4; - uint32_t blue = bg_blue * 3 / 4; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - } while (--cnt); - } - fuzz = 0; - if (count > 0) - { - do - { - uint32_t bg = dest[fuzzoffset[fuzz++]]; - uint32_t bg_red = (bg >> 16) & 0xff; - uint32_t bg_green = (bg >> 8) & 0xff; - uint32_t bg_blue = (bg) & 0xff; - - uint32_t red = bg_red * 3 / 4; - uint32_t green = bg_green * 3 / 4; - uint32_t blue = bg_blue * 3 / 4; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - } while (--count); - } - } - fuzzpos = fuzz; - } -} - // // R_DrawTranlucentColumn // @@ -937,56 +636,6 @@ void R_DrawAddColumnP_C (void) } } -void R_DrawAddColumnP_RGBA_C() -{ - int count; - uint32_t *dest; - fixed_t frac; - fixed_t fracstep; - - count = dc_count; - if (count <= 0) - return; - - dest = (uint32_t*)dc_dest; - - fracstep = dc_iscale; - frac = dc_texturefrac; - - { - const BYTE *source = dc_source; - int pitch = dc_pitch; - BYTE *colormap = dc_colormap; - - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> 
(FRACBITS - 8); - - do - { - uint32_t fg = shade_pal_index(source[frac >> FRACBITS], light, shade_constants); - - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - frac += fracstep; - } while (--count); - } -} - // // R_DrawTranslatedColumn // Used to draw player sprites with the green colorramp mapped to others. @@ -1027,40 +676,6 @@ void R_DrawTranslatedColumnP_C (void) } } -void R_DrawTranslatedColumnP_RGBA_C() -{ - int count; - uint32_t* dest; - fixed_t frac; - fixed_t fracstep; - - count = dc_count; - if (count <= 0) - return; - - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - dest = (uint32_t*)dc_dest; - - fracstep = dc_iscale; - frac = dc_texturefrac; - - { - // [RH] Local copies of global vars to improve compiler optimizations - BYTE *translation = dc_translation; - const BYTE *source = dc_source; - int pitch = dc_pitch; - - do - { - *dest = shade_pal_index(translation[source[frac >> FRACBITS]], light, shade_constants); - dest += pitch; - frac += fracstep; - } while (--count); - } -} - // Draw a column that is both translated and translucent void R_DrawTlatedAddColumnP_C() { @@ -1101,56 +716,6 @@ void R_DrawTlatedAddColumnP_C() } } -void R_DrawTlatedAddColumnP_RGBA_C() -{ - int count; - uint32_t *dest; - fixed_t frac; - fixed_t fracstep; - - count = dc_count; - if (count <= 0) - return; - - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - 
- dest = (uint32_t*)dc_dest; - - fracstep = dc_iscale; - frac = dc_texturefrac; - - { - BYTE *translation = dc_translation; - const BYTE *source = dc_source; - int pitch = dc_pitch; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - do - { - uint32_t fg = shade_pal_index(translation[source[frac >> FRACBITS]], light, shade_constants); - - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - frac += fracstep; - } while (--count); - } -} - // Draw a column whose "color" values are actually translucency // levels for a base color stored in dc_color. 
void R_DrawShadedColumnP_C (void) @@ -1188,52 +753,6 @@ void R_DrawShadedColumnP_C (void) } } -void R_DrawShadedColumnP_RGBA_C() -{ - int count; - uint32_t *dest; - fixed_t frac, fracstep; - - count = dc_count; - - if (count <= 0) - return; - - dest = (uint32_t*)dc_dest; - - fracstep = dc_iscale; - frac = dc_texturefrac; - - uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - { - const BYTE *source = dc_source; - BYTE *colormap = dc_colormap; - int pitch = dc_pitch; - - do - { - DWORD alpha = clamp(colormap[source[frac >> FRACBITS]], 0, 64); - DWORD inv_alpha = 64 - alpha; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = (fg_red * alpha + bg_red * inv_alpha) / 64; - uint32_t green = (fg_green * alpha + bg_green * inv_alpha) / 64; - uint32_t blue = (fg_blue * alpha + bg_blue * inv_alpha) / 64; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - frac += fracstep; - } while (--count); - } -} - // Add source to destination, clamping it to white void R_DrawAddClampColumnP_C () { @@ -1275,53 +794,6 @@ void R_DrawAddClampColumnP_C () } } -void R_DrawAddClampColumnP_RGBA_C() -{ - int count; - uint32_t *dest; - fixed_t frac; - fixed_t fracstep; - - count = dc_count; - if (count <= 0) - return; - - dest = (uint32_t*)dc_dest; - - fracstep = dc_iscale; - frac = dc_texturefrac; - - { - const BYTE *source = dc_source; - int pitch = dc_pitch; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - do - { - uint32_t fg = shade_pal_index(source[frac >> FRACBITS], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 
0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - frac += fracstep; - } while (--count); - } -} - // Add translated source to destination, clamping it to white void R_DrawAddClampTranslatedColumnP_C () { @@ -1364,54 +836,6 @@ void R_DrawAddClampTranslatedColumnP_C () } } -void R_DrawAddClampTranslatedColumnP_RGBA_C() -{ - int count; - uint32_t *dest; - fixed_t frac; - fixed_t fracstep; - - count = dc_count; - if (count <= 0) - return; - - dest = (uint32_t*)dc_dest; - - fracstep = dc_iscale; - frac = dc_texturefrac; - - { - BYTE *translation = dc_translation; - const BYTE *source = dc_source; - int pitch = dc_pitch; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - do - { - uint32_t fg = shade_pal_index(translation[source[frac >> FRACBITS]], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - frac += fracstep; - } while (--count); - } -} - // 
Subtract destination from source, clamping it to black void R_DrawSubClampColumnP_C () { @@ -1452,53 +876,6 @@ void R_DrawSubClampColumnP_C () } } -void R_DrawSubClampColumnP_RGBA_C() -{ - int count; - uint32_t *dest; - fixed_t frac; - fixed_t fracstep; - - count = dc_count; - if (count <= 0) - return; - - dest = (uint32_t*)dc_dest; - - fracstep = dc_iscale; - frac = dc_texturefrac; - - { - const BYTE *source = dc_source; - int pitch = dc_pitch; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - do - { - uint32_t fg = shade_pal_index(source[frac >> FRACBITS], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - frac += fracstep; - } while (--count); - } -} - // Subtract destination from source, clamping it to black void R_DrawSubClampTranslatedColumnP_C () { @@ -1540,54 +917,6 @@ void R_DrawSubClampTranslatedColumnP_C () } } -void R_DrawSubClampTranslatedColumnP_RGBA_C() -{ - int count; - uint32_t *dest; - fixed_t frac; - fixed_t fracstep; - - count = dc_count; - if (count <= 0) - return; - - dest = (uint32_t*)dc_dest; - - fracstep = dc_iscale; - frac = dc_texturefrac; - - { - BYTE *translation = dc_translation; - const BYTE *source = dc_source; - int pitch = dc_pitch; - uint32_t light = 
calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - do - { - uint32_t fg = shade_pal_index(translation[source[frac >> FRACBITS]], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - frac += fracstep; - } while (--count); - } -} - // Subtract source from destination, clamping it to black void R_DrawRevSubClampColumnP_C () { @@ -1628,52 +957,6 @@ void R_DrawRevSubClampColumnP_C () } } -void R_DrawRevSubClampColumnP_RGBA_C() -{ - int count; - uint32_t *dest; - fixed_t frac; - fixed_t fracstep; - - count = dc_count; - if (count <= 0) - return; - - dest = (uint32_t*)dc_dest; - - fracstep = dc_iscale; - frac = dc_texturefrac; - - { - const BYTE *source = dc_source; - int pitch = dc_pitch; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - do - { - uint32_t fg = shade_pal_index(source[frac >> FRACBITS], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - 
uint32_t red = clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - frac += fracstep; - } while (--count); - } -} - // Subtract source from destination, clamping it to black void R_DrawRevSubClampTranslatedColumnP_C () { @@ -1715,55 +998,6 @@ void R_DrawRevSubClampTranslatedColumnP_C () } } -void R_DrawRevSubClampTranslatedColumnP_RGBA_C() -{ - int count; - uint32_t *dest; - fixed_t frac; - fixed_t fracstep; - - count = dc_count; - if (count <= 0) - return; - - dest = (uint32_t*)dc_dest; - - fracstep = dc_iscale; - frac = dc_texturefrac; - - { - BYTE *translation = dc_translation; - const BYTE *source = dc_source; - int pitch = dc_pitch; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - do - { - uint32_t fg = shade_pal_index(translation[source[frac >> FRACBITS]], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - frac += fracstep; - } while (--count); - } -} - - // // R_DrawSpan // 
With DOOM style restrictions on view orientation, @@ -1957,233 +1191,6 @@ void R_DrawSpanP_C (void) } #endif -void R_DrawSpanP_RGBA_C() -{ - dsfixed_t xfrac; - dsfixed_t yfrac; - dsfixed_t xstep; - dsfixed_t ystep; - uint32_t* dest; - const BYTE* source = ds_source; - int count; - int spot; - -#ifdef RANGECHECK - if (ds_x2 < ds_x1 || ds_x1 < 0 - || ds_x2 >= screen->width || ds_y > screen->height) - { - I_Error("R_DrawSpan: %i to %i at %i", ds_x1, ds_x2, ds_y); - } - // dscount++; -#endif - - xfrac = ds_xfrac; - yfrac = ds_yfrac; - - dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; - - count = ds_x2 - ds_x1 + 1; - - xstep = ds_xstep; - ystep = ds_ystep; - - uint32_t light = calc_light_multiplier(ds_light); - ShadeConstants shade_constants = ds_shade_constants; - - if (ds_xbits == 6 && ds_ybits == 6) - { - // 64x64 is the most common case by far, so special case it. - - do - { - // Current texture index in u,v. - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - - // Lookup pixel from flat texture tile - *dest++ = shade_pal_index(source[spot], light, shade_constants); - - // Next step in u,v. - xfrac += xstep; - yfrac += ystep; - } while (--count); - } - else - { - BYTE yshift = 32 - ds_ybits; - BYTE xshift = yshift - ds_xbits; - int xmask = ((1 << ds_xbits) - 1) << ds_ybits; - - do - { - // Current texture index in u,v. - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - - // Lookup pixel from flat texture tile - *dest++ = shade_pal_index(source[spot], light, shade_constants); - - // Next step in u,v. 
- xfrac += xstep; - yfrac += ystep; - } while (--count); - } -} - -#ifndef NO_SSE -void R_DrawSpanP_RGBA_SSE() -{ - dsfixed_t xfrac; - dsfixed_t yfrac; - dsfixed_t xstep; - dsfixed_t ystep; - uint32_t* dest; - const BYTE* source = ds_source; - int count; - int spot; - -#ifdef RANGECHECK - if (ds_x2 < ds_x1 || ds_x1 < 0 - || ds_x2 >= screen->width || ds_y > screen->height) - { - I_Error("R_DrawSpan: %i to %i at %i", ds_x1, ds_x2, ds_y); - } - // dscount++; -#endif - - xfrac = ds_xfrac; - yfrac = ds_yfrac; - - dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; - - count = ds_x2 - ds_x1 + 1; - - xstep = ds_xstep; - ystep = ds_ystep; - - uint32_t light = calc_light_multiplier(ds_light); - ShadeConstants shade_constants = ds_shade_constants; - - if (ds_xbits == 6 && ds_ybits == 6) - { - // 64x64 is the most common case by far, so special case it. - - uint32_t *palette = (uint32_t*)GPalette.BaseColors; - - int sse_count = count / 4; - count -= sse_count * 4; - - if (shade_constants.simple_shade) - { - SSE_SHADE_SIMPLE_INIT(light); - - while (sse_count--) - { - // Current texture index in u,v. - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p0 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p1 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p2 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p3 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - // Lookup pixel from flat texture tile, - // re-index using light/colormap. - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - SSE_SHADE_SIMPLE(fg); - _mm_storeu_si128((__m128i*)dest, fg); - - // Next step in u,v. 
- dest += 4; - } - } - else - { - SSE_SHADE_INIT(light, shade_constants); - - while (sse_count--) - { - // Current texture index in u,v. - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p0 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p1 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p2 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p3 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - // Lookup pixel from flat texture tile, - // re-index using light/colormap. - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - SSE_SHADE(fg, shade_constants); - _mm_storeu_si128((__m128i*)dest, fg); - - // Next step in u,v. - dest += 4; - } - } - - if (count == 0) - return; - - do - { - // Current texture index in u,v. - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - - // Lookup pixel from flat texture tile - *dest++ = shade_pal_index(source[spot], light, shade_constants); - - // Next step in u,v. - xfrac += xstep; - yfrac += ystep; - } while (--count); - } - else - { - BYTE yshift = 32 - ds_ybits; - BYTE xshift = yshift - ds_xbits; - int xmask = ((1 << ds_xbits) - 1) << ds_ybits; - - do - { - // Current texture index in u,v. - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - - // Lookup pixel from flat texture tile - *dest++ = shade_pal_index(source[spot], light, shade_constants); - - // Next step in u,v. 
- xfrac += xstep; - yfrac += ystep; - } while (--count); - } -} -#endif - #ifndef X86_ASM // [RH] Draw a span with holes @@ -2250,72 +1257,6 @@ void R_DrawSpanMaskedP_C (void) } #endif -void R_DrawSpanMaskedP_RGBA_C() -{ - dsfixed_t xfrac; - dsfixed_t yfrac; - dsfixed_t xstep; - dsfixed_t ystep; - uint32_t* dest; - const BYTE* source = ds_source; - const BYTE* colormap = ds_colormap; - int count; - int spot; - - uint32_t light = calc_light_multiplier(ds_light); - ShadeConstants shade_constants = ds_shade_constants; - - xfrac = ds_xfrac; - yfrac = ds_yfrac; - - dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; - - count = ds_x2 - ds_x1 + 1; - - xstep = ds_xstep; - ystep = ds_ystep; - - if (ds_xbits == 6 && ds_ybits == 6) - { - // 64x64 is the most common case by far, so special case it. - do - { - BYTE texdata; - - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - texdata = source[spot]; - if (texdata != 0) - { - *dest = shade_pal_index(texdata, light, shade_constants); - } - dest++; - xfrac += xstep; - yfrac += ystep; - } while (--count); - } - else - { - BYTE yshift = 32 - ds_ybits; - BYTE xshift = yshift - ds_xbits; - int xmask = ((1 << ds_xbits) - 1) << ds_ybits; - do - { - BYTE texdata; - - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - texdata = source[spot]; - if (texdata != 0) - { - *dest = shade_pal_index(texdata, light, shade_constants); - } - dest++; - xfrac += xstep; - yfrac += ystep; - } while (--count); - } -} - - void R_DrawSpanTranslucentP_C (void) { dsfixed_t xfrac; @@ -2378,89 +1319,6 @@ void R_DrawSpanTranslucentP_C (void) } } -void R_DrawSpanTranslucentP_RGBA_C() -{ - dsfixed_t xfrac; - dsfixed_t yfrac; - dsfixed_t xstep; - dsfixed_t ystep; - uint32_t* dest; - const BYTE* source = ds_source; - int count; - int spot; - - xfrac = ds_xfrac; - yfrac = ds_yfrac; - - dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; - - count = ds_x2 - ds_x1 + 1; - - xstep = ds_xstep; - ystep = ds_ystep; - - uint32_t light = 
calc_light_multiplier(ds_light); - ShadeConstants shade_constants = ds_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - if (ds_xbits == 6 && ds_ybits == 6) - { - // 64x64 is the most common case by far, so special case it. - do - { - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - - uint32_t fg = shade_pal_index(source[spot], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = (fg) & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = (fg_red * fg_alpha + bg_red * bg_alpha) / 256; - uint32_t green = (fg_green * fg_alpha + bg_green * bg_alpha) / 256; - uint32_t blue = (fg_blue * fg_alpha + bg_blue * bg_alpha) / 256; - - *dest++ = 0xff000000 | (red << 16) | (green << 8) | blue; - - xfrac += xstep; - yfrac += ystep; - } while (--count); - } - else - { - BYTE yshift = 32 - ds_ybits; - BYTE xshift = yshift - ds_xbits; - int xmask = ((1 << ds_xbits) - 1) << ds_ybits; - do - { - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - - uint32_t fg = shade_pal_index(source[spot], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = (fg) & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = (fg_red * fg_alpha + bg_red * bg_alpha) / 256; - uint32_t green = (fg_green * fg_alpha + bg_green * bg_alpha) / 256; - uint32_t blue = (fg_blue * fg_alpha + bg_blue * bg_alpha) / 256; - - *dest++ = 0xff000000 | (red << 16) | (green << 8) | blue; - - xfrac += xstep; - yfrac += ystep; - } while (--count); - } -} - void R_DrawSpanMaskedTranslucentP_C (void) { dsfixed_t xfrac; @@ -2537,99 +1395,6 @@ void R_DrawSpanMaskedTranslucentP_C (void) } } -void 
R_DrawSpanMaskedTranslucentP_RGBA_C() -{ - dsfixed_t xfrac; - dsfixed_t yfrac; - dsfixed_t xstep; - dsfixed_t ystep; - uint32_t* dest; - const BYTE* source = ds_source; - int count; - int spot; - - uint32_t light = calc_light_multiplier(ds_light); - ShadeConstants shade_constants = ds_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - xfrac = ds_xfrac; - yfrac = ds_yfrac; - - dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; - - count = ds_x2 - ds_x1 + 1; - - xstep = ds_xstep; - ystep = ds_ystep; - - if (ds_xbits == 6 && ds_ybits == 6) - { - // 64x64 is the most common case by far, so special case it. - do - { - BYTE texdata; - - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - texdata = source[spot]; - if (texdata != 0) - { - uint32_t fg = shade_pal_index(texdata, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = (fg) & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = (fg_red * fg_alpha + bg_red * bg_alpha) / 256; - uint32_t green = (fg_green * fg_alpha + bg_green * bg_alpha) / 256; - uint32_t blue = (fg_blue * fg_alpha + bg_blue * bg_alpha) / 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - } - dest++; - xfrac += xstep; - yfrac += ystep; - } while (--count); - } - else - { - BYTE yshift = 32 - ds_ybits; - BYTE xshift = yshift - ds_xbits; - int xmask = ((1 << ds_xbits) - 1) << ds_ybits; - do - { - BYTE texdata; - - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - texdata = source[spot]; - if (texdata != 0) - { - uint32_t fg = shade_pal_index(texdata, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = (fg) & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 
0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = (fg_red * fg_alpha + bg_red * bg_alpha) / 256; - uint32_t green = (fg_green * fg_alpha + bg_green * bg_alpha) / 256; - uint32_t blue = (fg_blue * fg_alpha + bg_blue * bg_alpha) / 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - } - dest++; - xfrac += xstep; - yfrac += ystep; - } while (--count); - } -} - void R_DrawSpanAddClampP_C (void) { dsfixed_t xfrac; @@ -2700,88 +1465,6 @@ void R_DrawSpanAddClampP_C (void) } } -void R_DrawSpanAddClampP_RGBA_C() -{ - dsfixed_t xfrac; - dsfixed_t yfrac; - dsfixed_t xstep; - dsfixed_t ystep; - uint32_t* dest; - const BYTE* source = ds_source; - int count; - int spot; - - uint32_t light = calc_light_multiplier(ds_light); - ShadeConstants shade_constants = ds_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - xfrac = ds_xfrac; - yfrac = ds_yfrac; - - dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; - - count = ds_x2 - ds_x1 + 1; - - xstep = ds_xstep; - ystep = ds_ystep; - - if (ds_xbits == 6 && ds_ybits == 6) - { - // 64x64 is the most common case by far, so special case it. 
- do - { - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - - uint32_t fg = shade_pal_index(source[spot], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = (fg) & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - *dest++ = 0xff000000 | (red << 16) | (green << 8) | blue; - - xfrac += xstep; - yfrac += ystep; - } while (--count); - } - else - { - BYTE yshift = 32 - ds_ybits; - BYTE xshift = yshift - ds_xbits; - int xmask = ((1 << ds_xbits) - 1) << ds_ybits; - do - { - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - - uint32_t fg = shade_pal_index(source[spot], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = (fg) & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - *dest++ = 0xff000000 | (red << 16) | (green << 8) | blue; - - xfrac += xstep; - yfrac += ystep; - } while (--count); - } -} void R_DrawSpanMaskedAddClampP_C (void) { @@ -2865,114 +1548,12 @@ void R_DrawSpanMaskedAddClampP_C (void) } } -void R_DrawSpanMaskedAddClampP_RGBA_C() -{ - dsfixed_t xfrac; - dsfixed_t yfrac; - dsfixed_t xstep; - dsfixed_t ystep; - uint32_t* dest; - const BYTE* source = ds_source; - int count; - int spot; - - uint32_t light = 
calc_light_multiplier(ds_light); - ShadeConstants shade_constants = ds_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - xfrac = ds_xfrac; - yfrac = ds_yfrac; - - dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; - - count = ds_x2 - ds_x1 + 1; - - xstep = ds_xstep; - ystep = ds_ystep; - - if (ds_xbits == 6 && ds_ybits == 6) - { - // 64x64 is the most common case by far, so special case it. - do - { - BYTE texdata; - - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - texdata = source[spot]; - if (texdata != 0) - { - uint32_t fg = shade_pal_index(texdata, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = (fg) & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = (fg_red * fg_alpha + bg_red * bg_alpha) / 256; - uint32_t green = (fg_green * fg_alpha + bg_green * bg_alpha) / 256; - uint32_t blue = (fg_blue * fg_alpha + bg_blue * bg_alpha) / 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - } - dest++; - xfrac += xstep; - yfrac += ystep; - } while (--count); - } - else - { - BYTE yshift = 32 - ds_ybits; - BYTE xshift = yshift - ds_xbits; - int xmask = ((1 << ds_xbits) - 1) << ds_ybits; - do - { - BYTE texdata; - - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - texdata = source[spot]; - if (texdata != 0) - { - uint32_t fg = shade_pal_index(texdata, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = (fg) & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = (fg_red * fg_alpha + bg_red * bg_alpha) / 256; - uint32_t green = (fg_green * fg_alpha + bg_green * bg_alpha) / 256; - uint32_t blue = (fg_blue * 
fg_alpha + bg_blue * bg_alpha) / 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - } - dest++; - xfrac += xstep; - yfrac += ystep; - } while (--count); - } -} - // [RH] Just fill a span with a color void R_FillSpan_C (void) { memset (ylookup[ds_y] + ds_x1 + dc_destorg, ds_color, (ds_x2 - ds_x1 + 1)); } -void R_FillSpan_RGBA() -{ - uint32_t *dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; - int count = (ds_x2 - ds_x1 + 1); - uint32_t light = calc_light_multiplier(ds_light); - uint32_t color = shade_pal_index_simple(ds_color, light); - for (int i = 0; i < count; i++) - dest[i] = color; -} // Draw a voxel slab // @@ -3070,8 +1651,8 @@ extern "C" void R_DrawSlabC(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *v // wallscan stuff, in C -static int vlinebits; -static int mvlinebits; +int vlinebits; +int mvlinebits; #ifndef X86_ASM static DWORD vlinec1 (); @@ -3186,29 +1767,6 @@ DWORD vlinec1 () } #endif -DWORD vlinec1_RGBA() -{ - DWORD fracstep = dc_iscale; - DWORD frac = dc_texturefrac; - int count = dc_count; - const BYTE *source = dc_source; - uint32_t *dest = (uint32_t*)dc_dest; - int bits = vlinebits; - int pitch = dc_pitch; - - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - do - { - *dest = shade_pal_index(source[frac >> bits], light, shade_constants); - frac += fracstep; - dest += pitch; - } while (--count); - - return frac; -} - #if !defined(X86_ASM) void vlinec4 () { @@ -3228,113 +1786,6 @@ void vlinec4 () } #endif -void vlinec4_RGBA() -{ - uint32_t *dest = (uint32_t*)dc_dest; - int count = dc_count; - int bits = vlinebits; - DWORD place; - - uint32_t light0 = calc_light_multiplier(palookuplight[0]); - uint32_t light1 = calc_light_multiplier(palookuplight[1]); - uint32_t light2 = calc_light_multiplier(palookuplight[2]); - uint32_t light3 = calc_light_multiplier(palookuplight[3]); - - ShadeConstants shade_constants = dc_shade_constants; - - do - { - dest[0] = 
shade_pal_index(bufplce[0][(place = vplce[0]) >> bits], light0, shade_constants); vplce[0] = place + vince[0]; - dest[1] = shade_pal_index(bufplce[1][(place = vplce[1]) >> bits], light1, shade_constants); vplce[1] = place + vince[1]; - dest[2] = shade_pal_index(bufplce[2][(place = vplce[2]) >> bits], light2, shade_constants); vplce[2] = place + vince[2]; - dest[3] = shade_pal_index(bufplce[3][(place = vplce[3]) >> bits], light3, shade_constants); vplce[3] = place + vince[3]; - dest += dc_pitch; - } while (--count); -} - -#ifndef NO_SSE -void vlinec4_RGBA_SSE() -{ - uint32_t *dest = (uint32_t*)dc_dest; - int count = dc_count; - int bits = vlinebits; - - uint32_t light0 = calc_light_multiplier(palookuplight[0]); - uint32_t light1 = calc_light_multiplier(palookuplight[1]); - uint32_t light2 = calc_light_multiplier(palookuplight[2]); - uint32_t light3 = calc_light_multiplier(palookuplight[3]); - - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t *palette = (uint32_t*)GPalette.BaseColors; - DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; - DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - - if (shade_constants.simple_shade) - { - SSE_SHADE_SIMPLE_INIT4(light3, light2, light1, light0); - do - { - DWORD place0 = local_vplce[0]; - DWORD place1 = local_vplce[1]; - DWORD place2 = local_vplce[2]; - DWORD place3 = local_vplce[3]; - - BYTE p0 = bufplce[0][place0 >> bits]; - BYTE p1 = bufplce[1][place1 >> bits]; - BYTE p2 = bufplce[2][place2 >> bits]; - BYTE p3 = bufplce[3][place3 >> bits]; - - local_vplce[0] = place0 + local_vince[0]; - local_vplce[1] = place1 + local_vince[1]; - local_vplce[2] = place2 + local_vince[2]; - local_vplce[3] = place3 + local_vince[3]; - - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - SSE_SHADE_SIMPLE(fg); - _mm_storeu_si128((__m128i*)dest, fg); - dest += dc_pitch; - } while (--count); - } - else - { - SSE_SHADE_INIT4(light3, light2, light1, light0, 
shade_constants); - do - { - DWORD place0 = local_vplce[0]; - DWORD place1 = local_vplce[1]; - DWORD place2 = local_vplce[2]; - DWORD place3 = local_vplce[3]; - - BYTE p0 = bufplce[0][place0 >> bits]; - BYTE p1 = bufplce[1][place1 >> bits]; - BYTE p2 = bufplce[2][place2 >> bits]; - BYTE p3 = bufplce[3][place3 >> bits]; - - local_vplce[0] = place0 + local_vince[0]; - local_vplce[1] = place1 + local_vince[1]; - local_vplce[2] = place2 + local_vince[2]; - local_vplce[3] = place3 + local_vince[3]; - - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - SSE_SHADE(fg, shade_constants); - _mm_storeu_si128((__m128i*)dest, fg); - dest += dc_pitch; - } while (--count); - } - - // Is this needed? Global variables makes it tricky to know.. - vplce[0] = local_vplce[0]; - vplce[1] = local_vplce[1]; - vplce[2] = local_vplce[2]; - vplce[3] = local_vplce[3]; - vince[0] = local_vince[0]; - vince[1] = local_vince[1]; - vince[2] = local_vince[2]; - vince[3] = local_vince[3]; -} -#endif - void setupmvline (int fracbits) { if (!r_swtruecolor) @@ -3380,34 +1831,6 @@ DWORD mvlinec1 () } #endif -DWORD mvlinec1_RGBA() -{ - DWORD fracstep = dc_iscale; - DWORD frac = dc_texturefrac; - BYTE *colormap = dc_colormap; - int count = dc_count; - const BYTE *source = dc_source; - uint32_t *dest = (uint32_t*)dc_dest; - int bits = mvlinebits; - int pitch = dc_pitch; - - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - do - { - BYTE pix = source[frac >> bits]; - if (pix != 0) - { - *dest = shade_pal_index(pix, light, shade_constants); - } - frac += fracstep; - dest += pitch; - } while (--count); - - return frac; -} - #if !defined(X86_ASM) void mvlinec4 () { @@ -3428,121 +1851,6 @@ void mvlinec4 () } #endif -void mvlinec4_RGBA() -{ - uint32_t *dest = (uint32_t*)dc_dest; - int count = dc_count; - int bits = mvlinebits; - DWORD place; - - uint32_t light0 = calc_light_multiplier(palookuplight[0]); - uint32_t light1 = 
calc_light_multiplier(palookuplight[1]); - uint32_t light2 = calc_light_multiplier(palookuplight[2]); - uint32_t light3 = calc_light_multiplier(palookuplight[3]); - - ShadeConstants shade_constants = dc_shade_constants; - - do - { - BYTE pix; - pix = bufplce[0][(place = vplce[0]) >> bits]; if (pix) dest[0] = shade_pal_index(pix, light0, shade_constants); vplce[0] = place + vince[0]; - pix = bufplce[1][(place = vplce[1]) >> bits]; if (pix) dest[1] = shade_pal_index(pix, light1, shade_constants); vplce[1] = place + vince[1]; - pix = bufplce[2][(place = vplce[2]) >> bits]; if (pix) dest[2] = shade_pal_index(pix, light2, shade_constants); vplce[2] = place + vince[2]; - pix = bufplce[3][(place = vplce[3]) >> bits]; if (pix) dest[3] = shade_pal_index(pix, light3, shade_constants); vplce[3] = place + vince[3]; - dest += dc_pitch; - } while (--count); -} - -#ifndef NO_SSE -void mvlinec4_RGBA_SSE() -{ - uint32_t *dest = (uint32_t*)dc_dest; - int count = dc_count; - int bits = vlinebits; - - uint32_t light0 = calc_light_multiplier(palookuplight[0]); - uint32_t light1 = calc_light_multiplier(palookuplight[1]); - uint32_t light2 = calc_light_multiplier(palookuplight[2]); - uint32_t light3 = calc_light_multiplier(palookuplight[3]); - - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t *palette = (uint32_t*)GPalette.BaseColors; - DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; - DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - - if (shade_constants.simple_shade) - { - SSE_SHADE_SIMPLE_INIT4(light3, light2, light1, light0); - do - { - DWORD place0 = local_vplce[0]; - DWORD place1 = local_vplce[1]; - DWORD place2 = local_vplce[2]; - DWORD place3 = local_vplce[3]; - - BYTE pix0 = bufplce[0][place0 >> bits]; - BYTE pix1 = bufplce[1][place1 >> bits]; - BYTE pix2 = bufplce[2][place2 >> bits]; - BYTE pix3 = bufplce[3][place3 >> bits]; - - // movemask = !(pix == 0) - __m128i movemask = 
_mm_xor_si128(_mm_cmpeq_epi32(_mm_set_epi32(pix3, pix2, pix1, pix0), _mm_setzero_si128()), _mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128())); - - local_vplce[0] = place0 + local_vince[0]; - local_vplce[1] = place1 + local_vince[1]; - local_vplce[2] = place2 + local_vince[2]; - local_vplce[3] = place3 + local_vince[3]; - - __m128i fg = _mm_set_epi32(palette[pix3], palette[pix2], palette[pix1], palette[pix0]); - SSE_SHADE_SIMPLE(fg); - _mm_maskmoveu_si128(fg, movemask, (char*)dest); - dest += dc_pitch; - } while (--count); - } - else - { - SSE_SHADE_INIT4(light3, light2, light1, light0, shade_constants); - do - { - DWORD place0 = local_vplce[0]; - DWORD place1 = local_vplce[1]; - DWORD place2 = local_vplce[2]; - DWORD place3 = local_vplce[3]; - - BYTE pix0 = bufplce[0][place0 >> bits]; - BYTE pix1 = bufplce[1][place1 >> bits]; - BYTE pix2 = bufplce[2][place2 >> bits]; - BYTE pix3 = bufplce[3][place3 >> bits]; - - // movemask = !(pix == 0) - __m128i movemask = _mm_xor_si128(_mm_cmpeq_epi32(_mm_set_epi32(pix3, pix2, pix1, pix0), _mm_setzero_si128()), _mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128())); - - local_vplce[0] = place0 + local_vince[0]; - local_vplce[1] = place1 + local_vince[1]; - local_vplce[2] = place2 + local_vince[2]; - local_vplce[3] = place3 + local_vince[3]; - - __m128i fg = _mm_set_epi32(palette[pix3], palette[pix2], palette[pix1], palette[pix0]); - SSE_SHADE(fg, shade_constants); - _mm_maskmoveu_si128(fg, movemask, (char*)dest); - dest += dc_pitch; - } while (--count); - } - - // Is this needed? Global variables makes it tricky to know.. 
- vplce[0] = local_vplce[0]; - vplce[1] = local_vplce[1]; - vplce[2] = local_vplce[2]; - vplce[3] = local_vplce[3]; - vince[0] = local_vince[0]; - vince[1] = local_vince[1]; - vince[2] = local_vince[2]; - vince[3] = local_vince[3]; -} -#endif - - extern "C" short spanend[MAXHEIGHT]; extern float rw_light; extern float rw_lightstep; @@ -3666,196 +1974,6 @@ void R_DrawFogBoundary_C (int x1, int x2, short *uclip, short *dclip) } } -static void R_DrawFogBoundarySection_RGBA(int y, int y2, int x1) -{ - BYTE *colormap = dc_colormap; - uint32_t *dest = ylookup[y] + (uint32_t*)dc_destorg; - - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants constants = dc_shade_constants; - - for (; y < y2; ++y) - { - int x2 = spanend[y]; - int x = x1; - do - { - uint32_t red = (dest[x] >> 16) & 0xff; - uint32_t green = (dest[x] >> 8) & 0xff; - uint32_t blue = dest[x] & 0xff; - - if (constants.simple_shade) - { - red = red * light / 256; - green = green * light / 256; - blue = blue * light / 256; - } - else - { - uint32_t inv_light = 256 - light; - uint32_t inv_desaturate = 256 - constants.desaturate; - - uint32_t intensity = ((red * 77 + green * 143 + blue * 37) >> 8) * constants.desaturate; - - red = (red * inv_desaturate + intensity) / 256; - green = (green * inv_desaturate + intensity) / 256; - blue = (blue * inv_desaturate + intensity) / 256; - - red = (constants.fade_red * inv_light + red * light) / 256; - green = (constants.fade_green * inv_light + green * light) / 256; - blue = (constants.fade_blue * inv_light + blue * light) / 256; - - red = (red * constants.light_red) / 256; - green = (green * constants.light_green) / 256; - blue = (blue * constants.light_blue) / 256; - } - - dest[x] = 0xff000000 | (red << 16) | (green << 8) | blue; - } while (++x <= x2); - dest += dc_pitch; - } -} - -static void R_DrawFogBoundaryLine_RGBA(int y, int x) -{ - int x2 = spanend[y]; - BYTE *colormap = dc_colormap; - uint32_t *dest = ylookup[y] + (uint32_t*)dc_destorg; - - uint32_t 
light = calc_light_multiplier(dc_light); - ShadeConstants constants = dc_shade_constants; - - do - { - uint32_t red = (dest[x] >> 16) & 0xff; - uint32_t green = (dest[x] >> 8) & 0xff; - uint32_t blue = dest[x] & 0xff; - - if (constants.simple_shade) - { - red = red * light / 256; - green = green * light / 256; - blue = blue * light / 256; - } - else - { - uint32_t inv_light = 256 - light; - uint32_t inv_desaturate = 256 - constants.desaturate; - - uint32_t intensity = ((red * 77 + green * 143 + blue * 37) >> 8) * constants.desaturate; - - red = (red * inv_desaturate + intensity) / 256; - green = (green * inv_desaturate + intensity) / 256; - blue = (blue * inv_desaturate + intensity) / 256; - - red = (constants.fade_red * inv_light + red * light) / 256; - green = (constants.fade_green * inv_light + green * light) / 256; - blue = (constants.fade_blue * inv_light + blue * light) / 256; - - red = (red * constants.light_red) / 256; - green = (green * constants.light_green) / 256; - blue = (blue * constants.light_blue) / 256; - } - - dest[x] = 0xff000000 | (red << 16) | (green << 8) | blue; - } while (++x <= x2); -} - -void R_DrawFogBoundary_RGBA(int x1, int x2, short *uclip, short *dclip) -{ - // To do: we do not need to create new spans when using rgba output - instead we should calculate light on a per pixel basis - - // This is essentially the same as R_MapVisPlane but with an extra step - // to create new horizontal spans whenever the light changes enough that - // we need to use a new colormap. 
- - double lightstep = rw_lightstep; - double light = rw_light + rw_lightstep*(x2 - x1 - 1); - int x = x2 - 1; - int t2 = uclip[x]; - int b2 = dclip[x]; - int rcolormap = GETPALOOKUP(light, wallshade); - int lcolormap; - BYTE *basecolormapdata = basecolormap->Maps; - - if (b2 > t2) - { - clearbufshort(spanend + t2, b2 - t2, x); - } - - R_SetColorMapLight(basecolormap, (float)light, wallshade); - - BYTE *fake_dc_colormap = basecolormap->Maps + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT); - - for (--x; x >= x1; --x) - { - int t1 = uclip[x]; - int b1 = dclip[x]; - const int xr = x + 1; - int stop; - - light -= rw_lightstep; - lcolormap = GETPALOOKUP(light, wallshade); - if (lcolormap != rcolormap) - { - if (t2 < b2 && rcolormap != 0) - { // Colormap 0 is always the identity map, so rendering it is - // just a waste of time. - R_DrawFogBoundarySection_RGBA(t2, b2, xr); - } - if (t1 < t2) t2 = t1; - if (b1 > b2) b2 = b1; - if (t2 < b2) - { - clearbufshort(spanend + t2, b2 - t2, x); - } - rcolormap = lcolormap; - R_SetColorMapLight(basecolormap, (float)light, wallshade); - fake_dc_colormap = basecolormap->Maps + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT); - } - else - { - if (fake_dc_colormap != basecolormapdata) - { - stop = MIN(t1, b2); - while (t2 < stop) - { - R_DrawFogBoundaryLine_RGBA(t2++, xr); - } - stop = MAX(b1, t2); - while (b2 > stop) - { - R_DrawFogBoundaryLine_RGBA(--b2, xr); - } - } - else - { - t2 = MAX(t2, MIN(t1, b2)); - b2 = MIN(b2, MAX(b1, t2)); - } - - stop = MIN(t2, b1); - while (t1 < stop) - { - spanend[t1++] = x; - } - stop = MAX(b2, t2); - while (b1 > stop) - { - spanend[--b1] = x; - } - } - - t2 = uclip[x]; - b2 = dclip[x]; - } - if (t2 < b2 && rcolormap != 0) - { - R_DrawFogBoundarySection_RGBA(t2, b2, x1); - } -} - - int tmvlinebits; void setuptmvline (int bits) @@ -3896,49 +2014,6 @@ fixed_t tmvline1_add_C () return frac; } -fixed_t tmvline1_add_RGBA() -{ - DWORD fracstep = dc_iscale; - DWORD frac = dc_texturefrac; - int count = 
dc_count; - const BYTE *source = dc_source; - uint32_t *dest = (uint32_t*)dc_dest; - int bits = tmvlinebits; - int pitch = dc_pitch; - - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - do - { - BYTE pix = source[frac >> bits]; - if (pix != 0) - { - uint32_t fg = shade_pal_index(pix, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - } - frac += fracstep; - dest += pitch; - } while (--count); - - return frac; -} - void tmvline4_add_C () { BYTE *dest = dc_dest; @@ -3972,51 +2047,6 @@ void tmvline4_add_C () } while (--count); } -void tmvline4_add_RGBA() -{ - uint32_t *dest = (uint32_t*)dc_dest; - int count = dc_count; - int bits = tmvlinebits; - - uint32_t light[4]; - light[0] = calc_light_multiplier(palookuplight[0]); - light[1] = calc_light_multiplier(palookuplight[1]); - light[2] = calc_light_multiplier(palookuplight[2]); - light[3] = calc_light_multiplier(palookuplight[3]); - - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - do - { - for (int i = 0; i < 4; ++i) - { - BYTE pix = bufplce[i][vplce[i] >> bits]; - if (pix != 0) - { - uint32_t fg = shade_pal_index(pix, light[i], shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - 
uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - } - vplce[i] += vince[i]; - } - dest += dc_pitch; - } while (--count); -} - fixed_t tmvline1_addclamp_C () { DWORD fracstep = dc_iscale; @@ -4055,49 +2085,6 @@ fixed_t tmvline1_addclamp_C () return frac; } -fixed_t tmvline1_addclamp_RGBA() -{ - DWORD fracstep = dc_iscale; - DWORD frac = dc_texturefrac; - int count = dc_count; - const BYTE *source = dc_source; - uint32_t *dest = (uint32_t*)dc_dest; - int bits = tmvlinebits; - int pitch = dc_pitch; - - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - do - { - BYTE pix = source[frac >> bits]; - if (pix != 0) - { - uint32_t fg = shade_pal_index(pix, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - } - frac += fracstep; - dest += pitch; - } while (--count); - - return frac; -} - void tmvline4_addclamp_C () { BYTE *dest = dc_dest; @@ -4130,51 +2117,6 @@ void tmvline4_addclamp_C () } 
while (--count); } -void tmvline4_addclamp_RGBA() -{ - uint32_t *dest = (uint32_t*)dc_dest; - int count = dc_count; - int bits = tmvlinebits; - - uint32_t light[4]; - light[0] = calc_light_multiplier(palookuplight[0]); - light[1] = calc_light_multiplier(palookuplight[1]); - light[2] = calc_light_multiplier(palookuplight[2]); - light[3] = calc_light_multiplier(palookuplight[3]); - - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - do - { - for (int i = 0; i < 4; ++i) - { - BYTE pix = bufplce[i][vplce[i] >> bits]; - if (pix != 0) - { - uint32_t fg = shade_pal_index(pix, light[i], shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (dest[i] >> 16) & 0xff; - uint32_t bg_green = (dest[i] >> 8) & 0xff; - uint32_t bg_blue = (dest[i]) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - } - vplce[i] += vince[i]; - } - dest += dc_pitch; - } while (--count); -} - fixed_t tmvline1_subclamp_C () { DWORD fracstep = dc_iscale; @@ -4210,50 +2152,6 @@ fixed_t tmvline1_subclamp_C () return frac; } -fixed_t tmvline1_subclamp_RGBA() -{ - DWORD fracstep = dc_iscale; - DWORD frac = dc_texturefrac; - BYTE *colormap = dc_colormap; - int count = dc_count; - const BYTE *source = dc_source; - uint32_t *dest = (uint32_t*)dc_dest; - int bits = tmvlinebits; - int pitch = dc_pitch; - - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - do - { - BYTE pix = 
source[frac >> bits]; - if (pix != 0) - { - uint32_t fg = shade_pal_index(pix, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - } - frac += fracstep; - dest += pitch; - } while (--count); - - return frac; -} - void tmvline4_subclamp_C () { BYTE *dest = dc_dest; @@ -4285,51 +2183,6 @@ void tmvline4_subclamp_C () } while (--count); } -void tmvline4_subclamp_RGBA() -{ - uint32_t *dest = (uint32_t*)dc_dest; - int count = dc_count; - int bits = tmvlinebits; - - uint32_t light[4]; - light[0] = calc_light_multiplier(palookuplight[0]); - light[1] = calc_light_multiplier(palookuplight[1]); - light[2] = calc_light_multiplier(palookuplight[2]); - light[3] = calc_light_multiplier(palookuplight[3]); - - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - do - { - for (int i = 0; i < 4; ++i) - { - BYTE pix = bufplce[i][vplce[i] >> bits]; - if (pix != 0) - { - uint32_t fg = shade_pal_index(pix, light[i], shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (dest[i] >> 16) & 0xff; - uint32_t bg_green = (dest[i] >> 8) & 0xff; - uint32_t bg_blue = (dest[i]) & 0xff; - - uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = 
clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; - - dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - } - vplce[i] += vince[i]; - } - dest += dc_pitch; - } while (--count); -} - fixed_t tmvline1_revsubclamp_C () { DWORD fracstep = dc_iscale; @@ -4365,50 +2218,6 @@ fixed_t tmvline1_revsubclamp_C () return frac; } -fixed_t tmvline1_revsubclamp_RGBA() -{ - DWORD fracstep = dc_iscale; - DWORD frac = dc_texturefrac; - BYTE *colormap = dc_colormap; - int count = dc_count; - const BYTE *source = dc_source; - uint32_t *dest = (uint32_t*)dc_dest; - int bits = tmvlinebits; - int pitch = dc_pitch; - - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - do - { - BYTE pix = source[frac >> bits]; - if (pix != 0) - { - uint32_t fg = shade_pal_index(pix, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - } - frac += fracstep; - dest += pitch; - } while (--count); - - return frac; -} - void tmvline4_revsubclamp_C () { BYTE *dest = dc_dest; @@ -4440,52 +2249,6 @@ void tmvline4_revsubclamp_C () } while (--count); } -void tmvline4_revsubclamp_RGBA() -{ - uint32_t *dest = 
(uint32_t*)dc_dest; - int count = dc_count; - int bits = tmvlinebits; - - uint32_t light[4]; - light[0] = calc_light_multiplier(palookuplight[0]); - light[1] = calc_light_multiplier(palookuplight[1]); - light[2] = calc_light_multiplier(palookuplight[2]); - light[3] = calc_light_multiplier(palookuplight[3]); - - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - do - { - for (int i = 0; i < 4; ++i) - { - BYTE pix = bufplce[i][vplce[i] >> bits]; - if (pix != 0) - { - uint32_t fg = shade_pal_index(pix, light[i], shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (dest[i] >> 16) & 0xff; - uint32_t bg_green = (dest[i] >> 8) & 0xff; - uint32_t bg_blue = (dest[i]) & 0xff; - - uint32_t red = clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; - - dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - } - vplce[i] += vince[i]; - } - dest += dc_pitch; - } while (--count); -} - - //========================================================================== // // R_GetColumn @@ -4535,11 +2298,7 @@ void R_InitColumnDrawers () R_DrawTranslatedColumn = R_DrawTranslatedColumnP_RGBA_C; R_DrawShadedColumn = R_DrawShadedColumnP_RGBA_C; R_DrawSpanMasked = R_DrawSpanMaskedP_RGBA_C; -#ifndef NO_SSE - R_DrawSpan = R_DrawSpanP_RGBA_SSE; -#else - R_DrawSpan = R_DrawSpanP_RGBA_C; -#endif + R_DrawSpan = R_DrawSpanP_RGBA_C; R_DrawSpanTranslucent = R_DrawSpanTranslucentP_RGBA_C; R_DrawSpanMaskedTranslucent = R_DrawSpanMaskedTranslucentP_RGBA_C; @@ -4579,9 +2338,13 @@ void R_InitColumnDrawers () rt_copy1col = rt_copy1col_RGBA_c; 
rt_copy4cols = rt_copy4cols_RGBA_c; rt_map1col = rt_map1col_RGBA_c; + rt_map4cols = rt_map4cols_RGBA_c; rt_shaded1col = rt_shaded1col_RGBA_c; + rt_shaded4cols = rt_shaded4cols_RGBA_c; rt_add1col = rt_add1col_RGBA_c; + rt_add4cols = rt_add4cols_RGBA_c; rt_addclamp1col = rt_addclamp1col_RGBA_c; + rt_addclamp4cols = rt_addclamp4cols_RGBA_c; rt_subclamp1col = rt_subclamp1col_RGBA_c; rt_revsubclamp1col = rt_revsubclamp1col_RGBA_c; rt_tlate1col = rt_tlate1col_RGBA_c; @@ -4597,31 +2360,14 @@ void R_InitColumnDrawers () rt_tlatesubclamp4cols = rt_tlatesubclamp4cols_RGBA_c; rt_tlaterevsubclamp4cols = rt_tlaterevsubclamp4cols_RGBA_c; rt_initcols = rt_initcols_rgba; - -#ifndef NO_SSE - rt_map4cols = rt_map4cols_RGBA_SSE; - rt_add4cols = rt_add4cols_RGBA_SSE; - rt_addclamp4cols = rt_addclamp4cols_RGBA_SSE; - rt_shaded4cols = rt_shaded4cols_RGBA_SSE; -#else - rt_map4cols = rt_map4cols_RGBA_c; - rt_add4cols = rt_add4cols_RGBA_c; - rt_addclamp4cols = rt_addclamp4cols_RGBA_c; - rt_shaded4cols = rt_shaded4cols_RGBA_c; -#endif + rt_span_coverage = rt_span_coverage_rgba; dovline1 = vlinec1_RGBA; doprevline1 = vlinec1_RGBA; domvline1 = mvlinec1_RGBA; -#ifndef NO_SSE - dovline4 = vlinec4_RGBA_SSE; - domvline4 = mvlinec4_RGBA_SSE; -#else dovline4 = vlinec4_RGBA; domvline4 = mvlinec4_RGBA; -#endif - } else { @@ -4719,6 +2465,7 @@ void R_InitColumnDrawers () rt_tlatesubclamp4cols = rt_tlatesubclamp4cols_c; rt_tlaterevsubclamp4cols = rt_tlaterevsubclamp4cols_c; rt_initcols = rt_initcols_pal; + rt_span_coverage = rt_span_coverage_pal; if (pointers_saved) { diff --git a/src/r_draw.h b/src/r_draw.h index cc3b109355..98be57c513 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -24,6 +24,13 @@ #define __R_DRAW__ #include "r_defs.h" +#include + +// Spectre/Invisibility. 
+#define FUZZTABLE 50 +extern "C" int fuzzoffset[FUZZTABLE + 1]; // [RH] +1 for the assembly routine +extern "C" int fuzzpos; +extern "C" int fuzzviewheight; struct FColormap; struct ShadeConstants; @@ -173,7 +180,6 @@ void rt_copy4cols_RGBA_c (int sx, int yl, int yh); void rt_shaded1col_RGBA_c (int hx, int sx, int yl, int yh); void rt_shaded4cols_RGBA_c (int sx, int yl, int yh); -void rt_shaded4cols_RGBA_SSE (int sx, int yl, int yh); void rt_map1col_RGBA_c (int hx, int sx, int yl, int yh); void rt_add1col_RGBA_c (int hx, int sx, int yl, int yh); @@ -188,11 +194,8 @@ void rt_tlatesubclamp1col_RGBA_c (int hx, int sx, int yl, int yh); void rt_tlaterevsubclamp1col_RGBA_c (int hx, int sx, int yl, int yh); void rt_map4cols_RGBA_c (int sx, int yl, int yh); -void rt_map4cols_RGBA_SSE (int sx, int yl, int yh); void rt_add4cols_RGBA_c (int sx, int yl, int yh); -void rt_add4cols_RGBA_SSE (int sx, int yl, int yh); void rt_addclamp4cols_RGBA_c (int sx, int yl, int yh); -void rt_addclamp4cols_RGBA_SSE (int sx, int yl, int yh); void rt_subclamp4cols_RGBA_c (int sx, int yl, int yh); void rt_revsubclamp4cols_RGBA_c (int sx, int yl, int yh); @@ -235,6 +238,7 @@ extern void (*rt_tlatesubclamp4cols)(int sx, int yl, int yh); extern void (*rt_tlaterevsubclamp4cols)(int sx, int yl, int yh); extern void (*rt_initcols)(BYTE *buffer); +extern void (*rt_span_coverage)(int x, int start, int stop); void rt_draw4cols (int sx); @@ -242,6 +246,8 @@ void rt_draw4cols (int sx); void rt_initcols_pal (BYTE *buffer); void rt_initcols_rgba (BYTE *buffer); +void rt_span_coverage_pal(int x, int start, int stop); +void rt_span_coverage_rgba(int x, int start, int stop); extern void (*R_DrawFogBoundary)(int x1, int x2, short *uclip, short *dclip); @@ -277,9 +283,40 @@ void R_DrawFuzzColumnP_RGBA_C (void); void R_DrawTranslatedColumnP_RGBA_C (void); void R_DrawShadedColumnP_RGBA_C (void); void R_DrawSpanP_RGBA_C (void); -void R_DrawSpanP_RGBA_SSE (void); void R_DrawSpanMaskedP_RGBA_C (void); +void 
R_DrawSpanTranslucentP_RGBA_C(); +void R_DrawSpanMaskedTranslucentP_RGBA_C(); +void R_DrawSpanAddClampP_RGBA_C(); +void R_DrawSpanMaskedAddClampP_RGBA_C(); +void R_FillColumnP_RGBA(); +void R_FillAddColumn_RGBA_C(); +void R_FillAddClampColumn_RGBA(); +void R_FillSubClampColumn_RGBA(); +void R_FillRevSubClampColumn_RGBA(); +void R_DrawAddColumnP_RGBA_C(); +void R_DrawTlatedAddColumnP_RGBA_C(); +void R_DrawAddClampColumnP_RGBA_C(); +void R_DrawAddClampTranslatedColumnP_RGBA_C(); +void R_DrawSubClampColumnP_RGBA_C(); +void R_DrawSubClampTranslatedColumnP_RGBA_C(); +void R_DrawRevSubClampColumnP_RGBA_C(); +void R_DrawRevSubClampTranslatedColumnP_RGBA_C(); +void R_FillSpan_RGBA(); +void R_DrawFogBoundary_RGBA(int x1, int x2, short *uclip, short *dclip); +fixed_t tmvline1_add_RGBA(); +void tmvline4_add_RGBA(); +fixed_t tmvline1_addclamp_RGBA(); +void tmvline4_addclamp_RGBA(); +fixed_t tmvline1_subclamp_RGBA(); +void tmvline4_subclamp_RGBA(); +fixed_t tmvline1_revsubclamp_RGBA(); +void tmvline4_revsubclamp_RGBA(); +DWORD vlinec1_RGBA(); +void vlinec4_RGBA(); +DWORD mvlinec1_RGBA(); +void mvlinec4_RGBA(); + void R_DrawSpanTranslucentP_C (void); void R_DrawSpanMaskedTranslucentP_C (void); @@ -403,4 +440,52 @@ void R_SetDSColorMapLight(FColormap *base_colormap, float light, int shade); void R_SetTranslationMap(lighttable_t *translation); +// Wait until all drawers finished executing +void R_FinishDrawerCommands(); + +class DrawerThread +{ +public: + int core = 0; + int num_cores = 1; + + uint32_t dc_temp_rgbabuff_rgba[MAXHEIGHT * 4]; + uint32_t *dc_temp_rgba; +}; + +class DrawerCommand +{ +public: + virtual void Execute(DrawerThread *thread) = 0; +}; + +class DrawerCommandQueue +{ + enum { memorypool_size = 4 * 1024 * 1024 }; + char memorypool[memorypool_size]; + size_t memorypool_pos = 0; + + std::vector commands; + + static DrawerCommandQueue *Instance(); + +public: + // Allocate memory valid for the duration of a command execution + static void* AllocMemory(size_t size); 
+ + // Queue command to be executed by drawer worker threads + template + static void QueueCommand(Types &&... args) + { + void *ptr = AllocMemory(sizeof(T)); + T *command = new (ptr)T(std::forward(args)...); + if (!command) + return; + Instance()->commands.push_back(command); + } + + // Wait until all worker threads finished executing commands + static void Finish(); +}; + #endif diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp new file mode 100644 index 0000000000..9e61bb427a --- /dev/null +++ b/src/r_draw_rgba.cpp @@ -0,0 +1,3492 @@ +// Emacs style mode select -*- C++ -*- +//----------------------------------------------------------------------------- +// +// $Id:$ +// +// Copyright (C) 1993-1996 by id Software, Inc. +// +// This source is available for distribution and/or modification +// only under the terms of the DOOM Source Code License as +// published by id Software. All rights reserved. +// +// The source is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// FITNESS FOR A PARTICULAR PURPOSE. See the DOOM Source Code License +// for more details. +// +// $Log:$ +// +// DESCRIPTION: +// True color span/column drawing functions. 
+// +//----------------------------------------------------------------------------- + +#include + +#include "templates.h" +#include "doomdef.h" +#include "i_system.h" +#include "w_wad.h" +#include "r_local.h" +#include "v_video.h" +#include "doomstat.h" +#include "st_stuff.h" +#include "g_game.h" +#include "g_level.h" +#include "r_data/r_translate.h" +#include "v_palette.h" +#include "r_data/colormaps.h" +#include "r_plane.h" + +#include "gi.h" +#include "stats.h" +#include "x86.h" +#ifndef NO_SSE +#include +#endif +#include + +extern int vlinebits; +extern int mvlinebits; +extern int tmvlinebits; + +extern "C" short spanend[MAXHEIGHT]; +extern float rw_light; +extern float rw_lightstep; +extern int wallshade; + +///////////////////////////////////////////////////////////////////////////// + +DrawerCommandQueue *DrawerCommandQueue::Instance() +{ + static DrawerCommandQueue queue; + return &queue; +} + +void* DrawerCommandQueue::AllocMemory(size_t size) +{ + // Make sure allocations remain 16-byte aligned + size = (size + 15) / 16 * 16; + + auto queue = Instance(); + if (queue->memorypool_pos + size > memorypool_size) + return nullptr; + + void *data = queue->memorypool + queue->memorypool_pos; + queue->memorypool_pos += size; + return data; +} + +void DrawerCommandQueue::Finish() +{ + auto queue = Instance(); + + DrawerThread thread; + + size_t size = queue->commands.size(); + for (size_t i = 0; i < size; i++) + { + auto &command = queue->commands[i]; + command->Execute(&thread); + } + + for (auto &command : queue->commands) + command->~DrawerCommand(); + queue->commands.clear(); + queue->memorypool_pos = 0; +} + +///////////////////////////////////////////////////////////////////////////// + +class DrawColumnRGBACommand : public DrawerCommand +{ + int dc_count; + BYTE *dc_dest; + fixed_t dc_texturefrac; + fixed_t dc_iscale; + fixed_t dc_light; + const BYTE *dc_source; + int dc_pitch; + ShadeConstants dc_shade_constants; + +public: + DrawColumnRGBACommand() + { + 
dc_count = ::dc_count; + dc_dest = ::dc_dest; + dc_texturefrac = ::dc_texturefrac; + dc_iscale = ::dc_iscale; + dc_light = ::dc_light; + dc_source = ::dc_source; + dc_pitch = ::dc_pitch; + dc_shade_constants = ::dc_shade_constants; + } + + void Execute(DrawerThread *thread) override + { + int count; + uint32_t* dest; + fixed_t frac; + fixed_t fracstep; + + count = dc_count; + + // Zero length, column does not exceed a pixel. + if (count <= 0) + return; + + // Framebuffer destination address. + dest = (uint32_t*)dc_dest; + + uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; + + // Determine scaling, + // which is the only mapping to be done. + fracstep = dc_iscale; + frac = dc_texturefrac; + + { + // [RH] Get local copies of these variables so that the compiler + // has a better chance of optimizing this well. + const BYTE *source = dc_source; + int pitch = dc_pitch; + + // Inner loop that does the actual texture mapping, + // e.g. a DDA-lile scaling. + // This is as fast as it gets. 
+ do + { + *dest = shade_pal_index(source[frac >> FRACBITS], light, shade_constants); + + dest += pitch; + frac += fracstep; + + } while (--count); + } + } +}; + +class FillColumnRGBACommand : public DrawerCommand +{ + int dc_count; + BYTE *dc_dest; + fixed_t dc_light; + int dc_pitch; + int dc_color; + +public: + FillColumnRGBACommand() + { + dc_count = ::dc_count; + dc_dest = ::dc_dest; + dc_light = ::dc_light; + dc_pitch = ::dc_pitch; + dc_color = ::dc_color; + } + + void Execute(DrawerThread *thread) override + { + int count; + uint32_t* dest; + + count = dc_count; + + if (count <= 0) + return; + + dest = (uint32_t*)dc_dest; + + uint32_t light = calc_light_multiplier(dc_light); + + { + int pitch = dc_pitch; + BYTE color = dc_color; + + do + { + *dest = shade_pal_index_simple(color, light); + dest += pitch; + } while (--count); + } + } +}; + +class FillAddColumnRGBACommand : public DrawerCommand +{ + int dc_count; + BYTE *dc_dest; + int dc_pitch; + fixed_t dc_light; + int dc_color; + +public: + FillAddColumnRGBACommand() + { + dc_count = ::dc_count; + dc_dest = ::dc_dest; + dc_pitch = ::dc_pitch; + dc_light = ::dc_light; + dc_color = ::dc_color; + } + + void Execute(DrawerThread *thread) override + { + int count; + uint32_t *dest; + + count = dc_count; + if (count <= 0) + return; + + dest = (uint32_t*)dc_dest; + int pitch = dc_pitch; + + uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); + uint32_t fg_red = (fg >> 24) & 0xff; + uint32_t fg_green = (fg >> 16) & 0xff; + uint32_t fg_blue = fg & 0xff; + + do + { + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = (fg_red + bg_red + 1) / 2; + uint32_t green = (fg_green + bg_green + 1) / 2; + uint32_t blue = (fg_blue + bg_blue + 1) / 2; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += pitch; + } while (--count); + } +}; + +class FillAddClampColumnRGBACommand : public DrawerCommand 
+{ + int dc_count; + BYTE *dc_dest; + int dc_pitch; + fixed_t dc_light; + int dc_color; + +public: + FillAddClampColumnRGBACommand() + { + dc_count = ::dc_count; + dc_dest = ::dc_dest; + dc_pitch = ::dc_pitch; + dc_light = ::dc_light; + dc_color = ::dc_color; + } + + void Execute(DrawerThread *thread) override + { + int count; + uint32_t *dest; + + count = dc_count; + if (count <= 0) + return; + + dest = (uint32_t*)dc_dest; + int pitch = dc_pitch; + + uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); + uint32_t fg_red = (fg >> 24) & 0xff; + uint32_t fg_green = (fg >> 16) & 0xff; + uint32_t fg_blue = fg & 0xff; + + do + { + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(fg_red + bg_red, 0, 255); + uint32_t green = clamp(fg_green + bg_green, 0, 255); + uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += pitch; + } while (--count); + } +}; + +class FillSubClampColumnRGBACommand : public DrawerCommand +{ + int dc_count; + BYTE *dc_dest; + int dc_pitch; + int dc_color; + fixed_t dc_light; + +public: + FillSubClampColumnRGBACommand() + { + dc_count = ::dc_count; + dc_dest = ::dc_dest; + dc_pitch = ::dc_pitch; + dc_color = ::dc_color; + dc_light = ::dc_light; + } + + void Execute(DrawerThread *thread) override + { + int count; + uint32_t *dest; + + count = dc_count; + if (count <= 0) + return; + + dest = (uint32_t*)dc_dest; + int pitch = dc_pitch; + + uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); + uint32_t fg_red = (fg >> 24) & 0xff; + uint32_t fg_green = (fg >> 16) & 0xff; + uint32_t fg_blue = fg & 0xff; + + do + { + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(256 - fg_red + bg_red, 256, 256 + 255) - 255; + uint32_t green = clamp(256 - 
fg_green + bg_green, 256, 256 + 255) - 255; + uint32_t blue = clamp(256 - fg_blue + bg_blue, 256, 256 + 255) - 255; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += pitch; + } while (--count); + } +}; + +class FillRevSubClampColumnRGBACommand : public DrawerCommand +{ + int dc_count; + BYTE *dc_dest; + int dc_pitch; + int dc_color; + fixed_t dc_light; + +public: + FillRevSubClampColumnRGBACommand() + { + dc_count = ::dc_count; + dc_dest = ::dc_dest; + dc_pitch = ::dc_pitch; + dc_color = ::dc_color; + dc_light = ::dc_light; + } + + void Execute(DrawerThread *thread) override + { + int count; + uint32_t *dest; + + count = dc_count; + if (count <= 0) + return; + + dest = (uint32_t*)dc_dest; + int pitch = dc_pitch; + + uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); + uint32_t fg_red = (fg >> 24) & 0xff; + uint32_t fg_green = (fg >> 16) & 0xff; + uint32_t fg_blue = fg & 0xff; + + do + { + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(256 + fg_red - bg_red, 256, 256 + 255) - 255; + uint32_t green = clamp(256 + fg_green - bg_green, 256, 256 + 255) - 255; + uint32_t blue = clamp(256 + fg_blue - bg_blue, 256, 256 + 255) - 255; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += pitch; + } while (--count); + } +}; + +class DrawFuzzColumnRGBACommand : public DrawerCommand +{ + int dc_x; + int dc_yl; + int dc_yh; + BYTE *dc_destorg; + int dc_pitch; + int fuzzpos; + int fuzzviewheight; + +public: + DrawFuzzColumnRGBACommand() + { + dc_x = ::dc_x; + dc_yl = ::dc_yl; + dc_yh = ::dc_yh; + dc_destorg = ::dc_destorg; + dc_pitch = ::dc_pitch; + fuzzpos = ::fuzzpos; + fuzzviewheight = ::fuzzviewheight; + } + + void Execute(DrawerThread *thread) override + { + int count; + uint32_t *dest; + + // Adjust borders. Low... + if (dc_yl == 0) + dc_yl = 1; + + // .. and high. 
+ if (dc_yh > fuzzviewheight) + dc_yh = fuzzviewheight; + + count = dc_yh - dc_yl; + + // Zero length. + if (count < 0) + return; + + count++; + + dest = ylookup[dc_yl] + dc_x + (uint32_t*)dc_destorg; + + // Note: this implementation assumes this function is only used for the pinky shadow effect (i.e. no other fancy colormap than black) + // I'm not sure if this is really always the case or not. + + { + // [RH] Make local copies of global vars to try and improve + // the optimizations made by the compiler. + int pitch = dc_pitch; + int fuzz = fuzzpos; + int cnt; + + // [RH] Split this into three separate loops to minimize + // the number of times fuzzpos needs to be clamped. + if (fuzz) + { + cnt = MIN(FUZZTABLE - fuzz, count); + count -= cnt; + do + { + uint32_t bg = dest[fuzzoffset[fuzz++]]; + uint32_t bg_red = (bg >> 16) & 0xff; + uint32_t bg_green = (bg >> 8) & 0xff; + uint32_t bg_blue = (bg) & 0xff; + + uint32_t red = bg_red * 3 / 4; + uint32_t green = bg_green * 3 / 4; + uint32_t blue = bg_blue * 3 / 4; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += pitch; + } while (--cnt); + } + if (fuzz == FUZZTABLE || count > 0) + { + while (count >= FUZZTABLE) + { + fuzz = 0; + cnt = FUZZTABLE; + count -= FUZZTABLE; + do + { + uint32_t bg = dest[fuzzoffset[fuzz++]]; + uint32_t bg_red = (bg >> 16) & 0xff; + uint32_t bg_green = (bg >> 8) & 0xff; + uint32_t bg_blue = (bg) & 0xff; + + uint32_t red = bg_red * 3 / 4; + uint32_t green = bg_green * 3 / 4; + uint32_t blue = bg_blue * 3 / 4; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += pitch; + } while (--cnt); + } + fuzz = 0; + if (count > 0) + { + do + { + uint32_t bg = dest[fuzzoffset[fuzz++]]; + uint32_t bg_red = (bg >> 16) & 0xff; + uint32_t bg_green = (bg >> 8) & 0xff; + uint32_t bg_blue = (bg) & 0xff; + + uint32_t red = bg_red * 3 / 4; + uint32_t green = bg_green * 3 / 4; + uint32_t blue = bg_blue * 3 / 4; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += 
pitch; + } while (--count); + } + } + fuzzpos = fuzz; + } + } +}; + +class DrawAddColumnRGBACommand : public DrawerCommand +{ + int dc_count; + BYTE *dc_dest; + fixed_t dc_iscale; + fixed_t dc_texturefrac; + const BYTE *dc_source; + int dc_pitch; + fixed_t dc_light; + ShadeConstants dc_shade_constants; + fixed_t dc_srcalpha; + fixed_t dc_destalpha; + +public: + DrawAddColumnRGBACommand() + { + dc_count = ::dc_count; + dc_dest = ::dc_dest; + dc_iscale = ::dc_iscale; + dc_texturefrac = ::dc_texturefrac; + dc_source = ::dc_source; + dc_pitch = ::dc_pitch; + dc_light = ::dc_light; + dc_shade_constants = ::dc_shade_constants; + dc_srcalpha = ::dc_srcalpha; + dc_destalpha = ::dc_destalpha; + } + + void Execute(DrawerThread *thread) override + { + int count; + uint32_t *dest; + fixed_t frac; + fixed_t fracstep; + + count = dc_count; + if (count <= 0) + return; + + dest = (uint32_t*)dc_dest; + + fracstep = dc_iscale; + frac = dc_texturefrac; + + { + const BYTE *source = dc_source; + int pitch = dc_pitch; + + uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + do + { + uint32_t fg = shade_pal_index(source[frac >> FRACBITS], light, shade_constants); + + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += pitch; + frac += fracstep; + } while (--count); + } + } +}; + +class DrawTranslatedColumnRGBACommand : public 
DrawerCommand +{ + int dc_count; + fixed_t dc_light; + ShadeConstants dc_shade_constants; + BYTE *dc_dest; + fixed_t dc_iscale; + fixed_t dc_texturefrac; + BYTE *dc_translation; + const BYTE *dc_source; + int dc_pitch; + +public: + DrawTranslatedColumnRGBACommand() + { + dc_count = ::dc_count; + dc_light = ::dc_light; + dc_shade_constants = ::dc_shade_constants; + dc_dest = ::dc_dest; + dc_iscale = ::dc_iscale; + dc_texturefrac = ::dc_texturefrac; + dc_translation = ::dc_translation; + dc_source = ::dc_source; + dc_pitch = ::dc_pitch; + } + + void Execute(DrawerThread *thread) override + { + int count; + uint32_t* dest; + fixed_t frac; + fixed_t fracstep; + + count = dc_count; + if (count <= 0) + return; + + uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; + + dest = (uint32_t*)dc_dest; + + fracstep = dc_iscale; + frac = dc_texturefrac; + + { + // [RH] Local copies of global vars to improve compiler optimizations + BYTE *translation = dc_translation; + const BYTE *source = dc_source; + int pitch = dc_pitch; + + do + { + *dest = shade_pal_index(translation[source[frac >> FRACBITS]], light, shade_constants); + dest += pitch; + frac += fracstep; + } while (--count); + } + } +}; + +class DrawTlatedAddColumnRGBACommand : public DrawerCommand +{ + int dc_count; + fixed_t dc_light; + ShadeConstants dc_shade_constants; + BYTE *dc_dest; + fixed_t dc_iscale; + fixed_t dc_texturefrac; + BYTE *dc_translation; + const BYTE *dc_source; + int dc_pitch; + +public: + DrawTlatedAddColumnRGBACommand() + { + dc_count = ::dc_count; + dc_light = ::dc_light; + dc_shade_constants = ::dc_shade_constants; + dc_dest = ::dc_dest; + dc_iscale = ::dc_iscale; + dc_texturefrac = ::dc_texturefrac; + dc_translation = ::dc_translation; + dc_source = ::dc_source; + dc_pitch = ::dc_pitch; + } + + void Execute(DrawerThread *thread) override + { + int count; + uint32_t *dest; + fixed_t frac; + fixed_t fracstep; + + count = dc_count; + if (count <= 
0) + return; + + uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; + + dest = (uint32_t*)dc_dest; + + fracstep = dc_iscale; + frac = dc_texturefrac; + + { + BYTE *translation = dc_translation; + const BYTE *source = dc_source; + int pitch = dc_pitch; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + do + { + uint32_t fg = shade_pal_index(translation[source[frac >> FRACBITS]], light, shade_constants); + + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += pitch; + frac += fracstep; + } while (--count); + } + } +}; + +class DrawShadedColumnRGBACommand : public DrawerCommand +{ +private: + int dc_count; + BYTE *dc_dest; + fixed_t dc_iscale; + fixed_t dc_texturefrac; + fixed_t dc_light; + const BYTE *dc_source; + lighttable_t *dc_colormap; + int dc_color; + int dc_pitch; + +public: + DrawShadedColumnRGBACommand() + { + dc_count = ::dc_count; + dc_dest = ::dc_dest; + dc_iscale = ::dc_iscale; + dc_texturefrac = ::dc_texturefrac; + dc_light = ::dc_light; + dc_source = ::dc_source; + dc_colormap = ::dc_colormap; + dc_color = ::dc_color; + dc_pitch = ::dc_pitch; + } + + void Execute(DrawerThread *thread) override + { + int count; + uint32_t *dest; + fixed_t frac, fracstep; + + count = dc_count; + + if (count <= 0) + return; + + dest = (uint32_t*)dc_dest; + + fracstep = dc_iscale; + frac = dc_texturefrac; + + uint32_t fg = shade_pal_index_simple(dc_color, 
calc_light_multiplier(dc_light)); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + { + const BYTE *source = dc_source; + BYTE *colormap = dc_colormap; + int pitch = dc_pitch; + + do + { + DWORD alpha = clamp(colormap[source[frac >> FRACBITS]], 0, 64); + DWORD inv_alpha = 64 - alpha; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = (fg_red * alpha + bg_red * inv_alpha) / 64; + uint32_t green = (fg_green * alpha + bg_green * inv_alpha) / 64; + uint32_t blue = (fg_blue * alpha + bg_blue * inv_alpha) / 64; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += pitch; + frac += fracstep; + } while (--count); + } + } +}; + +class DrawAddClampColumnRGBACommand : public DrawerCommand +{ + int dc_count; + BYTE *dc_dest; + fixed_t dc_iscale; + fixed_t dc_texturefrac; + const BYTE *dc_source; + int dc_pitch; + fixed_t dc_light; + ShadeConstants dc_shade_constants; + fixed_t dc_srcalpha; + fixed_t dc_destalpha; + +public: + DrawAddClampColumnRGBACommand() + { + dc_count = ::dc_count; + dc_dest = ::dc_dest; + dc_iscale = ::dc_iscale; + dc_texturefrac = ::dc_texturefrac; + dc_source = ::dc_source; + dc_pitch = ::dc_pitch; + dc_light = ::dc_light; + dc_shade_constants = ::dc_shade_constants; + dc_srcalpha = ::dc_srcalpha; + dc_destalpha = ::dc_destalpha; + } + + void Execute(DrawerThread *thread) override + { + int count; + uint32_t *dest; + fixed_t frac; + fixed_t fracstep; + + count = dc_count; + if (count <= 0) + return; + + dest = (uint32_t*)dc_dest; + + fracstep = dc_iscale; + frac = dc_texturefrac; + + { + const BYTE *source = dc_source; + int pitch = dc_pitch; + uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + do + { + uint32_t fg = 
shade_pal_index(source[frac >> FRACBITS], light, shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += pitch; + frac += fracstep; + } while (--count); + } + } +}; + +class DrawAddClampTranslatedColumnRGBACommand : public DrawerCommand +{ + int dc_count; + BYTE *dc_dest; + fixed_t dc_iscale; + fixed_t dc_texturefrac; + BYTE *dc_translation; + const BYTE *dc_source; + int dc_pitch; + fixed_t dc_light; + ShadeConstants dc_shade_constants; + fixed_t dc_srcalpha; + fixed_t dc_destalpha; + +public: + DrawAddClampTranslatedColumnRGBACommand() + { + dc_count = ::dc_count; + dc_dest = ::dc_dest; + dc_iscale = ::dc_iscale; + dc_texturefrac = ::dc_texturefrac; + dc_translation = ::dc_translation; + dc_source = ::dc_source; + dc_pitch = ::dc_pitch; + dc_light = ::dc_light; + dc_shade_constants = ::dc_shade_constants; + dc_srcalpha = ::dc_srcalpha; + dc_destalpha = ::dc_destalpha; + } + + void Execute(DrawerThread *thread) override + { + int count; + uint32_t *dest; + fixed_t frac; + fixed_t fracstep; + + count = dc_count; + if (count <= 0) + return; + + dest = (uint32_t*)dc_dest; + + fracstep = dc_iscale; + frac = dc_texturefrac; + + { + BYTE *translation = dc_translation; + const BYTE *source = dc_source; + int pitch = dc_pitch; + uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + do + { + uint32_t fg = 
shade_pal_index(translation[source[frac >> FRACBITS]], light, shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += pitch; + frac += fracstep; + } while (--count); + } + } +}; + +class DrawSubClampColumnRGBACommand : public DrawerCommand +{ + int dc_count; + BYTE *dc_dest; + fixed_t dc_iscale; + fixed_t dc_texturefrac; + const BYTE *dc_source; + int dc_pitch; + fixed_t dc_light; + ShadeConstants dc_shade_constants; + fixed_t dc_srcalpha; + fixed_t dc_destalpha; + +public: + DrawSubClampColumnRGBACommand() + { + dc_count = ::dc_count; + dc_dest = ::dc_dest; + dc_iscale = ::dc_iscale; + dc_texturefrac = ::dc_texturefrac; + dc_source = ::dc_source; + dc_pitch = ::dc_pitch; + dc_light = ::dc_light; + dc_shade_constants = ::dc_shade_constants; + dc_srcalpha = ::dc_srcalpha; + dc_destalpha = ::dc_destalpha; + } + + void Execute(DrawerThread *thread) override + { + int count; + uint32_t *dest; + fixed_t frac; + fixed_t fracstep; + + count = dc_count; + if (count <= 0) + return; + + dest = (uint32_t*)dc_dest; + + fracstep = dc_iscale; + frac = dc_texturefrac; + + { + const BYTE *source = dc_source; + int pitch = dc_pitch; + uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + do + { + uint32_t fg = shade_pal_index(source[frac >> FRACBITS], light, shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + 
uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += pitch; + frac += fracstep; + } while (--count); + } + } +}; + +class DrawSubClampTranslatedColumnRGBACommand : public DrawerCommand +{ + int dc_count; + BYTE *dc_dest; + fixed_t dc_iscale; + fixed_t dc_texturefrac; + const BYTE *dc_source; + int dc_pitch; + fixed_t dc_light; + ShadeConstants dc_shade_constants; + fixed_t dc_srcalpha; + fixed_t dc_destalpha; + BYTE *dc_translation; + +public: + DrawSubClampTranslatedColumnRGBACommand() + { + dc_count = ::dc_count; + dc_dest = ::dc_dest; + dc_iscale = ::dc_iscale; + dc_texturefrac = ::dc_texturefrac; + dc_source = ::dc_source; + dc_pitch = ::dc_pitch; + dc_light = ::dc_light; + dc_shade_constants = ::dc_shade_constants; + dc_srcalpha = ::dc_srcalpha; + dc_destalpha = ::dc_destalpha; + dc_translation = ::dc_translation; + } + + void Execute(DrawerThread *thread) override + { + int count; + uint32_t *dest; + fixed_t frac; + fixed_t fracstep; + + count = dc_count; + if (count <= 0) + return; + + dest = (uint32_t*)dc_dest; + + fracstep = dc_iscale; + frac = dc_texturefrac; + + { + BYTE *translation = dc_translation; + const BYTE *source = dc_source; + int pitch = dc_pitch; + uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + do + { + uint32_t fg = 
shade_pal_index(translation[source[frac >> FRACBITS]], light, shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += pitch; + frac += fracstep; + } while (--count); + } + } +}; + +class DrawRevSubClampColumnRGBACommand : public DrawerCommand +{ + int dc_count; + BYTE *dc_dest; + fixed_t dc_iscale; + fixed_t dc_texturefrac; + const BYTE *dc_source; + int dc_pitch; + fixed_t dc_light; + ShadeConstants dc_shade_constants; + fixed_t dc_srcalpha; + fixed_t dc_destalpha; + +public: + DrawRevSubClampColumnRGBACommand() + { + dc_count = ::dc_count; + dc_dest = ::dc_dest; + dc_iscale = ::dc_iscale; + dc_texturefrac = ::dc_texturefrac; + dc_source = ::dc_source; + dc_pitch = ::dc_pitch; + dc_light = ::dc_light; + dc_shade_constants = ::dc_shade_constants; + dc_srcalpha = ::dc_srcalpha; + dc_destalpha = ::dc_destalpha; + } + + void Execute(DrawerThread *thread) override + { + int count; + uint32_t *dest; + fixed_t frac; + fixed_t fracstep; + + count = dc_count; + if (count <= 0) + return; + + dest = (uint32_t*)dc_dest; + + fracstep = dc_iscale; + frac = dc_texturefrac; + + { + const BYTE *source = dc_source; + int pitch = dc_pitch; + uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + do + { + uint32_t fg = shade_pal_index(source[frac >> 
FRACBITS], light, shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += pitch; + frac += fracstep; + } while (--count); + } + } +}; + +class DrawRevSubClampTranslatedColumnRGBACommand : public DrawerCommand +{ + int dc_count; + BYTE *dc_dest; + fixed_t dc_iscale; + fixed_t dc_texturefrac; + const BYTE *dc_source; + int dc_pitch; + fixed_t dc_light; + ShadeConstants dc_shade_constants; + fixed_t dc_srcalpha; + fixed_t dc_destalpha; + BYTE *dc_translation; + +public: + DrawRevSubClampTranslatedColumnRGBACommand() + { + dc_count = ::dc_count; + dc_dest = ::dc_dest; + dc_iscale = ::dc_iscale; + dc_texturefrac = ::dc_texturefrac; + dc_source = ::dc_source; + dc_pitch = ::dc_pitch; + dc_light = ::dc_light; + dc_shade_constants = ::dc_shade_constants; + dc_srcalpha = ::dc_srcalpha; + dc_destalpha = ::dc_destalpha; + dc_translation = ::dc_translation; + } + + void Execute(DrawerThread *thread) override + { + int count; + uint32_t *dest; + fixed_t frac; + fixed_t fracstep; + + count = dc_count; + if (count <= 0) + return; + + dest = (uint32_t*)dc_dest; + + fracstep = dc_iscale; + frac = dc_texturefrac; + + { + BYTE *translation = dc_translation; + const BYTE *source = dc_source; + int pitch = dc_pitch; + uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha 
>> (FRACBITS - 8); + + do + { + uint32_t fg = shade_pal_index(translation[source[frac >> FRACBITS]], light, shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += pitch; + frac += fracstep; + } while (--count); + } + } +}; + +class DrawSpanRGBACommand : public DrawerCommand +{ + const BYTE *ds_source; + fixed_t ds_xfrac; + fixed_t ds_yfrac; + fixed_t ds_xstep; + fixed_t ds_ystep; + int ds_x1; + int ds_x2; + int ds_y; + int ds_xbits; + int ds_ybits; + BYTE *dc_destorg; + fixed_t ds_light; + ShadeConstants ds_shade_constants; + +public: + DrawSpanRGBACommand() + { + ds_source = ::ds_source; + ds_xfrac = ::ds_xfrac; + ds_yfrac = ::ds_yfrac; + ds_xstep = ::ds_xstep; + ds_ystep = ::ds_ystep; + ds_x1 = ::ds_x1; + ds_x2 = ::ds_x2; + ds_y = ::ds_y; + ds_xbits = ::ds_xbits; + ds_ybits = ::ds_ybits; + dc_destorg = ::dc_destorg; + ds_light = ::ds_light; + ds_shade_constants = ::ds_shade_constants; + } + +#ifdef NO_SSE + void Execute(DrawerThread *thread) override + { + dsfixed_t xfrac; + dsfixed_t yfrac; + dsfixed_t xstep; + dsfixed_t ystep; + uint32_t* dest; + const BYTE* source = ds_source; + int count; + int spot; + + xfrac = ds_xfrac; + yfrac = ds_yfrac; + + dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; + + count = ds_x2 - ds_x1 + 1; + + xstep = ds_xstep; + ystep = ds_ystep; + + uint32_t light = calc_light_multiplier(ds_light); + ShadeConstants shade_constants = ds_shade_constants; + + 
if (ds_xbits == 6 && ds_ybits == 6) + { + // 64x64 is the most common case by far, so special case it. + + do + { + // Current texture index in u,v. + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + + // Lookup pixel from flat texture tile + *dest++ = shade_pal_index(source[spot], light, shade_constants); + + // Next step in u,v. + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + else + { + BYTE yshift = 32 - ds_ybits; + BYTE xshift = yshift - ds_xbits; + int xmask = ((1 << ds_xbits) - 1) << ds_ybits; + + do + { + // Current texture index in u,v. + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + + // Lookup pixel from flat texture tile + *dest++ = shade_pal_index(source[spot], light, shade_constants); + + // Next step in u,v. + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + } +#else + void Execute(DrawerThread *thread) override + { + dsfixed_t xfrac; + dsfixed_t yfrac; + dsfixed_t xstep; + dsfixed_t ystep; + uint32_t* dest; + const BYTE* source = ds_source; + int count; + int spot; + + xfrac = ds_xfrac; + yfrac = ds_yfrac; + + dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; + + count = ds_x2 - ds_x1 + 1; + + xstep = ds_xstep; + ystep = ds_ystep; + + uint32_t light = calc_light_multiplier(ds_light); + ShadeConstants shade_constants = ds_shade_constants; + + if (ds_xbits == 6 && ds_ybits == 6) + { + // 64x64 is the most common case by far, so special case it. + + uint32_t *palette = (uint32_t*)GPalette.BaseColors; + + int sse_count = count / 4; + count -= sse_count * 4; + + if (shade_constants.simple_shade) + { + SSE_SHADE_SIMPLE_INIT(light); + + while (sse_count--) + { + // Current texture index in u,v. 
+ spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p0 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p1 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p2 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p3 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + // Lookup pixel from flat texture tile, + // re-index using light/colormap. + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE_SIMPLE(fg); + _mm_storeu_si128((__m128i*)dest, fg); + + // Next step in u,v. + dest += 4; + } + } + else + { + SSE_SHADE_INIT(light, shade_constants); + + while (sse_count--) + { + // Current texture index in u,v. + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p0 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p1 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p2 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p3 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + // Lookup pixel from flat texture tile, + // re-index using light/colormap. + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE(fg, shade_constants); + _mm_storeu_si128((__m128i*)dest, fg); + + // Next step in u,v. + dest += 4; + } + } + + if (count == 0) + return; + + do + { + // Current texture index in u,v. 
+ spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + + // Lookup pixel from flat texture tile + *dest++ = shade_pal_index(source[spot], light, shade_constants); + + // Next step in u,v. + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + else + { + BYTE yshift = 32 - ds_ybits; + BYTE xshift = yshift - ds_xbits; + int xmask = ((1 << ds_xbits) - 1) << ds_ybits; + + do + { + // Current texture index in u,v. + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + + // Lookup pixel from flat texture tile + *dest++ = shade_pal_index(source[spot], light, shade_constants); + + // Next step in u,v. + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + } +#endif +}; + +class DrawSpanMaskedRGBACommand : public DrawerCommand +{ + const BYTE *ds_source; + fixed_t ds_light; + ShadeConstants ds_shade_constants; + fixed_t ds_xfrac; + fixed_t ds_yfrac; + BYTE *dc_destorg; + int ds_x1; + int ds_y1; + int ds_y; + fixed_t ds_xstep; + fixed_t ds_ystep; + int ds_xbits; + int ds_ybits; + +public: + DrawSpanMaskedRGBACommand() + { + ds_source = ::ds_source; + ds_light = ::ds_light; + ds_shade_constants = ::ds_shade_constants; + ds_xfrac = ::ds_xfrac; + ds_yfrac = ::ds_yfrac; + dc_destorg = ::dc_destorg; + ds_x1 = ::ds_x1; + ds_x2 = ::ds_x2; + ds_y = ::ds_y; + ds_xstep = ::ds_xstep; + ds_ystep = ::ds_ystep; + ds_xbits = ::ds_xbits; + ds_ybits = ::ds_ybits; + } + + void Execute(DrawerThread *thread) override + { + dsfixed_t xfrac; + dsfixed_t yfrac; + dsfixed_t xstep; + dsfixed_t ystep; + uint32_t* dest; + const BYTE* source = ds_source; + int count; + int spot; + + uint32_t light = calc_light_multiplier(ds_light); + ShadeConstants shade_constants = ds_shade_constants; + + xfrac = ds_xfrac; + yfrac = ds_yfrac; + + dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; + + count = ds_x2 - ds_x1 + 1; + + xstep = ds_xstep; + ystep = ds_ystep; + + if (ds_xbits == 6 && ds_ybits == 6) + { + // 64x64 is the most common case by far, so special case it. 
+ do + { + BYTE texdata; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + texdata = source[spot]; + if (texdata != 0) + { + *dest = shade_pal_index(texdata, light, shade_constants); + } + dest++; + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + else + { + BYTE yshift = 32 - ds_ybits; + BYTE xshift = yshift - ds_xbits; + int xmask = ((1 << ds_xbits) - 1) << ds_ybits; + do + { + BYTE texdata; + + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + texdata = source[spot]; + if (texdata != 0) + { + *dest = shade_pal_index(texdata, light, shade_constants); + } + dest++; + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + } +}; + +class DrawSpanTranslucentRGBACommand : public DrawerCommand +{ + const BYTE *ds_source; + fixed_t ds_light; + ShadeConstants ds_shade_constants; + fixed_t ds_xfrac; + fixed_t ds_yfrac; + BYTE *dc_destorg; + int ds_x1; + int ds_y1; + int ds_y; + fixed_t ds_xstep; + fixed_t ds_ystep; + int ds_xbits; + int ds_ybits; + +public: + DrawSpanTranslucentRGBACommand() + { + ds_source = ::ds_source; + ds_light = ::ds_light; + ds_shade_constants = ::ds_shade_constants; + ds_xfrac = ::ds_xfrac; + ds_yfrac = ::ds_yfrac; + dc_destorg = ::dc_destorg; + ds_x1 = ::ds_x1; + ds_x2 = ::ds_x2; + ds_y = ::ds_y; + ds_xstep = ::ds_xstep; + ds_ystep = ::ds_ystep; + ds_xbits = ::ds_xbits; + ds_ybits = ::ds_ybits; + } + + void Execute(DrawerThread *thread) override + { + dsfixed_t xfrac; + dsfixed_t yfrac; + dsfixed_t xstep; + dsfixed_t ystep; + uint32_t* dest; + const BYTE* source = ds_source; + int count; + int spot; + + xfrac = ds_xfrac; + yfrac = ds_yfrac; + + dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; + + count = ds_x2 - ds_x1 + 1; + + xstep = ds_xstep; + ystep = ds_ystep; + + uint32_t light = calc_light_multiplier(ds_light); + ShadeConstants shade_constants = ds_shade_constants; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + if (ds_xbits 
== 6 && ds_ybits == 6) + { + // 64x64 is the most common case by far, so special case it. + do + { + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + + uint32_t fg = shade_pal_index(source[spot], light, shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = (fg) & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = (fg_red * fg_alpha + bg_red * bg_alpha) / 256; + uint32_t green = (fg_green * fg_alpha + bg_green * bg_alpha) / 256; + uint32_t blue = (fg_blue * fg_alpha + bg_blue * bg_alpha) / 256; + + *dest++ = 0xff000000 | (red << 16) | (green << 8) | blue; + + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + else + { + BYTE yshift = 32 - ds_ybits; + BYTE xshift = yshift - ds_xbits; + int xmask = ((1 << ds_xbits) - 1) << ds_ybits; + do + { + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + + uint32_t fg = shade_pal_index(source[spot], light, shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = (fg) & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = (fg_red * fg_alpha + bg_red * bg_alpha) / 256; + uint32_t green = (fg_green * fg_alpha + bg_green * bg_alpha) / 256; + uint32_t blue = (fg_blue * fg_alpha + bg_blue * bg_alpha) / 256; + + *dest++ = 0xff000000 | (red << 16) | (green << 8) | blue; + + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + } +}; + +class DrawSpanMaskedTranslucentRGBACommand : public DrawerCommand +{ + const BYTE *ds_source; + fixed_t ds_light; + ShadeConstants ds_shade_constants; + fixed_t ds_xfrac; + fixed_t ds_yfrac; + BYTE *dc_destorg; + int ds_x1; + int ds_x2; /* right edge of the span, filled from ::ds_x2 by the constructor; was misdeclared as ds_y1, which left ds_x2 referring to the global */ + int ds_y; + fixed_t ds_xstep; + fixed_t ds_ystep; + int ds_xbits; + int ds_ybits; + +public: + 
DrawSpanMaskedTranslucentRGBACommand() + { + ds_source = ::ds_source; + ds_light = ::ds_light; + ds_shade_constants = ::ds_shade_constants; + ds_xfrac = ::ds_xfrac; + ds_yfrac = ::ds_yfrac; + dc_destorg = ::dc_destorg; + ds_x1 = ::ds_x1; + ds_x2 = ::ds_x2; + ds_y = ::ds_y; + ds_xstep = ::ds_xstep; + ds_ystep = ::ds_ystep; + ds_xbits = ::ds_xbits; + ds_ybits = ::ds_ybits; + } + + void Execute(DrawerThread *thread) override + { + dsfixed_t xfrac; + dsfixed_t yfrac; + dsfixed_t xstep; + dsfixed_t ystep; + uint32_t* dest; + const BYTE* source = ds_source; + int count; + int spot; + + uint32_t light = calc_light_multiplier(ds_light); + ShadeConstants shade_constants = ds_shade_constants; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + xfrac = ds_xfrac; + yfrac = ds_yfrac; + + dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; + + count = ds_x2 - ds_x1 + 1; + + xstep = ds_xstep; + ystep = ds_ystep; + + if (ds_xbits == 6 && ds_ybits == 6) + { + // 64x64 is the most common case by far, so special case it. 
+ do + { + BYTE texdata; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + texdata = source[spot]; + if (texdata != 0) + { + uint32_t fg = shade_pal_index(texdata, light, shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = (fg) & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = (fg_red * fg_alpha + bg_red * bg_alpha) / 256; + uint32_t green = (fg_green * fg_alpha + bg_green * bg_alpha) / 256; + uint32_t blue = (fg_blue * fg_alpha + bg_blue * bg_alpha) / 256; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + } + dest++; + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + else + { + BYTE yshift = 32 - ds_ybits; + BYTE xshift = yshift - ds_xbits; + int xmask = ((1 << ds_xbits) - 1) << ds_ybits; + do + { + BYTE texdata; + + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + texdata = source[spot]; + if (texdata != 0) + { + uint32_t fg = shade_pal_index(texdata, light, shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = (fg) & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = (fg_red * fg_alpha + bg_red * bg_alpha) / 256; + uint32_t green = (fg_green * fg_alpha + bg_green * bg_alpha) / 256; + uint32_t blue = (fg_blue * fg_alpha + bg_blue * bg_alpha) / 256; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + } + dest++; + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + } +}; + +class DrawSpanAddClampRGBACommand : public DrawerCommand +{ + const BYTE *ds_source; + fixed_t ds_light; + ShadeConstants ds_shade_constants; + fixed_t ds_xfrac; + fixed_t ds_yfrac; + BYTE *dc_destorg; + int ds_x1; + int ds_x2; /* right edge of the span, filled from ::ds_x2 by the constructor; was misdeclared as ds_y1, which left ds_x2 referring to the global */ + int ds_y; + fixed_t ds_xstep; + fixed_t ds_ystep; + int ds_xbits; + 
int ds_ybits; + +public: + DrawSpanAddClampRGBACommand() + { + ds_source = ::ds_source; + ds_light = ::ds_light; + ds_shade_constants = ::ds_shade_constants; + ds_xfrac = ::ds_xfrac; + ds_yfrac = ::ds_yfrac; + dc_destorg = ::dc_destorg; + ds_x1 = ::ds_x1; + ds_x2 = ::ds_x2; + ds_y = ::ds_y; + ds_xstep = ::ds_xstep; + ds_ystep = ::ds_ystep; + ds_xbits = ::ds_xbits; + ds_ybits = ::ds_ybits; + } + + void Execute(DrawerThread *thread) override + { + dsfixed_t xfrac; + dsfixed_t yfrac; + dsfixed_t xstep; + dsfixed_t ystep; + uint32_t* dest; + const BYTE* source = ds_source; + int count; + int spot; + + uint32_t light = calc_light_multiplier(ds_light); + ShadeConstants shade_constants = ds_shade_constants; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + xfrac = ds_xfrac; + yfrac = ds_yfrac; + + dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; + + count = ds_x2 - ds_x1 + 1; + + xstep = ds_xstep; + ystep = ds_ystep; + + if (ds_xbits == 6 && ds_ybits == 6) + { + // 64x64 is the most common case by far, so special case it. 
+ do + { + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + + uint32_t fg = shade_pal_index(source[spot], light, shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = (fg) & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); + + *dest++ = 0xff000000 | (red << 16) | (green << 8) | blue; + + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + else + { + BYTE yshift = 32 - ds_ybits; + BYTE xshift = yshift - ds_xbits; + int xmask = ((1 << ds_xbits) - 1) << ds_ybits; + do + { + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + + uint32_t fg = shade_pal_index(source[spot], light, shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = (fg) & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); + + *dest++ = 0xff000000 | (red << 16) | (green << 8) | blue; + + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + } +}; + +class DrawSpanMaskedAddClampRGBACommand : public DrawerCommand +{ + const BYTE *ds_source; + fixed_t ds_light; + ShadeConstants ds_shade_constants; + fixed_t ds_xfrac; + fixed_t ds_yfrac; + BYTE *dc_destorg; + int ds_x1; + int ds_x2; /* right edge of the span, filled from ::ds_x2 by the constructor; was misdeclared as ds_y1, which left ds_x2 referring to the global */ + int ds_y; + fixed_t ds_xstep; + fixed_t ds_ystep; + int ds_xbits; + int ds_ybits; + +public: + 
DrawSpanMaskedAddClampRGBACommand() + { + ds_source = ::ds_source; + ds_light = ::ds_light; + ds_shade_constants = ::ds_shade_constants; + ds_xfrac = ::ds_xfrac; + ds_yfrac = ::ds_yfrac; + dc_destorg = ::dc_destorg; + ds_x1 = ::ds_x1; + ds_x2 = ::ds_x2; + ds_y = ::ds_y; + ds_xstep = ::ds_xstep; + ds_ystep = ::ds_ystep; + ds_xbits = ::ds_xbits; + ds_ybits = ::ds_ybits; + } + + void Execute(DrawerThread *thread) override + { + dsfixed_t xfrac; + dsfixed_t yfrac; + dsfixed_t xstep; + dsfixed_t ystep; + uint32_t* dest; + const BYTE* source = ds_source; + int count; + int spot; + + uint32_t light = calc_light_multiplier(ds_light); + ShadeConstants shade_constants = ds_shade_constants; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + xfrac = ds_xfrac; + yfrac = ds_yfrac; + + dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; + + count = ds_x2 - ds_x1 + 1; + + xstep = ds_xstep; + ystep = ds_ystep; + + if (ds_xbits == 6 && ds_ybits == 6) + { + // 64x64 is the most common case by far, so special case it. 
+ do + { + BYTE texdata; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + texdata = source[spot]; + if (texdata != 0) + { + uint32_t fg = shade_pal_index(texdata, light, shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = (fg) & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); /* clamp per channel as DrawSpanAddClampRGBACommand does: an unclamped sum above 255 would spill into the next channel when packed */ + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + } + dest++; + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + else + { + BYTE yshift = 32 - ds_ybits; + BYTE xshift = yshift - ds_xbits; + int xmask = ((1 << ds_xbits) - 1) << ds_ybits; + do + { + BYTE texdata; + + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + texdata = source[spot]; + if (texdata != 0) + { + uint32_t fg = shade_pal_index(texdata, light, shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = (fg) & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); /* same per-channel clamp as the 64x64 path above */ + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + } + dest++; + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + } +}; + +class FillSpanRGBACommand : public DrawerCommand +{ + int ds_x1; + int ds_x2; + int ds_y; + BYTE *dc_destorg; + fixed_t ds_light; + int ds_color; + +public: + FillSpanRGBACommand() + { + ds_x1 = ::ds_x1; + ds_x2 = ::ds_x2; + ds_y = ::ds_y; + dc_destorg = ::dc_destorg; + ds_light = ::ds_light; + 
ds_color = ::ds_color; + } + + void Execute(DrawerThread *thread) override + { + uint32_t *dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; + int count = (ds_x2 - ds_x1 + 1); + uint32_t light = calc_light_multiplier(ds_light); + uint32_t color = shade_pal_index_simple(ds_color, light); + for (int i = 0; i < count; i++) + dest[i] = color; + } +}; + +class Vlinec1RGBACommand : public DrawerCommand +{ + fixed_t dc_iscale; + fixed_t dc_texturefrac; + int dc_count; + const BYTE *dc_source; + BYTE *dc_dest; + int vlinebits; + int dc_pitch; + fixed_t dc_light; + ShadeConstants dc_shade_constants; + +public: + Vlinec1RGBACommand() + { + dc_iscale = ::dc_iscale; + dc_texturefrac = ::dc_texturefrac; + dc_count = ::dc_count; + dc_source = ::dc_source; + dc_dest = ::dc_dest; + vlinebits = ::vlinebits; + dc_pitch = ::dc_pitch; + dc_light = ::dc_light; + dc_shade_constants = ::dc_shade_constants; + } + + void Execute(DrawerThread *thread) override + { + DWORD fracstep = dc_iscale; + DWORD frac = dc_texturefrac; + int count = dc_count; + const BYTE *source = dc_source; + uint32_t *dest = (uint32_t*)dc_dest; + int bits = vlinebits; + int pitch = dc_pitch; + + uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; + + do + { + *dest = shade_pal_index(source[frac >> bits], light, shade_constants); + frac += fracstep; + dest += pitch; + } while (--count); + } +}; + +class Vlinec4RGBACommand : public DrawerCommand +{ + BYTE *dc_dest; + int dc_count; + int dc_pitch; + ShadeConstants dc_shade_constants; + int vlinebits; + fixed_t palookuplight[4]; + DWORD vplce[4]; + DWORD vince[4]; + const BYTE *bufplce[4]; + +public: + Vlinec4RGBACommand() + { + dc_dest = ::dc_dest; + dc_count = ::dc_count; + dc_pitch = ::dc_pitch; + dc_shade_constants = ::dc_shade_constants; + vlinebits = ::vlinebits; + for (int i = 0; i < 4; i++) + { + palookuplight[i] = ::palookuplight[i]; + vplce[i] = ::vplce[i]; + vince[i] = ::vince[i]; + bufplce[i] = 
::bufplce[i]; + } + } + +#ifdef NO_SSE + void Execute(DrawerThread *thread) override + { + uint32_t *dest = (uint32_t*)dc_dest; + int count = dc_count; + int bits = vlinebits; + DWORD place; + + uint32_t light0 = calc_light_multiplier(palookuplight[0]); + uint32_t light1 = calc_light_multiplier(palookuplight[1]); + uint32_t light2 = calc_light_multiplier(palookuplight[2]); + uint32_t light3 = calc_light_multiplier(palookuplight[3]); + + ShadeConstants shade_constants = dc_shade_constants; + + do + { + dest[0] = shade_pal_index(bufplce[0][(place = vplce[0]) >> bits], light0, shade_constants); vplce[0] = place + vince[0]; + dest[1] = shade_pal_index(bufplce[1][(place = vplce[1]) >> bits], light1, shade_constants); vplce[1] = place + vince[1]; + dest[2] = shade_pal_index(bufplce[2][(place = vplce[2]) >> bits], light2, shade_constants); vplce[2] = place + vince[2]; + dest[3] = shade_pal_index(bufplce[3][(place = vplce[3]) >> bits], light3, shade_constants); vplce[3] = place + vince[3]; + dest += dc_pitch; + } while (--count); + } +#else + void Execute(DrawerThread *thread) override + { + uint32_t *dest = (uint32_t*)dc_dest; + int count = dc_count; + int bits = vlinebits; + + uint32_t light0 = calc_light_multiplier(palookuplight[0]); + uint32_t light1 = calc_light_multiplier(palookuplight[1]); + uint32_t light2 = calc_light_multiplier(palookuplight[2]); + uint32_t light3 = calc_light_multiplier(palookuplight[3]); + + ShadeConstants shade_constants = dc_shade_constants; + + uint32_t *palette = (uint32_t*)GPalette.BaseColors; + DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; + DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; + + if (shade_constants.simple_shade) + { + SSE_SHADE_SIMPLE_INIT4(light3, light2, light1, light0); + do + { + DWORD place0 = local_vplce[0]; + DWORD place1 = local_vplce[1]; + DWORD place2 = local_vplce[2]; + DWORD place3 = local_vplce[3]; + + BYTE p0 = bufplce[0][place0 >> bits]; + BYTE p1 = bufplce[1][place1 >> 
bits]; + BYTE p2 = bufplce[2][place2 >> bits]; + BYTE p3 = bufplce[3][place3 >> bits]; + + local_vplce[0] = place0 + local_vince[0]; + local_vplce[1] = place1 + local_vince[1]; + local_vplce[2] = place2 + local_vince[2]; + local_vplce[3] = place3 + local_vince[3]; + + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE_SIMPLE(fg); + _mm_storeu_si128((__m128i*)dest, fg); + dest += dc_pitch; + } while (--count); + } + else + { + SSE_SHADE_INIT4(light3, light2, light1, light0, shade_constants); + do + { + DWORD place0 = local_vplce[0]; + DWORD place1 = local_vplce[1]; + DWORD place2 = local_vplce[2]; + DWORD place3 = local_vplce[3]; + + BYTE p0 = bufplce[0][place0 >> bits]; + BYTE p1 = bufplce[1][place1 >> bits]; + BYTE p2 = bufplce[2][place2 >> bits]; + BYTE p3 = bufplce[3][place3 >> bits]; + + local_vplce[0] = place0 + local_vince[0]; + local_vplce[1] = place1 + local_vince[1]; + local_vplce[2] = place2 + local_vince[2]; + local_vplce[3] = place3 + local_vince[3]; + + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE(fg, shade_constants); + _mm_storeu_si128((__m128i*)dest, fg); + dest += dc_pitch; + } while (--count); + } + } +#endif +}; + +class Mvlinec1RGBACommand : public DrawerCommand +{ + fixed_t dc_iscale; + fixed_t dc_texturefrac; + int dc_count; + const BYTE *dc_source; + BYTE *dc_dest; + int mvlinebits; + int dc_pitch; + fixed_t dc_light; + ShadeConstants dc_shade_constants; + +public: + Mvlinec1RGBACommand() + { + dc_iscale = ::dc_iscale; + dc_texturefrac = ::dc_texturefrac; + dc_count = ::dc_count; + dc_source = ::dc_source; + dc_dest = ::dc_dest; + mvlinebits = ::mvlinebits; + dc_pitch = ::dc_pitch; + dc_light = ::dc_light; + dc_shade_constants = ::dc_shade_constants; + } + + void Execute(DrawerThread *thread) override + { + DWORD fracstep = dc_iscale; + DWORD frac = dc_texturefrac; + int count = dc_count; + const BYTE *source = dc_source; + uint32_t *dest = 
(uint32_t*)dc_dest; + int bits = mvlinebits; + int pitch = dc_pitch; + + uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; + + do + { + BYTE pix = source[frac >> bits]; + if (pix != 0) + { + *dest = shade_pal_index(pix, light, shade_constants); + } + frac += fracstep; + dest += pitch; + } while (--count); + } +}; + +class Mvlinec4RGBACommand : public DrawerCommand +{ + BYTE *dc_dest; + int dc_count; + int dc_pitch; + ShadeConstants dc_shade_constants; + int mvlinebits; + fixed_t palookuplight[4]; + DWORD vplce[4]; + DWORD vince[4]; + const BYTE *bufplce[4]; + +public: + Mvlinec4RGBACommand() + { + dc_dest = ::dc_dest; + dc_count = ::dc_count; + dc_pitch = ::dc_pitch; + dc_shade_constants = ::dc_shade_constants; + mvlinebits = ::mvlinebits; + for (int i = 0; i < 4; i++) + { + palookuplight[i] = ::palookuplight[i]; + vplce[i] = ::vplce[i]; + vince[i] = ::vince[i]; + bufplce[i] = ::bufplce[i]; + } + } + +#ifdef NO_SSE + void Execute(DrawerThread *thread) override + { + uint32_t *dest = (uint32_t*)dc_dest; + int count = dc_count; + int bits = mvlinebits; + DWORD place; + + uint32_t light0 = calc_light_multiplier(palookuplight[0]); + uint32_t light1 = calc_light_multiplier(palookuplight[1]); + uint32_t light2 = calc_light_multiplier(palookuplight[2]); + uint32_t light3 = calc_light_multiplier(palookuplight[3]); + + ShadeConstants shade_constants = dc_shade_constants; + + do + { + BYTE pix; + pix = bufplce[0][(place = vplce[0]) >> bits]; if (pix) dest[0] = shade_pal_index(pix, light0, shade_constants); vplce[0] = place + vince[0]; + pix = bufplce[1][(place = vplce[1]) >> bits]; if (pix) dest[1] = shade_pal_index(pix, light1, shade_constants); vplce[1] = place + vince[1]; + pix = bufplce[2][(place = vplce[2]) >> bits]; if (pix) dest[2] = shade_pal_index(pix, light2, shade_constants); vplce[2] = place + vince[2]; + pix = bufplce[3][(place = vplce[3]) >> bits]; if (pix) dest[3] = shade_pal_index(pix, light3, 
shade_constants); vplce[3] = place + vince[3]; + dest += dc_pitch; + } while (--count); + } +#else + void Execute(DrawerThread *thread) override + { + uint32_t *dest = (uint32_t*)dc_dest; + int count = dc_count; + int bits = mvlinebits; + + uint32_t light0 = calc_light_multiplier(palookuplight[0]); + uint32_t light1 = calc_light_multiplier(palookuplight[1]); + uint32_t light2 = calc_light_multiplier(palookuplight[2]); + uint32_t light3 = calc_light_multiplier(palookuplight[3]); + + ShadeConstants shade_constants = dc_shade_constants; + + uint32_t *palette = (uint32_t*)GPalette.BaseColors; + DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; + DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; + + if (shade_constants.simple_shade) + { + SSE_SHADE_SIMPLE_INIT4(light3, light2, light1, light0); + do + { + DWORD place0 = local_vplce[0]; + DWORD place1 = local_vplce[1]; + DWORD place2 = local_vplce[2]; + DWORD place3 = local_vplce[3]; + + BYTE pix0 = bufplce[0][place0 >> bits]; + BYTE pix1 = bufplce[1][place1 >> bits]; + BYTE pix2 = bufplce[2][place2 >> bits]; + BYTE pix3 = bufplce[3][place3 >> bits]; + + // movemask = !(pix == 0) + __m128i movemask = _mm_xor_si128(_mm_cmpeq_epi32(_mm_set_epi32(pix3, pix2, pix1, pix0), _mm_setzero_si128()), _mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128())); + + local_vplce[0] = place0 + local_vince[0]; + local_vplce[1] = place1 + local_vince[1]; + local_vplce[2] = place2 + local_vince[2]; + local_vplce[3] = place3 + local_vince[3]; + + __m128i fg = _mm_set_epi32(palette[pix3], palette[pix2], palette[pix1], palette[pix0]); + SSE_SHADE_SIMPLE(fg); + _mm_maskmoveu_si128(fg, movemask, (char*)dest); + dest += dc_pitch; + } while (--count); + } + else + { + SSE_SHADE_INIT4(light3, light2, light1, light0, shade_constants); + do + { + DWORD place0 = local_vplce[0]; + DWORD place1 = local_vplce[1]; + DWORD place2 = local_vplce[2]; + DWORD place3 = local_vplce[3]; + + BYTE pix0 = bufplce[0][place0 >> bits]; 
+ BYTE pix1 = bufplce[1][place1 >> bits]; + BYTE pix2 = bufplce[2][place2 >> bits]; + BYTE pix3 = bufplce[3][place3 >> bits]; + + // movemask = !(pix == 0) + __m128i movemask = _mm_xor_si128(_mm_cmpeq_epi32(_mm_set_epi32(pix3, pix2, pix1, pix0), _mm_setzero_si128()), _mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128())); + + local_vplce[0] = place0 + local_vince[0]; + local_vplce[1] = place1 + local_vince[1]; + local_vplce[2] = place2 + local_vince[2]; + local_vplce[3] = place3 + local_vince[3]; + + __m128i fg = _mm_set_epi32(palette[pix3], palette[pix2], palette[pix1], palette[pix0]); + SSE_SHADE(fg, shade_constants); + _mm_maskmoveu_si128(fg, movemask, (char*)dest); + dest += dc_pitch; + } while (--count); + } + } +#endif +}; + +class Tmvline1AddRGBACommand : public DrawerCommand +{ + fixed_t dc_iscale; + fixed_t dc_texturefrac; + int dc_count; + const BYTE *dc_source; + BYTE *dc_dest; + int tmvlinebits; + int dc_pitch; + fixed_t dc_light; + ShadeConstants dc_shade_constants; + fixed_t dc_srcalpha; + fixed_t dc_destalpha; + +public: + Tmvline1AddRGBACommand() + { + dc_iscale = ::dc_iscale; + dc_texturefrac = ::dc_texturefrac; + dc_count = ::dc_count; + dc_source = ::dc_source; + dc_dest = ::dc_dest; + tmvlinebits = ::tmvlinebits; + dc_pitch = ::dc_pitch; + dc_light = ::dc_light; + dc_shade_constants = ::dc_shade_constants; + dc_srcalpha = ::dc_srcalpha; + dc_destalpha = ::dc_destalpha; + } + + void Execute(DrawerThread *thread) override + { + DWORD fracstep = dc_iscale; + DWORD frac = dc_texturefrac; + int count = dc_count; + const BYTE *source = dc_source; + uint32_t *dest = (uint32_t*)dc_dest; + int bits = tmvlinebits; + int pitch = dc_pitch; + + uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + do + { + BYTE pix = source[frac >> bits]; + if (pix != 0) + { + uint32_t fg = 
shade_pal_index(pix, light, shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + } + frac += fracstep; + dest += pitch; + } while (--count); + } +}; + +class Tmvline4AddRGBACommand : public DrawerCommand +{ + BYTE *dc_dest; + int dc_count; + int dc_pitch; + ShadeConstants dc_shade_constants; + fixed_t dc_srcalpha; + fixed_t dc_destalpha; + int tmvlinebits; + fixed_t palookuplight[4]; + DWORD vplce[4]; + DWORD vince[4]; + const BYTE *bufplce[4]; + +public: + Tmvline4AddRGBACommand() + { + dc_dest = ::dc_dest; + dc_count = ::dc_count; + dc_pitch = ::dc_pitch; + dc_shade_constants = ::dc_shade_constants; + dc_srcalpha = ::dc_srcalpha; + dc_destalpha = ::dc_destalpha; + tmvlinebits = ::tmvlinebits; + for (int i = 0; i < 4; i++) + { + palookuplight[i] = ::palookuplight[i]; + vplce[i] = ::vplce[i]; + vince[i] = ::vince[i]; + bufplce[i] = ::bufplce[i]; + } + } + + void Execute(DrawerThread *thread) override + { + uint32_t *dest = (uint32_t*)dc_dest; + int count = dc_count; + int bits = tmvlinebits; + + uint32_t light[4]; + light[0] = calc_light_multiplier(palookuplight[0]); + light[1] = calc_light_multiplier(palookuplight[1]); + light[2] = calc_light_multiplier(palookuplight[2]); + light[3] = calc_light_multiplier(palookuplight[3]); + + ShadeConstants shade_constants = dc_shade_constants; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + do + { + for (int i = 0; i < 4; ++i) + { + BYTE pix = bufplce[i][vplce[i] 
>> bits]; + if (pix != 0) + { + uint32_t fg = shade_pal_index(pix, light[i], shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (dest[i] >> 16) & 0xff; /* blend against this column's own pixel; *dest always read column 0 */ + uint32_t bg_green = (dest[i] >> 8) & 0xff; + uint32_t bg_blue = (dest[i]) & 0xff; + + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); + + dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; + } + vplce[i] += vince[i]; + } + dest += dc_pitch; + } while (--count); + } +}; + +class Tmvline1AddClampRGBACommand : public DrawerCommand +{ + fixed_t dc_iscale; + fixed_t dc_texturefrac; + int dc_count; + const BYTE *dc_source; + BYTE *dc_dest; + int tmvlinebits; + int dc_pitch; + fixed_t dc_light; + ShadeConstants dc_shade_constants; + fixed_t dc_srcalpha; + fixed_t dc_destalpha; + +public: + Tmvline1AddClampRGBACommand() + { + dc_iscale = ::dc_iscale; + dc_texturefrac = ::dc_texturefrac; + dc_count = ::dc_count; + dc_source = ::dc_source; + dc_dest = ::dc_dest; + tmvlinebits = ::tmvlinebits; + dc_pitch = ::dc_pitch; + dc_light = ::dc_light; + dc_shade_constants = ::dc_shade_constants; + dc_srcalpha = ::dc_srcalpha; + dc_destalpha = ::dc_destalpha; + } + + void Execute(DrawerThread *thread) override + { + DWORD fracstep = dc_iscale; + DWORD frac = dc_texturefrac; + int count = dc_count; + const BYTE *source = dc_source; + uint32_t *dest = (uint32_t*)dc_dest; + int bits = tmvlinebits; + int pitch = dc_pitch; + + uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + do + { + BYTE pix = source[frac >> bits]; + if (pix != 0) + { + uint32_t fg = shade_pal_index(pix, light, 
shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + } + frac += fracstep; + dest += pitch; + } while (--count); + } +}; + +class Tmvline4AddClampRGBACommand : public DrawerCommand +{ + BYTE *dc_dest; + int dc_count; + int dc_pitch; + ShadeConstants dc_shade_constants; + fixed_t dc_srcalpha; + fixed_t dc_destalpha; + int tmvlinebits; + fixed_t palookuplight[4]; + DWORD vplce[4]; + DWORD vince[4]; + const BYTE *bufplce[4]; + +public: + Tmvline4AddClampRGBACommand() + { + dc_dest = ::dc_dest; + dc_count = ::dc_count; + dc_pitch = ::dc_pitch; + dc_shade_constants = ::dc_shade_constants; + dc_srcalpha = ::dc_srcalpha; + dc_destalpha = ::dc_destalpha; + tmvlinebits = ::tmvlinebits; + for (int i = 0; i < 4; i++) + { + palookuplight[i] = ::palookuplight[i]; + vplce[i] = ::vplce[i]; + vince[i] = ::vince[i]; + bufplce[i] = ::bufplce[i]; + } + } + + void Execute(DrawerThread *thread) override + { + uint32_t *dest = (uint32_t*)dc_dest; + int count = dc_count; + int bits = tmvlinebits; + + uint32_t light[4]; + light[0] = calc_light_multiplier(palookuplight[0]); + light[1] = calc_light_multiplier(palookuplight[1]); + light[2] = calc_light_multiplier(palookuplight[2]); + light[3] = calc_light_multiplier(palookuplight[3]); + + ShadeConstants shade_constants = dc_shade_constants; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + do + { + for (int i = 0; i < 4; ++i) + { + BYTE pix = bufplce[i][vplce[i] >> bits]; + if 
(pix != 0) + { + uint32_t fg = shade_pal_index(pix, light[i], shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (dest[i] >> 16) & 0xff; + uint32_t bg_green = (dest[i] >> 8) & 0xff; + uint32_t bg_blue = (dest[i]) & 0xff; + + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); + + dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; + } + vplce[i] += vince[i]; + } + dest += dc_pitch; + } while (--count); + } +}; + +class Tmvline1SubClampRGBACommand : public DrawerCommand +{ + fixed_t dc_iscale; + fixed_t dc_texturefrac; + int dc_count; + const BYTE *dc_source; + BYTE *dc_dest; + int tmvlinebits; + int dc_pitch; + fixed_t dc_light; + ShadeConstants dc_shade_constants; + fixed_t dc_srcalpha; + fixed_t dc_destalpha; + +public: + Tmvline1SubClampRGBACommand() + { + dc_iscale = ::dc_iscale; + dc_texturefrac = ::dc_texturefrac; + dc_count = ::dc_count; + dc_source = ::dc_source; + dc_dest = ::dc_dest; + tmvlinebits = ::tmvlinebits; + dc_pitch = ::dc_pitch; + dc_light = ::dc_light; + dc_shade_constants = ::dc_shade_constants; + dc_srcalpha = ::dc_srcalpha; + dc_destalpha = ::dc_destalpha; + } + + void Execute(DrawerThread *thread) override + { + DWORD fracstep = dc_iscale; + DWORD frac = dc_texturefrac; + int count = dc_count; + const BYTE *source = dc_source; + uint32_t *dest = (uint32_t*)dc_dest; + int bits = tmvlinebits; + int pitch = dc_pitch; + + uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + do + { + BYTE pix = source[frac >> bits]; + if (pix != 0) + { + uint32_t fg = shade_pal_index(pix, light, 
shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + } + frac += fracstep; + dest += pitch; + } while (--count); + } +}; + +class Tmvline4SubClampRGBACommand : public DrawerCommand +{ + BYTE *dc_dest; + int dc_count; + int dc_pitch; + ShadeConstants dc_shade_constants; + fixed_t dc_srcalpha; + fixed_t dc_destalpha; + int tmvlinebits; + fixed_t palookuplight[4]; + DWORD vplce[4]; + DWORD vince[4]; + const BYTE *bufplce[4]; + +public: + Tmvline4SubClampRGBACommand() + { + dc_dest = ::dc_dest; + dc_count = ::dc_count; + dc_pitch = ::dc_pitch; + dc_shade_constants = ::dc_shade_constants; + dc_srcalpha = ::dc_srcalpha; + dc_destalpha = ::dc_destalpha; + tmvlinebits = ::tmvlinebits; + for (int i = 0; i < 4; i++) + { + palookuplight[i] = ::palookuplight[i]; + vplce[i] = ::vplce[i]; + vince[i] = ::vince[i]; + bufplce[i] = ::bufplce[i]; + } + } + + void Execute(DrawerThread *thread) override + { + uint32_t *dest = (uint32_t*)dc_dest; + int count = dc_count; + int bits = tmvlinebits; + + uint32_t light[4]; + light[0] = calc_light_multiplier(palookuplight[0]); + light[1] = calc_light_multiplier(palookuplight[1]); + light[2] = calc_light_multiplier(palookuplight[2]); + light[3] = calc_light_multiplier(palookuplight[3]); + + ShadeConstants shade_constants = dc_shade_constants; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + do + { + for (int i 
= 0; i < 4; ++i) + { + BYTE pix = bufplce[i][vplce[i] >> bits]; + if (pix != 0) + { + uint32_t fg = shade_pal_index(pix, light[i], shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (dest[i] >> 16) & 0xff; + uint32_t bg_green = (dest[i] >> 8) & 0xff; + uint32_t bg_blue = (dest[i]) & 0xff; + + uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; + + dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; + } + vplce[i] += vince[i]; + } + dest += dc_pitch; + } while (--count); + } +}; + +class Tmvline1RevSubClampRGBACommand : public DrawerCommand +{ + fixed_t dc_iscale; + fixed_t dc_texturefrac; + int dc_count; + const BYTE *dc_source; + BYTE *dc_dest; + int tmvlinebits; + int dc_pitch; + fixed_t dc_light; + ShadeConstants dc_shade_constants; + fixed_t dc_srcalpha; + fixed_t dc_destalpha; + +public: + Tmvline1RevSubClampRGBACommand() + { + dc_iscale = ::dc_iscale; + dc_texturefrac = ::dc_texturefrac; + dc_count = ::dc_count; + dc_source = ::dc_source; + dc_dest = ::dc_dest; + tmvlinebits = ::tmvlinebits; + dc_pitch = ::dc_pitch; + dc_light = ::dc_light; + dc_shade_constants = ::dc_shade_constants; + dc_srcalpha = ::dc_srcalpha; + dc_destalpha = ::dc_destalpha; + } + + void Execute(DrawerThread *thread) override + { + DWORD fracstep = dc_iscale; + DWORD frac = dc_texturefrac; + int count = dc_count; + const BYTE *source = dc_source; + uint32_t *dest = (uint32_t*)dc_dest; + int bits = tmvlinebits; + int pitch = dc_pitch; + + uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = 
dc_destalpha >> (FRACBITS - 8); + + do + { + BYTE pix = source[frac >> bits]; + if (pix != 0) + { + uint32_t fg = shade_pal_index(pix, light, shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + } + frac += fracstep; + dest += pitch; + } while (--count); + } +}; + +class Tmvline4RevSubClampRGBACommand : public DrawerCommand +{ + BYTE *dc_dest; + int dc_count; + int dc_pitch; + ShadeConstants dc_shade_constants; + fixed_t dc_srcalpha; + fixed_t dc_destalpha; + int tmvlinebits; + fixed_t palookuplight[4]; + DWORD vplce[4]; + DWORD vince[4]; + const BYTE *bufplce[4]; + +public: + Tmvline4RevSubClampRGBACommand() + { + dc_dest = ::dc_dest; + dc_count = ::dc_count; + dc_pitch = ::dc_pitch; + dc_shade_constants = ::dc_shade_constants; + dc_srcalpha = ::dc_srcalpha; + dc_destalpha = ::dc_destalpha; + tmvlinebits = ::tmvlinebits; + for (int i = 0; i < 4; i++) + { + palookuplight[i] = ::palookuplight[i]; + vplce[i] = ::vplce[i]; + vince[i] = ::vince[i]; + bufplce[i] = ::bufplce[i]; + } + } + + void Execute(DrawerThread *thread) override + { + uint32_t *dest = (uint32_t*)dc_dest; + int count = dc_count; + int bits = tmvlinebits; + + uint32_t light[4]; + light[0] = calc_light_multiplier(palookuplight[0]); + light[1] = calc_light_multiplier(palookuplight[1]); + light[2] = calc_light_multiplier(palookuplight[2]); + light[3] = calc_light_multiplier(palookuplight[3]); + + ShadeConstants shade_constants = 
dc_shade_constants; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + do + { + for (int i = 0; i < 4; ++i) + { + BYTE pix = bufplce[i][vplce[i] >> bits]; + if (pix != 0) + { + uint32_t fg = shade_pal_index(pix, light[i], shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (dest[i] >> 16) & 0xff; + uint32_t bg_green = (dest[i] >> 8) & 0xff; + uint32_t bg_blue = (dest[i]) & 0xff; + + uint32_t red = clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; + + dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; + } + vplce[i] += vince[i]; + } + dest += dc_pitch; + } while (--count); + } +}; + +class DrawFogBoundaryLineRGBACommand : public DrawerCommand +{ + int _y; + int _x; + int _x2; + BYTE *dc_destorg; + fixed_t dc_light; + ShadeConstants dc_shade_constants; + +public: + DrawFogBoundaryLineRGBACommand(int y, int x, int x2) + { + _y = y; + _x = x; + _x2 = x2; + + dc_destorg = ::dc_destorg; + dc_light = ::dc_light; + dc_shade_constants = ::dc_shade_constants; + } + + void Execute(DrawerThread *thread) override + { + int y = _y; + int x = _x; + int x2 = _x2; + + uint32_t *dest = ylookup[y] + (uint32_t*)dc_destorg; + + uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants constants = dc_shade_constants; + + do + { + uint32_t red = (dest[x] >> 16) & 0xff; + uint32_t green = (dest[x] >> 8) & 0xff; + uint32_t blue = dest[x] & 0xff; + + if (constants.simple_shade) + { + red = red * light / 256; + green = green * light / 256; + blue = blue * light / 256; + } + else + { + uint32_t inv_light = 256 - light; + uint32_t inv_desaturate = 256 - 
constants.desaturate; + + uint32_t intensity = ((red * 77 + green * 143 + blue * 37) >> 8) * constants.desaturate; + + red = (red * inv_desaturate + intensity) / 256; + green = (green * inv_desaturate + intensity) / 256; + blue = (blue * inv_desaturate + intensity) / 256; + + red = (constants.fade_red * inv_light + red * light) / 256; + green = (constants.fade_green * inv_light + green * light) / 256; + blue = (constants.fade_blue * inv_light + blue * light) / 256; + + red = (red * constants.light_red) / 256; + green = (green * constants.light_green) / 256; + blue = (blue * constants.light_blue) / 256; + } + + dest[x] = 0xff000000 | (red << 16) | (green << 8) | blue; + } while (++x <= x2); + } +}; + +///////////////////////////////////////////////////////////////////////////// + +void R_FinishDrawerCommands() +{ + DrawerCommandQueue::Finish(); +} + +void R_DrawColumnP_RGBA_C() +{ + DrawerCommandQueue::QueueCommand(); +} + +void R_FillColumnP_RGBA() +{ + DrawerCommandQueue::QueueCommand(); +} + +void R_FillAddColumn_RGBA_C() +{ + DrawerCommandQueue::QueueCommand(); +} + +void R_FillAddClampColumn_RGBA() +{ + DrawerCommandQueue::QueueCommand(); +} + +void R_FillSubClampColumn_RGBA() +{ + DrawerCommandQueue::QueueCommand(); +} + +void R_FillRevSubClampColumn_RGBA() +{ + DrawerCommandQueue::QueueCommand(); +} + +void R_DrawFuzzColumnP_RGBA_C() +{ + DrawerCommandQueue::QueueCommand(); + fuzzpos = (fuzzpos + dc_yh - dc_yl) % FUZZTABLE; +} + +void R_DrawAddColumnP_RGBA_C() +{ + DrawerCommandQueue::QueueCommand(); +} + +void R_DrawTranslatedColumnP_RGBA_C() +{ + DrawerCommandQueue::QueueCommand(); +} + +void R_DrawTlatedAddColumnP_RGBA_C() +{ + DrawerCommandQueue::QueueCommand(); +} + +void R_DrawShadedColumnP_RGBA_C() +{ + DrawerCommandQueue::QueueCommand(); +} + +void R_DrawAddClampColumnP_RGBA_C() +{ + DrawerCommandQueue::QueueCommand(); +} + +void R_DrawAddClampTranslatedColumnP_RGBA_C() +{ + DrawerCommandQueue::QueueCommand(); +} + +void R_DrawSubClampColumnP_RGBA_C() 
+{ + DrawerCommandQueue::QueueCommand(); +} + +void R_DrawSubClampTranslatedColumnP_RGBA_C() +{ + DrawerCommandQueue::QueueCommand(); +} + +void R_DrawRevSubClampColumnP_RGBA_C() +{ + DrawerCommandQueue::QueueCommand(); +} + +void R_DrawRevSubClampTranslatedColumnP_RGBA_C() +{ + DrawerCommandQueue::QueueCommand(); +} + +void R_DrawSpanP_RGBA_C() +{ + DrawerCommandQueue::QueueCommand(); +} + +void R_DrawSpanMaskedP_RGBA_C() +{ + DrawerCommandQueue::QueueCommand(); +} + +void R_DrawSpanTranslucentP_RGBA_C() +{ + DrawerCommandQueue::QueueCommand(); +} + +void R_DrawSpanMaskedTranslucentP_RGBA_C() +{ + DrawerCommandQueue::QueueCommand(); +} + +void R_DrawSpanAddClampP_RGBA_C() +{ + DrawerCommandQueue::QueueCommand(); +} + +void R_DrawSpanMaskedAddClampP_RGBA_C() +{ + DrawerCommandQueue::QueueCommand(); +} + +void R_FillSpan_RGBA() +{ + DrawerCommandQueue::QueueCommand(); +} + +DWORD vlinec1_RGBA() +{ + DrawerCommandQueue::QueueCommand(); + return dc_texturefrac + dc_count * dc_iscale; +} + +void vlinec4_RGBA() +{ + DrawerCommandQueue::QueueCommand(); +} + +DWORD mvlinec1_RGBA() +{ + DrawerCommandQueue::QueueCommand(); + return dc_texturefrac + dc_count * dc_iscale; +} + +void mvlinec4_RGBA() +{ + DrawerCommandQueue::QueueCommand(); +} + +fixed_t tmvline1_add_RGBA() +{ + DrawerCommandQueue::QueueCommand(); + return dc_texturefrac + dc_count * dc_iscale; +} + +void tmvline4_add_RGBA() +{ + DrawerCommandQueue::QueueCommand(); +} + +fixed_t tmvline1_addclamp_RGBA() +{ + DrawerCommandQueue::QueueCommand(); + return dc_texturefrac + dc_count * dc_iscale; +} + +void tmvline4_addclamp_RGBA() +{ + DrawerCommandQueue::QueueCommand(); +} + +fixed_t tmvline1_subclamp_RGBA() +{ + DrawerCommandQueue::QueueCommand(); + return dc_texturefrac + dc_count * dc_iscale; +} + +void tmvline4_subclamp_RGBA() +{ + DrawerCommandQueue::QueueCommand(); +} + +fixed_t tmvline1_revsubclamp_RGBA() +{ + DrawerCommandQueue::QueueCommand(); + return dc_texturefrac + dc_count * dc_iscale; +} + +void 
tmvline4_revsubclamp_RGBA() +{ + DrawerCommandQueue::QueueCommand(); +} + +void R_DrawFogBoundarySection_RGBA(int y, int y2, int x1) +{ + for (; y < y2; ++y) + { + int x2 = spanend[y]; + DrawerCommandQueue::QueueCommand(y, x1, x2); + } +} + +void R_DrawFogBoundary_RGBA(int x1, int x2, short *uclip, short *dclip) +{ + // To do: we do not need to create new spans when using rgba output - instead we should calculate light on a per pixel basis + + // This is essentially the same as R_MapVisPlane but with an extra step + // to create new horizontal spans whenever the light changes enough that + // we need to use a new colormap. + + double lightstep = rw_lightstep; + double light = rw_light + rw_lightstep*(x2 - x1 - 1); + int x = x2 - 1; + int t2 = uclip[x]; + int b2 = dclip[x]; + int rcolormap = GETPALOOKUP(light, wallshade); + int lcolormap; + BYTE *basecolormapdata = basecolormap->Maps; + + if (b2 > t2) + { + clearbufshort(spanend + t2, b2 - t2, x); + } + + R_SetColorMapLight(basecolormap, (float)light, wallshade); + + BYTE *fake_dc_colormap = basecolormap->Maps + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT); + + for (--x; x >= x1; --x) + { + int t1 = uclip[x]; + int b1 = dclip[x]; + const int xr = x + 1; + int stop; + + light -= rw_lightstep; + lcolormap = GETPALOOKUP(light, wallshade); + if (lcolormap != rcolormap) + { + if (t2 < b2 && rcolormap != 0) + { // Colormap 0 is always the identity map, so rendering it is + // just a waste of time. 
+ R_DrawFogBoundarySection_RGBA(t2, b2, xr); + } + if (t1 < t2) t2 = t1; + if (b1 > b2) b2 = b1; + if (t2 < b2) + { + clearbufshort(spanend + t2, b2 - t2, x); + } + rcolormap = lcolormap; + R_SetColorMapLight(basecolormap, (float)light, wallshade); + fake_dc_colormap = basecolormap->Maps + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT); + } + else + { + if (fake_dc_colormap != basecolormapdata) + { + stop = MIN(t1, b2); + while (t2 < stop) + { + int y = t2++; + DrawerCommandQueue::QueueCommand(y, xr, spanend[y]); + } + stop = MAX(b1, t2); + while (b2 > stop) + { + int y = --b2; + DrawerCommandQueue::QueueCommand(y, xr, spanend[y]); + } + } + else + { + t2 = MAX(t2, MIN(t1, b2)); + b2 = MIN(b2, MAX(b1, t2)); + } + + stop = MIN(t2, b1); + while (t1 < stop) + { + spanend[t1++] = x; + } + stop = MAX(b2, t2); + while (b1 > stop) + { + spanend[--b1] = x; + } + } + + t2 = uclip[x]; + b2 = dclip[x]; + } + if (t2 < b2 && rcolormap != 0) + { + R_DrawFogBoundarySection_RGBA(t2, b2, x1); + } +} diff --git a/src/r_drawt.cpp b/src/r_drawt.cpp index ca6862ed60..c829c2dc4c 100644 --- a/src/r_drawt.cpp +++ b/src/r_drawt.cpp @@ -1019,6 +1019,14 @@ void rt_initcols_pal (BYTE *buff) horizspan[y] = dc_ctspan[y] = &dc_tspans[y][0]; } +void rt_span_coverage_pal(int x, int start, int stop) +{ + unsigned int **tspan = &dc_ctspan[x & 3]; + (*tspan)[0] = start; + (*tspan)[1] = stop; + *tspan += 2; +} + // Stretches a column into a temporary buffer which is later // drawn to the screen along with up to three other columns. 
void R_DrawColumnHorizP_C (void) diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp index ff5c0d82f1..bbf68a7956 100644 --- a/src/r_drawt_rgba.cpp +++ b/src/r_drawt_rgba.cpp @@ -46,53 +46,1560 @@ #include #endif -uint32_t dc_temp_rgbabuff_rgba[MAXHEIGHT*4]; -uint32_t *dc_temp_rgba; - -// Defined in r_draw_t.cpp: extern unsigned int dc_tspans[4][MAXHEIGHT]; extern unsigned int *dc_ctspan[4]; extern unsigned int *horizspan[4]; +///////////////////////////////////////////////////////////////////////////// + +class RtCopy1colRGBACommand : public DrawerCommand +{ + int hx; + int sx; + int yl; + int yh; + BYTE *dc_destorg; + int dc_pitch; + +public: + RtCopy1colRGBACommand(int hx, int sx, int yl, int yh) + { + this->hx = hx; + this->sx = sx; + this->yl = yl; + this->yh = yh; + + dc_destorg = ::dc_destorg; + dc_pitch = ::dc_pitch; + } + + void Execute(DrawerThread *thread) override + { + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + + count = yh - yl; + if (count < 0) + return; + count++; + + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; + source = &thread->dc_temp_rgba[yl * 4 + hx]; + pitch = dc_pitch; + + if (count & 1) { + *dest = *source; + source += 4; + dest += pitch; + } + if (count & 2) { + dest[0] = source[0]; + dest[pitch] = source[4]; + source += 8; + dest += pitch * 2; + } + if (!(count >>= 2)) + return; + + do { + dest[0] = source[0]; + dest[pitch] = source[4]; + dest[pitch * 2] = source[8]; + dest[pitch * 3] = source[12]; + source += 16; + dest += pitch * 4; + } while (--count); + } +}; + +class RtMap1colRGBACommand : public DrawerCommand +{ + int hx; + int sx; + int yl; + int yh; + fixed_t dc_light; + ShadeConstants dc_shade_constants; + BYTE *dc_destorg; + int dc_pitch; + +public: + RtMap1colRGBACommand(int hx, int sx, int yl, int yh) + { + this->hx = hx; + this->sx = sx; + this->yl = yl; + this->yh = yh; + + dc_light = ::dc_light; + dc_shade_constants = ::dc_shade_constants; + dc_destorg = ::dc_destorg; + dc_pitch = ::dc_pitch; 
+ } + + void Execute(DrawerThread *thread) override + { + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + + count = yh - yl; + if (count < 0) + return; + count++; + + uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; + + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; + source = &thread->dc_temp_rgba[yl * 4 + hx]; + pitch = dc_pitch; + + if (count & 1) { + *dest = shade_pal_index(*source, light, shade_constants); + source += 4; + dest += pitch; + } + if (!(count >>= 1)) + return; + + do { + dest[0] = shade_pal_index(source[0], light, shade_constants); + dest[pitch] = shade_pal_index(source[4], light, shade_constants); + source += 8; + dest += pitch * 2; + } while (--count); + } +}; + +class RtMap4colsRGBACommand : public DrawerCommand +{ + int sx; + int yl; + int yh; + fixed_t dc_light; + ShadeConstants dc_shade_constants; + BYTE *dc_destorg; + int dc_pitch; + +public: + RtMap4colsRGBACommand(int sx, int yl, int yh) + { + this->sx = sx; + this->yl = yl; + this->yh = yh; + + dc_light = ::dc_light; + dc_shade_constants = ::dc_shade_constants; + dc_destorg = ::dc_destorg; + dc_pitch = ::dc_pitch; + } + +#ifdef NO_SSE + void Execute(DrawerThread *thread) override + { + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + + count = yh - yl; + if (count < 0) + return; + count++; + + uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; + + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; + source = &thread->dc_temp_rgba[yl * 4]; + pitch = dc_pitch; + + if (count & 1) { + dest[0] = shade_pal_index(source[0], light, shade_constants); + dest[1] = shade_pal_index(source[1], light, shade_constants); + dest[2] = shade_pal_index(source[2], light, shade_constants); + dest[3] = shade_pal_index(source[3], light, shade_constants); + source += 4; + dest += pitch; + } + if (!(count >>= 1)) + return; + + do { + dest[0] = shade_pal_index(source[0], 
light, shade_constants); + dest[1] = shade_pal_index(source[1], light, shade_constants); + dest[2] = shade_pal_index(source[2], light, shade_constants); + dest[3] = shade_pal_index(source[3], light, shade_constants); + dest[pitch] = shade_pal_index(source[4], light, shade_constants); + dest[pitch + 1] = shade_pal_index(source[5], light, shade_constants); + dest[pitch + 2] = shade_pal_index(source[6], light, shade_constants); + dest[pitch + 3] = shade_pal_index(source[7], light, shade_constants); + source += 8; + dest += pitch * 2; + } while (--count); + } +#else + void Execute(DrawerThread *thread) override + { + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + + count = yh - yl; + if (count < 0) + return; + count++; + + ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(dc_light); + uint32_t *palette = (uint32_t*)GPalette.BaseColors; + + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; + source = &thread->dc_temp_rgba[yl * 4]; + pitch = dc_pitch; + + if (shade_constants.simple_shade) + { + SSE_SHADE_SIMPLE_INIT(light); + + if (count & 1) { + uint32_t p0 = source[0]; + uint32_t p1 = source[1]; + uint32_t p2 = source[2]; + uint32_t p3 = source[3]; + + // shade_pal_index: + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE_SIMPLE(fg); + _mm_storeu_si128((__m128i*)dest, fg); + + source += 4; + dest += pitch; + } + if (!(count >>= 1)) + return; + + do { + // shade_pal_index 0-3 + { + uint32_t p0 = source[0]; + uint32_t p1 = source[1]; + uint32_t p2 = source[2]; + uint32_t p3 = source[3]; + + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE_SIMPLE(fg); + _mm_storeu_si128((__m128i*)dest, fg); + } + + // shade_pal_index 4-7 (pitch) + { + uint32_t p0 = source[4]; + uint32_t p1 = source[5]; + uint32_t p2 = source[6]; + uint32_t p3 = source[7]; + + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + 
SSE_SHADE_SIMPLE(fg); + _mm_storeu_si128((__m128i*)(dest + pitch), fg); + } + + source += 8; + dest += pitch * 2; + } while (--count); + } + else + { + SSE_SHADE_INIT(light, shade_constants); + + if (count & 1) { + uint32_t p0 = source[0]; + uint32_t p1 = source[1]; + uint32_t p2 = source[2]; + uint32_t p3 = source[3]; + + // shade_pal_index: + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE(fg, shade_constants); + _mm_storeu_si128((__m128i*)dest, fg); + + source += 4; + dest += pitch; + } + if (!(count >>= 1)) + return; + + do { + // shade_pal_index 0-3 + { + uint32_t p0 = source[0]; + uint32_t p1 = source[1]; + uint32_t p2 = source[2]; + uint32_t p3 = source[3]; + + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE(fg, shade_constants); + _mm_storeu_si128((__m128i*)dest, fg); + } + + // shade_pal_index 4-7 (pitch) + { + uint32_t p0 = source[4]; + uint32_t p1 = source[5]; + uint32_t p2 = source[6]; + uint32_t p3 = source[7]; + + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE(fg, shade_constants); + _mm_storeu_si128((__m128i*)(dest + pitch), fg); + } + + source += 8; + dest += pitch * 2; + } while (--count); + } + } +#endif +}; + +class RtTranslate1colRGBACommand : public DrawerCommand +{ + const BYTE *translation; + int hx; + int yl; + int yh; + +public: + RtTranslate1colRGBACommand(const BYTE *translation, int hx, int yl, int yh) + { + this->translation = translation; + this->hx = hx; + this->yl = yl; + this->yh = yh; + } + + void Execute(DrawerThread *thread) override + { + int count = yh - yl + 1; + uint32_t *source = &thread->dc_temp_rgba[yl*4 + hx]; + + // Things we do to hit the compiler's optimizer with a clue bat: + // 1. Parallelism is explicitly spelled out by using a separate + // C instruction for each assembly instruction. GCC lets me + // have four temporaries, but VC++ spills to the stack with + // more than two. 
Two is probably optimal, anyway. + // 2. The results of the translation lookups are explicitly + // stored in byte-sized variables. This causes the VC++ code + // to use byte mov instructions in most cases; for apparently + // random reasons, it will use movzx for some places. GCC + // ignores this and uses movzx always. + + // Do 8 rows at a time. + for (int count8 = count >> 3; count8; --count8) + { + int c0, c1; + BYTE b0, b1; + + c0 = source[0]; c1 = source[4]; + b0 = translation[c0]; b1 = translation[c1]; + source[0] = b0; source[4] = b1; + + c0 = source[8]; c1 = source[12]; + b0 = translation[c0]; b1 = translation[c1]; + source[8] = b0; source[12] = b1; + + c0 = source[16]; c1 = source[20]; + b0 = translation[c0]; b1 = translation[c1]; + source[16] = b0; source[20] = b1; + + c0 = source[24]; c1 = source[28]; + b0 = translation[c0]; b1 = translation[c1]; + source[24] = b0; source[28] = b1; + + source += 32; + } + // Finish by doing 1 row at a time. + for (count &= 7; count; --count, source += 4) + { + source[0] = translation[source[0]]; + } + } +}; + +class RtTranslate4colsRGBACommand : public DrawerCommand +{ + const BYTE *translation; + int yl; + int yh; + +public: + RtTranslate4colsRGBACommand(const BYTE *translation, int yl, int yh) + { + this->translation = translation; + this->yl = yl; + this->yh = yh; + } + + void Execute(DrawerThread *thread) override + { + int count = yh - yl + 1; + uint32_t *source = &thread->dc_temp_rgba[yl*4]; + int c0, c1; + BYTE b0, b1; + + // Do 2 rows at a time. 
+ for (int count8 = count >> 1; count8; --count8) + { + c0 = source[0]; c1 = source[1]; + b0 = translation[c0]; b1 = translation[c1]; + source[0] = b0; source[1] = b1; + + c0 = source[2]; c1 = source[3]; + b0 = translation[c0]; b1 = translation[c1]; + source[2] = b0; source[3] = b1; + + c0 = source[4]; c1 = source[5]; + b0 = translation[c0]; b1 = translation[c1]; + source[4] = b0; source[5] = b1; + + c0 = source[6]; c1 = source[7]; + b0 = translation[c0]; b1 = translation[c1]; + source[6] = b0; source[7] = b1; + + source += 8; + } + // Do the final row if count was odd. + if (count & 1) + { + c0 = source[0]; c1 = source[1]; + b0 = translation[c0]; b1 = translation[c1]; + source[0] = b0; source[1] = b1; + + c0 = source[2]; c1 = source[3]; + b0 = translation[c0]; b1 = translation[c1]; + source[2] = b0; source[3] = b1; + } + } +}; + +class RtAdd1colRGBACommand : public DrawerCommand +{ + int hx; + int sx; + int yl; + int yh; + BYTE *dc_destorg; + int dc_pitch; + fixed_t dc_light; + ShadeConstants dc_shade_constants; + fixed_t dc_srcalpha; + fixed_t dc_destalpha; + +public: + RtAdd1colRGBACommand(int hx, int sx, int yl, int yh) + { + this->hx = hx; + this->sx = sx; + this->yl = yl; + this->yh = yh; + + dc_destorg = ::dc_destorg; + dc_pitch = ::dc_pitch; + dc_light = ::dc_light; + dc_shade_constants = ::dc_shade_constants; + dc_srcalpha = ::dc_srcalpha; + dc_destalpha = ::dc_destalpha; + } + + void Execute(DrawerThread *thread) override + { + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + + count = yh - yl; + if (count < 0) + return; + count++; + + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; + source = &thread->dc_temp_rgba[yl * 4 + hx]; + pitch = dc_pitch; + + uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + do { + uint32_t fg = shade_pal_index(*source, light, shade_constants); + 
uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + + source += 4; + dest += pitch; + } while (--count); + } +}; + +class RtAdd4colsRGBACommand : public DrawerCommand +{ + int sx; + int yl; + int yh; + BYTE *dc_destorg; + int dc_pitch; + fixed_t dc_light; + ShadeConstants dc_shade_constants; + +public: + RtAdd4colsRGBACommand(int sx, int yl, int yh) + { + this->sx = sx; + this->yl = yl; + this->yh = yh; + + dc_destorg = ::dc_destorg; + dc_pitch = ::dc_pitch; + dc_light = ::dc_light; + dc_shade_constants = ::dc_shade_constants; + } + +#ifdef NO_SSE + void Execute(DrawerThread *thread) override + { + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + + count = yh - yl; + if (count < 0) + return; + count++; + + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; + source = &thread->dc_temp_rgba[yl * 4]; + pitch = dc_pitch; + + uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + do { + for (int i = 0; i < 4; i++) + { + uint32_t fg = shade_pal_index(source[i], light, shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (dest[i] >> 16) & 0xff; + uint32_t bg_green = (dest[i] >> 8) & 0xff; + uint32_t bg_blue = (dest[i]) & 0xff; + + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t 
green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); + + dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; + } + + source += 4; + dest += pitch; + } while (--count); + } +#else + void Execute(DrawerThread *thread) override + { + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + + count = yh - yl; + if (count < 0) + return; + count++; + + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; + source = &thread->dc_temp_rgba[yl * 4]; + pitch = dc_pitch; + + uint32_t light = calc_light_multiplier(dc_light); + uint32_t *palette = (uint32_t*)GPalette.BaseColors; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + ShadeConstants shade_constants = dc_shade_constants; + + if (shade_constants.simple_shade) + { + SSE_SHADE_SIMPLE_INIT(light); + + __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); + __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); + + do { + uint32_t p0 = source[0]; + uint32_t p1 = source[1]; + uint32_t p2 = source[2]; + uint32_t p3 = source[3]; + + // shade_pal_index: + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE_SIMPLE(fg); + + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + + // unpack bg: + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + // (fg_red * fg_alpha + bg_red * bg_alpha) / 256: + __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); + __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, 
mbg_alpha)), 8); + + __m128i color = _mm_packus_epi16(color_lo, color_hi); + _mm_storeu_si128((__m128i*)dest, color); + + source += 4; + dest += pitch; + } while (--count); + } + else + { + SSE_SHADE_INIT(light, shade_constants); + + __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); + __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); + + do { + uint32_t p0 = source[0]; + uint32_t p1 = source[1]; + uint32_t p2 = source[2]; + uint32_t p3 = source[3]; + + // shade_pal_index: + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE(fg, shade_constants); + + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + + // unpack bg: + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + // (fg_red * fg_alpha + bg_red * bg_alpha) / 256: + __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); + __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); + + __m128i color = _mm_packus_epi16(color_lo, color_hi); + _mm_storeu_si128((__m128i*)dest, color); + + source += 4; + dest += pitch; + } while (--count); + } + } +#endif +}; + +class RtShaded1colRGBACommand : public DrawerCommand +{ + int hx; + int sx; + int yl; + int yh; + lighttable_t *dc_colormap; + BYTE *dc_destorg; + int dc_pitch; + int dc_color; + fixed_t dc_light; + +public: + RtShaded1colRGBACommand(int hx, int sx, int yl, int yh) + { + this->hx = hx; + this->sx = sx; + this->yl = yl; + this->yh = yh; + + dc_colormap = ::dc_colormap; + dc_destorg = ::dc_destorg; + dc_pitch = ::dc_pitch; + dc_color = ::dc_color; + dc_light = ::dc_light; + } + + void 
Execute(DrawerThread *thread) override + { + BYTE *colormap; + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + + count = yh - yl; + if (count < 0) + return; + count++; + + colormap = dc_colormap; + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; + source = &thread->dc_temp_rgba[yl * 4 + hx]; + pitch = dc_pitch; + + uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + do { + uint32_t alpha = colormap[*source]; + uint32_t inv_alpha = 64 - alpha; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = (fg_red * alpha + bg_red * inv_alpha) / 64; + uint32_t green = (fg_green * alpha + bg_green * inv_alpha) / 64; + uint32_t blue = (fg_blue * alpha + bg_blue * inv_alpha) / 64; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + source += 4; + dest += pitch; + } while (--count); + } +}; + +class RtShaded4colsRGBACommand : public DrawerCommand +{ + int sx; + int yl; + int yh; + lighttable_t *dc_colormap; + int dc_color; + BYTE *dc_destorg; + int dc_pitch; + fixed_t dc_light; + +public: + RtShaded4colsRGBACommand(int sx, int yl, int yh) + { + this->sx = sx; + this->yl = yl; + this->yh = yh; + + dc_colormap = ::dc_colormap; + dc_color = ::dc_color; + dc_destorg = ::dc_destorg; + dc_pitch = ::dc_pitch; + dc_light = ::dc_light; + } + +#ifdef NO_SSE + void Execute(DrawerThread *thread) override + { + BYTE *colormap; + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + + count = yh - yl; + if (count < 0) + return; + count++; + + colormap = dc_colormap; + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; + source = &thread->dc_temp_rgba[yl * 4]; + pitch = dc_pitch; + + uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 
8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + do { + for (int i = 0; i < 4; i++) + { + uint32_t alpha = colormap[source[i]]; + uint32_t inv_alpha = 64 - alpha; + + uint32_t bg_red = (dest[i] >> 16) & 0xff; + uint32_t bg_green = (dest[i] >> 8) & 0xff; + uint32_t bg_blue = (dest[i]) & 0xff; + + uint32_t red = (fg_red * alpha + bg_red * inv_alpha) / 64; + uint32_t green = (fg_green * alpha + bg_green * inv_alpha) / 64; + uint32_t blue = (fg_blue * alpha + bg_blue * inv_alpha) / 64; + + dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; + } + source += 4; + dest += pitch; + } while (--count); + } +#else + void Execute(DrawerThread *thread) override + { + BYTE *colormap; + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + + count = yh - yl; + if (count < 0) + return; + count++; + + colormap = dc_colormap; + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; + source = &thread->dc_temp_rgba[yl * 4]; + pitch = dc_pitch; + + __m128i fg = _mm_unpackhi_epi8(_mm_set1_epi32(shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light))), _mm_setzero_si128()); + __m128i alpha_one = _mm_set1_epi16(64); + + do { + uint32_t p0 = colormap[source[0]]; + uint32_t p1 = colormap[source[1]]; + uint32_t p2 = colormap[source[2]]; + uint32_t p3 = colormap[source[3]]; + + __m128i alpha_hi = _mm_set_epi16(64, p3, p3, p3, 64, p2, p2, p2); + __m128i alpha_lo = _mm_set_epi16(64, p1, p1, p1, 64, p0, p0, p0); + __m128i inv_alpha_hi = _mm_subs_epu16(alpha_one, alpha_hi); + __m128i inv_alpha_lo = _mm_subs_epu16(alpha_one, alpha_lo); + + // unpack bg: + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + // (fg_red * alpha + bg_red * inv_alpha) / 64: + __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg, alpha_hi), _mm_mullo_epi16(bg_hi, inv_alpha_hi)), 6); + __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg, 
alpha_lo), _mm_mullo_epi16(bg_lo, inv_alpha_lo)), 6); + + __m128i color = _mm_packus_epi16(color_lo, color_hi); + _mm_storeu_si128((__m128i*)dest, color); + + source += 4; + dest += pitch; + } while (--count); + } +#endif +}; + +class RtAddClamp1colRGBACommand : public DrawerCommand +{ + int hx; + int sx; + int yl; + int yh; + BYTE *dc_destorg; + int dc_pitch; + fixed_t dc_light; + ShadeConstants dc_shade_constants; + fixed_t dc_srcalpha; + fixed_t dc_destalpha; + +public: + RtAddClamp1colRGBACommand(int hx, int sx, int yl, int yh) + { + this->hx = hx; + this->sx = sx; + this->yl = yl; + this->yh = yh; + + dc_destorg = ::dc_destorg; + dc_pitch = ::dc_pitch; + dc_light = ::dc_light; + dc_shade_constants = ::dc_shade_constants; + dc_srcalpha = ::dc_srcalpha; + dc_destalpha = ::dc_destalpha; + } + + void Execute(DrawerThread *thread) override + { + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + + count = yh - yl; + if (count < 0) + return; + count++; + + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; + source = &thread->dc_temp_rgba[yl * 4 + hx]; + pitch = dc_pitch; + + uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + do { + uint32_t fg = shade_pal_index(*source, light, shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + source += 4; + dest += pitch; + } while (--count); + } +}; + 
+class RtAddClamp4colsRGBACommand : public DrawerCommand +{ + int sx; + int yl; + int yh; + BYTE *dc_destorg; + int dc_pitch; + fixed_t dc_light; + fixed_t dc_srcalpha; + fixed_t dc_destalpha; + ShadeConstants dc_shade_constants; + +public: + RtAddClamp4colsRGBACommand(int sx, int yl, int yh) + { + this->sx = sx; + this->yl = yl; + this->yh = yh; + + dc_destorg = ::dc_destorg; + dc_pitch = ::dc_pitch; + dc_light = ::dc_light; + dc_srcalpha = ::dc_srcalpha; + dc_destalpha = ::dc_destalpha; + dc_shade_constants = ::dc_shade_constants; + } + +#ifdef NO_SSE + void Execute(DrawerThread *thread) override + { + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + + count = yh - yl; + if (count < 0) + return; + count++; + + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; + source = &thread->dc_temp_rgba[yl * 4]; + pitch = dc_pitch; + + uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + do { + for (int i = 0; i < 4; i++) + { + uint32_t fg = shade_pal_index(source[i], light, shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (dest[i] >> 16) & 0xff; + uint32_t bg_green = (dest[i] >> 8) & 0xff; + uint32_t bg_blue = (dest[i]) & 0xff; + + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); + + dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; + } + source += 4; + dest += pitch; + } while (--count); + } +#else + void Execute(DrawerThread *thread) override + { + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + + count = yh - yl; + if (count < 0) + return; + count++; + + dest = ylookup[yl] + sx + 
(uint32_t*)dc_destorg; + source = &thread->dc_temp_rgba[yl * 4]; + pitch = dc_pitch; + + uint32_t light = calc_light_multiplier(dc_light); + uint32_t *palette = (uint32_t*)GPalette.BaseColors; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + ShadeConstants shade_constants = dc_shade_constants; + + if (shade_constants.simple_shade) + { + SSE_SHADE_SIMPLE_INIT(light); + + __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); + __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); + + do { + uint32_t p0 = source[0]; + uint32_t p1 = source[1]; + uint32_t p2 = source[2]; + uint32_t p3 = source[3]; + + // shade_pal_index: + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE_SIMPLE(fg); + + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + + // unpack bg: + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + // (fg_red * fg_alpha + bg_red * bg_alpha) / 256: + __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); + __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); + + __m128i color = _mm_packus_epi16(color_lo, color_hi); + _mm_storeu_si128((__m128i*)dest, color); + + source += 4; + dest += pitch; + } while (--count); + } + else + { + SSE_SHADE_INIT(light, shade_constants); + + __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); + __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); + + do { + uint32_t p0 = source[0]; + uint32_t 
p1 = source[1]; + uint32_t p2 = source[2]; + uint32_t p3 = source[3]; + + // shade_pal_index: + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE(fg, shade_constants); + + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + + // unpack bg: + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + // (fg_red * fg_alpha + bg_red * bg_alpha) / 256: + __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); + __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); + + __m128i color = _mm_packus_epi16(color_lo, color_hi); + _mm_storeu_si128((__m128i*)dest, color); + + source += 4; + dest += pitch; + } while (--count); + } + } +#endif +}; + +class RtSubClamp1colRGBACommand : public DrawerCommand +{ + int hx; + int sx; + int yl; + int yh; + BYTE *dc_destorg; + int dc_pitch; + fixed_t dc_light; + fixed_t dc_srcalpha; + fixed_t dc_destalpha; + ShadeConstants dc_shade_constants; + +public: + RtSubClamp1colRGBACommand(int hx, int sx, int yl, int yh) + { + this->hx = hx; + this->sx = sx; + this->yl = yl; + this->yh = yh; + + dc_destorg = ::dc_destorg; + dc_pitch = ::dc_pitch; + dc_light = ::dc_light; + dc_srcalpha = ::dc_srcalpha; + dc_destalpha = ::dc_destalpha; + dc_shade_constants = ::dc_shade_constants; + } + + void Execute(DrawerThread *thread) override + { + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + + count = yh - yl; + if (count < 0) + return; + count++; + + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; + source = &thread->dc_temp_rgba[yl * 4 + hx]; + pitch = dc_pitch; + + uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; + + 
uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + do { + uint32_t fg = shade_pal_index(*source, light, shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + source += 4; + dest += pitch; + } while (--count); + } +}; + +class RtSubClamp4colsRGBACommand : public DrawerCommand +{ + int sx; + int yl; + int yh; + BYTE *dc_destorg; + int dc_pitch; + fixed_t dc_light; + fixed_t dc_srcalpha; + fixed_t dc_destalpha; + ShadeConstants dc_shade_constants; + +public: + RtSubClamp4colsRGBACommand(int sx, int yl, int yh) + { + this->sx = sx; + this->yl = yl; + this->yh = yh; + + dc_destorg = ::dc_destorg; + dc_pitch = ::dc_pitch; + dc_light = ::dc_light; + dc_srcalpha = ::dc_srcalpha; + dc_destalpha = ::dc_destalpha; + dc_shade_constants = ::dc_shade_constants; + } + + void Execute(DrawerThread *thread) override + { + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + + count = yh - yl; + if (count < 0) + return; + count++; + + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; + source = &thread->dc_temp_rgba[yl * 4]; + pitch = dc_pitch; + + uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + do { + for (int i = 0; i < 4; i++) + { + uint32_t fg = shade_pal_index(source[i], 
light, shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (dest[i] >> 16) & 0xff; + uint32_t bg_green = (dest[i] >> 8) & 0xff; + uint32_t bg_blue = (dest[i]) & 0xff; + + uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; + + dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; + } + + source += 4; + dest += pitch; + } while (--count); + } +}; + +class RtRevSubClamp1colRGBACommand : public DrawerCommand +{ + int hx; + int sx; + int yl; + int yh; + BYTE *dc_destorg; + int dc_pitch; + fixed_t dc_light; + fixed_t dc_srcalpha; + fixed_t dc_destalpha; + ShadeConstants dc_shade_constants; + +public: + RtRevSubClamp1colRGBACommand(int hx, int sx, int yl, int yh) + { + this->hx = hx; + this->sx = sx; + this->yl = yl; + this->yh = yh; + + dc_destorg = ::dc_destorg; + dc_pitch = ::dc_pitch; + dc_light = ::dc_light; + dc_srcalpha = ::dc_srcalpha; + dc_destalpha = ::dc_destalpha; + dc_shade_constants = ::dc_shade_constants; + } + + void Execute(DrawerThread *thread) override + { + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + + count = yh - yl; + if (count < 0) + return; + count++; + + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; + source = &thread->dc_temp_rgba[yl * 4 + hx]; + pitch = dc_pitch; + + uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + do { + uint32_t fg = shade_pal_index(*source, light, shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + 
+ uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + source += 4; + dest += pitch; + } while (--count); + } +}; + +class RtRevSubClamp4colsRGBACommand : public DrawerCommand +{ + int sx; + int yl; + int yh; + BYTE *dc_destorg; + int dc_pitch; + fixed_t dc_light; + fixed_t dc_srcalpha; + fixed_t dc_destalpha; + ShadeConstants dc_shade_constants; + +public: + RtRevSubClamp4colsRGBACommand(int sx, int yl, int yh) + { + this->sx = sx; + this->yl = yl; + this->yh = yh; + + dc_destorg = ::dc_destorg; + dc_pitch = ::dc_pitch; + dc_light = ::dc_light; + dc_srcalpha = ::dc_srcalpha; + dc_destalpha = ::dc_destalpha; + dc_shade_constants = ::dc_shade_constants; + } + + void Execute(DrawerThread *thread) override + { + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + + count = yh - yl; + if (count < 0) + return; + count++; + + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; + source = &thread->dc_temp_rgba[yl * 4]; + pitch = dc_pitch; + + uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + do { + for (int i = 0; i < 4; i++) + { + uint32_t fg = shade_pal_index(source[i], light, shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (dest[i] >> 16) & 0xff; + uint32_t bg_green = (dest[i] >> 8) & 0xff; + uint32_t bg_blue = (dest[i]) & 0xff; + + uint32_t red = 
clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; + + dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; + } + + source += 4; + dest += pitch; + } while (--count); + } +}; + +class RtInitColsRGBACommand : public DrawerCommand +{ + BYTE *buff; + +public: + RtInitColsRGBACommand(BYTE *buff) + { + this->buff = buff; + } + + void Execute(DrawerThread *thread) override + { + thread->dc_temp_rgba = buff == NULL ? thread->dc_temp_rgbabuff_rgba : (uint32_t*)buff; + } +}; + +class DrawColumnHorizRGBACommand : public DrawerCommand +{ + int dc_count; + fixed_t dc_iscale; + fixed_t dc_texturefrac; + const BYTE *dc_source; + int dc_x; + int dc_yl; + int dc_yh; + +public: + DrawColumnHorizRGBACommand() + { + dc_count = ::dc_count; + dc_iscale = ::dc_iscale; + dc_texturefrac = ::dc_texturefrac; + dc_source = ::dc_source; + dc_x = ::dc_x; + dc_yl = ::dc_yl; + dc_yh = ::dc_yh; + } + + void Execute(DrawerThread *thread) override + { + int count = dc_count; + uint32_t *dest; + fixed_t fracstep; + fixed_t frac; + + if (count <= 0) + return; + + { + int x = dc_x & 3; + dest = &thread->dc_temp_rgba[x + 4 * dc_yl]; + } + fracstep = dc_iscale; + frac = dc_texturefrac; + + { + const BYTE *source = dc_source; + + if (count & 1) { + *dest = source[frac >> FRACBITS]; dest += 4; frac += fracstep; + } + if (count & 2) { + dest[0] = source[frac >> FRACBITS]; frac += fracstep; + dest[4] = source[frac >> FRACBITS]; frac += fracstep; + dest += 8; + } + if (count & 4) { + dest[0] = source[frac >> FRACBITS]; frac += fracstep; + dest[4] = source[frac >> FRACBITS]; frac += fracstep; + dest[8] = source[frac >> FRACBITS]; frac += fracstep; + dest[12] = source[frac >> FRACBITS]; frac += fracstep; + dest += 16; + } + count >>= 3; + if (!count) return; + 
+ do + { + dest[0] = source[frac >> FRACBITS]; frac += fracstep; + dest[4] = source[frac >> FRACBITS]; frac += fracstep; + dest[8] = source[frac >> FRACBITS]; frac += fracstep; + dest[12] = source[frac >> FRACBITS]; frac += fracstep; + dest[16] = source[frac >> FRACBITS]; frac += fracstep; + dest[20] = source[frac >> FRACBITS]; frac += fracstep; + dest[24] = source[frac >> FRACBITS]; frac += fracstep; + dest[28] = source[frac >> FRACBITS]; frac += fracstep; + dest += 32; + } while (--count); + } + } +}; + +class FillColumnHorizRGBACommand : public DrawerCommand +{ + int dc_x; + int dc_yl; + int dc_yh; + int dc_count; + int dc_color; + +public: + FillColumnHorizRGBACommand() + { + dc_x = ::dc_x; + dc_count = ::dc_count; + dc_color = ::dc_color; + dc_yl = ::dc_yl; + dc_yh = ::dc_yh; + } + + void Execute(DrawerThread *thread) override + { + int count = dc_count; + int color = dc_color; + uint32_t *dest; + + if (count <= 0) + return; + + { + int x = dc_x & 3; + dest = &thread->dc_temp_rgba[x + 4 * dc_yl]; + } + + if (count & 1) { + *dest = color; + dest += 4; + } + if (!(count >>= 1)) + return; + do { + dest[0] = color; dest[4] = color; + dest += 8; + } while (--count); + } +}; + +///////////////////////////////////////////////////////////////////////////// + // Copies one span at hx to the screen at sx. 
void rt_copy1col_RGBA_c (int hx, int sx, int yl, int yh) { - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &dc_temp_rgba[yl*4 + hx]; - pitch = dc_pitch; - - if (count & 1) { - *dest = *source; - source += 4; - dest += pitch; - } - if (count & 2) { - dest[0] = source[0]; - dest[pitch] = source[4]; - source += 8; - dest += pitch*2; - } - if (!(count >>= 2)) - return; - - do { - dest[0] = source[0]; - dest[pitch] = source[4]; - dest[pitch*2] = source[8]; - dest[pitch*3] = source[12]; - source += 16; - dest += pitch*4; - } while (--count); + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Copies all four spans to the screen starting at sx. @@ -108,293 +1615,23 @@ void rt_copy4cols_RGBA_c (int sx, int yl, int yh) // Maps one span at hx to the screen at sx. void rt_map1col_RGBA_c (int hx, int sx, int yl, int yh) { - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &dc_temp_rgba[yl*4 + hx]; - pitch = dc_pitch; - - if (count & 1) { - *dest = shade_pal_index(*source, light, shade_constants); - source += 4; - dest += pitch; - } - if (!(count >>= 1)) - return; - - do { - dest[0] = shade_pal_index(source[0], light, shade_constants); - dest[pitch] = shade_pal_index(source[4], light, shade_constants); - source += 8; - dest += pitch*2; - } while (--count); + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Maps all four spans to the screen starting at sx. 
void rt_map4cols_RGBA_c (int sx, int yl, int yh) { - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &dc_temp_rgba[yl*4]; - pitch = dc_pitch; - - if (count & 1) { - dest[0] = shade_pal_index(source[0], light, shade_constants); - dest[1] = shade_pal_index(source[1], light, shade_constants); - dest[2] = shade_pal_index(source[2], light, shade_constants); - dest[3] = shade_pal_index(source[3], light, shade_constants); - source += 4; - dest += pitch; - } - if (!(count >>= 1)) - return; - - do { - dest[0] = shade_pal_index(source[0], light, shade_constants); - dest[1] = shade_pal_index(source[1], light, shade_constants); - dest[2] = shade_pal_index(source[2], light, shade_constants); - dest[3] = shade_pal_index(source[3], light, shade_constants); - dest[pitch] = shade_pal_index(source[4], light, shade_constants); - dest[pitch + 1] = shade_pal_index(source[5], light, shade_constants); - dest[pitch + 2] = shade_pal_index(source[6], light, shade_constants); - dest[pitch + 3] = shade_pal_index(source[7], light, shade_constants); - source += 8; - dest += pitch*2; - } while (--count); -} - -// Maps all four spans to the screen starting at sx. 
-void rt_map4cols_RGBA_SSE(int sx, int yl, int yh) -{ - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - - count = yh - yl; - if (count < 0) - return; - count++; - - ShadeConstants shade_constants = dc_shade_constants; - uint32_t light = calc_light_multiplier(dc_light); - uint32_t *palette = (uint32_t*)GPalette.BaseColors; - - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &dc_temp_rgba[yl * 4]; - pitch = dc_pitch; - - if (shade_constants.simple_shade) - { - SSE_SHADE_SIMPLE_INIT(light); - - if (count & 1) { - uint32_t p0 = source[0]; - uint32_t p1 = source[1]; - uint32_t p2 = source[2]; - uint32_t p3 = source[3]; - - // shade_pal_index: - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - SSE_SHADE_SIMPLE(fg); - _mm_storeu_si128((__m128i*)dest, fg); - - source += 4; - dest += pitch; - } - if (!(count >>= 1)) - return; - - do { - // shade_pal_index 0-3 - { - uint32_t p0 = source[0]; - uint32_t p1 = source[1]; - uint32_t p2 = source[2]; - uint32_t p3 = source[3]; - - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - SSE_SHADE_SIMPLE(fg); - _mm_storeu_si128((__m128i*)dest, fg); - } - - // shade_pal_index 4-7 (pitch) - { - uint32_t p0 = source[4]; - uint32_t p1 = source[5]; - uint32_t p2 = source[6]; - uint32_t p3 = source[7]; - - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - SSE_SHADE_SIMPLE(fg); - _mm_storeu_si128((__m128i*)(dest + pitch), fg); - } - - source += 8; - dest += pitch * 2; - } while (--count); - } - else - { - SSE_SHADE_INIT(light, shade_constants); - - if (count & 1) { - uint32_t p0 = source[0]; - uint32_t p1 = source[1]; - uint32_t p2 = source[2]; - uint32_t p3 = source[3]; - - // shade_pal_index: - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - SSE_SHADE(fg, shade_constants); - _mm_storeu_si128((__m128i*)dest, fg); - - source += 4; - dest += pitch; - } - if (!(count >>= 1)) - return; - - do 
{ - // shade_pal_index 0-3 - { - uint32_t p0 = source[0]; - uint32_t p1 = source[1]; - uint32_t p2 = source[2]; - uint32_t p3 = source[3]; - - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - SSE_SHADE(fg, shade_constants); - _mm_storeu_si128((__m128i*)dest, fg); - } - - // shade_pal_index 4-7 (pitch) - { - uint32_t p0 = source[4]; - uint32_t p1 = source[5]; - uint32_t p2 = source[6]; - uint32_t p3 = source[7]; - - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - SSE_SHADE(fg, shade_constants); - _mm_storeu_si128((__m128i*)(dest + pitch), fg); - } - - source += 8; - dest += pitch * 2; - } while (--count); - } + DrawerCommandQueue::QueueCommand(sx, yl, yh); } void rt_Translate1col_RGBA_c(const BYTE *translation, int hx, int yl, int yh) { - int count = yh - yl + 1; - uint32_t *source = &dc_temp_rgba[yl*4 + hx]; - - // Things we do to hit the compiler's optimizer with a clue bat: - // 1. Parallelism is explicitly spelled out by using a separate - // C instruction for each assembly instruction. GCC lets me - // have four temporaries, but VC++ spills to the stack with - // more than two. Two is probably optimal, anyway. - // 2. The results of the translation lookups are explicitly - // stored in byte-sized variables. This causes the VC++ code - // to use byte mov instructions in most cases; for apparently - // random reasons, it will use movzx for some places. GCC - // ignores this and uses movzx always. - - // Do 8 rows at a time. 
- for (int count8 = count >> 3; count8; --count8) - { - int c0, c1; - BYTE b0, b1; - - c0 = source[0]; c1 = source[4]; - b0 = translation[c0]; b1 = translation[c1]; - source[0] = b0; source[4] = b1; - - c0 = source[8]; c1 = source[12]; - b0 = translation[c0]; b1 = translation[c1]; - source[8] = b0; source[12] = b1; - - c0 = source[16]; c1 = source[20]; - b0 = translation[c0]; b1 = translation[c1]; - source[16] = b0; source[20] = b1; - - c0 = source[24]; c1 = source[28]; - b0 = translation[c0]; b1 = translation[c1]; - source[24] = b0; source[28] = b1; - - source += 32; - } - // Finish by doing 1 row at a time. - for (count &= 7; count; --count, source += 4) - { - source[0] = translation[source[0]]; - } + DrawerCommandQueue::QueueCommand(translation, hx, yl, yh); } void rt_Translate4cols_RGBA_c(const BYTE *translation, int yl, int yh) { - int count = yh - yl + 1; - uint32_t *source = &dc_temp_rgba[yl*4]; - int c0, c1; - BYTE b0, b1; - - // Do 2 rows at a time. - for (int count8 = count >> 1; count8; --count8) - { - c0 = source[0]; c1 = source[1]; - b0 = translation[c0]; b1 = translation[c1]; - source[0] = b0; source[1] = b1; - - c0 = source[2]; c1 = source[3]; - b0 = translation[c0]; b1 = translation[c1]; - source[2] = b0; source[3] = b1; - - c0 = source[4]; c1 = source[5]; - b0 = translation[c0]; b1 = translation[c1]; - source[4] = b0; source[5] = b1; - - c0 = source[6]; c1 = source[7]; - b0 = translation[c0]; b1 = translation[c1]; - source[6] = b0; source[7] = b1; - - source += 8; - } - // Do the final row if count was odd. - if (count & 1) - { - c0 = source[0]; c1 = source[1]; - b0 = translation[c0]; b1 = translation[c1]; - source[0] = b0; source[1] = b1; - - c0 = source[2]; c1 = source[3]; - b0 = translation[c0]; b1 = translation[c1]; - source[2] = b0; source[3] = b1; - } + DrawerCommandQueue::QueueCommand(translation, yl, yh); } // Translates one span at hx to the screen at sx. 
@@ -414,195 +1651,15 @@ void rt_tlate4cols_RGBA_c (int sx, int yl, int yh) // Adds one span at hx to the screen at sx without clamping. void rt_add1col_RGBA_c (int hx, int sx, int yl, int yh) { - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &dc_temp_rgba[yl*4 + hx]; - pitch = dc_pitch; - - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - do { - uint32_t fg = shade_pal_index(*source, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - - source += 4; - dest += pitch; - } while (--count); + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Adds all four spans to the screen starting at sx without clamping. 
void rt_add4cols_RGBA_c (int sx, int yl, int yh) { - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &dc_temp_rgba[yl*4]; - pitch = dc_pitch; - - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - do { - for (int i = 0; i < 4; i++) - { - uint32_t fg = shade_pal_index(source[i], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (dest[i] >> 16) & 0xff; - uint32_t bg_green = (dest[i] >> 8) & 0xff; - uint32_t bg_blue = (dest[i]) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - } - - source += 4; - dest += pitch; - } while (--count); + DrawerCommandQueue::QueueCommand(sx, yl, yh); } -// Adds all four spans to the screen starting at sx without clamping. 
-#ifndef NO_SSE -void rt_add4cols_RGBA_SSE(int sx, int yl, int yh) -{ - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - - count = yh - yl; - if (count < 0) - return; - count++; - - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &dc_temp_rgba[yl * 4]; - pitch = dc_pitch; - - uint32_t light = calc_light_multiplier(dc_light); - uint32_t *palette = (uint32_t*)GPalette.BaseColors; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - ShadeConstants shade_constants = dc_shade_constants; - - if (shade_constants.simple_shade) - { - SSE_SHADE_SIMPLE_INIT(light); - - __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); - __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); - - do { - uint32_t p0 = source[0]; - uint32_t p1 = source[1]; - uint32_t p2 = source[2]; - uint32_t p3 = source[3]; - - // shade_pal_index: - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - SSE_SHADE_SIMPLE(fg); - - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - - // unpack bg: - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); - - // (fg_red * fg_alpha + bg_red * bg_alpha) / 256: - __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); - __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); - - __m128i color = _mm_packus_epi16(color_lo, color_hi); - _mm_storeu_si128((__m128i*)dest, color); - - source += 4; - dest += pitch; - } while (--count); - } - else - { - SSE_SHADE_INIT(light, shade_constants); - - __m128i mfg_alpha = _mm_set_epi16(256, 
fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); - __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); - - do { - uint32_t p0 = source[0]; - uint32_t p1 = source[1]; - uint32_t p2 = source[2]; - uint32_t p3 = source[3]; - - // shade_pal_index: - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - SSE_SHADE(fg, shade_constants); - - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - - // unpack bg: - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); - - // (fg_red * fg_alpha + bg_red * bg_alpha) / 256: - __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); - __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); - - __m128i color = _mm_packus_epi16(color_lo, color_hi); - _mm_storeu_si128((__m128i*)dest, color); - - source += 4; - dest += pitch; - } while (--count); - } -} -#endif - // Translates and adds one span at hx to the screen at sx without clamping. void rt_tlateadd1col_RGBA_c (int hx, int sx, int yl, int yh) { @@ -620,332 +1677,27 @@ void rt_tlateadd4cols_RGBA_c(int sx, int yl, int yh) // Shades one span at hx to the screen at sx. 
void rt_shaded1col_RGBA_c (int hx, int sx, int yl, int yh) { - BYTE *colormap; - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - - colormap = dc_colormap; - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &dc_temp_rgba[yl*4 + hx]; - pitch = dc_pitch; - - uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - do { - uint32_t alpha = colormap[*source]; - uint32_t inv_alpha = 64 - alpha; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = (fg_red * alpha + bg_red * inv_alpha) / 64; - uint32_t green = (fg_green * alpha + bg_green * inv_alpha) / 64; - uint32_t blue = (fg_blue * alpha + bg_blue * inv_alpha) / 64; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - source += 4; - dest += pitch; - } while (--count); + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Shades all four spans to the screen starting at sx. 
void rt_shaded4cols_RGBA_c (int sx, int yl, int yh) { - BYTE *colormap; - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - - colormap = dc_colormap; - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &dc_temp_rgba[yl*4]; - pitch = dc_pitch; - - uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - do { - for (int i = 0; i < 4; i++) - { - uint32_t alpha = colormap[source[i]]; - uint32_t inv_alpha = 64 - alpha; - - uint32_t bg_red = (dest[i] >> 16) & 0xff; - uint32_t bg_green = (dest[i] >> 8) & 0xff; - uint32_t bg_blue = (dest[i]) & 0xff; - - uint32_t red = (fg_red * alpha + bg_red * inv_alpha) / 64; - uint32_t green = (fg_green * alpha + bg_green * inv_alpha) / 64; - uint32_t blue = (fg_blue * alpha + bg_blue * inv_alpha) / 64; - - dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - } - source += 4; - dest += pitch; - } while (--count); + DrawerCommandQueue::QueueCommand(sx, yl, yh); } -// Shades all four spans to the screen starting at sx. 
-#ifndef NO_SSE -void rt_shaded4cols_RGBA_SSE(int sx, int yl, int yh) -{ - BYTE *colormap; - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - - count = yh - yl; - if (count < 0) - return; - count++; - - colormap = dc_colormap; - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &dc_temp_rgba[yl * 4]; - pitch = dc_pitch; - - __m128i fg = _mm_unpackhi_epi8(_mm_set1_epi32(shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light))), _mm_setzero_si128()); - __m128i alpha_one = _mm_set1_epi16(64); - - do { - uint32_t p0 = colormap[source[0]]; - uint32_t p1 = colormap[source[1]]; - uint32_t p2 = colormap[source[2]]; - uint32_t p3 = colormap[source[3]]; - - __m128i alpha_hi = _mm_set_epi16(64, p3, p3, p3, 64, p2, p2, p2); - __m128i alpha_lo = _mm_set_epi16(64, p1, p1, p1, 64, p0, p0, p0); - __m128i inv_alpha_hi = _mm_subs_epu16(alpha_one, alpha_hi); - __m128i inv_alpha_lo = _mm_subs_epu16(alpha_one, alpha_lo); - - // unpack bg: - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); - - // (fg_red * alpha + bg_red * inv_alpha) / 64: - __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg, alpha_hi), _mm_mullo_epi16(bg_hi, inv_alpha_hi)), 6); - __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg, alpha_lo), _mm_mullo_epi16(bg_lo, inv_alpha_lo)), 6); - - __m128i color = _mm_packus_epi16(color_lo, color_hi); - _mm_storeu_si128((__m128i*)dest, color); - - source += 4; - dest += pitch; - } while (--count); -} -#endif - // Adds one span at hx to the screen at sx with clamping. 
void rt_addclamp1col_RGBA_c (int hx, int sx, int yl, int yh) { - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &dc_temp_rgba[yl*4 + hx]; - pitch = dc_pitch; - - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - do { - uint32_t fg = shade_pal_index(*source, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - source += 4; - dest += pitch; - } while (--count); + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Adds all four spans to the screen starting at sx with clamping. 
void rt_addclamp4cols_RGBA_c (int sx, int yl, int yh) { - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &dc_temp_rgba[yl*4]; - pitch = dc_pitch; - - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - do { - for (int i = 0; i < 4; i++) - { - uint32_t fg = shade_pal_index(source[i], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (dest[i] >> 16) & 0xff; - uint32_t bg_green = (dest[i] >> 8) & 0xff; - uint32_t bg_blue = (dest[i]) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - } - source += 4; - dest += pitch; - } while (--count); + DrawerCommandQueue::QueueCommand(sx, yl, yh); } -// Adds all four spans to the screen starting at sx with clamping. 
-#ifndef NO_SSE -void rt_addclamp4cols_RGBA_SSE(int sx, int yl, int yh) -{ - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - - count = yh - yl; - if (count < 0) - return; - count++; - - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &dc_temp_rgba[yl * 4]; - pitch = dc_pitch; - - uint32_t light = calc_light_multiplier(dc_light); - uint32_t *palette = (uint32_t*)GPalette.BaseColors; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - ShadeConstants shade_constants = dc_shade_constants; - - if (shade_constants.simple_shade) - { - SSE_SHADE_SIMPLE_INIT(light); - - __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); - __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); - - do { - uint32_t p0 = source[0]; - uint32_t p1 = source[1]; - uint32_t p2 = source[2]; - uint32_t p3 = source[3]; - - // shade_pal_index: - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - SSE_SHADE_SIMPLE(fg); - - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - - // unpack bg: - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); - - // (fg_red * fg_alpha + bg_red * bg_alpha) / 256: - __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); - __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); - - __m128i color = _mm_packus_epi16(color_lo, color_hi); - _mm_storeu_si128((__m128i*)dest, color); - - source += 4; - dest += pitch; - } while (--count); - } - else - { - SSE_SHADE_INIT(light, shade_constants); - - __m128i mfg_alpha = _mm_set_epi16(256, 
fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); - __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); - - do { - uint32_t p0 = source[0]; - uint32_t p1 = source[1]; - uint32_t p2 = source[2]; - uint32_t p3 = source[3]; - - // shade_pal_index: - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - SSE_SHADE(fg, shade_constants); - - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - - // unpack bg: - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); - - // (fg_red * fg_alpha + bg_red * bg_alpha) / 256: - __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); - __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); - - __m128i color = _mm_packus_epi16(color_lo, color_hi); - _mm_storeu_si128((__m128i*)dest, color); - - source += 4; - dest += pitch; - } while (--count); - } -} -#endif - // Translates and adds one span at hx to the screen at sx with clamping. void rt_tlateaddclamp1col_RGBA_c (int hx, int sx, int yl, int yh) { @@ -963,91 +1715,13 @@ void rt_tlateaddclamp4cols_RGBA_c (int sx, int yl, int yh) // Subtracts one span at hx to the screen at sx with clamping. 
void rt_subclamp1col_RGBA_c (int hx, int sx, int yl, int yh) { - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &dc_temp_rgba[yl*4 + hx]; - pitch = dc_pitch; - - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - do { - uint32_t fg = shade_pal_index(*source, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - source += 4; - dest += pitch; - } while (--count); + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Subtracts all four spans to the screen starting at sx with clamping. 
void rt_subclamp4cols_RGBA_c (int sx, int yl, int yh) { - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &dc_temp_rgba[yl*4]; - pitch = dc_pitch; - - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - do { - for (int i = 0; i < 4; i++) - { - uint32_t fg = shade_pal_index(source[i], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (dest[i] >> 16) & 0xff; - uint32_t bg_green = (dest[i] >> 8) & 0xff; - uint32_t bg_blue = (dest[i]) & 0xff; - - uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; - - dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - } - - source += 4; - dest += pitch; - } while (--count); + DrawerCommandQueue::QueueCommand(sx, yl, yh); } // Translates and subtracts one span at hx to the screen at sx with clamping. @@ -1067,91 +1741,13 @@ void rt_tlatesubclamp4cols_RGBA_c (int sx, int yl, int yh) // Subtracts one span at hx from the screen at sx with clamping. 
void rt_revsubclamp1col_RGBA_c (int hx, int sx, int yl, int yh) { - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &dc_temp_rgba[yl*4 + hx]; - pitch = dc_pitch; - - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - do { - uint32_t fg = shade_pal_index(*source, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - source += 4; - dest += pitch; - } while (--count); + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Subtracts all four spans from the screen starting at sx with clamping. 
void rt_revsubclamp4cols_RGBA_c (int sx, int yl, int yh) { - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &dc_temp_rgba[yl*4]; - pitch = dc_pitch; - - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - do { - for (int i = 0; i < 4; i++) - { - uint32_t fg = shade_pal_index(source[i], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (dest[i] >> 16) & 0xff; - uint32_t bg_green = (dest[i] >> 8) & 0xff; - uint32_t bg_blue = (dest[i]) & 0xff; - - uint32_t red = clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; - - dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - } - - source += 4; - dest += pitch; - } while (--count); + DrawerCommandQueue::QueueCommand(sx, yl, yh); } // Translates and subtracts one span at hx from the screen at sx with clamping. @@ -1172,102 +1768,41 @@ void rt_tlaterevsubclamp4cols_RGBA_c (int sx, int yl, int yh) // call this function to set up the span pointers. void rt_initcols_rgba (BYTE *buff) { - int y; - - dc_temp_rgba = buff == NULL ? 
dc_temp_rgbabuff_rgba : (uint32_t*)buff; - for (y = 3; y >= 0; y--) + for (int y = 3; y >= 0; y--) horizspan[y] = dc_ctspan[y] = &dc_tspans[y][0]; + + DrawerCommandQueue::QueueCommand(buff); +} + +void rt_span_coverage_rgba(int x, int start, int stop) +{ + unsigned int **tspan = &dc_ctspan[x & 3]; + (*tspan)[0] = start; + (*tspan)[1] = stop; + *tspan += 2; } // Stretches a column into a temporary buffer which is later // drawn to the screen along with up to three other columns. void R_DrawColumnHorizP_RGBA_C (void) { - int count = dc_count; - uint32_t *dest; - fixed_t fracstep; - fixed_t frac; + int x = dc_x & 3; + unsigned int **span = &dc_ctspan[x]; + (*span)[0] = dc_yl; + (*span)[1] = dc_yh; + *span += 2; - if (count <= 0) - return; - - { - int x = dc_x & 3; - unsigned int **span; - - span = &dc_ctspan[x]; - (*span)[0] = dc_yl; - (*span)[1] = dc_yh; - *span += 2; - dest = &dc_temp_rgba[x + 4*dc_yl]; - } - fracstep = dc_iscale; - frac = dc_texturefrac; - - { - const BYTE *source = dc_source; - - if (count & 1) { - *dest = source[frac>>FRACBITS]; dest += 4; frac += fracstep; - } - if (count & 2) { - dest[0] = source[frac>>FRACBITS]; frac += fracstep; - dest[4] = source[frac>>FRACBITS]; frac += fracstep; - dest += 8; - } - if (count & 4) { - dest[0] = source[frac>>FRACBITS]; frac += fracstep; - dest[4] = source[frac>>FRACBITS]; frac += fracstep; - dest[8] = source[frac>>FRACBITS]; frac += fracstep; - dest[12]= source[frac>>FRACBITS]; frac += fracstep; - dest += 16; - } - count >>= 3; - if (!count) return; - - do - { - dest[0] = source[frac>>FRACBITS]; frac += fracstep; - dest[4] = source[frac>>FRACBITS]; frac += fracstep; - dest[8] = source[frac>>FRACBITS]; frac += fracstep; - dest[12]= source[frac>>FRACBITS]; frac += fracstep; - dest[16]= source[frac>>FRACBITS]; frac += fracstep; - dest[20]= source[frac>>FRACBITS]; frac += fracstep; - dest[24]= source[frac>>FRACBITS]; frac += fracstep; - dest[28]= source[frac>>FRACBITS]; frac += fracstep; - dest += 32; - } while 
(--count); - } + DrawerCommandQueue::QueueCommand(); } // [RH] Just fills a column with a given color void R_FillColumnHorizP_RGBA_C (void) { - int count = dc_count; - BYTE color = dc_color; - uint32_t *dest; + int x = dc_x & 3; + unsigned int **span = &dc_ctspan[x]; + (*span)[0] = dc_yl; + (*span)[1] = dc_yh; + *span += 2; - if (count <= 0) - return; - - { - int x = dc_x & 3; - unsigned int **span = &dc_ctspan[x]; - - (*span)[0] = dc_yl; - (*span)[1] = dc_yh; - *span += 2; - dest = &dc_temp_rgba[x + 4*dc_yl]; - } - - if (count & 1) { - *dest = color; - dest += 4; - } - if (!(count >>= 1)) - return; - do { - dest[0] = color; dest[4] = color; - dest += 8; - } while (--count); + DrawerCommandQueue::QueueCommand(); } diff --git a/src/r_main.cpp b/src/r_main.cpp index a795f80167..348c701204 100644 --- a/src/r_main.cpp +++ b/src/r_main.cpp @@ -979,6 +979,8 @@ void R_RenderViewToCanvas (AActor *actor, DCanvas *canvas, R_SetupBuffer (); screen->Unlock (); + R_FinishDrawerCommands(); + viewactive = savedviewactive; r_swtruecolor = savedoutputformat; diff --git a/src/r_swrenderer.cpp b/src/r_swrenderer.cpp index 645741a2a5..62190b6064 100644 --- a/src/r_swrenderer.cpp +++ b/src/r_swrenderer.cpp @@ -165,6 +165,7 @@ void FSoftwareRenderer::RenderView(player_t *player) R_RenderActorView (player->mo); // [RH] Let cameras draw onto textures that were visible this frame. 
FCanvasTextureInfo::UpdateAll (); + R_FinishDrawerCommands(); } //========================================================================== diff --git a/src/r_things.cpp b/src/r_things.cpp index c132cc2fd6..b3a2daefe3 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -706,10 +706,7 @@ void R_DrawVisVoxel(vissprite_t *spr, int minslabz, int maxslabz, short *cliptop } else { - unsigned int **tspan = &dc_ctspan[x & 3]; - (*tspan)[0] = span->Start; - (*tspan)[1] = span->Stop - 1; - *tspan += 2; + rt_span_coverage(x, span->Start, span->Stop - 1); } } if (!(flags & DVF_SPANSONLY) && (x & 3) == 3) From c452d0257380799e6b2d89e6177380ae2a948235 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 7 Jun 2016 15:25:11 +0200 Subject: [PATCH 022/912] Added multicore rendering to true color drawers --- src/r_draw.h | 60 ++++- src/r_draw_rgba.cpp | 535 ++++++++++++++++++++++++++++++------------- src/r_drawt_rgba.cpp | 308 +++++++++++++------------ 3 files changed, 603 insertions(+), 300 deletions(-) diff --git a/src/r_draw.h b/src/r_draw.h index 98be57c513..bf73c9dfb1 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -25,6 +25,9 @@ #include "r_defs.h" #include +#include +#include +#include // Spectre/Invisibility. #define FUZZTABLE 50 @@ -74,7 +77,6 @@ extern "C" unsigned int dc_tspans[4][MAXHEIGHT]; extern "C" unsigned int *dc_ctspan[4]; extern "C" unsigned int horizspans[4]; - // [RH] Pointers to the different column and span drawers... // The span blitting interface. 
@@ -443,19 +445,58 @@ void R_SetTranslationMap(lighttable_t *translation); // Wait until all drawers finished executing void R_FinishDrawerCommands(); +class DrawerCommandQueue; + class DrawerThread { public: + std::thread thread; + + // Thread line index of this thread int core = 0; + + // Number of active threads int num_cores = 1; uint32_t dc_temp_rgbabuff_rgba[MAXHEIGHT * 4]; uint32_t *dc_temp_rgba; + + // Checks if a line is rendered by this thread + bool line_skipped_by_thread(int line) + { + return line % num_cores != core; + } + + // The number of lines to skip to reach the first line to be rendered by this thread + int skipped_by_thread(int first_line) + { + return (num_cores - (first_line - core) % num_cores) % num_cores; + } + + // The number of lines to be rendered by this thread + int count_for_thread(int first_line, int count) + { + return (count - skipped_by_thread(first_line) + num_cores - 1) / num_cores; + } + + // Calculate the dest address for the first line to be rendered by this thread + uint32_t *dest_for_thread(int first_line, int pitch, uint32_t *dest) + { + return dest + skipped_by_thread(first_line) * pitch; + } }; class DrawerCommand { +protected: + int dc_dest_y; + public: + DrawerCommand() + { + dc_dest_y = static_cast((dc_dest - dc_destorg) / (dc_pitch * 4)); + } + virtual void Execute(DrawerThread *thread) = 0; }; @@ -467,8 +508,25 @@ class DrawerCommandQueue std::vector commands; + std::vector threads; + + std::mutex start_mutex; + std::condition_variable start_condition; + std::vector active_commands; + bool shutdown_flag = false; + int run_id = 0; + + std::mutex end_mutex; + std::condition_variable end_condition; + int finished_threads = 0; + + void StartThreads(); + void StopThreads(); + static DrawerCommandQueue *Instance(); + ~DrawerCommandQueue(); + public: // Allocate memory valid for the duration of a command execution static void* AllocMemory(size_t size); diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 
9e61bb427a..489716e1f7 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -63,6 +63,11 @@ DrawerCommandQueue *DrawerCommandQueue::Instance() return &queue; } +DrawerCommandQueue::~DrawerCommandQueue() +{ + StopThreads(); +} + void* DrawerCommandQueue::AllocMemory(size_t size) { // Make sure allocations remain 16-byte aligned @@ -81,19 +86,102 @@ void DrawerCommandQueue::Finish() { auto queue = Instance(); - DrawerThread thread; + // Give worker threads something to do: - size_t size = queue->commands.size(); + std::unique_lock start_lock(queue->start_mutex); + queue->active_commands.swap(queue->commands); + queue->run_id++; + start_lock.unlock(); + + queue->StartThreads(); + queue->start_condition.notify_all(); + + // Do one thread ourselves: + + DrawerThread thread; + thread.core = 0; + thread.num_cores = queue->threads.size() + 1; + + size_t size = queue->active_commands.size(); for (size_t i = 0; i < size; i++) { - auto &command = queue->commands[i]; + auto &command = queue->active_commands[i]; command->Execute(&thread); } - for (auto &command : queue->commands) + // Wait for everyone to finish: + + std::unique_lock end_lock(queue->end_mutex); + queue->end_condition.wait(end_lock, [&]() { return queue->finished_threads == queue->threads.size(); }); + + // Clean up batch: + + for (auto &command : queue->active_commands) command->~DrawerCommand(); - queue->commands.clear(); + queue->active_commands.clear(); queue->memorypool_pos = 0; + queue->finished_threads = 0; +} + +void DrawerCommandQueue::StartThreads() +{ + if (!threads.empty()) + return; + + int num_threads = std::thread::hardware_concurrency(); + if (num_threads == 0) + num_threads = 4; + + threads.resize(num_threads - 1); + + for (int i = 0; i < num_threads - 1; i++) + { + DrawerCommandQueue *queue = this; + DrawerThread *thread = &threads[i]; + thread->core = i + 1; + thread->num_cores = num_threads; + thread->thread = std::thread([=]() + { + int run_id = 0; + while (true) + { + // Wait until 
we are signalled to run: + std::unique_lock start_lock(queue->start_mutex); + queue->start_condition.wait(start_lock, [&]() { return queue->run_id != run_id || queue->shutdown_flag; }); + if (queue->shutdown_flag) + break; + run_id = queue->run_id; + start_lock.unlock(); + + // Do the work: + size_t size = queue->active_commands.size(); + for (size_t i = 0; i < size; i++) + { + auto &command = queue->active_commands[i]; + command->Execute(thread); + } + + // Notify main thread that we finished: + std::unique_lock end_lock(queue->end_mutex); + queue->finished_threads++; + end_lock.unlock(); + queue->end_condition.notify_all(); + } + }); + } +} + +void DrawerCommandQueue::StopThreads() +{ + std::unique_lock lock(start_mutex); + shutdown_flag = true; + lock.unlock(); + start_condition.notify_all(); + for (auto &thread : threads) + thread.thread.join(); + threads.clear(); + lock.lock(); + shutdown_flag = false; } ///////////////////////////////////////////////////////////////////////////// @@ -129,28 +217,28 @@ public: fixed_t frac; fixed_t fracstep; - count = dc_count; + count = thread->count_for_thread(dc_dest_y, dc_count); // Zero length, column does not exceed a pixel. if (count <= 0) return; // Framebuffer destination address. - dest = (uint32_t*)dc_dest; + dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); uint32_t light = calc_light_multiplier(dc_light); ShadeConstants shade_constants = dc_shade_constants; // Determine scaling, // which is the only mapping to be done. - fracstep = dc_iscale; - frac = dc_texturefrac; + fracstep = dc_iscale * thread->num_cores; + frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); { // [RH] Get local copies of these variables so that the compiler // has a better chance of optimizing this well. const BYTE *source = dc_source; - int pitch = dc_pitch; + int pitch = dc_pitch * thread->num_cores; // Inner loop that does the actual texture mapping, // e.g. a DDA-lile scaling. 
@@ -190,17 +278,17 @@ public: int count; uint32_t* dest; - count = dc_count; + count = thread->count_for_thread(dc_dest_y, dc_count); if (count <= 0) return; - dest = (uint32_t*)dc_dest; + dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); uint32_t light = calc_light_multiplier(dc_light); { - int pitch = dc_pitch; + int pitch = dc_pitch * thread->num_cores; BYTE color = dc_color; do @@ -235,12 +323,12 @@ public: int count; uint32_t *dest; - count = dc_count; + count = thread->count_for_thread(dc_dest_y, dc_count); if (count <= 0) return; - dest = (uint32_t*)dc_dest; - int pitch = dc_pitch; + dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + int pitch = dc_pitch * thread->num_cores; uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); uint32_t fg_red = (fg >> 24) & 0xff; @@ -286,12 +374,12 @@ public: int count; uint32_t *dest; - count = dc_count; + count = thread->count_for_thread(dc_dest_y, dc_count); if (count <= 0) return; - dest = (uint32_t*)dc_dest; - int pitch = dc_pitch; + dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + int pitch = dc_pitch * thread->num_cores; uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); uint32_t fg_red = (fg >> 24) & 0xff; @@ -337,12 +425,12 @@ public: int count; uint32_t *dest; - count = dc_count; + count = thread->count_for_thread(dc_dest_y, dc_count); if (count <= 0) return; - dest = (uint32_t*)dc_dest; - int pitch = dc_pitch; + dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + int pitch = dc_pitch * thread->num_cores; uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); uint32_t fg_red = (fg >> 24) & 0xff; @@ -388,12 +476,12 @@ public: int count; uint32_t *dest; - count = dc_count; + count = thread->count_for_thread(dc_dest_y, dc_count); if (count <= 0) return; - dest = (uint32_t*)dc_dest; - int pitch = dc_pitch; + dest = 
thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + int pitch = dc_pitch * thread->num_cores; uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); uint32_t fg_red = (fg >> 24) & 0xff; @@ -451,15 +539,13 @@ public: if (dc_yh > fuzzviewheight) dc_yh = fuzzviewheight; - count = dc_yh - dc_yl; + count = thread->count_for_thread(dc_yl, dc_yh - dc_yl + 1); // Zero length. - if (count < 0) + if (count <= 0) return; - count++; - - dest = ylookup[dc_yl] + dc_x + (uint32_t*)dc_destorg; + dest = thread->dest_for_thread(dc_yl, dc_pitch, ylookup[dc_yl] + dc_x + (uint32_t*)dc_destorg); // Note: this implementation assumes this function is only used for the pinky shadow effect (i.e. no other fancy colormap than black) // I'm not sure if this is really always the case or not. @@ -467,7 +553,7 @@ public: { // [RH] Make local copies of global vars to try and improve // the optimizations made by the compiler. - int pitch = dc_pitch; + int pitch = dc_pitch * thread->num_cores; int fuzz = fuzzpos; int cnt; @@ -573,18 +659,18 @@ public: fixed_t frac; fixed_t fracstep; - count = dc_count; + count = thread->count_for_thread(dc_dest_y, dc_count); if (count <= 0) return; - dest = (uint32_t*)dc_dest; + dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - fracstep = dc_iscale; - frac = dc_texturefrac; + fracstep = dc_iscale * thread->num_cores; + frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); { const BYTE *source = dc_source; - int pitch = dc_pitch; + int pitch = dc_pitch * thread->num_cores; uint32_t light = calc_light_multiplier(dc_light); ShadeConstants shade_constants = dc_shade_constants; @@ -649,23 +735,23 @@ public: fixed_t frac; fixed_t fracstep; - count = dc_count; + count = thread->count_for_thread(dc_dest_y, dc_count); if (count <= 0) return; uint32_t light = calc_light_multiplier(dc_light); ShadeConstants shade_constants = dc_shade_constants; - dest = (uint32_t*)dc_dest; + dest = 
thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - fracstep = dc_iscale; - frac = dc_texturefrac; + fracstep = dc_iscale * thread->num_cores; + frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); { // [RH] Local copies of global vars to improve compiler optimizations BYTE *translation = dc_translation; const BYTE *source = dc_source; - int pitch = dc_pitch; + int pitch = dc_pitch * thread->num_cores; do { @@ -710,22 +796,22 @@ public: fixed_t frac; fixed_t fracstep; - count = dc_count; + count = thread->count_for_thread(dc_dest_y, dc_count); if (count <= 0) return; uint32_t light = calc_light_multiplier(dc_light); ShadeConstants shade_constants = dc_shade_constants; - dest = (uint32_t*)dc_dest; + dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - fracstep = dc_iscale; - frac = dc_texturefrac; + fracstep = dc_iscale * thread->num_cores; + frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); { BYTE *translation = dc_translation; const BYTE *source = dc_source; - int pitch = dc_pitch; + int pitch = dc_pitch * thread->num_cores; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); @@ -787,15 +873,15 @@ public: uint32_t *dest; fixed_t frac, fracstep; - count = dc_count; + count = thread->count_for_thread(dc_dest_y, dc_count); if (count <= 0) return; - dest = (uint32_t*)dc_dest; + dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - fracstep = dc_iscale; - frac = dc_texturefrac; + fracstep = dc_iscale * thread->num_cores; + frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); uint32_t fg_red = (fg >> 16) & 0xff; @@ -805,7 +891,7 @@ public: { const BYTE *source = dc_source; BYTE *colormap = dc_colormap; - int pitch = dc_pitch; + int pitch = dc_pitch * thread->num_cores; do { @@ -863,18 +949,18 @@ public: fixed_t frac; 
fixed_t fracstep; - count = dc_count; + count = thread->count_for_thread(dc_dest_y, dc_count); if (count <= 0) return; - dest = (uint32_t*)dc_dest; + dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - fracstep = dc_iscale; - frac = dc_texturefrac; + fracstep = dc_iscale * thread->num_cores; + frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); { const BYTE *source = dc_source; - int pitch = dc_pitch; + int pitch = dc_pitch * thread->num_cores; uint32_t light = calc_light_multiplier(dc_light); ShadeConstants shade_constants = dc_shade_constants; @@ -941,19 +1027,19 @@ public: fixed_t frac; fixed_t fracstep; - count = dc_count; + count = thread->count_for_thread(dc_dest_y, dc_count); if (count <= 0) return; - dest = (uint32_t*)dc_dest; + dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - fracstep = dc_iscale; - frac = dc_texturefrac; + fracstep = dc_iscale * thread->num_cores; + frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); { BYTE *translation = dc_translation; const BYTE *source = dc_source; - int pitch = dc_pitch; + int pitch = dc_pitch * thread->num_cores; uint32_t light = calc_light_multiplier(dc_light); ShadeConstants shade_constants = dc_shade_constants; @@ -1018,18 +1104,18 @@ public: fixed_t frac; fixed_t fracstep; - count = dc_count; + count = thread->count_for_thread(dc_dest_y, dc_count); if (count <= 0) return; - dest = (uint32_t*)dc_dest; + dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - fracstep = dc_iscale; - frac = dc_texturefrac; + fracstep = dc_iscale * thread->num_cores; + frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); { const BYTE *source = dc_source; - int pitch = dc_pitch; + int pitch = dc_pitch * thread->num_cores; uint32_t light = calc_light_multiplier(dc_light); ShadeConstants shade_constants = dc_shade_constants; @@ -1096,19 +1182,19 @@ public: fixed_t frac; fixed_t fracstep; - count = 
dc_count; + count = thread->count_for_thread(dc_dest_y, dc_count); if (count <= 0) return; - dest = (uint32_t*)dc_dest; + dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - fracstep = dc_iscale; - frac = dc_texturefrac; + fracstep = dc_iscale * thread->num_cores; + frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); { BYTE *translation = dc_translation; const BYTE *source = dc_source; - int pitch = dc_pitch; + int pitch = dc_pitch * thread->num_cores; uint32_t light = calc_light_multiplier(dc_light); ShadeConstants shade_constants = dc_shade_constants; @@ -1173,18 +1259,18 @@ public: fixed_t frac; fixed_t fracstep; - count = dc_count; + count = thread->count_for_thread(dc_dest_y, dc_count); if (count <= 0) return; - dest = (uint32_t*)dc_dest; + dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - fracstep = dc_iscale; - frac = dc_texturefrac; + fracstep = dc_iscale * thread->num_cores; + frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); { const BYTE *source = dc_source; - int pitch = dc_pitch; + int pitch = dc_pitch * thread->num_cores; uint32_t light = calc_light_multiplier(dc_light); ShadeConstants shade_constants = dc_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); @@ -1250,19 +1336,19 @@ public: fixed_t frac; fixed_t fracstep; - count = dc_count; + count = thread->count_for_thread(dc_dest_y, dc_count); if (count <= 0) return; - dest = (uint32_t*)dc_dest; + dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - fracstep = dc_iscale; - frac = dc_texturefrac; + fracstep = dc_iscale * thread->num_cores; + frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); { BYTE *translation = dc_translation; const BYTE *source = dc_source; - int pitch = dc_pitch; + int pitch = dc_pitch * thread->num_cores; uint32_t light = calc_light_multiplier(dc_light); ShadeConstants shade_constants = dc_shade_constants; @@ -1329,6 +1415,9 @@ 
public: #ifdef NO_SSE void Execute(DrawerThread *thread) override { + if (thread->line_skipped_by_thread(ds_y)) + return; + dsfixed_t xfrac; dsfixed_t yfrac; dsfixed_t xstep; @@ -1391,6 +1480,9 @@ public: #else void Execute(DrawerThread *thread) override { + if (thread->line_skipped_by_thread(ds_y)) + return; + dsfixed_t xfrac; dsfixed_t yfrac; dsfixed_t xstep; @@ -1572,6 +1664,9 @@ public: void Execute(DrawerThread *thread) override { + if (thread->line_skipped_by_thread(ds_y)) + return; + dsfixed_t xfrac; dsfixed_t yfrac; dsfixed_t xstep; @@ -1671,6 +1766,9 @@ public: void Execute(DrawerThread *thread) override { + if (thread->line_skipped_by_thread(ds_y)) + return; + dsfixed_t xfrac; dsfixed_t yfrac; dsfixed_t xstep; @@ -1789,6 +1887,9 @@ public: void Execute(DrawerThread *thread) override { + if (thread->line_skipped_by_thread(ds_y)) + return; + dsfixed_t xfrac; dsfixed_t yfrac; dsfixed_t xstep; @@ -1917,6 +2018,9 @@ public: void Execute(DrawerThread *thread) override { + if (thread->line_skipped_by_thread(ds_y)) + return; + dsfixed_t xfrac; dsfixed_t yfrac; dsfixed_t xstep; @@ -2035,6 +2139,9 @@ public: void Execute(DrawerThread *thread) override { + if (thread->line_skipped_by_thread(ds_y)) + return; + dsfixed_t xfrac; dsfixed_t yfrac; dsfixed_t xstep; @@ -2149,6 +2256,9 @@ public: void Execute(DrawerThread *thread) override { + if (thread->line_skipped_by_thread(ds_y)) + return; + uint32_t *dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; int count = (ds_x2 - ds_x1 + 1); uint32_t light = calc_light_multiplier(ds_light); @@ -2186,13 +2296,16 @@ public: void Execute(DrawerThread *thread) override { - DWORD fracstep = dc_iscale; - DWORD frac = dc_texturefrac; - int count = dc_count; + int count = thread->count_for_thread(dc_dest_y, dc_count); + if (count <= 0) + return; + + DWORD fracstep = dc_iscale * thread->num_cores; + DWORD frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); const BYTE *source = dc_source; - uint32_t *dest = 
(uint32_t*)dc_dest; + uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); int bits = vlinebits; - int pitch = dc_pitch; + int pitch = dc_pitch * thread->num_cores; uint32_t light = calc_light_multiplier(dc_light); ShadeConstants shade_constants = dc_shade_constants; @@ -2238,8 +2351,12 @@ public: #ifdef NO_SSE void Execute(DrawerThread *thread) override { - uint32_t *dest = (uint32_t*)dc_dest; - int count = dc_count; + int count = thread->count_for_thread(dc_dest_y, dc_count); + if (count <= 0) + return; + + uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + int pitch = dc_pitch * thread->num_cores; int bits = vlinebits; DWORD place; @@ -2250,21 +2367,34 @@ public: ShadeConstants shade_constants = dc_shade_constants; + DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; + DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; + int skipped = thread->skipped_by_thread(dc_dest_y); + for (int i = 0; i < 4; i++) + { + local_vplce[i] += local_vince[i] * skipped; + local_vince[i] *= thread->num_cores; + } + do { - dest[0] = shade_pal_index(bufplce[0][(place = vplce[0]) >> bits], light0, shade_constants); vplce[0] = place + vince[0]; - dest[1] = shade_pal_index(bufplce[1][(place = vplce[1]) >> bits], light1, shade_constants); vplce[1] = place + vince[1]; - dest[2] = shade_pal_index(bufplce[2][(place = vplce[2]) >> bits], light2, shade_constants); vplce[2] = place + vince[2]; - dest[3] = shade_pal_index(bufplce[3][(place = vplce[3]) >> bits], light3, shade_constants); vplce[3] = place + vince[3]; - dest += dc_pitch; + dest[0] = shade_pal_index(bufplce[0][(place = local_vplce[0]) >> bits], light0, shade_constants); local_vplce[0] = place + local_vince[0]; + dest[1] = shade_pal_index(bufplce[1][(place = local_vplce[1]) >> bits], light1, shade_constants); local_vplce[1] = place + local_vince[1]; + dest[2] = shade_pal_index(bufplce[2][(place = local_vplce[2]) >> bits], light2, 
shade_constants); local_vplce[2] = place + local_vince[2]; + dest[3] = shade_pal_index(bufplce[3][(place = local_vplce[3]) >> bits], light3, shade_constants); local_vplce[3] = place + local_vince[3]; + dest += pitch; } while (--count); } #else void Execute(DrawerThread *thread) override { - uint32_t *dest = (uint32_t*)dc_dest; - int count = dc_count; + int count = thread->count_for_thread(dc_dest_y, dc_count); + if (count <= 0) + return; + + uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); int bits = vlinebits; + int pitch = dc_pitch * thread->num_cores; uint32_t light0 = calc_light_multiplier(palookuplight[0]); uint32_t light1 = calc_light_multiplier(palookuplight[1]); @@ -2276,6 +2406,12 @@ public: uint32_t *palette = (uint32_t*)GPalette.BaseColors; DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; + int skipped = thread->skipped_by_thread(dc_dest_y); + for (int i = 0; i < 4; i++) + { + local_vplce[i] += local_vince[i] * skipped; + local_vince[i] *= thread->num_cores; + } if (shade_constants.simple_shade) { @@ -2300,7 +2436,7 @@ public: __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); SSE_SHADE_SIMPLE(fg); _mm_storeu_si128((__m128i*)dest, fg); - dest += dc_pitch; + dest += pitch; } while (--count); } else @@ -2326,7 +2462,7 @@ public: __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); SSE_SHADE(fg, shade_constants); _mm_storeu_si128((__m128i*)dest, fg); - dest += dc_pitch; + dest += pitch; } while (--count); } } @@ -2361,13 +2497,16 @@ public: void Execute(DrawerThread *thread) override { - DWORD fracstep = dc_iscale; - DWORD frac = dc_texturefrac; - int count = dc_count; + int count = thread->count_for_thread(dc_dest_y, dc_count); + if (count <= 0) + return; + + DWORD fracstep = dc_iscale * thread->num_cores; + DWORD frac = dc_texturefrac + dc_iscale * 
thread->skipped_by_thread(dc_dest_y); const BYTE *source = dc_source; - uint32_t *dest = (uint32_t*)dc_dest; + uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); int bits = mvlinebits; - int pitch = dc_pitch; + int pitch = dc_pitch * thread->num_cores; uint32_t light = calc_light_multiplier(dc_light); ShadeConstants shade_constants = dc_shade_constants; @@ -2417,8 +2556,12 @@ public: #ifdef NO_SSE void Execute(DrawerThread *thread) override { - uint32_t *dest = (uint32_t*)dc_dest; - int count = dc_count; + int count = thread->count_for_thread(dc_dest_y, dc_count); + if (count <= 0) + return; + + uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + int pitch = dc_pitch * thread->num_cores; int bits = mvlinebits; DWORD place; @@ -2429,21 +2572,34 @@ public: ShadeConstants shade_constants = dc_shade_constants; + DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; + DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; + int skipped = thread->skipped_by_thread(dc_dest_y); + for (int i = 0; i < 4; i++) + { + local_vplce[i] += local_vince[i] * skipped; + local_vince[i] *= thread->num_cores; + } + do { BYTE pix; - pix = bufplce[0][(place = vplce[0]) >> bits]; if (pix) dest[0] = shade_pal_index(pix, light0, shade_constants); vplce[0] = place + vince[0]; - pix = bufplce[1][(place = vplce[1]) >> bits]; if (pix) dest[1] = shade_pal_index(pix, light1, shade_constants); vplce[1] = place + vince[1]; - pix = bufplce[2][(place = vplce[2]) >> bits]; if (pix) dest[2] = shade_pal_index(pix, light2, shade_constants); vplce[2] = place + vince[2]; - pix = bufplce[3][(place = vplce[3]) >> bits]; if (pix) dest[3] = shade_pal_index(pix, light3, shade_constants); vplce[3] = place + vince[3]; - dest += dc_pitch; + pix = bufplce[0][(place = local_vplce[0]) >> bits]; if (pix) dest[0] = shade_pal_index(pix, light0, shade_constants); local_vplce[0] = place + local_vince[0]; + pix = bufplce[1][(place = 
local_vplce[1]) >> bits]; if (pix) dest[1] = shade_pal_index(pix, light1, shade_constants); local_vplce[1] = place + local_vince[1]; + pix = bufplce[2][(place = local_vplce[2]) >> bits]; if (pix) dest[2] = shade_pal_index(pix, light2, shade_constants); local_vplce[2] = place + local_vince[2]; + pix = bufplce[3][(place = local_vplce[3]) >> bits]; if (pix) dest[3] = shade_pal_index(pix, light3, shade_constants); local_vplce[3] = place + local_vince[3]; + dest += pitch; } while (--count); } #else void Execute(DrawerThread *thread) override { - uint32_t *dest = (uint32_t*)dc_dest; - int count = dc_count; + int count = thread->count_for_thread(dc_dest_y, dc_count); + if (count <= 0) + return; + + uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + int pitch = dc_pitch * thread->num_cores; int bits = mvlinebits; uint32_t light0 = calc_light_multiplier(palookuplight[0]); @@ -2456,6 +2612,12 @@ public: uint32_t *palette = (uint32_t*)GPalette.BaseColors; DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; + int skipped = thread->skipped_by_thread(dc_dest_y); + for (int i = 0; i < 4; i++) + { + local_vplce[i] += local_vince[i] * skipped; + local_vince[i] *= thread->num_cores; + } if (shade_constants.simple_shade) { @@ -2483,7 +2645,7 @@ public: __m128i fg = _mm_set_epi32(palette[pix3], palette[pix2], palette[pix1], palette[pix0]); SSE_SHADE_SIMPLE(fg); _mm_maskmoveu_si128(fg, movemask, (char*)dest); - dest += dc_pitch; + dest += pitch; } while (--count); } else @@ -2512,7 +2674,7 @@ public: __m128i fg = _mm_set_epi32(palette[pix3], palette[pix2], palette[pix1], palette[pix0]); SSE_SHADE(fg, shade_constants); _mm_maskmoveu_si128(fg, movemask, (char*)dest); - dest += dc_pitch; + dest += pitch; } while (--count); } } @@ -2551,13 +2713,16 @@ public: void Execute(DrawerThread *thread) override { - DWORD fracstep = dc_iscale; - DWORD frac = dc_texturefrac; - int count 
= dc_count; + int count = thread->count_for_thread(dc_dest_y, dc_count); + if (count <= 0) + return; + + DWORD fracstep = dc_iscale * thread->num_cores; + DWORD frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); const BYTE *source = dc_source; - uint32_t *dest = (uint32_t*)dc_dest; + uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); int bits = tmvlinebits; - int pitch = dc_pitch; + int pitch = dc_pitch * thread->num_cores; uint32_t light = calc_light_multiplier(dc_light); ShadeConstants shade_constants = dc_shade_constants; @@ -2626,8 +2791,12 @@ public: void Execute(DrawerThread *thread) override { - uint32_t *dest = (uint32_t*)dc_dest; - int count = dc_count; + int count = thread->count_for_thread(dc_dest_y, dc_count); + if (count <= 0) + return; + + uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + int pitch = dc_pitch * thread->num_cores; int bits = tmvlinebits; uint32_t light[4]; @@ -2641,11 +2810,20 @@ public: uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; + DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; + int skipped = thread->skipped_by_thread(dc_dest_y); + for (int i = 0; i < 4; i++) + { + local_vplce[i] += local_vince[i] * skipped; + local_vince[i] *= thread->num_cores; + } + do { for (int i = 0; i < 4; ++i) { - BYTE pix = bufplce[i][vplce[i] >> bits]; + BYTE pix = bufplce[i][local_vplce[i] >> bits]; if (pix != 0) { uint32_t fg = shade_pal_index(pix, light[i], shade_constants); @@ -2663,9 +2841,9 @@ public: dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; } - vplce[i] += vince[i]; + local_vplce[i] += local_vince[i]; } - dest += dc_pitch; + dest += pitch; } while (--count); } }; @@ -2702,13 +2880,16 @@ public: void Execute(DrawerThread *thread) override { - DWORD fracstep = dc_iscale; - DWORD frac = dc_texturefrac; 
- int count = dc_count; + int count = thread->count_for_thread(dc_dest_y, dc_count); + if (count <= 0) + return; + + DWORD fracstep = dc_iscale * thread->num_cores; + DWORD frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); const BYTE *source = dc_source; - uint32_t *dest = (uint32_t*)dc_dest; + uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); int bits = tmvlinebits; - int pitch = dc_pitch; + int pitch = dc_pitch * thread->num_cores; uint32_t light = calc_light_multiplier(dc_light); ShadeConstants shade_constants = dc_shade_constants; @@ -2777,8 +2958,12 @@ public: void Execute(DrawerThread *thread) override { - uint32_t *dest = (uint32_t*)dc_dest; - int count = dc_count; + int count = thread->count_for_thread(dc_dest_y, dc_count); + if (count <= 0) + return; + + uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + int pitch = dc_pitch * thread->num_cores; int bits = tmvlinebits; uint32_t light[4]; @@ -2792,11 +2977,20 @@ public: uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; + DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; + int skipped = thread->skipped_by_thread(dc_dest_y); + for (int i = 0; i < 4; i++) + { + local_vplce[i] += local_vince[i] * skipped; + local_vince[i] *= thread->num_cores; + } + do { for (int i = 0; i < 4; ++i) { - BYTE pix = bufplce[i][vplce[i] >> bits]; + BYTE pix = bufplce[i][local_vplce[i] >> bits]; if (pix != 0) { uint32_t fg = shade_pal_index(pix, light[i], shade_constants); @@ -2814,9 +3008,9 @@ public: dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; } - vplce[i] += vince[i]; + local_vplce[i] += local_vince[i]; } - dest += dc_pitch; + dest += pitch; } while (--count); } }; @@ -2853,13 +3047,16 @@ public: void Execute(DrawerThread *thread) override { - DWORD fracstep = dc_iscale; - DWORD frac = 
dc_texturefrac; - int count = dc_count; + int count = thread->count_for_thread(dc_dest_y, dc_count); + if (count <= 0) + return; + + DWORD fracstep = dc_iscale * thread->num_cores; + DWORD frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); const BYTE *source = dc_source; - uint32_t *dest = (uint32_t*)dc_dest; + uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); int bits = tmvlinebits; - int pitch = dc_pitch; + int pitch = dc_pitch * thread->num_cores; uint32_t light = calc_light_multiplier(dc_light); ShadeConstants shade_constants = dc_shade_constants; @@ -2928,8 +3125,12 @@ public: void Execute(DrawerThread *thread) override { - uint32_t *dest = (uint32_t*)dc_dest; - int count = dc_count; + int count = thread->count_for_thread(dc_dest_y, dc_count); + if (count <= 0) + return; + + uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + int pitch = dc_pitch * thread->num_cores; int bits = tmvlinebits; uint32_t light[4]; @@ -2943,11 +3144,20 @@ public: uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; + DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; + int skipped = thread->skipped_by_thread(dc_dest_y); + for (int i = 0; i < 4; i++) + { + local_vplce[i] += local_vince[i] * skipped; + local_vince[i] *= thread->num_cores; + } + do { for (int i = 0; i < 4; ++i) { - BYTE pix = bufplce[i][vplce[i] >> bits]; + BYTE pix = bufplce[i][local_vplce[i] >> bits]; if (pix != 0) { uint32_t fg = shade_pal_index(pix, light[i], shade_constants); @@ -2965,9 +3175,9 @@ public: dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; } - vplce[i] += vince[i]; + local_vplce[i] += local_vince[i]; } - dest += dc_pitch; + dest += pitch; } while (--count); } }; @@ -3004,13 +3214,16 @@ public: void Execute(DrawerThread *thread) override { - DWORD fracstep = dc_iscale; - 
DWORD frac = dc_texturefrac; - int count = dc_count; + int count = thread->count_for_thread(dc_dest_y, dc_count); + if (count <= 0) + return; + + DWORD fracstep = dc_iscale * thread->num_cores; + DWORD frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); const BYTE *source = dc_source; - uint32_t *dest = (uint32_t*)dc_dest; + uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); int bits = tmvlinebits; - int pitch = dc_pitch; + int pitch = dc_pitch * thread->num_cores; uint32_t light = calc_light_multiplier(dc_light); ShadeConstants shade_constants = dc_shade_constants; @@ -3079,8 +3292,12 @@ public: void Execute(DrawerThread *thread) override { - uint32_t *dest = (uint32_t*)dc_dest; - int count = dc_count; + int count = thread->count_for_thread(dc_dest_y, dc_count); + if (count <= 0) + return; + + uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + int pitch = dc_pitch * thread->num_cores; int bits = tmvlinebits; uint32_t light[4]; @@ -3094,11 +3311,20 @@ public: uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; + DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; + int skipped = thread->skipped_by_thread(dc_dest_y); + for (int i = 0; i < 4; i++) + { + local_vplce[i] += local_vince[i] * skipped; + local_vince[i] *= thread->num_cores; + } + do { for (int i = 0; i < 4; ++i) { - BYTE pix = bufplce[i][vplce[i] >> bits]; + BYTE pix = bufplce[i][local_vplce[i] >> bits]; if (pix != 0) { uint32_t fg = shade_pal_index(pix, light[i], shade_constants); @@ -3116,9 +3342,9 @@ public: dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; } - vplce[i] += vince[i]; + local_vplce[i] += local_vince[i]; } - dest += dc_pitch; + dest += pitch; } while (--count); } }; @@ -3146,6 +3372,9 @@ public: void Execute(DrawerThread *thread) override { + if 
(thread->line_skipped_by_thread(_y)) + return; + int y = _y; int x = _x; int x2 = _x2; diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp index bbf68a7956..c2caec0c2c 100644 --- a/src/r_drawt_rgba.cpp +++ b/src/r_drawt_rgba.cpp @@ -78,26 +78,26 @@ public: uint32_t *source; uint32_t *dest; int count; - int pitch; + int pitch, sincr; - count = yh - yl; - if (count < 0) + count = thread->count_for_thread(yl, (yh - yl + 1)); + if (count <= 0) return; - count++; - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &thread->dc_temp_rgba[yl * 4 + hx]; - pitch = dc_pitch; + dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + source = &thread->dc_temp_rgba[yl * 4 + hx] + thread->skipped_by_thread(yl) * 4; + pitch = dc_pitch * thread->num_cores; + sincr = thread->num_cores * 4; if (count & 1) { *dest = *source; - source += 4; + source += sincr; dest += pitch; } if (count & 2) { dest[0] = source[0]; - dest[pitch] = source[4]; - source += 8; + dest[pitch] = source[sincr]; + source += sincr * 2; dest += pitch * 2; } if (!(count >>= 2)) @@ -105,10 +105,10 @@ public: do { dest[0] = source[0]; - dest[pitch] = source[4]; - dest[pitch * 2] = source[8]; - dest[pitch * 3] = source[12]; - source += 16; + dest[pitch] = source[sincr]; + dest[pitch * 2] = source[sincr * 2]; + dest[pitch * 3] = source[sincr * 3]; + source += sincr * 4; dest += pitch * 4; } while (--count); } @@ -145,22 +145,23 @@ public: uint32_t *dest; int count; int pitch; + int sincr; - count = yh - yl; - if (count < 0) + count = thread->count_for_thread(yl, yh - yl + 1); + if (count <= 0) return; - count++; uint32_t light = calc_light_multiplier(dc_light); ShadeConstants shade_constants = dc_shade_constants; - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &thread->dc_temp_rgba[yl * 4 + hx]; - pitch = dc_pitch; + dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + source = &thread->dc_temp_rgba[yl * 4 + hx] + 
thread->skipped_by_thread(yl) * 4; + pitch = dc_pitch * thread->num_cores; + sincr = thread->num_cores * 4; if (count & 1) { *dest = shade_pal_index(*source, light, shade_constants); - source += 4; + source += sincr; dest += pitch; } if (!(count >>= 1)) @@ -168,8 +169,8 @@ public: do { dest[0] = shade_pal_index(source[0], light, shade_constants); - dest[pitch] = shade_pal_index(source[4], light, shade_constants); - source += 8; + dest[pitch] = shade_pal_index(source[sincr], light, shade_constants); + source += sincr * 2; dest += pitch * 2; } while (--count); } @@ -205,25 +206,26 @@ public: uint32_t *dest; int count; int pitch; + int sincr; - count = yh - yl; - if (count < 0) + count = thread->count_for_thread(yl, yh - yl + 1); + if (count <= 0) return; - count++; uint32_t light = calc_light_multiplier(dc_light); ShadeConstants shade_constants = dc_shade_constants; - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &thread->dc_temp_rgba[yl * 4]; - pitch = dc_pitch; + dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; + pitch = dc_pitch * thread->num_cores; + sincr = thread->num_cores * 4; if (count & 1) { dest[0] = shade_pal_index(source[0], light, shade_constants); dest[1] = shade_pal_index(source[1], light, shade_constants); dest[2] = shade_pal_index(source[2], light, shade_constants); dest[3] = shade_pal_index(source[3], light, shade_constants); - source += 4; + source += sincr; dest += pitch; } if (!(count >>= 1)) @@ -234,11 +236,11 @@ public: dest[1] = shade_pal_index(source[1], light, shade_constants); dest[2] = shade_pal_index(source[2], light, shade_constants); dest[3] = shade_pal_index(source[3], light, shade_constants); - dest[pitch] = shade_pal_index(source[4], light, shade_constants); - dest[pitch + 1] = shade_pal_index(source[5], light, shade_constants); - dest[pitch + 2] = shade_pal_index(source[6], light, shade_constants); - 
dest[pitch + 3] = shade_pal_index(source[7], light, shade_constants); - source += 8; + dest[pitch] = shade_pal_index(source[sincr], light, shade_constants); + dest[pitch + 1] = shade_pal_index(source[sincr + 1], light, shade_constants); + dest[pitch + 2] = shade_pal_index(source[sincr + 2], light, shade_constants); + dest[pitch + 3] = shade_pal_index(source[sincr + 3], light, shade_constants); + source += sincr * 2; dest += pitch * 2; } while (--count); } @@ -249,19 +251,20 @@ public: uint32_t *dest; int count; int pitch; + int sincr; - count = yh - yl; - if (count < 0) + count = thread->count_for_thread(yl, yh - yl + 1); + if (count <= 0) return; - count++; ShadeConstants shade_constants = dc_shade_constants; uint32_t light = calc_light_multiplier(dc_light); uint32_t *palette = (uint32_t*)GPalette.BaseColors; - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &thread->dc_temp_rgba[yl * 4]; - pitch = dc_pitch; + dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; + pitch = dc_pitch * thread->num_cores; + sincr = thread->num_cores * 4; if (shade_constants.simple_shade) { @@ -278,7 +281,7 @@ public: SSE_SHADE_SIMPLE(fg); _mm_storeu_si128((__m128i*)dest, fg); - source += 4; + source += sincr; dest += pitch; } if (!(count >>= 1)) @@ -299,17 +302,17 @@ public: // shade_pal_index 4-7 (pitch) { - uint32_t p0 = source[4]; - uint32_t p1 = source[5]; - uint32_t p2 = source[6]; - uint32_t p3 = source[7]; + uint32_t p0 = source[sincr]; + uint32_t p1 = source[sincr + 1]; + uint32_t p2 = source[sincr + 2]; + uint32_t p3 = source[sincr + 3]; __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); SSE_SHADE_SIMPLE(fg); _mm_storeu_si128((__m128i*)(dest + pitch), fg); } - source += 8; + source += sincr * 2; dest += pitch * 2; } while (--count); } @@ -328,7 +331,7 @@ public: SSE_SHADE(fg, shade_constants); 
_mm_storeu_si128((__m128i*)dest, fg); - source += 4; + source += sincr; dest += pitch; } if (!(count >>= 1)) @@ -349,17 +352,17 @@ public: // shade_pal_index 4-7 (pitch) { - uint32_t p0 = source[4]; - uint32_t p1 = source[5]; - uint32_t p2 = source[6]; - uint32_t p3 = source[7]; + uint32_t p0 = source[sincr]; + uint32_t p1 = source[sincr + 1]; + uint32_t p2 = source[sincr + 2]; + uint32_t p3 = source[sincr + 3]; __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); SSE_SHADE(fg, shade_constants); _mm_storeu_si128((__m128i*)(dest + pitch), fg); } - source += 8; + source += sincr * 2; dest += pitch * 2; } while (--count); } @@ -522,15 +525,16 @@ public: uint32_t *dest; int count; int pitch; + int sincr; - count = yh - yl; - if (count < 0) + count = thread->count_for_thread(yl, yh - yl + 1); + if (count <= 0) return; - count++; - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &thread->dc_temp_rgba[yl * 4 + hx]; - pitch = dc_pitch; + dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + source = &thread->dc_temp_rgba[yl * 4 + hx] + thread->skipped_by_thread(yl) * 4; + pitch = dc_pitch * thread->num_cores; + sincr = 4 * thread->num_cores; uint32_t light = calc_light_multiplier(dc_light); ShadeConstants shade_constants = dc_shade_constants; @@ -554,7 +558,7 @@ public: *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - source += 4; + source += sincr; dest += pitch; } while (--count); } @@ -590,15 +594,16 @@ public: uint32_t *dest; int count; int pitch; + int sincr; - count = yh - yl; - if (count < 0) + count = thread->count_for_thread(yl, yh - yl + 1); + if (count <= 0) return; - count++; - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &thread->dc_temp_rgba[yl * 4]; - pitch = dc_pitch; + dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; + pitch = dc_pitch * 
thread->num_cores; + sincr = 4 * thread->num_cores; uint32_t light = calc_light_multiplier(dc_light); ShadeConstants shade_constants = dc_shade_constants; @@ -625,7 +630,7 @@ public: dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; } - source += 4; + source += sincr; dest += pitch; } while (--count); } @@ -636,15 +641,16 @@ public: uint32_t *dest; int count; int pitch; + int sincr; - count = yh - yl; - if (count < 0) + count = thread->count_for_thread(yl, yh - yl + 1); + if (count <= 0) return; - count++; - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &thread->dc_temp_rgba[yl * 4]; - pitch = dc_pitch; + dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; + pitch = dc_pitch * thread->num_cores; + sincr = 4 * thread->num_cores; uint32_t light = calc_light_multiplier(dc_light); uint32_t *palette = (uint32_t*)GPalette.BaseColors; @@ -686,7 +692,7 @@ public: __m128i color = _mm_packus_epi16(color_lo, color_hi); _mm_storeu_si128((__m128i*)dest, color); - source += 4; + source += sincr; dest += pitch; } while (--count); } @@ -722,7 +728,7 @@ public: __m128i color = _mm_packus_epi16(color_lo, color_hi); _mm_storeu_si128((__m128i*)dest, color); - source += 4; + source += sincr; dest += pitch; } while (--count); } @@ -764,16 +770,17 @@ public: uint32_t *dest; int count; int pitch; + int sincr; - count = yh - yl; - if (count < 0) + count = thread->count_for_thread(yl, yh - yl + 1); + if (count <= 0) return; - count++; colormap = dc_colormap; - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &thread->dc_temp_rgba[yl * 4 + hx]; - pitch = dc_pitch; + dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + source = &thread->dc_temp_rgba[yl * 4 + hx] + thread->skipped_by_thread(yl) * 4; + pitch = dc_pitch * thread->num_cores; + sincr = 4 * thread->num_cores; uint32_t fg = 
shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); uint32_t fg_red = (fg >> 16) & 0xff; @@ -793,7 +800,7 @@ public: uint32_t blue = (fg_blue * alpha + bg_blue * inv_alpha) / 64; *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - source += 4; + source += sincr; dest += pitch; } while (--count); } @@ -832,16 +839,17 @@ public: uint32_t *dest; int count; int pitch; + int sincr; - count = yh - yl; - if (count < 0) + count = thread->count_for_thread(yl, yh - yl + 1); + if (count <= 0) return; - count++; colormap = dc_colormap; - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &thread->dc_temp_rgba[yl * 4]; - pitch = dc_pitch; + dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; + pitch = dc_pitch * thread->num_cores; + sincr = 4 * thread->num_cores; uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); uint32_t fg_red = (fg >> 16) & 0xff; @@ -864,7 +872,7 @@ public: dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; } - source += 4; + source += sincr; dest += pitch; } while (--count); } @@ -876,16 +884,17 @@ public: uint32_t *dest; int count; int pitch; + int sincr; - count = yh - yl; - if (count < 0) + count = thread->count_for_thread(yl, yh - yl + 1); + if (count <= 0) return; - count++; colormap = dc_colormap; - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &thread->dc_temp_rgba[yl * 4]; - pitch = dc_pitch; + dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; + pitch = dc_pitch * thread->num_cores; + sincr = 4 * thread->num_cores; __m128i fg = _mm_unpackhi_epi8(_mm_set1_epi32(shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light))), _mm_setzero_si128()); __m128i alpha_one = _mm_set1_epi16(64); @@ -913,7 +922,7 @@ public: __m128i color = 
_mm_packus_epi16(color_lo, color_hi); _mm_storeu_si128((__m128i*)dest, color); - source += 4; + source += sincr; dest += pitch; } while (--count); } @@ -955,15 +964,16 @@ public: uint32_t *dest; int count; int pitch; + int sincr; - count = yh - yl; - if (count < 0) + count = thread->count_for_thread(yl, yh - yl + 1); + if (count <= 0) return; - count++; - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &thread->dc_temp_rgba[yl * 4 + hx]; - pitch = dc_pitch; + dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + source = &thread->dc_temp_rgba[yl * 4 + hx] + thread->skipped_by_thread(yl) * 4; + pitch = dc_pitch * thread->num_cores; + sincr = 4 * thread->num_cores; uint32_t light = calc_light_multiplier(dc_light); ShadeConstants shade_constants = dc_shade_constants; @@ -986,7 +996,7 @@ public: uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - source += 4; + source += sincr; dest += pitch; } while (--count); } @@ -1026,15 +1036,16 @@ public: uint32_t *dest; int count; int pitch; + int sincr; - count = yh - yl; - if (count < 0) + count = thread->count_for_thread(yl, yh - yl + 1); + if (count <= 0) return; - count++; - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &thread->dc_temp_rgba[yl * 4]; - pitch = dc_pitch; + dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; + pitch = dc_pitch * thread->num_cores; + sincr = 4 * thread->num_cores; uint32_t light = calc_light_multiplier(dc_light); ShadeConstants shade_constants = dc_shade_constants; @@ -1060,7 +1071,7 @@ public: dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; } - source += 4; + source += sincr; dest += pitch; } while (--count); } @@ -1071,15 +1082,16 @@ public: uint32_t *dest; int count; int pitch; + int sincr; - count = yh - yl; - if (count < 
0) + count = thread->count_for_thread(yl, yh - yl + 1); + if (count <= 0) return; - count++; - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &thread->dc_temp_rgba[yl * 4]; - pitch = dc_pitch; + dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; + pitch = dc_pitch * thread->num_cores; + sincr = 4 * thread->num_cores; uint32_t light = calc_light_multiplier(dc_light); uint32_t *palette = (uint32_t*)GPalette.BaseColors; @@ -1121,7 +1133,7 @@ public: __m128i color = _mm_packus_epi16(color_lo, color_hi); _mm_storeu_si128((__m128i*)dest, color); - source += 4; + source += sincr; dest += pitch; } while (--count); } @@ -1157,7 +1169,7 @@ public: __m128i color = _mm_packus_epi16(color_lo, color_hi); _mm_storeu_si128((__m128i*)dest, color); - source += 4; + source += sincr; dest += pitch; } while (--count); } @@ -1200,15 +1212,16 @@ public: uint32_t *dest; int count; int pitch; + int sincr; - count = yh - yl; - if (count < 0) + count = thread->count_for_thread(yl, yh - yl + 1); + if (count <= 0) return; - count++; - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &thread->dc_temp_rgba[yl * 4 + hx]; - pitch = dc_pitch; + dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + source = &thread->dc_temp_rgba[yl * 4 + hx] + thread->skipped_by_thread(yl) * 4; + pitch = dc_pitch * thread->num_cores; + sincr = 4 * thread->num_cores; uint32_t light = calc_light_multiplier(dc_light); ShadeConstants shade_constants = dc_shade_constants; @@ -1231,7 +1244,7 @@ public: uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - source += 4; + source += sincr; dest += pitch; } while (--count); } @@ -1270,15 +1283,16 @@ public: uint32_t *dest; int count; int pitch; + int sincr; - count = yh - yl; - if (count < 0) + 
count = thread->count_for_thread(yl, yh - yl + 1); + if (count <= 0) return; - count++; - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &thread->dc_temp_rgba[yl * 4]; - pitch = dc_pitch; + dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; + pitch = dc_pitch * thread->num_cores; + sincr = 4 * thread->num_cores; uint32_t light = calc_light_multiplier(dc_light); ShadeConstants shade_constants = dc_shade_constants; @@ -1305,7 +1319,7 @@ public: dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; } - source += 4; + source += sincr; dest += pitch; } while (--count); } @@ -1346,15 +1360,16 @@ public: uint32_t *dest; int count; int pitch; + int sincr; - count = yh - yl; - if (count < 0) + count = thread->count_for_thread(yl, yh - yl + 1); + if (count <= 0) return; - count++; - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &thread->dc_temp_rgba[yl * 4 + hx]; - pitch = dc_pitch; + dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + source = &thread->dc_temp_rgba[yl * 4 + hx] + thread->skipped_by_thread(yl) * 4; + pitch = dc_pitch * thread->num_cores; + sincr = 4 * thread->num_cores; uint32_t light = calc_light_multiplier(dc_light); ShadeConstants shade_constants = dc_shade_constants; @@ -1377,7 +1392,7 @@ public: uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - source += 4; + source += sincr; dest += pitch; } while (--count); } @@ -1416,15 +1431,16 @@ public: uint32_t *dest; int count; int pitch; + int sincr; - count = yh - yl; - if (count < 0) + count = thread->count_for_thread(yl, yh - yl + 1); + if (count <= 0) return; - count++; - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &thread->dc_temp_rgba[yl * 4]; - pitch = dc_pitch; + dest = thread->dest_for_thread(yl, 
dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; + pitch = dc_pitch * thread->num_cores; + sincr = 4 * thread->num_cores; uint32_t light = calc_light_multiplier(dc_light); ShadeConstants shade_constants = dc_shade_constants; @@ -1451,7 +1467,7 @@ public: dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; } - source += 4; + source += sincr; dest += pitch; } while (--count); } From c59db95cc87543e648f64a19d37f78f2a6656d4d Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 9 Jun 2016 23:12:38 +0200 Subject: [PATCH 023/912] Rewrote wallscan to fix buffer overruns and code duplication. --- src/r_segs.cpp | 910 +++++++++++++++---------------------------------- 1 file changed, 283 insertions(+), 627 deletions(-) diff --git a/src/r_segs.cpp b/src/r_segs.cpp index bd2c7d22bb..5aa7c29a27 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -1065,53 +1065,149 @@ void R_RenderFakeWallRange (drawseg_t *ds, int x1, int x2) return; } -// prevlineasm1 is like vlineasm1 but skips the loop if only drawing one pixel -inline fixed_t prevline1 (fixed_t vince, BYTE *colormap, fixed_t light, int count, fixed_t vplce, const BYTE *bufplce, BYTE *dest) +// Draw a column with support for non-power-of-two ranges +uint32_t wallscan_drawcol1(int x, int y1, int y2, uint32_t uv_start, uint32_t uv_step, uint32_t uv_max, const BYTE *source, DWORD(*draw1column)()) { - dc_iscale = vince; - dc_colormap = colormap; - dc_light = light; - dc_count = count; - dc_texturefrac = vplce; - dc_source = bufplce; - dc_dest = dest; - return doprevline1 (); + int pixelsize = r_swtruecolor ? 
4 : 1; + if (uv_max == 0) // power of two + { + int count = y2 - y1; + if (count > 0) + { + dc_source = source; + dc_dest = (ylookup[y1] + x) * pixelsize + dc_destorg; + dc_count = y2 - y1; + dc_iscale = uv_step; + dc_texturefrac = uv_start; + draw1column(); + } + return uv_start + uv_step * (uint32_t)count; + } + else + { + uint32_t uv_pos = uv_start; + + int left = y2 - y1; + while (left > 0) + { + int next_uv_wrap = (uv_max - uv_pos + uv_step - 1) / uv_step; + int count = MIN(left, next_uv_wrap); + + if (count > 0) + { + dc_source = source; + dc_dest = (ylookup[y1] + x) * pixelsize + dc_destorg; + dc_count = count; + dc_iscale = uv_step; + dc_texturefrac = uv_pos; + draw1column(); + } + + left -= count; + uv_pos += uv_step * count; + if (uv_pos >= uv_max) + uv_pos -= uv_max; + } + + return uv_pos; + } } -void wallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, - double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x)) +// Draw four columns with support for non-power-of-two ranges +void wallscan_drawcol4(int x, int y1, int y2, uint32_t *uv_pos, uint32_t *uv_step, uint32_t uv_max, const BYTE **source, void(*draw4columns)()) { - int x, fracbits; - int y1ve[4], y2ve[4], u4, d4, z; - char bad; - float light = rw_light - rw_lightstep; - SDWORD xoffset; - BYTE *basecolormapdata; - double iscale; - - // This function also gets used to draw skies. Unlike BUILD, skies are - // drawn by visplane instead of by bunch, so these checks are invalid. - //if ((uwal[x1] > viewheight) && (uwal[x2] > viewheight)) return; - //if ((dwal[x1] < 0) && (dwal[x2] < 0)) return; - - if (rw_pic->UseType == FTexture::TEX_Null) - { - return; - } - -//extern cycle_t WallScanCycles; -//clock (WallScanCycles); - int pixelsize = r_swtruecolor ? 
4 : 1; + if (uv_max == 0) // power of two, no wrap handling needed + { + for (int i = 0; i < 4; i++) + { + bufplce[i] = source[i]; + vplce[i] = uv_pos[i]; + vince[i] = uv_step[i]; + uv_pos[i] += uv_step[i] * (y2 - y1); + } + dc_dest = (ylookup[y1] + x) * pixelsize + dc_destorg; + dc_count = y2 - y1; + draw4columns(); + } + else + { + dc_dest = (ylookup[y1] + x) * pixelsize + dc_destorg; + for (int i = 0; i < 4; i++) + bufplce[i] = source[i]; - rw_pic->GetHeight(); // Make sure texture size is loaded - fracbits = 32 - rw_pic->HeightBits; - setupvline(fracbits); - xoffset = rw_offset; - basecolormapdata = basecolormap->Maps; + int left = y2 - y1; + while (left > 0) + { + // Find which column wraps first + int count = left; + for (int i = 0; i < 4; i++) + { + int next_uv_wrap = (uv_max - uv_pos[i] + uv_step[i] - 1) / uv_step[i]; + count = MIN(next_uv_wrap, count); + } - x = x1; - //while ((umost[x] > dmost[x]) && (x < x2)) x++; + // Draw until that column wraps + if (count > 0) + { + for (int i = 0; i < 4; i++) + { + vplce[i] = uv_pos[i]; + vince[i] = uv_step[i]; + } + dc_count = count; + draw4columns(); + } + + // Wrap the uv position + for (int i = 0; i < 4; i++) + { + uv_pos[i] += uv_step[i] * count; + if (uv_pos[i] >= uv_max) + uv_pos[i] -= uv_max; + } + + left -= count; + } + } +} + +// Calculates a wrapped uv start position value for a column +void calc_uv_start_and_step(int y1, float swal, double yrepeat, uint32_t uv_height, int fracbits, uint32_t &uv_start_out, uint32_t &uv_step_out) +{ + double uv_stepd = swal * yrepeat; + + // Find start uv in [0-uv_height[ range. + // Not using xs_ToFixed because it rounds the result and we need something that always rounds down to stay within the range. 
+ double v = (dc_texturemid + uv_stepd * (y1 - CenterY + 0.5)) / uv_height; + v = v - std::floor(v); + v *= uv_height; + v *= (1 << fracbits); + + uv_start_out = (uint32_t)v; + uv_step_out = xs_ToFixed(fracbits, uv_stepd); +} + +typedef DWORD(*Draw1ColumnFuncPtr)(); +typedef void(*Draw4ColumnsFuncPtr)(); + +void wallscan_any( + int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, + const BYTE *(*getcol)(FTexture *tex, int x), + void(setupwallscan(int bits,Draw1ColumnFuncPtr &draw1, Draw4ColumnsFuncPtr &draw2))) +{ + if (rw_pic->UseType == FTexture::TEX_Null) + return; + + uint32_t uv_height = rw_pic->GetHeight(); + uint32_t fracbits = 32 - rw_pic->HeightBits; + uint32_t uv_max = uv_height << fracbits; + + DWORD(*draw1column)(); + void(*draw4columns)(); + setupwallscan(fracbits, draw1column, draw4columns); + + fixed_t xoffset = rw_offset; bool fixed = (fixedcolormap != NULL || fixedlightlev >= 0); if (fixed) @@ -1131,139 +1227,190 @@ void wallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *l else R_SetColorMapLight(basecolormap, 0, 0); - for(; (x < x2) && (x & 3); ++x) + float light = rw_light; + + // Calculate where 4 column alignment begins and ends: + int aligned_x1 = clamp((x1 + 3) / 4 * 4, x1, x2); + int aligned_x2 = clamp(x2 / 4 * 4, x1, x2); + + // First unaligned columns: + for (int x = x1; x < aligned_x1; x++, light += rw_lightstep) { - light += rw_lightstep; - y1ve[0] = uwal[x];//max(uwal[x],umost[x]); - y2ve[0] = dwal[x];//min(dwal[x],dmost[x]); - if (y2ve[0] <= y1ve[0]) continue; - assert (y1ve[0] < viewheight); - assert (y2ve[0] <= viewheight); + int y1 = uwal[x]; + int y2 = dwal[x]; + if (y2 <= y1) + continue; if (!fixed) - { // calculate lighting R_SetColorMapLight(basecolormap, light, wallshade); - } - dc_source = getcol (rw_pic, (lwal[x] + xoffset) >> FRACBITS); - dc_dest = (ylookup[y1ve[0]] + x)*pixelsize + dc_destorg; - dc_count = y2ve[0] - y1ve[0]; - iscale = swal[x] * yrepeat; - dc_iscale = 
xs_ToFixed(fracbits, iscale); - dc_texturefrac = xs_ToFixed(fracbits, dc_texturemid + iscale * (y1ve[0] - CenterY + 0.5)); + const BYTE *source = getcol(rw_pic, (lwal[x] + xoffset) >> FRACBITS); - dovline1(); + uint32_t uv_start, uv_step; + calc_uv_start_and_step(y1, swal[x], yrepeat, uv_height, fracbits, uv_start, uv_step); + + wallscan_drawcol1(x, y1, y2, uv_start, uv_step, uv_max, source, draw1column); } - for(; x < x2-3; x += 4) + // The aligned columns + for (int x = aligned_x1; x < aligned_x2; x += 4) { - bad = 0; - for (z = 3; z>= 0; --z) - { - y1ve[z] = uwal[x+z];//max(uwal[x+z],umost[x+z]); - y2ve[z] = dwal[x+z];//min(dwal[x+z],dmost[x+z])-1; - if (y2ve[z] <= y1ve[z]) { bad += 1<> FRACBITS); - iscale = swal[x + z] * yrepeat; - vince[z] = xs_ToFixed(fracbits, iscale); - vplce[z] = xs_ToFixed(fracbits, dc_texturemid + iscale * (y1ve[z] - CenterY + 0.5)); - } - if (bad == 15) + const BYTE *source[4]; + for (int i = 0; i < 4; i++) + source[i] = getcol(rw_pic, (lwal[x + i] + xoffset) >> FRACBITS); + + float lights[4]; + for (int i = 0; i < 4; i++) { - light += rw_lightstep * 4; + lights[i] = light; + light += rw_lightstep; + } + + uint32_t uv_pos[4], uv_step[4]; + for (int i = 0; i < 4; i++) + calc_uv_start_and_step(y1[i], swal[x + i], yrepeat, uv_height, fracbits, uv_pos[i], uv_step[i]); + + // Figure out where we vertically can start and stop drawing 4 columns in one go + int middle_y1 = y1[0]; + int middle_y2 = y2[0]; + for (int i = 1; i < 4; i++) + { + middle_y1 = MAX(y1[i], middle_y1); + middle_y2 = MIN(y2[i], middle_y2); + } + + // If we got an empty column in our set we cannot draw 4 columns in one go: + bool empty_column_in_set = false; + for (int i = 0; i < 4; i++) + { + if (y2[i] <= y1[i]) + empty_column_in_set = true; + } + + if (empty_column_in_set || middle_y2 <= middle_y1) + { + for (int i = 0; i < 4; i++) + { + if (!fixed) + R_SetColorMapLight(basecolormap, lights[i], wallshade); + wallscan_drawcol1(x + i, y1[i], y2[i], uv_pos[i], uv_step[i], 
uv_max, source[i], draw1column); + } continue; } + // Draw the first rows where not all 4 columns are active + for (int i = 0; i < 4; i++) + { + if (!fixed) + R_SetColorMapLight(basecolormap, lights[i], wallshade); + + if (y1[i] < middle_y1) + uv_pos[i] = wallscan_drawcol1(x + i, y1[i], middle_y1, uv_pos[i], uv_step[i], uv_max, source[i], draw1column); + } + + // Draw the area where all 4 columns are active if (!fixed) { - for (z = 0; z < 4; ++z) + for (int i = 0; i < 4; i++) { - light += rw_lightstep; if (r_swtruecolor) { - palookupoffse[z] = basecolormapdata; - palookuplight[z] = LIGHTSCALE(light, wallshade); + palookupoffse[i] = basecolormap->Maps; + palookuplight[i] = LIGHTSCALE(lights[i], wallshade); } else { - palookupoffse[z] = basecolormapdata + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT); - palookuplight[z] = 0; + palookupoffse[i] = basecolormap->Maps + (GETPALOOKUP(lights[i], wallshade) << COLORMAPSHIFT); + palookuplight[i] = 0; } } } + wallscan_drawcol4(x, middle_y1, middle_y2, uv_pos, uv_step, uv_max, source, draw4columns); - u4 = MAX(MAX(y1ve[0],y1ve[1]),MAX(y1ve[2],y1ve[3])); - d4 = MIN(MIN(y2ve[0],y2ve[1]),MIN(y2ve[2],y2ve[3])); - - if ((bad != 0) || (u4 >= d4)) + // Draw the last rows where not all 4 columns are active + for (int i = 0; i < 4; i++) { - for (z = 0; z < 4; ++z) - { - if (!(bad & 1)) - { - prevline1(vince[z],palookupoffse[z],palookuplight[z],y2ve[z]-y1ve[z],vplce[z],bufplce[z],(ylookup[y1ve[z]]+x+z)*pixelsize+dc_destorg); - } - bad >>= 1; - } - continue; - } + if (!fixed) + R_SetColorMapLight(basecolormap, lights[i], wallshade); - for (z = 0; z < 4; ++z) - { - if (u4 > y1ve[z]) - { - vplce[z] = prevline1(vince[z],palookupoffse[z], palookuplight[z],u4-y1ve[z],vplce[z],bufplce[z],(ylookup[y1ve[z]]+x+z)*pixelsize+dc_destorg); - } - } - - if (d4 > u4) - { - dc_count = d4-u4; - dc_dest = (ylookup[u4]+x)*pixelsize+dc_destorg; - dovline4(); - } - - BYTE *i = (x+ylookup[d4])*pixelsize+dc_destorg; - for (z = 0; z < 4; ++z) - { - if 
(y2ve[z] > d4) - { - prevline1(vince[z],palookupoffse[0],palookuplight[0],y2ve[z]-d4,vplce[z],bufplce[z],i+z*pixelsize); - } + if (middle_y2 < y2[i]) + uv_pos[i] = wallscan_drawcol1(x + i, middle_y2, y2[i], uv_pos[i], uv_step[i], uv_max, source[i], draw1column); } } - for(;x> FRACBITS); - dc_dest = (ylookup[y1ve[0]] + x) * pixelsize + dc_destorg; - dc_count = y2ve[0] - y1ve[0]; - iscale = swal[x] * yrepeat; - dc_iscale = xs_ToFixed(fracbits, iscale); - dc_texturefrac = xs_ToFixed(fracbits, dc_texturemid + iscale * (y1ve[0] - CenterY + 0.5)); + const BYTE *source = getcol(rw_pic, (lwal[x] + xoffset) >> FRACBITS); - dovline1(); + uint32_t uv_start, uv_step; + calc_uv_start_and_step(y1, swal[x], yrepeat, uv_height, fracbits, uv_start, uv_step); + + wallscan_drawcol1(x, y1, y2, uv_start, uv_step, uv_max, source, draw1column); } -//unclock (WallScanCycles); - NetUpdate (); } +void wallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x)) +{ + wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, [](int bits, Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) + { + setupvline(bits); + line1 = dovline1; + line4 = dovline4; + }); +} + +void maskwallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x)) +{ + if (!rw_pic->bMasked) // Textures that aren't masked can use the faster wallscan. 
+ { + wallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol); + } + else + { + wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, [](int bits, Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) + { + setupmvline(bits); + line1 = domvline1; + line4 = domvline4; + }); + } +} + +void transmaskwallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x)) +{ + static fixed_t(*tmvline1)(); + static void(*tmvline4)(); + if (!R_GetTransMaskDrawers(&tmvline1, &tmvline4)) + { + // The current translucency is unsupported, so draw with regular maskwallscan instead. + maskwallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol); + } + else + { + wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, [](int bits, Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) + { + setuptmvline(bits); + line1 = reinterpret_cast(tmvline1); + line4 = tmvline4; + }); + } +} + void wallscan_striped (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat) { FDynamicColormap *startcolormap = basecolormap; @@ -1331,507 +1478,16 @@ static void call_wallscan(int x1, int x2, short *uwal, short *dwal, float *swal, } } -//============================================================================= -// -// wallscan_np2 -// -// This is a wrapper around wallscan that helps it tile textures whose heights -// are not powers of 2. It divides the wall into texture-sized strips and calls -// wallscan for each of those. Since only one repetition of the texture fits -// in each strip, wallscan will not tile. -// -//============================================================================= - +// wallscan now tiles with non-power-of-two textures - this function is therefore not needed anymore.. 
void wallscan_np2(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, double top, double bot, bool mask) { - if (!r_np2) - { - call_wallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat, mask); - } - else - { - short most1[MAXWIDTH], most2[MAXWIDTH], most3[MAXWIDTH]; - short *up, *down; - double texheight = rw_pic->GetHeight(); - double partition; - double scaledtexheight = texheight / yrepeat; - - if (yrepeat >= 0) - { // normal orientation: draw strips from top to bottom - partition = top - fmod(top - dc_texturemid / yrepeat - ViewPos.Z, scaledtexheight); - if (partition == top) - { - partition -= scaledtexheight; - } - up = uwal; - down = most1; - dc_texturemid = (partition - ViewPos.Z) * yrepeat + texheight; - while (partition > bot) - { - int j = OWallMost(most3, partition - ViewPos.Z, &WallC); - if (j != 3) - { - for (int j = x1; j < x2; ++j) - { - down[j] = clamp(most3[j], up[j], dwal[j]); - } - call_wallscan(x1, x2, up, down, swal, lwal, yrepeat, mask); - up = down; - down = (down == most1) ? most2 : most1; - } - partition -= scaledtexheight; - dc_texturemid -= texheight; - } - call_wallscan(x1, x2, up, dwal, swal, lwal, yrepeat, mask); - } - else - { // upside down: draw strips from bottom to top - partition = bot - fmod(bot - dc_texturemid / yrepeat - ViewPos.Z, scaledtexheight); - up = most1; - down = dwal; - dc_texturemid = (partition - ViewPos.Z) * yrepeat + texheight; - while (partition < top) - { - int j = OWallMost(most3, partition - ViewPos.Z, &WallC); - if (j != 12) - { - for (int j = x1; j < x2; ++j) - { - up[j] = clamp(most3[j], uwal[j], down[j]); - } - call_wallscan(x1, x2, up, down, swal, lwal, yrepeat, mask); - down = up; - up = (up == most1) ? 
most2 : most1; - } - partition -= scaledtexheight; - dc_texturemid -= texheight; - } - call_wallscan(x1, x2, uwal, down, swal, lwal, yrepeat, mask); - } - } + call_wallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat, mask); } +// wallscan now tiles with non-power-of-two textures - this function is therefore not needed anymore.. static void wallscan_np2_ds(drawseg_t *ds, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat) { - if (rw_pic->GetHeight() != 1 << rw_pic->HeightBits) - { - double frontcz1 = ds->curline->frontsector->ceilingplane.ZatPoint(ds->curline->v1); - double frontfz1 = ds->curline->frontsector->floorplane.ZatPoint(ds->curline->v1); - double frontcz2 = ds->curline->frontsector->ceilingplane.ZatPoint(ds->curline->v2); - double frontfz2 = ds->curline->frontsector->floorplane.ZatPoint(ds->curline->v2); - double top = MAX(frontcz1, frontcz2); - double bot = MIN(frontfz1, frontfz2); - if (fake3D & FAKE3D_CLIPTOP) - { - top = MIN(top, sclipTop); - } - if (fake3D & FAKE3D_CLIPBOTTOM) - { - bot = MAX(bot, sclipBottom); - } - wallscan_np2(x1, x2, uwal, dwal, swal, lwal, yrepeat, top, bot, true); - } - else - { - call_wallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat, true); - } -} - -inline fixed_t mvline1 (fixed_t vince, BYTE *colormap, fixed_t light, int count, fixed_t vplce, const BYTE *bufplce, BYTE *dest) -{ - dc_iscale = vince; - dc_colormap = colormap; - dc_light = light; - dc_count = count; - dc_texturefrac = vplce; - dc_source = bufplce; - dc_dest = dest; - return domvline1 (); -} - -void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, - double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x)) -{ - int x, fracbits; - BYTE *pixel; - int pixelsize, pixelshift; - int y1ve[4], y2ve[4], u4, d4, startx, dax, z; - char bad; - float light = rw_light - rw_lightstep; - SDWORD xoffset; - BYTE *basecolormapdata; - double iscale; - - if (rw_pic->UseType == FTexture::TEX_Null) - { - return; - } - 
- if (!rw_pic->bMasked) - { // Textures that aren't masked can use the faster wallscan. - wallscan (x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol); - return; - } - -//extern cycle_t WallScanCycles; -//clock (WallScanCycles); - - pixelsize = r_swtruecolor ? 4 : 1; - pixelshift = r_swtruecolor ? 2 : 0; - - rw_pic->GetHeight(); // Make sure texture size is loaded - fracbits = 32- rw_pic->HeightBits; - setupmvline(fracbits); - xoffset = rw_offset; - basecolormapdata = basecolormap->Maps; - - x = startx = x1; - pixel = x * pixelsize + dc_destorg; - - bool fixed = (fixedcolormap != NULL || fixedlightlev >= 0); - if (fixed) - { - palookupoffse[0] = dc_colormap; - palookupoffse[1] = dc_colormap; - palookupoffse[2] = dc_colormap; - palookupoffse[3] = dc_colormap; - palookuplight[0] = 0; - palookuplight[1] = 0; - palookuplight[2] = 0; - palookuplight[3] = 0; - } - - if (fixedcolormap) - R_SetColorMapLight(fixedcolormap, 0, 0); - else - R_SetColorMapLight(basecolormap, 0, 0); - - for(; (x < x2) && (((size_t)pixel >> pixelshift) & 3); ++x, pixel += pixelsize) - { - light += rw_lightstep; - y1ve[0] = uwal[x];//max(uwal[x],umost[x]); - y2ve[0] = dwal[x];//min(dwal[x],dmost[x]); - if (y2ve[0] <= y1ve[0]) continue; - - if (!fixed) - { // calculate lighting - R_SetColorMapLight(basecolormap, light, wallshade); - } - - dc_source = getcol (rw_pic, (lwal[x] + xoffset) >> FRACBITS); - dc_dest = ylookup[y1ve[0]] * pixelsize + pixel; - dc_count = y2ve[0] - y1ve[0]; - iscale = swal[x] * yrepeat; - dc_iscale = xs_ToFixed(fracbits, iscale); - dc_texturefrac = xs_ToFixed(fracbits, dc_texturemid + iscale * (y1ve[0] - CenterY + 0.5)); - - domvline1(); - } - - for(; x < x2-3; x += 4, pixel += 4 * pixelsize) - { - bad = 0; - for (z = 3, dax = x+3; z >= 0; --z, --dax) - { - y1ve[z] = uwal[dax]; - y2ve[z] = dwal[dax]; - if (y2ve[z] <= y1ve[z]) { bad += 1<> FRACBITS); - iscale = swal[dax] * yrepeat; - vince[z] = xs_ToFixed(fracbits, iscale); - vplce[z] = xs_ToFixed(fracbits, dc_texturemid + 
iscale * (y1ve[z] - CenterY + 0.5)); - } - if (bad == 15) - { - light += rw_lightstep * 4; - continue; - } - - if (!fixed) - { - for (z = 0; z < 4; ++z) - { - light += rw_lightstep; - if (r_swtruecolor) - { - palookupoffse[z] = basecolormapdata; - palookuplight[z] = LIGHTSCALE(light, wallshade); - } - else - { - palookupoffse[z] = basecolormapdata + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT); - palookuplight[z] = 0; - } - } - } - - u4 = MAX(MAX(y1ve[0],y1ve[1]),MAX(y1ve[2],y1ve[3])); - d4 = MIN(MIN(y2ve[0],y2ve[1]),MIN(y2ve[2],y2ve[3])); - - if ((bad != 0) || (u4 >= d4)) - { - for (z = 0; z < 4; ++z) - { - if (!(bad & 1)) - { - mvline1(vince[z],palookupoffse[z],palookuplight[z],y2ve[z]-y1ve[z],vplce[z],bufplce[z],(ylookup[y1ve[z]]+z)*pixelsize+pixel); - } - bad >>= 1; - } - continue; - } - - for (z = 0; z < 4; ++z) - { - if (u4 > y1ve[z]) - { - vplce[z] = mvline1(vince[z],palookupoffse[z],palookuplight[z],u4-y1ve[z],vplce[z],bufplce[z],(ylookup[y1ve[z]]+z)*pixelsize+pixel); - } - } - - if (d4 > u4) - { - dc_count = d4-u4; - dc_dest = ylookup[u4]*pixelsize+pixel; - domvline4(); - } - - BYTE *i = pixel+ylookup[d4]*pixelsize; - for (z = 0; z < 4; ++z) - { - if (y2ve[z] > d4) - { - mvline1(vince[z],palookupoffse[0],palookuplight[0],y2ve[z]-d4,vplce[z],bufplce[z],i+z*pixelsize); - } - } - } - for(; x < x2; ++x, pixel += pixelsize) - { - light += rw_lightstep; - y1ve[0] = uwal[x]; - y2ve[0] = dwal[x]; - if (y2ve[0] <= y1ve[0]) continue; - - if (!fixed) - { // calculate lighting - R_SetColorMapLight(basecolormap, light, wallshade); - } - - dc_source = getcol (rw_pic, (lwal[x] + xoffset) >> FRACBITS); - dc_dest = ylookup[y1ve[0]]*pixelsize + pixel; - dc_count = y2ve[0] - y1ve[0]; - iscale = swal[x] * yrepeat; - dc_iscale = xs_ToFixed(fracbits, iscale); - dc_texturefrac = xs_ToFixed(fracbits, dc_texturemid + iscale * (y1ve[0] - CenterY + 0.5)); - - domvline1(); - } - -//unclock(WallScanCycles); - - NetUpdate (); -} - -inline void preptmvline1 (fixed_t vince, BYTE 
*colormap, fixed_t light, int count, fixed_t vplce, const BYTE *bufplce, BYTE *dest) -{ - dc_iscale = vince; - dc_colormap = colormap; - dc_light = light; - dc_count = count; - dc_texturefrac = vplce; - dc_source = bufplce; - dc_dest = dest; -} - -void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, - double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x)) -{ - fixed_t (*tmvline1)(); - void (*tmvline4)(); - int x, fracbits; - BYTE *pixel; - int pixelsize, pixelshift; - int y1ve[4], y2ve[4], u4, d4, startx, dax, z; - char bad; - float light = rw_light - rw_lightstep; - SDWORD xoffset; - BYTE *basecolormapdata; - double iscale; - - if (rw_pic->UseType == FTexture::TEX_Null) - { - return; - } - - if (!R_GetTransMaskDrawers (&tmvline1, &tmvline4)) - { - // The current translucency is unsupported, so draw with regular maskwallscan instead. - maskwallscan (x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol); - return; - } - -//extern cycle_t WallScanCycles; -//clock (WallScanCycles); - - pixelsize = r_swtruecolor ? 4 : 1; - pixelshift = r_swtruecolor ? 
2 : 0; - - rw_pic->GetHeight(); // Make sure texture size is loaded - fracbits = 32 - rw_pic->HeightBits; - setuptmvline(fracbits); - xoffset = rw_offset; - basecolormapdata = basecolormap->Maps; - fixed_t centeryfrac = FLOAT2FIXED(CenterY); - - x = startx = x1; - pixel = x * pixelsize + dc_destorg; - - bool fixed = (fixedcolormap != NULL || fixedlightlev >= 0); - if (fixed) - { - palookupoffse[0] = dc_colormap; - palookupoffse[1] = dc_colormap; - palookupoffse[2] = dc_colormap; - palookupoffse[3] = dc_colormap; - palookuplight[0] = 0; - palookuplight[1] = 0; - palookuplight[2] = 0; - palookuplight[3] = 0; - } - - if (fixedcolormap) - R_SetColorMapLight(fixedcolormap, 0, 0); - else - R_SetColorMapLight(basecolormap, 0, 0); - - for(; (x < x2) && (((size_t)pixel >> pixelshift) & 3); ++x, pixel += pixelsize) - { - light += rw_lightstep; - y1ve[0] = uwal[x];//max(uwal[x],umost[x]); - y2ve[0] = dwal[x];//min(dwal[x],dmost[x]); - if (y2ve[0] <= y1ve[0]) continue; - - if (!fixed) - { // calculate lighting - R_SetColorMapLight(basecolormap, light, wallshade); - } - - dc_source = getcol (rw_pic, (lwal[x] + xoffset) >> FRACBITS); - dc_dest = ylookup[y1ve[0]] * pixelsize + pixel; - dc_count = y2ve[0] - y1ve[0]; - iscale = swal[x] * yrepeat; - dc_iscale = xs_ToFixed(fracbits, iscale); - dc_texturefrac = xs_ToFixed(fracbits, dc_texturemid + iscale * (y1ve[0] - CenterY + 0.5)); - - tmvline1(); - } - - for(; x < x2-3; x += 4, pixel += 4 * pixelsize) - { - bad = 0; - for (z = 3, dax = x+3; z >= 0; --z, --dax) - { - y1ve[z] = uwal[dax]; - y2ve[z] = dwal[dax]; - if (y2ve[z] <= y1ve[z]) { bad += 1<> FRACBITS); - iscale = swal[dax] * yrepeat; - vince[z] = xs_ToFixed(fracbits, iscale); - vplce[z] = xs_ToFixed(fracbits, dc_texturemid + vince[z] * (y1ve[z] - CenterY + 0.5)); - } - if (bad == 15) - { - light += rw_lightstep * 4; - continue; - } - - if (!fixed) - { - for (z = 0; z < 4; ++z) - { - light += rw_lightstep; - if (r_swtruecolor) - { - palookupoffse[z] = basecolormapdata; - 
palookuplight[z] = LIGHTSCALE(light, wallshade); - } - else - { - palookupoffse[z] = basecolormapdata + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT); - } - } - } - - u4 = MAX(MAX(y1ve[0],y1ve[1]),MAX(y1ve[2],y1ve[3])); - d4 = MIN(MIN(y2ve[0],y2ve[1]),MIN(y2ve[2],y2ve[3])); - - if ((bad != 0) || (u4 >= d4)) - { - for (z = 0; z < 4; ++z) - { - if (!(bad & 1)) - { - preptmvline1(vince[z],palookupoffse[z],palookuplight[z],y2ve[z]-y1ve[z],vplce[z],bufplce[z],(ylookup[y1ve[z]]+z)*pixelsize+pixel); - tmvline1(); - } - bad >>= 1; - } - continue; - } - - for (z = 0; z < 4; ++z) - { - if (u4 > y1ve[z]) - { - preptmvline1(vince[z],palookupoffse[z],palookuplight[z],u4-y1ve[z],vplce[z],bufplce[z],(ylookup[y1ve[z]]+z)*pixelsize+pixel); - vplce[z] = tmvline1(); - } - } - - if (d4 > u4) - { - dc_count = d4-u4; - dc_dest = ylookup[u4]*pixelsize+pixel; - tmvline4(); - } - - BYTE *i = pixel+ylookup[d4]*pixelsize; - for (z = 0; z < 4; ++z) - { - if (y2ve[z] > d4) - { - preptmvline1(vince[z],palookupoffse[0],palookuplight[0],y2ve[z]-d4,vplce[z],bufplce[z],i+z*pixelsize); - tmvline1(); - } - } - } - for(; x < x2; ++x, pixel += pixelsize) - { - light += rw_lightstep; - y1ve[0] = uwal[x]; - y2ve[0] = dwal[x]; - if (y2ve[0] <= y1ve[0]) continue; - - if (!fixed) - { // calculate lighting - R_SetColorMapLight(basecolormap, light, wallshade); - } - - dc_source = getcol (rw_pic, (lwal[x] + xoffset) >> FRACBITS); - dc_dest = ylookup[y1ve[0]] * pixelsize + pixel; - dc_count = y2ve[0] - y1ve[0]; - iscale = swal[x] * yrepeat; - dc_iscale = xs_ToFixed(fracbits, iscale); - dc_texturefrac = xs_ToFixed(fracbits, dc_texturemid + iscale * (y1ve[0] - CenterY + 0.5)); - - tmvline1(); - } - -//unclock(WallScanCycles); - - NetUpdate (); + call_wallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat, true); } // From 05b6fe6174147ceef8c64ad83a1eab2736080c3e Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 10 Jun 2016 13:50:34 +0200 Subject: [PATCH 024/912] Added true color texture support for walls and 
floors --- src/r_draw.cpp | 7 +- src/r_draw.h | 22 ++- src/r_draw_rgba.cpp | 303 +++++++++++++++++++----------------- src/r_main.h | 46 ++++++ src/r_plane.cpp | 104 +++++++++---- src/r_segs.cpp | 4 + src/textures/pngtexture.cpp | 162 ++++++++++++++++++- src/textures/texture.cpp | 28 ++++ src/textures/textures.h | 12 +- 9 files changed, 508 insertions(+), 180 deletions(-) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 984a74f3f6..2710b9992d 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -2265,9 +2265,12 @@ const BYTE *R_GetColumn (FTexture *tex, int col) { col = width + (col % width); } - return tex->GetColumn (col, NULL); -} + if (r_swtruecolor) + return (const BYTE *)tex->GetColumnBgra(col, NULL); + else + return tex->GetColumn(col, NULL); +} // [RH] Initialize the column drawer pointers void R_InitColumnDrawers () diff --git a/src/r_draw.h b/src/r_draw.h index bf73c9dfb1..3f97a7a65f 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -520,6 +520,9 @@ class DrawerCommandQueue std::condition_variable end_condition; int finished_threads = 0; + bool no_threading = false; + DrawerThread single_core_thread; + void StartThreads(); void StopThreads(); @@ -535,11 +538,20 @@ public: template static void QueueCommand(Types &&... 
args) { - void *ptr = AllocMemory(sizeof(T)); - T *command = new (ptr)T(std::forward(args)...); - if (!command) - return; - Instance()->commands.push_back(command); + auto queue = Instance(); + if (queue->no_threading) + { + T command(std::forward(args)...); + command.Execute(&queue->single_core_thread); + } + else + { + void *ptr = AllocMemory(sizeof(T)); + if (!ptr) + return; + T *command = new (ptr)T(std::forward(args)...); + queue->commands.push_back(command); + } } // Wait until all worker threads finished executing commands diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 489716e1f7..528c3c986f 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -85,6 +85,8 @@ void* DrawerCommandQueue::AllocMemory(size_t size) void DrawerCommandQueue::Finish() { auto queue = Instance(); + if (queue->commands.empty()) + return; // Give worker threads something to do: @@ -190,8 +192,8 @@ class DrawColumnRGBACommand : public DrawerCommand { int dc_count; BYTE *dc_dest; - fixed_t dc_texturefrac; - fixed_t dc_iscale; + DWORD dc_texturefrac; + DWORD dc_iscale; fixed_t dc_light; const BYTE *dc_source; int dc_pitch; @@ -628,8 +630,8 @@ class DrawAddColumnRGBACommand : public DrawerCommand { int dc_count; BYTE *dc_dest; - fixed_t dc_iscale; - fixed_t dc_texturefrac; + DWORD dc_iscale; + DWORD dc_texturefrac; const BYTE *dc_source; int dc_pitch; fixed_t dc_light; @@ -708,8 +710,8 @@ class DrawTranslatedColumnRGBACommand : public DrawerCommand fixed_t dc_light; ShadeConstants dc_shade_constants; BYTE *dc_dest; - fixed_t dc_iscale; - fixed_t dc_texturefrac; + DWORD dc_iscale; + DWORD dc_texturefrac; BYTE *dc_translation; const BYTE *dc_source; int dc_pitch; @@ -769,8 +771,8 @@ class DrawTlatedAddColumnRGBACommand : public DrawerCommand fixed_t dc_light; ShadeConstants dc_shade_constants; BYTE *dc_dest; - fixed_t dc_iscale; - fixed_t dc_texturefrac; + DWORD dc_iscale; + DWORD dc_texturefrac; BYTE *dc_translation; const BYTE *dc_source; int dc_pitch; @@ -845,8 +847,8 
@@ class DrawShadedColumnRGBACommand : public DrawerCommand private: int dc_count; BYTE *dc_dest; - fixed_t dc_iscale; - fixed_t dc_texturefrac; + DWORD dc_iscale; + DWORD dc_texturefrac; fixed_t dc_light; const BYTE *dc_source; lighttable_t *dc_colormap; @@ -918,8 +920,8 @@ class DrawAddClampColumnRGBACommand : public DrawerCommand { int dc_count; BYTE *dc_dest; - fixed_t dc_iscale; - fixed_t dc_texturefrac; + DWORD dc_iscale; + DWORD dc_texturefrac; const BYTE *dc_source; int dc_pitch; fixed_t dc_light; @@ -994,8 +996,8 @@ class DrawAddClampTranslatedColumnRGBACommand : public DrawerCommand { int dc_count; BYTE *dc_dest; - fixed_t dc_iscale; - fixed_t dc_texturefrac; + DWORD dc_iscale; + DWORD dc_texturefrac; BYTE *dc_translation; const BYTE *dc_source; int dc_pitch; @@ -1073,8 +1075,8 @@ class DrawSubClampColumnRGBACommand : public DrawerCommand { int dc_count; BYTE *dc_dest; - fixed_t dc_iscale; - fixed_t dc_texturefrac; + DWORD dc_iscale; + DWORD dc_texturefrac; const BYTE *dc_source; int dc_pitch; fixed_t dc_light; @@ -1149,8 +1151,8 @@ class DrawSubClampTranslatedColumnRGBACommand : public DrawerCommand { int dc_count; BYTE *dc_dest; - fixed_t dc_iscale; - fixed_t dc_texturefrac; + DWORD dc_iscale; + DWORD dc_texturefrac; const BYTE *dc_source; int dc_pitch; fixed_t dc_light; @@ -1228,8 +1230,8 @@ class DrawRevSubClampColumnRGBACommand : public DrawerCommand { int dc_count; BYTE *dc_dest; - fixed_t dc_iscale; - fixed_t dc_texturefrac; + DWORD dc_iscale; + DWORD dc_texturefrac; const BYTE *dc_source; int dc_pitch; fixed_t dc_light; @@ -1303,8 +1305,8 @@ class DrawRevSubClampTranslatedColumnRGBACommand : public DrawerCommand { int dc_count; BYTE *dc_dest; - fixed_t dc_iscale; - fixed_t dc_texturefrac; + DWORD dc_iscale; + DWORD dc_texturefrac; const BYTE *dc_source; int dc_pitch; fixed_t dc_light; @@ -1380,7 +1382,7 @@ public: class DrawSpanRGBACommand : public DrawerCommand { - const BYTE *ds_source; + const uint32_t *ds_source; fixed_t ds_xfrac; fixed_t 
ds_yfrac; fixed_t ds_xstep; @@ -1397,7 +1399,7 @@ class DrawSpanRGBACommand : public DrawerCommand public: DrawSpanRGBACommand() { - ds_source = ::ds_source; + ds_source = (const uint32_t*)::ds_source; ds_xfrac = ::ds_xfrac; ds_yfrac = ::ds_yfrac; ds_xstep = ::ds_xstep; @@ -1423,7 +1425,7 @@ public: dsfixed_t xstep; dsfixed_t ystep; uint32_t* dest; - const BYTE* source = ds_source; + const uint32_t* source = ds_source; int count; int spot; @@ -1450,7 +1452,7 @@ public: spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); // Lookup pixel from flat texture tile - *dest++ = shade_pal_index(source[spot], light, shade_constants); + *dest++ = shade_bgra(source[spot], light, shade_constants); // Next step in u,v. xfrac += xstep; @@ -1469,7 +1471,7 @@ public: spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); // Lookup pixel from flat texture tile - *dest++ = shade_pal_index(source[spot], light, shade_constants); + *dest++ = shade_bgra(source[spot], light, shade_constants); // Next step in u,v. xfrac += xstep; @@ -1488,7 +1490,7 @@ public: dsfixed_t xstep; dsfixed_t ystep; uint32_t* dest; - const BYTE* source = ds_source; + const uint32_t* source = ds_source; int count; int spot; @@ -1598,7 +1600,7 @@ public: spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); // Lookup pixel from flat texture tile - *dest++ = shade_pal_index(source[spot], light, shade_constants); + *dest++ = shade_bgra(source[spot], light, shade_constants); // Next step in u,v. xfrac += xstep; @@ -1617,7 +1619,7 @@ public: spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); // Lookup pixel from flat texture tile - *dest++ = shade_pal_index(source[spot], light, shade_constants); + *dest++ = shade_bgra(source[spot], light, shade_constants); // Next step in u,v. 
xfrac += xstep; @@ -1630,7 +1632,7 @@ public: class DrawSpanMaskedRGBACommand : public DrawerCommand { - const BYTE *ds_source; + const uint32_t *ds_source; fixed_t ds_light; ShadeConstants ds_shade_constants; fixed_t ds_xfrac; @@ -1647,7 +1649,7 @@ class DrawSpanMaskedRGBACommand : public DrawerCommand public: DrawSpanMaskedRGBACommand() { - ds_source = ::ds_source; + ds_source = (const uint32_t*)::ds_source; ds_light = ::ds_light; ds_shade_constants = ::ds_shade_constants; ds_xfrac = ::ds_xfrac; @@ -1672,7 +1674,7 @@ public: dsfixed_t xstep; dsfixed_t ystep; uint32_t* dest; - const BYTE* source = ds_source; + const uint32_t* source = ds_source; int count; int spot; @@ -1694,13 +1696,13 @@ public: // 64x64 is the most common case by far, so special case it. do { - BYTE texdata; + uint32_t texdata; spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); texdata = source[spot]; if (texdata != 0) { - *dest = shade_pal_index(texdata, light, shade_constants); + *dest = shade_bgra(texdata, light, shade_constants); } dest++; xfrac += xstep; @@ -1714,13 +1716,13 @@ public: int xmask = ((1 << ds_xbits) - 1) << ds_ybits; do { - BYTE texdata; + uint32_t texdata; spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); texdata = source[spot]; if (texdata != 0) { - *dest = shade_pal_index(texdata, light, shade_constants); + *dest = shade_bgra(texdata, light, shade_constants); } dest++; xfrac += xstep; @@ -1732,7 +1734,7 @@ public: class DrawSpanTranslucentRGBACommand : public DrawerCommand { - const BYTE *ds_source; + const uint32_t *ds_source; fixed_t ds_light; ShadeConstants ds_shade_constants; fixed_t ds_xfrac; @@ -1749,7 +1751,7 @@ class DrawSpanTranslucentRGBACommand : public DrawerCommand public: DrawSpanTranslucentRGBACommand() { - ds_source = ::ds_source; + ds_source = (const uint32_t *)::ds_source; ds_light = ::ds_light; ds_shade_constants = ::ds_shade_constants; ds_xfrac = ::ds_xfrac; @@ -1774,7 +1776,7 @@ public: dsfixed_t xstep; dsfixed_t ystep; uint32_t* 
dest; - const BYTE* source = ds_source; + const uint32_t* source = ds_source; int count; int spot; @@ -1801,7 +1803,7 @@ public: { spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t fg = shade_pal_index(source[spot], light, shade_constants); + uint32_t fg = shade_bgra(source[spot], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = (fg) & 0xff; @@ -1829,7 +1831,7 @@ public: { spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - uint32_t fg = shade_pal_index(source[spot], light, shade_constants); + uint32_t fg = shade_bgra(source[spot], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = (fg) & 0xff; @@ -1853,7 +1855,7 @@ public: class DrawSpanMaskedTranslucentRGBACommand : public DrawerCommand { - const BYTE *ds_source; + const uint32_t *ds_source; fixed_t ds_light; ShadeConstants ds_shade_constants; fixed_t ds_xfrac; @@ -1870,7 +1872,7 @@ class DrawSpanMaskedTranslucentRGBACommand : public DrawerCommand public: DrawSpanMaskedTranslucentRGBACommand() { - ds_source = ::ds_source; + ds_source = (const uint32_t*)::ds_source; ds_light = ::ds_light; ds_shade_constants = ::ds_shade_constants; ds_xfrac = ::ds_xfrac; @@ -1895,7 +1897,7 @@ public: dsfixed_t xstep; dsfixed_t ystep; uint32_t* dest; - const BYTE* source = ds_source; + const uint32_t* source = ds_source; int count; int spot; @@ -1920,13 +1922,13 @@ public: // 64x64 is the most common case by far, so special case it. 
do { - BYTE texdata; + uint32_t texdata; spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); texdata = source[spot]; if (texdata != 0) { - uint32_t fg = shade_pal_index(texdata, light, shade_constants); + uint32_t fg = shade_bgra(texdata, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = (fg) & 0xff; @@ -1953,13 +1955,13 @@ public: int xmask = ((1 << ds_xbits) - 1) << ds_ybits; do { - BYTE texdata; + uint32_t texdata; spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); texdata = source[spot]; if (texdata != 0) { - uint32_t fg = shade_pal_index(texdata, light, shade_constants); + uint32_t fg = shade_bgra(texdata, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = (fg) & 0xff; @@ -1984,7 +1986,7 @@ public: class DrawSpanAddClampRGBACommand : public DrawerCommand { - const BYTE *ds_source; + const uint32_t *ds_source; fixed_t ds_light; ShadeConstants ds_shade_constants; fixed_t ds_xfrac; @@ -2001,7 +2003,7 @@ class DrawSpanAddClampRGBACommand : public DrawerCommand public: DrawSpanAddClampRGBACommand() { - ds_source = ::ds_source; + ds_source = (const uint32_t*)::ds_source; ds_light = ::ds_light; ds_shade_constants = ::ds_shade_constants; ds_xfrac = ::ds_xfrac; @@ -2026,7 +2028,7 @@ public: dsfixed_t xstep; dsfixed_t ystep; uint32_t* dest; - const BYTE* source = ds_source; + const uint32_t* source = ds_source; int count; int spot; @@ -2053,7 +2055,7 @@ public: { spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t fg = shade_pal_index(source[spot], light, shade_constants); + uint32_t fg = shade_bgra(source[spot], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = (fg) & 0xff; @@ -2081,7 +2083,7 @@ public: { spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - uint32_t fg = shade_pal_index(source[spot], light, 
shade_constants); + uint32_t fg = shade_bgra(source[spot], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = (fg) & 0xff; @@ -2105,7 +2107,7 @@ public: class DrawSpanMaskedAddClampRGBACommand : public DrawerCommand { - const BYTE *ds_source; + const uint32_t *ds_source; fixed_t ds_light; ShadeConstants ds_shade_constants; fixed_t ds_xfrac; @@ -2122,7 +2124,7 @@ class DrawSpanMaskedAddClampRGBACommand : public DrawerCommand public: DrawSpanMaskedAddClampRGBACommand() { - ds_source = ::ds_source; + ds_source = (const uint32_t*)::ds_source; ds_light = ::ds_light; ds_shade_constants = ::ds_shade_constants; ds_xfrac = ::ds_xfrac; @@ -2147,7 +2149,7 @@ public: dsfixed_t xstep; dsfixed_t ystep; uint32_t* dest; - const BYTE* source = ds_source; + const uint32_t* source = ds_source; int count; int spot; @@ -2172,13 +2174,13 @@ public: // 64x64 is the most common case by far, so special case it. do { - BYTE texdata; + uint32_t texdata; spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); texdata = source[spot]; if (texdata != 0) { - uint32_t fg = shade_pal_index(texdata, light, shade_constants); + uint32_t fg = shade_bgra(texdata, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = (fg) & 0xff; @@ -2205,13 +2207,13 @@ public: int xmask = ((1 << ds_xbits) - 1) << ds_ybits; do { - BYTE texdata; + uint32_t texdata; spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); texdata = source[spot]; if (texdata != 0) { - uint32_t fg = shade_pal_index(texdata, light, shade_constants); + uint32_t fg = shade_bgra(texdata, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = (fg) & 0xff; @@ -2270,8 +2272,8 @@ public: class Vlinec1RGBACommand : public DrawerCommand { - fixed_t dc_iscale; - fixed_t dc_texturefrac; + DWORD dc_iscale; + DWORD dc_texturefrac; int dc_count; const 
BYTE *dc_source; BYTE *dc_dest; @@ -2302,7 +2304,7 @@ public: DWORD fracstep = dc_iscale * thread->num_cores; DWORD frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); - const BYTE *source = dc_source; + const uint32 *source = (const uint32 *)dc_source; uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); int bits = vlinebits; int pitch = dc_pitch * thread->num_cores; @@ -2312,7 +2314,7 @@ public: do { - *dest = shade_pal_index(source[frac >> bits], light, shade_constants); + *dest = shade_bgra(source[frac >> bits], light, shade_constants); frac += fracstep; dest += pitch; } while (--count); @@ -2329,7 +2331,7 @@ class Vlinec4RGBACommand : public DrawerCommand fixed_t palookuplight[4]; DWORD vplce[4]; DWORD vince[4]; - const BYTE *bufplce[4]; + const uint32 *bufplce[4]; public: Vlinec4RGBACommand() @@ -2344,7 +2346,7 @@ public: palookuplight[i] = ::palookuplight[i]; vplce[i] = ::vplce[i]; vince[i] = ::vince[i]; - bufplce[i] = ::bufplce[i]; + bufplce[i] = (const uint32 *)::bufplce[i]; } } @@ -2378,10 +2380,10 @@ public: do { - dest[0] = shade_pal_index(bufplce[0][(place = local_vplce[0]) >> bits], light0, shade_constants); local_vplce[0] = place + local_vince[0]; - dest[1] = shade_pal_index(bufplce[1][(place = local_vplce[1]) >> bits], light1, shade_constants); local_vplce[1] = place + local_vince[1]; - dest[2] = shade_pal_index(bufplce[2][(place = local_vplce[2]) >> bits], light2, shade_constants); local_vplce[2] = place + local_vince[2]; - dest[3] = shade_pal_index(bufplce[3][(place = local_vplce[3]) >> bits], light3, shade_constants); local_vplce[3] = place + local_vince[3]; + dest[0] = shade_bgra(bufplce[0][(place = local_vplce[0]) >> bits], light0, shade_constants); local_vplce[0] = place + local_vince[0]; + dest[1] = shade_bgra(bufplce[1][(place = local_vplce[1]) >> bits], light1, shade_constants); local_vplce[1] = place + local_vince[1]; + dest[2] = shade_bgra(bufplce[2][(place = local_vplce[2]) >> bits], 
light2, shade_constants); local_vplce[2] = place + local_vince[2]; + dest[3] = shade_bgra(bufplce[3][(place = local_vplce[3]) >> bits], light3, shade_constants); local_vplce[3] = place + local_vince[3]; dest += pitch; } while (--count); } @@ -2403,7 +2405,6 @@ public: ShadeConstants shade_constants = dc_shade_constants; - uint32_t *palette = (uint32_t*)GPalette.BaseColors; DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; int skipped = thread->skipped_by_thread(dc_dest_y); @@ -2423,17 +2424,17 @@ public: DWORD place2 = local_vplce[2]; DWORD place3 = local_vplce[3]; - BYTE p0 = bufplce[0][place0 >> bits]; - BYTE p1 = bufplce[1][place1 >> bits]; - BYTE p2 = bufplce[2][place2 >> bits]; - BYTE p3 = bufplce[3][place3 >> bits]; + uint32_t p0 = bufplce[0][place0 >> bits]; + uint32_t p1 = bufplce[1][place1 >> bits]; + uint32_t p2 = bufplce[2][place2 >> bits]; + uint32_t p3 = bufplce[3][place3 >> bits]; local_vplce[0] = place0 + local_vince[0]; local_vplce[1] = place1 + local_vince[1]; local_vplce[2] = place2 + local_vince[2]; local_vplce[3] = place3 + local_vince[3]; - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + __m128i fg = _mm_set_epi32(p3, p2, p1, p0); SSE_SHADE_SIMPLE(fg); _mm_storeu_si128((__m128i*)dest, fg); dest += pitch; @@ -2449,17 +2450,17 @@ public: DWORD place2 = local_vplce[2]; DWORD place3 = local_vplce[3]; - BYTE p0 = bufplce[0][place0 >> bits]; - BYTE p1 = bufplce[1][place1 >> bits]; - BYTE p2 = bufplce[2][place2 >> bits]; - BYTE p3 = bufplce[3][place3 >> bits]; + uint32_t p0 = bufplce[0][place0 >> bits]; + uint32_t p1 = bufplce[1][place1 >> bits]; + uint32_t p2 = bufplce[2][place2 >> bits]; + uint32_t p3 = bufplce[3][place3 >> bits]; local_vplce[0] = place0 + local_vince[0]; local_vplce[1] = place1 + local_vince[1]; local_vplce[2] = place2 + local_vince[2]; local_vplce[3] = place3 + local_vince[3]; - __m128i fg = 
_mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + __m128i fg = _mm_set_epi32(p3, p2, p1, p0); SSE_SHADE(fg, shade_constants); _mm_storeu_si128((__m128i*)dest, fg); dest += pitch; @@ -2471,8 +2472,8 @@ public: class Mvlinec1RGBACommand : public DrawerCommand { - fixed_t dc_iscale; - fixed_t dc_texturefrac; + DWORD dc_iscale; + DWORD dc_texturefrac; int dc_count; const BYTE *dc_source; BYTE *dc_dest; @@ -2503,7 +2504,7 @@ public: DWORD fracstep = dc_iscale * thread->num_cores; DWORD frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); - const BYTE *source = dc_source; + const uint32 *source = (const uint32 *)dc_source; uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); int bits = mvlinebits; int pitch = dc_pitch * thread->num_cores; @@ -2513,10 +2514,10 @@ public: do { - BYTE pix = source[frac >> bits]; + uint32_t pix = source[frac >> bits]; if (pix != 0) { - *dest = shade_pal_index(pix, light, shade_constants); + *dest = shade_bgra(pix, light, shade_constants); } frac += fracstep; dest += pitch; @@ -2534,7 +2535,7 @@ class Mvlinec4RGBACommand : public DrawerCommand fixed_t palookuplight[4]; DWORD vplce[4]; DWORD vince[4]; - const BYTE *bufplce[4]; + const uint32 *bufplce[4]; public: Mvlinec4RGBACommand() @@ -2549,7 +2550,7 @@ public: palookuplight[i] = ::palookuplight[i]; vplce[i] = ::vplce[i]; vince[i] = ::vince[i]; - bufplce[i] = ::bufplce[i]; + bufplce[i] = (const uint32 *)::bufplce[i]; } } @@ -2583,11 +2584,11 @@ public: do { - BYTE pix; - pix = bufplce[0][(place = local_vplce[0]) >> bits]; if (pix) dest[0] = shade_pal_index(pix, light0, shade_constants); local_vplce[0] = place + local_vince[0]; - pix = bufplce[1][(place = local_vplce[1]) >> bits]; if (pix) dest[1] = shade_pal_index(pix, light1, shade_constants); local_vplce[1] = place + local_vince[1]; - pix = bufplce[2][(place = local_vplce[2]) >> bits]; if (pix) dest[2] = shade_pal_index(pix, light2, shade_constants); local_vplce[2] = 
place + local_vince[2]; - pix = bufplce[3][(place = local_vplce[3]) >> bits]; if (pix) dest[3] = shade_pal_index(pix, light3, shade_constants); local_vplce[3] = place + local_vince[3]; + uint32_t pix; + pix = bufplce[0][(place = local_vplce[0]) >> bits]; if (pix) dest[0] = shade_bgra(pix, light0, shade_constants); local_vplce[0] = place + local_vince[0]; + pix = bufplce[1][(place = local_vplce[1]) >> bits]; if (pix) dest[1] = shade_bgra(pix, light1, shade_constants); local_vplce[1] = place + local_vince[1]; + pix = bufplce[2][(place = local_vplce[2]) >> bits]; if (pix) dest[2] = shade_bgra(pix, light2, shade_constants); local_vplce[2] = place + local_vince[2]; + pix = bufplce[3][(place = local_vplce[3]) >> bits]; if (pix) dest[3] = shade_bgra(pix, light3, shade_constants); local_vplce[3] = place + local_vince[3]; dest += pitch; } while (--count); } @@ -2609,7 +2610,6 @@ public: ShadeConstants shade_constants = dc_shade_constants; - uint32_t *palette = (uint32_t*)GPalette.BaseColors; DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; int skipped = thread->skipped_by_thread(dc_dest_y); @@ -2629,10 +2629,10 @@ public: DWORD place2 = local_vplce[2]; DWORD place3 = local_vplce[3]; - BYTE pix0 = bufplce[0][place0 >> bits]; - BYTE pix1 = bufplce[1][place1 >> bits]; - BYTE pix2 = bufplce[2][place2 >> bits]; - BYTE pix3 = bufplce[3][place3 >> bits]; + uint32_t pix0 = bufplce[0][place0 >> bits]; + uint32_t pix1 = bufplce[1][place1 >> bits]; + uint32_t pix2 = bufplce[2][place2 >> bits]; + uint32_t pix3 = bufplce[3][place3 >> bits]; // movemask = !(pix == 0) __m128i movemask = _mm_xor_si128(_mm_cmpeq_epi32(_mm_set_epi32(pix3, pix2, pix1, pix0), _mm_setzero_si128()), _mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128())); @@ -2642,7 +2642,7 @@ public: local_vplce[2] = place2 + local_vince[2]; local_vplce[3] = place3 + local_vince[3]; - __m128i fg = _mm_set_epi32(palette[pix3], palette[pix2], 
palette[pix1], palette[pix0]); + __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); SSE_SHADE_SIMPLE(fg); _mm_maskmoveu_si128(fg, movemask, (char*)dest); dest += pitch; @@ -2658,10 +2658,10 @@ public: DWORD place2 = local_vplce[2]; DWORD place3 = local_vplce[3]; - BYTE pix0 = bufplce[0][place0 >> bits]; - BYTE pix1 = bufplce[1][place1 >> bits]; - BYTE pix2 = bufplce[2][place2 >> bits]; - BYTE pix3 = bufplce[3][place3 >> bits]; + uint32_t pix0 = bufplce[0][place0 >> bits]; + uint32_t pix1 = bufplce[1][place1 >> bits]; + uint32_t pix2 = bufplce[2][place2 >> bits]; + uint32_t pix3 = bufplce[3][place3 >> bits]; // movemask = !(pix == 0) __m128i movemask = _mm_xor_si128(_mm_cmpeq_epi32(_mm_set_epi32(pix3, pix2, pix1, pix0), _mm_setzero_si128()), _mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128())); @@ -2671,7 +2671,7 @@ public: local_vplce[2] = place2 + local_vince[2]; local_vplce[3] = place3 + local_vince[3]; - __m128i fg = _mm_set_epi32(palette[pix3], palette[pix2], palette[pix1], palette[pix0]); + __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); SSE_SHADE(fg, shade_constants); _mm_maskmoveu_si128(fg, movemask, (char*)dest); dest += pitch; @@ -2683,8 +2683,8 @@ public: class Tmvline1AddRGBACommand : public DrawerCommand { - fixed_t dc_iscale; - fixed_t dc_texturefrac; + DWORD dc_iscale; + DWORD dc_texturefrac; int dc_count; const BYTE *dc_source; BYTE *dc_dest; @@ -2719,7 +2719,7 @@ public: DWORD fracstep = dc_iscale * thread->num_cores; DWORD frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); - const BYTE *source = dc_source; + const uint32 *source = (const uint32 *)dc_source; uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); int bits = tmvlinebits; int pitch = dc_pitch * thread->num_cores; @@ -2732,10 +2732,10 @@ public: do { - BYTE pix = source[frac >> bits]; + uint32_t pix = source[frac >> bits]; if (pix != 0) { - uint32_t fg = shade_pal_index(pix, light, shade_constants); + uint32_t fg = 
shade_bgra(pix, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -2768,7 +2768,7 @@ class Tmvline4AddRGBACommand : public DrawerCommand fixed_t palookuplight[4]; DWORD vplce[4]; DWORD vince[4]; - const BYTE *bufplce[4]; + const uint32 *bufplce[4]; public: Tmvline4AddRGBACommand() @@ -2785,7 +2785,7 @@ public: palookuplight[i] = ::palookuplight[i]; vplce[i] = ::vplce[i]; vince[i] = ::vince[i]; - bufplce[i] = ::bufplce[i]; + bufplce[i] = (const uint32 *)::bufplce[i]; } } @@ -2823,10 +2823,10 @@ public: { for (int i = 0; i < 4; ++i) { - BYTE pix = bufplce[i][local_vplce[i] >> bits]; + uint32_t pix = bufplce[i][local_vplce[i] >> bits]; if (pix != 0) { - uint32_t fg = shade_pal_index(pix, light[i], shade_constants); + uint32_t fg = shade_bgra(pix, light[i], shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -2850,8 +2850,8 @@ public: class Tmvline1AddClampRGBACommand : public DrawerCommand { - fixed_t dc_iscale; - fixed_t dc_texturefrac; + DWORD dc_iscale; + DWORD dc_texturefrac; int dc_count; const BYTE *dc_source; BYTE *dc_dest; @@ -2886,7 +2886,7 @@ public: DWORD fracstep = dc_iscale * thread->num_cores; DWORD frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); - const BYTE *source = dc_source; + const uint32 *source = (const uint32 *)dc_source; uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); int bits = tmvlinebits; int pitch = dc_pitch * thread->num_cores; @@ -2899,10 +2899,10 @@ public: do { - BYTE pix = source[frac >> bits]; + uint32_t pix = source[frac >> bits]; if (pix != 0) { - uint32_t fg = shade_pal_index(pix, light, shade_constants); + uint32_t fg = shade_bgra(pix, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -2935,7 +2935,7 @@ class 
Tmvline4AddClampRGBACommand : public DrawerCommand fixed_t palookuplight[4]; DWORD vplce[4]; DWORD vince[4]; - const BYTE *bufplce[4]; + const uint32 *bufplce[4]; public: Tmvline4AddClampRGBACommand() @@ -2952,7 +2952,7 @@ public: palookuplight[i] = ::palookuplight[i]; vplce[i] = ::vplce[i]; vince[i] = ::vince[i]; - bufplce[i] = ::bufplce[i]; + bufplce[i] = (const uint32 *)::bufplce[i]; } } @@ -2990,10 +2990,10 @@ public: { for (int i = 0; i < 4; ++i) { - BYTE pix = bufplce[i][local_vplce[i] >> bits]; + uint32_t pix = bufplce[i][local_vplce[i] >> bits]; if (pix != 0) { - uint32_t fg = shade_pal_index(pix, light[i], shade_constants); + uint32_t fg = shade_bgra(pix, light[i], shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -3017,8 +3017,8 @@ public: class Tmvline1SubClampRGBACommand : public DrawerCommand { - fixed_t dc_iscale; - fixed_t dc_texturefrac; + DWORD dc_iscale; + DWORD dc_texturefrac; int dc_count; const BYTE *dc_source; BYTE *dc_dest; @@ -3053,7 +3053,7 @@ public: DWORD fracstep = dc_iscale * thread->num_cores; DWORD frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); - const BYTE *source = dc_source; + const uint32 *source = (const uint32 *)dc_source; uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); int bits = tmvlinebits; int pitch = dc_pitch * thread->num_cores; @@ -3066,10 +3066,10 @@ public: do { - BYTE pix = source[frac >> bits]; + uint32_t pix = source[frac >> bits]; if (pix != 0) { - uint32_t fg = shade_pal_index(pix, light, shade_constants); + uint32_t fg = shade_bgra(pix, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -3102,7 +3102,7 @@ class Tmvline4SubClampRGBACommand : public DrawerCommand fixed_t palookuplight[4]; DWORD vplce[4]; DWORD vince[4]; - const BYTE *bufplce[4]; + const uint32 *bufplce[4]; public: 
Tmvline4SubClampRGBACommand() @@ -3119,7 +3119,7 @@ public: palookuplight[i] = ::palookuplight[i]; vplce[i] = ::vplce[i]; vince[i] = ::vince[i]; - bufplce[i] = ::bufplce[i]; + bufplce[i] = (const uint32 *)::bufplce[i]; } } @@ -3157,10 +3157,10 @@ public: { for (int i = 0; i < 4; ++i) { - BYTE pix = bufplce[i][local_vplce[i] >> bits]; + uint32_t pix = bufplce[i][local_vplce[i] >> bits]; if (pix != 0) { - uint32_t fg = shade_pal_index(pix, light[i], shade_constants); + uint32_t fg = shade_bgra(pix, light[i], shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -3184,8 +3184,8 @@ public: class Tmvline1RevSubClampRGBACommand : public DrawerCommand { - fixed_t dc_iscale; - fixed_t dc_texturefrac; + DWORD dc_iscale; + DWORD dc_texturefrac; int dc_count; const BYTE *dc_source; BYTE *dc_dest; @@ -3220,7 +3220,7 @@ public: DWORD fracstep = dc_iscale * thread->num_cores; DWORD frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); - const BYTE *source = dc_source; + const uint32 *source = (const uint32 *)dc_source; uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); int bits = tmvlinebits; int pitch = dc_pitch * thread->num_cores; @@ -3233,10 +3233,10 @@ public: do { - BYTE pix = source[frac >> bits]; + uint32_t pix = source[frac >> bits]; if (pix != 0) { - uint32_t fg = shade_pal_index(pix, light, shade_constants); + uint32_t fg = shade_bgra(pix, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -3269,7 +3269,7 @@ class Tmvline4RevSubClampRGBACommand : public DrawerCommand fixed_t palookuplight[4]; DWORD vplce[4]; DWORD vince[4]; - const BYTE *bufplce[4]; + const uint32 *bufplce[4]; public: Tmvline4RevSubClampRGBACommand() @@ -3286,7 +3286,7 @@ public: palookuplight[i] = ::palookuplight[i]; vplce[i] = ::vplce[i]; vince[i] = ::vince[i]; - bufplce[i] = ::bufplce[i]; + 
bufplce[i] = (const uint32 *)::bufplce[i]; } } @@ -3324,10 +3324,10 @@ public: { for (int i = 0; i < 4; ++i) { - BYTE pix = bufplce[i][local_vplce[i] >> bits]; + uint32_t pix = bufplce[i][local_vplce[i] >> bits]; if (pix != 0) { - uint32_t fg = shade_pal_index(pix, light[i], shade_constants); + uint32_t fg = shade_bgra(pix, light[i], shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -3549,8 +3549,17 @@ void R_FillSpan_RGBA() DrawerCommandQueue::QueueCommand(); } +extern FTexture *rw_pic; // For the asserts below + DWORD vlinec1_RGBA() { + DWORD fracstep = dc_iscale; + DWORD frac = dc_texturefrac; + DWORD height = rw_pic->GetHeight(); + assert((frac >> vlinebits) < height); + frac += dc_count * fracstep; + assert((frac >> vlinebits) <= height); + DrawerCommandQueue::QueueCommand(); return dc_texturefrac + dc_count * dc_iscale; } @@ -3558,6 +3567,8 @@ DWORD vlinec1_RGBA() void vlinec4_RGBA() { DrawerCommandQueue::QueueCommand(); + for (int i = 0; i < 4; i++) + vplce[i] += vince[i] * dc_count; } DWORD mvlinec1_RGBA() @@ -3569,6 +3580,8 @@ DWORD mvlinec1_RGBA() void mvlinec4_RGBA() { DrawerCommandQueue::QueueCommand(); + for (int i = 0; i < 4; i++) + vplce[i] += vince[i] * dc_count; } fixed_t tmvline1_add_RGBA() @@ -3580,6 +3593,8 @@ fixed_t tmvline1_add_RGBA() void tmvline4_add_RGBA() { DrawerCommandQueue::QueueCommand(); + for (int i = 0; i < 4; i++) + vplce[i] += vince[i] * dc_count; } fixed_t tmvline1_addclamp_RGBA() @@ -3591,6 +3606,8 @@ fixed_t tmvline1_addclamp_RGBA() void tmvline4_addclamp_RGBA() { DrawerCommandQueue::QueueCommand(); + for (int i = 0; i < 4; i++) + vplce[i] += vince[i] * dc_count; } fixed_t tmvline1_subclamp_RGBA() @@ -3602,6 +3619,8 @@ fixed_t tmvline1_subclamp_RGBA() void tmvline4_subclamp_RGBA() { DrawerCommandQueue::QueueCommand(); + for (int i = 0; i < 4; i++) + vplce[i] += vince[i] * dc_count; } fixed_t tmvline1_revsubclamp_RGBA() @@ -3613,6 +3632,8 @@ fixed_t 
tmvline1_revsubclamp_RGBA() void tmvline4_revsubclamp_RGBA() { DrawerCommandQueue::QueueCommand(); + for (int i = 0; i < 4; i++) + vplce[i] += vince[i] * dc_count; } void R_DrawFogBoundarySection_RGBA(int y, int y2, int x1) diff --git a/src/r_main.h b/src/r_main.h index 5d4ff11748..6d0e2a21f2 100644 --- a/src/r_main.h +++ b/src/r_main.h @@ -136,6 +136,19 @@ FORCEINLINE uint32_t shade_pal_index_simple(uint32_t index, uint32_t light) return 0xff000000 | (red << 16) | (green << 8) | blue; } +FORCEINLINE uint32_t shade_bgra_simple(uint32_t color, uint32_t light) +{ + uint32_t red = (color >> 16) & 0xff; + uint32_t green = (color >> 8) & 0xff; + uint32_t blue = color & 0xff; + + red = red * light / 256; + green = green * light / 256; + blue = blue * light / 256; + + return 0xff000000 | (red << 16) | (green << 8) | blue; +} + // Calculates a ARGB8 color for the given palette index, light multiplier and dynamic colormap FORCEINLINE uint32_t shade_pal_index(uint32_t index, uint32_t light, const ShadeConstants &constants) { @@ -171,6 +184,39 @@ FORCEINLINE uint32_t shade_pal_index(uint32_t index, uint32_t light, const Shade return 0xff000000 | (red << 16) | (green << 8) | blue; } +FORCEINLINE uint32_t shade_bgra(uint32_t color, uint32_t light, const ShadeConstants &constants) +{ + uint32_t red = (color >> 16) & 0xff; + uint32_t green = (color >> 8) & 0xff; + uint32_t blue = color & 0xff; + if (constants.simple_shade) + { + red = red * light / 256; + green = green * light / 256; + blue = blue * light / 256; + } + else + { + uint32_t inv_light = 256 - light; + uint32_t inv_desaturate = 256 - constants.desaturate; + + uint32_t intensity = ((red * 77 + green * 143 + blue * 37) >> 8) * constants.desaturate; + + red = (red * inv_desaturate + intensity) / 256; + green = (green * inv_desaturate + intensity) / 256; + blue = (blue * inv_desaturate + intensity) / 256; + + red = (constants.fade_red * inv_light + red * light) / 256; + green = (constants.fade_green * inv_light + green * 
light) / 256; + blue = (constants.fade_blue * inv_light + blue * light) / 256; + + red = (red * constants.light_red) / 256; + green = (green * constants.light_green) / 256; + blue = (blue * constants.light_blue) / 256; + } + return 0xff000000 | (red << 16) | (green << 8) | blue; +} + // Calculate constants for a simple shade #define SSE_SHADE_SIMPLE_INIT(light) \ __m128i mlight_hi = _mm_set_epi16(256, light, light, light, 256, light, light, light); \ diff --git a/src/r_plane.cpp b/src/r_plane.cpp index 26d579d6d2..05fce79a65 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -973,15 +973,22 @@ extern FTexture *rw_pic; // Allow for layer skies up to 512 pixels tall. This is overkill, // since the most anyone can ever see of the sky is 500 pixels. // We need 4 skybufs because wallscan can draw up to 4 columns at a time. +// Need two versions - one for true color and one for palette static BYTE skybuf[4][512]; +static uint32_t skybuf_bgra[4][512]; static DWORD lastskycol[4]; +static DWORD lastskycol_bgra[4]; static int skycolplace; +static int skycolplace_bgra; // Get a column of sky when there is only one sky texture. 
static const BYTE *R_GetOneSkyColumn (FTexture *fronttex, int x) { angle_t column = (skyangle + xtoviewangle[x]) ^ skyflip; - return fronttex->GetColumn((UMulScale16(column, frontcyl) + frontpos) >> FRACBITS, NULL); + if (!r_swtruecolor) + return fronttex->GetColumn((UMulScale16(column, frontcyl) + frontpos) >> FRACBITS, NULL); + else + return (const BYTE *)fronttex->GetColumnBgra((UMulScale16(column, frontcyl) + frontpos) >> FRACBITS, NULL); } // Get a column of sky when there are two overlapping sky textures @@ -996,38 +1003,77 @@ static const BYTE *R_GetTwoSkyColumns (FTexture *fronttex, int x) DWORD skycol = (angle1 << 16) | angle2; int i; - for (i = 0; i < 4; ++i) + if (!r_swtruecolor) { - if (lastskycol[i] == skycol) + for (i = 0; i < 4; ++i) { - return skybuf[i]; + if (lastskycol[i] == skycol) + { + return skybuf[i]; + } } + + lastskycol[skycolplace] = skycol; + BYTE *composite = skybuf[skycolplace]; + skycolplace = (skycolplace + 1) & 3; + + // The ordering of the following code has been tuned to allow VC++ to optimize + // it well. In particular, this arrangement lets it keep count in a register + // instead of on the stack. + const BYTE *front = fronttex->GetColumn(angle1, NULL); + const BYTE *back = backskytex->GetColumn(angle2, NULL); + + int count = MIN(512, MIN(backskytex->GetHeight(), fronttex->GetHeight())); + i = 0; + do + { + if (front[i]) + { + composite[i] = front[i]; + } + else + { + composite[i] = back[i]; + } + } while (++i, --count); + return composite; } - - lastskycol[skycolplace] = skycol; - BYTE *composite = skybuf[skycolplace]; - skycolplace = (skycolplace + 1) & 3; - - // The ordering of the following code has been tuned to allow VC++ to optimize - // it well. In particular, this arrangement lets it keep count in a register - // instead of on the stack. 
- const BYTE *front = fronttex->GetColumn (angle1, NULL); - const BYTE *back = backskytex->GetColumn (angle2, NULL); - - int count = MIN (512, MIN (backskytex->GetHeight(), fronttex->GetHeight())); - i = 0; - do + else { - if (front[i]) + return R_GetOneSkyColumn(fronttex, x); + for (i = 0; i < 4; ++i) { - composite[i] = front[i]; + if (lastskycol_bgra[i] == skycol) + { + return (BYTE*)(skybuf_bgra[i]); + } } - else + + lastskycol_bgra[skycolplace_bgra] = skycol; + uint32_t *composite = skybuf_bgra[skycolplace_bgra]; + skycolplace_bgra = (skycolplace_bgra + 1) & 3; + + // The ordering of the following code has been tuned to allow VC++ to optimize + // it well. In particular, this arrangement lets it keep count in a register + // instead of on the stack. + const uint32_t *front = (const uint32_t *)fronttex->GetColumnBgra(angle1, NULL); + const uint32_t *back = (const uint32_t *)backskytex->GetColumnBgra(angle2, NULL); + + int count = MIN(512, MIN(backskytex->GetHeight(), fronttex->GetHeight())); + i = 0; + do { - composite[i] = back[i]; - } - } while (++i, --count); - return composite; + if (front[i]) + { + composite[i] = front[i]; + } + else + { + composite[i] = back[i]; + } + } while (++i, --count); + return (BYTE*)composite; + } } static void R_DrawSky (visplane_t *pl) @@ -1062,6 +1108,7 @@ static void R_DrawSky (visplane_t *pl) for (x = 0; x < 4; ++x) { lastskycol[x] = 0xffffffff; + lastskycol_bgra[x] = 0xffffffff; } rw_pic = frontskytex; @@ -1075,6 +1122,7 @@ static void R_DrawSky (visplane_t *pl) for (x = 0; x < 4; ++x) { lastskycol[x] = 0xffffffff; + lastskycol_bgra[x] = 0xffffffff; } wallscan (pl->left, pl->right, (short *)pl->top, (short *)pl->bottom, swall, lwall, frontyScale, backskytex == NULL ? 
R_GetOneSkyColumn : R_GetTwoSkyColumns); @@ -1112,6 +1160,7 @@ static void R_DrawSkyStriped (visplane_t *pl) for (x = 0; x < 4; ++x) { lastskycol[x] = 0xffffffff; + lastskycol_bgra[x] = 0xffffffff; } wallscan (pl->left, pl->right, top, bot, swall, lwall, rw_pic->Scale.Y, backskytex == NULL ? R_GetOneSkyColumn : R_GetTwoSkyColumns); @@ -1230,7 +1279,10 @@ void R_DrawSinglePlane (visplane_t *pl, fixed_t alpha, bool additive, bool maske R_SetupSpanBits(tex); double xscale = pl->xform.xScale * tex->Scale.X; double yscale = pl->xform.yScale * tex->Scale.Y; - ds_source = tex->GetPixels (); + if (r_swtruecolor) + ds_source = (const BYTE*)tex->GetPixelsBgra(); + else + ds_source = tex->GetPixels(); basecolormap = pl->colormap; planeshade = LIGHT2SHADE(pl->lightlevel); diff --git a/src/r_segs.cpp b/src/r_segs.cpp index 5aa7c29a27..87ce48ec41 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -1092,6 +1092,8 @@ uint32_t wallscan_drawcol1(int x, int y1, int y2, uint32_t uv_start, uint32_t uv { int next_uv_wrap = (uv_max - uv_pos + uv_step - 1) / uv_step; int count = MIN(left, next_uv_wrap); + if (count <= 0) + break; // This should never happen, but it does.. if (count > 0) { @@ -1146,6 +1148,8 @@ void wallscan_drawcol4(int x, int y1, int y2, uint32_t *uv_pos, uint32_t *uv_ste int next_uv_wrap = (uv_max - uv_pos[i] + uv_step[i] - 1) / uv_step[i]; count = MIN(next_uv_wrap, count); } + if (count <= 0) + break; // This should never happen, but it does.. 
// Draw until that column wraps if (count > 0) diff --git a/src/textures/pngtexture.cpp b/src/textures/pngtexture.cpp index e47fa62c07..95f7aca75f 100644 --- a/src/textures/pngtexture.cpp +++ b/src/textures/pngtexture.cpp @@ -41,6 +41,7 @@ #include "bitmap.h" #include "v_palette.h" #include "textures/textures.h" +#include //========================================================================== // @@ -56,6 +57,7 @@ public: const BYTE *GetColumn (unsigned int column, const Span **spans_out); const BYTE *GetPixels (); + const uint32_t *GetPixelsBgra (); void Unload (); FTextureFormat GetFormat (); int CopyTrueColorPixels(FBitmap *bmp, int x, int y, int rotate, FCopyInfo *inf = NULL); @@ -65,6 +67,7 @@ protected: FString SourceFile; BYTE *Pixels; + std::vector PixelsBgra; Span **Spans; BYTE BitDepth; @@ -73,11 +76,13 @@ protected: bool HaveTrans; WORD NonPaletteTrans[3]; + std::vector PngPalette; BYTE *PaletteMap; int PaletteSize; DWORD StartOfIDAT; void MakeTexture (); + void MakeTextureBgra (); friend class FTexture; }; @@ -266,6 +271,12 @@ FPNGTexture::FPNGTexture (FileReader &lump, int lumpnum, const FString &filename { lump.Seek (len - PaletteSize * 3, SEEK_CUR); } + for (i = 0; i < PaletteSize; i++) + { + PngPalette.push_back(p.pngpal[i][0]); + PngPalette.push_back(p.pngpal[i][1]); + PngPalette.push_back(p.pngpal[i][2]); + } for (i = PaletteSize - 1; i >= 0; --i) { p.palette[i] = MAKERGB(p.pngpal[i][0], p.pngpal[i][1], p.pngpal[i][2]); @@ -369,11 +380,9 @@ FPNGTexture::~FPNGTexture () void FPNGTexture::Unload () { - if (Pixels != NULL) - { - delete[] Pixels; - Pixels = NULL; - } + delete[] Pixels; + Pixels = NULL; + PixelsBgra.clear(); } //========================================================================== @@ -446,6 +455,16 @@ const BYTE *FPNGTexture::GetPixels () return Pixels; } +const uint32_t *FPNGTexture::GetPixelsBgra() +{ + if (PixelsBgra.empty()) + { + MakeTextureBgra(); + } + return PixelsBgra.data(); +} + + 
//========================================================================== // // @@ -602,6 +621,139 @@ void FPNGTexture::MakeTexture () delete lump; } +void FPNGTexture::MakeTextureBgra () +{ + FileReader *lump; + + if (SourceLump >= 0) + { + lump = new FWadLump(Wads.OpenLumpNum(SourceLump)); + } + else + { + lump = new FileReader(SourceFile.GetChars()); + } + + PixelsBgra.resize(Width * Height, 0xffff0000); + if (StartOfIDAT != 0) + { + DWORD len, id; + lump->Seek (StartOfIDAT, SEEK_SET); + lump->Read(&len, 4); + lump->Read(&id, 4); + + if (ColorType == 0 || ColorType == 3) /* Grayscale and paletted */ + { + std::vector src(Width*Height); + M_ReadIDAT (lump, src.data(), Width, Height, Width, BitDepth, ColorType, Interlace, BigLong((unsigned int)len)); + + if (!PngPalette.empty()) + { + for (int x = 0; x < Width; x++) + { + for (int y = 0; y < Height; y++) + { + uint32_t r = PngPalette[src[x + y * Width] * 3 + 0]; + uint32_t g = PngPalette[src[x + y * Width] * 3 + 1]; + uint32_t b = PngPalette[src[x + y * Width] * 3 + 2]; + PixelsBgra[x * Height + y] = 0xff000000 | (r << 16) | (g << 8) | b; + } + } + } + else + { + for (int x = 0; x < Width; x++) + { + for (int y = 0; y < Height; y++) + { + uint32_t gray = src[x + y * Width]; + PixelsBgra[x * Height + y] = 0xff000000 | (gray << 16) | (gray << 8) | gray; + } + } + } + } + else /* RGB and/or Alpha present */ + { + int bytesPerPixel = ColorType == 2 ? 3 : ColorType == 4 ? 2 : 4; + BYTE *tempix = new BYTE[Width * Height * bytesPerPixel]; + BYTE *in; + uint32_t *out; + int x, y, pitch, backstep; + + M_ReadIDAT (lump, tempix, Width, Height, Width*bytesPerPixel, BitDepth, ColorType, Interlace, BigLong((unsigned int)len)); + in = tempix; + out = PixelsBgra.data(); + + // Convert from source format to paletted, column-major. + // Formats with alpha maps are reduced to only 1 bit of alpha. 
+ switch (ColorType) + { + case 2: // RGB + pitch = Width * 3; + backstep = Height * pitch - 3; + for (x = Width; x > 0; --x) + { + for (y = Height; y > 0; --y) + { + if (!HaveTrans) + { + *out++ = 0xff000000 | (((uint32_t)in[0]) << 16) | (((uint32_t)in[1]) << 8) | ((uint32_t)in[2]); + } + else + { + if (in[0] == NonPaletteTrans[0] && + in[1] == NonPaletteTrans[1] && + in[2] == NonPaletteTrans[2]) + { + *out++ = 0; + } + else + { + *out++ = 0xff000000 | (((uint32_t)in[0]) << 16) | (((uint32_t)in[1]) << 8) | ((uint32_t)in[2]); + } + } + in += pitch; + } + in -= backstep; + } + break; + + case 4: // Grayscale + Alpha + pitch = Width * 2; + backstep = Height * pitch - 2; + for (x = Width; x > 0; --x) + { + for (y = Height; y > 0; --y) + { + uint32_t alpha = in[1]; + uint32_t gray = in[0]; + *out++ = (alpha << 24) | (gray << 16) | (gray << 8) | gray; + in += pitch; + } + in -= backstep; + } + break; + + case 6: // RGB + Alpha + pitch = Width * 4; + backstep = Height * pitch - 4; + for (x = Width; x > 0; --x) + { + for (y = Height; y > 0; --y) + { + *out++ = (((uint32_t)in[3]) << 24) | (((uint32_t)in[0]) << 16) | (((uint32_t)in[1]) << 8) | ((uint32_t)in[2]); + in += pitch; + } + in -= backstep; + } + break; + } + delete[] tempix; + } + } + delete lump; +} + //=========================================================================== // // FPNGTexture::CopyTrueColorPixels diff --git a/src/textures/texture.cpp b/src/textures/texture.cpp index 7b90c295fc..1869491b18 100644 --- a/src/textures/texture.cpp +++ b/src/textures/texture.cpp @@ -45,6 +45,7 @@ #include "v_video.h" #include "m_fixed.h" #include "textures/textures.h" +#include "v_palette.h" typedef bool (*CheckFunc)(FileReader & file); typedef FTexture * (*CreateFunc)(FileReader & file, int lumpnum); @@ -175,6 +176,33 @@ FTexture::~FTexture () KillNative(); } +const uint32_t *FTexture::GetColumnBgra(unsigned int column, const Span **spans_out) +{ + const uint32_t *pixels = GetPixelsBgra(); + + column %= Width; + if 
(column < 0) + column += Width; + + if (spans_out != nullptr) + GetColumn(column, spans_out); + return pixels + column * Height; +} + +const uint32_t *FTexture::GetPixelsBgra() +{ + if (BgraPixels.empty()) + { + const BYTE *indices = GetPixels(); + BgraPixels.resize(Width * Height); + for (int i = 0; i < Width * Height; i++) + { + BgraPixels[i] = GPalette.BaseColors[indices[i]].d; + } + } + return BgraPixels.data(); +} + bool FTexture::CheckModified () { return false; diff --git a/src/textures/textures.h b/src/textures/textures.h index 14667093c7..0d066eff52 100644 --- a/src/textures/textures.h +++ b/src/textures/textures.h @@ -3,6 +3,7 @@ #include "doomtype.h" #include "vectors.h" +#include class FBitmap; struct FRemapTable; @@ -175,9 +176,15 @@ public: // Returns a single column of the texture virtual const BYTE *GetColumn (unsigned int column, const Span **spans_out) = 0; + // Returns a single column of the texture, in BGRA8 format + virtual const uint32_t *GetColumnBgra(unsigned int column, const Span **spans_out); + // Returns the whole texture, stored in column-major order virtual const BYTE *GetPixels () = 0; - + + // Returns the whole texture, stored in column-major order, in BGRA8 format + virtual const uint32_t *GetPixelsBgra(); + virtual int CopyTrueColorPixels(FBitmap *bmp, int x, int y, int rotate=0, FCopyInfo *inf = NULL); int CopyTrueColorTranslated(FBitmap *bmp, int x, int y, int rotate, FRemapTable *remap, FCopyInfo *inf = NULL); virtual bool UseBasePalette(); @@ -262,6 +269,9 @@ protected: Rotations = other->Rotations; } +private: + std::vector BgraPixels; + public: static void FlipSquareBlock (BYTE *block, int x, int y); static void FlipSquareBlockRemap (BYTE *block, int x, int y, const BYTE *remap); From 24f846f702bb6f8fb57a4a6370f33267442e385c Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 10 Jun 2016 14:25:56 +0200 Subject: [PATCH 025/912] Bug fixes --- src/r_draw_rgba.cpp | 6 ++---- src/r_main.h | 21 +++++++++++++++++++++ 2 files 
changed, 23 insertions(+), 4 deletions(-) diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 528c3c986f..fa632cb5d3 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -1511,8 +1511,6 @@ public: { // 64x64 is the most common case by far, so special case it. - uint32_t *palette = (uint32_t*)GPalette.BaseColors; - int sse_count = count / 4; count -= sse_count * 4; @@ -1545,7 +1543,7 @@ public: // Lookup pixel from flat texture tile, // re-index using light/colormap. - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + __m128i fg = _mm_set_epi32(p3, p2, p1, p0); SSE_SHADE_SIMPLE(fg); _mm_storeu_si128((__m128i*)dest, fg); @@ -1582,7 +1580,7 @@ public: // Lookup pixel from flat texture tile, // re-index using light/colormap. - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + __m128i fg = _mm_set_epi32(p3, p2, p1, p0); SSE_SHADE(fg, shade_constants); _mm_storeu_si128((__m128i*)dest, fg); diff --git a/src/r_main.h b/src/r_main.h index 6d0e2a21f2..d71d44fe1f 100644 --- a/src/r_main.h +++ b/src/r_main.h @@ -217,6 +217,27 @@ FORCEINLINE uint32_t shade_bgra(uint32_t color, uint32_t light, const ShadeConst return 0xff000000 | (red << 16) | (green << 8) | blue; } +FORCEINLINE uint32_t alpha_blend(uint32_t fg, uint32_t bg) +{ + uint32_t fg_alpha = (fg >> 24) & 0xff; + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t alpha = fg_alpha + (fg_alpha >> 7); // 255 -> 256 + uint32_t inv_alpha = 256 - alpha; + + uint32_t bg_red = (bg >> 16) & 0xff; + uint32_t bg_green = (bg >> 8) & 0xff; + uint32_t bg_blue = bg & 0xff; + + uint32_t red = ((fg_red * alpha) + (bg_red * inv_alpha)) / 256; + uint32_t green = ((fg_green * alpha) + (bg_green * inv_alpha)) / 256; + uint32_t blue = ((fg_blue * alpha) + (bg_blue * inv_alpha)) / 256; + + return 0xff000000 | (red << 16) | (green << 8) | blue; +} + // Calculate constants for a simple shade 
#define SSE_SHADE_SIMPLE_INIT(light) \ __m128i mlight_hi = _mm_set_epi16(256, light, light, light, 256, light, light, light); \ From 6c70eaea2f46cdd2625ad0b51bbcc194ac3200e8 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 10 Jun 2016 15:56:50 +0200 Subject: [PATCH 026/912] Add jpeg bgra support --- src/textures/jpegtexture.cpp | 116 +++++++++++++++++++++++++++++++++-- 1 file changed, 111 insertions(+), 5 deletions(-) diff --git a/src/textures/jpegtexture.cpp b/src/textures/jpegtexture.cpp index 2253965987..a37eff6c3d 100644 --- a/src/textures/jpegtexture.cpp +++ b/src/textures/jpegtexture.cpp @@ -187,6 +187,7 @@ public: const BYTE *GetColumn (unsigned int column, const Span **spans_out); const BYTE *GetPixels (); + const uint32_t *GetPixelsBgra (); void Unload (); FTextureFormat GetFormat (); int CopyTrueColorPixels(FBitmap *bmp, int x, int y, int rotate, FCopyInfo *inf = NULL); @@ -195,9 +196,11 @@ public: protected: BYTE *Pixels; + std::vector PixelsBgra; Span DummySpans[2]; void MakeTexture (); + void MakeTextureBgra (); friend class FTexture; }; @@ -295,11 +298,9 @@ FJPEGTexture::~FJPEGTexture () void FJPEGTexture::Unload () { - if (Pixels != NULL) - { - delete[] Pixels; - Pixels = NULL; - } + delete[] Pixels; + Pixels = NULL; + PixelsBgra.clear(); } //========================================================================== @@ -358,6 +359,15 @@ const BYTE *FJPEGTexture::GetPixels () return Pixels; } +const uint32_t *FJPEGTexture::GetPixelsBgra() +{ + if (PixelsBgra.empty()) + { + MakeTextureBgra(); + } + return PixelsBgra.data(); +} + //========================================================================== // // @@ -457,6 +467,102 @@ void FJPEGTexture::MakeTexture () } } +void FJPEGTexture::MakeTextureBgra() +{ + FWadLump lump = Wads.OpenLumpNum(SourceLump); + JSAMPLE *buff = NULL; + + jpeg_decompress_struct cinfo; + jpeg_error_mgr jerr; + + PixelsBgra.resize(Width * Height, 0xffba0000); + + cinfo.err = jpeg_std_error(&jerr); + 
cinfo.err->output_message = JPEG_OutputMessage; + cinfo.err->error_exit = JPEG_ErrorExit; + jpeg_create_decompress(&cinfo); + try + { + FLumpSourceMgr sourcemgr(&lump, &cinfo); + jpeg_read_header(&cinfo, TRUE); + if (!((cinfo.out_color_space == JCS_RGB && cinfo.num_components == 3) || + (cinfo.out_color_space == JCS_CMYK && cinfo.num_components == 4) || + (cinfo.out_color_space == JCS_GRAYSCALE && cinfo.num_components == 1))) + { + Printf(TEXTCOLOR_ORANGE "Unsupported color format\n"); + throw - 1; + } + + jpeg_start_decompress(&cinfo); + + int y = 0; + buff = new BYTE[cinfo.output_width * cinfo.output_components]; + + while (cinfo.output_scanline < cinfo.output_height) + { + int num_scanlines = jpeg_read_scanlines(&cinfo, &buff, 1); + BYTE *in = buff; + uint32_t *out = PixelsBgra.data() + y; + switch (cinfo.out_color_space) + { + case JCS_RGB: + for (int x = Width; x > 0; --x) + { + uint32_t r = in[0]; + uint32_t g = in[1]; + uint32_t b = in[2]; + *out = 0xff000000 | (r << 16) | (g << 8) | b; + out += Height; + in += 3; + } + break; + + case JCS_GRAYSCALE: + for (int x = Width; x > 0; --x) + { + uint32_t gray = in[0]; + *out = 0xff000000 | (gray << 16) | (gray << 8) | gray; + out += Height; + in += 1; + } + break; + + case JCS_CMYK: + // What are you doing using a CMYK image? :) + for (int x = Width; x > 0; --x) + { + // To be precise, these calculations should use 255, but + // 256 is much faster and virtually indistinguishable. + uint32_t r = in[3] - (((256 - in[0])*in[3]) >> 8); + uint32_t g = in[3] - (((256 - in[1])*in[3]) >> 8); + uint32_t b = in[3] - (((256 - in[2])*in[3]) >> 8); + *out = 0xff000000 | (r << 16) | (g << 8) | b; + out += Height; + in += 4; + } + break; + + default: + // The other colorspaces were considered above and discarded, + // but GCC will complain without a default for them here. 
+ break; + } + y++; + } + jpeg_finish_decompress(&cinfo); + jpeg_destroy_decompress(&cinfo); + } + catch (int) + { + Printf(TEXTCOLOR_ORANGE " in texture %s\n", Name.GetChars()); + jpeg_destroy_decompress(&cinfo); + } + if (buff != NULL) + { + delete[] buff; + } +} + //=========================================================================== // From 103a6baac54d7d874021ad94f40098cc66d787dc Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 10 Jun 2016 15:57:31 +0200 Subject: [PATCH 027/912] Support for drawing in multiple passes --- src/r_draw.h | 17 +++++-- src/r_draw_rgba.cpp | 105 +++++++++++++++++++++++++++++++++++++++---- src/r_drawt_rgba.cpp | 62 +++++++++++++------------ 3 files changed, 141 insertions(+), 43 deletions(-) diff --git a/src/r_draw.h b/src/r_draw.h index 3f97a7a65f..409b7c01be 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -458,25 +458,34 @@ public: // Number of active threads int num_cores = 1; + // Range of rows processed this pass + int pass_start_y = 0; + int pass_end_y = 300; + uint32_t dc_temp_rgbabuff_rgba[MAXHEIGHT * 4]; uint32_t *dc_temp_rgba; // Checks if a line is rendered by this thread bool line_skipped_by_thread(int line) { - return line % num_cores != core; + return line < pass_start_y || line >= pass_end_y || line % num_cores != core; } // The number of lines to skip to reach the first line to be rendered by this thread int skipped_by_thread(int first_line) { - return (num_cores - (first_line - core) % num_cores) % num_cores; + int pass_skip = MAX(pass_start_y - first_line, 0); + int core_skip = (num_cores - (first_line + pass_skip - core) % num_cores) % num_cores; + return pass_skip + core_skip; } // The number of lines to be rendered by this thread int count_for_thread(int first_line, int count) { - return (count - skipped_by_thread(first_line) + num_cores - 1) / num_cores; + int lines_until_pass_end = MAX(pass_end_y - first_line, 0); + count = MIN(count, lines_until_pass_end); + int c = (count - 
skipped_by_thread(first_line) + num_cores - 1) / num_cores; + return MAX(c, 0); } // Calculate the dest address for the first line to be rendered by this thread @@ -522,6 +531,8 @@ class DrawerCommandQueue bool no_threading = false; DrawerThread single_core_thread; + int num_passes = 2; + int rows_in_pass = 540; void StartThreads(); void StopThreads(); diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index fa632cb5d3..b81ee4cca4 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -104,11 +104,19 @@ void DrawerCommandQueue::Finish() thread.core = 0; thread.num_cores = queue->threads.size() + 1; - size_t size = queue->active_commands.size(); - for (size_t i = 0; i < size; i++) + for (int pass = 0; pass < queue->num_passes; pass++) { - auto &command = queue->active_commands[i]; - command->Execute(&thread); + thread.pass_start_y = pass * queue->rows_in_pass; + thread.pass_end_y = (pass + 1) * queue->rows_in_pass; + if (pass + 1 == queue->num_passes) + thread.pass_end_y = MAX(thread.pass_end_y, MAXHEIGHT); + + size_t size = queue->active_commands.size(); + for (size_t i = 0; i < size; i++) + { + auto &command = queue->active_commands[i]; + command->Execute(&thread); + } } // Wait for everyone to finish: @@ -156,11 +164,19 @@ void DrawerCommandQueue::StartThreads() start_lock.unlock(); // Do the work: - size_t size = queue->active_commands.size(); - for (size_t i = 0; i < size; i++) + for (int pass = 0; pass < queue->num_passes; pass++) { - auto &command = queue->active_commands[i]; - command->Execute(thread); + thread->pass_start_y = pass * queue->rows_in_pass; + thread->pass_end_y = (pass + 1) * queue->rows_in_pass; + if (pass + 1 == queue->num_passes) + thread->pass_end_y = MAX(thread->pass_end_y, MAXHEIGHT); + + size_t size = queue->active_commands.size(); + for (size_t i = 0; i < size; i++) + { + auto &command = queue->active_commands[i]; + command->Execute(thread); + } } // Notify main thread that we finished: @@ -1611,6 +1627,79 @@ public: BYTE 
xshift = yshift - ds_xbits; int xmask = ((1 << ds_xbits) - 1) << ds_ybits; + int sse_count = count / 4; + count -= sse_count * 4; + + if (shade_constants.simple_shade) + { + SSE_SHADE_SIMPLE_INIT(light); + + while (sse_count--) + { + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + uint32_t p0 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + uint32_t p1 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + uint32_t p2 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + uint32_t p3 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + // Lookup pixel from flat texture tile + __m128i fg = _mm_set_epi32(p3, p2, p1, p0); + SSE_SHADE_SIMPLE(fg); + _mm_storeu_si128((__m128i*)dest, fg); + dest += 4; + } + } + else + { + SSE_SHADE_INIT(light, shade_constants); + + while (sse_count--) + { + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + uint32_t p0 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + uint32_t p1 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + uint32_t p2 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + uint32_t p3 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + // Lookup pixel from flat texture tile + __m128i fg = _mm_set_epi32(p3, p2, p1, p0); + SSE_SHADE(fg, shade_constants); + _mm_storeu_si128((__m128i*)dest, fg); + dest += 4; + } + } + + if (count == 0) + return; + do { // Current texture index in u,v. 
diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp index c2caec0c2c..28c86d3f57 100644 --- a/src/r_drawt_rgba.cpp +++ b/src/r_drawt_rgba.cpp @@ -1528,40 +1528,38 @@ public: fracstep = dc_iscale; frac = dc_texturefrac; - { - const BYTE *source = dc_source; + const BYTE *source = dc_source; - if (count & 1) { - *dest = source[frac >> FRACBITS]; dest += 4; frac += fracstep; - } - if (count & 2) { - dest[0] = source[frac >> FRACBITS]; frac += fracstep; - dest[4] = source[frac >> FRACBITS]; frac += fracstep; - dest += 8; - } - if (count & 4) { - dest[0] = source[frac >> FRACBITS]; frac += fracstep; - dest[4] = source[frac >> FRACBITS]; frac += fracstep; - dest[8] = source[frac >> FRACBITS]; frac += fracstep; - dest[12] = source[frac >> FRACBITS]; frac += fracstep; - dest += 16; - } - count >>= 3; - if (!count) return; - - do - { - dest[0] = source[frac >> FRACBITS]; frac += fracstep; - dest[4] = source[frac >> FRACBITS]; frac += fracstep; - dest[8] = source[frac >> FRACBITS]; frac += fracstep; - dest[12] = source[frac >> FRACBITS]; frac += fracstep; - dest[16] = source[frac >> FRACBITS]; frac += fracstep; - dest[20] = source[frac >> FRACBITS]; frac += fracstep; - dest[24] = source[frac >> FRACBITS]; frac += fracstep; - dest[28] = source[frac >> FRACBITS]; frac += fracstep; - dest += 32; - } while (--count); + if (count & 1) { + *dest = source[frac >> FRACBITS]; dest += 4; frac += fracstep; } + if (count & 2) { + dest[0] = source[frac >> FRACBITS]; frac += fracstep; + dest[4] = source[frac >> FRACBITS]; frac += fracstep; + dest += 8; + } + if (count & 4) { + dest[0] = source[frac >> FRACBITS]; frac += fracstep; + dest[4] = source[frac >> FRACBITS]; frac += fracstep; + dest[8] = source[frac >> FRACBITS]; frac += fracstep; + dest[12] = source[frac >> FRACBITS]; frac += fracstep; + dest += 16; + } + count >>= 3; + if (!count) return; + + do + { + dest[0] = source[frac >> FRACBITS]; frac += fracstep; + dest[4] = source[frac >> FRACBITS]; frac += fracstep; + dest[8] = 
source[frac >> FRACBITS]; frac += fracstep; + dest[12] = source[frac >> FRACBITS]; frac += fracstep; + dest[16] = source[frac >> FRACBITS]; frac += fracstep; + dest[20] = source[frac >> FRACBITS]; frac += fracstep; + dest[24] = source[frac >> FRACBITS]; frac += fracstep; + dest[28] = source[frac >> FRACBITS]; frac += fracstep; + dest += 32; + } while (--count); } }; From 27156eb60ad4d12e32ea24cf90437483c2373818 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 10 Jun 2016 16:32:47 +0200 Subject: [PATCH 028/912] Linux compile fixes and missing variable declarations --- src/r_draw.h | 5 +++-- src/r_draw_rgba.cpp | 5 +++++ src/r_segs.cpp | 2 +- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/src/r_draw.h b/src/r_draw.h index 409b7c01be..37a0e67788 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -28,6 +28,7 @@ #include #include #include +#include // Spectre/Invisibility. #define FUZZTABLE 50 @@ -460,7 +461,7 @@ public: // Range of rows processed this pass int pass_start_y = 0; - int pass_end_y = 300; + int pass_end_y = MAXHEIGHT; uint32_t dc_temp_rgbabuff_rgba[MAXHEIGHT * 4]; uint32_t *dc_temp_rgba; @@ -527,7 +528,7 @@ class DrawerCommandQueue std::mutex end_mutex; std::condition_variable end_condition; - int finished_threads = 0; + size_t finished_threads = 0; bool no_threading = false; DrawerThread single_core_thread; diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index b81ee4cca4..e2dbd443ab 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -1726,6 +1726,7 @@ class DrawSpanMaskedRGBACommand : public DrawerCommand fixed_t ds_yfrac; BYTE *dc_destorg; int ds_x1; + int ds_x2; int ds_y1; int ds_y; fixed_t ds_xstep; @@ -1828,6 +1829,7 @@ class DrawSpanTranslucentRGBACommand : public DrawerCommand fixed_t ds_yfrac; BYTE *dc_destorg; int ds_x1; + int ds_x2; int ds_y1; int ds_y; fixed_t ds_xstep; @@ -1949,6 +1951,7 @@ class DrawSpanMaskedTranslucentRGBACommand : public DrawerCommand fixed_t ds_yfrac; BYTE *dc_destorg; int ds_x1; + 
int ds_x2; int ds_y1; int ds_y; fixed_t ds_xstep; @@ -2080,6 +2083,7 @@ class DrawSpanAddClampRGBACommand : public DrawerCommand fixed_t ds_yfrac; BYTE *dc_destorg; int ds_x1; + int ds_x2; int ds_y1; int ds_y; fixed_t ds_xstep; @@ -2201,6 +2205,7 @@ class DrawSpanMaskedAddClampRGBACommand : public DrawerCommand fixed_t ds_yfrac; BYTE *dc_destorg; int ds_x1; + int ds_x2; int ds_y1; int ds_y; fixed_t ds_xstep; diff --git a/src/r_segs.cpp b/src/r_segs.cpp index 87ce48ec41..c1d1ad7446 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -1184,7 +1184,7 @@ void calc_uv_start_and_step(int y1, float swal, double yrepeat, uint32_t uv_heig // Find start uv in [0-uv_height[ range. // Not using xs_ToFixed because it rounds the result and we need something that always rounds down to stay within the range. double v = (dc_texturemid + uv_stepd * (y1 - CenterY + 0.5)) / uv_height; - v = v - std::floor(v); + v = v - floor(v); v *= uv_height; v *= (1 << fracbits); From ffcfe0b54f19420b51284abcbfbec09f3aee9074 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 10 Jun 2016 17:08:45 +0200 Subject: [PATCH 029/912] Fix warning --- src/textures/texture.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/textures/texture.cpp b/src/textures/texture.cpp index 1869491b18..d500810627 100644 --- a/src/textures/texture.cpp +++ b/src/textures/texture.cpp @@ -181,8 +181,6 @@ const uint32_t *FTexture::GetColumnBgra(unsigned int column, const Span **spans_ const uint32_t *pixels = GetPixelsBgra(); column %= Width; - if (column < 0) - column += Width; if (spans_out != nullptr) GetColumn(column, spans_out); From 07571da98ccad2fd2c360c4b73ba989a902f184b Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 10 Jun 2016 18:43:49 +0200 Subject: [PATCH 030/912] Improved how threaded rendering is handled --- src/r_draw.h | 16 ++++++++++++---- src/r_draw_rgba.cpp | 24 ++++++++++++++++++++++-- src/r_main.cpp | 4 +++- src/r_swrenderer.cpp | 3 ++- 4 files changed, 39 insertions(+), 8 
deletions(-) diff --git a/src/r_draw.h b/src/r_draw.h index 37a0e67788..d192dc5e49 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -443,8 +443,11 @@ void R_SetDSColorMapLight(FColormap *base_colormap, float light, int shade); void R_SetTranslationMap(lighttable_t *translation); +// Redirect drawer commands to worker threads +void R_BeginDrawerCommands(); + // Wait until all drawers finished executing -void R_FinishDrawerCommands(); +void R_EndDrawerCommands(); class DrawerCommandQueue; @@ -530,13 +533,14 @@ class DrawerCommandQueue std::condition_variable end_condition; size_t finished_threads = 0; - bool no_threading = false; + int threaded_render = 0; DrawerThread single_core_thread; int num_passes = 2; int rows_in_pass = 540; void StartThreads(); void StopThreads(); + void Finish(); static DrawerCommandQueue *Instance(); @@ -551,7 +555,7 @@ public: static void QueueCommand(Types &&... args) { auto queue = Instance(); - if (queue->no_threading) + if (queue->threaded_render == 0) { T command(std::forward(args)...); command.Execute(&queue->single_core_thread); @@ -565,9 +569,13 @@ public: queue->commands.push_back(command); } } + + // Redirects all drawing commands to worker threads until Finish is called + // Begin/End blocks can be nested. 
+ static void Begin(); // Wait until all worker threads finished executing commands - static void Finish(); + static void End(); }; #endif diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index e2dbd443ab..23ab106a65 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -82,6 +82,21 @@ void* DrawerCommandQueue::AllocMemory(size_t size) return data; } +void DrawerCommandQueue::Begin() +{ + auto queue = Instance(); + queue->Finish(); + queue->threaded_render++; +} + +void DrawerCommandQueue::End() +{ + auto queue = Instance(); + queue->Finish(); + if (queue->threaded_render > 0) + queue->threaded_render--; +} + void DrawerCommandQueue::Finish() { auto queue = Instance(); @@ -3515,9 +3530,14 @@ public: ///////////////////////////////////////////////////////////////////////////// -void R_FinishDrawerCommands() +void R_BeginDrawerCommands() { - DrawerCommandQueue::Finish(); + DrawerCommandQueue::Begin(); +} + +void R_EndDrawerCommands() +{ + DrawerCommandQueue::End(); } void R_DrawColumnP_RGBA_C() diff --git a/src/r_main.cpp b/src/r_main.cpp index 348c701204..c1b78303b3 100644 --- a/src/r_main.cpp +++ b/src/r_main.cpp @@ -960,6 +960,8 @@ void R_RenderViewToCanvas (AActor *actor, DCanvas *canvas, r_swtruecolor = canvas->IsBgra(); R_InitColumnDrawers(); } + + R_BeginDrawerCommands(); viewwidth = width; RenderTarget = canvas; @@ -979,7 +981,7 @@ void R_RenderViewToCanvas (AActor *actor, DCanvas *canvas, R_SetupBuffer (); screen->Unlock (); - R_FinishDrawerCommands(); + R_EndDrawerCommands(); viewactive = savedviewactive; r_swtruecolor = savedoutputformat; diff --git a/src/r_swrenderer.cpp b/src/r_swrenderer.cpp index 62190b6064..11f879c38c 100644 --- a/src/r_swrenderer.cpp +++ b/src/r_swrenderer.cpp @@ -162,10 +162,11 @@ void FSoftwareRenderer::RenderView(player_t *player) R_InitColumnDrawers(); } + R_BeginDrawerCommands(); R_RenderActorView (player->mo); // [RH] Let cameras draw onto textures that were visible this frame. 
FCanvasTextureInfo::UpdateAll (); - R_FinishDrawerCommands(); + R_EndDrawerCommands(); } //========================================================================== From d5331e60951bbd4509ce1e2f5be13239c58a04d6 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 10 Jun 2016 22:22:40 +0200 Subject: [PATCH 031/912] Wallscan fix --- src/r_draw_rgba.cpp | 8 +++---- src/r_segs.cpp | 53 ++++++++++++++++++++++----------------------- 2 files changed, 30 insertions(+), 31 deletions(-) diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 23ab106a65..e3a64dd7a6 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -3661,16 +3661,16 @@ void R_FillSpan_RGBA() DrawerCommandQueue::QueueCommand(); } -extern FTexture *rw_pic; // For the asserts below +//extern FTexture *rw_pic; // For the asserts below DWORD vlinec1_RGBA() { - DWORD fracstep = dc_iscale; + /*DWORD fracstep = dc_iscale; DWORD frac = dc_texturefrac; DWORD height = rw_pic->GetHeight(); assert((frac >> vlinebits) < height); - frac += dc_count * fracstep; - assert((frac >> vlinebits) <= height); + frac += (dc_count-1) * fracstep; + assert((frac >> vlinebits) <= height);*/ DrawerCommandQueue::QueueCommand(); return dc_texturefrac + dc_count * dc_iscale; diff --git a/src/r_segs.cpp b/src/r_segs.cpp index c1d1ad7446..451ddf986b 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -1087,23 +1087,21 @@ uint32_t wallscan_drawcol1(int x, int y1, int y2, uint32_t uv_start, uint32_t uv { uint32_t uv_pos = uv_start; - int left = y2 - y1; + uint32_t left = y2 - y1; while (left > 0) { - int next_uv_wrap = (uv_max - uv_pos + uv_step - 1) / uv_step; - int count = MIN(left, next_uv_wrap); - if (count <= 0) - break; // This should never happen, but it does.. 
+ uint32_t available = uv_max - uv_pos; + uint32_t next_uv_wrap = available / uv_step; + if (available % uv_step != 0) + next_uv_wrap++; + uint32_t count = MIN(left, next_uv_wrap); - if (count > 0) - { - dc_source = source; - dc_dest = (ylookup[y1] + x) * pixelsize + dc_destorg; - dc_count = count; - dc_iscale = uv_step; - dc_texturefrac = uv_pos; - draw1column(); - } + dc_source = source; + dc_dest = (ylookup[y1] + x) * pixelsize + dc_destorg; + dc_count = count; + dc_iscale = uv_step; + dc_texturefrac = uv_pos; + draw1column(); left -= count; uv_pos += uv_step * count; @@ -1138,30 +1136,28 @@ void wallscan_drawcol4(int x, int y1, int y2, uint32_t *uv_pos, uint32_t *uv_ste for (int i = 0; i < 4; i++) bufplce[i] = source[i]; - int left = y2 - y1; + uint32_t left = y2 - y1; while (left > 0) { // Find which column wraps first - int count = left; + uint32_t count = left; for (int i = 0; i < 4; i++) { - int next_uv_wrap = (uv_max - uv_pos[i] + uv_step[i] - 1) / uv_step[i]; + uint32_t available = uv_max - uv_pos[i]; + uint32_t next_uv_wrap = available / uv_step[i]; + if (available % uv_step[i] != 0) + next_uv_wrap++; count = MIN(next_uv_wrap, count); } - if (count <= 0) - break; // This should never happen, but it does.. 
// Draw until that column wraps - if (count > 0) + for (int i = 0; i < 4; i++) { - for (int i = 0; i < 4; i++) - { - vplce[i] = uv_pos[i]; - vince[i] = uv_step[i]; - } - dc_count = count; - draw4columns(); + vplce[i] = uv_pos[i]; + vince[i] = uv_step[i]; } + dc_count = count; + draw4columns(); // Wrap the uv position for (int i = 0; i < 4; i++) @@ -1299,6 +1295,9 @@ void wallscan_any( { for (int i = 0; i < 4; i++) { + if (y2[i] <= y1[i]) + continue; + if (!fixed) R_SetColorMapLight(basecolormap, lights[i], wallshade); wallscan_drawcol1(x + i, y1[i], y2[i], uv_pos[i], uv_step[i], uv_max, source[i], draw1column); From a6d696bbfd45c30bd7162ab948cdf4ba36fa170e Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 11 Jun 2016 00:50:36 +0200 Subject: [PATCH 032/912] Undo removal of wallscan_np2 and wallscan_np2_ds --- src/r_segs.cpp | 131 +++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 115 insertions(+), 16 deletions(-) diff --git a/src/r_segs.cpp b/src/r_segs.cpp index 451ddf986b..ad242b2f91 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -1072,16 +1072,17 @@ uint32_t wallscan_drawcol1(int x, int y1, int y2, uint32_t uv_start, uint32_t uv if (uv_max == 0) // power of two { int count = y2 - y1; - if (count > 0) - { - dc_source = source; - dc_dest = (ylookup[y1] + x) * pixelsize + dc_destorg; - dc_count = y2 - y1; - dc_iscale = uv_step; - dc_texturefrac = uv_start; - draw1column(); - } - return uv_start + uv_step * (uint32_t)count; + + dc_source = source; + dc_dest = (ylookup[y1] + x) * pixelsize + dc_destorg; + dc_count = count; + dc_iscale = uv_step; + dc_texturefrac = uv_start; + draw1column(); + + uint64_t step64 = uv_step; + uint64_t pos64 = uv_start; + return (uint32_t)(pos64 + step64 * count); } else { @@ -1119,15 +1120,19 @@ void wallscan_drawcol4(int x, int y1, int y2, uint32_t *uv_pos, uint32_t *uv_ste int pixelsize = r_swtruecolor ? 
4 : 1; if (uv_max == 0) // power of two, no wrap handling needed { + int count = y2 - y1; for (int i = 0; i < 4; i++) { bufplce[i] = source[i]; vplce[i] = uv_pos[i]; vince[i] = uv_step[i]; - uv_pos[i] += uv_step[i] * (y2 - y1); + + uint64_t step64 = uv_step[i]; + uint64_t pos64 = uv_pos[i]; + uv_pos[i] = (uint32_t)(pos64 + step64 * count); } dc_dest = (ylookup[y1] + x) * pixelsize + dc_destorg; - dc_count = y2 - y1; + dc_count = count; draw4columns(); } else @@ -1481,16 +1486,110 @@ static void call_wallscan(int x1, int x2, short *uwal, short *dwal, float *swal, } } -// wallscan now tiles with non-power-of-two textures - this function is therefore not needed anymore.. +//============================================================================= +// +// wallscan_np2 +// +// This is a wrapper around wallscan that helps it tile textures whose heights +// are not powers of 2. It divides the wall into texture-sized strips and calls +// wallscan for each of those. Since only one repetition of the texture fits +// in each strip, wallscan will not tile. 
+// +//============================================================================= + void wallscan_np2(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, double top, double bot, bool mask) { - call_wallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat, mask); + if (!r_np2) + { + call_wallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat, mask); + } + else + { + short most1[MAXWIDTH], most2[MAXWIDTH], most3[MAXWIDTH]; + short *up, *down; + double texheight = rw_pic->GetHeight(); + double partition; + double scaledtexheight = texheight / yrepeat; + + if (yrepeat >= 0) + { // normal orientation: draw strips from top to bottom + partition = top - fmod(top - dc_texturemid / yrepeat - ViewPos.Z, scaledtexheight); + if (partition == top) + { + partition -= scaledtexheight; + } + up = uwal; + down = most1; + dc_texturemid = (partition - ViewPos.Z) * yrepeat + texheight; + while (partition > bot) + { + int j = OWallMost(most3, partition - ViewPos.Z, &WallC); + if (j != 3) + { + for (int j = x1; j < x2; ++j) + { + down[j] = clamp(most3[j], up[j], dwal[j]); + } + call_wallscan(x1, x2, up, down, swal, lwal, yrepeat, mask); + up = down; + down = (down == most1) ? most2 : most1; + } + partition -= scaledtexheight; + dc_texturemid -= texheight; + } + call_wallscan(x1, x2, up, dwal, swal, lwal, yrepeat, mask); + } + else + { // upside down: draw strips from bottom to top + partition = bot - fmod(bot - dc_texturemid / yrepeat - ViewPos.Z, scaledtexheight); + up = most1; + down = dwal; + dc_texturemid = (partition - ViewPos.Z) * yrepeat + texheight; + while (partition < top) + { + int j = OWallMost(most3, partition - ViewPos.Z, &WallC); + if (j != 12) + { + for (int j = x1; j < x2; ++j) + { + up[j] = clamp(most3[j], uwal[j], down[j]); + } + call_wallscan(x1, x2, up, down, swal, lwal, yrepeat, mask); + down = up; + up = (up == most1) ? 
most2 : most1; + } + partition -= scaledtexheight; + dc_texturemid -= texheight; + } + call_wallscan(x1, x2, uwal, down, swal, lwal, yrepeat, mask); + } + } } -// wallscan now tiles with non-power-of-two textures - this function is therefore not needed anymore.. static void wallscan_np2_ds(drawseg_t *ds, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat) { - call_wallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat, true); + if (rw_pic->GetHeight() != 1 << rw_pic->HeightBits) + { + double frontcz1 = ds->curline->frontsector->ceilingplane.ZatPoint(ds->curline->v1); + double frontfz1 = ds->curline->frontsector->floorplane.ZatPoint(ds->curline->v1); + double frontcz2 = ds->curline->frontsector->ceilingplane.ZatPoint(ds->curline->v2); + double frontfz2 = ds->curline->frontsector->floorplane.ZatPoint(ds->curline->v2); + double top = MAX(frontcz1, frontcz2); + double bot = MIN(frontfz1, frontfz2); + if (fake3D & FAKE3D_CLIPTOP) + { + top = MIN(top, sclipTop); + } + if (fake3D & FAKE3D_CLIPBOTTOM) + { + bot = MAX(bot, sclipBottom); + } + wallscan_np2(x1, x2, uwal, dwal, swal, lwal, yrepeat, top, bot, true); + } + else + { + call_wallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat, true); + } } // From 5ae8e9e8c2a68fb55aab598ba46ad86762fc8806 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 11 Jun 2016 16:17:30 +0200 Subject: [PATCH 033/912] Fix missing colormap lookup --- src/r_draw.cpp | 69 +++++++++++++-------------- src/r_draw_rgba.cpp | 28 ++++++----- src/r_drawt_rgba.cpp | 108 +++++++++++++++++++++++++------------------ 3 files changed, 106 insertions(+), 99 deletions(-) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 2710b9992d..70b3893f45 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -2793,24 +2793,19 @@ bool R_GetTransMaskDrawers (fixed_t (**tmvline1)(), void (**tmvline4)()) void R_SetTranslationMap(lighttable_t *translation) { dc_fcolormap = nullptr; - dc_shade_constants.light_red = 256; - 
dc_shade_constants.light_green = 256; - dc_shade_constants.light_blue = 256; - dc_shade_constants.light_alpha = 256; - dc_shade_constants.fade_red = 0; - dc_shade_constants.fade_green = 0; - dc_shade_constants.fade_blue = 0; - dc_shade_constants.fade_alpha = 256; - dc_shade_constants.desaturate = 0; - dc_shade_constants.simple_shade = true; + dc_colormap = translation; if (r_swtruecolor) { - dc_colormap = translation; - dc_light = 0; - } - else - { - dc_colormap = translation; + dc_shade_constants.light_red = 256; + dc_shade_constants.light_green = 256; + dc_shade_constants.light_blue = 256; + dc_shade_constants.light_alpha = 256; + dc_shade_constants.fade_red = 0; + dc_shade_constants.fade_green = 0; + dc_shade_constants.fade_blue = 0; + dc_shade_constants.fade_alpha = 256; + dc_shade_constants.desaturate = 0; + dc_shade_constants.simple_shade = true; dc_light = 0; } } @@ -2818,49 +2813,47 @@ void R_SetTranslationMap(lighttable_t *translation) void R_SetColorMapLight(FColormap *base_colormap, float light, int shade) { dc_fcolormap = base_colormap; - dc_shade_constants.light_red = dc_fcolormap->Color.r * 256 / 255; - dc_shade_constants.light_green = dc_fcolormap->Color.g * 256 / 255; - dc_shade_constants.light_blue = dc_fcolormap->Color.b * 256 / 255; - dc_shade_constants.light_alpha = dc_fcolormap->Color.a * 256 / 255; - dc_shade_constants.fade_red = dc_fcolormap->Fade.r; - dc_shade_constants.fade_green = dc_fcolormap->Fade.g; - dc_shade_constants.fade_blue = dc_fcolormap->Fade.b; - dc_shade_constants.fade_alpha = dc_fcolormap->Fade.a; - dc_shade_constants.desaturate = MIN(abs(dc_fcolormap->Desaturate), 255) * 255 / 256; - dc_shade_constants.simple_shade = (dc_fcolormap->Color.d == 0x00ffffff && dc_fcolormap->Fade.d == 0x00000000 && dc_fcolormap->Desaturate == 0); if (r_swtruecolor) { + dc_shade_constants.light_red = dc_fcolormap->Color.r * 256 / 255; + dc_shade_constants.light_green = dc_fcolormap->Color.g * 256 / 255; + dc_shade_constants.light_blue = 
dc_fcolormap->Color.b * 256 / 255; + dc_shade_constants.light_alpha = dc_fcolormap->Color.a * 256 / 255; + dc_shade_constants.fade_red = dc_fcolormap->Fade.r; + dc_shade_constants.fade_green = dc_fcolormap->Fade.g; + dc_shade_constants.fade_blue = dc_fcolormap->Fade.b; + dc_shade_constants.fade_alpha = dc_fcolormap->Fade.a; + dc_shade_constants.desaturate = MIN(abs(dc_fcolormap->Desaturate), 255) * 255 / 256; + dc_shade_constants.simple_shade = (dc_fcolormap->Color.d == 0x00ffffff && dc_fcolormap->Fade.d == 0x00000000 && dc_fcolormap->Desaturate == 0); dc_colormap = base_colormap->Maps; dc_light = LIGHTSCALE(light, shade); } else { dc_colormap = base_colormap->Maps + (GETPALOOKUP(light, shade) << COLORMAPSHIFT); - dc_light = 0; } } void R_SetDSColorMapLight(FColormap *base_colormap, float light, int shade) { ds_fcolormap = base_colormap; - ds_shade_constants.light_red = ds_fcolormap->Color.r * 256 / 255; - ds_shade_constants.light_green = ds_fcolormap->Color.g * 256 / 255; - ds_shade_constants.light_blue = ds_fcolormap->Color.b * 256 / 255; - ds_shade_constants.light_alpha = ds_fcolormap->Color.a * 256 / 255; - ds_shade_constants.fade_red = ds_fcolormap->Fade.r; - ds_shade_constants.fade_green = ds_fcolormap->Fade.g; - ds_shade_constants.fade_blue = ds_fcolormap->Fade.b; - ds_shade_constants.fade_alpha = ds_fcolormap->Fade.a; - ds_shade_constants.desaturate = MIN(abs(ds_fcolormap->Desaturate), 255) * 255 / 256; - ds_shade_constants.simple_shade = (ds_fcolormap->Color.d == 0x00ffffff && ds_fcolormap->Fade.d == 0x00000000 && ds_fcolormap->Desaturate == 0); if (r_swtruecolor) { + ds_shade_constants.light_red = ds_fcolormap->Color.r * 256 / 255; + ds_shade_constants.light_green = ds_fcolormap->Color.g * 256 / 255; + ds_shade_constants.light_blue = ds_fcolormap->Color.b * 256 / 255; + ds_shade_constants.light_alpha = ds_fcolormap->Color.a * 256 / 255; + ds_shade_constants.fade_red = ds_fcolormap->Fade.r; + ds_shade_constants.fade_green = ds_fcolormap->Fade.g; + 
ds_shade_constants.fade_blue = ds_fcolormap->Fade.b; + ds_shade_constants.fade_alpha = ds_fcolormap->Fade.a; + ds_shade_constants.desaturate = MIN(abs(ds_fcolormap->Desaturate), 255) * 255 / 256; + ds_shade_constants.simple_shade = (ds_fcolormap->Color.d == 0x00ffffff && ds_fcolormap->Fade.d == 0x00000000 && ds_fcolormap->Desaturate == 0); ds_colormap = base_colormap->Maps; ds_light = LIGHTSCALE(light, shade); } else { ds_colormap = base_colormap->Maps + (GETPALOOKUP(light, shade) << COLORMAPSHIFT); - ds_light = 0; } } diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index e3a64dd7a6..979dc07437 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -229,6 +229,7 @@ class DrawColumnRGBACommand : public DrawerCommand const BYTE *dc_source; int dc_pitch; ShadeConstants dc_shade_constants; + BYTE *dc_colormap; public: DrawColumnRGBACommand() @@ -241,6 +242,7 @@ public: dc_source = ::dc_source; dc_pitch = ::dc_pitch; dc_shade_constants = ::dc_shade_constants; + dc_colormap = ::dc_colormap; } void Execute(DrawerThread *thread) override @@ -267,24 +269,20 @@ public: fracstep = dc_iscale * thread->num_cores; frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); + // [RH] Get local copies of these variables so that the compiler + // has a better chance of optimizing this well. + const BYTE *source = dc_source; + int pitch = dc_pitch * thread->num_cores; + BYTE *colormap = dc_colormap; + + do { - // [RH] Get local copies of these variables so that the compiler - // has a better chance of optimizing this well. - const BYTE *source = dc_source; - int pitch = dc_pitch * thread->num_cores; + *dest = shade_pal_index(colormap[source[frac >> FRACBITS]], light, shade_constants); - // Inner loop that does the actual texture mapping, - // e.g. a DDA-lile scaling. - // This is as fast as it gets. 
- do - { - *dest = shade_pal_index(source[frac >> FRACBITS], light, shade_constants); + dest += pitch; + frac += fracstep; - dest += pitch; - frac += fracstep; - - } while (--count); - } + } while (--count); } }; diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp index 28c86d3f57..5f0fc4156c 100644 --- a/src/r_drawt_rgba.cpp +++ b/src/r_drawt_rgba.cpp @@ -90,13 +90,13 @@ public: sincr = thread->num_cores * 4; if (count & 1) { - *dest = *source; + *dest = GPalette.BaseColors[*source]; source += sincr; dest += pitch; } if (count & 2) { - dest[0] = source[0]; - dest[pitch] = source[sincr]; + dest[0] = GPalette.BaseColors[source[0]]; + dest[pitch] = GPalette.BaseColors[source[sincr]]; source += sincr * 2; dest += pitch * 2; } @@ -104,10 +104,10 @@ public: return; do { - dest[0] = source[0]; - dest[pitch] = source[sincr]; - dest[pitch * 2] = source[sincr * 2]; - dest[pitch * 3] = source[sincr * 3]; + dest[0] = GPalette.BaseColors[source[0]]; + dest[pitch] = GPalette.BaseColors[source[sincr]]; + dest[pitch * 2] = GPalette.BaseColors[source[sincr * 2]]; + dest[pitch * 3] = GPalette.BaseColors[source[sincr * 3]]; source += sincr * 4; dest += pitch * 4; } while (--count); @@ -124,6 +124,7 @@ class RtMap1colRGBACommand : public DrawerCommand ShadeConstants dc_shade_constants; BYTE *dc_destorg; int dc_pitch; + BYTE *dc_colormap; public: RtMap1colRGBACommand(int hx, int sx, int yl, int yh) @@ -137,6 +138,7 @@ public: dc_shade_constants = ::dc_shade_constants; dc_destorg = ::dc_destorg; dc_pitch = ::dc_pitch; + dc_colormap = ::dc_colormap; } void Execute(DrawerThread *thread) override @@ -158,9 +160,11 @@ public: source = &thread->dc_temp_rgba[yl * 4 + hx] + thread->skipped_by_thread(yl) * 4; pitch = dc_pitch * thread->num_cores; sincr = thread->num_cores * 4; + + BYTE *colormap = dc_colormap; if (count & 1) { - *dest = shade_pal_index(*source, light, shade_constants); + *dest = shade_pal_index(colormap[*source], light, shade_constants); source += sincr; dest += pitch; } 
@@ -168,8 +172,8 @@ public: return; do { - dest[0] = shade_pal_index(source[0], light, shade_constants); - dest[pitch] = shade_pal_index(source[sincr], light, shade_constants); + dest[0] = shade_pal_index(colormap[source[0]], light, shade_constants); + dest[pitch] = shade_pal_index(colormap[source[sincr]], light, shade_constants); source += sincr * 2; dest += pitch * 2; } while (--count); @@ -185,6 +189,7 @@ class RtMap4colsRGBACommand : public DrawerCommand ShadeConstants dc_shade_constants; BYTE *dc_destorg; int dc_pitch; + BYTE *dc_colormap; public: RtMap4colsRGBACommand(int sx, int yl, int yh) @@ -197,6 +202,7 @@ public: dc_shade_constants = ::dc_shade_constants; dc_destorg = ::dc_destorg; dc_pitch = ::dc_pitch; + dc_colormap = ::dc_colormap; } #ifdef NO_SSE @@ -219,12 +225,14 @@ public: source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; pitch = dc_pitch * thread->num_cores; sincr = thread->num_cores * 4; + + BYTE *colormap = dc_colormap; if (count & 1) { - dest[0] = shade_pal_index(source[0], light, shade_constants); - dest[1] = shade_pal_index(source[1], light, shade_constants); - dest[2] = shade_pal_index(source[2], light, shade_constants); - dest[3] = shade_pal_index(source[3], light, shade_constants); + dest[0] = shade_pal_index(colormap[source[0]], light, shade_constants); + dest[1] = shade_pal_index(colormap[source[1]], light, shade_constants); + dest[2] = shade_pal_index(colormap[source[2]], light, shade_constants); + dest[3] = shade_pal_index(colormap[source[3]], light, shade_constants); source += sincr; dest += pitch; } @@ -232,14 +240,14 @@ public: return; do { - dest[0] = shade_pal_index(source[0], light, shade_constants); - dest[1] = shade_pal_index(source[1], light, shade_constants); - dest[2] = shade_pal_index(source[2], light, shade_constants); - dest[3] = shade_pal_index(source[3], light, shade_constants); - dest[pitch] = shade_pal_index(source[sincr], light, shade_constants); - dest[pitch + 1] = 
shade_pal_index(source[sincr + 1], light, shade_constants); - dest[pitch + 2] = shade_pal_index(source[sincr + 2], light, shade_constants); - dest[pitch + 3] = shade_pal_index(source[sincr + 3], light, shade_constants); + dest[0] = shade_pal_index(colormap[source[0]], light, shade_constants); + dest[1] = shade_pal_index(colormap[source[1]], light, shade_constants); + dest[2] = shade_pal_index(colormap[source[2]], light, shade_constants); + dest[3] = shade_pal_index(colormap[source[3]], light, shade_constants); + dest[pitch] = shade_pal_index(colormap[source[sincr]], light, shade_constants); + dest[pitch + 1] = shade_pal_index(colormap[source[sincr + 1]], light, shade_constants); + dest[pitch + 2] = shade_pal_index(colormap[source[sincr + 2]], light, shade_constants); + dest[pitch + 3] = shade_pal_index(colormap[source[sincr + 3]], light, shade_constants); source += sincr * 2; dest += pitch * 2; } while (--count); @@ -265,16 +273,18 @@ public: source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; pitch = dc_pitch * thread->num_cores; sincr = thread->num_cores * 4; + + BYTE *colormap = dc_colormap; if (shade_constants.simple_shade) { SSE_SHADE_SIMPLE_INIT(light); if (count & 1) { - uint32_t p0 = source[0]; - uint32_t p1 = source[1]; - uint32_t p2 = source[2]; - uint32_t p3 = source[3]; + uint32_t p0 = colormap[source[0]]; + uint32_t p1 = colormap[source[1]]; + uint32_t p2 = colormap[source[2]]; + uint32_t p3 = colormap[source[3]]; // shade_pal_index: __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); @@ -290,10 +300,10 @@ public: do { // shade_pal_index 0-3 { - uint32_t p0 = source[0]; - uint32_t p1 = source[1]; - uint32_t p2 = source[2]; - uint32_t p3 = source[3]; + uint32_t p0 = colormap[source[0]]; + uint32_t p1 = colormap[source[1]]; + uint32_t p2 = colormap[source[2]]; + uint32_t p3 = colormap[source[3]]; __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); SSE_SHADE_SIMPLE(fg); @@ 
-302,10 +312,10 @@ public: // shade_pal_index 4-7 (pitch) { - uint32_t p0 = source[sincr]; - uint32_t p1 = source[sincr + 1]; - uint32_t p2 = source[sincr + 2]; - uint32_t p3 = source[sincr + 3]; + uint32_t p0 = colormap[source[sincr]]; + uint32_t p1 = colormap[source[sincr + 1]]; + uint32_t p2 = colormap[source[sincr + 2]]; + uint32_t p3 = colormap[source[sincr + 3]]; __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); SSE_SHADE_SIMPLE(fg); @@ -321,10 +331,10 @@ public: SSE_SHADE_INIT(light, shade_constants); if (count & 1) { - uint32_t p0 = source[0]; - uint32_t p1 = source[1]; - uint32_t p2 = source[2]; - uint32_t p3 = source[3]; + uint32_t p0 = colormap[source[0]]; + uint32_t p1 = colormap[source[1]]; + uint32_t p2 = colormap[source[2]]; + uint32_t p3 = colormap[source[3]]; // shade_pal_index: __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); @@ -340,10 +350,10 @@ public: do { // shade_pal_index 0-3 { - uint32_t p0 = source[0]; - uint32_t p1 = source[1]; - uint32_t p2 = source[2]; - uint32_t p3 = source[3]; + uint32_t p0 = colormap[source[0]]; + uint32_t p1 = colormap[source[1]]; + uint32_t p2 = colormap[source[2]]; + uint32_t p3 = colormap[source[3]]; __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); SSE_SHADE(fg, shade_constants); @@ -352,10 +362,10 @@ public: // shade_pal_index 4-7 (pitch) { - uint32_t p0 = source[sincr]; - uint32_t p1 = source[sincr + 1]; - uint32_t p2 = source[sincr + 2]; - uint32_t p3 = source[sincr + 3]; + uint32_t p0 = colormap[source[sincr]]; + uint32_t p1 = colormap[source[sincr + 1]]; + uint32_t p2 = colormap[source[sincr + 2]]; + uint32_t p3 = colormap[source[sincr + 3]]; __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); SSE_SHADE(fg, shade_constants); @@ -1800,6 +1810,9 @@ void rt_span_coverage_rgba(int x, int start, int stop) // drawn to the screen along with up to three other columns. 
void R_DrawColumnHorizP_RGBA_C (void) { + if (dc_count <= 0) + return; + int x = dc_x & 3; unsigned int **span = &dc_ctspan[x]; (*span)[0] = dc_yl; @@ -1812,6 +1825,9 @@ void R_DrawColumnHorizP_RGBA_C (void) // [RH] Just fills a column with a given color void R_FillColumnHorizP_RGBA_C (void) { + if (dc_count <= 0) + return; + int x = dc_x & 3; unsigned int **span = &dc_ctspan[x]; (*span)[0] = dc_yl; From 40b76dc9b0cd8d59c0f2c597cc4a690cb78ab89e Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 11 Jun 2016 18:41:56 +0200 Subject: [PATCH 034/912] Apply gamma when using true color output on Linux and Mac --- src/posix/cocoa/i_video.mm | 5 +-- src/posix/sdl/sdlvideo.cpp | 12 +------ src/v_video.cpp | 67 ++++++++++++++++++++++++++++++++++++-- src/v_video.h | 1 + 4 files changed, 68 insertions(+), 17 deletions(-) diff --git a/src/posix/cocoa/i_video.mm b/src/posix/cocoa/i_video.mm index 425fe58873..ddfccaa57e 100644 --- a/src/posix/cocoa/i_video.mm +++ b/src/posix/cocoa/i_video.mm @@ -869,10 +869,7 @@ void CocoaFrameBuffer::Update() if (IsBgra()) { - for (int y = 0; y < Height; y++) - { - memcpy((uint32_t*)m_pixelBuffer + y * Width, (uint32_t*)MemBuffer + y * Pitch, Width * BYTES_PER_PIXEL); - } + CopyWithGammaBgra(m_pixelBuffer, Width * BYTES_PER_PIXEL, m_gammaTable[0], m_gammaTable[1], m_gammaTable[2], m_flashColor, m_flashAmount); } else { diff --git a/src/posix/sdl/sdlvideo.cpp b/src/posix/sdl/sdlvideo.cpp index 26121aa711..56b883978b 100644 --- a/src/posix/sdl/sdlvideo.cpp +++ b/src/posix/sdl/sdlvideo.cpp @@ -497,17 +497,7 @@ void SDLFB::Update () if (Bgra) { - if (pitch == Pitch * 4) - { - memcpy(pixels, MemBuffer, Width*Height*4); - } - else - { - for (int y = 0; y < Height; ++y) - { - memcpy((BYTE *)pixels + y*pitch, MemBuffer + y*Pitch*4, Width*4); - } - } + CopyWithGammaBgra(pixels, pitch, GammaTable[0], GammaTable[1], GammaTable[2], Flash, FlashAmount); } else if (NotPaletted) { diff --git a/src/v_video.cpp b/src/v_video.cpp index 
bc99edbf10..e586381211 100644 --- a/src/v_video.cpp +++ b/src/v_video.cpp @@ -742,13 +742,12 @@ void DCanvas::CalcGamma (float gamma, BYTE gammalookup[256]) // I found this formula on the web at // , // but that page no longer exits. - double invgamma = 1.f / gamma; int i; for (i = 0; i < 256; i++) { - gammalookup[i] = (BYTE)(255.0 * pow (i / 255.0, invgamma)); + gammalookup[i] = (BYTE)(255.0 * pow (i / 255.0, invgamma) + 0.5); } } @@ -876,6 +875,70 @@ DFrameBuffer::DFrameBuffer (int width, int height, bool bgra) Accel2D = false; } +//========================================================================== +// +// DFrameBuffer :: PostprocessBgra +// +// Copies data to destination buffer while performing gamma and flash. +// This is only needed if a target cannot do this with shaders. +// +//========================================================================== + +void DFrameBuffer::CopyWithGammaBgra(void *output, int pitch, const BYTE *gammared, const BYTE *gammagreen, const BYTE *gammablue, PalEntry flash, int flash_amount) +{ + const BYTE *gammatables[3] = { gammared, gammagreen, gammablue }; + + if (flash_amount > 0) + { + uint16_t inv_flash_amount = 256 - flash_amount; + uint16_t flash_red = flash.r * flash_amount; + uint16_t flash_green = flash.g * flash_amount; + uint16_t flash_blue = flash.b * flash_amount; + + for (int y = 0; y < Height; y++) + { + BYTE *dest = (BYTE*)output + y * pitch; + BYTE *src = MemBuffer + y * Pitch * 4; + for (int x = 0; x < Width; x++) + { + uint16_t fg_red = src[2]; + uint16_t fg_green = src[1]; + uint16_t fg_blue = src[0]; + uint16_t red = (fg_red * inv_flash_amount + flash_red) >> 8; + uint16_t green = (fg_green * inv_flash_amount + flash_green) >> 8; + uint16_t blue = (fg_blue * inv_flash_amount + flash_blue) >> 8; + + dest[0] = gammatables[2][blue]; + dest[1] = gammatables[1][green]; + dest[2] = gammatables[0][red]; + dest[3] = 0xff; + + dest += 4; + src += 4; + } + } + } + else + { + for (int y = 0; y < Height; y++) + { 
+ BYTE *dest = (BYTE*)output + y * pitch; + BYTE *src = MemBuffer + y * Pitch * 4; + for (int x = 0; x < Width; x++) + { + dest[0] = gammatables[2][src[0]]; + dest[1] = gammatables[1][src[1]]; + dest[2] = gammatables[0][src[2]]; + dest[3] = 0xff; + + dest += 4; + src += 4; + } + } + } +} + + //========================================================================== // // DFrameBuffer :: DrawRateStuff diff --git a/src/v_video.h b/src/v_video.h index 120beff9a9..19213bd260 100644 --- a/src/v_video.h +++ b/src/v_video.h @@ -420,6 +420,7 @@ public: protected: void DrawRateStuff (); void CopyFromBuff (BYTE *src, int srcPitch, int width, int height, BYTE *dest); + void CopyWithGammaBgra(void *output, int pitch, const BYTE *gammared, const BYTE *gammagreen, const BYTE *gammablue, PalEntry flash, int flash_amount); DFrameBuffer () {} From 42efc7334e5e5dc0419b020b4db36777bc647be4 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 12 Jun 2016 00:50:43 +0200 Subject: [PATCH 035/912] Fix missing particles in true color mode --- src/r_draw.h | 7 +++++-- src/r_draw_rgba.cpp | 5 +++++ src/r_things.cpp | 2 ++ 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/src/r_draw.h b/src/r_draw.h index d192dc5e49..55ad8a0cad 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -570,12 +570,15 @@ public: } } - // Redirects all drawing commands to worker threads until Finish is called + // Redirects all drawing commands to worker threads until End is called // Begin/End blocks can be nested. 
static void Begin(); - // Wait until all worker threads finished executing commands + // End redirection and wait until all worker threads finished executing static void End(); + + // Waits until all worker threads finished executing + static void WaitForWorkers(); }; #endif diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 979dc07437..af8487964e 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -97,6 +97,11 @@ void DrawerCommandQueue::End() queue->threaded_render--; } +void DrawerCommandQueue::WaitForWorkers() +{ + Instance()->Finish(); +} + void DrawerCommandQueue::Finish() { auto queue = Instance(); diff --git a/src/r_things.cpp b/src/r_things.cpp index 933d50e46d..0c5e17b7cd 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -2686,6 +2686,8 @@ void R_DrawParticle_RGBA(vissprite_t *vis) int countbase = vis->x2 - x1; R_DrawMaskedSegsBehindParticle(vis); + + DrawerCommandQueue::WaitForWorkers(); uint32_t fg = shade_pal_index_simple(color, calc_light_multiplier(LIGHTSCALE(0, vis->Style.ColormapNum << FRACBITS))); uint32_t fg_red = (fg >> 16) & 0xff; From 350857a9f6b2e6aa0a0da9f6eb27c2b05066c80e Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 12 Jun 2016 19:19:44 +0200 Subject: [PATCH 036/912] Fixed fuzz effect when using multiple cores --- src/r_draw_rgba.cpp | 89 ++++++++++++--------------------------------- 1 file changed, 23 insertions(+), 66 deletions(-) diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index af8487964e..a5d924dfa1 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -583,79 +583,36 @@ public: dest = thread->dest_for_thread(dc_yl, dc_pitch, ylookup[dc_yl] + dc_x + (uint32_t*)dc_destorg); - // Note: this implementation assumes this function is only used for the pinky shadow effect (i.e. no other fancy colormap than black) - // I'm not sure if this is really always the case or not. 
+ int pitch = dc_pitch * thread->num_cores; + int fuzzstep = thread->num_cores; + int fuzz = (fuzzpos + thread->skipped_by_thread(dc_yl)) % FUZZTABLE; + while (count > 0) { - // [RH] Make local copies of global vars to try and improve - // the optimizations made by the compiler. - int pitch = dc_pitch * thread->num_cores; - int fuzz = fuzzpos; - int cnt; + int available = (FUZZTABLE - fuzz); + int next_wrap = available / fuzzstep; + if (available % fuzzstep != 0) + next_wrap++; - // [RH] Split this into three separate loops to minimize - // the number of times fuzzpos needs to be clamped. - if (fuzz) + int cnt = MIN(count, next_wrap); + count -= cnt; + do { - cnt = MIN(FUZZTABLE - fuzz, count); - count -= cnt; - do - { - uint32_t bg = dest[fuzzoffset[fuzz++]]; - uint32_t bg_red = (bg >> 16) & 0xff; - uint32_t bg_green = (bg >> 8) & 0xff; - uint32_t bg_blue = (bg) & 0xff; + uint32_t bg = dest[fuzzoffset[fuzz]]; + uint32_t bg_red = (bg >> 16) & 0xff; + uint32_t bg_green = (bg >> 8) & 0xff; + uint32_t bg_blue = (bg) & 0xff; - uint32_t red = bg_red * 3 / 4; - uint32_t green = bg_green * 3 / 4; - uint32_t blue = bg_blue * 3 / 4; + uint32_t red = bg_red * 3 / 4; + uint32_t green = bg_green * 3 / 4; + uint32_t blue = bg_blue * 3 / 4; - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - } while (--cnt); - } - if (fuzz == FUZZTABLE || count > 0) - { - while (count >= FUZZTABLE) - { - fuzz = 0; - cnt = FUZZTABLE; - count -= FUZZTABLE; - do - { - uint32_t bg = dest[fuzzoffset[fuzz++]]; - uint32_t bg_red = (bg >> 16) & 0xff; - uint32_t bg_green = (bg >> 8) & 0xff; - uint32_t bg_blue = (bg) & 0xff; + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += pitch; + fuzz += fuzzstep; + } while (--cnt); - uint32_t red = bg_red * 3 / 4; - uint32_t green = bg_green * 3 / 4; - uint32_t blue = bg_blue * 3 / 4; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - } while (--cnt); - } - fuzz = 0; - if (count > 0) - { - do - { - 
uint32_t bg = dest[fuzzoffset[fuzz++]]; - uint32_t bg_red = (bg >> 16) & 0xff; - uint32_t bg_green = (bg >> 8) & 0xff; - uint32_t bg_blue = (bg) & 0xff; - - uint32_t red = bg_red * 3 / 4; - uint32_t green = bg_green * 3 / 4; - uint32_t blue = bg_blue * 3 / 4; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - } while (--count); - } - } - fuzzpos = fuzz; + fuzz %= FUZZTABLE; } } }; From 0f0859b0b2d8f82c89ea1674b6ecc999934ae659 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 12 Jun 2016 22:54:23 +0200 Subject: [PATCH 037/912] Special colormap support for when no hw accel is available --- src/r_draw.h | 18 +++++ src/r_draw_rgba.cpp | 152 +++++++++++++++++++++++++++++++++++++++++++ src/r_main.cpp | 4 +- src/r_swrenderer.cpp | 8 +++ src/r_things.cpp | 2 +- 5 files changed, 181 insertions(+), 3 deletions(-) diff --git a/src/r_draw.h b/src/r_draw.h index 55ad8a0cad..d09d0ab892 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -581,4 +581,22 @@ public: static void WaitForWorkers(); }; +class ApplySpecialColormapRGBACommand : public DrawerCommand +{ + BYTE *buffer; + int pitch; + int width; + int height; + int start_red; + int start_green; + int start_blue; + int end_red; + int end_green; + int end_blue; + +public: + ApplySpecialColormapRGBACommand(FSpecialColormap *colormap, DFrameBuffer *screen); + void Execute(DrawerThread *thread) override; +}; + #endif diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index a5d924dfa1..d5c275d0ee 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -3488,6 +3488,158 @@ public: } }; +ApplySpecialColormapRGBACommand::ApplySpecialColormapRGBACommand(FSpecialColormap *colormap, DFrameBuffer *screen) +{ + buffer = screen->GetBuffer(); + pitch = screen->GetPitch(); + width = screen->GetWidth(); + height = screen->GetHeight(); + + start_red = (int)(colormap->ColorizeStart[0] * 255); + start_green = (int)(colormap->ColorizeStart[1] * 255); + start_blue = (int)(colormap->ColorizeStart[2] * 255); 
+ end_red = (int)(colormap->ColorizeEnd[0] * 255); + end_green = (int)(colormap->ColorizeEnd[1] * 255); + end_blue = (int)(colormap->ColorizeEnd[2] * 255); +} + +#ifdef NO_SSE +void ApplySpecialColormapRGBACommand::Execute(DrawerThread *thread) +{ + int y = thread->skipped_by_thread(0); + int count = thread->count_for_thread(0, height); + while (count > 0) + { + BYTE *pixels = buffer + y * pitch * 4; + for (int x = 0; x < width; x++) + { + int fg_red = pixels[2]; + int fg_green = pixels[1]; + int fg_blue = pixels[0]; + + int gray = (fg_red * 77 + fg_green * 143 + fg_blue * 37) >> 8; + gray += (gray >> 7); // gray*=256/255 + int inv_gray = 256 - gray; + + int red = clamp((start_red * inv_gray + end_red * gray) >> 8, 0, 255); + int green = clamp((start_green * inv_gray + end_green * gray) >> 8, 0, 255); + int blue = clamp((start_blue * inv_gray + end_blue * gray) >> 8, 0, 255); + + pixels[0] = (BYTE)blue; + pixels[1] = (BYTE)green; + pixels[2] = (BYTE)red; + pixels[3] = 0xff; + + pixels += 4; + } + y += thread->num_cores; + count--; + } +} +#else +void ApplySpecialColormapRGBACommand::Execute(DrawerThread *thread) +{ + int y = thread->skipped_by_thread(0); + int count = thread->count_for_thread(0, height); + __m128i gray_weight = _mm_set_epi16(256, 77, 143, 37, 256, 77, 143, 37); + __m128i start_end = _mm_set_epi16(255, start_red, start_green, start_blue, 255, end_red, end_green, end_blue); + while (count > 0) + { + BYTE *pixels = buffer + y * pitch * 4; + int sse_length = width / 4; + for (int x = 0; x < sse_length; x++) + { + // Unpack to integers: + __m128i p = _mm_loadu_si128((const __m128i*)pixels); + + __m128i p16_0 = _mm_unpacklo_epi8(p, _mm_setzero_si128()); + __m128i p16_1 = _mm_unpackhi_epi8(p, _mm_setzero_si128()); + + // Add gray weighting to colors + __m128i mullo0 = _mm_mullo_epi16(p16_0, gray_weight); + __m128i mullo1 = _mm_mullo_epi16(p16_1, gray_weight); + __m128i p32_0 = _mm_unpacklo_epi16(mullo0, _mm_setzero_si128()); + __m128i p32_1 = 
_mm_unpackhi_epi16(mullo0, _mm_setzero_si128()); + __m128i p32_2 = _mm_unpacklo_epi16(mullo1, _mm_setzero_si128()); + __m128i p32_3 = _mm_unpackhi_epi16(mullo1, _mm_setzero_si128()); + + // Transpose to get color components in individual vectors: + __m128 tmpx = _mm_castsi128_ps(p32_0); + __m128 tmpy = _mm_castsi128_ps(p32_1); + __m128 tmpz = _mm_castsi128_ps(p32_2); + __m128 tmpw = _mm_castsi128_ps(p32_3); + _MM_TRANSPOSE4_PS(tmpx, tmpy, tmpz, tmpw); + __m128i blue = _mm_castps_si128(tmpx); + __m128i green = _mm_castps_si128(tmpy); + __m128i red = _mm_castps_si128(tmpz); + __m128i alpha = _mm_castps_si128(tmpw); + + // Calculate gray and 256-gray values: + __m128i gray = _mm_srli_epi32(_mm_add_epi32(_mm_add_epi32(red, green), blue), 8); + __m128i inv_gray = _mm_sub_epi32(_mm_set1_epi32(256), gray); + + // p32 = start * inv_gray + end * gray: + __m128i gray0 = _mm_shuffle_epi32(gray, _MM_SHUFFLE(0, 0, 0, 0)); + __m128i gray1 = _mm_shuffle_epi32(gray, _MM_SHUFFLE(1, 1, 1, 1)); + __m128i gray2 = _mm_shuffle_epi32(gray, _MM_SHUFFLE(2, 2, 2, 2)); + __m128i gray3 = _mm_shuffle_epi32(gray, _MM_SHUFFLE(3, 3, 3, 3)); + __m128i inv_gray0 = _mm_shuffle_epi32(inv_gray, _MM_SHUFFLE(0, 0, 0, 0)); + __m128i inv_gray1 = _mm_shuffle_epi32(inv_gray, _MM_SHUFFLE(1, 1, 1, 1)); + __m128i inv_gray2 = _mm_shuffle_epi32(inv_gray, _MM_SHUFFLE(2, 2, 2, 2)); + __m128i inv_gray3 = _mm_shuffle_epi32(inv_gray, _MM_SHUFFLE(3, 3, 3, 3)); + __m128i gray16_0 = _mm_packs_epi32(gray0, inv_gray0); + __m128i gray16_1 = _mm_packs_epi32(gray1, inv_gray1); + __m128i gray16_2 = _mm_packs_epi32(gray2, inv_gray2); + __m128i gray16_3 = _mm_packs_epi32(gray3, inv_gray3); + __m128i gray16_0_mullo = _mm_mullo_epi16(gray16_0, start_end); + __m128i gray16_1_mullo = _mm_mullo_epi16(gray16_1, start_end); + __m128i gray16_2_mullo = _mm_mullo_epi16(gray16_2, start_end); + __m128i gray16_3_mullo = _mm_mullo_epi16(gray16_3, start_end); + __m128i gray16_0_mulhi = _mm_mulhi_epi16(gray16_0, start_end); + __m128i 
gray16_1_mulhi = _mm_mulhi_epi16(gray16_1, start_end); + __m128i gray16_2_mulhi = _mm_mulhi_epi16(gray16_2, start_end); + __m128i gray16_3_mulhi = _mm_mulhi_epi16(gray16_3, start_end); + p32_0 = _mm_srli_epi32(_mm_add_epi32(_mm_unpacklo_epi16(gray16_0_mullo, gray16_0_mulhi), _mm_unpackhi_epi16(gray16_0_mullo, gray16_0_mulhi)), 8); + p32_1 = _mm_srli_epi32(_mm_add_epi32(_mm_unpacklo_epi16(gray16_1_mullo, gray16_1_mulhi), _mm_unpackhi_epi16(gray16_1_mullo, gray16_1_mulhi)), 8); + p32_2 = _mm_srli_epi32(_mm_add_epi32(_mm_unpacklo_epi16(gray16_2_mullo, gray16_2_mulhi), _mm_unpackhi_epi16(gray16_2_mullo, gray16_2_mulhi)), 8); + p32_3 = _mm_srli_epi32(_mm_add_epi32(_mm_unpacklo_epi16(gray16_3_mullo, gray16_3_mulhi), _mm_unpackhi_epi16(gray16_3_mullo, gray16_3_mulhi)), 8); + + p16_0 = _mm_packs_epi32(p32_0, p32_1); + p16_1 = _mm_packs_epi32(p32_2, p32_3); + p = _mm_packus_epi16(p16_0, p16_1); + + _mm_storeu_si128((__m128i*)pixels, p); + pixels += 16; + } + + for (int x = sse_length * 4; x < width; x++) + { + int fg_red = pixels[2]; + int fg_green = pixels[1]; + int fg_blue = pixels[0]; + + int gray = (fg_red * 77 + fg_green * 143 + fg_blue * 37) >> 8; + gray += (gray >> 7); // gray*=256/255 + int inv_gray = 256 - gray; + + int red = clamp((start_red * inv_gray + end_red * gray) >> 8, 0, 255); + int green = clamp((start_green * inv_gray + end_green * gray) >> 8, 0, 255); + int blue = clamp((start_blue * inv_gray + end_blue * gray) >> 8, 0, 255); + + pixels[0] = (BYTE)blue; + pixels[1] = (BYTE)green; + pixels[2] = (BYTE)red; + pixels[3] = 0xff; + + pixels += 4; + } + + y += thread->num_cores; + count--; + } +} +#endif + ///////////////////////////////////////////////////////////////////////////// void R_BeginDrawerCommands() diff --git a/src/r_main.cpp b/src/r_main.cpp index c1b78303b3..2eb0ce1419 100644 --- a/src/r_main.cpp +++ b/src/r_main.cpp @@ -459,7 +459,7 @@ void R_SetupColormap(player_t *player) if (player->fixedcolormap >= 0 && player->fixedcolormap < 
(int)SpecialColormaps.Size()) { realfixedcolormap = &SpecialColormaps[player->fixedcolormap]; - if (RenderTarget == screen && (DFrameBuffer *)screen->Accel2D && r_shadercolormaps) + if (RenderTarget == screen && (r_swtruecolor || ((DFrameBuffer *)screen->Accel2D && r_shadercolormaps))) { // Render everything fullbright. The copy to video memory will // apply the special colormap, so it won't be restricted to the @@ -935,7 +935,7 @@ void R_RenderActorView (AActor *actor, bool dontmaplines) // If we don't want shadered colormaps, NULL it now so that the // copy to the screen does not use a special colormap shader. - if (!r_shadercolormaps) + if (!r_shadercolormaps && !r_swtruecolor) { realfixedcolormap = NULL; } diff --git a/src/r_swrenderer.cpp b/src/r_swrenderer.cpp index 11f879c38c..c4558bf7c8 100644 --- a/src/r_swrenderer.cpp +++ b/src/r_swrenderer.cpp @@ -43,6 +43,7 @@ #include "textures/textures.h" #include "r_data/voxels.h" +EXTERN_CVAR(Bool, r_shadercolormaps) class FArchive; void R_SWRSetWindow(int windowSize, int fullWidth, int fullHeight, int stHeight, int trueratio); @@ -166,6 +167,13 @@ void FSoftwareRenderer::RenderView(player_t *player) R_RenderActorView (player->mo); // [RH] Let cameras draw onto textures that were visible this frame. 
FCanvasTextureInfo::UpdateAll (); + + // Apply special colormap if the target cannot do it + if (realfixedcolormap && r_swtruecolor && !(r_shadercolormaps && screen->Accel2D)) + { + DrawerCommandQueue::QueueCommand<ApplySpecialColormapRGBACommand>(realfixedcolormap, screen); + } + R_EndDrawerCommands(); } diff --git a/src/r_things.cpp b/src/r_things.cpp index 0c5e17b7cd..2dc0bdb6cd 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -1423,7 +1423,7 @@ void R_DrawPSprite (pspdef_t* psp, int pspnum, AActor *owner, double sx, double } } - if (realfixedcolormap != NULL) + if (realfixedcolormap != NULL && (!r_swtruecolor || (r_shadercolormaps && screen->Accel2D))) { // fixed color vis->Style.BaseColormap = realfixedcolormap; vis->Style.ColormapNum = 0; From cc10c2a97045010453c610e68e4a66cef6e36dd9 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 13 Jun 2016 03:16:48 +0200 Subject: [PATCH 038/912] Fix cameras and kdizd intro for true color mode --- src/r_main.cpp | 4 +-- src/r_swrenderer.cpp | 35 +++++++++++++++++++----- src/textures/canvastexture.cpp | 49 ++++++++++++++++++++++++++++++++-- src/textures/texture.cpp | 36 +++++++++++++++++++++++++ src/textures/textures.h | 18 +++++++++---- 5 files changed, 126 insertions(+), 16 deletions(-) diff --git a/src/r_main.cpp b/src/r_main.cpp index 2eb0ce1419..4e5ff1dbdb 100644 --- a/src/r_main.cpp +++ b/src/r_main.cpp @@ -974,6 +974,8 @@ void R_RenderViewToCanvas (AActor *actor, DCanvas *canvas, R_RenderActorView (actor, dontmaplines); + R_EndDrawerCommands(); + RenderTarget = screen; bRenderingToCanvas = false; R_ExecuteSetViewSize (); @@ -981,8 +983,6 @@ void R_RenderViewToCanvas (AActor *actor, DCanvas *canvas, R_SetupBuffer (); screen->Unlock (); - R_EndDrawerCommands(); - viewactive = savedviewactive; r_swtruecolor = savedoutputformat; diff --git a/src/r_swrenderer.cpp b/src/r_swrenderer.cpp index c4558bf7c8..556323df52 100644 --- a/src/r_swrenderer.cpp +++ b/src/r_swrenderer.cpp @@ -87,11 +87,17 @@ void 
FSoftwareRenderer::PrecacheTexture(FTexture *tex, int cache) if (cache & FTextureManager::HIT_Columnmode) { const FTexture::Span *spanp; - tex->GetColumn(0, &spanp); + /*if (r_swtruecolor) + tex->GetColumnBgra(0, &spanp); + else*/ + tex->GetColumn(0, &spanp); } else if (cache != 0) { - tex->GetPixels (); + if (r_swtruecolor) + tex->GetPixels(); + else + tex->GetPixels (); } else { @@ -328,8 +334,8 @@ void FSoftwareRenderer::CopyStackedViewParameters() void FSoftwareRenderer::RenderTextureView (FCanvasTexture *tex, AActor *viewpoint, int fov) { - BYTE *Pixels = const_cast(tex->GetPixels()); - DSimpleCanvas *Canvas = tex->GetCanvas(); + BYTE *Pixels = r_swtruecolor ? (BYTE*)tex->GetPixelsBgra() : (BYTE*)tex->GetPixels(); + DSimpleCanvas *Canvas = r_swtruecolor ? tex->GetCanvasBgra() : tex->GetCanvas(); // curse Doom's overuse of global variables in the renderer. // These get clobbered by rendering to a camera texture but they need to be preserved so the final rendering can be done with the correct palette. 
@@ -340,13 +346,28 @@ void FSoftwareRenderer::RenderTextureView (FCanvasTexture *tex, AActor *viewpoin R_SetFOV ((double)fov); R_RenderViewToCanvas (viewpoint, Canvas, 0, 0, tex->GetWidth(), tex->GetHeight(), tex->bFirstUpdate); R_SetFOV (savedfov); - if (Pixels == Canvas->GetBuffer()) + + if (Canvas->IsBgra()) { - FTexture::FlipSquareBlockRemap (Pixels, tex->GetWidth(), tex->GetHeight(), GPalette.Remap); + if (Pixels == Canvas->GetBuffer()) + { + FTexture::FlipSquareBlockBgra((uint32_t*)Pixels, tex->GetWidth(), tex->GetHeight()); + } + else + { + FTexture::FlipNonSquareBlockBgra((uint32_t*)Pixels, (const uint32_t*)Canvas->GetBuffer(), tex->GetWidth(), tex->GetHeight(), Canvas->GetPitch()); + } } else { - FTexture::FlipNonSquareBlockRemap (Pixels, Canvas->GetBuffer(), tex->GetWidth(), tex->GetHeight(), Canvas->GetPitch(), GPalette.Remap); + if (Pixels == Canvas->GetBuffer()) + { + FTexture::FlipSquareBlockRemap(Pixels, tex->GetWidth(), tex->GetHeight(), GPalette.Remap); + } + else + { + FTexture::FlipNonSquareBlockRemap(Pixels, Canvas->GetBuffer(), tex->GetWidth(), tex->GetHeight(), Canvas->GetPitch(), GPalette.Remap); + } } tex->SetUpdated(); fixedcolormap = savecolormap; diff --git a/src/textures/canvastexture.cpp b/src/textures/canvastexture.cpp index 7242149a40..a72546d781 100644 --- a/src/textures/canvastexture.cpp +++ b/src/textures/canvastexture.cpp @@ -53,7 +53,6 @@ FCanvasTexture::FCanvasTexture (const char *name, int width, int height) DummySpans[1].TopOffset = 0; DummySpans[1].Length = 0; UseType = TEX_Wall; - Canvas = NULL; bNeedsUpdate = true; bDidUpdate = false; bHasCanvas = true; @@ -101,6 +100,16 @@ const BYTE *FCanvasTexture::GetPixels () return Pixels; } +const uint32_t *FCanvasTexture::GetPixelsBgra() +{ + bNeedsUpdate = true; + if (CanvasBgra == NULL) + { + MakeTextureBgra(); + } + return PixelsBgra; +} + void FCanvasTexture::MakeTexture () { Canvas = new DSimpleCanvas (Width, Height, false); @@ -123,21 +132,57 @@ void 
FCanvasTexture::MakeTexture () memset (Pixels+Width*Height/2, 255, Width*Height/2); } +void FCanvasTexture::MakeTextureBgra() +{ + CanvasBgra = new DSimpleCanvas(Width, Height, true); + CanvasBgra->Lock(); + GC::AddSoftRoot(CanvasBgra); + + if (Width != Height || Width != CanvasBgra->GetPitch()) + { + PixelsBgra = new uint32_t[Width*Height]; + bPixelsAllocatedBgra = true; + } + else + { + PixelsBgra = (uint32_t*)CanvasBgra->GetBuffer(); + bPixelsAllocatedBgra = false; + } + + // Draw a special "unrendered" initial texture into the buffer. + memset(PixelsBgra, 0, Width*Height / 2 * 4); + memset(PixelsBgra + Width*Height / 2, 255, Width*Height / 2 * 4); +} + void FCanvasTexture::Unload () { if (bPixelsAllocated) { - if (Pixels != NULL) delete [] Pixels; + if (Pixels != NULL) delete[] Pixels; bPixelsAllocated = false; Pixels = NULL; } + if (bPixelsAllocatedBgra) + { + if (PixelsBgra != NULL) delete[] PixelsBgra; + bPixelsAllocatedBgra = false; + PixelsBgra = NULL; + } + if (Canvas != NULL) { GC::DelSoftRoot(Canvas); Canvas->Destroy(); Canvas = NULL; } + + if (CanvasBgra != NULL) + { + GC::DelSoftRoot(CanvasBgra); + CanvasBgra->Destroy(); + CanvasBgra = NULL; + } } bool FCanvasTexture::CheckModified () diff --git a/src/textures/texture.cpp b/src/textures/texture.cpp index d500810627..28a3b93338 100644 --- a/src/textures/texture.cpp +++ b/src/textures/texture.cpp @@ -410,6 +410,29 @@ void FTexture::FlipSquareBlock (BYTE *block, int x, int y) } } +void FTexture::FlipSquareBlockBgra(uint32_t *block, int x, int y) +{ + int i, j; + + if (x != y) return; + + for (i = 0; i < x; ++i) + { + uint32_t *corner = block + x*i + i; + int count = x - i; + if (count & 1) + { + count--; + swapvalues(corner[count], corner[count*x]); + } + for (j = 0; j < count; j += 2) + { + swapvalues(corner[j], corner[j*x]); + swapvalues(corner[j + 1], corner[(j + 1)*x]); + } + } +} + void FTexture::FlipSquareBlockRemap (BYTE *block, int x, int y, const BYTE *remap) { int i, j; @@ -453,6 +476,19 @@ 
void FTexture::FlipNonSquareBlock (BYTE *dst, const BYTE *src, int x, int y, int } } +void FTexture::FlipNonSquareBlockBgra(uint32_t *dst, const uint32_t *src, int x, int y, int srcpitch) +{ + int i, j; + + for (i = 0; i < x; ++i) + { + for (j = 0; j < y; ++j) + { + dst[i*y + j] = src[i + j*srcpitch]; + } + } +} + void FTexture::FlipNonSquareBlockRemap (BYTE *dst, const BYTE *src, int x, int y, int srcpitch, const BYTE *remap) { int i, j; diff --git a/src/textures/textures.h b/src/textures/textures.h index 0d066eff52..872c83b1a9 100644 --- a/src/textures/textures.h +++ b/src/textures/textures.h @@ -274,8 +274,10 @@ private: public: static void FlipSquareBlock (BYTE *block, int x, int y); + static void FlipSquareBlockBgra (uint32_t *block, int x, int y); static void FlipSquareBlockRemap (BYTE *block, int x, int y, const BYTE *remap); static void FlipNonSquareBlock (BYTE *blockto, const BYTE *blockfrom, int x, int y, int srcpitch); + static void FlipNonSquareBlockBgra (uint32_t *blockto, const uint32_t *blockfrom, int x, int y, int srcpitch); static void FlipNonSquareBlockRemap (BYTE *blockto, const BYTE *blockfrom, int x, int y, int srcpitch, const BYTE *remap); friend class D3DTex; @@ -518,21 +520,27 @@ public: const BYTE *GetColumn (unsigned int column, const Span **spans_out); const BYTE *GetPixels (); + const uint32_t *GetPixelsBgra() override; void Unload (); bool CheckModified (); void NeedUpdate() { bNeedsUpdate=true; } void SetUpdated() { bNeedsUpdate = false; bDidUpdate = true; bFirstUpdate = false; } DSimpleCanvas *GetCanvas() { return Canvas; } + DSimpleCanvas *GetCanvasBgra() { return CanvasBgra; } void MakeTexture (); + void MakeTextureBgra (); protected: - DSimpleCanvas *Canvas; - BYTE *Pixels; + DSimpleCanvas *Canvas = nullptr; + DSimpleCanvas *CanvasBgra = nullptr; + BYTE *Pixels = nullptr; + uint32_t *PixelsBgra = nullptr; Span DummySpans[2]; - bool bNeedsUpdate; - bool bDidUpdate; - bool bPixelsAllocated; + bool bNeedsUpdate = true; + bool 
bDidUpdate = false; + bool bPixelsAllocated = false; + bool bPixelsAllocatedBgra = false; public: bool bFirstUpdate; From e31331bed265925a2e03d66658863e9c26f2ca26 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 13 Jun 2016 19:09:48 +0200 Subject: [PATCH 039/912] Sloped plane adjustments --- src/r_draw.cpp | 4 +- src/r_plane.cpp | 126 +++++------------------------------------------- 2 files changed, 14 insertions(+), 116 deletions(-) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 70b3893f45..ec7313c4f6 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -2325,7 +2325,7 @@ void R_InitColumnDrawers () R_FillColumnHoriz = R_FillColumnHorizP_RGBA_C; R_DrawFogBoundary = R_DrawFogBoundary_RGBA; - R_MapTiltedPlane = R_MapColoredPlane_RGBA; + R_MapTiltedPlane = R_MapTiltedPlane_RGBA; R_MapColoredPlane = R_MapColoredPlane_RGBA; R_DrawParticle = R_DrawParticle_RGBA; @@ -2422,7 +2422,7 @@ void R_InitColumnDrawers () R_FillColumnHoriz = R_FillColumnHorizP_C; R_DrawFogBoundary = R_DrawFogBoundary_C; - R_MapTiltedPlane = R_MapColoredPlane_C; + R_MapTiltedPlane = R_MapTiltedPlane_C; R_MapColoredPlane = R_MapColoredPlane_C; R_DrawParticle = R_DrawParticle_C; diff --git a/src/r_plane.cpp b/src/r_plane.cpp index 05fce79a65..1cde16071c 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -480,124 +480,22 @@ void R_MapTiltedPlane_C (int y, int x1) void R_MapTiltedPlane_RGBA (int y, int x1) { int x2 = spanend[y]; - int width = x2 - x1; - double iz, uz, vz; - uint32_t *fb; - DWORD u, v; - int i; - iz = plane_sz[2] + plane_sz[1]*(centery-y) + plane_sz[0]*(x1-centerx); + // Slopes are broken currently in master. + // Until R_DrawTiltedPlane is fixed we are just going to fill with a solid color. - // Lighting is simple. 
It's just linear interpolation from start to end - if (plane_shade) + uint32_t *source = (uint32_t*)ds_source; + int source_width = 1 << ds_xbits; + int source_height = 1 << ds_ybits; + + uint32_t *dest = ylookup[y] + x1 + (uint32_t*)dc_destorg; + + int count = x2 - x1 + 1; + while (count > 0) { - uz = (iz + plane_sz[0]*width) * planelightfloat; - vz = iz * planelightfloat; - R_CalcTiltedLighting (vz, uz, width); + *(dest++) = source[0]; + count--; } - - uz = plane_su[2] + plane_su[1]*(centery-y) + plane_su[0]*(x1-centerx); - vz = plane_sv[2] + plane_sv[1]*(centery-y) + plane_sv[0]*(x1-centerx); - - fb = ylookup[y] + x1 + (uint32_t*)dc_destorg; - - BYTE vshift = 32 - ds_ybits; - BYTE ushift = vshift - ds_xbits; - int umask = ((1 << ds_xbits) - 1) << ds_ybits; - -#if 0 // The "perfect" reference version of this routine. Pretty slow. - // Use it only to see how things are supposed to look. - i = 0; - do - { - double z = 1.f/iz; - - u = SQWORD(uz*z) + pviewx; - v = SQWORD(vz*z) + pviewy; - R_SetDSColorMapLight(tiltlighting[i], 0, 0); - fb[i++] = ds_colormap[ds_source[(v >> vshift) | ((u >> ushift) & umask)]]; - iz += plane_sz[0]; - uz += plane_su[0]; - vz += plane_sv[0]; - } while (--width >= 0); -#else -//#define SPANSIZE 32 -//#define INVSPAN 0.03125f -//#define SPANSIZE 8 -//#define INVSPAN 0.125f -#define SPANSIZE 16 -#define INVSPAN 0.0625f - - double startz = 1.f/iz; - double startu = uz*startz; - double startv = vz*startz; - double izstep, uzstep, vzstep; - - izstep = plane_sz[0] * SPANSIZE; - uzstep = plane_su[0] * SPANSIZE; - vzstep = plane_sv[0] * SPANSIZE; - x1 = 0; - width++; - - while (width >= SPANSIZE) - { - iz += izstep; - uz += uzstep; - vz += vzstep; - - double endz = 1.f/iz; - double endu = uz*endz; - double endv = vz*endz; - DWORD stepu = SQWORD((endu - startu) * INVSPAN); - DWORD stepv = SQWORD((endv - startv) * INVSPAN); - u = SQWORD(startu) + pviewx; - v = SQWORD(startv) + pviewy; - - for (i = SPANSIZE-1; i >= 0; i--) - { - fb[x1] = 
*(tiltlighting[x1] + ds_source[(v >> vshift) | ((u >> ushift) & umask)]); - x1++; - u += stepu; - v += stepv; - } - startu = endu; - startv = endv; - width -= SPANSIZE; - } - if (width > 0) - { - if (width == 1) - { - u = SQWORD(startu); - v = SQWORD(startv); - fb[x1] = *(tiltlighting[x1] + ds_source[(v >> vshift) | ((u >> ushift) & umask)]); - } - else - { - double left = width; - iz += plane_sz[0] * left; - uz += plane_su[0] * left; - vz += plane_sv[0] * left; - - double endz = 1.f/iz; - double endu = uz*endz; - double endv = vz*endz; - left = 1.f/left; - DWORD stepu = SQWORD((endu - startu) * left); - DWORD stepv = SQWORD((endv - startv) * left); - u = SQWORD(startu) + pviewx; - v = SQWORD(startv) + pviewy; - - for (; width != 0; width--) - { - fb[x1] = *(tiltlighting[x1] + ds_source[(v >> vshift) | ((u >> ushift) & umask)]); - x1++; - u += stepu; - v += stepv; - } - } - } -#endif } //========================================================================== From 3ce2d8365dd6a91c068a20b0caf4b683634ceba3 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 13 Jun 2016 20:01:31 +0200 Subject: [PATCH 040/912] Fix HUD colors when hw2d is off --- src/r_draw_rgba.cpp | 9 ++++++--- src/r_drawt_rgba.cpp | 19 +++++++++++++------ src/v_draw.cpp | 13 +++++++++---- 3 files changed, 28 insertions(+), 13 deletions(-) diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index d5c275d0ee..7e9f851179 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -325,11 +325,11 @@ public: { int pitch = dc_pitch * thread->num_cores; - BYTE color = dc_color; + uint32_t color = shade_pal_index_simple(dc_color, light); do { - *dest = shade_pal_index_simple(color, light); + *dest = color; dest += pitch; } while (--count); } @@ -629,6 +629,7 @@ class DrawAddColumnRGBACommand : public DrawerCommand ShadeConstants dc_shade_constants; fixed_t dc_srcalpha; fixed_t dc_destalpha; + BYTE *dc_colormap; public: DrawAddColumnRGBACommand() @@ -643,6 +644,7 @@ public: dc_shade_constants = 
::dc_shade_constants; dc_srcalpha = ::dc_srcalpha; dc_destalpha = ::dc_destalpha; + dc_colormap = ::dc_colormap; } void Execute(DrawerThread *thread) override @@ -667,13 +669,14 @@ public: uint32_t light = calc_light_multiplier(dc_light); ShadeConstants shade_constants = dc_shade_constants; + BYTE *colormap = dc_colormap; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); do { - uint32_t fg = shade_pal_index(source[frac >> FRACBITS], light, shade_constants); + uint32_t fg = shade_pal_index(colormap[source[frac >> FRACBITS]], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp index 5f0fc4156c..cd124ac639 100644 --- a/src/r_drawt_rgba.cpp +++ b/src/r_drawt_rgba.cpp @@ -512,6 +512,7 @@ class RtAdd1colRGBACommand : public DrawerCommand ShadeConstants dc_shade_constants; fixed_t dc_srcalpha; fixed_t dc_destalpha; + BYTE *dc_colormap; public: RtAdd1colRGBACommand(int hx, int sx, int yl, int yh) @@ -527,6 +528,7 @@ public: dc_shade_constants = ::dc_shade_constants; dc_srcalpha = ::dc_srcalpha; dc_destalpha = ::dc_destalpha; + dc_colormap = ::dc_colormap; } void Execute(DrawerThread *thread) override @@ -548,12 +550,13 @@ public: uint32_t light = calc_light_multiplier(dc_light); ShadeConstants shade_constants = dc_shade_constants; + BYTE *colormap = dc_colormap; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); do { - uint32_t fg = shade_pal_index(*source, light, shade_constants); + uint32_t fg = shade_pal_index(colormap[*source], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -583,6 +586,7 @@ class RtAdd4colsRGBACommand : public DrawerCommand int dc_pitch; fixed_t dc_light; ShadeConstants dc_shade_constants; + BYTE *dc_colormap; public: RtAdd4colsRGBACommand(int sx, int yl, int 
yh) @@ -595,6 +599,7 @@ public: dc_pitch = ::dc_pitch; dc_light = ::dc_light; dc_shade_constants = ::dc_shade_constants; + dc_colormap = ::dc_colormap; } #ifdef NO_SSE @@ -617,6 +622,7 @@ public: uint32_t light = calc_light_multiplier(dc_light); ShadeConstants shade_constants = dc_shade_constants; + BYTE *colormap = dc_colormap; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); @@ -624,7 +630,7 @@ public: do { for (int i = 0; i < 4; i++) { - uint32_t fg = shade_pal_index(source[i], light, shade_constants); + uint32_t fg = shade_pal_index(colormap[source[i]], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -664,6 +670,7 @@ public: uint32_t light = calc_light_multiplier(dc_light); uint32_t *palette = (uint32_t*)GPalette.BaseColors; + BYTE *colormap = dc_colormap; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); @@ -678,10 +685,10 @@ public: __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); do { - uint32_t p0 = source[0]; - uint32_t p1 = source[1]; - uint32_t p2 = source[2]; - uint32_t p3 = source[3]; + uint32_t p0 = colormap[source[0]]; + uint32_t p1 = colormap[source[1]]; + uint32_t p2 = colormap[source[2]]; + uint32_t p3 = colormap[source[3]]; // shade_pal_index: __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); diff --git a/src/v_draw.cpp b/src/v_draw.cpp index c2dbf31c55..d03853c117 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -1095,9 +1095,10 @@ void DCanvas::DrawLine(int x0, int y0, int x1, int y1, int palColor, uint32 real } if (IsBgra()) { + uint32_t fillColor = GPalette.BaseColors[palColor].d; uint32_t *spot = (uint32_t*)GetBuffer() + y0*GetPitch() + x0; for (int i = 0; i <= deltaX; i++) - spot[i] = palColor; + spot[i] = fillColor; } else { @@ -1108,11 +1109,12 @@ void 
DCanvas::DrawLine(int x0, int y0, int x1, int y1, int palColor, uint32 real { // vertical line if (IsBgra()) { + uint32_t fillColor = GPalette.BaseColors[palColor].d; uint32_t *spot = (uint32_t*)GetBuffer() + y0*GetPitch() + x0; int pitch = GetPitch(); do { - *spot = palColor; + *spot = fillColor; spot += pitch; } while (--deltaY != 0); } @@ -1131,11 +1133,12 @@ void DCanvas::DrawLine(int x0, int y0, int x1, int y1, int palColor, uint32 real { // diagonal line. if (IsBgra()) { + uint32_t fillColor = GPalette.BaseColors[palColor].d; uint32_t *spot = (uint32_t*)GetBuffer() + y0*GetPitch() + x0; int advance = GetPitch() + xDir; do { - *spot = palColor; + *spot = fillColor; spot += advance; } while (--deltaY != 0); } @@ -1299,12 +1302,14 @@ void DCanvas::Clear (int left, int top, int right, int bottom, int palcolor, uin if (IsBgra()) { + uint32_t fill_color = GPalette.BaseColors[palcolor]; + uint32_t *dest = (uint32_t*)Buffer + top * Pitch + left; x = right - left; for (y = top; y < bottom; y++) { for (int i = 0; i < x; i++) - dest[i] = palcolor; + dest[i] = fill_color; dest += Pitch; } } From 8ba6a4f17501e34db5b644567fcd40e06502017c Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 13 Jun 2016 21:39:55 +0200 Subject: [PATCH 041/912] Precache, Unload and FillSimplePoly bug fix --- src/g_strife/strife_sbar.cpp | 5 ----- src/menu/playerdisplay.cpp | 5 ----- src/r_swrenderer.cpp | 4 ++-- src/textures/automaptexture.cpp | 1 + src/textures/buildtexture.cpp | 12 ------------ src/textures/canvastexture.cpp | 2 ++ src/textures/ddstexture.cpp | 1 + src/textures/flattexture.cpp | 1 + src/textures/imgztexture.cpp | 1 + src/textures/jpegtexture.cpp | 3 +-- src/textures/multipatchtexture.cpp | 1 + src/textures/patchtexture.cpp | 1 + src/textures/pcxtexture.cpp | 1 + src/textures/pngtexture.cpp | 3 +-- src/textures/rawpagetexture.cpp | 1 + src/textures/texture.cpp | 20 ++++++++++++-------- src/textures/textures.h | 6 ++---- src/textures/tgatexture.cpp | 1 + 
src/textures/warptexture.cpp | 1 + src/v_draw.cpp | 2 +- src/v_font.cpp | 2 ++ src/v_video.cpp | 11 ----------- 22 files changed, 33 insertions(+), 52 deletions(-) diff --git a/src/g_strife/strife_sbar.cpp b/src/g_strife/strife_sbar.cpp index bcdf624d7b..eb3fa26087 100644 --- a/src/g_strife/strife_sbar.cpp +++ b/src/g_strife/strife_sbar.cpp @@ -35,7 +35,6 @@ public: const BYTE *GetColumn (unsigned int column, const Span **spans_out); const BYTE *GetPixels (); bool CheckModified (); - void Unload (); void SetVial (int level); @@ -90,10 +89,6 @@ bool FHealthBar::CheckModified () return NeedRefresh; } -void FHealthBar::Unload () -{ -} - const BYTE *FHealthBar::GetColumn (unsigned int column, const Span **spans_out) { if (NeedRefresh) diff --git a/src/menu/playerdisplay.cpp b/src/menu/playerdisplay.cpp index c3d11a43a3..16671975a0 100644 --- a/src/menu/playerdisplay.cpp +++ b/src/menu/playerdisplay.cpp @@ -78,7 +78,6 @@ public: const BYTE *GetColumn(unsigned int column, const Span **spans_out); const BYTE *GetPixels(); - void Unload(); bool CheckModified(); protected: @@ -212,10 +211,6 @@ bool FBackdropTexture::CheckModified() return LastRenderTic != gametic; } -void FBackdropTexture::Unload() -{ -} - //============================================================================= // // diff --git a/src/r_swrenderer.cpp b/src/r_swrenderer.cpp index 556323df52..5be41660e6 100644 --- a/src/r_swrenderer.cpp +++ b/src/r_swrenderer.cpp @@ -87,9 +87,9 @@ void FSoftwareRenderer::PrecacheTexture(FTexture *tex, int cache) if (cache & FTextureManager::HIT_Columnmode) { const FTexture::Span *spanp; - /*if (r_swtruecolor) + if (r_swtruecolor) tex->GetColumnBgra(0, &spanp); - else*/ + else tex->GetColumn(0, &spanp); } else if (cache != 0) diff --git a/src/textures/automaptexture.cpp b/src/textures/automaptexture.cpp index 67d68b9feb..9aac379ef1 100644 --- a/src/textures/automaptexture.cpp +++ b/src/textures/automaptexture.cpp @@ -122,6 +122,7 @@ void FAutomapTexture::Unload () 
delete[] Pixels; Pixels = NULL; } + FTexture::Unload(); } //========================================================================== diff --git a/src/textures/buildtexture.cpp b/src/textures/buildtexture.cpp index bfcc6333d3..1155dacc4b 100644 --- a/src/textures/buildtexture.cpp +++ b/src/textures/buildtexture.cpp @@ -56,7 +56,6 @@ public: const BYTE *GetColumn (unsigned int column, const Span **spans_out); const BYTE *GetPixels (); - void Unload (); protected: const BYTE *Pixels; @@ -103,17 +102,6 @@ FBuildTexture::~FBuildTexture () // //========================================================================== -void FBuildTexture::Unload () -{ - // Nothing to do, since the pixels are accessed from memory-mapped files directly -} - -//========================================================================== -// -// -// -//========================================================================== - const BYTE *FBuildTexture::GetPixels () { return Pixels; diff --git a/src/textures/canvastexture.cpp b/src/textures/canvastexture.cpp index a72546d781..109d927ab2 100644 --- a/src/textures/canvastexture.cpp +++ b/src/textures/canvastexture.cpp @@ -183,6 +183,8 @@ void FCanvasTexture::Unload () CanvasBgra->Destroy(); CanvasBgra = NULL; } + + FTexture::Unload(); } bool FCanvasTexture::CheckModified () diff --git a/src/textures/ddstexture.cpp b/src/textures/ddstexture.cpp index 31e7480221..fb4de34c57 100644 --- a/src/textures/ddstexture.cpp +++ b/src/textures/ddstexture.cpp @@ -401,6 +401,7 @@ void FDDSTexture::Unload () delete[] Pixels; Pixels = NULL; } + FTexture::Unload(); } //========================================================================== diff --git a/src/textures/flattexture.cpp b/src/textures/flattexture.cpp index 840d53aaf1..08e0d1221e 100644 --- a/src/textures/flattexture.cpp +++ b/src/textures/flattexture.cpp @@ -138,6 +138,7 @@ void FFlatTexture::Unload () delete[] Pixels; Pixels = NULL; } + FTexture::Unload(); } 
//========================================================================== diff --git a/src/textures/imgztexture.cpp b/src/textures/imgztexture.cpp index 1c262d7079..04932d4bf5 100644 --- a/src/textures/imgztexture.cpp +++ b/src/textures/imgztexture.cpp @@ -142,6 +142,7 @@ void FIMGZTexture::Unload () delete[] Pixels; Pixels = NULL; } + FTexture::Unload(); } //========================================================================== diff --git a/src/textures/jpegtexture.cpp b/src/textures/jpegtexture.cpp index a37eff6c3d..3b53598460 100644 --- a/src/textures/jpegtexture.cpp +++ b/src/textures/jpegtexture.cpp @@ -196,7 +196,6 @@ public: protected: BYTE *Pixels; - std::vector PixelsBgra; Span DummySpans[2]; void MakeTexture (); @@ -300,7 +299,7 @@ void FJPEGTexture::Unload () { delete[] Pixels; Pixels = NULL; - PixelsBgra.clear(); + FTexture::Unload(); } //========================================================================== diff --git a/src/textures/multipatchtexture.cpp b/src/textures/multipatchtexture.cpp index b0db481a89..6ae45c785d 100644 --- a/src/textures/multipatchtexture.cpp +++ b/src/textures/multipatchtexture.cpp @@ -362,6 +362,7 @@ void FMultiPatchTexture::Unload () delete[] Pixels; Pixels = NULL; } + FTexture::Unload(); } //========================================================================== diff --git a/src/textures/patchtexture.cpp b/src/textures/patchtexture.cpp index 423ce4deb2..8388515c02 100644 --- a/src/textures/patchtexture.cpp +++ b/src/textures/patchtexture.cpp @@ -184,6 +184,7 @@ void FPatchTexture::Unload () delete[] Pixels; Pixels = NULL; } + FTexture::Unload(); } //========================================================================== diff --git a/src/textures/pcxtexture.cpp b/src/textures/pcxtexture.cpp index 0ec5d2933c..42a13b85a9 100644 --- a/src/textures/pcxtexture.cpp +++ b/src/textures/pcxtexture.cpp @@ -191,6 +191,7 @@ void FPCXTexture::Unload () delete[] Pixels; Pixels = NULL; } + FTexture::Unload(); } 
//========================================================================== diff --git a/src/textures/pngtexture.cpp b/src/textures/pngtexture.cpp index 95f7aca75f..206797a344 100644 --- a/src/textures/pngtexture.cpp +++ b/src/textures/pngtexture.cpp @@ -67,7 +67,6 @@ protected: FString SourceFile; BYTE *Pixels; - std::vector PixelsBgra; Span **Spans; BYTE BitDepth; @@ -382,7 +381,7 @@ void FPNGTexture::Unload () { delete[] Pixels; Pixels = NULL; - PixelsBgra.clear(); + FTexture::Unload(); } //========================================================================== diff --git a/src/textures/rawpagetexture.cpp b/src/textures/rawpagetexture.cpp index 1402f88442..69313fd1c2 100644 --- a/src/textures/rawpagetexture.cpp +++ b/src/textures/rawpagetexture.cpp @@ -206,6 +206,7 @@ void FRawPageTexture::Unload () delete[] Pixels; Pixels = NULL; } + FTexture::Unload(); } //========================================================================== diff --git a/src/textures/texture.cpp b/src/textures/texture.cpp index 28a3b93338..0030719cbe 100644 --- a/src/textures/texture.cpp +++ b/src/textures/texture.cpp @@ -176,6 +176,11 @@ FTexture::~FTexture () KillNative(); } +void FTexture::Unload() +{ + PixelsBgra = std::vector(); +} + const uint32_t *FTexture::GetColumnBgra(unsigned int column, const Span **spans_out) { const uint32_t *pixels = GetPixelsBgra(); @@ -189,16 +194,19 @@ const uint32_t *FTexture::GetColumnBgra(unsigned int column, const Span **spans_ const uint32_t *FTexture::GetPixelsBgra() { - if (BgraPixels.empty()) + if (PixelsBgra.empty()) { + GetColumn(0, nullptr); const BYTE *indices = GetPixels(); - BgraPixels.resize(Width * Height); + if (indices == nullptr) + return nullptr; + PixelsBgra.resize(Width * Height); for (int i = 0; i < Width * Height; i++) { - BgraPixels[i] = GPalette.BaseColors[indices[i]].d; + PixelsBgra[i] = GPalette.BaseColors[indices[i]].d; } } - return BgraPixels.data(); + return PixelsBgra.data(); } bool FTexture::CheckModified () @@ 
-642,10 +650,6 @@ FDummyTexture::FDummyTexture () UseType = TEX_Null; } -void FDummyTexture::Unload () -{ -} - void FDummyTexture::SetSize (int width, int height) { Width = width; diff --git a/src/textures/textures.h b/src/textures/textures.h index 872c83b1a9..38d1ef487b 100644 --- a/src/textures/textures.h +++ b/src/textures/textures.h @@ -192,7 +192,7 @@ public: virtual FTexture *GetRedirect(bool wantwarped); virtual FTexture *GetRawTexture(); // for FMultiPatchTexture to override - virtual void Unload () = 0; + virtual void Unload (); // Returns the native pixel format for this image virtual FTextureFormat GetFormat(); @@ -269,8 +269,7 @@ protected: Rotations = other->Rotations; } -private: - std::vector BgraPixels; + std::vector PixelsBgra; public: static void FlipSquareBlock (BYTE *block, int x, int y); @@ -472,7 +471,6 @@ public: FDummyTexture (); const BYTE *GetColumn (unsigned int column, const Span **spans_out); const BYTE *GetPixels (); - void Unload (); void SetSize (int width, int height); }; diff --git a/src/textures/tgatexture.cpp b/src/textures/tgatexture.cpp index b208a51a37..5e76a63b2d 100644 --- a/src/textures/tgatexture.cpp +++ b/src/textures/tgatexture.cpp @@ -181,6 +181,7 @@ void FTGATexture::Unload () delete[] Pixels; Pixels = NULL; } + FTexture::Unload(); } //========================================================================== diff --git a/src/textures/warptexture.cpp b/src/textures/warptexture.cpp index a8a2ddb9e1..b6977dd77e 100644 --- a/src/textures/warptexture.cpp +++ b/src/textures/warptexture.cpp @@ -74,6 +74,7 @@ void FWarpTexture::Unload () Spans = NULL; } SourcePic->Unload (); + FTexture::Unload(); } bool FWarpTexture::CheckModified () diff --git a/src/v_draw.cpp b/src/v_draw.cpp index d03853c117..02ba591b6e 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -1403,7 +1403,7 @@ void DCanvas::FillSimplePoly(FTexture *tex, FVector2 *points, int npoints, R_SetSpanColormap(colormap, clamp(shade >> FRACBITS, 0, NUMCOLORMAPS - 1)); 
else R_SetSpanColormap(&identitycolormap, 0); - R_SetSpanSource(tex->GetPixels()); + R_SetSpanSource(r_swtruecolor ? (const BYTE*)tex->GetPixelsBgra() : tex->GetPixels()); scalex = double(1u << (32 - ds_xbits)) / scalex; scaley = double(1u << (32 - ds_ybits)) / scaley; ds_xstep = xs_RoundToInt(cosrot * scalex); diff --git a/src/v_font.cpp b/src/v_font.cpp index 052074d11e..ef9b69dd12 100644 --- a/src/v_font.cpp +++ b/src/v_font.cpp @@ -1662,6 +1662,7 @@ void FFontChar1::Unload () delete[] Pixels; Pixels = NULL; } + FTexture::Unload(); } //========================================================================== @@ -1723,6 +1724,7 @@ void FFontChar2::Unload () delete[] Pixels; Pixels = NULL; } + FTexture::Unload(); } //========================================================================== diff --git a/src/v_video.cpp b/src/v_video.cpp index e586381211..2cf04a29d2 100644 --- a/src/v_video.cpp +++ b/src/v_video.cpp @@ -117,7 +117,6 @@ public: const BYTE *GetColumn(unsigned int column, const Span **spans_out); const BYTE *GetPixels(); - void Unload(); bool CheckModified(); void SetTranslation(int num); @@ -1076,16 +1075,6 @@ void FPaletteTester::SetTranslation(int num) } } -//========================================================================== -// -// FPaletteTester :: Unload -// -//========================================================================== - -void FPaletteTester::Unload() -{ -} - //========================================================================== // // FPaletteTester :: GetColumn From 69b2fa72e86b180351a70a95243c1b7484f8cec9 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 13 Jun 2016 23:10:54 +0200 Subject: [PATCH 042/912] Moved RGBA draw stuff to its own header file --- src/r_draw.cpp | 91 +++++++------- src/r_draw.h | 239 ------------------------------------- src/r_draw_rgba.cpp | 39 +++--- src/r_draw_rgba.h | 276 +++++++++++++++++++++++++++++++++++++++++++ src/r_drawt_rgba.cpp | 95 +++++++-------- src/r_main.cpp | 1 + 
src/r_swrenderer.cpp | 1 + src/r_things.cpp | 1 + 8 files changed, 393 insertions(+), 350 deletions(-) create mode 100644 src/r_draw_rgba.h diff --git a/src/r_draw.cpp b/src/r_draw.cpp index ec7313c4f6..552e5ff13a 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -39,6 +39,7 @@ #include "v_palette.h" #include "r_data/colormaps.h" #include "r_plane.h" +#include "r_draw_rgba.h" #include "gi.h" #include "stats.h" @@ -2295,34 +2296,34 @@ void R_InitColumnDrawers () domvline4_saved = domvline4; } - R_DrawColumnHoriz = R_DrawColumnHorizP_RGBA_C; - R_DrawColumn = R_DrawColumnP_RGBA_C; - R_DrawFuzzColumn = R_DrawFuzzColumnP_RGBA_C; - R_DrawTranslatedColumn = R_DrawTranslatedColumnP_RGBA_C; - R_DrawShadedColumn = R_DrawShadedColumnP_RGBA_C; - R_DrawSpanMasked = R_DrawSpanMaskedP_RGBA_C; - R_DrawSpan = R_DrawSpanP_RGBA_C; + R_DrawColumnHoriz = R_DrawColumnHorizP_RGBA; + R_DrawColumn = R_DrawColumnP_RGBA; + R_DrawFuzzColumn = R_DrawFuzzColumnP_RGBA; + R_DrawTranslatedColumn = R_DrawTranslatedColumnP_RGBA; + R_DrawShadedColumn = R_DrawShadedColumnP_RGBA; + R_DrawSpanMasked = R_DrawSpanMaskedP_RGBA; + R_DrawSpan = R_DrawSpanP_RGBA; - R_DrawSpanTranslucent = R_DrawSpanTranslucentP_RGBA_C; - R_DrawSpanMaskedTranslucent = R_DrawSpanMaskedTranslucentP_RGBA_C; - R_DrawSpanAddClamp = R_DrawSpanAddClampP_RGBA_C; - R_DrawSpanMaskedAddClamp = R_DrawSpanMaskedAddClampP_RGBA_C; + R_DrawSpanTranslucent = R_DrawSpanTranslucentP_RGBA; + R_DrawSpanMaskedTranslucent = R_DrawSpanMaskedTranslucentP_RGBA; + R_DrawSpanAddClamp = R_DrawSpanAddClampP_RGBA; + R_DrawSpanMaskedAddClamp = R_DrawSpanMaskedAddClampP_RGBA; R_FillColumn = R_FillColumnP_RGBA; - R_FillAddColumn = R_FillAddColumn_RGBA_C; + R_FillAddColumn = R_FillAddColumn_RGBA; R_FillAddClampColumn = R_FillAddClampColumn_RGBA; R_FillSubClampColumn = R_FillSubClampColumn_RGBA; R_FillRevSubClampColumn = R_FillRevSubClampColumn_RGBA; - R_DrawAddColumn = R_DrawAddColumnP_RGBA_C; - R_DrawTlatedAddColumn = R_DrawTlatedAddColumnP_RGBA_C; - 
R_DrawAddClampColumn = R_DrawAddClampColumnP_RGBA_C; - R_DrawAddClampTranslatedColumn = R_DrawAddClampTranslatedColumnP_RGBA_C; - R_DrawSubClampColumn = R_DrawSubClampColumnP_RGBA_C; - R_DrawSubClampTranslatedColumn = R_DrawSubClampTranslatedColumnP_RGBA_C; - R_DrawRevSubClampColumn = R_DrawRevSubClampColumnP_RGBA_C; - R_DrawRevSubClampTranslatedColumn = R_DrawRevSubClampTranslatedColumnP_RGBA_C; + R_DrawAddColumn = R_DrawAddColumnP_RGBA; + R_DrawTlatedAddColumn = R_DrawTlatedAddColumnP_RGBA; + R_DrawAddClampColumn = R_DrawAddClampColumnP_RGBA; + R_DrawAddClampTranslatedColumn = R_DrawAddClampTranslatedColumnP_RGBA; + R_DrawSubClampColumn = R_DrawSubClampColumnP_RGBA; + R_DrawSubClampTranslatedColumn = R_DrawSubClampTranslatedColumnP_RGBA; + R_DrawRevSubClampColumn = R_DrawRevSubClampColumnP_RGBA; + R_DrawRevSubClampTranslatedColumn = R_DrawRevSubClampTranslatedColumnP_RGBA; R_FillSpan = R_FillSpan_RGBA; R_DrawFogBoundary = R_DrawFogBoundary_RGBA; - R_FillColumnHoriz = R_FillColumnHorizP_RGBA_C; + R_FillColumnHoriz = R_FillColumnHorizP_RGBA; R_DrawFogBoundary = R_DrawFogBoundary_RGBA; R_MapTiltedPlane = R_MapTiltedPlane_RGBA; @@ -2338,30 +2339,30 @@ void R_InitColumnDrawers () tmvline1_revsubclamp = tmvline1_revsubclamp_RGBA; tmvline4_revsubclamp = tmvline4_revsubclamp_RGBA; - rt_copy1col = rt_copy1col_RGBA_c; - rt_copy4cols = rt_copy4cols_RGBA_c; - rt_map1col = rt_map1col_RGBA_c; - rt_map4cols = rt_map4cols_RGBA_c; - rt_shaded1col = rt_shaded1col_RGBA_c; - rt_shaded4cols = rt_shaded4cols_RGBA_c; - rt_add1col = rt_add1col_RGBA_c; - rt_add4cols = rt_add4cols_RGBA_c; - rt_addclamp1col = rt_addclamp1col_RGBA_c; - rt_addclamp4cols = rt_addclamp4cols_RGBA_c; - rt_subclamp1col = rt_subclamp1col_RGBA_c; - rt_revsubclamp1col = rt_revsubclamp1col_RGBA_c; - rt_tlate1col = rt_tlate1col_RGBA_c; - rt_tlateadd1col = rt_tlateadd1col_RGBA_c; - rt_tlateaddclamp1col = rt_tlateaddclamp1col_RGBA_c; - rt_tlatesubclamp1col = rt_tlatesubclamp1col_RGBA_c; - rt_tlaterevsubclamp1col = 
rt_tlaterevsubclamp1col_RGBA_c; - rt_subclamp4cols = rt_subclamp4cols_RGBA_c; - rt_revsubclamp4cols = rt_revsubclamp4cols_RGBA_c; - rt_tlate4cols = rt_tlate4cols_RGBA_c; - rt_tlateadd4cols = rt_tlateadd4cols_RGBA_c; - rt_tlateaddclamp4cols = rt_tlateaddclamp4cols_RGBA_c; - rt_tlatesubclamp4cols = rt_tlatesubclamp4cols_RGBA_c; - rt_tlaterevsubclamp4cols = rt_tlaterevsubclamp4cols_RGBA_c; + rt_copy1col = rt_copy1col_RGBA; + rt_copy4cols = rt_copy4cols_RGBA; + rt_map1col = rt_map1col_RGBA; + rt_map4cols = rt_map4cols_RGBA; + rt_shaded1col = rt_shaded1col_RGBA; + rt_shaded4cols = rt_shaded4cols_RGBA; + rt_add1col = rt_add1col_RGBA; + rt_add4cols = rt_add4cols_RGBA; + rt_addclamp1col = rt_addclamp1col_RGBA; + rt_addclamp4cols = rt_addclamp4cols_RGBA; + rt_subclamp1col = rt_subclamp1col_RGBA; + rt_revsubclamp1col = rt_revsubclamp1col_RGBA; + rt_tlate1col = rt_tlate1col_RGBA; + rt_tlateadd1col = rt_tlateadd1col_RGBA; + rt_tlateaddclamp1col = rt_tlateaddclamp1col_RGBA; + rt_tlatesubclamp1col = rt_tlatesubclamp1col_RGBA; + rt_tlaterevsubclamp1col = rt_tlaterevsubclamp1col_RGBA; + rt_subclamp4cols = rt_subclamp4cols_RGBA; + rt_revsubclamp4cols = rt_revsubclamp4cols_RGBA; + rt_tlate4cols = rt_tlate4cols_RGBA; + rt_tlateadd4cols = rt_tlateadd4cols_RGBA; + rt_tlateaddclamp4cols = rt_tlateaddclamp4cols_RGBA; + rt_tlatesubclamp4cols = rt_tlatesubclamp4cols_RGBA; + rt_tlaterevsubclamp4cols = rt_tlaterevsubclamp4cols_RGBA; rt_initcols = rt_initcols_rgba; rt_span_coverage = rt_span_coverage_rgba; diff --git a/src/r_draw.h b/src/r_draw.h index d09d0ab892..cea05e469c 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -24,11 +24,6 @@ #define __R_DRAW__ #include "r_defs.h" -#include -#include -#include -#include -#include // Spectre/Invisibility. 
#define FUZZTABLE 50 @@ -175,39 +170,6 @@ void rt_map4cols_asm1 (int sx, int yl, int yh); void rt_map4cols_asm2 (int sx, int yl, int yh); void rt_add4cols_asm (int sx, int yl, int yh); void rt_addclamp4cols_asm (int sx, int yl, int yh); - -/// - -void rt_copy1col_RGBA_c (int hx, int sx, int yl, int yh); -void rt_copy4cols_RGBA_c (int sx, int yl, int yh); - -void rt_shaded1col_RGBA_c (int hx, int sx, int yl, int yh); -void rt_shaded4cols_RGBA_c (int sx, int yl, int yh); - -void rt_map1col_RGBA_c (int hx, int sx, int yl, int yh); -void rt_add1col_RGBA_c (int hx, int sx, int yl, int yh); -void rt_addclamp1col_RGBA_c (int hx, int sx, int yl, int yh); -void rt_subclamp1col_RGBA_c (int hx, int sx, int yl, int yh); -void rt_revsubclamp1col_RGBA_c (int hx, int sx, int yl, int yh); - -void rt_tlate1col_RGBA_c (int hx, int sx, int yl, int yh); -void rt_tlateadd1col_RGBA_c (int hx, int sx, int yl, int yh); -void rt_tlateaddclamp1col_RGBA_c (int hx, int sx, int yl, int yh); -void rt_tlatesubclamp1col_RGBA_c (int hx, int sx, int yl, int yh); -void rt_tlaterevsubclamp1col_RGBA_c (int hx, int sx, int yl, int yh); - -void rt_map4cols_RGBA_c (int sx, int yl, int yh); -void rt_add4cols_RGBA_c (int sx, int yl, int yh); -void rt_addclamp4cols_RGBA_c (int sx, int yl, int yh); -void rt_subclamp4cols_RGBA_c (int sx, int yl, int yh); -void rt_revsubclamp4cols_RGBA_c (int sx, int yl, int yh); - -void rt_tlate4cols_RGBA_c (int sx, int yl, int yh); -void rt_tlateadd4cols_RGBA_c (int sx, int yl, int yh); -void rt_tlateaddclamp4cols_RGBA_c (int sx, int yl, int yh); -void rt_tlatesubclamp4cols_RGBA_c (int sx, int yl, int yh); -void rt_tlaterevsubclamp4cols_RGBA_c (int sx, int yl, int yh); - } extern void (*rt_copy1col)(int hx, int sx, int yl, int yh); @@ -247,10 +209,8 @@ void rt_draw4cols (int sx); // [RH] Preps the temporary horizontal buffer. 
void rt_initcols_pal (BYTE *buffer); -void rt_initcols_rgba (BYTE *buffer); void rt_span_coverage_pal(int x, int start, int stop); -void rt_span_coverage_rgba(int x, int start, int stop); extern void (*R_DrawFogBoundary)(int x1, int x2, short *uclip, short *dclip); @@ -280,46 +240,6 @@ void R_DrawSpanMaskedP_C (void); #endif -void R_DrawColumnHorizP_RGBA_C (void); -void R_DrawColumnP_RGBA_C (void); -void R_DrawFuzzColumnP_RGBA_C (void); -void R_DrawTranslatedColumnP_RGBA_C (void); -void R_DrawShadedColumnP_RGBA_C (void); -void R_DrawSpanP_RGBA_C (void); -void R_DrawSpanMaskedP_RGBA_C (void); - -void R_DrawSpanTranslucentP_RGBA_C(); -void R_DrawSpanMaskedTranslucentP_RGBA_C(); -void R_DrawSpanAddClampP_RGBA_C(); -void R_DrawSpanMaskedAddClampP_RGBA_C(); -void R_FillColumnP_RGBA(); -void R_FillAddColumn_RGBA_C(); -void R_FillAddClampColumn_RGBA(); -void R_FillSubClampColumn_RGBA(); -void R_FillRevSubClampColumn_RGBA(); -void R_DrawAddColumnP_RGBA_C(); -void R_DrawTlatedAddColumnP_RGBA_C(); -void R_DrawAddClampColumnP_RGBA_C(); -void R_DrawAddClampTranslatedColumnP_RGBA_C(); -void R_DrawSubClampColumnP_RGBA_C(); -void R_DrawSubClampTranslatedColumnP_RGBA_C(); -void R_DrawRevSubClampColumnP_RGBA_C(); -void R_DrawRevSubClampTranslatedColumnP_RGBA_C(); -void R_FillSpan_RGBA(); -void R_DrawFogBoundary_RGBA(int x1, int x2, short *uclip, short *dclip); -fixed_t tmvline1_add_RGBA(); -void tmvline4_add_RGBA(); -fixed_t tmvline1_addclamp_RGBA(); -void tmvline4_addclamp_RGBA(); -fixed_t tmvline1_subclamp_RGBA(); -void tmvline4_subclamp_RGBA(); -fixed_t tmvline1_revsubclamp_RGBA(); -void tmvline4_revsubclamp_RGBA(); -DWORD vlinec1_RGBA(); -void vlinec4_RGBA(); -DWORD mvlinec1_RGBA(); -void mvlinec4_RGBA(); - void R_DrawSpanTranslucentP_C (void); void R_DrawSpanMaskedTranslucentP_C (void); @@ -348,9 +268,6 @@ void R_FillColumnP_C (void); void R_FillColumnHorizP_C (void); void R_FillSpan_C (void); -void R_FillColumnHorizP_RGBA_C(void); -void R_FillSpan_RGBA_C(void); - #ifdef 
X86_ASM #define R_SetupDrawSlab R_SetupDrawSlabA #define R_DrawSlab R_DrawSlabA @@ -443,160 +360,4 @@ void R_SetDSColorMapLight(FColormap *base_colormap, float light, int shade); void R_SetTranslationMap(lighttable_t *translation); -// Redirect drawer commands to worker threads -void R_BeginDrawerCommands(); - -// Wait until all drawers finished executing -void R_EndDrawerCommands(); - -class DrawerCommandQueue; - -class DrawerThread -{ -public: - std::thread thread; - - // Thread line index of this thread - int core = 0; - - // Number of active threads - int num_cores = 1; - - // Range of rows processed this pass - int pass_start_y = 0; - int pass_end_y = MAXHEIGHT; - - uint32_t dc_temp_rgbabuff_rgba[MAXHEIGHT * 4]; - uint32_t *dc_temp_rgba; - - // Checks if a line is rendered by this thread - bool line_skipped_by_thread(int line) - { - return line < pass_start_y || line >= pass_end_y || line % num_cores != core; - } - - // The number of lines to skip to reach the first line to be rendered by this thread - int skipped_by_thread(int first_line) - { - int pass_skip = MAX(pass_start_y - first_line, 0); - int core_skip = (num_cores - (first_line + pass_skip - core) % num_cores) % num_cores; - return pass_skip + core_skip; - } - - // The number of lines to be rendered by this thread - int count_for_thread(int first_line, int count) - { - int lines_until_pass_end = MAX(pass_end_y - first_line, 0); - count = MIN(count, lines_until_pass_end); - int c = (count - skipped_by_thread(first_line) + num_cores - 1) / num_cores; - return MAX(c, 0); - } - - // Calculate the dest address for the first line to be rendered by this thread - uint32_t *dest_for_thread(int first_line, int pitch, uint32_t *dest) - { - return dest + skipped_by_thread(first_line) * pitch; - } -}; - -class DrawerCommand -{ -protected: - int dc_dest_y; - -public: - DrawerCommand() - { - dc_dest_y = static_cast((dc_dest - dc_destorg) / (dc_pitch * 4)); - } - - virtual void Execute(DrawerThread *thread) = 0; -}; 
- -class DrawerCommandQueue -{ - enum { memorypool_size = 4 * 1024 * 1024 }; - char memorypool[memorypool_size]; - size_t memorypool_pos = 0; - - std::vector commands; - - std::vector threads; - - std::mutex start_mutex; - std::condition_variable start_condition; - std::vector active_commands; - bool shutdown_flag = false; - int run_id = 0; - - std::mutex end_mutex; - std::condition_variable end_condition; - size_t finished_threads = 0; - - int threaded_render = 0; - DrawerThread single_core_thread; - int num_passes = 2; - int rows_in_pass = 540; - - void StartThreads(); - void StopThreads(); - void Finish(); - - static DrawerCommandQueue *Instance(); - - ~DrawerCommandQueue(); - -public: - // Allocate memory valid for the duration of a command execution - static void* AllocMemory(size_t size); - - // Queue command to be executed by drawer worker threads - template - static void QueueCommand(Types &&... args) - { - auto queue = Instance(); - if (queue->threaded_render == 0) - { - T command(std::forward(args)...); - command.Execute(&queue->single_core_thread); - } - else - { - void *ptr = AllocMemory(sizeof(T)); - if (!ptr) - return; - T *command = new (ptr)T(std::forward(args)...); - queue->commands.push_back(command); - } - } - - // Redirects all drawing commands to worker threads until End is called - // Begin/End blocks can be nested. 
- static void Begin(); - - // End redirection and wait until all worker threads finished executing - static void End(); - - // Waits until all worker threads finished executing - static void WaitForWorkers(); -}; - -class ApplySpecialColormapRGBACommand : public DrawerCommand -{ - BYTE *buffer; - int pitch; - int width; - int height; - int start_red; - int start_green; - int start_blue; - int end_red; - int end_green; - int end_blue; - -public: - ApplySpecialColormapRGBACommand(FSpecialColormap *colormap, DFrameBuffer *screen); - void Execute(DrawerThread *thread) override; -}; - #endif diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 7e9f851179..a9dd2db322 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -37,6 +37,7 @@ #include "v_palette.h" #include "r_data/colormaps.h" #include "r_plane.h" +#include "r_draw_rgba.h" #include "gi.h" #include "stats.h" @@ -3655,7 +3656,7 @@ void R_EndDrawerCommands() DrawerCommandQueue::End(); } -void R_DrawColumnP_RGBA_C() +void R_DrawColumnP_RGBA() { DrawerCommandQueue::QueueCommand(); } @@ -3665,7 +3666,7 @@ void R_FillColumnP_RGBA() DrawerCommandQueue::QueueCommand(); } -void R_FillAddColumn_RGBA_C() +void R_FillAddColumn_RGBA() { DrawerCommandQueue::QueueCommand(); } @@ -3685,88 +3686,88 @@ void R_FillRevSubClampColumn_RGBA() DrawerCommandQueue::QueueCommand(); } -void R_DrawFuzzColumnP_RGBA_C() +void R_DrawFuzzColumnP_RGBA() { DrawerCommandQueue::QueueCommand(); fuzzpos = (fuzzpos + dc_yh - dc_yl) % FUZZTABLE; } -void R_DrawAddColumnP_RGBA_C() +void R_DrawAddColumnP_RGBA() { DrawerCommandQueue::QueueCommand(); } -void R_DrawTranslatedColumnP_RGBA_C() +void R_DrawTranslatedColumnP_RGBA() { DrawerCommandQueue::QueueCommand(); } -void R_DrawTlatedAddColumnP_RGBA_C() +void R_DrawTlatedAddColumnP_RGBA() { DrawerCommandQueue::QueueCommand(); } -void R_DrawShadedColumnP_RGBA_C() +void R_DrawShadedColumnP_RGBA() { DrawerCommandQueue::QueueCommand(); } -void R_DrawAddClampColumnP_RGBA_C() +void 
R_DrawAddClampColumnP_RGBA() { DrawerCommandQueue::QueueCommand(); } -void R_DrawAddClampTranslatedColumnP_RGBA_C() +void R_DrawAddClampTranslatedColumnP_RGBA() { DrawerCommandQueue::QueueCommand(); } -void R_DrawSubClampColumnP_RGBA_C() +void R_DrawSubClampColumnP_RGBA() { DrawerCommandQueue::QueueCommand(); } -void R_DrawSubClampTranslatedColumnP_RGBA_C() +void R_DrawSubClampTranslatedColumnP_RGBA() { DrawerCommandQueue::QueueCommand(); } -void R_DrawRevSubClampColumnP_RGBA_C() +void R_DrawRevSubClampColumnP_RGBA() { DrawerCommandQueue::QueueCommand(); } -void R_DrawRevSubClampTranslatedColumnP_RGBA_C() +void R_DrawRevSubClampTranslatedColumnP_RGBA() { DrawerCommandQueue::QueueCommand(); } -void R_DrawSpanP_RGBA_C() +void R_DrawSpanP_RGBA() { DrawerCommandQueue::QueueCommand(); } -void R_DrawSpanMaskedP_RGBA_C() +void R_DrawSpanMaskedP_RGBA() { DrawerCommandQueue::QueueCommand(); } -void R_DrawSpanTranslucentP_RGBA_C() +void R_DrawSpanTranslucentP_RGBA() { DrawerCommandQueue::QueueCommand(); } -void R_DrawSpanMaskedTranslucentP_RGBA_C() +void R_DrawSpanMaskedTranslucentP_RGBA() { DrawerCommandQueue::QueueCommand(); } -void R_DrawSpanAddClampP_RGBA_C() +void R_DrawSpanAddClampP_RGBA() { DrawerCommandQueue::QueueCommand(); } -void R_DrawSpanMaskedAddClampP_RGBA_C() +void R_DrawSpanMaskedAddClampP_RGBA() { DrawerCommandQueue::QueueCommand(); } diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h new file mode 100644 index 0000000000..5d7402634b --- /dev/null +++ b/src/r_draw_rgba.h @@ -0,0 +1,276 @@ +// Emacs style mode select -*- C++ -*- +//----------------------------------------------------------------------------- +// +// $Id:$ +// +// Copyright (C) 1993-1996 by id Software, Inc. +// +// This source is available for distribution and/or modification +// only under the terms of the DOOM Source Code License as +// published by id Software. All rights reserved. 
+// +// The source is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// FITNESS FOR A PARTICULAR PURPOSE. See the DOOM Source Code License +// for more details. +// +// DESCRIPTION: +// System specific interface stuff. +// +//----------------------------------------------------------------------------- + + +#ifndef __R_DRAW_RGBA__ +#define __R_DRAW_RGBA__ + +#include "r_draw.h" +#include +#include +#include +#include +#include + +///////////////////////////////////////////////////////////////////////////// +// Drawer functions: + +void rt_initcols_rgba(BYTE *buffer); +void rt_span_coverage_rgba(int x, int start, int stop); + +void rt_copy1col_RGBA(int hx, int sx, int yl, int yh); +void rt_copy4cols_RGBA(int sx, int yl, int yh); +void rt_shaded1col_RGBA(int hx, int sx, int yl, int yh); +void rt_shaded4cols_RGBA(int sx, int yl, int yh); +void rt_map1col_RGBA(int hx, int sx, int yl, int yh); +void rt_add1col_RGBA(int hx, int sx, int yl, int yh); +void rt_addclamp1col_RGBA(int hx, int sx, int yl, int yh); +void rt_subclamp1col_RGBA(int hx, int sx, int yl, int yh); +void rt_revsubclamp1col_RGBA(int hx, int sx, int yl, int yh); +void rt_tlate1col_RGBA(int hx, int sx, int yl, int yh); +void rt_tlateadd1col_RGBA(int hx, int sx, int yl, int yh); +void rt_tlateaddclamp1col_RGBA(int hx, int sx, int yl, int yh); +void rt_tlatesubclamp1col_RGBA(int hx, int sx, int yl, int yh); +void rt_tlaterevsubclamp1col_RGBA(int hx, int sx, int yl, int yh); +void rt_map4cols_RGBA(int sx, int yl, int yh); +void rt_add4cols_RGBA(int sx, int yl, int yh); +void rt_addclamp4cols_RGBA(int sx, int yl, int yh); +void rt_subclamp4cols_RGBA(int sx, int yl, int yh); +void rt_revsubclamp4cols_RGBA(int sx, int yl, int yh); +void rt_tlate4cols_RGBA(int sx, int yl, int yh); +void rt_tlateadd4cols_RGBA(int sx, int yl, int yh); +void rt_tlateaddclamp4cols_RGBA(int sx, int yl, int yh); +void rt_tlatesubclamp4cols_RGBA(int sx, int yl, int 
yh); +void rt_tlaterevsubclamp4cols_RGBA(int sx, int yl, int yh); + +void R_DrawColumnHorizP_RGBA(); +void R_DrawColumnP_RGBA(); +void R_DrawFuzzColumnP_RGBA(); +void R_DrawTranslatedColumnP_RGBA(); +void R_DrawShadedColumnP_RGBA(); + +void R_FillColumnP_RGBA(); +void R_FillAddColumn_RGBA(); +void R_FillAddClampColumn_RGBA(); +void R_FillSubClampColumn_RGBA(); +void R_FillRevSubClampColumn_RGBA(); +void R_DrawAddColumnP_RGBA(); +void R_DrawTlatedAddColumnP_RGBA(); +void R_DrawAddClampColumnP_RGBA(); +void R_DrawAddClampTranslatedColumnP_RGBA(); +void R_DrawSubClampColumnP_RGBA(); +void R_DrawSubClampTranslatedColumnP_RGBA(); +void R_DrawRevSubClampColumnP_RGBA(); +void R_DrawRevSubClampTranslatedColumnP_RGBA(); + +void R_DrawSpanP_RGBA(void); +void R_DrawSpanMaskedP_RGBA(void); +void R_DrawSpanTranslucentP_RGBA(); +void R_DrawSpanMaskedTranslucentP_RGBA(); +void R_DrawSpanAddClampP_RGBA(); +void R_DrawSpanMaskedAddClampP_RGBA(); +void R_FillSpan_RGBA(); + +void R_DrawFogBoundary_RGBA(int x1, int x2, short *uclip, short *dclip); + +DWORD vlinec1_RGBA(); +void vlinec4_RGBA(); +DWORD mvlinec1_RGBA(); +void mvlinec4_RGBA(); +fixed_t tmvline1_add_RGBA(); +void tmvline4_add_RGBA(); +fixed_t tmvline1_addclamp_RGBA(); +void tmvline4_addclamp_RGBA(); +fixed_t tmvline1_subclamp_RGBA(); +void tmvline4_subclamp_RGBA(); +fixed_t tmvline1_revsubclamp_RGBA(); +void tmvline4_revsubclamp_RGBA(); + +void R_FillColumnHorizP_RGBA(); +void R_FillSpan_RGBA(); + +///////////////////////////////////////////////////////////////////////////// +// Multithreaded rendering infrastructure: + +// Redirect drawer commands to worker threads +void R_BeginDrawerCommands(); + +// Wait until all drawers finished executing +void R_EndDrawerCommands(); + +struct FSpecialColormap; +class DrawerCommandQueue; + +// Worker data for each thread executing drawer commands +class DrawerThread +{ +public: + std::thread thread; + + // Thread line index of this thread + int core = 0; + + // Number of active 
threads + int num_cores = 1; + + // Range of rows processed this pass + int pass_start_y = 0; + int pass_end_y = MAXHEIGHT; + + uint32_t dc_temp_rgbabuff_rgba[MAXHEIGHT * 4]; + uint32_t *dc_temp_rgba; + + // Checks if a line is rendered by this thread + bool line_skipped_by_thread(int line) + { + return line < pass_start_y || line >= pass_end_y || line % num_cores != core; + } + + // The number of lines to skip to reach the first line to be rendered by this thread + int skipped_by_thread(int first_line) + { + int pass_skip = MAX(pass_start_y - first_line, 0); + int core_skip = (num_cores - (first_line + pass_skip - core) % num_cores) % num_cores; + return pass_skip + core_skip; + } + + // The number of lines to be rendered by this thread + int count_for_thread(int first_line, int count) + { + int lines_until_pass_end = MAX(pass_end_y - first_line, 0); + count = MIN(count, lines_until_pass_end); + int c = (count - skipped_by_thread(first_line) + num_cores - 1) / num_cores; + return MAX(c, 0); + } + + // Calculate the dest address for the first line to be rendered by this thread + uint32_t *dest_for_thread(int first_line, int pitch, uint32_t *dest) + { + return dest + skipped_by_thread(first_line) * pitch; + } +}; + +// Task to be executed by each worker thread +class DrawerCommand +{ +protected: + int dc_dest_y; + +public: + DrawerCommand() + { + dc_dest_y = static_cast((dc_dest - dc_destorg) / (dc_pitch * 4)); + } + + virtual void Execute(DrawerThread *thread) = 0; +}; + +// Manages queueing up commands and executing them on worker threads +class DrawerCommandQueue +{ + enum { memorypool_size = 4 * 1024 * 1024 }; + char memorypool[memorypool_size]; + size_t memorypool_pos = 0; + + std::vector commands; + + std::vector threads; + + std::mutex start_mutex; + std::condition_variable start_condition; + std::vector active_commands; + bool shutdown_flag = false; + int run_id = 0; + + std::mutex end_mutex; + std::condition_variable end_condition; + size_t finished_threads 
= 0; + + int threaded_render = 0; + DrawerThread single_core_thread; + int num_passes = 2; + int rows_in_pass = 540; + + void StartThreads(); + void StopThreads(); + void Finish(); + + static DrawerCommandQueue *Instance(); + + ~DrawerCommandQueue(); + +public: + // Allocate memory valid for the duration of a command execution + static void* AllocMemory(size_t size); + + // Queue command to be executed by drawer worker threads + template + static void QueueCommand(Types &&... args) + { + auto queue = Instance(); + if (queue->threaded_render == 0) + { + T command(std::forward(args)...); + command.Execute(&queue->single_core_thread); + } + else + { + void *ptr = AllocMemory(sizeof(T)); + if (!ptr) + return; + T *command = new (ptr)T(std::forward(args)...); + queue->commands.push_back(command); + } + } + + // Redirects all drawing commands to worker threads until End is called + // Begin/End blocks can be nested. + static void Begin(); + + // End redirection and wait until all worker threads finished executing + static void End(); + + // Waits until all worker threads finished executing + static void WaitForWorkers(); +}; + +///////////////////////////////////////////////////////////////////////////// +// Drawer commands: + +class ApplySpecialColormapRGBACommand : public DrawerCommand +{ + BYTE *buffer; + int pitch; + int width; + int height; + int start_red; + int start_green; + int start_blue; + int end_red; + int end_green; + int end_blue; + +public: + ApplySpecialColormapRGBACommand(FSpecialColormap *colormap, DFrameBuffer *screen); + void Execute(DrawerThread *thread) override; +}; + +#endif diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp index cd124ac639..32d5080c5f 100644 --- a/src/r_drawt_rgba.cpp +++ b/src/r_drawt_rgba.cpp @@ -42,6 +42,7 @@ #include "r_main.h" #include "r_things.h" #include "v_video.h" +#include "r_draw_rgba.h" #ifndef NO_SSE #include #endif @@ -1628,171 +1629,171 @@ public: 
///////////////////////////////////////////////////////////////////////////// // Copies one span at hx to the screen at sx. -void rt_copy1col_RGBA_c (int hx, int sx, int yl, int yh) +void rt_copy1col_RGBA (int hx, int sx, int yl, int yh) { DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Copies all four spans to the screen starting at sx. -void rt_copy4cols_RGBA_c (int sx, int yl, int yh) +void rt_copy4cols_RGBA (int sx, int yl, int yh) { // To do: we could do this with SSE using __m128i - rt_copy1col_RGBA_c(0, sx, yl, yh); - rt_copy1col_RGBA_c(1, sx + 1, yl, yh); - rt_copy1col_RGBA_c(2, sx + 2, yl, yh); - rt_copy1col_RGBA_c(3, sx + 3, yl, yh); + rt_copy1col_RGBA(0, sx, yl, yh); + rt_copy1col_RGBA(1, sx + 1, yl, yh); + rt_copy1col_RGBA(2, sx + 2, yl, yh); + rt_copy1col_RGBA(3, sx + 3, yl, yh); } // Maps one span at hx to the screen at sx. -void rt_map1col_RGBA_c (int hx, int sx, int yl, int yh) +void rt_map1col_RGBA (int hx, int sx, int yl, int yh) { DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Maps all four spans to the screen starting at sx. -void rt_map4cols_RGBA_c (int sx, int yl, int yh) +void rt_map4cols_RGBA (int sx, int yl, int yh) { DrawerCommandQueue::QueueCommand(sx, yl, yh); } -void rt_Translate1col_RGBA_c(const BYTE *translation, int hx, int yl, int yh) +void rt_Translate1col_RGBA(const BYTE *translation, int hx, int yl, int yh) { DrawerCommandQueue::QueueCommand(translation, hx, yl, yh); } -void rt_Translate4cols_RGBA_c(const BYTE *translation, int yl, int yh) +void rt_Translate4cols_RGBA(const BYTE *translation, int yl, int yh) { DrawerCommandQueue::QueueCommand(translation, yl, yh); } // Translates one span at hx to the screen at sx. 
-void rt_tlate1col_RGBA_c (int hx, int sx, int yl, int yh) +void rt_tlate1col_RGBA (int hx, int sx, int yl, int yh) { - rt_Translate1col_RGBA_c(dc_translation, hx, yl, yh); + rt_Translate1col_RGBA(dc_translation, hx, yl, yh); rt_map1col(hx, sx, yl, yh); } // Translates all four spans to the screen starting at sx. -void rt_tlate4cols_RGBA_c (int sx, int yl, int yh) +void rt_tlate4cols_RGBA (int sx, int yl, int yh) { - rt_Translate4cols_RGBA_c(dc_translation, yl, yh); + rt_Translate4cols_RGBA(dc_translation, yl, yh); rt_map4cols(sx, yl, yh); } // Adds one span at hx to the screen at sx without clamping. -void rt_add1col_RGBA_c (int hx, int sx, int yl, int yh) +void rt_add1col_RGBA (int hx, int sx, int yl, int yh) { DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Adds all four spans to the screen starting at sx without clamping. -void rt_add4cols_RGBA_c (int sx, int yl, int yh) +void rt_add4cols_RGBA (int sx, int yl, int yh) { DrawerCommandQueue::QueueCommand(sx, yl, yh); } // Translates and adds one span at hx to the screen at sx without clamping. -void rt_tlateadd1col_RGBA_c (int hx, int sx, int yl, int yh) +void rt_tlateadd1col_RGBA (int hx, int sx, int yl, int yh) { - rt_Translate1col_RGBA_c(dc_translation, hx, yl, yh); + rt_Translate1col_RGBA(dc_translation, hx, yl, yh); rt_add1col(hx, sx, yl, yh); } // Translates and adds all four spans to the screen starting at sx without clamping. -void rt_tlateadd4cols_RGBA_c(int sx, int yl, int yh) +void rt_tlateadd4cols_RGBA(int sx, int yl, int yh) { - rt_Translate4cols_RGBA_c(dc_translation, yl, yh); + rt_Translate4cols_RGBA(dc_translation, yl, yh); rt_add4cols(sx, yl, yh); } // Shades one span at hx to the screen at sx. -void rt_shaded1col_RGBA_c (int hx, int sx, int yl, int yh) +void rt_shaded1col_RGBA (int hx, int sx, int yl, int yh) { DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Shades all four spans to the screen starting at sx. 
-void rt_shaded4cols_RGBA_c (int sx, int yl, int yh) +void rt_shaded4cols_RGBA (int sx, int yl, int yh) { DrawerCommandQueue::QueueCommand(sx, yl, yh); } // Adds one span at hx to the screen at sx with clamping. -void rt_addclamp1col_RGBA_c (int hx, int sx, int yl, int yh) +void rt_addclamp1col_RGBA (int hx, int sx, int yl, int yh) { DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Adds all four spans to the screen starting at sx with clamping. -void rt_addclamp4cols_RGBA_c (int sx, int yl, int yh) +void rt_addclamp4cols_RGBA (int sx, int yl, int yh) { DrawerCommandQueue::QueueCommand(sx, yl, yh); } // Translates and adds one span at hx to the screen at sx with clamping. -void rt_tlateaddclamp1col_RGBA_c (int hx, int sx, int yl, int yh) +void rt_tlateaddclamp1col_RGBA (int hx, int sx, int yl, int yh) { - rt_Translate1col_RGBA_c(dc_translation, hx, yl, yh); - rt_addclamp1col_RGBA_c(hx, sx, yl, yh); + rt_Translate1col_RGBA(dc_translation, hx, yl, yh); + rt_addclamp1col_RGBA(hx, sx, yl, yh); } // Translates and adds all four spans to the screen starting at sx with clamping. -void rt_tlateaddclamp4cols_RGBA_c (int sx, int yl, int yh) +void rt_tlateaddclamp4cols_RGBA (int sx, int yl, int yh) { - rt_Translate4cols_RGBA_c(dc_translation, yl, yh); + rt_Translate4cols_RGBA(dc_translation, yl, yh); rt_addclamp4cols(sx, yl, yh); } // Subtracts one span at hx to the screen at sx with clamping. -void rt_subclamp1col_RGBA_c (int hx, int sx, int yl, int yh) +void rt_subclamp1col_RGBA (int hx, int sx, int yl, int yh) { DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Subtracts all four spans to the screen starting at sx with clamping. -void rt_subclamp4cols_RGBA_c (int sx, int yl, int yh) +void rt_subclamp4cols_RGBA (int sx, int yl, int yh) { DrawerCommandQueue::QueueCommand(sx, yl, yh); } // Translates and subtracts one span at hx to the screen at sx with clamping. 
-void rt_tlatesubclamp1col_RGBA_c (int hx, int sx, int yl, int yh) +void rt_tlatesubclamp1col_RGBA (int hx, int sx, int yl, int yh) { - rt_Translate1col_RGBA_c(dc_translation, hx, yl, yh); - rt_subclamp1col_RGBA_c(hx, sx, yl, yh); + rt_Translate1col_RGBA(dc_translation, hx, yl, yh); + rt_subclamp1col_RGBA(hx, sx, yl, yh); } // Translates and subtracts all four spans to the screen starting at sx with clamping. -void rt_tlatesubclamp4cols_RGBA_c (int sx, int yl, int yh) +void rt_tlatesubclamp4cols_RGBA (int sx, int yl, int yh) { - rt_Translate4cols_RGBA_c(dc_translation, yl, yh); - rt_subclamp4cols_RGBA_c(sx, yl, yh); + rt_Translate4cols_RGBA(dc_translation, yl, yh); + rt_subclamp4cols_RGBA(sx, yl, yh); } // Subtracts one span at hx from the screen at sx with clamping. -void rt_revsubclamp1col_RGBA_c (int hx, int sx, int yl, int yh) +void rt_revsubclamp1col_RGBA (int hx, int sx, int yl, int yh) { DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Subtracts all four spans from the screen starting at sx with clamping. -void rt_revsubclamp4cols_RGBA_c (int sx, int yl, int yh) +void rt_revsubclamp4cols_RGBA (int sx, int yl, int yh) { DrawerCommandQueue::QueueCommand(sx, yl, yh); } // Translates and subtracts one span at hx from the screen at sx with clamping. -void rt_tlaterevsubclamp1col_RGBA_c (int hx, int sx, int yl, int yh) +void rt_tlaterevsubclamp1col_RGBA (int hx, int sx, int yl, int yh) { - rt_Translate1col_RGBA_c(dc_translation, hx, yl, yh); - rt_revsubclamp1col_RGBA_c(hx, sx, yl, yh); + rt_Translate1col_RGBA(dc_translation, hx, yl, yh); + rt_revsubclamp1col_RGBA(hx, sx, yl, yh); } // Translates and subtracts all four spans from the screen starting at sx with clamping. 
-void rt_tlaterevsubclamp4cols_RGBA_c (int sx, int yl, int yh) +void rt_tlaterevsubclamp4cols_RGBA (int sx, int yl, int yh) { - rt_Translate4cols_RGBA_c(dc_translation, yl, yh); - rt_revsubclamp4cols_RGBA_c(sx, yl, yh); + rt_Translate4cols_RGBA(dc_translation, yl, yh); + rt_revsubclamp4cols_RGBA(sx, yl, yh); } // Before each pass through a rendering loop that uses these routines, @@ -1815,7 +1816,7 @@ void rt_span_coverage_rgba(int x, int start, int stop) // Stretches a column into a temporary buffer which is later // drawn to the screen along with up to three other columns. -void R_DrawColumnHorizP_RGBA_C (void) +void R_DrawColumnHorizP_RGBA (void) { if (dc_count <= 0) return; @@ -1830,7 +1831,7 @@ void R_DrawColumnHorizP_RGBA_C (void) } // [RH] Just fills a column with a given color -void R_FillColumnHorizP_RGBA_C (void) +void R_FillColumnHorizP_RGBA (void) { if (dc_count <= 0) return; diff --git a/src/r_main.cpp b/src/r_main.cpp index 4e5ff1dbdb..247a981255 100644 --- a/src/r_main.cpp +++ b/src/r_main.cpp @@ -40,6 +40,7 @@ #include "r_segs.h" #include "r_3dfloors.h" #include "r_sky.h" +#include "r_draw_rgba.h" #include "st_stuff.h" #include "c_cvars.h" #include "c_dispatch.h" diff --git a/src/r_swrenderer.cpp b/src/r_swrenderer.cpp index 5be41660e6..fbbd65b17c 100644 --- a/src/r_swrenderer.cpp +++ b/src/r_swrenderer.cpp @@ -42,6 +42,7 @@ #include "r_3dfloors.h" #include "textures/textures.h" #include "r_data/voxels.h" +#include "r_draw_rgba.h" EXTERN_CVAR(Bool, r_shadercolormaps) diff --git a/src/r_things.cpp b/src/r_things.cpp index 2dc0bdb6cd..f1f29f160d 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -58,6 +58,7 @@ #include "r_plane.h" #include "r_segs.h" #include "r_3dfloors.h" +#include "r_draw_rgba.h" #include "v_palette.h" #include "r_data/r_translate.h" #include "r_data/colormaps.h" From 586d5cdf1eb5609fcd480aa0d69fc764c4fc0103 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 13 Jun 2016 23:33:52 +0200 Subject: [PATCH 043/912] Normalize 
naming convention a little --- src/r_draw.cpp | 136 +++++++++++++++++++++---------------------- src/r_draw_rgba.cpp | 80 ++++++++++++------------- src/r_draw_rgba.h | 128 ++++++++++++++++++++-------------------- src/r_drawt_rgba.cpp | 94 +++++++++++++++--------------- src/r_plane.cpp | 4 +- src/r_plane.h | 4 +- src/r_things.cpp | 2 +- src/r_things.h | 2 +- 8 files changed, 225 insertions(+), 225 deletions(-) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 552e5ff13a..ecb4441f8c 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -2296,82 +2296,82 @@ void R_InitColumnDrawers () domvline4_saved = domvline4; } - R_DrawColumnHoriz = R_DrawColumnHorizP_RGBA; - R_DrawColumn = R_DrawColumnP_RGBA; - R_DrawFuzzColumn = R_DrawFuzzColumnP_RGBA; - R_DrawTranslatedColumn = R_DrawTranslatedColumnP_RGBA; - R_DrawShadedColumn = R_DrawShadedColumnP_RGBA; - R_DrawSpanMasked = R_DrawSpanMaskedP_RGBA; - R_DrawSpan = R_DrawSpanP_RGBA; + R_DrawColumnHoriz = R_DrawColumnHoriz_rgba; + R_DrawColumn = R_DrawColumn_rgba; + R_DrawFuzzColumn = R_DrawFuzzColumn_rgba; + R_DrawTranslatedColumn = R_DrawTranslatedColumn_rgba; + R_DrawShadedColumn = R_DrawShadedColumn_rgba; + R_DrawSpanMasked = R_DrawSpanMasked_rgba; + R_DrawSpan = R_DrawSpan_rgba; - R_DrawSpanTranslucent = R_DrawSpanTranslucentP_RGBA; - R_DrawSpanMaskedTranslucent = R_DrawSpanMaskedTranslucentP_RGBA; - R_DrawSpanAddClamp = R_DrawSpanAddClampP_RGBA; - R_DrawSpanMaskedAddClamp = R_DrawSpanMaskedAddClampP_RGBA; - R_FillColumn = R_FillColumnP_RGBA; - R_FillAddColumn = R_FillAddColumn_RGBA; - R_FillAddClampColumn = R_FillAddClampColumn_RGBA; - R_FillSubClampColumn = R_FillSubClampColumn_RGBA; - R_FillRevSubClampColumn = R_FillRevSubClampColumn_RGBA; - R_DrawAddColumn = R_DrawAddColumnP_RGBA; - R_DrawTlatedAddColumn = R_DrawTlatedAddColumnP_RGBA; - R_DrawAddClampColumn = R_DrawAddClampColumnP_RGBA; - R_DrawAddClampTranslatedColumn = R_DrawAddClampTranslatedColumnP_RGBA; - R_DrawSubClampColumn = R_DrawSubClampColumnP_RGBA; - 
R_DrawSubClampTranslatedColumn = R_DrawSubClampTranslatedColumnP_RGBA; - R_DrawRevSubClampColumn = R_DrawRevSubClampColumnP_RGBA; - R_DrawRevSubClampTranslatedColumn = R_DrawRevSubClampTranslatedColumnP_RGBA; - R_FillSpan = R_FillSpan_RGBA; - R_DrawFogBoundary = R_DrawFogBoundary_RGBA; - R_FillColumnHoriz = R_FillColumnHorizP_RGBA; + R_DrawSpanTranslucent = R_DrawSpanTranslucent_rgba; + R_DrawSpanMaskedTranslucent = R_DrawSpanMaskedTranslucent_rgba; + R_DrawSpanAddClamp = R_DrawSpanAddClamp_rgba; + R_DrawSpanMaskedAddClamp = R_DrawSpanMaskedAddClamp_rgba; + R_FillColumn = R_FillColumn_rgba; + R_FillAddColumn = R_FillAddColumn_rgba; + R_FillAddClampColumn = R_FillAddClampColumn_rgba; + R_FillSubClampColumn = R_FillSubClampColumn_rgba; + R_FillRevSubClampColumn = R_FillRevSubClampColumn_rgba; + R_DrawAddColumn = R_DrawAddColumn_rgba; + R_DrawTlatedAddColumn = R_DrawTlatedAddColumn_rgba; + R_DrawAddClampColumn = R_DrawAddClampColumn_rgba; + R_DrawAddClampTranslatedColumn = R_DrawAddClampTranslatedColumn_rgba; + R_DrawSubClampColumn = R_DrawSubClampColumn_rgba; + R_DrawSubClampTranslatedColumn = R_DrawSubClampTranslatedColumn_rgba; + R_DrawRevSubClampColumn = R_DrawRevSubClampColumn_rgba; + R_DrawRevSubClampTranslatedColumn = R_DrawRevSubClampTranslatedColumn_rgba; + R_FillSpan = R_FillSpan_rgba; + R_DrawFogBoundary = R_DrawFogBoundary_rgba; + R_FillColumnHoriz = R_FillColumnHoriz_rgba; - R_DrawFogBoundary = R_DrawFogBoundary_RGBA; - R_MapTiltedPlane = R_MapTiltedPlane_RGBA; - R_MapColoredPlane = R_MapColoredPlane_RGBA; - R_DrawParticle = R_DrawParticle_RGBA; + R_DrawFogBoundary = R_DrawFogBoundary_rgba; + R_MapTiltedPlane = R_MapTiltedPlane_rgba; + R_MapColoredPlane = R_MapColoredPlane_rgba; + R_DrawParticle = R_DrawParticle_rgba; - tmvline1_add = tmvline1_add_RGBA; - tmvline4_add = tmvline4_add_RGBA; - tmvline1_addclamp = tmvline1_addclamp_RGBA; - tmvline4_addclamp = tmvline4_addclamp_RGBA; - tmvline1_subclamp = tmvline1_subclamp_RGBA; - tmvline4_subclamp = 
tmvline4_subclamp_RGBA; - tmvline1_revsubclamp = tmvline1_revsubclamp_RGBA; - tmvline4_revsubclamp = tmvline4_revsubclamp_RGBA; + tmvline1_add = tmvline1_add_rgba; + tmvline4_add = tmvline4_add_rgba; + tmvline1_addclamp = tmvline1_addclamp_rgba; + tmvline4_addclamp = tmvline4_addclamp_rgba; + tmvline1_subclamp = tmvline1_subclamp_rgba; + tmvline4_subclamp = tmvline4_subclamp_rgba; + tmvline1_revsubclamp = tmvline1_revsubclamp_rgba; + tmvline4_revsubclamp = tmvline4_revsubclamp_rgba; - rt_copy1col = rt_copy1col_RGBA; - rt_copy4cols = rt_copy4cols_RGBA; - rt_map1col = rt_map1col_RGBA; - rt_map4cols = rt_map4cols_RGBA; - rt_shaded1col = rt_shaded1col_RGBA; - rt_shaded4cols = rt_shaded4cols_RGBA; - rt_add1col = rt_add1col_RGBA; - rt_add4cols = rt_add4cols_RGBA; - rt_addclamp1col = rt_addclamp1col_RGBA; - rt_addclamp4cols = rt_addclamp4cols_RGBA; - rt_subclamp1col = rt_subclamp1col_RGBA; - rt_revsubclamp1col = rt_revsubclamp1col_RGBA; - rt_tlate1col = rt_tlate1col_RGBA; - rt_tlateadd1col = rt_tlateadd1col_RGBA; - rt_tlateaddclamp1col = rt_tlateaddclamp1col_RGBA; - rt_tlatesubclamp1col = rt_tlatesubclamp1col_RGBA; - rt_tlaterevsubclamp1col = rt_tlaterevsubclamp1col_RGBA; - rt_subclamp4cols = rt_subclamp4cols_RGBA; - rt_revsubclamp4cols = rt_revsubclamp4cols_RGBA; - rt_tlate4cols = rt_tlate4cols_RGBA; - rt_tlateadd4cols = rt_tlateadd4cols_RGBA; - rt_tlateaddclamp4cols = rt_tlateaddclamp4cols_RGBA; - rt_tlatesubclamp4cols = rt_tlatesubclamp4cols_RGBA; - rt_tlaterevsubclamp4cols = rt_tlaterevsubclamp4cols_RGBA; + rt_copy1col = rt_copy1col_rgba; + rt_copy4cols = rt_copy4cols_rgba; + rt_map1col = rt_map1col_rgba; + rt_map4cols = rt_map4cols_rgba; + rt_shaded1col = rt_shaded1col_rgba; + rt_shaded4cols = rt_shaded4cols_rgba; + rt_add1col = rt_add1col_rgba; + rt_add4cols = rt_add4cols_rgba; + rt_addclamp1col = rt_addclamp1col_rgba; + rt_addclamp4cols = rt_addclamp4cols_rgba; + rt_subclamp1col = rt_subclamp1col_rgba; + rt_revsubclamp1col = rt_revsubclamp1col_rgba; + rt_tlate1col 
= rt_tlate1col_rgba; + rt_tlateadd1col = rt_tlateadd1col_rgba; + rt_tlateaddclamp1col = rt_tlateaddclamp1col_rgba; + rt_tlatesubclamp1col = rt_tlatesubclamp1col_rgba; + rt_tlaterevsubclamp1col = rt_tlaterevsubclamp1col_rgba; + rt_subclamp4cols = rt_subclamp4cols_rgba; + rt_revsubclamp4cols = rt_revsubclamp4cols_rgba; + rt_tlate4cols = rt_tlate4cols_rgba; + rt_tlateadd4cols = rt_tlateadd4cols_rgba; + rt_tlateaddclamp4cols = rt_tlateaddclamp4cols_rgba; + rt_tlatesubclamp4cols = rt_tlatesubclamp4cols_rgba; + rt_tlaterevsubclamp4cols = rt_tlaterevsubclamp4cols_rgba; rt_initcols = rt_initcols_rgba; rt_span_coverage = rt_span_coverage_rgba; - dovline1 = vlinec1_RGBA; - doprevline1 = vlinec1_RGBA; - domvline1 = mvlinec1_RGBA; + dovline1 = vlinec1_rgba; + doprevline1 = vlinec1_rgba; + domvline1 = mvlinec1_rgba; - dovline4 = vlinec4_RGBA; - domvline4 = mvlinec4_RGBA; + dovline4 = vlinec4_rgba; + domvline4 = mvlinec4_rgba; } else { diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index a9dd2db322..2062609b41 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -3656,130 +3656,130 @@ void R_EndDrawerCommands() DrawerCommandQueue::End(); } -void R_DrawColumnP_RGBA() +void R_DrawColumn_rgba() { DrawerCommandQueue::QueueCommand(); } -void R_FillColumnP_RGBA() +void R_FillColumn_rgba() { DrawerCommandQueue::QueueCommand(); } -void R_FillAddColumn_RGBA() +void R_FillAddColumn_rgba() { DrawerCommandQueue::QueueCommand(); } -void R_FillAddClampColumn_RGBA() +void R_FillAddClampColumn_rgba() { DrawerCommandQueue::QueueCommand(); } -void R_FillSubClampColumn_RGBA() +void R_FillSubClampColumn_rgba() { DrawerCommandQueue::QueueCommand(); } -void R_FillRevSubClampColumn_RGBA() +void R_FillRevSubClampColumn_rgba() { DrawerCommandQueue::QueueCommand(); } -void R_DrawFuzzColumnP_RGBA() +void R_DrawFuzzColumn_rgba() { DrawerCommandQueue::QueueCommand(); fuzzpos = (fuzzpos + dc_yh - dc_yl) % FUZZTABLE; } -void R_DrawAddColumnP_RGBA() +void R_DrawAddColumn_rgba() { 
DrawerCommandQueue::QueueCommand(); } -void R_DrawTranslatedColumnP_RGBA() +void R_DrawTranslatedColumn_rgba() { DrawerCommandQueue::QueueCommand(); } -void R_DrawTlatedAddColumnP_RGBA() +void R_DrawTlatedAddColumn_rgba() { DrawerCommandQueue::QueueCommand(); } -void R_DrawShadedColumnP_RGBA() +void R_DrawShadedColumn_rgba() { DrawerCommandQueue::QueueCommand(); } -void R_DrawAddClampColumnP_RGBA() +void R_DrawAddClampColumn_rgba() { DrawerCommandQueue::QueueCommand(); } -void R_DrawAddClampTranslatedColumnP_RGBA() +void R_DrawAddClampTranslatedColumn_rgba() { DrawerCommandQueue::QueueCommand(); } -void R_DrawSubClampColumnP_RGBA() +void R_DrawSubClampColumn_rgba() { DrawerCommandQueue::QueueCommand(); } -void R_DrawSubClampTranslatedColumnP_RGBA() +void R_DrawSubClampTranslatedColumn_rgba() { DrawerCommandQueue::QueueCommand(); } -void R_DrawRevSubClampColumnP_RGBA() +void R_DrawRevSubClampColumn_rgba() { DrawerCommandQueue::QueueCommand(); } -void R_DrawRevSubClampTranslatedColumnP_RGBA() +void R_DrawRevSubClampTranslatedColumn_rgba() { DrawerCommandQueue::QueueCommand(); } -void R_DrawSpanP_RGBA() +void R_DrawSpan_rgba() { DrawerCommandQueue::QueueCommand(); } -void R_DrawSpanMaskedP_RGBA() +void R_DrawSpanMasked_rgba() { DrawerCommandQueue::QueueCommand(); } -void R_DrawSpanTranslucentP_RGBA() +void R_DrawSpanTranslucent_rgba() { DrawerCommandQueue::QueueCommand(); } -void R_DrawSpanMaskedTranslucentP_RGBA() +void R_DrawSpanMaskedTranslucent_rgba() { DrawerCommandQueue::QueueCommand(); } -void R_DrawSpanAddClampP_RGBA() +void R_DrawSpanAddClamp_rgba() { DrawerCommandQueue::QueueCommand(); } -void R_DrawSpanMaskedAddClampP_RGBA() +void R_DrawSpanMaskedAddClamp_rgba() { DrawerCommandQueue::QueueCommand(); } -void R_FillSpan_RGBA() +void R_FillSpan_rgba() { DrawerCommandQueue::QueueCommand(); } //extern FTexture *rw_pic; // For the asserts below -DWORD vlinec1_RGBA() +DWORD vlinec1_rgba() { /*DWORD fracstep = dc_iscale; DWORD frac = dc_texturefrac; @@ -3792,79 
+3792,79 @@ DWORD vlinec1_RGBA() return dc_texturefrac + dc_count * dc_iscale; } -void vlinec4_RGBA() +void vlinec4_rgba() { DrawerCommandQueue::QueueCommand(); for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } -DWORD mvlinec1_RGBA() +DWORD mvlinec1_rgba() { DrawerCommandQueue::QueueCommand(); return dc_texturefrac + dc_count * dc_iscale; } -void mvlinec4_RGBA() +void mvlinec4_rgba() { DrawerCommandQueue::QueueCommand(); for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } -fixed_t tmvline1_add_RGBA() +fixed_t tmvline1_add_rgba() { DrawerCommandQueue::QueueCommand(); return dc_texturefrac + dc_count * dc_iscale; } -void tmvline4_add_RGBA() +void tmvline4_add_rgba() { DrawerCommandQueue::QueueCommand(); for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } -fixed_t tmvline1_addclamp_RGBA() +fixed_t tmvline1_addclamp_rgba() { DrawerCommandQueue::QueueCommand(); return dc_texturefrac + dc_count * dc_iscale; } -void tmvline4_addclamp_RGBA() +void tmvline4_addclamp_rgba() { DrawerCommandQueue::QueueCommand(); for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } -fixed_t tmvline1_subclamp_RGBA() +fixed_t tmvline1_subclamp_rgba() { DrawerCommandQueue::QueueCommand(); return dc_texturefrac + dc_count * dc_iscale; } -void tmvline4_subclamp_RGBA() +void tmvline4_subclamp_rgba() { DrawerCommandQueue::QueueCommand(); for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } -fixed_t tmvline1_revsubclamp_RGBA() +fixed_t tmvline1_revsubclamp_rgba() { DrawerCommandQueue::QueueCommand(); return dc_texturefrac + dc_count * dc_iscale; } -void tmvline4_revsubclamp_RGBA() +void tmvline4_revsubclamp_rgba() { DrawerCommandQueue::QueueCommand(); for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } -void R_DrawFogBoundarySection_RGBA(int y, int y2, int x1) +void R_DrawFogBoundarySection_rgba(int y, int y2, int x1) { for (; y < y2; ++y) { @@ -3873,7 +3873,7 @@ void R_DrawFogBoundarySection_RGBA(int y, int y2, int x1) } } -void 
R_DrawFogBoundary_RGBA(int x1, int x2, short *uclip, short *dclip) +void R_DrawFogBoundary_rgba(int x1, int x2, short *uclip, short *dclip) { // To do: we do not need to create new spans when using rgba output - instead we should calculate light on a per pixel basis @@ -3913,7 +3913,7 @@ void R_DrawFogBoundary_RGBA(int x1, int x2, short *uclip, short *dclip) if (t2 < b2 && rcolormap != 0) { // Colormap 0 is always the identity map, so rendering it is // just a waste of time. - R_DrawFogBoundarySection_RGBA(t2, b2, xr); + R_DrawFogBoundarySection_rgba(t2, b2, xr); } if (t1 < t2) t2 = t1; if (b1 > b2) b2 = b1; @@ -3965,6 +3965,6 @@ void R_DrawFogBoundary_RGBA(int x1, int x2, short *uclip, short *dclip) } if (t2 < b2 && rcolormap != 0) { - R_DrawFogBoundarySection_RGBA(t2, b2, x1); + R_DrawFogBoundarySection_rgba(t2, b2, x1); } } diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index 5d7402634b..9f07ff0bf0 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -36,76 +36,76 @@ void rt_initcols_rgba(BYTE *buffer); void rt_span_coverage_rgba(int x, int start, int stop); -void rt_copy1col_RGBA(int hx, int sx, int yl, int yh); -void rt_copy4cols_RGBA(int sx, int yl, int yh); -void rt_shaded1col_RGBA(int hx, int sx, int yl, int yh); -void rt_shaded4cols_RGBA(int sx, int yl, int yh); -void rt_map1col_RGBA(int hx, int sx, int yl, int yh); -void rt_add1col_RGBA(int hx, int sx, int yl, int yh); -void rt_addclamp1col_RGBA(int hx, int sx, int yl, int yh); -void rt_subclamp1col_RGBA(int hx, int sx, int yl, int yh); -void rt_revsubclamp1col_RGBA(int hx, int sx, int yl, int yh); -void rt_tlate1col_RGBA(int hx, int sx, int yl, int yh); -void rt_tlateadd1col_RGBA(int hx, int sx, int yl, int yh); -void rt_tlateaddclamp1col_RGBA(int hx, int sx, int yl, int yh); -void rt_tlatesubclamp1col_RGBA(int hx, int sx, int yl, int yh); -void rt_tlaterevsubclamp1col_RGBA(int hx, int sx, int yl, int yh); -void rt_map4cols_RGBA(int sx, int yl, int yh); -void rt_add4cols_RGBA(int sx, int yl, int 
yh); -void rt_addclamp4cols_RGBA(int sx, int yl, int yh); -void rt_subclamp4cols_RGBA(int sx, int yl, int yh); -void rt_revsubclamp4cols_RGBA(int sx, int yl, int yh); -void rt_tlate4cols_RGBA(int sx, int yl, int yh); -void rt_tlateadd4cols_RGBA(int sx, int yl, int yh); -void rt_tlateaddclamp4cols_RGBA(int sx, int yl, int yh); -void rt_tlatesubclamp4cols_RGBA(int sx, int yl, int yh); -void rt_tlaterevsubclamp4cols_RGBA(int sx, int yl, int yh); +void rt_copy1col_rgba(int hx, int sx, int yl, int yh); +void rt_copy4cols_rgba(int sx, int yl, int yh); +void rt_shaded1col_rgba(int hx, int sx, int yl, int yh); +void rt_shaded4cols_rgba(int sx, int yl, int yh); +void rt_map1col_rgba(int hx, int sx, int yl, int yh); +void rt_add1col_rgba(int hx, int sx, int yl, int yh); +void rt_addclamp1col_rgba(int hx, int sx, int yl, int yh); +void rt_subclamp1col_rgba(int hx, int sx, int yl, int yh); +void rt_revsubclamp1col_rgba(int hx, int sx, int yl, int yh); +void rt_tlate1col_rgba(int hx, int sx, int yl, int yh); +void rt_tlateadd1col_rgba(int hx, int sx, int yl, int yh); +void rt_tlateaddclamp1col_rgba(int hx, int sx, int yl, int yh); +void rt_tlatesubclamp1col_rgba(int hx, int sx, int yl, int yh); +void rt_tlaterevsubclamp1col_rgba(int hx, int sx, int yl, int yh); +void rt_map4cols_rgba(int sx, int yl, int yh); +void rt_add4cols_rgba(int sx, int yl, int yh); +void rt_addclamp4cols_rgba(int sx, int yl, int yh); +void rt_subclamp4cols_rgba(int sx, int yl, int yh); +void rt_revsubclamp4cols_rgba(int sx, int yl, int yh); +void rt_tlate4cols_rgba(int sx, int yl, int yh); +void rt_tlateadd4cols_rgba(int sx, int yl, int yh); +void rt_tlateaddclamp4cols_rgba(int sx, int yl, int yh); +void rt_tlatesubclamp4cols_rgba(int sx, int yl, int yh); +void rt_tlaterevsubclamp4cols_rgba(int sx, int yl, int yh); -void R_DrawColumnHorizP_RGBA(); -void R_DrawColumnP_RGBA(); -void R_DrawFuzzColumnP_RGBA(); -void R_DrawTranslatedColumnP_RGBA(); -void R_DrawShadedColumnP_RGBA(); +void 
R_DrawColumnHoriz_rgba(); +void R_DrawColumn_rgba(); +void R_DrawFuzzColumn_rgba(); +void R_DrawTranslatedColumn_rgba(); +void R_DrawShadedColumn_rgba(); -void R_FillColumnP_RGBA(); -void R_FillAddColumn_RGBA(); -void R_FillAddClampColumn_RGBA(); -void R_FillSubClampColumn_RGBA(); -void R_FillRevSubClampColumn_RGBA(); -void R_DrawAddColumnP_RGBA(); -void R_DrawTlatedAddColumnP_RGBA(); -void R_DrawAddClampColumnP_RGBA(); -void R_DrawAddClampTranslatedColumnP_RGBA(); -void R_DrawSubClampColumnP_RGBA(); -void R_DrawSubClampTranslatedColumnP_RGBA(); -void R_DrawRevSubClampColumnP_RGBA(); -void R_DrawRevSubClampTranslatedColumnP_RGBA(); +void R_FillColumn_rgba(); +void R_FillAddColumn_rgba(); +void R_FillAddClampColumn_rgba(); +void R_FillSubClampColumn_rgba(); +void R_FillRevSubClampColumn_rgba(); +void R_DrawAddColumn_rgba(); +void R_DrawTlatedAddColumn_rgba(); +void R_DrawAddClampColumn_rgba(); +void R_DrawAddClampTranslatedColumn_rgba(); +void R_DrawSubClampColumn_rgba(); +void R_DrawSubClampTranslatedColumn_rgba(); +void R_DrawRevSubClampColumn_rgba(); +void R_DrawRevSubClampTranslatedColumn_rgba(); -void R_DrawSpanP_RGBA(void); -void R_DrawSpanMaskedP_RGBA(void); -void R_DrawSpanTranslucentP_RGBA(); -void R_DrawSpanMaskedTranslucentP_RGBA(); -void R_DrawSpanAddClampP_RGBA(); -void R_DrawSpanMaskedAddClampP_RGBA(); -void R_FillSpan_RGBA(); +void R_DrawSpan_rgba(void); +void R_DrawSpanMasked_rgba(void); +void R_DrawSpanTranslucent_rgba(); +void R_DrawSpanMaskedTranslucent_rgba(); +void R_DrawSpanAddClamp_rgba(); +void R_DrawSpanMaskedAddClamp_rgba(); +void R_FillSpan_rgba(); -void R_DrawFogBoundary_RGBA(int x1, int x2, short *uclip, short *dclip); +void R_DrawFogBoundary_rgba(int x1, int x2, short *uclip, short *dclip); -DWORD vlinec1_RGBA(); -void vlinec4_RGBA(); -DWORD mvlinec1_RGBA(); -void mvlinec4_RGBA(); -fixed_t tmvline1_add_RGBA(); -void tmvline4_add_RGBA(); -fixed_t tmvline1_addclamp_RGBA(); -void tmvline4_addclamp_RGBA(); -fixed_t tmvline1_subclamp_RGBA(); 
-void tmvline4_subclamp_RGBA(); -fixed_t tmvline1_revsubclamp_RGBA(); -void tmvline4_revsubclamp_RGBA(); +DWORD vlinec1_rgba(); +void vlinec4_rgba(); +DWORD mvlinec1_rgba(); +void mvlinec4_rgba(); +fixed_t tmvline1_add_rgba(); +void tmvline4_add_rgba(); +fixed_t tmvline1_addclamp_rgba(); +void tmvline4_addclamp_rgba(); +fixed_t tmvline1_subclamp_rgba(); +void tmvline4_subclamp_rgba(); +fixed_t tmvline1_revsubclamp_rgba(); +void tmvline4_revsubclamp_rgba(); -void R_FillColumnHorizP_RGBA(); -void R_FillSpan_RGBA(); +void R_FillColumnHoriz_rgba(); +void R_FillSpan_rgba(); ///////////////////////////////////////////////////////////////////////////// // Multithreaded rendering infrastructure: diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp index 32d5080c5f..8f6d2ca132 100644 --- a/src/r_drawt_rgba.cpp +++ b/src/r_drawt_rgba.cpp @@ -1629,171 +1629,171 @@ public: ///////////////////////////////////////////////////////////////////////////// // Copies one span at hx to the screen at sx. -void rt_copy1col_RGBA (int hx, int sx, int yl, int yh) +void rt_copy1col_rgba (int hx, int sx, int yl, int yh) { DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Copies all four spans to the screen starting at sx. -void rt_copy4cols_RGBA (int sx, int yl, int yh) +void rt_copy4cols_rgba (int sx, int yl, int yh) { // To do: we could do this with SSE using __m128i - rt_copy1col_RGBA(0, sx, yl, yh); - rt_copy1col_RGBA(1, sx + 1, yl, yh); - rt_copy1col_RGBA(2, sx + 2, yl, yh); - rt_copy1col_RGBA(3, sx + 3, yl, yh); + rt_copy1col_rgba(0, sx, yl, yh); + rt_copy1col_rgba(1, sx + 1, yl, yh); + rt_copy1col_rgba(2, sx + 2, yl, yh); + rt_copy1col_rgba(3, sx + 3, yl, yh); } // Maps one span at hx to the screen at sx. -void rt_map1col_RGBA (int hx, int sx, int yl, int yh) +void rt_map1col_rgba (int hx, int sx, int yl, int yh) { DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Maps all four spans to the screen starting at sx. 
-void rt_map4cols_RGBA (int sx, int yl, int yh) +void rt_map4cols_rgba (int sx, int yl, int yh) { DrawerCommandQueue::QueueCommand(sx, yl, yh); } -void rt_Translate1col_RGBA(const BYTE *translation, int hx, int yl, int yh) +void rt_Translate1col_rgba(const BYTE *translation, int hx, int yl, int yh) { DrawerCommandQueue::QueueCommand(translation, hx, yl, yh); } -void rt_Translate4cols_RGBA(const BYTE *translation, int yl, int yh) +void rt_Translate4cols_rgba(const BYTE *translation, int yl, int yh) { DrawerCommandQueue::QueueCommand(translation, yl, yh); } // Translates one span at hx to the screen at sx. -void rt_tlate1col_RGBA (int hx, int sx, int yl, int yh) +void rt_tlate1col_rgba (int hx, int sx, int yl, int yh) { - rt_Translate1col_RGBA(dc_translation, hx, yl, yh); + rt_Translate1col_rgba(dc_translation, hx, yl, yh); rt_map1col(hx, sx, yl, yh); } // Translates all four spans to the screen starting at sx. -void rt_tlate4cols_RGBA (int sx, int yl, int yh) +void rt_tlate4cols_rgba (int sx, int yl, int yh) { - rt_Translate4cols_RGBA(dc_translation, yl, yh); + rt_Translate4cols_rgba(dc_translation, yl, yh); rt_map4cols(sx, yl, yh); } // Adds one span at hx to the screen at sx without clamping. -void rt_add1col_RGBA (int hx, int sx, int yl, int yh) +void rt_add1col_rgba (int hx, int sx, int yl, int yh) { DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Adds all four spans to the screen starting at sx without clamping. -void rt_add4cols_RGBA (int sx, int yl, int yh) +void rt_add4cols_rgba (int sx, int yl, int yh) { DrawerCommandQueue::QueueCommand(sx, yl, yh); } // Translates and adds one span at hx to the screen at sx without clamping. 
-void rt_tlateadd1col_RGBA (int hx, int sx, int yl, int yh) +void rt_tlateadd1col_rgba (int hx, int sx, int yl, int yh) { - rt_Translate1col_RGBA(dc_translation, hx, yl, yh); + rt_Translate1col_rgba(dc_translation, hx, yl, yh); rt_add1col(hx, sx, yl, yh); } // Translates and adds all four spans to the screen starting at sx without clamping. -void rt_tlateadd4cols_RGBA(int sx, int yl, int yh) +void rt_tlateadd4cols_rgba(int sx, int yl, int yh) { - rt_Translate4cols_RGBA(dc_translation, yl, yh); + rt_Translate4cols_rgba(dc_translation, yl, yh); rt_add4cols(sx, yl, yh); } // Shades one span at hx to the screen at sx. -void rt_shaded1col_RGBA (int hx, int sx, int yl, int yh) +void rt_shaded1col_rgba (int hx, int sx, int yl, int yh) { DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Shades all four spans to the screen starting at sx. -void rt_shaded4cols_RGBA (int sx, int yl, int yh) +void rt_shaded4cols_rgba (int sx, int yl, int yh) { DrawerCommandQueue::QueueCommand(sx, yl, yh); } // Adds one span at hx to the screen at sx with clamping. -void rt_addclamp1col_RGBA (int hx, int sx, int yl, int yh) +void rt_addclamp1col_rgba (int hx, int sx, int yl, int yh) { DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Adds all four spans to the screen starting at sx with clamping. -void rt_addclamp4cols_RGBA (int sx, int yl, int yh) +void rt_addclamp4cols_rgba (int sx, int yl, int yh) { DrawerCommandQueue::QueueCommand(sx, yl, yh); } // Translates and adds one span at hx to the screen at sx with clamping. -void rt_tlateaddclamp1col_RGBA (int hx, int sx, int yl, int yh) +void rt_tlateaddclamp1col_rgba (int hx, int sx, int yl, int yh) { - rt_Translate1col_RGBA(dc_translation, hx, yl, yh); - rt_addclamp1col_RGBA(hx, sx, yl, yh); + rt_Translate1col_rgba(dc_translation, hx, yl, yh); + rt_addclamp1col_rgba(hx, sx, yl, yh); } // Translates and adds all four spans to the screen starting at sx with clamping. 
-void rt_tlateaddclamp4cols_RGBA (int sx, int yl, int yh) +void rt_tlateaddclamp4cols_rgba (int sx, int yl, int yh) { - rt_Translate4cols_RGBA(dc_translation, yl, yh); + rt_Translate4cols_rgba(dc_translation, yl, yh); rt_addclamp4cols(sx, yl, yh); } // Subtracts one span at hx to the screen at sx with clamping. -void rt_subclamp1col_RGBA (int hx, int sx, int yl, int yh) +void rt_subclamp1col_rgba (int hx, int sx, int yl, int yh) { DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Subtracts all four spans to the screen starting at sx with clamping. -void rt_subclamp4cols_RGBA (int sx, int yl, int yh) +void rt_subclamp4cols_rgba (int sx, int yl, int yh) { DrawerCommandQueue::QueueCommand(sx, yl, yh); } // Translates and subtracts one span at hx to the screen at sx with clamping. -void rt_tlatesubclamp1col_RGBA (int hx, int sx, int yl, int yh) +void rt_tlatesubclamp1col_rgba (int hx, int sx, int yl, int yh) { - rt_Translate1col_RGBA(dc_translation, hx, yl, yh); - rt_subclamp1col_RGBA(hx, sx, yl, yh); + rt_Translate1col_rgba(dc_translation, hx, yl, yh); + rt_subclamp1col_rgba(hx, sx, yl, yh); } // Translates and subtracts all four spans to the screen starting at sx with clamping. -void rt_tlatesubclamp4cols_RGBA (int sx, int yl, int yh) +void rt_tlatesubclamp4cols_rgba (int sx, int yl, int yh) { - rt_Translate4cols_RGBA(dc_translation, yl, yh); - rt_subclamp4cols_RGBA(sx, yl, yh); + rt_Translate4cols_rgba(dc_translation, yl, yh); + rt_subclamp4cols_rgba(sx, yl, yh); } // Subtracts one span at hx from the screen at sx with clamping. -void rt_revsubclamp1col_RGBA (int hx, int sx, int yl, int yh) +void rt_revsubclamp1col_rgba (int hx, int sx, int yl, int yh) { DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Subtracts all four spans from the screen starting at sx with clamping. 
-void rt_revsubclamp4cols_RGBA (int sx, int yl, int yh) +void rt_revsubclamp4cols_rgba (int sx, int yl, int yh) { DrawerCommandQueue::QueueCommand(sx, yl, yh); } // Translates and subtracts one span at hx from the screen at sx with clamping. -void rt_tlaterevsubclamp1col_RGBA (int hx, int sx, int yl, int yh) +void rt_tlaterevsubclamp1col_rgba (int hx, int sx, int yl, int yh) { - rt_Translate1col_RGBA(dc_translation, hx, yl, yh); - rt_revsubclamp1col_RGBA(hx, sx, yl, yh); + rt_Translate1col_rgba(dc_translation, hx, yl, yh); + rt_revsubclamp1col_rgba(hx, sx, yl, yh); } // Translates and subtracts all four spans from the screen starting at sx with clamping. -void rt_tlaterevsubclamp4cols_RGBA (int sx, int yl, int yh) +void rt_tlaterevsubclamp4cols_rgba (int sx, int yl, int yh) { - rt_Translate4cols_RGBA(dc_translation, yl, yh); - rt_revsubclamp4cols_RGBA(sx, yl, yh); + rt_Translate4cols_rgba(dc_translation, yl, yh); + rt_revsubclamp4cols_rgba(sx, yl, yh); } // Before each pass through a rendering loop that uses these routines, @@ -1816,7 +1816,7 @@ void rt_span_coverage_rgba(int x, int start, int stop) // Stretches a column into a temporary buffer which is later // drawn to the screen along with up to three other columns. 
-void R_DrawColumnHorizP_RGBA (void) +void R_DrawColumnHoriz_rgba (void) { if (dc_count <= 0) return; @@ -1831,7 +1831,7 @@ void R_DrawColumnHorizP_RGBA (void) } // [RH] Just fills a column with a given color -void R_FillColumnHorizP_RGBA (void) +void R_FillColumnHoriz_rgba (void) { if (dc_count <= 0) return; diff --git a/src/r_plane.cpp b/src/r_plane.cpp index 1cde16071c..1a08d1793d 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -477,7 +477,7 @@ void R_MapTiltedPlane_C (int y, int x1) #endif } -void R_MapTiltedPlane_RGBA (int y, int x1) +void R_MapTiltedPlane_rgba (int y, int x1) { int x2 = spanend[y]; @@ -509,7 +509,7 @@ void R_MapColoredPlane_C (int y, int x1) memset (ylookup[y] + x1 + dc_destorg, ds_color, (spanend[y] - x1 + 1)); } -void R_MapColoredPlane_RGBA(int y, int x1) +void R_MapColoredPlane_rgba(int y, int x1) { uint32_t *dest = ylookup[y] + x1 + (uint32_t*)dc_destorg; int count = (spanend[y] - x1 + 1); diff --git a/src/r_plane.h b/src/r_plane.h index 7505ac9957..b199d34776 100644 --- a/src/r_plane.h +++ b/src/r_plane.h @@ -97,9 +97,9 @@ extern void(*R_MapColoredPlane)(int y, int x1); extern void(*R_MapTiltedPlane)(int y, int x1); void R_MapTiltedPlane_C(int y, int x1); -void R_MapTiltedPlane_RGBA(int y, int x); +void R_MapTiltedPlane_rgba(int y, int x); void R_MapColoredPlane_C(int y, int x1); -void R_MapColoredPlane_RGBA(int y, int x1); +void R_MapColoredPlane_rgba(int y, int x1); visplane_t *R_FindPlane ( const secplane_t &height, diff --git a/src/r_things.cpp b/src/r_things.cpp index f1f29f160d..0858dce2f6 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -2676,7 +2676,7 @@ void R_DrawParticle_C (vissprite_t *vis) } } -void R_DrawParticle_RGBA(vissprite_t *vis) +void R_DrawParticle_rgba(vissprite_t *vis) { int spacing; uint32_t *dest; diff --git a/src/r_things.h b/src/r_things.h index 785729b098..f5cd30e003 100644 --- a/src/r_things.h +++ b/src/r_things.h @@ -98,7 +98,7 @@ struct particle_t; extern void(*R_DrawParticle)(vissprite_t *); 
void R_DrawParticle_C (vissprite_t *); -void R_DrawParticle_RGBA (vissprite_t *); +void R_DrawParticle_rgba (vissprite_t *); void R_ProjectParticle (particle_t *, const sector_t *sector, int shade, int fakeside); From 9c8c1e0ea51534d8c6d5fba8ed0c58a965aa88e8 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 14 Jun 2016 00:27:08 +0200 Subject: [PATCH 044/912] Fixed window transparency bug --- src/r_drawt_rgba.cpp | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp index 8f6d2ca132..d2d715c8dd 100644 --- a/src/r_drawt_rgba.cpp +++ b/src/r_drawt_rgba.cpp @@ -588,6 +588,8 @@ class RtAdd4colsRGBACommand : public DrawerCommand fixed_t dc_light; ShadeConstants dc_shade_constants; BYTE *dc_colormap; + fixed_t dc_srcalpha; + fixed_t dc_destalpha; public: RtAdd4colsRGBACommand(int sx, int yl, int yh) @@ -601,6 +603,8 @@ public: dc_light = ::dc_light; dc_shade_constants = ::dc_shade_constants; dc_colormap = ::dc_colormap; + dc_srcalpha = ::dc_srcalpha; + dc_destalpha = ::dc_destalpha; } #ifdef NO_SSE @@ -722,10 +726,10 @@ public: __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); do { - uint32_t p0 = source[0]; - uint32_t p1 = source[1]; - uint32_t p2 = source[2]; - uint32_t p3 = source[3]; + uint32_t p0 = colormap[source[0]]; + uint32_t p1 = colormap[source[1]]; + uint32_t p2 = colormap[source[2]]; + uint32_t p3 = colormap[source[3]]; // shade_pal_index: __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); From 77c4786b9d716ab018ec4b082490b6ed78f5cc36 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 14 Jun 2016 23:05:20 +0200 Subject: [PATCH 045/912] Minor code cleanup --- src/f_wipe.cpp | 5 +- src/r_draw.cpp | 57 +++-------- src/r_draw.h | 19 +++- src/r_draw_rgba.h | 213 +++++++++++++++++++++++++++++++++++++++++ src/r_drawt.cpp | 32 +++---- src/r_main.h | 223 ------------------------------------------- 
src/r_plane.cpp | 28 +++--- src/r_swrenderer.cpp | 2 +- src/r_things.cpp | 5 +- src/v_draw.cpp | 1 + 10 files changed, 277 insertions(+), 308 deletions(-) diff --git a/src/f_wipe.cpp b/src/f_wipe.cpp index 84b6036e48..aa9038eeb2 100644 --- a/src/f_wipe.cpp +++ b/src/f_wipe.cpp @@ -78,7 +78,7 @@ bool wipe_initMelt (int ticks) int i, r; // copy start screen to main screen - screen->DrawBlock(0, 0, SCREENWIDTH, SCREENHEIGHT, (BYTE *)wipe_scr_start); + screen->DrawBlock (0, 0, SCREENWIDTH, SCREENHEIGHT, (BYTE *)wipe_scr_start); // makes this wipe faster (in theory) // to have stuff in column-major format @@ -271,8 +271,7 @@ bool wipe_doBurn (int ticks) // Draw the screen int xstep, ystep, firex, firey; int x, y; - BYTE *to; - BYTE *fromold, *fromnew; + BYTE *to, *fromold, *fromnew; const int SHIFT = 16; xstep = (FIREWIDTH << SHIFT) / SCREENWIDTH; diff --git a/src/r_draw.cpp b/src/r_draw.cpp index ecb4441f8c..4dcdc3e6be 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -63,7 +63,7 @@ extern int ST_Y; BYTE* viewimage; extern "C" { int ylookup[MAXHEIGHT]; -BYTE* dc_destorg; +BYTE *dc_destorg; } int scaledviewwidth; @@ -276,7 +276,7 @@ void R_DrawColumnP_C (void) { // Re-map color indices from wall texture column // using a lighting/special effects LUT. 
- *dest = colormap[source[frac >> FRACBITS]]; + *dest = colormap[source[frac>>FRACBITS]]; dest += pitch; frac += fracstep; @@ -321,13 +321,12 @@ void R_FillAddColumn_C (void) return; dest = dc_dest; - int pitch = dc_pitch; - DWORD *bg2rgb; DWORD fg; bg2rgb = dc_destblend; fg = dc_srccolor; + int pitch = dc_pitch; do { @@ -348,13 +347,12 @@ void R_FillAddClampColumn_C (void) return; dest = dc_dest; - int pitch = dc_pitch; - DWORD *bg2rgb; DWORD fg; bg2rgb = dc_destblend; fg = dc_srccolor; + int pitch = dc_pitch; do { @@ -381,13 +379,12 @@ void R_FillSubClampColumn_C (void) return; dest = dc_dest; - int pitch = dc_pitch; - DWORD *bg2rgb; DWORD fg; bg2rgb = dc_destblend; fg = dc_srccolor | 0x40100400; + int pitch = dc_pitch; do { @@ -413,13 +410,12 @@ void R_FillRevSubClampColumn_C (void) return; dest = dc_dest; - int pitch = dc_pitch; - DWORD *bg2rgb; DWORD fg; bg2rgb = dc_destblend; fg = dc_srccolor; + int pitch = dc_pitch; do { @@ -672,13 +668,14 @@ void R_DrawTranslatedColumnP_C (void) { *dest = colormap[translation[source[frac>>FRACBITS]]]; dest += pitch; + frac += fracstep; } while (--count); } } // Draw a column that is both translated and translucent -void R_DrawTlatedAddColumnP_C() +void R_DrawTlatedAddColumnP_C (void) { int count; BYTE *dest; @@ -772,15 +769,15 @@ void R_DrawAddClampColumnP_C () frac = dc_texturefrac; { - const BYTE *source = dc_source; BYTE *colormap = dc_colormap; + const BYTE *source = dc_source; int pitch = dc_pitch; DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; do { - DWORD a = fg2rgb[colormap[source[frac >> FRACBITS]]] + bg2rgb[*dest]; + DWORD a = fg2rgb[colormap[source[frac>>FRACBITS]]] + bg2rgb[*dest]; DWORD b = a; a |= 0x01f07c1f; @@ -788,7 +785,7 @@ void R_DrawAddClampColumnP_C () a &= 0x3fffffff; b = b - (b >> 5); a |= b; - *dest = RGB32k.All[a & (a >> 15)]; + *dest = RGB32k.All[a & (a>>15)]; dest += pitch; frac += fracstep; } while (--count); @@ -1190,9 +1187,6 @@ void R_DrawSpanP_C (void) } while (--count); } } 
-#endif - -#ifndef X86_ASM // [RH] Draw a span with holes void R_DrawSpanMaskedP_C (void) @@ -1282,8 +1276,6 @@ void R_DrawSpanTranslucentP_C (void) xstep = ds_xstep; ystep = ds_ystep; - uint32_t light = calc_light_multiplier(ds_light); - if (ds_xbits == 6 && ds_ybits == 6) { // 64x64 is the most common case by far, so special case it. @@ -1334,8 +1326,6 @@ void R_DrawSpanMaskedTranslucentP_C (void) DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; - uint32_t light = calc_light_multiplier(ds_light); - xfrac = ds_xfrac; yfrac = ds_yfrac; @@ -1426,7 +1416,6 @@ void R_DrawSpanAddClampP_C (void) do { spot = ((xfrac>>(32-6-6))&(63*64)) + (yfrac>>(32-6)); - DWORD a = fg2rgb[colormap[source[spot]]] + bg2rgb[*dest]; DWORD b = a; @@ -1436,7 +1425,6 @@ void R_DrawSpanAddClampP_C (void) b = b - (b >> 5); a |= b; *dest++ = RGB32k.All[a & (a>>15)]; - xfrac += xstep; yfrac += ystep; } while (--count); @@ -1449,7 +1437,6 @@ void R_DrawSpanAddClampP_C (void) do { spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - DWORD a = fg2rgb[colormap[source[spot]]] + bg2rgb[*dest]; DWORD b = a; @@ -1459,14 +1446,12 @@ void R_DrawSpanAddClampP_C (void) b = b - (b >> 5); a |= b; *dest++ = RGB32k.All[a & (a>>15)]; - xfrac += xstep; yfrac += ystep; } while (--count); } } - void R_DrawSpanMaskedAddClampP_C (void) { dsfixed_t xfrac; @@ -1481,8 +1466,6 @@ void R_DrawSpanMaskedAddClampP_C (void) DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; - uint32_t light = calc_light_multiplier(ds_light); - xfrac = ds_xfrac; yfrac = ds_yfrac; @@ -1552,7 +1535,7 @@ void R_DrawSpanMaskedAddClampP_C (void) // [RH] Just fill a span with a color void R_FillSpan_C (void) { - memset (ylookup[ds_y] + ds_x1 + dc_destorg, ds_color, (ds_x2 - ds_x1 + 1)); + memset (ylookup[ds_y] + ds_x1 + dc_destorg, ds_color, ds_x2 - ds_x1 + 1); } @@ -1759,7 +1742,7 @@ DWORD vlinec1 () do { - *dest = colormap[source[frac >> bits]]; + *dest = colormap[source[frac>>bits]]; frac += fracstep; dest += pitch; } while 
(--count); @@ -1830,9 +1813,7 @@ DWORD mvlinec1 () return frac; } -#endif -#if !defined(X86_ASM) void mvlinec4 () { BYTE *dest = dc_dest; @@ -1843,6 +1824,7 @@ void mvlinec4 () do { BYTE pix; + pix = bufplce[0][(place=vplce[0])>>bits]; if(pix) dest[0] = palookupoffse[0][pix]; vplce[0] = place+vince[0]; pix = bufplce[1][(place=vplce[1])>>bits]; if(pix) dest[1] = palookupoffse[1][pix]; vplce[1] = place+vince[1]; pix = bufplce[2][(place=vplce[2])>>bits]; if(pix) dest[2] = palookupoffse[2][pix]; vplce[2] = place+vince[2]; @@ -1879,7 +1861,6 @@ static void R_DrawFogBoundaryLine (int y, int x) int x2 = spanend[y]; BYTE *colormap = dc_colormap; BYTE *dest = ylookup[y] + dc_destorg; - do { dest[x] = colormap[dest[x]]; @@ -1996,8 +1977,6 @@ fixed_t tmvline1_add_C () DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; - uint32_t light = calc_light_multiplier(dc_light); - do { BYTE pix = source[frac>>bits]; @@ -2024,12 +2003,6 @@ void tmvline4_add_C () DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; - uint32_t light[4]; - light[0] = calc_light_multiplier(palookuplight[0]); - light[1] = calc_light_multiplier(palookuplight[1]); - light[2] = calc_light_multiplier(palookuplight[2]); - light[3] = calc_light_multiplier(palookuplight[3]); - do { for (int i = 0; i < 4; ++i) @@ -2062,8 +2035,6 @@ fixed_t tmvline1_addclamp_C () DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; - uint32_t light = calc_light_multiplier(dc_light); - do { BYTE pix = source[frac>>bits]; diff --git a/src/r_draw.h b/src/r_draw.h index cea05e469c..a311834055 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -32,7 +32,20 @@ extern "C" int fuzzpos; extern "C" int fuzzviewheight; struct FColormap; -struct ShadeConstants; + +struct ShadeConstants +{ + uint16_t light_alpha; + uint16_t light_red; + uint16_t light_green; + uint16_t light_blue; + uint16_t fade_alpha; + uint16_t fade_red; + uint16_t fade_green; + uint16_t fade_blue; + uint16_t desaturate; + bool simple_shade; +}; extern "C" int 
ylookup[MAXHEIGHT]; @@ -58,7 +71,7 @@ extern "C" fixed_t dc_destalpha; // first pixel in a column extern "C" const BYTE* dc_source; -extern "C" BYTE* dc_dest, *dc_destorg; +extern "C" BYTE *dc_dest, *dc_destorg; extern "C" int dc_count; extern "C" DWORD vplce[4]; @@ -68,7 +81,7 @@ extern "C" fixed_t palookuplight[4]; extern "C" const BYTE* bufplce[4]; // [RH] Temporary buffer for column drawing -extern "C" BYTE *dc_temp; +extern "C" BYTE *dc_temp; extern "C" unsigned int dc_tspans[4][MAXHEIGHT]; extern "C" unsigned int *dc_ctspan[4]; extern "C" unsigned int horizspans[4]; diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index 9f07ff0bf0..47ea75260d 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -24,6 +24,7 @@ #define __R_DRAW_RGBA__ #include "r_draw.h" +#include "v_palette.h" #include #include #include @@ -273,4 +274,216 @@ public: void Execute(DrawerThread *thread) override; }; +///////////////////////////////////////////////////////////////////////////// +// Pixel shading macros and inline functions: + +// Give the compiler a strong hint we want these functions inlined: +#ifndef FORCEINLINE +#if defined(_MSC_VER) +#define FORCEINLINE __forceinline +#elif defined(__GNUC__) +#define FORCEINLINE __attribute__((always_inline)) inline +#else +#define FORCEINLINE inline +#endif +#endif + +// calculates the light constant passed to the shade_pal_index function +FORCEINLINE uint32_t calc_light_multiplier(dsfixed_t light) +{ + return 256 - (light >> (FRACBITS - 8)); +} + +// Calculates a ARGB8 color for the given palette index and light multiplier +FORCEINLINE uint32_t shade_pal_index_simple(uint32_t index, uint32_t light) +{ + const PalEntry &color = GPalette.BaseColors[index]; + uint32_t red = color.r; + uint32_t green = color.g; + uint32_t blue = color.b; + + red = red * light / 256; + green = green * light / 256; + blue = blue * light / 256; + + return 0xff000000 | (red << 16) | (green << 8) | blue; +} + +FORCEINLINE uint32_t shade_bgra_simple(uint32_t 
color, uint32_t light) +{ + uint32_t red = (color >> 16) & 0xff; + uint32_t green = (color >> 8) & 0xff; + uint32_t blue = color & 0xff; + + red = red * light / 256; + green = green * light / 256; + blue = blue * light / 256; + + return 0xff000000 | (red << 16) | (green << 8) | blue; +} + +// Calculates a ARGB8 color for the given palette index, light multiplier and dynamic colormap +FORCEINLINE uint32_t shade_pal_index(uint32_t index, uint32_t light, const ShadeConstants &constants) +{ + const PalEntry &color = GPalette.BaseColors[index]; + uint32_t red = color.r; + uint32_t green = color.g; + uint32_t blue = color.b; + if (constants.simple_shade) + { + red = red * light / 256; + green = green * light / 256; + blue = blue * light / 256; + } + else + { + uint32_t inv_light = 256 - light; + uint32_t inv_desaturate = 256 - constants.desaturate; + + uint32_t intensity = ((red * 77 + green * 143 + blue * 37) >> 8) * constants.desaturate; + + red = (red * inv_desaturate + intensity) / 256; + green = (green * inv_desaturate + intensity) / 256; + blue = (blue * inv_desaturate + intensity) / 256; + + red = (constants.fade_red * inv_light + red * light) / 256; + green = (constants.fade_green * inv_light + green * light) / 256; + blue = (constants.fade_blue * inv_light + blue * light) / 256; + + red = (red * constants.light_red) / 256; + green = (green * constants.light_green) / 256; + blue = (blue * constants.light_blue) / 256; + } + return 0xff000000 | (red << 16) | (green << 8) | blue; +} + +FORCEINLINE uint32_t shade_bgra(uint32_t color, uint32_t light, const ShadeConstants &constants) +{ + uint32_t red = (color >> 16) & 0xff; + uint32_t green = (color >> 8) & 0xff; + uint32_t blue = color & 0xff; + if (constants.simple_shade) + { + red = red * light / 256; + green = green * light / 256; + blue = blue * light / 256; + } + else + { + uint32_t inv_light = 256 - light; + uint32_t inv_desaturate = 256 - constants.desaturate; + + uint32_t intensity = ((red * 77 + green * 143 
+ blue * 37) >> 8) * constants.desaturate; + + red = (red * inv_desaturate + intensity) / 256; + green = (green * inv_desaturate + intensity) / 256; + blue = (blue * inv_desaturate + intensity) / 256; + + red = (constants.fade_red * inv_light + red * light) / 256; + green = (constants.fade_green * inv_light + green * light) / 256; + blue = (constants.fade_blue * inv_light + blue * light) / 256; + + red = (red * constants.light_red) / 256; + green = (green * constants.light_green) / 256; + blue = (blue * constants.light_blue) / 256; + } + return 0xff000000 | (red << 16) | (green << 8) | blue; +} + +FORCEINLINE uint32_t alpha_blend(uint32_t fg, uint32_t bg) +{ + uint32_t fg_alpha = (fg >> 24) & 0xff; + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t alpha = fg_alpha + (fg_alpha >> 7); // 255 -> 256 + uint32_t inv_alpha = 256 - alpha; + + uint32_t bg_red = (bg >> 16) & 0xff; + uint32_t bg_green = (bg >> 8) & 0xff; + uint32_t bg_blue = bg & 0xff; + + uint32_t red = ((fg_red * alpha) + (bg_red * inv_alpha)) / 256; + uint32_t green = ((fg_green * alpha) + (bg_green * inv_alpha)) / 256; + uint32_t blue = ((fg_blue * alpha) + (bg_blue * inv_alpha)) / 256; + + return 0xff000000 | (red << 16) | (green << 8) | blue; +} + +// Calculate constants for a simple shade +#define SSE_SHADE_SIMPLE_INIT(light) \ + __m128i mlight_hi = _mm_set_epi16(256, light, light, light, 256, light, light, light); \ + __m128i mlight_lo = mlight_hi; + +// Calculate constants for a simple shade with different light levels for each pixel +#define SSE_SHADE_SIMPLE_INIT4(light3, light2, light1, light0) \ + __m128i mlight_hi = _mm_set_epi16(256, light1, light1, light1, 256, light0, light0, light0); \ + __m128i mlight_lo = _mm_set_epi16(256, light3, light3, light3, 256, light2, light2, light2); + +// Simple shade 4 pixels +#define SSE_SHADE_SIMPLE(fg) { \ + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); \ + __m128i fg_lo 
= _mm_unpacklo_epi8(fg, _mm_setzero_si128()); \ + fg_hi = _mm_mullo_epi16(fg_hi, mlight_hi); \ + fg_hi = _mm_srli_epi16(fg_hi, 8); \ + fg_lo = _mm_mullo_epi16(fg_lo, mlight_lo); \ + fg_lo = _mm_srli_epi16(fg_lo, 8); \ + fg = _mm_packus_epi16(fg_lo, fg_hi); \ +} + +// Calculate constants for a complex shade +#define SSE_SHADE_INIT(light, shade_constants) \ + __m128i mlight_hi = _mm_set_epi16(256, light, light, light, 256, light, light, light); \ + __m128i mlight_lo = mlight_hi; \ + __m128i color = _mm_set_epi16( \ + shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, \ + shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); \ + __m128i fade = _mm_set_epi16( \ + shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, \ + shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); \ + __m128i fade_amount_hi = _mm_mullo_epi16(fade, _mm_subs_epu16(_mm_set1_epi16(256), mlight_hi)); \ + __m128i fade_amount_lo = fade_amount_hi; \ + __m128i inv_desaturate = _mm_set1_epi16(256 - shade_constants.desaturate); \ + +// Calculate constants for a complex shade with different light levels for each pixel +#define SSE_SHADE_INIT4(light3, light2, light1, light0, shade_constants) \ + __m128i mlight_hi = _mm_set_epi16(256, light1, light1, light1, 256, light0, light0, light0); \ + __m128i mlight_lo = _mm_set_epi16(256, light3, light3, light3, 256, light2, light2, light2); \ + __m128i color = _mm_set_epi16( \ + shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, \ + shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); \ + __m128i fade = _mm_set_epi16( \ + shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, 
shade_constants.fade_blue, \ + shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); \ + __m128i fade_amount_hi = _mm_mullo_epi16(fade, _mm_subs_epu16(_mm_set1_epi16(256), mlight_hi)); \ + __m128i fade_amount_lo = _mm_mullo_epi16(fade, _mm_subs_epu16(_mm_set1_epi16(256), mlight_lo)); \ + __m128i inv_desaturate = _mm_set1_epi16(256 - shade_constants.desaturate); \ + +// Complex shade 4 pixels +#define SSE_SHADE(fg, shade_constants) { \ + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); \ + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); \ + \ + __m128i intensity_hi = _mm_mullo_epi16(fg_hi, _mm_set_epi16(0, 77, 143, 37, 0, 77, 143, 37)); \ + uint16_t intensity_hi0 = ((_mm_extract_epi16(intensity_hi, 2) + _mm_extract_epi16(intensity_hi, 1) + _mm_extract_epi16(intensity_hi, 0)) >> 8) * shade_constants.desaturate; \ + uint16_t intensity_hi1 = ((_mm_extract_epi16(intensity_hi, 6) + _mm_extract_epi16(intensity_hi, 5) + _mm_extract_epi16(intensity_hi, 4)) >> 8) * shade_constants.desaturate; \ + intensity_hi = _mm_set_epi16(intensity_hi1, intensity_hi1, intensity_hi1, intensity_hi1, intensity_hi0, intensity_hi0, intensity_hi0, intensity_hi0); \ + \ + fg_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, inv_desaturate), intensity_hi), 8); \ + fg_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mlight_hi), fade_amount_hi), 8); \ + fg_hi = _mm_srli_epi16(_mm_mullo_epi16(fg_hi, color), 8); \ + \ + __m128i intensity_lo = _mm_mullo_epi16(fg_lo, _mm_set_epi16(0, 77, 143, 37, 0, 77, 143, 37)); \ + uint16_t intensity_lo0 = ((_mm_extract_epi16(intensity_lo, 2) + _mm_extract_epi16(intensity_lo, 1) + _mm_extract_epi16(intensity_lo, 0)) >> 8) * shade_constants.desaturate; \ + uint16_t intensity_lo1 = ((_mm_extract_epi16(intensity_lo, 6) + _mm_extract_epi16(intensity_lo, 5) + _mm_extract_epi16(intensity_lo, 4)) >> 8) * shade_constants.desaturate; \ + intensity_lo = _mm_set_epi16(intensity_lo1, 
intensity_lo1, intensity_lo1, intensity_lo1, intensity_lo0, intensity_lo0, intensity_lo0, intensity_lo0); \ + \ + fg_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, inv_desaturate), intensity_lo), 8); \ + fg_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mlight_lo), fade_amount_lo), 8); \ + fg_lo = _mm_srli_epi16(_mm_mullo_epi16(fg_lo, color), 8); \ + \ + fg = _mm_packus_epi16(fg_lo, fg_hi); \ +} + #endif diff --git a/src/r_drawt.cpp b/src/r_drawt.cpp index c829c2dc4c..8370930441 100644 --- a/src/r_drawt.cpp +++ b/src/r_drawt.cpp @@ -340,13 +340,13 @@ void rt_add1col_c (int hx, int sx, int yl, int yh) return; count++; + DWORD *fg2rgb = dc_srcblend; + DWORD *bg2rgb = dc_destblend; dest = ylookup[yl] + sx + dc_destorg; source = &dc_temp[yl*4 + hx]; pitch = dc_pitch; colormap = dc_colormap; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; do { DWORD fg = colormap[*source]; DWORD bg = *dest; @@ -374,14 +374,13 @@ void rt_add4cols_c (int sx, int yl, int yh) return; count++; + DWORD *fg2rgb = dc_srcblend; + DWORD *bg2rgb = dc_destblend; dest = ylookup[yl] + sx + dc_destorg; source = &dc_temp[yl*4]; pitch = dc_pitch; colormap = dc_colormap; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - do { DWORD fg = colormap[source[0]]; DWORD bg = dest[0]; @@ -434,6 +433,7 @@ void rt_tlateadd4cols_c (int sx, int yl, int yh) // Shades one span at hx to the screen at sx. 
void rt_shaded1col_c (int hx, int sx, int yl, int yh) { + DWORD *fgstart; BYTE *colormap; BYTE *source; BYTE *dest; @@ -445,14 +445,12 @@ void rt_shaded1col_c (int hx, int sx, int yl, int yh) return; count++; + fgstart = &Col2RGB8[0][dc_color]; colormap = dc_colormap; dest = ylookup[yl] + sx + dc_destorg; source = &dc_temp[yl*4 + hx]; pitch = dc_pitch; - DWORD *fgstart; - fgstart = &Col2RGB8[0][dc_color]; - do { DWORD val = colormap[*source]; DWORD fg = fgstart[val<<8]; @@ -466,6 +464,7 @@ void rt_shaded1col_c (int hx, int sx, int yl, int yh) // Shades all four spans to the screen starting at sx. void rt_shaded4cols_c (int sx, int yl, int yh) { + DWORD *fgstart; BYTE *colormap; BYTE *source; BYTE *dest; @@ -477,14 +476,12 @@ void rt_shaded4cols_c (int sx, int yl, int yh) return; count++; + fgstart = &Col2RGB8[0][dc_color]; colormap = dc_colormap; dest = ylookup[yl] + sx + dc_destorg; source = &dc_temp[yl*4]; pitch = dc_pitch; - DWORD *fgstart; - fgstart = &Col2RGB8[0][dc_color]; - do { DWORD val; @@ -523,14 +520,13 @@ void rt_addclamp1col_c (int hx, int sx, int yl, int yh) return; count++; + DWORD *fg2rgb = dc_srcblend; + DWORD *bg2rgb = dc_destblend; dest = ylookup[yl] + sx + dc_destorg; source = &dc_temp[yl*4 + hx]; pitch = dc_pitch; colormap = dc_colormap; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - do { DWORD a = fg2rgb[colormap[*source]] + bg2rgb[*dest]; DWORD b = a; @@ -639,13 +635,13 @@ void rt_subclamp1col_c (int hx, int sx, int yl, int yh) return; count++; + DWORD *fg2rgb = dc_srcblend; + DWORD *bg2rgb = dc_destblend; dest = ylookup[yl] + sx + dc_destorg; source = &dc_temp[yl*4 + hx]; pitch = dc_pitch; colormap = dc_colormap; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; do { DWORD a = (fg2rgb[colormap[*source]] | 0x40100400) - bg2rgb[*dest]; DWORD b = a; @@ -674,13 +670,13 @@ void rt_subclamp4cols_c (int sx, int yl, int yh) return; count++; + DWORD *fg2rgb = dc_srcblend; + DWORD *bg2rgb = dc_destblend; dest = 
ylookup[yl] + sx + dc_destorg; source = &dc_temp[yl*4]; pitch = dc_pitch; colormap = dc_colormap; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; do { DWORD a = (fg2rgb[colormap[source[0]]] | 0x40100400) - bg2rgb[dest[0]]; DWORD b = a; diff --git a/src/r_main.h b/src/r_main.h index d71d44fe1f..fa8fe0bb15 100644 --- a/src/r_main.h +++ b/src/r_main.h @@ -90,229 +90,6 @@ extern bool r_dontmaplines; // Converts fixedlightlev into a shade value #define FIXEDLIGHT2SHADE(lightlev) (((lightlev) >> COLORMAPSHIFT) << FRACBITS) -struct ShadeConstants -{ - uint16_t light_alpha; - uint16_t light_red; - uint16_t light_green; - uint16_t light_blue; - uint16_t fade_alpha; - uint16_t fade_red; - uint16_t fade_green; - uint16_t fade_blue; - uint16_t desaturate; - bool simple_shade; -}; - -// calculates the light constant passed to the shade_pal_index function -inline uint32_t calc_light_multiplier(dsfixed_t light) -{ - return 256 - (light >> (FRACBITS - 8)); -} - -// Give the compiler a strong hint we want these functions inlined: -#ifndef FORCEINLINE -#if defined(_MSC_VER) -#define FORCEINLINE __forceinline -#elif defined(__GNUC__) -#define FORCEINLINE __attribute__((always_inline)) inline -#else -#define FORCEINLINE inline -#endif -#endif - -// Calculates a ARGB8 color for the given palette index and light multiplier -FORCEINLINE uint32_t shade_pal_index_simple(uint32_t index, uint32_t light) -{ - const PalEntry &color = GPalette.BaseColors[index]; - uint32_t red = color.r; - uint32_t green = color.g; - uint32_t blue = color.b; - - red = red * light / 256; - green = green * light / 256; - blue = blue * light / 256; - - return 0xff000000 | (red << 16) | (green << 8) | blue; -} - -FORCEINLINE uint32_t shade_bgra_simple(uint32_t color, uint32_t light) -{ - uint32_t red = (color >> 16) & 0xff; - uint32_t green = (color >> 8) & 0xff; - uint32_t blue = color & 0xff; - - red = red * light / 256; - green = green * light / 256; - blue = blue * light / 256; - - return 
0xff000000 | (red << 16) | (green << 8) | blue; -} - -// Calculates a ARGB8 color for the given palette index, light multiplier and dynamic colormap -FORCEINLINE uint32_t shade_pal_index(uint32_t index, uint32_t light, const ShadeConstants &constants) -{ - const PalEntry &color = GPalette.BaseColors[index]; - uint32_t red = color.r; - uint32_t green = color.g; - uint32_t blue = color.b; - if (constants.simple_shade) - { - red = red * light / 256; - green = green * light / 256; - blue = blue * light / 256; - } - else - { - uint32_t inv_light = 256 - light; - uint32_t inv_desaturate = 256 - constants.desaturate; - - uint32_t intensity = ((red * 77 + green * 143 + blue * 37) >> 8) * constants.desaturate; - - red = (red * inv_desaturate + intensity) / 256; - green = (green * inv_desaturate + intensity) / 256; - blue = (blue * inv_desaturate + intensity) / 256; - - red = (constants.fade_red * inv_light + red * light) / 256; - green = (constants.fade_green * inv_light + green * light) / 256; - blue = (constants.fade_blue * inv_light + blue * light) / 256; - - red = (red * constants.light_red) / 256; - green = (green * constants.light_green) / 256; - blue = (blue * constants.light_blue) / 256; - } - return 0xff000000 | (red << 16) | (green << 8) | blue; -} - -FORCEINLINE uint32_t shade_bgra(uint32_t color, uint32_t light, const ShadeConstants &constants) -{ - uint32_t red = (color >> 16) & 0xff; - uint32_t green = (color >> 8) & 0xff; - uint32_t blue = color & 0xff; - if (constants.simple_shade) - { - red = red * light / 256; - green = green * light / 256; - blue = blue * light / 256; - } - else - { - uint32_t inv_light = 256 - light; - uint32_t inv_desaturate = 256 - constants.desaturate; - - uint32_t intensity = ((red * 77 + green * 143 + blue * 37) >> 8) * constants.desaturate; - - red = (red * inv_desaturate + intensity) / 256; - green = (green * inv_desaturate + intensity) / 256; - blue = (blue * inv_desaturate + intensity) / 256; - - red = (constants.fade_red * 
inv_light + red * light) / 256; - green = (constants.fade_green * inv_light + green * light) / 256; - blue = (constants.fade_blue * inv_light + blue * light) / 256; - - red = (red * constants.light_red) / 256; - green = (green * constants.light_green) / 256; - blue = (blue * constants.light_blue) / 256; - } - return 0xff000000 | (red << 16) | (green << 8) | blue; -} - -FORCEINLINE uint32_t alpha_blend(uint32_t fg, uint32_t bg) -{ - uint32_t fg_alpha = (fg >> 24) & 0xff; - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t alpha = fg_alpha + (fg_alpha >> 7); // 255 -> 256 - uint32_t inv_alpha = 256 - alpha; - - uint32_t bg_red = (bg >> 16) & 0xff; - uint32_t bg_green = (bg >> 8) & 0xff; - uint32_t bg_blue = bg & 0xff; - - uint32_t red = ((fg_red * alpha) + (bg_red * inv_alpha)) / 256; - uint32_t green = ((fg_green * alpha) + (bg_green * inv_alpha)) / 256; - uint32_t blue = ((fg_blue * alpha) + (bg_blue * inv_alpha)) / 256; - - return 0xff000000 | (red << 16) | (green << 8) | blue; -} - -// Calculate constants for a simple shade -#define SSE_SHADE_SIMPLE_INIT(light) \ - __m128i mlight_hi = _mm_set_epi16(256, light, light, light, 256, light, light, light); \ - __m128i mlight_lo = mlight_hi; - -// Calculate constants for a simple shade with different light levels for each pixel -#define SSE_SHADE_SIMPLE_INIT4(light3, light2, light1, light0) \ - __m128i mlight_hi = _mm_set_epi16(256, light1, light1, light1, 256, light0, light0, light0); \ - __m128i mlight_lo = _mm_set_epi16(256, light3, light3, light3, 256, light2, light2, light2); - -// Simple shade 4 pixels -#define SSE_SHADE_SIMPLE(fg) { \ - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); \ - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); \ - fg_hi = _mm_mullo_epi16(fg_hi, mlight_hi); \ - fg_hi = _mm_srli_epi16(fg_hi, 8); \ - fg_lo = _mm_mullo_epi16(fg_lo, mlight_lo); \ - fg_lo = _mm_srli_epi16(fg_lo, 8); \ - fg = 
_mm_packus_epi16(fg_lo, fg_hi); \ -} - -// Calculate constants for a complex shade -#define SSE_SHADE_INIT(light, shade_constants) \ - __m128i mlight_hi = _mm_set_epi16(256, light, light, light, 256, light, light, light); \ - __m128i mlight_lo = mlight_hi; \ - __m128i color = _mm_set_epi16( \ - shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, \ - shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); \ - __m128i fade = _mm_set_epi16( \ - shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, \ - shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); \ - __m128i fade_amount_hi = _mm_mullo_epi16(fade, _mm_subs_epu16(_mm_set1_epi16(256), mlight_hi)); \ - __m128i fade_amount_lo = fade_amount_hi; \ - __m128i inv_desaturate = _mm_set1_epi16(256 - shade_constants.desaturate); \ - -// Calculate constants for a complex shade with different light levels for each pixel -#define SSE_SHADE_INIT4(light3, light2, light1, light0, shade_constants) \ - __m128i mlight_hi = _mm_set_epi16(256, light1, light1, light1, 256, light0, light0, light0); \ - __m128i mlight_lo = _mm_set_epi16(256, light3, light3, light3, 256, light2, light2, light2); \ - __m128i color = _mm_set_epi16( \ - shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, \ - shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); \ - __m128i fade = _mm_set_epi16( \ - shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, \ - shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); \ - __m128i fade_amount_hi = _mm_mullo_epi16(fade, _mm_subs_epu16(_mm_set1_epi16(256), 
mlight_hi)); \ - __m128i fade_amount_lo = _mm_mullo_epi16(fade, _mm_subs_epu16(_mm_set1_epi16(256), mlight_lo)); \ - __m128i inv_desaturate = _mm_set1_epi16(256 - shade_constants.desaturate); \ - -// Complex shade 4 pixels -#define SSE_SHADE(fg, shade_constants) { \ - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); \ - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); \ - \ - __m128i intensity_hi = _mm_mullo_epi16(fg_hi, _mm_set_epi16(0, 77, 143, 37, 0, 77, 143, 37)); \ - uint16_t intensity_hi0 = ((_mm_extract_epi16(intensity_hi, 2) + _mm_extract_epi16(intensity_hi, 1) + _mm_extract_epi16(intensity_hi, 0)) >> 8) * shade_constants.desaturate; \ - uint16_t intensity_hi1 = ((_mm_extract_epi16(intensity_hi, 6) + _mm_extract_epi16(intensity_hi, 5) + _mm_extract_epi16(intensity_hi, 4)) >> 8) * shade_constants.desaturate; \ - intensity_hi = _mm_set_epi16(intensity_hi1, intensity_hi1, intensity_hi1, intensity_hi1, intensity_hi0, intensity_hi0, intensity_hi0, intensity_hi0); \ - \ - fg_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, inv_desaturate), intensity_hi), 8); \ - fg_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mlight_hi), fade_amount_hi), 8); \ - fg_hi = _mm_srli_epi16(_mm_mullo_epi16(fg_hi, color), 8); \ - \ - __m128i intensity_lo = _mm_mullo_epi16(fg_lo, _mm_set_epi16(0, 77, 143, 37, 0, 77, 143, 37)); \ - uint16_t intensity_lo0 = ((_mm_extract_epi16(intensity_lo, 2) + _mm_extract_epi16(intensity_lo, 1) + _mm_extract_epi16(intensity_lo, 0)) >> 8) * shade_constants.desaturate; \ - uint16_t intensity_lo1 = ((_mm_extract_epi16(intensity_lo, 6) + _mm_extract_epi16(intensity_lo, 5) + _mm_extract_epi16(intensity_lo, 4)) >> 8) * shade_constants.desaturate; \ - intensity_lo = _mm_set_epi16(intensity_lo1, intensity_lo1, intensity_lo1, intensity_lo1, intensity_lo0, intensity_lo0, intensity_lo0, intensity_lo0); \ - \ - fg_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, inv_desaturate), intensity_lo), 8); \ - fg_lo = 
_mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mlight_lo), fade_amount_lo), 8); \ - fg_lo = _mm_srli_epi16(_mm_mullo_epi16(fg_lo, color), 8); \ - \ - fg = _mm_packus_epi16(fg_lo, fg_hi); \ -} - extern bool r_swtruecolor; extern double GlobVis; diff --git a/src/r_plane.cpp b/src/r_plane.cpp index 1a08d1793d..807066f77d 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -58,6 +58,7 @@ #include "r_3dfloors.h" #include "v_palette.h" #include "r_data/colormaps.h" +#include "r_draw_rgba.h" #ifdef _MSC_VER #pragma warning(disable:4244) @@ -506,7 +507,7 @@ void R_MapTiltedPlane_rgba (int y, int x1) void R_MapColoredPlane_C (int y, int x1) { - memset (ylookup[y] + x1 + dc_destorg, ds_color, (spanend[y] - x1 + 1)); + memset (ylookup[y] + x1 + dc_destorg, ds_color, spanend[y] - x1 + 1); } void R_MapColoredPlane_rgba(int y, int x1) @@ -1710,7 +1711,7 @@ void R_DrawNormalPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t // //========================================================================== -void R_DrawTiltedPlane(visplane_t *pl, double _xscale, double _yscale, fixed_t alpha, bool additive, bool masked) +void R_DrawTiltedPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t alpha, bool additive, bool masked) { static const float ifloatpow2[16] = { @@ -1745,7 +1746,7 @@ void R_DrawTiltedPlane(visplane_t *pl, double _xscale, double _yscale, fixed_t a // p is the texture origin in view space // Don't add in the offsets at this stage, because doing so can result in // errors if the flat is rotated. 
- ang = M_PI * 3 / 2 - ViewAngle.Radians(); + ang = M_PI*3/2 - ViewAngle.Radians(); cosine = cos(ang), sine = sin(ang); p[0] = ViewPos.X * cosine - ViewPos.Y * sine; p[2] = ViewPos.X * sine + ViewPos.Y * cosine; @@ -1756,25 +1757,25 @@ void R_DrawTiltedPlane(visplane_t *pl, double _xscale, double _yscale, fixed_t a cosine = cos(ang), sine = sin(ang); m[0] = yscale * cosine; m[2] = yscale * sine; - // m[1] = pl->height.ZatPointF (0, iyscale) - pl->height.ZatPointF (0,0)); - // VectorScale2 (m, 64.f/VectorLength(m)); +// m[1] = pl->height.ZatPointF (0, iyscale) - pl->height.ZatPointF (0,0)); +// VectorScale2 (m, 64.f/VectorLength(m)); - // n is the u direction vector in view space + // n is the u direction vector in view space #if 0 //let's use the sin/cosine we already know instead of computing new ones - ang += M_PI / 2 - n[0] = -xscale * cos(ang); + ang += M_PI/2 + n[0] = -xscale * cos(ang); n[2] = -xscale * sin(ang); #else n[0] = xscale * sine; n[2] = -xscale * cosine; #endif - // n[1] = pl->height.ZatPointF (ixscale, 0) - pl->height.ZatPointF (0,0)); - // VectorScale2 (n, 64.f/VectorLength(n)); +// n[1] = pl->height.ZatPointF (ixscale, 0) - pl->height.ZatPointF (0,0)); +// VectorScale2 (n, 64.f/VectorLength(n)); - // This code keeps the texture coordinates constant across the x,y plane no matter - // how much you slope the surface. Use the commented-out code above instead to keep - // the textures a constant size across the surface's plane instead. + // This code keeps the texture coordinates constant across the x,y plane no matter + // how much you slope the surface. Use the commented-out code above instead to keep + // the textures a constant size across the surface's plane instead. 
cosine = cos(planeang), sine = sin(planeang); m[1] = pl->height.ZatPoint(ViewPos.X + yscale * sine, ViewPos.Y + yscale * cosine) - zeroheight; n[1] = pl->height.ZatPoint(ViewPos.X - xscale * cosine, ViewPos.Y + xscale * sine) - zeroheight; @@ -1807,7 +1808,6 @@ void R_DrawTiltedPlane(visplane_t *pl, double _xscale, double _yscale, fixed_t a if (pl->height.fC() > 0) planelightfloat = -planelightfloat; - ds_light = 0; if (fixedlightlev >= 0) { R_SetDSColorMapLight(basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); diff --git a/src/r_swrenderer.cpp b/src/r_swrenderer.cpp index fbbd65b17c..c4347236de 100644 --- a/src/r_swrenderer.cpp +++ b/src/r_swrenderer.cpp @@ -96,7 +96,7 @@ void FSoftwareRenderer::PrecacheTexture(FTexture *tex, int cache) else if (cache != 0) { if (r_swtruecolor) - tex->GetPixels(); + tex->GetPixelsBgra(); else tex->GetPixels (); } diff --git a/src/r_things.cpp b/src/r_things.cpp index 0858dce2f6..836f586903 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -2612,8 +2612,10 @@ static void R_DrawMaskedSegsBehindParticle (const vissprite_t *vis) void R_DrawParticle_C (vissprite_t *vis) { + DWORD *bg2rgb; int spacing; BYTE *dest; + DWORD fg; BYTE color = vis->Style.BaseColormap->Maps[(vis->Style.ColormapNum << COLORMAPSHIFT) + vis->startfrac]; int yl = vis->y1; int ycount = vis->y2 - yl + 1; @@ -2622,9 +2624,6 @@ void R_DrawParticle_C (vissprite_t *vis) R_DrawMaskedSegsBehindParticle (vis); - DWORD *bg2rgb; - DWORD fg; - // vis->renderflags holds translucency level (0-255) { fixed_t fglevel, bglevel; diff --git a/src/v_draw.cpp b/src/v_draw.cpp index 02ba591b6e..6a8dad0477 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -44,6 +44,7 @@ #include "r_utility.h" #ifndef NO_SWRENDER #include "r_draw.h" +#include "r_draw_rgba.h" #include "r_main.h" #include "r_things.h" #endif From 312776621e194e36f7ef1b01d36942929ff241bf Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 16 Jun 2016 06:47:30 +0200 Subject: [PATCH 046/912] Added 
DrawerContext class --- src/CMakeLists.txt | 1 + src/r_bsp.cpp | 4 +- src/r_draw.cpp | 798 ++++++++++++++++++++++++++++++++++++++- src/r_draw.h | 89 ++++- src/r_draw_rgba.cpp | 169 +++++++++ src/r_draw_rgba.h | 7 + src/r_drawer_context.cpp | 464 +++++++++++++++++++++++ src/r_drawer_context.h | 123 ++++++ src/r_drawt.cpp | 22 +- src/r_drawt_rgba.cpp | 2 + src/r_main.cpp | 14 +- src/r_main.h | 16 - src/r_plane.cpp | 454 ++-------------------- src/r_plane.h | 8 - src/r_segs.cpp | 541 +++++--------------------- src/r_swrenderer.cpp | 1 + src/r_things.cpp | 382 +++++-------------- src/r_things.h | 9 +- src/v_draw.cpp | 109 +++--- 19 files changed, 1922 insertions(+), 1291 deletions(-) create mode 100644 src/r_drawer_context.cpp create mode 100644 src/r_drawer_context.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 8c0a30ea07..49152b785c 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -892,6 +892,7 @@ set( FASTMATH_PCH_SOURCES r_swrenderer.cpp r_3dfloors.cpp r_bsp.cpp + r_drawer_context.cpp r_draw.cpp r_draw_rgba.cpp r_drawt.cpp diff --git a/src/r_bsp.cpp b/src/r_bsp.cpp index 934d2d3e54..2b94b1e95a 100644 --- a/src/r_bsp.cpp +++ b/src/r_bsp.cpp @@ -41,7 +41,7 @@ #include "r_local.h" #include "r_main.h" #include "r_plane.h" -#include "r_draw.h" +#include "r_drawer_context.h" #include "r_things.h" #include "r_3dfloors.h" #include "a_sharedglobal.h" @@ -545,7 +545,7 @@ void R_AddLine (seg_t *line) curline = line; // [RH] Color if not texturing line - dc_color = (((int)(line - segs) * 8) + 4) & 255; + DrawerContext::SetFlatColor((((int)(line - segs) * 8) + 4) & 255); pt1 = line->v1->fPos() - ViewPos; pt2 = line->v2->fPos() - ViewPos; diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 4dcdc3e6be..19195e9076 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -23,6 +23,8 @@ // //----------------------------------------------------------------------------- +#define DRAWER_INTERNALS + #include #include "templates.h" @@ -40,6 +42,8 @@ #include 
"r_data/colormaps.h" #include "r_plane.h" #include "r_draw_rgba.h" +#include "r_drawer_context.h" +#include "d_net.h" #include "gi.h" #include "stats.h" @@ -66,6 +70,7 @@ int ylookup[MAXHEIGHT]; BYTE *dc_destorg; } int scaledviewwidth; +DCanvas *dc_canvas; // [RH] Pointers to the different column drawers. // These get changed depending on the current @@ -97,9 +102,9 @@ void (*R_DrawSpanMaskedAddClamp)(void); void (*R_FillSpan)(void); void (*R_FillColumnHoriz)(void); void (*R_DrawFogBoundary)(int x1, int x2, short *uclip, short *dclip); -void (*R_MapTiltedPlane)(int y, int x1); -void (*R_MapColoredPlane)(int y, int x1); -void (*R_DrawParticle)(vissprite_t *); +void (*R_DrawTiltedSpan)(int y, int x1, int x2); +void (*R_DrawColoredSpan)(int y, int x1, int x2); +void (*R_FillTransColumn)(int x, int y1, int y2, int color, int alpha); fixed_t (*tmvline1_add)(); void (*tmvline4_add)(); fixed_t (*tmvline1_addclamp)(); @@ -134,6 +139,24 @@ void (*rt_tlatesubclamp4cols)(int sx, int yl, int yh); void (*rt_tlaterevsubclamp4cols)(int sx, int yl, int yh); void (*rt_initcols)(BYTE *buffer); void (*rt_span_coverage)(int x, int start, int stop); +void (*colfunc) (void); +void (*basecolfunc) (void); +void (*fuzzcolfunc) (void); +void (*transcolfunc) (void); +void (*spanfunc) (void); +void (*hcolfunc_pre) (void); +void (*hcolfunc_post1) (int hx, int sx, int yl, int yh); +void (*hcolfunc_post2) (int hx, int sx, int yl, int yh); +void (*hcolfunc_post4) (int sx, int yl, int yh); + +extern "C" void R_DrawTiltedPlane_ASM(int y, int x1); +#ifdef X86_ASM +extern "C" void R_SetSpanSource_ASM(const BYTE *flat); +extern "C" void R_SetSpanSize_ASM(int xbits, int ybits); +extern "C" void R_SetSpanColormap_ASM(BYTE *colormap); +extern "C" void R_SetTiltedSpanSource_ASM(const BYTE *flat); +extern "C" BYTE *ds_curcolormap, *ds_cursource, *ds_curtiltedsource; +#endif // // R_DrawColumn @@ -1040,13 +1063,6 @@ const BYTE* ds_source; // just for profiling int dscount; - -#ifdef X86_ASM -extern "C" void 
R_SetSpanSource_ASM (const BYTE *flat); -extern "C" void R_SetSpanSize_ASM (int xbits, int ybits); -extern "C" void R_SetSpanColormap_ASM (BYTE *colormap); -extern "C" BYTE *ds_curcolormap, *ds_cursource, *ds_curtiltedsource; -#endif } //========================================================================== @@ -1076,9 +1092,8 @@ void R_SetSpanSource(const BYTE *pixels) // //========================================================================== -void R_SetSpanColormap(FDynamicColormap *colormap, int shade) +void R_SetSpanColormap() { - R_SetDSColorMapLight(colormap, 0, shade); #ifdef X86_ASM if (!r_swtruecolor && ds_colormap != ds_curcolormap) { @@ -2297,9 +2312,9 @@ void R_InitColumnDrawers () R_FillColumnHoriz = R_FillColumnHoriz_rgba; R_DrawFogBoundary = R_DrawFogBoundary_rgba; - R_MapTiltedPlane = R_MapTiltedPlane_rgba; - R_MapColoredPlane = R_MapColoredPlane_rgba; - R_DrawParticle = R_DrawParticle_rgba; + R_DrawTiltedSpan = R_DrawTiltedSpan_rgba; + R_DrawColoredSpan = R_DrawColoredSpan_rgba; + R_FillTransColumn = R_FillTransColumn_rgba; tmvline1_add = tmvline1_add_rgba; tmvline4_add = tmvline4_add_rgba; @@ -2394,9 +2409,15 @@ void R_InitColumnDrawers () R_FillColumnHoriz = R_FillColumnHorizP_C; R_DrawFogBoundary = R_DrawFogBoundary_C; - R_MapTiltedPlane = R_MapTiltedPlane_C; - R_MapColoredPlane = R_MapColoredPlane_C; - R_DrawParticle = R_DrawParticle_C; + R_DrawColoredSpan = R_DrawColoredSpan_C; + R_FillTransColumn = R_FillTransColumn_C; + +#ifdef X86_ASM + // To do: update R_DrawTiltedPlane_ASM to use x2 rather than spanend[y] + R_DrawTiltedSpan = [](int y, int x1, int x2) { R_DrawTiltedPlane_ASM(y, x1); }; +#else + R_DrawTiltedSpan = R_DrawTiltedSpan_C; +#endif tmvline1_add = tmvline1_add_C; tmvline4_add = tmvline4_add_C; @@ -2829,3 +2850,744 @@ void R_SetDSColorMapLight(FColormap *base_colormap, float light, int shade) ds_colormap = base_colormap->Maps + (GETPALOOKUP(light, shade) << COLORMAPSHIFT); } } + 
+///////////////////////////////////////////////////////////////////////////// + +FVector3 ds_plane_sz, ds_plane_su, ds_plane_sv; +bool ds_plane_shade; +float ds_planelightfloat; +fixed_t ds_pviewx, ds_pviewy; +int ds_planeshade; +extern "C" BYTE *tiltlighting[MAXWIDTH]; + +extern "C" { void R_CalcTiltedLighting(double lval, double lend, int width); } + +#ifdef _MSC_VER +#pragma warning(disable:4244) // warning C4244: conversion from 'SQWORD' to 'DWORD', possible loss of data +#endif + +//========================================================================== +// +// R_CalcTiltedLighting +// +// Calculates the lighting for one row of a tilted plane. If the definition +// of GETPALOOKUP changes, this needs to change, too. +// +//========================================================================== + +extern "C" { +void R_CalcTiltedLighting (double lval, double lend, int width) +{ + double lstep; + BYTE *lightfiller; + BYTE *basecolormapdata = ds_fcolormap->Maps; + int i = 0; + + if (width == 0 || lval == lend) + { // Constant lighting + lightfiller = basecolormapdata + (GETPALOOKUP(lval, ds_planeshade) << COLORMAPSHIFT); + } + else + { + lstep = (lend - lval) / width; + if (lval >= MAXLIGHTVIS) + { // lval starts "too bright". + lightfiller = basecolormapdata + (GETPALOOKUP(lval, ds_planeshade) << COLORMAPSHIFT); + for (; i <= width && lval >= MAXLIGHTVIS; ++i) + { + tiltlighting[i] = lightfiller; + lval += lstep; + } + } + if (lend >= MAXLIGHTVIS) + { // lend ends "too bright". + lightfiller = basecolormapdata + (GETPALOOKUP(lend, ds_planeshade) << COLORMAPSHIFT); + for (; width > i && lend >= MAXLIGHTVIS; --width) + { + tiltlighting[width] = lightfiller; + lend -= lstep; + } + } + if (width > 0) + { + lval = FIXED2DBL(ds_planeshade) - lval; + lend = FIXED2DBL(ds_planeshade) - lend; + lstep = (lend - lval) / width; + if (lstep < 0) + { // Going from dark to light + if (lval < 1.) 
+ { // All bright + lightfiller = basecolormapdata; + } + else + { + if (lval >= NUMCOLORMAPS) + { // Starts beyond the dark end + BYTE *clight = basecolormapdata + ((NUMCOLORMAPS-1) << COLORMAPSHIFT); + while (lval >= NUMCOLORMAPS && i <= width) + { + tiltlighting[i++] = clight; + lval += lstep; + } + if (i > width) + return; + } + while (i <= width && lval >= 0) + { + tiltlighting[i++] = basecolormapdata + (xs_ToInt(lval) << COLORMAPSHIFT); + lval += lstep; + } + lightfiller = basecolormapdata; + } + } + else + { // Going from light to dark + if (lval >= (NUMCOLORMAPS-1)) + { // All dark + lightfiller = basecolormapdata + ((NUMCOLORMAPS-1) << COLORMAPSHIFT); + } + else + { + while (lval < 0 && i <= width) + { + tiltlighting[i++] = basecolormapdata; + lval += lstep; + } + if (i > width) + return; + while (i <= width && lval < (NUMCOLORMAPS-1)) + { + tiltlighting[i++] = basecolormapdata + (xs_ToInt(lval) << COLORMAPSHIFT); + lval += lstep; + } + lightfiller = basecolormapdata + ((NUMCOLORMAPS-1) << COLORMAPSHIFT); + } + } + } + } + for (; i <= width; i++) + { + tiltlighting[i] = lightfiller; + } +} +} // extern "C" + +void R_DrawTiltedSpan_C (int y, int x1, int x2) +{ + int width = x2 - x1; + double iz, uz, vz; + BYTE *fb; + DWORD u, v; + int i; + + iz = ds_plane_sz[2] + ds_plane_sz[1]*(centery-y) + ds_plane_sz[0]*(x1-centerx); + + // Lighting is simple. It's just linear interpolation from start to end + if (ds_plane_shade) + { + uz = (iz + ds_plane_sz[0]*width) * ds_planelightfloat; + vz = iz * ds_planelightfloat; + R_CalcTiltedLighting (vz, uz, width); + } + + uz = ds_plane_su[2] + ds_plane_su[1]*(centery-y) + ds_plane_su[0]*(x1-centerx); + vz = ds_plane_sv[2] + ds_plane_sv[1]*(centery-y) + ds_plane_sv[0]*(x1-centerx); + + fb = ylookup[y] + x1 + dc_destorg; + + BYTE vshift = 32 - ds_ybits; + BYTE ushift = vshift - ds_xbits; + int umask = ((1 << ds_xbits) - 1) << ds_ybits; + +#if 0 // The "perfect" reference version of this routine. Pretty slow. 
+ // Use it only to see how things are supposed to look. + i = 0; + do + { + double z = 1.f/iz; + + u = SQWORD(uz*z) + ds_pviewx; + v = SQWORD(vz*z) + ds_pviewy; + R_SetDSColorMapLight(tiltlighting[i], 0, 0); + fb[i++] = ds_colormap[ds_source[(v >> vshift) | ((u >> ushift) & umask)]]; + iz += ds_plane_sz[0]; + uz += ds_plane_su[0]; + vz += ds_plane_sv[0]; + } while (--width >= 0); +#else +//#define SPANSIZE 32 +//#define INVSPAN 0.03125f +//#define SPANSIZE 8 +//#define INVSPAN 0.125f +#define SPANSIZE 16 +#define INVSPAN 0.0625f + + double startz = 1.f/iz; + double startu = uz*startz; + double startv = vz*startz; + double izstep, uzstep, vzstep; + + izstep = ds_plane_sz[0] * SPANSIZE; + uzstep = ds_plane_su[0] * SPANSIZE; + vzstep = ds_plane_sv[0] * SPANSIZE; + x1 = 0; + width++; + + while (width >= SPANSIZE) + { + iz += izstep; + uz += uzstep; + vz += vzstep; + + double endz = 1.f/iz; + double endu = uz*endz; + double endv = vz*endz; + DWORD stepu = SQWORD((endu - startu) * INVSPAN); + DWORD stepv = SQWORD((endv - startv) * INVSPAN); + u = SQWORD(startu) + ds_pviewx; + v = SQWORD(startv) + ds_pviewy; + + for (i = SPANSIZE-1; i >= 0; i--) + { + fb[x1] = *(tiltlighting[x1] + ds_source[(v >> vshift) | ((u >> ushift) & umask)]); + x1++; + u += stepu; + v += stepv; + } + startu = endu; + startv = endv; + width -= SPANSIZE; + } + if (width > 0) + { + if (width == 1) + { + u = SQWORD(startu); + v = SQWORD(startv); + fb[x1] = *(tiltlighting[x1] + ds_source[(v >> vshift) | ((u >> ushift) & umask)]); + } + else + { + double left = width; + iz += ds_plane_sz[0] * left; + uz += ds_plane_su[0] * left; + vz += ds_plane_sv[0] * left; + + double endz = 1.f/iz; + double endu = uz*endz; + double endv = vz*endz; + left = 1.f/left; + DWORD stepu = SQWORD((endu - startu) * left); + DWORD stepv = SQWORD((endv - startv) * left); + u = SQWORD(startu) + ds_pviewx; + v = SQWORD(startv) + ds_pviewy; + + for (; width != 0; width--) + { + fb[x1] = *(tiltlighting[x1] + ds_source[(v >> vshift) 
| ((u >> ushift) & umask)]); + x1++; + u += stepu; + v += stepv; + } + } + } +#endif +} + +void R_DrawColoredSpan_C (int y, int x1, int x2) +{ + memset (ylookup[y] + x1 + dc_destorg, ds_color, x2 - x1 + 1); +} + +///////////////////////////////////////////////////////////////////////////// + +// Draw a column with support for non-power-of-two ranges +uint32_t wallscan_drawcol1(int x, int y1, int y2, uint32_t uv_start, uint32_t uv_step, uint32_t uv_max, const BYTE *source, DWORD(*draw1column)()) +{ + int pixelsize = r_swtruecolor ? 4 : 1; + if (uv_max == 0) // power of two + { + int count = y2 - y1; + + dc_source = source; + dc_dest = (ylookup[y1] + x) * pixelsize + dc_destorg; + dc_count = count; + dc_iscale = uv_step; + dc_texturefrac = uv_start; + draw1column(); + + uint64_t step64 = uv_step; + uint64_t pos64 = uv_start; + return (uint32_t)(pos64 + step64 * count); + } + else + { + uint32_t uv_pos = uv_start; + + uint32_t left = y2 - y1; + while (left > 0) + { + uint32_t available = uv_max - uv_pos; + uint32_t next_uv_wrap = available / uv_step; + if (available % uv_step != 0) + next_uv_wrap++; + uint32_t count = MIN(left, next_uv_wrap); + + dc_source = source; + dc_dest = (ylookup[y1] + x) * pixelsize + dc_destorg; + dc_count = count; + dc_iscale = uv_step; + dc_texturefrac = uv_pos; + draw1column(); + + left -= count; + uv_pos += uv_step * count; + if (uv_pos >= uv_max) + uv_pos -= uv_max; + } + + return uv_pos; + } +} + +// Draw four columns with support for non-power-of-two ranges +void wallscan_drawcol4(int x, int y1, int y2, uint32_t *uv_pos, uint32_t *uv_step, uint32_t uv_max, const BYTE **source, void(*draw4columns)()) +{ + int pixelsize = r_swtruecolor ? 
4 : 1; + if (uv_max == 0) // power of two, no wrap handling needed + { + int count = y2 - y1; + for (int i = 0; i < 4; i++) + { + bufplce[i] = source[i]; + vplce[i] = uv_pos[i]; + vince[i] = uv_step[i]; + + uint64_t step64 = uv_step[i]; + uint64_t pos64 = uv_pos[i]; + uv_pos[i] = (uint32_t)(pos64 + step64 * count); + } + dc_dest = (ylookup[y1] + x) * pixelsize + dc_destorg; + dc_count = count; + draw4columns(); + } + else + { + dc_dest = (ylookup[y1] + x) * pixelsize + dc_destorg; + for (int i = 0; i < 4; i++) + bufplce[i] = source[i]; + + uint32_t left = y2 - y1; + while (left > 0) + { + // Find which column wraps first + uint32_t count = left; + for (int i = 0; i < 4; i++) + { + uint32_t available = uv_max - uv_pos[i]; + uint32_t next_uv_wrap = available / uv_step[i]; + if (available % uv_step[i] != 0) + next_uv_wrap++; + count = MIN(next_uv_wrap, count); + } + + // Draw until that column wraps + for (int i = 0; i < 4; i++) + { + vplce[i] = uv_pos[i]; + vince[i] = uv_step[i]; + } + dc_count = count; + draw4columns(); + + // Wrap the uv position + for (int i = 0; i < 4; i++) + { + uv_pos[i] += uv_step[i] * count; + if (uv_pos[i] >= uv_max) + uv_pos[i] -= uv_max; + } + + left -= count; + } + } +} + +// Calculates a wrapped uv start position value for a column +void calc_uv_start_and_step(int y1, float swal, double yrepeat, uint32_t uv_height, int fracbits, uint32_t &uv_start_out, uint32_t &uv_step_out) +{ + double uv_stepd = swal * yrepeat; + + // Find start uv in [0-uv_height[ range. + // Not using xs_ToFixed because it rounds the result and we need something that always rounds down to stay within the range. 
+ double v = (dc_texturemid + uv_stepd * (y1 - CenterY + 0.5)) / uv_height; + v = v - floor(v); + v *= uv_height; + v *= (1 << fracbits); + + uv_start_out = (uint32_t)v; + uv_step_out = xs_ToFixed(fracbits, uv_stepd); +} + +typedef DWORD(*Draw1ColumnFuncPtr)(); +typedef void(*Draw4ColumnsFuncPtr)(); + +void wallscan_any( + int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, + FTexture *rw_pic, fixed_t rw_offset, const BYTE *(*getcol)(FTexture *tex, int x), + void(setupwallscan(int bits, Draw1ColumnFuncPtr &draw1, Draw4ColumnsFuncPtr &draw2))) +{ + if (rw_pic->UseType == FTexture::TEX_Null) + return; + + uint32_t uv_height = rw_pic->GetHeight(); + uint32_t fracbits = 32 - rw_pic->HeightBits; + uint32_t uv_max = uv_height << fracbits; + + DWORD(*draw1column)(); + void(*draw4columns)(); + setupwallscan(fracbits, draw1column, draw4columns); + + fixed_t xoffset = rw_offset; + + bool fixed = (fixedcolormap != NULL || fixedlightlev >= 0); + if (fixed) + { + palookupoffse[0] = dc_colormap; + palookupoffse[1] = dc_colormap; + palookupoffse[2] = dc_colormap; + palookupoffse[3] = dc_colormap; + palookuplight[0] = 0; + palookuplight[1] = 0; + palookuplight[2] = 0; + palookuplight[3] = 0; + } + + if (fixedcolormap) + R_SetColorMapLight(fixedcolormap, 0, 0); + else + R_SetColorMapLight(basecolormap, 0, 0); + + float light = rw_light; + + // Calculate where 4 column alignment begins and ends: + int aligned_x1 = clamp((x1 + 3) / 4 * 4, x1, x2); + int aligned_x2 = clamp(x2 / 4 * 4, x1, x2); + + // First unaligned columns: + for (int x = x1; x < aligned_x1; x++, light += rw_lightstep) + { + int y1 = uwal[x]; + int y2 = dwal[x]; + if (y2 <= y1) + continue; + + if (!fixed) + R_SetColorMapLight(basecolormap, light, wallshade); + + const BYTE *source = getcol(rw_pic, (lwal[x] + xoffset) >> FRACBITS); + + uint32_t uv_start, uv_step; + calc_uv_start_and_step(y1, swal[x], yrepeat, uv_height, fracbits, uv_start, uv_step); + + wallscan_drawcol1(x, y1, y2, 
uv_start, uv_step, uv_max, source, draw1column); + } + + // The aligned columns + for (int x = aligned_x1; x < aligned_x2; x += 4) + { + // Find y1, y2, light and uv values for four columns: + int y1[4] = { uwal[x], uwal[x + 1], uwal[x + 2], uwal[x + 3] }; + int y2[4] = { dwal[x], dwal[x + 1], dwal[x + 2], dwal[x + 3] }; + + const BYTE *source[4]; + for (int i = 0; i < 4; i++) + source[i] = getcol(rw_pic, (lwal[x + i] + xoffset) >> FRACBITS); + + float lights[4]; + for (int i = 0; i < 4; i++) + { + lights[i] = light; + light += rw_lightstep; + } + + uint32_t uv_pos[4], uv_step[4]; + for (int i = 0; i < 4; i++) + calc_uv_start_and_step(y1[i], swal[x + i], yrepeat, uv_height, fracbits, uv_pos[i], uv_step[i]); + + // Figure out where we vertically can start and stop drawing 4 columns in one go + int middle_y1 = y1[0]; + int middle_y2 = y2[0]; + for (int i = 1; i < 4; i++) + { + middle_y1 = MAX(y1[i], middle_y1); + middle_y2 = MIN(y2[i], middle_y2); + } + + // If we got an empty column in our set we cannot draw 4 columns in one go: + bool empty_column_in_set = false; + for (int i = 0; i < 4; i++) + { + if (y2[i] <= y1[i]) + empty_column_in_set = true; + } + + if (empty_column_in_set || middle_y2 <= middle_y1) + { + for (int i = 0; i < 4; i++) + { + if (y2[i] <= y1[i]) + continue; + + if (!fixed) + R_SetColorMapLight(basecolormap, lights[i], wallshade); + wallscan_drawcol1(x + i, y1[i], y2[i], uv_pos[i], uv_step[i], uv_max, source[i], draw1column); + } + continue; + } + + // Draw the first rows where not all 4 columns are active + for (int i = 0; i < 4; i++) + { + if (!fixed) + R_SetColorMapLight(basecolormap, lights[i], wallshade); + + if (y1[i] < middle_y1) + uv_pos[i] = wallscan_drawcol1(x + i, y1[i], middle_y1, uv_pos[i], uv_step[i], uv_max, source[i], draw1column); + } + + // Draw the area where all 4 columns are active + if (!fixed) + { + for (int i = 0; i < 4; i++) + { + if (r_swtruecolor) + { + palookupoffse[i] = basecolormap->Maps; + palookuplight[i] = 
LIGHTSCALE(lights[i], wallshade); + } + else + { + palookupoffse[i] = basecolormap->Maps + (GETPALOOKUP(lights[i], wallshade) << COLORMAPSHIFT); + palookuplight[i] = 0; + } + } + } + wallscan_drawcol4(x, middle_y1, middle_y2, uv_pos, uv_step, uv_max, source, draw4columns); + + // Draw the last rows where not all 4 columns are active + for (int i = 0; i < 4; i++) + { + if (!fixed) + R_SetColorMapLight(basecolormap, lights[i], wallshade); + + if (middle_y2 < y2[i]) + uv_pos[i] = wallscan_drawcol1(x + i, middle_y2, y2[i], uv_pos[i], uv_step[i], uv_max, source[i], draw1column); + } + } + + // The last unaligned columns: + for (int x = aligned_x2; x < x2; x++, light += rw_lightstep) + { + int y1 = uwal[x]; + int y2 = dwal[x]; + if (y2 <= y1) + continue; + + if (!fixed) + R_SetColorMapLight(basecolormap, light, wallshade); + + const BYTE *source = getcol(rw_pic, (lwal[x] + xoffset) >> FRACBITS); + + uint32_t uv_start, uv_step; + calc_uv_start_and_step(y1, swal[x], yrepeat, uv_height, fracbits, uv_start, uv_step); + + wallscan_drawcol1(x, y1, y2, uv_start, uv_step, uv_max, source, draw1column); + } + + NetUpdate(); +} + +void wallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, FTexture *rw_pic, fixed_t rw_offset, const BYTE *(*getcol)(FTexture *tex, int x)) +{ + wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, rw_pic, rw_offset, getcol, [](int bits, Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) + { + setupvline(bits); + line1 = dovline1; + line4 = dovline4; + }); +} + +void maskwallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, FTexture *rw_pic, fixed_t rw_offset, const BYTE *(*getcol)(FTexture *tex, int x)) +{ + if (!rw_pic->bMasked) // Textures that aren't masked can use the faster wallscan. 
+ { + wallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat, rw_pic, rw_offset, getcol); + } + else + { + wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, rw_pic, rw_offset, getcol, [](int bits, Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) + { + setupmvline(bits); + line1 = domvline1; + line4 = domvline4; + }); + } +} + +void transmaskwallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, FTexture *rw_pic, fixed_t rw_offset, const BYTE *(*getcol)(FTexture *tex, int x)) +{ + static fixed_t(*tmvline1)(); + static void(*tmvline4)(); + if (!R_GetTransMaskDrawers(&tmvline1, &tmvline4)) + { + // The current translucency is unsupported, so draw with regular maskwallscan instead. + maskwallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat, rw_pic, rw_offset, getcol); + } + else + { + wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, rw_pic, rw_offset, getcol, [](int bits, Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) + { + setuptmvline(bits); + line1 = reinterpret_cast(tmvline1); + line4 = tmvline4; + }); + } +} + +///////////////////////////////////////////////////////////////////////////// + +void R_FillTransColumn_C(int x, int y1, int y2, int color, int alpha) +{ + fixed_t fglevel, bglevel; + DWORD *fg2rgb; + DWORD *bg2rgb; + int spacing; + BYTE *dest; + DWORD fg; + + fglevel = ((alpha + 1) << 8) & ~0x3ff; + bglevel = FRACUNIT - fglevel; + fg2rgb = Col2RGB8[fglevel >> 10]; + bg2rgb = Col2RGB8[bglevel >> 10]; + fg = fg2rgb[color]; + + spacing = dc_pitch; + + int ycount = y2 - y1 + 1; + dest = ylookup[y1] + x + dc_destorg; + for (int y = 0; y < ycount; y++) + { + DWORD bg = bg2rgb[*dest]; + bg = (fg + bg) | 0x1f07c1f; + *dest = RGB32k.All[bg & (bg >> 15)]; + dest += spacing; + } +} + +///////////////////////////////////////////////////////////////////////////// + +// +// R_DrawMaskedColumn +// Used for sprites and masked mid textures. +// Masked means: partly transparent, i.e. 
stored +// in posts/runs of opaque pixels. +// +short* dc_mfloorclip; +short* dc_mceilingclip; + +double dc_spryscale; +double dc_sprtopscreen; + +bool dc_sprflipvert; + +void R_DrawMaskedColumn(int x, const BYTE *column, const FTexture::Span *span) +{ + int pixelsize = r_swtruecolor ? 4 : 1; + const fixed_t centeryfrac = FLOAT2FIXED(CenterY); + const fixed_t texturemid = FLOAT2FIXED(dc_texturemid); + while (span->Length != 0) + { + const int length = span->Length; + const int top = span->TopOffset; + + fixed_t texturefrac = dc_texturefrac; + fixed_t iscale = dc_iscale; + + // calculate unclipped screen coordinates for post + int yl = xs_RoundToInt(dc_sprtopscreen + dc_spryscale * top); + int yh = xs_RoundToInt(dc_sprtopscreen + dc_spryscale * (top + length)) - 1; + + if (dc_sprflipvert) + { + swapvalues(yl, yh); + } + + if (yh >= dc_mfloorclip[x]) + { + yh = dc_mfloorclip[x] - 1; + } + if (yl < dc_mceilingclip[x]) + { + yl = dc_mceilingclip[x]; + } + + if (yl <= yh) + { + if (dc_sprflipvert) + { + texturefrac = (yl*iscale) - (top << FRACBITS) + - FixedMul(centeryfrac, iscale) - texturemid; + const fixed_t maxfrac = length << FRACBITS; + while (texturefrac >= maxfrac) + { + if (++yl > yh) + goto nextpost; + texturefrac += iscale; + } + fixed_t endfrac = texturefrac + (yh - yl)*iscale; + while (endfrac < 0) + { + if (--yh < yl) + goto nextpost; + endfrac -= iscale; + } + } + else + { + texturefrac = texturemid - (top << FRACBITS) + + (yl*iscale) - FixedMul(centeryfrac - FRACUNIT, iscale); + while (texturefrac < 0) + { + if (++yl > yh) + goto nextpost; + texturefrac += iscale; + } + fixed_t endfrac = texturefrac + (yh - yl)*iscale; + const fixed_t maxfrac = length << FRACBITS; + if (yh < dc_mfloorclip[x] - 1 && endfrac < maxfrac - iscale) + { + yh++; + } + else while (endfrac >= maxfrac) + { + if (--yh < yl) + goto nextpost; + endfrac -= iscale; + } + } + + dc_yl = yl; + dc_yh = yh; + dc_x = x; + dc_texturefrac = texturefrac; + dc_iscale = iscale; + dc_source = 
column + top; + dc_count = yh - yl + 1; + dc_dest = (ylookup[yl] + x) * pixelsize + dc_destorg; + colfunc(); + } + nextpost: + span++; + } +} diff --git a/src/r_draw.h b/src/r_draw.h index a311834055..c22c958d02 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -25,6 +25,11 @@ #include "r_defs.h" +// Prevents files outside the DrawerContext class getting good ideas about +// accessing the private globals. Any drawer actions should be facilitated +// via the DrawerContext class! +#ifdef DRAWER_INTERNALS + // Spectre/Invisibility. #define FUZZTABLE 50 extern "C" int fuzzoffset[FUZZTABLE + 1]; // [RH] +1 for the assembly routine @@ -71,6 +76,7 @@ extern "C" fixed_t dc_destalpha; // first pixel in a column extern "C" const BYTE* dc_source; +extern DCanvas *dc_canvas; extern "C" BYTE *dc_dest, *dc_destorg; extern "C" int dc_count; @@ -86,6 +92,23 @@ extern "C" unsigned int dc_tspans[4][MAXHEIGHT]; extern "C" unsigned int *dc_ctspan[4]; extern "C" unsigned int horizspans[4]; +// +// Function pointers to switch refresh/drawing functions. +// Used to select shadow mode etc. +// +extern void (*colfunc) (void); +extern void (*basecolfunc) (void); +extern void (*fuzzcolfunc) (void); +extern void (*transcolfunc) (void); +// No shadow effects on floors. +extern void (*spanfunc) (void); + +// [RH] Function pointers for the horizontal column drawers. +extern void (*hcolfunc_pre) (void); +extern void (*hcolfunc_post1) (int hx, int sx, int yl, int yh); +extern void (*hcolfunc_post2) (int hx, int sx, int yl, int yh); +extern void (*hcolfunc_post4) (int sx, int yl, int yh); + // [RH] Pointers to the different column and span drawers... // The span blitting interface. @@ -116,7 +139,7 @@ extern void (*R_DrawTranslatedColumn)(void); // Span drawing for rows, floor/ceiling. No Spectre effect needed. 
extern void (*R_DrawSpan)(void); void R_SetupSpanBits(FTexture *tex); -void R_SetSpanColormap(FDynamicColormap *colormap, int shade); +void R_SetSpanColormap(); void R_SetSpanSource(const BYTE *pixels); // Span drawing for masked textures. @@ -281,6 +304,15 @@ void R_FillColumnP_C (void); void R_FillColumnHorizP_C (void); void R_FillSpan_C (void); +// vars for R_DrawMaskedColumn +extern short* dc_mfloorclip; +extern short* dc_mceilingclip; +extern double dc_spryscale; +extern double dc_sprtopscreen; +extern bool dc_sprflipvert; + +void R_DrawMaskedColumn(int x, const BYTE *column, const FTexture::Span *spans); + #ifdef X86_ASM #define R_SetupDrawSlab R_SetupDrawSlabA #define R_DrawSlab R_DrawSlabA @@ -325,12 +357,7 @@ void R_InitShadeMaps(); void R_InitFuzzTable (int fuzzoff); // [RH] Consolidate column drawer selection -enum ESPSResult -{ - DontDraw, // not useful to draw this - DoDraw0, // draw this as if r_columnmethod is 0 - DoDraw1, // draw this as if r_columnmethod is 1 -}; +enum ESPSResult; ESPSResult R_SetPatchStyle (FRenderStyle style, fixed_t alpha, int translation, DWORD color); inline ESPSResult R_SetPatchStyle(FRenderStyle style, float alpha, int translation, DWORD color) { @@ -353,18 +380,6 @@ extern void(*tmvline4_revsubclamp)(); // transmaskwallscan calls this to find out what column drawers to use bool R_GetTransMaskDrawers (fixed_t (**tmvline1)(), void (**tmvline4)()); -// Retrieve column data for wallscan. Should probably be removed -// to just use the texture's GetColumn() method. It just exists -// for double-layer skies. -const BYTE *R_GetColumn (FTexture *tex, int col); -void wallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int col)=R_GetColumn); - -// maskwallscan is exactly like wallscan but does not draw anything where the texture is color 0. 
-void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int col)=R_GetColumn); - -// transmaskwallscan is like maskwallscan, but it can also blend to the background -void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int col)=R_GetColumn); - // Sets dc_colormap and dc_light to their appropriate values depending on the output format (pal vs true color) void R_SetColorMapLight(FColormap *base_colormap, float light, int shade); @@ -373,4 +388,40 @@ void R_SetDSColorMapLight(FColormap *base_colormap, float light, int shade); void R_SetTranslationMap(lighttable_t *translation); +// Retrieve column data for wallscan. Should probably be removed +// to just use the texture's GetColumn() method. It just exists +// for double-layer skies. +const BYTE *R_GetColumn (FTexture *tex, int col); +void wallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, FTexture *rw_pic, fixed_t rw_offset, const BYTE *(*getcol)(FTexture *tex, int col)=R_GetColumn); + +// maskwallscan is exactly like wallscan but does not draw anything where the texture is color 0. 
+void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, FTexture *rw_pic, fixed_t rw_offset, const BYTE *(*getcol)(FTexture *tex, int col)=R_GetColumn); + +// transmaskwallscan is like maskwallscan, but it can also blend to the background +void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, FTexture *rw_pic, fixed_t rw_offset, const BYTE *(*getcol)(FTexture *tex, int col)=R_GetColumn); + +extern void(*R_DrawColoredSpan)(int y, int x1, int x2); +extern void(*R_DrawTiltedSpan)(int y, int x1, int x2); + +void R_DrawTiltedSpan_C(int y, int x1, int x2); +void R_DrawTiltedSpan_rgba(int y, int x1, int x2); +void R_DrawColoredSpan_C(int y, int x1, int x2); +void R_DrawColoredSpan_rgba(int y, int x1, int x2); + +extern FVector3 ds_plane_sz, ds_plane_su, ds_plane_sv; +extern bool ds_plane_shade; +extern float ds_planelightfloat; +extern fixed_t ds_pviewx, ds_pviewy; +extern int ds_planeshade; + +extern "C" BYTE *tiltlighting[MAXWIDTH]; +extern "C" { void R_CalcTiltedLighting(double lval, double lend, int width); } + +struct vissprite_t; +extern void(*R_FillTransColumn)(int x, int y1, int y2, int color, int alpha); +void R_FillTransColumn_C(int x, int y1, int y2, int color, int alpha); +void R_FillTransColumn_rgba(int x, int y1, int y2, int color, int alpha); + +#endif + #endif diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 2062609b41..420b63dff7 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -21,6 +21,8 @@ // //----------------------------------------------------------------------------- +#define DRAWER_INTERNALS + #include #include "templates.h" @@ -3492,6 +3494,158 @@ public: } }; +class DrawTiltedSpanRGBACommand : public DrawerCommand +{ + int _y; + int _x1; + int _x2; + BYTE *dc_destorg; + fixed_t dc_light; + ShadeConstants dc_shade_constants; + const BYTE *ds_source; + +public: + DrawTiltedSpanRGBACommand(int y, int x1, int x2) + { + 
_y = y; + _x1 = x1; + _x2 = x2; + + dc_destorg = ::dc_destorg; + ds_source = ::ds_source; + } + + void Execute(DrawerThread *thread) override + { + if (thread->line_skipped_by_thread(_y)) + return; + + int y = _y; + int x1 = _x1; + int x2 = _x2; + + // Slopes are broken currently in master. + // Until R_DrawTiltedPlane is fixed we are just going to fill with a solid color. + + uint32_t *source = (uint32_t*)ds_source; + uint32_t *dest = ylookup[y] + x1 + (uint32_t*)dc_destorg; + + int count = x2 - x1 + 1; + while (count > 0) + { + *(dest++) = source[0]; + count--; + } + } +}; + +class DrawColoredSpanRGBACommand : public DrawerCommand +{ + int _y; + int _x1; + int _x2; + BYTE *dc_destorg; + fixed_t ds_light; + int ds_color; + +public: + DrawColoredSpanRGBACommand(int y, int x1, int x2) + { + _y = y; + _x1 = x1; + _x2 = x2; + + dc_destorg = ::dc_destorg; + ds_light = ::ds_light; + ds_color = ::ds_color; + } + + void Execute(DrawerThread *thread) override + { + if (thread->line_skipped_by_thread(_y)) + return; + + int y = _y; + int x1 = _x1; + int x2 = _x2; + + uint32_t *dest = ylookup[y] + x1 + (uint32_t*)dc_destorg; + int count = (x2 - x1 + 1); + uint32_t light = calc_light_multiplier(ds_light); + uint32_t color = shade_pal_index_simple(ds_color, light); + for (int i = 0; i < count; i++) + dest[i] = color; + } +}; + +class FillTransColumnRGBACommand : public DrawerCommand +{ + int _x; + int _y1; + int _y2; + int _color; + int _a; + BYTE *dc_destorg; + int dc_pitch; + fixed_t ds_light; + int ds_color; + +public: + FillTransColumnRGBACommand(int x, int y1, int y2, int color, int a) + { + _x = x; + _y1 = y1; + _y2 = y2; + _color = color; + _a = a; + + dc_destorg = ::dc_destorg; + dc_pitch = ::dc_pitch; + } + + void Execute(DrawerThread *thread) override + { + int x = _x; + int y1 = _y1; + int y2 = _y2; + int color = _color; + int a = _a; + + int ycount = thread->count_for_thread(y1, y2 - y1 + 1); + if (ycount <= 0) + return; + + uint32_t fg = 
GPalette.BaseColors[color].d; + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t alpha = a + 1; + uint32_t inv_alpha = 256 - alpha; + + fg_red *= alpha; + fg_green *= alpha; + fg_blue *= alpha; + + int spacing = dc_pitch * thread->num_cores; + uint32_t *dest = thread->dest_for_thread(y1, dc_pitch, ylookup[y1] + x + (uint32_t*)dc_destorg); + + for (int y = 0; y < ycount; y++) + { + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = (fg_red + bg_red * inv_alpha) / 256; + uint32_t green = (fg_green + bg_green * inv_alpha) / 256; + uint32_t blue = (fg_blue + bg_blue * inv_alpha) / 256; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += spacing; + } + } +}; + ApplySpecialColormapRGBACommand::ApplySpecialColormapRGBACommand(FSpecialColormap *colormap, DFrameBuffer *screen) { buffer = screen->GetBuffer(); @@ -3968,3 +4122,18 @@ void R_DrawFogBoundary_rgba(int x1, int x2, short *uclip, short *dclip) R_DrawFogBoundarySection_rgba(t2, b2, x1); } } + +void R_DrawTiltedSpan_rgba(int y, int x1, int x2) +{ + DrawerCommandQueue::QueueCommand(y, x1, x2); +} + +void R_DrawColoredSpan_rgba(int y, int x1, int x2) +{ + DrawerCommandQueue::QueueCommand(y, x1, x2); +} + +void R_FillTransColumn_rgba(int x, int y1, int y2, int color, int a) +{ + DrawerCommandQueue::QueueCommand(x, y1, y2, color, a); +} diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index 47ea75260d..a91b54d744 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -23,6 +23,11 @@ #ifndef __R_DRAW_RGBA__ #define __R_DRAW_RGBA__ +// Prevents files outside the DrawerContext class getting good ideas about +// accessing the private globals. Any drawer actions should be facilitated +// via the DrawerContext class! 
+#ifdef DRAWER_INTERNALS + #include "r_draw.h" #include "v_palette.h" #include @@ -487,3 +492,5 @@ FORCEINLINE uint32_t alpha_blend(uint32_t fg, uint32_t bg) } #endif + +#endif diff --git a/src/r_drawer_context.cpp b/src/r_drawer_context.cpp new file mode 100644 index 0000000000..3533a3e4fe --- /dev/null +++ b/src/r_drawer_context.cpp @@ -0,0 +1,464 @@ +// Emacs style mode select -*- C++ -*- +//----------------------------------------------------------------------------- +// +// $Id:$ +// +// Copyright (C) 1993-1996 by id Software, Inc. +// +// This source is available for distribution and/or modification +// only under the terms of the DOOM Source Code License as +// published by id Software. All rights reserved. +// +// The source is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// FITNESS FOR A PARTICULAR PURPOSE. See the DOOM Source Code License +// for more details. +// +// $Log:$ +// +// DESCRIPTION: +// The actual span/column drawing functions. +// Here find the main potential for optimization, +// e.g. inline assembly, different algorithms. 
+// +//----------------------------------------------------------------------------- + +#define DRAWER_INTERNALS + +#include + +#include "templates.h" +#include "doomdef.h" +#include "i_system.h" +#include "w_wad.h" +#include "r_local.h" +#include "v_video.h" +#include "doomstat.h" +#include "st_stuff.h" +#include "g_game.h" +#include "g_level.h" +#include "r_data/r_translate.h" +#include "v_palette.h" +#include "r_data/colormaps.h" +#include "r_plane.h" +#include "r_draw_rgba.h" +#include "d_net.h" +#include "r_drawer_context.h" + +#include "gi.h" +#include "stats.h" +#include "x86.h" + +#ifdef X86_ASM +extern "C" void R_SetSpanSource_ASM (const BYTE *flat); +extern "C" void R_SetSpanSize_ASM (int xbits, int ybits); +extern "C" void R_SetSpanColormap_ASM (BYTE *colormap); +extern "C" void R_SetTiltedSpanSource_ASM(const BYTE *flat); +extern "C" BYTE *ds_curcolormap, *ds_cursource, *ds_curtiltedsource; +#endif + +DCanvas *DrawerContext::Canvas() +{ + return dc_canvas; +} + +uint8_t DrawerContext::FlatColor() +{ + return dc_color; +} + +FColormap *DrawerContext::LightColormap() +{ + return dc_fcolormap; +} + +fixed_t DrawerContext::TextureFrac() +{ + return dc_texturefrac; +} + +fixed_t DrawerContext::TextureStep() +{ + return dc_iscale; +} + +double DrawerContext::TextureMid() +{ + return dc_texturemid; +} + +int DrawerContext::SpanXBits() +{ + return ds_xbits; +} + +int DrawerContext::SpanYBits() +{ + return ds_ybits; +} + +lighttable_t *DrawerContext::SpanLitColormap() +{ + return ds_colormap; +} + +bool DrawerContext::IsFuzzColumn() +{ + return colfunc == fuzzcolfunc; +} + +bool DrawerContext::IsFillColumn() +{ + return colfunc == R_FillColumn; +} + +bool DrawerContext::IsBaseColumn() +{ + return colfunc == basecolfunc; +} + +void DrawerContext::SetDest(int x, int y) +{ + int pixelsize = r_swtruecolor ? 
4 : 1; + dc_dest = dc_destorg + (ylookup[y] + x) * pixelsize; +} + +void DrawerContext::SetFlatColor(uint8_t color_index) +{ + dc_color = color_index; +} + +void DrawerContext::SetLight(FColormap *base_colormap, float light, int shade) +{ + R_SetColorMapLight(base_colormap, light, shade); +} + +void DrawerContext::SetX(int x) +{ + dc_x = x; +} + +void DrawerContext::SetY1(int y) +{ + dc_yl = y; +} + +void DrawerContext::SetY2(int y) +{ + dc_yh = y; +} + +void DrawerContext::SetSource(const BYTE *source) +{ + dc_source = source; +} + +void DrawerContext::SetTextureFrac(fixed_t pos) +{ + dc_texturefrac = pos; +} + +void DrawerContext::SetTextureStep(fixed_t step) +{ + dc_iscale = step; +} + +void DrawerContext::SetTextureMid(double value) +{ + dc_texturemid = value; +} + +void DrawerContext::SetDrawCount(int count) +{ + dc_count = count; +} + +void DrawerContext::SetSpanY(int y) +{ + ds_y = y; +} + +void DrawerContext::SetSpanX1(int x) +{ + ds_x1 = x; +} + +void DrawerContext::SetSpanX2(int x) +{ + ds_x2 = x; +} + +void DrawerContext::SetSpanXStep(dsfixed_t step) +{ + ds_xstep = step; +} + +void DrawerContext::SetSpanYStep(dsfixed_t step) +{ + ds_ystep = step; +} + +void DrawerContext::SetSpanXBits(int bits) +{ + ds_xbits = bits; +} + +void DrawerContext::SetSpanYBits(int bits) +{ + ds_ybits = bits; +} + +void DrawerContext::SetSpanXFrac(dsfixed_t frac) +{ + ds_xfrac = frac; +} + +void DrawerContext::SetSpanYFrac(dsfixed_t frac) +{ + ds_yfrac = frac; +} + +void DrawerContext::SetSpanLight(FColormap *base_colormap, float light, int shade) +{ + R_SetDSColorMapLight(base_colormap ? 
base_colormap : &identitycolormap, light, shade); + R_SetSpanColormap(); +} + +ESPSResult DrawerContext::SetPatchStyle(FRenderStyle style, fixed_t alpha, int translation, DWORD color) +{ + return R_SetPatchStyle(style, alpha, translation, color); +} + +ESPSResult DrawerContext::SetPatchStyle(FRenderStyle style, float alpha, int translation, DWORD color) +{ + return R_SetPatchStyle(style, FLOAT2FIXED(alpha), translation, color); +} + +void DrawerContext::FinishSetPatchStyle() +{ + R_FinishSetPatchStyle(); +} + +void DrawerContext::SetCanvas(DCanvas *canvas) +{ + dc_canvas = canvas; + dc_destorg = canvas->GetBuffer(); + + if (r_swtruecolor != canvas->IsBgra()) + { + r_swtruecolor = canvas->IsBgra(); + R_InitColumnDrawers(); + } +} + +void DrawerContext::SetTranslationMap(lighttable_t *translation) +{ + R_SetTranslationMap(translation ? translation : identitymap); +} + +void DrawerContext::SetSpanSource(FTexture *tex) +{ + R_SetupSpanBits(tex); + if (r_swtruecolor) + ds_source = (const BYTE*)tex->GetPixelsBgra(); + else + ds_source = tex->GetPixels(); + +#ifdef X86_ASM + if (!r_swtruecolor && ds_source != ds_cursource) + { + R_SetSpanSource_ASM (ds_source); + } + if (!r_swtruecolor) + { + if (ds_source != ds_curtiltedsource) + R_SetTiltedSpanSource_ASM(ds_source); + } +#endif +} + +void DrawerContext::SetTiltedSpanState(FVector3 plane_sz, FVector3 plane_su, FVector3 plane_sv, bool plane_shade, float planelightfloat, fixed_t pviewx, fixed_t pviewy) +{ + ds_plane_sz = plane_sz; + ds_plane_su = plane_su; + ds_plane_sv = plane_sv; + ds_plane_shade = plane_shade; + ds_planelightfloat = planelightfloat; + ds_pviewx = pviewx; + ds_pviewy = pviewy; + + if (!plane_shade) + { + for (int i = 0; i < viewwidth; ++i) + { + tiltlighting[i] = DrawerContext::SpanLitColormap(); + } + } +} + +void DrawerContext::SetSlabLight(const BYTE *colormap) +{ + R_SetupDrawSlab(colormap); +} + +void DrawerContext::DrawSlab(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *source, int dest_x, int 
dest_y) +{ + int pixelsize = r_swtruecolor ? 4 : 1; + R_DrawSlab(dx, v, dy, vi, source, (ylookup[dest_y] + dest_x) * pixelsize + dc_destorg); +} + +void DrawerContext::SetSpanStyle(fixed_t alpha, bool additive, bool masked) +{ + if (spanfunc != R_FillSpan) + { + if (masked) + { + if (alpha < OPAQUE || additive) + { + if (!additive) + { + spanfunc = R_DrawSpanMaskedTranslucent; + dc_srcblend = Col2RGB8[alpha >> 10]; + dc_destblend = Col2RGB8[(OPAQUE - alpha) >> 10]; + dc_srcalpha = alpha; + dc_destalpha = OPAQUE - alpha; + } + else + { + spanfunc = R_DrawSpanMaskedAddClamp; + dc_srcblend = Col2RGB8_LessPrecision[alpha >> 10]; + dc_destblend = Col2RGB8_LessPrecision[FRACUNIT >> 10]; + dc_srcalpha = alpha; + dc_destalpha = OPAQUE - alpha; + } + } + else + { + spanfunc = R_DrawSpanMasked; + } + } + else + { + if (alpha < OPAQUE || additive) + { + if (!additive) + { + spanfunc = R_DrawSpanTranslucent; + dc_srcblend = Col2RGB8[alpha >> 10]; + dc_destblend = Col2RGB8[(OPAQUE - alpha) >> 10]; + dc_srcalpha = alpha; + dc_destalpha = OPAQUE - alpha; + } + else + { + spanfunc = R_DrawSpanAddClamp; + dc_srcblend = Col2RGB8_LessPrecision[alpha >> 10]; + dc_destblend = Col2RGB8_LessPrecision[FRACUNIT >> 10]; + dc_srcalpha = alpha; + dc_destalpha = OPAQUE - alpha; + } + } + else + { + spanfunc = R_DrawSpan; + } + } + } +} + +void DrawerContext::RtInitCols(BYTE *buffer) +{ + rt_initcols(buffer); +} + +void DrawerContext::RtSpanCoverage(int x, int start, int stop) +{ + rt_span_coverage(x, start, stop); +} + +void DrawerContext::SetMaskedColumnState(short *mfloorclip, short *mceilingclip, double spryscale, double sprtopscreen, bool sprflipvert) +{ + dc_mfloorclip = mfloorclip; + dc_mceilingclip = mceilingclip; + dc_spryscale = spryscale; + dc_sprtopscreen = sprtopscreen; + dc_sprflipvert = sprflipvert; +} + +void DrawerContext::DrawMaskedColumn(int x, const BYTE *column, const FTexture::Span *spans) +{ + R_DrawMaskedColumn(x, column, spans); +} + +void 
DrawerContext::DrawMaskedColumnHoriz(int x, const BYTE *column, const FTexture::Span *spans) +{ + dc_x = x; + R_DrawMaskedColumnHoriz(column, spans); +} + +void DrawerContext::DrawFogBoundary(int x1, int x2, short *uclip, short *dclip) +{ + R_DrawFogBoundary(x1, x2, uclip, dclip); +} + +void DrawerContext::DrawRt4cols(int sx) +{ + rt_draw4cols(sx); +} + +void DrawerContext::DrawColumn() +{ + colfunc(); +} + +void DrawerContext::DrawSpan() +{ + spanfunc(); +} + +void DrawerContext::DrawHColumnPre() +{ + hcolfunc_pre(); +} + +void DrawerContext::DrawSimplePolySpan() +{ + R_DrawSpan(); +} + +void DrawerContext::SetBaseStyle() +{ + colfunc = basecolfunc; + hcolfunc_post1 = rt_map1col; + hcolfunc_post4 = rt_map4cols; +} + +void DrawerContext::DrawWall(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, FTexture *rw_pic, fixed_t rw_offset, const BYTE *(*getcol)(FTexture *tex, int col)) +{ + wallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat, rw_pic, rw_offset, getcol ? getcol : R_GetColumn); +} + +void DrawerContext::DrawMaskedWall(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, FTexture *rw_pic, fixed_t rw_offset, const BYTE *(*getcol)(FTexture *tex, int col)) +{ + maskwallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat, rw_pic, rw_offset, getcol ? getcol : R_GetColumn); +} + +void DrawerContext::DrawTransMaskedWall(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, FTexture *rw_pic, fixed_t rw_offset, const BYTE *(*getcol)(FTexture *tex, int col)) +{ + transmaskwallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat, rw_pic, rw_offset, getcol ? 
getcol : R_GetColumn); +} + +void DrawerContext::DrawColoredSpan(int y, int x1, int x2) +{ + R_DrawColoredSpan(y, x1, x2); +} + +void DrawerContext::DrawTiltedSpan(int y, int x1, int x2) +{ + R_DrawTiltedSpan(y, x1, x2); +} + +void DrawerContext::FillTransColumn(int x, int y1, int y2, int color, int alpha) +{ + R_FillTransColumn(x, y1, y2, color, alpha); +} diff --git a/src/r_drawer_context.h b/src/r_drawer_context.h new file mode 100644 index 0000000000..64e0bf6a04 --- /dev/null +++ b/src/r_drawer_context.h @@ -0,0 +1,123 @@ +// Emacs style mode select -*- C++ -*- +//----------------------------------------------------------------------------- +// +// $Id:$ +// +// Copyright (C) 1993-1996 by id Software, Inc. +// +// This source is available for distribution and/or modification +// only under the terms of the DOOM Source Code License as +// published by id Software. All rights reserved. +// +// The source is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// FITNESS FOR A PARTICULAR PURPOSE. See the DOOM Source Code License +// for more details. +// +// DESCRIPTION: +// System specific interface stuff. +// +//----------------------------------------------------------------------------- + + +#ifndef __R_DRAWER_CONTEXT__ +#define __R_DRAWER_CONTEXT__ + +#include "r_defs.h" + +// [RH] Consolidate column drawer selection +enum ESPSResult +{ + DontDraw, // not useful to draw this + DoDraw0, // draw this as if r_columnmethod is 0 + DoDraw1, // draw this as if r_columnmethod is 1 +}; + +struct TiltedPlaneData; + +// Immediate graphics context for column/span based software rendering. 
+class DrawerContext +{ +public: + static DCanvas *Canvas(); // dc_destorg + + static uint8_t FlatColor(); // dc_color + static FColormap *LightColormap(); // dc_fcolormap + static fixed_t TextureFrac(); // dc_texturefrac + static fixed_t TextureStep(); // dc_iscale + static double TextureMid(); // dc_texturemid + + static int SpanXBits(); // ds_xbits + static int SpanYBits(); // ds_ybits + static lighttable_t *SpanLitColormap(); // ds_colormap + + static bool IsFuzzColumn(); // colfunc == fuzzcolfunc + static bool IsFillColumn(); // colfunc == R_FillColumn + static bool IsBaseColumn(); // colfunc == basecolfunc + + static void SetCanvas(DCanvas *canvas); // dc_destorg + + static void SetFlatColor(uint8_t color_index); // dc_color + static void SetLight(FColormap *base_colormap, float light, int shade); + static void SetTranslationMap(lighttable_t *translation); + static void SetX(int x); // dc_x + static void SetY1(int y); // dc_yl + static void SetY2(int y); // dc_yh + static void SetSource(const BYTE *source); // dc_source + static void SetTextureFrac(fixed_t pos); // dc_texturefrac + static void SetTextureStep(fixed_t step); // dc_iscale + static void SetTextureMid(double value); // dc_texturemid + static void SetDest(int x, int y); // dc_dest + static void SetDrawCount(int count); // dc_count + + static void SetSpanY(int y); // ds_y + static void SetSpanX1(int x); // ds_x1 + static void SetSpanX2(int x); // ds_x2 + static void SetSpanXStep(dsfixed_t step); // ds_xstep + static void SetSpanYStep(dsfixed_t step); // ds_ystep + static void SetSpanXBits(int bits); // ds_xbits + static void SetSpanYBits(int bits); // ds_ybits + static void SetSpanXFrac(dsfixed_t frac); // ds_xfrac + static void SetSpanYFrac(dsfixed_t frac); // ds_yfrac + static void SetSpanLight(FColormap *base_colormap, float light, int shade); + static void SetSpanSource(FTexture *texture); + static void SetSpanStyle(fixed_t alpha, bool additive, bool masked); + + static void SetSlabLight(const 
BYTE *colormap); + + static ESPSResult SetPatchStyle(FRenderStyle style, fixed_t alpha, int translation, DWORD color); + static ESPSResult SetPatchStyle(FRenderStyle style, float alpha, int translation, DWORD color); + // Call this after finished drawing the current thing, in case its style was STYLE_Shade + static void SetBaseStyle(); + static void FinishSetPatchStyle(); + + static void SetMaskedColumnState(short *mfloorclip, short *mceilingclip, double spryscale, double sprtopscreen, bool sprflipvert); + static void SetTiltedSpanState(FVector3 plane_sz, FVector3 plane_su, FVector3 plane_sv, bool plane_shade, float planelightfloat, fixed_t pviewx, fixed_t pviewy); + + static void RtInitCols(BYTE *buffer); + static void RtSpanCoverage(int x, int start, int stop); + + static void DrawMaskedColumn(int x, const BYTE *column, const FTexture::Span *spans); + static void DrawMaskedColumnHoriz(int x, const BYTE *column, const FTexture::Span *spans); + + static void DrawRt4cols(int sx); + static void DrawColumn(); + static void DrawHColumnPre(); + static void DrawSpan(); + static void DrawSimplePolySpan(); + + static void DrawWall(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, FTexture *texture, fixed_t texturefrac, const BYTE *(*getcol)(FTexture *tex, int col) = nullptr); + static void DrawMaskedWall(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, FTexture *texture, fixed_t texturefrac, const BYTE *(*getcol)(FTexture *tex, int col) = nullptr); + static void DrawTransMaskedWall(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, FTexture *texture, fixed_t texturefrac, const BYTE *(*getcol)(FTexture *tex, int col) = nullptr); + + static void DrawColoredSpan(int y, int x1, int x2); + static void DrawTiltedSpan(int y, int x1, int x2); + + static void DrawSlab(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *source, int dest_x, int dest_y); + + static void 
FillTransColumn(int x, int y1, int y2, int color, int alpha); + + static void DrawFogBoundary(int x1, int x2, short *uclip, short *dclip); +}; + +#endif diff --git a/src/r_drawt.cpp b/src/r_drawt.cpp index 8370930441..a74dc01338 100644 --- a/src/r_drawt.cpp +++ b/src/r_drawt.cpp @@ -38,6 +38,8 @@ ** Let's hope so. :-) */ +#define DRAWER_INTERNALS + #include "templates.h" #include "doomtype.h" #include "doomdef.h" @@ -1128,26 +1130,26 @@ void R_DrawMaskedColumnHoriz (const BYTE *column, const FTexture::Span *span) const int top = span->TopOffset; // calculate unclipped screen coordinates for post - dc_yl = xs_RoundToInt(sprtopscreen + spryscale * top); - dc_yh = xs_RoundToInt(sprtopscreen + spryscale * (top + length) - 1); + dc_yl = xs_RoundToInt(dc_sprtopscreen + dc_spryscale * top); + dc_yh = xs_RoundToInt(dc_sprtopscreen + dc_spryscale * (top + length) - 1); - if (sprflipvert) + if (dc_sprflipvert) { swapvalues (dc_yl, dc_yh); } - if (dc_yh >= mfloorclip[dc_x]) + if (dc_yh >= dc_mfloorclip[dc_x]) { - dc_yh = mfloorclip[dc_x] - 1; + dc_yh = dc_mfloorclip[dc_x] - 1; } - if (dc_yl < mceilingclip[dc_x]) + if (dc_yl < dc_mceilingclip[dc_x]) { - dc_yl = mceilingclip[dc_x]; + dc_yl = dc_mceilingclip[dc_x]; } if (dc_yl <= dc_yh) { - if (sprflipvert) + if (dc_sprflipvert) { dc_texturefrac = (dc_yl*dc_iscale) - (top << FRACBITS) - fixed_t(CenterY * dc_iscale) - texturemid; @@ -1178,7 +1180,7 @@ void R_DrawMaskedColumnHoriz (const BYTE *column, const FTexture::Span *span) } fixed_t endfrac = dc_texturefrac + (dc_yh-dc_yl)*dc_iscale; const fixed_t maxfrac = length << FRACBITS; - if (dc_yh < mfloorclip[dc_x]-1 && endfrac < maxfrac - dc_iscale) + if (dc_yh < dc_mfloorclip[dc_x]-1 && endfrac < maxfrac - dc_iscale) { dc_yh++; } @@ -1198,7 +1200,7 @@ nextpost: span++; } - if (sprflipvert) + if (dc_sprflipvert) { unsigned int *front = horizspan[dc_x&3]; unsigned int *back = dc_ctspan[dc_x&3] - 2; diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp index d2d715c8dd..4b6605b4aa 
100644 --- a/src/r_drawt_rgba.cpp +++ b/src/r_drawt_rgba.cpp @@ -34,6 +34,8 @@ ** Please see r_drawt.cpp for a description of the globals used. */ +#define DRAWER_INTERNALS + #include "templates.h" #include "doomtype.h" #include "doomdef.h" diff --git a/src/r_main.cpp b/src/r_main.cpp index 247a981255..a30aa232be 100644 --- a/src/r_main.cpp +++ b/src/r_main.cpp @@ -23,6 +23,8 @@ // //----------------------------------------------------------------------------- +#define DRAWER_INTERNALS + // HEADER FILES ------------------------------------------------------------ #include @@ -150,17 +152,6 @@ angle_t xtoviewangle[MAXWIDTH+1]; bool foggy; // [RH] ignore extralight and fullbright? int r_actualextralight; -void (*colfunc) (void); -void (*basecolfunc) (void); -void (*fuzzcolfunc) (void); -void (*transcolfunc) (void); -void (*spanfunc) (void); - -void (*hcolfunc_pre) (void); -void (*hcolfunc_post1) (int hx, int sx, int yl, int yh); -void (*hcolfunc_post2) (int hx, int sx, int yl, int yh); -void (*hcolfunc_post4) (int sx, int yl, int yh); - cycle_t WallCycles, PlaneCycles, MaskedCycles, WallScanCycles; // PRIVATE DATA DEFINITIONS ------------------------------------------------ @@ -820,6 +811,7 @@ void R_SetupBuffer () ASM_PatchPitch (); #endif } + dc_canvas = RenderTarget; dc_destorg = lineptr; for (int i = 0; i < RenderTarget->GetHeight(); i++) { diff --git a/src/r_main.h b/src/r_main.h index fa8fe0bb15..91eb5b1831 100644 --- a/src/r_main.h +++ b/src/r_main.h @@ -110,22 +110,6 @@ extern FColormap* fixedcolormap; extern FSpecialColormap*realfixedcolormap; -// -// Function pointers to switch refresh/drawing functions. -// Used to select shadow mode etc. -// -extern void (*colfunc) (void); -extern void (*basecolfunc) (void); -extern void (*fuzzcolfunc) (void); -extern void (*transcolfunc) (void); -// No shadow effects on floors. -extern void (*spanfunc) (void); - -// [RH] Function pointers for the horizontal column drawers. 
-extern void (*hcolfunc_pre) (void); -extern void (*hcolfunc_post1) (int hx, int sx, int yl, int yh); -extern void (*hcolfunc_post2) (int hx, int sx, int yl, int yh); -extern void (*hcolfunc_post4) (int sx, int yl, int yh); void R_InitTextureMapping (); diff --git a/src/r_plane.cpp b/src/r_plane.cpp index 807066f77d..c4d7cd59c3 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -58,7 +58,7 @@ #include "r_3dfloors.h" #include "v_palette.h" #include "r_data/colormaps.h" -#include "r_draw_rgba.h" +#include "r_drawer_context.h" #ifdef _MSC_VER #pragma warning(disable:4244) @@ -138,8 +138,6 @@ FVector3 plane_sz, plane_su, plane_sv; float planelightfloat; bool plane_shade; fixed_t pviewx, pviewy; - -void R_DrawTiltedPlane_ASM (int y, int x1); } float yslope[MAXHEIGHT]; @@ -147,13 +145,6 @@ static fixed_t xscale, yscale; static double xstepscale, ystepscale; static double basexfrac, baseyfrac; -#ifdef X86_ASM -extern "C" void R_SetSpanSource_ASM (const BYTE *flat); -extern "C" void R_SetSpanSize_ASM (int xbits, int ybits); -extern "C" void R_SetSpanColormap_ASM (BYTE *colormap); -extern "C" void R_SetTiltedSpanSource_ASM (const BYTE *flat); -extern "C" BYTE *ds_curcolormap, *ds_cursource, *ds_curtiltedsource; -#endif void R_DrawSinglePlane (visplane_t *, fixed_t alpha, bool additive, bool masked); //========================================================================== @@ -220,304 +211,22 @@ void R_MapPlane (int y, int x1) distance = planeheight * yslope[y]; - ds_xstep = xs_ToFixed(32-ds_xbits, distance * xstepscale); - ds_ystep = xs_ToFixed(32-ds_ybits, distance * ystepscale); - ds_xfrac = xs_ToFixed(32-ds_xbits, distance * basexfrac) + pviewx; - ds_yfrac = xs_ToFixed(32-ds_ybits, distance * baseyfrac) + pviewy; + DrawerContext::SetSpanXStep(xs_ToFixed(32 - DrawerContext::SpanXBits(), distance * xstepscale)); + DrawerContext::SetSpanYStep(xs_ToFixed(32 - DrawerContext::SpanYBits(), distance * ystepscale)); + DrawerContext::SetSpanXFrac(xs_ToFixed(32 - 
DrawerContext::SpanXBits(), distance * basexfrac) + pviewx); + DrawerContext::SetSpanYFrac(xs_ToFixed(32 - DrawerContext::SpanYBits(), distance * baseyfrac) + pviewy); if (plane_shade) { // Determine lighting based on the span's distance from the viewer. - R_SetDSColorMapLight(basecolormap, GlobVis * fabs(CenterY - y), planeshade); + DrawerContext::SetSpanLight(basecolormap, GlobVis * fabs(CenterY - y), planeshade); } -#ifdef X86_ASM - if (!r_swtruecolor && ds_colormap != ds_curcolormap) - R_SetSpanColormap_ASM (ds_colormap); -#endif + DrawerContext::SetSpanY(y); + DrawerContext::SetSpanX1(x1); + DrawerContext::SetSpanX2(x2); - ds_y = y; - ds_x1 = x1; - ds_x2 = x2; - - spanfunc (); -} - -//========================================================================== -// -// R_CalcTiltedLighting -// -// Calculates the lighting for one row of a tilted plane. If the definition -// of GETPALOOKUP changes, this needs to change, too. -// -//========================================================================== - -extern "C" { -void R_CalcTiltedLighting (double lval, double lend, int width) -{ - double lstep; - BYTE *lightfiller; - BYTE *basecolormapdata = basecolormap->Maps; - int i = 0; - - if (width == 0 || lval == lend) - { // Constant lighting - lightfiller = basecolormapdata + (GETPALOOKUP(lval, planeshade) << COLORMAPSHIFT); - } - else - { - lstep = (lend - lval) / width; - if (lval >= MAXLIGHTVIS) - { // lval starts "too bright". - lightfiller = basecolormapdata + (GETPALOOKUP(lval, planeshade) << COLORMAPSHIFT); - for (; i <= width && lval >= MAXLIGHTVIS; ++i) - { - tiltlighting[i] = lightfiller; - lval += lstep; - } - } - if (lend >= MAXLIGHTVIS) - { // lend ends "too bright". 
- lightfiller = basecolormapdata + (GETPALOOKUP(lend, planeshade) << COLORMAPSHIFT); - for (; width > i && lend >= MAXLIGHTVIS; --width) - { - tiltlighting[width] = lightfiller; - lend -= lstep; - } - } - if (width > 0) - { - lval = FIXED2DBL(planeshade) - lval; - lend = FIXED2DBL(planeshade) - lend; - lstep = (lend - lval) / width; - if (lstep < 0) - { // Going from dark to light - if (lval < 1.) - { // All bright - lightfiller = basecolormapdata; - } - else - { - if (lval >= NUMCOLORMAPS) - { // Starts beyond the dark end - BYTE *clight = basecolormapdata + ((NUMCOLORMAPS-1) << COLORMAPSHIFT); - while (lval >= NUMCOLORMAPS && i <= width) - { - tiltlighting[i++] = clight; - lval += lstep; - } - if (i > width) - return; - } - while (i <= width && lval >= 0) - { - tiltlighting[i++] = basecolormapdata + (xs_ToInt(lval) << COLORMAPSHIFT); - lval += lstep; - } - lightfiller = basecolormapdata; - } - } - else - { // Going from light to dark - if (lval >= (NUMCOLORMAPS-1)) - { // All dark - lightfiller = basecolormapdata + ((NUMCOLORMAPS-1) << COLORMAPSHIFT); - } - else - { - while (lval < 0 && i <= width) - { - tiltlighting[i++] = basecolormapdata; - lval += lstep; - } - if (i > width) - return; - while (i <= width && lval < (NUMCOLORMAPS-1)) - { - tiltlighting[i++] = basecolormapdata + (xs_ToInt(lval) << COLORMAPSHIFT); - lval += lstep; - } - lightfiller = basecolormapdata + ((NUMCOLORMAPS-1) << COLORMAPSHIFT); - } - } - } - } - for (; i <= width; i++) - { - tiltlighting[i] = lightfiller; - } -} -} // extern "C" - -//========================================================================== -// -// R_MapTiltedPlane -// -//========================================================================== - -void R_MapTiltedPlane_C (int y, int x1) -{ - int x2 = spanend[y]; - int width = x2 - x1; - double iz, uz, vz; - BYTE *fb; - DWORD u, v; - int i; - - iz = plane_sz[2] + plane_sz[1]*(centery-y) + plane_sz[0]*(x1-centerx); - - // Lighting is simple. 
It's just linear interpolation from start to end - if (plane_shade) - { - uz = (iz + plane_sz[0]*width) * planelightfloat; - vz = iz * planelightfloat; - R_CalcTiltedLighting (vz, uz, width); - } - - uz = plane_su[2] + plane_su[1]*(centery-y) + plane_su[0]*(x1-centerx); - vz = plane_sv[2] + plane_sv[1]*(centery-y) + plane_sv[0]*(x1-centerx); - - fb = ylookup[y] + x1 + dc_destorg; - - BYTE vshift = 32 - ds_ybits; - BYTE ushift = vshift - ds_xbits; - int umask = ((1 << ds_xbits) - 1) << ds_ybits; - -#if 0 // The "perfect" reference version of this routine. Pretty slow. - // Use it only to see how things are supposed to look. - i = 0; - do - { - double z = 1.f/iz; - - u = SQWORD(uz*z) + pviewx; - v = SQWORD(vz*z) + pviewy; - R_SetDSColorMapLight(tiltlighting[i], 0, 0); - fb[i++] = ds_colormap[ds_source[(v >> vshift) | ((u >> ushift) & umask)]]; - iz += plane_sz[0]; - uz += plane_su[0]; - vz += plane_sv[0]; - } while (--width >= 0); -#else -//#define SPANSIZE 32 -//#define INVSPAN 0.03125f -//#define SPANSIZE 8 -//#define INVSPAN 0.125f -#define SPANSIZE 16 -#define INVSPAN 0.0625f - - double startz = 1.f/iz; - double startu = uz*startz; - double startv = vz*startz; - double izstep, uzstep, vzstep; - - izstep = plane_sz[0] * SPANSIZE; - uzstep = plane_su[0] * SPANSIZE; - vzstep = plane_sv[0] * SPANSIZE; - x1 = 0; - width++; - - while (width >= SPANSIZE) - { - iz += izstep; - uz += uzstep; - vz += vzstep; - - double endz = 1.f/iz; - double endu = uz*endz; - double endv = vz*endz; - DWORD stepu = SQWORD((endu - startu) * INVSPAN); - DWORD stepv = SQWORD((endv - startv) * INVSPAN); - u = SQWORD(startu) + pviewx; - v = SQWORD(startv) + pviewy; - - for (i = SPANSIZE-1; i >= 0; i--) - { - fb[x1] = *(tiltlighting[x1] + ds_source[(v >> vshift) | ((u >> ushift) & umask)]); - x1++; - u += stepu; - v += stepv; - } - startu = endu; - startv = endv; - width -= SPANSIZE; - } - if (width > 0) - { - if (width == 1) - { - u = SQWORD(startu); - v = SQWORD(startv); - fb[x1] = 
*(tiltlighting[x1] + ds_source[(v >> vshift) | ((u >> ushift) & umask)]); - } - else - { - double left = width; - iz += plane_sz[0] * left; - uz += plane_su[0] * left; - vz += plane_sv[0] * left; - - double endz = 1.f/iz; - double endu = uz*endz; - double endv = vz*endz; - left = 1.f/left; - DWORD stepu = SQWORD((endu - startu) * left); - DWORD stepv = SQWORD((endv - startv) * left); - u = SQWORD(startu) + pviewx; - v = SQWORD(startv) + pviewy; - - for (; width != 0; width--) - { - fb[x1] = *(tiltlighting[x1] + ds_source[(v >> vshift) | ((u >> ushift) & umask)]); - x1++; - u += stepu; - v += stepv; - } - } - } -#endif -} - -void R_MapTiltedPlane_rgba (int y, int x1) -{ - int x2 = spanend[y]; - - // Slopes are broken currently in master. - // Until R_DrawTiltedPlane is fixed we are just going to fill with a solid color. - - uint32_t *source = (uint32_t*)ds_source; - int source_width = 1 << ds_xbits; - int source_height = 1 << ds_ybits; - - uint32_t *dest = ylookup[y] + x1 + (uint32_t*)dc_destorg; - - int count = x2 - x1 + 1; - while (count > 0) - { - *(dest++) = source[0]; - count--; - } -} - -//========================================================================== -// -// R_MapColoredPlane -// -//========================================================================== - -void R_MapColoredPlane_C (int y, int x1) -{ - memset (ylookup[y] + x1 + dc_destorg, ds_color, spanend[y] - x1 + 1); -} - -void R_MapColoredPlane_rgba(int y, int x1) -{ - uint32_t *dest = ylookup[y] + x1 + (uint32_t*)dc_destorg; - int count = (spanend[y] - x1 + 1); - uint32_t light = calc_light_multiplier(ds_light); - uint32_t color = shade_pal_index_simple(ds_color, light); - for (int i = 0; i < count; i++) - dest[i] = color; + DrawerContext::DrawSpan(); } //========================================================================== @@ -1014,7 +723,7 @@ static void R_DrawSky (visplane_t *pl) rw_offset = 0; frontyScale = rw_pic->Scale.Y; - dc_texturemid = skymid * frontyScale; + 
DrawerContext::SetTextureMid(skymid * frontyScale); if (1 << frontskytex->HeightBits == frontskytex->GetHeight()) { // The texture tiles nicely @@ -1023,8 +732,8 @@ static void R_DrawSky (visplane_t *pl) lastskycol[x] = 0xffffffff; lastskycol_bgra[x] = 0xffffffff; } - wallscan (pl->left, pl->right, (short *)pl->top, (short *)pl->bottom, swall, lwall, - frontyScale, backskytex == NULL ? R_GetOneSkyColumn : R_GetTwoSkyColumns); + DrawerContext::DrawWall (pl->left, pl->right, (short *)pl->top, (short *)pl->bottom, swall, lwall, + frontyScale, rw_pic, rw_offset, backskytex == NULL ? R_GetOneSkyColumn : R_GetTwoSkyColumns); } else { // The texture does not tile nicely @@ -1047,7 +756,7 @@ static void R_DrawSkyStriped (visplane_t *pl) if (topfrac < 0) topfrac += frontskytex->GetHeight(); yl = 0; yh = short((frontskytex->GetHeight() - topfrac) * frontyScale); - dc_texturemid = topfrac - iscale * (1 - CenterY); + DrawerContext::SetTextureMid(topfrac - iscale * (1 - CenterY)); while (yl < viewheight) { @@ -1061,11 +770,11 @@ static void R_DrawSkyStriped (visplane_t *pl) lastskycol[x] = 0xffffffff; lastskycol_bgra[x] = 0xffffffff; } - wallscan (pl->left, pl->right, top, bot, swall, lwall, rw_pic->Scale.Y, - backskytex == NULL ? R_GetOneSkyColumn : R_GetTwoSkyColumns); + DrawerContext::DrawWall(pl->left, pl->right, top, bot, swall, lwall, rw_pic->Scale.Y, + rw_pic, rw_offset, backskytex == NULL ? 
R_GetOneSkyColumn : R_GetTwoSkyColumns); yl = yh; yh += drawheight; - dc_texturemid = iscale * (centery-yl-1); + DrawerContext::SetTextureMid(iscale * (centery-yl-1)); } } @@ -1086,7 +795,7 @@ int R_DrawPlanes () int i; int vpcount = 0; - ds_color = 3; + DrawerContext::SetFlatColor(3); for (i = 0; i < MAXVISPLANES; i++) { @@ -1111,7 +820,7 @@ void R_DrawHeightPlanes(double height) visplane_t *pl; int i; - ds_color = 3; + DrawerContext::SetFlatColor(3); DVector3 oViewPos = ViewPos; DAngle oViewAngle = ViewAngle; @@ -1151,8 +860,8 @@ void R_DrawSinglePlane (visplane_t *pl, fixed_t alpha, bool additive, bool maske if (r_drawflat) { // [RH] no texture mapping - ds_color += 4; - R_MapVisPlane (pl, R_MapColoredPlane); + DrawerContext::SetFlatColor(DrawerContext::FlatColor() + 4); + R_MapVisPlane (pl, [](int y, int x1) { DrawerContext::DrawColoredSpan(y, x1, spanend[y]); }); } else if (pl->picnum == skyflatnum) { // sky flat @@ -1175,13 +884,9 @@ void R_DrawSinglePlane (visplane_t *pl, fixed_t alpha, bool additive, bool maske { // Don't waste time on a masked texture if it isn't really masked. 
masked = false; } - R_SetupSpanBits(tex); double xscale = pl->xform.xScale * tex->Scale.X; double yscale = pl->xform.yScale * tex->Scale.Y; - if (r_swtruecolor) - ds_source = (const BYTE*)tex->GetPixelsBgra(); - else - ds_source = tex->GetPixels(); + DrawerContext::SetSpanSource(tex); basecolormap = pl->colormap; planeshade = LIGHT2SHADE(pl->lightlevel); @@ -1544,13 +1249,13 @@ void R_DrawSkyPlane (visplane_t *pl) bool fakefixed = false; if (fixedcolormap) { - R_SetColorMapLight(fixedcolormap, 0, 0); + DrawerContext::SetLight(fixedcolormap, 0, 0); } else { fakefixed = true; fixedcolormap = &NormalLight; - R_SetColorMapLight(fixedcolormap, 0, 0); + DrawerContext::SetLight(fixedcolormap, 0, 0); } R_DrawSky (pl); @@ -1567,13 +1272,6 @@ void R_DrawSkyPlane (visplane_t *pl) void R_DrawNormalPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t alpha, bool additive, bool masked) { -#ifdef X86_ASM - if (!r_swtruecolor && ds_source != ds_cursource) - { - R_SetSpanSource_ASM (ds_source); - } -#endif - if (alpha <= 0) { return; @@ -1583,8 +1281,8 @@ void R_DrawNormalPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t double xstep, ystep, leftxfrac, leftyfrac, rightxfrac, rightyfrac; double x; - xscale = xs_ToFixed(32 - ds_xbits, _xscale); - yscale = xs_ToFixed(32 - ds_ybits, _yscale); + xscale = xs_ToFixed(32 - DrawerContext::SpanXBits(), _xscale); + yscale = xs_ToFixed(32 - DrawerContext::SpanYBits(), _yscale); if (planeang != 0) { double cosine = cos(planeang), sine = sin(planeang); @@ -1631,15 +1329,14 @@ void R_DrawNormalPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t planeheight = fabs(pl->height.Zat0() - ViewPos.Z); GlobVis = r_FloorVisibility / planeheight; - ds_light = 0; if (fixedlightlev >= 0) { - R_SetDSColorMapLight(basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); + DrawerContext::SetSpanLight(basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); plane_shade = false; } else if (fixedcolormap) { - 
R_SetDSColorMapLight(fixedcolormap, 0, 0); + DrawerContext::SetSpanLight(fixedcolormap, 0, 0); plane_shade = false; } else @@ -1647,61 +1344,8 @@ void R_DrawNormalPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t plane_shade = true; } - if (spanfunc != R_FillSpan) - { - if (masked) - { - if (alpha < OPAQUE || additive) - { - if (!additive) - { - spanfunc = R_DrawSpanMaskedTranslucent; - dc_srcblend = Col2RGB8[alpha>>10]; - dc_destblend = Col2RGB8[(OPAQUE-alpha)>>10]; - dc_srcalpha = alpha; - dc_destalpha = OPAQUE - alpha; - } - else - { - spanfunc = R_DrawSpanMaskedAddClamp; - dc_srcblend = Col2RGB8_LessPrecision[alpha>>10]; - dc_destblend = Col2RGB8_LessPrecision[FRACUNIT>>10]; - dc_srcalpha = alpha; - dc_destalpha = OPAQUE - alpha; - } - } - else - { - spanfunc = R_DrawSpanMasked; - } - } - else - { - if (alpha < OPAQUE || additive) - { - if (!additive) - { - spanfunc = R_DrawSpanTranslucent; - dc_srcblend = Col2RGB8[alpha>>10]; - dc_destblend = Col2RGB8[(OPAQUE-alpha)>>10]; - dc_srcalpha = alpha; - dc_destalpha = OPAQUE - alpha; - } - else - { - spanfunc = R_DrawSpanAddClamp; - dc_srcblend = Col2RGB8_LessPrecision[alpha>>10]; - dc_destblend = Col2RGB8_LessPrecision[FRACUNIT>>10]; - dc_srcalpha = alpha; - dc_destalpha = OPAQUE - alpha; - } - } - else - { - spanfunc = R_DrawSpan; - } - } - } + DrawerContext::SetSpanStyle(alpha, additive, masked); + R_MapVisPlane (pl, R_MapPlane); } @@ -1733,14 +1377,14 @@ void R_DrawTiltedPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t return; } - lxscale = _xscale * ifloatpow2[ds_xbits]; - lyscale = _yscale * ifloatpow2[ds_ybits]; + lxscale = _xscale * ifloatpow2[DrawerContext::SpanXBits()]; + lyscale = _yscale * ifloatpow2[DrawerContext::SpanYBits()]; xscale = 64.f / lxscale; yscale = 64.f / lyscale; zeroheight = pl->height.ZatPoint(ViewPos); - pviewx = xs_ToFixed(32 - ds_xbits, pl->xform.xOffs * pl->xform.xScale); - pviewy = xs_ToFixed(32 - ds_ybits, pl->xform.yOffs * pl->xform.yScale); + pviewx = 
xs_ToFixed(32 - DrawerContext::SpanXBits(), pl->xform.xOffs * pl->xform.xScale); + pviewy = xs_ToFixed(32 - DrawerContext::SpanYBits(), pl->xform.yOffs * pl->xform.yScale); planeang = (pl->xform.Angle + pl->xform.baseAngle).Radians(); // p is the texture origin in view space @@ -1810,42 +1454,22 @@ void R_DrawTiltedPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t if (fixedlightlev >= 0) { - R_SetDSColorMapLight(basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); + DrawerContext::SetSpanLight(basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); plane_shade = false; } else if (fixedcolormap) { - R_SetDSColorMapLight(fixedcolormap, 0, 0); + DrawerContext::SetSpanLight(fixedcolormap, 0, 0); plane_shade = false; } else { - R_SetDSColorMapLight(basecolormap, 0, 0); + DrawerContext::SetSpanLight(basecolormap, 0, 0); plane_shade = true; } - if (!plane_shade) - { - for (int i = 0; i < viewwidth; ++i) - { - tiltlighting[i] = ds_colormap; - } - } - -#if defined(X86_ASM) - if (!r_swtruecolor) - { - if (ds_source != ds_curtiltedsource) - R_SetTiltedSpanSource_ASM(ds_source); - R_MapVisPlane(pl, R_DrawTiltedPlane_ASM); - } - else - { - R_MapVisPlane(pl, R_MapTiltedPlane); - } -#else - R_MapVisPlane (pl, R_MapTiltedPlane); -#endif + DrawerContext::SetTiltedSpanState(plane_sz, plane_su, plane_sv, plane_shade, planelightfloat, pviewx, pviewy); + R_MapVisPlane (pl, [](int y, int x1) { DrawerContext::DrawTiltedSpan(y, x1, spanend[y]); }); } //========================================================================== diff --git a/src/r_plane.h b/src/r_plane.h index b199d34776..d4db3dc09c 100644 --- a/src/r_plane.h +++ b/src/r_plane.h @@ -93,14 +93,6 @@ void R_DrawNormalPlane (visplane_t *pl, double xscale, double yscale, fixed_t al void R_DrawTiltedPlane (visplane_t *pl, double xscale, double yscale, fixed_t alpha, bool additive, bool masked); void R_MapVisPlane (visplane_t *pl, void (*mapfunc)(int y, int x1)); -extern void(*R_MapColoredPlane)(int y, int x1); -extern 
void(*R_MapTiltedPlane)(int y, int x1); - -void R_MapTiltedPlane_C(int y, int x1); -void R_MapTiltedPlane_rgba(int y, int x); -void R_MapColoredPlane_C(int y, int x1); -void R_MapColoredPlane_rgba(int y, int x1); - visplane_t *R_FindPlane ( const secplane_t &height, FTextureID picnum, diff --git a/src/r_segs.cpp b/src/r_segs.cpp index ad242b2f91..2476e07ac1 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -52,6 +52,7 @@ #include "r_3dfloors.h" #include "v_palette.h" #include "r_data/colormaps.h" +#include "r_drawer_context.h" #define WALLYREPEAT 8 @@ -172,19 +173,19 @@ CVAR(Bool, r_drawmirrors, true, 0) float *MaskedSWall; float MaskedScaleY; -static void BlastMaskedColumn (void (*blastfunc)(const BYTE *pixels, const FTexture::Span *spans), FTexture *tex) +static void BlastMaskedColumn (int x, void (*blastfunc)(int x, const BYTE *pixels, const FTexture::Span *spans), FTexture *tex) { // calculate lighting if (fixedcolormap == NULL && fixedlightlev < 0) { - R_SetColorMapLight(basecolormap, rw_light, wallshade); + DrawerContext::SetLight(basecolormap, rw_light, wallshade); } - dc_iscale = xs_Fix<16>::ToFix(MaskedSWall[dc_x] * MaskedScaleY); + DrawerContext::SetTextureStep(xs_Fix<16>::ToFix(MaskedSWall[x] * MaskedScaleY)); if (sprflipvert) - sprtopscreen = CenterY + dc_texturemid * spryscale; + sprtopscreen = CenterY + DrawerContext::TextureMid() * spryscale; else - sprtopscreen = CenterY - dc_texturemid * spryscale; + sprtopscreen = CenterY - DrawerContext::TextureMid() * spryscale; // killough 1/25/98: here's where Medusa came in, because // it implicitly assumed that the column was all one patch. @@ -194,10 +195,12 @@ static void BlastMaskedColumn (void (*blastfunc)(const BYTE *pixels, const FText // the Medusa effect. The fix is to construct true columns // when forming multipatched textures (see r_data.c). 
+ DrawerContext::SetMaskedColumnState(mfloorclip, mceilingclip, spryscale, sprtopscreen, sprflipvert); + // draw the texture const FTexture::Span *spans; - const BYTE *pixels = tex->GetColumn (maskedtexturecol[dc_x] >> FRACBITS, &spans); - blastfunc (pixels, spans); + const BYTE *pixels = tex->GetColumn (maskedtexturecol[x] >> FRACBITS, &spans); + blastfunc (x, pixels, spans); rw_light += rw_lightstep; spryscale += rw_scalestep; } @@ -243,7 +246,7 @@ void R_RenderMaskedSegRange (drawseg_t *ds, int x1, int x2) // [RH] modified because we don't use user-definable translucency maps ESPSResult drawmode; - drawmode = R_SetPatchStyle (LegacyRenderStyles[curline->linedef->flags & ML_ADDTRANS ? STYLE_Add : STYLE_Translucent], + drawmode = DrawerContext::SetPatchStyle (LegacyRenderStyles[curline->linedef->flags & ML_ADDTRANS ? STYLE_Add : STYLE_Translucent], (float)MIN(curline->linedef->alpha, 1.), 0, 0); if ((drawmode == DontDraw && !ds->bFogBoundary && !ds->bFakeBoundary)) @@ -295,7 +298,7 @@ void R_RenderMaskedSegRange (drawseg_t *ds, int x1, int x2) // [RH] Draw fog partition if (ds->bFogBoundary) { - R_DrawFogBoundary (x1, x2, mceilingclip, mfloorclip); + DrawerContext::DrawFogBoundary (x1, x2, mceilingclip, mfloorclip); if (ds->maskedtexturecol == -1) { goto clearfog; @@ -313,9 +316,9 @@ void R_RenderMaskedSegRange (drawseg_t *ds, int x1, int x2) rw_scalestep = ds->iscalestep; if (fixedlightlev >= 0) - R_SetColorMapLight(basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); + DrawerContext::SetLight(basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); else if (fixedcolormap != NULL) - R_SetColorMapLight(fixedcolormap, 0, 0); + DrawerContext::SetLight(fixedcolormap, 0, 0); // find positioning texheight = tex->GetScaledHeightDouble(); @@ -326,11 +329,11 @@ void R_RenderMaskedSegRange (drawseg_t *ds, int x1, int x2) } if (curline->linedef->flags & ML_DONTPEGBOTTOM) { - dc_texturemid = MAX(frontsector->GetPlaneTexZ(sector_t::floor), backsector->GetPlaneTexZ(sector_t::floor)) + 
texheight; + DrawerContext::SetTextureMid(MAX(frontsector->GetPlaneTexZ(sector_t::floor), backsector->GetPlaneTexZ(sector_t::floor)) + texheight); } else { - dc_texturemid = MIN(frontsector->GetPlaneTexZ(sector_t::ceiling), backsector->GetPlaneTexZ(sector_t::ceiling)); + DrawerContext::SetTextureMid(MIN(frontsector->GetPlaneTexZ(sector_t::ceiling), backsector->GetPlaneTexZ(sector_t::ceiling))); } rowoffset = curline->sidedef->GetTextureYOffset(side_t::mid); @@ -349,21 +352,21 @@ void R_RenderMaskedSegRange (drawseg_t *ds, int x1, int x2) { // rowoffset is added before the multiply so that the masked texture will // still be positioned in world units rather than texels. - dc_texturemid += rowoffset - ViewPos.Z; - textop = dc_texturemid; - dc_texturemid *= MaskedScaleY; + DrawerContext::SetTextureMid(DrawerContext::TextureMid() + rowoffset - ViewPos.Z); + textop = DrawerContext::TextureMid(); + DrawerContext::SetTextureMid(DrawerContext::TextureMid() * MaskedScaleY); } else { // rowoffset is added outside the multiply so that it positions the texture // by texels instead of world units. 
- textop = dc_texturemid + rowoffset / MaskedScaleY - ViewPos.Z; - dc_texturemid = (dc_texturemid - ViewPos.Z) * MaskedScaleY + rowoffset; + textop = DrawerContext::TextureMid() + rowoffset / MaskedScaleY - ViewPos.Z; + DrawerContext::SetTextureMid((DrawerContext::TextureMid() - ViewPos.Z) * MaskedScaleY + rowoffset); } if (sprflipvert) { MaskedScaleY = -MaskedScaleY; - dc_texturemid -= tex->GetHeight() << FRACBITS; + DrawerContext::SetTextureMid(DrawerContext::TextureMid() - tex->GetHeight()); } // [RH] Don't bother drawing segs that are completely offscreen @@ -438,9 +441,9 @@ void R_RenderMaskedSegRange (drawseg_t *ds, int x1, int x2) // draw the columns one at a time if (drawmode == DoDraw0) { - for (dc_x = x1; dc_x < x2; ++dc_x) + for (int x = x1; x < x2; ++x) { - BlastMaskedColumn (R_DrawMaskedColumn, tex); + BlastMaskedColumn (x, DrawerContext::DrawMaskedColumn, tex); } } else @@ -451,29 +454,29 @@ void R_RenderMaskedSegRange (drawseg_t *ds, int x1, int x2) if (x1 >= x2) goto clearfog; - dc_x = x1; + int x = x1; - while ((dc_x < stop) && (dc_x & 3)) + while ((x < stop) && (x & 3)) { - BlastMaskedColumn (R_DrawMaskedColumn, tex); - dc_x++; + BlastMaskedColumn (x, DrawerContext::DrawMaskedColumn, tex); + x++; } - while (dc_x < stop) + while (x < stop) { - rt_initcols(nullptr); - BlastMaskedColumn (R_DrawMaskedColumnHoriz, tex); dc_x++; - BlastMaskedColumn (R_DrawMaskedColumnHoriz, tex); dc_x++; - BlastMaskedColumn (R_DrawMaskedColumnHoriz, tex); dc_x++; - BlastMaskedColumn (R_DrawMaskedColumnHoriz, tex); - rt_draw4cols (dc_x - 3); - dc_x++; + DrawerContext::RtInitCols(nullptr); + BlastMaskedColumn (x, DrawerContext::DrawMaskedColumnHoriz, tex); + BlastMaskedColumn (x + 1, DrawerContext::DrawMaskedColumnHoriz, tex); + BlastMaskedColumn (x + 2, DrawerContext::DrawMaskedColumnHoriz, tex); + BlastMaskedColumn (x + 3, DrawerContext::DrawMaskedColumnHoriz, tex); + DrawerContext::DrawRt4cols (x); + x += 4; } - while (dc_x < x2) + while (x < x2) { - BlastMaskedColumn 
(R_DrawMaskedColumn, tex); - dc_x++; + BlastMaskedColumn (x, DrawerContext::DrawMaskedColumn, tex); + x++; } } } @@ -483,13 +486,13 @@ void R_RenderMaskedSegRange (drawseg_t *ds, int x1, int x2) { // rowoffset is added before the multiply so that the masked texture will // still be positioned in world units rather than texels. - dc_texturemid = (dc_texturemid - ViewPos.Z + rowoffset) * MaskedScaleY; + DrawerContext::SetTextureMid((DrawerContext::TextureMid() - ViewPos.Z + rowoffset) * MaskedScaleY); } else { // rowoffset is added outside the multiply so that it positions the texture // by texels instead of world units. - dc_texturemid = (dc_texturemid - ViewPos.Z) * MaskedScaleY + rowoffset; + DrawerContext::SetTextureMid((DrawerContext::TextureMid() - ViewPos.Z) * MaskedScaleY + rowoffset); } WallC.sz1 = ds->sz1; @@ -535,7 +538,7 @@ void R_RenderMaskedSegRange (drawseg_t *ds, int x1, int x2) } clearfog: - R_FinishSetPatchStyle (); + DrawerContext::FinishSetPatchStyle (); if (ds->bFakeBoundary & 3) { R_RenderFakeWallRange(ds, x1, x2); @@ -567,11 +570,11 @@ void R_RenderFakeWall(drawseg_t *ds, int x1, int x2, F3DFloor *rover) fixed_t Alpha = Scale(rover->alpha, OPAQUE, 255); ESPSResult drawmode; - drawmode = R_SetPatchStyle (LegacyRenderStyles[rover->flags & FF_ADDITIVETRANS ? STYLE_Add : STYLE_Translucent], + drawmode = DrawerContext::SetPatchStyle (LegacyRenderStyles[rover->flags & FF_ADDITIVETRANS ? STYLE_Add : STYLE_Translucent], Alpha, 0, 0); if(drawmode == DontDraw) { - R_FinishSetPatchStyle(); + DrawerContext::FinishSetPatchStyle(); return; } @@ -613,26 +616,26 @@ void R_RenderFakeWall(drawseg_t *ds, int x1, int x2, F3DFloor *rover) { rowoffset += rw_pic->GetHeight(); } - dc_texturemid = (planez - ViewPos.Z) * yscale; + DrawerContext::SetTextureMid((planez - ViewPos.Z) * yscale); if (rw_pic->bWorldPanning) { // rowoffset is added before the multiply so that the masked texture will // still be positioned in world units rather than texels. 
- dc_texturemid = dc_texturemid + rowoffset * yscale; + DrawerContext::SetTextureMid(DrawerContext::TextureMid() + rowoffset * yscale); rw_offset = xs_RoundToInt(rw_offset * xscale); } else { // rowoffset is added outside the multiply so that it positions the texture // by texels instead of world units. - dc_texturemid += rowoffset; + DrawerContext::SetTextureMid(DrawerContext::TextureMid() + rowoffset); } if (fixedlightlev >= 0) - R_SetColorMapLight(basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); + DrawerContext::SetLight(basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); else if (fixedcolormap != NULL) - R_SetColorMapLight(fixedcolormap, 0, 0); + DrawerContext::SetLight(fixedcolormap, 0, 0); WallC.sz1 = ds->sz1; WallC.sz2 = ds->sz2; @@ -660,7 +663,7 @@ void R_RenderFakeWall(drawseg_t *ds, int x1, int x2, F3DFloor *rover) PrepLWall (lwall, curline->sidedef->TexelLength*xscale, ds->sx1, ds->sx2); wallscan_np2_ds(ds, x1, x2, wallupper, walllower, MaskedSWall, lwall, yscale); - R_FinishSetPatchStyle(); + DrawerContext::FinishSetPatchStyle(); } // kg3D - walls of fake floors @@ -1065,360 +1068,6 @@ void R_RenderFakeWallRange (drawseg_t *ds, int x1, int x2) return; } -// Draw a column with support for non-power-of-two ranges -uint32_t wallscan_drawcol1(int x, int y1, int y2, uint32_t uv_start, uint32_t uv_step, uint32_t uv_max, const BYTE *source, DWORD(*draw1column)()) -{ - int pixelsize = r_swtruecolor ? 
4 : 1; - if (uv_max == 0) // power of two - { - int count = y2 - y1; - - dc_source = source; - dc_dest = (ylookup[y1] + x) * pixelsize + dc_destorg; - dc_count = count; - dc_iscale = uv_step; - dc_texturefrac = uv_start; - draw1column(); - - uint64_t step64 = uv_step; - uint64_t pos64 = uv_start; - return (uint32_t)(pos64 + step64 * count); - } - else - { - uint32_t uv_pos = uv_start; - - uint32_t left = y2 - y1; - while (left > 0) - { - uint32_t available = uv_max - uv_pos; - uint32_t next_uv_wrap = available / uv_step; - if (available % uv_step != 0) - next_uv_wrap++; - uint32_t count = MIN(left, next_uv_wrap); - - dc_source = source; - dc_dest = (ylookup[y1] + x) * pixelsize + dc_destorg; - dc_count = count; - dc_iscale = uv_step; - dc_texturefrac = uv_pos; - draw1column(); - - left -= count; - uv_pos += uv_step * count; - if (uv_pos >= uv_max) - uv_pos -= uv_max; - } - - return uv_pos; - } -} - -// Draw four columns with support for non-power-of-two ranges -void wallscan_drawcol4(int x, int y1, int y2, uint32_t *uv_pos, uint32_t *uv_step, uint32_t uv_max, const BYTE **source, void(*draw4columns)()) -{ - int pixelsize = r_swtruecolor ? 
4 : 1; - if (uv_max == 0) // power of two, no wrap handling needed - { - int count = y2 - y1; - for (int i = 0; i < 4; i++) - { - bufplce[i] = source[i]; - vplce[i] = uv_pos[i]; - vince[i] = uv_step[i]; - - uint64_t step64 = uv_step[i]; - uint64_t pos64 = uv_pos[i]; - uv_pos[i] = (uint32_t)(pos64 + step64 * count); - } - dc_dest = (ylookup[y1] + x) * pixelsize + dc_destorg; - dc_count = count; - draw4columns(); - } - else - { - dc_dest = (ylookup[y1] + x) * pixelsize + dc_destorg; - for (int i = 0; i < 4; i++) - bufplce[i] = source[i]; - - uint32_t left = y2 - y1; - while (left > 0) - { - // Find which column wraps first - uint32_t count = left; - for (int i = 0; i < 4; i++) - { - uint32_t available = uv_max - uv_pos[i]; - uint32_t next_uv_wrap = available / uv_step[i]; - if (available % uv_step[i] != 0) - next_uv_wrap++; - count = MIN(next_uv_wrap, count); - } - - // Draw until that column wraps - for (int i = 0; i < 4; i++) - { - vplce[i] = uv_pos[i]; - vince[i] = uv_step[i]; - } - dc_count = count; - draw4columns(); - - // Wrap the uv position - for (int i = 0; i < 4; i++) - { - uv_pos[i] += uv_step[i] * count; - if (uv_pos[i] >= uv_max) - uv_pos[i] -= uv_max; - } - - left -= count; - } - } -} - -// Calculates a wrapped uv start position value for a column -void calc_uv_start_and_step(int y1, float swal, double yrepeat, uint32_t uv_height, int fracbits, uint32_t &uv_start_out, uint32_t &uv_step_out) -{ - double uv_stepd = swal * yrepeat; - - // Find start uv in [0-uv_height[ range. - // Not using xs_ToFixed because it rounds the result and we need something that always rounds down to stay within the range. 
- double v = (dc_texturemid + uv_stepd * (y1 - CenterY + 0.5)) / uv_height; - v = v - floor(v); - v *= uv_height; - v *= (1 << fracbits); - - uv_start_out = (uint32_t)v; - uv_step_out = xs_ToFixed(fracbits, uv_stepd); -} - -typedef DWORD(*Draw1ColumnFuncPtr)(); -typedef void(*Draw4ColumnsFuncPtr)(); - -void wallscan_any( - int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, - const BYTE *(*getcol)(FTexture *tex, int x), - void(setupwallscan(int bits,Draw1ColumnFuncPtr &draw1, Draw4ColumnsFuncPtr &draw2))) -{ - if (rw_pic->UseType == FTexture::TEX_Null) - return; - - uint32_t uv_height = rw_pic->GetHeight(); - uint32_t fracbits = 32 - rw_pic->HeightBits; - uint32_t uv_max = uv_height << fracbits; - - DWORD(*draw1column)(); - void(*draw4columns)(); - setupwallscan(fracbits, draw1column, draw4columns); - - fixed_t xoffset = rw_offset; - - bool fixed = (fixedcolormap != NULL || fixedlightlev >= 0); - if (fixed) - { - palookupoffse[0] = dc_colormap; - palookupoffse[1] = dc_colormap; - palookupoffse[2] = dc_colormap; - palookupoffse[3] = dc_colormap; - palookuplight[0] = 0; - palookuplight[1] = 0; - palookuplight[2] = 0; - palookuplight[3] = 0; - } - - if (fixedcolormap) - R_SetColorMapLight(fixedcolormap, 0, 0); - else - R_SetColorMapLight(basecolormap, 0, 0); - - float light = rw_light; - - // Calculate where 4 column alignment begins and ends: - int aligned_x1 = clamp((x1 + 3) / 4 * 4, x1, x2); - int aligned_x2 = clamp(x2 / 4 * 4, x1, x2); - - // First unaligned columns: - for (int x = x1; x < aligned_x1; x++, light += rw_lightstep) - { - int y1 = uwal[x]; - int y2 = dwal[x]; - if (y2 <= y1) - continue; - - if (!fixed) - R_SetColorMapLight(basecolormap, light, wallshade); - - const BYTE *source = getcol(rw_pic, (lwal[x] + xoffset) >> FRACBITS); - - uint32_t uv_start, uv_step; - calc_uv_start_and_step(y1, swal[x], yrepeat, uv_height, fracbits, uv_start, uv_step); - - wallscan_drawcol1(x, y1, y2, uv_start, uv_step, uv_max, source, 
draw1column); - } - - // The aligned columns - for (int x = aligned_x1; x < aligned_x2; x += 4) - { - // Find y1, y2, light and uv values for four columns: - int y1[4] = { uwal[x], uwal[x + 1], uwal[x + 2], uwal[x + 3] }; - int y2[4] = { dwal[x], dwal[x + 1], dwal[x + 2], dwal[x + 3] }; - - const BYTE *source[4]; - for (int i = 0; i < 4; i++) - source[i] = getcol(rw_pic, (lwal[x + i] + xoffset) >> FRACBITS); - - float lights[4]; - for (int i = 0; i < 4; i++) - { - lights[i] = light; - light += rw_lightstep; - } - - uint32_t uv_pos[4], uv_step[4]; - for (int i = 0; i < 4; i++) - calc_uv_start_and_step(y1[i], swal[x + i], yrepeat, uv_height, fracbits, uv_pos[i], uv_step[i]); - - // Figure out where we vertically can start and stop drawing 4 columns in one go - int middle_y1 = y1[0]; - int middle_y2 = y2[0]; - for (int i = 1; i < 4; i++) - { - middle_y1 = MAX(y1[i], middle_y1); - middle_y2 = MIN(y2[i], middle_y2); - } - - // If we got an empty column in our set we cannot draw 4 columns in one go: - bool empty_column_in_set = false; - for (int i = 0; i < 4; i++) - { - if (y2[i] <= y1[i]) - empty_column_in_set = true; - } - - if (empty_column_in_set || middle_y2 <= middle_y1) - { - for (int i = 0; i < 4; i++) - { - if (y2[i] <= y1[i]) - continue; - - if (!fixed) - R_SetColorMapLight(basecolormap, lights[i], wallshade); - wallscan_drawcol1(x + i, y1[i], y2[i], uv_pos[i], uv_step[i], uv_max, source[i], draw1column); - } - continue; - } - - // Draw the first rows where not all 4 columns are active - for (int i = 0; i < 4; i++) - { - if (!fixed) - R_SetColorMapLight(basecolormap, lights[i], wallshade); - - if (y1[i] < middle_y1) - uv_pos[i] = wallscan_drawcol1(x + i, y1[i], middle_y1, uv_pos[i], uv_step[i], uv_max, source[i], draw1column); - } - - // Draw the area where all 4 columns are active - if (!fixed) - { - for (int i = 0; i < 4; i++) - { - if (r_swtruecolor) - { - palookupoffse[i] = basecolormap->Maps; - palookuplight[i] = LIGHTSCALE(lights[i], wallshade); - } - 
else - { - palookupoffse[i] = basecolormap->Maps + (GETPALOOKUP(lights[i], wallshade) << COLORMAPSHIFT); - palookuplight[i] = 0; - } - } - } - wallscan_drawcol4(x, middle_y1, middle_y2, uv_pos, uv_step, uv_max, source, draw4columns); - - // Draw the last rows where not all 4 columns are active - for (int i = 0; i < 4; i++) - { - if (!fixed) - R_SetColorMapLight(basecolormap, lights[i], wallshade); - - if (middle_y2 < y2[i]) - uv_pos[i] = wallscan_drawcol1(x + i, middle_y2, y2[i], uv_pos[i], uv_step[i], uv_max, source[i], draw1column); - } - } - - // The last unaligned columns: - for (int x = aligned_x2; x < x2; x++, light += rw_lightstep) - { - int y1 = uwal[x]; - int y2 = dwal[x]; - if (y2 <= y1) - continue; - - if (!fixed) - R_SetColorMapLight(basecolormap, light, wallshade); - - const BYTE *source = getcol(rw_pic, (lwal[x] + xoffset) >> FRACBITS); - - uint32_t uv_start, uv_step; - calc_uv_start_and_step(y1, swal[x], yrepeat, uv_height, fracbits, uv_start, uv_step); - - wallscan_drawcol1(x, y1, y2, uv_start, uv_step, uv_max, source, draw1column); - } - - NetUpdate (); -} - -void wallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x)) -{ - wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, [](int bits, Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) - { - setupvline(bits); - line1 = dovline1; - line4 = dovline4; - }); -} - -void maskwallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x)) -{ - if (!rw_pic->bMasked) // Textures that aren't masked can use the faster wallscan. 
- { - wallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol); - } - else - { - wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, [](int bits, Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) - { - setupmvline(bits); - line1 = domvline1; - line4 = domvline4; - }); - } -} - -void transmaskwallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x)) -{ - static fixed_t(*tmvline1)(); - static void(*tmvline4)(); - if (!R_GetTransMaskDrawers(&tmvline1, &tmvline4)) - { - // The current translucency is unsupported, so draw with regular maskwallscan instead. - maskwallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol); - } - else - { - wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, [](int bits, Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) - { - setuptmvline(bits); - line1 = reinterpret_cast(tmvline1); - line4 = tmvline4; - }); - } -} - void wallscan_striped (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat) { FDynamicColormap *startcolormap = basecolormap; @@ -1444,7 +1093,7 @@ void wallscan_striped (int x1, int x2, short *uwal, short *dwal, float *swal, fi { down[j] = clamp (most3[j], up[j], dwal[j]); } - wallscan (x1, x2, up, down, swal, lwal, yrepeat); + DrawerContext::DrawWall (x1, x2, up, down, swal, lwal, yrepeat, rw_pic, rw_offset); up = down; down = (down == most1) ? 
most2 : most1; } @@ -1455,7 +1104,7 @@ void wallscan_striped (int x1, int x2, short *uwal, short *dwal, float *swal, fi *lit->p_lightlevel, lit->lightsource != NULL) + r_actualextralight); } - wallscan (x1, x2, up, dwal, swal, lwal, yrepeat); + DrawerContext::DrawWall (x1, x2, up, dwal, swal, lwal, yrepeat, rw_pic, rw_offset); basecolormap = startcolormap; wallshade = startshade; } @@ -1464,20 +1113,20 @@ static void call_wallscan(int x1, int x2, short *uwal, short *dwal, float *swal, { if (mask) { - if (colfunc == basecolfunc) + if (DrawerContext::IsBaseColumn()) { - maskwallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat); + DrawerContext::DrawMaskedWall(x1, x2, uwal, dwal, swal, lwal, yrepeat, rw_pic, rw_offset); } else { - transmaskwallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat); + DrawerContext::DrawTransMaskedWall(x1, x2, uwal, dwal, swal, lwal, yrepeat, rw_pic, rw_offset); } } else { if (fixedcolormap != NULL || fixedlightlev >= 0 || !(frontsector->e && frontsector->e->XFloor.lightlist.Size())) { - wallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat); + DrawerContext::DrawWall(x1, x2, uwal, dwal, swal, lwal, yrepeat, rw_pic, rw_offset); } else { @@ -1513,14 +1162,14 @@ void wallscan_np2(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t if (yrepeat >= 0) { // normal orientation: draw strips from top to bottom - partition = top - fmod(top - dc_texturemid / yrepeat - ViewPos.Z, scaledtexheight); + partition = top - fmod(top - DrawerContext::TextureMid() / yrepeat - ViewPos.Z, scaledtexheight); if (partition == top) { partition -= scaledtexheight; } up = uwal; down = most1; - dc_texturemid = (partition - ViewPos.Z) * yrepeat + texheight; + DrawerContext::SetTextureMid((partition - ViewPos.Z) * yrepeat + texheight); while (partition > bot) { int j = OWallMost(most3, partition - ViewPos.Z, &WallC); @@ -1535,16 +1184,16 @@ void wallscan_np2(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t down = (down == most1) ? 
most2 : most1; } partition -= scaledtexheight; - dc_texturemid -= texheight; + DrawerContext::SetTextureMid(DrawerContext::TextureMid() - texheight); } call_wallscan(x1, x2, up, dwal, swal, lwal, yrepeat, mask); } else { // upside down: draw strips from bottom to top - partition = bot - fmod(bot - dc_texturemid / yrepeat - ViewPos.Z, scaledtexheight); + partition = bot - fmod(bot - DrawerContext::TextureMid() / yrepeat - ViewPos.Z, scaledtexheight); up = most1; down = dwal; - dc_texturemid = (partition - ViewPos.Z) * yrepeat + texheight; + DrawerContext::SetTextureMid((partition - ViewPos.Z) * yrepeat + texheight); while (partition < top) { int j = OWallMost(most3, partition - ViewPos.Z, &WallC); @@ -1559,7 +1208,7 @@ void wallscan_np2(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t up = (up == most1) ? most2 : most1; } partition -= scaledtexheight; - dc_texturemid -= texheight; + DrawerContext::SetTextureMid(DrawerContext::TextureMid() - texheight); } call_wallscan(x1, x2, uwal, down, swal, lwal, yrepeat, mask); } @@ -1612,9 +1261,9 @@ void R_RenderSegLoop () fixed_t xoffset = rw_offset; if (fixedlightlev >= 0) - R_SetColorMapLight(basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); + DrawerContext::SetLight(basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); else if (fixedcolormap != NULL) - R_SetColorMapLight(fixedcolormap, 0, 0); + DrawerContext::SetLight(fixedcolormap, 0, 0); // clip wall to the floor and ceiling for (x = x1; x < x2; ++x) @@ -1695,7 +1344,7 @@ void R_RenderSegLoop () { // one sided line if (midtexture->UseType != FTexture::TEX_Null && viewactive) { - dc_texturemid = rw_midtexturemid; + DrawerContext::SetTextureMid(rw_midtexturemid); rw_pic = midtexture; xscale = rw_pic->Scale.X * rw_midtexturescalex; yscale = rw_pic->Scale.Y * rw_midtexturescaley; @@ -1738,7 +1387,7 @@ void R_RenderSegLoop () } if (viewactive) { - dc_texturemid = rw_toptexturemid; + DrawerContext::SetTextureMid(rw_toptexturemid); rw_pic = toptexture; xscale = 
rw_pic->Scale.X * rw_toptexturescalex; yscale = rw_pic->Scale.Y * rw_toptexturescaley; @@ -1784,7 +1433,7 @@ void R_RenderSegLoop () } if (viewactive) { - dc_texturemid = rw_bottomtexturemid; + DrawerContext::SetTextureMid(rw_bottomtexturemid); rw_pic = bottomtexture; xscale = rw_pic->Scale.X * rw_bottomtexturescalex; yscale = rw_pic->Scale.Y * rw_bottomtexturescaley; @@ -2974,7 +2623,7 @@ static void R_RenderDecal (side_t *wall, DBaseDecal *decal, drawseg_t *clipper, } yscale = decal->ScaleY; - dc_texturemid = WallSpriteTile->TopOffset + (zpos - ViewPos.Z) / yscale; + DrawerContext::SetTextureMid(WallSpriteTile->TopOffset + (zpos - ViewPos.Z) / yscale); // Clip sprite to drawseg x1 = MAX(clipper->x1, x1); @@ -3011,11 +2660,11 @@ static void R_RenderDecal (side_t *wall, DBaseDecal *decal, drawseg_t *clipper, rw_light = rw_lightleft + (x1 - WallC.sx1) * rw_lightstep; if (fixedlightlev >= 0) - R_SetColorMapLight(usecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); + DrawerContext::SetLight(usecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); else if (fixedcolormap != NULL) - R_SetColorMapLight(fixedcolormap, 0, 0); + DrawerContext::SetLight(fixedcolormap, 0, 0); else if (!foggy && (decal->RenderFlags & RF_FULLBRIGHT)) - R_SetColorMapLight(usecolormap, 0, 0); + DrawerContext::SetLight(usecolormap, 0, 0); else calclighting = true; @@ -3024,7 +2673,7 @@ static void R_RenderDecal (side_t *wall, DBaseDecal *decal, drawseg_t *clipper, { sprflipvert = true; yscale = -yscale; - dc_texturemid -= WallSpriteTile->GetHeight(); + DrawerContext::SetTextureMid(DrawerContext::TextureMid() - WallSpriteTile->GetHeight()); } else { @@ -3034,10 +2683,9 @@ static void R_RenderDecal (side_t *wall, DBaseDecal *decal, drawseg_t *clipper, MaskedScaleY = float(1 / yscale); do { - dc_x = x1; ESPSResult mode; - mode = R_SetPatchStyle (decal->RenderStyle, (float)decal->Alpha, decal->Translation, decal->AlphaColor); + mode = DrawerContext::SetPatchStyle (decal->RenderStyle, (float)decal->Alpha, 
decal->Translation, decal->AlphaColor); // R_SetPatchStyle can modify basecolormap. if (rereadcolormap) @@ -3053,48 +2701,50 @@ static void R_RenderDecal (side_t *wall, DBaseDecal *decal, drawseg_t *clipper, { int stop4; + int x = x1; + if (mode == DoDraw0) { // 1 column at a time - stop4 = dc_x; + stop4 = x; } else // DoDraw1 { // up to 4 columns at a time stop4 = x2 & ~3; } - while ((dc_x < stop4) && (dc_x & 3)) + while ((x < stop4) && (x & 3)) { if (calclighting) { // calculate lighting - R_SetColorMapLight(usecolormap, rw_light, wallshade); + DrawerContext::SetLight(usecolormap, rw_light, wallshade); } - R_WallSpriteColumn (R_DrawMaskedColumn); - dc_x++; + R_WallSpriteColumn (x, DrawerContext::DrawMaskedColumn); + x++; } - while (dc_x < stop4) + while (x < stop4) { if (calclighting) { // calculate lighting - R_SetColorMapLight(usecolormap, rw_light, wallshade); + DrawerContext::SetLight(usecolormap, rw_light, wallshade); } - rt_initcols(nullptr); - for (int zz = 4; zz; --zz) + DrawerContext::RtInitCols(nullptr); + for (int zz = 0; zz < 4; ++zz) { - R_WallSpriteColumn (R_DrawMaskedColumnHoriz); - dc_x++; + R_WallSpriteColumn (x + zz, DrawerContext::DrawMaskedColumnHoriz); } - rt_draw4cols (dc_x - 4); + DrawerContext::DrawRt4cols (x); + x += 4; } - while (dc_x < x2) + while (x < x2) { if (calclighting) { // calculate lighting - R_SetColorMapLight(usecolormap, rw_light, wallshade); + DrawerContext::SetLight(usecolormap, rw_light, wallshade); } - R_WallSpriteColumn (R_DrawMaskedColumn); - dc_x++; + R_WallSpriteColumn (x, DrawerContext::DrawMaskedColumn); + x++; } } @@ -3103,14 +2753,11 @@ static void R_RenderDecal (side_t *wall, DBaseDecal *decal, drawseg_t *clipper, // needrepeat will be 0, and the while will fail. 
mceilingclip = floorclip; mfloorclip = wallbottom; - R_FinishSetPatchStyle (); + DrawerContext::FinishSetPatchStyle (); } while (needrepeat--); - colfunc = basecolfunc; - hcolfunc_post1 = rt_map1col; - hcolfunc_post4 = rt_map4cols; - - R_FinishSetPatchStyle (); + DrawerContext::SetBaseStyle(); + DrawerContext::FinishSetPatchStyle (); done: WallC = savecoord; } diff --git a/src/r_swrenderer.cpp b/src/r_swrenderer.cpp index c4347236de..480cdd02b8 100644 --- a/src/r_swrenderer.cpp +++ b/src/r_swrenderer.cpp @@ -32,6 +32,7 @@ ** */ +#define DRAWER_INTERNALS #include "r_local.h" #include "v_palette.h" diff --git a/src/r_things.cpp b/src/r_things.cpp index 836f586903..b856b968af 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -58,7 +58,7 @@ #include "r_plane.h" #include "r_segs.h" #include "r_3dfloors.h" -#include "r_draw_rgba.h" +#include "r_drawer_context.h" #include "v_palette.h" #include "r_data/r_translate.h" #include "r_data/colormaps.h" @@ -229,12 +229,6 @@ vissprite_t *R_NewVisSprite (void) return *(vissprite_p-1); } -// -// R_DrawMaskedColumn -// Used for sprites and masked mid textures. -// Masked means: partly transparent, i.e. stored -// in posts/runs of opaque pixels. -// short* mfloorclip; short* mceilingclip; @@ -243,88 +237,6 @@ double sprtopscreen; bool sprflipvert; -void R_DrawMaskedColumn (const BYTE *column, const FTexture::Span *span) -{ - int pixelsize = r_swtruecolor ? 
4 : 1; - const fixed_t centeryfrac = FLOAT2FIXED(CenterY); - const fixed_t texturemid = FLOAT2FIXED(dc_texturemid); - while (span->Length != 0) - { - const int length = span->Length; - const int top = span->TopOffset; - - // calculate unclipped screen coordinates for post - dc_yl = xs_RoundToInt(sprtopscreen + spryscale * top); - dc_yh = xs_RoundToInt(sprtopscreen + spryscale * (top + length)) - 1; - - if (sprflipvert) - { - swapvalues (dc_yl, dc_yh); - } - - if (dc_yh >= mfloorclip[dc_x]) - { - dc_yh = mfloorclip[dc_x] - 1; - } - if (dc_yl < mceilingclip[dc_x]) - { - dc_yl = mceilingclip[dc_x]; - } - - if (dc_yl <= dc_yh) - { - if (sprflipvert) - { - dc_texturefrac = (dc_yl*dc_iscale) - (top << FRACBITS) - - FixedMul (centeryfrac, dc_iscale) - texturemid; - const fixed_t maxfrac = length << FRACBITS; - while (dc_texturefrac >= maxfrac) - { - if (++dc_yl > dc_yh) - goto nextpost; - dc_texturefrac += dc_iscale; - } - fixed_t endfrac = dc_texturefrac + (dc_yh-dc_yl)*dc_iscale; - while (endfrac < 0) - { - if (--dc_yh < dc_yl) - goto nextpost; - endfrac -= dc_iscale; - } - } - else - { - dc_texturefrac = texturemid - (top << FRACBITS) - + (dc_yl*dc_iscale) - FixedMul (centeryfrac-FRACUNIT, dc_iscale); - while (dc_texturefrac < 0) - { - if (++dc_yl > dc_yh) - goto nextpost; - dc_texturefrac += dc_iscale; - } - fixed_t endfrac = dc_texturefrac + (dc_yh-dc_yl)*dc_iscale; - const fixed_t maxfrac = length << FRACBITS; - if (dc_yh < mfloorclip[dc_x]-1 && endfrac < maxfrac - dc_iscale) - { - dc_yh++; - } - else while (endfrac >= maxfrac) - { - if (--dc_yh < dc_yl) - goto nextpost; - endfrac -= dc_iscale; - } - } - dc_source = column + top; - dc_dest = (ylookup[dc_yl] + dc_x) * pixelsize + dc_destorg; - dc_count = dc_yh - dc_yl + 1; - colfunc (); - } -nextpost: - span++; - } -} - // [ZZ] // R_ClipSpriteColumnWithPortals // @@ -361,7 +273,7 @@ static inline void R_CollectPortals() } } -static inline bool R_ClipSpriteColumnWithPortals(vissprite_t* spr) +bool 
R_ClipSpriteColumnWithPortals(int x, vissprite_t* spr) { // [ZZ] 10.01.2016: don't clip sprites from the root of a skybox. if (CurrentPortalInSkybox) @@ -380,7 +292,7 @@ static inline bool R_ClipSpriteColumnWithPortals(vissprite_t* spr) continue; // now if current column is covered by this drawseg, we clip it away - if ((dc_x >= seg->x1) && (dc_x < seg->x2)) + if ((x >= seg->x1) && (x < seg->x2)) return true; } @@ -409,15 +321,15 @@ void R_DrawVisSprite (vissprite_t *vis) } fixed_t centeryfrac = FLOAT2FIXED(CenterY); - R_SetColorMapLight(vis->Style.BaseColormap, 0, vis->Style.ColormapNum << FRACBITS); + DrawerContext::SetLight(vis->Style.BaseColormap, 0, vis->Style.ColormapNum << FRACBITS); - mode = R_SetPatchStyle (vis->Style.RenderStyle, vis->Style.Alpha, vis->Translation, vis->FillColor); + mode = DrawerContext::SetPatchStyle (vis->Style.RenderStyle, vis->Style.Alpha, vis->Translation, vis->FillColor); if (vis->Style.RenderStyle == LegacyRenderStyles[STYLE_Shaded]) { // For shaded sprites, R_SetPatchStyle sets a dc_colormap to an alpha table, but // it is the brightest one. We need to get back to the proper light level for // this sprite. 
- R_SetColorMapLight(dc_fcolormap, 0, vis->Style.ColormapNum << FRACBITS); + DrawerContext::SetLight(DrawerContext::LightColormap(), 0, vis->Style.ColormapNum << FRACBITS); } if (mode != DontDraw) @@ -436,65 +348,67 @@ void R_DrawVisSprite (vissprite_t *vis) tex = vis->pic; spryscale = vis->yscale; sprflipvert = false; - dc_iscale = FLOAT2FIXED(1 / vis->yscale); + DrawerContext::SetTextureStep(FLOAT2FIXED(1 / vis->yscale)); frac = vis->startfrac; xiscale = vis->xiscale; - dc_texturemid = vis->texturemid; + DrawerContext::SetTextureMid(vis->texturemid); if (vis->renderflags & RF_YFLIP) { sprflipvert = true; spryscale = -spryscale; - dc_iscale = -dc_iscale; - dc_texturemid -= vis->pic->GetHeight(); - sprtopscreen = CenterY + dc_texturemid * spryscale; + DrawerContext::SetTextureStep(-DrawerContext::TextureStep()); + DrawerContext::SetTextureMid(DrawerContext::TextureMid() - vis->pic->GetHeight()); + sprtopscreen = CenterY + DrawerContext::TextureMid() * spryscale; } else { sprflipvert = false; - sprtopscreen = CenterY - dc_texturemid * spryscale; + sprtopscreen = CenterY - DrawerContext::TextureMid() * spryscale; } - dc_x = vis->x1; + int x = vis->x1; x2 = vis->x2; - if (dc_x < x2) + if (x < x2) { - while ((dc_x < stop4) && (dc_x & 3)) + DrawerContext::SetMaskedColumnState(mfloorclip, mceilingclip, spryscale, sprtopscreen, sprflipvert); + + while ((x < stop4) && (x & 3)) { pixels = tex->GetColumn (frac >> FRACBITS, &spans); - if (ispsprite || !R_ClipSpriteColumnWithPortals(vis)) - R_DrawMaskedColumn (pixels, spans); - dc_x++; + if (ispsprite || !R_ClipSpriteColumnWithPortals(x, vis)) + DrawerContext::DrawMaskedColumn (x, pixels, spans); + x++; frac += xiscale; } - while (dc_x < stop4) + while (x < stop4) { - rt_initcols(nullptr); - for (int zz = 4; zz; --zz) + DrawerContext::RtInitCols(nullptr); + for (int zz = 0; zz < 4; ++zz) { pixels = tex->GetColumn (frac >> FRACBITS, &spans); - if (ispsprite || !R_ClipSpriteColumnWithPortals(vis)) - R_DrawMaskedColumnHoriz 
(pixels, spans); - dc_x++; + if (ispsprite || !R_ClipSpriteColumnWithPortals(x + zz, vis)) + DrawerContext::DrawMaskedColumnHoriz (x + zz, pixels, spans); frac += xiscale; } - rt_draw4cols (dc_x - 4); + DrawerContext::DrawRt4cols(x); + x += 4; } - while (dc_x < x2) + while (x < x2) { pixels = tex->GetColumn (frac >> FRACBITS, &spans); - if (ispsprite || !R_ClipSpriteColumnWithPortals(vis)) - R_DrawMaskedColumn (pixels, spans); - dc_x++; + if (ispsprite || !R_ClipSpriteColumnWithPortals(x, vis)) + DrawerContext::DrawMaskedColumn (x, pixels, spans); + x++; frac += xiscale; } } } - R_FinishSetPatchStyle (); + DrawerContext::FinishSetPatchStyle (); NetUpdate (); } @@ -511,7 +425,7 @@ void R_DrawWallSprite(vissprite_t *spr) WallT.InitFromWallCoords(&spr->wallc); PrepWall(swall, lwall, spr->pic->GetWidth() << FRACBITS, x1, x2); iyscale = 1 / spr->yscale; - dc_texturemid = (spr->gzt - ViewPos.Z) * iyscale; + DrawerContext::SetTextureMid((spr->gzt - ViewPos.Z) * iyscale); if (spr->renderflags & RF_XFLIP) { int right = (spr->pic->GetWidth() << FRACBITS) - 1; @@ -539,11 +453,11 @@ void R_DrawWallSprite(vissprite_t *spr) rw_lightstep = float((GlobVis / spr->wallc.sz2 - rw_lightleft) / (spr->wallc.sx2 - spr->wallc.sx1)); rw_light = rw_lightleft + (x1 - spr->wallc.sx1) * rw_lightstep; if (fixedlightlev >= 0) - R_SetColorMapLight(usecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); + DrawerContext::SetLight(usecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); else if (fixedcolormap != NULL) - R_SetColorMapLight(fixedcolormap, 0, 0); + DrawerContext::SetLight(fixedcolormap, 0, 0); else if (!foggy && (spr->renderflags & RF_FULLBRIGHT)) - R_SetColorMapLight(usecolormap, 0, 0); + DrawerContext::SetLight(usecolormap, 0, 0); else calclighting = true; @@ -553,7 +467,7 @@ void R_DrawWallSprite(vissprite_t *spr) { sprflipvert = true; iyscale = -iyscale; - dc_texturemid -= spr->pic->GetHeight(); + DrawerContext::SetTextureMid(DrawerContext::TextureMid() - spr->pic->GetHeight()); } else { @@ 
-562,10 +476,9 @@ void R_DrawWallSprite(vissprite_t *spr) MaskedScaleY = (float)iyscale; - dc_x = x1; ESPSResult mode; - mode = R_SetPatchStyle (spr->Style.RenderStyle, spr->Style.Alpha, spr->Translation, spr->FillColor); + mode = DrawerContext::SetPatchStyle (spr->Style.RenderStyle, spr->Style.Alpha, spr->Translation, spr->FillColor); // R_SetPatchStyle can modify basecolormap. if (rereadcolormap) @@ -581,71 +494,74 @@ void R_DrawWallSprite(vissprite_t *spr) { int stop4; + int x = x1; + if (mode == DoDraw0) { // 1 column at a time - stop4 = dc_x; + stop4 = x; } else // DoDraw1 { // up to 4 columns at a time stop4 = x2 & ~3; } - while ((dc_x < stop4) && (dc_x & 3)) + while ((x < stop4) && (x & 3)) { if (calclighting) { // calculate lighting - R_SetColorMapLight(usecolormap, rw_light, shade); + DrawerContext::SetLight(usecolormap, rw_light, shade); } - if (!R_ClipSpriteColumnWithPortals(spr)) - R_WallSpriteColumn(R_DrawMaskedColumn); - dc_x++; + if (!R_ClipSpriteColumnWithPortals(x, spr)) + R_WallSpriteColumn(x, DrawerContext::DrawMaskedColumn); + x++; } - while (dc_x < stop4) + while (x < stop4) { if (calclighting) { // calculate lighting - R_SetColorMapLight(usecolormap, rw_light, shade); + DrawerContext::SetLight(usecolormap, rw_light, shade); } - rt_initcols(nullptr); - for (int zz = 4; zz; --zz) + DrawerContext::RtInitCols(nullptr); + for (int zz = 0; zz < 4; ++zz) { - if (!R_ClipSpriteColumnWithPortals(spr)) - R_WallSpriteColumn(R_DrawMaskedColumnHoriz); - dc_x++; + if (!R_ClipSpriteColumnWithPortals(x + zz, spr)) + R_WallSpriteColumn(x + zz, DrawerContext::DrawMaskedColumnHoriz); } - rt_draw4cols(dc_x - 4); + DrawerContext::DrawRt4cols(x); + x += 4; } - while (dc_x < x2) + while (x < x2) { if (calclighting) { // calculate lighting - R_SetColorMapLight(usecolormap, rw_light, shade); + DrawerContext::SetLight(usecolormap, rw_light, shade); } - if (!R_ClipSpriteColumnWithPortals(spr)) - R_WallSpriteColumn(R_DrawMaskedColumn); - dc_x++; + if 
(!R_ClipSpriteColumnWithPortals(x, spr)) + R_WallSpriteColumn(x, DrawerContext::DrawMaskedColumn); + x++; } } - R_FinishSetPatchStyle(); + DrawerContext::FinishSetPatchStyle(); } -void R_WallSpriteColumn (void (*drawfunc)(const BYTE *column, const FTexture::Span *spans)) +void R_WallSpriteColumn (int x, void (*drawfunc)(int x, const BYTE *column, const FTexture::Span *spans)) { - float iscale = swall[dc_x] * MaskedScaleY; - dc_iscale = FLOAT2FIXED(iscale); + float iscale = swall[x] * MaskedScaleY; + DrawerContext::SetTextureStep(FLOAT2FIXED(iscale)); spryscale = 1 / iscale; if (sprflipvert) - sprtopscreen = CenterY + dc_texturemid * spryscale; + sprtopscreen = CenterY + DrawerContext::TextureMid() * spryscale; else - sprtopscreen = CenterY - dc_texturemid * spryscale; + sprtopscreen = CenterY - DrawerContext::TextureMid() * spryscale; const BYTE *column; const FTexture::Span *spans; - column = WallSpriteTile->GetColumn (lwall[dc_x] >> FRACBITS, &spans); - dc_texturefrac = 0; - drawfunc (column, spans); + column = WallSpriteTile->GetColumn (lwall[x] >> FRACBITS, &spans); + DrawerContext::SetTextureFrac(0); + DrawerContext::SetMaskedColumnState(mfloorclip, mceilingclip, spryscale, sprtopscreen, sprflipvert); + drawfunc (x, column, spans); rw_light += rw_lightstep; } @@ -655,18 +571,18 @@ void R_DrawVisVoxel(vissprite_t *spr, int minslabz, int maxslabz, short *cliptop int flags = 0; // Do setup for blending. 
- R_SetColorMapLight(spr->Style.BaseColormap, 0, spr->Style.ColormapNum << FRACBITS); - mode = R_SetPatchStyle(spr->Style.RenderStyle, spr->Style.Alpha, spr->Translation, spr->FillColor); + DrawerContext::SetLight(spr->Style.BaseColormap, 0, spr->Style.ColormapNum << FRACBITS); + mode = DrawerContext::SetPatchStyle(spr->Style.RenderStyle, spr->Style.Alpha, spr->Translation, spr->FillColor); if (mode == DontDraw) { return; } - if (colfunc == fuzzcolfunc || colfunc == R_FillColumn) + if (DrawerContext::IsFuzzColumn() || DrawerContext::IsFillColumn()) { flags = DVF_OFFSCREEN | DVF_SPANSONLY; } - else if (colfunc != basecolfunc) + else if (!DrawerContext::IsBaseColumn()) { flags = DVF_OFFSCREEN; } @@ -692,32 +608,32 @@ void R_DrawVisVoxel(vissprite_t *spr, int minslabz, int maxslabz, short *cliptop { if (!(flags & DVF_SPANSONLY) && (x & 3) == 0) { - rt_initcols(OffscreenColorBuffer + x * OffscreenBufferHeight); + DrawerContext::RtInitCols(OffscreenColorBuffer + x * OffscreenBufferHeight); } for (FCoverageBuffer::Span *span = OffscreenCoverageBuffer->Spans[x]; span != NULL; span = span->NextSpan) { if (flags & DVF_SPANSONLY) { - dc_x = x; - dc_yl = span->Start; - dc_yh = span->Stop - 1; - dc_count = span->Stop - span->Start; - dc_dest = (ylookup[span->Start] + x) * pixelsize + dc_destorg; - colfunc(); + DrawerContext::SetX(x); + DrawerContext::SetY1(span->Start); + DrawerContext::SetY2(span->Stop - 1); + DrawerContext::SetDrawCount(span->Stop - span->Start); + DrawerContext::SetDest(x, span->Start); + DrawerContext::DrawColumn(); } else { - rt_span_coverage(x, span->Start, span->Stop - 1); + DrawerContext::RtSpanCoverage(x, span->Start, span->Stop - 1); } } if (!(flags & DVF_SPANSONLY) && (x & 3) == 3) { - rt_draw4cols(x - 3); + DrawerContext::DrawRt4cols(x - 3); } } } - R_FinishSetPatchStyle(); + DrawerContext::FinishSetPatchStyle(); NetUpdate(); } @@ -2585,7 +2501,7 @@ void R_ProjectParticle (particle_t *particle, const sector_t *sector, int shade, } } -static void 
R_DrawMaskedSegsBehindParticle (const vissprite_t *vis) +void R_DrawMaskedSegsBehindParticle (const vissprite_t *vis) { const int x1 = vis->x1; const int x2 = vis->x2; @@ -2610,120 +2526,24 @@ static void R_DrawMaskedSegsBehindParticle (const vissprite_t *vis) } } -void R_DrawParticle_C (vissprite_t *vis) +void R_DrawParticle(vissprite_t *vis) { - DWORD *bg2rgb; - int spacing; - BYTE *dest; - DWORD fg; BYTE color = vis->Style.BaseColormap->Maps[(vis->Style.ColormapNum << COLORMAPSHIFT) + vis->startfrac]; int yl = vis->y1; - int ycount = vis->y2 - yl + 1; - int x1 = vis->x1; - int countbase = vis->x2 - x1; - - R_DrawMaskedSegsBehindParticle (vis); - - // vis->renderflags holds translucency level (0-255) - { - fixed_t fglevel, bglevel; - DWORD *fg2rgb; - - fglevel = ((vis->renderflags + 1) << 8) & ~0x3ff; - bglevel = FRACUNIT-fglevel; - fg2rgb = Col2RGB8[fglevel>>10]; - bg2rgb = Col2RGB8[bglevel>>10]; - fg = fg2rgb[color]; - } - - /* - - spacing = RenderTarget->GetPitch() - countbase; - dest = ylookup[yl] + x1 + dc_destorg; - - do - { - int count = countbase; - do - { - DWORD bg = bg2rgb[*dest]; - bg = (fg+bg) | 0x1f07c1f; - *dest++ = RGB32k.All[bg & (bg>>15)]; - } while (--count); - dest += spacing; - } while (--ycount);*/ - - // original was row-wise - // width = countbase - // height = ycount - - spacing = RenderTarget->GetPitch(); - - for (int x = x1; x < (x1+countbase); x++) - { - dc_x = x; - if (R_ClipSpriteColumnWithPortals(vis)) - continue; - dest = ylookup[yl] + x + dc_destorg; - for (int y = 0; y < ycount; y++) - { - DWORD bg = bg2rgb[*dest]; - bg = (fg+bg) | 0x1f07c1f; - *dest = RGB32k.All[bg & (bg>>15)]; - dest += spacing; - } - } -} - -void R_DrawParticle_rgba(vissprite_t *vis) -{ - int spacing; - uint32_t *dest; - BYTE color = vis->Style.BaseColormap->Maps[vis->startfrac]; - int yl = vis->y1; - int ycount = vis->y2 - yl + 1; + int yh = vis->y2; int x1 = vis->x1; int countbase = vis->x2 - x1; R_DrawMaskedSegsBehindParticle(vis); - - 
DrawerCommandQueue::WaitForWorkers(); - - uint32_t fg = shade_pal_index_simple(color, calc_light_multiplier(LIGHTSCALE(0, vis->Style.ColormapNum << FRACBITS))); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; // vis->renderflags holds translucency level (0-255) - fixed_t fglevel = ((vis->renderflags + 1) << 8) & ~0x3ff; - uint32_t alpha = fglevel * 256 / FRACUNIT; - uint32_t inv_alpha = 256 - alpha; - - fg_red *= alpha; - fg_green *= alpha; - fg_blue *= alpha; - - spacing = RenderTarget->GetPitch(); + int alpha = vis->renderflags; for (int x = x1; x < (x1 + countbase); x++) { - dc_x = x; - if (R_ClipSpriteColumnWithPortals(vis)) + if (R_ClipSpriteColumnWithPortals(x, vis)) continue; - dest = ylookup[yl] + x + (uint32_t*)dc_destorg; - for (int y = 0; y < ycount; y++) - { - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = (fg_red + bg_red * inv_alpha) / 256; - uint32_t green = (fg_green + bg_green * inv_alpha) / 256; - uint32_t blue = (fg_blue + bg_blue * inv_alpha) / 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += spacing; - } + DrawerContext::FillTransColumn(x, yl, yh, color, alpha); } } @@ -2769,9 +2589,7 @@ void R_DrawVoxel(const FVector3 &globalpos, FAngle viewangle, sprcosang = FLOAT2FIXED(dasprang.Cos()) >> 2; sprsinang = FLOAT2FIXED(-dasprang.Sin()) >> 2; - R_SetupDrawSlab(colormap); - - int pixelsize = r_swtruecolor ? 4 : 1; + DrawerContext::SetSlabLight(colormap); // Select mip level i = abs(DMulScale6(dasprx - globalposx, cosang, daspry - globalposy, sinang)); @@ -3026,25 +2844,25 @@ void R_DrawVoxel(const FVector3 &globalpos, FAngle viewangle, if (!(flags & DVF_OFFSCREEN)) { // Draw directly to the screen. 
- R_DrawSlab(xxr - xxl, yplc[xxl], z2 - z1, yinc, col, (ylookup[z1] + lxt + xxl) * pixelsize + dc_destorg); + DrawerContext::DrawSlab(xxr - xxl, yplc[xxl], z2 - z1, yinc, col, lxt + xxl, z1); } else { // Record the area covered and possibly draw to an offscreen buffer. - dc_yl = z1; - dc_yh = z2 - 1; - dc_count = z2 - z1; - dc_iscale = yinc; + DrawerContext::SetY1(z1); + DrawerContext::SetY2(z2 - 1); + DrawerContext::SetDrawCount(z2 - z1); + DrawerContext::SetTextureStep(yinc); for (int x = xxl; x < xxr; ++x) { OffscreenCoverageBuffer->InsertSpan(lxt + x, z1, z2); if (!(flags & DVF_SPANSONLY)) { - dc_x = lxt + x; - rt_initcols(OffscreenColorBuffer + (dc_x & ~3) * OffscreenBufferHeight); - dc_source = col; - dc_texturefrac = yplc[xxl]; - hcolfunc_pre(); + DrawerContext::RtInitCols(OffscreenColorBuffer + ((lxt + x) & ~3) * OffscreenBufferHeight); + DrawerContext::SetX(lxt + x); + DrawerContext::SetSource(col); + DrawerContext::SetTextureFrac(yplc[xxl]); + DrawerContext::DrawHColumnPre(); } } } diff --git a/src/r_things.h b/src/r_things.h index f5cd30e003..869de4da25 100644 --- a/src/r_things.h +++ b/src/r_things.h @@ -96,11 +96,8 @@ struct vissprite_t struct particle_t; -extern void(*R_DrawParticle)(vissprite_t *); -void R_DrawParticle_C (vissprite_t *); -void R_DrawParticle_rgba (vissprite_t *); - void R_ProjectParticle (particle_t *, const sector_t *sector, int shade, int fakeside); +void R_DrawParticle (vissprite_t *vis); extern int MaxVisSprites; @@ -112,7 +109,6 @@ extern vissprite_t **vissprite_p; extern short zeroarray[MAXWIDTH]; extern short screenheightarray[MAXWIDTH]; -// vars for R_DrawMaskedColumn extern short* mfloorclip; extern short* mceilingclip; extern double spryscale; @@ -126,8 +122,7 @@ extern double pspriteyscale; extern FTexture *WallSpriteTile; -void R_DrawMaskedColumn (const BYTE *column, const FTexture::Span *spans); -void R_WallSpriteColumn (void (*drawfunc)(const BYTE *column, const FTexture::Span *spans)); +void R_WallSpriteColumn (int x, 
void (*drawfunc)(int x, const BYTE *column, const FTexture::Span *spans)); void R_CacheSprite (spritedef_t *sprite); void R_SortVisSprites (int (*compare)(const void *, const void *), size_t first); diff --git a/src/v_draw.cpp b/src/v_draw.cpp index 6a8dad0477..21cbd1a335 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -43,8 +43,7 @@ #include "r_defs.h" #include "r_utility.h" #ifndef NO_SWRENDER -#include "r_draw.h" -#include "r_draw_rgba.h" +#include "r_drawer_context.h" #include "r_main.h" #include "r_things.h" #endif @@ -130,12 +129,14 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) static short bottomclipper[MAXWIDTH], topclipper[MAXWIDTH]; const BYTE *translation = NULL; - if (r_swtruecolor != IsBgra()) + DCanvas *destorgsave = DrawerContext::Canvas(); + if (screen->GetBuffer() == NULL) { - r_swtruecolor = IsBgra(); - R_InitColumnDrawers(); + I_FatalError("Attempt to write to buffer of hardware canvas"); } + DrawerContext::SetCanvas(screen); + if (parms.masked) { spanptr = &spans; @@ -172,22 +173,15 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) if (translation != NULL) { - R_SetTranslationMap((lighttable_t *)translation); + DrawerContext::SetTranslationMap((lighttable_t *)translation); } else { - R_SetTranslationMap(identitymap); + DrawerContext::SetTranslationMap(nullptr); } - fixedcolormap = dc_fcolormap; - ESPSResult mode = R_SetPatchStyle (parms.style, parms.Alpha, 0, parms.fillcolor); - - BYTE *destorgsave = dc_destorg; - dc_destorg = screen->GetBuffer(); - if (dc_destorg == NULL) - { - I_FatalError("Attempt to write to buffer of hardware canvas"); - } + fixedcolormap = DrawerContext::LightColormap(); + ESPSResult mode = DrawerContext::SetPatchStyle (parms.style, parms.Alpha, 0, parms.fillcolor); double x0 = parms.x - parms.left * parms.destwidth / parms.texwidth; double y0 = parms.y - parms.top * parms.destheight / parms.texheight; @@ -220,11 +214,11 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms 
&parms) assert(spryscale > 0); sprflipvert = false; - //dc_iscale = FLOAT2FIXED(iyscale); - //dc_texturemid = (-y0) * iyscale; - //dc_iscale = 0xffffffffu / (unsigned)spryscale; - dc_iscale = FLOAT2FIXED(1 / spryscale); - dc_texturemid = (CenterY - 1 - sprtopscreen) * dc_iscale / 65536; + //DrawerContext::SetTextureStep(FLOAT2FIXED(iyscale)); + //DrawerContext::SetTextureMid((-y0) * iyscale); + //DrawerContext::SetTextureStep(0xffffffffu / (unsigned)spryscale); + DrawerContext::SetTextureStep(FLOAT2FIXED(1 / spryscale)); + DrawerContext::SetTextureMid((CenterY - 1 - sprtopscreen) * DrawerContext::TextureStep() / 65536); fixed_t frac = 0; double xiscale = img->GetWidth() / parms.destwidth; double x2 = x0 + parms.destwidth; @@ -278,14 +272,14 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) mode = DoDraw0; } - dc_x = int(x0); + int x = int(x0); int x2_i = int(x2); fixed_t xiscale_i = FLOAT2FIXED(xiscale); if (mode == DoDraw0) { // One column at a time - stop4 = dc_x; + stop4 = x; } else // DoDraw1` { @@ -293,42 +287,44 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) stop4 = x2_i & ~3; } - if (dc_x < x2_i) + if (x < x2_i) { - while ((dc_x < stop4) && (dc_x & 3)) + DrawerContext::SetMaskedColumnState(mfloorclip, mceilingclip, spryscale, sprtopscreen, sprflipvert); + + while ((x < stop4) && (x & 3)) { pixels = img->GetColumn(frac >> FRACBITS, spanptr); - R_DrawMaskedColumn(pixels, spans); - dc_x++; + DrawerContext::DrawMaskedColumn(x, pixels, spans); + x++; frac += xiscale_i; } - while (dc_x < stop4) + while (x < stop4) { - rt_initcols(nullptr); - for (int zz = 4; zz; --zz) + DrawerContext::RtInitCols(nullptr); + for (int zz = 0; zz < 4; ++zz) { pixels = img->GetColumn(frac >> FRACBITS, spanptr); - R_DrawMaskedColumnHoriz(pixels, spans); - dc_x++; + DrawerContext::DrawMaskedColumnHoriz(x + zz, pixels, spans); frac += xiscale_i; } - rt_draw4cols(dc_x - 4); + DrawerContext::DrawRt4cols(x); + x += 4; } - while (dc_x < x2_i) + while (x < 
x2_i) { pixels = img->GetColumn(frac >> FRACBITS, spanptr); - R_DrawMaskedColumn(pixels, spans); - dc_x++; + DrawerContext::DrawMaskedColumn(x, pixels, spans); + x++; frac += xiscale_i; } } CenterY = centeryback; } - R_FinishSetPatchStyle (); + DrawerContext::FinishSetPatchStyle (); - dc_destorg = destorgsave; + DrawerContext::SetCanvas(destorgsave); if (ticdup != 0 && menuactive == MENU_Off) { @@ -1024,9 +1020,11 @@ void DCanvas::PUTTRANSDOT (int xx, int yy, int basecolor, int level) if (IsBgra()) { + int inv_level = 64 - level; + uint32_t *spot = (uint32_t*)GetBuffer() + oldyyshifted + xx; - uint32_t fg = shade_pal_index_simple(basecolor, calc_light_multiplier(0)); + uint32_t fg = GPalette.BaseColors[basecolor].d; uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -1035,9 +1033,9 @@ void DCanvas::PUTTRANSDOT (int xx, int yy, int basecolor, int level) uint32_t bg_green = (*spot >> 8) & 0xff; uint32_t bg_blue = (*spot) & 0xff; - uint32_t red = (fg_red + bg_red + 1) / 2; - uint32_t green = (fg_green + bg_green + 1) / 2; - uint32_t blue = (fg_blue + bg_blue + 1) / 2; + uint32_t red = (fg_red * level + bg_red * inv_level + 1) / 64; + uint32_t green = (fg_green * level + bg_green * inv_level + 1) / 64; + uint32_t blue = (fg_blue * level + bg_blue * inv_level + 1) / 64; *spot = 0xff000000 | (red << 16) | (green << 8) | blue; } @@ -1399,16 +1397,15 @@ void DCanvas::FillSimplePoly(FTexture *tex, FVector2 *points, int npoints, sinrot = sin(rotation.Radians()); // Setup constant texture mapping parameters. - R_SetupSpanBits(tex); if (colormap) - R_SetSpanColormap(colormap, clamp(shade >> FRACBITS, 0, NUMCOLORMAPS - 1)); + DrawerContext::SetSpanLight(colormap, 0, clamp(shade >> FRACBITS, 0, NUMCOLORMAPS - 1)); else - R_SetSpanColormap(&identitycolormap, 0); - R_SetSpanSource(r_swtruecolor ? 
(const BYTE*)tex->GetPixelsBgra() : tex->GetPixels()); - scalex = double(1u << (32 - ds_xbits)) / scalex; - scaley = double(1u << (32 - ds_ybits)) / scaley; - ds_xstep = xs_RoundToInt(cosrot * scalex); - ds_ystep = xs_RoundToInt(sinrot * scaley); + DrawerContext::SetSpanLight(nullptr, 0, 0); + DrawerContext::SetSpanSource(tex); + scalex = double(1u << (32 - DrawerContext::SpanXBits())) / scalex; + scaley = double(1u << (32 - DrawerContext::SpanYBits())) / scaley; + DrawerContext::SetSpanXStep(xs_RoundToInt(cosrot * scalex)); + DrawerContext::SetSpanYStep(xs_RoundToInt(sinrot * scaley)); // Travel down the right edge and create an outline of that edge. pt1 = toppt; @@ -1472,9 +1469,9 @@ void DCanvas::FillSimplePoly(FTexture *tex, FVector2 *points, int npoints, #if 0 memset(this->Buffer + y * this->Pitch + x1, (int)tex, x2 - x1); #else - ds_y = y; - ds_x1 = x1; - ds_x2 = x2 - 1; + DrawerContext::SetSpanY(y); + DrawerContext::SetSpanX1(x1); + DrawerContext::SetSpanX2(x2 - 1); DVector2 tex(x1 - originx, y - originy); if (dorotate) @@ -1483,10 +1480,10 @@ void DCanvas::FillSimplePoly(FTexture *tex, FVector2 *points, int npoints, tex.X = t * cosrot - tex.Y * sinrot; tex.Y = tex.Y * cosrot + t * sinrot; } - ds_xfrac = xs_RoundToInt(tex.X * scalex); - ds_yfrac = xs_RoundToInt(tex.Y * scaley); + DrawerContext::SetSpanXFrac(xs_RoundToInt(tex.X * scalex)); + DrawerContext::SetSpanYFrac(xs_RoundToInt(tex.Y * scaley)); - R_DrawSpan(); + DrawerContext::DrawSimplePolySpan(); #endif } x += xinc; From 70dbde4f78a99f167c3dcfc41491cb3f5cfc6cc8 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 17 Jun 2016 07:38:05 +0200 Subject: [PATCH 047/912] Added r_multithreaded CVAR and fixed some broken colors --- src/r_draw_rgba.cpp | 2057 +++++++++++++++++++++--------------------- src/r_draw_rgba.h | 8 +- src/r_drawt_rgba.cpp | 540 +++++------ 3 files changed, 1314 insertions(+), 1291 deletions(-) diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 420b63dff7..3e4bf241aa 
100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -58,6 +58,8 @@ extern float rw_light; extern float rw_lightstep; extern int wallshade; +CVAR(Bool, r_multithreaded, true, 0) + ///////////////////////////////////////////////////////////////////////////// DrawerCommandQueue *DrawerCommandQueue::Instance() @@ -229,28 +231,28 @@ void DrawerCommandQueue::StopThreads() class DrawColumnRGBACommand : public DrawerCommand { - int dc_count; - BYTE *dc_dest; - DWORD dc_texturefrac; - DWORD dc_iscale; - fixed_t dc_light; - const BYTE *dc_source; - int dc_pitch; - ShadeConstants dc_shade_constants; - BYTE *dc_colormap; + int _count; + BYTE *_dest; + DWORD _texturefrac; + DWORD _iscale; + fixed_t _light; + const BYTE *_source; + int _pitch; + ShadeConstants _shade_constants; + BYTE *_colormap; public: DrawColumnRGBACommand() { - dc_count = ::dc_count; - dc_dest = ::dc_dest; - dc_texturefrac = ::dc_texturefrac; - dc_iscale = ::dc_iscale; - dc_light = ::dc_light; - dc_source = ::dc_source; - dc_pitch = ::dc_pitch; - dc_shade_constants = ::dc_shade_constants; - dc_colormap = ::dc_colormap; + _count = dc_count; + _dest = dc_dest; + _texturefrac = dc_texturefrac; + _iscale = dc_iscale; + _light = dc_light; + _source = dc_source; + _pitch = dc_pitch; + _shade_constants = dc_shade_constants; + _colormap = dc_colormap; } void Execute(DrawerThread *thread) override @@ -260,28 +262,28 @@ public: fixed_t frac; fixed_t fracstep; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); // Zero length, column does not exceed a pixel. if (count <= 0) return; // Framebuffer destination address. 
- dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; // Determine scaling, // which is the only mapping to be done. - fracstep = dc_iscale * thread->num_cores; - frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); + fracstep = _iscale * thread->num_cores; + frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); // [RH] Get local copies of these variables so that the compiler // has a better chance of optimizing this well. - const BYTE *source = dc_source; - int pitch = dc_pitch * thread->num_cores; - BYTE *colormap = dc_colormap; + const BYTE *source = _source; + int pitch = _pitch * thread->num_cores; + BYTE *colormap = _colormap; do { @@ -296,20 +298,20 @@ public: class FillColumnRGBACommand : public DrawerCommand { - int dc_count; - BYTE *dc_dest; - fixed_t dc_light; - int dc_pitch; - int dc_color; + int _count; + BYTE *_dest; + fixed_t _light; + int _pitch; + int _color; public: FillColumnRGBACommand() { - dc_count = ::dc_count; - dc_dest = ::dc_dest; - dc_light = ::dc_light; - dc_pitch = ::dc_pitch; - dc_color = ::dc_color; + _count = dc_count; + _dest = dc_dest; + _light = dc_light; + _pitch = dc_pitch; + _color = dc_color; } void Execute(DrawerThread *thread) override @@ -317,18 +319,18 @@ public: int count; uint32_t* dest; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - uint32_t light = calc_light_multiplier(dc_light); + uint32_t light = calc_light_multiplier(_light); { - int pitch = dc_pitch * 
thread->num_cores; - uint32_t color = shade_pal_index_simple(dc_color, light); + int pitch = _pitch * thread->num_cores; + uint32_t color = shade_pal_index_simple(_color, light); do { @@ -341,20 +343,20 @@ public: class FillAddColumnRGBACommand : public DrawerCommand { - int dc_count; - BYTE *dc_dest; - int dc_pitch; - fixed_t dc_light; - int dc_color; + int _count; + BYTE *_dest; + int _pitch; + fixed_t _light; + int _color; public: FillAddColumnRGBACommand() { - dc_count = ::dc_count; - dc_dest = ::dc_dest; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_color = ::dc_color; + _count = dc_count; + _dest = dc_dest; + _pitch = dc_pitch; + _light = dc_light; + _color = dc_color; } void Execute(DrawerThread *thread) override @@ -362,14 +364,14 @@ public: int count; uint32_t *dest; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - int pitch = dc_pitch * thread->num_cores; + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); + int pitch = _pitch * thread->num_cores; - uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); + uint32_t fg = shade_pal_index_simple(_color, calc_light_multiplier(_light)); uint32_t fg_red = (fg >> 24) & 0xff; uint32_t fg_green = (fg >> 16) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -392,20 +394,20 @@ public: class FillAddClampColumnRGBACommand : public DrawerCommand { - int dc_count; - BYTE *dc_dest; - int dc_pitch; - fixed_t dc_light; - int dc_color; + int _count; + BYTE *_dest; + int _pitch; + fixed_t _light; + int _color; public: FillAddClampColumnRGBACommand() { - dc_count = ::dc_count; - dc_dest = ::dc_dest; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_color = ::dc_color; + _count = dc_count; + _dest = dc_dest; + _pitch = dc_pitch; + _light = dc_light; + _color = dc_color; } void Execute(DrawerThread *thread) override @@ 
-413,14 +415,14 @@ public: int count; uint32_t *dest; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - int pitch = dc_pitch * thread->num_cores; + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); + int pitch = _pitch * thread->num_cores; - uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); + uint32_t fg = shade_pal_index_simple(_color, calc_light_multiplier(_light)); uint32_t fg_red = (fg >> 24) & 0xff; uint32_t fg_green = (fg >> 16) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -443,20 +445,20 @@ public: class FillSubClampColumnRGBACommand : public DrawerCommand { - int dc_count; - BYTE *dc_dest; - int dc_pitch; - int dc_color; - fixed_t dc_light; + int _count; + BYTE *_dest; + int _pitch; + int _color; + fixed_t _light; public: FillSubClampColumnRGBACommand() { - dc_count = ::dc_count; - dc_dest = ::dc_dest; - dc_pitch = ::dc_pitch; - dc_color = ::dc_color; - dc_light = ::dc_light; + _count = dc_count; + _dest = dc_dest; + _pitch = dc_pitch; + _color = dc_color; + _light = dc_light; } void Execute(DrawerThread *thread) override @@ -464,14 +466,14 @@ public: int count; uint32_t *dest; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - int pitch = dc_pitch * thread->num_cores; + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); + int pitch = _pitch * thread->num_cores; - uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); + uint32_t fg = shade_pal_index_simple(_color, calc_light_multiplier(_light)); uint32_t fg_red = (fg >> 24) & 0xff; uint32_t fg_green = (fg >> 16) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -494,20 +496,20 @@ public: class 
FillRevSubClampColumnRGBACommand : public DrawerCommand { - int dc_count; - BYTE *dc_dest; - int dc_pitch; - int dc_color; - fixed_t dc_light; + int _count; + BYTE *_dest; + int _pitch; + int _color; + fixed_t _light; public: FillRevSubClampColumnRGBACommand() { - dc_count = ::dc_count; - dc_dest = ::dc_dest; - dc_pitch = ::dc_pitch; - dc_color = ::dc_color; - dc_light = ::dc_light; + _count = dc_count; + _dest = dc_dest; + _pitch = dc_pitch; + _color = dc_color; + _light = dc_light; } void Execute(DrawerThread *thread) override @@ -515,14 +517,14 @@ public: int count; uint32_t *dest; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - int pitch = dc_pitch * thread->num_cores; + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); + int pitch = _pitch * thread->num_cores; - uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); + uint32_t fg = shade_pal_index_simple(_color, calc_light_multiplier(_light)); uint32_t fg_red = (fg >> 24) & 0xff; uint32_t fg_green = (fg >> 16) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -545,22 +547,22 @@ public: class DrawFuzzColumnRGBACommand : public DrawerCommand { - int dc_x; - int dc_yl; - int dc_yh; - BYTE *dc_destorg; - int dc_pitch; + int _x; + int _yl; + int _yh; + BYTE *_destorg; + int _pitch; int fuzzpos; int fuzzviewheight; public: DrawFuzzColumnRGBACommand() { - dc_x = ::dc_x; - dc_yl = ::dc_yl; - dc_yh = ::dc_yh; - dc_destorg = ::dc_destorg; - dc_pitch = ::dc_pitch; + _x = dc_x; + _yl = dc_yl; + _yh = dc_yh; + _destorg = dc_destorg; + _pitch = dc_pitch; fuzzpos = ::fuzzpos; fuzzviewheight = ::fuzzviewheight; } @@ -571,24 +573,24 @@ public: uint32_t *dest; // Adjust borders. Low... - if (dc_yl == 0) - dc_yl = 1; + if (_yl == 0) + _yl = 1; // .. and high. 
- if (dc_yh > fuzzviewheight) - dc_yh = fuzzviewheight; + if (_yh > fuzzviewheight) + _yh = fuzzviewheight; - count = thread->count_for_thread(dc_yl, dc_yh - dc_yl + 1); + count = thread->count_for_thread(_yl, _yh - _yl + 1); // Zero length. if (count <= 0) return; - dest = thread->dest_for_thread(dc_yl, dc_pitch, ylookup[dc_yl] + dc_x + (uint32_t*)dc_destorg); + dest = thread->dest_for_thread(_yl, _pitch, ylookup[_yl] + _x + (uint32_t*)_destorg); - int pitch = dc_pitch * thread->num_cores; + int pitch = _pitch * thread->num_cores; int fuzzstep = thread->num_cores; - int fuzz = (fuzzpos + thread->skipped_by_thread(dc_yl)) % FUZZTABLE; + int fuzz = (fuzzpos + thread->skipped_by_thread(_yl)) % FUZZTABLE; while (count > 0) { @@ -622,32 +624,32 @@ public: class DrawAddColumnRGBACommand : public DrawerCommand { - int dc_count; - BYTE *dc_dest; - DWORD dc_iscale; - DWORD dc_texturefrac; - const BYTE *dc_source; - int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; - BYTE *dc_colormap; + int _count; + BYTE *_dest; + DWORD _iscale; + DWORD _texturefrac; + const BYTE *_source; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; + BYTE *_colormap; public: DrawAddColumnRGBACommand() { - dc_count = ::dc_count; - dc_dest = ::dc_dest; - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_source = ::dc_source; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; - dc_colormap = ::dc_colormap; + _count = dc_count; + _dest = dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _source = dc_source; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; + _colormap = dc_colormap; } void Execute(DrawerThread *thread) override @@ -657,25 +659,25 @@ 
public: fixed_t frac; fixed_t fracstep; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - fracstep = dc_iscale * thread->num_cores; - frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); + fracstep = _iscale * thread->num_cores; + frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); { - const BYTE *source = dc_source; - int pitch = dc_pitch * thread->num_cores; + const BYTE *source = _source; + int pitch = _pitch * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - BYTE *colormap = dc_colormap; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; + BYTE *colormap = _colormap; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { @@ -703,28 +705,28 @@ public: class DrawTranslatedColumnRGBACommand : public DrawerCommand { - int dc_count; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - BYTE *dc_dest; - DWORD dc_iscale; - DWORD dc_texturefrac; - BYTE *dc_translation; - const BYTE *dc_source; - int dc_pitch; + int _count; + fixed_t _light; + ShadeConstants _shade_constants; + BYTE *_dest; + DWORD _iscale; + DWORD _texturefrac; + BYTE *_translation; + const BYTE *_source; + int _pitch; public: DrawTranslatedColumnRGBACommand() { - dc_count = ::dc_count; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_dest = ::dc_dest; - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_translation = ::dc_translation; - dc_source = ::dc_source; - dc_pitch = ::dc_pitch; + _count = dc_count; + 
_light = dc_light; + _shade_constants = dc_shade_constants; + _dest = dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _translation = dc_translation; + _source = dc_source; + _pitch = dc_pitch; } void Execute(DrawerThread *thread) override @@ -734,23 +736,23 @@ public: fixed_t frac; fixed_t fracstep; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - fracstep = dc_iscale * thread->num_cores; - frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); + fracstep = _iscale * thread->num_cores; + frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); { // [RH] Local copies of global vars to improve compiler optimizations - BYTE *translation = dc_translation; - const BYTE *source = dc_source; - int pitch = dc_pitch * thread->num_cores; + BYTE *translation = _translation; + const BYTE *source = _source; + int pitch = _pitch * thread->num_cores; do { @@ -764,28 +766,32 @@ public: class DrawTlatedAddColumnRGBACommand : public DrawerCommand { - int dc_count; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - BYTE *dc_dest; - DWORD dc_iscale; - DWORD dc_texturefrac; - BYTE *dc_translation; - const BYTE *dc_source; - int dc_pitch; + int _count; + fixed_t _light; + ShadeConstants _shade_constants; + BYTE *_dest; + DWORD _iscale; + DWORD _texturefrac; + BYTE *_translation; + const BYTE *_source; + int _pitch; + fixed_t _srcalpha; + fixed_t _destalpha; public: DrawTlatedAddColumnRGBACommand() { - dc_count = ::dc_count; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - 
dc_dest = ::dc_dest; - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_translation = ::dc_translation; - dc_source = ::dc_source; - dc_pitch = ::dc_pitch; + _count = dc_count; + _light = dc_light; + _shade_constants = dc_shade_constants; + _dest = dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _translation = dc_translation; + _source = dc_source; + _pitch = dc_pitch; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override @@ -795,25 +801,25 @@ public: fixed_t frac; fixed_t fracstep; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - fracstep = dc_iscale * thread->num_cores; - frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); + fracstep = _iscale * thread->num_cores; + frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); { - BYTE *translation = dc_translation; - const BYTE *source = dc_source; - int pitch = dc_pitch * thread->num_cores; + BYTE *translation = _translation; + const BYTE *source = _source; + int pitch = _pitch * thread->num_cores; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { @@ -842,28 +848,28 @@ public: class DrawShadedColumnRGBACommand : public DrawerCommand { private: - int dc_count; - BYTE *dc_dest; - DWORD dc_iscale; - DWORD dc_texturefrac; - fixed_t dc_light; - const BYTE *dc_source; - lighttable_t *dc_colormap; - int dc_color; 
- int dc_pitch; + int _count; + BYTE *_dest; + DWORD _iscale; + DWORD _texturefrac; + fixed_t _light; + const BYTE *_source; + lighttable_t *_colormap; + int _color; + int _pitch; public: DrawShadedColumnRGBACommand() { - dc_count = ::dc_count; - dc_dest = ::dc_dest; - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_light = ::dc_light; - dc_source = ::dc_source; - dc_colormap = ::dc_colormap; - dc_color = ::dc_color; - dc_pitch = ::dc_pitch; + _count = dc_count; + _dest = dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _light = dc_light; + _source = dc_source; + _colormap = dc_colormap; + _color = dc_color; + _pitch = dc_pitch; } void Execute(DrawerThread *thread) override @@ -872,25 +878,25 @@ public: uint32_t *dest; fixed_t frac, fracstep; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - fracstep = dc_iscale * thread->num_cores; - frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); + fracstep = _iscale * thread->num_cores; + frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); - uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); + uint32_t fg = shade_pal_index_simple(_color, calc_light_multiplier(_light)); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; { - const BYTE *source = dc_source; - BYTE *colormap = dc_colormap; - int pitch = dc_pitch * thread->num_cores; + const BYTE *source = _source; + BYTE *colormap = _colormap; + int pitch = _pitch * thread->num_cores; do { @@ -915,30 +921,30 @@ public: class DrawAddClampColumnRGBACommand : public DrawerCommand { - int dc_count; - BYTE *dc_dest; - DWORD dc_iscale; - DWORD dc_texturefrac; - const BYTE *dc_source; - int dc_pitch; - 
fixed_t dc_light; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; + int _count; + BYTE *_dest; + DWORD _iscale; + DWORD _texturefrac; + const BYTE *_source; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; public: DrawAddClampColumnRGBACommand() { - dc_count = ::dc_count; - dc_dest = ::dc_dest; - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_source = ::dc_source; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; + _count = dc_count; + _dest = dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _source = dc_source; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override @@ -948,23 +954,23 @@ public: fixed_t frac; fixed_t fracstep; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - fracstep = dc_iscale * thread->num_cores; - frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); + fracstep = _iscale * thread->num_cores; + frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); { - const BYTE *source = dc_source; - int pitch = dc_pitch * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + const BYTE *source = _source; + int pitch = _pitch * thread->num_cores; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + 
uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { @@ -991,32 +997,32 @@ public: class DrawAddClampTranslatedColumnRGBACommand : public DrawerCommand { - int dc_count; - BYTE *dc_dest; - DWORD dc_iscale; - DWORD dc_texturefrac; - BYTE *dc_translation; - const BYTE *dc_source; - int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; + int _count; + BYTE *_dest; + DWORD _iscale; + DWORD _texturefrac; + BYTE *_translation; + const BYTE *_source; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; public: DrawAddClampTranslatedColumnRGBACommand() { - dc_count = ::dc_count; - dc_dest = ::dc_dest; - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_translation = ::dc_translation; - dc_source = ::dc_source; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; + _count = dc_count; + _dest = dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _translation = dc_translation; + _source = dc_source; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override @@ -1026,24 +1032,24 @@ public: fixed_t frac; fixed_t fracstep; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - fracstep = dc_iscale * thread->num_cores; - frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); + fracstep = _iscale * thread->num_cores; + frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); { - BYTE *translation = 
dc_translation; - const BYTE *source = dc_source; - int pitch = dc_pitch * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + BYTE *translation = _translation; + const BYTE *source = _source; + int pitch = _pitch * thread->num_cores; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { @@ -1070,30 +1076,30 @@ public: class DrawSubClampColumnRGBACommand : public DrawerCommand { - int dc_count; - BYTE *dc_dest; - DWORD dc_iscale; - DWORD dc_texturefrac; - const BYTE *dc_source; - int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; + int _count; + BYTE *_dest; + DWORD _iscale; + DWORD _texturefrac; + const BYTE *_source; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; public: DrawSubClampColumnRGBACommand() { - dc_count = ::dc_count; - dc_dest = ::dc_dest; - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_source = ::dc_source; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; + _count = dc_count; + _dest = dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _source = dc_source; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override @@ -1103,23 +1109,23 @@ public: fixed_t frac; fixed_t fracstep; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - dest = 
thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - fracstep = dc_iscale * thread->num_cores; - frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); + fracstep = _iscale * thread->num_cores; + frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); { - const BYTE *source = dc_source; - int pitch = dc_pitch * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + const BYTE *source = _source; + int pitch = _pitch * thread->num_cores; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { @@ -1146,32 +1152,32 @@ public: class DrawSubClampTranslatedColumnRGBACommand : public DrawerCommand { - int dc_count; - BYTE *dc_dest; - DWORD dc_iscale; - DWORD dc_texturefrac; - const BYTE *dc_source; - int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; - BYTE *dc_translation; + int _count; + BYTE *_dest; + DWORD _iscale; + DWORD _texturefrac; + const BYTE *_source; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; + BYTE *_translation; public: DrawSubClampTranslatedColumnRGBACommand() { - dc_count = ::dc_count; - dc_dest = ::dc_dest; - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_source = ::dc_source; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; - dc_translation = ::dc_translation; + _count = dc_count; + _dest = dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; 
+ _source = dc_source; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; + _translation = dc_translation; } void Execute(DrawerThread *thread) override @@ -1181,24 +1187,24 @@ public: fixed_t frac; fixed_t fracstep; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - fracstep = dc_iscale * thread->num_cores; - frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); + fracstep = _iscale * thread->num_cores; + frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); { - BYTE *translation = dc_translation; - const BYTE *source = dc_source; - int pitch = dc_pitch * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + BYTE *translation = _translation; + const BYTE *source = _source; + int pitch = _pitch * thread->num_cores; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { @@ -1225,30 +1231,30 @@ public: class DrawRevSubClampColumnRGBACommand : public DrawerCommand { - int dc_count; - BYTE *dc_dest; - DWORD dc_iscale; - DWORD dc_texturefrac; - const BYTE *dc_source; - int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; + int _count; + BYTE *_dest; + DWORD _iscale; + DWORD _texturefrac; + const BYTE *_source; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; public: 
DrawRevSubClampColumnRGBACommand() { - dc_count = ::dc_count; - dc_dest = ::dc_dest; - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_source = ::dc_source; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; + _count = dc_count; + _dest = dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _source = dc_source; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override @@ -1258,22 +1264,22 @@ public: fixed_t frac; fixed_t fracstep; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - fracstep = dc_iscale * thread->num_cores; - frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); + fracstep = _iscale * thread->num_cores; + frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); { - const BYTE *source = dc_source; - int pitch = dc_pitch * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + const BYTE *source = _source; + int pitch = _pitch * thread->num_cores; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { @@ -1300,32 +1306,32 @@ public: class DrawRevSubClampTranslatedColumnRGBACommand : public DrawerCommand { - int dc_count; - BYTE *dc_dest; - DWORD dc_iscale; - DWORD dc_texturefrac; - const BYTE *dc_source; 
- int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; - BYTE *dc_translation; + int _count; + BYTE *_dest; + DWORD _iscale; + DWORD _texturefrac; + const BYTE *_source; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; + BYTE *_translation; public: DrawRevSubClampTranslatedColumnRGBACommand() { - dc_count = ::dc_count; - dc_dest = ::dc_dest; - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_source = ::dc_source; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; - dc_translation = ::dc_translation; + _count = dc_count; + _dest = dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _source = dc_source; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; + _translation = dc_translation; } void Execute(DrawerThread *thread) override @@ -1335,24 +1341,24 @@ public: fixed_t frac; fixed_t fracstep; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - fracstep = dc_iscale * thread->num_cores; - frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); + fracstep = _iscale * thread->num_cores; + frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); { - BYTE *translation = dc_translation; - const BYTE *source = dc_source; - int pitch = dc_pitch * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + BYTE *translation = _translation; + const BYTE *source = _source; + int pitch = _pitch * 
thread->num_cores; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { @@ -1379,42 +1385,42 @@ public: class DrawSpanRGBACommand : public DrawerCommand { - const uint32_t *ds_source; - fixed_t ds_xfrac; - fixed_t ds_yfrac; - fixed_t ds_xstep; - fixed_t ds_ystep; - int ds_x1; - int ds_x2; - int ds_y; - int ds_xbits; - int ds_ybits; - BYTE *dc_destorg; - fixed_t ds_light; - ShadeConstants ds_shade_constants; + const uint32_t *_source; + fixed_t _xfrac; + fixed_t _yfrac; + fixed_t _xstep; + fixed_t _ystep; + int _x1; + int _x2; + int _y; + int _xbits; + int _ybits; + BYTE *_destorg; + fixed_t _light; + ShadeConstants _shade_constants; public: DrawSpanRGBACommand() { - ds_source = (const uint32_t*)::ds_source; - ds_xfrac = ::ds_xfrac; - ds_yfrac = ::ds_yfrac; - ds_xstep = ::ds_xstep; - ds_ystep = ::ds_ystep; - ds_x1 = ::ds_x1; - ds_x2 = ::ds_x2; - ds_y = ::ds_y; - ds_xbits = ::ds_xbits; - ds_ybits = ::ds_ybits; - dc_destorg = ::dc_destorg; - ds_light = ::ds_light; - ds_shade_constants = ::ds_shade_constants; + _source = (const uint32_t*)ds_source; + _xfrac = ds_xfrac; + _yfrac = ds_yfrac; + _xstep = ds_xstep; + _ystep = ds_ystep; + _x1 = ds_x1; + _x2 = ds_x2; + _y = ds_y; + _xbits = ds_xbits; + _ybits = ds_ybits; + _destorg = dc_destorg; + _light = ds_light; + _shade_constants = ds_shade_constants; } #ifdef NO_SSE void Execute(DrawerThread *thread) override { - if (thread->line_skipped_by_thread(ds_y)) + if (thread->line_skipped_by_thread(_y)) return; dsfixed_t xfrac; @@ -1422,24 +1428,24 @@ public: dsfixed_t xstep; dsfixed_t ystep; uint32_t* dest; - const uint32_t* source = ds_source; + const uint32_t* source = _source; int count; int spot; - xfrac = ds_xfrac; - yfrac = ds_yfrac; + xfrac = _xfrac; + yfrac = 
_yfrac; - dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; + dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; - count = ds_x2 - ds_x1 + 1; + count = _x2 - _x1 + 1; - xstep = ds_xstep; - ystep = ds_ystep; + xstep = _xstep; + ystep = _ystep; - uint32_t light = calc_light_multiplier(ds_light); - ShadeConstants shade_constants = ds_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - if (ds_xbits == 6 && ds_ybits == 6) + if (_xbits == 6 && _ybits == 6) { // 64x64 is the most common case by far, so special case it. @@ -1458,9 +1464,9 @@ public: } else { - BYTE yshift = 32 - ds_ybits; - BYTE xshift = yshift - ds_xbits; - int xmask = ((1 << ds_xbits) - 1) << ds_ybits; + BYTE yshift = 32 - _ybits; + BYTE xshift = yshift - _xbits; + int xmask = ((1 << _xbits) - 1) << _ybits; do { @@ -1479,7 +1485,7 @@ public: #else void Execute(DrawerThread *thread) override { - if (thread->line_skipped_by_thread(ds_y)) + if (thread->line_skipped_by_thread(_y)) return; dsfixed_t xfrac; @@ -1487,24 +1493,24 @@ public: dsfixed_t xstep; dsfixed_t ystep; uint32_t* dest; - const uint32_t* source = ds_source; + const uint32_t* source = _source; int count; int spot; - xfrac = ds_xfrac; - yfrac = ds_yfrac; + xfrac = _xfrac; + yfrac = _yfrac; - dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; + dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; - count = ds_x2 - ds_x1 + 1; + count = _x2 - _x1 + 1; - xstep = ds_xstep; - ystep = ds_ystep; + xstep = _xstep; + ystep = _ystep; - uint32_t light = calc_light_multiplier(ds_light); - ShadeConstants shade_constants = ds_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - if (ds_xbits == 6 && ds_ybits == 6) + if (_xbits == 6 && _ybits == 6) { // 64x64 is the most common case by far, so special case it. 
@@ -1604,9 +1610,9 @@ public: } else { - BYTE yshift = 32 - ds_ybits; - BYTE xshift = yshift - ds_xbits; - int xmask = ((1 << ds_xbits) - 1) << ds_ybits; + BYTE yshift = 32 - _ybits; + BYTE xshift = yshift - _xbits; + int xmask = ((1 << _xbits) - 1) << _ybits; int sse_count = count / 4; count -= sse_count * 4; @@ -1700,42 +1706,42 @@ public: class DrawSpanMaskedRGBACommand : public DrawerCommand { - const uint32_t *ds_source; - fixed_t ds_light; - ShadeConstants ds_shade_constants; - fixed_t ds_xfrac; - fixed_t ds_yfrac; - BYTE *dc_destorg; - int ds_x1; - int ds_x2; - int ds_y1; - int ds_y; - fixed_t ds_xstep; - fixed_t ds_ystep; - int ds_xbits; - int ds_ybits; + const uint32_t *_source; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _xfrac; + fixed_t _yfrac; + BYTE *_destorg; + int _x1; + int _x2; + int _y1; + int _y; + fixed_t _xstep; + fixed_t _ystep; + int _xbits; + int _ybits; public: DrawSpanMaskedRGBACommand() { - ds_source = (const uint32_t*)::ds_source; - ds_light = ::ds_light; - ds_shade_constants = ::ds_shade_constants; - ds_xfrac = ::ds_xfrac; - ds_yfrac = ::ds_yfrac; - dc_destorg = ::dc_destorg; - ds_x1 = ::ds_x1; - ds_x2 = ::ds_x2; - ds_y = ::ds_y; - ds_xstep = ::ds_xstep; - ds_ystep = ::ds_ystep; - ds_xbits = ::ds_xbits; - ds_ybits = ::ds_ybits; + _source = (const uint32_t*)ds_source; + _light = ds_light; + _shade_constants = ds_shade_constants; + _xfrac = ds_xfrac; + _yfrac = ds_yfrac; + _destorg = dc_destorg; + _x1 = ds_x1; + _x2 = ds_x2; + _y = ds_y; + _xstep = ds_xstep; + _ystep = ds_ystep; + _xbits = ds_xbits; + _ybits = ds_ybits; } void Execute(DrawerThread *thread) override { - if (thread->line_skipped_by_thread(ds_y)) + if (thread->line_skipped_by_thread(_y)) return; dsfixed_t xfrac; @@ -1743,24 +1749,24 @@ public: dsfixed_t xstep; dsfixed_t ystep; uint32_t* dest; - const uint32_t* source = ds_source; + const uint32_t* source = _source; int count; int spot; - uint32_t light = calc_light_multiplier(ds_light); - ShadeConstants 
shade_constants = ds_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - xfrac = ds_xfrac; - yfrac = ds_yfrac; + xfrac = _xfrac; + yfrac = _yfrac; - dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; + dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; - count = ds_x2 - ds_x1 + 1; + count = _x2 - _x1 + 1; - xstep = ds_xstep; - ystep = ds_ystep; + xstep = _xstep; + ystep = _ystep; - if (ds_xbits == 6 && ds_ybits == 6) + if (_xbits == 6 && _ybits == 6) { // 64x64 is the most common case by far, so special case it. do @@ -1780,9 +1786,9 @@ public: } else { - BYTE yshift = 32 - ds_ybits; - BYTE xshift = yshift - ds_xbits; - int xmask = ((1 << ds_xbits) - 1) << ds_ybits; + BYTE yshift = 32 - _ybits; + BYTE xshift = yshift - _xbits; + int xmask = ((1 << _xbits) - 1) << _ybits; do { uint32_t texdata; @@ -1803,42 +1809,46 @@ public: class DrawSpanTranslucentRGBACommand : public DrawerCommand { - const uint32_t *ds_source; - fixed_t ds_light; - ShadeConstants ds_shade_constants; - fixed_t ds_xfrac; - fixed_t ds_yfrac; - BYTE *dc_destorg; - int ds_x1; - int ds_x2; - int ds_y1; - int ds_y; - fixed_t ds_xstep; - fixed_t ds_ystep; - int ds_xbits; - int ds_ybits; + const uint32_t *_source; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _xfrac; + fixed_t _yfrac; + BYTE *_destorg; + int _x1; + int _x2; + int _y1; + int _y; + fixed_t _xstep; + fixed_t _ystep; + int _xbits; + int _ybits; + fixed_t _srcalpha; + fixed_t _destalpha; public: DrawSpanTranslucentRGBACommand() { - ds_source = (const uint32_t *)::ds_source; - ds_light = ::ds_light; - ds_shade_constants = ::ds_shade_constants; - ds_xfrac = ::ds_xfrac; - ds_yfrac = ::ds_yfrac; - dc_destorg = ::dc_destorg; - ds_x1 = ::ds_x1; - ds_x2 = ::ds_x2; - ds_y = ::ds_y; - ds_xstep = ::ds_xstep; - ds_ystep = ::ds_ystep; - ds_xbits = ::ds_xbits; - ds_ybits = ::ds_ybits; + _source = (const uint32_t *)ds_source; + _light = ds_light; + _shade_constants = 
ds_shade_constants; + _xfrac = ds_xfrac; + _yfrac = ds_yfrac; + _destorg = dc_destorg; + _x1 = ds_x1; + _x2 = ds_x2; + _y = ds_y; + _xstep = ds_xstep; + _ystep = ds_ystep; + _xbits = ds_xbits; + _ybits = ds_ybits; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override { - if (thread->line_skipped_by_thread(ds_y)) + if (thread->line_skipped_by_thread(_y)) return; dsfixed_t xfrac; @@ -1846,27 +1856,27 @@ public: dsfixed_t xstep; dsfixed_t ystep; uint32_t* dest; - const uint32_t* source = ds_source; + const uint32_t* source = _source; int count; int spot; - xfrac = ds_xfrac; - yfrac = ds_yfrac; + xfrac = _xfrac; + yfrac = _yfrac; - dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; + dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; - count = ds_x2 - ds_x1 + 1; + count = _x2 - _x1 + 1; - xstep = ds_xstep; - ystep = ds_ystep; + xstep = _xstep; + ystep = _ystep; - uint32_t light = calc_light_multiplier(ds_light); - ShadeConstants shade_constants = ds_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - if (ds_xbits == 6 && ds_ybits == 6) + if (_xbits == 6 && _ybits == 6) { // 64x64 is the most common case by far, so special case it. 
do @@ -1894,9 +1904,9 @@ public: } else { - BYTE yshift = 32 - ds_ybits; - BYTE xshift = yshift - ds_xbits; - int xmask = ((1 << ds_xbits) - 1) << ds_ybits; + BYTE yshift = 32 - _ybits; + BYTE xshift = yshift - _xbits; + int xmask = ((1 << _xbits) - 1) << _ybits; do { spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); @@ -1925,42 +1935,46 @@ public: class DrawSpanMaskedTranslucentRGBACommand : public DrawerCommand { - const uint32_t *ds_source; - fixed_t ds_light; - ShadeConstants ds_shade_constants; - fixed_t ds_xfrac; - fixed_t ds_yfrac; - BYTE *dc_destorg; - int ds_x1; - int ds_x2; - int ds_y1; - int ds_y; - fixed_t ds_xstep; - fixed_t ds_ystep; - int ds_xbits; - int ds_ybits; + const uint32_t *_source; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _xfrac; + fixed_t _yfrac; + BYTE *_destorg; + int _x1; + int _x2; + int _y1; + int _y; + fixed_t _xstep; + fixed_t _ystep; + int _xbits; + int _ybits; + fixed_t _srcalpha; + fixed_t _destalpha; public: DrawSpanMaskedTranslucentRGBACommand() { - ds_source = (const uint32_t*)::ds_source; - ds_light = ::ds_light; - ds_shade_constants = ::ds_shade_constants; - ds_xfrac = ::ds_xfrac; - ds_yfrac = ::ds_yfrac; - dc_destorg = ::dc_destorg; - ds_x1 = ::ds_x1; - ds_x2 = ::ds_x2; - ds_y = ::ds_y; - ds_xstep = ::ds_xstep; - ds_ystep = ::ds_ystep; - ds_xbits = ::ds_xbits; - ds_ybits = ::ds_ybits; + _source = (const uint32_t*)ds_source; + _light = ds_light; + _shade_constants = ds_shade_constants; + _xfrac = ds_xfrac; + _yfrac = ds_yfrac; + _destorg = dc_destorg; + _x1 = ds_x1; + _x2 = ds_x2; + _y = ds_y; + _xstep = ds_xstep; + _ystep = ds_ystep; + _xbits = ds_xbits; + _ybits = ds_ybits; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override { - if (thread->line_skipped_by_thread(ds_y)) + if (thread->line_skipped_by_thread(_y)) return; dsfixed_t xfrac; @@ -1968,27 +1982,27 @@ public: dsfixed_t xstep; dsfixed_t ystep; uint32_t* dest; - const uint32_t* source = 
ds_source; + const uint32_t* source = _source; int count; int spot; - uint32_t light = calc_light_multiplier(ds_light); - ShadeConstants shade_constants = ds_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - xfrac = ds_xfrac; - yfrac = ds_yfrac; + xfrac = _xfrac; + yfrac = _yfrac; - dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; + dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; - count = ds_x2 - ds_x1 + 1; + count = _x2 - _x1 + 1; - xstep = ds_xstep; - ystep = ds_ystep; + xstep = _xstep; + ystep = _ystep; - if (ds_xbits == 6 && ds_ybits == 6) + if (_xbits == 6 && _ybits == 6) { // 64x64 is the most common case by far, so special case it. do @@ -2021,9 +2035,9 @@ public: } else { - BYTE yshift = 32 - ds_ybits; - BYTE xshift = yshift - ds_xbits; - int xmask = ((1 << ds_xbits) - 1) << ds_ybits; + BYTE yshift = 32 - _ybits; + BYTE xshift = yshift - _xbits; + int xmask = ((1 << _xbits) - 1) << _ybits; do { uint32_t texdata; @@ -2057,42 +2071,46 @@ public: class DrawSpanAddClampRGBACommand : public DrawerCommand { - const uint32_t *ds_source; - fixed_t ds_light; - ShadeConstants ds_shade_constants; - fixed_t ds_xfrac; - fixed_t ds_yfrac; - BYTE *dc_destorg; - int ds_x1; - int ds_x2; - int ds_y1; - int ds_y; - fixed_t ds_xstep; - fixed_t ds_ystep; - int ds_xbits; - int ds_ybits; + const uint32_t *_source; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _xfrac; + fixed_t _yfrac; + BYTE *_destorg; + int _x1; + int _x2; + int _y1; + int _y; + fixed_t _xstep; + fixed_t _ystep; + int _xbits; + int _ybits; + fixed_t _srcalpha; + fixed_t _destalpha; public: DrawSpanAddClampRGBACommand() { - ds_source = (const uint32_t*)::ds_source; - ds_light = ::ds_light; - ds_shade_constants = 
::ds_shade_constants; - ds_xfrac = ::ds_xfrac; - ds_yfrac = ::ds_yfrac; - dc_destorg = ::dc_destorg; - ds_x1 = ::ds_x1; - ds_x2 = ::ds_x2; - ds_y = ::ds_y; - ds_xstep = ::ds_xstep; - ds_ystep = ::ds_ystep; - ds_xbits = ::ds_xbits; - ds_ybits = ::ds_ybits; + _source = (const uint32_t*)ds_source; + _light = ds_light; + _shade_constants = ds_shade_constants; + _xfrac = ds_xfrac; + _yfrac = ds_yfrac; + _destorg = dc_destorg; + _x1 = ds_x1; + _x2 = ds_x2; + _y = ds_y; + _xstep = ds_xstep; + _ystep = ds_ystep; + _xbits = ds_xbits; + _ybits = ds_ybits; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override { - if (thread->line_skipped_by_thread(ds_y)) + if (thread->line_skipped_by_thread(_y)) return; dsfixed_t xfrac; @@ -2100,27 +2118,27 @@ public: dsfixed_t xstep; dsfixed_t ystep; uint32_t* dest; - const uint32_t* source = ds_source; + const uint32_t* source = _source; int count; int spot; - uint32_t light = calc_light_multiplier(ds_light); - ShadeConstants shade_constants = ds_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - xfrac = ds_xfrac; - yfrac = ds_yfrac; + xfrac = _xfrac; + yfrac = _yfrac; - dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; + dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; - count = ds_x2 - ds_x1 + 1; + count = _x2 - _x1 + 1; - xstep = ds_xstep; - ystep = ds_ystep; + xstep = _xstep; + ystep = _ystep; - if (ds_xbits == 6 && ds_ybits == 6) + if (_xbits == 6 && _ybits == 6) { // 64x64 is the most common case by far, so special case it. 
do @@ -2148,9 +2166,9 @@ public: } else { - BYTE yshift = 32 - ds_ybits; - BYTE xshift = yshift - ds_xbits; - int xmask = ((1 << ds_xbits) - 1) << ds_ybits; + BYTE yshift = 32 - _ybits; + BYTE xshift = yshift - _xbits; + int xmask = ((1 << _xbits) - 1) << _ybits; do { spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); @@ -2179,42 +2197,46 @@ public: class DrawSpanMaskedAddClampRGBACommand : public DrawerCommand { - const uint32_t *ds_source; - fixed_t ds_light; - ShadeConstants ds_shade_constants; - fixed_t ds_xfrac; - fixed_t ds_yfrac; - BYTE *dc_destorg; - int ds_x1; - int ds_x2; - int ds_y1; - int ds_y; - fixed_t ds_xstep; - fixed_t ds_ystep; - int ds_xbits; - int ds_ybits; + const uint32_t *_source; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _xfrac; + fixed_t _yfrac; + BYTE *_destorg; + int _x1; + int _x2; + int _y1; + int _y; + fixed_t _xstep; + fixed_t _ystep; + int _xbits; + int _ybits; + fixed_t _srcalpha; + fixed_t _destalpha; public: DrawSpanMaskedAddClampRGBACommand() { - ds_source = (const uint32_t*)::ds_source; - ds_light = ::ds_light; - ds_shade_constants = ::ds_shade_constants; - ds_xfrac = ::ds_xfrac; - ds_yfrac = ::ds_yfrac; - dc_destorg = ::dc_destorg; - ds_x1 = ::ds_x1; - ds_x2 = ::ds_x2; - ds_y = ::ds_y; - ds_xstep = ::ds_xstep; - ds_ystep = ::ds_ystep; - ds_xbits = ::ds_xbits; - ds_ybits = ::ds_ybits; + _source = (const uint32_t*)ds_source; + _light = ds_light; + _shade_constants = ds_shade_constants; + _xfrac = ds_xfrac; + _yfrac = ds_yfrac; + _destorg = dc_destorg; + _x1 = ds_x1; + _x2 = ds_x2; + _y = ds_y; + _xstep = ds_xstep; + _ystep = ds_ystep; + _xbits = ds_xbits; + _ybits = ds_ybits; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override { - if (thread->line_skipped_by_thread(ds_y)) + if (thread->line_skipped_by_thread(_y)) return; dsfixed_t xfrac; @@ -2222,27 +2244,27 @@ public: dsfixed_t xstep; dsfixed_t ystep; uint32_t* dest; - const uint32_t* source = ds_source; 
+ const uint32_t* source = _source; int count; int spot; - uint32_t light = calc_light_multiplier(ds_light); - ShadeConstants shade_constants = ds_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - xfrac = ds_xfrac; - yfrac = ds_yfrac; + xfrac = _xfrac; + yfrac = _yfrac; - dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; + dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; - count = ds_x2 - ds_x1 + 1; + count = _x2 - _x1 + 1; - xstep = ds_xstep; - ystep = ds_ystep; + xstep = _xstep; + ystep = _ystep; - if (ds_xbits == 6 && ds_ybits == 6) + if (_xbits == 6 && _ybits == 6) { // 64x64 is the most common case by far, so special case it. do @@ -2275,9 +2297,9 @@ public: } else { - BYTE yshift = 32 - ds_ybits; - BYTE xshift = yshift - ds_xbits; - int xmask = ((1 << ds_xbits) - 1) << ds_ybits; + BYTE yshift = 32 - _ybits; + BYTE xshift = yshift - _xbits; + int xmask = ((1 << _xbits) - 1) << _ybits; do { uint32_t texdata; @@ -2311,33 +2333,33 @@ public: class FillSpanRGBACommand : public DrawerCommand { - int ds_x1; - int ds_x2; - int ds_y; - BYTE *dc_destorg; - fixed_t ds_light; - int ds_color; + int _x1; + int _x2; + int _y; + BYTE *_destorg; + fixed_t _light; + int _color; public: FillSpanRGBACommand() { - ds_x1 = ::ds_x1; - ds_x2 = ::ds_x2; - ds_y = ::ds_y; - dc_destorg = ::dc_destorg; - ds_light = ::ds_light; - ds_color = ::ds_color; + _x1 = ds_x1; + _x2 = ds_x2; + _y = ds_y; + _destorg = dc_destorg; + _light = ds_light; + _color = ds_color; } void Execute(DrawerThread *thread) override { - if (thread->line_skipped_by_thread(ds_y)) + if (thread->line_skipped_by_thread(_y)) return; - uint32_t *dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; - int count = (ds_x2 - ds_x1 + 1); - uint32_t 
light = calc_light_multiplier(ds_light); - uint32_t color = shade_pal_index_simple(ds_color, light); + uint32_t *dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; + int count = (_x2 - _x1 + 1); + uint32_t light = calc_light_multiplier(_light); + uint32_t color = shade_pal_index_simple(_color, light); for (int i = 0; i < count; i++) dest[i] = color; } @@ -2345,45 +2367,45 @@ public: class Vlinec1RGBACommand : public DrawerCommand { - DWORD dc_iscale; - DWORD dc_texturefrac; - int dc_count; - const BYTE *dc_source; - BYTE *dc_dest; + DWORD _iscale; + DWORD _texturefrac; + int _count; + const BYTE *_source; + BYTE *_dest; int vlinebits; - int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; public: Vlinec1RGBACommand() { - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_count = ::dc_count; - dc_source = ::dc_source; - dc_dest = ::dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _count = dc_count; + _source = dc_source; + _dest = dc_dest; vlinebits = ::vlinebits; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; } void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(dc_dest_y, dc_count); + int count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - DWORD fracstep = dc_iscale * thread->num_cores; - DWORD frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); - const uint32 *source = (const uint32 *)dc_source; - uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + DWORD fracstep = _iscale * thread->num_cores; + DWORD frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); + const uint32 *source = (const uint32 *)_source; + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); int bits = 
vlinebits; - int pitch = dc_pitch * thread->num_cores; + int pitch = _pitch * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; do { @@ -2396,10 +2418,10 @@ public: class Vlinec4RGBACommand : public DrawerCommand { - BYTE *dc_dest; - int dc_count; - int dc_pitch; - ShadeConstants dc_shade_constants; + BYTE *_dest; + int _count; + int _pitch; + ShadeConstants _shade_constants; int vlinebits; fixed_t palookuplight[4]; DWORD vplce[4]; @@ -2409,10 +2431,10 @@ class Vlinec4RGBACommand : public DrawerCommand public: Vlinec4RGBACommand() { - dc_dest = ::dc_dest; - dc_count = ::dc_count; - dc_pitch = ::dc_pitch; - dc_shade_constants = ::dc_shade_constants; + _dest = dc_dest; + _count = dc_count; + _pitch = dc_pitch; + _shade_constants = dc_shade_constants; vlinebits = ::vlinebits; for (int i = 0; i < 4; i++) { @@ -2426,12 +2448,12 @@ public: #ifdef NO_SSE void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(dc_dest_y, dc_count); + int count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - int pitch = dc_pitch * thread->num_cores; + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); + int pitch = _pitch * thread->num_cores; int bits = vlinebits; DWORD place; @@ -2440,11 +2462,11 @@ public: uint32_t light2 = calc_light_multiplier(palookuplight[2]); uint32_t light3 = calc_light_multiplier(palookuplight[3]); - ShadeConstants shade_constants = dc_shade_constants; + ShadeConstants shade_constants = _shade_constants; DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = thread->skipped_by_thread(dc_dest_y); + int skipped = 
thread->skipped_by_thread(_dest_y); for (int i = 0; i < 4; i++) { local_vplce[i] += local_vince[i] * skipped; @@ -2463,24 +2485,24 @@ public: #else void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(dc_dest_y, dc_count); + int count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); int bits = vlinebits; - int pitch = dc_pitch * thread->num_cores; + int pitch = _pitch * thread->num_cores; uint32_t light0 = calc_light_multiplier(palookuplight[0]); uint32_t light1 = calc_light_multiplier(palookuplight[1]); uint32_t light2 = calc_light_multiplier(palookuplight[2]); uint32_t light3 = calc_light_multiplier(palookuplight[3]); - ShadeConstants shade_constants = dc_shade_constants; + ShadeConstants shade_constants = _shade_constants; DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = thread->skipped_by_thread(dc_dest_y); + int skipped = thread->skipped_by_thread(_dest_y); for (int i = 0; i < 4; i++) { local_vplce[i] += local_vince[i] * skipped; @@ -2545,45 +2567,45 @@ public: class Mvlinec1RGBACommand : public DrawerCommand { - DWORD dc_iscale; - DWORD dc_texturefrac; - int dc_count; - const BYTE *dc_source; - BYTE *dc_dest; + DWORD _iscale; + DWORD _texturefrac; + int _count; + const BYTE *_source; + BYTE *_dest; int mvlinebits; - int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; public: Mvlinec1RGBACommand() { - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_count = ::dc_count; - dc_source = ::dc_source; - dc_dest = ::dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _count = dc_count; + _source = dc_source; + _dest = dc_dest; mvlinebits = 
::mvlinebits; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; } void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(dc_dest_y, dc_count); + int count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - DWORD fracstep = dc_iscale * thread->num_cores; - DWORD frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); - const uint32 *source = (const uint32 *)dc_source; - uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + DWORD fracstep = _iscale * thread->num_cores; + DWORD frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); + const uint32 *source = (const uint32 *)_source; + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); int bits = mvlinebits; - int pitch = dc_pitch * thread->num_cores; + int pitch = _pitch * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; do { @@ -2600,10 +2622,10 @@ public: class Mvlinec4RGBACommand : public DrawerCommand { - BYTE *dc_dest; - int dc_count; - int dc_pitch; - ShadeConstants dc_shade_constants; + BYTE *_dest; + int _count; + int _pitch; + ShadeConstants _shade_constants; int mvlinebits; fixed_t palookuplight[4]; DWORD vplce[4]; @@ -2613,10 +2635,10 @@ class Mvlinec4RGBACommand : public DrawerCommand public: Mvlinec4RGBACommand() { - dc_dest = ::dc_dest; - dc_count = ::dc_count; - dc_pitch = ::dc_pitch; - dc_shade_constants = ::dc_shade_constants; + _dest = dc_dest; + _count = dc_count; + _pitch = dc_pitch; + _shade_constants = dc_shade_constants; mvlinebits = ::mvlinebits; for (int i = 0; i < 4; i++) { @@ -2630,12 +2652,12 @@ public: #ifdef NO_SSE void Execute(DrawerThread *thread) 
override { - int count = thread->count_for_thread(dc_dest_y, dc_count); + int count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - int pitch = dc_pitch * thread->num_cores; + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); + int pitch = _pitch * thread->num_cores; int bits = mvlinebits; DWORD place; @@ -2644,11 +2666,11 @@ public: uint32_t light2 = calc_light_multiplier(palookuplight[2]); uint32_t light3 = calc_light_multiplier(palookuplight[3]); - ShadeConstants shade_constants = dc_shade_constants; + ShadeConstants shade_constants = _shade_constants; DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = thread->skipped_by_thread(dc_dest_y); + int skipped = thread->skipped_by_thread(_dest_y); for (int i = 0; i < 4; i++) { local_vplce[i] += local_vince[i] * skipped; @@ -2668,12 +2690,12 @@ public: #else void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(dc_dest_y, dc_count); + int count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - int pitch = dc_pitch * thread->num_cores; + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); + int pitch = _pitch * thread->num_cores; int bits = mvlinebits; uint32_t light0 = calc_light_multiplier(palookuplight[0]); @@ -2681,11 +2703,11 @@ public: uint32_t light2 = calc_light_multiplier(palookuplight[2]); uint32_t light3 = calc_light_multiplier(palookuplight[3]); - ShadeConstants shade_constants = dc_shade_constants; + ShadeConstants shade_constants = _shade_constants; DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = 
thread->skipped_by_thread(dc_dest_y); + int skipped = thread->skipped_by_thread(_dest_y); for (int i = 0; i < 4; i++) { local_vplce[i] += local_vince[i] * skipped; @@ -2756,52 +2778,52 @@ public: class Tmvline1AddRGBACommand : public DrawerCommand { - DWORD dc_iscale; - DWORD dc_texturefrac; - int dc_count; - const BYTE *dc_source; - BYTE *dc_dest; + DWORD _iscale; + DWORD _texturefrac; + int _count; + const BYTE *_source; + BYTE *_dest; int tmvlinebits; - int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; public: Tmvline1AddRGBACommand() { - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_count = ::dc_count; - dc_source = ::dc_source; - dc_dest = ::dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _count = dc_count; + _source = dc_source; + _dest = dc_dest; tmvlinebits = ::tmvlinebits; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(dc_dest_y, dc_count); + int count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - DWORD fracstep = dc_iscale * thread->num_cores; - DWORD frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); - const uint32 *source = (const uint32 *)dc_source; - uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + DWORD fracstep = _iscale * thread->num_cores; + DWORD frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); + const uint32 *source = (const uint32 *)_source; + uint32_t *dest = thread->dest_for_thread(_dest_y, 
_pitch, (uint32_t*)_dest); int bits = tmvlinebits; - int pitch = dc_pitch * thread->num_cores; + int pitch = _pitch * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { @@ -2831,12 +2853,12 @@ public: class Tmvline4AddRGBACommand : public DrawerCommand { - BYTE *dc_dest; - int dc_count; - int dc_pitch; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; + BYTE *_dest; + int _count; + int _pitch; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; int tmvlinebits; fixed_t palookuplight[4]; DWORD vplce[4]; @@ -2846,12 +2868,12 @@ class Tmvline4AddRGBACommand : public DrawerCommand public: Tmvline4AddRGBACommand() { - dc_dest = ::dc_dest; - dc_count = ::dc_count; - dc_pitch = ::dc_pitch; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; + _dest = dc_dest; + _count = dc_count; + _pitch = dc_pitch; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; tmvlinebits = ::tmvlinebits; for (int i = 0; i < 4; i++) { @@ -2864,12 +2886,12 @@ public: void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(dc_dest_y, dc_count); + int count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - int pitch = dc_pitch * thread->num_cores; + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); + int pitch = _pitch * thread->num_cores; int bits = tmvlinebits; uint32_t light[4]; @@ -2878,14 
+2900,14 @@ public: light[2] = calc_light_multiplier(palookuplight[2]); light[3] = calc_light_multiplier(palookuplight[3]); - ShadeConstants shade_constants = dc_shade_constants; + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = thread->skipped_by_thread(dc_dest_y); + int skipped = thread->skipped_by_thread(_dest_y); for (int i = 0; i < 4; i++) { local_vplce[i] += local_vince[i] * skipped; @@ -2923,52 +2945,52 @@ public: class Tmvline1AddClampRGBACommand : public DrawerCommand { - DWORD dc_iscale; - DWORD dc_texturefrac; - int dc_count; - const BYTE *dc_source; - BYTE *dc_dest; + DWORD _iscale; + DWORD _texturefrac; + int _count; + const BYTE *_source; + BYTE *_dest; int tmvlinebits; - int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; public: Tmvline1AddClampRGBACommand() { - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_count = ::dc_count; - dc_source = ::dc_source; - dc_dest = ::dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _count = dc_count; + _source = dc_source; + _dest = dc_dest; tmvlinebits = ::tmvlinebits; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override { - int count = 
thread->count_for_thread(dc_dest_y, dc_count); + int count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - DWORD fracstep = dc_iscale * thread->num_cores; - DWORD frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); - const uint32 *source = (const uint32 *)dc_source; - uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + DWORD fracstep = _iscale * thread->num_cores; + DWORD frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); + const uint32 *source = (const uint32 *)_source; + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); int bits = tmvlinebits; - int pitch = dc_pitch * thread->num_cores; + int pitch = _pitch * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { @@ -2998,12 +3020,12 @@ public: class Tmvline4AddClampRGBACommand : public DrawerCommand { - BYTE *dc_dest; - int dc_count; - int dc_pitch; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; + BYTE *_dest; + int _count; + int _pitch; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; int tmvlinebits; fixed_t palookuplight[4]; DWORD vplce[4]; @@ -3013,12 +3035,12 @@ class Tmvline4AddClampRGBACommand : public DrawerCommand public: Tmvline4AddClampRGBACommand() { - dc_dest = ::dc_dest; - dc_count = ::dc_count; - dc_pitch = ::dc_pitch; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; + _dest = dc_dest; + _count = dc_count; + _pitch = dc_pitch; + _shade_constants = dc_shade_constants; + 
_srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; tmvlinebits = ::tmvlinebits; for (int i = 0; i < 4; i++) { @@ -3031,12 +3053,12 @@ public: void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(dc_dest_y, dc_count); + int count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - int pitch = dc_pitch * thread->num_cores; + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); + int pitch = _pitch * thread->num_cores; int bits = tmvlinebits; uint32_t light[4]; @@ -3045,14 +3067,14 @@ public: light[2] = calc_light_multiplier(palookuplight[2]); light[3] = calc_light_multiplier(palookuplight[3]); - ShadeConstants shade_constants = dc_shade_constants; + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = thread->skipped_by_thread(dc_dest_y); + int skipped = thread->skipped_by_thread(_dest_y); for (int i = 0; i < 4; i++) { local_vplce[i] += local_vince[i] * skipped; @@ -3090,52 +3112,52 @@ public: class Tmvline1SubClampRGBACommand : public DrawerCommand { - DWORD dc_iscale; - DWORD dc_texturefrac; - int dc_count; - const BYTE *dc_source; - BYTE *dc_dest; + DWORD _iscale; + DWORD _texturefrac; + int _count; + const BYTE *_source; + BYTE *_dest; int tmvlinebits; - int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; public: Tmvline1SubClampRGBACommand() { - dc_iscale = ::dc_iscale; - 
dc_texturefrac = ::dc_texturefrac; - dc_count = ::dc_count; - dc_source = ::dc_source; - dc_dest = ::dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _count = dc_count; + _source = dc_source; + _dest = dc_dest; tmvlinebits = ::tmvlinebits; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(dc_dest_y, dc_count); + int count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - DWORD fracstep = dc_iscale * thread->num_cores; - DWORD frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); - const uint32 *source = (const uint32 *)dc_source; - uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + DWORD fracstep = _iscale * thread->num_cores; + DWORD frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); + const uint32 *source = (const uint32 *)_source; + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); int bits = tmvlinebits; - int pitch = dc_pitch * thread->num_cores; + int pitch = _pitch * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { @@ -3165,12 +3187,12 @@ public: class Tmvline4SubClampRGBACommand : public DrawerCommand { - BYTE *dc_dest; - int dc_count; - int dc_pitch; - ShadeConstants dc_shade_constants; - fixed_t 
dc_srcalpha; - fixed_t dc_destalpha; + BYTE *_dest; + int _count; + int _pitch; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; int tmvlinebits; fixed_t palookuplight[4]; DWORD vplce[4]; @@ -3180,12 +3202,12 @@ class Tmvline4SubClampRGBACommand : public DrawerCommand public: Tmvline4SubClampRGBACommand() { - dc_dest = ::dc_dest; - dc_count = ::dc_count; - dc_pitch = ::dc_pitch; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; + _dest = dc_dest; + _count = dc_count; + _pitch = dc_pitch; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; tmvlinebits = ::tmvlinebits; for (int i = 0; i < 4; i++) { @@ -3198,12 +3220,12 @@ public: void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(dc_dest_y, dc_count); + int count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - int pitch = dc_pitch * thread->num_cores; + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); + int pitch = _pitch * thread->num_cores; int bits = tmvlinebits; uint32_t light[4]; @@ -3212,14 +3234,14 @@ public: light[2] = calc_light_multiplier(palookuplight[2]); light[3] = calc_light_multiplier(palookuplight[3]); - ShadeConstants shade_constants = dc_shade_constants; + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = thread->skipped_by_thread(dc_dest_y); + int skipped = thread->skipped_by_thread(_dest_y); for (int i = 0; i < 4; i++) { local_vplce[i] += 
local_vince[i] * skipped; @@ -3257,52 +3279,52 @@ public: class Tmvline1RevSubClampRGBACommand : public DrawerCommand { - DWORD dc_iscale; - DWORD dc_texturefrac; - int dc_count; - const BYTE *dc_source; - BYTE *dc_dest; + DWORD _iscale; + DWORD _texturefrac; + int _count; + const BYTE *_source; + BYTE *_dest; int tmvlinebits; - int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; public: Tmvline1RevSubClampRGBACommand() { - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_count = ::dc_count; - dc_source = ::dc_source; - dc_dest = ::dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _count = dc_count; + _source = dc_source; + _dest = dc_dest; tmvlinebits = ::tmvlinebits; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(dc_dest_y, dc_count); + int count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - DWORD fracstep = dc_iscale * thread->num_cores; - DWORD frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); - const uint32 *source = (const uint32 *)dc_source; - uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + DWORD fracstep = _iscale * thread->num_cores; + DWORD frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); + const uint32 *source = (const uint32 *)_source; + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); int bits = tmvlinebits; - int pitch = dc_pitch * thread->num_cores; + int pitch = _pitch * 
thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { @@ -3332,12 +3354,12 @@ public: class Tmvline4RevSubClampRGBACommand : public DrawerCommand { - BYTE *dc_dest; - int dc_count; - int dc_pitch; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; + BYTE *_dest; + int _count; + int _pitch; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; int tmvlinebits; fixed_t palookuplight[4]; DWORD vplce[4]; @@ -3347,12 +3369,12 @@ class Tmvline4RevSubClampRGBACommand : public DrawerCommand public: Tmvline4RevSubClampRGBACommand() { - dc_dest = ::dc_dest; - dc_count = ::dc_count; - dc_pitch = ::dc_pitch; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; + _dest = dc_dest; + _count = dc_count; + _pitch = dc_pitch; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; tmvlinebits = ::tmvlinebits; for (int i = 0; i < 4; i++) { @@ -3365,12 +3387,12 @@ public: void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(dc_dest_y, dc_count); + int count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - int pitch = dc_pitch * thread->num_cores; + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); + int pitch = _pitch * thread->num_cores; int bits = tmvlinebits; uint32_t light[4]; @@ -3379,14 +3401,14 @@ public: light[2] = calc_light_multiplier(palookuplight[2]); light[3] = 
calc_light_multiplier(palookuplight[3]); - ShadeConstants shade_constants = dc_shade_constants; + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = thread->skipped_by_thread(dc_dest_y); + int skipped = thread->skipped_by_thread(_dest_y); for (int i = 0; i < 4; i++) { local_vplce[i] += local_vince[i] * skipped; @@ -3427,9 +3449,9 @@ class DrawFogBoundaryLineRGBACommand : public DrawerCommand int _y; int _x; int _x2; - BYTE *dc_destorg; - fixed_t dc_light; - ShadeConstants dc_shade_constants; + BYTE *_destorg; + fixed_t _light; + ShadeConstants _shade_constants; public: DrawFogBoundaryLineRGBACommand(int y, int x, int x2) @@ -3438,9 +3460,9 @@ public: _x = x; _x2 = x2; - dc_destorg = ::dc_destorg; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; + _destorg = dc_destorg; + _light = dc_light; + _shade_constants = dc_shade_constants; } void Execute(DrawerThread *thread) override @@ -3452,10 +3474,10 @@ public: int x = _x; int x2 = _x2; - uint32_t *dest = ylookup[y] + (uint32_t*)dc_destorg; + uint32_t *dest = ylookup[y] + (uint32_t*)_destorg; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants constants = _shade_constants; do { @@ -3499,10 +3521,10 @@ class DrawTiltedSpanRGBACommand : public DrawerCommand int _y; int _x1; int _x2; - BYTE *dc_destorg; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - const BYTE *ds_source; + BYTE *_destorg; + fixed_t _light; + ShadeConstants _shade_constants; + const BYTE *_source; public: DrawTiltedSpanRGBACommand(int y, int x1, int x2) @@ 
-3511,8 +3533,8 @@ public: _x1 = x1; _x2 = x2; - dc_destorg = ::dc_destorg; - ds_source = ::ds_source; + _destorg = dc_destorg; + _source = ds_source; } void Execute(DrawerThread *thread) override @@ -3527,8 +3549,8 @@ public: // Slopes are broken currently in master. // Until R_DrawTiltedPlane is fixed we are just going to fill with a solid color. - uint32_t *source = (uint32_t*)ds_source; - uint32_t *dest = ylookup[y] + x1 + (uint32_t*)dc_destorg; + uint32_t *source = (uint32_t*)_source; + uint32_t *dest = ylookup[y] + x1 + (uint32_t*)_destorg; int count = x2 - x1 + 1; while (count > 0) @@ -3544,9 +3566,9 @@ class DrawColoredSpanRGBACommand : public DrawerCommand int _y; int _x1; int _x2; - BYTE *dc_destorg; - fixed_t ds_light; - int ds_color; + BYTE *_destorg; + fixed_t _light; + int _color; public: DrawColoredSpanRGBACommand(int y, int x1, int x2) @@ -3555,9 +3577,9 @@ public: _x1 = x1; _x2 = x2; - dc_destorg = ::dc_destorg; - ds_light = ::ds_light; - ds_color = ::ds_color; + _destorg = dc_destorg; + _light = ds_light; + _color = ds_color; } void Execute(DrawerThread *thread) override @@ -3569,10 +3591,10 @@ public: int x1 = _x1; int x2 = _x2; - uint32_t *dest = ylookup[y] + x1 + (uint32_t*)dc_destorg; + uint32_t *dest = ylookup[y] + x1 + (uint32_t*)_destorg; int count = (x2 - x1 + 1); - uint32_t light = calc_light_multiplier(ds_light); - uint32_t color = shade_pal_index_simple(ds_color, light); + uint32_t light = calc_light_multiplier(_light); + uint32_t color = shade_pal_index_simple(_color, light); for (int i = 0; i < count; i++) dest[i] = color; } @@ -3585,10 +3607,9 @@ class FillTransColumnRGBACommand : public DrawerCommand int _y2; int _color; int _a; - BYTE *dc_destorg; - int dc_pitch; - fixed_t ds_light; - int ds_color; + BYTE *_destorg; + int _pitch; + fixed_t _light; public: FillTransColumnRGBACommand(int x, int y1, int y2, int color, int a) @@ -3599,8 +3620,8 @@ public: _color = color; _a = a; - dc_destorg = ::dc_destorg; - dc_pitch = ::dc_pitch; + 
_destorg = dc_destorg; + _pitch = dc_pitch; } void Execute(DrawerThread *thread) override @@ -3627,8 +3648,8 @@ public: fg_green *= alpha; fg_blue *= alpha; - int spacing = dc_pitch * thread->num_cores; - uint32_t *dest = thread->dest_for_thread(y1, dc_pitch, ylookup[y1] + x + (uint32_t*)dc_destorg); + int spacing = _pitch * thread->num_cores; + uint32_t *dest = thread->dest_for_thread(y1, _pitch, ylookup[y1] + x + (uint32_t*)_destorg); for (int y = 0; y < ycount; y++) { diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index a91b54d744..6e35de9ffc 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -178,17 +178,19 @@ public: class DrawerCommand { protected: - int dc_dest_y; + int _dest_y; public: DrawerCommand() { - dc_dest_y = static_cast((dc_dest - dc_destorg) / (dc_pitch * 4)); + _dest_y = static_cast((dc_dest - dc_destorg) / (dc_pitch * 4)); } virtual void Execute(DrawerThread *thread) = 0; }; +EXTERN_CVAR(Bool, r_multithreaded) + // Manages queueing up commands and executing them on worker threads class DrawerCommandQueue { @@ -232,7 +234,7 @@ public: static void QueueCommand(Types &&... 
args) { auto queue = Instance(); - if (queue->threaded_render == 0) + if (queue->threaded_render == 0 || !r_multithreaded) { T command(std::forward(args)...); command.Execute(&queue->single_core_thread); diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp index 4b6605b4aa..0eabc48d89 100644 --- a/src/r_drawt_rgba.cpp +++ b/src/r_drawt_rgba.cpp @@ -61,8 +61,8 @@ class RtCopy1colRGBACommand : public DrawerCommand int sx; int yl; int yh; - BYTE *dc_destorg; - int dc_pitch; + BYTE *_destorg; + int _pitch; public: RtCopy1colRGBACommand(int hx, int sx, int yl, int yh) @@ -72,8 +72,8 @@ public: this->yl = yl; this->yh = yh; - dc_destorg = ::dc_destorg; - dc_pitch = ::dc_pitch; + _destorg = dc_destorg; + _pitch = dc_pitch; } void Execute(DrawerThread *thread) override @@ -87,9 +87,9 @@ public: if (count <= 0) return; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4 + hx] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = thread->num_cores * 4; if (count & 1) { @@ -123,11 +123,11 @@ class RtMap1colRGBACommand : public DrawerCommand int sx; int yl; int yh; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - BYTE *dc_destorg; - int dc_pitch; - BYTE *dc_colormap; + fixed_t _light; + ShadeConstants _shade_constants; + BYTE *_destorg; + int _pitch; + BYTE *_colormap; public: RtMap1colRGBACommand(int hx, int sx, int yl, int yh) @@ -137,11 +137,11 @@ public: this->yl = yl; this->yh = yh; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_destorg = ::dc_destorg; - dc_pitch = ::dc_pitch; - dc_colormap = ::dc_colormap; + _light = dc_light; + _shade_constants = dc_shade_constants; + _destorg = dc_destorg; + _pitch = dc_pitch; + _colormap = dc_colormap; } void Execute(DrawerThread *thread) override @@ -156,15 +156,15 
@@ public: if (count <= 0) return; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4 + hx] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = thread->num_cores * 4; - BYTE *colormap = dc_colormap; + BYTE *colormap = _colormap; if (count & 1) { *dest = shade_pal_index(colormap[*source], light, shade_constants); @@ -188,11 +188,11 @@ class RtMap4colsRGBACommand : public DrawerCommand int sx; int yl; int yh; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - BYTE *dc_destorg; - int dc_pitch; - BYTE *colormap; + fixed_t _light; + ShadeConstants _shade_constants; + BYTE *_destorg; + int _pitch; + BYTE *_colormap; public: RtMap4colsRGBACommand(int sx, int yl, int yh) @@ -201,11 +201,11 @@ public: this->yl = yl; this->yh = yh; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_destorg = ::dc_destorg; - dc_pitch = ::dc_pitch; - dc_colormap = ::dc_colormap; + _light = dc_light; + _shade_constants = dc_shade_constants; + _destorg = dc_destorg; + _pitch = dc_pitch; + _colormap = dc_colormap; } #ifdef NO_SSE @@ -221,15 +221,15 @@ public: if (count <= 0) return; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4] + 
thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = thread->num_cores * 4; - BYTE *colormap = dc_colormap; + BYTE *colormap = _colormap; if (count & 1) { dest[0] = shade_pal_index(colormap[source[0]], light, shade_constants); @@ -268,16 +268,16 @@ public: if (count <= 0) return; - ShadeConstants shade_constants = dc_shade_constants; - uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = _shade_constants; + uint32_t light = calc_light_multiplier(_light); uint32_t *palette = (uint32_t*)GPalette.BaseColors; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = thread->num_cores * 4; - BYTE *colormap = dc_colormap; + BYTE *colormap = _colormap; if (shade_constants.simple_shade) { @@ -509,13 +509,13 @@ class RtAdd1colRGBACommand : public DrawerCommand int sx; int yl; int yh; - BYTE *dc_destorg; - int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; - BYTE *dc_colormap; + BYTE *_destorg; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; + BYTE *_colormap; public: RtAdd1colRGBACommand(int hx, int sx, int yl, int yh) @@ -525,13 +525,13 @@ public: this->yl = yl; this->yh = yh; - dc_destorg = ::dc_destorg; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; - dc_colormap = ::dc_colormap; + _destorg = dc_destorg; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; + _colormap = 
dc_colormap; } void Execute(DrawerThread *thread) override @@ -546,17 +546,17 @@ public: if (count <= 0) return; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4 + hx] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - BYTE *colormap = dc_colormap; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; + BYTE *colormap = _colormap; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { uint32_t fg = shade_pal_index(colormap[*source], light, shade_constants); @@ -585,13 +585,13 @@ class RtAdd4colsRGBACommand : public DrawerCommand int sx; int yl; int yh; - BYTE *dc_destorg; - int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - BYTE *dc_colormap; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; + BYTE *_destorg; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; + BYTE *_colormap; + fixed_t _srcalpha; + fixed_t _destalpha; public: RtAdd4colsRGBACommand(int sx, int yl, int yh) @@ -600,13 +600,13 @@ public: this->yl = yl; this->yh = yh; - dc_destorg = ::dc_destorg; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_colormap = ::dc_colormap; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; + _destorg = dc_destorg; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; + _colormap = dc_colormap; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } #ifdef NO_SSE @@ -622,17 
+622,17 @@ public: if (count <= 0) return; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - BYTE *colormap = dc_colormap; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; + BYTE *colormap = _colormap; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { for (int i = 0; i < 4; i++) @@ -670,19 +670,19 @@ public: if (count <= 0) return; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); + uint32_t light = calc_light_multiplier(_light); uint32_t *palette = (uint32_t*)GPalette.BaseColors; - BYTE *colormap = dc_colormap; + BYTE *colormap = _colormap; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - ShadeConstants shade_constants = dc_shade_constants; + ShadeConstants shade_constants = _shade_constants; if (shade_constants.simple_shade) { @@ -766,11 +766,11 @@ class RtShaded1colRGBACommand : public DrawerCommand int sx; int yl; int 
yh; - lighttable_t *dc_colormap; - BYTE *dc_destorg; - int dc_pitch; - int dc_color; - fixed_t dc_light; + lighttable_t *_colormap; + BYTE *_destorg; + int _pitch; + int _color; + fixed_t _light; public: RtShaded1colRGBACommand(int hx, int sx, int yl, int yh) @@ -780,11 +780,11 @@ public: this->yl = yl; this->yh = yh; - dc_colormap = ::dc_colormap; - dc_destorg = ::dc_destorg; - dc_pitch = ::dc_pitch; - dc_color = ::dc_color; - dc_light = ::dc_light; + _colormap = dc_colormap; + _destorg = dc_destorg; + _pitch = dc_pitch; + _color = dc_color; + _light = dc_light; } void Execute(DrawerThread *thread) override @@ -800,13 +800,13 @@ public: if (count <= 0) return; - colormap = dc_colormap; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + colormap = _colormap; + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4 + hx] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); + uint32_t fg = shade_pal_index_simple(_color, calc_light_multiplier(_light)); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -835,11 +835,11 @@ class RtShaded4colsRGBACommand : public DrawerCommand int sx; int yl; int yh; - lighttable_t *dc_colormap; - int dc_color; - BYTE *dc_destorg; - int dc_pitch; - fixed_t dc_light; + lighttable_t *_colormap; + int _color; + BYTE *_destorg; + int _pitch; + fixed_t _light; public: RtShaded4colsRGBACommand(int sx, int yl, int yh) @@ -848,11 +848,11 @@ public: this->yl = yl; this->yh = yh; - dc_colormap = ::dc_colormap; - dc_color = ::dc_color; - dc_destorg = ::dc_destorg; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; + _colormap = dc_colormap; + _color = dc_color; + _destorg = dc_destorg; + _pitch = dc_pitch; + _light 
= dc_light; } #ifdef NO_SSE @@ -869,13 +869,13 @@ public: if (count <= 0) return; - colormap = dc_colormap; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + colormap = _colormap; + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); + uint32_t fg = shade_pal_index_simple(_color, calc_light_multiplier(_light)); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -914,13 +914,13 @@ public: if (count <= 0) return; - colormap = dc_colormap; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + colormap = _colormap; + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - __m128i fg = _mm_unpackhi_epi8(_mm_set1_epi32(shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light))), _mm_setzero_si128()); + __m128i fg = _mm_unpackhi_epi8(_mm_set1_epi32(shade_pal_index_simple(_color, calc_light_multiplier(_light))), _mm_setzero_si128()); __m128i alpha_one = _mm_set1_epi16(64); do { @@ -959,12 +959,12 @@ class RtAddClamp1colRGBACommand : public DrawerCommand int sx; int yl; int yh; - BYTE *dc_destorg; - int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; + BYTE *_destorg; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; public: RtAddClamp1colRGBACommand(int hx, int sx, int yl, int yh) @@ -974,12 +974,12 @@ 
public: this->yl = yl; this->yh = yh; - dc_destorg = ::dc_destorg; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; + _destorg = dc_destorg; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override @@ -994,16 +994,16 @@ public: if (count <= 0) return; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4 + hx] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { uint32_t fg = shade_pal_index(*source, light, shade_constants); @@ -1031,12 +1031,12 @@ class RtAddClamp4colsRGBACommand : public DrawerCommand int sx; int yl; int yh; - BYTE *dc_destorg; - int dc_pitch; - fixed_t dc_light; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; - ShadeConstants dc_shade_constants; + BYTE *_destorg; + int _pitch; + fixed_t _light; + fixed_t _srcalpha; + fixed_t _destalpha; + ShadeConstants _shade_constants; public: RtAddClamp4colsRGBACommand(int sx, int yl, int yh) @@ -1045,12 +1045,12 @@ public: this->yl = yl; this->yh = yh; - dc_destorg = ::dc_destorg; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; - 
dc_shade_constants = ::dc_shade_constants; + _destorg = dc_destorg; + _pitch = dc_pitch; + _light = dc_light; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; + _shade_constants = dc_shade_constants; } #ifdef NO_SSE @@ -1066,16 +1066,16 @@ public: if (count <= 0) return; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { for (int i = 0; i < 4; i++) @@ -1112,18 +1112,18 @@ public: if (count <= 0) return; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); + uint32_t light = calc_light_multiplier(_light); uint32_t *palette = (uint32_t*)GPalette.BaseColors; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - ShadeConstants shade_constants = dc_shade_constants; + ShadeConstants shade_constants = _shade_constants; if 
(shade_constants.simple_shade) { @@ -1207,12 +1207,12 @@ class RtSubClamp1colRGBACommand : public DrawerCommand int sx; int yl; int yh; - BYTE *dc_destorg; - int dc_pitch; - fixed_t dc_light; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; - ShadeConstants dc_shade_constants; + BYTE *_destorg; + int _pitch; + fixed_t _light; + fixed_t _srcalpha; + fixed_t _destalpha; + ShadeConstants _shade_constants; public: RtSubClamp1colRGBACommand(int hx, int sx, int yl, int yh) @@ -1222,12 +1222,12 @@ public: this->yl = yl; this->yh = yh; - dc_destorg = ::dc_destorg; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; - dc_shade_constants = ::dc_shade_constants; + _destorg = dc_destorg; + _pitch = dc_pitch; + _light = dc_light; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; + _shade_constants = dc_shade_constants; } void Execute(DrawerThread *thread) override @@ -1242,16 +1242,16 @@ public: if (count <= 0) return; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4 + hx] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { uint32_t fg = shade_pal_index(*source, light, shade_constants); @@ -1279,12 +1279,12 @@ class RtSubClamp4colsRGBACommand : public DrawerCommand int sx; int yl; int yh; - BYTE *dc_destorg; - int dc_pitch; - fixed_t 
dc_light; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; - ShadeConstants dc_shade_constants; + BYTE *_destorg; + int _pitch; + fixed_t _light; + fixed_t _srcalpha; + fixed_t _destalpha; + ShadeConstants _shade_constants; public: RtSubClamp4colsRGBACommand(int sx, int yl, int yh) @@ -1293,12 +1293,12 @@ public: this->yl = yl; this->yh = yh; - dc_destorg = ::dc_destorg; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; - dc_shade_constants = ::dc_shade_constants; + _destorg = dc_destorg; + _pitch = dc_pitch; + _light = dc_light; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; + _shade_constants = dc_shade_constants; } void Execute(DrawerThread *thread) override @@ -1313,16 +1313,16 @@ public: if (count <= 0) return; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { for (int i = 0; i < 4; i++) @@ -1355,12 +1355,12 @@ class RtRevSubClamp1colRGBACommand : public DrawerCommand int sx; int yl; int yh; - BYTE *dc_destorg; - int dc_pitch; - fixed_t dc_light; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; - ShadeConstants dc_shade_constants; + BYTE *_destorg; + int _pitch; + fixed_t _light; + fixed_t _srcalpha; + fixed_t _destalpha; + ShadeConstants _shade_constants; public: 
RtRevSubClamp1colRGBACommand(int hx, int sx, int yl, int yh) @@ -1370,12 +1370,12 @@ public: this->yl = yl; this->yh = yh; - dc_destorg = ::dc_destorg; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; - dc_shade_constants = ::dc_shade_constants; + _destorg = dc_destorg; + _pitch = dc_pitch; + _light = dc_light; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; + _shade_constants = dc_shade_constants; } void Execute(DrawerThread *thread) override @@ -1390,16 +1390,16 @@ public: if (count <= 0) return; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4 + hx] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { uint32_t fg = shade_pal_index(*source, light, shade_constants); @@ -1427,12 +1427,12 @@ class RtRevSubClamp4colsRGBACommand : public DrawerCommand int sx; int yl; int yh; - BYTE *dc_destorg; - int dc_pitch; - fixed_t dc_light; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; - ShadeConstants dc_shade_constants; + BYTE *_destorg; + int _pitch; + fixed_t _light; + fixed_t _srcalpha; + fixed_t _destalpha; + ShadeConstants _shade_constants; public: RtRevSubClamp4colsRGBACommand(int sx, int yl, int yh) @@ -1441,12 +1441,12 @@ public: this->yl = yl; this->yh = yh; - dc_destorg = ::dc_destorg; - dc_pitch = ::dc_pitch; - dc_light = 
::dc_light; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; - dc_shade_constants = ::dc_shade_constants; + _destorg = dc_destorg; + _pitch = dc_pitch; + _light = dc_light; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; + _shade_constants = dc_shade_constants; } void Execute(DrawerThread *thread) override @@ -1461,16 +1461,16 @@ public: if (count <= 0) return; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { for (int i = 0; i < 4; i++) @@ -1515,29 +1515,29 @@ public: class DrawColumnHorizRGBACommand : public DrawerCommand { - int dc_count; - fixed_t dc_iscale; - fixed_t dc_texturefrac; - const BYTE *dc_source; - int dc_x; - int dc_yl; - int dc_yh; + int _count; + fixed_t _iscale; + fixed_t _texturefrac; + const BYTE *_source; + int _x; + int _yl; + int _yh; public: DrawColumnHorizRGBACommand() { - dc_count = ::dc_count; - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_source = ::dc_source; - dc_x = ::dc_x; - dc_yl = ::dc_yl; - dc_yh = ::dc_yh; + _count = dc_count; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _source = dc_source; + _x = dc_x; + _yl = dc_yl; + _yh = dc_yh; } void Execute(DrawerThread *thread) override { - int count = dc_count; + int count = _count; uint32_t *dest; fixed_t 
fracstep; fixed_t frac; @@ -1546,13 +1546,13 @@ public: return; { - int x = dc_x & 3; - dest = &thread->dc_temp_rgba[x + 4 * dc_yl]; + int x = _x & 3; + dest = &thread->dc_temp_rgba[x + 4 * _yl]; } - fracstep = dc_iscale; - frac = dc_texturefrac; + fracstep = _iscale; + frac = _texturefrac; - const BYTE *source = dc_source; + const BYTE *source = _source; if (count & 1) { *dest = source[frac >> FRACBITS]; dest += 4; frac += fracstep; @@ -1589,34 +1589,34 @@ public: class FillColumnHorizRGBACommand : public DrawerCommand { - int dc_x; - int dc_yl; - int dc_yh; - int dc_count; - int dc_color; + int _x; + int _yl; + int _yh; + int _count; + int _color; public: FillColumnHorizRGBACommand() { - dc_x = ::dc_x; - dc_count = ::dc_count; - dc_color = ::dc_color; - dc_yl = ::dc_yl; - dc_yh = ::dc_yh; + _x = dc_x; + _count = dc_count; + _color = dc_color; + _yl = dc_yl; + _yh = dc_yh; } void Execute(DrawerThread *thread) override { - int count = dc_count; - int color = dc_color; + int count = _count; + int color = _color; uint32_t *dest; if (count <= 0) return; { - int x = dc_x & 3; - dest = &thread->dc_temp_rgba[x + 4 * dc_yl]; + int x = _x & 3; + dest = &thread->dc_temp_rgba[x + 4 * _yl]; } if (count & 1) { From fee8650357e87ee6fc19a622133ec95d9ca9f364 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 17 Jun 2016 08:02:41 +0200 Subject: [PATCH 048/912] Add r_multithreaded and fix color issue --- src/r_draw_rgba.cpp | 2159 +++++++++++++++++++++++------------------- src/r_draw_rgba.h | 4 +- src/r_drawt_rgba.cpp | 540 +++++------ 3 files changed, 1439 insertions(+), 1264 deletions(-) diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 2062609b41..722fbb8cd7 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -56,6 +56,8 @@ extern float rw_light; extern float rw_lightstep; extern int wallshade; +CVAR(Bool, r_multithreaded, true, 0) + ///////////////////////////////////////////////////////////////////////////// DrawerCommandQueue 
*DrawerCommandQueue::Instance() @@ -227,28 +229,28 @@ void DrawerCommandQueue::StopThreads() class DrawColumnRGBACommand : public DrawerCommand { - int dc_count; - BYTE *dc_dest; - DWORD dc_texturefrac; - DWORD dc_iscale; - fixed_t dc_light; - const BYTE *dc_source; - int dc_pitch; - ShadeConstants dc_shade_constants; - BYTE *dc_colormap; + int _count; + BYTE *_dest; + DWORD _texturefrac; + DWORD _iscale; + fixed_t _light; + const BYTE *_source; + int _pitch; + ShadeConstants _shade_constants; + BYTE *_colormap; public: DrawColumnRGBACommand() { - dc_count = ::dc_count; - dc_dest = ::dc_dest; - dc_texturefrac = ::dc_texturefrac; - dc_iscale = ::dc_iscale; - dc_light = ::dc_light; - dc_source = ::dc_source; - dc_pitch = ::dc_pitch; - dc_shade_constants = ::dc_shade_constants; - dc_colormap = ::dc_colormap; + _count = dc_count; + _dest = dc_dest; + _texturefrac = dc_texturefrac; + _iscale = dc_iscale; + _light = dc_light; + _source = dc_source; + _pitch = dc_pitch; + _shade_constants = dc_shade_constants; + _colormap = dc_colormap; } void Execute(DrawerThread *thread) override @@ -258,28 +260,28 @@ public: fixed_t frac; fixed_t fracstep; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); // Zero length, column does not exceed a pixel. if (count <= 0) return; // Framebuffer destination address. - dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; // Determine scaling, // which is the only mapping to be done. 
- fracstep = dc_iscale * thread->num_cores; - frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); + fracstep = _iscale * thread->num_cores; + frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); // [RH] Get local copies of these variables so that the compiler // has a better chance of optimizing this well. - const BYTE *source = dc_source; - int pitch = dc_pitch * thread->num_cores; - BYTE *colormap = dc_colormap; + const BYTE *source = _source; + int pitch = _pitch * thread->num_cores; + BYTE *colormap = _colormap; do { @@ -294,20 +296,20 @@ public: class FillColumnRGBACommand : public DrawerCommand { - int dc_count; - BYTE *dc_dest; - fixed_t dc_light; - int dc_pitch; - int dc_color; + int _count; + BYTE *_dest; + fixed_t _light; + int _pitch; + int _color; public: FillColumnRGBACommand() { - dc_count = ::dc_count; - dc_dest = ::dc_dest; - dc_light = ::dc_light; - dc_pitch = ::dc_pitch; - dc_color = ::dc_color; + _count = dc_count; + _dest = dc_dest; + _light = dc_light; + _pitch = dc_pitch; + _color = dc_color; } void Execute(DrawerThread *thread) override @@ -315,18 +317,18 @@ public: int count; uint32_t* dest; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - uint32_t light = calc_light_multiplier(dc_light); + uint32_t light = calc_light_multiplier(_light); { - int pitch = dc_pitch * thread->num_cores; - uint32_t color = shade_pal_index_simple(dc_color, light); + int pitch = _pitch * thread->num_cores; + uint32_t color = shade_pal_index_simple(_color, light); do { @@ -339,20 +341,20 @@ public: class FillAddColumnRGBACommand : public DrawerCommand { - int dc_count; - BYTE *dc_dest; - int dc_pitch; - fixed_t dc_light; - int dc_color; + int _count; + BYTE *_dest; + int _pitch; + fixed_t _light; + int 
_color; public: FillAddColumnRGBACommand() { - dc_count = ::dc_count; - dc_dest = ::dc_dest; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_color = ::dc_color; + _count = dc_count; + _dest = dc_dest; + _pitch = dc_pitch; + _light = dc_light; + _color = dc_color; } void Execute(DrawerThread *thread) override @@ -360,14 +362,14 @@ public: int count; uint32_t *dest; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - int pitch = dc_pitch * thread->num_cores; + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); + int pitch = _pitch * thread->num_cores; - uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); + uint32_t fg = shade_pal_index_simple(_color, calc_light_multiplier(_light)); uint32_t fg_red = (fg >> 24) & 0xff; uint32_t fg_green = (fg >> 16) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -390,20 +392,20 @@ public: class FillAddClampColumnRGBACommand : public DrawerCommand { - int dc_count; - BYTE *dc_dest; - int dc_pitch; - fixed_t dc_light; - int dc_color; + int _count; + BYTE *_dest; + int _pitch; + fixed_t _light; + int _color; public: FillAddClampColumnRGBACommand() { - dc_count = ::dc_count; - dc_dest = ::dc_dest; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_color = ::dc_color; + _count = dc_count; + _dest = dc_dest; + _pitch = dc_pitch; + _light = dc_light; + _color = dc_color; } void Execute(DrawerThread *thread) override @@ -411,14 +413,14 @@ public: int count; uint32_t *dest; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - int pitch = dc_pitch * thread->num_cores; + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); + int pitch = _pitch * thread->num_cores; - 
uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); + uint32_t fg = shade_pal_index_simple(_color, calc_light_multiplier(_light)); uint32_t fg_red = (fg >> 24) & 0xff; uint32_t fg_green = (fg >> 16) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -441,20 +443,20 @@ public: class FillSubClampColumnRGBACommand : public DrawerCommand { - int dc_count; - BYTE *dc_dest; - int dc_pitch; - int dc_color; - fixed_t dc_light; + int _count; + BYTE *_dest; + int _pitch; + int _color; + fixed_t _light; public: FillSubClampColumnRGBACommand() { - dc_count = ::dc_count; - dc_dest = ::dc_dest; - dc_pitch = ::dc_pitch; - dc_color = ::dc_color; - dc_light = ::dc_light; + _count = dc_count; + _dest = dc_dest; + _pitch = dc_pitch; + _color = dc_color; + _light = dc_light; } void Execute(DrawerThread *thread) override @@ -462,14 +464,14 @@ public: int count; uint32_t *dest; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - int pitch = dc_pitch * thread->num_cores; + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); + int pitch = _pitch * thread->num_cores; - uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); + uint32_t fg = shade_pal_index_simple(_color, calc_light_multiplier(_light)); uint32_t fg_red = (fg >> 24) & 0xff; uint32_t fg_green = (fg >> 16) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -492,20 +494,20 @@ public: class FillRevSubClampColumnRGBACommand : public DrawerCommand { - int dc_count; - BYTE *dc_dest; - int dc_pitch; - int dc_color; - fixed_t dc_light; + int _count; + BYTE *_dest; + int _pitch; + int _color; + fixed_t _light; public: FillRevSubClampColumnRGBACommand() { - dc_count = ::dc_count; - dc_dest = ::dc_dest; - dc_pitch = ::dc_pitch; - dc_color = ::dc_color; - dc_light = ::dc_light; + _count = dc_count; + _dest = dc_dest; + _pitch = 
dc_pitch; + _color = dc_color; + _light = dc_light; } void Execute(DrawerThread *thread) override @@ -513,14 +515,14 @@ public: int count; uint32_t *dest; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - int pitch = dc_pitch * thread->num_cores; + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); + int pitch = _pitch * thread->num_cores; - uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); + uint32_t fg = shade_pal_index_simple(_color, calc_light_multiplier(_light)); uint32_t fg_red = (fg >> 24) & 0xff; uint32_t fg_green = (fg >> 16) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -543,22 +545,22 @@ public: class DrawFuzzColumnRGBACommand : public DrawerCommand { - int dc_x; - int dc_yl; - int dc_yh; - BYTE *dc_destorg; - int dc_pitch; + int _x; + int _yl; + int _yh; + BYTE *_destorg; + int _pitch; int fuzzpos; int fuzzviewheight; public: DrawFuzzColumnRGBACommand() { - dc_x = ::dc_x; - dc_yl = ::dc_yl; - dc_yh = ::dc_yh; - dc_destorg = ::dc_destorg; - dc_pitch = ::dc_pitch; + _x = dc_x; + _yl = dc_yl; + _yh = dc_yh; + _destorg = dc_destorg; + _pitch = dc_pitch; fuzzpos = ::fuzzpos; fuzzviewheight = ::fuzzviewheight; } @@ -569,24 +571,24 @@ public: uint32_t *dest; // Adjust borders. Low... - if (dc_yl == 0) - dc_yl = 1; + if (_yl == 0) + _yl = 1; // .. and high. - if (dc_yh > fuzzviewheight) - dc_yh = fuzzviewheight; + if (_yh > fuzzviewheight) + _yh = fuzzviewheight; - count = thread->count_for_thread(dc_yl, dc_yh - dc_yl + 1); + count = thread->count_for_thread(_yl, _yh - _yl + 1); // Zero length. 
if (count <= 0) return; - dest = thread->dest_for_thread(dc_yl, dc_pitch, ylookup[dc_yl] + dc_x + (uint32_t*)dc_destorg); + dest = thread->dest_for_thread(_yl, _pitch, ylookup[_yl] + _x + (uint32_t*)_destorg); - int pitch = dc_pitch * thread->num_cores; + int pitch = _pitch * thread->num_cores; int fuzzstep = thread->num_cores; - int fuzz = (fuzzpos + thread->skipped_by_thread(dc_yl)) % FUZZTABLE; + int fuzz = (fuzzpos + thread->skipped_by_thread(_yl)) % FUZZTABLE; while (count > 0) { @@ -620,32 +622,32 @@ public: class DrawAddColumnRGBACommand : public DrawerCommand { - int dc_count; - BYTE *dc_dest; - DWORD dc_iscale; - DWORD dc_texturefrac; - const BYTE *dc_source; - int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; - BYTE *dc_colormap; + int _count; + BYTE *_dest; + DWORD _iscale; + DWORD _texturefrac; + const BYTE *_source; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; + BYTE *_colormap; public: DrawAddColumnRGBACommand() { - dc_count = ::dc_count; - dc_dest = ::dc_dest; - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_source = ::dc_source; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; - dc_colormap = ::dc_colormap; + _count = dc_count; + _dest = dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _source = dc_source; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; + _colormap = dc_colormap; } void Execute(DrawerThread *thread) override @@ -655,25 +657,25 @@ public: fixed_t frac; fixed_t fracstep; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - dest = thread->dest_for_thread(dc_dest_y, dc_pitch, 
(uint32_t*)dc_dest); + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - fracstep = dc_iscale * thread->num_cores; - frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); + fracstep = _iscale * thread->num_cores; + frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); { - const BYTE *source = dc_source; - int pitch = dc_pitch * thread->num_cores; + const BYTE *source = _source; + int pitch = _pitch * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - BYTE *colormap = dc_colormap; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; + BYTE *colormap = _colormap; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { @@ -701,28 +703,28 @@ public: class DrawTranslatedColumnRGBACommand : public DrawerCommand { - int dc_count; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - BYTE *dc_dest; - DWORD dc_iscale; - DWORD dc_texturefrac; - BYTE *dc_translation; - const BYTE *dc_source; - int dc_pitch; + int _count; + fixed_t _light; + ShadeConstants _shade_constants; + BYTE *_dest; + DWORD _iscale; + DWORD _texturefrac; + BYTE *_translation; + const BYTE *_source; + int _pitch; public: DrawTranslatedColumnRGBACommand() { - dc_count = ::dc_count; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_dest = ::dc_dest; - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_translation = ::dc_translation; - dc_source = ::dc_source; - dc_pitch = ::dc_pitch; + _count = dc_count; + _light = dc_light; + _shade_constants = dc_shade_constants; + _dest = dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _translation = dc_translation; + _source = dc_source; + _pitch = dc_pitch; } void 
Execute(DrawerThread *thread) override @@ -732,23 +734,23 @@ public: fixed_t frac; fixed_t fracstep; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - fracstep = dc_iscale * thread->num_cores; - frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); + fracstep = _iscale * thread->num_cores; + frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); { // [RH] Local copies of global vars to improve compiler optimizations - BYTE *translation = dc_translation; - const BYTE *source = dc_source; - int pitch = dc_pitch * thread->num_cores; + BYTE *translation = _translation; + const BYTE *source = _source; + int pitch = _pitch * thread->num_cores; do { @@ -762,28 +764,32 @@ public: class DrawTlatedAddColumnRGBACommand : public DrawerCommand { - int dc_count; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - BYTE *dc_dest; - DWORD dc_iscale; - DWORD dc_texturefrac; - BYTE *dc_translation; - const BYTE *dc_source; - int dc_pitch; + int _count; + fixed_t _light; + ShadeConstants _shade_constants; + BYTE *_dest; + DWORD _iscale; + DWORD _texturefrac; + BYTE *_translation; + const BYTE *_source; + int _pitch; + fixed_t _srcalpha; + fixed_t _destalpha; public: DrawTlatedAddColumnRGBACommand() { - dc_count = ::dc_count; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_dest = ::dc_dest; - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_translation = ::dc_translation; - dc_source = ::dc_source; - dc_pitch = ::dc_pitch; + _count = dc_count; + _light = dc_light; + 
_shade_constants = dc_shade_constants; + _dest = dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _translation = dc_translation; + _source = dc_source; + _pitch = dc_pitch; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override @@ -793,25 +799,25 @@ public: fixed_t frac; fixed_t fracstep; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - fracstep = dc_iscale * thread->num_cores; - frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); + fracstep = _iscale * thread->num_cores; + frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); { - BYTE *translation = dc_translation; - const BYTE *source = dc_source; - int pitch = dc_pitch * thread->num_cores; + BYTE *translation = _translation; + const BYTE *source = _source; + int pitch = _pitch * thread->num_cores; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { @@ -840,28 +846,28 @@ public: class DrawShadedColumnRGBACommand : public DrawerCommand { private: - int dc_count; - BYTE *dc_dest; - DWORD dc_iscale; - DWORD dc_texturefrac; - fixed_t dc_light; - const BYTE *dc_source; - lighttable_t *dc_colormap; - int dc_color; - int dc_pitch; + int _count; + BYTE *_dest; + DWORD _iscale; + DWORD _texturefrac; + fixed_t _light; + const BYTE *_source; + lighttable_t *_colormap; + int _color; + int _pitch; public: DrawShadedColumnRGBACommand() 
{ - dc_count = ::dc_count; - dc_dest = ::dc_dest; - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_light = ::dc_light; - dc_source = ::dc_source; - dc_colormap = ::dc_colormap; - dc_color = ::dc_color; - dc_pitch = ::dc_pitch; + _count = dc_count; + _dest = dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _light = dc_light; + _source = dc_source; + _colormap = dc_colormap; + _color = dc_color; + _pitch = dc_pitch; } void Execute(DrawerThread *thread) override @@ -870,25 +876,25 @@ public: uint32_t *dest; fixed_t frac, fracstep; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - fracstep = dc_iscale * thread->num_cores; - frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); + fracstep = _iscale * thread->num_cores; + frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); - uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); + uint32_t fg = shade_pal_index_simple(_color, calc_light_multiplier(_light)); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; { - const BYTE *source = dc_source; - BYTE *colormap = dc_colormap; - int pitch = dc_pitch * thread->num_cores; + const BYTE *source = _source; + BYTE *colormap = _colormap; + int pitch = _pitch * thread->num_cores; do { @@ -913,30 +919,30 @@ public: class DrawAddClampColumnRGBACommand : public DrawerCommand { - int dc_count; - BYTE *dc_dest; - DWORD dc_iscale; - DWORD dc_texturefrac; - const BYTE *dc_source; - int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; + int _count; + BYTE *_dest; + DWORD _iscale; + DWORD _texturefrac; + const BYTE *_source; + int _pitch; + fixed_t 
_light; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; public: DrawAddClampColumnRGBACommand() { - dc_count = ::dc_count; - dc_dest = ::dc_dest; - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_source = ::dc_source; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; + _count = dc_count; + _dest = dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _source = dc_source; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override @@ -946,23 +952,23 @@ public: fixed_t frac; fixed_t fracstep; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - fracstep = dc_iscale * thread->num_cores; - frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); + fracstep = _iscale * thread->num_cores; + frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); { - const BYTE *source = dc_source; - int pitch = dc_pitch * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + const BYTE *source = _source; + int pitch = _pitch * thread->num_cores; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { @@ -989,32 +995,32 @@ public: class DrawAddClampTranslatedColumnRGBACommand : public DrawerCommand { - int dc_count; - 
BYTE *dc_dest; - DWORD dc_iscale; - DWORD dc_texturefrac; - BYTE *dc_translation; - const BYTE *dc_source; - int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; + int _count; + BYTE *_dest; + DWORD _iscale; + DWORD _texturefrac; + BYTE *_translation; + const BYTE *_source; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; public: DrawAddClampTranslatedColumnRGBACommand() { - dc_count = ::dc_count; - dc_dest = ::dc_dest; - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_translation = ::dc_translation; - dc_source = ::dc_source; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; + _count = dc_count; + _dest = dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _translation = dc_translation; + _source = dc_source; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override @@ -1024,24 +1030,24 @@ public: fixed_t frac; fixed_t fracstep; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - fracstep = dc_iscale * thread->num_cores; - frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); + fracstep = _iscale * thread->num_cores; + frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); { - BYTE *translation = dc_translation; - const BYTE *source = dc_source; - int pitch = dc_pitch * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + BYTE *translation = 
_translation; + const BYTE *source = _source; + int pitch = _pitch * thread->num_cores; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { @@ -1068,30 +1074,30 @@ public: class DrawSubClampColumnRGBACommand : public DrawerCommand { - int dc_count; - BYTE *dc_dest; - DWORD dc_iscale; - DWORD dc_texturefrac; - const BYTE *dc_source; - int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; + int _count; + BYTE *_dest; + DWORD _iscale; + DWORD _texturefrac; + const BYTE *_source; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; public: DrawSubClampColumnRGBACommand() { - dc_count = ::dc_count; - dc_dest = ::dc_dest; - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_source = ::dc_source; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; + _count = dc_count; + _dest = dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _source = dc_source; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override @@ -1101,23 +1107,23 @@ public: fixed_t frac; fixed_t fracstep; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - fracstep = dc_iscale * thread->num_cores; - frac = dc_texturefrac + dc_iscale * 
thread->skipped_by_thread(dc_dest_y); + fracstep = _iscale * thread->num_cores; + frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); { - const BYTE *source = dc_source; - int pitch = dc_pitch * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + const BYTE *source = _source; + int pitch = _pitch * thread->num_cores; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { @@ -1144,32 +1150,32 @@ public: class DrawSubClampTranslatedColumnRGBACommand : public DrawerCommand { - int dc_count; - BYTE *dc_dest; - DWORD dc_iscale; - DWORD dc_texturefrac; - const BYTE *dc_source; - int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; - BYTE *dc_translation; + int _count; + BYTE *_dest; + DWORD _iscale; + DWORD _texturefrac; + const BYTE *_source; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; + BYTE *_translation; public: DrawSubClampTranslatedColumnRGBACommand() { - dc_count = ::dc_count; - dc_dest = ::dc_dest; - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_source = ::dc_source; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; - dc_translation = ::dc_translation; + _count = dc_count; + _dest = dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _source = dc_source; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; + _translation = dc_translation; } void 
Execute(DrawerThread *thread) override @@ -1179,24 +1185,24 @@ public: fixed_t frac; fixed_t fracstep; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - fracstep = dc_iscale * thread->num_cores; - frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); + fracstep = _iscale * thread->num_cores; + frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); { - BYTE *translation = dc_translation; - const BYTE *source = dc_source; - int pitch = dc_pitch * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + BYTE *translation = _translation; + const BYTE *source = _source; + int pitch = _pitch * thread->num_cores; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { @@ -1223,30 +1229,30 @@ public: class DrawRevSubClampColumnRGBACommand : public DrawerCommand { - int dc_count; - BYTE *dc_dest; - DWORD dc_iscale; - DWORD dc_texturefrac; - const BYTE *dc_source; - int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; + int _count; + BYTE *_dest; + DWORD _iscale; + DWORD _texturefrac; + const BYTE *_source; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; public: DrawRevSubClampColumnRGBACommand() { - dc_count = ::dc_count; - dc_dest = ::dc_dest; - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_source = ::dc_source; - dc_pitch = ::dc_pitch; - 
dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; + _count = dc_count; + _dest = dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _source = dc_source; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override @@ -1256,22 +1262,22 @@ public: fixed_t frac; fixed_t fracstep; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - fracstep = dc_iscale * thread->num_cores; - frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); + fracstep = _iscale * thread->num_cores; + frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); { - const BYTE *source = dc_source; - int pitch = dc_pitch * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + const BYTE *source = _source; + int pitch = _pitch * thread->num_cores; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { @@ -1298,32 +1304,32 @@ public: class DrawRevSubClampTranslatedColumnRGBACommand : public DrawerCommand { - int dc_count; - BYTE *dc_dest; - DWORD dc_iscale; - DWORD dc_texturefrac; - const BYTE *dc_source; - int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; - BYTE *dc_translation; + int _count; + BYTE *_dest; + DWORD _iscale; + DWORD 
_texturefrac; + const BYTE *_source; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; + BYTE *_translation; public: DrawRevSubClampTranslatedColumnRGBACommand() { - dc_count = ::dc_count; - dc_dest = ::dc_dest; - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_source = ::dc_source; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; - dc_translation = ::dc_translation; + _count = dc_count; + _dest = dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _source = dc_source; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; + _translation = dc_translation; } void Execute(DrawerThread *thread) override @@ -1333,24 +1339,24 @@ public: fixed_t frac; fixed_t fracstep; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - fracstep = dc_iscale * thread->num_cores; - frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); + fracstep = _iscale * thread->num_cores; + frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); { - BYTE *translation = dc_translation; - const BYTE *source = dc_source; - int pitch = dc_pitch * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + BYTE *translation = _translation; + const BYTE *source = _source; + int pitch = _pitch * thread->num_cores; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> 
(FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { @@ -1377,42 +1383,42 @@ public: class DrawSpanRGBACommand : public DrawerCommand { - const uint32_t *ds_source; - fixed_t ds_xfrac; - fixed_t ds_yfrac; - fixed_t ds_xstep; - fixed_t ds_ystep; - int ds_x1; - int ds_x2; - int ds_y; - int ds_xbits; - int ds_ybits; - BYTE *dc_destorg; - fixed_t ds_light; - ShadeConstants ds_shade_constants; + const uint32_t *_source; + fixed_t _xfrac; + fixed_t _yfrac; + fixed_t _xstep; + fixed_t _ystep; + int _x1; + int _x2; + int _y; + int _xbits; + int _ybits; + BYTE *_destorg; + fixed_t _light; + ShadeConstants _shade_constants; public: DrawSpanRGBACommand() { - ds_source = (const uint32_t*)::ds_source; - ds_xfrac = ::ds_xfrac; - ds_yfrac = ::ds_yfrac; - ds_xstep = ::ds_xstep; - ds_ystep = ::ds_ystep; - ds_x1 = ::ds_x1; - ds_x2 = ::ds_x2; - ds_y = ::ds_y; - ds_xbits = ::ds_xbits; - ds_ybits = ::ds_ybits; - dc_destorg = ::dc_destorg; - ds_light = ::ds_light; - ds_shade_constants = ::ds_shade_constants; + _source = (const uint32_t*)ds_source; + _xfrac = ds_xfrac; + _yfrac = ds_yfrac; + _xstep = ds_xstep; + _ystep = ds_ystep; + _x1 = ds_x1; + _x2 = ds_x2; + _y = ds_y; + _xbits = ds_xbits; + _ybits = ds_ybits; + _destorg = dc_destorg; + _light = ds_light; + _shade_constants = ds_shade_constants; } #ifdef NO_SSE void Execute(DrawerThread *thread) override { - if (thread->line_skipped_by_thread(ds_y)) + if (thread->line_skipped_by_thread(_y)) return; dsfixed_t xfrac; @@ -1420,24 +1426,24 @@ public: dsfixed_t xstep; dsfixed_t ystep; uint32_t* dest; - const uint32_t* source = ds_source; + const uint32_t* source = _source; int count; int spot; - xfrac = ds_xfrac; - yfrac = ds_yfrac; + xfrac = _xfrac; + yfrac = _yfrac; - dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; + dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; - count = ds_x2 - ds_x1 + 1; + count = _x2 - _x1 + 1; - xstep = ds_xstep; - ystep = ds_ystep; + 
xstep = _xstep; + ystep = _ystep; - uint32_t light = calc_light_multiplier(ds_light); - ShadeConstants shade_constants = ds_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - if (ds_xbits == 6 && ds_ybits == 6) + if (_xbits == 6 && _ybits == 6) { // 64x64 is the most common case by far, so special case it. @@ -1456,9 +1462,9 @@ public: } else { - BYTE yshift = 32 - ds_ybits; - BYTE xshift = yshift - ds_xbits; - int xmask = ((1 << ds_xbits) - 1) << ds_ybits; + BYTE yshift = 32 - _ybits; + BYTE xshift = yshift - _xbits; + int xmask = ((1 << _xbits) - 1) << _ybits; do { @@ -1477,7 +1483,7 @@ public: #else void Execute(DrawerThread *thread) override { - if (thread->line_skipped_by_thread(ds_y)) + if (thread->line_skipped_by_thread(_y)) return; dsfixed_t xfrac; @@ -1485,24 +1491,24 @@ public: dsfixed_t xstep; dsfixed_t ystep; uint32_t* dest; - const uint32_t* source = ds_source; + const uint32_t* source = _source; int count; int spot; - xfrac = ds_xfrac; - yfrac = ds_yfrac; + xfrac = _xfrac; + yfrac = _yfrac; - dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; + dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; - count = ds_x2 - ds_x1 + 1; + count = _x2 - _x1 + 1; - xstep = ds_xstep; - ystep = ds_ystep; + xstep = _xstep; + ystep = _ystep; - uint32_t light = calc_light_multiplier(ds_light); - ShadeConstants shade_constants = ds_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - if (ds_xbits == 6 && ds_ybits == 6) + if (_xbits == 6 && _ybits == 6) { // 64x64 is the most common case by far, so special case it. 
@@ -1602,9 +1608,9 @@ public: } else { - BYTE yshift = 32 - ds_ybits; - BYTE xshift = yshift - ds_xbits; - int xmask = ((1 << ds_xbits) - 1) << ds_ybits; + BYTE yshift = 32 - _ybits; + BYTE xshift = yshift - _xbits; + int xmask = ((1 << _xbits) - 1) << _ybits; int sse_count = count / 4; count -= sse_count * 4; @@ -1698,42 +1704,42 @@ public: class DrawSpanMaskedRGBACommand : public DrawerCommand { - const uint32_t *ds_source; - fixed_t ds_light; - ShadeConstants ds_shade_constants; - fixed_t ds_xfrac; - fixed_t ds_yfrac; - BYTE *dc_destorg; - int ds_x1; - int ds_x2; - int ds_y1; - int ds_y; - fixed_t ds_xstep; - fixed_t ds_ystep; - int ds_xbits; - int ds_ybits; + const uint32_t *_source; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _xfrac; + fixed_t _yfrac; + BYTE *_destorg; + int _x1; + int _x2; + int _y1; + int _y; + fixed_t _xstep; + fixed_t _ystep; + int _xbits; + int _ybits; public: DrawSpanMaskedRGBACommand() { - ds_source = (const uint32_t*)::ds_source; - ds_light = ::ds_light; - ds_shade_constants = ::ds_shade_constants; - ds_xfrac = ::ds_xfrac; - ds_yfrac = ::ds_yfrac; - dc_destorg = ::dc_destorg; - ds_x1 = ::ds_x1; - ds_x2 = ::ds_x2; - ds_y = ::ds_y; - ds_xstep = ::ds_xstep; - ds_ystep = ::ds_ystep; - ds_xbits = ::ds_xbits; - ds_ybits = ::ds_ybits; + _source = (const uint32_t*)ds_source; + _light = ds_light; + _shade_constants = ds_shade_constants; + _xfrac = ds_xfrac; + _yfrac = ds_yfrac; + _destorg = dc_destorg; + _x1 = ds_x1; + _x2 = ds_x2; + _y = ds_y; + _xstep = ds_xstep; + _ystep = ds_ystep; + _xbits = ds_xbits; + _ybits = ds_ybits; } void Execute(DrawerThread *thread) override { - if (thread->line_skipped_by_thread(ds_y)) + if (thread->line_skipped_by_thread(_y)) return; dsfixed_t xfrac; @@ -1741,24 +1747,24 @@ public: dsfixed_t xstep; dsfixed_t ystep; uint32_t* dest; - const uint32_t* source = ds_source; + const uint32_t* source = _source; int count; int spot; - uint32_t light = calc_light_multiplier(ds_light); - ShadeConstants 
shade_constants = ds_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - xfrac = ds_xfrac; - yfrac = ds_yfrac; + xfrac = _xfrac; + yfrac = _yfrac; - dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; + dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; - count = ds_x2 - ds_x1 + 1; + count = _x2 - _x1 + 1; - xstep = ds_xstep; - ystep = ds_ystep; + xstep = _xstep; + ystep = _ystep; - if (ds_xbits == 6 && ds_ybits == 6) + if (_xbits == 6 && _ybits == 6) { // 64x64 is the most common case by far, so special case it. do @@ -1778,9 +1784,9 @@ public: } else { - BYTE yshift = 32 - ds_ybits; - BYTE xshift = yshift - ds_xbits; - int xmask = ((1 << ds_xbits) - 1) << ds_ybits; + BYTE yshift = 32 - _ybits; + BYTE xshift = yshift - _xbits; + int xmask = ((1 << _xbits) - 1) << _ybits; do { uint32_t texdata; @@ -1801,42 +1807,46 @@ public: class DrawSpanTranslucentRGBACommand : public DrawerCommand { - const uint32_t *ds_source; - fixed_t ds_light; - ShadeConstants ds_shade_constants; - fixed_t ds_xfrac; - fixed_t ds_yfrac; - BYTE *dc_destorg; - int ds_x1; - int ds_x2; - int ds_y1; - int ds_y; - fixed_t ds_xstep; - fixed_t ds_ystep; - int ds_xbits; - int ds_ybits; + const uint32_t *_source; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _xfrac; + fixed_t _yfrac; + BYTE *_destorg; + int _x1; + int _x2; + int _y1; + int _y; + fixed_t _xstep; + fixed_t _ystep; + int _xbits; + int _ybits; + fixed_t _srcalpha; + fixed_t _destalpha; public: DrawSpanTranslucentRGBACommand() { - ds_source = (const uint32_t *)::ds_source; - ds_light = ::ds_light; - ds_shade_constants = ::ds_shade_constants; - ds_xfrac = ::ds_xfrac; - ds_yfrac = ::ds_yfrac; - dc_destorg = ::dc_destorg; - ds_x1 = ::ds_x1; - ds_x2 = ::ds_x2; - ds_y = ::ds_y; - ds_xstep = ::ds_xstep; - ds_ystep = ::ds_ystep; - ds_xbits = ::ds_xbits; - ds_ybits = ::ds_ybits; + _source = (const uint32_t *)ds_source; + _light = ds_light; + _shade_constants = 
ds_shade_constants; + _xfrac = ds_xfrac; + _yfrac = ds_yfrac; + _destorg = dc_destorg; + _x1 = ds_x1; + _x2 = ds_x2; + _y = ds_y; + _xstep = ds_xstep; + _ystep = ds_ystep; + _xbits = ds_xbits; + _ybits = ds_ybits; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override { - if (thread->line_skipped_by_thread(ds_y)) + if (thread->line_skipped_by_thread(_y)) return; dsfixed_t xfrac; @@ -1844,27 +1854,27 @@ public: dsfixed_t xstep; dsfixed_t ystep; uint32_t* dest; - const uint32_t* source = ds_source; + const uint32_t* source = _source; int count; int spot; - xfrac = ds_xfrac; - yfrac = ds_yfrac; + xfrac = _xfrac; + yfrac = _yfrac; - dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; + dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; - count = ds_x2 - ds_x1 + 1; + count = _x2 - _x1 + 1; - xstep = ds_xstep; - ystep = ds_ystep; + xstep = _xstep; + ystep = _ystep; - uint32_t light = calc_light_multiplier(ds_light); - ShadeConstants shade_constants = ds_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - if (ds_xbits == 6 && ds_ybits == 6) + if (_xbits == 6 && _ybits == 6) { // 64x64 is the most common case by far, so special case it. 
do @@ -1892,9 +1902,9 @@ public: } else { - BYTE yshift = 32 - ds_ybits; - BYTE xshift = yshift - ds_xbits; - int xmask = ((1 << ds_xbits) - 1) << ds_ybits; + BYTE yshift = 32 - _ybits; + BYTE xshift = yshift - _xbits; + int xmask = ((1 << _xbits) - 1) << _ybits; do { spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); @@ -1923,42 +1933,46 @@ public: class DrawSpanMaskedTranslucentRGBACommand : public DrawerCommand { - const uint32_t *ds_source; - fixed_t ds_light; - ShadeConstants ds_shade_constants; - fixed_t ds_xfrac; - fixed_t ds_yfrac; - BYTE *dc_destorg; - int ds_x1; - int ds_x2; - int ds_y1; - int ds_y; - fixed_t ds_xstep; - fixed_t ds_ystep; - int ds_xbits; - int ds_ybits; + const uint32_t *_source; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _xfrac; + fixed_t _yfrac; + BYTE *_destorg; + int _x1; + int _x2; + int _y1; + int _y; + fixed_t _xstep; + fixed_t _ystep; + int _xbits; + int _ybits; + fixed_t _srcalpha; + fixed_t _destalpha; public: DrawSpanMaskedTranslucentRGBACommand() { - ds_source = (const uint32_t*)::ds_source; - ds_light = ::ds_light; - ds_shade_constants = ::ds_shade_constants; - ds_xfrac = ::ds_xfrac; - ds_yfrac = ::ds_yfrac; - dc_destorg = ::dc_destorg; - ds_x1 = ::ds_x1; - ds_x2 = ::ds_x2; - ds_y = ::ds_y; - ds_xstep = ::ds_xstep; - ds_ystep = ::ds_ystep; - ds_xbits = ::ds_xbits; - ds_ybits = ::ds_ybits; + _source = (const uint32_t*)ds_source; + _light = ds_light; + _shade_constants = ds_shade_constants; + _xfrac = ds_xfrac; + _yfrac = ds_yfrac; + _destorg = dc_destorg; + _x1 = ds_x1; + _x2 = ds_x2; + _y = ds_y; + _xstep = ds_xstep; + _ystep = ds_ystep; + _xbits = ds_xbits; + _ybits = ds_ybits; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override { - if (thread->line_skipped_by_thread(ds_y)) + if (thread->line_skipped_by_thread(_y)) return; dsfixed_t xfrac; @@ -1966,27 +1980,27 @@ public: dsfixed_t xstep; dsfixed_t ystep; uint32_t* dest; - const uint32_t* source = 
ds_source; + const uint32_t* source = _source; int count; int spot; - uint32_t light = calc_light_multiplier(ds_light); - ShadeConstants shade_constants = ds_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - xfrac = ds_xfrac; - yfrac = ds_yfrac; + xfrac = _xfrac; + yfrac = _yfrac; - dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; + dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; - count = ds_x2 - ds_x1 + 1; + count = _x2 - _x1 + 1; - xstep = ds_xstep; - ystep = ds_ystep; + xstep = _xstep; + ystep = _ystep; - if (ds_xbits == 6 && ds_ybits == 6) + if (_xbits == 6 && _ybits == 6) { // 64x64 is the most common case by far, so special case it. do @@ -2019,9 +2033,9 @@ public: } else { - BYTE yshift = 32 - ds_ybits; - BYTE xshift = yshift - ds_xbits; - int xmask = ((1 << ds_xbits) - 1) << ds_ybits; + BYTE yshift = 32 - _ybits; + BYTE xshift = yshift - _xbits; + int xmask = ((1 << _xbits) - 1) << _ybits; do { uint32_t texdata; @@ -2055,42 +2069,46 @@ public: class DrawSpanAddClampRGBACommand : public DrawerCommand { - const uint32_t *ds_source; - fixed_t ds_light; - ShadeConstants ds_shade_constants; - fixed_t ds_xfrac; - fixed_t ds_yfrac; - BYTE *dc_destorg; - int ds_x1; - int ds_x2; - int ds_y1; - int ds_y; - fixed_t ds_xstep; - fixed_t ds_ystep; - int ds_xbits; - int ds_ybits; + const uint32_t *_source; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _xfrac; + fixed_t _yfrac; + BYTE *_destorg; + int _x1; + int _x2; + int _y1; + int _y; + fixed_t _xstep; + fixed_t _ystep; + int _xbits; + int _ybits; + fixed_t _srcalpha; + fixed_t _destalpha; public: DrawSpanAddClampRGBACommand() { - ds_source = (const uint32_t*)::ds_source; - ds_light = ::ds_light; - ds_shade_constants = 
::ds_shade_constants; - ds_xfrac = ::ds_xfrac; - ds_yfrac = ::ds_yfrac; - dc_destorg = ::dc_destorg; - ds_x1 = ::ds_x1; - ds_x2 = ::ds_x2; - ds_y = ::ds_y; - ds_xstep = ::ds_xstep; - ds_ystep = ::ds_ystep; - ds_xbits = ::ds_xbits; - ds_ybits = ::ds_ybits; + _source = (const uint32_t*)ds_source; + _light = ds_light; + _shade_constants = ds_shade_constants; + _xfrac = ds_xfrac; + _yfrac = ds_yfrac; + _destorg = dc_destorg; + _x1 = ds_x1; + _x2 = ds_x2; + _y = ds_y; + _xstep = ds_xstep; + _ystep = ds_ystep; + _xbits = ds_xbits; + _ybits = ds_ybits; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override { - if (thread->line_skipped_by_thread(ds_y)) + if (thread->line_skipped_by_thread(_y)) return; dsfixed_t xfrac; @@ -2098,27 +2116,27 @@ public: dsfixed_t xstep; dsfixed_t ystep; uint32_t* dest; - const uint32_t* source = ds_source; + const uint32_t* source = _source; int count; int spot; - uint32_t light = calc_light_multiplier(ds_light); - ShadeConstants shade_constants = ds_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - xfrac = ds_xfrac; - yfrac = ds_yfrac; + xfrac = _xfrac; + yfrac = _yfrac; - dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; + dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; - count = ds_x2 - ds_x1 + 1; + count = _x2 - _x1 + 1; - xstep = ds_xstep; - ystep = ds_ystep; + xstep = _xstep; + ystep = _ystep; - if (ds_xbits == 6 && ds_ybits == 6) + if (_xbits == 6 && _ybits == 6) { // 64x64 is the most common case by far, so special case it. 
do @@ -2146,9 +2164,9 @@ public: } else { - BYTE yshift = 32 - ds_ybits; - BYTE xshift = yshift - ds_xbits; - int xmask = ((1 << ds_xbits) - 1) << ds_ybits; + BYTE yshift = 32 - _ybits; + BYTE xshift = yshift - _xbits; + int xmask = ((1 << _xbits) - 1) << _ybits; do { spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); @@ -2177,42 +2195,46 @@ public: class DrawSpanMaskedAddClampRGBACommand : public DrawerCommand { - const uint32_t *ds_source; - fixed_t ds_light; - ShadeConstants ds_shade_constants; - fixed_t ds_xfrac; - fixed_t ds_yfrac; - BYTE *dc_destorg; - int ds_x1; - int ds_x2; - int ds_y1; - int ds_y; - fixed_t ds_xstep; - fixed_t ds_ystep; - int ds_xbits; - int ds_ybits; + const uint32_t *_source; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _xfrac; + fixed_t _yfrac; + BYTE *_destorg; + int _x1; + int _x2; + int _y1; + int _y; + fixed_t _xstep; + fixed_t _ystep; + int _xbits; + int _ybits; + fixed_t _srcalpha; + fixed_t _destalpha; public: DrawSpanMaskedAddClampRGBACommand() { - ds_source = (const uint32_t*)::ds_source; - ds_light = ::ds_light; - ds_shade_constants = ::ds_shade_constants; - ds_xfrac = ::ds_xfrac; - ds_yfrac = ::ds_yfrac; - dc_destorg = ::dc_destorg; - ds_x1 = ::ds_x1; - ds_x2 = ::ds_x2; - ds_y = ::ds_y; - ds_xstep = ::ds_xstep; - ds_ystep = ::ds_ystep; - ds_xbits = ::ds_xbits; - ds_ybits = ::ds_ybits; + _source = (const uint32_t*)ds_source; + _light = ds_light; + _shade_constants = ds_shade_constants; + _xfrac = ds_xfrac; + _yfrac = ds_yfrac; + _destorg = dc_destorg; + _x1 = ds_x1; + _x2 = ds_x2; + _y = ds_y; + _xstep = ds_xstep; + _ystep = ds_ystep; + _xbits = ds_xbits; + _ybits = ds_ybits; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override { - if (thread->line_skipped_by_thread(ds_y)) + if (thread->line_skipped_by_thread(_y)) return; dsfixed_t xfrac; @@ -2220,27 +2242,27 @@ public: dsfixed_t xstep; dsfixed_t ystep; uint32_t* dest; - const uint32_t* source = ds_source; 
+ const uint32_t* source = _source; int count; int spot; - uint32_t light = calc_light_multiplier(ds_light); - ShadeConstants shade_constants = ds_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - xfrac = ds_xfrac; - yfrac = ds_yfrac; + xfrac = _xfrac; + yfrac = _yfrac; - dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; + dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; - count = ds_x2 - ds_x1 + 1; + count = _x2 - _x1 + 1; - xstep = ds_xstep; - ystep = ds_ystep; + xstep = _xstep; + ystep = _ystep; - if (ds_xbits == 6 && ds_ybits == 6) + if (_xbits == 6 && _ybits == 6) { // 64x64 is the most common case by far, so special case it. do @@ -2273,9 +2295,9 @@ public: } else { - BYTE yshift = 32 - ds_ybits; - BYTE xshift = yshift - ds_xbits; - int xmask = ((1 << ds_xbits) - 1) << ds_ybits; + BYTE yshift = 32 - _ybits; + BYTE xshift = yshift - _xbits; + int xmask = ((1 << _xbits) - 1) << _ybits; do { uint32_t texdata; @@ -2309,33 +2331,33 @@ public: class FillSpanRGBACommand : public DrawerCommand { - int ds_x1; - int ds_x2; - int ds_y; - BYTE *dc_destorg; - fixed_t ds_light; - int ds_color; + int _x1; + int _x2; + int _y; + BYTE *_destorg; + fixed_t _light; + int _color; public: FillSpanRGBACommand() { - ds_x1 = ::ds_x1; - ds_x2 = ::ds_x2; - ds_y = ::ds_y; - dc_destorg = ::dc_destorg; - ds_light = ::ds_light; - ds_color = ::ds_color; + _x1 = ds_x1; + _x2 = ds_x2; + _y = ds_y; + _destorg = dc_destorg; + _light = ds_light; + _color = ds_color; } void Execute(DrawerThread *thread) override { - if (thread->line_skipped_by_thread(ds_y)) + if (thread->line_skipped_by_thread(_y)) return; - uint32_t *dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; - int count = (ds_x2 - ds_x1 + 1); - uint32_t 
light = calc_light_multiplier(ds_light); - uint32_t color = shade_pal_index_simple(ds_color, light); + uint32_t *dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; + int count = (_x2 - _x1 + 1); + uint32_t light = calc_light_multiplier(_light); + uint32_t color = shade_pal_index_simple(_color, light); for (int i = 0; i < count; i++) dest[i] = color; } @@ -2343,45 +2365,45 @@ public: class Vlinec1RGBACommand : public DrawerCommand { - DWORD dc_iscale; - DWORD dc_texturefrac; - int dc_count; - const BYTE *dc_source; - BYTE *dc_dest; + DWORD _iscale; + DWORD _texturefrac; + int _count; + const BYTE *_source; + BYTE *_dest; int vlinebits; - int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; public: Vlinec1RGBACommand() { - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_count = ::dc_count; - dc_source = ::dc_source; - dc_dest = ::dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _count = dc_count; + _source = dc_source; + _dest = dc_dest; vlinebits = ::vlinebits; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; } void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(dc_dest_y, dc_count); + int count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - DWORD fracstep = dc_iscale * thread->num_cores; - DWORD frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); - const uint32 *source = (const uint32 *)dc_source; - uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + DWORD fracstep = _iscale * thread->num_cores; + DWORD frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); + const uint32 *source = (const uint32 *)_source; + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); int bits = 
vlinebits; - int pitch = dc_pitch * thread->num_cores; + int pitch = _pitch * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; do { @@ -2394,10 +2416,10 @@ public: class Vlinec4RGBACommand : public DrawerCommand { - BYTE *dc_dest; - int dc_count; - int dc_pitch; - ShadeConstants dc_shade_constants; + BYTE *_dest; + int _count; + int _pitch; + ShadeConstants _shade_constants; int vlinebits; fixed_t palookuplight[4]; DWORD vplce[4]; @@ -2407,10 +2429,10 @@ class Vlinec4RGBACommand : public DrawerCommand public: Vlinec4RGBACommand() { - dc_dest = ::dc_dest; - dc_count = ::dc_count; - dc_pitch = ::dc_pitch; - dc_shade_constants = ::dc_shade_constants; + _dest = dc_dest; + _count = dc_count; + _pitch = dc_pitch; + _shade_constants = dc_shade_constants; vlinebits = ::vlinebits; for (int i = 0; i < 4; i++) { @@ -2424,12 +2446,12 @@ public: #ifdef NO_SSE void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(dc_dest_y, dc_count); + int count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - int pitch = dc_pitch * thread->num_cores; + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); + int pitch = _pitch * thread->num_cores; int bits = vlinebits; DWORD place; @@ -2438,11 +2460,11 @@ public: uint32_t light2 = calc_light_multiplier(palookuplight[2]); uint32_t light3 = calc_light_multiplier(palookuplight[3]); - ShadeConstants shade_constants = dc_shade_constants; + ShadeConstants shade_constants = _shade_constants; DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = thread->skipped_by_thread(dc_dest_y); + int skipped = 
thread->skipped_by_thread(_dest_y); for (int i = 0; i < 4; i++) { local_vplce[i] += local_vince[i] * skipped; @@ -2461,24 +2483,24 @@ public: #else void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(dc_dest_y, dc_count); + int count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); int bits = vlinebits; - int pitch = dc_pitch * thread->num_cores; + int pitch = _pitch * thread->num_cores; uint32_t light0 = calc_light_multiplier(palookuplight[0]); uint32_t light1 = calc_light_multiplier(palookuplight[1]); uint32_t light2 = calc_light_multiplier(palookuplight[2]); uint32_t light3 = calc_light_multiplier(palookuplight[3]); - ShadeConstants shade_constants = dc_shade_constants; + ShadeConstants shade_constants = _shade_constants; DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = thread->skipped_by_thread(dc_dest_y); + int skipped = thread->skipped_by_thread(_dest_y); for (int i = 0; i < 4; i++) { local_vplce[i] += local_vince[i] * skipped; @@ -2543,45 +2565,45 @@ public: class Mvlinec1RGBACommand : public DrawerCommand { - DWORD dc_iscale; - DWORD dc_texturefrac; - int dc_count; - const BYTE *dc_source; - BYTE *dc_dest; + DWORD _iscale; + DWORD _texturefrac; + int _count; + const BYTE *_source; + BYTE *_dest; int mvlinebits; - int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; public: Mvlinec1RGBACommand() { - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_count = ::dc_count; - dc_source = ::dc_source; - dc_dest = ::dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _count = dc_count; + _source = dc_source; + _dest = dc_dest; mvlinebits = 
::mvlinebits; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; } void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(dc_dest_y, dc_count); + int count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - DWORD fracstep = dc_iscale * thread->num_cores; - DWORD frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); - const uint32 *source = (const uint32 *)dc_source; - uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + DWORD fracstep = _iscale * thread->num_cores; + DWORD frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); + const uint32 *source = (const uint32 *)_source; + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); int bits = mvlinebits; - int pitch = dc_pitch * thread->num_cores; + int pitch = _pitch * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; do { @@ -2598,10 +2620,10 @@ public: class Mvlinec4RGBACommand : public DrawerCommand { - BYTE *dc_dest; - int dc_count; - int dc_pitch; - ShadeConstants dc_shade_constants; + BYTE *_dest; + int _count; + int _pitch; + ShadeConstants _shade_constants; int mvlinebits; fixed_t palookuplight[4]; DWORD vplce[4]; @@ -2611,10 +2633,10 @@ class Mvlinec4RGBACommand : public DrawerCommand public: Mvlinec4RGBACommand() { - dc_dest = ::dc_dest; - dc_count = ::dc_count; - dc_pitch = ::dc_pitch; - dc_shade_constants = ::dc_shade_constants; + _dest = dc_dest; + _count = dc_count; + _pitch = dc_pitch; + _shade_constants = dc_shade_constants; mvlinebits = ::mvlinebits; for (int i = 0; i < 4; i++) { @@ -2628,12 +2650,12 @@ public: #ifdef NO_SSE void Execute(DrawerThread *thread) 
override { - int count = thread->count_for_thread(dc_dest_y, dc_count); + int count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - int pitch = dc_pitch * thread->num_cores; + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); + int pitch = _pitch * thread->num_cores; int bits = mvlinebits; DWORD place; @@ -2642,11 +2664,11 @@ public: uint32_t light2 = calc_light_multiplier(palookuplight[2]); uint32_t light3 = calc_light_multiplier(palookuplight[3]); - ShadeConstants shade_constants = dc_shade_constants; + ShadeConstants shade_constants = _shade_constants; DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = thread->skipped_by_thread(dc_dest_y); + int skipped = thread->skipped_by_thread(_dest_y); for (int i = 0; i < 4; i++) { local_vplce[i] += local_vince[i] * skipped; @@ -2666,12 +2688,12 @@ public: #else void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(dc_dest_y, dc_count); + int count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - int pitch = dc_pitch * thread->num_cores; + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); + int pitch = _pitch * thread->num_cores; int bits = mvlinebits; uint32_t light0 = calc_light_multiplier(palookuplight[0]); @@ -2679,11 +2701,11 @@ public: uint32_t light2 = calc_light_multiplier(palookuplight[2]); uint32_t light3 = calc_light_multiplier(palookuplight[3]); - ShadeConstants shade_constants = dc_shade_constants; + ShadeConstants shade_constants = _shade_constants; DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = 
thread->skipped_by_thread(dc_dest_y); + int skipped = thread->skipped_by_thread(_dest_y); for (int i = 0; i < 4; i++) { local_vplce[i] += local_vince[i] * skipped; @@ -2754,52 +2776,52 @@ public: class Tmvline1AddRGBACommand : public DrawerCommand { - DWORD dc_iscale; - DWORD dc_texturefrac; - int dc_count; - const BYTE *dc_source; - BYTE *dc_dest; + DWORD _iscale; + DWORD _texturefrac; + int _count; + const BYTE *_source; + BYTE *_dest; int tmvlinebits; - int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; public: Tmvline1AddRGBACommand() { - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_count = ::dc_count; - dc_source = ::dc_source; - dc_dest = ::dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _count = dc_count; + _source = dc_source; + _dest = dc_dest; tmvlinebits = ::tmvlinebits; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(dc_dest_y, dc_count); + int count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - DWORD fracstep = dc_iscale * thread->num_cores; - DWORD frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); - const uint32 *source = (const uint32 *)dc_source; - uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + DWORD fracstep = _iscale * thread->num_cores; + DWORD frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); + const uint32 *source = (const uint32 *)_source; + uint32_t *dest = thread->dest_for_thread(_dest_y, 
_pitch, (uint32_t*)_dest); int bits = tmvlinebits; - int pitch = dc_pitch * thread->num_cores; + int pitch = _pitch * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { @@ -2829,12 +2851,12 @@ public: class Tmvline4AddRGBACommand : public DrawerCommand { - BYTE *dc_dest; - int dc_count; - int dc_pitch; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; + BYTE *_dest; + int _count; + int _pitch; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; int tmvlinebits; fixed_t palookuplight[4]; DWORD vplce[4]; @@ -2844,12 +2866,12 @@ class Tmvline4AddRGBACommand : public DrawerCommand public: Tmvline4AddRGBACommand() { - dc_dest = ::dc_dest; - dc_count = ::dc_count; - dc_pitch = ::dc_pitch; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; + _dest = dc_dest; + _count = dc_count; + _pitch = dc_pitch; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; tmvlinebits = ::tmvlinebits; for (int i = 0; i < 4; i++) { @@ -2862,12 +2884,12 @@ public: void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(dc_dest_y, dc_count); + int count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - int pitch = dc_pitch * thread->num_cores; + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); + int pitch = _pitch * thread->num_cores; int bits = tmvlinebits; uint32_t light[4]; @@ -2876,14 
+2898,14 @@ public: light[2] = calc_light_multiplier(palookuplight[2]); light[3] = calc_light_multiplier(palookuplight[3]); - ShadeConstants shade_constants = dc_shade_constants; + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = thread->skipped_by_thread(dc_dest_y); + int skipped = thread->skipped_by_thread(_dest_y); for (int i = 0; i < 4; i++) { local_vplce[i] += local_vince[i] * skipped; @@ -2921,52 +2943,52 @@ public: class Tmvline1AddClampRGBACommand : public DrawerCommand { - DWORD dc_iscale; - DWORD dc_texturefrac; - int dc_count; - const BYTE *dc_source; - BYTE *dc_dest; + DWORD _iscale; + DWORD _texturefrac; + int _count; + const BYTE *_source; + BYTE *_dest; int tmvlinebits; - int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; public: Tmvline1AddClampRGBACommand() { - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_count = ::dc_count; - dc_source = ::dc_source; - dc_dest = ::dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _count = dc_count; + _source = dc_source; + _dest = dc_dest; tmvlinebits = ::tmvlinebits; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override { - int count = 
thread->count_for_thread(dc_dest_y, dc_count); + int count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - DWORD fracstep = dc_iscale * thread->num_cores; - DWORD frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); - const uint32 *source = (const uint32 *)dc_source; - uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + DWORD fracstep = _iscale * thread->num_cores; + DWORD frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); + const uint32 *source = (const uint32 *)_source; + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); int bits = tmvlinebits; - int pitch = dc_pitch * thread->num_cores; + int pitch = _pitch * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { @@ -2996,12 +3018,12 @@ public: class Tmvline4AddClampRGBACommand : public DrawerCommand { - BYTE *dc_dest; - int dc_count; - int dc_pitch; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; + BYTE *_dest; + int _count; + int _pitch; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; int tmvlinebits; fixed_t palookuplight[4]; DWORD vplce[4]; @@ -3011,12 +3033,12 @@ class Tmvline4AddClampRGBACommand : public DrawerCommand public: Tmvline4AddClampRGBACommand() { - dc_dest = ::dc_dest; - dc_count = ::dc_count; - dc_pitch = ::dc_pitch; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; + _dest = dc_dest; + _count = dc_count; + _pitch = dc_pitch; + _shade_constants = dc_shade_constants; + 
_srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; tmvlinebits = ::tmvlinebits; for (int i = 0; i < 4; i++) { @@ -3029,12 +3051,12 @@ public: void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(dc_dest_y, dc_count); + int count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - int pitch = dc_pitch * thread->num_cores; + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); + int pitch = _pitch * thread->num_cores; int bits = tmvlinebits; uint32_t light[4]; @@ -3043,14 +3065,14 @@ public: light[2] = calc_light_multiplier(palookuplight[2]); light[3] = calc_light_multiplier(palookuplight[3]); - ShadeConstants shade_constants = dc_shade_constants; + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = thread->skipped_by_thread(dc_dest_y); + int skipped = thread->skipped_by_thread(_dest_y); for (int i = 0; i < 4; i++) { local_vplce[i] += local_vince[i] * skipped; @@ -3088,52 +3110,52 @@ public: class Tmvline1SubClampRGBACommand : public DrawerCommand { - DWORD dc_iscale; - DWORD dc_texturefrac; - int dc_count; - const BYTE *dc_source; - BYTE *dc_dest; + DWORD _iscale; + DWORD _texturefrac; + int _count; + const BYTE *_source; + BYTE *_dest; int tmvlinebits; - int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; public: Tmvline1SubClampRGBACommand() { - dc_iscale = ::dc_iscale; - 
dc_texturefrac = ::dc_texturefrac; - dc_count = ::dc_count; - dc_source = ::dc_source; - dc_dest = ::dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _count = dc_count; + _source = dc_source; + _dest = dc_dest; tmvlinebits = ::tmvlinebits; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(dc_dest_y, dc_count); + int count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - DWORD fracstep = dc_iscale * thread->num_cores; - DWORD frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); - const uint32 *source = (const uint32 *)dc_source; - uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + DWORD fracstep = _iscale * thread->num_cores; + DWORD frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); + const uint32 *source = (const uint32 *)_source; + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); int bits = tmvlinebits; - int pitch = dc_pitch * thread->num_cores; + int pitch = _pitch * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { @@ -3163,12 +3185,12 @@ public: class Tmvline4SubClampRGBACommand : public DrawerCommand { - BYTE *dc_dest; - int dc_count; - int dc_pitch; - ShadeConstants dc_shade_constants; - fixed_t 
dc_srcalpha; - fixed_t dc_destalpha; + BYTE *_dest; + int _count; + int _pitch; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; int tmvlinebits; fixed_t palookuplight[4]; DWORD vplce[4]; @@ -3178,12 +3200,12 @@ class Tmvline4SubClampRGBACommand : public DrawerCommand public: Tmvline4SubClampRGBACommand() { - dc_dest = ::dc_dest; - dc_count = ::dc_count; - dc_pitch = ::dc_pitch; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; + _dest = dc_dest; + _count = dc_count; + _pitch = dc_pitch; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; tmvlinebits = ::tmvlinebits; for (int i = 0; i < 4; i++) { @@ -3196,12 +3218,12 @@ public: void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(dc_dest_y, dc_count); + int count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - int pitch = dc_pitch * thread->num_cores; + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); + int pitch = _pitch * thread->num_cores; int bits = tmvlinebits; uint32_t light[4]; @@ -3210,14 +3232,14 @@ public: light[2] = calc_light_multiplier(palookuplight[2]); light[3] = calc_light_multiplier(palookuplight[3]); - ShadeConstants shade_constants = dc_shade_constants; + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = thread->skipped_by_thread(dc_dest_y); + int skipped = thread->skipped_by_thread(_dest_y); for (int i = 0; i < 4; i++) { local_vplce[i] += 
local_vince[i] * skipped; @@ -3255,52 +3277,52 @@ public: class Tmvline1RevSubClampRGBACommand : public DrawerCommand { - DWORD dc_iscale; - DWORD dc_texturefrac; - int dc_count; - const BYTE *dc_source; - BYTE *dc_dest; + DWORD _iscale; + DWORD _texturefrac; + int _count; + const BYTE *_source; + BYTE *_dest; int tmvlinebits; - int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; public: Tmvline1RevSubClampRGBACommand() { - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_count = ::dc_count; - dc_source = ::dc_source; - dc_dest = ::dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _count = dc_count; + _source = dc_source; + _dest = dc_dest; tmvlinebits = ::tmvlinebits; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(dc_dest_y, dc_count); + int count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - DWORD fracstep = dc_iscale * thread->num_cores; - DWORD frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); - const uint32 *source = (const uint32 *)dc_source; - uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + DWORD fracstep = _iscale * thread->num_cores; + DWORD frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); + const uint32 *source = (const uint32 *)_source; + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); int bits = tmvlinebits; - int pitch = dc_pitch * thread->num_cores; + int pitch = _pitch * 
thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { @@ -3330,12 +3352,12 @@ public: class Tmvline4RevSubClampRGBACommand : public DrawerCommand { - BYTE *dc_dest; - int dc_count; - int dc_pitch; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; + BYTE *_dest; + int _count; + int _pitch; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; int tmvlinebits; fixed_t palookuplight[4]; DWORD vplce[4]; @@ -3345,12 +3367,12 @@ class Tmvline4RevSubClampRGBACommand : public DrawerCommand public: Tmvline4RevSubClampRGBACommand() { - dc_dest = ::dc_dest; - dc_count = ::dc_count; - dc_pitch = ::dc_pitch; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; + _dest = dc_dest; + _count = dc_count; + _pitch = dc_pitch; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; tmvlinebits = ::tmvlinebits; for (int i = 0; i < 4; i++) { @@ -3363,12 +3385,12 @@ public: void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(dc_dest_y, dc_count); + int count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - int pitch = dc_pitch * thread->num_cores; + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); + int pitch = _pitch * thread->num_cores; int bits = tmvlinebits; uint32_t light[4]; @@ -3377,14 +3399,14 @@ public: light[2] = calc_light_multiplier(palookuplight[2]); light[3] = 
calc_light_multiplier(palookuplight[3]); - ShadeConstants shade_constants = dc_shade_constants; + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = thread->skipped_by_thread(dc_dest_y); + int skipped = thread->skipped_by_thread(_dest_y); for (int i = 0; i < 4; i++) { local_vplce[i] += local_vince[i] * skipped; @@ -3425,9 +3447,9 @@ class DrawFogBoundaryLineRGBACommand : public DrawerCommand int _y; int _x; int _x2; - BYTE *dc_destorg; - fixed_t dc_light; - ShadeConstants dc_shade_constants; + BYTE *_destorg; + fixed_t _light; + ShadeConstants _shade_constants; public: DrawFogBoundaryLineRGBACommand(int y, int x, int x2) @@ -3436,9 +3458,9 @@ public: _x = x; _x2 = x2; - dc_destorg = ::dc_destorg; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; + _destorg = dc_destorg; + _light = dc_light; + _shade_constants = dc_shade_constants; } void Execute(DrawerThread *thread) override @@ -3450,10 +3472,10 @@ public: int x = _x; int x2 = _x2; - uint32_t *dest = ylookup[y] + (uint32_t*)dc_destorg; + uint32_t *dest = ylookup[y] + (uint32_t*)_destorg; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants constants = _shade_constants; do { @@ -3492,6 +3514,157 @@ public: } }; +class DrawTiltedSpanRGBACommand : public DrawerCommand +{ + int _y; + int _x1; + int _x2; + BYTE *_destorg; + fixed_t _light; + ShadeConstants _shade_constants; + const BYTE *_source; + +public: + DrawTiltedSpanRGBACommand(int y, int x1, int x2) + { + _y = y; + _x1 = x1; + _x2 = x2; + + _destorg = dc_destorg; + _source = 
ds_source; + } + + void Execute(DrawerThread *thread) override + { + if (thread->line_skipped_by_thread(_y)) + return; + + int y = _y; + int x1 = _x1; + int x2 = _x2; + + // Slopes are broken currently in master. + // Until R_DrawTiltedPlane is fixed we are just going to fill with a solid color. + + uint32_t *source = (uint32_t*)_source; + uint32_t *dest = ylookup[y] + x1 + (uint32_t*)_destorg; + + int count = x2 - x1 + 1; + while (count > 0) + { + *(dest++) = source[0]; + count--; + } + } +}; + +class DrawColoredSpanRGBACommand : public DrawerCommand +{ + int _y; + int _x1; + int _x2; + BYTE *_destorg; + fixed_t _light; + int _color; + +public: + DrawColoredSpanRGBACommand(int y, int x1, int x2) + { + _y = y; + _x1 = x1; + _x2 = x2; + + _destorg = dc_destorg; + _light = ds_light; + _color = ds_color; + } + + void Execute(DrawerThread *thread) override + { + if (thread->line_skipped_by_thread(_y)) + return; + + int y = _y; + int x1 = _x1; + int x2 = _x2; + + uint32_t *dest = ylookup[y] + x1 + (uint32_t*)_destorg; + int count = (x2 - x1 + 1); + uint32_t light = calc_light_multiplier(_light); + uint32_t color = shade_pal_index_simple(_color, light); + for (int i = 0; i < count; i++) + dest[i] = color; + } +}; + +class FillTransColumnRGBACommand : public DrawerCommand +{ + int _x; + int _y1; + int _y2; + int _color; + int _a; + BYTE *_destorg; + int _pitch; + fixed_t _light; + +public: + FillTransColumnRGBACommand(int x, int y1, int y2, int color, int a) + { + _x = x; + _y1 = y1; + _y2 = y2; + _color = color; + _a = a; + + _destorg = dc_destorg; + _pitch = dc_pitch; + } + + void Execute(DrawerThread *thread) override + { + int x = _x; + int y1 = _y1; + int y2 = _y2; + int color = _color; + int a = _a; + + int ycount = thread->count_for_thread(y1, y2 - y1 + 1); + if (ycount <= 0) + return; + + uint32_t fg = GPalette.BaseColors[color].d; + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t alpha = a 
+ 1; + uint32_t inv_alpha = 256 - alpha; + + fg_red *= alpha; + fg_green *= alpha; + fg_blue *= alpha; + + int spacing = _pitch * thread->num_cores; + uint32_t *dest = thread->dest_for_thread(y1, _pitch, ylookup[y1] + x + (uint32_t*)_destorg); + + for (int y = 0; y < ycount; y++) + { + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = (fg_red + bg_red * inv_alpha) / 256; + uint32_t green = (fg_green + bg_green * inv_alpha) / 256; + uint32_t blue = (fg_blue + bg_blue * inv_alpha) / 256; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += spacing; + } + } +}; + ApplySpecialColormapRGBACommand::ApplySpecialColormapRGBACommand(FSpecialColormap *colormap, DFrameBuffer *screen) { buffer = screen->GetBuffer(); diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index 47ea75260d..0ab3e298a4 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -184,6 +184,8 @@ public: virtual void Execute(DrawerThread *thread) = 0; }; +EXTERN_CVAR(Bool, r_multithreaded) + // Manages queueing up commands and executing them on worker threads class DrawerCommandQueue { @@ -227,7 +229,7 @@ public: static void QueueCommand(Types &&... 
args) { auto queue = Instance(); - if (queue->threaded_render == 0) + if (queue->threaded_render == 0 || !r_multithreaded) { T command(std::forward(args)...); command.Execute(&queue->single_core_thread); diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp index d2d715c8dd..2311cb4477 100644 --- a/src/r_drawt_rgba.cpp +++ b/src/r_drawt_rgba.cpp @@ -59,8 +59,8 @@ class RtCopy1colRGBACommand : public DrawerCommand int sx; int yl; int yh; - BYTE *dc_destorg; - int dc_pitch; + BYTE *_destorg; + int _pitch; public: RtCopy1colRGBACommand(int hx, int sx, int yl, int yh) @@ -70,8 +70,8 @@ public: this->yl = yl; this->yh = yh; - dc_destorg = ::dc_destorg; - dc_pitch = ::dc_pitch; + _destorg = dc_destorg; + _pitch = dc_pitch; } void Execute(DrawerThread *thread) override @@ -85,9 +85,9 @@ public: if (count <= 0) return; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4 + hx] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = thread->num_cores * 4; if (count & 1) { @@ -121,11 +121,11 @@ class RtMap1colRGBACommand : public DrawerCommand int sx; int yl; int yh; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - BYTE *dc_destorg; - int dc_pitch; - BYTE *dc_colormap; + fixed_t _light; + ShadeConstants _shade_constants; + BYTE *_destorg; + int _pitch; + BYTE *_colormap; public: RtMap1colRGBACommand(int hx, int sx, int yl, int yh) @@ -135,11 +135,11 @@ public: this->yl = yl; this->yh = yh; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_destorg = ::dc_destorg; - dc_pitch = ::dc_pitch; - dc_colormap = ::dc_colormap; + _light = dc_light; + _shade_constants = dc_shade_constants; + _destorg = dc_destorg; + _pitch = dc_pitch; + _colormap = dc_colormap; } void Execute(DrawerThread *thread) override @@ -154,15 +154,15 
@@ public: if (count <= 0) return; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4 + hx] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = thread->num_cores * 4; - BYTE *colormap = dc_colormap; + BYTE *colormap = _colormap; if (count & 1) { *dest = shade_pal_index(colormap[*source], light, shade_constants); @@ -186,11 +186,11 @@ class RtMap4colsRGBACommand : public DrawerCommand int sx; int yl; int yh; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - BYTE *dc_destorg; - int dc_pitch; - BYTE *colormap; + fixed_t _light; + ShadeConstants _shade_constants; + BYTE *_destorg; + int _pitch; + BYTE *_colormap; public: RtMap4colsRGBACommand(int sx, int yl, int yh) @@ -199,11 +199,11 @@ public: this->yl = yl; this->yh = yh; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_destorg = ::dc_destorg; - dc_pitch = ::dc_pitch; - dc_colormap = ::dc_colormap; + _light = dc_light; + _shade_constants = dc_shade_constants; + _destorg = dc_destorg; + _pitch = dc_pitch; + _colormap = dc_colormap; } #ifdef NO_SSE @@ -219,15 +219,15 @@ public: if (count <= 0) return; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4] + 
thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = thread->num_cores * 4; - BYTE *colormap = dc_colormap; + BYTE *colormap = _colormap; if (count & 1) { dest[0] = shade_pal_index(colormap[source[0]], light, shade_constants); @@ -266,16 +266,16 @@ public: if (count <= 0) return; - ShadeConstants shade_constants = dc_shade_constants; - uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = _shade_constants; + uint32_t light = calc_light_multiplier(_light); uint32_t *palette = (uint32_t*)GPalette.BaseColors; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = thread->num_cores * 4; - BYTE *colormap = dc_colormap; + BYTE *colormap = _colormap; if (shade_constants.simple_shade) { @@ -507,13 +507,13 @@ class RtAdd1colRGBACommand : public DrawerCommand int sx; int yl; int yh; - BYTE *dc_destorg; - int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; - BYTE *dc_colormap; + BYTE *_destorg; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; + BYTE *_colormap; public: RtAdd1colRGBACommand(int hx, int sx, int yl, int yh) @@ -523,13 +523,13 @@ public: this->yl = yl; this->yh = yh; - dc_destorg = ::dc_destorg; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; - dc_colormap = ::dc_colormap; + _destorg = dc_destorg; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; + _colormap = 
dc_colormap; } void Execute(DrawerThread *thread) override @@ -544,17 +544,17 @@ public: if (count <= 0) return; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4 + hx] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - BYTE *colormap = dc_colormap; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; + BYTE *colormap = _colormap; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { uint32_t fg = shade_pal_index(colormap[*source], light, shade_constants); @@ -583,13 +583,13 @@ class RtAdd4colsRGBACommand : public DrawerCommand int sx; int yl; int yh; - BYTE *dc_destorg; - int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - BYTE *dc_colormap; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; + BYTE *_destorg; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; + BYTE *_colormap; + fixed_t _srcalpha; + fixed_t _destalpha; public: RtAdd4colsRGBACommand(int sx, int yl, int yh) @@ -598,13 +598,13 @@ public: this->yl = yl; this->yh = yh; - dc_destorg = ::dc_destorg; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_colormap = ::dc_colormap; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; + _destorg = dc_destorg; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; + _colormap = dc_colormap; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } #ifdef NO_SSE @@ -620,17 
+620,17 @@ public: if (count <= 0) return; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - BYTE *colormap = dc_colormap; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; + BYTE *colormap = _colormap; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { for (int i = 0; i < 4; i++) @@ -668,19 +668,19 @@ public: if (count <= 0) return; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); + uint32_t light = calc_light_multiplier(_light); uint32_t *palette = (uint32_t*)GPalette.BaseColors; - BYTE *colormap = dc_colormap; + BYTE *colormap = _colormap; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - ShadeConstants shade_constants = dc_shade_constants; + ShadeConstants shade_constants = _shade_constants; if (shade_constants.simple_shade) { @@ -764,11 +764,11 @@ class RtShaded1colRGBACommand : public DrawerCommand int sx; int yl; int 
yh; - lighttable_t *dc_colormap; - BYTE *dc_destorg; - int dc_pitch; - int dc_color; - fixed_t dc_light; + lighttable_t *_colormap; + BYTE *_destorg; + int _pitch; + int _color; + fixed_t _light; public: RtShaded1colRGBACommand(int hx, int sx, int yl, int yh) @@ -778,11 +778,11 @@ public: this->yl = yl; this->yh = yh; - dc_colormap = ::dc_colormap; - dc_destorg = ::dc_destorg; - dc_pitch = ::dc_pitch; - dc_color = ::dc_color; - dc_light = ::dc_light; + _colormap = dc_colormap; + _destorg = dc_destorg; + _pitch = dc_pitch; + _color = dc_color; + _light = dc_light; } void Execute(DrawerThread *thread) override @@ -798,13 +798,13 @@ public: if (count <= 0) return; - colormap = dc_colormap; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + colormap = _colormap; + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4 + hx] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); + uint32_t fg = shade_pal_index_simple(_color, calc_light_multiplier(_light)); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -833,11 +833,11 @@ class RtShaded4colsRGBACommand : public DrawerCommand int sx; int yl; int yh; - lighttable_t *dc_colormap; - int dc_color; - BYTE *dc_destorg; - int dc_pitch; - fixed_t dc_light; + lighttable_t *_colormap; + int _color; + BYTE *_destorg; + int _pitch; + fixed_t _light; public: RtShaded4colsRGBACommand(int sx, int yl, int yh) @@ -846,11 +846,11 @@ public: this->yl = yl; this->yh = yh; - dc_colormap = ::dc_colormap; - dc_color = ::dc_color; - dc_destorg = ::dc_destorg; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; + _colormap = dc_colormap; + _color = dc_color; + _destorg = dc_destorg; + _pitch = dc_pitch; + _light 
= dc_light; } #ifdef NO_SSE @@ -867,13 +867,13 @@ public: if (count <= 0) return; - colormap = dc_colormap; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + colormap = _colormap; + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); + uint32_t fg = shade_pal_index_simple(_color, calc_light_multiplier(_light)); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -912,13 +912,13 @@ public: if (count <= 0) return; - colormap = dc_colormap; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + colormap = _colormap; + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - __m128i fg = _mm_unpackhi_epi8(_mm_set1_epi32(shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light))), _mm_setzero_si128()); + __m128i fg = _mm_unpackhi_epi8(_mm_set1_epi32(shade_pal_index_simple(_color, calc_light_multiplier(_light))), _mm_setzero_si128()); __m128i alpha_one = _mm_set1_epi16(64); do { @@ -957,12 +957,12 @@ class RtAddClamp1colRGBACommand : public DrawerCommand int sx; int yl; int yh; - BYTE *dc_destorg; - int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; + BYTE *_destorg; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; public: RtAddClamp1colRGBACommand(int hx, int sx, int yl, int yh) @@ -972,12 +972,12 @@ 
public: this->yl = yl; this->yh = yh; - dc_destorg = ::dc_destorg; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; + _destorg = dc_destorg; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override @@ -992,16 +992,16 @@ public: if (count <= 0) return; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4 + hx] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { uint32_t fg = shade_pal_index(*source, light, shade_constants); @@ -1029,12 +1029,12 @@ class RtAddClamp4colsRGBACommand : public DrawerCommand int sx; int yl; int yh; - BYTE *dc_destorg; - int dc_pitch; - fixed_t dc_light; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; - ShadeConstants dc_shade_constants; + BYTE *_destorg; + int _pitch; + fixed_t _light; + fixed_t _srcalpha; + fixed_t _destalpha; + ShadeConstants _shade_constants; public: RtAddClamp4colsRGBACommand(int sx, int yl, int yh) @@ -1043,12 +1043,12 @@ public: this->yl = yl; this->yh = yh; - dc_destorg = ::dc_destorg; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; - 
dc_shade_constants = ::dc_shade_constants; + _destorg = dc_destorg; + _pitch = dc_pitch; + _light = dc_light; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; + _shade_constants = dc_shade_constants; } #ifdef NO_SSE @@ -1064,16 +1064,16 @@ public: if (count <= 0) return; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { for (int i = 0; i < 4; i++) @@ -1110,18 +1110,18 @@ public: if (count <= 0) return; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); + uint32_t light = calc_light_multiplier(_light); uint32_t *palette = (uint32_t*)GPalette.BaseColors; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - ShadeConstants shade_constants = dc_shade_constants; + ShadeConstants shade_constants = _shade_constants; if 
(shade_constants.simple_shade) { @@ -1205,12 +1205,12 @@ class RtSubClamp1colRGBACommand : public DrawerCommand int sx; int yl; int yh; - BYTE *dc_destorg; - int dc_pitch; - fixed_t dc_light; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; - ShadeConstants dc_shade_constants; + BYTE *_destorg; + int _pitch; + fixed_t _light; + fixed_t _srcalpha; + fixed_t _destalpha; + ShadeConstants _shade_constants; public: RtSubClamp1colRGBACommand(int hx, int sx, int yl, int yh) @@ -1220,12 +1220,12 @@ public: this->yl = yl; this->yh = yh; - dc_destorg = ::dc_destorg; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; - dc_shade_constants = ::dc_shade_constants; + _destorg = dc_destorg; + _pitch = dc_pitch; + _light = dc_light; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; + _shade_constants = dc_shade_constants; } void Execute(DrawerThread *thread) override @@ -1240,16 +1240,16 @@ public: if (count <= 0) return; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4 + hx] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { uint32_t fg = shade_pal_index(*source, light, shade_constants); @@ -1277,12 +1277,12 @@ class RtSubClamp4colsRGBACommand : public DrawerCommand int sx; int yl; int yh; - BYTE *dc_destorg; - int dc_pitch; - fixed_t 
dc_light; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; - ShadeConstants dc_shade_constants; + BYTE *_destorg; + int _pitch; + fixed_t _light; + fixed_t _srcalpha; + fixed_t _destalpha; + ShadeConstants _shade_constants; public: RtSubClamp4colsRGBACommand(int sx, int yl, int yh) @@ -1291,12 +1291,12 @@ public: this->yl = yl; this->yh = yh; - dc_destorg = ::dc_destorg; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; - dc_shade_constants = ::dc_shade_constants; + _destorg = dc_destorg; + _pitch = dc_pitch; + _light = dc_light; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; + _shade_constants = dc_shade_constants; } void Execute(DrawerThread *thread) override @@ -1311,16 +1311,16 @@ public: if (count <= 0) return; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { for (int i = 0; i < 4; i++) @@ -1353,12 +1353,12 @@ class RtRevSubClamp1colRGBACommand : public DrawerCommand int sx; int yl; int yh; - BYTE *dc_destorg; - int dc_pitch; - fixed_t dc_light; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; - ShadeConstants dc_shade_constants; + BYTE *_destorg; + int _pitch; + fixed_t _light; + fixed_t _srcalpha; + fixed_t _destalpha; + ShadeConstants _shade_constants; public: 
RtRevSubClamp1colRGBACommand(int hx, int sx, int yl, int yh) @@ -1368,12 +1368,12 @@ public: this->yl = yl; this->yh = yh; - dc_destorg = ::dc_destorg; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; - dc_shade_constants = ::dc_shade_constants; + _destorg = dc_destorg; + _pitch = dc_pitch; + _light = dc_light; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; + _shade_constants = dc_shade_constants; } void Execute(DrawerThread *thread) override @@ -1388,16 +1388,16 @@ public: if (count <= 0) return; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4 + hx] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { uint32_t fg = shade_pal_index(*source, light, shade_constants); @@ -1425,12 +1425,12 @@ class RtRevSubClamp4colsRGBACommand : public DrawerCommand int sx; int yl; int yh; - BYTE *dc_destorg; - int dc_pitch; - fixed_t dc_light; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; - ShadeConstants dc_shade_constants; + BYTE *_destorg; + int _pitch; + fixed_t _light; + fixed_t _srcalpha; + fixed_t _destalpha; + ShadeConstants _shade_constants; public: RtRevSubClamp4colsRGBACommand(int sx, int yl, int yh) @@ -1439,12 +1439,12 @@ public: this->yl = yl; this->yh = yh; - dc_destorg = ::dc_destorg; - dc_pitch = ::dc_pitch; - dc_light = 
::dc_light; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; - dc_shade_constants = ::dc_shade_constants; + _destorg = dc_destorg; + _pitch = dc_pitch; + _light = dc_light; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; + _shade_constants = dc_shade_constants; } void Execute(DrawerThread *thread) override @@ -1459,16 +1459,16 @@ public: if (count <= 0) return; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { for (int i = 0; i < 4; i++) @@ -1513,29 +1513,29 @@ public: class DrawColumnHorizRGBACommand : public DrawerCommand { - int dc_count; - fixed_t dc_iscale; - fixed_t dc_texturefrac; - const BYTE *dc_source; - int dc_x; - int dc_yl; - int dc_yh; + int _count; + fixed_t _iscale; + fixed_t _texturefrac; + const BYTE *_source; + int _x; + int _yl; + int _yh; public: DrawColumnHorizRGBACommand() { - dc_count = ::dc_count; - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_source = ::dc_source; - dc_x = ::dc_x; - dc_yl = ::dc_yl; - dc_yh = ::dc_yh; + _count = dc_count; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _source = dc_source; + _x = dc_x; + _yl = dc_yl; + _yh = dc_yh; } void Execute(DrawerThread *thread) override { - int count = dc_count; + int count = _count; uint32_t *dest; fixed_t 
fracstep; fixed_t frac; @@ -1544,13 +1544,13 @@ public: return; { - int x = dc_x & 3; - dest = &thread->dc_temp_rgba[x + 4 * dc_yl]; + int x = _x & 3; + dest = &thread->dc_temp_rgba[x + 4 * _yl]; } - fracstep = dc_iscale; - frac = dc_texturefrac; + fracstep = _iscale; + frac = _texturefrac; - const BYTE *source = dc_source; + const BYTE *source = _source; if (count & 1) { *dest = source[frac >> FRACBITS]; dest += 4; frac += fracstep; @@ -1587,34 +1587,34 @@ public: class FillColumnHorizRGBACommand : public DrawerCommand { - int dc_x; - int dc_yl; - int dc_yh; - int dc_count; - int dc_color; + int _x; + int _yl; + int _yh; + int _count; + int _color; public: FillColumnHorizRGBACommand() { - dc_x = ::dc_x; - dc_count = ::dc_count; - dc_color = ::dc_color; - dc_yl = ::dc_yl; - dc_yh = ::dc_yh; + _x = dc_x; + _count = dc_count; + _color = dc_color; + _yl = dc_yl; + _yh = dc_yh; } void Execute(DrawerThread *thread) override { - int count = dc_count; - int color = dc_color; + int count = _count; + int color = _color; uint32_t *dest; if (count <= 0) return; { - int x = dc_x & 3; - dest = &thread->dc_temp_rgba[x + 4 * dc_yl]; + int x = _x & 3; + dest = &thread->dc_temp_rgba[x + 4 * _yl]; } if (count & 1) { From 3089043b07c206db4a2d05cb27639378147d2851 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 17 Jun 2016 08:28:30 +0200 Subject: [PATCH 049/912] Fixed typo --- src/r_draw_rgba.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index 0ab3e298a4..15a76c6892 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -173,12 +173,12 @@ public: class DrawerCommand { protected: - int dc_dest_y; + int _dest_y; public: DrawerCommand() { - dc_dest_y = static_cast((dc_dest - dc_destorg) / (dc_pitch * 4)); + _dest_y = static_cast((dc_dest - dc_destorg) / (dc_pitch * 4)); } virtual void Execute(DrawerThread *thread) = 0; From 000008e04dfa50fa5443d005d8076511dba1ca46 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl 
Date: Fri, 17 Jun 2016 10:14:33 +0200 Subject: [PATCH 050/912] Fixed empty canvas in kdizd intermission screen --- src/r_swrenderer.cpp | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/src/r_swrenderer.cpp b/src/r_swrenderer.cpp index c4347236de..c788dfd544 100644 --- a/src/r_swrenderer.cpp +++ b/src/r_swrenderer.cpp @@ -370,7 +370,30 @@ void FSoftwareRenderer::RenderTextureView (FCanvasTexture *tex, AActor *viewpoin FTexture::FlipNonSquareBlockRemap(Pixels, Canvas->GetBuffer(), tex->GetWidth(), tex->GetHeight(), Canvas->GetPitch(), GPalette.Remap); } } - tex->SetUpdated(); + + if (r_swtruecolor) + { + // True color render still sometimes uses palette textures (for sprites, mostly). + // We need to make sure that both pixel buffers contain data: + int width = tex->GetWidth(); + int height = tex->GetHeight(); + BYTE *palbuffer = (BYTE *)tex->GetPixels(); + uint32_t *bgrabuffer = (uint32_t*)tex->GetPixelsBgra(); + for (int x = 0; x < width; x++) + { + for (int y = 0; y < height; y++) + { + uint32_t color = bgrabuffer[y]; + int r = RPART(color); + int g = GPART(color); + int b = BPART(color); + palbuffer[y] = RGB32k.RGB[r >> 3][g >> 3][b >> 3]; + } + palbuffer += height; + bgrabuffer += height; + } + } + fixedcolormap = savecolormap; realfixedcolormap = savecm; } From 5963f29afd0906d7c4d1c99f116e78907c60dac4 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 17 Jun 2016 10:16:34 +0200 Subject: [PATCH 051/912] Added missing SetUpdated --- src/r_swrenderer.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/r_swrenderer.cpp b/src/r_swrenderer.cpp index c788dfd544..c81d2a1103 100644 --- a/src/r_swrenderer.cpp +++ b/src/r_swrenderer.cpp @@ -394,6 +394,8 @@ void FSoftwareRenderer::RenderTextureView (FCanvasTexture *tex, AActor *viewpoin } } + tex->SetUpdated(); + fixedcolormap = savecolormap; realfixedcolormap = savecm; } From 12a50c140c7656ad944df027bd9f5c332f48f698 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl 
Date: Fri, 17 Jun 2016 10:47:30 +0200 Subject: [PATCH 052/912] Fix animated textures not updating in swtruecolor mode --- src/g_strife/strife_sbar.cpp | 11 +++++++++++ src/menu/playerdisplay.cpp | 11 +++++++++++ src/textures/textures.h | 1 + src/textures/warptexture.cpp | 12 ++++++++++++ 4 files changed, 35 insertions(+) diff --git a/src/g_strife/strife_sbar.cpp b/src/g_strife/strife_sbar.cpp index eb3fa26087..e1fcb3cda1 100644 --- a/src/g_strife/strife_sbar.cpp +++ b/src/g_strife/strife_sbar.cpp @@ -34,6 +34,7 @@ public: const BYTE *GetColumn (unsigned int column, const Span **spans_out); const BYTE *GetPixels (); + const uint32_t *GetPixelsBgra() override; bool CheckModified (); void SetVial (int level); @@ -115,6 +116,16 @@ const BYTE *FHealthBar::GetPixels () return Pixels; } +const uint32_t *FHealthBar::GetPixelsBgra() +{ + if (NeedRefresh) + { + MakeTexture(); + PixelsBgra.clear(); + } + return FTexture::GetPixelsBgra(); +} + void FHealthBar::SetVial (int level) { if (level < 0) diff --git a/src/menu/playerdisplay.cpp b/src/menu/playerdisplay.cpp index 16671975a0..7b7e9ca5d4 100644 --- a/src/menu/playerdisplay.cpp +++ b/src/menu/playerdisplay.cpp @@ -78,6 +78,7 @@ public: const BYTE *GetColumn(unsigned int column, const Span **spans_out); const BYTE *GetPixels(); + const uint32_t *GetPixelsBgra() override; bool CheckModified(); protected: @@ -246,6 +247,16 @@ const BYTE *FBackdropTexture::GetPixels() return Pixels; } +const uint32_t *FBackdropTexture::GetPixelsBgra() +{ + if (LastRenderTic != gametic) + { + Render(); + PixelsBgra.clear(); + } + return FTexture::GetPixelsBgra(); +} + //============================================================================= // // This is one plasma and two rotozoomers. I think it turned out quite awesome. 
diff --git a/src/textures/textures.h b/src/textures/textures.h index 38d1ef487b..3b4b0b8b35 100644 --- a/src/textures/textures.h +++ b/src/textures/textures.h @@ -484,6 +484,7 @@ public: virtual int CopyTrueColorPixels(FBitmap *bmp, int x, int y, int rotate=0, FCopyInfo *inf = NULL); const BYTE *GetColumn (unsigned int column, const Span **spans_out); const BYTE *GetPixels (); + const uint32_t *GetPixelsBgra() override; void Unload (); bool CheckModified (); diff --git a/src/textures/warptexture.cpp b/src/textures/warptexture.cpp index b6977dd77e..0d18ab58f4 100644 --- a/src/textures/warptexture.cpp +++ b/src/textures/warptexture.cpp @@ -93,6 +93,18 @@ const BYTE *FWarpTexture::GetPixels () return Pixels; } +const uint32_t *FWarpTexture::GetPixelsBgra() +{ + DWORD time = r_FrameTime; + + if (Pixels == NULL || time != GenTime) + { + MakeTexture(time); + PixelsBgra.clear(); + } + return FTexture::GetPixelsBgra(); +} + const BYTE *FWarpTexture::GetColumn (unsigned int column, const Span **spans_out) { DWORD time = r_FrameTime; From 35c078dc1e0f4dc67d0ed3cd09f292e257fb1b9c Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 17 Jun 2016 11:24:21 +0200 Subject: [PATCH 053/912] Screenshot fix --- src/v_video.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/v_video.cpp b/src/v_video.cpp index 2cf04a29d2..bcd49f9209 100644 --- a/src/v_video.cpp +++ b/src/v_video.cpp @@ -445,7 +445,7 @@ void DCanvas::GetScreenshotBuffer(const BYTE *&buffer, int &pitch, ESSType &colo { Lock(true); buffer = GetBuffer(); - pitch = GetPitch(); + pitch = IsBgra() ? GetPitch() * 4 : GetPitch(); color_type = IsBgra() ? 
SS_BGRA : SS_PAL; } From f53e468f3f32fcb544842e86be5c06a63b0c3f31 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 17 Jun 2016 12:38:00 +0200 Subject: [PATCH 054/912] Fixed fill column rgba drawers --- src/r_draw.cpp | 11 +++-- src/r_draw.h | 1 + src/r_draw_rgba.cpp | 114 ++++++++++++++++++++++++++++---------------- 3 files changed, 82 insertions(+), 44 deletions(-) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 4dcdc3e6be..7829e2b77f 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -154,6 +154,7 @@ fixed_t dc_iscale; fixed_t dc_texturefrac; int dc_color; // [RH] Color for column filler DWORD dc_srccolor; +uint32_t dc_srccolor_bgra; DWORD *dc_srcblend; // [RH] Source and destination DWORD *dc_destblend; // blending lookups fixed_t dc_srcalpha; // Alpha value used by dc_srcblend @@ -2702,10 +2703,10 @@ ESPSResult R_SetPatchStyle (FRenderStyle style, fixed_t alpha, int translation, if (style.Flags & STYLEF_ColorIsFixed) { - int x = fglevel >> 10; - int r = RPART(color); - int g = GPART(color); - int b = BPART(color); + uint32_t x = fglevel >> 10; + uint32_t r = RPART(color); + uint32_t g = GPART(color); + uint32_t b = BPART(color); // dc_color is used by the rt_* routines. It is indexed into dc_srcblend. dc_color = RGB32k.RGB[r>>3][g>>3][b>>3]; if (style.Flags & STYLEF_InvertSource) @@ -2714,6 +2715,8 @@ ESPSResult R_SetPatchStyle (FRenderStyle style, fixed_t alpha, int translation, g = 255 - g; b = 255 - b; } + uint32_t alpha = clamp(fglevel >> (FRACBITS - 8), 0, 255); + dc_srccolor_bgra = (alpha << 24) | (r << 16) | (g << 8) | b; // dc_srccolor is used by the R_Fill* routines. It is premultiplied // with the alpha. 
dc_srccolor = ((((r*x)>>4)<<20) | ((g*x)>>4) | ((((b)*x)>>4)<<10)) & 0x3feffbff; diff --git a/src/r_draw.h b/src/r_draw.h index a311834055..99ee4d10d9 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -63,6 +63,7 @@ extern double dc_texturemid; extern "C" fixed_t dc_texturefrac; extern "C" int dc_color; // [RH] For flat colors (no texturing) extern "C" DWORD dc_srccolor; +extern "C" uint32_t dc_srccolor_bgra; extern "C" DWORD *dc_srcblend; extern "C" DWORD *dc_destblend; extern "C" fixed_t dc_srcalpha; diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 722fbb8cd7..491c6ab987 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -344,8 +344,7 @@ class FillAddColumnRGBACommand : public DrawerCommand int _count; BYTE *_dest; int _pitch; - fixed_t _light; - int _color; + uint32_t _srccolor; public: FillAddColumnRGBACommand() @@ -353,8 +352,7 @@ public: _count = dc_count; _dest = dc_dest; _pitch = dc_pitch; - _light = dc_light; - _color = dc_color; + _srccolor = dc_srccolor_bgra; } void Execute(DrawerThread *thread) override @@ -369,10 +367,18 @@ public: dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); int pitch = _pitch * thread->num_cores; - uint32_t fg = shade_pal_index_simple(_color, calc_light_multiplier(_light)); - uint32_t fg_red = (fg >> 24) & 0xff; - uint32_t fg_green = (fg >> 16) & 0xff; + uint32_t fg = _srccolor; + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; + uint32_t fg_alpha = fg >> 24; + fg_alpha += fg_alpha >> 7; + + fg_red *= fg_alpha; + fg_green *= fg_alpha; + fg_blue *= fg_alpha; + + uint32_t inv_alpha = 256 - fg_alpha; do { @@ -380,9 +386,9 @@ public: uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - uint32_t red = (fg_red + bg_red + 1) / 2; - uint32_t green = (fg_green + bg_green + 1) / 2; - uint32_t blue = (fg_blue + bg_blue + 1) / 2; + uint32_t red = (fg_red + bg_red * inv_alpha) / 256; + uint32_t green = (fg_green + 
bg_green * inv_alpha) / 256; + uint32_t blue = (fg_blue + bg_blue * inv_alpha) / 256; *dest = 0xff000000 | (red << 16) | (green << 8) | blue; dest += pitch; @@ -395,8 +401,10 @@ class FillAddClampColumnRGBACommand : public DrawerCommand int _count; BYTE *_dest; int _pitch; - fixed_t _light; int _color; + uint32_t _srccolor; + fixed_t _srcalpha; + fixed_t _destalpha; public: FillAddClampColumnRGBACommand() @@ -404,8 +412,10 @@ public: _count = dc_count; _dest = dc_dest; _pitch = dc_pitch; - _light = dc_light; _color = dc_color; + _srccolor = dc_srccolor_bgra; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override @@ -420,20 +430,26 @@ public: dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); int pitch = _pitch * thread->num_cores; - uint32_t fg = shade_pal_index_simple(_color, calc_light_multiplier(_light)); - uint32_t fg_red = (fg >> 24) & 0xff; - uint32_t fg_green = (fg >> 16) & 0xff; + uint32_t fg = _srccolor; + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); + + fg_red *= fg_alpha; + fg_green *= fg_alpha; + fg_blue *= fg_alpha; + + do { - do - { uint32_t bg_red = (*dest >> 16) & 0xff; uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - uint32_t red = clamp(fg_red + bg_red, 0, 255); - uint32_t green = clamp(fg_green + bg_green, 0, 255); - uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + uint32_t red = clamp((fg_red + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue + bg_blue * bg_alpha) / 256, 0, 255); *dest = 0xff000000 | (red << 16) | (green << 8) | blue; dest += pitch; @@ -447,7 +463,9 @@ class FillSubClampColumnRGBACommand : public DrawerCommand BYTE *_dest; int _pitch; int _color; - fixed_t _light; + uint32_t 
_srccolor; + fixed_t _srcalpha; + fixed_t _destalpha; public: FillSubClampColumnRGBACommand() @@ -456,7 +474,9 @@ public: _dest = dc_dest; _pitch = dc_pitch; _color = dc_color; - _light = dc_light; + _srccolor = dc_srccolor_bgra; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override @@ -471,20 +491,25 @@ public: dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); int pitch = _pitch * thread->num_cores; - uint32_t fg = shade_pal_index_simple(_color, calc_light_multiplier(_light)); - uint32_t fg_red = (fg >> 24) & 0xff; - uint32_t fg_green = (fg >> 16) & 0xff; + uint32_t fg = _srccolor; + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - do - { + fg_red *= fg_alpha; + fg_green *= fg_alpha; + fg_blue *= fg_alpha; + + do { uint32_t bg_red = (*dest >> 16) & 0xff; uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - uint32_t red = clamp(256 - fg_red + bg_red, 256, 256 + 255) - 255; - uint32_t green = clamp(256 - fg_green + bg_green, 256, 256 + 255) - 255; - uint32_t blue = clamp(256 - fg_blue + bg_blue, 256, 256 + 255) - 255; + uint32_t red = clamp((0x10000 - fg_red + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 - fg_green + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 - fg_blue + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; *dest = 0xff000000 | (red << 16) | (green << 8) | blue; dest += pitch; @@ -498,7 +523,9 @@ class FillRevSubClampColumnRGBACommand : public DrawerCommand BYTE *_dest; int _pitch; int _color; - fixed_t _light; + uint32_t _srccolor; + fixed_t _srcalpha; + fixed_t _destalpha; public: FillRevSubClampColumnRGBACommand() @@ -507,7 +534,9 @@ public: _dest = dc_dest; _pitch = dc_pitch; _color = dc_color; - _light = dc_light; + 
_srccolor = dc_srccolor_bgra; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override @@ -522,20 +551,25 @@ public: dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); int pitch = _pitch * thread->num_cores; - uint32_t fg = shade_pal_index_simple(_color, calc_light_multiplier(_light)); - uint32_t fg_red = (fg >> 24) & 0xff; - uint32_t fg_green = (fg >> 16) & 0xff; + uint32_t fg = _srccolor; + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - do - { + fg_red *= fg_alpha; + fg_green *= fg_alpha; + fg_blue *= fg_alpha; + + do { uint32_t bg_red = (*dest >> 16) & 0xff; uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - uint32_t red = clamp(256 + fg_red - bg_red, 256, 256 + 255) - 255; - uint32_t green = clamp(256 + fg_green - bg_green, 256, 256 + 255) - 255; - uint32_t blue = clamp(256 + fg_blue - bg_blue, 256, 256 + 255) - 255; + uint32_t red = clamp((0x10000 + fg_red - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 + fg_green - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 + fg_blue - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; *dest = 0xff000000 | (red << 16) | (green << 8) | blue; dest += pitch; From 822bbd5b9a015141d6ca17d19df8b8a8600f220b Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 17 Jun 2016 13:40:23 +0200 Subject: [PATCH 055/912] Fuzz (invisibility) adjustments --- src/r_draw_rgba.cpp | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 491c6ab987..bdbcd12501 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -584,8 +584,8 @@ class DrawFuzzColumnRGBACommand : public DrawerCommand int _yh; BYTE *_destorg; int _pitch; - int 
fuzzpos; - int fuzzviewheight; + int _fuzzpos; + int _fuzzviewheight; public: DrawFuzzColumnRGBACommand() @@ -595,8 +595,8 @@ public: _yh = dc_yh; _destorg = dc_destorg; _pitch = dc_pitch; - fuzzpos = ::fuzzpos; - fuzzviewheight = ::fuzzviewheight; + _fuzzpos = fuzzpos; + _fuzzviewheight = fuzzviewheight; } void Execute(DrawerThread *thread) override @@ -609,8 +609,8 @@ public: _yl = 1; // .. and high. - if (_yh > fuzzviewheight) - _yh = fuzzviewheight; + if (_yh > _fuzzviewheight) + _yh = _fuzzviewheight; count = thread->count_for_thread(_yl, _yh - _yl + 1); @@ -622,7 +622,7 @@ public: int pitch = _pitch * thread->num_cores; int fuzzstep = thread->num_cores; - int fuzz = (fuzzpos + thread->skipped_by_thread(_yl)) % FUZZTABLE; + int fuzz = (_fuzzpos + thread->skipped_by_thread(_yl)) % FUZZTABLE; while (count > 0) { @@ -640,9 +640,9 @@ public: uint32_t bg_green = (bg >> 8) & 0xff; uint32_t bg_blue = (bg) & 0xff; - uint32_t red = bg_red * 3 / 4; - uint32_t green = bg_green * 3 / 4; - uint32_t blue = bg_blue * 3 / 4; + uint32_t red = bg_red * 7 / 8; + uint32_t green = bg_green * 7 / 8; + uint32_t blue = bg_blue * 7 / 8; *dest = 0xff000000 | (red << 16) | (green << 8) | blue; dest += pitch; @@ -3896,7 +3896,13 @@ void R_FillRevSubClampColumn_rgba() void R_DrawFuzzColumn_rgba() { DrawerCommandQueue::QueueCommand(); - fuzzpos = (fuzzpos + dc_yh - dc_yl) % FUZZTABLE; + + if (dc_yl == 0) + dc_yl = 1; + if (dc_yh > fuzzviewheight) + dc_yh = fuzzviewheight; + + fuzzpos = (fuzzpos + dc_yh - dc_yl + 1) % FUZZTABLE; } void R_DrawAddColumn_rgba() From 4ef2fb3cdb19d71a67c21c860f7d0bea96db8cb2 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 17 Jun 2016 14:45:52 +0200 Subject: [PATCH 056/912] Fixed multithreaded rendering issue with the fuzz effect --- src/r_draw_rgba.cpp | 75 +++++++++++++++++++++++++++++++++------------ 1 file changed, 55 insertions(+), 20 deletions(-) diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index bdbcd12501..b1ee1f02c4 100644 --- 
a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -604,26 +604,48 @@ public: int count; uint32_t *dest; - // Adjust borders. Low... - if (_yl == 0) - _yl = 1; + int yl = MAX(_yl, 1); + int yh = MIN(_yh, _fuzzviewheight); - // .. and high. - if (_yh > _fuzzviewheight) - _yh = _fuzzviewheight; - - count = thread->count_for_thread(_yl, _yh - _yl + 1); + count = thread->count_for_thread(yl, yh - yl + 1); // Zero length. if (count <= 0) return; - dest = thread->dest_for_thread(_yl, _pitch, ylookup[_yl] + _x + (uint32_t*)_destorg); + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + _x + (uint32_t*)_destorg); int pitch = _pitch * thread->num_cores; int fuzzstep = thread->num_cores; - int fuzz = (_fuzzpos + thread->skipped_by_thread(_yl)) % FUZZTABLE; + int fuzz = (_fuzzpos + thread->skipped_by_thread(yl)) % FUZZTABLE; + yl += thread->skipped_by_thread(yl); + + // Handle the case where we would go out of bounds at the top: + if (yl < fuzzstep) + { + count--; + + uint32_t bg = dest[fuzzoffset[fuzz] * fuzzstep + pitch]; + uint32_t bg_red = (bg >> 16) & 0xff; + uint32_t bg_green = (bg >> 8) & 0xff; + uint32_t bg_blue = (bg) & 0xff; + + uint32_t red = bg_red * 3 / 4; + uint32_t green = bg_green * 3 / 4; + uint32_t blue = bg_blue * 3 / 4; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += pitch; + fuzz += fuzzstep; + fuzz %= FUZZTABLE; + } + + bool lowerbounds = (yl + count * fuzzstep > _fuzzviewheight); + if (lowerbounds) + count--; + + // Fuzz where fuzzoffset stays within bounds while (count > 0) { int available = (FUZZTABLE - fuzz); @@ -635,14 +657,14 @@ public: count -= cnt; do { - uint32_t bg = dest[fuzzoffset[fuzz]]; + uint32_t bg = dest[fuzzoffset[fuzz] * fuzzstep]; uint32_t bg_red = (bg >> 16) & 0xff; uint32_t bg_green = (bg >> 8) & 0xff; uint32_t bg_blue = (bg) & 0xff; - uint32_t red = bg_red * 7 / 8; - uint32_t green = bg_green * 7 / 8; - uint32_t blue = bg_blue * 7 / 8; + uint32_t red = bg_red * 3 / 4; + uint32_t green = bg_green * 3 / 4; 
+ uint32_t blue = bg_blue * 3 / 4; *dest = 0xff000000 | (red << 16) | (green << 8) | blue; dest += pitch; @@ -651,6 +673,21 @@ public: fuzz %= FUZZTABLE; } + + // Handle the case where we would go out of bounds at the bottom + if (lowerbounds) + { + uint32_t bg = dest[fuzzoffset[fuzz] * fuzzstep - pitch]; + uint32_t bg_red = (bg >> 16) & 0xff; + uint32_t bg_green = (bg >> 8) & 0xff; + uint32_t bg_blue = (bg) & 0xff; + + uint32_t red = bg_red * 3 / 4; + uint32_t green = bg_green * 3 / 4; + uint32_t blue = bg_blue * 3 / 4; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + } } }; @@ -3897,12 +3934,10 @@ void R_DrawFuzzColumn_rgba() { DrawerCommandQueue::QueueCommand(); - if (dc_yl == 0) - dc_yl = 1; - if (dc_yh > fuzzviewheight) - dc_yh = fuzzviewheight; - - fuzzpos = (fuzzpos + dc_yh - dc_yl + 1) % FUZZTABLE; + dc_yl = MAX(dc_yl, 1); + dc_yh = MIN(dc_yh, fuzzviewheight); + if (dc_yl <= dc_yh) + fuzzpos = (fuzzpos + dc_yh - dc_yl + 1) % FUZZTABLE; } void R_DrawAddColumn_rgba() From e7cdcd9c0af6d82c8acc7c14102fbc8c4e34da1e Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 18 Jun 2016 05:20:34 +0200 Subject: [PATCH 057/912] Change to one pass rendering to remove fuzz artifact --- src/r_draw_rgba.cpp | 6 ++++-- src/r_draw_rgba.h | 4 ++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index b1ee1f02c4..6021c9265d 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -624,8 +624,6 @@ public: // Handle the case where we would go out of bounds at the top: if (yl < fuzzstep) { - count--; - uint32_t bg = dest[fuzzoffset[fuzz] * fuzzstep + pitch]; uint32_t bg_red = (bg >> 16) & 0xff; uint32_t bg_green = (bg >> 8) & 0xff; @@ -639,6 +637,10 @@ public: dest += pitch; fuzz += fuzzstep; fuzz %= FUZZTABLE; + + count--; + if (count == 0) + return; } bool lowerbounds = (yl + count * fuzzstep > _fuzzviewheight); diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index 15a76c6892..83977d65c1 100644 --- 
a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -209,8 +209,8 @@ class DrawerCommandQueue int threaded_render = 0; DrawerThread single_core_thread; - int num_passes = 2; - int rows_in_pass = 540; + int num_passes = 1; + int rows_in_pass = MAXHEIGHT; void StartThreads(); void StopThreads(); From 3e7eb79729049302bf80e79908af8a8006c3841c Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 18 Jun 2016 11:17:59 +0200 Subject: [PATCH 058/912] Added some experimental AVX2 drawers --- src/r_draw_rgba.cpp | 420 ++++++++++++++++++++++++++++++++++--------- src/r_draw_rgba.h | 91 ++++++++++ src/r_drawt_rgba.cpp | 46 ++--- 3 files changed, 454 insertions(+), 103 deletions(-) diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 6021c9265d..2712508550 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -44,9 +44,14 @@ #include "x86.h" #ifndef NO_SSE #include +#include #endif #include +#ifdef _MSC_VER +#pragma warning(disable: 4752) // warning C4752: found Intel(R) Advanced Vector Extensions; consider using /arch:AVX +#endif + extern int vlinebits; extern int mvlinebits; extern int tmvlinebits; @@ -58,6 +63,8 @@ extern int wallshade; CVAR(Bool, r_multithreaded, true, 0) +//#define USE_AVX // Use AVX2 256 bit intrinsics (requires Haswell or newer) + ///////////////////////////////////////////////////////////////////////////// DrawerCommandQueue *DrawerCommandQueue::Instance() @@ -230,14 +237,14 @@ void DrawerCommandQueue::StopThreads() class DrawColumnRGBACommand : public DrawerCommand { int _count; - BYTE *_dest; + BYTE * RESTRICT _dest; DWORD _texturefrac; DWORD _iscale; fixed_t _light; - const BYTE *_source; + const BYTE * RESTRICT _source; int _pitch; ShadeConstants _shade_constants; - BYTE *_colormap; + BYTE * RESTRICT _colormap; public: DrawColumnRGBACommand() @@ -297,7 +304,7 @@ public: class FillColumnRGBACommand : public DrawerCommand { int _count; - BYTE *_dest; + BYTE * RESTRICT _dest; fixed_t _light; int _pitch; int _color; @@ -342,7 +349,7 @@ 
public: class FillAddColumnRGBACommand : public DrawerCommand { int _count; - BYTE *_dest; + BYTE * RESTRICT _dest; int _pitch; uint32_t _srccolor; @@ -399,7 +406,7 @@ public: class FillAddClampColumnRGBACommand : public DrawerCommand { int _count; - BYTE *_dest; + BYTE * RESTRICT _dest; int _pitch; int _color; uint32_t _srccolor; @@ -460,7 +467,7 @@ public: class FillSubClampColumnRGBACommand : public DrawerCommand { int _count; - BYTE *_dest; + BYTE * RESTRICT _dest; int _pitch; int _color; uint32_t _srccolor; @@ -520,7 +527,7 @@ public: class FillRevSubClampColumnRGBACommand : public DrawerCommand { int _count; - BYTE *_dest; + BYTE * RESTRICT _dest; int _pitch; int _color; uint32_t _srccolor; @@ -582,7 +589,7 @@ class DrawFuzzColumnRGBACommand : public DrawerCommand int _x; int _yl; int _yh; - BYTE *_destorg; + BYTE * RESTRICT _destorg; int _pitch; int _fuzzpos; int _fuzzviewheight; @@ -696,16 +703,16 @@ public: class DrawAddColumnRGBACommand : public DrawerCommand { int _count; - BYTE *_dest; + BYTE * RESTRICT _dest; DWORD _iscale; DWORD _texturefrac; - const BYTE *_source; + const BYTE * RESTRICT _source; int _pitch; fixed_t _light; ShadeConstants _shade_constants; fixed_t _srcalpha; fixed_t _destalpha; - BYTE *_colormap; + BYTE * RESTRICT _colormap; public: DrawAddColumnRGBACommand() @@ -779,11 +786,11 @@ class DrawTranslatedColumnRGBACommand : public DrawerCommand int _count; fixed_t _light; ShadeConstants _shade_constants; - BYTE *_dest; + BYTE * RESTRICT _dest; DWORD _iscale; DWORD _texturefrac; - BYTE *_translation; - const BYTE *_source; + BYTE * RESTRICT _translation; + const BYTE * RESTRICT _source; int _pitch; public: @@ -840,11 +847,11 @@ class DrawTlatedAddColumnRGBACommand : public DrawerCommand int _count; fixed_t _light; ShadeConstants _shade_constants; - BYTE *_dest; + BYTE * RESTRICT _dest; DWORD _iscale; DWORD _texturefrac; - BYTE *_translation; - const BYTE *_source; + BYTE * RESTRICT _translation; + const BYTE * RESTRICT _source; int 
_pitch; fixed_t _srcalpha; fixed_t _destalpha; @@ -920,12 +927,12 @@ class DrawShadedColumnRGBACommand : public DrawerCommand { private: int _count; - BYTE *_dest; + BYTE * RESTRICT _dest; DWORD _iscale; DWORD _texturefrac; fixed_t _light; - const BYTE *_source; - lighttable_t *_colormap; + const BYTE * RESTRICT _source; + lighttable_t * RESTRICT _colormap; int _color; int _pitch; @@ -993,10 +1000,10 @@ public: class DrawAddClampColumnRGBACommand : public DrawerCommand { int _count; - BYTE *_dest; + BYTE * RESTRICT _dest; DWORD _iscale; DWORD _texturefrac; - const BYTE *_source; + const BYTE * RESTRICT _source; int _pitch; fixed_t _light; ShadeConstants _shade_constants; @@ -1069,11 +1076,11 @@ public: class DrawAddClampTranslatedColumnRGBACommand : public DrawerCommand { int _count; - BYTE *_dest; + BYTE * RESTRICT _dest; DWORD _iscale; DWORD _texturefrac; - BYTE *_translation; - const BYTE *_source; + BYTE * RESTRICT _translation; + const BYTE * RESTRICT _source; int _pitch; fixed_t _light; ShadeConstants _shade_constants; @@ -1148,10 +1155,10 @@ public: class DrawSubClampColumnRGBACommand : public DrawerCommand { int _count; - BYTE *_dest; + BYTE * RESTRICT _dest; DWORD _iscale; DWORD _texturefrac; - const BYTE *_source; + const BYTE * RESTRICT _source; int _pitch; fixed_t _light; ShadeConstants _shade_constants; @@ -1224,16 +1231,16 @@ public: class DrawSubClampTranslatedColumnRGBACommand : public DrawerCommand { int _count; - BYTE *_dest; + BYTE * RESTRICT _dest; DWORD _iscale; DWORD _texturefrac; - const BYTE *_source; + const BYTE * RESTRICT _source; int _pitch; fixed_t _light; ShadeConstants _shade_constants; fixed_t _srcalpha; fixed_t _destalpha; - BYTE *_translation; + BYTE * RESTRICT _translation; public: DrawSubClampTranslatedColumnRGBACommand() @@ -1303,10 +1310,10 @@ public: class DrawRevSubClampColumnRGBACommand : public DrawerCommand { int _count; - BYTE *_dest; + BYTE * RESTRICT _dest; DWORD _iscale; DWORD _texturefrac; - const BYTE *_source; + 
const BYTE * RESTRICT _source; int _pitch; fixed_t _light; ShadeConstants _shade_constants; @@ -1378,16 +1385,16 @@ public: class DrawRevSubClampTranslatedColumnRGBACommand : public DrawerCommand { int _count; - BYTE *_dest; + BYTE * RESTRICT _dest; DWORD _iscale; DWORD _texturefrac; - const BYTE *_source; + const BYTE * RESTRICT _source; int _pitch; fixed_t _light; ShadeConstants _shade_constants; fixed_t _srcalpha; fixed_t _destalpha; - BYTE *_translation; + BYTE * RESTRICT _translation; public: DrawRevSubClampTranslatedColumnRGBACommand() @@ -1422,8 +1429,8 @@ public: frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); { - BYTE *translation = _translation; - const BYTE *source = _source; + BYTE * RESTRICT translation = _translation; + const BYTE * RESTRICT source = _source; int pitch = _pitch * thread->num_cores; uint32_t light = calc_light_multiplier(_light); ShadeConstants shade_constants = _shade_constants; @@ -1456,7 +1463,7 @@ public: class DrawSpanRGBACommand : public DrawerCommand { - const uint32_t *_source; + const uint32_t * RESTRICT _source; fixed_t _xfrac; fixed_t _yfrac; fixed_t _xstep; @@ -1466,7 +1473,7 @@ class DrawSpanRGBACommand : public DrawerCommand int _y; int _xbits; int _ybits; - BYTE *_destorg; + BYTE * RESTRICT _destorg; fixed_t _light; ShadeConstants _shade_constants; @@ -1539,6 +1546,181 @@ public: BYTE xshift = yshift - _xbits; int xmask = ((1 << _xbits) - 1) << _ybits; + do + { + // Current texture index in u,v. + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + + // Lookup pixel from flat texture tile + *dest++ = shade_bgra(source[spot], light, shade_constants); + + // Next step in u,v. 
+ xfrac += xstep; + yfrac += ystep; + } while (--count); + } + } +#elif defined(USE_AVX) + void Execute(DrawerThread *thread) override + { + if (thread->line_skipped_by_thread(_y)) + return; + + dsfixed_t xfrac; + dsfixed_t yfrac; + dsfixed_t xstep; + dsfixed_t ystep; + uint32_t* dest; + const uint32_t* source = _source; + int count; + int spot; + + xfrac = _xfrac; + yfrac = _yfrac; + + dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; + + count = _x2 - _x1 + 1; + + xstep = _xstep; + ystep = _ystep; + + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; + + if (_xbits == 6 && _ybits == 6) + { + // 64x64 is the most common case by far, so special case it. + + int sse_count = count / 8; + count -= sse_count * 8; + + if (shade_constants.simple_shade) + { + AVX2_SHADE_SIMPLE_INIT(light); + + while (sse_count--) + { + uint32_t fg_pixels[8]; + for (int i = 0; i < 8; i++) + { + // Current texture index in u,v. + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + fg_pixels[i] = source[spot]; + xfrac += xstep; + yfrac += ystep; + } + + // Lookup pixel from flat texture tile, + // re-index using light/colormap. + __m256i fg = _mm256_loadu_si256((const __m256i*)fg_pixels); + AVX2_SHADE_SIMPLE(fg); + _mm256_storeu_si256((__m256i*)dest, fg); + + // Next step in u,v. + dest += 8; + } + } + else + { + AVX2_SHADE_INIT(light, shade_constants); + + while (sse_count--) + { + uint32_t fg_pixels[8]; + for (int i = 0; i < 8; i++) + { + // Current texture index in u,v. + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + fg_pixels[i] = source[spot]; + xfrac += xstep; + yfrac += ystep; + } + + // Lookup pixel from flat texture tile, + // re-index using light/colormap. + __m256i fg = _mm256_loadu_si256((const __m256i*)fg_pixels); + AVX2_SHADE(fg, shade_constants); + _mm256_storeu_si256((__m256i*)dest, fg); + + // Next step in u,v. 
+ dest += 8; + } + } + + if (count == 0) + return; + + do + { + // Current texture index in u,v. + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + + // Lookup pixel from flat texture tile + *dest++ = shade_bgra(source[spot], light, shade_constants); + + // Next step in u,v. + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + else + { + BYTE yshift = 32 - _ybits; + BYTE xshift = yshift - _xbits; + int xmask = ((1 << _xbits) - 1) << _ybits; + + int sse_count = count / 8; + count -= sse_count * 8; + + if (shade_constants.simple_shade) + { + AVX2_SHADE_SIMPLE_INIT(light); + + while (sse_count--) + { + uint32_t fg_pixels[8]; + for (int i = 0; i < 8; i++) + { + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + fg_pixels[i] = source[spot]; + xfrac += xstep; + yfrac += ystep; + } + + // Lookup pixel from flat texture tile + __m256i fg = _mm256_loadu_si256((const __m256i*)fg_pixels); + AVX2_SHADE_SIMPLE(fg); + _mm256_storeu_si256((__m256i*)dest, fg); + dest += 8; + } + } + else + { + AVX2_SHADE_INIT(light, shade_constants); + + while (sse_count--) + { + uint32_t fg_pixels[8]; + for (int i = 0; i < 8; i++) + { + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + fg_pixels[i] = source[spot]; + xfrac += xstep; + yfrac += ystep; + } + + // Lookup pixel from flat texture tile + __m256i fg = _mm256_loadu_si256((const __m256i*)fg_pixels); + AVX2_SHADE(fg, shade_constants); // full shade path, matching AVX2_SHADE_INIT above + _mm256_storeu_si256((__m256i*)dest, fg); + dest += 8; // one 256-bit store writes 8 pixels + } + } + + if (count == 0) + return; + do { // Current texture index in u,v. 
@@ -1777,12 +1959,12 @@ public: class DrawSpanMaskedRGBACommand : public DrawerCommand { - const uint32_t *_source; + const uint32_t * RESTRICT _source; fixed_t _light; ShadeConstants _shade_constants; fixed_t _xfrac; fixed_t _yfrac; - BYTE *_destorg; + BYTE * RESTRICT _destorg; int _x1; int _x2; int _y1; @@ -1880,12 +2062,12 @@ public: class DrawSpanTranslucentRGBACommand : public DrawerCommand { - const uint32_t *_source; + const uint32_t * RESTRICT _source; fixed_t _light; ShadeConstants _shade_constants; fixed_t _xfrac; fixed_t _yfrac; - BYTE *_destorg; + BYTE * RESTRICT _destorg; int _x1; int _x2; int _y1; @@ -2006,12 +2188,12 @@ public: class DrawSpanMaskedTranslucentRGBACommand : public DrawerCommand { - const uint32_t *_source; + const uint32_t * RESTRICT _source; fixed_t _light; ShadeConstants _shade_constants; fixed_t _xfrac; fixed_t _yfrac; - BYTE *_destorg; + BYTE * RESTRICT _destorg; int _x1; int _x2; int _y1; @@ -2142,12 +2324,12 @@ public: class DrawSpanAddClampRGBACommand : public DrawerCommand { - const uint32_t *_source; + const uint32_t * RESTRICT _source; fixed_t _light; ShadeConstants _shade_constants; fixed_t _xfrac; fixed_t _yfrac; - BYTE *_destorg; + BYTE * RESTRICT _destorg; int _x1; int _x2; int _y1; @@ -2268,12 +2450,12 @@ public: class DrawSpanMaskedAddClampRGBACommand : public DrawerCommand { - const uint32_t *_source; + const uint32_t * RESTRICT _source; fixed_t _light; ShadeConstants _shade_constants; fixed_t _xfrac; fixed_t _yfrac; - BYTE *_destorg; + BYTE * RESTRICT _destorg; int _x1; int _x2; int _y1; @@ -2407,7 +2589,7 @@ class FillSpanRGBACommand : public DrawerCommand int _x1; int _x2; int _y; - BYTE *_destorg; + BYTE * RESTRICT _destorg; fixed_t _light; int _color; @@ -2441,8 +2623,8 @@ class Vlinec1RGBACommand : public DrawerCommand DWORD _iscale; DWORD _texturefrac; int _count; - const BYTE *_source; - BYTE *_dest; + const BYTE * RESTRICT _source; + BYTE * RESTRICT _dest; int vlinebits; int _pitch; fixed_t _light; @@ -2489,7 
+2671,7 @@ public: class Vlinec4RGBACommand : public DrawerCommand { - BYTE *_dest; + BYTE * RESTRICT _dest; int _count; int _pitch; ShadeConstants _shade_constants; @@ -2497,7 +2679,7 @@ class Vlinec4RGBACommand : public DrawerCommand fixed_t palookuplight[4]; DWORD vplce[4]; DWORD vince[4]; - const uint32 *bufplce[4]; + const uint32 * RESTRICT bufplce[4]; public: Vlinec4RGBACommand() @@ -2553,6 +2735,84 @@ public: dest += pitch; } while (--count); } +#elif defined(USE_AVX) + void Execute(DrawerThread *thread) override + { + int count = thread->count_for_thread(_dest_y, _count); + if (count <= 0) + return; + + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); + int bits = vlinebits; + int pitch = _pitch * thread->num_cores; + + uint32_t light0 = calc_light_multiplier(palookuplight[0]); + uint32_t light1 = calc_light_multiplier(palookuplight[1]); + uint32_t light2 = calc_light_multiplier(palookuplight[2]); + uint32_t light3 = calc_light_multiplier(palookuplight[3]); + + ShadeConstants shade_constants = _shade_constants; + + DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; + DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; + int skipped = thread->skipped_by_thread(_dest_y); + for (int i = 0; i < 4; i++) + { + local_vplce[i] += local_vince[i] * skipped; + local_vince[i] *= thread->num_cores; + } + + if (count & 1) + { + DWORD place; + dest[0] = shade_bgra(bufplce[0][(place = local_vplce[0]) >> bits], light0, shade_constants); local_vplce[0] = place + local_vince[0]; + dest[1] = shade_bgra(bufplce[1][(place = local_vplce[1]) >> bits], light1, shade_constants); local_vplce[1] = place + local_vince[1]; + dest[2] = shade_bgra(bufplce[2][(place = local_vplce[2]) >> bits], light2, shade_constants); local_vplce[2] = place + local_vince[2]; + dest[3] = shade_bgra(bufplce[3][(place = local_vplce[3]) >> bits], light3, shade_constants); local_vplce[3] = place + local_vince[3]; + dest += pitch; + } + count /= 2; + + 
// Assume all columns come from the same texture (which they do): + const uint32_t *base_addr = MIN(MIN(MIN(bufplce[0], bufplce[1]), bufplce[2]), bufplce[3]); + __m256i column_offsets = _mm256_set_epi32( + bufplce[3] - base_addr, bufplce[2] - base_addr, bufplce[1] - base_addr, bufplce[0] - base_addr, + bufplce[3] - base_addr, bufplce[2] - base_addr, bufplce[1] - base_addr, bufplce[0] - base_addr); + + __m256i place = _mm256_set_epi32( + local_vplce[3] + local_vince[3], local_vplce[2] + local_vince[2], local_vplce[1] + local_vince[1], local_vplce[0] + local_vince[0], + local_vplce[3], local_vplce[2], local_vplce[1], local_vplce[0]); + + __m256i step = _mm256_set_epi32( + local_vince[3], local_vince[2], local_vince[1], local_vince[0], + local_vince[3], local_vince[2], local_vince[1], local_vince[0]); + step = _mm256_add_epi32(step, step); + + if (shade_constants.simple_shade) + { + AVX2_SHADE_SIMPLE_INIT4(light3, light2, light1, light0); + while (count--) + { + __m256i fg = _mm256_i32gather_epi32((const int *)base_addr, _mm256_add_epi32(column_offsets, _mm256_srli_epi32(place, bits)), 4); + place = _mm256_add_epi32(place, step); + AVX2_SHADE_SIMPLE(fg); + _mm256_storeu2_m128i((__m128i*)(dest + pitch), (__m128i*)dest, fg); + dest += pitch * 2; + } + } + else + { + AVX2_SHADE_INIT4(light3, light2, light1, light0, shade_constants); + while (count--) + { + __m256i fg = _mm256_i32gather_epi32((const int *)base_addr, _mm256_add_epi32(column_offsets, _mm256_srai_epi32(place, bits)), 4); + place = _mm256_add_epi32(place, step); + AVX2_SHADE(fg, shade_constants); + _mm256_storeu2_m128i((__m128i*)(dest + pitch), (__m128i*)dest, fg); + dest += pitch * 2; + } + } + } #else void Execute(DrawerThread *thread) override { @@ -2641,8 +2901,8 @@ class Mvlinec1RGBACommand : public DrawerCommand DWORD _iscale; DWORD _texturefrac; int _count; - const BYTE *_source; - BYTE *_dest; + const BYTE * RESTRICT _source; + BYTE * RESTRICT _dest; int mvlinebits; int _pitch; fixed_t _light; @@ 
-2693,7 +2953,7 @@ public: class Mvlinec4RGBACommand : public DrawerCommand { - BYTE *_dest; + BYTE * RESTRICT _dest; int _count; int _pitch; ShadeConstants _shade_constants; @@ -2701,7 +2961,7 @@ class Mvlinec4RGBACommand : public DrawerCommand fixed_t palookuplight[4]; DWORD vplce[4]; DWORD vince[4]; - const uint32 *bufplce[4]; + const uint32 * RESTRICT bufplce[4]; public: Mvlinec4RGBACommand() @@ -2852,8 +3112,8 @@ class Tmvline1AddRGBACommand : public DrawerCommand DWORD _iscale; DWORD _texturefrac; int _count; - const BYTE *_source; - BYTE *_dest; + const BYTE * RESTRICT _source; + BYTE * RESTRICT _dest; int tmvlinebits; int _pitch; fixed_t _light; @@ -2924,7 +3184,7 @@ public: class Tmvline4AddRGBACommand : public DrawerCommand { - BYTE *_dest; + BYTE * RESTRICT _dest; int _count; int _pitch; ShadeConstants _shade_constants; @@ -2934,7 +3194,7 @@ class Tmvline4AddRGBACommand : public DrawerCommand fixed_t palookuplight[4]; DWORD vplce[4]; DWORD vince[4]; - const uint32 *bufplce[4]; + const uint32 * RESTRICT bufplce[4]; public: Tmvline4AddRGBACommand() @@ -3019,8 +3279,8 @@ class Tmvline1AddClampRGBACommand : public DrawerCommand DWORD _iscale; DWORD _texturefrac; int _count; - const BYTE *_source; - BYTE *_dest; + const BYTE * RESTRICT _source; + BYTE * RESTRICT _dest; int tmvlinebits; int _pitch; fixed_t _light; @@ -3091,7 +3351,7 @@ public: class Tmvline4AddClampRGBACommand : public DrawerCommand { - BYTE *_dest; + BYTE * RESTRICT _dest; int _count; int _pitch; ShadeConstants _shade_constants; @@ -3101,7 +3361,7 @@ class Tmvline4AddClampRGBACommand : public DrawerCommand fixed_t palookuplight[4]; DWORD vplce[4]; DWORD vince[4]; - const uint32 *bufplce[4]; + const uint32 *RESTRICT bufplce[4]; public: Tmvline4AddClampRGBACommand() @@ -3186,8 +3446,8 @@ class Tmvline1SubClampRGBACommand : public DrawerCommand DWORD _iscale; DWORD _texturefrac; int _count; - const BYTE *_source; - BYTE *_dest; + const BYTE * RESTRICT _source; + BYTE * RESTRICT _dest; int 
tmvlinebits; int _pitch; fixed_t _light; @@ -3258,7 +3518,7 @@ public: class Tmvline4SubClampRGBACommand : public DrawerCommand { - BYTE *_dest; + BYTE * RESTRICT _dest; int _count; int _pitch; ShadeConstants _shade_constants; @@ -3268,7 +3528,7 @@ class Tmvline4SubClampRGBACommand : public DrawerCommand fixed_t palookuplight[4]; DWORD vplce[4]; DWORD vince[4]; - const uint32 *bufplce[4]; + const uint32 *RESTRICT bufplce[4]; public: Tmvline4SubClampRGBACommand() @@ -3353,8 +3613,8 @@ class Tmvline1RevSubClampRGBACommand : public DrawerCommand DWORD _iscale; DWORD _texturefrac; int _count; - const BYTE *_source; - BYTE *_dest; + const BYTE * RESTRICT _source; + BYTE * RESTRICT _dest; int tmvlinebits; int _pitch; fixed_t _light; @@ -3425,7 +3685,7 @@ public: class Tmvline4RevSubClampRGBACommand : public DrawerCommand { - BYTE *_dest; + BYTE * RESTRICT _dest; int _count; int _pitch; ShadeConstants _shade_constants; @@ -3435,7 +3695,7 @@ class Tmvline4RevSubClampRGBACommand : public DrawerCommand fixed_t palookuplight[4]; DWORD vplce[4]; DWORD vince[4]; - const uint32 *bufplce[4]; + const uint32 *RESTRICT bufplce[4]; public: Tmvline4RevSubClampRGBACommand() @@ -3520,7 +3780,7 @@ class DrawFogBoundaryLineRGBACommand : public DrawerCommand int _y; int _x; int _x2; - BYTE *_destorg; + BYTE * RESTRICT _destorg; fixed_t _light; ShadeConstants _shade_constants; @@ -3592,10 +3852,10 @@ class DrawTiltedSpanRGBACommand : public DrawerCommand int _y; int _x1; int _x2; - BYTE *_destorg; + BYTE * RESTRICT _destorg; fixed_t _light; ShadeConstants _shade_constants; - const BYTE *_source; + const BYTE * RESTRICT _source; public: DrawTiltedSpanRGBACommand(int y, int x1, int x2) @@ -3637,7 +3897,7 @@ class DrawColoredSpanRGBACommand : public DrawerCommand int _y; int _x1; int _x2; - BYTE *_destorg; + BYTE * RESTRICT _destorg; fixed_t _light; int _color; @@ -3678,7 +3938,7 @@ class FillTransColumnRGBACommand : public DrawerCommand int _y2; int _color; int _a; - BYTE *_destorg; + BYTE * 
RESTRICT _destorg; int _pitch; fixed_t _light; diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index 83977d65c1..8f051b4cb0 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -290,6 +290,17 @@ public: #endif #endif +// Promise compiler we have no aliasing of this pointer +#ifndef RESTRICT +#if defined(_MSC_VER) +#define RESTRICT __restrict +#elif defined(__GNUC__) +#define RESTRICT __restrict__ +#else +#define RESTRICT +#endif +#endif + // calculates the light constant passed to the shade_pal_index function FORCEINLINE uint32_t calc_light_multiplier(dsfixed_t light) { @@ -413,6 +424,86 @@ FORCEINLINE uint32_t alpha_blend(uint32_t fg, uint32_t bg) return 0xff000000 | (red << 16) | (green << 8) | blue; } +// Calculate constants for a simple shade +#define AVX2_SHADE_SIMPLE_INIT(light) \ + __m256i mlight = _mm256_set_epi16(256, light, light, light, 256, light, light, light, 256, light, light, light, 256, light, light, light); + +// Calculate constants for a simple shade with different light levels for each pixel +#define AVX2_SHADE_SIMPLE_INIT4(light3, light2, light1, light0) \ + __m256i mlight = _mm256_set_epi16(256, light3, light3, light3, 256, light2, light2, light2, 256, light1, light1, light1, 256, light0, light0, light0); + +// Simple shade 8 pixels +#define AVX2_SHADE_SIMPLE(fg) { \ + __m256i fg_hi = _mm256_unpackhi_epi8(fg, _mm256_setzero_si256()); \ + __m256i fg_lo = _mm256_unpacklo_epi8(fg, _mm256_setzero_si256()); \ + fg_hi = _mm256_mullo_epi16(fg_hi, mlight); \ + fg_hi = _mm256_srli_epi16(fg_hi, 8); \ + fg_lo = _mm256_mullo_epi16(fg_lo, mlight); \ + fg_lo = _mm256_srli_epi16(fg_lo, 8); \ + fg = _mm256_packus_epi16(fg_lo, fg_hi); \ +} + +// Calculate constants for a complex shade +#define AVX2_SHADE_INIT(light, shade_constants) \ + __m256i mlight = _mm256_set_epi16(256, light, light, light, 256, light, light, light, 256, light, light, light, 256, light, light, light); \ + __m256i color = _mm256_set_epi16( \ + shade_constants.light_alpha, 
shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, \ + shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, \ + shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, \ + shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); \ + __m256i fade = _mm256_set_epi16( \ + shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, \ + shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, \ + shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, \ + shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); \ + __m256i fade_amount = _mm256_mullo_epi16(fade, _mm256_subs_epu16(_mm256_set1_epi16(256), mlight)); \ + __m256i desaturate = _mm256_set1_epi16(shade_constants.desaturate); \ + __m256i inv_desaturate = _mm256_set1_epi16(256 - shade_constants.desaturate); + +// Calculate constants for a complex shade with different light levels for each pixel +#define AVX2_SHADE_INIT4(light3, light2, light1, light0, shade_constants) \ + __m256i mlight = _mm256_set_epi16(256, light3, light3, light3, 256, light2, light2, light2, 256, light1, light1, light1, 256, light0, light0, light0); \ + __m256i color = _mm256_set_epi16( \ + shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, \ + shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, \ + shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, \ + shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, 
shade_constants.light_blue); \ + __m256i fade = _mm256_set_epi16( \ + shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, \ + shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, \ + shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, \ + shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); \ + __m256i fade_amount = _mm256_mullo_epi16(fade, _mm256_subs_epu16(_mm256_set1_epi16(256), mlight)); \ + __m256i desaturate = _mm256_set1_epi16(shade_constants.desaturate); \ + __m256i inv_desaturate = _mm256_set1_epi16(256 - shade_constants.desaturate); + +// Complex shade 8 pixels +#define AVX2_SHADE(fg, shade_constants) { \ + __m256i fg_hi = _mm256_unpackhi_epi8(fg, _mm256_setzero_si256()); \ + __m256i fg_lo = _mm256_unpacklo_epi8(fg, _mm256_setzero_si256()); \ + \ + __m256i intensity_hi = _mm256_mullo_epi16(fg_hi, _mm256_set_epi16(0, 77, 143, 37, 0, 77, 143, 37, 0, 77, 143, 37, 0, 77, 143, 37)); \ + __m256i intensity_lo = _mm256_mullo_epi16(fg_lo, _mm256_set_epi16(0, 77, 143, 37, 0, 77, 143, 37, 0, 77, 143, 37, 0, 77, 143, 37)); \ + __m256i intensity = _mm256_mullo_epi16(_mm256_srli_epi16(_mm256_hadd_epi16(_mm256_hadd_epi16(intensity_lo, intensity_hi), _mm256_setzero_si256()), 8), desaturate); \ + intensity = _mm256_unpacklo_epi16(intensity, intensity); \ + intensity_hi = _mm256_unpackhi_epi32(intensity, intensity); \ + intensity_lo = _mm256_unpacklo_epi32(intensity, intensity); \ + \ + fg_hi = _mm256_srli_epi16(_mm256_adds_epu16(_mm256_mullo_epi16(fg_hi, inv_desaturate), intensity_hi), 8); \ + fg_hi = _mm256_srli_epi16(_mm256_adds_epu16(_mm256_mullo_epi16(fg_hi, mlight), fade_amount), 8); \ + fg_hi = _mm256_srli_epi16(_mm256_mullo_epi16(fg_hi, color), 8); \ + \ + fg_lo = _mm256_srli_epi16(_mm256_adds_epu16(_mm256_mullo_epi16(fg_lo, inv_desaturate), 
intensity_lo), 8); \ + fg_lo = _mm256_srli_epi16(_mm256_adds_epu16(_mm256_mullo_epi16(fg_lo, mlight), fade_amount), 8); \ + fg_lo = _mm256_srli_epi16(_mm256_mullo_epi16(fg_lo, color), 8); \ + \ + fg = _mm256_packus_epi16(fg_lo, fg_hi); \ +} + + + + + // Calculate constants for a simple shade #define SSE_SHADE_SIMPLE_INIT(light) \ __m128i mlight_hi = _mm_set_epi16(256, light, light, light, 256, light, light, light); \ diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp index 2311cb4477..269dd9d9d8 100644 --- a/src/r_drawt_rgba.cpp +++ b/src/r_drawt_rgba.cpp @@ -59,7 +59,7 @@ class RtCopy1colRGBACommand : public DrawerCommand int sx; int yl; int yh; - BYTE *_destorg; + BYTE * RESTRICT _destorg; int _pitch; public: @@ -123,9 +123,9 @@ class RtMap1colRGBACommand : public DrawerCommand int yh; fixed_t _light; ShadeConstants _shade_constants; - BYTE *_destorg; + BYTE * RESTRICT _destorg; int _pitch; - BYTE *_colormap; + BYTE * RESTRICT _colormap; public: RtMap1colRGBACommand(int hx, int sx, int yl, int yh) @@ -188,9 +188,9 @@ class RtMap4colsRGBACommand : public DrawerCommand int yh; fixed_t _light; ShadeConstants _shade_constants; - BYTE *_destorg; + BYTE * RESTRICT _destorg; int _pitch; - BYTE *_colormap; + BYTE * RESTRICT _colormap; public: RtMap4colsRGBACommand(int sx, int yl, int yh) @@ -383,7 +383,7 @@ public: class RtTranslate1colRGBACommand : public DrawerCommand { - const BYTE *translation; + const BYTE * RESTRICT translation; int hx; int yl; int yh; @@ -447,7 +447,7 @@ public: class RtTranslate4colsRGBACommand : public DrawerCommand { - const BYTE *translation; + const BYTE * RESTRICT translation; int yl; int yh; @@ -507,13 +507,13 @@ class RtAdd1colRGBACommand : public DrawerCommand int sx; int yl; int yh; - BYTE *_destorg; + BYTE * RESTRICT _destorg; int _pitch; fixed_t _light; ShadeConstants _shade_constants; fixed_t _srcalpha; fixed_t _destalpha; - BYTE *_colormap; + BYTE * RESTRICT _colormap; public: RtAdd1colRGBACommand(int hx, int sx, int yl, int 
yh) @@ -583,11 +583,11 @@ class RtAdd4colsRGBACommand : public DrawerCommand int sx; int yl; int yh; - BYTE *_destorg; + BYTE * RESTRICT _destorg; int _pitch; fixed_t _light; ShadeConstants _shade_constants; - BYTE *_colormap; + BYTE * RESTRICT _colormap; fixed_t _srcalpha; fixed_t _destalpha; @@ -764,8 +764,8 @@ class RtShaded1colRGBACommand : public DrawerCommand int sx; int yl; int yh; - lighttable_t *_colormap; - BYTE *_destorg; + lighttable_t * RESTRICT _colormap; + BYTE * RESTRICT _destorg; int _pitch; int _color; fixed_t _light; @@ -833,9 +833,9 @@ class RtShaded4colsRGBACommand : public DrawerCommand int sx; int yl; int yh; - lighttable_t *_colormap; + lighttable_t * RESTRICT _colormap; int _color; - BYTE *_destorg; + BYTE * RESTRICT _destorg; int _pitch; fixed_t _light; @@ -957,7 +957,7 @@ class RtAddClamp1colRGBACommand : public DrawerCommand int sx; int yl; int yh; - BYTE *_destorg; + BYTE * RESTRICT _destorg; int _pitch; fixed_t _light; ShadeConstants _shade_constants; @@ -1029,7 +1029,7 @@ class RtAddClamp4colsRGBACommand : public DrawerCommand int sx; int yl; int yh; - BYTE *_destorg; + BYTE * RESTRICT _destorg; int _pitch; fixed_t _light; fixed_t _srcalpha; @@ -1205,7 +1205,7 @@ class RtSubClamp1colRGBACommand : public DrawerCommand int sx; int yl; int yh; - BYTE *_destorg; + BYTE * RESTRICT _destorg; int _pitch; fixed_t _light; fixed_t _srcalpha; @@ -1277,7 +1277,7 @@ class RtSubClamp4colsRGBACommand : public DrawerCommand int sx; int yl; int yh; - BYTE *_destorg; + BYTE * RESTRICT _destorg; int _pitch; fixed_t _light; fixed_t _srcalpha; @@ -1353,7 +1353,7 @@ class RtRevSubClamp1colRGBACommand : public DrawerCommand int sx; int yl; int yh; - BYTE *_destorg; + BYTE * RESTRICT _destorg; int _pitch; fixed_t _light; fixed_t _srcalpha; @@ -1425,7 +1425,7 @@ class RtRevSubClamp4colsRGBACommand : public DrawerCommand int sx; int yl; int yh; - BYTE *_destorg; + BYTE * RESTRICT _destorg; int _pitch; fixed_t _light; fixed_t _srcalpha; @@ -1497,7 +1497,7 @@ 
public: class RtInitColsRGBACommand : public DrawerCommand { - BYTE *buff; + BYTE * RESTRICT buff; public: RtInitColsRGBACommand(BYTE *buff) @@ -1516,7 +1516,7 @@ class DrawColumnHorizRGBACommand : public DrawerCommand int _count; fixed_t _iscale; fixed_t _texturefrac; - const BYTE *_source; + const BYTE * RESTRICT _source; int _x; int _yl; int _yh; From 3f905197d09e224db664264c9c8534985ca4c7df Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 19 Jun 2016 07:40:01 +0200 Subject: [PATCH 059/912] Moved vectorized drawers to their own files --- src/r_draw_rgba.cpp | 701 +++-------------------------------------- src/r_draw_rgba.h | 161 +++++++--- src/r_draw_rgba_sse.h | 491 +++++++++++++++++++++++++++++ src/r_drawt_rgba.cpp | 443 ++++---------------------- src/r_drawt_rgba_sse.h | 495 +++++++++++++++++++++++++++++ 5 files changed, 1212 insertions(+), 1079 deletions(-) create mode 100644 src/r_draw_rgba_sse.h create mode 100644 src/r_drawt_rgba_sse.h diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 2712508550..28c5df2ac6 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -48,10 +48,6 @@ #endif #include -#ifdef _MSC_VER -#pragma warning(disable: 4752) // warning C4752: found Intel(R) Advanced Vector Extensions; consider using /arch:AVX -#endif - extern int vlinebits; extern int mvlinebits; extern int tmvlinebits; @@ -62,8 +58,38 @@ extern float rw_lightstep; extern int wallshade; CVAR(Bool, r_multithreaded, true, 0) +CVAR(Bool, r_linearlight, false, 0) -//#define USE_AVX // Use AVX2 256 bit intrinsics (requires Haswell or newer) +#ifndef NO_SSE + +// Generate SSE drawers: +#define VecCommand(name) name##_SSE_Command +#define VEC_SHADE_SIMPLE_INIT SSE_SHADE_SIMPLE_INIT +#define VEC_SHADE_SIMPLE_INIT4 SSE_SHADE_SIMPLE_INIT4 +#define VEC_SHADE_SIMPLE SSE_SHADE_SIMPLE +#define VEC_SHADE_INIT SSE_SHADE_INIT +#define VEC_SHADE_INIT4 SSE_SHADE_INIT4 +#define VEC_SHADE SSE_SHADE +#include "r_draw_rgba_sse.h" + +// Generate AVX drawers: +#undef 
VecCommand +#undef VEC_SHADE_SIMPLE_INIT +#undef VEC_SHADE_SIMPLE_INIT4 +#undef VEC_SHADE_SIMPLE +#undef VEC_SHADE_INIT +#undef VEC_SHADE_INIT4 +#undef VEC_SHADE +#define VecCommand(name) name##_AVX_Command +#define VEC_SHADE_SIMPLE_INIT AVX_LINEAR_SHADE_SIMPLE_INIT +#define VEC_SHADE_SIMPLE_INIT4 AVX_LINEAR_SHADE_SIMPLE_INIT4 +#define VEC_SHADE_SIMPLE AVX_LINEAR_SHADE_SIMPLE +#define VEC_SHADE_INIT AVX_LINEAR_SHADE_INIT +#define VEC_SHADE_INIT4 AVX_LINEAR_SHADE_INIT4 +#define VEC_SHADE AVX_LINEAR_SHADE +#include "r_draw_rgba_sse.h" + +#endif ///////////////////////////////////////////////////////////////////////////// @@ -1495,7 +1521,6 @@ public: _shade_constants = ds_shade_constants; } -#ifdef NO_SSE void Execute(DrawerThread *thread) override { if (thread->line_skipped_by_thread(_y)) @@ -1560,401 +1585,6 @@ public: } while (--count); } } -#elif defined(USE_AVX) - void Execute(DrawerThread *thread) override - { - if (thread->line_skipped_by_thread(_y)) - return; - - dsfixed_t xfrac; - dsfixed_t yfrac; - dsfixed_t xstep; - dsfixed_t ystep; - uint32_t* dest; - const uint32_t* source = _source; - int count; - int spot; - - xfrac = _xfrac; - yfrac = _yfrac; - - dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; - - count = _x2 - _x1 + 1; - - xstep = _xstep; - ystep = _ystep; - - uint32_t light = calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; - - if (_xbits == 6 && _ybits == 6) - { - // 64x64 is the most common case by far, so special case it. - - int sse_count = count / 8; - count -= sse_count * 8; - - if (shade_constants.simple_shade) - { - AVX2_SHADE_SIMPLE_INIT(light); - - while (sse_count--) - { - uint32_t fg_pixels[8]; - for (int i = 0; i < 8; i++) - { - // Current texture index in u,v. - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - fg_pixels[i] = source[spot]; - xfrac += xstep; - yfrac += ystep; - } - - // Lookup pixel from flat texture tile, - // re-index using light/colormap. 
- __m256i fg = _mm256_loadu_si256((const __m256i*)fg_pixels); - AVX2_SHADE_SIMPLE(fg); - _mm256_storeu_si256((__m256i*)dest, fg); - - // Next step in u,v. - dest += 8; - } - } - else - { - AVX2_SHADE_INIT(light, shade_constants); - - while (sse_count--) - { - uint32_t fg_pixels[8]; - for (int i = 0; i < 8; i++) - { - // Current texture index in u,v. - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - fg_pixels[i] = source[spot]; - xfrac += xstep; - yfrac += ystep; - } - - // Lookup pixel from flat texture tile, - // re-index using light/colormap. - __m256i fg = _mm256_loadu_si256((const __m256i*)fg_pixels); - AVX2_SHADE(fg, shade_constants); - _mm256_storeu_si256((__m256i*)dest, fg); - - // Next step in u,v. - dest += 8; - } - } - - if (count == 0) - return; - - do - { - // Current texture index in u,v. - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - - // Lookup pixel from flat texture tile - *dest++ = shade_bgra(source[spot], light, shade_constants); - - // Next step in u,v. 
- xfrac += xstep; - yfrac += ystep; - } while (--count); - } - else - { - BYTE yshift = 32 - _ybits; - BYTE xshift = yshift - _xbits; - int xmask = ((1 << _xbits) - 1) << _ybits; - - int sse_count = count / 8; - count -= sse_count * 8; - - if (shade_constants.simple_shade) - { - AVX2_SHADE_SIMPLE_INIT(light); - - while (sse_count--) - { - uint32_t fg_pixels[8]; - for (int i = 0; i < 8; i++) - { - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - fg_pixels[i] = source[spot]; - xfrac += xstep; - yfrac += ystep; - } - - // Lookup pixel from flat texture tile - __m256i fg = _mm256_loadu_si256((const __m256i*)fg_pixels); - AVX2_SHADE_SIMPLE(fg); - _mm256_storeu_si256((__m256i*)dest, fg); - dest += 8; - } - } - else - { - AVX2_SHADE_INIT(light, shade_constants); - - while (sse_count--) - { - uint32_t fg_pixels[8]; - for (int i = 0; i < 8; i++) - { - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - fg_pixels[i] = source[spot]; - xfrac += xstep; - yfrac += ystep; - } - - // Lookup pixel from flat texture tile - __m256i fg = _mm256_loadu_si256((const __m256i*)fg_pixels); - AVX2_SHADE_SIMPLE(fg); - _mm256_storeu_si256((__m256i*)dest, fg); - dest += 4; - } - } - - if (count == 0) - return; - - do - { - // Current texture index in u,v. - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - - // Lookup pixel from flat texture tile - *dest++ = shade_bgra(source[spot], light, shade_constants); - - // Next step in u,v. 
- xfrac += xstep; - yfrac += ystep; - } while (--count); - } - } -#else - void Execute(DrawerThread *thread) override - { - if (thread->line_skipped_by_thread(_y)) - return; - - dsfixed_t xfrac; - dsfixed_t yfrac; - dsfixed_t xstep; - dsfixed_t ystep; - uint32_t* dest; - const uint32_t* source = _source; - int count; - int spot; - - xfrac = _xfrac; - yfrac = _yfrac; - - dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; - - count = _x2 - _x1 + 1; - - xstep = _xstep; - ystep = _ystep; - - uint32_t light = calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; - - if (_xbits == 6 && _ybits == 6) - { - // 64x64 is the most common case by far, so special case it. - - int sse_count = count / 4; - count -= sse_count * 4; - - if (shade_constants.simple_shade) - { - SSE_SHADE_SIMPLE_INIT(light); - - while (sse_count--) - { - // Current texture index in u,v. - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p0 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p1 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p2 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p3 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - // Lookup pixel from flat texture tile, - // re-index using light/colormap. - __m128i fg = _mm_set_epi32(p3, p2, p1, p0); - SSE_SHADE_SIMPLE(fg); - _mm_storeu_si128((__m128i*)dest, fg); - - // Next step in u,v. - dest += 4; - } - } - else - { - SSE_SHADE_INIT(light, shade_constants); - - while (sse_count--) - { - // Current texture index in u,v. 
- spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p0 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p1 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p2 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p3 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - // Lookup pixel from flat texture tile, - // re-index using light/colormap. - __m128i fg = _mm_set_epi32(p3, p2, p1, p0); - SSE_SHADE(fg, shade_constants); - _mm_storeu_si128((__m128i*)dest, fg); - - // Next step in u,v. - dest += 4; - } - } - - if (count == 0) - return; - - do - { - // Current texture index in u,v. - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - - // Lookup pixel from flat texture tile - *dest++ = shade_bgra(source[spot], light, shade_constants); - - // Next step in u,v. 
- xfrac += xstep; - yfrac += ystep; - } while (--count); - } - else - { - BYTE yshift = 32 - _ybits; - BYTE xshift = yshift - _xbits; - int xmask = ((1 << _xbits) - 1) << _ybits; - - int sse_count = count / 4; - count -= sse_count * 4; - - if (shade_constants.simple_shade) - { - SSE_SHADE_SIMPLE_INIT(light); - - while (sse_count--) - { - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - uint32_t p0 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - uint32_t p1 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - uint32_t p2 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - uint32_t p3 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - // Lookup pixel from flat texture tile - __m128i fg = _mm_set_epi32(p3, p2, p1, p0); - SSE_SHADE_SIMPLE(fg); - _mm_storeu_si128((__m128i*)dest, fg); - dest += 4; - } - } - else - { - SSE_SHADE_INIT(light, shade_constants); - - while (sse_count--) - { - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - uint32_t p0 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - uint32_t p1 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - uint32_t p2 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - uint32_t p3 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - // Lookup pixel from flat texture tile - __m128i fg = _mm_set_epi32(p3, p2, p1, p0); - SSE_SHADE(fg, shade_constants); - _mm_storeu_si128((__m128i*)dest, fg); - dest += 4; - } - } - - if (count == 0) - return; - - do - { - // Current texture index in u,v. 
- spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - - // Lookup pixel from flat texture tile - *dest++ = shade_bgra(source[spot], light, shade_constants); - - // Next step in u,v. - xfrac += xstep; - yfrac += ystep; - } while (--count); - } - } -#endif }; class DrawSpanMaskedRGBACommand : public DrawerCommand @@ -2698,7 +2328,6 @@ public: } } -#ifdef NO_SSE void Execute(DrawerThread *thread) override { int count = thread->count_for_thread(_dest_y, _count); @@ -2735,165 +2364,6 @@ public: dest += pitch; } while (--count); } -#elif defined(USE_AVX) - void Execute(DrawerThread *thread) override - { - int count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - int bits = vlinebits; - int pitch = _pitch * thread->num_cores; - - uint32_t light0 = calc_light_multiplier(palookuplight[0]); - uint32_t light1 = calc_light_multiplier(palookuplight[1]); - uint32_t light2 = calc_light_multiplier(palookuplight[2]); - uint32_t light3 = calc_light_multiplier(palookuplight[3]); - - ShadeConstants shade_constants = _shade_constants; - - DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; - DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = thread->skipped_by_thread(_dest_y); - for (int i = 0; i < 4; i++) - { - local_vplce[i] += local_vince[i] * skipped; - local_vince[i] *= thread->num_cores; - } - - if (count & 1) - { - DWORD place; - dest[0] = shade_bgra(bufplce[0][(place = local_vplce[0]) >> bits], light0, shade_constants); local_vplce[0] = place + local_vince[0]; - dest[1] = shade_bgra(bufplce[1][(place = local_vplce[1]) >> bits], light1, shade_constants); local_vplce[1] = place + local_vince[1]; - dest[2] = shade_bgra(bufplce[2][(place = local_vplce[2]) >> bits], light2, shade_constants); local_vplce[2] = place + local_vince[2]; - dest[3] = shade_bgra(bufplce[3][(place = local_vplce[3]) >> bits], light3, shade_constants); 
local_vplce[3] = place + local_vince[3]; - dest += pitch; - } - count /= 2; - - // Assume all columns come from the same texture (which they do): - const uint32_t *base_addr = MIN(MIN(MIN(bufplce[0], bufplce[1]), bufplce[2]), bufplce[3]); - __m256i column_offsets = _mm256_set_epi32( - bufplce[3] - base_addr, bufplce[2] - base_addr, bufplce[1] - base_addr, bufplce[0] - base_addr, - bufplce[3] - base_addr, bufplce[2] - base_addr, bufplce[1] - base_addr, bufplce[0] - base_addr); - - __m256i place = _mm256_set_epi32( - local_vplce[3] + local_vince[3], local_vplce[2] + local_vince[2], local_vplce[1] + local_vince[1], local_vplce[0] + local_vince[0], - local_vplce[3], local_vplce[2], local_vplce[1], local_vplce[0]); - - __m256i step = _mm256_set_epi32( - local_vince[3], local_vince[2], local_vince[1], local_vince[0], - local_vince[3], local_vince[2], local_vince[1], local_vince[0]); - step = _mm256_add_epi32(step, step); - - if (shade_constants.simple_shade) - { - AVX2_SHADE_SIMPLE_INIT4(light3, light2, light1, light0); - while (count--) - { - __m256i fg = _mm256_i32gather_epi32((const int *)base_addr, _mm256_add_epi32(column_offsets, _mm256_srli_epi32(place, bits)), 4); - place = _mm256_add_epi32(place, step); - AVX2_SHADE_SIMPLE(fg); - _mm256_storeu2_m128i((__m128i*)(dest + pitch), (__m128i*)dest, fg); - dest += pitch * 2; - } - } - else - { - AVX2_SHADE_INIT4(light3, light2, light1, light0, shade_constants); - while (count--) - { - __m256i fg = _mm256_i32gather_epi32((const int *)base_addr, _mm256_add_epi32(column_offsets, _mm256_srai_epi32(place, bits)), 4); - place = _mm256_add_epi32(place, step); - AVX2_SHADE(fg, shade_constants); - _mm256_storeu2_m128i((__m128i*)(dest + pitch), (__m128i*)dest, fg); - dest += pitch * 2; - } - } - } -#else - void Execute(DrawerThread *thread) override - { - int count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - int bits = 
vlinebits; - int pitch = _pitch * thread->num_cores; - - uint32_t light0 = calc_light_multiplier(palookuplight[0]); - uint32_t light1 = calc_light_multiplier(palookuplight[1]); - uint32_t light2 = calc_light_multiplier(palookuplight[2]); - uint32_t light3 = calc_light_multiplier(palookuplight[3]); - - ShadeConstants shade_constants = _shade_constants; - - DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; - DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = thread->skipped_by_thread(_dest_y); - for (int i = 0; i < 4; i++) - { - local_vplce[i] += local_vince[i] * skipped; - local_vince[i] *= thread->num_cores; - } - - if (shade_constants.simple_shade) - { - SSE_SHADE_SIMPLE_INIT4(light3, light2, light1, light0); - do - { - DWORD place0 = local_vplce[0]; - DWORD place1 = local_vplce[1]; - DWORD place2 = local_vplce[2]; - DWORD place3 = local_vplce[3]; - - uint32_t p0 = bufplce[0][place0 >> bits]; - uint32_t p1 = bufplce[1][place1 >> bits]; - uint32_t p2 = bufplce[2][place2 >> bits]; - uint32_t p3 = bufplce[3][place3 >> bits]; - - local_vplce[0] = place0 + local_vince[0]; - local_vplce[1] = place1 + local_vince[1]; - local_vplce[2] = place2 + local_vince[2]; - local_vplce[3] = place3 + local_vince[3]; - - __m128i fg = _mm_set_epi32(p3, p2, p1, p0); - SSE_SHADE_SIMPLE(fg); - _mm_storeu_si128((__m128i*)dest, fg); - dest += pitch; - } while (--count); - } - else - { - SSE_SHADE_INIT4(light3, light2, light1, light0, shade_constants); - do - { - DWORD place0 = local_vplce[0]; - DWORD place1 = local_vplce[1]; - DWORD place2 = local_vplce[2]; - DWORD place3 = local_vplce[3]; - - uint32_t p0 = bufplce[0][place0 >> bits]; - uint32_t p1 = bufplce[1][place1 >> bits]; - uint32_t p2 = bufplce[2][place2 >> bits]; - uint32_t p3 = bufplce[3][place3 >> bits]; - - local_vplce[0] = place0 + local_vince[0]; - local_vplce[1] = place1 + local_vince[1]; - local_vplce[2] = place2 + local_vince[2]; - local_vplce[3] = place3 + local_vince[3]; - - 
__m128i fg = _mm_set_epi32(p3, p2, p1, p0); - SSE_SHADE(fg, shade_constants); - _mm_storeu_si128((__m128i*)dest, fg); - dest += pitch; - } while (--count); - } - } -#endif }; class Mvlinec1RGBACommand : public DrawerCommand @@ -2980,7 +2450,6 @@ public: } } -#ifdef NO_SSE void Execute(DrawerThread *thread) override { int count = thread->count_for_thread(_dest_y, _count); @@ -3018,93 +2487,6 @@ public: dest += pitch; } while (--count); } -#else - void Execute(DrawerThread *thread) override - { - int count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - int pitch = _pitch * thread->num_cores; - int bits = mvlinebits; - - uint32_t light0 = calc_light_multiplier(palookuplight[0]); - uint32_t light1 = calc_light_multiplier(palookuplight[1]); - uint32_t light2 = calc_light_multiplier(palookuplight[2]); - uint32_t light3 = calc_light_multiplier(palookuplight[3]); - - ShadeConstants shade_constants = _shade_constants; - - DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; - DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = thread->skipped_by_thread(_dest_y); - for (int i = 0; i < 4; i++) - { - local_vplce[i] += local_vince[i] * skipped; - local_vince[i] *= thread->num_cores; - } - - if (shade_constants.simple_shade) - { - SSE_SHADE_SIMPLE_INIT4(light3, light2, light1, light0); - do - { - DWORD place0 = local_vplce[0]; - DWORD place1 = local_vplce[1]; - DWORD place2 = local_vplce[2]; - DWORD place3 = local_vplce[3]; - - uint32_t pix0 = bufplce[0][place0 >> bits]; - uint32_t pix1 = bufplce[1][place1 >> bits]; - uint32_t pix2 = bufplce[2][place2 >> bits]; - uint32_t pix3 = bufplce[3][place3 >> bits]; - - // movemask = !(pix == 0) - __m128i movemask = _mm_xor_si128(_mm_cmpeq_epi32(_mm_set_epi32(pix3, pix2, pix1, pix0), _mm_setzero_si128()), _mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128())); - - local_vplce[0] = 
place0 + local_vince[0]; - local_vplce[1] = place1 + local_vince[1]; - local_vplce[2] = place2 + local_vince[2]; - local_vplce[3] = place3 + local_vince[3]; - - __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); - SSE_SHADE_SIMPLE(fg); - _mm_maskmoveu_si128(fg, movemask, (char*)dest); - dest += pitch; - } while (--count); - } - else - { - SSE_SHADE_INIT4(light3, light2, light1, light0, shade_constants); - do - { - DWORD place0 = local_vplce[0]; - DWORD place1 = local_vplce[1]; - DWORD place2 = local_vplce[2]; - DWORD place3 = local_vplce[3]; - - uint32_t pix0 = bufplce[0][place0 >> bits]; - uint32_t pix1 = bufplce[1][place1 >> bits]; - uint32_t pix2 = bufplce[2][place2 >> bits]; - uint32_t pix3 = bufplce[3][place3 >> bits]; - - // movemask = !(pix == 0) - __m128i movemask = _mm_xor_si128(_mm_cmpeq_epi32(_mm_set_epi32(pix3, pix2, pix1, pix0), _mm_setzero_si128()), _mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128())); - - local_vplce[0] = place0 + local_vince[0]; - local_vplce[1] = place1 + local_vince[1]; - local_vplce[2] = place2 + local_vince[2]; - local_vplce[3] = place3 + local_vince[3]; - - __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); - SSE_SHADE(fg, shade_constants); - _mm_maskmoveu_si128(fg, movemask, (char*)dest); - dest += pitch; - } while (--count); - } - } -#endif }; class Tmvline1AddRGBACommand : public DrawerCommand @@ -4254,7 +3636,14 @@ void R_DrawRevSubClampTranslatedColumn_rgba() void R_DrawSpan_rgba() { +#ifdef NO_SSE DrawerCommandQueue::QueueCommand(); +#else + if (!r_linearlight) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); +#endif } void R_DrawSpanMasked_rgba() @@ -4304,7 +3693,14 @@ DWORD vlinec1_rgba() void vlinec4_rgba() { +#ifdef NO_SSE DrawerCommandQueue::QueueCommand(); +#else + if (!r_linearlight) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); +#endif for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } @@ -4317,7 +3713,14 @@ DWORD 
mvlinec1_rgba() void mvlinec4_rgba() { +#ifdef NO_SSE DrawerCommandQueue::QueueCommand(); +#else + if (!r_linearlight) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); +#endif for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index 8f051b4cb0..1744781624 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -424,59 +424,124 @@ FORCEINLINE uint32_t alpha_blend(uint32_t fg, uint32_t bg) return 0xff000000 | (red << 16) | (green << 8) | blue; } -// Calculate constants for a simple shade -#define AVX2_SHADE_SIMPLE_INIT(light) \ - __m256i mlight = _mm256_set_epi16(256, light, light, light, 256, light, light, light, 256, light, light, light, 256, light, light, light); +// Calculate constants for a simple shade with gamma correction +#define AVX_LINEAR_SHADE_SIMPLE_INIT(light) \ + __m256 mlight_hi = _mm256_set_ps(1.0f, light * (1.0f/256.0f), light * (1.0f/256.0f), light * (1.0f/256.0f), 1.0f, light * (1.0f/256.0f), light * (1.0f/256.0f), light * (1.0f/256.0f)); \ + mlight_hi = _mm256_mul_ps(mlight_hi, mlight_hi); \ + __m256 mlight_lo = mlight_hi; \ + __m256 mrcp_255 = _mm256_set1_ps(1.0f/255.0f); \ + __m256 m255 = _mm256_set1_ps(255.0f); -// Calculate constants for a simple shade with different light levels for each pixel -#define AVX2_SHADE_SIMPLE_INIT4(light3, light2, light1, light0) \ - __m256i mlight = _mm256_set_epi16(256, light3, light3, light3, 256, light2, light2, light2, 256, light1, light1, light1, 256, light0, light0, light0); +// Calculate constants for a simple shade with different light levels for each pixel and gamma correction (lane layout must match AVX_LINEAR_SHADE_SIMPLE: its per-lane unpack leaves pixels 3 and 1 in fg_hi and pixels 2 and 0 in fg_lo, high lane first, so mlight_hi carries light3/light1 and mlight_lo carries light2/light0) +#define AVX_LINEAR_SHADE_SIMPLE_INIT4(light3, light2, light1, light0) \ + __m256 mlight_hi = _mm256_set_ps(1.0f, light3 * (1.0f/256.0f), light3 * (1.0f/256.0f), light3 * (1.0f/256.0f), 1.0f, light1 * (1.0f/256.0f), light1 * (1.0f/256.0f), light1 * (1.0f/256.0f)); \ + __m256 mlight_lo = _mm256_set_ps(1.0f, light2 * (1.0f/256.0f),
light2 * (1.0f/256.0f), light2 * (1.0f/256.0f), 1.0f, light0 * (1.0f/256.0f), light0 * (1.0f/256.0f), light0 * (1.0f/256.0f)); \ + mlight_hi = _mm256_mul_ps(mlight_hi, mlight_hi); \ + mlight_lo = _mm256_mul_ps(mlight_lo, mlight_lo); \ + __m256 mrcp_255 = _mm256_set1_ps(1.0f/255.0f); \ + __m256 m255 = _mm256_set1_ps(255.0f); -// Simple shade 8 pixels -#define AVX2_SHADE_SIMPLE(fg) { \ - __m256i fg_hi = _mm256_unpackhi_epi8(fg, _mm256_setzero_si256()); \ - __m256i fg_lo = _mm256_unpacklo_epi8(fg, _mm256_setzero_si256()); \ - fg_hi = _mm256_mullo_epi16(fg_hi, mlight); \ - fg_hi = _mm256_srli_epi16(fg_hi, 8); \ - fg_lo = _mm256_mullo_epi16(fg_lo, mlight); \ - fg_lo = _mm256_srli_epi16(fg_lo, 8); \ - fg = _mm256_packus_epi16(fg_lo, fg_hi); \ +// Simple shade 4 pixels with gamma correction +#define AVX_LINEAR_SHADE_SIMPLE(fg) { \ + __m256i fg_16 = _mm256_set_m128i(_mm_unpackhi_epi8(fg, _mm_setzero_si128()), _mm_unpacklo_epi8(fg, _mm_setzero_si128())); \ + __m256 fg_hi = _mm256_cvtepi32_ps(_mm256_unpackhi_epi16(fg_16, _mm256_setzero_si256())); \ + __m256 fg_lo = _mm256_cvtepi32_ps(_mm256_unpacklo_epi16(fg_16, _mm256_setzero_si256())); \ + fg_hi = _mm256_mul_ps(fg_hi, mrcp_255); \ + fg_hi = _mm256_mul_ps(fg_hi, fg_hi); \ + fg_hi = _mm256_mul_ps(fg_hi, mlight_hi); \ + fg_hi = _mm256_sqrt_ps(fg_hi); \ + fg_hi = _mm256_mul_ps(fg_hi, m255); \ + fg_lo = _mm256_mul_ps(fg_lo, mrcp_255); \ + fg_lo = _mm256_mul_ps(fg_lo, fg_lo); \ + fg_lo = _mm256_mul_ps(fg_lo, mlight_lo); \ + fg_lo = _mm256_sqrt_ps(fg_lo); \ + fg_lo = _mm256_mul_ps(fg_lo, m255); \ + fg_16 = _mm256_packus_epi32(_mm256_cvtps_epi32(fg_lo), _mm256_cvtps_epi32(fg_hi)); \ + fg = _mm_packus_epi16(_mm256_extractf128_si256(fg_16, 0), _mm256_extractf128_si256(fg_16, 1)); \ } -// Calculate constants for a complex shade -#define AVX2_SHADE_INIT(light, shade_constants) \ - __m256i mlight = _mm256_set_epi16(256, light, light, light, 256, light, light, light, 256, light, light, light, 256, light, light, light); \ - __m256i color
= _mm256_set_epi16( \ - shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, \ - shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, \ - shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, \ - shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); \ - __m256i fade = _mm256_set_epi16( \ - shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, \ - shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, \ - shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, \ - shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); \ - __m256i fade_amount = _mm256_mullo_epi16(fade, _mm256_subs_epu16(_mm256_set1_epi16(256), mlight)); \ - __m256i desaturate = _mm256_set1_epi16(shade_constants.desaturate); \ - __m256i inv_desaturate = _mm256_set1_epi16(256 - shade_constants.desaturate); +// Calculate constants for a complex shade with gamma correction +#define AVX_LINEAR_SHADE_INIT(light, shade_constants) \ + __m256 mlight_hi = _mm256_set_ps(1.0f, light * (1.0f/256.0f), light * (1.0f/256.0f), light * (1.0f/256.0f), 1.0f, light * (1.0f/256.0f), light * (1.0f/256.0f), light * (1.0f/256.0f)); \ + mlight_hi = _mm256_mul_ps(mlight_hi, mlight_hi); \ + __m256 mlight_lo = mlight_hi; \ + __m256 mrcp_255 = _mm256_set1_ps(1.0f/255.0f); \ + __m256 m255 = _mm256_set1_ps(255.0f); \ + __m256 color = _mm256_set_ps( \ + shade_constants.light_alpha * (1.0f/256.0f), shade_constants.light_red * (1.0f/256.0f), shade_constants.light_green * (1.0f/256.0f), shade_constants.light_blue * (1.0f/256.0f), \ + shade_constants.light_alpha * (1.0f/256.0f), 
shade_constants.light_red * (1.0f/256.0f), shade_constants.light_green * (1.0f/256.0f), shade_constants.light_blue * (1.0f/256.0f)); \ + __m256 fade = _mm256_set_ps( \ + shade_constants.fade_alpha * (1.0f/256.0f), shade_constants.fade_red * (1.0f/256.0f), shade_constants.fade_green * (1.0f/256.0f), shade_constants.fade_blue * (1.0f/256.0f), \ + shade_constants.fade_alpha * (1.0f/256.0f), shade_constants.fade_red * (1.0f/256.0f), shade_constants.fade_green * (1.0f/256.0f), shade_constants.fade_blue * (1.0f/256.0f)); \ + __m256 fade_amount_hi = _mm256_mul_ps(fade, _mm256_sub_ps(_mm256_set1_ps(1.0f), mlight_hi)); \ + __m256 fade_amount_lo = _mm256_mul_ps(fade, _mm256_sub_ps(_mm256_set1_ps(1.0f), mlight_lo)); \ + __m256 inv_desaturate = _mm256_set1_ps((256 - shade_constants.desaturate) * (1.0f/256.0f)); \ + __m128 ss_desaturate = _mm_set_ss(shade_constants.desaturate * (1.0f/256.0f)); \ + __m128 intensity_weight = _mm_set_ps(0.0f, 77.0f/256.0f, 143.0f/256.0f, 37.0f/256.0f); -// Calculate constants for a complex shade with different light levels for each pixel -#define AVX2_SHADE_INIT4(light3, light2, light1, light0, shade_constants) \ - __m256i mlight = _mm256_set_epi16(256, light3, light3, light3, 256, light2, light2, light2, 256, light1, light1, light1, 256, light0, light0, light0); \ - __m256i color = _mm256_set_epi16( \ - shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, \ - shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, \ - shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, \ - shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); \ - __m256i fade = _mm256_set_epi16( \ - shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, \ - shade_constants.fade_alpha, 
shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, \ - shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, \ - shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); \ - __m256i fade_amount = _mm256_mullo_epi16(fade, _mm256_subs_epu16(_mm256_set1_epi16(256), mlight)); \ - __m256i desaturate = _mm256_set1_epi16(shade_constants.desaturate); \ - __m256i inv_desaturate = _mm256_set1_epi16(256 - shade_constants.desaturate); +// Calculate constants for a complex shade with different light levels for each pixel and gamma correction (lane layout must match AVX_LINEAR_SHADE: its per-lane unpack leaves pixels 3 and 1 in fg_hi and pixels 2 and 0 in fg_lo, high lane first, so mlight_hi carries light3/light1 and mlight_lo carries light2/light0; fade_amount_hi/lo follow from these) +#define AVX_LINEAR_SHADE_INIT4(light3, light2, light1, light0, shade_constants) \ + __m256 mlight_hi = _mm256_set_ps(1.0f, light3 * (1.0f/256.0f), light3 * (1.0f/256.0f), light3 * (1.0f/256.0f), 1.0f, light1 * (1.0f/256.0f), light1 * (1.0f/256.0f), light1 * (1.0f/256.0f)); \ + __m256 mlight_lo = _mm256_set_ps(1.0f, light2 * (1.0f/256.0f), light2 * (1.0f/256.0f), light2 * (1.0f/256.0f), 1.0f, light0 * (1.0f/256.0f), light0 * (1.0f/256.0f), light0 * (1.0f/256.0f)); \ + mlight_hi = _mm256_mul_ps(mlight_hi, mlight_hi); \ + mlight_lo = _mm256_mul_ps(mlight_lo, mlight_lo); \ + __m256 mrcp_255 = _mm256_set1_ps(1.0f/255.0f); \ + __m256 m255 = _mm256_set1_ps(255.0f); \ + __m256 color = _mm256_set_ps( \ + shade_constants.light_alpha * (1.0f/256.0f), shade_constants.light_red * (1.0f/256.0f), shade_constants.light_green * (1.0f/256.0f), shade_constants.light_blue * (1.0f/256.0f), \ + shade_constants.light_alpha * (1.0f/256.0f), shade_constants.light_red * (1.0f/256.0f), shade_constants.light_green * (1.0f/256.0f), shade_constants.light_blue * (1.0f/256.0f)); \ + __m256 fade = _mm256_set_ps( \ + shade_constants.fade_alpha * (1.0f/256.0f), shade_constants.fade_red * (1.0f/256.0f), shade_constants.fade_green * (1.0f/256.0f), shade_constants.fade_blue * (1.0f/256.0f), \ + shade_constants.fade_alpha * (1.0f/256.0f),
shade_constants.fade_red * (1.0f/256.0f), shade_constants.fade_green * (1.0f/256.0f), shade_constants.fade_blue * (1.0f/256.0f)); \ + __m256 fade_amount_hi = _mm256_mul_ps(fade, _mm256_sub_ps(_mm256_set1_ps(1.0f), mlight_hi)); \ + __m256 fade_amount_lo = _mm256_mul_ps(fade, _mm256_sub_ps(_mm256_set1_ps(1.0f), mlight_lo)); \ + __m256 inv_desaturate = _mm256_set1_ps((256 - shade_constants.desaturate) * (1.0f/256.0f)); \ + __m128 ss_desaturate = _mm_set_ss(shade_constants.desaturate * (1.0f/256.0f)); \ + __m128 intensity_weight = _mm_set_ps(0.0f, 77.0f/256.0f, 143.0f/256.0f, 37.0f/256.0f); +// Complex shade 4 pixels with gamma correction +#define AVX_LINEAR_SHADE(fg, shade_constants) { \ + __m256i fg_16 = _mm256_set_m128i(_mm_unpackhi_epi8(fg, _mm_setzero_si128()), _mm_unpacklo_epi8(fg, _mm_setzero_si128())); \ + __m256 fg_hi = _mm256_cvtepi32_ps(_mm256_unpackhi_epi16(fg_16, _mm256_setzero_si256())); \ + __m256 fg_lo = _mm256_cvtepi32_ps(_mm256_unpacklo_epi16(fg_16, _mm256_setzero_si256())); \ + fg_hi = _mm256_mul_ps(fg_hi, mrcp_255); \ + fg_hi = _mm256_mul_ps(fg_hi, fg_hi); \ + fg_lo = _mm256_mul_ps(fg_lo, mrcp_255); \ + fg_lo = _mm256_mul_ps(fg_lo, fg_lo); \ + \ + __m128 intensity_hi0 = _mm_mul_ps(_mm256_extractf128_ps(fg_hi, 0), intensity_weight); \ + __m128 intensity_hi1 = _mm_mul_ps(_mm256_extractf128_ps(fg_hi, 1), intensity_weight); \ + intensity_hi0 = _mm_mul_ss(_mm_add_ss(_mm_add_ss(intensity_hi0, _mm_shuffle_ps(intensity_hi0, intensity_hi0, _MM_SHUFFLE(1,1,1,1))), _mm_shuffle_ps(intensity_hi0, intensity_hi0, _MM_SHUFFLE(2,2,2,2))), ss_desaturate); \ + intensity_hi0 = _mm_shuffle_ps(intensity_hi0, intensity_hi0, _MM_SHUFFLE(0,0,0,0)); \ + intensity_hi1 = _mm_mul_ss(_mm_add_ss(_mm_add_ss(intensity_hi1, _mm_shuffle_ps(intensity_hi1, intensity_hi1, _MM_SHUFFLE(1,1,1,1))), _mm_shuffle_ps(intensity_hi1, intensity_hi1, _MM_SHUFFLE(2,2,2,2))), ss_desaturate); \ + intensity_hi1 = _mm_shuffle_ps(intensity_hi1, intensity_hi1, _MM_SHUFFLE(0,0,0,0)); \ + __m256 
intensity_hi = _mm256_set_m128(intensity_hi1, intensity_hi0); \ + \ + fg_hi = _mm256_add_ps(_mm256_mul_ps(fg_hi, inv_desaturate), intensity_hi); \ + fg_hi = _mm256_add_ps(_mm256_mul_ps(fg_hi, mlight_hi), fade_amount_hi); \ + fg_hi = _mm256_mul_ps(fg_hi, color); \ + \ + __m128 intensity_lo0 = _mm_mul_ps(_mm256_extractf128_ps(fg_lo, 0), intensity_weight); \ + __m128 intensity_lo1 = _mm_mul_ps(_mm256_extractf128_ps(fg_lo, 1), intensity_weight); \ + intensity_lo0 = _mm_mul_ss(_mm_add_ss(_mm_add_ss(intensity_lo0, _mm_shuffle_ps(intensity_lo0, intensity_lo0, _MM_SHUFFLE(1,1,1,1))), _mm_shuffle_ps(intensity_lo0, intensity_lo0, _MM_SHUFFLE(2,2,2,2))), ss_desaturate); \ + intensity_lo0 = _mm_shuffle_ps(intensity_lo0, intensity_lo0, _MM_SHUFFLE(0,0,0,0)); \ + intensity_lo1 = _mm_mul_ss(_mm_add_ss(_mm_add_ss(intensity_lo1, _mm_shuffle_ps(intensity_lo1, intensity_lo1, _MM_SHUFFLE(1,1,1,1))), _mm_shuffle_ps(intensity_lo1, intensity_lo1, _MM_SHUFFLE(2,2,2,2))), ss_desaturate); \ + intensity_lo1 = _mm_shuffle_ps(intensity_lo1, intensity_lo1, _MM_SHUFFLE(0,0,0,0)); \ + __m256 intensity_lo = _mm256_set_m128(intensity_lo1, intensity_lo0); \ + \ + fg_lo = _mm256_add_ps(_mm256_mul_ps(fg_lo, inv_desaturate), intensity_lo); \ + fg_lo = _mm256_add_ps(_mm256_mul_ps(fg_lo, mlight_lo), fade_amount_lo); \ + fg_lo = _mm256_mul_ps(fg_lo, color); \ + \ + fg_hi = _mm256_sqrt_ps(fg_hi); \ + fg_hi = _mm256_mul_ps(fg_hi, m255); \ + fg_lo = _mm256_sqrt_ps(fg_lo); \ + fg_lo = _mm256_mul_ps(fg_lo, m255); \ + fg_16 = _mm256_packus_epi32(_mm256_cvtps_epi32(fg_lo), _mm256_cvtps_epi32(fg_hi)); \ + fg = _mm_packus_epi16(_mm256_extractf128_si256(fg_16, 0), _mm256_extractf128_si256(fg_16, 1)); \ +} + +/* // Complex shade 8 pixels #define AVX2_SHADE(fg, shade_constants) { \ __m256i fg_hi = _mm256_unpackhi_epi8(fg, _mm256_setzero_si256()); \ @@ -499,7 +564,7 @@ FORCEINLINE uint32_t alpha_blend(uint32_t fg, uint32_t bg) \ fg = _mm256_packus_epi16(fg_lo, fg_hi); \ } - +*/ diff --git a/src/r_draw_rgba_sse.h 
b/src/r_draw_rgba_sse.h new file mode 100644 index 0000000000..14ebbbb41d --- /dev/null +++ b/src/r_draw_rgba_sse.h @@ -0,0 +1,491 @@ +// +// SSE/AVX intrinsics based drawers for the r_draw family of drawers. +// +// Note: This header file is intentionally not guarded by a __R_DRAW_RGBA_SSE__ define. +// It is because the code is nearly identical for SSE vs AVX. The file is included +// multiple times by r_draw_rgba.cpp with different defines that changes the class +// names outputted and the type of intrinsics used. + +#ifdef _MSC_VER +#pragma warning(disable: 4752) // warning C4752: found Intel(R) Advanced Vector Extensions; consider using /arch:AVX +#endif + +class VecCommand(DrawSpanRGBA) : public DrawerCommand +{ + const uint32_t * RESTRICT _source; + fixed_t _xfrac; + fixed_t _yfrac; + fixed_t _xstep; + fixed_t _ystep; + int _x1; + int _x2; + int _y; + int _xbits; + int _ybits; + BYTE * RESTRICT _destorg; + fixed_t _light; + ShadeConstants _shade_constants; + +public: + VecCommand(DrawSpanRGBA)() + { + _source = (const uint32_t*)ds_source; + _xfrac = ds_xfrac; + _yfrac = ds_yfrac; + _xstep = ds_xstep; + _ystep = ds_ystep; + _x1 = ds_x1; + _x2 = ds_x2; + _y = ds_y; + _xbits = ds_xbits; + _ybits = ds_ybits; + _destorg = dc_destorg; + _light = ds_light; + _shade_constants = ds_shade_constants; + } + + void Execute(DrawerThread *thread) override + { + if (thread->line_skipped_by_thread(_y)) + return; + + dsfixed_t xfrac; + dsfixed_t yfrac; + dsfixed_t xstep; + dsfixed_t ystep; + uint32_t* dest; + const uint32_t* source = _source; + int count; + int spot; + + xfrac = _xfrac; + yfrac = _yfrac; + + dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; + + count = _x2 - _x1 + 1; + + xstep = _xstep; + ystep = _ystep; + + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; + + if (_xbits == 6 && _ybits == 6) + { + // 64x64 is the most common case by far, so special case it. 
+ + int sse_count = count / 4; + count -= sse_count * 4; + + if (shade_constants.simple_shade) + { + VEC_SHADE_SIMPLE_INIT(light); + + while (sse_count--) + { + // Current texture index in u,v. + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p0 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p1 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p2 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p3 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + // Lookup pixel from flat texture tile, + // re-index using light/colormap. + __m128i fg = _mm_set_epi32(p3, p2, p1, p0); + VEC_SHADE_SIMPLE(fg); + _mm_storeu_si128((__m128i*)dest, fg); + + // Next step in u,v. + dest += 4; + } + } + else + { + VEC_SHADE_INIT(light, shade_constants); + + while (sse_count--) + { + // Current texture index in u,v. + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p0 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p1 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p2 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p3 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + // Lookup pixel from flat texture tile, + // re-index using light/colormap. + __m128i fg = _mm_set_epi32(p3, p2, p1, p0); + VEC_SHADE(fg, shade_constants); + _mm_storeu_si128((__m128i*)dest, fg); + + // Next step in u,v. + dest += 4; + } + } + + if (count == 0) + return; + + do + { + // Current texture index in u,v. 
+ spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + + // Lookup pixel from flat texture tile + *dest++ = shade_bgra(source[spot], light, shade_constants); + + // Next step in u,v. + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + else + { + BYTE yshift = 32 - _ybits; + BYTE xshift = yshift - _xbits; + int xmask = ((1 << _xbits) - 1) << _ybits; + + int sse_count = count / 4; + count -= sse_count * 4; + + if (shade_constants.simple_shade) + { + VEC_SHADE_SIMPLE_INIT(light); + + while (sse_count--) + { + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + uint32_t p0 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + uint32_t p1 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + uint32_t p2 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + uint32_t p3 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + // Lookup pixel from flat texture tile + __m128i fg = _mm_set_epi32(p3, p2, p1, p0); + VEC_SHADE_SIMPLE(fg); + _mm_storeu_si128((__m128i*)dest, fg); + dest += 4; + } + } + else + { + VEC_SHADE_INIT(light, shade_constants); + + while (sse_count--) + { + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + uint32_t p0 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + uint32_t p1 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + uint32_t p2 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + uint32_t p3 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + // Lookup pixel from flat texture tile + __m128i fg = _mm_set_epi32(p3, p2, p1, p0); + VEC_SHADE(fg, shade_constants); + _mm_storeu_si128((__m128i*)dest, fg); + dest += 4; + } + } + + if (count == 0) 
+ return; + + do + { + // Current texture index in u,v. + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + + // Lookup pixel from flat texture tile + *dest++ = shade_bgra(source[spot], light, shade_constants); + + // Next step in u,v. + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + } +}; + +class VecCommand(Vlinec4RGBA) : public DrawerCommand +{ + BYTE * RESTRICT _dest; + int _count; + int _pitch; + ShadeConstants _shade_constants; + int vlinebits; + fixed_t palookuplight[4]; + DWORD vplce[4]; + DWORD vince[4]; + const uint32 * RESTRICT bufplce[4]; + +public: + VecCommand(Vlinec4RGBA)() + { + _dest = dc_dest; + _count = dc_count; + _pitch = dc_pitch; + _shade_constants = dc_shade_constants; + vlinebits = ::vlinebits; + for (int i = 0; i < 4; i++) + { + palookuplight[i] = ::palookuplight[i]; + vplce[i] = ::vplce[i]; + vince[i] = ::vince[i]; + bufplce[i] = (const uint32 *)::bufplce[i]; + } + } + + void Execute(DrawerThread *thread) override + { + int count = thread->count_for_thread(_dest_y, _count); + if (count <= 0) + return; + + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); + int bits = vlinebits; + int pitch = _pitch * thread->num_cores; + + uint32_t light0 = calc_light_multiplier(palookuplight[0]); + uint32_t light1 = calc_light_multiplier(palookuplight[1]); + uint32_t light2 = calc_light_multiplier(palookuplight[2]); + uint32_t light3 = calc_light_multiplier(palookuplight[3]); + + ShadeConstants shade_constants = _shade_constants; + + DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; + DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; + int skipped = thread->skipped_by_thread(_dest_y); + for (int i = 0; i < 4; i++) + { + local_vplce[i] += local_vince[i] * skipped; + local_vince[i] *= thread->num_cores; + } + + if (shade_constants.simple_shade) + { + VEC_SHADE_SIMPLE_INIT4(light3, light2, light1, light0); + do + { + DWORD place0 = local_vplce[0]; + DWORD place1 = 
local_vplce[1]; + DWORD place2 = local_vplce[2]; + DWORD place3 = local_vplce[3]; + + uint32_t p0 = bufplce[0][place0 >> bits]; + uint32_t p1 = bufplce[1][place1 >> bits]; + uint32_t p2 = bufplce[2][place2 >> bits]; + uint32_t p3 = bufplce[3][place3 >> bits]; + + local_vplce[0] = place0 + local_vince[0]; + local_vplce[1] = place1 + local_vince[1]; + local_vplce[2] = place2 + local_vince[2]; + local_vplce[3] = place3 + local_vince[3]; + + __m128i fg = _mm_set_epi32(p3, p2, p1, p0); + VEC_SHADE_SIMPLE(fg); + _mm_storeu_si128((__m128i*)dest, fg); + dest += pitch; + } while (--count); + } + else + { + VEC_SHADE_INIT4(light3, light2, light1, light0, shade_constants); + do + { + DWORD place0 = local_vplce[0]; + DWORD place1 = local_vplce[1]; + DWORD place2 = local_vplce[2]; + DWORD place3 = local_vplce[3]; + + uint32_t p0 = bufplce[0][place0 >> bits]; + uint32_t p1 = bufplce[1][place1 >> bits]; + uint32_t p2 = bufplce[2][place2 >> bits]; + uint32_t p3 = bufplce[3][place3 >> bits]; + + local_vplce[0] = place0 + local_vince[0]; + local_vplce[1] = place1 + local_vince[1]; + local_vplce[2] = place2 + local_vince[2]; + local_vplce[3] = place3 + local_vince[3]; + + __m128i fg = _mm_set_epi32(p3, p2, p1, p0); + VEC_SHADE(fg, shade_constants); + _mm_storeu_si128((__m128i*)dest, fg); + dest += pitch; + } while (--count); + } + } +}; + +class VecCommand(Mvlinec4RGBA) : public DrawerCommand +{ + BYTE * RESTRICT _dest; + int _count; + int _pitch; + ShadeConstants _shade_constants; + int mvlinebits; + fixed_t palookuplight[4]; + DWORD vplce[4]; + DWORD vince[4]; + const uint32 * RESTRICT bufplce[4]; + +public: + VecCommand(Mvlinec4RGBA)() + { + _dest = dc_dest; + _count = dc_count; + _pitch = dc_pitch; + _shade_constants = dc_shade_constants; + mvlinebits = ::mvlinebits; + for (int i = 0; i < 4; i++) + { + palookuplight[i] = ::palookuplight[i]; + vplce[i] = ::vplce[i]; + vince[i] = ::vince[i]; + bufplce[i] = (const uint32 *)::bufplce[i]; + } + } + + void Execute(DrawerThread *thread) 
override + { + int count = thread->count_for_thread(_dest_y, _count); + if (count <= 0) + return; + + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); + int pitch = _pitch * thread->num_cores; + int bits = mvlinebits; + + uint32_t light0 = calc_light_multiplier(palookuplight[0]); + uint32_t light1 = calc_light_multiplier(palookuplight[1]); + uint32_t light2 = calc_light_multiplier(palookuplight[2]); + uint32_t light3 = calc_light_multiplier(palookuplight[3]); + + ShadeConstants shade_constants = _shade_constants; + + DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; + DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; + int skipped = thread->skipped_by_thread(_dest_y); + for (int i = 0; i < 4; i++) + { + local_vplce[i] += local_vince[i] * skipped; + local_vince[i] *= thread->num_cores; + } + + if (shade_constants.simple_shade) + { + VEC_SHADE_SIMPLE_INIT4(light3, light2, light1, light0); + do + { + DWORD place0 = local_vplce[0]; + DWORD place1 = local_vplce[1]; + DWORD place2 = local_vplce[2]; + DWORD place3 = local_vplce[3]; + + uint32_t pix0 = bufplce[0][place0 >> bits]; + uint32_t pix1 = bufplce[1][place1 >> bits]; + uint32_t pix2 = bufplce[2][place2 >> bits]; + uint32_t pix3 = bufplce[3][place3 >> bits]; + + // movemask = !(pix == 0) + __m128i movemask = _mm_xor_si128(_mm_cmpeq_epi32(_mm_set_epi32(pix3, pix2, pix1, pix0), _mm_setzero_si128()), _mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128())); + + local_vplce[0] = place0 + local_vince[0]; + local_vplce[1] = place1 + local_vince[1]; + local_vplce[2] = place2 + local_vince[2]; + local_vplce[3] = place3 + local_vince[3]; + + __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); + VEC_SHADE_SIMPLE(fg); + _mm_maskmoveu_si128(fg, movemask, (char*)dest); + dest += pitch; + } while (--count); + } + else + { + VEC_SHADE_INIT4(light3, light2, light1, light0, shade_constants); + do + { + DWORD place0 = local_vplce[0]; + DWORD place1 = local_vplce[1]; + 
DWORD place2 = local_vplce[2]; + DWORD place3 = local_vplce[3]; + + uint32_t pix0 = bufplce[0][place0 >> bits]; + uint32_t pix1 = bufplce[1][place1 >> bits]; + uint32_t pix2 = bufplce[2][place2 >> bits]; + uint32_t pix3 = bufplce[3][place3 >> bits]; + + // movemask = !(pix == 0) + __m128i movemask = _mm_xor_si128(_mm_cmpeq_epi32(_mm_set_epi32(pix3, pix2, pix1, pix0), _mm_setzero_si128()), _mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128())); + + local_vplce[0] = place0 + local_vince[0]; + local_vplce[1] = place1 + local_vince[1]; + local_vplce[2] = place2 + local_vince[2]; + local_vplce[3] = place3 + local_vince[3]; + + __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); + VEC_SHADE(fg, shade_constants); + _mm_maskmoveu_si128(fg, movemask, (char*)dest); + dest += pitch; + } while (--count); + } + } +}; diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp index 269dd9d9d8..4da963430e 100644 --- a/src/r_drawt_rgba.cpp +++ b/src/r_drawt_rgba.cpp @@ -51,6 +51,39 @@ extern unsigned int dc_tspans[4][MAXHEIGHT]; extern unsigned int *dc_ctspan[4]; extern unsigned int *horizspan[4]; +EXTERN_CVAR(Bool, r_linearlight) + +#ifndef NO_SSE + +// Generate SSE drawers: +#define VecCommand(name) name##_SSE_Command +#define VEC_SHADE_SIMPLE_INIT SSE_SHADE_SIMPLE_INIT +#define VEC_SHADE_SIMPLE_INIT4 SSE_SHADE_SIMPLE_INIT4 +#define VEC_SHADE_SIMPLE SSE_SHADE_SIMPLE +#define VEC_SHADE_INIT SSE_SHADE_INIT +#define VEC_SHADE_INIT4 SSE_SHADE_INIT4 +#define VEC_SHADE SSE_SHADE +#include "r_drawt_rgba_sse.h" + +// Generate AVX drawers: +#undef VecCommand +#undef VEC_SHADE_SIMPLE_INIT +#undef VEC_SHADE_SIMPLE_INIT4 +#undef VEC_SHADE_SIMPLE +#undef VEC_SHADE_INIT +#undef VEC_SHADE_INIT4 +#undef VEC_SHADE +#define VecCommand(name) name##_AVX_Command +#define VEC_SHADE_SIMPLE_INIT AVX_LINEAR_SHADE_SIMPLE_INIT +#define VEC_SHADE_SIMPLE_INIT4 AVX_LINEAR_SHADE_SIMPLE_INIT4 +#define VEC_SHADE_SIMPLE AVX_LINEAR_SHADE_SIMPLE +#define VEC_SHADE_INIT AVX_LINEAR_SHADE_INIT +#define 
VEC_SHADE_INIT4 AVX_LINEAR_SHADE_INIT4 +#define VEC_SHADE AVX_LINEAR_SHADE +#include "r_drawt_rgba_sse.h" + +#endif + ///////////////////////////////////////////////////////////////////////////// class RtCopy1colRGBACommand : public DrawerCommand @@ -206,7 +239,6 @@ public: _colormap = dc_colormap; } -#ifdef NO_SSE void Execute(DrawerThread *thread) override { uint32_t *source; @@ -253,132 +285,6 @@ public: dest += pitch * 2; } while (--count); } -#else - void Execute(DrawerThread *thread) override - { - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - int sincr; - - count = thread->count_for_thread(yl, yh - yl + 1); - if (count <= 0) - return; - - ShadeConstants shade_constants = _shade_constants; - uint32_t light = calc_light_multiplier(_light); - uint32_t *palette = (uint32_t*)GPalette.BaseColors; - - dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); - source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; - pitch = _pitch * thread->num_cores; - sincr = thread->num_cores * 4; - - BYTE *colormap = _colormap; - - if (shade_constants.simple_shade) - { - SSE_SHADE_SIMPLE_INIT(light); - - if (count & 1) { - uint32_t p0 = colormap[source[0]]; - uint32_t p1 = colormap[source[1]]; - uint32_t p2 = colormap[source[2]]; - uint32_t p3 = colormap[source[3]]; - - // shade_pal_index: - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - SSE_SHADE_SIMPLE(fg); - _mm_storeu_si128((__m128i*)dest, fg); - - source += sincr; - dest += pitch; - } - if (!(count >>= 1)) - return; - - do { - // shade_pal_index 0-3 - { - uint32_t p0 = colormap[source[0]]; - uint32_t p1 = colormap[source[1]]; - uint32_t p2 = colormap[source[2]]; - uint32_t p3 = colormap[source[3]]; - - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - SSE_SHADE_SIMPLE(fg); - _mm_storeu_si128((__m128i*)dest, fg); - } - - // shade_pal_index 4-7 (pitch) - { - uint32_t p0 = 
colormap[source[sincr]]; - uint32_t p1 = colormap[source[sincr + 1]]; - uint32_t p2 = colormap[source[sincr + 2]]; - uint32_t p3 = colormap[source[sincr + 3]]; - - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - SSE_SHADE_SIMPLE(fg); - _mm_storeu_si128((__m128i*)(dest + pitch), fg); - } - - source += sincr * 2; - dest += pitch * 2; - } while (--count); - } - else - { - SSE_SHADE_INIT(light, shade_constants); - - if (count & 1) { - uint32_t p0 = colormap[source[0]]; - uint32_t p1 = colormap[source[1]]; - uint32_t p2 = colormap[source[2]]; - uint32_t p3 = colormap[source[3]]; - - // shade_pal_index: - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - SSE_SHADE(fg, shade_constants); - _mm_storeu_si128((__m128i*)dest, fg); - - source += sincr; - dest += pitch; - } - if (!(count >>= 1)) - return; - - do { - // shade_pal_index 0-3 - { - uint32_t p0 = colormap[source[0]]; - uint32_t p1 = colormap[source[1]]; - uint32_t p2 = colormap[source[2]]; - uint32_t p3 = colormap[source[3]]; - - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - SSE_SHADE(fg, shade_constants); - _mm_storeu_si128((__m128i*)dest, fg); - } - - // shade_pal_index 4-7 (pitch) - { - uint32_t p0 = colormap[source[sincr]]; - uint32_t p1 = colormap[source[sincr + 1]]; - uint32_t p2 = colormap[source[sincr + 2]]; - uint32_t p3 = colormap[source[sincr + 3]]; - - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - SSE_SHADE(fg, shade_constants); - _mm_storeu_si128((__m128i*)(dest + pitch), fg); - } - - source += sincr * 2; - dest += pitch * 2; - } while (--count); - } - } -#endif }; class RtTranslate1colRGBACommand : public DrawerCommand @@ -607,7 +513,6 @@ public: _destalpha = dc_destalpha; } -#ifdef NO_SSE void Execute(DrawerThread *thread) override { uint32_t *source; @@ -655,107 +560,6 @@ public: dest += pitch; } while (--count); } -#else - void Execute(DrawerThread *thread) 
override - { - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - int sincr; - - count = thread->count_for_thread(yl, yh - yl + 1); - if (count <= 0) - return; - - dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); - source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; - pitch = _pitch * thread->num_cores; - sincr = 4 * thread->num_cores; - - uint32_t light = calc_light_multiplier(_light); - uint32_t *palette = (uint32_t*)GPalette.BaseColors; - BYTE *colormap = _colormap; - - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - - ShadeConstants shade_constants = _shade_constants; - - if (shade_constants.simple_shade) - { - SSE_SHADE_SIMPLE_INIT(light); - - __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); - __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); - - do { - uint32_t p0 = colormap[source[0]]; - uint32_t p1 = colormap[source[1]]; - uint32_t p2 = colormap[source[2]]; - uint32_t p3 = colormap[source[3]]; - - // shade_pal_index: - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - SSE_SHADE_SIMPLE(fg); - - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - - // unpack bg: - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); - - // (fg_red * fg_alpha + bg_red * bg_alpha) / 256: - __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); - __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); - - __m128i color = _mm_packus_epi16(color_lo, color_hi); - 
_mm_storeu_si128((__m128i*)dest, color); - - source += sincr; - dest += pitch; - } while (--count); - } - else - { - SSE_SHADE_INIT(light, shade_constants); - - __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); - __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); - - do { - uint32_t p0 = colormap[source[0]]; - uint32_t p1 = colormap[source[1]]; - uint32_t p2 = colormap[source[2]]; - uint32_t p3 = colormap[source[3]]; - - // shade_pal_index: - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - SSE_SHADE(fg, shade_constants); - - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - - // unpack bg: - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); - - // (fg_red * fg_alpha + bg_red * bg_alpha) / 256: - __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); - __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); - - __m128i color = _mm_packus_epi16(color_lo, color_hi); - _mm_storeu_si128((__m128i*)dest, color); - - source += sincr; - dest += pitch; - } while (--count); - } - } -#endif }; class RtShaded1colRGBACommand : public DrawerCommand @@ -853,7 +657,6 @@ public: _light = dc_light; } -#ifdef NO_SSE void Execute(DrawerThread *thread) override { BYTE *colormap; @@ -898,57 +701,6 @@ public: dest += pitch; } while (--count); } -#else - void Execute(DrawerThread *thread) override - { - BYTE *colormap; - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - int sincr; - - count = thread->count_for_thread(yl, yh - yl + 1); - if (count <= 0) - return; - - colormap = _colormap; - dest = 
thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); - source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; - pitch = _pitch * thread->num_cores; - sincr = 4 * thread->num_cores; - - __m128i fg = _mm_unpackhi_epi8(_mm_set1_epi32(shade_pal_index_simple(_color, calc_light_multiplier(_light))), _mm_setzero_si128()); - __m128i alpha_one = _mm_set1_epi16(64); - - do { - uint32_t p0 = colormap[source[0]]; - uint32_t p1 = colormap[source[1]]; - uint32_t p2 = colormap[source[2]]; - uint32_t p3 = colormap[source[3]]; - - __m128i alpha_hi = _mm_set_epi16(64, p3, p3, p3, 64, p2, p2, p2); - __m128i alpha_lo = _mm_set_epi16(64, p1, p1, p1, 64, p0, p0, p0); - __m128i inv_alpha_hi = _mm_subs_epu16(alpha_one, alpha_hi); - __m128i inv_alpha_lo = _mm_subs_epu16(alpha_one, alpha_lo); - - // unpack bg: - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); - - // (fg_red * alpha + bg_red * inv_alpha) / 64: - __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg, alpha_hi), _mm_mullo_epi16(bg_hi, inv_alpha_hi)), 6); - __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg, alpha_lo), _mm_mullo_epi16(bg_lo, inv_alpha_lo)), 6); - - __m128i color = _mm_packus_epi16(color_lo, color_hi); - _mm_storeu_si128((__m128i*)dest, color); - - source += sincr; - dest += pitch; - } while (--count); - } -#endif }; class RtAddClamp1colRGBACommand : public DrawerCommand @@ -1051,7 +803,6 @@ public: _shade_constants = dc_shade_constants; } -#ifdef NO_SSE void Execute(DrawerThread *thread) override { uint32_t *source; @@ -1097,106 +848,6 @@ public: dest += pitch; } while (--count); } -#else - void Execute(DrawerThread *thread) override - { - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - int sincr; - - count = thread->count_for_thread(yl, yh - yl + 1); - if (count <= 0) - return; - - dest = 
thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); - source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; - pitch = _pitch * thread->num_cores; - sincr = 4 * thread->num_cores; - - uint32_t light = calc_light_multiplier(_light); - uint32_t *palette = (uint32_t*)GPalette.BaseColors; - - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - - ShadeConstants shade_constants = _shade_constants; - - if (shade_constants.simple_shade) - { - SSE_SHADE_SIMPLE_INIT(light); - - __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); - __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); - - do { - uint32_t p0 = source[0]; - uint32_t p1 = source[1]; - uint32_t p2 = source[2]; - uint32_t p3 = source[3]; - - // shade_pal_index: - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - SSE_SHADE_SIMPLE(fg); - - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - - // unpack bg: - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); - - // (fg_red * fg_alpha + bg_red * bg_alpha) / 256: - __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); - __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); - - __m128i color = _mm_packus_epi16(color_lo, color_hi); - _mm_storeu_si128((__m128i*)dest, color); - - source += sincr; - dest += pitch; - } while (--count); - } - else - { - SSE_SHADE_INIT(light, shade_constants); - - __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); - __m128i 
mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); - - do { - uint32_t p0 = source[0]; - uint32_t p1 = source[1]; - uint32_t p2 = source[2]; - uint32_t p3 = source[3]; - - // shade_pal_index: - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - SSE_SHADE(fg, shade_constants); - - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - - // unpack bg: - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); - - // (fg_red * fg_alpha + bg_red * bg_alpha) / 256: - __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); - __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); - - __m128i color = _mm_packus_epi16(color_lo, color_hi); - _mm_storeu_si128((__m128i*)dest, color); - - source += sincr; - dest += pitch; - } while (--count); - } - } -#endif }; class RtSubClamp1colRGBACommand : public DrawerCommand @@ -1657,7 +1308,14 @@ void rt_map1col_rgba (int hx, int sx, int yl, int yh) // Maps all four spans to the screen starting at sx. void rt_map4cols_rgba (int sx, int yl, int yh) { +#ifdef NO_SSE DrawerCommandQueue::QueueCommand<RtMap4colsRGBACommand>(sx, yl, yh); +#else + if (!r_linearlight) + DrawerCommandQueue::QueueCommand<RtMap4colsRGBA_SSE_Command>(sx, yl, yh); + else + DrawerCommandQueue::QueueCommand<RtMap4colsRGBA_AVX_Command>(sx, yl, yh); +#endif } void rt_Translate1col_rgba(const BYTE *translation, int hx, int yl, int yh) @@ -1693,7 +1351,14 @@ void rt_add1col_rgba (int hx, int sx, int yl, int yh) // Adds all four spans to the screen starting at sx without clamping.
void rt_add4cols_rgba (int sx, int yl, int yh) { +#ifdef NO_SSE DrawerCommandQueue::QueueCommand<RtAdd4colsRGBACommand>(sx, yl, yh); +#else + if (!r_linearlight) + DrawerCommandQueue::QueueCommand<RtAdd4colsRGBA_SSE_Command>(sx, yl, yh); + else + DrawerCommandQueue::QueueCommand<RtAdd4colsRGBA_AVX_Command>(sx, yl, yh); +#endif } // Translates and adds one span at hx to the screen at sx without clamping. @@ -1719,7 +1384,14 @@ void rt_shaded1col_rgba (int hx, int sx, int yl, int yh) // Shades all four spans to the screen starting at sx. void rt_shaded4cols_rgba (int sx, int yl, int yh) { +#ifdef NO_SSE DrawerCommandQueue::QueueCommand<RtShaded4colsRGBACommand>(sx, yl, yh); +#else + if (!r_linearlight) + DrawerCommandQueue::QueueCommand<RtShaded4colsRGBA_SSE_Command>(sx, yl, yh); + else + DrawerCommandQueue::QueueCommand<RtShaded4colsRGBA_AVX_Command>(sx, yl, yh); +#endif } // Adds one span at hx to the screen at sx with clamping. @@ -1731,7 +1403,14 @@ void rt_addclamp1col_rgba (int hx, int sx, int yl, int yh) // Adds all four spans to the screen starting at sx with clamping. void rt_addclamp4cols_rgba (int sx, int yl, int yh) { +#ifdef NO_SSE DrawerCommandQueue::QueueCommand<RtAddClamp4colsRGBACommand>(sx, yl, yh); +#else + if (!r_linearlight) + DrawerCommandQueue::QueueCommand<RtAddClamp4colsRGBA_SSE_Command>(sx, yl, yh); + else + DrawerCommandQueue::QueueCommand<RtAddClamp4colsRGBA_AVX_Command>(sx, yl, yh); +#endif } // Translates and adds one span at hx to the screen at sx with clamping. diff --git a/src/r_drawt_rgba_sse.h b/src/r_drawt_rgba_sse.h new file mode 100644 index 0000000000..5b8ae8081d --- /dev/null +++ b/src/r_drawt_rgba_sse.h @@ -0,0 +1,495 @@ +// +// SSE/AVX intrinsics based drawers for the r_drawt family of drawers. +// +// Note: This header file is intentionally not guarded by a __R_DRAWT_RGBA_SSE__ define. +// It is because the code is nearly identical for SSE vs AVX. The file is included +// multiple times by r_drawt_rgba.cpp with different defines that changes the class +// names outputted and the type of intrinsics used.
+ +#ifdef _MSC_VER +#pragma warning(disable: 4752) // warning C4752: found Intel(R) Advanced Vector Extensions; consider using /arch:AVX +#endif + +class VecCommand(RtMap4colsRGBA) : public DrawerCommand +{ + int sx; + int yl; + int yh; + fixed_t _light; + ShadeConstants _shade_constants; + BYTE * RESTRICT _destorg; + int _pitch; + BYTE * RESTRICT _colormap; + +public: + VecCommand(RtMap4colsRGBA)(int sx, int yl, int yh) + { + this->sx = sx; + this->yl = yl; + this->yh = yh; + + _light = dc_light; + _shade_constants = dc_shade_constants; + _destorg = dc_destorg; + _pitch = dc_pitch; + _colormap = dc_colormap; + } + + void Execute(DrawerThread *thread) override + { + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + int sincr; + + count = thread->count_for_thread(yl, yh - yl + 1); + if (count <= 0) + return; + + ShadeConstants shade_constants = _shade_constants; + uint32_t light = calc_light_multiplier(_light); + uint32_t *palette = (uint32_t*)GPalette.BaseColors; + + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); + source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; + pitch = _pitch * thread->num_cores; + sincr = thread->num_cores * 4; + + BYTE *colormap = _colormap; + + if (shade_constants.simple_shade) + { + VEC_SHADE_SIMPLE_INIT(light); + + if (count & 1) { + uint32_t p0 = colormap[source[0]]; + uint32_t p1 = colormap[source[1]]; + uint32_t p2 = colormap[source[2]]; + uint32_t p3 = colormap[source[3]]; + + // shade_pal_index: + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + VEC_SHADE_SIMPLE(fg); + _mm_storeu_si128((__m128i*)dest, fg); + + source += sincr; + dest += pitch; + } + if (!(count >>= 1)) + return; + + do { + // shade_pal_index 0-3 + { + uint32_t p0 = colormap[source[0]]; + uint32_t p1 = colormap[source[1]]; + uint32_t p2 = colormap[source[2]]; + uint32_t p3 = colormap[source[3]]; + + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], 
palette[p1], palette[p0]); + VEC_SHADE_SIMPLE(fg); + _mm_storeu_si128((__m128i*)dest, fg); + } + + // shade_pal_index 4-7 (pitch) + { + uint32_t p0 = colormap[source[sincr]]; + uint32_t p1 = colormap[source[sincr + 1]]; + uint32_t p2 = colormap[source[sincr + 2]]; + uint32_t p3 = colormap[source[sincr + 3]]; + + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + VEC_SHADE_SIMPLE(fg); + _mm_storeu_si128((__m128i*)(dest + pitch), fg); + } + + source += sincr * 2; + dest += pitch * 2; + } while (--count); + } + else + { + VEC_SHADE_INIT(light, shade_constants); + + if (count & 1) { + uint32_t p0 = colormap[source[0]]; + uint32_t p1 = colormap[source[1]]; + uint32_t p2 = colormap[source[2]]; + uint32_t p3 = colormap[source[3]]; + + // shade_pal_index: + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + VEC_SHADE(fg, shade_constants); + _mm_storeu_si128((__m128i*)dest, fg); + + source += sincr; + dest += pitch; + } + if (!(count >>= 1)) + return; + + do { + // shade_pal_index 0-3 + { + uint32_t p0 = colormap[source[0]]; + uint32_t p1 = colormap[source[1]]; + uint32_t p2 = colormap[source[2]]; + uint32_t p3 = colormap[source[3]]; + + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + VEC_SHADE(fg, shade_constants); + _mm_storeu_si128((__m128i*)dest, fg); + } + + // shade_pal_index 4-7 (pitch) + { + uint32_t p0 = colormap[source[sincr]]; + uint32_t p1 = colormap[source[sincr + 1]]; + uint32_t p2 = colormap[source[sincr + 2]]; + uint32_t p3 = colormap[source[sincr + 3]]; + + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + VEC_SHADE(fg, shade_constants); + _mm_storeu_si128((__m128i*)(dest + pitch), fg); + } + + source += sincr * 2; + dest += pitch * 2; + } while (--count); + } + } +}; + +class VecCommand(RtAdd4colsRGBA) : public DrawerCommand +{ + int sx; + int yl; + int yh; + BYTE * RESTRICT _destorg; + int _pitch; + fixed_t _light; + 
ShadeConstants _shade_constants; + BYTE * RESTRICT _colormap; + fixed_t _srcalpha; + fixed_t _destalpha; + +public: + VecCommand(RtAdd4colsRGBA)(int sx, int yl, int yh) + { + this->sx = sx; + this->yl = yl; + this->yh = yh; + + _destorg = dc_destorg; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; + _colormap = dc_colormap; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; + } + + void Execute(DrawerThread *thread) override + { + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + int sincr; + + count = thread->count_for_thread(yl, yh - yl + 1); + if (count <= 0) + return; + + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); + source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; + pitch = _pitch * thread->num_cores; + sincr = 4 * thread->num_cores; + + uint32_t light = calc_light_multiplier(_light); + uint32_t *palette = (uint32_t*)GPalette.BaseColors; + BYTE *colormap = _colormap; + + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); + + ShadeConstants shade_constants = _shade_constants; + + if (shade_constants.simple_shade) + { + VEC_SHADE_SIMPLE_INIT(light); + + __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); + __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); + + do { + uint32_t p0 = colormap[source[0]]; + uint32_t p1 = colormap[source[1]]; + uint32_t p2 = colormap[source[2]]; + uint32_t p3 = colormap[source[3]]; + + // shade_pal_index: + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + VEC_SHADE_SIMPLE(fg); + + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + + // unpack bg: + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + __m128i bg_hi = _mm_unpackhi_epi8(bg, 
_mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + // (fg_red * fg_alpha + bg_red * bg_alpha) / 256: + __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); + __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); + + __m128i color = _mm_packus_epi16(color_lo, color_hi); + _mm_storeu_si128((__m128i*)dest, color); + + source += sincr; + dest += pitch; + } while (--count); + } + else + { + VEC_SHADE_INIT(light, shade_constants); + + __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); + __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); + + do { + uint32_t p0 = colormap[source[0]]; + uint32_t p1 = colormap[source[1]]; + uint32_t p2 = colormap[source[2]]; + uint32_t p3 = colormap[source[3]]; + + // shade_pal_index: + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + VEC_SHADE(fg, shade_constants); + + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + + // unpack bg: + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + // (fg_red * fg_alpha + bg_red * bg_alpha) / 256: + __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); + __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); + + __m128i color = _mm_packus_epi16(color_lo, color_hi); + _mm_storeu_si128((__m128i*)dest, color); + + source += sincr; + dest += pitch; + } while (--count); + } + } +}; + +class VecCommand(RtShaded4colsRGBA) : public DrawerCommand +{ + int sx; + 
int yl; + int yh; + lighttable_t * RESTRICT _colormap; + int _color; + BYTE * RESTRICT _destorg; + int _pitch; + fixed_t _light; + +public: + VecCommand(RtShaded4colsRGBA)(int sx, int yl, int yh) + { + this->sx = sx; + this->yl = yl; + this->yh = yh; + + _colormap = dc_colormap; + _color = dc_color; + _destorg = dc_destorg; + _pitch = dc_pitch; + _light = dc_light; + } + + void Execute(DrawerThread *thread) override + { + BYTE *colormap; + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + int sincr; + + count = thread->count_for_thread(yl, yh - yl + 1); + if (count <= 0) + return; + + colormap = _colormap; + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); + source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; + pitch = _pitch * thread->num_cores; + sincr = 4 * thread->num_cores; + + __m128i fg = _mm_unpackhi_epi8(_mm_set1_epi32(shade_pal_index_simple(_color, calc_light_multiplier(_light))), _mm_setzero_si128()); + __m128i alpha_one = _mm_set1_epi16(64); + + do { + uint32_t p0 = colormap[source[0]]; + uint32_t p1 = colormap[source[1]]; + uint32_t p2 = colormap[source[2]]; + uint32_t p3 = colormap[source[3]]; + + __m128i alpha_hi = _mm_set_epi16(64, p3, p3, p3, 64, p2, p2, p2); + __m128i alpha_lo = _mm_set_epi16(64, p1, p1, p1, 64, p0, p0, p0); + __m128i inv_alpha_hi = _mm_subs_epu16(alpha_one, alpha_hi); + __m128i inv_alpha_lo = _mm_subs_epu16(alpha_one, alpha_lo); + + // unpack bg: + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + // (fg_red * alpha + bg_red * inv_alpha) / 64: + __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg, alpha_hi), _mm_mullo_epi16(bg_hi, inv_alpha_hi)), 6); + __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg, alpha_lo), _mm_mullo_epi16(bg_lo, inv_alpha_lo)), 6); + + __m128i color = 
_mm_packus_epi16(color_lo, color_hi); + _mm_storeu_si128((__m128i*)dest, color); + + source += sincr; + dest += pitch; + } while (--count); + } +}; + +class VecCommand(RtAddClamp4colsRGBA) : public DrawerCommand +{ + int sx; + int yl; + int yh; + BYTE * RESTRICT _destorg; + int _pitch; + fixed_t _light; + fixed_t _srcalpha; + fixed_t _destalpha; + ShadeConstants _shade_constants; + +public: + VecCommand(RtAddClamp4colsRGBA)(int sx, int yl, int yh) + { + this->sx = sx; + this->yl = yl; + this->yh = yh; + + _destorg = dc_destorg; + _pitch = dc_pitch; + _light = dc_light; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; + _shade_constants = dc_shade_constants; + } + + void Execute(DrawerThread *thread) override + { + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + int sincr; + + count = thread->count_for_thread(yl, yh - yl + 1); + if (count <= 0) + return; + + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); + source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; + pitch = _pitch * thread->num_cores; + sincr = 4 * thread->num_cores; + + uint32_t light = calc_light_multiplier(_light); + uint32_t *palette = (uint32_t*)GPalette.BaseColors; + + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); + + ShadeConstants shade_constants = _shade_constants; + + if (shade_constants.simple_shade) + { + VEC_SHADE_SIMPLE_INIT(light); + + __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); + __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); + + do { + uint32_t p0 = source[0]; + uint32_t p1 = source[1]; + uint32_t p2 = source[2]; + uint32_t p3 = source[3]; + + // shade_pal_index: + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + VEC_SHADE_SIMPLE(fg); + + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + 
__m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + + // unpack bg: + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + // (fg_red * fg_alpha + bg_red * bg_alpha) / 256: + __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); + __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); + + __m128i color = _mm_packus_epi16(color_lo, color_hi); + _mm_storeu_si128((__m128i*)dest, color); + + source += sincr; + dest += pitch; + } while (--count); + } + else + { + VEC_SHADE_INIT(light, shade_constants); + + __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); + __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); + + do { + uint32_t p0 = source[0]; + uint32_t p1 = source[1]; + uint32_t p2 = source[2]; + uint32_t p3 = source[3]; + + // shade_pal_index: + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + VEC_SHADE(fg, shade_constants); + + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + + // unpack bg: + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + // (fg_red * fg_alpha + bg_red * bg_alpha) / 256: + __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); + __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); + + __m128i color = _mm_packus_epi16(color_lo, color_hi); + _mm_storeu_si128((__m128i*)dest, color); + + source += 
sincr; + dest += pitch; + } while (--count); + } + } +}; From 38aba81dcc816ce9bb0888f95b94f73714771f67 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 19 Jun 2016 23:11:41 +0200 Subject: [PATCH 060/912] Added more SSE drawers --- src/r_draw_rgba.cpp | 313 ++++++++++++---------- src/r_draw_rgba.h | 58 +++- src/r_draw_rgba_sse.h | 583 ++++++++++++++++++++++++++++++++++++++++- src/r_drawt_rgba.cpp | 14 + src/r_drawt_rgba_sse.h | 252 ++++++++++++++++++ 5 files changed, 1072 insertions(+), 148 deletions(-) diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 28c5df2ac6..96232ab0c0 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -2411,10 +2411,7 @@ public: do { uint32_t pix = source[frac >> bits]; - if (pix != 0) - { - *dest = shade_bgra(pix, light, shade_constants); - } + *dest = alpha_blend(shade_bgra(pix, light, shade_constants), *dest); frac += fracstep; dest += pitch; } while (--count); @@ -2480,10 +2477,10 @@ public: do { uint32_t pix; - pix = bufplce[0][(place = local_vplce[0]) >> bits]; if (pix) dest[0] = shade_bgra(pix, light0, shade_constants); local_vplce[0] = place + local_vince[0]; - pix = bufplce[1][(place = local_vplce[1]) >> bits]; if (pix) dest[1] = shade_bgra(pix, light1, shade_constants); local_vplce[1] = place + local_vince[1]; - pix = bufplce[2][(place = local_vplce[2]) >> bits]; if (pix) dest[2] = shade_bgra(pix, light2, shade_constants); local_vplce[2] = place + local_vince[2]; - pix = bufplce[3][(place = local_vplce[3]) >> bits]; if (pix) dest[3] = shade_bgra(pix, light3, shade_constants); local_vplce[3] = place + local_vince[3]; + pix = bufplce[0][(place = local_vplce[0]) >> bits]; dest[0] = alpha_blend(shade_bgra(pix, light0, shade_constants), dest[0]); local_vplce[0] = place + local_vince[0]; + pix = bufplce[1][(place = local_vplce[1]) >> bits]; dest[1] = alpha_blend(shade_bgra(pix, light1, shade_constants), dest[1]); local_vplce[1] = place + local_vince[1]; + pix = bufplce[2][(place = local_vplce[2]) >> bits]; 
dest[2] = alpha_blend(shade_bgra(pix, light2, shade_constants), dest[2]); local_vplce[2] = place + local_vince[2]; + pix = bufplce[3][(place = local_vplce[3]) >> bits]; dest[3] = alpha_blend(shade_bgra(pix, light3, shade_constants), dest[3]); local_vplce[3] = place + local_vince[3]; dest += pitch; } while (--count); } @@ -2535,29 +2532,31 @@ public: uint32_t light = calc_light_multiplier(_light); ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); + uint32_t src_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t dest_alpha = _destalpha >> (FRACBITS - 8); do { uint32_t pix = source[frac >> bits]; - if (pix != 0) - { - uint32_t fg = shade_bgra(pix, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; + uint32_t fg_alpha, bg_alpha; + calc_blend_alpha(pix, src_alpha, dest_alpha, fg_alpha, bg_alpha); - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); + uint32_t fg = shade_bgra(pix, light, shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - *dest = 0xff000000 | (red << 16) | 
(green << 8) | blue; - } frac += fracstep; dest += pitch; } while (--count); @@ -2615,8 +2614,8 @@ public: ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); + uint32_t src_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t dest_alpha = _destalpha >> (FRACBITS - 8); DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; @@ -2632,23 +2631,25 @@ public: for (int i = 0; i < 4; ++i) { uint32_t pix = bufplce[i][local_vplce[i] >> bits]; - if (pix != 0) - { - uint32_t fg = shade_bgra(pix, light[i], shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; + uint32_t fg_alpha, bg_alpha; + calc_blend_alpha(pix, src_alpha, dest_alpha, fg_alpha, bg_alpha); - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); + uint32_t fg = shade_bgra(pix, light[i], shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); + + dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - } local_vplce[i] += local_vince[i]; } dest += pitch; @@ 
-2702,29 +2703,31 @@ public: uint32_t light = calc_light_multiplier(_light); ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); + uint32_t src_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t dest_alpha = _destalpha >> (FRACBITS - 8); do { uint32_t pix = source[frac >> bits]; - if (pix != 0) - { - uint32_t fg = shade_bgra(pix, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; + uint32_t fg_alpha, bg_alpha; + calc_blend_alpha(pix, src_alpha, dest_alpha, fg_alpha, bg_alpha); - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); + uint32_t fg = shade_bgra(pix, light, shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - } frac += fracstep; dest += pitch; } while (--count); @@ -2782,8 +2785,8 @@ public: ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); + uint32_t src_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t 
dest_alpha = _destalpha >> (FRACBITS - 8); DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; @@ -2799,23 +2802,25 @@ public: for (int i = 0; i < 4; ++i) { uint32_t pix = bufplce[i][local_vplce[i] >> bits]; - if (pix != 0) - { - uint32_t fg = shade_bgra(pix, light[i], shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - uint32_t bg_red = (dest[i] >> 16) & 0xff; - uint32_t bg_green = (dest[i] >> 8) & 0xff; - uint32_t bg_blue = (dest[i]) & 0xff; + uint32_t fg_alpha, bg_alpha; + calc_blend_alpha(pix, src_alpha, dest_alpha, fg_alpha, bg_alpha); - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); + uint32_t fg = shade_bgra(pix, light[i], shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (dest[i] >> 16) & 0xff; + uint32_t bg_green = (dest[i] >> 8) & 0xff; + uint32_t bg_blue = (dest[i]) & 0xff; + + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); + + dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - } local_vplce[i] += local_vince[i]; } dest += pitch; @@ -2869,29 +2874,31 @@ public: uint32_t light = calc_light_multiplier(_light); ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); + uint32_t src_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t dest_alpha = 
_destalpha >> (FRACBITS - 8); do { uint32_t pix = source[frac >> bits]; - if (pix != 0) - { - uint32_t fg = shade_bgra(pix, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; + uint32_t fg_alpha, bg_alpha; + calc_blend_alpha(pix, src_alpha, dest_alpha, fg_alpha, bg_alpha); - uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t fg = shade_bgra(pix, light, shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - } frac += fracstep; dest += pitch; } while (--count); @@ -2949,8 +2956,8 @@ public: ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); + uint32_t src_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t dest_alpha = _destalpha >> (FRACBITS - 8); DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; DWORD local_vince[4] = { vince[0], vince[1], vince[2], 
vince[3] }; @@ -2966,23 +2973,25 @@ public: for (int i = 0; i < 4; ++i) { uint32_t pix = bufplce[i][local_vplce[i] >> bits]; - if (pix != 0) - { - uint32_t fg = shade_bgra(pix, light[i], shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - uint32_t bg_red = (dest[i] >> 16) & 0xff; - uint32_t bg_green = (dest[i] >> 8) & 0xff; - uint32_t bg_blue = (dest[i]) & 0xff; + uint32_t fg_alpha, bg_alpha; + calc_blend_alpha(pix, src_alpha, dest_alpha, fg_alpha, bg_alpha); - uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t fg = shade_bgra(pix, light[i], shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (dest[i] >> 16) & 0xff; + uint32_t bg_green = (dest[i] >> 8) & 0xff; + uint32_t bg_blue = (dest[i]) & 0xff; + + uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; + + dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - } local_vplce[i] += local_vince[i]; } dest += pitch; @@ -3036,29 +3045,31 @@ public: uint32_t light = calc_light_multiplier(_light); ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); + uint32_t src_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t dest_alpha = _destalpha >> (FRACBITS - 8); 
do { uint32_t pix = source[frac >> bits]; - if (pix != 0) - { - uint32_t fg = shade_bgra(pix, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; + uint32_t fg_alpha, bg_alpha; + calc_blend_alpha(pix, src_alpha, dest_alpha, fg_alpha, bg_alpha); - uint32_t red = clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t fg = shade_bgra(pix, light, shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - } frac += fracstep; dest += pitch; } while (--count); @@ -3116,8 +3127,8 @@ public: ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); + uint32_t src_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t dest_alpha = _destalpha >> (FRACBITS - 8); DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; @@ -3133,23 +3144,25 
@@ public: for (int i = 0; i < 4; ++i) { uint32_t pix = bufplce[i][local_vplce[i] >> bits]; - if (pix != 0) - { - uint32_t fg = shade_bgra(pix, light[i], shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - uint32_t bg_red = (dest[i] >> 16) & 0xff; - uint32_t bg_green = (dest[i] >> 8) & 0xff; - uint32_t bg_blue = (dest[i]) & 0xff; + uint32_t fg_alpha, bg_alpha; + calc_blend_alpha(pix, src_alpha, dest_alpha, fg_alpha, bg_alpha); - uint32_t red = clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t fg = shade_bgra(pix, light[i], shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (dest[i] >> 16) & 0xff; + uint32_t bg_green = (dest[i] >> 8) & 0xff; + uint32_t bg_blue = (dest[i]) & 0xff; + + uint32_t red = clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; + + dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - } local_vplce[i] += local_vince[i]; } dest += pitch; @@ -3733,7 +3746,14 @@ fixed_t tmvline1_add_rgba() void tmvline4_add_rgba() { +#ifdef NO_SSE DrawerCommandQueue::QueueCommand(); +#else + if (!r_linearlight) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); +#endif for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } @@ -3746,7 +3766,14 @@ fixed_t tmvline1_addclamp_rgba() void 
tmvline4_addclamp_rgba() { +#ifdef NO_SSE DrawerCommandQueue::QueueCommand(); +#else + if (!r_linearlight) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); +#endif for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } @@ -3759,7 +3786,14 @@ fixed_t tmvline1_subclamp_rgba() void tmvline4_subclamp_rgba() { +#ifdef NO_SSE DrawerCommandQueue::QueueCommand(); +#else + if (!r_linearlight) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); +#endif for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } @@ -3772,7 +3806,14 @@ fixed_t tmvline1_revsubclamp_rgba() void tmvline4_revsubclamp_rgba() { +#ifdef NO_SSE DrawerCommandQueue::QueueCommand(); +#else + if (!r_linearlight) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); +#endif for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index 1744781624..66be1f38b8 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -417,9 +417,9 @@ FORCEINLINE uint32_t alpha_blend(uint32_t fg, uint32_t bg) uint32_t bg_green = (bg >> 8) & 0xff; uint32_t bg_blue = bg & 0xff; - uint32_t red = ((fg_red * alpha) + (bg_red * inv_alpha)) / 256; - uint32_t green = ((fg_green * alpha) + (bg_green * inv_alpha)) / 256; - uint32_t blue = ((fg_blue * alpha) + (bg_blue * inv_alpha)) / 256; + uint32_t red = clamp(fg_red + (bg_red * inv_alpha) / 256, 0, 255); + uint32_t green = clamp(fg_green + (bg_green * inv_alpha) / 256, 0, 255); + uint32_t blue = clamp(fg_blue + (bg_blue * inv_alpha) / 256, 0, 255); return 0xff000000 | (red << 16) | (green << 8) | blue; } @@ -543,7 +543,7 @@ FORCEINLINE uint32_t alpha_blend(uint32_t fg, uint32_t bg) /* // Complex shade 8 pixels -#define AVX2_SHADE(fg, shade_constants) { \ +#define AVX_SHADE(fg, shade_constants) { \ __m256i fg_hi = _mm256_unpackhi_epi8(fg, _mm256_setzero_si256()); \ __m256i fg_lo = _mm256_unpacklo_epi8(fg, 
_mm256_setzero_si256()); \ \ @@ -566,8 +566,58 @@ FORCEINLINE uint32_t alpha_blend(uint32_t fg, uint32_t bg) } */ +// Normal premultiplied alpha blend using the alpha from fg +#define VEC_ALPHA_BLEND(fg,bg) { \ + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); \ + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); \ + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); \ + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); \ + __m128i m255 = _mm_set1_epi16(255); \ + __m128i inv_alpha_hi = _mm_sub_epi16(m255, _mm_shufflehi_epi16(_mm_shufflelo_epi16(fg_hi, _MM_SHUFFLE(3,3,3,3)), _MM_SHUFFLE(3,3,3,3))); \ + __m128i inv_alpha_lo = _mm_sub_epi16(m255, _mm_shufflehi_epi16(_mm_shufflelo_epi16(fg_lo, _MM_SHUFFLE(3,3,3,3)), _MM_SHUFFLE(3,3,3,3))); \ + inv_alpha_hi = _mm_add_epi16(inv_alpha_hi, _mm_srli_epi16(inv_alpha_hi, 7)); \ + inv_alpha_lo = _mm_add_epi16(inv_alpha_lo, _mm_srli_epi16(inv_alpha_lo, 7)); \ + bg_hi = _mm_mullo_epi16(bg_hi, inv_alpha_hi); \ + bg_hi = _mm_srli_epi16(bg_hi, 8); \ + bg_lo = _mm_mullo_epi16(bg_lo, inv_alpha_lo); \ + bg_lo = _mm_srli_epi16(bg_lo, 8); \ + bg = _mm_packus_epi16(bg_lo, bg_hi); \ + fg = _mm_adds_epu8(fg, bg); \ +} +/* +FORCEINLINE void calc_blend_alpha(uint32_t fg, uint32_t src_alpha, uint32_t dest_alpha, uint32_t &fg_alpha, uint32_t &bg_alpha) +{ + fg_alpha = src_alpha; + bg_alpha = dest_alpha; +} +#define VEC_CALC_BLEND_ALPHA(fg, msrc_alpha, mdest_alpha) \ + __m128i fg_alpha_hi = msrc_alpha; \ + __m128i fg_alpha_lo = msrc_alpha; \ + __m128i bg_alpha_hi = mdest_alpha; \ + __m128i bg_alpha_lo = mdest_alpha; +*/ + +// Calculates the final alpha values to be used when combined with the source texture alpha channel +FORCEINLINE void calc_blend_alpha(uint32_t fg, uint32_t src_alpha, uint32_t dest_alpha, uint32_t &fg_alpha, uint32_t &bg_alpha) +{ + fg_alpha = (fg >> 24) & 0xff; + fg_alpha += fg_alpha >> 7; + bg_alpha = (dest_alpha * (256 - fg_alpha)) >> 8; + fg_alpha = (src_alpha * fg_alpha) >> 8; +} 
+ +// Calculates the final alpha values to be used when combined with the source texture alpha channel +#define VEC_CALC_BLEND_ALPHA(fg, msrc_alpha, mdest_alpha) \ + __m128i fg_alpha_hi = _mm_shufflehi_epi16(_mm_shufflelo_epi16(_mm_unpackhi_epi8(fg, _mm_setzero_si128()), _MM_SHUFFLE(3, 3, 3, 3)), _MM_SHUFFLE(3, 3, 3, 3)); \ + __m128i fg_alpha_lo = _mm_shufflehi_epi16(_mm_shufflelo_epi16(_mm_unpacklo_epi8(fg, _mm_setzero_si128()), _MM_SHUFFLE(3, 3, 3, 3)), _MM_SHUFFLE(3, 3, 3, 3)); \ + fg_alpha_hi = _mm_add_epi16(fg_alpha_hi, _mm_srli_epi16(fg_alpha_hi, 7)); \ + fg_alpha_lo = _mm_add_epi16(fg_alpha_lo, _mm_srli_epi16(fg_alpha_lo, 7)); \ + __m128i bg_alpha_hi = _mm_srli_epi16(_mm_mullo_epi16(_mm_sub_epi16(_mm_set1_epi16(256), fg_alpha_hi), mdest_alpha), 8); \ + __m128i bg_alpha_lo = _mm_srli_epi16(_mm_mullo_epi16(_mm_sub_epi16(_mm_set1_epi16(256), fg_alpha_lo), mdest_alpha), 8); \ + fg_alpha_hi = _mm_srli_epi16(_mm_mullo_epi16(fg_alpha_hi, msrc_alpha), 8); \ + fg_alpha_lo = _mm_srli_epi16(_mm_mullo_epi16(fg_alpha_lo, msrc_alpha), 8); // Calculate constants for a simple shade #define SSE_SHADE_SIMPLE_INIT(light) \ diff --git a/src/r_draw_rgba_sse.h b/src/r_draw_rgba_sse.h index 14ebbbb41d..0597580e1e 100644 --- a/src/r_draw_rgba_sse.h +++ b/src/r_draw_rgba_sse.h @@ -444,17 +444,16 @@ public: uint32_t pix2 = bufplce[2][place2 >> bits]; uint32_t pix3 = bufplce[3][place3 >> bits]; - // movemask = !(pix == 0) - __m128i movemask = _mm_xor_si128(_mm_cmpeq_epi32(_mm_set_epi32(pix3, pix2, pix1, pix0), _mm_setzero_si128()), _mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128())); - local_vplce[0] = place0 + local_vince[0]; local_vplce[1] = place1 + local_vince[1]; local_vplce[2] = place2 + local_vince[2]; local_vplce[3] = place3 + local_vince[3]; __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); + __m128i bg = _mm_loadu_si128((const __m128i*)dest); VEC_SHADE_SIMPLE(fg); - _mm_maskmoveu_si128(fg, movemask, (char*)dest); + VEC_ALPHA_BLEND(fg, bg); + 
_mm_storeu_si128((__m128i*)dest, fg); dest += pitch; } while (--count); } @@ -473,17 +472,585 @@ public: uint32_t pix2 = bufplce[2][place2 >> bits]; uint32_t pix3 = bufplce[3][place3 >> bits]; - // movemask = !(pix == 0) - __m128i movemask = _mm_xor_si128(_mm_cmpeq_epi32(_mm_set_epi32(pix3, pix2, pix1, pix0), _mm_setzero_si128()), _mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128())); - local_vplce[0] = place0 + local_vince[0]; local_vplce[1] = place1 + local_vince[1]; local_vplce[2] = place2 + local_vince[2]; local_vplce[3] = place3 + local_vince[3]; __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); + __m128i bg = _mm_loadu_si128((const __m128i*)dest); VEC_SHADE(fg, shade_constants); - _mm_maskmoveu_si128(fg, movemask, (char*)dest); + VEC_ALPHA_BLEND(fg, bg); + _mm_storeu_si128((__m128i*)dest, fg); + dest += pitch; + } while (--count); + } + } +}; + +class VecCommand(Tmvline4AddRGBA) : public DrawerCommand +{ + BYTE * RESTRICT _dest; + int _count; + int _pitch; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; + int tmvlinebits; + fixed_t palookuplight[4]; + DWORD vplce[4]; + DWORD vince[4]; + const uint32 * RESTRICT bufplce[4]; + +public: + VecCommand(Tmvline4AddRGBA)() + { + _dest = dc_dest; + _count = dc_count; + _pitch = dc_pitch; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; + tmvlinebits = ::tmvlinebits; + for (int i = 0; i < 4; i++) + { + palookuplight[i] = ::palookuplight[i]; + vplce[i] = ::vplce[i]; + vince[i] = ::vince[i]; + bufplce[i] = (const uint32 *)::bufplce[i]; + } + } + + void Execute(DrawerThread *thread) override + { + int count = thread->count_for_thread(_dest_y, _count); + if (count <= 0) + return; + + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); + int pitch = _pitch * thread->num_cores; + int bits = tmvlinebits; + + uint32_t light[4]; + light[0] = calc_light_multiplier(palookuplight[0]); + light[1] = 
calc_light_multiplier(palookuplight[1]); + light[2] = calc_light_multiplier(palookuplight[2]); + light[3] = calc_light_multiplier(palookuplight[3]); + + ShadeConstants shade_constants = _shade_constants; + + uint32_t src_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t dest_alpha = _destalpha >> (FRACBITS - 8); + + DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; + DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; + int skipped = thread->skipped_by_thread(_dest_y); + for (int i = 0; i < 4; i++) + { + local_vplce[i] += local_vince[i] * skipped; + local_vince[i] *= thread->num_cores; + } + + if (shade_constants.simple_shade) + { + VEC_SHADE_SIMPLE_INIT4(light[3], light[2], light[1], light[0]); + + __m128i msrc_alpha = _mm_set1_epi16(src_alpha); + __m128i mdest_alpha = _mm_set1_epi16(dest_alpha); + + do + { + uint32_t pix0 = bufplce[0][local_vplce[0] >> bits]; + uint32_t pix1 = bufplce[1][local_vplce[1] >> bits]; + uint32_t pix2 = bufplce[2][local_vplce[2] >> bits]; + uint32_t pix3 = bufplce[3][local_vplce[3] >> bits]; + + local_vplce[0] = local_vplce[0] + local_vince[0]; + local_vplce[1] = local_vplce[1] + local_vince[1]; + local_vplce[2] = local_vplce[2] + local_vince[2]; + local_vplce[3] = local_vplce[3] + local_vince[3]; + + __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); + + VEC_CALC_BLEND_ALPHA(fg, msrc_alpha, mdest_alpha); + VEC_SHADE_SIMPLE(fg); + + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + __m128i out_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, fg_alpha_hi), _mm_mullo_epi16(bg_hi, bg_alpha_hi)), 8); + __m128i out_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, fg_alpha_lo), _mm_mullo_epi16(bg_lo, bg_alpha_lo)), 8); + __m128i out = 
_mm_packus_epi16(out_lo, out_hi); + + _mm_storeu_si128((__m128i*)dest, out); + dest += pitch; + } while (--count); + } + else + { + VEC_SHADE_INIT4(light[3], light[2], light[1], light[0], shade_constants); + + __m128i msrc_alpha = _mm_set1_epi16(src_alpha); + __m128i mdest_alpha = _mm_set1_epi16(dest_alpha); + + do + { + uint32_t pix0 = bufplce[0][local_vplce[0] >> bits]; + uint32_t pix1 = bufplce[1][local_vplce[1] >> bits]; + uint32_t pix2 = bufplce[2][local_vplce[2] >> bits]; + uint32_t pix3 = bufplce[3][local_vplce[3] >> bits]; + + local_vplce[0] = local_vplce[0] + local_vince[0]; + local_vplce[1] = local_vplce[1] + local_vince[1]; + local_vplce[2] = local_vplce[2] + local_vince[2]; + local_vplce[3] = local_vplce[3] + local_vince[3]; + + __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); + VEC_CALC_BLEND_ALPHA(fg, msrc_alpha, mdest_alpha); + VEC_SHADE(fg, shade_constants); + + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + __m128i out_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, fg_alpha_hi), _mm_mullo_epi16(bg_hi, bg_alpha_hi)), 8); + __m128i out_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, fg_alpha_lo), _mm_mullo_epi16(bg_lo, bg_alpha_lo)), 8); + __m128i out = _mm_packus_epi16(out_lo, out_hi); + + _mm_storeu_si128((__m128i*)dest, out); + dest += pitch; + } while (--count); + } + } +}; + +class VecCommand(Tmvline4AddClampRGBA) : public DrawerCommand +{ + BYTE * RESTRICT _dest; + int _count; + int _pitch; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; + int tmvlinebits; + fixed_t palookuplight[4]; + DWORD vplce[4]; + DWORD vince[4]; + const uint32 *RESTRICT bufplce[4]; + +public: + VecCommand(Tmvline4AddClampRGBA)() + { + _dest = dc_dest; + _count = 
dc_count; + _pitch = dc_pitch; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; + tmvlinebits = ::tmvlinebits; + for (int i = 0; i < 4; i++) + { + palookuplight[i] = ::palookuplight[i]; + vplce[i] = ::vplce[i]; + vince[i] = ::vince[i]; + bufplce[i] = (const uint32 *)::bufplce[i]; + } + } + + void Execute(DrawerThread *thread) override + { + int count = thread->count_for_thread(_dest_y, _count); + if (count <= 0) + return; + + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); + int pitch = _pitch * thread->num_cores; + int bits = tmvlinebits; + + uint32_t light[4]; + light[0] = calc_light_multiplier(palookuplight[0]); + light[1] = calc_light_multiplier(palookuplight[1]); + light[2] = calc_light_multiplier(palookuplight[2]); + light[3] = calc_light_multiplier(palookuplight[3]); + + ShadeConstants shade_constants = _shade_constants; + + uint32_t src_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t dest_alpha = _destalpha >> (FRACBITS - 8); + + DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; + DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; + int skipped = thread->skipped_by_thread(_dest_y); + for (int i = 0; i < 4; i++) + { + local_vplce[i] += local_vince[i] * skipped; + local_vince[i] *= thread->num_cores; + } + + if (shade_constants.simple_shade) + { + VEC_SHADE_SIMPLE_INIT4(light[3], light[2], light[1], light[0]); + + __m128i msrc_alpha = _mm_set1_epi16(src_alpha); + __m128i mdest_alpha = _mm_set1_epi16(dest_alpha); + + do + { + uint32_t pix0 = bufplce[0][local_vplce[0] >> bits]; + uint32_t pix1 = bufplce[1][local_vplce[1] >> bits]; + uint32_t pix2 = bufplce[2][local_vplce[2] >> bits]; + uint32_t pix3 = bufplce[3][local_vplce[3] >> bits]; + + local_vplce[0] = local_vplce[0] + local_vince[0]; + local_vplce[1] = local_vplce[1] + local_vince[1]; + local_vplce[2] = local_vplce[2] + local_vince[2]; + local_vplce[3] = local_vplce[3] + local_vince[3]; + + 
__m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); + VEC_CALC_BLEND_ALPHA(fg, msrc_alpha, mdest_alpha); + VEC_SHADE_SIMPLE(fg); + + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + __m128i out_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, fg_alpha_hi), _mm_mullo_epi16(bg_hi, bg_alpha_hi)), 8); + __m128i out_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, fg_alpha_lo), _mm_mullo_epi16(bg_lo, bg_alpha_lo)), 8); + __m128i out = _mm_packus_epi16(out_lo, out_hi); + + _mm_storeu_si128((__m128i*)dest, out); + dest += pitch; + } while (--count); + } + else + { + VEC_SHADE_INIT4(light[3], light[2], light[1], light[0], shade_constants); + + __m128i msrc_alpha = _mm_set1_epi16(src_alpha); + __m128i mdest_alpha = _mm_set1_epi16(dest_alpha); + + do + { + uint32_t pix0 = bufplce[0][local_vplce[0] >> bits]; + uint32_t pix1 = bufplce[1][local_vplce[1] >> bits]; + uint32_t pix2 = bufplce[2][local_vplce[2] >> bits]; + uint32_t pix3 = bufplce[3][local_vplce[3] >> bits]; + + local_vplce[0] = local_vplce[0] + local_vince[0]; + local_vplce[1] = local_vplce[1] + local_vince[1]; + local_vplce[2] = local_vplce[2] + local_vince[2]; + local_vplce[3] = local_vplce[3] + local_vince[3]; + + __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); + VEC_CALC_BLEND_ALPHA(fg, msrc_alpha, mdest_alpha); + VEC_SHADE(fg, shade_constants); + + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + __m128i out_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, fg_alpha_hi), 
_mm_mullo_epi16(bg_hi, bg_alpha_hi)), 8); + __m128i out_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, fg_alpha_lo), _mm_mullo_epi16(bg_lo, bg_alpha_lo)), 8); + __m128i out = _mm_packus_epi16(out_lo, out_hi); + + _mm_storeu_si128((__m128i*)dest, out); + dest += pitch; + } while (--count); + } + } +}; + +class VecCommand(Tmvline4SubClampRGBA) : public DrawerCommand +{ + BYTE * RESTRICT _dest; + int _count; + int _pitch; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; + int tmvlinebits; + fixed_t palookuplight[4]; + DWORD vplce[4]; + DWORD vince[4]; + const uint32 *RESTRICT bufplce[4]; + +public: + VecCommand(Tmvline4SubClampRGBA)() + { + _dest = dc_dest; + _count = dc_count; + _pitch = dc_pitch; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; + tmvlinebits = ::tmvlinebits; + for (int i = 0; i < 4; i++) + { + palookuplight[i] = ::palookuplight[i]; + vplce[i] = ::vplce[i]; + vince[i] = ::vince[i]; + bufplce[i] = (const uint32 *)::bufplce[i]; + } + } + + void Execute(DrawerThread *thread) override + { + int count = thread->count_for_thread(_dest_y, _count); + if (count <= 0) + return; + + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); + int pitch = _pitch * thread->num_cores; + int bits = tmvlinebits; + + uint32_t light[4]; + light[0] = calc_light_multiplier(palookuplight[0]); + light[1] = calc_light_multiplier(palookuplight[1]); + light[2] = calc_light_multiplier(palookuplight[2]); + light[3] = calc_light_multiplier(palookuplight[3]); + + ShadeConstants shade_constants = _shade_constants; + + uint32_t src_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t dest_alpha = _destalpha >> (FRACBITS - 8); + + DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; + DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; + int skipped = thread->skipped_by_thread(_dest_y); + for (int i = 0; i < 4; i++) + { + local_vplce[i] += local_vince[i] 
* skipped; + local_vince[i] *= thread->num_cores; + } + + if (shade_constants.simple_shade) + { + VEC_SHADE_SIMPLE_INIT4(light[3], light[2], light[1], light[0]); + + __m128i msrc_alpha = _mm_set1_epi16(src_alpha); + __m128i mdest_alpha = _mm_set1_epi16(dest_alpha); + + do + { + uint32_t pix0 = bufplce[0][local_vplce[0] >> bits]; + uint32_t pix1 = bufplce[1][local_vplce[1] >> bits]; + uint32_t pix2 = bufplce[2][local_vplce[2] >> bits]; + uint32_t pix3 = bufplce[3][local_vplce[3] >> bits]; + + local_vplce[0] = local_vplce[0] + local_vince[0]; + local_vplce[1] = local_vplce[1] + local_vince[1]; + local_vplce[2] = local_vplce[2] + local_vince[2]; + local_vplce[3] = local_vplce[3] + local_vince[3]; + + __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); + VEC_CALC_BLEND_ALPHA(fg, msrc_alpha, mdest_alpha); + VEC_SHADE_SIMPLE(fg); + + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + __m128i out_hi = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(bg_hi, bg_alpha_hi), _mm_mullo_epi16(fg_hi, fg_alpha_hi)), 8); + __m128i out_lo = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(bg_lo, bg_alpha_lo), _mm_mullo_epi16(fg_lo, fg_alpha_lo)), 8); + __m128i out = _mm_packus_epi16(out_lo, out_hi); + + _mm_storeu_si128((__m128i*)dest, out); + dest += pitch; + } while (--count); + } + else + { + VEC_SHADE_INIT4(light[3], light[2], light[1], light[0], shade_constants); + + __m128i msrc_alpha = _mm_set1_epi16(src_alpha); + __m128i mdest_alpha = _mm_set1_epi16(dest_alpha); + + do + { + uint32_t pix0 = bufplce[0][local_vplce[0] >> bits]; + uint32_t pix1 = bufplce[1][local_vplce[1] >> bits]; + uint32_t pix2 = bufplce[2][local_vplce[2] >> bits]; + uint32_t pix3 = bufplce[3][local_vplce[3] >> bits]; + + local_vplce[0] = local_vplce[0] + 
local_vince[0]; + local_vplce[1] = local_vplce[1] + local_vince[1]; + local_vplce[2] = local_vplce[2] + local_vince[2]; + local_vplce[3] = local_vplce[3] + local_vince[3]; + + __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); + VEC_CALC_BLEND_ALPHA(fg, msrc_alpha, mdest_alpha); + VEC_SHADE(fg, shade_constants); + + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + __m128i out_hi = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(bg_hi, bg_alpha_hi), _mm_mullo_epi16(fg_hi, fg_alpha_hi)), 8); + __m128i out_lo = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(bg_lo, bg_alpha_lo), _mm_mullo_epi16(fg_lo, fg_alpha_lo)), 8); + __m128i out = _mm_packus_epi16(out_lo, out_hi); + + _mm_storeu_si128((__m128i*)dest, out); + dest += pitch; + } while (--count); + } + } +}; + +class VecCommand(Tmvline4RevSubClampRGBA) : public DrawerCommand +{ + BYTE * RESTRICT _dest; + int _count; + int _pitch; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; + int tmvlinebits; + fixed_t palookuplight[4]; + DWORD vplce[4]; + DWORD vince[4]; + const uint32 *RESTRICT bufplce[4]; + +public: + VecCommand(Tmvline4RevSubClampRGBA)() + { + _dest = dc_dest; + _count = dc_count; + _pitch = dc_pitch; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; + tmvlinebits = ::tmvlinebits; + for (int i = 0; i < 4; i++) + { + palookuplight[i] = ::palookuplight[i]; + vplce[i] = ::vplce[i]; + vince[i] = ::vince[i]; + bufplce[i] = (const uint32 *)::bufplce[i]; + } + } + + void Execute(DrawerThread *thread) override + { + int count = thread->count_for_thread(_dest_y, _count); + if (count <= 0) + return; + + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); + int pitch = 
_pitch * thread->num_cores; + int bits = tmvlinebits; + + uint32_t light[4]; + light[0] = calc_light_multiplier(palookuplight[0]); + light[1] = calc_light_multiplier(palookuplight[1]); + light[2] = calc_light_multiplier(palookuplight[2]); + light[3] = calc_light_multiplier(palookuplight[3]); + + ShadeConstants shade_constants = _shade_constants; + + uint32_t src_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t dest_alpha = _destalpha >> (FRACBITS - 8); + + DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; + DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; + int skipped = thread->skipped_by_thread(_dest_y); + for (int i = 0; i < 4; i++) + { + local_vplce[i] += local_vince[i] * skipped; + local_vince[i] *= thread->num_cores; + } + + if (shade_constants.simple_shade) + { + VEC_SHADE_SIMPLE_INIT4(light[3], light[2], light[1], light[0]); + + __m128i msrc_alpha = _mm_set1_epi16(src_alpha); + __m128i mdest_alpha = _mm_set1_epi16(dest_alpha); + + do + { + uint32_t pix0 = bufplce[0][local_vplce[0] >> bits]; + uint32_t pix1 = bufplce[1][local_vplce[1] >> bits]; + uint32_t pix2 = bufplce[2][local_vplce[2] >> bits]; + uint32_t pix3 = bufplce[3][local_vplce[3] >> bits]; + + local_vplce[0] = local_vplce[0] + local_vince[0]; + local_vplce[1] = local_vplce[1] + local_vince[1]; + local_vplce[2] = local_vplce[2] + local_vince[2]; + local_vplce[3] = local_vplce[3] + local_vince[3]; + + __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); + VEC_CALC_BLEND_ALPHA(fg, msrc_alpha, mdest_alpha); + VEC_SHADE_SIMPLE(fg); + + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + __m128i out_hi = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(fg_hi, fg_alpha_hi), _mm_mullo_epi16(bg_hi, bg_alpha_hi)), 8); + __m128i 
out_lo = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(fg_lo, fg_alpha_lo), _mm_mullo_epi16(bg_lo, bg_alpha_lo)), 8); + __m128i out = _mm_packus_epi16(out_lo, out_hi); + + _mm_storeu_si128((__m128i*)dest, out); + dest += pitch; + } while (--count); + } + else + { + VEC_SHADE_INIT4(light[3], light[2], light[1], light[0], shade_constants); + + __m128i msrc_alpha = _mm_set1_epi16(src_alpha); + __m128i mdest_alpha = _mm_set1_epi16(dest_alpha); + + do + { + uint32_t pix0 = bufplce[0][local_vplce[0] >> bits]; + uint32_t pix1 = bufplce[1][local_vplce[1] >> bits]; + uint32_t pix2 = bufplce[2][local_vplce[2] >> bits]; + uint32_t pix3 = bufplce[3][local_vplce[3] >> bits]; + + local_vplce[0] = local_vplce[0] + local_vince[0]; + local_vplce[1] = local_vplce[1] + local_vince[1]; + local_vplce[2] = local_vplce[2] + local_vince[2]; + local_vplce[3] = local_vplce[3] + local_vince[3]; + + __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); + VEC_CALC_BLEND_ALPHA(fg, msrc_alpha, mdest_alpha); + VEC_SHADE(fg, shade_constants); + + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + __m128i out_hi = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(fg_hi, fg_alpha_hi), _mm_mullo_epi16(bg_hi, bg_alpha_hi)), 8); + __m128i out_lo = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(fg_lo, fg_alpha_lo), _mm_mullo_epi16(bg_lo, bg_alpha_lo)), 8); + __m128i out = _mm_packus_epi16(out_lo, out_hi); + + _mm_storeu_si128((__m128i*)dest, out); dest += pitch; } while (--count); } diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp index 4da963430e..1e1236f0e6 100644 --- a/src/r_drawt_rgba.cpp +++ b/src/r_drawt_rgba.cpp @@ -1436,7 +1436,14 @@ void rt_subclamp1col_rgba (int hx, int sx, int yl, int yh) // Subtracts all four spans to the screen starting at 
sx with clamping. void rt_subclamp4cols_rgba (int sx, int yl, int yh) { +#ifdef NO_SSE DrawerCommandQueue::QueueCommand(sx, yl, yh); +#else + if (!r_linearlight) + DrawerCommandQueue::QueueCommand(sx, yl, yh); + else + DrawerCommandQueue::QueueCommand(sx, yl, yh); +#endif } // Translates and subtracts one span at hx to the screen at sx with clamping. @@ -1462,7 +1469,14 @@ void rt_revsubclamp1col_rgba (int hx, int sx, int yl, int yh) // Subtracts all four spans from the screen starting at sx with clamping. void rt_revsubclamp4cols_rgba (int sx, int yl, int yh) { +#ifdef NO_SSE DrawerCommandQueue::QueueCommand(sx, yl, yh); +#else + if (!r_linearlight) + DrawerCommandQueue::QueueCommand(sx, yl, yh); + else + DrawerCommandQueue::QueueCommand(sx, yl, yh); +#endif } // Translates and subtracts one span at hx from the screen at sx with clamping. diff --git a/src/r_drawt_rgba_sse.h b/src/r_drawt_rgba_sse.h index 5b8ae8081d..684be2b6ae 100644 --- a/src/r_drawt_rgba_sse.h +++ b/src/r_drawt_rgba_sse.h @@ -493,3 +493,255 @@ public: } } }; + +class VecCommand(RtSubClamp4colsRGBA) : public DrawerCommand +{ + int sx; + int yl; + int yh; + BYTE * RESTRICT _destorg; + int _pitch; + fixed_t _light; + fixed_t _srcalpha; + fixed_t _destalpha; + ShadeConstants _shade_constants; + +public: + VecCommand(RtSubClamp4colsRGBA)(int sx, int yl, int yh) + { + this->sx = sx; + this->yl = yl; + this->yh = yh; + + _destorg = dc_destorg; + _pitch = dc_pitch; + _light = dc_light; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; + _shade_constants = dc_shade_constants; + } + + void Execute(DrawerThread *thread) override + { + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + int sincr; + + count = thread->count_for_thread(yl, yh - yl + 1); + if (count <= 0) + return; + + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); + source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; + pitch = _pitch * thread->num_cores; + sincr = 
4 * thread->num_cores; + + uint32_t light = calc_light_multiplier(_light); + uint32_t *palette = (uint32_t*)GPalette.BaseColors; + ShadeConstants shade_constants = _shade_constants; + + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); + + if (shade_constants.simple_shade) + { + VEC_SHADE_SIMPLE_INIT(light); + + __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); + __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); + + do { + uint32_t p0 = source[0]; + uint32_t p1 = source[1]; + uint32_t p2 = source[2]; + uint32_t p3 = source[3]; + + // shade_pal_index: + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + VEC_SHADE_SIMPLE(fg); + + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + + // unpack bg: + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + // (bg_red * bg_alpha - fg_red * fg_alpha) / 256: + __m128i color_hi = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(bg_hi, mbg_alpha), _mm_mullo_epi16(fg_hi, mfg_alpha)), 8); + __m128i color_lo = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(bg_lo, mbg_alpha), _mm_mullo_epi16(fg_lo, mfg_alpha)), 8); + + __m128i color = _mm_packus_epi16(color_lo, color_hi); + _mm_storeu_si128((__m128i*)dest, color); + + source += sincr; + dest += pitch; + } while (--count); + } + else + { + VEC_SHADE_INIT(light, shade_constants); + + __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); + __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); + + do { + uint32_t p0 = source[0]; + uint32_t p1 = source[1]; + uint32_t p2 = source[2]; + uint32_t p3 = 
source[3]; + + // shade_pal_index: + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + VEC_SHADE(fg, shade_constants); + + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + + // unpack bg: + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + // (bg_red * bg_alpha - fg_red * fg_alpha) / 256: + __m128i color_hi = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(bg_hi, mbg_alpha), _mm_mullo_epi16(fg_hi, mfg_alpha)), 8); + __m128i color_lo = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(bg_lo, mbg_alpha), _mm_mullo_epi16(fg_lo, mfg_alpha)), 8); + + __m128i color = _mm_packus_epi16(color_lo, color_hi); + _mm_storeu_si128((__m128i*)dest, color); + + source += sincr; + dest += pitch; + } while (--count); + } + } +}; + +class VecCommand(RtRevSubClamp4colsRGBA) : public DrawerCommand +{ + int sx; + int yl; + int yh; + BYTE * RESTRICT _destorg; + int _pitch; + fixed_t _light; + fixed_t _srcalpha; + fixed_t _destalpha; + ShadeConstants _shade_constants; + +public: + VecCommand(RtRevSubClamp4colsRGBA)(int sx, int yl, int yh) + { + this->sx = sx; + this->yl = yl; + this->yh = yh; + + _destorg = dc_destorg; + _pitch = dc_pitch; + _light = dc_light; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; + _shade_constants = dc_shade_constants; + } + + void Execute(DrawerThread *thread) override + { + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + int sincr; + + count = thread->count_for_thread(yl, yh - yl + 1); + if (count <= 0) + return; + + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); + source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; + pitch = _pitch * thread->num_cores; + sincr = 4 * thread->num_cores; + + uint32_t light = calc_light_multiplier(_light); + uint32_t 
*palette = (uint32_t*)GPalette.BaseColors; + ShadeConstants shade_constants = _shade_constants; + + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); + + if (shade_constants.simple_shade) + { + VEC_SHADE_SIMPLE_INIT(light); + + __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); + __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); + + do { + uint32_t p0 = source[0]; + uint32_t p1 = source[1]; + uint32_t p2 = source[2]; + uint32_t p3 = source[3]; + + // shade_pal_index: + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + VEC_SHADE_SIMPLE(fg); + + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + + // unpack bg: + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + // (fg_red * fg_alpha - bg_red * bg_alpha) / 256: + __m128i color_hi = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); + __m128i color_lo = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); + + __m128i color = _mm_packus_epi16(color_lo, color_hi); + _mm_storeu_si128((__m128i*)dest, color); + + source += sincr; + dest += pitch; + } while (--count); + } + else + { + VEC_SHADE_INIT(light, shade_constants); + + __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); + __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); + + do { + uint32_t p0 = source[0]; + uint32_t p1 = source[1]; + uint32_t p2 = source[2]; + uint32_t p3 = source[3]; + + // shade_pal_index: + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], 
palette[p1], palette[p0]); + VEC_SHADE(fg, shade_constants); + + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + + // unpack bg: + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + // (fg_red * fg_alpha - bg_red * bg_alpha) / 256: + __m128i color_hi = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); + __m128i color_lo = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); + + __m128i color = _mm_packus_epi16(color_lo, color_hi); + _mm_storeu_si128((__m128i*)dest, color); + + source += sincr; + dest += pitch; + } while (--count); + } + } +}; From e72a032a114c7710112534157abad8fa300c2f7d Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 19 Jun 2016 23:12:10 +0200 Subject: [PATCH 061/912] Fixed alpha channel issue with textures --- src/textures/pngtexture.cpp | 10 ++++++++-- src/textures/texture.cpp | 2 +- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/src/textures/pngtexture.cpp b/src/textures/pngtexture.cpp index 206797a344..408cf1e2fe 100644 --- a/src/textures/pngtexture.cpp +++ b/src/textures/pngtexture.cpp @@ -724,8 +724,9 @@ void FPNGTexture::MakeTextureBgra () { for (y = Height; y > 0; --y) { + // output as premultiplied alpha uint32_t alpha = in[1]; - uint32_t gray = in[0]; + uint32_t gray = (in[0] * alpha + 127) / 255; *out++ = (alpha << 24) | (gray << 16) | (gray << 8) | gray; in += pitch; } @@ -740,7 +741,12 @@ void FPNGTexture::MakeTextureBgra () { for (y = Height; y > 0; --y) { - *out++ = (((uint32_t)in[3]) << 24) | (((uint32_t)in[0]) << 16) | (((uint32_t)in[1]) << 8) | ((uint32_t)in[2]); + // output as premultiplied alpha + uint32_t alpha = in[3]; + uint32_t red = (in[0] * alpha + 127) / 255; + uint32_t green = (in[1] * 
alpha + 127) / 255; + uint32_t blue = (in[2] * alpha + 127) / 255; + *out++ = (alpha << 24) | (red << 16) | (green << 8) | blue; in += pitch; } in -= backstep; diff --git a/src/textures/texture.cpp b/src/textures/texture.cpp index 0030719cbe..da5dd8ad7b 100644 --- a/src/textures/texture.cpp +++ b/src/textures/texture.cpp @@ -203,7 +203,7 @@ const uint32_t *FTexture::GetPixelsBgra() PixelsBgra.resize(Width * Height); for (int i = 0; i < Width * Height; i++) { - PixelsBgra[i] = GPalette.BaseColors[indices[i]].d; + PixelsBgra[i] = 0xff000000 | GPalette.BaseColors[indices[i]].d; } } return PixelsBgra.data(); From d3bc68a160be4b6549f68454b308ad66c62e1d50 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 19 Jun 2016 23:37:22 +0200 Subject: [PATCH 062/912] Disabled the AVX intrinsics --- src/r_draw_rgba.cpp | 40 +++++++++------------------------------- src/r_drawt_rgba.cpp | 36 ++++++++---------------------------- 2 files changed, 17 insertions(+), 59 deletions(-) diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 96232ab0c0..b437fbe001 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -58,7 +58,6 @@ extern float rw_lightstep; extern int wallshade; CVAR(Bool, r_multithreaded, true, 0) -CVAR(Bool, r_linearlight, false, 0) #ifndef NO_SSE @@ -71,7 +70,7 @@ CVAR(Bool, r_linearlight, false, 0) #define VEC_SHADE_INIT4 SSE_SHADE_INIT4 #define VEC_SHADE SSE_SHADE #include "r_draw_rgba_sse.h" - +/* // Generate AVX drawers: #undef VecCommand #undef VEC_SHADE_SIMPLE_INIT @@ -88,7 +87,7 @@ CVAR(Bool, r_linearlight, false, 0) #define VEC_SHADE_INIT4 AVX_LINEAR_SHADE_INIT4 #define VEC_SHADE AVX_LINEAR_SHADE #include "r_draw_rgba_sse.h" - +*/ #endif ///////////////////////////////////////////////////////////////////////////// @@ -3652,10 +3651,7 @@ void R_DrawSpan_rgba() #ifdef NO_SSE DrawerCommandQueue::QueueCommand(); #else - if (!r_linearlight) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); + 
DrawerCommandQueue::QueueCommand(); #endif } @@ -3709,10 +3705,7 @@ void vlinec4_rgba() #ifdef NO_SSE DrawerCommandQueue::QueueCommand(); #else - if (!r_linearlight) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); + DrawerCommandQueue::QueueCommand(); #endif for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; @@ -3729,10 +3722,7 @@ void mvlinec4_rgba() #ifdef NO_SSE DrawerCommandQueue::QueueCommand(); #else - if (!r_linearlight) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); + DrawerCommandQueue::QueueCommand(); #endif for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; @@ -3749,10 +3739,7 @@ void tmvline4_add_rgba() #ifdef NO_SSE DrawerCommandQueue::QueueCommand(); #else - if (!r_linearlight) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); + DrawerCommandQueue::QueueCommand(); #endif for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; @@ -3769,10 +3756,7 @@ void tmvline4_addclamp_rgba() #ifdef NO_SSE DrawerCommandQueue::QueueCommand(); #else - if (!r_linearlight) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); + DrawerCommandQueue::QueueCommand(); #endif for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; @@ -3789,10 +3773,7 @@ void tmvline4_subclamp_rgba() #ifdef NO_SSE DrawerCommandQueue::QueueCommand(); #else - if (!r_linearlight) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); + DrawerCommandQueue::QueueCommand(); #endif for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; @@ -3809,10 +3790,7 @@ void tmvline4_revsubclamp_rgba() #ifdef NO_SSE DrawerCommandQueue::QueueCommand(); #else - if (!r_linearlight) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); + DrawerCommandQueue::QueueCommand(); #endif for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; diff --git a/src/r_drawt_rgba.cpp 
b/src/r_drawt_rgba.cpp index 1e1236f0e6..e239674e8c 100644 --- a/src/r_drawt_rgba.cpp +++ b/src/r_drawt_rgba.cpp @@ -51,8 +51,6 @@ extern unsigned int dc_tspans[4][MAXHEIGHT]; extern unsigned int *dc_ctspan[4]; extern unsigned int *horizspan[4]; -EXTERN_CVAR(Bool, r_linearlight) - #ifndef NO_SSE // Generate SSE drawers: @@ -64,7 +62,7 @@ EXTERN_CVAR(Bool, r_linearlight) #define VEC_SHADE_INIT4 SSE_SHADE_INIT4 #define VEC_SHADE SSE_SHADE #include "r_drawt_rgba_sse.h" - +/* // Generate AVX drawers: #undef VecCommand #undef VEC_SHADE_SIMPLE_INIT @@ -81,7 +79,7 @@ EXTERN_CVAR(Bool, r_linearlight) #define VEC_SHADE_INIT4 AVX_LINEAR_SHADE_INIT4 #define VEC_SHADE AVX_LINEAR_SHADE #include "r_drawt_rgba_sse.h" - +*/ #endif ///////////////////////////////////////////////////////////////////////////// @@ -1311,10 +1309,7 @@ void rt_map4cols_rgba (int sx, int yl, int yh) #ifdef NO_SSE DrawerCommandQueue::QueueCommand(sx, yl, yh); #else - if (!r_linearlight) - DrawerCommandQueue::QueueCommand(sx, yl, yh); - else - DrawerCommandQueue::QueueCommand(sx, yl, yh); + DrawerCommandQueue::QueueCommand(sx, yl, yh); #endif } @@ -1354,10 +1349,7 @@ void rt_add4cols_rgba (int sx, int yl, int yh) #ifdef NO_SSE DrawerCommandQueue::QueueCommand(sx, yl, yh); #else - if (!r_linearlight) - DrawerCommandQueue::QueueCommand(sx, yl, yh); - else - DrawerCommandQueue::QueueCommand(sx, yl, yh); + DrawerCommandQueue::QueueCommand(sx, yl, yh); #endif } @@ -1387,10 +1379,7 @@ void rt_shaded4cols_rgba (int sx, int yl, int yh) #ifdef NO_SSE DrawerCommandQueue::QueueCommand(sx, yl, yh); #else - if (!r_linearlight) - DrawerCommandQueue::QueueCommand(sx, yl, yh); - else - DrawerCommandQueue::QueueCommand(sx, yl, yh); + DrawerCommandQueue::QueueCommand(sx, yl, yh); #endif } @@ -1406,10 +1395,7 @@ void rt_addclamp4cols_rgba (int sx, int yl, int yh) #ifdef NO_SSE DrawerCommandQueue::QueueCommand(sx, yl, yh); #else - if (!r_linearlight) - DrawerCommandQueue::QueueCommand(sx, yl, yh); - else - 
DrawerCommandQueue::QueueCommand(sx, yl, yh); + DrawerCommandQueue::QueueCommand(sx, yl, yh); #endif } @@ -1439,10 +1425,7 @@ void rt_subclamp4cols_rgba (int sx, int yl, int yh) #ifdef NO_SSE DrawerCommandQueue::QueueCommand(sx, yl, yh); #else - if (!r_linearlight) - DrawerCommandQueue::QueueCommand(sx, yl, yh); - else - DrawerCommandQueue::QueueCommand(sx, yl, yh); + DrawerCommandQueue::QueueCommand(sx, yl, yh); #endif } @@ -1472,10 +1455,7 @@ void rt_revsubclamp4cols_rgba (int sx, int yl, int yh) #ifdef NO_SSE DrawerCommandQueue::QueueCommand(sx, yl, yh); #else - if (!r_linearlight) - DrawerCommandQueue::QueueCommand(sx, yl, yh); - else - DrawerCommandQueue::QueueCommand(sx, yl, yh); + DrawerCommandQueue::QueueCommand(sx, yl, yh); #endif } From 6daeb5a15881c2198af31cb564c23d6090f026d4 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 20 Jun 2016 02:36:54 +0200 Subject: [PATCH 063/912] Blend mode fixes --- src/r_draw_rgba.cpp | 32 +++++++-------- src/r_draw_rgba.h | 87 ++++++++++++++++++---------------------- src/r_draw_rgba_sse.h | 48 ++++++++-------------- src/textures/texture.cpp | 5 ++- 4 files changed, 76 insertions(+), 96 deletions(-) diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index b437fbe001..f317a34d60 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -2538,8 +2538,8 @@ public: { uint32_t pix = source[frac >> bits]; - uint32_t fg_alpha, bg_alpha; - calc_blend_alpha(pix, src_alpha, dest_alpha, fg_alpha, bg_alpha); + uint32_t fg_alpha = src_alpha; + uint32_t bg_alpha = calc_blend_bgalpha(pix, dest_alpha); uint32_t fg = shade_bgra(pix, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; @@ -2631,8 +2631,8 @@ public: { uint32_t pix = bufplce[i][local_vplce[i] >> bits]; - uint32_t fg_alpha, bg_alpha; - calc_blend_alpha(pix, src_alpha, dest_alpha, fg_alpha, bg_alpha); + uint32_t fg_alpha = src_alpha; + uint32_t bg_alpha = calc_blend_bgalpha(pix, dest_alpha); uint32_t fg = shade_bgra(pix, light[i], shade_constants); 
uint32_t fg_red = (fg >> 16) & 0xff; @@ -2709,8 +2709,8 @@ public: { uint32_t pix = source[frac >> bits]; - uint32_t fg_alpha, bg_alpha; - calc_blend_alpha(pix, src_alpha, dest_alpha, fg_alpha, bg_alpha); + uint32_t fg_alpha = src_alpha; + uint32_t bg_alpha = calc_blend_bgalpha(pix, dest_alpha); uint32_t fg = shade_bgra(pix, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; @@ -2802,8 +2802,8 @@ public: { uint32_t pix = bufplce[i][local_vplce[i] >> bits]; - uint32_t fg_alpha, bg_alpha; - calc_blend_alpha(pix, src_alpha, dest_alpha, fg_alpha, bg_alpha); + uint32_t fg_alpha = src_alpha; + uint32_t bg_alpha = calc_blend_bgalpha(pix, dest_alpha); uint32_t fg = shade_bgra(pix, light[i], shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; @@ -2880,8 +2880,8 @@ public: { uint32_t pix = source[frac >> bits]; - uint32_t fg_alpha, bg_alpha; - calc_blend_alpha(pix, src_alpha, dest_alpha, fg_alpha, bg_alpha); + uint32_t fg_alpha = src_alpha; + uint32_t bg_alpha = calc_blend_bgalpha(pix, dest_alpha); uint32_t fg = shade_bgra(pix, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; @@ -2973,8 +2973,8 @@ public: { uint32_t pix = bufplce[i][local_vplce[i] >> bits]; - uint32_t fg_alpha, bg_alpha; - calc_blend_alpha(pix, src_alpha, dest_alpha, fg_alpha, bg_alpha); + uint32_t fg_alpha = src_alpha; + uint32_t bg_alpha = calc_blend_bgalpha(pix, dest_alpha); uint32_t fg = shade_bgra(pix, light[i], shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; @@ -3051,8 +3051,8 @@ public: { uint32_t pix = source[frac >> bits]; - uint32_t fg_alpha, bg_alpha; - calc_blend_alpha(pix, src_alpha, dest_alpha, fg_alpha, bg_alpha); + uint32_t fg_alpha = src_alpha; + uint32_t bg_alpha = calc_blend_bgalpha(pix, dest_alpha); uint32_t fg = shade_bgra(pix, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; @@ -3144,8 +3144,8 @@ public: { uint32_t pix = bufplce[i][local_vplce[i] >> bits]; - uint32_t fg_alpha, bg_alpha; - calc_blend_alpha(pix, src_alpha, dest_alpha, 
fg_alpha, bg_alpha); + uint32_t fg_alpha = src_alpha; + uint32_t bg_alpha = calc_blend_bgalpha(pix, dest_alpha); uint32_t fg = shade_bgra(pix, light[i], shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index 66be1f38b8..2527e84a61 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -339,6 +339,7 @@ FORCEINLINE uint32_t shade_bgra_simple(uint32_t color, uint32_t light) FORCEINLINE uint32_t shade_pal_index(uint32_t index, uint32_t light, const ShadeConstants &constants) { const PalEntry &color = GPalette.BaseColors[index]; + uint32_t alpha = color.d & 0xff000000; uint32_t red = color.r; uint32_t green = color.g; uint32_t blue = color.b; @@ -367,11 +368,12 @@ FORCEINLINE uint32_t shade_pal_index(uint32_t index, uint32_t light, const Shade green = (green * constants.light_green) / 256; blue = (blue * constants.light_blue) / 256; } - return 0xff000000 | (red << 16) | (green << 8) | blue; + return alpha | (red << 16) | (green << 8) | blue; } FORCEINLINE uint32_t shade_bgra(uint32_t color, uint32_t light, const ShadeConstants &constants) { + uint32_t alpha = color & 0xff000000; uint32_t red = (color >> 16) & 0xff; uint32_t green = (color >> 8) & 0xff; uint32_t blue = color & 0xff; @@ -400,12 +402,12 @@ FORCEINLINE uint32_t shade_bgra(uint32_t color, uint32_t light, const ShadeConst green = (green * constants.light_green) / 256; blue = (blue * constants.light_blue) / 256; } - return 0xff000000 | (red << 16) | (green << 8) | blue; + return alpha | (red << 16) | (green << 8) | blue; } FORCEINLINE uint32_t alpha_blend(uint32_t fg, uint32_t bg) { - uint32_t fg_alpha = (fg >> 24) & 0xff; + uint32_t fg_alpha = fg >> 24; uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -468,11 +470,11 @@ FORCEINLINE uint32_t alpha_blend(uint32_t fg, uint32_t bg) __m256 mrcp_255 = _mm256_set1_ps(1.0f/255.0f); \ __m256 m255 = _mm256_set1_ps(255.0f); \ __m256 color = 
_mm256_set_ps( \ - shade_constants.light_alpha * (1.0f/256.0f), shade_constants.light_red * (1.0f/256.0f), shade_constants.light_green * (1.0f/256.0f), shade_constants.light_blue * (1.0f/256.0f), \ - shade_constants.light_alpha * (1.0f/256.0f), shade_constants.light_red * (1.0f/256.0f), shade_constants.light_green * (1.0f/256.0f), shade_constants.light_blue * (1.0f/256.0f)); \ + 1.0f, shade_constants.light_red * (1.0f/256.0f), shade_constants.light_green * (1.0f/256.0f), shade_constants.light_blue * (1.0f/256.0f), \ + 1.0f, shade_constants.light_red * (1.0f/256.0f), shade_constants.light_green * (1.0f/256.0f), shade_constants.light_blue * (1.0f/256.0f)); \ __m256 fade = _mm256_set_ps( \ - shade_constants.fade_alpha * (1.0f/256.0f), shade_constants.fade_red * (1.0f/256.0f), shade_constants.fade_green * (1.0f/256.0f), shade_constants.fade_blue * (1.0f/256.0f), \ - shade_constants.fade_alpha * (1.0f/256.0f), shade_constants.fade_red * (1.0f/256.0f), shade_constants.fade_green * (1.0f/256.0f), shade_constants.fade_blue * (1.0f/256.0f)); \ + 0.0f, shade_constants.fade_red * (1.0f/256.0f), shade_constants.fade_green * (1.0f/256.0f), shade_constants.fade_blue * (1.0f/256.0f), \ + 0.0f, shade_constants.fade_red * (1.0f/256.0f), shade_constants.fade_green * (1.0f/256.0f), shade_constants.fade_blue * (1.0f/256.0f)); \ __m256 fade_amount_hi = _mm256_mul_ps(fade, _mm256_sub_ps(_mm256_set1_ps(1.0f), mlight_hi)); \ __m256 fade_amount_lo = _mm256_mul_ps(fade, _mm256_sub_ps(_mm256_set1_ps(1.0f), mlight_lo)); \ __m256 inv_desaturate = _mm256_set1_ps((256 - shade_constants.desaturate) * (1.0f/256.0f)); \ @@ -488,11 +490,11 @@ FORCEINLINE uint32_t alpha_blend(uint32_t fg, uint32_t bg) __m256 mrcp_255 = _mm256_set1_ps(1.0f/255.0f); \ __m256 m255 = _mm256_set1_ps(255.0f); \ __m256 color = _mm256_set_ps( \ - shade_constants.light_alpha * (1.0f/256.0f), shade_constants.light_red * (1.0f/256.0f), shade_constants.light_green * (1.0f/256.0f), shade_constants.light_blue * (1.0f/256.0f), \ - 
shade_constants.light_alpha * (1.0f/256.0f), shade_constants.light_red * (1.0f/256.0f), shade_constants.light_green * (1.0f/256.0f), shade_constants.light_blue * (1.0f/256.0f)); \ + 1.0f, shade_constants.light_red * (1.0f/256.0f), shade_constants.light_green * (1.0f/256.0f), shade_constants.light_blue * (1.0f/256.0f), \ + 1.0f, shade_constants.light_red * (1.0f/256.0f), shade_constants.light_green * (1.0f/256.0f), shade_constants.light_blue * (1.0f/256.0f)); \ __m256 fade = _mm256_set_ps( \ - shade_constants.fade_alpha * (1.0f/256.0f), shade_constants.fade_red * (1.0f/256.0f), shade_constants.fade_green * (1.0f/256.0f), shade_constants.fade_blue * (1.0f/256.0f), \ - shade_constants.fade_alpha * (1.0f/256.0f), shade_constants.fade_red * (1.0f/256.0f), shade_constants.fade_green * (1.0f/256.0f), shade_constants.fade_blue * (1.0f/256.0f)); \ + 0.0f, shade_constants.fade_red * (1.0f/256.0f), shade_constants.fade_green * (1.0f/256.0f), shade_constants.fade_blue * (1.0f/256.0f), \ + 0.0f, shade_constants.fade_red * (1.0f/256.0f), shade_constants.fade_green * (1.0f/256.0f), shade_constants.fade_blue * (1.0f/256.0f)); \ __m256 fade_amount_hi = _mm256_mul_ps(fade, _mm256_sub_ps(_mm256_set1_ps(1.0f), mlight_hi)); \ __m256 fade_amount_lo = _mm256_mul_ps(fade, _mm256_sub_ps(_mm256_set1_ps(1.0f), mlight_lo)); \ __m256 inv_desaturate = _mm256_set1_ps((256 - shade_constants.desaturate) * (1.0f/256.0f)); \ @@ -585,39 +587,30 @@ FORCEINLINE uint32_t alpha_blend(uint32_t fg, uint32_t bg) fg = _mm_adds_epu8(fg, bg); \ } -/* -FORCEINLINE void calc_blend_alpha(uint32_t fg, uint32_t src_alpha, uint32_t dest_alpha, uint32_t &fg_alpha, uint32_t &bg_alpha) +// Calculates the final alpha values to be used when combined with the source texture alpha channel +FORCEINLINE uint32_t calc_blend_bgalpha(uint32_t fg, uint32_t dest_alpha) { - fg_alpha = src_alpha; - bg_alpha = dest_alpha; + uint32_t alpha = fg >> 24; + alpha += alpha >> 7; + return 256 - alpha; // (dest_alpha * (256 - alpha)) >> 8; 
} -#define VEC_CALC_BLEND_ALPHA(fg, msrc_alpha, mdest_alpha) \ - __m128i fg_alpha_hi = msrc_alpha; \ - __m128i fg_alpha_lo = msrc_alpha; \ - __m128i bg_alpha_hi = mdest_alpha; \ - __m128i bg_alpha_lo = mdest_alpha; -*/ +#define VEC_CALC_BLEND_ALPHA_INIT(src_alpha, dest_alpha) \ + __m128i msrc_alpha = _mm_set1_epi16(src_alpha); \ + __m128i mdest_alpha = _mm_set1_epi16(dest_alpha); // Calculates the final alpha values to be used when combined with the source texture alpha channel -FORCEINLINE void calc_blend_alpha(uint32_t fg, uint32_t src_alpha, uint32_t dest_alpha, uint32_t &fg_alpha, uint32_t &bg_alpha) -{ - fg_alpha = (fg >> 24) & 0xff; - fg_alpha += fg_alpha >> 7; - bg_alpha = (dest_alpha * (256 - fg_alpha)) >> 8; - fg_alpha = (src_alpha * fg_alpha) >> 8; -} - -// Calculates the final alpha values to be used when combined with the source texture alpha channel -#define VEC_CALC_BLEND_ALPHA(fg, msrc_alpha, mdest_alpha) \ - __m128i fg_alpha_hi = _mm_shufflehi_epi16(_mm_shufflelo_epi16(_mm_unpackhi_epi8(fg, _mm_setzero_si128()), _MM_SHUFFLE(3, 3, 3, 3)), _MM_SHUFFLE(3, 3, 3, 3)); \ - __m128i fg_alpha_lo = _mm_shufflehi_epi16(_mm_shufflelo_epi16(_mm_unpacklo_epi8(fg, _mm_setzero_si128()), _MM_SHUFFLE(3, 3, 3, 3)), _MM_SHUFFLE(3, 3, 3, 3)); \ - fg_alpha_hi = _mm_add_epi16(fg_alpha_hi, _mm_srli_epi16(fg_alpha_hi, 7)); \ - fg_alpha_lo = _mm_add_epi16(fg_alpha_lo, _mm_srli_epi16(fg_alpha_lo, 7)); \ - __m128i bg_alpha_hi = _mm_srli_epi16(_mm_mullo_epi16(_mm_sub_epi16(_mm_set1_epi16(256), fg_alpha_hi), mdest_alpha), 8); \ - __m128i bg_alpha_lo = _mm_srli_epi16(_mm_mullo_epi16(_mm_sub_epi16(_mm_set1_epi16(256), fg_alpha_lo), mdest_alpha), 8); \ - fg_alpha_hi = _mm_srli_epi16(_mm_mullo_epi16(fg_alpha_hi, msrc_alpha), 8); \ - fg_alpha_lo = _mm_srli_epi16(_mm_mullo_epi16(fg_alpha_lo, msrc_alpha), 8); +#define VEC_CALC_BLEND_ALPHA(fg) \ + __m128i fg_alpha_hi, fg_alpha_lo, bg_alpha_hi, bg_alpha_lo; { \ + __m128i alpha_hi = 
_mm_shufflehi_epi16(_mm_shufflelo_epi16(_mm_unpackhi_epi8(fg, _mm_setzero_si128()), _MM_SHUFFLE(3, 3, 3, 3)), _MM_SHUFFLE(3, 3, 3, 3)); \ + __m128i alpha_lo = _mm_shufflehi_epi16(_mm_shufflelo_epi16(_mm_unpacklo_epi8(fg, _mm_setzero_si128()), _MM_SHUFFLE(3, 3, 3, 3)), _MM_SHUFFLE(3, 3, 3, 3)); \ + alpha_hi = _mm_add_epi16(alpha_hi, _mm_srli_epi16(alpha_hi, 7)); \ + alpha_lo = _mm_add_epi16(alpha_lo, _mm_srli_epi16(alpha_lo, 7)); \ + bg_alpha_hi = _mm_sub_epi16(_mm_set1_epi16(256), alpha_hi); /* _mm_srli_epi16(_mm_mullo_epi16(_mm_sub_epi16(_mm_set1_epi16(256), alpha_hi), mdest_alpha), 8);*/ \ + bg_alpha_lo = _mm_sub_epi16(_mm_set1_epi16(256), alpha_lo); /* _mm_srli_epi16(_mm_mullo_epi16(_mm_sub_epi16(_mm_set1_epi16(256), alpha_lo), mdest_alpha), 8);*/ \ + fg_alpha_hi = msrc_alpha; \ + fg_alpha_lo = msrc_alpha; \ + } // Calculate constants for a simple shade #define SSE_SHADE_SIMPLE_INIT(light) \ @@ -645,11 +638,11 @@ FORCEINLINE void calc_blend_alpha(uint32_t fg, uint32_t src_alpha, uint32_t dest __m128i mlight_hi = _mm_set_epi16(256, light, light, light, 256, light, light, light); \ __m128i mlight_lo = mlight_hi; \ __m128i color = _mm_set_epi16( \ - shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, \ - shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); \ + 256, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, \ + 256, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); \ __m128i fade = _mm_set_epi16( \ - shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, \ - shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); \ + 0, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, \ + 0, shade_constants.fade_red, shade_constants.fade_green, 
shade_constants.fade_blue); \ __m128i fade_amount_hi = _mm_mullo_epi16(fade, _mm_subs_epu16(_mm_set1_epi16(256), mlight_hi)); \ __m128i fade_amount_lo = fade_amount_hi; \ __m128i inv_desaturate = _mm_set1_epi16(256 - shade_constants.desaturate); \ @@ -659,11 +652,11 @@ FORCEINLINE void calc_blend_alpha(uint32_t fg, uint32_t src_alpha, uint32_t dest __m128i mlight_hi = _mm_set_epi16(256, light1, light1, light1, 256, light0, light0, light0); \ __m128i mlight_lo = _mm_set_epi16(256, light3, light3, light3, 256, light2, light2, light2); \ __m128i color = _mm_set_epi16( \ - shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, \ - shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); \ + 256, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, \ + 256, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); \ __m128i fade = _mm_set_epi16( \ - shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, \ - shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); \ + 0, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, \ + 0, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); \ __m128i fade_amount_hi = _mm_mullo_epi16(fade, _mm_subs_epu16(_mm_set1_epi16(256), mlight_hi)); \ __m128i fade_amount_lo = _mm_mullo_epi16(fade, _mm_subs_epu16(_mm_set1_epi16(256), mlight_lo)); \ __m128i inv_desaturate = _mm_set1_epi16(256 - shade_constants.desaturate); \ diff --git a/src/r_draw_rgba_sse.h b/src/r_draw_rgba_sse.h index 0597580e1e..220638c751 100644 --- a/src/r_draw_rgba_sse.h +++ b/src/r_draw_rgba_sse.h @@ -554,9 +554,7 @@ public: if (shade_constants.simple_shade) { VEC_SHADE_SIMPLE_INIT4(light[3], light[2], light[1], light[0]); - - __m128i 
msrc_alpha = _mm_set1_epi16(src_alpha); - __m128i mdest_alpha = _mm_set1_epi16(dest_alpha); + VEC_CALC_BLEND_ALPHA_INIT(src_alpha, dest_alpha); do { @@ -572,7 +570,7 @@ public: __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); - VEC_CALC_BLEND_ALPHA(fg, msrc_alpha, mdest_alpha); + VEC_CALC_BLEND_ALPHA(fg); VEC_SHADE_SIMPLE(fg); __m128i bg = _mm_loadu_si128((const __m128i*)dest); @@ -593,9 +591,7 @@ public: else { VEC_SHADE_INIT4(light[3], light[2], light[1], light[0], shade_constants); - - __m128i msrc_alpha = _mm_set1_epi16(src_alpha); - __m128i mdest_alpha = _mm_set1_epi16(dest_alpha); + VEC_CALC_BLEND_ALPHA_INIT(src_alpha, dest_alpha); do { @@ -610,7 +606,7 @@ public: local_vplce[3] = local_vplce[3] + local_vince[3]; __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); - VEC_CALC_BLEND_ALPHA(fg, msrc_alpha, mdest_alpha); + VEC_CALC_BLEND_ALPHA(fg); VEC_SHADE(fg, shade_constants); __m128i bg = _mm_loadu_si128((const __m128i*)dest); @@ -697,9 +693,7 @@ public: if (shade_constants.simple_shade) { VEC_SHADE_SIMPLE_INIT4(light[3], light[2], light[1], light[0]); - - __m128i msrc_alpha = _mm_set1_epi16(src_alpha); - __m128i mdest_alpha = _mm_set1_epi16(dest_alpha); + VEC_CALC_BLEND_ALPHA_INIT(src_alpha, dest_alpha); do { @@ -714,7 +708,7 @@ public: local_vplce[3] = local_vplce[3] + local_vince[3]; __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); - VEC_CALC_BLEND_ALPHA(fg, msrc_alpha, mdest_alpha); + VEC_CALC_BLEND_ALPHA(fg); VEC_SHADE_SIMPLE(fg); __m128i bg = _mm_loadu_si128((const __m128i*)dest); @@ -735,9 +729,7 @@ public: else { VEC_SHADE_INIT4(light[3], light[2], light[1], light[0], shade_constants); - - __m128i msrc_alpha = _mm_set1_epi16(src_alpha); - __m128i mdest_alpha = _mm_set1_epi16(dest_alpha); + VEC_CALC_BLEND_ALPHA_INIT(src_alpha, dest_alpha); do { @@ -752,7 +744,7 @@ public: local_vplce[3] = local_vplce[3] + local_vince[3]; __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); - VEC_CALC_BLEND_ALPHA(fg, msrc_alpha, mdest_alpha); + 
VEC_CALC_BLEND_ALPHA(fg); VEC_SHADE(fg, shade_constants); __m128i bg = _mm_loadu_si128((const __m128i*)dest); @@ -839,9 +831,7 @@ public: if (shade_constants.simple_shade) { VEC_SHADE_SIMPLE_INIT4(light[3], light[2], light[1], light[0]); - - __m128i msrc_alpha = _mm_set1_epi16(src_alpha); - __m128i mdest_alpha = _mm_set1_epi16(dest_alpha); + VEC_CALC_BLEND_ALPHA_INIT(src_alpha, dest_alpha); do { @@ -856,7 +846,7 @@ public: local_vplce[3] = local_vplce[3] + local_vince[3]; __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); - VEC_CALC_BLEND_ALPHA(fg, msrc_alpha, mdest_alpha); + VEC_CALC_BLEND_ALPHA(fg); VEC_SHADE_SIMPLE(fg); __m128i bg = _mm_loadu_si128((const __m128i*)dest); @@ -877,9 +867,7 @@ public: else { VEC_SHADE_INIT4(light[3], light[2], light[1], light[0], shade_constants); - - __m128i msrc_alpha = _mm_set1_epi16(src_alpha); - __m128i mdest_alpha = _mm_set1_epi16(dest_alpha); + VEC_CALC_BLEND_ALPHA_INIT(src_alpha, dest_alpha); do { @@ -894,7 +882,7 @@ public: local_vplce[3] = local_vplce[3] + local_vince[3]; __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); - VEC_CALC_BLEND_ALPHA(fg, msrc_alpha, mdest_alpha); + VEC_CALC_BLEND_ALPHA(fg); VEC_SHADE(fg, shade_constants); __m128i bg = _mm_loadu_si128((const __m128i*)dest); @@ -981,9 +969,7 @@ public: if (shade_constants.simple_shade) { VEC_SHADE_SIMPLE_INIT4(light[3], light[2], light[1], light[0]); - - __m128i msrc_alpha = _mm_set1_epi16(src_alpha); - __m128i mdest_alpha = _mm_set1_epi16(dest_alpha); + VEC_CALC_BLEND_ALPHA_INIT(src_alpha, dest_alpha); do { @@ -998,7 +984,7 @@ public: local_vplce[3] = local_vplce[3] + local_vince[3]; __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); - VEC_CALC_BLEND_ALPHA(fg, msrc_alpha, mdest_alpha); + VEC_CALC_BLEND_ALPHA(fg); VEC_SHADE_SIMPLE(fg); __m128i bg = _mm_loadu_si128((const __m128i*)dest); @@ -1019,9 +1005,7 @@ public: else { VEC_SHADE_INIT4(light[3], light[2], light[1], light[0], shade_constants); - - __m128i msrc_alpha = _mm_set1_epi16(src_alpha); - __m128i 
mdest_alpha = _mm_set1_epi16(dest_alpha); + VEC_CALC_BLEND_ALPHA_INIT(src_alpha, dest_alpha); do { @@ -1036,7 +1020,7 @@ public: local_vplce[3] = local_vplce[3] + local_vince[3]; __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); - VEC_CALC_BLEND_ALPHA(fg, msrc_alpha, mdest_alpha); + VEC_CALC_BLEND_ALPHA(fg); VEC_SHADE(fg, shade_constants); __m128i bg = _mm_loadu_si128((const __m128i*)dest); diff --git a/src/textures/texture.cpp b/src/textures/texture.cpp index da5dd8ad7b..16a9e63a6b 100644 --- a/src/textures/texture.cpp +++ b/src/textures/texture.cpp @@ -203,7 +203,10 @@ const uint32_t *FTexture::GetPixelsBgra() PixelsBgra.resize(Width * Height); for (int i = 0; i < Width * Height; i++) { - PixelsBgra[i] = 0xff000000 | GPalette.BaseColors[indices[i]].d; + if (indices[i] != 0) + PixelsBgra[i] = 0xff000000 | GPalette.BaseColors[indices[i]].d; + else + PixelsBgra[i] = 0; } } return PixelsBgra.data(); From c70aa1fe99657e053e3b0aa1a9d00b307ab54bca Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 20 Jun 2016 08:24:02 +0200 Subject: [PATCH 064/912] Added bilinear filtering --- src/r_draw.cpp | 4 ++ src/r_draw.h | 36 ++++++++++ src/r_draw_rgba.cpp | 158 ++++++++++++++++++++++++++++++++------------ src/r_draw_rgba.h | 52 +++++++++++++++ src/r_segs.cpp | 54 +++++++++------ 5 files changed, 242 insertions(+), 62 deletions(-) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 7829e2b77f..83c4ac8d40 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -162,6 +162,8 @@ fixed_t dc_destalpha; // Alpha value used by dc_destblend // first pixel in a column (possibly virtual) const BYTE* dc_source; +const BYTE* dc_source2; +uint32_t dc_texturefracx; BYTE* dc_dest; int dc_count; @@ -171,6 +173,8 @@ DWORD vince[4]; BYTE* palookupoffse[4]; fixed_t palookuplight[4]; const BYTE* bufplce[4]; +const BYTE* bufplce2[4]; +uint32_t buftexturefracx[4]; // just for profiling int dccount; diff --git a/src/r_draw.h b/src/r_draw.h index 99ee4d10d9..d5ecbd289d 100644 --- a/src/r_draw.h +++ 
b/src/r_draw.h @@ -71,6 +71,8 @@ extern "C" fixed_t dc_destalpha; // first pixel in a column extern "C" const BYTE* dc_source; +extern "C" const BYTE* dc_source2; +extern "C" uint32_t dc_texturefracx; extern "C" BYTE *dc_dest, *dc_destorg; extern "C" int dc_count; @@ -80,6 +82,8 @@ extern "C" DWORD vince[4]; extern "C" BYTE* palookupoffse[4]; extern "C" fixed_t palookuplight[4]; extern "C" const BYTE* bufplce[4]; +extern "C" const BYTE* bufplce2[4]; +extern "C" uint32_t buftexturefracx[4]; // [RH] Temporary buffer for column drawing extern "C" BYTE *dc_temp; @@ -374,4 +378,36 @@ void R_SetDSColorMapLight(FColormap *base_colormap, float light, int shade); void R_SetTranslationMap(lighttable_t *translation); +extern bool r_swtruecolor; +EXTERN_CVAR(Bool, r_bilinear); + +// Texture sampler state needed for bilinear filtering +struct SamplerSetup +{ + SamplerSetup() { } + SamplerSetup(fixed_t xoffset, bool magnifying, FTexture *texture, const BYTE*(*getcol)(FTexture *texture, int x)); + + const BYTE *source; + const BYTE *source2; + uint32_t texturefracx; +}; + +inline SamplerSetup::SamplerSetup(fixed_t xoffset, bool magnifying, FTexture *texture, const BYTE*(*getcol)(FTexture *texture, int x)) +{ + // Only do bilinear filtering if enabled and not a magnifying filter + if (!r_swtruecolor || !r_bilinear || magnifying) + { + source = getcol(texture, xoffset >> FRACBITS); + source2 = nullptr; + texturefracx = 0; + } + else + { + int tx = (xoffset - FRACUNIT / 2) >> FRACBITS; + source = getcol(texture, tx); + source2 = getcol(texture, tx + 1); + texturefracx = ((xoffset + FRACUNIT / 2) >> (FRACBITS - 4)) & 15; + } +} + #endif diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index f317a34d60..d85d9994bb 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -58,6 +58,7 @@ extern float rw_lightstep; extern int wallshade; CVAR(Bool, r_multithreaded, true, 0) +CVAR(Bool, r_bilinear, false, 0) #ifndef NO_SSE @@ -1547,41 +1548,72 @@ public: uint32_t light = 
calc_light_multiplier(_light); ShadeConstants shade_constants = _shade_constants; - if (_xbits == 6 && _ybits == 6) + fixed_t xmagnitude = abs((fixed_t)xstep) >> (32 - _xbits - FRACBITS); + fixed_t ymagnitude = abs((fixed_t)ystep) >> (32 - _ybits - FRACBITS); + fixed_t magnitude = xmagnitude + ymagnitude; + + bool magnifying = !r_bilinear || magnitude >> (FRACBITS - 1) == 0; + if (magnifying) { - // 64x64 is the most common case by far, so special case it. - - do + if (_xbits == 6 && _ybits == 6) { - // Current texture index in u,v. - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + // 64x64 is the most common case by far, so special case it. - // Lookup pixel from flat texture tile - *dest++ = shade_bgra(source[spot], light, shade_constants); + do + { + // Current texture index in u,v. + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - // Next step in u,v. - xfrac += xstep; - yfrac += ystep; - } while (--count); + // Lookup pixel from flat texture tile + *dest++ = shade_bgra(source[spot], light, shade_constants); + + // Next step in u,v. + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + else + { + BYTE yshift = 32 - _ybits; + BYTE xshift = yshift - _xbits; + int xmask = ((1 << _xbits) - 1) << _ybits; + + do + { + // Current texture index in u,v. + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + + // Lookup pixel from flat texture tile + *dest++ = shade_bgra(source[spot], light, shade_constants); + + // Next step in u,v. + xfrac += xstep; + yfrac += ystep; + } while (--count); + } } else { - BYTE yshift = 32 - _ybits; - BYTE xshift = yshift - _xbits; - int xmask = ((1 << _xbits) - 1) << _ybits; - - do + if (_xbits == 6 && _ybits == 6) { - // Current texture index in u,v. - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + // 64x64 is the most common case by far, so special case it. 
- // Lookup pixel from flat texture tile - *dest++ = shade_bgra(source[spot], light, shade_constants); - - // Next step in u,v. - xfrac += xstep; - yfrac += ystep; - } while (--count); + do + { + *dest++ = shade_bgra(sample_bilinear(source, xfrac, yfrac, 26, 26), light, shade_constants); + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + else + { + do + { + *dest++ = shade_bgra(sample_bilinear(source, xfrac, yfrac, 32 - _xbits, 32 - _ybits), light, shade_constants); + xfrac += xstep; + yfrac += ystep; + } while (--count); + } } } }; @@ -2253,6 +2285,8 @@ class Vlinec1RGBACommand : public DrawerCommand DWORD _texturefrac; int _count; const BYTE * RESTRICT _source; + const BYTE * RESTRICT _source2; + uint32_t _texturefracx; BYTE * RESTRICT _dest; int vlinebits; int _pitch; @@ -2266,6 +2300,8 @@ public: _texturefrac = dc_texturefrac; _count = dc_count; _source = dc_source; + _source2 = dc_source2; + _texturefracx = dc_texturefracx; _dest = dc_dest; vlinebits = ::vlinebits; _pitch = dc_pitch; @@ -2282,6 +2318,8 @@ public: DWORD fracstep = _iscale * thread->num_cores; DWORD frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); const uint32 *source = (const uint32 *)_source; + const uint32 *source2 = (const uint32 *)_source2; + uint32_t texturefracx = _texturefracx; uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); int bits = vlinebits; int pitch = _pitch * thread->num_cores; @@ -2289,12 +2327,24 @@ public: uint32_t light = calc_light_multiplier(_light); ShadeConstants shade_constants = _shade_constants; - do + if (_source2 == nullptr) { - *dest = shade_bgra(source[frac >> bits], light, shade_constants); - frac += fracstep; - dest += pitch; - } while (--count); + do + { + *dest = shade_bgra(source[frac >> bits], light, shade_constants); + frac += fracstep; + dest += pitch; + } while (--count); + } + else + { + do + { + *dest = shade_bgra(sample_bilinear(source, source2, texturefracx, frac, bits), light, 
shade_constants); + frac += fracstep; + dest += pitch; + } while (--count); + } } }; @@ -2308,7 +2358,9 @@ class Vlinec4RGBACommand : public DrawerCommand fixed_t palookuplight[4]; DWORD vplce[4]; DWORD vince[4]; - const uint32 * RESTRICT bufplce[4]; + const uint32_t * RESTRICT bufplce[4]; + const uint32_t * RESTRICT bufplce2[4]; + uint32_t buftexturefracx[4]; public: Vlinec4RGBACommand() @@ -2323,7 +2375,9 @@ public: palookuplight[i] = ::palookuplight[i]; vplce[i] = ::vplce[i]; vince[i] = ::vince[i]; - bufplce[i] = (const uint32 *)::bufplce[i]; + bufplce[i] = (const uint32_t *)::bufplce[i]; + bufplce2[i] = (const uint32_t *)::bufplce2[i]; + buftexturefracx[i] = ::buftexturefracx[i]; } } @@ -2354,14 +2408,28 @@ public: local_vince[i] *= thread->num_cores; } - do + if (bufplce2[0] == nullptr) { - dest[0] = shade_bgra(bufplce[0][(place = local_vplce[0]) >> bits], light0, shade_constants); local_vplce[0] = place + local_vince[0]; - dest[1] = shade_bgra(bufplce[1][(place = local_vplce[1]) >> bits], light1, shade_constants); local_vplce[1] = place + local_vince[1]; - dest[2] = shade_bgra(bufplce[2][(place = local_vplce[2]) >> bits], light2, shade_constants); local_vplce[2] = place + local_vince[2]; - dest[3] = shade_bgra(bufplce[3][(place = local_vplce[3]) >> bits], light3, shade_constants); local_vplce[3] = place + local_vince[3]; - dest += pitch; - } while (--count); + do + { + dest[0] = shade_bgra(bufplce[0][(place = local_vplce[0]) >> bits], light0, shade_constants); local_vplce[0] = place + local_vince[0]; + dest[1] = shade_bgra(bufplce[1][(place = local_vplce[1]) >> bits], light1, shade_constants); local_vplce[1] = place + local_vince[1]; + dest[2] = shade_bgra(bufplce[2][(place = local_vplce[2]) >> bits], light2, shade_constants); local_vplce[2] = place + local_vince[2]; + dest[3] = shade_bgra(bufplce[3][(place = local_vplce[3]) >> bits], light3, shade_constants); local_vplce[3] = place + local_vince[3]; + dest += pitch; + } while (--count); + } + else + { + do + 
{ + dest[0] = shade_bgra(sample_bilinear(bufplce[0], bufplce2[0], buftexturefracx[0], place = local_vplce[0], bits), light0, shade_constants); local_vplce[0] = place + local_vince[0]; + dest[1] = shade_bgra(sample_bilinear(bufplce[1], bufplce2[1], buftexturefracx[1], place = local_vplce[1], bits), light1, shade_constants); local_vplce[1] = place + local_vince[1]; + dest[2] = shade_bgra(sample_bilinear(bufplce[2], bufplce2[2], buftexturefracx[2], place = local_vplce[2], bits), light2, shade_constants); local_vplce[2] = place + local_vince[2]; + dest[3] = shade_bgra(sample_bilinear(bufplce[3], bufplce2[3], buftexturefracx[3], place = local_vplce[3], bits), light3, shade_constants); local_vplce[3] = place + local_vince[3]; + dest += pitch; + } while (--count); + } } }; @@ -3651,7 +3719,10 @@ void R_DrawSpan_rgba() #ifdef NO_SSE DrawerCommandQueue::QueueCommand(); #else - DrawerCommandQueue::QueueCommand(); + if (!r_bilinear) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); #endif } @@ -3705,7 +3776,10 @@ void vlinec4_rgba() #ifdef NO_SSE DrawerCommandQueue::QueueCommand(); #else - DrawerCommandQueue::QueueCommand(); + if (!r_bilinear) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); #endif for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index 2527e84a61..a266ce878a 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -426,6 +426,58 @@ FORCEINLINE uint32_t alpha_blend(uint32_t fg, uint32_t bg) return 0xff000000 | (red << 16) | (green << 8) | blue; } +FORCEINLINE uint32_t sample_bilinear(const uint32_t *col0, const uint32_t *col1, uint32_t texturefracx, uint32_t texturefracy, int ybits) +{ + uint32_t half = 1 << (ybits - 1); + uint32_t y = (texturefracy - half) >> ybits; + + uint32_t p00 = col0[y]; + uint32_t p01 = col0[y + 1]; + uint32_t p10 = col1[y]; + uint32_t p11 = col1[y + 1]; + + uint32_t inv_b = texturefracx; + uint32_t inv_a = 
((texturefracy + half) >> (ybits - 4)) & 15; + uint32_t a = 16 - inv_a; + uint32_t b = 16 - inv_b; + + uint32_t red = (RPART(p00) * a * b + RPART(p01) * inv_a * b + RPART(p10) * a * inv_b + RPART(p11) * inv_a * inv_b + 127) >> 8; + uint32_t green = (GPART(p00) * a * b + GPART(p01) * inv_a * b + GPART(p10) * a * inv_b + GPART(p11) * inv_a * inv_b + 127) >> 8; + uint32_t blue = (BPART(p00) * a * b + BPART(p01) * inv_a * b + BPART(p10) * a * inv_b + BPART(p11) * inv_a * inv_b + 127) >> 8; + uint32_t alpha = (APART(p00) * a * b + APART(p01) * inv_a * b + APART(p10) * a * inv_b + APART(p11) * inv_a * inv_b + 127) >> 8; + + return (alpha << 24) | (red << 16) | (green << 8) | blue; +} + +FORCEINLINE uint32_t sample_bilinear(const uint32_t *texture, dsfixed_t xfrac, dsfixed_t yfrac, int xbits, int ybits) +{ + int xshift = (32 - xbits); + int yshift = (32 - ybits); + int xmask = (1 << xshift) - 1; + int ymask = (1 << yshift) - 1; + uint32_t xhalf = 1 << (xbits - 1); + uint32_t yhalf = 1 << (ybits - 1); + uint32_t x = (xfrac - xhalf) >> xbits; + uint32_t y = (yfrac - yhalf) >> ybits; + + uint32_t p00 = texture[(y & ymask) + ((x & xmask) << yshift)]; + uint32_t p01 = texture[(y + 1 & ymask) + ((x & xmask) << yshift)]; + uint32_t p10 = texture[(y & ymask) + (((x + 1) & xmask) << yshift)]; + uint32_t p11 = texture[(y + 1 & ymask) + (((x + 1) & xmask) << yshift)]; + + uint32_t inv_b = ((xfrac + xhalf) >> (xbits - 4)) & 15; + uint32_t inv_a = ((yfrac + yhalf) >> (ybits - 4)) & 15; + uint32_t a = 16 - inv_a; + uint32_t b = 16 - inv_b; + + uint32_t red = (RPART(p00) * a * b + RPART(p01) * inv_a * b + RPART(p10) * a * inv_b + RPART(p11) * inv_a * inv_b + 127) >> 8; + uint32_t green = (GPART(p00) * a * b + GPART(p01) * inv_a * b + GPART(p10) * a * inv_b + GPART(p11) * inv_a * inv_b + 127) >> 8; + uint32_t blue = (BPART(p00) * a * b + BPART(p01) * inv_a * b + BPART(p10) * a * inv_b + BPART(p11) * inv_a * inv_b + 127) >> 8; + uint32_t alpha = (APART(p00) * a * b + APART(p01) * inv_a * 
b + APART(p10) * a * inv_b + APART(p11) * inv_a * inv_b + 127) >> 8; + + return (alpha << 24) | (red << 16) | (green << 8) | blue; +} + // Calculate constants for a simple shade with gamma correction #define AVX_LINEAR_SHADE_SIMPLE_INIT(light) \ __m256 mlight_hi = _mm256_set_ps(1.0f, light * (1.0f/256.0f), light * (1.0f/256.0f), light * (1.0f/256.0f), 1.0f, light * (1.0f/256.0f), light * (1.0f/256.0f), light * (1.0f/256.0f)); \ diff --git a/src/r_segs.cpp b/src/r_segs.cpp index ad242b2f91..d71487bb9d 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -58,6 +58,8 @@ CVAR(Bool, r_np2, true, 0) +EXTERN_CVAR(Bool, r_bilinear) + //CVAR (Int, ty, 8, 0) //CVAR (Int, tx, 8, 0) @@ -1066,14 +1068,16 @@ void R_RenderFakeWallRange (drawseg_t *ds, int x1, int x2) } // Draw a column with support for non-power-of-two ranges -uint32_t wallscan_drawcol1(int x, int y1, int y2, uint32_t uv_start, uint32_t uv_step, uint32_t uv_max, const BYTE *source, DWORD(*draw1column)()) +uint32_t wallscan_drawcol1(int x, int y1, int y2, uint32_t uv_start, uint32_t uv_step, uint32_t uv_max, const SamplerSetup &sampler, DWORD(*draw1column)()) { int pixelsize = r_swtruecolor ? 
4 : 1; if (uv_max == 0) // power of two { int count = y2 - y1; - dc_source = source; + dc_source = sampler.source; + dc_source2 = sampler.source2; + dc_texturefracx = sampler.texturefracx; dc_dest = (ylookup[y1] + x) * pixelsize + dc_destorg; dc_count = count; dc_iscale = uv_step; @@ -1097,7 +1101,9 @@ uint32_t wallscan_drawcol1(int x, int y1, int y2, uint32_t uv_start, uint32_t uv next_uv_wrap++; uint32_t count = MIN(left, next_uv_wrap); - dc_source = source; + dc_source = sampler.source; + dc_source2 = sampler.source2; + dc_texturefracx = sampler.texturefracx; dc_dest = (ylookup[y1] + x) * pixelsize + dc_destorg; dc_count = count; dc_iscale = uv_step; @@ -1115,7 +1121,7 @@ uint32_t wallscan_drawcol1(int x, int y1, int y2, uint32_t uv_start, uint32_t uv } // Draw four columns with support for non-power-of-two ranges -void wallscan_drawcol4(int x, int y1, int y2, uint32_t *uv_pos, uint32_t *uv_step, uint32_t uv_max, const BYTE **source, void(*draw4columns)()) +void wallscan_drawcol4(int x, int y1, int y2, uint32_t *uv_pos, uint32_t *uv_step, uint32_t uv_max, const SamplerSetup *sampler, void(*draw4columns)()) { int pixelsize = r_swtruecolor ? 
4 : 1; if (uv_max == 0) // power of two, no wrap handling needed @@ -1123,7 +1129,9 @@ void wallscan_drawcol4(int x, int y1, int y2, uint32_t *uv_pos, uint32_t *uv_ste int count = y2 - y1; for (int i = 0; i < 4; i++) { - bufplce[i] = source[i]; + bufplce[i] = sampler[i].source; + bufplce2[i] = sampler[i].source2; + buftexturefracx[i] = sampler[i].texturefracx; vplce[i] = uv_pos[i]; vince[i] = uv_step[i]; @@ -1139,7 +1147,11 @@ void wallscan_drawcol4(int x, int y1, int y2, uint32_t *uv_pos, uint32_t *uv_ste { dc_dest = (ylookup[y1] + x) * pixelsize + dc_destorg; for (int i = 0; i < 4; i++) - bufplce[i] = source[i]; + { + bufplce[i] = sampler[i].source; + bufplce2[i] = sampler[i].source2; + buftexturefracx[i] = sampler[i].texturefracx; + } uint32_t left = y2 - y1; while (left > 0) @@ -1249,12 +1261,11 @@ void wallscan_any( if (!fixed) R_SetColorMapLight(basecolormap, light, wallshade); - const BYTE *source = getcol(rw_pic, (lwal[x] + xoffset) >> FRACBITS); - uint32_t uv_start, uv_step; calc_uv_start_and_step(y1, swal[x], yrepeat, uv_height, fracbits, uv_start, uv_step); - wallscan_drawcol1(x, y1, y2, uv_start, uv_step, uv_max, source, draw1column); + SamplerSetup sampler(lwal[x] + xoffset, uv_step >> (fracbits - 1) == 0, rw_pic, getcol); + wallscan_drawcol1(x, y1, y2, uv_start, uv_step, uv_max, sampler, draw1column); } // The aligned columns @@ -1264,10 +1275,6 @@ void wallscan_any( int y1[4] = { uwal[x], uwal[x + 1], uwal[x + 2], uwal[x + 3] }; int y2[4] = { dwal[x], dwal[x + 1], dwal[x + 2], dwal[x + 3] }; - const BYTE *source[4]; - for (int i = 0; i < 4; i++) - source[i] = getcol(rw_pic, (lwal[x + i] + xoffset) >> FRACBITS); - float lights[4]; for (int i = 0; i < 4; i++) { @@ -1276,8 +1283,16 @@ void wallscan_any( } uint32_t uv_pos[4], uv_step[4]; + int magnifying = 0; for (int i = 0; i < 4; i++) + { calc_uv_start_and_step(y1[i], swal[x + i], yrepeat, uv_height, fracbits, uv_pos[i], uv_step[i]); + magnifying |= uv_step[i] >> (fracbits - 1); + } + + SamplerSetup 
sampler[4]; + for (int i = 0; i < 4; i++) + sampler[i] = SamplerSetup(lwal[x + i] + xoffset, magnifying == 0, rw_pic, getcol); // Figure out where we vertically can start and stop drawing 4 columns in one go int middle_y1 = y1[0]; @@ -1305,7 +1320,7 @@ void wallscan_any( if (!fixed) R_SetColorMapLight(basecolormap, lights[i], wallshade); - wallscan_drawcol1(x + i, y1[i], y2[i], uv_pos[i], uv_step[i], uv_max, source[i], draw1column); + wallscan_drawcol1(x + i, y1[i], y2[i], uv_pos[i], uv_step[i], uv_max, sampler[i], draw1column); } continue; } @@ -1317,7 +1332,7 @@ void wallscan_any( R_SetColorMapLight(basecolormap, lights[i], wallshade); if (y1[i] < middle_y1) - uv_pos[i] = wallscan_drawcol1(x + i, y1[i], middle_y1, uv_pos[i], uv_step[i], uv_max, source[i], draw1column); + uv_pos[i] = wallscan_drawcol1(x + i, y1[i], middle_y1, uv_pos[i], uv_step[i], uv_max, sampler[i], draw1column); } // Draw the area where all 4 columns are active @@ -1337,7 +1352,7 @@ void wallscan_any( } } } - wallscan_drawcol4(x, middle_y1, middle_y2, uv_pos, uv_step, uv_max, source, draw4columns); + wallscan_drawcol4(x, middle_y1, middle_y2, uv_pos, uv_step, uv_max, sampler, draw4columns); // Draw the last rows where not all 4 columns are active for (int i = 0; i < 4; i++) @@ -1346,7 +1361,7 @@ void wallscan_any( R_SetColorMapLight(basecolormap, lights[i], wallshade); if (middle_y2 < y2[i]) - uv_pos[i] = wallscan_drawcol1(x + i, middle_y2, y2[i], uv_pos[i], uv_step[i], uv_max, source[i], draw1column); + uv_pos[i] = wallscan_drawcol1(x + i, middle_y2, y2[i], uv_pos[i], uv_step[i], uv_max, sampler[i], draw1column); } } @@ -1361,12 +1376,11 @@ void wallscan_any( if (!fixed) R_SetColorMapLight(basecolormap, light, wallshade); - const BYTE *source = getcol(rw_pic, (lwal[x] + xoffset) >> FRACBITS); - uint32_t uv_start, uv_step; calc_uv_start_and_step(y1, swal[x], yrepeat, uv_height, fracbits, uv_start, uv_step); - wallscan_drawcol1(x, y1, y2, uv_start, uv_step, uv_max, source, draw1column); + 
SamplerSetup sampler(lwal[x] + xoffset, uv_step >> (fracbits - 1) == 0, rw_pic, getcol); + wallscan_drawcol1(x, y1, y2, uv_start, uv_step, uv_max, sampler, draw1column); } NetUpdate (); From c1b5ba5b9064997cbe9802f1b5df59a88231d4e3 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 21 Jun 2016 06:22:43 +0200 Subject: [PATCH 065/912] Added SSE versions of bilinear filtering --- src/r_draw_rgba.cpp | 163 +++++++---- src/r_draw_rgba.h | 82 ++++++ src/r_draw_rgba_sse.h | 657 +++++++++++++++++++++++++++--------------- 3 files changed, 611 insertions(+), 291 deletions(-) diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index d85d9994bb..869edaba1d 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -58,7 +58,7 @@ extern float rw_lightstep; extern int wallshade; CVAR(Bool, r_multithreaded, true, 0) -CVAR(Bool, r_bilinear, false, 0) +CVAR(Bool, r_bilinear, true, 0) #ifndef NO_SSE @@ -1680,43 +1680,70 @@ public: xstep = _xstep; ystep = _ystep; - if (_xbits == 6 && _ybits == 6) - { - // 64x64 is the most common case by far, so special case it. - do - { - uint32_t texdata; + fixed_t xmagnitude = abs((fixed_t)xstep) >> (32 - _xbits - FRACBITS); + fixed_t ymagnitude = abs((fixed_t)ystep) >> (32 - _ybits - FRACBITS); + fixed_t magnitude = xmagnitude + ymagnitude; - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - texdata = source[spot]; - if (texdata != 0) + bool magnifying = !r_bilinear || magnitude >> (FRACBITS - 1) == 0; + if (magnifying) + { + if (_xbits == 6 && _ybits == 6) + { + // 64x64 is the most common case by far, so special case it. 
+ do { - *dest = shade_bgra(texdata, light, shade_constants); - } - dest++; - xfrac += xstep; - yfrac += ystep; - } while (--count); + uint32_t texdata; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + texdata = source[spot]; + *dest = alpha_blend(shade_bgra(texdata, light, shade_constants), *dest); + dest++; + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + else + { + BYTE yshift = 32 - _ybits; + BYTE xshift = yshift - _xbits; + int xmask = ((1 << _xbits) - 1) << _ybits; + do + { + uint32_t texdata; + + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + texdata = source[spot]; + *dest = alpha_blend(shade_bgra(texdata, light, shade_constants), *dest); + dest++; + xfrac += xstep; + yfrac += ystep; + } while (--count); + } } else { - BYTE yshift = 32 - _ybits; - BYTE xshift = yshift - _xbits; - int xmask = ((1 << _xbits) - 1) << _ybits; - do + if (_xbits == 6 && _ybits == 6) { - uint32_t texdata; - - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - texdata = source[spot]; - if (texdata != 0) + // 64x64 is the most common case by far, so special case it. 
+ do { - *dest = shade_bgra(texdata, light, shade_constants); - } - dest++; - xfrac += xstep; - yfrac += ystep; - } while (--count); + *dest++ = alpha_blend(shade_bgra(sample_bilinear(source, xfrac, yfrac, 26, 26), light, shade_constants), *dest); + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + else + { + BYTE yshift = 32 - _ybits; + BYTE xshift = yshift - _xbits; + int xmask = ((1 << _xbits) - 1) << _ybits; + do + { + *dest++ = alpha_blend(shade_bgra(sample_bilinear(source, xfrac, yfrac, 32 - _xbits, 32 - _ybits), light, shade_constants), *dest); + xfrac += xstep; + yfrac += ystep; + } while (--count); + } } } }; @@ -2439,6 +2466,8 @@ class Mvlinec1RGBACommand : public DrawerCommand DWORD _texturefrac; int _count; const BYTE * RESTRICT _source; + const BYTE * RESTRICT _source2; + uint32_t _texturefracx; BYTE * RESTRICT _dest; int mvlinebits; int _pitch; @@ -2452,6 +2481,8 @@ public: _texturefrac = dc_texturefrac; _count = dc_count; _source = dc_source; + _source2 = dc_source2; + _texturefracx = dc_texturefracx; _dest = dc_dest; mvlinebits = ::mvlinebits; _pitch = dc_pitch; @@ -2468,6 +2499,8 @@ public: DWORD fracstep = _iscale * thread->num_cores; DWORD frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); const uint32 *source = (const uint32 *)_source; + const uint32 *source2 = (const uint32 *)_source2; + uint32_t texturefracx = _texturefracx; uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); int bits = mvlinebits; int pitch = _pitch * thread->num_cores; @@ -2475,13 +2508,25 @@ public: uint32_t light = calc_light_multiplier(_light); ShadeConstants shade_constants = _shade_constants; - do + if (_source2 == nullptr) { - uint32_t pix = source[frac >> bits]; - *dest = alpha_blend(shade_bgra(pix, light, shade_constants), *dest); - frac += fracstep; - dest += pitch; - } while (--count); + do + { + uint32_t pix = source[frac >> bits]; + *dest = alpha_blend(shade_bgra(pix, light, shade_constants), *dest); + frac += 
fracstep; + dest += pitch; + } while (--count); + } + else + { + do + { + *dest = alpha_blend(shade_bgra(sample_bilinear(source, source2, texturefracx, frac, bits), light, shade_constants), *dest); + frac += fracstep; + dest += pitch; + } while (--count); + } } }; @@ -2496,6 +2541,8 @@ class Mvlinec4RGBACommand : public DrawerCommand DWORD vplce[4]; DWORD vince[4]; const uint32 * RESTRICT bufplce[4]; + const uint32 * RESTRICT bufplce2[4]; + uint32_t buftexturefracx[4]; public: Mvlinec4RGBACommand() @@ -2511,6 +2558,8 @@ public: vplce[i] = ::vplce[i]; vince[i] = ::vince[i]; bufplce[i] = (const uint32 *)::bufplce[i]; + bufplce2[i] = (const uint32_t *)::bufplce2[i]; + buftexturefracx[i] = ::buftexturefracx[i]; } } @@ -2541,15 +2590,29 @@ public: local_vince[i] *= thread->num_cores; } - do + if (bufplce2[0] == nullptr) { - uint32_t pix; - pix = bufplce[0][(place = local_vplce[0]) >> bits]; dest[0] = alpha_blend(shade_bgra(pix, light0, shade_constants), dest[0]); local_vplce[0] = place + local_vince[0]; - pix = bufplce[1][(place = local_vplce[1]) >> bits]; dest[1] = alpha_blend(shade_bgra(pix, light1, shade_constants), dest[1]); local_vplce[1] = place + local_vince[1]; - pix = bufplce[2][(place = local_vplce[2]) >> bits]; dest[2] = alpha_blend(shade_bgra(pix, light2, shade_constants), dest[2]); local_vplce[2] = place + local_vince[2]; - pix = bufplce[3][(place = local_vplce[3]) >> bits]; dest[3] = alpha_blend(shade_bgra(pix, light3, shade_constants), dest[3]); local_vplce[3] = place + local_vince[3]; - dest += pitch; - } while (--count); + do + { + uint32_t pix; + pix = bufplce[0][(place = local_vplce[0]) >> bits]; dest[0] = alpha_blend(shade_bgra(pix, light0, shade_constants), dest[0]); local_vplce[0] = place + local_vince[0]; + pix = bufplce[1][(place = local_vplce[1]) >> bits]; dest[1] = alpha_blend(shade_bgra(pix, light1, shade_constants), dest[1]); local_vplce[1] = place + local_vince[1]; + pix = bufplce[2][(place = local_vplce[2]) >> bits]; dest[2] = 
alpha_blend(shade_bgra(pix, light2, shade_constants), dest[2]); local_vplce[2] = place + local_vince[2]; + pix = bufplce[3][(place = local_vplce[3]) >> bits]; dest[3] = alpha_blend(shade_bgra(pix, light3, shade_constants), dest[3]); local_vplce[3] = place + local_vince[3]; + dest += pitch; + } while (--count); + } + else + { + do + { + dest[0] = alpha_blend(shade_bgra(sample_bilinear(bufplce[0], bufplce2[0], buftexturefracx[0], place = local_vplce[0], bits), light0, shade_constants), dest[0]); local_vplce[0] = place + local_vince[0]; + dest[1] = alpha_blend(shade_bgra(sample_bilinear(bufplce[1], bufplce2[1], buftexturefracx[1], place = local_vplce[1], bits), light1, shade_constants), dest[1]); local_vplce[1] = place + local_vince[1]; + dest[2] = alpha_blend(shade_bgra(sample_bilinear(bufplce[2], bufplce2[2], buftexturefracx[2], place = local_vplce[2], bits), light2, shade_constants), dest[2]); local_vplce[2] = place + local_vince[2]; + dest[3] = alpha_blend(shade_bgra(sample_bilinear(bufplce[3], bufplce2[3], buftexturefracx[3], place = local_vplce[3], bits), light3, shade_constants), dest[3]); local_vplce[3] = place + local_vince[3]; + dest += pitch; + } while (--count); + } } }; @@ -3719,10 +3782,7 @@ void R_DrawSpan_rgba() #ifdef NO_SSE DrawerCommandQueue::QueueCommand(); #else - if (!r_bilinear) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); + DrawerCommandQueue::QueueCommand(); #endif } @@ -3776,10 +3836,7 @@ void vlinec4_rgba() #ifdef NO_SSE DrawerCommandQueue::QueueCommand(); #else - if (!r_bilinear) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); + DrawerCommandQueue::QueueCommand(); #endif for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index a266ce878a..0900e89977 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -478,6 +478,88 @@ FORCEINLINE uint32_t sample_bilinear(const uint32_t *texture, dsfixed_t xfrac, d 
return (alpha << 24) | (red << 16) | (green << 8) | blue; } +#ifndef NO_SSE +FORCEINLINE __m128i sample_bilinear4_sse(const uint32_t **col0, const uint32_t **col1, uint32_t texturefracx[4], uint32_t texturefracy[4], int ybits) +{ + uint32_t half = 1 << (ybits - 1); + + __m128i m127 = _mm_set1_epi16(127); + __m128i fg = _mm_setzero_si128(); + for (int i = 0; i < 4; i++) + { + uint32_t y = (texturefracy[i] - half) >> ybits; + + uint32_t inv_b = texturefracx[i]; + uint32_t inv_a = ((texturefracy[i] + half) >> (ybits - 4)) & 15; + uint32_t a = 16 - inv_a; + uint32_t b = 16 - inv_b; + + uint32_t ab = a * b; + uint32_t invab = inv_a * b; + uint32_t ainvb = a * inv_b; + uint32_t invainvb = inv_a * inv_b; + __m128i ab_invab = _mm_set_epi16(invab, invab, invab, invab, ab, ab, ab, ab); + __m128i ainvb_invainvb = _mm_set_epi16(invainvb, invainvb, invainvb, invainvb, ainvb, ainvb, ainvb, ainvb); + + __m128i p0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(col0[i] + y)), _mm_setzero_si128()); + __m128i p1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(col1[i] + y)), _mm_setzero_si128()); + + __m128i tmp = _mm_adds_epu16(_mm_mullo_epi16(p0, ab_invab), _mm_mullo_epi16(p1, ainvb_invainvb)); + __m128i color = _mm_srli_epi16(_mm_adds_epu16(_mm_adds_epu16(_mm_srli_si128(tmp, 8), tmp), m127), 8); + + fg = _mm_or_si128(_mm_srli_si128(fg, 4), _mm_slli_si128(_mm_packus_epi16(color, _mm_setzero_si128()), 12)); + } + return fg; +} + +FORCEINLINE __m128i sample_bilinear4_sse(const uint32_t *texture, dsfixed_t &xfrac, dsfixed_t &yfrac, dsfixed_t xstep, dsfixed_t ystep, int xbits, int ybits) +{ + int xshift = (32 - xbits); + int yshift = (32 - ybits); + int xmask = (1 << xshift) - 1; + int ymask = (1 << yshift) - 1; + uint32_t xhalf = 1 << (xbits - 1); + uint32_t yhalf = 1 << (ybits - 1); + + __m128i m127 = _mm_set1_epi16(127); + __m128i fg = _mm_setzero_si128(); + for (int i = 0; i < 4; i++) + { + uint32_t x = (xfrac - xhalf) >> xbits; + uint32_t y = (yfrac - yhalf) >> ybits; + 
+ uint32_t p00 = texture[(y & ymask) + ((x & xmask) << yshift)]; + uint32_t p01 = texture[(y + 1 & ymask) + ((x & xmask) << yshift)]; + uint32_t p10 = texture[(y & ymask) + (((x + 1) & xmask) << yshift)]; + uint32_t p11 = texture[(y + 1 & ymask) + (((x + 1) & xmask) << yshift)]; + + uint32_t inv_b = ((xfrac + xhalf) >> (xbits - 4)) & 15; + uint32_t inv_a = ((yfrac + yhalf) >> (ybits - 4)) & 15; + uint32_t a = 16 - inv_a; + uint32_t b = 16 - inv_b; + + uint32_t ab = a * b; + uint32_t invab = inv_a * b; + uint32_t ainvb = a * inv_b; + uint32_t invainvb = inv_a * inv_b; + __m128i ab_invab = _mm_set_epi16(invab, invab, invab, invab, ab, ab, ab, ab); + __m128i ainvb_invainvb = _mm_set_epi16(invainvb, invainvb, invainvb, invainvb, ainvb, ainvb, ainvb, ainvb); + + __m128i p0 = _mm_unpacklo_epi8(_mm_set_epi32(0, 0, p01, p00), _mm_setzero_si128()); + __m128i p1 = _mm_unpacklo_epi8(_mm_set_epi32(0, 0, p11, p10), _mm_setzero_si128()); + + __m128i tmp = _mm_adds_epu16(_mm_mullo_epi16(p0, ab_invab), _mm_mullo_epi16(p1, ainvb_invainvb)); + __m128i color = _mm_srli_epi16(_mm_adds_epu16(_mm_adds_epu16(_mm_srli_si128(tmp, 8), tmp), m127), 8); + + fg = _mm_or_si128(_mm_srli_si128(fg, 4), _mm_slli_si128(_mm_packus_epi16(color, _mm_setzero_si128()), 12)); + + xfrac += xstep; + yfrac += ystep; + } + return fg; +} +#endif + // Calculate constants for a simple shade with gamma correction #define AVX_LINEAR_SHADE_SIMPLE_INIT(light) \ __m256 mlight_hi = _mm256_set_ps(1.0f, light * (1.0f/256.0f), light * (1.0f/256.0f), light * (1.0f/256.0f), 1.0f, light * (1.0f/256.0f), light * (1.0f/256.0f), light * (1.0f/256.0f)); \ diff --git a/src/r_draw_rgba_sse.h b/src/r_draw_rgba_sse.h index 220638c751..7214717249 100644 --- a/src/r_draw_rgba_sse.h +++ b/src/r_draw_rgba_sse.h @@ -71,195 +71,284 @@ public: uint32_t light = calc_light_multiplier(_light); ShadeConstants shade_constants = _shade_constants; - if (_xbits == 6 && _ybits == 6) + fixed_t xmagnitude = abs((fixed_t)xstep) >> (32 - _xbits - 
FRACBITS); + fixed_t ymagnitude = abs((fixed_t)ystep) >> (32 - _ybits - FRACBITS); + fixed_t magnitude = xmagnitude + ymagnitude; + + bool magnifying = !r_bilinear || magnitude >> (FRACBITS - 1) == 0; + if (magnifying) { - // 64x64 is the most common case by far, so special case it. - - int sse_count = count / 4; - count -= sse_count * 4; - - if (shade_constants.simple_shade) + if (_xbits == 6 && _ybits == 6) { - VEC_SHADE_SIMPLE_INIT(light); + // 64x64 is the most common case by far, so special case it. - while (sse_count--) + int sse_count = count / 4; + count -= sse_count * 4; + + if (shade_constants.simple_shade) + { + VEC_SHADE_SIMPLE_INIT(light); + + while (sse_count--) + { + // Current texture index in u,v. + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p0 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p1 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p2 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p3 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + // Lookup pixel from flat texture tile, + // re-index using light/colormap. + __m128i fg = _mm_set_epi32(p3, p2, p1, p0); + VEC_SHADE_SIMPLE(fg); + _mm_storeu_si128((__m128i*)dest, fg); + + // Next step in u,v. + dest += 4; + } + } + else + { + VEC_SHADE_INIT(light, shade_constants); + + while (sse_count--) + { + // Current texture index in u,v. 
+ spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p0 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p1 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p2 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p3 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + // Lookup pixel from flat texture tile, + // re-index using light/colormap. + __m128i fg = _mm_set_epi32(p3, p2, p1, p0); + VEC_SHADE(fg, shade_constants); + _mm_storeu_si128((__m128i*)dest, fg); + + // Next step in u,v. + dest += 4; + } + } + + if (count == 0) + return; + + do { // Current texture index in u,v. spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p0 = source[spot]; - xfrac += xstep; - yfrac += ystep; - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p1 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p2 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p3 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - // Lookup pixel from flat texture tile, - // re-index using light/colormap. - __m128i fg = _mm_set_epi32(p3, p2, p1, p0); - VEC_SHADE_SIMPLE(fg); - _mm_storeu_si128((__m128i*)dest, fg); + // Lookup pixel from flat texture tile + *dest++ = shade_bgra(source[spot], light, shade_constants); // Next step in u,v. 
- dest += 4; - } + xfrac += xstep; + yfrac += ystep; + } while (--count); } else { - VEC_SHADE_INIT(light, shade_constants); + BYTE yshift = 32 - _ybits; + BYTE xshift = yshift - _xbits; + int xmask = ((1 << _xbits) - 1) << _ybits; - while (sse_count--) + int sse_count = count / 4; + count -= sse_count * 4; + + if (shade_constants.simple_shade) + { + VEC_SHADE_SIMPLE_INIT(light); + + while (sse_count--) + { + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + uint32_t p0 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + uint32_t p1 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + uint32_t p2 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + uint32_t p3 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + // Lookup pixel from flat texture tile + __m128i fg = _mm_set_epi32(p3, p2, p1, p0); + VEC_SHADE_SIMPLE(fg); + _mm_storeu_si128((__m128i*)dest, fg); + dest += 4; + } + } + else + { + VEC_SHADE_INIT(light, shade_constants); + + while (sse_count--) + { + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + uint32_t p0 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + uint32_t p1 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + uint32_t p2 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + uint32_t p3 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + // Lookup pixel from flat texture tile + __m128i fg = _mm_set_epi32(p3, p2, p1, p0); + VEC_SHADE(fg, shade_constants); + _mm_storeu_si128((__m128i*)dest, fg); + dest += 4; + } + } + + if (count == 0) + return; + + do { // Current texture index in u,v. 
- spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p0 = source[spot]; - xfrac += xstep; - yfrac += ystep; + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p1 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p2 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p3 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - // Lookup pixel from flat texture tile, - // re-index using light/colormap. - __m128i fg = _mm_set_epi32(p3, p2, p1, p0); - VEC_SHADE(fg, shade_constants); - _mm_storeu_si128((__m128i*)dest, fg); + // Lookup pixel from flat texture tile + *dest++ = shade_bgra(source[spot], light, shade_constants); // Next step in u,v. - dest += 4; - } + xfrac += xstep; + yfrac += ystep; + } while (--count); } - - if (count == 0) - return; - - do - { - // Current texture index in u,v. - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - - // Lookup pixel from flat texture tile - *dest++ = shade_bgra(source[spot], light, shade_constants); - - // Next step in u,v. - xfrac += xstep; - yfrac += ystep; - } while (--count); } else { - BYTE yshift = 32 - _ybits; - BYTE xshift = yshift - _xbits; - int xmask = ((1 << _xbits) - 1) << _ybits; - - int sse_count = count / 4; - count -= sse_count * 4; - - if (shade_constants.simple_shade) + if (_xbits == 6 && _ybits == 6) { - VEC_SHADE_SIMPLE_INIT(light); + // 64x64 is the most common case by far, so special case it. 
- while (sse_count--) + int sse_count = count / 4; + count -= sse_count * 4; + + if (shade_constants.simple_shade) { - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - uint32_t p0 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - uint32_t p1 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - uint32_t p2 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - uint32_t p3 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - // Lookup pixel from flat texture tile - __m128i fg = _mm_set_epi32(p3, p2, p1, p0); - VEC_SHADE_SIMPLE(fg); - _mm_storeu_si128((__m128i*)dest, fg); - dest += 4; + VEC_SHADE_SIMPLE_INIT(light); + while (sse_count--) + { + __m128i fg = sample_bilinear4_sse(source, xfrac, yfrac, xstep, ystep, 26, 26); + VEC_SHADE_SIMPLE(fg); + _mm_storeu_si128((__m128i*)dest, fg); + dest += 4; + } } + else + { + VEC_SHADE_INIT(light, shade_constants); + while (sse_count--) + { + __m128i fg = sample_bilinear4_sse(source, xfrac, yfrac, xstep, ystep, 26, 26); + VEC_SHADE(fg, shade_constants); + _mm_storeu_si128((__m128i*)dest, fg); + dest += 4; + } + } + + if (count == 0) + return; + + do + { + *dest++ = shade_bgra(sample_bilinear(source, xfrac, yfrac, 26, 26), light, shade_constants); + xfrac += xstep; + yfrac += ystep; + } while (--count); } else { - VEC_SHADE_INIT(light, shade_constants); + int sse_count = count / 4; + count -= sse_count * 4; - while (sse_count--) + if (shade_constants.simple_shade) { - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - uint32_t p0 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - uint32_t p1 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - uint32_t p2 = source[spot]; - xfrac += xstep; - yfrac += ystep; - 
- spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - uint32_t p3 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - // Lookup pixel from flat texture tile - __m128i fg = _mm_set_epi32(p3, p2, p1, p0); - VEC_SHADE(fg, shade_constants); - _mm_storeu_si128((__m128i*)dest, fg); - dest += 4; + VEC_SHADE_SIMPLE_INIT(light); + while (sse_count--) + { + __m128i fg = sample_bilinear4_sse(source, xfrac, yfrac, xstep, ystep, 32 -_xbits, 32 - _ybits); + VEC_SHADE_SIMPLE(fg); + _mm_storeu_si128((__m128i*)dest, fg); + dest += 4; + } } + else + { + VEC_SHADE_INIT(light, shade_constants); + while (sse_count--) + { + __m128i fg = sample_bilinear4_sse(source, xfrac, yfrac, xstep, ystep, 32 - _xbits, 32 - _ybits); + VEC_SHADE(fg, shade_constants); + _mm_storeu_si128((__m128i*)dest, fg); + dest += 4; + } + } + + if (count == 0) + return; + + do + { + *dest++ = shade_bgra(sample_bilinear(source, xfrac, yfrac, 32 - _xbits, 32 - _ybits), light, shade_constants); + xfrac += xstep; + yfrac += ystep; + } while (--count); } - - if (count == 0) - return; - - do - { - // Current texture index in u,v. - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - - // Lookup pixel from flat texture tile - *dest++ = shade_bgra(source[spot], light, shade_constants); - - // Next step in u,v. 
- xfrac += xstep; - yfrac += ystep; - } while (--count); } } }; @@ -275,6 +364,8 @@ class VecCommand(Vlinec4RGBA) : public DrawerCommand DWORD vplce[4]; DWORD vince[4]; const uint32 * RESTRICT bufplce[4]; + const uint32_t * RESTRICT bufplce2[4]; + uint32_t buftexturefracx[4]; public: VecCommand(Vlinec4RGBA)() @@ -290,6 +381,8 @@ public: vplce[i] = ::vplce[i]; vince[i] = ::vince[i]; bufplce[i] = (const uint32 *)::bufplce[i]; + bufplce2[i] = (const uint32_t *)::bufplce2[i]; + buftexturefracx[i] = ::buftexturefracx[i]; } } @@ -319,57 +412,97 @@ public: local_vince[i] *= thread->num_cores; } - if (shade_constants.simple_shade) + if (bufplce2[0] == nullptr) { - VEC_SHADE_SIMPLE_INIT4(light3, light2, light1, light0); - do + if (shade_constants.simple_shade) { - DWORD place0 = local_vplce[0]; - DWORD place1 = local_vplce[1]; - DWORD place2 = local_vplce[2]; - DWORD place3 = local_vplce[3]; + VEC_SHADE_SIMPLE_INIT4(light3, light2, light1, light0); + do + { + DWORD place0 = local_vplce[0]; + DWORD place1 = local_vplce[1]; + DWORD place2 = local_vplce[2]; + DWORD place3 = local_vplce[3]; - uint32_t p0 = bufplce[0][place0 >> bits]; - uint32_t p1 = bufplce[1][place1 >> bits]; - uint32_t p2 = bufplce[2][place2 >> bits]; - uint32_t p3 = bufplce[3][place3 >> bits]; + uint32_t p0 = bufplce[0][place0 >> bits]; + uint32_t p1 = bufplce[1][place1 >> bits]; + uint32_t p2 = bufplce[2][place2 >> bits]; + uint32_t p3 = bufplce[3][place3 >> bits]; - local_vplce[0] = place0 + local_vince[0]; - local_vplce[1] = place1 + local_vince[1]; - local_vplce[2] = place2 + local_vince[2]; - local_vplce[3] = place3 + local_vince[3]; + local_vplce[0] = place0 + local_vince[0]; + local_vplce[1] = place1 + local_vince[1]; + local_vplce[2] = place2 + local_vince[2]; + local_vplce[3] = place3 + local_vince[3]; - __m128i fg = _mm_set_epi32(p3, p2, p1, p0); - VEC_SHADE_SIMPLE(fg); - _mm_storeu_si128((__m128i*)dest, fg); - dest += pitch; - } while (--count); + __m128i fg = _mm_set_epi32(p3, p2, p1, p0); + 
VEC_SHADE_SIMPLE(fg); + _mm_storeu_si128((__m128i*)dest, fg); + dest += pitch; + } while (--count); + } + else + { + VEC_SHADE_INIT4(light3, light2, light1, light0, shade_constants); + do + { + DWORD place0 = local_vplce[0]; + DWORD place1 = local_vplce[1]; + DWORD place2 = local_vplce[2]; + DWORD place3 = local_vplce[3]; + + uint32_t p0 = bufplce[0][place0 >> bits]; + uint32_t p1 = bufplce[1][place1 >> bits]; + uint32_t p2 = bufplce[2][place2 >> bits]; + uint32_t p3 = bufplce[3][place3 >> bits]; + + local_vplce[0] = place0 + local_vince[0]; + local_vplce[1] = place1 + local_vince[1]; + local_vplce[2] = place2 + local_vince[2]; + local_vplce[3] = place3 + local_vince[3]; + + __m128i fg = _mm_set_epi32(p3, p2, p1, p0); + VEC_SHADE(fg, shade_constants); + _mm_storeu_si128((__m128i*)dest, fg); + dest += pitch; + } while (--count); + } } else { - VEC_SHADE_INIT4(light3, light2, light1, light0, shade_constants); - do + if (shade_constants.simple_shade) { - DWORD place0 = local_vplce[0]; - DWORD place1 = local_vplce[1]; - DWORD place2 = local_vplce[2]; - DWORD place3 = local_vplce[3]; + VEC_SHADE_SIMPLE_INIT4(light3, light2, light1, light0); + do + { + __m128i fg = sample_bilinear4_sse(bufplce, bufplce2, buftexturefracx, local_vplce, bits); - uint32_t p0 = bufplce[0][place0 >> bits]; - uint32_t p1 = bufplce[1][place1 >> bits]; - uint32_t p2 = bufplce[2][place2 >> bits]; - uint32_t p3 = bufplce[3][place3 >> bits]; + local_vplce[0] = local_vplce[0] + local_vince[0]; + local_vplce[1] = local_vplce[1] + local_vince[1]; + local_vplce[2] = local_vplce[2] + local_vince[2]; + local_vplce[3] = local_vplce[3] + local_vince[3]; - local_vplce[0] = place0 + local_vince[0]; - local_vplce[1] = place1 + local_vince[1]; - local_vplce[2] = place2 + local_vince[2]; - local_vplce[3] = place3 + local_vince[3]; + VEC_SHADE_SIMPLE(fg); + _mm_storeu_si128((__m128i*)dest, fg); + dest += pitch; + } while (--count); + } + else + { + VEC_SHADE_INIT4(light3, light2, light1, light0, shade_constants); 
+ do + { + __m128i fg = sample_bilinear4_sse(bufplce, bufplce2, buftexturefracx, local_vplce, bits); - __m128i fg = _mm_set_epi32(p3, p2, p1, p0); - VEC_SHADE(fg, shade_constants); - _mm_storeu_si128((__m128i*)dest, fg); - dest += pitch; - } while (--count); + local_vplce[0] = local_vplce[0] + local_vince[0]; + local_vplce[1] = local_vplce[1] + local_vince[1]; + local_vplce[2] = local_vplce[2] + local_vince[2]; + local_vplce[3] = local_vplce[3] + local_vince[3]; + + VEC_SHADE(fg, shade_constants); + _mm_storeu_si128((__m128i*)dest, fg); + dest += pitch; + } while (--count); + } } } }; @@ -385,6 +518,8 @@ class VecCommand(Mvlinec4RGBA) : public DrawerCommand DWORD vplce[4]; DWORD vince[4]; const uint32 * RESTRICT bufplce[4]; + const uint32 * RESTRICT bufplce2[4]; + uint32_t buftexturefracx[4]; public: VecCommand(Mvlinec4RGBA)() @@ -400,6 +535,8 @@ public: vplce[i] = ::vplce[i]; vince[i] = ::vince[i]; bufplce[i] = (const uint32 *)::bufplce[i]; + bufplce2[i] = (const uint32_t *)::bufplce2[i]; + buftexturefracx[i] = ::buftexturefracx[i]; } } @@ -429,61 +566,105 @@ public: local_vince[i] *= thread->num_cores; } - if (shade_constants.simple_shade) + if (bufplce2[0] == nullptr) { - VEC_SHADE_SIMPLE_INIT4(light3, light2, light1, light0); - do + if (shade_constants.simple_shade) { - DWORD place0 = local_vplce[0]; - DWORD place1 = local_vplce[1]; - DWORD place2 = local_vplce[2]; - DWORD place3 = local_vplce[3]; + VEC_SHADE_SIMPLE_INIT4(light3, light2, light1, light0); + do + { + DWORD place0 = local_vplce[0]; + DWORD place1 = local_vplce[1]; + DWORD place2 = local_vplce[2]; + DWORD place3 = local_vplce[3]; - uint32_t pix0 = bufplce[0][place0 >> bits]; - uint32_t pix1 = bufplce[1][place1 >> bits]; - uint32_t pix2 = bufplce[2][place2 >> bits]; - uint32_t pix3 = bufplce[3][place3 >> bits]; + uint32_t pix0 = bufplce[0][place0 >> bits]; + uint32_t pix1 = bufplce[1][place1 >> bits]; + uint32_t pix2 = bufplce[2][place2 >> bits]; + uint32_t pix3 = bufplce[3][place3 >> bits]; - 
local_vplce[0] = place0 + local_vince[0]; - local_vplce[1] = place1 + local_vince[1]; - local_vplce[2] = place2 + local_vince[2]; - local_vplce[3] = place3 + local_vince[3]; + local_vplce[0] = place0 + local_vince[0]; + local_vplce[1] = place1 + local_vince[1]; + local_vplce[2] = place2 + local_vince[2]; + local_vplce[3] = place3 + local_vince[3]; - __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - VEC_SHADE_SIMPLE(fg); - VEC_ALPHA_BLEND(fg, bg); - _mm_storeu_si128((__m128i*)dest, fg); - dest += pitch; - } while (--count); + __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + VEC_SHADE_SIMPLE(fg); + VEC_ALPHA_BLEND(fg, bg); + _mm_storeu_si128((__m128i*)dest, fg); + dest += pitch; + } while (--count); + } + else + { + VEC_SHADE_INIT4(light3, light2, light1, light0, shade_constants); + do + { + DWORD place0 = local_vplce[0]; + DWORD place1 = local_vplce[1]; + DWORD place2 = local_vplce[2]; + DWORD place3 = local_vplce[3]; + + uint32_t pix0 = bufplce[0][place0 >> bits]; + uint32_t pix1 = bufplce[1][place1 >> bits]; + uint32_t pix2 = bufplce[2][place2 >> bits]; + uint32_t pix3 = bufplce[3][place3 >> bits]; + + local_vplce[0] = place0 + local_vince[0]; + local_vplce[1] = place1 + local_vince[1]; + local_vplce[2] = place2 + local_vince[2]; + local_vplce[3] = place3 + local_vince[3]; + + __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + VEC_SHADE(fg, shade_constants); + VEC_ALPHA_BLEND(fg, bg); + _mm_storeu_si128((__m128i*)dest, fg); + dest += pitch; + } while (--count); + } } else { - VEC_SHADE_INIT4(light3, light2, light1, light0, shade_constants); - do + if (shade_constants.simple_shade) { - DWORD place0 = local_vplce[0]; - DWORD place1 = local_vplce[1]; - DWORD place2 = local_vplce[2]; - DWORD place3 = local_vplce[3]; + VEC_SHADE_SIMPLE_INIT4(light3, light2, light1, light0); + do + { + __m128i fg = 
sample_bilinear4_sse(bufplce, bufplce2, buftexturefracx, local_vplce, bits); - uint32_t pix0 = bufplce[0][place0 >> bits]; - uint32_t pix1 = bufplce[1][place1 >> bits]; - uint32_t pix2 = bufplce[2][place2 >> bits]; - uint32_t pix3 = bufplce[3][place3 >> bits]; + local_vplce[0] = local_vplce[0] + local_vince[0]; + local_vplce[1] = local_vplce[1] + local_vince[1]; + local_vplce[2] = local_vplce[2] + local_vince[2]; + local_vplce[3] = local_vplce[3] + local_vince[3]; - local_vplce[0] = place0 + local_vince[0]; - local_vplce[1] = place1 + local_vince[1]; - local_vplce[2] = place2 + local_vince[2]; - local_vplce[3] = place3 + local_vince[3]; + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + VEC_SHADE_SIMPLE(fg); + VEC_ALPHA_BLEND(fg, bg); + _mm_storeu_si128((__m128i*)dest, fg); + dest += pitch; + } while (--count); + } + else + { + VEC_SHADE_INIT4(light3, light2, light1, light0, shade_constants); + do + { + __m128i fg = sample_bilinear4_sse(bufplce, bufplce2, buftexturefracx, local_vplce, bits); - __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - VEC_SHADE(fg, shade_constants); - VEC_ALPHA_BLEND(fg, bg); - _mm_storeu_si128((__m128i*)dest, fg); - dest += pitch; - } while (--count); + local_vplce[0] = local_vplce[0] + local_vince[0]; + local_vplce[1] = local_vplce[1] + local_vince[1]; + local_vplce[2] = local_vplce[2] + local_vince[2]; + local_vplce[3] = local_vplce[3] + local_vince[3]; + + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + VEC_SHADE(fg, shade_constants); + VEC_ALPHA_BLEND(fg, bg); + _mm_storeu_si128((__m128i*)dest, fg); + dest += pitch; + } while (--count); + } } } }; From d15af1524cebd3e000bbd7971d9b5e51205cfde6 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 21 Jun 2016 09:38:47 +0200 Subject: [PATCH 066/912] Added mipmap support for floor and ceiling --- src/r_draw.cpp | 6 +-- src/r_draw.h | 2 +- src/r_draw_rgba.cpp | 119 +++++++++++++++++++++++++++++++++++++----- 
src/r_draw_rgba.h | 33 ++++++++++++ src/r_draw_rgba_sse.h | 9 ++-- src/r_plane.cpp | 5 +- src/r_swrenderer.cpp | 1 + src/v_draw.cpp | 2 +- 8 files changed, 150 insertions(+), 27 deletions(-) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 83c4ac8d40..73ddb72f88 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -1062,13 +1062,13 @@ extern "C" BYTE *ds_curcolormap, *ds_cursource, *ds_curtiltedsource; // //========================================================================== -void R_SetSpanSource(const BYTE *pixels) +void R_SetSpanSource(FTexture *tex) { - ds_source = pixels; + R_SetMipmappedSpanSource(tex); #ifdef X86_ASM if (!r_swtruecolor && ds_cursource != ds_source) { - R_SetSpanSource_ASM(pixels); + R_SetSpanSource_ASM(ds_source); } #endif } diff --git a/src/r_draw.h b/src/r_draw.h index d5ecbd289d..b662ddcee3 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -122,7 +122,7 @@ extern void (*R_DrawTranslatedColumn)(void); extern void (*R_DrawSpan)(void); void R_SetupSpanBits(FTexture *tex); void R_SetSpanColormap(FDynamicColormap *colormap, int shade); -void R_SetSpanSource(const BYTE *pixels); +void R_SetSpanSource(FTexture *tex); // Span drawing for masked textures. 
extern void (*R_DrawSpanMasked)(void); diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 869edaba1d..9cdcdbf802 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -59,6 +59,7 @@ extern int wallshade; CVAR(Bool, r_multithreaded, true, 0) CVAR(Bool, r_bilinear, true, 0) +CVAR(Bool, r_mipmap, true, 0) #ifndef NO_SSE @@ -1502,6 +1503,7 @@ class DrawSpanRGBACommand : public DrawerCommand BYTE * RESTRICT _destorg; fixed_t _light; ShadeConstants _shade_constants; + bool _magnifying; public: DrawSpanRGBACommand() @@ -1519,6 +1521,7 @@ public: _destorg = dc_destorg; _light = ds_light; _shade_constants = ds_shade_constants; + _magnifying = !span_sampler_setup(_source, _xbits, _ybits, _xstep, _ystep); } void Execute(DrawerThread *thread) override @@ -1548,12 +1551,7 @@ public: uint32_t light = calc_light_multiplier(_light); ShadeConstants shade_constants = _shade_constants; - fixed_t xmagnitude = abs((fixed_t)xstep) >> (32 - _xbits - FRACBITS); - fixed_t ymagnitude = abs((fixed_t)ystep) >> (32 - _ybits - FRACBITS); - fixed_t magnitude = xmagnitude + ymagnitude; - - bool magnifying = !r_bilinear || magnitude >> (FRACBITS - 1) == 0; - if (magnifying) + if (_magnifying) { if (_xbits == 6 && _ybits == 6) { @@ -1634,6 +1632,7 @@ class DrawSpanMaskedRGBACommand : public DrawerCommand fixed_t _ystep; int _xbits; int _ybits; + bool _magnifying; public: DrawSpanMaskedRGBACommand() @@ -1651,6 +1650,7 @@ public: _ystep = ds_ystep; _xbits = ds_xbits; _ybits = ds_ybits; + _magnifying = !span_sampler_setup(_source, _xbits, _ybits, _xstep, _ystep); } void Execute(DrawerThread *thread) override @@ -1680,12 +1680,7 @@ public: xstep = _xstep; ystep = _ystep; - fixed_t xmagnitude = abs((fixed_t)xstep) >> (32 - _xbits - FRACBITS); - fixed_t ymagnitude = abs((fixed_t)ystep) >> (32 - _ybits - FRACBITS); - fixed_t magnitude = xmagnitude + ymagnitude; - - bool magnifying = !r_bilinear || magnitude >> (FRACBITS - 1) == 0; - if (magnifying) + if (_magnifying) { if (_xbits == 6 && 
_ybits == 6) { @@ -3677,6 +3672,106 @@ void ApplySpecialColormapRGBACommand::Execute(DrawerThread *thread) ///////////////////////////////////////////////////////////////////////////// +#include + +class MipmappedTexture +{ +public: + MipmappedTexture(FTexture *texture) + { + const uint32_t *base_texture = texture->GetPixelsBgra(); + Width = texture->GetWidth(); + Height = texture->GetHeight(); + Levels = MAX(texture->WidthBits, texture->HeightBits); + + // I bet there is a better way to calculate this.. + int buffersize = 0; + for (int i = 0; i < Levels; i++) + { + int w = MAX(Width >> i, 2); // 2 instead of 1 because we texelGather in 2x2 blocks + int h = MAX(Height >> i, 2); + buffersize += w * h; + } + Pixels.resize(buffersize); + + // Base level: + memcpy(Pixels.data(), base_texture, Width * Height * 4); + + // Mipmap levels: + uint32_t *src = Pixels.data(); + uint32_t *dest = src + Width * Height; + for (int i = 1; i < Levels; i++) + { + int srch = MAX(Height >> (i - 1), 2); + int w = MAX(Width >> i, 2); + int h = MAX(Height >> i, 2); + + for (int x = 0; x < w; x++) + { + for (int y = 0; y < h; y++) + { + uint32_t src00 = src[y * 2 + x * 2 * srch]; + uint32_t src01 = src[y * 2 + 1 + x * 2 * srch]; + uint32_t src10 = src[y * 2 + (x * 2 + 1) * srch]; + uint32_t src11 = src[y * 2 + 1 + (x * 2 + 1) * srch]; + + uint32_t alpha = (APART(src00) + APART(src01) + APART(src10) + APART(src11) + 2) / 4; + uint32_t red = (RPART(src00) + RPART(src01) + RPART(src10) + RPART(src11) + 2) / 4; + uint32_t green = (GPART(src00) + GPART(src01) + GPART(src10) + GPART(src11) + 2) / 4; + uint32_t blue = (BPART(src00) + BPART(src01) + BPART(src10) + BPART(src11) + 2) / 4; + + dest[y + x * h] = (alpha << 24) | (red << 16) | (green << 8) | blue; + } + } + + src = dest; + dest += w * h; + } + } + + int Width = 0; + int Height = 0; + int Levels = 0; + std::vector Pixels; +}; + +class TextureMipmapper +{ +public: + static std::map> &Textures() + { + static std::map> textures; + return 
textures; + } +}; + +void R_SetMipmappedSpanSource(FTexture *tex) +{ + if (r_swtruecolor) + { + if (r_mipmap) + { + auto &mipmap = TextureMipmapper::Textures()[tex]; + if (!mipmap) + mipmap = std::make_shared(tex); + ds_source = (const BYTE*)mipmap->Pixels.data(); + } + else + { + ds_source = (const BYTE*)tex->GetPixelsBgra(); + } + } + else + { + ds_source = tex->GetPixels(); + } +} + +void R_ClearMipmapCache() +{ + TextureMipmapper::Textures().clear(); +} + void R_BeginDrawerCommands() { DrawerCommandQueue::Begin(); diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index 0900e89977..37dc1a70a1 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -108,6 +108,9 @@ void tmvline4_revsubclamp_rgba(); void R_FillColumnHoriz_rgba(); void R_FillSpan_rgba(); +void R_SetMipmappedSpanSource(FTexture *tex); +void R_ClearMipmapCache(); + ///////////////////////////////////////////////////////////////////////////// // Multithreaded rendering infrastructure: @@ -185,6 +188,7 @@ public: }; EXTERN_CVAR(Bool, r_multithreaded) +EXTERN_CVAR(Bool, r_mipmap) // Manages queueing up commands and executing them on worker threads class DrawerCommandQueue @@ -426,6 +430,35 @@ FORCEINLINE uint32_t alpha_blend(uint32_t fg, uint32_t bg) return 0xff000000 | (red << 16) | (green << 8) | blue; } +inline bool span_sampler_setup(const uint32_t *&source, int &xbits, int &ybits, fixed_t xstep, fixed_t ystep) +{ + if (!r_bilinear) + return false; + + // Is this a magfilter or minfilter? 
+ fixed_t xmagnitude = abs(xstep) >> (32 - xbits - FRACBITS); + fixed_t ymagnitude = abs(ystep) >> (32 - ybits - FRACBITS); + fixed_t magnitude = (xmagnitude + ymagnitude) * 3 + (1 << (FRACBITS -1)); + if (magnitude >> FRACBITS == 0) + return false; + + if (r_mipmap) + { + int level = magnitude >> (FRACBITS + 1); + while (level != 0) + { + if (xbits <= 2 || ybits <= 2) + break; + + source += (1 << (xbits)) * (1 << (ybits)); + xbits -= 1; + ybits -= 1; + level >>= 1; + } + } + return true; +} + FORCEINLINE uint32_t sample_bilinear(const uint32_t *col0, const uint32_t *col1, uint32_t texturefracx, uint32_t texturefracy, int ybits) { uint32_t half = 1 << (ybits - 1); diff --git a/src/r_draw_rgba_sse.h b/src/r_draw_rgba_sse.h index 7214717249..4002a55356 100644 --- a/src/r_draw_rgba_sse.h +++ b/src/r_draw_rgba_sse.h @@ -25,6 +25,7 @@ class VecCommand(DrawSpanRGBA) : public DrawerCommand BYTE * RESTRICT _destorg; fixed_t _light; ShadeConstants _shade_constants; + bool _magnifying; public: VecCommand(DrawSpanRGBA)() @@ -42,6 +43,7 @@ public: _destorg = dc_destorg; _light = ds_light; _shade_constants = ds_shade_constants; + _magnifying = !span_sampler_setup(_source, _xbits, _ybits, _xstep, _ystep); } void Execute(DrawerThread *thread) override @@ -71,12 +73,7 @@ public: uint32_t light = calc_light_multiplier(_light); ShadeConstants shade_constants = _shade_constants; - fixed_t xmagnitude = abs((fixed_t)xstep) >> (32 - _xbits - FRACBITS); - fixed_t ymagnitude = abs((fixed_t)ystep) >> (32 - _ybits - FRACBITS); - fixed_t magnitude = xmagnitude + ymagnitude; - - bool magnifying = !r_bilinear || magnitude >> (FRACBITS - 1) == 0; - if (magnifying) + if (_magnifying) { if (_xbits == 6 && _ybits == 6) { diff --git a/src/r_plane.cpp b/src/r_plane.cpp index 807066f77d..6913db9183 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -1178,10 +1178,7 @@ void R_DrawSinglePlane (visplane_t *pl, fixed_t alpha, bool additive, bool maske R_SetupSpanBits(tex); double xscale = 
pl->xform.xScale * tex->Scale.X; double yscale = pl->xform.yScale * tex->Scale.Y; - if (r_swtruecolor) - ds_source = (const BYTE*)tex->GetPixelsBgra(); - else - ds_source = tex->GetPixels(); + R_SetSpanSource(tex); basecolormap = pl->colormap; planeshade = LIGHT2SHADE(pl->lightlevel); diff --git a/src/r_swrenderer.cpp b/src/r_swrenderer.cpp index c81d2a1103..c1e2d4bd0f 100644 --- a/src/r_swrenderer.cpp +++ b/src/r_swrenderer.cpp @@ -170,6 +170,7 @@ void FSoftwareRenderer::RenderView(player_t *player) R_InitColumnDrawers(); } + R_ClearMipmapCache(); R_BeginDrawerCommands(); R_RenderActorView (player->mo); // [RH] Let cameras draw onto textures that were visible this frame. diff --git a/src/v_draw.cpp b/src/v_draw.cpp index 6a8dad0477..fd12a15871 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -1404,7 +1404,7 @@ void DCanvas::FillSimplePoly(FTexture *tex, FVector2 *points, int npoints, R_SetSpanColormap(colormap, clamp(shade >> FRACBITS, 0, NUMCOLORMAPS - 1)); else R_SetSpanColormap(&identitycolormap, 0); - R_SetSpanSource(r_swtruecolor ? 
(const BYTE*)tex->GetPixelsBgra() : tex->GetPixels()); + R_SetSpanSource(tex); scalex = double(1u << (32 - ds_xbits)) / scalex; scaley = double(1u << (32 - ds_ybits)) / scaley; ds_xstep = xs_RoundToInt(cosrot * scalex); From c235de5c22f6f7aebbf36aa3f80a45e0f5f6accf Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 21 Jun 2016 21:55:08 +0200 Subject: [PATCH 067/912] Native mipmap support to FTexture --- src/r_draw.cpp | 2 +- src/r_draw_rgba.cpp | 100 ---------------------- src/r_draw_rgba.h | 157 +++++++++++++++++------------------ src/r_draw_rgba_sse.h | 28 +++++-- src/r_swrenderer.cpp | 1 - src/textures/jpegtexture.cpp | 4 +- src/textures/pngtexture.cpp | 3 +- src/textures/texture.cpp | 68 ++++++++++++++- src/textures/textures.h | 4 + 9 files changed, 171 insertions(+), 196 deletions(-) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 73ddb72f88..55353a0068 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -1064,7 +1064,7 @@ extern "C" BYTE *ds_curcolormap, *ds_cursource, *ds_curtiltedsource; void R_SetSpanSource(FTexture *tex) { - R_SetMipmappedSpanSource(tex); + ds_source = r_swtruecolor ? (const BYTE*)tex->GetPixelsBgra() : tex->GetPixels(); #ifdef X86_ASM if (!r_swtruecolor && ds_cursource != ds_source) { diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 9cdcdbf802..8144c096d7 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -3672,106 +3672,6 @@ void ApplySpecialColormapRGBACommand::Execute(DrawerThread *thread) ///////////////////////////////////////////////////////////////////////////// -#include - -class MipmappedTexture -{ -public: - MipmappedTexture(FTexture *texture) - { - const uint32_t *base_texture = texture->GetPixelsBgra(); - Width = texture->GetWidth(); - Height = texture->GetHeight(); - Levels = MAX(texture->WidthBits, texture->HeightBits); - - // I bet there is a better way to calculate this.. 
- int buffersize = 0; - for (int i = 0; i < Levels; i++) - { - int w = MAX(Width >> i, 2); // 2 instead of 1 because we texelGather in 2x2 blocks - int h = MAX(Height >> i, 2); - buffersize += w * h; - } - Pixels.resize(buffersize); - - // Base level: - memcpy(Pixels.data(), base_texture, Width * Height * 4); - - // Mipmap levels: - uint32_t *src = Pixels.data(); - uint32_t *dest = src + Width * Height; - for (int i = 1; i < Levels; i++) - { - int srch = MAX(Height >> (i - 1), 2); - int w = MAX(Width >> i, 2); - int h = MAX(Height >> i, 2); - - for (int x = 0; x < w; x++) - { - for (int y = 0; y < h; y++) - { - uint32_t src00 = src[y * 2 + x * 2 * srch]; - uint32_t src01 = src[y * 2 + 1 + x * 2 * srch]; - uint32_t src10 = src[y * 2 + (x * 2 + 1) * srch]; - uint32_t src11 = src[y * 2 + 1 + (x * 2 + 1) * srch]; - - uint32_t alpha = (APART(src00) + APART(src01) + APART(src10) + APART(src11) + 2) / 4; - uint32_t red = (RPART(src00) + RPART(src01) + RPART(src10) + RPART(src11) + 2) / 4; - uint32_t green = (GPART(src00) + GPART(src01) + GPART(src10) + GPART(src11) + 2) / 4; - uint32_t blue = (BPART(src00) + BPART(src01) + BPART(src10) + BPART(src11) + 2) / 4; - - dest[y + x * h] = (alpha << 24) | (red << 16) | (green << 8) | blue; - } - } - - src = dest; - dest += w * h; - } - } - - int Width = 0; - int Height = 0; - int Levels = 0; - std::vector Pixels; -}; - -class TextureMipmapper -{ -public: - static std::map> &Textures() - { - static std::map> textures; - return textures; - } -}; - -void R_SetMipmappedSpanSource(FTexture *tex) -{ - if (r_swtruecolor) - { - if (r_mipmap) - { - auto &mipmap = TextureMipmapper::Textures()[tex]; - if (!mipmap) - mipmap = std::make_shared(tex); - ds_source = (const BYTE*)mipmap->Pixels.data(); - } - else - { - ds_source = (const BYTE*)tex->GetPixelsBgra(); - } - } - else - { - ds_source = tex->GetPixels(); - } -} - -void R_ClearMipmapCache() -{ - TextureMipmapper::Textures().clear(); -} - void R_BeginDrawerCommands() { 
DrawerCommandQueue::Begin(); diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index 37dc1a70a1..4808cb2574 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -108,9 +108,6 @@ void tmvline4_revsubclamp_rgba(); void R_FillColumnHoriz_rgba(); void R_FillSpan_rgba(); -void R_SetMipmappedSpanSource(FTexture *tex); -void R_ClearMipmapCache(); - ///////////////////////////////////////////////////////////////////////////// // Multithreaded rendering infrastructure: @@ -494,9 +491,9 @@ FORCEINLINE uint32_t sample_bilinear(const uint32_t *texture, dsfixed_t xfrac, d uint32_t y = (yfrac - yhalf) >> ybits; uint32_t p00 = texture[(y & ymask) + ((x & xmask) << yshift)]; - uint32_t p01 = texture[(y + 1 & ymask) + ((x & xmask) << yshift)]; + uint32_t p01 = texture[((y + 1) & ymask) + ((x & xmask) << yshift)]; uint32_t p10 = texture[(y & ymask) + (((x + 1) & xmask) << yshift)]; - uint32_t p11 = texture[(y + 1 & ymask) + (((x + 1) & xmask) << yshift)]; + uint32_t p11 = texture[((y + 1) & ymask) + (((x + 1) & xmask) << yshift)]; uint32_t inv_b = ((xfrac + xhalf) >> (xbits - 4)) & 15; uint32_t inv_a = ((yfrac + yhalf) >> (ybits - 4)) & 15; @@ -511,87 +508,81 @@ FORCEINLINE uint32_t sample_bilinear(const uint32_t *texture, dsfixed_t xfrac, d return (alpha << 24) | (red << 16) | (green << 8) | blue; } -#ifndef NO_SSE -FORCEINLINE __m128i sample_bilinear4_sse(const uint32_t **col0, const uint32_t **col1, uint32_t texturefracx[4], uint32_t texturefracy[4], int ybits) -{ - uint32_t half = 1 << (ybits - 1); - - __m128i m127 = _mm_set1_epi16(127); - __m128i fg = _mm_setzero_si128(); - for (int i = 0; i < 4; i++) - { - uint32_t y = (texturefracy[i] - half) >> ybits; - - uint32_t inv_b = texturefracx[i]; - uint32_t inv_a = ((texturefracy[i] + half) >> (ybits - 4)) & 15; - uint32_t a = 16 - inv_a; - uint32_t b = 16 - inv_b; - - uint32_t ab = a * b; - uint32_t invab = inv_a * b; - uint32_t ainvb = a * inv_b; - uint32_t invainvb = inv_a * inv_b; - __m128i ab_invab = _mm_set_epi16(invab, 
invab, invab, invab, ab, ab, ab, ab); - __m128i ainvb_invainvb = _mm_set_epi16(invainvb, invainvb, invainvb, invainvb, ainvb, ainvb, ainvb, ainvb); - - __m128i p0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(col0[i] + y)), _mm_setzero_si128()); - __m128i p1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(col1[i] + y)), _mm_setzero_si128()); - - __m128i tmp = _mm_adds_epu16(_mm_mullo_epi16(p0, ab_invab), _mm_mullo_epi16(p1, ainvb_invainvb)); - __m128i color = _mm_srli_epi16(_mm_adds_epu16(_mm_adds_epu16(_mm_srli_si128(tmp, 8), tmp), m127), 8); - - fg = _mm_or_si128(_mm_srli_si128(fg, 4), _mm_slli_si128(_mm_packus_epi16(color, _mm_setzero_si128()), 12)); - } - return fg; +#define VEC_SAMPLE_BILINEAR4_COLUMN(fg, col0, col1, texturefracx, texturefracy, ybits) { \ + uint32_t half = 1 << (ybits - 1); \ + \ + __m128i m127 = _mm_set1_epi16(127); \ + fg = _mm_setzero_si128(); \ + for (int i = 0; i < 4; i++) \ + { \ + uint32_t y = (texturefracy[i] - half) >> ybits; \ + \ + uint32_t inv_b = texturefracx[i]; \ + uint32_t inv_a = ((texturefracy[i] + half) >> (ybits - 4)) & 15; \ + uint32_t a = 16 - inv_a; \ + uint32_t b = 16 - inv_b; \ + \ + uint32_t ab = a * b; \ + uint32_t invab = inv_a * b; \ + uint32_t ainvb = a * inv_b; \ + uint32_t invainvb = inv_a * inv_b; \ + __m128i ab_invab = _mm_set_epi16(invab, invab, invab, invab, ab, ab, ab, ab); \ + __m128i ainvb_invainvb = _mm_set_epi16(invainvb, invainvb, invainvb, invainvb, ainvb, ainvb, ainvb, ainvb); \ + \ + __m128i p0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(col0[i] + y)), _mm_setzero_si128()); \ + __m128i p1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(col1[i] + y)), _mm_setzero_si128()); \ + \ + __m128i tmp = _mm_adds_epu16(_mm_mullo_epi16(p0, ab_invab), _mm_mullo_epi16(p1, ainvb_invainvb)); \ + __m128i color = _mm_srli_epi16(_mm_adds_epu16(_mm_adds_epu16(_mm_srli_si128(tmp, 8), tmp), m127), 8); \ + \ + fg = _mm_or_si128(_mm_srli_si128(fg, 4), _mm_slli_si128(_mm_packus_epi16(color, 
_mm_setzero_si128()), 12)); \ + } \ } -FORCEINLINE __m128i sample_bilinear4_sse(const uint32_t *texture, dsfixed_t &xfrac, dsfixed_t &yfrac, dsfixed_t xstep, dsfixed_t ystep, int xbits, int ybits) -{ - int xshift = (32 - xbits); - int yshift = (32 - ybits); - int xmask = (1 << xshift) - 1; - int ymask = (1 << yshift) - 1; - uint32_t xhalf = 1 << (xbits - 1); - uint32_t yhalf = 1 << (ybits - 1); - - __m128i m127 = _mm_set1_epi16(127); - __m128i fg = _mm_setzero_si128(); - for (int i = 0; i < 4; i++) - { - uint32_t x = (xfrac - xhalf) >> xbits; - uint32_t y = (yfrac - yhalf) >> ybits; - - uint32_t p00 = texture[(y & ymask) + ((x & xmask) << yshift)]; - uint32_t p01 = texture[(y + 1 & ymask) + ((x & xmask) << yshift)]; - uint32_t p10 = texture[(y & ymask) + (((x + 1) & xmask) << yshift)]; - uint32_t p11 = texture[(y + 1 & ymask) + (((x + 1) & xmask) << yshift)]; - - uint32_t inv_b = ((xfrac + xhalf) >> (xbits - 4)) & 15; - uint32_t inv_a = ((yfrac + yhalf) >> (ybits - 4)) & 15; - uint32_t a = 16 - inv_a; - uint32_t b = 16 - inv_b; - - uint32_t ab = a * b; - uint32_t invab = inv_a * b; - uint32_t ainvb = a * inv_b; - uint32_t invainvb = inv_a * inv_b; - __m128i ab_invab = _mm_set_epi16(invab, invab, invab, invab, ab, ab, ab, ab); - __m128i ainvb_invainvb = _mm_set_epi16(invainvb, invainvb, invainvb, invainvb, ainvb, ainvb, ainvb, ainvb); - - __m128i p0 = _mm_unpacklo_epi8(_mm_set_epi32(0, 0, p01, p00), _mm_setzero_si128()); - __m128i p1 = _mm_unpacklo_epi8(_mm_set_epi32(0, 0, p11, p10), _mm_setzero_si128()); - - __m128i tmp = _mm_adds_epu16(_mm_mullo_epi16(p0, ab_invab), _mm_mullo_epi16(p1, ainvb_invainvb)); - __m128i color = _mm_srli_epi16(_mm_adds_epu16(_mm_adds_epu16(_mm_srli_si128(tmp, 8), tmp), m127), 8); - - fg = _mm_or_si128(_mm_srli_si128(fg, 4), _mm_slli_si128(_mm_packus_epi16(color, _mm_setzero_si128()), 12)); - - xfrac += xstep; - yfrac += ystep; - } - return fg; +#define VEC_SAMPLE_BILINEAR4_SPAN(fg, texture, xfrac, yfrac, xstep, ystep, xbits, ybits) { \ + 
int xshift = (32 - xbits); \ + int yshift = (32 - ybits); \ + int xmask = (1 << xshift) - 1; \ + int ymask = (1 << yshift) - 1; \ + uint32_t xhalf = 1 << (xbits - 1); \ + uint32_t yhalf = 1 << (ybits - 1); \ + \ + __m128i m127 = _mm_set1_epi16(127); \ + fg = _mm_setzero_si128(); \ + for (int i = 0; i < 4; i++) \ + { \ + uint32_t x = (xfrac - xhalf) >> xbits; \ + uint32_t y = (yfrac - yhalf) >> ybits; \ + \ + uint32_t p00 = texture[(y & ymask) + ((x & xmask) << yshift)]; \ + uint32_t p01 = texture[((y + 1) & ymask) + ((x & xmask) << yshift)]; \ + uint32_t p10 = texture[(y & ymask) + (((x + 1) & xmask) << yshift)]; \ + uint32_t p11 = texture[((y + 1) & ymask) + (((x + 1) & xmask) << yshift)]; \ + \ + uint32_t inv_b = ((xfrac + xhalf) >> (xbits - 4)) & 15; \ + uint32_t inv_a = ((yfrac + yhalf) >> (ybits - 4)) & 15; \ + uint32_t a = 16 - inv_a; \ + uint32_t b = 16 - inv_b; \ + \ + uint32_t ab = a * b; \ + uint32_t invab = inv_a * b; \ + uint32_t ainvb = a * inv_b; \ + uint32_t invainvb = inv_a * inv_b; \ + __m128i ab_invab = _mm_set_epi16(invab, invab, invab, invab, ab, ab, ab, ab); \ + __m128i ainvb_invainvb = _mm_set_epi16(invainvb, invainvb, invainvb, invainvb, ainvb, ainvb, ainvb, ainvb); \ + \ + __m128i p0 = _mm_unpacklo_epi8(_mm_set_epi32(0, 0, p01, p00), _mm_setzero_si128()); \ + __m128i p1 = _mm_unpacklo_epi8(_mm_set_epi32(0, 0, p11, p10), _mm_setzero_si128()); \ + \ + __m128i tmp = _mm_adds_epu16(_mm_mullo_epi16(p0, ab_invab), _mm_mullo_epi16(p1, ainvb_invainvb)); \ + __m128i color = _mm_srli_epi16(_mm_adds_epu16(_mm_adds_epu16(_mm_srli_si128(tmp, 8), tmp), m127), 8); \ + \ + fg = _mm_or_si128(_mm_srli_si128(fg, 4), _mm_slli_si128(_mm_packus_epi16(color, _mm_setzero_si128()), 12)); \ + \ + xfrac += xstep; \ + yfrac += ystep; \ + } \ } -#endif // Calculate constants for a simple shade with gamma correction #define AVX_LINEAR_SHADE_SIMPLE_INIT(light) \ diff --git a/src/r_draw_rgba_sse.h b/src/r_draw_rgba_sse.h index 4002a55356..af761c6e7d 100644 --- 
a/src/r_draw_rgba_sse.h +++ b/src/r_draw_rgba_sse.h @@ -280,7 +280,8 @@ public: VEC_SHADE_SIMPLE_INIT(light); while (sse_count--) { - __m128i fg = sample_bilinear4_sse(source, xfrac, yfrac, xstep, ystep, 26, 26); + __m128i fg; + VEC_SAMPLE_BILINEAR4_SPAN(fg, source, xfrac, yfrac, xstep, ystep, 26, 26); VEC_SHADE_SIMPLE(fg); _mm_storeu_si128((__m128i*)dest, fg); dest += 4; @@ -291,7 +292,8 @@ public: VEC_SHADE_INIT(light, shade_constants); while (sse_count--) { - __m128i fg = sample_bilinear4_sse(source, xfrac, yfrac, xstep, ystep, 26, 26); + __m128i fg; + VEC_SAMPLE_BILINEAR4_SPAN(fg, source, xfrac, yfrac, xstep, ystep, 26, 26); VEC_SHADE(fg, shade_constants); _mm_storeu_si128((__m128i*)dest, fg); dest += 4; @@ -318,7 +320,10 @@ public: VEC_SHADE_SIMPLE_INIT(light); while (sse_count--) { - __m128i fg = sample_bilinear4_sse(source, xfrac, yfrac, xstep, ystep, 32 -_xbits, 32 - _ybits); + __m128i fg; + int tmpx = 32 - _xbits; + int tmpy = 32 - _ybits; + VEC_SAMPLE_BILINEAR4_SPAN(fg, source, xfrac, yfrac, xstep, ystep, tmpx, tmpy); VEC_SHADE_SIMPLE(fg); _mm_storeu_si128((__m128i*)dest, fg); dest += 4; @@ -329,7 +334,10 @@ public: VEC_SHADE_INIT(light, shade_constants); while (sse_count--) { - __m128i fg = sample_bilinear4_sse(source, xfrac, yfrac, xstep, ystep, 32 - _xbits, 32 - _ybits); + __m128i fg; + int tmpx = 32 - _xbits; + int tmpy = 32 - _ybits; + VEC_SAMPLE_BILINEAR4_SPAN(fg, source, xfrac, yfrac, xstep, ystep, tmpx, tmpy); VEC_SHADE(fg, shade_constants); _mm_storeu_si128((__m128i*)dest, fg); dest += 4; @@ -471,7 +479,8 @@ public: VEC_SHADE_SIMPLE_INIT4(light3, light2, light1, light0); do { - __m128i fg = sample_bilinear4_sse(bufplce, bufplce2, buftexturefracx, local_vplce, bits); + __m128i fg; + VEC_SAMPLE_BILINEAR4_COLUMN(fg, bufplce, bufplce2, buftexturefracx, local_vplce, bits); local_vplce[0] = local_vplce[0] + local_vince[0]; local_vplce[1] = local_vplce[1] + local_vince[1]; @@ -488,7 +497,8 @@ public: VEC_SHADE_INIT4(light3, light2, light1, light0, 
shade_constants); do { - __m128i fg = sample_bilinear4_sse(bufplce, bufplce2, buftexturefracx, local_vplce, bits); + __m128i fg; + VEC_SAMPLE_BILINEAR4_COLUMN(fg, bufplce, bufplce2, buftexturefracx, local_vplce, bits); local_vplce[0] = local_vplce[0] + local_vince[0]; local_vplce[1] = local_vplce[1] + local_vince[1]; @@ -629,7 +639,8 @@ public: VEC_SHADE_SIMPLE_INIT4(light3, light2, light1, light0); do { - __m128i fg = sample_bilinear4_sse(bufplce, bufplce2, buftexturefracx, local_vplce, bits); + __m128i fg; + VEC_SAMPLE_BILINEAR4_COLUMN(fg, bufplce, bufplce2, buftexturefracx, local_vplce, bits); local_vplce[0] = local_vplce[0] + local_vince[0]; local_vplce[1] = local_vplce[1] + local_vince[1]; @@ -648,7 +659,8 @@ public: VEC_SHADE_INIT4(light3, light2, light1, light0, shade_constants); do { - __m128i fg = sample_bilinear4_sse(bufplce, bufplce2, buftexturefracx, local_vplce, bits); + __m128i fg; + VEC_SAMPLE_BILINEAR4_COLUMN(fg, bufplce, bufplce2, buftexturefracx, local_vplce, bits); local_vplce[0] = local_vplce[0] + local_vince[0]; local_vplce[1] = local_vplce[1] + local_vince[1]; diff --git a/src/r_swrenderer.cpp b/src/r_swrenderer.cpp index c1e2d4bd0f..c81d2a1103 100644 --- a/src/r_swrenderer.cpp +++ b/src/r_swrenderer.cpp @@ -170,7 +170,6 @@ void FSoftwareRenderer::RenderView(player_t *player) R_InitColumnDrawers(); } - R_ClearMipmapCache(); R_BeginDrawerCommands(); R_RenderActorView (player->mo); // [RH] Let cameras draw onto textures that were visible this frame. 
diff --git a/src/textures/jpegtexture.cpp b/src/textures/jpegtexture.cpp index 3b53598460..f44b34d088 100644 --- a/src/textures/jpegtexture.cpp +++ b/src/textures/jpegtexture.cpp @@ -474,7 +474,7 @@ void FJPEGTexture::MakeTextureBgra() jpeg_decompress_struct cinfo; jpeg_error_mgr jerr; - PixelsBgra.resize(Width * Height, 0xffba0000); + CreatePixelsBgraWithMipmaps(); cinfo.err = jpeg_std_error(&jerr); cinfo.err->output_message = JPEG_OutputMessage; @@ -560,6 +560,8 @@ void FJPEGTexture::MakeTextureBgra() { delete[] buff; } + + GenerateBgraMipmaps(); } diff --git a/src/textures/pngtexture.cpp b/src/textures/pngtexture.cpp index 408cf1e2fe..ee4eabe900 100644 --- a/src/textures/pngtexture.cpp +++ b/src/textures/pngtexture.cpp @@ -633,7 +633,7 @@ void FPNGTexture::MakeTextureBgra () lump = new FileReader(SourceFile.GetChars()); } - PixelsBgra.resize(Width * Height, 0xffff0000); + CreatePixelsBgraWithMipmaps(); if (StartOfIDAT != 0) { DWORD len, id; @@ -757,6 +757,7 @@ void FPNGTexture::MakeTextureBgra () } } delete lump; + GenerateBgraMipmaps(); } //=========================================================================== diff --git a/src/textures/texture.cpp b/src/textures/texture.cpp index 16a9e63a6b..f5e4d4aa8a 100644 --- a/src/textures/texture.cpp +++ b/src/textures/texture.cpp @@ -200,7 +200,7 @@ const uint32_t *FTexture::GetPixelsBgra() const BYTE *indices = GetPixels(); if (indices == nullptr) return nullptr; - PixelsBgra.resize(Width * Height); + CreatePixelsBgraWithMipmaps(); for (int i = 0; i < Width * Height; i++) { if (indices[i] != 0) @@ -208,6 +208,7 @@ const uint32_t *FTexture::GetPixelsBgra() else PixelsBgra[i] = 0; } + GenerateBgraMipmaps(); } return PixelsBgra.data(); } @@ -355,6 +356,71 @@ void FTexture::FreeSpans (Span **spans) const M_Free (spans); } +void FTexture::CreatePixelsBgraWithMipmaps() +{ + int levels = MipmapLevels(); + int buffersize = 0; + for (int i = 0; i < levels; i++) + { + int w = MAX(Width >> i, 1); + int h = MAX(Height >> i, 
1); + buffersize += w * h; + } + PixelsBgra.resize(buffersize, 0xffff0000); +} + +int FTexture::MipmapLevels() const +{ + int widthbits = 0; + while ((Width >> widthbits) != 0) widthbits++; + + int heightbits = 0; + while ((Height >> heightbits) != 0) heightbits++; + + return MAX(widthbits, heightbits); +} + +void FTexture::GenerateBgraMipmaps() +{ + uint32_t *src = PixelsBgra.data(); + uint32_t *dest = src + Width * Height; + int levels = MipmapLevels(); + for (int i = 1; i < levels; i++) + { + int srcw = MAX(Width >> (i - 1), 1); + int srch = MAX(Height >> (i - 1), 1); + int w = MAX(Width >> i, 1); + int h = MAX(Height >> i, 1); + + for (int x = 0; x < w; x++) + { + int sx0 = x * 2; + int sx1 = MIN((x + 1) * 2, srcw - 1); + + for (int y = 0; y < h; y++) + { + int sy0 = y * 2; + int sy1 = MIN((y + 1) * 2, srch - 1); + + uint32_t src00 = src[sy0 + sx0 * srch]; + uint32_t src01 = src[sy1 + sx0 * srch]; + uint32_t src10 = src[sy0 + sx1 * srch]; + uint32_t src11 = src[sy1 + sx1 * srch]; + + uint32_t alpha = (APART(src00) + APART(src01) + APART(src10) + APART(src11) + 2) / 4; + uint32_t red = (RPART(src00) + RPART(src01) + RPART(src10) + RPART(src11) + 2) / 4; + uint32_t green = (GPART(src00) + GPART(src01) + GPART(src10) + GPART(src11) + 2) / 4; + uint32_t blue = (BPART(src00) + BPART(src01) + BPART(src10) + BPART(src11) + 2) / 4; + + dest[y + x * h] = (alpha << 24) | (red << 16) | (green << 8) | blue; + } + } + + src = dest; + dest += w * h; + } +} + void FTexture::CopyToBlock (BYTE *dest, int dwidth, int dheight, int xpos, int ypos, int rotate, const BYTE *translation) { const BYTE *pixels = GetPixels(); diff --git a/src/textures/textures.h b/src/textures/textures.h index 3b4b0b8b35..ab9dc3719c 100644 --- a/src/textures/textures.h +++ b/src/textures/textures.h @@ -271,6 +271,10 @@ protected: std::vector PixelsBgra; + void CreatePixelsBgraWithMipmaps(); + void GenerateBgraMipmaps(); + int MipmapLevels() const; + public: static void FlipSquareBlock (BYTE *block, int 
x, int y); static void FlipSquareBlockBgra (uint32_t *block, int x, int y); From 4142b6ed1b1dc858a4a7ab16ca2f01a79567ce3b Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 21 Jun 2016 22:03:34 +0200 Subject: [PATCH 068/912] GCC compile fix --- src/r_draw_rgba.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index 4808cb2574..617e831079 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -427,7 +427,7 @@ FORCEINLINE uint32_t alpha_blend(uint32_t fg, uint32_t bg) return 0xff000000 | (red << 16) | (green << 8) | blue; } -inline bool span_sampler_setup(const uint32_t *&source, int &xbits, int &ybits, fixed_t xstep, fixed_t ystep) +inline bool span_sampler_setup(const uint32_t * RESTRICT &source, int &xbits, int &ybits, fixed_t xstep, fixed_t ystep) { if (!r_bilinear) return false; From f81042b3e20a2d9d300c0431d0bb094435eef340 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 21 Jun 2016 22:10:04 +0200 Subject: [PATCH 069/912] Fix warning generated by gcc --- src/r_draw_rgba.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 8144c096d7..57b32b28c6 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -1722,7 +1722,8 @@ public: // 64x64 is the most common case by far, so special case it. 
do { - *dest++ = alpha_blend(shade_bgra(sample_bilinear(source, xfrac, yfrac, 26, 26), light, shade_constants), *dest); + *dest = alpha_blend(shade_bgra(sample_bilinear(source, xfrac, yfrac, 26, 26), light, shade_constants), *dest); + dest++; xfrac += xstep; yfrac += ystep; } while (--count); @@ -1734,7 +1735,8 @@ public: int xmask = ((1 << _xbits) - 1) << _ybits; do { - *dest++ = alpha_blend(shade_bgra(sample_bilinear(source, xfrac, yfrac, 32 - _xbits, 32 - _ybits), light, shade_constants), *dest); + *dest = alpha_blend(shade_bgra(sample_bilinear(source, xfrac, yfrac, 32 - _xbits, 32 - _ybits), light, shade_constants), *dest); + dest++; xfrac += xstep; yfrac += ystep; } while (--count); From e294906d692e4eee921d35e013fafdd633f42257 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 22 Jun 2016 00:22:06 +0200 Subject: [PATCH 070/912] Voxel support in true color mode --- src/r_draw.cpp | 13 ++++ src/r_draw.h | 18 +++--- src/r_draw_rgba.cpp | 142 ++++++++++++++++++++++++++++++++++++++++++++ src/r_draw_rgba.h | 3 + src/r_things.cpp | 6 +- src/r_things.h | 2 +- 6 files changed, 171 insertions(+), 13 deletions(-) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 55353a0068..8cca132898 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -100,6 +100,8 @@ void (*R_DrawFogBoundary)(int x1, int x2, short *uclip, short *dclip); void (*R_MapTiltedPlane)(int y, int x1); void (*R_MapColoredPlane)(int y, int x1); void (*R_DrawParticle)(vissprite_t *); +void (*R_SetupDrawSlab)(FColormap *base_colormap, float light, int shade); +void (*R_DrawSlab)(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, BYTE *p); fixed_t (*tmvline1_add)(); void (*tmvline4_add)(); fixed_t (*tmvline1_addclamp)(); @@ -2306,6 +2308,9 @@ void R_InitColumnDrawers () R_MapColoredPlane = R_MapColoredPlane_rgba; R_DrawParticle = R_DrawParticle_rgba; + R_SetupDrawSlab = R_SetupDrawSlab_rgba; + R_DrawSlab = R_DrawSlab_rgba; + tmvline1_add = tmvline1_add_rgba; tmvline4_add = tmvline4_add_rgba; 
tmvline1_addclamp = tmvline1_addclamp_rgba; @@ -2403,6 +2408,14 @@ void R_InitColumnDrawers () R_MapColoredPlane = R_MapColoredPlane_C; R_DrawParticle = R_DrawParticle_C; +#ifdef X86_ASM + R_SetupDrawSlab = [](FColormap *colormap, float light, int shade) { R_SetupDrawSlabA(colormap->Maps + (GETPALOOKUP(light, shade) << COLORMAPSHIFT)); }; + R_DrawSlab = R_DrawSlabA; +#else + R_SetupDrawSlab = [](FColormap *colormap, float light, int shade) { R_SetupDrawSlabC(colormap->Maps + (GETPALOOKUP(light, shade) << COLORMAPSHIFT)); }; + R_DrawSlab = R_DrawSlabC; +#endif + tmvline1_add = tmvline1_add_C; tmvline4_add = tmvline4_add_C; tmvline1_addclamp = tmvline1_addclamp_C; diff --git a/src/r_draw.h b/src/r_draw.h index b662ddcee3..547a044ea3 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -286,16 +286,16 @@ void R_FillColumnP_C (void); void R_FillColumnHorizP_C (void); void R_FillSpan_C (void); -#ifdef X86_ASM -#define R_SetupDrawSlab R_SetupDrawSlabA -#define R_DrawSlab R_DrawSlabA -#else -#define R_SetupDrawSlab R_SetupDrawSlabC -#define R_DrawSlab R_DrawSlabC -#endif +extern void(*R_SetupDrawSlab)(FColormap *base_colormap, float light, int shade); +extern void(*R_DrawSlab)(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, BYTE *p); -extern "C" void R_SetupDrawSlab(const BYTE *colormap); -extern "C" void R_DrawSlab(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, BYTE *p); +#ifdef X86_ASM +extern "C" void R_SetupDrawSlabA(const BYTE *colormap); +extern "C" void R_DrawSlabA(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, BYTE *p); +#else +extern "C" void R_SetupDrawSlabC(const BYTE *colormap); +extern "C" void R_DrawSlabC(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, BYTE *p); +#endif extern "C" int ds_y; extern "C" int ds_x1; diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 57b32b28c6..9603a8b3eb 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -2303,6 +2303,123 @@ public: } }; +class DrawSlabRGBACommand : public 
DrawerCommand +{ + int _dx; + fixed_t _v; + int _dy; + fixed_t _vi; + const BYTE *_vptr; + uint32_t *_p; + ShadeConstants _shade_constants; + const BYTE *_colormap; + fixed_t _light; + int _pitch; + int _start_y; + +public: + DrawSlabRGBACommand(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, BYTE *p, ShadeConstants shade_constants, const BYTE *colormap, fixed_t light) + { + _dx = dx; + _v = v; + _dy = dy; + _vi = vi; + _vptr = vptr; + _p = (uint32_t *)p; + _shade_constants = shade_constants; + _colormap = colormap; + _light = light; + _pitch = dc_pitch; + _start_y = static_cast((p - dc_destorg) / (dc_pitch * 4)); + assert(dx > 0); + } + + void Execute(DrawerThread *thread) override + { + int dx = _dx; + fixed_t v = _v; + int dy = _dy; + fixed_t vi = _vi; + const BYTE *vptr = _vptr; + uint32_t *p = _p; + ShadeConstants shade_constants = _shade_constants; + const BYTE *colormap = _colormap; + uint32_t light = calc_light_multiplier(_light); + int pitch = _pitch; + int x; + + dy = thread->count_for_thread(_start_y, dy); + p = thread->dest_for_thread(_start_y, pitch, p); + v += vi * thread->skipped_by_thread(_start_y); + vi *= thread->num_cores; + pitch *= thread->num_cores; + + if (dx == 1) + { + while (dy > 0) + { + *p = shade_pal_index(colormap[vptr[v >> FRACBITS]], light, shade_constants); + p += pitch; + v += vi; + dy--; + } + } + else if (dx == 2) + { + while (dy > 0) + { + uint32_t color = shade_pal_index(colormap[vptr[v >> FRACBITS]], light, shade_constants); + p[0] = color; + p[1] = color; + p += pitch; + v += vi; + dy--; + } + } + else if (dx == 3) + { + while (dy > 0) + { + uint32_t color = shade_pal_index(colormap[vptr[v >> FRACBITS]], light, shade_constants); + p[0] = color; + p[1] = color; + p[2] = color; + p += pitch; + v += vi; + dy--; + } + } + else if (dx == 4) + { + while (dy > 0) + { + uint32_t color = shade_pal_index(colormap[vptr[v >> FRACBITS]], light, shade_constants); + p[0] = color; + p[1] = color; + p[2] = color; + p[3] = color; + p 
+= pitch; + v += vi; + dy--; + } + } + else while (dy > 0) + { + uint32_t color = shade_pal_index(colormap[vptr[v >> FRACBITS]], light, shade_constants); + // The optimizer will probably turn this into a memset call. + // Since dx is not likely to be large, I'm not sure that's a good thing, + // hence the alternatives above. + for (x = 0; x < dx; x++) + { + p[x] = color; + } + p += pitch; + v += vi; + dy--; + } + } +}; + class Vlinec1RGBACommand : public DrawerCommand { DWORD _iscale; @@ -3813,6 +3930,31 @@ void R_FillSpan_rgba() DrawerCommandQueue::QueueCommand(); } +static ShadeConstants slab_rgba_shade_constants; +static const BYTE *slab_rgba_colormap; +static fixed_t slab_rgba_light; + +void R_SetupDrawSlab_rgba(FColormap *base_colormap, float light, int shade) +{ + slab_rgba_shade_constants.light_red = base_colormap->Color.r * 256 / 255; + slab_rgba_shade_constants.light_green = base_colormap->Color.g * 256 / 255; + slab_rgba_shade_constants.light_blue = base_colormap->Color.b * 256 / 255; + slab_rgba_shade_constants.light_alpha = base_colormap->Color.a * 256 / 255; + slab_rgba_shade_constants.fade_red = base_colormap->Fade.r; + slab_rgba_shade_constants.fade_green = base_colormap->Fade.g; + slab_rgba_shade_constants.fade_blue = base_colormap->Fade.b; + slab_rgba_shade_constants.fade_alpha = base_colormap->Fade.a; + slab_rgba_shade_constants.desaturate = MIN(abs(base_colormap->Desaturate), 255) * 255 / 256; + slab_rgba_shade_constants.simple_shade = (base_colormap->Color.d == 0x00ffffff && base_colormap->Fade.d == 0x00000000 && base_colormap->Desaturate == 0); + slab_rgba_colormap = base_colormap->Maps; + slab_rgba_light = LIGHTSCALE(light, shade); +} + +void R_DrawSlab_rgba(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, BYTE *p) +{ + DrawerCommandQueue::QueueCommand(dx, v, dy, vi, vptr, p, slab_rgba_shade_constants, slab_rgba_colormap, slab_rgba_light); +} + //extern FTexture *rw_pic; // For the asserts below DWORD vlinec1_rgba() diff --git 
a/src/r_draw_rgba.h b/src/r_draw_rgba.h index 617e831079..c94cb1e4b9 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -90,6 +90,9 @@ void R_DrawSpanAddClamp_rgba(); void R_DrawSpanMaskedAddClamp_rgba(); void R_FillSpan_rgba(); +void R_SetupDrawSlab_rgba(FColormap *base_colormap, float light, int shade); +void R_DrawSlab_rgba(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, BYTE *p); + void R_DrawFogBoundary_rgba(int x1, int x2, short *uclip, short *dclip); DWORD vlinec1_rgba(); diff --git a/src/r_things.cpp b/src/r_things.cpp index f6a1a709fc..e1f1017f36 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -688,7 +688,7 @@ void R_DrawVisVoxel(vissprite_t *spr, int minslabz, int maxslabz, short *cliptop // Render the voxel, either directly to the screen or offscreen. R_DrawVoxel(spr->pa.vpos, spr->pa.vang, spr->gpos, spr->Angle, - spr->xscale, FLOAT2FIXED(spr->yscale), spr->voxel, spr->Style.BaseColormap->Maps + (spr->Style.ColormapNum << COLORMAPSHIFT), cliptop, clipbot, + spr->xscale, FLOAT2FIXED(spr->yscale), spr->voxel, spr->Style.BaseColormap, spr->Style.ColormapNum, cliptop, clipbot, minslabz, maxslabz, flags); // Blend the voxel, if that's what we need to do. 
@@ -2775,7 +2775,7 @@ extern double BaseYaspectMul;; void R_DrawVoxel(const FVector3 &globalpos, FAngle viewangle, const FVector3 &dasprpos, DAngle dasprang, fixed_t daxscale, fixed_t dayscale, FVoxel *voxobj, - lighttable_t *colormap, short *daumost, short *dadmost, int minslabz, int maxslabz, int flags) + FColormap *colormap, int colormapnum, short *daumost, short *dadmost, int minslabz, int maxslabz, int flags) { int i, j, k, x, y, syoff, ggxstart, ggystart, nxoff; fixed_t cosang, sinang, sprcosang, sprsinang; @@ -2812,7 +2812,7 @@ void R_DrawVoxel(const FVector3 &globalpos, FAngle viewangle, sprcosang = FLOAT2FIXED(dasprang.Cos()) >> 2; sprsinang = FLOAT2FIXED(-dasprang.Sin()) >> 2; - R_SetupDrawSlab(colormap); + R_SetupDrawSlab(colormap, 0.0f, colormapnum << FRACBITS); int pixelsize = r_swtruecolor ? 4 : 1; diff --git a/src/r_things.h b/src/r_things.h index 04d5487ee1..13f89574b5 100644 --- a/src/r_things.h +++ b/src/r_things.h @@ -144,7 +144,7 @@ enum { DVF_OFFSCREEN = 1, DVF_SPANSONLY = 2, DVF_MIRRORED = 4 }; void R_DrawVoxel(const FVector3 &viewpos, FAngle viewangle, const FVector3 &sprpos, DAngle dasprang, fixed_t daxscale, fixed_t dayscale, struct FVoxel *voxobj, - lighttable_t *colormap, short *daumost, short *dadmost, int minslabz, int maxslabz, int flags); + FColormap *colormap, int colormapnum, short *daumost, short *dadmost, int minslabz, int maxslabz, int flags); void R_ClipVisSprite (vissprite_t *vis, int xl, int xh); From db4cba239a16662c437da8dc5d03ce3f14dd151c Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 22 Jun 2016 00:27:12 +0200 Subject: [PATCH 071/912] Renamed member variable to make it compile with gcc --- src/r_draw_rgba.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 9603a8b3eb..1e2678bd30 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -2309,7 +2309,7 @@ class DrawSlabRGBACommand : public DrawerCommand fixed_t _v; int _dy; fixed_t _vi; - 
const BYTE *_vptr; + const BYTE *_voxelptr; uint32_t *_p; ShadeConstants _shade_constants; const BYTE *_colormap; @@ -2324,7 +2324,7 @@ public: _v = v; _dy = dy; _vi = vi; - _vptr = vptr; + _voxelptr = vptr; _p = (uint32_t *)p; _shade_constants = shade_constants; _colormap = colormap; @@ -2340,7 +2340,7 @@ public: fixed_t v = _v; int dy = _dy; fixed_t vi = _vi; - const BYTE *vptr = _vptr; + const BYTE *vptr = _voxelptr; uint32_t *p = _p; ShadeConstants shade_constants = _shade_constants; const BYTE *colormap = _colormap; From ca9d8e580e4b2b94c19182dcb160d9de6c904b5c Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 22 Jun 2016 00:51:16 +0200 Subject: [PATCH 072/912] Increase command queue memory pool to 16 MB and make it flush if its exhausted --- src/r_draw_rgba.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index c94cb1e4b9..47f7c88659 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -193,7 +193,7 @@ EXTERN_CVAR(Bool, r_mipmap) // Manages queueing up commands and executing them on worker threads class DrawerCommandQueue { - enum { memorypool_size = 4 * 1024 * 1024 }; + enum { memorypool_size = 16 * 1024 * 1024 }; char memorypool[memorypool_size]; size_t memorypool_pos = 0; @@ -241,8 +241,13 @@ public: else { void *ptr = AllocMemory(sizeof(T)); - if (!ptr) - return; + if (!ptr) // Out of memory - render what we got + { + queue->Finish(); + ptr = AllocMemory(sizeof(T)); + if (!ptr) + return; + } T *command = new (ptr)T(std::forward(args)...); queue->commands.push_back(command); } From 7a0c801a18bcf4f1910a1ae5cc708fe746ca2f9c Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 22 Jun 2016 08:23:16 +0200 Subject: [PATCH 073/912] Added mipmapping to wallscan --- src/r_draw.h | 29 ------- src/r_draw_rgba.h | 2 +- src/r_segs.cpp | 207 +++++++++++++++++++++++++++++----------------- 3 files changed, 130 insertions(+), 108 deletions(-) diff --git a/src/r_draw.h b/src/r_draw.h 
index 547a044ea3..72304e81f7 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -381,33 +381,4 @@ void R_SetTranslationMap(lighttable_t *translation); extern bool r_swtruecolor; EXTERN_CVAR(Bool, r_bilinear); -// Texture sampler state needed for bilinear filtering -struct SamplerSetup -{ - SamplerSetup() { } - SamplerSetup(fixed_t xoffset, bool magnifying, FTexture *texture, const BYTE*(*getcol)(FTexture *texture, int x)); - - const BYTE *source; - const BYTE *source2; - uint32_t texturefracx; -}; - -inline SamplerSetup::SamplerSetup(fixed_t xoffset, bool magnifying, FTexture *texture, const BYTE*(*getcol)(FTexture *texture, int x)) -{ - // Only do bilinear filtering if enabled and not a magnifying filter - if (!r_swtruecolor || !r_bilinear || magnifying) - { - source = getcol(texture, xoffset >> FRACBITS); - source2 = nullptr; - texturefracx = 0; - } - else - { - int tx = (xoffset - FRACUNIT / 2) >> FRACBITS; - source = getcol(texture, tx); - source2 = getcol(texture, tx + 1); - texturefracx = ((xoffset + FRACUNIT / 2) >> (FRACBITS - 4)) & 15; - } -} - #endif diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index 47f7c88659..8f97d4ecd2 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -443,7 +443,7 @@ inline bool span_sampler_setup(const uint32_t * RESTRICT &source, int &xbits, in // Is this a magfilter or minfilter? 
fixed_t xmagnitude = abs(xstep) >> (32 - xbits - FRACBITS); fixed_t ymagnitude = abs(ystep) >> (32 - ybits - FRACBITS); - fixed_t magnitude = (xmagnitude + ymagnitude) * 3 + (1 << (FRACBITS -1)); + fixed_t magnitude = (xmagnitude + ymagnitude) * 2 + (1 << (FRACBITS -1)); if (magnitude >> FRACBITS == 0) return false; diff --git a/src/r_segs.cpp b/src/r_segs.cpp index d71487bb9d..84c967d1dd 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -1067,11 +1067,92 @@ void R_RenderFakeWallRange (drawseg_t *ds, int x1, int x2) return; } +EXTERN_CVAR(Bool, r_mipmap) + +struct WallscanSampler +{ + WallscanSampler() { } + WallscanSampler(int y1, float swal, double yrepeat, fixed_t xoffset, FTexture *texture, const BYTE*(*getcol)(FTexture *texture, int x)); + + uint32_t uv_pos; + uint32_t uv_step; + int32_t uv_fracbits; + uint32_t uv_max; + + const BYTE *source; + const BYTE *source2; + uint32_t texturefracx; +}; + +WallscanSampler::WallscanSampler(int y1, float swal, double yrepeat, fixed_t xoffset, FTexture *texture, const BYTE*(*getcol)(FTexture *texture, int x)) +{ + int base_width = texture->GetWidth(); + int base_height = texture->GetHeight(); + uv_fracbits = 32 - texture->HeightBits; + uv_max = base_height << uv_fracbits; + + // Find start uv in [0-base_height[ range. + // Not using xs_ToFixed because it rounds the result and we need something that always rounds down to stay within the range. 
+ double uv_stepd = swal * yrepeat; + double v = (dc_texturemid + uv_stepd * (y1 - CenterY + 0.5)) / base_height; + v = v - floor(v); + v *= base_height; + v *= (1 << uv_fracbits); + + uv_pos = (uint32_t)v; + uv_step = xs_ToFixed(uv_fracbits, uv_stepd); + + bool magnifying = uv_step >> (uv_fracbits - 1) == 0; + + // Only do bilinear filtering if enabled and not a magnifying filter + if (!r_swtruecolor || !r_bilinear || magnifying || getcol != R_GetColumn) + { + source = getcol(texture, xoffset >> FRACBITS); + source2 = nullptr; + texturefracx = 0; + } + else + { + int mipmap_offset = 0; + int mip_width = base_width; + int mip_height = base_height; + if (r_mipmap) + { + fixed_t magnitude = abs((int32_t)uv_step) >> (uv_fracbits - FRACBITS); + int level = magnitude >> FRACBITS; + while (level != 0) + { + if (uv_fracbits > 30) + break; + + mipmap_offset += mip_width * mip_height; + uv_fracbits += 1; + uv_pos >>= 1; + uv_step >>= 1; + xoffset >>= 1; + level >>= 1; + mip_width = MAX(mip_width >> 1, 1); + mip_height = MAX(mip_height >> 1, 1); + } + } + + const uint32_t *pixels = texture->GetPixelsBgra() + mipmap_offset; + + int tx0 = ((xoffset - FRACUNIT / 2) >> FRACBITS) % mip_width; + if (tx0 < 0) + tx0 += mip_width; + int tx1 = (tx0 + 1) % mip_width; + source = (BYTE*)(pixels + tx0 * mip_height); + source2 = (BYTE*)(pixels + tx1 * mip_height); + texturefracx = ((xoffset + FRACUNIT / 2) >> (FRACBITS - 4)) & 15; + } +} + // Draw a column with support for non-power-of-two ranges -uint32_t wallscan_drawcol1(int x, int y1, int y2, uint32_t uv_start, uint32_t uv_step, uint32_t uv_max, const SamplerSetup &sampler, DWORD(*draw1column)()) +void wallscan_drawcol1(int x, int y1, int y2, WallscanSampler &sampler, DWORD(*draw1column)()) { int pixelsize = r_swtruecolor ? 
4 : 1; - if (uv_max == 0) // power of two + if (sampler.uv_max == 0) // power of two { int count = y2 - y1; @@ -1080,24 +1161,24 @@ uint32_t wallscan_drawcol1(int x, int y1, int y2, uint32_t uv_start, uint32_t uv dc_texturefracx = sampler.texturefracx; dc_dest = (ylookup[y1] + x) * pixelsize + dc_destorg; dc_count = count; - dc_iscale = uv_step; - dc_texturefrac = uv_start; + dc_iscale = sampler.uv_step; + dc_texturefrac = sampler.uv_pos; draw1column(); - uint64_t step64 = uv_step; - uint64_t pos64 = uv_start; - return (uint32_t)(pos64 + step64 * count); + uint64_t step64 = sampler.uv_step; + uint64_t pos64 = sampler.uv_pos; + sampler.uv_pos = (uint32_t)(pos64 + step64 * count); } else { - uint32_t uv_pos = uv_start; + uint32_t uv_pos = sampler.uv_pos; uint32_t left = y2 - y1; while (left > 0) { - uint32_t available = uv_max - uv_pos; - uint32_t next_uv_wrap = available / uv_step; - if (available % uv_step != 0) + uint32_t available = sampler.uv_max - uv_pos; + uint32_t next_uv_wrap = available / sampler.uv_step; + if (available % sampler.uv_step != 0) next_uv_wrap++; uint32_t count = MIN(left, next_uv_wrap); @@ -1106,25 +1187,25 @@ uint32_t wallscan_drawcol1(int x, int y1, int y2, uint32_t uv_start, uint32_t uv dc_texturefracx = sampler.texturefracx; dc_dest = (ylookup[y1] + x) * pixelsize + dc_destorg; dc_count = count; - dc_iscale = uv_step; + dc_iscale = sampler.uv_step; dc_texturefrac = uv_pos; draw1column(); left -= count; - uv_pos += uv_step * count; - if (uv_pos >= uv_max) - uv_pos -= uv_max; + uv_pos += sampler.uv_step * count; + if (uv_pos >= sampler.uv_max) + uv_pos -= sampler.uv_max; } - return uv_pos; + sampler.uv_pos = uv_pos; } } // Draw four columns with support for non-power-of-two ranges -void wallscan_drawcol4(int x, int y1, int y2, uint32_t *uv_pos, uint32_t *uv_step, uint32_t uv_max, const SamplerSetup *sampler, void(*draw4columns)()) +void wallscan_drawcol4(int x, int y1, int y2, WallscanSampler *sampler, void(*draw4columns)()) { int pixelsize 
= r_swtruecolor ? 4 : 1; - if (uv_max == 0) // power of two, no wrap handling needed + if (sampler[0].uv_max == 0) // power of two, no wrap handling needed { int count = y2 - y1; for (int i = 0; i < 4; i++) @@ -1132,12 +1213,12 @@ void wallscan_drawcol4(int x, int y1, int y2, uint32_t *uv_pos, uint32_t *uv_ste bufplce[i] = sampler[i].source; bufplce2[i] = sampler[i].source2; buftexturefracx[i] = sampler[i].texturefracx; - vplce[i] = uv_pos[i]; - vince[i] = uv_step[i]; + vplce[i] = sampler[i].uv_pos; + vince[i] = sampler[i].uv_step; - uint64_t step64 = uv_step[i]; - uint64_t pos64 = uv_pos[i]; - uv_pos[i] = (uint32_t)(pos64 + step64 * count); + uint64_t step64 = sampler[i].uv_step; + uint64_t pos64 = sampler[i].uv_pos; + sampler[i].uv_pos = (uint32_t)(pos64 + step64 * count); } dc_dest = (ylookup[y1] + x) * pixelsize + dc_destorg; dc_count = count; @@ -1160,9 +1241,9 @@ void wallscan_drawcol4(int x, int y1, int y2, uint32_t *uv_pos, uint32_t *uv_ste uint32_t count = left; for (int i = 0; i < 4; i++) { - uint32_t available = uv_max - uv_pos[i]; - uint32_t next_uv_wrap = available / uv_step[i]; - if (available % uv_step[i] != 0) + uint32_t available = sampler[i].uv_max - sampler[i].uv_pos; + uint32_t next_uv_wrap = available / sampler[i].uv_step; + if (available % sampler[i].uv_step != 0) next_uv_wrap++; count = MIN(next_uv_wrap, count); } @@ -1170,8 +1251,8 @@ void wallscan_drawcol4(int x, int y1, int y2, uint32_t *uv_pos, uint32_t *uv_ste // Draw until that column wraps for (int i = 0; i < 4; i++) { - vplce[i] = uv_pos[i]; - vince[i] = uv_step[i]; + vplce[i] = sampler[i].uv_pos; + vince[i] = sampler[i].uv_step; } dc_count = count; draw4columns(); @@ -1179,9 +1260,9 @@ void wallscan_drawcol4(int x, int y1, int y2, uint32_t *uv_pos, uint32_t *uv_ste // Wrap the uv position for (int i = 0; i < 4; i++) { - uv_pos[i] += uv_step[i] * count; - if (uv_pos[i] >= uv_max) - uv_pos[i] -= uv_max; + sampler[i].uv_pos += sampler[i].uv_step * count; + if (sampler[i].uv_pos >= 
sampler[i].uv_max) + sampler[i].uv_pos -= sampler[i].uv_max; } left -= count; @@ -1189,22 +1270,6 @@ void wallscan_drawcol4(int x, int y1, int y2, uint32_t *uv_pos, uint32_t *uv_ste } } -// Calculates a wrapped uv start position value for a column -void calc_uv_start_and_step(int y1, float swal, double yrepeat, uint32_t uv_height, int fracbits, uint32_t &uv_start_out, uint32_t &uv_step_out) -{ - double uv_stepd = swal * yrepeat; - - // Find start uv in [0-uv_height[ range. - // Not using xs_ToFixed because it rounds the result and we need something that always rounds down to stay within the range. - double v = (dc_texturemid + uv_stepd * (y1 - CenterY + 0.5)) / uv_height; - v = v - floor(v); - v *= uv_height; - v *= (1 << fracbits); - - uv_start_out = (uint32_t)v; - uv_step_out = xs_ToFixed(fracbits, uv_stepd); -} - typedef DWORD(*Draw1ColumnFuncPtr)(); typedef void(*Draw4ColumnsFuncPtr)(); @@ -1216,15 +1281,12 @@ void wallscan_any( if (rw_pic->UseType == FTexture::TEX_Null) return; - uint32_t uv_height = rw_pic->GetHeight(); - uint32_t fracbits = 32 - rw_pic->HeightBits; - uint32_t uv_max = uv_height << fracbits; + fixed_t xoffset = rw_offset; + rw_pic->GetHeight(); // To ensure that rw_pic->HeightBits has been set DWORD(*draw1column)(); void(*draw4columns)(); - setupwallscan(fracbits, draw1column, draw4columns); - - fixed_t xoffset = rw_offset; + setupwallscan(32 - rw_pic->HeightBits, draw1column, draw4columns); bool fixed = (fixedcolormap != NULL || fixedlightlev >= 0); if (fixed) @@ -1261,11 +1323,8 @@ void wallscan_any( if (!fixed) R_SetColorMapLight(basecolormap, light, wallshade); - uint32_t uv_start, uv_step; - calc_uv_start_and_step(y1, swal[x], yrepeat, uv_height, fracbits, uv_start, uv_step); - - SamplerSetup sampler(lwal[x] + xoffset, uv_step >> (fracbits - 1) == 0, rw_pic, getcol); - wallscan_drawcol1(x, y1, y2, uv_start, uv_step, uv_max, sampler, draw1column); + WallscanSampler sampler(y1, swal[x], yrepeat, lwal[x] + xoffset, rw_pic, getcol); + 
wallscan_drawcol1(x, y1, y2, sampler, draw1column); } // The aligned columns @@ -1282,17 +1341,9 @@ void wallscan_any( light += rw_lightstep; } - uint32_t uv_pos[4], uv_step[4]; - int magnifying = 0; + WallscanSampler sampler[4]; for (int i = 0; i < 4; i++) - { - calc_uv_start_and_step(y1[i], swal[x + i], yrepeat, uv_height, fracbits, uv_pos[i], uv_step[i]); - magnifying |= uv_step[i] >> (fracbits - 1); - } - - SamplerSetup sampler[4]; - for (int i = 0; i < 4; i++) - sampler[i] = SamplerSetup(lwal[x + i] + xoffset, magnifying == 0, rw_pic, getcol); + sampler[i] = WallscanSampler(y1[i], swal[x + i], yrepeat, lwal[x + i] + xoffset, rw_pic, getcol); // Figure out where we vertically can start and stop drawing 4 columns in one go int middle_y1 = y1[0]; @@ -1305,13 +1356,16 @@ void wallscan_any( // If we got an empty column in our set we cannot draw 4 columns in one go: bool empty_column_in_set = false; + int bilinear_count = 0; for (int i = 0; i < 4; i++) { if (y2[i] <= y1[i]) empty_column_in_set = true; + if (sampler[i].source2) + bilinear_count++; } - if (empty_column_in_set || middle_y2 <= middle_y1) + if (empty_column_in_set || middle_y2 <= middle_y1 || (bilinear_count > 0 && bilinear_count < 4)) { for (int i = 0; i < 4; i++) { @@ -1320,7 +1374,7 @@ void wallscan_any( if (!fixed) R_SetColorMapLight(basecolormap, lights[i], wallshade); - wallscan_drawcol1(x + i, y1[i], y2[i], uv_pos[i], uv_step[i], uv_max, sampler[i], draw1column); + wallscan_drawcol1(x + i, y1[i], y2[i], sampler[i], draw1column); } continue; } @@ -1332,7 +1386,7 @@ void wallscan_any( R_SetColorMapLight(basecolormap, lights[i], wallshade); if (y1[i] < middle_y1) - uv_pos[i] = wallscan_drawcol1(x + i, y1[i], middle_y1, uv_pos[i], uv_step[i], uv_max, sampler[i], draw1column); + wallscan_drawcol1(x + i, y1[i], middle_y1, sampler[i], draw1column); } // Draw the area where all 4 columns are active @@ -1352,7 +1406,7 @@ void wallscan_any( } } } - wallscan_drawcol4(x, middle_y1, middle_y2, uv_pos, uv_step, 
uv_max, sampler, draw4columns); + wallscan_drawcol4(x, middle_y1, middle_y2, sampler, draw4columns); // Draw the last rows where not all 4 columns are active for (int i = 0; i < 4; i++) @@ -1361,7 +1415,7 @@ void wallscan_any( R_SetColorMapLight(basecolormap, lights[i], wallshade); if (middle_y2 < y2[i]) - uv_pos[i] = wallscan_drawcol1(x + i, middle_y2, y2[i], uv_pos[i], uv_step[i], uv_max, sampler[i], draw1column); + wallscan_drawcol1(x + i, middle_y2, y2[i], sampler[i], draw1column); } } @@ -1376,11 +1430,8 @@ void wallscan_any( if (!fixed) R_SetColorMapLight(basecolormap, light, wallshade); - uint32_t uv_start, uv_step; - calc_uv_start_and_step(y1, swal[x], yrepeat, uv_height, fracbits, uv_start, uv_step); - - SamplerSetup sampler(lwal[x] + xoffset, uv_step >> (fracbits - 1) == 0, rw_pic, getcol); - wallscan_drawcol1(x, y1, y2, uv_start, uv_step, uv_max, sampler, draw1column); + WallscanSampler sampler(y1, swal[x], yrepeat, lwal[x] + xoffset, rw_pic, getcol); + wallscan_drawcol1(x, y1, y2, sampler, draw1column); } NetUpdate (); From 698b5f3db19dd5b3331cae5a7a234c23192c310a Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 24 Jun 2016 11:37:51 +0200 Subject: [PATCH 074/912] Simplify drawer code by creating loop iterators Fixed blending bug --- src/r_draw.cpp | 12 +- src/r_draw.h | 6 +- src/r_draw_rgba.cpp | 3338 +++++++++++----------------------------- src/r_draw_rgba.h | 426 ++--- src/r_draw_rgba_sse.h | 116 +- src/r_drawt_rgba.cpp | 70 +- src/r_drawt_rgba_sse.h | 12 +- src/r_plane.cpp | 4 +- src/r_segs.cpp | 16 +- src/r_things.cpp | 2 +- src/v_draw.cpp | 2 +- 11 files changed, 1235 insertions(+), 2769 deletions(-) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 8cca132898..578ca9646e 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -1644,6 +1644,8 @@ extern "C" void R_DrawSlabC(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *v int vlinebits; int mvlinebits; +uint32_t vlinemax; +uint32_t mvlinemax; #ifndef X86_ASM static DWORD vlinec1 (); @@ 
-1693,11 +1695,12 @@ DWORD (*domvline1)() = mvlineasm1; void (*domvline4)() = mvlineasm4; #endif -void setupvline (int fracbits) +void setupvline (int fracbits, int fracmax) { if (r_swtruecolor) { vlinebits = fracbits; + vlinemax = fracmax; return; } @@ -1777,7 +1780,7 @@ void vlinec4 () } #endif -void setupmvline (int fracbits) +void setupmvline (int fracbits, int fracmax) { if (!r_swtruecolor) { @@ -1792,6 +1795,7 @@ void setupmvline (int fracbits) else { mvlinebits = fracbits; + mvlinemax = fracmax; } } @@ -1964,10 +1968,12 @@ void R_DrawFogBoundary_C (int x1, int x2, short *uclip, short *dclip) } int tmvlinebits; +uint32_t tmvlinemax; -void setuptmvline (int bits) +void setuptmvline (int bits, int fracmax) { tmvlinebits = bits; + tmvlinemax = fracmax; } fixed_t tmvline1_add_C () diff --git a/src/r_draw.h b/src/r_draw.h index 72304e81f7..bd477efc42 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -100,13 +100,13 @@ extern void (*R_DrawColumn)(void); extern DWORD (*dovline1) (); extern DWORD (*doprevline1) (); extern void (*dovline4) (); -extern void setupvline (int); +extern void setupvline (int,int); extern DWORD (*domvline1) (); extern void (*domvline4) (); -extern void setupmvline (int); +extern void setupmvline (int,int); -extern void setuptmvline (int); +extern void setuptmvline (int,int); // The Spectre/Invisibility effect. 
extern void (*R_DrawFuzzColumn)(void); diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 1e2678bd30..dc97fdd478 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -51,6 +51,9 @@ extern int vlinebits; extern int mvlinebits; extern int tmvlinebits; +extern uint32_t vlinemax; +extern uint32_t mvlinemax; +extern uint32_t tmvlinemax; extern "C" short spanend[MAXHEIGHT]; extern float rw_light; @@ -261,353 +264,520 @@ void DrawerCommandQueue::StopThreads() ///////////////////////////////////////////////////////////////////////////// -class DrawColumnRGBACommand : public DrawerCommand +class DrawerColumnCommand : public DrawerCommand { +public: int _count; BYTE * RESTRICT _dest; - DWORD _texturefrac; - DWORD _iscale; - fixed_t _light; - const BYTE * RESTRICT _source; int _pitch; + DWORD _iscale; + DWORD _texturefrac; + + DrawerColumnCommand() + { + _count = dc_count; + _dest = dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _pitch = dc_pitch; + } + + class LoopIterator + { + public: + int count; + uint32_t *dest; + int pitch; + fixed_t fracstep; + fixed_t frac; + + LoopIterator(DrawerColumnCommand *command, DrawerThread *thread) + { + count = thread->count_for_thread(command->_dest_y, command->_count); + if (count <= 0) + return; + + dest = thread->dest_for_thread(command->_dest_y, command->_pitch, (uint32_t*)command->_dest); + pitch = command->_pitch * thread->num_cores; + + fracstep = command->_iscale * thread->num_cores; + frac = command->_texturefrac + command->_iscale * thread->skipped_by_thread(command->_dest_y); + } + + uint32_t sample_index() + { + return frac >> FRACBITS; + } + + explicit operator bool() + { + return count > 0; + } + + bool next() + { + dest += pitch; + frac += fracstep; + return (--count) != 0; + } + }; +}; + +class DrawColumnRGBACommand : public DrawerColumnCommand +{ + uint32_t _light; + const BYTE * RESTRICT _source; ShadeConstants _shade_constants; BYTE * RESTRICT _colormap; public: 
DrawColumnRGBACommand() { - _count = dc_count; - _dest = dc_dest; - _texturefrac = dc_texturefrac; - _iscale = dc_iscale; - _light = dc_light; - _source = dc_source; - _pitch = dc_pitch; + _light = LightBgra::calc_light_multiplier(dc_light); _shade_constants = dc_shade_constants; + _source = dc_source; _colormap = dc_colormap; } void Execute(DrawerThread *thread) override { - int count; - uint32_t* dest; - fixed_t frac; - fixed_t fracstep; - - count = thread->count_for_thread(_dest_y, _count); - - // Zero length, column does not exceed a pixel. - if (count <= 0) - return; - - // Framebuffer destination address. - dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - - uint32_t light = calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; - - // Determine scaling, - // which is the only mapping to be done. - fracstep = _iscale * thread->num_cores; - frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); - - // [RH] Get local copies of these variables so that the compiler - // has a better chance of optimizing this well. 
- const BYTE *source = _source; - int pitch = _pitch * thread->num_cores; - BYTE *colormap = _colormap; - + LoopIterator loop(this, thread); + if (!loop) return; do { - *dest = shade_pal_index(colormap[source[frac >> FRACBITS]], light, shade_constants); - - dest += pitch; - frac += fracstep; - - } while (--count); + uint32_t fg = LightBgra::shade_pal_index(_colormap[_source[loop.sample_index()]], _light, _shade_constants); + *loop.dest = BlendBgra::copy(fg); + } while (loop.next()); } }; -class FillColumnRGBACommand : public DrawerCommand +class FillColumnRGBACommand : public DrawerColumnCommand { - int _count; - BYTE * RESTRICT _dest; - fixed_t _light; - int _pitch; - int _color; + uint32_t _color; public: FillColumnRGBACommand() { - _count = dc_count; - _dest = dc_dest; - _light = dc_light; - _pitch = dc_pitch; - _color = dc_color; + uint32_t light = LightBgra::calc_light_multiplier(dc_light); + _color = LightBgra::shade_pal_index_simple(dc_color, light); } void Execute(DrawerThread *thread) override { - int count; - uint32_t* dest; - - count = thread->count_for_thread(_dest_y, _count); - - if (count <= 0) - return; - - dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - - uint32_t light = calc_light_multiplier(_light); - + LoopIterator loop(this, thread); + if (!loop) return; + do { - int pitch = _pitch * thread->num_cores; - uint32_t color = shade_pal_index_simple(_color, light); - - do - { - *dest = color; - dest += pitch; - } while (--count); - } + *loop.dest = BlendBgra::copy(_color); + } while (loop.next()); } }; -class FillAddColumnRGBACommand : public DrawerCommand +class FillAddColumnRGBACommand : public DrawerColumnCommand { - int _count; - BYTE * RESTRICT _dest; - int _pitch; uint32_t _srccolor; public: FillAddColumnRGBACommand() { - _count = dc_count; - _dest = dc_dest; - _pitch = dc_pitch; _srccolor = dc_srccolor_bgra; } void Execute(DrawerThread *thread) override { - int count; - uint32_t *dest; + LoopIterator loop(this, thread); + 
if (!loop) return; - count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - int pitch = _pitch * thread->num_cores; - - uint32_t fg = _srccolor; - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - uint32_t fg_alpha = fg >> 24; - fg_alpha += fg_alpha >> 7; - - fg_red *= fg_alpha; - fg_green *= fg_alpha; - fg_blue *= fg_alpha; - - uint32_t inv_alpha = 256 - fg_alpha; + uint32_t alpha = APART(_srccolor); + alpha += alpha >> 7; do { - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = (fg_red + bg_red * inv_alpha) / 256; - uint32_t green = (fg_green + bg_green * inv_alpha) / 256; - uint32_t blue = (fg_blue + bg_blue * inv_alpha) / 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - } while (--count); + *loop.dest = BlendBgra::add(_srccolor, *loop.dest, alpha, 256 - alpha); + } while (loop.next()); } }; -class FillAddClampColumnRGBACommand : public DrawerCommand +class FillAddClampColumnRGBACommand : public DrawerColumnCommand { - int _count; - BYTE * RESTRICT _dest; - int _pitch; int _color; uint32_t _srccolor; - fixed_t _srcalpha; - fixed_t _destalpha; + uint32_t _srcalpha; + uint32_t _destalpha; public: FillAddClampColumnRGBACommand() { - _count = dc_count; - _dest = dc_dest; - _pitch = dc_pitch; _color = dc_color; _srccolor = dc_srccolor_bgra; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; + _srcalpha = dc_srcalpha >> (FRACBITS - 8); + _destalpha = dc_destalpha >> (FRACBITS - 8); } void Execute(DrawerThread *thread) override { - int count; - uint32_t *dest; - - count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - int pitch = _pitch * thread->num_cores; - - uint32_t fg = _srccolor; - 
uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - - fg_red *= fg_alpha; - fg_green *= fg_alpha; - fg_blue *= fg_alpha; - - do { - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((fg_red + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue + bg_blue * bg_alpha) / 256, 0, 255); - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - } while (--count); + LoopIterator loop(this, thread); + if (!loop) return; + do + { + *loop.dest = BlendBgra::add(_srccolor, *loop.dest, _srcalpha, _destalpha); + } while (loop.next()); } }; -class FillSubClampColumnRGBACommand : public DrawerCommand +class FillSubClampColumnRGBACommand : public DrawerColumnCommand { - int _count; - BYTE * RESTRICT _dest; - int _pitch; - int _color; uint32_t _srccolor; - fixed_t _srcalpha; - fixed_t _destalpha; + uint32_t _srcalpha; + uint32_t _destalpha; public: FillSubClampColumnRGBACommand() { - _count = dc_count; - _dest = dc_dest; - _pitch = dc_pitch; - _color = dc_color; _srccolor = dc_srccolor_bgra; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; + _srcalpha = dc_srcalpha >> (FRACBITS - 8); + _destalpha = dc_destalpha >> (FRACBITS - 8); } void Execute(DrawerThread *thread) override { - int count; - uint32_t *dest; - - count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - int pitch = _pitch * thread->num_cores; - - uint32_t fg = _srccolor; - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = 
_destalpha >> (FRACBITS - 8); - - fg_red *= fg_alpha; - fg_green *= fg_alpha; - fg_blue *= fg_alpha; - - do { - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((0x10000 - fg_red + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 - fg_green + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 - fg_blue + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - } while (--count); + LoopIterator loop(this, thread); + if (!loop) return; + do + { + *loop.dest = BlendBgra::sub(_srccolor, *loop.dest, _srcalpha, _destalpha); + } while (loop.next()); } }; -class FillRevSubClampColumnRGBACommand : public DrawerCommand +class FillRevSubClampColumnRGBACommand : public DrawerColumnCommand { - int _count; - BYTE * RESTRICT _dest; - int _pitch; - int _color; uint32_t _srccolor; - fixed_t _srcalpha; - fixed_t _destalpha; + uint32_t _srcalpha; + uint32_t _destalpha; public: FillRevSubClampColumnRGBACommand() { - _count = dc_count; - _dest = dc_dest; - _pitch = dc_pitch; - _color = dc_color; _srccolor = dc_srccolor_bgra; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; + _srcalpha = dc_srcalpha >> (FRACBITS - 8); + _destalpha = dc_destalpha >> (FRACBITS - 8); } void Execute(DrawerThread *thread) override { - int count; - uint32_t *dest; + LoopIterator loop(this, thread); + if (!loop) return; + do + { + *loop.dest = BlendBgra::revsub(_srccolor, *loop.dest, _srcalpha, _destalpha); + } while (loop.next()); + } +}; - count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; +class DrawAddColumnRGBACommand : public DrawerColumnCommand +{ + const BYTE * RESTRICT _source; + uint32_t _light; + ShadeConstants _shade_constants; + uint32_t _srcalpha; + uint32_t _destalpha; + BYTE * RESTRICT _colormap; - dest = 
thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - int pitch = _pitch * thread->num_cores; +public: + DrawAddColumnRGBACommand() + { + _source = dc_source; + _light = LightBgra::calc_light_multiplier(dc_light); + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha >> (FRACBITS - 8); + _destalpha = dc_destalpha >> (FRACBITS - 8); + _colormap = dc_colormap; + } - uint32_t fg = _srccolor; - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); + void Execute(DrawerThread *thread) override + { + LoopIterator loop(this, thread); + if (!loop) return; + do + { + uint32_t fg = LightBgra::shade_pal_index(_colormap[_source[loop.sample_index()]], _light, _shade_constants); + *loop.dest = BlendBgra::add(fg, *loop.dest, _srcalpha, _destalpha); + } while (loop.next()); + } +}; - fg_red *= fg_alpha; - fg_green *= fg_alpha; - fg_blue *= fg_alpha; +class DrawTranslatedColumnRGBACommand : public DrawerColumnCommand +{ + fixed_t _light; + ShadeConstants _shade_constants; + BYTE * RESTRICT _translation; + const BYTE * RESTRICT _source; - do { - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; +public: + DrawTranslatedColumnRGBACommand() + { + _light = LightBgra::calc_light_multiplier(dc_light); + _shade_constants = dc_shade_constants; + _translation = dc_translation; + _source = dc_source; + } - uint32_t red = clamp((0x10000 + fg_red - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 + fg_green - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 + fg_blue - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; + void Execute(DrawerThread *thread) override + { + LoopIterator loop(this, thread); + if (!loop) return; + do + { + uint32_t fg = 
LightBgra::shade_pal_index(_translation[_source[loop.sample_index()]], _light, _shade_constants); + *loop.dest = BlendBgra::copy(fg); + } while (loop.next()); + } +}; - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - } while (--count); +class DrawTlatedAddColumnRGBACommand : public DrawerColumnCommand +{ + fixed_t _light; + ShadeConstants _shade_constants; + BYTE * RESTRICT _translation; + const BYTE * RESTRICT _source; + uint32_t _srcalpha; + uint32_t _destalpha; + +public: + DrawTlatedAddColumnRGBACommand() + { + _light = LightBgra::calc_light_multiplier(dc_light); + _shade_constants = dc_shade_constants; + _translation = dc_translation; + _source = dc_source; + _srcalpha = dc_srcalpha >> (FRACBITS - 8); + _destalpha = dc_destalpha >> (FRACBITS - 8); + } + + void Execute(DrawerThread *thread) override + { + LoopIterator loop(this, thread); + if (!loop) return; + do + { + uint32_t fg = LightBgra::shade_pal_index(_translation[_source[loop.sample_index()]], _light, _shade_constants); + *loop.dest = BlendBgra::add(fg, *loop.dest, _srcalpha, _destalpha); + } while (loop.next()); + } +}; + +class DrawShadedColumnRGBACommand : public DrawerColumnCommand +{ +private: + const BYTE * RESTRICT _source; + lighttable_t * RESTRICT _colormap; + uint32_t _color; + +public: + DrawShadedColumnRGBACommand() + { + _source = dc_source; + _colormap = dc_colormap; + _color = LightBgra::shade_pal_index_simple(dc_color, LightBgra::calc_light_multiplier(dc_light)); + } + + void Execute(DrawerThread *thread) override + { + LoopIterator loop(this, thread); + if (!loop) return; + do + { + uint32_t alpha = clamp(_colormap[_source[loop.sample_index()]], 0, 64) * 4; + uint32_t inv_alpha = 256 - alpha; + *loop.dest = BlendBgra::add(_color, *loop.dest, alpha, inv_alpha); + } while (loop.next()); + } +}; + +class DrawAddClampColumnRGBACommand : public DrawerColumnCommand +{ + const BYTE * RESTRICT _source; + uint32_t _light; + ShadeConstants _shade_constants; + uint32_t 
_srcalpha; + uint32_t _destalpha; + +public: + DrawAddClampColumnRGBACommand() + { + _source = dc_source; + _light = LightBgra::calc_light_multiplier(dc_light); + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha >> (FRACBITS - 8); + _destalpha = dc_destalpha >> (FRACBITS - 8); + } + + void Execute(DrawerThread *thread) override + { + LoopIterator loop(this, thread); + if (!loop) return; + do + { + uint32_t fg = LightBgra::shade_pal_index(_source[loop.sample_index()], _light, _shade_constants); + *loop.dest = BlendBgra::add(fg, *loop.dest, _srcalpha, _destalpha); + } while (loop.next()); + } +}; + +class DrawAddClampTranslatedColumnRGBACommand : public DrawerColumnCommand +{ + BYTE * RESTRICT _translation; + const BYTE * RESTRICT _source; + uint32_t _light; + ShadeConstants _shade_constants; + uint32_t _srcalpha; + uint32_t _destalpha; + +public: + DrawAddClampTranslatedColumnRGBACommand() + { + _translation = dc_translation; + _source = dc_source; + _light = LightBgra::calc_light_multiplier(dc_light); + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha >> (FRACBITS - 8); + _destalpha = dc_destalpha >> (FRACBITS - 8); + } + + void Execute(DrawerThread *thread) override + { + LoopIterator loop(this, thread); + if (!loop) return; + do + { + uint32_t fg = LightBgra::shade_pal_index(_translation[_source[loop.sample_index()]], _light, _shade_constants); + *loop.dest = BlendBgra::add(fg, *loop.dest, _srcalpha, _destalpha); + } while (loop.next()); + } +}; + +class DrawSubClampColumnRGBACommand : public DrawerColumnCommand +{ + const BYTE * RESTRICT _source; + uint32_t _light; + ShadeConstants _shade_constants; + uint32_t _srcalpha; + uint32_t _destalpha; + +public: + DrawSubClampColumnRGBACommand() + { + _source = dc_source; + _light = LightBgra::calc_light_multiplier(dc_light); + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha >> (FRACBITS - 8); + _destalpha = dc_destalpha >> (FRACBITS - 8); + } + + void 
Execute(DrawerThread *thread) override + { + LoopIterator loop(this, thread); + if (!loop) return; + do + { + uint32_t fg = LightBgra::shade_pal_index(_source[loop.sample_index()], _light, _shade_constants); + *loop.dest = BlendBgra::sub(fg, *loop.dest, _srcalpha, _destalpha); + } while (loop.next()); + } +}; + +class DrawSubClampTranslatedColumnRGBACommand : public DrawerColumnCommand +{ + const BYTE * RESTRICT _source; + uint32_t _light; + ShadeConstants _shade_constants; + uint32_t _srcalpha; + uint32_t _destalpha; + BYTE * RESTRICT _translation; + +public: + DrawSubClampTranslatedColumnRGBACommand() + { + _source = dc_source; + _light = LightBgra::calc_light_multiplier(dc_light); + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha >> (FRACBITS - 8); + _destalpha = dc_destalpha >> (FRACBITS - 8); + _translation = dc_translation; + } + + void Execute(DrawerThread *thread) override + { + LoopIterator loop(this, thread); + if (!loop) return; + do + { + uint32_t fg = LightBgra::shade_pal_index(_translation[_source[loop.sample_index()]], _light, _shade_constants); + *loop.dest = BlendBgra::sub(fg, *loop.dest, _srcalpha, _destalpha); + } while (loop.next()); + } +}; + +class DrawRevSubClampColumnRGBACommand : public DrawerColumnCommand +{ + const BYTE * RESTRICT _source; + uint32_t _light; + ShadeConstants _shade_constants; + uint32_t _srcalpha; + uint32_t _destalpha; + +public: + DrawRevSubClampColumnRGBACommand() + { + _source = dc_source; + _light = LightBgra::calc_light_multiplier(dc_light); + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha >> (FRACBITS - 8); + _destalpha = dc_destalpha >> (FRACBITS - 8); + } + + void Execute(DrawerThread *thread) override + { + LoopIterator loop(this, thread); + if (!loop) return; + do + { + uint32_t fg = LightBgra::shade_pal_index(_source[loop.sample_index()], _light, _shade_constants); + *loop.dest = BlendBgra::revsub(fg, *loop.dest, _srcalpha, _destalpha); + } while (loop.next()); + } +}; + 
+class DrawRevSubClampTranslatedColumnRGBACommand : public DrawerColumnCommand +{ + const BYTE * RESTRICT _source; + uint32_t _light; + ShadeConstants _shade_constants; + uint32_t _srcalpha; + uint32_t _destalpha; + BYTE * RESTRICT _translation; + +public: + DrawRevSubClampTranslatedColumnRGBACommand() + { + _source = dc_source; + _light = LightBgra::calc_light_multiplier(dc_light); + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha >> (FRACBITS - 8); + _destalpha = dc_destalpha >> (FRACBITS - 8); + _translation = dc_translation; + } + + void Execute(DrawerThread *thread) override + { + LoopIterator loop(this, thread); + if (!loop) return; + do + { + uint32_t fg = LightBgra::shade_pal_index(_translation[_source[loop.sample_index()]], _light, _shade_constants); + *loop.dest = BlendBgra::revsub(fg, *loop.dest, _srcalpha, _destalpha); + } while (loop.next()); } }; @@ -635,19 +805,16 @@ public: void Execute(DrawerThread *thread) override { - int count; - uint32_t *dest; - int yl = MAX(_yl, 1); int yh = MIN(_yh, _fuzzviewheight); - count = thread->count_for_thread(yl, yh - yl + 1); + int count = thread->count_for_thread(yl, yh - yl + 1); // Zero length. 
if (count <= 0) return; - dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + _x + (uint32_t*)_destorg); + uint32_t *dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + _x + (uint32_t*)_destorg); int pitch = _pitch * thread->num_cores; int fuzzstep = thread->num_cores; @@ -659,13 +826,10 @@ public: if (yl < fuzzstep) { uint32_t bg = dest[fuzzoffset[fuzz] * fuzzstep + pitch]; - uint32_t bg_red = (bg >> 16) & 0xff; - uint32_t bg_green = (bg >> 8) & 0xff; - uint32_t bg_blue = (bg) & 0xff; - uint32_t red = bg_red * 3 / 4; - uint32_t green = bg_green * 3 / 4; - uint32_t blue = bg_blue * 3 / 4; + uint32_t red = RPART(bg) * 3 / 4; + uint32_t green = GPART(bg) * 3 / 4; + uint32_t blue = BPART(bg) * 3 / 4; *dest = 0xff000000 | (red << 16) | (green << 8) | blue; dest += pitch; @@ -694,13 +858,10 @@ public: do { uint32_t bg = dest[fuzzoffset[fuzz] * fuzzstep]; - uint32_t bg_red = (bg >> 16) & 0xff; - uint32_t bg_green = (bg >> 8) & 0xff; - uint32_t bg_blue = (bg) & 0xff; - uint32_t red = bg_red * 3 / 4; - uint32_t green = bg_green * 3 / 4; - uint32_t blue = bg_blue * 3 / 4; + uint32_t red = RPART(bg) * 3 / 4; + uint32_t green = GPART(bg) * 3 / 4; + uint32_t blue = BPART(bg) * 3 / 4; *dest = 0xff000000 | (red << 16) | (green << 8) | blue; dest += pitch; @@ -714,783 +875,21 @@ public: if (lowerbounds) { uint32_t bg = dest[fuzzoffset[fuzz] * fuzzstep - pitch]; - uint32_t bg_red = (bg >> 16) & 0xff; - uint32_t bg_green = (bg >> 8) & 0xff; - uint32_t bg_blue = (bg) & 0xff; - uint32_t red = bg_red * 3 / 4; - uint32_t green = bg_green * 3 / 4; - uint32_t blue = bg_blue * 3 / 4; + uint32_t red = RPART(bg) * 3 / 4; + uint32_t green = GPART(bg) * 3 / 4; + uint32_t blue = BPART(bg) * 3 / 4; *dest = 0xff000000 | (red << 16) | (green << 8) | blue; } } }; -class DrawAddColumnRGBACommand : public DrawerCommand -{ - int _count; - BYTE * RESTRICT _dest; - DWORD _iscale; - DWORD _texturefrac; - const BYTE * RESTRICT _source; - int _pitch; - fixed_t _light; - ShadeConstants 
_shade_constants; - fixed_t _srcalpha; - fixed_t _destalpha; - BYTE * RESTRICT _colormap; +///////////////////////////////////////////////////////////////////////////// +class DrawerSpanCommand : public DrawerCommand +{ public: - DrawAddColumnRGBACommand() - { - _count = dc_count; - _dest = dc_dest; - _iscale = dc_iscale; - _texturefrac = dc_texturefrac; - _source = dc_source; - _pitch = dc_pitch; - _light = dc_light; - _shade_constants = dc_shade_constants; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; - _colormap = dc_colormap; - } - - void Execute(DrawerThread *thread) override - { - int count; - uint32_t *dest; - fixed_t frac; - fixed_t fracstep; - - count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - - fracstep = _iscale * thread->num_cores; - frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); - - { - const BYTE *source = _source; - int pitch = _pitch * thread->num_cores; - - uint32_t light = calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; - BYTE *colormap = _colormap; - - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - - do - { - uint32_t fg = shade_pal_index(colormap[source[frac >> FRACBITS]], light, shade_constants); - - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - frac += fracstep; - } while (--count); - } - } -}; - -class 
DrawTranslatedColumnRGBACommand : public DrawerCommand -{ - int _count; - fixed_t _light; - ShadeConstants _shade_constants; - BYTE * RESTRICT _dest; - DWORD _iscale; - DWORD _texturefrac; - BYTE * RESTRICT _translation; - const BYTE * RESTRICT _source; - int _pitch; - -public: - DrawTranslatedColumnRGBACommand() - { - _count = dc_count; - _light = dc_light; - _shade_constants = dc_shade_constants; - _dest = dc_dest; - _iscale = dc_iscale; - _texturefrac = dc_texturefrac; - _translation = dc_translation; - _source = dc_source; - _pitch = dc_pitch; - } - - void Execute(DrawerThread *thread) override - { - int count; - uint32_t* dest; - fixed_t frac; - fixed_t fracstep; - - count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - uint32_t light = calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; - - dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - - fracstep = _iscale * thread->num_cores; - frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); - - { - // [RH] Local copies of global vars to improve compiler optimizations - BYTE *translation = _translation; - const BYTE *source = _source; - int pitch = _pitch * thread->num_cores; - - do - { - *dest = shade_pal_index(translation[source[frac >> FRACBITS]], light, shade_constants); - dest += pitch; - frac += fracstep; - } while (--count); - } - } -}; - -class DrawTlatedAddColumnRGBACommand : public DrawerCommand -{ - int _count; - fixed_t _light; - ShadeConstants _shade_constants; - BYTE * RESTRICT _dest; - DWORD _iscale; - DWORD _texturefrac; - BYTE * RESTRICT _translation; - const BYTE * RESTRICT _source; - int _pitch; - fixed_t _srcalpha; - fixed_t _destalpha; - -public: - DrawTlatedAddColumnRGBACommand() - { - _count = dc_count; - _light = dc_light; - _shade_constants = dc_shade_constants; - _dest = dc_dest; - _iscale = dc_iscale; - _texturefrac = dc_texturefrac; - _translation = dc_translation; - _source = dc_source; - 
_pitch = dc_pitch; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; - } - - void Execute(DrawerThread *thread) override - { - int count; - uint32_t *dest; - fixed_t frac; - fixed_t fracstep; - - count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - uint32_t light = calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; - - dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - - fracstep = _iscale * thread->num_cores; - frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); - - { - BYTE *translation = _translation; - const BYTE *source = _source; - int pitch = _pitch * thread->num_cores; - - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - - do - { - uint32_t fg = shade_pal_index(translation[source[frac >> FRACBITS]], light, shade_constants); - - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - frac += fracstep; - } while (--count); - } - } -}; - -class DrawShadedColumnRGBACommand : public DrawerCommand -{ -private: - int _count; - BYTE * RESTRICT _dest; - DWORD _iscale; - DWORD _texturefrac; - fixed_t _light; - const BYTE * RESTRICT _source; - lighttable_t * RESTRICT _colormap; - int _color; - int _pitch; - -public: - DrawShadedColumnRGBACommand() - { - _count = dc_count; - _dest = dc_dest; - _iscale = dc_iscale; - _texturefrac = dc_texturefrac; - _light = dc_light; - _source = dc_source; - _colormap = dc_colormap; - 
_color = dc_color; - _pitch = dc_pitch; - } - - void Execute(DrawerThread *thread) override - { - int count; - uint32_t *dest; - fixed_t frac, fracstep; - - count = thread->count_for_thread(_dest_y, _count); - - if (count <= 0) - return; - - dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - - fracstep = _iscale * thread->num_cores; - frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); - - uint32_t fg = shade_pal_index_simple(_color, calc_light_multiplier(_light)); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - { - const BYTE *source = _source; - BYTE *colormap = _colormap; - int pitch = _pitch * thread->num_cores; - - do - { - DWORD alpha = clamp(colormap[source[frac >> FRACBITS]], 0, 64); - DWORD inv_alpha = 64 - alpha; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = (fg_red * alpha + bg_red * inv_alpha) / 64; - uint32_t green = (fg_green * alpha + bg_green * inv_alpha) / 64; - uint32_t blue = (fg_blue * alpha + bg_blue * inv_alpha) / 64; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - frac += fracstep; - } while (--count); - } - } -}; - -class DrawAddClampColumnRGBACommand : public DrawerCommand -{ - int _count; - BYTE * RESTRICT _dest; - DWORD _iscale; - DWORD _texturefrac; - const BYTE * RESTRICT _source; - int _pitch; - fixed_t _light; - ShadeConstants _shade_constants; - fixed_t _srcalpha; - fixed_t _destalpha; - -public: - DrawAddClampColumnRGBACommand() - { - _count = dc_count; - _dest = dc_dest; - _iscale = dc_iscale; - _texturefrac = dc_texturefrac; - _source = dc_source; - _pitch = dc_pitch; - _light = dc_light; - _shade_constants = dc_shade_constants; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; - } - - void Execute(DrawerThread *thread) override - { - int count; - uint32_t *dest; - fixed_t frac; - fixed_t fracstep; - 
- count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - - fracstep = _iscale * thread->num_cores; - frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); - - { - const BYTE *source = _source; - int pitch = _pitch * thread->num_cores; - uint32_t light = calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; - - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - - do - { - uint32_t fg = shade_pal_index(source[frac >> FRACBITS], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - frac += fracstep; - } while (--count); - } - } -}; - -class DrawAddClampTranslatedColumnRGBACommand : public DrawerCommand -{ - int _count; - BYTE * RESTRICT _dest; - DWORD _iscale; - DWORD _texturefrac; - BYTE * RESTRICT _translation; - const BYTE * RESTRICT _source; - int _pitch; - fixed_t _light; - ShadeConstants _shade_constants; - fixed_t _srcalpha; - fixed_t _destalpha; - -public: - DrawAddClampTranslatedColumnRGBACommand() - { - _count = dc_count; - _dest = dc_dest; - _iscale = dc_iscale; - _texturefrac = dc_texturefrac; - _translation = dc_translation; - _source = dc_source; - _pitch = dc_pitch; - _light = dc_light; - _shade_constants = dc_shade_constants; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; - } - - void Execute(DrawerThread *thread) override - { - 
int count; - uint32_t *dest; - fixed_t frac; - fixed_t fracstep; - - count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - - fracstep = _iscale * thread->num_cores; - frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); - - { - BYTE *translation = _translation; - const BYTE *source = _source; - int pitch = _pitch * thread->num_cores; - uint32_t light = calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; - - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - - do - { - uint32_t fg = shade_pal_index(translation[source[frac >> FRACBITS]], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - frac += fracstep; - } while (--count); - } - } -}; - -class DrawSubClampColumnRGBACommand : public DrawerCommand -{ - int _count; - BYTE * RESTRICT _dest; - DWORD _iscale; - DWORD _texturefrac; - const BYTE * RESTRICT _source; - int _pitch; - fixed_t _light; - ShadeConstants _shade_constants; - fixed_t _srcalpha; - fixed_t _destalpha; - -public: - DrawSubClampColumnRGBACommand() - { - _count = dc_count; - _dest = dc_dest; - _iscale = dc_iscale; - _texturefrac = dc_texturefrac; - _source = dc_source; - _pitch = dc_pitch; - _light = dc_light; - _shade_constants = dc_shade_constants; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; - } - - void 
Execute(DrawerThread *thread) override - { - int count; - uint32_t *dest; - fixed_t frac; - fixed_t fracstep; - - count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - - fracstep = _iscale * thread->num_cores; - frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); - - { - const BYTE *source = _source; - int pitch = _pitch * thread->num_cores; - uint32_t light = calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; - - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - - do - { - uint32_t fg = shade_pal_index(source[frac >> FRACBITS], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - frac += fracstep; - } while (--count); - } - } -}; - -class DrawSubClampTranslatedColumnRGBACommand : public DrawerCommand -{ - int _count; - BYTE * RESTRICT _dest; - DWORD _iscale; - DWORD _texturefrac; - const BYTE * RESTRICT _source; - int _pitch; - fixed_t _light; - ShadeConstants _shade_constants; - fixed_t _srcalpha; - fixed_t _destalpha; - BYTE * RESTRICT _translation; - -public: - DrawSubClampTranslatedColumnRGBACommand() - { - _count = dc_count; - _dest = dc_dest; - _iscale = dc_iscale; - _texturefrac = dc_texturefrac; - _source = dc_source; - _pitch = dc_pitch; - _light = dc_light; - 
_shade_constants = dc_shade_constants; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; - _translation = dc_translation; - } - - void Execute(DrawerThread *thread) override - { - int count; - uint32_t *dest; - fixed_t frac; - fixed_t fracstep; - - count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - - fracstep = _iscale * thread->num_cores; - frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); - - { - BYTE *translation = _translation; - const BYTE *source = _source; - int pitch = _pitch * thread->num_cores; - uint32_t light = calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; - - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - - do - { - uint32_t fg = shade_pal_index(translation[source[frac >> FRACBITS]], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - frac += fracstep; - } while (--count); - } - } -}; - -class DrawRevSubClampColumnRGBACommand : public DrawerCommand -{ - int _count; - BYTE * RESTRICT _dest; - DWORD _iscale; - DWORD _texturefrac; - const BYTE * RESTRICT _source; - int _pitch; - fixed_t _light; - ShadeConstants _shade_constants; - fixed_t _srcalpha; - fixed_t _destalpha; - -public: - DrawRevSubClampColumnRGBACommand() - { - _count = dc_count; - 
_dest = dc_dest; - _iscale = dc_iscale; - _texturefrac = dc_texturefrac; - _source = dc_source; - _pitch = dc_pitch; - _light = dc_light; - _shade_constants = dc_shade_constants; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; - } - - void Execute(DrawerThread *thread) override - { - int count; - uint32_t *dest; - fixed_t frac; - fixed_t fracstep; - - count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - - fracstep = _iscale * thread->num_cores; - frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); - - { - const BYTE *source = _source; - int pitch = _pitch * thread->num_cores; - uint32_t light = calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - - do - { - uint32_t fg = shade_pal_index(source[frac >> FRACBITS], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - frac += fracstep; - } while (--count); - } - } -}; - -class DrawRevSubClampTranslatedColumnRGBACommand : public DrawerCommand -{ - int _count; - BYTE * RESTRICT _dest; - DWORD _iscale; - DWORD _texturefrac; - const BYTE * RESTRICT _source; - int _pitch; - fixed_t _light; - ShadeConstants _shade_constants; - fixed_t _srcalpha; - fixed_t _destalpha; - BYTE * 
RESTRICT _translation; - -public: - DrawRevSubClampTranslatedColumnRGBACommand() - { - _count = dc_count; - _dest = dc_dest; - _iscale = dc_iscale; - _texturefrac = dc_texturefrac; - _source = dc_source; - _pitch = dc_pitch; - _light = dc_light; - _shade_constants = dc_shade_constants; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; - _translation = dc_translation; - } - - void Execute(DrawerThread *thread) override - { - int count; - uint32_t *dest; - fixed_t frac; - fixed_t fracstep; - - count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - - fracstep = _iscale * thread->num_cores; - frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); - - { - BYTE * RESTRICT translation = _translation; - const BYTE * RESTRICT source = _source; - int pitch = _pitch * thread->num_cores; - uint32_t light = calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; - - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - - do - { - uint32_t fg = shade_pal_index(translation[source[frac >> FRACBITS]], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - frac += fracstep; - } while (--count); - } - } -}; - -class DrawSpanRGBACommand : public DrawerCommand -{ - const uint32_t * RESTRICT _source; 
fixed_t _xfrac; fixed_t _yfrac; fixed_t _xstep; @@ -1501,14 +900,17 @@ class DrawSpanRGBACommand : public DrawerCommand int _xbits; int _ybits; BYTE * RESTRICT _destorg; - fixed_t _light; + + const uint32_t * RESTRICT _source; + uint32_t _light; ShadeConstants _shade_constants; bool _magnifying; -public: - DrawSpanRGBACommand() + uint32_t _srcalpha; + uint32_t _destalpha; + + DrawerSpanCommand() { - _source = (const uint32_t*)ds_source; _xfrac = ds_xfrac; _yfrac = ds_yfrac; _xstep = ds_xstep; @@ -1519,752 +921,270 @@ public: _xbits = ds_xbits; _ybits = ds_ybits; _destorg = dc_destorg; - _light = ds_light; + + _source = (const uint32_t*)ds_source; + _light = LightBgra::calc_light_multiplier(ds_light); _shade_constants = ds_shade_constants; - _magnifying = !span_sampler_setup(_source, _xbits, _ybits, _xstep, _ystep); + _magnifying = !SampleBgra::span_sampler_setup(_source, _xbits, _ybits, _xstep, _ystep); + + _srcalpha = dc_srcalpha >> (FRACBITS - 8); + _destalpha = dc_destalpha >> (FRACBITS - 8); } + class LoopIterator + { + public: + uint32_t *dest; + int count; + dsfixed_t xfrac; + dsfixed_t yfrac; + dsfixed_t xstep; + dsfixed_t ystep; + BYTE yshift; + BYTE xshift; + int xmask; + bool is_64x64; + bool skipped; + + LoopIterator(DrawerSpanCommand *command, DrawerThread *thread) + { + dest = ylookup[command->_y] + command->_x1 + (uint32_t*)command->_destorg; + count = command->_x2 - command->_x1 + 1; + xfrac = command->_xfrac; + yfrac = command->_yfrac; + xstep = command->_xstep; + ystep = command->_ystep; + yshift = 32 - command->_ybits; + xshift = yshift - command->_xbits; + xmask = ((1 << command->_xbits) - 1) << command->_ybits; + is_64x64 = command->_xbits == 6 && command->_ybits == 6; + skipped = thread->line_skipped_by_thread(command->_y); + } + + // 64x64 is the most common case by far, so special case it. 
+ int spot64() + { + return ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + } + + int spot() + { + return ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + } + + explicit operator bool() + { + return !skipped && count > 0; + } + + bool next() + { + dest++; + xfrac += xstep; + yfrac += ystep; + return (--count) != 0; + } + }; +}; + +class DrawSpanRGBACommand : public DrawerSpanCommand +{ +public: void Execute(DrawerThread *thread) override { - if (thread->line_skipped_by_thread(_y)) - return; - - dsfixed_t xfrac; - dsfixed_t yfrac; - dsfixed_t xstep; - dsfixed_t ystep; - uint32_t* dest; - const uint32_t* source = _source; - int count; - int spot; - - xfrac = _xfrac; - yfrac = _yfrac; - - dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; - - count = _x2 - _x1 + 1; - - xstep = _xstep; - ystep = _ystep; - - uint32_t light = calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; + LoopIterator loop(this, thread); + if (!loop) return; if (_magnifying) { - if (_xbits == 6 && _ybits == 6) + if (loop.is_64x64) { - // 64x64 is the most common case by far, so special case it. - do { - // Current texture index in u,v. - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - - // Lookup pixel from flat texture tile - *dest++ = shade_bgra(source[spot], light, shade_constants); - - // Next step in u,v. - xfrac += xstep; - yfrac += ystep; - } while (--count); + *loop.dest = LightBgra::shade_bgra(_source[loop.spot64()], _light, _shade_constants); + } while (loop.next()); } else { - BYTE yshift = 32 - _ybits; - BYTE xshift = yshift - _xbits; - int xmask = ((1 << _xbits) - 1) << _ybits; - do { - // Current texture index in u,v. - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - - // Lookup pixel from flat texture tile - *dest++ = shade_bgra(source[spot], light, shade_constants); - - // Next step in u,v. 
- xfrac += xstep; - yfrac += ystep; - } while (--count); + *loop.dest = LightBgra::shade_bgra(_source[loop.spot()], _light, _shade_constants); + } while (loop.next()); } } else { - if (_xbits == 6 && _ybits == 6) + if (loop.is_64x64) { - // 64x64 is the most common case by far, so special case it. - do { - *dest++ = shade_bgra(sample_bilinear(source, xfrac, yfrac, 26, 26), light, shade_constants); - xfrac += xstep; - yfrac += ystep; - } while (--count); + *loop.dest = LightBgra::shade_bgra(SampleBgra::sample_bilinear(_source, loop.xfrac, loop.yfrac, 26, 26), _light, _shade_constants); + } while (loop.next()); } else { do { - *dest++ = shade_bgra(sample_bilinear(source, xfrac, yfrac, 32 - _xbits, 32 - _ybits), light, shade_constants); - xfrac += xstep; - yfrac += ystep; - } while (--count); + *loop.dest = LightBgra::shade_bgra(SampleBgra::sample_bilinear(_source, loop.xfrac, loop.yfrac, 32 - _xbits, 32 - _ybits), _light, _shade_constants); + } while (loop.next()); } } } }; -class DrawSpanMaskedRGBACommand : public DrawerCommand +class DrawSpanMaskedRGBACommand : public DrawerSpanCommand { - const uint32_t * RESTRICT _source; - fixed_t _light; - ShadeConstants _shade_constants; - fixed_t _xfrac; - fixed_t _yfrac; - BYTE * RESTRICT _destorg; - int _x1; - int _x2; - int _y1; - int _y; - fixed_t _xstep; - fixed_t _ystep; - int _xbits; - int _ybits; - bool _magnifying; - public: - DrawSpanMaskedRGBACommand() - { - _source = (const uint32_t*)ds_source; - _light = ds_light; - _shade_constants = ds_shade_constants; - _xfrac = ds_xfrac; - _yfrac = ds_yfrac; - _destorg = dc_destorg; - _x1 = ds_x1; - _x2 = ds_x2; - _y = ds_y; - _xstep = ds_xstep; - _ystep = ds_ystep; - _xbits = ds_xbits; - _ybits = ds_ybits; - _magnifying = !span_sampler_setup(_source, _xbits, _ybits, _xstep, _ystep); - } - void Execute(DrawerThread *thread) override { - if (thread->line_skipped_by_thread(_y)) - return; - - dsfixed_t xfrac; - dsfixed_t yfrac; - dsfixed_t xstep; - dsfixed_t ystep; - uint32_t* 
dest; - const uint32_t* source = _source; - int count; - int spot; - - uint32_t light = calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; - - xfrac = _xfrac; - yfrac = _yfrac; - - dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; - - count = _x2 - _x1 + 1; - - xstep = _xstep; - ystep = _ystep; + LoopIterator loop(this, thread); + if (!loop) return; if (_magnifying) { - if (_xbits == 6 && _ybits == 6) + if (loop.is_64x64) { - // 64x64 is the most common case by far, so special case it. do { - uint32_t texdata; - - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - texdata = source[spot]; - *dest = alpha_blend(shade_bgra(texdata, light, shade_constants), *dest); - dest++; - xfrac += xstep; - yfrac += ystep; - } while (--count); + uint32_t fg = LightBgra::shade_bgra(_source[loop.spot64()], _light, _shade_constants); + *loop.dest = BlendBgra::alpha_blend(fg, *loop.dest); + } while (loop.next()); } else { - BYTE yshift = 32 - _ybits; - BYTE xshift = yshift - _xbits; - int xmask = ((1 << _xbits) - 1) << _ybits; do { - uint32_t texdata; - - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - texdata = source[spot]; - *dest = alpha_blend(shade_bgra(texdata, light, shade_constants), *dest); - dest++; - xfrac += xstep; - yfrac += ystep; - } while (--count); + uint32_t fg = LightBgra::shade_bgra(_source[loop.spot()], _light, _shade_constants); + *loop.dest = BlendBgra::alpha_blend(fg, *loop.dest); + } while (loop.next()); } } else { - if (_xbits == 6 && _ybits == 6) + if (loop.is_64x64) { - // 64x64 is the most common case by far, so special case it. 
do { - *dest = alpha_blend(shade_bgra(sample_bilinear(source, xfrac, yfrac, 26, 26), light, shade_constants), *dest); - dest++; - xfrac += xstep; - yfrac += ystep; - } while (--count); + uint32_t fg = LightBgra::shade_bgra(SampleBgra::sample_bilinear(_source, loop.xfrac, loop.yfrac, 26, 26), _light, _shade_constants); + *loop.dest = BlendBgra::alpha_blend(fg, *loop.dest); + } while (loop.next()); } else { - BYTE yshift = 32 - _ybits; - BYTE xshift = yshift - _xbits; - int xmask = ((1 << _xbits) - 1) << _ybits; do { - *dest = alpha_blend(shade_bgra(sample_bilinear(source, xfrac, yfrac, 32 - _xbits, 32 - _ybits), light, shade_constants), *dest); - dest++; - xfrac += xstep; - yfrac += ystep; - } while (--count); + uint32_t fg = LightBgra::shade_bgra(SampleBgra::sample_bilinear(_source, loop.xfrac, loop.yfrac, 32 - _xbits, 32 - _ybits), _light, _shade_constants); + *loop.dest = BlendBgra::alpha_blend(fg, *loop.dest); + } while (loop.next()); } } } }; -class DrawSpanTranslucentRGBACommand : public DrawerCommand +class DrawSpanTranslucentRGBACommand : public DrawerSpanCommand { - const uint32_t * RESTRICT _source; - fixed_t _light; - ShadeConstants _shade_constants; - fixed_t _xfrac; - fixed_t _yfrac; - BYTE * RESTRICT _destorg; - int _x1; - int _x2; - int _y1; - int _y; - fixed_t _xstep; - fixed_t _ystep; - int _xbits; - int _ybits; - fixed_t _srcalpha; - fixed_t _destalpha; - public: - DrawSpanTranslucentRGBACommand() - { - _source = (const uint32_t *)ds_source; - _light = ds_light; - _shade_constants = ds_shade_constants; - _xfrac = ds_xfrac; - _yfrac = ds_yfrac; - _destorg = dc_destorg; - _x1 = ds_x1; - _x2 = ds_x2; - _y = ds_y; - _xstep = ds_xstep; - _ystep = ds_ystep; - _xbits = ds_xbits; - _ybits = ds_ybits; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; - } - void Execute(DrawerThread *thread) override { - if (thread->line_skipped_by_thread(_y)) - return; + LoopIterator loop(this, thread); + if (!loop) return; - dsfixed_t xfrac; - dsfixed_t yfrac; - 
dsfixed_t xstep; - dsfixed_t ystep; - uint32_t* dest; - const uint32_t* source = _source; - int count; - int spot; - - xfrac = _xfrac; - yfrac = _yfrac; - - dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; - - count = _x2 - _x1 + 1; - - xstep = _xstep; - ystep = _ystep; - - uint32_t light = calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; - - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - - if (_xbits == 6 && _ybits == 6) + if (loop.is_64x64) { - // 64x64 is the most common case by far, so special case it. do { - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - - uint32_t fg = shade_bgra(source[spot], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = (fg) & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = (fg_red * fg_alpha + bg_red * bg_alpha) / 256; - uint32_t green = (fg_green * fg_alpha + bg_green * bg_alpha) / 256; - uint32_t blue = (fg_blue * fg_alpha + bg_blue * bg_alpha) / 256; - - *dest++ = 0xff000000 | (red << 16) | (green << 8) | blue; - - xfrac += xstep; - yfrac += ystep; - } while (--count); + uint32_t fg = LightBgra::shade_bgra(_source[loop.spot64()], _light, _shade_constants); + *loop.dest = BlendBgra::add(fg, *loop.dest, _srcalpha, _destalpha); + } while (loop.next()); } else { - BYTE yshift = 32 - _ybits; - BYTE xshift = yshift - _xbits; - int xmask = ((1 << _xbits) - 1) << _ybits; do { - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - - uint32_t fg = shade_bgra(source[spot], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = (fg) & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = (fg_red 
* fg_alpha + bg_red * bg_alpha) / 256; - uint32_t green = (fg_green * fg_alpha + bg_green * bg_alpha) / 256; - uint32_t blue = (fg_blue * fg_alpha + bg_blue * bg_alpha) / 256; - - *dest++ = 0xff000000 | (red << 16) | (green << 8) | blue; - - xfrac += xstep; - yfrac += ystep; - } while (--count); + uint32_t fg = LightBgra::shade_bgra(_source[loop.spot()], _light, _shade_constants); + *loop.dest = BlendBgra::add(fg, *loop.dest, _srcalpha, _destalpha); + } while (loop.next()); } } }; -class DrawSpanMaskedTranslucentRGBACommand : public DrawerCommand +class DrawSpanMaskedTranslucentRGBACommand : public DrawerSpanCommand { - const uint32_t * RESTRICT _source; - fixed_t _light; - ShadeConstants _shade_constants; - fixed_t _xfrac; - fixed_t _yfrac; - BYTE * RESTRICT _destorg; - int _x1; - int _x2; - int _y1; - int _y; - fixed_t _xstep; - fixed_t _ystep; - int _xbits; - int _ybits; - fixed_t _srcalpha; - fixed_t _destalpha; - public: - DrawSpanMaskedTranslucentRGBACommand() - { - _source = (const uint32_t*)ds_source; - _light = ds_light; - _shade_constants = ds_shade_constants; - _xfrac = ds_xfrac; - _yfrac = ds_yfrac; - _destorg = dc_destorg; - _x1 = ds_x1; - _x2 = ds_x2; - _y = ds_y; - _xstep = ds_xstep; - _ystep = ds_ystep; - _xbits = ds_xbits; - _ybits = ds_ybits; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; - } - void Execute(DrawerThread *thread) override { - if (thread->line_skipped_by_thread(_y)) - return; + LoopIterator loop(this, thread); + if (!loop) return; - dsfixed_t xfrac; - dsfixed_t yfrac; - dsfixed_t xstep; - dsfixed_t ystep; - uint32_t* dest; - const uint32_t* source = _source; - int count; - int spot; - - uint32_t light = calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; - - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - - xfrac = _xfrac; - yfrac = _yfrac; - - dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; - - count = _x2 - _x1 + 1; - - xstep = _xstep; 
- ystep = _ystep; - - if (_xbits == 6 && _ybits == 6) + if (loop.is_64x64) { - // 64x64 is the most common case by far, so special case it. do { - uint32_t texdata; - - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - texdata = source[spot]; - if (texdata != 0) - { - uint32_t fg = shade_bgra(texdata, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = (fg) & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = (fg_red * fg_alpha + bg_red * bg_alpha) / 256; - uint32_t green = (fg_green * fg_alpha + bg_green * bg_alpha) / 256; - uint32_t blue = (fg_blue * fg_alpha + bg_blue * bg_alpha) / 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - } - dest++; - xfrac += xstep; - yfrac += ystep; - } while (--count); + uint32_t fg = LightBgra::shade_bgra(_source[loop.spot64()], _light, _shade_constants); + *loop.dest = BlendBgra::add(fg, *loop.dest, _srcalpha, calc_blend_bgalpha(fg, _destalpha)); + } while (loop.next()); } else { - BYTE yshift = 32 - _ybits; - BYTE xshift = yshift - _xbits; - int xmask = ((1 << _xbits) - 1) << _ybits; do { - uint32_t texdata; - - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - texdata = source[spot]; - if (texdata != 0) - { - uint32_t fg = shade_bgra(texdata, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = (fg) & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = (fg_red * fg_alpha + bg_red * bg_alpha) / 256; - uint32_t green = (fg_green * fg_alpha + bg_green * bg_alpha) / 256; - uint32_t blue = (fg_blue * fg_alpha + bg_blue * bg_alpha) / 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - } - dest++; - xfrac += xstep; - yfrac += ystep; - } while 
(--count); + uint32_t fg = LightBgra::shade_bgra(_source[loop.spot()], _light, _shade_constants); + *loop.dest = BlendBgra::add(fg, *loop.dest, _srcalpha, calc_blend_bgalpha(fg, _destalpha)); + } while (loop.next()); } } }; -class DrawSpanAddClampRGBACommand : public DrawerCommand +class DrawSpanAddClampRGBACommand : public DrawerSpanCommand { - const uint32_t * RESTRICT _source; - fixed_t _light; - ShadeConstants _shade_constants; - fixed_t _xfrac; - fixed_t _yfrac; - BYTE * RESTRICT _destorg; - int _x1; - int _x2; - int _y1; - int _y; - fixed_t _xstep; - fixed_t _ystep; - int _xbits; - int _ybits; - fixed_t _srcalpha; - fixed_t _destalpha; - public: - DrawSpanAddClampRGBACommand() - { - _source = (const uint32_t*)ds_source; - _light = ds_light; - _shade_constants = ds_shade_constants; - _xfrac = ds_xfrac; - _yfrac = ds_yfrac; - _destorg = dc_destorg; - _x1 = ds_x1; - _x2 = ds_x2; - _y = ds_y; - _xstep = ds_xstep; - _ystep = ds_ystep; - _xbits = ds_xbits; - _ybits = ds_ybits; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; - } - void Execute(DrawerThread *thread) override { - if (thread->line_skipped_by_thread(_y)) - return; + LoopIterator loop(this, thread); + if (!loop) return; - dsfixed_t xfrac; - dsfixed_t yfrac; - dsfixed_t xstep; - dsfixed_t ystep; - uint32_t* dest; - const uint32_t* source = _source; - int count; - int spot; - - uint32_t light = calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; - - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - - xfrac = _xfrac; - yfrac = _yfrac; - - dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; - - count = _x2 - _x1 + 1; - - xstep = _xstep; - ystep = _ystep; - - if (_xbits == 6 && _ybits == 6) + if (loop.is_64x64) { - // 64x64 is the most common case by far, so special case it. 
do { - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - - uint32_t fg = shade_bgra(source[spot], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = (fg) & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - *dest++ = 0xff000000 | (red << 16) | (green << 8) | blue; - - xfrac += xstep; - yfrac += ystep; - } while (--count); + uint32_t fg = LightBgra::shade_bgra(_source[loop.spot64()], _light, _shade_constants); + *loop.dest = BlendBgra::add(fg, *loop.dest, _srcalpha, _destalpha); + } while (loop.next()); } else { - BYTE yshift = 32 - _ybits; - BYTE xshift = yshift - _xbits; - int xmask = ((1 << _xbits) - 1) << _ybits; do { - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - - uint32_t fg = shade_bgra(source[spot], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = (fg) & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - *dest++ = 0xff000000 | (red << 16) | (green << 8) | blue; - - xfrac += xstep; - yfrac += ystep; - } while (--count); + uint32_t fg = LightBgra::shade_bgra(_source[loop.spot()], _light, _shade_constants); + *loop.dest = BlendBgra::add(fg, *loop.dest, _srcalpha, _destalpha); + } while (loop.next()); } } }; -class 
DrawSpanMaskedAddClampRGBACommand : public DrawerCommand +class DrawSpanMaskedAddClampRGBACommand : public DrawerSpanCommand { - const uint32_t * RESTRICT _source; - fixed_t _light; - ShadeConstants _shade_constants; - fixed_t _xfrac; - fixed_t _yfrac; - BYTE * RESTRICT _destorg; - int _x1; - int _x2; - int _y1; - int _y; - fixed_t _xstep; - fixed_t _ystep; - int _xbits; - int _ybits; - fixed_t _srcalpha; - fixed_t _destalpha; - public: - DrawSpanMaskedAddClampRGBACommand() - { - _source = (const uint32_t*)ds_source; - _light = ds_light; - _shade_constants = ds_shade_constants; - _xfrac = ds_xfrac; - _yfrac = ds_yfrac; - _destorg = dc_destorg; - _x1 = ds_x1; - _x2 = ds_x2; - _y = ds_y; - _xstep = ds_xstep; - _ystep = ds_ystep; - _xbits = ds_xbits; - _ybits = ds_ybits; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; - } - void Execute(DrawerThread *thread) override { - if (thread->line_skipped_by_thread(_y)) - return; + LoopIterator loop(this, thread); + if (!loop) return; - dsfixed_t xfrac; - dsfixed_t yfrac; - dsfixed_t xstep; - dsfixed_t ystep; - uint32_t* dest; - const uint32_t* source = _source; - int count; - int spot; - - uint32_t light = calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; - - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - - xfrac = _xfrac; - yfrac = _yfrac; - - dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; - - count = _x2 - _x1 + 1; - - xstep = _xstep; - ystep = _ystep; - - if (_xbits == 6 && _ybits == 6) + if (loop.is_64x64) { - // 64x64 is the most common case by far, so special case it. 
do { - uint32_t texdata; - - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - texdata = source[spot]; - if (texdata != 0) - { - uint32_t fg = shade_bgra(texdata, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = (fg) & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = (fg_red * fg_alpha + bg_red * bg_alpha) / 256; - uint32_t green = (fg_green * fg_alpha + bg_green * bg_alpha) / 256; - uint32_t blue = (fg_blue * fg_alpha + bg_blue * bg_alpha) / 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - } - dest++; - xfrac += xstep; - yfrac += ystep; - } while (--count); + uint32_t fg = LightBgra::shade_bgra(_source[loop.spot64()], _light, _shade_constants); + *loop.dest = BlendBgra::add(fg, *loop.dest, _srcalpha, calc_blend_bgalpha(fg, _destalpha)); + } while (loop.next()); } else { - BYTE yshift = 32 - _ybits; - BYTE xshift = yshift - _xbits; - int xmask = ((1 << _xbits) - 1) << _ybits; do { - uint32_t texdata; - - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - texdata = source[spot]; - if (texdata != 0) - { - uint32_t fg = shade_bgra(texdata, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = (fg) & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = (fg_red * fg_alpha + bg_red * bg_alpha) / 256; - uint32_t green = (fg_green * fg_alpha + bg_green * bg_alpha) / 256; - uint32_t blue = (fg_blue * fg_alpha + bg_blue * bg_alpha) / 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - } - dest++; - xfrac += xstep; - yfrac += ystep; - } while (--count); + uint32_t fg = LightBgra::shade_bgra(_source[loop.spot()], _light, _shade_constants); + *loop.dest = BlendBgra::add(fg, 
*loop.dest, _srcalpha, calc_blend_bgalpha(fg, _destalpha)); + } while (loop.next()); } } }; @@ -2296,13 +1216,15 @@ public: uint32_t *dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; int count = (_x2 - _x1 + 1); - uint32_t light = calc_light_multiplier(_light); - uint32_t color = shade_pal_index_simple(_color, light); + uint32_t light = LightBgra::calc_light_multiplier(_light); + uint32_t color = LightBgra::shade_pal_index_simple(_color, light); for (int i = 0; i < count; i++) dest[i] = color; } }; +///////////////////////////////////////////////////////////////////////////// + class DrawSlabRGBACommand : public DrawerCommand { int _dx; @@ -2344,7 +1266,7 @@ public: uint32_t *p = _p; ShadeConstants shade_constants = _shade_constants; const BYTE *colormap = _colormap; - uint32_t light = calc_light_multiplier(_light); + uint32_t light = LightBgra::calc_light_multiplier(_light); int pitch = _pitch; int x; @@ -2358,7 +1280,7 @@ public: { while (dy > 0) { - *p = shade_pal_index(colormap[vptr[v >> FRACBITS]], light, shade_constants); + *p = LightBgra::shade_pal_index(colormap[vptr[v >> FRACBITS]], light, shade_constants); p += pitch; v += vi; dy--; @@ -2368,7 +1290,7 @@ public: { while (dy > 0) { - uint32_t color = shade_pal_index(colormap[vptr[v >> FRACBITS]], light, shade_constants); + uint32_t color = LightBgra::shade_pal_index(colormap[vptr[v >> FRACBITS]], light, shade_constants); p[0] = color; p[1] = color; p += pitch; @@ -2380,7 +1302,7 @@ public: { while (dy > 0) { - uint32_t color = shade_pal_index(colormap[vptr[v >> FRACBITS]], light, shade_constants); + uint32_t color = LightBgra::shade_pal_index(colormap[vptr[v >> FRACBITS]], light, shade_constants); p[0] = color; p[1] = color; p[2] = color; @@ -2393,7 +1315,7 @@ public: { while (dy > 0) { - uint32_t color = shade_pal_index(colormap[vptr[v >> FRACBITS]], light, shade_constants); + uint32_t color = LightBgra::shade_pal_index(colormap[vptr[v >> FRACBITS]], light, shade_constants); p[0] = color; p[1] = color; 
p[2] = color; @@ -2405,7 +1327,7 @@ public: } else while (dy > 0) { - uint32_t color = shade_pal_index(colormap[vptr[v >> FRACBITS]], light, shade_constants); + uint32_t color = LightBgra::shade_pal_index(colormap[vptr[v >> FRACBITS]], light, shade_constants); // The optimizer will probably turn this into a memset call. // Since dx is not likely to be large, I'm not sure that's a good thing, // hence the alternatives above. @@ -2420,1000 +1342,484 @@ public: } }; -class Vlinec1RGBACommand : public DrawerCommand +///////////////////////////////////////////////////////////////////////////// + +class DrawerWall1Command : public DrawerCommand { - DWORD _iscale; - DWORD _texturefrac; - int _count; - const BYTE * RESTRICT _source; - const BYTE * RESTRICT _source2; - uint32_t _texturefracx; +public: BYTE * RESTRICT _dest; - int vlinebits; int _pitch; - fixed_t _light; + int _count; + DWORD _texturefrac; + uint32_t _texturefracx; + DWORD _iscale; + int _vlinebits; + uint32_t _vlinemax; + + const uint32 * RESTRICT _source; + const uint32 * RESTRICT _source2; + uint32_t _light; ShadeConstants _shade_constants; -public: - Vlinec1RGBACommand() + uint32_t _srcalpha; + uint32_t _destalpha; + + DrawerWall1Command(int vlinebits, uint32_t vlinemax) { - _iscale = dc_iscale; - _texturefrac = dc_texturefrac; - _count = dc_count; - _source = dc_source; - _source2 = dc_source2; - _texturefracx = dc_texturefracx; _dest = dc_dest; - vlinebits = ::vlinebits; _pitch = dc_pitch; - _light = dc_light; + _count = dc_count; + _texturefrac = dc_texturefrac; + _texturefracx = dc_texturefracx; + _iscale = dc_iscale; + _vlinebits = vlinebits; + _vlinemax = vlinemax; + + _source = (const uint32 *)dc_source; + _source2 = (const uint32 *)dc_source2; + _light = LightBgra::calc_light_multiplier(dc_light); _shade_constants = dc_shade_constants; + + _srcalpha = dc_srcalpha >> (FRACBITS - 8); + _destalpha = dc_destalpha >> (FRACBITS - 8); + } + + class LoopIterator + { + public: + uint32_t *dest; + int 
pitch; + int count; + uint32_t fracstep; + uint32_t frac; + uint32_t texturefracx; + int bits; + + LoopIterator(DrawerWall1Command *command, DrawerThread *thread) + { + count = thread->count_for_thread(command->_dest_y, command->_count); + if (count <= 0) + return; + + fracstep = command->_iscale * thread->num_cores; + frac = command->_texturefrac + command->_iscale * thread->skipped_by_thread(command->_dest_y); + texturefracx = command->_texturefracx; + dest = thread->dest_for_thread(command->_dest_y, command->_pitch, (uint32_t*)command->_dest); + bits = command->_vlinebits; + pitch = command->_pitch * thread->num_cores; + } + + explicit operator bool() + { + return count > 0; + } + + int sample_index() + { + return frac >> bits; + } + + bool next() + { + frac += fracstep; + dest += pitch; + return (--count) != 0; + } + }; +}; + +class DrawerWall4Command : public DrawerCommand +{ +public: + BYTE * RESTRICT _dest; + int _count; + int _pitch; + int _vlinebits; + uint32_t _vlinemax; + ShadeConstants _shade_constants; + uint32_t _vplce[4]; + uint32_t _vince[4]; + uint32_t _buftexturefracx[4]; + const uint32_t * RESTRICT _bufplce[4]; + const uint32_t * RESTRICT _bufplce2[4]; + uint32_t _light[4]; + + uint32_t _srcalpha; + uint32_t _destalpha; + + DrawerWall4Command(int vlinebits, uint32_t vlinemax) + { + _dest = dc_dest; + _count = dc_count; + _pitch = dc_pitch; + _vlinebits = vlinebits; + _vlinemax = vlinemax; + _shade_constants = dc_shade_constants; + for (int i = 0; i < 4; i++) + { + _vplce[i] = vplce[i]; + _vince[i] = vince[i]; + _buftexturefracx[i] = buftexturefracx[i]; + _bufplce[i] = (const uint32_t *)bufplce[i]; + _bufplce2[i] = (const uint32_t *)bufplce2[i]; + _light[i] = LightBgra::calc_light_multiplier(palookuplight[i]); + } + _srcalpha = dc_srcalpha >> (FRACBITS - 8); + _destalpha = dc_destalpha >> (FRACBITS - 8); + } + + class LoopIterator + { + public: + uint32_t *dest; + int pitch; + int count; + int bits; + uint32_t vplce[4]; + uint32_t vince[4]; + + 
LoopIterator(DrawerWall4Command *command, DrawerThread *thread) + { + count = thread->count_for_thread(command->_dest_y, command->_count); + if (count <= 0) + return; + + dest = thread->dest_for_thread(command->_dest_y, command->_pitch, (uint32_t*)command->_dest); + pitch = command->_pitch * thread->num_cores; + bits = command->_vlinebits; + + int skipped = thread->skipped_by_thread(command->_dest_y); + for (int i = 0; i < 4; i++) + { + vplce[i] = command->_vplce[i] + command->_vince[i] * skipped; + vince[i] = command->_vince[i] * thread->num_cores; + } + } + + explicit operator bool() + { + return count > 0; + } + + int sample_index(int col) + { + return vplce[col] >> bits; + } + + bool next() + { + vplce[0] += vince[0]; + vplce[1] += vince[1]; + vplce[2] += vince[2]; + vplce[3] += vince[3]; + dest += pitch; + return (--count) != 0; + } + }; +}; + +class Vlinec1RGBACommand : public DrawerWall1Command +{ +public: + Vlinec1RGBACommand() : DrawerWall1Command(vlinebits, vlinemax) + { } void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - DWORD fracstep = _iscale * thread->num_cores; - DWORD frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); - const uint32 *source = (const uint32 *)_source; - const uint32 *source2 = (const uint32 *)_source2; - uint32_t texturefracx = _texturefracx; - uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - int bits = vlinebits; - int pitch = _pitch * thread->num_cores; - - uint32_t light = calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; + LoopIterator loop(this, thread); + if (!loop) return; if (_source2 == nullptr) { do { - *dest = shade_bgra(source[frac >> bits], light, shade_constants); - frac += fracstep; - dest += pitch; - } while (--count); + uint32_t fg = LightBgra::shade_bgra(_source[loop.sample_index()], _light, _shade_constants); + *loop.dest = BlendBgra::copy(fg); + } 
while (loop.next()); } else { do { - *dest = shade_bgra(sample_bilinear(source, source2, texturefracx, frac, bits), light, shade_constants); - frac += fracstep; - dest += pitch; - } while (--count); + uint32_t fg = LightBgra::shade_bgra(SampleBgra::sample_bilinear(_source, _source2, loop.texturefracx, loop.frac, loop.bits, _vlinemax), _light, _shade_constants); + *loop.dest = BlendBgra::copy(fg); + } while (loop.next()); } } }; -class Vlinec4RGBACommand : public DrawerCommand +class Vlinec4RGBACommand : public DrawerWall4Command { - BYTE * RESTRICT _dest; - int _count; - int _pitch; - ShadeConstants _shade_constants; - int vlinebits; - fixed_t palookuplight[4]; - DWORD vplce[4]; - DWORD vince[4]; - const uint32_t * RESTRICT bufplce[4]; - const uint32_t * RESTRICT bufplce2[4]; - uint32_t buftexturefracx[4]; - public: - Vlinec4RGBACommand() + Vlinec4RGBACommand() : DrawerWall4Command(vlinebits, vlinemax) { - _dest = dc_dest; - _count = dc_count; - _pitch = dc_pitch; - _shade_constants = dc_shade_constants; - vlinebits = ::vlinebits; - for (int i = 0; i < 4; i++) - { - palookuplight[i] = ::palookuplight[i]; - vplce[i] = ::vplce[i]; - vince[i] = ::vince[i]; - bufplce[i] = (const uint32_t *)::bufplce[i]; - bufplce2[i] = (const uint32_t *)::bufplce2[i]; - buftexturefracx[i] = ::buftexturefracx[i]; - } } void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; + LoopIterator loop(this, thread); + if (!loop) return; - uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - int pitch = _pitch * thread->num_cores; - int bits = vlinebits; - DWORD place; - - uint32_t light0 = calc_light_multiplier(palookuplight[0]); - uint32_t light1 = calc_light_multiplier(palookuplight[1]); - uint32_t light2 = calc_light_multiplier(palookuplight[2]); - uint32_t light3 = calc_light_multiplier(palookuplight[3]); - - ShadeConstants shade_constants = _shade_constants; - - DWORD local_vplce[4] = { 
vplce[0], vplce[1], vplce[2], vplce[3] }; - DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = thread->skipped_by_thread(_dest_y); - for (int i = 0; i < 4; i++) - { - local_vplce[i] += local_vince[i] * skipped; - local_vince[i] *= thread->num_cores; - } - - if (bufplce2[0] == nullptr) + if (_bufplce2[0] == nullptr) { do { - dest[0] = shade_bgra(bufplce[0][(place = local_vplce[0]) >> bits], light0, shade_constants); local_vplce[0] = place + local_vince[0]; - dest[1] = shade_bgra(bufplce[1][(place = local_vplce[1]) >> bits], light1, shade_constants); local_vplce[1] = place + local_vince[1]; - dest[2] = shade_bgra(bufplce[2][(place = local_vplce[2]) >> bits], light2, shade_constants); local_vplce[2] = place + local_vince[2]; - dest[3] = shade_bgra(bufplce[3][(place = local_vplce[3]) >> bits], light3, shade_constants); local_vplce[3] = place + local_vince[3]; - dest += pitch; - } while (--count); + for (int i = 0; i < 4; i++) + { + uint32_t fg = LightBgra::shade_bgra(_bufplce[i][loop.sample_index(i)], _light[i], _shade_constants); + loop.dest[i] = BlendBgra::copy(fg); + } + } while (loop.next()); } else { do { - dest[0] = shade_bgra(sample_bilinear(bufplce[0], bufplce2[0], buftexturefracx[0], place = local_vplce[0], bits), light0, shade_constants); local_vplce[0] = place + local_vince[0]; - dest[1] = shade_bgra(sample_bilinear(bufplce[1], bufplce2[1], buftexturefracx[1], place = local_vplce[1], bits), light1, shade_constants); local_vplce[1] = place + local_vince[1]; - dest[2] = shade_bgra(sample_bilinear(bufplce[2], bufplce2[2], buftexturefracx[2], place = local_vplce[2], bits), light2, shade_constants); local_vplce[2] = place + local_vince[2]; - dest[3] = shade_bgra(sample_bilinear(bufplce[3], bufplce2[3], buftexturefracx[3], place = local_vplce[3], bits), light3, shade_constants); local_vplce[3] = place + local_vince[3]; - dest += pitch; - } while (--count); + for (int i = 0; i < 4; i++) + { + uint32_t fg = 
LightBgra::shade_bgra(SampleBgra::sample_bilinear(_bufplce[i], _bufplce2[i], _buftexturefracx[i], loop.sample_index(i), loop.bits, _vlinemax), _light[i], _shade_constants); + loop.dest[i] = BlendBgra::copy(fg); + } + } while (loop.next()); } } }; -class Mvlinec1RGBACommand : public DrawerCommand +class Mvlinec1RGBACommand : public DrawerWall1Command { - DWORD _iscale; - DWORD _texturefrac; - int _count; - const BYTE * RESTRICT _source; - const BYTE * RESTRICT _source2; - uint32_t _texturefracx; - BYTE * RESTRICT _dest; - int mvlinebits; - int _pitch; - fixed_t _light; - ShadeConstants _shade_constants; - public: - Mvlinec1RGBACommand() + Mvlinec1RGBACommand() : DrawerWall1Command(mvlinebits, mvlinemax) { - _iscale = dc_iscale; - _texturefrac = dc_texturefrac; - _count = dc_count; - _source = dc_source; - _source2 = dc_source2; - _texturefracx = dc_texturefracx; - _dest = dc_dest; - mvlinebits = ::mvlinebits; - _pitch = dc_pitch; - _light = dc_light; - _shade_constants = dc_shade_constants; } void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - DWORD fracstep = _iscale * thread->num_cores; - DWORD frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); - const uint32 *source = (const uint32 *)_source; - const uint32 *source2 = (const uint32 *)_source2; - uint32_t texturefracx = _texturefracx; - uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - int bits = mvlinebits; - int pitch = _pitch * thread->num_cores; - - uint32_t light = calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; + LoopIterator loop(this, thread); + if (!loop) return; if (_source2 == nullptr) { do { - uint32_t pix = source[frac >> bits]; - *dest = alpha_blend(shade_bgra(pix, light, shade_constants), *dest); - frac += fracstep; - dest += pitch; - } while (--count); + uint32_t fg = LightBgra::shade_bgra(_source[loop.sample_index()], _light, 
_shade_constants); + *loop.dest = BlendBgra::alpha_blend(fg, *loop.dest); + } while (loop.next()); } else { do { - *dest = alpha_blend(shade_bgra(sample_bilinear(source, source2, texturefracx, frac, bits), light, shade_constants), *dest); - frac += fracstep; - dest += pitch; - } while (--count); + uint32_t fg = LightBgra::shade_bgra(SampleBgra::sample_bilinear(_source, _source2, loop.texturefracx, loop.frac, loop.bits, _vlinemax), _light, _shade_constants); + *loop.dest = BlendBgra::alpha_blend(fg, *loop.dest); + } while (loop.next()); } } }; -class Mvlinec4RGBACommand : public DrawerCommand +class Mvlinec4RGBACommand : public DrawerWall4Command { - BYTE * RESTRICT _dest; - int _count; - int _pitch; - ShadeConstants _shade_constants; - int mvlinebits; - fixed_t palookuplight[4]; - DWORD vplce[4]; - DWORD vince[4]; - const uint32 * RESTRICT bufplce[4]; - const uint32 * RESTRICT bufplce2[4]; - uint32_t buftexturefracx[4]; - public: - Mvlinec4RGBACommand() + Mvlinec4RGBACommand(): DrawerWall4Command(mvlinebits, mvlinemax) { - _dest = dc_dest; - _count = dc_count; - _pitch = dc_pitch; - _shade_constants = dc_shade_constants; - mvlinebits = ::mvlinebits; - for (int i = 0; i < 4; i++) - { - palookuplight[i] = ::palookuplight[i]; - vplce[i] = ::vplce[i]; - vince[i] = ::vince[i]; - bufplce[i] = (const uint32 *)::bufplce[i]; - bufplce2[i] = (const uint32_t *)::bufplce2[i]; - buftexturefracx[i] = ::buftexturefracx[i]; - } } void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; + LoopIterator loop(this, thread); + if (!loop) return; - uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - int pitch = _pitch * thread->num_cores; - int bits = mvlinebits; - DWORD place; - - uint32_t light0 = calc_light_multiplier(palookuplight[0]); - uint32_t light1 = calc_light_multiplier(palookuplight[1]); - uint32_t light2 = calc_light_multiplier(palookuplight[2]); - uint32_t light3 = 
calc_light_multiplier(palookuplight[3]); - - ShadeConstants shade_constants = _shade_constants; - - DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; - DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = thread->skipped_by_thread(_dest_y); - for (int i = 0; i < 4; i++) - { - local_vplce[i] += local_vince[i] * skipped; - local_vince[i] *= thread->num_cores; - } - - if (bufplce2[0] == nullptr) + if (_bufplce2[0] == nullptr) { do { - uint32_t pix; - pix = bufplce[0][(place = local_vplce[0]) >> bits]; dest[0] = alpha_blend(shade_bgra(pix, light0, shade_constants), dest[0]); local_vplce[0] = place + local_vince[0]; - pix = bufplce[1][(place = local_vplce[1]) >> bits]; dest[1] = alpha_blend(shade_bgra(pix, light1, shade_constants), dest[1]); local_vplce[1] = place + local_vince[1]; - pix = bufplce[2][(place = local_vplce[2]) >> bits]; dest[2] = alpha_blend(shade_bgra(pix, light2, shade_constants), dest[2]); local_vplce[2] = place + local_vince[2]; - pix = bufplce[3][(place = local_vplce[3]) >> bits]; dest[3] = alpha_blend(shade_bgra(pix, light3, shade_constants), dest[3]); local_vplce[3] = place + local_vince[3]; - dest += pitch; - } while (--count); + for (int i = 0; i < 4; i++) + { + uint32_t fg = LightBgra::shade_bgra(_bufplce[i][loop.sample_index(i)], _light[i], _shade_constants); + loop.dest[i] = BlendBgra::alpha_blend(fg, loop.dest[i]); + } + } while (loop.next()); } else { do { - dest[0] = alpha_blend(shade_bgra(sample_bilinear(bufplce[0], bufplce2[0], buftexturefracx[0], place = local_vplce[0], bits), light0, shade_constants), dest[0]); local_vplce[0] = place + local_vince[0]; - dest[1] = alpha_blend(shade_bgra(sample_bilinear(bufplce[1], bufplce2[1], buftexturefracx[1], place = local_vplce[1], bits), light1, shade_constants), dest[1]); local_vplce[1] = place + local_vince[1]; - dest[2] = alpha_blend(shade_bgra(sample_bilinear(bufplce[2], bufplce2[2], buftexturefracx[2], place = local_vplce[2], bits), light2, 
shade_constants), dest[2]); local_vplce[2] = place + local_vince[2]; - dest[3] = alpha_blend(shade_bgra(sample_bilinear(bufplce[3], bufplce2[3], buftexturefracx[3], place = local_vplce[3], bits), light3, shade_constants), dest[3]); local_vplce[3] = place + local_vince[3]; - dest += pitch; - } while (--count); + for (int i = 0; i < 4; i++) + { + uint32_t fg = LightBgra::shade_bgra(SampleBgra::sample_bilinear(_bufplce[i], _bufplce2[i], _buftexturefracx[i], loop.sample_index(i), loop.bits, _vlinemax), _light[i], _shade_constants); + loop.dest[i] = BlendBgra::alpha_blend(fg, loop.dest[i]); + } + } while (loop.next()); } } }; -class Tmvline1AddRGBACommand : public DrawerCommand +class Tmvline1AddRGBACommand : public DrawerWall1Command { - DWORD _iscale; - DWORD _texturefrac; - int _count; - const BYTE * RESTRICT _source; - BYTE * RESTRICT _dest; - int tmvlinebits; - int _pitch; - fixed_t _light; - ShadeConstants _shade_constants; - fixed_t _srcalpha; - fixed_t _destalpha; - public: - Tmvline1AddRGBACommand() + Tmvline1AddRGBACommand() : DrawerWall1Command(tmvlinebits, tmvlinemax) { - _iscale = dc_iscale; - _texturefrac = dc_texturefrac; - _count = dc_count; - _source = dc_source; - _dest = dc_dest; - tmvlinebits = ::tmvlinebits; - _pitch = dc_pitch; - _light = dc_light; - _shade_constants = dc_shade_constants; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - DWORD fracstep = _iscale * thread->num_cores; - DWORD frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); - const uint32 *source = (const uint32 *)_source; - uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - int bits = tmvlinebits; - int pitch = _pitch * thread->num_cores; - - uint32_t light = calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; - - uint32_t src_alpha = _srcalpha >> (FRACBITS - 8); - 
uint32_t dest_alpha = _destalpha >> (FRACBITS - 8); - + LoopIterator loop(this, thread); + if (!loop) return; do { - uint32_t pix = source[frac >> bits]; - - uint32_t fg_alpha = src_alpha; - uint32_t bg_alpha = calc_blend_bgalpha(pix, dest_alpha); - - uint32_t fg = shade_bgra(pix, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - - frac += fracstep; - dest += pitch; - } while (--count); + uint32_t fg = LightBgra::shade_bgra(_source[loop.sample_index()], _light, _shade_constants); + *loop.dest = BlendBgra::add(fg, *loop.dest, _srcalpha, calc_blend_bgalpha(fg, _destalpha)); + } while (loop.next()); } }; -class Tmvline4AddRGBACommand : public DrawerCommand +class Tmvline4AddRGBACommand : public DrawerWall4Command { - BYTE * RESTRICT _dest; - int _count; - int _pitch; - ShadeConstants _shade_constants; - fixed_t _srcalpha; - fixed_t _destalpha; - int tmvlinebits; - fixed_t palookuplight[4]; - DWORD vplce[4]; - DWORD vince[4]; - const uint32 * RESTRICT bufplce[4]; - public: - Tmvline4AddRGBACommand() + Tmvline4AddRGBACommand() : DrawerWall4Command(tmvlinebits, tmvlinemax) { - _dest = dc_dest; - _count = dc_count; - _pitch = dc_pitch; - _shade_constants = dc_shade_constants; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; - tmvlinebits = ::tmvlinebits; - for (int i = 0; i < 4; i++) - { - palookuplight[i] = ::palookuplight[i]; - vplce[i] = ::vplce[i]; - vince[i] = ::vince[i]; - bufplce[i] = (const uint32 *)::bufplce[i]; - } } void Execute(DrawerThread 
*thread) override { - int count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - int pitch = _pitch * thread->num_cores; - int bits = tmvlinebits; - - uint32_t light[4]; - light[0] = calc_light_multiplier(palookuplight[0]); - light[1] = calc_light_multiplier(palookuplight[1]); - light[2] = calc_light_multiplier(palookuplight[2]); - light[3] = calc_light_multiplier(palookuplight[3]); - - ShadeConstants shade_constants = _shade_constants; - - uint32_t src_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t dest_alpha = _destalpha >> (FRACBITS - 8); - - DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; - DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = thread->skipped_by_thread(_dest_y); - for (int i = 0; i < 4; i++) - { - local_vplce[i] += local_vince[i] * skipped; - local_vince[i] *= thread->num_cores; - } - + LoopIterator loop(this, thread); + if (!loop) return; do { - for (int i = 0; i < 4; ++i) + for (int i = 0; i < 4; i++) { - uint32_t pix = bufplce[i][local_vplce[i] >> bits]; - - uint32_t fg_alpha = src_alpha; - uint32_t bg_alpha = calc_blend_bgalpha(pix, dest_alpha); - - uint32_t fg = shade_bgra(pix, light[i], shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - - local_vplce[i] += local_vince[i]; + uint32_t fg = LightBgra::shade_bgra(_bufplce[i][loop.sample_index(i)], _light[i], _shade_constants); + 
loop.dest[i] = BlendBgra::add(fg, loop.dest[i], _srcalpha, calc_blend_bgalpha(fg, _destalpha)); } - dest += pitch; - } while (--count); + } while (loop.next()); } }; -class Tmvline1AddClampRGBACommand : public DrawerCommand +class Tmvline1AddClampRGBACommand : public DrawerWall1Command { - DWORD _iscale; - DWORD _texturefrac; - int _count; - const BYTE * RESTRICT _source; - BYTE * RESTRICT _dest; - int tmvlinebits; - int _pitch; - fixed_t _light; - ShadeConstants _shade_constants; - fixed_t _srcalpha; - fixed_t _destalpha; - public: - Tmvline1AddClampRGBACommand() + Tmvline1AddClampRGBACommand() : DrawerWall1Command(tmvlinebits, tmvlinemax) { - _iscale = dc_iscale; - _texturefrac = dc_texturefrac; - _count = dc_count; - _source = dc_source; - _dest = dc_dest; - tmvlinebits = ::tmvlinebits; - _pitch = dc_pitch; - _light = dc_light; - _shade_constants = dc_shade_constants; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - DWORD fracstep = _iscale * thread->num_cores; - DWORD frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); - const uint32 *source = (const uint32 *)_source; - uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - int bits = tmvlinebits; - int pitch = _pitch * thread->num_cores; - - uint32_t light = calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; - - uint32_t src_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t dest_alpha = _destalpha >> (FRACBITS - 8); - + LoopIterator loop(this, thread); + if (!loop) return; do { - uint32_t pix = source[frac >> bits]; - - uint32_t fg_alpha = src_alpha; - uint32_t bg_alpha = calc_blend_bgalpha(pix, dest_alpha); - - uint32_t fg = shade_bgra(pix, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = 
(*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - - frac += fracstep; - dest += pitch; - } while (--count); + uint32_t fg = LightBgra::shade_bgra(_source[loop.sample_index()], _light, _shade_constants); + *loop.dest = BlendBgra::add(fg, *loop.dest, _srcalpha, calc_blend_bgalpha(fg, _destalpha)); + } while (loop.next()); } }; -class Tmvline4AddClampRGBACommand : public DrawerCommand +class Tmvline4AddClampRGBACommand : public DrawerWall4Command { - BYTE * RESTRICT _dest; - int _count; - int _pitch; - ShadeConstants _shade_constants; - fixed_t _srcalpha; - fixed_t _destalpha; - int tmvlinebits; - fixed_t palookuplight[4]; - DWORD vplce[4]; - DWORD vince[4]; - const uint32 *RESTRICT bufplce[4]; - public: - Tmvline4AddClampRGBACommand() + Tmvline4AddClampRGBACommand() : DrawerWall4Command(tmvlinebits, tmvlinemax) { - _dest = dc_dest; - _count = dc_count; - _pitch = dc_pitch; - _shade_constants = dc_shade_constants; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; - tmvlinebits = ::tmvlinebits; - for (int i = 0; i < 4; i++) - { - palookuplight[i] = ::palookuplight[i]; - vplce[i] = ::vplce[i]; - vince[i] = ::vince[i]; - bufplce[i] = (const uint32 *)::bufplce[i]; - } } void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - int pitch = _pitch * thread->num_cores; - int bits = tmvlinebits; - - uint32_t light[4]; - light[0] = calc_light_multiplier(palookuplight[0]); - light[1] = calc_light_multiplier(palookuplight[1]); - light[2] = 
calc_light_multiplier(palookuplight[2]); - light[3] = calc_light_multiplier(palookuplight[3]); - - ShadeConstants shade_constants = _shade_constants; - - uint32_t src_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t dest_alpha = _destalpha >> (FRACBITS - 8); - - DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; - DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = thread->skipped_by_thread(_dest_y); - for (int i = 0; i < 4; i++) - { - local_vplce[i] += local_vince[i] * skipped; - local_vince[i] *= thread->num_cores; - } - + LoopIterator loop(this, thread); + if (!loop) return; do { - for (int i = 0; i < 4; ++i) + for (int i = 0; i < 4; i++) { - uint32_t pix = bufplce[i][local_vplce[i] >> bits]; - - uint32_t fg_alpha = src_alpha; - uint32_t bg_alpha = calc_blend_bgalpha(pix, dest_alpha); - - uint32_t fg = shade_bgra(pix, light[i], shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (dest[i] >> 16) & 0xff; - uint32_t bg_green = (dest[i] >> 8) & 0xff; - uint32_t bg_blue = (dest[i]) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - - local_vplce[i] += local_vince[i]; + uint32_t fg = LightBgra::shade_bgra(_bufplce[i][loop.sample_index(i)], _light[i], _shade_constants); + loop.dest[i] = BlendBgra::add(fg, loop.dest[i], _srcalpha, calc_blend_bgalpha(fg, _destalpha)); } - dest += pitch; - } while (--count); + } while (loop.next()); } }; -class Tmvline1SubClampRGBACommand : public DrawerCommand +class Tmvline1SubClampRGBACommand : public DrawerWall1Command { - DWORD _iscale; - DWORD _texturefrac; - int _count; - const BYTE * RESTRICT _source; - BYTE * RESTRICT 
_dest; - int tmvlinebits; - int _pitch; - fixed_t _light; - ShadeConstants _shade_constants; - fixed_t _srcalpha; - fixed_t _destalpha; - public: - Tmvline1SubClampRGBACommand() + Tmvline1SubClampRGBACommand() : DrawerWall1Command(tmvlinebits, tmvlinemax) { - _iscale = dc_iscale; - _texturefrac = dc_texturefrac; - _count = dc_count; - _source = dc_source; - _dest = dc_dest; - tmvlinebits = ::tmvlinebits; - _pitch = dc_pitch; - _light = dc_light; - _shade_constants = dc_shade_constants; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - DWORD fracstep = _iscale * thread->num_cores; - DWORD frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); - const uint32 *source = (const uint32 *)_source; - uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - int bits = tmvlinebits; - int pitch = _pitch * thread->num_cores; - - uint32_t light = calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; - - uint32_t src_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t dest_alpha = _destalpha >> (FRACBITS - 8); - + LoopIterator loop(this, thread); + if (!loop) return; do { - uint32_t pix = source[frac >> bits]; - - uint32_t fg_alpha = src_alpha; - uint32_t bg_alpha = calc_blend_bgalpha(pix, dest_alpha); - - uint32_t fg = shade_bgra(pix, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 
256, 256, 256 + 255) - 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - - frac += fracstep; - dest += pitch; - } while (--count); + uint32_t fg = LightBgra::shade_bgra(_source[loop.sample_index()], _light, _shade_constants); + *loop.dest = BlendBgra::sub(fg, *loop.dest, _srcalpha, calc_blend_bgalpha(fg, _destalpha)); + } while (loop.next()); } }; -class Tmvline4SubClampRGBACommand : public DrawerCommand +class Tmvline4SubClampRGBACommand : public DrawerWall4Command { - BYTE * RESTRICT _dest; - int _count; - int _pitch; - ShadeConstants _shade_constants; - fixed_t _srcalpha; - fixed_t _destalpha; - int tmvlinebits; - fixed_t palookuplight[4]; - DWORD vplce[4]; - DWORD vince[4]; - const uint32 *RESTRICT bufplce[4]; - public: - Tmvline4SubClampRGBACommand() + Tmvline4SubClampRGBACommand() : DrawerWall4Command(tmvlinebits, tmvlinemax) { - _dest = dc_dest; - _count = dc_count; - _pitch = dc_pitch; - _shade_constants = dc_shade_constants; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; - tmvlinebits = ::tmvlinebits; - for (int i = 0; i < 4; i++) - { - palookuplight[i] = ::palookuplight[i]; - vplce[i] = ::vplce[i]; - vince[i] = ::vince[i]; - bufplce[i] = (const uint32 *)::bufplce[i]; - } } void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - int pitch = _pitch * thread->num_cores; - int bits = tmvlinebits; - - uint32_t light[4]; - light[0] = calc_light_multiplier(palookuplight[0]); - light[1] = calc_light_multiplier(palookuplight[1]); - light[2] = calc_light_multiplier(palookuplight[2]); - light[3] = calc_light_multiplier(palookuplight[3]); - - ShadeConstants shade_constants = _shade_constants; - - uint32_t src_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t dest_alpha = _destalpha >> (FRACBITS - 8); - - DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; - DWORD 
local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = thread->skipped_by_thread(_dest_y); - for (int i = 0; i < 4; i++) - { - local_vplce[i] += local_vince[i] * skipped; - local_vince[i] *= thread->num_cores; - } - + LoopIterator loop(this, thread); + if (!loop) return; do { - for (int i = 0; i < 4; ++i) + for (int i = 0; i < 4; i++) { - uint32_t pix = bufplce[i][local_vplce[i] >> bits]; - - uint32_t fg_alpha = src_alpha; - uint32_t bg_alpha = calc_blend_bgalpha(pix, dest_alpha); - - uint32_t fg = shade_bgra(pix, light[i], shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (dest[i] >> 16) & 0xff; - uint32_t bg_green = (dest[i] >> 8) & 0xff; - uint32_t bg_blue = (dest[i]) & 0xff; - - uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; - - dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - - local_vplce[i] += local_vince[i]; + uint32_t fg = LightBgra::shade_bgra(_bufplce[i][loop.sample_index(i)], _light[i], _shade_constants); + loop.dest[i] = BlendBgra::sub(fg, loop.dest[i], _srcalpha, calc_blend_bgalpha(fg, _destalpha)); } - dest += pitch; - } while (--count); + } while (loop.next()); } }; -class Tmvline1RevSubClampRGBACommand : public DrawerCommand +class Tmvline1RevSubClampRGBACommand : public DrawerWall1Command { - DWORD _iscale; - DWORD _texturefrac; - int _count; - const BYTE * RESTRICT _source; - BYTE * RESTRICT _dest; - int tmvlinebits; - int _pitch; - fixed_t _light; - ShadeConstants _shade_constants; - fixed_t _srcalpha; - fixed_t _destalpha; - public: - Tmvline1RevSubClampRGBACommand() + Tmvline1RevSubClampRGBACommand() : DrawerWall1Command(tmvlinebits, tmvlinemax) { 
- _iscale = dc_iscale; - _texturefrac = dc_texturefrac; - _count = dc_count; - _source = dc_source; - _dest = dc_dest; - tmvlinebits = ::tmvlinebits; - _pitch = dc_pitch; - _light = dc_light; - _shade_constants = dc_shade_constants; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - DWORD fracstep = _iscale * thread->num_cores; - DWORD frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); - const uint32 *source = (const uint32 *)_source; - uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - int bits = tmvlinebits; - int pitch = _pitch * thread->num_cores; - - uint32_t light = calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; - - uint32_t src_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t dest_alpha = _destalpha >> (FRACBITS - 8); - + LoopIterator loop(this, thread); + if (!loop) return; do { - uint32_t pix = source[frac >> bits]; - - uint32_t fg_alpha = src_alpha; - uint32_t bg_alpha = calc_blend_bgalpha(pix, dest_alpha); - - uint32_t fg = shade_bgra(pix, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - - frac += fracstep; - dest += pitch; - } while (--count); + uint32_t fg = LightBgra::shade_bgra(_source[loop.sample_index()], _light, _shade_constants); + *loop.dest = 
BlendBgra::revsub(fg, *loop.dest, _srcalpha, calc_blend_bgalpha(fg, _destalpha)); + } while (loop.next()); } }; -class Tmvline4RevSubClampRGBACommand : public DrawerCommand +class Tmvline4RevSubClampRGBACommand : public DrawerWall4Command { - BYTE * RESTRICT _dest; - int _count; - int _pitch; - ShadeConstants _shade_constants; - fixed_t _srcalpha; - fixed_t _destalpha; - int tmvlinebits; - fixed_t palookuplight[4]; - DWORD vplce[4]; - DWORD vince[4]; - const uint32 *RESTRICT bufplce[4]; - public: - Tmvline4RevSubClampRGBACommand() + Tmvline4RevSubClampRGBACommand() : DrawerWall4Command(tmvlinebits, tmvlinemax) { - _dest = dc_dest; - _count = dc_count; - _pitch = dc_pitch; - _shade_constants = dc_shade_constants; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; - tmvlinebits = ::tmvlinebits; - for (int i = 0; i < 4; i++) - { - palookuplight[i] = ::palookuplight[i]; - vplce[i] = ::vplce[i]; - vince[i] = ::vince[i]; - bufplce[i] = (const uint32 *)::bufplce[i]; - } } void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - int pitch = _pitch * thread->num_cores; - int bits = tmvlinebits; - - uint32_t light[4]; - light[0] = calc_light_multiplier(palookuplight[0]); - light[1] = calc_light_multiplier(palookuplight[1]); - light[2] = calc_light_multiplier(palookuplight[2]); - light[3] = calc_light_multiplier(palookuplight[3]); - - ShadeConstants shade_constants = _shade_constants; - - uint32_t src_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t dest_alpha = _destalpha >> (FRACBITS - 8); - - DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; - DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = thread->skipped_by_thread(_dest_y); - for (int i = 0; i < 4; i++) - { - local_vplce[i] += local_vince[i] * skipped; - local_vince[i] *= thread->num_cores; - } - + LoopIterator 
loop(this, thread); + if (!loop) return; do { - for (int i = 0; i < 4; ++i) + for (int i = 0; i < 4; i++) { - uint32_t pix = bufplce[i][local_vplce[i] >> bits]; - - uint32_t fg_alpha = src_alpha; - uint32_t bg_alpha = calc_blend_bgalpha(pix, dest_alpha); - - uint32_t fg = shade_bgra(pix, light[i], shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (dest[i] >> 16) & 0xff; - uint32_t bg_green = (dest[i] >> 8) & 0xff; - uint32_t bg_blue = (dest[i]) & 0xff; - - uint32_t red = clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; - - dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - - local_vplce[i] += local_vince[i]; + uint32_t fg = LightBgra::shade_bgra(_bufplce[i][loop.sample_index(i)], _light[i], _shade_constants); + loop.dest[i] = BlendBgra::revsub(fg, loop.dest[i], _srcalpha, calc_blend_bgalpha(fg, _destalpha)); } - dest += pitch; - } while (--count); + } while (loop.next()); } }; +///////////////////////////////////////////////////////////////////////////// + class DrawFogBoundaryLineRGBACommand : public DrawerCommand { int _y; @@ -3446,7 +1852,7 @@ public: uint32_t *dest = ylookup[y] + (uint32_t*)_destorg; - uint32_t light = calc_light_multiplier(_light); + uint32_t light = LightBgra::calc_light_multiplier(_light); ShadeConstants constants = _shade_constants; do @@ -3563,8 +1969,8 @@ public: uint32_t *dest = ylookup[y] + x1 + (uint32_t*)_destorg; int count = (x2 - x1 + 1); - uint32_t light = calc_light_multiplier(_light); - uint32_t color = shade_pal_index_simple(_color, light); + uint32_t light = LightBgra::calc_light_multiplier(_light); + uint32_t color = LightBgra::shade_pal_index_simple(_color, light); for 
(int i = 0; i < count; i++) dest[i] = color; } diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index 8f97d4ecd2..20fff4fc06 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -286,7 +286,7 @@ public: }; ///////////////////////////////////////////////////////////////////////////// -// Pixel shading macros and inline functions: +// Pixel shading inline functions: // Give the compiler a strong hint we want these functions inlined: #ifndef FORCEINLINE @@ -310,220 +310,256 @@ public: #endif #endif -// calculates the light constant passed to the shade_pal_index function -FORCEINLINE uint32_t calc_light_multiplier(dsfixed_t light) +class LightBgra { - return 256 - (light >> (FRACBITS - 8)); -} - -// Calculates a ARGB8 color for the given palette index and light multiplier -FORCEINLINE uint32_t shade_pal_index_simple(uint32_t index, uint32_t light) -{ - const PalEntry &color = GPalette.BaseColors[index]; - uint32_t red = color.r; - uint32_t green = color.g; - uint32_t blue = color.b; - - red = red * light / 256; - green = green * light / 256; - blue = blue * light / 256; - - return 0xff000000 | (red << 16) | (green << 8) | blue; -} - -FORCEINLINE uint32_t shade_bgra_simple(uint32_t color, uint32_t light) -{ - uint32_t red = (color >> 16) & 0xff; - uint32_t green = (color >> 8) & 0xff; - uint32_t blue = color & 0xff; - - red = red * light / 256; - green = green * light / 256; - blue = blue * light / 256; - - return 0xff000000 | (red << 16) | (green << 8) | blue; -} - -// Calculates a ARGB8 color for the given palette index, light multiplier and dynamic colormap -FORCEINLINE uint32_t shade_pal_index(uint32_t index, uint32_t light, const ShadeConstants &constants) -{ - const PalEntry &color = GPalette.BaseColors[index]; - uint32_t alpha = color.d & 0xff000000; - uint32_t red = color.r; - uint32_t green = color.g; - uint32_t blue = color.b; - if (constants.simple_shade) +public: + // calculates the light constant passed to the shade_pal_index function + FORCEINLINE 
static uint32_t calc_light_multiplier(dsfixed_t light) { + return 256 - (light >> (FRACBITS - 8)); + } + + // Calculates a ARGB8 color for the given palette index and light multiplier + FORCEINLINE static uint32_t shade_pal_index_simple(uint32_t index, uint32_t light) + { + const PalEntry &color = GPalette.BaseColors[index]; + uint32_t red = color.r; + uint32_t green = color.g; + uint32_t blue = color.b; + red = red * light / 256; green = green * light / 256; blue = blue * light / 256; + + return 0xff000000 | (red << 16) | (green << 8) | blue; } - else + + // Calculates a ARGB8 color for the given palette index, light multiplier and dynamic colormap + FORCEINLINE static uint32_t shade_pal_index(uint32_t index, uint32_t light, const ShadeConstants &constants) { - uint32_t inv_light = 256 - light; - uint32_t inv_desaturate = 256 - constants.desaturate; - - uint32_t intensity = ((red * 77 + green * 143 + blue * 37) >> 8) * constants.desaturate; - - red = (red * inv_desaturate + intensity) / 256; - green = (green * inv_desaturate + intensity) / 256; - blue = (blue * inv_desaturate + intensity) / 256; - - red = (constants.fade_red * inv_light + red * light) / 256; - green = (constants.fade_green * inv_light + green * light) / 256; - blue = (constants.fade_blue * inv_light + blue * light) / 256; - - red = (red * constants.light_red) / 256; - green = (green * constants.light_green) / 256; - blue = (blue * constants.light_blue) / 256; - } - return alpha | (red << 16) | (green << 8) | blue; -} - -FORCEINLINE uint32_t shade_bgra(uint32_t color, uint32_t light, const ShadeConstants &constants) -{ - uint32_t alpha = color & 0xff000000; - uint32_t red = (color >> 16) & 0xff; - uint32_t green = (color >> 8) & 0xff; - uint32_t blue = color & 0xff; - if (constants.simple_shade) - { - red = red * light / 256; - green = green * light / 256; - blue = blue * light / 256; - } - else - { - uint32_t inv_light = 256 - light; - uint32_t inv_desaturate = 256 - constants.desaturate; - - 
uint32_t intensity = ((red * 77 + green * 143 + blue * 37) >> 8) * constants.desaturate; - - red = (red * inv_desaturate + intensity) / 256; - green = (green * inv_desaturate + intensity) / 256; - blue = (blue * inv_desaturate + intensity) / 256; - - red = (constants.fade_red * inv_light + red * light) / 256; - green = (constants.fade_green * inv_light + green * light) / 256; - blue = (constants.fade_blue * inv_light + blue * light) / 256; - - red = (red * constants.light_red) / 256; - green = (green * constants.light_green) / 256; - blue = (blue * constants.light_blue) / 256; - } - return alpha | (red << 16) | (green << 8) | blue; -} - -FORCEINLINE uint32_t alpha_blend(uint32_t fg, uint32_t bg) -{ - uint32_t fg_alpha = fg >> 24; - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t alpha = fg_alpha + (fg_alpha >> 7); // 255 -> 256 - uint32_t inv_alpha = 256 - alpha; - - uint32_t bg_red = (bg >> 16) & 0xff; - uint32_t bg_green = (bg >> 8) & 0xff; - uint32_t bg_blue = bg & 0xff; - - uint32_t red = clamp(fg_red + (bg_red * inv_alpha) / 256, 0, 255); - uint32_t green = clamp(fg_green + (bg_green * inv_alpha) / 256, 0, 255); - uint32_t blue = clamp(fg_blue + (bg_blue * inv_alpha) / 256, 0, 255); - - return 0xff000000 | (red << 16) | (green << 8) | blue; -} - -inline bool span_sampler_setup(const uint32_t * RESTRICT &source, int &xbits, int &ybits, fixed_t xstep, fixed_t ystep) -{ - if (!r_bilinear) - return false; - - // Is this a magfilter or minfilter? 
- fixed_t xmagnitude = abs(xstep) >> (32 - xbits - FRACBITS); - fixed_t ymagnitude = abs(ystep) >> (32 - ybits - FRACBITS); - fixed_t magnitude = (xmagnitude + ymagnitude) * 2 + (1 << (FRACBITS -1)); - if (magnitude >> FRACBITS == 0) - return false; - - if (r_mipmap) - { - int level = magnitude >> (FRACBITS + 1); - while (level != 0) + const PalEntry &color = GPalette.BaseColors[index]; + uint32_t alpha = color.d & 0xff000000; + uint32_t red = color.r; + uint32_t green = color.g; + uint32_t blue = color.b; + if (constants.simple_shade) { - if (xbits <= 2 || ybits <= 2) - break; - - source += (1 << (xbits)) * (1 << (ybits)); - xbits -= 1; - ybits -= 1; - level >>= 1; + red = red * light / 256; + green = green * light / 256; + blue = blue * light / 256; } + else + { + uint32_t inv_light = 256 - light; + uint32_t inv_desaturate = 256 - constants.desaturate; + + uint32_t intensity = ((red * 77 + green * 143 + blue * 37) >> 8) * constants.desaturate; + + red = (red * inv_desaturate + intensity) / 256; + green = (green * inv_desaturate + intensity) / 256; + blue = (blue * inv_desaturate + intensity) / 256; + + red = (constants.fade_red * inv_light + red * light) / 256; + green = (constants.fade_green * inv_light + green * light) / 256; + blue = (constants.fade_blue * inv_light + blue * light) / 256; + + red = (red * constants.light_red) / 256; + green = (green * constants.light_green) / 256; + blue = (blue * constants.light_blue) / 256; + } + return alpha | (red << 16) | (green << 8) | blue; } - return true; -} -FORCEINLINE uint32_t sample_bilinear(const uint32_t *col0, const uint32_t *col1, uint32_t texturefracx, uint32_t texturefracy, int ybits) + FORCEINLINE static uint32_t shade_bgra_simple(uint32_t color, uint32_t light) + { + uint32_t red = RPART(color) * light / 256; + uint32_t green = GPART(color) * light / 256; + uint32_t blue = BPART(color) * light / 256; + return 0xff000000 | (red << 16) | (green << 8) | blue; + } + + FORCEINLINE static uint32_t 
shade_bgra(uint32_t color, uint32_t light, const ShadeConstants &constants) + { + uint32_t alpha = color & 0xff000000; + uint32_t red = (color >> 16) & 0xff; + uint32_t green = (color >> 8) & 0xff; + uint32_t blue = color & 0xff; + if (constants.simple_shade) + { + red = red * light / 256; + green = green * light / 256; + blue = blue * light / 256; + } + else + { + uint32_t inv_light = 256 - light; + uint32_t inv_desaturate = 256 - constants.desaturate; + + uint32_t intensity = ((red * 77 + green * 143 + blue * 37) >> 8) * constants.desaturate; + + red = (red * inv_desaturate + intensity) / 256; + green = (green * inv_desaturate + intensity) / 256; + blue = (blue * inv_desaturate + intensity) / 256; + + red = (constants.fade_red * inv_light + red * light) / 256; + green = (constants.fade_green * inv_light + green * light) / 256; + blue = (constants.fade_blue * inv_light + blue * light) / 256; + + red = (red * constants.light_red) / 256; + green = (green * constants.light_green) / 256; + blue = (blue * constants.light_blue) / 256; + } + return alpha | (red << 16) | (green << 8) | blue; + } +}; + +class BlendBgra { - uint32_t half = 1 << (ybits - 1); - uint32_t y = (texturefracy - half) >> ybits; +public: + FORCEINLINE static uint32_t copy(uint32_t fg) + { + return fg; + } - uint32_t p00 = col0[y]; - uint32_t p01 = col0[y + 1]; - uint32_t p10 = col1[y]; - uint32_t p11 = col1[y + 1]; + FORCEINLINE static uint32_t add(uint32_t fg, uint32_t bg, uint32_t srcalpha, uint32_t destalpha) + { + uint32_t red = MIN((RPART(fg) * srcalpha + RPART(bg) * destalpha) >> 8, 255); + uint32_t green = MIN((GPART(fg) * srcalpha + GPART(bg) * destalpha) >> 8, 255); + uint32_t blue = MIN((BPART(fg) * srcalpha + BPART(bg) * destalpha) >> 8, 255); + return 0xff000000 | (red << 16) | (green << 8) | blue; + } - uint32_t inv_b = texturefracx; - uint32_t inv_a = ((texturefracy + half) >> (ybits - 4)) & 15; - uint32_t a = 16 - inv_a; - uint32_t b = 16 - inv_b; + FORCEINLINE static uint32_t 
sub(uint32_t fg, uint32_t bg, uint32_t srcalpha, uint32_t destalpha) + { + uint32_t red = clamp((0x10000 - RPART(fg) * srcalpha + RPART(bg) * destalpha) >> 8, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 - GPART(fg) * srcalpha + GPART(bg) * destalpha) >> 8, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 - BPART(fg) * srcalpha + BPART(bg) * destalpha) >> 8, 256, 256 + 255) - 256; + return 0xff000000 | (red << 16) | (green << 8) | blue; + } - uint32_t red = (RPART(p00) * a * b + RPART(p01) * inv_a * b + RPART(p10) * a * inv_b + RPART(p11) * inv_a * inv_b + 127) >> 8; - uint32_t green = (GPART(p00) * a * b + GPART(p01) * inv_a * b + GPART(p10) * a * inv_b + GPART(p11) * inv_a * inv_b + 127) >> 8; - uint32_t blue = (BPART(p00) * a * b + BPART(p01) * inv_a * b + BPART(p10) * a * inv_b + BPART(p11) * inv_a * inv_b + 127) >> 8; - uint32_t alpha = (APART(p00) * a * b + APART(p01) * inv_a * b + APART(p10) * a * inv_b + APART(p11) * inv_a * inv_b + 127) >> 8; + FORCEINLINE static uint32_t revsub(uint32_t fg, uint32_t bg, uint32_t srcalpha, uint32_t destalpha) + { + uint32_t red = clamp((0x10000 + RPART(fg) * srcalpha - RPART(bg) * destalpha) >> 8, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 + GPART(fg) * srcalpha - GPART(bg) * destalpha) >> 8, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 + BPART(fg) * srcalpha - BPART(bg) * destalpha) >> 8, 256, 256 + 255) - 256; + return 0xff000000 | (red << 16) | (green << 8) | blue; + } - return (alpha << 24) | (red << 16) | (green << 8) | blue; -} + FORCEINLINE static uint32_t alpha_blend(uint32_t fg, uint32_t bg) + { + uint32_t alpha = APART(fg) + (APART(fg) >> 7); // 255 -> 256 + uint32_t inv_alpha = 256 - alpha; + uint32_t red = MIN(RPART(fg) + (RPART(bg) * inv_alpha) / 256, 255); + uint32_t green = MIN(GPART(fg) + (GPART(bg) * inv_alpha) / 256, 255); + uint32_t blue = MIN(BPART(fg) + (BPART(bg) * inv_alpha) / 256, 255); + return 0xff000000 | (red << 16) | (green << 8) | blue; + } +}; 
-FORCEINLINE uint32_t sample_bilinear(const uint32_t *texture, dsfixed_t xfrac, dsfixed_t yfrac, int xbits, int ybits) +class SampleBgra { - int xshift = (32 - xbits); - int yshift = (32 - ybits); - int xmask = (1 << xshift) - 1; - int ymask = (1 << yshift) - 1; - uint32_t xhalf = 1 << (xbits - 1); - uint32_t yhalf = 1 << (ybits - 1); - uint32_t x = (xfrac - xhalf) >> xbits; - uint32_t y = (yfrac - yhalf) >> ybits; +public: + inline static bool span_sampler_setup(const uint32_t * RESTRICT &source, int &xbits, int &ybits, fixed_t xstep, fixed_t ystep) + { + if (!r_bilinear) + return false; - uint32_t p00 = texture[(y & ymask) + ((x & xmask) << yshift)]; - uint32_t p01 = texture[((y + 1) & ymask) + ((x & xmask) << yshift)]; - uint32_t p10 = texture[(y & ymask) + (((x + 1) & xmask) << yshift)]; - uint32_t p11 = texture[((y + 1) & ymask) + (((x + 1) & xmask) << yshift)]; + // Is this a magfilter or minfilter? + fixed_t xmagnitude = abs(xstep) >> (32 - xbits - FRACBITS); + fixed_t ymagnitude = abs(ystep) >> (32 - ybits - FRACBITS); + fixed_t magnitude = (xmagnitude + ymagnitude) * 2 + (1 << (FRACBITS - 1)); + if (magnitude >> FRACBITS == 0) + return false; - uint32_t inv_b = ((xfrac + xhalf) >> (xbits - 4)) & 15; - uint32_t inv_a = ((yfrac + yhalf) >> (ybits - 4)) & 15; - uint32_t a = 16 - inv_a; - uint32_t b = 16 - inv_b; + if (r_mipmap) + { + int level = magnitude >> (FRACBITS + 1); + while (level != 0) + { + if (xbits <= 2 || ybits <= 2) + break; - uint32_t red = (RPART(p00) * a * b + RPART(p01) * inv_a * b + RPART(p10) * a * inv_b + RPART(p11) * inv_a * inv_b + 127) >> 8; - uint32_t green = (GPART(p00) * a * b + GPART(p01) * inv_a * b + GPART(p10) * a * inv_b + GPART(p11) * inv_a * inv_b + 127) >> 8; - uint32_t blue = (BPART(p00) * a * b + BPART(p01) * inv_a * b + BPART(p10) * a * inv_b + BPART(p11) * inv_a * inv_b + 127) >> 8; - uint32_t alpha = (APART(p00) * a * b + APART(p01) * inv_a * b + APART(p10) * a * inv_b + APART(p11) * inv_a * inv_b + 127) >> 8; + source 
+= (1 << (xbits)) * (1 << (ybits)); + xbits -= 1; + ybits -= 1; + level >>= 1; + } + } + return true; + } - return (alpha << 24) | (red << 16) | (green << 8) | blue; -} + FORCEINLINE static uint32_t sample_bilinear(const uint32_t *col0, const uint32_t *col1, uint32_t texturefracx, uint32_t texturefracy, int ybits, uint32_t ymax) + { + uint32_t half = 1 << (ybits - 1); + uint32_t y0 = (texturefracy - half) >> ybits; + if (y0 > ymax) + y0 = 0; + uint32_t y1 = y0 + 1; + if (y1 > ymax) + y1 = 0; -#define VEC_SAMPLE_BILINEAR4_COLUMN(fg, col0, col1, texturefracx, texturefracy, ybits) { \ + uint32_t p00 = col0[y0]; + uint32_t p01 = col0[y1]; + uint32_t p10 = col1[y0]; + uint32_t p11 = col1[y1]; + + uint32_t inv_b = texturefracx; + uint32_t inv_a = ((texturefracy + half) >> (ybits - 4)) & 15; + uint32_t a = 16 - inv_a; + uint32_t b = 16 - inv_b; + + uint32_t red = (RPART(p00) * a * b + RPART(p01) * inv_a * b + RPART(p10) * a * inv_b + RPART(p11) * inv_a * inv_b + 127) >> 8; + uint32_t green = (GPART(p00) * a * b + GPART(p01) * inv_a * b + GPART(p10) * a * inv_b + GPART(p11) * inv_a * inv_b + 127) >> 8; + uint32_t blue = (BPART(p00) * a * b + BPART(p01) * inv_a * b + BPART(p10) * a * inv_b + BPART(p11) * inv_a * inv_b + 127) >> 8; + uint32_t alpha = (APART(p00) * a * b + APART(p01) * inv_a * b + APART(p10) * a * inv_b + APART(p11) * inv_a * inv_b + 127) >> 8; + + return (alpha << 24) | (red << 16) | (green << 8) | blue; + } + + FORCEINLINE static uint32_t sample_bilinear(const uint32_t *texture, dsfixed_t xfrac, dsfixed_t yfrac, int xbits, int ybits) + { + int xshift = (32 - xbits); + int yshift = (32 - ybits); + int xmask = (1 << xshift) - 1; + int ymask = (1 << yshift) - 1; + uint32_t xhalf = 1 << (xbits - 1); + uint32_t yhalf = 1 << (ybits - 1); + uint32_t x = (xfrac - xhalf) >> xbits; + uint32_t y = (yfrac - yhalf) >> ybits; + + uint32_t p00 = texture[(y & ymask) + ((x & xmask) << yshift)]; + uint32_t p01 = texture[((y + 1) & ymask) + ((x & xmask) << yshift)]; + 
uint32_t p10 = texture[(y & ymask) + (((x + 1) & xmask) << yshift)]; + uint32_t p11 = texture[((y + 1) & ymask) + (((x + 1) & xmask) << yshift)]; + + uint32_t inv_b = ((xfrac + xhalf) >> (xbits - 4)) & 15; + uint32_t inv_a = ((yfrac + yhalf) >> (ybits - 4)) & 15; + uint32_t a = 16 - inv_a; + uint32_t b = 16 - inv_b; + + uint32_t red = (RPART(p00) * a * b + RPART(p01) * inv_a * b + RPART(p10) * a * inv_b + RPART(p11) * inv_a * inv_b + 127) >> 8; + uint32_t green = (GPART(p00) * a * b + GPART(p01) * inv_a * b + GPART(p10) * a * inv_b + GPART(p11) * inv_a * inv_b + 127) >> 8; + uint32_t blue = (BPART(p00) * a * b + BPART(p01) * inv_a * b + BPART(p10) * a * inv_b + BPART(p11) * inv_a * inv_b + 127) >> 8; + uint32_t alpha = (APART(p00) * a * b + APART(p01) * inv_a * b + APART(p10) * a * inv_b + APART(p11) * inv_a * inv_b + 127) >> 8; + + return (alpha << 24) | (red << 16) | (green << 8) | blue; + } +}; + +///////////////////////////////////////////////////////////////////////////// +// SSE/AVX shading macros: + +#define VEC_SAMPLE_BILINEAR4_COLUMN(fg, col0, col1, texturefracx, texturefracy, ybits, ymax) { \ uint32_t half = 1 << (ybits - 1); \ \ __m128i m127 = _mm_set1_epi16(127); \ fg = _mm_setzero_si128(); \ for (int i = 0; i < 4; i++) \ { \ - uint32_t y = (texturefracy[i] - half) >> ybits; \ + uint32_t y0 = (texturefracy[i] - half) >> ybits; \ + if (y0 > ymax) y0 = 0; \ + uint32_t y1 = y0 + 1; \ + if (y1 > ymax) y1 = 0; \ \ uint32_t inv_b = texturefracx[i]; \ uint32_t inv_a = ((texturefracy[i] + half) >> (ybits - 4)) & 15; \ @@ -537,8 +573,8 @@ FORCEINLINE uint32_t sample_bilinear(const uint32_t *texture, dsfixed_t xfrac, d __m128i ab_invab = _mm_set_epi16(invab, invab, invab, invab, ab, ab, ab, ab); \ __m128i ainvb_invainvb = _mm_set_epi16(invainvb, invainvb, invainvb, invainvb, ainvb, ainvb, ainvb, ainvb); \ \ - __m128i p0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(col0[i] + y)), _mm_setzero_si128()); \ - __m128i p1 = 
_mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(col1[i] + y)), _mm_setzero_si128()); \ + __m128i p0 = _mm_unpacklo_epi8(_mm_set_epi32(0, 0, col0[i][y1], col0[i][y0]), _mm_setzero_si128()); \ + __m128i p1 = _mm_unpacklo_epi8(_mm_set_epi32(0, 0, col1[i][y1], col1[i][y0]), _mm_setzero_si128()); \ \ __m128i tmp = _mm_adds_epu16(_mm_mullo_epi16(p0, ab_invab), _mm_mullo_epi16(p1, ainvb_invainvb)); \ __m128i color = _mm_srli_epi16(_mm_adds_epu16(_mm_adds_epu16(_mm_srli_si128(tmp, 8), tmp), m127), 8); \ @@ -758,12 +794,16 @@ FORCEINLINE uint32_t calc_blend_bgalpha(uint32_t fg, uint32_t dest_alpha) { uint32_t alpha = fg >> 24; alpha += alpha >> 7; - return 256 - alpha; // (dest_alpha * (256 - alpha)) >> 8; + uint32_t inv_alpha = 256 - alpha; + return (dest_alpha * alpha + 256 * inv_alpha + 128) >> 8; } #define VEC_CALC_BLEND_ALPHA_INIT(src_alpha, dest_alpha) \ __m128i msrc_alpha = _mm_set1_epi16(src_alpha); \ - __m128i mdest_alpha = _mm_set1_epi16(dest_alpha); + __m128i mdest_alpha = _mm_set1_epi16(dest_alpha * 255 / 256); \ + __m128i m256 = _mm_set1_epi16(256); \ + __m128i m255 = _mm_set1_epi16(255); \ + __m128i m128 = _mm_set1_epi16(128); // Calculates the final alpha values to be used when combined with the source texture alpha channel #define VEC_CALC_BLEND_ALPHA(fg) \ @@ -772,8 +812,10 @@ FORCEINLINE uint32_t calc_blend_bgalpha(uint32_t fg, uint32_t dest_alpha) __m128i alpha_lo = _mm_shufflehi_epi16(_mm_shufflelo_epi16(_mm_unpacklo_epi8(fg, _mm_setzero_si128()), _MM_SHUFFLE(3, 3, 3, 3)), _MM_SHUFFLE(3, 3, 3, 3)); \ alpha_hi = _mm_add_epi16(alpha_hi, _mm_srli_epi16(alpha_hi, 7)); \ alpha_lo = _mm_add_epi16(alpha_lo, _mm_srli_epi16(alpha_lo, 7)); \ - bg_alpha_hi = _mm_sub_epi16(_mm_set1_epi16(256), alpha_hi); /* _mm_srli_epi16(_mm_mullo_epi16(_mm_sub_epi16(_mm_set1_epi16(256), alpha_hi), mdest_alpha), 8);*/ \ - bg_alpha_lo = _mm_sub_epi16(_mm_set1_epi16(256), alpha_lo); /* _mm_srli_epi16(_mm_mullo_epi16(_mm_sub_epi16(_mm_set1_epi16(256), alpha_lo), mdest_alpha), 8);*/ 
\ + bg_alpha_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_adds_epu16(_mm_mullo_epi16(mdest_alpha, alpha_hi), _mm_mullo_epi16(m255, _mm_sub_epi16(m256, alpha_hi))), m128), 8); \ + bg_alpha_hi = _mm_add_epi16(bg_alpha_hi, _mm_srli_epi16(bg_alpha_hi, 7)); \ + bg_alpha_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_adds_epu16(_mm_mullo_epi16(mdest_alpha, alpha_lo), _mm_mullo_epi16(m255, _mm_sub_epi16(m256, alpha_lo))), m128), 8); \ + bg_alpha_lo = _mm_add_epi16(bg_alpha_lo, _mm_srli_epi16(bg_alpha_lo, 7)); \ fg_alpha_hi = msrc_alpha; \ fg_alpha_lo = msrc_alpha; \ } diff --git a/src/r_draw_rgba_sse.h b/src/r_draw_rgba_sse.h index af761c6e7d..408a2f5a23 100644 --- a/src/r_draw_rgba_sse.h +++ b/src/r_draw_rgba_sse.h @@ -43,7 +43,7 @@ public: _destorg = dc_destorg; _light = ds_light; _shade_constants = ds_shade_constants; - _magnifying = !span_sampler_setup(_source, _xbits, _ybits, _xstep, _ystep); + _magnifying = !SampleBgra::span_sampler_setup(_source, _xbits, _ybits, _xstep, _ystep); } void Execute(DrawerThread *thread) override @@ -70,7 +70,7 @@ public: xstep = _xstep; ystep = _ystep; - uint32_t light = calc_light_multiplier(_light); + uint32_t light = LightBgra::calc_light_multiplier(_light); ShadeConstants shade_constants = _shade_constants; if (_magnifying) @@ -166,7 +166,7 @@ public: spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); // Lookup pixel from flat texture tile - *dest++ = shade_bgra(source[spot], light, shade_constants); + *dest++ = LightBgra::shade_bgra(source[spot], light, shade_constants); // Next step in u,v. xfrac += xstep; @@ -258,7 +258,7 @@ public: spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); // Lookup pixel from flat texture tile - *dest++ = shade_bgra(source[spot], light, shade_constants); + *dest++ = LightBgra::shade_bgra(source[spot], light, shade_constants); // Next step in u,v. 
xfrac += xstep; @@ -305,7 +305,7 @@ public: do { - *dest++ = shade_bgra(sample_bilinear(source, xfrac, yfrac, 26, 26), light, shade_constants); + *dest++ = LightBgra::shade_bgra(SampleBgra::sample_bilinear(source, xfrac, yfrac, 26, 26), light, shade_constants); xfrac += xstep; yfrac += ystep; } while (--count); @@ -349,7 +349,7 @@ public: do { - *dest++ = shade_bgra(sample_bilinear(source, xfrac, yfrac, 32 - _xbits, 32 - _ybits), light, shade_constants); + *dest++ = LightBgra::shade_bgra(SampleBgra::sample_bilinear(source, xfrac, yfrac, 32 - _xbits, 32 - _ybits), light, shade_constants); xfrac += xstep; yfrac += ystep; } while (--count); @@ -364,7 +364,8 @@ class VecCommand(Vlinec4RGBA) : public DrawerCommand int _count; int _pitch; ShadeConstants _shade_constants; - int vlinebits; + int _vlinebits; + uint32_t _vlinemax; fixed_t palookuplight[4]; DWORD vplce[4]; DWORD vince[4]; @@ -379,7 +380,8 @@ public: _count = dc_count; _pitch = dc_pitch; _shade_constants = dc_shade_constants; - vlinebits = ::vlinebits; + _vlinebits = vlinebits; + _vlinemax = vlinemax; for (int i = 0; i < 4; i++) { palookuplight[i] = ::palookuplight[i]; @@ -398,13 +400,13 @@ public: return; uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - int bits = vlinebits; + int bits = _vlinebits; int pitch = _pitch * thread->num_cores; - uint32_t light0 = calc_light_multiplier(palookuplight[0]); - uint32_t light1 = calc_light_multiplier(palookuplight[1]); - uint32_t light2 = calc_light_multiplier(palookuplight[2]); - uint32_t light3 = calc_light_multiplier(palookuplight[3]); + uint32_t light0 = LightBgra::calc_light_multiplier(palookuplight[0]); + uint32_t light1 = LightBgra::calc_light_multiplier(palookuplight[1]); + uint32_t light2 = LightBgra::calc_light_multiplier(palookuplight[2]); + uint32_t light3 = LightBgra::calc_light_multiplier(palookuplight[3]); ShadeConstants shade_constants = _shade_constants; @@ -480,7 +482,7 @@ public: do { __m128i fg; - 
VEC_SAMPLE_BILINEAR4_COLUMN(fg, bufplce, bufplce2, buftexturefracx, local_vplce, bits); + VEC_SAMPLE_BILINEAR4_COLUMN(fg, bufplce, bufplce2, buftexturefracx, local_vplce, bits, _vlinemax); local_vplce[0] = local_vplce[0] + local_vince[0]; local_vplce[1] = local_vplce[1] + local_vince[1]; @@ -498,7 +500,7 @@ public: do { __m128i fg; - VEC_SAMPLE_BILINEAR4_COLUMN(fg, bufplce, bufplce2, buftexturefracx, local_vplce, bits); + VEC_SAMPLE_BILINEAR4_COLUMN(fg, bufplce, bufplce2, buftexturefracx, local_vplce, bits, _vlinemax); local_vplce[0] = local_vplce[0] + local_vince[0]; local_vplce[1] = local_vplce[1] + local_vince[1]; @@ -520,7 +522,8 @@ class VecCommand(Mvlinec4RGBA) : public DrawerCommand int _count; int _pitch; ShadeConstants _shade_constants; - int mvlinebits; + int _mvlinebits; + uint32_t _mvlinemax; fixed_t palookuplight[4]; DWORD vplce[4]; DWORD vince[4]; @@ -535,7 +538,8 @@ public: _count = dc_count; _pitch = dc_pitch; _shade_constants = dc_shade_constants; - mvlinebits = ::mvlinebits; + _mvlinebits = mvlinebits; + _mvlinemax = mvlinemax; for (int i = 0; i < 4; i++) { palookuplight[i] = ::palookuplight[i]; @@ -555,12 +559,12 @@ public: uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); int pitch = _pitch * thread->num_cores; - int bits = mvlinebits; + int bits = _mvlinebits; - uint32_t light0 = calc_light_multiplier(palookuplight[0]); - uint32_t light1 = calc_light_multiplier(palookuplight[1]); - uint32_t light2 = calc_light_multiplier(palookuplight[2]); - uint32_t light3 = calc_light_multiplier(palookuplight[3]); + uint32_t light0 = LightBgra::calc_light_multiplier(palookuplight[0]); + uint32_t light1 = LightBgra::calc_light_multiplier(palookuplight[1]); + uint32_t light2 = LightBgra::calc_light_multiplier(palookuplight[2]); + uint32_t light3 = LightBgra::calc_light_multiplier(palookuplight[3]); ShadeConstants shade_constants = _shade_constants; @@ -640,7 +644,7 @@ public: do { __m128i fg; - VEC_SAMPLE_BILINEAR4_COLUMN(fg, bufplce, 
bufplce2, buftexturefracx, local_vplce, bits); + VEC_SAMPLE_BILINEAR4_COLUMN(fg, bufplce, bufplce2, buftexturefracx, local_vplce, bits, _mvlinemax); local_vplce[0] = local_vplce[0] + local_vince[0]; local_vplce[1] = local_vplce[1] + local_vince[1]; @@ -660,7 +664,7 @@ public: do { __m128i fg; - VEC_SAMPLE_BILINEAR4_COLUMN(fg, bufplce, bufplce2, buftexturefracx, local_vplce, bits); + VEC_SAMPLE_BILINEAR4_COLUMN(fg, bufplce, bufplce2, buftexturefracx, local_vplce, bits, _mvlinemax); local_vplce[0] = local_vplce[0] + local_vince[0]; local_vplce[1] = local_vplce[1] + local_vince[1]; @@ -686,7 +690,8 @@ class VecCommand(Tmvline4AddRGBA) : public DrawerCommand ShadeConstants _shade_constants; fixed_t _srcalpha; fixed_t _destalpha; - int tmvlinebits; + int _tmvlinebits; + uint32_t _tmvlinemax; fixed_t palookuplight[4]; DWORD vplce[4]; DWORD vince[4]; @@ -701,7 +706,8 @@ public: _shade_constants = dc_shade_constants; _srcalpha = dc_srcalpha; _destalpha = dc_destalpha; - tmvlinebits = ::tmvlinebits; + _tmvlinebits = tmvlinebits; + _tmvlinemax = tmvlinemax; for (int i = 0; i < 4; i++) { palookuplight[i] = ::palookuplight[i]; @@ -719,13 +725,13 @@ public: uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); int pitch = _pitch * thread->num_cores; - int bits = tmvlinebits; + int bits = _tmvlinebits; uint32_t light[4]; - light[0] = calc_light_multiplier(palookuplight[0]); - light[1] = calc_light_multiplier(palookuplight[1]); - light[2] = calc_light_multiplier(palookuplight[2]); - light[3] = calc_light_multiplier(palookuplight[3]); + light[0] = LightBgra::calc_light_multiplier(palookuplight[0]); + light[1] = LightBgra::calc_light_multiplier(palookuplight[1]); + light[2] = LightBgra::calc_light_multiplier(palookuplight[2]); + light[3] = LightBgra::calc_light_multiplier(palookuplight[3]); ShadeConstants shade_constants = _shade_constants; @@ -825,7 +831,8 @@ class VecCommand(Tmvline4AddClampRGBA) : public DrawerCommand ShadeConstants _shade_constants; 
fixed_t _srcalpha; fixed_t _destalpha; - int tmvlinebits; + int _tmvlinebits; + uint32_t _tmvlinemax; fixed_t palookuplight[4]; DWORD vplce[4]; DWORD vince[4]; @@ -840,7 +847,8 @@ public: _shade_constants = dc_shade_constants; _srcalpha = dc_srcalpha; _destalpha = dc_destalpha; - tmvlinebits = ::tmvlinebits; + _tmvlinebits = tmvlinebits; + _tmvlinemax = tmvlinemax; for (int i = 0; i < 4; i++) { palookuplight[i] = ::palookuplight[i]; @@ -858,13 +866,13 @@ public: uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); int pitch = _pitch * thread->num_cores; - int bits = tmvlinebits; + int bits = _tmvlinebits; uint32_t light[4]; - light[0] = calc_light_multiplier(palookuplight[0]); - light[1] = calc_light_multiplier(palookuplight[1]); - light[2] = calc_light_multiplier(palookuplight[2]); - light[3] = calc_light_multiplier(palookuplight[3]); + light[0] = LightBgra::calc_light_multiplier(palookuplight[0]); + light[1] = LightBgra::calc_light_multiplier(palookuplight[1]); + light[2] = LightBgra::calc_light_multiplier(palookuplight[2]); + light[3] = LightBgra::calc_light_multiplier(palookuplight[3]); ShadeConstants shade_constants = _shade_constants; @@ -963,7 +971,8 @@ class VecCommand(Tmvline4SubClampRGBA) : public DrawerCommand ShadeConstants _shade_constants; fixed_t _srcalpha; fixed_t _destalpha; - int tmvlinebits; + int _tmvlinebits; + uint32_t _tmvlinemax; fixed_t palookuplight[4]; DWORD vplce[4]; DWORD vince[4]; @@ -978,7 +987,8 @@ public: _shade_constants = dc_shade_constants; _srcalpha = dc_srcalpha; _destalpha = dc_destalpha; - tmvlinebits = ::tmvlinebits; + _tmvlinebits = tmvlinebits; + _tmvlinemax = tmvlinemax; for (int i = 0; i < 4; i++) { palookuplight[i] = ::palookuplight[i]; @@ -996,13 +1006,13 @@ public: uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); int pitch = _pitch * thread->num_cores; - int bits = tmvlinebits; + int bits = _tmvlinebits; uint32_t light[4]; - light[0] = 
calc_light_multiplier(palookuplight[0]); - light[1] = calc_light_multiplier(palookuplight[1]); - light[2] = calc_light_multiplier(palookuplight[2]); - light[3] = calc_light_multiplier(palookuplight[3]); + light[0] = LightBgra::calc_light_multiplier(palookuplight[0]); + light[1] = LightBgra::calc_light_multiplier(palookuplight[1]); + light[2] = LightBgra::calc_light_multiplier(palookuplight[2]); + light[3] = LightBgra::calc_light_multiplier(palookuplight[3]); ShadeConstants shade_constants = _shade_constants; @@ -1101,7 +1111,8 @@ class VecCommand(Tmvline4RevSubClampRGBA) : public DrawerCommand ShadeConstants _shade_constants; fixed_t _srcalpha; fixed_t _destalpha; - int tmvlinebits; + int _tmvlinebits; + uint32_t _tmvlinemax; fixed_t palookuplight[4]; DWORD vplce[4]; DWORD vince[4]; @@ -1116,7 +1127,8 @@ public: _shade_constants = dc_shade_constants; _srcalpha = dc_srcalpha; _destalpha = dc_destalpha; - tmvlinebits = ::tmvlinebits; + _tmvlinebits = tmvlinebits; + _tmvlinemax = tmvlinemax; for (int i = 0; i < 4; i++) { palookuplight[i] = ::palookuplight[i]; @@ -1134,13 +1146,13 @@ public: uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); int pitch = _pitch * thread->num_cores; - int bits = tmvlinebits; + int bits = _tmvlinebits; uint32_t light[4]; - light[0] = calc_light_multiplier(palookuplight[0]); - light[1] = calc_light_multiplier(palookuplight[1]); - light[2] = calc_light_multiplier(palookuplight[2]); - light[3] = calc_light_multiplier(palookuplight[3]); + light[0] = LightBgra::calc_light_multiplier(palookuplight[0]); + light[1] = LightBgra::calc_light_multiplier(palookuplight[1]); + light[2] = LightBgra::calc_light_multiplier(palookuplight[2]); + light[3] = LightBgra::calc_light_multiplier(palookuplight[3]); ShadeConstants shade_constants = _shade_constants; diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp index e239674e8c..c39fdc2874 100644 --- a/src/r_drawt_rgba.cpp +++ b/src/r_drawt_rgba.cpp @@ -185,7 +185,7 @@ public: if 
(count <= 0) return; - uint32_t light = calc_light_multiplier(_light); + uint32_t light = LightBgra::calc_light_multiplier(_light); ShadeConstants shade_constants = _shade_constants; dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); @@ -196,7 +196,7 @@ public: BYTE *colormap = _colormap; if (count & 1) { - *dest = shade_pal_index(colormap[*source], light, shade_constants); + *dest = LightBgra::shade_pal_index(colormap[*source], light, shade_constants); source += sincr; dest += pitch; } @@ -204,8 +204,8 @@ public: return; do { - dest[0] = shade_pal_index(colormap[source[0]], light, shade_constants); - dest[pitch] = shade_pal_index(colormap[source[sincr]], light, shade_constants); + dest[0] = LightBgra::shade_pal_index(colormap[source[0]], light, shade_constants); + dest[pitch] = LightBgra::shade_pal_index(colormap[source[sincr]], light, shade_constants); source += sincr * 2; dest += pitch * 2; } while (--count); @@ -249,7 +249,7 @@ public: if (count <= 0) return; - uint32_t light = calc_light_multiplier(_light); + uint32_t light = LightBgra::calc_light_multiplier(_light); ShadeConstants shade_constants = _shade_constants; dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); @@ -260,10 +260,10 @@ public: BYTE *colormap = _colormap; if (count & 1) { - dest[0] = shade_pal_index(colormap[source[0]], light, shade_constants); - dest[1] = shade_pal_index(colormap[source[1]], light, shade_constants); - dest[2] = shade_pal_index(colormap[source[2]], light, shade_constants); - dest[3] = shade_pal_index(colormap[source[3]], light, shade_constants); + dest[0] = LightBgra::shade_pal_index(colormap[source[0]], light, shade_constants); + dest[1] = LightBgra::shade_pal_index(colormap[source[1]], light, shade_constants); + dest[2] = LightBgra::shade_pal_index(colormap[source[2]], light, shade_constants); + dest[3] = LightBgra::shade_pal_index(colormap[source[3]], light, shade_constants); source += sincr; dest += pitch; 
} @@ -271,14 +271,14 @@ public: return; do { - dest[0] = shade_pal_index(colormap[source[0]], light, shade_constants); - dest[1] = shade_pal_index(colormap[source[1]], light, shade_constants); - dest[2] = shade_pal_index(colormap[source[2]], light, shade_constants); - dest[3] = shade_pal_index(colormap[source[3]], light, shade_constants); - dest[pitch] = shade_pal_index(colormap[source[sincr]], light, shade_constants); - dest[pitch + 1] = shade_pal_index(colormap[source[sincr + 1]], light, shade_constants); - dest[pitch + 2] = shade_pal_index(colormap[source[sincr + 2]], light, shade_constants); - dest[pitch + 3] = shade_pal_index(colormap[source[sincr + 3]], light, shade_constants); + dest[0] = LightBgra::shade_pal_index(colormap[source[0]], light, shade_constants); + dest[1] = LightBgra::shade_pal_index(colormap[source[1]], light, shade_constants); + dest[2] = LightBgra::shade_pal_index(colormap[source[2]], light, shade_constants); + dest[3] = LightBgra::shade_pal_index(colormap[source[3]], light, shade_constants); + dest[pitch] = LightBgra::shade_pal_index(colormap[source[sincr]], light, shade_constants); + dest[pitch + 1] = LightBgra::shade_pal_index(colormap[source[sincr + 1]], light, shade_constants); + dest[pitch + 2] = LightBgra::shade_pal_index(colormap[source[sincr + 2]], light, shade_constants); + dest[pitch + 3] = LightBgra::shade_pal_index(colormap[source[sincr + 3]], light, shade_constants); source += sincr * 2; dest += pitch * 2; } while (--count); @@ -453,7 +453,7 @@ public: pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(_light); + uint32_t light = LightBgra::calc_light_multiplier(_light); ShadeConstants shade_constants = _shade_constants; BYTE *colormap = _colormap; @@ -461,7 +461,7 @@ public: uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { - uint32_t fg = shade_pal_index(colormap[*source], light, shade_constants); + uint32_t fg = LightBgra::shade_pal_index(colormap[*source], 
light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -528,7 +528,7 @@ public: pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(_light); + uint32_t light = LightBgra::calc_light_multiplier(_light); ShadeConstants shade_constants = _shade_constants; BYTE *colormap = _colormap; @@ -538,7 +538,7 @@ public: do { for (int i = 0; i < 4; i++) { - uint32_t fg = shade_pal_index(colormap[source[i]], light, shade_constants); + uint32_t fg = LightBgra::shade_pal_index(colormap[source[i]], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -606,7 +606,7 @@ public: pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t fg = shade_pal_index_simple(_color, calc_light_multiplier(_light)); + uint32_t fg = LightBgra::shade_pal_index_simple(_color, LightBgra::calc_light_multiplier(_light)); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -674,7 +674,7 @@ public: pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t fg = shade_pal_index_simple(_color, calc_light_multiplier(_light)); + uint32_t fg = LightBgra::shade_pal_index_simple(_color, LightBgra::calc_light_multiplier(_light)); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -747,14 +747,14 @@ public: pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(_light); + uint32_t light = LightBgra::calc_light_multiplier(_light); ShadeConstants shade_constants = _shade_constants; uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { - uint32_t fg = shade_pal_index(*source, light, shade_constants); + uint32_t fg = LightBgra::shade_pal_index(*source, light, 
shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -818,7 +818,7 @@ public: pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(_light); + uint32_t light = LightBgra::calc_light_multiplier(_light); ShadeConstants shade_constants = _shade_constants; uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); @@ -827,7 +827,7 @@ public: do { for (int i = 0; i < 4; i++) { - uint32_t fg = shade_pal_index(source[i], light, shade_constants); + uint32_t fg = LightBgra::shade_pal_index(source[i], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -894,14 +894,14 @@ public: pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(_light); + uint32_t light = LightBgra::calc_light_multiplier(_light); ShadeConstants shade_constants = _shade_constants; uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { - uint32_t fg = shade_pal_index(*source, light, shade_constants); + uint32_t fg = LightBgra::shade_pal_index(*source, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -965,7 +965,7 @@ public: pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(_light); + uint32_t light = LightBgra::calc_light_multiplier(_light); ShadeConstants shade_constants = _shade_constants; uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); @@ -974,7 +974,7 @@ public: do { for (int i = 0; i < 4; i++) { - uint32_t fg = shade_pal_index(source[i], light, shade_constants); + uint32_t fg = LightBgra::shade_pal_index(source[i], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -1042,14 +1042,14 @@ 
public: pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(_light); + uint32_t light = LightBgra::calc_light_multiplier(_light); ShadeConstants shade_constants = _shade_constants; uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { - uint32_t fg = shade_pal_index(*source, light, shade_constants); + uint32_t fg = LightBgra::shade_pal_index(*source, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -1113,7 +1113,7 @@ public: pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(_light); + uint32_t light = LightBgra::calc_light_multiplier(_light); ShadeConstants shade_constants = _shade_constants; uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); @@ -1122,7 +1122,7 @@ public: do { for (int i = 0; i < 4; i++) { - uint32_t fg = shade_pal_index(source[i], light, shade_constants); + uint32_t fg = LightBgra::shade_pal_index(source[i], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; diff --git a/src/r_drawt_rgba_sse.h b/src/r_drawt_rgba_sse.h index 684be2b6ae..64a77e2882 100644 --- a/src/r_drawt_rgba_sse.h +++ b/src/r_drawt_rgba_sse.h @@ -48,7 +48,7 @@ public: return; ShadeConstants shade_constants = _shade_constants; - uint32_t light = calc_light_multiplier(_light); + uint32_t light = LightBgra::calc_light_multiplier(_light); uint32_t *palette = (uint32_t*)GPalette.BaseColors; dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); @@ -207,7 +207,7 @@ public: pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(_light); + uint32_t light = LightBgra::calc_light_multiplier(_light); uint32_t *palette = (uint32_t*)GPalette.BaseColors; BYTE *colormap = _colormap; @@ -335,7 
+335,7 @@ public: pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - __m128i fg = _mm_unpackhi_epi8(_mm_set1_epi32(shade_pal_index_simple(_color, calc_light_multiplier(_light))), _mm_setzero_si128()); + __m128i fg = _mm_unpackhi_epi8(_mm_set1_epi32(LightBgra::shade_pal_index_simple(_color, LightBgra::calc_light_multiplier(_light))), _mm_setzero_si128()); __m128i alpha_one = _mm_set1_epi16(64); do { @@ -411,7 +411,7 @@ public: pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(_light); + uint32_t light = LightBgra::calc_light_multiplier(_light); uint32_t *palette = (uint32_t*)GPalette.BaseColors; uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); @@ -538,7 +538,7 @@ public: pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(_light); + uint32_t light = LightBgra::calc_light_multiplier(_light); uint32_t *palette = (uint32_t*)GPalette.BaseColors; ShadeConstants shade_constants = _shade_constants; @@ -664,7 +664,7 @@ public: pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(_light); + uint32_t light = LightBgra::calc_light_multiplier(_light); uint32_t *palette = (uint32_t*)GPalette.BaseColors; ShadeConstants shade_constants = _shade_constants; diff --git a/src/r_plane.cpp b/src/r_plane.cpp index 6913db9183..0ede451e06 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -514,8 +514,8 @@ void R_MapColoredPlane_rgba(int y, int x1) { uint32_t *dest = ylookup[y] + x1 + (uint32_t*)dc_destorg; int count = (spanend[y] - x1 + 1); - uint32_t light = calc_light_multiplier(ds_light); - uint32_t color = shade_pal_index_simple(ds_color, light); + uint32_t light = LightBgra::calc_light_multiplier(ds_light); + uint32_t color = LightBgra::shade_pal_index_simple(ds_color, light); for (int i = 0; i < count; i++) dest[i] = color; } diff --git a/src/r_segs.cpp b/src/r_segs.cpp index 
84c967d1dd..95dd287aa3 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -1276,7 +1276,7 @@ typedef void(*Draw4ColumnsFuncPtr)(); void wallscan_any( int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x), - void(setupwallscan(int bits,Draw1ColumnFuncPtr &draw1, Draw4ColumnsFuncPtr &draw2))) + void(setupwallscan(int bits, int fracmax, Draw1ColumnFuncPtr &draw1, Draw4ColumnsFuncPtr &draw2))) { if (rw_pic->UseType == FTexture::TEX_Null) return; @@ -1286,7 +1286,7 @@ void wallscan_any( DWORD(*draw1column)(); void(*draw4columns)(); - setupwallscan(32 - rw_pic->HeightBits, draw1column, draw4columns); + setupwallscan(32 - rw_pic->HeightBits, (rw_pic->GetHeight() - 1) << (32 - rw_pic->HeightBits), draw1column, draw4columns); bool fixed = (fixedcolormap != NULL || fixedlightlev >= 0); if (fixed) @@ -1439,9 +1439,9 @@ void wallscan_any( void wallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x)) { - wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, [](int bits, Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) + wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, [](int bits, int fracmax, Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) { - setupvline(bits); + setupvline(bits, fracmax); line1 = dovline1; line4 = dovline4; }); @@ -1455,9 +1455,9 @@ void maskwallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t } else { - wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, [](int bits, Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) + wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, [](int bits, int fracmax, Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) { - setupmvline(bits); + setupmvline(bits, fracmax); line1 = domvline1; line4 = domvline4; }); @@ -1475,9 +1475,9 @@ void transmaskwallscan(int x1, int x2, short *uwal, short 
*dwal, float *swal, fi } else { - wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, [](int bits, Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) + wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, [](int bits, int fracmax, Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) { - setuptmvline(bits); + setuptmvline(bits, fracmax); line1 = reinterpret_cast(tmvline1); line4 = tmvline4; }); diff --git a/src/r_things.cpp b/src/r_things.cpp index e1f1017f36..74707ff728 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -2732,7 +2732,7 @@ void R_DrawParticle_rgba(vissprite_t *vis) DrawerCommandQueue::WaitForWorkers(); - uint32_t fg = shade_pal_index_simple(color, calc_light_multiplier(LIGHTSCALE(0, vis->Style.ColormapNum << FRACBITS))); + uint32_t fg = LightBgra::shade_pal_index_simple(color, LightBgra::calc_light_multiplier(LIGHTSCALE(0, vis->Style.ColormapNum << FRACBITS))); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; diff --git a/src/v_draw.cpp b/src/v_draw.cpp index fd12a15871..0fb4333431 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -1026,7 +1026,7 @@ void DCanvas::PUTTRANSDOT (int xx, int yy, int basecolor, int level) { uint32_t *spot = (uint32_t*)GetBuffer() + oldyyshifted + xx; - uint32_t fg = shade_pal_index_simple(basecolor, calc_light_multiplier(0)); + uint32_t fg = LightBgra::shade_pal_index_simple(basecolor, LightBgra::calc_light_multiplier(0)); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; From b7f32d1bfce120c6372d3ca453aaaba755f8207f Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 24 Jun 2016 18:05:32 +0200 Subject: [PATCH 075/912] Added LoopIterator to the drawt family of drawers --- src/r_drawt_rgba.cpp | 1193 ++++++++++++------------------------------ 1 file changed, 330 insertions(+), 863 deletions(-) diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp index 
c39fdc2874..82932b1f2f 100644 --- a/src/r_drawt_rgba.cpp +++ b/src/r_drawt_rgba.cpp @@ -84,8 +84,9 @@ extern unsigned int *horizspan[4]; ///////////////////////////////////////////////////////////////////////////// -class RtCopy1colRGBACommand : public DrawerCommand +class DrawerRt1colCommand : public DrawerCommand { +public: int hx; int sx; int yl; @@ -93,8 +94,14 @@ class RtCopy1colRGBACommand : public DrawerCommand BYTE * RESTRICT _destorg; int _pitch; -public: - RtCopy1colRGBACommand(int hx, int sx, int yl, int yh) + uint32_t _light; + ShadeConstants _shade_constants; + BYTE * RESTRICT _colormap; + + uint32_t _srcalpha; + uint32_t _destalpha; + + DrawerRt1colCommand(int hx, int sx, int yl, int yh) { this->hx = hx; this->sx = sx; @@ -103,185 +110,384 @@ public: _destorg = dc_destorg; _pitch = dc_pitch; + + _light = LightBgra::calc_light_multiplier(dc_light); + _shade_constants = dc_shade_constants; + _colormap = dc_colormap; + + _srcalpha = dc_srcalpha >> (FRACBITS - 8); + _destalpha = dc_destalpha >> (FRACBITS - 8); } - void Execute(DrawerThread *thread) override + class LoopIterator { + public: uint32_t *source; uint32_t *dest; int count; int pitch, sincr; - count = thread->count_for_thread(yl, (yh - yl + 1)); - if (count <= 0) - return; + LoopIterator(DrawerRt1colCommand *command, DrawerThread *thread) + { + count = thread->count_for_thread(command->yl, (command->yh - command->yl + 1)); + if (count <= 0) + return; - dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); - source = &thread->dc_temp_rgba[yl * 4 + hx] + thread->skipped_by_thread(yl) * 4; - pitch = _pitch * thread->num_cores; - sincr = thread->num_cores * 4; + dest = thread->dest_for_thread(command->yl, command->_pitch, ylookup[command->yl] + command->sx + (uint32_t*)command->_destorg); + source = &thread->dc_temp_rgba[command->yl * 4 + command->hx] + thread->skipped_by_thread(command->yl) * 4; + pitch = command->_pitch * thread->num_cores; + sincr = thread->num_cores 
* 4; + } - if (count & 1) { - *dest = GPalette.BaseColors[*source]; - source += sincr; + explicit operator bool() + { + return count > 0; + } + + bool next() + { dest += pitch; + source += sincr; + return (--count) != 0; } - if (count & 2) { - dest[0] = GPalette.BaseColors[source[0]]; - dest[pitch] = GPalette.BaseColors[source[sincr]]; - source += sincr * 2; - dest += pitch * 2; - } - if (!(count >>= 2)) - return; - - do { - dest[0] = GPalette.BaseColors[source[0]]; - dest[pitch] = GPalette.BaseColors[source[sincr]]; - dest[pitch * 2] = GPalette.BaseColors[source[sincr * 2]]; - dest[pitch * 3] = GPalette.BaseColors[source[sincr * 3]]; - source += sincr * 4; - dest += pitch * 4; - } while (--count); - } + }; }; -class RtMap1colRGBACommand : public DrawerCommand +class DrawerRt4colsCommand : public DrawerCommand { - int hx; +public: int sx; int yl; int yh; - fixed_t _light; + uint32_t _light; ShadeConstants _shade_constants; BYTE * RESTRICT _destorg; int _pitch; BYTE * RESTRICT _colormap; + uint32_t _srcalpha; + uint32_t _destalpha; -public: - RtMap1colRGBACommand(int hx, int sx, int yl, int yh) + DrawerRt4colsCommand(int sx, int yl, int yh) { - this->hx = hx; this->sx = sx; this->yl = yl; this->yh = yh; - _light = dc_light; + _light = LightBgra::calc_light_multiplier(dc_light); _shade_constants = dc_shade_constants; _destorg = dc_destorg; _pitch = dc_pitch; _colormap = dc_colormap; + + _srcalpha = dc_srcalpha >> (FRACBITS - 8); + _destalpha = dc_destalpha >> (FRACBITS - 8); } - void Execute(DrawerThread *thread) override + class LoopIterator { + public: uint32_t *source; uint32_t *dest; int count; int pitch; int sincr; - count = thread->count_for_thread(yl, yh - yl + 1); - if (count <= 0) - return; + LoopIterator(DrawerRt4colsCommand *command, DrawerThread *thread) + { + count = thread->count_for_thread(command->yl, command->yh - command->yl + 1); + if (count <= 0) + return; - uint32_t light = LightBgra::calc_light_multiplier(_light); - ShadeConstants 
shade_constants = _shade_constants; - - dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); - source = &thread->dc_temp_rgba[yl * 4 + hx] + thread->skipped_by_thread(yl) * 4; - pitch = _pitch * thread->num_cores; - sincr = thread->num_cores * 4; - - BYTE *colormap = _colormap; - - if (count & 1) { - *dest = LightBgra::shade_pal_index(colormap[*source], light, shade_constants); - source += sincr; - dest += pitch; + dest = thread->dest_for_thread(command->yl, command->_pitch, ylookup[command->yl] + command->sx + (uint32_t*)command->_destorg); + source = &thread->dc_temp_rgba[command->yl * 4] + thread->skipped_by_thread(command->yl) * 4; + pitch = command->_pitch * thread->num_cores; + sincr = thread->num_cores * 4; } - if (!(count >>= 1)) - return; - do { - dest[0] = LightBgra::shade_pal_index(colormap[source[0]], light, shade_constants); - dest[pitch] = LightBgra::shade_pal_index(colormap[source[sincr]], light, shade_constants); - source += sincr * 2; - dest += pitch * 2; - } while (--count); - } + explicit operator bool() + { + return count > 0; + } + + bool next() + { + dest += pitch; + source += sincr; + return (--count) != 0; + } + }; }; -class RtMap4colsRGBACommand : public DrawerCommand +class RtCopy1colRGBACommand : public DrawerRt1colCommand { - int sx; - int yl; - int yh; - fixed_t _light; - ShadeConstants _shade_constants; - BYTE * RESTRICT _destorg; - int _pitch; - BYTE * RESTRICT _colormap; - public: - RtMap4colsRGBACommand(int sx, int yl, int yh) + RtCopy1colRGBACommand(int hx, int sx, int yl, int yh) : DrawerRt1colCommand(hx, sx, yl, yh) { - this->sx = sx; - this->yl = yl; - this->yh = yh; - - _light = dc_light; - _shade_constants = dc_shade_constants; - _destorg = dc_destorg; - _pitch = dc_pitch; - _colormap = dc_colormap; } void Execute(DrawerThread *thread) override { - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - int sincr; + LoopIterator loop(this, thread); + if (!loop) return; + do + { + uint32_t 
fg = GPalette.BaseColors[*loop.source]; + *loop.dest = BlendBgra::copy(fg); + } while (loop.next()); + } +}; - count = thread->count_for_thread(yl, yh - yl + 1); - if (count <= 0) - return; +class RtMap1colRGBACommand : public DrawerRt1colCommand +{ +public: + RtMap1colRGBACommand(int hx, int sx, int yl, int yh) : DrawerRt1colCommand(hx, sx, yl, yh) + { + } - uint32_t light = LightBgra::calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; + void Execute(DrawerThread *thread) override + { + LoopIterator loop(this, thread); + if (!loop) return; + do + { + uint32_t fg = LightBgra::shade_pal_index(_colormap[*loop.source], _light, _shade_constants); + *loop.dest = BlendBgra::copy(fg); + } while (loop.next()); + } +}; - dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); - source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; - pitch = _pitch * thread->num_cores; - sincr = thread->num_cores * 4; - - BYTE *colormap = _colormap; +class RtMap4colsRGBACommand : public DrawerRt4colsCommand +{ +public: + RtMap4colsRGBACommand(int sx, int yl, int yh) : DrawerRt4colsCommand(sx, yl, yh) + { + } - if (count & 1) { - dest[0] = LightBgra::shade_pal_index(colormap[source[0]], light, shade_constants); - dest[1] = LightBgra::shade_pal_index(colormap[source[1]], light, shade_constants); - dest[2] = LightBgra::shade_pal_index(colormap[source[2]], light, shade_constants); - dest[3] = LightBgra::shade_pal_index(colormap[source[3]], light, shade_constants); - source += sincr; - dest += pitch; - } - if (!(count >>= 1)) - return; + void Execute(DrawerThread *thread) override + { + LoopIterator loop(this, thread); + if (!loop) return; + do + { + for (int i = 0; i < 4; i++) + { + uint32_t fg = LightBgra::shade_pal_index(_colormap[loop.source[i]], _light, _shade_constants); + loop.dest[i] = BlendBgra::copy(fg); + } + } while (loop.next()); + } +}; - do { - dest[0] = 
LightBgra::shade_pal_index(colormap[source[0]], light, shade_constants); - dest[1] = LightBgra::shade_pal_index(colormap[source[1]], light, shade_constants); - dest[2] = LightBgra::shade_pal_index(colormap[source[2]], light, shade_constants); - dest[3] = LightBgra::shade_pal_index(colormap[source[3]], light, shade_constants); - dest[pitch] = LightBgra::shade_pal_index(colormap[source[sincr]], light, shade_constants); - dest[pitch + 1] = LightBgra::shade_pal_index(colormap[source[sincr + 1]], light, shade_constants); - dest[pitch + 2] = LightBgra::shade_pal_index(colormap[source[sincr + 2]], light, shade_constants); - dest[pitch + 3] = LightBgra::shade_pal_index(colormap[source[sincr + 3]], light, shade_constants); - source += sincr * 2; - dest += pitch * 2; - } while (--count); +class RtAdd1colRGBACommand : public DrawerRt1colCommand +{ +public: + RtAdd1colRGBACommand(int hx, int sx, int yl, int yh) : DrawerRt1colCommand(hx, sx, yl, yh) + { + } + + void Execute(DrawerThread *thread) override + { + LoopIterator loop(this, thread); + if (!loop) return; + do + { + uint32_t fg = LightBgra::shade_pal_index(_colormap[*loop.source], _light, _shade_constants); + *loop.dest = BlendBgra::add(fg, *loop.dest, _srcalpha, _destalpha); + } while (loop.next()); + } +}; + +class RtAdd4colsRGBACommand : public DrawerRt4colsCommand +{ +public: + RtAdd4colsRGBACommand(int sx, int yl, int yh) : DrawerRt4colsCommand(sx, yl, yh) + { + } + + void Execute(DrawerThread *thread) override + { + LoopIterator loop(this, thread); + if (!loop) return; + do + { + for (int i = 0; i < 4; i++) + { + uint32_t fg = LightBgra::shade_pal_index(_colormap[loop.source[i]], _light, _shade_constants); + loop.dest[i] = BlendBgra::add(fg, loop.dest[i], _srcalpha, _destalpha); + } + } while (loop.next()); + } +}; + +class RtShaded1colRGBACommand : public DrawerRt1colCommand +{ + uint32_t _color; + +public: + RtShaded1colRGBACommand(int hx, int sx, int yl, int yh) : DrawerRt1colCommand(hx, sx, yl, yh) + { + 
_color = LightBgra::shade_pal_index(dc_color, _light, _shade_constants); + } + + void Execute(DrawerThread *thread) override + { + LoopIterator loop(this, thread); + if (!loop) return; + do + { + uint32_t alpha = _colormap[*loop.source] * 4; + uint32_t inv_alpha = 256 - alpha; + *loop.dest = BlendBgra::add(_color, *loop.dest, alpha, inv_alpha); + } while (loop.next()); + } +}; + +class RtShaded4colsRGBACommand : public DrawerRt4colsCommand +{ + uint32_t _color; + +public: + RtShaded4colsRGBACommand(int sx, int yl, int yh) : DrawerRt4colsCommand(sx, yl, yh) + { + _color = LightBgra::shade_pal_index(dc_color, _light, _shade_constants); + } + + void Execute(DrawerThread *thread) override + { + LoopIterator loop(this, thread); + if (!loop) return; + do + { + for (int i = 0; i < 4; i++) + { + uint32_t alpha = _colormap[loop.source[i]] * 4; + uint32_t inv_alpha = 256 - alpha; + loop.dest[i] = BlendBgra::add(_color, loop.dest[i], alpha, inv_alpha); + } + } while (loop.next()); + } +}; + +class RtAddClamp1colRGBACommand : public DrawerRt1colCommand +{ +public: + RtAddClamp1colRGBACommand(int hx, int sx, int yl, int yh) : DrawerRt1colCommand(hx, sx, yl, yh) + { + } + + void Execute(DrawerThread *thread) override + { + LoopIterator loop(this, thread); + if (!loop) return; + do + { + uint32_t fg = LightBgra::shade_pal_index(*loop.source, _light, _shade_constants); + *loop.dest = BlendBgra::add(fg, *loop.dest, _srcalpha, _destalpha); + } while (loop.next()); + } +}; + +class RtAddClamp4colsRGBACommand : public DrawerRt4colsCommand +{ +public: + RtAddClamp4colsRGBACommand(int sx, int yl, int yh) : DrawerRt4colsCommand(sx, yl, yh) + { + } + + void Execute(DrawerThread *thread) override + { + LoopIterator loop(this, thread); + if (!loop) return; + do + { + for (int i = 0; i < 4; i++) + { + uint32_t fg = LightBgra::shade_pal_index(loop.source[i], _light, _shade_constants); + loop.dest[i] = BlendBgra::add(fg, loop.dest[i], _srcalpha, _destalpha); + } + } while (loop.next()); + } 
+}; + +class RtSubClamp1colRGBACommand : public DrawerRt1colCommand +{ +public: + RtSubClamp1colRGBACommand(int hx, int sx, int yl, int yh) : DrawerRt1colCommand(hx, sx, yl, yh) + { + } + + void Execute(DrawerThread *thread) override + { + LoopIterator loop(this, thread); + if (!loop) return; + do + { + uint32_t fg = LightBgra::shade_pal_index(*loop.source, _light, _shade_constants); + *loop.dest = BlendBgra::sub(fg, *loop.dest, _srcalpha, _destalpha); + } while (loop.next()); + } +}; + +class RtSubClamp4colsRGBACommand : public DrawerRt4colsCommand +{ +public: + RtSubClamp4colsRGBACommand(int sx, int yl, int yh) : DrawerRt4colsCommand(sx, yl, yh) + { + } + + void Execute(DrawerThread *thread) override + { + LoopIterator loop(this, thread); + if (!loop) return; + do + { + for (int i = 0; i < 4; i++) + { + uint32_t fg = LightBgra::shade_pal_index(loop.source[i], _light, _shade_constants); + loop.dest[i] = BlendBgra::sub(fg, loop.dest[i], _srcalpha, _destalpha); + } + } while (loop.next()); + } +}; + +class RtRevSubClamp1colRGBACommand : public DrawerRt1colCommand +{ +public: + RtRevSubClamp1colRGBACommand(int hx, int sx, int yl, int yh) : DrawerRt1colCommand(hx, sx, yl, yh) + { + } + + void Execute(DrawerThread *thread) override + { + LoopIterator loop(this, thread); + if (!loop) return; + do + { + uint32_t fg = LightBgra::shade_pal_index(*loop.source, _light, _shade_constants); + *loop.dest = BlendBgra::revsub(fg, *loop.dest, _srcalpha, _destalpha); + } while (loop.next()); + } +}; + +class RtRevSubClamp4colsRGBACommand : public DrawerRt4colsCommand +{ +public: + RtRevSubClamp4colsRGBACommand(int sx, int yl, int yh) : DrawerRt4colsCommand(sx, yl, yh) + { + } + + void Execute(DrawerThread *thread) override + { + LoopIterator loop(this, thread); + if (!loop) return; + do + { + for (int i = 0; i < 4; i++) + { + uint32_t fg = LightBgra::shade_pal_index(loop.source[i], _light, _shade_constants); + loop.dest[i] = BlendBgra::revsub(fg, loop.dest[i], _srcalpha, 
_destalpha); + } + } while (loop.next()); } }; @@ -405,745 +611,6 @@ public: } }; -class RtAdd1colRGBACommand : public DrawerCommand -{ - int hx; - int sx; - int yl; - int yh; - BYTE * RESTRICT _destorg; - int _pitch; - fixed_t _light; - ShadeConstants _shade_constants; - fixed_t _srcalpha; - fixed_t _destalpha; - BYTE * RESTRICT _colormap; - -public: - RtAdd1colRGBACommand(int hx, int sx, int yl, int yh) - { - this->hx = hx; - this->sx = sx; - this->yl = yl; - this->yh = yh; - - _destorg = dc_destorg; - _pitch = dc_pitch; - _light = dc_light; - _shade_constants = dc_shade_constants; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; - _colormap = dc_colormap; - } - - void Execute(DrawerThread *thread) override - { - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - int sincr; - - count = thread->count_for_thread(yl, yh - yl + 1); - if (count <= 0) - return; - - dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); - source = &thread->dc_temp_rgba[yl * 4 + hx] + thread->skipped_by_thread(yl) * 4; - pitch = _pitch * thread->num_cores; - sincr = 4 * thread->num_cores; - - uint32_t light = LightBgra::calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; - BYTE *colormap = _colormap; - - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - - do { - uint32_t fg = LightBgra::shade_pal_index(colormap[*source], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - *dest = 0xff000000 | (red << 16) 
| (green << 8) | blue; - - source += sincr; - dest += pitch; - } while (--count); - } -}; - -class RtAdd4colsRGBACommand : public DrawerCommand -{ - int sx; - int yl; - int yh; - BYTE * RESTRICT _destorg; - int _pitch; - fixed_t _light; - ShadeConstants _shade_constants; - BYTE * RESTRICT _colormap; - fixed_t _srcalpha; - fixed_t _destalpha; - -public: - RtAdd4colsRGBACommand(int sx, int yl, int yh) - { - this->sx = sx; - this->yl = yl; - this->yh = yh; - - _destorg = dc_destorg; - _pitch = dc_pitch; - _light = dc_light; - _shade_constants = dc_shade_constants; - _colormap = dc_colormap; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; - } - - void Execute(DrawerThread *thread) override - { - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - int sincr; - - count = thread->count_for_thread(yl, yh - yl + 1); - if (count <= 0) - return; - - dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); - source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; - pitch = _pitch * thread->num_cores; - sincr = 4 * thread->num_cores; - - uint32_t light = LightBgra::calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; - BYTE *colormap = _colormap; - - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - - do { - for (int i = 0; i < 4; i++) - { - uint32_t fg = LightBgra::shade_pal_index(colormap[source[i]], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (dest[i] >> 16) & 0xff; - uint32_t bg_green = (dest[i] >> 8) & 0xff; - uint32_t bg_blue = (dest[i]) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - dest[i] = 
0xff000000 | (red << 16) | (green << 8) | blue; - } - - source += sincr; - dest += pitch; - } while (--count); - } -}; - -class RtShaded1colRGBACommand : public DrawerCommand -{ - int hx; - int sx; - int yl; - int yh; - lighttable_t * RESTRICT _colormap; - BYTE * RESTRICT _destorg; - int _pitch; - int _color; - fixed_t _light; - -public: - RtShaded1colRGBACommand(int hx, int sx, int yl, int yh) - { - this->hx = hx; - this->sx = sx; - this->yl = yl; - this->yh = yh; - - _colormap = dc_colormap; - _destorg = dc_destorg; - _pitch = dc_pitch; - _color = dc_color; - _light = dc_light; - } - - void Execute(DrawerThread *thread) override - { - BYTE *colormap; - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - int sincr; - - count = thread->count_for_thread(yl, yh - yl + 1); - if (count <= 0) - return; - - colormap = _colormap; - dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); - source = &thread->dc_temp_rgba[yl * 4 + hx] + thread->skipped_by_thread(yl) * 4; - pitch = _pitch * thread->num_cores; - sincr = 4 * thread->num_cores; - - uint32_t fg = LightBgra::shade_pal_index_simple(_color, LightBgra::calc_light_multiplier(_light)); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - do { - uint32_t alpha = colormap[*source]; - uint32_t inv_alpha = 64 - alpha; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = (fg_red * alpha + bg_red * inv_alpha) / 64; - uint32_t green = (fg_green * alpha + bg_green * inv_alpha) / 64; - uint32_t blue = (fg_blue * alpha + bg_blue * inv_alpha) / 64; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - source += sincr; - dest += pitch; - } while (--count); - } -}; - -class RtShaded4colsRGBACommand : public DrawerCommand -{ - int sx; - int yl; - int yh; - lighttable_t * RESTRICT _colormap; - int _color; - BYTE * RESTRICT _destorg; - 
int _pitch; - fixed_t _light; - -public: - RtShaded4colsRGBACommand(int sx, int yl, int yh) - { - this->sx = sx; - this->yl = yl; - this->yh = yh; - - _colormap = dc_colormap; - _color = dc_color; - _destorg = dc_destorg; - _pitch = dc_pitch; - _light = dc_light; - } - - void Execute(DrawerThread *thread) override - { - BYTE *colormap; - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - int sincr; - - count = thread->count_for_thread(yl, yh - yl + 1); - if (count <= 0) - return; - - colormap = _colormap; - dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); - source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; - pitch = _pitch * thread->num_cores; - sincr = 4 * thread->num_cores; - - uint32_t fg = LightBgra::shade_pal_index_simple(_color, LightBgra::calc_light_multiplier(_light)); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - do { - for (int i = 0; i < 4; i++) - { - uint32_t alpha = colormap[source[i]]; - uint32_t inv_alpha = 64 - alpha; - - uint32_t bg_red = (dest[i] >> 16) & 0xff; - uint32_t bg_green = (dest[i] >> 8) & 0xff; - uint32_t bg_blue = (dest[i]) & 0xff; - - uint32_t red = (fg_red * alpha + bg_red * inv_alpha) / 64; - uint32_t green = (fg_green * alpha + bg_green * inv_alpha) / 64; - uint32_t blue = (fg_blue * alpha + bg_blue * inv_alpha) / 64; - - dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - } - source += sincr; - dest += pitch; - } while (--count); - } -}; - -class RtAddClamp1colRGBACommand : public DrawerCommand -{ - int hx; - int sx; - int yl; - int yh; - BYTE * RESTRICT _destorg; - int _pitch; - fixed_t _light; - ShadeConstants _shade_constants; - fixed_t _srcalpha; - fixed_t _destalpha; - -public: - RtAddClamp1colRGBACommand(int hx, int sx, int yl, int yh) - { - this->hx = hx; - this->sx = sx; - this->yl = yl; - this->yh = yh; - - _destorg = dc_destorg; - _pitch = dc_pitch; - _light = dc_light; - 
_shade_constants = dc_shade_constants; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; - } - - void Execute(DrawerThread *thread) override - { - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - int sincr; - - count = thread->count_for_thread(yl, yh - yl + 1); - if (count <= 0) - return; - - dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); - source = &thread->dc_temp_rgba[yl * 4 + hx] + thread->skipped_by_thread(yl) * 4; - pitch = _pitch * thread->num_cores; - sincr = 4 * thread->num_cores; - - uint32_t light = LightBgra::calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; - - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - - do { - uint32_t fg = LightBgra::shade_pal_index(*source, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - source += sincr; - dest += pitch; - } while (--count); - } -}; - -class RtAddClamp4colsRGBACommand : public DrawerCommand -{ - int sx; - int yl; - int yh; - BYTE * RESTRICT _destorg; - int _pitch; - fixed_t _light; - fixed_t _srcalpha; - fixed_t _destalpha; - ShadeConstants _shade_constants; - -public: - RtAddClamp4colsRGBACommand(int sx, int yl, int yh) - { - this->sx = sx; - this->yl = yl; - this->yh = yh; - - _destorg = dc_destorg; - _pitch = dc_pitch; - _light = dc_light; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; - _shade_constants = dc_shade_constants; - } - - void 
Execute(DrawerThread *thread) override - { - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - int sincr; - - count = thread->count_for_thread(yl, yh - yl + 1); - if (count <= 0) - return; - - dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); - source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; - pitch = _pitch * thread->num_cores; - sincr = 4 * thread->num_cores; - - uint32_t light = LightBgra::calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; - - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - - do { - for (int i = 0; i < 4; i++) - { - uint32_t fg = LightBgra::shade_pal_index(source[i], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (dest[i] >> 16) & 0xff; - uint32_t bg_green = (dest[i] >> 8) & 0xff; - uint32_t bg_blue = (dest[i]) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - } - source += sincr; - dest += pitch; - } while (--count); - } -}; - -class RtSubClamp1colRGBACommand : public DrawerCommand -{ - int hx; - int sx; - int yl; - int yh; - BYTE * RESTRICT _destorg; - int _pitch; - fixed_t _light; - fixed_t _srcalpha; - fixed_t _destalpha; - ShadeConstants _shade_constants; - -public: - RtSubClamp1colRGBACommand(int hx, int sx, int yl, int yh) - { - this->hx = hx; - this->sx = sx; - this->yl = yl; - this->yh = yh; - - _destorg = dc_destorg; - _pitch = dc_pitch; - _light = dc_light; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; - _shade_constants = dc_shade_constants; - } - - void Execute(DrawerThread *thread) 
override - { - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - int sincr; - - count = thread->count_for_thread(yl, yh - yl + 1); - if (count <= 0) - return; - - dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); - source = &thread->dc_temp_rgba[yl * 4 + hx] + thread->skipped_by_thread(yl) * 4; - pitch = _pitch * thread->num_cores; - sincr = 4 * thread->num_cores; - - uint32_t light = LightBgra::calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; - - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - - do { - uint32_t fg = LightBgra::shade_pal_index(*source, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - source += sincr; - dest += pitch; - } while (--count); - } -}; - -class RtSubClamp4colsRGBACommand : public DrawerCommand -{ - int sx; - int yl; - int yh; - BYTE * RESTRICT _destorg; - int _pitch; - fixed_t _light; - fixed_t _srcalpha; - fixed_t _destalpha; - ShadeConstants _shade_constants; - -public: - RtSubClamp4colsRGBACommand(int sx, int yl, int yh) - { - this->sx = sx; - this->yl = yl; - this->yh = yh; - - _destorg = dc_destorg; - _pitch = dc_pitch; - _light = dc_light; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; - _shade_constants = dc_shade_constants; - } - - void Execute(DrawerThread *thread) override - { - uint32_t *source; - 
uint32_t *dest; - int count; - int pitch; - int sincr; - - count = thread->count_for_thread(yl, yh - yl + 1); - if (count <= 0) - return; - - dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); - source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; - pitch = _pitch * thread->num_cores; - sincr = 4 * thread->num_cores; - - uint32_t light = LightBgra::calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; - - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - - do { - for (int i = 0; i < 4; i++) - { - uint32_t fg = LightBgra::shade_pal_index(source[i], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (dest[i] >> 16) & 0xff; - uint32_t bg_green = (dest[i] >> 8) & 0xff; - uint32_t bg_blue = (dest[i]) & 0xff; - - uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; - - dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - } - - source += sincr; - dest += pitch; - } while (--count); - } -}; - -class RtRevSubClamp1colRGBACommand : public DrawerCommand -{ - int hx; - int sx; - int yl; - int yh; - BYTE * RESTRICT _destorg; - int _pitch; - fixed_t _light; - fixed_t _srcalpha; - fixed_t _destalpha; - ShadeConstants _shade_constants; - -public: - RtRevSubClamp1colRGBACommand(int hx, int sx, int yl, int yh) - { - this->hx = hx; - this->sx = sx; - this->yl = yl; - this->yh = yh; - - _destorg = dc_destorg; - _pitch = dc_pitch; - _light = dc_light; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; - _shade_constants = dc_shade_constants; - } - - void Execute(DrawerThread 
*thread) override - { - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - int sincr; - - count = thread->count_for_thread(yl, yh - yl + 1); - if (count <= 0) - return; - - dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); - source = &thread->dc_temp_rgba[yl * 4 + hx] + thread->skipped_by_thread(yl) * 4; - pitch = _pitch * thread->num_cores; - sincr = 4 * thread->num_cores; - - uint32_t light = LightBgra::calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; - - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - - do { - uint32_t fg = LightBgra::shade_pal_index(*source, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - source += sincr; - dest += pitch; - } while (--count); - } -}; - -class RtRevSubClamp4colsRGBACommand : public DrawerCommand -{ - int sx; - int yl; - int yh; - BYTE * RESTRICT _destorg; - int _pitch; - fixed_t _light; - fixed_t _srcalpha; - fixed_t _destalpha; - ShadeConstants _shade_constants; - -public: - RtRevSubClamp4colsRGBACommand(int sx, int yl, int yh) - { - this->sx = sx; - this->yl = yl; - this->yh = yh; - - _destorg = dc_destorg; - _pitch = dc_pitch; - _light = dc_light; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; - _shade_constants = dc_shade_constants; - } - - void Execute(DrawerThread *thread) override - { - uint32_t 
*source; - uint32_t *dest; - int count; - int pitch; - int sincr; - - count = thread->count_for_thread(yl, yh - yl + 1); - if (count <= 0) - return; - - dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); - source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; - pitch = _pitch * thread->num_cores; - sincr = 4 * thread->num_cores; - - uint32_t light = LightBgra::calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; - - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - - do { - for (int i = 0; i < 4; i++) - { - uint32_t fg = LightBgra::shade_pal_index(source[i], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (dest[i] >> 16) & 0xff; - uint32_t bg_green = (dest[i] >> 8) & 0xff; - uint32_t bg_blue = (dest[i]) & 0xff; - - uint32_t red = clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; - - dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - } - - source += sincr; - dest += pitch; - } while (--count); - } -}; - class RtInitColsRGBACommand : public DrawerCommand { BYTE * RESTRICT buff; From 8ec420a597ee40f52aa0de394d782784a02c6cb9 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 24 Jun 2016 19:05:04 +0200 Subject: [PATCH 076/912] Added support for more texture filtering control --- src/r_draw.h | 6 +++++- src/r_draw_rgba.cpp | 15 ++++++++------- src/r_draw_rgba.h | 9 +++------ src/r_draw_rgba_sse.h | 6 +++--- src/r_segs.cpp | 33 ++++++++++++++++++++++----------- 5 files changed, 41 insertions(+), 28 deletions(-) diff --git a/src/r_draw.h b/src/r_draw.h index 
bd477efc42..6a078b08ff 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -379,6 +379,10 @@ void R_SetDSColorMapLight(FColormap *base_colormap, float light, int shade); void R_SetTranslationMap(lighttable_t *translation); extern bool r_swtruecolor; -EXTERN_CVAR(Bool, r_bilinear); + +EXTERN_CVAR(Bool, r_multithreaded); +EXTERN_CVAR(Bool, r_magfilter_linear); +EXTERN_CVAR(Bool, r_minfilter_linear); +EXTERN_CVAR(Bool, r_mipmap); #endif diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index dc97fdd478..5a6e88e3bf 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -60,9 +60,10 @@ extern float rw_light; extern float rw_lightstep; extern int wallshade; -CVAR(Bool, r_multithreaded, true, 0) -CVAR(Bool, r_bilinear, true, 0) -CVAR(Bool, r_mipmap, true, 0) +CVAR(Bool, r_multithreaded, true, 0); +CVAR(Bool, r_magfilter_linear, false, 0); +CVAR(Bool, r_minfilter_linear, false, 0); +CVAR(Bool, r_mipmap, true, 0); #ifndef NO_SSE @@ -904,7 +905,7 @@ public: const uint32_t * RESTRICT _source; uint32_t _light; ShadeConstants _shade_constants; - bool _magnifying; + bool _nearest_filter; uint32_t _srcalpha; uint32_t _destalpha; @@ -925,7 +926,7 @@ public: _source = (const uint32_t*)ds_source; _light = LightBgra::calc_light_multiplier(ds_light); _shade_constants = ds_shade_constants; - _magnifying = !SampleBgra::span_sampler_setup(_source, _xbits, _ybits, _xstep, _ystep); + _nearest_filter = !SampleBgra::span_sampler_setup(_source, _xbits, _ybits, _xstep, _ystep); _srcalpha = dc_srcalpha >> (FRACBITS - 8); _destalpha = dc_destalpha >> (FRACBITS - 8); @@ -995,7 +996,7 @@ public: LoopIterator loop(this, thread); if (!loop) return; - if (_magnifying) + if (_nearest_filter) { if (loop.is_64x64) { @@ -1040,7 +1041,7 @@ public: LoopIterator loop(this, thread); if (!loop) return; - if (_magnifying) + if (_nearest_filter) { if (loop.is_64x64) { diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index 20fff4fc06..56f1faa240 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ 
-461,15 +461,11 @@ class SampleBgra public: inline static bool span_sampler_setup(const uint32_t * RESTRICT &source, int &xbits, int &ybits, fixed_t xstep, fixed_t ystep) { - if (!r_bilinear) - return false; - // Is this a magfilter or minfilter? fixed_t xmagnitude = abs(xstep) >> (32 - xbits - FRACBITS); fixed_t ymagnitude = abs(ystep) >> (32 - ybits - FRACBITS); fixed_t magnitude = (xmagnitude + ymagnitude) * 2 + (1 << (FRACBITS - 1)); - if (magnitude >> FRACBITS == 0) - return false; + bool magnifying = (magnitude >> FRACBITS == 0); if (r_mipmap) { @@ -485,7 +481,8 @@ public: level >>= 1; } } - return true; + + return (magnifying && r_magfilter_linear) || (!magnifying && r_minfilter_linear); } FORCEINLINE static uint32_t sample_bilinear(const uint32_t *col0, const uint32_t *col1, uint32_t texturefracx, uint32_t texturefracy, int ybits, uint32_t ymax) diff --git a/src/r_draw_rgba_sse.h b/src/r_draw_rgba_sse.h index 408a2f5a23..bca30185c1 100644 --- a/src/r_draw_rgba_sse.h +++ b/src/r_draw_rgba_sse.h @@ -25,7 +25,7 @@ class VecCommand(DrawSpanRGBA) : public DrawerCommand BYTE * RESTRICT _destorg; fixed_t _light; ShadeConstants _shade_constants; - bool _magnifying; + bool _nearest_filter; public: VecCommand(DrawSpanRGBA)() @@ -43,7 +43,7 @@ public: _destorg = dc_destorg; _light = ds_light; _shade_constants = ds_shade_constants; - _magnifying = !SampleBgra::span_sampler_setup(_source, _xbits, _ybits, _xstep, _ystep); + _nearest_filter = !SampleBgra::span_sampler_setup(_source, _xbits, _ybits, _xstep, _ystep); } void Execute(DrawerThread *thread) override @@ -73,7 +73,7 @@ public: uint32_t light = LightBgra::calc_light_multiplier(_light); ShadeConstants shade_constants = _shade_constants; - if (_magnifying) + if (_nearest_filter) { if (_xbits == 6 && _ybits == 6) { diff --git a/src/r_segs.cpp b/src/r_segs.cpp index 95dd287aa3..5c90373753 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -50,6 +50,7 @@ #include "r_plane.h" #include "r_segs.h" #include "r_3dfloors.h" 
+#include "r_draw.h" #include "v_palette.h" #include "r_data/colormaps.h" @@ -58,8 +59,6 @@ CVAR(Bool, r_np2, true, 0) -EXTERN_CVAR(Bool, r_bilinear) - //CVAR (Int, ty, 8, 0) //CVAR (Int, tx, 8, 0) @@ -1104,8 +1103,7 @@ WallscanSampler::WallscanSampler(int y1, float swal, double yrepeat, fixed_t xof bool magnifying = uv_step >> (uv_fracbits - 1) == 0; - // Only do bilinear filtering if enabled and not a magnifying filter - if (!r_swtruecolor || !r_bilinear || magnifying || getcol != R_GetColumn) + if (!r_swtruecolor || getcol != R_GetColumn) { source = getcol(texture, xoffset >> FRACBITS); source2 = nullptr; @@ -1138,13 +1136,26 @@ WallscanSampler::WallscanSampler(int y1, float swal, double yrepeat, fixed_t xof const uint32_t *pixels = texture->GetPixelsBgra() + mipmap_offset; - int tx0 = ((xoffset - FRACUNIT / 2) >> FRACBITS) % mip_width; - if (tx0 < 0) - tx0 += mip_width; - int tx1 = (tx0 + 1) % mip_width; - source = (BYTE*)(pixels + tx0 * mip_height); - source2 = (BYTE*)(pixels + tx1 * mip_height); - texturefracx = ((xoffset + FRACUNIT / 2) >> (FRACBITS - 4)) & 15; + bool filter_nearest = (magnifying && !r_magfilter_linear) || (!magnifying && !r_minfilter_linear); + if (filter_nearest) + { + int tx = (xoffset >> FRACBITS) % mip_width; + if (tx < 0) + tx += mip_width; + source = (BYTE*)(pixels + tx * mip_height); + source2 = nullptr; + texturefracx = 0; + } + else + { + int tx0 = ((xoffset - FRACUNIT / 2) >> FRACBITS) % mip_width; + if (tx0 < 0) + tx0 += mip_width; + int tx1 = (tx0 + 1) % mip_width; + source = (BYTE*)(pixels + tx0 * mip_height); + source2 = (BYTE*)(pixels + tx1 * mip_height); + texturefracx = ((xoffset + FRACUNIT / 2) >> (FRACBITS - 4)) & 15; + } } } From 77054639666f967c7f885e63205ed7978203d3f0 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 25 Jun 2016 10:33:35 +0200 Subject: [PATCH 077/912] Improved linear filtering of walls Fixed some crash bugs Added mipmap and filtering options to the display menu --- src/r_draw.cpp | 16 +- 
src/r_draw.h | 13 +- src/r_draw_rgba.cpp | 120 ++++----------- src/r_draw_rgba.h | 35 ++--- src/r_draw_rgba_sse.h | 194 +++++++++++++----------- src/r_segs.cpp | 292 +++++++++++++++++++++++-------------- src/textures/textures.h | 4 + wadsrc/static/language.enu | 3 + wadsrc/static/menudef.txt | 3 + 9 files changed, 364 insertions(+), 316 deletions(-) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 578ca9646e..682ed46688 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -154,6 +154,7 @@ int dc_yl; int dc_yh; fixed_t dc_iscale; fixed_t dc_texturefrac; +uint32_t dc_textureheight; int dc_color; // [RH] Color for column filler DWORD dc_srccolor; uint32_t dc_srccolor_bgra; @@ -177,6 +178,7 @@ fixed_t palookuplight[4]; const BYTE* bufplce[4]; const BYTE* bufplce2[4]; uint32_t buftexturefracx[4]; +uint32_t bufheight[4]; // just for profiling int dccount; @@ -1044,6 +1046,7 @@ int ds_ybits; // start of a floor/ceiling tile image const BYTE* ds_source; +bool ds_source_mipmapped; // just for profiling int dscount; @@ -1067,6 +1070,7 @@ extern "C" BYTE *ds_curcolormap, *ds_cursource, *ds_curtiltedsource; void R_SetSpanSource(FTexture *tex) { ds_source = r_swtruecolor ? 
(const BYTE*)tex->GetPixelsBgra() : tex->GetPixels(); + ds_source_mipmapped = tex->Mipmapped(); #ifdef X86_ASM if (!r_swtruecolor && ds_cursource != ds_source) { @@ -1644,8 +1648,6 @@ extern "C" void R_DrawSlabC(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *v int vlinebits; int mvlinebits; -uint32_t vlinemax; -uint32_t mvlinemax; #ifndef X86_ASM static DWORD vlinec1 (); @@ -1695,12 +1697,11 @@ DWORD (*domvline1)() = mvlineasm1; void (*domvline4)() = mvlineasm4; #endif -void setupvline (int fracbits, int fracmax) +void setupvline (int fracbits) { if (r_swtruecolor) { vlinebits = fracbits; - vlinemax = fracmax; return; } @@ -1780,7 +1781,7 @@ void vlinec4 () } #endif -void setupmvline (int fracbits, int fracmax) +void setupmvline (int fracbits) { if (!r_swtruecolor) { @@ -1795,7 +1796,6 @@ void setupmvline (int fracbits, int fracmax) else { mvlinebits = fracbits; - mvlinemax = fracmax; } } @@ -1968,12 +1968,10 @@ void R_DrawFogBoundary_C (int x1, int x2, short *uclip, short *dclip) } int tmvlinebits; -uint32_t tmvlinemax; -void setuptmvline (int bits, int fracmax) +void setuptmvline (int bits) { tmvlinebits = bits; - tmvlinemax = fracmax; } fixed_t tmvline1_add_C () diff --git a/src/r_draw.h b/src/r_draw.h index 6a078b08ff..591ae0b5fe 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -61,6 +61,7 @@ extern "C" int dc_yh; extern "C" fixed_t dc_iscale; extern double dc_texturemid; extern "C" fixed_t dc_texturefrac; +extern "C" uint32_t dc_textureheight; extern "C" int dc_color; // [RH] For flat colors (no texturing) extern "C" DWORD dc_srccolor; extern "C" uint32_t dc_srccolor_bgra; @@ -84,6 +85,7 @@ extern "C" fixed_t palookuplight[4]; extern "C" const BYTE* bufplce[4]; extern "C" const BYTE* bufplce2[4]; extern "C" uint32_t buftexturefracx[4]; +extern "C" uint32_t bufheight[4]; // [RH] Temporary buffer for column drawing extern "C" BYTE *dc_temp; @@ -100,13 +102,13 @@ extern void (*R_DrawColumn)(void); extern DWORD (*dovline1) (); extern DWORD (*doprevline1) (); 
extern void (*dovline4) (); -extern void setupvline (int,int); +extern void setupvline (int); extern DWORD (*domvline1) (); extern void (*domvline4) (); -extern void setupmvline (int,int); +extern void setupmvline (int); -extern void setuptmvline (int,int); +extern void setuptmvline (int); // The Spectre/Invisibility effect. extern void (*R_DrawFuzzColumn)(void); @@ -316,6 +318,7 @@ extern "C" fixed_t ds_alpha; // start of a 64*64 tile image extern "C" const BYTE* ds_source; +extern "C" bool ds_source_mipmapped; extern "C" int ds_color; // [RH] For flat color (no texturing) @@ -381,8 +384,8 @@ void R_SetTranslationMap(lighttable_t *translation); extern bool r_swtruecolor; EXTERN_CVAR(Bool, r_multithreaded); -EXTERN_CVAR(Bool, r_magfilter_linear); -EXTERN_CVAR(Bool, r_minfilter_linear); +EXTERN_CVAR(Bool, r_magfilter); +EXTERN_CVAR(Bool, r_minfilter); EXTERN_CVAR(Bool, r_mipmap); #endif diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 5a6e88e3bf..2576cfeda2 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -48,22 +48,22 @@ #endif #include -extern int vlinebits; -extern int mvlinebits; -extern int tmvlinebits; -extern uint32_t vlinemax; -extern uint32_t mvlinemax; -extern uint32_t tmvlinemax; - extern "C" short spanend[MAXHEIGHT]; extern float rw_light; extern float rw_lightstep; extern int wallshade; +// Use multiple threads when drawing CVAR(Bool, r_multithreaded, true, 0); -CVAR(Bool, r_magfilter_linear, false, 0); -CVAR(Bool, r_minfilter_linear, false, 0); -CVAR(Bool, r_mipmap, true, 0); + +// Use linear filtering when scaling up +CVAR(Bool, r_magfilter, false, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); + +// Use linear filtering when scaling down +CVAR(Bool, r_minfilter, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); + +// Use mipmapped textures +CVAR(Bool, r_mipmap, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); #ifndef NO_SSE @@ -926,7 +926,7 @@ public: _source = (const uint32_t*)ds_source; _light = LightBgra::calc_light_multiplier(ds_light); 
_shade_constants = ds_shade_constants; - _nearest_filter = !SampleBgra::span_sampler_setup(_source, _xbits, _ybits, _xstep, _ystep); + _nearest_filter = !SampleBgra::span_sampler_setup(_source, _xbits, _ybits, _xstep, _ystep, ds_source_mipmapped); _srcalpha = dc_srcalpha >> (FRACBITS - 8); _destalpha = dc_destalpha >> (FRACBITS - 8); @@ -1354,8 +1354,7 @@ public: DWORD _texturefrac; uint32_t _texturefracx; DWORD _iscale; - int _vlinebits; - uint32_t _vlinemax; + uint32_t _textureheight; const uint32 * RESTRICT _source; const uint32 * RESTRICT _source2; @@ -1365,7 +1364,7 @@ public: uint32_t _srcalpha; uint32_t _destalpha; - DrawerWall1Command(int vlinebits, uint32_t vlinemax) + DrawerWall1Command() { _dest = dc_dest; _pitch = dc_pitch; @@ -1373,8 +1372,7 @@ public: _texturefrac = dc_texturefrac; _texturefracx = dc_texturefracx; _iscale = dc_iscale; - _vlinebits = vlinebits; - _vlinemax = vlinemax; + _textureheight = dc_textureheight; _source = (const uint32 *)dc_source; _source2 = (const uint32 *)dc_source2; @@ -1394,7 +1392,8 @@ public: uint32_t fracstep; uint32_t frac; uint32_t texturefracx; - int bits; + uint32_t height; + uint32_t half; LoopIterator(DrawerWall1Command *command, DrawerThread *thread) { @@ -1406,8 +1405,10 @@ public: frac = command->_texturefrac + command->_iscale * thread->skipped_by_thread(command->_dest_y); texturefracx = command->_texturefracx; dest = thread->dest_for_thread(command->_dest_y, command->_pitch, (uint32_t*)command->_dest); - bits = command->_vlinebits; pitch = command->_pitch * thread->num_cores; + + height = command->_textureheight; + half = (0x80000000 + height - 1) / height; } explicit operator bool() @@ -1417,7 +1418,7 @@ public: int sample_index() { - return frac >> bits; + return ((frac >> FRACBITS) * height) >> FRACBITS; } bool next() @@ -1435,12 +1436,11 @@ public: BYTE * RESTRICT _dest; int _count; int _pitch; - int _vlinebits; - uint32_t _vlinemax; ShadeConstants _shade_constants; uint32_t _vplce[4]; uint32_t 
_vince[4]; uint32_t _buftexturefracx[4]; + uint32_t _bufheight[4]; const uint32_t * RESTRICT _bufplce[4]; const uint32_t * RESTRICT _bufplce2[4]; uint32_t _light[4]; @@ -1448,19 +1448,18 @@ public: uint32_t _srcalpha; uint32_t _destalpha; - DrawerWall4Command(int vlinebits, uint32_t vlinemax) + DrawerWall4Command() { _dest = dc_dest; _count = dc_count; _pitch = dc_pitch; - _vlinebits = vlinebits; - _vlinemax = vlinemax; _shade_constants = dc_shade_constants; for (int i = 0; i < 4; i++) { _vplce[i] = vplce[i]; _vince[i] = vince[i]; _buftexturefracx[i] = buftexturefracx[i]; + _bufheight[i] = bufheight[i]; _bufplce[i] = (const uint32_t *)bufplce[i]; _bufplce2[i] = (const uint32_t *)bufplce2[i]; _light[i] = LightBgra::calc_light_multiplier(palookuplight[i]); @@ -1475,9 +1474,10 @@ public: uint32_t *dest; int pitch; int count; - int bits; uint32_t vplce[4]; uint32_t vince[4]; + uint32_t height[4]; + uint32_t half[4]; LoopIterator(DrawerWall4Command *command, DrawerThread *thread) { @@ -1487,13 +1487,14 @@ public: dest = thread->dest_for_thread(command->_dest_y, command->_pitch, (uint32_t*)command->_dest); pitch = command->_pitch * thread->num_cores; - bits = command->_vlinebits; int skipped = thread->skipped_by_thread(command->_dest_y); for (int i = 0; i < 4; i++) { vplce[i] = command->_vplce[i] + command->_vince[i] * skipped; vince[i] = command->_vince[i] * thread->num_cores; + height[i] = command->_bufheight[i]; + half[i] = (0x80000000 + height[i] - 1) / height[i]; } } @@ -1504,7 +1505,7 @@ public: int sample_index(int col) { - return vplce[col] >> bits; + return ((vplce[col] >> FRACBITS) * height[col]) >> FRACBITS; } bool next() @@ -1522,10 +1523,6 @@ public: class Vlinec1RGBACommand : public DrawerWall1Command { public: - Vlinec1RGBACommand() : DrawerWall1Command(vlinebits, vlinemax) - { - } - void Execute(DrawerThread *thread) override { LoopIterator loop(this, thread); @@ -1543,7 +1540,7 @@ public: { do { - uint32_t fg = 
LightBgra::shade_bgra(SampleBgra::sample_bilinear(_source, _source2, loop.texturefracx, loop.frac, loop.bits, _vlinemax), _light, _shade_constants); + uint32_t fg = LightBgra::shade_bgra(SampleBgra::sample_bilinear(_source, _source2, loop.texturefracx, loop.frac, loop.half, loop.height), _light, _shade_constants); *loop.dest = BlendBgra::copy(fg); } while (loop.next()); } @@ -1553,10 +1550,6 @@ public: class Vlinec4RGBACommand : public DrawerWall4Command { public: - Vlinec4RGBACommand() : DrawerWall4Command(vlinebits, vlinemax) - { - } - void Execute(DrawerThread *thread) override { LoopIterator loop(this, thread); @@ -1579,7 +1572,7 @@ public: { for (int i = 0; i < 4; i++) { - uint32_t fg = LightBgra::shade_bgra(SampleBgra::sample_bilinear(_bufplce[i], _bufplce2[i], _buftexturefracx[i], loop.sample_index(i), loop.bits, _vlinemax), _light[i], _shade_constants); + uint32_t fg = LightBgra::shade_bgra(SampleBgra::sample_bilinear(_bufplce[i], _bufplce2[i], _buftexturefracx[i], loop.sample_index(i), loop.half[i], loop.height[i]), _light[i], _shade_constants); loop.dest[i] = BlendBgra::copy(fg); } } while (loop.next()); @@ -1590,10 +1583,6 @@ public: class Mvlinec1RGBACommand : public DrawerWall1Command { public: - Mvlinec1RGBACommand() : DrawerWall1Command(mvlinebits, mvlinemax) - { - } - void Execute(DrawerThread *thread) override { LoopIterator loop(this, thread); @@ -1611,7 +1600,7 @@ public: { do { - uint32_t fg = LightBgra::shade_bgra(SampleBgra::sample_bilinear(_source, _source2, loop.texturefracx, loop.frac, loop.bits, _vlinemax), _light, _shade_constants); + uint32_t fg = LightBgra::shade_bgra(SampleBgra::sample_bilinear(_source, _source2, loop.texturefracx, loop.frac, loop.half, loop.height), _light, _shade_constants); *loop.dest = BlendBgra::alpha_blend(fg, *loop.dest); } while (loop.next()); } @@ -1621,10 +1610,6 @@ public: class Mvlinec4RGBACommand : public DrawerWall4Command { public: - Mvlinec4RGBACommand(): DrawerWall4Command(mvlinebits, mvlinemax) - { - 
} - void Execute(DrawerThread *thread) override { LoopIterator loop(this, thread); @@ -1647,7 +1632,7 @@ public: { for (int i = 0; i < 4; i++) { - uint32_t fg = LightBgra::shade_bgra(SampleBgra::sample_bilinear(_bufplce[i], _bufplce2[i], _buftexturefracx[i], loop.sample_index(i), loop.bits, _vlinemax), _light[i], _shade_constants); + uint32_t fg = LightBgra::shade_bgra(SampleBgra::sample_bilinear(_bufplce[i], _bufplce2[i], _buftexturefracx[i], loop.sample_index(i), loop.half[i], loop.height[i]), _light[i], _shade_constants); loop.dest[i] = BlendBgra::alpha_blend(fg, loop.dest[i]); } } while (loop.next()); @@ -1658,10 +1643,6 @@ public: class Tmvline1AddRGBACommand : public DrawerWall1Command { public: - Tmvline1AddRGBACommand() : DrawerWall1Command(tmvlinebits, tmvlinemax) - { - } - void Execute(DrawerThread *thread) override { LoopIterator loop(this, thread); @@ -1677,10 +1658,6 @@ public: class Tmvline4AddRGBACommand : public DrawerWall4Command { public: - Tmvline4AddRGBACommand() : DrawerWall4Command(tmvlinebits, tmvlinemax) - { - } - void Execute(DrawerThread *thread) override { LoopIterator loop(this, thread); @@ -1699,10 +1676,6 @@ public: class Tmvline1AddClampRGBACommand : public DrawerWall1Command { public: - Tmvline1AddClampRGBACommand() : DrawerWall1Command(tmvlinebits, tmvlinemax) - { - } - void Execute(DrawerThread *thread) override { LoopIterator loop(this, thread); @@ -1718,10 +1691,6 @@ public: class Tmvline4AddClampRGBACommand : public DrawerWall4Command { public: - Tmvline4AddClampRGBACommand() : DrawerWall4Command(tmvlinebits, tmvlinemax) - { - } - void Execute(DrawerThread *thread) override { LoopIterator loop(this, thread); @@ -1740,10 +1709,6 @@ public: class Tmvline1SubClampRGBACommand : public DrawerWall1Command { public: - Tmvline1SubClampRGBACommand() : DrawerWall1Command(tmvlinebits, tmvlinemax) - { - } - void Execute(DrawerThread *thread) override { LoopIterator loop(this, thread); @@ -1759,10 +1724,6 @@ public: class 
Tmvline4SubClampRGBACommand : public DrawerWall4Command { public: - Tmvline4SubClampRGBACommand() : DrawerWall4Command(tmvlinebits, tmvlinemax) - { - } - void Execute(DrawerThread *thread) override { LoopIterator loop(this, thread); @@ -1781,10 +1742,6 @@ public: class Tmvline1RevSubClampRGBACommand : public DrawerWall1Command { public: - Tmvline1RevSubClampRGBACommand() : DrawerWall1Command(tmvlinebits, tmvlinemax) - { - } - void Execute(DrawerThread *thread) override { LoopIterator loop(this, thread); @@ -1800,10 +1757,6 @@ public: class Tmvline4RevSubClampRGBACommand : public DrawerWall4Command { public: - Tmvline4RevSubClampRGBACommand() : DrawerWall4Command(tmvlinebits, tmvlinemax) - { - } - void Execute(DrawerThread *thread) override { LoopIterator loop(this, thread); @@ -2362,17 +2315,8 @@ void R_DrawSlab_rgba(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, BY DrawerCommandQueue::QueueCommand(dx, v, dy, vi, vptr, p, slab_rgba_shade_constants, slab_rgba_colormap, slab_rgba_light); } -//extern FTexture *rw_pic; // For the asserts below - DWORD vlinec1_rgba() { - /*DWORD fracstep = dc_iscale; - DWORD frac = dc_texturefrac; - DWORD height = rw_pic->GetHeight(); - assert((frac >> vlinebits) < height); - frac += (dc_count-1) * fracstep; - assert((frac >> vlinebits) <= height);*/ - DrawerCommandQueue::QueueCommand(); return dc_texturefrac + dc_count * dc_iscale; } diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index 56f1faa240..a60fd65c73 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -459,7 +459,7 @@ public: class SampleBgra { public: - inline static bool span_sampler_setup(const uint32_t * RESTRICT &source, int &xbits, int &ybits, fixed_t xstep, fixed_t ystep) + inline static bool span_sampler_setup(const uint32_t * RESTRICT &source, int &xbits, int &ybits, fixed_t xstep, fixed_t ystep, bool mipmapped) { // Is this a magfilter or minfilter? 
fixed_t xmagnitude = abs(xstep) >> (32 - xbits - FRACBITS); @@ -467,7 +467,7 @@ public: fixed_t magnitude = (xmagnitude + ymagnitude) * 2 + (1 << (FRACBITS - 1)); bool magnifying = (magnitude >> FRACBITS == 0); - if (r_mipmap) + if (r_mipmap && mipmapped) { int level = magnitude >> (FRACBITS + 1); while (level != 0) @@ -482,18 +482,15 @@ public: } } - return (magnifying && r_magfilter_linear) || (!magnifying && r_minfilter_linear); + return (magnifying && r_magfilter) || (!magnifying && r_minfilter); } - FORCEINLINE static uint32_t sample_bilinear(const uint32_t *col0, const uint32_t *col1, uint32_t texturefracx, uint32_t texturefracy, int ybits, uint32_t ymax) + FORCEINLINE static uint32_t sample_bilinear(const uint32_t *col0, const uint32_t *col1, uint32_t texturefracx, uint32_t texturefracy, uint32_t half, uint32_t height) { - uint32_t half = 1 << (ybits - 1); - uint32_t y0 = (texturefracy - half) >> ybits; - if (y0 > ymax) - y0 = 0; - uint32_t y1 = y0 + 1; - if (y1 > ymax) - y1 = 0; + uint32_t frac_y0 = ((texturefracy - half) >> FRACBITS) * height; + uint32_t frac_y1 = ((texturefracy + half) >> FRACBITS) * height; + uint32_t y0 = frac_y0 >> FRACBITS; + uint32_t y1 = frac_y1 >> FRACBITS; uint32_t p00 = col0[y0]; uint32_t p01 = col0[y1]; @@ -501,7 +498,7 @@ public: uint32_t p11 = col1[y1]; uint32_t inv_b = texturefracx; - uint32_t inv_a = ((texturefracy + half) >> (ybits - 4)) & 15; + uint32_t inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; uint32_t a = 16 - inv_a; uint32_t b = 16 - inv_b; @@ -546,20 +543,18 @@ public: ///////////////////////////////////////////////////////////////////////////// // SSE/AVX shading macros: -#define VEC_SAMPLE_BILINEAR4_COLUMN(fg, col0, col1, texturefracx, texturefracy, ybits, ymax) { \ - uint32_t half = 1 << (ybits - 1); \ - \ +#define VEC_SAMPLE_BILINEAR4_COLUMN(fg, col0, col1, texturefracx, texturefracy, half, height) { \ __m128i m127 = _mm_set1_epi16(127); \ fg = _mm_setzero_si128(); \ for (int i = 0; i < 4; i++) \ { \ - uint32_t y0 
= (texturefracy[i] - half) >> ybits; \ - if (y0 > ymax) y0 = 0; \ - uint32_t y1 = y0 + 1; \ - if (y1 > ymax) y1 = 0; \ + uint32_t frac_y0 = ((texturefracy[i] - half[i]) >> FRACBITS) * height[i]; \ + uint32_t frac_y1 = ((texturefracy[i] + half[i]) >> FRACBITS) * height[i]; \ + uint32_t y0 = frac_y0 >> FRACBITS; \ + uint32_t y1 = frac_y1 >> FRACBITS; \ \ uint32_t inv_b = texturefracx[i]; \ - uint32_t inv_a = ((texturefracy[i] + half) >> (ybits - 4)) & 15; \ + uint32_t inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; \ uint32_t a = 16 - inv_a; \ uint32_t b = 16 - inv_b; \ \ diff --git a/src/r_draw_rgba_sse.h b/src/r_draw_rgba_sse.h index bca30185c1..ae8d3bf427 100644 --- a/src/r_draw_rgba_sse.h +++ b/src/r_draw_rgba_sse.h @@ -43,7 +43,7 @@ public: _destorg = dc_destorg; _light = ds_light; _shade_constants = ds_shade_constants; - _nearest_filter = !SampleBgra::span_sampler_setup(_source, _xbits, _ybits, _xstep, _ystep); + _nearest_filter = !SampleBgra::span_sampler_setup(_source, _xbits, _ybits, _xstep, _ystep, ds_source_mipmapped); } void Execute(DrawerThread *thread) override @@ -364,14 +364,13 @@ class VecCommand(Vlinec4RGBA) : public DrawerCommand int _count; int _pitch; ShadeConstants _shade_constants; - int _vlinebits; - uint32_t _vlinemax; fixed_t palookuplight[4]; DWORD vplce[4]; DWORD vince[4]; const uint32 * RESTRICT bufplce[4]; const uint32_t * RESTRICT bufplce2[4]; uint32_t buftexturefracx[4]; + uint32_t bufheight[4]; public: VecCommand(Vlinec4RGBA)() @@ -380,8 +379,6 @@ public: _count = dc_count; _pitch = dc_pitch; _shade_constants = dc_shade_constants; - _vlinebits = vlinebits; - _vlinemax = vlinemax; for (int i = 0; i < 4; i++) { palookuplight[i] = ::palookuplight[i]; @@ -390,6 +387,7 @@ public: bufplce[i] = (const uint32 *)::bufplce[i]; bufplce2[i] = (const uint32_t *)::bufplce2[i]; buftexturefracx[i] = ::buftexturefracx[i]; + bufheight[i] = ::bufheight[i]; } } @@ -400,9 +398,16 @@ public: return; uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, 
(uint32_t*)_dest); - int bits = _vlinebits; int pitch = _pitch * thread->num_cores; + uint32_t height[4]; + uint32_t half[4]; + for (int i = 0; i < 4; i++) + { + height[i] = bufheight[i]; + half[i] = (0x80000000 + height[i] - 1) / height[i]; + } + uint32_t light0 = LightBgra::calc_light_multiplier(palookuplight[0]); uint32_t light1 = LightBgra::calc_light_multiplier(palookuplight[1]); uint32_t light2 = LightBgra::calc_light_multiplier(palookuplight[2]); @@ -431,10 +436,10 @@ public: DWORD place2 = local_vplce[2]; DWORD place3 = local_vplce[3]; - uint32_t p0 = bufplce[0][place0 >> bits]; - uint32_t p1 = bufplce[1][place1 >> bits]; - uint32_t p2 = bufplce[2][place2 >> bits]; - uint32_t p3 = bufplce[3][place3 >> bits]; + uint32_t p0 = bufplce[0][((place0 >> FRACBITS) * height[0]) >> FRACBITS]; + uint32_t p1 = bufplce[1][((place1 >> FRACBITS) * height[1]) >> FRACBITS]; + uint32_t p2 = bufplce[2][((place2 >> FRACBITS) * height[2]) >> FRACBITS]; + uint32_t p3 = bufplce[3][((place3 >> FRACBITS) * height[3]) >> FRACBITS]; local_vplce[0] = place0 + local_vince[0]; local_vplce[1] = place1 + local_vince[1]; @@ -457,10 +462,10 @@ public: DWORD place2 = local_vplce[2]; DWORD place3 = local_vplce[3]; - uint32_t p0 = bufplce[0][place0 >> bits]; - uint32_t p1 = bufplce[1][place1 >> bits]; - uint32_t p2 = bufplce[2][place2 >> bits]; - uint32_t p3 = bufplce[3][place3 >> bits]; + uint32_t p0 = bufplce[0][((place0 >> FRACBITS) * height[0]) >> FRACBITS]; + uint32_t p1 = bufplce[1][((place1 >> FRACBITS) * height[1]) >> FRACBITS]; + uint32_t p2 = bufplce[2][((place2 >> FRACBITS) * height[2]) >> FRACBITS]; + uint32_t p3 = bufplce[3][((place3 >> FRACBITS) * height[3]) >> FRACBITS]; local_vplce[0] = place0 + local_vince[0]; local_vplce[1] = place1 + local_vince[1]; @@ -482,7 +487,7 @@ public: do { __m128i fg; - VEC_SAMPLE_BILINEAR4_COLUMN(fg, bufplce, bufplce2, buftexturefracx, local_vplce, bits, _vlinemax); + VEC_SAMPLE_BILINEAR4_COLUMN(fg, bufplce, bufplce2, buftexturefracx, local_vplce, 
half, height); local_vplce[0] = local_vplce[0] + local_vince[0]; local_vplce[1] = local_vplce[1] + local_vince[1]; @@ -500,7 +505,7 @@ public: do { __m128i fg; - VEC_SAMPLE_BILINEAR4_COLUMN(fg, bufplce, bufplce2, buftexturefracx, local_vplce, bits, _vlinemax); + VEC_SAMPLE_BILINEAR4_COLUMN(fg, bufplce, bufplce2, buftexturefracx, local_vplce, half, height); local_vplce[0] = local_vplce[0] + local_vince[0]; local_vplce[1] = local_vplce[1] + local_vince[1]; @@ -522,7 +527,6 @@ class VecCommand(Mvlinec4RGBA) : public DrawerCommand int _count; int _pitch; ShadeConstants _shade_constants; - int _mvlinebits; uint32_t _mvlinemax; fixed_t palookuplight[4]; DWORD vplce[4]; @@ -530,6 +534,7 @@ class VecCommand(Mvlinec4RGBA) : public DrawerCommand const uint32 * RESTRICT bufplce[4]; const uint32 * RESTRICT bufplce2[4]; uint32_t buftexturefracx[4]; + uint32_t bufheight[4]; public: VecCommand(Mvlinec4RGBA)() @@ -538,8 +543,6 @@ public: _count = dc_count; _pitch = dc_pitch; _shade_constants = dc_shade_constants; - _mvlinebits = mvlinebits; - _mvlinemax = mvlinemax; for (int i = 0; i < 4; i++) { palookuplight[i] = ::palookuplight[i]; @@ -548,6 +551,7 @@ public: bufplce[i] = (const uint32 *)::bufplce[i]; bufplce2[i] = (const uint32_t *)::bufplce2[i]; buftexturefracx[i] = ::buftexturefracx[i]; + bufheight[i] = ::bufheight[i]; } } @@ -559,7 +563,13 @@ public: uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); int pitch = _pitch * thread->num_cores; - int bits = _mvlinebits; + uint32_t height[4]; + uint32_t half[4]; + for (int i = 0; i < 4; i++) + { + height[i] = bufheight[i]; + half[i] = (0x80000000 + height[i] - 1) / height[i]; + } uint32_t light0 = LightBgra::calc_light_multiplier(palookuplight[0]); uint32_t light1 = LightBgra::calc_light_multiplier(palookuplight[1]); @@ -589,10 +599,10 @@ public: DWORD place2 = local_vplce[2]; DWORD place3 = local_vplce[3]; - uint32_t pix0 = bufplce[0][place0 >> bits]; - uint32_t pix1 = bufplce[1][place1 >> bits]; - 
uint32_t pix2 = bufplce[2][place2 >> bits]; - uint32_t pix3 = bufplce[3][place3 >> bits]; + uint32_t pix0 = bufplce[0][((place0 >> FRACBITS) * height[0]) >> FRACBITS]; + uint32_t pix1 = bufplce[1][((place1 >> FRACBITS) * height[1]) >> FRACBITS]; + uint32_t pix2 = bufplce[2][((place2 >> FRACBITS) * height[2]) >> FRACBITS]; + uint32_t pix3 = bufplce[3][((place3 >> FRACBITS) * height[3]) >> FRACBITS]; local_vplce[0] = place0 + local_vince[0]; local_vplce[1] = place1 + local_vince[1]; @@ -617,10 +627,10 @@ public: DWORD place2 = local_vplce[2]; DWORD place3 = local_vplce[3]; - uint32_t pix0 = bufplce[0][place0 >> bits]; - uint32_t pix1 = bufplce[1][place1 >> bits]; - uint32_t pix2 = bufplce[2][place2 >> bits]; - uint32_t pix3 = bufplce[3][place3 >> bits]; + uint32_t pix0 = bufplce[0][((place0 >> FRACBITS) * height[0]) >> FRACBITS]; + uint32_t pix1 = bufplce[1][((place1 >> FRACBITS) * height[1]) >> FRACBITS]; + uint32_t pix2 = bufplce[2][((place2 >> FRACBITS) * height[2]) >> FRACBITS]; + uint32_t pix3 = bufplce[3][((place3 >> FRACBITS) * height[3]) >> FRACBITS]; local_vplce[0] = place0 + local_vince[0]; local_vplce[1] = place1 + local_vince[1]; @@ -644,7 +654,7 @@ public: do { __m128i fg; - VEC_SAMPLE_BILINEAR4_COLUMN(fg, bufplce, bufplce2, buftexturefracx, local_vplce, bits, _mvlinemax); + VEC_SAMPLE_BILINEAR4_COLUMN(fg, bufplce, bufplce2, buftexturefracx, local_vplce, half, height); local_vplce[0] = local_vplce[0] + local_vince[0]; local_vplce[1] = local_vplce[1] + local_vince[1]; @@ -664,7 +674,7 @@ public: do { __m128i fg; - VEC_SAMPLE_BILINEAR4_COLUMN(fg, bufplce, bufplce2, buftexturefracx, local_vplce, bits, _mvlinemax); + VEC_SAMPLE_BILINEAR4_COLUMN(fg, bufplce, bufplce2, buftexturefracx, local_vplce, half, height); local_vplce[0] = local_vplce[0] + local_vince[0]; local_vplce[1] = local_vplce[1] + local_vince[1]; @@ -690,12 +700,11 @@ class VecCommand(Tmvline4AddRGBA) : public DrawerCommand ShadeConstants _shade_constants; fixed_t _srcalpha; fixed_t _destalpha; 
- int _tmvlinebits; - uint32_t _tmvlinemax; fixed_t palookuplight[4]; DWORD vplce[4]; DWORD vince[4]; const uint32 * RESTRICT bufplce[4]; + uint32_t bufheight[4]; public: VecCommand(Tmvline4AddRGBA)() @@ -706,14 +715,13 @@ public: _shade_constants = dc_shade_constants; _srcalpha = dc_srcalpha; _destalpha = dc_destalpha; - _tmvlinebits = tmvlinebits; - _tmvlinemax = tmvlinemax; for (int i = 0; i < 4; i++) { palookuplight[i] = ::palookuplight[i]; vplce[i] = ::vplce[i]; vince[i] = ::vince[i]; bufplce[i] = (const uint32 *)::bufplce[i]; + bufheight[i] = ::bufheight[i]; } } @@ -725,7 +733,14 @@ public: uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); int pitch = _pitch * thread->num_cores; - int bits = _tmvlinebits; + + uint32_t height[4]; + uint32_t half[4]; + for (int i = 0; i < 4; i++) + { + height[i] = bufheight[i]; + half[i] = (0x80000000 + height[i] - 1) / height[i]; + } uint32_t light[4]; light[0] = LightBgra::calc_light_multiplier(palookuplight[0]); @@ -754,10 +769,10 @@ public: do { - uint32_t pix0 = bufplce[0][local_vplce[0] >> bits]; - uint32_t pix1 = bufplce[1][local_vplce[1] >> bits]; - uint32_t pix2 = bufplce[2][local_vplce[2] >> bits]; - uint32_t pix3 = bufplce[3][local_vplce[3] >> bits]; + uint32_t pix0 = bufplce[0][((local_vplce[0] >> FRACBITS) * height[0]) >> FRACBITS]; + uint32_t pix1 = bufplce[1][((local_vplce[1] >> FRACBITS) * height[1]) >> FRACBITS]; + uint32_t pix2 = bufplce[2][((local_vplce[2] >> FRACBITS) * height[2]) >> FRACBITS]; + uint32_t pix3 = bufplce[3][((local_vplce[3] >> FRACBITS) * height[3]) >> FRACBITS]; local_vplce[0] = local_vplce[0] + local_vince[0]; local_vplce[1] = local_vplce[1] + local_vince[1]; @@ -791,10 +806,10 @@ public: do { - uint32_t pix0 = bufplce[0][local_vplce[0] >> bits]; - uint32_t pix1 = bufplce[1][local_vplce[1] >> bits]; - uint32_t pix2 = bufplce[2][local_vplce[2] >> bits]; - uint32_t pix3 = bufplce[3][local_vplce[3] >> bits]; + uint32_t pix0 = bufplce[0][((local_vplce[0] >> FRACBITS) 
* height[0]) >> FRACBITS]; + uint32_t pix1 = bufplce[1][((local_vplce[1] >> FRACBITS) * height[1]) >> FRACBITS]; + uint32_t pix2 = bufplce[2][((local_vplce[2] >> FRACBITS) * height[2]) >> FRACBITS]; + uint32_t pix3 = bufplce[3][((local_vplce[3] >> FRACBITS) * height[3]) >> FRACBITS]; local_vplce[0] = local_vplce[0] + local_vince[0]; local_vplce[1] = local_vplce[1] + local_vince[1]; @@ -831,12 +846,11 @@ class VecCommand(Tmvline4AddClampRGBA) : public DrawerCommand ShadeConstants _shade_constants; fixed_t _srcalpha; fixed_t _destalpha; - int _tmvlinebits; - uint32_t _tmvlinemax; fixed_t palookuplight[4]; DWORD vplce[4]; DWORD vince[4]; const uint32 *RESTRICT bufplce[4]; + uint32_t bufheight[4]; public: VecCommand(Tmvline4AddClampRGBA)() @@ -847,14 +861,13 @@ public: _shade_constants = dc_shade_constants; _srcalpha = dc_srcalpha; _destalpha = dc_destalpha; - _tmvlinebits = tmvlinebits; - _tmvlinemax = tmvlinemax; for (int i = 0; i < 4; i++) { palookuplight[i] = ::palookuplight[i]; vplce[i] = ::vplce[i]; vince[i] = ::vince[i]; bufplce[i] = (const uint32 *)::bufplce[i]; + bufheight[i] = ::bufheight[i]; } } @@ -866,7 +879,14 @@ public: uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); int pitch = _pitch * thread->num_cores; - int bits = _tmvlinebits; + + uint32_t height[4]; + uint32_t half[4]; + for (int i = 0; i < 4; i++) + { + height[i] = bufheight[i]; + half[i] = (0x80000000 + height[i] - 1) / height[i]; + } uint32_t light[4]; light[0] = LightBgra::calc_light_multiplier(palookuplight[0]); @@ -895,10 +915,10 @@ public: do { - uint32_t pix0 = bufplce[0][local_vplce[0] >> bits]; - uint32_t pix1 = bufplce[1][local_vplce[1] >> bits]; - uint32_t pix2 = bufplce[2][local_vplce[2] >> bits]; - uint32_t pix3 = bufplce[3][local_vplce[3] >> bits]; + uint32_t pix0 = bufplce[0][((local_vplce[0] >> FRACBITS) * height[0]) >> FRACBITS]; + uint32_t pix1 = bufplce[1][((local_vplce[1] >> FRACBITS) * height[1]) >> FRACBITS]; + uint32_t pix2 = 
bufplce[2][((local_vplce[2] >> FRACBITS) * height[2]) >> FRACBITS]; + uint32_t pix3 = bufplce[3][((local_vplce[3] >> FRACBITS) * height[3]) >> FRACBITS]; local_vplce[0] = local_vplce[0] + local_vince[0]; local_vplce[1] = local_vplce[1] + local_vince[1]; @@ -931,10 +951,10 @@ public: do { - uint32_t pix0 = bufplce[0][local_vplce[0] >> bits]; - uint32_t pix1 = bufplce[1][local_vplce[1] >> bits]; - uint32_t pix2 = bufplce[2][local_vplce[2] >> bits]; - uint32_t pix3 = bufplce[3][local_vplce[3] >> bits]; + uint32_t pix0 = bufplce[0][((local_vplce[0] >> FRACBITS) * height[0]) >> FRACBITS]; + uint32_t pix1 = bufplce[1][((local_vplce[1] >> FRACBITS) * height[1]) >> FRACBITS]; + uint32_t pix2 = bufplce[2][((local_vplce[2] >> FRACBITS) * height[2]) >> FRACBITS]; + uint32_t pix3 = bufplce[3][((local_vplce[3] >> FRACBITS) * height[3]) >> FRACBITS]; local_vplce[0] = local_vplce[0] + local_vince[0]; local_vplce[1] = local_vplce[1] + local_vince[1]; @@ -971,12 +991,11 @@ class VecCommand(Tmvline4SubClampRGBA) : public DrawerCommand ShadeConstants _shade_constants; fixed_t _srcalpha; fixed_t _destalpha; - int _tmvlinebits; - uint32_t _tmvlinemax; fixed_t palookuplight[4]; DWORD vplce[4]; DWORD vince[4]; const uint32 *RESTRICT bufplce[4]; + uint32_t bufheight[4]; public: VecCommand(Tmvline4SubClampRGBA)() @@ -987,14 +1006,13 @@ public: _shade_constants = dc_shade_constants; _srcalpha = dc_srcalpha; _destalpha = dc_destalpha; - _tmvlinebits = tmvlinebits; - _tmvlinemax = tmvlinemax; for (int i = 0; i < 4; i++) { palookuplight[i] = ::palookuplight[i]; vplce[i] = ::vplce[i]; vince[i] = ::vince[i]; bufplce[i] = (const uint32 *)::bufplce[i]; + bufheight[i] = ::bufheight[i]; } } @@ -1006,7 +1024,14 @@ public: uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); int pitch = _pitch * thread->num_cores; - int bits = _tmvlinebits; + + uint32_t height[4]; + uint32_t half[4]; + for (int i = 0; i < 4; i++) + { + height[i] = bufheight[i]; + half[i] = (0x80000000 + 
height[i] - 1) / height[i]; + } uint32_t light[4]; light[0] = LightBgra::calc_light_multiplier(palookuplight[0]); @@ -1035,10 +1060,10 @@ public: do { - uint32_t pix0 = bufplce[0][local_vplce[0] >> bits]; - uint32_t pix1 = bufplce[1][local_vplce[1] >> bits]; - uint32_t pix2 = bufplce[2][local_vplce[2] >> bits]; - uint32_t pix3 = bufplce[3][local_vplce[3] >> bits]; + uint32_t pix0 = bufplce[0][((local_vplce[0] >> FRACBITS) * height[0]) >> FRACBITS]; + uint32_t pix1 = bufplce[1][((local_vplce[1] >> FRACBITS) * height[1]) >> FRACBITS]; + uint32_t pix2 = bufplce[2][((local_vplce[2] >> FRACBITS) * height[2]) >> FRACBITS]; + uint32_t pix3 = bufplce[3][((local_vplce[3] >> FRACBITS) * height[3]) >> FRACBITS]; local_vplce[0] = local_vplce[0] + local_vince[0]; local_vplce[1] = local_vplce[1] + local_vince[1]; @@ -1071,10 +1096,10 @@ public: do { - uint32_t pix0 = bufplce[0][local_vplce[0] >> bits]; - uint32_t pix1 = bufplce[1][local_vplce[1] >> bits]; - uint32_t pix2 = bufplce[2][local_vplce[2] >> bits]; - uint32_t pix3 = bufplce[3][local_vplce[3] >> bits]; + uint32_t pix0 = bufplce[0][((local_vplce[0] >> FRACBITS) * height[0]) >> FRACBITS]; + uint32_t pix1 = bufplce[1][((local_vplce[1] >> FRACBITS) * height[1]) >> FRACBITS]; + uint32_t pix2 = bufplce[2][((local_vplce[2] >> FRACBITS) * height[2]) >> FRACBITS]; + uint32_t pix3 = bufplce[3][((local_vplce[3] >> FRACBITS) * height[3]) >> FRACBITS]; local_vplce[0] = local_vplce[0] + local_vince[0]; local_vplce[1] = local_vplce[1] + local_vince[1]; @@ -1111,12 +1136,11 @@ class VecCommand(Tmvline4RevSubClampRGBA) : public DrawerCommand ShadeConstants _shade_constants; fixed_t _srcalpha; fixed_t _destalpha; - int _tmvlinebits; - uint32_t _tmvlinemax; fixed_t palookuplight[4]; DWORD vplce[4]; DWORD vince[4]; const uint32 *RESTRICT bufplce[4]; + uint32_t bufheight[4]; public: VecCommand(Tmvline4RevSubClampRGBA)() @@ -1127,14 +1151,13 @@ public: _shade_constants = dc_shade_constants; _srcalpha = dc_srcalpha; _destalpha = dc_destalpha; 
- _tmvlinebits = tmvlinebits; - _tmvlinemax = tmvlinemax; for (int i = 0; i < 4; i++) { palookuplight[i] = ::palookuplight[i]; vplce[i] = ::vplce[i]; vince[i] = ::vince[i]; bufplce[i] = (const uint32 *)::bufplce[i]; + bufheight[i] = ::bufheight[4]; } } @@ -1146,7 +1169,14 @@ public: uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); int pitch = _pitch * thread->num_cores; - int bits = _tmvlinebits; + + uint32_t height[4]; + uint32_t half[4]; + for (int i = 0; i < 4; i++) + { + height[i] = bufheight[i]; + half[i] = (0x80000000 + height[i] - 1) / height[i]; + } uint32_t light[4]; light[0] = LightBgra::calc_light_multiplier(palookuplight[0]); @@ -1175,10 +1205,10 @@ public: do { - uint32_t pix0 = bufplce[0][local_vplce[0] >> bits]; - uint32_t pix1 = bufplce[1][local_vplce[1] >> bits]; - uint32_t pix2 = bufplce[2][local_vplce[2] >> bits]; - uint32_t pix3 = bufplce[3][local_vplce[3] >> bits]; + uint32_t pix0 = bufplce[0][((local_vplce[0] >> FRACBITS) * height[0]) >> FRACBITS]; + uint32_t pix1 = bufplce[1][((local_vplce[1] >> FRACBITS) * height[1]) >> FRACBITS]; + uint32_t pix2 = bufplce[2][((local_vplce[2] >> FRACBITS) * height[2]) >> FRACBITS]; + uint32_t pix3 = bufplce[3][((local_vplce[3] >> FRACBITS) * height[3]) >> FRACBITS]; local_vplce[0] = local_vplce[0] + local_vince[0]; local_vplce[1] = local_vplce[1] + local_vince[1]; @@ -1211,10 +1241,10 @@ public: do { - uint32_t pix0 = bufplce[0][local_vplce[0] >> bits]; - uint32_t pix1 = bufplce[1][local_vplce[1] >> bits]; - uint32_t pix2 = bufplce[2][local_vplce[2] >> bits]; - uint32_t pix3 = bufplce[3][local_vplce[3] >> bits]; + uint32_t pix0 = bufplce[0][((local_vplce[0] >> FRACBITS) * height[0]) >> FRACBITS]; + uint32_t pix1 = bufplce[1][((local_vplce[1] >> FRACBITS) * height[1]) >> FRACBITS]; + uint32_t pix2 = bufplce[2][((local_vplce[2] >> FRACBITS) * height[2]) >> FRACBITS]; + uint32_t pix3 = bufplce[3][((local_vplce[3] >> FRACBITS) * height[3]) >> FRACBITS]; local_vplce[0] = local_vplce[0] 
+ local_vince[0]; local_vplce[1] = local_vplce[1] + local_vince[1]; diff --git a/src/r_segs.cpp b/src/r_segs.cpp index 5c90373753..630d64da07 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -1075,86 +1075,112 @@ struct WallscanSampler uint32_t uv_pos; uint32_t uv_step; - int32_t uv_fracbits; uint32_t uv_max; const BYTE *source; const BYTE *source2; uint32_t texturefracx; + uint32_t height; }; WallscanSampler::WallscanSampler(int y1, float swal, double yrepeat, fixed_t xoffset, FTexture *texture, const BYTE*(*getcol)(FTexture *texture, int x)) { - int base_width = texture->GetWidth(); - int base_height = texture->GetHeight(); - uv_fracbits = 32 - texture->HeightBits; - uv_max = base_height << uv_fracbits; - - // Find start uv in [0-base_height[ range. - // Not using xs_ToFixed because it rounds the result and we need something that always rounds down to stay within the range. - double uv_stepd = swal * yrepeat; - double v = (dc_texturemid + uv_stepd * (y1 - CenterY + 0.5)) / base_height; - v = v - floor(v); - v *= base_height; - v *= (1 << uv_fracbits); - - uv_pos = (uint32_t)v; - uv_step = xs_ToFixed(uv_fracbits, uv_stepd); - - bool magnifying = uv_step >> (uv_fracbits - 1) == 0; - - if (!r_swtruecolor || getcol != R_GetColumn) + if (!r_swtruecolor) { + height = texture->GetHeight(); + int uv_fracbits = 32 - texture->HeightBits; + uv_max = height << uv_fracbits; + + // Find start uv in [0-base_height[ range. + // Not using xs_ToFixed because it rounds the result and we need something that always rounds down to stay within the range. 
+ double uv_stepd = swal * yrepeat; + double v = (dc_texturemid + uv_stepd * (y1 - CenterY + 0.5)) / height; + v = v - floor(v); + v *= height; + v *= (1 << uv_fracbits); + + uv_pos = (uint32_t)v; + uv_step = xs_ToFixed(uv_fracbits, uv_stepd); + if (uv_step == 0) // To prevent divide by zero elsewhere + uv_step = 1; + source = getcol(texture, xoffset >> FRACBITS); source2 = nullptr; texturefracx = 0; } else { - int mipmap_offset = 0; - int mip_width = base_width; - int mip_height = base_height; - if (r_mipmap) - { - fixed_t magnitude = abs((int32_t)uv_step) >> (uv_fracbits - FRACBITS); - int level = magnitude >> FRACBITS; - while (level != 0) - { - if (uv_fracbits > 30) - break; + // Normalize to 0-1 range: + double uv_stepd = swal * yrepeat; + double v = (dc_texturemid + uv_stepd * (y1 - CenterY + 0.5)) / texture->GetHeight(); + v = v - floor(v); + double v_step = uv_stepd / texture->GetHeight(); - mipmap_offset += mip_width * mip_height; - uv_fracbits += 1; - uv_pos >>= 1; - uv_step >>= 1; - xoffset >>= 1; - level >>= 1; - mip_width = MAX(mip_width >> 1, 1); - mip_height = MAX(mip_height >> 1, 1); - } + if (isnan(v) || isnan(v_step)) // this should never happen, but it apparently does.. 
+ { + uv_stepd = 0.0; + v = 0.0; + v_step = 0.0; } - const uint32_t *pixels = texture->GetPixelsBgra() + mipmap_offset; + // Convert to uint32: + uv_pos = (uint32_t)(v * 0x100000000LL); + uv_step = (uint32_t)(v_step * 0x100000000LL); + uv_max = 0; - bool filter_nearest = (magnifying && !r_magfilter_linear) || (!magnifying && !r_minfilter_linear); - if (filter_nearest) + // Texture mipmap and filter selection: + if (getcol != R_GetColumn) { - int tx = (xoffset >> FRACBITS) % mip_width; - if (tx < 0) - tx += mip_width; - source = (BYTE*)(pixels + tx * mip_height); + source = getcol(texture, xoffset >> FRACBITS); source2 = nullptr; + height = texture->GetHeight(); texturefracx = 0; } else { - int tx0 = ((xoffset - FRACUNIT / 2) >> FRACBITS) % mip_width; - if (tx0 < 0) - tx0 += mip_width; - int tx1 = (tx0 + 1) % mip_width; - source = (BYTE*)(pixels + tx0 * mip_height); - source2 = (BYTE*)(pixels + tx1 * mip_height); - texturefracx = ((xoffset + FRACUNIT / 2) >> (FRACBITS - 4)) & 15; + double magnitude = abs(uv_stepd * 2); + bool magnifying = magnitude < 1.0f; + + int mipmap_offset = 0; + int mip_width = texture->GetWidth(); + int mip_height = texture->GetHeight(); + if (r_mipmap && texture->Mipmapped()) + { + int level = (int)MAX(magnitude - 1.0, 0.0); + while (level != 0) + { + mipmap_offset += mip_width * mip_height; + xoffset >>= 1; + level >>= 1; + mip_width = MAX(mip_width >> 1, 1); + mip_height = MAX(mip_height >> 1, 1); + } + } + + const uint32_t *pixels = texture->GetPixelsBgra() + mipmap_offset; + + bool filter_nearest = (magnifying && !r_magfilter) || (!magnifying && !r_minfilter); + if (filter_nearest) + { + int tx = (xoffset >> FRACBITS) % mip_width; + if (tx < 0) + tx += mip_width; + source = (BYTE*)(pixels + tx * mip_height); + source2 = nullptr; + height = mip_height; + texturefracx = 0; + } + else + { + int tx0 = ((xoffset - FRACUNIT / 2) >> FRACBITS) % mip_width; + if (tx0 < 0) + tx0 += mip_width; + int tx1 = (tx0 + 1) % mip_width; + source = 
(BYTE*)(pixels + tx0 * mip_height); + source2 = (BYTE*)(pixels + tx1 * mip_height); + height = mip_height; + texturefracx = ((xoffset + FRACUNIT / 2) >> (FRACBITS - 4)) & 15; + } } } } @@ -1162,18 +1188,18 @@ WallscanSampler::WallscanSampler(int y1, float swal, double yrepeat, fixed_t xof // Draw a column with support for non-power-of-two ranges void wallscan_drawcol1(int x, int y1, int y2, WallscanSampler &sampler, DWORD(*draw1column)()) { - int pixelsize = r_swtruecolor ? 4 : 1; - if (sampler.uv_max == 0) // power of two + if (r_swtruecolor) { int count = y2 - y1; dc_source = sampler.source; dc_source2 = sampler.source2; dc_texturefracx = sampler.texturefracx; - dc_dest = (ylookup[y1] + x) * pixelsize + dc_destorg; + dc_dest = (ylookup[y1] + x) * 4 + dc_destorg; dc_count = count; dc_iscale = sampler.uv_step; dc_texturefrac = sampler.uv_pos; + dc_textureheight = sampler.height; draw1column(); uint64_t step64 = sampler.uv_step; @@ -1182,41 +1208,60 @@ void wallscan_drawcol1(int x, int y1, int y2, WallscanSampler &sampler, DWORD(*d } else { - uint32_t uv_pos = sampler.uv_pos; - - uint32_t left = y2 - y1; - while (left > 0) + if (sampler.uv_max == 0) // power of two { - uint32_t available = sampler.uv_max - uv_pos; - uint32_t next_uv_wrap = available / sampler.uv_step; - if (available % sampler.uv_step != 0) - next_uv_wrap++; - uint32_t count = MIN(left, next_uv_wrap); + int count = y2 - y1; dc_source = sampler.source; dc_source2 = sampler.source2; dc_texturefracx = sampler.texturefracx; - dc_dest = (ylookup[y1] + x) * pixelsize + dc_destorg; + dc_dest = (ylookup[y1] + x) + dc_destorg; dc_count = count; dc_iscale = sampler.uv_step; - dc_texturefrac = uv_pos; + dc_texturefrac = sampler.uv_pos; draw1column(); - left -= count; - uv_pos += sampler.uv_step * count; - if (uv_pos >= sampler.uv_max) - uv_pos -= sampler.uv_max; + uint64_t step64 = sampler.uv_step; + uint64_t pos64 = sampler.uv_pos; + sampler.uv_pos = (uint32_t)(pos64 + step64 * count); } + else + { + uint32_t 
uv_pos = sampler.uv_pos; - sampler.uv_pos = uv_pos; + uint32_t left = y2 - y1; + while (left > 0) + { + uint32_t available = sampler.uv_max - uv_pos; + uint32_t next_uv_wrap = available / sampler.uv_step; + if (available % sampler.uv_step != 0) + next_uv_wrap++; + uint32_t count = MIN(left, next_uv_wrap); + + dc_source = sampler.source; + dc_source2 = sampler.source2; + dc_texturefracx = sampler.texturefracx; + dc_dest = (ylookup[y1] + x) + dc_destorg; + dc_count = count; + dc_iscale = sampler.uv_step; + dc_texturefrac = uv_pos; + draw1column(); + + left -= count; + uv_pos += sampler.uv_step * count; + if (uv_pos >= sampler.uv_max) + uv_pos -= sampler.uv_max; + } + + sampler.uv_pos = uv_pos; + } } } // Draw four columns with support for non-power-of-two ranges void wallscan_drawcol4(int x, int y1, int y2, WallscanSampler *sampler, void(*draw4columns)()) { - int pixelsize = r_swtruecolor ? 4 : 1; - if (sampler[0].uv_max == 0) // power of two, no wrap handling needed + if (r_swtruecolor) { int count = y2 - y1; for (int i = 0; i < 4; i++) @@ -1224,6 +1269,7 @@ void wallscan_drawcol4(int x, int y1, int y2, WallscanSampler *sampler, void(*dr bufplce[i] = sampler[i].source; bufplce2[i] = sampler[i].source2; buftexturefracx[i] = sampler[i].texturefracx; + bufheight[i] = sampler[i].height; vplce[i] = sampler[i].uv_pos; vince[i] = sampler[i].uv_step; @@ -1231,52 +1277,74 @@ void wallscan_drawcol4(int x, int y1, int y2, WallscanSampler *sampler, void(*dr uint64_t pos64 = sampler[i].uv_pos; sampler[i].uv_pos = (uint32_t)(pos64 + step64 * count); } - dc_dest = (ylookup[y1] + x) * pixelsize + dc_destorg; + dc_dest = (ylookup[y1] + x) * 4 + dc_destorg; dc_count = count; draw4columns(); } else { - dc_dest = (ylookup[y1] + x) * pixelsize + dc_destorg; - for (int i = 0; i < 4; i++) + if (sampler[0].uv_max == 0) // power of two, no wrap handling needed { - bufplce[i] = sampler[i].source; - bufplce2[i] = sampler[i].source2; - buftexturefracx[i] = sampler[i].texturefracx; - } - - 
uint32_t left = y2 - y1; - while (left > 0) - { - // Find which column wraps first - uint32_t count = left; - for (int i = 0; i < 4; i++) - { - uint32_t available = sampler[i].uv_max - sampler[i].uv_pos; - uint32_t next_uv_wrap = available / sampler[i].uv_step; - if (available % sampler[i].uv_step != 0) - next_uv_wrap++; - count = MIN(next_uv_wrap, count); - } - - // Draw until that column wraps + int count = y2 - y1; for (int i = 0; i < 4; i++) { + bufplce[i] = sampler[i].source; + bufplce2[i] = sampler[i].source2; + buftexturefracx[i] = sampler[i].texturefracx; vplce[i] = sampler[i].uv_pos; vince[i] = sampler[i].uv_step; + + uint64_t step64 = sampler[i].uv_step; + uint64_t pos64 = sampler[i].uv_pos; + sampler[i].uv_pos = (uint32_t)(pos64 + step64 * count); } + dc_dest = (ylookup[y1] + x) + dc_destorg; dc_count = count; draw4columns(); - - // Wrap the uv position + } + else + { + dc_dest = (ylookup[y1] + x) + dc_destorg; for (int i = 0; i < 4; i++) { - sampler[i].uv_pos += sampler[i].uv_step * count; - if (sampler[i].uv_pos >= sampler[i].uv_max) - sampler[i].uv_pos -= sampler[i].uv_max; + bufplce[i] = sampler[i].source; + bufplce2[i] = sampler[i].source2; + buftexturefracx[i] = sampler[i].texturefracx; } - left -= count; + uint32_t left = y2 - y1; + while (left > 0) + { + // Find which column wraps first + uint32_t count = left; + for (int i = 0; i < 4; i++) + { + uint32_t available = sampler[i].uv_max - sampler[i].uv_pos; + uint32_t next_uv_wrap = available / sampler[i].uv_step; + if (available % sampler[i].uv_step != 0) + next_uv_wrap++; + count = MIN(next_uv_wrap, count); + } + + // Draw until that column wraps + for (int i = 0; i < 4; i++) + { + vplce[i] = sampler[i].uv_pos; + vince[i] = sampler[i].uv_step; + } + dc_count = count; + draw4columns(); + + // Wrap the uv position + for (int i = 0; i < 4; i++) + { + sampler[i].uv_pos += sampler[i].uv_step * count; + if (sampler[i].uv_pos >= sampler[i].uv_max) + sampler[i].uv_pos -= sampler[i].uv_max; + } + + left 
-= count; + } } } } @@ -1287,7 +1355,7 @@ typedef void(*Draw4ColumnsFuncPtr)(); void wallscan_any( int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x), - void(setupwallscan(int bits, int fracmax, Draw1ColumnFuncPtr &draw1, Draw4ColumnsFuncPtr &draw2))) + void(setupwallscan(int bits, Draw1ColumnFuncPtr &draw1, Draw4ColumnsFuncPtr &draw2))) { if (rw_pic->UseType == FTexture::TEX_Null) return; @@ -1297,7 +1365,7 @@ void wallscan_any( DWORD(*draw1column)(); void(*draw4columns)(); - setupwallscan(32 - rw_pic->HeightBits, (rw_pic->GetHeight() - 1) << (32 - rw_pic->HeightBits), draw1column, draw4columns); + setupwallscan(r_swtruecolor ? FRACBITS : 32 - rw_pic->HeightBits, draw1column, draw4columns); bool fixed = (fixedcolormap != NULL || fixedlightlev >= 0); if (fixed) @@ -1450,9 +1518,9 @@ void wallscan_any( void wallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x)) { - wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, [](int bits, int fracmax, Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) + wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, [](int bits, Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) { - setupvline(bits, fracmax); + setupvline(bits); line1 = dovline1; line4 = dovline4; }); @@ -1466,9 +1534,9 @@ void maskwallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t } else { - wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, [](int bits, int fracmax, Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) + wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, [](int bits, Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) { - setupmvline(bits, fracmax); + setupmvline(bits); line1 = domvline1; line4 = domvline4; }); @@ -1486,9 +1554,9 @@ void transmaskwallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fi } 
else { - wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, [](int bits, int fracmax, Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) + wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, [](int bits, Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) { - setuptmvline(bits, fracmax); + setuptmvline(bits); line1 = reinterpret_cast(tmvline1); line4 = tmvline4; }); diff --git a/src/textures/textures.h b/src/textures/textures.h index ab9dc3719c..bb83f79e7a 100644 --- a/src/textures/textures.h +++ b/src/textures/textures.h @@ -185,6 +185,9 @@ public: // Returns the whole texture, stored in column-major order, in BGRA8 format virtual const uint32_t *GetPixelsBgra(); + // Returns true if GetPixelsBgra includes mipmaps + virtual bool Mipmapped() { return true; } + virtual int CopyTrueColorPixels(FBitmap *bmp, int x, int y, int rotate=0, FCopyInfo *inf = NULL); int CopyTrueColorTranslated(FBitmap *bmp, int x, int y, int rotate, FRemapTable *remap, FCopyInfo *inf = NULL); virtual bool UseBasePalette(); @@ -530,6 +533,7 @@ public: void SetUpdated() { bNeedsUpdate = false; bDidUpdate = true; bFirstUpdate = false; } DSimpleCanvas *GetCanvas() { return Canvas; } DSimpleCanvas *GetCanvasBgra() { return CanvasBgra; } + bool Mipmapped() override { return false; } void MakeTexture (); void MakeTextureBgra (); diff --git a/wadsrc/static/language.enu b/wadsrc/static/language.enu index f9050a27b6..0bb3a84fdf 100644 --- a/wadsrc/static/language.enu +++ b/wadsrc/static/language.enu @@ -1781,6 +1781,9 @@ DSPLYMNU_VSYNC = "Vertical Sync"; DSPLYMNU_CAPFPS = "Rendering Interpolation"; DSPLYMNU_COLUMNMETHOD = "Column render mode"; DSPLYMNU_TRUECOLOR = "True color output"; +DSPLYMNU_MINFILTER = "Linear filter when downscaling"; +DSPLYMNU_MAGFILTER = "Linear filter when upscaling"; +DSPLYMNU_MIPMAP = "Use mipmapped textures"; DSPLYMNU_WIPETYPE = "Screen wipe style"; DSPLYMNU_SHOWENDOOM = "Show ENDOOM screen"; DSPLYMNU_PALLETEHACK = "DirectDraw palette hack"; // 
Not used diff --git a/wadsrc/static/menudef.txt b/wadsrc/static/menudef.txt index 3c712de967..679db909b5 100644 --- a/wadsrc/static/menudef.txt +++ b/wadsrc/static/menudef.txt @@ -662,6 +662,9 @@ OptionMenu "VideoOptions" Option "$DSPLYMNU_CAPFPS", "cl_capfps", "OffOn" Option "$DSPLYMNU_COLUMNMETHOD", "r_columnmethod", "ColumnMethods" Option "$DSPLYMNU_TRUECOLOR", "swtruecolor", "OnOff" + Option "$DSPLYMNU_MINFILTER", "r_minfilter", "OnOff" + Option "$DSPLYMNU_MAGFILTER", "r_magfilter", "OnOff" + Option "$DSPLYMNU_MIPMAP", "r_mipmap", "OnOff" StaticText " " Option "$DSPLYMNU_WIPETYPE", "wipetype", "Wipes" From 3b6d177787842f0d1844673a4b220797215fa1d8 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 25 Jun 2016 12:14:15 +0200 Subject: [PATCH 078/912] Added bicubic interpolation when generating mipmaps --- src/CMakeLists.txt | 1 + src/textures/bicubic_interpolation.cpp | 107 +++++++++++++++++++++++++ src/textures/bicubic_interpolation.h | 50 ++++++++++++ src/textures/texture.cpp | 19 +++++ src/textures/textures.h | 1 + 5 files changed, 178 insertions(+) create mode 100644 src/textures/bicubic_interpolation.cpp create mode 100644 src/textures/bicubic_interpolation.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 8c0a30ea07..042da0c8f0 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1190,6 +1190,7 @@ set (PCH_SOURCES textures/texturemanager.cpp textures/tgatexture.cpp textures/warptexture.cpp + textures/bicubic_interpolation.cpp thingdef/olddecorations.cpp thingdef/thingdef.cpp thingdef/thingdef_codeptr.cpp diff --git a/src/textures/bicubic_interpolation.cpp b/src/textures/bicubic_interpolation.cpp new file mode 100644 index 0000000000..2c8a3049d1 --- /dev/null +++ b/src/textures/bicubic_interpolation.cpp @@ -0,0 +1,107 @@ + +#include "doomtype.h" +#include "bicubic_interpolation.h" + +void BicubicInterpolation::ScaleImage(uint32_t *dest_data, int dest_width, int dest_height, const uint32_t *src_data, int src_width, int src_height) +{ 
+ if (dest_width <= 0 || dest_height <= 0 || src_width <= 0 || src_height <= 0) + return; + + // Scale factor as a rational number r = n / d + int n = dest_width; + int d = src_width; + + const unsigned char *src_ptr = (const unsigned char *)src_data; + unsigned char *dest_ptr = (unsigned char *)dest_data; + + scale(n, d, src_width, src_width * 4, src_height, src_ptr + 0, dest_width, dest_width * 4, dest_height, dest_ptr + 0); + scale(n, d, src_width, src_width * 4, src_height, src_ptr + 1, dest_width, dest_width * 4, dest_height, dest_ptr + 1); + scale(n, d, src_width, src_width * 4, src_height, src_ptr + 2, dest_width, dest_width * 4, dest_height, dest_ptr + 2); + scale(n, d, src_width, src_width * 4, src_height, src_ptr + 3, dest_width, dest_width * 4, dest_height, dest_ptr + 3); +} + +void BicubicInterpolation::scale(int n, int d, int in_width, int in_pitch, int in_height, const unsigned char *f, int out_width, int out_pitch, int out_height, unsigned char *g) +{ + // Implementation of Michael J. Aramini's Efficient Image Magnification by Bicubic Spline Interpolation + + int dimension_size = (out_width > out_height) ? out_width : out_height; + L_vector.resize(dimension_size); + + for (int i=0;i<4;i++) + c_vector[i].resize(dimension_size); + h_vector.resize(in_width); + + int larger_out_dimension; + int j, k, l, m, index; + int *L = &L_vector[0]; + float x; + float *c[4] = { &c_vector[0][0], &c_vector[1][0], &c_vector[2][0], &c_vector[3][0] }; + float *h = &h_vector[0]; + + larger_out_dimension = (out_width > out_height) ? 
out_width : out_height; + + for (k = 0; k < larger_out_dimension; k++) + L[k] = (k * d) / n; + + for (k = 0; k < n; k++) + { + x = (float)((k * d) % n) / (float)n; + c[0][k] = C0(x); + c[1][k] = C1(x); + c[2][k] = C2(x); + c[3][k] = C3(x); + } + for (k = n; k < larger_out_dimension; k++) + for (l = 0; l < 4; l++) + c[l][k] = c[l][k % n]; + + for (k = 0; k < out_height; k++) + { + for (j = 0; j < in_width; j++) + { + h[j] = 0.0f; + for (l = 0; l < 4; l++) + { + index = L[k] + l - 1; + if ((index >= 0) && (index < in_height)) + h[j] += f[index*in_pitch+j*4] * c[3 - l][k]; + } + } + for (m = 0; m < out_width; m++) + { + x = 0.5f; + for (l = 0; l < 4; l++) + { + index = L[m] + l - 1; + if ((index >= 0) && (index < in_width)) + x += h[index] * c[3 - l][m]; + } + if (x <= 0.0f) + g[k*out_pitch+m*4] = 0; + else if (x >= 255) + g[k*out_pitch+m*4] = 255; + else + g[k*out_pitch+m*4] = (unsigned char)x; + } + } +} + +inline float BicubicInterpolation::C0(float t) +{ + return -a * t * t * t + a * t * t; +} + +inline float BicubicInterpolation::C1(float t) +{ + return -(a + 2.0f) * t * t * t + (2.0f * a + 3.0f) * t * t - a * t; +} + +inline float BicubicInterpolation::C2(float t) +{ + return (a + 2.0f) * t * t * t - (a + 3.0f) * t * t + 1.0f; +} + +inline float BicubicInterpolation::C3(float t) +{ + return a * t * t * t - 2.0f * a * t * t + a * t; +} diff --git a/src/textures/bicubic_interpolation.h b/src/textures/bicubic_interpolation.h new file mode 100644 index 0000000000..da547ad83b --- /dev/null +++ b/src/textures/bicubic_interpolation.h @@ -0,0 +1,50 @@ +/* +** Bicubic Image Scaler +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. 
+** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. +*/ + +#ifndef __BICUBIC_INTERPOLATION_H__ +#define __BICUBIC_INTERPOLATION_H__ + +#pragma once + +#include + +// Bicubic image scaler +class BicubicInterpolation +{ +public: + void ScaleImage(uint32_t *dest, int dest_width, int dest_height, const uint32_t *src, int src_width, int src_height); + +private: + void scale(int n, int d, int in_width, int in_pitch, int in_height, const unsigned char *in_data, int out_width, int out_pitch, int out_height, unsigned char *out_data); + + float a = -0.5f; // a is a spline parameter such that -1 <= a <= 0 + + inline float C0(float t); + inline float C1(float t); + inline float C2(float t); + inline float C3(float t); + + std::vector L_vector; + std::vector c_vector[4]; + std::vector h_vector; +}; + +#endif diff --git a/src/textures/texture.cpp b/src/textures/texture.cpp index f5e4d4aa8a..7ff5c9ba25 100644 --- a/src/textures/texture.cpp +++ b/src/textures/texture.cpp @@ -45,6 +45,7 @@ #include "v_video.h" #include "m_fixed.h" #include "textures/textures.h" +#include "textures/bicubic_interpolation.h" #include "v_palette.h" typedef bool (*CheckFunc)(FileReader & file); @@ -381,6 +382,24 @@ int FTexture::MipmapLevels() const } void FTexture::GenerateBgraMipmaps() +{ + BicubicInterpolation bicubic; + + uint32_t *src = PixelsBgra.data(); + uint32_t *dest = src + Width 
* Height; + int levels = MipmapLevels(); + for (int i = 1; i < levels; i++) + { + int w = MAX(Width >> i, 1); + int h = MAX(Height >> i, 1); + + bicubic.ScaleImage(dest, h, w, src, Height, Width); + + dest += w * h; + } +} + +void FTexture::GenerateBgraMipmapsFast() { uint32_t *src = PixelsBgra.data(); uint32_t *dest = src + Width * Height; diff --git a/src/textures/textures.h b/src/textures/textures.h index bb83f79e7a..ff1093a49d 100644 --- a/src/textures/textures.h +++ b/src/textures/textures.h @@ -276,6 +276,7 @@ protected: void CreatePixelsBgraWithMipmaps(); void GenerateBgraMipmaps(); + void GenerateBgraMipmapsFast(); int MipmapLevels() const; public: From 4fd127651d9177a8a32b3a5415654ff741f8c459 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 26 Jun 2016 06:54:32 +0200 Subject: [PATCH 079/912] Fixed fuzz drawer crash --- src/r_draw_rgba.cpp | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 2576cfeda2..a0f534164b 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -826,7 +826,10 @@ public: // Handle the case where we would go out of bounds at the top: if (yl < fuzzstep) { - uint32_t bg = dest[fuzzoffset[fuzz] * fuzzstep + pitch]; + uint32_t *srcdest = dest + fuzzoffset[fuzz] * fuzzstep + pitch; + //assert(static_cast((srcdest - (uint32_t*)dc_destorg) / (_pitch)) < viewheight); + + uint32_t bg = *srcdest; uint32_t red = RPART(bg) * 3 / 4; uint32_t green = GPART(bg) * 3 / 4; @@ -842,7 +845,7 @@ public: return; } - bool lowerbounds = (yl + count * fuzzstep > _fuzzviewheight); + bool lowerbounds = (yl + (count + fuzzstep - 1) * fuzzstep > _fuzzviewheight); if (lowerbounds) count--; @@ -858,7 +861,10 @@ public: count -= cnt; do { - uint32_t bg = dest[fuzzoffset[fuzz] * fuzzstep]; + uint32_t *srcdest = dest + fuzzoffset[fuzz] * fuzzstep; + //assert(static_cast((srcdest - (uint32_t*)dc_destorg) / (_pitch)) < viewheight); + + uint32_t bg = *srcdest; uint32_t red = 
RPART(bg) * 3 / 4; uint32_t green = GPART(bg) * 3 / 4; @@ -875,7 +881,10 @@ public: // Handle the case where we would go out of bounds at the bottom if (lowerbounds) { - uint32_t bg = dest[fuzzoffset[fuzz] * fuzzstep - pitch]; + uint32_t *srcdest = dest + fuzzoffset[fuzz] * fuzzstep - pitch; + //assert(static_cast((srcdest - (uint32_t*)dc_destorg) / (_pitch)) < viewheight); + + uint32_t bg = *srcdest; uint32_t red = RPART(bg) * 3 / 4; uint32_t green = GPART(bg) * 3 / 4; From 928e8e0d4374cff2d2f07957d7ca77d153cec128 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 26 Jun 2016 12:53:10 +0200 Subject: [PATCH 080/912] Improved linear filtering performance by adding a lookup table --- src/r_draw_rgba.cpp | 25 ++++++++++++ src/r_draw_rgba.h | 94 +++++++++++++++++++++++++++++++++++---------- 2 files changed, 99 insertions(+), 20 deletions(-) diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index a0f534164b..bfabdfbbbb 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -98,12 +98,37 @@ CVAR(Bool, r_mipmap, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); ///////////////////////////////////////////////////////////////////////////// +__m128i SampleBgra::samplertable[256 * 2]; + DrawerCommandQueue *DrawerCommandQueue::Instance() { static DrawerCommandQueue queue; return &queue; } +DrawerCommandQueue::DrawerCommandQueue() +{ + for (int inv_b = 0; inv_b < 16; inv_b++) + { + for (int inv_a = 0; inv_a < 16; inv_a++) + { + int a = 16 - inv_a; + int b = 16 - inv_b; + + int ab = a * b; + int invab = inv_a * b; + int ainvb = a * inv_b; + int invainvb = inv_a * inv_b; + + __m128i ab_invab = _mm_set_epi16(invab, invab, invab, invab, ab, ab, ab, ab); + __m128i ainvb_invainvb = _mm_set_epi16(invainvb, invainvb, invainvb, invainvb, ainvb, ainvb, ainvb, ainvb); + + _mm_store_si128(SampleBgra::samplertable + inv_b * 32 + inv_a * 2, ab_invab); + _mm_store_si128(SampleBgra::samplertable + inv_b * 32 + inv_a * 2 + 1, ainvb_invainvb); + } + } +} + 
DrawerCommandQueue::~DrawerCommandQueue() { StopThreads(); diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index a60fd65c73..4961fa6dc6 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -222,6 +222,7 @@ class DrawerCommandQueue static DrawerCommandQueue *Instance(); + DrawerCommandQueue(); ~DrawerCommandQueue(); public: @@ -538,11 +539,75 @@ public: return (alpha << 24) | (red << 16) | (green << 8) | blue; } + +#ifndef NO_SSE + static __m128i samplertable[256 * 2]; +#endif }; ///////////////////////////////////////////////////////////////////////////// // SSE/AVX shading macros: +#define AVX2_SAMPLE_BILINEAR4_COLUMN_INIT(col0, col1, half, height, texturefracx) \ + const uint32_t *baseptr = col0[0]; \ + __m128i coloffsets0 = _mm_setr_epi32(col0[0] - baseptr, col0[1] - baseptr, col0[2] - baseptr, col0[3] - baseptr); \ + __m128i coloffsets1 = _mm_setr_epi32(col1[0] - baseptr, col1[1] - baseptr, col1[2] - baseptr, col1[3] - baseptr); \ + __m128i mhalf = _mm_loadu_si128((const __m128i*)half); \ + __m128i m127 = _mm_set1_epi16(127); \ + __m128i m16 = _mm_set1_epi32(16); \ + __m128i m15 = _mm_set1_epi32(15); \ + __m128i mheight = _mm_loadu_si128((const __m128i*)height); \ + __m128i mtexturefracx = _mm_loadu_si128((const __m128i*)texturefracx); + +#define AVX2_SAMPLE_BILINEAR4_COLUMN(fg, texturefracy) { \ + __m128i mtexturefracy = _mm_loadu_si128((const __m128i*)texturefracy); \ + __m128i multmp0 = _mm_srli_epi32(_mm_sub_epi32(mtexturefracy, mhalf), FRACBITS); \ + __m128i multmp1 = _mm_srli_epi32(_mm_add_epi32(mtexturefracy, mhalf), FRACBITS); \ + __m128i frac_y0 = _mm_or_si128(_mm_mul_epu32(multmp0, mheight), _mm_slli_si128(_mm_mul_epu32(_mm_srli_si128(multmp0, 4), _mm_srli_si128(mheight, 4)), 4)); \ + __m128i frac_y1 = _mm_or_si128(_mm_mul_epu32(multmp1, mheight), _mm_slli_si128(_mm_mul_epu32(_mm_srli_si128(multmp1, 4), _mm_srli_si128(mheight, 4)), 4)); \ + __m128i y0 = _mm_srli_epi32(frac_y0, FRACBITS); \ + __m128i y1 = _mm_srli_epi32(frac_y1, FRACBITS); \ + 
__m128i inv_b = mtexturefracx; \ + __m128i inv_a = _mm_and_si128(_mm_srli_epi32(frac_y1, FRACBITS - 4), m15); \ + __m128i a = _mm_sub_epi32(m16, inv_a); \ + __m128i b = _mm_sub_epi32(m16, inv_b); \ + __m128i ab = _mm_mullo_epi16(a, b); \ + __m128i invab = _mm_mullo_epi16(inv_a, b); \ + __m128i ainvb = _mm_mullo_epi16(a, inv_b); \ + __m128i invainvb = _mm_mullo_epi16(inv_a, inv_b); \ + __m128i ab_lo = _mm_shuffle_epi32(ab, _MM_SHUFFLE(1, 1, 0, 0)); \ + __m128i ab_hi = _mm_shuffle_epi32(ab, _MM_SHUFFLE(3, 3, 2, 2)); \ + __m128i invab_lo = _mm_shuffle_epi32(invab, _MM_SHUFFLE(1, 1, 0, 0)); \ + __m128i invab_hi = _mm_shuffle_epi32(invab, _MM_SHUFFLE(3, 3, 2, 2)); \ + __m128i ainvb_lo = _mm_shuffle_epi32(ainvb, _MM_SHUFFLE(1, 1, 0, 0)); \ + __m128i ainvb_hi = _mm_shuffle_epi32(ainvb, _MM_SHUFFLE(3, 3, 2, 2)); \ + __m128i invainvb_lo = _mm_shuffle_epi32(invainvb, _MM_SHUFFLE(1, 1, 0, 0)); \ + __m128i invainvb_hi = _mm_shuffle_epi32(invainvb, _MM_SHUFFLE(3, 3, 2, 2)); \ + ab_lo = _mm_or_si128(ab_lo, _mm_slli_epi32(ab_lo, 16)); \ + ab_hi = _mm_or_si128(ab_hi, _mm_slli_epi32(ab_hi, 16)); \ + invab_lo = _mm_or_si128(invab_lo, _mm_slli_epi32(invab_lo, 16)); \ + invab_hi = _mm_or_si128(invab_hi, _mm_slli_epi32(invab_hi, 16)); \ + ainvb_lo = _mm_or_si128(ainvb_lo, _mm_slli_epi32(ainvb_lo, 16)); \ + ainvb_hi = _mm_or_si128(ainvb_hi, _mm_slli_epi32(ainvb_hi, 16)); \ + invainvb_lo = _mm_or_si128(invainvb_lo, _mm_slli_epi32(invainvb_lo, 16)); \ + invainvb_hi = _mm_or_si128(invainvb_hi, _mm_slli_epi32(invainvb_hi, 16)); \ + __m128i p00 = _mm_i32gather_epi32((const int *)baseptr, _mm_add_epi32(y0, coloffsets0), 4); \ + __m128i p01 = _mm_i32gather_epi32((const int *)baseptr, _mm_add_epi32(y1, coloffsets0), 4); \ + __m128i p10 = _mm_i32gather_epi32((const int *)baseptr, _mm_add_epi32(y0, coloffsets1), 4); \ + __m128i p11 = _mm_i32gather_epi32((const int *)baseptr, _mm_add_epi32(y1, coloffsets1), 4); \ + __m128i p00_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(p00, _mm_setzero_si128()), 
ab_lo); \ + __m128i p01_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(p01, _mm_setzero_si128()), invab_lo); \ + __m128i p10_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(p10, _mm_setzero_si128()), ainvb_lo); \ + __m128i p11_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(p11, _mm_setzero_si128()), invainvb_lo); \ + __m128i p00_hi = _mm_mullo_epi16(_mm_unpackhi_epi8(p00, _mm_setzero_si128()), ab_hi); \ + __m128i p01_hi = _mm_mullo_epi16(_mm_unpackhi_epi8(p01, _mm_setzero_si128()), invab_hi); \ + __m128i p10_hi = _mm_mullo_epi16(_mm_unpackhi_epi8(p10, _mm_setzero_si128()), ainvb_hi); \ + __m128i p11_hi = _mm_mullo_epi16(_mm_unpackhi_epi8(p11, _mm_setzero_si128()), invainvb_hi); \ + __m128i fg_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_adds_epu16(_mm_adds_epu16(p00_lo, p01_lo), _mm_adds_epu16(p10_lo, p11_lo)), m127), 8); \ + __m128i fg_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_adds_epu16(_mm_adds_epu16(p00_hi, p01_hi), _mm_adds_epu16(p10_hi, p11_hi)), m127), 8); \ + fg = _mm_packus_epi16(fg_lo, fg_hi); \ +} + #define VEC_SAMPLE_BILINEAR4_COLUMN(fg, col0, col1, texturefracx, texturefracy, half, height) { \ __m128i m127 = _mm_set1_epi16(127); \ fg = _mm_setzero_si128(); \ @@ -550,23 +615,18 @@ public: { \ uint32_t frac_y0 = ((texturefracy[i] - half[i]) >> FRACBITS) * height[i]; \ uint32_t frac_y1 = ((texturefracy[i] + half[i]) >> FRACBITS) * height[i]; \ - uint32_t y0 = frac_y0 >> FRACBITS; \ - uint32_t y1 = frac_y1 >> FRACBITS; \ + uint32_t y0 = (frac_y0 >> FRACBITS); \ + uint32_t y1 = (frac_y1 >> FRACBITS); \ \ uint32_t inv_b = texturefracx[i]; \ uint32_t inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; \ - uint32_t a = 16 - inv_a; \ - uint32_t b = 16 - inv_b; \ \ - uint32_t ab = a * b; \ - uint32_t invab = inv_a * b; \ - uint32_t ainvb = a * inv_b; \ - uint32_t invainvb = inv_a * inv_b; \ - __m128i ab_invab = _mm_set_epi16(invab, invab, invab, invab, ab, ab, ab, ab); \ - __m128i ainvb_invainvb = _mm_set_epi16(invainvb, invainvb, invainvb, invainvb, ainvb, ainvb, ainvb, ainvb); \ + __m128i ab_invab = 
_mm_load_si128(SampleBgra::samplertable + inv_b * 32 + inv_a * 2); \ + __m128i ainvb_invainvb = _mm_load_si128(SampleBgra::samplertable + inv_b * 32 + inv_a * 2 + 1); \ \ - __m128i p0 = _mm_unpacklo_epi8(_mm_set_epi32(0, 0, col0[i][y1], col0[i][y0]), _mm_setzero_si128()); \ - __m128i p1 = _mm_unpacklo_epi8(_mm_set_epi32(0, 0, col1[i][y1], col1[i][y0]), _mm_setzero_si128()); \ + __m128i gather = _mm_set_epi32(col1[i][y1], col1[i][y0], col0[i][y1], col1[i][y0]); \ + __m128i p0 = _mm_unpacklo_epi8(gather, _mm_setzero_si128()); \ + __m128i p1 = _mm_unpackhi_epi8(gather, _mm_setzero_si128()); \ \ __m128i tmp = _mm_adds_epu16(_mm_mullo_epi16(p0, ab_invab), _mm_mullo_epi16(p1, ainvb_invainvb)); \ __m128i color = _mm_srli_epi16(_mm_adds_epu16(_mm_adds_epu16(_mm_srli_si128(tmp, 8), tmp), m127), 8); \ @@ -597,15 +657,9 @@ public: \ uint32_t inv_b = ((xfrac + xhalf) >> (xbits - 4)) & 15; \ uint32_t inv_a = ((yfrac + yhalf) >> (ybits - 4)) & 15; \ - uint32_t a = 16 - inv_a; \ - uint32_t b = 16 - inv_b; \ \ - uint32_t ab = a * b; \ - uint32_t invab = inv_a * b; \ - uint32_t ainvb = a * inv_b; \ - uint32_t invainvb = inv_a * inv_b; \ - __m128i ab_invab = _mm_set_epi16(invab, invab, invab, invab, ab, ab, ab, ab); \ - __m128i ainvb_invainvb = _mm_set_epi16(invainvb, invainvb, invainvb, invainvb, ainvb, ainvb, ainvb, ainvb); \ + __m128i ab_invab = _mm_load_si128(SampleBgra::samplertable + inv_b * 32 + inv_a * 2); \ + __m128i ainvb_invainvb = _mm_load_si128(SampleBgra::samplertable + inv_b * 32 + inv_a * 2 + 1); \ \ __m128i p0 = _mm_unpacklo_epi8(_mm_set_epi32(0, 0, p01, p00), _mm_setzero_si128()); \ __m128i p1 = _mm_unpacklo_epi8(_mm_set_epi32(0, 0, p11, p10), _mm_setzero_si128()); \ From 6c037fa24971df781b5581a42cf58651bcb71954 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 26 Jun 2016 21:23:32 +0200 Subject: [PATCH 081/912] Throwing templates at the code redundancy problem in drawers --- src/r_draw_rgba.cpp | 630 ++++++++++++++++++++-------- src/r_draw_rgba.h | 88 ++-- 
src/r_draw_rgba_sse.h | 923 +---------------------------------------- src/r_drawt_rgba.cpp | 5 + src/r_drawt_rgba_sse.h | 10 + src/r_segs.cpp | 3 +- 6 files changed, 551 insertions(+), 1108 deletions(-) diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index bfabdfbbbb..fbb2c12c5a 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -67,8 +67,13 @@ CVAR(Bool, r_mipmap, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); #ifndef NO_SSE +#ifdef _MSC_VER +#pragma warning(disable: 4101) // warning C4101: unreferenced local variable +#endif + // Generate SSE drawers: #define VecCommand(name) name##_SSE_Command +#define VEC_SHADE_VARS SSE_SHADE_VARS #define VEC_SHADE_SIMPLE_INIT SSE_SHADE_SIMPLE_INIT #define VEC_SHADE_SIMPLE_INIT4 SSE_SHADE_SIMPLE_INIT4 #define VEC_SHADE_SIMPLE SSE_SHADE_SIMPLE @@ -1552,8 +1557,446 @@ public: return (--count) != 0; } }; + +#ifdef NO_SSE + struct NearestSampler + { + FORCEINLINE static uint32_t Sample1(DrawerWall4Command &cmd, LoopIterator &loop, int index) + { + return cmd._bufplce[index][loop.sample_index(index)]; + } + }; + struct LinearSampler + { + FORCEINLINE static uint32_t Sample1(DrawerWall4Command &cmd, LoopIterator &loop, int index) + { + return SampleBgra::sample_bilinear(cmd._bufplce[index], cmd._bufplce2[index], cmd._buftexturefracx[index], loop.vplce[index], loop.half[index], loop.height[index]); + } + }; +#else + struct NearestSampler + { + FORCEINLINE static __m128i Sample4(DrawerWall4Command &cmd, LoopIterator &loop) + { + return _mm_set_epi32(cmd._bufplce[3][loop.sample_index(3)], cmd._bufplce[2][loop.sample_index(2)], cmd._bufplce[1][loop.sample_index(1)], cmd._bufplce[0][loop.sample_index(0)]); + } + }; + + struct LinearSampler + { + FORCEINLINE static __m128i Sample4(DrawerWall4Command &cmd, LoopIterator &loop) + { + __m128i fg; + VEC_SAMPLE_BILINEAR4_COLUMN(fg, cmd._bufplce, cmd._bufplce2, cmd._buftexturefracx, loop.vplce, loop.half, loop.height); + return fg; + } + }; +#endif + +#ifdef NO_SSE + template + struct 
Copy + { + Copy(DrawerWall4Command &cmd, LoopIterator &loop) + { + } + void Blend(DrawerWall4Command &cmd, LoopIterator &loop) + { + for (int i = 0; i < 4; i++) + { + uint32_t fg = LightBgra::shade_bgra(Sampler::Sample1(cmd, loop, i), cmd._light[i], cmd._shade_constants); + loop.dest[i] = BlendBgra::copy(fg); + } + } + }; + + template + struct Mask + { + Mask(DrawerWall4Command &cmd, LoopIterator &loop) + { + } + void Blend(DrawerWall4Command &cmd, LoopIterator &loop) + { + for (int i = 0; i < 4; i++) + { + uint32_t fg = LightBgra::shade_bgra(Sampler::Sample1(cmd, loop, i), cmd._light[i], cmd._shade_constants); + loop.dest[i] = BlendBgra::alpha_blend(fg, loop.dest[i]); + } + } + }; + + template + struct TMaskAdd + { + TMaskAdd(DrawerWall4Command &cmd, LoopIterator &loop) + { + } + void Blend(DrawerWall4Command &cmd, LoopIterator &loop) + { + for (int i = 0; i < 4; i++) + { + uint32_t fg = LightBgra::shade_bgra(Sampler::Sample1(cmd, loop, i), cmd._light[i], cmd._shade_constants); + loop.dest[i] = BlendBgra::add(fg, loop.dest[i], cmd._srcalpha, calc_blend_bgalpha(fg, cmd._destalpha)); + } + } + }; + + template + struct TMaskSub + { + TMaskSub(DrawerWall4Command &cmd, LoopIterator &loop) + { + } + void Blend(DrawerWall4Command &cmd, LoopIterator &loop) + { + for (int i = 0; i < 4; i++) + { + uint32_t fg = LightBgra::shade_bgra(Sampler::Sample1(cmd, loop, i), cmd._light[i], cmd._shade_constants); + loop.dest[i] = BlendBgra::sub(fg, loop.dest[i], cmd._srcalpha, calc_blend_bgalpha(fg, cmd._destalpha)); + } + } + }; + + template + struct TMaskRevSub + { + TMaskRevSub(DrawerWall4Command &cmd, LoopIterator &loop) + { + } + void Blend(DrawerWall4Command &cmd, LoopIterator &loop) + { + for (int i = 0; i < 4; i++) + { + uint32_t fg = LightBgra::shade_bgra(Sampler::Sample1(cmd, loop, i), cmd._light[i], cmd._shade_constants); + loop.dest[i] = BlendBgra::revsub(fg, loop.dest[i], cmd._srcalpha, calc_blend_bgalpha(fg, cmd._destalpha)); + } + } + }; + + typedef Copy 
CopyNearestSimple; + typedef Copy CopyLinearSimple; + typedef Copy CopyNearest; + typedef Copy CopyLinear; + typedef Mask MaskNearestSimple; + typedef Mask MaskLinearSimple; + typedef Mask MaskNearest; + typedef Mask MaskLinear; + typedef TMaskAdd TMaskAddNearestSimple; + typedef TMaskAdd TMaskAddLinearSimple; + typedef TMaskAdd TMaskAddNearest; + typedef TMaskAdd TMaskAddLinear; + typedef TMaskSub TMaskSubNearestSimple; + typedef TMaskSub TMaskSubLinearSimple; + typedef TMaskSub TMaskSubNearest; + typedef TMaskSub TMaskSubLinear; + typedef TMaskRevSub TMaskRevSubNearestSimple; + typedef TMaskRevSub TMaskRevSubLinearSimple; + typedef TMaskRevSub TMaskRevSubNearest; + typedef TMaskRevSub TMaskRevSubLinear; +#else + template + struct CopySimple + { + VEC_SHADE_VARS(); + CopySimple(DrawerWall4Command &cmd, LoopIterator &loop) + { + VEC_SHADE_SIMPLE_INIT4(cmd._light[3], cmd._light[2], cmd._light[1], cmd._light[0]); + } + void Blend(DrawerWall4Command &cmd, LoopIterator &loop) + { + __m128i fg = Sampler::Sample4(cmd, loop); + VEC_SHADE_SIMPLE(fg); + _mm_storeu_si128((__m128i*)loop.dest, fg); + } + }; + + template + struct Copy + { + VEC_SHADE_VARS(); + Copy(DrawerWall4Command &cmd, LoopIterator &loop) + { + VEC_SHADE_INIT4(cmd._light[3], cmd._light[2], cmd._light[1], cmd._light[0], cmd._shade_constants); + } + void Blend(DrawerWall4Command &cmd, LoopIterator &loop) + { + __m128i fg = Sampler::Sample4(cmd, loop); + VEC_SHADE(fg, cmd._shade_constants); + _mm_storeu_si128((__m128i*)loop.dest, fg); + } + }; + + template + struct MaskSimple + { + VEC_SHADE_VARS(); + MaskSimple(DrawerWall4Command &cmd, LoopIterator &loop) + { + VEC_SHADE_SIMPLE_INIT4(cmd._light[3], cmd._light[2], cmd._light[1], cmd._light[0]); + } + void Blend(DrawerWall4Command &cmd, LoopIterator &loop) + { + __m128i fg = Sampler::Sample4(cmd, loop); + __m128i bg = _mm_loadu_si128((const __m128i*)loop.dest); + VEC_SHADE_SIMPLE(fg); + VEC_ALPHA_BLEND(fg, bg); + _mm_storeu_si128((__m128i*)loop.dest, fg); + } + 
}; + + template + struct Mask + { + VEC_SHADE_VARS(); + Mask(DrawerWall4Command &cmd, LoopIterator &loop) + { + VEC_SHADE_INIT4(cmd._light[3], cmd._light[2], cmd._light[1], cmd._light[0], cmd._shade_constants); + } + void Blend(DrawerWall4Command &cmd, LoopIterator &loop) + { + __m128i fg = Sampler::Sample4(cmd, loop); + __m128i bg = _mm_loadu_si128((const __m128i*)loop.dest); + VEC_SHADE(fg, cmd._shade_constants); + VEC_ALPHA_BLEND(fg, bg); + _mm_storeu_si128((__m128i*)loop.dest, fg); + } + }; + + template + struct TMaskAddSimple + { + VEC_SHADE_VARS(); + VEC_CALC_BLEND_ALPHA_VARS(); + TMaskAddSimple(DrawerWall4Command &cmd, LoopIterator &loop) + { + VEC_SHADE_SIMPLE_INIT4(cmd._light[3], cmd._light[2], cmd._light[1], cmd._light[0]); + VEC_CALC_BLEND_ALPHA_INIT(cmd._srcalpha, cmd._destalpha); + } + void Blend(DrawerWall4Command &cmd, LoopIterator &loop) + { + __m128i fg = Sampler::Sample4(cmd, loop); + __m128i bg = _mm_loadu_si128((const __m128i*)loop.dest); + + VEC_CALC_BLEND_ALPHA(fg); + VEC_SHADE_SIMPLE(fg); + + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + __m128i out_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, fg_alpha_hi), _mm_mullo_epi16(bg_hi, bg_alpha_hi)), 8); + __m128i out_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, fg_alpha_lo), _mm_mullo_epi16(bg_lo, bg_alpha_lo)), 8); + __m128i out = _mm_packus_epi16(out_lo, out_hi); + + _mm_storeu_si128((__m128i*)loop.dest, out); + } + }; + + template + struct TMaskAdd + { + VEC_SHADE_VARS(); + VEC_CALC_BLEND_ALPHA_VARS(); + TMaskAdd(DrawerWall4Command &cmd, LoopIterator &loop) + { + VEC_SHADE_INIT4(cmd._light[3], cmd._light[2], cmd._light[1], cmd._light[0], cmd._shade_constants); + VEC_CALC_BLEND_ALPHA_INIT(cmd._srcalpha, cmd._destalpha); + } + void Blend(DrawerWall4Command &cmd, 
LoopIterator &loop) + { + __m128i fg = Sampler::Sample4(cmd, loop); + __m128i bg = _mm_loadu_si128((const __m128i*)loop.dest); + + VEC_CALC_BLEND_ALPHA(fg); + VEC_SHADE_SIMPLE(fg); + + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + __m128i out_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, fg_alpha_hi), _mm_mullo_epi16(bg_hi, bg_alpha_hi)), 8); + __m128i out_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, fg_alpha_lo), _mm_mullo_epi16(bg_lo, bg_alpha_lo)), 8); + __m128i out = _mm_packus_epi16(out_lo, out_hi); + + _mm_storeu_si128((__m128i*)loop.dest, out); + } + }; + + template + struct TMaskSubSimple + { + VEC_SHADE_VARS(); + VEC_CALC_BLEND_ALPHA_VARS(); + TMaskSubSimple(DrawerWall4Command &cmd, LoopIterator &loop) + { + VEC_SHADE_SIMPLE_INIT4(cmd._light[3], cmd._light[2], cmd._light[1], cmd._light[0]); + VEC_CALC_BLEND_ALPHA_INIT(cmd._srcalpha, cmd._destalpha); + } + void Blend(DrawerWall4Command &cmd, LoopIterator &loop) + { + __m128i fg = Sampler::Sample4(cmd, loop); + __m128i bg = _mm_loadu_si128((const __m128i*)loop.dest); + + VEC_CALC_BLEND_ALPHA(fg); + VEC_SHADE_SIMPLE(fg); + + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + __m128i out_hi = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(bg_hi, bg_alpha_hi), _mm_mullo_epi16(fg_hi, fg_alpha_hi)), 8); + __m128i out_lo = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(bg_lo, bg_alpha_lo), _mm_mullo_epi16(fg_lo, fg_alpha_lo)), 8); + __m128i out = _mm_packus_epi16(out_lo, out_hi); + + _mm_storeu_si128((__m128i*)loop.dest, out); + } + }; + + template + struct TMaskSub + { + VEC_SHADE_VARS(); + 
VEC_CALC_BLEND_ALPHA_VARS(); + TMaskSub(DrawerWall4Command &cmd, LoopIterator &loop) + { + VEC_SHADE_INIT4(cmd._light[3], cmd._light[2], cmd._light[1], cmd._light[0], cmd._shade_constants); + VEC_CALC_BLEND_ALPHA_INIT(cmd._srcalpha, cmd._destalpha); + } + void Blend(DrawerWall4Command &cmd, LoopIterator &loop) + { + __m128i fg = Sampler::Sample4(cmd, loop); + __m128i bg = _mm_loadu_si128((const __m128i*)loop.dest); + + VEC_CALC_BLEND_ALPHA(fg); + VEC_SHADE_SIMPLE(fg); + + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + __m128i out_hi = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(bg_hi, bg_alpha_hi), _mm_mullo_epi16(fg_hi, fg_alpha_hi)), 8); + __m128i out_lo = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(bg_lo, bg_alpha_lo), _mm_mullo_epi16(fg_lo, fg_alpha_lo)), 8); + __m128i out = _mm_packus_epi16(out_lo, out_hi); + + _mm_storeu_si128((__m128i*)loop.dest, out); + } + }; + + template + struct TMaskRevSubSimple + { + VEC_SHADE_VARS(); + VEC_CALC_BLEND_ALPHA_VARS(); + TMaskRevSubSimple(DrawerWall4Command &cmd, LoopIterator &loop) + { + VEC_SHADE_SIMPLE_INIT4(cmd._light[3], cmd._light[2], cmd._light[1], cmd._light[0]); + VEC_CALC_BLEND_ALPHA_INIT(cmd._srcalpha, cmd._destalpha); + } + void Blend(DrawerWall4Command &cmd, LoopIterator &loop) + { + __m128i fg = Sampler::Sample4(cmd, loop); + __m128i bg = _mm_loadu_si128((const __m128i*)loop.dest); + + VEC_CALC_BLEND_ALPHA(fg); + VEC_SHADE_SIMPLE(fg); + + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + __m128i out_hi = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(fg_hi, fg_alpha_hi), _mm_mullo_epi16(bg_hi, bg_alpha_hi)), 
8); + __m128i out_lo = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(fg_lo, fg_alpha_lo), _mm_mullo_epi16(bg_lo, bg_alpha_lo)), 8); + __m128i out = _mm_packus_epi16(out_lo, out_hi); + + _mm_storeu_si128((__m128i*)loop.dest, out); + } + }; + + template + struct TMaskRevSub + { + VEC_SHADE_VARS(); + VEC_CALC_BLEND_ALPHA_VARS(); + TMaskRevSub(DrawerWall4Command &cmd, LoopIterator &loop) + { + VEC_SHADE_INIT4(cmd._light[3], cmd._light[2], cmd._light[1], cmd._light[0], cmd._shade_constants); + VEC_CALC_BLEND_ALPHA_INIT(cmd._srcalpha, cmd._destalpha); + } + void Blend(DrawerWall4Command &cmd, LoopIterator &loop) + { + __m128i fg = Sampler::Sample4(cmd, loop); + __m128i bg = _mm_loadu_si128((const __m128i*)loop.dest); + + VEC_CALC_BLEND_ALPHA(fg); + VEC_SHADE_SIMPLE(fg); + + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + __m128i out_hi = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(fg_hi, fg_alpha_hi), _mm_mullo_epi16(bg_hi, bg_alpha_hi)), 8); + __m128i out_lo = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(fg_lo, fg_alpha_lo), _mm_mullo_epi16(bg_lo, bg_alpha_lo)), 8); + __m128i out = _mm_packus_epi16(out_lo, out_hi); + + _mm_storeu_si128((__m128i*)loop.dest, out); + } + }; + + typedef CopySimple CopyNearestSimple; + typedef CopySimple CopyLinearSimple; + typedef Copy CopyNearest; + typedef Copy CopyLinear; + typedef MaskSimple MaskNearestSimple; + typedef MaskSimple MaskLinearSimple; + typedef Mask MaskNearest; + typedef Mask MaskLinear; + typedef TMaskAddSimple TMaskAddNearestSimple; + typedef TMaskAddSimple TMaskAddLinearSimple; + typedef TMaskAdd TMaskAddNearest; + typedef TMaskAdd TMaskAddLinear; + typedef TMaskSubSimple TMaskSubNearestSimple; + typedef TMaskSubSimple TMaskSubLinearSimple; + typedef TMaskSub TMaskSubNearest; + typedef TMaskSub TMaskSubLinear; + 
typedef TMaskRevSubSimple TMaskRevSubNearestSimple; + typedef TMaskRevSubSimple TMaskRevSubLinearSimple; + typedef TMaskRevSub TMaskRevSubNearest; + typedef TMaskRevSub TMaskRevSubLinear; +#endif }; +typedef DrawerBlendCommand Vlinec4NearestSimpleRGBACommand; +typedef DrawerBlendCommand Vlinec4NearestRGBACommand; +typedef DrawerBlendCommand Vlinec4LinearSimpleRGBACommand; +typedef DrawerBlendCommand Vlinec4LinearRGBACommand; +typedef DrawerBlendCommand Mvlinec4NearestSimpleRGBACommand; +typedef DrawerBlendCommand Mvlinec4NearestRGBACommand; +typedef DrawerBlendCommand Mvlinec4LinearSimpleRGBACommand; +typedef DrawerBlendCommand Mvlinec4LinearRGBACommand; +typedef DrawerBlendCommand Tmvline4AddNearestSimpleRGBACommand; +typedef DrawerBlendCommand Tmvline4AddNearestRGBACommand; +typedef DrawerBlendCommand Tmvline4AddLinearSimpleRGBACommand; +typedef DrawerBlendCommand Tmvline4AddLinearRGBACommand; +typedef DrawerBlendCommand Tmvline4AddClampNearestSimpleRGBACommand; +typedef DrawerBlendCommand Tmvline4AddClampNearestRGBACommand; +typedef DrawerBlendCommand Tmvline4AddClampLinearSimpleRGBACommand; +typedef DrawerBlendCommand Tmvline4AddClampLinearRGBACommand; +typedef DrawerBlendCommand Tmvline4SubClampNearestSimpleRGBACommand; +typedef DrawerBlendCommand Tmvline4SubClampNearestRGBACommand; +typedef DrawerBlendCommand Tmvline4SubClampLinearSimpleRGBACommand; +typedef DrawerBlendCommand Tmvline4SubClampLinearRGBACommand; +typedef DrawerBlendCommand Tmvline4RevSubClampNearestSimpleRGBACommand; +typedef DrawerBlendCommand Tmvline4RevSubClampNearestRGBACommand; +typedef DrawerBlendCommand Tmvline4RevSubClampLinearSimpleRGBACommand; +typedef DrawerBlendCommand Tmvline4RevSubClampLinearRGBACommand; + class Vlinec1RGBACommand : public DrawerWall1Command { public: @@ -1581,39 +2024,6 @@ public: } }; -class Vlinec4RGBACommand : public DrawerWall4Command -{ -public: - void Execute(DrawerThread *thread) override - { - LoopIterator loop(this, thread); - if (!loop) return; - - if 
(_bufplce2[0] == nullptr) - { - do - { - for (int i = 0; i < 4; i++) - { - uint32_t fg = LightBgra::shade_bgra(_bufplce[i][loop.sample_index(i)], _light[i], _shade_constants); - loop.dest[i] = BlendBgra::copy(fg); - } - } while (loop.next()); - } - else - { - do - { - for (int i = 0; i < 4; i++) - { - uint32_t fg = LightBgra::shade_bgra(SampleBgra::sample_bilinear(_bufplce[i], _bufplce2[i], _buftexturefracx[i], loop.sample_index(i), loop.half[i], loop.height[i]), _light[i], _shade_constants); - loop.dest[i] = BlendBgra::copy(fg); - } - } while (loop.next()); - } - } -}; - class Mvlinec1RGBACommand : public DrawerWall1Command { public: @@ -1641,39 +2051,6 @@ public: } }; -class Mvlinec4RGBACommand : public DrawerWall4Command -{ -public: - void Execute(DrawerThread *thread) override - { - LoopIterator loop(this, thread); - if (!loop) return; - - if (_bufplce2[0] == nullptr) - { - do - { - for (int i = 0; i < 4; i++) - { - uint32_t fg = LightBgra::shade_bgra(_bufplce[i][loop.sample_index(i)], _light[i], _shade_constants); - loop.dest[i] = BlendBgra::alpha_blend(fg, loop.dest[i]); - } - } while (loop.next()); - } - else - { - do - { - for (int i = 0; i < 4; i++) - { - uint32_t fg = LightBgra::shade_bgra(SampleBgra::sample_bilinear(_bufplce[i], _bufplce2[i], _buftexturefracx[i], loop.sample_index(i), loop.half[i], loop.height[i]), _light[i], _shade_constants); - loop.dest[i] = BlendBgra::alpha_blend(fg, loop.dest[i]); - } - } while (loop.next()); - } - } -}; - class Tmvline1AddRGBACommand : public DrawerWall1Command { public: @@ -1689,24 +2066,6 @@ public: } }; -class Tmvline4AddRGBACommand : public DrawerWall4Command -{ -public: - void Execute(DrawerThread *thread) override - { - LoopIterator loop(this, thread); - if (!loop) return; - do - { - for (int i = 0; i < 4; i++) - { - uint32_t fg = LightBgra::shade_bgra(_bufplce[i][loop.sample_index(i)], _light[i], _shade_constants); - loop.dest[i] = BlendBgra::add(fg, loop.dest[i], _srcalpha, calc_blend_bgalpha(fg, 
_destalpha)); - } - } while (loop.next()); - } -}; - class Tmvline1AddClampRGBACommand : public DrawerWall1Command { public: @@ -1722,24 +2081,6 @@ public: } }; -class Tmvline4AddClampRGBACommand : public DrawerWall4Command -{ -public: - void Execute(DrawerThread *thread) override - { - LoopIterator loop(this, thread); - if (!loop) return; - do - { - for (int i = 0; i < 4; i++) - { - uint32_t fg = LightBgra::shade_bgra(_bufplce[i][loop.sample_index(i)], _light[i], _shade_constants); - loop.dest[i] = BlendBgra::add(fg, loop.dest[i], _srcalpha, calc_blend_bgalpha(fg, _destalpha)); - } - } while (loop.next()); - } -}; - class Tmvline1SubClampRGBACommand : public DrawerWall1Command { public: @@ -1755,24 +2096,6 @@ public: } }; -class Tmvline4SubClampRGBACommand : public DrawerWall4Command -{ -public: - void Execute(DrawerThread *thread) override - { - LoopIterator loop(this, thread); - if (!loop) return; - do - { - for (int i = 0; i < 4; i++) - { - uint32_t fg = LightBgra::shade_bgra(_bufplce[i][loop.sample_index(i)], _light[i], _shade_constants); - loop.dest[i] = BlendBgra::sub(fg, loop.dest[i], _srcalpha, calc_blend_bgalpha(fg, _destalpha)); - } - } while (loop.next()); - } -}; - class Tmvline1RevSubClampRGBACommand : public DrawerWall1Command { public: @@ -1788,24 +2111,6 @@ public: } }; -class Tmvline4RevSubClampRGBACommand : public DrawerWall4Command -{ -public: - void Execute(DrawerThread *thread) override - { - LoopIterator loop(this, thread); - if (!loop) return; - do - { - for (int i = 0; i < 4; i++) - { - uint32_t fg = LightBgra::shade_bgra(_bufplce[i][loop.sample_index(i)], _light[i], _shade_constants); - loop.dest[i] = BlendBgra::revsub(fg, loop.dest[i], _srcalpha, calc_blend_bgalpha(fg, _destalpha)); - } - } while (loop.next()); - } -}; - ///////////////////////////////////////////////////////////////////////////// class DrawFogBoundaryLineRGBACommand : public DrawerCommand @@ -2355,13 +2660,22 @@ DWORD vlinec1_rgba() return dc_texturefrac + dc_count * 
dc_iscale; } +template +void queue_wallcommand() +{ + if (bufplce2[0] == nullptr && dc_shade_constants.simple_shade) + DrawerCommandQueue::QueueCommand(); + else if (bufplce2[0] == nullptr) + DrawerCommandQueue::QueueCommand(); + else if (dc_shade_constants.simple_shade) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); +} + void vlinec4_rgba() { -#ifdef NO_SSE - DrawerCommandQueue::QueueCommand(); -#else - DrawerCommandQueue::QueueCommand(); -#endif + queue_wallcommand(); for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } @@ -2374,11 +2688,7 @@ DWORD mvlinec1_rgba() void mvlinec4_rgba() { -#ifdef NO_SSE - DrawerCommandQueue::QueueCommand(); -#else - DrawerCommandQueue::QueueCommand(); -#endif + queue_wallcommand(); for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } @@ -2391,11 +2701,7 @@ fixed_t tmvline1_add_rgba() void tmvline4_add_rgba() { -#ifdef NO_SSE - DrawerCommandQueue::QueueCommand(); -#else - DrawerCommandQueue::QueueCommand(); -#endif + queue_wallcommand(); for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } @@ -2408,11 +2714,7 @@ fixed_t tmvline1_addclamp_rgba() void tmvline4_addclamp_rgba() { -#ifdef NO_SSE - DrawerCommandQueue::QueueCommand(); -#else - DrawerCommandQueue::QueueCommand(); -#endif + queue_wallcommand(); for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } @@ -2425,11 +2727,7 @@ fixed_t tmvline1_subclamp_rgba() void tmvline4_subclamp_rgba() { -#ifdef NO_SSE - DrawerCommandQueue::QueueCommand(); -#else - DrawerCommandQueue::QueueCommand(); -#endif + queue_wallcommand(); for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } @@ -2442,11 +2740,7 @@ fixed_t tmvline1_revsubclamp_rgba() void tmvline4_revsubclamp_rgba() { -#ifdef NO_SSE - DrawerCommandQueue::QueueCommand(); -#else - DrawerCommandQueue::QueueCommand(); -#endif + queue_wallcommand(); for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } diff --git a/src/r_draw_rgba.h 
b/src/r_draw_rgba.h index 4961fa6dc6..53572c88b4 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -286,6 +286,22 @@ public: void Execute(DrawerThread *thread) override; }; +template +class DrawerBlendCommand : public CommandType +{ +public: + void Execute(DrawerThread *thread) override + { + LoopIterator loop(this, thread); + if (!loop) return; + BlendMode blend(*this, loop); + do + { + blend.Blend(*this, loop); + } while (loop.next()); + } +}; + ///////////////////////////////////////////////////////////////////////////// // Pixel shading inline functions: @@ -624,7 +640,7 @@ public: __m128i ab_invab = _mm_load_si128(SampleBgra::samplertable + inv_b * 32 + inv_a * 2); \ __m128i ainvb_invainvb = _mm_load_si128(SampleBgra::samplertable + inv_b * 32 + inv_a * 2 + 1); \ \ - __m128i gather = _mm_set_epi32(col1[i][y1], col1[i][y0], col0[i][y1], col1[i][y0]); \ + __m128i gather = _mm_set_epi32(col1[i][y1], col1[i][y0], col0[i][y1], col0[i][y0]); \ __m128i p0 = _mm_unpacklo_epi8(gather, _mm_setzero_si128()); \ __m128i p1 = _mm_unpackhi_epi8(gather, _mm_setzero_si128()); \ \ @@ -635,6 +651,26 @@ public: } \ } +#define VEC_SAMPLE_MIP_NEAREST4_COLUMN(fg, col0, col1, mipfrac, texturefracy, height0, height1) { \ + uint32_t y0[4], y1[4]; \ + for (int i = 0; i < 4; i++) \ + { \ + y0[i] = (texturefracy[i] >> FRACBITS) * height0[i]; \ + y1[i] = (texturefracy[i] >> FRACBITS) * height1[i]; \ + } \ + __m128i p0 = _mm_set_epi32(col0[y0[3]], col0[y0[2]], col0[y0[1]], col0[y0[0]]); \ + __m128i p1 = _mm_set_epi32(col1[y1[3]], col1[y1[2]], col1[y1[1]], col1[y1[0]]); \ + __m128i t = _mm_loadu_si128((const __m128i*)mipfrac); \ + __m128i inv_t = _mm_sub_epi32(_mm_set1_epi32(256), mipfrac); \ + __m128i p0_lo = _mm_unpacklo_epi8(p0, _mm_setzero_si128()); \ + __m128i p0_hi = _mm_unpackhi_epi8(p0, _mm_setzero_si128()); \ + __m128i p1_lo = _mm_unpacklo_epi8(p1, _mm_setzero_si128()); \ + __m128i p1_hi = _mm_unpackhi_epi8(p1, _mm_setzero_si128()); \ + __m128i fg_lo = 
_mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(p0_lo, t), _mm_mullo_epi16(p1_lo, inv_t)), 8); \ + __m128i fg_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(p0_hi, t), _mm_mullo_epi16(p1_hi, inv_t)), 8); \ + fg = _mm_packus_epi16(fg_lo, fg_hi); \ +} + #define VEC_SAMPLE_BILINEAR4_SPAN(fg, texture, xfrac, yfrac, xstep, ystep, xbits, ybits) { \ int xshift = (32 - xbits); \ int yshift = (32 - ybits); \ @@ -844,12 +880,14 @@ FORCEINLINE uint32_t calc_blend_bgalpha(uint32_t fg, uint32_t dest_alpha) return (dest_alpha * alpha + 256 * inv_alpha + 128) >> 8; } +#define VEC_CALC_BLEND_ALPHA_VARS() __m128i msrc_alpha, mdest_alpha, m256, m255, m128; + #define VEC_CALC_BLEND_ALPHA_INIT(src_alpha, dest_alpha) \ - __m128i msrc_alpha = _mm_set1_epi16(src_alpha); \ - __m128i mdest_alpha = _mm_set1_epi16(dest_alpha * 255 / 256); \ - __m128i m256 = _mm_set1_epi16(256); \ - __m128i m255 = _mm_set1_epi16(255); \ - __m128i m128 = _mm_set1_epi16(128); + msrc_alpha = _mm_set1_epi16(src_alpha); \ + mdest_alpha = _mm_set1_epi16(dest_alpha * 255 / 256); \ + m256 = _mm_set1_epi16(256); \ + m255 = _mm_set1_epi16(255); \ + m128 = _mm_set1_epi16(128); // Calculates the final alpha values to be used when combined with the source texture alpha channel #define VEC_CALC_BLEND_ALPHA(fg) \ @@ -866,15 +904,17 @@ FORCEINLINE uint32_t calc_blend_bgalpha(uint32_t fg, uint32_t dest_alpha) fg_alpha_lo = msrc_alpha; \ } +#define SSE_SHADE_VARS() __m128i mlight_hi, mlight_lo, color, fade, fade_amount_hi, fade_amount_lo, inv_desaturate; + // Calculate constants for a simple shade #define SSE_SHADE_SIMPLE_INIT(light) \ - __m128i mlight_hi = _mm_set_epi16(256, light, light, light, 256, light, light, light); \ - __m128i mlight_lo = mlight_hi; + mlight_hi = _mm_set_epi16(256, light, light, light, 256, light, light, light); \ + mlight_lo = mlight_hi; // Calculate constants for a simple shade with different light levels for each pixel #define SSE_SHADE_SIMPLE_INIT4(light3, light2, light1, light0) \ - __m128i 
mlight_hi = _mm_set_epi16(256, light1, light1, light1, 256, light0, light0, light0); \ - __m128i mlight_lo = _mm_set_epi16(256, light3, light3, light3, 256, light2, light2, light2); + mlight_hi = _mm_set_epi16(256, light1, light1, light1, 256, light0, light0, light0); \ + mlight_lo = _mm_set_epi16(256, light3, light3, light3, 256, light2, light2, light2); // Simple shade 4 pixels #define SSE_SHADE_SIMPLE(fg) { \ @@ -889,31 +929,31 @@ FORCEINLINE uint32_t calc_blend_bgalpha(uint32_t fg, uint32_t dest_alpha) // Calculate constants for a complex shade #define SSE_SHADE_INIT(light, shade_constants) \ - __m128i mlight_hi = _mm_set_epi16(256, light, light, light, 256, light, light, light); \ - __m128i mlight_lo = mlight_hi; \ - __m128i color = _mm_set_epi16( \ + mlight_hi = _mm_set_epi16(256, light, light, light, 256, light, light, light); \ + mlight_lo = mlight_hi; \ + color = _mm_set_epi16( \ 256, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, \ 256, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); \ - __m128i fade = _mm_set_epi16( \ + fade = _mm_set_epi16( \ 0, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, \ 0, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); \ - __m128i fade_amount_hi = _mm_mullo_epi16(fade, _mm_subs_epu16(_mm_set1_epi16(256), mlight_hi)); \ - __m128i fade_amount_lo = fade_amount_hi; \ - __m128i inv_desaturate = _mm_set1_epi16(256 - shade_constants.desaturate); \ + fade_amount_hi = _mm_mullo_epi16(fade, _mm_subs_epu16(_mm_set1_epi16(256), mlight_hi)); \ + fade_amount_lo = fade_amount_hi; \ + inv_desaturate = _mm_set1_epi16(256 - shade_constants.desaturate); \ // Calculate constants for a complex shade with different light levels for each pixel #define SSE_SHADE_INIT4(light3, light2, light1, light0, shade_constants) \ - __m128i mlight_hi = _mm_set_epi16(256, light1, light1, light1, 256, light0, light0, light0); \ 
- __m128i mlight_lo = _mm_set_epi16(256, light3, light3, light3, 256, light2, light2, light2); \ - __m128i color = _mm_set_epi16( \ + mlight_hi = _mm_set_epi16(256, light1, light1, light1, 256, light0, light0, light0); \ + mlight_lo = _mm_set_epi16(256, light3, light3, light3, 256, light2, light2, light2); \ + color = _mm_set_epi16( \ 256, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, \ 256, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); \ - __m128i fade = _mm_set_epi16( \ + fade = _mm_set_epi16( \ 0, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, \ 0, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); \ - __m128i fade_amount_hi = _mm_mullo_epi16(fade, _mm_subs_epu16(_mm_set1_epi16(256), mlight_hi)); \ - __m128i fade_amount_lo = _mm_mullo_epi16(fade, _mm_subs_epu16(_mm_set1_epi16(256), mlight_lo)); \ - __m128i inv_desaturate = _mm_set1_epi16(256 - shade_constants.desaturate); \ + fade_amount_hi = _mm_mullo_epi16(fade, _mm_subs_epu16(_mm_set1_epi16(256), mlight_hi)); \ + fade_amount_lo = _mm_mullo_epi16(fade, _mm_subs_epu16(_mm_set1_epi16(256), mlight_lo)); \ + inv_desaturate = _mm_set1_epi16(256 - shade_constants.desaturate); \ // Complex shade 4 pixels #define SSE_SHADE(fg, shade_constants) { \ diff --git a/src/r_draw_rgba_sse.h b/src/r_draw_rgba_sse.h index ae8d3bf427..4ee5576930 100644 --- a/src/r_draw_rgba_sse.h +++ b/src/r_draw_rgba_sse.h @@ -84,6 +84,7 @@ public: if (shade_constants.simple_shade) { + VEC_SHADE_VARS(); VEC_SHADE_SIMPLE_INIT(light); while (sse_count--) @@ -121,6 +122,7 @@ public: } else { + VEC_SHADE_VARS(); VEC_SHADE_INIT(light, shade_constants); while (sse_count--) @@ -184,6 +186,7 @@ public: if (shade_constants.simple_shade) { + VEC_SHADE_VARS(); VEC_SHADE_SIMPLE_INIT(light); while (sse_count--) @@ -217,6 +220,7 @@ public: } else { + VEC_SHADE_VARS(); VEC_SHADE_INIT(light, shade_constants); while 
(sse_count--) @@ -277,6 +281,7 @@ public: if (shade_constants.simple_shade) { + VEC_SHADE_VARS(); VEC_SHADE_SIMPLE_INIT(light); while (sse_count--) { @@ -289,6 +294,7 @@ public: } else { + VEC_SHADE_VARS(); VEC_SHADE_INIT(light, shade_constants); while (sse_count--) { @@ -317,6 +323,7 @@ public: if (shade_constants.simple_shade) { + VEC_SHADE_VARS(); VEC_SHADE_SIMPLE_INIT(light); while (sse_count--) { @@ -331,6 +338,7 @@ public: } else { + VEC_SHADE_VARS(); VEC_SHADE_INIT(light, shade_constants); while (sse_count--) { @@ -357,918 +365,3 @@ public: } } }; - -class VecCommand(Vlinec4RGBA) : public DrawerCommand -{ - BYTE * RESTRICT _dest; - int _count; - int _pitch; - ShadeConstants _shade_constants; - fixed_t palookuplight[4]; - DWORD vplce[4]; - DWORD vince[4]; - const uint32 * RESTRICT bufplce[4]; - const uint32_t * RESTRICT bufplce2[4]; - uint32_t buftexturefracx[4]; - uint32_t bufheight[4]; - -public: - VecCommand(Vlinec4RGBA)() - { - _dest = dc_dest; - _count = dc_count; - _pitch = dc_pitch; - _shade_constants = dc_shade_constants; - for (int i = 0; i < 4; i++) - { - palookuplight[i] = ::palookuplight[i]; - vplce[i] = ::vplce[i]; - vince[i] = ::vince[i]; - bufplce[i] = (const uint32 *)::bufplce[i]; - bufplce2[i] = (const uint32_t *)::bufplce2[i]; - buftexturefracx[i] = ::buftexturefracx[i]; - bufheight[i] = ::bufheight[i]; - } - } - - void Execute(DrawerThread *thread) override - { - int count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - int pitch = _pitch * thread->num_cores; - - uint32_t height[4]; - uint32_t half[4]; - for (int i = 0; i < 4; i++) - { - height[i] = bufheight[i]; - half[i] = (0x80000000 + height[i] - 1) / height[i]; - } - - uint32_t light0 = LightBgra::calc_light_multiplier(palookuplight[0]); - uint32_t light1 = LightBgra::calc_light_multiplier(palookuplight[1]); - uint32_t light2 = LightBgra::calc_light_multiplier(palookuplight[2]); 
- uint32_t light3 = LightBgra::calc_light_multiplier(palookuplight[3]); - - ShadeConstants shade_constants = _shade_constants; - - DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; - DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = thread->skipped_by_thread(_dest_y); - for (int i = 0; i < 4; i++) - { - local_vplce[i] += local_vince[i] * skipped; - local_vince[i] *= thread->num_cores; - } - - if (bufplce2[0] == nullptr) - { - if (shade_constants.simple_shade) - { - VEC_SHADE_SIMPLE_INIT4(light3, light2, light1, light0); - do - { - DWORD place0 = local_vplce[0]; - DWORD place1 = local_vplce[1]; - DWORD place2 = local_vplce[2]; - DWORD place3 = local_vplce[3]; - - uint32_t p0 = bufplce[0][((place0 >> FRACBITS) * height[0]) >> FRACBITS]; - uint32_t p1 = bufplce[1][((place1 >> FRACBITS) * height[1]) >> FRACBITS]; - uint32_t p2 = bufplce[2][((place2 >> FRACBITS) * height[2]) >> FRACBITS]; - uint32_t p3 = bufplce[3][((place3 >> FRACBITS) * height[3]) >> FRACBITS]; - - local_vplce[0] = place0 + local_vince[0]; - local_vplce[1] = place1 + local_vince[1]; - local_vplce[2] = place2 + local_vince[2]; - local_vplce[3] = place3 + local_vince[3]; - - __m128i fg = _mm_set_epi32(p3, p2, p1, p0); - VEC_SHADE_SIMPLE(fg); - _mm_storeu_si128((__m128i*)dest, fg); - dest += pitch; - } while (--count); - } - else - { - VEC_SHADE_INIT4(light3, light2, light1, light0, shade_constants); - do - { - DWORD place0 = local_vplce[0]; - DWORD place1 = local_vplce[1]; - DWORD place2 = local_vplce[2]; - DWORD place3 = local_vplce[3]; - - uint32_t p0 = bufplce[0][((place0 >> FRACBITS) * height[0]) >> FRACBITS]; - uint32_t p1 = bufplce[1][((place1 >> FRACBITS) * height[1]) >> FRACBITS]; - uint32_t p2 = bufplce[2][((place2 >> FRACBITS) * height[2]) >> FRACBITS]; - uint32_t p3 = bufplce[3][((place3 >> FRACBITS) * height[3]) >> FRACBITS]; - - local_vplce[0] = place0 + local_vince[0]; - local_vplce[1] = place1 + local_vince[1]; - local_vplce[2] = place2 + 
local_vince[2]; - local_vplce[3] = place3 + local_vince[3]; - - __m128i fg = _mm_set_epi32(p3, p2, p1, p0); - VEC_SHADE(fg, shade_constants); - _mm_storeu_si128((__m128i*)dest, fg); - dest += pitch; - } while (--count); - } - } - else - { - if (shade_constants.simple_shade) - { - VEC_SHADE_SIMPLE_INIT4(light3, light2, light1, light0); - do - { - __m128i fg; - VEC_SAMPLE_BILINEAR4_COLUMN(fg, bufplce, bufplce2, buftexturefracx, local_vplce, half, height); - - local_vplce[0] = local_vplce[0] + local_vince[0]; - local_vplce[1] = local_vplce[1] + local_vince[1]; - local_vplce[2] = local_vplce[2] + local_vince[2]; - local_vplce[3] = local_vplce[3] + local_vince[3]; - - VEC_SHADE_SIMPLE(fg); - _mm_storeu_si128((__m128i*)dest, fg); - dest += pitch; - } while (--count); - } - else - { - VEC_SHADE_INIT4(light3, light2, light1, light0, shade_constants); - do - { - __m128i fg; - VEC_SAMPLE_BILINEAR4_COLUMN(fg, bufplce, bufplce2, buftexturefracx, local_vplce, half, height); - - local_vplce[0] = local_vplce[0] + local_vince[0]; - local_vplce[1] = local_vplce[1] + local_vince[1]; - local_vplce[2] = local_vplce[2] + local_vince[2]; - local_vplce[3] = local_vplce[3] + local_vince[3]; - - VEC_SHADE(fg, shade_constants); - _mm_storeu_si128((__m128i*)dest, fg); - dest += pitch; - } while (--count); - } - } - } -}; - -class VecCommand(Mvlinec4RGBA) : public DrawerCommand -{ - BYTE * RESTRICT _dest; - int _count; - int _pitch; - ShadeConstants _shade_constants; - uint32_t _mvlinemax; - fixed_t palookuplight[4]; - DWORD vplce[4]; - DWORD vince[4]; - const uint32 * RESTRICT bufplce[4]; - const uint32 * RESTRICT bufplce2[4]; - uint32_t buftexturefracx[4]; - uint32_t bufheight[4]; - -public: - VecCommand(Mvlinec4RGBA)() - { - _dest = dc_dest; - _count = dc_count; - _pitch = dc_pitch; - _shade_constants = dc_shade_constants; - for (int i = 0; i < 4; i++) - { - palookuplight[i] = ::palookuplight[i]; - vplce[i] = ::vplce[i]; - vince[i] = ::vince[i]; - bufplce[i] = (const uint32 *)::bufplce[i]; 
- bufplce2[i] = (const uint32_t *)::bufplce2[i]; - buftexturefracx[i] = ::buftexturefracx[i]; - bufheight[i] = ::bufheight[i]; - } - } - - void Execute(DrawerThread *thread) override - { - int count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - int pitch = _pitch * thread->num_cores; - uint32_t height[4]; - uint32_t half[4]; - for (int i = 0; i < 4; i++) - { - height[i] = bufheight[i]; - half[i] = (0x80000000 + height[i] - 1) / height[i]; - } - - uint32_t light0 = LightBgra::calc_light_multiplier(palookuplight[0]); - uint32_t light1 = LightBgra::calc_light_multiplier(palookuplight[1]); - uint32_t light2 = LightBgra::calc_light_multiplier(palookuplight[2]); - uint32_t light3 = LightBgra::calc_light_multiplier(palookuplight[3]); - - ShadeConstants shade_constants = _shade_constants; - - DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; - DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = thread->skipped_by_thread(_dest_y); - for (int i = 0; i < 4; i++) - { - local_vplce[i] += local_vince[i] * skipped; - local_vince[i] *= thread->num_cores; - } - - if (bufplce2[0] == nullptr) - { - if (shade_constants.simple_shade) - { - VEC_SHADE_SIMPLE_INIT4(light3, light2, light1, light0); - do - { - DWORD place0 = local_vplce[0]; - DWORD place1 = local_vplce[1]; - DWORD place2 = local_vplce[2]; - DWORD place3 = local_vplce[3]; - - uint32_t pix0 = bufplce[0][((place0 >> FRACBITS) * height[0]) >> FRACBITS]; - uint32_t pix1 = bufplce[1][((place1 >> FRACBITS) * height[1]) >> FRACBITS]; - uint32_t pix2 = bufplce[2][((place2 >> FRACBITS) * height[2]) >> FRACBITS]; - uint32_t pix3 = bufplce[3][((place3 >> FRACBITS) * height[3]) >> FRACBITS]; - - local_vplce[0] = place0 + local_vince[0]; - local_vplce[1] = place1 + local_vince[1]; - local_vplce[2] = place2 + local_vince[2]; - local_vplce[3] = place3 + local_vince[3]; - - __m128i fg 
= _mm_set_epi32(pix3, pix2, pix1, pix0); - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - VEC_SHADE_SIMPLE(fg); - VEC_ALPHA_BLEND(fg, bg); - _mm_storeu_si128((__m128i*)dest, fg); - dest += pitch; - } while (--count); - } - else - { - VEC_SHADE_INIT4(light3, light2, light1, light0, shade_constants); - do - { - DWORD place0 = local_vplce[0]; - DWORD place1 = local_vplce[1]; - DWORD place2 = local_vplce[2]; - DWORD place3 = local_vplce[3]; - - uint32_t pix0 = bufplce[0][((place0 >> FRACBITS) * height[0]) >> FRACBITS]; - uint32_t pix1 = bufplce[1][((place1 >> FRACBITS) * height[1]) >> FRACBITS]; - uint32_t pix2 = bufplce[2][((place2 >> FRACBITS) * height[2]) >> FRACBITS]; - uint32_t pix3 = bufplce[3][((place3 >> FRACBITS) * height[3]) >> FRACBITS]; - - local_vplce[0] = place0 + local_vince[0]; - local_vplce[1] = place1 + local_vince[1]; - local_vplce[2] = place2 + local_vince[2]; - local_vplce[3] = place3 + local_vince[3]; - - __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - VEC_SHADE(fg, shade_constants); - VEC_ALPHA_BLEND(fg, bg); - _mm_storeu_si128((__m128i*)dest, fg); - dest += pitch; - } while (--count); - } - } - else - { - if (shade_constants.simple_shade) - { - VEC_SHADE_SIMPLE_INIT4(light3, light2, light1, light0); - do - { - __m128i fg; - VEC_SAMPLE_BILINEAR4_COLUMN(fg, bufplce, bufplce2, buftexturefracx, local_vplce, half, height); - - local_vplce[0] = local_vplce[0] + local_vince[0]; - local_vplce[1] = local_vplce[1] + local_vince[1]; - local_vplce[2] = local_vplce[2] + local_vince[2]; - local_vplce[3] = local_vplce[3] + local_vince[3]; - - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - VEC_SHADE_SIMPLE(fg); - VEC_ALPHA_BLEND(fg, bg); - _mm_storeu_si128((__m128i*)dest, fg); - dest += pitch; - } while (--count); - } - else - { - VEC_SHADE_INIT4(light3, light2, light1, light0, shade_constants); - do - { - __m128i fg; - VEC_SAMPLE_BILINEAR4_COLUMN(fg, bufplce, bufplce2, buftexturefracx, 
local_vplce, half, height); - - local_vplce[0] = local_vplce[0] + local_vince[0]; - local_vplce[1] = local_vplce[1] + local_vince[1]; - local_vplce[2] = local_vplce[2] + local_vince[2]; - local_vplce[3] = local_vplce[3] + local_vince[3]; - - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - VEC_SHADE(fg, shade_constants); - VEC_ALPHA_BLEND(fg, bg); - _mm_storeu_si128((__m128i*)dest, fg); - dest += pitch; - } while (--count); - } - } - } -}; - -class VecCommand(Tmvline4AddRGBA) : public DrawerCommand -{ - BYTE * RESTRICT _dest; - int _count; - int _pitch; - ShadeConstants _shade_constants; - fixed_t _srcalpha; - fixed_t _destalpha; - fixed_t palookuplight[4]; - DWORD vplce[4]; - DWORD vince[4]; - const uint32 * RESTRICT bufplce[4]; - uint32_t bufheight[4]; - -public: - VecCommand(Tmvline4AddRGBA)() - { - _dest = dc_dest; - _count = dc_count; - _pitch = dc_pitch; - _shade_constants = dc_shade_constants; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; - for (int i = 0; i < 4; i++) - { - palookuplight[i] = ::palookuplight[i]; - vplce[i] = ::vplce[i]; - vince[i] = ::vince[i]; - bufplce[i] = (const uint32 *)::bufplce[i]; - bufheight[i] = ::bufheight[i]; - } - } - - void Execute(DrawerThread *thread) override - { - int count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - int pitch = _pitch * thread->num_cores; - - uint32_t height[4]; - uint32_t half[4]; - for (int i = 0; i < 4; i++) - { - height[i] = bufheight[i]; - half[i] = (0x80000000 + height[i] - 1) / height[i]; - } - - uint32_t light[4]; - light[0] = LightBgra::calc_light_multiplier(palookuplight[0]); - light[1] = LightBgra::calc_light_multiplier(palookuplight[1]); - light[2] = LightBgra::calc_light_multiplier(palookuplight[2]); - light[3] = LightBgra::calc_light_multiplier(palookuplight[3]); - - ShadeConstants shade_constants = _shade_constants; - - uint32_t src_alpha = _srcalpha >> (FRACBITS - 
8); - uint32_t dest_alpha = _destalpha >> (FRACBITS - 8); - - DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; - DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = thread->skipped_by_thread(_dest_y); - for (int i = 0; i < 4; i++) - { - local_vplce[i] += local_vince[i] * skipped; - local_vince[i] *= thread->num_cores; - } - - if (shade_constants.simple_shade) - { - VEC_SHADE_SIMPLE_INIT4(light[3], light[2], light[1], light[0]); - VEC_CALC_BLEND_ALPHA_INIT(src_alpha, dest_alpha); - - do - { - uint32_t pix0 = bufplce[0][((local_vplce[0] >> FRACBITS) * height[0]) >> FRACBITS]; - uint32_t pix1 = bufplce[1][((local_vplce[1] >> FRACBITS) * height[1]) >> FRACBITS]; - uint32_t pix2 = bufplce[2][((local_vplce[2] >> FRACBITS) * height[2]) >> FRACBITS]; - uint32_t pix3 = bufplce[3][((local_vplce[3] >> FRACBITS) * height[3]) >> FRACBITS]; - - local_vplce[0] = local_vplce[0] + local_vince[0]; - local_vplce[1] = local_vplce[1] + local_vince[1]; - local_vplce[2] = local_vplce[2] + local_vince[2]; - local_vplce[3] = local_vplce[3] + local_vince[3]; - - __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); - - VEC_CALC_BLEND_ALPHA(fg); - VEC_SHADE_SIMPLE(fg); - - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); - - __m128i out_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, fg_alpha_hi), _mm_mullo_epi16(bg_hi, bg_alpha_hi)), 8); - __m128i out_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, fg_alpha_lo), _mm_mullo_epi16(bg_lo, bg_alpha_lo)), 8); - __m128i out = _mm_packus_epi16(out_lo, out_hi); - - _mm_storeu_si128((__m128i*)dest, out); - dest += pitch; - } while (--count); - } - else - { - VEC_SHADE_INIT4(light[3], light[2], light[1], light[0], shade_constants); - 
VEC_CALC_BLEND_ALPHA_INIT(src_alpha, dest_alpha); - - do - { - uint32_t pix0 = bufplce[0][((local_vplce[0] >> FRACBITS) * height[0]) >> FRACBITS]; - uint32_t pix1 = bufplce[1][((local_vplce[1] >> FRACBITS) * height[1]) >> FRACBITS]; - uint32_t pix2 = bufplce[2][((local_vplce[2] >> FRACBITS) * height[2]) >> FRACBITS]; - uint32_t pix3 = bufplce[3][((local_vplce[3] >> FRACBITS) * height[3]) >> FRACBITS]; - - local_vplce[0] = local_vplce[0] + local_vince[0]; - local_vplce[1] = local_vplce[1] + local_vince[1]; - local_vplce[2] = local_vplce[2] + local_vince[2]; - local_vplce[3] = local_vplce[3] + local_vince[3]; - - __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); - VEC_CALC_BLEND_ALPHA(fg); - VEC_SHADE(fg, shade_constants); - - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); - - __m128i out_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, fg_alpha_hi), _mm_mullo_epi16(bg_hi, bg_alpha_hi)), 8); - __m128i out_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, fg_alpha_lo), _mm_mullo_epi16(bg_lo, bg_alpha_lo)), 8); - __m128i out = _mm_packus_epi16(out_lo, out_hi); - - _mm_storeu_si128((__m128i*)dest, out); - dest += pitch; - } while (--count); - } - } -}; - -class VecCommand(Tmvline4AddClampRGBA) : public DrawerCommand -{ - BYTE * RESTRICT _dest; - int _count; - int _pitch; - ShadeConstants _shade_constants; - fixed_t _srcalpha; - fixed_t _destalpha; - fixed_t palookuplight[4]; - DWORD vplce[4]; - DWORD vince[4]; - const uint32 *RESTRICT bufplce[4]; - uint32_t bufheight[4]; - -public: - VecCommand(Tmvline4AddClampRGBA)() - { - _dest = dc_dest; - _count = dc_count; - _pitch = dc_pitch; - _shade_constants = dc_shade_constants; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; - for (int i = 0; i < 4; 
i++) - { - palookuplight[i] = ::palookuplight[i]; - vplce[i] = ::vplce[i]; - vince[i] = ::vince[i]; - bufplce[i] = (const uint32 *)::bufplce[i]; - bufheight[i] = ::bufheight[i]; - } - } - - void Execute(DrawerThread *thread) override - { - int count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - int pitch = _pitch * thread->num_cores; - - uint32_t height[4]; - uint32_t half[4]; - for (int i = 0; i < 4; i++) - { - height[i] = bufheight[i]; - half[i] = (0x80000000 + height[i] - 1) / height[i]; - } - - uint32_t light[4]; - light[0] = LightBgra::calc_light_multiplier(palookuplight[0]); - light[1] = LightBgra::calc_light_multiplier(palookuplight[1]); - light[2] = LightBgra::calc_light_multiplier(palookuplight[2]); - light[3] = LightBgra::calc_light_multiplier(palookuplight[3]); - - ShadeConstants shade_constants = _shade_constants; - - uint32_t src_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t dest_alpha = _destalpha >> (FRACBITS - 8); - - DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; - DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = thread->skipped_by_thread(_dest_y); - for (int i = 0; i < 4; i++) - { - local_vplce[i] += local_vince[i] * skipped; - local_vince[i] *= thread->num_cores; - } - - if (shade_constants.simple_shade) - { - VEC_SHADE_SIMPLE_INIT4(light[3], light[2], light[1], light[0]); - VEC_CALC_BLEND_ALPHA_INIT(src_alpha, dest_alpha); - - do - { - uint32_t pix0 = bufplce[0][((local_vplce[0] >> FRACBITS) * height[0]) >> FRACBITS]; - uint32_t pix1 = bufplce[1][((local_vplce[1] >> FRACBITS) * height[1]) >> FRACBITS]; - uint32_t pix2 = bufplce[2][((local_vplce[2] >> FRACBITS) * height[2]) >> FRACBITS]; - uint32_t pix3 = bufplce[3][((local_vplce[3] >> FRACBITS) * height[3]) >> FRACBITS]; - - local_vplce[0] = local_vplce[0] + local_vince[0]; - local_vplce[1] = local_vplce[1] + local_vince[1]; - 
local_vplce[2] = local_vplce[2] + local_vince[2]; - local_vplce[3] = local_vplce[3] + local_vince[3]; - - __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); - VEC_CALC_BLEND_ALPHA(fg); - VEC_SHADE_SIMPLE(fg); - - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); - - __m128i out_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, fg_alpha_hi), _mm_mullo_epi16(bg_hi, bg_alpha_hi)), 8); - __m128i out_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, fg_alpha_lo), _mm_mullo_epi16(bg_lo, bg_alpha_lo)), 8); - __m128i out = _mm_packus_epi16(out_lo, out_hi); - - _mm_storeu_si128((__m128i*)dest, out); - dest += pitch; - } while (--count); - } - else - { - VEC_SHADE_INIT4(light[3], light[2], light[1], light[0], shade_constants); - VEC_CALC_BLEND_ALPHA_INIT(src_alpha, dest_alpha); - - do - { - uint32_t pix0 = bufplce[0][((local_vplce[0] >> FRACBITS) * height[0]) >> FRACBITS]; - uint32_t pix1 = bufplce[1][((local_vplce[1] >> FRACBITS) * height[1]) >> FRACBITS]; - uint32_t pix2 = bufplce[2][((local_vplce[2] >> FRACBITS) * height[2]) >> FRACBITS]; - uint32_t pix3 = bufplce[3][((local_vplce[3] >> FRACBITS) * height[3]) >> FRACBITS]; - - local_vplce[0] = local_vplce[0] + local_vince[0]; - local_vplce[1] = local_vplce[1] + local_vince[1]; - local_vplce[2] = local_vplce[2] + local_vince[2]; - local_vplce[3] = local_vplce[3] + local_vince[3]; - - __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); - VEC_CALC_BLEND_ALPHA(fg); - VEC_SHADE(fg, shade_constants); - - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); - __m128i bg_lo = 
_mm_unpacklo_epi8(bg, _mm_setzero_si128()); - - __m128i out_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, fg_alpha_hi), _mm_mullo_epi16(bg_hi, bg_alpha_hi)), 8); - __m128i out_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, fg_alpha_lo), _mm_mullo_epi16(bg_lo, bg_alpha_lo)), 8); - __m128i out = _mm_packus_epi16(out_lo, out_hi); - - _mm_storeu_si128((__m128i*)dest, out); - dest += pitch; - } while (--count); - } - } -}; - -class VecCommand(Tmvline4SubClampRGBA) : public DrawerCommand -{ - BYTE * RESTRICT _dest; - int _count; - int _pitch; - ShadeConstants _shade_constants; - fixed_t _srcalpha; - fixed_t _destalpha; - fixed_t palookuplight[4]; - DWORD vplce[4]; - DWORD vince[4]; - const uint32 *RESTRICT bufplce[4]; - uint32_t bufheight[4]; - -public: - VecCommand(Tmvline4SubClampRGBA)() - { - _dest = dc_dest; - _count = dc_count; - _pitch = dc_pitch; - _shade_constants = dc_shade_constants; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; - for (int i = 0; i < 4; i++) - { - palookuplight[i] = ::palookuplight[i]; - vplce[i] = ::vplce[i]; - vince[i] = ::vince[i]; - bufplce[i] = (const uint32 *)::bufplce[i]; - bufheight[i] = ::bufheight[i]; - } - } - - void Execute(DrawerThread *thread) override - { - int count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - int pitch = _pitch * thread->num_cores; - - uint32_t height[4]; - uint32_t half[4]; - for (int i = 0; i < 4; i++) - { - height[i] = bufheight[i]; - half[i] = (0x80000000 + height[i] - 1) / height[i]; - } - - uint32_t light[4]; - light[0] = LightBgra::calc_light_multiplier(palookuplight[0]); - light[1] = LightBgra::calc_light_multiplier(palookuplight[1]); - light[2] = LightBgra::calc_light_multiplier(palookuplight[2]); - light[3] = LightBgra::calc_light_multiplier(palookuplight[3]); - - ShadeConstants shade_constants = _shade_constants; - - uint32_t src_alpha = _srcalpha >> 
(FRACBITS - 8); - uint32_t dest_alpha = _destalpha >> (FRACBITS - 8); - - DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; - DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = thread->skipped_by_thread(_dest_y); - for (int i = 0; i < 4; i++) - { - local_vplce[i] += local_vince[i] * skipped; - local_vince[i] *= thread->num_cores; - } - - if (shade_constants.simple_shade) - { - VEC_SHADE_SIMPLE_INIT4(light[3], light[2], light[1], light[0]); - VEC_CALC_BLEND_ALPHA_INIT(src_alpha, dest_alpha); - - do - { - uint32_t pix0 = bufplce[0][((local_vplce[0] >> FRACBITS) * height[0]) >> FRACBITS]; - uint32_t pix1 = bufplce[1][((local_vplce[1] >> FRACBITS) * height[1]) >> FRACBITS]; - uint32_t pix2 = bufplce[2][((local_vplce[2] >> FRACBITS) * height[2]) >> FRACBITS]; - uint32_t pix3 = bufplce[3][((local_vplce[3] >> FRACBITS) * height[3]) >> FRACBITS]; - - local_vplce[0] = local_vplce[0] + local_vince[0]; - local_vplce[1] = local_vplce[1] + local_vince[1]; - local_vplce[2] = local_vplce[2] + local_vince[2]; - local_vplce[3] = local_vplce[3] + local_vince[3]; - - __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); - VEC_CALC_BLEND_ALPHA(fg); - VEC_SHADE_SIMPLE(fg); - - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); - - __m128i out_hi = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(bg_hi, bg_alpha_hi), _mm_mullo_epi16(fg_hi, fg_alpha_hi)), 8); - __m128i out_lo = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(bg_lo, bg_alpha_lo), _mm_mullo_epi16(fg_lo, fg_alpha_lo)), 8); - __m128i out = _mm_packus_epi16(out_lo, out_hi); - - _mm_storeu_si128((__m128i*)dest, out); - dest += pitch; - } while (--count); - } - else - { - VEC_SHADE_INIT4(light[3], light[2], light[1], light[0], 
shade_constants); - VEC_CALC_BLEND_ALPHA_INIT(src_alpha, dest_alpha); - - do - { - uint32_t pix0 = bufplce[0][((local_vplce[0] >> FRACBITS) * height[0]) >> FRACBITS]; - uint32_t pix1 = bufplce[1][((local_vplce[1] >> FRACBITS) * height[1]) >> FRACBITS]; - uint32_t pix2 = bufplce[2][((local_vplce[2] >> FRACBITS) * height[2]) >> FRACBITS]; - uint32_t pix3 = bufplce[3][((local_vplce[3] >> FRACBITS) * height[3]) >> FRACBITS]; - - local_vplce[0] = local_vplce[0] + local_vince[0]; - local_vplce[1] = local_vplce[1] + local_vince[1]; - local_vplce[2] = local_vplce[2] + local_vince[2]; - local_vplce[3] = local_vplce[3] + local_vince[3]; - - __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); - VEC_CALC_BLEND_ALPHA(fg); - VEC_SHADE(fg, shade_constants); - - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); - - __m128i out_hi = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(bg_hi, bg_alpha_hi), _mm_mullo_epi16(fg_hi, fg_alpha_hi)), 8); - __m128i out_lo = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(bg_lo, bg_alpha_lo), _mm_mullo_epi16(fg_lo, fg_alpha_lo)), 8); - __m128i out = _mm_packus_epi16(out_lo, out_hi); - - _mm_storeu_si128((__m128i*)dest, out); - dest += pitch; - } while (--count); - } - } -}; - -class VecCommand(Tmvline4RevSubClampRGBA) : public DrawerCommand -{ - BYTE * RESTRICT _dest; - int _count; - int _pitch; - ShadeConstants _shade_constants; - fixed_t _srcalpha; - fixed_t _destalpha; - fixed_t palookuplight[4]; - DWORD vplce[4]; - DWORD vince[4]; - const uint32 *RESTRICT bufplce[4]; - uint32_t bufheight[4]; - -public: - VecCommand(Tmvline4RevSubClampRGBA)() - { - _dest = dc_dest; - _count = dc_count; - _pitch = dc_pitch; - _shade_constants = dc_shade_constants; - _srcalpha = dc_srcalpha; - _destalpha = 
dc_destalpha; - for (int i = 0; i < 4; i++) - { - palookuplight[i] = ::palookuplight[i]; - vplce[i] = ::vplce[i]; - vince[i] = ::vince[i]; - bufplce[i] = (const uint32 *)::bufplce[i]; - bufheight[i] = ::bufheight[4]; - } - } - - void Execute(DrawerThread *thread) override - { - int count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - int pitch = _pitch * thread->num_cores; - - uint32_t height[4]; - uint32_t half[4]; - for (int i = 0; i < 4; i++) - { - height[i] = bufheight[i]; - half[i] = (0x80000000 + height[i] - 1) / height[i]; - } - - uint32_t light[4]; - light[0] = LightBgra::calc_light_multiplier(palookuplight[0]); - light[1] = LightBgra::calc_light_multiplier(palookuplight[1]); - light[2] = LightBgra::calc_light_multiplier(palookuplight[2]); - light[3] = LightBgra::calc_light_multiplier(palookuplight[3]); - - ShadeConstants shade_constants = _shade_constants; - - uint32_t src_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t dest_alpha = _destalpha >> (FRACBITS - 8); - - DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; - DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = thread->skipped_by_thread(_dest_y); - for (int i = 0; i < 4; i++) - { - local_vplce[i] += local_vince[i] * skipped; - local_vince[i] *= thread->num_cores; - } - - if (shade_constants.simple_shade) - { - VEC_SHADE_SIMPLE_INIT4(light[3], light[2], light[1], light[0]); - VEC_CALC_BLEND_ALPHA_INIT(src_alpha, dest_alpha); - - do - { - uint32_t pix0 = bufplce[0][((local_vplce[0] >> FRACBITS) * height[0]) >> FRACBITS]; - uint32_t pix1 = bufplce[1][((local_vplce[1] >> FRACBITS) * height[1]) >> FRACBITS]; - uint32_t pix2 = bufplce[2][((local_vplce[2] >> FRACBITS) * height[2]) >> FRACBITS]; - uint32_t pix3 = bufplce[3][((local_vplce[3] >> FRACBITS) * height[3]) >> FRACBITS]; - - local_vplce[0] = local_vplce[0] + local_vince[0]; - local_vplce[1] = 
local_vplce[1] + local_vince[1]; - local_vplce[2] = local_vplce[2] + local_vince[2]; - local_vplce[3] = local_vplce[3] + local_vince[3]; - - __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); - VEC_CALC_BLEND_ALPHA(fg); - VEC_SHADE_SIMPLE(fg); - - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); - - __m128i out_hi = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(fg_hi, fg_alpha_hi), _mm_mullo_epi16(bg_hi, bg_alpha_hi)), 8); - __m128i out_lo = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(fg_lo, fg_alpha_lo), _mm_mullo_epi16(bg_lo, bg_alpha_lo)), 8); - __m128i out = _mm_packus_epi16(out_lo, out_hi); - - _mm_storeu_si128((__m128i*)dest, out); - dest += pitch; - } while (--count); - } - else - { - VEC_SHADE_INIT4(light[3], light[2], light[1], light[0], shade_constants); - VEC_CALC_BLEND_ALPHA_INIT(src_alpha, dest_alpha); - - do - { - uint32_t pix0 = bufplce[0][((local_vplce[0] >> FRACBITS) * height[0]) >> FRACBITS]; - uint32_t pix1 = bufplce[1][((local_vplce[1] >> FRACBITS) * height[1]) >> FRACBITS]; - uint32_t pix2 = bufplce[2][((local_vplce[2] >> FRACBITS) * height[2]) >> FRACBITS]; - uint32_t pix3 = bufplce[3][((local_vplce[3] >> FRACBITS) * height[3]) >> FRACBITS]; - - local_vplce[0] = local_vplce[0] + local_vince[0]; - local_vplce[1] = local_vplce[1] + local_vince[1]; - local_vplce[2] = local_vplce[2] + local_vince[2]; - local_vplce[3] = local_vplce[3] + local_vince[3]; - - __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); - VEC_CALC_BLEND_ALPHA(fg); - VEC_SHADE(fg, shade_constants); - - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi8(bg, 
_mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); - - __m128i out_hi = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(fg_hi, fg_alpha_hi), _mm_mullo_epi16(bg_hi, bg_alpha_hi)), 8); - __m128i out_lo = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(fg_lo, fg_alpha_lo), _mm_mullo_epi16(bg_lo, bg_alpha_lo)), 8); - __m128i out = _mm_packus_epi16(out_lo, out_hi); - - _mm_storeu_si128((__m128i*)dest, out); - dest += pitch; - } while (--count); - } - } -}; diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp index 82932b1f2f..45bd5c029e 100644 --- a/src/r_drawt_rgba.cpp +++ b/src/r_drawt_rgba.cpp @@ -53,8 +53,13 @@ extern unsigned int *horizspan[4]; #ifndef NO_SSE +#ifdef _MSC_VER +#pragma warning(disable: 4101) // warning C4101: unreferenced local variable +#endif + // Generate SSE drawers: #define VecCommand(name) name##_SSE_Command +#define VEC_SHADE_VARS SSE_SHADE_VARS #define VEC_SHADE_SIMPLE_INIT SSE_SHADE_SIMPLE_INIT #define VEC_SHADE_SIMPLE_INIT4 SSE_SHADE_SIMPLE_INIT4 #define VEC_SHADE_SIMPLE SSE_SHADE_SIMPLE diff --git a/src/r_drawt_rgba_sse.h b/src/r_drawt_rgba_sse.h index 64a77e2882..7a02f2282b 100644 --- a/src/r_drawt_rgba_sse.h +++ b/src/r_drawt_rgba_sse.h @@ -60,6 +60,7 @@ public: if (shade_constants.simple_shade) { + VEC_SHADE_VARS(); VEC_SHADE_SIMPLE_INIT(light); if (count & 1) { @@ -110,6 +111,7 @@ public: } else { + VEC_SHADE_VARS(); VEC_SHADE_INIT(light, shade_constants); if (count & 1) { @@ -218,6 +220,7 @@ public: if (shade_constants.simple_shade) { + VEC_SHADE_VARS(); VEC_SHADE_SIMPLE_INIT(light); __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); @@ -254,6 +257,7 @@ public: } else { + VEC_SHADE_VARS(); VEC_SHADE_INIT(light, shade_constants); __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); @@ -421,6 +425,7 @@ public: if (shade_constants.simple_shade) { + VEC_SHADE_VARS(); VEC_SHADE_SIMPLE_INIT(light); 
__m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); @@ -457,6 +462,7 @@ public: } else { + VEC_SHADE_VARS(); VEC_SHADE_INIT(light, shade_constants); __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); @@ -547,6 +553,7 @@ public: if (shade_constants.simple_shade) { + VEC_SHADE_VARS(); VEC_SHADE_SIMPLE_INIT(light); __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); @@ -583,6 +590,7 @@ public: } else { + VEC_SHADE_VARS(); VEC_SHADE_INIT(light, shade_constants); __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); @@ -673,6 +681,7 @@ public: if (shade_constants.simple_shade) { + VEC_SHADE_VARS(); VEC_SHADE_SIMPLE_INIT(light); __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); @@ -709,6 +718,7 @@ public: } else { + VEC_SHADE_VARS(); VEC_SHADE_INIT(light, shade_constants); __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); diff --git a/src/r_segs.cpp b/src/r_segs.cpp index 630d64da07..870d748947 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -1146,15 +1146,16 @@ WallscanSampler::WallscanSampler(int y1, float swal, double yrepeat, fixed_t xof int mip_height = texture->GetHeight(); if (r_mipmap && texture->Mipmapped()) { + uint32_t xpos = (uint32_t)((((uint64_t)xoffset) << FRACBITS) / mip_width); int level = (int)MAX(magnitude - 1.0, 0.0); while (level != 0) { mipmap_offset += mip_width * mip_height; - xoffset >>= 1; level >>= 1; mip_width = MAX(mip_width >> 1, 1); mip_height = MAX(mip_height >> 1, 1); } + xoffset = (xpos >> FRACBITS) * mip_width; } const uint32_t *pixels = texture->GetPixelsBgra() + mipmap_offset; From 8f38d3af990c5e9373f109781add7448f2de3c9f Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 27 Jun 2016 10:49:15 +0200 Subject: 
[PATCH 082/912] Replaced the bicubic interpolation filter with a simple sharpening filter --- src/CMakeLists.txt | 1 - src/r_draw_rgba.cpp | 4 + src/textures/bicubic_interpolation.cpp | 107 --------------------- src/textures/bicubic_interpolation.h | 50 ---------- src/textures/texture.cpp | 124 ++++++++++++++++++++++--- 5 files changed, 117 insertions(+), 169 deletions(-) delete mode 100644 src/textures/bicubic_interpolation.cpp delete mode 100644 src/textures/bicubic_interpolation.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 042da0c8f0..8c0a30ea07 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1190,7 +1190,6 @@ set (PCH_SOURCES textures/texturemanager.cpp textures/tgatexture.cpp textures/warptexture.cpp - textures/bicubic_interpolation.cpp thingdef/olddecorations.cpp thingdef/thingdef.cpp thingdef/thingdef_codeptr.cpp diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index fbb2c12c5a..aa88e43026 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -103,7 +103,9 @@ CVAR(Bool, r_mipmap, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); ///////////////////////////////////////////////////////////////////////////// +#ifndef NO_SSE __m128i SampleBgra::samplertable[256 * 2]; +#endif DrawerCommandQueue *DrawerCommandQueue::Instance() { @@ -113,6 +115,7 @@ DrawerCommandQueue *DrawerCommandQueue::Instance() DrawerCommandQueue::DrawerCommandQueue() { +#ifndef NO_SSE for (int inv_b = 0; inv_b < 16; inv_b++) { for (int inv_a = 0; inv_a < 16; inv_a++) @@ -132,6 +135,7 @@ DrawerCommandQueue::DrawerCommandQueue() _mm_store_si128(SampleBgra::samplertable + inv_b * 32 + inv_a * 2 + 1, ainvb_invainvb); } } +#endif } DrawerCommandQueue::~DrawerCommandQueue() diff --git a/src/textures/bicubic_interpolation.cpp b/src/textures/bicubic_interpolation.cpp deleted file mode 100644 index 2c8a3049d1..0000000000 --- a/src/textures/bicubic_interpolation.cpp +++ /dev/null @@ -1,107 +0,0 @@ - -#include "doomtype.h" -#include "bicubic_interpolation.h" - -void 
BicubicInterpolation::ScaleImage(uint32_t *dest_data, int dest_width, int dest_height, const uint32_t *src_data, int src_width, int src_height) -{ - if (dest_width <= 0 || dest_height <= 0 || src_width <= 0 || src_height <= 0) - return; - - // Scale factor as a rational number r = n / d - int n = dest_width; - int d = src_width; - - const unsigned char *src_ptr = (const unsigned char *)src_data; - unsigned char *dest_ptr = (unsigned char *)dest_data; - - scale(n, d, src_width, src_width * 4, src_height, src_ptr + 0, dest_width, dest_width * 4, dest_height, dest_ptr + 0); - scale(n, d, src_width, src_width * 4, src_height, src_ptr + 1, dest_width, dest_width * 4, dest_height, dest_ptr + 1); - scale(n, d, src_width, src_width * 4, src_height, src_ptr + 2, dest_width, dest_width * 4, dest_height, dest_ptr + 2); - scale(n, d, src_width, src_width * 4, src_height, src_ptr + 3, dest_width, dest_width * 4, dest_height, dest_ptr + 3); -} - -void BicubicInterpolation::scale(int n, int d, int in_width, int in_pitch, int in_height, const unsigned char *f, int out_width, int out_pitch, int out_height, unsigned char *g) -{ - // Implementation of Michael J. Aramini's Efficient Image Magnification by Bicubic Spline Interpolation - - int dimension_size = (out_width > out_height) ? out_width : out_height; - L_vector.resize(dimension_size); - - for (int i=0;i<4;i++) - c_vector[i].resize(dimension_size); - h_vector.resize(in_width); - - int larger_out_dimension; - int j, k, l, m, index; - int *L = &L_vector[0]; - float x; - float *c[4] = { &c_vector[0][0], &c_vector[1][0], &c_vector[2][0], &c_vector[3][0] }; - float *h = &h_vector[0]; - - larger_out_dimension = (out_width > out_height) ? 
out_width : out_height; - - for (k = 0; k < larger_out_dimension; k++) - L[k] = (k * d) / n; - - for (k = 0; k < n; k++) - { - x = (float)((k * d) % n) / (float)n; - c[0][k] = C0(x); - c[1][k] = C1(x); - c[2][k] = C2(x); - c[3][k] = C3(x); - } - for (k = n; k < larger_out_dimension; k++) - for (l = 0; l < 4; l++) - c[l][k] = c[l][k % n]; - - for (k = 0; k < out_height; k++) - { - for (j = 0; j < in_width; j++) - { - h[j] = 0.0f; - for (l = 0; l < 4; l++) - { - index = L[k] + l - 1; - if ((index >= 0) && (index < in_height)) - h[j] += f[index*in_pitch+j*4] * c[3 - l][k]; - } - } - for (m = 0; m < out_width; m++) - { - x = 0.5f; - for (l = 0; l < 4; l++) - { - index = L[m] + l - 1; - if ((index >= 0) && (index < in_width)) - x += h[index] * c[3 - l][m]; - } - if (x <= 0.0f) - g[k*out_pitch+m*4] = 0; - else if (x >= 255) - g[k*out_pitch+m*4] = 255; - else - g[k*out_pitch+m*4] = (unsigned char)x; - } - } -} - -inline float BicubicInterpolation::C0(float t) -{ - return -a * t * t * t + a * t * t; -} - -inline float BicubicInterpolation::C1(float t) -{ - return -(a + 2.0f) * t * t * t + (2.0f * a + 3.0f) * t * t - a * t; -} - -inline float BicubicInterpolation::C2(float t) -{ - return (a + 2.0f) * t * t * t - (a + 3.0f) * t * t + 1.0f; -} - -inline float BicubicInterpolation::C3(float t) -{ - return a * t * t * t - 2.0f * a * t * t + a * t; -} diff --git a/src/textures/bicubic_interpolation.h b/src/textures/bicubic_interpolation.h deleted file mode 100644 index da547ad83b..0000000000 --- a/src/textures/bicubic_interpolation.h +++ /dev/null @@ -1,50 +0,0 @@ -/* -** Bicubic Image Scaler -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 'as-is', without any express or implied -** warranty. In no event will the authors be held liable for any damages -** arising from the use of this software. 
-** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. -** 3. This notice may not be removed or altered from any source distribution. -*/ - -#ifndef __BICUBIC_INTERPOLATION_H__ -#define __BICUBIC_INTERPOLATION_H__ - -#pragma once - -#include - -// Bicubic image scaler -class BicubicInterpolation -{ -public: - void ScaleImage(uint32_t *dest, int dest_width, int dest_height, const uint32_t *src, int src_width, int src_height); - -private: - void scale(int n, int d, int in_width, int in_pitch, int in_height, const unsigned char *in_data, int out_width, int out_pitch, int out_height, unsigned char *out_data); - - float a = -0.5f; // a is a spline parameter such that -1 <= a <= 0 - - inline float C0(float t); - inline float C1(float t); - inline float C2(float t); - inline float C3(float t); - - std::vector L_vector; - std::vector c_vector[4]; - std::vector h_vector; -}; - -#endif diff --git a/src/textures/texture.cpp b/src/textures/texture.cpp index 7ff5c9ba25..ce7874ee6c 100644 --- a/src/textures/texture.cpp +++ b/src/textures/texture.cpp @@ -45,7 +45,6 @@ #include "v_video.h" #include "m_fixed.h" #include "textures/textures.h" -#include "textures/bicubic_interpolation.h" #include "v_palette.h" typedef bool (*CheckFunc)(FileReader & file); @@ -383,19 +382,122 @@ int FTexture::MipmapLevels() const void FTexture::GenerateBgraMipmaps() { - BicubicInterpolation bicubic; - - uint32_t *src = PixelsBgra.data(); - uint32_t *dest = src + Width 
* Height; - int levels = MipmapLevels(); - for (int i = 1; i < levels; i++) + struct Color4f { - int w = MAX(Width >> i, 1); - int h = MAX(Height >> i, 1); + float a, r, g, b; + Color4f operator*(const Color4f &v) const { return Color4f{ a * v.a, r * v.r, g * v.g, b * v.b }; } + Color4f operator/(const Color4f &v) const { return Color4f{ a / v.a, r / v.r, g / v.g, b / v.b }; } + Color4f operator+(const Color4f &v) const { return Color4f{ a + v.a, r + v.r, g + v.g, b + v.b }; } + Color4f operator-(const Color4f &v) const { return Color4f{ a - v.a, r - v.r, g - v.g, b - v.b }; } + Color4f operator*(float s) const { return Color4f{ a * s, r * s, g * s, b * s }; } + Color4f operator/(float s) const { return Color4f{ a / s, r / s, g / s, b / s }; } + Color4f operator+(float s) const { return Color4f{ a + s, r + s, g + s, b + s }; } + Color4f operator-(float s) const { return Color4f{ a - s, r - s, g - s, b - s }; } + }; - bicubic.ScaleImage(dest, h, w, src, Height, Width); + int levels = MipmapLevels(); + std::vector image(PixelsBgra.size()); - dest += w * h; + // Convert to normalized linear colorspace + { + for (int x = 0; x < Width; x++) + { + for (int y = 0; y < Height; y++) + { + uint32_t c8 = PixelsBgra[x * Height + y]; + Color4f c; + c.a = std::pow(APART(c8) * (1.0f / 255.0f), 2.2f); + c.r = std::pow(RPART(c8) * (1.0f / 255.0f), 2.2f); + c.g = std::pow(GPART(c8) * (1.0f / 255.0f), 2.2f); + c.b = std::pow(BPART(c8) * (1.0f / 255.0f), 2.2f); + image[x * Height + y] = c; + } + } + } + + // Generate mipmaps + { + std::vector smoothed(Width * Height); + Color4f *src = image.data(); + Color4f *dest = src + Width * Height; + for (int i = 1; i < levels; i++) + { + int srcw = MAX(Width >> (i - 1), 1); + int srch = MAX(Height >> (i - 1), 1); + int w = MAX(Width >> i, 1); + int h = MAX(Height >> i, 1); + + // Downscale + for (int x = 0; x < w; x++) + { + int sx0 = x * 2; + int sx1 = MIN((x + 1) * 2, srcw - 1); + for (int y = 0; y < h; y++) + { + int sy0 = y * 2; + int sy1 = 
MIN((y + 1) * 2, srch - 1); + + Color4f src00 = src[sy0 + sx0 * srch]; + Color4f src01 = src[sy1 + sx0 * srch]; + Color4f src10 = src[sy0 + sx1 * srch]; + Color4f src11 = src[sy1 + sx1 * srch]; + Color4f c = (src00 + src01 + src10 + src11) * 0.25f; + + dest[y + x * h] = src00; + } + } + + // Sharpen filter with a 3x3 kernel: + for (int x = 0; x < w; x++) + { + for (int y = 0; y < h; y++) + { + Color4f c = { 0.0f, 0.0f, 0.0f, 0.0f }; + for (int kx = -1; kx < 2; kx++) + { + for (int ky = -1; ky < 2; ky++) + { + int a = y + ky; + int b = x + kx; + if (a < 0) a = h - 1; + if (a == h) a = 0; + if (b < 0) b = w - 1; + if (b == h) b = 0; + c = c + dest[a + b * h]; + } + } + c = c * (1.0f / 9.0f); + smoothed[y + x * h] = c; + } + } + float k = 0.04f; + for (int j = 0; j < w * h; j++) + dest[j] = dest[j] + (dest[j] - smoothed[j]) * k; + + src = dest; + dest += w * h; + } + } + + // Convert to bgra8 sRGB colorspace + { + Color4f *src = image.data() + Width * Height; + uint32_t *dest = PixelsBgra.data() + Width * Height; + for (int i = 1; i < levels; i++) + { + int w = MAX(Width >> i, 1); + int h = MAX(Height >> i, 1); + for (int j = 0; j < w * h; j++) + { + uint32_t a = (uint32_t)clamp(std::pow(src[j].a, 1.0f / 2.2f) * 255.0f + 0.5f, 0.0f, 255.0f); + uint32_t r = (uint32_t)clamp(std::pow(src[j].r, 1.0f / 2.2f) * 255.0f + 0.5f, 0.0f, 255.0f); + uint32_t g = (uint32_t)clamp(std::pow(src[j].g, 1.0f / 2.2f) * 255.0f + 0.5f, 0.0f, 255.0f); + uint32_t b = (uint32_t)clamp(std::pow(src[j].b, 1.0f / 2.2f) * 255.0f + 0.5f, 0.0f, 255.0f); + dest[j] = (a << 24) | (r << 16) | (g << 8) | b; + } + src += w * h; + dest += w * h; + } } } From 200d357b0d1f609ce67fdb23c03c77836285f0e3 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 27 Jun 2016 11:43:24 +0200 Subject: [PATCH 083/912] Linear filtering bug fix --- src/r_draw_rgba.cpp | 16 ++++++++-------- src/r_draw_rgba.h | 40 ++++++++++++++++++---------------------- src/r_segs.cpp | 4 ++-- 3 files changed, 28 insertions(+), 32 
deletions(-) diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index aa88e43026..7a071e1d49 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -1436,7 +1436,7 @@ public: uint32_t frac; uint32_t texturefracx; uint32_t height; - uint32_t half; + uint32_t one; LoopIterator(DrawerWall1Command *command, DrawerThread *thread) { @@ -1451,7 +1451,7 @@ public: pitch = command->_pitch * thread->num_cores; height = command->_textureheight; - half = (0x80000000 + height - 1) / height; + one = ((0x80000000 + height - 1) / height) * 2 + 1; } explicit operator bool() @@ -1520,7 +1520,7 @@ public: uint32_t vplce[4]; uint32_t vince[4]; uint32_t height[4]; - uint32_t half[4]; + uint32_t one[4]; LoopIterator(DrawerWall4Command *command, DrawerThread *thread) { @@ -1537,7 +1537,7 @@ public: vplce[i] = command->_vplce[i] + command->_vince[i] * skipped; vince[i] = command->_vince[i] * thread->num_cores; height[i] = command->_bufheight[i]; - half[i] = (0x80000000 + height[i] - 1) / height[i]; + one[i] = ((0x80000000 + height[i] - 1) / height[i]) * 2 + 1; } } @@ -1574,7 +1574,7 @@ public: { FORCEINLINE static uint32_t Sample1(DrawerWall4Command &cmd, LoopIterator &loop, int index) { - return SampleBgra::sample_bilinear(cmd._bufplce[index], cmd._bufplce2[index], cmd._buftexturefracx[index], loop.vplce[index], loop.half[index], loop.height[index]); + return SampleBgra::sample_bilinear(cmd._bufplce[index], cmd._bufplce2[index], cmd._buftexturefracx[index], loop.vplce[index], loop.one[index], loop.height[index]); } }; #else @@ -1591,7 +1591,7 @@ public: FORCEINLINE static __m128i Sample4(DrawerWall4Command &cmd, LoopIterator &loop) { __m128i fg; - VEC_SAMPLE_BILINEAR4_COLUMN(fg, cmd._bufplce, cmd._bufplce2, cmd._buftexturefracx, loop.vplce, loop.half, loop.height); + VEC_SAMPLE_BILINEAR4_COLUMN(fg, cmd._bufplce, cmd._bufplce2, cmd._buftexturefracx, loop.vplce, loop.one, loop.height); return fg; } }; @@ -2021,7 +2021,7 @@ public: { do { - uint32_t fg = 
LightBgra::shade_bgra(SampleBgra::sample_bilinear(_source, _source2, loop.texturefracx, loop.frac, loop.half, loop.height), _light, _shade_constants); + uint32_t fg = LightBgra::shade_bgra(SampleBgra::sample_bilinear(_source, _source2, loop.texturefracx, loop.frac, loop.one, loop.height), _light, _shade_constants); *loop.dest = BlendBgra::copy(fg); } while (loop.next()); } @@ -2048,7 +2048,7 @@ public: { do { - uint32_t fg = LightBgra::shade_bgra(SampleBgra::sample_bilinear(_source, _source2, loop.texturefracx, loop.frac, loop.half, loop.height), _light, _shade_constants); + uint32_t fg = LightBgra::shade_bgra(SampleBgra::sample_bilinear(_source, _source2, loop.texturefracx, loop.frac, loop.one, loop.height), _light, _shade_constants); *loop.dest = BlendBgra::alpha_blend(fg, *loop.dest); } while (loop.next()); } diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index 53572c88b4..27d7bd035f 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -502,10 +502,10 @@ public: return (magnifying && r_magfilter) || (!magnifying && r_minfilter); } - FORCEINLINE static uint32_t sample_bilinear(const uint32_t *col0, const uint32_t *col1, uint32_t texturefracx, uint32_t texturefracy, uint32_t half, uint32_t height) + FORCEINLINE static uint32_t sample_bilinear(const uint32_t *col0, const uint32_t *col1, uint32_t texturefracx, uint32_t texturefracy, uint32_t one, uint32_t height) { - uint32_t frac_y0 = ((texturefracy - half) >> FRACBITS) * height; - uint32_t frac_y1 = ((texturefracy + half) >> FRACBITS) * height; + uint32_t frac_y0 = (texturefracy >> FRACBITS) * height; + uint32_t frac_y1 = ((texturefracy + one) >> FRACBITS) * height; uint32_t y0 = frac_y0 >> FRACBITS; uint32_t y1 = frac_y1 >> FRACBITS; @@ -533,18 +533,16 @@ public: int yshift = (32 - ybits); int xmask = (1 << xshift) - 1; int ymask = (1 << yshift) - 1; - uint32_t xhalf = 1 << (xbits - 1); - uint32_t yhalf = 1 << (ybits - 1); - uint32_t x = (xfrac - xhalf) >> xbits; - uint32_t y = (yfrac - yhalf) >> ybits; 
+ uint32_t x = xfrac >> xbits; + uint32_t y = yfrac >> ybits; uint32_t p00 = texture[(y & ymask) + ((x & xmask) << yshift)]; uint32_t p01 = texture[((y + 1) & ymask) + ((x & xmask) << yshift)]; uint32_t p10 = texture[(y & ymask) + (((x + 1) & xmask) << yshift)]; uint32_t p11 = texture[((y + 1) & ymask) + (((x + 1) & xmask) << yshift)]; - uint32_t inv_b = ((xfrac + xhalf) >> (xbits - 4)) & 15; - uint32_t inv_a = ((yfrac + yhalf) >> (ybits - 4)) & 15; + uint32_t inv_b = (xfrac >> (xbits - 4)) & 15; + uint32_t inv_a = (yfrac >> (ybits - 4)) & 15; uint32_t a = 16 - inv_a; uint32_t b = 16 - inv_b; @@ -564,11 +562,11 @@ public: ///////////////////////////////////////////////////////////////////////////// // SSE/AVX shading macros: -#define AVX2_SAMPLE_BILINEAR4_COLUMN_INIT(col0, col1, half, height, texturefracx) \ +#define AVX2_SAMPLE_BILINEAR4_COLUMN_INIT(col0, col1, one, height, texturefracx) \ const uint32_t *baseptr = col0[0]; \ __m128i coloffsets0 = _mm_setr_epi32(col0[0] - baseptr, col0[1] - baseptr, col0[2] - baseptr, col0[3] - baseptr); \ __m128i coloffsets1 = _mm_setr_epi32(col1[0] - baseptr, col1[1] - baseptr, col1[2] - baseptr, col1[3] - baseptr); \ - __m128i mhalf = _mm_loadu_si128((const __m128i*)half); \ + __m128i mone = _mm_loadu_si128((const __m128i*)one); \ __m128i m127 = _mm_set1_epi16(127); \ __m128i m16 = _mm_set1_epi32(16); \ __m128i m15 = _mm_set1_epi32(15); \ @@ -577,8 +575,8 @@ public: #define AVX2_SAMPLE_BILINEAR4_COLUMN(fg, texturefracy) { \ __m128i mtexturefracy = _mm_loadu_si128((const __m128i*)texturefracy); \ - __m128i multmp0 = _mm_srli_epi32(_mm_sub_epi32(mtexturefracy, mhalf), FRACBITS); \ - __m128i multmp1 = _mm_srli_epi32(_mm_add_epi32(mtexturefracy, mhalf), FRACBITS); \ + __m128i multmp0 = _mm_srli_epi32(mtexturefracy, FRACBITS); \ + __m128i multmp1 = _mm_srli_epi32(_mm_add_epi32(mtexturefracy, mone), FRACBITS); \ __m128i frac_y0 = _mm_or_si128(_mm_mul_epu32(multmp0, mheight), _mm_slli_si128(_mm_mul_epu32(_mm_srli_si128(multmp0, 4), 
_mm_srli_si128(mheight, 4)), 4)); \ __m128i frac_y1 = _mm_or_si128(_mm_mul_epu32(multmp1, mheight), _mm_slli_si128(_mm_mul_epu32(_mm_srli_si128(multmp1, 4), _mm_srli_si128(mheight, 4)), 4)); \ __m128i y0 = _mm_srli_epi32(frac_y0, FRACBITS); \ @@ -624,13 +622,13 @@ public: fg = _mm_packus_epi16(fg_lo, fg_hi); \ } -#define VEC_SAMPLE_BILINEAR4_COLUMN(fg, col0, col1, texturefracx, texturefracy, half, height) { \ +#define VEC_SAMPLE_BILINEAR4_COLUMN(fg, col0, col1, texturefracx, texturefracy, one, height) { \ __m128i m127 = _mm_set1_epi16(127); \ fg = _mm_setzero_si128(); \ for (int i = 0; i < 4; i++) \ { \ - uint32_t frac_y0 = ((texturefracy[i] - half[i]) >> FRACBITS) * height[i]; \ - uint32_t frac_y1 = ((texturefracy[i] + half[i]) >> FRACBITS) * height[i]; \ + uint32_t frac_y0 = (texturefracy[i] >> FRACBITS) * height[i]; \ + uint32_t frac_y1 = ((texturefracy[i] + one[i]) >> FRACBITS) * height[i]; \ uint32_t y0 = (frac_y0 >> FRACBITS); \ uint32_t y1 = (frac_y1 >> FRACBITS); \ \ @@ -676,23 +674,21 @@ public: int yshift = (32 - ybits); \ int xmask = (1 << xshift) - 1; \ int ymask = (1 << yshift) - 1; \ - uint32_t xhalf = 1 << (xbits - 1); \ - uint32_t yhalf = 1 << (ybits - 1); \ \ __m128i m127 = _mm_set1_epi16(127); \ fg = _mm_setzero_si128(); \ for (int i = 0; i < 4; i++) \ { \ - uint32_t x = (xfrac - xhalf) >> xbits; \ - uint32_t y = (yfrac - yhalf) >> ybits; \ + uint32_t x = xfrac >> xbits; \ + uint32_t y = yfrac >> ybits; \ \ uint32_t p00 = texture[(y & ymask) + ((x & xmask) << yshift)]; \ uint32_t p01 = texture[((y + 1) & ymask) + ((x & xmask) << yshift)]; \ uint32_t p10 = texture[(y & ymask) + (((x + 1) & xmask) << yshift)]; \ uint32_t p11 = texture[((y + 1) & ymask) + (((x + 1) & xmask) << yshift)]; \ \ - uint32_t inv_b = ((xfrac + xhalf) >> (xbits - 4)) & 15; \ - uint32_t inv_a = ((yfrac + yhalf) >> (ybits - 4)) & 15; \ + uint32_t inv_b = (xfrac >> (xbits - 4)) & 15; \ + uint32_t inv_a = (yfrac >> (ybits - 4)) & 15; \ \ __m128i ab_invab = 
_mm_load_si128(SampleBgra::samplertable + inv_b * 32 + inv_a * 2); \ __m128i ainvb_invainvb = _mm_load_si128(SampleBgra::samplertable + inv_b * 32 + inv_a * 2 + 1); \ diff --git a/src/r_segs.cpp b/src/r_segs.cpp index 870d748947..96bb1f948c 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -1173,14 +1173,14 @@ WallscanSampler::WallscanSampler(int y1, float swal, double yrepeat, fixed_t xof } else { - int tx0 = ((xoffset - FRACUNIT / 2) >> FRACBITS) % mip_width; + int tx0 = (xoffset >> FRACBITS) % mip_width; if (tx0 < 0) tx0 += mip_width; int tx1 = (tx0 + 1) % mip_width; source = (BYTE*)(pixels + tx0 * mip_height); source2 = (BYTE*)(pixels + tx1 * mip_height); height = mip_height; - texturefracx = ((xoffset + FRACUNIT / 2) >> (FRACBITS - 4)) & 15; + texturefracx = (xoffset >> (FRACBITS - 4)) & 15; } } } From 7a65a0f5953c33ea32ab4600064541e82603a8be Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 27 Jun 2016 11:57:27 +0200 Subject: [PATCH 084/912] Made mipmapping a little less aggressive --- src/r_segs.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/r_segs.cpp b/src/r_segs.cpp index 96bb1f948c..2d39a6d973 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -1147,11 +1147,12 @@ WallscanSampler::WallscanSampler(int y1, float swal, double yrepeat, fixed_t xof if (r_mipmap && texture->Mipmapped()) { uint32_t xpos = (uint32_t)((((uint64_t)xoffset) << FRACBITS) / mip_width); - int level = (int)MAX(magnitude - 1.0, 0.0); - while (level != 0) + double texture_bias = 1.7f; + double level = MAX(magnitude - 3.0, 0.0); + while (level > texture_bias) { mipmap_offset += mip_width * mip_height; - level >>= 1; + level *= 0.5f; mip_width = MAX(mip_width >> 1, 1); mip_height = MAX(mip_height >> 1, 1); } From d1617fcdf08ddc4f03dca8d92195261bf7dcc4ef Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 30 Jun 2016 13:45:06 +0200 Subject: [PATCH 085/912] GCC compile fixes --- src/r_draw_rgba.h | 4 ++++ src/textures/texture.cpp | 16 
++++++++-------- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index 27d7bd035f..96e96530c5 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -31,6 +31,10 @@ #include #include +#ifndef NO_SSE +#include +#endif + ///////////////////////////////////////////////////////////////////////////// // Drawer functions: diff --git a/src/textures/texture.cpp b/src/textures/texture.cpp index ce7874ee6c..1602236170 100644 --- a/src/textures/texture.cpp +++ b/src/textures/texture.cpp @@ -406,10 +406,10 @@ void FTexture::GenerateBgraMipmaps() { uint32_t c8 = PixelsBgra[x * Height + y]; Color4f c; - c.a = std::pow(APART(c8) * (1.0f / 255.0f), 2.2f); - c.r = std::pow(RPART(c8) * (1.0f / 255.0f), 2.2f); - c.g = std::pow(GPART(c8) * (1.0f / 255.0f), 2.2f); - c.b = std::pow(BPART(c8) * (1.0f / 255.0f), 2.2f); + c.a = powf(APART(c8) * (1.0f / 255.0f), 2.2f); + c.r = powf(RPART(c8) * (1.0f / 255.0f), 2.2f); + c.g = powf(GPART(c8) * (1.0f / 255.0f), 2.2f); + c.b = powf(BPART(c8) * (1.0f / 255.0f), 2.2f); image[x * Height + y] = c; } } @@ -489,10 +489,10 @@ void FTexture::GenerateBgraMipmaps() int h = MAX(Height >> i, 1); for (int j = 0; j < w * h; j++) { - uint32_t a = (uint32_t)clamp(std::pow(src[j].a, 1.0f / 2.2f) * 255.0f + 0.5f, 0.0f, 255.0f); - uint32_t r = (uint32_t)clamp(std::pow(src[j].r, 1.0f / 2.2f) * 255.0f + 0.5f, 0.0f, 255.0f); - uint32_t g = (uint32_t)clamp(std::pow(src[j].g, 1.0f / 2.2f) * 255.0f + 0.5f, 0.0f, 255.0f); - uint32_t b = (uint32_t)clamp(std::pow(src[j].b, 1.0f / 2.2f) * 255.0f + 0.5f, 0.0f, 255.0f); + uint32_t a = (uint32_t)clamp(powf(src[j].a, 1.0f / 2.2f) * 255.0f + 0.5f, 0.0f, 255.0f); + uint32_t r = (uint32_t)clamp(powf(src[j].r, 1.0f / 2.2f) * 255.0f + 0.5f, 0.0f, 255.0f); + uint32_t g = (uint32_t)clamp(powf(src[j].g, 1.0f / 2.2f) * 255.0f + 0.5f, 0.0f, 255.0f); + uint32_t b = (uint32_t)clamp(powf(src[j].b, 1.0f / 2.2f) * 255.0f + 0.5f, 0.0f, 255.0f); dest[j] = (a << 24) | (r << 16) | (g << 8) 
| b; } src += w * h; From 13ef9a834c45355ba70fb029c54170e44b54cb76 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 30 Jun 2016 13:56:53 +0200 Subject: [PATCH 086/912] Compile fix for gcc/clang --- src/r_draw_rgba.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index 96e96530c5..c976602f62 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -296,7 +296,7 @@ class DrawerBlendCommand : public CommandType public: void Execute(DrawerThread *thread) override { - LoopIterator loop(this, thread); + typename CommandType::LoopIterator loop(this, thread); if (!loop) return; BlendMode blend(*this, loop); do From b0e9adfc10ad40dde1c080ff8cb0e034e91cd069 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 4 Jul 2016 16:33:19 +0200 Subject: [PATCH 087/912] Fix single layer skies by using a cube box rather than a cylinder --- src/r_plane.cpp | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/src/r_plane.cpp b/src/r_plane.cpp index 0ede451e06..8345a83cec 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -880,14 +880,34 @@ static DWORD lastskycol_bgra[4]; static int skycolplace; static int skycolplace_bgra; +// Treat sky as a cube rather than a cylinder +CVAR(Bool, r_cubesky, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); + // Get a column of sky when there is only one sky texture. 
static const BYTE *R_GetOneSkyColumn (FTexture *fronttex, int x) { - angle_t column = (skyangle + xtoviewangle[x]) ^ skyflip; - if (!r_swtruecolor) - return fronttex->GetColumn((UMulScale16(column, frontcyl) + frontpos) >> FRACBITS, NULL); + int tx; + if (r_cubesky) + { + int tx0 = (UMulScale16((skyangle + xtoviewangle[0]) ^ skyflip, frontcyl) + frontpos) >> FRACBITS; + int tx1 = tx0 - ((UMulScale16(xtoviewangle[0], frontcyl) * 2) >> FRACBITS); + tx = (int)(tx0 + (tx1 - tx0) * x / viewwidth + 0.5); + tx %= fronttex->GetWidth(); + if (tx < 0) + tx += fronttex->GetWidth(); + } else - return (const BYTE *)fronttex->GetColumnBgra((UMulScale16(column, frontcyl) + frontpos) >> FRACBITS, NULL); + { + angle_t column = (skyangle + xtoviewangle[x]) ^ skyflip; + tx = (UMulScale16(column, frontcyl) + frontpos) >> FRACBITS; + } + + if (!r_swtruecolor) + return fronttex->GetColumn(tx, NULL); + else + { + return (const BYTE *)fronttex->GetColumnBgra(tx, NULL); + } } // Get a column of sky when there are two overlapping sky textures @@ -1030,7 +1050,7 @@ static void R_DrawSky (visplane_t *pl) { // The texture does not tile nicely frontyScale *= skyscale; frontiScale = 1 / frontyScale; - R_DrawSkyStriped (pl); + //R_DrawSkyStriped (pl); } } From 19030b555f233f85334eaca0f2c8c66b91f1e577 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 6 Jul 2016 20:19:01 +0200 Subject: [PATCH 088/912] Fix sky stretching on widescreen displays --- src/r_plane.cpp | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/src/r_plane.cpp b/src/r_plane.cpp index 8345a83cec..75826d3289 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -880,21 +880,17 @@ static DWORD lastskycol_bgra[4]; static int skycolplace; static int skycolplace_bgra; -// Treat sky as a cube rather than a cylinder -CVAR(Bool, r_cubesky, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); +CVAR(Bool, r_linearsky, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); // Get a column of sky when there is only one sky texture. 
static const BYTE *R_GetOneSkyColumn (FTexture *fronttex, int x) { int tx; - if (r_cubesky) + if (r_linearsky) { - int tx0 = (UMulScale16((skyangle + xtoviewangle[0]) ^ skyflip, frontcyl) + frontpos) >> FRACBITS; - int tx1 = tx0 - ((UMulScale16(xtoviewangle[0], frontcyl) * 2) >> FRACBITS); - tx = (int)(tx0 + (tx1 - tx0) * x / viewwidth + 0.5); - tx %= fronttex->GetWidth(); - if (tx < 0) - tx += fronttex->GetWidth(); + angle_t xangle = (angle_t)((0.5 - x / (double)viewwidth) * FocalTangent * ANGLE_90); + angle_t column = (skyangle + xangle) ^ skyflip; + tx = (UMulScale16(column, frontcyl) + frontpos) >> FRACBITS; } else { From 21390e91b8a28c71ba44bf62ee3c7545508a74e2 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 6 Aug 2016 21:04:45 +0200 Subject: [PATCH 089/912] Remove linear sky again --- src/r_plane.cpp | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/src/r_plane.cpp b/src/r_plane.cpp index e25812fbd4..c751fc5dcb 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -880,30 +880,16 @@ static DWORD lastskycol_bgra[4]; static int skycolplace; static int skycolplace_bgra; -CVAR(Bool, r_linearsky, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); - // Get a column of sky when there is only one sky texture. 
static const BYTE *R_GetOneSkyColumn (FTexture *fronttex, int x) { - int tx; - if (r_linearsky) - { - angle_t xangle = (angle_t)((0.5 - x / (double)viewwidth) * FocalTangent * ANGLE_90); - angle_t column = (skyangle + xangle) ^ skyflip; - tx = (UMulScale16(column, frontcyl) + frontpos) >> FRACBITS; - } - else - { - angle_t column = (skyangle + xtoviewangle[x]) ^ skyflip; - tx = (UMulScale16(column, frontcyl) + frontpos) >> FRACBITS; - } + angle_t column = (skyangle + xtoviewangle[x]) ^ skyflip; + int tx = (UMulScale16(column, frontcyl) + frontpos) >> FRACBITS; if (!r_swtruecolor) return fronttex->GetColumn(tx, NULL); else - { return (const BYTE *)fronttex->GetColumnBgra(tx, NULL); - } } // Get a column of sky when there are two overlapping sky textures From 7000d0ccf9a97a01ab74853ea571d753e6e252b0 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 6 Aug 2016 22:59:16 +0200 Subject: [PATCH 090/912] Change GetPixelsBgra to use CopyTrueColorPixels --- src/g_strife/strife_sbar.cpp | 11 --- src/menu/playerdisplay.cpp | 11 --- src/textures/jpegtexture.cpp | 109 ------------------------- src/textures/pngtexture.cpp | 151 ----------------------------------- src/textures/texture.cpp | 46 ++++++++--- src/textures/textures.h | 1 + src/textures/warptexture.cpp | 14 +++- 7 files changed, 45 insertions(+), 298 deletions(-) diff --git a/src/g_strife/strife_sbar.cpp b/src/g_strife/strife_sbar.cpp index e1fcb3cda1..eb3fa26087 100644 --- a/src/g_strife/strife_sbar.cpp +++ b/src/g_strife/strife_sbar.cpp @@ -34,7 +34,6 @@ public: const BYTE *GetColumn (unsigned int column, const Span **spans_out); const BYTE *GetPixels (); - const uint32_t *GetPixelsBgra() override; bool CheckModified (); void SetVial (int level); @@ -116,16 +115,6 @@ const BYTE *FHealthBar::GetPixels () return Pixels; } -const uint32_t *FHealthBar::GetPixelsBgra() -{ - if (NeedRefresh) - { - MakeTexture(); - PixelsBgra.clear(); - } - return FTexture::GetPixelsBgra(); -} - void FHealthBar::SetVial (int level) { 
if (level < 0) diff --git a/src/menu/playerdisplay.cpp b/src/menu/playerdisplay.cpp index 7b7e9ca5d4..16671975a0 100644 --- a/src/menu/playerdisplay.cpp +++ b/src/menu/playerdisplay.cpp @@ -78,7 +78,6 @@ public: const BYTE *GetColumn(unsigned int column, const Span **spans_out); const BYTE *GetPixels(); - const uint32_t *GetPixelsBgra() override; bool CheckModified(); protected: @@ -247,16 +246,6 @@ const BYTE *FBackdropTexture::GetPixels() return Pixels; } -const uint32_t *FBackdropTexture::GetPixelsBgra() -{ - if (LastRenderTic != gametic) - { - Render(); - PixelsBgra.clear(); - } - return FTexture::GetPixelsBgra(); -} - //============================================================================= // // This is one plasma and two rotozoomers. I think it turned out quite awesome. diff --git a/src/textures/jpegtexture.cpp b/src/textures/jpegtexture.cpp index f44b34d088..fc629b37e4 100644 --- a/src/textures/jpegtexture.cpp +++ b/src/textures/jpegtexture.cpp @@ -187,7 +187,6 @@ public: const BYTE *GetColumn (unsigned int column, const Span **spans_out); const BYTE *GetPixels (); - const uint32_t *GetPixelsBgra (); void Unload (); FTextureFormat GetFormat (); int CopyTrueColorPixels(FBitmap *bmp, int x, int y, int rotate, FCopyInfo *inf = NULL); @@ -199,7 +198,6 @@ protected: Span DummySpans[2]; void MakeTexture (); - void MakeTextureBgra (); friend class FTexture; }; @@ -358,15 +356,6 @@ const BYTE *FJPEGTexture::GetPixels () return Pixels; } -const uint32_t *FJPEGTexture::GetPixelsBgra() -{ - if (PixelsBgra.empty()) - { - MakeTextureBgra(); - } - return PixelsBgra.data(); -} - //========================================================================== // // @@ -466,104 +455,6 @@ void FJPEGTexture::MakeTexture () } } -void FJPEGTexture::MakeTextureBgra() -{ - FWadLump lump = Wads.OpenLumpNum(SourceLump); - JSAMPLE *buff = NULL; - - jpeg_decompress_struct cinfo; - jpeg_error_mgr jerr; - - CreatePixelsBgraWithMipmaps(); - - cinfo.err = jpeg_std_error(&jerr); - 
cinfo.err->output_message = JPEG_OutputMessage; - cinfo.err->error_exit = JPEG_ErrorExit; - jpeg_create_decompress(&cinfo); - try - { - FLumpSourceMgr sourcemgr(&lump, &cinfo); - jpeg_read_header(&cinfo, TRUE); - if (!((cinfo.out_color_space == JCS_RGB && cinfo.num_components == 3) || - (cinfo.out_color_space == JCS_CMYK && cinfo.num_components == 4) || - (cinfo.out_color_space == JCS_GRAYSCALE && cinfo.num_components == 1))) - { - Printf(TEXTCOLOR_ORANGE "Unsupported color format\n"); - throw - 1; - } - - jpeg_start_decompress(&cinfo); - - int y = 0; - buff = new BYTE[cinfo.output_width * cinfo.output_components]; - - while (cinfo.output_scanline < cinfo.output_height) - { - int num_scanlines = jpeg_read_scanlines(&cinfo, &buff, 1); - BYTE *in = buff; - uint32_t *out = PixelsBgra.data() + y; - switch (cinfo.out_color_space) - { - case JCS_RGB: - for (int x = Width; x > 0; --x) - { - uint32_t r = in[0]; - uint32_t g = in[1]; - uint32_t b = in[2]; - *out = 0xff000000 | (r << 16) | (g << 8) | b; - out += Height; - in += 3; - } - break; - - case JCS_GRAYSCALE: - for (int x = Width; x > 0; --x) - { - uint32_t gray = in[0]; - *out = 0xff000000 | (gray << 16) | (gray << 8) | gray; - out += Height; - in += 1; - } - break; - - case JCS_CMYK: - // What are you doing using a CMYK image? :) - for (int x = Width; x > 0; --x) - { - // To be precise, these calculations should use 255, but - // 256 is much faster and virtually indistinguishable. - uint32_t r = in[3] - (((256 - in[0])*in[3]) >> 8); - uint32_t g = in[3] - (((256 - in[1])*in[3]) >> 8); - uint32_t b = in[3] - (((256 - in[2])*in[3]) >> 8); - *out = 0xff000000 | (r << 16) | (g << 8) | b; - out += Height; - in += 4; - } - break; - - default: - // The other colorspaces were considered above and discarded, - // but GCC will complain without a default for them here. 
- break; - } - y++; - } - jpeg_finish_decompress(&cinfo); - jpeg_destroy_decompress(&cinfo); - } - catch (int) - { - Printf(TEXTCOLOR_ORANGE " in texture %s\n", Name.GetChars()); - jpeg_destroy_decompress(&cinfo); - } - if (buff != NULL) - { - delete[] buff; - } - - GenerateBgraMipmaps(); -} - //=========================================================================== // diff --git a/src/textures/pngtexture.cpp b/src/textures/pngtexture.cpp index ee4eabe900..31d76f5674 100644 --- a/src/textures/pngtexture.cpp +++ b/src/textures/pngtexture.cpp @@ -57,7 +57,6 @@ public: const BYTE *GetColumn (unsigned int column, const Span **spans_out); const BYTE *GetPixels (); - const uint32_t *GetPixelsBgra (); void Unload (); FTextureFormat GetFormat (); int CopyTrueColorPixels(FBitmap *bmp, int x, int y, int rotate, FCopyInfo *inf = NULL); @@ -81,7 +80,6 @@ protected: DWORD StartOfIDAT; void MakeTexture (); - void MakeTextureBgra (); friend class FTexture; }; @@ -454,15 +452,6 @@ const BYTE *FPNGTexture::GetPixels () return Pixels; } -const uint32_t *FPNGTexture::GetPixelsBgra() -{ - if (PixelsBgra.empty()) - { - MakeTextureBgra(); - } - return PixelsBgra.data(); -} - //========================================================================== // @@ -620,146 +609,6 @@ void FPNGTexture::MakeTexture () delete lump; } -void FPNGTexture::MakeTextureBgra () -{ - FileReader *lump; - - if (SourceLump >= 0) - { - lump = new FWadLump(Wads.OpenLumpNum(SourceLump)); - } - else - { - lump = new FileReader(SourceFile.GetChars()); - } - - CreatePixelsBgraWithMipmaps(); - if (StartOfIDAT != 0) - { - DWORD len, id; - lump->Seek (StartOfIDAT, SEEK_SET); - lump->Read(&len, 4); - lump->Read(&id, 4); - - if (ColorType == 0 || ColorType == 3) /* Grayscale and paletted */ - { - std::vector src(Width*Height); - M_ReadIDAT (lump, src.data(), Width, Height, Width, BitDepth, ColorType, Interlace, BigLong((unsigned int)len)); - - if (!PngPalette.empty()) - { - for (int x = 0; x < Width; x++) - { - for 
(int y = 0; y < Height; y++) - { - uint32_t r = PngPalette[src[x + y * Width] * 3 + 0]; - uint32_t g = PngPalette[src[x + y * Width] * 3 + 1]; - uint32_t b = PngPalette[src[x + y * Width] * 3 + 2]; - PixelsBgra[x * Height + y] = 0xff000000 | (r << 16) | (g << 8) | b; - } - } - } - else - { - for (int x = 0; x < Width; x++) - { - for (int y = 0; y < Height; y++) - { - uint32_t gray = src[x + y * Width]; - PixelsBgra[x * Height + y] = 0xff000000 | (gray << 16) | (gray << 8) | gray; - } - } - } - } - else /* RGB and/or Alpha present */ - { - int bytesPerPixel = ColorType == 2 ? 3 : ColorType == 4 ? 2 : 4; - BYTE *tempix = new BYTE[Width * Height * bytesPerPixel]; - BYTE *in; - uint32_t *out; - int x, y, pitch, backstep; - - M_ReadIDAT (lump, tempix, Width, Height, Width*bytesPerPixel, BitDepth, ColorType, Interlace, BigLong((unsigned int)len)); - in = tempix; - out = PixelsBgra.data(); - - // Convert from source format to paletted, column-major. - // Formats with alpha maps are reduced to only 1 bit of alpha. 
- switch (ColorType) - { - case 2: // RGB - pitch = Width * 3; - backstep = Height * pitch - 3; - for (x = Width; x > 0; --x) - { - for (y = Height; y > 0; --y) - { - if (!HaveTrans) - { - *out++ = 0xff000000 | (((uint32_t)in[0]) << 16) | (((uint32_t)in[1]) << 8) | ((uint32_t)in[2]); - } - else - { - if (in[0] == NonPaletteTrans[0] && - in[1] == NonPaletteTrans[1] && - in[2] == NonPaletteTrans[2]) - { - *out++ = 0; - } - else - { - *out++ = 0xff000000 | (((uint32_t)in[0]) << 16) | (((uint32_t)in[1]) << 8) | ((uint32_t)in[2]); - } - } - in += pitch; - } - in -= backstep; - } - break; - - case 4: // Grayscale + Alpha - pitch = Width * 2; - backstep = Height * pitch - 2; - for (x = Width; x > 0; --x) - { - for (y = Height; y > 0; --y) - { - // output as premultiplied alpha - uint32_t alpha = in[1]; - uint32_t gray = (in[0] * alpha + 127) / 255; - *out++ = (alpha << 24) | (gray << 16) | (gray << 8) | gray; - in += pitch; - } - in -= backstep; - } - break; - - case 6: // RGB + Alpha - pitch = Width * 4; - backstep = Height * pitch - 4; - for (x = Width; x > 0; --x) - { - for (y = Height; y > 0; --y) - { - // output as premultiplied alpha - uint32_t alpha = in[3]; - uint32_t red = (in[0] * alpha + 127) / 255; - uint32_t green = (in[1] * alpha + 127) / 255; - uint32_t blue = (in[2] * alpha + 127) / 255; - *out++ = (alpha << 24) | (red << 16) | (green << 8) | blue; - in += pitch; - } - in -= backstep; - } - break; - } - delete[] tempix; - } - } - delete lump; - GenerateBgraMipmaps(); -} - //=========================================================================== // // FPNGTexture::CopyTrueColorPixels diff --git a/src/textures/texture.cpp b/src/textures/texture.cpp index 1602236170..7dfe04b230 100644 --- a/src/textures/texture.cpp +++ b/src/textures/texture.cpp @@ -194,21 +194,15 @@ const uint32_t *FTexture::GetColumnBgra(unsigned int column, const Span **spans_ const uint32_t *FTexture::GetPixelsBgra() { - if (PixelsBgra.empty()) + if (PixelsBgra.empty() || 
CheckModified()) { - GetColumn(0, nullptr); - const BYTE *indices = GetPixels(); - if (indices == nullptr) + if (!GetColumn(0, nullptr)) return nullptr; - CreatePixelsBgraWithMipmaps(); - for (int i = 0; i < Width * Height; i++) - { - if (indices[i] != 0) - PixelsBgra[i] = 0xff000000 | GPalette.BaseColors[indices[i]].d; - else - PixelsBgra[i] = 0; - } - GenerateBgraMipmaps(); + + FBitmap bitmap; + bitmap.Create(GetWidth(), GetHeight()); + CopyTrueColorPixels(&bitmap, 0, 0); + GenerateBgraFromBitmap(bitmap); } return PixelsBgra.data(); } @@ -356,6 +350,32 @@ void FTexture::FreeSpans (Span **spans) const M_Free (spans); } +void FTexture::GenerateBgraFromBitmap(const FBitmap &bitmap) +{ + CreatePixelsBgraWithMipmaps(); + + // Transpose and premultiply alpha + const uint32_t *src = (const uint32_t *)bitmap.GetPixels(); + uint32_t *dest = PixelsBgra.data(); + for (int x = 0; x < Width; x++) + { + for (int y = 0; y < Height; y++) + { + uint32_t p = src[x + y * Width]; + uint32_t red = RPART(p); + uint32_t green = GPART(p); + uint32_t blue = BPART(p); + uint32_t alpha = APART(p); + red = (red * alpha + 127) / 255; + green = (green * alpha + 127) / 255; + blue = (blue * alpha + 127) / 255; + dest[y + x * Height] = (alpha << 24) | (red << 16) | (green << 8) | blue; + } + } + + GenerateBgraMipmaps(); +} + void FTexture::CreatePixelsBgraWithMipmaps() { int levels = MipmapLevels(); diff --git a/src/textures/textures.h b/src/textures/textures.h index ff1093a49d..e5ecdc6797 100644 --- a/src/textures/textures.h +++ b/src/textures/textures.h @@ -274,6 +274,7 @@ protected: std::vector PixelsBgra; + void GenerateBgraFromBitmap(const FBitmap &bitmap); void CreatePixelsBgraWithMipmaps(); void GenerateBgraMipmaps(); void GenerateBgraMipmapsFast(); diff --git a/src/textures/warptexture.cpp b/src/textures/warptexture.cpp index 0d18ab58f4..91c7b9fc43 100644 --- a/src/textures/warptexture.cpp +++ b/src/textures/warptexture.cpp @@ -39,6 +39,7 @@ #include "r_utility.h" #include 
"textures/textures.h" #include "warpbuffer.h" +#include "v_palette.h" FWarpTexture::FWarpTexture (FTexture *source, int warptype) @@ -96,13 +97,20 @@ const BYTE *FWarpTexture::GetPixels () const uint32_t *FWarpTexture::GetPixelsBgra() { DWORD time = r_FrameTime; - if (Pixels == NULL || time != GenTime) { MakeTexture(time); - PixelsBgra.clear(); + CreatePixelsBgraWithMipmaps(); + for (int i = 0; i < Width * Height; i++) + { + if (Pixels[i] != 0) + PixelsBgra[i] = 0xff000000 | GPalette.BaseColors[Pixels[i]].d; + else + PixelsBgra[i] = 0; + } + GenerateBgraMipmapsFast(); } - return FTexture::GetPixelsBgra(); + return PixelsBgra.data(); } const BYTE *FWarpTexture::GetColumn (unsigned int column, const Span **spans_out) From 2f512e54cdee4c1d15f632fcab64d3850004028b Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 6 Aug 2016 23:12:34 +0200 Subject: [PATCH 091/912] Remove unused code --- src/textures/pngtexture.cpp | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/textures/pngtexture.cpp b/src/textures/pngtexture.cpp index 31d76f5674..9a64bac616 100644 --- a/src/textures/pngtexture.cpp +++ b/src/textures/pngtexture.cpp @@ -41,7 +41,6 @@ #include "bitmap.h" #include "v_palette.h" #include "textures/textures.h" -#include //========================================================================== // @@ -74,7 +73,6 @@ protected: bool HaveTrans; WORD NonPaletteTrans[3]; - std::vector PngPalette; BYTE *PaletteMap; int PaletteSize; DWORD StartOfIDAT; @@ -268,12 +266,6 @@ FPNGTexture::FPNGTexture (FileReader &lump, int lumpnum, const FString &filename { lump.Seek (len - PaletteSize * 3, SEEK_CUR); } - for (i = 0; i < PaletteSize; i++) - { - PngPalette.push_back(p.pngpal[i][0]); - PngPalette.push_back(p.pngpal[i][1]); - PngPalette.push_back(p.pngpal[i][2]); - } for (i = PaletteSize - 1; i >= 0; --i) { p.palette[i] = MAKERGB(p.pngpal[i][0], p.pngpal[i][1], p.pngpal[i][2]); From 3c8719f9458d4f210f27e827d89ad86234717232 Mon Sep 17 00:00:00 2001 From: Magnus 
Norddahl Date: Mon, 8 Aug 2016 22:35:26 +0200 Subject: [PATCH 092/912] Fix buffer overflow in FTexture::GenerateBgraMipmaps --- src/textures/texture.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/textures/texture.cpp b/src/textures/texture.cpp index 7dfe04b230..05574e9da5 100644 --- a/src/textures/texture.cpp +++ b/src/textures/texture.cpp @@ -482,7 +482,7 @@ void FTexture::GenerateBgraMipmaps() if (a < 0) a = h - 1; if (a == h) a = 0; if (b < 0) b = w - 1; - if (b == h) b = 0; + if (b == w) b = 0; c = c + dest[a + b * h]; } } From abef073ea499f85337ad688e61ac8c65c3a689ac Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 9 Aug 2016 01:17:45 +0200 Subject: [PATCH 093/912] Implemented sloped planes for true color mode --- src/r_draw_rgba.cpp | 144 +++++++++++++++++++++++++++++++++++++++----- src/r_draw_rgba.h | 3 + src/r_plane.cpp | 25 +------- 3 files changed, 134 insertions(+), 38 deletions(-) diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 7a071e1d49..69ebfeb842 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -2191,23 +2191,44 @@ public: class DrawTiltedSpanRGBACommand : public DrawerCommand { - int _y; int _x1; int _x2; + int _y; BYTE * RESTRICT _destorg; fixed_t _light; ShadeConstants _shade_constants; - const BYTE * RESTRICT _source; + FVector3 _plane_sz; + FVector3 _plane_su; + FVector3 _plane_sv; + bool _plane_shade; + int _planeshade; + float _planelightfloat; + fixed_t _pviewx; + fixed_t _pviewy; + int _xbits; + int _ybits; + const uint32_t * RESTRICT _source; public: - DrawTiltedSpanRGBACommand(int y, int x1, int x2) + DrawTiltedSpanRGBACommand(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy) { - _y = y; _x1 = x1; _x2 = x2; - + _y = y; _destorg = dc_destorg; - _source = ds_source; + _light = ds_light; + _shade_constants = ds_shade_constants; + _plane_sz = 
plane_sz; + _plane_su = plane_su; + _plane_sv = plane_sv; + _plane_shade = plane_shade; + _planeshade = planeshade; + _planelightfloat = planelightfloat; + _pviewx = pviewx; + _pviewy = pviewy; + _source = (const uint32_t*)ds_source; + _xbits = ds_xbits; + _ybits = ds_ybits; } void Execute(DrawerThread *thread) override @@ -2215,20 +2236,103 @@ public: if (thread->line_skipped_by_thread(_y)) return; - int y = _y; - int x1 = _x1; - int x2 = _x2; + //#define SPANSIZE 32 + //#define INVSPAN 0.03125f + //#define SPANSIZE 8 + //#define INVSPAN 0.125f + #define SPANSIZE 16 + #define INVSPAN 0.0625f - // Slopes are broken currently in master. - // Until R_DrawTiltedPlane is fixed we are just going to fill with a solid color. + int source_width = 1 << _xbits; + int source_height = 1 << _ybits; - uint32_t *source = (uint32_t*)_source; - uint32_t *dest = ylookup[y] + x1 + (uint32_t*)_destorg; + uint32_t *dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; + int count = _x2 - _x1 + 1; - int count = x2 - x1 + 1; + // Depth (Z) change across the span + double iz = _plane_sz[2] + _plane_sz[1] * (centery - _y) + _plane_sz[0] * (_x1 - centerx); + + // Light change across the span + fixed_t lightstart = _light; + fixed_t lightend = lightstart; + if (_plane_shade) + { + double vis_start = iz * _planelightfloat; + double vis_end = (iz + _plane_sz[0] * count) * _planelightfloat; + + lightstart = LIGHTSCALE(vis_start, _planeshade); + lightend = LIGHTSCALE(vis_end, _planeshade); + } + fixed_t light = lightstart; + fixed_t steplight = (lightend - lightstart) / count; + + // Texture coordinates + double uz = _plane_su[2] + _plane_su[1] * (centery - _y) + _plane_su[0] * (_x1 - centerx); + double vz = _plane_sv[2] + _plane_sv[1] * (centery - _y) + _plane_sv[0] * (_x1 - centerx); + double startz = 1.f / iz; + double startu = uz*startz; + double startv = vz*startz; + double izstep = _plane_sz[0] * SPANSIZE; + double uzstep = _plane_su[0] * SPANSIZE; + double vzstep = _plane_sv[0] * SPANSIZE; + + 
// Linear interpolate in sizes of SPANSIZE to increase speed + while (count >= SPANSIZE) + { + iz += izstep; + uz += uzstep; + vz += vzstep; + + double endz = 1.f / iz; + double endu = uz*endz; + double endv = vz*endz; + uint32_t stepu = (uint32_t)(SQWORD((endu - startu) * INVSPAN)); + uint32_t stepv = (uint32_t)(SQWORD((endv - startv) * INVSPAN)); + uint32_t u = (uint32_t)(SQWORD(startu) + _pviewx); + uint32_t v = (uint32_t)(SQWORD(startv) + _pviewy); + + for (int i = 0; i < SPANSIZE; i++) + { + uint32_t sx = ((u >> 16) * source_width) >> 16; + uint32_t sy = ((v >> 16) * source_height) >> 16; + uint32_t fg = _source[sy + sx * source_height]; + + if (_shade_constants.simple_shade) + *(dest++) = LightBgra::shade_bgra_simple(fg, LightBgra::calc_light_multiplier(light)); + else + *(dest++) = LightBgra::shade_bgra(fg, LightBgra::calc_light_multiplier(light), _shade_constants); + + u += stepu; + v += stepv; + light += steplight; + } + startu = endu; + startv = endv; + count -= SPANSIZE; + } + + // The last few pixels at the end while (count > 0) { - *(dest++) = source[0]; + double endz = 1.f / iz; + startu = uz*endz; + startv = vz*endz; + uint32_t u = (uint32_t)(SQWORD(startu) + _pviewx); + uint32_t v = (uint32_t)(SQWORD(startv) + _pviewy); + + uint32_t sx = ((u >> 16) * source_width) >> 16; + uint32_t sy = ((v >> 16) * source_height) >> 16; + uint32_t fg = _source[sy + sx * source_height]; + + if (_shade_constants.simple_shade) + *(dest++) = LightBgra::shade_bgra_simple(fg, LightBgra::calc_light_multiplier(light)); + else + *(dest++) = LightBgra::shade_bgra(fg, LightBgra::calc_light_multiplier(light), _shade_constants); + + iz += _plane_sz[0]; + uz += _plane_su[0]; + vz += _plane_sv[0]; + light += steplight; count--; } } @@ -2633,6 +2737,16 @@ void R_FillSpan_rgba() DrawerCommandQueue::QueueCommand(); } +void R_DrawTiltedSpan_rgba(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float 
planelightfloat, fixed_t pviewx, fixed_t pviewy) +{ + DrawerCommandQueue::QueueCommand(y, x1, x2, plane_sz, plane_su, plane_sv, plane_shade, planeshade, planelightfloat, pviewx, pviewy); +} + +void R_DrawColoredSpan_rgba(int y, int x1, int x2) +{ + DrawerCommandQueue::QueueCommand(y, x1, x2); +} + static ShadeConstants slab_rgba_shade_constants; static const BYTE *slab_rgba_colormap; static fixed_t slab_rgba_light; diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index c976602f62..083258bf07 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -94,6 +94,9 @@ void R_DrawSpanAddClamp_rgba(); void R_DrawSpanMaskedAddClamp_rgba(); void R_FillSpan_rgba(); +void R_DrawTiltedSpan_rgba(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy); +void R_DrawColoredSpan_rgba(int y, int x1, int x2); + void R_SetupDrawSlab_rgba(FColormap *base_colormap, float light, int shade); void R_DrawSlab_rgba(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, BYTE *p); diff --git a/src/r_plane.cpp b/src/r_plane.cpp index c751fc5dcb..706d6fad75 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -480,23 +480,7 @@ void R_MapTiltedPlane_C (int y, int x1) void R_MapTiltedPlane_rgba (int y, int x1) { - int x2 = spanend[y]; - - // Slopes are broken currently in master. - // Until R_DrawTiltedPlane is fixed we are just going to fill with a solid color. 
- - uint32_t *source = (uint32_t*)ds_source; - int source_width = 1 << ds_xbits; - int source_height = 1 << ds_ybits; - - uint32_t *dest = ylookup[y] + x1 + (uint32_t*)dc_destorg; - - int count = x2 - x1 + 1; - while (count > 0) - { - *(dest++) = source[0]; - count--; - } + R_DrawTiltedSpan_rgba(y, x1, spanend[y], plane_sz, plane_su, plane_sv, plane_shade, planeshade, planelightfloat, pviewx, pviewy); } //========================================================================== @@ -512,12 +496,7 @@ void R_MapColoredPlane_C (int y, int x1) void R_MapColoredPlane_rgba(int y, int x1) { - uint32_t *dest = ylookup[y] + x1 + (uint32_t*)dc_destorg; - int count = (spanend[y] - x1 + 1); - uint32_t light = LightBgra::calc_light_multiplier(ds_light); - uint32_t color = LightBgra::shade_pal_index_simple(ds_color, light); - for (int i = 0; i < count; i++) - dest[i] = color; + R_DrawColoredSpan_rgba(y, x1, spanend[y]); } //========================================================================== From f56250b9107ab0446c040aca51419a7c1cd25479 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 14 Aug 2016 05:10:34 +0200 Subject: [PATCH 094/912] Remove premultiplied alpha --- src/r_draw_rgba.h | 23 +++++++++++++++-------- src/textures/texture.cpp | 12 ++---------- 2 files changed, 17 insertions(+), 18 deletions(-) diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index 083258bf07..ca54f72634 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -473,9 +473,9 @@ public: { uint32_t alpha = APART(fg) + (APART(fg) >> 7); // 255 -> 256 uint32_t inv_alpha = 256 - alpha; - uint32_t red = MIN(RPART(fg) + (RPART(bg) * inv_alpha) / 256, 255); - uint32_t green = MIN(GPART(fg) + (GPART(bg) * inv_alpha) / 256, 255); - uint32_t blue = MIN(BPART(fg) + (BPART(bg) * inv_alpha) / 256, 255); + uint32_t red = MIN(RPART(fg) * alpha + (RPART(bg) * inv_alpha) / 256, 255); + uint32_t green = MIN(GPART(fg) * alpha + (GPART(bg) * inv_alpha) / 256, 255); + uint32_t blue = MIN(BPART(fg) * alpha + 
(BPART(bg) * inv_alpha) / 256, 255); return 0xff000000 | (red << 16) | (green << 8) | blue; } }; @@ -861,11 +861,18 @@ public: __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); \ __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); \ __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); \ - __m128i m255 = _mm_set1_epi16(255); \ - __m128i inv_alpha_hi = _mm_sub_epi16(m255, _mm_shufflehi_epi16(_mm_shufflelo_epi16(fg_hi, _MM_SHUFFLE(3,3,3,3)), _MM_SHUFFLE(3,3,3,3))); \ - __m128i inv_alpha_lo = _mm_sub_epi16(m255, _mm_shufflehi_epi16(_mm_shufflelo_epi16(fg_lo, _MM_SHUFFLE(3,3,3,3)), _MM_SHUFFLE(3,3,3,3))); \ - inv_alpha_hi = _mm_add_epi16(inv_alpha_hi, _mm_srli_epi16(inv_alpha_hi, 7)); \ - inv_alpha_lo = _mm_add_epi16(inv_alpha_lo, _mm_srli_epi16(inv_alpha_lo, 7)); \ + __m128i m256 = _mm_set1_epi16(256); \ + __m128i alpha_hi = _mm_shufflehi_epi16(_mm_shufflelo_epi16(fg_hi, _MM_SHUFFLE(3,3,3,3)), _MM_SHUFFLE(3,3,3,3)); \ + __m128i alpha_lo = _mm_shufflehi_epi16(_mm_shufflelo_epi16(fg_lo, _MM_SHUFFLE(3,3,3,3)), _MM_SHUFFLE(3,3,3,3)); \ + alpha_hi = _mm_add_epi16(alpha_hi, _mm_srli_epi16(alpha_hi, 7)); \ + alpha_lo = _mm_add_epi16(alpha_lo, _mm_srli_epi16(alpha_lo, 7)); \ + __m128i inv_alpha_hi = _mm_sub_epi16(m256, alpha_hi); \ + __m128i inv_alpha_lo = _mm_sub_epi16(m256, alpha_lo); \ + fg_hi = _mm_mullo_epi16(fg_hi, alpha_hi); \ + fg_hi = _mm_srli_epi16(fg_hi, 8); \ + fg_lo = _mm_mullo_epi16(fg_lo, alpha_lo); \ + fg_lo = _mm_srli_epi16(fg_lo, 8); \ + fg = _mm_packus_epi16(fg_lo, fg_hi); \ bg_hi = _mm_mullo_epi16(bg_hi, inv_alpha_hi); \ bg_hi = _mm_srli_epi16(bg_hi, 8); \ bg_lo = _mm_mullo_epi16(bg_lo, inv_alpha_lo); \ diff --git a/src/textures/texture.cpp b/src/textures/texture.cpp index 05574e9da5..12e9d8549c 100644 --- a/src/textures/texture.cpp +++ b/src/textures/texture.cpp @@ -354,22 +354,14 @@ void FTexture::GenerateBgraFromBitmap(const FBitmap &bitmap) { CreatePixelsBgraWithMipmaps(); - // Transpose and premultiply alpha + // Transpose 
const uint32_t *src = (const uint32_t *)bitmap.GetPixels(); uint32_t *dest = PixelsBgra.data(); for (int x = 0; x < Width; x++) { for (int y = 0; y < Height; y++) { - uint32_t p = src[x + y * Width]; - uint32_t red = RPART(p); - uint32_t green = GPART(p); - uint32_t blue = BPART(p); - uint32_t alpha = APART(p); - red = (red * alpha + 127) / 255; - green = (green * alpha + 127) / 255; - blue = (blue * alpha + 127) / 255; - dest[y + x * Height] = (alpha << 24) | (red << 16) | (green << 8) | blue; + dest[y + x * Height] = src[x + y * Width]; } } From 9525d3690f64ac06c8f5cb083c122b4bbeaf1cf5 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 25 Aug 2016 06:25:05 +0200 Subject: [PATCH 095/912] Added gl_light_math and changed pixelpos + lights to be in eye space --- src/gl/dynlights/gl_dynlight1.cpp | 29 ++++++- src/gl/renderer/gl_renderstate.cpp | 1 + src/gl/shaders/gl_shader.cpp | 1 + src/gl/shaders/gl_shader.h | 1 + src/gl/system/gl_cvars.h | 1 + wadsrc/static/language.enu | 6 +- wadsrc/static/menudef.z | 8 ++ wadsrc/static/shaders/glsl/main.fp | 102 ++++++++++++++++++++++-- wadsrc/static/shaders/glsl/main.vp | 2 +- wadsrc/static/shaders/glsl/shaderdefs.i | 1 + 10 files changed, 140 insertions(+), 12 deletions(-) diff --git a/src/gl/dynlights/gl_dynlight1.cpp b/src/gl/dynlights/gl_dynlight1.cpp index 361b94618b..56db94e4d7 100644 --- a/src/gl/dynlights/gl_dynlight1.cpp +++ b/src/gl/dynlights/gl_dynlight1.cpp @@ -80,6 +80,11 @@ CUSTOM_CVAR (Bool, gl_lights_additive, false, CVAR_ARCHIVE | CVAR_GLOBALCONFIG gl_RecreateAllAttachedLights(); } +CUSTOM_CVAR(Int, gl_light_math, 0, CVAR_ARCHIVE | CVAR_GLOBALCONFIG) +{ + if (self < 0 || self > 2) self = 0; +} + //========================================================================== // // Sets up the parameters to render one dynamic light onto one plane @@ -128,10 +133,28 @@ bool gl_GetLight(int group, Plane & p, ADynamicLight * light, bool checkside, bo i = 1; } + float worldPos[4] = { (float)pos.X, (float)pos.Z, 
(float)pos.Y, 1.0f }; + float eyePos[4]; + gl_RenderState.mViewMatrix.multMatrixPoint(worldPos, eyePos); + + if (gl_light_math != 0) + { + // Adjust light slightly to make the range better match plain attenuation + radius *= 1.5; + + // Move light up because flasks/vials have their light source location at/below the floor. + // + // If the point is exactly on the wall plane it might cause some acne as some pixels could + // be in front and some behind. Move light just a tiny bit to avoid this. + eyePos[0] += 0.01f; + eyePos[1] += 5.01f; + eyePos[2] += 0.01f; + } + float *data = &ldata.arrays[i][ldata.arrays[i].Reserve(8)]; - data[0] = pos.X; - data[1] = pos.Z; - data[2] = pos.Y; + data[0] = eyePos[0]; + data[1] = eyePos[1]; + data[2] = eyePos[2]; data[3] = radius; data[4] = r; data[5] = g; diff --git a/src/gl/renderer/gl_renderstate.cpp b/src/gl/renderer/gl_renderstate.cpp index 83303d61e7..61b3e13d70 100644 --- a/src/gl/renderer/gl_renderstate.cpp +++ b/src/gl/renderer/gl_renderstate.cpp @@ -144,6 +144,7 @@ bool FRenderState::ApplyShader() activeShader->muTimer.Set(gl_frameMS * mShaderTimer / 1000.f); activeShader->muAlphaThreshold.Set(mAlphaThreshold); activeShader->muLightIndex.Set(mLightIndex); // will always be -1 for now + activeShader->muLightMath.Set(gl_light_math); activeShader->muClipSplit.Set(mClipSplit); if (mGlowEnabled) diff --git a/src/gl/shaders/gl_shader.cpp b/src/gl/shaders/gl_shader.cpp index 67b9a2d6ae..4acfd4bbea 100644 --- a/src/gl/shaders/gl_shader.cpp +++ b/src/gl/shaders/gl_shader.cpp @@ -246,6 +246,7 @@ bool FShader::Load(const char * name, const char * vert_prog_lump, const char * muColormapStart.Init(hShader, "uFixedColormapStart"); muColormapRange.Init(hShader, "uFixedColormapRange"); muLightIndex.Init(hShader, "uLightIndex"); + muLightMath.Init(hShader, "uLightMath"); muFogColor.Init(hShader, "uFogColor"); muDynLightColor.Init(hShader, "uDynLightColor"); muObjectColor.Init(hShader, "uObjectColor"); diff --git 
a/src/gl/shaders/gl_shader.h b/src/gl/shaders/gl_shader.h index 75e4b4e5e5..09b43310d3 100644 --- a/src/gl/shaders/gl_shader.h +++ b/src/gl/shaders/gl_shader.h @@ -221,6 +221,7 @@ class FShader FUniform1i muFixedColormap; FUniform4f muColormapStart; FUniform4f muColormapRange; + FBufferedUniform1i muLightMath; FBufferedUniform1i muLightIndex; FBufferedUniformPE muFogColor; FBufferedUniform4f muDynLightColor; diff --git a/src/gl/system/gl_cvars.h b/src/gl/system/gl_cvars.h index 0c31f53a8d..290f3ea8c2 100644 --- a/src/gl/system/gl_cvars.h +++ b/src/gl/system/gl_cvars.h @@ -29,6 +29,7 @@ EXTERN_CVAR (Float, gl_lights_size); EXTERN_CVAR (Bool, gl_lights_additive); EXTERN_CVAR (Bool, gl_light_sprites); EXTERN_CVAR (Bool, gl_light_particles); +EXTERN_CVAR (Int, gl_light_math); EXTERN_CVAR(Int, gl_fogmode) EXTERN_CVAR(Int, gl_lightmode) diff --git a/wadsrc/static/language.enu b/wadsrc/static/language.enu index 1e20ebeb27..79f71edf8c 100644 --- a/wadsrc/static/language.enu +++ b/wadsrc/static/language.enu @@ -2603,6 +2603,7 @@ GLLIGHTMNU_LIGHTPARTICLES = "Lights affect particles"; GLLIGHTMNU_FORCEADDITIVE = "Force additive lighting"; GLLIGHTMNU_LIGHTINTENSITY = "Light intensity"; GLLIGHTMNU_LIGHTSIZE = "Light size"; +GLLIGHTMNU_LIGHTMATH = "Light quality"; // OpenGL Preferences GLPREFMNU_TITLE = "OPENGL PREFERENCES"; @@ -2700,4 +2701,7 @@ OPTVAL_QUADBUFFERED = "Quad-buffered"; OPTVAL_UNCHARTED2 = "Uncharted 2"; OPTVAL_HEJLDAWSON = "Hejl Dawson"; OPTVAL_REINHARD = "Reinhard"; -OPTVAL_PALETTE = "Palette"; \ No newline at end of file +OPTVAL_PALETTE = "Palette"; +OPTVAL_LOW = "Low"; +OPTVAL_MEDIUM = "Medium"; +OPTVAL_HIGH = "High"; diff --git a/wadsrc/static/menudef.z b/wadsrc/static/menudef.z index 2386b1076c..0d080d7096 100644 --- a/wadsrc/static/menudef.z +++ b/wadsrc/static/menudef.z @@ -25,6 +25,13 @@ OptionValue "FilterModes" 4, "$OPTVAL_TRILINEAR" } +OptionValue "LightMathModes" +{ + 0, "$OPTVAL_LOW" + 1, "$OPTVAL_MEDIUM" + 2, "$OPTVAL_HIGH" +} + OptionValue 
"HWGammaModes" { 0, "$OPTVAL_ON" @@ -193,6 +200,7 @@ OptionMenu "GLLightOptions" Option "$GLLIGHTMNU_LIGHTSPRITES", gl_light_sprites, "YesNo" Option "$GLLIGHTMNU_LIGHTPARTICLES", gl_light_particles, "YesNo" Option "$GLLIGHTMNU_FORCEADDITIVE", gl_lights_additive, "YesNo" + Option "$GLLIGHTMNU_LIGHTMATH", gl_light_math, "LightMathModes" Slider "$GLLIGHTMNU_LIGHTINTENSITY", gl_lights_intensity, 0.0, 1.0, 0.1 Slider "$GLLIGHTMNU_LIGHTSIZE", gl_lights_size, 0.0, 2.0, 0.1 } diff --git a/wadsrc/static/shaders/glsl/main.fp b/wadsrc/static/shaders/glsl/main.fp index b10c99a172..2a783e00ce 100644 --- a/wadsrc/static/shaders/glsl/main.fp +++ b/wadsrc/static/shaders/glsl/main.fp @@ -25,6 +25,88 @@ vec4 Process(vec4 color); vec4 ProcessTexel(); vec4 ProcessLight(vec4 color); +// Smoothed normal used for the face, in eye space. Should be converted to an 'in' variable in the future. +vec3 pixelnormal; + +//=========================================================================== +// +// Calculates the face normal vector for the fragment, in eye space +// +//=========================================================================== + +vec3 calculateFaceNormal() +{ +#if __VERSION__ < 450 + vec3 dFdxPos = dFdx(pixelpos.xyz); + vec3 dFdyPos = dFdy(pixelpos.xyz); +#else + vec3 dFdxPos = dFdxCoarse(pixelpos.xyz); + vec3 dFdyPos = dFdyCoarse(pixelpos.xyz); +#endif + return normalize(cross(dFdxPos,dFdyPos)); +} + +//=========================================================================== +// +// Standard lambertian diffuse light calculation +// +//=========================================================================== + +float diffuseContribution(vec3 eyeLightDirection, vec3 eyeNormal) +{ + return max(dot(eyeNormal, eyeLightDirection), 0.0f); +} + +//=========================================================================== +// +// Blinn specular light calculation +// +//=========================================================================== + +float 
blinnSpecularContribution(float diffuseContribution, vec3 eyeLightDirection, vec3 eyePosition, vec3 eyeNormal, float glossiness, float specularLevel) +{ + if (diffuseContribution > 0.0f) + { + vec3 viewDir = normalize(-eyePosition); + vec3 halfDir = normalize(eyeLightDirection + viewDir); + float specAngle = max(dot(halfDir, eyeNormal), 0.0f); + float phExp = glossiness * 4.0f; + return specularLevel * pow(specAngle, phExp); + } + else + { + return 0.0f; + } +} + +//=========================================================================== +// +// Calculates the brightness of a dynamic point light +// +//=========================================================================== + +float pointLightAttenuation(vec4 lightpos) +{ + float attenuation = max(lightpos.w - distance(pixelpos.xyz, lightpos.xyz),0.0) / lightpos.w; + if (uLightMath == 0) + { + return attenuation; + } + else + { + vec3 lightDirection = normalize(lightpos.xyz - pixelpos.xyz); + float diffuseAmount = diffuseContribution(lightDirection, pixelnormal); + if (uLightMath == 1) + { + return attenuation * diffuseAmount; + } + else + { + float specularAmount = blinnSpecularContribution(diffuseAmount, lightDirection, pixelpos.xyz, pixelnormal, 3.0, 1.2); + return attenuation * (diffuseAmount + specularAmount); + } + } +} //=========================================================================== // @@ -223,7 +305,7 @@ vec4 getLightColor(float fogdist, float fogfactor) vec4 lightpos = lights[i]; vec4 lightcolor = lights[i+1]; - lightcolor.rgb *= max(lightpos.w - distance(pixelpos.xyz, lightpos.xyz),0.0) / lightpos.w; + lightcolor.rgb *= pointLightAttenuation(lightpos); dynlight.rgb += lightcolor.rgb; } // @@ -233,8 +315,8 @@ vec4 getLightColor(float fogdist, float fogfactor) { vec4 lightpos = lights[i]; vec4 lightcolor = lights[i+1]; - - lightcolor.rgb *= max(lightpos.w - distance(pixelpos.xyz, lightpos.xyz),0.0) / lightpos.w; + + lightcolor.rgb *= pointLightAttenuation(lightpos); dynlight.rgb -= 
lightcolor.rgb; } } @@ -267,6 +349,13 @@ vec4 applyFog(vec4 frag, float fogfactor) void main() { vec4 frag = ProcessTexel(); + +#if defined NUM_UBO_LIGHTS || defined SHADER_STORAGE_LIGHTS + if (uLightMath != 0) // Remove this if pixelnormal is converted to an 'in' variable + { + pixelnormal = calculateFaceNormal(); + } +#endif #ifndef NO_ALPHATEST if (frag.a <= uAlphaThreshold) discard; @@ -292,12 +381,11 @@ void main() } else { - fogdist = max(16.0, distance(pixelpos.xyz, uCameraPos.xyz)); + fogdist = max(16.0, length(pixelpos.xyz)); } fogfactor = exp2 (uFogDensity * fogdist); } - frag *= getLightColor(fogdist, fogfactor); #if defined NUM_UBO_LIGHTS || defined SHADER_STORAGE_LIGHTS @@ -316,7 +404,7 @@ void main() vec4 lightpos = lights[i]; vec4 lightcolor = lights[i+1]; - lightcolor.rgb *= max(lightpos.w - distance(pixelpos.xyz, lightpos.xyz),0.0) / lightpos.w; + lightcolor.rgb *= pointLightAttenuation(lightpos); addlight.rgb += lightcolor.rgb; } frag.rgb = clamp(frag.rgb + desaturate(addlight).rgb, 0.0, 1.0); @@ -363,7 +451,7 @@ void main() } else { - fogdist = max(16.0, distance(pixelpos.xyz, uCameraPos.xyz)); + fogdist = max(16.0, length(pixelpos.xyz)); } fogfactor = exp2 (uFogDensity * fogdist); diff --git a/wadsrc/static/shaders/glsl/main.vp b/wadsrc/static/shaders/glsl/main.vp index a2c1bac5bb..27faafb86e 100644 --- a/wadsrc/static/shaders/glsl/main.vp +++ b/wadsrc/static/shaders/glsl/main.vp @@ -43,7 +43,7 @@ void main() vColor = aColor; #ifndef SIMPLE - pixelpos.xyz = worldcoord.xyz; + pixelpos.xyz = eyeCoordPos.xyz; pixelpos.w = -eyeCoordPos.z/eyeCoordPos.w; glowdist.x = -((uGlowTopPlane.w + uGlowTopPlane.x * worldcoord.x + uGlowTopPlane.y * worldcoord.z) * uGlowTopPlane.z) - worldcoord.y; diff --git a/wadsrc/static/shaders/glsl/shaderdefs.i b/wadsrc/static/shaders/glsl/shaderdefs.i index 3701694bcd..0f0545b2d9 100644 --- a/wadsrc/static/shaders/glsl/shaderdefs.i +++ b/wadsrc/static/shaders/glsl/shaderdefs.i @@ -44,6 +44,7 @@ uniform int uFogEnabled; // 
dynamic lights uniform int uLightIndex; +uniform int uLightMath; // 0, when using only attenuation, 1 for diffuse light, 2 for blinn specular light // quad drawer stuff #ifdef USE_QUAD_DRAWER From 737e70077421023fcebf7a2c177afa032f3a7297 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 29 Aug 2016 13:10:22 +0200 Subject: [PATCH 096/912] Added SSAO pass --- src/CMakeLists.txt | 1 + src/gl/renderer/gl_postprocess.cpp | 106 ++++++++++++++- src/gl/renderer/gl_renderbuffers.cpp | 152 +++++++++++++++++----- src/gl/renderer/gl_renderbuffers.h | 30 +++-- src/gl/renderer/gl_renderer.cpp | 5 + src/gl/renderer/gl_renderer.h | 9 ++ src/gl/scene/gl_scene.cpp | 10 +- src/gl/shaders/gl_ambientshader.cpp | 89 +++++++++++++ src/gl/shaders/gl_ambientshader.h | 39 ++++++ src/gl/stereo3d/gl_stereo3d.cpp | 2 +- wadsrc/static/language.enu | 1 + wadsrc/static/menudef.z | 1 + wadsrc/static/shaders/glsl/lineardepth.fp | 16 +++ wadsrc/static/shaders/glsl/ssao.fp | 117 +++++++++++++++++ 14 files changed, 531 insertions(+), 47 deletions(-) create mode 100644 src/gl/shaders/gl_ambientshader.cpp create mode 100644 src/gl/shaders/gl_ambientshader.h create mode 100644 wadsrc/static/shaders/glsl/lineardepth.fp create mode 100644 wadsrc/static/shaders/glsl/ssao.fp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 03f3372f06..13ef11ec3d 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1113,6 +1113,7 @@ set( FASTMATH_SOURCES gl/shaders/gl_shaderprogram.cpp gl/shaders/gl_presentshader.cpp gl/shaders/gl_bloomshader.cpp + gl/shaders/gl_ambientshader.cpp gl/shaders/gl_blurshader.cpp gl/shaders/gl_tonemapshader.cpp gl/shaders/gl_lensshader.cpp diff --git a/src/gl/renderer/gl_postprocess.cpp b/src/gl/renderer/gl_postprocess.cpp index 3a83f4caf5..c7d0494c80 100644 --- a/src/gl/renderer/gl_postprocess.cpp +++ b/src/gl/renderer/gl_postprocess.cpp @@ -68,6 +68,7 @@ #include "gl/renderer/gl_postprocessstate.h" #include "gl/data/gl_data.h" #include "gl/data/gl_vertexbuffer.h" 
+#include "gl/shaders/gl_ambientshader.h" #include "gl/shaders/gl_bloomshader.h" #include "gl/shaders/gl_blurshader.h" #include "gl/shaders/gl_tonemapshader.h" @@ -106,6 +107,20 @@ CVAR(Float, gl_lens_k, -0.12f, 0) CVAR(Float, gl_lens_kcube, 0.1f, 0) CVAR(Float, gl_lens_chromatic, 1.12f, 0) +CVAR(Bool, gl_ssao, false, CVAR_ARCHIVE | CVAR_GLOBALCONFIG) +CVAR(Bool, gl_ssao_debug, false, 0) +CVAR(Float, gl_ssao_bias, 0.5f, 0) +CVAR(Float, gl_ssao_radius, 100.0f, 0) +CUSTOM_CVAR(Float, gl_ssao_blur_amount, 6.0f, 0) +{ + if (self < 0.1f) self = 0.1f; +} +CUSTOM_CVAR(Int, gl_ssao_blur_samples, 9, 0) +{ + if (self < 3 || self > 15 || self % 2 == 0) + self = 9; +} + EXTERN_CVAR(Float, vid_brightness) EXTERN_CVAR(Float, vid_contrast) @@ -117,6 +132,95 @@ void FGLRenderer::RenderScreenQuad() GLRenderer->mVBO->RenderArray(GL_TRIANGLE_STRIP, FFlatVertexBuffer::PRESENT_INDEX, 4); } +void FGLRenderer::PostProcessScene() +{ + if (FGLRenderBuffers::IsEnabled()) mBuffers->BlitSceneToTexture(); + AmbientOccludeScene(); + BloomScene(); + TonemapScene(); + LensDistortScene(); +} + +//----------------------------------------------------------------------------- +// +// Adds ambient occlusion to the scene +// +//----------------------------------------------------------------------------- + +void FGLRenderer::AmbientOccludeScene() +{ + if (!gl_ssao || !FGLRenderBuffers::IsEnabled()) + return; + + FGLDebug::PushGroup("AmbientOccludeScene"); + + FGLPostProcessState savedState; + + float bias = gl_ssao_bias; + float aoRadius = gl_ssao_radius; + const float blurAmount = gl_ssao_blur_amount; + int blurSampleCount = gl_ssao_blur_samples; + + //float tanHalfFovy = tan(fovy * (M_PI / 360.0f)); + float tanHalfFovy = 1.0f / 1.33333302f; //gl_RenderState.mProjectionMatrix.get()[5]; + float invFocalLenX = tanHalfFovy * (mBuffers->AmbientWidth / (float)mBuffers->AmbientHeight); + float invFocalLenY = tanHalfFovy; + float nDotVBias = clamp(bias, 0.0f, 1.0f); + float r2 = aoRadius * aoRadius; + + // 
Calculate linear depth values + glBindFramebuffer(GL_FRAMEBUFFER, mBuffers->AmbientFB0); + glViewport(0, 0, mBuffers->AmbientWidth, mBuffers->AmbientHeight); + mBuffers->BindSceneDepthTexture(0); + mLinearDepthShader->Bind(); + mLinearDepthShader->DepthTexture.Set(0); + mLinearDepthShader->LinearizeDepthA.Set(1.0f / GetZFar() - 1.0f / GetZNear()); + mLinearDepthShader->LinearizeDepthB.Set(MAX(1.0f / GetZNear(), 1.e-8f)); + mLinearDepthShader->InverseDepthRangeA.Set(1.0f); + mLinearDepthShader->InverseDepthRangeB.Set(0.0f); + RenderScreenQuad(); + + // Apply ambient occlusion + glBindFramebuffer(GL_FRAMEBUFFER, mBuffers->AmbientFB1); + glBindTexture(GL_TEXTURE_2D, mBuffers->AmbientTexture0); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + mSSAOShader->Bind(); + mSSAOShader->DepthTexture.Set(0); + mSSAOShader->UVToViewA.Set(2.0f * invFocalLenX, -2.0f * invFocalLenY); + mSSAOShader->UVToViewB.Set(-invFocalLenX, invFocalLenY); + mSSAOShader->InvFullResolution.Set(1.0f / mBuffers->AmbientWidth, 1.0f / mBuffers->AmbientHeight); + mSSAOShader->NDotVBias.Set(nDotVBias); + mSSAOShader->NegInvR2.Set(-1.0f / r2); + mSSAOShader->RadiusToScreen.Set(aoRadius * 0.5 / tanHalfFovy * mBuffers->AmbientHeight); + mSSAOShader->AOMultiplier.Set(1.0f / (1.0f - nDotVBias)); + RenderScreenQuad(); + + // Blur SSAO texture + mBlurShader->BlurHorizontal(this, blurAmount, blurSampleCount, mBuffers->AmbientTexture1, mBuffers->AmbientFB0, mBuffers->AmbientWidth, mBuffers->AmbientHeight); + mBlurShader->BlurVertical(this, blurAmount, blurSampleCount, mBuffers->AmbientTexture0, mBuffers->AmbientFB1, mBuffers->AmbientWidth, mBuffers->AmbientHeight); + + // Add SSAO back to scene texture: + mBuffers->BindCurrentFB(); + glViewport(mSceneViewport.left, mSceneViewport.top, mSceneViewport.width, mSceneViewport.height); + glEnable(GL_BLEND); + glBlendEquation(GL_FUNC_ADD); + if (gl_ssao_debug) + 
glBlendFunc(GL_ONE, GL_ZERO); + else + glBlendFunc(GL_ZERO, GL_SRC_COLOR); + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_2D, mBuffers->AmbientTexture1); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + mBloomCombineShader->Bind(); + mBloomCombineShader->BloomTexture.Set(0); + RenderScreenQuad(); + glViewport(mScreenViewport.left, mScreenViewport.top, mScreenViewport.width, mScreenViewport.height); + + FGLDebug::PopGroup(); +} + //----------------------------------------------------------------------------- // // Adds bloom contribution to scene texture @@ -126,7 +230,7 @@ void FGLRenderer::RenderScreenQuad() void FGLRenderer::BloomScene() { // Only bloom things if enabled and no special fixed light mode is active - if (!gl_bloom || !FGLRenderBuffers::IsEnabled() || gl_fixedcolormap != CM_DEFAULT) + if (!gl_bloom || !FGLRenderBuffers::IsEnabled() || gl_fixedcolormap != CM_DEFAULT || gl_ssao_debug) return; FGLDebug::PushGroup("BloomScene"); diff --git a/src/gl/renderer/gl_renderbuffers.cpp b/src/gl/renderer/gl_renderbuffers.cpp index fd98522d87..c1976565b8 100644 --- a/src/gl/renderer/gl_renderbuffers.cpp +++ b/src/gl/renderer/gl_renderbuffers.cpp @@ -86,15 +86,16 @@ FGLRenderBuffers::~FGLRenderBuffers() ClearScene(); ClearPipeline(); ClearBloom(); + ClearAmbientOcclusion(); } void FGLRenderBuffers::ClearScene() { DeleteFrameBuffer(mSceneFB); - DeleteRenderBuffer(mSceneMultisample); - DeleteRenderBuffer(mSceneDepthStencil); - DeleteRenderBuffer(mSceneDepth); - DeleteRenderBuffer(mSceneStencil); + DeleteRenderBuffer(mSceneMSColor); + DeleteRenderBuffer(mSceneMSDepthStencil); + DeleteRenderBuffer(mSceneMSDepth); + DeleteRenderBuffer(mSceneMSStencil); } void FGLRenderBuffers::ClearPipeline() @@ -104,6 +105,9 @@ void FGLRenderBuffers::ClearPipeline() DeleteFrameBuffer(mPipelineFB[i]); DeleteTexture(mPipelineTexture[i]); } + DeleteTexture(mPipelineDepthStencil); + 
DeleteTexture(mPipelineDepth); + DeleteTexture(mPipelineStencil); } void FGLRenderBuffers::ClearBloom() @@ -119,6 +123,14 @@ void FGLRenderBuffers::ClearBloom() } } +void FGLRenderBuffers::ClearAmbientOcclusion() +{ + DeleteFrameBuffer(AmbientFB0); + DeleteFrameBuffer(AmbientFB1); + DeleteTexture(AmbientTexture0); + DeleteTexture(AmbientTexture1); +} + void FGLRenderBuffers::DeleteTexture(GLuint &handle) { if (handle != 0) @@ -178,11 +190,12 @@ bool FGLRenderBuffers::Setup(int width, int height, int sceneWidth, int sceneHei } // Bloom bluring buffers need to match the scene to avoid bloom bleeding artifacts - if (mBloomWidth != sceneWidth || mBloomHeight != sceneHeight) + if (mSceneWidth != sceneWidth || mSceneHeight != sceneHeight) { CreateBloom(sceneWidth, sceneHeight); - mBloomWidth = sceneWidth; - mBloomHeight = sceneHeight; + CreateAmbientOcclusion(sceneWidth, sceneHeight); + mSceneWidth = sceneWidth; + mSceneHeight = sceneHeight; } glBindTexture(GL_TEXTURE_2D, textureBinding); @@ -198,8 +211,8 @@ bool FGLRenderBuffers::Setup(int width, int height, int sceneWidth, int sceneHei mWidth = 0; mHeight = 0; mSamples = 0; - mBloomWidth = 0; - mBloomHeight = 0; + mSceneWidth = 0; + mSceneHeight = 0; } return !FailedCreate; @@ -216,18 +229,27 @@ void FGLRenderBuffers::CreateScene(int width, int height, int samples) ClearScene(); if (samples > 1) - mSceneMultisample = CreateRenderBuffer("SceneMultisample", GetHdrFormat(), samples, width, height); - - if ((gl.flags & RFL_NO_DEPTHSTENCIL) != 0) { - mSceneDepth = CreateRenderBuffer("SceneDepth", GL_DEPTH_COMPONENT24, samples, width, height); - mSceneStencil = CreateRenderBuffer("SceneStencil", GL_STENCIL_INDEX8, samples, width, height); - mSceneFB = CreateFrameBuffer("SceneFB", samples > 1 ? 
mSceneMultisample : mPipelineTexture[0], mSceneDepth, mSceneStencil, samples > 1); + mSceneMSColor = CreateRenderBuffer("SceneMSColor", GetHdrFormat(), samples, width, height); + + if ((gl.flags & RFL_NO_DEPTHSTENCIL) != 0) + { + mSceneMSDepth = CreateRenderBuffer("SceneMSDepth", GL_DEPTH_COMPONENT24, samples, width, height); + mSceneMSStencil = CreateRenderBuffer("SceneMSStencil", GL_STENCIL_INDEX8, samples, width, height); + mSceneFB = CreateFrameBuffer("SceneFB", mSceneMSColor, mSceneMSDepth, mSceneMSStencil, true); + } + else + { + mSceneMSDepthStencil = CreateRenderBuffer("SceneMSDepthStencil", GL_DEPTH24_STENCIL8, samples, width, height); + mSceneFB = CreateFrameBuffer("SceneFB", mSceneMSColor, mSceneMSDepthStencil, true); + } } else { - mSceneDepthStencil = CreateRenderBuffer("SceneDepthStencil", GL_DEPTH24_STENCIL8, samples, width, height); - mSceneFB = CreateFrameBuffer("SceneFB", samples > 1 ? mSceneMultisample : mPipelineTexture[0], mSceneDepthStencil, samples > 1); + if ((gl.flags & RFL_NO_DEPTHSTENCIL) != 0) + mSceneFB = CreateFrameBuffer("SceneFB", mPipelineTexture[0], mPipelineDepth, mPipelineStencil, false); + else + mSceneFB = CreateFrameBuffer("SceneFB", mPipelineTexture[0], mPipelineDepthStencil, false); } } @@ -241,10 +263,23 @@ void FGLRenderBuffers::CreatePipeline(int width, int height) { ClearPipeline(); + if ((gl.flags & RFL_NO_DEPTHSTENCIL) != 0) + { + mPipelineDepth = Create2DTexture("PipelineDepth", GL_DEPTH_COMPONENT24, width, height); + mPipelineStencil = Create2DTexture("PipelineStencil", GL_STENCIL_INDEX8, width, height); + } + else + { + mPipelineDepthStencil = Create2DTexture("PipelineDepthStencil", GL_DEPTH24_STENCIL8, width, height); + } + for (int i = 0; i < NumPipelineTextures; i++) { mPipelineTexture[i] = Create2DTexture("PipelineTexture", GetHdrFormat(), width, height); - mPipelineFB[i] = CreateFrameBuffer("PipelineFB", mPipelineTexture[i]); + if ((gl.flags & RFL_NO_DEPTHSTENCIL) != 0) + mPipelineFB[i] = 
CreateFrameBuffer("PipelineFB", mPipelineTexture[i], mPipelineDepth, mPipelineStencil, false); + else + mPipelineFB[i] = CreateFrameBuffer("PipelineFB", mPipelineTexture[i], mPipelineDepthStencil, false); } } @@ -280,6 +315,27 @@ void FGLRenderBuffers::CreateBloom(int width, int height) } } +//========================================================================== +// +// Creates ambient occlusion working buffers +// +//========================================================================== + +void FGLRenderBuffers::CreateAmbientOcclusion(int width, int height) +{ + ClearAmbientOcclusion(); + + if (width <= 0 || height <= 0) + return; + + AmbientWidth = width / 2; + AmbientHeight = height / 2; + AmbientTexture0 = Create2DTexture("AmbientTexture0", GetHdrFormat(), AmbientWidth, AmbientHeight); + AmbientTexture1 = Create2DTexture("AmbientTexture1", GetHdrFormat(), AmbientWidth, AmbientHeight); + AmbientFB0 = CreateFrameBuffer("AmbientFB0", AmbientTexture0); + AmbientFB1 = CreateFrameBuffer("AmbientFB1", AmbientTexture1); +} + //========================================================================== // // Fallback support for older OpenGL where RGBA16F might not be available @@ -299,12 +355,24 @@ GLuint FGLRenderBuffers::GetHdrFormat() GLuint FGLRenderBuffers::Create2DTexture(const FString &name, GLuint format, int width, int height) { - GLuint type = (format == GL_RGBA16F) ? GL_FLOAT : GL_UNSIGNED_BYTE; GLuint handle = 0; glGenTextures(1, &handle); glBindTexture(GL_TEXTURE_2D, handle); FGLDebug::LabelObject(GL_TEXTURE, handle, name); - glTexImage2D(GL_TEXTURE_2D, 0, format, width, height, 0, GL_RGBA, type, nullptr); + + GLenum dataformat, datatype; + switch (format) // Special thanks to the designers of OpenGL.. 
+ { + case GL_RGBA8: dataformat = GL_RGBA; datatype = GL_UNSIGNED_BYTE; break; + case GL_RGBA16: dataformat = GL_RGBA; datatype = GL_UNSIGNED_SHORT; break; + case GL_RGBA16F: dataformat = GL_RGBA; datatype = GL_FLOAT; break; + case GL_DEPTH_COMPONENT24: dataformat = GL_DEPTH_COMPONENT; datatype = GL_FLOAT; break; + case GL_STENCIL_INDEX8: dataformat = GL_STENCIL_INDEX; datatype = GL_INT; break; + case GL_DEPTH24_STENCIL8: dataformat = GL_DEPTH_STENCIL; datatype = GL_UNSIGNED_INT_24_8; break; + default: I_FatalError("Unknown format passed to FGLRenderBuffers.Create2DTexture"); + } + + glTexImage2D(GL_TEXTURE_2D, 0, format, width, height, 0, dataformat, datatype, nullptr); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); @@ -359,34 +427,45 @@ GLuint FGLRenderBuffers::CreateFrameBuffer(const FString &name, GLuint colorbuff return handle; } -GLuint FGLRenderBuffers::CreateFrameBuffer(const FString &name, GLuint colorbuffer, GLuint depthstencil, bool colorIsARenderBuffer) +GLuint FGLRenderBuffers::CreateFrameBuffer(const FString &name, GLuint colorbuffer, GLuint depthstencil, bool fromRenderBuffers) { GLuint handle = 0; glGenFramebuffers(1, &handle); glBindFramebuffer(GL_FRAMEBUFFER, handle); FGLDebug::LabelObject(GL_FRAMEBUFFER, handle, name); - if (colorIsARenderBuffer) + if (fromRenderBuffers) + { glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, colorbuffer); + glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_RENDERBUFFER, depthstencil); + } else + { glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, colorbuffer, 0); - glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_RENDERBUFFER, depthstencil); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, depthstencil, 0); + } if 
(CheckFrameBufferCompleteness()) ClearFrameBuffer(true, true); return handle; } -GLuint FGLRenderBuffers::CreateFrameBuffer(const FString &name, GLuint colorbuffer, GLuint depth, GLuint stencil, bool colorIsARenderBuffer) +GLuint FGLRenderBuffers::CreateFrameBuffer(const FString &name, GLuint colorbuffer, GLuint depth, GLuint stencil, bool fromRenderBuffers) { GLuint handle = 0; glGenFramebuffers(1, &handle); glBindFramebuffer(GL_FRAMEBUFFER, handle); FGLDebug::LabelObject(GL_FRAMEBUFFER, handle, name); - if (colorIsARenderBuffer) + if (fromRenderBuffers) + { glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, colorbuffer); + glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_RENDERBUFFER, depth); + glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_RENDERBUFFER, stencil); + } else + { glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, colorbuffer, 0); - glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_RENDERBUFFER, depth); - glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_RENDERBUFFER, stencil); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, depth, 0); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, stencil, 0); + } if (CheckFrameBufferCompleteness()) ClearFrameBuffer(true, true); return handle; @@ -441,7 +520,7 @@ void FGLRenderBuffers::ClearFrameBuffer(bool stencil, bool depth) glGetIntegerv(GL_STENCIL_CLEAR_VALUE, &stencilValue); glGetDoublev(GL_DEPTH_CLEAR_VALUE, &depthValue); glDisable(GL_SCISSOR_TEST); - glClearColor(0.0, 0.0, 0.0, 0.0); + glClearColor(1.0, 0.0, 0.0, 0.0); glClearDepth(0.0); glClearStencil(0); GLenum flags = GL_COLOR_BUFFER_BIT; @@ -471,7 +550,7 @@ void FGLRenderBuffers::BlitSceneToTexture() glBindFramebuffer(GL_READ_FRAMEBUFFER, mSceneFB); glBindFramebuffer(GL_DRAW_FRAMEBUFFER, mPipelineFB[mCurrentPipelineTexture]); - glBlitFramebuffer(0, 0, mWidth, mHeight, 
0, 0, mWidth, mHeight, GL_COLOR_BUFFER_BIT, GL_NEAREST); + glBlitFramebuffer(0, 0, mWidth, mHeight, 0, 0, mWidth, mHeight, GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT, GL_NEAREST); if ((gl.flags & RFL_INVALIDATE_BUFFER) != 0) { @@ -494,6 +573,21 @@ void FGLRenderBuffers::BindSceneFB() glBindFramebuffer(GL_FRAMEBUFFER, mSceneFB); } +//========================================================================== +// +// Binds the depth texture to the specified texture unit +// +//========================================================================== + +void FGLRenderBuffers::BindSceneDepthTexture(int index) +{ + glActiveTexture(GL_TEXTURE0 + index); + if ((gl.flags & RFL_NO_DEPTHSTENCIL) != 0) + glBindTexture(GL_TEXTURE_2D, mPipelineDepth); + else + glBindTexture(GL_TEXTURE_2D, mPipelineDepthStencil); +} + //========================================================================== // // Binds the current scene/effect/hud texture to the specified texture unit diff --git a/src/gl/renderer/gl_renderbuffers.h b/src/gl/renderer/gl_renderbuffers.h index ee6d8de5ed..898193e59c 100644 --- a/src/gl/renderer/gl_renderbuffers.h +++ b/src/gl/renderer/gl_renderbuffers.h @@ -23,6 +23,7 @@ public: bool Setup(int width, int height, int sceneWidth, int sceneHeight); void BindSceneFB(); + void BindSceneDepthTexture(int index); void BlitSceneToTexture(); void BindCurrentTexture(int index); @@ -35,6 +36,14 @@ public: enum { NumBloomLevels = 4 }; FGLBloomTextureLevel BloomLevels[NumBloomLevels]; + // Ambient occlusion buffers + GLuint AmbientTexture0 = 0; + GLuint AmbientTexture1 = 0; + GLuint AmbientFB0 = 0; + GLuint AmbientFB1 = 0; + int AmbientWidth = 0; + int AmbientHeight = 0; + static bool IsEnabled(); int GetWidth() const { return mWidth; } @@ -44,15 +53,17 @@ private: void ClearScene(); void ClearPipeline(); void ClearBloom(); + void ClearAmbientOcclusion(); void CreateScene(int width, int height, int samples); void CreatePipeline(int width, int height); 
void CreateBloom(int width, int height); + void CreateAmbientOcclusion(int width, int height); GLuint Create2DTexture(const FString &name, GLuint format, int width, int height); GLuint CreateRenderBuffer(const FString &name, GLuint format, int width, int height); GLuint CreateRenderBuffer(const FString &name, GLuint format, int samples, int width, int height); GLuint CreateFrameBuffer(const FString &name, GLuint colorbuffer); - GLuint CreateFrameBuffer(const FString &name, GLuint colorbuffer, GLuint depthstencil, bool colorIsARenderBuffer); - GLuint CreateFrameBuffer(const FString &name, GLuint colorbuffer, GLuint depth, GLuint stencil, bool colorIsARenderBuffer); + GLuint CreateFrameBuffer(const FString &name, GLuint colorbuffer, GLuint depthstencil, bool fromRenderBuffers); + GLuint CreateFrameBuffer(const FString &name, GLuint colorbuffer, GLuint depth, GLuint stencil, bool fromRenderBuffers); bool CheckFrameBufferCompleteness(); void ClearFrameBuffer(bool stencil, bool depth); void DeleteTexture(GLuint &handle); @@ -65,22 +76,25 @@ private: int mHeight = 0; int mSamples = 0; int mMaxSamples = 0; - int mBloomWidth = 0; - int mBloomHeight = 0; + int mSceneWidth = 0; + int mSceneHeight = 0; static const int NumPipelineTextures = 2; int mCurrentPipelineTexture = 0; // Buffers for the scene - GLuint mSceneMultisample = 0; - GLuint mSceneDepthStencil = 0; - GLuint mSceneDepth = 0; - GLuint mSceneStencil = 0; + GLuint mSceneMSColor = 0; + GLuint mSceneMSDepthStencil = 0; + GLuint mSceneMSDepth = 0; + GLuint mSceneMSStencil = 0; GLuint mSceneFB = 0; // Effect/HUD buffers GLuint mPipelineTexture[NumPipelineTextures]; GLuint mPipelineFB[NumPipelineTextures]; + GLuint mPipelineDepthStencil = 0; + GLuint mPipelineDepth = 0; + GLuint mPipelineStencil = 0; // Back buffer frame buffer GLuint mOutputFB = 0; diff --git a/src/gl/renderer/gl_renderer.cpp b/src/gl/renderer/gl_renderer.cpp index c8f2eb2249..9e64c13b0e 100644 --- a/src/gl/renderer/gl_renderer.cpp +++ 
b/src/gl/renderer/gl_renderer.cpp @@ -64,6 +64,7 @@ #include "gl/data/gl_vertexbuffer.h" #include "gl/scene/gl_drawinfo.h" #include "gl/shaders/gl_shader.h" +#include "gl/shaders/gl_ambientshader.h" #include "gl/shaders/gl_bloomshader.h" #include "gl/shaders/gl_blurshader.h" #include "gl/shaders/gl_tonemapshader.h" @@ -120,6 +121,8 @@ void gl_FlushModels(); void FGLRenderer::Initialize(int width, int height) { mBuffers = new FGLRenderBuffers(); + mLinearDepthShader = new FLinearDepthShader(); + mSSAOShader = new FSSAOShader(); mBloomExtractShader = new FBloomExtractShader(); mBloomCombineShader = new FBloomCombineShader(); mBlurShader = new FBlurShader(); @@ -179,6 +182,8 @@ FGLRenderer::~FGLRenderer() } if (mBuffers) delete mBuffers; if (mPresentShader) delete mPresentShader; + if (mLinearDepthShader) delete mLinearDepthShader; + if (mSSAOShader) delete mSSAOShader; if (mBloomExtractShader) delete mBloomExtractShader; if (mBloomCombineShader) delete mBloomCombineShader; if (mBlurShader) delete mBlurShader; diff --git a/src/gl/renderer/gl_renderer.h b/src/gl/renderer/gl_renderer.h index 4b663680f6..b07870738e 100644 --- a/src/gl/renderer/gl_renderer.h +++ b/src/gl/renderer/gl_renderer.h @@ -19,6 +19,8 @@ class FLightBuffer; class FSamplerManager; class DPSprite; class FGLRenderBuffers; +class FLinearDepthShader; +class FSSAOShader; class FBloomExtractShader; class FBloomCombineShader; class FBlurShader; @@ -89,6 +91,8 @@ public: int mOldFBID; FGLRenderBuffers *mBuffers; + FLinearDepthShader *mLinearDepthShader; + FSSAOShader *mSSAOShader; FBloomExtractShader *mBloomExtractShader; FBloomCombineShader *mBloomCombineShader; FBlurShader *mBlurShader; @@ -164,6 +168,8 @@ public: void SetFixedColormap (player_t *player); void WriteSavePic (player_t *player, FILE *file, int width, int height); void EndDrawScene(sector_t * viewsector); + void PostProcessScene(); + void AmbientOccludeScene(); void BloomScene(); void TonemapScene(); void BindTonemapPalette(int texunit); @@ 
-186,6 +192,9 @@ public: void FillSimplePoly(FTexture *texture, FVector2 *points, int npoints, double originx, double originy, double scalex, double scaley, DAngle rotation, FDynamicColormap *colormap, int lightlevel); + + static float GetZNear() { return 5.f; } + static float GetZFar() { return 65536.f; } }; // Global functions. Make them members of GLRenderer later? diff --git a/src/gl/scene/gl_scene.cpp b/src/gl/scene/gl_scene.cpp index 08591283c0..7537010b30 100644 --- a/src/gl/scene/gl_scene.cpp +++ b/src/gl/scene/gl_scene.cpp @@ -221,7 +221,7 @@ void FGLRenderer::SetProjection(float fov, float ratio, float fovratio) { float fovy = 2 * RAD2DEG(atan(tan(DEG2RAD(fov) / 2) / fovratio)); - gl_RenderState.mProjectionMatrix.perspective(fovy, ratio, 5.f, 65536.f); + gl_RenderState.mProjectionMatrix.perspective(fovy, ratio, GetZNear(), GetZFar()); } // raw matrix input from stereo 3d modes @@ -855,13 +855,7 @@ sector_t * FGLRenderer::RenderViewpoint (AActor * camera, GL_IRECT * bounds, flo ProcessScene(toscreen); if (mainview && toscreen) EndDrawScene(retval); // do not call this for camera textures. - if (mainview) - { - if (FGLRenderBuffers::IsEnabled()) mBuffers->BlitSceneToTexture(); - BloomScene(); - TonemapScene(); - LensDistortScene(); - } + if (mainview) PostProcessScene(); mDrawingScene2D = false; eye->TearDown(); } diff --git a/src/gl/shaders/gl_ambientshader.cpp b/src/gl/shaders/gl_ambientshader.cpp new file mode 100644 index 0000000000..d9d4a34257 --- /dev/null +++ b/src/gl/shaders/gl_ambientshader.cpp @@ -0,0 +1,89 @@ +/* +** gl_bloomshader.cpp +** Shaders used for screen space ambient occlusion +** +**--------------------------------------------------------------------------- +** Copyright 2016 Magnus Norddahl +** All rights reserved. +** +** Redistribution and use in source and binary forms, with or without +** modification, are permitted provided that the following conditions +** are met: +** +** 1. 
Redistributions of source code must retain the above copyright +** notice, this list of conditions and the following disclaimer. +** 2. Redistributions in binary form must reproduce the above copyright +** notice, this list of conditions and the following disclaimer in the +** documentation and/or other materials provided with the distribution. +** 3. The name of the author may not be used to endorse or promote products +** derived from this software without specific prior written permission. +** 4. When not used as part of GZDoom or a GZDoom derivative, this code will be +** covered by the terms of the GNU Lesser General Public License as published +** by the Free Software Foundation; either version 2.1 of the License, or (at +** your option) any later version. +** 5. Full disclosure of the entire project's source code, except for third +** party libraries is mandatory. (NOTE: This clause is non-negotiable!) +** +** THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +** IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +** OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +** IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +** INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +** NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +** DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +** THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +** (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +** THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**--------------------------------------------------------------------------- +** +*/ + +#include "gl/system/gl_system.h" +#include "files.h" +#include "m_swap.h" +#include "v_video.h" +#include "gl/gl_functions.h" +#include "vectors.h" +#include "gl/system/gl_interface.h" +#include "gl/system/gl_framebuffer.h" +#include "gl/system/gl_cvars.h" +#include "gl/shaders/gl_ambientshader.h" + +void FLinearDepthShader::Bind() +{ + if (!mShader) + { + mShader.Compile(FShaderProgram::Vertex, "shaders/glsl/screenquad.vp", "", 330); + mShader.Compile(FShaderProgram::Fragment, "shaders/glsl/lineardepth.fp", "", 330); + mShader.SetFragDataLocation(0, "FragColor"); + mShader.Link("shaders/glsl/lineardepth"); + mShader.SetAttribLocation(0, "PositionInProjection"); + DepthTexture.Init(mShader, "DepthTexture"); + LinearizeDepthA.Init(mShader, "LinearizeDepthA"); + LinearizeDepthB.Init(mShader, "LinearizeDepthB"); + InverseDepthRangeA.Init(mShader, "InverseDepthRangeA"); + InverseDepthRangeB.Init(mShader, "InverseDepthRangeB"); + } + mShader.Bind(); +} + +void FSSAOShader::Bind() +{ + if (!mShader) + { + mShader.Compile(FShaderProgram::Vertex, "shaders/glsl/screenquad.vp", "", 330); + mShader.Compile(FShaderProgram::Fragment, "shaders/glsl/ssao.fp", "", 330); + mShader.SetFragDataLocation(0, "FragColor"); + mShader.Link("shaders/glsl/ssao"); + mShader.SetAttribLocation(0, "PositionInProjection"); + DepthTexture.Init(mShader, "DepthTexture"); + UVToViewA.Init(mShader, "UVToViewA"); + UVToViewB.Init(mShader, "UVToViewB"); + InvFullResolution.Init(mShader, "InvFullResolution"); + NDotVBias.Init(mShader, "NDotVBias"); + NegInvR2.Init(mShader, "NegInvR2"); + RadiusToScreen.Init(mShader, "RadiusToScreen"); + AOMultiplier.Init(mShader, "AOMultiplier"); + } + mShader.Bind(); +} diff --git a/src/gl/shaders/gl_ambientshader.h b/src/gl/shaders/gl_ambientshader.h new file mode 100644 index 0000000000..44f9165964 --- /dev/null +++ b/src/gl/shaders/gl_ambientshader.h @@ -0,0 +1,39 @@ +#ifndef 
__GL_AMBIENTSHADER_H +#define __GL_AMBIENTSHADER_H + +#include "gl_shaderprogram.h" + +class FLinearDepthShader +{ +public: + void Bind(); + + FBufferedUniformSampler DepthTexture; + FBufferedUniform1f LinearizeDepthA; + FBufferedUniform1f LinearizeDepthB; + FBufferedUniform1f InverseDepthRangeA; + FBufferedUniform1f InverseDepthRangeB; + +private: + FShaderProgram mShader; +}; + +class FSSAOShader +{ +public: + void Bind(); + + FBufferedUniformSampler DepthTexture; + FBufferedUniform2f UVToViewA; + FBufferedUniform2f UVToViewB; + FBufferedUniform2f InvFullResolution; + FBufferedUniform1f NDotVBias; + FBufferedUniform1f NegInvR2; + FBufferedUniform1f RadiusToScreen; + FBufferedUniform1f AOMultiplier; + +private: + FShaderProgram mShader; +}; + +#endif \ No newline at end of file diff --git a/src/gl/stereo3d/gl_stereo3d.cpp b/src/gl/stereo3d/gl_stereo3d.cpp index d686a4a43e..fa2a20ad4b 100644 --- a/src/gl/stereo3d/gl_stereo3d.cpp +++ b/src/gl/stereo3d/gl_stereo3d.cpp @@ -49,7 +49,7 @@ VSMatrix EyePose::GetProjection(float fov, float aspectRatio, float fovRatio) co // Lifted from gl_scene.cpp FGLRenderer::SetProjection() float fovy = (float)(2 * RAD2DEG(atan(tan(DEG2RAD(fov) / 2) / fovRatio))); - result.perspective(fovy, aspectRatio, 5.f, 65536.f); + result.perspective(fovy, aspectRatio, FGLRenderer::GetZNear(), FGLRenderer::GetZFar()); return result; } diff --git a/wadsrc/static/language.enu b/wadsrc/static/language.enu index 010c70565f..6c455b94c4 100644 --- a/wadsrc/static/language.enu +++ b/wadsrc/static/language.enu @@ -2634,6 +2634,7 @@ GLPREFMNU_MULTISAMPLE = "Multisample"; GLPREFMNU_TONEMAP = "Tonemap Mode"; GLPREFMNU_BLOOM = "Bloom effect"; GLPREFMNU_LENS = "Lens distortion effect"; +GLPREFMNU_SSAO = "Ambient occlusion"; // Option Values OPTVAL_SMART = "Smart"; diff --git a/wadsrc/static/menudef.z b/wadsrc/static/menudef.z index 0d080d7096..aae2ae76d4 100644 --- a/wadsrc/static/menudef.z +++ b/wadsrc/static/menudef.z @@ -230,4 +230,5 @@ OptionMenu 
"GLPrefOptions" Option "$GLPREFMNU_TONEMAP", gl_tonemap, "TonemapModes" Option "$GLPREFMNU_BLOOM", gl_bloom, "OnOff" Option "$GLPREFMNU_LENS", gl_lens, "OnOff" + Option "$GLPREFMNU_SSAO", gl_ssao, "OnOff" } diff --git a/wadsrc/static/shaders/glsl/lineardepth.fp b/wadsrc/static/shaders/glsl/lineardepth.fp new file mode 100644 index 0000000000..f61bb39955 --- /dev/null +++ b/wadsrc/static/shaders/glsl/lineardepth.fp @@ -0,0 +1,16 @@ + +in vec2 TexCoord; +out vec4 FragColor; + +uniform sampler2D DepthTexture; +uniform float LinearizeDepthA; +uniform float LinearizeDepthB; +uniform float InverseDepthRangeA; +uniform float InverseDepthRangeB; + +void main() +{ + float depth = texture(DepthTexture, TexCoord).x; + float normalizedDepth = clamp(InverseDepthRangeA * depth + InverseDepthRangeB, 0.0, 1.0); + FragColor = vec4(1.0 / (normalizedDepth * LinearizeDepthA + LinearizeDepthB), 0.0, 0.0, 1.0); +} diff --git a/wadsrc/static/shaders/glsl/ssao.fp b/wadsrc/static/shaders/glsl/ssao.fp new file mode 100644 index 0000000000..c9f8534d9f --- /dev/null +++ b/wadsrc/static/shaders/glsl/ssao.fp @@ -0,0 +1,117 @@ + +in vec2 TexCoord; +out vec4 FragColor; + +uniform vec2 UVToViewA; +uniform vec2 UVToViewB; +uniform vec2 InvFullResolution; + +uniform float NDotVBias; +uniform float NegInvR2; +uniform float RadiusToScreen; +uniform float AOMultiplier; + +uniform sampler2D DepthTexture; + +#if USE_RANDOM_TEXTURE +uniform sampler2D RandomTexture; +#endif + +#define NUM_DIRECTIONS 8.0 +#define NUM_STEPS 4.0 + +#define PI 3.14159265358979323846 + +// Calculate eye space position for the specified texture coordinate +vec3 FetchViewPos(vec2 uv) +{ + float z = texture(DepthTexture, uv).x; + return vec3((UVToViewA * uv + UVToViewB) * z, z); +} + +vec3 MinDiff(vec3 p, vec3 pr, vec3 pl) +{ + vec3 v1 = pr - p; + vec3 v2 = p - pl; + return (dot(v1, v1) < dot(v2, v2)) ? 
v1 : v2; +} + +// Reconstruct eye space normal from nearest neighbors +vec3 ReconstructNormal(vec3 p) +{ + vec3 pr = FetchViewPos(TexCoord + vec2(InvFullResolution.x, 0)); + vec3 pl = FetchViewPos(TexCoord + vec2(-InvFullResolution.x, 0)); + vec3 pt = FetchViewPos(TexCoord + vec2(0, InvFullResolution.y)); + vec3 pb = FetchViewPos(TexCoord + vec2(0, -InvFullResolution.y)); + return normalize(cross(MinDiff(p, pr, pl), MinDiff(p, pt, pb))); +} + +// Compute normalized 2D direction +vec2 RotateDirection(vec2 dir, vec2 cossin) +{ + return vec2(dir.x * cossin.x - dir.y * cossin.y, dir.x * cossin.y + dir.y * cossin.x); +} + +vec4 GetJitter() +{ +#if !USE_RANDOM_TEXTURE + return vec4(1,0,1,1); + //vec3 rand = noise3(TexCoord.x + TexCoord.y); + //float angle = 2.0 * PI * rand.x / NUM_DIRECTIONS; + //return vec4(cos(angle), sin(angle), rand.y, rand.z); +#else + #define RANDOM_TEXTURE_WIDTH 4.0 + return texture(RandomTexture, gl_FragCoord.xy / RANDOM_TEXTURE_WIDTH); +#endif +} + +// Calculates the ambient occlusion of a sample +float ComputeSampleAO(vec3 kernelPos, vec3 normal, vec3 samplePos) +{ + vec3 v = samplePos - kernelPos; + float distanceSquare = dot(v, v); + float nDotV = dot(normal, v) * inversesqrt(distanceSquare); + return clamp(nDotV - NDotVBias, 0.0, 1.0) * clamp(distanceSquare * NegInvR2 + 1.0, 0.0, 1.0); +} + +// Calculates the total ambient occlusion for the entire fragment +float ComputeAO(vec3 viewPosition, vec3 viewNormal) +{ + vec4 rand = GetJitter(); + + float radiusPixels = RadiusToScreen / viewPosition.z; + float stepSizePixels = radiusPixels / (NUM_STEPS + 1.0); + + const float directionAngleStep = 2.0 * PI / NUM_DIRECTIONS; + float ao = 0.0; + + for (float directionIndex = 0.0; directionIndex < NUM_DIRECTIONS; ++directionIndex) + { + float angle = directionAngleStep * directionIndex; + + vec2 direction = RotateDirection(vec2(cos(angle), sin(angle)), rand.xy); + float rayPixels = (rand.z * stepSizePixels + 1.0); + + for (float StepIndex = 0.0; 
StepIndex < NUM_STEPS; ++StepIndex) + { + vec2 sampleUV = round(rayPixels * direction) * InvFullResolution + TexCoord; + vec3 samplePos = FetchViewPos(sampleUV); + ao += ComputeSampleAO(viewPosition, viewNormal, samplePos); + rayPixels += stepSizePixels; + } + } + + ao *= AOMultiplier / (NUM_DIRECTIONS * NUM_STEPS); + return clamp(1.0 - ao * 2.0, 0.0, 1.0); +} + +void main() +{ + vec3 viewPosition = FetchViewPos(TexCoord); + vec3 viewNormal = ReconstructNormal(viewPosition); + float occlusion = ComputeAO(viewPosition, viewNormal); + //FragColor = vec4(viewPosition.x * 0.001 + 0.5, viewPosition.y * 0.001 + 0.5, viewPosition.z * 0.001, 1.0); + //FragColor = vec4(viewNormal.x * 0.5 + 0.5, viewNormal.y * 0.5 + 0.5, viewNormal.z * 0.5 + 0.5, 1.0); + //FragColor = vec4(occlusion, viewPosition.z, 0.0, 1.0); + FragColor = vec4(occlusion, occlusion, occlusion, 1.0); +} From 240ebf94a1d3c8463a4a4e8eb36ae3b2d95160d5 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 29 Aug 2016 22:42:46 +0200 Subject: [PATCH 097/912] Remove radius increase in point light modes --- src/gl/dynlights/gl_dynlight1.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/gl/dynlights/gl_dynlight1.cpp b/src/gl/dynlights/gl_dynlight1.cpp index 56db94e4d7..858bcfd094 100644 --- a/src/gl/dynlights/gl_dynlight1.cpp +++ b/src/gl/dynlights/gl_dynlight1.cpp @@ -139,9 +139,6 @@ bool gl_GetLight(int group, Plane & p, ADynamicLight * light, bool checkside, bo if (gl_light_math != 0) { - // Adjust light slightly to make the range better match plain attenuation - radius *= 1.5; - // Move light up because flasks/vials have their light source location at/below the floor. 
// // If the point is exactly on the wall plane it might cause some acne as some pixels could From 55ea4a7729b012fb1cda06e274d5dd4d991e6b8b Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 30 Aug 2016 01:09:21 +0200 Subject: [PATCH 098/912] Add ssao random texture --- src/gl/renderer/gl_postprocess.cpp | 10 +++++++++ src/gl/renderer/gl_postprocessstate.cpp | 26 ++++++++++++++++++++++-- src/gl/renderer/gl_postprocessstate.h | 5 ++++- src/gl/renderer/gl_renderbuffers.cpp | 27 +++++++++++++++++++++++-- src/gl/renderer/gl_renderbuffers.h | 3 ++- src/gl/shaders/gl_ambientshader.cpp | 11 +++++++++- src/gl/shaders/gl_ambientshader.h | 2 ++ wadsrc/static/shaders/glsl/ssao.fp | 16 ++++++++------- 8 files changed, 86 insertions(+), 14 deletions(-) diff --git a/src/gl/renderer/gl_postprocess.cpp b/src/gl/renderer/gl_postprocess.cpp index c7d0494c80..bd35fedd3d 100644 --- a/src/gl/renderer/gl_postprocess.cpp +++ b/src/gl/renderer/gl_postprocess.cpp @@ -108,6 +108,7 @@ CVAR(Float, gl_lens_kcube, 0.1f, 0) CVAR(Float, gl_lens_chromatic, 1.12f, 0) CVAR(Bool, gl_ssao, false, CVAR_ARCHIVE | CVAR_GLOBALCONFIG) +CVAR(Float, gl_ssao_strength, 0.7, CVAR_ARCHIVE | CVAR_GLOBALCONFIG) CVAR(Bool, gl_ssao_debug, false, 0) CVAR(Float, gl_ssao_bias, 0.5f, 0) CVAR(Float, gl_ssao_radius, 100.0f, 0) @@ -155,11 +156,13 @@ void FGLRenderer::AmbientOccludeScene() FGLDebug::PushGroup("AmbientOccludeScene"); FGLPostProcessState savedState; + savedState.SaveTextureBinding1(); float bias = gl_ssao_bias; float aoRadius = gl_ssao_radius; const float blurAmount = gl_ssao_blur_amount; int blurSampleCount = gl_ssao_blur_samples; + float aoStrength = gl_ssao_strength; //float tanHalfFovy = tan(fovy * (M_PI / 360.0f)); float tanHalfFovy = 1.0f / 1.33333302f; //gl_RenderState.mProjectionMatrix.get()[5]; @@ -185,8 +188,14 @@ void FGLRenderer::AmbientOccludeScene() glBindTexture(GL_TEXTURE_2D, mBuffers->AmbientTexture0); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); 
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glActiveTexture(GL_TEXTURE1); + glBindTexture(GL_TEXTURE_2D, mBuffers->AmbientRandomTexture); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glActiveTexture(GL_TEXTURE0); mSSAOShader->Bind(); mSSAOShader->DepthTexture.Set(0); + mSSAOShader->RandomTexture.Set(1); mSSAOShader->UVToViewA.Set(2.0f * invFocalLenX, -2.0f * invFocalLenY); mSSAOShader->UVToViewB.Set(-invFocalLenX, invFocalLenY); mSSAOShader->InvFullResolution.Set(1.0f / mBuffers->AmbientWidth, 1.0f / mBuffers->AmbientHeight); @@ -194,6 +203,7 @@ void FGLRenderer::AmbientOccludeScene() mSSAOShader->NegInvR2.Set(-1.0f / r2); mSSAOShader->RadiusToScreen.Set(aoRadius * 0.5 / tanHalfFovy * mBuffers->AmbientHeight); mSSAOShader->AOMultiplier.Set(1.0f / (1.0f - nDotVBias)); + mSSAOShader->AOStrength.Set(aoStrength); RenderScreenQuad(); // Blur SSAO texture diff --git a/src/gl/renderer/gl_postprocessstate.cpp b/src/gl/renderer/gl_postprocessstate.cpp index 05cca7f312..06a50d89aa 100644 --- a/src/gl/renderer/gl_postprocessstate.cpp +++ b/src/gl/renderer/gl_postprocessstate.cpp @@ -58,7 +58,7 @@ FGLPostProcessState::FGLPostProcessState() { glGetIntegerv(GL_ACTIVE_TEXTURE, &activeTex); glActiveTexture(GL_TEXTURE0); - glGetIntegerv(GL_TEXTURE_BINDING_2D, &textureBinding); + glGetIntegerv(GL_TEXTURE_BINDING_2D, &textureBinding[0]); glBindTexture(GL_TEXTURE_2D, 0); if (gl.flags & RFL_SAMPLER_OBJECTS) { @@ -88,6 +88,15 @@ FGLPostProcessState::FGLPostProcessState() glDisable(GL_BLEND); } +void FGLPostProcessState::SaveTextureBinding1() +{ + glActiveTexture(GL_TEXTURE1); + glGetIntegerv(GL_TEXTURE_BINDING_2D, &textureBinding[1]); + glBindTexture(GL_TEXTURE_2D, 0); + textureBinding1Saved = true; + glActiveTexture(GL_TEXTURE0); +} + //----------------------------------------------------------------------------- // // Restores state at the end of post processing @@ 
-121,6 +130,12 @@ FGLPostProcessState::~FGLPostProcessState() glUseProgram(currentProgram); + if (textureBinding1Saved) + { + glActiveTexture(GL_TEXTURE1); + glBindTexture(GL_TEXTURE_2D, 0); + } + glActiveTexture(GL_TEXTURE0); glBindTexture(GL_TEXTURE_2D, 0); if (gl.flags & RFL_SAMPLER_OBJECTS) @@ -128,6 +143,13 @@ FGLPostProcessState::~FGLPostProcessState() glBindSampler(0, samplerBinding[0]); glBindSampler(1, samplerBinding[1]); } - glBindTexture(GL_TEXTURE_2D, textureBinding); + glBindTexture(GL_TEXTURE_2D, textureBinding[0]); + + if (textureBinding1Saved) + { + glActiveTexture(GL_TEXTURE1); + glBindTexture(GL_TEXTURE_2D, textureBinding[1]); + } + glActiveTexture(activeTex); } diff --git a/src/gl/renderer/gl_postprocessstate.h b/src/gl/renderer/gl_postprocessstate.h index 4f2ca81a12..bf53aa7de9 100644 --- a/src/gl/renderer/gl_postprocessstate.h +++ b/src/gl/renderer/gl_postprocessstate.h @@ -14,12 +14,14 @@ public: FGLPostProcessState(); ~FGLPostProcessState(); + void SaveTextureBinding1(); + private: FGLPostProcessState(const FGLPostProcessState &) = delete; FGLPostProcessState &operator=(const FGLPostProcessState &) = delete; GLint activeTex; - GLint textureBinding; + GLint textureBinding[2]; GLint samplerBinding[2]; GLboolean blendEnabled; GLboolean scissorEnabled; @@ -32,6 +34,7 @@ private: GLint blendSrcAlpha; GLint blendDestRgb; GLint blendDestAlpha; + bool textureBinding1Saved = false; }; #endif diff --git a/src/gl/renderer/gl_renderbuffers.cpp b/src/gl/renderer/gl_renderbuffers.cpp index c1976565b8..e00d836bfe 100644 --- a/src/gl/renderer/gl_renderbuffers.cpp +++ b/src/gl/renderer/gl_renderbuffers.cpp @@ -53,6 +53,7 @@ #include "w_wad.h" #include "i_system.h" #include "doomerrors.h" +#include <random> CVAR(Int, gl_multisample, 1, CVAR_ARCHIVE|CVAR_GLOBALCONFIG); CVAR(Bool, gl_renderbuffers, true, CVAR_ARCHIVE|CVAR_GLOBALCONFIG); @@ -129,6 +130,7 @@ void FGLRenderBuffers::ClearAmbientOcclusion() DeleteFrameBuffer(AmbientFB1); DeleteTexture(AmbientTexture0);
DeleteTexture(AmbientTexture1); + DeleteTexture(AmbientRandomTexture); } void FGLRenderBuffers::DeleteTexture(GLuint &handle) @@ -334,6 +336,26 @@ void FGLRenderBuffers::CreateAmbientOcclusion(int width, int height) AmbientTexture1 = Create2DTexture("AmbientTexture1", GetHdrFormat(), AmbientWidth, AmbientHeight); AmbientFB0 = CreateFrameBuffer("AmbientFB0", AmbientTexture0); AmbientFB1 = CreateFrameBuffer("AmbientFB1", AmbientTexture1); + + int16_t randomValues[16 * 4]; + std::mt19937 generator(1337); + std::uniform_real_distribution<double> distribution(-1.0, 1.0); + for (int i = 0; i < 16; i++) + { + double num_directions = 8.0; // Must be same as the define in ssao.fp + double angle = 2.0 * M_PI * distribution(generator) / num_directions; + double x = cos(angle); + double y = sin(angle); + double z = distribution(generator); + double w = distribution(generator); + + randomValues[i * 4 + 0] = (int16_t)clamp(x * 32768.0, -32767.0, 32768.0); + randomValues[i * 4 + 1] = (int16_t)clamp(y * 32768.0, -32767.0, 32768.0); + randomValues[i * 4 + 2] = (int16_t)clamp(z * 32768.0, -32767.0, 32768.0); + randomValues[i * 4 + 3] = (int16_t)clamp(w * 32768.0, -32767.0, 32768.0); + } + + AmbientRandomTexture = Create2DTexture("AmbientRandomTexture", GL_RGBA16_SNORM, 4, 4, randomValues); } //========================================================================== @@ -353,7 +375,7 @@ GLuint FGLRenderBuffers::GetHdrFormat() // //========================================================================== -GLuint FGLRenderBuffers::Create2DTexture(const FString &name, GLuint format, int width, int height) +GLuint FGLRenderBuffers::Create2DTexture(const FString &name, GLuint format, int width, int height, const void *data) { GLuint handle = 0; glGenTextures(1, &handle); @@ -369,10 +391,11 @@ GLuint FGLRenderBuffers::Create2DTexture(const FString &name, GLuint format, int case GL_DEPTH_COMPONENT24: dataformat = GL_DEPTH_COMPONENT; datatype = GL_FLOAT; break; case GL_STENCIL_INDEX8: dataformat =
GL_STENCIL_INDEX; datatype = GL_INT; break; case GL_DEPTH24_STENCIL8: dataformat = GL_DEPTH_STENCIL; datatype = GL_UNSIGNED_INT_24_8; break; + case GL_RGBA16_SNORM: dataformat = GL_RGBA; datatype = GL_SHORT; break; default: I_FatalError("Unknown format passed to FGLRenderBuffers.Create2DTexture"); } - glTexImage2D(GL_TEXTURE_2D, 0, format, width, height, 0, dataformat, datatype, nullptr); + glTexImage2D(GL_TEXTURE_2D, 0, format, width, height, 0, dataformat, datatype, data); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); diff --git a/src/gl/renderer/gl_renderbuffers.h b/src/gl/renderer/gl_renderbuffers.h index 898193e59c..814fd9de85 100644 --- a/src/gl/renderer/gl_renderbuffers.h +++ b/src/gl/renderer/gl_renderbuffers.h @@ -43,6 +43,7 @@ public: GLuint AmbientFB1 = 0; int AmbientWidth = 0; int AmbientHeight = 0; + GLuint AmbientRandomTexture = 0; static bool IsEnabled(); @@ -58,7 +59,7 @@ private: void CreatePipeline(int width, int height); void CreateBloom(int width, int height); void CreateAmbientOcclusion(int width, int height); - GLuint Create2DTexture(const FString &name, GLuint format, int width, int height); + GLuint Create2DTexture(const FString &name, GLuint format, int width, int height, const void *data = nullptr); GLuint CreateRenderBuffer(const FString &name, GLuint format, int width, int height); GLuint CreateRenderBuffer(const FString &name, GLuint format, int samples, int width, int height); GLuint CreateFrameBuffer(const FString &name, GLuint colorbuffer); diff --git a/src/gl/shaders/gl_ambientshader.cpp b/src/gl/shaders/gl_ambientshader.cpp index d9d4a34257..55682a91e0 100644 --- a/src/gl/shaders/gl_ambientshader.cpp +++ b/src/gl/shaders/gl_ambientshader.cpp @@ -71,12 +71,20 @@ void FSSAOShader::Bind() { if (!mShader) { + const char *defines = R"( + #define USE_RANDOM_TEXTURE + #define 
RANDOM_TEXTURE_WIDTH 4.0 + #define NUM_DIRECTIONS 8.0 + #define NUM_STEPS 4.0 + )"; + mShader.Compile(FShaderProgram::Vertex, "shaders/glsl/screenquad.vp", "", 330); - mShader.Compile(FShaderProgram::Fragment, "shaders/glsl/ssao.fp", "", 330); + mShader.Compile(FShaderProgram::Fragment, "shaders/glsl/ssao.fp", defines, 330); mShader.SetFragDataLocation(0, "FragColor"); mShader.Link("shaders/glsl/ssao"); mShader.SetAttribLocation(0, "PositionInProjection"); DepthTexture.Init(mShader, "DepthTexture"); + RandomTexture.Init(mShader, "RandomTexture"); UVToViewA.Init(mShader, "UVToViewA"); UVToViewB.Init(mShader, "UVToViewB"); InvFullResolution.Init(mShader, "InvFullResolution"); @@ -84,6 +92,7 @@ void FSSAOShader::Bind() NegInvR2.Init(mShader, "NegInvR2"); RadiusToScreen.Init(mShader, "RadiusToScreen"); AOMultiplier.Init(mShader, "AOMultiplier"); + AOStrength.Init(mShader, "AOStrength"); } mShader.Bind(); } diff --git a/src/gl/shaders/gl_ambientshader.h b/src/gl/shaders/gl_ambientshader.h index 44f9165964..fdff178ec2 100644 --- a/src/gl/shaders/gl_ambientshader.h +++ b/src/gl/shaders/gl_ambientshader.h @@ -24,6 +24,7 @@ public: void Bind(); FBufferedUniformSampler DepthTexture; + FBufferedUniformSampler RandomTexture; FBufferedUniform2f UVToViewA; FBufferedUniform2f UVToViewB; FBufferedUniform2f InvFullResolution; @@ -31,6 +32,7 @@ public: FBufferedUniform1f NegInvR2; FBufferedUniform1f RadiusToScreen; FBufferedUniform1f AOMultiplier; + FBufferedUniform1f AOStrength; private: FShaderProgram mShader; diff --git a/wadsrc/static/shaders/glsl/ssao.fp b/wadsrc/static/shaders/glsl/ssao.fp index c9f8534d9f..f143e913f7 100644 --- a/wadsrc/static/shaders/glsl/ssao.fp +++ b/wadsrc/static/shaders/glsl/ssao.fp @@ -11,15 +11,14 @@ uniform float NegInvR2; uniform float RadiusToScreen; uniform float AOMultiplier; +uniform float AOStrength; + uniform sampler2D DepthTexture; -#if USE_RANDOM_TEXTURE +#if defined(USE_RANDOM_TEXTURE) uniform sampler2D RandomTexture; #endif -#define 
NUM_DIRECTIONS 8.0 -#define NUM_STEPS 4.0 - #define PI 3.14159265358979323846 // Calculate eye space position for the specified texture coordinate @@ -54,13 +53,12 @@ vec2 RotateDirection(vec2 dir, vec2 cossin) vec4 GetJitter() { -#if !USE_RANDOM_TEXTURE +#if !defined(USE_RANDOM_TEXTURE) return vec4(1,0,1,1); //vec3 rand = noise3(TexCoord.x + TexCoord.y); //float angle = 2.0 * PI * rand.x / NUM_DIRECTIONS; //return vec4(cos(angle), sin(angle), rand.y, rand.z); #else - #define RANDOM_TEXTURE_WIDTH 4.0 return texture(RandomTexture, gl_FragCoord.xy / RANDOM_TEXTURE_WIDTH); #endif } @@ -109,7 +107,11 @@ void main() { vec3 viewPosition = FetchViewPos(TexCoord); vec3 viewNormal = ReconstructNormal(viewPosition); - float occlusion = ComputeAO(viewPosition, viewNormal); + float occlusion = ComputeAO(viewPosition, viewNormal) * AOStrength + (1.0 - AOStrength); + + // GZDoom does not use linear buffers at the moment, apply some gamma to get it closer to correct + occlusion = occlusion * occlusion; + //FragColor = vec4(viewPosition.x * 0.001 + 0.5, viewPosition.y * 0.001 + 0.5, viewPosition.z * 0.001, 1.0); //FragColor = vec4(viewNormal.x * 0.5 + 0.5, viewNormal.y * 0.5 + 0.5, viewNormal.z * 0.5 + 0.5, 1.0); //FragColor = vec4(occlusion, viewPosition.z, 0.0, 1.0); From 8a2737a0ce3b2cc40e03dc3037a315c8105b8a83 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 1 Sep 2016 07:15:40 +0200 Subject: [PATCH 099/912] Fix precision issue in SSAO shader --- src/gl/renderer/gl_postprocess.cpp | 8 ++++---- src/gl/renderer/gl_renderbuffers.cpp | 5 +++-- src/gl/renderer/gl_renderbuffers.h | 3 +++ 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/src/gl/renderer/gl_postprocess.cpp b/src/gl/renderer/gl_postprocess.cpp index bd35fedd3d..62815f7186 100644 --- a/src/gl/renderer/gl_postprocess.cpp +++ b/src/gl/renderer/gl_postprocess.cpp @@ -112,11 +112,11 @@ CVAR(Float, gl_ssao_strength, 0.7, CVAR_ARCHIVE | CVAR_GLOBALCONFIG) CVAR(Bool, gl_ssao_debug, false, 0) CVAR(Float, 
gl_ssao_bias, 0.5f, 0) CVAR(Float, gl_ssao_radius, 100.0f, 0) -CUSTOM_CVAR(Float, gl_ssao_blur_amount, 6.0f, 0) +CUSTOM_CVAR(Float, gl_ssao_blur_amount, 4.0f, 0) { if (self < 0.1f) self = 0.1f; } -CUSTOM_CVAR(Int, gl_ssao_blur_samples, 9, 0) +CUSTOM_CVAR(Int, gl_ssao_blur_samples, 5, 0) { if (self < 3 || self > 15 || self % 2 == 0) self = 9; @@ -165,8 +165,8 @@ void FGLRenderer::AmbientOccludeScene() float aoStrength = gl_ssao_strength; //float tanHalfFovy = tan(fovy * (M_PI / 360.0f)); - float tanHalfFovy = 1.0f / 1.33333302f; //gl_RenderState.mProjectionMatrix.get()[5]; - float invFocalLenX = tanHalfFovy * (mBuffers->AmbientWidth / (float)mBuffers->AmbientHeight); + float tanHalfFovy = 1.0f / 1.33333302f; // 1.0f / gl_RenderState.mProjectionMatrix.get()[5]; + float invFocalLenX = tanHalfFovy * (mBuffers->GetSceneWidth() / (float)mBuffers->GetSceneHeight()); float invFocalLenY = tanHalfFovy; float nDotVBias = clamp(bias, 0.0f, 1.0f); float r2 = aoRadius * aoRadius; diff --git a/src/gl/renderer/gl_renderbuffers.cpp b/src/gl/renderer/gl_renderbuffers.cpp index e00d836bfe..6da85bba7f 100644 --- a/src/gl/renderer/gl_renderbuffers.cpp +++ b/src/gl/renderer/gl_renderbuffers.cpp @@ -332,8 +332,8 @@ void FGLRenderBuffers::CreateAmbientOcclusion(int width, int height) AmbientWidth = width / 2; AmbientHeight = height / 2; - AmbientTexture0 = Create2DTexture("AmbientTexture0", GetHdrFormat(), AmbientWidth, AmbientHeight); - AmbientTexture1 = Create2DTexture("AmbientTexture1", GetHdrFormat(), AmbientWidth, AmbientHeight); + AmbientTexture0 = Create2DTexture("AmbientTexture0", GL_RGBA32F, AmbientWidth, AmbientHeight); + AmbientTexture1 = Create2DTexture("AmbientTexture1", GL_RGBA32F, AmbientWidth, AmbientHeight); AmbientFB0 = CreateFrameBuffer("AmbientFB0", AmbientTexture0); AmbientFB1 = CreateFrameBuffer("AmbientFB1", AmbientTexture1); @@ -388,6 +388,7 @@ GLuint FGLRenderBuffers::Create2DTexture(const FString &name, GLuint format, int case GL_RGBA8: dataformat = GL_RGBA; 
datatype = GL_UNSIGNED_BYTE; break; case GL_RGBA16: dataformat = GL_RGBA; datatype = GL_UNSIGNED_SHORT; break; case GL_RGBA16F: dataformat = GL_RGBA; datatype = GL_FLOAT; break; + case GL_RGBA32F: dataformat = GL_RGBA; datatype = GL_FLOAT; break; case GL_DEPTH_COMPONENT24: dataformat = GL_DEPTH_COMPONENT; datatype = GL_FLOAT; break; case GL_STENCIL_INDEX8: dataformat = GL_STENCIL_INDEX; datatype = GL_INT; break; case GL_DEPTH24_STENCIL8: dataformat = GL_DEPTH_STENCIL; datatype = GL_UNSIGNED_INT_24_8; break; diff --git a/src/gl/renderer/gl_renderbuffers.h b/src/gl/renderer/gl_renderbuffers.h index 814fd9de85..5dc2e49bda 100644 --- a/src/gl/renderer/gl_renderbuffers.h +++ b/src/gl/renderer/gl_renderbuffers.h @@ -50,6 +50,9 @@ public: int GetWidth() const { return mWidth; } int GetHeight() const { return mHeight; } + int GetSceneWidth() const { return mSceneWidth; } + int GetSceneHeight() const { return mSceneHeight; } + private: void ClearScene(); void ClearPipeline(); From 98032bc73f7bbdd3a9f8254f42abd2e8aa717e5a Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 2 Sep 2016 05:45:00 +0200 Subject: [PATCH 100/912] Change SSAO blur to be depth aware --- src/gl/renderer/gl_postprocess.cpp | 22 ++++-- src/gl/renderer/gl_renderbuffers.cpp | 6 +- src/gl/renderer/gl_renderer.cpp | 8 +++ src/gl/renderer/gl_renderer.h | 4 ++ src/gl/shaders/gl_ambientshader.cpp | 32 +++++++++ src/gl/shaders/gl_ambientshader.h | 25 +++++++ wadsrc/static/shaders/glsl/depthblur.fp | 81 +++++++++++++++++++++++ wadsrc/static/shaders/glsl/ssao.fp | 9 +-- wadsrc/static/shaders/glsl/ssaocombine.fp | 11 +++ 9 files changed, 184 insertions(+), 14 deletions(-) create mode 100644 wadsrc/static/shaders/glsl/depthblur.fp create mode 100644 wadsrc/static/shaders/glsl/ssaocombine.fp diff --git a/src/gl/renderer/gl_postprocess.cpp b/src/gl/renderer/gl_postprocess.cpp index 6849850744..edf1148ef7 100644 --- a/src/gl/renderer/gl_postprocess.cpp +++ b/src/gl/renderer/gl_postprocess.cpp @@ -209,8 +209,22 
@@ void FGLRenderer::AmbientOccludeScene() RenderScreenQuad(); // Blur SSAO texture - mBlurShader->BlurHorizontal(this, blurAmount, blurSampleCount, mBuffers->AmbientTexture1, mBuffers->AmbientFB0, mBuffers->AmbientWidth, mBuffers->AmbientHeight); - mBlurShader->BlurVertical(this, blurAmount, blurSampleCount, mBuffers->AmbientTexture0, mBuffers->AmbientFB1, mBuffers->AmbientWidth, mBuffers->AmbientHeight); + glBindFramebuffer(GL_FRAMEBUFFER, mBuffers->AmbientFB0); + glBindTexture(GL_TEXTURE_2D, mBuffers->AmbientTexture1); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + mDepthBlurShader->Bind(false); + mDepthBlurShader->BlurSharpness[false].Set(blurAmount); + mDepthBlurShader->InvFullResolution[false].Set(1.0f / mBuffers->AmbientWidth, 1.0f / mBuffers->AmbientHeight); + RenderScreenQuad(); + + glBindFramebuffer(GL_FRAMEBUFFER, mBuffers->AmbientFB1); + glBindTexture(GL_TEXTURE_2D, mBuffers->AmbientTexture0); + mDepthBlurShader->Bind(true); + mDepthBlurShader->BlurSharpness[true].Set(blurAmount); + mDepthBlurShader->InvFullResolution[true].Set(1.0f / mBuffers->AmbientWidth, 1.0f / mBuffers->AmbientHeight); + mDepthBlurShader->PowExponent[true].Set(1.8f); + RenderScreenQuad(); // Add SSAO back to scene texture: mBuffers->BindCurrentFB(); @@ -225,8 +239,8 @@ void FGLRenderer::AmbientOccludeScene() glBindTexture(GL_TEXTURE_2D, mBuffers->AmbientTexture1); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); - mBloomCombineShader->Bind(); - mBloomCombineShader->BloomTexture.Set(0); + mSSAOCombineShader->Bind(); + mSSAOCombineShader->AODepthTexture.Set(0); RenderScreenQuad(); glViewport(mScreenViewport.left, mScreenViewport.top, mScreenViewport.width, mScreenViewport.height); diff --git a/src/gl/renderer/gl_renderbuffers.cpp b/src/gl/renderer/gl_renderbuffers.cpp index 93625722da..9eb7c6f393 100644 --- 
a/src/gl/renderer/gl_renderbuffers.cpp +++ b/src/gl/renderer/gl_renderbuffers.cpp @@ -313,8 +313,8 @@ void FGLRenderBuffers::CreateAmbientOcclusion(int width, int height) AmbientWidth = width / 2; AmbientHeight = height / 2; - AmbientTexture0 = Create2DTexture("AmbientTexture0", GL_RGBA32F, AmbientWidth, AmbientHeight); - AmbientTexture1 = Create2DTexture("AmbientTexture1", GL_RGBA32F, AmbientWidth, AmbientHeight); + AmbientTexture0 = Create2DTexture("AmbientTexture0", GL_RG32F, AmbientWidth, AmbientHeight); + AmbientTexture1 = Create2DTexture("AmbientTexture1", GL_RG32F, AmbientWidth, AmbientHeight); AmbientFB0 = CreateFrameBuffer("AmbientFB0", AmbientTexture0); AmbientFB1 = CreateFrameBuffer("AmbientFB1", AmbientTexture1); @@ -370,6 +370,8 @@ GLuint FGLRenderBuffers::Create2DTexture(const FString &name, GLuint format, int case GL_RGBA16: dataformat = GL_RGBA; datatype = GL_UNSIGNED_SHORT; break; case GL_RGBA16F: dataformat = GL_RGBA; datatype = GL_FLOAT; break; case GL_RGBA32F: dataformat = GL_RGBA; datatype = GL_FLOAT; break; + case GL_R32F: dataformat = GL_RED; datatype = GL_FLOAT; break; + case GL_RG32F: dataformat = GL_RG; datatype = GL_FLOAT; break; case GL_DEPTH_COMPONENT24: dataformat = GL_DEPTH_COMPONENT; datatype = GL_FLOAT; break; case GL_STENCIL_INDEX8: dataformat = GL_STENCIL_INDEX; datatype = GL_INT; break; case GL_DEPTH24_STENCIL8: dataformat = GL_DEPTH_STENCIL; datatype = GL_UNSIGNED_INT_24_8; break; diff --git a/src/gl/renderer/gl_renderer.cpp b/src/gl/renderer/gl_renderer.cpp index 2d32f0d204..ed45ec0f31 100644 --- a/src/gl/renderer/gl_renderer.cpp +++ b/src/gl/renderer/gl_renderer.cpp @@ -123,6 +123,10 @@ FGLRenderer::FGLRenderer(OpenGLFrameBuffer *fb) mTonemapPalette = nullptr; mColormapShader = nullptr; mLensShader = nullptr; + mLinearDepthShader = nullptr; + mDepthBlurShader = nullptr; + mSSAOShader = nullptr; + mSSAOCombineShader = nullptr; } void gl_LoadModels(); @@ -132,7 +136,9 @@ void FGLRenderer::Initialize(int width, int height) { 
mBuffers = new FGLRenderBuffers(); mLinearDepthShader = new FLinearDepthShader(); + mDepthBlurShader = new FDepthBlurShader(); mSSAOShader = new FSSAOShader(); + mSSAOCombineShader = new FSSAOCombineShader(); mBloomExtractShader = new FBloomExtractShader(); mBloomCombineShader = new FBloomCombineShader(); mBlurShader = new FBlurShader(); @@ -194,7 +200,9 @@ FGLRenderer::~FGLRenderer() if (mBuffers) delete mBuffers; if (mPresentShader) delete mPresentShader; if (mLinearDepthShader) delete mLinearDepthShader; + if (mDepthBlurShader) delete mDepthBlurShader; if (mSSAOShader) delete mSSAOShader; + if (mSSAOCombineShader) delete mSSAOCombineShader; if (mBloomExtractShader) delete mBloomExtractShader; if (mBloomCombineShader) delete mBloomCombineShader; if (mBlurShader) delete mBlurShader; diff --git a/src/gl/renderer/gl_renderer.h b/src/gl/renderer/gl_renderer.h index c487c4a77c..8955acfd60 100644 --- a/src/gl/renderer/gl_renderer.h +++ b/src/gl/renderer/gl_renderer.h @@ -20,7 +20,9 @@ class FSamplerManager; class DPSprite; class FGLRenderBuffers; class FLinearDepthShader; +class FDepthBlurShader; class FSSAOShader; +class FSSAOCombineShader; class FBloomExtractShader; class FBloomCombineShader; class FBlurShader; @@ -94,6 +96,8 @@ public: FGLRenderBuffers *mBuffers; FLinearDepthShader *mLinearDepthShader; FSSAOShader *mSSAOShader; + FDepthBlurShader *mDepthBlurShader; + FSSAOCombineShader *mSSAOCombineShader; FBloomExtractShader *mBloomExtractShader; FBloomCombineShader *mBloomCombineShader; FBlurShader *mBlurShader; diff --git a/src/gl/shaders/gl_ambientshader.cpp b/src/gl/shaders/gl_ambientshader.cpp index 55682a91e0..6b8c9cc485 100644 --- a/src/gl/shaders/gl_ambientshader.cpp +++ b/src/gl/shaders/gl_ambientshader.cpp @@ -96,3 +96,35 @@ void FSSAOShader::Bind() } mShader.Bind(); } + +void FDepthBlurShader::Bind(bool vertical) +{ + auto &shader = mShader[vertical]; + if (!shader) + { + shader.Compile(FShaderProgram::Vertex, "shaders/glsl/screenquad.vp", "", 330); + 
shader.Compile(FShaderProgram::Fragment, "shaders/glsl/depthblur.fp", vertical ? "#define BLUR_VERTICAL\n" : "#define BLUR_HORIZONTAL\n", 330); + shader.SetFragDataLocation(0, "FragColor"); + shader.Link("shaders/glsl/depthblur"); + shader.SetAttribLocation(0, "PositionInProjection"); + AODepthTexture[vertical].Init(shader, "AODepthTexture"); + BlurSharpness[vertical].Init(shader, "BlurSharpness"); + InvFullResolution[vertical].Init(shader, "InvFullResolution"); + PowExponent[vertical].Init(shader, "PowExponent"); + } + shader.Bind(); +} + +void FSSAOCombineShader::Bind() +{ + if (!mShader) + { + mShader.Compile(FShaderProgram::Vertex, "shaders/glsl/screenquad.vp", "", 330); + mShader.Compile(FShaderProgram::Fragment, "shaders/glsl/ssaocombine.fp", "", 330); + mShader.SetFragDataLocation(0, "FragColor"); + mShader.Link("shaders/glsl/ssaocombine"); + mShader.SetAttribLocation(0, "PositionInProjection"); + AODepthTexture.Init(mShader, "AODepthTexture"); + } + mShader.Bind(); +} diff --git a/src/gl/shaders/gl_ambientshader.h b/src/gl/shaders/gl_ambientshader.h index fdff178ec2..419ca939e2 100644 --- a/src/gl/shaders/gl_ambientshader.h +++ b/src/gl/shaders/gl_ambientshader.h @@ -38,4 +38,29 @@ private: FShaderProgram mShader; }; +class FDepthBlurShader +{ +public: + void Bind(bool vertical); + + FBufferedUniformSampler AODepthTexture[2]; + FBufferedUniform1f BlurSharpness[2]; + FBufferedUniform2f InvFullResolution[2]; + FBufferedUniform1f PowExponent[2]; + +private: + FShaderProgram mShader[2]; +}; + +class FSSAOCombineShader +{ +public: + void Bind(); + + FBufferedUniformSampler AODepthTexture; + +private: + FShaderProgram mShader; +}; + #endif \ No newline at end of file diff --git a/wadsrc/static/shaders/glsl/depthblur.fp b/wadsrc/static/shaders/glsl/depthblur.fp new file mode 100644 index 0000000000..bd464d03da --- /dev/null +++ b/wadsrc/static/shaders/glsl/depthblur.fp @@ -0,0 +1,81 @@ + +in vec2 TexCoord; +out vec4 FragColor; + +uniform sampler2D AODepthTexture; 
+uniform float BlurSharpness; +uniform vec2 InvFullResolution; +uniform float PowExponent; + +#define KERNEL_RADIUS 3.0 + +struct CenterPixelData +{ + vec2 UV; + float Depth; + float Sharpness; +}; + +float CrossBilateralWeight(float r, float sampleDepth, CenterPixelData center) +{ + const float blurSigma = KERNEL_RADIUS * 0.5; + const float blurFalloff = 1.0 / (2.0 * blurSigma * blurSigma); + + float deltaZ = (sampleDepth - center.Depth) * center.Sharpness; + + return exp2(-r * r * blurFalloff - deltaZ * deltaZ); +} + +void ProcessSample(float ao, float z, float r, CenterPixelData center, inout float totalAO, inout float totalW) +{ + float w = CrossBilateralWeight(r, z, center); + totalAO += w * ao; + totalW += w; +} + +void ProcessRadius(vec2 deltaUV, CenterPixelData center, inout float totalAO, inout float totalW) +{ + for (float r = 1; r <= KERNEL_RADIUS; r += 1.0) + { + vec2 uv = r * deltaUV + center.UV; + vec2 aoZ = texture(AODepthTexture, uv).xy; + ProcessSample(aoZ.x, aoZ.y, r, center, totalAO, totalW); + } +} + +vec2 ComputeBlur(vec2 deltaUV) +{ + vec2 aoZ = texture(AODepthTexture, TexCoord).xy; + + CenterPixelData center; + center.UV = TexCoord; + center.Depth = aoZ.y; + center.Sharpness = BlurSharpness; + + float totalAO = aoZ.x; + float totalW = 1.0; + + ProcessRadius(deltaUV, center, totalAO, totalW); + ProcessRadius(-deltaUV, center, totalAO, totalW); + + return vec2(totalAO / totalW, aoZ.y); +} + +vec2 BlurX() +{ + return ComputeBlur(vec2(InvFullResolution.x, 0.0)); +} + +float BlurY() +{ + return pow(clamp(ComputeBlur(vec2(0.0, InvFullResolution.y)).x, 0.0, 1.0), PowExponent); +} + +void main() +{ +#if defined(BLUR_HORIZONTAL) + FragColor = vec4(BlurX(), 0.0, 1.0); +#else + FragColor = vec4(BlurY(), 0.0, 0.0, 1.0); +#endif +} diff --git a/wadsrc/static/shaders/glsl/ssao.fp b/wadsrc/static/shaders/glsl/ssao.fp index f143e913f7..e0d972c579 100644 --- a/wadsrc/static/shaders/glsl/ssao.fp +++ b/wadsrc/static/shaders/glsl/ssao.fp @@ -108,12 +108,5 @@ 
void main() vec3 viewPosition = FetchViewPos(TexCoord); vec3 viewNormal = ReconstructNormal(viewPosition); float occlusion = ComputeAO(viewPosition, viewNormal) * AOStrength + (1.0 - AOStrength); - - // GZDoom does not use linear buffers at the moment, apply some gamma to get it closer to correct - occlusion = occlusion * occlusion; - - //FragColor = vec4(viewPosition.x * 0.001 + 0.5, viewPosition.y * 0.001 + 0.5, viewPosition.z * 0.001, 1.0); - //FragColor = vec4(viewNormal.x * 0.5 + 0.5, viewNormal.y * 0.5 + 0.5, viewNormal.z * 0.5 + 0.5, 1.0); - //FragColor = vec4(occlusion, viewPosition.z, 0.0, 1.0); - FragColor = vec4(occlusion, occlusion, occlusion, 1.0); + FragColor = vec4(occlusion, viewPosition.z, 0.0, 1.0); } diff --git a/wadsrc/static/shaders/glsl/ssaocombine.fp b/wadsrc/static/shaders/glsl/ssaocombine.fp new file mode 100644 index 0000000000..21fdff1024 --- /dev/null +++ b/wadsrc/static/shaders/glsl/ssaocombine.fp @@ -0,0 +1,11 @@ + +in vec2 TexCoord; +out vec4 FragColor; + +uniform sampler2D AODepthTexture; + +void main() +{ + float attenutation = texture(AODepthTexture, TexCoord).x; + FragColor = vec4(attenutation, attenutation, attenutation, 0.0); +} From 902097d6da05cc4e4ca315d40bccef7dcf791d8a Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 3 Sep 2016 04:12:00 +0200 Subject: [PATCH 101/912] Resolve multisampling depth in shader --- src/gl/renderer/gl_postprocess.cpp | 20 ++- src/gl/renderer/gl_renderbuffers.cpp | 186 ++++++++++++++++------ src/gl/renderer/gl_renderbuffers.h | 27 +++- src/gl/shaders/gl_ambientshader.cpp | 28 ++-- src/gl/shaders/gl_ambientshader.h | 15 +- wadsrc/static/shaders/glsl/lineardepth.fp | 19 +++ 6 files changed, 211 insertions(+), 84 deletions(-) diff --git a/src/gl/renderer/gl_postprocess.cpp b/src/gl/renderer/gl_postprocess.cpp index edf1148ef7..1cd893f1ce 100644 --- a/src/gl/renderer/gl_postprocess.cpp +++ b/src/gl/renderer/gl_postprocess.cpp @@ -136,8 +136,8 @@ void FGLRenderer::RenderScreenQuad() void 
FGLRenderer::PostProcessScene() { - mBuffers->BlitSceneToTexture(); AmbientOccludeScene(); + mBuffers->BlitSceneToTexture(); BloomScene(); TonemapScene(); ColormapScene(); @@ -165,6 +165,7 @@ void FGLRenderer::AmbientOccludeScene() const float blurAmount = gl_ssao_blur_amount; int blurSampleCount = gl_ssao_blur_samples; float aoStrength = gl_ssao_strength; + bool multisample = gl_multisample > 1; //float tanHalfFovy = tan(fovy * (M_PI / 360.0f)); float tanHalfFovy = 1.0f / 1.33333302f; // 1.0f / gl_RenderState.mProjectionMatrix.get()[5]; @@ -177,12 +178,15 @@ void FGLRenderer::AmbientOccludeScene() glBindFramebuffer(GL_FRAMEBUFFER, mBuffers->AmbientFB0); glViewport(0, 0, mBuffers->AmbientWidth, mBuffers->AmbientHeight); mBuffers->BindSceneDepthTexture(0); - mLinearDepthShader->Bind(); - mLinearDepthShader->DepthTexture.Set(0); - mLinearDepthShader->LinearizeDepthA.Set(1.0f / GetZFar() - 1.0f / GetZNear()); - mLinearDepthShader->LinearizeDepthB.Set(MAX(1.0f / GetZNear(), 1.e-8f)); - mLinearDepthShader->InverseDepthRangeA.Set(1.0f); - mLinearDepthShader->InverseDepthRangeB.Set(0.0f); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + mLinearDepthShader->Bind(multisample); + mLinearDepthShader->DepthTexture[multisample].Set(0); + if (multisample) mLinearDepthShader->SampleCount[multisample].Set(gl_multisample); + mLinearDepthShader->LinearizeDepthA[multisample].Set(1.0f / GetZFar() - 1.0f / GetZNear()); + mLinearDepthShader->LinearizeDepthB[multisample].Set(MAX(1.0f / GetZNear(), 1.e-8f)); + mLinearDepthShader->InverseDepthRangeA[multisample].Set(1.0f); + mLinearDepthShader->InverseDepthRangeB[multisample].Set(0.0f); RenderScreenQuad(); // Apply ambient occlusion @@ -227,7 +231,7 @@ void FGLRenderer::AmbientOccludeScene() RenderScreenQuad(); // Add SSAO back to scene texture: - mBuffers->BindCurrentFB(); + mBuffers->BindSceneFB(); glViewport(mSceneViewport.left, 
mSceneViewport.top, mSceneViewport.width, mSceneViewport.height); glEnable(GL_BLEND); glBlendEquation(GL_FUNC_ADD); diff --git a/src/gl/renderer/gl_renderbuffers.cpp b/src/gl/renderer/gl_renderbuffers.cpp index f66ea1354e..b2cee57049 100644 --- a/src/gl/renderer/gl_renderbuffers.cpp +++ b/src/gl/renderer/gl_renderbuffers.cpp @@ -53,6 +53,7 @@ #include "w_wad.h" #include "i_system.h" #include "doomerrors.h" +#include <random> CVAR(Int, gl_multisample, 1, CVAR_ARCHIVE|CVAR_GLOBALCONFIG); CVAR(Bool, gl_renderbuffers, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG | CVAR_NOINITCALL) @@ -86,15 +87,14 @@ FGLRenderBuffers::~FGLRenderBuffers() ClearScene(); ClearPipeline(); ClearBloom(); + ClearAmbientOcclusion(); } void FGLRenderBuffers::ClearScene() { DeleteFrameBuffer(mSceneFB); - DeleteRenderBuffer(mSceneMultisample); - DeleteRenderBuffer(mSceneDepthStencil); - DeleteRenderBuffer(mSceneDepth); - DeleteRenderBuffer(mSceneStencil); + DeleteTexture(mSceneMultisample); + DeleteTexture(mSceneDepthStencil); } void FGLRenderBuffers::ClearPipeline() @@ -119,6 +119,15 @@ void FGLRenderBuffers::ClearBloom() } } +void FGLRenderBuffers::ClearAmbientOcclusion() +{ + DeleteFrameBuffer(AmbientFB0); + DeleteFrameBuffer(AmbientFB1); + DeleteTexture(AmbientTexture0); + DeleteTexture(AmbientTexture1); + DeleteTexture(AmbientRandomTexture); +} + void FGLRenderBuffers::DeleteTexture(GLuint &handle) { if (handle != 0) @@ -186,11 +195,12 @@ bool FGLRenderBuffers::Setup(int width, int height, int sceneWidth, int sceneHei } // Bloom bluring buffers need to match the scene to avoid bloom bleeding artifacts - if (mBloomWidth != sceneWidth || mBloomHeight != sceneHeight) + if (mSceneWidth != sceneWidth || mSceneHeight != sceneHeight) { CreateBloom(sceneWidth, sceneHeight); - mBloomWidth = sceneWidth; - mBloomHeight = sceneHeight; + CreateAmbientOcclusion(sceneWidth, sceneHeight); + mSceneWidth = sceneWidth; + mSceneHeight = sceneHeight; } glBindTexture(GL_TEXTURE_2D, textureBinding); @@ -206,8 +216,8 @@ bool 
FGLRenderBuffers::Setup(int width, int height, int sceneWidth, int sceneHei mWidth = 0; mHeight = 0; mSamples = 0; - mBloomWidth = 0; - mBloomHeight = 0; + mSceneWidth = 0; + mSceneHeight = 0; } return !FailedCreate; @@ -224,9 +234,15 @@ void FGLRenderBuffers::CreateScene(int width, int height, int samples) ClearScene(); if (samples > 1) - mSceneMultisample = CreateRenderBuffer("SceneMultisample", GL_RGBA16F, samples, width, height); + { + mSceneMultisample = Create2DMultisampleTexture("SceneMultisample", GL_RGBA16F, width, height, samples, false); + mSceneDepthStencil = Create2DMultisampleTexture("SceneDepthStencil", GL_DEPTH24_STENCIL8, width, height, samples, false); + } + else + { + mSceneDepthStencil = Create2DTexture("SceneDepthStencil", GL_DEPTH24_STENCIL8, width, height); + } - mSceneDepthStencil = CreateRenderBuffer("SceneDepthStencil", GL_DEPTH24_STENCIL8, samples, width, height); mSceneFB = CreateFrameBuffer("SceneFB", samples > 1 ? mSceneMultisample : mPipelineTexture[0], mSceneDepthStencil, samples > 1); } @@ -279,20 +295,77 @@ void FGLRenderBuffers::CreateBloom(int width, int height) } } +//========================================================================== +// +// Creates ambient occlusion working buffers +// +//========================================================================== + +void FGLRenderBuffers::CreateAmbientOcclusion(int width, int height) +{ + ClearAmbientOcclusion(); + + if (width <= 0 || height <= 0) + return; + + AmbientWidth = width / 2; + AmbientHeight = height / 2; + AmbientTexture0 = Create2DTexture("AmbientTexture0", GL_RG32F, AmbientWidth, AmbientHeight); + AmbientTexture1 = Create2DTexture("AmbientTexture1", GL_RG32F, AmbientWidth, AmbientHeight); + AmbientFB0 = CreateFrameBuffer("AmbientFB0", AmbientTexture0); + AmbientFB1 = CreateFrameBuffer("AmbientFB1", AmbientTexture1); + + int16_t randomValues[16 * 4]; + std::mt19937 generator(1337); + std::uniform_real_distribution<double> distribution(-1.0, 1.0); + for (int i = 0; i 
< 16; i++) + { + double num_directions = 8.0; // Must be same as the define in ssao.fp + double angle = 2.0 * M_PI * distribution(generator) / num_directions; + double x = cos(angle); + double y = sin(angle); + double z = distribution(generator); + double w = distribution(generator); + + randomValues[i * 4 + 0] = (int16_t)clamp(x * 32768.0, -32767.0, 32768.0); + randomValues[i * 4 + 1] = (int16_t)clamp(y * 32768.0, -32767.0, 32768.0); + randomValues[i * 4 + 2] = (int16_t)clamp(z * 32768.0, -32767.0, 32768.0); + randomValues[i * 4 + 3] = (int16_t)clamp(w * 32768.0, -32767.0, 32768.0); + } + + AmbientRandomTexture = Create2DTexture("AmbientRandomTexture", GL_RGBA16_SNORM, 4, 4, randomValues); +} + //========================================================================== // // Creates a 2D texture defaulting to linear filtering and clamp to edge // //========================================================================== -GLuint FGLRenderBuffers::Create2DTexture(const FString &name, GLuint format, int width, int height) +GLuint FGLRenderBuffers::Create2DTexture(const FString &name, GLuint format, int width, int height, const void *data) { - GLuint type = (format == GL_RGBA16F) ? 
GL_FLOAT : GL_UNSIGNED_BYTE; GLuint handle = 0; glGenTextures(1, &handle); glBindTexture(GL_TEXTURE_2D, handle); FGLDebug::LabelObject(GL_TEXTURE, handle, name); - glTexImage2D(GL_TEXTURE_2D, 0, format, width, height, 0, GL_RGBA, type, nullptr); + + GLenum dataformat, datatype; + switch (format) + { + case GL_RGBA8: dataformat = GL_RGBA; datatype = GL_UNSIGNED_BYTE; break; + case GL_RGBA16: dataformat = GL_RGBA; datatype = GL_UNSIGNED_SHORT; break; + case GL_RGBA16F: dataformat = GL_RGBA; datatype = GL_FLOAT; break; + case GL_RGBA32F: dataformat = GL_RGBA; datatype = GL_FLOAT; break; + case GL_R32F: dataformat = GL_RED; datatype = GL_FLOAT; break; + case GL_RG32F: dataformat = GL_RG; datatype = GL_FLOAT; break; + case GL_DEPTH_COMPONENT24: dataformat = GL_DEPTH_COMPONENT; datatype = GL_FLOAT; break; + case GL_STENCIL_INDEX8: dataformat = GL_STENCIL_INDEX; datatype = GL_INT; break; + case GL_DEPTH24_STENCIL8: dataformat = GL_DEPTH_STENCIL; datatype = GL_UNSIGNED_INT_24_8; break; + case GL_RGBA16_SNORM: dataformat = GL_RGBA; datatype = GL_SHORT; break; + default: I_FatalError("Unknown format passed to FGLRenderBuffers.Create2DTexture"); + } + + glTexImage2D(GL_TEXTURE_2D, 0, format, width, height, 0, dataformat, datatype, data); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); @@ -300,6 +373,17 @@ GLuint FGLRenderBuffers::Create2DTexture(const FString &name, GLuint format, int return handle; } +GLuint FGLRenderBuffers::Create2DMultisampleTexture(const FString &name, GLuint format, int width, int height, int samples, bool fixedSampleLocations) +{ + GLuint handle = 0; + glGenTextures(1, &handle); + glBindTexture(GL_TEXTURE_2D_MULTISAMPLE, handle); + FGLDebug::LabelObject(GL_TEXTURE, handle, name); + glTexImage2DMultisample(GL_TEXTURE_2D_MULTISAMPLE, samples, format, width, height, fixedSampleLocations); + 
glBindTexture(GL_TEXTURE_2D_MULTISAMPLE, 0); + return handle; +} + //========================================================================== // // Creates a render buffer @@ -347,34 +431,22 @@ GLuint FGLRenderBuffers::CreateFrameBuffer(const FString &name, GLuint colorbuff return handle; } -GLuint FGLRenderBuffers::CreateFrameBuffer(const FString &name, GLuint colorbuffer, GLuint depthstencil, bool colorIsARenderBuffer) +GLuint FGLRenderBuffers::CreateFrameBuffer(const FString &name, GLuint colorbuffer, GLuint depthstencil, bool multisample) { GLuint handle = 0; glGenFramebuffers(1, &handle); glBindFramebuffer(GL_FRAMEBUFFER, handle); FGLDebug::LabelObject(GL_FRAMEBUFFER, handle, name); - if (colorIsARenderBuffer) - glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, colorbuffer); + if (multisample) + { + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D_MULTISAMPLE, colorbuffer, 0); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D_MULTISAMPLE, depthstencil, 0); + } else + { glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, colorbuffer, 0); - glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_RENDERBUFFER, depthstencil); - if (CheckFrameBufferCompleteness()) - ClearFrameBuffer(true, true); - return handle; -} - -GLuint FGLRenderBuffers::CreateFrameBuffer(const FString &name, GLuint colorbuffer, GLuint depth, GLuint stencil, bool colorIsARenderBuffer) -{ - GLuint handle = 0; - glGenFramebuffers(1, &handle); - glBindFramebuffer(GL_FRAMEBUFFER, handle); - FGLDebug::LabelObject(GL_FRAMEBUFFER, handle, name); - if (colorIsARenderBuffer) - glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, colorbuffer); - else - glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, colorbuffer, 0); - glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_RENDERBUFFER, depth); - 
glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_RENDERBUFFER, stencil); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, depthstencil, 0); + } if (CheckFrameBufferCompleteness()) ClearFrameBuffer(true, true); return handle; @@ -394,22 +466,23 @@ bool FGLRenderBuffers::CheckFrameBufferCompleteness() FailedCreate = true; -#if 0 - FString error = "glCheckFramebufferStatus failed: "; - switch (result) + if (gl_debug_level > 0) { - default: error.AppendFormat("error code %d", (int)result); break; - case GL_FRAMEBUFFER_UNDEFINED: error << "GL_FRAMEBUFFER_UNDEFINED"; break; - case GL_FRAMEBUFFER_INCOMPLETE_ATTACHMENT: error << "GL_FRAMEBUFFER_INCOMPLETE_ATTACHMENT"; break; - case GL_FRAMEBUFFER_INCOMPLETE_MISSING_ATTACHMENT: error << "GL_FRAMEBUFFER_INCOMPLETE_MISSING_ATTACHMENT"; break; - case GL_FRAMEBUFFER_INCOMPLETE_DRAW_BUFFER: error << "GL_FRAMEBUFFER_INCOMPLETE_DRAW_BUFFER"; break; - case GL_FRAMEBUFFER_INCOMPLETE_READ_BUFFER: error << "GL_FRAMEBUFFER_INCOMPLETE_READ_BUFFER"; break; - case GL_FRAMEBUFFER_UNSUPPORTED: error << "GL_FRAMEBUFFER_UNSUPPORTED"; break; - case GL_FRAMEBUFFER_INCOMPLETE_MULTISAMPLE: error << "GL_FRAMEBUFFER_INCOMPLETE_MULTISAMPLE"; break; - case GL_FRAMEBUFFER_INCOMPLETE_LAYER_TARGETS: error << "GL_FRAMEBUFFER_INCOMPLETE_LAYER_TARGETS"; break; + FString error = "glCheckFramebufferStatus failed: "; + switch (result) + { + default: error.AppendFormat("error code %d", (int)result); break; + case GL_FRAMEBUFFER_UNDEFINED: error << "GL_FRAMEBUFFER_UNDEFINED"; break; + case GL_FRAMEBUFFER_INCOMPLETE_ATTACHMENT: error << "GL_FRAMEBUFFER_INCOMPLETE_ATTACHMENT"; break; + case GL_FRAMEBUFFER_INCOMPLETE_MISSING_ATTACHMENT: error << "GL_FRAMEBUFFER_INCOMPLETE_MISSING_ATTACHMENT"; break; + case GL_FRAMEBUFFER_INCOMPLETE_DRAW_BUFFER: error << "GL_FRAMEBUFFER_INCOMPLETE_DRAW_BUFFER"; break; + case GL_FRAMEBUFFER_INCOMPLETE_READ_BUFFER: error << "GL_FRAMEBUFFER_INCOMPLETE_READ_BUFFER"; break; + case 
GL_FRAMEBUFFER_UNSUPPORTED: error << "GL_FRAMEBUFFER_UNSUPPORTED"; break; + case GL_FRAMEBUFFER_INCOMPLETE_MULTISAMPLE: error << "GL_FRAMEBUFFER_INCOMPLETE_MULTISAMPLE"; break; + case GL_FRAMEBUFFER_INCOMPLETE_LAYER_TARGETS: error << "GL_FRAMEBUFFER_INCOMPLETE_LAYER_TARGETS"; break; + } + Printf("%s\n", error.GetChars()); } - I_FatalError(error); -#endif return false; } @@ -482,6 +555,21 @@ void FGLRenderBuffers::BindSceneFB() glBindFramebuffer(GL_FRAMEBUFFER, mSceneFB); } +//========================================================================== +// +// Binds the depth texture to the specified texture unit +// +//========================================================================== + +void FGLRenderBuffers::BindSceneDepthTexture(int index) +{ + glActiveTexture(GL_TEXTURE0 + index); + if (mSamples > 1) + glBindTexture(GL_TEXTURE_2D_MULTISAMPLE, mSceneDepthStencil); + else + glBindTexture(GL_TEXTURE_2D, mSceneDepthStencil); +} + //========================================================================== // // Binds the current scene/effect/hud texture to the specified texture unit diff --git a/src/gl/renderer/gl_renderbuffers.h b/src/gl/renderer/gl_renderbuffers.h index 08303a912e..32430f2ed4 100644 --- a/src/gl/renderer/gl_renderbuffers.h +++ b/src/gl/renderer/gl_renderbuffers.h @@ -23,6 +23,7 @@ public: bool Setup(int width, int height, int sceneWidth, int sceneHeight); void BindSceneFB(); + void BindSceneDepthTexture(int index); void BlitSceneToTexture(); void BindCurrentTexture(int index); @@ -35,24 +36,38 @@ public: enum { NumBloomLevels = 4 }; FGLBloomTextureLevel BloomLevels[NumBloomLevels]; + // Ambient occlusion buffers + GLuint AmbientTexture0 = 0; + GLuint AmbientTexture1 = 0; + GLuint AmbientFB0 = 0; + GLuint AmbientFB1 = 0; + int AmbientWidth = 0; + int AmbientHeight = 0; + GLuint AmbientRandomTexture = 0; + static bool IsEnabled(); int GetWidth() const { return mWidth; } int GetHeight() const { return mHeight; } + int GetSceneWidth() const { 
return mSceneWidth; } + int GetSceneHeight() const { return mSceneHeight; } + private: void ClearScene(); void ClearPipeline(); void ClearBloom(); + void ClearAmbientOcclusion(); void CreateScene(int width, int height, int samples); void CreatePipeline(int width, int height); void CreateBloom(int width, int height); - GLuint Create2DTexture(const FString &name, GLuint format, int width, int height); + void CreateAmbientOcclusion(int width, int height); + GLuint Create2DTexture(const FString &name, GLuint format, int width, int height, const void *data = nullptr); + GLuint Create2DMultisampleTexture(const FString &name, GLuint format, int width, int height, int samples, bool fixedSampleLocations); GLuint CreateRenderBuffer(const FString &name, GLuint format, int width, int height); GLuint CreateRenderBuffer(const FString &name, GLuint format, int samples, int width, int height); GLuint CreateFrameBuffer(const FString &name, GLuint colorbuffer); - GLuint CreateFrameBuffer(const FString &name, GLuint colorbuffer, GLuint depthstencil, bool colorIsARenderBuffer); - GLuint CreateFrameBuffer(const FString &name, GLuint colorbuffer, GLuint depth, GLuint stencil, bool colorIsARenderBuffer); + GLuint CreateFrameBuffer(const FString &name, GLuint colorbuffer, GLuint depthstencil, bool multisample); bool CheckFrameBufferCompleteness(); void ClearFrameBuffer(bool stencil, bool depth); void DeleteTexture(GLuint &handle); @@ -63,8 +78,8 @@ private: int mHeight = 0; int mSamples = 0; int mMaxSamples = 0; - int mBloomWidth = 0; - int mBloomHeight = 0; + int mSceneWidth = 0; + int mSceneHeight = 0; static const int NumPipelineTextures = 2; int mCurrentPipelineTexture = 0; @@ -72,8 +87,6 @@ private: // Buffers for the scene GLuint mSceneMultisample = 0; GLuint mSceneDepthStencil = 0; - GLuint mSceneDepth = 0; - GLuint mSceneStencil = 0; GLuint mSceneFB = 0; // Effect/HUD buffers diff --git a/src/gl/shaders/gl_ambientshader.cpp b/src/gl/shaders/gl_ambientshader.cpp index 
6b8c9cc485..f3b6eced67 100644 --- a/src/gl/shaders/gl_ambientshader.cpp +++ b/src/gl/shaders/gl_ambientshader.cpp @@ -49,22 +49,24 @@ #include "gl/system/gl_cvars.h" #include "gl/shaders/gl_ambientshader.h" -void FLinearDepthShader::Bind() +void FLinearDepthShader::Bind(bool multisample) { - if (!mShader) + auto &shader = mShader[multisample]; + if (!shader) { - mShader.Compile(FShaderProgram::Vertex, "shaders/glsl/screenquad.vp", "", 330); - mShader.Compile(FShaderProgram::Fragment, "shaders/glsl/lineardepth.fp", "", 330); - mShader.SetFragDataLocation(0, "FragColor"); - mShader.Link("shaders/glsl/lineardepth"); - mShader.SetAttribLocation(0, "PositionInProjection"); - DepthTexture.Init(mShader, "DepthTexture"); - LinearizeDepthA.Init(mShader, "LinearizeDepthA"); - LinearizeDepthB.Init(mShader, "LinearizeDepthB"); - InverseDepthRangeA.Init(mShader, "InverseDepthRangeA"); - InverseDepthRangeB.Init(mShader, "InverseDepthRangeB"); + shader.Compile(FShaderProgram::Vertex, "shaders/glsl/screenquad.vp", "", 330); + shader.Compile(FShaderProgram::Fragment, "shaders/glsl/lineardepth.fp", multisample ? 
"#define MULTISAMPLE\n" : "", 330); + shader.SetFragDataLocation(0, "FragColor"); + shader.Link("shaders/glsl/lineardepth"); + shader.SetAttribLocation(0, "PositionInProjection"); + DepthTexture[multisample].Init(shader, "DepthTexture"); + SampleCount[multisample].Init(shader, "SampleCount"); + LinearizeDepthA[multisample].Init(shader, "LinearizeDepthA"); + LinearizeDepthB[multisample].Init(shader, "LinearizeDepthB"); + InverseDepthRangeA[multisample].Init(shader, "InverseDepthRangeA"); + InverseDepthRangeB[multisample].Init(shader, "InverseDepthRangeB"); } - mShader.Bind(); + shader.Bind(); } void FSSAOShader::Bind() diff --git a/src/gl/shaders/gl_ambientshader.h b/src/gl/shaders/gl_ambientshader.h index 419ca939e2..5f9ef630b5 100644 --- a/src/gl/shaders/gl_ambientshader.h +++ b/src/gl/shaders/gl_ambientshader.h @@ -6,16 +6,17 @@ class FLinearDepthShader { public: - void Bind(); + void Bind(bool multisample); - FBufferedUniformSampler DepthTexture; - FBufferedUniform1f LinearizeDepthA; - FBufferedUniform1f LinearizeDepthB; - FBufferedUniform1f InverseDepthRangeA; - FBufferedUniform1f InverseDepthRangeB; + FBufferedUniformSampler DepthTexture[2]; + FBufferedUniform1i SampleCount[2]; + FBufferedUniform1f LinearizeDepthA[2]; + FBufferedUniform1f LinearizeDepthB[2]; + FBufferedUniform1f InverseDepthRangeA[2]; + FBufferedUniform1f InverseDepthRangeB[2]; private: - FShaderProgram mShader; + FShaderProgram mShader[2]; }; class FSSAOShader diff --git a/wadsrc/static/shaders/glsl/lineardepth.fp b/wadsrc/static/shaders/glsl/lineardepth.fp index f61bb39955..31df9dc321 100644 --- a/wadsrc/static/shaders/glsl/lineardepth.fp +++ b/wadsrc/static/shaders/glsl/lineardepth.fp @@ -2,7 +2,12 @@ in vec2 TexCoord; out vec4 FragColor; +#if defined(MULTISAMPLE) +uniform sampler2DMS DepthTexture; +uniform int SampleCount; +#else uniform sampler2D DepthTexture; +#endif uniform float LinearizeDepthA; uniform float LinearizeDepthB; uniform float InverseDepthRangeA; @@ -10,7 +15,21 @@ uniform 
float InverseDepthRangeB; void main() { +#if defined(MULTISAMPLE) + ivec2 texSize = textureSize(DepthTexture); + ivec2 ipos = ivec2(TexCoord * vec2(texSize)); + float depth = 0.0; + for (int i = 0; i < SampleCount; i++) + depth += texelFetch(DepthTexture, ipos, i).x; + depth /= float(SampleCount); +#else + /*ivec2 texSize = textureSize(DepthTexture, 0); + ivec2 ipos = ivec2(TexCoord * vec2(texSize)); + if (ipos.x < 0) ipos.x += texSize.x; + if (ipos.y < 0) ipos.y += texSize.y; + float depth = texelFetch(DepthTexture, ipos, 0).x;*/ float depth = texture(DepthTexture, TexCoord).x; +#endif float normalizedDepth = clamp(InverseDepthRangeA * depth + InverseDepthRangeB, 0.0, 1.0); FragColor = vec4(1.0 / (normalizedDepth * LinearizeDepthA + LinearizeDepthB), 0.0, 0.0, 1.0); } From e7765bb240820c608dcb710a94a7b42d19ea465c Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 3 Sep 2016 04:29:50 +0200 Subject: [PATCH 102/912] Move SSAO pass to be before translucent rendering Fix depth sampling location when not using fullscreen scene --- src/gl/renderer/gl_postprocess.cpp | 4 ++-- src/gl/scene/gl_scene.cpp | 2 ++ src/gl/shaders/gl_ambientshader.cpp | 2 ++ src/gl/shaders/gl_ambientshader.h | 2 ++ wadsrc/static/shaders/glsl/lineardepth.fp | 12 +++++++++--- 5 files changed, 17 insertions(+), 5 deletions(-) diff --git a/src/gl/renderer/gl_postprocess.cpp b/src/gl/renderer/gl_postprocess.cpp index 1cd893f1ce..56032704ee 100644 --- a/src/gl/renderer/gl_postprocess.cpp +++ b/src/gl/renderer/gl_postprocess.cpp @@ -136,7 +136,6 @@ void FGLRenderer::RenderScreenQuad() void FGLRenderer::PostProcessScene() { - AmbientOccludeScene(); mBuffers->BlitSceneToTexture(); BloomScene(); TonemapScene(); @@ -187,6 +186,8 @@ void FGLRenderer::AmbientOccludeScene() mLinearDepthShader->LinearizeDepthB[multisample].Set(MAX(1.0f / GetZNear(), 1.e-8f)); mLinearDepthShader->InverseDepthRangeA[multisample].Set(1.0f); mLinearDepthShader->InverseDepthRangeB[multisample].Set(0.0f); + 
mLinearDepthShader->Scale[multisample].Set(mSceneViewport.width / (float)mScreenViewport.width, mSceneViewport.height / (float)mScreenViewport.height); + mLinearDepthShader->Offset[multisample].Set(mSceneViewport.left / (float)mScreenViewport.width, mSceneViewport.top / (float)mScreenViewport.height); RenderScreenQuad(); // Apply ambient occlusion @@ -246,7 +247,6 @@ void FGLRenderer::AmbientOccludeScene() mSSAOCombineShader->Bind(); mSSAOCombineShader->AODepthTexture.Set(0); RenderScreenQuad(); - glViewport(mScreenViewport.left, mScreenViewport.top, mScreenViewport.width, mScreenViewport.height); FGLDebug::PopGroup(); } diff --git a/src/gl/scene/gl_scene.cpp b/src/gl/scene/gl_scene.cpp index d5df317841..087bd9f624 100644 --- a/src/gl/scene/gl_scene.cpp +++ b/src/gl/scene/gl_scene.cpp @@ -503,6 +503,8 @@ void FGLRenderer::DrawScene(int drawmode) RenderScene(recursion); + AmbientOccludeScene(); + // Handle all portals after rendering the opaque objects but before // doing all translucent stuff recursion++; diff --git a/src/gl/shaders/gl_ambientshader.cpp b/src/gl/shaders/gl_ambientshader.cpp index f3b6eced67..54a651281c 100644 --- a/src/gl/shaders/gl_ambientshader.cpp +++ b/src/gl/shaders/gl_ambientshader.cpp @@ -65,6 +65,8 @@ void FLinearDepthShader::Bind(bool multisample) LinearizeDepthB[multisample].Init(shader, "LinearizeDepthB"); InverseDepthRangeA[multisample].Init(shader, "InverseDepthRangeA"); InverseDepthRangeB[multisample].Init(shader, "InverseDepthRangeB"); + Scale[multisample].Init(shader, "Scale"); + Offset[multisample].Init(shader, "Offset"); } shader.Bind(); } diff --git a/src/gl/shaders/gl_ambientshader.h b/src/gl/shaders/gl_ambientshader.h index 5f9ef630b5..5b18ea85fd 100644 --- a/src/gl/shaders/gl_ambientshader.h +++ b/src/gl/shaders/gl_ambientshader.h @@ -14,6 +14,8 @@ public: FBufferedUniform1f LinearizeDepthB[2]; FBufferedUniform1f InverseDepthRangeA[2]; FBufferedUniform1f InverseDepthRangeB[2]; + FBufferedUniform2f Scale[2]; + 
FBufferedUniform2f Offset[2]; private: FShaderProgram mShader[2]; diff --git a/wadsrc/static/shaders/glsl/lineardepth.fp b/wadsrc/static/shaders/glsl/lineardepth.fp index 31df9dc321..7e4eee745b 100644 --- a/wadsrc/static/shaders/glsl/lineardepth.fp +++ b/wadsrc/static/shaders/glsl/lineardepth.fp @@ -8,28 +8,34 @@ uniform int SampleCount; #else uniform sampler2D DepthTexture; #endif + uniform float LinearizeDepthA; uniform float LinearizeDepthB; uniform float InverseDepthRangeA; uniform float InverseDepthRangeB; +uniform vec2 Scale; +uniform vec2 Offset; void main() { + vec2 uv = Offset + TexCoord * Scale; + #if defined(MULTISAMPLE) ivec2 texSize = textureSize(DepthTexture); - ivec2 ipos = ivec2(TexCoord * vec2(texSize)); + ivec2 ipos = ivec2(uv * vec2(texSize)); float depth = 0.0; for (int i = 0; i < SampleCount; i++) depth += texelFetch(DepthTexture, ipos, i).x; depth /= float(SampleCount); #else /*ivec2 texSize = textureSize(DepthTexture, 0); - ivec2 ipos = ivec2(TexCoord * vec2(texSize)); + ivec2 ipos = ivec2(uv * vec2(texSize)); if (ipos.x < 0) ipos.x += texSize.x; if (ipos.y < 0) ipos.y += texSize.y; float depth = texelFetch(DepthTexture, ipos, 0).x;*/ - float depth = texture(DepthTexture, TexCoord).x; + float depth = texture(DepthTexture, uv).x; #endif + float normalizedDepth = clamp(InverseDepthRangeA * depth + InverseDepthRangeB, 0.0, 1.0); FragColor = vec4(1.0 / (normalizedDepth * LinearizeDepthA + LinearizeDepthB), 0.0, 0.0, 1.0); } From 3727c5ed0fc50f2bcaff689766e9d728ddf017a5 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 4 Sep 2016 08:15:29 +0200 Subject: [PATCH 103/912] Mark portals in scene alpha channel for the SSAO pass --- src/gl/renderer/gl_postprocess.cpp | 5 +++++ src/gl/renderer/gl_renderbuffers.cpp | 15 +++++++++++++++ src/gl/renderer/gl_renderbuffers.h | 1 + src/gl/scene/gl_portal.cpp | 8 +++++--- src/gl/shaders/gl_ambientshader.cpp | 1 + src/gl/shaders/gl_ambientshader.h | 1 + wadsrc/static/shaders/glsl/lineardepth.fp | 8 
+++++--- wadsrc/static/shaders/glsl/stencil.fp | 2 +- 8 files changed, 34 insertions(+), 7 deletions(-) diff --git a/src/gl/renderer/gl_postprocess.cpp b/src/gl/renderer/gl_postprocess.cpp index 56032704ee..5122094540 100644 --- a/src/gl/renderer/gl_postprocess.cpp +++ b/src/gl/renderer/gl_postprocess.cpp @@ -179,8 +179,13 @@ void FGLRenderer::AmbientOccludeScene() mBuffers->BindSceneDepthTexture(0); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + mBuffers->BindSceneColorTexture(1); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glActiveTexture(GL_TEXTURE0); mLinearDepthShader->Bind(multisample); mLinearDepthShader->DepthTexture[multisample].Set(0); + mLinearDepthShader->ColorTexture[multisample].Set(1); if (multisample) mLinearDepthShader->SampleCount[multisample].Set(gl_multisample); mLinearDepthShader->LinearizeDepthA[multisample].Set(1.0f / GetZFar() - 1.0f / GetZNear()); mLinearDepthShader->LinearizeDepthB[multisample].Set(MAX(1.0f / GetZNear(), 1.e-8f)); diff --git a/src/gl/renderer/gl_renderbuffers.cpp b/src/gl/renderer/gl_renderbuffers.cpp index b2cee57049..b0ea123a00 100644 --- a/src/gl/renderer/gl_renderbuffers.cpp +++ b/src/gl/renderer/gl_renderbuffers.cpp @@ -555,6 +555,21 @@ void FGLRenderBuffers::BindSceneFB() glBindFramebuffer(GL_FRAMEBUFFER, mSceneFB); } +//========================================================================== +// +// Binds the scene color texture to the specified texture unit +// +//========================================================================== + +void FGLRenderBuffers::BindSceneColorTexture(int index) +{ + glActiveTexture(GL_TEXTURE0 + index); + if (mSamples > 1) + glBindTexture(GL_TEXTURE_2D_MULTISAMPLE, mSceneMultisample); + else + glBindTexture(GL_TEXTURE_2D, mPipelineTexture[0]); +} + 
//========================================================================== // // Binds the depth texture to the specified texture unit diff --git a/src/gl/renderer/gl_renderbuffers.h b/src/gl/renderer/gl_renderbuffers.h index 32430f2ed4..2661908e36 100644 --- a/src/gl/renderer/gl_renderbuffers.h +++ b/src/gl/renderer/gl_renderbuffers.h @@ -23,6 +23,7 @@ public: bool Setup(int width, int height, int sceneWidth, int sceneHeight); void BindSceneFB(); + void BindSceneColorTexture(int index); void BindSceneDepthTexture(int index); void BlitSceneToTexture(); diff --git a/src/gl/scene/gl_portal.cpp b/src/gl/scene/gl_portal.cpp index cd3efb9e3b..20ded84aa9 100644 --- a/src/gl/scene/gl_portal.cpp +++ b/src/gl/scene/gl_portal.cpp @@ -429,14 +429,16 @@ void GLPortal::End(bool usestencil) glDepthFunc(GL_LEQUAL); glDepthRange(0, 1); { - ScopedColorMask colorMask(0, 0, 0, 0); - // glColorMask(0,0,0,0); // no graphics + ScopedColorMask colorMask(0, 0, 0, 1); // mark portal in alpha channel but don't touch color gl_RenderState.SetEffect(EFF_STENCIL); gl_RenderState.EnableTexture(false); + gl_RenderState.BlendFunc(GL_ONE, GL_ZERO); + gl_RenderState.BlendEquation(GL_ADD); + gl_RenderState.Apply(); DrawPortalStencil(); gl_RenderState.SetEffect(EFF_NONE); gl_RenderState.EnableTexture(true); - } // glColorMask(1, 1, 1, 1); + } glDepthFunc(GL_LESS); } PortalAll.Unclock(); diff --git a/src/gl/shaders/gl_ambientshader.cpp b/src/gl/shaders/gl_ambientshader.cpp index 54a651281c..3fbd034348 100644 --- a/src/gl/shaders/gl_ambientshader.cpp +++ b/src/gl/shaders/gl_ambientshader.cpp @@ -60,6 +60,7 @@ void FLinearDepthShader::Bind(bool multisample) shader.Link("shaders/glsl/lineardepth"); shader.SetAttribLocation(0, "PositionInProjection"); DepthTexture[multisample].Init(shader, "DepthTexture"); + ColorTexture[multisample].Init(shader, "ColorTexture"); SampleCount[multisample].Init(shader, "SampleCount"); LinearizeDepthA[multisample].Init(shader, "LinearizeDepthA"); 
LinearizeDepthB[multisample].Init(shader, "LinearizeDepthB"); diff --git a/src/gl/shaders/gl_ambientshader.h b/src/gl/shaders/gl_ambientshader.h index 5b18ea85fd..9c97791ba8 100644 --- a/src/gl/shaders/gl_ambientshader.h +++ b/src/gl/shaders/gl_ambientshader.h @@ -9,6 +9,7 @@ public: void Bind(bool multisample); FBufferedUniformSampler DepthTexture[2]; + FBufferedUniformSampler ColorTexture[2]; FBufferedUniform1i SampleCount[2]; FBufferedUniform1f LinearizeDepthA[2]; FBufferedUniform1f LinearizeDepthB[2]; diff --git a/wadsrc/static/shaders/glsl/lineardepth.fp b/wadsrc/static/shaders/glsl/lineardepth.fp index 7e4eee745b..61e5e81261 100644 --- a/wadsrc/static/shaders/glsl/lineardepth.fp +++ b/wadsrc/static/shaders/glsl/lineardepth.fp @@ -4,9 +4,11 @@ out vec4 FragColor; #if defined(MULTISAMPLE) uniform sampler2DMS DepthTexture; +uniform sampler2DMS ColorTexture; uniform int SampleCount; #else uniform sampler2D DepthTexture; +uniform sampler2D ColorTexture; #endif uniform float LinearizeDepthA; @@ -25,15 +27,15 @@ void main() ivec2 ipos = ivec2(uv * vec2(texSize)); float depth = 0.0; for (int i = 0; i < SampleCount; i++) - depth += texelFetch(DepthTexture, ipos, i).x; + depth += texelFetch(ColorTexture, ipos, i).a != 0.0 ? texelFetch(DepthTexture, ipos, i).x : 1.0; depth /= float(SampleCount); #else /*ivec2 texSize = textureSize(DepthTexture, 0); ivec2 ipos = ivec2(uv * vec2(texSize)); if (ipos.x < 0) ipos.x += texSize.x; if (ipos.y < 0) ipos.y += texSize.y; - float depth = texelFetch(DepthTexture, ipos, 0).x;*/ - float depth = texture(DepthTexture, uv).x; + float depth = texelFetch(ColorTexture, ipos, 0).a != 0.0 ? texelFetch(DepthTexture, ipos, 0).x : 1.0;*/ + float depth = texture(ColorTexture, uv).a != 0.0 ? 
texture(DepthTexture, uv).x : 1.0; #endif float normalizedDepth = clamp(InverseDepthRangeA * depth + InverseDepthRangeB, 0.0, 1.0); diff --git a/wadsrc/static/shaders/glsl/stencil.fp b/wadsrc/static/shaders/glsl/stencil.fp index d1b8745f6e..65f12b4055 100644 --- a/wadsrc/static/shaders/glsl/stencil.fp +++ b/wadsrc/static/shaders/glsl/stencil.fp @@ -3,6 +3,6 @@ out vec4 FragColor; void main() { - FragColor = vec4(1.0); + FragColor = vec4(1.0, 1.0, 1.0, 0.0); } From 8861b1aaffe2f0f6b9cb416cf2ad7ed5a6e8ad0b Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 9 Sep 2016 09:47:46 +0200 Subject: [PATCH 104/912] Grab tanHalfFovy from the projection matrix --- src/gl/renderer/gl_postprocess.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gl/renderer/gl_postprocess.cpp b/src/gl/renderer/gl_postprocess.cpp index b4fd4d05f9..06c85a7bc0 100644 --- a/src/gl/renderer/gl_postprocess.cpp +++ b/src/gl/renderer/gl_postprocess.cpp @@ -168,7 +168,7 @@ void FGLRenderer::AmbientOccludeScene() bool multisample = gl_multisample > 1; //float tanHalfFovy = tan(fovy * (M_PI / 360.0f)); - float tanHalfFovy = 1.0f / 1.33333302f; // 1.0f / gl_RenderState.mProjectionMatrix.get()[5]; + float tanHalfFovy = 1.0f / gl_RenderState.mProjectionMatrix.get()[5]; float invFocalLenX = tanHalfFovy * (mBuffers->GetSceneWidth() / (float)mBuffers->GetSceneHeight()); float invFocalLenY = tanHalfFovy; float nDotVBias = clamp(bias, 0.0f, 1.0f); From 63fb604e988298edf1ad9cf13e1f8a1ad95b747e Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 9 Sep 2016 18:19:00 +0200 Subject: [PATCH 105/912] Fix stripes in the reconstructed normals due to down scaling --- src/gl/renderer/gl_postprocess.cpp | 2 +- wadsrc/static/shaders/glsl/lineardepth.fp | 17 ++++++++++------- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/src/gl/renderer/gl_postprocess.cpp b/src/gl/renderer/gl_postprocess.cpp index 06c85a7bc0..e00ca73058 100644 --- a/src/gl/renderer/gl_postprocess.cpp +++ 
b/src/gl/renderer/gl_postprocess.cpp @@ -192,7 +192,7 @@ void FGLRenderer::AmbientOccludeScene() mLinearDepthShader->LinearizeDepthB[multisample].Set(MAX(1.0f / GetZNear(), 1.e-8f)); mLinearDepthShader->InverseDepthRangeA[multisample].Set(1.0f); mLinearDepthShader->InverseDepthRangeB[multisample].Set(0.0f); - mLinearDepthShader->Scale[multisample].Set(mSceneViewport.width / (float)mScreenViewport.width, mSceneViewport.height / (float)mScreenViewport.height); + mLinearDepthShader->Scale[multisample].Set(mBuffers->AmbientWidth * 2.0f / (float)mScreenViewport.width, mBuffers->AmbientHeight * 2.0f / (float)mScreenViewport.height); mLinearDepthShader->Offset[multisample].Set(mSceneViewport.left / (float)mScreenViewport.width, mSceneViewport.top / (float)mScreenViewport.height); RenderScreenQuad(); diff --git a/wadsrc/static/shaders/glsl/lineardepth.fp b/wadsrc/static/shaders/glsl/lineardepth.fp index 61e5e81261..558738bd9f 100644 --- a/wadsrc/static/shaders/glsl/lineardepth.fp +++ b/wadsrc/static/shaders/glsl/lineardepth.fp @@ -24,18 +24,21 @@ void main() #if defined(MULTISAMPLE) ivec2 texSize = textureSize(DepthTexture); - ivec2 ipos = ivec2(uv * vec2(texSize)); +#else + ivec2 texSize = textureSize(DepthTexture, 0); +#endif + + // Use floor here because as we downscale the sampling error has to remain uniform to prevent + // noise in the depth values. + ivec2 ipos = ivec2(max(floor(uv * vec2(texSize) - 0.75), vec2(0.0))); + +#if defined(MULTISAMPLE) float depth = 0.0; for (int i = 0; i < SampleCount; i++) depth += texelFetch(ColorTexture, ipos, i).a != 0.0 ? texelFetch(DepthTexture, ipos, i).x : 1.0; depth /= float(SampleCount); #else - /*ivec2 texSize = textureSize(DepthTexture, 0); - ivec2 ipos = ivec2(uv * vec2(texSize)); - if (ipos.x < 0) ipos.x += texSize.x; - if (ipos.y < 0) ipos.y += texSize.y; - float depth = texelFetch(ColorTexture, ipos, 0).a != 0.0 ? texelFetch(DepthTexture, ipos, 0).x : 1.0;*/ - float depth = texture(ColorTexture, uv).a != 0.0 ? 
texture(DepthTexture, uv).x : 1.0; + float depth = texelFetch(ColorTexture, ipos, 0).a != 0.0 ? texelFetch(DepthTexture, ipos, 0).x : 1.0; #endif float normalizedDepth = clamp(InverseDepthRangeA * depth + InverseDepthRangeB, 0.0, 1.0); From 03d0b09f293b20d454dd5a7357096a2aae557746 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 10 Sep 2016 22:39:09 +0200 Subject: [PATCH 106/912] Fix depth blur --- src/gl/renderer/gl_postprocess.cpp | 12 ++++------ wadsrc/static/shaders/glsl/depthblur.fp | 32 ++++++++----------------- 2 files changed, 14 insertions(+), 30 deletions(-) diff --git a/src/gl/renderer/gl_postprocess.cpp b/src/gl/renderer/gl_postprocess.cpp index e00ca73058..9b791a3f19 100644 --- a/src/gl/renderer/gl_postprocess.cpp +++ b/src/gl/renderer/gl_postprocess.cpp @@ -118,11 +118,6 @@ CUSTOM_CVAR(Float, gl_ssao_blur_amount, 4.0f, 0) { if (self < 0.1f) self = 0.1f; } -CUSTOM_CVAR(Int, gl_ssao_blur_samples, 5, 0) -{ - if (self < 3 || self > 15 || self % 2 == 0) - self = 9; -} EXTERN_CVAR(Float, vid_brightness) EXTERN_CVAR(Float, vid_contrast) @@ -163,7 +158,6 @@ void FGLRenderer::AmbientOccludeScene() float bias = gl_ssao_bias; float aoRadius = gl_ssao_radius; const float blurAmount = gl_ssao_blur_amount; - int blurSampleCount = gl_ssao_blur_samples; float aoStrength = gl_ssao_strength; bool multisample = gl_multisample > 1; @@ -174,6 +168,8 @@ void FGLRenderer::AmbientOccludeScene() float nDotVBias = clamp(bias, 0.0f, 1.0f); float r2 = aoRadius * aoRadius; + float blurSharpness = 1.0f / blurAmount; + // Calculate linear depth values glBindFramebuffer(GL_FRAMEBUFFER, mBuffers->AmbientFB0); glViewport(0, 0, mBuffers->AmbientWidth, mBuffers->AmbientHeight); @@ -225,14 +221,14 @@ void FGLRenderer::AmbientOccludeScene() glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); mDepthBlurShader->Bind(false); - mDepthBlurShader->BlurSharpness[false].Set(blurAmount); + 
mDepthBlurShader->BlurSharpness[false].Set(blurSharpness); mDepthBlurShader->InvFullResolution[false].Set(1.0f / mBuffers->AmbientWidth, 1.0f / mBuffers->AmbientHeight); RenderScreenQuad(); glBindFramebuffer(GL_FRAMEBUFFER, mBuffers->AmbientFB1); glBindTexture(GL_TEXTURE_2D, mBuffers->AmbientTexture0); mDepthBlurShader->Bind(true); - mDepthBlurShader->BlurSharpness[true].Set(blurAmount); + mDepthBlurShader->BlurSharpness[true].Set(blurSharpness); mDepthBlurShader->InvFullResolution[true].Set(1.0f / mBuffers->AmbientWidth, 1.0f / mBuffers->AmbientHeight); mDepthBlurShader->PowExponent[true].Set(1.8f); RenderScreenQuad(); diff --git a/wadsrc/static/shaders/glsl/depthblur.fp b/wadsrc/static/shaders/glsl/depthblur.fp index bd464d03da..c4f4438d30 100644 --- a/wadsrc/static/shaders/glsl/depthblur.fp +++ b/wadsrc/static/shaders/glsl/depthblur.fp @@ -7,39 +7,32 @@ uniform float BlurSharpness; uniform vec2 InvFullResolution; uniform float PowExponent; -#define KERNEL_RADIUS 3.0 +#define KERNEL_RADIUS 7.0 -struct CenterPixelData -{ - vec2 UV; - float Depth; - float Sharpness; -}; - -float CrossBilateralWeight(float r, float sampleDepth, CenterPixelData center) +float CrossBilateralWeight(float r, float sampleDepth, float centerDepth) { const float blurSigma = KERNEL_RADIUS * 0.5; const float blurFalloff = 1.0 / (2.0 * blurSigma * blurSigma); - float deltaZ = (sampleDepth - center.Depth) * center.Sharpness; + float deltaZ = (sampleDepth - centerDepth) * BlurSharpness; return exp2(-r * r * blurFalloff - deltaZ * deltaZ); } -void ProcessSample(float ao, float z, float r, CenterPixelData center, inout float totalAO, inout float totalW) +void ProcessSample(float ao, float z, float r, float centerDepth, inout float totalAO, inout float totalW) { - float w = CrossBilateralWeight(r, z, center); + float w = CrossBilateralWeight(r, z, centerDepth); totalAO += w * ao; totalW += w; } -void ProcessRadius(vec2 deltaUV, CenterPixelData center, inout float totalAO, inout float totalW) +void 
ProcessRadius(vec2 deltaUV, float centerDepth, inout float totalAO, inout float totalW) { for (float r = 1; r <= KERNEL_RADIUS; r += 1.0) { - vec2 uv = r * deltaUV + center.UV; + vec2 uv = r * deltaUV + TexCoord; vec2 aoZ = texture(AODepthTexture, uv).xy; - ProcessSample(aoZ.x, aoZ.y, r, center, totalAO, totalW); + ProcessSample(aoZ.x, aoZ.y, r, centerDepth, totalAO, totalW); } } @@ -47,16 +40,11 @@ vec2 ComputeBlur(vec2 deltaUV) { vec2 aoZ = texture(AODepthTexture, TexCoord).xy; - CenterPixelData center; - center.UV = TexCoord; - center.Depth = aoZ.y; - center.Sharpness = BlurSharpness; - float totalAO = aoZ.x; float totalW = 1.0; - ProcessRadius(deltaUV, center, totalAO, totalW); - ProcessRadius(-deltaUV, center, totalAO, totalW); + ProcessRadius(deltaUV, aoZ.y, totalAO, totalW); + ProcessRadius(-deltaUV, aoZ.y, totalAO, totalW); return vec2(totalAO / totalW, aoZ.y); } From a0b6a0275c5c521455e843709153ed1d7270435b Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 11 Sep 2016 11:09:40 +0200 Subject: [PATCH 107/912] Fix random texture sampling bug --- src/gl/renderer/gl_postprocess.cpp | 2 ++ src/gl/renderer/gl_renderbuffers.cpp | 8 ++++---- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/gl/renderer/gl_postprocess.cpp b/src/gl/renderer/gl_postprocess.cpp index 9b791a3f19..f79ab2bf12 100644 --- a/src/gl/renderer/gl_postprocess.cpp +++ b/src/gl/renderer/gl_postprocess.cpp @@ -201,6 +201,8 @@ void FGLRenderer::AmbientOccludeScene() glBindTexture(GL_TEXTURE_2D, mBuffers->AmbientRandomTexture); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT); glActiveTexture(GL_TEXTURE0); mSSAOShader->Bind(); mSSAOShader->DepthTexture.Set(0); diff --git a/src/gl/renderer/gl_renderbuffers.cpp b/src/gl/renderer/gl_renderbuffers.cpp index 
9c442ce4e6..dec9ab8ede 100644 --- a/src/gl/renderer/gl_renderbuffers.cpp +++ b/src/gl/renderer/gl_renderbuffers.cpp @@ -342,10 +342,10 @@ void FGLRenderBuffers::CreateAmbientOcclusion(int width, int height) double z = distribution(generator); double w = distribution(generator); - randomValues[i * 4 + 0] = (int16_t)clamp(x * 32768.0, -32767.0, 32768.0); - randomValues[i * 4 + 1] = (int16_t)clamp(y * 32768.0, -32767.0, 32768.0); - randomValues[i * 4 + 2] = (int16_t)clamp(z * 32768.0, -32767.0, 32768.0); - randomValues[i * 4 + 3] = (int16_t)clamp(w * 32768.0, -32767.0, 32768.0); + randomValues[i * 4 + 0] = (int16_t)clamp(x * 32767.0, -32768.0, 32767.0); + randomValues[i * 4 + 1] = (int16_t)clamp(y * 32767.0, -32768.0, 32767.0); + randomValues[i * 4 + 2] = (int16_t)clamp(z * 32767.0, -32768.0, 32767.0); + randomValues[i * 4 + 3] = (int16_t)clamp(w * 32767.0, -32768.0, 32767.0); } AmbientRandomTexture = Create2DTexture("AmbientRandomTexture", GL_RGBA16_SNORM, 4, 4, randomValues); From b85e3b56e38e5c1a7edfbd0640889a0dbc6b0216 Mon Sep 17 00:00:00 2001 From: raa-eruanna Date: Wed, 14 Sep 2016 02:21:35 -0400 Subject: [PATCH 108/912] Establish QZDoom --- CMakeLists.txt | 4 ++-- src/version.h | 8 ++++---- src/win32/zdoom.rc | 16 ++++++++-------- wadsrc/CMakeLists.txt | 2 +- 4 files changed, 15 insertions(+), 15 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 80eac0c547..97c9410c25 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,5 @@ cmake_minimum_required( VERSION 2.8.7 ) -project(GZDoom) +project(QZDoom) if( COMMAND cmake_policy ) if( POLICY CMP0011 ) @@ -68,7 +68,7 @@ IF( NOT CMAKE_BUILD_TYPE ) ENDIF() set( ZDOOM_OUTPUT_DIR ${CMAKE_BINARY_DIR} CACHE PATH "Directory where zdoom.pk3 and the executable will be created." 
) -set( ZDOOM_EXE_NAME "gzdoom" CACHE FILEPATH "Name of the executable to create" ) +set( ZDOOM_EXE_NAME "qzdoom" CACHE FILEPATH "Name of the executable to create" ) if( MSVC ) # Allow the user to use ZDOOM_OUTPUT_DIR as a single release point. # Use zdoom, zdoomd, zdoom64, and zdoomd64 for the binary names diff --git a/src/version.h b/src/version.h index 0b2fa22e7c..9b2cd8f1a1 100644 --- a/src/version.h +++ b/src/version.h @@ -85,12 +85,12 @@ const char *GetVersionString(); #define DYNLIGHT // This is so that derivates can use the same savegame versions without worrying about engine compatibility -#define GAMESIG "GZDOOM" -#define BASEWAD "gzdoom.pk3" +#define GAMESIG "QZDOOM" +#define BASEWAD "qzdoom.pk3" // More stuff that needs to be different for derivatives. -#define GAMENAME "GZDoom" -#define GAMENAMELOWERCASE "gzdoom" +#define GAMENAME "QZDoom" +#define GAMENAMELOWERCASE "qzdoom" #define FORUM_URL "http://forum.drdteam.org" #define BUGS_FORUM_URL "http://forum.drdteam.org/viewforum.php?f=24" diff --git a/src/win32/zdoom.rc b/src/win32/zdoom.rc index 5e2226c6dd..1f760d8ea2 100644 --- a/src/win32/zdoom.rc +++ b/src/win32/zdoom.rc @@ -72,13 +72,13 @@ BEGIN " BEGIN\r\n" " VALUE ""Comments"", ""Thanks to id Software for creating DOOM and then releasing the source code. Thanks also to TeamTNT for creating BOOM, which ZDoom is partially based on. 
Includes code based on the Cajun Bot 0.97 by Martin Collberg.""\r\n" " VALUE ""CompanyName"", "" ""\r\n" - " VALUE ""FileDescription"", ""GZDoom""\r\n" + " VALUE ""FileDescription"", ""QZDoom""\r\n" " VALUE ""FileVersion"", RC_FILEVERSION2\r\n" - " VALUE ""InternalName"", ""GZDoom""\r\n" + " VALUE ""InternalName"", ""QZDoom""\r\n" " VALUE ""LegalCopyright"", ""Copyright \\u00A9 1993-1996 id Software, 1998-2010 Randy Heit, 2002-2010 Christoph Oelckers, et al.""\r\n" " VALUE ""LegalTrademarks"", ""DoomR is a Registered Trademark of id Software, Inc.""\r\n" - " VALUE ""OriginalFilename"", ""gzdoom.exe""\r\n" - " VALUE ""ProductName"", ""GZDoom""\r\n" + " VALUE ""OriginalFilename"", ""qzdoom.exe""\r\n" + " VALUE ""ProductName"", ""QZDoom""\r\n" " VALUE ""ProductVersion"", RC_PRODUCTVERSION2\r\n" " END\r\n" " END\r\n" @@ -492,13 +492,13 @@ BEGIN BEGIN VALUE "Comments", "Thanks to id Software for creating DOOM and then releasing the source code. Thanks also to TeamTNT for creating BOOM, which ZDoom is partially based on. Includes code based on the Cajun Bot 0.97 by Martin Collberg." VALUE "CompanyName", " " - VALUE "FileDescription", "GZDoom" + VALUE "FileDescription", "QZDoom" VALUE "FileVersion", RC_FILEVERSION2 - VALUE "InternalName", "GZDoom" + VALUE "InternalName", "QZDoom" VALUE "LegalCopyright", "Copyright \u00A9 1993-1996 id Software, 1998-2010 Randy Heit, 2002-2010 Christoph Oelckers, et al." VALUE "LegalTrademarks", "DoomR is a Registered Trademark of id Software, Inc." 
- VALUE "OriginalFilename", "gzdoom.exe" - VALUE "ProductName", "GZDoom" + VALUE "OriginalFilename", "qzdoom.exe" + VALUE "ProductName", "QZDoom" VALUE "ProductVersion", RC_PRODUCTVERSION2 END END diff --git a/wadsrc/CMakeLists.txt b/wadsrc/CMakeLists.txt index 80189a328c..5a85840e01 100644 --- a/wadsrc/CMakeLists.txt +++ b/wadsrc/CMakeLists.txt @@ -1,3 +1,3 @@ cmake_minimum_required( VERSION 2.8.7 ) -add_pk3(gzdoom.pk3 ${CMAKE_CURRENT_SOURCE_DIR}/static) +add_pk3(qzdoom.pk3 ${CMAKE_CURRENT_SOURCE_DIR}/static) From 004c7de89bb812cc9f01b7f86af242c3f11fe5c6 Mon Sep 17 00:00:00 2001 From: raa-eruanna Date: Wed, 14 Sep 2016 04:03:39 -0400 Subject: [PATCH 109/912] Part 1 of code merge --- src/gl/renderer/gl_colormap.h | 1 - src/r_data/colormaps.cpp | 2 +- src/r_data/colormaps.h | 8 ++++---- src/r_defs.h | 4 ++-- src/r_draw.cpp | 14 +++++++------- src/r_draw.h | 12 ++++++------ src/r_draw_rgba.cpp | 2 +- src/r_draw_rgba.h | 2 +- src/r_main.cpp | 2 +- src/r_main.h | 2 +- src/r_swrenderer.cpp | 2 +- src/r_things.cpp | 8 ++++---- src/r_things.h | 2 +- src/win32/win32gliface.cpp | 5 +++-- src/win32/win32gliface.h | 2 +- 15 files changed, 34 insertions(+), 34 deletions(-) diff --git a/src/gl/renderer/gl_colormap.h b/src/gl/renderer/gl_colormap.h index 2122b1248c..d66950309c 100644 --- a/src/gl/renderer/gl_colormap.h +++ b/src/gl/renderer/gl_colormap.h @@ -75,5 +75,4 @@ struct FColormap }; - #endif diff --git a/src/r_data/colormaps.cpp b/src/r_data/colormaps.cpp index ffaaa38ac6..3bfc89b4b5 100644 --- a/src/r_data/colormaps.cpp +++ b/src/r_data/colormaps.cpp @@ -71,7 +71,7 @@ struct FakeCmap }; TArray fakecmaps; -FColormap realcolormaps; +FSWColormap realcolormaps; size_t numfakecmaps; diff --git a/src/r_data/colormaps.h b/src/r_data/colormaps.h index bda6a5ea4f..039a85189b 100644 --- a/src/r_data/colormaps.h +++ b/src/r_data/colormaps.h @@ -9,10 +9,10 @@ void R_DeinitColormaps (); DWORD R_ColormapNumForName(const char *name); // killough 4/4/98 void R_SetDefaultColormap 
(const char *name); // [RH] change normal fadetable DWORD R_BlendForColormap (DWORD map); // [RH] return calculated blend for a colormap -extern FColormap realcolormaps; // [RH] make the colormaps externally visible +extern FSWColormap realcolormaps; // [RH] make the colormaps externally visible extern size_t numfakecmaps; -struct FColormap +struct FSWColormap { BYTE *Maps = nullptr; PalEntry Color = 0xffffffff; @@ -20,7 +20,7 @@ struct FColormap int Desaturate = 0; }; -struct FDynamicColormap : FColormap +struct FDynamicColormap : FSWColormap { void ChangeFade (PalEntry fadecolor); void ChangeColor (PalEntry lightcolor, int desaturate); @@ -47,7 +47,7 @@ enum }; -struct FSpecialColormap : FColormap +struct FSpecialColormap : FSWColormap { FSpecialColormap() { diff --git a/src/r_defs.h b/src/r_defs.h index f4bfbcdcf7..7d7ad7eaba 100644 --- a/src/r_defs.h +++ b/src/r_defs.h @@ -1509,13 +1509,13 @@ struct FMiniBSP // typedef BYTE lighttable_t; // This could be wider for >8 bit display. -struct FColormap; +struct FSWColormap; // This encapsulates the fields of vissprite_t that can be altered by AlterWeaponSprite struct visstyle_t { int ColormapNum; // Which colormap is rendered - FColormap *BaseColormap; // Base colormap used together with ColormapNum + FSWColormap *BaseColormap; // Base colormap used together with ColormapNum float Alpha; FRenderStyle RenderStyle; }; diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 682ed46688..f255352f50 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -100,7 +100,7 @@ void (*R_DrawFogBoundary)(int x1, int x2, short *uclip, short *dclip); void (*R_MapTiltedPlane)(int y, int x1); void (*R_MapColoredPlane)(int y, int x1); void (*R_DrawParticle)(vissprite_t *); -void (*R_SetupDrawSlab)(FColormap *base_colormap, float light, int shade); +void (*R_SetupDrawSlab)(FSWColormap *base_colormap, float light, int shade); void (*R_DrawSlab)(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, BYTE *p); fixed_t (*tmvline1_add)(); void 
(*tmvline4_add)(); @@ -146,7 +146,7 @@ extern "C" { int dc_pitch=0xABadCafe; // [RH] Distance between rows lighttable_t* dc_colormap; -FColormap *dc_fcolormap; +FSWColormap *dc_fcolormap; ShadeConstants dc_shade_constants; fixed_t dc_light; int dc_x; @@ -1032,7 +1032,7 @@ int ds_y; int ds_x1; int ds_x2; -FColormap* ds_fcolormap; +FSWColormap* ds_fcolormap; lighttable_t* ds_colormap; ShadeConstants ds_shade_constants; dsfixed_t ds_light; @@ -2413,10 +2413,10 @@ void R_InitColumnDrawers () R_DrawParticle = R_DrawParticle_C; #ifdef X86_ASM - R_SetupDrawSlab = [](FColormap *colormap, float light, int shade) { R_SetupDrawSlabA(colormap->Maps + (GETPALOOKUP(light, shade) << COLORMAPSHIFT)); }; + R_SetupDrawSlab = [](FSWColormap *colormap, float light, int shade) { R_SetupDrawSlabA(colormap->Maps + (GETPALOOKUP(light, shade) << COLORMAPSHIFT)); }; R_DrawSlab = R_DrawSlabA; #else - R_SetupDrawSlab = [](FColormap *colormap, float light, int shade) { R_SetupDrawSlabC(colormap->Maps + (GETPALOOKUP(light, shade) << COLORMAPSHIFT)); }; + R_SetupDrawSlab = [](FSWColormap *colormap, float light, int shade) { R_SetupDrawSlabC(colormap->Maps + (GETPALOOKUP(light, shade) << COLORMAPSHIFT)); }; R_DrawSlab = R_DrawSlabC; #endif @@ -2806,7 +2806,7 @@ void R_SetTranslationMap(lighttable_t *translation) } } -void R_SetColorMapLight(FColormap *base_colormap, float light, int shade) +void R_SetColorMapLight(FSWColormap *base_colormap, float light, int shade) { dc_fcolormap = base_colormap; if (r_swtruecolor) @@ -2830,7 +2830,7 @@ void R_SetColorMapLight(FColormap *base_colormap, float light, int shade) } } -void R_SetDSColorMapLight(FColormap *base_colormap, float light, int shade) +void R_SetDSColorMapLight(FSWColormap *base_colormap, float light, int shade) { ds_fcolormap = base_colormap; if (r_swtruecolor) diff --git a/src/r_draw.h b/src/r_draw.h index 591ae0b5fe..204f2a4935 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -31,7 +31,7 @@ extern "C" int fuzzoffset[FUZZTABLE + 1]; // [RH] 
+1 for the assembly routine extern "C" int fuzzpos; extern "C" int fuzzviewheight; -struct FColormap; +struct FSWColormap; struct ShadeConstants { @@ -52,7 +52,7 @@ extern "C" int ylookup[MAXHEIGHT]; extern "C" int dc_pitch; // [RH] Distance between rows extern "C" lighttable_t*dc_colormap; -extern "C" FColormap *dc_fcolormap; +extern "C" FSWColormap *dc_fcolormap; extern "C" ShadeConstants dc_shade_constants; extern "C" fixed_t dc_light; extern "C" int dc_x; @@ -288,7 +288,7 @@ void R_FillColumnP_C (void); void R_FillColumnHorizP_C (void); void R_FillSpan_C (void); -extern void(*R_SetupDrawSlab)(FColormap *base_colormap, float light, int shade); +extern void(*R_SetupDrawSlab)(FSWColormap *base_colormap, float light, int shade); extern void(*R_DrawSlab)(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, BYTE *p); #ifdef X86_ASM @@ -303,7 +303,7 @@ extern "C" int ds_y; extern "C" int ds_x1; extern "C" int ds_x2; -extern "C" FColormap* ds_fcolormap; +extern "C" FSWColormap* ds_fcolormap; extern "C" lighttable_t* ds_colormap; extern "C" ShadeConstants ds_shade_constants; extern "C" dsfixed_t ds_light; @@ -374,10 +374,10 @@ void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_ void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int col)=R_GetColumn); // Sets dc_colormap and dc_light to their appropriate values depending on the output format (pal vs true color) -void R_SetColorMapLight(FColormap *base_colormap, float light, int shade); +void R_SetColorMapLight(FSWColormap *base_colormap, float light, int shade); // Same as R_SetColorMapLight, but for ds_colormap and ds_light -void R_SetDSColorMapLight(FColormap *base_colormap, float light, int shade); +void R_SetDSColorMapLight(FSWColormap *base_colormap, float light, int shade); void R_SetTranslationMap(lighttable_t *translation); diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 
69ebfeb842..0d86ead478 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -2751,7 +2751,7 @@ static ShadeConstants slab_rgba_shade_constants; static const BYTE *slab_rgba_colormap; static fixed_t slab_rgba_light; -void R_SetupDrawSlab_rgba(FColormap *base_colormap, float light, int shade) +void R_SetupDrawSlab_rgba(FSWColormap *base_colormap, float light, int shade) { slab_rgba_shade_constants.light_red = base_colormap->Color.r * 256 / 255; slab_rgba_shade_constants.light_green = base_colormap->Color.g * 256 / 255; diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index ca54f72634..df3d0f2330 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -97,7 +97,7 @@ void R_FillSpan_rgba(); void R_DrawTiltedSpan_rgba(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy); void R_DrawColoredSpan_rgba(int y, int x1, int x2); -void R_SetupDrawSlab_rgba(FColormap *base_colormap, float light, int shade); +void R_SetupDrawSlab_rgba(FSWColormap *base_colormap, float light, int shade); void R_DrawSlab_rgba(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, BYTE *p); void R_DrawFogBoundary_rgba(int x1, int x2, short *uclip, short *dclip); diff --git a/src/r_main.cpp b/src/r_main.cpp index 5ff80b1012..ba02a7c605 100644 --- a/src/r_main.cpp +++ b/src/r_main.cpp @@ -120,7 +120,7 @@ double FocalLengthX; double FocalLengthY; FDynamicColormap*basecolormap; // [RH] colormap currently drawing with int fixedlightlev; -FColormap *fixedcolormap; +FSWColormap *fixedcolormap; FSpecialColormap *realfixedcolormap; double WallTMapScale2; diff --git a/src/r_main.h b/src/r_main.h index fa8fe0bb15..8d18675264 100644 --- a/src/r_main.h +++ b/src/r_main.h @@ -106,7 +106,7 @@ extern double r_SpriteVisibility; extern int r_actualextralight; extern bool foggy; extern int fixedlightlev; -extern FColormap* fixedcolormap; +extern FSWColormap* 
fixedcolormap; extern FSpecialColormap*realfixedcolormap; diff --git a/src/r_swrenderer.cpp b/src/r_swrenderer.cpp index 0342751010..9bc8e4b96d 100644 --- a/src/r_swrenderer.cpp +++ b/src/r_swrenderer.cpp @@ -340,7 +340,7 @@ void FSoftwareRenderer::RenderTextureView (FCanvasTexture *tex, AActor *viewpoin // curse Doom's overuse of global variables in the renderer. // These get clobbered by rendering to a camera texture but they need to be preserved so the final rendering can be done with the correct palette. - FColormap *savecolormap = fixedcolormap; + FSWColormap *savecolormap = fixedcolormap; FSpecialColormap *savecm = realfixedcolormap; DAngle savedfov = FieldOfView; diff --git a/src/r_things.cpp b/src/r_things.cpp index 013fc7152b..6f1fb2700c 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -1481,7 +1481,7 @@ void R_DrawPSprite(DPSprite *pspr, AActor *owner, float bobx, float boby, double if (camera->Inventory != nullptr) { BYTE oldcolormapnum = vis->Style.ColormapNum; - FColormap *oldcolormap = vis->Style.BaseColormap; + FSWColormap *oldcolormap = vis->Style.BaseColormap; camera->Inventory->AlterWeaponSprite (&vis->Style); if (vis->Style.BaseColormap != oldcolormap || vis->Style.ColormapNum != oldcolormapnum) { @@ -1960,7 +1960,7 @@ void R_DrawSprite (vissprite_t *spr) int r1, r2; short topclip, botclip; short *clip1, *clip2; - FColormap *colormap = spr->Style.BaseColormap; + FSWColormap *colormap = spr->Style.BaseColormap; int colormapnum = spr->Style.ColormapNum; F3DFloor *rover; FDynamicColormap *mybasecolormap; @@ -2486,7 +2486,7 @@ void R_ProjectParticle (particle_t *particle, const sector_t *sector, int shade, int x1, x2, y1, y2; vissprite_t* vis; sector_t* heightsec = NULL; - FColormap* map; + FSWColormap* map; // [ZZ] Particle not visible through the portal plane if (CurrentPortal && !!P_PointOnLineSide(particle->Pos, CurrentPortal->dst)) @@ -2785,7 +2785,7 @@ extern double BaseYaspectMul;; void R_DrawVoxel(const FVector3 &globalpos, FAngle 
viewangle, const FVector3 &dasprpos, DAngle dasprang, fixed_t daxscale, fixed_t dayscale, FVoxel *voxobj, - FColormap *colormap, int colormapnum, short *daumost, short *dadmost, int minslabz, int maxslabz, int flags) + FSWColormap *colormap, int colormapnum, short *daumost, short *dadmost, int minslabz, int maxslabz, int flags) { int i, j, k, x, y, syoff, ggxstart, ggystart, nxoff; fixed_t cosang, sinang, sprcosang, sprsinang; diff --git a/src/r_things.h b/src/r_things.h index 13f89574b5..cbe34015f4 100644 --- a/src/r_things.h +++ b/src/r_things.h @@ -144,7 +144,7 @@ enum { DVF_OFFSCREEN = 1, DVF_SPANSONLY = 2, DVF_MIRRORED = 4 }; void R_DrawVoxel(const FVector3 &viewpos, FAngle viewangle, const FVector3 &sprpos, DAngle dasprang, fixed_t daxscale, fixed_t dayscale, struct FVoxel *voxobj, - FColormap *colormap, int colormapnum, short *daumost, short *dadmost, int minslabz, int maxslabz, int flags); + FSWColormap *colormap, int colormapnum, short *daumost, short *dadmost, int minslabz, int maxslabz, int flags); void R_ClipVisSprite (vissprite_t *vis, int xl, int xh); diff --git a/src/win32/win32gliface.cpp b/src/win32/win32gliface.cpp index 7ca001e1ed..59ef471d3b 100644 --- a/src/win32/win32gliface.cpp +++ b/src/win32/win32gliface.cpp @@ -346,7 +346,8 @@ bool Win32GLVideo::GoFullscreen(bool yes) // //========================================================================== -DFrameBuffer *Win32GLVideo::CreateFrameBuffer(int width, int height, bool fs, DFrameBuffer *old) + +DFrameBuffer *Win32GLVideo::CreateFrameBuffer(int width, int height, bool bgra, bool fs, DFrameBuffer *old) { Win32GLFrameBuffer *fb; @@ -860,7 +861,7 @@ IMPLEMENT_ABSTRACT_CLASS(Win32GLFrameBuffer) // //========================================================================== -Win32GLFrameBuffer::Win32GLFrameBuffer(void *hMonitor, int width, int height, int bits, int refreshHz, bool fullscreen) : BaseWinFB(width, height) +Win32GLFrameBuffer::Win32GLFrameBuffer(void *hMonitor, int width, int 
height, int bits, int refreshHz, bool fullscreen) : BaseWinFB(width, height, false) { m_Width = width; m_Height = height; diff --git a/src/win32/win32gliface.h b/src/win32/win32gliface.h index 6320e29034..87eb10de6b 100644 --- a/src/win32/win32gliface.h +++ b/src/win32/win32gliface.h @@ -38,7 +38,7 @@ public: void StartModeIterator (int bits, bool fs); bool NextMode (int *width, int *height, bool *letterbox); bool GoFullscreen(bool yes); - DFrameBuffer *CreateFrameBuffer (int width, int height, bool fs, DFrameBuffer *old); + DFrameBuffer *CreateFrameBuffer (int width, int height, bool bgra, bool fs, DFrameBuffer *old); virtual bool SetResolution (int width, int height, int bits); void DumpAdapters(); bool InitHardware (HWND Window, int multisample); From 3ebf8c7e746ec0f0b9bab827e7b2fa9a0f91bd30 Mon Sep 17 00:00:00 2001 From: raa-eruanna Date: Wed, 14 Sep 2016 06:28:39 -0400 Subject: [PATCH 110/912] More code fixes - now it compiles. --- src/r_data/colormaps.h | 2 +- src/r_defs.h | 1 + src/v_video.cpp | 3 ++- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/r_data/colormaps.h b/src/r_data/colormaps.h index 039a85189b..ca15748930 100644 --- a/src/r_data/colormaps.h +++ b/src/r_data/colormaps.h @@ -1,7 +1,7 @@ #ifndef __RES_CMAP_H #define __RES_CMAP_H -struct FColormap; +struct FSWColormap; void R_InitColormaps (); void R_DeinitColormaps (); diff --git a/src/r_defs.h b/src/r_defs.h index 7d7ad7eaba..50d514fc5b 100644 --- a/src/r_defs.h +++ b/src/r_defs.h @@ -1516,6 +1516,7 @@ struct visstyle_t { int ColormapNum; // Which colormap is rendered FSWColormap *BaseColormap; // Base colormap used together with ColormapNum + lighttable_t *colormap; // [SP] Restored from GZDoom - will this work? 
float Alpha; FRenderStyle RenderStyle; }; diff --git a/src/v_video.cpp b/src/v_video.cpp index d07fdc61a2..1bef7df409 100644 --- a/src/v_video.cpp +++ b/src/v_video.cpp @@ -71,6 +71,7 @@ FRenderer *Renderer; IMPLEMENT_ABSTRACT_CLASS (DCanvas) IMPLEMENT_ABSTRACT_CLASS (DFrameBuffer) +EXTERN_CVAR (Bool, swtruecolor) #if defined(_DEBUG) && defined(_M_IX86) #define DBGBREAK { __asm int 3 } @@ -813,7 +814,7 @@ void DSimpleCanvas::Resize(int width, int height) Pitch = width + MAX(0, CPU.DataL1LineSize - 8); } } - int bytes_per_pixel = bgra ? 4 : 1; + int bytes_per_pixel = swtruecolor ? 4 : 1; MemBuffer = new BYTE[Pitch * height * bytes_per_pixel]; memset (MemBuffer, 0, Pitch * height * bytes_per_pixel); } From b0029fcd1ebb67e7ff23fdebd72c51b13a17a55f Mon Sep 17 00:00:00 2001 From: raa-eruanna Date: Wed, 14 Sep 2016 06:38:08 -0400 Subject: [PATCH 111/912] Set version to 0.0 (prerelease), set render defaults for true-color software renderer since that is the focus of this project --- src/posix/cocoa/i_video.mm | 2 +- src/posix/sdl/hardware.cpp | 2 +- src/version.h | 8 ++++---- src/win32/hardware.cpp | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/posix/cocoa/i_video.mm b/src/posix/cocoa/i_video.mm index 20a93ce257..4f91fd3692 100644 --- a/src/posix/cocoa/i_video.mm +++ b/src/posix/cocoa/i_video.mm @@ -106,7 +106,7 @@ EXTERN_CVAR(Bool, ticker ) EXTERN_CVAR(Bool, vid_vsync) EXTERN_CVAR(Bool, vid_hidpi) -CUSTOM_CVAR(Bool, swtruecolor, false, CVAR_ARCHIVE | CVAR_GLOBALCONFIG | CVAR_NOINITCALL) +CUSTOM_CVAR(Bool, swtruecolor, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG | CVAR_NOINITCALL) { // Strictly speaking this doesn't require a mode switch, but it is the easiest // way to force a CreateFramebuffer call without a lot of refactoring. 
diff --git a/src/posix/sdl/hardware.cpp b/src/posix/sdl/hardware.cpp index 2c0ba617e0..b69a0d3d8e 100644 --- a/src/posix/sdl/hardware.cpp +++ b/src/posix/sdl/hardware.cpp @@ -321,7 +321,7 @@ CUSTOM_CVAR (Int, vid_maxfps, 200, CVAR_ARCHIVE | CVAR_GLOBALCONFIG) extern int NewWidth, NewHeight, NewBits, DisplayBits; -CUSTOM_CVAR(Bool, swtruecolor, false, CVAR_ARCHIVE|CVAR_GLOBALCONFIG|CVAR_NOINITCALL) +CUSTOM_CVAR(Bool, swtruecolor, true, CVAR_ARCHIVE|CVAR_GLOBALCONFIG|CVAR_NOINITCALL) { // Strictly speaking this doesn't require a mode switch, but it is the easiest // way to force a CreateFramebuffer call without a lot of refactoring. diff --git a/src/version.h b/src/version.h index 9b2cd8f1a1..f3706b1de9 100644 --- a/src/version.h +++ b/src/version.h @@ -41,12 +41,12 @@ const char *GetVersionString(); /** Lots of different version numbers **/ -#define VERSIONSTR "2.2pre" +#define VERSIONSTR "0.0pre" // The version as seen in the Windows resource -#define RC_FILEVERSION 2,1,9999,0 -#define RC_PRODUCTVERSION 2,1,9999,0 -#define RC_PRODUCTVERSION2 "2.2pre" +#define RC_FILEVERSION 0,0,9999,0 +#define RC_PRODUCTVERSION 0,0,9999,0 +#define RC_PRODUCTVERSION2 "0.0pre" // Version identifier for network games. 
// Bump it every time you do a release unless you're certain you diff --git a/src/win32/hardware.cpp b/src/win32/hardware.cpp index cc55dd400a..3cf941307f 100644 --- a/src/win32/hardware.cpp +++ b/src/win32/hardware.cpp @@ -72,7 +72,7 @@ int currentrenderer = -1; bool changerenderer; // [ZDoomGL] -CUSTOM_CVAR (Int, vid_renderer, 1, CVAR_ARCHIVE | CVAR_GLOBALCONFIG | CVAR_NOINITCALL) +CUSTOM_CVAR (Int, vid_renderer, 0, CVAR_ARCHIVE | CVAR_GLOBALCONFIG | CVAR_NOINITCALL) { // 0: Software renderer // 1: OpenGL renderer @@ -358,7 +358,7 @@ void I_RestoreWindowedPos () extern int NewWidth, NewHeight, NewBits, DisplayBits; -CUSTOM_CVAR(Bool, swtruecolor, false, CVAR_ARCHIVE|CVAR_GLOBALCONFIG|CVAR_NOINITCALL) +CUSTOM_CVAR(Bool, swtruecolor, true, CVAR_ARCHIVE|CVAR_GLOBALCONFIG|CVAR_NOINITCALL) { // Strictly speaking this doesn't require a mode switch, but it is the easiest // way to force a CreateFramebuffer call without a lot of refactoring. From 842558384a74c9f427fb41aaeab426f4fadbd7c7 Mon Sep 17 00:00:00 2001 From: raa-eruanna Date: Wed, 14 Sep 2016 07:33:31 -0400 Subject: [PATCH 112/912] Forgot to set vid_renderer defaults for Linux and Mac. 
--- src/posix/cocoa/i_video.mm | 2 +- src/posix/sdl/hardware.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/posix/cocoa/i_video.mm b/src/posix/cocoa/i_video.mm index 4f91fd3692..ba3a3e27e8 100644 --- a/src/posix/cocoa/i_video.mm +++ b/src/posix/cocoa/i_video.mm @@ -134,7 +134,7 @@ CUSTOM_CVAR(Bool, vid_autoswitch, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG | CVAR_ static int s_currentRenderer; -CUSTOM_CVAR(Int, vid_renderer, 1, CVAR_ARCHIVE | CVAR_GLOBALCONFIG | CVAR_NOINITCALL) +CUSTOM_CVAR(Int, vid_renderer, 0, CVAR_ARCHIVE | CVAR_GLOBALCONFIG | CVAR_NOINITCALL) { // 0: Software renderer // 1: OpenGL renderer diff --git a/src/posix/sdl/hardware.cpp b/src/posix/sdl/hardware.cpp index b69a0d3d8e..18c7ad7376 100644 --- a/src/posix/sdl/hardware.cpp +++ b/src/posix/sdl/hardware.cpp @@ -65,7 +65,7 @@ void I_RestartRenderer(); int currentrenderer; // [ZDoomGL] -CUSTOM_CVAR (Int, vid_renderer, 1, CVAR_ARCHIVE | CVAR_GLOBALCONFIG | CVAR_NOINITCALL) +CUSTOM_CVAR (Int, vid_renderer, 0, CVAR_ARCHIVE | CVAR_GLOBALCONFIG | CVAR_NOINITCALL) { // 0: Software renderer // 1: OpenGL renderer From 1e2935f4e0bb49e7af64ace8d1b26db04cb58727 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 18 Sep 2016 15:57:22 +0200 Subject: [PATCH 113/912] Added exposure pass calculating the bloom/tonemap exposure based on what the eye is seeing --- src/gl/renderer/gl_postprocess.cpp | 90 ++++++++++++++++++- src/gl/renderer/gl_postprocessstate.cpp | 26 +++++- src/gl/renderer/gl_postprocessstate.h | 5 +- src/gl/renderer/gl_renderbuffers.cpp | 66 ++++++++++++-- src/gl/renderer/gl_renderbuffers.h | 22 ++++- src/gl/renderer/gl_renderer.cpp | 9 ++ src/gl/renderer/gl_renderer.h | 8 +- src/gl/scene/gl_scene.cpp | 15 +--- src/gl/shaders/gl_bloomshader.cpp | 2 +- src/gl/shaders/gl_bloomshader.h | 2 +- src/gl/shaders/gl_tonemapshader.cpp | 50 ++++++++++- src/gl/shaders/gl_tonemapshader.h | 41 ++++++++- wadsrc/static/shaders/glsl/bloomextract.fp | 5 +- 
wadsrc/static/shaders/glsl/exposureaverage.fp | 23 +++++ wadsrc/static/shaders/glsl/exposurecombine.fp | 16 ++++ wadsrc/static/shaders/glsl/exposureextract.fp | 13 +++ wadsrc/static/shaders/glsl/tonemap.fp | 5 +- 17 files changed, 358 insertions(+), 40 deletions(-) create mode 100644 wadsrc/static/shaders/glsl/exposureaverage.fp create mode 100644 wadsrc/static/shaders/glsl/exposurecombine.fp create mode 100644 wadsrc/static/shaders/glsl/exposureextract.fp diff --git a/src/gl/renderer/gl_postprocess.cpp b/src/gl/renderer/gl_postprocess.cpp index b6b658a415..1cdad9f9be 100644 --- a/src/gl/renderer/gl_postprocess.cpp +++ b/src/gl/renderer/gl_postprocess.cpp @@ -75,7 +75,10 @@ CUSTOM_CVAR(Float, gl_bloom_amount, 1.4f, 0) if (self < 0.1f) self = 0.1f; } -CVAR(Float, gl_exposure, 0.0f, 0) +CVAR(Float, gl_exposure_scale, 0.75f, 0) +CVAR(Float, gl_exposure_min, 0.35f, 0) +CVAR(Float, gl_exposure_base, 0.35f, 0) +CVAR(Float, gl_exposure_speed, 0.05f, 0) CUSTOM_CVAR(Int, gl_tonemap, 0, CVAR_ARCHIVE|CVAR_GLOBALCONFIG) { @@ -106,6 +109,78 @@ void FGLRenderer::RenderScreenQuad() GLRenderer->mVBO->RenderArray(GL_TRIANGLE_STRIP, FFlatVertexBuffer::PRESENT_INDEX, 4); } +//----------------------------------------------------------------------------- +// +// Extracts light average from the scene and updates the camera exposure texture +// +//----------------------------------------------------------------------------- + +void FGLRenderer::UpdateCameraExposure() +{ + if (!gl_bloom && gl_tonemap == 0) + return; + + FGLDebug::PushGroup("UpdateCameraExposure"); + + FGLPostProcessState savedState; + savedState.SaveTextureBinding1(); + + // Extract light level from scene texture: + const auto &level0 = mBuffers->ExposureLevels[0]; + glBindFramebuffer(GL_FRAMEBUFFER, level0.Framebuffer); + glViewport(0, 0, level0.Width, level0.Height); + mBuffers->BindCurrentTexture(0); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + glTexParameteri(GL_TEXTURE_2D, 
GL_TEXTURE_MAG_FILTER, GL_LINEAR); + mExposureExtractShader->Bind(); + mExposureExtractShader->SceneTexture.Set(0); + mExposureExtractShader->Scale.Set(mSceneViewport.width / (float)mScreenViewport.width, mSceneViewport.height / (float)mScreenViewport.height); + mExposureExtractShader->Offset.Set(mSceneViewport.left / (float)mScreenViewport.width, mSceneViewport.top / (float)mScreenViewport.height); + RenderScreenQuad(); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + + // Find the average value: + for (int i = 0; i + 1 < mBuffers->ExposureLevels.Size(); i++) + { + const auto &level = mBuffers->ExposureLevels[i]; + const auto &next = mBuffers->ExposureLevels[i + 1]; + + glBindFramebuffer(GL_FRAMEBUFFER, next.Framebuffer); + glViewport(0, 0, next.Width, next.Height); + glBindTexture(GL_TEXTURE_2D, level.Texture); + mExposureAverageShader->Bind(); + mExposureAverageShader->ExposureTexture.Set(0); + RenderScreenQuad(); + } + + // Combine average value with current camera exposure: + glBindFramebuffer(GL_FRAMEBUFFER, mBuffers->ExposureFB); + glViewport(0, 0, 1, 1); + if (!mBuffers->FirstExposureFrame) + { + glEnable(GL_BLEND); + glBlendEquation(GL_FUNC_ADD); + glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); + } + else + { + mBuffers->FirstExposureFrame = false; + } + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_2D, mBuffers->ExposureLevels.Last().Texture); + mExposureCombineShader->Bind(); + mExposureCombineShader->ExposureTexture.Set(0); + mExposureCombineShader->ExposureBase.Set(gl_exposure_base); + mExposureCombineShader->ExposureMin.Set(gl_exposure_min); + mExposureCombineShader->ExposureScale.Set(gl_exposure_scale); + mExposureCombineShader->ExposureSpeed.Set(gl_exposure_speed); + RenderScreenQuad(); + glViewport(mScreenViewport.left, mScreenViewport.top, mScreenViewport.width, mScreenViewport.height); + + FGLDebug::PopGroup(); +} + 
//----------------------------------------------------------------------------- // // Adds bloom contribution to scene texture @@ -121,6 +196,7 @@ void FGLRenderer::BloomScene() FGLDebug::PushGroup("BloomScene"); FGLPostProcessState savedState; + savedState.SaveTextureBinding1(); const float blurAmount = gl_bloom_amount; int sampleCount = gl_bloom_kernel_size; @@ -133,9 +209,12 @@ void FGLRenderer::BloomScene() mBuffers->BindCurrentTexture(0); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + glActiveTexture(GL_TEXTURE1); + glBindTexture(GL_TEXTURE_2D, mBuffers->ExposureTexture); + glActiveTexture(GL_TEXTURE0); mBloomExtractShader->Bind(); mBloomExtractShader->SceneTexture.Set(0); - mBloomExtractShader->Exposure.Set(mCameraExposure); + mBloomExtractShader->ExposureTexture.Set(1); mBloomExtractShader->Scale.Set(mSceneViewport.width / (float)mScreenViewport.width, mSceneViewport.height / (float)mScreenViewport.height); mBloomExtractShader->Offset.Set(mSceneViewport.left / (float)mScreenViewport.width, mSceneViewport.top / (float)mScreenViewport.height); RenderScreenQuad(); @@ -220,7 +299,12 @@ void FGLRenderer::TonemapScene() } else { - mTonemapShader->Exposure.Set(mCameraExposure); + savedState.SaveTextureBinding1(); + glActiveTexture(GL_TEXTURE1); + glBindTexture(GL_TEXTURE_2D, mBuffers->ExposureTexture); + glActiveTexture(GL_TEXTURE0); + + mTonemapShader->ExposureTexture.Set(1); } RenderScreenQuad(); diff --git a/src/gl/renderer/gl_postprocessstate.cpp b/src/gl/renderer/gl_postprocessstate.cpp index 9d995783d4..57b7862f5d 100644 --- a/src/gl/renderer/gl_postprocessstate.cpp +++ b/src/gl/renderer/gl_postprocessstate.cpp @@ -45,7 +45,7 @@ FGLPostProcessState::FGLPostProcessState() { glGetIntegerv(GL_ACTIVE_TEXTURE, &activeTex); glActiveTexture(GL_TEXTURE0); - glGetIntegerv(GL_TEXTURE_BINDING_2D, &textureBinding); + glGetIntegerv(GL_TEXTURE_BINDING_2D, &textureBinding[0]); 
glBindTexture(GL_TEXTURE_2D, 0); if (gl.flags & RFL_SAMPLER_OBJECTS) { @@ -75,6 +75,15 @@ FGLPostProcessState::FGLPostProcessState() glDisable(GL_BLEND); } +void FGLPostProcessState::SaveTextureBinding1() +{ + glActiveTexture(GL_TEXTURE1); + glGetIntegerv(GL_TEXTURE_BINDING_2D, &textureBinding[1]); + glBindTexture(GL_TEXTURE_2D, 0); + textureBinding1Saved = true; + glActiveTexture(GL_TEXTURE0); +} + //----------------------------------------------------------------------------- // // Restores state at the end of post processing @@ -108,6 +117,12 @@ FGLPostProcessState::~FGLPostProcessState() glUseProgram(currentProgram); + if (textureBinding1Saved) + { + glActiveTexture(GL_TEXTURE1); + glBindTexture(GL_TEXTURE_2D, 0); + } + glActiveTexture(GL_TEXTURE0); glBindTexture(GL_TEXTURE_2D, 0); if (gl.flags & RFL_SAMPLER_OBJECTS) @@ -115,6 +130,13 @@ FGLPostProcessState::~FGLPostProcessState() glBindSampler(0, samplerBinding[0]); glBindSampler(1, samplerBinding[1]); } - glBindTexture(GL_TEXTURE_2D, textureBinding); + glBindTexture(GL_TEXTURE_2D, textureBinding[0]); + + if (textureBinding1Saved) + { + glActiveTexture(GL_TEXTURE1); + glBindTexture(GL_TEXTURE_2D, textureBinding[1]); + } + glActiveTexture(activeTex); } diff --git a/src/gl/renderer/gl_postprocessstate.h b/src/gl/renderer/gl_postprocessstate.h index 4f2ca81a12..bf53aa7de9 100644 --- a/src/gl/renderer/gl_postprocessstate.h +++ b/src/gl/renderer/gl_postprocessstate.h @@ -14,12 +14,14 @@ public: FGLPostProcessState(); ~FGLPostProcessState(); + void SaveTextureBinding1(); + private: FGLPostProcessState(const FGLPostProcessState &) = delete; FGLPostProcessState &operator=(const FGLPostProcessState &) = delete; GLint activeTex; - GLint textureBinding; + GLint textureBinding[2]; GLint samplerBinding[2]; GLboolean blendEnabled; GLboolean scissorEnabled; @@ -32,6 +34,7 @@ private: GLint blendSrcAlpha; GLint blendDestRgb; GLint blendDestAlpha; + bool textureBinding1Saved = false; }; #endif diff --git 
a/src/gl/renderer/gl_renderbuffers.cpp b/src/gl/renderer/gl_renderbuffers.cpp index 4383d706e4..b2471e4b91 100644 --- a/src/gl/renderer/gl_renderbuffers.cpp +++ b/src/gl/renderer/gl_renderbuffers.cpp @@ -74,6 +74,7 @@ FGLRenderBuffers::~FGLRenderBuffers() ClearPipeline(); ClearEyeBuffers(); ClearBloom(); + ClearExposureLevels(); } void FGLRenderBuffers::ClearScene() @@ -107,6 +108,18 @@ void FGLRenderBuffers::ClearBloom() } } +void FGLRenderBuffers::ClearExposureLevels() +{ + for (auto &level : ExposureLevels) + { + DeleteTexture(level.Texture); + DeleteFrameBuffer(level.Framebuffer); + } + ExposureLevels.Clear(); + DeleteTexture(ExposureTexture); + DeleteFrameBuffer(ExposureFB); +} + void FGLRenderBuffers::ClearEyeBuffers() { for (auto handle : mEyeFBs) @@ -186,11 +199,12 @@ bool FGLRenderBuffers::Setup(int width, int height, int sceneWidth, int sceneHei } // Bloom bluring buffers need to match the scene to avoid bloom bleeding artifacts - if (mBloomWidth != sceneWidth || mBloomHeight != sceneHeight) + if (mSceneWidth != sceneWidth || mSceneHeight != sceneHeight) { CreateBloom(sceneWidth, sceneHeight); - mBloomWidth = sceneWidth; - mBloomHeight = sceneHeight; + CreateExposureLevels(sceneWidth, sceneHeight); + mSceneWidth = sceneWidth; + mSceneHeight = sceneHeight; } glBindTexture(GL_TEXTURE_2D, textureBinding); @@ -204,11 +218,12 @@ bool FGLRenderBuffers::Setup(int width, int height, int sceneWidth, int sceneHei ClearPipeline(); ClearEyeBuffers(); ClearBloom(); + ClearExposureLevels(); mWidth = 0; mHeight = 0; mSamples = 0; - mBloomWidth = 0; - mBloomHeight = 0; + mSceneWidth = 0; + mSceneHeight = 0; } return !FailedCreate; @@ -281,6 +296,41 @@ void FGLRenderBuffers::CreateBloom(int width, int height) } } +//========================================================================== +// +// Creates camera exposure level buffers +// +//========================================================================== + +void FGLRenderBuffers::CreateExposureLevels(int width, 
int height) +{ + ClearExposureLevels(); + + int i = 0; + do + { + width = MAX(width / 2, 1); + height = MAX(height / 2, 1); + + FString textureName, fbName; + textureName.Format("Exposure.Texture%d", i); + fbName.Format("Exposure.Framebuffer%d", i); + i++; + + FGLExposureTextureLevel level; + level.Width = width; + level.Height = height; + level.Texture = Create2DTexture(textureName, GL_R32F, level.Width, level.Height); + level.Framebuffer = CreateFrameBuffer(fbName, level.Texture); + ExposureLevels.Push(level); + } while (width > 1 || height > 1); + + ExposureTexture = Create2DTexture("Exposure.CameraTexture", GL_R32F, 1, 1); + ExposureFB = CreateFrameBuffer("Exposure.CameraFB", ExposureTexture); + + FirstExposureFrame = true; +} + //========================================================================== // // Creates eye buffers if needed @@ -316,14 +366,14 @@ void FGLRenderBuffers::CreateEyeBuffers(int eye) // //========================================================================== -GLuint FGLRenderBuffers::Create2DTexture(const FString &name, GLuint format, int width, int height) +GLuint FGLRenderBuffers::Create2DTexture(const FString &name, GLuint format, int width, int height, const void *data) { - GLuint type = (format == GL_RGBA16F) ? GL_FLOAT : GL_UNSIGNED_BYTE; + GLuint type = (format == GL_RGBA16F || format == GL_R32F) ? GL_FLOAT : GL_UNSIGNED_BYTE; GLuint handle = 0; glGenTextures(1, &handle); glBindTexture(GL_TEXTURE_2D, handle); FGLDebug::LabelObject(GL_TEXTURE, handle, name); - glTexImage2D(GL_TEXTURE_2D, 0, format, width, height, 0, GL_RGBA, type, nullptr); + glTexImage2D(GL_TEXTURE_2D, 0, format, width, height, 0, format != GL_R32F ? 
GL_RGBA : GL_RED, type, data); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); diff --git a/src/gl/renderer/gl_renderbuffers.h b/src/gl/renderer/gl_renderbuffers.h index 08731e39f7..4477718f4b 100644 --- a/src/gl/renderer/gl_renderbuffers.h +++ b/src/gl/renderer/gl_renderbuffers.h @@ -14,6 +14,15 @@ public: GLuint Height = 0; }; +class FGLExposureTextureLevel +{ +public: + GLuint Texture = 0; + GLuint Framebuffer = 0; + GLuint Width = 0; + GLuint Height = 0; +}; + class FGLRenderBuffers { public: @@ -39,6 +48,11 @@ public: enum { NumBloomLevels = 4 }; FGLBloomTextureLevel BloomLevels[NumBloomLevels]; + TArray ExposureLevels; + GLuint ExposureTexture = 0; + GLuint ExposureFB = 0; + bool FirstExposureFrame = true; + static bool IsEnabled(); int GetWidth() const { return mWidth; } @@ -49,11 +63,13 @@ private: void ClearPipeline(); void ClearEyeBuffers(); void ClearBloom(); + void ClearExposureLevels(); void CreateScene(int width, int height, int samples); void CreatePipeline(int width, int height); void CreateBloom(int width, int height); + void CreateExposureLevels(int width, int height); void CreateEyeBuffers(int eye); - GLuint Create2DTexture(const FString &name, GLuint format, int width, int height); + GLuint Create2DTexture(const FString &name, GLuint format, int width, int height, const void *data = nullptr); GLuint CreateRenderBuffer(const FString &name, GLuint format, int width, int height); GLuint CreateRenderBuffer(const FString &name, GLuint format, int samples, int width, int height); GLuint CreateFrameBuffer(const FString &name, GLuint colorbuffer); @@ -69,8 +85,8 @@ private: int mHeight = 0; int mSamples = 0; int mMaxSamples = 0; - int mBloomWidth = 0; - int mBloomHeight = 0; + int mSceneWidth = 0; + int mSceneHeight = 0; static const int NumPipelineTextures = 2; int mCurrentPipelineTexture = 0; diff 
--git a/src/gl/renderer/gl_renderer.cpp b/src/gl/renderer/gl_renderer.cpp index 4b59ad1a91..ab0e658e70 100644 --- a/src/gl/renderer/gl_renderer.cpp +++ b/src/gl/renderer/gl_renderer.cpp @@ -104,6 +104,9 @@ FGLRenderer::FGLRenderer(OpenGLFrameBuffer *fb) mPresentShader = nullptr; mBloomExtractShader = nullptr; mBloomCombineShader = nullptr; + mExposureExtractShader = nullptr; + mExposureAverageShader = nullptr; + mExposureCombineShader = nullptr; mBlurShader = nullptr; mTonemapShader = nullptr; mTonemapPalette = nullptr; @@ -119,6 +122,9 @@ void FGLRenderer::Initialize(int width, int height) mBuffers = new FGLRenderBuffers(); mBloomExtractShader = new FBloomExtractShader(); mBloomCombineShader = new FBloomCombineShader(); + mExposureExtractShader = new FExposureExtractShader(); + mExposureAverageShader = new FExposureAverageShader(); + mExposureCombineShader = new FExposureCombineShader(); mBlurShader = new FBlurShader(); mTonemapShader = new FTonemapShader(); mColormapShader = new FColormapShader(); @@ -179,6 +185,9 @@ FGLRenderer::~FGLRenderer() if (mPresentShader) delete mPresentShader; if (mBloomExtractShader) delete mBloomExtractShader; if (mBloomCombineShader) delete mBloomCombineShader; + if (mExposureExtractShader) delete mExposureExtractShader; + if (mExposureAverageShader) delete mExposureAverageShader; + if (mExposureCombineShader) delete mExposureCombineShader; if (mBlurShader) delete mBlurShader; if (mTonemapShader) delete mTonemapShader; if (mTonemapPalette) delete mTonemapPalette; diff --git a/src/gl/renderer/gl_renderer.h b/src/gl/renderer/gl_renderer.h index 162fb6dcf4..b954085214 100644 --- a/src/gl/renderer/gl_renderer.h +++ b/src/gl/renderer/gl_renderer.h @@ -21,6 +21,9 @@ class DPSprite; class FGLRenderBuffers; class FBloomExtractShader; class FBloomCombineShader; +class FExposureExtractShader; +class FExposureAverageShader; +class FExposureCombineShader; class FBlurShader; class FTonemapShader; class FColormapShader; @@ -92,6 +95,9 @@ public: 
FGLRenderBuffers *mBuffers; FBloomExtractShader *mBloomExtractShader; FBloomCombineShader *mBloomCombineShader; + FExposureExtractShader *mExposureExtractShader; + FExposureAverageShader *mExposureAverageShader; + FExposureCombineShader *mExposureCombineShader; FBlurShader *mBlurShader; FTonemapShader *mTonemapShader; FColormapShader *mColormapShader; @@ -118,7 +124,6 @@ public: GL_IRECT mSceneViewport; GL_IRECT mOutputLetterbox; bool mDrawingScene2D = false; - float mCameraExposure = 1.0f; float mSceneClearColor[3]; @@ -166,6 +171,7 @@ public: void SetFixedColormap (player_t *player); void WriteSavePic (player_t *player, FILE *file, int width, int height); void EndDrawScene(sector_t * viewsector); + void UpdateCameraExposure(); void BloomScene(); void TonemapScene(); void ColormapScene(); diff --git a/src/gl/scene/gl_scene.cpp b/src/gl/scene/gl_scene.cpp index ccceeac5fe..c66bd64e88 100644 --- a/src/gl/scene/gl_scene.cpp +++ b/src/gl/scene/gl_scene.cpp @@ -790,20 +790,6 @@ sector_t * FGLRenderer::RenderViewpoint (AActor * camera, GL_IRECT * bounds, flo mViewActor=camera; } - if (toscreen) - { - if (gl_exposure == 0.0f) - { - float light = viewsector->lightlevel / 255.0f; - float exposure = MAX(1.0f + (1.0f - light * light) * 0.9f, 0.5f); - mCameraExposure = mCameraExposure * 0.995f + exposure * 0.005f; - } - else - { - mCameraExposure = gl_exposure; - } - } - // 'viewsector' will not survive the rendering so it cannot be used anymore below. 
lviewsector = viewsector; @@ -839,6 +825,7 @@ sector_t * FGLRenderer::RenderViewpoint (AActor * camera, GL_IRECT * bounds, flo if (mainview && FGLRenderBuffers::IsEnabled()) { mBuffers->BlitSceneToTexture(); + UpdateCameraExposure(); BloomScene(); TonemapScene(); ColormapScene(); diff --git a/src/gl/shaders/gl_bloomshader.cpp b/src/gl/shaders/gl_bloomshader.cpp index a16c9ed1ea..44253ae79c 100644 --- a/src/gl/shaders/gl_bloomshader.cpp +++ b/src/gl/shaders/gl_bloomshader.cpp @@ -46,7 +46,7 @@ void FBloomExtractShader::Bind() mShader.Link("shaders/glsl/bloomextract"); mShader.SetAttribLocation(0, "PositionInProjection"); SceneTexture.Init(mShader, "SceneTexture"); - Exposure.Init(mShader, "ExposureAdjustment"); + ExposureTexture.Init(mShader, "ExposureTexture"); Scale.Init(mShader, "Scale"); Offset.Init(mShader, "Offset"); } diff --git a/src/gl/shaders/gl_bloomshader.h b/src/gl/shaders/gl_bloomshader.h index b20277a42d..8b34bb479c 100644 --- a/src/gl/shaders/gl_bloomshader.h +++ b/src/gl/shaders/gl_bloomshader.h @@ -9,7 +9,7 @@ public: void Bind(); FBufferedUniformSampler SceneTexture; - FBufferedUniform1f Exposure; + FBufferedUniformSampler ExposureTexture; FBufferedUniform2f Scale; FBufferedUniform2f Offset; diff --git a/src/gl/shaders/gl_tonemapshader.cpp b/src/gl/shaders/gl_tonemapshader.cpp index 144981b180..3db6db8ba8 100644 --- a/src/gl/shaders/gl_tonemapshader.cpp +++ b/src/gl/shaders/gl_tonemapshader.cpp @@ -47,7 +47,7 @@ void FTonemapShader::Bind() shader.Link("shaders/glsl/tonemap"); shader.SetAttribLocation(0, "PositionInProjection"); SceneTexture.Init(shader, "InputTexture"); - Exposure.Init(shader, "ExposureAdjustment"); + ExposureTexture.Init(shader, "ExposureTexture"); PaletteLUT.Init(shader, "PaletteLUT"); } shader.Bind(); @@ -70,3 +70,51 @@ const char *FTonemapShader::GetDefines(int mode) case Palette: return "#define PALETTE\n"; } } + +void FExposureExtractShader::Bind() +{ + if (!mShader) + { + mShader.Compile(FShaderProgram::Vertex, 
"shaders/glsl/screenquad.vp", "", 330); + mShader.Compile(FShaderProgram::Fragment, "shaders/glsl/exposureextract.fp", "", 330); + mShader.SetFragDataLocation(0, "FragColor"); + mShader.Link("shaders/glsl/exposureextract"); + mShader.SetAttribLocation(0, "PositionInProjection"); + SceneTexture.Init(mShader, "SceneTexture"); + Scale.Init(mShader, "Scale"); + Offset.Init(mShader, "Offset"); + } + mShader.Bind(); +} + +void FExposureAverageShader::Bind() +{ + if (!mShader) + { + mShader.Compile(FShaderProgram::Vertex, "shaders/glsl/screenquad.vp", "", 400); + mShader.Compile(FShaderProgram::Fragment, "shaders/glsl/exposureaverage.fp", "", 400); + mShader.SetFragDataLocation(0, "FragColor"); + mShader.Link("shaders/glsl/exposureaverage"); + mShader.SetAttribLocation(0, "PositionInProjection"); + ExposureTexture.Init(mShader, "ExposureTexture"); + } + mShader.Bind(); +} + +void FExposureCombineShader::Bind() +{ + if (!mShader) + { + mShader.Compile(FShaderProgram::Vertex, "shaders/glsl/screenquad.vp", "", 330); + mShader.Compile(FShaderProgram::Fragment, "shaders/glsl/exposurecombine.fp", "", 330); + mShader.SetFragDataLocation(0, "FragColor"); + mShader.Link("shaders/glsl/exposurecombine"); + mShader.SetAttribLocation(0, "PositionInProjection"); + ExposureTexture.Init(mShader, "ExposureTexture"); + ExposureBase.Init(mShader, "ExposureBase"); + ExposureMin.Init(mShader, "ExposureMin"); + ExposureScale.Init(mShader, "ExposureScale"); + ExposureSpeed.Init(mShader, "ExposureSpeed"); + } + mShader.Bind(); +} \ No newline at end of file diff --git a/src/gl/shaders/gl_tonemapshader.h b/src/gl/shaders/gl_tonemapshader.h index 7ec24117b1..b4cd102de6 100644 --- a/src/gl/shaders/gl_tonemapshader.h +++ b/src/gl/shaders/gl_tonemapshader.h @@ -9,7 +9,7 @@ public: void Bind(); FBufferedUniformSampler SceneTexture; - FBufferedUniform1f Exposure; + FBufferedUniformSampler ExposureTexture; FBufferedUniformSampler PaletteLUT; static bool IsPaletteMode(); @@ -31,4 +31,43 @@ private: 
FShaderProgram mShader[NumTonemapModes]; }; +class FExposureExtractShader +{ +public: + void Bind(); + + FBufferedUniformSampler SceneTexture; + FBufferedUniform2f Scale; + FBufferedUniform2f Offset; + +private: + FShaderProgram mShader; +}; + +class FExposureAverageShader +{ +public: + void Bind(); + + FBufferedUniformSampler ExposureTexture; + +private: + FShaderProgram mShader; +}; + +class FExposureCombineShader +{ +public: + void Bind(); + + FBufferedUniformSampler ExposureTexture; + FBufferedUniform1f ExposureBase; + FBufferedUniform1f ExposureMin; + FBufferedUniform1f ExposureScale; + FBufferedUniform1f ExposureSpeed; + +private: + FShaderProgram mShader; +}; + #endif \ No newline at end of file diff --git a/wadsrc/static/shaders/glsl/bloomextract.fp b/wadsrc/static/shaders/glsl/bloomextract.fp index bc94c3c0e4..9c5aa0bea3 100644 --- a/wadsrc/static/shaders/glsl/bloomextract.fp +++ b/wadsrc/static/shaders/glsl/bloomextract.fp @@ -3,12 +3,13 @@ in vec2 TexCoord; out vec4 FragColor; uniform sampler2D SceneTexture; -uniform float ExposureAdjustment; +uniform sampler2D ExposureTexture; uniform vec2 Scale; uniform vec2 Offset; void main() { + float exposureAdjustment = texture(ExposureTexture, vec2(0.5)).x; vec4 color = texture(SceneTexture, Offset + TexCoord * Scale); - FragColor = max(vec4(color.rgb * ExposureAdjustment - 1, 1), vec4(0)); + FragColor = max(vec4((color.rgb + vec3(0.001)) * exposureAdjustment - 1, 1), vec4(0)); } diff --git a/wadsrc/static/shaders/glsl/exposureaverage.fp b/wadsrc/static/shaders/glsl/exposureaverage.fp new file mode 100644 index 0000000000..41c0909d2d --- /dev/null +++ b/wadsrc/static/shaders/glsl/exposureaverage.fp @@ -0,0 +1,23 @@ + +in vec2 TexCoord; +out vec4 FragColor; + +uniform sampler2D ExposureTexture; + +void main() +{ +#if __VERSION__ < 400 + ivec2 size = textureSize(ExposureTexture, 0); + ivec2 tl = max(ivec2(TexCoord * vec2(size) - 0.5), ivec2(0)); + ivec2 br = min(tl + ivec2(1), size - ivec2(1)); + vec4 values = 
vec4( + texelFetch(ExposureTexture, tl, 0).x, + texelFetch(ExposureTexture, ivec2(tl.x, br.y), 0).x, + texelFetch(ExposureTexture, ivec2(br.x, tl.y), 0).x, + texelFetch(ExposureTexture, br, 0).x); +#else + vec4 values = textureGather(ExposureTexture, TexCoord); +#endif + + FragColor = vec4((values.x + values.y + values.z + values.w) * 0.25, 0.0, 0.0, 1.0); +} diff --git a/wadsrc/static/shaders/glsl/exposurecombine.fp b/wadsrc/static/shaders/glsl/exposurecombine.fp new file mode 100644 index 0000000000..f806f8f860 --- /dev/null +++ b/wadsrc/static/shaders/glsl/exposurecombine.fp @@ -0,0 +1,16 @@ + +in vec2 TexCoord; +out vec4 FragColor; + +uniform sampler2D ExposureTexture; +uniform float ExposureBase; +uniform float ExposureMin; +uniform float ExposureScale; +uniform float ExposureSpeed; + +void main() +{ + float light = texture(ExposureTexture, TexCoord).x; + float exposureAdjustment = 1.0 / max(ExposureBase + light * ExposureScale, ExposureMin); + FragColor = vec4(exposureAdjustment, 0.0, 0.0, ExposureSpeed); +} diff --git a/wadsrc/static/shaders/glsl/exposureextract.fp b/wadsrc/static/shaders/glsl/exposureextract.fp new file mode 100644 index 0000000000..f673bf0295 --- /dev/null +++ b/wadsrc/static/shaders/glsl/exposureextract.fp @@ -0,0 +1,13 @@ + +in vec2 TexCoord; +out vec4 FragColor; + +uniform sampler2D SceneTexture; +uniform vec2 Scale; +uniform vec2 Offset; + +void main() +{ + vec4 color = texture(SceneTexture, Offset + TexCoord * Scale); + FragColor = vec4(max(max(color.r, color.g), color.b), 0.0, 0.0, 1.0); +} diff --git a/wadsrc/static/shaders/glsl/tonemap.fp b/wadsrc/static/shaders/glsl/tonemap.fp index d6574b2959..5cb7cf8bd2 100644 --- a/wadsrc/static/shaders/glsl/tonemap.fp +++ b/wadsrc/static/shaders/glsl/tonemap.fp @@ -3,7 +3,7 @@ in vec2 TexCoord; out vec4 FragColor; uniform sampler2D InputTexture; -uniform float ExposureAdjustment; +uniform sampler2D ExposureTexture; vec3 Linear(vec3 c) { @@ -84,7 +84,8 @@ void main() { vec3 color = 
texture(InputTexture, TexCoord).rgb; #ifndef PALETTE - color = color * ExposureAdjustment; + float exposureAdjustment = texture(ExposureTexture, vec2(0.5)).x; + color = color * exposureAdjustment; color = Linear(color); // needed because gzdoom's scene texture is not linear at the moment #endif FragColor = vec4(Tonemap(color), 1.0); From 210dd6d26a3e3041092d5aba14b1d140a8f377b6 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 18 Sep 2016 19:31:09 +0200 Subject: [PATCH 114/912] Make bloom/exposure less aggressive --- src/gl/renderer/gl_postprocess.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gl/renderer/gl_postprocess.cpp b/src/gl/renderer/gl_postprocess.cpp index 1cdad9f9be..7ce8be0adc 100644 --- a/src/gl/renderer/gl_postprocess.cpp +++ b/src/gl/renderer/gl_postprocess.cpp @@ -75,7 +75,7 @@ CUSTOM_CVAR(Float, gl_bloom_amount, 1.4f, 0) if (self < 0.1f) self = 0.1f; } -CVAR(Float, gl_exposure_scale, 0.75f, 0) +CVAR(Float, gl_exposure_scale, 1.3f, 0) CVAR(Float, gl_exposure_min, 0.35f, 0) CVAR(Float, gl_exposure_base, 0.35f, 0) CVAR(Float, gl_exposure_speed, 0.05f, 0) From 24f748da03dad1c43c2c8a779546da7bf97d3176 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 20 Sep 2016 02:57:57 +0200 Subject: [PATCH 115/912] Add gbuffer pass support to FShaderManager and FRenderState --- src/gl/renderer/gl_renderstate.cpp | 7 +- src/gl/renderer/gl_renderstate.h | 19 +++ src/gl/shaders/gl_shader.cpp | 179 ++++++++++++++++++----------- src/gl/shaders/gl_shader.h | 46 +++++--- wadsrc/static/shaders/glsl/main.fp | 6 + 5 files changed, 170 insertions(+), 87 deletions(-) diff --git a/src/gl/renderer/gl_renderstate.cpp b/src/gl/renderer/gl_renderstate.cpp index cd51e540a4..2cfd739e90 100644 --- a/src/gl/renderer/gl_renderstate.cpp +++ b/src/gl/renderer/gl_renderstate.cpp @@ -105,6 +105,7 @@ void FRenderState::Reset() mViewMatrix.loadIdentity(); mModelMatrix.loadIdentity(); mTextureMatrix.loadIdentity(); + mPassType = NORMAL_PASS; } 
//========================================================================== @@ -118,11 +119,11 @@ bool FRenderState::ApplyShader() static const float nulvec[] = { 0.f, 0.f, 0.f, 0.f }; if (mSpecialEffect > EFF_NONE) { - activeShader = GLRenderer->mShaderManager->BindEffect(mSpecialEffect); + activeShader = GLRenderer->mShaderManager->BindEffect(mSpecialEffect, mPassType); } else { - activeShader = GLRenderer->mShaderManager->Get(mTextureEnabled ? mEffectState : 4, mAlphaThreshold >= 0.f); + activeShader = GLRenderer->mShaderManager->Get(mTextureEnabled ? mEffectState : 4, mAlphaThreshold >= 0.f, mPassType); activeShader->Bind(); } @@ -343,7 +344,7 @@ void FRenderState::ApplyMatrices() { if (GLRenderer->mShaderManager != NULL) { - GLRenderer->mShaderManager->ApplyMatrices(&mProjectionMatrix, &mViewMatrix); + GLRenderer->mShaderManager->ApplyMatrices(&mProjectionMatrix, &mViewMatrix, mPassType); } } diff --git a/src/gl/renderer/gl_renderstate.h b/src/gl/renderer/gl_renderstate.h index 1c0a20348a..7bec51be04 100644 --- a/src/gl/renderer/gl_renderstate.h +++ b/src/gl/renderer/gl_renderstate.h @@ -63,6 +63,13 @@ enum EEffect MAX_EFFECTS }; +enum EPassType +{ + NORMAL_PASS, + GBUFFER_PASS, + MAX_PASS_TYPES +}; + class FRenderState { bool mTextureEnabled; @@ -111,6 +118,8 @@ class FRenderState FShader *activeShader; + EPassType mPassType = NORMAL_PASS; + bool ApplyShader(); public: @@ -459,6 +468,16 @@ public: return mInterpolationFactor; } + void SetPassType(EPassType passType) + { + mPassType = passType; + } + + EPassType GetPassType() + { + return mPassType; + } + // Backwards compatibility crap follows void ApplyFixedFunction(); void DrawColormapOverlay(); diff --git a/src/gl/shaders/gl_shader.cpp b/src/gl/shaders/gl_shader.cpp index 158163a483..26a449bf22 100644 --- a/src/gl/shaders/gl_shader.cpp +++ b/src/gl/shaders/gl_shader.cpp @@ -180,6 +180,9 @@ bool FShader::Load(const char * name, const char * vert_prog_lump, const char * glBindAttribLocation(hShader, 
VATTR_COLOR, "aColor"); glBindAttribLocation(hShader, VATTR_VERTEX2, "aVertex2"); + glBindFragDataLocation(hShader, 0, "FragColor"); + glBindFragDataLocation(hShader, 1, "FragData"); + glLinkProgram(hShader); glGetShaderInfoLog(hVertProg, 10000, NULL, buffer); @@ -298,12 +301,13 @@ bool FShader::Bind() // //========================================================================== -FShader *FShaderManager::Compile (const char *ShaderName, const char *ShaderPath, bool usediscard) +FShader *FShaderCollection::Compile (const char *ShaderName, const char *ShaderPath, bool usediscard, EPassType passType) { FString defines; // this can't be in the shader code due to ATI strangeness. if (gl.MaxLights() == 128) defines += "#define MAXLIGHTS128\n"; if (!usediscard) defines += "#define NO_ALPHATEST\n"; + if (passType == GBUFFER_PASS) defines += "#define GBUFFER_PASS\n"; FShader *shader = NULL; try @@ -386,27 +390,75 @@ static const FEffectShader effectshaders[]= { "stencil", "shaders/glsl/main.vp", "shaders/glsl/stencil.fp", NULL, "#define SIMPLE\n#define NO_ALPHATEST\n" }, }; - -//========================================================================== -// -// -// -//========================================================================== - FShaderManager::FShaderManager() { - if (!gl.legacyMode) CompileShaders(); + if (!gl.legacyMode) + { + for (int passType = 0; passType < MAX_PASS_TYPES; passType++) + mPassShaders.Push(new FShaderCollection((EPassType)passType)); + } } -//========================================================================== -// -// -// -//========================================================================== - FShaderManager::~FShaderManager() { - if (!gl.legacyMode) Clean(); + if (!gl.legacyMode) + { + glUseProgram(0); + mActiveShader = NULL; + + for (auto collection : mPassShaders) + delete collection; + } +} + +void FShaderManager::SetActiveShader(FShader *sh) +{ + if (mActiveShader != sh) + { + glUseProgram(sh!= NULL? 
sh->GetHandle() : 0); + mActiveShader = sh; + } +} + +FShader *FShaderManager::BindEffect(int effect, EPassType passType) +{ + if (passType < mPassShaders.Size()) + return mPassShaders[passType]->BindEffect(effect); + else + return nullptr; +} + +FShader *FShaderManager::Get(unsigned int eff, bool alphateston, EPassType passType) +{ + if (passType < mPassShaders.Size()) + return mPassShaders[passType]->Get(eff, alphateston); + else + return nullptr; +} + +void FShaderManager::ApplyMatrices(VSMatrix *proj, VSMatrix *view, EPassType passType) +{ + if (gl.legacyMode) + { + glMatrixMode(GL_PROJECTION); + glLoadMatrixf(proj->get()); + glMatrixMode(GL_MODELVIEW); + glLoadMatrixf(view->get()); + } + else + { + if (passType < mPassShaders.Size()) + mPassShaders[passType]->ApplyMatrices(proj, view); + + if (mActiveShader) + mActiveShader->Bind(); + } +} + +void FShaderManager::ResetFixedColormap() +{ + for (auto &collection : mPassShaders) + collection->ResetFixedColormap(); } //========================================================================== @@ -415,10 +467,30 @@ FShaderManager::~FShaderManager() // //========================================================================== -void FShaderManager::CompileShaders() +FShaderCollection::FShaderCollection(EPassType passType) { - mActiveShader = NULL; + CompileShaders(passType); +} +//========================================================================== +// +// +// +//========================================================================== + +FShaderCollection::~FShaderCollection() +{ + Clean(); +} + +//========================================================================== +// +// +// +//========================================================================== + +void FShaderCollection::CompileShaders(EPassType passType) +{ mTextureEffects.Clear(); mTextureEffectsNAT.Clear(); for (int i = 0; i < MAX_EFFECTS; i++) @@ -428,11 +500,11 @@ void FShaderManager::CompileShaders() for(int 
i=0;defaultshaders[i].ShaderName != NULL;i++) { - FShader *shc = Compile(defaultshaders[i].ShaderName, defaultshaders[i].gettexelfunc, true); + FShader *shc = Compile(defaultshaders[i].ShaderName, defaultshaders[i].gettexelfunc, true, passType); mTextureEffects.Push(shc); if (i <= 3) { - FShader *shc = Compile(defaultshaders[i].ShaderName, defaultshaders[i].gettexelfunc, false); + FShader *shc = Compile(defaultshaders[i].ShaderName, defaultshaders[i].gettexelfunc, false, passType); mTextureEffectsNAT.Push(shc); } } @@ -442,7 +514,7 @@ void FShaderManager::CompileShaders() FString name = ExtractFileBase(usershaders[i]); FName sfn = name; - FShader *shc = Compile(sfn, usershaders[i], true); + FShader *shc = Compile(sfn, usershaders[i], true, passType); mTextureEffects.Push(shc); } @@ -464,11 +536,8 @@ void FShaderManager::CompileShaders() // //========================================================================== -void FShaderManager::Clean() +void FShaderCollection::Clean() { - glUseProgram(0); - mActiveShader = NULL; - for (unsigned int i = 0; i < mTextureEffectsNAT.Size(); i++) { if (mTextureEffectsNAT[i] != NULL) delete mTextureEffectsNAT[i]; @@ -492,7 +561,7 @@ void FShaderManager::Clean() // //========================================================================== -int FShaderManager::Find(const char * shn) +int FShaderCollection::Find(const char * shn) { FName sfn = shn; @@ -506,21 +575,6 @@ int FShaderManager::Find(const char * shn) return -1; } -//========================================================================== -// -// -// -//========================================================================== - -void FShaderManager::SetActiveShader(FShader *sh) -{ - if (mActiveShader != sh) - { - glUseProgram(sh!= NULL? 
sh->GetHandle() : 0); - mActiveShader = sh; - } -} - //========================================================================== // @@ -528,7 +582,7 @@ void FShaderManager::SetActiveShader(FShader *sh) // //========================================================================== -FShader *FShaderManager::BindEffect(int effect) +FShader *FShaderCollection::BindEffect(int effect) { if (effect >= 0 && effect < MAX_EFFECTS && mEffectShaders[effect] != NULL) { @@ -546,36 +600,25 @@ FShader *FShaderManager::BindEffect(int effect) //========================================================================== EXTERN_CVAR(Int, gl_fuzztype) -void FShaderManager::ApplyMatrices(VSMatrix *proj, VSMatrix *view) +void FShaderCollection::ApplyMatrices(VSMatrix *proj, VSMatrix *view) { - if (gl.legacyMode) + for (int i = 0; i < 4; i++) { - glMatrixMode(GL_PROJECTION); - glLoadMatrixf(proj->get()); - glMatrixMode(GL_MODELVIEW); - glLoadMatrixf(view->get()); + mTextureEffects[i]->ApplyMatrices(proj, view); + mTextureEffectsNAT[i]->ApplyMatrices(proj, view); } - else + mTextureEffects[4]->ApplyMatrices(proj, view); + if (gl_fuzztype != 0) { - for (int i = 0; i < 4; i++) - { - mTextureEffects[i]->ApplyMatrices(proj, view); - mTextureEffectsNAT[i]->ApplyMatrices(proj, view); - } - mTextureEffects[4]->ApplyMatrices(proj, view); - if (gl_fuzztype != 0) - { - mTextureEffects[4 + gl_fuzztype]->ApplyMatrices(proj, view); - } - for (unsigned i = 12; i < mTextureEffects.Size(); i++) - { - mTextureEffects[i]->ApplyMatrices(proj, view); - } - for (int i = 0; i < MAX_EFFECTS; i++) - { - mEffectShaders[i]->ApplyMatrices(proj, view); - } - if (mActiveShader != NULL) mActiveShader->Bind(); + mTextureEffects[4 + gl_fuzztype]->ApplyMatrices(proj, view); + } + for (unsigned i = 12; i < mTextureEffects.Size(); i++) + { + mTextureEffects[i]->ApplyMatrices(proj, view); + } + for (int i = 0; i < MAX_EFFECTS; i++) + { + mEffectShaders[i]->ApplyMatrices(proj, view); } } diff --git 
a/src/gl/shaders/gl_shader.h b/src/gl/shaders/gl_shader.h index 5af927e6db..793fa63dd1 100644 --- a/src/gl/shaders/gl_shader.h +++ b/src/gl/shaders/gl_shader.h @@ -37,6 +37,7 @@ enum VATTR_NORMAL = 4 }; +class FShaderCollection; //========================================================================== // @@ -248,7 +249,7 @@ public: class FShader { - friend class FShaderManager; + friend class FShaderCollection; friend class FRenderState; unsigned int hShader; @@ -323,7 +324,6 @@ public: }; - //========================================================================== // // The global shader manager @@ -331,26 +331,40 @@ public: //========================================================================== class FShaderManager { - TArray mTextureEffects; - TArray mTextureEffectsNAT; - FShader *mActiveShader; - FShader *mEffectShaders[MAX_EFFECTS]; - - void Clean(); - void CompileShaders(); - public: FShaderManager(); ~FShaderManager(); - FShader *Compile(const char *ShaderName, const char *ShaderPath, bool usediscard); + + void SetActiveShader(FShader *sh); + FShader *GetActiveShader() const { return mActiveShader; } + + FShader *BindEffect(int effect, EPassType passType); + FShader *Get(unsigned int eff, bool alphateston, EPassType passType); + void ApplyMatrices(VSMatrix *proj, VSMatrix *view, EPassType passType); + + void ResetFixedColormap(); + +private: + FShader *mActiveShader = nullptr; + TArray mPassShaders; +}; + +class FShaderCollection +{ + TArray mTextureEffects; + TArray mTextureEffectsNAT; + FShader *mEffectShaders[MAX_EFFECTS]; + + void Clean(); + void CompileShaders(EPassType passType); + +public: + FShaderCollection(EPassType passType); + ~FShaderCollection(); + FShader *Compile(const char *ShaderName, const char *ShaderPath, bool usediscard, EPassType passType); int Find(const char *mame); FShader *BindEffect(int effect); - void SetActiveShader(FShader *sh); void ApplyMatrices(VSMatrix *proj, VSMatrix *view); - FShader *GetActiveShader() const - { 
- return mActiveShader; - } void ResetFixedColormap() { diff --git a/wadsrc/static/shaders/glsl/main.fp b/wadsrc/static/shaders/glsl/main.fp index 84b7f5742d..0b66f05694 100644 --- a/wadsrc/static/shaders/glsl/main.fp +++ b/wadsrc/static/shaders/glsl/main.fp @@ -5,6 +5,9 @@ in vec4 vTexCoord; in vec4 vColor; out vec4 FragColor; +#ifdef GBUFFER_PASS +out vec4 FragData; +#endif #ifdef SHADER_STORAGE_LIGHTS layout(std430, binding = 1) buffer LightBufferSSO @@ -433,5 +436,8 @@ void main() } } FragColor = frag; +#ifdef GBUFFER_PASS + FragData = vec4(uFogColor.rgb, 1.0); +#endif } From b6c64416be6180f53ffeee09755a1f65f9e6ba70 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 21 Sep 2016 02:04:56 +0200 Subject: [PATCH 116/912] Added SceneData texture as the second colorbuffer when rendering a scene and placed fog data into it --- src/gl/renderer/gl_postprocess.cpp | 18 +++++++----- src/gl/renderer/gl_renderbuffers.cpp | 36 +++++++++++++++++++---- src/gl/renderer/gl_renderbuffers.h | 7 +++-- src/gl/renderer/gl_renderer.cpp | 2 +- src/gl/scene/gl_scene.cpp | 23 +++++++++++++-- src/gl/shaders/gl_ambientshader.cpp | 23 +++++++++------ src/gl/shaders/gl_ambientshader.h | 10 +++++-- src/gl/system/gl_cvars.h | 6 ++++ wadsrc/static/shaders/glsl/main.fp | 28 +++++++++++++++++- wadsrc/static/shaders/glsl/ssaocombine.fp | 29 +++++++++++++++++- 10 files changed, 150 insertions(+), 32 deletions(-) diff --git a/src/gl/renderer/gl_postprocess.cpp b/src/gl/renderer/gl_postprocess.cpp index 8b8afd52ed..b3d1259220 100644 --- a/src/gl/renderer/gl_postprocess.cpp +++ b/src/gl/renderer/gl_postprocess.cpp @@ -138,9 +138,6 @@ void FGLRenderer::PostProcessScene() void FGLRenderer::AmbientOccludeScene() { - if (!gl_ssao || !FGLRenderBuffers::IsEnabled()) - return; - FGLDebug::PushGroup("AmbientOccludeScene"); FGLPostProcessState savedState; @@ -227,20 +224,27 @@ void FGLRenderer::AmbientOccludeScene() RenderScreenQuad(); // Add SSAO back to scene texture: - mBuffers->BindSceneFB(); + 
mBuffers->BindSceneFB(false); + GLenum buffers[] = { GL_COLOR_ATTACHMENT0 }; + glDrawBuffers(1, buffers); glViewport(mSceneViewport.left, mSceneViewport.top, mSceneViewport.width, mSceneViewport.height); glEnable(GL_BLEND); glBlendEquation(GL_FUNC_ADD); if (gl_ssao_debug) glBlendFunc(GL_ONE, GL_ZERO); else - glBlendFunc(GL_ZERO, GL_SRC_COLOR); + glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); glActiveTexture(GL_TEXTURE0); glBindTexture(GL_TEXTURE_2D, mBuffers->AmbientTexture1); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); - mSSAOCombineShader->Bind(); - mSSAOCombineShader->AODepthTexture.Set(0); + mBuffers->BindSceneDataTexture(1); + mSSAOCombineShader->Bind(multisample); + mSSAOCombineShader->AODepthTexture[multisample].Set(0); + mSSAOCombineShader->SceneDataTexture[multisample].Set(1); + if (multisample) mSSAOCombineShader->SampleCount[multisample].Set(gl_multisample); + mSSAOCombineShader->Scale[multisample].Set(mBuffers->AmbientWidth * 2.0f / (float)mScreenViewport.width, mBuffers->AmbientHeight * 2.0f / (float)mScreenViewport.height); + mSSAOCombineShader->Offset[multisample].Set(mSceneViewport.left / (float)mScreenViewport.width, mSceneViewport.top / (float)mScreenViewport.height); RenderScreenQuad(); FGLDebug::PopGroup(); diff --git a/src/gl/renderer/gl_renderbuffers.cpp b/src/gl/renderer/gl_renderbuffers.cpp index b52f2a0b67..54d597c4da 100644 --- a/src/gl/renderer/gl_renderbuffers.cpp +++ b/src/gl/renderer/gl_renderbuffers.cpp @@ -82,7 +82,9 @@ FGLRenderBuffers::~FGLRenderBuffers() void FGLRenderBuffers::ClearScene() { DeleteFrameBuffer(mSceneFB); + DeleteFrameBuffer(mSceneDataFB); DeleteTexture(mSceneMultisample); + DeleteTexture(mSceneData); DeleteTexture(mSceneDepthStencil); } @@ -253,13 +255,16 @@ void FGLRenderBuffers::CreateScene(int width, int height, int samples) { mSceneMultisample = Create2DMultisampleTexture("SceneMultisample", GL_RGBA16F, width, height, 
samples, false); mSceneDepthStencil = Create2DMultisampleTexture("SceneDepthStencil", GL_DEPTH24_STENCIL8, width, height, samples, false); + mSceneData = Create2DMultisampleTexture("SceneSSAOData", GL_RGBA8, width, height, samples, false); } else { mSceneDepthStencil = Create2DTexture("SceneDepthStencil", GL_DEPTH24_STENCIL8, width, height); + mSceneData = Create2DTexture("SceneSSAOData", GL_RGBA8, width, height); } - mSceneFB = CreateFrameBuffer("SceneFB", samples > 1 ? mSceneMultisample : mPipelineTexture[0], mSceneDepthStencil, samples > 1); + mSceneFB = CreateFrameBuffer("SceneFB", samples > 1 ? mSceneMultisample : mPipelineTexture[0], 0, mSceneDepthStencil, samples > 1); + mSceneDataFB = CreateFrameBuffer("SSAOSceneFB", samples > 1 ? mSceneMultisample : mPipelineTexture[0], mSceneData, mSceneDepthStencil, samples > 1); } //========================================================================== @@ -512,7 +517,7 @@ GLuint FGLRenderBuffers::CreateFrameBuffer(const FString &name, GLuint colorbuff return handle; } -GLuint FGLRenderBuffers::CreateFrameBuffer(const FString &name, GLuint colorbuffer, GLuint depthstencil, bool multisample) +GLuint FGLRenderBuffers::CreateFrameBuffer(const FString &name, GLuint colorbuffer0, GLuint colorbuffer1, GLuint depthstencil, bool multisample) { GLuint handle = 0; glGenFramebuffers(1, &handle); @@ -520,12 +525,16 @@ GLuint FGLRenderBuffers::CreateFrameBuffer(const FString &name, GLuint colorbuff FGLDebug::LabelObject(GL_FRAMEBUFFER, handle, name); if (multisample) { - glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D_MULTISAMPLE, colorbuffer, 0); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D_MULTISAMPLE, colorbuffer0, 0); + if (colorbuffer1 != 0) + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT1, GL_TEXTURE_2D_MULTISAMPLE, colorbuffer1, 0); glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D_MULTISAMPLE, depthstencil, 0); } else { 
- glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, colorbuffer, 0); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, colorbuffer0, 0); + if (colorbuffer1 != 0) + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT1, GL_TEXTURE_2D, colorbuffer1, 0); glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, depthstencil, 0); } if (CheckFrameBufferCompleteness()) @@ -668,9 +677,9 @@ void FGLRenderBuffers::BindEyeFB(int eye, bool readBuffer) // //========================================================================== -void FGLRenderBuffers::BindSceneFB() +void FGLRenderBuffers::BindSceneFB(bool sceneData) { - glBindFramebuffer(GL_FRAMEBUFFER, mSceneFB); + glBindFramebuffer(GL_FRAMEBUFFER, sceneData ? mSceneDataFB : mSceneFB); } //========================================================================== @@ -688,6 +697,21 @@ void FGLRenderBuffers::BindSceneColorTexture(int index) glBindTexture(GL_TEXTURE_2D, mPipelineTexture[0]); } +//========================================================================== +// +// Binds the scene data texture to the specified texture unit +// +//========================================================================== + +void FGLRenderBuffers::BindSceneDataTexture(int index) +{ + glActiveTexture(GL_TEXTURE0 + index); + if (mSamples > 1) + glBindTexture(GL_TEXTURE_2D_MULTISAMPLE, mSceneData); + else + glBindTexture(GL_TEXTURE_2D, mSceneData); +} + //========================================================================== // // Binds the depth texture to the specified texture unit diff --git a/src/gl/renderer/gl_renderbuffers.h b/src/gl/renderer/gl_renderbuffers.h index 19a2979f2a..080593881b 100644 --- a/src/gl/renderer/gl_renderbuffers.h +++ b/src/gl/renderer/gl_renderbuffers.h @@ -31,8 +31,9 @@ public: bool Setup(int width, int height, int sceneWidth, int sceneHeight); - void BindSceneFB(); + void BindSceneFB(bool sceneData); void 
BindSceneColorTexture(int index); + void BindSceneDataTexture(int index); void BindSceneDepthTexture(int index); void BlitSceneToTexture(); @@ -90,7 +91,7 @@ private: GLuint CreateRenderBuffer(const FString &name, GLuint format, int width, int height); GLuint CreateRenderBuffer(const FString &name, GLuint format, int samples, int width, int height); GLuint CreateFrameBuffer(const FString &name, GLuint colorbuffer); - GLuint CreateFrameBuffer(const FString &name, GLuint colorbuffer, GLuint depthstencil, bool multisample); + GLuint CreateFrameBuffer(const FString &name, GLuint colorbuffer0, GLuint colorbuffer1, GLuint depthstencil, bool multisample); bool CheckFrameBufferCompleteness(); void ClearFrameBuffer(bool stencil, bool depth); void DeleteTexture(GLuint &handle); @@ -110,7 +111,9 @@ private: // Buffers for the scene GLuint mSceneMultisample = 0; GLuint mSceneDepthStencil = 0; + GLuint mSceneData = 0; GLuint mSceneFB = 0; + GLuint mSceneDataFB = 0; // Effect/HUD buffers GLuint mPipelineTexture[NumPipelineTextures]; diff --git a/src/gl/renderer/gl_renderer.cpp b/src/gl/renderer/gl_renderer.cpp index db2be0d9d6..a02d278528 100644 --- a/src/gl/renderer/gl_renderer.cpp +++ b/src/gl/renderer/gl_renderer.cpp @@ -320,7 +320,7 @@ void FGLRenderer::Begin2D() if (mBuffers->Setup(mScreenViewport.width, mScreenViewport.height, mSceneViewport.width, mSceneViewport.height)) { if (mDrawingScene2D) - mBuffers->BindSceneFB(); + mBuffers->BindSceneFB(false); else mBuffers->BindCurrentFB(); } diff --git a/src/gl/scene/gl_scene.cpp b/src/gl/scene/gl_scene.cpp index 2659e8ea24..596f2c22bd 100644 --- a/src/gl/scene/gl_scene.cpp +++ b/src/gl/scene/gl_scene.cpp @@ -157,7 +157,11 @@ void FGLRenderer::Set3DViewport(bool mainview) { if (mainview && mBuffers->Setup(mScreenViewport.width, mScreenViewport.height, mSceneViewport.width, mSceneViewport.height)) { - mBuffers->BindSceneFB(); + mBuffers->BindSceneFB(gl_ssao); + GLenum buffers[] = { GL_COLOR_ATTACHMENT0, GL_COLOR_ATTACHMENT1 }; + 
glDrawBuffers(gl_ssao ? 2 : 1, buffers); + gl_RenderState.SetPassType(gl_ssao ? GBUFFER_PASS : NORMAL_PASS); + gl_RenderState.Apply(); } // Always clear all buffers with scissor test disabled. @@ -490,7 +494,13 @@ void FGLRenderer::DrawScene(int drawmode) RenderScene(recursion); - AmbientOccludeScene(); + bool applySSAO = gl_ssao && FGLRenderBuffers::IsEnabled() && drawmode != DM_PORTAL; + if (applySSAO) + { + AmbientOccludeScene(); + gl_RenderState.SetPassType(GBUFFER_PASS); + gl_RenderState.Apply(); + } // Handle all portals after rendering the opaque objects but before // doing all translucent stuff @@ -498,6 +508,15 @@ void FGLRenderer::DrawScene(int drawmode) GLPortal::EndFrame(); recursion--; RenderTranslucent(); + + if (applySSAO) + { + mBuffers->BindSceneFB(true); + GLenum buffers[] = { GL_COLOR_ATTACHMENT0, GL_COLOR_ATTACHMENT1 }; + glDrawBuffers(2, buffers); + gl_RenderState.SetPassType(NORMAL_PASS); + gl_RenderState.Apply(); + } } diff --git a/src/gl/shaders/gl_ambientshader.cpp b/src/gl/shaders/gl_ambientshader.cpp index 3fbd034348..4fa5b0e019 100644 --- a/src/gl/shaders/gl_ambientshader.cpp +++ b/src/gl/shaders/gl_ambientshader.cpp @@ -120,16 +120,21 @@ void FDepthBlurShader::Bind(bool vertical) shader.Bind(); } -void FSSAOCombineShader::Bind() +void FSSAOCombineShader::Bind(bool multisample) { - if (!mShader) + auto &shader = mShader[multisample]; + if (!shader) { - mShader.Compile(FShaderProgram::Vertex, "shaders/glsl/screenquad.vp", "", 330); - mShader.Compile(FShaderProgram::Fragment, "shaders/glsl/ssaocombine.fp", "", 330); - mShader.SetFragDataLocation(0, "FragColor"); - mShader.Link("shaders/glsl/ssaocombine"); - mShader.SetAttribLocation(0, "PositionInProjection"); - AODepthTexture.Init(mShader, "AODepthTexture"); + shader.Compile(FShaderProgram::Vertex, "shaders/glsl/screenquad.vp", "", 330); + shader.Compile(FShaderProgram::Fragment, "shaders/glsl/ssaocombine.fp", multisample ? 
"#define MULTISAMPLE\n" : "", 330); + shader.SetFragDataLocation(0, "FragColor"); + shader.Link("shaders/glsl/ssaocombine"); + shader.SetAttribLocation(0, "PositionInProjection"); + AODepthTexture[multisample].Init(shader, "AODepthTexture"); + SceneDataTexture[multisample].Init(shader, "SceneDataTexture"); + SampleCount[multisample].Init(shader, "SampleCount"); + Scale[multisample].Init(shader, "Scale"); + Offset[multisample].Init(shader, "Offset"); } - mShader.Bind(); + shader.Bind(); } diff --git a/src/gl/shaders/gl_ambientshader.h b/src/gl/shaders/gl_ambientshader.h index 9c97791ba8..f36b10625e 100644 --- a/src/gl/shaders/gl_ambientshader.h +++ b/src/gl/shaders/gl_ambientshader.h @@ -59,12 +59,16 @@ private: class FSSAOCombineShader { public: - void Bind(); + void Bind(bool multisample); - FBufferedUniformSampler AODepthTexture; + FBufferedUniformSampler AODepthTexture[2]; + FBufferedUniformSampler SceneDataTexture[2]; + FBufferedUniform1i SampleCount[2]; + FBufferedUniform2f Scale[2]; + FBufferedUniform2f Offset[2]; private: - FShaderProgram mShader; + FShaderProgram mShader[2]; }; #endif \ No newline at end of file diff --git a/src/gl/system/gl_cvars.h b/src/gl/system/gl_cvars.h index f6dc614724..c1bc5c2fe9 100644 --- a/src/gl/system/gl_cvars.h +++ b/src/gl/system/gl_cvars.h @@ -51,6 +51,12 @@ EXTERN_CVAR(Bool, gl_lens) EXTERN_CVAR(Float, gl_lens_k) EXTERN_CVAR(Float, gl_lens_kcube) EXTERN_CVAR(Float, gl_lens_chromatic) +EXTERN_CVAR(Bool, gl_ssao) +EXTERN_CVAR(Float, gl_ssao_strength) +EXTERN_CVAR(Bool, gl_ssao_debug) +EXTERN_CVAR(Float, gl_ssao_bias) +EXTERN_CVAR(Float, gl_ssao_radius) +EXTERN_CVAR(Float, gl_ssao_blur_amount) EXTERN_CVAR(Int, gl_debug_level) EXTERN_CVAR(Bool, gl_debug_breakpoint) diff --git a/wadsrc/static/shaders/glsl/main.fp b/wadsrc/static/shaders/glsl/main.fp index 0b66f05694..cdd6ffe3f0 100644 --- a/wadsrc/static/shaders/glsl/main.fp +++ b/wadsrc/static/shaders/glsl/main.fp @@ -315,6 +315,32 @@ vec4 applyFog(vec4 frag, float fogfactor) 
return vec4(mix(uFogColor.rgb, frag.rgb, fogfactor), frag.a); } +//=========================================================================== +// +// The color of the fragment if it is fully occluded by ambient lighting +// +//=========================================================================== + +vec3 AmbientOcclusionColor() +{ + float fogdist; + float fogfactor; + + // + // calculate fog factor + // + if (uFogEnabled == -1) + { + fogdist = pixelpos.w; + } + else + { + fogdist = max(16.0, length(pixelpos.xyz)); + } + fogfactor = exp2 (uFogDensity * fogdist); + + return mix(uFogColor.rgb, vec3(0.0), fogfactor); +} //=========================================================================== // @@ -437,7 +463,7 @@ void main() } FragColor = frag; #ifdef GBUFFER_PASS - FragData = vec4(uFogColor.rgb, 1.0); + FragData = vec4(AmbientOcclusionColor(), 1.0); #endif } diff --git a/wadsrc/static/shaders/glsl/ssaocombine.fp b/wadsrc/static/shaders/glsl/ssaocombine.fp index 21fdff1024..7cd0962763 100644 --- a/wadsrc/static/shaders/glsl/ssaocombine.fp +++ b/wadsrc/static/shaders/glsl/ssaocombine.fp @@ -4,8 +4,35 @@ out vec4 FragColor; uniform sampler2D AODepthTexture; +#if defined(MULTISAMPLE) +uniform sampler2DMS SceneDataTexture; +uniform int SampleCount; +#else +uniform sampler2D SceneDataTexture; +#endif + +uniform vec2 Scale; +uniform vec2 Offset; + void main() { + vec2 uv = Offset + TexCoord * Scale; +#if defined(MULTISAMPLE) + ivec2 texSize = textureSize(SceneDataTexture); +#else + ivec2 texSize = textureSize(SceneDataTexture, 0); +#endif + ivec2 ipos = ivec2(max(floor(uv * vec2(texSize) - 0.75), vec2(0.0))); + +#if defined(MULTISAMPLE) + vec3 fogColor = vec3(0.0); + for (int i = 0; i < SampleCount; i++) + fogColor += texelFetch(SceneDataTexture, ipos, i).rgb; + fogColor /= float(SampleCount); +#else + vec3 fogColor = texelFetch(SceneDataTexture, ipos, 0).rgb; +#endif + float attenutation = texture(AODepthTexture, TexCoord).x; - FragColor = vec4(attenutation, 
attenutation, attenutation, 0.0); + FragColor = vec4(fogColor, 1.0 - attenutation); } From d9e60644b1e256c6e8bed445e070a3f17598820a Mon Sep 17 00:00:00 2001 From: raa-eruanna Date: Wed, 21 Sep 2016 01:08:00 -0400 Subject: [PATCH 117/912] Some Linux SDL fixes. Will have to do this for Mac, later, too. --- src/posix/sdl/sdlglvideo.cpp | 4 ++-- src/posix/sdl/sdlglvideo.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/posix/sdl/sdlglvideo.cpp b/src/posix/sdl/sdlglvideo.cpp index d8c00f2363..e581cfde99 100644 --- a/src/posix/sdl/sdlglvideo.cpp +++ b/src/posix/sdl/sdlglvideo.cpp @@ -163,7 +163,7 @@ bool SDLGLVideo::NextMode (int *width, int *height, bool *letterbox) return false; } -DFrameBuffer *SDLGLVideo::CreateFrameBuffer (int width, int height, bool fullscreen, DFrameBuffer *old) +DFrameBuffer *SDLGLVideo::CreateFrameBuffer (int width, int height, bool bgra, bool fullscreen, DFrameBuffer *old) { static int retry = 0; static int owidth, oheight; @@ -315,7 +315,7 @@ bool SDLGLVideo::InitHardware (bool allowsoftware, int multisample) // FrameBuffer implementation ----------------------------------------------- SDLGLFB::SDLGLFB (void *, int width, int height, int, int, bool fullscreen) - : DFrameBuffer (width, height) + : DFrameBuffer (width, height, false) { int i; diff --git a/src/posix/sdl/sdlglvideo.h b/src/posix/sdl/sdlglvideo.h index d8ce9005dc..3b84f83c4e 100644 --- a/src/posix/sdl/sdlglvideo.h +++ b/src/posix/sdl/sdlglvideo.h @@ -21,7 +21,7 @@ class SDLGLVideo : public IVideo EDisplayType GetDisplayType () { return DISPLAY_Both; } void SetWindowedScale (float scale); - DFrameBuffer *CreateFrameBuffer (int width, int height, bool fs, DFrameBuffer *old); + DFrameBuffer *CreateFrameBuffer (int width, int height, bool bgra, bool fs, DFrameBuffer *old); void StartModeIterator (int bits, bool fs); bool NextMode (int *width, int *height, bool *letterbox); From 7c862b85b3d9c939ac17031b6d4e1a7aa29bab7e Mon Sep 17 00:00:00 2001 From: Magnus 
Norddahl Date: Thu, 22 Sep 2016 03:49:19 +0200 Subject: [PATCH 118/912] Don't do ambient occlusion when rendering to texture --- src/gl/scene/gl_scene.cpp | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/src/gl/scene/gl_scene.cpp b/src/gl/scene/gl_scene.cpp index 596f2c22bd..8331930bcd 100644 --- a/src/gl/scene/gl_scene.cpp +++ b/src/gl/scene/gl_scene.cpp @@ -492,14 +492,29 @@ void FGLRenderer::DrawScene(int drawmode) } GLRenderer->mClipPortal = NULL; // this must be reset before any portal recursion takes place. - RenderScene(recursion); - - bool applySSAO = gl_ssao && FGLRenderBuffers::IsEnabled() && drawmode != DM_PORTAL; + // If SSAO is active, switch to gbuffer shaders and use the gbuffer framebuffer + bool applySSAO = gl_ssao && FGLRenderBuffers::IsEnabled() && drawmode == DM_MAINVIEW; if (applySSAO) { - AmbientOccludeScene(); + GLenum buffers[] = { GL_COLOR_ATTACHMENT0, GL_COLOR_ATTACHMENT1 }; + glDrawBuffers(2, buffers); gl_RenderState.SetPassType(GBUFFER_PASS); gl_RenderState.Apply(); + gl_RenderState.ApplyMatrices(); + } + + RenderScene(recursion); + + // Apply ambient occlusion and switch back to shaders without gbuffer output + if (applySSAO) + { + GLenum buffers[] = { GL_COLOR_ATTACHMENT0 }; + glDrawBuffers(1, buffers); + AmbientOccludeScene(); + mBuffers->BindSceneFB(true); + gl_RenderState.SetPassType(NORMAL_PASS); + gl_RenderState.Apply(); + gl_RenderState.ApplyMatrices(); } // Handle all portals after rendering the opaque objects but before @@ -508,15 +523,6 @@ void FGLRenderer::DrawScene(int drawmode) GLPortal::EndFrame(); recursion--; RenderTranslucent(); - - if (applySSAO) - { - mBuffers->BindSceneFB(true); - GLenum buffers[] = { GL_COLOR_ATTACHMENT0, GL_COLOR_ATTACHMENT1 }; - glDrawBuffers(2, buffers); - gl_RenderState.SetPassType(NORMAL_PASS); - gl_RenderState.Apply(); - } } From 43505877143dbba40e8ef454625b053ff5b7ca6b Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 24 Sep 2016 
08:40:28 +0200 Subject: [PATCH 119/912] Remove accidental UpdateCameraExposure duplicate from merge --- src/gl/renderer/gl_postprocess.cpp | 72 ------------------------------ 1 file changed, 72 deletions(-) diff --git a/src/gl/renderer/gl_postprocess.cpp b/src/gl/renderer/gl_postprocess.cpp index 0747e95d89..b3d1259220 100644 --- a/src/gl/renderer/gl_postprocess.cpp +++ b/src/gl/renderer/gl_postprocess.cpp @@ -322,78 +322,6 @@ void FGLRenderer::UpdateCameraExposure() FGLDebug::PopGroup(); } -//----------------------------------------------------------------------------- -// -// Extracts light average from the scene and updates the camera exposure texture -// -//----------------------------------------------------------------------------- - -void FGLRenderer::UpdateCameraExposure() -{ - if (!gl_bloom && gl_tonemap == 0) - return; - - FGLDebug::PushGroup("UpdateCameraExposure"); - - FGLPostProcessState savedState; - savedState.SaveTextureBinding1(); - - // Extract light level from scene texture: - const auto &level0 = mBuffers->ExposureLevels[0]; - glBindFramebuffer(GL_FRAMEBUFFER, level0.Framebuffer); - glViewport(0, 0, level0.Width, level0.Height); - mBuffers->BindCurrentTexture(0); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); - mExposureExtractShader->Bind(); - mExposureExtractShader->SceneTexture.Set(0); - mExposureExtractShader->Scale.Set(mSceneViewport.width / (float)mScreenViewport.width, mSceneViewport.height / (float)mScreenViewport.height); - mExposureExtractShader->Offset.Set(mSceneViewport.left / (float)mScreenViewport.width, mSceneViewport.top / (float)mScreenViewport.height); - RenderScreenQuad(); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); - - // Find the average value: - for (int i = 0; i + 1 < mBuffers->ExposureLevels.Size(); i++) - { - const auto &level = 
mBuffers->ExposureLevels[i]; - const auto &next = mBuffers->ExposureLevels[i + 1]; - - glBindFramebuffer(GL_FRAMEBUFFER, next.Framebuffer); - glViewport(0, 0, next.Width, next.Height); - glBindTexture(GL_TEXTURE_2D, level.Texture); - mExposureAverageShader->Bind(); - mExposureAverageShader->ExposureTexture.Set(0); - RenderScreenQuad(); - } - - // Combine average value with current camera exposure: - glBindFramebuffer(GL_FRAMEBUFFER, mBuffers->ExposureFB); - glViewport(0, 0, 1, 1); - if (!mBuffers->FirstExposureFrame) - { - glEnable(GL_BLEND); - glBlendEquation(GL_FUNC_ADD); - glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); - } - else - { - mBuffers->FirstExposureFrame = false; - } - glActiveTexture(GL_TEXTURE0); - glBindTexture(GL_TEXTURE_2D, mBuffers->ExposureLevels.Last().Texture); - mExposureCombineShader->Bind(); - mExposureCombineShader->ExposureTexture.Set(0); - mExposureCombineShader->ExposureBase.Set(gl_exposure_base); - mExposureCombineShader->ExposureMin.Set(gl_exposure_min); - mExposureCombineShader->ExposureScale.Set(gl_exposure_scale); - mExposureCombineShader->ExposureSpeed.Set(gl_exposure_speed); - RenderScreenQuad(); - glViewport(mScreenViewport.left, mScreenViewport.top, mScreenViewport.width, mScreenViewport.height); - - FGLDebug::PopGroup(); -} - //----------------------------------------------------------------------------- // // Adds bloom contribution to scene texture From 7ef5a9f1177f682f01d6730058c064697531bb80 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 24 Sep 2016 09:36:37 +0200 Subject: [PATCH 120/912] Add the experimental swrenderer2 --- src/CMakeLists.txt | 1 + src/r_main.cpp | 23 +- src/r_swrenderer2.cpp | 1871 +++++++++++++++++++++++++++++++++++++++++ src/r_swrenderer2.h | 345 ++++++++ 4 files changed, 2236 insertions(+), 4 deletions(-) create mode 100644 src/r_swrenderer2.cpp create mode 100644 src/r_swrenderer2.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 0f42523006..a213af8f73 100644 --- 
a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -971,6 +971,7 @@ set( NOT_COMPILED_SOURCE_FILES set( FASTMATH_PCH_SOURCES r_swrenderer.cpp + r_swrenderer2.cpp r_3dfloors.cpp r_bsp.cpp r_draw.cpp diff --git a/src/r_main.cpp b/src/r_main.cpp index ba02a7c605..f1d41506ed 100644 --- a/src/r_main.cpp +++ b/src/r_main.cpp @@ -60,6 +60,9 @@ #include "r_data/colormaps.h" #include "farchive.h" #include "p_maputl.h" +#include "r_swrenderer2.h" + +CVAR(Bool, r_newrenderer, 0, 0); // MACROS ------------------------------------------------------------------ @@ -897,7 +900,15 @@ void R_RenderActorView (AActor *actor, bool dontmaplines) // Link the polyobjects right before drawing the scene to reduce the amounts of calls to this function PO_LinkToSubsectors(); InSubsector = NULL; - R_RenderBSPNode (nodes + numnodes - 1); // The head node is the last node output. + if (!r_newrenderer || !r_swtruecolor) + { + R_RenderBSPNode(nodes + numnodes - 1); // The head node is the last node output. + } + else + { + RenderBsp bsp; + bsp.Render(); + } R_3D_ResetClip(); // reset clips (floor/ceiling) camera->renderflags = savedflags; WallCycles.Unclock(); @@ -907,8 +918,11 @@ void R_RenderActorView (AActor *actor, bool dontmaplines) if (viewactive) { PlaneCycles.Clock(); - R_DrawPlanes (); - R_DrawPortals (); + if (!r_newrenderer || !r_swtruecolor) + { + R_DrawPlanes(); + R_DrawPortals(); + } PlaneCycles.Unclock(); // [RH] Walk through mirrors @@ -925,7 +939,8 @@ void R_RenderActorView (AActor *actor, bool dontmaplines) NetUpdate (); MaskedCycles.Clock(); - R_DrawMasked (); + if (!r_newrenderer || !r_swtruecolor) + R_DrawMasked (); MaskedCycles.Unclock(); NetUpdate (); diff --git a/src/r_swrenderer2.cpp b/src/r_swrenderer2.cpp new file mode 100644 index 0000000000..f7fb6afb90 --- /dev/null +++ b/src/r_swrenderer2.cpp @@ -0,0 +1,1871 @@ + +#include +#include "templates.h" +#include "doomdef.h" +#include "sbar.h" +#include "r_data/r_translate.h" +#include "r_swrenderer2.h" +#include "r_draw.h" 
+#include "r_plane.h" // for yslope +#include "r_sky.h" // for skyflatnum +#include "r_things.h" // for pspritexscale + +EXTERN_CVAR(Bool, r_drawplayersprites) +EXTERN_CVAR(Bool, r_deathcamera) +EXTERN_CVAR(Bool, st_scale) + +DVector3 ViewPosTransform::WorldToEye(const DVector3 &worldPoint) const +{ + double tr_x = worldPoint.X - ViewPos.X; + double tr_y = worldPoint.Y - ViewPos.Y; + double tr_z = worldPoint.Z - ViewPos.Z; + double tx = tr_x * ViewSin - tr_y * ViewCos; + double tz = tr_x * ViewTanCos + tr_y * ViewTanSin; + return DVector3(tx, tr_z, tz); +} + +DVector3 ViewPosTransform::EyeToViewport(const DVector3 &eyePoint) const +{ + double rcp_z = 1.0 / eyePoint.Z; + return DVector3(eyePoint.X * rcp_z, eyePoint.Y * rcp_z, rcp_z); +} + +DVector3 ViewPosTransform::ViewportToScreen(const DVector3 &viewportPoint) const +{ + return DVector3(CenterX + viewportPoint.X * CenterX, CenterY - viewportPoint.Y * InvZtoScale, viewportPoint.Z); +} + +double ViewPosTransform::ScreenXToEye(int x, double z) const +{ + return (x + 0.5 - CenterX) / CenterX * z; +} + +double ViewPosTransform::ScreenYToEye(int y, double z) const +{ + return -(y + 0.5 - CenterY) / InvZtoScale * z; +} + +///////////////////////////////////////////////////////////////////////////// + +WallCoords::WallCoords(const ViewPosTransform &transform, const DVector2 &v1, const DVector2 &v2, double ceil1, double floor1, double ceil2, double floor2) : Transform(transform) +{ + // Transform wall to eye space + DVector3 top1 = transform.WorldToEye(DVector3(v1, ceil1)); + DVector3 top2 = transform.WorldToEye(DVector3(v2, ceil2)); + DVector3 bottom1 = transform.WorldToEye(DVector3(v1, floor1)); + DVector3 bottom2 = transform.WorldToEye(DVector3(v2, floor2)); + + double clipNearZ = transform.NearZ(); + + // Is entire wall behind znear clipping plane? 
If so, wall is culled + if ((top1.Z < clipNearZ && top2.Z < clipNearZ)) + return; + + PlaneNormal = (top2 - top1) ^ (top1 - bottom1); + PlaneD = -(PlaneNormal | top1); + + // Clip wall to znear clipping plane + if (top1.Z < clipNearZ) + { + double t = (clipNearZ - top1.Z) / (top2.Z - top1.Z); + top1 = Mix(top1, top2, t); + bottom1 = Mix(bottom1, bottom2, t); + + VaryingXScale = 1.0 - t; + VaryingXOffset = t; + } + else if (top2.Z < clipNearZ) + { + double t = (clipNearZ - top1.Z) / (top2.Z - top1.Z); + top2 = Mix(top1, top2, t); + bottom2 = Mix(bottom1, bottom2, t); + VaryingXScale = t; + VaryingXOffset = 0.0; + } + + NearZ = MIN(top1.Z, top2.Z); + FarZ = MAX(top1.Z, top2.Z); + + // Transform to screen coordinates + ScreenTopLeft = transform.EyeToScreen(top1); + ScreenTopRight = transform.EyeToScreen(top2); + ScreenBottomLeft = transform.EyeToScreen(bottom1); + ScreenBottomRight = transform.EyeToScreen(bottom2); + if (ScreenTopLeft.X > ScreenTopRight.X) + { + std::swap(ScreenTopLeft, ScreenTopRight); + std::swap(ScreenBottomLeft, ScreenBottomRight); + } + + ScreenX1 = xs_RoundToInt(MAX(ScreenTopLeft.X, 0.0)); + ScreenX2 = xs_RoundToInt(MIN(ScreenTopRight.X, (double)viewwidth)); + ScreenY1 = xs_RoundToInt(MAX(MIN(ScreenTopLeft.Y, ScreenTopRight.Y), 0.0)); + ScreenY2 = xs_RoundToInt(MIN(MAX(ScreenBottomLeft.Y, ScreenBottomRight.Y), (double)viewheight)); + + // Cull if nothing of the wall is visible + if (ScreenX2 <= ScreenX1 || ScreenY2 <= ScreenY1) + return; + + RcpDeltaScreenX = 1.0 / (ScreenTopRight.X - ScreenTopLeft.X); + Culled = false; +} + +DVector3 WallCoords::Mix(const DVector3 &a, const DVector3 &b, double t) +{ + double invt = 1.0 - t; + return DVector3(a.X * invt + b.X * t, a.Y * invt + b.Y * t, a.Z * invt + b.Z * t); +} + +double WallCoords::Mix(double a, double b, double t) +{ + return a * (1.0 - t) + b * t; +} + +short WallCoords::Y1(int x) const +{ + double t = (x + 0.5 - ScreenTopLeft.X) * RcpDeltaScreenX; + return 
(short)MAX(xs_RoundToInt(Mix(ScreenTopLeft.Y, ScreenTopRight.Y, t)), 0); +} + +short WallCoords::Y2(int x) const +{ + double t = (x + 0.5 - ScreenBottomLeft.X) * RcpDeltaScreenX; + return (short)MIN(xs_RoundToInt(Mix(ScreenBottomLeft.Y, ScreenBottomRight.Y, t)), viewheight); +} + +double WallCoords::Z(int x) const +{ + double t = (x + 0.5 - ScreenTopLeft.X) * RcpDeltaScreenX; + double rcp_z = Mix(ScreenTopLeft.Z, ScreenTopRight.Z, t); + return 1.0 / rcp_z; +} + +double WallCoords::VaryingX(int x, double start, double end) const +{ + double t = (x + 0.5 - ScreenTopLeft.X) * RcpDeltaScreenX; + double rcp_z = Mix(ScreenTopLeft.Z, ScreenTopRight.Z, t); + double t2 = VaryingXOffset + t / rcp_z * ScreenTopRight.Z * VaryingXScale; + return Mix(start, end, t2); +} + +double WallCoords::VaryingY(int x, int y, double start, double end) const +{ + double t = (x + 0.5 - ScreenTopLeft.X) * RcpDeltaScreenX; + double y1 = Mix(ScreenTopLeft.Y, ScreenTopRight.Y, t); + double y2 = Mix(ScreenBottomLeft.Y, ScreenBottomRight.Y, t); + if (y1 == y2 || y1 == -y2) + return start; + double t2 = (y + 0.5 - y1) / (y2 - y1); + return Mix(start, end, t2); +} + +///////////////////////////////////////////////////////////////////////////// + +void RenderBsp::Render() +{ + Clip.Clear(0, viewwidth); + Planes.Clear(); + VisibleSprites.clear(); + ScreenSprites.clear(); + + if (numnodes == 0) + RenderSubsector(subsectors); + else + RenderNode(nodes + numnodes - 1); // The head node is the last node output. 
+ + Planes.Render(); + + RenderMaskedObjects(); + RenderPlayerSprites(); + RenderScreenSprites(); // To do: should be called by FSoftwareRenderer::DrawRemainingPlayerSprites instead of here +} + +void RenderBsp::RenderScreenSprites() +{ + for (auto &sprite : ScreenSprites) + sprite.Render(); +} + +void RenderBsp::RenderMaskedObjects() +{ + Clip.DrawMaskedWall = [&](short x1, short x2, int drawIndex, const short *clipTop, const short *clipBottom) + { + VisibleMaskedWalls[drawIndex].RenderMasked(x1, x2, clipTop, clipBottom); + }; + + std::stable_sort(VisibleSprites.begin(), VisibleSprites.end(), [](const auto &a, const auto &b) { return a.EyePos.Z > b.EyePos.Z; }); + + for (auto &sprite : VisibleSprites) + sprite.Render(&Clip); + + Clip.RenderMaskedWalls(); +} + +void RenderBsp::RenderPlayerSprites() +{ + if (!r_drawplayersprites || + !camera || + !camera->player || + (players[consoleplayer].cheats & CF_CHASECAM) || + (r_deathcamera && camera->health <= 0)) + return; + + float bobx, boby; + P_BobWeapon(camera->player, &bobx, &boby, r_TicFracF); + + // Interpolate the main weapon layer once so as to be able to add it to other layers. + double wx, wy; + DPSprite *weapon = camera->player->FindPSprite(PSP_WEAPON); + if (weapon) + { + if (weapon->firstTic) + { + wx = weapon->x; + wy = weapon->y; + } + else + { + wx = weapon->oldx + (weapon->x - weapon->oldx) * r_TicFracF; + wy = weapon->oldy + (weapon->y - weapon->oldy) * r_TicFracF; + } + } + else + { + wx = 0; + wy = 0; + } + + for (DPSprite *sprite = camera->player->psprites; sprite != nullptr; sprite = sprite->GetNext()) + { + // [RH] Don't draw the targeter's crosshair if the player already has a crosshair set. + // It's possible this psprite's caller is now null but the layer itself hasn't been destroyed + // because it didn't tick yet (if we typed 'take all' while in the console for example). + // In this case let's simply not draw it to avoid crashing. 
+ if ((sprite->GetID() != PSP_TARGETCENTER || CrosshairImage == nullptr) && sprite->GetCaller() != nullptr) + { + RenderPlayerSprite(sprite, camera, bobx, boby, wx, wy, r_TicFracF); + } + } +} + +void RenderBsp::RenderPlayerSprite(DPSprite *sprite, AActor *owner, float bobx, float boby, double wx, double wy, double ticfrac) +{ + // decide which patch to use + if ((unsigned)sprite->GetSprite() >= (unsigned)sprites.Size()) + { + DPrintf(DMSG_ERROR, "RenderPlayerSprite: invalid sprite number %i\n", sprite->GetSprite()); + return; + } + + spritedef_t *def = &sprites[sprite->GetSprite()]; + if (sprite->GetFrame() >= def->numframes) + { + DPrintf(DMSG_ERROR, "RenderPlayerSprite: invalid sprite frame %i : %i\n", sprite->GetSprite(), sprite->GetFrame()); + return; + } + + spriteframe_t *frame = &SpriteFrames[def->spriteframes + sprite->GetFrame()]; + FTextureID picnum = frame->Texture[0]; + bool flip = (frame->Flip & 1) != 0; + + FTexture *tex = TexMan(picnum); + if (tex->UseType == FTexture::TEX_Null) + return; + + // Can't interpolate the first tic. 
+ if (sprite->firstTic) + { + sprite->firstTic = false; + sprite->oldx = sprite->x; + sprite->oldy = sprite->y; + } + + double sx = sprite->oldx + (sprite->x - sprite->oldx) * ticfrac; + double sy = sprite->oldy + (sprite->y - sprite->oldy) * ticfrac; + + if (sprite->Flags & PSPF_ADDBOB) + { + sx += bobx; + sy += boby; + } + + if (sprite->Flags & PSPF_ADDWEAPON && sprite->GetID() != PSP_WEAPON) + { + sx += wx; + sy += wy; + } + + // calculate edges of the shape + double tx = sx - BaseXCenter; + + tx -= tex->GetScaledLeftOffset(); + int x1 = xs_RoundToInt(CenterX + tx * pspritexscale); + + // off the right side + if (x1 > viewwidth) + return; + + tx += tex->GetScaledWidth(); + int x2 = xs_RoundToInt(CenterX + tx * pspritexscale); + + // off the left side + if (x2 <= 0) + return; + + double texturemid = (BaseYCenter - sy) * tex->Scale.Y + tex->TopOffset; + + // Adjust PSprite for fullscreen views + if (camera->player && (RenderTarget != screen || viewheight == RenderTarget->GetHeight() || (RenderTarget->GetWidth() > (BaseXCenter * 2) && !st_scale))) + { + AWeapon *weapon = dyn_cast(sprite->GetCaller()); + if (weapon != nullptr && weapon->YAdjust != 0) + { + if (RenderTarget != screen || viewheight == RenderTarget->GetHeight()) + { + texturemid -= weapon->YAdjust; + } + else + { + texturemid -= StatusBar->GetDisplacement() * weapon->YAdjust; + } + } + } + + // Move the weapon down for 1280x1024. 
+ if (sprite->GetID() < PSP_TARGETCENTER) + { + texturemid -= AspectPspriteOffset(WidescreenRatio); + } + + int clipped_x1 = MAX(x1, 0); + int clipped_x2 = MIN(x2, viewwidth); + double xscale = pspritexscale / tex->Scale.X; + double yscale = pspriteyscale / tex->Scale.Y; + uint32_t translation = 0; // [RH] Use default colors + + double xiscale, startfrac; + if (flip) + { + xiscale = -pspritexiscale * tex->Scale.X; + startfrac = 1; + } + else + { + xiscale = pspritexiscale * tex->Scale.X; + startfrac = 0; + } + + if (clipped_x1 > x1) + startfrac += xiscale * (clipped_x1 - x1); + + bool noaccel = false; + + FDynamicColormap *basecolormap = viewsector->ColorMap; + FDynamicColormap *colormap_to_use = basecolormap; + + visstyle_t visstyle; + visstyle.ColormapNum = 0; + visstyle.BaseColormap = basecolormap; + visstyle.Alpha = 0; + visstyle.RenderStyle = STYLE_Normal; + + bool foggy = false; + int actualextralight = foggy ? 0 : extralight << 4; + int spriteshade = LIGHT2SHADE(owner->Sector->lightlevel + actualextralight); + double minz = double((2048 * 4) / double(1 << 20)); + visstyle.ColormapNum = GETPALOOKUP(r_SpriteVisibility / minz, spriteshade); + + if (sprite->GetID() < PSP_TARGETCENTER) + { + // Lots of complicated style and noaccel stuff + } + + // Check for hardware-assisted 2D. If it's available, and this sprite is not + // fuzzy, don't draw it until after the switch to 2D mode. 
+ if (!noaccel && RenderTarget == screen && (DFrameBuffer *)screen->Accel2D) + { + FRenderStyle style = visstyle.RenderStyle; + style.CheckFuzz(); + if (style.BlendOp != STYLEOP_Fuzz) + { + ScreenSprite screenSprite; + screenSprite.Pic = tex; + screenSprite.X1 = viewwindowx + x1; + screenSprite.Y1 = viewwindowy + viewheight / 2 - texturemid * yscale - 0.5; + screenSprite.Width = tex->GetWidth() * xscale; + screenSprite.Height = tex->GetHeight() * yscale; + screenSprite.Translation = TranslationToTable(translation); + screenSprite.Flip = xiscale < 0; + screenSprite.visstyle = visstyle; + screenSprite.Colormap = colormap_to_use; + ScreenSprites.push_back(screenSprite); + return; + } + } + + //R_DrawVisSprite(vis); +} + +bool RenderBsp::IsThingCulled(AActor *thing) +{ + FIntCVar *cvar = thing->GetClass()->distancecheck; + if (cvar != nullptr && *cvar >= 0) + { + double dist = (thing->Pos() - ViewPos).LengthSquared(); + double check = (double)**cvar; + if (dist >= check * check) + return true; + } + + // Don't waste time projecting sprites that are definitely not visible. + if (thing == nullptr || + (thing->renderflags & RF_INVISIBLE) || + !thing->RenderStyle.IsVisible(thing->Alpha) || + !thing->IsVisibleToPlayer()) + { + return true; + } + + return false; +} + +void RenderBsp::AddSprite(AActor *thing) +{ + if (IsThingCulled(thing)) + return; + + DVector3 pos = thing->InterpolatedPosition(r_TicFracF); + pos.Z += thing->GetBobOffset(r_TicFracF); + + DVector3 eyePos = Transform.WorldToEye(pos); + + // thing is behind view plane? + if (eyePos.Z < Transform.NearZ()) + return; + + // too far off the side? 
+ if (fabs(eyePos.X / 64) > eyePos.Z) + return; + + VisibleSprites.push_back({ thing, eyePos }); +} + +void RenderBsp::AddWallSprite(AActor *thing) +{ + if (IsThingCulled(thing)) + return; +} + +void RenderBsp::RenderSubsector(subsector_t *sub) +{ + sector_t *frontsector = sub->sector; + frontsector->MoreFlags |= SECF_DRAWN; + + for (AActor *thing = sub->sector->thinglist; thing != nullptr; thing = thing->snext) + { + if ((thing->renderflags & RF_SPRITETYPEMASK) == RF_WALLSPRITE) + AddWallSprite(thing); + else + AddSprite(thing); + } + + for (uint32_t i = 0; i < sub->numlines; i++) + { + seg_t *line = &sub->firstline[i]; + if (line->sidedef == NULL || !(line->sidedef->Flags & WALLF_POLYOBJ)) + AddLine(line, frontsector); + } +} + +void RenderBsp::AddLine(seg_t *line, sector_t *frontsector) +{ + // Reject lines not facing viewer + DVector2 pt1 = line->v1->fPos() - ViewPos; + DVector2 pt2 = line->v2->fPos() - ViewPos; + if (pt1.Y * (pt1.X - pt2.X) + pt1.X * (pt2.Y - pt1.Y) >= 0) + return; + + double frontceilz1 = frontsector->ceilingplane.ZatPoint(line->v1); + double frontfloorz1 = frontsector->floorplane.ZatPoint(line->v1); + double frontceilz2 = frontsector->ceilingplane.ZatPoint(line->v2); + double frontfloorz2 = frontsector->floorplane.ZatPoint(line->v2); + + WallCoords entireWall(Transform, line->v1->fPos(), line->v2->fPos(), frontceilz1, frontfloorz1, frontceilz2, frontfloorz2); + if (entireWall.Culled) + return; + + VisiblePlaneKey ceilingPlaneKey(frontsector->GetTexture(sector_t::ceiling), frontsector->ColorMap, frontsector->lightlevel, frontsector->ceilingplane, frontsector->planes[sector_t::ceiling].xform); + VisiblePlaneKey floorPlaneKey(frontsector->GetTexture(sector_t::floor), frontsector->ColorMap, frontsector->lightlevel, frontsector->floorplane, frontsector->planes[sector_t::floor].xform); + + RenderWall wall; + wall.Line = line; + wall.Colormap = frontsector->ColorMap; + wall.Masked = false; + + if (line->backsector == nullptr) + { + 
Planes.MarkCeilingPlane(ceilingPlaneKey, Clip, entireWall); + Planes.MarkFloorPlane(floorPlaneKey, Clip, entireWall); + + wall.Coords = entireWall; + wall.TopZ = frontceilz1; + wall.BottomZ = frontfloorz1; + wall.UnpeggedCeil = frontceilz1; + wall.Texpart = side_t::mid; + wall.Render(Clip); + + Clip.MarkSegmentCulled(entireWall, -1); + } + else + { + sector_t *backsector = (line->backsector != line->frontsector) ? line->backsector : line->frontsector; + + double backceilz1 = backsector->ceilingplane.ZatPoint(line->v1); + double backfloorz1 = backsector->floorplane.ZatPoint(line->v1); + double backceilz2 = backsector->ceilingplane.ZatPoint(line->v2); + double backfloorz2 = backsector->floorplane.ZatPoint(line->v2); + + double topceilz1 = frontceilz1; + double topceilz2 = frontceilz2; + double topfloorz1 = MIN(backceilz1, frontceilz1); + double topfloorz2 = MIN(backceilz2, frontceilz2); + double bottomceilz1 = MAX(frontfloorz1, backfloorz1); + double bottomceilz2 = MAX(frontfloorz2, backfloorz2); + double bottomfloorz1 = frontfloorz1; + double bottomfloorz2 = frontfloorz2; + double middleceilz1 = topfloorz1; + double middleceilz2 = topfloorz2; + double middlefloorz1 = MIN(bottomceilz1, middleceilz1); + double middlefloorz2 = MIN(bottomceilz2, middleceilz2); + + bool bothSkyCeiling = frontsector->GetTexture(sector_t::ceiling) == skyflatnum && backsector->GetTexture(sector_t::ceiling) == skyflatnum; + bool bothSkyFloor = frontsector->GetTexture(sector_t::floor) == skyflatnum && backsector->GetTexture(sector_t::floor) == skyflatnum; + + int maskedWallIndex = -1; + + if ((topceilz1 > topfloorz1 || topceilz2 > topfloorz2) && !bothSkyCeiling && line->sidedef) + { + WallCoords topwall(Transform, line->v1->fPos(), line->v2->fPos(), topceilz1, topfloorz1, topceilz2, topfloorz2); + if (!topwall.Culled) + { + wall.Coords = topwall; + wall.TopZ = topceilz1; + wall.BottomZ = topfloorz1; + wall.UnpeggedCeil = topceilz1; + wall.Texpart = side_t::top; + wall.Render(Clip); + } + } + 
+ if ((bottomfloorz1 < bottomceilz1 || bottomfloorz2 < bottomceilz2) && !bothSkyFloor && line->sidedef) + { + WallCoords bottomwall(Transform, line->v1->fPos(), line->v2->fPos(), bottomceilz1, bottomfloorz2, bottomceilz2, bottomfloorz2); + if (!bottomwall.Culled) + { + wall.Coords = bottomwall; + wall.TopZ = bottomceilz1; + wall.BottomZ = bottomfloorz2; + wall.UnpeggedCeil = topceilz1; + wall.Texpart = side_t::bottom; + wall.Render(Clip); + } + } + + WallCoords midwall(Transform, line->v1->fPos(), line->v2->fPos(), middleceilz1, middlefloorz1, middleceilz2, middlefloorz2); + if (!midwall.Culled && line->sidedef) + { + FTexture *midtex = TexMan(line->sidedef->GetTexture(side_t::mid), true); + if (midtex && midtex->UseType != FTexture::TEX_Null) + { + DVector3 v1 = Transform.WorldToEye({ line->v1->fPos(), 0.0 }); + DVector3 v2 = Transform.WorldToEye({ line->v2->fPos(), 0.0 }); + wall.Coords = midwall; + wall.TopZ = middleceilz1; + wall.BottomZ = middlefloorz1; + wall.UnpeggedCeil = topceilz1; + wall.Texpart = side_t::mid; + wall.Masked = true; + + maskedWallIndex = (int)VisibleMaskedWalls.size(); + VisibleMaskedWalls.push_back(wall); + } + } + + if (!bothSkyCeiling && !bothSkyFloor) + { + Planes.MarkCeilingPlane(ceilingPlaneKey, Clip, entireWall); + Planes.MarkFloorPlane(floorPlaneKey, Clip, entireWall); + if (!midwall.Culled) + Clip.ClipVertical(midwall, maskedWallIndex); + else + Clip.MarkSegmentCulled(entireWall, maskedWallIndex); + } + else if (bothSkyCeiling) + { + Planes.MarkFloorPlane(floorPlaneKey, Clip, entireWall); + if (!midwall.Culled) + Clip.ClipBottom(midwall, maskedWallIndex); + else + Clip.MarkSegmentCulled(entireWall, maskedWallIndex); + } + else if (bothSkyFloor) + { + Planes.MarkCeilingPlane(ceilingPlaneKey, Clip, entireWall); + if (!midwall.Culled) + Clip.ClipTop(midwall, maskedWallIndex); + else + Clip.MarkSegmentCulled(entireWall, maskedWallIndex); + } + } +} + +void RenderBsp::RenderNode(void *node) +{ + while (!((size_t)node & 1)) // Keep 
// Tests whether a BSP node's bounding box may still contribute visible pixels.
//
// bspcoord - bounding box values, indexed with BOXLEFT/BOXRIGHT/BOXTOP/BOXBOTTOM.
//
// Returns true when the view point is inside the box or on the tested edge, or
// when the projected column range [sx1, sx2) is not reported as culled by
// Clip.IsSegmentCulled. Returns false when the box edge lies outside the view
// volume, covers no screen column, or is fully covered by solid segments.
bool RenderBsp::CheckBBox(float *bspcoord)
{
	// For each view-point position relative to the box (boxpos), the four
	// bspcoord indices giving (x1, y1, x2, y2) of the box edge to test.
	// Rows 3 and 7 are never selected because boxx is only 0..2; row 5
	// (view point inside the box) is handled by an early return below.
	static const int checkcoord[12][4] =
	{
		{ 3,0,2,1 },
		{ 3,0,2,0 },
		{ 3,1,2,0 },
		{ 0 },
		{ 2,0,2,1 },
		{ 0,0,0,0 },
		{ 3,1,3,0 },
		{ 0 },
		{ 2,0,3,1 },
		{ 2,1,3,1 },
		{ 2,1,3,0 }
	};

	int boxx;
	int boxy;
	int boxpos;

	double x1, y1, x2, y2;
	double rx1, ry1, rx2, ry2;
	int sx1, sx2;

	// Find the corners of the box
	// that define the edges from current viewpoint.
	if (ViewPos.X <= bspcoord[BOXLEFT])
		boxx = 0;
	else if (ViewPos.X < bspcoord[BOXRIGHT])
		boxx = 1;
	else
		boxx = 2;

	if (ViewPos.Y >= bspcoord[BOXTOP])
		boxy = 0;
	else if (ViewPos.Y > bspcoord[BOXBOTTOM])
		boxy = 1;
	else
		boxy = 2;

	// Combine both classifications into a table row; 5 means "inside on both axes".
	boxpos = (boxy << 2) + boxx;
	if (boxpos == 5)
		return true;

	// Edge end points relative to the view position.
	x1 = bspcoord[checkcoord[boxpos][0]] - ViewPos.X;
	y1 = bspcoord[checkcoord[boxpos][1]] - ViewPos.Y;
	x2 = bspcoord[checkcoord[boxpos][2]] - ViewPos.X;
	y2 = bspcoord[checkcoord[boxpos][3]] - ViewPos.Y;

	// check clip list for an open space

	// Sitting on a line?
	if (y1 * (x1 - x2) + x1 * (y2 - y1) >= -EQUAL_EPSILON)
		return true;

	// Rotate the end points by the view angle. The comparisons below treat
	// |rx| <= ry as "inside the horizontal view volume".
	rx1 = x1 * ViewSin - y1 * ViewCos;
	rx2 = x2 * ViewSin - y2 * ViewCos;
	ry1 = x1 * ViewTanCos + y1 * ViewTanSin;
	ry2 = x2 * ViewTanCos + y2 * ViewTanSin;

	/*if (MirrorFlags & RF_XFLIP)
	{
		double t = -rx1;
		rx1 = -rx2;
		rx2 = t;
		swapvalues(ry1, ry2);
	}*/

	// Left end: project to a screen column, or clamp to column 0.
	if (rx1 >= -ry1)
	{
		if (rx1 > ry1) return false; // left edge is off the right side
		if (ry1 == 0) return false;
		sx1 = xs_RoundToInt(CenterX + rx1 * CenterX / ry1);
	}
	else
	{
		if (rx2 < -ry2) return false; // wall is off the left side
		if (rx1 - rx2 - ry2 + ry1 == 0) return false; // wall does not intersect view volume
		sx1 = 0;
	}

	// Right end: project to a screen column, or clamp to viewwidth.
	if (rx2 <= ry2)
	{
		if (rx2 < -ry2) return false; // right edge is off the left side
		if (ry2 == 0) return false;
		sx2 = xs_RoundToInt(CenterX + rx2 * CenterX / ry2);
	}
	else
	{
		if (rx1 > ry1) return false; // wall is off the right side
		if (ry2 - ry1 - rx2 + rx1 == 0) return false; // wall does not intersect view volume
		sx2 = viewwidth;
	}

	// Find the first clippost that touches the source post
	// (adjacent pixels are touching).

	// Does not cross a pixel.
	if (sx2 <= sx1)
		return false;

	return !Clip.IsSegmentCulled(sx1, sx2);
}
double bottomz, double vscale, double yoffset) +{ + bool pegged = (line->linedef->flags & ML_DONTPEGTOP) == 0; + if (pegged) // bottom to top + { + int texHeight = tex->GetHeight(); + v1 = -yoffset; + v2 = v1 + (topz - bottomz); + v1 *= vscale; + v2 *= vscale; + v1 = texHeight - v1; + v2 = texHeight - v2; + std::swap(v1, v2); + } + else // top to bottom + { + v1 = yoffset; + v2 = v1 + (topz - bottomz); + v1 *= vscale; + v2 *= vscale; + } +} + +void WallTextureCoords::CalcVMidPart(FTexture *tex, const seg_t *line, double topz, double bottomz, double vscale, double yoffset) +{ + bool pegged = (line->linedef->flags & ML_DONTPEGBOTTOM) == 0; + if (pegged) // top to bottom + { + v1 = yoffset; + v2 = v1 + (topz - bottomz); + v1 *= vscale; + v2 *= vscale; + } + else // bottom to top + { + int texHeight = tex->GetHeight(); + v1 = yoffset; + v2 = v1 + (topz - bottomz); + v1 *= vscale; + v2 *= vscale; + v1 = texHeight - v1; + v2 = texHeight - v2; + std::swap(v1, v2); + } +} + +void WallTextureCoords::CalcVBottomPart(FTexture *tex, const seg_t *line, double topz, double bottomz, double unpeggedceil, double vscale, double yoffset) +{ + bool pegged = (line->linedef->flags & ML_DONTPEGBOTTOM) == 0; + if (pegged) // top to bottom + { + v1 = yoffset; + v2 = v1 + (topz - bottomz); + v1 *= vscale; + v2 *= vscale; + } + else + { + v1 = yoffset + (unpeggedceil - topz); + v2 = v1 + (topz - bottomz); + v1 *= vscale; + v2 *= vscale; + } +} + +///////////////////////////////////////////////////////////////////////////// + +void RenderClipBuffer::Clear(short left, short right) +{ + SolidSegments.clear(); + SolidSegments.reserve(MAXWIDTH / 2 + 2); + SolidSegments.push_back({ -0x7fff, left }); + SolidSegments.push_back({ right, 0x7fff }); + + DrawSegments.clear(); + ClipValues.clear(); + + for (int x = left; x < right; x++) + { + Top[x] = 0; + Bottom[x] = viewheight; + } +} + +bool RenderClipBuffer::IsSegmentCulled(short x1, short x2) const +{ + int next = 0; + while (SolidSegments[next].X2 
// Marks the screen columns covered by a wall as fully occluded.
//
// For every still-visible run of columns inside the wall's range, the vertical
// clip window is closed (Bottom set equal to Top) and a draw segment is
// recorded. Afterwards the range [ScreenX1, ScreenX2) is merged into the sorted
// SolidSegments list.
//
// wallCoords - projected wall; ignored when wallCoords.Culled is set.
// drawIndex  - forwarded to AddDrawSegment (-1 is what AddDrawSegment treats as
//              "no masked wall").
void RenderClipBuffer::MarkSegmentCulled(const WallCoords &wallCoords, int drawIndex)
{
	if (wallCoords.Culled)
		return;

	VisibleSegmentsIterator it(*this, wallCoords.ScreenX1, wallCoords.ScreenX2);
	while (it.Step())
	{
		// Collapse the open vertical range so nothing more fits in these columns.
		for (short x = it.X1; x < it.X2; x++)
		{
			Bottom[x] = Top[x];
		}

		AddDrawSegment(it.X1, it.X2, wallCoords, true, true, drawIndex);
	}

	short x1 = wallCoords.ScreenX1;
	short x2 = wallCoords.ScreenX2;

	if (x1 >= x2)
		return;

	// The search starts at index 1, so entry 0 is never modified here.
	// NOTE(review): this relies on the first and last entries being the
	// sentinels pushed by Clear(), so that the loop always hits one of the
	// branches before running off the end - confirm.
	int cur = 1;
	while (true)
	{
		if (SolidSegments[cur].X1 <= x1 && SolidSegments[cur].X2 >= x2) // Already fully marked
		{
			break;
		}
		else if (cur + 1 != SolidSegments.size() && SolidSegments[cur].X2 >= x1 && SolidSegments[cur].X1 <= x2) // Merge segments
		{
			// Find last segment
			// (the final entry of the list is never absorbed into the merge)
			int merge = cur;
			while (merge + 2 != SolidSegments.size() && SolidSegments[merge + 1].X1 <= x2)
				merge++;

			// Apply new merged range
			SolidSegments[cur].X1 = MIN(SolidSegments[cur].X1, x1);
			SolidSegments[cur].X2 = MAX(SolidSegments[merge].X2, x2);

			// Remove additional segments we merged with
			if (merge > cur)
				SolidSegments.erase(SolidSegments.begin() + (cur + 1), SolidSegments.begin() + (merge + 1));

			break;
		}
		else if (SolidSegments[cur].X1 > x1) // Insert new segment
		{
			// No overlap with any existing entry: insert before the first
			// segment that starts to the right of x1, keeping the list sorted.
			SolidSegments.insert(SolidSegments.begin() + cur, { x1, x2 });
			break;
		}
		cur++;
	}
}
(wallCoords.Culled) + return; + + VisibleSegmentsIterator it(*this, wallCoords.ScreenX1, wallCoords.ScreenX2); + while (it.Step()) + { + for (short x = it.X1; x < it.X2; x++) + { + Top[x] = MAX(wallCoords.Y1(x), Top[x]); + } + AddDrawSegment(it.X1, it.X2, wallCoords, true, false, drawIndex); + } +} + +void RenderClipBuffer::ClipBottom(const WallCoords &wallCoords, int drawIndex) +{ + if (wallCoords.Culled) + return; + + VisibleSegmentsIterator it(*this, wallCoords.ScreenX1, wallCoords.ScreenX2); + while (it.Step()) + { + for (short x = it.X1; x < it.X2; x++) + { + Bottom[x] = MIN(wallCoords.Y2(x), Bottom[x]); + } + AddDrawSegment(it.X1, it.X2, wallCoords, false, true, drawIndex); + } +} + +void RenderClipBuffer::AddDrawSegment(short x1, short x2, const WallCoords &wall, bool clipTop, bool clipBottom, int drawIndex) +{ + if (drawIndex != -1) // DrawMaskedWall needs both clipping ranges + { + clipTop = true; + clipBottom = true; + } + + DrawSegment segment; + segment.X1 = x1; + segment.X2 = x2; + segment.ClipOffset = (int)ClipValues.size(); + segment.ClipTop = clipTop; + segment.ClipBottom = clipBottom; + segment.PlaneNormal = wall.PlaneNormal; + segment.PlaneD = wall.PlaneD; + segment.NearZ = wall.NearZ; + segment.FarZ = wall.FarZ; + segment.DrawIndex = drawIndex; + + if (clipTop) + { + ClipValues.insert(ClipValues.end(), Top + x1, Top + x2); + } + + if (clipBottom) + { + ClipValues.insert(ClipValues.end(), Bottom + x1, Bottom + x2); + } + + DrawSegments.push_back(segment); +} + +void RenderClipBuffer::SetupSpriteClip(short x1, short x2, const DVector3 &pos, bool wallSprite) +{ + for (int i = x1; i < x2; i++) + { + Top[i] = 0; + Bottom[i] = viewheight; + } + + for (auto it = DrawSegments.crbegin(); it != DrawSegments.crend(); ++it) + { + const auto &segment = *it; + + int r1 = MAX(segment.X1, x1); + int r2 = MIN(segment.X2, x2); + if (r2 <= r1) + continue; + + short *clipTop = ClipValues.data() + segment.ClipOffset; + short *clipBottom = segment.ClipTop ? 
clipTop + (segment.X2 - segment.X1) : clipTop; + + double side = (pos | segment.PlaneNormal) + segment.PlaneD; + bool segBehindSprite; + if (!wallSprite) + segBehindSprite = (segment.NearZ >= pos.Z) || (segment.FarZ >= pos.Z && side <= 0.0); + else + segBehindSprite = side <= 0.0; + + if (segBehindSprite) + { + if (segment.DrawIndex != -1 && DrawMaskedWall) + DrawMaskedWall(r1, r2, segment.DrawIndex, clipTop + (r1 - segment.X1), clipBottom + (r1 - segment.X1)); + + if (segment.ClipTop) + { + for (int i = r1 - segment.X1; i < r2 - segment.X1; i++) + clipTop[i] = 0; + } + + if (segment.ClipBottom) + { + for (int i = r1 - segment.X1; i < r2 - segment.X1; i++) + clipBottom[i] = 0; + } + } + else + { + if (segment.ClipTop) + { + for (int x = r1; x < r2; x++) + Top[x] = MAX(clipTop[x - segment.X1], Top[x]); + } + + if (segment.ClipBottom) + { + for (int x = r1; x < r2; x++) + Bottom[x] = MIN(clipBottom[x - segment.X1], Bottom[x]); + } + } + } +} + +void RenderClipBuffer::RenderMaskedWalls() +{ + for (int i = 0; i < viewwidth; i++) + { + Top[i] = 0; + Bottom[i] = viewheight; + } + + for (auto it = DrawSegments.crbegin(); it != DrawSegments.crend(); ++it) + { + const auto &segment = *it; + if (segment.DrawIndex != -1 && DrawMaskedWall) + { + short *clipTop = ClipValues.data() + segment.ClipOffset; + short *clipBottom = segment.ClipTop ? 
// Advances to the next run of columns inside [startx, endx) that is not covered
// by a solid segment.
//
// On success X1/X2 hold the half-open visible range and true is returned.
// False is returned once the resulting range is empty (X1 >= X2).
bool VisibleSegmentsIterator::Step()
{
	if (next == 0)
	{
		// Branch taken while `next` is still 0.
		// NOTE(review): presumably `next` is initialised to 0 in the class
		// declaration (the constructor here does not set it) - confirm.

		// Skip solid segments that end at or before the start column.
		while (SolidSegments[next].X2 <= X1)
			next++;
		// If the start column lies inside a solid segment, begin after it.
		if (SolidSegments[next].X1 <= X1)
			X1 = SolidSegments[next++].X2;
		// The visible run ends at the next solid segment or at endx.
		X2 = MIN(SolidSegments[next].X1, endx);
	}
	else if (X2 == SolidSegments[next].X1 && next + 1 != SolidSegments.size())
	{
		// The previous run stopped at a solid segment (not at endx) and there is
		// another segment after it: hop over it to the following gap.
		X1 = SolidSegments[next++].X2;
		X2 = MIN(SolidSegments[next].X1, endx);
	}
	else
	{
		// Previous run reached endx (or the last segment): produce an empty range.
		X1 = X2;
	}

	return X1 < X2;
}
double rightyfrac = yscale * (sine + x * ystep); + x = plane->Left - centerx - 0.5; + double leftxfrac = xscale * (cosine + x * xstep); + double leftyfrac = yscale * (sine + x * ystep); + + basexfrac = rightxfrac; + baseyfrac = rightyfrac; + xstepscale = (rightxfrac - leftxfrac) / (plane->Right - plane->Left); + ystepscale = (rightyfrac - leftyfrac) / (plane->Right - plane->Left); + + planeheight = fabs(key.Plane.Zat0() - ViewPos.Z); +} + +void RenderVisiblePlane::Step() +{ + basexfrac -= xstepscale; + baseyfrac -= ystepscale; +} + +///////////////////////////////////////////////////////////////////////////// + +void RenderPlanes::Render() +{ + for (int i = 0; i < NumBuckets; i++) + { + VisiblePlane *plane = PlaneBuckets[i].get(); + while (plane) + { + RenderPlane(plane); + plane = plane->Next.get(); + } + } +} + +void RenderPlanes::RenderPlane(VisiblePlane *plane) +{ + FTexture *tex = TexMan(plane->Key.Picnum); + if (tex->UseType == FTexture::TEX_Null) + return; + + RenderVisiblePlane render(plane, tex); + + short spanend[MAXHEIGHT]; + int x = plane->Right - 1; + int t2 = plane->Top[x]; + int b2 = plane->Bottom[x]; + + if (b2 > t2) + { + clearbufshort(spanend + t2, b2 - t2, x); + } + + for (--x; x >= plane->Left; --x) + { + int t1 = plane->Top[x]; + int b1 = plane->Bottom[x]; + const int xr = x + 1; + int stop; + + // Draw any spans that have just closed + stop = MIN(t1, b2); + while (t2 < stop) + { + int y = t2++; + RenderSpan(y, xr, spanend[y], plane->Key, tex, render); + } + stop = MAX(b1, t2); + while (b2 > stop) + { + int y = --b2; + RenderSpan(y, xr, spanend[y], plane->Key, tex, render); + } + + // Mark any spans that have just opened + stop = MIN(t2, b1); + while (t1 < stop) + { + spanend[t1++] = x; + } + stop = MAX(b2, t2); + while (b1 > stop) + { + spanend[--b1] = x; + } + + t2 = plane->Top[x]; + b2 = plane->Bottom[x]; + render.Step(); + } + // Draw any spans that are still open + while (t2 < b2) + { + int y = --b2; + RenderSpan(y, plane->Left, 
spanend[y], plane->Key, tex, render); + } +} + +void RenderPlanes::RenderSpan(int y, int x1, int x2, const VisiblePlaneKey &key, FTexture *tex, const RenderVisiblePlane &renderInfo) +{ + if (key.Picnum != skyflatnum) + { + double distance = renderInfo.planeheight * yslope[y]; + + double u = distance * renderInfo.basexfrac + renderInfo.viewx; + double v = distance * renderInfo.baseyfrac + renderInfo.viewy; + double uscale = distance * renderInfo.xstepscale; + double vscale = distance * renderInfo.ystepscale; + + double vis = r_FloorVisibility / renderInfo.planeheight; + + if (fixedlightlev >= 0) + R_SetDSColorMapLight(key.ColorMap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); + else if (fixedcolormap) + R_SetDSColorMapLight(fixedcolormap, 0, 0); + else + R_SetDSColorMapLight(key.ColorMap, (float)(vis * fabs(CenterY - y)), LIGHT2SHADE(key.LightLevel)); + + ds_source = (const BYTE *)tex->GetPixelsBgra(); + ds_source_mipmapped = false; + ds_xbits = tex->WidthBits; + ds_ybits = tex->HeightBits; + ds_xfrac = (uint32_t)(u * (1 << (32 - ds_xbits))); + ds_yfrac = (uint32_t)(v * (1 << (32 - ds_ybits))); + ds_xstep = (uint32_t)(uscale * (1 << (32 - ds_xbits))); + ds_ystep = (uint32_t)(vscale * (1 << (32 - ds_ybits))); + ds_y = y; + ds_x1 = x1; + ds_x2 = x2; + R_DrawSpan(); + } + else + { + tex = TexMan(sky1texture, true); + + double xangle1 = ((0.5 - x1 / (double)viewwidth) * FocalTangent * 90.0); + double xangle2 = ((0.5 - x2 / (double)viewwidth) * FocalTangent * 90.0); + + double u1 = sky1pos + (ViewAngle.Degrees + xangle1) * sky1cyl / 360.0; + double u2 = sky1pos + (ViewAngle.Degrees + xangle2) * sky1cyl / 360.0; + double u = u1; + double v = (y - CenterY) * skyiscale + skytexturemid * tex->Scale.Y; + double uscale = (u2 - u1) / (x2 - x1); + double vscale = 0.0; + + if (fixedlightlev >= 0) + R_SetDSColorMapLight(key.ColorMap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); + else if (fixedcolormap) + R_SetDSColorMapLight(fixedcolormap, 0, 0); + else + R_SetDSColorMapLight(key.ColorMap, 0, 
0); + + ds_source = (const BYTE *)tex->GetPixelsBgra(); + ds_source_mipmapped = false; + ds_xbits = tex->WidthBits; + ds_ybits = tex->HeightBits; + ds_xfrac = (uint32_t)(u * (1 << (32 - ds_xbits))); + ds_yfrac = (uint32_t)(v * (1 << (32 - ds_ybits))); + ds_xstep = (uint32_t)(uscale * (1 << (32 - ds_xbits))); + ds_ystep = (uint32_t)(vscale * (1 << (32 - ds_ybits))); + ds_y = y; + ds_x1 = x1; + ds_x2 = x2; + R_DrawSpan(); + } +} + +void RenderPlanes::Clear() +{ + for (int i = 0; i < NumBuckets; i++) + { + std::unique_ptr plane = std::move(PlaneBuckets[i]); + while (plane) + { + std::unique_ptr next = std::move(plane->Next); + FreePlanes.push_back(std::move(plane)); + plane = std::move(next); + } + } +} + +void RenderPlanes::MarkCeilingPlane(const VisiblePlaneKey &key, const RenderClipBuffer &clip, const WallCoords &wallCoords) +{ + VisibleSegmentsIterator it(clip, wallCoords.ScreenX1, wallCoords.ScreenX2); + while (it.Step()) + { + VisiblePlane *plane = GetPlaneWithUnsetRange(key, it.X1, it.X2); + + for (short x = it.X1; x < it.X2; x++) + { + short walltop = MAX(wallCoords.Y1(x), clip.Top[x]); + short top = clip.Top[x]; + short bottom = MIN(walltop, clip.Bottom[x]); + if (top < bottom) + { + plane->Top[x] = top; + plane->Bottom[x] = bottom; + } + } + } +} + +void RenderPlanes::MarkFloorPlane(const VisiblePlaneKey &key, const RenderClipBuffer &clip, const WallCoords &wallCoords) +{ + VisibleSegmentsIterator it(clip, wallCoords.ScreenX1, wallCoords.ScreenX2); + while (it.Step()) + { + VisiblePlane *plane = GetPlaneWithUnsetRange(key, it.X1, it.X2); + + for (short x = it.X1; x < it.X2; x++) + { + short wallbottom = MIN(wallCoords.Y2(x), clip.Bottom[x]); + short top = MAX(wallbottom, clip.Top[x]); + short bottom = clip.Bottom[x]; + if (top < bottom) + { + plane->Top[x] = top; + plane->Bottom[x] = bottom; + } + } + } +} + +VisiblePlane *RenderPlanes::GetPlaneWithUnsetRange(const VisiblePlaneKey &key, int start, int stop) +{ + VisiblePlane *plane = GetPlane(key); + + int 
intrl, intrh; + int unionl, unionh; + + if (start < plane->Left) + { + intrl = plane->Left; + unionl = start; + } + else + { + unionl = plane->Left; + intrl = start; + } + + if (stop > plane->Right) + { + intrh = plane->Right; + unionh = stop; + } + else + { + unionh = plane->Right; + intrh = stop; + } + + // Verify that the entire range has unset values + int x = intrl; + while (x < intrh && plane->Top[x] == VisiblePlane::UnsetValue) + x++; + + if (x >= intrh) // They do. Use the current plane + { + plane->Left = unionl; + plane->Right = unionh; + return plane; + } + else // Create new plane and make sure it is found first + { + auto &bucket = PlaneBuckets[Hash(key)]; + std::unique_ptr newPlane = AllocPlane(key); + newPlane->Left = start; + newPlane->Right = stop; + newPlane->Next = std::move(bucket); + bucket = std::move(newPlane); + return bucket.get(); + } +} + +VisiblePlane *RenderPlanes::GetPlane(const VisiblePlaneKey &key) +{ + auto &bucket = PlaneBuckets[Hash(key)]; + VisiblePlane *plane = bucket.get(); + + while (plane != nullptr) + { + if (plane->Key == key) + return plane; + plane = plane->Next.get(); + } + + std::unique_ptr new_plane = AllocPlane(key); + new_plane->Next = std::move(bucket); + bucket = std::move(new_plane); + return bucket.get(); +} + +std::unique_ptr RenderPlanes::AllocPlane(const VisiblePlaneKey &key) +{ + if (!FreePlanes.empty()) + { + std::unique_ptr plane = std::move(FreePlanes.back()); + FreePlanes.pop_back(); + plane->Clear(key); + return std::move(plane); + } + else + { + return std::make_unique(key); + } +} + +///////////////////////////////////////////////////////////////////////////// + +void RenderWall::Render(const RenderClipBuffer &clip) +{ + FTexture *tex = GetTexture(); + if (!tex) + return; + int texWidth = tex->GetWidth(); + int texHeight = tex->GetHeight(); + + WallTextureCoords texcoords(tex, Line, Texpart, TopZ, BottomZ, UnpeggedCeil); + + VisibleSegmentsIterator it(clip, Coords.ScreenX1, Coords.ScreenX2); + while 
// Draws this wall's texture for columns [x1, x2) using the masked column
// drawer (domvline1).
//
// x1, x2     - half-open screen column range.
// clipTop    - per-column upper clip row; element 0 corresponds to column x1.
// clipBottom - per-column lower clip row; element 0 corresponds to column x1.
//
// Does nothing when GetTexture() returns no texture. Columns whose clipped
// range is empty are skipped.
void RenderWall::RenderMasked(short x1, short x2, const short *clipTop, const short *clipBottom)
{
	FTexture *tex = GetTexture();
	if (!tex)
		return;
	int texWidth = tex->GetWidth();
	int texHeight = tex->GetHeight();

	WallTextureCoords texcoords(tex, Line, Texpart, TopZ, BottomZ, UnpeggedCeil);

	for (short x = x1; x < x2; x++)
	{
		// Intersect the wall's projected extent with the caller's clip range.
		short y1 = MAX(Coords.Y1(x), clipTop[x - x1]);
		short y2 = MIN(Coords.Y2(x), clipBottom[x - x1]);
		if (y1 >= y2)
			continue;

		// Texture coordinates for this column and for its first/last row.
		double u = Coords.VaryingX(x, texcoords.u1, texcoords.u2);
		double v1 = Coords.VaryingY(x, y1, texcoords.v1, texcoords.v2);
		double v2 = Coords.VaryingY(x, y2, texcoords.v1, texcoords.v2);

		R_SetColorMapLight(Colormap, GetLight(x), GetShade());

		dc_source = (const BYTE *)tex->GetColumnBgra((int)(u * texWidth), nullptr);
		dc_source2 = nullptr;
		dc_textureheight = texHeight;
		// v is scaled by 0xffffffff to form 32-bit fixed-point values.
		// NOTE(review): v1 or the per-row step outside [0, 1] would make these
		// double -> uint32_t conversions out of range - confirm inputs.
		dc_texturefrac = (uint32_t)(v1 * 0xffffffff);
		dc_iscale = (uint32_t)((v2 - v1) / (y2 - y1) * 0xffffffff);
		// Destination offset is multiplied by 4 (presumably bytes per BGRA pixel).
		dc_dest = dc_destorg + (ylookup[y1] + x) * 4;
		dc_count = y2 - y1;
		domvline1();
	}
}
FTexture::TEX_Null) + return nullptr; + else + return tex; +} + +int RenderWall::GetShade() +{ + if (fixedlightlev >= 0 || fixedcolormap) + { + return 0; + } + else + { + bool foggy = false; + int actualextralight = foggy ? 0 : extralight << 4; + int shade = LIGHT2SHADE(Line->sidedef->GetLightLevel(foggy, Line->frontsector->lightlevel) + actualextralight); + return shade; + } +} + +float RenderWall::GetLight(short x) +{ + if (fixedlightlev >= 0 || fixedcolormap) + return 0.0f; + else + return (float)(r_WallVisibility / Coords.Z(x)); +} + +///////////////////////////////////////////////////////////////////////////// + +VisibleSprite::VisibleSprite(AActor *actor, const DVector3 &eyePos) : Actor(actor), EyePos(eyePos) +{ +} + +void VisibleSprite::Render(RenderClipBuffer *clip) +{ + //if (MirrorFlags & RF_XFLIP) + // tx = -tx; + + bool flipTextureX = false; + FTexture *tex = GetSpriteTexture(Actor, flipTextureX); + DVector2 spriteScale = Actor->Scale; + + const double thingxscalemul = spriteScale.X / tex->Scale.X; + + double xscale = CenterX / EyePos.Z; + double yscale = spriteScale.Y / tex->Scale.Y;// spriteScale.Y / tex->Scale.Y * InvZtoScale / EyePos.Z; + + double tx; + if (flipTextureX) + { + tx = EyePos.X - (tex->GetWidth() - tex->LeftOffset - 1) * thingxscalemul; + } + else + { + tx = EyePos.X - tex->LeftOffset * thingxscalemul; + } + + double texturemid = tex->TopOffset + (EyePos.Y - Actor->Floorclip) / yscale; + double y = CenterY - texturemid * (InvZtoScale * yscale / EyePos.Z); + + int x1 = centerx + xs_RoundToInt(tx * xscale); + int x2 = centerx + xs_RoundToInt((tx + tex->GetWidth() * thingxscalemul) * xscale); + int y1 = xs_RoundToInt(y); + int y2 = xs_RoundToInt(y + (InvZtoScale * yscale / EyePos.Z) * tex->GetHeight()); + + xscale = spriteScale.X * xscale / tex->Scale.X; + + int clipped_x1 = clamp(x1, 0, viewwidth - 1); + int clipped_x2 = clamp(x2, 0, viewwidth - 1); + int clipped_y1 = clamp(y1, 0, viewheight - 1); + int clipped_y2 = clamp(y2, 0, 
viewheight - 1); + if (clipped_x1 >= clipped_x2 || clipped_y1 >= clipped_y2) + return; + + clip->SetupSpriteClip(clipped_x1, clipped_x2, EyePos, false); + + uint32_t texwidth = tex->GetWidth(); + uint32_t texheight = tex->GetHeight(); + + visstyle_t visstyle = GetSpriteVisStyle(Actor, EyePos.Z); + // Rumor has it that AlterWeaponSprite needs to be called with visstyle passed in somewhere around here.. + R_SetColorMapLight(visstyle.BaseColormap, 0, visstyle.ColormapNum << FRACBITS); + + for (int x = clipped_x1; x < clipped_x2; x++) + { + short top = MAX(clipped_y1, clip->Top[x]); + short bottom = MIN(clipped_y2, clip->Bottom[x]); + if (top < bottom) + { + float u = (x - x1) / (float)(x2 - x1); + float v = (top - y1) / (float)(y2 - y1); + if (flipTextureX) + u = 1.0f - u; + u = u - floor(u); + + dc_source = (const BYTE *)tex->GetColumnBgra((int)(u * texwidth), nullptr); + dc_source2 = nullptr; + dc_textureheight = texheight; + dc_texturefrac = (uint32_t)(v * 0xffffffff); + dc_iscale = 0xffffffff / (y2 - y1); + dc_dest = dc_destorg + (ylookup[top] + x) * 4; + dc_count = bottom - top; + domvline1(); + } + } +} + +visstyle_t VisibleSprite::GetSpriteVisStyle(AActor *thing, double z) +{ + visstyle_t visstyle; + + bool foggy = false; + int actualextralight = foggy ? 0 : extralight << 4; + int spriteshade = LIGHT2SHADE(thing->Sector->lightlevel + actualextralight); + + visstyle.RenderStyle = thing->RenderStyle; + visstyle.Alpha = float(thing->Alpha); + visstyle.ColormapNum = 0; + + // The software renderer cannot invert the source without inverting the overlay + // too. That means if the source is inverted, we need to do the reverse of what + // the invert overlay flag says to do. 
+ bool invertcolormap = (visstyle.RenderStyle.Flags & STYLEF_InvertOverlay) != 0; + + if (visstyle.RenderStyle.Flags & STYLEF_InvertSource) + { + invertcolormap = !invertcolormap; + } + + FDynamicColormap *mybasecolormap = thing->Sector->ColorMap; + + // Sprites that are added to the scene must fade to black. + if (visstyle.RenderStyle == LegacyRenderStyles[STYLE_Add] && mybasecolormap->Fade != 0) + { + mybasecolormap = GetSpecialLights(mybasecolormap->Color, 0, mybasecolormap->Desaturate); + } + + if (visstyle.RenderStyle.Flags & STYLEF_FadeToBlack) + { + if (invertcolormap) + { // Fade to white + mybasecolormap = GetSpecialLights(mybasecolormap->Color, MAKERGB(255, 255, 255), mybasecolormap->Desaturate); + invertcolormap = false; + } + else + { // Fade to black + mybasecolormap = GetSpecialLights(mybasecolormap->Color, MAKERGB(0, 0, 0), mybasecolormap->Desaturate); + } + } + + // get light level + if (fixedcolormap != NULL) + { // fixed map + visstyle.BaseColormap = fixedcolormap; + visstyle.ColormapNum = 0; + } + else + { + if (invertcolormap) + { + mybasecolormap = GetSpecialLights(mybasecolormap->Color, mybasecolormap->Fade.InverseColor(), mybasecolormap->Desaturate); + } + if (fixedlightlev >= 0) + { + visstyle.BaseColormap = mybasecolormap; + visstyle.ColormapNum = fixedlightlev >> COLORMAPSHIFT; + } + else if (!foggy && ((thing->renderflags & RF_FULLBRIGHT) || (thing->flags5 & MF5_BRIGHT))) + { // full bright + visstyle.BaseColormap = mybasecolormap; + visstyle.ColormapNum = 0; + } + else + { // diminished light + double minz = double((2048 * 4) / double(1 << 20)); + visstyle.ColormapNum = GETPALOOKUP(r_SpriteVisibility / MAX(z, minz), spriteshade); + visstyle.BaseColormap = mybasecolormap; + } + } + + return visstyle; +} + +FTexture *VisibleSprite::GetSpriteTexture(AActor *thing, /*out*/ bool &flipX) +{ + flipX = false; + if (thing->picnum.isValid()) + { + FTexture *tex = TexMan(thing->picnum); + if (tex->UseType == FTexture::TEX_Null) + { + return 
nullptr; + } + + if (tex->Rotations != 0xFFFF) + { + // choose a different rotation based on player view + spriteframe_t *sprframe = &SpriteFrames[tex->Rotations]; + DVector3 pos = thing->InterpolatedPosition(r_TicFracF); + pos.Z += thing->GetBobOffset(r_TicFracF); + DAngle ang = (pos - ViewPos).Angle(); + angle_t rot; + if (sprframe->Texture[0] == sprframe->Texture[1]) + { + rot = (ang - thing->Angles.Yaw + 45.0 / 2 * 9).BAMs() >> 28; + } + else + { + rot = (ang - thing->Angles.Yaw + (45.0 / 2 * 9 - 180.0 / 16)).BAMs() >> 28; + } + flipX = (sprframe->Flip & (1 << rot)) != 0; + tex = TexMan[sprframe->Texture[rot]]; // Do not animate the rotation + } + return tex; + } + else + { + // decide which texture to use for the sprite + int spritenum = thing->sprite; + if (spritenum >= (signed)sprites.Size() || spritenum < 0) + return nullptr; + + spritedef_t *sprdef = &sprites[spritenum]; + if (thing->frame >= sprdef->numframes) + { + // If there are no frames at all for this sprite, don't draw it. 
+ return nullptr; + } + else + { + //picnum = SpriteFrames[sprdef->spriteframes + thing->frame].Texture[0]; + // choose a different rotation based on player view + spriteframe_t *sprframe = &SpriteFrames[sprdef->spriteframes + thing->frame]; + DVector3 pos = thing->InterpolatedPosition(r_TicFracF); + pos.Z += thing->GetBobOffset(r_TicFracF); + DAngle ang = (pos - ViewPos).Angle(); + angle_t rot; + if (sprframe->Texture[0] == sprframe->Texture[1]) + { + rot = (ang - thing->Angles.Yaw + 45.0 / 2 * 9).BAMs() >> 28; + } + else + { + rot = (ang - thing->Angles.Yaw + (45.0 / 2 * 9 - 180.0 / 16)).BAMs() >> 28; + } + flipX = (sprframe->Flip & (1 << rot)) != 0; + return TexMan[sprframe->Texture[rot]]; // Do not animate the rotation + } + } +} + +///////////////////////////////////////////////////////////////////////////// + +void ScreenSprite::Render() +{ + FSpecialColormap *special = nullptr; + FColormapStyle colormapstyle; + PalEntry overlay = 0; + bool usecolormapstyle = false; + if (visstyle.BaseColormap >= &SpecialColormaps[0] && + visstyle.BaseColormap < &SpecialColormaps[SpecialColormaps.Size()]) + { + special = static_cast(visstyle.BaseColormap); + } + else if (Colormap->Color == PalEntry(255, 255, 255) && + Colormap->Desaturate == 0) + { + overlay = Colormap->Fade; + overlay.a = BYTE(visstyle.ColormapNum * 255 / NUMCOLORMAPS); + } + else + { + usecolormapstyle = true; + colormapstyle.Color = Colormap->Color; + colormapstyle.Fade = Colormap->Fade; + colormapstyle.Desaturate = Colormap->Desaturate; + colormapstyle.FadeLevel = visstyle.ColormapNum / float(NUMCOLORMAPS); + } + + screen->DrawTexture(Pic, + X1, + Y1, + DTA_DestWidthF, Width, + DTA_DestHeightF, Height, + DTA_Translation, Translation, + DTA_FlipX, Flip, + DTA_TopOffset, 0, + DTA_LeftOffset, 0, + DTA_ClipLeft, viewwindowx, + DTA_ClipTop, viewwindowy, + DTA_ClipRight, viewwindowx + viewwidth, + DTA_ClipBottom, viewwindowy + viewheight, + DTA_AlphaF, visstyle.Alpha, + DTA_RenderStyle, visstyle.RenderStyle, + 
DTA_FillColor, FillColor, + DTA_SpecialColormap, special, + DTA_ColorOverlay, overlay.d, + DTA_ColormapStyle, usecolormapstyle ? &colormapstyle : NULL, + TAG_DONE); +} diff --git a/src/r_swrenderer2.h b/src/r_swrenderer2.h new file mode 100644 index 0000000000..1004b66bd3 --- /dev/null +++ b/src/r_swrenderer2.h @@ -0,0 +1,345 @@ + +#pragma once + +#include +#include +#include +#include +#include "doomdata.h" +#include "r_utility.h" +#include "r_main.h" + +// Transform for a view position and its viewport +// +// World space uses map coordinates in the XY plane. Z is up. +// Eye space means relative to viewer, Y is up and Z is into the screen. +// Viewport means in normalized device coordinates (-1 to 1 range with perspective division). 0,0 is in the center of the viewport and Y is still up. +// Screen means in final screen coordinates. 0,0 is the upper left corner and Y is down. Z is still 1/z. +// +class ViewPosTransform +{ +public: + DVector3 WorldToEye(const DVector3 &worldPoint) const; + DVector3 WorldToViewport(const DVector3 &worldPoint) const { return EyeToViewport(WorldToEye(worldPoint)); } + DVector3 WorldToScreen(const DVector3 &worldPoint) const { return EyeToScreen(WorldToEye(worldPoint)); } + + DVector3 EyeToViewport(const DVector3 &eyePoint) const; + DVector3 EyeToScreen(const DVector3 &eyePoint) const { return ViewportToScreen(EyeToViewport(eyePoint)); } + + DVector3 ViewportToScreen(const DVector3 &viewportPoint) const; + + double ScreenXToEye(int x, double z) const; + double ScreenYToEye(int y, double z) const; + + double NearZ() const { return 0.0078125; }; +}; + +// Screen space coordinates for a wall +class WallCoords +{ +public: + WallCoords() = default; + WallCoords(const ViewPosTransform &transform, const DVector2 &v1, const DVector2 &v2, double ceil1, double floor1, double ceil2, double floor2); + + // True if transform and clip culled the wall + bool Culled = true; + + // Plane for wall in eye space + DVector3 PlaneNormal; + double PlaneD = 
0.0; + + // Z range of the wall in eye space + double NearZ = 0.0; + double FarZ = 0.0; + + // Screen space bounding box of the wall + int ScreenX1 = 0; + int ScreenX2 = 0; + int ScreenY1 = 0; + int ScreenY2 = 0; + + // Get the Y positions for the given column + short Y1(int x) const; + short Y2(int x) const; + + // Get the depth for a column + double Z(int x) const; + + // Perspective correct interpolation from start to end (used to calculate texture coordinates) + double VaryingX(int x, double start, double end) const; + double VaryingY(int x, int y, double start, double end) const; + +private: + static DVector3 Mix(const DVector3 &a, const DVector3 &b, double t); + static double Mix(double a, double b, double t); + + ViewPosTransform Transform; + DVector3 ScreenTopLeft; + DVector3 ScreenTopRight; + DVector3 ScreenBottomLeft; + DVector3 ScreenBottomRight; + double RcpDeltaScreenX = 0.0; + double VaryingXScale = 1.0; + double VaryingXOffset = 0.0; +}; + +// Texture coordinates for a wall +class WallTextureCoords +{ +public: + WallTextureCoords(FTexture *tex, const seg_t *line, side_t::ETexpart texpart, double topz, double bottomz, double unpeggedceil); + + double u1, u2; + double v1, v2; + +private: + void CalcU(FTexture *tex, const seg_t *line, side_t::ETexpart texpart); + void CalcV(FTexture *tex, const seg_t *line, side_t::ETexpart texpart, double topz, double bottomz, double unpeggedceil); + void CalcVTopPart(FTexture *tex, const seg_t *line, double topz, double bottomz, double vscale, double yoffset); + void CalcVMidPart(FTexture *tex, const seg_t *line, double topz, double bottomz, double vscale, double yoffset); + void CalcVBottomPart(FTexture *tex, const seg_t *line, double topz, double bottomz, double unpeggedceil, double vscale, double yoffset); +}; + +// Clipping buffers used during rendering +class RenderClipBuffer +{ +public: + void Clear(short left, short right); + void MarkSegmentCulled(const WallCoords &wallCoords, int drawIndex); + void 
ClipVertical(const WallCoords &wallCoords, int drawIndex); + void ClipTop(const WallCoords &wallCoords, int drawIndex); + void ClipBottom(const WallCoords &wallCoords, int drawIndex); + bool IsSegmentCulled(short x1, short x2) const; + + void SetupSpriteClip(short x1, short x2, const DVector3 &pos, bool wallSprite); + void RenderMaskedWalls(); + + short Top[MAXWIDTH]; + short Bottom[MAXWIDTH]; + + std::function DrawMaskedWall; + +private: + void AddDrawSegment(short x1, short x2, const WallCoords &wall, bool clipTop, bool clipBottom, int drawIndex); + + struct SolidSegment + { + SolidSegment(short x1, short x2) : X1(x1), X2(x2) { } + short X1, X2; + }; + + struct DrawSegment + { + short X1; + short X2; + int ClipOffset; + bool ClipTop; + bool ClipBottom; + DVector3 PlaneNormal; + double PlaneD; + double NearZ; + double FarZ; + int DrawIndex; + }; + + std::vector SolidSegments; + std::vector DrawSegments; + std::vector ClipValues; + + friend class VisibleSegmentsIterator; +}; + +// Walks the visible segments in a range +class VisibleSegmentsIterator +{ +public: + VisibleSegmentsIterator(const RenderClipBuffer &buffer, short startx, short endx); + bool Step(); + + short X1; + short X2; + +private: + const std::vector &SolidSegments; + short endx; + int next = 0; +}; + +// Class used to group sector ceilings/floors sharing common properties +class VisiblePlaneKey +{ +public: + VisiblePlaneKey() { } + VisiblePlaneKey(FTextureID picnum, FSWColormap *colormap, int lightlevel, secplane_t plane, const FTransform &xform) : Picnum(picnum), ColorMap(colormap), LightLevel(lightlevel), Plane(plane), Transform(xform) { } + + bool operator==(const VisiblePlaneKey &other) const + { + return Picnum == other.Picnum && LightLevel == other.LightLevel && Plane.fD() == other.Plane.fD(); + } + + FTextureID Picnum; + FSWColormap *ColorMap; + int LightLevel; + secplane_t Plane; + FTransform Transform; +}; + +// Visible plane to be rendered +class VisiblePlane +{ +public: + 
VisiblePlane(const VisiblePlaneKey &key) { Clear(key); } + + void Clear(const VisiblePlaneKey &key) + { + Key = key; + Left = viewwidth; + Right = 0; + for (int i = 0; i < MAXWIDTH; i++) + { + Top[i] = UnsetValue; + Bottom[i] = 0; + } + } + + VisiblePlaneKey Key; + + enum { UnsetValue = 0x7fff }; + short Left; + short Right; + short Top[MAXWIDTH]; + short Bottom[MAXWIDTH]; + std::unique_ptr Next; +}; + +class RenderVisiblePlane +{ +public: + RenderVisiblePlane(VisiblePlane *plane, FTexture *tex); + void Step(); + + double viewx; + double viewy; + double planeheight; + double basexfrac; + double baseyfrac; + double xstepscale; + double ystepscale; +}; + +// Tracks plane locations and renders them +class RenderPlanes +{ +public: + void Clear(); + void MarkCeilingPlane(const VisiblePlaneKey &key, const RenderClipBuffer &clip, const WallCoords &wallCoords); + void MarkFloorPlane(const VisiblePlaneKey &key, const RenderClipBuffer &clip, const WallCoords &wallCoords); + void Render(); + +private: + void RenderPlane(VisiblePlane *plane); + void RenderSpan(int y, int x1, int x2, const VisiblePlaneKey &key, FTexture *texture, const RenderVisiblePlane &renderInfo); + + VisiblePlane *GetPlaneWithUnsetRange(const VisiblePlaneKey &key, int x0, int x1); + VisiblePlane *GetPlane(const VisiblePlaneKey &key); + std::unique_ptr AllocPlane(const VisiblePlaneKey &key); + static uint32_t Hash(const VisiblePlaneKey &key) { return ((unsigned)((key.Picnum.GetIndex()) * 3 + (key.LightLevel) + (FLOAT2FIXED(key.Plane.fD())) * 7) & (NumBuckets - 1)); } + + enum { NumBuckets = 128 /* must be a power of 2 */ }; + std::unique_ptr PlaneBuckets[NumBuckets]; + std::vector> FreePlanes; +}; + +// Renders a wall texture +class RenderWall +{ +public: + void Render(const RenderClipBuffer &clip); + void RenderMasked(short x1, short x2, const short *clipTop, const short *clipBottom); + + WallCoords Coords; + const seg_t *Line; + side_t::ETexpart Texpart; + double TopZ; + double BottomZ; + double 
UnpeggedCeil; + FSWColormap *Colormap; + bool Masked; + +private: + FTexture *GetTexture(); + int GetShade(); + float GetLight(short x); +}; + +// Sprite thing to be rendered +class VisibleSprite +{ +public: + VisibleSprite(AActor *actor, const DVector3 &eyePos); + void Render(RenderClipBuffer *clip); + +private: + AActor *Actor; + DVector3 EyePos; + + FTexture *GetSpriteTexture(AActor *thing, /*out*/ bool &flipX); + visstyle_t GetSpriteVisStyle(AActor *thing, double z); + + friend class RenderBsp; // For sorting +}; + +// DScreen accelerated sprite to be rendered +class ScreenSprite +{ +public: + void Render(); + + FTexture *Pic = nullptr; + double X1 = 0.0; + double Y1 = 0.0; + double Width = 0.0; + double Height = 0.0; + FRemapTable *Translation = nullptr; + bool Flip = false; + visstyle_t visstyle; + uint32_t FillColor = 0; + FDynamicColormap *Colormap = nullptr; +}; + +// Renders a BSP tree in a scene +class RenderBsp +{ +public: + void Render(); + void RenderScreenSprites(); + +private: + void RenderNode(void *node); + void RenderSubsector(subsector_t *sub); + void AddLine(seg_t *line, sector_t *frontsector); + + void AddSprite(AActor *thing); + void AddWallSprite(AActor *thing); + bool IsThingCulled(AActor *thing); + void RenderMaskedObjects(); + + void RenderPlayerSprites(); + void RenderPlayerSprite(DPSprite *sprite, AActor *owner, float bobx, float boby, double wx, double wy, double ticfrac); + + int PointOnSide(const DVector2 &pos, const node_t *node); + + // Checks BSP node/subtree bounding box. + // Returns true if some part of the bbox might be visible. 
+ bool CheckBBox(float *bspcoord); + + ViewPosTransform Transform; + RenderClipBuffer Clip; + RenderPlanes Planes; + std::vector VisibleSprites; + std::vector VisibleMaskedWalls; + std::vector ScreenSprites; + + const int BaseXCenter = 160; + const int BaseYCenter = 100; +}; From e9b1da57a397aa7c53e7c545a59d5f785fa8c227 Mon Sep 17 00:00:00 2001 From: "alexey.lysiuk" Date: Sun, 25 Sep 2016 17:43:52 +0300 Subject: [PATCH 121/912] Fixed compilation on macOS --- src/posix/cocoa/i_video.mm | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/posix/cocoa/i_video.mm b/src/posix/cocoa/i_video.mm index ba3a3e27e8..7dc7a6eed9 100644 --- a/src/posix/cocoa/i_video.mm +++ b/src/posix/cocoa/i_video.mm @@ -1101,7 +1101,7 @@ void CocoaFrameBuffer::Flip() #endif // __LITTLE_ENDIAN__ if (IsBgra()) - glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, GL_RGBA8, Width, Height, 0, GL_BGRA_EXT, GL_UNSIGNED_BYTE, m_pixelBuffer); + glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, GL_RGBA8, Width, Height, 0, GL_BGRA, GL_UNSIGNED_BYTE, m_pixelBuffer); else glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, GL_RGBA8, Width, Height, 0, format, GL_UNSIGNED_BYTE, m_pixelBuffer); @@ -1127,7 +1127,7 @@ void CocoaFrameBuffer::Flip() SDLGLFB::SDLGLFB(void*, const int width, const int height, int, int, const bool fullscreen) -: DFrameBuffer(width, height) +: DFrameBuffer(width, height, false) , m_lock(-1) , m_isUpdatePending(false) { From 0bc54cbda4461d430905008450868d3e19b43595 Mon Sep 17 00:00:00 2001 From: raa-eruanna Date: Sun, 25 Sep 2016 17:40:37 -0400 Subject: [PATCH 122/912] Fixed SDL backend. This project is now Linux capable! 
--- src/posix/sdl/sdlglvideo.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/posix/sdl/sdlglvideo.cpp b/src/posix/sdl/sdlglvideo.cpp index e581cfde99..86371bd491 100644 --- a/src/posix/sdl/sdlglvideo.cpp +++ b/src/posix/sdl/sdlglvideo.cpp @@ -237,7 +237,7 @@ DFrameBuffer *SDLGLVideo::CreateFrameBuffer (int width, int height, bool bgra, b } ++retry; - fb = static_cast(CreateFrameBuffer (width, height, fullscreen, NULL)); + fb = static_cast(CreateFrameBuffer (width, height, false, fullscreen, NULL)); } // fb->SetFlash (flashColor, flashAmount); From 3dd8b593b6a9e35fefd4ce76490f963cccc70fb4 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 26 Sep 2016 09:00:19 +0200 Subject: [PATCH 123/912] Use LLVM to JIT the code for one of the drawer functions --- src/CMakeLists.txt | 86 ++ .../fixedfunction/fixedfunction.cpp | 1046 +++++++++++++++++ src/r_compiler/fixedfunction/fixedfunction.h | 130 ++ src/r_compiler/llvm_include.h | 46 + src/r_compiler/ssa/ssa_barycentric_weight.h | 97 ++ src/r_compiler/ssa/ssa_bool.cpp | 91 ++ src/r_compiler/ssa/ssa_bool.h | 37 + src/r_compiler/ssa/ssa_float.cpp | 152 +++ src/r_compiler/ssa/ssa_float.h | 42 + src/r_compiler/ssa/ssa_float_ptr.cpp | 65 + src/r_compiler/ssa/ssa_float_ptr.h | 27 + src/r_compiler/ssa/ssa_for_block.cpp | 25 + src/r_compiler/ssa/ssa_for_block.h | 18 + src/r_compiler/ssa/ssa_function.cpp | 55 + src/r_compiler/ssa/ssa_function.h | 30 + src/r_compiler/ssa/ssa_if_block.cpp | 30 + src/r_compiler/ssa/ssa_if_block.h | 46 + src/r_compiler/ssa/ssa_int.cpp | 117 ++ src/r_compiler/ssa/ssa_int.h | 41 + src/r_compiler/ssa/ssa_int_ptr.cpp | 58 + src/r_compiler/ssa/ssa_int_ptr.h | 27 + src/r_compiler/ssa/ssa_phi.h | 33 + src/r_compiler/ssa/ssa_pixelformat4f.h | 28 + src/r_compiler/ssa/ssa_pixelformat4ub.h | 28 + .../ssa/ssa_pixelformat4ub_argb_rev.h | 35 + src/r_compiler/ssa/ssa_pixelformat4ub_rev.h | 28 + src/r_compiler/ssa/ssa_pixels.h | 39 + src/r_compiler/ssa/ssa_pixeltype.h | 498 ++++++++ 
src/r_compiler/ssa/ssa_scope.cpp | 65 + src/r_compiler/ssa/ssa_scope.h | 41 + src/r_compiler/ssa/ssa_stack.h | 25 + src/r_compiler/ssa/ssa_struct_type.cpp | 18 + src/r_compiler/ssa/ssa_struct_type.h | 17 + src/r_compiler/ssa/ssa_ubyte.cpp | 95 ++ src/r_compiler/ssa/ssa_ubyte.h | 35 + src/r_compiler/ssa/ssa_ubyte_ptr.cpp | 106 ++ src/r_compiler/ssa/ssa_ubyte_ptr.h | 32 + src/r_compiler/ssa/ssa_value.cpp | 56 + src/r_compiler/ssa/ssa_value.h | 53 + src/r_compiler/ssa/ssa_vec16ub.cpp | 155 +++ src/r_compiler/ssa/ssa_vec16ub.h | 42 + src/r_compiler/ssa/ssa_vec4f.cpp | 244 ++++ src/r_compiler/ssa/ssa_vec4f.h | 57 + src/r_compiler/ssa/ssa_vec4f_ptr.cpp | 50 + src/r_compiler/ssa/ssa_vec4f_ptr.h | 24 + src/r_compiler/ssa/ssa_vec4i.cpp | 213 ++++ src/r_compiler/ssa/ssa_vec4i.h | 56 + src/r_compiler/ssa/ssa_vec4i_ptr.cpp | 50 + src/r_compiler/ssa/ssa_vec4i_ptr.h | 24 + src/r_compiler/ssa/ssa_vec8s.cpp | 178 +++ src/r_compiler/ssa/ssa_vec8s.h | 48 + src/r_draw_rgba.cpp | 66 ++ 52 files changed, 4705 insertions(+) create mode 100644 src/r_compiler/fixedfunction/fixedfunction.cpp create mode 100644 src/r_compiler/fixedfunction/fixedfunction.h create mode 100644 src/r_compiler/llvm_include.h create mode 100644 src/r_compiler/ssa/ssa_barycentric_weight.h create mode 100644 src/r_compiler/ssa/ssa_bool.cpp create mode 100644 src/r_compiler/ssa/ssa_bool.h create mode 100644 src/r_compiler/ssa/ssa_float.cpp create mode 100644 src/r_compiler/ssa/ssa_float.h create mode 100644 src/r_compiler/ssa/ssa_float_ptr.cpp create mode 100644 src/r_compiler/ssa/ssa_float_ptr.h create mode 100644 src/r_compiler/ssa/ssa_for_block.cpp create mode 100644 src/r_compiler/ssa/ssa_for_block.h create mode 100644 src/r_compiler/ssa/ssa_function.cpp create mode 100644 src/r_compiler/ssa/ssa_function.h create mode 100644 src/r_compiler/ssa/ssa_if_block.cpp create mode 100644 src/r_compiler/ssa/ssa_if_block.h create mode 100644 src/r_compiler/ssa/ssa_int.cpp create mode 100644 src/r_compiler/ssa/ssa_int.h 
create mode 100644 src/r_compiler/ssa/ssa_int_ptr.cpp create mode 100644 src/r_compiler/ssa/ssa_int_ptr.h create mode 100644 src/r_compiler/ssa/ssa_phi.h create mode 100644 src/r_compiler/ssa/ssa_pixelformat4f.h create mode 100644 src/r_compiler/ssa/ssa_pixelformat4ub.h create mode 100644 src/r_compiler/ssa/ssa_pixelformat4ub_argb_rev.h create mode 100644 src/r_compiler/ssa/ssa_pixelformat4ub_rev.h create mode 100644 src/r_compiler/ssa/ssa_pixels.h create mode 100644 src/r_compiler/ssa/ssa_pixeltype.h create mode 100644 src/r_compiler/ssa/ssa_scope.cpp create mode 100644 src/r_compiler/ssa/ssa_scope.h create mode 100644 src/r_compiler/ssa/ssa_stack.h create mode 100644 src/r_compiler/ssa/ssa_struct_type.cpp create mode 100644 src/r_compiler/ssa/ssa_struct_type.h create mode 100644 src/r_compiler/ssa/ssa_ubyte.cpp create mode 100644 src/r_compiler/ssa/ssa_ubyte.h create mode 100644 src/r_compiler/ssa/ssa_ubyte_ptr.cpp create mode 100644 src/r_compiler/ssa/ssa_ubyte_ptr.h create mode 100644 src/r_compiler/ssa/ssa_value.cpp create mode 100644 src/r_compiler/ssa/ssa_value.h create mode 100644 src/r_compiler/ssa/ssa_vec16ub.cpp create mode 100644 src/r_compiler/ssa/ssa_vec16ub.h create mode 100644 src/r_compiler/ssa/ssa_vec4f.cpp create mode 100644 src/r_compiler/ssa/ssa_vec4f.h create mode 100644 src/r_compiler/ssa/ssa_vec4f_ptr.cpp create mode 100644 src/r_compiler/ssa/ssa_vec4f_ptr.h create mode 100644 src/r_compiler/ssa/ssa_vec4i.cpp create mode 100644 src/r_compiler/ssa/ssa_vec4i.h create mode 100644 src/r_compiler/ssa/ssa_vec4i_ptr.cpp create mode 100644 src/r_compiler/ssa/ssa_vec4i_ptr.h create mode 100644 src/r_compiler/ssa/ssa_vec8s.cpp create mode 100644 src/r_compiler/ssa/ssa_vec8s.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 2d71170eef..4f9599b35b 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -104,6 +104,15 @@ if( WIN32 ) endif() add_definitions( -D_WIN32 ) + + set( FMOD_SEARCH_PATHS + "C:/Program Files/FMOD SoundSystem/FMOD 
Programmers API ${WIN_TYPE}/api" + "C:/Program Files (x86)/FMOD SoundSystem/FMOD Programmers API ${WIN_TYPE}/api" + # This next one is for Randy. + "E:/Software/Dev/FMOD/${WIN_TYPE}/api" + ) + set( FMOD_INC_PATH_SUFFIXES PATH_SUFFIXES inc ) + set( FMOD_LIB_PATH_SUFFIXES PATH_SUFFIXES lib ) set( FMOD_SEARCH_PATHS "C:/Program Files/FMOD SoundSystem/FMOD Programmers API ${WIN_TYPE}/api" @@ -255,6 +264,57 @@ if( NOT NO_OPENAL ) endif() endif() +# C:/Development/Environment/Src/llvm-3.9.0/build/lib/cmake/llvm +find_package(LLVM REQUIRED CONFIG) +message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}") +message(STATUS "Using LLVMConfig.cmake in: ${LLVM_DIR}") +llvm_map_components_to_libnames(llvm_libs + analysis + asmparser + asmprinter + bitreader + bitwriter + codegen + core + executionengine + globalisel + instcombine + ipo + irreader + linker + lto + mc + mcdisassembler + mcjit + mcparser + mirparser + object + objectyaml + orcjit + passes + scalaropts + selectiondag + support + symbolize + tablegen + target + transformutils + vectorize + x86asmparser + x86asmprinter + x86codegen + x86desc + x86info + x86utils + aarch64asmparser + aarch64asmprinter + aarch64codegen + aarch64desc + aarch64info + aarch64utils) +include_directories(${LLVM_INCLUDE_DIRS}) +set( ZDOOM_LIBS ${ZDOOM_LIBS} ${llvm_libs} ) + if( NOT NO_FMOD ) # Search for FMOD include files if( NOT WIN32 ) @@ -843,6 +903,9 @@ file( GLOB HEADER_FILES posix/*.h posix/cocoa/*.h posix/sdl/*.h + r_compiler/*.h + r_compiler/ssa/*.h + r_compiler/fixedfunction/*.h r_data/*.h resourcefiles/*.h sfmt/*.h @@ -1372,6 +1435,26 @@ set (PCH_SOURCES fragglescript/t_spec.cpp fragglescript/t_variable.cpp fragglescript/t_cmd.cpp + r_compiler/ssa/ssa_bool.cpp + r_compiler/ssa/ssa_float.cpp + r_compiler/ssa/ssa_float_ptr.cpp + r_compiler/ssa/ssa_for_block.cpp + r_compiler/ssa/ssa_function.cpp + r_compiler/ssa/ssa_if_block.cpp + r_compiler/ssa/ssa_int.cpp + r_compiler/ssa/ssa_int_ptr.cpp + r_compiler/ssa/ssa_scope.cpp + 
r_compiler/ssa/ssa_struct_type.cpp + r_compiler/ssa/ssa_ubyte.cpp + r_compiler/ssa/ssa_ubyte_ptr.cpp + r_compiler/ssa/ssa_value.cpp + r_compiler/ssa/ssa_vec4f.cpp + r_compiler/ssa/ssa_vec4f_ptr.cpp + r_compiler/ssa/ssa_vec4i.cpp + r_compiler/ssa/ssa_vec4i_ptr.cpp + r_compiler/ssa/ssa_vec8s.cpp + r_compiler/ssa/ssa_vec16ub.cpp + r_compiler/fixedfunction/fixedfunction.cpp r_data/sprites.cpp r_data/voxels.cpp r_data/renderstyle.cpp @@ -1587,6 +1670,9 @@ source_group("Render Data\\Resource Headers" REGULAR_EXPRESSION "^${CMAKE_CURREN source_group("Render Data\\Resource Sources" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/r_data/.+\\.cpp$") source_group("Render Data\\Textures" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/textures/.+") source_group("Render Interface" FILES r_defs.h r_renderer.h r_sky.cpp r_sky.h r_state.h r_utility.cpp r_utility.h) +source_group("Render Compiler" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/r_compiler/.+") +source_group("Render Compiler\\SSA" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/r_compiler/ssa/.+") +source_group("Render Compiler\\Fixed Function" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/r_compiler/fixedfunction/.+") source_group("Resource Files" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/resourcefiles/.+") source_group("POSIX Files" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/posix/.+") source_group("Cocoa Files" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/posix/cocoa/.+") diff --git a/src/r_compiler/fixedfunction/fixedfunction.cpp b/src/r_compiler/fixedfunction/fixedfunction.cpp new file mode 100644 index 0000000000..347ba6de35 --- /dev/null +++ b/src/r_compiler/fixedfunction/fixedfunction.cpp @@ -0,0 +1,1046 @@ + +#include "i_system.h" +#include "r_compiler/fixedfunction/fixedfunction.h" +#include "r_compiler/ssa/ssa_function.h" +#include "r_compiler/ssa/ssa_scope.h" +#include "r_compiler/ssa/ssa_for_block.h" +#include "r_compiler/ssa/ssa_if_block.h" +#include "r_compiler/ssa/ssa_stack.h" 
+#include "r_compiler/ssa/ssa_function.h" +#include "r_compiler/ssa/ssa_struct_type.h" +#include "r_compiler/ssa/ssa_value.h" +#include "r_compiler/ssa/ssa_barycentric_weight.h" + +RenderProgram::RenderProgram() +{ + llvm::install_fatal_error_handler([](void *user_data, const std::string& reason, bool gen_crash_diag) { + I_FatalError(reason.c_str()); + }); + + //llvm::llvm_start_multithreaded(); + llvm::InitializeNativeTarget(); + llvm::InitializeNativeTargetAsmPrinter(); + llvm::InitializeNativeTargetAsmParser(); + + mContext = std::make_unique(); + + auto moduleOwner = std::make_unique("render", context()); + mModule = moduleOwner.get(); + + std::string errorstring; + llvm::EngineBuilder engineBuilder(std::move(moduleOwner)); + engineBuilder.setErrorStr(&errorstring); + engineBuilder.setOptLevel(llvm::CodeGenOpt::Aggressive); + engineBuilder.setRelocationModel(llvm::Reloc::Static); + engineBuilder.setEngineKind(llvm::EngineKind::JIT); + mEngine.reset(engineBuilder.create()); + if (!mEngine) + I_FatalError(errorstring.c_str()); +} + +RenderProgram::~RenderProgram() +{ + mEngine.reset(); + mContext.reset(); + //llvm::llvm_stop_multithreaded(); +} + +void *RenderProgram::PointerToFunction(const char *name) +{ + llvm::Function *function = mModule->getFunction(name); + if (!function) + return nullptr; + return mEngine->getPointerToFunction(function); +} + +FixedFunction::FixedFunction() +{ + CodegenDrawSpan(); + mProgram.engine()->finalizeObject(); + + DrawSpan = mProgram.GetProcAddress("DrawSpan"); +} + +void FixedFunction::CodegenDrawSpan() +{ + llvm::IRBuilder<> builder(mProgram.context()); + SSAScope ssa_scope(&mProgram.context(), mProgram.module(), &builder); + + SSAFunction function("DrawSpan"); + function.add_parameter(SSAInt::llvm_type()); + function.add_parameter(SSAUBytePtr::llvm_type()); + function.create_public(); + + SSAInt count = function.parameter(0); + SSAUBytePtr data = function.parameter(1); + SSAStack stack_index; + + stack_index.store(0); + 
SSAForBlock loop; + { + SSAInt index = stack_index.load(); + loop.loop_block(index < count); + + //SSAVec4i color(255, 255, 0, 255); + //data[index * 4].store_vec4ub(color); + data[index * 4].store(0); + data[index * 4 + 1].store(128); + data[index * 4 + 2].store(255); + data[index * 4 + 3].store(255); + stack_index.store(index + 1); + } + loop.end_block(); + + builder.CreateRetVoid(); + + if (llvm::verifyFunction(*function.func)) + I_FatalError("verifyFunction failed for " __FUNCTION__); +} + +#if 0 + +GlslFixedFunction::GlslFixedFunction(GlslProgram &program, GlslCodeGen &vertex_codegen, GlslCodeGen &fragment_codegen) +: program(program), vertex_codegen(vertex_codegen), fragment_codegen(fragment_codegen) +{ +} + +llvm::Type *GlslFixedFunction::get_sampler_struct(llvm::LLVMContext &context) +{ + std::vector elements; + elements.push_back(llvm::Type::getInt32Ty(context)); // width + elements.push_back(llvm::Type::getInt32Ty(context)); // height + elements.push_back(llvm::Type::getInt8PtrTy(context)); // data + return llvm::StructType::get(context, elements, false); +} + +void GlslFixedFunction::codegen() +{ + codegen_render_scanline(5); + codegen_calc_window_positions(); + codegen_calc_polygon_face_direction(); + codegen_calc_polygon_y_range(); + codegen_update_polygon_edge(); + codegen_draw_triangles(5, 5); + codegen_texture(); + codegen_normalize(); + codegen_reflect(); + codegen_max(); + codegen_pow(); + codegen_dot(); + codegen_mix(); +} + +void GlslFixedFunction::codegen_texture() +{ + llvm::IRBuilder<> builder(program.context()); + SSAScope ssa_scope(&program.context(), program.module(), &builder); + + SSAFunction function("fragment_texture"); + function.add_parameter(fragment_codegen.get_global_struct_type()); + function.add_parameter(get_sampler_struct(program.context())); + function.add_parameter(SSAVec4f::llvm_type()); + function.create_private(); + + SSAValue sampler_ptr = function.parameter(1); + SSAVec4f pos = function.parameter(2); + + SSAInt width = 
sampler_ptr[0][0].load(); + SSAInt height = sampler_ptr[0][1].load(); + SSAUBytePtr data = sampler_ptr[0][2].load(); + + SSAPixels4ub_argb_rev pixels(width, height, data); + //builder.CreateRet(pixels.linear_clamp4f(pos).v); + builder.CreateRet(pixels.linear_clamp4f(pos[0], pos[1]).v); + + llvm::verifyFunction(*function.func); +} + +void GlslFixedFunction::codegen_normalize() +{ + llvm::IRBuilder<> builder(program.context()); + SSAScope ssa_scope(&program.context(), program.module(), &builder); + + SSAFunction function("fragment_normalize"); + function.add_parameter(fragment_codegen.get_global_struct_type()); + function.add_parameter(SSAVec4f::llvm_type()); + function.create_private(); + + SSAVec4f vec = function.parameter(1); + + // To do: this can probably be done a lot faster with _mm_rsqrt_ss + SSAVec4f vec2 = vec * vec; + SSAVec4f length3(SSAFloat::sqrt(vec2[0] + vec2[1] + vec2[2])); + SSAVec4f normalized = vec / length3; + builder.CreateRet(normalized.v); + + llvm::verifyFunction(*function.func); +} + +void GlslFixedFunction::codegen_reflect() +{ + llvm::IRBuilder<> builder(program.context()); + SSAScope ssa_scope(&program.context(), program.module(), &builder); + + SSAFunction function("fragment_reflect"); + function.add_parameter(fragment_codegen.get_global_struct_type()); + function.add_parameter(SSAVec4f::llvm_type()); + function.add_parameter(SSAVec4f::llvm_type()); + function.create_private(); + + SSAVec4f i = function.parameter(1); + SSAVec4f n = function.parameter(2); + + SSAVec4f c = i * n; + SSAFloat dot3 = c[0] + c[1] + c[2]; + SSAVec4f result = i - (2.0f * dot3) * n; + builder.CreateRet(result.v); + + llvm::verifyFunction(*function.func); +} + +void GlslFixedFunction::codegen_max() +{ + llvm::IRBuilder<> builder(program.context()); + SSAScope ssa_scope(&program.context(), program.module(), &builder); + + SSAFunction function("fragment_max"); + function.add_parameter(fragment_codegen.get_global_struct_type()); + 
function.add_parameter(SSAFloat::llvm_type()); + function.add_parameter(SSAFloat::llvm_type()); + function.create_private(); + + SSAFloat a = function.parameter(1); + SSAFloat b = function.parameter(2); + + SSAPhi phi; + SSAIfBlock branch; + branch.if_block(a >= b); + phi.add_incoming(a); + branch.else_block(); + phi.add_incoming(b); + branch.end_block(); + SSAFloat c = phi.create(); + + builder.CreateRet(c.v); + llvm::verifyFunction(*function.func); +} + +void GlslFixedFunction::codegen_pow() +{ + llvm::IRBuilder<> builder(program.context()); + SSAScope ssa_scope(&program.context(), program.module(), &builder); + + SSAFunction function("fragment_pow"); + function.add_parameter(fragment_codegen.get_global_struct_type()); + function.add_parameter(SSAFloat::llvm_type()); + function.add_parameter(SSAFloat::llvm_type()); + function.create_private(); + + SSAFloat a = function.parameter(1); + SSAFloat b = function.parameter(2); + builder.CreateRet(a.v); + //builder.CreateRet(SSAFloat::pow(a, b).v); + + llvm::verifyFunction(*function.func); +} + +void GlslFixedFunction::codegen_dot() +{ + llvm::IRBuilder<> builder(program.context()); + SSAScope ssa_scope(&program.context(), program.module(), &builder); + + SSAFunction function("fragment_dot"); + function.add_parameter(fragment_codegen.get_global_struct_type()); + function.add_parameter(SSAVec4f::llvm_type()); + function.add_parameter(SSAVec4f::llvm_type()); + function.create_private(); + + SSAVec4f a = function.parameter(1); + SSAVec4f b = function.parameter(2); + + SSAVec4f c = a * b; + SSAFloat dot3 = c[0] + c[1] + c[2]; + builder.CreateRet(dot3.v); + + llvm::verifyFunction(*function.func); +} + +void GlslFixedFunction::codegen_mix() +{ + llvm::IRBuilder<> builder(program.context()); + SSAScope ssa_scope(&program.context(), program.module(), &builder); + + SSAFunction function("fragment_mix"); + function.add_parameter(fragment_codegen.get_global_struct_type()); + function.add_parameter(SSAVec4f::llvm_type()); + 
function.add_parameter(SSAVec4f::llvm_type()); + function.add_parameter(SSAFloat::llvm_type()); + function.create_private(); + + SSAVec4f v1 = function.parameter(1); + SSAVec4f v2 = function.parameter(2); + SSAFloat t = function.parameter(3); + + SSAVec4f b = t; + SSAVec4f a = 1.0f - b; + SSAVec4f mix = v1 * a + v2 * b; + builder.CreateRet(mix.v); + + llvm::verifyFunction(*function.func); +} + +void GlslFixedFunction::codegen_draw_triangles(int num_vertex_in, int num_vertex_out) +{ + llvm::IRBuilder<> builder(program.context()); + SSAScope ssa_scope(&program.context(), program.module(), &builder); + + SSAFunction function("draw_triangles"); + function.add_parameter(SSAInt::llvm_type()); // input_width + function.add_parameter(SSAInt::llvm_type()); // input_height + function.add_parameter(SSAUBytePtr::llvm_type()); // input_data + function.add_parameter(SSAInt::llvm_type()); // output_width + function.add_parameter(SSAInt::llvm_type()); // output_height + function.add_parameter(SSAUBytePtr::llvm_type()); // output_data + function.add_parameter(SSAInt::llvm_type()); // viewport_x + function.add_parameter(SSAInt::llvm_type()); // viewport_y + function.add_parameter(SSAInt::llvm_type()); // viewport_width + function.add_parameter(SSAInt::llvm_type()); // viewport_height + function.add_parameter(SSAVec4fPtr::llvm_type()); // uniforms + function.add_parameter(SSAInt::llvm_type()); // first_vertex + function.add_parameter(SSAInt::llvm_type()); // num_vertices + function.add_parameter(SSAVec4fPtr::llvm_type()->getPointerTo()); // vertex attributes + function.add_parameter(SSAInt::llvm_type()); // core + function.add_parameter(SSAInt::llvm_type()); // num_cores + function.create_public(); + + SSAInt input_width = function.parameter(0); + SSAInt input_height = function.parameter(1); + SSAUBytePtr input_data = function.parameter(2); + SSAInt output_width = function.parameter(3); + SSAInt output_height = function.parameter(4); + SSAUBytePtr output_data = function.parameter(5); 
+ SSAInt viewport_x = function.parameter(6); + SSAInt viewport_y = function.parameter(7); + SSAInt viewport_width = function.parameter(8); + SSAInt viewport_height = function.parameter(9); + SSAVec4fPtr uniforms = function.parameter(10); + SSAInt first_vertex = function.parameter(11); + SSAInt num_vertices = function.parameter(12); + SSAValue vertex_in_ptr = function.parameter(13); + SSAInt core = function.parameter(14); + SSAInt num_cores = function.parameter(15); + + SSAStack stack_vertex_index; + SSAValue vertex_globals_ptr = SSAValue::from_llvm(SSAScope::alloca(vertex_codegen.get_global_struct_type())); + std::vector vertex_outs; + for (int i = 0; i < num_vertex_out; i++) + vertex_outs.push_back(SSAVec4fPtr::from_llvm(SSAScope::builder().CreateAlloca(SSAVec4f::llvm_type(), SSAInt(3).v))); + + int num_uniforms = 1; + { + llvm::Type *type = llvm::ArrayType::get(llvm::VectorType::get(llvm::Type::getFloatTy(program.context()), 4), 4); + llvm::Value *matrix = llvm::UndefValue::get(type); + for (int col = 0; col < 4; col++) + { + SSAVec4f column = uniforms[col].load_unaligned(); + std::vector indexes; + indexes.push_back(col); + matrix = builder.CreateInsertValue(matrix, column.v, indexes); + } + vertex_globals_ptr[0][0].store(matrix); + } + + stack_vertex_index.store(0); + SSAForBlock loop; + SSAInt vertex_index = stack_vertex_index.load(); + loop.loop_block(vertex_index + 2 < num_vertices); + for (int v = 0; v < 3; v++) + { + for (int i = 0; i < num_vertex_in; i++) + { + SSAValue attribute_ptr = vertex_in_ptr[i].load(); + SSAVec4f vertex_in = SSAVec4f::shuffle(SSAVec4fPtr(attribute_ptr)[first_vertex + vertex_index + v].load_unaligned(), 0, 1, 2, 3); + vertex_globals_ptr[0][num_uniforms + i].store(vertex_in.v); + } + SSAScope::builder().CreateCall(SSAScope::module()->getFunction((vertex_codegen.shader_prefix() + "main").c_str()), vertex_globals_ptr.v); + for (int i = 0; i < num_vertex_out; i++) + { + vertex_outs[i][v].store(vertex_globals_ptr[0][num_uniforms + 
num_vertex_in + i].load()); + } + } + + render_polygon(input_width, input_height, input_data, output_width, output_height, output_data, viewport_x, viewport_y, viewport_width, viewport_height, 3, vertex_outs, core, num_cores); + + stack_vertex_index.store(vertex_index + 3); + loop.end_block(); + + builder.CreateRetVoid(); + llvm::verifyFunction(*function.func); +} + +void GlslFixedFunction::codegen_calc_window_positions() +{ + llvm::IRBuilder<> builder(program.context()); + SSAScope ssa_scope(&program.context(), program.module(), &builder); + + SSAFunction function("calc_window_positions"); + function.add_parameter(SSAInt::llvm_type()); // viewport_x + function.add_parameter(SSAInt::llvm_type()); // viewport_y + function.add_parameter(SSAInt::llvm_type()); // viewport_width + function.add_parameter(SSAInt::llvm_type()); // viewport_height + function.add_parameter(SSAInt::llvm_type()); // num_vertices + function.add_parameter(SSAVec4fPtr::llvm_type()); // gl_Position + function.add_parameter(SSAVec4fPtr::llvm_type()); // window_pos + function.create_private(); + SSAInt viewport_x = function.parameter(0); + SSAInt viewport_y = function.parameter(1); + SSAInt viewport_width = function.parameter(2); + SSAInt viewport_height = function.parameter(3); + SSAInt num_vertices = function.parameter(4); + SSAVec4fPtr clip_positions = function.parameter(5); + SSAVec4fPtr window_positions = function.parameter(6); + + SSAViewport viewport(viewport_x, viewport_y, viewport_width, viewport_height); + SSAStack stack_transform_index; + stack_transform_index.store(0); + SSAForBlock loop_transform; + SSAInt transform_index = stack_transform_index.load(); + loop_transform.loop_block(transform_index < num_vertices); + { + SSAVec4f clip_pos = clip_positions[transform_index].load(); + SSAVec4f window_pos = viewport.clip_to_window(clip_pos); + window_positions[transform_index].store(window_pos); + + stack_transform_index.store(transform_index + 1); + } + loop_transform.end_block(); + + 
builder.CreateRetVoid(); + llvm::verifyFunction(*function.func); +} + +void GlslFixedFunction::codegen_calc_polygon_face_direction() +{ + llvm::IRBuilder<> builder(program.context()); + SSAScope ssa_scope(&program.context(), program.module(), &builder); + + SSAFunction function("calc_polygon_face_direction"); + function.set_return_type(SSABool::llvm_type()); + function.add_parameter(SSAInt::llvm_type()); // num_vertices + function.add_parameter(SSAVec4fPtr::llvm_type()); // window_pos + function.create_private(); + SSAInt num_vertices = function.parameter(0); + SSAVec4fPtr window_positions = function.parameter(1); + + SSAStack stack_face_direction; + SSAStack stack_face_vertex_index; + stack_face_direction.store(0.0f); + stack_face_vertex_index.store(0); + SSAForBlock loop_face_direction; + SSAInt face_vertex_index = stack_face_vertex_index.load(); + loop_face_direction.loop_block(face_vertex_index < num_vertices); + { + SSAVec4f v0 = window_positions[face_vertex_index].load(); + SSAVec4f v1 = window_positions[(face_vertex_index + 1) % num_vertices].load(); + stack_face_direction.store(stack_face_direction.load() + v0[0] * v1[1] - v1[0] * v0[1]); + stack_face_vertex_index.store(face_vertex_index + 1); + } + loop_face_direction.end_block(); + SSABool front_facing_ccw = (stack_face_direction.load() >= 0.0f); + + builder.CreateRet(front_facing_ccw.v); + llvm::verifyFunction(*function.func); +} + +void GlslFixedFunction::codegen_calc_polygon_y_range() +{ + llvm::IRBuilder<> builder(program.context()); + SSAScope ssa_scope(&program.context(), program.module(), &builder); + + SSAFunction function("calc_polygon_y_range"); + function.add_parameter(SSAInt::llvm_type()); // viewport_y + function.add_parameter(SSAInt::llvm_type()); // viewport_height + function.add_parameter(SSAInt::llvm_type()); // num_vertices + function.add_parameter(SSAVec4fPtr::llvm_type()); // window_pos + function.add_parameter(SSAInt::llvm_type()->getPointerTo()); // out_y_start + 
function.add_parameter(SSAInt::llvm_type()->getPointerTo()); // out_y_end + function.create_private(); + SSAInt viewport_y = function.parameter(0); + SSAInt viewport_height = function.parameter(1); + SSAInt num_vertices = function.parameter(2); + SSAVec4fPtr window_positions = function.parameter(3); + SSAValue out_y_start = function.parameter(4); + SSAValue out_y_end = function.parameter(5); + + SSAStack y_start; + SSAStack y_end; + y_start.store(0x7fffffff); + y_end.store(0); + + SSAStack stack_minmax_index; + stack_minmax_index.store(0); + SSAForBlock loop_minmax; + SSAInt minmax_index = stack_minmax_index.load(); + loop_minmax.loop_block(minmax_index < num_vertices); + { + SSAInt y = SSAInt(window_positions[minmax_index].load()[1] + 0.5f); + y_start.store(ssa_min(y_start.load(), y)); + y_end.store(ssa_max(y_end.load(), y)); + stack_minmax_index.store(minmax_index + 1); + } + loop_minmax.end_block(); + + y_start.store(ssa_max(y_start.load(), viewport_y)); + y_end.store(ssa_min(y_end.load(), viewport_y + viewport_height)); + + out_y_start.store(y_start.load().v); + out_y_end.store(y_end.load().v); + builder.CreateRetVoid(); + llvm::verifyFunction(*function.func); +} + +void GlslFixedFunction::codegen_update_polygon_edge() +{ + llvm::IRBuilder<> builder(program.context()); + SSAScope ssa_scope(&program.context(), program.module(), &builder); + + SSAFunction function("update_polygon_edge"); + function.add_parameter(SSAFloat::llvm_type()); // y_position + function.add_parameter(SSAInt::llvm_type()); // num_vertices + function.add_parameter(SSAVec4fPtr::llvm_type()); // window_pos + function.add_parameter(SSAInt::llvm_type()->getPointerTo()); // inout left_index + function.add_parameter(SSAInt::llvm_type()->getPointerTo()); // inout right_index + function.create_private(); + SSAFloat float_y = function.parameter(0); + SSAInt num_vertices = function.parameter(1); + SSAVec4fPtr window_positions = function.parameter(2); + SSAValue ptr_left_index = function.parameter(3); 
+ SSAValue ptr_right_index = function.parameter(4); + + SSAStack max_iterate; + max_iterate.store(num_vertices); + SSAForBlock loop_left; + SSAInt left_index = ptr_left_index.load(); + SSAInt right_index = ptr_right_index.load(); + SSAInt next_left_index = (left_index + 1) % num_vertices; + SSAFloat left_y0 = window_positions[left_index].load()[1]; + SSAFloat left_y1 = window_positions[next_left_index].load()[1]; + SSABool in_range = (left_y0 >= float_y && left_y1 < float_y) || (left_y1 >= float_y && left_y0 < float_y); + loop_left.loop_block((left_index == right_index || !in_range) && max_iterate.load() > 0); + ptr_left_index.store(next_left_index.v); + max_iterate.store(max_iterate.load() - 1); + loop_left.end_block(); + + builder.CreateRetVoid(); + llvm::verifyFunction(*function.func); +} + +void GlslFixedFunction::render_polygon( + SSAInt input_width, + SSAInt input_height, + SSAUBytePtr input_data, + SSAInt output_width, + SSAInt output_height, + SSAUBytePtr output_data, + SSAInt viewport_x, + SSAInt viewport_y, + SSAInt viewport_width, + SSAInt viewport_height, + SSAInt num_vertices, + std::vector fragment_ins, + SSAInt core, + SSAInt num_cores) +{ + SSAVec4fPtr window_positions = SSAVec4fPtr::from_llvm(SSAScope::alloca(SSAVec4f::llvm_type(), num_vertices)); + SSAVec4fPtr left_line_varyings = SSAVec4fPtr::from_llvm(SSAScope::alloca(SSAVec4f::llvm_type(), fragment_ins.size())); + SSAVec4fPtr right_line_varyings = SSAVec4fPtr::from_llvm(SSAScope::alloca(SSAVec4f::llvm_type(), fragment_ins.size())); + + /////////////////////////////////// + + llvm::Value *calc_window_positions_args[] = { viewport_x.v, viewport_y.v, viewport_width.v, viewport_height.v, num_vertices.v, fragment_ins[0].v, window_positions.v }; + SSAScope::builder().CreateCall(SSAScope::module()->getFunction("calc_window_positions"), calc_window_positions_args); + + llvm::Value *calc_polygon_face_direction_args[] = { num_vertices.v, window_positions.v }; + SSABool front_facing_ccw = 
SSABool::from_llvm(SSAScope::builder().CreateCall(SSAScope::module()->getFunction("calc_polygon_face_direction"), calc_polygon_face_direction_args)); + + SSAIfBlock cull_if; + cull_if.if_block(front_facing_ccw); + { + SSAViewport viewport(viewport_x, viewport_y, viewport_width, viewport_height); + + SSAStack y_start; + SSAStack y_end; + + llvm::Value *calc_polygon_y_range_args[] = { viewport_y.v, viewport_height.v, num_vertices.v, window_positions.v, y_start.v, y_end.v }; + SSAScope::builder().CreateCall(SSAScope::module()->getFunction("calc_polygon_y_range"), calc_polygon_y_range_args); + + y_start.store((y_start.load() + num_cores - core - 1) / num_cores * num_cores + core); // find_first_line_for_core + + SSAStack stack_left_index; + SSAStack stack_right_index; + SSAStack stack_int_y; + stack_left_index.store(0); + stack_right_index.store(1); + stack_int_y.store(y_start.load()); + SSAForBlock scanlines_loop; + scanlines_loop.loop_block(stack_int_y.load() < y_end.load()); + { + SSAInt int_y = stack_int_y.load(); + SSAFloat float_y = SSAFloat(int_y) + 0.5f; + + llvm::Value *update_polygon_edge_args0[] = { float_y.v, num_vertices.v, window_positions.v, stack_left_index.v, stack_right_index.v }; + llvm::Value *update_polygon_edge_args1[] = { float_y.v, num_vertices.v, window_positions.v, stack_right_index.v, stack_left_index.v }; + SSAScope::builder().CreateCall(SSAScope::module()->getFunction("update_polygon_edge"), update_polygon_edge_args0); + SSAScope::builder().CreateCall(SSAScope::module()->getFunction("update_polygon_edge"), update_polygon_edge_args1); + + SSAInt left_index = stack_left_index.load(); + SSAInt right_index = stack_right_index.load(); + SSAInt next_left_index = (left_index + 1) % num_vertices; + SSAInt next_right_index = (right_index + 1) % num_vertices; + + SSABarycentricWeight left_weight(viewport, fragment_ins[0][left_index].load(), fragment_ins[0][next_left_index].load()); + SSABarycentricWeight right_weight(viewport, 
fragment_ins[0][right_index].load(), fragment_ins[0][next_right_index].load()); + + SSAFloat a = left_weight.from_window_y(int_y); + SSAFloat b = right_weight.from_window_y(int_y); + + SSAVec4f left_clip_pos = left_weight.v1 * a + left_weight.v2 * (1.0f - a); + SSAVec4f right_clip_pos = right_weight.v1 * b + right_weight.v2 * (1.0f - b); + + for (size_t i = 0; i + 1 < fragment_ins.size(); i++) + { + left_line_varyings[i].store(fragment_ins[i + 1][left_index].load() * a + fragment_ins[i + 1][next_left_index].load() * (1.0f - a)); + right_line_varyings[i].store(fragment_ins[i + 1][right_index].load() * b + fragment_ins[i + 1][next_right_index].load() * (1.0f - b)); + } + + llvm::Value *render_scanline_args[] = { output_width.v, output_height.v, output_data.v, viewport_x.v, viewport_y.v, viewport_width.v, viewport_height.v, int_y.v, left_clip_pos.v, right_clip_pos.v, left_line_varyings.v, right_line_varyings.v, input_width.v, input_height.v, input_data.v }; + SSAScope::builder().CreateCall(SSAScope::module()->getFunction("render_scanline"), render_scanline_args); + + stack_int_y.store(stack_int_y.load() + num_cores); + } + scanlines_loop.end_block(); + } + cull_if.end_block(); +} + +void GlslFixedFunction::codegen_render_scanline(int num_varyings) +{ + llvm::IRBuilder<> builder(program.context()); + SSAScope ssa_scope(&program.context(), program.module(), &builder); + + SSAFunction function("render_scanline"); + function.add_parameter(SSAInt::llvm_type()); // output_width + function.add_parameter(SSAInt::llvm_type()); // output_height + function.add_parameter(SSAUBytePtr::llvm_type()); // output_data + function.add_parameter(SSAInt::llvm_type()); // viewport_x + function.add_parameter(SSAInt::llvm_type()); // viewport_y + function.add_parameter(SSAInt::llvm_type()); // viewport_width + function.add_parameter(SSAInt::llvm_type()); // viewport_height + function.add_parameter(SSAInt::llvm_type()); // y + function.add_parameter(SSAVec4f::llvm_type()); // left_clip_pos + 
function.add_parameter(SSAVec4f::llvm_type()); // right_clip_pos + function.add_parameter(SSAVec4fPtr::llvm_type()); // left_line_varyings + function.add_parameter(SSAVec4fPtr::llvm_type()); // right_line_varyings + function.add_parameter(SSAInt::llvm_type()); // input_width + function.add_parameter(SSAInt::llvm_type()); // input_height + function.add_parameter(SSAUBytePtr::llvm_type()); // input_data + function.create_private(); + SSAInt output_width = function.parameter(0); + SSAInt output_height = function.parameter(1); + SSAUBytePtr output_data = function.parameter(2); + SSAInt viewport_x = function.parameter(3); + SSAInt viewport_y = function.parameter(4); + SSAInt viewport_width = function.parameter(5); + SSAInt viewport_height = function.parameter(6); + SSAInt y = function.parameter(7); + SSAVec4f left_clip_pos = function.parameter(8); + SSAVec4f right_clip_pos = function.parameter(9); + SSAVec4fPtr left_line_varyings = function.parameter(10); + SSAVec4fPtr right_line_varyings = function.parameter(11); + SSAInt input_width = function.parameter(12); + SSAInt input_height = function.parameter(13); + SSAUBytePtr input_data = function.parameter(14); + + SSAViewport viewport(viewport_x, viewport_y, viewport_width, viewport_height); + + SSAScopeHint hint; + + SSAStack stack_x; + SSAStack stack_xnormalized; + + //////////////////////////////// + // Prepare to render scanline: + + hint.set("prepare"); + OuterData outer_data; + + SSAVec4f left_window_pos = viewport.clip_to_window(left_clip_pos); + SSAVec4f right_window_pos = viewport.clip_to_window(right_clip_pos); + + SSAFloat x0 = left_window_pos[0]; + SSAFloat x1 = right_window_pos[0]; + SSAInt start(ssa_min(x0, x1)); + SSAInt end(ssa_max(x1, x0) + 0.5f); + + start = ssa_max(start, viewport.x); + end = ssa_min(end, viewport.right); + + SSABarycentricWeight weight_scanline(viewport, left_clip_pos, right_clip_pos); + + outer_data.start = start; + outer_data.end = end; + outer_data.input_width = input_width; + 
outer_data.input_height = input_height; + outer_data.output_width = output_width; + outer_data.output_height = output_height; + outer_data.input_pixels = input_data; + outer_data.output_pixels_line = output_data[output_width * y * 4]; + + outer_data.viewport_x = SSAFloat(viewport.x); + outer_data.viewport_rcp_half_width = viewport.rcp_half_width; + outer_data.dx = weight_scanline.v2[0] - weight_scanline.v1[0]; + outer_data.dw = weight_scanline.v2[3] - weight_scanline.v1[3]; + outer_data.v1w = weight_scanline.v1[3]; + outer_data.v1x = weight_scanline.v1[0]; + outer_data.sse_left_varying_in = left_line_varyings; + outer_data.sse_right_varying_in = right_line_varyings; + outer_data.num_varyings = num_varyings; + + outer_data.sampler = SSAScope::alloca(get_sampler_struct(SSAScope::context())); + std::vector index_list; + index_list.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)0))); + index_list.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)0))); + llvm::Value *sampler_width_ptr = SSAScope::builder().CreateGEP(outer_data.sampler, index_list); + index_list[1] = llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)1)); + llvm::Value *sampler_height_ptr = SSAScope::builder().CreateGEP(outer_data.sampler, index_list); + index_list[1] = llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)2)); + llvm::Value *sampler_data_ptr = SSAScope::builder().CreateGEP(outer_data.sampler, index_list); + SSAScope::builder().CreateStore(outer_data.input_width.v, sampler_width_ptr, false); + SSAScope::builder().CreateStore(outer_data.input_height.v, sampler_height_ptr, false); + SSAScope::builder().CreateStore(outer_data.input_pixels.v, sampler_data_ptr, false); + + + SSAVec4i xposinit = SSAVec4i(outer_data.start) + SSAVec4i(0, 1, 2, 3); + stack_x.store(outer_data.start); + stack_xnormalized.store((SSAVec4f(xposinit) + 0.5f - outer_data.viewport_x) * outer_data.viewport_rcp_half_width - 
1.0f); + + ///////////////////////////////////////////////////////////////////////// + // First pixels: + + hint.set("firstpixels"); + SSAIfBlock if_block; + if_block.if_block(outer_data.end - outer_data.start > 3); + process_first_pixels(outer_data, stack_x, stack_xnormalized); + if_block.end_block(); + + ///////////////////////////////////////////////////////////////////////// + // Start: for (SSAInt x = start; x < end; x += 4) + + hint.set("loopstart"); + + SSAForBlock for_block; + SSAInt x = stack_x.load(); + for_block.loop_block(x + 3 < outer_data.end); + + ///////////////////////////////////////////////////////////////////////// + // Loop body + { + SSAVec4f xnormalized = stack_xnormalized.load(); + + hint.set("blendload"); + SSAVec4i desti[4]; + SSAVec16ub dest_block = outer_data.output_pixels_line[x << 2].load_vec16ub(); + SSAVec4i::extend(dest_block, desti[0], desti[1], desti[2], desti[3]); + + SSAVec4f frag_colors[4]; + inner_block(outer_data, xnormalized, frag_colors); + blend(frag_colors, dest_block); + + hint.set("blendstore"); + outer_data.output_pixels_line[x << 2].store_vec16ub(dest_block); + hint.clear(); + + xnormalized = xnormalized + 4.0f * outer_data.viewport_rcp_half_width; + stack_xnormalized.store(xnormalized); + } + ///////////////////////////////////////////////////////////////////////// + // End: for (SSAInt x = start; x < end; x += 4) + + hint.set("loopend"); + x = x + 4; + stack_x.store(x); + for_block.end_block(); + + ///////////////////////////////////////////////////////////////////////// + // Last pixels: + + hint.set("lastpixels"); + process_last_pixels(outer_data, stack_x, stack_xnormalized); + + builder.CreateRetVoid(); + llvm::verifyFunction(*function.func); +} + +void GlslFixedFunction::process_first_pixels(OuterData &outer_data, SSAStack &stack_x, SSAStack &stack_xnormalized) +{ + SSAInt x = stack_x.load(); + SSAVec4f xnormalized = stack_xnormalized.load(); + SSAInt offset = x << 2; + + // Find how many pixels we have left 
until we 16 byte align: + llvm::Value *output_line_align = SSAScope::builder().CreatePtrToInt(outer_data.output_pixels_line.v, llvm::Type::getInt32Ty(SSAScope::context())); + output_line_align = SSAScope::builder().CreateAdd(output_line_align, offset.v); + SSAInt left = 4 - (SSAInt::from_llvm(SSAScope::builder().CreateURem(output_line_align, SSAInt(16).v)) >> 2); + + SSAIfBlock if_block0; + if_block0.if_block(left == 3); + { + SSAVec4i dest[4] = + { + outer_data.output_pixels_line[offset].load_vec4ub(), + outer_data.output_pixels_line[offset + 4].load_vec4ub(), + outer_data.output_pixels_line[offset + 8].load_vec4ub(), + SSAVec4i(0) + }; + + // To do: do this in a less braindead way + SSAVec16ub dest_block(SSAVec8s(dest[0], dest[1]), SSAVec8s(dest[2], dest[3])); + SSAVec4f frag_colors[4]; + inner_block(outer_data, xnormalized, frag_colors); + blend(frag_colors, dest_block); + SSAVec4i::extend(dest_block, dest[0], dest[1], dest[2], dest[3]); + + outer_data.output_pixels_line[offset].store_vec4ub(dest[0]); + outer_data.output_pixels_line[offset + 4].store_vec4ub(dest[1]); + outer_data.output_pixels_line[offset + 8].store_vec4ub(dest[2]); + + stack_x.store(x + 3); + stack_xnormalized.store(xnormalized + 3.0f * outer_data.viewport_rcp_half_width); + } + if_block0.else_block(); + { + SSAIfBlock if_block1; + if_block1.if_block(left == 2); + { + SSAVec4i dest[4] = + { + outer_data.output_pixels_line[offset].load_vec4ub(), + outer_data.output_pixels_line[offset + 4].load_vec4ub(), + SSAVec4i(0), + SSAVec4i(0) + }; + + // To do: do this in a less braindead way + SSAVec16ub dest_block(SSAVec8s(dest[0], dest[1]), SSAVec8s(dest[2], dest[3])); + SSAVec4f frag_colors[4]; + inner_block(outer_data, xnormalized, frag_colors); + blend(frag_colors, dest_block); + SSAVec4i::extend(dest_block, dest[0], dest[1], dest[2], dest[3]); + + outer_data.output_pixels_line[offset].store_vec4ub(dest[0]); + outer_data.output_pixels_line[offset + 4].store_vec4ub(dest[1]); + + stack_x.store(x + 2); 
+ stack_xnormalized.store(xnormalized + 2.0f * outer_data.viewport_rcp_half_width); + } + if_block1.else_block(); + { + SSAIfBlock if_block2; + if_block2.if_block(left == 1); + { + SSAVec4i dest[4] = + { + outer_data.output_pixels_line[offset].load_vec4ub(), + SSAVec4i(0), + SSAVec4i(0), + SSAVec4i(0) + }; + + // To do: do this in a less braindead way + SSAVec16ub dest_block(SSAVec8s(dest[0], dest[1]), SSAVec8s(dest[2], dest[3])); + SSAVec4f frag_colors[4]; + inner_block(outer_data, xnormalized, frag_colors); + blend(frag_colors, dest_block); + SSAVec4i::extend(dest_block, dest[0], dest[1], dest[2], dest[3]); + + outer_data.output_pixels_line[offset].store_vec4ub(dest[0]); + + stack_x.store(x + 1); + stack_xnormalized.store(xnormalized + outer_data.viewport_rcp_half_width); + } + if_block2.end_block(); + } + if_block1.end_block(); + } + if_block0.end_block(); +} + +void GlslFixedFunction::process_last_pixels(OuterData &outer_data, SSAStack &stack_x, SSAStack &stack_xnormalized) +{ + SSAInt x = stack_x.load(); + SSAVec4f xnormalized = stack_xnormalized.load(); + + SSAInt left = outer_data.end - x; + SSAInt offset = x << 2; + SSAIfBlock if_block0; + SSAIfBlock if_block1; + SSAIfBlock if_block2; + if_block0.if_block(left == 3); + { + SSAVec4i dest[4] = + { + outer_data.output_pixels_line[offset].load_vec4ub(), + outer_data.output_pixels_line[offset + 4].load_vec4ub(), + outer_data.output_pixels_line[offset + 8].load_vec4ub(), + SSAVec4i(0) + }; + + // To do: do this in a less braindead way + SSAVec16ub dest_block(SSAVec8s(dest[0], dest[1]), SSAVec8s(dest[2], dest[3])); + SSAVec4f frag_colors[4]; + inner_block(outer_data, xnormalized, frag_colors); + blend(frag_colors, dest_block); + SSAVec4i::extend(dest_block, dest[0], dest[1], dest[2], dest[3]); + + outer_data.output_pixels_line[offset].store_vec4ub(dest[0]); + outer_data.output_pixels_line[offset + 4].store_vec4ub(dest[1]); + outer_data.output_pixels_line[offset + 8].store_vec4ub(dest[2]); + } + 
if_block0.else_block(); + if_block1.if_block(left == 2); + { + SSAVec4i dest[4] = + { + outer_data.output_pixels_line[offset].load_vec4ub(), + outer_data.output_pixels_line[offset + 4].load_vec4ub(), + SSAVec4i(0), + SSAVec4i(0) + }; + + // To do: do this in a less braindead way + SSAVec16ub dest_block(SSAVec8s(dest[0], dest[1]), SSAVec8s(dest[2], dest[3])); + SSAVec4f frag_colors[4]; + inner_block(outer_data, xnormalized, frag_colors); + blend(frag_colors, dest_block); + SSAVec4i::extend(dest_block, dest[0], dest[1], dest[2], dest[3]); + + outer_data.output_pixels_line[offset].store_vec4ub(dest[0]); + outer_data.output_pixels_line[offset + 4].store_vec4ub(dest[1]); + } + if_block1.else_block(); + if_block2.if_block(left == 1); + { + SSAVec4i dest[4] = + { + outer_data.output_pixels_line[offset].load_vec4ub(), + SSAVec4i(0), + SSAVec4i(0), + SSAVec4i(0) + }; + + // To do: do this in a less braindead way + SSAVec16ub dest_block(SSAVec8s(dest[0], dest[1]), SSAVec8s(dest[2], dest[3])); + SSAVec4f frag_colors[4]; + inner_block(outer_data, xnormalized, frag_colors); + blend(frag_colors, dest_block); + SSAVec4i::extend(dest_block, dest[0], dest[1], dest[2], dest[3]); + + outer_data.output_pixels_line[offset].store_vec4ub(dest[0]); + } + if_block2.end_block(); + if_block1.end_block(); + if_block0.end_block(); +} + +void GlslFixedFunction::inner_block(OuterData &data, SSAVec4f xnormalized, SSAVec4f *frag_color) +{ + SSAScopeHint hint; + hint.set("varying"); + SSAVec4f a = (xnormalized * data.v1w - data.v1x) * SSAVec4f::rcp(data.dx - xnormalized * data.dw); + SSAVec4f one_minus_a = 1.0f - a; + + llvm::Value *globals_ptr[4]; + for (int i = 0; i < 4; i++) + { + globals_ptr[i] = SSAScope::alloca(fragment_codegen.get_global_struct_type()); + + std::vector index_list; + index_list.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)0))); + index_list.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)0))); + llvm::Value 
*sampler_ptr = SSAScope::builder().CreateGEP(globals_ptr[i], index_list); + SSAScope::builder().CreateStore(data.sampler, sampler_ptr, false); + + for (int j = 0; j < data.num_varyings; j++) + { + SSAVec4f field_value = + data.sse_left_varying_in[j].load() * SSAVec4f::shuffle(one_minus_a, i, i, i, i) + + data.sse_right_varying_in[j].load() * SSAVec4f::shuffle(a, i, i, i, i); + index_list.clear(); + index_list.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)0))); + index_list.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)j+1))); + llvm::Value *field_ptr = SSAScope::builder().CreateGEP(globals_ptr[i], index_list); + SSAScope::builder().CreateStore(field_value.v, field_ptr, false); + } + } + + hint.set("fragprogram"); + for (int i = 0; i < 4; i++) + { + SSAScope::builder().CreateCall(SSAScope::module()->getFunction((fragment_codegen.shader_prefix() + "main").c_str()), globals_ptr[i]); + + std::vector index_list; + index_list.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)0))); + index_list.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)5))); + llvm::Value *field_ptr = SSAScope::builder().CreateGEP(globals_ptr[i], index_list); + frag_color[i] = SSAVec4f::from_llvm(SSAScope::builder().CreateLoad(field_ptr, false)); + } +} +/* +void GlslFixedFunction::blend(SSAVec4f frag_color[4], SSAVec16ub &dest) +{ + SSAVec4i desti[4]; + SSAVec4i::extend(dest, desti[0], desti[1], desti[2], desti[3]); + + // Pre-mulitiplied alpha blend: + for (int pixel_index = 0; pixel_index < 4; pixel_index++) + { + SSAVec4f src = SSAVec4f::shuffle(frag_color[pixel_index], 2, 1, 0, 3); + desti[pixel_index] = SSAVec4i(src * 255.0f); + SSAVec4f dest = SSAVec4f(desti[pixel_index]) * (1.0f / 255.0f); + SSAVec4f alpha = SSAVec4f::shuffle(dest, 3, 3, 3, 3); + SSAVec4f resultf = src + dest * (1.0f - alpha); + desti[pixel_index] = SSAVec4i(resultf * 255.0f); + } + + dest = 
SSAVec16ub(SSAVec8s(desti[0], desti[1]), SSAVec8s(desti[2], desti[3])); +} +*/ +void GlslFixedFunction::blend(SSAVec4f frag_color[4], SSAVec16ub &dest) +{ + for (int i = 0; i < 4; i++) + frag_color[i] = SSAVec4f::shuffle(frag_color[i], 2, 1, 0, 3); + + // Pre-mulitiplied alpha blend: + SSAVec8s dest0 = SSAVec8s::extendlo(dest); + SSAVec8s dest1 = SSAVec8s::extendhi(dest); + + SSAVec8s src0(SSAVec4i(frag_color[0] * 255.0f), SSAVec4i(frag_color[1] * 255.0f)); + SSAVec8s src1(SSAVec4i(frag_color[2] * 255.0f), SSAVec4i(frag_color[3] * 255.0f)); + + // Extract and duplicate alpha components: + SSAVec8s alpha0 = SSAVec8s::shuffle(src0, 3, 3, 3, 3, 7, 7, 7, 7); + SSAVec8s alpha1 = SSAVec8s::shuffle(src1, 3, 3, 3, 3, 7, 7, 7, 7); + + // Convert from 0-255 to 0-256 range: + alpha0 = SSAVec8s::max_sse2(alpha0, 255); + alpha1 = SSAVec8s::max_sse2(alpha1, 255); + alpha0 = alpha0 + (alpha0 >> 7); + alpha1 = alpha1 + (alpha1 >> 7); + + SSAVec8s result0 = src0 + ((dest0 * (256 - alpha0)) >> 8); + SSAVec8s result1 = src1 + ((dest1 * (256 - alpha1)) >> 8); + + dest = SSAVec16ub(result0, result1); +} + +#endif diff --git a/src/r_compiler/fixedfunction/fixedfunction.h b/src/r_compiler/fixedfunction/fixedfunction.h new file mode 100644 index 0000000000..4c81fc1081 --- /dev/null +++ b/src/r_compiler/fixedfunction/fixedfunction.h @@ -0,0 +1,130 @@ + +#pragma once + +#include "r_compiler/ssa/ssa_vec4f.h" +#include "r_compiler/ssa/ssa_vec4i.h" +#include "r_compiler/ssa/ssa_vec8s.h" +#include "r_compiler/ssa/ssa_vec16ub.h" +#include "r_compiler/ssa/ssa_int.h" +#include "r_compiler/ssa/ssa_ubyte_ptr.h" +#include "r_compiler/ssa/ssa_vec4f_ptr.h" +#include "r_compiler/ssa/ssa_vec4i_ptr.h" +#include "r_compiler/ssa/ssa_pixels.h" +#include "r_compiler/ssa/ssa_stack.h" +#include "r_compiler/ssa/ssa_barycentric_weight.h" +#include "r_compiler/llvm_include.h" + +class RenderProgram +{ +public: + RenderProgram(); + ~RenderProgram(); + + template + Func *GetProcAddress(const char *name) { return 
reinterpret_cast(PointerToFunction(name)); } + + llvm::LLVMContext &context() { return *mContext; } + llvm::Module *module() { return mModule; } + llvm::ExecutionEngine *engine() { return mEngine.get(); } + +private: + void *PointerToFunction(const char *name); + + std::unique_ptr mContext; + llvm::Module *mModule; + std::unique_ptr mEngine; +}; + +class FixedFunction +{ +public: + FixedFunction(); + + void(*DrawSpan)(int, uint32_t *) = nullptr; + +private: + void CodegenDrawSpan(); + + RenderProgram mProgram; +}; + +#if 0 + +class GlslProgram; +class GlslCodeGen; + +class GlslFixedFunction +{ +public: + GlslFixedFunction(GlslProgram &program, GlslCodeGen &vertex_codegen, GlslCodeGen &fragment_codegen); + void codegen(); + static llvm::Type *get_sampler_struct(llvm::LLVMContext &context); + +private: + void codegen_draw_triangles(int num_vertex_in, int num_vertex_out); + void codegen_calc_window_positions(); + void codegen_calc_polygon_face_direction(); + void codegen_calc_polygon_y_range(); + void codegen_update_polygon_edge(); + void codegen_texture(); + void codegen_normalize(); + void codegen_reflect(); + void codegen_max(); + void codegen_pow(); + void codegen_dot(); + void codegen_mix(); + + struct OuterData + { + OuterData() : sampler() { } + + SSAInt start; + SSAInt end; + SSAInt input_width; + SSAInt input_height; + SSAInt output_width; + SSAInt output_height; + SSAUBytePtr input_pixels; + SSAUBytePtr output_pixels_line; + + SSAVec4fPtr sse_left_varying_in; + SSAVec4fPtr sse_right_varying_in; + int num_varyings; + SSAVec4f viewport_x; + SSAVec4f viewport_rcp_half_width; + SSAVec4f dx; + SSAVec4f dw; + SSAVec4f v1w; + SSAVec4f v1x; + + llvm::Value *sampler; + }; + + void render_polygon( + SSAInt input_width, + SSAInt input_height, + SSAUBytePtr input_data, + SSAInt output_width, + SSAInt output_height, + SSAUBytePtr output_data, + SSAInt viewport_x, + SSAInt viewport_y, + SSAInt viewport_width, + SSAInt viewport_height, + SSAInt num_vertices, + std::vector 
fragment_ins, + SSAInt core, + SSAInt num_cores); + + void codegen_render_scanline(int num_varyings); + void process_first_pixels(OuterData &outer_data, SSAStack &stack_x, SSAStack &stack_xnormalized); + void process_last_pixels(OuterData &outer_data, SSAStack &stack_x, SSAStack &stack_xnormalized); + void inner_block(OuterData &data, SSAVec4f xnormalized, SSAVec4f *out_frag_colors); + void blend(SSAVec4f frag_colors[4], SSAVec16ub &dest); + + GlslProgram &program; + GlslCodeGen &vertex_codegen; + GlslCodeGen &fragment_codegen; +}; + +#endif diff --git a/src/r_compiler/llvm_include.h b/src/r_compiler/llvm_include.h new file mode 100644 index 0000000000..1eed549e10 --- /dev/null +++ b/src/r_compiler/llvm_include.h @@ -0,0 +1,46 @@ + +#pragma once + +#if defined(min) +#define llvm_min_bug min +#undef min +#endif +#if defined(max) +#define llvm_max_bug max +#undef max +#endif + +#pragma warning(disable: 4146) // warning C4146: unary minus operator applied to unsigned type, result still unsigned +#pragma warning(disable: 4624) // warning C4624: 'llvm::AugmentedUse' : destructor could not be generated because a base class destructor is inaccessible +#pragma warning(disable: 4355) // warning C4355: 'this' : used in base member initializer list +#pragma warning(disable: 4800) // warning C4800: 'const unsigned int' : forcing value to bool 'true' or 'false' (performance warning) +#pragma warning(disable: 4996) // warning C4996: 'std::_Copy_impl': Function call with parameters that may be unsafe - this call relies on the caller to check that the passed values are correct. To disable this warning, use -D_Sclan::SECURE_NO_WARNINGS. 
See documentation on how to use Visual C++ 'Checked Iterators' +#pragma warning(disable: 4244) // warning C4244: 'return' : conversion from 'uint64_t' to 'unsigned int', possible loss of data +#pragma warning(disable: 4141) // warning C4141: 'inline': used more than once +#pragma warning(disable: 4291) // warning C4291: 'void *llvm::User::operator new(std::size_t,unsigned int,unsigned int)': no matching operator delete found; memory will not be freed if initialization throws an exception + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if defined(llvm_min_bug) +#define min llvm_min_bug +#undef llvm_min_bug +#endif +#if defined(llvm_max_bug) +#define max llvm_max_bug +#undef llvm_max_bug +#endif diff --git a/src/r_compiler/ssa/ssa_barycentric_weight.h b/src/r_compiler/ssa/ssa_barycentric_weight.h new file mode 100644 index 0000000000..52117ccc69 --- /dev/null +++ b/src/r_compiler/ssa/ssa_barycentric_weight.h @@ -0,0 +1,97 @@ + +#pragma once + +#include "ssa_vec4f.h" +#include "ssa_float.h" +#include "ssa_int.h" + +class SSAViewport +{ +public: + SSAViewport(SSAInt x, SSAInt y, SSAInt width, SSAInt height) + : x(x), y(y), width(width), height(height), right(x + width), bottom(y + height), + half_width(SSAFloat(width) * 0.5f), half_height(SSAFloat(height) * 0.5f), + rcp_half_width(1.0f / (SSAFloat(width) * 0.5f)), + rcp_half_height(1.0f / (SSAFloat(height) * 0.5f)) + { + } + + SSAInt x, y; + SSAInt width, height; + SSAInt right, bottom; + SSAFloat half_width; + SSAFloat half_height; + SSAFloat rcp_half_width; + SSAFloat rcp_half_height; + + SSAVec4f clip_to_window(SSAVec4f clip) const + { + SSAFloat w = clip[3]; + SSAVec4f normalized = SSAVec4f::insert_element(clip / SSAVec4f::shuffle(clip, 3, 3, 3, 3), w, 3); + return normalized_to_window(normalized); + } + + SSAVec4f normalized_to_window(SSAVec4f normalized) const + { + return SSAVec4f( + SSAFloat(x) 
+ (normalized[0] + 1.0f) * half_width, + SSAFloat(y) + (normalized[1] + 1.0f) * half_height, + 0.0f - normalized[2], + normalized[3]); + } +}; + +class SSABarycentricWeight +{ +public: + SSABarycentricWeight(SSAViewport vp, SSAVec4f v1, SSAVec4f v2); + SSAFloat from_window_x(SSAInt x) const; + SSAFloat from_window_y(SSAInt y) const; + + SSAViewport viewport; + SSAVec4f v1; + SSAVec4f v2; +}; + +inline SSABarycentricWeight::SSABarycentricWeight(SSAViewport viewport, SSAVec4f v1, SSAVec4f v2) +: viewport(viewport), v1(v1), v2(v2) +{ +} + +inline SSAFloat SSABarycentricWeight::from_window_x(SSAInt x) const +{ +/* SSAFloat xnormalized = (x + 0.5f - viewport.x) * viewport.rcp_half_width - 1.0f; + SSAFloat dx = v2.x-v1.x; + SSAFloat dw = v2.w-v1.w; + SSAFloat a = (v2.x - xnormalized * v2.w) / (dx - xnormalized * dw); + return a;*/ + + SSAFloat xnormalized = (SSAFloat(x) + 0.5f - SSAFloat(viewport.x)) * viewport.rcp_half_width - 1.0f; + SSAFloat dx = v2[0]-v1[0]; + SSAFloat dw = v2[3]-v1[3]; + SSAFloat t = (xnormalized * v1[3] - v1[0]) / (dx - xnormalized * dw); + return 1.0f - t; +} + +inline SSAFloat SSABarycentricWeight::from_window_y(SSAInt y) const +{ +/* SSAFloat ynormalized = (y + 0.5f - viewport.y) * viewport.rcp_half_height - 1.0f; + SSAFloat dy = v2.y-v1.y; + SSAFloat dw = v2.w-v1.w; + SSAFloat a = (v2.y - ynormalized * v2.w) / (dy - ynormalized * dw); + return a;*/ + + SSAFloat ynormalized = (SSAFloat(y) + 0.5f - SSAFloat(viewport.y)) * viewport.rcp_half_height - 1.0f; + SSAFloat dy = v2[1]-v1[1]; + SSAFloat dw = v2[3]-v1[3]; + SSAFloat t = (ynormalized * v1[3] - v1[1]) / (dy - ynormalized * dw); + return 1.0f - t; +} + +/* + y = (v1.y + t * dy) / (v1.w + t * dw) + + y * v1.w + y * t * dw = v1.y + t * dy + y * v1.w - v1.y = t * (dy - y * dw) + t = (y * v1.w - v1.y) / (dy - y * dw) +*/ diff --git a/src/r_compiler/ssa/ssa_bool.cpp b/src/r_compiler/ssa/ssa_bool.cpp new file mode 100644 index 0000000000..1013239117 --- /dev/null +++ 
b/src/r_compiler/ssa/ssa_bool.cpp @@ -0,0 +1,91 @@ + +#include "ssa_bool.h" +#include "ssa_scope.h" +#include "r_compiler/llvm_include.h" + +SSABool::SSABool() +: v(0) +{ +} +/* +SSABool::SSABool(bool constant) +: v(0) +{ +} +*/ +SSABool::SSABool(llvm::Value *v) +: v(v) +{ +} + +llvm::Type *SSABool::llvm_type() +{ + return llvm::Type::getInt1Ty(SSAScope::context()); +} + +SSABool operator&&(const SSABool &a, const SSABool &b) +{ + return SSABool::from_llvm(SSAScope::builder().CreateAnd(a.v, b.v, SSAScope::hint())); +} + +SSABool operator||(const SSABool &a, const SSABool &b) +{ + return SSABool::from_llvm(SSAScope::builder().CreateOr(a.v, b.v, SSAScope::hint())); +} + +SSABool operator!(const SSABool &a) +{ + return SSABool::from_llvm(SSAScope::builder().CreateNot(a.v, SSAScope::hint())); +} + +SSABool operator<(const SSAInt &a, const SSAInt &b) +{ + return SSABool::from_llvm(SSAScope::builder().CreateICmpSLT(a.v, b.v, SSAScope::hint())); +} + +SSABool operator<=(const SSAInt &a, const SSAInt &b) +{ + return SSABool::from_llvm(SSAScope::builder().CreateICmpSLE(a.v, b.v, SSAScope::hint())); +} + +SSABool operator==(const SSAInt &a, const SSAInt &b) +{ + return SSABool::from_llvm(SSAScope::builder().CreateICmpEQ(a.v, b.v, SSAScope::hint())); +} + +SSABool operator>=(const SSAInt &a, const SSAInt &b) +{ + return SSABool::from_llvm(SSAScope::builder().CreateICmpSGE(a.v, b.v, SSAScope::hint())); +} + +SSABool operator>(const SSAInt &a, const SSAInt &b) +{ + return SSABool::from_llvm(SSAScope::builder().CreateICmpSGT(a.v, b.v, SSAScope::hint())); +} + +///////////////////////////////////////////////////////////////////////////// + +SSABool operator<(const SSAFloat &a, const SSAFloat &b) +{ + return SSABool::from_llvm(SSAScope::builder().CreateFCmpOLT(a.v, b.v, SSAScope::hint())); +} + +SSABool operator<=(const SSAFloat &a, const SSAFloat &b) +{ + return SSABool::from_llvm(SSAScope::builder().CreateFCmpOLE(a.v, b.v, SSAScope::hint())); +} + +SSABool operator==(const 
SSAFloat &a, const SSAFloat &b) +{ + return SSABool::from_llvm(SSAScope::builder().CreateFCmpOEQ(a.v, b.v, SSAScope::hint())); +} + +SSABool operator>=(const SSAFloat &a, const SSAFloat &b) +{ + return SSABool::from_llvm(SSAScope::builder().CreateFCmpOGE(a.v, b.v, SSAScope::hint())); +} + +SSABool operator>(const SSAFloat &a, const SSAFloat &b) +{ + return SSABool::from_llvm(SSAScope::builder().CreateFCmpOGT(a.v, b.v, SSAScope::hint())); +} diff --git a/src/r_compiler/ssa/ssa_bool.h b/src/r_compiler/ssa/ssa_bool.h new file mode 100644 index 0000000000..2ef79e49b7 --- /dev/null +++ b/src/r_compiler/ssa/ssa_bool.h @@ -0,0 +1,37 @@ + +#pragma once + +#include "ssa_int.h" +#include "ssa_float.h" + +namespace llvm { class Value; } +namespace llvm { class Type; } + +class SSABool +{ +public: + SSABool(); + //SSABool(bool constant); + explicit SSABool(llvm::Value *v); + static SSABool from_llvm(llvm::Value *v) { return SSABool(v); } + static llvm::Type *llvm_type(); + + llvm::Value *v; +}; + +SSABool operator&&(const SSABool &a, const SSABool &b); +SSABool operator||(const SSABool &a, const SSABool &b); + +SSABool operator!(const SSABool &a); + +SSABool operator<(const SSAInt &a, const SSAInt &b); +SSABool operator<=(const SSAInt &a, const SSAInt &b); +SSABool operator==(const SSAInt &a, const SSAInt &b); +SSABool operator>=(const SSAInt &a, const SSAInt &b); +SSABool operator>(const SSAInt &a, const SSAInt &b); + +SSABool operator<(const SSAFloat &a, const SSAFloat &b); +SSABool operator<=(const SSAFloat &a, const SSAFloat &b); +SSABool operator==(const SSAFloat &a, const SSAFloat &b); +SSABool operator>=(const SSAFloat &a, const SSAFloat &b); +SSABool operator>(const SSAFloat &a, const SSAFloat &b); diff --git a/src/r_compiler/ssa/ssa_float.cpp b/src/r_compiler/ssa/ssa_float.cpp new file mode 100644 index 0000000000..87488af744 --- /dev/null +++ b/src/r_compiler/ssa/ssa_float.cpp @@ -0,0 +1,152 @@ + +#include "ssa_float.h" +#include "ssa_int.h" +#include "ssa_scope.h" 
+#include "r_compiler/llvm_include.h" + +SSAFloat::SSAFloat() +: v(0) +{ +} + +SSAFloat::SSAFloat(float constant) +: v(0) +{ + v = llvm::ConstantFP::get(SSAScope::context(), llvm::APFloat(constant)); +} + +SSAFloat::SSAFloat(SSAInt i) +: v(0) +{ + v = SSAScope::builder().CreateSIToFP(i.v, llvm::Type::getFloatTy(SSAScope::context()), SSAScope::hint()); +} + +SSAFloat::SSAFloat(llvm::Value *v) +: v(v) +{ +} + +llvm::Type *SSAFloat::llvm_type() +{ + return llvm::Type::getFloatTy(SSAScope::context()); +} + +SSAFloat SSAFloat::sqrt(SSAFloat f) +{ + std::vector params; + params.push_back(SSAFloat::llvm_type()); + return SSAFloat::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::sqrt, params), f.v, SSAScope::hint())); +} + +SSAFloat SSAFloat::sin(SSAFloat val) +{ + std::vector params; + params.push_back(SSAFloat::llvm_type()); + return SSAFloat::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::sin, params), val.v, SSAScope::hint())); +} + +SSAFloat SSAFloat::cos(SSAFloat val) +{ + std::vector params; + params.push_back(SSAFloat::llvm_type()); + return SSAFloat::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::cos, params), val.v, SSAScope::hint())); +} + +SSAFloat SSAFloat::pow(SSAFloat val, SSAFloat power) +{ + std::vector params; + params.push_back(SSAFloat::llvm_type()); + //params.push_back(SSAFloat::llvm_type()); + std::vector args; + args.push_back(val.v); + args.push_back(power.v); + return SSAFloat::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::pow, params), args, SSAScope::hint())); +} + +SSAFloat SSAFloat::exp(SSAFloat val) +{ + std::vector params; + params.push_back(SSAFloat::llvm_type()); + return SSAFloat::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::exp, params), val.v, SSAScope::hint())); +} + +SSAFloat SSAFloat::log(SSAFloat val) +{ + std::vector params; + params.push_back(SSAFloat::llvm_type()); + return 
SSAFloat::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::log, params), val.v, SSAScope::hint())); +} + +SSAFloat SSAFloat::fma(SSAFloat a, SSAFloat b, SSAFloat c) +{ + std::vector params; + params.push_back(SSAFloat::llvm_type()); + //params.push_back(SSAFloat::llvm_type()); + //params.push_back(SSAFloat::llvm_type()); + std::vector args; + args.push_back(a.v); + args.push_back(b.v); + args.push_back(c.v); + return SSAFloat::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::fma, params), args, SSAScope::hint())); +} + +SSAFloat operator+(const SSAFloat &a, const SSAFloat &b) +{ + return SSAFloat::from_llvm(SSAScope::builder().CreateFAdd(a.v, b.v, SSAScope::hint())); +} + +SSAFloat operator-(const SSAFloat &a, const SSAFloat &b) +{ + return SSAFloat::from_llvm(SSAScope::builder().CreateFSub(a.v, b.v, SSAScope::hint())); +} + +SSAFloat operator*(const SSAFloat &a, const SSAFloat &b) +{ + return SSAFloat::from_llvm(SSAScope::builder().CreateFMul(a.v, b.v, SSAScope::hint())); +} + +SSAFloat operator/(const SSAFloat &a, const SSAFloat &b) +{ + return SSAFloat::from_llvm(SSAScope::builder().CreateFDiv(a.v, b.v, SSAScope::hint())); +} + +SSAFloat operator+(float a, const SSAFloat &b) +{ + return SSAFloat(a) + b; +} + +SSAFloat operator-(float a, const SSAFloat &b) +{ + return SSAFloat(a) - b; +} + +SSAFloat operator*(float a, const SSAFloat &b) +{ + return SSAFloat(a) * b; +} + +SSAFloat operator/(float a, const SSAFloat &b) +{ + return SSAFloat(a) / b; +} + +SSAFloat operator+(const SSAFloat &a, float b) +{ + return a + SSAFloat(b); +} + +SSAFloat operator-(const SSAFloat &a, float b) +{ + return a - SSAFloat(b); +} + +SSAFloat operator*(const SSAFloat &a, float b) +{ + return a * SSAFloat(b); +} + +SSAFloat operator/(const SSAFloat &a, float b) +{ + return a / SSAFloat(b); +} + diff --git a/src/r_compiler/ssa/ssa_float.h b/src/r_compiler/ssa/ssa_float.h new file mode 100644 index 0000000000..2349ab8773 --- 
/dev/null +++ b/src/r_compiler/ssa/ssa_float.h @@ -0,0 +1,42 @@ + +#pragma once + +namespace llvm { class Value; } +namespace llvm { class Type; } + +class SSAInt; + +class SSAFloat +{ +public: + SSAFloat(); + SSAFloat(SSAInt i); + SSAFloat(float constant); + explicit SSAFloat(llvm::Value *v); + static SSAFloat from_llvm(llvm::Value *v) { return SSAFloat(v); } + static llvm::Type *llvm_type(); + static SSAFloat sqrt(SSAFloat f); + static SSAFloat sin(SSAFloat val); + static SSAFloat cos(SSAFloat val); + static SSAFloat pow(SSAFloat val, SSAFloat power); + static SSAFloat exp(SSAFloat val); + static SSAFloat log(SSAFloat val); + static SSAFloat fma(SSAFloat a, SSAFloat b, SSAFloat c); + + llvm::Value *v; +}; + +SSAFloat operator+(const SSAFloat &a, const SSAFloat &b); +SSAFloat operator-(const SSAFloat &a, const SSAFloat &b); +SSAFloat operator*(const SSAFloat &a, const SSAFloat &b); +SSAFloat operator/(const SSAFloat &a, const SSAFloat &b); + +SSAFloat operator+(float a, const SSAFloat &b); +SSAFloat operator-(float a, const SSAFloat &b); +SSAFloat operator*(float a, const SSAFloat &b); +SSAFloat operator/(float a, const SSAFloat &b); + +SSAFloat operator+(const SSAFloat &a, float b); +SSAFloat operator-(const SSAFloat &a, float b); +SSAFloat operator*(const SSAFloat &a, float b); +SSAFloat operator/(const SSAFloat &a, float b); diff --git a/src/r_compiler/ssa/ssa_float_ptr.cpp b/src/r_compiler/ssa/ssa_float_ptr.cpp new file mode 100644 index 0000000000..4413c6e923 --- /dev/null +++ b/src/r_compiler/ssa/ssa_float_ptr.cpp @@ -0,0 +1,65 @@ + +#include "ssa_float_ptr.h" +#include "ssa_scope.h" +#include "r_compiler/llvm_include.h" + +SSAFloatPtr::SSAFloatPtr() +: v(0) +{ +} + +SSAFloatPtr::SSAFloatPtr(llvm::Value *v) +: v(v) +{ +} + +llvm::Type *SSAFloatPtr::llvm_type() +{ + return llvm::Type::getFloatPtrTy(SSAScope::context()); +} + +SSAFloatPtr SSAFloatPtr::operator[](SSAInt index) const +{ + return SSAFloatPtr::from_llvm(SSAScope::builder().CreateGEP(v, index.v, 
SSAScope::hint())); +} + +SSAFloat SSAFloatPtr::load() const +{ + return SSAFloat::from_llvm(SSAScope::builder().CreateLoad(v, false, SSAScope::hint())); +} + +SSAVec4f SSAFloatPtr::load_vec4f() const +{ + llvm::PointerType *m4xfloattypeptr = llvm::VectorType::get(llvm::Type::getFloatTy(SSAScope::context()), 4)->getPointerTo(); + return SSAVec4f::from_llvm(SSAScope::builder().CreateLoad(SSAScope::builder().CreateBitCast(v, m4xfloattypeptr, SSAScope::hint()), false, SSAScope::hint())); +} + +SSAVec4f SSAFloatPtr::load_unaligned_vec4f() const +{ + llvm::PointerType *m4xfloattypeptr = llvm::VectorType::get(llvm::Type::getFloatTy(SSAScope::context()), 4)->getPointerTo(); + return SSAVec4f::from_llvm(SSAScope::builder().Insert(new llvm::LoadInst(SSAScope::builder().CreateBitCast(v, m4xfloattypeptr, SSAScope::hint()), SSAScope::hint(), false, 4), SSAScope::hint())); + // return SSAVec4f::from_llvm(SSAScope::builder().CreateCall(get_intrinsic(llvm::Intrinsic::x86_sse2_loadu_dq), SSAScope::builder().CreateBitCast(v, llvm::PointerType::getUnqual(llvm::IntegerType::get(SSAScope::context(), 8))))); +} + +void SSAFloatPtr::store(const SSAFloat &new_value) +{ + SSAScope::builder().CreateStore(new_value.v, v, false); +} + +void SSAFloatPtr::store_vec4f(const SSAVec4f &new_value) +{ + llvm::PointerType *m4xfloattypeptr = llvm::VectorType::get(llvm::Type::getFloatTy(SSAScope::context()), 4)->getPointerTo(); + SSAScope::builder().CreateAlignedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xfloattypeptr, SSAScope::hint()), 16); +} + +void SSAFloatPtr::store_unaligned_vec4f(const SSAVec4f &new_value) +{ + /*llvm::Value *values[2] = + { + SSAScope::builder().CreateBitCast(v, llvm::Type::getFloatPtrTy(SSAScope::context())), + new_value.v + }; + SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse_storeu_ps), values);*/ + llvm::PointerType *m4xfloattypeptr = llvm::VectorType::get(llvm::Type::getFloatTy(SSAScope::context()), 4)->getPointerTo(); + 
SSAScope::builder().CreateStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xfloattypeptr, SSAScope::hint())); +} diff --git a/src/r_compiler/ssa/ssa_float_ptr.h b/src/r_compiler/ssa/ssa_float_ptr.h new file mode 100644 index 0000000000..a4318e027f --- /dev/null +++ b/src/r_compiler/ssa/ssa_float_ptr.h @@ -0,0 +1,27 @@ + +#pragma once + +#include "ssa_float.h" +#include "ssa_int.h" +#include "ssa_vec4f.h" + +namespace llvm { class Value; } +namespace llvm { class Type; } + +class SSAFloatPtr +{ +public: + SSAFloatPtr(); + explicit SSAFloatPtr(llvm::Value *v); + static SSAFloatPtr from_llvm(llvm::Value *v) { return SSAFloatPtr(v); } + static llvm::Type *llvm_type(); + SSAFloatPtr operator[](SSAInt index) const; + SSAFloat load() const; + SSAVec4f load_vec4f() const; + SSAVec4f load_unaligned_vec4f() const; + void store(const SSAFloat &new_value); + void store_vec4f(const SSAVec4f &new_value); + void store_unaligned_vec4f(const SSAVec4f &new_value); + + llvm::Value *v; +}; diff --git a/src/r_compiler/ssa/ssa_for_block.cpp b/src/r_compiler/ssa/ssa_for_block.cpp new file mode 100644 index 0000000000..ce93286076 --- /dev/null +++ b/src/r_compiler/ssa/ssa_for_block.cpp @@ -0,0 +1,25 @@ + +#include "ssa_for_block.h" +#include "ssa_scope.h" + +SSAForBlock::SSAForBlock() +: if_basic_block(0), loop_basic_block(0), end_basic_block(0) +{ + if_basic_block = llvm::BasicBlock::Create(SSAScope::context(), "forbegin", SSAScope::builder().GetInsertBlock()->getParent()); + loop_basic_block = llvm::BasicBlock::Create(SSAScope::context(), "forloop", SSAScope::builder().GetInsertBlock()->getParent()); + end_basic_block = llvm::BasicBlock::Create(SSAScope::context(), "forend", SSAScope::builder().GetInsertBlock()->getParent()); + SSAScope::builder().CreateBr(if_basic_block); + SSAScope::builder().SetInsertPoint(if_basic_block); +} + +void SSAForBlock::loop_block(SSABool true_condition) +{ + SSAScope::builder().CreateCondBr(true_condition.v, loop_basic_block, end_basic_block); + 
SSAScope::builder().SetInsertPoint(loop_basic_block); +} + +void SSAForBlock::end_block() +{ + SSAScope::builder().CreateBr(if_basic_block); + SSAScope::builder().SetInsertPoint(end_basic_block); +} diff --git a/src/r_compiler/ssa/ssa_for_block.h b/src/r_compiler/ssa/ssa_for_block.h new file mode 100644 index 0000000000..58803dee5c --- /dev/null +++ b/src/r_compiler/ssa/ssa_for_block.h @@ -0,0 +1,18 @@ + +#pragma once + +#include "ssa_bool.h" +#include "r_compiler/llvm_include.h" + +class SSAForBlock +{ +public: + SSAForBlock(); + void loop_block(SSABool true_condition); + void end_block(); + +private: + llvm::BasicBlock *if_basic_block; + llvm::BasicBlock *loop_basic_block; + llvm::BasicBlock *end_basic_block; +}; diff --git a/src/r_compiler/ssa/ssa_function.cpp b/src/r_compiler/ssa/ssa_function.cpp new file mode 100644 index 0000000000..aee4de5a92 --- /dev/null +++ b/src/r_compiler/ssa/ssa_function.cpp @@ -0,0 +1,55 @@ + +#include "ssa_function.h" +#include "ssa_int.h" +#include "ssa_scope.h" +#include "ssa_value.h" +#include "r_compiler/llvm_include.h" + +SSAFunction::SSAFunction(const std::string name) +: name(name), return_type(llvm::Type::getVoidTy(SSAScope::context())), func() +{ +} + +void SSAFunction::set_return_type(llvm::Type *type) +{ + return_type = type; +} + +void SSAFunction::add_parameter(llvm::Type *type) +{ + parameters.push_back(type); +} + +void SSAFunction::create_public() +{ + func = SSAScope::module()->getFunction(name.c_str()); + if (func == 0) + { + llvm::FunctionType *function_type = llvm::FunctionType::get(return_type, parameters, false); + func = llvm::Function::Create(function_type, llvm::Function::ExternalLinkage, name.c_str(), SSAScope::module()); + //func->setCallingConv(llvm::CallingConv::X86_StdCall); + } + llvm::BasicBlock *entry = llvm::BasicBlock::Create(SSAScope::context(), "entry", func); + SSAScope::builder().SetInsertPoint(entry); +} + +void SSAFunction::create_private() +{ + func = 
SSAScope::module()->getFunction(name.c_str()); + if (func == 0) + { + llvm::FunctionType *function_type = llvm::FunctionType::get(return_type, parameters, false); + func = llvm::Function::Create(function_type, llvm::Function::PrivateLinkage, name.c_str(), SSAScope::module()); + func->addFnAttr(llvm::Attribute::AlwaysInline); + } + llvm::BasicBlock *entry = llvm::BasicBlock::Create(SSAScope::context(), "entry", func); + SSAScope::builder().SetInsertPoint(entry); +} + +SSAValue SSAFunction::parameter(int index) +{ + llvm::Function::arg_iterator arg_it = func->arg_begin(); + for (int i = 0; i < index; i++) + ++arg_it; + return SSAValue::from_llvm(static_cast(arg_it)); +} diff --git a/src/r_compiler/ssa/ssa_function.h b/src/r_compiler/ssa/ssa_function.h new file mode 100644 index 0000000000..f1969c35b5 --- /dev/null +++ b/src/r_compiler/ssa/ssa_function.h @@ -0,0 +1,30 @@ + +#pragma once + +#include +#include + +namespace llvm { class Value; } +namespace llvm { class Type; } +namespace llvm { class Function; } + +class SSAInt; +class SSAValue; + +class SSAFunction +{ +public: + SSAFunction(const std::string name); + void set_return_type(llvm::Type *type); + void add_parameter(llvm::Type *type); + void create_public(); + void create_private(); + SSAValue parameter(int index); + + llvm::Function *func; + +private: + std::string name; + llvm::Type *return_type; + std::vector parameters; +}; diff --git a/src/r_compiler/ssa/ssa_if_block.cpp b/src/r_compiler/ssa/ssa_if_block.cpp new file mode 100644 index 0000000000..e2de9ecadc --- /dev/null +++ b/src/r_compiler/ssa/ssa_if_block.cpp @@ -0,0 +1,30 @@ + +#include "ssa_if_block.h" +#include "ssa_scope.h" + +SSAIfBlock::SSAIfBlock() +: if_basic_block(0), else_basic_block(0), end_basic_block(0) +{ +} + +void SSAIfBlock::if_block(SSABool true_condition) +{ + if_basic_block = llvm::BasicBlock::Create(SSAScope::context(), "if", SSAScope::builder().GetInsertBlock()->getParent()); + else_basic_block = 
llvm::BasicBlock::Create(SSAScope::context(), "else", SSAScope::builder().GetInsertBlock()->getParent()); + end_basic_block = else_basic_block; + SSAScope::builder().CreateCondBr(true_condition.v, if_basic_block, else_basic_block); + SSAScope::builder().SetInsertPoint(if_basic_block); +} + +void SSAIfBlock::else_block() +{ + end_basic_block = llvm::BasicBlock::Create(SSAScope::context(), "end", SSAScope::builder().GetInsertBlock()->getParent()); + SSAScope::builder().CreateBr(end_basic_block); + SSAScope::builder().SetInsertPoint(else_basic_block); +} + +void SSAIfBlock::end_block() +{ + SSAScope::builder().CreateBr(end_basic_block); + SSAScope::builder().SetInsertPoint(end_basic_block); +} diff --git a/src/r_compiler/ssa/ssa_if_block.h b/src/r_compiler/ssa/ssa_if_block.h new file mode 100644 index 0000000000..98c534a867 --- /dev/null +++ b/src/r_compiler/ssa/ssa_if_block.h @@ -0,0 +1,46 @@ + +#pragma once + +#include "ssa_bool.h" +#include "ssa_phi.h" +#include "r_compiler/llvm_include.h" + +class SSAIfBlock +{ +public: + SSAIfBlock(); + void if_block(SSABool true_condition); + void else_block(); + void end_block(); + +private: + llvm::BasicBlock *if_basic_block; + llvm::BasicBlock *else_basic_block; + llvm::BasicBlock *end_basic_block; +}; + +template +T ssa_min(T a, T b) +{ + SSAPhi phi; + SSAIfBlock if_block; + if_block.if_block(a <= b); + phi.add_incoming(a); + if_block.else_block(); + phi.add_incoming(b); + if_block.end_block(); + return phi.create(); +} + +template +T ssa_max(T a, T b) +{ + SSAPhi phi; + SSAIfBlock if_block; + if_block.if_block(a >= b); + phi.add_incoming(a); + if_block.else_block(); + phi.add_incoming(b); + if_block.end_block(); + return phi.create(); +} diff --git a/src/r_compiler/ssa/ssa_int.cpp b/src/r_compiler/ssa/ssa_int.cpp new file mode 100644 index 0000000000..9f3c54f50c --- /dev/null +++ b/src/r_compiler/ssa/ssa_int.cpp @@ -0,0 +1,117 @@ + +#include "ssa_int.h" +#include "ssa_float.h" +#include "ssa_scope.h" +#include 
"r_compiler/llvm_include.h" + +SSAInt::SSAInt() +: v(0) +{ +} + +SSAInt::SSAInt(int constant) +: v(0) +{ + v = llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, constant, true)); +} + +SSAInt::SSAInt(SSAFloat f) +: v(0) +{ + v = SSAScope::builder().CreateFPToSI(f.v, llvm::Type::getInt32Ty(SSAScope::context()), SSAScope::hint()); +} + +SSAInt::SSAInt(llvm::Value *v) +: v(v) +{ +} + +llvm::Type *SSAInt::llvm_type() +{ + return llvm::Type::getInt32Ty(SSAScope::context()); +} + +SSAInt operator+(const SSAInt &a, const SSAInt &b) +{ + return SSAInt::from_llvm(SSAScope::builder().CreateAdd(a.v, b.v, SSAScope::hint())); +} + +SSAInt operator-(const SSAInt &a, const SSAInt &b) +{ + return SSAInt::from_llvm(SSAScope::builder().CreateSub(a.v, b.v, SSAScope::hint())); +} + +SSAInt operator*(const SSAInt &a, const SSAInt &b) +{ + return SSAInt::from_llvm(SSAScope::builder().CreateMul(a.v, b.v, SSAScope::hint())); +} + +SSAInt operator/(const SSAInt &a, const SSAInt &b) +{ + return SSAInt::from_llvm(SSAScope::builder().CreateSDiv(a.v, b.v, SSAScope::hint())); +} + +SSAInt operator%(const SSAInt &a, const SSAInt &b) +{ + return SSAInt::from_llvm(SSAScope::builder().CreateSRem(a.v, b.v, SSAScope::hint())); +} + +SSAInt operator+(int a, const SSAInt &b) +{ + return SSAInt(a) + b; +} + +SSAInt operator-(int a, const SSAInt &b) +{ + return SSAInt(a) - b; +} + +SSAInt operator*(int a, const SSAInt &b) +{ + return SSAInt(a) * b; +} + +SSAInt operator/(int a, const SSAInt &b) +{ + return SSAInt(a) / b; +} + +SSAInt operator%(int a, const SSAInt &b) +{ + return SSAInt(a) % b; +} + +SSAInt operator+(const SSAInt &a, int b) +{ + return a + SSAInt(b); +} + +SSAInt operator-(const SSAInt &a, int b) +{ + return a - SSAInt(b); +} + +SSAInt operator*(const SSAInt &a, int b) +{ + return a * SSAInt(b); +} + +SSAInt operator/(const SSAInt &a, int b) +{ + return a / SSAInt(b); +} + +SSAInt operator%(const SSAInt &a, int b) +{ + return a % SSAInt(b); +} + +SSAInt operator<<(const SSAInt 
&a, int bits) +{ + return SSAInt::from_llvm(SSAScope::builder().CreateShl(a.v, bits, SSAScope::hint())); +} + +SSAInt operator>>(const SSAInt &a, int bits) +{ + return SSAInt::from_llvm(SSAScope::builder().CreateLShr(a.v, bits, SSAScope::hint())); +} diff --git a/src/r_compiler/ssa/ssa_int.h b/src/r_compiler/ssa/ssa_int.h new file mode 100644 index 0000000000..0be37ee7eb --- /dev/null +++ b/src/r_compiler/ssa/ssa_int.h @@ -0,0 +1,41 @@ + +#pragma once + +namespace llvm { class Value; } +namespace llvm { class Type; } + +class SSAFloat; + +class SSAInt +{ +public: + SSAInt(); + SSAInt(int constant); + SSAInt(SSAFloat f); + explicit SSAInt(llvm::Value *v); + static SSAInt from_llvm(llvm::Value *v) { return SSAInt(v); } + static llvm::Type *llvm_type(); + + llvm::Value *v; +}; + +SSAInt operator+(const SSAInt &a, const SSAInt &b); +SSAInt operator-(const SSAInt &a, const SSAInt &b); +SSAInt operator*(const SSAInt &a, const SSAInt &b); +SSAInt operator/(const SSAInt &a, const SSAInt &b); +SSAInt operator%(const SSAInt &a, const SSAInt &b); + +SSAInt operator+(int a, const SSAInt &b); +SSAInt operator-(int a, const SSAInt &b); +SSAInt operator*(int a, const SSAInt &b); +SSAInt operator/(int a, const SSAInt &b); +SSAInt operator%(int a, const SSAInt &b); + +SSAInt operator+(const SSAInt &a, int b); +SSAInt operator-(const SSAInt &a, int b); +SSAInt operator*(const SSAInt &a, int b); +SSAInt operator/(const SSAInt &a, int b); +SSAInt operator%(const SSAInt &a, int b); + +SSAInt operator<<(const SSAInt &a, int bits); +SSAInt operator>>(const SSAInt &a, int bits); diff --git a/src/r_compiler/ssa/ssa_int_ptr.cpp b/src/r_compiler/ssa/ssa_int_ptr.cpp new file mode 100644 index 0000000000..dd0ca17f6f --- /dev/null +++ b/src/r_compiler/ssa/ssa_int_ptr.cpp @@ -0,0 +1,58 @@ + +#include "ssa_int_ptr.h" +#include "ssa_scope.h" +#include "r_compiler/llvm_include.h" + +SSAIntPtr::SSAIntPtr() +: v(0) +{ +} + +SSAIntPtr::SSAIntPtr(llvm::Value *v) +: v(v) +{ +} + +llvm::Type 
*SSAIntPtr::llvm_type() +{ + return llvm::Type::getInt32PtrTy(SSAScope::context()); +} + +SSAIntPtr SSAIntPtr::operator[](SSAInt index) const +{ + return SSAIntPtr::from_llvm(SSAScope::builder().CreateGEP(v, index.v, SSAScope::hint())); +} + +SSAInt SSAIntPtr::load() const +{ + return SSAInt::from_llvm(SSAScope::builder().CreateLoad(v, false, SSAScope::hint())); +} + +SSAVec4i SSAIntPtr::load_vec4i() const +{ + llvm::PointerType *m4xint32typeptr = llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 4)->getPointerTo(); + return SSAVec4i::from_llvm(SSAScope::builder().CreateLoad(SSAScope::builder().CreateBitCast(v, m4xint32typeptr, SSAScope::hint()), false, SSAScope::hint())); +} + +SSAVec4i SSAIntPtr::load_unaligned_vec4i() const +{ + llvm::PointerType *m4xint32typeptr = llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 4)->getPointerTo(); + return SSAVec4i::from_llvm(SSAScope::builder().Insert(new llvm::LoadInst(SSAScope::builder().CreateBitCast(v, m4xint32typeptr, SSAScope::hint()), SSAScope::hint(), false, 4), SSAScope::hint())); +} + +void SSAIntPtr::store(const SSAInt &new_value) +{ + SSAScope::builder().CreateStore(new_value.v, v, false); +} + +void SSAIntPtr::store_vec4i(const SSAVec4i &new_value) +{ + llvm::PointerType *m4xint32typeptr = llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 4)->getPointerTo(); + SSAScope::builder().CreateAlignedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xint32typeptr, SSAScope::hint()), 16); +} + +void SSAIntPtr::store_unaligned_vec4i(const SSAVec4i &new_value) +{ + llvm::PointerType *m4xint32typeptr = llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 4)->getPointerTo(); + SSAScope::builder().CreateStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xint32typeptr, SSAScope::hint())); +} diff --git a/src/r_compiler/ssa/ssa_int_ptr.h b/src/r_compiler/ssa/ssa_int_ptr.h new file mode 100644 index 0000000000..20e024a311 --- /dev/null +++ 
b/src/r_compiler/ssa/ssa_int_ptr.h @@ -0,0 +1,27 @@ + +#pragma once + +#include "ssa_float.h" +#include "ssa_int.h" +#include "ssa_vec4i.h" + +namespace llvm { class Value; } +namespace llvm { class Type; } + +class SSAIntPtr +{ +public: + SSAIntPtr(); + explicit SSAIntPtr(llvm::Value *v); + static SSAIntPtr from_llvm(llvm::Value *v) { return SSAIntPtr(v); } + static llvm::Type *llvm_type(); + SSAIntPtr operator[](SSAInt index) const; + SSAInt load() const; + SSAVec4i load_vec4i() const; + SSAVec4i load_unaligned_vec4i() const; + void store(const SSAInt &new_value); + void store_vec4i(const SSAVec4i &new_value); + void store_unaligned_vec4i(const SSAVec4i &new_value); + + llvm::Value *v; +}; diff --git a/src/r_compiler/ssa/ssa_phi.h b/src/r_compiler/ssa/ssa_phi.h new file mode 100644 index 0000000000..89cbc8cf05 --- /dev/null +++ b/src/r_compiler/ssa/ssa_phi.h @@ -0,0 +1,33 @@ + +#pragma once + +#include "ssa_scope.h" + +class SSAIfBlock; + +template +class SSAPhi +{ +public: + void add_incoming(SSAVariable var) + { + incoming.push_back(Incoming(var.v, SSAScope::builder().GetInsertBlock())); + } + + SSAVariable create() + { + llvm::PHINode *phi_node = SSAScope::builder().CreatePHI(SSAVariable::llvm_type(), (unsigned int)incoming.size(), SSAScope::hint()); + for (size_t i = 0; i < incoming.size(); i++) + phi_node->addIncoming(incoming[i].v, incoming[i].bb); + return SSAVariable::from_llvm(phi_node); + } + +private: + struct Incoming + { + Incoming(llvm::Value *v, llvm::BasicBlock *bb) : v(v), bb(bb) { } + llvm::Value *v; + llvm::BasicBlock *bb; + }; + std::vector incoming; +}; diff --git a/src/r_compiler/ssa/ssa_pixelformat4f.h b/src/r_compiler/ssa/ssa_pixelformat4f.h new file mode 100644 index 0000000000..507e95b5d1 --- /dev/null +++ b/src/r_compiler/ssa/ssa_pixelformat4f.h @@ -0,0 +1,28 @@ + +#pragma once + +#include "ssa_int.h" +#include "ssa_float_ptr.h" + +class SSAPixelFormat4f +{ +public: + SSAPixelFormat4f() { } + SSAPixelFormat4f(SSAFloatPtr pixels, SSAInt 
width, SSAInt height) : _pixels(pixels) { } + + SSAFloatPtr pixels() { return _pixels; } + SSAFloatPtr pixels() const { return _pixels; } + + SSAVec4f get4f(SSAInt index) const + { + return _pixels[index * 4].load_vec4f(); + } + + void set4f(SSAInt index, const SSAVec4f &pixel) + { + _pixels[index * 4].store_vec4f(pixel); + } + +protected: + SSAFloatPtr _pixels; +}; diff --git a/src/r_compiler/ssa/ssa_pixelformat4ub.h b/src/r_compiler/ssa/ssa_pixelformat4ub.h new file mode 100644 index 0000000000..fdf98c4aa6 --- /dev/null +++ b/src/r_compiler/ssa/ssa_pixelformat4ub.h @@ -0,0 +1,28 @@ + +#pragma once + +#include "ssa_int.h" +#include "ssa_ubyte_ptr.h" + +class SSAPixelFormat4ub +{ +public: + SSAPixelFormat4ub() { } + SSAPixelFormat4ub(SSAUBytePtr pixels, SSAInt width, SSAInt height) : _pixels(pixels) { } + + SSAUBytePtr pixels() { return _pixels; } + SSAUBytePtr pixels() const { return _pixels; } + + SSAVec4f get4f(SSAInt index) const + { + return SSAVec4f(_pixels[index * 4].load_vec4ub()) * (1.0f / 255.0f); + } + + void set4f(SSAInt index, const SSAVec4f &pixel) + { + _pixels[index * 4].store_vec4ub(SSAVec4i(pixel * 255.0f)); + } + +private: + SSAUBytePtr _pixels; +}; diff --git a/src/r_compiler/ssa/ssa_pixelformat4ub_argb_rev.h b/src/r_compiler/ssa/ssa_pixelformat4ub_argb_rev.h new file mode 100644 index 0000000000..4601eeb3c1 --- /dev/null +++ b/src/r_compiler/ssa/ssa_pixelformat4ub_argb_rev.h @@ -0,0 +1,35 @@ + +#pragma once + +#include "ssa_int.h" +#include "ssa_ubyte_ptr.h" + +class SSAPixelFormat4ub_argb_rev +{ +public: + SSAPixelFormat4ub_argb_rev() { } + SSAPixelFormat4ub_argb_rev(SSAUBytePtr pixels, SSAInt width, SSAInt height) : _pixels(pixels) { } + + SSAUBytePtr pixels() { return _pixels; } + SSAUBytePtr pixels() const { return _pixels; } +/* + void get4f(SSAInt index, SSAVec4f &out_pixel1, SSAVec4f &out_pixel2) const + { + SSAVec8s p = _pixels[index * 4].load_vec8s(); + out_pixel1 = SSAVec4f::shuffle(SSAVec4f(SSAVec4i::extendlo(p)) * (1.0f / 255.0f), 
2, 1, 0, 3); + out_pixel2 = SSAVec4f::shuffle(SSAVec4f(SSAVec4i::extendhi(p)) * (1.0f / 255.0f), 2, 1, 0, 3); + } +*/ + SSAVec4f get4f(SSAInt index) const + { + return SSAVec4f::shuffle(SSAVec4f(_pixels[index * 4].load_vec4ub()) * (1.0f / 255.0f), 2, 1, 0, 3); + } + + void set4f(SSAInt index, const SSAVec4f &pixel) + { + _pixels[index * 4].store_vec4ub(SSAVec4i(SSAVec4f::shuffle(pixel * 255.0f, 2, 1, 0, 3))); + } + +public: + SSAUBytePtr _pixels; +}; diff --git a/src/r_compiler/ssa/ssa_pixelformat4ub_rev.h b/src/r_compiler/ssa/ssa_pixelformat4ub_rev.h new file mode 100644 index 0000000000..402480c49b --- /dev/null +++ b/src/r_compiler/ssa/ssa_pixelformat4ub_rev.h @@ -0,0 +1,28 @@ + +#pragma once + +#include "ssa_int.h" +#include "ssa_ubyte_ptr.h" + +class SSAPixelFormat4ub_rev +{ +public: + SSAPixelFormat4ub_rev() { } + SSAPixelFormat4ub_rev(SSAUBytePtr pixels, SSAInt width, SSAInt height) : _pixels(pixels) { } + + SSAUBytePtr pixels() { return _pixels; } + SSAUBytePtr pixels() const { return _pixels; } + + SSAVec4f get4f(SSAInt index) const + { + return SSAVec4f::shuffle(SSAVec4f(_pixels[index * 4].load_vec4ub()) * (1.0f / 255.0f), 3, 2, 1, 0); + } + + void set4f(SSAInt index, const SSAVec4f &pixel) + { + _pixels[index * 4].store_vec4ub(SSAVec4i(SSAVec4f::shuffle(pixel * 255.0f, 3, 2, 1, 0))); + } + +public: + SSAUBytePtr _pixels; +}; diff --git a/src/r_compiler/ssa/ssa_pixels.h b/src/r_compiler/ssa/ssa_pixels.h new file mode 100644 index 0000000000..a4209d439a --- /dev/null +++ b/src/r_compiler/ssa/ssa_pixels.h @@ -0,0 +1,39 @@ + +#pragma once + +#include "ssa_ubyte.h" +#include "ssa_ubyte_ptr.h" +#include "ssa_float.h" +#include "ssa_float_ptr.h" +#include "ssa_int.h" +#include "ssa_pixeltype.h" +//#include "ssa_pixelformat1f.h" +//#include "ssa_pixelformat2f.h" +//#include "ssa_pixelformat3f.h" +#include "ssa_pixelformat4f.h" +//#include "ssa_pixelformat1ub.h" +//#include "ssa_pixelformat2ub.h" +//#include "ssa_pixelformat3ub.h" +//#include 
"ssa_pixelformat3ub_rev.h" +#include "ssa_pixelformat4ub.h" +//#include "ssa_pixelformat4ub_argb.h" +#include "ssa_pixelformat4ub_rev.h" +#include "ssa_pixelformat4ub_argb_rev.h" +//#include "ssa_pixelformat4ub_channel.h" + +//typedef SSAPixelType SSAPixels1f; +//typedef SSAPixelType SSAPixels2f; +//typedef SSAPixelType SSAPixels3f; +typedef SSAPixelType SSAPixels4f; + +//typedef SSAPixelType SSAPixels1ub; +//typedef SSAPixelType SSAPixels2ub; +//typedef SSAPixelType SSAPixels3ub; +typedef SSAPixelType SSAPixels4ub; +//typedef SSAPixelType SSAPixels4ub_argb; + +//typedef SSAPixelType SSAPixels3ub_rev; +typedef SSAPixelType SSAPixels4ub_rev; +typedef SSAPixelType SSAPixels4ub_argb_rev; + +//typedef SSAPixelType SSAPixels4ub_channel; diff --git a/src/r_compiler/ssa/ssa_pixeltype.h b/src/r_compiler/ssa/ssa_pixeltype.h new file mode 100644 index 0000000000..8614f171d1 --- /dev/null +++ b/src/r_compiler/ssa/ssa_pixeltype.h @@ -0,0 +1,498 @@ + +#pragma once + +#include "ssa_int.h" +#include "ssa_float.h" +#include "ssa_vec4f.h" +#include "ssa_bool.h" +#include "ssa_if_block.h" +#include "ssa_phi.h" + +template +class SSAPixelType : public PixelFormat +{ +public: + SSAPixelType() + { + } + + SSAPixelType(SSAInt width, SSAInt height, PixelType pixels) + : PixelFormat(pixels, width, height), _width(width), _height(height) + { + _width32 = SSAVec4i(_width); + SSAVec4i height32(_height); + _widthps = SSAVec4f(_width32); + _heightps = SSAVec4f(height32); + _width16 = SSAVec8s(_width32, _width32); + + _widthheight = SSAVec4i::shuffle(_width32, height32, 0, 0, 4, 4); + _widthheightps = SSAVec4i::shuffle(_widthps, _heightps, 0, 0, 4, 4); + } + + SSAInt width() const { return _width; } + SSAInt height() const { return _height; } + SSAInt size() const { return _width * _height; } + + SSABool in_bounds(SSAInt i) const { return i >= 0 && i < _width * _height; } + SSABool in_bounds(SSAInt x, SSAInt y) const { return x>= 0 && x < _width && y >= 0 && y < _height; } + //void 
throw_if_out_of_bounds(SSAInt i) const { if (!in_bounds(i)) throw clan::Exception("Out of bounds"); } + //void throw_if_out_of_bounds(SSAInt x, SSAInt y) const { if (!in_bounds(x, y)) throw clan::Exception("Out of bounds"); } + + SSAInt s_to_x(SSAFloat s) const { return round(s * SSAFloat(_width)); } + SSAInt t_to_y(SSAFloat t) const { return round(t * SSAFloat(_height)); } + SSAInt clamp_x(SSAInt x) const { return clamp(x, _width); } + SSAInt clamp_y(SSAInt y) const { return clamp(y, _height); } + SSAInt repeat_x(SSAInt x) const { return repeat(x,_width); } + SSAInt repeat_y(SSAInt y) const { return repeat(y, _height); } + SSAInt mirror_x(SSAInt x) const { return mirror(x, _width); } + SSAInt mirror_y(SSAInt y) const { return mirror(y, _height); } + + static SSAInt int_min(SSAInt a, SSAInt b) + { + SSAPhi phi; + SSAIfBlock branch; + branch.if_block(a <= b); + phi.add_incoming(a); + branch.else_block(); + phi.add_incoming(b); + branch.end_block(); + return phi.create(); + } + + static SSAInt int_max(SSAInt a, SSAInt b) + { + SSAPhi phi; + SSAIfBlock branch; + branch.if_block(a >= b); + phi.add_incoming(a); + branch.else_block(); + phi.add_incoming(b); + branch.end_block(); + return phi.create(); + } + + static SSAInt clamp(SSAInt v, SSAInt size) + { + return int_max(int_min(v, size - 1), 0); // limits v to the range [0, size - 1] + } + + static SSAInt repeat(SSAInt v, SSAInt size) + { + SSAPhi phi; + SSAIfBlock branch; + branch.if_block(v >= 0); + phi.add_incoming(v % size); + branch.else_block(); + phi.add_incoming(size - 1 + (v + 1) % size); // negative v: shift by one before the remainder so -1 wraps to size - 1 and -size wraps to 0 (assumes SSAInt % is a truncating signed remainder - TODO confirm in ssa_int.cpp) + branch.end_block(); + return phi.create(); + } + + static SSAInt mirror(SSAInt v, SSAInt size) + { + SSAInt size2 = size * 2; + v = repeat(v, size2); + + SSAPhi phi; + SSAIfBlock branch; + branch.if_block(v < size); + phi.add_incoming(v); + branch.else_block(); + phi.add_incoming(size2 - v - 1); // second half of the doubled period runs backwards + branch.end_block(); + return phi.create(); + } + + static SSAInt round(SSAFloat v) + { + SSAPhi phi; + SSAIfBlock branch; + branch.if_block(v >= 0.0f); + phi.add_incoming(v 
+ 0.5f); + branch.else_block(); + phi.add_incoming(v - 0.5f); + branch.end_block(); + return SSAInt(phi.create()); + } + + // To do: fix this: + static SSAInt int_floor(SSAFloat v) + { + return SSAInt(v); + } + static SSAFloat fract(SSAFloat v) { return v - SSAFloat(int_floor(v)); } + + SSAVec4f get4f(SSAInt x, SSAInt y) const { return PixelFormat::get4f(x + y * _width); } + void set4f(SSAInt x, SSAInt y, const SSAVec4f &pixel) { PixelFormat::set4f(x + y * _width, pixel); } + + SSAVec4f get_clamp4f(SSAInt x, SSAInt y) const { return get4f(clamp_x(x), clamp_y(y)); } + SSAVec4f get_repeat4f(SSAInt x, SSAInt y) const { return get4f(repeat_x(x), repeat_y(y)); } + SSAVec4f get_mirror4f(SSAInt x, SSAInt y) const { return get4f(mirror_x(x), mirror_y(y)); } + + SSAVec4f linear_interpolate4f(SSAFloat s, SSAFloat t, const SSAVec4f *samples) const + { + SSAFloat a = fract(s * SSAFloat(_width) - 0.5f); + SSAFloat b = fract(t * SSAFloat(_height) - 0.5f); + SSAFloat inv_a = 1.0f - a; + SSAFloat inv_b = 1.0f - b; + return + samples[0] * (inv_a * inv_b) + + samples[1] * (a * inv_b) + + samples[2] * (inv_a * b) + + samples[3] * (a * b); + } + + void gather_clamp4f(SSAFloat s, SSAFloat t, SSAVec4f *out_pixels) const + { + SSAInt x = int_floor(s * SSAFloat(_width) - 0.5f); + SSAInt y = int_floor(t * SSAFloat(_height) - 0.5f); + out_pixels[0] = get_clamp4f(x, y); + out_pixels[1] = get_clamp4f(x + 1, y); + out_pixels[2] = get_clamp4f(x, y + 1); + out_pixels[3] = get_clamp4f(x + 1, y + 1); + /* + SSAInt x0 = clamp_x(x); + SSAInt x1 = clamp_x(x + 1); + SSAInt y0 = clamp_y(y); + SSAInt y1 = clamp_y(y + 1); + SSAInt offset0 = y0 * _width; + SSAInt offset1 = y1 * _width; + SSAPhi phi0; + SSAPhi phi1; + SSAPhi phi2; + SSAPhi phi3; + SSAIfBlock if0; + if0.if_block(x0 + 1 == x1); + phi0.add_incoming(PixelFormat::get4f(x0 + offset0)); + phi1.add_incoming(PixelFormat::get4f(x1 + offset0)); + phi2.add_incoming(PixelFormat::get4f(x0 + offset1)); + phi3.add_incoming(PixelFormat::get4f(x1 + 
offset1)); + if0.else_block(); + phi0.add_incoming(PixelFormat::get4f(x0 + offset0)); + phi1.add_incoming(PixelFormat::get4f(x1 + offset0)); + phi2.add_incoming(PixelFormat::get4f(x0 + offset1)); + phi3.add_incoming(PixelFormat::get4f(x1 + offset1)); + if0.end_block(); + out_pixels[0] = phi0.create(); + out_pixels[1] = phi1.create(); + out_pixels[2] = phi2.create(); + out_pixels[3] = phi3.create(); + */ + } + + void gather_repeat4f(SSAFloat s, SSAFloat t, SSAVec4f *out_pixels) const + { + SSAInt x = int_floor(s * SSAFloat(_width) - 0.5f); + SSAInt y = int_floor(t * SSAFloat(_height) - 0.5f); + out_pixels[0] = get_repeat4f(x, y); + out_pixels[1] = get_repeat4f(x + 1, y); + out_pixels[2] = get_repeat4f(x, y + 1); + out_pixels[3] = get_repeat4f(x + 1, y + 1); + } + + void gather_mirror4f(SSAFloat s, SSAFloat t, SSAVec4f *out_pixels) const + { + SSAInt x = int_floor(s * SSAFloat(_width) - 0.5f); + SSAInt y = int_floor(t * SSAFloat(_height) - 0.5f); + out_pixels[0] = get_mirror4f(x, y); + out_pixels[1] = get_mirror4f(x + 1, y); + out_pixels[2] = get_mirror4f(x, y + 1); + out_pixels[3] = get_mirror4f(x + 1, y + 1); + } + + SSAVec4f nearest_clamp4f(SSAFloat s, SSAFloat t) const { return get_clamp4f(s_to_x(s), t_to_y(t)); } + SSAVec4f nearest_repeat4f(SSAFloat s, SSAFloat t) const { return get_repeat4f(s_to_x(s), t_to_y(t)); } + SSAVec4f nearest_mirror4f(SSAFloat s, SSAFloat t) const { return get_mirror4f(s_to_x(s), t_to_y(t)); } + + SSAVec4f linear_clamp4f(SSAFloat s, SSAFloat t) const + { + SSAVec4f samples[4]; + gather_clamp4f(s, t, samples); + return linear_interpolate4f(s, t, samples); + } + + SSAVec4f linear_repeat4f(SSAFloat s, SSAFloat t) const + { + SSAVec4f samples[4]; + gather_repeat4f(s, t, samples); + return linear_interpolate4f(s, t, samples); + } + + SSAVec4f linear_mirror4f(SSAFloat s, SSAFloat t) const + { + SSAVec4f samples[4]; + gather_mirror4f(s, t, samples); + return linear_interpolate4f(s, t, samples); + } + + 
///////////////////////////////////////////////////////////////////////// + // Packed versions: + + SSAVec4i s_to_x(SSAVec4f s) const { return round(s * SSAVec4f(_width)); } + SSAVec4i t_to_y(SSAVec4f t) const { return round(t * SSAVec4f(_height)); } + SSAVec4i clamp_x(SSAVec4i x) const { return clamp(x, _width); } + SSAVec4i clamp_y(SSAVec4i y) const { return clamp(y, _height); } + SSAVec4i repeat_x(SSAVec4i x) const { return repeat(x,_width); } + SSAVec4i repeat_y(SSAVec4i y) const { return repeat(y, _height); } + SSAVec4i mirror_x(SSAVec4i x) const { return mirror(x, _width); } + SSAVec4i mirror_y(SSAVec4i y) const { return mirror(y, _height); } + + static SSAVec4i clamp(SSAVec4i v, SSAInt size) + { + return SSAVec4i::max_sse41(SSAVec4i::min_sse41(v, size - 1), 0); + } + + static SSAVec4i repeat(SSAVec4i v, SSAInt size) + { + return clamp(v, size); + /*SSAPhi phi; + SSAIfBlock branch; + branch.if_block(v >= 0); + phi.add_incoming(v % size); + branch.else_block(); + phi.add_incoming(size - 1 + v % size); + branch.end_block(); + return phi.create();*/ + } + + static SSAVec4i mirror(SSAVec4i v, SSAInt size) + { + return clamp(v, size); + /*SSAInt size2 = size * 2; + v = repeat(v, size2); + + SSAPhi phi; + SSAIfBlock branch; + branch.if_block(v < size); + phi.add_incoming(v); + branch.else_block(); + phi.add_incoming(size2 - v - 1); + branch.end_block(); + return phi.create();*/ + } + + static SSAVec4i round(SSAVec4f v) + { + // Maybe we should use the normal round SSE function (but that requires the rounding mode is set the round to nearest before the code runs) + SSAVec4i signbit = (SSAVec4i::bitcast(v) & 0x80000000); + SSAVec4f signed_half = SSAVec4f::bitcast(signbit | SSAVec4i::bitcast(SSAVec4f(0.5f))); + return v + signed_half; + } + + static SSAVec4i int_floor(SSAVec4f v) + { + return SSAVec4i(v) - (SSAVec4i::bitcast(v) >> 31); + } + + static SSAVec4f fract(SSAVec4f v) + { + // return v - SSAVec4f::floor_sse4(v); + return v - SSAVec4f(int_floor(v)); + } + + 
template + SSAVec4f nearest_helper4f(SSAVec4f s, SSAVec4f t, int index, WrapXFunctor wrap_x, WrapYFunctor wrap_y) const + { + SSAVec4i x = int_floor(s * _widthps - 0.5f); + SSAVec4i y = int_floor(t * _heightps - 0.5f); + SSAVec8s y16 = SSAVec8s(wrap_y(y), wrap_y(y)); + SSAVec8s offsethi = SSAVec8s::mulhi(y16, _width16); + SSAVec8s offsetlo = y16 * _width16; + SSAVec4i offset = SSAVec4i::combinelo(offsetlo, offsethi) + wrap_x(x); // apply the x wrap mode as gather_helper4f does; the raw x could index outside the row + return PixelFormat::get4f(offset[index]); + } + + SSAVec4f nearest_clamp4f(SSAVec4f s, SSAVec4f t, int index) const + { + struct WrapX { WrapX(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i v) { return self->clamp_x(v); } const SSAPixelType *self; }; + struct WrapY { WrapY(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i v) { return self->clamp_y(v); } const SSAPixelType *self; }; + return nearest_helper4f(s, t, index, WrapX(this), WrapY(this)); + /* + return nearest_helper4f( + s, t, index, + [this](SSAVec4i v) -> SSAVec4i { return clamp_x(v); }, + [this](SSAVec4i v) -> SSAVec4i { return clamp_y(v); }); + */ + } + + SSAVec4f nearest_repeat4f(SSAVec4f s, SSAVec4f t, int index) const + { + struct WrapX { WrapX(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i v) { return self->repeat_x(v); } const SSAPixelType *self; }; + struct WrapY { WrapY(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i v) { return self->repeat_y(v); } const SSAPixelType *self; }; + return nearest_helper4f(s, t, index, WrapX(this), WrapY(this)); + /* + return nearest_helper4f( + s, t, index, + [this](SSAVec4i v) -> SSAVec4i { return repeat_x(v); }, + [this](SSAVec4i v) -> SSAVec4i { return repeat_y(v); }); + */ + } + + SSAVec4f nearest_mirror4f(SSAVec4f s, SSAVec4f t, int index) const + { + struct WrapX { WrapX(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i v) { return self->mirror_x(v); } const SSAPixelType *self; }; + struct WrapY { WrapY(const SSAPixelType 
*self) : self(self) { } SSAVec4i operator()(SSAVec4i v) { return self->mirror_y(v); } const SSAPixelType *self; }; + return nearest_helper4f(s, t, index, WrapX(this), WrapY(this)); + /* + return nearest_helper4f( + s, t, index, + [this](SSAVec4i v) -> SSAVec4i { return mirror_x(v); }, + [this](SSAVec4i v) -> SSAVec4i { return mirror_y(v); }); + */ + } + + template + void gather_helper4f(SSAVec4f s, SSAVec4f t, int index, SSAVec4f *out_pixels, WrapXFunctor wrap_x, WrapYFunctor wrap_y) const + { + SSAVec4i x = int_floor(s * _widthps - 0.5f); + SSAVec4i y = int_floor(t * _heightps - 0.5f); + SSAVec8s y16 = SSAVec8s(wrap_y(y + 1), wrap_y(y)); + SSAVec8s offsethi = SSAVec8s::mulhi(y16, _width16); + SSAVec8s offsetlo = y16 * _width16; + SSAVec4i x0 = wrap_x(x); + SSAVec4i x1 = wrap_x(x + 1); + SSAVec4i line0 = SSAVec4i::combinehi(offsetlo, offsethi); + SSAVec4i line1 = SSAVec4i::combinelo(offsetlo, offsethi); + SSAVec4i offset0 = x0 + line0; + SSAVec4i offset1 = x1 + line0; + SSAVec4i offset2 = x0 + line1; + SSAVec4i offset3 = x1 + line1; + out_pixels[0] = PixelFormat::get4f(offset0[index]); + out_pixels[1] = PixelFormat::get4f(offset1[index]); + out_pixels[2] = PixelFormat::get4f(offset2[index]); + out_pixels[3] = PixelFormat::get4f(offset3[index]); + } + + void gather_clamp4f(SSAVec4f s, SSAVec4f t, int index, SSAVec4f *out_pixels) const + { + struct WrapX { WrapX(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i v) { return self->clamp_x(v); } const SSAPixelType *self; }; + struct WrapY { WrapY(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i v) { return self->clamp_y(v); } const SSAPixelType *self; }; + return gather_helper4f(s, t, index, out_pixels, WrapX(this), WrapY(this)); + /* + gather_helper4f( + s, t, index, out_pixels, + [this](SSAVec4i v) -> SSAVec4i { return clamp_x(v); }, + [this](SSAVec4i v) -> SSAVec4i { return clamp_y(v); }); + */ + } + + void gather_repeat4f(SSAVec4f s, SSAVec4f t, int index, SSAVec4f 
*out_pixels) const + { + struct WrapX { WrapX(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i v) { return self->repeat_x(v); } const SSAPixelType *self; }; + struct WrapY { WrapY(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i v) { return self->repeat_y(v); } const SSAPixelType *self; }; + return gather_helper4f(s, t, index, out_pixels, WrapX(this), WrapY(this)); + /* + gather_helper4f( + s, t, index, out_pixels, + [this](SSAVec4i v) -> SSAVec4i { return repeat_x(v); }, + [this](SSAVec4i v) -> SSAVec4i { return repeat_y(v); }); + */ + } + + void gather_mirror4f(SSAVec4f s, SSAVec4f t, int index, SSAVec4f *out_pixels) const + { + struct WrapX { WrapX(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i v) { return self->mirror_x(v); } const SSAPixelType *self; }; + struct WrapY { WrapY(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i v) { return self->mirror_y(v); } const SSAPixelType *self; }; + return gather_helper4f(s, t, index, out_pixels, WrapX(this), WrapY(this)); + /* + gather_helper4f( + s, t, index, out_pixels, + [this](SSAVec4i v) -> SSAVec4i { return mirror_x(v); }, + [this](SSAVec4i v) -> SSAVec4i { return mirror_y(v); }); + */ + } + + SSAVec4f linear_clamp4f(SSAVec4f s, SSAVec4f t, int index) const + { + SSAScopeHint hint("linearclamp"); + SSAVec4f samples[4]; + gather_clamp4f(s, t, index, samples); + return linear_interpolate4f(s, t, index, samples); + } + + SSAVec4f linear_repeat4f(SSAVec4f s, SSAVec4f t, int index) const + { + SSAVec4f samples[4]; + gather_repeat4f(s, t, index, samples); + return linear_interpolate4f(s, t, index, samples); + } + + SSAVec4f linear_mirror4f(SSAVec4f s, SSAVec4f t, int index) const + { + SSAVec4f samples[4]; + gather_mirror4f(s, t, index, samples); + return linear_interpolate4f(s, t, index, samples); + } + + SSAVec4f linear_interpolate4f(SSAVec4f s, SSAVec4f t, int index, const SSAVec4f *samples) const + { + SSAVec4f a = fract(s * 
_widthps - 0.5f); + SSAVec4f b = fract(t * _heightps - 0.5f); + SSAVec4f inv_a = 1.0f - a; + SSAVec4f inv_b = 1.0f - b; + return + samples[0] * SSAVec4f::shuffle(inv_a * inv_b, index, index, index, index) + + samples[1] * SSAVec4f::shuffle(a * inv_b, index, index, index, index) + + samples[2] * SSAVec4f::shuffle(inv_a * b, index, index, index, index) + + samples[3] * SSAVec4f::shuffle(a * b, index, index, index, index); + } + + ///////////////////////////////////////////////////////////////////////// + + SSAVec4i clamp(SSAVec4i sstt) const + { + return SSAVec4i::max_sse41(SSAVec4i::min_sse41(sstt, _widthheight - 1), 0); + } + + template + void gather_helper4f(SSAVec4f st, SSAVec4f *out_pixels, WrapFunctor wrap) const + { + SSAVec4f sstt = SSAVec4f::shuffle(st, 0, 0, 1, 1); + SSAVec4i xxyy = wrap(int_floor(sstt * _widthheightps - 0.5f) + SSAVec4i(0, 1, 0, 1)); + SSAVec4i xxoffset = SSAVec4f::shuffle(xxyy, xxyy * _width32, 0, 1, 6, 7); + SSAVec4i offsets = SSAVec4i::shuffle(xxoffset, 0, 1, 0, 1) + SSAVec4i::shuffle(xxoffset, 2, 2, 3, 3); + out_pixels[0] = PixelFormat::get4f(offsets[0]); + out_pixels[1] = PixelFormat::get4f(offsets[1]); + out_pixels[2] = PixelFormat::get4f(offsets[2]); + out_pixels[3] = PixelFormat::get4f(offsets[3]); + } + + void gather_clamp4f(SSAVec4f st, SSAVec4f *out_pixels) const + { + struct Wrap { Wrap(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i sstt) { return self->clamp(sstt); } const SSAPixelType *self; }; + return gather_helper4f(st, out_pixels, Wrap(this)); + } + + SSAVec4f linear_clamp4f(SSAVec4f st) const + { + SSAScopeHint hint("linearclamp"); + SSAVec4f samples[4]; + gather_clamp4f(st, samples); + return linear_interpolate4f(st, samples); + } + + SSAVec4f linear_interpolate4f(SSAVec4f st, const SSAVec4f *samples) const + { + SSAVec4f sstt = SSAVec4f::shuffle(st, 0, 0, 1, 1); + SSAVec4f aabb = fract(sstt * _widthheightps - 0.5f); + SSAVec4f inv_aabb = 1.0f - aabb; + SSAVec4f ab_inv_ab = 
SSAVec4f::shuffle(aabb, inv_aabb, 0, 2, 4, 6); + SSAVec4f ab__inv_a_b__inv_a_inv_b__a_invb = ab_inv_ab * SSAVec4f::shuffle(ab_inv_ab, 1, 2, 3, 0); + return + samples[0] * SSAVec4f::shuffle(ab__inv_a_b__inv_a_inv_b__a_invb, 2, 2, 2, 2) + + samples[1] * SSAVec4f::shuffle(ab__inv_a_b__inv_a_inv_b__a_invb, 3, 3, 3, 3) + + samples[2] * SSAVec4f::shuffle(ab__inv_a_b__inv_a_inv_b__a_invb, 1, 1, 1, 1) + + samples[3] * SSAVec4f::shuffle(ab__inv_a_b__inv_a_inv_b__a_invb, 0, 0, 0, 0); + } + +public: + SSAInt _width; + SSAInt _height; + SSAVec4i _width32; + SSAVec8s _width16; + SSAVec4f _widthps; + SSAVec4f _heightps; + + SSAVec4i _widthheight; + SSAVec4f _widthheightps; +}; diff --git a/src/r_compiler/ssa/ssa_scope.cpp b/src/r_compiler/ssa/ssa_scope.cpp new file mode 100644 index 0000000000..f9d16f1889 --- /dev/null +++ b/src/r_compiler/ssa/ssa_scope.cpp @@ -0,0 +1,65 @@ + +#include "ssa_scope.h" +#include "ssa_int.h" + +SSAScope::SSAScope(llvm::LLVMContext *context, llvm::Module *module, llvm::IRBuilder<> *builder) +: _context(context), _module(module), _builder(builder) +{ + instance = this; +} + +SSAScope::~SSAScope() +{ + instance = 0; +} + +llvm::LLVMContext &SSAScope::context() +{ + return *instance->_context; +} + +llvm::Module *SSAScope::module() +{ + return instance->_module; +} + +llvm::IRBuilder<> &SSAScope::builder() +{ + return *instance->_builder; +} + +llvm::Function *SSAScope::intrinsic(llvm::Intrinsic::ID id, llvm::ArrayRef parameter_types) +{ + llvm::Function *func = module()->getFunction(llvm::Intrinsic::getName(id, parameter_types)); // look up under the same type-suffixed name used at creation, so an overloaded intrinsic declared earlier is found and reused + if (func == 0) + func = llvm::Function::Create(llvm::Intrinsic::getType(context(), id, parameter_types), llvm::Function::ExternalLinkage, llvm::Intrinsic::getName(id, parameter_types), module()); + return func; +} + +llvm::Value *SSAScope::alloca(llvm::Type *type) +{ + return alloca(type, SSAInt(1)); +} + +llvm::Value *SSAScope::alloca(llvm::Type *type, SSAInt size) +{ + // Allocas must be created at top of entry block for the 
PromoteMemoryToRegisterPass to work + llvm::BasicBlock &entry = SSAScope::builder().GetInsertBlock()->getParent()->getEntryBlock(); + llvm::IRBuilder<> alloca_builder(&entry, entry.begin()); + return alloca_builder.CreateAlloca(type, size.v, hint()); +} + +const std::string &SSAScope::hint() +{ + return instance->_hint; +} + +void SSAScope::set_hint(const std::string &new_hint) +{ + if (new_hint.empty()) + instance->_hint = "tmp"; + else + instance->_hint = new_hint; +} + +SSAScope *SSAScope::instance = 0; diff --git a/src/r_compiler/ssa/ssa_scope.h b/src/r_compiler/ssa/ssa_scope.h new file mode 100644 index 0000000000..d184643adb --- /dev/null +++ b/src/r_compiler/ssa/ssa_scope.h @@ -0,0 +1,41 @@ + +#pragma once + +#include "r_compiler/llvm_include.h" + +class SSAInt; + +class SSAScope +{ +public: + SSAScope(llvm::LLVMContext *context, llvm::Module *module, llvm::IRBuilder<> *builder); + ~SSAScope(); + static llvm::LLVMContext &context(); + static llvm::Module *module(); + static llvm::IRBuilder<> &builder(); + static llvm::Function *intrinsic(llvm::Intrinsic::ID id, llvm::ArrayRef parameter_types = llvm::ArrayRef()); + static llvm::Value *alloca(llvm::Type *type); + static llvm::Value *alloca(llvm::Type *type, SSAInt size); + static const std::string &hint(); + static void set_hint(const std::string &hint); + +private: + static SSAScope *instance; + llvm::LLVMContext *_context; + llvm::Module *_module; + llvm::IRBuilder<> *_builder; + std::string _hint; +}; + +class SSAScopeHint +{ +public: + SSAScopeHint() : old_hint(SSAScope::hint()) { } + SSAScopeHint(const std::string &hint) : old_hint(SSAScope::hint()) { SSAScope::set_hint(hint); } + ~SSAScopeHint() { SSAScope::set_hint(old_hint); } + void set(const std::string &hint) { SSAScope::set_hint(hint); } + void clear() { SSAScope::set_hint(old_hint); } + +private: + std::string old_hint; +}; diff --git a/src/r_compiler/ssa/ssa_stack.h b/src/r_compiler/ssa/ssa_stack.h new file mode 100644 index 
0000000000..435530be1f --- /dev/null +++ b/src/r_compiler/ssa/ssa_stack.h @@ -0,0 +1,25 @@ + +#pragma once + +template +class SSAStack +{ +public: + SSAStack() + : v(0) + { + v = SSAScope::alloca(SSAVariable::llvm_type()); + } + + SSAVariable load() const + { + return SSAVariable::from_llvm(SSAScope::builder().CreateLoad(v, SSAScope::hint())); + } + + void store(const SSAVariable &new_value) + { + SSAScope::builder().CreateStore(new_value.v, v); + } + + llvm::Value *v; +}; diff --git a/src/r_compiler/ssa/ssa_struct_type.cpp b/src/r_compiler/ssa/ssa_struct_type.cpp new file mode 100644 index 0000000000..4a79768cea --- /dev/null +++ b/src/r_compiler/ssa/ssa_struct_type.cpp @@ -0,0 +1,18 @@ + +#include "ssa_struct_type.h" +#include "ssa_scope.h" + +void SSAStructType::add_parameter(llvm::Type *type) +{ + elements.push_back(type); +} + +llvm::Type *SSAStructType::llvm_type() +{ + return llvm::StructType::get(SSAScope::context(), elements, false); +} + +llvm::Type *SSAStructType::llvm_type_packed() +{ + return llvm::StructType::get(SSAScope::context(), elements, true); +} diff --git a/src/r_compiler/ssa/ssa_struct_type.h b/src/r_compiler/ssa/ssa_struct_type.h new file mode 100644 index 0000000000..67b056b325 --- /dev/null +++ b/src/r_compiler/ssa/ssa_struct_type.h @@ -0,0 +1,17 @@ + +#pragma once + +#include + +namespace llvm { class Type; } + +class SSAStructType +{ +public: + void add_parameter(llvm::Type *type); + llvm::Type *llvm_type(); + llvm::Type *llvm_type_packed(); + +private: + std::vector elements; +}; diff --git a/src/r_compiler/ssa/ssa_ubyte.cpp b/src/r_compiler/ssa/ssa_ubyte.cpp new file mode 100644 index 0000000000..04db4fd28f --- /dev/null +++ b/src/r_compiler/ssa/ssa_ubyte.cpp @@ -0,0 +1,95 @@ + +#include "ssa_ubyte.h" +#include "ssa_scope.h" +#include "r_compiler/llvm_include.h" + +SSAUByte::SSAUByte() +: v(0) +{ +} + +SSAUByte::SSAUByte(unsigned char constant) +: v(0) +{ + v = llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant, 
false)); +} + +SSAUByte::SSAUByte(llvm::Value *v) +: v(v) +{ +} + +llvm::Type *SSAUByte::llvm_type() +{ + return llvm::Type::getInt8Ty(SSAScope::context()); +} + +SSAUByte operator+(const SSAUByte &a, const SSAUByte &b) +{ + return SSAUByte::from_llvm(SSAScope::builder().CreateAdd(a.v, b.v, SSAScope::hint())); +} + +SSAUByte operator-(const SSAUByte &a, const SSAUByte &b) +{ + return SSAUByte::from_llvm(SSAScope::builder().CreateSub(a.v, b.v, SSAScope::hint())); +} + +SSAUByte operator*(const SSAUByte &a, const SSAUByte &b) +{ + return SSAUByte::from_llvm(SSAScope::builder().CreateMul(a.v, b.v, SSAScope::hint())); +} +/* +SSAUByte operator/(const SSAUByte &a, const SSAUByte &b) +{ + return SSAScope::builder().CreateDiv(a.v, b.v); +} +*/ +SSAUByte operator+(unsigned char a, const SSAUByte &b) +{ + return SSAUByte(a) + b; +} + +SSAUByte operator-(unsigned char a, const SSAUByte &b) +{ + return SSAUByte(a) - b; +} + +SSAUByte operator*(unsigned char a, const SSAUByte &b) +{ + return SSAUByte(a) * b; +} +/* +SSAUByte operator/(unsigned char a, const SSAUByte &b) +{ + return SSAUByte(a) / b; +} +*/ +SSAUByte operator+(const SSAUByte &a, unsigned char b) +{ + return a + SSAUByte(b); +} + +SSAUByte operator-(const SSAUByte &a, unsigned char b) +{ + return a - SSAUByte(b); +} + +SSAUByte operator*(const SSAUByte &a, unsigned char b) +{ + return a * SSAUByte(b); +} +/* +SSAUByte operator/(const SSAUByte &a, unsigned char b) +{ + return a / SSAUByte(b); +} +*/ +SSAUByte operator<<(const SSAUByte &a, unsigned char bits) +{ + return SSAUByte::from_llvm(SSAScope::builder().CreateShl(a.v, bits)); +} + +SSAUByte operator>>(const SSAUByte &a, unsigned char bits) +{ + return SSAUByte::from_llvm(SSAScope::builder().CreateLShr(a.v, bits)); +} diff --git a/src/r_compiler/ssa/ssa_ubyte.h b/src/r_compiler/ssa/ssa_ubyte.h new file mode 100644 index 0000000000..f1e12afba4 --- /dev/null +++ b/src/r_compiler/ssa/ssa_ubyte.h @@ -0,0 +1,35 @@ + +#pragma once + +namespace llvm { class Value; } 
+namespace llvm { class Type; } + +class SSAUByte +{ +public: + SSAUByte(); + SSAUByte(unsigned char constant); + explicit SSAUByte(llvm::Value *v); + static SSAUByte from_llvm(llvm::Value *v) { return SSAUByte(v); } + static llvm::Type *llvm_type(); + + llvm::Value *v; +}; + +SSAUByte operator+(const SSAUByte &a, const SSAUByte &b); +SSAUByte operator-(const SSAUByte &a, const SSAUByte &b); +SSAUByte operator*(const SSAUByte &a, const SSAUByte &b); +//SSAUByte operator/(const SSAUByte &a, const SSAUByte &b); + +SSAUByte operator+(unsigned char a, const SSAUByte &b); +SSAUByte operator-(unsigned char a, const SSAUByte &b); +SSAUByte operator*(unsigned char a, const SSAUByte &b); +//SSAUByte operator/(unsigned char a, const SSAUByte &b); + +SSAUByte operator+(const SSAUByte &a, unsigned char b); +SSAUByte operator-(const SSAUByte &a, unsigned char b); +SSAUByte operator*(const SSAUByte &a, unsigned char b); +//SSAUByte operator/(const SSAUByte &a, unsigned char b); + +SSAUByte operator<<(const SSAUByte &a, unsigned char bits); +SSAUByte operator>>(const SSAUByte &a, unsigned char bits); diff --git a/src/r_compiler/ssa/ssa_ubyte_ptr.cpp b/src/r_compiler/ssa/ssa_ubyte_ptr.cpp new file mode 100644 index 0000000000..825806148b --- /dev/null +++ b/src/r_compiler/ssa/ssa_ubyte_ptr.cpp @@ -0,0 +1,106 @@ + +#include "ssa_ubyte_ptr.h" +#include "ssa_scope.h" +#include "r_compiler/llvm_include.h" + +SSAUBytePtr::SSAUBytePtr() +: v(0) +{ +} + +SSAUBytePtr::SSAUBytePtr(llvm::Value *v) +: v(v) +{ +} + +llvm::Type *SSAUBytePtr::llvm_type() +{ + return llvm::Type::getInt8PtrTy(SSAScope::context()); +} + +SSAUBytePtr SSAUBytePtr::operator[](SSAInt index) const +{ + return SSAUBytePtr::from_llvm(SSAScope::builder().CreateGEP(v, index.v, SSAScope::hint())); +} + +SSAUByte SSAUBytePtr::load() const +{ + return SSAUByte::from_llvm(SSAScope::builder().CreateLoad(v, false, SSAScope::hint())); +} + +SSAVec4i SSAUBytePtr::load_vec4ub() const +{ + // _mm_cvtsi32_si128 as implemented by 
clang: + SSAInt i32 = SSAInt::from_llvm(SSAScope::builder().CreateLoad(SSAScope::builder().CreateBitCast(v, llvm::Type::getInt32PtrTy(SSAScope::context()), SSAScope::hint()), false, SSAScope::hint())); + llvm::Value *v = SSAScope::builder().CreateInsertElement(llvm::UndefValue::get(SSAVec4i::llvm_type()), i32.v, SSAInt(0).v, SSAScope::hint()); + v = SSAScope::builder().CreateInsertElement(v, SSAInt(0).v, SSAInt(1).v, SSAScope::hint()); + v = SSAScope::builder().CreateInsertElement(v, SSAInt(0).v, SSAInt(2).v, SSAScope::hint()); + v = SSAScope::builder().CreateInsertElement(v, SSAInt(0).v, SSAInt(3).v, SSAScope::hint()); + SSAVec4i v4i = SSAVec4i::from_llvm(v); + + SSAVec8s low = SSAVec8s::bitcast(SSAVec16ub::shuffle(SSAVec16ub::bitcast(v4i), 0, 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7)); // _mm_unpacklo_epi8 + return SSAVec4i::extendlo(low); // _mm_unpacklo_epi16 +/* + llvm::PointerType *m4xint8typeptr = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 4)->getPointerTo(); + llvm::Type *m4xint32type = llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 4); + llvm::Value *v4ub = SSAScope::builder().CreateLoad(SSAScope::builder().CreateBitCast(v, m4xint8typeptr, SSAScope::hint()), false, SSAScope::hint()); + return SSAVec4i::from_llvm(SSAScope::builder().CreateZExt(v4ub, m4xint32type)); +*/ +} + +SSAVec16ub SSAUBytePtr::load_vec16ub() const +{ + llvm::PointerType *m16xint8typeptr = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 16)->getPointerTo(); + return SSAVec16ub::from_llvm(SSAScope::builder().CreateLoad(SSAScope::builder().CreateBitCast(v, m16xint8typeptr, SSAScope::hint()), false, SSAScope::hint())); +} + +SSAVec16ub SSAUBytePtr::load_unaligned_vec16ub() const +{ + llvm::PointerType *m16xint8typeptr = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 16)->getPointerTo(); + return SSAVec16ub::from_llvm(SSAScope::builder().Insert(new 
llvm::LoadInst(SSAScope::builder().CreateBitCast(v, m16xint8typeptr, SSAScope::hint()), SSAScope::hint(), false, 4), SSAScope::hint())); +} + +void SSAUBytePtr::store(const SSAUByte &new_value) +{ + SSAScope::builder().CreateStore(new_value.v, v, false); +} + +void SSAUBytePtr::store_vec4ub(const SSAVec4i &new_value) +{ + // Store using saturate: + SSAVec8s v8s(new_value, new_value); + SSAVec16ub v16ub(v8s, v8s); + + llvm::Type *m16xint8type = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 16); + llvm::PointerType *m4xint8typeptr = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 4)->getPointerTo(); + std::vector constants; + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 0))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 1))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 2))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 3))); + llvm::Value *mask = llvm::ConstantVector::get(constants); + llvm::Value *val_vector = SSAScope::builder().CreateShuffleVector(v16ub.v, llvm::UndefValue::get(m16xint8type), mask, SSAScope::hint()); + SSAScope::builder().CreateStore(val_vector, SSAScope::builder().CreateBitCast(v, m4xint8typeptr, SSAScope::hint()), false); +} + +void SSAUBytePtr::store_vec16ub(const SSAVec16ub &new_value) +{ + llvm::PointerType *m16xint8typeptr = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 16)->getPointerTo(); + llvm::StoreInst *inst = SSAScope::builder().CreateAlignedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m16xint8typeptr, SSAScope::hint()), 16); + + // The following generates _mm_stream_si128, maybe! 
+ // llvm::MDNode *node = llvm::MDNode::get(SSAScope::context(), SSAScope::builder().getInt32(1)); + // inst->setMetadata(SSAScope::module()->getMDKindID("nontemporal"), node); +} + +void SSAUBytePtr::store_unaligned_vec16ub(const SSAVec16ub &new_value) +{ + /*llvm::Value *values[2] = + { + SSAScope::builder().CreateBitCast(v, llvm::Type::getInt8PtrTy(SSAScope::context())), + new_value.v + }; + SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_storeu_dq), values);*/ + llvm::PointerType *m16xint8typeptr = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 16)->getPointerTo(); + SSAScope::builder().CreateAlignedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m16xint8typeptr, SSAScope::hint()), 1); // align 1: a plain CreateStore would assume the 16-byte ABI alignment of <16 x i8> +} diff --git a/src/r_compiler/ssa/ssa_ubyte_ptr.h b/src/r_compiler/ssa/ssa_ubyte_ptr.h new file mode 100644 index 0000000000..5b68ee1add --- /dev/null +++ b/src/r_compiler/ssa/ssa_ubyte_ptr.h @@ -0,0 +1,32 @@ + +#pragma once + +#include "ssa_ubyte.h" +#include "ssa_int.h" +#include "ssa_vec4i.h" +#include "ssa_vec8s.h" +#include "ssa_vec16ub.h" + +namespace llvm { class Value; } +namespace llvm { class Type; } + +class SSAUBytePtr +{ +public: + SSAUBytePtr(); + explicit SSAUBytePtr(llvm::Value *v); + static SSAUBytePtr from_llvm(llvm::Value *v) { return SSAUBytePtr(v); } + static llvm::Type *llvm_type(); + SSAUBytePtr operator[](SSAInt index) const; + SSAUByte load() const; + SSAVec4i load_vec4ub() const; + SSAVec8s load_vec8s() const; + SSAVec16ub load_vec16ub() const; + SSAVec16ub load_unaligned_vec16ub() const; + void store(const SSAUByte &new_value); + void store_vec4ub(const SSAVec4i &new_value); + void store_vec16ub(const SSAVec16ub &new_value); + void store_unaligned_vec16ub(const SSAVec16ub &new_value); + + llvm::Value *v; +}; diff --git a/src/r_compiler/ssa/ssa_value.cpp b/src/r_compiler/ssa/ssa_value.cpp new file mode 100644 index 0000000000..877420fc5d --- /dev/null +++ 
b/src/r_compiler/ssa/ssa_value.cpp @@ -0,0 +1,56 @@ + +#include "ssa_value.h" +#include "ssa_int.h" +#include "ssa_scope.h" +#include "r_compiler/llvm_include.h" + +SSAValue SSAValue::load() +{ + return SSAValue::from_llvm(SSAScope::builder().CreateLoad(v, false)); +} + +void SSAValue::store(llvm::Value *value) +{ + SSAScope::builder().CreateStore(value, v, false); +} + +SSAIndexLookup SSAValue::operator[](int index) +{ + SSAIndexLookup result; + result.v = v; + result.indexes.push_back(SSAInt(index).v); + return result; +} + +SSAIndexLookup SSAValue::operator[](SSAInt index) +{ + SSAIndexLookup result; + result.v = v; + result.indexes.push_back(index.v); + return result; +} + +///////////////////////////////////////////////////////////////////////////// + +SSAIndexLookup::operator SSAValue() +{ + return SSAValue::from_llvm(SSAScope::builder().CreateGEP(v, indexes)); +} + +SSAIndexLookup SSAIndexLookup::operator[](int index) +{ + SSAIndexLookup result; + result.v = v; + result.indexes = indexes; + result.indexes.push_back(SSAInt(index).v); + return result; +} + +SSAIndexLookup SSAIndexLookup::operator[](SSAInt index) +{ + SSAIndexLookup result; + result.v = v; + result.indexes = indexes; + result.indexes.push_back(index.v); + return result; +} diff --git a/src/r_compiler/ssa/ssa_value.h b/src/r_compiler/ssa/ssa_value.h new file mode 100644 index 0000000000..ec156a4529 --- /dev/null +++ b/src/r_compiler/ssa/ssa_value.h @@ -0,0 +1,53 @@ + +#pragma once + +#include + +namespace llvm { class Value; } + +class SSAInt; +class SSAIndexLookup; + +class SSAValue +{ +public: + SSAValue() : v(0) { } + + static SSAValue from_llvm(llvm::Value *v) { SSAValue val; val.v = v; return val; } + + SSAValue load(); + void store(llvm::Value *v); + + template + operator Type() + { + return Type::from_llvm(v); + } + + SSAIndexLookup operator[](int index); + SSAIndexLookup operator[](SSAInt index); + + llvm::Value *v; +}; + +class SSAIndexLookup +{ +public: + SSAIndexLookup() : v(0) { } + 
+ llvm::Value *v; + std::vector indexes; + + SSAValue load() { SSAValue value = *this; return value.load(); } + void store(llvm::Value *v) { SSAValue value = *this; return value.store(v); } + + template + operator Type() + { + return Type::from_llvm(v); + } + + operator SSAValue(); + SSAIndexLookup operator[](int index); + SSAIndexLookup operator[](SSAInt index); +}; diff --git a/src/r_compiler/ssa/ssa_vec16ub.cpp b/src/r_compiler/ssa/ssa_vec16ub.cpp new file mode 100644 index 0000000000..f18d687188 --- /dev/null +++ b/src/r_compiler/ssa/ssa_vec16ub.cpp @@ -0,0 +1,155 @@ + +#include "ssa_vec16ub.h" +#include "ssa_vec8s.h" +#include "ssa_vec4i.h" +#include "ssa_scope.h" +#include "r_compiler/llvm_include.h" + +SSAVec16ub::SSAVec16ub() +: v(0) +{ +} + +SSAVec16ub::SSAVec16ub(unsigned char constant) +: v(0) +{ + std::vector constants; + constants.resize(16, llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant, false))); + v = llvm::ConstantVector::get(constants); +} + +SSAVec16ub::SSAVec16ub( + unsigned char constant0, unsigned char constant1, unsigned char constant2, unsigned char constant3, unsigned char constant4, unsigned char constant5, unsigned char constant6, unsigned char constant7, + unsigned char constant8, unsigned char constant9, unsigned char constant10, unsigned char constant11, unsigned char constant12, unsigned char constant13, unsigned char constant14, unsigned char constant15) +: v(0) +{ + std::vector constants; + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant0, false))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant1, false))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant2, false))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant3, false))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant4, false))); + 
constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant5, false))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant6, false))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant7, false))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant8, false))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant9, false))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant10, false))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant11, false))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant12, false))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant13, false))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant14, false))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant15, false))); + v = llvm::ConstantVector::get(constants); +} + +SSAVec16ub::SSAVec16ub(llvm::Value *v) +: v(v) +{ +} + +SSAVec16ub::SSAVec16ub(SSAVec8s s0, SSAVec8s s1) +: v(0) +{ + llvm::Value *values[2] = { s0.v, s1.v }; + v = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_packuswb_128), values, SSAScope::hint()); +} + +llvm::Type *SSAVec16ub::llvm_type() +{ + return llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 16); +} + +SSAVec16ub SSAVec16ub::bitcast(SSAVec4i i32) +{ + return SSAVec16ub::from_llvm(SSAScope::builder().CreateBitCast(i32.v, llvm_type(), SSAScope::hint())); +} + +SSAVec16ub SSAVec16ub::shuffle(const SSAVec16ub &i0, int index0, int index1, int index2, int index3, int index4, int index5, int index6, int index7, int index8, int index9, int index10, int index11, int index12, int index13, int index14, int 
index15) +{ + return shuffle(i0, from_llvm(llvm::UndefValue::get(llvm_type())), index0, index1, index2, index3, index4, index5, index6, index7, index8, index9, index10, index11, index12, index13, index14, index15); +} + +SSAVec16ub SSAVec16ub::shuffle(const SSAVec16ub &i0, const SSAVec16ub &i1, int index0, int index1, int index2, int index3, int index4, int index5, int index6, int index7, int index8, int index9, int index10, int index11, int index12, int index13, int index14, int index15) +{ + std::vector constants; + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index0))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index1))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index2))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index3))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index4))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index5))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index6))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index7))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index8))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index9))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index10))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index11))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index12))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index13))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index14))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), 
llvm::APInt(32, index15))); + llvm::Value *mask = llvm::ConstantVector::get(constants); + return SSAVec16ub::from_llvm(SSAScope::builder().CreateShuffleVector(i0.v, i1.v, mask, SSAScope::hint())); +} + +SSAVec16ub operator+(const SSAVec16ub &a, const SSAVec16ub &b) +{ + return SSAVec16ub::from_llvm(SSAScope::builder().CreateAdd(a.v, b.v, SSAScope::hint())); +} + +SSAVec16ub operator-(const SSAVec16ub &a, const SSAVec16ub &b) +{ + return SSAVec16ub::from_llvm(SSAScope::builder().CreateSub(a.v, b.v, SSAScope::hint())); +} + +SSAVec16ub operator*(const SSAVec16ub &a, const SSAVec16ub &b) +{ + return SSAVec16ub::from_llvm(SSAScope::builder().CreateMul(a.v, b.v, SSAScope::hint())); +} +/* +SSAVec16ub operator/(const SSAVec16ub &a, const SSAVec16ub &b) +{ + return SSAScope::builder().CreateDiv(a.v, b.v, SSAScope::hint()); +} +*/ +SSAVec16ub operator+(unsigned char a, const SSAVec16ub &b) +{ + return SSAVec16ub(a) + b; +} + +SSAVec16ub operator-(unsigned char a, const SSAVec16ub &b) +{ + return SSAVec16ub(a) - b; +} + +SSAVec16ub operator*(unsigned char a, const SSAVec16ub &b) +{ + return SSAVec16ub(a) * b; +} +/* +SSAVec16ub operator/(unsigned char a, const SSAVec16ub &b) +{ + return SSAVec16ub(a) / b; +} +*/ +SSAVec16ub operator+(const SSAVec16ub &a, unsigned char b) +{ + return a + SSAVec16ub(b); +} + +SSAVec16ub operator-(const SSAVec16ub &a, unsigned char b) +{ + return a - SSAVec16ub(b); +} + +SSAVec16ub operator*(const SSAVec16ub &a, unsigned char b) +{ + return a * SSAVec16ub(b); +} +/* +SSAVec16ub operator/(const SSAVec16ub &a, unsigned char b) +{ + return a / SSAVec16ub(b); +} +*/ \ No newline at end of file diff --git a/src/r_compiler/ssa/ssa_vec16ub.h b/src/r_compiler/ssa/ssa_vec16ub.h new file mode 100644 index 0000000000..e4cfcdc87b --- /dev/null +++ b/src/r_compiler/ssa/ssa_vec16ub.h @@ -0,0 +1,42 @@ + +#pragma once + +namespace llvm { class Value; } +namespace llvm { class Type; } + +class SSAVec8s; +class SSAVec4i; + +class SSAVec16ub +{ +public: + 
SSAVec16ub(); + SSAVec16ub(unsigned char constant); + SSAVec16ub( + unsigned char constant0, unsigned char constant1, unsigned char constant2, unsigned char constant3, unsigned char constant4, unsigned char constant5, unsigned char constant6, unsigned char constant7, + unsigned char constant8, unsigned char constant9, unsigned char constant10, unsigned char constant11, unsigned char constant12, unsigned char constant13, unsigned char constant14, unsigned char constant15); + explicit SSAVec16ub(llvm::Value *v); + SSAVec16ub(SSAVec8s s0, SSAVec8s s1); + static SSAVec16ub from_llvm(llvm::Value *v) { return SSAVec16ub(v); } + static llvm::Type *llvm_type(); + static SSAVec16ub bitcast(SSAVec4i i32); + static SSAVec16ub shuffle(const SSAVec16ub &i0, int index0, int index1, int index2, int index3, int index4, int index5, int index6, int index7, int index8, int index9, int index10, int index11, int index12, int index13, int index14, int index15); + static SSAVec16ub shuffle(const SSAVec16ub &i0, const SSAVec16ub &i1, int index0, int index1, int index2, int index3, int index4, int index5, int index6, int index7, int index8, int index9, int index10, int index11, int index12, int index13, int index14, int index15); + + llvm::Value *v; +}; + +SSAVec16ub operator+(const SSAVec16ub &a, const SSAVec16ub &b); +SSAVec16ub operator-(const SSAVec16ub &a, const SSAVec16ub &b); +SSAVec16ub operator*(const SSAVec16ub &a, const SSAVec16ub &b); +SSAVec16ub operator/(const SSAVec16ub &a, const SSAVec16ub &b); + +SSAVec16ub operator+(unsigned char a, const SSAVec16ub &b); +SSAVec16ub operator-(unsigned char a, const SSAVec16ub &b); +SSAVec16ub operator*(unsigned char a, const SSAVec16ub &b); +SSAVec16ub operator/(unsigned char a, const SSAVec16ub &b); + +SSAVec16ub operator+(const SSAVec16ub &a, unsigned char b); +SSAVec16ub operator-(const SSAVec16ub &a, unsigned char b); +SSAVec16ub operator*(const SSAVec16ub &a, unsigned char b); +SSAVec16ub operator/(const SSAVec16ub &a, unsigned char 
b); diff --git a/src/r_compiler/ssa/ssa_vec4f.cpp b/src/r_compiler/ssa/ssa_vec4f.cpp new file mode 100644 index 0000000000..e002018fe8 --- /dev/null +++ b/src/r_compiler/ssa/ssa_vec4f.cpp @@ -0,0 +1,244 @@ + +#include "ssa_vec4f.h" +#include "ssa_vec4i.h" +#include "ssa_float.h" +#include "ssa_int.h" +#include "ssa_scope.h" +#include "r_compiler/llvm_include.h" + +SSAVec4f::SSAVec4f() +: v(0) +{ +} + +SSAVec4f::SSAVec4f(float constant) +: v(0) +{ + std::vector constants; + constants.resize(4, llvm::ConstantFP::get(SSAScope::context(), llvm::APFloat(constant))); + v = llvm::ConstantVector::get(constants); +} + +SSAVec4f::SSAVec4f(float constant0, float constant1, float constant2, float constant3) +: v(0) +{ + std::vector constants; + constants.push_back(llvm::ConstantFP::get(SSAScope::context(), llvm::APFloat(constant0))); + constants.push_back(llvm::ConstantFP::get(SSAScope::context(), llvm::APFloat(constant1))); + constants.push_back(llvm::ConstantFP::get(SSAScope::context(), llvm::APFloat(constant2))); + constants.push_back(llvm::ConstantFP::get(SSAScope::context(), llvm::APFloat(constant3))); + v = llvm::ConstantVector::get(constants); +} + +SSAVec4f::SSAVec4f(SSAFloat f) +: v(0) +{ + llvm::Type *m1xfloattype = llvm::VectorType::get(llvm::Type::getFloatTy(SSAScope::context()), 1); + std::vector constants; + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 0))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 0))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 0))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 0))); + llvm::Value *mask = llvm::ConstantVector::get(constants); + v = SSAScope::builder().CreateShuffleVector(SSAScope::builder().CreateBitCast(f.v, m1xfloattype, SSAScope::hint()), llvm::UndefValue::get(m1xfloattype), mask, SSAScope::hint()); +} + +SSAVec4f::SSAVec4f(SSAFloat f0, SSAFloat f1, SSAFloat f2, SSAFloat f3) 
+: v(0) +{ + v = SSAScope::builder().CreateInsertElement(llvm::UndefValue::get(llvm_type()), f0.v, llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)0))); + v = SSAScope::builder().CreateInsertElement(v, f1.v, llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)1))); + v = SSAScope::builder().CreateInsertElement(v, f2.v, llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)2))); + v = SSAScope::builder().CreateInsertElement(v, f3.v, llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)3))); +} + +SSAVec4f::SSAVec4f(llvm::Value *v) +: v(v) +{ +} + +SSAVec4f::SSAVec4f(SSAVec4i i32) +: v(0) +{ + //llvm::VectorType *m128type = llvm::VectorType::get(llvm::Type::getFloatTy(*context), 4); + //return builder->CreateSIToFP(i32.v, m128type); + v = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_cvtdq2ps), i32.v, SSAScope::hint()); +} + +llvm::Type *SSAVec4f::llvm_type() +{ + return llvm::VectorType::get(llvm::Type::getFloatTy(SSAScope::context()), 4); +} + +SSAFloat SSAVec4f::operator[](SSAInt index) const +{ + return SSAFloat::from_llvm(SSAScope::builder().CreateExtractElement(v, index.v, SSAScope::hint())); +} + +SSAVec4f SSAVec4f::insert_element(SSAVec4f vec4f, SSAFloat value, int index) +{ + return from_llvm(SSAScope::builder().CreateInsertElement(vec4f.v, value.v, llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)index)))); +} + +SSAVec4f SSAVec4f::bitcast(SSAVec4i i32) +{ + return SSAVec4f::from_llvm(SSAScope::builder().CreateBitCast(i32.v, llvm_type(), SSAScope::hint())); // wrap as SSAVec4f directly; an SSAVec4i wrapper would be re-converted by SSAVec4f(SSAVec4i) +} + +SSAVec4f SSAVec4f::sqrt(SSAVec4f f) +{ + std::vector<llvm::Type *> params; + params.push_back(SSAVec4f::llvm_type()); + return SSAVec4f::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::sqrt, params), f.v, SSAScope::hint())); // call yields <4 x float>, so wrap it as a vector rather than a scalar SSAFloat + //return SSAVec4f::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse_sqrt_ps), f.v, 
SSAScope::hint())); +} + +SSAVec4f SSAVec4f::rcp(SSAVec4f f) +{ + return SSAVec4f::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse_rcp_ps), f.v, SSAScope::hint())); +} + +SSAVec4f SSAVec4f::sin(SSAVec4f val) +{ + std::vector<llvm::Type *> params; + params.push_back(SSAVec4f::llvm_type()); + return SSAVec4f::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::sin, params), val.v, SSAScope::hint())); // intrinsic is instantiated for <4 x float>; wrap the vector result, not a scalar SSAFloat +} + +SSAVec4f SSAVec4f::cos(SSAVec4f val) +{ + std::vector<llvm::Type *> params; + params.push_back(SSAVec4f::llvm_type()); + return SSAVec4f::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::cos, params), val.v, SSAScope::hint())); +} + +SSAVec4f SSAVec4f::pow(SSAVec4f val, SSAVec4f power) +{ + std::vector<llvm::Type *> params; + params.push_back(SSAVec4f::llvm_type()); + //params.push_back(SSAVec4f::llvm_type()); + std::vector<llvm::Value *> args; + args.push_back(val.v); + args.push_back(power.v); + return SSAVec4f::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::pow, params), args, SSAScope::hint())); +} + +SSAVec4f SSAVec4f::exp(SSAVec4f val) +{ + std::vector<llvm::Type *> params; + params.push_back(SSAVec4f::llvm_type()); + return SSAVec4f::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::exp, params), val.v, SSAScope::hint())); +} + +SSAVec4f SSAVec4f::log(SSAVec4f val) +{ + std::vector<llvm::Type *> params; + params.push_back(SSAVec4f::llvm_type()); + return SSAVec4f::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::log, params), val.v, SSAScope::hint())); +} + +SSAVec4f SSAVec4f::fma(SSAVec4f a, SSAVec4f b, SSAVec4f c) +{ + std::vector<llvm::Type *> params; + params.push_back(SSAVec4f::llvm_type()); + //params.push_back(SSAVec4f::llvm_type()); + //params.push_back(SSAVec4f::llvm_type()); + std::vector<llvm::Value *> args; + args.push_back(a.v); + args.push_back(b.v); + args.push_back(c.v); + return SSAVec4f::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::fma, 
params), args, SSAScope::hint())); +} + +void SSAVec4f::transpose(SSAVec4f &row0, SSAVec4f &row1, SSAVec4f &row2, SSAVec4f &row3) +{ + SSAVec4f tmp0 = shuffle(row0, row1, 0x44);//_MM_SHUFFLE(1,0,1,0)); + SSAVec4f tmp2 = shuffle(row0, row1, 0xEE);//_MM_SHUFFLE(3,2,3,2)); + SSAVec4f tmp1 = shuffle(row2, row3, 0x44);//_MM_SHUFFLE(1,0,1,0)); + SSAVec4f tmp3 = shuffle(row2, row3, 0xEE);//_MM_SHUFFLE(3,2,3,2)); + row0 = shuffle(tmp0, tmp1, 0x88);//_MM_SHUFFLE(2,0,2,0)); + row1 = shuffle(tmp0, tmp1, 0xDD);//_MM_SHUFFLE(3,1,3,1)); + row2 = shuffle(tmp2, tmp3, 0x88);//_MM_SHUFFLE(2,0,2,0)); + row3 = shuffle(tmp2, tmp3, 0xDD);//_MM_SHUFFLE(3,1,3,1)); +} + +SSAVec4f SSAVec4f::shuffle(const SSAVec4f &f0, int index0, int index1, int index2, int index3) +{ + return shuffle(f0, from_llvm(llvm::UndefValue::get(llvm_type())), index0, index1, index2, index3); +} + +SSAVec4f SSAVec4f::shuffle(const SSAVec4f &f0, const SSAVec4f &f1, int index0, int index1, int index2, int index3) +{ + std::vector constants; + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index0))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index1))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index2))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index3))); + llvm::Value *mask = llvm::ConstantVector::get(constants); + return SSAVec4f::from_llvm(SSAScope::builder().CreateShuffleVector(f0.v, f1.v, mask, SSAScope::hint())); +} + +SSAVec4f SSAVec4f::shuffle(const SSAVec4f &f0, const SSAVec4f &f1, int mask) +{ + return shuffle(f0, f1, mask & 3, (mask >> 2) & 3, ((mask >> 4) & 3) + 4, ((mask >> 6) & 3) + 4); +} + +SSAVec4f operator+(const SSAVec4f &a, const SSAVec4f &b) +{ + return SSAVec4f::from_llvm(SSAScope::builder().CreateFAdd(a.v, b.v, SSAScope::hint())); +} + +SSAVec4f operator-(const SSAVec4f &a, const SSAVec4f &b) +{ + return 
SSAVec4f::from_llvm(SSAScope::builder().CreateFSub(a.v, b.v, SSAScope::hint())); +} + +SSAVec4f operator*(const SSAVec4f &a, const SSAVec4f &b) +{ + return SSAVec4f::from_llvm(SSAScope::builder().CreateFMul(a.v, b.v, SSAScope::hint())); +} + +SSAVec4f operator/(const SSAVec4f &a, const SSAVec4f &b) +{ + return SSAVec4f::from_llvm(SSAScope::builder().CreateFDiv(a.v, b.v, SSAScope::hint())); +} + +SSAVec4f operator+(float a, const SSAVec4f &b) +{ + return SSAVec4f(a) + b; +} + +SSAVec4f operator-(float a, const SSAVec4f &b) +{ + return SSAVec4f(a) - b; +} + +SSAVec4f operator*(float a, const SSAVec4f &b) +{ + return SSAVec4f(a) * b; +} + +SSAVec4f operator/(float a, const SSAVec4f &b) +{ + return SSAVec4f(a) / b; +} + +SSAVec4f operator+(const SSAVec4f &a, float b) +{ + return a + SSAVec4f(b); +} + +SSAVec4f operator-(const SSAVec4f &a, float b) +{ + return a - SSAVec4f(b); +} + +SSAVec4f operator*(const SSAVec4f &a, float b) +{ + return a * SSAVec4f(b); +} + +SSAVec4f operator/(const SSAVec4f &a, float b) +{ + return a / SSAVec4f(b); +} diff --git a/src/r_compiler/ssa/ssa_vec4f.h b/src/r_compiler/ssa/ssa_vec4f.h new file mode 100644 index 0000000000..5e3397e580 --- /dev/null +++ b/src/r_compiler/ssa/ssa_vec4f.h @@ -0,0 +1,57 @@ + +#pragma once + +namespace llvm { class Value; } +namespace llvm { class Type; } + +class SSAVec4i; +class SSAFloat; +class SSAInt; + +class SSAVec4f +{ +public: + SSAVec4f(); + SSAVec4f(float constant); + SSAVec4f(float constant0, float constant1, float constant2, float constant3); + SSAVec4f(SSAFloat f); + SSAVec4f(SSAFloat f0, SSAFloat f1, SSAFloat f2, SSAFloat f3); + explicit SSAVec4f(llvm::Value *v); + SSAVec4f(SSAVec4i i32); + SSAFloat operator[](SSAInt index) const; + static SSAVec4f insert_element(SSAVec4f vec4f, SSAFloat value, int index); + static SSAVec4f bitcast(SSAVec4i i32); + static SSAVec4f sqrt(SSAVec4f f); + static SSAVec4f rcp(SSAVec4f f); + static SSAVec4f sin(SSAVec4f val); + static SSAVec4f cos(SSAVec4f val); + static 
SSAVec4f pow(SSAVec4f val, SSAVec4f power); + static SSAVec4f exp(SSAVec4f val); + static SSAVec4f log(SSAVec4f val); + static SSAVec4f fma(SSAVec4f a, SSAVec4f b, SSAVec4f c); + static void transpose(SSAVec4f &row0, SSAVec4f &row1, SSAVec4f &row2, SSAVec4f &row3); + static SSAVec4f shuffle(const SSAVec4f &f0, int index0, int index1, int index2, int index3); + static SSAVec4f shuffle(const SSAVec4f &f0, const SSAVec4f &f1, int index0, int index1, int index2, int index3); + static SSAVec4f from_llvm(llvm::Value *v) { return SSAVec4f(v); } + static llvm::Type *llvm_type(); + + llvm::Value *v; + +private: + static SSAVec4f shuffle(const SSAVec4f &f0, const SSAVec4f &f1, int mask); +}; + +SSAVec4f operator+(const SSAVec4f &a, const SSAVec4f &b); +SSAVec4f operator-(const SSAVec4f &a, const SSAVec4f &b); +SSAVec4f operator*(const SSAVec4f &a, const SSAVec4f &b); +SSAVec4f operator/(const SSAVec4f &a, const SSAVec4f &b); + +SSAVec4f operator+(float a, const SSAVec4f &b); +SSAVec4f operator-(float a, const SSAVec4f &b); +SSAVec4f operator*(float a, const SSAVec4f &b); +SSAVec4f operator/(float a, const SSAVec4f &b); + +SSAVec4f operator+(const SSAVec4f &a, float b); +SSAVec4f operator-(const SSAVec4f &a, float b); +SSAVec4f operator*(const SSAVec4f &a, float b); +SSAVec4f operator/(const SSAVec4f &a, float b); diff --git a/src/r_compiler/ssa/ssa_vec4f_ptr.cpp b/src/r_compiler/ssa/ssa_vec4f_ptr.cpp new file mode 100644 index 0000000000..e2df64167a --- /dev/null +++ b/src/r_compiler/ssa/ssa_vec4f_ptr.cpp @@ -0,0 +1,50 @@ + +#include "ssa_vec4f_ptr.h" +#include "ssa_scope.h" +#include "r_compiler/llvm_include.h" + +SSAVec4fPtr::SSAVec4fPtr() +: v(0) +{ +} + +SSAVec4fPtr::SSAVec4fPtr(llvm::Value *v) +: v(v) +{ +} + +llvm::Type *SSAVec4fPtr::llvm_type() +{ + return llvm::VectorType::get(llvm::Type::getFloatTy(SSAScope::context()), 4)->getPointerTo(); +} + +SSAVec4fPtr SSAVec4fPtr::operator[](SSAInt index) const +{ + return 
SSAVec4fPtr::from_llvm(SSAScope::builder().CreateGEP(v, index.v, SSAScope::hint())); +} + +SSAVec4f SSAVec4fPtr::load() const +{ + return SSAVec4f::from_llvm(SSAScope::builder().CreateLoad(v, false, SSAScope::hint())); +} + +SSAVec4f SSAVec4fPtr::load_unaligned() const +{ + return SSAVec4f::from_llvm(SSAScope::builder().Insert(new llvm::LoadInst(v, SSAScope::hint(), false, 4), SSAScope::hint())); +} + +void SSAVec4fPtr::store(const SSAVec4f &new_value) +{ + SSAScope::builder().CreateAlignedStore(new_value.v, v, 16, false); +} + +void SSAVec4fPtr::store_unaligned(const SSAVec4f &new_value) +{ + /*llvm::Value *values[2] = + { + SSAScope::builder().CreateBitCast(v, llvm::Type::getFloatPtrTy(SSAScope::context())), + new_value.v + }; + SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse_storeu_ps), values);*/ + SSAScope::builder().CreateStore(new_value.v, v, false); +} diff --git a/src/r_compiler/ssa/ssa_vec4f_ptr.h b/src/r_compiler/ssa/ssa_vec4f_ptr.h new file mode 100644 index 0000000000..ab4e841900 --- /dev/null +++ b/src/r_compiler/ssa/ssa_vec4f_ptr.h @@ -0,0 +1,24 @@ + +#pragma once + +#include "ssa_int.h" +#include "ssa_vec4f.h" + +namespace llvm { class Value; } +namespace llvm { class Type; } + +class SSAVec4fPtr +{ +public: + SSAVec4fPtr(); + explicit SSAVec4fPtr(llvm::Value *v); + static SSAVec4fPtr from_llvm(llvm::Value *v) { return SSAVec4fPtr(v); } + static llvm::Type *llvm_type(); + SSAVec4fPtr operator[](SSAInt index) const; + SSAVec4f load() const; + SSAVec4f load_unaligned() const; + void store(const SSAVec4f &new_value); + void store_unaligned(const SSAVec4f &new_value); + + llvm::Value *v; +}; diff --git a/src/r_compiler/ssa/ssa_vec4i.cpp b/src/r_compiler/ssa/ssa_vec4i.cpp new file mode 100644 index 0000000000..80e07c8d48 --- /dev/null +++ b/src/r_compiler/ssa/ssa_vec4i.cpp @@ -0,0 +1,213 @@ + +#include "ssa_vec4i.h" +#include "ssa_vec4f.h" +#include "ssa_vec8s.h" +#include "ssa_vec16ub.h" +#include "ssa_int.h" +#include 
"ssa_scope.h" +#include "r_compiler/llvm_include.h" + +SSAVec4i::SSAVec4i() +: v(0) +{ +} + +SSAVec4i::SSAVec4i(int constant) +: v(0) +{ + std::vector constants; + constants.resize(4, llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, constant, true))); + v = llvm::ConstantVector::get(constants); +} + +SSAVec4i::SSAVec4i(int constant0, int constant1, int constant2, int constant3) +: v(0) +{ + std::vector constants; + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, constant0, true))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, constant1, true))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, constant2, true))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, constant3, true))); + v = llvm::ConstantVector::get(constants); +} + +SSAVec4i::SSAVec4i(llvm::Value *v) +: v(v) +{ +} + +SSAVec4i::SSAVec4i(SSAInt i) +: v(0) +{ + llvm::Type *m1xi32type = llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 1); + std::vector constants; + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 0))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 0))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 0))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 0))); + llvm::Value *mask = llvm::ConstantVector::get(constants); + v = SSAScope::builder().CreateShuffleVector(SSAScope::builder().CreateBitCast(i.v, m1xi32type, SSAScope::hint()), llvm::UndefValue::get(m1xi32type), mask, SSAScope::hint()); +} + +SSAVec4i::SSAVec4i(SSAVec4f f32) +: v(0) +{ + v = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_cvttps2dq), f32.v, SSAScope::hint()); +} + +SSAInt SSAVec4i::operator[](SSAInt index) +{ + return SSAInt::from_llvm(SSAScope::builder().CreateExtractElement(v, index.v, 
SSAScope::hint())); +} + +llvm::Type *SSAVec4i::llvm_type() +{ + return llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 4); +} + +SSAVec4i SSAVec4i::bitcast(SSAVec4f f32) +{ + return SSAVec4i::from_llvm(SSAScope::builder().CreateBitCast(f32.v, llvm_type(), SSAScope::hint())); +} + +SSAVec4i SSAVec4i::bitcast(SSAVec8s i16) +{ + return SSAVec4i::from_llvm(SSAScope::builder().CreateBitCast(i16.v, llvm_type(), SSAScope::hint())); +} + +SSAVec4i SSAVec4i::shuffle(const SSAVec4i &i0, int index0, int index1, int index2, int index3) +{ + return shuffle(i0, from_llvm(llvm::UndefValue::get(llvm_type())), index0, index1, index2, index3); +} + +SSAVec4i SSAVec4i::shuffle(const SSAVec4i &i0, const SSAVec4i &i1, int index0, int index1, int index2, int index3) +{ + std::vector constants; + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index0))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index1))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index2))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index3))); + llvm::Value *mask = llvm::ConstantVector::get(constants); + return SSAVec4i::from_llvm(SSAScope::builder().CreateShuffleVector(i0.v, i1.v, mask, SSAScope::hint())); +} + +void SSAVec4i::extend(SSAVec16ub a, SSAVec4i &out0, SSAVec4i &out1, SSAVec4i &out2, SSAVec4i &out3) +{ + SSAVec8s low = SSAVec8s::extendlo(a); + SSAVec8s high = SSAVec8s::extendhi(a); + out0 = extendlo(low); + out1 = extendhi(low); + out2 = extendlo(high); + out3 = extendhi(high); +} + +SSAVec4i SSAVec4i::extendhi(SSAVec8s i16) +{ + return SSAVec4i::bitcast(SSAVec8s::shuffle(i16, 0, 4, 8+4, 5, 8+5, 6, 8+6, 7, 8+7)); // _mm_unpackhi_epi16 +} + +SSAVec4i SSAVec4i::extendlo(SSAVec8s i16) +{ + return SSAVec4i::bitcast(SSAVec8s::shuffle(i16, 0, 0, 8+0, 1, 8+1, 2, 8+2, 3, 8+3)); // _mm_unpacklo_epi16 +} + +SSAVec4i SSAVec4i::combinehi(SSAVec8s a, 
SSAVec8s b) +{ + return SSAVec4i::bitcast(SSAVec8s::shuffle(a, b, 4, 8+4, 5, 8+5, 6, 8+6, 7, 8+7)); // _mm_unpackhi_epi16 +} + +SSAVec4i SSAVec4i::combinelo(SSAVec8s a, SSAVec8s b) +{ + return SSAVec4i::bitcast(SSAVec8s::shuffle(a, b, 0, 8+0, 1, 8+1, 2, 8+2, 3, 8+3)); // _mm_unpacklo_epi16 +} + +SSAVec4i SSAVec4i::sqrt(SSAVec4i f) +{ + return SSAVec4i::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_sqrt_pd), f.v, SSAScope::hint())); +} + +/* +SSAVec4i SSAVec4i::min_sse41(SSAVec4i a, SSAVec4i b) +{ + llvm::Value *values[2] = { a.v, b.v }; + return SSAVec4i::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse41_pminsd), values, SSAScope::hint())); +} + +SSAVec4i SSAVec4i::max_sse41(SSAVec4i a, SSAVec4i b) +{ + llvm::Value *values[2] = { a.v, b.v }; + return SSAVec4i::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse41_pmaxsd), values, SSAScope::hint())); +} +*/ + +SSAVec4i operator+(const SSAVec4i &a, const SSAVec4i &b) +{ + return SSAVec4i::from_llvm(SSAScope::builder().CreateAdd(a.v, b.v, SSAScope::hint())); +} + +SSAVec4i operator-(const SSAVec4i &a, const SSAVec4i &b) +{ + return SSAVec4i::from_llvm(SSAScope::builder().CreateSub(a.v, b.v, SSAScope::hint())); +} + +SSAVec4i operator*(const SSAVec4i &a, const SSAVec4i &b) +{ + return SSAVec4i::from_llvm(SSAScope::builder().CreateMul(a.v, b.v, SSAScope::hint())); +} + +SSAVec4i operator/(const SSAVec4i &a, const SSAVec4i &b) +{ + return SSAVec4i::from_llvm(SSAScope::builder().CreateSDiv(a.v, b.v, SSAScope::hint())); +} + +SSAVec4i operator+(int a, const SSAVec4i &b) +{ + return SSAVec4i(a) + b; +} + +SSAVec4i operator-(int a, const SSAVec4i &b) +{ + return SSAVec4i(a) - b; +} + +SSAVec4i operator*(int a, const SSAVec4i &b) +{ + return SSAVec4i(a) * b; +} + +SSAVec4i operator/(int a, const SSAVec4i &b) +{ + return SSAVec4i(a) / b; +} + +SSAVec4i operator+(const SSAVec4i &a, int b) +{ + return a + SSAVec4i(b); 
+} + +SSAVec4i operator-(const SSAVec4i &a, int b) +{ + return a - SSAVec4i(b); +} + +SSAVec4i operator*(const SSAVec4i &a, int b) +{ + return a * SSAVec4i(b); +} + +SSAVec4i operator/(const SSAVec4i &a, int b) +{ + return a / SSAVec4i(b); +} + +SSAVec4i operator<<(const SSAVec4i &a, int bits) +{ + //return SSAScope::builder().CreateShl(a.v, bits); + llvm::Value *values[2] = { a.v, llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)bits)) }; + return SSAVec4i::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_pslli_d), values, SSAScope::hint())); +} + +SSAVec4i operator>>(const SSAVec4i &a, int bits) +{ + return SSAVec4i::from_llvm(SSAScope::builder().CreateLShr(a.v, bits, SSAScope::hint())); +} diff --git a/src/r_compiler/ssa/ssa_vec4i.h b/src/r_compiler/ssa/ssa_vec4i.h new file mode 100644 index 0000000000..d19f1d1aab --- /dev/null +++ b/src/r_compiler/ssa/ssa_vec4i.h @@ -0,0 +1,56 @@ + +#pragma once + +namespace llvm { class Value; } +namespace llvm { class Type; } + +class SSAVec4f; +class SSAVec8s; +class SSAVec16ub; +class SSAInt; + +class SSAVec4i +{ +public: + SSAVec4i(); + SSAVec4i(int constant); + SSAVec4i(int constant0, int constant1, int constant2, int constant3); + SSAVec4i(SSAInt i); + explicit SSAVec4i(llvm::Value *v); + SSAVec4i(SSAVec4f f32); + SSAInt operator[](SSAInt index); + static SSAVec4i bitcast(SSAVec4f f32); + static SSAVec4i bitcast(SSAVec8s i16); + static SSAVec4i shuffle(const SSAVec4i &f0, int index0, int index1, int index2, int index3); + static SSAVec4i shuffle(const SSAVec4i &f0, const SSAVec4i &f1, int index0, int index1, int index2, int index3); + static SSAVec4i extendhi(SSAVec8s i16); + static SSAVec4i extendlo(SSAVec8s i16); + static void extend(SSAVec16ub a, SSAVec4i &out0, SSAVec4i &out1, SSAVec4i &out2, SSAVec4i &out3); + static SSAVec4i combinehi(SSAVec8s v0, SSAVec8s v1); + static SSAVec4i combinelo(SSAVec8s v0, SSAVec8s v1); + static SSAVec4i sqrt(SSAVec4i f); + 
//static SSAVec4i min_sse41(SSAVec4i a, SSAVec4i b); + //static SSAVec4i max_sse41(SSAVec4i a, SSAVec4i b); + static SSAVec4i from_llvm(llvm::Value *v) { return SSAVec4i(v); } + static llvm::Type *llvm_type(); + + llvm::Value *v; +}; + +SSAVec4i operator+(const SSAVec4i &a, const SSAVec4i &b); +SSAVec4i operator-(const SSAVec4i &a, const SSAVec4i &b); +SSAVec4i operator*(const SSAVec4i &a, const SSAVec4i &b); +SSAVec4i operator/(const SSAVec4i &a, const SSAVec4i &b); + +SSAVec4i operator+(int a, const SSAVec4i &b); +SSAVec4i operator-(int a, const SSAVec4i &b); +SSAVec4i operator*(int a, const SSAVec4i &b); +SSAVec4i operator/(int a, const SSAVec4i &b); + +SSAVec4i operator+(const SSAVec4i &a, int b); +SSAVec4i operator-(const SSAVec4i &a, int b); +SSAVec4i operator*(const SSAVec4i &a, int b); +SSAVec4i operator/(const SSAVec4i &a, int b); + +SSAVec4i operator<<(const SSAVec4i &a, int bits); +SSAVec4i operator>>(const SSAVec4i &a, int bits); diff --git a/src/r_compiler/ssa/ssa_vec4i_ptr.cpp b/src/r_compiler/ssa/ssa_vec4i_ptr.cpp new file mode 100644 index 0000000000..a28befb707 --- /dev/null +++ b/src/r_compiler/ssa/ssa_vec4i_ptr.cpp @@ -0,0 +1,50 @@ + +#include "ssa_vec4i_ptr.h" +#include "ssa_scope.h" +#include "r_compiler/llvm_include.h" + +SSAVec4iPtr::SSAVec4iPtr() +: v(0) +{ +} + +SSAVec4iPtr::SSAVec4iPtr(llvm::Value *v) +: v(v) +{ +} + +llvm::Type *SSAVec4iPtr::llvm_type() +{ + return llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 4)->getPointerTo(); +} + +SSAVec4iPtr SSAVec4iPtr::operator[](SSAInt index) const +{ + return SSAVec4iPtr::from_llvm(SSAScope::builder().CreateGEP(v, index.v, SSAScope::hint())); +} + +SSAVec4i SSAVec4iPtr::load() const +{ + return SSAVec4i::from_llvm(SSAScope::builder().CreateLoad(v, false, SSAScope::hint())); +} + +SSAVec4i SSAVec4iPtr::load_unaligned() const +{ + return SSAVec4i::from_llvm(SSAScope::builder().Insert(new llvm::LoadInst(v, SSAScope::hint(), false, 4))); +} + +void SSAVec4iPtr::store(const 
SSAVec4i &new_value) +{ + SSAScope::builder().CreateAlignedStore(new_value.v, v, 16, false); +} + +void SSAVec4iPtr::store_unaligned(const SSAVec4i &new_value) +{ + /*llvm::Value *values[2] = + { + v, + new_value.v + }; + SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_storeu_pd), values);*/ + SSAScope::builder().CreateStore(new_value.v, v, false); +} diff --git a/src/r_compiler/ssa/ssa_vec4i_ptr.h b/src/r_compiler/ssa/ssa_vec4i_ptr.h new file mode 100644 index 0000000000..56937b1cce --- /dev/null +++ b/src/r_compiler/ssa/ssa_vec4i_ptr.h @@ -0,0 +1,24 @@ + +#pragma once + +#include "ssa_int.h" +#include "ssa_vec4i.h" + +namespace llvm { class Value; } +namespace llvm { class Type; } + +class SSAVec4iPtr +{ +public: + SSAVec4iPtr(); + explicit SSAVec4iPtr(llvm::Value *v); + static SSAVec4iPtr from_llvm(llvm::Value *v) { return SSAVec4iPtr(v); } + static llvm::Type *llvm_type(); + SSAVec4iPtr operator[](SSAInt index) const; + SSAVec4i load() const; + SSAVec4i load_unaligned() const; + void store(const SSAVec4i &new_value); + void store_unaligned(const SSAVec4i &new_value); + + llvm::Value *v; +}; diff --git a/src/r_compiler/ssa/ssa_vec8s.cpp b/src/r_compiler/ssa/ssa_vec8s.cpp new file mode 100644 index 0000000000..d61a4c4a9c --- /dev/null +++ b/src/r_compiler/ssa/ssa_vec8s.cpp @@ -0,0 +1,178 @@ + +#include "ssa_vec8s.h" +#include "ssa_vec4i.h" +#include "ssa_vec16ub.h" +#include "ssa_scope.h" +#include "r_compiler/llvm_include.h" + +SSAVec8s::SSAVec8s() +: v(0) +{ +} + +SSAVec8s::SSAVec8s(short constant) +: v(0) +{ + std::vector constants; + constants.resize(8, llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(16, constant, true))); + v = llvm::ConstantVector::get(constants); +} + +SSAVec8s::SSAVec8s(short constant0, short constant1, short constant2, short constant3, short constant4, short constant5, short constant6, short constant7) +: v(0) +{ + std::vector constants; + 
constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(16, constant0, true))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(16, constant1, true))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(16, constant2, true))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(16, constant3, true))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(16, constant4, true))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(16, constant5, true))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(16, constant6, true))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(16, constant7, true))); + v = llvm::ConstantVector::get(constants); +} + +SSAVec8s::SSAVec8s(llvm::Value *v) +: v(v) +{ +} + +SSAVec8s::SSAVec8s(SSAVec4i i0, SSAVec4i i1) +: v(0) +{ + llvm::Value *values[2] = { i0.v, i1.v }; + v = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_packssdw_128), values, SSAScope::hint()); +} + +llvm::Type *SSAVec8s::llvm_type() +{ + return llvm::VectorType::get(llvm::Type::getInt16Ty(SSAScope::context()), 8); +} + +SSAVec8s SSAVec8s::bitcast(SSAVec16ub i8) +{ + return SSAVec8s::from_llvm(SSAScope::builder().CreateBitCast(i8.v, llvm_type(), SSAScope::hint())); +} + +SSAVec8s SSAVec8s::shuffle(const SSAVec8s &i0, int index0, int index1, int index2, int index3, int index4, int index5, int index6, int index7) +{ + return shuffle(i0, from_llvm(llvm::UndefValue::get(llvm_type())), index0, index1, index2, index3, index4, index5, index6, index7); +} + +SSAVec8s SSAVec8s::shuffle(const SSAVec8s &i0, const SSAVec8s &i1, int index0, int index1, int index2, int index3, int index4, int index5, int index6, int index7) +{ + std::vector constants; + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index0))); + 
constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index1))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index2))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index3))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index4))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index5))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index6))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index7))); + llvm::Value *mask = llvm::ConstantVector::get(constants); + return SSAVec8s::from_llvm(SSAScope::builder().CreateShuffleVector(i0.v, i1.v, mask, SSAScope::hint())); +} + +SSAVec8s SSAVec8s::extendhi(SSAVec16ub a) +{ + return SSAVec8s::bitcast(SSAVec16ub::shuffle(a, 0, 8, 16+8, 9, 16+9, 10, 16+10, 11, 16+11, 12, 16+12, 13, 16+13, 14, 16+14, 15, 16+15)); // _mm_unpackhi_epi8 +} + +SSAVec8s SSAVec8s::extendlo(SSAVec16ub a) +{ + return SSAVec8s::bitcast(SSAVec16ub::shuffle(a, 0, 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7)); // _mm_unpacklo_epi8 +} + +/* +SSAVec8s SSAVec8s::min_sse2(SSAVec8s a, SSAVec8s b) +{ + llvm::Value *values[2] = { a.v, b.v }; + return SSAVec8s::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_pmins_w), values, SSAScope::hint())); +} + +SSAVec8s SSAVec8s::max_sse2(SSAVec8s a, SSAVec8s b) +{ + llvm::Value *values[2] = { a.v, b.v }; + return SSAVec8s::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_pmaxs_w), values, SSAScope::hint())); +} +*/ + +SSAVec8s SSAVec8s::mulhi(SSAVec8s a, SSAVec8s b) +{ + llvm::Value *values[2] = { a.v, b.v }; + return SSAVec8s::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_pmulh_w), values, SSAScope::hint())); +} + +SSAVec8s operator+(const 
SSAVec8s &a, const SSAVec8s &b) +{ + return SSAVec8s::from_llvm(SSAScope::builder().CreateAdd(a.v, b.v, SSAScope::hint())); +} + +SSAVec8s operator-(const SSAVec8s &a, const SSAVec8s &b) +{ + return SSAVec8s::from_llvm(SSAScope::builder().CreateSub(a.v, b.v, SSAScope::hint())); +} + +SSAVec8s operator*(const SSAVec8s &a, const SSAVec8s &b) +{ + return SSAVec8s::from_llvm(SSAScope::builder().CreateMul(a.v, b.v, SSAScope::hint())); +} + +SSAVec8s operator/(const SSAVec8s &a, const SSAVec8s &b) +{ + return SSAVec8s::from_llvm(SSAScope::builder().CreateSDiv(a.v, b.v, SSAScope::hint())); +} + +SSAVec8s operator+(short a, const SSAVec8s &b) +{ + return SSAVec8s(a) + b; +} + +SSAVec8s operator-(short a, const SSAVec8s &b) +{ + return SSAVec8s(a) - b; +} + +SSAVec8s operator*(short a, const SSAVec8s &b) +{ + return SSAVec8s(a) * b; +} + +SSAVec8s operator/(short a, const SSAVec8s &b) +{ + return SSAVec8s(a) / b; +} + +SSAVec8s operator+(const SSAVec8s &a, short b) +{ + return a + SSAVec8s(b); +} + +SSAVec8s operator-(const SSAVec8s &a, short b) +{ + return a - SSAVec8s(b); +} + +SSAVec8s operator*(const SSAVec8s &a, short b) +{ + return a * SSAVec8s(b); +} + +SSAVec8s operator/(const SSAVec8s &a, short b) +{ + return a / SSAVec8s(b); +} + +SSAVec8s operator<<(const SSAVec8s &a, int bits) +{ + //return SSAScope::builder().CreateShl(a.v, bits); + llvm::Value *values[2] = { a.v, llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)bits)) }; + return SSAVec8s::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_pslli_d), values, SSAScope::hint())); +} + +SSAVec8s operator>>(const SSAVec8s &a, int bits) +{ + return SSAVec8s::from_llvm(SSAScope::builder().CreateLShr(a.v, bits, SSAScope::hint())); +} diff --git a/src/r_compiler/ssa/ssa_vec8s.h b/src/r_compiler/ssa/ssa_vec8s.h new file mode 100644 index 0000000000..aded358dde --- /dev/null +++ b/src/r_compiler/ssa/ssa_vec8s.h @@ -0,0 +1,48 @@ + +#pragma once + +namespace llvm { 
class Value; } +namespace llvm { class Type; } + +class SSAVec4i; +class SSAVec16ub; + +class SSAVec8s +{ +public: + SSAVec8s(); + SSAVec8s(short constant); + SSAVec8s(short constant0, short constant1, short constant2, short constant3, short constant4, short constant5, short constant6, short constant7); + explicit SSAVec8s(llvm::Value *v); + SSAVec8s(SSAVec4i i0, SSAVec4i i1); + static SSAVec8s bitcast(SSAVec16ub i8); + static SSAVec8s shuffle(const SSAVec8s &i0, int index0, int index1, int index2, int index3, int index4, int index5, int index6, int index7); + static SSAVec8s shuffle(const SSAVec8s &i0, const SSAVec8s &i1, int index0, int index1, int index2, int index3, int index4, int index5, int index6, int index7); + static SSAVec8s extendhi(SSAVec16ub a); + static SSAVec8s extendlo(SSAVec16ub a); + //static SSAVec8s min_sse2(SSAVec8s a, SSAVec8s b); + //static SSAVec8s max_sse2(SSAVec8s a, SSAVec8s b); + static SSAVec8s mulhi(SSAVec8s a, SSAVec8s b); + static SSAVec8s from_llvm(llvm::Value *v) { return SSAVec8s(v); } + static llvm::Type *llvm_type(); + + llvm::Value *v; +}; + +SSAVec8s operator+(const SSAVec8s &a, const SSAVec8s &b); +SSAVec8s operator-(const SSAVec8s &a, const SSAVec8s &b); +SSAVec8s operator*(const SSAVec8s &a, const SSAVec8s &b); +SSAVec8s operator/(const SSAVec8s &a, const SSAVec8s &b); + +SSAVec8s operator+(short a, const SSAVec8s &b); +SSAVec8s operator-(short a, const SSAVec8s &b); +SSAVec8s operator*(short a, const SSAVec8s &b); +SSAVec8s operator/(short a, const SSAVec8s &b); + +SSAVec8s operator+(const SSAVec8s &a, short b); +SSAVec8s operator-(const SSAVec8s &a, short b); +SSAVec8s operator*(const SSAVec8s &a, short b); +SSAVec8s operator/(const SSAVec8s &a, short b); + +SSAVec8s operator<<(const SSAVec8s &a, int bits); +SSAVec8s operator>>(const SSAVec8s &a, int bits); diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 0d86ead478..d54bad7aef 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -38,6 +38,7 @@ 
#include "r_data/colormaps.h" #include "r_plane.h" #include "r_draw_rgba.h" +#include "r_compiler/fixedfunction/fixedfunction.h" #include "gi.h" #include "stats.h" @@ -299,6 +300,68 @@ void DrawerCommandQueue::StopThreads() ///////////////////////////////////////////////////////////////////////////// +class DrawSpanFFCommand : public DrawerCommand +{ + fixed_t _xfrac; + fixed_t _yfrac; + fixed_t _xstep; + fixed_t _ystep; + int _x1; + int _x2; + int _y; + int _xbits; + int _ybits; + BYTE * RESTRICT _destorg; + + const uint32_t * RESTRICT _source; + uint32_t _light; + ShadeConstants _shade_constants; + bool _nearest_filter; + + uint32_t _srcalpha; + uint32_t _destalpha; + + FixedFunction *_ff; + +public: + DrawSpanFFCommand() + { + _xfrac = ds_xfrac; + _yfrac = ds_yfrac; + _xstep = ds_xstep; + _ystep = ds_ystep; + _x1 = ds_x1; + _x2 = ds_x2; + _y = ds_y; + _xbits = ds_xbits; + _ybits = ds_ybits; + _destorg = dc_destorg; + + _source = (const uint32_t*)ds_source; + _light = LightBgra::calc_light_multiplier(ds_light); + _shade_constants = ds_shade_constants; + _nearest_filter = !SampleBgra::span_sampler_setup(_source, _xbits, _ybits, _xstep, _ystep, ds_source_mipmapped); + + _srcalpha = dc_srcalpha >> (FRACBITS - 8); + _destalpha = dc_destalpha >> (FRACBITS - 8); + + static FixedFunction ff; + _ff = &ff; + } + + void Execute(DrawerThread *thread) override + { + if (thread->skipped_by_thread(_y)) + return; + + uint32_t *dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; + int count = _x2 - _x1 + 1; + _ff->DrawSpan(count, dest); + } +}; + +///////////////////////////////////////////////////////////////////////////// + class DrawerColumnCommand : public DrawerCommand { public: @@ -2700,11 +2763,14 @@ void R_DrawRevSubClampTranslatedColumn_rgba() void R_DrawSpan_rgba() { + DrawerCommandQueue::QueueCommand(); +/* #ifdef NO_SSE DrawerCommandQueue::QueueCommand(); #else DrawerCommandQueue::QueueCommand(); #endif +*/ } void R_DrawSpanMasked_rgba() From 
4f2ae42ed59307a5e823cb97df110f0920be6b34 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 26 Sep 2016 09:04:29 +0200 Subject: [PATCH 124/912] Revert duplicate entry in CMakeLists.txt --- src/CMakeLists.txt | 9 --------- 1 file changed, 9 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 4f9599b35b..09238ff57d 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -104,15 +104,6 @@ if( WIN32 ) endif() add_definitions( -D_WIN32 ) - - set( FMOD_SEARCH_PATHS - "C:/Program Files/FMOD SoundSystem/FMOD Programmers API ${WIN_TYPE}/api" - "C:/Program Files (x86)/FMOD SoundSystem/FMOD Programmers API ${WIN_TYPE}/api" - # This next one is for Randy. - "E:/Software/Dev/FMOD/${WIN_TYPE}/api" - ) - set( FMOD_INC_PATH_SUFFIXES PATH_SUFFIXES inc ) - set( FMOD_LIB_PATH_SUFFIXES PATH_SUFFIXES lib ) set( FMOD_SEARCH_PATHS "C:/Program Files/FMOD SoundSystem/FMOD Programmers API ${WIN_TYPE}/api" From d5c7a7ab76bb34e248a4dbb6ad3ca3800176da94 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 27 Sep 2016 03:07:03 +0200 Subject: [PATCH 125/912] Make LLVM compile and optimize for the current CPU --- .../fixedfunction/fixedfunction.cpp | 86 ++++++++++++++----- src/r_compiler/fixedfunction/fixedfunction.h | 4 + src/r_compiler/llvm_include.h | 11 ++- 3 files changed, 75 insertions(+), 26 deletions(-) diff --git a/src/r_compiler/fixedfunction/fixedfunction.cpp b/src/r_compiler/fixedfunction/fixedfunction.cpp index 347ba6de35..cc46b8d50a 100644 --- a/src/r_compiler/fixedfunction/fixedfunction.cpp +++ b/src/r_compiler/fixedfunction/fixedfunction.cpp @@ -13,36 +13,75 @@ RenderProgram::RenderProgram() { - llvm::install_fatal_error_handler([](void *user_data, const std::string& reason, bool gen_crash_diag) { - I_FatalError(reason.c_str()); + using namespace llvm; + + install_fatal_error_handler([](void *user_data, const std::string& reason, bool gen_crash_diag) { + I_FatalError("LLVM fatal error: %s", reason.c_str()); }); - 
//llvm::llvm_start_multithreaded(); - llvm::InitializeNativeTarget(); - llvm::InitializeNativeTargetAsmPrinter(); - llvm::InitializeNativeTargetAsmParser(); - - mContext = std::make_unique(); - - auto moduleOwner = std::make_unique("render", context()); - mModule = moduleOwner.get(); + InitializeNativeTarget(); + InitializeNativeTargetAsmPrinter(); + InitializeNativeTargetAsmParser(); std::string errorstring; - llvm::EngineBuilder engineBuilder(std::move(moduleOwner)); + + std::string targetTriple = sys::getProcessTriple(); + std::string cpuName = sys::getHostCPUName(); + StringMap cpuFeatures; + sys::getHostCPUFeatures(cpuFeatures); + std::string cpuFeaturesStr; + for (const auto &it : cpuFeatures) + { + if (!cpuFeaturesStr.empty()) + cpuFeaturesStr.push_back(' '); + cpuFeaturesStr.push_back(it.getValue() ? '+' : '-'); + cpuFeaturesStr += it.getKey(); + } + + Printf("LLVM target triple: %s\n", targetTriple.c_str()); + Printf("LLVM CPU and features: %s, %s\n", cpuName.c_str(), cpuFeaturesStr.c_str()); + + const Target *target = TargetRegistry::lookupTarget(targetTriple, errorstring); + if (!target) + I_FatalError("Could not find LLVM target: %s", errorstring.c_str()); + + TargetOptions opt; + auto relocModel = Optional(Reloc::Static); + TargetMachine *machine = target->createTargetMachine(targetTriple, cpuName, cpuFeaturesStr, opt, relocModel, CodeModel::Default, CodeGenOpt::Aggressive); + if (!machine) + I_FatalError("Could not create LLVM target machine"); + + mContext = std::make_unique(); + + auto moduleOwner = std::make_unique("render", context()); + mModule = moduleOwner.get(); + mModule->setTargetTriple(targetTriple); + mModule->setDataLayout(machine->createDataLayout()); + + EngineBuilder engineBuilder(std::move(moduleOwner)); engineBuilder.setErrorStr(&errorstring); - engineBuilder.setOptLevel(llvm::CodeGenOpt::Aggressive); - engineBuilder.setRelocationModel(llvm::Reloc::Static); - engineBuilder.setEngineKind(llvm::EngineKind::JIT); - 
mEngine.reset(engineBuilder.create()); + engineBuilder.setOptLevel(CodeGenOpt::Aggressive); + engineBuilder.setRelocationModel(Reloc::Static); + engineBuilder.setEngineKind(EngineKind::JIT); + mEngine.reset(engineBuilder.create(machine)); if (!mEngine) - I_FatalError(errorstring.c_str()); + I_FatalError("Could not create LLVM execution engine: %s", errorstring.c_str()); + + mModulePassManager = std::make_unique(); + mFunctionPassManager = std::make_unique(mModule); + + PassManagerBuilder passManagerBuilder; + passManagerBuilder.OptLevel = 3; + passManagerBuilder.SizeLevel = 0; + passManagerBuilder.Inliner = createFunctionInliningPass(); + passManagerBuilder.populateModulePassManager(*mModulePassManager.get()); + passManagerBuilder.populateFunctionPassManager(*mFunctionPassManager.get()); } RenderProgram::~RenderProgram() { mEngine.reset(); mContext.reset(); - //llvm::llvm_stop_multithreaded(); } void *RenderProgram::PointerToFunction(const char *name) @@ -57,6 +96,7 @@ FixedFunction::FixedFunction() { CodegenDrawSpan(); mProgram.engine()->finalizeObject(); + mProgram.modulePassManager()->run(*mProgram.module()); DrawSpan = mProgram.GetProcAddress("DrawSpan"); } @@ -81,12 +121,12 @@ void FixedFunction::CodegenDrawSpan() SSAInt index = stack_index.load(); loop.loop_block(index < count); - //SSAVec4i color(255, 255, 0, 255); - //data[index * 4].store_vec4ub(color); - data[index * 4].store(0); + SSAVec4i color(0, 128, 255, 255); + data[index * 4].store_vec4ub(color); + /*data[index * 4].store(0); data[index * 4 + 1].store(128); data[index * 4 + 2].store(255); - data[index * 4 + 3].store(255); + data[index * 4 + 3].store(255);*/ stack_index.store(index + 1); } loop.end_block(); @@ -95,6 +135,8 @@ void FixedFunction::CodegenDrawSpan() if (llvm::verifyFunction(*function.func)) I_FatalError("verifyFunction failed for " __FUNCTION__); + + mProgram.functionPassManager()->run(*function.func); } #if 0 diff --git a/src/r_compiler/fixedfunction/fixedfunction.h 
b/src/r_compiler/fixedfunction/fixedfunction.h index 4c81fc1081..7ee68032e1 100644 --- a/src/r_compiler/fixedfunction/fixedfunction.h +++ b/src/r_compiler/fixedfunction/fixedfunction.h @@ -26,6 +26,8 @@ public: llvm::LLVMContext &context() { return *mContext; } llvm::Module *module() { return mModule; } llvm::ExecutionEngine *engine() { return mEngine.get(); } + llvm::legacy::PassManager *modulePassManager() { return mModulePassManager.get(); } + llvm::legacy::FunctionPassManager *functionPassManager() { return mFunctionPassManager.get(); } private: void *PointerToFunction(const char *name); @@ -33,6 +35,8 @@ private: std::unique_ptr mContext; llvm::Module *mModule; std::unique_ptr mEngine; + std::unique_ptr mModulePassManager; + std::unique_ptr mFunctionPassManager; }; class FixedFunction diff --git a/src/r_compiler/llvm_include.h b/src/r_compiler/llvm_include.h index 1eed549e10..b916bad0e9 100644 --- a/src/r_compiler/llvm_include.h +++ b/src/r_compiler/llvm_include.h @@ -20,19 +20,22 @@ #pragma warning(disable: 4291) // warning C4291: 'void *llvm::User::operator new(std::size_t,unsigned int,unsigned int)': no matching operator delete found; memory will not be freed if initialization throws an exception #include -#include -#include #include #include #include #include +#include +#include +#include +#include +#include +#include #include #include #include #include #include -#include -#include +#include #include #include From 1b7827342ccdedbd11339df2d0b1df70a2882bb4 Mon Sep 17 00:00:00 2001 From: raa-eruanna Date: Tue, 27 Sep 2016 04:14:00 -0400 Subject: [PATCH 126/912] - Fixed: Changed r_clearbuffer to do a little more than clear the top quarter of the screen. Still need to do an actual color fill - will do it later. 
--- src/r_swrenderer.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/r_swrenderer.cpp b/src/r_swrenderer.cpp index 5be8475027..368b1c3fd8 100644 --- a/src/r_swrenderer.cpp +++ b/src/r_swrenderer.cpp @@ -289,7 +289,12 @@ void FSoftwareRenderer::ErrorCleanup () void FSoftwareRenderer::ClearBuffer(int color) { - memset(RenderTarget->GetBuffer(), color, RenderTarget->GetPitch() * RenderTarget->GetHeight()); + // [SP] For now, for truecolor, this just outputs black. We'll figure out how to get something more meaningful + // later when this actually matters more. This is just to clear HOMs for now. + if (!r_swtruecolor) + memset(RenderTarget->GetBuffer(), color, RenderTarget->GetPitch() * RenderTarget->GetHeight()); + else + memset(RenderTarget->GetBuffer(), 0, RenderTarget->GetPitch() * RenderTarget->GetHeight() * 4); } //=========================================================================== From 20f67ad40a96b90e17fe44e98a026ac6cc9dac7c Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 27 Sep 2016 22:53:20 +0200 Subject: [PATCH 127/912] Add SSAShort, shift, and, or, and fix unaligned store --- src/CMakeLists.txt | 1 + src/r_compiler/ssa/ssa_float_ptr.cpp | 11 +- src/r_compiler/ssa/ssa_int.cpp | 30 ++++++ src/r_compiler/ssa/ssa_int.h | 7 ++ src/r_compiler/ssa/ssa_int_ptr.cpp | 4 +- src/r_compiler/ssa/ssa_short.cpp | 148 +++++++++++++++++++++++++++ src/r_compiler/ssa/ssa_short.h | 49 +++++++++ src/r_compiler/ssa/ssa_ubyte_ptr.cpp | 10 +- src/r_compiler/ssa/ssa_vec4f_ptr.cpp | 10 +- src/r_compiler/ssa/ssa_vec4i_ptr.cpp | 10 +- 10 files changed, 245 insertions(+), 35 deletions(-) create mode 100644 src/r_compiler/ssa/ssa_short.cpp create mode 100644 src/r_compiler/ssa/ssa_short.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 09238ff57d..4b81a24f44 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1434,6 +1434,7 @@ set (PCH_SOURCES r_compiler/ssa/ssa_if_block.cpp r_compiler/ssa/ssa_int.cpp 
r_compiler/ssa/ssa_int_ptr.cpp + r_compiler/ssa/ssa_short.cpp r_compiler/ssa/ssa_scope.cpp r_compiler/ssa/ssa_struct_type.cpp r_compiler/ssa/ssa_ubyte.cpp diff --git a/src/r_compiler/ssa/ssa_float_ptr.cpp b/src/r_compiler/ssa/ssa_float_ptr.cpp index 4413c6e923..6a1409271b 100644 --- a/src/r_compiler/ssa/ssa_float_ptr.cpp +++ b/src/r_compiler/ssa/ssa_float_ptr.cpp @@ -38,7 +38,6 @@ SSAVec4f SSAFloatPtr::load_unaligned_vec4f() const { llvm::PointerType *m4xfloattypeptr = llvm::VectorType::get(llvm::Type::getFloatTy(SSAScope::context()), 4)->getPointerTo(); return SSAVec4f::from_llvm(SSAScope::builder().Insert(new llvm::LoadInst(SSAScope::builder().CreateBitCast(v, m4xfloattypeptr, SSAScope::hint()), SSAScope::hint(), false, 4), SSAScope::hint())); - // return SSAVec4f::from_llvm(SSAScope::builder().CreateCall(get_intrinsic(llvm::Intrinsic::x86_sse2_loadu_dq), SSAScope::builder().CreateBitCast(v, llvm::PointerType::getUnqual(llvm::IntegerType::get(SSAScope::context(), 8))))); } void SSAFloatPtr::store(const SSAFloat &new_value) @@ -49,17 +48,11 @@ void SSAFloatPtr::store(const SSAFloat &new_value) void SSAFloatPtr::store_vec4f(const SSAVec4f &new_value) { llvm::PointerType *m4xfloattypeptr = llvm::VectorType::get(llvm::Type::getFloatTy(SSAScope::context()), 4)->getPointerTo(); - SSAScope::builder().CreateAlignedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xfloattypeptr, SSAScope::hint()), 16); + SSAScope::builder().CreateStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xfloattypeptr, SSAScope::hint())); } void SSAFloatPtr::store_unaligned_vec4f(const SSAVec4f &new_value) { - /*llvm::Value *values[2] = - { - SSAScope::builder().CreateBitCast(v, llvm::Type::getFloatPtrTy(SSAScope::context())), - new_value.v - }; - SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse_storeu_ps), values);*/ llvm::PointerType *m4xfloattypeptr = llvm::VectorType::get(llvm::Type::getFloatTy(SSAScope::context()), 4)->getPointerTo(); - 
SSAScope::builder().CreateStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xfloattypeptr, SSAScope::hint())); + SSAScope::builder().CreateAlignedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xfloattypeptr, SSAScope::hint()), 4); } diff --git a/src/r_compiler/ssa/ssa_int.cpp b/src/r_compiler/ssa/ssa_int.cpp index 9f3c54f50c..674f44350f 100644 --- a/src/r_compiler/ssa/ssa_int.cpp +++ b/src/r_compiler/ssa/ssa_int.cpp @@ -115,3 +115,33 @@ SSAInt operator>>(const SSAInt &a, int bits) { return SSAInt::from_llvm(SSAScope::builder().CreateLShr(a.v, bits, SSAScope::hint())); } + +SSAInt operator<<(const SSAInt &a, const SSAInt &bits) +{ + return SSAInt::from_llvm(SSAScope::builder().CreateShl(a.v, bits.v, SSAScope::hint())); +} + +SSAInt operator>>(const SSAInt &a, const SSAInt &bits) +{ + return SSAInt::from_llvm(SSAScope::builder().CreateLShr(a.v, bits.v, SSAScope::hint())); +} + +SSAInt operator&(const SSAInt &a, int b) +{ + return SSAInt::from_llvm(SSAScope::builder().CreateAnd(a.v, b, SSAScope::hint())); +} + +SSAInt operator&(const SSAInt &a, const SSAInt &b) +{ + return SSAInt::from_llvm(SSAScope::builder().CreateAnd(a.v, b.v, SSAScope::hint())); +} + +SSAInt operator|(const SSAInt &a, int b) +{ + return SSAInt::from_llvm(SSAScope::builder().CreateOr(a.v, b, SSAScope::hint())); +} + +SSAInt operator|(const SSAInt &a, const SSAInt &b) +{ + return SSAInt::from_llvm(SSAScope::builder().CreateOr(a.v, b.v, SSAScope::hint())); +} diff --git a/src/r_compiler/ssa/ssa_int.h b/src/r_compiler/ssa/ssa_int.h index 0be37ee7eb..5e373c62e3 100644 --- a/src/r_compiler/ssa/ssa_int.h +++ b/src/r_compiler/ssa/ssa_int.h @@ -39,3 +39,10 @@ SSAInt operator%(const SSAInt &a, int b); SSAInt operator<<(const SSAInt &a, int bits); SSAInt operator>>(const SSAInt &a, int bits); +SSAInt operator<<(const SSAInt &a, const SSAInt &bits); +SSAInt operator>>(const SSAInt &a, const SSAInt &bits); + +SSAInt operator&(const SSAInt &a, int b); +SSAInt operator&(const SSAInt &a, const 
SSAInt &b); +SSAInt operator|(const SSAInt &a, int b); +SSAInt operator|(const SSAInt &a, const SSAInt &b); diff --git a/src/r_compiler/ssa/ssa_int_ptr.cpp b/src/r_compiler/ssa/ssa_int_ptr.cpp index dd0ca17f6f..3c26370736 100644 --- a/src/r_compiler/ssa/ssa_int_ptr.cpp +++ b/src/r_compiler/ssa/ssa_int_ptr.cpp @@ -48,11 +48,11 @@ void SSAIntPtr::store(const SSAInt &new_value) void SSAIntPtr::store_vec4i(const SSAVec4i &new_value) { llvm::PointerType *m4xint32typeptr = llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 4)->getPointerTo(); - SSAScope::builder().CreateAlignedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xint32typeptr, SSAScope::hint()), 16); + SSAScope::builder().CreateStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xint32typeptr, SSAScope::hint())); } void SSAIntPtr::store_unaligned_vec4i(const SSAVec4i &new_value) { llvm::PointerType *m4xint32typeptr = llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 4)->getPointerTo(); - SSAScope::builder().CreateStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xint32typeptr, SSAScope::hint())); + SSAScope::builder().CreateAlignedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xint32typeptr, SSAScope::hint()), 4); } diff --git a/src/r_compiler/ssa/ssa_short.cpp b/src/r_compiler/ssa/ssa_short.cpp new file mode 100644 index 0000000000..fc8de9449b --- /dev/null +++ b/src/r_compiler/ssa/ssa_short.cpp @@ -0,0 +1,148 @@ + +#include "ssa_short.h" +#include "ssa_float.h" +#include "ssa_int.h" +#include "ssa_scope.h" +#include "r_compiler/llvm_include.h" + +SSAShort::SSAShort() +: v(0) +{ +} + +SSAShort::SSAShort(int constant) +: v(0) +{ + v = llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(16, constant, true)); +} + +SSAShort::SSAShort(SSAFloat f) +: v(0) +{ + v = SSAScope::builder().CreateFPToSI(f.v, llvm::Type::getInt16Ty(SSAScope::context()), SSAScope::hint()); +} + +SSAShort::SSAShort(llvm::Value *v) +: v(v) +{ +} + +llvm::Type 
*SSAShort::llvm_type() +{ + return llvm::Type::getInt16Ty(SSAScope::context()); +} + +SSAShort operator+(const SSAShort &a, const SSAShort &b) +{ + return SSAShort::from_llvm(SSAScope::builder().CreateAdd(a.v, b.v, SSAScope::hint())); +} + +SSAShort operator-(const SSAShort &a, const SSAShort &b) +{ + return SSAShort::from_llvm(SSAScope::builder().CreateSub(a.v, b.v, SSAScope::hint())); +} + +SSAShort operator*(const SSAShort &a, const SSAShort &b) +{ + return SSAShort::from_llvm(SSAScope::builder().CreateMul(a.v, b.v, SSAScope::hint())); +} + +SSAShort operator/(const SSAShort &a, const SSAShort &b) +{ + return SSAShort::from_llvm(SSAScope::builder().CreateSDiv(a.v, b.v, SSAScope::hint())); +} + +SSAShort operator%(const SSAShort &a, const SSAShort &b) +{ + return SSAShort::from_llvm(SSAScope::builder().CreateSRem(a.v, b.v, SSAScope::hint())); +} + +SSAShort operator+(int a, const SSAShort &b) +{ + return SSAShort(a) + b; +} + +SSAShort operator-(int a, const SSAShort &b) +{ + return SSAShort(a) - b; +} + +SSAShort operator*(int a, const SSAShort &b) +{ + return SSAShort(a) * b; +} + +SSAShort operator/(int a, const SSAShort &b) +{ + return SSAShort(a) / b; +} + +SSAShort operator%(int a, const SSAShort &b) +{ + return SSAShort(a) % b; +} + +SSAShort operator+(const SSAShort &a, int b) +{ + return a + SSAShort(b); +} + +SSAShort operator-(const SSAShort &a, int b) +{ + return a - SSAShort(b); +} + +SSAShort operator*(const SSAShort &a, int b) +{ + return a * SSAShort(b); +} + +SSAShort operator/(const SSAShort &a, int b) +{ + return a / SSAShort(b); +} + +SSAShort operator%(const SSAShort &a, int b) +{ + return a % SSAShort(b); +} + +SSAShort operator<<(const SSAShort &a, int bits) +{ + return SSAShort::from_llvm(SSAScope::builder().CreateShl(a.v, bits, SSAScope::hint())); +} + +SSAShort operator>>(const SSAShort &a, int bits) +{ + return SSAShort::from_llvm(SSAScope::builder().CreateLShr(a.v, bits, SSAScope::hint())); +} + +SSAShort operator<<(const SSAShort &a, 
const SSAInt &bits) +{ + return SSAShort::from_llvm(SSAScope::builder().CreateShl(a.v, bits.v, SSAScope::hint())); +} + +SSAShort operator>>(const SSAShort &a, const SSAInt &bits) +{ + return SSAShort::from_llvm(SSAScope::builder().CreateLShr(a.v, bits.v, SSAScope::hint())); +} + +SSAShort operator&(const SSAShort &a, int b) +{ + return SSAShort::from_llvm(SSAScope::builder().CreateAnd(a.v, b, SSAScope::hint())); +} + +SSAShort operator&(const SSAShort &a, const SSAShort &b) +{ + return SSAShort::from_llvm(SSAScope::builder().CreateAnd(a.v, b.v, SSAScope::hint())); +} + +SSAShort operator|(const SSAShort &a, int b) +{ + return SSAShort::from_llvm(SSAScope::builder().CreateOr(a.v, b, SSAScope::hint())); +} + +SSAShort operator|(const SSAShort &a, const SSAShort &b) +{ + return SSAShort::from_llvm(SSAScope::builder().CreateOr(a.v, b.v, SSAScope::hint())); +} diff --git a/src/r_compiler/ssa/ssa_short.h b/src/r_compiler/ssa/ssa_short.h new file mode 100644 index 0000000000..ae71a13363 --- /dev/null +++ b/src/r_compiler/ssa/ssa_short.h @@ -0,0 +1,49 @@ + +#pragma once + +namespace llvm { class Value; } +namespace llvm { class Type; } + +class SSAFloat; +class SSAInt; + +class SSAShort +{ +public: + SSAShort(); + SSAShort(int constant); + SSAShort(SSAFloat f); + explicit SSAShort(llvm::Value *v); + static SSAShort from_llvm(llvm::Value *v) { return SSAShort(v); } + static llvm::Type *llvm_type(); + + llvm::Value *v; +}; + +SSAShort operator+(const SSAShort &a, const SSAShort &b); +SSAShort operator-(const SSAShort &a, const SSAShort &b); +SSAShort operator*(const SSAShort &a, const SSAShort &b); +SSAShort operator/(const SSAShort &a, const SSAShort &b); +SSAShort operator%(const SSAShort &a, const SSAShort &b); + +SSAShort operator+(int a, const SSAShort &b); +SSAShort operator-(int a, const SSAShort &b); +SSAShort operator*(int a, const SSAShort &b); +SSAShort operator/(int a, const SSAShort &b); +SSAShort operator%(int a, const SSAShort &b); + +SSAShort operator+(const 
SSAShort &a, int b); +SSAShort operator-(const SSAShort &a, int b); +SSAShort operator*(const SSAShort &a, int b); +SSAShort operator/(const SSAShort &a, int b); +SSAShort operator%(const SSAShort &a, int b); + +SSAShort operator<<(const SSAShort &a, int bits); +SSAShort operator>>(const SSAShort &a, int bits); +SSAShort operator<<(const SSAShort &a, const SSAInt &bits); +SSAShort operator>>(const SSAShort &a, const SSAInt &bits); + +SSAShort operator&(const SSAShort &a, int b); +SSAShort operator&(const SSAShort &a, const SSAShort &b); +SSAShort operator|(const SSAShort &a, int b); +SSAShort operator|(const SSAShort &a, const SSAShort &b); diff --git a/src/r_compiler/ssa/ssa_ubyte_ptr.cpp b/src/r_compiler/ssa/ssa_ubyte_ptr.cpp index 825806148b..b2408066ee 100644 --- a/src/r_compiler/ssa/ssa_ubyte_ptr.cpp +++ b/src/r_compiler/ssa/ssa_ubyte_ptr.cpp @@ -86,7 +86,7 @@ void SSAUBytePtr::store_vec4ub(const SSAVec4i &new_value) void SSAUBytePtr::store_vec16ub(const SSAVec16ub &new_value) { llvm::PointerType *m16xint8typeptr = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 16)->getPointerTo(); - llvm::StoreInst *inst = SSAScope::builder().CreateAlignedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m16xint8typeptr, SSAScope::hint()), 16); + llvm::StoreInst *inst = SSAScope::builder().CreateStore(new_value.v, SSAScope::builder().CreateBitCast(v, m16xint8typeptr, SSAScope::hint())); // The following generates _mm_stream_si128, maybe! 
// llvm::MDNode *node = llvm::MDNode::get(SSAScope::context(), SSAScope::builder().getInt32(1)); @@ -95,12 +95,6 @@ void SSAUBytePtr::store_vec16ub(const SSAVec16ub &new_value) void SSAUBytePtr::store_unaligned_vec16ub(const SSAVec16ub &new_value) { - /*llvm::Value *values[2] = - { - SSAScope::builder().CreateBitCast(v, llvm::Type::getInt8PtrTy(SSAScope::context())), - new_value.v - }; - SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_storeu_dq), values);*/ llvm::PointerType *m16xint8typeptr = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 16)->getPointerTo(); - llvm::StoreInst *inst = SSAScope::builder().CreateStore(new_value.v, SSAScope::builder().CreateBitCast(v, m16xint8typeptr, SSAScope::hint())); + llvm::StoreInst *inst = SSAScope::builder().CreateAlignedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m16xint8typeptr, SSAScope::hint()), 4); } diff --git a/src/r_compiler/ssa/ssa_vec4f_ptr.cpp b/src/r_compiler/ssa/ssa_vec4f_ptr.cpp index e2df64167a..6a197ec90e 100644 --- a/src/r_compiler/ssa/ssa_vec4f_ptr.cpp +++ b/src/r_compiler/ssa/ssa_vec4f_ptr.cpp @@ -35,16 +35,10 @@ SSAVec4f SSAVec4fPtr::load_unaligned() const void SSAVec4fPtr::store(const SSAVec4f &new_value) { - SSAScope::builder().CreateAlignedStore(new_value.v, v, 16, false); + SSAScope::builder().CreateStore(new_value.v, v, false); } void SSAVec4fPtr::store_unaligned(const SSAVec4f &new_value) { - /*llvm::Value *values[2] = - { - SSAScope::builder().CreateBitCast(v, llvm::Type::getFloatPtrTy(SSAScope::context())), - new_value.v - }; - SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse_storeu_ps), values);*/ - SSAScope::builder().CreateStore(new_value.v, v, false); + SSAScope::builder().CreateAlignedStore(new_value.v, v, 4, false); } diff --git a/src/r_compiler/ssa/ssa_vec4i_ptr.cpp b/src/r_compiler/ssa/ssa_vec4i_ptr.cpp index a28befb707..7138c30d2a 100644 --- a/src/r_compiler/ssa/ssa_vec4i_ptr.cpp +++ 
b/src/r_compiler/ssa/ssa_vec4i_ptr.cpp @@ -35,16 +35,10 @@ SSAVec4i SSAVec4iPtr::load_unaligned() const void SSAVec4iPtr::store(const SSAVec4i &new_value) { - SSAScope::builder().CreateAlignedStore(new_value.v, v, 16, false); + SSAScope::builder().CreateStore(new_value.v, v, false); } void SSAVec4iPtr::store_unaligned(const SSAVec4i &new_value) { - /*llvm::Value *values[2] = - { - v, - new_value.v - }; - SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_storeu_pd), values);*/ - SSAScope::builder().CreateStore(new_value.v, v, false); + SSAScope::builder().CreateAlignedStore(new_value.v, v, 4, false); } From f9a7186550bf6b6e72f9770ba79e3789b7acf541 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 27 Sep 2016 22:54:37 +0200 Subject: [PATCH 128/912] Improve DrawSpan codegen enough to do the simple shade for 64x64 flats --- .../fixedfunction/fixedfunction.cpp | 112 +++++++++++++++--- src/r_compiler/fixedfunction/fixedfunction.h | 26 +++- src/r_draw_rgba.cpp | 20 +++- 3 files changed, 139 insertions(+), 19 deletions(-) diff --git a/src/r_compiler/fixedfunction/fixedfunction.cpp b/src/r_compiler/fixedfunction/fixedfunction.cpp index cc46b8d50a..8f8b09f23b 100644 --- a/src/r_compiler/fixedfunction/fixedfunction.cpp +++ b/src/r_compiler/fixedfunction/fixedfunction.cpp @@ -38,8 +38,8 @@ RenderProgram::RenderProgram() cpuFeaturesStr += it.getKey(); } - Printf("LLVM target triple: %s\n", targetTriple.c_str()); - Printf("LLVM CPU and features: %s, %s\n", cpuName.c_str(), cpuFeaturesStr.c_str()); + //Printf("LLVM target triple: %s\n", targetTriple.c_str()); + //Printf("LLVM CPU and features: %s, %s\n", cpuName.c_str(), cpuFeaturesStr.c_str()); const Target *target = TargetRegistry::lookupTarget(targetTriple, errorstring); if (!target) @@ -98,7 +98,7 @@ FixedFunction::FixedFunction() mProgram.engine()->finalizeObject(); mProgram.modulePassManager()->run(*mProgram.module()); - DrawSpan = mProgram.GetProcAddress("DrawSpan"); + DrawSpan = 
mProgram.GetProcAddress("DrawSpan"); } void FixedFunction::CodegenDrawSpan() @@ -107,29 +107,90 @@ void FixedFunction::CodegenDrawSpan() SSAScope ssa_scope(&mProgram.context(), mProgram.module(), &builder); SSAFunction function("DrawSpan"); - function.add_parameter(SSAInt::llvm_type()); - function.add_parameter(SSAUBytePtr::llvm_type()); + function.add_parameter(GetRenderArgsStruct(mProgram.context())); function.create_public(); - SSAInt count = function.parameter(0); - SSAUBytePtr data = function.parameter(1); - SSAStack stack_index; + SSAStack stack_index, stack_xfrac, stack_yfrac; + SSAValue args = function.parameter(0); + SSAUBytePtr destorg = args[0][0].load(); + SSAUBytePtr source = args[0][1].load(); + SSAInt destpitch = args[0][2].load(); + stack_xfrac.store(args[0][3].load()); + stack_yfrac.store(args[0][4].load()); + SSAInt xstep = args[0][5].load(); + SSAInt ystep = args[0][6].load(); + SSAInt x1 = args[0][7].load(); + SSAInt x2 = args[0][8].load(); + SSAInt y = args[0][9].load(); + SSAInt xbits = args[0][10].load(); + SSAInt ybits = args[0][11].load(); + SSAInt light = args[0][12].load(); + SSAInt srcalpha = args[0][13].load(); + SSAInt destalpha = args[0][14].load(); + + SSAInt count = x2 - x1 + 1; + SSAUBytePtr data = destorg[(x1 + y * destpitch) * 4]; + + SSAInt yshift = 32 - ybits; + SSAInt xshift = yshift - xbits; + SSAInt xmask = ((SSAInt(1) << xbits) - 1) << ybits; + //is_64x64 = xbits == 6 && ybits == 6; + + SSAInt sseLength = count / 4; stack_index.store(0); - SSAForBlock loop; { + SSAForBlock loop; + SSAInt index = stack_index.load(); + loop.loop_block(index < sseLength); + + SSAVec4i colors[4]; + for (int i = 0; i < 4; i++) + { + SSAInt xfrac = stack_xfrac.load(); + SSAInt yfrac = stack_yfrac.load(); + + // 64x64 is the most common case by far, so special case it. 
+ SSAInt spot64 = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + //SSAInt spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + + //*loop.dest = LightBgra::shade_bgra(_source[loop.spot64()], _light, _shade_constants); + colors[i] = source[spot64 * 4].load_vec4ub() * light / 256; + + stack_xfrac.store(xfrac + xstep); + stack_yfrac.store(yfrac + ystep); + } + + SSAVec16ub ssecolors(SSAVec8s(colors[0], colors[1]), SSAVec8s(colors[2], colors[3])); + data[index * 16].store_unaligned_vec16ub(ssecolors); + + stack_index.store(index + 1); + loop.end_block(); + } + + stack_index.store(sseLength * 4); + { + SSAForBlock loop; SSAInt index = stack_index.load(); loop.loop_block(index < count); - SSAVec4i color(0, 128, 255, 255); + SSAInt xfrac = stack_xfrac.load(); + SSAInt yfrac = stack_yfrac.load(); + + // 64x64 is the most common case by far, so special case it. + SSAInt spot64 = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + //SSAInt spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + + //*loop.dest = LightBgra::shade_bgra(_source[loop.spot64()], _light, _shade_constants); + SSAVec4i color = source[spot64 * 4].load_vec4ub(); + color = color * light / 256; data[index * 4].store_vec4ub(color); - /*data[index * 4].store(0); - data[index * 4 + 1].store(128); - data[index * 4 + 2].store(255); - data[index * 4 + 3].store(255);*/ + stack_index.store(index + 1); + stack_xfrac.store(xfrac + xstep); + stack_yfrac.store(yfrac + ystep); + loop.end_block(); } - loop.end_block(); builder.CreateRetVoid(); @@ -139,6 +200,27 @@ void FixedFunction::CodegenDrawSpan() mProgram.functionPassManager()->run(*function.func); } +llvm::Type *FixedFunction::GetRenderArgsStruct(llvm::LLVMContext &context) +{ + std::vector elements; + elements.push_back(llvm::Type::getInt8PtrTy(context)); // uint8_t *destorg; + elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint8_t *source; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t destpitch; 
+ elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t xfrac; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t yfrac; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t xstep; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t ystep; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t x1; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t x2; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t y; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t xbits; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t ybits; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t light; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t srcalpha; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t destalpha; + return llvm::StructType::get(context, elements, false)->getPointerTo(); +} + #if 0 GlslFixedFunction::GlslFixedFunction(GlslProgram &program, GlslCodeGen &vertex_codegen, GlslCodeGen &fragment_codegen) diff --git a/src/r_compiler/fixedfunction/fixedfunction.h b/src/r_compiler/fixedfunction/fixedfunction.h index 7ee68032e1..3bbf05abe1 100644 --- a/src/r_compiler/fixedfunction/fixedfunction.h +++ b/src/r_compiler/fixedfunction/fixedfunction.h @@ -6,6 +6,7 @@ #include "r_compiler/ssa/ssa_vec8s.h" #include "r_compiler/ssa/ssa_vec16ub.h" #include "r_compiler/ssa/ssa_int.h" +#include "r_compiler/ssa/ssa_short.h" #include "r_compiler/ssa/ssa_ubyte_ptr.h" #include "r_compiler/ssa/ssa_vec4f_ptr.h" #include "r_compiler/ssa/ssa_vec4i_ptr.h" @@ -39,16 +40,39 @@ private: std::unique_ptr mFunctionPassManager; }; +struct RenderArgs +{ + uint32_t *destorg; + const uint32_t *source; + int32_t destpitch; + int32_t xfrac; + int32_t yfrac; + int32_t xstep; + int32_t ystep; + int32_t x1; + int32_t x2; + int32_t y; + int32_t xbits; + int32_t ybits; + uint32_t light; + uint32_t srcalpha; + uint32_t destalpha; + 
//ShadeConstants _shade_constants; + //int32_t nearest_filter; +}; + class FixedFunction { public: FixedFunction(); - void(*DrawSpan)(int, uint32_t *) = nullptr; + void(*DrawSpan)(const RenderArgs *) = nullptr; private: void CodegenDrawSpan(); + static llvm::Type *GetRenderArgsStruct(llvm::LLVMContext &context); + RenderProgram mProgram; }; diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index d54bad7aef..9757390957 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -354,9 +354,23 @@ public: if (thread->skipped_by_thread(_y)) return; - uint32_t *dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; - int count = _x2 - _x1 + 1; - _ff->DrawSpan(count, dest); + RenderArgs args; + args.destorg = (uint32_t *)_destorg; + args.source = _source; + args.destpitch = dc_pitch; + args.xfrac = _xfrac; + args.yfrac = _yfrac; + args.xstep = _xstep; + args.ystep = _ystep; + args.x1 = _x1; + args.x2 = _x2; + args.y = _y; + args.xbits = _xbits; + args.ybits = _ybits; + args.light = _light; + args.srcalpha = _srcalpha; + args.destalpha = _destalpha; + _ff->DrawSpan(&args); } }; From d05ed3740b90dfda8b6ab8afbb688baa9439c619 Mon Sep 17 00:00:00 2001 From: raa-eruanna Date: Tue, 27 Sep 2016 19:27:57 -0400 Subject: [PATCH 129/912] Corrected forum links in version.h --- src/version.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/version.h b/src/version.h index 0c9b851206..c7a95e66ef 100644 --- a/src/version.h +++ b/src/version.h @@ -90,8 +90,8 @@ const char *GetVersionString(); // More stuff that needs to be different for derivatives. 
#define GAMENAME "QZDoom" #define GAMENAMELOWERCASE "qzdoom" -#define FORUM_URL "http://forum.drdteam.org" -#define BUGS_FORUM_URL "http://forum.drdteam.org/viewforum.php?f=24" +#define FORUM_URL "http://forum.drdteam.org/viewforum.php?f=196" +#define BUGS_FORUM_URL "http://forum.drdteam.org/viewforum.php?f=197" #if defined(__APPLE__) || defined(_WIN32) #define GAME_DIR GAMENAME From 576fed5afceebd8b6a08c7580fbd2cb25e25b2e5 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 28 Sep 2016 05:18:16 +0200 Subject: [PATCH 130/912] Add light, blend and sampler functions --- .../fixedfunction/fixedfunction.cpp | 123 +++++++++++++++++- src/r_compiler/fixedfunction/fixedfunction.h | 45 ++++++- src/r_compiler/ssa/ssa_vec4i.cpp | 10 ++ src/r_compiler/ssa/ssa_vec4i.h | 2 + 4 files changed, 173 insertions(+), 7 deletions(-) diff --git a/src/r_compiler/fixedfunction/fixedfunction.cpp b/src/r_compiler/fixedfunction/fixedfunction.cpp index 8f8b09f23b..d70248864b 100644 --- a/src/r_compiler/fixedfunction/fixedfunction.cpp +++ b/src/r_compiler/fixedfunction/fixedfunction.cpp @@ -154,8 +154,7 @@ void FixedFunction::CodegenDrawSpan() SSAInt spot64 = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); //SSAInt spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - //*loop.dest = LightBgra::shade_bgra(_source[loop.spot64()], _light, _shade_constants); - colors[i] = source[spot64 * 4].load_vec4ub() * light / 256; + colors[i] = shade_bgra_simple(source[spot64 * 4].load_vec4ub(), light); stack_xfrac.store(xfrac + xstep); stack_yfrac.store(yfrac + ystep); @@ -181,9 +180,7 @@ void FixedFunction::CodegenDrawSpan() SSAInt spot64 = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); //SSAInt spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - //*loop.dest = LightBgra::shade_bgra(_source[loop.spot64()], _light, _shade_constants); - SSAVec4i color = source[spot64 * 4].load_vec4ub(); - color = color * light / 256; + SSAVec4i color = shade_bgra_simple(source[spot64 * 
4].load_vec4ub(), light); data[index * 4].store_vec4ub(color); stack_index.store(index + 1); @@ -200,6 +197,122 @@ void FixedFunction::CodegenDrawSpan() mProgram.functionPassManager()->run(*function.func); } +SSAInt FixedFunction::calc_light_multiplier(SSAInt light) +{ + return 256 - (light >> (FRACBITS - 8)); +} + +SSAVec4i FixedFunction::shade_pal_index_simple(SSAInt index, SSAInt light, SSAUBytePtr basecolors) +{ + SSAVec4i color = basecolors[index * 4].load_vec4ub(); // = GPalette.BaseColors[index]; + return shade_bgra_simple(color, light); +} + +SSAVec4i FixedFunction::shade_pal_index_advanced(SSAInt index, SSAInt light, const SSAShadeConstants &constants, SSAUBytePtr basecolors) +{ + SSAVec4i color = basecolors[index * 4].load_vec4ub(); // = GPalette.BaseColors[index]; + return shade_bgra_advanced(color, light, constants); +} + +SSAVec4i FixedFunction::shade_bgra_simple(SSAVec4i color, SSAInt light) +{ + color = color * light / 256; + return color.insert(3, 255); +} + +SSAVec4i FixedFunction::shade_bgra_advanced(SSAVec4i color, SSAInt light, const SSAShadeConstants &constants) +{ + SSAInt blue = color[0]; + SSAInt green = color[1]; + SSAInt red = color[2]; + SSAInt alpha = color[3]; + + SSAInt intensity = ((red * 77 + green * 143 + blue * 37) >> 8) * constants.desaturate; + + SSAVec4i inv_light = 256 - light; + SSAVec4i inv_desaturate = 256 - constants.desaturate; + + color = (color * inv_desaturate + intensity) / 256; + color = (constants.fade * inv_light + color * light) / 256; + color = (color * constants.light) / 256; + + return color.insert(3, alpha); +} + +SSAVec4i FixedFunction::blend_copy(SSAVec4i fg) +{ + return fg; +} + +SSAVec4i FixedFunction::blend_add(SSAVec4i fg, SSAVec4i bg, SSAInt srcalpha, SSAInt destalpha) +{ + SSAVec4i color = (fg * srcalpha + bg * destalpha) / 256; + return color.insert(3, 255); +} + +SSAVec4i FixedFunction::blend_sub(SSAVec4i fg, SSAVec4i bg, SSAInt srcalpha, SSAInt destalpha) +{ + SSAVec4i color = (bg * destalpha - fg * 
srcalpha) / 256; + return color.insert(3, 255); +} + +SSAVec4i FixedFunction::blend_revsub(SSAVec4i fg, SSAVec4i bg, SSAInt srcalpha, SSAInt destalpha) +{ + SSAVec4i color = (fg * srcalpha - bg * destalpha) / 256; + return color.insert(3, 255); +} + +SSAVec4i FixedFunction::blend_alpha_blend(SSAVec4i fg, SSAVec4i bg) +{ + SSAInt alpha = fg[3]; + alpha = alpha + (alpha >> 7); // // 255 -> 256 + SSAInt inv_alpha = 256 - alpha; + SSAVec4i color = (fg * alpha + bg * inv_alpha) / 256; + return color.insert(3, 255); +} + +SSAVec4i FixedFunction::sample_linear(SSAUBytePtr col0, SSAUBytePtr col1, SSAInt texturefracx, SSAInt texturefracy, SSAInt one, SSAInt height) +{ + SSAInt frac_y0 = (texturefracy >> FRACBITS) * height; + SSAInt frac_y1 = ((texturefracy + one) >> FRACBITS) * height; + SSAInt y0 = frac_y0 >> FRACBITS; + SSAInt y1 = frac_y1 >> FRACBITS; + + SSAVec4i p00 = col0[y0].load_vec4ub(); + SSAVec4i p01 = col0[y1].load_vec4ub(); + SSAVec4i p10 = col1[y0].load_vec4ub(); + SSAVec4i p11 = col1[y1].load_vec4ub(); + + SSAInt inv_b = texturefracx; + SSAInt inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; + SSAInt a = 16 - inv_a; + SSAInt b = 16 - inv_b; + + return (p00 * (a * b) + p01 * (inv_a * b) + p10 * (a * inv_b) + p11 * (inv_a * inv_b) + 127) >> 8; +} + +SSAVec4i FixedFunction::sample_linear(SSAUBytePtr texture, SSAInt xfrac, SSAInt yfrac, SSAInt xbits, SSAInt ybits) +{ + SSAInt xshift = (32 - xbits); + SSAInt yshift = (32 - ybits); + SSAInt xmask = (SSAInt(1) << xshift) - 1; + SSAInt ymask = (SSAInt(1) << yshift) - 1; + SSAInt x = xfrac >> xbits; + SSAInt y = yfrac >> ybits; + + SSAVec4i p00 = texture[(y & ymask) + ((x & xmask) << yshift)].load_vec4ub(); + SSAVec4i p01 = texture[((y + 1) & ymask) + ((x & xmask) << yshift)].load_vec4ub(); + SSAVec4i p10 = texture[(y & ymask) + (((x + 1) & xmask) << yshift)].load_vec4ub(); + SSAVec4i p11 = texture[((y + 1) & ymask) + (((x + 1) & xmask) << yshift)].load_vec4ub(); + + SSAInt inv_b = (xfrac >> (xbits - 4)) & 15; + SSAInt inv_a 
= (yfrac >> (ybits - 4)) & 15; + SSAInt a = 16 - inv_a; + SSAInt b = 16 - inv_b; + + return (p00 * (a * b) + p01 * (inv_a * b) + p10 * (a * inv_b) + p11 * (inv_a * inv_b) + 127) >> 8; +} + llvm::Type *FixedFunction::GetRenderArgsStruct(llvm::LLVMContext &context) { std::vector elements; diff --git a/src/r_compiler/fixedfunction/fixedfunction.h b/src/r_compiler/fixedfunction/fixedfunction.h index 3bbf05abe1..40236d233f 100644 --- a/src/r_compiler/fixedfunction/fixedfunction.h +++ b/src/r_compiler/fixedfunction/fixedfunction.h @@ -6,6 +6,7 @@ #include "r_compiler/ssa/ssa_vec8s.h" #include "r_compiler/ssa/ssa_vec16ub.h" #include "r_compiler/ssa/ssa_int.h" +#include "r_compiler/ssa/ssa_int_ptr.h" #include "r_compiler/ssa/ssa_short.h" #include "r_compiler/ssa/ssa_ubyte_ptr.h" #include "r_compiler/ssa/ssa_vec4f_ptr.h" @@ -57,8 +58,30 @@ struct RenderArgs uint32_t light; uint32_t srcalpha; uint32_t destalpha; - //ShadeConstants _shade_constants; - //int32_t nearest_filter; + + uint16_t light_alpha; + uint16_t light_red; + uint16_t light_green; + uint16_t light_blue; + uint16_t fade_alpha; + uint16_t fade_red; + uint16_t fade_green; + uint16_t fade_blue; + uint16_t desaturate; + uint32_t flags; + enum Flags + { + simple_shade = 1, + nearest_filter = 2 + }; +}; + +class SSAShadeConstants +{ +public: + SSAVec4i light; + SSAVec4i fade; + SSAInt desaturate; }; class FixedFunction @@ -71,6 +94,24 @@ public: private: void CodegenDrawSpan(); + // LightBgra + SSAInt calc_light_multiplier(SSAInt light); + SSAVec4i shade_pal_index_simple(SSAInt index, SSAInt light, SSAUBytePtr basecolors); + SSAVec4i shade_pal_index_advanced(SSAInt index, SSAInt light, const SSAShadeConstants &constants, SSAUBytePtr basecolors); + SSAVec4i shade_bgra_simple(SSAVec4i color, SSAInt light); + SSAVec4i shade_bgra_advanced(SSAVec4i color, SSAInt light, const SSAShadeConstants &constants); + + // BlendBgra + SSAVec4i blend_copy(SSAVec4i fg); + SSAVec4i blend_add(SSAVec4i fg, SSAVec4i bg, SSAInt srcalpha, 
SSAInt destalpha); + SSAVec4i blend_sub(SSAVec4i fg, SSAVec4i bg, SSAInt srcalpha, SSAInt destalpha); + SSAVec4i blend_revsub(SSAVec4i fg, SSAVec4i bg, SSAInt srcalpha, SSAInt destalpha); + SSAVec4i blend_alpha_blend(SSAVec4i fg, SSAVec4i bg); + + // SampleBgra + SSAVec4i sample_linear(SSAUBytePtr col0, SSAUBytePtr col1, SSAInt texturefracx, SSAInt texturefracy, SSAInt one, SSAInt height); + SSAVec4i sample_linear(SSAUBytePtr texture, SSAInt xfrac, SSAInt yfrac, SSAInt xbits, SSAInt ybits); + static llvm::Type *GetRenderArgsStruct(llvm::LLVMContext &context); RenderProgram mProgram; diff --git a/src/r_compiler/ssa/ssa_vec4i.cpp b/src/r_compiler/ssa/ssa_vec4i.cpp index 80e07c8d48..d8e31276ce 100644 --- a/src/r_compiler/ssa/ssa_vec4i.cpp +++ b/src/r_compiler/ssa/ssa_vec4i.cpp @@ -60,6 +60,16 @@ SSAInt SSAVec4i::operator[](SSAInt index) return SSAInt::from_llvm(SSAScope::builder().CreateExtractElement(v, index.v, SSAScope::hint())); } +SSAVec4i SSAVec4i::insert(SSAInt index, SSAInt value) +{ + return SSAVec4i::from_llvm(SSAScope::builder().CreateInsertElement(v, value.v, index.v, SSAScope::hint())); +} + +SSAVec4i SSAVec4i::insert(int index, SSAInt value) +{ + return SSAVec4i::from_llvm(SSAScope::builder().CreateInsertElement(v, value.v, index, SSAScope::hint())); +} + llvm::Type *SSAVec4i::llvm_type() { return llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 4); diff --git a/src/r_compiler/ssa/ssa_vec4i.h b/src/r_compiler/ssa/ssa_vec4i.h index d19f1d1aab..a654a87ae1 100644 --- a/src/r_compiler/ssa/ssa_vec4i.h +++ b/src/r_compiler/ssa/ssa_vec4i.h @@ -19,6 +19,8 @@ public: explicit SSAVec4i(llvm::Value *v); SSAVec4i(SSAVec4f f32); SSAInt operator[](SSAInt index); + SSAVec4i insert(SSAInt index, SSAInt value); + SSAVec4i insert(int index, SSAInt value); static SSAVec4i bitcast(SSAVec4f f32); static SSAVec4i bitcast(SSAVec8s i16); static SSAVec4i shuffle(const SSAVec4i &f0, int index0, int index1, int index2, int index3); From 
77ea195162884d0d035d0e5b1cdbe331293a8331 Mon Sep 17 00:00:00 2001 From: Marisa Heit Date: Tue, 27 Sep 2016 16:27:31 -0500 Subject: [PATCH 131/912] Fixed: Y and Z were flipped for sound velocity --- src/s_sound.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/s_sound.cpp b/src/s_sound.cpp index 866af965a1..06df0dbad4 100644 --- a/src/s_sound.cpp +++ b/src/s_sound.cpp @@ -753,8 +753,8 @@ static void CalcPosVel(int type, const AActor *actor, const sector_t *sector, if (type == SOURCE_Actor && actor != NULL) { vel->X = float(actor->Vel.X * TICRATE); - vel->Y = float(actor->Vel.Y * TICRATE); - vel->Z = float(actor->Vel.Z * TICRATE); + vel->Y = float(actor->Vel.Z * TICRATE); + vel->Z = float(actor->Vel.Y * TICRATE); } else { From 62769fa5eaac3ea60eabdf2992005b3ba8cb9757 Mon Sep 17 00:00:00 2001 From: Marisa Heit Date: Tue, 27 Sep 2016 18:40:36 -0500 Subject: [PATCH 132/912] Fixed: Heretic platforms make a mid-move sound, unlike Doom's --- wadsrc/static/filter/game-heretic/sndinfo.txt | 1 + wadsrc/static/filter/game-heretic/sndseq.txt | 5 +++++ 2 files changed, 6 insertions(+) create mode 100644 wadsrc/static/filter/game-heretic/sndseq.txt diff --git a/wadsrc/static/filter/game-heretic/sndinfo.txt b/wadsrc/static/filter/game-heretic/sndinfo.txt index b9f1fd7c3c..53017a49a8 100644 --- a/wadsrc/static/filter/game-heretic/sndinfo.txt +++ b/wadsrc/static/filter/game-heretic/sndinfo.txt @@ -151,6 +151,7 @@ $alias switches/exitbutn switches/normbutn // Heretic has no special exit button plats/pt1_strt pstart plats/pt1_stop pstop plats/pt1_mid dormov +plats/pt2_mid stnmov // // Door Sounds diff --git a/wadsrc/static/filter/game-heretic/sndseq.txt b/wadsrc/static/filter/game-heretic/sndseq.txt new file mode 100644 index 0000000000..405ebc8fbf --- /dev/null +++ b/wadsrc/static/filter/game-heretic/sndseq.txt @@ -0,0 +1,5 @@ +:Platform + playuntildone plats/pt1_strt + playrepeat plats/pt2_mid + stopsound plats/pt1_stop +end From 
3988a63789d14a3bf900506c888e02639e6fa715 Mon Sep 17 00:00:00 2001 From: Marisa Heit Date: Tue, 27 Sep 2016 18:58:09 -0500 Subject: [PATCH 133/912] Fixed: The menu no longer refreshed the screen border --- src/menu/menu.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/menu/menu.cpp b/src/menu/menu.cpp index 5e5124b2ab..7e938230e7 100644 --- a/src/menu/menu.cpp +++ b/src/menu/menu.cpp @@ -717,7 +717,11 @@ void M_Drawer (void) if (DMenu::CurrentMenu != NULL && menuactive != MENU_Off) { - if (DMenu::CurrentMenu->DimAllowed()) screen->Dim(fade); + if (DMenu::CurrentMenu->DimAllowed()) + { + screen->Dim(fade); + V_SetBorderNeedRefresh(); + } DMenu::CurrentMenu->Drawer(); } } From 3aea3a0beedcfcedf793a5858dba93645da46bef Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 28 Sep 2016 18:49:39 +0200 Subject: [PATCH 134/912] Fully implemented codegen for DrawSpan --- .../fixedfunction/fixedfunction.cpp | 1310 +++-------------- src/r_compiler/fixedfunction/fixedfunction.h | 140 +- src/r_compiler/ssa/ssa_short.cpp | 5 + src/r_compiler/ssa/ssa_short.h | 2 + src/r_compiler/ssa/ssa_vec4i.cpp | 12 + src/r_compiler/ssa/ssa_vec4i.h | 1 + src/r_draw_rgba.cpp | 92 +- 7 files changed, 337 insertions(+), 1225 deletions(-) diff --git a/src/r_compiler/fixedfunction/fixedfunction.cpp b/src/r_compiler/fixedfunction/fixedfunction.cpp index d70248864b..cc53a069a5 100644 --- a/src/r_compiler/fixedfunction/fixedfunction.cpp +++ b/src/r_compiler/fixedfunction/fixedfunction.cpp @@ -92,6 +92,8 @@ void *RenderProgram::PointerToFunction(const char *name) return mEngine->getPointerToFunction(function); } +///////////////////////////////////////////////////////////////////////////// + FixedFunction::FixedFunction() { CodegenDrawSpan(); @@ -110,84 +112,8 @@ void FixedFunction::CodegenDrawSpan() function.add_parameter(GetRenderArgsStruct(mProgram.context())); function.create_public(); - SSAStack stack_index, stack_xfrac, stack_yfrac; - - SSAValue args = 
function.parameter(0); - SSAUBytePtr destorg = args[0][0].load(); - SSAUBytePtr source = args[0][1].load(); - SSAInt destpitch = args[0][2].load(); - stack_xfrac.store(args[0][3].load()); - stack_yfrac.store(args[0][4].load()); - SSAInt xstep = args[0][5].load(); - SSAInt ystep = args[0][6].load(); - SSAInt x1 = args[0][7].load(); - SSAInt x2 = args[0][8].load(); - SSAInt y = args[0][9].load(); - SSAInt xbits = args[0][10].load(); - SSAInt ybits = args[0][11].load(); - SSAInt light = args[0][12].load(); - SSAInt srcalpha = args[0][13].load(); - SSAInt destalpha = args[0][14].load(); - - SSAInt count = x2 - x1 + 1; - SSAUBytePtr data = destorg[(x1 + y * destpitch) * 4]; - - SSAInt yshift = 32 - ybits; - SSAInt xshift = yshift - xbits; - SSAInt xmask = ((SSAInt(1) << xbits) - 1) << ybits; - //is_64x64 = xbits == 6 && ybits == 6; - - SSAInt sseLength = count / 4; - stack_index.store(0); - { - SSAForBlock loop; - SSAInt index = stack_index.load(); - loop.loop_block(index < sseLength); - - SSAVec4i colors[4]; - for (int i = 0; i < 4; i++) - { - SSAInt xfrac = stack_xfrac.load(); - SSAInt yfrac = stack_yfrac.load(); - - // 64x64 is the most common case by far, so special case it. - SSAInt spot64 = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - //SSAInt spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - - colors[i] = shade_bgra_simple(source[spot64 * 4].load_vec4ub(), light); - - stack_xfrac.store(xfrac + xstep); - stack_yfrac.store(yfrac + ystep); - } - - SSAVec16ub ssecolors(SSAVec8s(colors[0], colors[1]), SSAVec8s(colors[2], colors[3])); - data[index * 16].store_unaligned_vec16ub(ssecolors); - - stack_index.store(index + 1); - loop.end_block(); - } - - stack_index.store(sseLength * 4); - { - SSAForBlock loop; - SSAInt index = stack_index.load(); - loop.loop_block(index < count); - - SSAInt xfrac = stack_xfrac.load(); - SSAInt yfrac = stack_yfrac.load(); - - // 64x64 is the most common case by far, so special case it. 
- SSAInt spot64 = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - //SSAInt spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - - SSAVec4i color = shade_bgra_simple(source[spot64 * 4].load_vec4ub(), light); - data[index * 4].store_vec4ub(color); - - stack_index.store(index + 1); - stack_xfrac.store(xfrac + xstep); - stack_yfrac.store(yfrac + ystep); - loop.end_block(); - } + DrawSpanCodegen codegen; + codegen.Generate(function.parameter(0)); builder.CreateRetVoid(); @@ -197,122 +123,6 @@ void FixedFunction::CodegenDrawSpan() mProgram.functionPassManager()->run(*function.func); } -SSAInt FixedFunction::calc_light_multiplier(SSAInt light) -{ - return 256 - (light >> (FRACBITS - 8)); -} - -SSAVec4i FixedFunction::shade_pal_index_simple(SSAInt index, SSAInt light, SSAUBytePtr basecolors) -{ - SSAVec4i color = basecolors[index * 4].load_vec4ub(); // = GPalette.BaseColors[index]; - return shade_bgra_simple(color, light); -} - -SSAVec4i FixedFunction::shade_pal_index_advanced(SSAInt index, SSAInt light, const SSAShadeConstants &constants, SSAUBytePtr basecolors) -{ - SSAVec4i color = basecolors[index * 4].load_vec4ub(); // = GPalette.BaseColors[index]; - return shade_bgra_advanced(color, light, constants); -} - -SSAVec4i FixedFunction::shade_bgra_simple(SSAVec4i color, SSAInt light) -{ - color = color * light / 256; - return color.insert(3, 255); -} - -SSAVec4i FixedFunction::shade_bgra_advanced(SSAVec4i color, SSAInt light, const SSAShadeConstants &constants) -{ - SSAInt blue = color[0]; - SSAInt green = color[1]; - SSAInt red = color[2]; - SSAInt alpha = color[3]; - - SSAInt intensity = ((red * 77 + green * 143 + blue * 37) >> 8) * constants.desaturate; - - SSAVec4i inv_light = 256 - light; - SSAVec4i inv_desaturate = 256 - constants.desaturate; - - color = (color * inv_desaturate + intensity) / 256; - color = (constants.fade * inv_light + color * light) / 256; - color = (color * constants.light) / 256; - - return color.insert(3, alpha); -} - -SSAVec4i 
FixedFunction::blend_copy(SSAVec4i fg) -{ - return fg; -} - -SSAVec4i FixedFunction::blend_add(SSAVec4i fg, SSAVec4i bg, SSAInt srcalpha, SSAInt destalpha) -{ - SSAVec4i color = (fg * srcalpha + bg * destalpha) / 256; - return color.insert(3, 255); -} - -SSAVec4i FixedFunction::blend_sub(SSAVec4i fg, SSAVec4i bg, SSAInt srcalpha, SSAInt destalpha) -{ - SSAVec4i color = (bg * destalpha - fg * srcalpha) / 256; - return color.insert(3, 255); -} - -SSAVec4i FixedFunction::blend_revsub(SSAVec4i fg, SSAVec4i bg, SSAInt srcalpha, SSAInt destalpha) -{ - SSAVec4i color = (fg * srcalpha - bg * destalpha) / 256; - return color.insert(3, 255); -} - -SSAVec4i FixedFunction::blend_alpha_blend(SSAVec4i fg, SSAVec4i bg) -{ - SSAInt alpha = fg[3]; - alpha = alpha + (alpha >> 7); // // 255 -> 256 - SSAInt inv_alpha = 256 - alpha; - SSAVec4i color = (fg * alpha + bg * inv_alpha) / 256; - return color.insert(3, 255); -} - -SSAVec4i FixedFunction::sample_linear(SSAUBytePtr col0, SSAUBytePtr col1, SSAInt texturefracx, SSAInt texturefracy, SSAInt one, SSAInt height) -{ - SSAInt frac_y0 = (texturefracy >> FRACBITS) * height; - SSAInt frac_y1 = ((texturefracy + one) >> FRACBITS) * height; - SSAInt y0 = frac_y0 >> FRACBITS; - SSAInt y1 = frac_y1 >> FRACBITS; - - SSAVec4i p00 = col0[y0].load_vec4ub(); - SSAVec4i p01 = col0[y1].load_vec4ub(); - SSAVec4i p10 = col1[y0].load_vec4ub(); - SSAVec4i p11 = col1[y1].load_vec4ub(); - - SSAInt inv_b = texturefracx; - SSAInt inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; - SSAInt a = 16 - inv_a; - SSAInt b = 16 - inv_b; - - return (p00 * (a * b) + p01 * (inv_a * b) + p10 * (a * inv_b) + p11 * (inv_a * inv_b) + 127) >> 8; -} - -SSAVec4i FixedFunction::sample_linear(SSAUBytePtr texture, SSAInt xfrac, SSAInt yfrac, SSAInt xbits, SSAInt ybits) -{ - SSAInt xshift = (32 - xbits); - SSAInt yshift = (32 - ybits); - SSAInt xmask = (SSAInt(1) << xshift) - 1; - SSAInt ymask = (SSAInt(1) << yshift) - 1; - SSAInt x = xfrac >> xbits; - SSAInt y = yfrac >> ybits; - - 
SSAVec4i p00 = texture[(y & ymask) + ((x & xmask) << yshift)].load_vec4ub(); - SSAVec4i p01 = texture[((y + 1) & ymask) + ((x & xmask) << yshift)].load_vec4ub(); - SSAVec4i p10 = texture[(y & ymask) + (((x + 1) & xmask) << yshift)].load_vec4ub(); - SSAVec4i p11 = texture[((y + 1) & ymask) + (((x + 1) & xmask) << yshift)].load_vec4ub(); - - SSAInt inv_b = (xfrac >> (xbits - 4)) & 15; - SSAInt inv_a = (yfrac >> (ybits - 4)) & 15; - SSAInt a = 16 - inv_a; - SSAInt b = 16 - inv_b; - - return (p00 * (a * b) + p01 * (inv_a * b) + p10 * (a * inv_b) + p11 * (inv_a * inv_b) + 127) >> 8; -} - llvm::Type *FixedFunction::GetRenderArgsStruct(llvm::LLVMContext &context) { std::vector elements; @@ -331,953 +141,297 @@ llvm::Type *FixedFunction::GetRenderArgsStruct(llvm::LLVMContext &context) elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t light; elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t srcalpha; elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t destalpha; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_alpha; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_red; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_green; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_blue; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_alpha; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_red; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_green; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_blue; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t desaturate; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t flags; return llvm::StructType::get(context, elements, false)->getPointerTo(); } -#if 0 +///////////////////////////////////////////////////////////////////////////// 
-GlslFixedFunction::GlslFixedFunction(GlslProgram &program, GlslCodeGen &vertex_codegen, GlslCodeGen &fragment_codegen) -: program(program), vertex_codegen(vertex_codegen), fragment_codegen(fragment_codegen) +void DrawSpanCodegen::Generate(SSAValue args) { -} + destorg = args[0][0].load(); + source = args[0][1].load(); + destpitch = args[0][2].load(); + stack_xfrac.store(args[0][3].load()); + stack_yfrac.store(args[0][4].load()); + xstep = args[0][5].load(); + ystep = args[0][6].load(); + x1 = args[0][7].load(); + x2 = args[0][8].load(); + y = args[0][9].load(); + xbits = args[0][10].load(); + ybits = args[0][11].load(); + light = args[0][12].load(); + srcalpha = args[0][13].load(); + destalpha = args[0][14].load(); + SSAShort light_alpha = args[0][15].load(); + SSAShort light_red = args[0][16].load(); + SSAShort light_green = args[0][17].load(); + SSAShort light_blue = args[0][18].load(); + SSAShort fade_alpha = args[0][19].load(); + SSAShort fade_red = args[0][20].load(); + SSAShort fade_green = args[0][21].load(); + SSAShort fade_blue = args[0][22].load(); + SSAShort desaturate = args[0][23].load(); + SSAInt flags = args[0][24].load(); + shade_constants.light = SSAVec4i(light_blue.zext_int(), light_green.zext_int(), light_red.zext_int(), light_alpha.zext_int()); + shade_constants.fade = SSAVec4i(fade_blue.zext_int(), fade_green.zext_int(), fade_red.zext_int(), fade_alpha.zext_int()); + shade_constants.desaturate = desaturate.zext_int(); -llvm::Type *GlslFixedFunction::get_sampler_struct(llvm::LLVMContext &context) -{ - std::vector elements; - elements.push_back(llvm::Type::getInt32Ty(context)); // width - elements.push_back(llvm::Type::getInt32Ty(context)); // height - elements.push_back(llvm::Type::getInt8PtrTy(context)); // data - return llvm::StructType::get(context, elements, false); -} + count = x2 - x1 + 1; + data = destorg[(x1 + y * destpitch) * 4]; -void GlslFixedFunction::codegen() -{ - codegen_render_scanline(5); - codegen_calc_window_positions(); - 
codegen_calc_polygon_face_direction(); - codegen_calc_polygon_y_range(); - codegen_update_polygon_edge(); - codegen_draw_triangles(5, 5); - codegen_texture(); - codegen_normalize(); - codegen_reflect(); - codegen_max(); - codegen_pow(); - codegen_dot(); - codegen_mix(); -} + yshift = 32 - ybits; + xshift = yshift - xbits; + xmask = ((SSAInt(1) << xbits) - 1) << ybits; -void GlslFixedFunction::codegen_texture() -{ - llvm::IRBuilder<> builder(program.context()); - SSAScope ssa_scope(&program.context(), program.module(), &builder); + // 64x64 is the most common case by far, so special case it. + is_64x64 = xbits == 6 && ybits == 6; + is_simple_shade = (flags & RenderArgs::simple_shade) == RenderArgs::simple_shade; + is_nearest_filter = (flags & RenderArgs::nearest_filter) == RenderArgs::nearest_filter; - SSAFunction function("fragment_texture"); - function.add_parameter(fragment_codegen.get_global_struct_type()); - function.add_parameter(get_sampler_struct(program.context())); - function.add_parameter(SSAVec4f::llvm_type()); - function.create_private(); - - SSAValue sampler_ptr = function.parameter(1); - SSAVec4f pos = function.parameter(2); - - SSAInt width = sampler_ptr[0][0].load(); - SSAInt height = sampler_ptr[0][1].load(); - SSAUBytePtr data = sampler_ptr[0][2].load(); - - SSAPixels4ub_argb_rev pixels(width, height, data); - //builder.CreateRet(pixels.linear_clamp4f(pos).v); - builder.CreateRet(pixels.linear_clamp4f(pos[0], pos[1]).v); - - llvm::verifyFunction(*function.func); -} - -void GlslFixedFunction::codegen_normalize() -{ - llvm::IRBuilder<> builder(program.context()); - SSAScope ssa_scope(&program.context(), program.module(), &builder); - - SSAFunction function("fragment_normalize"); - function.add_parameter(fragment_codegen.get_global_struct_type()); - function.add_parameter(SSAVec4f::llvm_type()); - function.create_private(); - - SSAVec4f vec = function.parameter(1); - - // To do: this can probably be done a lot faster with _mm_rsqrt_ss - SSAVec4f vec2 
= vec * vec; - SSAVec4f length3(SSAFloat::sqrt(vec2[0] + vec2[1] + vec2[2])); - SSAVec4f normalized = vec / length3; - builder.CreateRet(normalized.v); - - llvm::verifyFunction(*function.func); -} - -void GlslFixedFunction::codegen_reflect() -{ - llvm::IRBuilder<> builder(program.context()); - SSAScope ssa_scope(&program.context(), program.module(), &builder); - - SSAFunction function("fragment_reflect"); - function.add_parameter(fragment_codegen.get_global_struct_type()); - function.add_parameter(SSAVec4f::llvm_type()); - function.add_parameter(SSAVec4f::llvm_type()); - function.create_private(); - - SSAVec4f i = function.parameter(1); - SSAVec4f n = function.parameter(2); - - SSAVec4f c = i * n; - SSAFloat dot3 = c[0] + c[1] + c[2]; - SSAVec4f result = i - (2.0f * dot3) * n; - builder.CreateRet(result.v); - - llvm::verifyFunction(*function.func); -} - -void GlslFixedFunction::codegen_max() -{ - llvm::IRBuilder<> builder(program.context()); - SSAScope ssa_scope(&program.context(), program.module(), &builder); - - SSAFunction function("fragment_max"); - function.add_parameter(fragment_codegen.get_global_struct_type()); - function.add_parameter(SSAFloat::llvm_type()); - function.add_parameter(SSAFloat::llvm_type()); - function.create_private(); - - SSAFloat a = function.parameter(1); - SSAFloat b = function.parameter(2); - - SSAPhi phi; SSAIfBlock branch; - branch.if_block(a >= b); - phi.add_incoming(a); + branch.if_block(is_simple_shade); + LoopShade(true); branch.else_block(); - phi.add_incoming(b); + LoopShade(false); branch.end_block(); - SSAFloat c = phi.create(); - - builder.CreateRet(c.v); - llvm::verifyFunction(*function.func); } -void GlslFixedFunction::codegen_pow() +void DrawSpanCodegen::LoopShade(bool isSimpleShade) { - llvm::IRBuilder<> builder(program.context()); - SSAScope ssa_scope(&program.context(), program.module(), &builder); - - SSAFunction function("fragment_pow"); - function.add_parameter(fragment_codegen.get_global_struct_type()); - 
function.add_parameter(SSAFloat::llvm_type()); - function.add_parameter(SSAFloat::llvm_type()); - function.create_private(); - - SSAFloat a = function.parameter(1); - SSAFloat b = function.parameter(2); - builder.CreateRet(a.v); - //builder.CreateRet(SSAFloat::pow(a, b).v); - - llvm::verifyFunction(*function.func); + SSAIfBlock branch; + branch.if_block(is_nearest_filter); + LoopFilter(isSimpleShade, true); + branch.else_block(); + LoopFilter(isSimpleShade, false); + branch.end_block(); } -void GlslFixedFunction::codegen_dot() +void DrawSpanCodegen::LoopFilter(bool isSimpleShade, bool isNearestFilter) { - llvm::IRBuilder<> builder(program.context()); - SSAScope ssa_scope(&program.context(), program.module(), &builder); - - SSAFunction function("fragment_dot"); - function.add_parameter(fragment_codegen.get_global_struct_type()); - function.add_parameter(SSAVec4f::llvm_type()); - function.add_parameter(SSAVec4f::llvm_type()); - function.create_private(); - - SSAVec4f a = function.parameter(1); - SSAVec4f b = function.parameter(2); - - SSAVec4f c = a * b; - SSAFloat dot3 = c[0] + c[1] + c[2]; - builder.CreateRet(dot3.v); - - llvm::verifyFunction(*function.func); -} - -void GlslFixedFunction::codegen_mix() -{ - llvm::IRBuilder<> builder(program.context()); - SSAScope ssa_scope(&program.context(), program.module(), &builder); - - SSAFunction function("fragment_mix"); - function.add_parameter(fragment_codegen.get_global_struct_type()); - function.add_parameter(SSAVec4f::llvm_type()); - function.add_parameter(SSAVec4f::llvm_type()); - function.add_parameter(SSAFloat::llvm_type()); - function.create_private(); - - SSAVec4f v1 = function.parameter(1); - SSAVec4f v2 = function.parameter(2); - SSAFloat t = function.parameter(3); - - SSAVec4f b = t; - SSAVec4f a = 1.0f - b; - SSAVec4f mix = v1 * a + v2 * b; - builder.CreateRet(mix.v); - - llvm::verifyFunction(*function.func); -} - -void GlslFixedFunction::codegen_draw_triangles(int num_vertex_in, int num_vertex_out) -{ - 
llvm::IRBuilder<> builder(program.context()); - SSAScope ssa_scope(&program.context(), program.module(), &builder); - - SSAFunction function("draw_triangles"); - function.add_parameter(SSAInt::llvm_type()); // input_width - function.add_parameter(SSAInt::llvm_type()); // input_height - function.add_parameter(SSAUBytePtr::llvm_type()); // input_data - function.add_parameter(SSAInt::llvm_type()); // output_width - function.add_parameter(SSAInt::llvm_type()); // output_height - function.add_parameter(SSAUBytePtr::llvm_type()); // output_data - function.add_parameter(SSAInt::llvm_type()); // viewport_x - function.add_parameter(SSAInt::llvm_type()); // viewport_y - function.add_parameter(SSAInt::llvm_type()); // viewport_width - function.add_parameter(SSAInt::llvm_type()); // viewport_height - function.add_parameter(SSAVec4fPtr::llvm_type()); // uniforms - function.add_parameter(SSAInt::llvm_type()); // first_vertex - function.add_parameter(SSAInt::llvm_type()); // num_vertices - function.add_parameter(SSAVec4fPtr::llvm_type()->getPointerTo()); // vertex attributes - function.add_parameter(SSAInt::llvm_type()); // core - function.add_parameter(SSAInt::llvm_type()); // num_cores - function.create_public(); - - SSAInt input_width = function.parameter(0); - SSAInt input_height = function.parameter(1); - SSAUBytePtr input_data = function.parameter(2); - SSAInt output_width = function.parameter(3); - SSAInt output_height = function.parameter(4); - SSAUBytePtr output_data = function.parameter(5); - SSAInt viewport_x = function.parameter(6); - SSAInt viewport_y = function.parameter(7); - SSAInt viewport_width = function.parameter(8); - SSAInt viewport_height = function.parameter(9); - SSAVec4fPtr uniforms = function.parameter(10); - SSAInt first_vertex = function.parameter(11); - SSAInt num_vertices = function.parameter(12); - SSAValue vertex_in_ptr = function.parameter(13); - SSAInt core = function.parameter(14); - SSAInt num_cores = function.parameter(15); - - SSAStack 
stack_vertex_index; - SSAValue vertex_globals_ptr = SSAValue::from_llvm(SSAScope::alloca(vertex_codegen.get_global_struct_type())); - std::vector vertex_outs; - for (int i = 0; i < num_vertex_out; i++) - vertex_outs.push_back(SSAVec4fPtr::from_llvm(SSAScope::builder().CreateAlloca(SSAVec4f::llvm_type(), SSAInt(3).v))); - - int num_uniforms = 1; + SSAIfBlock branch; + branch.if_block(is_64x64); { - llvm::Type *type = llvm::ArrayType::get(llvm::VectorType::get(llvm::Type::getFloatTy(program.context()), 4), 4); - llvm::Value *matrix = llvm::UndefValue::get(type); - for (int col = 0; col < 4; col++) - { - SSAVec4f column = uniforms[col].load_unaligned(); - std::vector indexes; - indexes.push_back(col); - matrix = builder.CreateInsertValue(matrix, column.v, indexes); - } - vertex_globals_ptr[0][0].store(matrix); + SSAInt sseLength = Loop4x(isSimpleShade, isNearestFilter, true); + Loop(sseLength * 4, isSimpleShade, isNearestFilter, true); } - - stack_vertex_index.store(0); - SSAForBlock loop; - SSAInt vertex_index = stack_vertex_index.load(); - loop.loop_block(vertex_index + 2 < num_vertices); - for (int v = 0; v < 3; v++) + branch.else_block(); { - for (int i = 0; i < num_vertex_in; i++) + SSAInt sseLength = Loop4x(isSimpleShade, isNearestFilter, false); + Loop(sseLength * 4, isSimpleShade, isNearestFilter, false); + } + branch.end_block(); +} + +SSAInt DrawSpanCodegen::Loop4x(bool isSimpleShade, bool isNearestFilter, bool is64x64) +{ + SSAInt sseLength = count / 4; + stack_index.store(0); + { + SSAForBlock loop; + SSAInt index = stack_index.load(); + loop.loop_block(index < sseLength); + + SSAVec4i colors[4]; + for (int i = 0; i < 4; i++) { - SSAValue attribute_ptr = vertex_in_ptr[i].load(); - SSAVec4f vertex_in = SSAVec4f::shuffle(SSAVec4fPtr(attribute_ptr)[first_vertex + vertex_index + v].load_unaligned(), 0, 1, 2, 3); - vertex_globals_ptr[0][num_uniforms + i].store(vertex_in.v); + SSAInt xfrac = stack_xfrac.load(); + SSAInt yfrac = stack_yfrac.load(); + + SSAVec4i 
fg = Sample(xfrac, yfrac, isNearestFilter, is64x64); + if (isSimpleShade) + colors[i] = shade_bgra_simple(fg, light); + else + colors[i] = shade_bgra_advanced(fg, light, shade_constants); + + stack_xfrac.store(xfrac + xstep); + stack_yfrac.store(yfrac + ystep); } - SSAScope::builder().CreateCall(SSAScope::module()->getFunction((vertex_codegen.shader_prefix() + "main").c_str()), vertex_globals_ptr.v); - for (int i = 0; i < num_vertex_out; i++) + + SSAVec16ub ssecolors(SSAVec8s(colors[0], colors[1]), SSAVec8s(colors[2], colors[3])); + data[index * 16].store_unaligned_vec16ub(ssecolors); + + stack_index.store(index + 1); + loop.end_block(); + } + return sseLength; +} + +void DrawSpanCodegen::Loop(SSAInt start, bool isSimpleShade, bool isNearestFilter, bool is64x64) +{ + stack_index.store(start); + { + SSAForBlock loop; + SSAInt index = stack_index.load(); + loop.loop_block(index < count); + + SSAInt xfrac = stack_xfrac.load(); + SSAInt yfrac = stack_yfrac.load(); + + SSAVec4i fg = Sample(xfrac, yfrac, isNearestFilter, is64x64); + SSAVec4i color; + if (isSimpleShade) + color = shade_bgra_simple(fg, light); + else + color = shade_bgra_advanced(fg, light, shade_constants); + + data[index * 4].store_vec4ub(color); + + stack_index.store(index + 1); + stack_xfrac.store(xfrac + xstep); + stack_yfrac.store(yfrac + ystep); + loop.end_block(); + } +} + +SSAVec4i DrawSpanCodegen::Sample(SSAInt xfrac, SSAInt yfrac, bool isNearestFilter, bool is64x64) +{ + if (isNearestFilter) + { + SSAInt spot; + if (is64x64) + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + else + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + return source[spot * 4].load_vec4ub(); + } + else + { + if (is64x64) { - vertex_outs[i][v].store(vertex_globals_ptr[0][num_uniforms + num_vertex_in + i].load()); + return sample_linear(source, xfrac, yfrac, 26, 26); + } + else + { + return sample_linear(source, xfrac, yfrac, 32 - xbits, 32 - ybits); } } - - render_polygon(input_width, 
input_height, input_data, output_width, output_height, output_data, viewport_x, viewport_y, viewport_width, viewport_height, 3, vertex_outs, core, num_cores); - - stack_vertex_index.store(vertex_index + 3); - loop.end_block(); - - builder.CreateRetVoid(); - llvm::verifyFunction(*function.func); } -void GlslFixedFunction::codegen_calc_window_positions() +///////////////////////////////////////////////////////////////////////////// + +SSAInt DrawerCodegen::calc_light_multiplier(SSAInt light) { - llvm::IRBuilder<> builder(program.context()); - SSAScope ssa_scope(&program.context(), program.module(), &builder); - - SSAFunction function("calc_window_positions"); - function.add_parameter(SSAInt::llvm_type()); // viewport_x - function.add_parameter(SSAInt::llvm_type()); // viewport_y - function.add_parameter(SSAInt::llvm_type()); // viewport_width - function.add_parameter(SSAInt::llvm_type()); // viewport_height - function.add_parameter(SSAInt::llvm_type()); // num_vertices - function.add_parameter(SSAVec4fPtr::llvm_type()); // gl_Position - function.add_parameter(SSAVec4fPtr::llvm_type()); // window_pos - function.create_private(); - SSAInt viewport_x = function.parameter(0); - SSAInt viewport_y = function.parameter(1); - SSAInt viewport_width = function.parameter(2); - SSAInt viewport_height = function.parameter(3); - SSAInt num_vertices = function.parameter(4); - SSAVec4fPtr clip_positions = function.parameter(5); - SSAVec4fPtr window_positions = function.parameter(6); - - SSAViewport viewport(viewport_x, viewport_y, viewport_width, viewport_height); - SSAStack stack_transform_index; - stack_transform_index.store(0); - SSAForBlock loop_transform; - SSAInt transform_index = stack_transform_index.load(); - loop_transform.loop_block(transform_index < num_vertices); - { - SSAVec4f clip_pos = clip_positions[transform_index].load(); - SSAVec4f window_pos = viewport.clip_to_window(clip_pos); - window_positions[transform_index].store(window_pos); - - 
stack_transform_index.store(transform_index + 1); - } - loop_transform.end_block(); - - builder.CreateRetVoid(); - llvm::verifyFunction(*function.func); + return 256 - (light >> (FRACBITS - 8)); } -void GlslFixedFunction::codegen_calc_polygon_face_direction() +SSAVec4i DrawerCodegen::shade_pal_index_simple(SSAInt index, SSAInt light, SSAUBytePtr basecolors) { - llvm::IRBuilder<> builder(program.context()); - SSAScope ssa_scope(&program.context(), program.module(), &builder); - - SSAFunction function("calc_polygon_face_direction"); - function.set_return_type(SSABool::llvm_type()); - function.add_parameter(SSAInt::llvm_type()); // num_vertices - function.add_parameter(SSAVec4fPtr::llvm_type()); // window_pos - function.create_private(); - SSAInt num_vertices = function.parameter(0); - SSAVec4fPtr window_positions = function.parameter(1); - - SSAStack stack_face_direction; - SSAStack stack_face_vertex_index; - stack_face_direction.store(0.0f); - stack_face_vertex_index.store(0); - SSAForBlock loop_face_direction; - SSAInt face_vertex_index = stack_face_vertex_index.load(); - loop_face_direction.loop_block(face_vertex_index < num_vertices); - { - SSAVec4f v0 = window_positions[face_vertex_index].load(); - SSAVec4f v1 = window_positions[(face_vertex_index + 1) % num_vertices].load(); - stack_face_direction.store(stack_face_direction.load() + v0[0] * v1[1] - v1[0] * v0[1]); - stack_face_vertex_index.store(face_vertex_index + 1); - } - loop_face_direction.end_block(); - SSABool front_facing_ccw = (stack_face_direction.load() >= 0.0f); - - builder.CreateRet(front_facing_ccw.v); - llvm::verifyFunction(*function.func); + SSAVec4i color = basecolors[index * 4].load_vec4ub(); // = GPalette.BaseColors[index]; + return shade_bgra_simple(color, light); } -void GlslFixedFunction::codegen_calc_polygon_y_range() +SSAVec4i DrawerCodegen::shade_pal_index_advanced(SSAInt index, SSAInt light, const SSAShadeConstants &constants, SSAUBytePtr basecolors) { - llvm::IRBuilder<> 
builder(program.context()); - SSAScope ssa_scope(&program.context(), program.module(), &builder); - - SSAFunction function("calc_polygon_y_range"); - function.add_parameter(SSAInt::llvm_type()); // viewport_y - function.add_parameter(SSAInt::llvm_type()); // viewport_height - function.add_parameter(SSAInt::llvm_type()); // num_vertices - function.add_parameter(SSAVec4fPtr::llvm_type()); // window_pos - function.add_parameter(SSAInt::llvm_type()->getPointerTo()); // out_y_start - function.add_parameter(SSAInt::llvm_type()->getPointerTo()); // out_y_end - function.create_private(); - SSAInt viewport_y = function.parameter(0); - SSAInt viewport_height = function.parameter(1); - SSAInt num_vertices = function.parameter(2); - SSAVec4fPtr window_positions = function.parameter(3); - SSAValue out_y_start = function.parameter(4); - SSAValue out_y_end = function.parameter(5); - - SSAStack y_start; - SSAStack y_end; - y_start.store(0x7fffffff); - y_end.store(0); - - SSAStack stack_minmax_index; - stack_minmax_index.store(0); - SSAForBlock loop_minmax; - SSAInt minmax_index = stack_minmax_index.load(); - loop_minmax.loop_block(minmax_index < num_vertices); - { - SSAInt y = SSAInt(window_positions[minmax_index].load()[1] + 0.5f); - y_start.store(ssa_min(y_start.load(), y)); - y_end.store(ssa_max(y_end.load(), y)); - stack_minmax_index.store(minmax_index + 1); - } - loop_minmax.end_block(); - - y_start.store(ssa_max(y_start.load(), viewport_y)); - y_end.store(ssa_min(y_end.load(), viewport_y + viewport_height)); - - out_y_start.store(y_start.load().v); - out_y_end.store(y_end.load().v); - builder.CreateRetVoid(); - llvm::verifyFunction(*function.func); + SSAVec4i color = basecolors[index * 4].load_vec4ub(); // = GPalette.BaseColors[index]; + return shade_bgra_advanced(color, light, constants); } -void GlslFixedFunction::codegen_update_polygon_edge() +SSAVec4i DrawerCodegen::shade_bgra_simple(SSAVec4i color, SSAInt light) { - llvm::IRBuilder<> builder(program.context()); - 
SSAScope ssa_scope(&program.context(), program.module(), &builder); - - SSAFunction function("update_polygon_edge"); - function.add_parameter(SSAFloat::llvm_type()); // y_position - function.add_parameter(SSAInt::llvm_type()); // num_vertices - function.add_parameter(SSAVec4fPtr::llvm_type()); // window_pos - function.add_parameter(SSAInt::llvm_type()->getPointerTo()); // inout left_index - function.add_parameter(SSAInt::llvm_type()->getPointerTo()); // inout right_index - function.create_private(); - SSAFloat float_y = function.parameter(0); - SSAInt num_vertices = function.parameter(1); - SSAVec4fPtr window_positions = function.parameter(2); - SSAValue ptr_left_index = function.parameter(3); - SSAValue ptr_right_index = function.parameter(4); - - SSAStack max_iterate; - max_iterate.store(num_vertices); - SSAForBlock loop_left; - SSAInt left_index = ptr_left_index.load(); - SSAInt right_index = ptr_right_index.load(); - SSAInt next_left_index = (left_index + 1) % num_vertices; - SSAFloat left_y0 = window_positions[left_index].load()[1]; - SSAFloat left_y1 = window_positions[next_left_index].load()[1]; - SSABool in_range = (left_y0 >= float_y && left_y1 < float_y) || (left_y1 >= float_y && left_y0 < float_y); - loop_left.loop_block((left_index == right_index || !in_range) && max_iterate.load() > 0); - ptr_left_index.store(next_left_index.v); - max_iterate.store(max_iterate.load() - 1); - loop_left.end_block(); - - builder.CreateRetVoid(); - llvm::verifyFunction(*function.func); + color = color * light / 256; + return color.insert(3, 255); } -void GlslFixedFunction::render_polygon( - SSAInt input_width, - SSAInt input_height, - SSAUBytePtr input_data, - SSAInt output_width, - SSAInt output_height, - SSAUBytePtr output_data, - SSAInt viewport_x, - SSAInt viewport_y, - SSAInt viewport_width, - SSAInt viewport_height, - SSAInt num_vertices, - std::vector fragment_ins, - SSAInt core, - SSAInt num_cores) +SSAVec4i DrawerCodegen::shade_bgra_advanced(SSAVec4i color, SSAInt 
light, const SSAShadeConstants &constants) { - SSAVec4fPtr window_positions = SSAVec4fPtr::from_llvm(SSAScope::alloca(SSAVec4f::llvm_type(), num_vertices)); - SSAVec4fPtr left_line_varyings = SSAVec4fPtr::from_llvm(SSAScope::alloca(SSAVec4f::llvm_type(), fragment_ins.size())); - SSAVec4fPtr right_line_varyings = SSAVec4fPtr::from_llvm(SSAScope::alloca(SSAVec4f::llvm_type(), fragment_ins.size())); + SSAInt blue = color[0]; + SSAInt green = color[1]; + SSAInt red = color[2]; + SSAInt alpha = color[3]; - /////////////////////////////////// + SSAInt intensity = ((red * 77 + green * 143 + blue * 37) >> 8) * constants.desaturate; - llvm::Value *calc_window_positions_args[] = { viewport_x.v, viewport_y.v, viewport_width.v, viewport_height.v, num_vertices.v, fragment_ins[0].v, window_positions.v }; - SSAScope::builder().CreateCall(SSAScope::module()->getFunction("calc_window_positions"), calc_window_positions_args); + SSAVec4i inv_light = 256 - light; + SSAVec4i inv_desaturate = 256 - constants.desaturate; - llvm::Value *calc_polygon_face_direction_args[] = { num_vertices.v, window_positions.v }; - SSABool front_facing_ccw = SSABool::from_llvm(SSAScope::builder().CreateCall(SSAScope::module()->getFunction("calc_polygon_face_direction"), calc_polygon_face_direction_args)); + color = (color * inv_desaturate + intensity) / 256; + color = (constants.fade * inv_light + color * light) / 256; + color = (color * constants.light) / 256; - SSAIfBlock cull_if; - cull_if.if_block(front_facing_ccw); - { - SSAViewport viewport(viewport_x, viewport_y, viewport_width, viewport_height); - - SSAStack y_start; - SSAStack y_end; - - llvm::Value *calc_polygon_y_range_args[] = { viewport_y.v, viewport_height.v, num_vertices.v, window_positions.v, y_start.v, y_end.v }; - SSAScope::builder().CreateCall(SSAScope::module()->getFunction("calc_polygon_y_range"), calc_polygon_y_range_args); - - y_start.store((y_start.load() + num_cores - core - 1) / num_cores * num_cores + core); // 
find_first_line_for_core - - SSAStack stack_left_index; - SSAStack stack_right_index; - SSAStack stack_int_y; - stack_left_index.store(0); - stack_right_index.store(1); - stack_int_y.store(y_start.load()); - SSAForBlock scanlines_loop; - scanlines_loop.loop_block(stack_int_y.load() < y_end.load()); - { - SSAInt int_y = stack_int_y.load(); - SSAFloat float_y = SSAFloat(int_y) + 0.5f; - - llvm::Value *update_polygon_edge_args0[] = { float_y.v, num_vertices.v, window_positions.v, stack_left_index.v, stack_right_index.v }; - llvm::Value *update_polygon_edge_args1[] = { float_y.v, num_vertices.v, window_positions.v, stack_right_index.v, stack_left_index.v }; - SSAScope::builder().CreateCall(SSAScope::module()->getFunction("update_polygon_edge"), update_polygon_edge_args0); - SSAScope::builder().CreateCall(SSAScope::module()->getFunction("update_polygon_edge"), update_polygon_edge_args1); - - SSAInt left_index = stack_left_index.load(); - SSAInt right_index = stack_right_index.load(); - SSAInt next_left_index = (left_index + 1) % num_vertices; - SSAInt next_right_index = (right_index + 1) % num_vertices; - - SSABarycentricWeight left_weight(viewport, fragment_ins[0][left_index].load(), fragment_ins[0][next_left_index].load()); - SSABarycentricWeight right_weight(viewport, fragment_ins[0][right_index].load(), fragment_ins[0][next_right_index].load()); - - SSAFloat a = left_weight.from_window_y(int_y); - SSAFloat b = right_weight.from_window_y(int_y); - - SSAVec4f left_clip_pos = left_weight.v1 * a + left_weight.v2 * (1.0f - a); - SSAVec4f right_clip_pos = right_weight.v1 * b + right_weight.v2 * (1.0f - b); - - for (size_t i = 0; i + 1 < fragment_ins.size(); i++) - { - left_line_varyings[i].store(fragment_ins[i + 1][left_index].load() * a + fragment_ins[i + 1][next_left_index].load() * (1.0f - a)); - right_line_varyings[i].store(fragment_ins[i + 1][right_index].load() * b + fragment_ins[i + 1][next_right_index].load() * (1.0f - b)); - } - - llvm::Value 
*render_scanline_args[] = { output_width.v, output_height.v, output_data.v, viewport_x.v, viewport_y.v, viewport_width.v, viewport_height.v, int_y.v, left_clip_pos.v, right_clip_pos.v, left_line_varyings.v, right_line_varyings.v, input_width.v, input_height.v, input_data.v }; - SSAScope::builder().CreateCall(SSAScope::module()->getFunction("render_scanline"), render_scanline_args); - - stack_int_y.store(stack_int_y.load() + num_cores); - } - scanlines_loop.end_block(); - } - cull_if.end_block(); + return color.insert(3, alpha); } -void GlslFixedFunction::codegen_render_scanline(int num_varyings) +SSAVec4i DrawerCodegen::blend_copy(SSAVec4i fg) { - llvm::IRBuilder<> builder(program.context()); - SSAScope ssa_scope(&program.context(), program.module(), &builder); - - SSAFunction function("render_scanline"); - function.add_parameter(SSAInt::llvm_type()); // output_width - function.add_parameter(SSAInt::llvm_type()); // output_height - function.add_parameter(SSAUBytePtr::llvm_type()); // output_data - function.add_parameter(SSAInt::llvm_type()); // viewport_x - function.add_parameter(SSAInt::llvm_type()); // viewport_y - function.add_parameter(SSAInt::llvm_type()); // viewport_width - function.add_parameter(SSAInt::llvm_type()); // viewport_height - function.add_parameter(SSAInt::llvm_type()); // y - function.add_parameter(SSAVec4f::llvm_type()); // left_clip_pos - function.add_parameter(SSAVec4f::llvm_type()); // right_clip_pos - function.add_parameter(SSAVec4fPtr::llvm_type()); // left_line_varyings - function.add_parameter(SSAVec4fPtr::llvm_type()); // right_line_varyings - function.add_parameter(SSAInt::llvm_type()); // input_width - function.add_parameter(SSAInt::llvm_type()); // input_height - function.add_parameter(SSAUBytePtr::llvm_type()); // input_data - function.create_private(); - SSAInt output_width = function.parameter(0); - SSAInt output_height = function.parameter(1); - SSAUBytePtr output_data = function.parameter(2); - SSAInt viewport_x = 
function.parameter(3); - SSAInt viewport_y = function.parameter(4); - SSAInt viewport_width = function.parameter(5); - SSAInt viewport_height = function.parameter(6); - SSAInt y = function.parameter(7); - SSAVec4f left_clip_pos = function.parameter(8); - SSAVec4f right_clip_pos = function.parameter(9); - SSAVec4fPtr left_line_varyings = function.parameter(10); - SSAVec4fPtr right_line_varyings = function.parameter(11); - SSAInt input_width = function.parameter(12); - SSAInt input_height = function.parameter(13); - SSAUBytePtr input_data = function.parameter(14); - - SSAViewport viewport(viewport_x, viewport_y, viewport_width, viewport_height); - - SSAScopeHint hint; - - SSAStack stack_x; - SSAStack stack_xnormalized; - - //////////////////////////////// - // Prepare to render scanline: - - hint.set("prepare"); - OuterData outer_data; - - SSAVec4f left_window_pos = viewport.clip_to_window(left_clip_pos); - SSAVec4f right_window_pos = viewport.clip_to_window(right_clip_pos); - - SSAFloat x0 = left_window_pos[0]; - SSAFloat x1 = right_window_pos[0]; - SSAInt start(ssa_min(x0, x1)); - SSAInt end(ssa_max(x1, x0) + 0.5f); - - start = ssa_max(start, viewport.x); - end = ssa_min(end, viewport.right); - - SSABarycentricWeight weight_scanline(viewport, left_clip_pos, right_clip_pos); - - outer_data.start = start; - outer_data.end = end; - outer_data.input_width = input_width; - outer_data.input_height = input_height; - outer_data.output_width = output_width; - outer_data.output_height = output_height; - outer_data.input_pixels = input_data; - outer_data.output_pixels_line = output_data[output_width * y * 4]; - - outer_data.viewport_x = SSAFloat(viewport.x); - outer_data.viewport_rcp_half_width = viewport.rcp_half_width; - outer_data.dx = weight_scanline.v2[0] - weight_scanline.v1[0]; - outer_data.dw = weight_scanline.v2[3] - weight_scanline.v1[3]; - outer_data.v1w = weight_scanline.v1[3]; - outer_data.v1x = weight_scanline.v1[0]; - outer_data.sse_left_varying_in = 
left_line_varyings; - outer_data.sse_right_varying_in = right_line_varyings; - outer_data.num_varyings = num_varyings; - - outer_data.sampler = SSAScope::alloca(get_sampler_struct(SSAScope::context())); - std::vector index_list; - index_list.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)0))); - index_list.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)0))); - llvm::Value *sampler_width_ptr = SSAScope::builder().CreateGEP(outer_data.sampler, index_list); - index_list[1] = llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)1)); - llvm::Value *sampler_height_ptr = SSAScope::builder().CreateGEP(outer_data.sampler, index_list); - index_list[1] = llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)2)); - llvm::Value *sampler_data_ptr = SSAScope::builder().CreateGEP(outer_data.sampler, index_list); - SSAScope::builder().CreateStore(outer_data.input_width.v, sampler_width_ptr, false); - SSAScope::builder().CreateStore(outer_data.input_height.v, sampler_height_ptr, false); - SSAScope::builder().CreateStore(outer_data.input_pixels.v, sampler_data_ptr, false); - - - SSAVec4i xposinit = SSAVec4i(outer_data.start) + SSAVec4i(0, 1, 2, 3); - stack_x.store(outer_data.start); - stack_xnormalized.store((SSAVec4f(xposinit) + 0.5f - outer_data.viewport_x) * outer_data.viewport_rcp_half_width - 1.0f); - - ///////////////////////////////////////////////////////////////////////// - // First pixels: - - hint.set("firstpixels"); - SSAIfBlock if_block; - if_block.if_block(outer_data.end - outer_data.start > 3); - process_first_pixels(outer_data, stack_x, stack_xnormalized); - if_block.end_block(); - - ///////////////////////////////////////////////////////////////////////// - // Start: for (SSAInt x = start; x < end; x += 4) - - hint.set("loopstart"); - - SSAForBlock for_block; - SSAInt x = stack_x.load(); - for_block.loop_block(x + 3 < outer_data.end); - - 
///////////////////////////////////////////////////////////////////////// - // Loop body - { - SSAVec4f xnormalized = stack_xnormalized.load(); - - hint.set("blendload"); - SSAVec4i desti[4]; - SSAVec16ub dest_block = outer_data.output_pixels_line[x << 2].load_vec16ub(); - SSAVec4i::extend(dest_block, desti[0], desti[1], desti[2], desti[3]); - - SSAVec4f frag_colors[4]; - inner_block(outer_data, xnormalized, frag_colors); - blend(frag_colors, dest_block); - - hint.set("blendstore"); - outer_data.output_pixels_line[x << 2].store_vec16ub(dest_block); - hint.clear(); - - xnormalized = xnormalized + 4.0f * outer_data.viewport_rcp_half_width; - stack_xnormalized.store(xnormalized); - } - ///////////////////////////////////////////////////////////////////////// - // End: for (SSAInt x = start; x < end; x += 4) - - hint.set("loopend"); - x = x + 4; - stack_x.store(x); - for_block.end_block(); - - ///////////////////////////////////////////////////////////////////////// - // Last pixels: - - hint.set("lastpixels"); - process_last_pixels(outer_data, stack_x, stack_xnormalized); - - builder.CreateRetVoid(); - llvm::verifyFunction(*function.func); + return fg; } -void GlslFixedFunction::process_first_pixels(OuterData &outer_data, SSAStack &stack_x, SSAStack &stack_xnormalized) +SSAVec4i DrawerCodegen::blend_add(SSAVec4i fg, SSAVec4i bg, SSAInt srcalpha, SSAInt destalpha) { - SSAInt x = stack_x.load(); - SSAVec4f xnormalized = stack_xnormalized.load(); - SSAInt offset = x << 2; - - // Find how many pixels we have left until we 16 byte align: - llvm::Value *output_line_align = SSAScope::builder().CreatePtrToInt(outer_data.output_pixels_line.v, llvm::Type::getInt32Ty(SSAScope::context())); - output_line_align = SSAScope::builder().CreateAdd(output_line_align, offset.v); - SSAInt left = 4 - (SSAInt::from_llvm(SSAScope::builder().CreateURem(output_line_align, SSAInt(16).v)) >> 2); - - SSAIfBlock if_block0; - if_block0.if_block(left == 3); - { - SSAVec4i dest[4] = - { - 
outer_data.output_pixels_line[offset].load_vec4ub(), - outer_data.output_pixels_line[offset + 4].load_vec4ub(), - outer_data.output_pixels_line[offset + 8].load_vec4ub(), - SSAVec4i(0) - }; - - // To do: do this in a less braindead way - SSAVec16ub dest_block(SSAVec8s(dest[0], dest[1]), SSAVec8s(dest[2], dest[3])); - SSAVec4f frag_colors[4]; - inner_block(outer_data, xnormalized, frag_colors); - blend(frag_colors, dest_block); - SSAVec4i::extend(dest_block, dest[0], dest[1], dest[2], dest[3]); - - outer_data.output_pixels_line[offset].store_vec4ub(dest[0]); - outer_data.output_pixels_line[offset + 4].store_vec4ub(dest[1]); - outer_data.output_pixels_line[offset + 8].store_vec4ub(dest[2]); - - stack_x.store(x + 3); - stack_xnormalized.store(xnormalized + 3.0f * outer_data.viewport_rcp_half_width); - } - if_block0.else_block(); - { - SSAIfBlock if_block1; - if_block1.if_block(left == 2); - { - SSAVec4i dest[4] = - { - outer_data.output_pixels_line[offset].load_vec4ub(), - outer_data.output_pixels_line[offset + 4].load_vec4ub(), - SSAVec4i(0), - SSAVec4i(0) - }; - - // To do: do this in a less braindead way - SSAVec16ub dest_block(SSAVec8s(dest[0], dest[1]), SSAVec8s(dest[2], dest[3])); - SSAVec4f frag_colors[4]; - inner_block(outer_data, xnormalized, frag_colors); - blend(frag_colors, dest_block); - SSAVec4i::extend(dest_block, dest[0], dest[1], dest[2], dest[3]); - - outer_data.output_pixels_line[offset].store_vec4ub(dest[0]); - outer_data.output_pixels_line[offset + 4].store_vec4ub(dest[1]); - - stack_x.store(x + 2); - stack_xnormalized.store(xnormalized + 2.0f * outer_data.viewport_rcp_half_width); - } - if_block1.else_block(); - { - SSAIfBlock if_block2; - if_block2.if_block(left == 1); - { - SSAVec4i dest[4] = - { - outer_data.output_pixels_line[offset].load_vec4ub(), - SSAVec4i(0), - SSAVec4i(0), - SSAVec4i(0) - }; - - // To do: do this in a less braindead way - SSAVec16ub dest_block(SSAVec8s(dest[0], dest[1]), SSAVec8s(dest[2], dest[3])); - SSAVec4f 
frag_colors[4]; - inner_block(outer_data, xnormalized, frag_colors); - blend(frag_colors, dest_block); - SSAVec4i::extend(dest_block, dest[0], dest[1], dest[2], dest[3]); - - outer_data.output_pixels_line[offset].store_vec4ub(dest[0]); - - stack_x.store(x + 1); - stack_xnormalized.store(xnormalized + outer_data.viewport_rcp_half_width); - } - if_block2.end_block(); - } - if_block1.end_block(); - } - if_block0.end_block(); + SSAVec4i color = (fg * srcalpha + bg * destalpha) / 256; + return color.insert(3, 255); } -void GlslFixedFunction::process_last_pixels(OuterData &outer_data, SSAStack &stack_x, SSAStack &stack_xnormalized) +SSAVec4i DrawerCodegen::blend_sub(SSAVec4i fg, SSAVec4i bg, SSAInt srcalpha, SSAInt destalpha) { - SSAInt x = stack_x.load(); - SSAVec4f xnormalized = stack_xnormalized.load(); - - SSAInt left = outer_data.end - x; - SSAInt offset = x << 2; - SSAIfBlock if_block0; - SSAIfBlock if_block1; - SSAIfBlock if_block2; - if_block0.if_block(left == 3); - { - SSAVec4i dest[4] = - { - outer_data.output_pixels_line[offset].load_vec4ub(), - outer_data.output_pixels_line[offset + 4].load_vec4ub(), - outer_data.output_pixels_line[offset + 8].load_vec4ub(), - SSAVec4i(0) - }; - - // To do: do this in a less braindead way - SSAVec16ub dest_block(SSAVec8s(dest[0], dest[1]), SSAVec8s(dest[2], dest[3])); - SSAVec4f frag_colors[4]; - inner_block(outer_data, xnormalized, frag_colors); - blend(frag_colors, dest_block); - SSAVec4i::extend(dest_block, dest[0], dest[1], dest[2], dest[3]); - - outer_data.output_pixels_line[offset].store_vec4ub(dest[0]); - outer_data.output_pixels_line[offset + 4].store_vec4ub(dest[1]); - outer_data.output_pixels_line[offset + 8].store_vec4ub(dest[2]); - } - if_block0.else_block(); - if_block1.if_block(left == 2); - { - SSAVec4i dest[4] = - { - outer_data.output_pixels_line[offset].load_vec4ub(), - outer_data.output_pixels_line[offset + 4].load_vec4ub(), - SSAVec4i(0), - SSAVec4i(0) - }; - - // To do: do this in a less braindead way - 
SSAVec16ub dest_block(SSAVec8s(dest[0], dest[1]), SSAVec8s(dest[2], dest[3])); - SSAVec4f frag_colors[4]; - inner_block(outer_data, xnormalized, frag_colors); - blend(frag_colors, dest_block); - SSAVec4i::extend(dest_block, dest[0], dest[1], dest[2], dest[3]); - - outer_data.output_pixels_line[offset].store_vec4ub(dest[0]); - outer_data.output_pixels_line[offset + 4].store_vec4ub(dest[1]); - } - if_block1.else_block(); - if_block2.if_block(left == 1); - { - SSAVec4i dest[4] = - { - outer_data.output_pixels_line[offset].load_vec4ub(), - SSAVec4i(0), - SSAVec4i(0), - SSAVec4i(0) - }; - - // To do: do this in a less braindead way - SSAVec16ub dest_block(SSAVec8s(dest[0], dest[1]), SSAVec8s(dest[2], dest[3])); - SSAVec4f frag_colors[4]; - inner_block(outer_data, xnormalized, frag_colors); - blend(frag_colors, dest_block); - SSAVec4i::extend(dest_block, dest[0], dest[1], dest[2], dest[3]); - - outer_data.output_pixels_line[offset].store_vec4ub(dest[0]); - } - if_block2.end_block(); - if_block1.end_block(); - if_block0.end_block(); + SSAVec4i color = (bg * destalpha - fg * srcalpha) / 256; + return color.insert(3, 255); } -void GlslFixedFunction::inner_block(OuterData &data, SSAVec4f xnormalized, SSAVec4f *frag_color) +SSAVec4i DrawerCodegen::blend_revsub(SSAVec4i fg, SSAVec4i bg, SSAInt srcalpha, SSAInt destalpha) { - SSAScopeHint hint; - hint.set("varying"); - SSAVec4f a = (xnormalized * data.v1w - data.v1x) * SSAVec4f::rcp(data.dx - xnormalized * data.dw); - SSAVec4f one_minus_a = 1.0f - a; - - llvm::Value *globals_ptr[4]; - for (int i = 0; i < 4; i++) - { - globals_ptr[i] = SSAScope::alloca(fragment_codegen.get_global_struct_type()); - - std::vector index_list; - index_list.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)0))); - index_list.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)0))); - llvm::Value *sampler_ptr = SSAScope::builder().CreateGEP(globals_ptr[i], index_list); - 
SSAScope::builder().CreateStore(data.sampler, sampler_ptr, false); - - for (int j = 0; j < data.num_varyings; j++) - { - SSAVec4f field_value = - data.sse_left_varying_in[j].load() * SSAVec4f::shuffle(one_minus_a, i, i, i, i) + - data.sse_right_varying_in[j].load() * SSAVec4f::shuffle(a, i, i, i, i); - index_list.clear(); - index_list.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)0))); - index_list.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)j+1))); - llvm::Value *field_ptr = SSAScope::builder().CreateGEP(globals_ptr[i], index_list); - SSAScope::builder().CreateStore(field_value.v, field_ptr, false); - } - } - - hint.set("fragprogram"); - for (int i = 0; i < 4; i++) - { - SSAScope::builder().CreateCall(SSAScope::module()->getFunction((fragment_codegen.shader_prefix() + "main").c_str()), globals_ptr[i]); - - std::vector index_list; - index_list.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)0))); - index_list.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)5))); - llvm::Value *field_ptr = SSAScope::builder().CreateGEP(globals_ptr[i], index_list); - frag_color[i] = SSAVec4f::from_llvm(SSAScope::builder().CreateLoad(field_ptr, false)); - } + SSAVec4i color = (fg * srcalpha - bg * destalpha) / 256; + return color.insert(3, 255); } -/* -void GlslFixedFunction::blend(SSAVec4f frag_color[4], SSAVec16ub &dest) + +SSAVec4i DrawerCodegen::blend_alpha_blend(SSAVec4i fg, SSAVec4i bg) { - SSAVec4i desti[4]; - SSAVec4i::extend(dest, desti[0], desti[1], desti[2], desti[3]); - - // Pre-mulitiplied alpha blend: - for (int pixel_index = 0; pixel_index < 4; pixel_index++) - { - SSAVec4f src = SSAVec4f::shuffle(frag_color[pixel_index], 2, 1, 0, 3); - desti[pixel_index] = SSAVec4i(src * 255.0f); - SSAVec4f dest = SSAVec4f(desti[pixel_index]) * (1.0f / 255.0f); - SSAVec4f alpha = SSAVec4f::shuffle(dest, 3, 3, 3, 3); - SSAVec4f resultf = src + dest * 
(1.0f - alpha); - desti[pixel_index] = SSAVec4i(resultf * 255.0f); - } - - dest = SSAVec16ub(SSAVec8s(desti[0], desti[1]), SSAVec8s(desti[2], desti[3])); + SSAInt alpha = fg[3]; + alpha = alpha + (alpha >> 7); // // 255 -> 256 + SSAInt inv_alpha = 256 - alpha; + SSAVec4i color = (fg * alpha + bg * inv_alpha) / 256; + return color.insert(3, 255); } -*/ -void GlslFixedFunction::blend(SSAVec4f frag_color[4], SSAVec16ub &dest) + +SSAVec4i DrawerCodegen::sample_linear(SSAUBytePtr col0, SSAUBytePtr col1, SSAInt texturefracx, SSAInt texturefracy, SSAInt one, SSAInt height) { - for (int i = 0; i < 4; i++) - frag_color[i] = SSAVec4f::shuffle(frag_color[i], 2, 1, 0, 3); + SSAInt frac_y0 = (texturefracy >> FRACBITS) * height; + SSAInt frac_y1 = ((texturefracy + one) >> FRACBITS) * height; + SSAInt y0 = frac_y0 >> FRACBITS; + SSAInt y1 = frac_y1 >> FRACBITS; - // Pre-mulitiplied alpha blend: - SSAVec8s dest0 = SSAVec8s::extendlo(dest); - SSAVec8s dest1 = SSAVec8s::extendhi(dest); + SSAVec4i p00 = col0[y0].load_vec4ub(); + SSAVec4i p01 = col0[y1].load_vec4ub(); + SSAVec4i p10 = col1[y0].load_vec4ub(); + SSAVec4i p11 = col1[y1].load_vec4ub(); - SSAVec8s src0(SSAVec4i(frag_color[0] * 255.0f), SSAVec4i(frag_color[1] * 255.0f)); - SSAVec8s src1(SSAVec4i(frag_color[2] * 255.0f), SSAVec4i(frag_color[3] * 255.0f)); + SSAInt inv_b = texturefracx; + SSAInt inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; + SSAInt a = 16 - inv_a; + SSAInt b = 16 - inv_b; - // Extract and duplicate alpha components: - SSAVec8s alpha0 = SSAVec8s::shuffle(src0, 3, 3, 3, 3, 7, 7, 7, 7); - SSAVec8s alpha1 = SSAVec8s::shuffle(src1, 3, 3, 3, 3, 7, 7, 7, 7); - - // Convert from 0-255 to 0-256 range: - alpha0 = SSAVec8s::max_sse2(alpha0, 255); - alpha1 = SSAVec8s::max_sse2(alpha1, 255); - alpha0 = alpha0 + (alpha0 >> 7); - alpha1 = alpha1 + (alpha1 >> 7); - - SSAVec8s result0 = src0 + ((dest0 * (256 - alpha0)) >> 8); - SSAVec8s result1 = src1 + ((dest1 * (256 - alpha1)) >> 8); - - dest = SSAVec16ub(result0, result1); + 
return (p00 * (a * b) + p01 * (inv_a * b) + p10 * (a * inv_b) + p11 * (inv_a * inv_b) + 127) >> 8; } -#endif +SSAVec4i DrawerCodegen::sample_linear(SSAUBytePtr texture, SSAInt xfrac, SSAInt yfrac, SSAInt xbits, SSAInt ybits) +{ + SSAInt xshift = (32 - xbits); + SSAInt yshift = (32 - ybits); + SSAInt xmask = (SSAInt(1) << xshift) - 1; + SSAInt ymask = (SSAInt(1) << yshift) - 1; + SSAInt x = xfrac >> xbits; + SSAInt y = yfrac >> ybits; + + SSAVec4i p00 = texture[(y & ymask) + ((x & xmask) << yshift)].load_vec4ub(); + SSAVec4i p01 = texture[((y + 1) & ymask) + ((x & xmask) << yshift)].load_vec4ub(); + SSAVec4i p10 = texture[(y & ymask) + (((x + 1) & xmask) << yshift)].load_vec4ub(); + SSAVec4i p11 = texture[((y + 1) & ymask) + (((x + 1) & xmask) << yshift)].load_vec4ub(); + + SSAInt inv_b = (xfrac >> (xbits - 4)) & 15; + SSAInt inv_a = (yfrac >> (ybits - 4)) & 15; + SSAInt a = 16 - inv_a; + SSAInt b = 16 - inv_b; + + return (p00 * (a * b) + p01 * (inv_a * b) + p10 * (a * inv_b) + p11 * (inv_a * inv_b) + 127) >> 8; +} diff --git a/src/r_compiler/fixedfunction/fixedfunction.h b/src/r_compiler/fixedfunction/fixedfunction.h index 40236d233f..d9b8f042eb 100644 --- a/src/r_compiler/fixedfunction/fixedfunction.h +++ b/src/r_compiler/fixedfunction/fixedfunction.h @@ -1,6 +1,7 @@ #pragma once +#include "r_compiler/ssa/ssa_value.h" #include "r_compiler/ssa/ssa_vec4f.h" #include "r_compiler/ssa/ssa_vec4i.h" #include "r_compiler/ssa/ssa_vec8s.h" @@ -84,16 +85,9 @@ public: SSAInt desaturate; }; -class FixedFunction +class DrawerCodegen { public: - FixedFunction(); - - void(*DrawSpan)(const RenderArgs *) = nullptr; - -private: - void CodegenDrawSpan(); - // LightBgra SSAInt calc_light_multiplier(SSAInt light); SSAVec4i shade_pal_index_simple(SSAInt index, SSAInt light, SSAUBytePtr basecolors); @@ -111,89 +105,57 @@ private: // SampleBgra SSAVec4i sample_linear(SSAUBytePtr col0, SSAUBytePtr col1, SSAInt texturefracx, SSAInt texturefracy, SSAInt one, SSAInt height); SSAVec4i 
sample_linear(SSAUBytePtr texture, SSAInt xfrac, SSAInt yfrac, SSAInt xbits, SSAInt ybits); +}; + +class DrawSpanCodegen : public DrawerCodegen +{ +public: + void Generate(SSAValue args); + +private: + void LoopShade(bool isSimpleShade); + void LoopFilter(bool isSimpleShade, bool isNearestFilter); + SSAInt Loop4x(bool isSimpleShade, bool isNearestFilter, bool is64x64); + void Loop(SSAInt start, bool isSimpleShade, bool isNearestFilter, bool is64x64); + SSAVec4i Sample(SSAInt xfrac, SSAInt yfrac, bool isNearestFilter, bool is64x64); + + SSAStack stack_index, stack_xfrac, stack_yfrac; + + SSAUBytePtr destorg; + SSAUBytePtr source; + SSAInt destpitch; + SSAInt xstep; + SSAInt ystep; + SSAInt x1; + SSAInt x2; + SSAInt y; + SSAInt xbits; + SSAInt ybits; + SSAInt light; + SSAInt srcalpha; + SSAInt destalpha; + SSAInt count; + SSAUBytePtr data; + SSAInt yshift; + SSAInt xshift; + SSAInt xmask; + SSABool is_64x64; + SSABool is_simple_shade; + SSABool is_nearest_filter; + SSAShadeConstants shade_constants; +}; + +class FixedFunction +{ +public: + FixedFunction(); + + void(*DrawSpan)(const RenderArgs *) = nullptr; + +private: + void CodegenDrawSpan(); static llvm::Type *GetRenderArgsStruct(llvm::LLVMContext &context); RenderProgram mProgram; }; - -#if 0 - -class GlslProgram; -class GlslCodeGen; - -class GlslFixedFunction -{ -public: - GlslFixedFunction(GlslProgram &program, GlslCodeGen &vertex_codegen, GlslCodeGen &fragment_codegen); - void codegen(); - static llvm::Type *get_sampler_struct(llvm::LLVMContext &context); - -private: - void codegen_draw_triangles(int num_vertex_in, int num_vertex_out); - void codegen_calc_window_positions(); - void codegen_calc_polygon_face_direction(); - void codegen_calc_polygon_y_range(); - void codegen_update_polygon_edge(); - void codegen_texture(); - void codegen_normalize(); - void codegen_reflect(); - void codegen_max(); - void codegen_pow(); - void codegen_dot(); - void codegen_mix(); - - struct OuterData - { - OuterData() : sampler() 
{ } - - SSAInt start; - SSAInt end; - SSAInt input_width; - SSAInt input_height; - SSAInt output_width; - SSAInt output_height; - SSAUBytePtr input_pixels; - SSAUBytePtr output_pixels_line; - - SSAVec4fPtr sse_left_varying_in; - SSAVec4fPtr sse_right_varying_in; - int num_varyings; - SSAVec4f viewport_x; - SSAVec4f viewport_rcp_half_width; - SSAVec4f dx; - SSAVec4f dw; - SSAVec4f v1w; - SSAVec4f v1x; - - llvm::Value *sampler; - }; - - void render_polygon( - SSAInt input_width, - SSAInt input_height, - SSAUBytePtr input_data, - SSAInt output_width, - SSAInt output_height, - SSAUBytePtr output_data, - SSAInt viewport_x, - SSAInt viewport_y, - SSAInt viewport_width, - SSAInt viewport_height, - SSAInt num_vertices, - std::vector fragment_ins, - SSAInt core, - SSAInt num_cores); - - void codegen_render_scanline(int num_varyings); - void process_first_pixels(OuterData &outer_data, SSAStack &stack_x, SSAStack &stack_xnormalized); - void process_last_pixels(OuterData &outer_data, SSAStack &stack_x, SSAStack &stack_xnormalized); - void inner_block(OuterData &data, SSAVec4f xnormalized, SSAVec4f *out_frag_colors); - void blend(SSAVec4f frag_colors[4], SSAVec16ub &dest); - - GlslProgram &program; - GlslCodeGen &vertex_codegen; - GlslCodeGen &fragment_codegen; -}; - -#endif diff --git a/src/r_compiler/ssa/ssa_short.cpp b/src/r_compiler/ssa/ssa_short.cpp index fc8de9449b..3fa59b688a 100644 --- a/src/r_compiler/ssa/ssa_short.cpp +++ b/src/r_compiler/ssa/ssa_short.cpp @@ -32,6 +32,11 @@ llvm::Type *SSAShort::llvm_type() return llvm::Type::getInt16Ty(SSAScope::context()); } +SSAInt SSAShort::zext_int() +{ + return SSAInt::from_llvm(SSAScope::builder().CreateZExt(v, SSAInt::llvm_type(), SSAScope::hint())); +} + SSAShort operator+(const SSAShort &a, const SSAShort &b) { return SSAShort::from_llvm(SSAScope::builder().CreateAdd(a.v, b.v, SSAScope::hint())); diff --git a/src/r_compiler/ssa/ssa_short.h b/src/r_compiler/ssa/ssa_short.h index ae71a13363..932aafc0ea 100644 --- 
a/src/r_compiler/ssa/ssa_short.h +++ b/src/r_compiler/ssa/ssa_short.h @@ -17,6 +17,8 @@ public: static SSAShort from_llvm(llvm::Value *v) { return SSAShort(v); } static llvm::Type *llvm_type(); + SSAInt zext_int(); + llvm::Value *v; }; diff --git a/src/r_compiler/ssa/ssa_vec4i.cpp b/src/r_compiler/ssa/ssa_vec4i.cpp index d8e31276ce..1eed7b269a 100644 --- a/src/r_compiler/ssa/ssa_vec4i.cpp +++ b/src/r_compiler/ssa/ssa_vec4i.cpp @@ -49,6 +49,18 @@ SSAVec4i::SSAVec4i(SSAInt i) v = SSAScope::builder().CreateShuffleVector(SSAScope::builder().CreateBitCast(i.v, m1xi32type, SSAScope::hint()), llvm::UndefValue::get(m1xi32type), mask, SSAScope::hint()); } +SSAVec4i::SSAVec4i(SSAInt i0, SSAInt i1, SSAInt i2, SSAInt i3) +: v(0) +{ + std::vector constants; + constants.resize(4, llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 0, true))); + v = llvm::ConstantVector::get(constants); + v = SSAScope::builder().CreateInsertElement(v, i0.v, (uint64_t)0, SSAScope::hint()); + v = SSAScope::builder().CreateInsertElement(v, i1.v, (uint64_t)1, SSAScope::hint()); + v = SSAScope::builder().CreateInsertElement(v, i2.v, (uint64_t)2, SSAScope::hint()); + v = SSAScope::builder().CreateInsertElement(v, i3.v, (uint64_t)3, SSAScope::hint()); +} + SSAVec4i::SSAVec4i(SSAVec4f f32) : v(0) { diff --git a/src/r_compiler/ssa/ssa_vec4i.h b/src/r_compiler/ssa/ssa_vec4i.h index a654a87ae1..c1c9140d7f 100644 --- a/src/r_compiler/ssa/ssa_vec4i.h +++ b/src/r_compiler/ssa/ssa_vec4i.h @@ -16,6 +16,7 @@ public: SSAVec4i(int constant); SSAVec4i(int constant0, int constant1, int constant2, int constant3); SSAVec4i(SSAInt i); + SSAVec4i(SSAInt i0, SSAInt i1, SSAInt i2, SSAInt i3); explicit SSAVec4i(llvm::Value *v); SSAVec4i(SSAVec4f f32); SSAInt operator[](SSAInt index); diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 9757390957..665e6b84ea 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -300,50 +300,43 @@ void DrawerCommandQueue::StopThreads() 
///////////////////////////////////////////////////////////////////////////// -class DrawSpanFFCommand : public DrawerCommand +class DrawSpanLLVMCommand : public DrawerCommand { - fixed_t _xfrac; - fixed_t _yfrac; - fixed_t _xstep; - fixed_t _ystep; - int _x1; - int _x2; - int _y; - int _xbits; - int _ybits; - BYTE * RESTRICT _destorg; - - const uint32_t * RESTRICT _source; - uint32_t _light; - ShadeConstants _shade_constants; - bool _nearest_filter; - - uint32_t _srcalpha; - uint32_t _destalpha; - + RenderArgs args; FixedFunction *_ff; public: - DrawSpanFFCommand() + DrawSpanLLVMCommand() { - _xfrac = ds_xfrac; - _yfrac = ds_yfrac; - _xstep = ds_xstep; - _ystep = ds_ystep; - _x1 = ds_x1; - _x2 = ds_x2; - _y = ds_y; - _xbits = ds_xbits; - _ybits = ds_ybits; - _destorg = dc_destorg; - - _source = (const uint32_t*)ds_source; - _light = LightBgra::calc_light_multiplier(ds_light); - _shade_constants = ds_shade_constants; - _nearest_filter = !SampleBgra::span_sampler_setup(_source, _xbits, _ybits, _xstep, _ystep, ds_source_mipmapped); - - _srcalpha = dc_srcalpha >> (FRACBITS - 8); - _destalpha = dc_destalpha >> (FRACBITS - 8); + args.xfrac = ds_xfrac; + args.yfrac = ds_yfrac; + args.xstep = ds_xstep; + args.ystep = ds_ystep; + args.x1 = ds_x1; + args.x2 = ds_x2; + args.y = ds_y; + args.xbits = ds_xbits; + args.ybits = ds_ybits; + args.destorg = (uint32_t*)dc_destorg; + args.destpitch = dc_pitch; + args.source = (const uint32_t*)ds_source; + args.light = LightBgra::calc_light_multiplier(ds_light); + args.light_red = ds_shade_constants.light_red; + args.light_green = ds_shade_constants.light_green; + args.light_blue = ds_shade_constants.light_blue; + args.light_alpha = ds_shade_constants.light_alpha; + args.fade_red = ds_shade_constants.fade_red; + args.fade_green = ds_shade_constants.fade_green; + args.fade_blue = ds_shade_constants.fade_blue; + args.fade_alpha = ds_shade_constants.fade_alpha; + args.desaturate = ds_shade_constants.desaturate; + args.srcalpha = 
dc_srcalpha >> (FRACBITS - 8); + args.destalpha = dc_destalpha >> (FRACBITS - 8); + args.flags = 0; + if (ds_shade_constants.simple_shade) + args.flags |= RenderArgs::simple_shade; + if (!SampleBgra::span_sampler_setup(args.source, args.xbits, args.ybits, args.xstep, args.ystep, ds_source_mipmapped)) + args.flags |= RenderArgs::nearest_filter; static FixedFunction ff; _ff = &ff; @@ -351,25 +344,8 @@ public: void Execute(DrawerThread *thread) override { - if (thread->skipped_by_thread(_y)) + if (thread->skipped_by_thread(args.y)) return; - - RenderArgs args; - args.destorg = (uint32_t *)_destorg; - args.source = _source; - args.destpitch = dc_pitch; - args.xfrac = _xfrac; - args.yfrac = _yfrac; - args.xstep = _xstep; - args.ystep = _ystep; - args.x1 = _x1; - args.x2 = _x2; - args.y = _y; - args.xbits = _xbits; - args.ybits = _ybits; - args.light = _light; - args.srcalpha = _srcalpha; - args.destalpha = _destalpha; _ff->DrawSpan(&args); } }; @@ -2777,7 +2753,7 @@ void R_DrawRevSubClampTranslatedColumn_rgba() void R_DrawSpan_rgba() { - DrawerCommandQueue::QueueCommand(); + DrawerCommandQueue::QueueCommand(); /* #ifdef NO_SSE DrawerCommandQueue::QueueCommand(); From bfa291b02f9242f41bfddb1d6a93994ce9b1b6c3 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 29 Sep 2016 02:10:14 +0200 Subject: [PATCH 135/912] Create LLVMDrawers class as the external interface to the drawers --- src/CMakeLists.txt | 1 + .../fixedfunction/fixedfunction.cpp | 145 ----------- src/r_compiler/fixedfunction/fixedfunction.h | 76 +----- src/r_compiler/llvmdrawers.cpp | 232 ++++++++++++++++++ src/r_compiler/llvmdrawers.h | 52 ++++ src/r_draw_rgba.cpp | 8 +- src/r_swrenderer.cpp | 11 + src/r_swrenderer.h | 3 + 8 files changed, 302 insertions(+), 226 deletions(-) create mode 100644 src/r_compiler/llvmdrawers.cpp create mode 100644 src/r_compiler/llvmdrawers.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 4b81a24f44..508951510a 100644 --- a/src/CMakeLists.txt +++ 
b/src/CMakeLists.txt @@ -1426,6 +1426,7 @@ set (PCH_SOURCES fragglescript/t_spec.cpp fragglescript/t_variable.cpp fragglescript/t_cmd.cpp + r_compiler/llvmdrawers.cpp r_compiler/ssa/ssa_bool.cpp r_compiler/ssa/ssa_float.cpp r_compiler/ssa/ssa_float_ptr.cpp diff --git a/src/r_compiler/fixedfunction/fixedfunction.cpp b/src/r_compiler/fixedfunction/fixedfunction.cpp index cc53a069a5..fffd2c8854 100644 --- a/src/r_compiler/fixedfunction/fixedfunction.cpp +++ b/src/r_compiler/fixedfunction/fixedfunction.cpp @@ -11,151 +11,6 @@ #include "r_compiler/ssa/ssa_value.h" #include "r_compiler/ssa/ssa_barycentric_weight.h" -RenderProgram::RenderProgram() -{ - using namespace llvm; - - install_fatal_error_handler([](void *user_data, const std::string& reason, bool gen_crash_diag) { - I_FatalError("LLVM fatal error: %s", reason.c_str()); - }); - - InitializeNativeTarget(); - InitializeNativeTargetAsmPrinter(); - InitializeNativeTargetAsmParser(); - - std::string errorstring; - - std::string targetTriple = sys::getProcessTriple(); - std::string cpuName = sys::getHostCPUName(); - StringMap cpuFeatures; - sys::getHostCPUFeatures(cpuFeatures); - std::string cpuFeaturesStr; - for (const auto &it : cpuFeatures) - { - if (!cpuFeaturesStr.empty()) - cpuFeaturesStr.push_back(' '); - cpuFeaturesStr.push_back(it.getValue() ? 
'+' : '-'); - cpuFeaturesStr += it.getKey(); - } - - //Printf("LLVM target triple: %s\n", targetTriple.c_str()); - //Printf("LLVM CPU and features: %s, %s\n", cpuName.c_str(), cpuFeaturesStr.c_str()); - - const Target *target = TargetRegistry::lookupTarget(targetTriple, errorstring); - if (!target) - I_FatalError("Could not find LLVM target: %s", errorstring.c_str()); - - TargetOptions opt; - auto relocModel = Optional(Reloc::Static); - TargetMachine *machine = target->createTargetMachine(targetTriple, cpuName, cpuFeaturesStr, opt, relocModel, CodeModel::Default, CodeGenOpt::Aggressive); - if (!machine) - I_FatalError("Could not create LLVM target machine"); - - mContext = std::make_unique(); - - auto moduleOwner = std::make_unique("render", context()); - mModule = moduleOwner.get(); - mModule->setTargetTriple(targetTriple); - mModule->setDataLayout(machine->createDataLayout()); - - EngineBuilder engineBuilder(std::move(moduleOwner)); - engineBuilder.setErrorStr(&errorstring); - engineBuilder.setOptLevel(CodeGenOpt::Aggressive); - engineBuilder.setRelocationModel(Reloc::Static); - engineBuilder.setEngineKind(EngineKind::JIT); - mEngine.reset(engineBuilder.create(machine)); - if (!mEngine) - I_FatalError("Could not create LLVM execution engine: %s", errorstring.c_str()); - - mModulePassManager = std::make_unique(); - mFunctionPassManager = std::make_unique(mModule); - - PassManagerBuilder passManagerBuilder; - passManagerBuilder.OptLevel = 3; - passManagerBuilder.SizeLevel = 0; - passManagerBuilder.Inliner = createFunctionInliningPass(); - passManagerBuilder.populateModulePassManager(*mModulePassManager.get()); - passManagerBuilder.populateFunctionPassManager(*mFunctionPassManager.get()); -} - -RenderProgram::~RenderProgram() -{ - mEngine.reset(); - mContext.reset(); -} - -void *RenderProgram::PointerToFunction(const char *name) -{ - llvm::Function *function = mModule->getFunction(name); - if (!function) - return nullptr; - return 
mEngine->getPointerToFunction(function); -} - -///////////////////////////////////////////////////////////////////////////// - -FixedFunction::FixedFunction() -{ - CodegenDrawSpan(); - mProgram.engine()->finalizeObject(); - mProgram.modulePassManager()->run(*mProgram.module()); - - DrawSpan = mProgram.GetProcAddress("DrawSpan"); -} - -void FixedFunction::CodegenDrawSpan() -{ - llvm::IRBuilder<> builder(mProgram.context()); - SSAScope ssa_scope(&mProgram.context(), mProgram.module(), &builder); - - SSAFunction function("DrawSpan"); - function.add_parameter(GetRenderArgsStruct(mProgram.context())); - function.create_public(); - - DrawSpanCodegen codegen; - codegen.Generate(function.parameter(0)); - - builder.CreateRetVoid(); - - if (llvm::verifyFunction(*function.func)) - I_FatalError("verifyFunction failed for " __FUNCTION__); - - mProgram.functionPassManager()->run(*function.func); -} - -llvm::Type *FixedFunction::GetRenderArgsStruct(llvm::LLVMContext &context) -{ - std::vector elements; - elements.push_back(llvm::Type::getInt8PtrTy(context)); // uint8_t *destorg; - elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint8_t *source; - elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t destpitch; - elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t xfrac; - elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t yfrac; - elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t xstep; - elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t ystep; - elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t x1; - elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t x2; - elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t y; - elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t xbits; - elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t ybits; - elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t light; - 
elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t srcalpha; - elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t destalpha; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_alpha; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_red; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_green; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_blue; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_alpha; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_red; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_green; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_blue; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t desaturate; - elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t flags; - return llvm::StructType::get(context, elements, false)->getPointerTo(); -} - -///////////////////////////////////////////////////////////////////////////// - void DrawSpanCodegen::Generate(SSAValue args) { destorg = args[0][0].load(); diff --git a/src/r_compiler/fixedfunction/fixedfunction.h b/src/r_compiler/fixedfunction/fixedfunction.h index d9b8f042eb..4b5bfc8b71 100644 --- a/src/r_compiler/fixedfunction/fixedfunction.h +++ b/src/r_compiler/fixedfunction/fixedfunction.h @@ -1,6 +1,7 @@ #pragma once +#include "r_compiler/llvmdrawers.h" #include "r_compiler/ssa/ssa_value.h" #include "r_compiler/ssa/ssa_vec4f.h" #include "r_compiler/ssa/ssa_vec4i.h" @@ -17,66 +18,6 @@ #include "r_compiler/ssa/ssa_barycentric_weight.h" #include "r_compiler/llvm_include.h" -class RenderProgram -{ -public: - RenderProgram(); - ~RenderProgram(); - - template - Func *GetProcAddress(const char *name) { return reinterpret_cast(PointerToFunction(name)); } - - llvm::LLVMContext &context() { return *mContext; } - llvm::Module *module() { return mModule; } - 
llvm::ExecutionEngine *engine() { return mEngine.get(); } - llvm::legacy::PassManager *modulePassManager() { return mModulePassManager.get(); } - llvm::legacy::FunctionPassManager *functionPassManager() { return mFunctionPassManager.get(); } - -private: - void *PointerToFunction(const char *name); - - std::unique_ptr mContext; - llvm::Module *mModule; - std::unique_ptr mEngine; - std::unique_ptr mModulePassManager; - std::unique_ptr mFunctionPassManager; -}; - -struct RenderArgs -{ - uint32_t *destorg; - const uint32_t *source; - int32_t destpitch; - int32_t xfrac; - int32_t yfrac; - int32_t xstep; - int32_t ystep; - int32_t x1; - int32_t x2; - int32_t y; - int32_t xbits; - int32_t ybits; - uint32_t light; - uint32_t srcalpha; - uint32_t destalpha; - - uint16_t light_alpha; - uint16_t light_red; - uint16_t light_green; - uint16_t light_blue; - uint16_t fade_alpha; - uint16_t fade_red; - uint16_t fade_green; - uint16_t fade_blue; - uint16_t desaturate; - uint32_t flags; - enum Flags - { - simple_shade = 1, - nearest_filter = 2 - }; -}; - class SSAShadeConstants { public: @@ -144,18 +85,3 @@ private: SSABool is_nearest_filter; SSAShadeConstants shade_constants; }; - -class FixedFunction -{ -public: - FixedFunction(); - - void(*DrawSpan)(const RenderArgs *) = nullptr; - -private: - void CodegenDrawSpan(); - - static llvm::Type *GetRenderArgsStruct(llvm::LLVMContext &context); - - RenderProgram mProgram; -}; diff --git a/src/r_compiler/llvmdrawers.cpp b/src/r_compiler/llvmdrawers.cpp new file mode 100644 index 0000000000..4082137072 --- /dev/null +++ b/src/r_compiler/llvmdrawers.cpp @@ -0,0 +1,232 @@ + +#include "i_system.h" +#include "r_compiler/fixedfunction/fixedfunction.h" +#include "r_compiler/ssa/ssa_function.h" +#include "r_compiler/ssa/ssa_scope.h" +#include "r_compiler/ssa/ssa_for_block.h" +#include "r_compiler/ssa/ssa_if_block.h" +#include "r_compiler/ssa/ssa_stack.h" +#include "r_compiler/ssa/ssa_function.h" +#include "r_compiler/ssa/ssa_struct_type.h" 
+#include "r_compiler/ssa/ssa_value.h" +#include "r_compiler/ssa/ssa_barycentric_weight.h" + +class LLVMProgram +{ +public: + LLVMProgram(); + ~LLVMProgram(); + + void StopLogFatalErrors(); + + template + Func *GetProcAddress(const char *name) { return reinterpret_cast(PointerToFunction(name)); } + + llvm::LLVMContext &context() { return *mContext; } + llvm::Module *module() { return mModule; } + llvm::ExecutionEngine *engine() { return mEngine.get(); } + llvm::legacy::PassManager *modulePassManager() { return mModulePassManager.get(); } + llvm::legacy::FunctionPassManager *functionPassManager() { return mFunctionPassManager.get(); } + +private: + void *PointerToFunction(const char *name); + + std::unique_ptr mContext; + llvm::Module *mModule; + std::unique_ptr mEngine; + std::unique_ptr mModulePassManager; + std::unique_ptr mFunctionPassManager; +}; + +class LLVMDrawersImpl : public LLVMDrawers +{ +public: + LLVMDrawersImpl(); + +private: + void CodegenDrawSpan(); + static llvm::Type *GetRenderArgsStruct(llvm::LLVMContext &context); + + LLVMProgram mProgram; +}; + +///////////////////////////////////////////////////////////////////////////// + +LLVMDrawers *LLVMDrawers::Singleton = nullptr; + +void LLVMDrawers::Create() +{ + if (!Singleton) + Singleton = new LLVMDrawersImpl(); +} + +void LLVMDrawers::Destroy() +{ + delete Singleton; + Singleton = nullptr; +} + +LLVMDrawers *LLVMDrawers::Instance() +{ + return Singleton; +} + +///////////////////////////////////////////////////////////////////////////// + +LLVMDrawersImpl::LLVMDrawersImpl() +{ + CodegenDrawSpan(); + mProgram.engine()->finalizeObject(); + mProgram.modulePassManager()->run(*mProgram.module()); + + DrawSpan = mProgram.GetProcAddress("DrawSpan"); + + mProgram.StopLogFatalErrors(); +} + +void LLVMDrawersImpl::CodegenDrawSpan() +{ + llvm::IRBuilder<> builder(mProgram.context()); + SSAScope ssa_scope(&mProgram.context(), mProgram.module(), &builder); + + SSAFunction function("DrawSpan"); + 
function.add_parameter(GetRenderArgsStruct(mProgram.context())); + function.create_public(); + + DrawSpanCodegen codegen; + codegen.Generate(function.parameter(0)); + + builder.CreateRetVoid(); + + if (llvm::verifyFunction(*function.func)) + I_FatalError("verifyFunction failed for " __FUNCTION__); + + mProgram.functionPassManager()->run(*function.func); +} + +llvm::Type *LLVMDrawersImpl::GetRenderArgsStruct(llvm::LLVMContext &context) +{ + std::vector elements; + elements.push_back(llvm::Type::getInt8PtrTy(context)); // uint8_t *destorg; + elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint8_t *source; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t destpitch; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t xfrac; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t yfrac; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t xstep; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t ystep; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t x1; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t x2; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t y; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t xbits; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t ybits; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t light; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t srcalpha; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t destalpha; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_alpha; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_red; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_green; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_blue; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_alpha; + 
elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_red; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_green; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_blue; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t desaturate; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t flags; + return llvm::StructType::get(context, elements, false)->getPointerTo(); +} + +///////////////////////////////////////////////////////////////////////////// + +namespace { static bool LogFatalErrors = false; } + +LLVMProgram::LLVMProgram() +{ + using namespace llvm; + + // We have to extra careful about this because both LLVM and ZDoom made + // the very unwise decision to hook atexit. To top it off, LLVM decided + // to log something in the atexit handler.. + LogFatalErrors = true; + + install_fatal_error_handler([](void *user_data, const std::string& reason, bool gen_crash_diag) { + if (LogFatalErrors) + I_FatalError("LLVM fatal error: %s", reason.c_str()); + }); + + InitializeNativeTarget(); + InitializeNativeTargetAsmPrinter(); + InitializeNativeTargetAsmParser(); + + std::string errorstring; + + std::string targetTriple = sys::getProcessTriple(); + std::string cpuName = sys::getHostCPUName(); + StringMap cpuFeatures; + sys::getHostCPUFeatures(cpuFeatures); + std::string cpuFeaturesStr; + for (const auto &it : cpuFeatures) + { + if (!cpuFeaturesStr.empty()) + cpuFeaturesStr.push_back(' '); + cpuFeaturesStr.push_back(it.getValue() ? 
'+' : '-'); + cpuFeaturesStr += it.getKey(); + } + + //Printf("LLVM target triple: %s\n", targetTriple.c_str()); + //Printf("LLVM CPU and features: %s, %s\n", cpuName.c_str(), cpuFeaturesStr.c_str()); + + const Target *target = TargetRegistry::lookupTarget(targetTriple, errorstring); + if (!target) + I_FatalError("Could not find LLVM target: %s", errorstring.c_str()); + + TargetOptions opt; + auto relocModel = Optional(Reloc::Static); + TargetMachine *machine = target->createTargetMachine(targetTriple, cpuName, cpuFeaturesStr, opt, relocModel, CodeModel::Default, CodeGenOpt::Aggressive); + if (!machine) + I_FatalError("Could not create LLVM target machine"); + + mContext = std::make_unique(); + + auto moduleOwner = std::make_unique("render", context()); + mModule = moduleOwner.get(); + mModule->setTargetTriple(targetTriple); + mModule->setDataLayout(machine->createDataLayout()); + + EngineBuilder engineBuilder(std::move(moduleOwner)); + engineBuilder.setErrorStr(&errorstring); + engineBuilder.setOptLevel(CodeGenOpt::Aggressive); + engineBuilder.setRelocationModel(Reloc::Static); + engineBuilder.setEngineKind(EngineKind::JIT); + mEngine.reset(engineBuilder.create(machine)); + if (!mEngine) + I_FatalError("Could not create LLVM execution engine: %s", errorstring.c_str()); + + mModulePassManager = std::make_unique(); + mFunctionPassManager = std::make_unique(mModule); + + PassManagerBuilder passManagerBuilder; + passManagerBuilder.OptLevel = 3; + passManagerBuilder.SizeLevel = 0; + passManagerBuilder.Inliner = createFunctionInliningPass(); + passManagerBuilder.populateModulePassManager(*mModulePassManager.get()); + passManagerBuilder.populateFunctionPassManager(*mFunctionPassManager.get()); +} + +LLVMProgram::~LLVMProgram() +{ + mEngine.reset(); + mContext.reset(); +} + +void *LLVMProgram::PointerToFunction(const char *name) +{ + llvm::Function *function = mModule->getFunction(name); + if (!function) + return nullptr; + return mEngine->getPointerToFunction(function); 
+} + +void LLVMProgram::StopLogFatalErrors() +{ + LogFatalErrors = false; +} diff --git a/src/r_compiler/llvmdrawers.h b/src/r_compiler/llvmdrawers.h new file mode 100644 index 0000000000..2ad6c3d529 --- /dev/null +++ b/src/r_compiler/llvmdrawers.h @@ -0,0 +1,52 @@ + +#pragma once + +struct RenderArgs +{ + uint32_t *destorg; + const uint32_t *source; + int32_t destpitch; + int32_t xfrac; + int32_t yfrac; + int32_t xstep; + int32_t ystep; + int32_t x1; + int32_t x2; + int32_t y; + int32_t xbits; + int32_t ybits; + uint32_t light; + uint32_t srcalpha; + uint32_t destalpha; + + uint16_t light_alpha; + uint16_t light_red; + uint16_t light_green; + uint16_t light_blue; + uint16_t fade_alpha; + uint16_t fade_red; + uint16_t fade_green; + uint16_t fade_blue; + uint16_t desaturate; + uint32_t flags; + enum Flags + { + simple_shade = 1, + nearest_filter = 2 + }; +}; + +class LLVMDrawers +{ +public: + virtual ~LLVMDrawers() { } + + static void Create(); + static void Destroy(); + static LLVMDrawers *Instance(); + + void(*DrawSpan)(const RenderArgs *) = nullptr; + +private: + static LLVMDrawers *Singleton; +}; diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 665e6b84ea..9c2cd62937 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -38,7 +38,7 @@ #include "r_data/colormaps.h" #include "r_plane.h" #include "r_draw_rgba.h" -#include "r_compiler/fixedfunction/fixedfunction.h" +#include "r_compiler/llvmdrawers.h" #include "gi.h" #include "stats.h" @@ -303,7 +303,6 @@ void DrawerCommandQueue::StopThreads() class DrawSpanLLVMCommand : public DrawerCommand { RenderArgs args; - FixedFunction *_ff; public: DrawSpanLLVMCommand() @@ -337,16 +336,13 @@ public: args.flags |= RenderArgs::simple_shade; if (!SampleBgra::span_sampler_setup(args.source, args.xbits, args.ybits, args.xstep, args.ystep, ds_source_mipmapped)) args.flags |= RenderArgs::nearest_filter; - - static FixedFunction ff; - _ff = &ff; } void Execute(DrawerThread *thread) override { if 
(thread->skipped_by_thread(args.y)) return; - _ff->DrawSpan(&args); + LLVMDrawers::Instance()->DrawSpan(&args); } }; diff --git a/src/r_swrenderer.cpp b/src/r_swrenderer.cpp index 5be8475027..b9a9ea7fd3 100644 --- a/src/r_swrenderer.cpp +++ b/src/r_swrenderer.cpp @@ -43,6 +43,7 @@ #include "textures/textures.h" #include "r_data/voxels.h" #include "r_draw_rgba.h" +#include "r_compiler/llvmdrawers.h" EXTERN_CVAR(Bool, r_shadercolormaps) @@ -51,6 +52,16 @@ void R_SetupColormap(player_t *); void R_SetupFreelook(); void R_InitRenderer(); +FSoftwareRenderer::FSoftwareRenderer() +{ + LLVMDrawers::Create(); +} + +FSoftwareRenderer::~FSoftwareRenderer() +{ + LLVMDrawers::Destroy(); +} + //========================================================================== // // DCanvas :: Init diff --git a/src/r_swrenderer.h b/src/r_swrenderer.h index f9d5609a0d..fc3ec25512 100644 --- a/src/r_swrenderer.h +++ b/src/r_swrenderer.h @@ -5,6 +5,9 @@ struct FSoftwareRenderer : public FRenderer { + FSoftwareRenderer(); + ~FSoftwareRenderer(); + // Can be overridden so that the colormaps for sector color/fade won't be built. 
virtual bool UsesColormap() const override; From efd22346d8ec29dace77fe3b788c59f98b4ab340 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 29 Sep 2016 02:26:36 +0200 Subject: [PATCH 136/912] Fix linear sampling bug --- src/r_compiler/fixedfunction/fixedfunction.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/r_compiler/fixedfunction/fixedfunction.cpp b/src/r_compiler/fixedfunction/fixedfunction.cpp index fffd2c8854..c205bc45eb 100644 --- a/src/r_compiler/fixedfunction/fixedfunction.cpp +++ b/src/r_compiler/fixedfunction/fixedfunction.cpp @@ -256,10 +256,10 @@ SSAVec4i DrawerCodegen::sample_linear(SSAUBytePtr col0, SSAUBytePtr col1, SSAInt SSAInt y0 = frac_y0 >> FRACBITS; SSAInt y1 = frac_y1 >> FRACBITS; - SSAVec4i p00 = col0[y0].load_vec4ub(); - SSAVec4i p01 = col0[y1].load_vec4ub(); - SSAVec4i p10 = col1[y0].load_vec4ub(); - SSAVec4i p11 = col1[y1].load_vec4ub(); + SSAVec4i p00 = col0[y0 * 4].load_vec4ub(); + SSAVec4i p01 = col0[y1 * 4].load_vec4ub(); + SSAVec4i p10 = col1[y0 * 4].load_vec4ub(); + SSAVec4i p11 = col1[y1 * 4].load_vec4ub(); SSAInt inv_b = texturefracx; SSAInt inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; @@ -278,10 +278,10 @@ SSAVec4i DrawerCodegen::sample_linear(SSAUBytePtr texture, SSAInt xfrac, SSAInt SSAInt x = xfrac >> xbits; SSAInt y = yfrac >> ybits; - SSAVec4i p00 = texture[(y & ymask) + ((x & xmask) << yshift)].load_vec4ub(); - SSAVec4i p01 = texture[((y + 1) & ymask) + ((x & xmask) << yshift)].load_vec4ub(); - SSAVec4i p10 = texture[(y & ymask) + (((x + 1) & xmask) << yshift)].load_vec4ub(); - SSAVec4i p11 = texture[((y + 1) & ymask) + (((x + 1) & xmask) << yshift)].load_vec4ub(); + SSAVec4i p00 = texture[((y & ymask) + ((x & xmask) << yshift)) * 4].load_vec4ub(); + SSAVec4i p01 = texture[(((y + 1) & ymask) + ((x & xmask) << yshift)) * 4].load_vec4ub(); + SSAVec4i p10 = texture[((y & ymask) + (((x + 1) & xmask) << yshift)) * 4].load_vec4ub(); + SSAVec4i p11 = texture[(((y + 1) & ymask) + 
(((x + 1) & xmask) << yshift)) * 4].load_vec4ub(); SSAInt inv_b = (xfrac >> (xbits - 4)) & 15; SSAInt inv_a = (yfrac >> (ybits - 4)) & 15; From e5f3c119cdf4c547be763ceacedf629752938014 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 29 Sep 2016 04:01:42 +0200 Subject: [PATCH 137/912] Codegen all DrawSpan variants --- .../fixedfunction/fixedfunction.cpp | 94 +++++++++++++------ src/r_compiler/fixedfunction/fixedfunction.h | 25 ++++- src/r_compiler/llvmdrawers.cpp | 29 ++++-- src/r_compiler/llvmdrawers.h | 9 +- src/r_draw_rgba.cpp | 87 +++++++++++++++-- 5 files changed, 193 insertions(+), 51 deletions(-) diff --git a/src/r_compiler/fixedfunction/fixedfunction.cpp b/src/r_compiler/fixedfunction/fixedfunction.cpp index c205bc45eb..fc5402a42a 100644 --- a/src/r_compiler/fixedfunction/fixedfunction.cpp +++ b/src/r_compiler/fixedfunction/fixedfunction.cpp @@ -11,7 +11,7 @@ #include "r_compiler/ssa/ssa_value.h" #include "r_compiler/ssa/ssa_barycentric_weight.h" -void DrawSpanCodegen::Generate(SSAValue args) +void DrawSpanCodegen::Generate(DrawSpanVariant variant, SSAValue args) { destorg = args[0][0].load(); source = args[0][1].load(); @@ -51,44 +51,44 @@ void DrawSpanCodegen::Generate(SSAValue args) // 64x64 is the most common case by far, so special case it. 
is_64x64 = xbits == 6 && ybits == 6; - is_simple_shade = (flags & RenderArgs::simple_shade) == RenderArgs::simple_shade; - is_nearest_filter = (flags & RenderArgs::nearest_filter) == RenderArgs::nearest_filter; + is_simple_shade = (flags & DrawSpanArgs::simple_shade) == DrawSpanArgs::simple_shade; + is_nearest_filter = (flags & DrawSpanArgs::nearest_filter) == DrawSpanArgs::nearest_filter; SSAIfBlock branch; branch.if_block(is_simple_shade); - LoopShade(true); + LoopShade(variant, true); branch.else_block(); - LoopShade(false); + LoopShade(variant, false); branch.end_block(); } -void DrawSpanCodegen::LoopShade(bool isSimpleShade) +void DrawSpanCodegen::LoopShade(DrawSpanVariant variant, bool isSimpleShade) { SSAIfBlock branch; branch.if_block(is_nearest_filter); - LoopFilter(isSimpleShade, true); + LoopFilter(variant, isSimpleShade, true); branch.else_block(); - LoopFilter(isSimpleShade, false); + LoopFilter(variant, isSimpleShade, false); branch.end_block(); } -void DrawSpanCodegen::LoopFilter(bool isSimpleShade, bool isNearestFilter) +void DrawSpanCodegen::LoopFilter(DrawSpanVariant variant, bool isSimpleShade, bool isNearestFilter) { SSAIfBlock branch; branch.if_block(is_64x64); { - SSAInt sseLength = Loop4x(isSimpleShade, isNearestFilter, true); - Loop(sseLength * 4, isSimpleShade, isNearestFilter, true); + SSAInt sseLength = Loop4x(variant, isSimpleShade, isNearestFilter, true); + Loop(sseLength * 4, variant, isSimpleShade, isNearestFilter, true); } branch.else_block(); { - SSAInt sseLength = Loop4x(isSimpleShade, isNearestFilter, false); - Loop(sseLength * 4, isSimpleShade, isNearestFilter, false); + SSAInt sseLength = Loop4x(variant, isSimpleShade, isNearestFilter, false); + Loop(sseLength * 4, variant, isSimpleShade, isNearestFilter, false); } branch.end_block(); } -SSAInt DrawSpanCodegen::Loop4x(bool isSimpleShade, bool isNearestFilter, bool is64x64) +SSAInt DrawSpanCodegen::Loop4x(DrawSpanVariant variant, bool isSimpleShade, bool isNearestFilter, bool 
is64x64) { SSAInt sseLength = count / 4; stack_index.store(0); @@ -97,24 +97,31 @@ SSAInt DrawSpanCodegen::Loop4x(bool isSimpleShade, bool isNearestFilter, bool is SSAInt index = stack_index.load(); loop.loop_block(index < sseLength); + SSAVec16ub bg = data[index * 16].load_unaligned_vec16ub(); + SSAVec8s bg0 = SSAVec8s::extendlo(bg); + SSAVec8s bg1 = SSAVec8s::extendhi(bg); + SSAVec4i bgcolors[4] = + { + SSAVec4i::extendlo(bg0), + SSAVec4i::extendhi(bg0), + SSAVec4i::extendlo(bg1), + SSAVec4i::extendhi(bg1) + }; + SSAVec4i colors[4]; for (int i = 0; i < 4; i++) { SSAInt xfrac = stack_xfrac.load(); SSAInt yfrac = stack_yfrac.load(); - SSAVec4i fg = Sample(xfrac, yfrac, isNearestFilter, is64x64); - if (isSimpleShade) - colors[i] = shade_bgra_simple(fg, light); - else - colors[i] = shade_bgra_advanced(fg, light, shade_constants); + colors[i] = Blend(Shade(Sample(xfrac, yfrac, isNearestFilter, is64x64), isSimpleShade), bgcolors[i], variant); stack_xfrac.store(xfrac + xstep); stack_yfrac.store(yfrac + ystep); } - SSAVec16ub ssecolors(SSAVec8s(colors[0], colors[1]), SSAVec8s(colors[2], colors[3])); - data[index * 16].store_unaligned_vec16ub(ssecolors); + SSAVec16ub color(SSAVec8s(colors[0], colors[1]), SSAVec8s(colors[2], colors[3])); + data[index * 16].store_unaligned_vec16ub(color); stack_index.store(index + 1); loop.end_block(); @@ -122,7 +129,7 @@ SSAInt DrawSpanCodegen::Loop4x(bool isSimpleShade, bool isNearestFilter, bool is return sseLength; } -void DrawSpanCodegen::Loop(SSAInt start, bool isSimpleShade, bool isNearestFilter, bool is64x64) +void DrawSpanCodegen::Loop(SSAInt start, DrawSpanVariant variant, bool isSimpleShade, bool isNearestFilter, bool is64x64) { stack_index.store(start); { @@ -133,13 +140,8 @@ void DrawSpanCodegen::Loop(SSAInt start, bool isSimpleShade, bool isNearestFilte SSAInt xfrac = stack_xfrac.load(); SSAInt yfrac = stack_yfrac.load(); - SSAVec4i fg = Sample(xfrac, yfrac, isNearestFilter, is64x64); - SSAVec4i color; - if (isSimpleShade) - 
color = shade_bgra_simple(fg, light); - else - color = shade_bgra_advanced(fg, light, shade_constants); - + SSAVec4i bgcolor = data[index * 4].load_vec4ub(); + SSAVec4i color = Blend(Shade(Sample(xfrac, yfrac, isNearestFilter, is64x64), isSimpleShade), bgcolor, variant); data[index * 4].store_vec4ub(color); stack_index.store(index + 1); @@ -173,6 +175,32 @@ SSAVec4i DrawSpanCodegen::Sample(SSAInt xfrac, SSAInt yfrac, bool isNearestFilte } } +SSAVec4i DrawSpanCodegen::Shade(SSAVec4i fg, bool isSimpleShade) +{ + if (isSimpleShade) + return shade_bgra_simple(fg, light); + else + return shade_bgra_advanced(fg, light, shade_constants); +} + +SSAVec4i DrawSpanCodegen::Blend(SSAVec4i fg, SSAVec4i bg, DrawSpanVariant variant) +{ + switch (variant) + { + default: + case DrawSpanVariant::Opaque: + return blend_copy(fg); + case DrawSpanVariant::Masked: + return blend_alpha_blend(fg, bg); + case DrawSpanVariant::Translucent: + case DrawSpanVariant::AddClamp: + return blend_add(fg, bg, srcalpha, destalpha); + case DrawSpanVariant::MaskedTranslucent: + case DrawSpanVariant::MaskedAddClamp: + return blend_add(fg, bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); + } +} + ///////////////////////////////////////////////////////////////////////////// SSAInt DrawerCodegen::calc_light_multiplier(SSAInt light) @@ -249,6 +277,14 @@ SSAVec4i DrawerCodegen::blend_alpha_blend(SSAVec4i fg, SSAVec4i bg) return color.insert(3, 255); } +SSAInt DrawerCodegen::calc_blend_bgalpha(SSAVec4i fg, SSAInt destalpha) +{ + SSAInt alpha = fg[3]; + alpha = alpha + (alpha >> 7); + SSAInt inv_alpha = 256 - alpha; + return (destalpha * alpha + 256 * inv_alpha + 128) >> 8; +} + SSAVec4i DrawerCodegen::sample_linear(SSAUBytePtr col0, SSAUBytePtr col1, SSAInt texturefracx, SSAInt texturefracy, SSAInt one, SSAInt height) { SSAInt frac_y0 = (texturefracy >> FRACBITS) * height; diff --git a/src/r_compiler/fixedfunction/fixedfunction.h b/src/r_compiler/fixedfunction/fixedfunction.h index 4b5bfc8b71..1c58740d58 
100644 --- a/src/r_compiler/fixedfunction/fixedfunction.h +++ b/src/r_compiler/fixedfunction/fixedfunction.h @@ -43,22 +43,37 @@ public: SSAVec4i blend_revsub(SSAVec4i fg, SSAVec4i bg, SSAInt srcalpha, SSAInt destalpha); SSAVec4i blend_alpha_blend(SSAVec4i fg, SSAVec4i bg); + // Calculates the final alpha values to be used when combined with the source texture alpha channel + SSAInt calc_blend_bgalpha(SSAVec4i fg, SSAInt destalpha); + // SampleBgra SSAVec4i sample_linear(SSAUBytePtr col0, SSAUBytePtr col1, SSAInt texturefracx, SSAInt texturefracy, SSAInt one, SSAInt height); SSAVec4i sample_linear(SSAUBytePtr texture, SSAInt xfrac, SSAInt yfrac, SSAInt xbits, SSAInt ybits); }; +enum class DrawSpanVariant +{ + Opaque, + Masked, + Translucent, + MaskedTranslucent, + AddClamp, + MaskedAddClamp +}; + class DrawSpanCodegen : public DrawerCodegen { public: - void Generate(SSAValue args); + void Generate(DrawSpanVariant variant, SSAValue args); private: - void LoopShade(bool isSimpleShade); - void LoopFilter(bool isSimpleShade, bool isNearestFilter); - SSAInt Loop4x(bool isSimpleShade, bool isNearestFilter, bool is64x64); - void Loop(SSAInt start, bool isSimpleShade, bool isNearestFilter, bool is64x64); + void LoopShade(DrawSpanVariant variant, bool isSimpleShade); + void LoopFilter(DrawSpanVariant variant, bool isSimpleShade, bool isNearestFilter); + SSAInt Loop4x(DrawSpanVariant variant, bool isSimpleShade, bool isNearestFilter, bool is64x64); + void Loop(SSAInt start, DrawSpanVariant variant, bool isSimpleShade, bool isNearestFilter, bool is64x64); SSAVec4i Sample(SSAInt xfrac, SSAInt yfrac, bool isNearestFilter, bool is64x64); + SSAVec4i Shade(SSAVec4i fg, bool isSimpleShade); + SSAVec4i Blend(SSAVec4i fg, SSAVec4i bg, DrawSpanVariant variant); SSAStack stack_index, stack_xfrac, stack_yfrac; diff --git a/src/r_compiler/llvmdrawers.cpp b/src/r_compiler/llvmdrawers.cpp index 4082137072..fb4a6d0232 100644 --- a/src/r_compiler/llvmdrawers.cpp +++ 
b/src/r_compiler/llvmdrawers.cpp @@ -44,8 +44,8 @@ public: LLVMDrawersImpl(); private: - void CodegenDrawSpan(); - static llvm::Type *GetRenderArgsStruct(llvm::LLVMContext &context); + void CodegenDrawSpan(const char *name, DrawSpanVariant variant); + static llvm::Type *GetDrawSpanArgsStruct(llvm::LLVMContext &context); LLVMProgram mProgram; }; @@ -75,26 +75,37 @@ LLVMDrawers *LLVMDrawers::Instance() LLVMDrawersImpl::LLVMDrawersImpl() { - CodegenDrawSpan(); + CodegenDrawSpan("DrawSpan", DrawSpanVariant::Opaque); + CodegenDrawSpan("DrawSpanMasked", DrawSpanVariant::Masked); + CodegenDrawSpan("DrawSpanTranslucent", DrawSpanVariant::Translucent); + CodegenDrawSpan("DrawSpanMaskedTranslucent", DrawSpanVariant::MaskedTranslucent); + CodegenDrawSpan("DrawSpanAddClamp", DrawSpanVariant::AddClamp); + CodegenDrawSpan("DrawSpanMaskedAddClamp", DrawSpanVariant::MaskedAddClamp); + mProgram.engine()->finalizeObject(); mProgram.modulePassManager()->run(*mProgram.module()); - DrawSpan = mProgram.GetProcAddress("DrawSpan"); + DrawSpan = mProgram.GetProcAddress("DrawSpan"); + DrawSpanMasked = mProgram.GetProcAddress("DrawSpanMasked"); + DrawSpanTranslucent = mProgram.GetProcAddress("DrawSpanTranslucent"); + DrawSpanMaskedTranslucent = mProgram.GetProcAddress("DrawSpanMaskedTranslucent"); + DrawSpanAddClamp = mProgram.GetProcAddress("DrawSpanAddClamp"); + DrawSpanMaskedAddClamp = mProgram.GetProcAddress("DrawSpanMaskedAddClamp"); mProgram.StopLogFatalErrors(); } -void LLVMDrawersImpl::CodegenDrawSpan() +void LLVMDrawersImpl::CodegenDrawSpan(const char *name, DrawSpanVariant variant) { llvm::IRBuilder<> builder(mProgram.context()); SSAScope ssa_scope(&mProgram.context(), mProgram.module(), &builder); - SSAFunction function("DrawSpan"); - function.add_parameter(GetRenderArgsStruct(mProgram.context())); + SSAFunction function(name); + function.add_parameter(GetDrawSpanArgsStruct(mProgram.context())); function.create_public(); DrawSpanCodegen codegen; - 
codegen.Generate(function.parameter(0)); + codegen.Generate(variant, function.parameter(0)); builder.CreateRetVoid(); @@ -104,7 +115,7 @@ void LLVMDrawersImpl::CodegenDrawSpan() mProgram.functionPassManager()->run(*function.func); } -llvm::Type *LLVMDrawersImpl::GetRenderArgsStruct(llvm::LLVMContext &context) +llvm::Type *LLVMDrawersImpl::GetDrawSpanArgsStruct(llvm::LLVMContext &context) { std::vector elements; elements.push_back(llvm::Type::getInt8PtrTy(context)); // uint8_t *destorg; diff --git a/src/r_compiler/llvmdrawers.h b/src/r_compiler/llvmdrawers.h index 2ad6c3d529..53e64032f6 100644 --- a/src/r_compiler/llvmdrawers.h +++ b/src/r_compiler/llvmdrawers.h @@ -1,7 +1,7 @@ #pragma once -struct RenderArgs +struct DrawSpanArgs { uint32_t *destorg; const uint32_t *source; @@ -45,7 +45,12 @@ public: static void Destroy(); static LLVMDrawers *Instance(); - void(*DrawSpan)(const RenderArgs *) = nullptr; + void(*DrawSpan)(const DrawSpanArgs *) = nullptr; + void(*DrawSpanMasked)(const DrawSpanArgs *) = nullptr; + void(*DrawSpanTranslucent)(const DrawSpanArgs *) = nullptr; + void(*DrawSpanMaskedTranslucent)(const DrawSpanArgs *) = nullptr; + void(*DrawSpanAddClamp)(const DrawSpanArgs *) = nullptr; + void(*DrawSpanMaskedAddClamp)(const DrawSpanArgs *) = nullptr; private: static LLVMDrawers *Singleton; diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 9c2cd62937..8a0a6871a0 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -302,7 +302,8 @@ void DrawerCommandQueue::StopThreads() class DrawSpanLLVMCommand : public DrawerCommand { - RenderArgs args; +protected: + DrawSpanArgs args; public: DrawSpanLLVMCommand() @@ -333,9 +334,9 @@ public: args.destalpha = dc_destalpha >> (FRACBITS - 8); args.flags = 0; if (ds_shade_constants.simple_shade) - args.flags |= RenderArgs::simple_shade; + args.flags |= DrawSpanArgs::simple_shade; if (!SampleBgra::span_sampler_setup(args.source, args.xbits, args.ybits, args.xstep, args.ystep, ds_source_mipmapped)) - args.flags 
|= RenderArgs::nearest_filter; + args.flags |= DrawSpanArgs::nearest_filter; } void Execute(DrawerThread *thread) override @@ -346,6 +347,61 @@ public: } }; +class DrawSpanMaskedLLVMCommand : public DrawSpanLLVMCommand +{ +public: + void Execute(DrawerThread *thread) override + { + if (thread->skipped_by_thread(args.y)) + return; + LLVMDrawers::Instance()->DrawSpanMasked(&args); + } +}; + +class DrawSpanTranslucentLLVMCommand : public DrawSpanLLVMCommand +{ +public: + void Execute(DrawerThread *thread) override + { + if (thread->skipped_by_thread(args.y)) + return; + LLVMDrawers::Instance()->DrawSpanTranslucent(&args); + } +}; + +class DrawSpanMaskedTranslucentLLVMCommand : public DrawSpanLLVMCommand +{ +public: + void Execute(DrawerThread *thread) override + { + if (thread->skipped_by_thread(args.y)) + return; + LLVMDrawers::Instance()->DrawSpanMaskedTranslucent(&args); + } +}; + +class DrawSpanAddClampLLVMCommand : public DrawSpanLLVMCommand +{ +public: + void Execute(DrawerThread *thread) override + { + if (thread->skipped_by_thread(args.y)) + return; + LLVMDrawers::Instance()->DrawSpanAddClamp(&args); + } +}; + +class DrawSpanMaskedAddClampLLVMCommand : public DrawSpanLLVMCommand +{ +public: + void Execute(DrawerThread *thread) override + { + if (thread->skipped_by_thread(args.y)) + return; + LLVMDrawers::Instance()->DrawSpanMaskedAddClamp(&args); + } +}; + ///////////////////////////////////////////////////////////////////////////// class DrawerColumnCommand : public DrawerCommand @@ -2749,39 +2805,58 @@ void R_DrawRevSubClampTranslatedColumn_rgba() void R_DrawSpan_rgba() { +#if !defined(NO_LLVM) DrawerCommandQueue::QueueCommand(); -/* -#ifdef NO_SSE +#elif defined(NO_SSE) DrawerCommandQueue::QueueCommand(); #else DrawerCommandQueue::QueueCommand(); #endif -*/ } void R_DrawSpanMasked_rgba() { +#if !defined(NO_LLVM) + DrawerCommandQueue::QueueCommand(); +#else DrawerCommandQueue::QueueCommand(); +#endif } void R_DrawSpanTranslucent_rgba() { +#if 
!defined(NO_LLVM) + DrawerCommandQueue::QueueCommand(); +#else DrawerCommandQueue::QueueCommand(); +#endif } void R_DrawSpanMaskedTranslucent_rgba() { +#if !defined(NO_LLVM) + DrawerCommandQueue::QueueCommand(); +#else DrawerCommandQueue::QueueCommand(); +#endif } void R_DrawSpanAddClamp_rgba() { +#if !defined(NO_LLVM) + DrawerCommandQueue::QueueCommand(); +#else DrawerCommandQueue::QueueCommand(); +#endif } void R_DrawSpanMaskedAddClamp_rgba() { +#if !defined(NO_LLVM) + DrawerCommandQueue::QueueCommand(); +#else DrawerCommandQueue::QueueCommand(); +#endif } void R_FillSpan_rgba() From 7be25112699a71f0c504607ec90e154bb4fa3a52 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 29 Sep 2016 05:21:43 +0200 Subject: [PATCH 138/912] Add codegen files for walls and columns --- src/CMakeLists.txt | 5 +- .../fixedfunction/drawcolumncodegen.cpp | 15 ++ .../fixedfunction/drawcolumncodegen.h | 26 ++++ .../fixedfunction/drawercodegen.cpp | 135 ++++++++++++++++++ .../{fixedfunction.h => drawercodegen.h} | 50 ------- ...{fixedfunction.cpp => drawspancodegen.cpp} | 129 +---------------- .../fixedfunction/drawspancodegen.h | 54 +++++++ .../fixedfunction/drawwallcodegen.cpp | 15 ++ .../fixedfunction/drawwallcodegen.h | 26 ++++ src/r_compiler/llvmdrawers.cpp | 4 +- 10 files changed, 279 insertions(+), 180 deletions(-) create mode 100644 src/r_compiler/fixedfunction/drawcolumncodegen.cpp create mode 100644 src/r_compiler/fixedfunction/drawcolumncodegen.h create mode 100644 src/r_compiler/fixedfunction/drawercodegen.cpp rename src/r_compiler/fixedfunction/{fixedfunction.h => drawercodegen.h} (61%) rename src/r_compiler/fixedfunction/{fixedfunction.cpp => drawspancodegen.cpp} (57%) create mode 100644 src/r_compiler/fixedfunction/drawspancodegen.h create mode 100644 src/r_compiler/fixedfunction/drawwallcodegen.cpp create mode 100644 src/r_compiler/fixedfunction/drawwallcodegen.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 508951510a..41829b996f 100644 --- 
a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1447,7 +1447,10 @@ set (PCH_SOURCES r_compiler/ssa/ssa_vec4i_ptr.cpp r_compiler/ssa/ssa_vec8s.cpp r_compiler/ssa/ssa_vec16ub.cpp - r_compiler/fixedfunction/fixedfunction.cpp + r_compiler/fixedfunction/drawercodegen.cpp + r_compiler/fixedfunction/drawspancodegen.cpp + r_compiler/fixedfunction/drawwallcodegen.cpp + r_compiler/fixedfunction/drawcolumncodegen.cpp r_data/sprites.cpp r_data/voxels.cpp r_data/renderstyle.cpp diff --git a/src/r_compiler/fixedfunction/drawcolumncodegen.cpp b/src/r_compiler/fixedfunction/drawcolumncodegen.cpp new file mode 100644 index 0000000000..4594e22902 --- /dev/null +++ b/src/r_compiler/fixedfunction/drawcolumncodegen.cpp @@ -0,0 +1,15 @@ + +#include "i_system.h" +#include "r_compiler/fixedfunction/drawcolumncodegen.h" +#include "r_compiler/ssa/ssa_function.h" +#include "r_compiler/ssa/ssa_scope.h" +#include "r_compiler/ssa/ssa_for_block.h" +#include "r_compiler/ssa/ssa_if_block.h" +#include "r_compiler/ssa/ssa_stack.h" +#include "r_compiler/ssa/ssa_function.h" +#include "r_compiler/ssa/ssa_struct_type.h" +#include "r_compiler/ssa/ssa_value.h" + +void DrawColumnCodegen::Generate(DrawColumnVariant variant, SSAValue args) +{ +} diff --git a/src/r_compiler/fixedfunction/drawcolumncodegen.h b/src/r_compiler/fixedfunction/drawcolumncodegen.h new file mode 100644 index 0000000000..0749def7f5 --- /dev/null +++ b/src/r_compiler/fixedfunction/drawcolumncodegen.h @@ -0,0 +1,26 @@ + +#pragma once + +#include "drawercodegen.h" + +enum class DrawColumnVariant +{ + Opaque, + Fuzz, + Add, + Translated, + TlatedAdd, + Shaded, + AddClamp, + AddClampTranslated, + SubClamp, + SubClampTranslated, + RevSubClamp, + RevSubClampTranslated +}; + +class DrawColumnCodegen : public DrawerCodegen +{ +public: + void Generate(DrawColumnVariant variant, SSAValue args); +}; diff --git a/src/r_compiler/fixedfunction/drawercodegen.cpp b/src/r_compiler/fixedfunction/drawercodegen.cpp new file mode 100644 index 
0000000000..5da858e27f --- /dev/null +++ b/src/r_compiler/fixedfunction/drawercodegen.cpp @@ -0,0 +1,135 @@ + +#include "i_system.h" +#include "r_compiler/fixedfunction/drawercodegen.h" +#include "r_compiler/ssa/ssa_function.h" +#include "r_compiler/ssa/ssa_scope.h" +#include "r_compiler/ssa/ssa_for_block.h" +#include "r_compiler/ssa/ssa_if_block.h" +#include "r_compiler/ssa/ssa_stack.h" +#include "r_compiler/ssa/ssa_function.h" +#include "r_compiler/ssa/ssa_struct_type.h" +#include "r_compiler/ssa/ssa_value.h" + +SSAInt DrawerCodegen::calc_light_multiplier(SSAInt light) +{ + return 256 - (light >> (FRACBITS - 8)); +} + +SSAVec4i DrawerCodegen::shade_pal_index_simple(SSAInt index, SSAInt light, SSAUBytePtr basecolors) +{ + SSAVec4i color = basecolors[index * 4].load_vec4ub(); // = GPalette.BaseColors[index]; + return shade_bgra_simple(color, light); +} + +SSAVec4i DrawerCodegen::shade_pal_index_advanced(SSAInt index, SSAInt light, const SSAShadeConstants &constants, SSAUBytePtr basecolors) +{ + SSAVec4i color = basecolors[index * 4].load_vec4ub(); // = GPalette.BaseColors[index]; + return shade_bgra_advanced(color, light, constants); +} + +SSAVec4i DrawerCodegen::shade_bgra_simple(SSAVec4i color, SSAInt light) +{ + color = color * light / 256; + return color.insert(3, 255); +} + +SSAVec4i DrawerCodegen::shade_bgra_advanced(SSAVec4i color, SSAInt light, const SSAShadeConstants &constants) +{ + SSAInt blue = color[0]; + SSAInt green = color[1]; + SSAInt red = color[2]; + SSAInt alpha = color[3]; + + SSAInt intensity = ((red * 77 + green * 143 + blue * 37) >> 8) * constants.desaturate; + + SSAVec4i inv_light = 256 - light; + SSAVec4i inv_desaturate = 256 - constants.desaturate; + + color = (color * inv_desaturate + intensity) / 256; + color = (constants.fade * inv_light + color * light) / 256; + color = (color * constants.light) / 256; + + return color.insert(3, alpha); +} + +SSAVec4i DrawerCodegen::blend_copy(SSAVec4i fg) +{ + return fg; +} + +SSAVec4i 
DrawerCodegen::blend_add(SSAVec4i fg, SSAVec4i bg, SSAInt srcalpha, SSAInt destalpha) +{ + SSAVec4i color = (fg * srcalpha + bg * destalpha) / 256; + return color.insert(3, 255); +} + +SSAVec4i DrawerCodegen::blend_sub(SSAVec4i fg, SSAVec4i bg, SSAInt srcalpha, SSAInt destalpha) +{ + SSAVec4i color = (bg * destalpha - fg * srcalpha) / 256; + return color.insert(3, 255); +} + +SSAVec4i DrawerCodegen::blend_revsub(SSAVec4i fg, SSAVec4i bg, SSAInt srcalpha, SSAInt destalpha) +{ + SSAVec4i color = (fg * srcalpha - bg * destalpha) / 256; + return color.insert(3, 255); +} + +SSAVec4i DrawerCodegen::blend_alpha_blend(SSAVec4i fg, SSAVec4i bg) +{ + SSAInt alpha = fg[3]; + alpha = alpha + (alpha >> 7); // // 255 -> 256 + SSAInt inv_alpha = 256 - alpha; + SSAVec4i color = (fg * alpha + bg * inv_alpha) / 256; + return color.insert(3, 255); +} + +SSAInt DrawerCodegen::calc_blend_bgalpha(SSAVec4i fg, SSAInt destalpha) +{ + SSAInt alpha = fg[3]; + alpha = alpha + (alpha >> 7); + SSAInt inv_alpha = 256 - alpha; + return (destalpha * alpha + 256 * inv_alpha + 128) >> 8; +} + +SSAVec4i DrawerCodegen::sample_linear(SSAUBytePtr col0, SSAUBytePtr col1, SSAInt texturefracx, SSAInt texturefracy, SSAInt one, SSAInt height) +{ + SSAInt frac_y0 = (texturefracy >> FRACBITS) * height; + SSAInt frac_y1 = ((texturefracy + one) >> FRACBITS) * height; + SSAInt y0 = frac_y0 >> FRACBITS; + SSAInt y1 = frac_y1 >> FRACBITS; + + SSAVec4i p00 = col0[y0 * 4].load_vec4ub(); + SSAVec4i p01 = col0[y1 * 4].load_vec4ub(); + SSAVec4i p10 = col1[y0 * 4].load_vec4ub(); + SSAVec4i p11 = col1[y1 * 4].load_vec4ub(); + + SSAInt inv_b = texturefracx; + SSAInt inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; + SSAInt a = 16 - inv_a; + SSAInt b = 16 - inv_b; + + return (p00 * (a * b) + p01 * (inv_a * b) + p10 * (a * inv_b) + p11 * (inv_a * inv_b) + 127) >> 8; +} + +SSAVec4i DrawerCodegen::sample_linear(SSAUBytePtr texture, SSAInt xfrac, SSAInt yfrac, SSAInt xbits, SSAInt ybits) +{ + SSAInt xshift = (32 - xbits); + SSAInt 
yshift = (32 - ybits); + SSAInt xmask = (SSAInt(1) << xshift) - 1; + SSAInt ymask = (SSAInt(1) << yshift) - 1; + SSAInt x = xfrac >> xbits; + SSAInt y = yfrac >> ybits; + + SSAVec4i p00 = texture[((y & ymask) + ((x & xmask) << yshift)) * 4].load_vec4ub(); + SSAVec4i p01 = texture[(((y + 1) & ymask) + ((x & xmask) << yshift)) * 4].load_vec4ub(); + SSAVec4i p10 = texture[((y & ymask) + (((x + 1) & xmask) << yshift)) * 4].load_vec4ub(); + SSAVec4i p11 = texture[(((y + 1) & ymask) + (((x + 1) & xmask) << yshift)) * 4].load_vec4ub(); + + SSAInt inv_b = (xfrac >> (xbits - 4)) & 15; + SSAInt inv_a = (yfrac >> (ybits - 4)) & 15; + SSAInt a = 16 - inv_a; + SSAInt b = 16 - inv_b; + + return (p00 * (a * b) + p01 * (inv_a * b) + p10 * (a * inv_b) + p11 * (inv_a * inv_b) + 127) >> 8; +} diff --git a/src/r_compiler/fixedfunction/fixedfunction.h b/src/r_compiler/fixedfunction/drawercodegen.h similarity index 61% rename from src/r_compiler/fixedfunction/fixedfunction.h rename to src/r_compiler/fixedfunction/drawercodegen.h index 1c58740d58..9e0706ed1a 100644 --- a/src/r_compiler/fixedfunction/fixedfunction.h +++ b/src/r_compiler/fixedfunction/drawercodegen.h @@ -50,53 +50,3 @@ public: SSAVec4i sample_linear(SSAUBytePtr col0, SSAUBytePtr col1, SSAInt texturefracx, SSAInt texturefracy, SSAInt one, SSAInt height); SSAVec4i sample_linear(SSAUBytePtr texture, SSAInt xfrac, SSAInt yfrac, SSAInt xbits, SSAInt ybits); }; - -enum class DrawSpanVariant -{ - Opaque, - Masked, - Translucent, - MaskedTranslucent, - AddClamp, - MaskedAddClamp -}; - -class DrawSpanCodegen : public DrawerCodegen -{ -public: - void Generate(DrawSpanVariant variant, SSAValue args); - -private: - void LoopShade(DrawSpanVariant variant, bool isSimpleShade); - void LoopFilter(DrawSpanVariant variant, bool isSimpleShade, bool isNearestFilter); - SSAInt Loop4x(DrawSpanVariant variant, bool isSimpleShade, bool isNearestFilter, bool is64x64); - void Loop(SSAInt start, DrawSpanVariant variant, bool isSimpleShade, bool 
isNearestFilter, bool is64x64); - SSAVec4i Sample(SSAInt xfrac, SSAInt yfrac, bool isNearestFilter, bool is64x64); - SSAVec4i Shade(SSAVec4i fg, bool isSimpleShade); - SSAVec4i Blend(SSAVec4i fg, SSAVec4i bg, DrawSpanVariant variant); - - SSAStack stack_index, stack_xfrac, stack_yfrac; - - SSAUBytePtr destorg; - SSAUBytePtr source; - SSAInt destpitch; - SSAInt xstep; - SSAInt ystep; - SSAInt x1; - SSAInt x2; - SSAInt y; - SSAInt xbits; - SSAInt ybits; - SSAInt light; - SSAInt srcalpha; - SSAInt destalpha; - SSAInt count; - SSAUBytePtr data; - SSAInt yshift; - SSAInt xshift; - SSAInt xmask; - SSABool is_64x64; - SSABool is_simple_shade; - SSABool is_nearest_filter; - SSAShadeConstants shade_constants; -}; diff --git a/src/r_compiler/fixedfunction/fixedfunction.cpp b/src/r_compiler/fixedfunction/drawspancodegen.cpp similarity index 57% rename from src/r_compiler/fixedfunction/fixedfunction.cpp rename to src/r_compiler/fixedfunction/drawspancodegen.cpp index fc5402a42a..1623c38f21 100644 --- a/src/r_compiler/fixedfunction/fixedfunction.cpp +++ b/src/r_compiler/fixedfunction/drawspancodegen.cpp @@ -1,6 +1,6 @@ #include "i_system.h" -#include "r_compiler/fixedfunction/fixedfunction.h" +#include "r_compiler/fixedfunction/drawspancodegen.h" #include "r_compiler/ssa/ssa_function.h" #include "r_compiler/ssa/ssa_scope.h" #include "r_compiler/ssa/ssa_for_block.h" @@ -9,7 +9,6 @@ #include "r_compiler/ssa/ssa_function.h" #include "r_compiler/ssa/ssa_struct_type.h" #include "r_compiler/ssa/ssa_value.h" -#include "r_compiler/ssa/ssa_barycentric_weight.h" void DrawSpanCodegen::Generate(DrawSpanVariant variant, SSAValue args) { @@ -200,129 +199,3 @@ SSAVec4i DrawSpanCodegen::Blend(SSAVec4i fg, SSAVec4i bg, DrawSpanVariant varian return blend_add(fg, bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); } } - -///////////////////////////////////////////////////////////////////////////// - -SSAInt DrawerCodegen::calc_light_multiplier(SSAInt light) -{ - return 256 - (light >> (FRACBITS - 
8)); -} - -SSAVec4i DrawerCodegen::shade_pal_index_simple(SSAInt index, SSAInt light, SSAUBytePtr basecolors) -{ - SSAVec4i color = basecolors[index * 4].load_vec4ub(); // = GPalette.BaseColors[index]; - return shade_bgra_simple(color, light); -} - -SSAVec4i DrawerCodegen::shade_pal_index_advanced(SSAInt index, SSAInt light, const SSAShadeConstants &constants, SSAUBytePtr basecolors) -{ - SSAVec4i color = basecolors[index * 4].load_vec4ub(); // = GPalette.BaseColors[index]; - return shade_bgra_advanced(color, light, constants); -} - -SSAVec4i DrawerCodegen::shade_bgra_simple(SSAVec4i color, SSAInt light) -{ - color = color * light / 256; - return color.insert(3, 255); -} - -SSAVec4i DrawerCodegen::shade_bgra_advanced(SSAVec4i color, SSAInt light, const SSAShadeConstants &constants) -{ - SSAInt blue = color[0]; - SSAInt green = color[1]; - SSAInt red = color[2]; - SSAInt alpha = color[3]; - - SSAInt intensity = ((red * 77 + green * 143 + blue * 37) >> 8) * constants.desaturate; - - SSAVec4i inv_light = 256 - light; - SSAVec4i inv_desaturate = 256 - constants.desaturate; - - color = (color * inv_desaturate + intensity) / 256; - color = (constants.fade * inv_light + color * light) / 256; - color = (color * constants.light) / 256; - - return color.insert(3, alpha); -} - -SSAVec4i DrawerCodegen::blend_copy(SSAVec4i fg) -{ - return fg; -} - -SSAVec4i DrawerCodegen::blend_add(SSAVec4i fg, SSAVec4i bg, SSAInt srcalpha, SSAInt destalpha) -{ - SSAVec4i color = (fg * srcalpha + bg * destalpha) / 256; - return color.insert(3, 255); -} - -SSAVec4i DrawerCodegen::blend_sub(SSAVec4i fg, SSAVec4i bg, SSAInt srcalpha, SSAInt destalpha) -{ - SSAVec4i color = (bg * destalpha - fg * srcalpha) / 256; - return color.insert(3, 255); -} - -SSAVec4i DrawerCodegen::blend_revsub(SSAVec4i fg, SSAVec4i bg, SSAInt srcalpha, SSAInt destalpha) -{ - SSAVec4i color = (fg * srcalpha - bg * destalpha) / 256; - return color.insert(3, 255); -} - -SSAVec4i DrawerCodegen::blend_alpha_blend(SSAVec4i fg, 
SSAVec4i bg) -{ - SSAInt alpha = fg[3]; - alpha = alpha + (alpha >> 7); // // 255 -> 256 - SSAInt inv_alpha = 256 - alpha; - SSAVec4i color = (fg * alpha + bg * inv_alpha) / 256; - return color.insert(3, 255); -} - -SSAInt DrawerCodegen::calc_blend_bgalpha(SSAVec4i fg, SSAInt destalpha) -{ - SSAInt alpha = fg[3]; - alpha = alpha + (alpha >> 7); - SSAInt inv_alpha = 256 - alpha; - return (destalpha * alpha + 256 * inv_alpha + 128) >> 8; -} - -SSAVec4i DrawerCodegen::sample_linear(SSAUBytePtr col0, SSAUBytePtr col1, SSAInt texturefracx, SSAInt texturefracy, SSAInt one, SSAInt height) -{ - SSAInt frac_y0 = (texturefracy >> FRACBITS) * height; - SSAInt frac_y1 = ((texturefracy + one) >> FRACBITS) * height; - SSAInt y0 = frac_y0 >> FRACBITS; - SSAInt y1 = frac_y1 >> FRACBITS; - - SSAVec4i p00 = col0[y0 * 4].load_vec4ub(); - SSAVec4i p01 = col0[y1 * 4].load_vec4ub(); - SSAVec4i p10 = col1[y0 * 4].load_vec4ub(); - SSAVec4i p11 = col1[y1 * 4].load_vec4ub(); - - SSAInt inv_b = texturefracx; - SSAInt inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; - SSAInt a = 16 - inv_a; - SSAInt b = 16 - inv_b; - - return (p00 * (a * b) + p01 * (inv_a * b) + p10 * (a * inv_b) + p11 * (inv_a * inv_b) + 127) >> 8; -} - -SSAVec4i DrawerCodegen::sample_linear(SSAUBytePtr texture, SSAInt xfrac, SSAInt yfrac, SSAInt xbits, SSAInt ybits) -{ - SSAInt xshift = (32 - xbits); - SSAInt yshift = (32 - ybits); - SSAInt xmask = (SSAInt(1) << xshift) - 1; - SSAInt ymask = (SSAInt(1) << yshift) - 1; - SSAInt x = xfrac >> xbits; - SSAInt y = yfrac >> ybits; - - SSAVec4i p00 = texture[((y & ymask) + ((x & xmask) << yshift)) * 4].load_vec4ub(); - SSAVec4i p01 = texture[(((y + 1) & ymask) + ((x & xmask) << yshift)) * 4].load_vec4ub(); - SSAVec4i p10 = texture[((y & ymask) + (((x + 1) & xmask) << yshift)) * 4].load_vec4ub(); - SSAVec4i p11 = texture[(((y + 1) & ymask) + (((x + 1) & xmask) << yshift)) * 4].load_vec4ub(); - - SSAInt inv_b = (xfrac >> (xbits - 4)) & 15; - SSAInt inv_a = (yfrac >> (ybits - 4)) & 15; - 
SSAInt a = 16 - inv_a; - SSAInt b = 16 - inv_b; - - return (p00 * (a * b) + p01 * (inv_a * b) + p10 * (a * inv_b) + p11 * (inv_a * inv_b) + 127) >> 8; -} diff --git a/src/r_compiler/fixedfunction/drawspancodegen.h b/src/r_compiler/fixedfunction/drawspancodegen.h new file mode 100644 index 0000000000..20869ac2ff --- /dev/null +++ b/src/r_compiler/fixedfunction/drawspancodegen.h @@ -0,0 +1,54 @@ + +#pragma once + +#include "drawercodegen.h" + +enum class DrawSpanVariant +{ + Opaque, + Masked, + Translucent, + MaskedTranslucent, + AddClamp, + MaskedAddClamp +}; + +class DrawSpanCodegen : public DrawerCodegen +{ +public: + void Generate(DrawSpanVariant variant, SSAValue args); + +private: + void LoopShade(DrawSpanVariant variant, bool isSimpleShade); + void LoopFilter(DrawSpanVariant variant, bool isSimpleShade, bool isNearestFilter); + SSAInt Loop4x(DrawSpanVariant variant, bool isSimpleShade, bool isNearestFilter, bool is64x64); + void Loop(SSAInt start, DrawSpanVariant variant, bool isSimpleShade, bool isNearestFilter, bool is64x64); + SSAVec4i Sample(SSAInt xfrac, SSAInt yfrac, bool isNearestFilter, bool is64x64); + SSAVec4i Shade(SSAVec4i fg, bool isSimpleShade); + SSAVec4i Blend(SSAVec4i fg, SSAVec4i bg, DrawSpanVariant variant); + + SSAStack stack_index, stack_xfrac, stack_yfrac; + + SSAUBytePtr destorg; + SSAUBytePtr source; + SSAInt destpitch; + SSAInt xstep; + SSAInt ystep; + SSAInt x1; + SSAInt x2; + SSAInt y; + SSAInt xbits; + SSAInt ybits; + SSAInt light; + SSAInt srcalpha; + SSAInt destalpha; + SSAInt count; + SSAUBytePtr data; + SSAInt yshift; + SSAInt xshift; + SSAInt xmask; + SSABool is_64x64; + SSABool is_simple_shade; + SSABool is_nearest_filter; + SSAShadeConstants shade_constants; +}; diff --git a/src/r_compiler/fixedfunction/drawwallcodegen.cpp b/src/r_compiler/fixedfunction/drawwallcodegen.cpp new file mode 100644 index 0000000000..0e94c11ed7 --- /dev/null +++ b/src/r_compiler/fixedfunction/drawwallcodegen.cpp @@ -0,0 +1,15 @@ + +#include 
"i_system.h" +#include "r_compiler/fixedfunction/drawwallcodegen.h" +#include "r_compiler/ssa/ssa_function.h" +#include "r_compiler/ssa/ssa_scope.h" +#include "r_compiler/ssa/ssa_for_block.h" +#include "r_compiler/ssa/ssa_if_block.h" +#include "r_compiler/ssa/ssa_stack.h" +#include "r_compiler/ssa/ssa_function.h" +#include "r_compiler/ssa/ssa_struct_type.h" +#include "r_compiler/ssa/ssa_value.h" + +void DrawWallCodegen::Generate(DrawWallVariant variant, SSAValue args) +{ +} diff --git a/src/r_compiler/fixedfunction/drawwallcodegen.h b/src/r_compiler/fixedfunction/drawwallcodegen.h new file mode 100644 index 0000000000..f514ca8ca5 --- /dev/null +++ b/src/r_compiler/fixedfunction/drawwallcodegen.h @@ -0,0 +1,26 @@ + +#pragma once + +#include "drawercodegen.h" + +enum class DrawWallVariant +{ + Opaque1, // vlinec1 + Opaque4, // vlinec4 + Masked1, // mvlinec1 + Masked4, // mvlinec4 + Add1, // tmvline1_add + Add4, // tmvline4_add + AddClamp1, // tmvline1_addclamp + AddClamp4, // tmvline4_addclamp + SubClamp1, // tmvline1_subclamp + SubClamp4, // tmvline4_subclamp + RevSubClamp1, // tmvline1_revsubclamp + RevSubClamp4, // tmvline4_revsubclamp +}; + +class DrawWallCodegen : public DrawerCodegen +{ +public: + void Generate(DrawWallVariant variant, SSAValue args); +}; diff --git a/src/r_compiler/llvmdrawers.cpp b/src/r_compiler/llvmdrawers.cpp index fb4a6d0232..320bfb653d 100644 --- a/src/r_compiler/llvmdrawers.cpp +++ b/src/r_compiler/llvmdrawers.cpp @@ -1,6 +1,8 @@ #include "i_system.h" -#include "r_compiler/fixedfunction/fixedfunction.h" +#include "r_compiler/fixedfunction/drawspancodegen.h" +#include "r_compiler/fixedfunction/drawwallcodegen.h" +#include "r_compiler/fixedfunction/drawcolumncodegen.h" #include "r_compiler/ssa/ssa_function.h" #include "r_compiler/ssa/ssa_scope.h" #include "r_compiler/ssa/ssa_for_block.h" From afab45674ba32901d789a631d858757862650d3d Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 29 Sep 2016 07:38:33 +0200 Subject: [PATCH 
139/912] Added half of wall codegen --- .../fixedfunction/drawwallcodegen.cpp | 154 +++++++++++++++++- .../fixedfunction/drawwallcodegen.h | 49 ++++-- src/r_compiler/llvmdrawers.cpp | 70 +++++++- src/r_compiler/llvmdrawers.h | 46 ++++++ 4 files changed, 304 insertions(+), 15 deletions(-) diff --git a/src/r_compiler/fixedfunction/drawwallcodegen.cpp b/src/r_compiler/fixedfunction/drawwallcodegen.cpp index 0e94c11ed7..65b2224b57 100644 --- a/src/r_compiler/fixedfunction/drawwallcodegen.cpp +++ b/src/r_compiler/fixedfunction/drawwallcodegen.cpp @@ -10,6 +10,158 @@ #include "r_compiler/ssa/ssa_struct_type.h" #include "r_compiler/ssa/ssa_value.h" -void DrawWallCodegen::Generate(DrawWallVariant variant, SSAValue args) +void DrawWallCodegen::Generate(DrawWallVariant variant, bool fourColumns, SSAValue args) { + dest = args[0][0].load(); + source[0] = args[0][1].load(); + source[1] = args[0][2].load(); + source[2] = args[0][3].load(); + source[3] = args[0][4].load(); + source2[0] = args[0][5].load(); + source2[1] = args[0][6].load(); + source2[2] = args[0][7].load(); + source2[3] = args[0][8].load(); + pitch = args[0][9].load(); + count = args[0][10].load(); + dest_y = args[0][11].load(); + texturefrac[0] = args[0][12].load(); + texturefrac[1] = args[0][13].load(); + texturefrac[2] = args[0][14].load(); + texturefrac[3] = args[0][15].load(); + texturefracx[0] = args[0][16].load(); + texturefracx[1] = args[0][17].load(); + texturefracx[2] = args[0][18].load(); + texturefracx[3] = args[0][19].load(); + iscale[0] = args[0][20].load(); + iscale[1] = args[0][21].load(); + iscale[2] = args[0][22].load(); + iscale[3] = args[0][23].load(); + textureheight[0] = args[0][24].load(); + textureheight[1] = args[0][25].load(); + textureheight[2] = args[0][26].load(); + textureheight[3] = args[0][27].load(); + light[0] = args[0][28].load(); + light[1] = args[0][29].load(); + light[2] = args[0][30].load(); + light[3] = args[0][31].load(); + srcalpha = args[0][32].load(); + destalpha = 
args[0][33].load(); + SSAShort light_alpha = args[0][34].load(); + SSAShort light_red = args[0][35].load(); + SSAShort light_green = args[0][36].load(); + SSAShort light_blue = args[0][37].load(); + SSAShort fade_alpha = args[0][38].load(); + SSAShort fade_red = args[0][39].load(); + SSAShort fade_green = args[0][40].load(); + SSAShort fade_blue = args[0][41].load(); + SSAShort desaturate = args[0][42].load(); + SSAInt flags = args[0][43].load(); + shade_constants.light = SSAVec4i(light_blue.zext_int(), light_green.zext_int(), light_red.zext_int(), light_alpha.zext_int()); + shade_constants.fade = SSAVec4i(fade_blue.zext_int(), fade_green.zext_int(), fade_red.zext_int(), fade_alpha.zext_int()); + shade_constants.desaturate = desaturate.zext_int(); + + is_simple_shade = (flags & DrawWallArgs::simple_shade) == DrawWallArgs::simple_shade; + is_nearest_filter = (flags & DrawWallArgs::nearest_filter) == DrawWallArgs::nearest_filter; + + /* + count = thread->count_for_thread(command->_dest_y, command->_count); + fracstep = command->_iscale * thread->num_cores; + frac = command->_texturefrac + command->_iscale * thread->skipped_by_thread(command->_dest_y); + texturefracx = command->_texturefracx; + dest = thread->dest_for_thread(command->_dest_y, command->_pitch, (uint32_t*)command->_dest); + pitch = command->_pitch * thread->num_cores; + height = command->_textureheight; + one = ((0x80000000 + height - 1) / height) * 2 + 1; + */ + int numColumns = fourColumns ? 
4 : 1; + for (int i = 0; i < numColumns; i++) + { + stack_frac[i].store(texturefrac[i] + iscale[i]);// * skipped_by_thread(dest_y); + fracstep[i] = iscale[i];// * num_cores; + one[i] = ((0x80000000 + textureheight[i] - 1) / textureheight[i]) * 2 + 1; + } + + SSAIfBlock branch; + branch.if_block(is_simple_shade); + LoopShade(variant, fourColumns, true); + branch.else_block(); + LoopShade(variant, fourColumns, false); + branch.end_block(); +} + +void DrawWallCodegen::LoopShade(DrawWallVariant variant, bool fourColumns, bool isSimpleShade) +{ + SSAIfBlock branch; + branch.if_block(is_nearest_filter); + Loop(variant, fourColumns, isSimpleShade, true); + branch.else_block(); + Loop(variant, fourColumns, isSimpleShade, false); + branch.end_block(); +} + +void DrawWallCodegen::Loop(DrawWallVariant variant, bool fourColumns, bool isSimpleShade, bool isNearestFilter) +{ + int numColumns = fourColumns ? 4 : 1; + + stack_index.store(0); + { + SSAForBlock loop; + SSAInt index = stack_index.load(); + loop.loop_block(index < count); + + SSAInt frac[4]; + for (int i = 0; i < numColumns; i++) + frac[i] = stack_frac[i].load(); + + SSAInt offset = (dest_y + index) * pitch * 4; + + if (fourColumns) + { + + } + else + { + SSAVec4i bgcolor = dest[offset].load_vec4ub(); + SSAVec4i color = Blend(Shade(Sample(frac[0], isNearestFilter), 0, isSimpleShade), bgcolor, variant); + dest[offset].store_vec4ub(color); + } + + stack_index.store(index + 1); + for (int i = 0; i < numColumns; i++) + stack_frac[i].store(frac[i] + fracstep[i]); + loop.end_block(); + } +} + +SSAVec4i DrawWallCodegen::Sample(SSAInt frac, bool isNearestFilter) +{ + // int sample_index() { return ((frac >> FRACBITS) * height) >> FRACBITS; } + return SSAVec4i(0); +} + +SSAVec4i DrawWallCodegen::Shade(SSAVec4i fg, int index, bool isSimpleShade) +{ + if (isSimpleShade) + return shade_bgra_simple(fg, light[index]); + else + return shade_bgra_advanced(fg, light[index], shade_constants); +} + +SSAVec4i 
DrawWallCodegen::Blend(SSAVec4i fg, SSAVec4i bg, DrawWallVariant variant) +{ + switch (variant) + { + default: + case DrawWallVariant::Opaque: + return blend_copy(fg); + case DrawWallVariant::Masked: + return blend_alpha_blend(fg, bg); + case DrawWallVariant::Add: + case DrawWallVariant::AddClamp: + return blend_add(fg, bg, srcalpha, destalpha); + case DrawWallVariant::SubClamp: + return blend_sub(fg, bg, srcalpha, destalpha); + case DrawWallVariant::RevSubClamp: + return blend_revsub(fg, bg, srcalpha, destalpha); + } } diff --git a/src/r_compiler/fixedfunction/drawwallcodegen.h b/src/r_compiler/fixedfunction/drawwallcodegen.h index f514ca8ca5..eafc8cf697 100644 --- a/src/r_compiler/fixedfunction/drawwallcodegen.h +++ b/src/r_compiler/fixedfunction/drawwallcodegen.h @@ -5,22 +5,45 @@ enum class DrawWallVariant { - Opaque1, // vlinec1 - Opaque4, // vlinec4 - Masked1, // mvlinec1 - Masked4, // mvlinec4 - Add1, // tmvline1_add - Add4, // tmvline4_add - AddClamp1, // tmvline1_addclamp - AddClamp4, // tmvline4_addclamp - SubClamp1, // tmvline1_subclamp - SubClamp4, // tmvline4_subclamp - RevSubClamp1, // tmvline1_revsubclamp - RevSubClamp4, // tmvline4_revsubclamp + Opaque, + Masked, + Add, + AddClamp, + SubClamp, + RevSubClamp }; class DrawWallCodegen : public DrawerCodegen { public: - void Generate(DrawWallVariant variant, SSAValue args); + void Generate(DrawWallVariant variant, bool fourColumns, SSAValue args); + +private: + void LoopShade(DrawWallVariant variant, bool fourColumns, bool isSimpleShade); + void Loop(DrawWallVariant variant, bool fourColumns, bool isSimpleShade, bool isNearestFilter); + SSAVec4i Sample(SSAInt frac, bool isNearestFilter); + SSAVec4i Shade(SSAVec4i fg, int index, bool isSimpleShade); + SSAVec4i Blend(SSAVec4i fg, SSAVec4i bg, DrawWallVariant variant); + + SSAStack stack_index, stack_frac[4]; + + SSAUBytePtr dest; + SSAUBytePtr source[4]; + SSAUBytePtr source2[4]; + SSAInt pitch; + SSAInt count; + SSAInt dest_y; + SSAInt texturefrac[4]; + 
SSAInt texturefracx[4]; + SSAInt iscale[4]; + SSAInt textureheight[4]; + SSAInt light[4]; + SSAInt srcalpha; + SSAInt destalpha; + SSABool is_simple_shade; + SSABool is_nearest_filter; + SSAShadeConstants shade_constants; + + SSAInt fracstep[4]; + SSAInt one[4]; }; diff --git a/src/r_compiler/llvmdrawers.cpp b/src/r_compiler/llvmdrawers.cpp index 320bfb653d..57c3293bbd 100644 --- a/src/r_compiler/llvmdrawers.cpp +++ b/src/r_compiler/llvmdrawers.cpp @@ -47,7 +47,10 @@ public: private: void CodegenDrawSpan(const char *name, DrawSpanVariant variant); + void CodegenDrawWall(const char *name, DrawWallVariant variant, int columns); + static llvm::Type *GetDrawSpanArgsStruct(llvm::LLVMContext &context); + static llvm::Type *GetDrawWallArgsStruct(llvm::LLVMContext &context); LLVMProgram mProgram; }; @@ -83,6 +86,18 @@ LLVMDrawersImpl::LLVMDrawersImpl() CodegenDrawSpan("DrawSpanMaskedTranslucent", DrawSpanVariant::MaskedTranslucent); CodegenDrawSpan("DrawSpanAddClamp", DrawSpanVariant::AddClamp); CodegenDrawSpan("DrawSpanMaskedAddClamp", DrawSpanVariant::MaskedAddClamp); + CodegenDrawWall("vlinec1", DrawWallVariant::Opaque, 1); + CodegenDrawWall("vlinec4", DrawWallVariant::Opaque, 4); + CodegenDrawWall("mvlinec1", DrawWallVariant::Masked, 1); + CodegenDrawWall("mvlinec4", DrawWallVariant::Masked, 4); + CodegenDrawWall("tmvline1_add", DrawWallVariant::Add, 1); + CodegenDrawWall("tmvline4_add", DrawWallVariant::Add, 4); + CodegenDrawWall("tmvline1_addclamp", DrawWallVariant::AddClamp, 1); + CodegenDrawWall("tmvline4_addclamp", DrawWallVariant::AddClamp, 4); + CodegenDrawWall("tmvline1_subclamp", DrawWallVariant::SubClamp, 1); + CodegenDrawWall("tmvline4_subclamp", DrawWallVariant::SubClamp, 4); + CodegenDrawWall("tmvline1_revsubclamp", DrawWallVariant::RevSubClamp, 1); + CodegenDrawWall("tmvline4_revsubclamp", DrawWallVariant::RevSubClamp, 4); mProgram.engine()->finalizeObject(); mProgram.modulePassManager()->run(*mProgram.module()); @@ -93,6 +108,18 @@ 
LLVMDrawersImpl::LLVMDrawersImpl() DrawSpanMaskedTranslucent = mProgram.GetProcAddress("DrawSpanMaskedTranslucent"); DrawSpanAddClamp = mProgram.GetProcAddress("DrawSpanAddClamp"); DrawSpanMaskedAddClamp = mProgram.GetProcAddress("DrawSpanMaskedAddClamp"); + vlinec1 = mProgram.GetProcAddress("vlinec1"); + vlinec4 = mProgram.GetProcAddress("vlinec4"); + mvlinec1 = mProgram.GetProcAddress("mvlinec1"); + mvlinec4 = mProgram.GetProcAddress("mvlinec4"); + tmvline1_add = mProgram.GetProcAddress("tmvline1_add"); + tmvline4_add = mProgram.GetProcAddress("tmvline4_add"); + tmvline1_addclamp = mProgram.GetProcAddress("tmvline1_addclamp"); + tmvline4_addclamp = mProgram.GetProcAddress("tmvline4_addclamp"); + tmvline1_subclamp = mProgram.GetProcAddress("tmvline1_subclamp"); + tmvline4_subclamp = mProgram.GetProcAddress("tmvline4_subclamp"); + tmvline1_revsubclamp = mProgram.GetProcAddress("tmvline1_revsubclamp"); + tmvline4_revsubclamp = mProgram.GetProcAddress("tmvline4_revsubclamp"); mProgram.StopLogFatalErrors(); } @@ -117,11 +144,31 @@ void LLVMDrawersImpl::CodegenDrawSpan(const char *name, DrawSpanVariant variant) mProgram.functionPassManager()->run(*function.func); } +void LLVMDrawersImpl::CodegenDrawWall(const char *name, DrawWallVariant variant, int columns) +{ + llvm::IRBuilder<> builder(mProgram.context()); + SSAScope ssa_scope(&mProgram.context(), mProgram.module(), &builder); + + SSAFunction function(name); + function.add_parameter(GetDrawWallArgsStruct(mProgram.context())); + function.create_public(); + + DrawWallCodegen codegen; + codegen.Generate(variant, columns == 4, function.parameter(0)); + + builder.CreateRetVoid(); + + if (llvm::verifyFunction(*function.func)) + I_FatalError("verifyFunction failed for " __FUNCTION__); + + mProgram.functionPassManager()->run(*function.func); +} + llvm::Type *LLVMDrawersImpl::GetDrawSpanArgsStruct(llvm::LLVMContext &context) { std::vector elements; elements.push_back(llvm::Type::getInt8PtrTy(context)); // uint8_t *destorg; - 
elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint8_t *source; + elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint32_t *source; elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t destpitch; elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t xfrac; elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t yfrac; @@ -148,6 +195,27 @@ llvm::Type *LLVMDrawersImpl::GetDrawSpanArgsStruct(llvm::LLVMContext &context) return llvm::StructType::get(context, elements, false)->getPointerTo(); } +llvm::Type *LLVMDrawersImpl::GetDrawWallArgsStruct(llvm::LLVMContext &context) +{ + std::vector elements; + elements.push_back(llvm::Type::getInt8PtrTy(context)); + for (int i = 0; i < 8; i++) + elements.push_back(llvm::Type::getInt8PtrTy(context)); + for (int i = 0; i < 25; i++) + elements.push_back(llvm::Type::getInt32Ty(context)); + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_alpha; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_red; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_green; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_blue; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_alpha; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_red; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_green; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_blue; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t desaturate; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t flags; + return llvm::StructType::get(context, elements, false)->getPointerTo(); +} + ///////////////////////////////////////////////////////////////////////////// namespace { static bool LogFatalErrors = false; } diff --git a/src/r_compiler/llvmdrawers.h b/src/r_compiler/llvmdrawers.h index 53e64032f6..92f7e9440c 100644 --- 
a/src/r_compiler/llvmdrawers.h +++ b/src/r_compiler/llvmdrawers.h @@ -1,6 +1,39 @@ #pragma once +struct DrawWallArgs +{ + uint32_t *dest; + const uint32_t *source[4]; + const uint32_t *source2[4]; + int32_t pitch; + int32_t count; + int32_t dest_y; + uint32_t texturefrac[4]; + uint32_t texturefracx[4]; + uint32_t iscale[4]; + uint32_t textureheight[4]; + uint32_t light[4]; + uint32_t srcalpha; + uint32_t destalpha; + + uint16_t light_alpha; + uint16_t light_red; + uint16_t light_green; + uint16_t light_blue; + uint16_t fade_alpha; + uint16_t fade_red; + uint16_t fade_green; + uint16_t fade_blue; + uint16_t desaturate; + uint32_t flags; + enum Flags + { + simple_shade = 1, + nearest_filter = 2 + }; +}; + struct DrawSpanArgs { uint32_t *destorg; @@ -52,6 +85,19 @@ public: void(*DrawSpanAddClamp)(const DrawSpanArgs *) = nullptr; void(*DrawSpanMaskedAddClamp)(const DrawSpanArgs *) = nullptr; + void(*vlinec1)(const DrawWallArgs *) = nullptr; + void(*vlinec4)(const DrawWallArgs *) = nullptr; + void(*mvlinec1)(const DrawWallArgs *) = nullptr; + void(*mvlinec4)(const DrawWallArgs *) = nullptr; + void(*tmvline1_add)(const DrawWallArgs *) = nullptr; + void(*tmvline4_add)(const DrawWallArgs *) = nullptr; + void(*tmvline1_addclamp)(const DrawWallArgs *) = nullptr; + void(*tmvline4_addclamp)(const DrawWallArgs *) = nullptr; + void(*tmvline1_subclamp)(const DrawWallArgs *) = nullptr; + void(*tmvline4_subclamp)(const DrawWallArgs *) = nullptr; + void(*tmvline1_revsubclamp)(const DrawWallArgs *) = nullptr; + void(*tmvline4_revsubclamp)(const DrawWallArgs *) = nullptr; + private: static LLVMDrawers *Singleton; }; From 2987668af848b5780370051f20bbf2bc39e12930 Mon Sep 17 00:00:00 2001 From: raa-eruanna Date: Thu, 29 Sep 2016 06:20:30 -0400 Subject: [PATCH 140/912] Non-tiling skies (Heretic/Hexen) now render properly. 
--- src/r_plane.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/r_plane.cpp b/src/r_plane.cpp index 706d6fad75..7c4e443587 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -1011,7 +1011,7 @@ static void R_DrawSky (visplane_t *pl) { // The texture does not tile nicely frontyScale *= skyscale; frontiScale = 1 / frontyScale; - //R_DrawSkyStriped (pl); + R_DrawSkyStriped (pl); } } From d58da58aeef3833d79910933acb32c2d2744f811 Mon Sep 17 00:00:00 2001 From: raa-eruanna Date: Thu, 29 Sep 2016 12:20:32 -0400 Subject: [PATCH 141/912] - fixed: Prevents too many drawer thread commands from queueing up. Previously, drawing too many columns (which was accumulated by amassing a huge number of sprites) would crash the game. --- src/r_draw_rgba.cpp | 2 ++ src/r_draw_rgba.h | 9 +++++++++ 2 files changed, 11 insertions(+) diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 0d86ead478..91044d1dd0 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -55,6 +55,8 @@ extern int wallshade; // Use multiple threads when drawing CVAR(Bool, r_multithreaded, true, 0); +// [SP] Set Max Threads to a sane amount +CVAR(Int, r_multithreadedmax, 1024, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); // Use linear filtering when scaling up CVAR(Bool, r_magfilter, false, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index df3d0f2330..e5fa88ed07 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -196,6 +196,7 @@ public: EXTERN_CVAR(Bool, r_multithreaded) EXTERN_CVAR(Bool, r_mipmap) +EXTERN_CVAR(Int, r_multithreadedmax) // Manages queueing up commands and executing them on worker threads class DrawerCommandQueue @@ -248,6 +249,14 @@ public: } else { + // [SP] Note: I've put in a hack here to throttle the speed of the rendering if + // the thread queue gets to big. 
This is one way to prevent too many commands + // going into the thread queue, which is causing crashes when there are too + // many threads (of which, there can be only as many as there are columns on + // the screen - guess what happens when you're too full of sprites!) + if (queue->commands.size() > r_multithreadedmax) + R_EndDrawerCommands(); + void *ptr = AllocMemory(sizeof(T)); if (!ptr) // Out of memory - render what we got { From 7cbaf80a2a7e89cab1a1164c724d29330a6042a1 Mon Sep 17 00:00:00 2001 From: raa-eruanna Date: Thu, 29 Sep 2016 12:49:10 -0400 Subject: [PATCH 142/912] - Okay - so - last commit didn't actually fix anything. I think for now, it's better to turn off multi-threading by default until this can be fixed. r_multithreading has been changed to false by default, and is now saved in the user's .ini. --- src/r_draw_rgba.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 91044d1dd0..127697356a 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -54,7 +54,7 @@ extern float rw_lightstep; extern int wallshade; // Use multiple threads when drawing -CVAR(Bool, r_multithreaded, true, 0); +CVAR(Bool, r_multithreaded, false, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); // [SP] Set Max Threads to a sane amount CVAR(Int, r_multithreadedmax, 1024, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); From 1560ed07af4fb5a3a03cf7d2051bce334aee2530 Mon Sep 17 00:00:00 2001 From: raa-eruanna Date: Thu, 29 Sep 2016 20:00:00 -0400 Subject: [PATCH 143/912] Added a hack that allows maps using fogmap to display properly in the true-color renderer. Unfortunately, this could be implemented better but at least for now it's here. 
--- src/g_level.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/g_level.cpp b/src/g_level.cpp index e27141ecd1..2f4939c62a 100644 --- a/src/g_level.cpp +++ b/src/g_level.cpp @@ -1340,6 +1340,7 @@ void G_InitLevelLocals () R_SetDefaultColormap (info->FadeTable); if (strnicmp (info->FadeTable, "COLORMAP", 8) != 0) { + //level.fadeto = 0xff939393; //[SP] Hexen True-color compatibility, just use gray. level.flags |= LEVEL_HASFADETABLE; } /* From bbec9b86a35c847b2fc394117524b3212f10370e Mon Sep 17 00:00:00 2001 From: raa-eruanna Date: Thu, 29 Sep 2016 20:00:56 -0400 Subject: [PATCH 144/912] - fixed: un-commented the line with said hack. oops. --- src/g_level.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/g_level.cpp b/src/g_level.cpp index 2f4939c62a..dc73bdaf85 100644 --- a/src/g_level.cpp +++ b/src/g_level.cpp @@ -1340,7 +1340,7 @@ void G_InitLevelLocals () R_SetDefaultColormap (info->FadeTable); if (strnicmp (info->FadeTable, "COLORMAP", 8) != 0) { - //level.fadeto = 0xff939393; //[SP] Hexen True-color compatibility, just use gray. + level.fadeto = 0xff939393; //[SP] Hexen True-color compatibility, just use gray. level.flags |= LEVEL_HASFADETABLE; } /* From 6e3a49b065b00fb69695cec45601c35e7c0fdccd Mon Sep 17 00:00:00 2001 From: raa-eruanna Date: Thu, 29 Sep 2016 21:29:40 -0400 Subject: [PATCH 145/912] Multilayer skies (Hexen) now properly show in truecolor mode. This is only supported for paletted skies, but it still offers a true-color output. 
--- src/r_plane.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/r_plane.cpp b/src/r_plane.cpp index 7c4e443587..0cfd51ca56 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -920,7 +920,7 @@ static const BYTE *R_GetTwoSkyColumns (FTexture *fronttex, int x) } else { - return R_GetOneSkyColumn(fronttex, x); + //return R_GetOneSkyColumn(fronttex, x); for (i = 0; i < 4; ++i) { if (lastskycol_bgra[i] == skycol) @@ -939,11 +939,14 @@ static const BYTE *R_GetTwoSkyColumns (FTexture *fronttex, int x) const uint32_t *front = (const uint32_t *)fronttex->GetColumnBgra(angle1, NULL); const uint32_t *back = (const uint32_t *)backskytex->GetColumnBgra(angle2, NULL); + //[SP] Paletted version is used for comparison only + const BYTE *frontcompare = fronttex->GetColumn(angle1, NULL); + int count = MIN(512, MIN(backskytex->GetHeight(), fronttex->GetHeight())); i = 0; do { - if (front[i]) + if (frontcompare[i]) { composite[i] = front[i]; } From 28bb5da181535e5639d655efc44a0b177be5fa72 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 30 Sep 2016 07:27:25 +0200 Subject: [PATCH 146/912] Hooked up LLVM wall drawers --- .../fixedfunction/drawercodegen.cpp | 29 +- src/r_compiler/fixedfunction/drawercodegen.h | 21 ++ .../fixedfunction/drawwallcodegen.cpp | 59 ++-- .../fixedfunction/drawwallcodegen.h | 5 +- src/r_compiler/llvmdrawers.cpp | 36 ++- src/r_compiler/llvmdrawers.h | 32 ++- src/r_compiler/ssa/ssa_int.cpp | 11 + src/r_compiler/ssa/ssa_int.h | 3 + src/r_draw_rgba.cpp | 261 ++++++++++++++++++ 9 files changed, 410 insertions(+), 47 deletions(-) diff --git a/src/r_compiler/fixedfunction/drawercodegen.cpp b/src/r_compiler/fixedfunction/drawercodegen.cpp index 5da858e27f..2cba501218 100644 --- a/src/r_compiler/fixedfunction/drawercodegen.cpp +++ b/src/r_compiler/fixedfunction/drawercodegen.cpp @@ -10,6 +10,31 @@ #include "r_compiler/ssa/ssa_struct_type.h" #include "r_compiler/ssa/ssa_value.h" +SSABool 
DrawerCodegen::line_skipped_by_thread(SSAInt line, SSAWorkerThread thread) +{ + return line < thread.pass_start_y || line >= thread.pass_end_y || !(line % thread.num_cores == thread.core); +} + +SSAInt DrawerCodegen::skipped_by_thread(SSAInt first_line, SSAWorkerThread thread) +{ + SSAInt pass_skip = SSAInt::MAX(thread.pass_start_y - first_line, 0); + SSAInt core_skip = (thread.num_cores - (first_line + pass_skip - thread.core) % thread.num_cores) % thread.num_cores; + return pass_skip + core_skip; +} + +SSAInt DrawerCodegen::count_for_thread(SSAInt first_line, SSAInt count, SSAWorkerThread thread) +{ + SSAInt lines_until_pass_end = SSAInt::MAX(thread.pass_end_y - first_line, 0); + count = SSAInt::MIN(count, lines_until_pass_end); + SSAInt c = (count - skipped_by_thread(first_line, thread) + thread.num_cores - 1) / thread.num_cores; + return SSAInt::MAX(c, 0); +} + +SSAUBytePtr DrawerCodegen::dest_for_thread(SSAInt first_line, SSAInt pitch, SSAUBytePtr dest, SSAWorkerThread thread) +{ + return dest[skipped_by_thread(first_line, thread) * pitch * 4]; +} + SSAInt DrawerCodegen::calc_light_multiplier(SSAInt light) { return 256 - (light >> (FRACBITS - 8)); @@ -105,8 +130,8 @@ SSAVec4i DrawerCodegen::sample_linear(SSAUBytePtr col0, SSAUBytePtr col1, SSAInt SSAVec4i p11 = col1[y1 * 4].load_vec4ub(); SSAInt inv_b = texturefracx; - SSAInt inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; - SSAInt a = 16 - inv_a; + SSAInt a = (frac_y1 >> (FRACBITS - 4)) & 15; + SSAInt inv_a = 16 - a; SSAInt b = 16 - inv_b; return (p00 * (a * b) + p01 * (inv_a * b) + p10 * (a * inv_b) + p11 * (inv_a * inv_b) + 127) >> 8; diff --git a/src/r_compiler/fixedfunction/drawercodegen.h b/src/r_compiler/fixedfunction/drawercodegen.h index 9e0706ed1a..17b36234dc 100644 --- a/src/r_compiler/fixedfunction/drawercodegen.h +++ b/src/r_compiler/fixedfunction/drawercodegen.h @@ -18,6 +18,15 @@ #include "r_compiler/ssa/ssa_barycentric_weight.h" #include "r_compiler/llvm_include.h" +class SSAWorkerThread +{ +public: 
+ SSAInt core; + SSAInt num_cores; + SSAInt pass_start_y; + SSAInt pass_end_y; +}; + class SSAShadeConstants { public: @@ -29,6 +38,18 @@ public: class DrawerCodegen { public: + // Checks if a line is rendered by this thread + SSABool line_skipped_by_thread(SSAInt line, SSAWorkerThread thread); + + // The number of lines to skip to reach the first line to be rendered by this thread + SSAInt skipped_by_thread(SSAInt first_line, SSAWorkerThread thread); + + // The number of lines to be rendered by this thread + SSAInt count_for_thread(SSAInt first_line, SSAInt count, SSAWorkerThread thread); + + // Calculate the dest address for the first line to be rendered by this thread + SSAUBytePtr dest_for_thread(SSAInt first_line, SSAInt pitch, SSAUBytePtr dest, SSAWorkerThread thread); + // LightBgra SSAInt calc_light_multiplier(SSAInt light); SSAVec4i shade_pal_index_simple(SSAInt index, SSAInt light, SSAUBytePtr basecolors); diff --git a/src/r_compiler/fixedfunction/drawwallcodegen.cpp b/src/r_compiler/fixedfunction/drawwallcodegen.cpp index 65b2224b57..0ca5377234 100644 --- a/src/r_compiler/fixedfunction/drawwallcodegen.cpp +++ b/src/r_compiler/fixedfunction/drawwallcodegen.cpp @@ -10,7 +10,7 @@ #include "r_compiler/ssa/ssa_struct_type.h" #include "r_compiler/ssa/ssa_value.h" -void DrawWallCodegen::Generate(DrawWallVariant variant, bool fourColumns, SSAValue args) +void DrawWallCodegen::Generate(DrawWallVariant variant, bool fourColumns, SSAValue args, SSAValue thread_data) { dest = args[0][0].load(); source[0] = args[0][1].load(); @@ -60,24 +60,24 @@ void DrawWallCodegen::Generate(DrawWallVariant variant, bool fourColumns, SSAVal shade_constants.fade = SSAVec4i(fade_blue.zext_int(), fade_green.zext_int(), fade_red.zext_int(), fade_alpha.zext_int()); shade_constants.desaturate = desaturate.zext_int(); + thread.core = thread_data[0][0].load(); + thread.num_cores = thread_data[0][1].load(); + thread.pass_start_y = thread_data[0][2].load(); + thread.pass_end_y = 
thread_data[0][3].load(); + is_simple_shade = (flags & DrawWallArgs::simple_shade) == DrawWallArgs::simple_shade; is_nearest_filter = (flags & DrawWallArgs::nearest_filter) == DrawWallArgs::nearest_filter; - /* - count = thread->count_for_thread(command->_dest_y, command->_count); - fracstep = command->_iscale * thread->num_cores; - frac = command->_texturefrac + command->_iscale * thread->skipped_by_thread(command->_dest_y); - texturefracx = command->_texturefracx; - dest = thread->dest_for_thread(command->_dest_y, command->_pitch, (uint32_t*)command->_dest); - pitch = command->_pitch * thread->num_cores; - height = command->_textureheight; - one = ((0x80000000 + height - 1) / height) * 2 + 1; - */ + count = count_for_thread(dest_y, count, thread); + dest = dest_for_thread(dest_y, pitch, dest, thread); + + pitch = pitch * thread.num_cores; + int numColumns = fourColumns ? 4 : 1; for (int i = 0; i < numColumns; i++) { - stack_frac[i].store(texturefrac[i] + iscale[i]);// * skipped_by_thread(dest_y); - fracstep[i] = iscale[i];// * num_cores; + stack_frac[i].store(texturefrac[i] + iscale[i] * skipped_by_thread(dest_y, thread)); + fracstep[i] = iscale[i] * thread.num_cores; one[i] = ((0x80000000 + textureheight[i] - 1) / textureheight[i]) * 2 + 1; } @@ -113,16 +113,32 @@ void DrawWallCodegen::Loop(DrawWallVariant variant, bool fourColumns, bool isSim for (int i = 0; i < numColumns; i++) frac[i] = stack_frac[i].load(); - SSAInt offset = (dest_y + index) * pitch * 4; + SSAInt offset = index * pitch * 4; if (fourColumns) { + SSAVec16ub bg = dest[offset].load_unaligned_vec16ub(); + SSAVec8s bg0 = SSAVec8s::extendlo(bg); + SSAVec8s bg1 = SSAVec8s::extendhi(bg); + SSAVec4i bgcolors[4] = + { + SSAVec4i::extendlo(bg0), + SSAVec4i::extendhi(bg0), + SSAVec4i::extendlo(bg1), + SSAVec4i::extendhi(bg1) + }; + SSAVec4i colors[4]; + for (int i = 0; i < 4; i++) + colors[i] = Blend(Shade(Sample(frac[i], i, isNearestFilter), i, isSimpleShade), bgcolors[i], variant); + + SSAVec16ub 
color(SSAVec8s(colors[0], colors[1]), SSAVec8s(colors[2], colors[3])); + dest[offset].store_unaligned_vec16ub(color); } else { SSAVec4i bgcolor = dest[offset].load_vec4ub(); - SSAVec4i color = Blend(Shade(Sample(frac[0], isNearestFilter), 0, isSimpleShade), bgcolor, variant); + SSAVec4i color = Blend(Shade(Sample(frac[0], 0, isNearestFilter), 0, isSimpleShade), bgcolor, variant); dest[offset].store_vec4ub(color); } @@ -133,10 +149,17 @@ void DrawWallCodegen::Loop(DrawWallVariant variant, bool fourColumns, bool isSim } } -SSAVec4i DrawWallCodegen::Sample(SSAInt frac, bool isNearestFilter) +SSAVec4i DrawWallCodegen::Sample(SSAInt frac, int index, bool isNearestFilter) { - // int sample_index() { return ((frac >> FRACBITS) * height) >> FRACBITS; } - return SSAVec4i(0); + if (isNearestFilter) + { + SSAInt sample_index = ((frac >> FRACBITS) * textureheight[index]) >> FRACBITS; + return source[index][sample_index * 4].load_vec4ub(); + } + else + { + return sample_linear(source[index], source2[index], texturefracx[index], frac, one[index], textureheight[index]); + } } SSAVec4i DrawWallCodegen::Shade(SSAVec4i fg, int index, bool isSimpleShade) diff --git a/src/r_compiler/fixedfunction/drawwallcodegen.h b/src/r_compiler/fixedfunction/drawwallcodegen.h index eafc8cf697..0e1cce5fcf 100644 --- a/src/r_compiler/fixedfunction/drawwallcodegen.h +++ b/src/r_compiler/fixedfunction/drawwallcodegen.h @@ -16,12 +16,12 @@ enum class DrawWallVariant class DrawWallCodegen : public DrawerCodegen { public: - void Generate(DrawWallVariant variant, bool fourColumns, SSAValue args); + void Generate(DrawWallVariant variant, bool fourColumns, SSAValue args, SSAValue thread_data); private: void LoopShade(DrawWallVariant variant, bool fourColumns, bool isSimpleShade); void Loop(DrawWallVariant variant, bool fourColumns, bool isSimpleShade, bool isNearestFilter); - SSAVec4i Sample(SSAInt frac, bool isNearestFilter); + SSAVec4i Sample(SSAInt frac, int index, bool isNearestFilter); SSAVec4i 
Shade(SSAVec4i fg, int index, bool isSimpleShade); SSAVec4i Blend(SSAVec4i fg, SSAVec4i bg, DrawWallVariant variant); @@ -43,6 +43,7 @@ private: SSABool is_simple_shade; SSABool is_nearest_filter; SSAShadeConstants shade_constants; + SSAWorkerThread thread; SSAInt fracstep[4]; SSAInt one[4]; diff --git a/src/r_compiler/llvmdrawers.cpp b/src/r_compiler/llvmdrawers.cpp index 57c3293bbd..60727744c8 100644 --- a/src/r_compiler/llvmdrawers.cpp +++ b/src/r_compiler/llvmdrawers.cpp @@ -51,6 +51,7 @@ private: static llvm::Type *GetDrawSpanArgsStruct(llvm::LLVMContext &context); static llvm::Type *GetDrawWallArgsStruct(llvm::LLVMContext &context); + static llvm::Type *GetWorkerThreadDataStruct(llvm::LLVMContext &context); LLVMProgram mProgram; }; @@ -108,18 +109,18 @@ LLVMDrawersImpl::LLVMDrawersImpl() DrawSpanMaskedTranslucent = mProgram.GetProcAddress("DrawSpanMaskedTranslucent"); DrawSpanAddClamp = mProgram.GetProcAddress("DrawSpanAddClamp"); DrawSpanMaskedAddClamp = mProgram.GetProcAddress("DrawSpanMaskedAddClamp"); - vlinec1 = mProgram.GetProcAddress("vlinec1"); - vlinec4 = mProgram.GetProcAddress("vlinec4"); - mvlinec1 = mProgram.GetProcAddress("mvlinec1"); - mvlinec4 = mProgram.GetProcAddress("mvlinec4"); - tmvline1_add = mProgram.GetProcAddress("tmvline1_add"); - tmvline4_add = mProgram.GetProcAddress("tmvline4_add"); - tmvline1_addclamp = mProgram.GetProcAddress("tmvline1_addclamp"); - tmvline4_addclamp = mProgram.GetProcAddress("tmvline4_addclamp"); - tmvline1_subclamp = mProgram.GetProcAddress("tmvline1_subclamp"); - tmvline4_subclamp = mProgram.GetProcAddress("tmvline4_subclamp"); - tmvline1_revsubclamp = mProgram.GetProcAddress("tmvline1_revsubclamp"); - tmvline4_revsubclamp = mProgram.GetProcAddress("tmvline4_revsubclamp"); + vlinec1 = mProgram.GetProcAddress("vlinec1"); + vlinec4 = mProgram.GetProcAddress("vlinec4"); + mvlinec1 = mProgram.GetProcAddress("mvlinec1"); + mvlinec4 = mProgram.GetProcAddress("mvlinec4"); + tmvline1_add = 
mProgram.GetProcAddress("tmvline1_add"); + tmvline4_add = mProgram.GetProcAddress("tmvline4_add"); + tmvline1_addclamp = mProgram.GetProcAddress("tmvline1_addclamp"); + tmvline4_addclamp = mProgram.GetProcAddress("tmvline4_addclamp"); + tmvline1_subclamp = mProgram.GetProcAddress("tmvline1_subclamp"); + tmvline4_subclamp = mProgram.GetProcAddress("tmvline4_subclamp"); + tmvline1_revsubclamp = mProgram.GetProcAddress("tmvline1_revsubclamp"); + tmvline4_revsubclamp = mProgram.GetProcAddress("tmvline4_revsubclamp"); mProgram.StopLogFatalErrors(); } @@ -151,10 +152,11 @@ void LLVMDrawersImpl::CodegenDrawWall(const char *name, DrawWallVariant variant, SSAFunction function(name); function.add_parameter(GetDrawWallArgsStruct(mProgram.context())); + function.add_parameter(GetWorkerThreadDataStruct(mProgram.context())); function.create_public(); DrawWallCodegen codegen; - codegen.Generate(variant, columns == 4, function.parameter(0)); + codegen.Generate(variant, columns == 4, function.parameter(0), function.parameter(1)); builder.CreateRetVoid(); @@ -216,6 +218,14 @@ llvm::Type *LLVMDrawersImpl::GetDrawWallArgsStruct(llvm::LLVMContext &context) return llvm::StructType::get(context, elements, false)->getPointerTo(); } +llvm::Type *LLVMDrawersImpl::GetWorkerThreadDataStruct(llvm::LLVMContext &context) +{ + std::vector elements; + for (int i = 0; i < 4; i++) + elements.push_back(llvm::Type::getInt32Ty(context)); + return llvm::StructType::get(context, elements, false)->getPointerTo(); +} + ///////////////////////////////////////////////////////////////////////////// namespace { static bool LogFatalErrors = false; } diff --git a/src/r_compiler/llvmdrawers.h b/src/r_compiler/llvmdrawers.h index 92f7e9440c..b1039cf496 100644 --- a/src/r_compiler/llvmdrawers.h +++ b/src/r_compiler/llvmdrawers.h @@ -1,6 +1,14 @@ #pragma once +struct WorkerThreadData +{ + int32_t core; + int32_t num_cores; + int32_t pass_start_y; + int32_t pass_end_y; +}; + struct DrawWallArgs { uint32_t *dest; @@ 
-85,18 +93,18 @@ public: void(*DrawSpanAddClamp)(const DrawSpanArgs *) = nullptr; void(*DrawSpanMaskedAddClamp)(const DrawSpanArgs *) = nullptr; - void(*vlinec1)(const DrawWallArgs *) = nullptr; - void(*vlinec4)(const DrawWallArgs *) = nullptr; - void(*mvlinec1)(const DrawWallArgs *) = nullptr; - void(*mvlinec4)(const DrawWallArgs *) = nullptr; - void(*tmvline1_add)(const DrawWallArgs *) = nullptr; - void(*tmvline4_add)(const DrawWallArgs *) = nullptr; - void(*tmvline1_addclamp)(const DrawWallArgs *) = nullptr; - void(*tmvline4_addclamp)(const DrawWallArgs *) = nullptr; - void(*tmvline1_subclamp)(const DrawWallArgs *) = nullptr; - void(*tmvline4_subclamp)(const DrawWallArgs *) = nullptr; - void(*tmvline1_revsubclamp)(const DrawWallArgs *) = nullptr; - void(*tmvline4_revsubclamp)(const DrawWallArgs *) = nullptr; + void(*vlinec1)(const DrawWallArgs *, const WorkerThreadData *) = nullptr; + void(*vlinec4)(const DrawWallArgs *, const WorkerThreadData *) = nullptr; + void(*mvlinec1)(const DrawWallArgs *, const WorkerThreadData *) = nullptr; + void(*mvlinec4)(const DrawWallArgs *, const WorkerThreadData *) = nullptr; + void(*tmvline1_add)(const DrawWallArgs *, const WorkerThreadData *) = nullptr; + void(*tmvline4_add)(const DrawWallArgs *, const WorkerThreadData *) = nullptr; + void(*tmvline1_addclamp)(const DrawWallArgs *, const WorkerThreadData *) = nullptr; + void(*tmvline4_addclamp)(const DrawWallArgs *, const WorkerThreadData *) = nullptr; + void(*tmvline1_subclamp)(const DrawWallArgs *, const WorkerThreadData *) = nullptr; + void(*tmvline4_subclamp)(const DrawWallArgs *, const WorkerThreadData *) = nullptr; + void(*tmvline1_revsubclamp)(const DrawWallArgs *, const WorkerThreadData *) = nullptr; + void(*tmvline4_revsubclamp)(const DrawWallArgs *, const WorkerThreadData *) = nullptr; private: static LLVMDrawers *Singleton; diff --git a/src/r_compiler/ssa/ssa_int.cpp b/src/r_compiler/ssa/ssa_int.cpp index 674f44350f..8d5a32e4c6 100644 --- 
a/src/r_compiler/ssa/ssa_int.cpp +++ b/src/r_compiler/ssa/ssa_int.cpp @@ -1,6 +1,7 @@ #include "ssa_int.h" #include "ssa_float.h" +#include "ssa_bool.h" #include "ssa_scope.h" #include "r_compiler/llvm_include.h" @@ -31,6 +32,16 @@ llvm::Type *SSAInt::llvm_type() return llvm::Type::getInt32Ty(SSAScope::context()); } +SSAInt SSAInt::MIN(SSAInt a, SSAInt b) +{ + return SSAInt::from_llvm(SSAScope::builder().CreateSelect((a < b).v, a.v, b.v, SSAScope::hint())); +} + +SSAInt SSAInt::MAX(SSAInt a, SSAInt b) +{ + return SSAInt::from_llvm(SSAScope::builder().CreateSelect((a > b).v, a.v, b.v, SSAScope::hint())); +} + SSAInt operator+(const SSAInt &a, const SSAInt &b) { return SSAInt::from_llvm(SSAScope::builder().CreateAdd(a.v, b.v, SSAScope::hint())); diff --git a/src/r_compiler/ssa/ssa_int.h b/src/r_compiler/ssa/ssa_int.h index 5e373c62e3..d928c41f2c 100644 --- a/src/r_compiler/ssa/ssa_int.h +++ b/src/r_compiler/ssa/ssa_int.h @@ -16,6 +16,9 @@ public: static SSAInt from_llvm(llvm::Value *v) { return SSAInt(v); } static llvm::Type *llvm_type(); + static SSAInt MIN(SSAInt a, SSAInt b); + static SSAInt MAX(SSAInt a, SSAInt b); + llvm::Value *v; }; diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 8a0a6871a0..c76c2c3c59 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -404,6 +404,219 @@ public: ///////////////////////////////////////////////////////////////////////////// +class DrawWall4LLVMCommand : public DrawerCommand +{ +protected: + DrawWallArgs args; + + WorkerThreadData ThreadData(DrawerThread *thread) + { + WorkerThreadData d; + d.core = thread->core; + d.num_cores = thread->num_cores; + d.pass_start_y = thread->pass_start_y; + d.pass_end_y = thread->pass_end_y; + return d; + } + +public: + DrawWall4LLVMCommand() + { + args.dest = (uint32_t*)dc_dest; + args.dest_y = _dest_y; + args.count = dc_count; + args.pitch = dc_pitch; + args.light_red = dc_shade_constants.light_red; + args.light_green = dc_shade_constants.light_green; + args.light_blue = 
dc_shade_constants.light_blue; + args.light_alpha = dc_shade_constants.light_alpha; + args.fade_red = dc_shade_constants.fade_red; + args.fade_green = dc_shade_constants.fade_green; + args.fade_blue = dc_shade_constants.fade_blue; + args.fade_alpha = dc_shade_constants.fade_alpha; + args.desaturate = dc_shade_constants.desaturate; + for (int i = 0; i < 4; i++) + { + args.texturefrac[i] = vplce[i]; + args.iscale[i] = vince[i]; + args.texturefracx[i] = buftexturefracx[i]; + args.textureheight[i] = bufheight[i]; + args.source[i] = (const uint32_t *)bufplce[i]; + args.source2[i] = (const uint32_t *)bufplce2[i]; + args.light[i] = LightBgra::calc_light_multiplier(palookuplight[i]); + } + args.srcalpha = dc_srcalpha >> (FRACBITS - 8); + args.destalpha = dc_destalpha >> (FRACBITS - 8); + args.flags = 0; + if (dc_shade_constants.simple_shade) + args.flags |= DrawWallArgs::simple_shade; + if (args.source2[0] == nullptr) + args.flags |= DrawWallArgs::nearest_filter; + } + + void Execute(DrawerThread *thread) override + { + WorkerThreadData d = ThreadData(thread); + LLVMDrawers::Instance()->vlinec4(&args, &d); + } +}; + +class DrawWallMasked4LLVMCommand : public DrawWall4LLVMCommand +{ +public: + void Execute(DrawerThread *thread) override + { + WorkerThreadData d = ThreadData(thread); + LLVMDrawers::Instance()->mvlinec4(&args, &d); + } +}; + +class DrawWallAdd4LLVMCommand : public DrawWall4LLVMCommand +{ +public: + void Execute(DrawerThread *thread) override + { + WorkerThreadData d = ThreadData(thread); + LLVMDrawers::Instance()->tmvline4_add(&args, &d); + } +}; + +class DrawWallAddClamp4LLVMCommand : public DrawWall4LLVMCommand +{ +public: + void Execute(DrawerThread *thread) override + { + WorkerThreadData d = ThreadData(thread); + LLVMDrawers::Instance()->tmvline4_addclamp(&args, &d); + } +}; + +class DrawWallSubClamp4LLVMCommand : public DrawWall4LLVMCommand +{ +public: + void Execute(DrawerThread *thread) override + { + WorkerThreadData d = ThreadData(thread); + 
LLVMDrawers::Instance()->tmvline4_subclamp(&args, &d); + } +}; + +class DrawWallRevSubClamp4LLVMCommand : public DrawWall4LLVMCommand +{ +public: + void Execute(DrawerThread *thread) override + { + WorkerThreadData d = ThreadData(thread); + LLVMDrawers::Instance()->tmvline4_revsubclamp(&args, &d); + } +}; + +class DrawWall1LLVMCommand : public DrawerCommand +{ +protected: + DrawWallArgs args; + + WorkerThreadData ThreadData(DrawerThread *thread) + { + WorkerThreadData d; + d.core = thread->core; + d.num_cores = thread->num_cores; + d.pass_start_y = thread->pass_start_y; + d.pass_end_y = thread->pass_end_y; + return d; + } + +public: + DrawWall1LLVMCommand() + { + args.dest = (uint32_t*)dc_dest; + args.dest_y = _dest_y; + args.pitch = dc_pitch; + args.count = dc_count; + args.texturefrac[0] = dc_texturefrac; + args.texturefracx[0] = dc_texturefracx; + args.iscale[0] = dc_iscale; + args.textureheight[0] = dc_textureheight; + args.source[0] = (const uint32 *)dc_source; + args.source2[0] = (const uint32 *)dc_source2; + args.light[0] = LightBgra::calc_light_multiplier(dc_light); + args.light_red = dc_shade_constants.light_red; + args.light_green = dc_shade_constants.light_green; + args.light_blue = dc_shade_constants.light_blue; + args.light_alpha = dc_shade_constants.light_alpha; + args.fade_red = dc_shade_constants.fade_red; + args.fade_green = dc_shade_constants.fade_green; + args.fade_blue = dc_shade_constants.fade_blue; + args.fade_alpha = dc_shade_constants.fade_alpha; + args.desaturate = dc_shade_constants.desaturate; + args.srcalpha = dc_srcalpha >> (FRACBITS - 8); + args.destalpha = dc_destalpha >> (FRACBITS - 8); + args.flags = 0; + if (dc_shade_constants.simple_shade) + args.flags |= DrawWallArgs::simple_shade; + if (args.source2[0] == nullptr) + args.flags |= DrawWallArgs::nearest_filter; + } + + void Execute(DrawerThread *thread) override + { + WorkerThreadData d = ThreadData(thread); + LLVMDrawers::Instance()->vlinec1(&args, &d); + } +}; + +class 
DrawWallMasked1LLVMCommand : public DrawWall1LLVMCommand +{ +public: + void Execute(DrawerThread *thread) override + { + WorkerThreadData d = ThreadData(thread); + LLVMDrawers::Instance()->mvlinec1(&args, &d); + } +}; + +class DrawWallAdd1LLVMCommand : public DrawWall1LLVMCommand +{ +public: + void Execute(DrawerThread *thread) override + { + WorkerThreadData d = ThreadData(thread); + LLVMDrawers::Instance()->tmvline1_add(&args, &d); + } +}; + +class DrawWallAddClamp1LLVMCommand : public DrawWall1LLVMCommand +{ +public: + void Execute(DrawerThread *thread) override + { + WorkerThreadData d = ThreadData(thread); + LLVMDrawers::Instance()->tmvline1_addclamp(&args, &d); + } +}; + +class DrawWallSubClamp1LLVMCommand : public DrawWall1LLVMCommand +{ +public: + void Execute(DrawerThread *thread) override + { + WorkerThreadData d = ThreadData(thread); + LLVMDrawers::Instance()->tmvline1_subclamp(&args, &d); + } +}; + +class DrawWallRevSubClamp1LLVMCommand : public DrawWall1LLVMCommand +{ +public: + void Execute(DrawerThread *thread) override + { + WorkerThreadData d = ThreadData(thread); + LLVMDrawers::Instance()->tmvline1_revsubclamp(&args, &d); + } +}; + +///////////////////////////////////////////////////////////////////////////// + class DrawerColumnCommand : public DrawerCommand { public: @@ -2901,7 +3114,11 @@ void R_DrawSlab_rgba(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, BY DWORD vlinec1_rgba() { +#if !defined(NO_LLVM) + DrawerCommandQueue::QueueCommand(); +#else DrawerCommandQueue::QueueCommand(); +#endif return dc_texturefrac + dc_count * dc_iscale; } @@ -2920,72 +3137,116 @@ void queue_wallcommand() void vlinec4_rgba() { +#if !defined(NO_LLVM) + DrawerCommandQueue::QueueCommand(); +#else queue_wallcommand(); +#endif for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } DWORD mvlinec1_rgba() { +#if !defined(NO_LLVM) + DrawerCommandQueue::QueueCommand(); +#else DrawerCommandQueue::QueueCommand(); +#endif return dc_texturefrac + dc_count * 
dc_iscale; } void mvlinec4_rgba() { +#if !defined(NO_LLVM) + DrawerCommandQueue::QueueCommand(); +#else queue_wallcommand(); +#endif for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } fixed_t tmvline1_add_rgba() { +#if !defined(NO_LLVM) + DrawerCommandQueue::QueueCommand(); +#else DrawerCommandQueue::QueueCommand(); +#endif return dc_texturefrac + dc_count * dc_iscale; } void tmvline4_add_rgba() { +#if !defined(NO_LLVM) + DrawerCommandQueue::QueueCommand(); +#else queue_wallcommand(); +#endif for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } fixed_t tmvline1_addclamp_rgba() { +#if !defined(NO_LLVM) + DrawerCommandQueue::QueueCommand(); +#else DrawerCommandQueue::QueueCommand(); +#endif return dc_texturefrac + dc_count * dc_iscale; } void tmvline4_addclamp_rgba() { +#if !defined(NO_LLVM) + DrawerCommandQueue::QueueCommand(); +#else queue_wallcommand(); +#endif for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } fixed_t tmvline1_subclamp_rgba() { +#if !defined(NO_LLVM) + DrawerCommandQueue::QueueCommand(); +#else DrawerCommandQueue::QueueCommand(); +#endif return dc_texturefrac + dc_count * dc_iscale; } void tmvline4_subclamp_rgba() { +#if !defined(NO_LLVM) + DrawerCommandQueue::QueueCommand(); +#else queue_wallcommand(); +#endif for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } fixed_t tmvline1_revsubclamp_rgba() { +#if !defined(NO_LLVM) + DrawerCommandQueue::QueueCommand(); +#else DrawerCommandQueue::QueueCommand(); +#endif return dc_texturefrac + dc_count * dc_iscale; } void tmvline4_revsubclamp_rgba() { +#if !defined(NO_LLVM) + DrawerCommandQueue::QueueCommand(); +#else queue_wallcommand(); +#endif for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } From 8765cf2016e4a747a7031b0df5c2522d7e4c9bb5 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 1 Oct 2016 06:51:55 +0200 Subject: [PATCH 147/912] Change Windows build to use a precompiled version of LLVM --- .gitignore | 1 + src/CMakeLists.txt | 98 
+++++++++++++++++----------------- src/r_compiler/llvmdrawers.cpp | 5 +- 3 files changed, 51 insertions(+), 53 deletions(-) diff --git a/.gitignore b/.gitignore index 1b078ed632..7cc9d98607 100644 --- a/.gitignore +++ b/.gitignore @@ -47,3 +47,4 @@ /build_vc2015-32 /build_vc2015-64 /build +/llvm diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 41829b996f..37dd9b5a8f 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -255,56 +255,44 @@ if( NOT NO_OPENAL ) endif() endif() -# C:/Development/Environment/Src/llvm-3.9.0/build/lib/cmake/llvm -find_package(LLVM REQUIRED CONFIG) -message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}") -message(STATUS "Using LLVMConfig.cmake in: ${LLVM_DIR}") -llvm_map_components_to_libnames(llvm_libs - analysis - asmparser - asmprinter - bitreader - bitwriter - codegen - core - executionengine - globalisel - instcombine - ipo - irreader - linker - lto - mc - mcdisassembler - mcjit - mcparser - mirparser - object - objectyaml - orcjit - passes - scalaropts - selectiondag - support - symbolize - tablegen - target - transformutils - vectorize - x86asmparser - x86asmprinter - x86codegen - x86desc - x86info - x86utils - aarch64asmparser - aarch64asmprinter - aarch64codegen - aarch64desc - aarch64info - aarch64utils) -include_directories(${LLVM_INCLUDE_DIRS}) -set( ZDOOM_LIBS ${ZDOOM_LIBS} ${llvm_libs} ) +set( LLVM_COMPONENTS core support asmparser asmprinter bitreader codegen passes ipo + irreader transformutils instrumentation profiledata debuginfocodeview runtimedyld + object instcombine linker analysis selectiondag scalaropts vectorize executionengine + mc mcdisassembler mcparser mcjit target ) +set( LLVM_COMPONENTS_X86 x86asmprinter x86info x86desc x86utils x86codegen ) +set( LLVM_COMPONENTS_X64 aarch64asmprinter aarch64info aarch64desc aarch64utils aarch64codegen ) + +# Path where it looks for the LLVM compiled files on Windows +set( LLVM_PRECOMPILED_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../llvm" ) + +if( NOT WIN32 ) + # Example 
LLVM_DIR folder: C:/Development/Environment/Src/llvm-3.9.0/build/lib/cmake/llvm + find_package(LLVM REQUIRED CONFIG) + message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}") + message(STATUS "Using LLVMConfig.cmake in: ${LLVM_DIR}") + llvm_map_components_to_libnames( llvm_libs ${LLVM_COMPONENTS} ${LLVM_COMPONENTS_X86} ${LLVM_COMPONENTS_X64} ) + include_directories( ${LLVM_INCLUDE_DIRS} ) + set( ZDOOM_LIBS ${ZDOOM_LIBS} ${llvm_libs} ) +else() + include_directories( "${LLVM_PRECOMPILED_DIR}/include" ) + if( X64 ) + include_directories( "${LLVM_PRECOMPILED_DIR}/64bit/include" ) + set( llvm_libs_base "${LLVM_PRECOMPILED_DIR}/llvm/64bit" ) + set( LLVM_ALL_COMPONENTS ${LLVM_COMPONENTS} ${LLVM_COMPONENTS_X64} ) + else() + include_directories( "${LLVM_PRECOMPILED_DIR}/32bit/include" ) + set( llvm_libs_base "${LLVM_PRECOMPILED_DIR}/32bit" ) + set( LLVM_ALL_COMPONENTS ${LLVM_COMPONENTS} ${LLVM_COMPONENTS_X86} ) + endif() + foreach(buildtype IN ITEMS RELEASE DEBUG) + set( llvm_libs_${buildtype} "${llvm_libs_base}/${buildtype}" ) + set( LLVM_${buildtype}_LIBS "" ) + foreach( llvm_module ${LLVM_ALL_COMPONENTS} ) + find_library( LLVM_${llvm_module}_LIBRARY_${buildtype} LLVM${llvm_module} PATHS ${llvm_libs_${buildtype}} ) + set( LLVM_${buildtype}_LIBS ${LLVM_${buildtype}_LIBS} ${LLVM_${llvm_module}_LIBRARY_${buildtype}} ) + endforeach( llvm_module ) + endforeach(buildtype) +endif() if( NOT NO_FMOD ) # Search for FMOD include files @@ -1513,6 +1501,16 @@ if(${CMAKE_SYSTEM_NAME} STREQUAL "SunOS") endif() target_link_libraries( zdoom ${ZDOOM_LIBS} gdtoa dumb lzma ) + +if( WIN32 ) + foreach(debuglib ${LLVM_DEBUG_LIBS}) + target_link_libraries( zdoom debug ${debuglib} ) + endforeach(debuglib) + foreach(releaselib ${LLVM_RELEASE_LIBS}) + target_link_libraries( zdoom optimized ${releaselib} ) + endforeach(releaselib) +endif() + include_directories( . 
g_doom g_heretic diff --git a/src/r_compiler/llvmdrawers.cpp b/src/r_compiler/llvmdrawers.cpp index 60727744c8..4f59419adf 100644 --- a/src/r_compiler/llvmdrawers.cpp +++ b/src/r_compiler/llvmdrawers.cpp @@ -246,7 +246,6 @@ LLVMProgram::LLVMProgram() InitializeNativeTarget(); InitializeNativeTargetAsmPrinter(); - InitializeNativeTargetAsmParser(); std::string errorstring; @@ -263,8 +262,8 @@ LLVMProgram::LLVMProgram() cpuFeaturesStr += it.getKey(); } - //Printf("LLVM target triple: %s\n", targetTriple.c_str()); - //Printf("LLVM CPU and features: %s, %s\n", cpuName.c_str(), cpuFeaturesStr.c_str()); + DPrintf(DMSG_SPAMMY, "LLVM target triple: %s\n", targetTriple.c_str()); + DPrintf(DMSG_SPAMMY, "LLVM CPU and features: %s, %s\n", cpuName.c_str(), cpuFeaturesStr.c_str()); const Target *target = TargetRegistry::lookupTarget(targetTriple, errorstring); if (!target) From c960742dbd197b9a73ba2ab0b1175e8e5d10655f Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 1 Oct 2016 11:47:21 +0200 Subject: [PATCH 148/912] Fix 64 bit compile errors --- src/CMakeLists.txt | 24 +++++++++---------- .../fixedfunction/drawcolumncodegen.cpp | 1 + .../fixedfunction/drawercodegen.cpp | 7 +++--- .../fixedfunction/drawspancodegen.cpp | 11 +++++---- .../fixedfunction/drawwallcodegen.cpp | 7 +++--- src/r_compiler/llvm_include.h | 10 ++++++++ src/r_compiler/llvmdrawers.cpp | 1 + src/r_compiler/ssa/ssa_bool.cpp | 2 +- src/r_compiler/ssa/ssa_float.cpp | 2 +- src/r_compiler/ssa/ssa_float.h | 2 +- src/r_compiler/ssa/ssa_float_ptr.cpp | 2 +- src/r_compiler/ssa/ssa_float_ptr.h | 1 + src/r_compiler/ssa/ssa_for_block.cpp | 1 + src/r_compiler/ssa/ssa_for_block.h | 1 - src/r_compiler/ssa/ssa_function.cpp | 2 +- src/r_compiler/ssa/ssa_if_block.cpp | 1 + src/r_compiler/ssa/ssa_if_block.h | 1 - src/r_compiler/ssa/ssa_int.cpp | 2 +- src/r_compiler/ssa/ssa_int.h | 2 +- src/r_compiler/ssa/ssa_int_ptr.cpp | 2 +- src/r_compiler/ssa/ssa_int_ptr.h | 1 + src/r_compiler/ssa/ssa_scope.cpp | 1 + 
src/r_compiler/ssa/ssa_scope.h | 2 -- src/r_compiler/ssa/ssa_short.cpp | 2 +- src/r_compiler/ssa/ssa_short.h | 2 +- src/r_compiler/ssa/ssa_struct_type.cpp | 1 + src/r_compiler/ssa/ssa_ubyte.cpp | 2 +- src/r_compiler/ssa/ssa_ubyte.h | 2 +- src/r_compiler/ssa/ssa_ubyte_ptr.cpp | 4 ++-- src/r_compiler/ssa/ssa_ubyte_ptr.h | 1 + src/r_compiler/ssa/ssa_value.cpp | 2 +- src/r_compiler/ssa/ssa_vec16ub.cpp | 2 +- src/r_compiler/ssa/ssa_vec16ub.h | 4 ++-- src/r_compiler/ssa/ssa_vec4f.cpp | 7 +++++- src/r_compiler/ssa/ssa_vec4f.h | 5 ++-- src/r_compiler/ssa/ssa_vec4f_ptr.cpp | 2 +- src/r_compiler/ssa/ssa_vec4i.cpp | 18 ++++++++++---- src/r_compiler/ssa/ssa_vec4i.h | 8 ++++--- src/r_compiler/ssa/ssa_vec4i_ptr.cpp | 2 +- src/r_compiler/ssa/ssa_vec4i_ptr.h | 1 + src/r_compiler/ssa/ssa_vec8s.cpp | 6 ++--- src/r_compiler/ssa/ssa_vec8s.h | 4 ++-- 42 files changed, 99 insertions(+), 62 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 37dd9b5a8f..ac6de85f68 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -258,9 +258,7 @@ endif() set( LLVM_COMPONENTS core support asmparser asmprinter bitreader codegen passes ipo irreader transformutils instrumentation profiledata debuginfocodeview runtimedyld object instcombine linker analysis selectiondag scalaropts vectorize executionengine - mc mcdisassembler mcparser mcjit target ) -set( LLVM_COMPONENTS_X86 x86asmprinter x86info x86desc x86utils x86codegen ) -set( LLVM_COMPONENTS_X64 aarch64asmprinter aarch64info aarch64desc aarch64utils aarch64codegen ) + mc mcdisassembler mcparser mcjit target x86asmprinter x86info x86desc x86utils x86codegen ) # Path where it looks for the LLVM compiled files on Windows set( LLVM_PRECOMPILED_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../llvm" ) @@ -270,24 +268,22 @@ if( NOT WIN32 ) find_package(LLVM REQUIRED CONFIG) message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}") message(STATUS "Using LLVMConfig.cmake in: ${LLVM_DIR}") - llvm_map_components_to_libnames( llvm_libs ${LLVM_COMPONENTS} 
${LLVM_COMPONENTS_X86} ${LLVM_COMPONENTS_X64} ) + llvm_map_components_to_libnames( llvm_libs ${LLVM_COMPONENTS} ) include_directories( ${LLVM_INCLUDE_DIRS} ) set( ZDOOM_LIBS ${ZDOOM_LIBS} ${llvm_libs} ) else() include_directories( "${LLVM_PRECOMPILED_DIR}/include" ) if( X64 ) - include_directories( "${LLVM_PRECOMPILED_DIR}/64bit/include" ) - set( llvm_libs_base "${LLVM_PRECOMPILED_DIR}/llvm/64bit" ) - set( LLVM_ALL_COMPONENTS ${LLVM_COMPONENTS} ${LLVM_COMPONENTS_X64} ) + include_directories( "${LLVM_PRECOMPILED_DIR}/64bit-include" ) + set( llvm_libs_base "${LLVM_PRECOMPILED_DIR}/64bit-" ) else() - include_directories( "${LLVM_PRECOMPILED_DIR}/32bit/include" ) - set( llvm_libs_base "${LLVM_PRECOMPILED_DIR}/32bit" ) - set( LLVM_ALL_COMPONENTS ${LLVM_COMPONENTS} ${LLVM_COMPONENTS_X86} ) + include_directories( "${LLVM_PRECOMPILED_DIR}/32bit-include" ) + set( llvm_libs_base "${LLVM_PRECOMPILED_DIR}/32bit-" ) endif() foreach(buildtype IN ITEMS RELEASE DEBUG) - set( llvm_libs_${buildtype} "${llvm_libs_base}/${buildtype}" ) + set( llvm_libs_${buildtype} "${llvm_libs_base}${buildtype}" ) set( LLVM_${buildtype}_LIBS "" ) - foreach( llvm_module ${LLVM_ALL_COMPONENTS} ) + foreach( llvm_module ${LLVM_COMPONENTS} ) find_library( LLVM_${llvm_module}_LIBRARY_${buildtype} LLVM${llvm_module} PATHS ${llvm_libs_${buildtype}} ) set( LLVM_${buildtype}_LIBS ${LLVM_${buildtype}_LIBS} ${LLVM_${llvm_module}_LIBRARY_${buildtype}} ) endforeach( llvm_module ) @@ -1492,6 +1488,10 @@ set_source_files_properties( sc_man.cpp PROPERTIES OBJECT_DEPENDS "${CMAKE_CURRE set_source_files_properties( ${NOT_COMPILED_SOURCE_FILES} PROPERTIES HEADER_FILE_ONLY TRUE ) if ( WIN32 ) set_source_files_properties( win32/fb_d3d9.cpp win32/fb_d3d9_wipe.cpp PROPERTIES COMPILE_FLAGS ${ZD_FASTMATH_FLAG} ) + + # Supress C4244: 'initializing': conversion from '__int64' to 'unsigned int', possible loss of data + # For some reason using #pragma warning(disable: 4244) is not working.. 
+ set_source_files_properties( ${PCH_SOURCES} PROPERTIES COMPILE_FLAGS /wd4244 ) endif() diff --git a/src/r_compiler/fixedfunction/drawcolumncodegen.cpp b/src/r_compiler/fixedfunction/drawcolumncodegen.cpp index 4594e22902..67d801162b 100644 --- a/src/r_compiler/fixedfunction/drawcolumncodegen.cpp +++ b/src/r_compiler/fixedfunction/drawcolumncodegen.cpp @@ -1,5 +1,6 @@ #include "i_system.h" +#include "r_compiler/llvm_include.h" #include "r_compiler/fixedfunction/drawcolumncodegen.h" #include "r_compiler/ssa/ssa_function.h" #include "r_compiler/ssa/ssa_scope.h" diff --git a/src/r_compiler/fixedfunction/drawercodegen.cpp b/src/r_compiler/fixedfunction/drawercodegen.cpp index 2cba501218..822a811411 100644 --- a/src/r_compiler/fixedfunction/drawercodegen.cpp +++ b/src/r_compiler/fixedfunction/drawercodegen.cpp @@ -1,5 +1,6 @@ #include "i_system.h" +#include "r_compiler/llvm_include.h" #include "r_compiler/fixedfunction/drawercodegen.h" #include "r_compiler/ssa/ssa_function.h" #include "r_compiler/ssa/ssa_scope.h" @@ -17,17 +18,17 @@ SSABool DrawerCodegen::line_skipped_by_thread(SSAInt line, SSAWorkerThread threa SSAInt DrawerCodegen::skipped_by_thread(SSAInt first_line, SSAWorkerThread thread) { - SSAInt pass_skip = SSAInt::MAX(thread.pass_start_y - first_line, 0); + SSAInt pass_skip = SSAInt::MAX(thread.pass_start_y - first_line, SSAInt(0)); SSAInt core_skip = (thread.num_cores - (first_line + pass_skip - thread.core) % thread.num_cores) % thread.num_cores; return pass_skip + core_skip; } SSAInt DrawerCodegen::count_for_thread(SSAInt first_line, SSAInt count, SSAWorkerThread thread) { - SSAInt lines_until_pass_end = SSAInt::MAX(thread.pass_end_y - first_line, 0); + SSAInt lines_until_pass_end = SSAInt::MAX(thread.pass_end_y - first_line, SSAInt(0)); count = SSAInt::MIN(count, lines_until_pass_end); SSAInt c = (count - skipped_by_thread(first_line, thread) + thread.num_cores - 1) / thread.num_cores; - return SSAInt::MAX(c, 0); + return SSAInt::MAX(c, SSAInt(0)); } 
SSAUBytePtr DrawerCodegen::dest_for_thread(SSAInt first_line, SSAInt pitch, SSAUBytePtr dest, SSAWorkerThread thread) diff --git a/src/r_compiler/fixedfunction/drawspancodegen.cpp b/src/r_compiler/fixedfunction/drawspancodegen.cpp index 1623c38f21..70ecb0abd0 100644 --- a/src/r_compiler/fixedfunction/drawspancodegen.cpp +++ b/src/r_compiler/fixedfunction/drawspancodegen.cpp @@ -1,5 +1,6 @@ #include "i_system.h" +#include "r_compiler/llvm_include.h" #include "r_compiler/fixedfunction/drawspancodegen.h" #include "r_compiler/ssa/ssa_function.h" #include "r_compiler/ssa/ssa_scope.h" @@ -49,9 +50,9 @@ void DrawSpanCodegen::Generate(DrawSpanVariant variant, SSAValue args) xmask = ((SSAInt(1) << xbits) - 1) << ybits; // 64x64 is the most common case by far, so special case it. - is_64x64 = xbits == 6 && ybits == 6; - is_simple_shade = (flags & DrawSpanArgs::simple_shade) == DrawSpanArgs::simple_shade; - is_nearest_filter = (flags & DrawSpanArgs::nearest_filter) == DrawSpanArgs::nearest_filter; + is_64x64 = xbits == SSAInt(6) && ybits == SSAInt(6); + is_simple_shade = (flags & DrawSpanArgs::simple_shade) == SSAInt(DrawSpanArgs::simple_shade); + is_nearest_filter = (flags & DrawSpanArgs::nearest_filter) == SSAInt(DrawSpanArgs::nearest_filter); SSAIfBlock branch; branch.if_block(is_simple_shade); @@ -90,7 +91,7 @@ void DrawSpanCodegen::LoopFilter(DrawSpanVariant variant, bool isSimpleShade, bo SSAInt DrawSpanCodegen::Loop4x(DrawSpanVariant variant, bool isSimpleShade, bool isNearestFilter, bool is64x64) { SSAInt sseLength = count / 4; - stack_index.store(0); + stack_index.store(SSAInt(0)); { SSAForBlock loop; SSAInt index = stack_index.load(); @@ -165,7 +166,7 @@ SSAVec4i DrawSpanCodegen::Sample(SSAInt xfrac, SSAInt yfrac, bool isNearestFilte { if (is64x64) { - return sample_linear(source, xfrac, yfrac, 26, 26); + return sample_linear(source, xfrac, yfrac, SSAInt(26), SSAInt(26)); } else { diff --git a/src/r_compiler/fixedfunction/drawwallcodegen.cpp 
b/src/r_compiler/fixedfunction/drawwallcodegen.cpp index 0ca5377234..55b17dafee 100644 --- a/src/r_compiler/fixedfunction/drawwallcodegen.cpp +++ b/src/r_compiler/fixedfunction/drawwallcodegen.cpp @@ -1,5 +1,6 @@ #include "i_system.h" +#include "r_compiler/llvm_include.h" #include "r_compiler/fixedfunction/drawwallcodegen.h" #include "r_compiler/ssa/ssa_function.h" #include "r_compiler/ssa/ssa_scope.h" @@ -65,8 +66,8 @@ void DrawWallCodegen::Generate(DrawWallVariant variant, bool fourColumns, SSAVal thread.pass_start_y = thread_data[0][2].load(); thread.pass_end_y = thread_data[0][3].load(); - is_simple_shade = (flags & DrawWallArgs::simple_shade) == DrawWallArgs::simple_shade; - is_nearest_filter = (flags & DrawWallArgs::nearest_filter) == DrawWallArgs::nearest_filter; + is_simple_shade = (flags & DrawWallArgs::simple_shade) == SSAInt(DrawWallArgs::simple_shade); + is_nearest_filter = (flags & DrawWallArgs::nearest_filter) == SSAInt(DrawWallArgs::nearest_filter); count = count_for_thread(dest_y, count, thread); dest = dest_for_thread(dest_y, pitch, dest, thread); @@ -103,7 +104,7 @@ void DrawWallCodegen::Loop(DrawWallVariant variant, bool fourColumns, bool isSim { int numColumns = fourColumns ? 
4 : 1; - stack_index.store(0); + stack_index.store(SSAInt(0)); { SSAForBlock loop; SSAInt index = stack_index.load(); diff --git a/src/r_compiler/llvm_include.h b/src/r_compiler/llvm_include.h index b916bad0e9..d1550f38a5 100644 --- a/src/r_compiler/llvm_include.h +++ b/src/r_compiler/llvm_include.h @@ -1,6 +1,8 @@ #pragma once +#ifdef _MSC_VER + #if defined(min) #define llvm_min_bug min #undef min @@ -18,6 +20,10 @@ #pragma warning(disable: 4244) // warning C4244: 'return' : conversion from 'uint64_t' to 'unsigned int', possible loss of data #pragma warning(disable: 4141) // warning C4141: 'inline': used more than once #pragma warning(disable: 4291) // warning C4291: 'void *llvm::User::operator new(std::size_t,unsigned int,unsigned int)': no matching operator delete found; memory will not be freed if initialization throws an exception +#pragma warning(disable: 4267) // warning C4267: 'return': conversion from 'size_t' to 'unsigned int', possible loss of data +#pragma warning(disable: 4244) // warning C4244: 'initializing': conversion from '__int64' to 'unsigned int', possible loss of data + +#endif #include #include @@ -39,6 +45,8 @@ #include #include +#ifdef _MSC_VER + #if defined(llvm_min_bug) #define min llvm_min_bug #undef llvm_min_bug @@ -47,3 +55,5 @@ #define max llvm_max_bug #undef llvm_max_bug #endif + +#endif diff --git a/src/r_compiler/llvmdrawers.cpp b/src/r_compiler/llvmdrawers.cpp index 4f59419adf..7691af35b1 100644 --- a/src/r_compiler/llvmdrawers.cpp +++ b/src/r_compiler/llvmdrawers.cpp @@ -1,5 +1,6 @@ #include "i_system.h" +#include "r_compiler/llvm_include.h" #include "r_compiler/fixedfunction/drawspancodegen.h" #include "r_compiler/fixedfunction/drawwallcodegen.h" #include "r_compiler/fixedfunction/drawcolumncodegen.h" diff --git a/src/r_compiler/ssa/ssa_bool.cpp b/src/r_compiler/ssa/ssa_bool.cpp index 1013239117..bfd9ba5abf 100644 --- a/src/r_compiler/ssa/ssa_bool.cpp +++ b/src/r_compiler/ssa/ssa_bool.cpp @@ -1,7 +1,7 @@ +#include 
"r_compiler/llvm_include.h" #include "ssa_bool.h" #include "ssa_scope.h" -#include "r_compiler/llvm_include.h" SSABool::SSABool() : v(0) diff --git a/src/r_compiler/ssa/ssa_float.cpp b/src/r_compiler/ssa/ssa_float.cpp index 87488af744..4ec5c516df 100644 --- a/src/r_compiler/ssa/ssa_float.cpp +++ b/src/r_compiler/ssa/ssa_float.cpp @@ -1,8 +1,8 @@ +#include "r_compiler/llvm_include.h" #include "ssa_float.h" #include "ssa_int.h" #include "ssa_scope.h" -#include "r_compiler/llvm_include.h" SSAFloat::SSAFloat() : v(0) diff --git a/src/r_compiler/ssa/ssa_float.h b/src/r_compiler/ssa/ssa_float.h index 2349ab8773..0edbcfcba8 100644 --- a/src/r_compiler/ssa/ssa_float.h +++ b/src/r_compiler/ssa/ssa_float.h @@ -11,7 +11,7 @@ class SSAFloat public: SSAFloat(); SSAFloat(SSAInt i); - SSAFloat(float constant); + explicit SSAFloat(float constant); explicit SSAFloat(llvm::Value *v); static SSAFloat from_llvm(llvm::Value *v) { return SSAFloat(v); } static llvm::Type *llvm_type(); diff --git a/src/r_compiler/ssa/ssa_float_ptr.cpp b/src/r_compiler/ssa/ssa_float_ptr.cpp index 6a1409271b..582821ca03 100644 --- a/src/r_compiler/ssa/ssa_float_ptr.cpp +++ b/src/r_compiler/ssa/ssa_float_ptr.cpp @@ -1,7 +1,7 @@ +#include "r_compiler/llvm_include.h" #include "ssa_float_ptr.h" #include "ssa_scope.h" -#include "r_compiler/llvm_include.h" SSAFloatPtr::SSAFloatPtr() : v(0) diff --git a/src/r_compiler/ssa/ssa_float_ptr.h b/src/r_compiler/ssa/ssa_float_ptr.h index a4318e027f..f29b2de3f7 100644 --- a/src/r_compiler/ssa/ssa_float_ptr.h +++ b/src/r_compiler/ssa/ssa_float_ptr.h @@ -16,6 +16,7 @@ public: static SSAFloatPtr from_llvm(llvm::Value *v) { return SSAFloatPtr(v); } static llvm::Type *llvm_type(); SSAFloatPtr operator[](SSAInt index) const; + SSAFloatPtr operator[](int index) const { return (*this)[SSAInt(index)]; } SSAFloat load() const; SSAVec4f load_vec4f() const; SSAVec4f load_unaligned_vec4f() const; diff --git a/src/r_compiler/ssa/ssa_for_block.cpp b/src/r_compiler/ssa/ssa_for_block.cpp 
index ce93286076..f7cd6ad0bd 100644 --- a/src/r_compiler/ssa/ssa_for_block.cpp +++ b/src/r_compiler/ssa/ssa_for_block.cpp @@ -1,4 +1,5 @@ +#include "r_compiler/llvm_include.h" #include "ssa_for_block.h" #include "ssa_scope.h" diff --git a/src/r_compiler/ssa/ssa_for_block.h b/src/r_compiler/ssa/ssa_for_block.h index 58803dee5c..4c1952c14e 100644 --- a/src/r_compiler/ssa/ssa_for_block.h +++ b/src/r_compiler/ssa/ssa_for_block.h @@ -2,7 +2,6 @@ #pragma once #include "ssa_bool.h" -#include "r_compiler/llvm_include.h" class SSAForBlock { diff --git a/src/r_compiler/ssa/ssa_function.cpp b/src/r_compiler/ssa/ssa_function.cpp index aee4de5a92..a326beaf76 100644 --- a/src/r_compiler/ssa/ssa_function.cpp +++ b/src/r_compiler/ssa/ssa_function.cpp @@ -1,9 +1,9 @@ +#include "r_compiler/llvm_include.h" #include "ssa_function.h" #include "ssa_int.h" #include "ssa_scope.h" #include "ssa_value.h" -#include "r_compiler/llvm_include.h" SSAFunction::SSAFunction(const std::string name) : name(name), return_type(llvm::Type::getVoidTy(SSAScope::context())), func() diff --git a/src/r_compiler/ssa/ssa_if_block.cpp b/src/r_compiler/ssa/ssa_if_block.cpp index e2de9ecadc..7187a0759c 100644 --- a/src/r_compiler/ssa/ssa_if_block.cpp +++ b/src/r_compiler/ssa/ssa_if_block.cpp @@ -1,4 +1,5 @@ +#include "r_compiler/llvm_include.h" #include "ssa_if_block.h" #include "ssa_scope.h" diff --git a/src/r_compiler/ssa/ssa_if_block.h b/src/r_compiler/ssa/ssa_if_block.h index 98c534a867..4f0c8a26bb 100644 --- a/src/r_compiler/ssa/ssa_if_block.h +++ b/src/r_compiler/ssa/ssa_if_block.h @@ -3,7 +3,6 @@ #include "ssa_bool.h" #include "ssa_phi.h" -#include "r_compiler/llvm_include.h" class SSAIfBlock { diff --git a/src/r_compiler/ssa/ssa_int.cpp b/src/r_compiler/ssa/ssa_int.cpp index 8d5a32e4c6..3d9cb22bdf 100644 --- a/src/r_compiler/ssa/ssa_int.cpp +++ b/src/r_compiler/ssa/ssa_int.cpp @@ -1,9 +1,9 @@ +#include "r_compiler/llvm_include.h" #include "ssa_int.h" #include "ssa_float.h" #include "ssa_bool.h" #include 
"ssa_scope.h" -#include "r_compiler/llvm_include.h" SSAInt::SSAInt() : v(0) diff --git a/src/r_compiler/ssa/ssa_int.h b/src/r_compiler/ssa/ssa_int.h index d928c41f2c..c0f46e4b67 100644 --- a/src/r_compiler/ssa/ssa_int.h +++ b/src/r_compiler/ssa/ssa_int.h @@ -10,7 +10,7 @@ class SSAInt { public: SSAInt(); - SSAInt(int constant); + explicit SSAInt(int constant); SSAInt(SSAFloat f); explicit SSAInt(llvm::Value *v); static SSAInt from_llvm(llvm::Value *v) { return SSAInt(v); } diff --git a/src/r_compiler/ssa/ssa_int_ptr.cpp b/src/r_compiler/ssa/ssa_int_ptr.cpp index 3c26370736..974645d08c 100644 --- a/src/r_compiler/ssa/ssa_int_ptr.cpp +++ b/src/r_compiler/ssa/ssa_int_ptr.cpp @@ -1,7 +1,7 @@ +#include "r_compiler/llvm_include.h" #include "ssa_int_ptr.h" #include "ssa_scope.h" -#include "r_compiler/llvm_include.h" SSAIntPtr::SSAIntPtr() : v(0) diff --git a/src/r_compiler/ssa/ssa_int_ptr.h b/src/r_compiler/ssa/ssa_int_ptr.h index 20e024a311..c75ed6a8d5 100644 --- a/src/r_compiler/ssa/ssa_int_ptr.h +++ b/src/r_compiler/ssa/ssa_int_ptr.h @@ -16,6 +16,7 @@ public: static SSAIntPtr from_llvm(llvm::Value *v) { return SSAIntPtr(v); } static llvm::Type *llvm_type(); SSAIntPtr operator[](SSAInt index) const; + SSAIntPtr operator[](int index) const { return (*this)[SSAInt(index)]; } SSAInt load() const; SSAVec4i load_vec4i() const; SSAVec4i load_unaligned_vec4i() const; diff --git a/src/r_compiler/ssa/ssa_scope.cpp b/src/r_compiler/ssa/ssa_scope.cpp index f9d16f1889..e5d34a2033 100644 --- a/src/r_compiler/ssa/ssa_scope.cpp +++ b/src/r_compiler/ssa/ssa_scope.cpp @@ -1,4 +1,5 @@ +#include "r_compiler/llvm_include.h" #include "ssa_scope.h" #include "ssa_int.h" diff --git a/src/r_compiler/ssa/ssa_scope.h b/src/r_compiler/ssa/ssa_scope.h index d184643adb..ad080fde6c 100644 --- a/src/r_compiler/ssa/ssa_scope.h +++ b/src/r_compiler/ssa/ssa_scope.h @@ -1,8 +1,6 @@ #pragma once -#include "r_compiler/llvm_include.h" - class SSAInt; class SSAScope diff --git 
a/src/r_compiler/ssa/ssa_short.cpp b/src/r_compiler/ssa/ssa_short.cpp index 3fa59b688a..017f3002a2 100644 --- a/src/r_compiler/ssa/ssa_short.cpp +++ b/src/r_compiler/ssa/ssa_short.cpp @@ -1,9 +1,9 @@ +#include "r_compiler/llvm_include.h" #include "ssa_short.h" #include "ssa_float.h" #include "ssa_int.h" #include "ssa_scope.h" -#include "r_compiler/llvm_include.h" SSAShort::SSAShort() : v(0) diff --git a/src/r_compiler/ssa/ssa_short.h b/src/r_compiler/ssa/ssa_short.h index 932aafc0ea..4a53434026 100644 --- a/src/r_compiler/ssa/ssa_short.h +++ b/src/r_compiler/ssa/ssa_short.h @@ -11,7 +11,7 @@ class SSAShort { public: SSAShort(); - SSAShort(int constant); + explicit SSAShort(int constant); SSAShort(SSAFloat f); explicit SSAShort(llvm::Value *v); static SSAShort from_llvm(llvm::Value *v) { return SSAShort(v); } diff --git a/src/r_compiler/ssa/ssa_struct_type.cpp b/src/r_compiler/ssa/ssa_struct_type.cpp index 4a79768cea..d4ae2acb1c 100644 --- a/src/r_compiler/ssa/ssa_struct_type.cpp +++ b/src/r_compiler/ssa/ssa_struct_type.cpp @@ -1,4 +1,5 @@ +#include "r_compiler/llvm_include.h" #include "ssa_struct_type.h" #include "ssa_scope.h" diff --git a/src/r_compiler/ssa/ssa_ubyte.cpp b/src/r_compiler/ssa/ssa_ubyte.cpp index 04db4fd28f..3204d064d1 100644 --- a/src/r_compiler/ssa/ssa_ubyte.cpp +++ b/src/r_compiler/ssa/ssa_ubyte.cpp @@ -1,7 +1,7 @@ +#include "r_compiler/llvm_include.h" #include "ssa_ubyte.h" #include "ssa_scope.h" -#include "r_compiler/llvm_include.h" SSAUByte::SSAUByte() : v(0) diff --git a/src/r_compiler/ssa/ssa_ubyte.h b/src/r_compiler/ssa/ssa_ubyte.h index f1e12afba4..ef878b3259 100644 --- a/src/r_compiler/ssa/ssa_ubyte.h +++ b/src/r_compiler/ssa/ssa_ubyte.h @@ -8,7 +8,7 @@ class SSAUByte { public: SSAUByte(); - SSAUByte(unsigned char constant); + explicit SSAUByte(unsigned char constant); explicit SSAUByte(llvm::Value *v); static SSAUByte from_llvm(llvm::Value *v) { return SSAUByte(v); } static llvm::Type *llvm_type(); diff --git 
a/src/r_compiler/ssa/ssa_ubyte_ptr.cpp b/src/r_compiler/ssa/ssa_ubyte_ptr.cpp index b2408066ee..98bf27c462 100644 --- a/src/r_compiler/ssa/ssa_ubyte_ptr.cpp +++ b/src/r_compiler/ssa/ssa_ubyte_ptr.cpp @@ -1,7 +1,7 @@ +#include "r_compiler/llvm_include.h" #include "ssa_ubyte_ptr.h" #include "ssa_scope.h" -#include "r_compiler/llvm_include.h" SSAUBytePtr::SSAUBytePtr() : v(0) @@ -38,7 +38,7 @@ SSAVec4i SSAUBytePtr::load_vec4ub() const v = SSAScope::builder().CreateInsertElement(v, SSAInt(0).v, SSAInt(3).v, SSAScope::hint()); SSAVec4i v4i = SSAVec4i::from_llvm(v); - SSAVec8s low = SSAVec8s::bitcast(SSAVec16ub::shuffle(SSAVec16ub::bitcast(v4i), 0, 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7)); // _mm_unpacklo_epi8 + SSAVec8s low = SSAVec8s::bitcast(SSAVec16ub::shuffle(SSAVec16ub::bitcast(v4i), SSAVec16ub((unsigned char)0), 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7)); // _mm_unpacklo_epi8 return SSAVec4i::extendlo(low); // _mm_unpacklo_epi16 /* llvm::PointerType *m4xint8typeptr = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 4)->getPointerTo(); diff --git a/src/r_compiler/ssa/ssa_ubyte_ptr.h b/src/r_compiler/ssa/ssa_ubyte_ptr.h index 5b68ee1add..c084068bc7 100644 --- a/src/r_compiler/ssa/ssa_ubyte_ptr.h +++ b/src/r_compiler/ssa/ssa_ubyte_ptr.h @@ -18,6 +18,7 @@ public: static SSAUBytePtr from_llvm(llvm::Value *v) { return SSAUBytePtr(v); } static llvm::Type *llvm_type(); SSAUBytePtr operator[](SSAInt index) const; + SSAUBytePtr operator[](int index) const { return (*this)[SSAInt(index)]; } SSAUByte load() const; SSAVec4i load_vec4ub() const; SSAVec8s load_vec8s() const; diff --git a/src/r_compiler/ssa/ssa_value.cpp b/src/r_compiler/ssa/ssa_value.cpp index 877420fc5d..c37b7f4c1d 100644 --- a/src/r_compiler/ssa/ssa_value.cpp +++ b/src/r_compiler/ssa/ssa_value.cpp @@ -1,8 +1,8 @@ +#include "r_compiler/llvm_include.h" #include "ssa_value.h" #include "ssa_int.h" #include "ssa_scope.h" -#include 
"r_compiler/llvm_include.h" SSAValue SSAValue::load() { diff --git a/src/r_compiler/ssa/ssa_vec16ub.cpp b/src/r_compiler/ssa/ssa_vec16ub.cpp index f18d687188..4a077382eb 100644 --- a/src/r_compiler/ssa/ssa_vec16ub.cpp +++ b/src/r_compiler/ssa/ssa_vec16ub.cpp @@ -1,9 +1,9 @@ +#include "r_compiler/llvm_include.h" #include "ssa_vec16ub.h" #include "ssa_vec8s.h" #include "ssa_vec4i.h" #include "ssa_scope.h" -#include "r_compiler/llvm_include.h" SSAVec16ub::SSAVec16ub() : v(0) diff --git a/src/r_compiler/ssa/ssa_vec16ub.h b/src/r_compiler/ssa/ssa_vec16ub.h index e4cfcdc87b..8f48c0c490 100644 --- a/src/r_compiler/ssa/ssa_vec16ub.h +++ b/src/r_compiler/ssa/ssa_vec16ub.h @@ -11,8 +11,8 @@ class SSAVec16ub { public: SSAVec16ub(); - SSAVec16ub(unsigned char constant); - SSAVec16ub( + explicit SSAVec16ub(unsigned char constant); + explicit SSAVec16ub( unsigned char constant0, unsigned char constant1, unsigned char constant2, unsigned char constant3, unsigned char constant4, unsigned char constant5, unsigned char constant6, unsigned char constant7, unsigned char constant8, unsigned char constant9, unsigned char constant10, unsigned char constant11, unsigned char constant12, unsigned char constant13, unsigned char constant14, unsigned char constant15); explicit SSAVec16ub(llvm::Value *v); diff --git a/src/r_compiler/ssa/ssa_vec4f.cpp b/src/r_compiler/ssa/ssa_vec4f.cpp index e002018fe8..dc6f9a716b 100644 --- a/src/r_compiler/ssa/ssa_vec4f.cpp +++ b/src/r_compiler/ssa/ssa_vec4f.cpp @@ -1,10 +1,10 @@ +#include "r_compiler/llvm_include.h" #include "ssa_vec4f.h" #include "ssa_vec4i.h" #include "ssa_float.h" #include "ssa_int.h" #include "ssa_scope.h" -#include "r_compiler/llvm_include.h" SSAVec4f::SSAVec4f() : v(0) @@ -75,6 +75,11 @@ SSAFloat SSAVec4f::operator[](SSAInt index) const return SSAFloat::from_llvm(SSAScope::builder().CreateExtractElement(v, index.v, SSAScope::hint())); } +SSAFloat SSAVec4f::operator[](int index) const +{ + return (*this)[SSAInt(index)]; +} + SSAVec4f 
SSAVec4f::insert_element(SSAVec4f vec4f, SSAFloat value, int index) { return from_llvm(SSAScope::builder().CreateInsertElement(vec4f.v, value.v, llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)index)))); diff --git a/src/r_compiler/ssa/ssa_vec4f.h b/src/r_compiler/ssa/ssa_vec4f.h index 5e3397e580..6d4ae63352 100644 --- a/src/r_compiler/ssa/ssa_vec4f.h +++ b/src/r_compiler/ssa/ssa_vec4f.h @@ -12,13 +12,14 @@ class SSAVec4f { public: SSAVec4f(); - SSAVec4f(float constant); - SSAVec4f(float constant0, float constant1, float constant2, float constant3); + explicit SSAVec4f(float constant); + explicit SSAVec4f(float constant0, float constant1, float constant2, float constant3); SSAVec4f(SSAFloat f); SSAVec4f(SSAFloat f0, SSAFloat f1, SSAFloat f2, SSAFloat f3); explicit SSAVec4f(llvm::Value *v); SSAVec4f(SSAVec4i i32); SSAFloat operator[](SSAInt index) const; + SSAFloat operator[](int index) const; static SSAVec4f insert_element(SSAVec4f vec4f, SSAFloat value, int index); static SSAVec4f bitcast(SSAVec4i i32); static SSAVec4f sqrt(SSAVec4f f); diff --git a/src/r_compiler/ssa/ssa_vec4f_ptr.cpp b/src/r_compiler/ssa/ssa_vec4f_ptr.cpp index 6a197ec90e..e0ed8bc868 100644 --- a/src/r_compiler/ssa/ssa_vec4f_ptr.cpp +++ b/src/r_compiler/ssa/ssa_vec4f_ptr.cpp @@ -1,7 +1,7 @@ +#include "r_compiler/llvm_include.h" #include "ssa_vec4f_ptr.h" #include "ssa_scope.h" -#include "r_compiler/llvm_include.h" SSAVec4fPtr::SSAVec4fPtr() : v(0) diff --git a/src/r_compiler/ssa/ssa_vec4i.cpp b/src/r_compiler/ssa/ssa_vec4i.cpp index 1eed7b269a..3b508412f3 100644 --- a/src/r_compiler/ssa/ssa_vec4i.cpp +++ b/src/r_compiler/ssa/ssa_vec4i.cpp @@ -1,11 +1,11 @@ +#include "r_compiler/llvm_include.h" #include "ssa_vec4i.h" #include "ssa_vec4f.h" #include "ssa_vec8s.h" #include "ssa_vec16ub.h" #include "ssa_int.h" #include "ssa_scope.h" -#include "r_compiler/llvm_include.h" SSAVec4i::SSAVec4i() : v(0) @@ -67,11 +67,16 @@ SSAVec4i::SSAVec4i(SSAVec4f f32) v = 
SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_cvttps2dq), f32.v, SSAScope::hint()); } -SSAInt SSAVec4i::operator[](SSAInt index) +SSAInt SSAVec4i::operator[](SSAInt index) const { return SSAInt::from_llvm(SSAScope::builder().CreateExtractElement(v, index.v, SSAScope::hint())); } +SSAInt SSAVec4i::operator[](int index) const +{ + return (*this)[SSAInt(index)]; +} + SSAVec4i SSAVec4i::insert(SSAInt index, SSAInt value) { return SSAVec4i::from_llvm(SSAScope::builder().CreateInsertElement(v, value.v, index.v, SSAScope::hint())); @@ -82,6 +87,11 @@ SSAVec4i SSAVec4i::insert(int index, SSAInt value) return SSAVec4i::from_llvm(SSAScope::builder().CreateInsertElement(v, value.v, index, SSAScope::hint())); } +SSAVec4i SSAVec4i::insert(int index, int value) +{ + return insert(index, SSAInt(value)); +} + llvm::Type *SSAVec4i::llvm_type() { return llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 4); @@ -125,12 +135,12 @@ void SSAVec4i::extend(SSAVec16ub a, SSAVec4i &out0, SSAVec4i &out1, SSAVec4i &ou SSAVec4i SSAVec4i::extendhi(SSAVec8s i16) { - return SSAVec4i::bitcast(SSAVec8s::shuffle(i16, 0, 4, 8+4, 5, 8+5, 6, 8+6, 7, 8+7)); // _mm_unpackhi_epi16 + return SSAVec4i::bitcast(SSAVec8s::shuffle(i16, SSAVec8s((short)0), 4, 8+4, 5, 8+5, 6, 8+6, 7, 8+7)); // _mm_unpackhi_epi16 } SSAVec4i SSAVec4i::extendlo(SSAVec8s i16) { - return SSAVec4i::bitcast(SSAVec8s::shuffle(i16, 0, 0, 8+0, 1, 8+1, 2, 8+2, 3, 8+3)); // _mm_unpacklo_epi16 + return SSAVec4i::bitcast(SSAVec8s::shuffle(i16, SSAVec8s((short)0), 0, 8+0, 1, 8+1, 2, 8+2, 3, 8+3)); // _mm_unpacklo_epi16 } SSAVec4i SSAVec4i::combinehi(SSAVec8s a, SSAVec8s b) diff --git a/src/r_compiler/ssa/ssa_vec4i.h b/src/r_compiler/ssa/ssa_vec4i.h index c1c9140d7f..89cda16465 100644 --- a/src/r_compiler/ssa/ssa_vec4i.h +++ b/src/r_compiler/ssa/ssa_vec4i.h @@ -13,15 +13,17 @@ class SSAVec4i { public: SSAVec4i(); - SSAVec4i(int constant); - SSAVec4i(int constant0, int constant1, int constant2, int 
constant3); + explicit SSAVec4i(int constant); + explicit SSAVec4i(int constant0, int constant1, int constant2, int constant3); SSAVec4i(SSAInt i); SSAVec4i(SSAInt i0, SSAInt i1, SSAInt i2, SSAInt i3); explicit SSAVec4i(llvm::Value *v); SSAVec4i(SSAVec4f f32); - SSAInt operator[](SSAInt index); + SSAInt operator[](SSAInt index) const; + SSAInt operator[](int index) const; SSAVec4i insert(SSAInt index, SSAInt value); SSAVec4i insert(int index, SSAInt value); + SSAVec4i insert(int index, int value); static SSAVec4i bitcast(SSAVec4f f32); static SSAVec4i bitcast(SSAVec8s i16); static SSAVec4i shuffle(const SSAVec4i &f0, int index0, int index1, int index2, int index3); diff --git a/src/r_compiler/ssa/ssa_vec4i_ptr.cpp b/src/r_compiler/ssa/ssa_vec4i_ptr.cpp index 7138c30d2a..f75ccd43fa 100644 --- a/src/r_compiler/ssa/ssa_vec4i_ptr.cpp +++ b/src/r_compiler/ssa/ssa_vec4i_ptr.cpp @@ -1,7 +1,7 @@ +#include "r_compiler/llvm_include.h" #include "ssa_vec4i_ptr.h" #include "ssa_scope.h" -#include "r_compiler/llvm_include.h" SSAVec4iPtr::SSAVec4iPtr() : v(0) diff --git a/src/r_compiler/ssa/ssa_vec4i_ptr.h b/src/r_compiler/ssa/ssa_vec4i_ptr.h index 56937b1cce..257b4e34f2 100644 --- a/src/r_compiler/ssa/ssa_vec4i_ptr.h +++ b/src/r_compiler/ssa/ssa_vec4i_ptr.h @@ -15,6 +15,7 @@ public: static SSAVec4iPtr from_llvm(llvm::Value *v) { return SSAVec4iPtr(v); } static llvm::Type *llvm_type(); SSAVec4iPtr operator[](SSAInt index) const; + SSAVec4iPtr operator[](int index) const { return (*this)[SSAInt(index)]; } SSAVec4i load() const; SSAVec4i load_unaligned() const; void store(const SSAVec4i &new_value); diff --git a/src/r_compiler/ssa/ssa_vec8s.cpp b/src/r_compiler/ssa/ssa_vec8s.cpp index d61a4c4a9c..6016b551fe 100644 --- a/src/r_compiler/ssa/ssa_vec8s.cpp +++ b/src/r_compiler/ssa/ssa_vec8s.cpp @@ -1,9 +1,9 @@ +#include "r_compiler/llvm_include.h" #include "ssa_vec8s.h" #include "ssa_vec4i.h" #include "ssa_vec16ub.h" #include "ssa_scope.h" -#include "r_compiler/llvm_include.h" 
SSAVec8s::SSAVec8s() : v(0) @@ -77,12 +77,12 @@ SSAVec8s SSAVec8s::shuffle(const SSAVec8s &i0, const SSAVec8s &i1, int index0, i SSAVec8s SSAVec8s::extendhi(SSAVec16ub a) { - return SSAVec8s::bitcast(SSAVec16ub::shuffle(a, 0, 8, 16+8, 9, 16+9, 10, 16+10, 11, 16+11, 12, 16+12, 13, 16+13, 14, 16+14, 15, 16+15)); // _mm_unpackhi_epi8 + return SSAVec8s::bitcast(SSAVec16ub::shuffle(a, SSAVec16ub((unsigned char)0), 8, 16+8, 9, 16+9, 10, 16+10, 11, 16+11, 12, 16+12, 13, 16+13, 14, 16+14, 15, 16+15)); // _mm_unpackhi_epi8 } SSAVec8s SSAVec8s::extendlo(SSAVec16ub a) { - return SSAVec8s::bitcast(SSAVec16ub::shuffle(a, 0, 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7)); // _mm_unpacklo_epi8 + return SSAVec8s::bitcast(SSAVec16ub::shuffle(a, SSAVec16ub((unsigned char)0), 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7)); // _mm_unpacklo_epi8 } /* diff --git a/src/r_compiler/ssa/ssa_vec8s.h b/src/r_compiler/ssa/ssa_vec8s.h index aded358dde..40263773b4 100644 --- a/src/r_compiler/ssa/ssa_vec8s.h +++ b/src/r_compiler/ssa/ssa_vec8s.h @@ -11,8 +11,8 @@ class SSAVec8s { public: SSAVec8s(); - SSAVec8s(short constant); - SSAVec8s(short constant0, short constant1, short constant2, short constant3, short constant4, short constant5, short constant6, short constant7); + explicit SSAVec8s(short constant); + explicit SSAVec8s(short constant0, short constant1, short constant2, short constant3, short constant4, short constant5, short constant6, short constant7); explicit SSAVec8s(llvm::Value *v); SSAVec8s(SSAVec4i i0, SSAVec4i i1); static SSAVec8s bitcast(SSAVec16ub i8); From 4865e7109c69ccd621190ff6ab3612dd6a0b7b68 Mon Sep 17 00:00:00 2001 From: Major Cooke Date: Wed, 28 Sep 2016 21:20:15 -0500 Subject: [PATCH 149/912] Fixed merge conflicts. 
--- src/p_pspr.cpp | 97 +++++++++++++++++++++++++++------- src/p_pspr.h | 14 +++-- src/r_things.cpp | 37 ++++++++++--- wadsrc/static/actors/actor.txt | 7 ++- 4 files changed, 123 insertions(+), 32 deletions(-) diff --git a/src/p_pspr.cpp b/src/p_pspr.cpp index fab7e38d8d..4c9872993b 100644 --- a/src/p_pspr.cpp +++ b/src/p_pspr.cpp @@ -111,14 +111,16 @@ END_POINTERS //------------------------------------------------------------------------ DPSprite::DPSprite(player_t *owner, AActor *caller, int id) -: x(.0), y(.0), - oldx(.0), oldy(.0), - firstTic(true), - Flags(0), - Caller(caller), - Owner(owner), - ID(id), - processPending(true) +: x(.0), y(.0), + oldx(.0), oldy(.0), + alpha(1.), + firstTic(true), + Flags(0), + Caller(caller), + Owner(owner), + ID(id), + processPending(true), + RenderStyle(STYLE_Normal) { DPSprite *prev = nullptr; DPSprite *next = Owner->psprites; @@ -967,7 +969,7 @@ void A_OverlayOffset(AActor *self, int layer, double wx, double wy, int flags) player_t *player = self->player; DPSprite *psp; - if (player && (player->playerstate != PST_DEAD)) + if (player) { psp = player->FindPSprite(layer); @@ -1051,22 +1053,80 @@ DEFINE_ACTION_FUNCTION(AActor, A_OverlayFlags) //--------------------------------------------------------------------------- // -// PROC OverlayID -// Because non-action functions cannot acquire the ID of the overlay... 
+// PROC A_OverlayAlpha +// //--------------------------------------------------------------------------- -DEFINE_ACTION_FUNCTION(AActor, OverlayID) +DEFINE_ACTION_FUNCTION(AActor, A_OverlayAlpha) { PARAM_ACTION_PROLOGUE; + PARAM_INT(layer); + PARAM_FLOAT(alphaset); - if (ACTION_CALL_FROM_PSPRITE()) - { - ACTION_RETURN_INT(stateinfo->mPSPIndex); - } - ACTION_RETURN_INT(0); + if (self->player == nullptr) + return 0; + + DPSprite *pspr = self->player->FindPSprite(layer); + + if (pspr == nullptr) + return 0; + + pspr->alpha = clamp(alphaset, 0.0, 1.0); + + return 0; } +// NON-ACTION function to get the overlay alpha of a layer. +DEFINE_ACTION_FUNCTION(AActor, OverlayAlpha) +{ + if (numret > 0) + { + assert(ret != nullptr); + PARAM_SELF_PROLOGUE(AActor); + PARAM_INT(layer); + if (self->player == nullptr) + return 0; + + DPSprite *pspr = self->player->FindPSprite(layer); + + if (pspr == nullptr) + { + ret->SetFloat(0.0); + } + else + { + ret->SetFloat(pspr->alpha); + } + return 1; + } + return 0; +} + +//--------------------------------------------------------------------------- +// +// PROC A_OverlayRenderStyle +// +//--------------------------------------------------------------------------- + +DEFINE_ACTION_FUNCTION(AActor, A_OverlayRenderStyle) +{ + PARAM_ACTION_PROLOGUE; + PARAM_INT(layer); + PARAM_INT(style); + + if (self->player == nullptr) + return 0; + + DPSprite *pspr = self->player->FindPSprite(layer); + + if (pspr == nullptr || style >= STYLE_Count) + return 0; + + pspr->RenderStyle = style; + + return 0; +} //--------------------------------------------------------------------------- // @@ -1483,7 +1543,8 @@ void DPSprite::Serialize(FSerializer &arc) ("x", x) ("y", y) ("oldx", oldx) - ("oldy", oldy); + ("oldy", oldy) + ("alpha", alpha); } //------------------------------------------------------------------------ diff --git a/src/p_pspr.h b/src/p_pspr.h index 7e51096577..86f484a3a4 100644 --- a/src/p_pspr.h +++ b/src/p_pspr.h @@ -53,10 +53,13 @@ enum 
PSPLayers enum PSPFlags { - PSPF_ADDWEAPON = 1 << 0, - PSPF_ADDBOB = 1 << 1, - PSPF_POWDOUBLE = 1 << 2, - PSPF_CVARFAST = 1 << 3, + PSPF_ADDWEAPON = 1 << 0, + PSPF_ADDBOB = 1 << 1, + PSPF_POWDOUBLE = 1 << 2, + PSPF_CVARFAST = 1 << 3, + PSPF_ALPHA = 1 << 4, + PSPF_RENDERSTYLE = 1 << 5, + PSPF_FLIP = 1 << 6, }; class DPSprite : public DObject @@ -77,11 +80,12 @@ public: AActor* GetCaller() { return Caller; } void SetCaller(AActor *newcaller) { Caller = newcaller; } - double x, y; + double x, y, alpha; double oldx, oldy; bool firstTic; int Tics; int Flags; + int RenderStyle; private: DPSprite () {} diff --git a/src/r_things.cpp b/src/r_things.cpp index a45a6826a7..0c49a2dc2c 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -1290,7 +1290,7 @@ void R_AddSprites (sector_t *sec, int lightlevel, int fakeside) // // R_DrawPSprite // -void R_DrawPSprite(DPSprite *pspr, AActor *owner, float bobx, float boby, double wx, double wy, double ticfrac) +void R_DrawPSprite(DPSprite *pspr, AActor *owner, float bobx, float boby, double wx, double wy, double ticfrac, double alpha) { double tx; int x1; @@ -1326,7 +1326,7 @@ void R_DrawPSprite(DPSprite *pspr, AActor *owner, float bobx, float boby, double flip = sprframe->Flip & 1; tex = TexMan(picnum); - if (tex->UseType == FTexture::TEX_Null) + if (tex->UseType == FTexture::TEX_Null || pspr->RenderStyle == STYLE_None) return; if (pspr->firstTic) @@ -1363,10 +1363,10 @@ void R_DrawPSprite(DPSprite *pspr, AActor *owner, float bobx, float boby, double tx += tex->GetScaledWidth(); x2 = xs_RoundToInt(CenterX + tx * pspritexscale); - + // off the left side if (x2 <= 0) - return; + return; // store information in a vissprite vis = &avis[vispspindex]; @@ -1404,7 +1404,9 @@ void R_DrawPSprite(DPSprite *pspr, AActor *owner, float bobx, float boby, double vis->pic = tex; vis->ColormapNum = 0; - if (flip) + // If flip is used, provided that it's not already flipped (that would just invert itself) + // (It's an XOR...) 
+ if (!(flip) != !(pspr->Flags & PSPF_FLIP)) { vis->xiscale = -FLOAT2FIXED(pspritexiscale * tex->Scale.X); vis->startfrac = (tex->GetWidth() << FRACBITS) - 1; @@ -1422,8 +1424,26 @@ void R_DrawPSprite(DPSprite *pspr, AActor *owner, float bobx, float boby, double FDynamicColormap *colormap_to_use = nullptr; if (pspr->GetID() < PSP_TARGETCENTER) { - vis->Style.Alpha = float(owner->Alpha); - vis->Style.RenderStyle = owner->RenderStyle; + // Set the alpha based on if using the overlay's own or not. + vis->Style.Alpha = (pspr->Flags & PSPF_ALPHA) ? float(alpha) : float(owner->Alpha); + + // Set the render style + if (pspr->Flags & PSPF_RENDERSTYLE) + { + const int rs = clamp(pspr->RenderStyle, 0, STYLE_Count); + + if (rs == STYLE_Normal && vis->Style.Alpha < 1.0) + vis->Style.RenderStyle = LegacyRenderStyles[STYLE_Translucent]; + else + vis->Style.RenderStyle = LegacyRenderStyles[rs]; + } + else + { + if (owner->RenderStyle == LegacyRenderStyles[STYLE_Normal] && vis->Style.Alpha < 1.0) + vis->Style.RenderStyle = LegacyRenderStyles[STYLE_Translucent]; + else + vis->Style.RenderStyle = owner->RenderStyle; + } // The software renderer cannot invert the source without inverting the overlay // too. That means if the source is inverted, we need to do the reverse of what @@ -1657,9 +1677,10 @@ void R_DrawPlayerSprites () // It's possible this psprite's caller is now null but the layer itself hasn't been destroyed // because it didn't tick yet (if we typed 'take all' while in the console for example). // In this case let's simply not draw it to avoid crashing. 
+ if ((psp->GetID() != PSP_TARGETCENTER || CrosshairImage == nullptr) && psp->GetCaller() != nullptr) { - R_DrawPSprite(psp, camera, bobx, boby, wx, wy, r_TicFracF); + R_DrawPSprite(psp, camera, bobx, boby, wx, wy, r_TicFracF, psp->alpha); } psp = psp->GetNext(); diff --git a/wadsrc/static/actors/actor.txt b/wadsrc/static/actors/actor.txt index 44b801f511..77c60bbca1 100644 --- a/wadsrc/static/actors/actor.txt +++ b/wadsrc/static/actors/actor.txt @@ -59,6 +59,9 @@ ACTOR Actor native //: Thinker native int GetMissileDamage(int mask, int add, int ptr = AAPTR_DEFAULT); action native int OverlayID(); + // Overlay Functions + native float OverlayAlpha(int layer); + // Action functions // Meh, MBF redundant functions. Only for DeHackEd support. action native A_Turn(float angle = 0); @@ -340,7 +343,6 @@ ACTOR Actor native //: Thinker native state A_CheckSightOrRange(float distance, state label, bool two_dimension = false); native state A_CheckRange(float distance, state label, bool two_dimension = false); action native bool A_FaceMovementDirection(float offset = 0, float anglelimit = 0, float pitchlimit = 0, int flags = 0, int ptr = AAPTR_DEFAULT); - action native int A_ClearOverlays(int sstart = 0, int sstop = 0, bool safety = true); action native bool A_CopySpriteFrame(int from, int to, int flags = 0); action native bool A_SetSpriteAngle(float angle = 0, int ptr = AAPTR_DEFAULT); action native bool A_SetSpriteRotation(float angle = 0, int ptr = AAPTR_DEFAULT); @@ -352,9 +354,12 @@ ACTOR Actor native //: Thinker action native A_CopyFriendliness(int ptr_source = AAPTR_MASTER); action native bool A_Overlay(int layer, state start = "", bool nooverride = false); + action native int A_ClearOverlays(int sstart = 0, int sstop = 0, bool safety = true); action native A_WeaponOffset(float wx = 0, float wy = 32, int flags = 0); action native A_OverlayOffset(int layer = PSP_WEAPON, float wx = 0, float wy = 32, int flags = 0); action native A_OverlayFlags(int layer, int flags, bool 
set); + action native A_OverlayAlpha(int layer, float alphaset); + action native A_OverlayRenderStyle(int layer, int style); native int ACS_NamedExecute(name script, int mapnum=0, int arg1=0, int arg2=0, int arg3=0); native int ACS_NamedSuspend(name script, int mapnum=0); From 8cfeca655db95a917b8674609ba085fc71fbc9e8 Mon Sep 17 00:00:00 2001 From: Major Cooke Date: Thu, 29 Sep 2016 11:10:15 -0500 Subject: [PATCH 150/912] Added priority renderstyles, and added PSPF_FORCE(ALPHA / STYLE). - Renderstyles now override alpha based on which is used. - The new flags will override whatever renderstyle and alpha is currently being utilized. --- src/p_pspr.h | 2 + src/r_things.cpp | 97 +++++++++++++++++++++++++++--- wadsrc/static/actors/constants.txt | 13 ++-- 3 files changed, 98 insertions(+), 14 deletions(-) diff --git a/src/p_pspr.h b/src/p_pspr.h index 86f484a3a4..6a782f8e07 100644 --- a/src/p_pspr.h +++ b/src/p_pspr.h @@ -60,6 +60,8 @@ enum PSPFlags PSPF_ALPHA = 1 << 4, PSPF_RENDERSTYLE = 1 << 5, PSPF_FLIP = 1 << 6, + PSPF_FORCEALPHA = 1 << 7, + PSPF_FORCESTYLE = 1 << 8, }; class DPSprite : public DObject diff --git a/src/r_things.cpp b/src/r_things.cpp index 0c49a2dc2c..c8d0eacdc4 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -1424,27 +1424,104 @@ void R_DrawPSprite(DPSprite *pspr, AActor *owner, float bobx, float boby, double FDynamicColormap *colormap_to_use = nullptr; if (pspr->GetID() < PSP_TARGETCENTER) { - // Set the alpha based on if using the overlay's own or not. - vis->Style.Alpha = (pspr->Flags & PSPF_ALPHA) ? 
float(alpha) : float(owner->Alpha); + // [MC] Set the render style - // Set the render style if (pspr->Flags & PSPF_RENDERSTYLE) { const int rs = clamp(pspr->RenderStyle, 0, STYLE_Count); - - if (rs == STYLE_Normal && vis->Style.Alpha < 1.0) - vis->Style.RenderStyle = LegacyRenderStyles[STYLE_Translucent]; + + if (pspr->Flags & PSPF_FORCESTYLE) + { + if (rs == STYLE_Normal && vis->Style.Alpha < 1.0) + { + vis->Style.RenderStyle = LegacyRenderStyles[STYLE_Translucent]; + } + else + { + vis->Style.RenderStyle = LegacyRenderStyles[rs]; + } + } + else if (owner->RenderStyle == LegacyRenderStyles[STYLE_Fuzzy]) + { + vis->Style.RenderStyle = LegacyRenderStyles[STYLE_Fuzzy]; + } + else if (owner->RenderStyle == LegacyRenderStyles[STYLE_OptFuzzy]) + { + vis->Style.RenderStyle = LegacyRenderStyles[STYLE_OptFuzzy]; + vis->Style.RenderStyle.CheckFuzz(); + } + else if (owner->RenderStyle == LegacyRenderStyles[STYLE_Subtract]) + { + vis->Style.RenderStyle = LegacyRenderStyles[STYLE_Subtract]; + } else + { vis->Style.RenderStyle = LegacyRenderStyles[rs]; + } } else { - if (owner->RenderStyle == LegacyRenderStyles[STYLE_Normal] && vis->Style.Alpha < 1.0) - vis->Style.RenderStyle = LegacyRenderStyles[STYLE_Translucent]; - else - vis->Style.RenderStyle = owner->RenderStyle; + vis->Style.RenderStyle = owner->RenderStyle; } + // Set the alpha based on if using the overlay's own or not. Also adjust + // and override the alpha if not forced. 
+ if (pspr->Flags & PSPF_ALPHA) + { + if (pspr->Flags & PSPF_FORCEALPHA) + { + vis->Style.Alpha = float(alpha); + } + else if (vis->Style.RenderStyle == LegacyRenderStyles[STYLE_Fuzzy]) + { + vis->Style.Alpha = float(owner->Alpha); + } + else if (vis->Style.RenderStyle == LegacyRenderStyles[STYLE_OptFuzzy]) + { + FRenderStyle style = vis->Style.RenderStyle; + style.CheckFuzz(); + switch (style.BlendOp) + { + default: + vis->Style.Alpha = float(alpha * owner->Alpha); + break; + case STYLEOP_Fuzz: + case STYLEOP_Sub: + vis->Style.Alpha = float(owner->Alpha); + break; + } + + } + else if (vis->Style.RenderStyle == LegacyRenderStyles[STYLE_Subtract]) + { + vis->Style.Alpha = float(owner->Alpha); + } + else if (vis->Style.RenderStyle == LegacyRenderStyles[STYLE_Add] || + vis->Style.RenderStyle == LegacyRenderStyles[STYLE_Translucent] || + vis->Style.RenderStyle == LegacyRenderStyles[STYLE_TranslucentStencil] || + vis->Style.RenderStyle == LegacyRenderStyles[STYLE_AddStencil] || + vis->Style.RenderStyle == LegacyRenderStyles[STYLE_AddShaded]) + { + vis->Style.Alpha = float(alpha * owner->Alpha); + } + else + { + vis->Style.Alpha = float(owner->Alpha); + } + } + + // Should normal renderstyle come out on top at the end and we desire alpha, + // switch it to translucent. Normal never applies any sort of alpha. + if ((pspr->Flags & PSPF_ALPHA) && + vis->Style.RenderStyle == LegacyRenderStyles[STYLE_Normal] && + vis->Style.Alpha < 1.0) + { + vis->Style.RenderStyle = LegacyRenderStyles[STYLE_Translucent]; + vis->Style.Alpha = (pspr->Flags & PSPF_FORCEALPHA) ? float(alpha) : float(alpha * owner->Alpha); + } + + //----------------------------------------------------------------------------- + // The software renderer cannot invert the source without inverting the overlay // too. That means if the source is inverted, we need to do the reverse of what // the invert overlay flag says to do. 
diff --git a/wadsrc/static/actors/constants.txt b/wadsrc/static/actors/constants.txt index dce2488c25..35f39a39d8 100644 --- a/wadsrc/static/actors/constants.txt +++ b/wadsrc/static/actors/constants.txt @@ -594,10 +594,15 @@ enum // Flags for psprite layers enum { - PSPF_ADDWEAPON = 1 << 0, - PSPF_ADDBOB = 1 << 1, - PSPF_POWDOUBLE = 1 << 2, - PSPF_CVARFAST = 1 << 3, + PSPF_ADDWEAPON = 1 << 0, + PSPF_ADDBOB = 1 << 1, + PSPF_POWDOUBLE = 1 << 2, + PSPF_CVARFAST = 1 << 3, + PSPF_ALPHA = 1 << 4, + PSPF_RENDERSTYLE = 1 << 5, + PSPF_FLIP = 1 << 6, + PSPF_FORCEALPHA = 1 << 7, + PSPF_FORCESTYLE = 1 << 8, }; // Default psprite layers From d597af149494337e1a4ef6f3a17ea67a59bad61b Mon Sep 17 00:00:00 2001 From: Major Cooke Date: Mon, 3 Oct 2016 13:14:48 -0500 Subject: [PATCH 151/912] - Fixed a few corner cases where forcing alpha wouldn't work. - On the other hand, soultrans, fuzzy and stencil (as is, not including the mixing styles like stenciladd) are no longer accounted for by FORCEALPHA. --- src/r_data/renderstyle.h | 3 +++ src/r_draw.cpp | 6 ++++- src/r_things.cpp | 58 ++++++++++++++++++++++++---------------- 3 files changed, 43 insertions(+), 24 deletions(-) diff --git a/src/r_data/renderstyle.h b/src/r_data/renderstyle.h index 501c14c5f9..63e0856cd7 100644 --- a/src/r_data/renderstyle.h +++ b/src/r_data/renderstyle.h @@ -125,6 +125,9 @@ enum ERenderFlags // Actors only: Ignore sector fade and fade to black. To fade to white, // combine this with STYLEF_InvertOverlay. STYLEF_FadeToBlack = 64, + + // Force alpha. 
+ STYLEF_ForceAlpha = 128, }; union FRenderStyle diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 80b91ed2d4..d0d1013aea 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -2373,7 +2373,11 @@ ESPSResult R_SetPatchStyle (FRenderStyle style, fixed_t alpha, int translation, color = 0; } - if (style.Flags & STYLEF_TransSoulsAlpha) + if (style.Flags & STYLEF_ForceAlpha) + { + alpha = clamp(alpha, 0, OPAQUE); + } + else if (style.Flags & STYLEF_TransSoulsAlpha) { alpha = fixed_t(transsouls * OPAQUE); } diff --git a/src/r_things.cpp b/src/r_things.cpp index c8d0eacdc4..8f0f808f50 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -1290,7 +1290,7 @@ void R_AddSprites (sector_t *sec, int lightlevel, int fakeside) // // R_DrawPSprite // -void R_DrawPSprite(DPSprite *pspr, AActor *owner, float bobx, float boby, double wx, double wy, double ticfrac, double alpha) +void R_DrawPSprite(DPSprite *pspr, AActor *owner, float bobx, float boby, double wx, double wy, double ticfrac) { double tx; int x1; @@ -1303,6 +1303,7 @@ void R_DrawPSprite(DPSprite *pspr, AActor *owner, float bobx, float boby, double FTexture* tex; vissprite_t* vis; bool noaccel; + double alpha = owner->Alpha; static TArray avis; if (avis.Size() < vispspindex + 1) @@ -1432,14 +1433,7 @@ void R_DrawPSprite(DPSprite *pspr, AActor *owner, float bobx, float boby, double if (pspr->Flags & PSPF_FORCESTYLE) { - if (rs == STYLE_Normal && vis->Style.Alpha < 1.0) - { - vis->Style.RenderStyle = LegacyRenderStyles[STYLE_Translucent]; - } - else - { - vis->Style.RenderStyle = LegacyRenderStyles[rs]; - } + vis->Style.RenderStyle = LegacyRenderStyles[rs]; } else if (owner->RenderStyle == LegacyRenderStyles[STYLE_Fuzzy]) { @@ -1468,13 +1462,9 @@ void R_DrawPSprite(DPSprite *pspr, AActor *owner, float bobx, float boby, double // and override the alpha if not forced. 
if (pspr->Flags & PSPF_ALPHA) { - if (pspr->Flags & PSPF_FORCEALPHA) + if (vis->Style.RenderStyle == LegacyRenderStyles[STYLE_Fuzzy]) { - vis->Style.Alpha = float(alpha); - } - else if (vis->Style.RenderStyle == LegacyRenderStyles[STYLE_Fuzzy]) - { - vis->Style.Alpha = float(owner->Alpha); + alpha = owner->Alpha; } else if (vis->Style.RenderStyle == LegacyRenderStyles[STYLE_OptFuzzy]) { @@ -1483,18 +1473,18 @@ void R_DrawPSprite(DPSprite *pspr, AActor *owner, float bobx, float boby, double switch (style.BlendOp) { default: - vis->Style.Alpha = float(alpha * owner->Alpha); + alpha = pspr->alpha * owner->Alpha; break; case STYLEOP_Fuzz: case STYLEOP_Sub: - vis->Style.Alpha = float(owner->Alpha); + alpha = owner->Alpha; break; } } else if (vis->Style.RenderStyle == LegacyRenderStyles[STYLE_Subtract]) { - vis->Style.Alpha = float(owner->Alpha); + alpha = owner->Alpha; } else if (vis->Style.RenderStyle == LegacyRenderStyles[STYLE_Add] || vis->Style.RenderStyle == LegacyRenderStyles[STYLE_Translucent] || @@ -1502,11 +1492,11 @@ void R_DrawPSprite(DPSprite *pspr, AActor *owner, float bobx, float boby, double vis->Style.RenderStyle == LegacyRenderStyles[STYLE_AddStencil] || vis->Style.RenderStyle == LegacyRenderStyles[STYLE_AddShaded]) { - vis->Style.Alpha = float(alpha * owner->Alpha); + alpha = owner->Alpha * pspr->alpha; } else { - vis->Style.Alpha = float(owner->Alpha); + alpha = owner->Alpha; } } @@ -1517,9 +1507,31 @@ void R_DrawPSprite(DPSprite *pspr, AActor *owner, float bobx, float boby, double vis->Style.Alpha < 1.0) { vis->Style.RenderStyle = LegacyRenderStyles[STYLE_Translucent]; - vis->Style.Alpha = (pspr->Flags & PSPF_FORCEALPHA) ? float(alpha) : float(alpha * owner->Alpha); + alpha = owner->Alpha * pspr->alpha; } + // ALWAYS take priority if asked for, except fuzz. Fuzz does absolutely nothing + // no matter what way it's changed. + if (pspr->Flags & PSPF_FORCEALPHA) + { + //Due to lack of != operators... 
+ if (vis->Style.RenderStyle == LegacyRenderStyles[STYLE_Fuzzy] || + vis->Style.RenderStyle == LegacyRenderStyles[STYLE_SoulTrans] || + vis->Style.RenderStyle == LegacyRenderStyles[STYLE_Stencil]) + { } + else + { + alpha = pspr->alpha; + vis->Style.RenderStyle.Flags |= STYLEF_ForceAlpha; + } + } + vis->Style.Alpha = clamp(float(alpha), 0.f, 1.f); + + // Due to how some of the effects are handled, going to 0 or less causes some + // weirdness to display. There's no point rendering it anyway if it's 0. + if (vis->Style.Alpha <= 0.) + return; + //----------------------------------------------------------------------------- // The software renderer cannot invert the source without inverting the overlay @@ -1619,7 +1631,6 @@ void R_DrawPSprite(DPSprite *pspr, AActor *owner, float bobx, float boby, double { colormap_to_use = basecolormap; vis->Style.colormap = basecolormap->Maps; - vis->Style.RenderStyle = STYLE_Normal; } // Check for hardware-assisted 2D. If it's available, and this sprite is not @@ -1640,6 +1651,7 @@ void R_DrawPSprite(DPSprite *pspr, AActor *owner, float bobx, float boby, double return; } } + R_DrawVisSprite(vis); } @@ -1757,7 +1769,7 @@ void R_DrawPlayerSprites () if ((psp->GetID() != PSP_TARGETCENTER || CrosshairImage == nullptr) && psp->GetCaller() != nullptr) { - R_DrawPSprite(psp, camera, bobx, boby, wx, wy, r_TicFracF, psp->alpha); + R_DrawPSprite(psp, camera, bobx, boby, wx, wy, r_TicFracF); } psp = psp->GetNext(); From dcb6c1ac3a517e77a183d3f915b62a04fecea5ba Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 3 Oct 2016 23:30:00 +0200 Subject: [PATCH 152/912] Move ApplyFXAA to PostProcessScene --- src/gl/renderer/gl_postprocess.cpp | 1 + src/gl/scene/gl_scene.cpp | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gl/renderer/gl_postprocess.cpp b/src/gl/renderer/gl_postprocess.cpp index 5c5abf02a0..25050c22f9 100644 --- a/src/gl/renderer/gl_postprocess.cpp +++ b/src/gl/renderer/gl_postprocess.cpp @@ -137,6 +137,7 @@ 
void FGLRenderer::PostProcessScene() TonemapScene(); ColormapScene(); LensDistortScene(); + ApplyFXAA(); } //----------------------------------------------------------------------------- diff --git a/src/gl/scene/gl_scene.cpp b/src/gl/scene/gl_scene.cpp index d1fdc71773..59d6e09871 100644 --- a/src/gl/scene/gl_scene.cpp +++ b/src/gl/scene/gl_scene.cpp @@ -854,7 +854,6 @@ sector_t * FGLRenderer::RenderViewpoint (AActor * camera, GL_IRECT * bounds, flo if (mainview && FGLRenderBuffers::IsEnabled()) { PostProcessScene(); - ApplyFXAA(); // This should be done after postprocessing, not before. mBuffers->BindCurrentFB(); From e9d13e5d7440b450eeebbd5fd89322427718fbf9 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 4 Oct 2016 00:25:35 +0200 Subject: [PATCH 153/912] Remove gl_light_math (reverts last remains of old lightmath branch) --- src/gl/dynlights/gl_dynlight1.cpp | 26 +------ src/gl/renderer/gl_renderstate.cpp | 1 - src/gl/shaders/gl_shader.cpp | 1 - src/gl/shaders/gl_shader.h | 1 - src/gl/system/gl_cvars.h | 1 - wadsrc/static/menudef.zz | 8 --- wadsrc/static/shaders/glsl/main.fp | 94 +------------------------ wadsrc/static/shaders/glsl/main.vp | 2 +- wadsrc/static/shaders/glsl/shaderdefs.i | 1 - 9 files changed, 7 insertions(+), 128 deletions(-) diff --git a/src/gl/dynlights/gl_dynlight1.cpp b/src/gl/dynlights/gl_dynlight1.cpp index 5c5e26f030..d20f5c01b1 100644 --- a/src/gl/dynlights/gl_dynlight1.cpp +++ b/src/gl/dynlights/gl_dynlight1.cpp @@ -60,11 +60,6 @@ CVAR (Bool, gl_lights_checkside, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); CVAR (Bool, gl_light_sprites, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); CVAR (Bool, gl_light_particles, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); -CUSTOM_CVAR(Int, gl_light_math, 0, CVAR_ARCHIVE | CVAR_GLOBALCONFIG) -{ - if (self < 0 || self > 2) self = 0; -} - //========================================================================== // // Sets up the parameters to render one dynamic light onto one plane @@ -113,25 +108,10 @@ 
bool gl_GetLight(int group, Plane & p, ADynamicLight * light, bool checkside, FD i = 1; } - float worldPos[4] = { (float)pos.X, (float)pos.Z, (float)pos.Y, 1.0f }; - float eyePos[4]; - gl_RenderState.mViewMatrix.multMatrixPoint(worldPos, eyePos); - - if (gl_light_math != 0) - { - // Move light up because flasks/vials have their light source location at/below the floor. - // - // If the point is exactly on the wall plane it might cause some acne as some pixels could - // be in front and some behind. Move light just a tiny bit to avoid this. - eyePos[0] += 0.01f; - eyePos[1] += 5.01f; - eyePos[2] += 0.01f; - } - float *data = &ldata.arrays[i][ldata.arrays[i].Reserve(8)]; - data[0] = eyePos[0]; - data[1] = eyePos[1]; - data[2] = eyePos[2]; + data[0] = pos.X; + data[1] = pos.Z; + data[2] = pos.Y; data[3] = radius; data[4] = r; data[5] = g; diff --git a/src/gl/renderer/gl_renderstate.cpp b/src/gl/renderer/gl_renderstate.cpp index cfaaea3cc0..fa8237cca1 100644 --- a/src/gl/renderer/gl_renderstate.cpp +++ b/src/gl/renderer/gl_renderstate.cpp @@ -158,7 +158,6 @@ bool FRenderState::ApplyShader() activeShader->muTimer.Set(gl_frameMS * mShaderTimer / 1000.f); activeShader->muAlphaThreshold.Set(mAlphaThreshold); activeShader->muLightIndex.Set(mLightIndex); // will always be -1 for now - activeShader->muLightMath.Set(gl_light_math); activeShader->muClipSplit.Set(mClipSplit); if (mGlowEnabled) diff --git a/src/gl/shaders/gl_shader.cpp b/src/gl/shaders/gl_shader.cpp index 9368a268cb..9cbd69ce22 100644 --- a/src/gl/shaders/gl_shader.cpp +++ b/src/gl/shaders/gl_shader.cpp @@ -220,7 +220,6 @@ bool FShader::Load(const char * name, const char * vert_prog_lump, const char * muColormapStart.Init(hShader, "uFixedColormapStart"); muColormapRange.Init(hShader, "uFixedColormapRange"); muLightIndex.Init(hShader, "uLightIndex"); - muLightMath.Init(hShader, "uLightMath"); muFogColor.Init(hShader, "uFogColor"); muDynLightColor.Init(hShader, "uDynLightColor"); muObjectColor.Init(hShader, 
"uObjectColor"); diff --git a/src/gl/shaders/gl_shader.h b/src/gl/shaders/gl_shader.h index 934674cbeb..acdd530aae 100644 --- a/src/gl/shaders/gl_shader.h +++ b/src/gl/shaders/gl_shader.h @@ -266,7 +266,6 @@ class FShader FUniform1i muFixedColormap; FUniform4f muColormapStart; FUniform4f muColormapRange; - FBufferedUniform1i muLightMath; FBufferedUniform1i muLightIndex; FBufferedUniformPE muFogColor; FBufferedUniform4f muDynLightColor; diff --git a/src/gl/system/gl_cvars.h b/src/gl/system/gl_cvars.h index b7ac9e435a..836787117d 100644 --- a/src/gl/system/gl_cvars.h +++ b/src/gl/system/gl_cvars.h @@ -26,7 +26,6 @@ EXTERN_CVAR (Bool, gl_attachedlights); EXTERN_CVAR (Bool, gl_lights_checkside); EXTERN_CVAR (Bool, gl_light_sprites); EXTERN_CVAR (Bool, gl_light_particles); -EXTERN_CVAR (Int, gl_light_math); EXTERN_CVAR(Int, gl_fogmode) EXTERN_CVAR(Int, gl_lightmode) diff --git a/wadsrc/static/menudef.zz b/wadsrc/static/menudef.zz index 6eb56be948..d5c2bbc5c0 100644 --- a/wadsrc/static/menudef.zz +++ b/wadsrc/static/menudef.zz @@ -25,13 +25,6 @@ OptionValue "FilterModes" 4, "$OPTVAL_TRILINEAR" } -OptionValue "LightMathModes" -{ - 0, "$OPTVAL_LOW" - 1, "$OPTVAL_MEDIUM" - 2, "$OPTVAL_HIGH" -} - OptionValue "HWGammaModes" { 0, "$OPTVAL_ON" @@ -225,7 +218,6 @@ OptionMenu "GLLightOptions" Option "$GLLIGHTMNU_CLIPLIGHTS", gl_lights_checkside, "YesNo" Option "$GLLIGHTMNU_LIGHTSPRITES", gl_light_sprites, "YesNo" Option "$GLLIGHTMNU_LIGHTPARTICLES", gl_light_particles, "YesNo" - Option "$GLLIGHTMNU_LIGHTMATH", gl_light_math, "LightMathModes" } OptionMenu "GLPrefOptions" diff --git a/wadsrc/static/shaders/glsl/main.fp b/wadsrc/static/shaders/glsl/main.fp index 374edb59c0..1f270728df 100644 --- a/wadsrc/static/shaders/glsl/main.fp +++ b/wadsrc/static/shaders/glsl/main.fp @@ -30,88 +30,6 @@ vec4 Process(vec4 color); vec4 ProcessTexel(); vec4 ProcessLight(vec4 color); -// Smoothed normal used for the face, in eye space. Should be converted to an 'in' variable in the future. 
-vec3 pixelnormal; - -//=========================================================================== -// -// Calculates the face normal vector for the fragment, in eye space -// -//=========================================================================== - -vec3 calculateFaceNormal() -{ -#if __VERSION__ < 450 - vec3 dFdxPos = dFdx(pixelpos.xyz); - vec3 dFdyPos = dFdy(pixelpos.xyz); -#else - vec3 dFdxPos = dFdxCoarse(pixelpos.xyz); - vec3 dFdyPos = dFdyCoarse(pixelpos.xyz); -#endif - return normalize(cross(dFdxPos,dFdyPos)); -} - -//=========================================================================== -// -// Standard lambertian diffuse light calculation -// -//=========================================================================== - -float diffuseContribution(vec3 eyeLightDirection, vec3 eyeNormal) -{ - return max(dot(eyeNormal, eyeLightDirection), 0.0f); -} - -//=========================================================================== -// -// Blinn specular light calculation -// -//=========================================================================== - -float blinnSpecularContribution(float diffuseContribution, vec3 eyeLightDirection, vec3 eyePosition, vec3 eyeNormal, float glossiness, float specularLevel) -{ - if (diffuseContribution > 0.0f) - { - vec3 viewDir = normalize(-eyePosition); - vec3 halfDir = normalize(eyeLightDirection + viewDir); - float specAngle = max(dot(halfDir, eyeNormal), 0.0f); - float phExp = glossiness * 4.0f; - return specularLevel * pow(specAngle, phExp); - } - else - { - return 0.0f; - } -} - -//=========================================================================== -// -// Calculates the brightness of a dynamic point light -// -//=========================================================================== - -float pointLightAttenuation(vec4 lightpos) -{ - float attenuation = max(lightpos.w - distance(pixelpos.xyz, lightpos.xyz),0.0) / lightpos.w; - if (uLightMath == 0) - { - return attenuation; - } - else - { - vec3 
lightDirection = normalize(lightpos.xyz - pixelpos.xyz); - float diffuseAmount = diffuseContribution(lightDirection, pixelnormal); - if (uLightMath == 1) - { - return attenuation * diffuseAmount; - } - else - { - float specularAmount = blinnSpecularContribution(diffuseAmount, lightDirection, pixelpos.xyz, pixelnormal, 3.0, 1.2); - return attenuation * (diffuseAmount + specularAmount); - } - } -} //=========================================================================== // @@ -384,13 +302,6 @@ vec3 AmbientOcclusionColor() void main() { vec4 frag = ProcessTexel(); - -#if defined NUM_UBO_LIGHTS || defined SHADER_STORAGE_LIGHTS - if (uLightMath != 0) // Remove this if pixelnormal is converted to an 'in' variable - { - pixelnormal = calculateFaceNormal(); - } -#endif #ifndef NO_ALPHATEST if (frag.a <= uAlphaThreshold) discard; @@ -416,11 +327,12 @@ void main() } else { - fogdist = max(16.0, length(pixelpos.xyz)); + fogdist = max(16.0, distance(pixelpos.xyz, uCameraPos.xyz)); } fogfactor = exp2 (uFogDensity * fogdist); } + frag *= getLightColor(fogdist, fogfactor); #if defined NUM_UBO_LIGHTS || defined SHADER_STORAGE_LIGHTS @@ -486,7 +398,7 @@ void main() } else { - fogdist = max(16.0, length(pixelpos.xyz)); + fogdist = max(16.0, distance(pixelpos.xyz, uCameraPos.xyz)); } fogfactor = exp2 (uFogDensity * fogdist); diff --git a/wadsrc/static/shaders/glsl/main.vp b/wadsrc/static/shaders/glsl/main.vp index 0baca31f92..1de8854c0d 100644 --- a/wadsrc/static/shaders/glsl/main.vp +++ b/wadsrc/static/shaders/glsl/main.vp @@ -47,7 +47,7 @@ void main() vColor = aColor; #ifndef SIMPLE - pixelpos.xyz = eyeCoordPos.xyz; + pixelpos.xyz = worldcoord.xyz; pixelpos.w = -eyeCoordPos.z/eyeCoordPos.w; glowdist.x = -((uGlowTopPlane.w + uGlowTopPlane.x * worldcoord.x + uGlowTopPlane.y * worldcoord.z) * uGlowTopPlane.z) - worldcoord.y; diff --git a/wadsrc/static/shaders/glsl/shaderdefs.i b/wadsrc/static/shaders/glsl/shaderdefs.i index 81edd7b046..8c5697a661 100644 --- 
a/wadsrc/static/shaders/glsl/shaderdefs.i +++ b/wadsrc/static/shaders/glsl/shaderdefs.i @@ -44,7 +44,6 @@ uniform int uFogEnabled; // dynamic lights uniform int uLightIndex; -uniform int uLightMath; // 0, when using only attenuation, 1 for diffuse light, 2 for blinn specular light // quad drawer stuff #ifdef USE_QUAD_DRAWER From e05ed47fee1599eef2c80066eeb96dfe60a1d34f Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 4 Oct 2016 01:36:26 +0200 Subject: [PATCH 154/912] Remove C++ and SSE drawers --- src/r_draw_rgba.cpp | 1178 +---------------------------------------- src/r_draw_rgba.h | 12 - src/r_draw_rgba_sse.h | 367 ------------- 3 files changed, 2 insertions(+), 1555 deletions(-) delete mode 100644 src/r_draw_rgba_sse.h diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index ae6c26cfcf..c5b1b478e6 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -43,10 +43,6 @@ #include "gi.h" #include "stats.h" #include "x86.h" -#ifndef NO_SSE -#include -#include -#endif #include extern "C" short spanend[MAXHEIGHT]; @@ -55,9 +51,7 @@ extern float rw_lightstep; extern int wallshade; // Use multiple threads when drawing -CVAR(Bool, r_multithreaded, false, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); -// [SP] Set Max Threads to a sane amount -CVAR(Int, r_multithreadedmax, 1024, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); +CVAR(Bool, r_multithreaded, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); // Use linear filtering when scaling up CVAR(Bool, r_magfilter, false, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); @@ -68,48 +62,8 @@ CVAR(Bool, r_minfilter, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); // Use mipmapped textures CVAR(Bool, r_mipmap, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); -#ifndef NO_SSE - -#ifdef _MSC_VER -#pragma warning(disable: 4101) // warning C4101: unreferenced local variable -#endif - -// Generate SSE drawers: -#define VecCommand(name) name##_SSE_Command -#define VEC_SHADE_VARS SSE_SHADE_VARS -#define VEC_SHADE_SIMPLE_INIT SSE_SHADE_SIMPLE_INIT -#define VEC_SHADE_SIMPLE_INIT4 
SSE_SHADE_SIMPLE_INIT4 -#define VEC_SHADE_SIMPLE SSE_SHADE_SIMPLE -#define VEC_SHADE_INIT SSE_SHADE_INIT -#define VEC_SHADE_INIT4 SSE_SHADE_INIT4 -#define VEC_SHADE SSE_SHADE -#include "r_draw_rgba_sse.h" -/* -// Generate AVX drawers: -#undef VecCommand -#undef VEC_SHADE_SIMPLE_INIT -#undef VEC_SHADE_SIMPLE_INIT4 -#undef VEC_SHADE_SIMPLE -#undef VEC_SHADE_INIT -#undef VEC_SHADE_INIT4 -#undef VEC_SHADE -#define VecCommand(name) name##_AVX_Command -#define VEC_SHADE_SIMPLE_INIT AVX_LINEAR_SHADE_SIMPLE_INIT -#define VEC_SHADE_SIMPLE_INIT4 AVX_LINEAR_SHADE_SIMPLE_INIT4 -#define VEC_SHADE_SIMPLE AVX_LINEAR_SHADE_SIMPLE -#define VEC_SHADE_INIT AVX_LINEAR_SHADE_INIT -#define VEC_SHADE_INIT4 AVX_LINEAR_SHADE_INIT4 -#define VEC_SHADE AVX_LINEAR_SHADE -#include "r_draw_rgba_sse.h" -*/ -#endif - ///////////////////////////////////////////////////////////////////////////// -#ifndef NO_SSE -__m128i SampleBgra::samplertable[256 * 2]; -#endif - DrawerCommandQueue *DrawerCommandQueue::Instance() { static DrawerCommandQueue queue; @@ -118,27 +72,6 @@ DrawerCommandQueue *DrawerCommandQueue::Instance() DrawerCommandQueue::DrawerCommandQueue() { -#ifndef NO_SSE - for (int inv_b = 0; inv_b < 16; inv_b++) - { - for (int inv_a = 0; inv_a < 16; inv_a++) - { - int a = 16 - inv_a; - int b = 16 - inv_b; - - int ab = a * b; - int invab = inv_a * b; - int ainvb = a * inv_b; - int invainvb = inv_a * inv_b; - - __m128i ab_invab = _mm_set_epi16(invab, invab, invab, invab, ab, ab, ab, ab); - __m128i ainvb_invainvb = _mm_set_epi16(invainvb, invainvb, invainvb, invainvb, ainvb, ainvb, ainvb, ainvb); - - _mm_store_si128(SampleBgra::samplertable + inv_b * 32 + inv_a * 2, ab_invab); - _mm_store_si128(SampleBgra::samplertable + inv_b * 32 + inv_a * 2 + 1, ainvb_invainvb); - } - } -#endif } DrawerCommandQueue::~DrawerCommandQueue() @@ -200,7 +133,7 @@ void DrawerCommandQueue::Finish() DrawerThread thread; thread.core = 0; - thread.num_cores = queue->threads.size() + 1; + thread.num_cores = 
(int)(queue->threads.size() + 1); for (int pass = 0; pass < queue->num_passes; pass++) { @@ -1249,310 +1182,6 @@ public: } }; -///////////////////////////////////////////////////////////////////////////// - -class DrawerSpanCommand : public DrawerCommand -{ -public: - fixed_t _xfrac; - fixed_t _yfrac; - fixed_t _xstep; - fixed_t _ystep; - int _x1; - int _x2; - int _y; - int _xbits; - int _ybits; - BYTE * RESTRICT _destorg; - - const uint32_t * RESTRICT _source; - uint32_t _light; - ShadeConstants _shade_constants; - bool _nearest_filter; - - uint32_t _srcalpha; - uint32_t _destalpha; - - DrawerSpanCommand() - { - _xfrac = ds_xfrac; - _yfrac = ds_yfrac; - _xstep = ds_xstep; - _ystep = ds_ystep; - _x1 = ds_x1; - _x2 = ds_x2; - _y = ds_y; - _xbits = ds_xbits; - _ybits = ds_ybits; - _destorg = dc_destorg; - - _source = (const uint32_t*)ds_source; - _light = LightBgra::calc_light_multiplier(ds_light); - _shade_constants = ds_shade_constants; - _nearest_filter = !SampleBgra::span_sampler_setup(_source, _xbits, _ybits, _xstep, _ystep, ds_source_mipmapped); - - _srcalpha = dc_srcalpha >> (FRACBITS - 8); - _destalpha = dc_destalpha >> (FRACBITS - 8); - } - - class LoopIterator - { - public: - uint32_t *dest; - int count; - dsfixed_t xfrac; - dsfixed_t yfrac; - dsfixed_t xstep; - dsfixed_t ystep; - BYTE yshift; - BYTE xshift; - int xmask; - bool is_64x64; - bool skipped; - - LoopIterator(DrawerSpanCommand *command, DrawerThread *thread) - { - dest = ylookup[command->_y] + command->_x1 + (uint32_t*)command->_destorg; - count = command->_x2 - command->_x1 + 1; - xfrac = command->_xfrac; - yfrac = command->_yfrac; - xstep = command->_xstep; - ystep = command->_ystep; - yshift = 32 - command->_ybits; - xshift = yshift - command->_xbits; - xmask = ((1 << command->_xbits) - 1) << command->_ybits; - is_64x64 = command->_xbits == 6 && command->_ybits == 6; - skipped = thread->line_skipped_by_thread(command->_y); - } - - // 64x64 is the most common case by far, so special case it. 
- int spot64() - { - return ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - } - - int spot() - { - return ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - } - - explicit operator bool() - { - return !skipped && count > 0; - } - - bool next() - { - dest++; - xfrac += xstep; - yfrac += ystep; - return (--count) != 0; - } - }; -}; - -class DrawSpanRGBACommand : public DrawerSpanCommand -{ -public: - void Execute(DrawerThread *thread) override - { - LoopIterator loop(this, thread); - if (!loop) return; - - if (_nearest_filter) - { - if (loop.is_64x64) - { - do - { - *loop.dest = LightBgra::shade_bgra(_source[loop.spot64()], _light, _shade_constants); - } while (loop.next()); - } - else - { - do - { - *loop.dest = LightBgra::shade_bgra(_source[loop.spot()], _light, _shade_constants); - } while (loop.next()); - } - } - else - { - if (loop.is_64x64) - { - do - { - *loop.dest = LightBgra::shade_bgra(SampleBgra::sample_bilinear(_source, loop.xfrac, loop.yfrac, 26, 26), _light, _shade_constants); - } while (loop.next()); - } - else - { - do - { - *loop.dest = LightBgra::shade_bgra(SampleBgra::sample_bilinear(_source, loop.xfrac, loop.yfrac, 32 - _xbits, 32 - _ybits), _light, _shade_constants); - } while (loop.next()); - } - } - } -}; - -class DrawSpanMaskedRGBACommand : public DrawerSpanCommand -{ -public: - void Execute(DrawerThread *thread) override - { - LoopIterator loop(this, thread); - if (!loop) return; - - if (_nearest_filter) - { - if (loop.is_64x64) - { - do - { - uint32_t fg = LightBgra::shade_bgra(_source[loop.spot64()], _light, _shade_constants); - *loop.dest = BlendBgra::alpha_blend(fg, *loop.dest); - } while (loop.next()); - } - else - { - do - { - uint32_t fg = LightBgra::shade_bgra(_source[loop.spot()], _light, _shade_constants); - *loop.dest = BlendBgra::alpha_blend(fg, *loop.dest); - } while (loop.next()); - } - } - else - { - if (loop.is_64x64) - { - do - { - uint32_t fg = LightBgra::shade_bgra(SampleBgra::sample_bilinear(_source, 
loop.xfrac, loop.yfrac, 26, 26), _light, _shade_constants); - *loop.dest = BlendBgra::alpha_blend(fg, *loop.dest); - } while (loop.next()); - } - else - { - do - { - uint32_t fg = LightBgra::shade_bgra(SampleBgra::sample_bilinear(_source, loop.xfrac, loop.yfrac, 32 - _xbits, 32 - _ybits), _light, _shade_constants); - *loop.dest = BlendBgra::alpha_blend(fg, *loop.dest); - } while (loop.next()); - } - } - } -}; - -class DrawSpanTranslucentRGBACommand : public DrawerSpanCommand -{ -public: - void Execute(DrawerThread *thread) override - { - LoopIterator loop(this, thread); - if (!loop) return; - - if (loop.is_64x64) - { - do - { - uint32_t fg = LightBgra::shade_bgra(_source[loop.spot64()], _light, _shade_constants); - *loop.dest = BlendBgra::add(fg, *loop.dest, _srcalpha, _destalpha); - } while (loop.next()); - } - else - { - do - { - uint32_t fg = LightBgra::shade_bgra(_source[loop.spot()], _light, _shade_constants); - *loop.dest = BlendBgra::add(fg, *loop.dest, _srcalpha, _destalpha); - } while (loop.next()); - } - } -}; - -class DrawSpanMaskedTranslucentRGBACommand : public DrawerSpanCommand -{ -public: - void Execute(DrawerThread *thread) override - { - LoopIterator loop(this, thread); - if (!loop) return; - - if (loop.is_64x64) - { - do - { - uint32_t fg = LightBgra::shade_bgra(_source[loop.spot64()], _light, _shade_constants); - *loop.dest = BlendBgra::add(fg, *loop.dest, _srcalpha, calc_blend_bgalpha(fg, _destalpha)); - } while (loop.next()); - } - else - { - do - { - uint32_t fg = LightBgra::shade_bgra(_source[loop.spot()], _light, _shade_constants); - *loop.dest = BlendBgra::add(fg, *loop.dest, _srcalpha, calc_blend_bgalpha(fg, _destalpha)); - } while (loop.next()); - } - } -}; - -class DrawSpanAddClampRGBACommand : public DrawerSpanCommand -{ -public: - void Execute(DrawerThread *thread) override - { - LoopIterator loop(this, thread); - if (!loop) return; - - if (loop.is_64x64) - { - do - { - uint32_t fg = LightBgra::shade_bgra(_source[loop.spot64()], 
_light, _shade_constants); - *loop.dest = BlendBgra::add(fg, *loop.dest, _srcalpha, _destalpha); - } while (loop.next()); - } - else - { - do - { - uint32_t fg = LightBgra::shade_bgra(_source[loop.spot()], _light, _shade_constants); - *loop.dest = BlendBgra::add(fg, *loop.dest, _srcalpha, _destalpha); - } while (loop.next()); - } - } -}; - -class DrawSpanMaskedAddClampRGBACommand : public DrawerSpanCommand -{ -public: - void Execute(DrawerThread *thread) override - { - LoopIterator loop(this, thread); - if (!loop) return; - - if (loop.is_64x64) - { - do - { - uint32_t fg = LightBgra::shade_bgra(_source[loop.spot64()], _light, _shade_constants); - *loop.dest = BlendBgra::add(fg, *loop.dest, _srcalpha, calc_blend_bgalpha(fg, _destalpha)); - } while (loop.next()); - } - else - { - do - { - uint32_t fg = LightBgra::shade_bgra(_source[loop.spot()], _light, _shade_constants); - *loop.dest = BlendBgra::add(fg, *loop.dest, _srcalpha, calc_blend_bgalpha(fg, _destalpha)); - } while (loop.next()); - } - } -}; - class FillSpanRGBACommand : public DrawerCommand { int _x1; @@ -1708,735 +1337,6 @@ public: ///////////////////////////////////////////////////////////////////////////// -class DrawerWall1Command : public DrawerCommand -{ -public: - BYTE * RESTRICT _dest; - int _pitch; - int _count; - DWORD _texturefrac; - uint32_t _texturefracx; - DWORD _iscale; - uint32_t _textureheight; - - const uint32 * RESTRICT _source; - const uint32 * RESTRICT _source2; - uint32_t _light; - ShadeConstants _shade_constants; - - uint32_t _srcalpha; - uint32_t _destalpha; - - DrawerWall1Command() - { - _dest = dc_dest; - _pitch = dc_pitch; - _count = dc_count; - _texturefrac = dc_texturefrac; - _texturefracx = dc_texturefracx; - _iscale = dc_iscale; - _textureheight = dc_textureheight; - - _source = (const uint32 *)dc_source; - _source2 = (const uint32 *)dc_source2; - _light = LightBgra::calc_light_multiplier(dc_light); - _shade_constants = dc_shade_constants; - - _srcalpha = dc_srcalpha >> 
(FRACBITS - 8); - _destalpha = dc_destalpha >> (FRACBITS - 8); - } - - class LoopIterator - { - public: - uint32_t *dest; - int pitch; - int count; - uint32_t fracstep; - uint32_t frac; - uint32_t texturefracx; - uint32_t height; - uint32_t one; - - LoopIterator(DrawerWall1Command *command, DrawerThread *thread) - { - count = thread->count_for_thread(command->_dest_y, command->_count); - if (count <= 0) - return; - - fracstep = command->_iscale * thread->num_cores; - frac = command->_texturefrac + command->_iscale * thread->skipped_by_thread(command->_dest_y); - texturefracx = command->_texturefracx; - dest = thread->dest_for_thread(command->_dest_y, command->_pitch, (uint32_t*)command->_dest); - pitch = command->_pitch * thread->num_cores; - - height = command->_textureheight; - one = ((0x80000000 + height - 1) / height) * 2 + 1; - } - - explicit operator bool() - { - return count > 0; - } - - int sample_index() - { - return ((frac >> FRACBITS) * height) >> FRACBITS; - } - - bool next() - { - frac += fracstep; - dest += pitch; - return (--count) != 0; - } - }; -}; - -class DrawerWall4Command : public DrawerCommand -{ -public: - BYTE * RESTRICT _dest; - int _count; - int _pitch; - ShadeConstants _shade_constants; - uint32_t _vplce[4]; - uint32_t _vince[4]; - uint32_t _buftexturefracx[4]; - uint32_t _bufheight[4]; - const uint32_t * RESTRICT _bufplce[4]; - const uint32_t * RESTRICT _bufplce2[4]; - uint32_t _light[4]; - - uint32_t _srcalpha; - uint32_t _destalpha; - - DrawerWall4Command() - { - _dest = dc_dest; - _count = dc_count; - _pitch = dc_pitch; - _shade_constants = dc_shade_constants; - for (int i = 0; i < 4; i++) - { - _vplce[i] = vplce[i]; - _vince[i] = vince[i]; - _buftexturefracx[i] = buftexturefracx[i]; - _bufheight[i] = bufheight[i]; - _bufplce[i] = (const uint32_t *)bufplce[i]; - _bufplce2[i] = (const uint32_t *)bufplce2[i]; - _light[i] = LightBgra::calc_light_multiplier(palookuplight[i]); - } - _srcalpha = dc_srcalpha >> (FRACBITS - 8); - _destalpha = 
dc_destalpha >> (FRACBITS - 8); - } - - class LoopIterator - { - public: - uint32_t *dest; - int pitch; - int count; - uint32_t vplce[4]; - uint32_t vince[4]; - uint32_t height[4]; - uint32_t one[4]; - - LoopIterator(DrawerWall4Command *command, DrawerThread *thread) - { - count = thread->count_for_thread(command->_dest_y, command->_count); - if (count <= 0) - return; - - dest = thread->dest_for_thread(command->_dest_y, command->_pitch, (uint32_t*)command->_dest); - pitch = command->_pitch * thread->num_cores; - - int skipped = thread->skipped_by_thread(command->_dest_y); - for (int i = 0; i < 4; i++) - { - vplce[i] = command->_vplce[i] + command->_vince[i] * skipped; - vince[i] = command->_vince[i] * thread->num_cores; - height[i] = command->_bufheight[i]; - one[i] = ((0x80000000 + height[i] - 1) / height[i]) * 2 + 1; - } - } - - explicit operator bool() - { - return count > 0; - } - - int sample_index(int col) - { - return ((vplce[col] >> FRACBITS) * height[col]) >> FRACBITS; - } - - bool next() - { - vplce[0] += vince[0]; - vplce[1] += vince[1]; - vplce[2] += vince[2]; - vplce[3] += vince[3]; - dest += pitch; - return (--count) != 0; - } - }; - -#ifdef NO_SSE - struct NearestSampler - { - FORCEINLINE static uint32_t Sample1(DrawerWall4Command &cmd, LoopIterator &loop, int index) - { - return cmd._bufplce[index][loop.sample_index(index)]; - } - }; - struct LinearSampler - { - FORCEINLINE static uint32_t Sample1(DrawerWall4Command &cmd, LoopIterator &loop, int index) - { - return SampleBgra::sample_bilinear(cmd._bufplce[index], cmd._bufplce2[index], cmd._buftexturefracx[index], loop.vplce[index], loop.one[index], loop.height[index]); - } - }; -#else - struct NearestSampler - { - FORCEINLINE static __m128i Sample4(DrawerWall4Command &cmd, LoopIterator &loop) - { - return _mm_set_epi32(cmd._bufplce[3][loop.sample_index(3)], cmd._bufplce[2][loop.sample_index(2)], cmd._bufplce[1][loop.sample_index(1)], cmd._bufplce[0][loop.sample_index(0)]); - } - }; - - struct 
LinearSampler - { - FORCEINLINE static __m128i Sample4(DrawerWall4Command &cmd, LoopIterator &loop) - { - __m128i fg; - VEC_SAMPLE_BILINEAR4_COLUMN(fg, cmd._bufplce, cmd._bufplce2, cmd._buftexturefracx, loop.vplce, loop.one, loop.height); - return fg; - } - }; -#endif - -#ifdef NO_SSE - template - struct Copy - { - Copy(DrawerWall4Command &cmd, LoopIterator &loop) - { - } - void Blend(DrawerWall4Command &cmd, LoopIterator &loop) - { - for (int i = 0; i < 4; i++) - { - uint32_t fg = LightBgra::shade_bgra(Sampler::Sample1(cmd, loop, i), cmd._light[i], cmd._shade_constants); - loop.dest[i] = BlendBgra::copy(fg); - } - } - }; - - template - struct Mask - { - Mask(DrawerWall4Command &cmd, LoopIterator &loop) - { - } - void Blend(DrawerWall4Command &cmd, LoopIterator &loop) - { - for (int i = 0; i < 4; i++) - { - uint32_t fg = LightBgra::shade_bgra(Sampler::Sample1(cmd, loop, i), cmd._light[i], cmd._shade_constants); - loop.dest[i] = BlendBgra::alpha_blend(fg, loop.dest[i]); - } - } - }; - - template - struct TMaskAdd - { - TMaskAdd(DrawerWall4Command &cmd, LoopIterator &loop) - { - } - void Blend(DrawerWall4Command &cmd, LoopIterator &loop) - { - for (int i = 0; i < 4; i++) - { - uint32_t fg = LightBgra::shade_bgra(Sampler::Sample1(cmd, loop, i), cmd._light[i], cmd._shade_constants); - loop.dest[i] = BlendBgra::add(fg, loop.dest[i], cmd._srcalpha, calc_blend_bgalpha(fg, cmd._destalpha)); - } - } - }; - - template - struct TMaskSub - { - TMaskSub(DrawerWall4Command &cmd, LoopIterator &loop) - { - } - void Blend(DrawerWall4Command &cmd, LoopIterator &loop) - { - for (int i = 0; i < 4; i++) - { - uint32_t fg = LightBgra::shade_bgra(Sampler::Sample1(cmd, loop, i), cmd._light[i], cmd._shade_constants); - loop.dest[i] = BlendBgra::sub(fg, loop.dest[i], cmd._srcalpha, calc_blend_bgalpha(fg, cmd._destalpha)); - } - } - }; - - template - struct TMaskRevSub - { - TMaskRevSub(DrawerWall4Command &cmd, LoopIterator &loop) - { - } - void Blend(DrawerWall4Command &cmd, LoopIterator 
&loop) - { - for (int i = 0; i < 4; i++) - { - uint32_t fg = LightBgra::shade_bgra(Sampler::Sample1(cmd, loop, i), cmd._light[i], cmd._shade_constants); - loop.dest[i] = BlendBgra::revsub(fg, loop.dest[i], cmd._srcalpha, calc_blend_bgalpha(fg, cmd._destalpha)); - } - } - }; - - typedef Copy CopyNearestSimple; - typedef Copy CopyLinearSimple; - typedef Copy CopyNearest; - typedef Copy CopyLinear; - typedef Mask MaskNearestSimple; - typedef Mask MaskLinearSimple; - typedef Mask MaskNearest; - typedef Mask MaskLinear; - typedef TMaskAdd TMaskAddNearestSimple; - typedef TMaskAdd TMaskAddLinearSimple; - typedef TMaskAdd TMaskAddNearest; - typedef TMaskAdd TMaskAddLinear; - typedef TMaskSub TMaskSubNearestSimple; - typedef TMaskSub TMaskSubLinearSimple; - typedef TMaskSub TMaskSubNearest; - typedef TMaskSub TMaskSubLinear; - typedef TMaskRevSub TMaskRevSubNearestSimple; - typedef TMaskRevSub TMaskRevSubLinearSimple; - typedef TMaskRevSub TMaskRevSubNearest; - typedef TMaskRevSub TMaskRevSubLinear; -#else - template - struct CopySimple - { - VEC_SHADE_VARS(); - CopySimple(DrawerWall4Command &cmd, LoopIterator &loop) - { - VEC_SHADE_SIMPLE_INIT4(cmd._light[3], cmd._light[2], cmd._light[1], cmd._light[0]); - } - void Blend(DrawerWall4Command &cmd, LoopIterator &loop) - { - __m128i fg = Sampler::Sample4(cmd, loop); - VEC_SHADE_SIMPLE(fg); - _mm_storeu_si128((__m128i*)loop.dest, fg); - } - }; - - template - struct Copy - { - VEC_SHADE_VARS(); - Copy(DrawerWall4Command &cmd, LoopIterator &loop) - { - VEC_SHADE_INIT4(cmd._light[3], cmd._light[2], cmd._light[1], cmd._light[0], cmd._shade_constants); - } - void Blend(DrawerWall4Command &cmd, LoopIterator &loop) - { - __m128i fg = Sampler::Sample4(cmd, loop); - VEC_SHADE(fg, cmd._shade_constants); - _mm_storeu_si128((__m128i*)loop.dest, fg); - } - }; - - template - struct MaskSimple - { - VEC_SHADE_VARS(); - MaskSimple(DrawerWall4Command &cmd, LoopIterator &loop) - { - VEC_SHADE_SIMPLE_INIT4(cmd._light[3], cmd._light[2], 
cmd._light[1], cmd._light[0]); - } - void Blend(DrawerWall4Command &cmd, LoopIterator &loop) - { - __m128i fg = Sampler::Sample4(cmd, loop); - __m128i bg = _mm_loadu_si128((const __m128i*)loop.dest); - VEC_SHADE_SIMPLE(fg); - VEC_ALPHA_BLEND(fg, bg); - _mm_storeu_si128((__m128i*)loop.dest, fg); - } - }; - - template - struct Mask - { - VEC_SHADE_VARS(); - Mask(DrawerWall4Command &cmd, LoopIterator &loop) - { - VEC_SHADE_INIT4(cmd._light[3], cmd._light[2], cmd._light[1], cmd._light[0], cmd._shade_constants); - } - void Blend(DrawerWall4Command &cmd, LoopIterator &loop) - { - __m128i fg = Sampler::Sample4(cmd, loop); - __m128i bg = _mm_loadu_si128((const __m128i*)loop.dest); - VEC_SHADE(fg, cmd._shade_constants); - VEC_ALPHA_BLEND(fg, bg); - _mm_storeu_si128((__m128i*)loop.dest, fg); - } - }; - - template - struct TMaskAddSimple - { - VEC_SHADE_VARS(); - VEC_CALC_BLEND_ALPHA_VARS(); - TMaskAddSimple(DrawerWall4Command &cmd, LoopIterator &loop) - { - VEC_SHADE_SIMPLE_INIT4(cmd._light[3], cmd._light[2], cmd._light[1], cmd._light[0]); - VEC_CALC_BLEND_ALPHA_INIT(cmd._srcalpha, cmd._destalpha); - } - void Blend(DrawerWall4Command &cmd, LoopIterator &loop) - { - __m128i fg = Sampler::Sample4(cmd, loop); - __m128i bg = _mm_loadu_si128((const __m128i*)loop.dest); - - VEC_CALC_BLEND_ALPHA(fg); - VEC_SHADE_SIMPLE(fg); - - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); - - __m128i out_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, fg_alpha_hi), _mm_mullo_epi16(bg_hi, bg_alpha_hi)), 8); - __m128i out_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, fg_alpha_lo), _mm_mullo_epi16(bg_lo, bg_alpha_lo)), 8); - __m128i out = _mm_packus_epi16(out_lo, out_hi); - - _mm_storeu_si128((__m128i*)loop.dest, out); - } - }; - - template - struct TMaskAdd - { - VEC_SHADE_VARS(); 
- VEC_CALC_BLEND_ALPHA_VARS(); - TMaskAdd(DrawerWall4Command &cmd, LoopIterator &loop) - { - VEC_SHADE_INIT4(cmd._light[3], cmd._light[2], cmd._light[1], cmd._light[0], cmd._shade_constants); - VEC_CALC_BLEND_ALPHA_INIT(cmd._srcalpha, cmd._destalpha); - } - void Blend(DrawerWall4Command &cmd, LoopIterator &loop) - { - __m128i fg = Sampler::Sample4(cmd, loop); - __m128i bg = _mm_loadu_si128((const __m128i*)loop.dest); - - VEC_CALC_BLEND_ALPHA(fg); - VEC_SHADE_SIMPLE(fg); - - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); - - __m128i out_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, fg_alpha_hi), _mm_mullo_epi16(bg_hi, bg_alpha_hi)), 8); - __m128i out_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, fg_alpha_lo), _mm_mullo_epi16(bg_lo, bg_alpha_lo)), 8); - __m128i out = _mm_packus_epi16(out_lo, out_hi); - - _mm_storeu_si128((__m128i*)loop.dest, out); - } - }; - - template - struct TMaskSubSimple - { - VEC_SHADE_VARS(); - VEC_CALC_BLEND_ALPHA_VARS(); - TMaskSubSimple(DrawerWall4Command &cmd, LoopIterator &loop) - { - VEC_SHADE_SIMPLE_INIT4(cmd._light[3], cmd._light[2], cmd._light[1], cmd._light[0]); - VEC_CALC_BLEND_ALPHA_INIT(cmd._srcalpha, cmd._destalpha); - } - void Blend(DrawerWall4Command &cmd, LoopIterator &loop) - { - __m128i fg = Sampler::Sample4(cmd, loop); - __m128i bg = _mm_loadu_si128((const __m128i*)loop.dest); - - VEC_CALC_BLEND_ALPHA(fg); - VEC_SHADE_SIMPLE(fg); - - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); - - __m128i out_hi = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(bg_hi, bg_alpha_hi), _mm_mullo_epi16(fg_hi, fg_alpha_hi)), 8); - 
__m128i out_lo = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(bg_lo, bg_alpha_lo), _mm_mullo_epi16(fg_lo, fg_alpha_lo)), 8); - __m128i out = _mm_packus_epi16(out_lo, out_hi); - - _mm_storeu_si128((__m128i*)loop.dest, out); - } - }; - - template - struct TMaskSub - { - VEC_SHADE_VARS(); - VEC_CALC_BLEND_ALPHA_VARS(); - TMaskSub(DrawerWall4Command &cmd, LoopIterator &loop) - { - VEC_SHADE_INIT4(cmd._light[3], cmd._light[2], cmd._light[1], cmd._light[0], cmd._shade_constants); - VEC_CALC_BLEND_ALPHA_INIT(cmd._srcalpha, cmd._destalpha); - } - void Blend(DrawerWall4Command &cmd, LoopIterator &loop) - { - __m128i fg = Sampler::Sample4(cmd, loop); - __m128i bg = _mm_loadu_si128((const __m128i*)loop.dest); - - VEC_CALC_BLEND_ALPHA(fg); - VEC_SHADE_SIMPLE(fg); - - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); - - __m128i out_hi = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(bg_hi, bg_alpha_hi), _mm_mullo_epi16(fg_hi, fg_alpha_hi)), 8); - __m128i out_lo = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(bg_lo, bg_alpha_lo), _mm_mullo_epi16(fg_lo, fg_alpha_lo)), 8); - __m128i out = _mm_packus_epi16(out_lo, out_hi); - - _mm_storeu_si128((__m128i*)loop.dest, out); - } - }; - - template - struct TMaskRevSubSimple - { - VEC_SHADE_VARS(); - VEC_CALC_BLEND_ALPHA_VARS(); - TMaskRevSubSimple(DrawerWall4Command &cmd, LoopIterator &loop) - { - VEC_SHADE_SIMPLE_INIT4(cmd._light[3], cmd._light[2], cmd._light[1], cmd._light[0]); - VEC_CALC_BLEND_ALPHA_INIT(cmd._srcalpha, cmd._destalpha); - } - void Blend(DrawerWall4Command &cmd, LoopIterator &loop) - { - __m128i fg = Sampler::Sample4(cmd, loop); - __m128i bg = _mm_loadu_si128((const __m128i*)loop.dest); - - VEC_CALC_BLEND_ALPHA(fg); - VEC_SHADE_SIMPLE(fg); - - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = 
_mm_unpacklo_epi8(fg, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); - - __m128i out_hi = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(fg_hi, fg_alpha_hi), _mm_mullo_epi16(bg_hi, bg_alpha_hi)), 8); - __m128i out_lo = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(fg_lo, fg_alpha_lo), _mm_mullo_epi16(bg_lo, bg_alpha_lo)), 8); - __m128i out = _mm_packus_epi16(out_lo, out_hi); - - _mm_storeu_si128((__m128i*)loop.dest, out); - } - }; - - template - struct TMaskRevSub - { - VEC_SHADE_VARS(); - VEC_CALC_BLEND_ALPHA_VARS(); - TMaskRevSub(DrawerWall4Command &cmd, LoopIterator &loop) - { - VEC_SHADE_INIT4(cmd._light[3], cmd._light[2], cmd._light[1], cmd._light[0], cmd._shade_constants); - VEC_CALC_BLEND_ALPHA_INIT(cmd._srcalpha, cmd._destalpha); - } - void Blend(DrawerWall4Command &cmd, LoopIterator &loop) - { - __m128i fg = Sampler::Sample4(cmd, loop); - __m128i bg = _mm_loadu_si128((const __m128i*)loop.dest); - - VEC_CALC_BLEND_ALPHA(fg); - VEC_SHADE_SIMPLE(fg); - - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); - - __m128i out_hi = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(fg_hi, fg_alpha_hi), _mm_mullo_epi16(bg_hi, bg_alpha_hi)), 8); - __m128i out_lo = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(fg_lo, fg_alpha_lo), _mm_mullo_epi16(bg_lo, bg_alpha_lo)), 8); - __m128i out = _mm_packus_epi16(out_lo, out_hi); - - _mm_storeu_si128((__m128i*)loop.dest, out); - } - }; - - typedef CopySimple CopyNearestSimple; - typedef CopySimple CopyLinearSimple; - typedef Copy CopyNearest; - typedef Copy CopyLinear; - typedef MaskSimple MaskNearestSimple; - typedef MaskSimple MaskLinearSimple; - typedef Mask MaskNearest; - typedef Mask MaskLinear; - typedef TMaskAddSimple 
TMaskAddNearestSimple; - typedef TMaskAddSimple TMaskAddLinearSimple; - typedef TMaskAdd TMaskAddNearest; - typedef TMaskAdd TMaskAddLinear; - typedef TMaskSubSimple TMaskSubNearestSimple; - typedef TMaskSubSimple TMaskSubLinearSimple; - typedef TMaskSub TMaskSubNearest; - typedef TMaskSub TMaskSubLinear; - typedef TMaskRevSubSimple TMaskRevSubNearestSimple; - typedef TMaskRevSubSimple TMaskRevSubLinearSimple; - typedef TMaskRevSub TMaskRevSubNearest; - typedef TMaskRevSub TMaskRevSubLinear; -#endif -}; - -typedef DrawerBlendCommand Vlinec4NearestSimpleRGBACommand; -typedef DrawerBlendCommand Vlinec4NearestRGBACommand; -typedef DrawerBlendCommand Vlinec4LinearSimpleRGBACommand; -typedef DrawerBlendCommand Vlinec4LinearRGBACommand; -typedef DrawerBlendCommand Mvlinec4NearestSimpleRGBACommand; -typedef DrawerBlendCommand Mvlinec4NearestRGBACommand; -typedef DrawerBlendCommand Mvlinec4LinearSimpleRGBACommand; -typedef DrawerBlendCommand Mvlinec4LinearRGBACommand; -typedef DrawerBlendCommand Tmvline4AddNearestSimpleRGBACommand; -typedef DrawerBlendCommand Tmvline4AddNearestRGBACommand; -typedef DrawerBlendCommand Tmvline4AddLinearSimpleRGBACommand; -typedef DrawerBlendCommand Tmvline4AddLinearRGBACommand; -typedef DrawerBlendCommand Tmvline4AddClampNearestSimpleRGBACommand; -typedef DrawerBlendCommand Tmvline4AddClampNearestRGBACommand; -typedef DrawerBlendCommand Tmvline4AddClampLinearSimpleRGBACommand; -typedef DrawerBlendCommand Tmvline4AddClampLinearRGBACommand; -typedef DrawerBlendCommand Tmvline4SubClampNearestSimpleRGBACommand; -typedef DrawerBlendCommand Tmvline4SubClampNearestRGBACommand; -typedef DrawerBlendCommand Tmvline4SubClampLinearSimpleRGBACommand; -typedef DrawerBlendCommand Tmvline4SubClampLinearRGBACommand; -typedef DrawerBlendCommand Tmvline4RevSubClampNearestSimpleRGBACommand; -typedef DrawerBlendCommand Tmvline4RevSubClampNearestRGBACommand; -typedef DrawerBlendCommand Tmvline4RevSubClampLinearSimpleRGBACommand; -typedef DrawerBlendCommand 
Tmvline4RevSubClampLinearRGBACommand; - -class Vlinec1RGBACommand : public DrawerWall1Command -{ -public: - void Execute(DrawerThread *thread) override - { - LoopIterator loop(this, thread); - if (!loop) return; - - if (_source2 == nullptr) - { - do - { - uint32_t fg = LightBgra::shade_bgra(_source[loop.sample_index()], _light, _shade_constants); - *loop.dest = BlendBgra::copy(fg); - } while (loop.next()); - } - else - { - do - { - uint32_t fg = LightBgra::shade_bgra(SampleBgra::sample_bilinear(_source, _source2, loop.texturefracx, loop.frac, loop.one, loop.height), _light, _shade_constants); - *loop.dest = BlendBgra::copy(fg); - } while (loop.next()); - } - } -}; - -class Mvlinec1RGBACommand : public DrawerWall1Command -{ -public: - void Execute(DrawerThread *thread) override - { - LoopIterator loop(this, thread); - if (!loop) return; - - if (_source2 == nullptr) - { - do - { - uint32_t fg = LightBgra::shade_bgra(_source[loop.sample_index()], _light, _shade_constants); - *loop.dest = BlendBgra::alpha_blend(fg, *loop.dest); - } while (loop.next()); - } - else - { - do - { - uint32_t fg = LightBgra::shade_bgra(SampleBgra::sample_bilinear(_source, _source2, loop.texturefracx, loop.frac, loop.one, loop.height), _light, _shade_constants); - *loop.dest = BlendBgra::alpha_blend(fg, *loop.dest); - } while (loop.next()); - } - } -}; - -class Tmvline1AddRGBACommand : public DrawerWall1Command -{ -public: - void Execute(DrawerThread *thread) override - { - LoopIterator loop(this, thread); - if (!loop) return; - do - { - uint32_t fg = LightBgra::shade_bgra(_source[loop.sample_index()], _light, _shade_constants); - *loop.dest = BlendBgra::add(fg, *loop.dest, _srcalpha, calc_blend_bgalpha(fg, _destalpha)); - } while (loop.next()); - } -}; - -class Tmvline1AddClampRGBACommand : public DrawerWall1Command -{ -public: - void Execute(DrawerThread *thread) override - { - LoopIterator loop(this, thread); - if (!loop) return; - do - { - uint32_t fg = 
LightBgra::shade_bgra(_source[loop.sample_index()], _light, _shade_constants); - *loop.dest = BlendBgra::add(fg, *loop.dest, _srcalpha, calc_blend_bgalpha(fg, _destalpha)); - } while (loop.next()); - } -}; - -class Tmvline1SubClampRGBACommand : public DrawerWall1Command -{ -public: - void Execute(DrawerThread *thread) override - { - LoopIterator loop(this, thread); - if (!loop) return; - do - { - uint32_t fg = LightBgra::shade_bgra(_source[loop.sample_index()], _light, _shade_constants); - *loop.dest = BlendBgra::sub(fg, *loop.dest, _srcalpha, calc_blend_bgalpha(fg, _destalpha)); - } while (loop.next()); - } -}; - -class Tmvline1RevSubClampRGBACommand : public DrawerWall1Command -{ -public: - void Execute(DrawerThread *thread) override - { - LoopIterator loop(this, thread); - if (!loop) return; - do - { - uint32_t fg = LightBgra::shade_bgra(_source[loop.sample_index()], _light, _shade_constants); - *loop.dest = BlendBgra::revsub(fg, *loop.dest, _srcalpha, calc_blend_bgalpha(fg, _destalpha)); - } while (loop.next()); - } -}; - -///////////////////////////////////////////////////////////////////////////// - class DrawFogBoundaryLineRGBACommand : public DrawerCommand { int _y; @@ -3020,58 +1920,32 @@ void R_DrawRevSubClampTranslatedColumn_rgba() void R_DrawSpan_rgba() { -#if !defined(NO_LLVM) DrawerCommandQueue::QueueCommand(); -#elif defined(NO_SSE) - DrawerCommandQueue::QueueCommand(); -#else - DrawerCommandQueue::QueueCommand(); -#endif } void R_DrawSpanMasked_rgba() { -#if !defined(NO_LLVM) DrawerCommandQueue::QueueCommand(); -#else - DrawerCommandQueue::QueueCommand(); -#endif } void R_DrawSpanTranslucent_rgba() { -#if !defined(NO_LLVM) DrawerCommandQueue::QueueCommand(); -#else - DrawerCommandQueue::QueueCommand(); -#endif } void R_DrawSpanMaskedTranslucent_rgba() { -#if !defined(NO_LLVM) DrawerCommandQueue::QueueCommand(); -#else - DrawerCommandQueue::QueueCommand(); -#endif } void R_DrawSpanAddClamp_rgba() { -#if !defined(NO_LLVM) 
DrawerCommandQueue::QueueCommand(); -#else - DrawerCommandQueue::QueueCommand(); -#endif } void R_DrawSpanMaskedAddClamp_rgba() { -#if !defined(NO_LLVM) DrawerCommandQueue::QueueCommand(); -#else - DrawerCommandQueue::QueueCommand(); -#endif } void R_FillSpan_rgba() @@ -3116,11 +1990,7 @@ void R_DrawSlab_rgba(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, BY DWORD vlinec1_rgba() { -#if !defined(NO_LLVM) DrawerCommandQueue::QueueCommand(); -#else - DrawerCommandQueue::QueueCommand(); -#endif return dc_texturefrac + dc_count * dc_iscale; } @@ -3139,116 +2009,72 @@ void queue_wallcommand() void vlinec4_rgba() { -#if !defined(NO_LLVM) DrawerCommandQueue::QueueCommand(); -#else - queue_wallcommand(); -#endif for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } DWORD mvlinec1_rgba() { -#if !defined(NO_LLVM) DrawerCommandQueue::QueueCommand(); -#else - DrawerCommandQueue::QueueCommand(); -#endif return dc_texturefrac + dc_count * dc_iscale; } void mvlinec4_rgba() { -#if !defined(NO_LLVM) DrawerCommandQueue::QueueCommand(); -#else - queue_wallcommand(); -#endif for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } fixed_t tmvline1_add_rgba() { -#if !defined(NO_LLVM) DrawerCommandQueue::QueueCommand(); -#else - DrawerCommandQueue::QueueCommand(); -#endif return dc_texturefrac + dc_count * dc_iscale; } void tmvline4_add_rgba() { -#if !defined(NO_LLVM) DrawerCommandQueue::QueueCommand(); -#else - queue_wallcommand(); -#endif for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } fixed_t tmvline1_addclamp_rgba() { -#if !defined(NO_LLVM) DrawerCommandQueue::QueueCommand(); -#else - DrawerCommandQueue::QueueCommand(); -#endif return dc_texturefrac + dc_count * dc_iscale; } void tmvline4_addclamp_rgba() { -#if !defined(NO_LLVM) DrawerCommandQueue::QueueCommand(); -#else - queue_wallcommand(); -#endif for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } fixed_t tmvline1_subclamp_rgba() { -#if !defined(NO_LLVM) 
DrawerCommandQueue::QueueCommand(); -#else - DrawerCommandQueue::QueueCommand(); -#endif return dc_texturefrac + dc_count * dc_iscale; } void tmvline4_subclamp_rgba() { -#if !defined(NO_LLVM) DrawerCommandQueue::QueueCommand(); -#else - queue_wallcommand(); -#endif for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } fixed_t tmvline1_revsubclamp_rgba() { -#if !defined(NO_LLVM) DrawerCommandQueue::QueueCommand(); -#else - DrawerCommandQueue::QueueCommand(); -#endif return dc_texturefrac + dc_count * dc_iscale; } void tmvline4_revsubclamp_rgba() { -#if !defined(NO_LLVM) DrawerCommandQueue::QueueCommand(); -#else - queue_wallcommand(); -#endif for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index e5fa88ed07..34b8741794 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -249,14 +249,6 @@ public: } else { - // [SP] Note: I've put in a hack here to throttle the speed of the rendering if - // the thread queue gets to big. This is one way to prevent too many commands - // going into the thread queue, which is causing crashes when there are too - // many threads (of which, there can be only as many as there are columns on - // the screen - guess what happens when you're too full of sprites!) - if (queue->commands.size() > r_multithreadedmax) - R_EndDrawerCommands(); - void *ptr = AllocMemory(sizeof(T)); if (!ptr) // Out of memory - render what we got { @@ -569,10 +561,6 @@ public: return (alpha << 24) | (red << 16) | (green << 8) | blue; } - -#ifndef NO_SSE - static __m128i samplertable[256 * 2]; -#endif }; ///////////////////////////////////////////////////////////////////////////// diff --git a/src/r_draw_rgba_sse.h b/src/r_draw_rgba_sse.h deleted file mode 100644 index 4ee5576930..0000000000 --- a/src/r_draw_rgba_sse.h +++ /dev/null @@ -1,367 +0,0 @@ -// -// SSE/AVX intrinsics based drawers for the r_draw family of drawers. 
-// -// Note: This header file is intentionally not guarded by a __R_DRAW_RGBA_SSE__ define. -// It is because the code is nearly identical for SSE vs AVX. The file is included -// multiple times by r_draw_rgba.cpp with different defines that changes the class -// names outputted and the type of intrinsics used. - -#ifdef _MSC_VER -#pragma warning(disable: 4752) // warning C4752: found Intel(R) Advanced Vector Extensions; consider using /arch:AVX -#endif - -class VecCommand(DrawSpanRGBA) : public DrawerCommand -{ - const uint32_t * RESTRICT _source; - fixed_t _xfrac; - fixed_t _yfrac; - fixed_t _xstep; - fixed_t _ystep; - int _x1; - int _x2; - int _y; - int _xbits; - int _ybits; - BYTE * RESTRICT _destorg; - fixed_t _light; - ShadeConstants _shade_constants; - bool _nearest_filter; - -public: - VecCommand(DrawSpanRGBA)() - { - _source = (const uint32_t*)ds_source; - _xfrac = ds_xfrac; - _yfrac = ds_yfrac; - _xstep = ds_xstep; - _ystep = ds_ystep; - _x1 = ds_x1; - _x2 = ds_x2; - _y = ds_y; - _xbits = ds_xbits; - _ybits = ds_ybits; - _destorg = dc_destorg; - _light = ds_light; - _shade_constants = ds_shade_constants; - _nearest_filter = !SampleBgra::span_sampler_setup(_source, _xbits, _ybits, _xstep, _ystep, ds_source_mipmapped); - } - - void Execute(DrawerThread *thread) override - { - if (thread->line_skipped_by_thread(_y)) - return; - - dsfixed_t xfrac; - dsfixed_t yfrac; - dsfixed_t xstep; - dsfixed_t ystep; - uint32_t* dest; - const uint32_t* source = _source; - int count; - int spot; - - xfrac = _xfrac; - yfrac = _yfrac; - - dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; - - count = _x2 - _x1 + 1; - - xstep = _xstep; - ystep = _ystep; - - uint32_t light = LightBgra::calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; - - if (_nearest_filter) - { - if (_xbits == 6 && _ybits == 6) - { - // 64x64 is the most common case by far, so special case it. 
- - int sse_count = count / 4; - count -= sse_count * 4; - - if (shade_constants.simple_shade) - { - VEC_SHADE_VARS(); - VEC_SHADE_SIMPLE_INIT(light); - - while (sse_count--) - { - // Current texture index in u,v. - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p0 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p1 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p2 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p3 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - // Lookup pixel from flat texture tile, - // re-index using light/colormap. - __m128i fg = _mm_set_epi32(p3, p2, p1, p0); - VEC_SHADE_SIMPLE(fg); - _mm_storeu_si128((__m128i*)dest, fg); - - // Next step in u,v. - dest += 4; - } - } - else - { - VEC_SHADE_VARS(); - VEC_SHADE_INIT(light, shade_constants); - - while (sse_count--) - { - // Current texture index in u,v. - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p0 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p1 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p2 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p3 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - // Lookup pixel from flat texture tile, - // re-index using light/colormap. - __m128i fg = _mm_set_epi32(p3, p2, p1, p0); - VEC_SHADE(fg, shade_constants); - _mm_storeu_si128((__m128i*)dest, fg); - - // Next step in u,v. - dest += 4; - } - } - - if (count == 0) - return; - - do - { - // Current texture index in u,v. 
- spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - - // Lookup pixel from flat texture tile - *dest++ = LightBgra::shade_bgra(source[spot], light, shade_constants); - - // Next step in u,v. - xfrac += xstep; - yfrac += ystep; - } while (--count); - } - else - { - BYTE yshift = 32 - _ybits; - BYTE xshift = yshift - _xbits; - int xmask = ((1 << _xbits) - 1) << _ybits; - - int sse_count = count / 4; - count -= sse_count * 4; - - if (shade_constants.simple_shade) - { - VEC_SHADE_VARS(); - VEC_SHADE_SIMPLE_INIT(light); - - while (sse_count--) - { - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - uint32_t p0 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - uint32_t p1 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - uint32_t p2 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - uint32_t p3 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - // Lookup pixel from flat texture tile - __m128i fg = _mm_set_epi32(p3, p2, p1, p0); - VEC_SHADE_SIMPLE(fg); - _mm_storeu_si128((__m128i*)dest, fg); - dest += 4; - } - } - else - { - VEC_SHADE_VARS(); - VEC_SHADE_INIT(light, shade_constants); - - while (sse_count--) - { - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - uint32_t p0 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - uint32_t p1 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - uint32_t p2 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - uint32_t p3 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - // Lookup pixel from flat texture tile - __m128i fg = _mm_set_epi32(p3, p2, p1, p0); - VEC_SHADE(fg, shade_constants); - 
_mm_storeu_si128((__m128i*)dest, fg); - dest += 4; - } - } - - if (count == 0) - return; - - do - { - // Current texture index in u,v. - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - - // Lookup pixel from flat texture tile - *dest++ = LightBgra::shade_bgra(source[spot], light, shade_constants); - - // Next step in u,v. - xfrac += xstep; - yfrac += ystep; - } while (--count); - } - } - else - { - if (_xbits == 6 && _ybits == 6) - { - // 64x64 is the most common case by far, so special case it. - - int sse_count = count / 4; - count -= sse_count * 4; - - if (shade_constants.simple_shade) - { - VEC_SHADE_VARS(); - VEC_SHADE_SIMPLE_INIT(light); - while (sse_count--) - { - __m128i fg; - VEC_SAMPLE_BILINEAR4_SPAN(fg, source, xfrac, yfrac, xstep, ystep, 26, 26); - VEC_SHADE_SIMPLE(fg); - _mm_storeu_si128((__m128i*)dest, fg); - dest += 4; - } - } - else - { - VEC_SHADE_VARS(); - VEC_SHADE_INIT(light, shade_constants); - while (sse_count--) - { - __m128i fg; - VEC_SAMPLE_BILINEAR4_SPAN(fg, source, xfrac, yfrac, xstep, ystep, 26, 26); - VEC_SHADE(fg, shade_constants); - _mm_storeu_si128((__m128i*)dest, fg); - dest += 4; - } - } - - if (count == 0) - return; - - do - { - *dest++ = LightBgra::shade_bgra(SampleBgra::sample_bilinear(source, xfrac, yfrac, 26, 26), light, shade_constants); - xfrac += xstep; - yfrac += ystep; - } while (--count); - } - else - { - int sse_count = count / 4; - count -= sse_count * 4; - - if (shade_constants.simple_shade) - { - VEC_SHADE_VARS(); - VEC_SHADE_SIMPLE_INIT(light); - while (sse_count--) - { - __m128i fg; - int tmpx = 32 - _xbits; - int tmpy = 32 - _ybits; - VEC_SAMPLE_BILINEAR4_SPAN(fg, source, xfrac, yfrac, xstep, ystep, tmpx, tmpy); - VEC_SHADE_SIMPLE(fg); - _mm_storeu_si128((__m128i*)dest, fg); - dest += 4; - } - } - else - { - VEC_SHADE_VARS(); - VEC_SHADE_INIT(light, shade_constants); - while (sse_count--) - { - __m128i fg; - int tmpx = 32 - _xbits; - int tmpy = 32 - _ybits; - VEC_SAMPLE_BILINEAR4_SPAN(fg, source, xfrac, 
yfrac, xstep, ystep, tmpx, tmpy); - VEC_SHADE(fg, shade_constants); - _mm_storeu_si128((__m128i*)dest, fg); - dest += 4; - } - } - - if (count == 0) - return; - - do - { - *dest++ = LightBgra::shade_bgra(SampleBgra::sample_bilinear(source, xfrac, yfrac, 32 - _xbits, 32 - _ybits), light, shade_constants); - xfrac += xstep; - yfrac += ystep; - } while (--count); - } - } - } -}; From f9795a25334f0deff827f17b29dac0158cbf5c34 Mon Sep 17 00:00:00 2001 From: raa-eruanna Date: Mon, 3 Oct 2016 21:39:15 -0400 Subject: [PATCH 155/912] Revert "Remove linear sky again" This reverts commit 21390e91b8a28c71ba44bf62ee3c7545508a74e2. - also set r_linearsky to "false" by default. --- src/r_plane.cpp | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/src/r_plane.cpp b/src/r_plane.cpp index 0cfd51ca56..cfc30e1c7b 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -859,16 +859,30 @@ static DWORD lastskycol_bgra[4]; static int skycolplace; static int skycolplace_bgra; +CVAR(Bool, r_linearsky, false, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); + // Get a column of sky when there is only one sky texture. 
static const BYTE *R_GetOneSkyColumn (FTexture *fronttex, int x) { - angle_t column = (skyangle + xtoviewangle[x]) ^ skyflip; - int tx = (UMulScale16(column, frontcyl) + frontpos) >> FRACBITS; + int tx; + if (r_linearsky) + { + angle_t xangle = (angle_t)((0.5 - x / (double)viewwidth) * FocalTangent * ANGLE_90); + angle_t column = (skyangle + xangle) ^ skyflip; + tx = (UMulScale16(column, frontcyl) + frontpos) >> FRACBITS; + } + else + { + angle_t column = (skyangle + xtoviewangle[x]) ^ skyflip; + tx = (UMulScale16(column, frontcyl) + frontpos) >> FRACBITS; + } if (!r_swtruecolor) return fronttex->GetColumn(tx, NULL); else + { return (const BYTE *)fronttex->GetColumnBgra(tx, NULL); + } } // Get a column of sky when there are two overlapping sky textures From b9a644e7622a10eea4c72a50f3108aa917948718 Mon Sep 17 00:00:00 2001 From: raa-eruanna Date: Mon, 3 Oct 2016 21:41:37 -0400 Subject: [PATCH 156/912] Added menu option for Linear sky. --- wadsrc/static/menudef.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/wadsrc/static/menudef.txt b/wadsrc/static/menudef.txt index 5894072620..4a11462695 100644 --- a/wadsrc/static/menudef.txt +++ b/wadsrc/static/menudef.txt @@ -690,6 +690,7 @@ OptionMenu "VideoOptions" } Option "$DSPLYMNU_STRETCHSKY", "r_stretchsky", "OnOff" + Option "$DSPLYMNU_LINEARSKY", "r_linearsky", "OnOff" Option "$DSPLYMNU_DRAWFUZZ", "r_drawfuzz", "Fuzziness" Slider "$DSPLYMNU_TRANSSOUL", "transsouls", 0.25, 1.0, 0.05, 2 Option "$DSPLYMNU_FAKECONTRAST", "r_fakecontrast", "Contrast" From 4d1ea5c47727b1213015fd42cc2961b4159a1e63 Mon Sep 17 00:00:00 2001 From: raa-eruanna Date: Mon, 3 Oct 2016 21:52:25 -0400 Subject: [PATCH 157/912] Added language entry for Linear skies. 
(oops) --- wadsrc/static/language.enu | 1 + 1 file changed, 1 insertion(+) diff --git a/wadsrc/static/language.enu b/wadsrc/static/language.enu index 89b38329cf..9aa3b74396 100644 --- a/wadsrc/static/language.enu +++ b/wadsrc/static/language.enu @@ -1789,6 +1789,7 @@ DSPLYMNU_SHOWENDOOM = "Show ENDOOM screen"; DSPLYMNU_PALLETEHACK = "DirectDraw palette hack"; // Not used DSPLYMNU_ATTACHEDSURFACES = "Use attached surfaces"; // Not used DSPLYMNU_STRETCHSKY = "Stretch short skies"; +DSPLYMNU_LINEARSKY = "Linear skies"; DSPLYMNU_DRAWFUZZ = "Use fuzz effect"; DSPLYMNU_TRANSSOUL = "Lost Soul translucency"; DSPLYMNU_FAKECONTRAST = "Use fake contrast"; From c1e859dbca3e6301bf570bf1bdc467f90c415d4e Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 7 Oct 2016 03:38:43 +0200 Subject: [PATCH 158/912] Added codegen for column drawers --- .../fixedfunction/drawcolumncodegen.cpp | 147 +++- .../fixedfunction/drawcolumncodegen.h | 57 +- src/r_compiler/llvmdrawers.cpp | 86 +++ src/r_compiler/llvmdrawers.h | 51 ++ src/r_compiler/ssa/ssa_ubyte.cpp | 6 + src/r_compiler/ssa/ssa_ubyte.h | 4 + src/r_compiler/ssa/ssa_ubyte_ptr.cpp | 16 +- src/r_compiler/ssa/ssa_vec4i.cpp | 13 + src/r_compiler/ssa/ssa_vec4i.h | 1 + src/r_draw_rgba.cpp | 718 +++--------------- 10 files changed, 449 insertions(+), 650 deletions(-) diff --git a/src/r_compiler/fixedfunction/drawcolumncodegen.cpp b/src/r_compiler/fixedfunction/drawcolumncodegen.cpp index 67d801162b..116744f1cb 100644 --- a/src/r_compiler/fixedfunction/drawcolumncodegen.cpp +++ b/src/r_compiler/fixedfunction/drawcolumncodegen.cpp @@ -11,6 +11,151 @@ #include "r_compiler/ssa/ssa_struct_type.h" #include "r_compiler/ssa/ssa_value.h" -void DrawColumnCodegen::Generate(DrawColumnVariant variant, SSAValue args) +void DrawColumnCodegen::Generate(DrawColumnVariant variant, SSAValue args, SSAValue thread_data) { + dest = args[0][0].load(); + source = args[0][1].load(); + colormap = args[0][2].load(); + translation = args[0][3].load(); + basecolors 
= args[0][4].load(); + pitch = args[0][5].load(); + count = args[0][6].load(); + dest_y = args[0][7].load(); + iscale = args[0][8].load(); + texturefrac = args[0][9].load(); + light = args[0][10].load(); + color = SSAVec4i::unpack(args[0][11].load()); + srccolor = SSAVec4i::unpack(args[0][12].load()); + srcalpha = args[0][13].load(); + destalpha = args[0][14].load(); + SSAShort light_alpha = args[0][15].load(); + SSAShort light_red = args[0][16].load(); + SSAShort light_green = args[0][17].load(); + SSAShort light_blue = args[0][18].load(); + SSAShort fade_alpha = args[0][19].load(); + SSAShort fade_red = args[0][20].load(); + SSAShort fade_green = args[0][21].load(); + SSAShort fade_blue = args[0][22].load(); + SSAShort desaturate = args[0][23].load(); + SSAInt flags = args[0][24].load(); + shade_constants.light = SSAVec4i(light_blue.zext_int(), light_green.zext_int(), light_red.zext_int(), light_alpha.zext_int()); + shade_constants.fade = SSAVec4i(fade_blue.zext_int(), fade_green.zext_int(), fade_red.zext_int(), fade_alpha.zext_int()); + shade_constants.desaturate = desaturate.zext_int(); + + thread.core = thread_data[0][0].load(); + thread.num_cores = thread_data[0][1].load(); + thread.pass_start_y = thread_data[0][2].load(); + thread.pass_end_y = thread_data[0][3].load(); + + is_simple_shade = (flags & DrawColumnArgs::simple_shade) == SSAInt(DrawColumnArgs::simple_shade); + + count = count_for_thread(dest_y, count, thread); + dest = dest_for_thread(dest_y, pitch, dest, thread); + pitch = pitch * thread.num_cores; + stack_frac.store(texturefrac + iscale * skipped_by_thread(dest_y, thread)); + iscale = iscale * thread.num_cores; + + SSAIfBlock branch; + branch.if_block(is_simple_shade); + Loop(variant, true); + branch.else_block(); + Loop(variant, false); + branch.end_block(); +} + +void DrawColumnCodegen::Loop(DrawColumnVariant variant, bool isSimpleShade) +{ + stack_index.store(SSAInt(0)); + { + SSAForBlock loop; + SSAInt index = stack_index.load(); + 
loop.loop_block(index < count); + + SSAInt frac = stack_frac.load(); + + SSAInt offset = index * pitch * 4; + SSAVec4i bgcolor = dest[offset].load_vec4ub(); + + SSAInt alpha, inv_alpha; + SSAVec4i outcolor; + switch (variant) + { + default: + case DrawColumnVariant::Draw: + outcolor = blend_copy(Shade(ColormapSample(frac), isSimpleShade)); + break; + case DrawColumnVariant::DrawAdd: + case DrawColumnVariant::DrawAddClamp: + outcolor = blend_add(Shade(ColormapSample(frac), isSimpleShade), bgcolor, srcalpha, destalpha); + break; + case DrawColumnVariant::DrawShaded: + alpha = SSAInt::MAX(SSAInt::MIN(ColormapSample(frac), SSAInt(64)), SSAInt(0)) * 4; + inv_alpha = 256 - alpha; + outcolor = blend_add(color, bgcolor, alpha, inv_alpha); + break; + case DrawColumnVariant::DrawSubClamp: + outcolor = blend_sub(Shade(ColormapSample(frac), isSimpleShade), bgcolor, srcalpha, destalpha); + break; + case DrawColumnVariant::DrawRevSubClamp: + outcolor = blend_revsub(Shade(ColormapSample(frac), isSimpleShade), bgcolor, srcalpha, destalpha); + break; + case DrawColumnVariant::DrawTranslated: + outcolor = blend_copy(Shade(TranslateSample(frac), isSimpleShade)); + break; + case DrawColumnVariant::DrawTlatedAdd: + case DrawColumnVariant::DrawAddClampTranslated: + outcolor = blend_add(Shade(TranslateSample(frac), isSimpleShade), bgcolor, srcalpha, destalpha); + break; + case DrawColumnVariant::DrawSubClampTranslated: + outcolor = blend_sub(Shade(TranslateSample(frac), isSimpleShade), bgcolor, srcalpha, destalpha); + break; + case DrawColumnVariant::DrawRevSubClampTranslated: + outcolor = blend_revsub(Shade(TranslateSample(frac), isSimpleShade), bgcolor, srcalpha, destalpha); + break; + case DrawColumnVariant::Fill: + outcolor = blend_copy(color); + break; + case DrawColumnVariant::FillAdd: + alpha = srccolor[3]; + alpha = alpha + (alpha >> 7); + inv_alpha = 256 - alpha; + outcolor = blend_add(srccolor, bgcolor, alpha, inv_alpha); + break; + case DrawColumnVariant::FillAddClamp: + 
outcolor = blend_add(srccolor, bgcolor, srcalpha, destalpha); + break; + case DrawColumnVariant::FillSubClamp: + outcolor = blend_sub(srccolor, bgcolor, srcalpha, destalpha); + break; + case DrawColumnVariant::FillRevSubClamp: + outcolor = blend_revsub(srccolor, bgcolor, srcalpha, destalpha); + break; + } + + dest[offset].store_vec4ub(outcolor); + + stack_index.store(index + 1); + stack_frac.store(frac + iscale); + loop.end_block(); + } +} + +SSAInt DrawColumnCodegen::ColormapSample(SSAInt frac) +{ + SSAInt sample_index = frac >> FRACBITS; + return colormap[source[sample_index].load().zext_int()].load().zext_int(); +} + +SSAInt DrawColumnCodegen::TranslateSample(SSAInt frac) +{ + SSAInt sample_index = frac >> FRACBITS; + return translation[source[sample_index].load().zext_int()].load().zext_int(); +} + +SSAVec4i DrawColumnCodegen::Shade(SSAInt palIndex, bool isSimpleShade) +{ + if (isSimpleShade) + return shade_pal_index_simple(palIndex, light, basecolors); + else + return shade_pal_index_advanced(palIndex, light, shade_constants, basecolors); } diff --git a/src/r_compiler/fixedfunction/drawcolumncodegen.h b/src/r_compiler/fixedfunction/drawcolumncodegen.h index 0749def7f5..488c36295f 100644 --- a/src/r_compiler/fixedfunction/drawcolumncodegen.h +++ b/src/r_compiler/fixedfunction/drawcolumncodegen.h @@ -5,22 +5,53 @@ enum class DrawColumnVariant { - Opaque, - Fuzz, - Add, - Translated, - TlatedAdd, - Shaded, - AddClamp, - AddClampTranslated, - SubClamp, - SubClampTranslated, - RevSubClamp, - RevSubClampTranslated + Fill, + FillAdd, + FillAddClamp, + FillSubClamp, + FillRevSubClamp, + Draw, + DrawAdd, + DrawTranslated, + DrawTlatedAdd, + DrawShaded, + DrawAddClamp, + DrawAddClampTranslated, + DrawSubClamp, + DrawSubClampTranslated, + DrawRevSubClamp, + DrawRevSubClampTranslated }; class DrawColumnCodegen : public DrawerCodegen { public: - void Generate(DrawColumnVariant variant, SSAValue args); + void Generate(DrawColumnVariant variant, SSAValue args, SSAValue 
thread_data); + +private: + void Loop(DrawColumnVariant variant, bool isSimpleShade); + SSAInt ColormapSample(SSAInt frac); + SSAInt TranslateSample(SSAInt frac); + SSAVec4i Shade(SSAInt palIndex, bool isSimpleShade); + + SSAStack stack_index, stack_frac; + + SSAUBytePtr dest; + SSAUBytePtr source; + SSAUBytePtr colormap; + SSAUBytePtr translation; + SSAUBytePtr basecolors; + SSAInt pitch; + SSAInt count; + SSAInt dest_y; + SSAInt iscale; + SSAInt texturefrac; + SSAInt light; + SSAVec4i color; + SSAVec4i srccolor; + SSAInt srcalpha; + SSAInt destalpha; + SSABool is_simple_shade; + SSAShadeConstants shade_constants; + SSAWorkerThread thread; }; diff --git a/src/r_compiler/llvmdrawers.cpp b/src/r_compiler/llvmdrawers.cpp index 7691af35b1..6ab4f5a4fd 100644 --- a/src/r_compiler/llvmdrawers.cpp +++ b/src/r_compiler/llvmdrawers.cpp @@ -47,9 +47,11 @@ public: LLVMDrawersImpl(); private: + void CodegenDrawColumn(const char *name, DrawColumnVariant variant); void CodegenDrawSpan(const char *name, DrawSpanVariant variant); void CodegenDrawWall(const char *name, DrawWallVariant variant, int columns); + static llvm::Type *GetDrawColumnArgsStruct(llvm::LLVMContext &context); static llvm::Type *GetDrawSpanArgsStruct(llvm::LLVMContext &context); static llvm::Type *GetDrawWallArgsStruct(llvm::LLVMContext &context); static llvm::Type *GetWorkerThreadDataStruct(llvm::LLVMContext &context); @@ -82,6 +84,22 @@ LLVMDrawers *LLVMDrawers::Instance() LLVMDrawersImpl::LLVMDrawersImpl() { + CodegenDrawColumn("FillColumn", DrawColumnVariant::Fill); + CodegenDrawColumn("FillColumnAdd", DrawColumnVariant::FillAdd); + CodegenDrawColumn("FillColumnAddClamp", DrawColumnVariant::FillAddClamp); + CodegenDrawColumn("FillColumnSubClamp", DrawColumnVariant::FillSubClamp); + CodegenDrawColumn("FillColumnRevSubClamp", DrawColumnVariant::FillRevSubClamp); + CodegenDrawColumn("DrawColumn", DrawColumnVariant::Draw); + CodegenDrawColumn("DrawColumnAdd", DrawColumnVariant::DrawAdd); + 
CodegenDrawColumn("DrawColumnTranslated", DrawColumnVariant::DrawTranslated); + CodegenDrawColumn("DrawColumnTlatedAdd", DrawColumnVariant::DrawTlatedAdd); + CodegenDrawColumn("DrawColumnShaded", DrawColumnVariant::DrawShaded); + CodegenDrawColumn("DrawColumnAddClamp", DrawColumnVariant::DrawAddClamp); + CodegenDrawColumn("DrawColumnAddClampTranslated", DrawColumnVariant::DrawAddClampTranslated); + CodegenDrawColumn("DrawColumnSubClamp", DrawColumnVariant::DrawSubClamp); + CodegenDrawColumn("DrawColumnSubClampTranslated", DrawColumnVariant::DrawSubClampTranslated); + CodegenDrawColumn("DrawColumnRevSubClamp", DrawColumnVariant::DrawRevSubClamp); + CodegenDrawColumn("DrawColumnRevSubClampTranslated", DrawColumnVariant::DrawRevSubClampTranslated); CodegenDrawSpan("DrawSpan", DrawSpanVariant::Opaque); CodegenDrawSpan("DrawSpanMasked", DrawSpanVariant::Masked); CodegenDrawSpan("DrawSpanTranslucent", DrawSpanVariant::Translucent); @@ -104,6 +122,22 @@ LLVMDrawersImpl::LLVMDrawersImpl() mProgram.engine()->finalizeObject(); mProgram.modulePassManager()->run(*mProgram.module()); + FillColumn = mProgram.GetProcAddress("FillColumn"); + FillColumnAdd = mProgram.GetProcAddress("FillColumnAdd"); + FillColumnAddClamp = mProgram.GetProcAddress("FillColumnAddClamp"); + FillColumnSubClamp = mProgram.GetProcAddress("FillColumnSubClamp"); + FillColumnRevSubClamp = mProgram.GetProcAddress("FillColumnRevSubClamp"); + DrawColumn = mProgram.GetProcAddress("DrawColumn"); + DrawColumnAdd = mProgram.GetProcAddress("DrawColumnAdd"); + DrawColumnTranslated = mProgram.GetProcAddress("DrawColumnTranslated"); + DrawColumnTlatedAdd = mProgram.GetProcAddress("DrawColumnTlatedAdd"); + DrawColumnShaded = mProgram.GetProcAddress("DrawColumnShaded"); + DrawColumnAddClamp = mProgram.GetProcAddress("DrawColumnAddClamp"); + DrawColumnAddClampTranslated = mProgram.GetProcAddress("DrawColumnAddClampTranslated"); + DrawColumnSubClamp = mProgram.GetProcAddress("DrawColumnSubClamp"); + 
DrawColumnSubClampTranslated = mProgram.GetProcAddress("DrawColumnSubClampTranslated"); + DrawColumnRevSubClamp = mProgram.GetProcAddress("DrawColumnRevSubClamp"); + DrawColumnRevSubClampTranslated = mProgram.GetProcAddress("DrawColumnRevSubClampTranslated"); DrawSpan = mProgram.GetProcAddress("DrawSpan"); DrawSpanMasked = mProgram.GetProcAddress("DrawSpanMasked"); DrawSpanTranslucent = mProgram.GetProcAddress("DrawSpanTranslucent"); @@ -126,6 +160,27 @@ LLVMDrawersImpl::LLVMDrawersImpl() mProgram.StopLogFatalErrors(); } +void LLVMDrawersImpl::CodegenDrawColumn(const char *name, DrawColumnVariant variant) +{ + llvm::IRBuilder<> builder(mProgram.context()); + SSAScope ssa_scope(&mProgram.context(), mProgram.module(), &builder); + + SSAFunction function(name); + function.add_parameter(GetDrawColumnArgsStruct(mProgram.context())); + function.add_parameter(GetWorkerThreadDataStruct(mProgram.context())); + function.create_public(); + + DrawColumnCodegen codegen; + codegen.Generate(variant, function.parameter(0), function.parameter(1)); + + builder.CreateRetVoid(); + + if (llvm::verifyFunction(*function.func)) + I_FatalError("verifyFunction failed for " __FUNCTION__); + + mProgram.functionPassManager()->run(*function.func); +} + void LLVMDrawersImpl::CodegenDrawSpan(const char *name, DrawSpanVariant variant) { llvm::IRBuilder<> builder(mProgram.context()); @@ -167,6 +222,37 @@ void LLVMDrawersImpl::CodegenDrawWall(const char *name, DrawWallVariant variant, mProgram.functionPassManager()->run(*function.func); } +llvm::Type *LLVMDrawersImpl::GetDrawColumnArgsStruct(llvm::LLVMContext &context) +{ + std::vector elements; + elements.push_back(llvm::Type::getInt8PtrTy(context)); // uint32_t *dest; + elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint32_t *source; + elements.push_back(llvm::Type::getInt8PtrTy(context)); // uint8_t *colormap; + elements.push_back(llvm::Type::getInt8PtrTy(context)); // uint8_t *translation; + 
elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint32_t *basecolors; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t pitch; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t count; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t dest_y; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t iscale; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t texturefrac; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t light; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t color; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t srccolor; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t srcalpha; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t destalpha; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_alpha; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_red; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_green; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_blue; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_alpha; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_red; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_green; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_blue; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t desaturate; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t flags; + return llvm::StructType::get(context, elements, false)->getPointerTo(); +} + llvm::Type *LLVMDrawersImpl::GetDrawSpanArgsStruct(llvm::LLVMContext &context) { std::vector elements; diff --git a/src/r_compiler/llvmdrawers.h b/src/r_compiler/llvmdrawers.h index b1039cf496..2ce4c52306 100644 --- a/src/r_compiler/llvmdrawers.h +++ b/src/r_compiler/llvmdrawers.h @@ -77,6 
+77,40 @@ struct DrawSpanArgs }; }; +struct DrawColumnArgs +{ + uint32_t *dest; + const uint8_t *source; + uint8_t *colormap; + uint8_t *translation; + const uint32_t *basecolors; + int32_t pitch; + int32_t count; + int32_t dest_y; + uint32_t iscale; + uint32_t texturefrac; + uint32_t light; + uint32_t color; + uint32_t srccolor; + uint32_t srcalpha; + uint32_t destalpha; + + uint16_t light_alpha; + uint16_t light_red; + uint16_t light_green; + uint16_t light_blue; + uint16_t fade_alpha; + uint16_t fade_red; + uint16_t fade_green; + uint16_t fade_blue; + uint16_t desaturate; + uint32_t flags; + enum Flags + { + simple_shade = 1 + }; +}; + class LLVMDrawers { public: @@ -86,6 +120,23 @@ public: static void Destroy(); static LLVMDrawers *Instance(); + void(*DrawColumn)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; + void(*DrawColumnAdd)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; + void(*DrawColumnTranslated)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; + void(*DrawColumnTlatedAdd)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; + void(*DrawColumnShaded)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; + void(*DrawColumnAddClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; + void(*DrawColumnAddClampTranslated)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; + void(*DrawColumnSubClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; + void(*DrawColumnSubClampTranslated)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; + void(*DrawColumnRevSubClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; + void(*DrawColumnRevSubClampTranslated)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; + void(*FillColumn)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; + void(*FillColumnAdd)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; + void(*FillColumnAddClamp)(const DrawColumnArgs *, const 
WorkerThreadData *) = nullptr; + void(*FillColumnSubClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; + void(*FillColumnRevSubClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; + void(*DrawSpan)(const DrawSpanArgs *) = nullptr; void(*DrawSpanMasked)(const DrawSpanArgs *) = nullptr; void(*DrawSpanTranslucent)(const DrawSpanArgs *) = nullptr; diff --git a/src/r_compiler/ssa/ssa_ubyte.cpp b/src/r_compiler/ssa/ssa_ubyte.cpp index 3204d064d1..6fe9c3bb16 100644 --- a/src/r_compiler/ssa/ssa_ubyte.cpp +++ b/src/r_compiler/ssa/ssa_ubyte.cpp @@ -1,6 +1,7 @@ #include "r_compiler/llvm_include.h" #include "ssa_ubyte.h" +#include "ssa_int.h" #include "ssa_scope.h" SSAUByte::SSAUByte() @@ -24,6 +25,11 @@ llvm::Type *SSAUByte::llvm_type() return llvm::Type::getInt8Ty(SSAScope::context()); } +SSAInt SSAUByte::zext_int() +{ + return SSAInt::from_llvm(SSAScope::builder().CreateZExt(v, SSAInt::llvm_type(), SSAScope::hint())); +} + SSAUByte operator+(const SSAUByte &a, const SSAUByte &b) { return SSAUByte::from_llvm(SSAScope::builder().CreateAdd(a.v, b.v, SSAScope::hint())); diff --git a/src/r_compiler/ssa/ssa_ubyte.h b/src/r_compiler/ssa/ssa_ubyte.h index ef878b3259..41ed3939be 100644 --- a/src/r_compiler/ssa/ssa_ubyte.h +++ b/src/r_compiler/ssa/ssa_ubyte.h @@ -4,6 +4,8 @@ namespace llvm { class Value; } namespace llvm { class Type; } +class SSAInt; + class SSAUByte { public: @@ -13,6 +15,8 @@ public: static SSAUByte from_llvm(llvm::Value *v) { return SSAUByte(v); } static llvm::Type *llvm_type(); + SSAInt zext_int(); + llvm::Value *v; }; diff --git a/src/r_compiler/ssa/ssa_ubyte_ptr.cpp b/src/r_compiler/ssa/ssa_ubyte_ptr.cpp index 98bf27c462..34de0ab889 100644 --- a/src/r_compiler/ssa/ssa_ubyte_ptr.cpp +++ b/src/r_compiler/ssa/ssa_ubyte_ptr.cpp @@ -30,22 +30,8 @@ SSAUByte SSAUBytePtr::load() const SSAVec4i SSAUBytePtr::load_vec4ub() const { - // _mm_cvtsi32_si128 as implemented by clang: SSAInt i32 = 
SSAInt::from_llvm(SSAScope::builder().CreateLoad(SSAScope::builder().CreateBitCast(v, llvm::Type::getInt32PtrTy(SSAScope::context()), SSAScope::hint()), false, SSAScope::hint())); - llvm::Value *v = SSAScope::builder().CreateInsertElement(llvm::UndefValue::get(SSAVec4i::llvm_type()), i32.v, SSAInt(0).v, SSAScope::hint()); - v = SSAScope::builder().CreateInsertElement(v, SSAInt(0).v, SSAInt(1).v, SSAScope::hint()); - v = SSAScope::builder().CreateInsertElement(v, SSAInt(0).v, SSAInt(2).v, SSAScope::hint()); - v = SSAScope::builder().CreateInsertElement(v, SSAInt(0).v, SSAInt(3).v, SSAScope::hint()); - SSAVec4i v4i = SSAVec4i::from_llvm(v); - - SSAVec8s low = SSAVec8s::bitcast(SSAVec16ub::shuffle(SSAVec16ub::bitcast(v4i), SSAVec16ub((unsigned char)0), 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7)); // _mm_unpacklo_epi8 - return SSAVec4i::extendlo(low); // _mm_unpacklo_epi16 -/* - llvm::PointerType *m4xint8typeptr = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 4)->getPointerTo(); - llvm::Type *m4xint32type = llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 4); - llvm::Value *v4ub = SSAScope::builder().CreateLoad(SSAScope::builder().CreateBitCast(v, m4xint8typeptr, SSAScope::hint()), false, SSAScope::hint()); - return SSAVec4i::from_llvm(SSAScope::builder().CreateZExt(v4ub, m4xint32type)); -*/ + return SSAVec4i::unpack(i32); } SSAVec16ub SSAUBytePtr::load_vec16ub() const diff --git a/src/r_compiler/ssa/ssa_vec4i.cpp b/src/r_compiler/ssa/ssa_vec4i.cpp index 3b508412f3..3be0ec194a 100644 --- a/src/r_compiler/ssa/ssa_vec4i.cpp +++ b/src/r_compiler/ssa/ssa_vec4i.cpp @@ -97,6 +97,19 @@ llvm::Type *SSAVec4i::llvm_type() return llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 4); } +SSAVec4i SSAVec4i::unpack(SSAInt i32) +{ + // _mm_cvtsi32_si128 as implemented by clang: + llvm::Value *v = SSAScope::builder().CreateInsertElement(llvm::UndefValue::get(SSAVec4i::llvm_type()), i32.v, SSAInt(0).v, 
SSAScope::hint()); + v = SSAScope::builder().CreateInsertElement(v, SSAInt(0).v, SSAInt(1).v, SSAScope::hint()); + v = SSAScope::builder().CreateInsertElement(v, SSAInt(0).v, SSAInt(2).v, SSAScope::hint()); + v = SSAScope::builder().CreateInsertElement(v, SSAInt(0).v, SSAInt(3).v, SSAScope::hint()); + SSAVec4i v4i = SSAVec4i::from_llvm(v); + + SSAVec8s low = SSAVec8s::bitcast(SSAVec16ub::shuffle(SSAVec16ub::bitcast(v4i), SSAVec16ub((unsigned char)0), 0, 16 + 0, 1, 16 + 1, 2, 16 + 2, 3, 16 + 3, 4, 16 + 4, 5, 16 + 5, 6, 16 + 6, 7, 16 + 7)); // _mm_unpacklo_epi8 + return SSAVec4i::extendlo(low); // _mm_unpacklo_epi16 +} + SSAVec4i SSAVec4i::bitcast(SSAVec4f f32) { return SSAVec4i::from_llvm(SSAScope::builder().CreateBitCast(f32.v, llvm_type(), SSAScope::hint())); diff --git a/src/r_compiler/ssa/ssa_vec4i.h b/src/r_compiler/ssa/ssa_vec4i.h index 89cda16465..f8ef92f1e2 100644 --- a/src/r_compiler/ssa/ssa_vec4i.h +++ b/src/r_compiler/ssa/ssa_vec4i.h @@ -24,6 +24,7 @@ public: SSAVec4i insert(SSAInt index, SSAInt value); SSAVec4i insert(int index, SSAInt value); SSAVec4i insert(int index, int value); + static SSAVec4i unpack(SSAInt value); static SSAVec4i bitcast(SSAVec4f f32); static SSAVec4i bitcast(SSAVec8s i16); static SSAVec4i shuffle(const SSAVec4i &f0, int index0, int index1, int index2, int index3); diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index c5b1b478e6..7da2f183f8 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -396,56 +396,6 @@ public: } }; -class DrawWallMasked4LLVMCommand : public DrawWall4LLVMCommand -{ -public: - void Execute(DrawerThread *thread) override - { - WorkerThreadData d = ThreadData(thread); - LLVMDrawers::Instance()->mvlinec4(&args, &d); - } -}; - -class DrawWallAdd4LLVMCommand : public DrawWall4LLVMCommand -{ -public: - void Execute(DrawerThread *thread) override - { - WorkerThreadData d = ThreadData(thread); - LLVMDrawers::Instance()->tmvline4_add(&args, &d); - } -}; - -class DrawWallAddClamp4LLVMCommand : public 
DrawWall4LLVMCommand -{ -public: - void Execute(DrawerThread *thread) override - { - WorkerThreadData d = ThreadData(thread); - LLVMDrawers::Instance()->tmvline4_addclamp(&args, &d); - } -}; - -class DrawWallSubClamp4LLVMCommand : public DrawWall4LLVMCommand -{ -public: - void Execute(DrawerThread *thread) override - { - WorkerThreadData d = ThreadData(thread); - LLVMDrawers::Instance()->tmvline4_subclamp(&args, &d); - } -}; - -class DrawWallRevSubClamp4LLVMCommand : public DrawWall4LLVMCommand -{ -public: - void Execute(DrawerThread *thread) override - { - WorkerThreadData d = ThreadData(thread); - LLVMDrawers::Instance()->tmvline4_revsubclamp(&args, &d); - } -}; - class DrawWall1LLVMCommand : public DrawerCommand { protected: @@ -500,575 +450,101 @@ public: } }; -class DrawWallMasked1LLVMCommand : public DrawWall1LLVMCommand +class DrawColumnLLVMCommand : public DrawerCommand { +protected: + DrawColumnArgs args; + + WorkerThreadData ThreadData(DrawerThread *thread) + { + WorkerThreadData d; + d.core = thread->core; + d.num_cores = thread->num_cores; + d.pass_start_y = thread->pass_start_y; + d.pass_end_y = thread->pass_end_y; + return d; + } + public: + DrawColumnLLVMCommand() + { + args.dest = (uint32_t*)dc_dest; + args.source = dc_source; + args.colormap = dc_colormap; + args.translation = dc_translation; + args.basecolors = (const uint32_t *)GPalette.BaseColors; + args.pitch = dc_pitch; + args.count = dc_count; + args.dest_y = _dest_y; + args.iscale = dc_iscale; + args.texturefrac = dc_texturefrac; + args.light = LightBgra::calc_light_multiplier(dc_light); + args.color = LightBgra::shade_pal_index_simple(dc_color, args.light); + args.srccolor = dc_srccolor_bgra; + args.srcalpha = dc_srcalpha >> (FRACBITS - 8); + args.destalpha = dc_destalpha >> (FRACBITS - 8); + args.light_red = dc_shade_constants.light_red; + args.light_green = dc_shade_constants.light_green; + args.light_blue = dc_shade_constants.light_blue; + args.light_alpha = 
dc_shade_constants.light_alpha; + args.fade_red = dc_shade_constants.fade_red; + args.fade_green = dc_shade_constants.fade_green; + args.fade_blue = dc_shade_constants.fade_blue; + args.fade_alpha = dc_shade_constants.fade_alpha; + args.desaturate = dc_shade_constants.desaturate; + args.flags = 0; + if (dc_shade_constants.simple_shade) + args.flags |= DrawColumnArgs::simple_shade; + } + void Execute(DrawerThread *thread) override { WorkerThreadData d = ThreadData(thread); - LLVMDrawers::Instance()->mvlinec1(&args, &d); + LLVMDrawers::Instance()->DrawColumn(&args, &d); } }; -class DrawWallAdd1LLVMCommand : public DrawWall1LLVMCommand -{ -public: - void Execute(DrawerThread *thread) override - { - WorkerThreadData d = ThreadData(thread); - LLVMDrawers::Instance()->tmvline1_add(&args, &d); - } +#define DECLARE_DRAW_COMMAND(name, func, base) \ +class name##LLVMCommand : public base \ +{ \ +public: \ + void Execute(DrawerThread *thread) override \ + { \ + WorkerThreadData d = ThreadData(thread); \ + LLVMDrawers::Instance()->func(&args, &d); \ + } \ }; -class DrawWallAddClamp1LLVMCommand : public DrawWall1LLVMCommand -{ -public: - void Execute(DrawerThread *thread) override - { - WorkerThreadData d = ThreadData(thread); - LLVMDrawers::Instance()->tmvline1_addclamp(&args, &d); - } -}; +//DECLARE_DRAW_COMMAND(name, func, DrawSpanLLVMCommand); -class DrawWallSubClamp1LLVMCommand : public DrawWall1LLVMCommand -{ -public: - void Execute(DrawerThread *thread) override - { - WorkerThreadData d = ThreadData(thread); - LLVMDrawers::Instance()->tmvline1_subclamp(&args, &d); - } -}; - -class DrawWallRevSubClamp1LLVMCommand : public DrawWall1LLVMCommand -{ -public: - void Execute(DrawerThread *thread) override - { - WorkerThreadData d = ThreadData(thread); - LLVMDrawers::Instance()->tmvline1_revsubclamp(&args, &d); - } -}; +DECLARE_DRAW_COMMAND(DrawWallMasked4, mvlinec4, DrawWall4LLVMCommand); +DECLARE_DRAW_COMMAND(DrawWallAdd4, tmvline4_add, DrawWall4LLVMCommand); 
+DECLARE_DRAW_COMMAND(DrawWallAddClamp4, tmvline4_addclamp, DrawWall4LLVMCommand); +DECLARE_DRAW_COMMAND(DrawWallSubClamp4, tmvline4_subclamp, DrawWall4LLVMCommand); +DECLARE_DRAW_COMMAND(DrawWallRevSubClamp4, tmvline4_revsubclamp, DrawWall4LLVMCommand); +DECLARE_DRAW_COMMAND(DrawWallMasked1, mvlinec1, DrawWall1LLVMCommand); +DECLARE_DRAW_COMMAND(DrawWallAdd1, tmvline1_add, DrawWall1LLVMCommand); +DECLARE_DRAW_COMMAND(DrawWallAddClamp1, tmvline1_addclamp, DrawWall1LLVMCommand); +DECLARE_DRAW_COMMAND(DrawWallSubClamp1, tmvline1_subclamp, DrawWall1LLVMCommand); +DECLARE_DRAW_COMMAND(DrawWallRevSubClamp1, tmvline1_revsubclamp, DrawWall1LLVMCommand); +DECLARE_DRAW_COMMAND(DrawColumnAdd, DrawColumnAdd, DrawColumnLLVMCommand); +DECLARE_DRAW_COMMAND(DrawColumnTranslated, DrawColumnTranslated, DrawColumnLLVMCommand); +DECLARE_DRAW_COMMAND(DrawColumnTlatedAdd, DrawColumnTlatedAdd, DrawColumnLLVMCommand); +DECLARE_DRAW_COMMAND(DrawColumnShaded, DrawColumnShaded, DrawColumnLLVMCommand); +DECLARE_DRAW_COMMAND(DrawColumnAddClamp, DrawColumnAddClamp, DrawColumnLLVMCommand); +DECLARE_DRAW_COMMAND(DrawColumnAddClampTranslated, DrawColumnAddClampTranslated, DrawColumnLLVMCommand); +DECLARE_DRAW_COMMAND(DrawColumnSubClamp, DrawColumnSubClamp, DrawColumnLLVMCommand); +DECLARE_DRAW_COMMAND(DrawColumnSubClampTranslated, DrawColumnSubClampTranslated, DrawColumnLLVMCommand); +DECLARE_DRAW_COMMAND(DrawColumnRevSubClamp, DrawColumnRevSubClamp, DrawColumnLLVMCommand); +DECLARE_DRAW_COMMAND(DrawColumnRevSubClampTranslated, DrawColumnRevSubClampTranslated, DrawColumnLLVMCommand); +DECLARE_DRAW_COMMAND(FillColumn, FillColumn, DrawColumnLLVMCommand); +DECLARE_DRAW_COMMAND(FillColumnAdd, FillColumnAdd, DrawColumnLLVMCommand); +DECLARE_DRAW_COMMAND(FillColumnAddClamp, FillColumnAddClamp, DrawColumnLLVMCommand); +DECLARE_DRAW_COMMAND(FillColumnSubClamp, FillColumnSubClamp, DrawColumnLLVMCommand); +DECLARE_DRAW_COMMAND(FillColumnRevSubClamp, FillColumnRevSubClamp, DrawColumnLLVMCommand); 
///////////////////////////////////////////////////////////////////////////// -class DrawerColumnCommand : public DrawerCommand -{ -public: - int _count; - BYTE * RESTRICT _dest; - int _pitch; - DWORD _iscale; - DWORD _texturefrac; - - DrawerColumnCommand() - { - _count = dc_count; - _dest = dc_dest; - _iscale = dc_iscale; - _texturefrac = dc_texturefrac; - _pitch = dc_pitch; - } - - class LoopIterator - { - public: - int count; - uint32_t *dest; - int pitch; - fixed_t fracstep; - fixed_t frac; - - LoopIterator(DrawerColumnCommand *command, DrawerThread *thread) - { - count = thread->count_for_thread(command->_dest_y, command->_count); - if (count <= 0) - return; - - dest = thread->dest_for_thread(command->_dest_y, command->_pitch, (uint32_t*)command->_dest); - pitch = command->_pitch * thread->num_cores; - - fracstep = command->_iscale * thread->num_cores; - frac = command->_texturefrac + command->_iscale * thread->skipped_by_thread(command->_dest_y); - } - - uint32_t sample_index() - { - return frac >> FRACBITS; - } - - explicit operator bool() - { - return count > 0; - } - - bool next() - { - dest += pitch; - frac += fracstep; - return (--count) != 0; - } - }; -}; - -class DrawColumnRGBACommand : public DrawerColumnCommand -{ - uint32_t _light; - const BYTE * RESTRICT _source; - ShadeConstants _shade_constants; - BYTE * RESTRICT _colormap; - -public: - DrawColumnRGBACommand() - { - _light = LightBgra::calc_light_multiplier(dc_light); - _shade_constants = dc_shade_constants; - _source = dc_source; - _colormap = dc_colormap; - } - - void Execute(DrawerThread *thread) override - { - LoopIterator loop(this, thread); - if (!loop) return; - do - { - uint32_t fg = LightBgra::shade_pal_index(_colormap[_source[loop.sample_index()]], _light, _shade_constants); - *loop.dest = BlendBgra::copy(fg); - } while (loop.next()); - } -}; - -class FillColumnRGBACommand : public DrawerColumnCommand -{ - uint32_t _color; - -public: - FillColumnRGBACommand() - { - uint32_t light = 
LightBgra::calc_light_multiplier(dc_light); - _color = LightBgra::shade_pal_index_simple(dc_color, light); - } - - void Execute(DrawerThread *thread) override - { - LoopIterator loop(this, thread); - if (!loop) return; - do - { - *loop.dest = BlendBgra::copy(_color); - } while (loop.next()); - } -}; - -class FillAddColumnRGBACommand : public DrawerColumnCommand -{ - uint32_t _srccolor; - -public: - FillAddColumnRGBACommand() - { - _srccolor = dc_srccolor_bgra; - } - - void Execute(DrawerThread *thread) override - { - LoopIterator loop(this, thread); - if (!loop) return; - - uint32_t alpha = APART(_srccolor); - alpha += alpha >> 7; - - do - { - *loop.dest = BlendBgra::add(_srccolor, *loop.dest, alpha, 256 - alpha); - } while (loop.next()); - } -}; - -class FillAddClampColumnRGBACommand : public DrawerColumnCommand -{ - int _color; - uint32_t _srccolor; - uint32_t _srcalpha; - uint32_t _destalpha; - -public: - FillAddClampColumnRGBACommand() - { - _color = dc_color; - _srccolor = dc_srccolor_bgra; - _srcalpha = dc_srcalpha >> (FRACBITS - 8); - _destalpha = dc_destalpha >> (FRACBITS - 8); - } - - void Execute(DrawerThread *thread) override - { - LoopIterator loop(this, thread); - if (!loop) return; - do - { - *loop.dest = BlendBgra::add(_srccolor, *loop.dest, _srcalpha, _destalpha); - } while (loop.next()); - } -}; - -class FillSubClampColumnRGBACommand : public DrawerColumnCommand -{ - uint32_t _srccolor; - uint32_t _srcalpha; - uint32_t _destalpha; - -public: - FillSubClampColumnRGBACommand() - { - _srccolor = dc_srccolor_bgra; - _srcalpha = dc_srcalpha >> (FRACBITS - 8); - _destalpha = dc_destalpha >> (FRACBITS - 8); - } - - void Execute(DrawerThread *thread) override - { - LoopIterator loop(this, thread); - if (!loop) return; - do - { - *loop.dest = BlendBgra::sub(_srccolor, *loop.dest, _srcalpha, _destalpha); - } while (loop.next()); - } -}; - -class FillRevSubClampColumnRGBACommand : public DrawerColumnCommand -{ - uint32_t _srccolor; - uint32_t _srcalpha; - 
uint32_t _destalpha; - -public: - FillRevSubClampColumnRGBACommand() - { - _srccolor = dc_srccolor_bgra; - _srcalpha = dc_srcalpha >> (FRACBITS - 8); - _destalpha = dc_destalpha >> (FRACBITS - 8); - } - - void Execute(DrawerThread *thread) override - { - LoopIterator loop(this, thread); - if (!loop) return; - do - { - *loop.dest = BlendBgra::revsub(_srccolor, *loop.dest, _srcalpha, _destalpha); - } while (loop.next()); - } -}; - -class DrawAddColumnRGBACommand : public DrawerColumnCommand -{ - const BYTE * RESTRICT _source; - uint32_t _light; - ShadeConstants _shade_constants; - uint32_t _srcalpha; - uint32_t _destalpha; - BYTE * RESTRICT _colormap; - -public: - DrawAddColumnRGBACommand() - { - _source = dc_source; - _light = LightBgra::calc_light_multiplier(dc_light); - _shade_constants = dc_shade_constants; - _srcalpha = dc_srcalpha >> (FRACBITS - 8); - _destalpha = dc_destalpha >> (FRACBITS - 8); - _colormap = dc_colormap; - } - - void Execute(DrawerThread *thread) override - { - LoopIterator loop(this, thread); - if (!loop) return; - do - { - uint32_t fg = LightBgra::shade_pal_index(_colormap[_source[loop.sample_index()]], _light, _shade_constants); - *loop.dest = BlendBgra::add(fg, *loop.dest, _srcalpha, _destalpha); - } while (loop.next()); - } -}; - -class DrawTranslatedColumnRGBACommand : public DrawerColumnCommand -{ - fixed_t _light; - ShadeConstants _shade_constants; - BYTE * RESTRICT _translation; - const BYTE * RESTRICT _source; - -public: - DrawTranslatedColumnRGBACommand() - { - _light = LightBgra::calc_light_multiplier(dc_light); - _shade_constants = dc_shade_constants; - _translation = dc_translation; - _source = dc_source; - } - - void Execute(DrawerThread *thread) override - { - LoopIterator loop(this, thread); - if (!loop) return; - do - { - uint32_t fg = LightBgra::shade_pal_index(_translation[_source[loop.sample_index()]], _light, _shade_constants); - *loop.dest = BlendBgra::copy(fg); - } while (loop.next()); - } -}; - -class 
DrawTlatedAddColumnRGBACommand : public DrawerColumnCommand -{ - fixed_t _light; - ShadeConstants _shade_constants; - BYTE * RESTRICT _translation; - const BYTE * RESTRICT _source; - uint32_t _srcalpha; - uint32_t _destalpha; - -public: - DrawTlatedAddColumnRGBACommand() - { - _light = LightBgra::calc_light_multiplier(dc_light); - _shade_constants = dc_shade_constants; - _translation = dc_translation; - _source = dc_source; - _srcalpha = dc_srcalpha >> (FRACBITS - 8); - _destalpha = dc_destalpha >> (FRACBITS - 8); - } - - void Execute(DrawerThread *thread) override - { - LoopIterator loop(this, thread); - if (!loop) return; - do - { - uint32_t fg = LightBgra::shade_pal_index(_translation[_source[loop.sample_index()]], _light, _shade_constants); - *loop.dest = BlendBgra::add(fg, *loop.dest, _srcalpha, _destalpha); - } while (loop.next()); - } -}; - -class DrawShadedColumnRGBACommand : public DrawerColumnCommand -{ -private: - const BYTE * RESTRICT _source; - lighttable_t * RESTRICT _colormap; - uint32_t _color; - -public: - DrawShadedColumnRGBACommand() - { - _source = dc_source; - _colormap = dc_colormap; - _color = LightBgra::shade_pal_index_simple(dc_color, LightBgra::calc_light_multiplier(dc_light)); - } - - void Execute(DrawerThread *thread) override - { - LoopIterator loop(this, thread); - if (!loop) return; - do - { - uint32_t alpha = clamp(_colormap[_source[loop.sample_index()]], 0, 64) * 4; - uint32_t inv_alpha = 256 - alpha; - *loop.dest = BlendBgra::add(_color, *loop.dest, alpha, inv_alpha); - } while (loop.next()); - } -}; - -class DrawAddClampColumnRGBACommand : public DrawerColumnCommand -{ - const BYTE * RESTRICT _source; - uint32_t _light; - ShadeConstants _shade_constants; - uint32_t _srcalpha; - uint32_t _destalpha; - -public: - DrawAddClampColumnRGBACommand() - { - _source = dc_source; - _light = LightBgra::calc_light_multiplier(dc_light); - _shade_constants = dc_shade_constants; - _srcalpha = dc_srcalpha >> (FRACBITS - 8); - _destalpha = 
dc_destalpha >> (FRACBITS - 8); - } - - void Execute(DrawerThread *thread) override - { - LoopIterator loop(this, thread); - if (!loop) return; - do - { - uint32_t fg = LightBgra::shade_pal_index(_source[loop.sample_index()], _light, _shade_constants); - *loop.dest = BlendBgra::add(fg, *loop.dest, _srcalpha, _destalpha); - } while (loop.next()); - } -}; - -class DrawAddClampTranslatedColumnRGBACommand : public DrawerColumnCommand -{ - BYTE * RESTRICT _translation; - const BYTE * RESTRICT _source; - uint32_t _light; - ShadeConstants _shade_constants; - uint32_t _srcalpha; - uint32_t _destalpha; - -public: - DrawAddClampTranslatedColumnRGBACommand() - { - _translation = dc_translation; - _source = dc_source; - _light = LightBgra::calc_light_multiplier(dc_light); - _shade_constants = dc_shade_constants; - _srcalpha = dc_srcalpha >> (FRACBITS - 8); - _destalpha = dc_destalpha >> (FRACBITS - 8); - } - - void Execute(DrawerThread *thread) override - { - LoopIterator loop(this, thread); - if (!loop) return; - do - { - uint32_t fg = LightBgra::shade_pal_index(_translation[_source[loop.sample_index()]], _light, _shade_constants); - *loop.dest = BlendBgra::add(fg, *loop.dest, _srcalpha, _destalpha); - } while (loop.next()); - } -}; - -class DrawSubClampColumnRGBACommand : public DrawerColumnCommand -{ - const BYTE * RESTRICT _source; - uint32_t _light; - ShadeConstants _shade_constants; - uint32_t _srcalpha; - uint32_t _destalpha; - -public: - DrawSubClampColumnRGBACommand() - { - _source = dc_source; - _light = LightBgra::calc_light_multiplier(dc_light); - _shade_constants = dc_shade_constants; - _srcalpha = dc_srcalpha >> (FRACBITS - 8); - _destalpha = dc_destalpha >> (FRACBITS - 8); - } - - void Execute(DrawerThread *thread) override - { - LoopIterator loop(this, thread); - if (!loop) return; - do - { - uint32_t fg = LightBgra::shade_pal_index(_source[loop.sample_index()], _light, _shade_constants); - *loop.dest = BlendBgra::sub(fg, *loop.dest, _srcalpha, _destalpha); - } 
while (loop.next()); - } -}; - -class DrawSubClampTranslatedColumnRGBACommand : public DrawerColumnCommand -{ - const BYTE * RESTRICT _source; - uint32_t _light; - ShadeConstants _shade_constants; - uint32_t _srcalpha; - uint32_t _destalpha; - BYTE * RESTRICT _translation; - -public: - DrawSubClampTranslatedColumnRGBACommand() - { - _source = dc_source; - _light = LightBgra::calc_light_multiplier(dc_light); - _shade_constants = dc_shade_constants; - _srcalpha = dc_srcalpha >> (FRACBITS - 8); - _destalpha = dc_destalpha >> (FRACBITS - 8); - _translation = dc_translation; - } - - void Execute(DrawerThread *thread) override - { - LoopIterator loop(this, thread); - if (!loop) return; - do - { - uint32_t fg = LightBgra::shade_pal_index(_translation[_source[loop.sample_index()]], _light, _shade_constants); - *loop.dest = BlendBgra::sub(fg, *loop.dest, _srcalpha, _destalpha); - } while (loop.next()); - } -}; - -class DrawRevSubClampColumnRGBACommand : public DrawerColumnCommand -{ - const BYTE * RESTRICT _source; - uint32_t _light; - ShadeConstants _shade_constants; - uint32_t _srcalpha; - uint32_t _destalpha; - -public: - DrawRevSubClampColumnRGBACommand() - { - _source = dc_source; - _light = LightBgra::calc_light_multiplier(dc_light); - _shade_constants = dc_shade_constants; - _srcalpha = dc_srcalpha >> (FRACBITS - 8); - _destalpha = dc_destalpha >> (FRACBITS - 8); - } - - void Execute(DrawerThread *thread) override - { - LoopIterator loop(this, thread); - if (!loop) return; - do - { - uint32_t fg = LightBgra::shade_pal_index(_source[loop.sample_index()], _light, _shade_constants); - *loop.dest = BlendBgra::revsub(fg, *loop.dest, _srcalpha, _destalpha); - } while (loop.next()); - } -}; - -class DrawRevSubClampTranslatedColumnRGBACommand : public DrawerColumnCommand -{ - const BYTE * RESTRICT _source; - uint32_t _light; - ShadeConstants _shade_constants; - uint32_t _srcalpha; - uint32_t _destalpha; - BYTE * RESTRICT _translation; - -public: - 
DrawRevSubClampTranslatedColumnRGBACommand() - { - _source = dc_source; - _light = LightBgra::calc_light_multiplier(dc_light); - _shade_constants = dc_shade_constants; - _srcalpha = dc_srcalpha >> (FRACBITS - 8); - _destalpha = dc_destalpha >> (FRACBITS - 8); - _translation = dc_translation; - } - - void Execute(DrawerThread *thread) override - { - LoopIterator loop(this, thread); - if (!loop) return; - do - { - uint32_t fg = LightBgra::shade_pal_index(_translation[_source[loop.sample_index()]], _light, _shade_constants); - *loop.dest = BlendBgra::revsub(fg, *loop.dest, _srcalpha, _destalpha); - } while (loop.next()); - } -}; - class DrawFuzzColumnRGBACommand : public DrawerCommand { int _x; @@ -1830,32 +1306,32 @@ void R_EndDrawerCommands() void R_DrawColumn_rgba() { - DrawerCommandQueue::QueueCommand(); + DrawerCommandQueue::QueueCommand(); } void R_FillColumn_rgba() { - DrawerCommandQueue::QueueCommand(); + DrawerCommandQueue::QueueCommand(); } void R_FillAddColumn_rgba() { - DrawerCommandQueue::QueueCommand(); + DrawerCommandQueue::QueueCommand(); } void R_FillAddClampColumn_rgba() { - DrawerCommandQueue::QueueCommand(); + DrawerCommandQueue::QueueCommand(); } void R_FillSubClampColumn_rgba() { - DrawerCommandQueue::QueueCommand(); + DrawerCommandQueue::QueueCommand(); } void R_FillRevSubClampColumn_rgba() { - DrawerCommandQueue::QueueCommand(); + DrawerCommandQueue::QueueCommand(); } void R_DrawFuzzColumn_rgba() @@ -1870,52 +1346,52 @@ void R_DrawFuzzColumn_rgba() void R_DrawAddColumn_rgba() { - DrawerCommandQueue::QueueCommand(); + DrawerCommandQueue::QueueCommand(); } void R_DrawTranslatedColumn_rgba() { - DrawerCommandQueue::QueueCommand(); + DrawerCommandQueue::QueueCommand(); } void R_DrawTlatedAddColumn_rgba() { - DrawerCommandQueue::QueueCommand(); + DrawerCommandQueue::QueueCommand(); } void R_DrawShadedColumn_rgba() { - DrawerCommandQueue::QueueCommand(); + DrawerCommandQueue::QueueCommand(); } void R_DrawAddClampColumn_rgba() { - 
DrawerCommandQueue::QueueCommand(); + DrawerCommandQueue::QueueCommand(); } void R_DrawAddClampTranslatedColumn_rgba() { - DrawerCommandQueue::QueueCommand(); + DrawerCommandQueue::QueueCommand(); } void R_DrawSubClampColumn_rgba() { - DrawerCommandQueue::QueueCommand(); + DrawerCommandQueue::QueueCommand(); } void R_DrawSubClampTranslatedColumn_rgba() { - DrawerCommandQueue::QueueCommand(); + DrawerCommandQueue::QueueCommand(); } void R_DrawRevSubClampColumn_rgba() { - DrawerCommandQueue::QueueCommand(); + DrawerCommandQueue::QueueCommand(); } void R_DrawRevSubClampTranslatedColumn_rgba() { - DrawerCommandQueue::QueueCommand(); + DrawerCommandQueue::QueueCommand(); } void R_DrawSpan_rgba() From 584220edf01cc776169c4524295943276b0a0433 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 7 Oct 2016 04:01:38 +0200 Subject: [PATCH 159/912] Move DrawerCommandQueue to its own file --- src/CMakeLists.txt | 1 + src/r_draw_rgba.cpp | 184 ----------------------------------------- src/r_draw_rgba.h | 165 ++----------------------------------- src/r_thread.cpp | 196 ++++++++++++++++++++++++++++++++++++++++++++ src/r_thread.h | 157 +++++++++++++++++++++++++++++++++++ 5 files changed, 359 insertions(+), 344 deletions(-) create mode 100644 src/r_thread.cpp create mode 100644 src/r_thread.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index acbec1612b..9f2ee2e28c 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1033,6 +1033,7 @@ set( FASTMATH_PCH_SOURCES r_draw_rgba.cpp r_drawt.cpp r_drawt_rgba.cpp + r_thread.cpp r_main.cpp r_plane.cpp r_segs.cpp diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 7da2f183f8..43075d0a64 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -50,9 +50,6 @@ extern float rw_light; extern float rw_lightstep; extern int wallshade; -// Use multiple threads when drawing -CVAR(Bool, r_multithreaded, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); - // Use linear filtering when scaling up CVAR(Bool, r_magfilter, false, 
CVAR_ARCHIVE | CVAR_GLOBALCONFIG); @@ -64,177 +61,6 @@ CVAR(Bool, r_mipmap, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); ///////////////////////////////////////////////////////////////////////////// -DrawerCommandQueue *DrawerCommandQueue::Instance() -{ - static DrawerCommandQueue queue; - return &queue; -} - -DrawerCommandQueue::DrawerCommandQueue() -{ -} - -DrawerCommandQueue::~DrawerCommandQueue() -{ - StopThreads(); -} - -void* DrawerCommandQueue::AllocMemory(size_t size) -{ - // Make sure allocations remain 16-byte aligned - size = (size + 15) / 16 * 16; - - auto queue = Instance(); - if (queue->memorypool_pos + size > memorypool_size) - return nullptr; - - void *data = queue->memorypool + queue->memorypool_pos; - queue->memorypool_pos += size; - return data; -} - -void DrawerCommandQueue::Begin() -{ - auto queue = Instance(); - queue->Finish(); - queue->threaded_render++; -} - -void DrawerCommandQueue::End() -{ - auto queue = Instance(); - queue->Finish(); - if (queue->threaded_render > 0) - queue->threaded_render--; -} - -void DrawerCommandQueue::WaitForWorkers() -{ - Instance()->Finish(); -} - -void DrawerCommandQueue::Finish() -{ - auto queue = Instance(); - if (queue->commands.empty()) - return; - - // Give worker threads something to do: - - std::unique_lock start_lock(queue->start_mutex); - queue->active_commands.swap(queue->commands); - queue->run_id++; - start_lock.unlock(); - - queue->StartThreads(); - queue->start_condition.notify_all(); - - // Do one thread ourselves: - - DrawerThread thread; - thread.core = 0; - thread.num_cores = (int)(queue->threads.size() + 1); - - for (int pass = 0; pass < queue->num_passes; pass++) - { - thread.pass_start_y = pass * queue->rows_in_pass; - thread.pass_end_y = (pass + 1) * queue->rows_in_pass; - if (pass + 1 == queue->num_passes) - thread.pass_end_y = MAX(thread.pass_end_y, MAXHEIGHT); - - size_t size = queue->active_commands.size(); - for (size_t i = 0; i < size; i++) - { - auto &command = 
queue->active_commands[i]; - command->Execute(&thread); - } - } - - // Wait for everyone to finish: - - std::unique_lock end_lock(queue->end_mutex); - queue->end_condition.wait(end_lock, [&]() { return queue->finished_threads == queue->threads.size(); }); - - // Clean up batch: - - for (auto &command : queue->active_commands) - command->~DrawerCommand(); - queue->active_commands.clear(); - queue->memorypool_pos = 0; - queue->finished_threads = 0; -} - -void DrawerCommandQueue::StartThreads() -{ - if (!threads.empty()) - return; - - int num_threads = std::thread::hardware_concurrency(); - if (num_threads == 0) - num_threads = 4; - - threads.resize(num_threads - 1); - - for (int i = 0; i < num_threads - 1; i++) - { - DrawerCommandQueue *queue = this; - DrawerThread *thread = &threads[i]; - thread->core = i + 1; - thread->num_cores = num_threads; - thread->thread = std::thread([=]() - { - int run_id = 0; - while (true) - { - // Wait until we are signalled to run: - std::unique_lock start_lock(queue->start_mutex); - queue->start_condition.wait(start_lock, [&]() { return queue->run_id != run_id || queue->shutdown_flag; }); - if (queue->shutdown_flag) - break; - run_id = queue->run_id; - start_lock.unlock(); - - // Do the work: - for (int pass = 0; pass < queue->num_passes; pass++) - { - thread->pass_start_y = pass * queue->rows_in_pass; - thread->pass_end_y = (pass + 1) * queue->rows_in_pass; - if (pass + 1 == queue->num_passes) - thread->pass_end_y = MAX(thread->pass_end_y, MAXHEIGHT); - - size_t size = queue->active_commands.size(); - for (size_t i = 0; i < size; i++) - { - auto &command = queue->active_commands[i]; - command->Execute(thread); - } - } - - // Notify main thread that we finished: - std::unique_lock end_lock(queue->end_mutex); - queue->finished_threads++; - end_lock.unlock(); - queue->end_condition.notify_all(); - } - }); - } -} - -void DrawerCommandQueue::StopThreads() -{ - std::unique_lock lock(start_mutex); - shutdown_flag = true; - lock.unlock(); - 
start_condition.notify_all(); - for (auto &thread : threads) - thread.thread.join(); - threads.clear(); - lock.lock(); - shutdown_flag = false; -} - -///////////////////////////////////////////////////////////////////////////// - class DrawSpanLLVMCommand : public DrawerCommand { protected: @@ -1294,16 +1120,6 @@ void ApplySpecialColormapRGBACommand::Execute(DrawerThread *thread) ///////////////////////////////////////////////////////////////////////////// -void R_BeginDrawerCommands() -{ - DrawerCommandQueue::Begin(); -} - -void R_EndDrawerCommands() -{ - DrawerCommandQueue::End(); -} - void R_DrawColumn_rgba() { DrawerCommandQueue::QueueCommand(); diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index 34b8741794..8b704c0a64 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -25,16 +25,16 @@ #include "r_draw.h" #include "v_palette.h" -#include -#include -#include -#include -#include +#include "r_thread.h" #ifndef NO_SSE #include #endif +struct FSpecialColormap; + +EXTERN_CVAR(Bool, r_mipmap) + ///////////////////////////////////////////////////////////////////////////// // Drawer functions: @@ -118,161 +118,6 @@ void tmvline4_revsubclamp_rgba(); void R_FillColumnHoriz_rgba(); void R_FillSpan_rgba(); -///////////////////////////////////////////////////////////////////////////// -// Multithreaded rendering infrastructure: - -// Redirect drawer commands to worker threads -void R_BeginDrawerCommands(); - -// Wait until all drawers finished executing -void R_EndDrawerCommands(); - -struct FSpecialColormap; -class DrawerCommandQueue; - -// Worker data for each thread executing drawer commands -class DrawerThread -{ -public: - std::thread thread; - - // Thread line index of this thread - int core = 0; - - // Number of active threads - int num_cores = 1; - - // Range of rows processed this pass - int pass_start_y = 0; - int pass_end_y = MAXHEIGHT; - - uint32_t dc_temp_rgbabuff_rgba[MAXHEIGHT * 4]; - uint32_t *dc_temp_rgba; - - // Checks if a line is rendered by 
this thread - bool line_skipped_by_thread(int line) - { - return line < pass_start_y || line >= pass_end_y || line % num_cores != core; - } - - // The number of lines to skip to reach the first line to be rendered by this thread - int skipped_by_thread(int first_line) - { - int pass_skip = MAX(pass_start_y - first_line, 0); - int core_skip = (num_cores - (first_line + pass_skip - core) % num_cores) % num_cores; - return pass_skip + core_skip; - } - - // The number of lines to be rendered by this thread - int count_for_thread(int first_line, int count) - { - int lines_until_pass_end = MAX(pass_end_y - first_line, 0); - count = MIN(count, lines_until_pass_end); - int c = (count - skipped_by_thread(first_line) + num_cores - 1) / num_cores; - return MAX(c, 0); - } - - // Calculate the dest address for the first line to be rendered by this thread - uint32_t *dest_for_thread(int first_line, int pitch, uint32_t *dest) - { - return dest + skipped_by_thread(first_line) * pitch; - } -}; - -// Task to be executed by each worker thread -class DrawerCommand -{ -protected: - int _dest_y; - -public: - DrawerCommand() - { - _dest_y = static_cast((dc_dest - dc_destorg) / (dc_pitch * 4)); - } - - virtual void Execute(DrawerThread *thread) = 0; -}; - -EXTERN_CVAR(Bool, r_multithreaded) -EXTERN_CVAR(Bool, r_mipmap) -EXTERN_CVAR(Int, r_multithreadedmax) - -// Manages queueing up commands and executing them on worker threads -class DrawerCommandQueue -{ - enum { memorypool_size = 16 * 1024 * 1024 }; - char memorypool[memorypool_size]; - size_t memorypool_pos = 0; - - std::vector commands; - - std::vector threads; - - std::mutex start_mutex; - std::condition_variable start_condition; - std::vector active_commands; - bool shutdown_flag = false; - int run_id = 0; - - std::mutex end_mutex; - std::condition_variable end_condition; - size_t finished_threads = 0; - - int threaded_render = 0; - DrawerThread single_core_thread; - int num_passes = 1; - int rows_in_pass = MAXHEIGHT; - - void 
StartThreads(); - void StopThreads(); - void Finish(); - - static DrawerCommandQueue *Instance(); - - DrawerCommandQueue(); - ~DrawerCommandQueue(); - -public: - // Allocate memory valid for the duration of a command execution - static void* AllocMemory(size_t size); - - // Queue command to be executed by drawer worker threads - template - static void QueueCommand(Types &&... args) - { - auto queue = Instance(); - if (queue->threaded_render == 0 || !r_multithreaded) - { - T command(std::forward(args)...); - command.Execute(&queue->single_core_thread); - } - else - { - void *ptr = AllocMemory(sizeof(T)); - if (!ptr) // Out of memory - render what we got - { - queue->Finish(); - ptr = AllocMemory(sizeof(T)); - if (!ptr) - return; - } - T *command = new (ptr)T(std::forward(args)...); - queue->commands.push_back(command); - } - } - - // Redirects all drawing commands to worker threads until End is called - // Begin/End blocks can be nested. - static void Begin(); - - // End redirection and wait until all worker threads finished executing - static void End(); - - // Waits until all worker threads finished executing - static void WaitForWorkers(); -}; - ///////////////////////////////////////////////////////////////////////////// // Drawer commands: diff --git a/src/r_thread.cpp b/src/r_thread.cpp new file mode 100644 index 0000000000..dec0b8c6ca --- /dev/null +++ b/src/r_thread.cpp @@ -0,0 +1,196 @@ + +#include +#include "templates.h" +#include "doomdef.h" +#include "i_system.h" +#include "w_wad.h" +#include "r_local.h" +#include "v_video.h" +#include "doomstat.h" +#include "st_stuff.h" +#include "g_game.h" +#include "g_level.h" +#include "r_thread.h" + +CVAR(Bool, r_multithreaded, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); + +void R_BeginDrawerCommands() +{ + DrawerCommandQueue::Begin(); +} + +void R_EndDrawerCommands() +{ + DrawerCommandQueue::End(); +} + +///////////////////////////////////////////////////////////////////////////// + +DrawerCommandQueue 
*DrawerCommandQueue::Instance() +{ + static DrawerCommandQueue queue; + return &queue; +} + +DrawerCommandQueue::DrawerCommandQueue() +{ +} + +DrawerCommandQueue::~DrawerCommandQueue() +{ + StopThreads(); +} + +void* DrawerCommandQueue::AllocMemory(size_t size) +{ + // Make sure allocations remain 16-byte aligned + size = (size + 15) / 16 * 16; + + auto queue = Instance(); + if (queue->memorypool_pos + size > memorypool_size) + return nullptr; + + void *data = queue->memorypool + queue->memorypool_pos; + queue->memorypool_pos += size; + return data; +} + +void DrawerCommandQueue::Begin() +{ + auto queue = Instance(); + queue->Finish(); + queue->threaded_render++; +} + +void DrawerCommandQueue::End() +{ + auto queue = Instance(); + queue->Finish(); + if (queue->threaded_render > 0) + queue->threaded_render--; +} + +void DrawerCommandQueue::WaitForWorkers() +{ + Instance()->Finish(); +} + +void DrawerCommandQueue::Finish() +{ + auto queue = Instance(); + if (queue->commands.empty()) + return; + + // Give worker threads something to do: + + std::unique_lock start_lock(queue->start_mutex); + queue->active_commands.swap(queue->commands); + queue->run_id++; + start_lock.unlock(); + + queue->StartThreads(); + queue->start_condition.notify_all(); + + // Do one thread ourselves: + + DrawerThread thread; + thread.core = 0; + thread.num_cores = (int)(queue->threads.size() + 1); + + for (int pass = 0; pass < queue->num_passes; pass++) + { + thread.pass_start_y = pass * queue->rows_in_pass; + thread.pass_end_y = (pass + 1) * queue->rows_in_pass; + if (pass + 1 == queue->num_passes) + thread.pass_end_y = MAX(thread.pass_end_y, MAXHEIGHT); + + size_t size = queue->active_commands.size(); + for (size_t i = 0; i < size; i++) + { + auto &command = queue->active_commands[i]; + command->Execute(&thread); + } + } + + // Wait for everyone to finish: + + std::unique_lock end_lock(queue->end_mutex); + queue->end_condition.wait(end_lock, [&]() { return queue->finished_threads == 
queue->threads.size(); }); + + // Clean up batch: + + for (auto &command : queue->active_commands) + command->~DrawerCommand(); + queue->active_commands.clear(); + queue->memorypool_pos = 0; + queue->finished_threads = 0; +} + +void DrawerCommandQueue::StartThreads() +{ + if (!threads.empty()) + return; + + int num_threads = std::thread::hardware_concurrency(); + if (num_threads == 0) + num_threads = 4; + + threads.resize(num_threads - 1); + + for (int i = 0; i < num_threads - 1; i++) + { + DrawerCommandQueue *queue = this; + DrawerThread *thread = &threads[i]; + thread->core = i + 1; + thread->num_cores = num_threads; + thread->thread = std::thread([=]() + { + int run_id = 0; + while (true) + { + // Wait until we are signalled to run: + std::unique_lock start_lock(queue->start_mutex); + queue->start_condition.wait(start_lock, [&]() { return queue->run_id != run_id || queue->shutdown_flag; }); + if (queue->shutdown_flag) + break; + run_id = queue->run_id; + start_lock.unlock(); + + // Do the work: + for (int pass = 0; pass < queue->num_passes; pass++) + { + thread->pass_start_y = pass * queue->rows_in_pass; + thread->pass_end_y = (pass + 1) * queue->rows_in_pass; + if (pass + 1 == queue->num_passes) + thread->pass_end_y = MAX(thread->pass_end_y, MAXHEIGHT); + + size_t size = queue->active_commands.size(); + for (size_t i = 0; i < size; i++) + { + auto &command = queue->active_commands[i]; + command->Execute(thread); + } + } + + // Notify main thread that we finished: + std::unique_lock end_lock(queue->end_mutex); + queue->finished_threads++; + end_lock.unlock(); + queue->end_condition.notify_all(); + } + }); + } +} + +void DrawerCommandQueue::StopThreads() +{ + std::unique_lock lock(start_mutex); + shutdown_flag = true; + lock.unlock(); + start_condition.notify_all(); + for (auto &thread : threads) + thread.thread.join(); + threads.clear(); + lock.lock(); + shutdown_flag = false; +} diff --git a/src/r_thread.h b/src/r_thread.h new file mode 100644 index 
0000000000..312c5ad226 --- /dev/null +++ b/src/r_thread.h @@ -0,0 +1,157 @@ + +#pragma once + +#include "r_draw.h" +#include +#include +#include +#include +#include + +// Use multiple threads when drawing +EXTERN_CVAR(Bool, r_multithreaded) + +// Redirect drawer commands to worker threads +void R_BeginDrawerCommands(); + +// Wait until all drawers finished executing +void R_EndDrawerCommands(); + +// Worker data for each thread executing drawer commands +class DrawerThread +{ +public: + std::thread thread; + + // Thread line index of this thread + int core = 0; + + // Number of active threads + int num_cores = 1; + + // Range of rows processed this pass + int pass_start_y = 0; + int pass_end_y = MAXHEIGHT; + + uint32_t dc_temp_rgbabuff_rgba[MAXHEIGHT * 4]; + uint32_t *dc_temp_rgba; + + // Checks if a line is rendered by this thread + bool line_skipped_by_thread(int line) + { + return line < pass_start_y || line >= pass_end_y || line % num_cores != core; + } + + // The number of lines to skip to reach the first line to be rendered by this thread + int skipped_by_thread(int first_line) + { + int pass_skip = MAX(pass_start_y - first_line, 0); + int core_skip = (num_cores - (first_line + pass_skip - core) % num_cores) % num_cores; + return pass_skip + core_skip; + } + + // The number of lines to be rendered by this thread + int count_for_thread(int first_line, int count) + { + int lines_until_pass_end = MAX(pass_end_y - first_line, 0); + count = MIN(count, lines_until_pass_end); + int c = (count - skipped_by_thread(first_line) + num_cores - 1) / num_cores; + return MAX(c, 0); + } + + // Calculate the dest address for the first line to be rendered by this thread + uint32_t *dest_for_thread(int first_line, int pitch, uint32_t *dest) + { + return dest + skipped_by_thread(first_line) * pitch; + } +}; + +// Task to be executed by each worker thread +class DrawerCommand +{ +protected: + int _dest_y; + +public: + DrawerCommand() + { + _dest_y = static_cast((dc_dest - 
dc_destorg) / (dc_pitch * 4)); + } + + virtual void Execute(DrawerThread *thread) = 0; +}; + +// Manages queueing up commands and executing them on worker threads +class DrawerCommandQueue +{ + enum { memorypool_size = 16 * 1024 * 1024 }; + char memorypool[memorypool_size]; + size_t memorypool_pos = 0; + + std::vector commands; + + std::vector threads; + + std::mutex start_mutex; + std::condition_variable start_condition; + std::vector active_commands; + bool shutdown_flag = false; + int run_id = 0; + + std::mutex end_mutex; + std::condition_variable end_condition; + size_t finished_threads = 0; + + int threaded_render = 0; + DrawerThread single_core_thread; + int num_passes = 1; + int rows_in_pass = MAXHEIGHT; + + void StartThreads(); + void StopThreads(); + void Finish(); + + static DrawerCommandQueue *Instance(); + + DrawerCommandQueue(); + ~DrawerCommandQueue(); + +public: + // Allocate memory valid for the duration of a command execution + static void* AllocMemory(size_t size); + + // Queue command to be executed by drawer worker threads + template + static void QueueCommand(Types &&... args) + { + auto queue = Instance(); + if (queue->threaded_render == 0 || !r_multithreaded) + { + T command(std::forward(args)...); + command.Execute(&queue->single_core_thread); + } + else + { + void *ptr = AllocMemory(sizeof(T)); + if (!ptr) // Out of memory - render what we got + { + queue->Finish(); + ptr = AllocMemory(sizeof(T)); + if (!ptr) + return; + } + T *command = new (ptr)T(std::forward(args)...); + queue->commands.push_back(command); + } + } + + // Redirects all drawing commands to worker threads until End is called + // Begin/End blocks can be nested. 
+ static void Begin(); + + // End redirection and wait until all worker threads finished executing + static void End(); + + // Waits until all worker threads finished executing + static void WaitForWorkers(); +}; From 9dc51b27437c6221d1ede9d15808590d9a2a3c58 Mon Sep 17 00:00:00 2001 From: Christoph Oelckers Date: Thu, 6 Oct 2016 08:50:46 +0200 Subject: [PATCH 160/912] - rewrote AActor::DestroyAllInventory so that it clears the item's link to its owner and the owner's inventory list before destroying them. There have been reports about crashes in here with Linux that point to some of the code that gets called here doing unwanted things on the owner, so with these links cleared that should no longer be possible. --- src/p_mobj.cpp | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/src/p_mobj.cpp b/src/p_mobj.cpp index 496f52e869..0225049192 100644 --- a/src/p_mobj.cpp +++ b/src/p_mobj.cpp @@ -685,11 +685,30 @@ bool AActor::TakeInventory(PClassActor *itemclass, int amount, bool fromdecorate void AActor::DestroyAllInventory () { - while (Inventory != NULL) + AInventory *inv = Inventory; + if (inv != nullptr) { - AInventory *item = Inventory; - item->Destroy (); - assert (item != Inventory); + TArray toDelete; + + // Delete the list in a two stage approach. + // This is necessary because an item may destroy another item (e.g. sister weapons) + // which would break the list and leave parts of it undestroyed, maybe doing bad things later. + while (inv != nullptr) + { + toDelete.Push(inv); + AInventory *item = inv->Inventory; + inv->Inventory = nullptr; + inv->Owner = nullptr; + inv = item; + } + for (auto p : toDelete) + { + // the item may already have been deleted by another one, so check this here to avoid problems. 
+ if (!(p->ObjectFlags & OF_EuthanizeMe)) + { + p->Destroy(); + } + } } } From f89d47d03ea34fda644f3a618ae61f9bebd9c1f6 Mon Sep 17 00:00:00 2001 From: Christoph Oelckers Date: Thu, 6 Oct 2016 12:08:38 +0200 Subject: [PATCH 161/912] - fixed: ZCC_OpInfoType::FindBestProto was missing checks for exact match of the required conversion before testing if it is a F32->F64 conversion. Of course, since 0 means that there is no conversion it also means that there is no data that could be checked. --- src/zscript/zcc_expr.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/zscript/zcc_expr.cpp b/src/zscript/zcc_expr.cpp index 0cd399d64b..e1a091b180 100644 --- a/src/zscript/zcc_expr.cpp +++ b/src/zscript/zcc_expr.cpp @@ -162,11 +162,11 @@ ZCC_OpProto *ZCC_OpInfoType::FindBestProto( // [[float32 (op) int]] will choose the integer version instead of the floating point // version, which we do not want. int test_dist1 = dist1, test_dist2 = dist2; - if (routes[0][cur_route1][0]->ConvertConstant == FtoD) + if (test_dist1 > 0 && routes[0][cur_route1][0]->ConvertConstant == FtoD) { test_dist1--; } - if (routes[1][cur_route2][0]->ConvertConstant == FtoD) + if (test_dist2 > 0 && routes[1][cur_route2][0]->ConvertConstant == FtoD) { test_dist2--; } From 2e78d34046adbe191cd75425e1fbb649d2368958 Mon Sep 17 00:00:00 2001 From: Christoph Oelckers Date: Thu, 6 Oct 2016 21:20:49 +0200 Subject: [PATCH 162/912] - fixed: DCeiling's main constructor could leave some fields uninitialized. 
--- src/p_ceiling.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/p_ceiling.cpp b/src/p_ceiling.cpp index 32e5d62bed..e90dd2096d 100644 --- a/src/p_ceiling.cpp +++ b/src/p_ceiling.cpp @@ -214,6 +214,12 @@ DCeiling::DCeiling (sector_t *sec, double speed1, double speed2, int silent) m_Speed = m_Speed1 = speed1; m_Speed2 = speed2; m_Silent = silent; + m_BottomHeight = 0; + m_TopHeight = 0; + m_Direction = 0; + m_Texture = FNullTextureID(); + m_Tag = 0; + m_OldDirection = 0; } //============================================================================ From a80c67c2ca1df6e0a0977eb8e950d5248916dc82 Mon Sep 17 00:00:00 2001 From: raa-eruanna Date: Thu, 6 Oct 2016 20:01:20 -0400 Subject: [PATCH 163/912] - Backported blood_fade_scalar from Skulltag - Added new pickup_fade_scalar which works the same way for pickups - Default for blood_fade_scalar is 1.0 instead of 0.5 from Skulltag. --- src/v_blend.cpp | 11 +++++++++-- wadsrc/static/menudef.txt | 4 ++++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/src/v_blend.cpp b/src/v_blend.cpp index e1552105ff..c13ed547c2 100644 --- a/src/v_blend.cpp +++ b/src/v_blend.cpp @@ -52,7 +52,8 @@ #include "v_palette.h" #include "d_player.h" - +CVAR( Float, blood_fade_scalar, 1.0f, CVAR_ARCHIVE ) // [SP] Pulled from Skulltag - changed default from 0.5 to 1.0 +CVAR( Float, pickup_fade_scalar, 1.0f, CVAR_ARCHIVE ) // [SP] Uses same logic as blood_fade_scalar except for pickups // [RH] Amount of red flash for up to 114 damage points. Calculated by hand // using a logarithmic scale and my trusty HP48G. @@ -113,6 +114,9 @@ void V_AddPlayerBlend (player_t *CPlayer, float blend[4], float maxinvalpha, int if (CPlayer->bonuscount) { cnt = CPlayer->bonuscount << 3; + + // [SP] Allow player to tone down intensity of pickup flash. + cnt = (int)( cnt * pickup_fade_scalar ); V_AddBlend (RPART(gameinfo.pickupcolor)/255.f, GPART(gameinfo.pickupcolor)/255.f, BPART(gameinfo.pickupcolor)/255.f, cnt > 128 ? 
0.5f : cnt / 255.f, blend); @@ -124,7 +128,10 @@ void V_AddPlayerBlend (player_t *CPlayer, float blend[4], float maxinvalpha, int if (painFlash.a != 0) { cnt = DamageToAlpha[MIN (113, CPlayer->damagecount * painFlash.a / 255)]; - + + // [BC] Allow users to tone down the intensity of the blood on the screen. + cnt = (int)( cnt * blood_fade_scalar ); + if (cnt) { if (cnt > maxpainblend) diff --git a/wadsrc/static/menudef.txt b/wadsrc/static/menudef.txt index 36a5918e12..715eff63b7 100644 --- a/wadsrc/static/menudef.txt +++ b/wadsrc/static/menudef.txt @@ -674,6 +674,9 @@ OptionMenu "VideoOptions" Option "$DSPLYMNU_VSYNC", "vid_vsync", "OnOff" Option "$DSPLYMNU_CAPFPS", "cl_capfps", "OffOn" + Slider "$DSPLYMNU_BLOODFADE", "blood_fade_scalar", 0.0, 1.0, 0.05, 1 + Slider "$DSPLYMNU_PICKUPFADE", "pickup_fade_scalar", 0.0, 1.0, 0.05, 1 + Option "$DSPLYMNU_COLUMNMETHOD", "r_columnmethod", "ColumnMethods" Option "$DSPLYMNU_TRUECOLOR", "swtruecolor", "OnOff" Option "$DSPLYMNU_MINFILTER", "r_minfilter", "OnOff" Option "$DSPLYMNU_MAGFILTER", "r_magfilter", "OnOff" @@ -710,6 +713,7 @@ OptionMenu "VideoOptions" Slider "$DSPLYMNU_MOVEBOB", "movebob", 0, 1.0, 0.05, 2 Slider "$DSPLYMNU_STILLBOB", "stillbob", 0, 1.0, 0.05, 2 Slider "$DSPLYMNU_BOBSPEED", "wbobspeed", 0, 2.0, 0.1, 2 + } //------------------------------------------------------------------------------------------- From e75cf427566008ef0793079075b32ebf670d0904 Mon Sep 17 00:00:00 2001 From: raa-eruanna Date: Thu, 6 Oct 2016 20:05:30 -0400 Subject: [PATCH 164/912] - Forgot to add language entries. 
--- wadsrc/static/language.enu | 2 ++ 1 file changed, 2 insertions(+) diff --git a/wadsrc/static/language.enu b/wadsrc/static/language.enu index a0f1f9245b..c76414c8a2 100644 --- a/wadsrc/static/language.enu +++ b/wadsrc/static/language.enu @@ -1786,6 +1786,8 @@ DSPLYMNU_MAGFILTER = "Linear filter when upscaling"; DSPLYMNU_MIPMAP = "Use mipmapped textures"; DSPLYMNU_WIPETYPE = "Screen wipe style"; DSPLYMNU_SHOWENDOOM = "Show ENDOOM screen"; +DSPLYMNU_BLOODFADE = "Blood Flash Intensity"; +DSPLYMNU_PICKUPFADE = "Pickup Flash Intensity"; DSPLYMNU_PALLETEHACK = "DirectDraw palette hack"; // Not used DSPLYMNU_ATTACHEDSURFACES = "Use attached surfaces"; // Not used DSPLYMNU_STRETCHSKY = "Stretch short skies"; From 8c259f50b10c7b797f3e10e794436df75f522503 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 7 Oct 2016 06:40:29 +0200 Subject: [PATCH 165/912] Add codegen for rt column drawers --- .../fixedfunction/drawcolumncodegen.cpp | 182 +++++++++++------- .../fixedfunction/drawcolumncodegen.h | 13 +- src/r_compiler/fixedfunction/drawercodegen.h | 1 + src/r_compiler/llvmdrawers.cpp | 75 +++++--- src/r_compiler/llvmdrawers.h | 23 ++- src/r_draw_rgba.cpp | 1 + src/r_drawt_rgba.cpp | 134 +++++++++---- 7 files changed, 292 insertions(+), 137 deletions(-) diff --git a/src/r_compiler/fixedfunction/drawcolumncodegen.cpp b/src/r_compiler/fixedfunction/drawcolumncodegen.cpp index 116744f1cb..6013582747 100644 --- a/src/r_compiler/fixedfunction/drawcolumncodegen.cpp +++ b/src/r_compiler/fixedfunction/drawcolumncodegen.cpp @@ -11,7 +11,7 @@ #include "r_compiler/ssa/ssa_struct_type.h" #include "r_compiler/ssa/ssa_value.h" -void DrawColumnCodegen::Generate(DrawColumnVariant variant, SSAValue args, SSAValue thread_data) +void DrawColumnCodegen::Generate(DrawColumnVariant variant, DrawColumnMethod method, SSAValue args, SSAValue thread_data) { dest = args[0][0].load(); source = args[0][1].load(); @@ -21,7 +21,8 @@ void DrawColumnCodegen::Generate(DrawColumnVariant variant, 
SSAValue args, SSAVa pitch = args[0][5].load(); count = args[0][6].load(); dest_y = args[0][7].load(); - iscale = args[0][8].load(); + if (method == DrawColumnMethod::Normal) + iscale = args[0][8].load(); texturefrac = args[0][9].load(); light = args[0][10].load(); color = SSAVec4i::unpack(args[0][11].load()); @@ -46,109 +47,148 @@ void DrawColumnCodegen::Generate(DrawColumnVariant variant, SSAValue args, SSAVa thread.num_cores = thread_data[0][1].load(); thread.pass_start_y = thread_data[0][2].load(); thread.pass_end_y = thread_data[0][3].load(); + thread.temp = thread_data[0][4].load(); is_simple_shade = (flags & DrawColumnArgs::simple_shade) == SSAInt(DrawColumnArgs::simple_shade); count = count_for_thread(dest_y, count, thread); dest = dest_for_thread(dest_y, pitch, dest, thread); pitch = pitch * thread.num_cores; - stack_frac.store(texturefrac + iscale * skipped_by_thread(dest_y, thread)); - iscale = iscale * thread.num_cores; + if (method == DrawColumnMethod::Normal) + { + stack_frac.store(texturefrac + iscale * skipped_by_thread(dest_y, thread)); + iscale = iscale * thread.num_cores; + } + else + { + source = thread.temp[((dest_y + skipped_by_thread(dest_y, thread)) * 4 + texturefrac) * 4]; + } SSAIfBlock branch; branch.if_block(is_simple_shade); - Loop(variant, true); + Loop(variant, method, true); branch.else_block(); - Loop(variant, false); + Loop(variant, method, false); branch.end_block(); } -void DrawColumnCodegen::Loop(DrawColumnVariant variant, bool isSimpleShade) +void DrawColumnCodegen::Loop(DrawColumnVariant variant, DrawColumnMethod method, bool isSimpleShade) { + SSAInt sincr; + if (method != DrawColumnMethod::Normal) + sincr = thread.num_cores * 4; + stack_index.store(SSAInt(0)); { SSAForBlock loop; SSAInt index = stack_index.load(); loop.loop_block(index < count); - SSAInt frac = stack_frac.load(); - - SSAInt offset = index * pitch * 4; - SSAVec4i bgcolor = dest[offset].load_vec4ub(); - - SSAInt alpha, inv_alpha; - SSAVec4i outcolor; - switch 
(variant) + SSAInt sample_index, frac; + if (method == DrawColumnMethod::Normal) { - default: - case DrawColumnVariant::Draw: - outcolor = blend_copy(Shade(ColormapSample(frac), isSimpleShade)); - break; - case DrawColumnVariant::DrawAdd: - case DrawColumnVariant::DrawAddClamp: - outcolor = blend_add(Shade(ColormapSample(frac), isSimpleShade), bgcolor, srcalpha, destalpha); - break; - case DrawColumnVariant::DrawShaded: - alpha = SSAInt::MAX(SSAInt::MIN(ColormapSample(frac), SSAInt(64)), SSAInt(0)) * 4; - inv_alpha = 256 - alpha; - outcolor = blend_add(color, bgcolor, alpha, inv_alpha); - break; - case DrawColumnVariant::DrawSubClamp: - outcolor = blend_sub(Shade(ColormapSample(frac), isSimpleShade), bgcolor, srcalpha, destalpha); - break; - case DrawColumnVariant::DrawRevSubClamp: - outcolor = blend_revsub(Shade(ColormapSample(frac), isSimpleShade), bgcolor, srcalpha, destalpha); - break; - case DrawColumnVariant::DrawTranslated: - outcolor = blend_copy(Shade(TranslateSample(frac), isSimpleShade)); - break; - case DrawColumnVariant::DrawTlatedAdd: - case DrawColumnVariant::DrawAddClampTranslated: - outcolor = blend_add(Shade(TranslateSample(frac), isSimpleShade), bgcolor, srcalpha, destalpha); - break; - case DrawColumnVariant::DrawSubClampTranslated: - outcolor = blend_sub(Shade(TranslateSample(frac), isSimpleShade), bgcolor, srcalpha, destalpha); - break; - case DrawColumnVariant::DrawRevSubClampTranslated: - outcolor = blend_revsub(Shade(TranslateSample(frac), isSimpleShade), bgcolor, srcalpha, destalpha); - break; - case DrawColumnVariant::Fill: - outcolor = blend_copy(color); - break; - case DrawColumnVariant::FillAdd: - alpha = srccolor[3]; - alpha = alpha + (alpha >> 7); - inv_alpha = 256 - alpha; - outcolor = blend_add(srccolor, bgcolor, alpha, inv_alpha); - break; - case DrawColumnVariant::FillAddClamp: - outcolor = blend_add(srccolor, bgcolor, srcalpha, destalpha); - break; - case DrawColumnVariant::FillSubClamp: - outcolor = blend_sub(srccolor, bgcolor, 
srcalpha, destalpha); - break; - case DrawColumnVariant::FillRevSubClamp: - outcolor = blend_revsub(srccolor, bgcolor, srcalpha, destalpha); - break; + frac = stack_frac.load(); + sample_index = frac >> FRACBITS; + } + else + { + sample_index = index * sincr * 4; } - dest[offset].store_vec4ub(outcolor); + SSAInt offset = index * pitch * 4; + SSAVec4i bgcolor[4]; + + int numColumns = (method == DrawColumnMethod::Rt4) ? 4 : 1; + + if (numColumns == 4) + { + SSAVec16ub bg = dest[offset].load_unaligned_vec16ub(); + SSAVec8s bg0 = SSAVec8s::extendlo(bg); + SSAVec8s bg1 = SSAVec8s::extendhi(bg); + bgcolor[0] = SSAVec4i::extendlo(bg0); + bgcolor[1] = SSAVec4i::extendhi(bg0); + bgcolor[2] = SSAVec4i::extendlo(bg1); + bgcolor[3] = SSAVec4i::extendhi(bg1); + } + else + { + bgcolor[0] = dest[offset].load_vec4ub(); + } + + SSAVec4i outcolor[4]; + for (int i = 0; i < numColumns; i++) + outcolor[i] = ProcessPixel(sample_index + i * 4, bgcolor[i], variant, isSimpleShade); + + if (numColumns == 4) + { + SSAVec16ub packedcolor(SSAVec8s(outcolor[0], outcolor[1]), SSAVec8s(outcolor[2], outcolor[3])); + dest[offset].store_unaligned_vec16ub(packedcolor); + } + else + { + dest[offset].store_vec4ub(outcolor[0]); + } stack_index.store(index + 1); - stack_frac.store(frac + iscale); + if (method == DrawColumnMethod::Normal) + stack_frac.store(frac + iscale); loop.end_block(); } } -SSAInt DrawColumnCodegen::ColormapSample(SSAInt frac) +SSAVec4i DrawColumnCodegen::ProcessPixel(SSAInt sample_index, SSAVec4i bgcolor, DrawColumnVariant variant, bool isSimpleShade) +{ + SSAInt alpha, inv_alpha; + switch (variant) + { + default: + case DrawColumnVariant::DrawCopy: + return blend_copy(basecolors[ColormapSample(sample_index) * 4].load_vec4ub()); + case DrawColumnVariant::Draw: + return blend_copy(Shade(ColormapSample(sample_index), isSimpleShade)); + case DrawColumnVariant::DrawAdd: + case DrawColumnVariant::DrawAddClamp: + return blend_add(Shade(ColormapSample(sample_index), isSimpleShade), 
bgcolor, srcalpha, destalpha); + case DrawColumnVariant::DrawShaded: + alpha = SSAInt::MAX(SSAInt::MIN(ColormapSample(sample_index), SSAInt(64)), SSAInt(0)) * 4; + inv_alpha = 256 - alpha; + return blend_add(color, bgcolor, alpha, inv_alpha); + case DrawColumnVariant::DrawSubClamp: + return blend_sub(Shade(ColormapSample(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha); + case DrawColumnVariant::DrawRevSubClamp: + return blend_revsub(Shade(ColormapSample(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha); + case DrawColumnVariant::DrawTranslated: + return blend_copy(Shade(TranslateSample(sample_index), isSimpleShade)); + case DrawColumnVariant::DrawTlatedAdd: + case DrawColumnVariant::DrawAddClampTranslated: + return blend_add(Shade(TranslateSample(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha); + case DrawColumnVariant::DrawSubClampTranslated: + return blend_sub(Shade(TranslateSample(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha); + case DrawColumnVariant::DrawRevSubClampTranslated: + return blend_revsub(Shade(TranslateSample(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha); + case DrawColumnVariant::Fill: + return blend_copy(color); + case DrawColumnVariant::FillAdd: + alpha = srccolor[3]; + alpha = alpha + (alpha >> 7); + inv_alpha = 256 - alpha; + return blend_add(srccolor, bgcolor, alpha, inv_alpha); + case DrawColumnVariant::FillAddClamp: + return blend_add(srccolor, bgcolor, srcalpha, destalpha); + case DrawColumnVariant::FillSubClamp: + return blend_sub(srccolor, bgcolor, srcalpha, destalpha); + case DrawColumnVariant::FillRevSubClamp: + return blend_revsub(srccolor, bgcolor, srcalpha, destalpha); + } +} + +SSAInt DrawColumnCodegen::ColormapSample(SSAInt sample_index) { - SSAInt sample_index = frac >> FRACBITS; return colormap[source[sample_index].load().zext_int()].load().zext_int(); } -SSAInt DrawColumnCodegen::TranslateSample(SSAInt frac) +SSAInt DrawColumnCodegen::TranslateSample(SSAInt 
sample_index) { - SSAInt sample_index = frac >> FRACBITS; return translation[source[sample_index].load().zext_int()].load().zext_int(); } diff --git a/src/r_compiler/fixedfunction/drawcolumncodegen.h b/src/r_compiler/fixedfunction/drawcolumncodegen.h index 488c36295f..675a5ea670 100644 --- a/src/r_compiler/fixedfunction/drawcolumncodegen.h +++ b/src/r_compiler/fixedfunction/drawcolumncodegen.h @@ -10,6 +10,7 @@ enum class DrawColumnVariant FillAddClamp, FillSubClamp, FillRevSubClamp, + DrawCopy, Draw, DrawAdd, DrawTranslated, @@ -23,13 +24,21 @@ enum class DrawColumnVariant DrawRevSubClampTranslated }; +enum class DrawColumnMethod +{ + Normal, + Rt1, + Rt4 +}; + class DrawColumnCodegen : public DrawerCodegen { public: - void Generate(DrawColumnVariant variant, SSAValue args, SSAValue thread_data); + void Generate(DrawColumnVariant variant, DrawColumnMethod method, SSAValue args, SSAValue thread_data); private: - void Loop(DrawColumnVariant variant, bool isSimpleShade); + void Loop(DrawColumnVariant variant, DrawColumnMethod method, bool isSimpleShade); + SSAVec4i ProcessPixel(SSAInt sample_index, SSAVec4i bgcolor, DrawColumnVariant variant, bool isSimpleShade); SSAInt ColormapSample(SSAInt frac); SSAInt TranslateSample(SSAInt frac); SSAVec4i Shade(SSAInt palIndex, bool isSimpleShade); diff --git a/src/r_compiler/fixedfunction/drawercodegen.h b/src/r_compiler/fixedfunction/drawercodegen.h index 17b36234dc..27dc6f21d4 100644 --- a/src/r_compiler/fixedfunction/drawercodegen.h +++ b/src/r_compiler/fixedfunction/drawercodegen.h @@ -25,6 +25,7 @@ public: SSAInt num_cores; SSAInt pass_start_y; SSAInt pass_end_y; + SSAUBytePtr temp; }; class SSAShadeConstants diff --git a/src/r_compiler/llvmdrawers.cpp b/src/r_compiler/llvmdrawers.cpp index 6ab4f5a4fd..3108b8c6a5 100644 --- a/src/r_compiler/llvmdrawers.cpp +++ b/src/r_compiler/llvmdrawers.cpp @@ -47,7 +47,7 @@ public: LLVMDrawersImpl(); private: - void CodegenDrawColumn(const char *name, DrawColumnVariant variant); + void 
CodegenDrawColumn(const char *name, DrawColumnVariant variant, DrawColumnMethod method); void CodegenDrawSpan(const char *name, DrawSpanVariant variant); void CodegenDrawWall(const char *name, DrawWallVariant variant, int columns); @@ -84,22 +84,36 @@ LLVMDrawers *LLVMDrawers::Instance() LLVMDrawersImpl::LLVMDrawersImpl() { - CodegenDrawColumn("FillColumn", DrawColumnVariant::Fill); - CodegenDrawColumn("FillColumnAdd", DrawColumnVariant::FillAdd); - CodegenDrawColumn("FillColumnAddClamp", DrawColumnVariant::FillAddClamp); - CodegenDrawColumn("FillColumnSubClamp", DrawColumnVariant::FillSubClamp); - CodegenDrawColumn("FillColumnRevSubClamp", DrawColumnVariant::FillRevSubClamp); - CodegenDrawColumn("DrawColumn", DrawColumnVariant::Draw); - CodegenDrawColumn("DrawColumnAdd", DrawColumnVariant::DrawAdd); - CodegenDrawColumn("DrawColumnTranslated", DrawColumnVariant::DrawTranslated); - CodegenDrawColumn("DrawColumnTlatedAdd", DrawColumnVariant::DrawTlatedAdd); - CodegenDrawColumn("DrawColumnShaded", DrawColumnVariant::DrawShaded); - CodegenDrawColumn("DrawColumnAddClamp", DrawColumnVariant::DrawAddClamp); - CodegenDrawColumn("DrawColumnAddClampTranslated", DrawColumnVariant::DrawAddClampTranslated); - CodegenDrawColumn("DrawColumnSubClamp", DrawColumnVariant::DrawSubClamp); - CodegenDrawColumn("DrawColumnSubClampTranslated", DrawColumnVariant::DrawSubClampTranslated); - CodegenDrawColumn("DrawColumnRevSubClamp", DrawColumnVariant::DrawRevSubClamp); - CodegenDrawColumn("DrawColumnRevSubClampTranslated", DrawColumnVariant::DrawRevSubClampTranslated); + CodegenDrawColumn("FillColumn", DrawColumnVariant::Fill, DrawColumnMethod::Normal); + CodegenDrawColumn("FillColumnAdd", DrawColumnVariant::FillAdd, DrawColumnMethod::Normal); + CodegenDrawColumn("FillColumnAddClamp", DrawColumnVariant::FillAddClamp, DrawColumnMethod::Normal); + CodegenDrawColumn("FillColumnSubClamp", DrawColumnVariant::FillSubClamp, DrawColumnMethod::Normal); + CodegenDrawColumn("FillColumnRevSubClamp", 
DrawColumnVariant::FillRevSubClamp, DrawColumnMethod::Normal); + CodegenDrawColumn("DrawColumn", DrawColumnVariant::Draw, DrawColumnMethod::Normal); + CodegenDrawColumn("DrawColumnAdd", DrawColumnVariant::DrawAdd, DrawColumnMethod::Normal); + CodegenDrawColumn("DrawColumnShaded", DrawColumnVariant::DrawShaded, DrawColumnMethod::Normal); + CodegenDrawColumn("DrawColumnAddClamp", DrawColumnVariant::DrawAddClamp, DrawColumnMethod::Normal); + CodegenDrawColumn("DrawColumnSubClamp", DrawColumnVariant::DrawSubClamp, DrawColumnMethod::Normal); + CodegenDrawColumn("DrawColumnRevSubClamp", DrawColumnVariant::DrawRevSubClamp, DrawColumnMethod::Normal); + CodegenDrawColumn("DrawColumnTranslated", DrawColumnVariant::DrawTranslated, DrawColumnMethod::Normal); + CodegenDrawColumn("DrawColumnTlatedAdd", DrawColumnVariant::DrawTlatedAdd, DrawColumnMethod::Normal); + CodegenDrawColumn("DrawColumnAddClampTranslated", DrawColumnVariant::DrawAddClampTranslated, DrawColumnMethod::Normal); + CodegenDrawColumn("DrawColumnSubClampTranslated", DrawColumnVariant::DrawSubClampTranslated, DrawColumnMethod::Normal); + CodegenDrawColumn("DrawColumnRevSubClampTranslated", DrawColumnVariant::DrawRevSubClampTranslated, DrawColumnMethod::Normal); + CodegenDrawColumn("DrawColumnRt1", DrawColumnVariant::Draw, DrawColumnMethod::Rt1); + CodegenDrawColumn("DrawColumnRt1Copy", DrawColumnVariant::DrawCopy, DrawColumnMethod::Rt1); + CodegenDrawColumn("DrawColumnRt1Add", DrawColumnVariant::DrawAdd, DrawColumnMethod::Rt1); + CodegenDrawColumn("DrawColumnRt1Shaded", DrawColumnVariant::DrawShaded, DrawColumnMethod::Rt1); + CodegenDrawColumn("DrawColumnRt1AddClamp", DrawColumnVariant::DrawAddClamp, DrawColumnMethod::Rt1); + CodegenDrawColumn("DrawColumnRt1SubClamp", DrawColumnVariant::DrawSubClamp, DrawColumnMethod::Rt1); + CodegenDrawColumn("DrawColumnRt1RevSubClamp", DrawColumnVariant::DrawRevSubClamp, DrawColumnMethod::Rt1); + CodegenDrawColumn("DrawColumnRt4", DrawColumnVariant::Draw, 
DrawColumnMethod::Rt4); + CodegenDrawColumn("DrawColumnRt4Copy", DrawColumnVariant::DrawCopy, DrawColumnMethod::Rt4); + CodegenDrawColumn("DrawColumnRt4Add", DrawColumnVariant::DrawAdd, DrawColumnMethod::Rt4); + CodegenDrawColumn("DrawColumnRt4Shaded", DrawColumnVariant::DrawShaded, DrawColumnMethod::Rt4); + CodegenDrawColumn("DrawColumnRt4AddClamp", DrawColumnVariant::DrawAddClamp, DrawColumnMethod::Rt4); + CodegenDrawColumn("DrawColumnRt4SubClamp", DrawColumnVariant::DrawSubClamp, DrawColumnMethod::Rt4); + CodegenDrawColumn("DrawColumnRt4RevSubClamp", DrawColumnVariant::DrawRevSubClamp, DrawColumnMethod::Rt4); CodegenDrawSpan("DrawSpan", DrawSpanVariant::Opaque); CodegenDrawSpan("DrawSpanMasked", DrawSpanVariant::Masked); CodegenDrawSpan("DrawSpanTranslucent", DrawSpanVariant::Translucent); @@ -129,15 +143,29 @@ LLVMDrawersImpl::LLVMDrawersImpl() FillColumnRevSubClamp = mProgram.GetProcAddress("FillColumnRevSubClamp"); DrawColumn = mProgram.GetProcAddress("DrawColumn"); DrawColumnAdd = mProgram.GetProcAddress("DrawColumnAdd"); - DrawColumnTranslated = mProgram.GetProcAddress("DrawColumnTranslated"); - DrawColumnTlatedAdd = mProgram.GetProcAddress("DrawColumnTlatedAdd"); DrawColumnShaded = mProgram.GetProcAddress("DrawColumnShaded"); DrawColumnAddClamp = mProgram.GetProcAddress("DrawColumnAddClamp"); - DrawColumnAddClampTranslated = mProgram.GetProcAddress("DrawColumnAddClampTranslated"); DrawColumnSubClamp = mProgram.GetProcAddress("DrawColumnSubClamp"); - DrawColumnSubClampTranslated = mProgram.GetProcAddress("DrawColumnSubClampTranslated"); DrawColumnRevSubClamp = mProgram.GetProcAddress("DrawColumnRevSubClamp"); + DrawColumnTranslated = mProgram.GetProcAddress("DrawColumnTranslated"); + DrawColumnTlatedAdd = mProgram.GetProcAddress("DrawColumnTlatedAdd"); + DrawColumnAddClampTranslated = mProgram.GetProcAddress("DrawColumnAddClampTranslated"); + DrawColumnSubClampTranslated = mProgram.GetProcAddress("DrawColumnSubClampTranslated"); 
DrawColumnRevSubClampTranslated = mProgram.GetProcAddress("DrawColumnRevSubClampTranslated"); + DrawColumnRt1 = mProgram.GetProcAddress("DrawColumnRt1"); + DrawColumnRt1Copy = mProgram.GetProcAddress("DrawColumnRt1Copy"); + DrawColumnRt1Add = mProgram.GetProcAddress("DrawColumnRt1Add"); + DrawColumnRt1Shaded = mProgram.GetProcAddress("DrawColumnRt1Shaded"); + DrawColumnRt1AddClamp = mProgram.GetProcAddress("DrawColumnRt1AddClamp"); + DrawColumnRt1SubClamp = mProgram.GetProcAddress("DrawColumnRt1SubClamp"); + DrawColumnRt1RevSubClamp = mProgram.GetProcAddress("DrawColumnRt1RevSubClamp"); + DrawColumnRt4 = mProgram.GetProcAddress("DrawColumnRt4"); + DrawColumnRt4Copy = mProgram.GetProcAddress("DrawColumnRt4Copy"); + DrawColumnRt4Add = mProgram.GetProcAddress("DrawColumnRt4Add"); + DrawColumnRt4Shaded = mProgram.GetProcAddress("DrawColumnRt4Shaded"); + DrawColumnRt4AddClamp = mProgram.GetProcAddress("DrawColumnRt4AddClamp"); + DrawColumnRt4SubClamp = mProgram.GetProcAddress("DrawColumnRt4SubClamp"); + DrawColumnRt4RevSubClamp = mProgram.GetProcAddress("DrawColumnRt4RevSubClamp"); DrawSpan = mProgram.GetProcAddress("DrawSpan"); DrawSpanMasked = mProgram.GetProcAddress("DrawSpanMasked"); DrawSpanTranslucent = mProgram.GetProcAddress("DrawSpanTranslucent"); @@ -160,7 +188,7 @@ LLVMDrawersImpl::LLVMDrawersImpl() mProgram.StopLogFatalErrors(); } -void LLVMDrawersImpl::CodegenDrawColumn(const char *name, DrawColumnVariant variant) +void LLVMDrawersImpl::CodegenDrawColumn(const char *name, DrawColumnVariant variant, DrawColumnMethod method) { llvm::IRBuilder<> builder(mProgram.context()); SSAScope ssa_scope(&mProgram.context(), mProgram.module(), &builder); @@ -171,7 +199,7 @@ void LLVMDrawersImpl::CodegenDrawColumn(const char *name, DrawColumnVariant vari function.create_public(); DrawColumnCodegen codegen; - codegen.Generate(variant, function.parameter(0), function.parameter(1)); + codegen.Generate(variant, method, function.parameter(0), function.parameter(1)); 
builder.CreateRetVoid(); @@ -310,6 +338,7 @@ llvm::Type *LLVMDrawersImpl::GetWorkerThreadDataStruct(llvm::LLVMContext &contex std::vector elements; for (int i = 0; i < 4; i++) elements.push_back(llvm::Type::getInt32Ty(context)); + elements.push_back(llvm::Type::getInt8PtrTy(context)); return llvm::StructType::get(context, elements, false)->getPointerTo(); } diff --git a/src/r_compiler/llvmdrawers.h b/src/r_compiler/llvmdrawers.h index 2ce4c52306..549825e4f9 100644 --- a/src/r_compiler/llvmdrawers.h +++ b/src/r_compiler/llvmdrawers.h @@ -7,6 +7,7 @@ struct WorkerThreadData int32_t num_cores; int32_t pass_start_y; int32_t pass_end_y; + uint32_t *temp; }; struct DrawWallArgs @@ -122,20 +123,34 @@ public: void(*DrawColumn)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; void(*DrawColumnAdd)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; - void(*DrawColumnTranslated)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; - void(*DrawColumnTlatedAdd)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; void(*DrawColumnShaded)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; void(*DrawColumnAddClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; - void(*DrawColumnAddClampTranslated)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; void(*DrawColumnSubClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; - void(*DrawColumnSubClampTranslated)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; void(*DrawColumnRevSubClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; + void(*DrawColumnTranslated)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; + void(*DrawColumnTlatedAdd)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; + void(*DrawColumnAddClampTranslated)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; + void(*DrawColumnSubClampTranslated)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; 
void(*DrawColumnRevSubClampTranslated)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; void(*FillColumn)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; void(*FillColumnAdd)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; void(*FillColumnAddClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; void(*FillColumnSubClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; void(*FillColumnRevSubClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; + void(*DrawColumnRt1)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; + void(*DrawColumnRt1Copy)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; + void(*DrawColumnRt1Add)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; + void(*DrawColumnRt1Shaded)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; + void(*DrawColumnRt1AddClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; + void(*DrawColumnRt1SubClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; + void(*DrawColumnRt1RevSubClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; + void(*DrawColumnRt4)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; + void(*DrawColumnRt4Copy)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; + void(*DrawColumnRt4Add)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; + void(*DrawColumnRt4Shaded)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; + void(*DrawColumnRt4AddClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; + void(*DrawColumnRt4SubClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; + void(*DrawColumnRt4RevSubClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; void(*DrawSpan)(const DrawSpanArgs *) = nullptr; void(*DrawSpanMasked)(const DrawSpanArgs *) = nullptr; diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 43075d0a64..b875bd4139 100644 --- 
a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -334,6 +334,7 @@ public: class name##LLVMCommand : public base \ { \ public: \ + using base::base; \ void Execute(DrawerThread *thread) override \ { \ WorkerThreadData d = ThreadData(thread); \ diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp index 45bd5c029e..a73ba643c9 100644 --- a/src/r_drawt_rgba.cpp +++ b/src/r_drawt_rgba.cpp @@ -43,6 +43,7 @@ #include "r_things.h" #include "v_video.h" #include "r_draw_rgba.h" +#include "r_compiler/llvmdrawers.h" #ifndef NO_SSE #include #endif @@ -89,6 +90,89 @@ extern unsigned int *horizspan[4]; ///////////////////////////////////////////////////////////////////////////// +class DrawColumnRt1LLVMCommand : public DrawerCommand +{ +protected: + DrawColumnArgs args; + + WorkerThreadData ThreadData(DrawerThread *thread) + { + WorkerThreadData d; + d.core = thread->core; + d.num_cores = thread->num_cores; + d.pass_start_y = thread->pass_start_y; + d.pass_end_y = thread->pass_end_y; + d.temp = thread->dc_temp_rgba; + return d; + } + +public: + DrawColumnRt1LLVMCommand(int hx, int sx, int yl, int yh) + { + args.dest = (uint32_t*)dc_destorg + ylookup[yl] + sx; + args.source = nullptr; + args.colormap = dc_colormap; + args.translation = dc_translation; + args.basecolors = (const uint32_t *)GPalette.BaseColors; + args.pitch = dc_pitch; + args.count = yh - yl + 1; + args.dest_y = yl; + args.iscale = dc_iscale; + args.texturefrac = hx; + args.light = LightBgra::calc_light_multiplier(dc_light); + args.color = LightBgra::shade_pal_index_simple(dc_color, args.light); + args.srccolor = dc_srccolor_bgra; + args.srcalpha = dc_srcalpha >> (FRACBITS - 8); + args.destalpha = dc_destalpha >> (FRACBITS - 8); + args.light_red = dc_shade_constants.light_red; + args.light_green = dc_shade_constants.light_green; + args.light_blue = dc_shade_constants.light_blue; + args.light_alpha = dc_shade_constants.light_alpha; + args.fade_red = dc_shade_constants.fade_red; + args.fade_green = 
dc_shade_constants.fade_green; + args.fade_blue = dc_shade_constants.fade_blue; + args.fade_alpha = dc_shade_constants.fade_alpha; + args.desaturate = dc_shade_constants.desaturate; + args.flags = 0; + if (dc_shade_constants.simple_shade) + args.flags |= DrawColumnArgs::simple_shade; + } + + void Execute(DrawerThread *thread) override + { + WorkerThreadData d = ThreadData(thread); + LLVMDrawers::Instance()->DrawColumnRt1(&args, &d); + } +}; + +#define DECLARE_DRAW_COMMAND(name, func, base) \ +class name##LLVMCommand : public base \ +{ \ +public: \ + using base::base; \ + void Execute(DrawerThread *thread) override \ + { \ + WorkerThreadData d = ThreadData(thread); \ + LLVMDrawers::Instance()->func(&args, &d); \ + } \ +}; + +DECLARE_DRAW_COMMAND(DrawColumnRt1Copy, DrawColumnRt1Copy, DrawColumnRt1LLVMCommand); +DECLARE_DRAW_COMMAND(DrawColumnRt1Add, DrawColumnRt1Add, DrawColumnRt1LLVMCommand); +DECLARE_DRAW_COMMAND(DrawColumnRt1Shaded, DrawColumnRt1Shaded, DrawColumnRt1LLVMCommand); +DECLARE_DRAW_COMMAND(DrawColumnRt1AddClamp, DrawColumnRt1AddClamp, DrawColumnRt1LLVMCommand); +DECLARE_DRAW_COMMAND(DrawColumnRt1SubClamp, DrawColumnRt1SubClamp, DrawColumnRt1LLVMCommand); +DECLARE_DRAW_COMMAND(DrawColumnRt1RevSubClamp, DrawColumnRt1RevSubClamp, DrawColumnRt1LLVMCommand); +DECLARE_DRAW_COMMAND(DrawColumnRt4, DrawColumnRt4, DrawColumnRt1LLVMCommand); +DECLARE_DRAW_COMMAND(DrawColumnRt4Copy, DrawColumnRt4Copy, DrawColumnRt1LLVMCommand); +DECLARE_DRAW_COMMAND(DrawColumnRt4Add, DrawColumnRt4Add, DrawColumnRt1LLVMCommand); +DECLARE_DRAW_COMMAND(DrawColumnRt4Shaded, DrawColumnRt4Shaded, DrawColumnRt1LLVMCommand); +DECLARE_DRAW_COMMAND(DrawColumnRt4AddClamp, DrawColumnRt4AddClamp, DrawColumnRt1LLVMCommand); +DECLARE_DRAW_COMMAND(DrawColumnRt4SubClamp, DrawColumnRt4SubClamp, DrawColumnRt1LLVMCommand); +DECLARE_DRAW_COMMAND(DrawColumnRt4RevSubClamp, DrawColumnRt4RevSubClamp, DrawColumnRt1LLVMCommand); + 
+///////////////////////////////////////////////////////////////////////////// + class DrawerRt1colCommand : public DrawerCommand { public: @@ -756,7 +840,7 @@ public: // Copies one span at hx to the screen at sx. void rt_copy1col_rgba (int hx, int sx, int yl, int yh) { - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Copies all four spans to the screen starting at sx. @@ -772,17 +856,13 @@ void rt_copy4cols_rgba (int sx, int yl, int yh) // Maps one span at hx to the screen at sx. void rt_map1col_rgba (int hx, int sx, int yl, int yh) { - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Maps all four spans to the screen starting at sx. void rt_map4cols_rgba (int sx, int yl, int yh) { -#ifdef NO_SSE - DrawerCommandQueue::QueueCommand(sx, yl, yh); -#else - DrawerCommandQueue::QueueCommand(sx, yl, yh); -#endif + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); } void rt_Translate1col_rgba(const BYTE *translation, int hx, int yl, int yh) @@ -812,17 +892,13 @@ void rt_tlate4cols_rgba (int sx, int yl, int yh) // Adds one span at hx to the screen at sx without clamping. void rt_add1col_rgba (int hx, int sx, int yl, int yh) { - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Adds all four spans to the screen starting at sx without clamping. void rt_add4cols_rgba (int sx, int yl, int yh) { -#ifdef NO_SSE - DrawerCommandQueue::QueueCommand(sx, yl, yh); -#else - DrawerCommandQueue::QueueCommand(sx, yl, yh); -#endif + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); } // Translates and adds one span at hx to the screen at sx without clamping. @@ -842,33 +918,25 @@ void rt_tlateadd4cols_rgba(int sx, int yl, int yh) // Shades one span at hx to the screen at sx. 
void rt_shaded1col_rgba (int hx, int sx, int yl, int yh) { - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Shades all four spans to the screen starting at sx. void rt_shaded4cols_rgba (int sx, int yl, int yh) { -#ifdef NO_SSE - DrawerCommandQueue::QueueCommand(sx, yl, yh); -#else - DrawerCommandQueue::QueueCommand(sx, yl, yh); -#endif + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); } // Adds one span at hx to the screen at sx with clamping. void rt_addclamp1col_rgba (int hx, int sx, int yl, int yh) { - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Adds all four spans to the screen starting at sx with clamping. void rt_addclamp4cols_rgba (int sx, int yl, int yh) { -#ifdef NO_SSE - DrawerCommandQueue::QueueCommand(sx, yl, yh); -#else - DrawerCommandQueue::QueueCommand(sx, yl, yh); -#endif + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); } // Translates and adds one span at hx to the screen at sx with clamping. @@ -888,17 +956,13 @@ void rt_tlateaddclamp4cols_rgba (int sx, int yl, int yh) // Subtracts one span at hx to the screen at sx with clamping. void rt_subclamp1col_rgba (int hx, int sx, int yl, int yh) { - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Subtracts all four spans to the screen starting at sx with clamping. void rt_subclamp4cols_rgba (int sx, int yl, int yh) { -#ifdef NO_SSE - DrawerCommandQueue::QueueCommand(sx, yl, yh); -#else - DrawerCommandQueue::QueueCommand(sx, yl, yh); -#endif + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); } // Translates and subtracts one span at hx to the screen at sx with clamping. @@ -918,17 +982,13 @@ void rt_tlatesubclamp4cols_rgba (int sx, int yl, int yh) // Subtracts one span at hx from the screen at sx with clamping. 
void rt_revsubclamp1col_rgba (int hx, int sx, int yl, int yh) { - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Subtracts all four spans from the screen starting at sx with clamping. void rt_revsubclamp4cols_rgba (int sx, int yl, int yh) { -#ifdef NO_SSE - DrawerCommandQueue::QueueCommand(sx, yl, yh); -#else - DrawerCommandQueue::QueueCommand(sx, yl, yh); -#endif + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); } // Translates and subtracts one span at hx from the screen at sx with clamping. From 78415461b90ad7e406507ddb93d26707ae57874b Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 7 Oct 2016 06:56:20 +0200 Subject: [PATCH 166/912] Removed old SSE macros and drawers --- src/r_draw_rgba.cpp | 35 +- src/r_draw_rgba.h | 550 ------------------------------ src/r_drawt_rgba.cpp | 446 ------------------------ src/r_drawt_rgba_sse.h | 757 ----------------------------------------- 4 files changed, 31 insertions(+), 1757 deletions(-) delete mode 100644 src/r_drawt_rgba_sse.h diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index b875bd4139..424d3140bf 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -63,9 +63,6 @@ CVAR(Bool, r_mipmap, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); class DrawSpanLLVMCommand : public DrawerCommand { -protected: - DrawSpanArgs args; - public: DrawSpanLLVMCommand() { @@ -96,7 +93,7 @@ public: args.flags = 0; if (ds_shade_constants.simple_shade) args.flags |= DrawSpanArgs::simple_shade; - if (!SampleBgra::span_sampler_setup(args.source, args.xbits, args.ybits, args.xstep, args.ystep, ds_source_mipmapped)) + if (!sampler_setup(args.source, args.xbits, args.ybits, args.xstep, args.ystep, ds_source_mipmapped)) args.flags |= DrawSpanArgs::nearest_filter; } @@ -106,6 +103,36 @@ public: return; LLVMDrawers::Instance()->DrawSpan(&args); } + +protected: + DrawSpanArgs args; + +private: + inline static bool sampler_setup(const uint32_t * &source, int &xbits, int 
&ybits, fixed_t xstep, fixed_t ystep, bool mipmapped) + { + // Is this a magfilter or minfilter? + fixed_t xmagnitude = abs(xstep) >> (32 - xbits - FRACBITS); + fixed_t ymagnitude = abs(ystep) >> (32 - ybits - FRACBITS); + fixed_t magnitude = (xmagnitude + ymagnitude) * 2 + (1 << (FRACBITS - 1)); + bool magnifying = (magnitude >> FRACBITS == 0); + + if (r_mipmap && mipmapped) + { + int level = magnitude >> (FRACBITS + 1); + while (level != 0) + { + if (xbits <= 2 || ybits <= 2) + break; + + source += (1 << (xbits)) * (1 << (ybits)); + xbits -= 1; + ybits -= 1; + level >>= 1; + } + } + + return (magnifying && r_magfilter) || (!magnifying && r_minfilter); + } }; class DrawSpanMaskedLLVMCommand : public DrawSpanLLVMCommand diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index 8b704c0a64..d3ad0613ab 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -283,554 +283,4 @@ public: } }; -class BlendBgra -{ -public: - FORCEINLINE static uint32_t copy(uint32_t fg) - { - return fg; - } - - FORCEINLINE static uint32_t add(uint32_t fg, uint32_t bg, uint32_t srcalpha, uint32_t destalpha) - { - uint32_t red = MIN((RPART(fg) * srcalpha + RPART(bg) * destalpha) >> 8, 255); - uint32_t green = MIN((GPART(fg) * srcalpha + GPART(bg) * destalpha) >> 8, 255); - uint32_t blue = MIN((BPART(fg) * srcalpha + BPART(bg) * destalpha) >> 8, 255); - return 0xff000000 | (red << 16) | (green << 8) | blue; - } - - FORCEINLINE static uint32_t sub(uint32_t fg, uint32_t bg, uint32_t srcalpha, uint32_t destalpha) - { - uint32_t red = clamp((0x10000 - RPART(fg) * srcalpha + RPART(bg) * destalpha) >> 8, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 - GPART(fg) * srcalpha + GPART(bg) * destalpha) >> 8, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 - BPART(fg) * srcalpha + BPART(bg) * destalpha) >> 8, 256, 256 + 255) - 256; - return 0xff000000 | (red << 16) | (green << 8) | blue; - } - - FORCEINLINE static uint32_t revsub(uint32_t fg, uint32_t bg, uint32_t srcalpha, uint32_t 
destalpha) - { - uint32_t red = clamp((0x10000 + RPART(fg) * srcalpha - RPART(bg) * destalpha) >> 8, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 + GPART(fg) * srcalpha - GPART(bg) * destalpha) >> 8, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 + BPART(fg) * srcalpha - BPART(bg) * destalpha) >> 8, 256, 256 + 255) - 256; - return 0xff000000 | (red << 16) | (green << 8) | blue; - } - - FORCEINLINE static uint32_t alpha_blend(uint32_t fg, uint32_t bg) - { - uint32_t alpha = APART(fg) + (APART(fg) >> 7); // 255 -> 256 - uint32_t inv_alpha = 256 - alpha; - uint32_t red = MIN(RPART(fg) * alpha + (RPART(bg) * inv_alpha) / 256, 255); - uint32_t green = MIN(GPART(fg) * alpha + (GPART(bg) * inv_alpha) / 256, 255); - uint32_t blue = MIN(BPART(fg) * alpha + (BPART(bg) * inv_alpha) / 256, 255); - return 0xff000000 | (red << 16) | (green << 8) | blue; - } -}; - -class SampleBgra -{ -public: - inline static bool span_sampler_setup(const uint32_t * RESTRICT &source, int &xbits, int &ybits, fixed_t xstep, fixed_t ystep, bool mipmapped) - { - // Is this a magfilter or minfilter? 
- fixed_t xmagnitude = abs(xstep) >> (32 - xbits - FRACBITS); - fixed_t ymagnitude = abs(ystep) >> (32 - ybits - FRACBITS); - fixed_t magnitude = (xmagnitude + ymagnitude) * 2 + (1 << (FRACBITS - 1)); - bool magnifying = (magnitude >> FRACBITS == 0); - - if (r_mipmap && mipmapped) - { - int level = magnitude >> (FRACBITS + 1); - while (level != 0) - { - if (xbits <= 2 || ybits <= 2) - break; - - source += (1 << (xbits)) * (1 << (ybits)); - xbits -= 1; - ybits -= 1; - level >>= 1; - } - } - - return (magnifying && r_magfilter) || (!magnifying && r_minfilter); - } - - FORCEINLINE static uint32_t sample_bilinear(const uint32_t *col0, const uint32_t *col1, uint32_t texturefracx, uint32_t texturefracy, uint32_t one, uint32_t height) - { - uint32_t frac_y0 = (texturefracy >> FRACBITS) * height; - uint32_t frac_y1 = ((texturefracy + one) >> FRACBITS) * height; - uint32_t y0 = frac_y0 >> FRACBITS; - uint32_t y1 = frac_y1 >> FRACBITS; - - uint32_t p00 = col0[y0]; - uint32_t p01 = col0[y1]; - uint32_t p10 = col1[y0]; - uint32_t p11 = col1[y1]; - - uint32_t inv_b = texturefracx; - uint32_t inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; - uint32_t a = 16 - inv_a; - uint32_t b = 16 - inv_b; - - uint32_t red = (RPART(p00) * a * b + RPART(p01) * inv_a * b + RPART(p10) * a * inv_b + RPART(p11) * inv_a * inv_b + 127) >> 8; - uint32_t green = (GPART(p00) * a * b + GPART(p01) * inv_a * b + GPART(p10) * a * inv_b + GPART(p11) * inv_a * inv_b + 127) >> 8; - uint32_t blue = (BPART(p00) * a * b + BPART(p01) * inv_a * b + BPART(p10) * a * inv_b + BPART(p11) * inv_a * inv_b + 127) >> 8; - uint32_t alpha = (APART(p00) * a * b + APART(p01) * inv_a * b + APART(p10) * a * inv_b + APART(p11) * inv_a * inv_b + 127) >> 8; - - return (alpha << 24) | (red << 16) | (green << 8) | blue; - } - - FORCEINLINE static uint32_t sample_bilinear(const uint32_t *texture, dsfixed_t xfrac, dsfixed_t yfrac, int xbits, int ybits) - { - int xshift = (32 - xbits); - int yshift = (32 - ybits); - int xmask = (1 << xshift) 
- 1; - int ymask = (1 << yshift) - 1; - uint32_t x = xfrac >> xbits; - uint32_t y = yfrac >> ybits; - - uint32_t p00 = texture[(y & ymask) + ((x & xmask) << yshift)]; - uint32_t p01 = texture[((y + 1) & ymask) + ((x & xmask) << yshift)]; - uint32_t p10 = texture[(y & ymask) + (((x + 1) & xmask) << yshift)]; - uint32_t p11 = texture[((y + 1) & ymask) + (((x + 1) & xmask) << yshift)]; - - uint32_t inv_b = (xfrac >> (xbits - 4)) & 15; - uint32_t inv_a = (yfrac >> (ybits - 4)) & 15; - uint32_t a = 16 - inv_a; - uint32_t b = 16 - inv_b; - - uint32_t red = (RPART(p00) * a * b + RPART(p01) * inv_a * b + RPART(p10) * a * inv_b + RPART(p11) * inv_a * inv_b + 127) >> 8; - uint32_t green = (GPART(p00) * a * b + GPART(p01) * inv_a * b + GPART(p10) * a * inv_b + GPART(p11) * inv_a * inv_b + 127) >> 8; - uint32_t blue = (BPART(p00) * a * b + BPART(p01) * inv_a * b + BPART(p10) * a * inv_b + BPART(p11) * inv_a * inv_b + 127) >> 8; - uint32_t alpha = (APART(p00) * a * b + APART(p01) * inv_a * b + APART(p10) * a * inv_b + APART(p11) * inv_a * inv_b + 127) >> 8; - - return (alpha << 24) | (red << 16) | (green << 8) | blue; - } -}; - -///////////////////////////////////////////////////////////////////////////// -// SSE/AVX shading macros: - -#define AVX2_SAMPLE_BILINEAR4_COLUMN_INIT(col0, col1, one, height, texturefracx) \ - const uint32_t *baseptr = col0[0]; \ - __m128i coloffsets0 = _mm_setr_epi32(col0[0] - baseptr, col0[1] - baseptr, col0[2] - baseptr, col0[3] - baseptr); \ - __m128i coloffsets1 = _mm_setr_epi32(col1[0] - baseptr, col1[1] - baseptr, col1[2] - baseptr, col1[3] - baseptr); \ - __m128i mone = _mm_loadu_si128((const __m128i*)one); \ - __m128i m127 = _mm_set1_epi16(127); \ - __m128i m16 = _mm_set1_epi32(16); \ - __m128i m15 = _mm_set1_epi32(15); \ - __m128i mheight = _mm_loadu_si128((const __m128i*)height); \ - __m128i mtexturefracx = _mm_loadu_si128((const __m128i*)texturefracx); - -#define AVX2_SAMPLE_BILINEAR4_COLUMN(fg, texturefracy) { \ - __m128i mtexturefracy = 
_mm_loadu_si128((const __m128i*)texturefracy); \ - __m128i multmp0 = _mm_srli_epi32(mtexturefracy, FRACBITS); \ - __m128i multmp1 = _mm_srli_epi32(_mm_add_epi32(mtexturefracy, mone), FRACBITS); \ - __m128i frac_y0 = _mm_or_si128(_mm_mul_epu32(multmp0, mheight), _mm_slli_si128(_mm_mul_epu32(_mm_srli_si128(multmp0, 4), _mm_srli_si128(mheight, 4)), 4)); \ - __m128i frac_y1 = _mm_or_si128(_mm_mul_epu32(multmp1, mheight), _mm_slli_si128(_mm_mul_epu32(_mm_srli_si128(multmp1, 4), _mm_srli_si128(mheight, 4)), 4)); \ - __m128i y0 = _mm_srli_epi32(frac_y0, FRACBITS); \ - __m128i y1 = _mm_srli_epi32(frac_y1, FRACBITS); \ - __m128i inv_b = mtexturefracx; \ - __m128i inv_a = _mm_and_si128(_mm_srli_epi32(frac_y1, FRACBITS - 4), m15); \ - __m128i a = _mm_sub_epi32(m16, inv_a); \ - __m128i b = _mm_sub_epi32(m16, inv_b); \ - __m128i ab = _mm_mullo_epi16(a, b); \ - __m128i invab = _mm_mullo_epi16(inv_a, b); \ - __m128i ainvb = _mm_mullo_epi16(a, inv_b); \ - __m128i invainvb = _mm_mullo_epi16(inv_a, inv_b); \ - __m128i ab_lo = _mm_shuffle_epi32(ab, _MM_SHUFFLE(1, 1, 0, 0)); \ - __m128i ab_hi = _mm_shuffle_epi32(ab, _MM_SHUFFLE(3, 3, 2, 2)); \ - __m128i invab_lo = _mm_shuffle_epi32(invab, _MM_SHUFFLE(1, 1, 0, 0)); \ - __m128i invab_hi = _mm_shuffle_epi32(invab, _MM_SHUFFLE(3, 3, 2, 2)); \ - __m128i ainvb_lo = _mm_shuffle_epi32(ainvb, _MM_SHUFFLE(1, 1, 0, 0)); \ - __m128i ainvb_hi = _mm_shuffle_epi32(ainvb, _MM_SHUFFLE(3, 3, 2, 2)); \ - __m128i invainvb_lo = _mm_shuffle_epi32(invainvb, _MM_SHUFFLE(1, 1, 0, 0)); \ - __m128i invainvb_hi = _mm_shuffle_epi32(invainvb, _MM_SHUFFLE(3, 3, 2, 2)); \ - ab_lo = _mm_or_si128(ab_lo, _mm_slli_epi32(ab_lo, 16)); \ - ab_hi = _mm_or_si128(ab_hi, _mm_slli_epi32(ab_hi, 16)); \ - invab_lo = _mm_or_si128(invab_lo, _mm_slli_epi32(invab_lo, 16)); \ - invab_hi = _mm_or_si128(invab_hi, _mm_slli_epi32(invab_hi, 16)); \ - ainvb_lo = _mm_or_si128(ainvb_lo, _mm_slli_epi32(ainvb_lo, 16)); \ - ainvb_hi = _mm_or_si128(ainvb_hi, _mm_slli_epi32(ainvb_hi, 16)); \ - 
invainvb_lo = _mm_or_si128(invainvb_lo, _mm_slli_epi32(invainvb_lo, 16)); \ - invainvb_hi = _mm_or_si128(invainvb_hi, _mm_slli_epi32(invainvb_hi, 16)); \ - __m128i p00 = _mm_i32gather_epi32((const int *)baseptr, _mm_add_epi32(y0, coloffsets0), 4); \ - __m128i p01 = _mm_i32gather_epi32((const int *)baseptr, _mm_add_epi32(y1, coloffsets0), 4); \ - __m128i p10 = _mm_i32gather_epi32((const int *)baseptr, _mm_add_epi32(y0, coloffsets1), 4); \ - __m128i p11 = _mm_i32gather_epi32((const int *)baseptr, _mm_add_epi32(y1, coloffsets1), 4); \ - __m128i p00_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(p00, _mm_setzero_si128()), ab_lo); \ - __m128i p01_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(p01, _mm_setzero_si128()), invab_lo); \ - __m128i p10_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(p10, _mm_setzero_si128()), ainvb_lo); \ - __m128i p11_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(p11, _mm_setzero_si128()), invainvb_lo); \ - __m128i p00_hi = _mm_mullo_epi16(_mm_unpackhi_epi8(p00, _mm_setzero_si128()), ab_hi); \ - __m128i p01_hi = _mm_mullo_epi16(_mm_unpackhi_epi8(p01, _mm_setzero_si128()), invab_hi); \ - __m128i p10_hi = _mm_mullo_epi16(_mm_unpackhi_epi8(p10, _mm_setzero_si128()), ainvb_hi); \ - __m128i p11_hi = _mm_mullo_epi16(_mm_unpackhi_epi8(p11, _mm_setzero_si128()), invainvb_hi); \ - __m128i fg_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_adds_epu16(_mm_adds_epu16(p00_lo, p01_lo), _mm_adds_epu16(p10_lo, p11_lo)), m127), 8); \ - __m128i fg_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_adds_epu16(_mm_adds_epu16(p00_hi, p01_hi), _mm_adds_epu16(p10_hi, p11_hi)), m127), 8); \ - fg = _mm_packus_epi16(fg_lo, fg_hi); \ -} - -#define VEC_SAMPLE_BILINEAR4_COLUMN(fg, col0, col1, texturefracx, texturefracy, one, height) { \ - __m128i m127 = _mm_set1_epi16(127); \ - fg = _mm_setzero_si128(); \ - for (int i = 0; i < 4; i++) \ - { \ - uint32_t frac_y0 = (texturefracy[i] >> FRACBITS) * height[i]; \ - uint32_t frac_y1 = ((texturefracy[i] + one[i]) >> FRACBITS) * height[i]; \ - uint32_t y0 = (frac_y0 >> FRACBITS); \ 
- uint32_t y1 = (frac_y1 >> FRACBITS); \ - \ - uint32_t inv_b = texturefracx[i]; \ - uint32_t inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; \ - \ - __m128i ab_invab = _mm_load_si128(SampleBgra::samplertable + inv_b * 32 + inv_a * 2); \ - __m128i ainvb_invainvb = _mm_load_si128(SampleBgra::samplertable + inv_b * 32 + inv_a * 2 + 1); \ - \ - __m128i gather = _mm_set_epi32(col1[i][y1], col1[i][y0], col0[i][y1], col0[i][y0]); \ - __m128i p0 = _mm_unpacklo_epi8(gather, _mm_setzero_si128()); \ - __m128i p1 = _mm_unpackhi_epi8(gather, _mm_setzero_si128()); \ - \ - __m128i tmp = _mm_adds_epu16(_mm_mullo_epi16(p0, ab_invab), _mm_mullo_epi16(p1, ainvb_invainvb)); \ - __m128i color = _mm_srli_epi16(_mm_adds_epu16(_mm_adds_epu16(_mm_srli_si128(tmp, 8), tmp), m127), 8); \ - \ - fg = _mm_or_si128(_mm_srli_si128(fg, 4), _mm_slli_si128(_mm_packus_epi16(color, _mm_setzero_si128()), 12)); \ - } \ -} - -#define VEC_SAMPLE_MIP_NEAREST4_COLUMN(fg, col0, col1, mipfrac, texturefracy, height0, height1) { \ - uint32_t y0[4], y1[4]; \ - for (int i = 0; i < 4; i++) \ - { \ - y0[i] = (texturefracy[i] >> FRACBITS) * height0[i]; \ - y1[i] = (texturefracy[i] >> FRACBITS) * height1[i]; \ - } \ - __m128i p0 = _mm_set_epi32(col0[y0[3]], col0[y0[2]], col0[y0[1]], col0[y0[0]]); \ - __m128i p1 = _mm_set_epi32(col1[y1[3]], col1[y1[2]], col1[y1[1]], col1[y1[0]]); \ - __m128i t = _mm_loadu_si128((const __m128i*)mipfrac); \ - __m128i inv_t = _mm_sub_epi32(_mm_set1_epi32(256), mipfrac); \ - __m128i p0_lo = _mm_unpacklo_epi8(p0, _mm_setzero_si128()); \ - __m128i p0_hi = _mm_unpackhi_epi8(p0, _mm_setzero_si128()); \ - __m128i p1_lo = _mm_unpacklo_epi8(p1, _mm_setzero_si128()); \ - __m128i p1_hi = _mm_unpackhi_epi8(p1, _mm_setzero_si128()); \ - __m128i fg_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(p0_lo, t), _mm_mullo_epi16(p1_lo, inv_t)), 8); \ - __m128i fg_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(p0_hi, t), _mm_mullo_epi16(p1_hi, inv_t)), 8); \ - fg = _mm_packus_epi16(fg_lo, fg_hi); \ -} - 
-#define VEC_SAMPLE_BILINEAR4_SPAN(fg, texture, xfrac, yfrac, xstep, ystep, xbits, ybits) { \ - int xshift = (32 - xbits); \ - int yshift = (32 - ybits); \ - int xmask = (1 << xshift) - 1; \ - int ymask = (1 << yshift) - 1; \ - \ - __m128i m127 = _mm_set1_epi16(127); \ - fg = _mm_setzero_si128(); \ - for (int i = 0; i < 4; i++) \ - { \ - uint32_t x = xfrac >> xbits; \ - uint32_t y = yfrac >> ybits; \ - \ - uint32_t p00 = texture[(y & ymask) + ((x & xmask) << yshift)]; \ - uint32_t p01 = texture[((y + 1) & ymask) + ((x & xmask) << yshift)]; \ - uint32_t p10 = texture[(y & ymask) + (((x + 1) & xmask) << yshift)]; \ - uint32_t p11 = texture[((y + 1) & ymask) + (((x + 1) & xmask) << yshift)]; \ - \ - uint32_t inv_b = (xfrac >> (xbits - 4)) & 15; \ - uint32_t inv_a = (yfrac >> (ybits - 4)) & 15; \ - \ - __m128i ab_invab = _mm_load_si128(SampleBgra::samplertable + inv_b * 32 + inv_a * 2); \ - __m128i ainvb_invainvb = _mm_load_si128(SampleBgra::samplertable + inv_b * 32 + inv_a * 2 + 1); \ - \ - __m128i p0 = _mm_unpacklo_epi8(_mm_set_epi32(0, 0, p01, p00), _mm_setzero_si128()); \ - __m128i p1 = _mm_unpacklo_epi8(_mm_set_epi32(0, 0, p11, p10), _mm_setzero_si128()); \ - \ - __m128i tmp = _mm_adds_epu16(_mm_mullo_epi16(p0, ab_invab), _mm_mullo_epi16(p1, ainvb_invainvb)); \ - __m128i color = _mm_srli_epi16(_mm_adds_epu16(_mm_adds_epu16(_mm_srli_si128(tmp, 8), tmp), m127), 8); \ - \ - fg = _mm_or_si128(_mm_srli_si128(fg, 4), _mm_slli_si128(_mm_packus_epi16(color, _mm_setzero_si128()), 12)); \ - \ - xfrac += xstep; \ - yfrac += ystep; \ - } \ -} - -// Calculate constants for a simple shade with gamma correction -#define AVX_LINEAR_SHADE_SIMPLE_INIT(light) \ - __m256 mlight_hi = _mm256_set_ps(1.0f, light * (1.0f/256.0f), light * (1.0f/256.0f), light * (1.0f/256.0f), 1.0f, light * (1.0f/256.0f), light * (1.0f/256.0f), light * (1.0f/256.0f)); \ - mlight_hi = _mm256_mul_ps(mlight_hi, mlight_hi); \ - __m256 mlight_lo = mlight_hi; \ - __m256 mrcp_255 = _mm256_set1_ps(1.0f/255.0f); \ 
- __m256 m255 = _mm256_set1_ps(255.0f); - -// Calculate constants for a simple shade with different light levels for each pixel and gamma correction -#define AVX_LINEAR_SHADE_SIMPLE_INIT4(light3, light2, light1, light0) \ - __m256 mlight_hi = _mm256_set_ps(1.0f, light1 * (1.0f/256.0f), light1 * (1.0f/256.0f), light1 * (1.0f/256.0f), 1.0f, light0 * (1.0f/256.0f), light0 * (1.0f/256.0f), light0 * (1.0f/256.0f)); \ - __m256 mlight_lo = _mm256_set_ps(1.0f, light3 * (1.0f/256.0f), light3 * (1.0f/256.0f), light3 * (1.0f/256.0f), 1.0f, light2 * (1.0f/256.0f), light2 * (1.0f/256.0f), light2 * (1.0f/256.0f)); \ - mlight_hi = _mm256_mul_ps(mlight_hi, mlight_hi); \ - mlight_lo = _mm256_mul_ps(mlight_lo, mlight_lo); \ - __m256 mrcp_255 = _mm256_set1_ps(1.0f/255.0f); \ - __m256 m255 = _mm256_set1_ps(255.0f); - -// Simple shade 4 pixels with gamma correction -#define AVX_LINEAR_SHADE_SIMPLE(fg) { \ - __m256i fg_16 = _mm256_set_m128i(_mm_unpackhi_epi8(fg, _mm_setzero_si128()), _mm_unpacklo_epi8(fg, _mm_setzero_si128())); \ - __m256 fg_hi = _mm256_cvtepi32_ps(_mm256_unpackhi_epi16(fg_16, _mm256_setzero_si256())); \ - __m256 fg_lo = _mm256_cvtepi32_ps(_mm256_unpacklo_epi16(fg_16, _mm256_setzero_si256())); \ - fg_hi = _mm256_mul_ps(fg_hi, mrcp_255); \ - fg_hi = _mm256_mul_ps(fg_hi, fg_hi); \ - fg_hi = _mm256_mul_ps(fg_hi, mlight_hi); \ - fg_hi = _mm256_sqrt_ps(fg_hi); \ - fg_hi = _mm256_mul_ps(fg_hi, m255); \ - fg_lo = _mm256_mul_ps(fg_lo, mrcp_255); \ - fg_lo = _mm256_mul_ps(fg_lo, fg_lo); \ - fg_lo = _mm256_mul_ps(fg_lo, mlight_lo); \ - fg_lo = _mm256_sqrt_ps(fg_lo); \ - fg_lo = _mm256_mul_ps(fg_lo, m255); \ - fg_16 = _mm256_packus_epi32(_mm256_cvtps_epi32(fg_lo), _mm256_cvtps_epi32(fg_hi)); \ - fg = _mm_packus_epi16(_mm256_extractf128_si256(fg_16, 0), _mm256_extractf128_si256(fg_16, 1)); \ -} - -// Calculate constants for a complex shade with gamma correction -#define AVX_LINEAR_SHADE_INIT(light, shade_constants) \ - __m256 mlight_hi = _mm256_set_ps(1.0f, light * (1.0f/256.0f), 
light * (1.0f/256.0f), light * (1.0f/256.0f), 1.0f, light * (1.0f/256.0f), light * (1.0f/256.0f), light * (1.0f/256.0f)); \ - mlight_hi = _mm256_mul_ps(mlight_hi, mlight_hi); \ - __m256 mlight_lo = mlight_hi; \ - __m256 mrcp_255 = _mm256_set1_ps(1.0f/255.0f); \ - __m256 m255 = _mm256_set1_ps(255.0f); \ - __m256 color = _mm256_set_ps( \ - 1.0f, shade_constants.light_red * (1.0f/256.0f), shade_constants.light_green * (1.0f/256.0f), shade_constants.light_blue * (1.0f/256.0f), \ - 1.0f, shade_constants.light_red * (1.0f/256.0f), shade_constants.light_green * (1.0f/256.0f), shade_constants.light_blue * (1.0f/256.0f)); \ - __m256 fade = _mm256_set_ps( \ - 0.0f, shade_constants.fade_red * (1.0f/256.0f), shade_constants.fade_green * (1.0f/256.0f), shade_constants.fade_blue * (1.0f/256.0f), \ - 0.0f, shade_constants.fade_red * (1.0f/256.0f), shade_constants.fade_green * (1.0f/256.0f), shade_constants.fade_blue * (1.0f/256.0f)); \ - __m256 fade_amount_hi = _mm256_mul_ps(fade, _mm256_sub_ps(_mm256_set1_ps(1.0f), mlight_hi)); \ - __m256 fade_amount_lo = _mm256_mul_ps(fade, _mm256_sub_ps(_mm256_set1_ps(1.0f), mlight_lo)); \ - __m256 inv_desaturate = _mm256_set1_ps((256 - shade_constants.desaturate) * (1.0f/256.0f)); \ - __m128 ss_desaturate = _mm_set_ss(shade_constants.desaturate * (1.0f/256.0f)); \ - __m128 intensity_weight = _mm_set_ps(0.0f, 77.0f/256.0f, 143.0f/256.0f, 37.0f/256.0f); - -// Calculate constants for a complex shade with different light levels for each pixel and gamma correction -#define AVX_LINEAR_SHADE_INIT4(light3, light2, light1, light0, shade_constants) \ - __m256 mlight_hi = _mm256_set_ps(1.0f, light1 * (1.0f/256.0f), light1 * (1.0f/256.0f), light1 * (1.0f/256.0f), 1.0f, light0 * (1.0f/256.0f), light0 * (1.0f/256.0f), light0 * (1.0f/256.0f)); \ - __m256 mlight_lo = _mm256_set_ps(1.0f, light3 * (1.0f/256.0f), light3 * (1.0f/256.0f), light3 * (1.0f/256.0f), 1.0f, light2 * (1.0f/256.0f), light2 * (1.0f/256.0f), light2 * (1.0f/256.0f)); \ - mlight_hi = 
_mm256_mul_ps(mlight_hi, mlight_hi); \ - mlight_lo = _mm256_mul_ps(mlight_lo, mlight_lo); \ - __m256 mrcp_255 = _mm256_set1_ps(1.0f/255.0f); \ - __m256 m255 = _mm256_set1_ps(255.0f); \ - __m256 color = _mm256_set_ps( \ - 1.0f, shade_constants.light_red * (1.0f/256.0f), shade_constants.light_green * (1.0f/256.0f), shade_constants.light_blue * (1.0f/256.0f), \ - 1.0f, shade_constants.light_red * (1.0f/256.0f), shade_constants.light_green * (1.0f/256.0f), shade_constants.light_blue * (1.0f/256.0f)); \ - __m256 fade = _mm256_set_ps( \ - 0.0f, shade_constants.fade_red * (1.0f/256.0f), shade_constants.fade_green * (1.0f/256.0f), shade_constants.fade_blue * (1.0f/256.0f), \ - 0.0f, shade_constants.fade_red * (1.0f/256.0f), shade_constants.fade_green * (1.0f/256.0f), shade_constants.fade_blue * (1.0f/256.0f)); \ - __m256 fade_amount_hi = _mm256_mul_ps(fade, _mm256_sub_ps(_mm256_set1_ps(1.0f), mlight_hi)); \ - __m256 fade_amount_lo = _mm256_mul_ps(fade, _mm256_sub_ps(_mm256_set1_ps(1.0f), mlight_lo)); \ - __m256 inv_desaturate = _mm256_set1_ps((256 - shade_constants.desaturate) * (1.0f/256.0f)); \ - __m128 ss_desaturate = _mm_set_ss(shade_constants.desaturate * (1.0f/256.0f)); \ - __m128 intensity_weight = _mm_set_ps(0.0f, 77.0f/256.0f, 143.0f/256.0f, 37.0f/256.0f); - -// Complex shade 4 pixels with gamma correction -#define AVX_LINEAR_SHADE(fg, shade_constants) { \ - __m256i fg_16 = _mm256_set_m128i(_mm_unpackhi_epi8(fg, _mm_setzero_si128()), _mm_unpacklo_epi8(fg, _mm_setzero_si128())); \ - __m256 fg_hi = _mm256_cvtepi32_ps(_mm256_unpackhi_epi16(fg_16, _mm256_setzero_si256())); \ - __m256 fg_lo = _mm256_cvtepi32_ps(_mm256_unpacklo_epi16(fg_16, _mm256_setzero_si256())); \ - fg_hi = _mm256_mul_ps(fg_hi, mrcp_255); \ - fg_hi = _mm256_mul_ps(fg_hi, fg_hi); \ - fg_lo = _mm256_mul_ps(fg_lo, mrcp_255); \ - fg_lo = _mm256_mul_ps(fg_lo, fg_lo); \ - \ - __m128 intensity_hi0 = _mm_mul_ps(_mm256_extractf128_ps(fg_hi, 0), intensity_weight); \ - __m128 intensity_hi1 = 
_mm_mul_ps(_mm256_extractf128_ps(fg_hi, 1), intensity_weight); \ - intensity_hi0 = _mm_mul_ss(_mm_add_ss(_mm_add_ss(intensity_hi0, _mm_shuffle_ps(intensity_hi0, intensity_hi0, _MM_SHUFFLE(1,1,1,1))), _mm_shuffle_ps(intensity_hi0, intensity_hi0, _MM_SHUFFLE(2,2,2,2))), ss_desaturate); \ - intensity_hi0 = _mm_shuffle_ps(intensity_hi0, intensity_hi0, _MM_SHUFFLE(0,0,0,0)); \ - intensity_hi1 = _mm_mul_ss(_mm_add_ss(_mm_add_ss(intensity_hi1, _mm_shuffle_ps(intensity_hi1, intensity_hi1, _MM_SHUFFLE(1,1,1,1))), _mm_shuffle_ps(intensity_hi1, intensity_hi1, _MM_SHUFFLE(2,2,2,2))), ss_desaturate); \ - intensity_hi1 = _mm_shuffle_ps(intensity_hi1, intensity_hi1, _MM_SHUFFLE(0,0,0,0)); \ - __m256 intensity_hi = _mm256_set_m128(intensity_hi1, intensity_hi0); \ - \ - fg_hi = _mm256_add_ps(_mm256_mul_ps(fg_hi, inv_desaturate), intensity_hi); \ - fg_hi = _mm256_add_ps(_mm256_mul_ps(fg_hi, mlight_hi), fade_amount_hi); \ - fg_hi = _mm256_mul_ps(fg_hi, color); \ - \ - __m128 intensity_lo0 = _mm_mul_ps(_mm256_extractf128_ps(fg_lo, 0), intensity_weight); \ - __m128 intensity_lo1 = _mm_mul_ps(_mm256_extractf128_ps(fg_lo, 1), intensity_weight); \ - intensity_lo0 = _mm_mul_ss(_mm_add_ss(_mm_add_ss(intensity_lo0, _mm_shuffle_ps(intensity_lo0, intensity_lo0, _MM_SHUFFLE(1,1,1,1))), _mm_shuffle_ps(intensity_lo0, intensity_lo0, _MM_SHUFFLE(2,2,2,2))), ss_desaturate); \ - intensity_lo0 = _mm_shuffle_ps(intensity_lo0, intensity_lo0, _MM_SHUFFLE(0,0,0,0)); \ - intensity_lo1 = _mm_mul_ss(_mm_add_ss(_mm_add_ss(intensity_lo1, _mm_shuffle_ps(intensity_lo1, intensity_lo1, _MM_SHUFFLE(1,1,1,1))), _mm_shuffle_ps(intensity_lo1, intensity_lo1, _MM_SHUFFLE(2,2,2,2))), ss_desaturate); \ - intensity_lo1 = _mm_shuffle_ps(intensity_lo1, intensity_lo1, _MM_SHUFFLE(0,0,0,0)); \ - __m256 intensity_lo = _mm256_set_m128(intensity_lo1, intensity_lo0); \ - \ - fg_lo = _mm256_add_ps(_mm256_mul_ps(fg_lo, inv_desaturate), intensity_lo); \ - fg_lo = _mm256_add_ps(_mm256_mul_ps(fg_lo, mlight_lo), fade_amount_lo); \ - 
fg_lo = _mm256_mul_ps(fg_lo, color); \ - \ - fg_hi = _mm256_sqrt_ps(fg_hi); \ - fg_hi = _mm256_mul_ps(fg_hi, m255); \ - fg_lo = _mm256_sqrt_ps(fg_lo); \ - fg_lo = _mm256_mul_ps(fg_lo, m255); \ - fg_16 = _mm256_packus_epi32(_mm256_cvtps_epi32(fg_lo), _mm256_cvtps_epi32(fg_hi)); \ - fg = _mm_packus_epi16(_mm256_extractf128_si256(fg_16, 0), _mm256_extractf128_si256(fg_16, 1)); \ -} - -/* -// Complex shade 8 pixels -#define AVX_SHADE(fg, shade_constants) { \ - __m256i fg_hi = _mm256_unpackhi_epi8(fg, _mm256_setzero_si256()); \ - __m256i fg_lo = _mm256_unpacklo_epi8(fg, _mm256_setzero_si256()); \ - \ - __m256i intensity_hi = _mm256_mullo_epi16(fg_hi, _mm256_set_epi16(0, 77, 143, 37, 0, 77, 143, 37, 0, 77, 143, 37, 0, 77, 143, 37)); \ - __m256i intensity_lo = _mm256_mullo_epi16(fg_lo, _mm256_set_epi16(0, 77, 143, 37, 0, 77, 143, 37, 0, 77, 143, 37, 0, 77, 143, 37)); \ - __m256i intensity = _mm256_mullo_epi16(_mm256_srli_epi16(_mm256_hadd_epi16(_mm256_hadd_epi16(intensity_lo, intensity_hi), _mm256_setzero_si256()), 8), desaturate); \ - intensity = _mm256_unpacklo_epi16(intensity, intensity); \ - intensity_hi = _mm256_unpackhi_epi32(intensity, intensity); \ - intensity_lo = _mm256_unpacklo_epi32(intensity, intensity); \ - \ - fg_hi = _mm256_srli_epi16(_mm256_adds_epu16(_mm256_mullo_epi16(fg_hi, inv_desaturate), intensity_hi), 8); \ - fg_hi = _mm256_srli_epi16(_mm256_adds_epu16(_mm256_mullo_epi16(fg_hi, mlight), fade_amount), 8); \ - fg_hi = _mm256_srli_epi16(_mm256_mullo_epi16(fg_hi, color), 8); \ - \ - fg_lo = _mm256_srli_epi16(_mm256_adds_epu16(_mm256_mullo_epi16(fg_lo, inv_desaturate), intensity_lo), 8); \ - fg_lo = _mm256_srli_epi16(_mm256_adds_epu16(_mm256_mullo_epi16(fg_lo, mlight), fade_amount), 8); \ - fg_lo = _mm256_srli_epi16(_mm256_mullo_epi16(fg_lo, color), 8); \ - \ - fg = _mm256_packus_epi16(fg_lo, fg_hi); \ -} -*/ - -// Normal premultiplied alpha blend using the alpha from fg -#define VEC_ALPHA_BLEND(fg,bg) { \ - __m128i fg_hi = _mm_unpackhi_epi8(fg, 
_mm_setzero_si128()); \ - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); \ - __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); \ - __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); \ - __m128i m256 = _mm_set1_epi16(256); \ - __m128i alpha_hi = _mm_shufflehi_epi16(_mm_shufflelo_epi16(fg_hi, _MM_SHUFFLE(3,3,3,3)), _MM_SHUFFLE(3,3,3,3)); \ - __m128i alpha_lo = _mm_shufflehi_epi16(_mm_shufflelo_epi16(fg_lo, _MM_SHUFFLE(3,3,3,3)), _MM_SHUFFLE(3,3,3,3)); \ - alpha_hi = _mm_add_epi16(alpha_hi, _mm_srli_epi16(alpha_hi, 7)); \ - alpha_lo = _mm_add_epi16(alpha_lo, _mm_srli_epi16(alpha_lo, 7)); \ - __m128i inv_alpha_hi = _mm_sub_epi16(m256, alpha_hi); \ - __m128i inv_alpha_lo = _mm_sub_epi16(m256, alpha_lo); \ - fg_hi = _mm_mullo_epi16(fg_hi, alpha_hi); \ - fg_hi = _mm_srli_epi16(fg_hi, 8); \ - fg_lo = _mm_mullo_epi16(fg_lo, alpha_lo); \ - fg_lo = _mm_srli_epi16(fg_lo, 8); \ - fg = _mm_packus_epi16(fg_lo, fg_hi); \ - bg_hi = _mm_mullo_epi16(bg_hi, inv_alpha_hi); \ - bg_hi = _mm_srli_epi16(bg_hi, 8); \ - bg_lo = _mm_mullo_epi16(bg_lo, inv_alpha_lo); \ - bg_lo = _mm_srli_epi16(bg_lo, 8); \ - bg = _mm_packus_epi16(bg_lo, bg_hi); \ - fg = _mm_adds_epu8(fg, bg); \ -} - -// Calculates the final alpha values to be used when combined with the source texture alpha channel -FORCEINLINE uint32_t calc_blend_bgalpha(uint32_t fg, uint32_t dest_alpha) -{ - uint32_t alpha = fg >> 24; - alpha += alpha >> 7; - uint32_t inv_alpha = 256 - alpha; - return (dest_alpha * alpha + 256 * inv_alpha + 128) >> 8; -} - -#define VEC_CALC_BLEND_ALPHA_VARS() __m128i msrc_alpha, mdest_alpha, m256, m255, m128; - -#define VEC_CALC_BLEND_ALPHA_INIT(src_alpha, dest_alpha) \ - msrc_alpha = _mm_set1_epi16(src_alpha); \ - mdest_alpha = _mm_set1_epi16(dest_alpha * 255 / 256); \ - m256 = _mm_set1_epi16(256); \ - m255 = _mm_set1_epi16(255); \ - m128 = _mm_set1_epi16(128); - -// Calculates the final alpha values to be used when combined with the source texture alpha channel -#define 
VEC_CALC_BLEND_ALPHA(fg) \ - __m128i fg_alpha_hi, fg_alpha_lo, bg_alpha_hi, bg_alpha_lo; { \ - __m128i alpha_hi = _mm_shufflehi_epi16(_mm_shufflelo_epi16(_mm_unpackhi_epi8(fg, _mm_setzero_si128()), _MM_SHUFFLE(3, 3, 3, 3)), _MM_SHUFFLE(3, 3, 3, 3)); \ - __m128i alpha_lo = _mm_shufflehi_epi16(_mm_shufflelo_epi16(_mm_unpacklo_epi8(fg, _mm_setzero_si128()), _MM_SHUFFLE(3, 3, 3, 3)), _MM_SHUFFLE(3, 3, 3, 3)); \ - alpha_hi = _mm_add_epi16(alpha_hi, _mm_srli_epi16(alpha_hi, 7)); \ - alpha_lo = _mm_add_epi16(alpha_lo, _mm_srli_epi16(alpha_lo, 7)); \ - bg_alpha_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_adds_epu16(_mm_mullo_epi16(mdest_alpha, alpha_hi), _mm_mullo_epi16(m255, _mm_sub_epi16(m256, alpha_hi))), m128), 8); \ - bg_alpha_hi = _mm_add_epi16(bg_alpha_hi, _mm_srli_epi16(bg_alpha_hi, 7)); \ - bg_alpha_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_adds_epu16(_mm_mullo_epi16(mdest_alpha, alpha_lo), _mm_mullo_epi16(m255, _mm_sub_epi16(m256, alpha_lo))), m128), 8); \ - bg_alpha_lo = _mm_add_epi16(bg_alpha_lo, _mm_srli_epi16(bg_alpha_lo, 7)); \ - fg_alpha_hi = msrc_alpha; \ - fg_alpha_lo = msrc_alpha; \ - } - -#define SSE_SHADE_VARS() __m128i mlight_hi, mlight_lo, color, fade, fade_amount_hi, fade_amount_lo, inv_desaturate; - -// Calculate constants for a simple shade -#define SSE_SHADE_SIMPLE_INIT(light) \ - mlight_hi = _mm_set_epi16(256, light, light, light, 256, light, light, light); \ - mlight_lo = mlight_hi; - -// Calculate constants for a simple shade with different light levels for each pixel -#define SSE_SHADE_SIMPLE_INIT4(light3, light2, light1, light0) \ - mlight_hi = _mm_set_epi16(256, light1, light1, light1, 256, light0, light0, light0); \ - mlight_lo = _mm_set_epi16(256, light3, light3, light3, 256, light2, light2, light2); - -// Simple shade 4 pixels -#define SSE_SHADE_SIMPLE(fg) { \ - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); \ - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); \ - fg_hi = _mm_mullo_epi16(fg_hi, mlight_hi); \ - fg_hi = 
_mm_srli_epi16(fg_hi, 8); \ - fg_lo = _mm_mullo_epi16(fg_lo, mlight_lo); \ - fg_lo = _mm_srli_epi16(fg_lo, 8); \ - fg = _mm_packus_epi16(fg_lo, fg_hi); \ -} - -// Calculate constants for a complex shade -#define SSE_SHADE_INIT(light, shade_constants) \ - mlight_hi = _mm_set_epi16(256, light, light, light, 256, light, light, light); \ - mlight_lo = mlight_hi; \ - color = _mm_set_epi16( \ - 256, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, \ - 256, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); \ - fade = _mm_set_epi16( \ - 0, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, \ - 0, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); \ - fade_amount_hi = _mm_mullo_epi16(fade, _mm_subs_epu16(_mm_set1_epi16(256), mlight_hi)); \ - fade_amount_lo = fade_amount_hi; \ - inv_desaturate = _mm_set1_epi16(256 - shade_constants.desaturate); \ - -// Calculate constants for a complex shade with different light levels for each pixel -#define SSE_SHADE_INIT4(light3, light2, light1, light0, shade_constants) \ - mlight_hi = _mm_set_epi16(256, light1, light1, light1, 256, light0, light0, light0); \ - mlight_lo = _mm_set_epi16(256, light3, light3, light3, 256, light2, light2, light2); \ - color = _mm_set_epi16( \ - 256, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, \ - 256, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); \ - fade = _mm_set_epi16( \ - 0, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, \ - 0, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); \ - fade_amount_hi = _mm_mullo_epi16(fade, _mm_subs_epu16(_mm_set1_epi16(256), mlight_hi)); \ - fade_amount_lo = _mm_mullo_epi16(fade, _mm_subs_epu16(_mm_set1_epi16(256), mlight_lo)); \ - inv_desaturate = _mm_set1_epi16(256 - shade_constants.desaturate); \ - 
-// Complex shade 4 pixels -#define SSE_SHADE(fg, shade_constants) { \ - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); \ - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); \ - \ - __m128i intensity_hi = _mm_mullo_epi16(fg_hi, _mm_set_epi16(0, 77, 143, 37, 0, 77, 143, 37)); \ - uint16_t intensity_hi0 = ((_mm_extract_epi16(intensity_hi, 2) + _mm_extract_epi16(intensity_hi, 1) + _mm_extract_epi16(intensity_hi, 0)) >> 8) * shade_constants.desaturate; \ - uint16_t intensity_hi1 = ((_mm_extract_epi16(intensity_hi, 6) + _mm_extract_epi16(intensity_hi, 5) + _mm_extract_epi16(intensity_hi, 4)) >> 8) * shade_constants.desaturate; \ - intensity_hi = _mm_set_epi16(intensity_hi1, intensity_hi1, intensity_hi1, intensity_hi1, intensity_hi0, intensity_hi0, intensity_hi0, intensity_hi0); \ - \ - fg_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, inv_desaturate), intensity_hi), 8); \ - fg_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mlight_hi), fade_amount_hi), 8); \ - fg_hi = _mm_srli_epi16(_mm_mullo_epi16(fg_hi, color), 8); \ - \ - __m128i intensity_lo = _mm_mullo_epi16(fg_lo, _mm_set_epi16(0, 77, 143, 37, 0, 77, 143, 37)); \ - uint16_t intensity_lo0 = ((_mm_extract_epi16(intensity_lo, 2) + _mm_extract_epi16(intensity_lo, 1) + _mm_extract_epi16(intensity_lo, 0)) >> 8) * shade_constants.desaturate; \ - uint16_t intensity_lo1 = ((_mm_extract_epi16(intensity_lo, 6) + _mm_extract_epi16(intensity_lo, 5) + _mm_extract_epi16(intensity_lo, 4)) >> 8) * shade_constants.desaturate; \ - intensity_lo = _mm_set_epi16(intensity_lo1, intensity_lo1, intensity_lo1, intensity_lo1, intensity_lo0, intensity_lo0, intensity_lo0, intensity_lo0); \ - \ - fg_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, inv_desaturate), intensity_lo), 8); \ - fg_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mlight_lo), fade_amount_lo), 8); \ - fg_lo = _mm_srli_epi16(_mm_mullo_epi16(fg_lo, color), 8); \ - \ - fg = _mm_packus_epi16(fg_lo, fg_hi); \ -} - 
#endif diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp index a73ba643c9..98d8c22429 100644 --- a/src/r_drawt_rgba.cpp +++ b/src/r_drawt_rgba.cpp @@ -44,50 +44,11 @@ #include "v_video.h" #include "r_draw_rgba.h" #include "r_compiler/llvmdrawers.h" -#ifndef NO_SSE -#include -#endif extern unsigned int dc_tspans[4][MAXHEIGHT]; extern unsigned int *dc_ctspan[4]; extern unsigned int *horizspan[4]; -#ifndef NO_SSE - -#ifdef _MSC_VER -#pragma warning(disable: 4101) // warning C4101: unreferenced local variable -#endif - -// Generate SSE drawers: -#define VecCommand(name) name##_SSE_Command -#define VEC_SHADE_VARS SSE_SHADE_VARS -#define VEC_SHADE_SIMPLE_INIT SSE_SHADE_SIMPLE_INIT -#define VEC_SHADE_SIMPLE_INIT4 SSE_SHADE_SIMPLE_INIT4 -#define VEC_SHADE_SIMPLE SSE_SHADE_SIMPLE -#define VEC_SHADE_INIT SSE_SHADE_INIT -#define VEC_SHADE_INIT4 SSE_SHADE_INIT4 -#define VEC_SHADE SSE_SHADE -#include "r_drawt_rgba_sse.h" -/* -// Generate AVX drawers: -#undef VecCommand -#undef VEC_SHADE_SIMPLE_INIT -#undef VEC_SHADE_SIMPLE_INIT4 -#undef VEC_SHADE_SIMPLE -#undef VEC_SHADE_INIT -#undef VEC_SHADE_INIT4 -#undef VEC_SHADE -#define VecCommand(name) name##_AVX_Command -#define VEC_SHADE_SIMPLE_INIT AVX_LINEAR_SHADE_SIMPLE_INIT -#define VEC_SHADE_SIMPLE_INIT4 AVX_LINEAR_SHADE_SIMPLE_INIT4 -#define VEC_SHADE_SIMPLE AVX_LINEAR_SHADE_SIMPLE -#define VEC_SHADE_INIT AVX_LINEAR_SHADE_INIT -#define VEC_SHADE_INIT4 AVX_LINEAR_SHADE_INIT4 -#define VEC_SHADE AVX_LINEAR_SHADE -#include "r_drawt_rgba_sse.h" -*/ -#endif - ///////////////////////////////////////////////////////////////////////////// class DrawColumnRt1LLVMCommand : public DrawerCommand @@ -173,413 +134,6 @@ DECLARE_DRAW_COMMAND(DrawColumnRt4RevSubClamp, DrawColumnRt4RevSubClamp, DrawCol ///////////////////////////////////////////////////////////////////////////// -class DrawerRt1colCommand : public DrawerCommand -{ -public: - int hx; - int sx; - int yl; - int yh; - BYTE * RESTRICT _destorg; - int _pitch; - - uint32_t _light; 
- ShadeConstants _shade_constants; - BYTE * RESTRICT _colormap; - - uint32_t _srcalpha; - uint32_t _destalpha; - - DrawerRt1colCommand(int hx, int sx, int yl, int yh) - { - this->hx = hx; - this->sx = sx; - this->yl = yl; - this->yh = yh; - - _destorg = dc_destorg; - _pitch = dc_pitch; - - _light = LightBgra::calc_light_multiplier(dc_light); - _shade_constants = dc_shade_constants; - _colormap = dc_colormap; - - _srcalpha = dc_srcalpha >> (FRACBITS - 8); - _destalpha = dc_destalpha >> (FRACBITS - 8); - } - - class LoopIterator - { - public: - uint32_t *source; - uint32_t *dest; - int count; - int pitch, sincr; - - LoopIterator(DrawerRt1colCommand *command, DrawerThread *thread) - { - count = thread->count_for_thread(command->yl, (command->yh - command->yl + 1)); - if (count <= 0) - return; - - dest = thread->dest_for_thread(command->yl, command->_pitch, ylookup[command->yl] + command->sx + (uint32_t*)command->_destorg); - source = &thread->dc_temp_rgba[command->yl * 4 + command->hx] + thread->skipped_by_thread(command->yl) * 4; - pitch = command->_pitch * thread->num_cores; - sincr = thread->num_cores * 4; - } - - explicit operator bool() - { - return count > 0; - } - - bool next() - { - dest += pitch; - source += sincr; - return (--count) != 0; - } - }; -}; - -class DrawerRt4colsCommand : public DrawerCommand -{ -public: - int sx; - int yl; - int yh; - uint32_t _light; - ShadeConstants _shade_constants; - BYTE * RESTRICT _destorg; - int _pitch; - BYTE * RESTRICT _colormap; - uint32_t _srcalpha; - uint32_t _destalpha; - - DrawerRt4colsCommand(int sx, int yl, int yh) - { - this->sx = sx; - this->yl = yl; - this->yh = yh; - - _light = LightBgra::calc_light_multiplier(dc_light); - _shade_constants = dc_shade_constants; - _destorg = dc_destorg; - _pitch = dc_pitch; - _colormap = dc_colormap; - - _srcalpha = dc_srcalpha >> (FRACBITS - 8); - _destalpha = dc_destalpha >> (FRACBITS - 8); - } - - class LoopIterator - { - public: - uint32_t *source; - uint32_t *dest; - int 
count; - int pitch; - int sincr; - - LoopIterator(DrawerRt4colsCommand *command, DrawerThread *thread) - { - count = thread->count_for_thread(command->yl, command->yh - command->yl + 1); - if (count <= 0) - return; - - dest = thread->dest_for_thread(command->yl, command->_pitch, ylookup[command->yl] + command->sx + (uint32_t*)command->_destorg); - source = &thread->dc_temp_rgba[command->yl * 4] + thread->skipped_by_thread(command->yl) * 4; - pitch = command->_pitch * thread->num_cores; - sincr = thread->num_cores * 4; - } - - explicit operator bool() - { - return count > 0; - } - - bool next() - { - dest += pitch; - source += sincr; - return (--count) != 0; - } - }; -}; - -class RtCopy1colRGBACommand : public DrawerRt1colCommand -{ -public: - RtCopy1colRGBACommand(int hx, int sx, int yl, int yh) : DrawerRt1colCommand(hx, sx, yl, yh) - { - } - - void Execute(DrawerThread *thread) override - { - LoopIterator loop(this, thread); - if (!loop) return; - do - { - uint32_t fg = GPalette.BaseColors[*loop.source]; - *loop.dest = BlendBgra::copy(fg); - } while (loop.next()); - } -}; - -class RtMap1colRGBACommand : public DrawerRt1colCommand -{ -public: - RtMap1colRGBACommand(int hx, int sx, int yl, int yh) : DrawerRt1colCommand(hx, sx, yl, yh) - { - } - - void Execute(DrawerThread *thread) override - { - LoopIterator loop(this, thread); - if (!loop) return; - do - { - uint32_t fg = LightBgra::shade_pal_index(_colormap[*loop.source], _light, _shade_constants); - *loop.dest = BlendBgra::copy(fg); - } while (loop.next()); - } -}; - -class RtMap4colsRGBACommand : public DrawerRt4colsCommand -{ -public: - RtMap4colsRGBACommand(int sx, int yl, int yh) : DrawerRt4colsCommand(sx, yl, yh) - { - } - - void Execute(DrawerThread *thread) override - { - LoopIterator loop(this, thread); - if (!loop) return; - do - { - for (int i = 0; i < 4; i++) - { - uint32_t fg = LightBgra::shade_pal_index(_colormap[loop.source[i]], _light, _shade_constants); - loop.dest[i] = BlendBgra::copy(fg); - } - 
} while (loop.next()); - } -}; - -class RtAdd1colRGBACommand : public DrawerRt1colCommand -{ -public: - RtAdd1colRGBACommand(int hx, int sx, int yl, int yh) : DrawerRt1colCommand(hx, sx, yl, yh) - { - } - - void Execute(DrawerThread *thread) override - { - LoopIterator loop(this, thread); - if (!loop) return; - do - { - uint32_t fg = LightBgra::shade_pal_index(_colormap[*loop.source], _light, _shade_constants); - *loop.dest = BlendBgra::add(fg, *loop.dest, _srcalpha, _destalpha); - } while (loop.next()); - } -}; - -class RtAdd4colsRGBACommand : public DrawerRt4colsCommand -{ -public: - RtAdd4colsRGBACommand(int sx, int yl, int yh) : DrawerRt4colsCommand(sx, yl, yh) - { - } - - void Execute(DrawerThread *thread) override - { - LoopIterator loop(this, thread); - if (!loop) return; - do - { - for (int i = 0; i < 4; i++) - { - uint32_t fg = LightBgra::shade_pal_index(_colormap[loop.source[i]], _light, _shade_constants); - loop.dest[i] = BlendBgra::add(fg, loop.dest[i], _srcalpha, _destalpha); - } - } while (loop.next()); - } -}; - -class RtShaded1colRGBACommand : public DrawerRt1colCommand -{ - uint32_t _color; - -public: - RtShaded1colRGBACommand(int hx, int sx, int yl, int yh) : DrawerRt1colCommand(hx, sx, yl, yh) - { - _color = LightBgra::shade_pal_index(dc_color, _light, _shade_constants); - } - - void Execute(DrawerThread *thread) override - { - LoopIterator loop(this, thread); - if (!loop) return; - do - { - uint32_t alpha = _colormap[*loop.source] * 4; - uint32_t inv_alpha = 256 - alpha; - *loop.dest = BlendBgra::add(_color, *loop.dest, alpha, inv_alpha); - } while (loop.next()); - } -}; - -class RtShaded4colsRGBACommand : public DrawerRt4colsCommand -{ - uint32_t _color; - -public: - RtShaded4colsRGBACommand(int sx, int yl, int yh) : DrawerRt4colsCommand(sx, yl, yh) - { - _color = LightBgra::shade_pal_index(dc_color, _light, _shade_constants); - } - - void Execute(DrawerThread *thread) override - { - LoopIterator loop(this, thread); - if (!loop) return; - do - 
{ - for (int i = 0; i < 4; i++) - { - uint32_t alpha = _colormap[loop.source[i]] * 4; - uint32_t inv_alpha = 256 - alpha; - loop.dest[i] = BlendBgra::add(_color, loop.dest[i], alpha, inv_alpha); - } - } while (loop.next()); - } -}; - -class RtAddClamp1colRGBACommand : public DrawerRt1colCommand -{ -public: - RtAddClamp1colRGBACommand(int hx, int sx, int yl, int yh) : DrawerRt1colCommand(hx, sx, yl, yh) - { - } - - void Execute(DrawerThread *thread) override - { - LoopIterator loop(this, thread); - if (!loop) return; - do - { - uint32_t fg = LightBgra::shade_pal_index(*loop.source, _light, _shade_constants); - *loop.dest = BlendBgra::add(fg, *loop.dest, _srcalpha, _destalpha); - } while (loop.next()); - } -}; - -class RtAddClamp4colsRGBACommand : public DrawerRt4colsCommand -{ -public: - RtAddClamp4colsRGBACommand(int sx, int yl, int yh) : DrawerRt4colsCommand(sx, yl, yh) - { - } - - void Execute(DrawerThread *thread) override - { - LoopIterator loop(this, thread); - if (!loop) return; - do - { - for (int i = 0; i < 4; i++) - { - uint32_t fg = LightBgra::shade_pal_index(loop.source[i], _light, _shade_constants); - loop.dest[i] = BlendBgra::add(fg, loop.dest[i], _srcalpha, _destalpha); - } - } while (loop.next()); - } -}; - -class RtSubClamp1colRGBACommand : public DrawerRt1colCommand -{ -public: - RtSubClamp1colRGBACommand(int hx, int sx, int yl, int yh) : DrawerRt1colCommand(hx, sx, yl, yh) - { - } - - void Execute(DrawerThread *thread) override - { - LoopIterator loop(this, thread); - if (!loop) return; - do - { - uint32_t fg = LightBgra::shade_pal_index(*loop.source, _light, _shade_constants); - *loop.dest = BlendBgra::sub(fg, *loop.dest, _srcalpha, _destalpha); - } while (loop.next()); - } -}; - -class RtSubClamp4colsRGBACommand : public DrawerRt4colsCommand -{ -public: - RtSubClamp4colsRGBACommand(int sx, int yl, int yh) : DrawerRt4colsCommand(sx, yl, yh) - { - } - - void Execute(DrawerThread *thread) override - { - LoopIterator loop(this, thread); - if (!loop) 
return; - do - { - for (int i = 0; i < 4; i++) - { - uint32_t fg = LightBgra::shade_pal_index(loop.source[i], _light, _shade_constants); - loop.dest[i] = BlendBgra::sub(fg, loop.dest[i], _srcalpha, _destalpha); - } - } while (loop.next()); - } -}; - -class RtRevSubClamp1colRGBACommand : public DrawerRt1colCommand -{ -public: - RtRevSubClamp1colRGBACommand(int hx, int sx, int yl, int yh) : DrawerRt1colCommand(hx, sx, yl, yh) - { - } - - void Execute(DrawerThread *thread) override - { - LoopIterator loop(this, thread); - if (!loop) return; - do - { - uint32_t fg = LightBgra::shade_pal_index(*loop.source, _light, _shade_constants); - *loop.dest = BlendBgra::revsub(fg, *loop.dest, _srcalpha, _destalpha); - } while (loop.next()); - } -}; - -class RtRevSubClamp4colsRGBACommand : public DrawerRt4colsCommand -{ -public: - RtRevSubClamp4colsRGBACommand(int sx, int yl, int yh) : DrawerRt4colsCommand(sx, yl, yh) - { - } - - void Execute(DrawerThread *thread) override - { - LoopIterator loop(this, thread); - if (!loop) return; - do - { - for (int i = 0; i < 4; i++) - { - uint32_t fg = LightBgra::shade_pal_index(loop.source[i], _light, _shade_constants); - loop.dest[i] = BlendBgra::revsub(fg, loop.dest[i], _srcalpha, _destalpha); - } - } while (loop.next()); - } -}; - class RtTranslate1colRGBACommand : public DrawerCommand { const BYTE * RESTRICT translation; diff --git a/src/r_drawt_rgba_sse.h b/src/r_drawt_rgba_sse.h deleted file mode 100644 index 7a02f2282b..0000000000 --- a/src/r_drawt_rgba_sse.h +++ /dev/null @@ -1,757 +0,0 @@ -// -// SSE/AVX intrinsics based drawers for the r_drawt family of drawers. -// -// Note: This header file is intentionally not guarded by a __R_DRAWT_RGBA_SSE__ define. -// It is because the code is nearly identical for SSE vs AVX. The file is included -// multiple times by r_drawt_rgba.cpp with different defines that changes the class -// names outputted and the type of intrinsics used. 
- -#ifdef _MSC_VER -#pragma warning(disable: 4752) // warning C4752: found Intel(R) Advanced Vector Extensions; consider using /arch:AVX -#endif - -class VecCommand(RtMap4colsRGBA) : public DrawerCommand -{ - int sx; - int yl; - int yh; - fixed_t _light; - ShadeConstants _shade_constants; - BYTE * RESTRICT _destorg; - int _pitch; - BYTE * RESTRICT _colormap; - -public: - VecCommand(RtMap4colsRGBA)(int sx, int yl, int yh) - { - this->sx = sx; - this->yl = yl; - this->yh = yh; - - _light = dc_light; - _shade_constants = dc_shade_constants; - _destorg = dc_destorg; - _pitch = dc_pitch; - _colormap = dc_colormap; - } - - void Execute(DrawerThread *thread) override - { - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - int sincr; - - count = thread->count_for_thread(yl, yh - yl + 1); - if (count <= 0) - return; - - ShadeConstants shade_constants = _shade_constants; - uint32_t light = LightBgra::calc_light_multiplier(_light); - uint32_t *palette = (uint32_t*)GPalette.BaseColors; - - dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); - source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; - pitch = _pitch * thread->num_cores; - sincr = thread->num_cores * 4; - - BYTE *colormap = _colormap; - - if (shade_constants.simple_shade) - { - VEC_SHADE_VARS(); - VEC_SHADE_SIMPLE_INIT(light); - - if (count & 1) { - uint32_t p0 = colormap[source[0]]; - uint32_t p1 = colormap[source[1]]; - uint32_t p2 = colormap[source[2]]; - uint32_t p3 = colormap[source[3]]; - - // shade_pal_index: - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - VEC_SHADE_SIMPLE(fg); - _mm_storeu_si128((__m128i*)dest, fg); - - source += sincr; - dest += pitch; - } - if (!(count >>= 1)) - return; - - do { - // shade_pal_index 0-3 - { - uint32_t p0 = colormap[source[0]]; - uint32_t p1 = colormap[source[1]]; - uint32_t p2 = colormap[source[2]]; - uint32_t p3 = colormap[source[3]]; - - __m128i fg = 
_mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - VEC_SHADE_SIMPLE(fg); - _mm_storeu_si128((__m128i*)dest, fg); - } - - // shade_pal_index 4-7 (pitch) - { - uint32_t p0 = colormap[source[sincr]]; - uint32_t p1 = colormap[source[sincr + 1]]; - uint32_t p2 = colormap[source[sincr + 2]]; - uint32_t p3 = colormap[source[sincr + 3]]; - - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - VEC_SHADE_SIMPLE(fg); - _mm_storeu_si128((__m128i*)(dest + pitch), fg); - } - - source += sincr * 2; - dest += pitch * 2; - } while (--count); - } - else - { - VEC_SHADE_VARS(); - VEC_SHADE_INIT(light, shade_constants); - - if (count & 1) { - uint32_t p0 = colormap[source[0]]; - uint32_t p1 = colormap[source[1]]; - uint32_t p2 = colormap[source[2]]; - uint32_t p3 = colormap[source[3]]; - - // shade_pal_index: - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - VEC_SHADE(fg, shade_constants); - _mm_storeu_si128((__m128i*)dest, fg); - - source += sincr; - dest += pitch; - } - if (!(count >>= 1)) - return; - - do { - // shade_pal_index 0-3 - { - uint32_t p0 = colormap[source[0]]; - uint32_t p1 = colormap[source[1]]; - uint32_t p2 = colormap[source[2]]; - uint32_t p3 = colormap[source[3]]; - - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - VEC_SHADE(fg, shade_constants); - _mm_storeu_si128((__m128i*)dest, fg); - } - - // shade_pal_index 4-7 (pitch) - { - uint32_t p0 = colormap[source[sincr]]; - uint32_t p1 = colormap[source[sincr + 1]]; - uint32_t p2 = colormap[source[sincr + 2]]; - uint32_t p3 = colormap[source[sincr + 3]]; - - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - VEC_SHADE(fg, shade_constants); - _mm_storeu_si128((__m128i*)(dest + pitch), fg); - } - - source += sincr * 2; - dest += pitch * 2; - } while (--count); - } - } -}; - -class VecCommand(RtAdd4colsRGBA) : public DrawerCommand -{ - int sx; - int yl; - int yh; - BYTE * 
RESTRICT _destorg; - int _pitch; - fixed_t _light; - ShadeConstants _shade_constants; - BYTE * RESTRICT _colormap; - fixed_t _srcalpha; - fixed_t _destalpha; - -public: - VecCommand(RtAdd4colsRGBA)(int sx, int yl, int yh) - { - this->sx = sx; - this->yl = yl; - this->yh = yh; - - _destorg = dc_destorg; - _pitch = dc_pitch; - _light = dc_light; - _shade_constants = dc_shade_constants; - _colormap = dc_colormap; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; - } - - void Execute(DrawerThread *thread) override - { - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - int sincr; - - count = thread->count_for_thread(yl, yh - yl + 1); - if (count <= 0) - return; - - dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); - source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; - pitch = _pitch * thread->num_cores; - sincr = 4 * thread->num_cores; - - uint32_t light = LightBgra::calc_light_multiplier(_light); - uint32_t *palette = (uint32_t*)GPalette.BaseColors; - BYTE *colormap = _colormap; - - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - - ShadeConstants shade_constants = _shade_constants; - - if (shade_constants.simple_shade) - { - VEC_SHADE_VARS(); - VEC_SHADE_SIMPLE_INIT(light); - - __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); - __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); - - do { - uint32_t p0 = colormap[source[0]]; - uint32_t p1 = colormap[source[1]]; - uint32_t p2 = colormap[source[2]]; - uint32_t p3 = colormap[source[3]]; - - // shade_pal_index: - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - VEC_SHADE_SIMPLE(fg); - - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - - // unpack bg: - __m128i bg = 
_mm_loadu_si128((const __m128i*)dest); - __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); - - // (fg_red * fg_alpha + bg_red * bg_alpha) / 256: - __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); - __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); - - __m128i color = _mm_packus_epi16(color_lo, color_hi); - _mm_storeu_si128((__m128i*)dest, color); - - source += sincr; - dest += pitch; - } while (--count); - } - else - { - VEC_SHADE_VARS(); - VEC_SHADE_INIT(light, shade_constants); - - __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); - __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); - - do { - uint32_t p0 = colormap[source[0]]; - uint32_t p1 = colormap[source[1]]; - uint32_t p2 = colormap[source[2]]; - uint32_t p3 = colormap[source[3]]; - - // shade_pal_index: - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - VEC_SHADE(fg, shade_constants); - - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - - // unpack bg: - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); - - // (fg_red * fg_alpha + bg_red * bg_alpha) / 256: - __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); - __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); - - __m128i color = _mm_packus_epi16(color_lo, color_hi); - _mm_storeu_si128((__m128i*)dest, color); - - source += sincr; - dest += pitch; - } while 
(--count); - } - } -}; - -class VecCommand(RtShaded4colsRGBA) : public DrawerCommand -{ - int sx; - int yl; - int yh; - lighttable_t * RESTRICT _colormap; - int _color; - BYTE * RESTRICT _destorg; - int _pitch; - fixed_t _light; - -public: - VecCommand(RtShaded4colsRGBA)(int sx, int yl, int yh) - { - this->sx = sx; - this->yl = yl; - this->yh = yh; - - _colormap = dc_colormap; - _color = dc_color; - _destorg = dc_destorg; - _pitch = dc_pitch; - _light = dc_light; - } - - void Execute(DrawerThread *thread) override - { - BYTE *colormap; - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - int sincr; - - count = thread->count_for_thread(yl, yh - yl + 1); - if (count <= 0) - return; - - colormap = _colormap; - dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); - source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; - pitch = _pitch * thread->num_cores; - sincr = 4 * thread->num_cores; - - __m128i fg = _mm_unpackhi_epi8(_mm_set1_epi32(LightBgra::shade_pal_index_simple(_color, LightBgra::calc_light_multiplier(_light))), _mm_setzero_si128()); - __m128i alpha_one = _mm_set1_epi16(64); - - do { - uint32_t p0 = colormap[source[0]]; - uint32_t p1 = colormap[source[1]]; - uint32_t p2 = colormap[source[2]]; - uint32_t p3 = colormap[source[3]]; - - __m128i alpha_hi = _mm_set_epi16(64, p3, p3, p3, 64, p2, p2, p2); - __m128i alpha_lo = _mm_set_epi16(64, p1, p1, p1, 64, p0, p0, p0); - __m128i inv_alpha_hi = _mm_subs_epu16(alpha_one, alpha_hi); - __m128i inv_alpha_lo = _mm_subs_epu16(alpha_one, alpha_lo); - - // unpack bg: - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); - - // (fg_red * alpha + bg_red * inv_alpha) / 64: - __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg, alpha_hi), _mm_mullo_epi16(bg_hi, inv_alpha_hi)), 6); - __m128i color_lo = 
_mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg, alpha_lo), _mm_mullo_epi16(bg_lo, inv_alpha_lo)), 6); - - __m128i color = _mm_packus_epi16(color_lo, color_hi); - _mm_storeu_si128((__m128i*)dest, color); - - source += sincr; - dest += pitch; - } while (--count); - } -}; - -class VecCommand(RtAddClamp4colsRGBA) : public DrawerCommand -{ - int sx; - int yl; - int yh; - BYTE * RESTRICT _destorg; - int _pitch; - fixed_t _light; - fixed_t _srcalpha; - fixed_t _destalpha; - ShadeConstants _shade_constants; - -public: - VecCommand(RtAddClamp4colsRGBA)(int sx, int yl, int yh) - { - this->sx = sx; - this->yl = yl; - this->yh = yh; - - _destorg = dc_destorg; - _pitch = dc_pitch; - _light = dc_light; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; - _shade_constants = dc_shade_constants; - } - - void Execute(DrawerThread *thread) override - { - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - int sincr; - - count = thread->count_for_thread(yl, yh - yl + 1); - if (count <= 0) - return; - - dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); - source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; - pitch = _pitch * thread->num_cores; - sincr = 4 * thread->num_cores; - - uint32_t light = LightBgra::calc_light_multiplier(_light); - uint32_t *palette = (uint32_t*)GPalette.BaseColors; - - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - - ShadeConstants shade_constants = _shade_constants; - - if (shade_constants.simple_shade) - { - VEC_SHADE_VARS(); - VEC_SHADE_SIMPLE_INIT(light); - - __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); - __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); - - do { - uint32_t p0 = source[0]; - uint32_t p1 = source[1]; - uint32_t p2 = source[2]; - uint32_t p3 = source[3]; - - // shade_pal_index: - __m128i fg = 
_mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - VEC_SHADE_SIMPLE(fg); - - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - - // unpack bg: - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); - - // (fg_red * fg_alpha + bg_red * bg_alpha) / 256: - __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); - __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); - - __m128i color = _mm_packus_epi16(color_lo, color_hi); - _mm_storeu_si128((__m128i*)dest, color); - - source += sincr; - dest += pitch; - } while (--count); - } - else - { - VEC_SHADE_VARS(); - VEC_SHADE_INIT(light, shade_constants); - - __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); - __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); - - do { - uint32_t p0 = source[0]; - uint32_t p1 = source[1]; - uint32_t p2 = source[2]; - uint32_t p3 = source[3]; - - // shade_pal_index: - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - VEC_SHADE(fg, shade_constants); - - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - - // unpack bg: - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); - - // (fg_red * fg_alpha + bg_red * bg_alpha) / 256: - __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); - __m128i color_lo = 
_mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); - - __m128i color = _mm_packus_epi16(color_lo, color_hi); - _mm_storeu_si128((__m128i*)dest, color); - - source += sincr; - dest += pitch; - } while (--count); - } - } -}; - -class VecCommand(RtSubClamp4colsRGBA) : public DrawerCommand -{ - int sx; - int yl; - int yh; - BYTE * RESTRICT _destorg; - int _pitch; - fixed_t _light; - fixed_t _srcalpha; - fixed_t _destalpha; - ShadeConstants _shade_constants; - -public: - VecCommand(RtSubClamp4colsRGBA)(int sx, int yl, int yh) - { - this->sx = sx; - this->yl = yl; - this->yh = yh; - - _destorg = dc_destorg; - _pitch = dc_pitch; - _light = dc_light; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; - _shade_constants = dc_shade_constants; - } - - void Execute(DrawerThread *thread) override - { - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - int sincr; - - count = thread->count_for_thread(yl, yh - yl + 1); - if (count <= 0) - return; - - dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); - source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; - pitch = _pitch * thread->num_cores; - sincr = 4 * thread->num_cores; - - uint32_t light = LightBgra::calc_light_multiplier(_light); - uint32_t *palette = (uint32_t*)GPalette.BaseColors; - ShadeConstants shade_constants = _shade_constants; - - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - - if (shade_constants.simple_shade) - { - VEC_SHADE_VARS(); - VEC_SHADE_SIMPLE_INIT(light); - - __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); - __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); - - do { - uint32_t p0 = source[0]; - uint32_t p1 = source[1]; - uint32_t p2 = source[2]; - uint32_t p3 = source[3]; - - // shade_pal_index: - __m128i fg = 
_mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - VEC_SHADE_SIMPLE(fg); - - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - - // unpack bg: - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); - - // (bg_red * bg_alpha - fg_red * fg_alpha) / 256: - __m128i color_hi = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(bg_hi, mbg_alpha), _mm_mullo_epi16(fg_hi, mfg_alpha)), 8); - __m128i color_lo = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(bg_lo, mbg_alpha), _mm_mullo_epi16(fg_lo, mfg_alpha)), 8); - - __m128i color = _mm_packus_epi16(color_lo, color_hi); - _mm_storeu_si128((__m128i*)dest, color); - - source += sincr; - dest += pitch; - } while (--count); - } - else - { - VEC_SHADE_VARS(); - VEC_SHADE_INIT(light, shade_constants); - - __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); - __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); - - do { - uint32_t p0 = source[0]; - uint32_t p1 = source[1]; - uint32_t p2 = source[2]; - uint32_t p3 = source[3]; - - // shade_pal_index: - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - VEC_SHADE(fg, shade_constants); - - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - - // unpack bg: - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); - - // (bg_red * bg_alpha - fg_red * fg_alpha) / 256: - __m128i color_hi = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(bg_hi, mbg_alpha), _mm_mullo_epi16(fg_hi, mfg_alpha)), 8); - __m128i color_lo = 
_mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(bg_lo, mbg_alpha), _mm_mullo_epi16(fg_lo, mfg_alpha)), 8); - - __m128i color = _mm_packus_epi16(color_lo, color_hi); - _mm_storeu_si128((__m128i*)dest, color); - - source += sincr; - dest += pitch; - } while (--count); - } - } -}; - -class VecCommand(RtRevSubClamp4colsRGBA) : public DrawerCommand -{ - int sx; - int yl; - int yh; - BYTE * RESTRICT _destorg; - int _pitch; - fixed_t _light; - fixed_t _srcalpha; - fixed_t _destalpha; - ShadeConstants _shade_constants; - -public: - VecCommand(RtRevSubClamp4colsRGBA)(int sx, int yl, int yh) - { - this->sx = sx; - this->yl = yl; - this->yh = yh; - - _destorg = dc_destorg; - _pitch = dc_pitch; - _light = dc_light; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; - _shade_constants = dc_shade_constants; - } - - void Execute(DrawerThread *thread) override - { - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - int sincr; - - count = thread->count_for_thread(yl, yh - yl + 1); - if (count <= 0) - return; - - dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); - source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; - pitch = _pitch * thread->num_cores; - sincr = 4 * thread->num_cores; - - uint32_t light = LightBgra::calc_light_multiplier(_light); - uint32_t *palette = (uint32_t*)GPalette.BaseColors; - ShadeConstants shade_constants = _shade_constants; - - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - - if (shade_constants.simple_shade) - { - VEC_SHADE_VARS(); - VEC_SHADE_SIMPLE_INIT(light); - - __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); - __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); - - do { - uint32_t p0 = source[0]; - uint32_t p1 = source[1]; - uint32_t p2 = source[2]; - uint32_t p3 = source[3]; - - // shade_pal_index: - __m128i fg 
= _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - VEC_SHADE_SIMPLE(fg); - - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - - // unpack bg: - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); - - // (fg_red * fg_alpha - bg_red * bg_alpha) / 256: - __m128i color_hi = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); - __m128i color_lo = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); - - __m128i color = _mm_packus_epi16(color_lo, color_hi); - _mm_storeu_si128((__m128i*)dest, color); - - source += sincr; - dest += pitch; - } while (--count); - } - else - { - VEC_SHADE_VARS(); - VEC_SHADE_INIT(light, shade_constants); - - __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); - __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); - - do { - uint32_t p0 = source[0]; - uint32_t p1 = source[1]; - uint32_t p2 = source[2]; - uint32_t p3 = source[3]; - - // shade_pal_index: - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - VEC_SHADE(fg, shade_constants); - - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - - // unpack bg: - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); - - // (fg_red * fg_alpha - bg_red * bg_alpha) / 256: - __m128i color_hi = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); - __m128i color_lo = 
_mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); - - __m128i color = _mm_packus_epi16(color_lo, color_hi); - _mm_storeu_si128((__m128i*)dest, color); - - source += sincr; - dest += pitch; - } while (--count); - } - } -}; From 6512068005ebbd618a7791fe0500ab5428a7083c Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 7 Oct 2016 07:03:13 +0200 Subject: [PATCH 167/912] Remove unused queue_wallcommand --- src/r_draw_rgba.cpp | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 424d3140bf..953a078580 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -1314,19 +1314,6 @@ DWORD vlinec1_rgba() return dc_texturefrac + dc_count * dc_iscale; } -template -void queue_wallcommand() -{ - if (bufplce2[0] == nullptr && dc_shade_constants.simple_shade) - DrawerCommandQueue::QueueCommand(); - else if (bufplce2[0] == nullptr) - DrawerCommandQueue::QueueCommand(); - else if (dc_shade_constants.simple_shade) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); -} - void vlinec4_rgba() { DrawerCommandQueue::QueueCommand(); From a4ccbddfd699104a7fdf32a1860253971f52265b Mon Sep 17 00:00:00 2001 From: raa-eruanna Date: Fri, 7 Oct 2016 02:38:08 -0400 Subject: [PATCH 168/912] - Menudef: Split Truecolor options into their own menu. 
--- wadsrc/static/language.enu | 18 ++++++++++++++---- wadsrc/static/menudef.txt | 16 ++++++++++++---- 2 files changed, 26 insertions(+), 8 deletions(-) diff --git a/wadsrc/static/language.enu b/wadsrc/static/language.enu index c76414c8a2..b7d8877273 100644 --- a/wadsrc/static/language.enu +++ b/wadsrc/static/language.enu @@ -1780,10 +1780,6 @@ DSPLYMNU_BRIGHTNESS = "Brightness"; DSPLYMNU_VSYNC = "Vertical Sync"; DSPLYMNU_CAPFPS = "Rendering Interpolation"; DSPLYMNU_COLUMNMETHOD = "Column render mode"; -DSPLYMNU_TRUECOLOR = "True color output"; -DSPLYMNU_MINFILTER = "Linear filter when downscaling"; -DSPLYMNU_MAGFILTER = "Linear filter when upscaling"; -DSPLYMNU_MIPMAP = "Use mipmapped textures"; DSPLYMNU_WIPETYPE = "Screen wipe style"; DSPLYMNU_SHOWENDOOM = "Show ENDOOM screen"; DSPLYMNU_BLOODFADE = "Blood Flash Intensity"; @@ -2726,3 +2722,17 @@ OPTVAL_LOW = "Low"; OPTVAL_MEDIUM = "Medium"; OPTVAL_HIGH = "High"; OPTVAL_EXTREME = "Extreme"; + +// QZDoom exclusive: + +DSPLYMNU_TCOPT = "TrueColor Options"; + +TCMNU_TITLE = "TRUECOLOR OPTIONS"; + + +TCMNU_MULTITHREADED = "Multithreaded Drawing"; +TCMNU_TRUECOLOR = "True color output"; +TCMNU_MINFILTER = "Linear filter when downscaling"; +TCMNU_MAGFILTER = "Linear filter when upscaling"; +TCMNU_MIPMAP = "Use mipmapped textures"; + diff --git a/wadsrc/static/menudef.txt b/wadsrc/static/menudef.txt index 715eff63b7..ac4e02be9f 100644 --- a/wadsrc/static/menudef.txt +++ b/wadsrc/static/menudef.txt @@ -658,11 +658,23 @@ OptionMenu "OpenGLOptions" Submenu "$GLMNU_PREFS", "GLPrefOptions" } +OptionMenu "TrueColorOptions" +{ + Title "$TCMNU_TITLE" + Option "$TCMNU_MULTITHREADED", "r_multithreaded", "OnOff" + StaticText " " + Option "$TCMNU_TRUECOLOR", "swtruecolor", "OnOff" + Option "$TCMNU_MINFILTER", "r_minfilter", "OnOff" + Option "$TCMNU_MAGFILTER", "r_magfilter", "OnOff" + Option "$TCMNU_MIPMAP", "r_mipmap", "OnOff" +} + OptionMenu "VideoOptions" { Title "$DSPLYMNU_TITLE" Submenu "$DSPLYMNU_GLOPT", "OpenGLOptions" + 
Submenu "$DSPLYMNU_TCOPT", "TrueColorOptions" Submenu "$DSPLYMNU_SCOREBOARD", "ScoreboardOptions" StaticText " " Slider "$DSPLYMNU_SCREENSIZE", "screenblocks", 3.0, 12.0, 1.0, 0 @@ -677,10 +689,6 @@ OptionMenu "VideoOptions" Slider "$DSPLYMNU_BLOODFADE", "blood_fade_scalar", 0.0, 1.0, 0.05, 1 Slider "$DSPLYMNU_PICKUPFADE", "pickup_fade_scalar", 0.0, 1.0, 0.05, 1 Option "$DSPLYMNU_COLUMNMETHOD", "r_columnmethod", "ColumnMethods" - Option "$DSPLYMNU_TRUECOLOR", "swtruecolor", "OnOff" - Option "$DSPLYMNU_MINFILTER", "r_minfilter", "OnOff" - Option "$DSPLYMNU_MAGFILTER", "r_magfilter", "OnOff" - Option "$DSPLYMNU_MIPMAP", "r_mipmap", "OnOff" StaticText " " Option "$DSPLYMNU_WIPETYPE", "wipetype", "Wipes" From aae4571c95c52feab20172ecf5a39b21327cac01 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 7 Oct 2016 12:45:21 +0200 Subject: [PATCH 169/912] Changed column drawers to use true color textures --- .../fixedfunction/drawcolumncodegen.cpp | 108 ++++++++++- .../fixedfunction/drawcolumncodegen.h | 6 +- src/r_compiler/llvmdrawers.cpp | 20 ++ src/r_compiler/llvmdrawers.h | 10 + src/r_draw.cpp | 10 + src/r_drawt.cpp | 3 +- src/r_drawt_rgba.cpp | 182 +++--------------- src/r_main.h | 2 +- src/r_segs.cpp | 6 +- src/r_things.cpp | 26 ++- src/v_draw.cpp | 18 +- 11 files changed, 215 insertions(+), 176 deletions(-) diff --git a/src/r_compiler/fixedfunction/drawcolumncodegen.cpp b/src/r_compiler/fixedfunction/drawcolumncodegen.cpp index 6013582747..e920fb73a9 100644 --- a/src/r_compiler/fixedfunction/drawcolumncodegen.cpp +++ b/src/r_compiler/fixedfunction/drawcolumncodegen.cpp @@ -89,6 +89,8 @@ void DrawColumnCodegen::Loop(DrawColumnVariant variant, DrawColumnMethod method, { frac = stack_frac.load(); sample_index = frac >> FRACBITS; + if (!IsPaletteInput(variant)) + sample_index = sample_index * 4; } else { @@ -136,6 +138,33 @@ void DrawColumnCodegen::Loop(DrawColumnVariant variant, DrawColumnMethod method, } } +bool DrawColumnCodegen::IsPaletteInput(DrawColumnVariant 
variant) +{ + switch (variant) + { + default: + case DrawColumnVariant::DrawCopy: + case DrawColumnVariant::Draw: + case DrawColumnVariant::DrawAdd: + case DrawColumnVariant::DrawAddClamp: + case DrawColumnVariant::DrawSubClamp: + case DrawColumnVariant::DrawRevSubClamp: + case DrawColumnVariant::Fill: + case DrawColumnVariant::FillAdd: + case DrawColumnVariant::FillAddClamp: + case DrawColumnVariant::FillSubClamp: + case DrawColumnVariant::FillRevSubClamp: + return false; + case DrawColumnVariant::DrawShaded: + case DrawColumnVariant::DrawTranslated: + case DrawColumnVariant::DrawTlatedAdd: + case DrawColumnVariant::DrawAddClampTranslated: + case DrawColumnVariant::DrawSubClampTranslated: + case DrawColumnVariant::DrawRevSubClampTranslated: + return true; + } +} + SSAVec4i DrawColumnCodegen::ProcessPixel(SSAInt sample_index, SSAVec4i bgcolor, DrawColumnVariant variant, bool isSimpleShade) { SSAInt alpha, inv_alpha; @@ -143,29 +172,29 @@ SSAVec4i DrawColumnCodegen::ProcessPixel(SSAInt sample_index, SSAVec4i bgcolor, { default: case DrawColumnVariant::DrawCopy: - return blend_copy(basecolors[ColormapSample(sample_index) * 4].load_vec4ub()); + return blend_copy(Sample(sample_index)); case DrawColumnVariant::Draw: - return blend_copy(Shade(ColormapSample(sample_index), isSimpleShade)); + return blend_copy(Shade(Sample(sample_index), isSimpleShade)); case DrawColumnVariant::DrawAdd: case DrawColumnVariant::DrawAddClamp: - return blend_add(Shade(ColormapSample(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha); + return blend_add(Shade(Sample(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha); case DrawColumnVariant::DrawShaded: alpha = SSAInt::MAX(SSAInt::MIN(ColormapSample(sample_index), SSAInt(64)), SSAInt(0)) * 4; inv_alpha = 256 - alpha; return blend_add(color, bgcolor, alpha, inv_alpha); case DrawColumnVariant::DrawSubClamp: - return blend_sub(Shade(ColormapSample(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha); + return 
blend_sub(Shade(Sample(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha); case DrawColumnVariant::DrawRevSubClamp: - return blend_revsub(Shade(ColormapSample(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha); + return blend_revsub(Shade(Sample(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha); case DrawColumnVariant::DrawTranslated: - return blend_copy(Shade(TranslateSample(sample_index), isSimpleShade)); + return blend_copy(ShadePal(TranslateSample(sample_index), isSimpleShade)); case DrawColumnVariant::DrawTlatedAdd: case DrawColumnVariant::DrawAddClampTranslated: - return blend_add(Shade(TranslateSample(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha); + return blend_add(ShadePal(TranslateSample(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha); case DrawColumnVariant::DrawSubClampTranslated: - return blend_sub(Shade(TranslateSample(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha); + return blend_sub(ShadePal(TranslateSample(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha); case DrawColumnVariant::DrawRevSubClampTranslated: - return blend_revsub(Shade(TranslateSample(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha); + return blend_revsub(ShadePal(TranslateSample(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha); case DrawColumnVariant::Fill: return blend_copy(color); case DrawColumnVariant::FillAdd: @@ -182,6 +211,57 @@ SSAVec4i DrawColumnCodegen::ProcessPixel(SSAInt sample_index, SSAVec4i bgcolor, } } +SSAVec4i DrawColumnCodegen::ProcessPixelPal(SSAInt sample_index, SSAVec4i bgcolor, DrawColumnVariant variant, bool isSimpleShade) +{ + SSAInt alpha, inv_alpha; + switch (variant) + { + default: + case DrawColumnVariant::DrawCopy: + return blend_copy(basecolors[ColormapSample(sample_index) * 4].load_vec4ub()); + case DrawColumnVariant::Draw: + return blend_copy(ShadePal(ColormapSample(sample_index), isSimpleShade)); + case DrawColumnVariant::DrawAdd: + case 
DrawColumnVariant::DrawAddClamp: + return blend_add(ShadePal(ColormapSample(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha); + case DrawColumnVariant::DrawShaded: + alpha = SSAInt::MAX(SSAInt::MIN(ColormapSample(sample_index), SSAInt(64)), SSAInt(0)) * 4; + inv_alpha = 256 - alpha; + return blend_add(color, bgcolor, alpha, inv_alpha); + case DrawColumnVariant::DrawSubClamp: + return blend_sub(ShadePal(ColormapSample(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha); + case DrawColumnVariant::DrawRevSubClamp: + return blend_revsub(ShadePal(ColormapSample(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha); + case DrawColumnVariant::DrawTranslated: + return blend_copy(ShadePal(TranslateSample(sample_index), isSimpleShade)); + case DrawColumnVariant::DrawTlatedAdd: + case DrawColumnVariant::DrawAddClampTranslated: + return blend_add(ShadePal(TranslateSample(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha); + case DrawColumnVariant::DrawSubClampTranslated: + return blend_sub(ShadePal(TranslateSample(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha); + case DrawColumnVariant::DrawRevSubClampTranslated: + return blend_revsub(ShadePal(TranslateSample(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha); + case DrawColumnVariant::Fill: + return blend_copy(color); + case DrawColumnVariant::FillAdd: + alpha = srccolor[3]; + alpha = alpha + (alpha >> 7); + inv_alpha = 256 - alpha; + return blend_add(srccolor, bgcolor, alpha, inv_alpha); + case DrawColumnVariant::FillAddClamp: + return blend_add(srccolor, bgcolor, srcalpha, destalpha); + case DrawColumnVariant::FillSubClamp: + return blend_sub(srccolor, bgcolor, srcalpha, destalpha); + case DrawColumnVariant::FillRevSubClamp: + return blend_revsub(srccolor, bgcolor, srcalpha, destalpha); + } +} + +SSAVec4i DrawColumnCodegen::Sample(SSAInt sample_index) +{ + return source[sample_index].load_vec4ub(); +} + SSAInt DrawColumnCodegen::ColormapSample(SSAInt 
sample_index) { return colormap[source[sample_index].load().zext_int()].load().zext_int(); @@ -192,7 +272,15 @@ SSAInt DrawColumnCodegen::TranslateSample(SSAInt sample_index) return translation[source[sample_index].load().zext_int()].load().zext_int(); } -SSAVec4i DrawColumnCodegen::Shade(SSAInt palIndex, bool isSimpleShade) +SSAVec4i DrawColumnCodegen::Shade(SSAVec4i fg, bool isSimpleShade) +{ + if (isSimpleShade) + return shade_bgra_simple(fg, light); + else + return shade_bgra_advanced(fg, light, shade_constants); +} + +SSAVec4i DrawColumnCodegen::ShadePal(SSAInt palIndex, bool isSimpleShade) { if (isSimpleShade) return shade_pal_index_simple(palIndex, light, basecolors); diff --git a/src/r_compiler/fixedfunction/drawcolumncodegen.h b/src/r_compiler/fixedfunction/drawcolumncodegen.h index 675a5ea670..ffba50a15a 100644 --- a/src/r_compiler/fixedfunction/drawcolumncodegen.h +++ b/src/r_compiler/fixedfunction/drawcolumncodegen.h @@ -39,9 +39,13 @@ public: private: void Loop(DrawColumnVariant variant, DrawColumnMethod method, bool isSimpleShade); SSAVec4i ProcessPixel(SSAInt sample_index, SSAVec4i bgcolor, DrawColumnVariant variant, bool isSimpleShade); + SSAVec4i ProcessPixelPal(SSAInt sample_index, SSAVec4i bgcolor, DrawColumnVariant variant, bool isSimpleShade); + SSAVec4i Sample(SSAInt frac); SSAInt ColormapSample(SSAInt frac); SSAInt TranslateSample(SSAInt frac); - SSAVec4i Shade(SSAInt palIndex, bool isSimpleShade); + SSAVec4i Shade(SSAVec4i fgcolor, bool isSimpleShade); + SSAVec4i ShadePal(SSAInt palIndex, bool isSimpleShade); + bool IsPaletteInput(DrawColumnVariant variant); SSAStack stack_index, stack_frac; diff --git a/src/r_compiler/llvmdrawers.cpp b/src/r_compiler/llvmdrawers.cpp index 3108b8c6a5..ac0633058d 100644 --- a/src/r_compiler/llvmdrawers.cpp +++ b/src/r_compiler/llvmdrawers.cpp @@ -107,6 +107,11 @@ LLVMDrawersImpl::LLVMDrawersImpl() CodegenDrawColumn("DrawColumnRt1AddClamp", DrawColumnVariant::DrawAddClamp, DrawColumnMethod::Rt1); 
CodegenDrawColumn("DrawColumnRt1SubClamp", DrawColumnVariant::DrawSubClamp, DrawColumnMethod::Rt1); CodegenDrawColumn("DrawColumnRt1RevSubClamp", DrawColumnVariant::DrawRevSubClamp, DrawColumnMethod::Rt1); + CodegenDrawColumn("DrawColumnRt1Translated", DrawColumnVariant::DrawTranslated, DrawColumnMethod::Rt1); + CodegenDrawColumn("DrawColumnRt1TlatedAdd", DrawColumnVariant::DrawTlatedAdd, DrawColumnMethod::Rt1); + CodegenDrawColumn("DrawColumnRt1AddClampTranslated", DrawColumnVariant::DrawAddClampTranslated, DrawColumnMethod::Rt1); + CodegenDrawColumn("DrawColumnRt1SubClampTranslated", DrawColumnVariant::DrawSubClampTranslated, DrawColumnMethod::Rt1); + CodegenDrawColumn("DrawColumnRt1RevSubClampTranslated", DrawColumnVariant::DrawRevSubClampTranslated, DrawColumnMethod::Rt1); CodegenDrawColumn("DrawColumnRt4", DrawColumnVariant::Draw, DrawColumnMethod::Rt4); CodegenDrawColumn("DrawColumnRt4Copy", DrawColumnVariant::DrawCopy, DrawColumnMethod::Rt4); CodegenDrawColumn("DrawColumnRt4Add", DrawColumnVariant::DrawAdd, DrawColumnMethod::Rt4); @@ -114,6 +119,11 @@ LLVMDrawersImpl::LLVMDrawersImpl() CodegenDrawColumn("DrawColumnRt4AddClamp", DrawColumnVariant::DrawAddClamp, DrawColumnMethod::Rt4); CodegenDrawColumn("DrawColumnRt4SubClamp", DrawColumnVariant::DrawSubClamp, DrawColumnMethod::Rt4); CodegenDrawColumn("DrawColumnRt4RevSubClamp", DrawColumnVariant::DrawRevSubClamp, DrawColumnMethod::Rt4); + CodegenDrawColumn("DrawColumnRt4Translated", DrawColumnVariant::DrawTranslated, DrawColumnMethod::Rt4); + CodegenDrawColumn("DrawColumnRt4TlatedAdd", DrawColumnVariant::DrawTlatedAdd, DrawColumnMethod::Rt4); + CodegenDrawColumn("DrawColumnRt4AddClampTranslated", DrawColumnVariant::DrawAddClampTranslated, DrawColumnMethod::Rt4); + CodegenDrawColumn("DrawColumnRt4SubClampTranslated", DrawColumnVariant::DrawSubClampTranslated, DrawColumnMethod::Rt4); + CodegenDrawColumn("DrawColumnRt4RevSubClampTranslated", DrawColumnVariant::DrawRevSubClampTranslated, DrawColumnMethod::Rt4); 
CodegenDrawSpan("DrawSpan", DrawSpanVariant::Opaque); CodegenDrawSpan("DrawSpanMasked", DrawSpanVariant::Masked); CodegenDrawSpan("DrawSpanTranslucent", DrawSpanVariant::Translucent); @@ -159,6 +169,11 @@ LLVMDrawersImpl::LLVMDrawersImpl() DrawColumnRt1AddClamp = mProgram.GetProcAddress("DrawColumnRt1AddClamp"); DrawColumnRt1SubClamp = mProgram.GetProcAddress("DrawColumnRt1SubClamp"); DrawColumnRt1RevSubClamp = mProgram.GetProcAddress("DrawColumnRt1RevSubClamp"); + DrawColumnRt1Translated = mProgram.GetProcAddress("DrawColumnRt1Translated"); + DrawColumnRt1TlatedAdd = mProgram.GetProcAddress("DrawColumnRt1TlatedAdd"); + DrawColumnRt1AddClampTranslated = mProgram.GetProcAddress("DrawColumnRt1AddClampTranslated"); + DrawColumnRt1SubClampTranslated = mProgram.GetProcAddress("DrawColumnRt1SubClampTranslated"); + DrawColumnRt1RevSubClampTranslated = mProgram.GetProcAddress("DrawColumnRt1RevSubClampTranslated"); DrawColumnRt4 = mProgram.GetProcAddress("DrawColumnRt4"); DrawColumnRt4Copy = mProgram.GetProcAddress("DrawColumnRt4Copy"); DrawColumnRt4Add = mProgram.GetProcAddress("DrawColumnRt4Add"); @@ -166,6 +181,11 @@ LLVMDrawersImpl::LLVMDrawersImpl() DrawColumnRt4AddClamp = mProgram.GetProcAddress("DrawColumnRt4AddClamp"); DrawColumnRt4SubClamp = mProgram.GetProcAddress("DrawColumnRt4SubClamp"); DrawColumnRt4RevSubClamp = mProgram.GetProcAddress("DrawColumnRt4RevSubClamp"); + DrawColumnRt4Translated = mProgram.GetProcAddress("DrawColumnRt4Translated"); + DrawColumnRt4TlatedAdd = mProgram.GetProcAddress("DrawColumnRt4TlatedAdd"); + DrawColumnRt4AddClampTranslated = mProgram.GetProcAddress("DrawColumnRt4AddClampTranslated"); + DrawColumnRt4SubClampTranslated = mProgram.GetProcAddress("DrawColumnRt4SubClampTranslated"); + DrawColumnRt4RevSubClampTranslated = mProgram.GetProcAddress("DrawColumnRt4RevSubClampTranslated"); DrawSpan = mProgram.GetProcAddress("DrawSpan"); DrawSpanMasked = mProgram.GetProcAddress("DrawSpanMasked"); DrawSpanTranslucent = 
mProgram.GetProcAddress("DrawSpanTranslucent"); diff --git a/src/r_compiler/llvmdrawers.h b/src/r_compiler/llvmdrawers.h index 549825e4f9..60a6c799ac 100644 --- a/src/r_compiler/llvmdrawers.h +++ b/src/r_compiler/llvmdrawers.h @@ -144,6 +144,11 @@ public: void(*DrawColumnRt1AddClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; void(*DrawColumnRt1SubClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; void(*DrawColumnRt1RevSubClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; + void(*DrawColumnRt1Translated)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; + void(*DrawColumnRt1TlatedAdd)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; + void(*DrawColumnRt1AddClampTranslated)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; + void(*DrawColumnRt1SubClampTranslated)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; + void(*DrawColumnRt1RevSubClampTranslated)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; void(*DrawColumnRt4)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; void(*DrawColumnRt4Copy)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; void(*DrawColumnRt4Add)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; @@ -151,6 +156,11 @@ public: void(*DrawColumnRt4AddClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; void(*DrawColumnRt4SubClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; void(*DrawColumnRt4RevSubClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; + void(*DrawColumnRt4Translated)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; + void(*DrawColumnRt4TlatedAdd)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; + void(*DrawColumnRt4AddClampTranslated)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; + void(*DrawColumnRt4SubClampTranslated)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; + 
void(*DrawColumnRt4RevSubClampTranslated)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; void(*DrawSpan)(const DrawSpanArgs *) = nullptr; void(*DrawSpanMasked)(const DrawSpanArgs *) = nullptr; diff --git a/src/r_draw.cpp b/src/r_draw.cpp index f255352f50..11c7020b4b 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -191,6 +191,8 @@ FDynamicColormap ShadeFakeColormap[16]; BYTE identitymap[256]; FDynamicColormap identitycolormap; +bool drawer_needs_pal_input; + EXTERN_CVAR (Int, r_columnmethod) void R_InitShadeMaps() @@ -2516,6 +2518,7 @@ static bool R_SetBlendFunc (int op, fixed_t fglevel, fixed_t bglevel, int flags) colfunc = transcolfunc; hcolfunc_post1 = rt_tlate1col; hcolfunc_post4 = rt_tlate4cols; + drawer_needs_pal_input = true; } return true; } @@ -2566,6 +2569,7 @@ static bool R_SetBlendFunc (int op, fixed_t fglevel, fixed_t bglevel, int flags) colfunc = R_DrawTlatedAddColumn; hcolfunc_post1 = rt_tlateadd1col; hcolfunc_post4 = rt_tlateadd4cols; + drawer_needs_pal_input = true; } } else @@ -2587,6 +2591,7 @@ static bool R_SetBlendFunc (int op, fixed_t fglevel, fixed_t bglevel, int flags) colfunc = R_DrawAddClampTranslatedColumn; hcolfunc_post1 = rt_tlateaddclamp1col; hcolfunc_post4 = rt_tlateaddclamp4cols; + drawer_needs_pal_input = true; } } return true; @@ -2609,6 +2614,7 @@ static bool R_SetBlendFunc (int op, fixed_t fglevel, fixed_t bglevel, int flags) colfunc = R_DrawSubClampTranslatedColumn; hcolfunc_post1 = rt_tlatesubclamp1col; hcolfunc_post4 = rt_tlatesubclamp4cols; + drawer_needs_pal_input = true; } return true; @@ -2634,6 +2640,7 @@ static bool R_SetBlendFunc (int op, fixed_t fglevel, fixed_t bglevel, int flags) colfunc = R_DrawRevSubClampTranslatedColumn; hcolfunc_post1 = rt_tlaterevsubclamp1col; hcolfunc_post4 = rt_tlaterevsubclamp4cols; + drawer_needs_pal_input = true; } return true; @@ -2658,6 +2665,8 @@ ESPSResult R_SetPatchStyle (FRenderStyle style, fixed_t alpha, int translation, { fixed_t fglevel, bglevel; + 
drawer_needs_pal_input = false; + style.CheckFuzz(); if (style.BlendOp == STYLEOP_Shadow) @@ -2706,6 +2715,7 @@ ESPSResult R_SetPatchStyle (FRenderStyle style, fixed_t alpha, int translation, colfunc = R_DrawShadedColumn; hcolfunc_post1 = rt_shaded1col; hcolfunc_post4 = rt_shaded4cols; + drawer_needs_pal_input = true; dc_color = fixedcolormap ? fixedcolormap->Maps[APART(color)] : basecolormap->Maps[APART(color)]; basecolormap = &ShadeFakeColormap[16-alpha]; if (fixedlightlev >= 0 && fixedcolormap == NULL) diff --git a/src/r_drawt.cpp b/src/r_drawt.cpp index 8370930441..43354bfd52 100644 --- a/src/r_drawt.cpp +++ b/src/r_drawt.cpp @@ -1121,6 +1121,7 @@ void R_FillColumnHorizP_C (void) void R_DrawMaskedColumnHoriz (const BYTE *column, const FTexture::Span *span) { int pixelsize = r_swtruecolor ? 4 : 1; + int inputpixelsize = (r_swtruecolor && !drawer_needs_pal_input) ? 4 : 1; const fixed_t texturemid = FLOAT2FIXED(dc_texturemid); while (span->Length != 0) { @@ -1189,7 +1190,7 @@ void R_DrawMaskedColumnHoriz (const BYTE *column, const FTexture::Span *span) endfrac -= dc_iscale; } } - dc_source = column + top; + dc_source = column + top * inputpixelsize; dc_dest = (ylookup[dc_yl] + dc_x) * pixelsize + dc_destorg; dc_count = dc_yh - dc_yl + 1; hcolfunc_pre (); diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp index 98d8c22429..18ae228e4e 100644 --- a/src/r_drawt_rgba.cpp +++ b/src/r_drawt_rgba.cpp @@ -124,6 +124,11 @@ DECLARE_DRAW_COMMAND(DrawColumnRt1Shaded, DrawColumnRt1Shaded, DrawColumnRt1LLVM DECLARE_DRAW_COMMAND(DrawColumnRt1AddClamp, DrawColumnRt1AddClamp, DrawColumnRt1LLVMCommand); DECLARE_DRAW_COMMAND(DrawColumnRt1SubClamp, DrawColumnRt1SubClamp, DrawColumnRt1LLVMCommand); DECLARE_DRAW_COMMAND(DrawColumnRt1RevSubClamp, DrawColumnRt1RevSubClamp, DrawColumnRt1LLVMCommand); +DECLARE_DRAW_COMMAND(DrawColumnRt1Translated, DrawColumnRt1Translated, DrawColumnRt1LLVMCommand); +DECLARE_DRAW_COMMAND(DrawColumnRt1TlatedAdd, DrawColumnRt1TlatedAdd, 
DrawColumnRt1LLVMCommand); +DECLARE_DRAW_COMMAND(DrawColumnRt1AddClampTranslated, DrawColumnRt1AddClampTranslated, DrawColumnRt1LLVMCommand); +DECLARE_DRAW_COMMAND(DrawColumnRt1SubClampTranslated, DrawColumnRt1SubClampTranslated, DrawColumnRt1LLVMCommand); +DECLARE_DRAW_COMMAND(DrawColumnRt1RevSubClampTranslated, DrawColumnRt1RevSubClampTranslated, DrawColumnRt1LLVMCommand); DECLARE_DRAW_COMMAND(DrawColumnRt4, DrawColumnRt4, DrawColumnRt1LLVMCommand); DECLARE_DRAW_COMMAND(DrawColumnRt4Copy, DrawColumnRt4Copy, DrawColumnRt1LLVMCommand); DECLARE_DRAW_COMMAND(DrawColumnRt4Add, DrawColumnRt4Add, DrawColumnRt1LLVMCommand); @@ -131,129 +136,14 @@ DECLARE_DRAW_COMMAND(DrawColumnRt4Shaded, DrawColumnRt4Shaded, DrawColumnRt1LLVM DECLARE_DRAW_COMMAND(DrawColumnRt4AddClamp, DrawColumnRt4AddClamp, DrawColumnRt1LLVMCommand); DECLARE_DRAW_COMMAND(DrawColumnRt4SubClamp, DrawColumnRt4SubClamp, DrawColumnRt1LLVMCommand); DECLARE_DRAW_COMMAND(DrawColumnRt4RevSubClamp, DrawColumnRt4RevSubClamp, DrawColumnRt1LLVMCommand); +DECLARE_DRAW_COMMAND(DrawColumnRt4Translated, DrawColumnRt4Translated, DrawColumnRt1LLVMCommand); +DECLARE_DRAW_COMMAND(DrawColumnRt4TlatedAdd, DrawColumnRt4TlatedAdd, DrawColumnRt1LLVMCommand); +DECLARE_DRAW_COMMAND(DrawColumnRt4AddClampTranslated, DrawColumnRt4AddClampTranslated, DrawColumnRt1LLVMCommand); +DECLARE_DRAW_COMMAND(DrawColumnRt4SubClampTranslated, DrawColumnRt4SubClampTranslated, DrawColumnRt1LLVMCommand); +DECLARE_DRAW_COMMAND(DrawColumnRt4RevSubClampTranslated, DrawColumnRt4RevSubClampTranslated, DrawColumnRt1LLVMCommand); ///////////////////////////////////////////////////////////////////////////// -class RtTranslate1colRGBACommand : public DrawerCommand -{ - const BYTE * RESTRICT translation; - int hx; - int yl; - int yh; - -public: - RtTranslate1colRGBACommand(const BYTE *translation, int hx, int yl, int yh) - { - this->translation = translation; - this->hx = hx; - this->yl = yl; - this->yh = yh; - } - - void Execute(DrawerThread *thread) 
override - { - int count = yh - yl + 1; - uint32_t *source = &thread->dc_temp_rgba[yl*4 + hx]; - - // Things we do to hit the compiler's optimizer with a clue bat: - // 1. Parallelism is explicitly spelled out by using a separate - // C instruction for each assembly instruction. GCC lets me - // have four temporaries, but VC++ spills to the stack with - // more than two. Two is probably optimal, anyway. - // 2. The results of the translation lookups are explicitly - // stored in byte-sized variables. This causes the VC++ code - // to use byte mov instructions in most cases; for apparently - // random reasons, it will use movzx for some places. GCC - // ignores this and uses movzx always. - - // Do 8 rows at a time. - for (int count8 = count >> 3; count8; --count8) - { - int c0, c1; - BYTE b0, b1; - - c0 = source[0]; c1 = source[4]; - b0 = translation[c0]; b1 = translation[c1]; - source[0] = b0; source[4] = b1; - - c0 = source[8]; c1 = source[12]; - b0 = translation[c0]; b1 = translation[c1]; - source[8] = b0; source[12] = b1; - - c0 = source[16]; c1 = source[20]; - b0 = translation[c0]; b1 = translation[c1]; - source[16] = b0; source[20] = b1; - - c0 = source[24]; c1 = source[28]; - b0 = translation[c0]; b1 = translation[c1]; - source[24] = b0; source[28] = b1; - - source += 32; - } - // Finish by doing 1 row at a time. - for (count &= 7; count; --count, source += 4) - { - source[0] = translation[source[0]]; - } - } -}; - -class RtTranslate4colsRGBACommand : public DrawerCommand -{ - const BYTE * RESTRICT translation; - int yl; - int yh; - -public: - RtTranslate4colsRGBACommand(const BYTE *translation, int yl, int yh) - { - this->translation = translation; - this->yl = yl; - this->yh = yh; - } - - void Execute(DrawerThread *thread) override - { - int count = yh - yl + 1; - uint32_t *source = &thread->dc_temp_rgba[yl*4]; - int c0, c1; - BYTE b0, b1; - - // Do 2 rows at a time. 
- for (int count8 = count >> 1; count8; --count8) - { - c0 = source[0]; c1 = source[1]; - b0 = translation[c0]; b1 = translation[c1]; - source[0] = b0; source[1] = b1; - - c0 = source[2]; c1 = source[3]; - b0 = translation[c0]; b1 = translation[c1]; - source[2] = b0; source[3] = b1; - - c0 = source[4]; c1 = source[5]; - b0 = translation[c0]; b1 = translation[c1]; - source[4] = b0; source[5] = b1; - - c0 = source[6]; c1 = source[7]; - b0 = translation[c0]; b1 = translation[c1]; - source[6] = b0; source[7] = b1; - - source += 8; - } - // Do the final row if count was odd. - if (count & 1) - { - c0 = source[0]; c1 = source[1]; - b0 = translation[c0]; b1 = translation[c1]; - source[0] = b0; source[1] = b1; - - c0 = source[2]; c1 = source[3]; - b0 = translation[c0]; b1 = translation[c1]; - source[2] = b0; source[3] = b1; - } - } -}; - class RtInitColsRGBACommand : public DrawerCommand { BYTE * RESTRICT buff; @@ -270,12 +160,13 @@ public: } }; +template class DrawColumnHorizRGBACommand : public DrawerCommand { int _count; fixed_t _iscale; fixed_t _texturefrac; - const BYTE * RESTRICT _source; + const InputPixelType * RESTRICT _source; int _x; int _yl; int _yh; @@ -286,7 +177,7 @@ public: _count = dc_count; _iscale = dc_iscale; _texturefrac = dc_texturefrac; - _source = dc_source; + _source = (const InputPixelType *)dc_source; _x = dc_x; _yl = dc_yl; _yh = dc_yh; @@ -309,7 +200,7 @@ public: fracstep = _iscale; frac = _texturefrac; - const BYTE *source = _source; + const InputPixelType *source = _source; if (count & 1) { *dest = source[frac >> FRACBITS]; dest += 4; frac += fracstep; @@ -419,28 +310,16 @@ void rt_map4cols_rgba (int sx, int yl, int yh) DrawerCommandQueue::QueueCommand(0, sx, yl, yh); } -void rt_Translate1col_rgba(const BYTE *translation, int hx, int yl, int yh) -{ - DrawerCommandQueue::QueueCommand(translation, hx, yl, yh); -} - -void rt_Translate4cols_rgba(const BYTE *translation, int yl, int yh) -{ - DrawerCommandQueue::QueueCommand(translation, yl, yh); 
-} - // Translates one span at hx to the screen at sx. void rt_tlate1col_rgba (int hx, int sx, int yl, int yh) { - rt_Translate1col_rgba(dc_translation, hx, yl, yh); - rt_map1col(hx, sx, yl, yh); + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Translates all four spans to the screen starting at sx. void rt_tlate4cols_rgba (int sx, int yl, int yh) { - rt_Translate4cols_rgba(dc_translation, yl, yh); - rt_map4cols(sx, yl, yh); + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); } // Adds one span at hx to the screen at sx without clamping. @@ -458,15 +337,13 @@ void rt_add4cols_rgba (int sx, int yl, int yh) // Translates and adds one span at hx to the screen at sx without clamping. void rt_tlateadd1col_rgba (int hx, int sx, int yl, int yh) { - rt_Translate1col_rgba(dc_translation, hx, yl, yh); - rt_add1col(hx, sx, yl, yh); + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Translates and adds all four spans to the screen starting at sx without clamping. void rt_tlateadd4cols_rgba(int sx, int yl, int yh) { - rt_Translate4cols_rgba(dc_translation, yl, yh); - rt_add4cols(sx, yl, yh); + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); } // Shades one span at hx to the screen at sx. @@ -496,15 +373,13 @@ void rt_addclamp4cols_rgba (int sx, int yl, int yh) // Translates and adds one span at hx to the screen at sx with clamping. void rt_tlateaddclamp1col_rgba (int hx, int sx, int yl, int yh) { - rt_Translate1col_rgba(dc_translation, hx, yl, yh); - rt_addclamp1col_rgba(hx, sx, yl, yh); + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Translates and adds all four spans to the screen starting at sx with clamping. void rt_tlateaddclamp4cols_rgba (int sx, int yl, int yh) { - rt_Translate4cols_rgba(dc_translation, yl, yh); - rt_addclamp4cols(sx, yl, yh); + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); } // Subtracts one span at hx to the screen at sx with clamping. 
@@ -522,15 +397,13 @@ void rt_subclamp4cols_rgba (int sx, int yl, int yh) // Translates and subtracts one span at hx to the screen at sx with clamping. void rt_tlatesubclamp1col_rgba (int hx, int sx, int yl, int yh) { - rt_Translate1col_rgba(dc_translation, hx, yl, yh); - rt_subclamp1col_rgba(hx, sx, yl, yh); + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Translates and subtracts all four spans to the screen starting at sx with clamping. void rt_tlatesubclamp4cols_rgba (int sx, int yl, int yh) { - rt_Translate4cols_rgba(dc_translation, yl, yh); - rt_subclamp4cols_rgba(sx, yl, yh); + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); } // Subtracts one span at hx from the screen at sx with clamping. @@ -548,15 +421,13 @@ void rt_revsubclamp4cols_rgba (int sx, int yl, int yh) // Translates and subtracts one span at hx from the screen at sx with clamping. void rt_tlaterevsubclamp1col_rgba (int hx, int sx, int yl, int yh) { - rt_Translate1col_rgba(dc_translation, hx, yl, yh); - rt_revsubclamp1col_rgba(hx, sx, yl, yh); + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Translates and subtracts all four spans from the screen starting at sx with clamping. 
void rt_tlaterevsubclamp4cols_rgba (int sx, int yl, int yh) { - rt_Translate4cols_rgba(dc_translation, yl, yh); - rt_revsubclamp4cols_rgba(sx, yl, yh); + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); } // Before each pass through a rendering loop that uses these routines, @@ -590,7 +461,10 @@ void R_DrawColumnHoriz_rgba (void) (*span)[1] = dc_yh; *span += 2; - DrawerCommandQueue::QueueCommand(); + if (drawer_needs_pal_input) + DrawerCommandQueue::QueueCommand>(); + else + DrawerCommandQueue::QueueCommand>(); } // [RH] Just fills a column with a given color diff --git a/src/r_main.h b/src/r_main.h index 8d18675264..6a802e7993 100644 --- a/src/r_main.h +++ b/src/r_main.h @@ -126,7 +126,7 @@ extern void (*hcolfunc_pre) (void); extern void (*hcolfunc_post1) (int hx, int sx, int yl, int yh); extern void (*hcolfunc_post2) (int hx, int sx, int yl, int yh); extern void (*hcolfunc_post4) (int sx, int yl, int yh); - +extern bool drawer_needs_pal_input; void R_InitTextureMapping (); diff --git a/src/r_segs.cpp b/src/r_segs.cpp index 078f1d9215..92e6a447a4 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -197,7 +197,11 @@ static void BlastMaskedColumn (void (*blastfunc)(const BYTE *pixels, const FText // draw the texture const FTexture::Span *spans; - const BYTE *pixels = tex->GetColumn (maskedtexturecol[dc_x] >> FRACBITS, &spans); + const BYTE *pixels; + if (r_swtruecolor && !drawer_needs_pal_input) + pixels = (const BYTE *)tex->GetColumnBgra(maskedtexturecol[dc_x] >> FRACBITS, &spans); + else + pixels = tex->GetColumn(maskedtexturecol[dc_x] >> FRACBITS, &spans); blastfunc (pixels, spans); rw_light += rw_lightstep; spryscale += rw_scalestep; diff --git a/src/r_things.cpp b/src/r_things.cpp index 639ed725ed..8e306e04f3 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -253,6 +253,7 @@ bool sprflipvert; void R_DrawMaskedColumn (const BYTE *column, const FTexture::Span *span) { int pixelsize = r_swtruecolor ? 
4 : 1; + int inputpixelsize = (r_swtruecolor && !drawer_needs_pal_input) ? 4 : 1; const fixed_t centeryfrac = FLOAT2FIXED(CenterY); const fixed_t texturemid = FLOAT2FIXED(dc_texturemid); while (span->Length != 0) @@ -322,7 +323,7 @@ void R_DrawMaskedColumn (const BYTE *column, const FTexture::Span *span) endfrac -= dc_iscale; } } - dc_source = column + top; + dc_source = column + top * inputpixelsize; dc_dest = (ylookup[dc_yl] + dc_x) * pixelsize + dc_destorg; dc_count = dc_yh - dc_yl + 1; colfunc (); @@ -469,7 +470,11 @@ void R_DrawVisSprite (vissprite_t *vis) { while ((dc_x < stop4) && (dc_x & 3)) { - pixels = tex->GetColumn (frac >> FRACBITS, &spans); + if (r_swtruecolor && !drawer_needs_pal_input) + pixels = (const BYTE *)tex->GetColumnBgra (frac >> FRACBITS, &spans); + else + pixels = tex->GetColumn (frac >> FRACBITS, &spans); + if (ispsprite || !R_ClipSpriteColumnWithPortals(vis)) R_DrawMaskedColumn (pixels, spans); dc_x++; @@ -481,7 +486,11 @@ void R_DrawVisSprite (vissprite_t *vis) rt_initcols(nullptr); for (int zz = 4; zz; --zz) { - pixels = tex->GetColumn (frac >> FRACBITS, &spans); + if (r_swtruecolor && !drawer_needs_pal_input) + pixels = (const BYTE *)tex->GetColumnBgra (frac >> FRACBITS, &spans); + else + pixels = tex->GetColumn (frac >> FRACBITS, &spans); + if (ispsprite || !R_ClipSpriteColumnWithPortals(vis)) R_DrawMaskedColumnHoriz (pixels, spans); dc_x++; @@ -492,7 +501,11 @@ void R_DrawVisSprite (vissprite_t *vis) while (dc_x < x2) { - pixels = tex->GetColumn (frac >> FRACBITS, &spans); + if (r_swtruecolor && !drawer_needs_pal_input) + pixels = (const BYTE *)tex->GetColumnBgra (frac >> FRACBITS, &spans); + else + pixels = tex->GetColumn (frac >> FRACBITS, &spans); + if (ispsprite || !R_ClipSpriteColumnWithPortals(vis)) R_DrawMaskedColumn (pixels, spans); dc_x++; @@ -650,7 +663,10 @@ void R_WallSpriteColumn (void (*drawfunc)(const BYTE *column, const FTexture::Sp const BYTE *column; const FTexture::Span *spans; - column = WallSpriteTile->GetColumn 
(lwall[dc_x] >> FRACBITS, &spans); + if (r_swtruecolor && !drawer_needs_pal_input) + column = (const BYTE *)WallSpriteTile->GetColumnBgra (lwall[dc_x] >> FRACBITS, &spans); + else + column = WallSpriteTile->GetColumn (lwall[dc_x] >> FRACBITS, &spans); dc_texturefrac = 0; drawfunc (column, spans); rw_light += rw_lightstep; diff --git a/src/v_draw.cpp b/src/v_draw.cpp index 37ced09d52..1c23523d6d 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -305,7 +305,11 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) { while ((dc_x < stop4) && (dc_x & 3)) { - pixels = img->GetColumn(frac >> FRACBITS, spanptr); + if (r_swtruecolor && !drawer_needs_pal_input) + pixels = (const BYTE *)img->GetColumnBgra(frac >> FRACBITS, spanptr); + else + pixels = img->GetColumn(frac >> FRACBITS, spanptr); + R_DrawMaskedColumn(pixels, spans); dc_x++; frac += xiscale_i; @@ -316,7 +320,11 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) rt_initcols(nullptr); for (int zz = 4; zz; --zz) { - pixels = img->GetColumn(frac >> FRACBITS, spanptr); + if (r_swtruecolor && !drawer_needs_pal_input) + pixels = (const BYTE *)img->GetColumnBgra(frac >> FRACBITS, spanptr); + else + pixels = img->GetColumn(frac >> FRACBITS, spanptr); + R_DrawMaskedColumnHoriz(pixels, spans); dc_x++; frac += xiscale_i; @@ -326,7 +334,11 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) while (dc_x < x2_i) { - pixels = img->GetColumn(frac >> FRACBITS, spanptr); + if (r_swtruecolor && !drawer_needs_pal_input) + pixels = (const BYTE *)img->GetColumnBgra(frac >> FRACBITS, spanptr); + else + pixels = img->GetColumn(frac >> FRACBITS, spanptr); + R_DrawMaskedColumn(pixels, spans); dc_x++; frac += xiscale_i; From 606b7f5a0067d3edf9c32f462567ede04ea189a7 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 8 Oct 2016 04:59:24 +0200 Subject: [PATCH 170/912] Run module level LLVM optimization passes before calling finalizeObject --- src/r_compiler/llvmdrawers.cpp | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/src/r_compiler/llvmdrawers.cpp b/src/r_compiler/llvmdrawers.cpp index ac0633058d..ae9dcb6626 100644 --- a/src/r_compiler/llvmdrawers.cpp +++ b/src/r_compiler/llvmdrawers.cpp @@ -143,8 +143,8 @@ LLVMDrawersImpl::LLVMDrawersImpl() CodegenDrawWall("tmvline1_revsubclamp", DrawWallVariant::RevSubClamp, 1); CodegenDrawWall("tmvline4_revsubclamp", DrawWallVariant::RevSubClamp, 4); - mProgram.engine()->finalizeObject(); mProgram.modulePassManager()->run(*mProgram.module()); + mProgram.engine()->finalizeObject(); FillColumn = mProgram.GetProcAddress("FillColumn"); FillColumnAdd = mProgram.GetProcAddress("FillColumnAdd"); From b3be0d22ccd6f1cb89e0dd8e3ca42b942139b19e Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 8 Oct 2016 06:50:33 +0200 Subject: [PATCH 171/912] Unroll loops by 8 and enable some additional passes --- src/r_compiler/llvmdrawers.cpp | 3 +++ src/r_compiler/ssa/ssa_for_block.cpp | 17 +++++++++++++++-- src/r_compiler/ssa/ssa_for_block.h | 2 +- 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/src/r_compiler/llvmdrawers.cpp b/src/r_compiler/llvmdrawers.cpp index ae9dcb6626..9391e7e201 100644 --- a/src/r_compiler/llvmdrawers.cpp +++ b/src/r_compiler/llvmdrawers.cpp @@ -434,6 +434,9 @@ LLVMProgram::LLVMProgram() passManagerBuilder.OptLevel = 3; passManagerBuilder.SizeLevel = 0; passManagerBuilder.Inliner = createFunctionInliningPass(); + passManagerBuilder.SLPVectorize = true; + passManagerBuilder.LoopVectorize = true; + passManagerBuilder.LoadCombine = true; passManagerBuilder.populateModulePassManager(*mModulePassManager.get()); passManagerBuilder.populateFunctionPassManager(*mFunctionPassManager.get()); } diff --git a/src/r_compiler/ssa/ssa_for_block.cpp b/src/r_compiler/ssa/ssa_for_block.cpp index f7cd6ad0bd..0f7e01e198 100644 --- a/src/r_compiler/ssa/ssa_for_block.cpp +++ b/src/r_compiler/ssa/ssa_for_block.cpp @@ -13,9 +13,22 @@ SSAForBlock::SSAForBlock() 
SSAScope::builder().SetInsertPoint(if_basic_block); } -void SSAForBlock::loop_block(SSABool true_condition) +void SSAForBlock::loop_block(SSABool true_condition, int unroll_count) { - SSAScope::builder().CreateCondBr(true_condition.v, loop_basic_block, end_basic_block); + auto branch = SSAScope::builder().CreateCondBr(true_condition.v, loop_basic_block, end_basic_block); + if (unroll_count > 0) + { + using namespace llvm; + auto md_unroll_enable = MDNode::get(SSAScope::context(), { + MDString::get(SSAScope::context(), "llvm.loop.unroll.enable") + }); + auto md_unroll_count = MDNode::get(SSAScope::context(), { + MDString::get(SSAScope::context(), "llvm.loop.unroll.count"), + ConstantAsMetadata::get(ConstantInt::get(SSAScope::context(), APInt(32, unroll_count))) + }); + auto md_loop = MDNode::getDistinct(SSAScope::context(), { md_unroll_enable, md_unroll_count }); + branch->setMetadata(LLVMContext::MD_loop, md_loop); + } SSAScope::builder().SetInsertPoint(loop_basic_block); } diff --git a/src/r_compiler/ssa/ssa_for_block.h b/src/r_compiler/ssa/ssa_for_block.h index 4c1952c14e..9dddef4d61 100644 --- a/src/r_compiler/ssa/ssa_for_block.h +++ b/src/r_compiler/ssa/ssa_for_block.h @@ -7,7 +7,7 @@ class SSAForBlock { public: SSAForBlock(); - void loop_block(SSABool true_condition); + void loop_block(SSABool true_condition, int unroll_count = 8); void end_block(); private: From fc07a253068d26b043cd40492a7c16874505ed15 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 8 Oct 2016 09:29:26 +0200 Subject: [PATCH 172/912] Add aliasing meta data to loads and stores --- .../fixedfunction/drawcolumncodegen.cpp | 74 ++++++------- .../fixedfunction/drawercodegen.cpp | 20 ++-- .../fixedfunction/drawspancodegen.cpp | 60 +++++----- .../fixedfunction/drawwallcodegen.cpp | 104 +++++++++--------- src/r_compiler/ssa/ssa_float_ptr.cpp | 30 +++-- src/r_compiler/ssa/ssa_float_ptr.h | 6 +- src/r_compiler/ssa/ssa_int.cpp | 5 + src/r_compiler/ssa/ssa_int.h | 2 + 
src/r_compiler/ssa/ssa_int_ptr.cpp | 30 +++-- src/r_compiler/ssa/ssa_int_ptr.h | 6 +- src/r_compiler/ssa/ssa_pixelformat4f.h | 4 +- src/r_compiler/ssa/ssa_pixelformat4ub.h | 4 +- .../ssa/ssa_pixelformat4ub_argb_rev.h | 4 +- src/r_compiler/ssa/ssa_pixelformat4ub_rev.h | 4 +- src/r_compiler/ssa/ssa_scope.cpp | 9 ++ src/r_compiler/ssa/ssa_scope.h | 4 + src/r_compiler/ssa/ssa_ubyte_ptr.cpp | 36 ++++-- src/r_compiler/ssa/ssa_ubyte_ptr.h | 9 +- src/r_compiler/ssa/ssa_value.cpp | 10 +- src/r_compiler/ssa/ssa_value.h | 4 +- src/r_compiler/ssa/ssa_vec4f_ptr.cpp | 20 +++- src/r_compiler/ssa/ssa_vec4f_ptr.h | 4 +- 22 files changed, 260 insertions(+), 189 deletions(-) diff --git a/src/r_compiler/fixedfunction/drawcolumncodegen.cpp b/src/r_compiler/fixedfunction/drawcolumncodegen.cpp index e920fb73a9..721c85dd85 100644 --- a/src/r_compiler/fixedfunction/drawcolumncodegen.cpp +++ b/src/r_compiler/fixedfunction/drawcolumncodegen.cpp @@ -13,41 +13,41 @@ void DrawColumnCodegen::Generate(DrawColumnVariant variant, DrawColumnMethod method, SSAValue args, SSAValue thread_data) { - dest = args[0][0].load(); - source = args[0][1].load(); - colormap = args[0][2].load(); - translation = args[0][3].load(); - basecolors = args[0][4].load(); - pitch = args[0][5].load(); - count = args[0][6].load(); - dest_y = args[0][7].load(); + dest = args[0][0].load(true); + source = args[0][1].load(true); + colormap = args[0][2].load(true); + translation = args[0][3].load(true); + basecolors = args[0][4].load(true); + pitch = args[0][5].load(true); + count = args[0][6].load(true); + dest_y = args[0][7].load(true); if (method == DrawColumnMethod::Normal) - iscale = args[0][8].load(); - texturefrac = args[0][9].load(); - light = args[0][10].load(); - color = SSAVec4i::unpack(args[0][11].load()); - srccolor = SSAVec4i::unpack(args[0][12].load()); - srcalpha = args[0][13].load(); - destalpha = args[0][14].load(); - SSAShort light_alpha = args[0][15].load(); - SSAShort light_red = args[0][16].load(); - 
SSAShort light_green = args[0][17].load(); - SSAShort light_blue = args[0][18].load(); - SSAShort fade_alpha = args[0][19].load(); - SSAShort fade_red = args[0][20].load(); - SSAShort fade_green = args[0][21].load(); - SSAShort fade_blue = args[0][22].load(); - SSAShort desaturate = args[0][23].load(); - SSAInt flags = args[0][24].load(); + iscale = args[0][8].load(true); + texturefrac = args[0][9].load(true); + light = args[0][10].load(true); + color = SSAVec4i::unpack(args[0][11].load(true)); + srccolor = SSAVec4i::unpack(args[0][12].load(true)); + srcalpha = args[0][13].load(true); + destalpha = args[0][14].load(true); + SSAShort light_alpha = args[0][15].load(true); + SSAShort light_red = args[0][16].load(true); + SSAShort light_green = args[0][17].load(true); + SSAShort light_blue = args[0][18].load(true); + SSAShort fade_alpha = args[0][19].load(true); + SSAShort fade_red = args[0][20].load(true); + SSAShort fade_green = args[0][21].load(true); + SSAShort fade_blue = args[0][22].load(true); + SSAShort desaturate = args[0][23].load(true); + SSAInt flags = args[0][24].load(true); shade_constants.light = SSAVec4i(light_blue.zext_int(), light_green.zext_int(), light_red.zext_int(), light_alpha.zext_int()); shade_constants.fade = SSAVec4i(fade_blue.zext_int(), fade_green.zext_int(), fade_red.zext_int(), fade_alpha.zext_int()); shade_constants.desaturate = desaturate.zext_int(); - thread.core = thread_data[0][0].load(); - thread.num_cores = thread_data[0][1].load(); - thread.pass_start_y = thread_data[0][2].load(); - thread.pass_end_y = thread_data[0][3].load(); - thread.temp = thread_data[0][4].load(); + thread.core = thread_data[0][0].load(true); + thread.num_cores = thread_data[0][1].load(true); + thread.pass_start_y = thread_data[0][2].load(true); + thread.pass_end_y = thread_data[0][3].load(true); + thread.temp = thread_data[0][4].load(true); is_simple_shade = (flags & DrawColumnArgs::simple_shade) == SSAInt(DrawColumnArgs::simple_shade); @@ -104,7 +104,7 @@ 
void DrawColumnCodegen::Loop(DrawColumnVariant variant, DrawColumnMethod method, if (numColumns == 4) { - SSAVec16ub bg = dest[offset].load_unaligned_vec16ub(); + SSAVec16ub bg = dest[offset].load_unaligned_vec16ub(false); SSAVec8s bg0 = SSAVec8s::extendlo(bg); SSAVec8s bg1 = SSAVec8s::extendhi(bg); bgcolor[0] = SSAVec4i::extendlo(bg0); @@ -114,7 +114,7 @@ void DrawColumnCodegen::Loop(DrawColumnVariant variant, DrawColumnMethod method, } else { - bgcolor[0] = dest[offset].load_vec4ub(); + bgcolor[0] = dest[offset].load_vec4ub(false); } SSAVec4i outcolor[4]; @@ -131,7 +131,7 @@ void DrawColumnCodegen::Loop(DrawColumnVariant variant, DrawColumnMethod method, dest[offset].store_vec4ub(outcolor[0]); } - stack_index.store(index + 1); + stack_index.store(index.add(SSAInt(1), true, true)); if (method == DrawColumnMethod::Normal) stack_frac.store(frac + iscale); loop.end_block(); @@ -218,7 +218,7 @@ SSAVec4i DrawColumnCodegen::ProcessPixelPal(SSAInt sample_index, SSAVec4i bgcolo { default: case DrawColumnVariant::DrawCopy: - return blend_copy(basecolors[ColormapSample(sample_index) * 4].load_vec4ub()); + return blend_copy(basecolors[ColormapSample(sample_index) * 4].load_vec4ub(true)); case DrawColumnVariant::Draw: return blend_copy(ShadePal(ColormapSample(sample_index), isSimpleShade)); case DrawColumnVariant::DrawAdd: @@ -259,17 +259,17 @@ SSAVec4i DrawColumnCodegen::ProcessPixelPal(SSAInt sample_index, SSAVec4i bgcolo SSAVec4i DrawColumnCodegen::Sample(SSAInt sample_index) { - return source[sample_index].load_vec4ub(); + return source[sample_index].load_vec4ub(true); } SSAInt DrawColumnCodegen::ColormapSample(SSAInt sample_index) { - return colormap[source[sample_index].load().zext_int()].load().zext_int(); + return colormap[source[sample_index].load(true).zext_int()].load(true).zext_int(); } SSAInt DrawColumnCodegen::TranslateSample(SSAInt sample_index) { - return translation[source[sample_index].load().zext_int()].load().zext_int(); + return 
translation[source[sample_index].load(true).zext_int()].load(true).zext_int(); } SSAVec4i DrawColumnCodegen::Shade(SSAVec4i fg, bool isSimpleShade) diff --git a/src/r_compiler/fixedfunction/drawercodegen.cpp b/src/r_compiler/fixedfunction/drawercodegen.cpp index 822a811411..3916c29ab0 100644 --- a/src/r_compiler/fixedfunction/drawercodegen.cpp +++ b/src/r_compiler/fixedfunction/drawercodegen.cpp @@ -43,13 +43,13 @@ SSAInt DrawerCodegen::calc_light_multiplier(SSAInt light) SSAVec4i DrawerCodegen::shade_pal_index_simple(SSAInt index, SSAInt light, SSAUBytePtr basecolors) { - SSAVec4i color = basecolors[index * 4].load_vec4ub(); // = GPalette.BaseColors[index]; + SSAVec4i color = basecolors[index * 4].load_vec4ub(true); // = GPalette.BaseColors[index]; return shade_bgra_simple(color, light); } SSAVec4i DrawerCodegen::shade_pal_index_advanced(SSAInt index, SSAInt light, const SSAShadeConstants &constants, SSAUBytePtr basecolors) { - SSAVec4i color = basecolors[index * 4].load_vec4ub(); // = GPalette.BaseColors[index]; + SSAVec4i color = basecolors[index * 4].load_vec4ub(true); // = GPalette.BaseColors[index]; return shade_bgra_advanced(color, light, constants); } @@ -125,10 +125,10 @@ SSAVec4i DrawerCodegen::sample_linear(SSAUBytePtr col0, SSAUBytePtr col1, SSAInt SSAInt y0 = frac_y0 >> FRACBITS; SSAInt y1 = frac_y1 >> FRACBITS; - SSAVec4i p00 = col0[y0 * 4].load_vec4ub(); - SSAVec4i p01 = col0[y1 * 4].load_vec4ub(); - SSAVec4i p10 = col1[y0 * 4].load_vec4ub(); - SSAVec4i p11 = col1[y1 * 4].load_vec4ub(); + SSAVec4i p00 = col0[y0 * 4].load_vec4ub(true); + SSAVec4i p01 = col0[y1 * 4].load_vec4ub(true); + SSAVec4i p10 = col1[y0 * 4].load_vec4ub(true); + SSAVec4i p11 = col1[y1 * 4].load_vec4ub(true); SSAInt inv_b = texturefracx; SSAInt a = (frac_y1 >> (FRACBITS - 4)) & 15; @@ -147,10 +147,10 @@ SSAVec4i DrawerCodegen::sample_linear(SSAUBytePtr texture, SSAInt xfrac, SSAInt SSAInt x = xfrac >> xbits; SSAInt y = yfrac >> ybits; - SSAVec4i p00 = texture[((y & ymask) + ((x & 
xmask) << yshift)) * 4].load_vec4ub(); - SSAVec4i p01 = texture[(((y + 1) & ymask) + ((x & xmask) << yshift)) * 4].load_vec4ub(); - SSAVec4i p10 = texture[((y & ymask) + (((x + 1) & xmask) << yshift)) * 4].load_vec4ub(); - SSAVec4i p11 = texture[(((y + 1) & ymask) + (((x + 1) & xmask) << yshift)) * 4].load_vec4ub(); + SSAVec4i p00 = texture[((y & ymask) + ((x & xmask) << yshift)) * 4].load_vec4ub(true); + SSAVec4i p01 = texture[(((y + 1) & ymask) + ((x & xmask) << yshift)) * 4].load_vec4ub(true); + SSAVec4i p10 = texture[((y & ymask) + (((x + 1) & xmask) << yshift)) * 4].load_vec4ub(true); + SSAVec4i p11 = texture[(((y + 1) & ymask) + (((x + 1) & xmask) << yshift)) * 4].load_vec4ub(true); SSAInt inv_b = (xfrac >> (xbits - 4)) & 15; SSAInt inv_a = (yfrac >> (ybits - 4)) & 15; diff --git a/src/r_compiler/fixedfunction/drawspancodegen.cpp b/src/r_compiler/fixedfunction/drawspancodegen.cpp index 70ecb0abd0..4404456ab8 100644 --- a/src/r_compiler/fixedfunction/drawspancodegen.cpp +++ b/src/r_compiler/fixedfunction/drawspancodegen.cpp @@ -13,31 +13,31 @@ void DrawSpanCodegen::Generate(DrawSpanVariant variant, SSAValue args) { - destorg = args[0][0].load(); - source = args[0][1].load(); - destpitch = args[0][2].load(); - stack_xfrac.store(args[0][3].load()); - stack_yfrac.store(args[0][4].load()); - xstep = args[0][5].load(); - ystep = args[0][6].load(); - x1 = args[0][7].load(); - x2 = args[0][8].load(); - y = args[0][9].load(); - xbits = args[0][10].load(); - ybits = args[0][11].load(); - light = args[0][12].load(); - srcalpha = args[0][13].load(); - destalpha = args[0][14].load(); - SSAShort light_alpha = args[0][15].load(); - SSAShort light_red = args[0][16].load(); - SSAShort light_green = args[0][17].load(); - SSAShort light_blue = args[0][18].load(); - SSAShort fade_alpha = args[0][19].load(); - SSAShort fade_red = args[0][20].load(); - SSAShort fade_green = args[0][21].load(); - SSAShort fade_blue = args[0][22].load(); - SSAShort desaturate = args[0][23].load(); - 
SSAInt flags = args[0][24].load(); + destorg = args[0][0].load(true); + source = args[0][1].load(true); + destpitch = args[0][2].load(true); + stack_xfrac.store(args[0][3].load(true)); + stack_yfrac.store(args[0][4].load(true)); + xstep = args[0][5].load(true); + ystep = args[0][6].load(true); + x1 = args[0][7].load(true); + x2 = args[0][8].load(true); + y = args[0][9].load(true); + xbits = args[0][10].load(true); + ybits = args[0][11].load(true); + light = args[0][12].load(true); + srcalpha = args[0][13].load(true); + destalpha = args[0][14].load(true); + SSAShort light_alpha = args[0][15].load(true); + SSAShort light_red = args[0][16].load(true); + SSAShort light_green = args[0][17].load(true); + SSAShort light_blue = args[0][18].load(true); + SSAShort fade_alpha = args[0][19].load(true); + SSAShort fade_red = args[0][20].load(true); + SSAShort fade_green = args[0][21].load(true); + SSAShort fade_blue = args[0][22].load(true); + SSAShort desaturate = args[0][23].load(true); + SSAInt flags = args[0][24].load(true); shade_constants.light = SSAVec4i(light_blue.zext_int(), light_green.zext_int(), light_red.zext_int(), light_alpha.zext_int()); shade_constants.fade = SSAVec4i(fade_blue.zext_int(), fade_green.zext_int(), fade_red.zext_int(), fade_alpha.zext_int()); shade_constants.desaturate = desaturate.zext_int(); @@ -97,7 +97,7 @@ SSAInt DrawSpanCodegen::Loop4x(DrawSpanVariant variant, bool isSimpleShade, bool SSAInt index = stack_index.load(); loop.loop_block(index < sseLength); - SSAVec16ub bg = data[index * 16].load_unaligned_vec16ub(); + SSAVec16ub bg = data[index * 16].load_unaligned_vec16ub(false); SSAVec8s bg0 = SSAVec8s::extendlo(bg); SSAVec8s bg1 = SSAVec8s::extendhi(bg); SSAVec4i bgcolors[4] = @@ -123,7 +123,7 @@ SSAInt DrawSpanCodegen::Loop4x(DrawSpanVariant variant, bool isSimpleShade, bool SSAVec16ub color(SSAVec8s(colors[0], colors[1]), SSAVec8s(colors[2], colors[3])); data[index * 16].store_unaligned_vec16ub(color); - stack_index.store(index + 1); + 
stack_index.store(index.add(SSAInt(1), true, true)); loop.end_block(); } return sseLength; @@ -140,11 +140,11 @@ void DrawSpanCodegen::Loop(SSAInt start, DrawSpanVariant variant, bool isSimpleS SSAInt xfrac = stack_xfrac.load(); SSAInt yfrac = stack_yfrac.load(); - SSAVec4i bgcolor = data[index * 4].load_vec4ub(); + SSAVec4i bgcolor = data[index * 4].load_vec4ub(false); SSAVec4i color = Blend(Shade(Sample(xfrac, yfrac, isNearestFilter, is64x64), isSimpleShade), bgcolor, variant); data[index * 4].store_vec4ub(color); - stack_index.store(index + 1); + stack_index.store(index.add(SSAInt(1), true, true)); stack_xfrac.store(xfrac + xstep); stack_yfrac.store(yfrac + ystep); loop.end_block(); @@ -160,7 +160,7 @@ SSAVec4i DrawSpanCodegen::Sample(SSAInt xfrac, SSAInt yfrac, bool isNearestFilte spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); else spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - return source[spot * 4].load_vec4ub(); + return source[spot * 4].load_vec4ub(true); } else { diff --git a/src/r_compiler/fixedfunction/drawwallcodegen.cpp b/src/r_compiler/fixedfunction/drawwallcodegen.cpp index 55b17dafee..56d99e78ee 100644 --- a/src/r_compiler/fixedfunction/drawwallcodegen.cpp +++ b/src/r_compiler/fixedfunction/drawwallcodegen.cpp @@ -13,58 +13,58 @@ void DrawWallCodegen::Generate(DrawWallVariant variant, bool fourColumns, SSAValue args, SSAValue thread_data) { - dest = args[0][0].load(); - source[0] = args[0][1].load(); - source[1] = args[0][2].load(); - source[2] = args[0][3].load(); - source[3] = args[0][4].load(); - source2[0] = args[0][5].load(); - source2[1] = args[0][6].load(); - source2[2] = args[0][7].load(); - source2[3] = args[0][8].load(); - pitch = args[0][9].load(); - count = args[0][10].load(); - dest_y = args[0][11].load(); - texturefrac[0] = args[0][12].load(); - texturefrac[1] = args[0][13].load(); - texturefrac[2] = args[0][14].load(); - texturefrac[3] = args[0][15].load(); - texturefracx[0] = args[0][16].load(); - 
texturefracx[1] = args[0][17].load(); - texturefracx[2] = args[0][18].load(); - texturefracx[3] = args[0][19].load(); - iscale[0] = args[0][20].load(); - iscale[1] = args[0][21].load(); - iscale[2] = args[0][22].load(); - iscale[3] = args[0][23].load(); - textureheight[0] = args[0][24].load(); - textureheight[1] = args[0][25].load(); - textureheight[2] = args[0][26].load(); - textureheight[3] = args[0][27].load(); - light[0] = args[0][28].load(); - light[1] = args[0][29].load(); - light[2] = args[0][30].load(); - light[3] = args[0][31].load(); - srcalpha = args[0][32].load(); - destalpha = args[0][33].load(); - SSAShort light_alpha = args[0][34].load(); - SSAShort light_red = args[0][35].load(); - SSAShort light_green = args[0][36].load(); - SSAShort light_blue = args[0][37].load(); - SSAShort fade_alpha = args[0][38].load(); - SSAShort fade_red = args[0][39].load(); - SSAShort fade_green = args[0][40].load(); - SSAShort fade_blue = args[0][41].load(); - SSAShort desaturate = args[0][42].load(); - SSAInt flags = args[0][43].load(); + dest = args[0][0].load(true); + source[0] = args[0][1].load(true); + source[1] = args[0][2].load(true); + source[2] = args[0][3].load(true); + source[3] = args[0][4].load(true); + source2[0] = args[0][5].load(true); + source2[1] = args[0][6].load(true); + source2[2] = args[0][7].load(true); + source2[3] = args[0][8].load(true); + pitch = args[0][9].load(true); + count = args[0][10].load(true); + dest_y = args[0][11].load(true); + texturefrac[0] = args[0][12].load(true); + texturefrac[1] = args[0][13].load(true); + texturefrac[2] = args[0][14].load(true); + texturefrac[3] = args[0][15].load(true); + texturefracx[0] = args[0][16].load(true); + texturefracx[1] = args[0][17].load(true); + texturefracx[2] = args[0][18].load(true); + texturefracx[3] = args[0][19].load(true); + iscale[0] = args[0][20].load(true); + iscale[1] = args[0][21].load(true); + iscale[2] = args[0][22].load(true); + iscale[3] = args[0][23].load(true); + 
textureheight[0] = args[0][24].load(true); + textureheight[1] = args[0][25].load(true); + textureheight[2] = args[0][26].load(true); + textureheight[3] = args[0][27].load(true); + light[0] = args[0][28].load(true); + light[1] = args[0][29].load(true); + light[2] = args[0][30].load(true); + light[3] = args[0][31].load(true); + srcalpha = args[0][32].load(true); + destalpha = args[0][33].load(true); + SSAShort light_alpha = args[0][34].load(true); + SSAShort light_red = args[0][35].load(true); + SSAShort light_green = args[0][36].load(true); + SSAShort light_blue = args[0][37].load(true); + SSAShort fade_alpha = args[0][38].load(true); + SSAShort fade_red = args[0][39].load(true); + SSAShort fade_green = args[0][40].load(true); + SSAShort fade_blue = args[0][41].load(true); + SSAShort desaturate = args[0][42].load(true); + SSAInt flags = args[0][43].load(true); shade_constants.light = SSAVec4i(light_blue.zext_int(), light_green.zext_int(), light_red.zext_int(), light_alpha.zext_int()); shade_constants.fade = SSAVec4i(fade_blue.zext_int(), fade_green.zext_int(), fade_red.zext_int(), fade_alpha.zext_int()); shade_constants.desaturate = desaturate.zext_int(); - thread.core = thread_data[0][0].load(); - thread.num_cores = thread_data[0][1].load(); - thread.pass_start_y = thread_data[0][2].load(); - thread.pass_end_y = thread_data[0][3].load(); + thread.core = thread_data[0][0].load(true); + thread.num_cores = thread_data[0][1].load(true); + thread.pass_start_y = thread_data[0][2].load(true); + thread.pass_end_y = thread_data[0][3].load(true); is_simple_shade = (flags & DrawWallArgs::simple_shade) == SSAInt(DrawWallArgs::simple_shade); is_nearest_filter = (flags & DrawWallArgs::nearest_filter) == SSAInt(DrawWallArgs::nearest_filter); @@ -118,7 +118,7 @@ void DrawWallCodegen::Loop(DrawWallVariant variant, bool fourColumns, bool isSim if (fourColumns) { - SSAVec16ub bg = dest[offset].load_unaligned_vec16ub(); + SSAVec16ub bg = dest[offset].load_unaligned_vec16ub(false); 
SSAVec8s bg0 = SSAVec8s::extendlo(bg); SSAVec8s bg1 = SSAVec8s::extendhi(bg); SSAVec4i bgcolors[4] = @@ -138,12 +138,12 @@ void DrawWallCodegen::Loop(DrawWallVariant variant, bool fourColumns, bool isSim } else { - SSAVec4i bgcolor = dest[offset].load_vec4ub(); + SSAVec4i bgcolor = dest[offset].load_vec4ub(false); SSAVec4i color = Blend(Shade(Sample(frac[0], 0, isNearestFilter), 0, isSimpleShade), bgcolor, variant); dest[offset].store_vec4ub(color); } - stack_index.store(index + 1); + stack_index.store(index.add(SSAInt(1), true, true)); for (int i = 0; i < numColumns; i++) stack_frac[i].store(frac[i] + fracstep[i]); loop.end_block(); @@ -155,7 +155,7 @@ SSAVec4i DrawWallCodegen::Sample(SSAInt frac, int index, bool isNearestFilter) if (isNearestFilter) { SSAInt sample_index = ((frac >> FRACBITS) * textureheight[index]) >> FRACBITS; - return source[index][sample_index * 4].load_vec4ub(); + return source[index][sample_index * 4].load_vec4ub(false); } else { diff --git a/src/r_compiler/ssa/ssa_float_ptr.cpp b/src/r_compiler/ssa/ssa_float_ptr.cpp index 582821ca03..f694be15d1 100644 --- a/src/r_compiler/ssa/ssa_float_ptr.cpp +++ b/src/r_compiler/ssa/ssa_float_ptr.cpp @@ -23,36 +23,48 @@ SSAFloatPtr SSAFloatPtr::operator[](SSAInt index) const return SSAFloatPtr::from_llvm(SSAScope::builder().CreateGEP(v, index.v, SSAScope::hint())); } -SSAFloat SSAFloatPtr::load() const +SSAFloat SSAFloatPtr::load(bool constantScopeDomain) const { - return SSAFloat::from_llvm(SSAScope::builder().CreateLoad(v, false, SSAScope::hint())); + auto loadInst = SSAScope::builder().CreateLoad(v, false, SSAScope::hint()); + if (constantScopeDomain) + loadInst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list()); + return SSAFloat::from_llvm(loadInst); } -SSAVec4f SSAFloatPtr::load_vec4f() const +SSAVec4f SSAFloatPtr::load_vec4f(bool constantScopeDomain) const { llvm::PointerType *m4xfloattypeptr = llvm::VectorType::get(llvm::Type::getFloatTy(SSAScope::context()), 
4)->getPointerTo(); - return SSAVec4f::from_llvm(SSAScope::builder().CreateLoad(SSAScope::builder().CreateBitCast(v, m4xfloattypeptr, SSAScope::hint()), false, SSAScope::hint())); + auto loadInst = SSAScope::builder().CreateLoad(SSAScope::builder().CreateBitCast(v, m4xfloattypeptr, SSAScope::hint()), false, SSAScope::hint()); + if (constantScopeDomain) + loadInst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list()); + return SSAVec4f::from_llvm(loadInst); } -SSAVec4f SSAFloatPtr::load_unaligned_vec4f() const +SSAVec4f SSAFloatPtr::load_unaligned_vec4f(bool constantScopeDomain) const { llvm::PointerType *m4xfloattypeptr = llvm::VectorType::get(llvm::Type::getFloatTy(SSAScope::context()), 4)->getPointerTo(); - return SSAVec4f::from_llvm(SSAScope::builder().Insert(new llvm::LoadInst(SSAScope::builder().CreateBitCast(v, m4xfloattypeptr, SSAScope::hint()), SSAScope::hint(), false, 4), SSAScope::hint())); + auto loadInst = SSAScope::builder().CreateAlignedLoad(SSAScope::builder().CreateBitCast(v, m4xfloattypeptr, SSAScope::hint()), 4, false, SSAScope::hint()); + if (constantScopeDomain) + loadInst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list()); + return SSAVec4f::from_llvm(loadInst); } void SSAFloatPtr::store(const SSAFloat &new_value) { - SSAScope::builder().CreateStore(new_value.v, v, false); + auto inst = SSAScope::builder().CreateStore(new_value.v, v, false); + inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list()); } void SSAFloatPtr::store_vec4f(const SSAVec4f &new_value) { llvm::PointerType *m4xfloattypeptr = llvm::VectorType::get(llvm::Type::getFloatTy(SSAScope::context()), 4)->getPointerTo(); - SSAScope::builder().CreateStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xfloattypeptr, SSAScope::hint())); + auto inst = SSAScope::builder().CreateStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xfloattypeptr, SSAScope::hint())); + 
inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list()); } void SSAFloatPtr::store_unaligned_vec4f(const SSAVec4f &new_value) { llvm::PointerType *m4xfloattypeptr = llvm::VectorType::get(llvm::Type::getFloatTy(SSAScope::context()), 4)->getPointerTo(); - SSAScope::builder().CreateAlignedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xfloattypeptr, SSAScope::hint()), 4); + auto inst = SSAScope::builder().CreateAlignedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xfloattypeptr, SSAScope::hint()), 4); + inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list()); } diff --git a/src/r_compiler/ssa/ssa_float_ptr.h b/src/r_compiler/ssa/ssa_float_ptr.h index f29b2de3f7..a9953eb813 100644 --- a/src/r_compiler/ssa/ssa_float_ptr.h +++ b/src/r_compiler/ssa/ssa_float_ptr.h @@ -17,9 +17,9 @@ public: static llvm::Type *llvm_type(); SSAFloatPtr operator[](SSAInt index) const; SSAFloatPtr operator[](int index) const { return (*this)[SSAInt(index)]; } - SSAFloat load() const; - SSAVec4f load_vec4f() const; - SSAVec4f load_unaligned_vec4f() const; + SSAFloat load(bool constantScopeDomain) const; + SSAVec4f load_vec4f(bool constantScopeDomain) const; + SSAVec4f load_unaligned_vec4f(bool constantScopeDomain) const; void store(const SSAFloat &new_value); void store_vec4f(const SSAVec4f &new_value); void store_unaligned_vec4f(const SSAVec4f &new_value); diff --git a/src/r_compiler/ssa/ssa_int.cpp b/src/r_compiler/ssa/ssa_int.cpp index 3d9cb22bdf..1815985c5f 100644 --- a/src/r_compiler/ssa/ssa_int.cpp +++ b/src/r_compiler/ssa/ssa_int.cpp @@ -42,6 +42,11 @@ SSAInt SSAInt::MAX(SSAInt a, SSAInt b) return SSAInt::from_llvm(SSAScope::builder().CreateSelect((a > b).v, a.v, b.v, SSAScope::hint())); } +SSAInt SSAInt::add(SSAInt b, bool no_unsigned_wrap, bool no_signed_wrap) +{ + return SSAInt::from_llvm(SSAScope::builder().CreateAdd(v, b.v, SSAScope::hint(), no_unsigned_wrap, no_signed_wrap)); +} + SSAInt operator+(const 
SSAInt &a, const SSAInt &b) { return SSAInt::from_llvm(SSAScope::builder().CreateAdd(a.v, b.v, SSAScope::hint())); diff --git a/src/r_compiler/ssa/ssa_int.h b/src/r_compiler/ssa/ssa_int.h index c0f46e4b67..e9ce978c47 100644 --- a/src/r_compiler/ssa/ssa_int.h +++ b/src/r_compiler/ssa/ssa_int.h @@ -19,6 +19,8 @@ public: static SSAInt MIN(SSAInt a, SSAInt b); static SSAInt MAX(SSAInt a, SSAInt b); + SSAInt add(SSAInt b, bool no_unsigned_wrap, bool no_signed_wrap); + llvm::Value *v; }; diff --git a/src/r_compiler/ssa/ssa_int_ptr.cpp b/src/r_compiler/ssa/ssa_int_ptr.cpp index 974645d08c..d9441088e6 100644 --- a/src/r_compiler/ssa/ssa_int_ptr.cpp +++ b/src/r_compiler/ssa/ssa_int_ptr.cpp @@ -23,36 +23,48 @@ SSAIntPtr SSAIntPtr::operator[](SSAInt index) const return SSAIntPtr::from_llvm(SSAScope::builder().CreateGEP(v, index.v, SSAScope::hint())); } -SSAInt SSAIntPtr::load() const +SSAInt SSAIntPtr::load(bool constantScopeDomain) const { - return SSAInt::from_llvm(SSAScope::builder().CreateLoad(v, false, SSAScope::hint())); + auto loadInst = SSAScope::builder().CreateLoad(v, false, SSAScope::hint()); + if (constantScopeDomain) + loadInst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list()); + return SSAInt::from_llvm(loadInst); } -SSAVec4i SSAIntPtr::load_vec4i() const +SSAVec4i SSAIntPtr::load_vec4i(bool constantScopeDomain) const { llvm::PointerType *m4xint32typeptr = llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 4)->getPointerTo(); - return SSAVec4i::from_llvm(SSAScope::builder().CreateLoad(SSAScope::builder().CreateBitCast(v, m4xint32typeptr, SSAScope::hint()), false, SSAScope::hint())); + auto loadInst = SSAScope::builder().CreateLoad(SSAScope::builder().CreateBitCast(v, m4xint32typeptr, SSAScope::hint()), false, SSAScope::hint()); + if (constantScopeDomain) + loadInst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list()); + return SSAVec4i::from_llvm(loadInst); } -SSAVec4i 
SSAIntPtr::load_unaligned_vec4i() const +SSAVec4i SSAIntPtr::load_unaligned_vec4i(bool constantScopeDomain) const { llvm::PointerType *m4xint32typeptr = llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 4)->getPointerTo(); - return SSAVec4i::from_llvm(SSAScope::builder().Insert(new llvm::LoadInst(SSAScope::builder().CreateBitCast(v, m4xint32typeptr, SSAScope::hint()), SSAScope::hint(), false, 4), SSAScope::hint())); + auto loadInst = SSAScope::builder().CreateAlignedLoad(SSAScope::builder().CreateBitCast(v, m4xint32typeptr, SSAScope::hint()), 4, false, SSAScope::hint()); + if (constantScopeDomain) + loadInst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list()); + return SSAVec4i::from_llvm(loadInst); } void SSAIntPtr::store(const SSAInt &new_value) { - SSAScope::builder().CreateStore(new_value.v, v, false); + auto inst = SSAScope::builder().CreateStore(new_value.v, v, false); + inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list()); } void SSAIntPtr::store_vec4i(const SSAVec4i &new_value) { llvm::PointerType *m4xint32typeptr = llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 4)->getPointerTo(); - SSAScope::builder().CreateStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xint32typeptr, SSAScope::hint())); + auto inst = SSAScope::builder().CreateStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xint32typeptr, SSAScope::hint())); + inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list()); } void SSAIntPtr::store_unaligned_vec4i(const SSAVec4i &new_value) { llvm::PointerType *m4xint32typeptr = llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 4)->getPointerTo(); - SSAScope::builder().CreateAlignedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xint32typeptr, SSAScope::hint()), 4); + auto inst = SSAScope::builder().CreateAlignedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xint32typeptr, 
SSAScope::hint()), 4); + inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list()); } diff --git a/src/r_compiler/ssa/ssa_int_ptr.h b/src/r_compiler/ssa/ssa_int_ptr.h index c75ed6a8d5..9685283651 100644 --- a/src/r_compiler/ssa/ssa_int_ptr.h +++ b/src/r_compiler/ssa/ssa_int_ptr.h @@ -17,9 +17,9 @@ public: static llvm::Type *llvm_type(); SSAIntPtr operator[](SSAInt index) const; SSAIntPtr operator[](int index) const { return (*this)[SSAInt(index)]; } - SSAInt load() const; - SSAVec4i load_vec4i() const; - SSAVec4i load_unaligned_vec4i() const; + SSAInt load(bool constantScopeDomain) const; + SSAVec4i load_vec4i(bool constantScopeDomain) const; + SSAVec4i load_unaligned_vec4i(bool constantScopeDomain) const; void store(const SSAInt &new_value); void store_vec4i(const SSAVec4i &new_value); void store_unaligned_vec4i(const SSAVec4i &new_value); diff --git a/src/r_compiler/ssa/ssa_pixelformat4f.h b/src/r_compiler/ssa/ssa_pixelformat4f.h index 507e95b5d1..9cefb517b4 100644 --- a/src/r_compiler/ssa/ssa_pixelformat4f.h +++ b/src/r_compiler/ssa/ssa_pixelformat4f.h @@ -13,9 +13,9 @@ public: SSAFloatPtr pixels() { return _pixels; } SSAFloatPtr pixels() const { return _pixels; } - SSAVec4f get4f(SSAInt index) const + SSAVec4f get4f(SSAInt index, bool constantScopeDomain) const { - return _pixels[index * 4].load_vec4f(); + return _pixels[index * 4].load_vec4f(constantScopeDomain); } void set4f(SSAInt index, const SSAVec4f &pixel) diff --git a/src/r_compiler/ssa/ssa_pixelformat4ub.h b/src/r_compiler/ssa/ssa_pixelformat4ub.h index fdf98c4aa6..91b04557c4 100644 --- a/src/r_compiler/ssa/ssa_pixelformat4ub.h +++ b/src/r_compiler/ssa/ssa_pixelformat4ub.h @@ -13,9 +13,9 @@ public: SSAUBytePtr pixels() { return _pixels; } SSAUBytePtr pixels() const { return _pixels; } - SSAVec4f get4f(SSAInt index) const + SSAVec4f get4f(SSAInt index, bool constantScopeDomain) const { - return SSAVec4f(_pixels[index * 4].load_vec4ub()) * (1.0f / 255.0f); + return 
SSAVec4f(_pixels[index * 4].load_vec4ub(constantScopeDomain)) * (1.0f / 255.0f); } void set4f(SSAInt index, const SSAVec4f &pixel) diff --git a/src/r_compiler/ssa/ssa_pixelformat4ub_argb_rev.h b/src/r_compiler/ssa/ssa_pixelformat4ub_argb_rev.h index 4601eeb3c1..1f7e4eb0f4 100644 --- a/src/r_compiler/ssa/ssa_pixelformat4ub_argb_rev.h +++ b/src/r_compiler/ssa/ssa_pixelformat4ub_argb_rev.h @@ -20,9 +20,9 @@ public: out_pixel2 = SSAVec4f::shuffle(SSAVec4f(SSAVec4i::extendhi(p)) * (1.0f / 255.0f), 2, 1, 0, 3); } */ - SSAVec4f get4f(SSAInt index) const + SSAVec4f get4f(SSAInt index, bool constantScopeDomain) const { - return SSAVec4f::shuffle(SSAVec4f(_pixels[index * 4].load_vec4ub()) * (1.0f / 255.0f), 2, 1, 0, 3); + return SSAVec4f::shuffle(SSAVec4f(_pixels[index * 4].load_vec4ub(constantScopeDomain)) * (1.0f / 255.0f), 2, 1, 0, 3); } void set4f(SSAInt index, const SSAVec4f &pixel) diff --git a/src/r_compiler/ssa/ssa_pixelformat4ub_rev.h b/src/r_compiler/ssa/ssa_pixelformat4ub_rev.h index 402480c49b..9b50ec00fe 100644 --- a/src/r_compiler/ssa/ssa_pixelformat4ub_rev.h +++ b/src/r_compiler/ssa/ssa_pixelformat4ub_rev.h @@ -13,9 +13,9 @@ public: SSAUBytePtr pixels() { return _pixels; } SSAUBytePtr pixels() const { return _pixels; } - SSAVec4f get4f(SSAInt index) const + SSAVec4f get4f(SSAInt index, bool constantScopeDomain) const { - return SSAVec4f::shuffle(SSAVec4f(_pixels[index * 4].load_vec4ub()) * (1.0f / 255.0f), 3, 2, 1, 0); + return SSAVec4f::shuffle(SSAVec4f(_pixels[index * 4].load_vec4ub(constantScopeDomain)) * (1.0f / 255.0f), 3, 2, 1, 0); } void set4f(SSAInt index, const SSAVec4f &pixel) diff --git a/src/r_compiler/ssa/ssa_scope.cpp b/src/r_compiler/ssa/ssa_scope.cpp index e5d34a2033..520f301a4a 100644 --- a/src/r_compiler/ssa/ssa_scope.cpp +++ b/src/r_compiler/ssa/ssa_scope.cpp @@ -7,6 +7,10 @@ SSAScope::SSAScope(llvm::LLVMContext *context, llvm::Module *module, llvm::IRBui : _context(context), _module(module), _builder(builder) { instance = this; + + 
_constant_scope_domain = llvm::MDNode::get(SSAScope::context(), { llvm::MDString::get(SSAScope::context(), "ConstantScopeDomain") }); + _constant_scope = llvm::MDNode::getDistinct(SSAScope::context(), { _constant_scope_domain }); + _constant_scope_list = llvm::MDNode::get(SSAScope::context(), { _constant_scope }); } SSAScope::~SSAScope() @@ -50,6 +54,11 @@ llvm::Value *SSAScope::alloca(llvm::Type *type, SSAInt size) return alloca_builder.CreateAlloca(type, size.v, hint()); } +llvm::MDNode *SSAScope::constant_scope_list() +{ + return instance->_constant_scope_list; +} + const std::string &SSAScope::hint() { return instance->_hint; diff --git a/src/r_compiler/ssa/ssa_scope.h b/src/r_compiler/ssa/ssa_scope.h index ad080fde6c..c942a7c377 100644 --- a/src/r_compiler/ssa/ssa_scope.h +++ b/src/r_compiler/ssa/ssa_scope.h @@ -14,6 +14,7 @@ public: static llvm::Function *intrinsic(llvm::Intrinsic::ID id, llvm::ArrayRef parameter_types = llvm::ArrayRef()); static llvm::Value *alloca(llvm::Type *type); static llvm::Value *alloca(llvm::Type *type, SSAInt size); + static llvm::MDNode *constant_scope_list(); static const std::string &hint(); static void set_hint(const std::string &hint); @@ -22,6 +23,9 @@ private: llvm::LLVMContext *_context; llvm::Module *_module; llvm::IRBuilder<> *_builder; + llvm::MDNode *_constant_scope_domain; + llvm::MDNode *_constant_scope; + llvm::MDNode *_constant_scope_list; std::string _hint; }; diff --git a/src/r_compiler/ssa/ssa_ubyte_ptr.cpp b/src/r_compiler/ssa/ssa_ubyte_ptr.cpp index 34de0ab889..1ce4a6ae28 100644 --- a/src/r_compiler/ssa/ssa_ubyte_ptr.cpp +++ b/src/r_compiler/ssa/ssa_ubyte_ptr.cpp @@ -23,32 +23,45 @@ SSAUBytePtr SSAUBytePtr::operator[](SSAInt index) const return SSAUBytePtr::from_llvm(SSAScope::builder().CreateGEP(v, index.v, SSAScope::hint())); } -SSAUByte SSAUBytePtr::load() const +SSAUByte SSAUBytePtr::load(bool constantScopeDomain) const { - return SSAUByte::from_llvm(SSAScope::builder().CreateLoad(v, false, 
SSAScope::hint())); + auto loadInst = SSAScope::builder().CreateLoad(v, false, SSAScope::hint()); + if (constantScopeDomain) + loadInst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list()); + return SSAUByte::from_llvm(loadInst); } -SSAVec4i SSAUBytePtr::load_vec4ub() const +SSAVec4i SSAUBytePtr::load_vec4ub(bool constantScopeDomain) const { - SSAInt i32 = SSAInt::from_llvm(SSAScope::builder().CreateLoad(SSAScope::builder().CreateBitCast(v, llvm::Type::getInt32PtrTy(SSAScope::context()), SSAScope::hint()), false, SSAScope::hint())); + auto loadInst = SSAScope::builder().CreateLoad(SSAScope::builder().CreateBitCast(v, llvm::Type::getInt32PtrTy(SSAScope::context()), SSAScope::hint()), false, SSAScope::hint()); + if (constantScopeDomain) + loadInst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list()); + SSAInt i32 = SSAInt::from_llvm(loadInst); return SSAVec4i::unpack(i32); } -SSAVec16ub SSAUBytePtr::load_vec16ub() const +SSAVec16ub SSAUBytePtr::load_vec16ub(bool constantScopeDomain) const { llvm::PointerType *m16xint8typeptr = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 16)->getPointerTo(); - return SSAVec16ub::from_llvm(SSAScope::builder().CreateLoad(SSAScope::builder().CreateBitCast(v, m16xint8typeptr, SSAScope::hint()), false, SSAScope::hint())); + auto loadInst = SSAScope::builder().CreateLoad(SSAScope::builder().CreateBitCast(v, m16xint8typeptr, SSAScope::hint()), false, SSAScope::hint()); + if (constantScopeDomain) + loadInst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list()); + return SSAVec16ub::from_llvm(loadInst); } -SSAVec16ub SSAUBytePtr::load_unaligned_vec16ub() const +SSAVec16ub SSAUBytePtr::load_unaligned_vec16ub(bool constantScopeDomain) const { llvm::PointerType *m16xint8typeptr = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 16)->getPointerTo(); - return SSAVec16ub::from_llvm(SSAScope::builder().Insert(new 
llvm::LoadInst(SSAScope::builder().CreateBitCast(v, m16xint8typeptr, SSAScope::hint()), SSAScope::hint(), false, 4), SSAScope::hint())); + auto loadInst = SSAScope::builder().CreateAlignedLoad(SSAScope::builder().CreateBitCast(v, m16xint8typeptr, SSAScope::hint()), 4, false, SSAScope::hint()); + if (constantScopeDomain) + loadInst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list()); + return SSAVec16ub::from_llvm(loadInst); } void SSAUBytePtr::store(const SSAUByte &new_value) { - SSAScope::builder().CreateStore(new_value.v, v, false); + auto inst = SSAScope::builder().CreateStore(new_value.v, v, false); + inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list()); } void SSAUBytePtr::store_vec4ub(const SSAVec4i &new_value) @@ -66,13 +79,15 @@ void SSAUBytePtr::store_vec4ub(const SSAVec4i &new_value) constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 3))); llvm::Value *mask = llvm::ConstantVector::get(constants); llvm::Value *val_vector = SSAScope::builder().CreateShuffleVector(v16ub.v, llvm::UndefValue::get(m16xint8type), mask, SSAScope::hint()); - SSAScope::builder().CreateStore(val_vector, SSAScope::builder().CreateBitCast(v, m4xint8typeptr, SSAScope::hint()), false); + llvm::StoreInst *inst = SSAScope::builder().CreateStore(val_vector, SSAScope::builder().CreateBitCast(v, m4xint8typeptr, SSAScope::hint()), false); + inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list()); } void SSAUBytePtr::store_vec16ub(const SSAVec16ub &new_value) { llvm::PointerType *m16xint8typeptr = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 16)->getPointerTo(); llvm::StoreInst *inst = SSAScope::builder().CreateStore(new_value.v, SSAScope::builder().CreateBitCast(v, m16xint8typeptr, SSAScope::hint())); + inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list()); // The following generates _mm_stream_si128, maybe! 
// llvm::MDNode *node = llvm::MDNode::get(SSAScope::context(), SSAScope::builder().getInt32(1)); @@ -83,4 +98,5 @@ void SSAUBytePtr::store_unaligned_vec16ub(const SSAVec16ub &new_value) { llvm::PointerType *m16xint8typeptr = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 16)->getPointerTo(); llvm::StoreInst *inst = SSAScope::builder().CreateAlignedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m16xint8typeptr, SSAScope::hint()), 4); + inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list()); } diff --git a/src/r_compiler/ssa/ssa_ubyte_ptr.h b/src/r_compiler/ssa/ssa_ubyte_ptr.h index c084068bc7..167a5877d6 100644 --- a/src/r_compiler/ssa/ssa_ubyte_ptr.h +++ b/src/r_compiler/ssa/ssa_ubyte_ptr.h @@ -19,11 +19,10 @@ public: static llvm::Type *llvm_type(); SSAUBytePtr operator[](SSAInt index) const; SSAUBytePtr operator[](int index) const { return (*this)[SSAInt(index)]; } - SSAUByte load() const; - SSAVec4i load_vec4ub() const; - SSAVec8s load_vec8s() const; - SSAVec16ub load_vec16ub() const; - SSAVec16ub load_unaligned_vec16ub() const; + SSAUByte load(bool constantScopeDomain) const; + SSAVec4i load_vec4ub(bool constantScopeDomain) const; + SSAVec16ub load_vec16ub(bool constantScopeDomain) const; + SSAVec16ub load_unaligned_vec16ub(bool constantScopeDomain) const; void store(const SSAUByte &new_value); void store_vec4ub(const SSAVec4i &new_value); void store_vec16ub(const SSAVec16ub &new_value); diff --git a/src/r_compiler/ssa/ssa_value.cpp b/src/r_compiler/ssa/ssa_value.cpp index c37b7f4c1d..65f9da15d6 100644 --- a/src/r_compiler/ssa/ssa_value.cpp +++ b/src/r_compiler/ssa/ssa_value.cpp @@ -4,14 +4,18 @@ #include "ssa_int.h" #include "ssa_scope.h" -SSAValue SSAValue::load() +SSAValue SSAValue::load(bool constantScopeDomain) { - return SSAValue::from_llvm(SSAScope::builder().CreateLoad(v, false)); + auto loadInst = SSAScope::builder().CreateLoad(v, false); + if (constantScopeDomain) + 
loadInst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list()); + return SSAValue::from_llvm(loadInst); } void SSAValue::store(llvm::Value *value) { - SSAScope::builder().CreateStore(value, v, false); + auto inst = SSAScope::builder().CreateStore(value, v, false); + inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list()); } SSAIndexLookup SSAValue::operator[](int index) diff --git a/src/r_compiler/ssa/ssa_value.h b/src/r_compiler/ssa/ssa_value.h index ec156a4529..d0d73043c9 100644 --- a/src/r_compiler/ssa/ssa_value.h +++ b/src/r_compiler/ssa/ssa_value.h @@ -15,7 +15,7 @@ public: static SSAValue from_llvm(llvm::Value *v) { SSAValue val; val.v = v; return val; } - SSAValue load(); + SSAValue load(bool constantScopeDomain); void store(llvm::Value *v); template @@ -38,7 +38,7 @@ public: llvm::Value *v; std::vector indexes; - SSAValue load() { SSAValue value = *this; return value.load(); } + SSAValue load(bool constantScopeDomain) { SSAValue value = *this; return value.load(constantScopeDomain); } void store(llvm::Value *v) { SSAValue value = *this; return value.store(v); } template diff --git a/src/r_compiler/ssa/ssa_vec4f_ptr.cpp b/src/r_compiler/ssa/ssa_vec4f_ptr.cpp index e0ed8bc868..e8bac71f17 100644 --- a/src/r_compiler/ssa/ssa_vec4f_ptr.cpp +++ b/src/r_compiler/ssa/ssa_vec4f_ptr.cpp @@ -23,22 +23,30 @@ SSAVec4fPtr SSAVec4fPtr::operator[](SSAInt index) const return SSAVec4fPtr::from_llvm(SSAScope::builder().CreateGEP(v, index.v, SSAScope::hint())); } -SSAVec4f SSAVec4fPtr::load() const +SSAVec4f SSAVec4fPtr::load(bool constantScopeDomain) const { - return SSAVec4f::from_llvm(SSAScope::builder().CreateLoad(v, false, SSAScope::hint())); + auto loadInst = SSAScope::builder().CreateLoad(v, false, SSAScope::hint()); + if (constantScopeDomain) + loadInst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list()); + return SSAVec4f::from_llvm(loadInst); } -SSAVec4f SSAVec4fPtr::load_unaligned() 
const +SSAVec4f SSAVec4fPtr::load_unaligned(bool constantScopeDomain) const { - return SSAVec4f::from_llvm(SSAScope::builder().Insert(new llvm::LoadInst(v, SSAScope::hint(), false, 4), SSAScope::hint())); + auto loadInst = SSAScope::builder().CreateAlignedLoad(v, 4, false, SSAScope::hint()); + if (constantScopeDomain) + loadInst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list()); + return SSAVec4f::from_llvm(loadInst); } void SSAVec4fPtr::store(const SSAVec4f &new_value) { - SSAScope::builder().CreateStore(new_value.v, v, false); + auto inst = SSAScope::builder().CreateStore(new_value.v, v, false); + inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list()); } void SSAVec4fPtr::store_unaligned(const SSAVec4f &new_value) { - SSAScope::builder().CreateAlignedStore(new_value.v, v, 4, false); + auto inst = SSAScope::builder().CreateAlignedStore(new_value.v, v, 4, false); + inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list()); } diff --git a/src/r_compiler/ssa/ssa_vec4f_ptr.h b/src/r_compiler/ssa/ssa_vec4f_ptr.h index ab4e841900..15192352a9 100644 --- a/src/r_compiler/ssa/ssa_vec4f_ptr.h +++ b/src/r_compiler/ssa/ssa_vec4f_ptr.h @@ -15,8 +15,8 @@ public: static SSAVec4fPtr from_llvm(llvm::Value *v) { return SSAVec4fPtr(v); } static llvm::Type *llvm_type(); SSAVec4fPtr operator[](SSAInt index) const; - SSAVec4f load() const; - SSAVec4f load_unaligned() const; + SSAVec4f load(bool constantScopeDomain) const; + SSAVec4f load_unaligned(bool constantScopeDomain) const; void store(const SSAVec4f &new_value); void store_unaligned(const SSAVec4f &new_value); From 5f0088ab8b8885279407104093aed13e45cbfb0b Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 9 Oct 2016 12:50:57 +0200 Subject: [PATCH 173/912] Created new OpenGL framebuffer using the software renderer's hardware acceleration --- src/CMakeLists.txt | 1 + src/gl/system/gl_swframebuffer.cpp | 3357 ++++++++++++++++++++++++++++ 
src/gl/system/gl_swframebuffer.h | 470 ++++ src/textures/textures.h | 1 + 4 files changed, 3829 insertions(+) create mode 100644 src/gl/system/gl_swframebuffer.cpp create mode 100644 src/gl/system/gl_swframebuffer.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index de3316ced0..dd241a3d7b 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1151,6 +1151,7 @@ set( FASTMATH_SOURCES gl/shaders/gl_fxaashader.cpp gl/system/gl_interface.cpp gl/system/gl_framebuffer.cpp + gl/system/gl_swframebuffer.cpp gl/system/gl_debug.cpp gl/system/gl_menu.cpp gl/system/gl_wipe.cpp diff --git a/src/gl/system/gl_swframebuffer.cpp b/src/gl/system/gl_swframebuffer.cpp new file mode 100644 index 0000000000..e0ff2be477 --- /dev/null +++ b/src/gl/system/gl_swframebuffer.cpp @@ -0,0 +1,3357 @@ +/* +** gl_swframebuffer.cpp +** Code to let ZDoom use OpenGL as a simple framebuffer +** +**--------------------------------------------------------------------------- +** Copyright 1998-2011 Randy Heit +** All rights reserved. +** +** Redistribution and use in source and binary forms, with or without +** modification, are permitted provided that the following conditions +** are met: +** +** 1. Redistributions of source code must retain the above copyright +** notice, this list of conditions and the following disclaimer. +** 2. Redistributions in binary form must reproduce the above copyright +** notice, this list of conditions and the following disclaimer in the +** documentation and/or other materials provided with the distribution. +** 3. The name of the author may not be used to endorse or promote products +** derived from this software without specific prior written permission. +** +** THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +** IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +** OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+** IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +** INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +** NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +** DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +** THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +** (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +** THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**--------------------------------------------------------------------------- +** +** This file does _not_ implement hardware-accelerated 3D rendering. It is +** just a means of getting the pixel data to the screen in a more reliable +** method on modern hardware by copying the entire frame to a texture, +** drawing that to the screen, and presenting. +** +** That said, it does implement hardware-accelerated 2D rendering. +*/ + +#include "gl/system/gl_system.h" +#include "files.h" +#include "m_swap.h" +#include "v_video.h" +#include "doomstat.h" +#include "m_png.h" +#include "m_crc32.h" +#include "vectors.h" +#include "v_palette.h" +#include "templates.h" + +#include "c_dispatch.h" +#include "templates.h" +#include "i_system.h" +#include "i_video.h" +#include "i_input.h" +#include "v_pfx.h" +#include "stats.h" +#include "doomerrors.h" +#include "r_main.h" +#include "r_data/r_translate.h" +#include "f_wipe.h" +#include "sbar.h" +#include "w_wad.h" +#include "r_data/colormaps.h" + +#include "gl/system/gl_interface.h" +#include "gl/system/gl_swframebuffer.h" +#include "gl/data/gl_data.h" +#include "gl/utility/gl_clock.h" +#include "gl/utility/gl_templates.h" +#include "gl/gl_functions.h" + +CVAR(Bool, gl_antilag, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG) +CVAR(Int, gl_showpacks, 0, 0) +#ifndef WIN32 // Defined in fb_d3d9 for Windows +CVAR(Bool, vid_hwaalines, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG) +#else +EXTERN_CVAR(Bool, vid_hwaalines) +#endif + 
+EXTERN_CVAR(Bool, vid_hw2d) +EXTERN_CVAR(Bool, fullscreen) +EXTERN_CVAR(Float, Gamma) +EXTERN_CVAR(Bool, vid_vsync) +EXTERN_CVAR(Float, transsouls) +EXTERN_CVAR(Int, vid_refreshrate) + +extern cycle_t BlitCycles; + +IMPLEMENT_CLASS(OpenGLSWFrameBuffer) + +const char *const OpenGLSWFrameBuffer::ShaderNames[OpenGLSWFrameBuffer::NUM_SHADERS] = +{ + "NormalColor.fp", + "NormalColorPal.fp", + "NormalColorInv.fp", + "NormalColorPalInv.fp", + + "RedToAlpha.fp", + "RedToAlphaInv.fp", + + "VertexColor.fp", + + "SpecialColormap.fp", + "SpecialColorMapPal.fp", + + "InGameColormap.fp", + "InGameColormapDesat.fp", + "InGameColormapInv.fp", + "InGameColormapInvDesat.fp", + "InGameColormapPal.fp", + "InGameColormapPalDesat.fp", + "InGameColormapPalInv.fp", + "InGameColormapPalInvDesat.fp", + + "BurnWipe.fp", + "GammaCorrection.fp", +}; + +OpenGLSWFrameBuffer::OpenGLSWFrameBuffer(void *hMonitor, int width, int height, int bits, int refreshHz, bool fullscreen) : + Super(hMonitor, width, height, bits, refreshHz, fullscreen) +{ + // SetVSync needs to be at the very top to workaround a bug in Nvidia's OpenGL driver. + // If wglSwapIntervalEXT is called after glBindFramebuffer in a frame the setting is not changed! 
+ SetVSync(vid_vsync); + + VertexBuffer = nullptr; + IndexBuffer = nullptr; + FBTexture = nullptr; + TempRenderTexture = nullptr; + RenderTexture[0] = nullptr; + RenderTexture[1] = nullptr; + InitialWipeScreen = nullptr; + ScreenshotTexture = nullptr; + ScreenshotSurface = nullptr; + FinalWipeScreen = nullptr; + PaletteTexture = nullptr; + GammaTexture = nullptr; + FrontCopySurface = nullptr; + for (int i = 0; i < NUM_SHADERS; ++i) + { + Shaders[i] = nullptr; + } + GammaShader = nullptr; + BlockSurface[0] = nullptr; + BlockSurface[1] = nullptr; + VSync = vid_vsync; + BlendingRect.left = 0; + BlendingRect.top = 0; + BlendingRect.right = FBWidth; + BlendingRect.bottom = FBHeight; + In2D = 0; + Palettes = nullptr; + Textures = nullptr; + Accel2D = true; + GatheringWipeScreen = false; + ScreenWipe = nullptr; + InScene = false; + QuadExtra = new BufferedTris[MAX_QUAD_BATCH]; + Atlases = nullptr; + PixelDoubling = 0; + SkipAt = -1; + CurrRenderTexture = 0; + RenderTextureToggle = 0; + + Gamma = 1.0; + FlashColor0 = 0; + FlashColor1 = 0xFFFFFFFF; + FlashColor = 0; + FlashAmount = 0; + + NeedGammaUpdate = false; + NeedPalUpdate = false; + + if (MemBuffer == nullptr) + { + return; + } + + memcpy(SourcePalette, GPalette.BaseColors, sizeof(PalEntry) * 256); + + Windowed = !(static_cast(Video)->GoFullscreen(fullscreen)); + + TrueHeight = height; + /*if (fullscreen) + { + for (Win32Video::ModeInfo *mode = static_cast(Video)->m_Modes; mode != nullptr; mode = mode->next) + { + if (mode->width == Width && mode->height == Height) + { + TrueHeight = mode->realheight; + PixelDoubling = mode->doubling; + break; + } + } + }*/ + // Offset from top of screen to top of letterboxed screen + LBOffsetI = (TrueHeight - Height) / 2; + LBOffset = float(LBOffsetI); + + // NOTE(review): the bool result of CreateResources() is discarded here. 
+ // BlendingRect above was filled from FBWidth/FBHeight, which CreateFBTexture() + // (called by CreateResources()) assigns -- confirm they hold meaningful values + // before this point. + CreateResources(); + SetInitialState(); +} + +OpenGLSWFrameBuffer::~OpenGLSWFrameBuffer() +{ + ReleaseResources(); + delete[] QuadExtra; +} + +//========================================================================== +// +// 
OpenGLSWFrameBuffer :: SetInitialState +// +// Called after initial device creation and reset, when everything is set +// to OpenGL's defaults. +// +//========================================================================== + +void OpenGLSWFrameBuffer::SetInitialState() +{ + AlphaBlendEnabled = FALSE; + AlphaBlendOp = GL_FUNC_ADD; + AlphaSrcBlend = 0; + AlphaDestBlend = 0; + + CurPixelShader = nullptr; + memset(Constant, 0, sizeof(Constant)); + + for (unsigned i = 0; i < countof(Texture); ++i) + { + Texture[i] = nullptr; + SamplerWrapS[i] = GL_CLAMP_TO_EDGE; + SamplerWrapT[i] = GL_CLAMP_TO_EDGE; + } + + NeedGammaUpdate = true; + NeedPalUpdate = true; + OldRenderTarget = nullptr; + + // This constant is used for grayscaling weights (.xyz) and color inversion (.w) + float weights[4] = { 77 / 256.f, 143 / 256.f, 37 / 256.f, 1 }; + SetPixelShaderConstantF(PSCONST_Weights, weights, 1); + + AlphaTestEnabled = FALSE; + + CurBorderColor = 0; + + // Clear to black, just in case it wasn't done already. + glClearColor(0.0f, 0.0f, 0.0f, 1.0f); + glClear(GL_COLOR_BUFFER_BIT); +} + +//========================================================================== +// +// OpenGLSWFrameBuffer :: CreateResources +// +// Returns false if LoadShaders(), CreateFBTexture(), CreatePaletteTexture() +// or CreateVertexes() fails. The results of CreateGammaTexture() and +// CreateBlockSurfaces() are ignored. +// +//========================================================================== + +bool OpenGLSWFrameBuffer::CreateResources() +{ + Atlases = nullptr; + if (!LoadShaders()) + { + return false; + } + if (!CreateFBTexture() || + !CreatePaletteTexture()) + { + return false; + } + if (!CreateVertexes()) + { + return false; + } + CreateGammaTexture(); + CreateBlockSurfaces(); + return true; +} + +//========================================================================== +// +// OpenGLSWFrameBuffer :: LoadShaders +// +// Returns true if all required shaders were loaded. 
(Gamma and burn wipe +// are the only ones not considered "required".) 
+// +//========================================================================== + +bool OpenGLSWFrameBuffer::LoadShaders() +{ + static const char models[][4] = { "30/", "20/", "14/" }; + FString shaderdir, shaderpath; + unsigned model, i; + int lump; + + // We determine the best available model simply by trying them all in + // order of decreasing preference. + for (model = 0; model < countof(models); ++model) + { + shaderdir = "shaders/gl/sm"; + shaderdir += models[model]; + for (i = 0; i < NUM_SHADERS; ++i) + { + shaderpath = shaderdir; + shaderpath += ShaderNames[i]; + lump = Wads.CheckNumForFullName(shaderpath); + // NOTE(review): a shader whose lump is not found is skipped without + // failing this model, so i can still reach NUM_SHADERS (reported as + // success below) with Shaders[i] left unset, even for a required shader. + if (lump >= 0) + { + FMemLump data = Wads.ReadLump(lump); + if (!CreatePixelShader((uint32_t *)data.GetMem(), &Shaders[i]) && i < SHADER_BurnWipe) + { + break; + } + } + } + if (i == NUM_SHADERS) + { // Success! + return true; + } + // Failure. Release whatever managed to load (which is probably nothing.) 
+ for (i = 0; i < NUM_SHADERS; ++i) + { + SafeRelease(Shaders[i]); + } + } + return false; +} + +//========================================================================== +// +// OpenGLSWFrameBuffer :: ReleaseResources +// +//========================================================================== + +void OpenGLSWFrameBuffer::ReleaseResources() +{ + I_SaveWindowedPos(); + KillNativeTexs(); + KillNativePals(); + ReleaseDefaultPoolItems(); + SafeRelease(ScreenshotSurface); + SafeRelease(ScreenshotTexture); + SafeRelease(PaletteTexture); + for (int i = 0; i < NUM_SHADERS; ++i) + { + SafeRelease(Shaders[i]); + } + GammaShader = nullptr; + if (ScreenWipe != nullptr) + { + delete ScreenWipe; + ScreenWipe = nullptr; + } + Atlas *pack, *next; + for (pack = Atlases; pack != nullptr; pack = next) + { + next = pack->Next; + delete pack; + } + GatheringWipeScreen = false; +} + +void OpenGLSWFrameBuffer::ReleaseDefaultPoolItems() +{ + SafeRelease(FBTexture); + SafeRelease(FinalWipeScreen); + SafeRelease(RenderTexture[0]); + SafeRelease(RenderTexture[1]); + 
SafeRelease(InitialWipeScreen); + SafeRelease(VertexBuffer); + SafeRelease(IndexBuffer); + SafeRelease(BlockSurface[0]); + SafeRelease(BlockSurface[1]); + SafeRelease(FrontCopySurface); +} + +bool OpenGLSWFrameBuffer::Reset() +{ + ReleaseDefaultPoolItems(); + if (!CreateFBTexture() || !CreateVertexes()) + { + return false; + } + CreateBlockSurfaces(); + SetInitialState(); + return true; +} + +//========================================================================== +// +// OpenGLSWFrameBuffer :: CreateBlockSurfaces +// +// Create blocking surfaces for antilag. It's okay if these can't be +// created; antilag just won't work. If the first surface is created but +// the second is not, the first is released again. +// +//========================================================================== + +void OpenGLSWFrameBuffer::CreateBlockSurfaces() +{ + BlockNum = 0; + if (CreateOffscreenPlainSurface(16, 16, GL_RGBA8, &BlockSurface[0])) + { + if (!CreateOffscreenPlainSurface(16, 16, GL_RGBA8, &BlockSurface[1])) + { + SafeRelease(BlockSurface[0]); + } + } +} + +//========================================================================== +// +// OpenGLSWFrameBuffer :: KillNativePals +// +// Frees all native palettes. +// +// NOTE(review): the loop ends only if deleting the head object updates +// Palettes; presumably its destructor unlinks it from the list -- confirm. +// +//========================================================================== + +void OpenGLSWFrameBuffer::KillNativePals() +{ + while (Palettes != nullptr) + { + delete Palettes; + } +} + +//========================================================================== +// +// OpenGLSWFrameBuffer :: KillNativeTexs +// +// Frees all native textures. +// +// NOTE(review): the loop ends only if deleting the head object updates +// Textures; presumably its destructor unlinks it from the list -- confirm. 
+// +//========================================================================== + +void OpenGLSWFrameBuffer::KillNativeTexs() +{ + while (Textures != nullptr) + { + delete Textures; + } +} + +//========================================================================== +// +// OpenGLSWFrameBuffer :: CreateFBTexture +// +// Creates the "Framebuffer" texture. With the advent of hardware-assisted +// 2D, this is something of a misnomer now. 
The FBTexture is only used for +// uploading the software 3D image to video memory so that it can be drawn +// to the real frame buffer. +// +// It also creates the TempRenderTexture, since this seemed like a +// convenient place to do so. +// +//========================================================================== + +bool OpenGLSWFrameBuffer::CreateFBTexture() +{ + if (!CreateTexture(Width, Height, 1, GL_R8, &FBTexture)) + { + int pow2width, pow2height, i; + + for (i = 1; i < Width; i <<= 1) {} pow2width = i; + for (i = 1; i < Height; i <<= 1) {} pow2height = i; + + if (!CreateTexture(pow2width, pow2height, 1, GL_R8, &FBTexture)) + { + return false; + } + else + { + FBWidth = pow2width; + FBHeight = pow2height; + } + } + else + { + FBWidth = Width; + FBHeight = Height; + } + RenderTextureToggle = 0; + RenderTexture[0] = nullptr; + RenderTexture[1] = nullptr; + if (!CreateTexture(FBWidth, FBHeight, 1, GL_RGBA8, &RenderTexture[0])) + { + return false; + } + if (Windowed || PixelDoubling) + { + // Windowed or pixel doubling: Create another render texture so we can flip between them. + RenderTextureToggle = 1; + if (!CreateTexture(FBWidth, FBHeight, 1, GL_RGBA8, &RenderTexture[1])) + { + return false; + } + } + else + { + // Fullscreen and not pixel doubling: Create a render target to have the back buffer copied to. + if (!CreateRenderTarget(Width, Height, GL_RGBA8, &FrontCopySurface)) + { + return false; + } + } + // Initialize the TempRenderTextures to black. 
+ for (int i = 0; i <= RenderTextureToggle; ++i) + { + HWSurface *surf; + if (RenderTexture[i]->GetSurfaceLevel(0, &surf)) + { + ColorFill(surf, 0.0f, 0.0f, 0.0f); + delete surf; + } + } + TempRenderTexture = RenderTexture[0]; + CurrRenderTexture = 0; + return true; +} + +//========================================================================== +// +// OpenGLSWFrameBuffer :: CreatePaletteTexture +// +//========================================================================== + +bool OpenGLSWFrameBuffer::CreatePaletteTexture() +{ + if (!CreateTexture(256, 1, 1, GL_RGBA8, &PaletteTexture)) + { + return false; + } + return true; +} + +//========================================================================== +// +// OpenGLSWFrameBuffer :: CreateGammaTexture +// +//========================================================================== + +bool OpenGLSWFrameBuffer::CreateGammaTexture() +{ + return false; +} + +//========================================================================== +// +// OpenGLSWFrameBuffer :: CreateVertexes +// +//========================================================================== + +bool OpenGLSWFrameBuffer::CreateVertexes() +{ + VertexPos = -1; + IndexPos = -1; + QuadBatchPos = -1; + BatchType = BATCH_None; + if (!CreateVertexBuffer(sizeof(FBVERTEX)*NUM_VERTS, &VertexBuffer)) + { + return false; + } + if (!CreateIndexBuffer(sizeof(uint16_t)*NUM_INDEXES, &IndexBuffer)) + { + return false; + } + return true; +} + +//========================================================================== +// +// OpenGLSWFrameBuffer :: CalcFullscreenCoords +// +//========================================================================== + +void OpenGLSWFrameBuffer::CalcFullscreenCoords(FBVERTEX verts[4], bool viewarea_only, bool can_double, uint32_t color0, uint32_t color1) const +{ + float offset = OldRenderTarget != nullptr ? 
0 : LBOffset; + float top = offset - 0.5f; + float texright = float(Width) / float(FBWidth); + float texbot = float(Height) / float(FBHeight); + float mxl, mxr, myt, myb, tmxl, tmxr, tmyt, tmyb; + + if (viewarea_only) + { // Just calculate vertices for the viewarea/BlendingRect + mxl = float(BlendingRect.left) - 0.5f; + mxr = float(BlendingRect.right) - 0.5f; + myt = float(BlendingRect.top) + top; + myb = float(BlendingRect.bottom) + top; + tmxl = float(BlendingRect.left) / float(Width) * texright; + tmxr = float(BlendingRect.right) / float(Width) * texright; + tmyt = float(BlendingRect.top) / float(Height) * texbot; + tmyb = float(BlendingRect.bottom) / float(Height) * texbot; + } + else + { // Calculate vertices for the whole screen + mxl = -0.5f; + mxr = float(Width << (can_double ? PixelDoubling : 0)) - 0.5f; + myt = top; + myb = float(Height << (can_double ? PixelDoubling : 0)) + top; + tmxl = 0; + tmxr = texright; + tmyt = 0; + tmyb = texbot; + } + + //{ mxl, myt, 0, 1, 0, 0xFFFFFFFF, tmxl, tmyt }, + //{ mxr, myt, 0, 1, 0, 0xFFFFFFFF, tmxr, tmyt }, + //{ mxr, myb, 0, 1, 0, 0xFFFFFFFF, tmxr, tmyb }, + //{ mxl, myb, 0, 1, 0, 0xFFFFFFFF, tmxl, tmyb }, + + verts[0].x = mxl; + verts[0].y = myt; + verts[0].z = 0; + verts[0].rhw = 1; + verts[0].color0 = color0; + verts[0].color1 = color1; + verts[0].tu = tmxl; + verts[0].tv = tmyt; + + verts[1].x = mxr; + verts[1].y = myt; + verts[1].z = 0; + verts[1].rhw = 1; + verts[1].color0 = color0; + verts[1].color1 = color1; + verts[1].tu = tmxr; + verts[1].tv = tmyt; + + verts[2].x = mxr; + verts[2].y = myb; + verts[2].z = 0; + verts[2].rhw = 1; + verts[2].color0 = color0; + verts[2].color1 = color1; + verts[2].tu = tmxr; + verts[2].tv = tmyb; + + verts[3].x = mxl; + verts[3].y = myb; + verts[3].z = 0; + verts[3].rhw = 1; + verts[3].color0 = color0; + verts[3].color1 = color1; + verts[3].tu = tmxl; + verts[3].tv = tmyb; +} + +//========================================================================== +// +// 
OpenGLSWFrameBuffer :: GetPageCount +// +//========================================================================== + +int OpenGLSWFrameBuffer::GetPageCount() +{ + return 1; +} + +//========================================================================== +// +// OpenGLSWFrameBuffer :: PaletteChanged +// +//========================================================================== + +void OpenGLSWFrameBuffer::PaletteChanged() +{ +} + +//========================================================================== +// +// OpenGLSWFrameBuffer :: QueryNewPalette +// +//========================================================================== + +int OpenGLSWFrameBuffer::QueryNewPalette() +{ + return 0; +} + +//========================================================================== +// +// OpenGLSWFrameBuffer :: IsValid +// +//========================================================================== + +bool OpenGLSWFrameBuffer::IsValid() +{ + return true; +} + +//========================================================================== +// +// OpenGLSWFrameBuffer :: IsFullscreen +// +//========================================================================== + +bool OpenGLSWFrameBuffer::IsFullscreen() +{ + return !Windowed; +} + +//========================================================================== +// +// OpenGLSWFrameBuffer :: Lock +// +//========================================================================== + +bool OpenGLSWFrameBuffer::Lock(bool buffered) +{ + if (LockCount++ > 0) + { + return false; + } + assert(!In2D); + Accel2D = vid_hw2d; + Buffer = MemBuffer; + return false; +} + +//========================================================================== +// +// OpenGLSWFrameBuffer :: Unlock +// +//========================================================================== + +void OpenGLSWFrameBuffer::Unlock() +{ + LOG1("Unlock <%d>\n", LockCount); + + if (LockCount == 0) + { + return; + } + + if (UpdatePending && LockCount == 1) + { + 
Update(); + } + else if (--LockCount == 0) + { + Buffer = nullptr; + } +} + +//========================================================================== +// +// OpenGLSWFrameBuffer :: Update +// +// When In2D == 0: Copy buffer to screen and present +// When In2D == 1: Copy buffer to screen but do not present +// When In2D == 2: Set up for 2D drawing but do not draw anything +// When In2D == 3: Present and set In2D to 0 +// +//========================================================================== + +void OpenGLSWFrameBuffer::Update() +{ + if (In2D == 3) + { + if (InScene) + { + DrawRateStuff(); + DrawPackedTextures(gl_showpacks); + EndBatch(); // Make sure all batched primitives are drawn. + Flip(); + } + In2D = 0; + return; + } + + if (LockCount != 1) + { + I_FatalError("Framebuffer must have exactly 1 lock to be updated"); + if (LockCount > 0) + { + UpdatePending = true; + --LockCount; + } + return; + } + + if (In2D == 0) + { + DrawRateStuff(); + } + + if (NeedGammaUpdate) + { + float psgamma[4]; + float igamma; + + NeedGammaUpdate = false; + igamma = 1 / Gamma; + if (!Windowed) + { + GammaRamp ramp; + + for (int i = 0; i < 256; ++i) + { + ramp.blue[i] = ramp.green[i] = ramp.red[i] = uint16_t(65535.f * powf(i / 255.f, igamma)); + } + LOG("SetGammaRamp\n"); + SetGammaRamp(&ramp); + } + else + { + if (igamma != 1) + { + UpdateGammaTexture(igamma); + GammaShader = Shaders[SHADER_GammaCorrection]; + } + else + { + GammaShader = nullptr; + } + } + psgamma[2] = psgamma[1] = psgamma[0] = igamma; + psgamma[3] = 0.5; // For SM14 version + SetPixelShaderConstantF(PSCONST_Gamma, psgamma, 1); + } + + if (NeedPalUpdate) + { + UploadPalette(); + } + + BlitCycles.Reset(); + BlitCycles.Clock(); + + LockCount = 0; + Draw3DPart(In2D <= 1); + if (In2D == 0) + { + Flip(); + } + + BlitCycles.Unclock(); + //LOG1 ("cycles = %d\n", BlitCycles); + + Buffer = nullptr; + UpdatePending = false; +} + +//========================================================================== +// +// 
OpenGLSWFrameBuffer :: Flip +// +//========================================================================== + +void OpenGLSWFrameBuffer::Flip() +{ + assert(InScene); + + DrawLetterbox(); + DoWindowedGamma(); + + CopyNextFrontBuffer(); + + // Attempt to counter input lag. + if (gl_antilag && BlockSurface[0] != nullptr) + { + LockedRect lr; + volatile int dummy; + ColorFill(BlockSurface[BlockNum], 0.0f, 0x20/255.0f, 0x50/255.0f); + BlockNum ^= 1; + if (BlockSurface[BlockNum]->LockRect(&lr, nullptr, false)) + { + dummy = *(int *)lr.pBits; + BlockSurface[BlockNum]->UnlockRect(); + } + } + // Limiting the frame rate is as simple as waiting for the timer to signal this event. + if (FPSLimitEvent != nullptr) + { + WaitForSingleObject(FPSLimitEvent, 1000); + } + Present(); + InScene = false; + + if (RenderTextureToggle) + { + // Flip the TempRenderTexture to the other one now. + CurrRenderTexture ^= RenderTextureToggle; + TempRenderTexture = RenderTexture[CurrRenderTexture]; + } + + if (Windowed) + { + int clientWidth = GetClientWidth(); + int clientHeight = GetClientHeight(); + if (clientWidth > 0 && clientHeight > 0 && (Width != clientWidth || Height != clientHeight)) + { + Resize(clientWidth, clientHeight); + + TrueHeight = Height; + PixelDoubling = 0; + LBOffsetI = 0; + LBOffset = 0.0f; + Reset(); + + V_OutputResized(Width, Height); + } + } +} + +//========================================================================== +// +// OpenGLSWFrameBuffer :: CopyNextFrontBuffer +// +// Duplicates the contents of the back buffer that will become the front +// buffer upon Present into FrontCopySurface so that we can get the +// contents of the display without wasting time in GetFrontBufferData(). 
+// +//========================================================================== + +void OpenGLSWFrameBuffer::CopyNextFrontBuffer() +{ + HWSurface *backbuff; + + if (Windowed || PixelDoubling) + { + // Windowed mode or pixel doubling: TempRenderTexture has what we want + SafeRelease(FrontCopySurface); + if (TempRenderTexture->GetSurfaceLevel(0, &backbuff)) + { + FrontCopySurface = backbuff; + } + } + else + { + // Fullscreen, not pixel doubled: The back buffer has what we want, + // but it might be letter boxed. + if (GetBackBuffer(&backbuff)) + { + LTRBRect srcrect = { 0, LBOffsetI, Width, LBOffsetI + Height }; + StretchRect(backbuff, &srcrect, FrontCopySurface); + delete backbuff; + } + } +} + +//========================================================================== +// +// OpenGLSWFrameBuffer :: PaintToWindow +// +//========================================================================== + +bool OpenGLSWFrameBuffer::PaintToWindow() +{ + if (LockCount != 0) + { + return false; + } + Draw3DPart(true); + return true; +} + +//========================================================================== +// +// OpenGLSWFrameBuffer :: Draw3DPart +// +// The software 3D part, to be exact. 
+// +//========================================================================== + +void OpenGLSWFrameBuffer::Draw3DPart(bool copy3d) +{ + if (copy3d) + { + LTRBRect texrect = { 0, 0, Width, Height }; + LockedRect lockrect; + + if ((FBWidth == Width && FBHeight == Height && + FBTexture->LockRect(&lockrect, nullptr, true)) || + FBTexture->LockRect(&lockrect, &texrect, false)) + { + if (lockrect.Pitch == Pitch && Pitch == Width) + { + memcpy(lockrect.pBits, MemBuffer, Width * Height); + } + else + { + uint8_t *dest = (uint8_t *)lockrect.pBits; + uint8_t *src = MemBuffer; + for (int y = 0; y < Height; y++) + { + memcpy(dest, src, Width); + dest += lockrect.Pitch; + src += Pitch; + } + } + FBTexture->UnlockRect(); + } + } + InScene = true; + if (vid_hwaalines) + glEnable(GL_LINE_SMOOTH); + else + glDisable(GL_LINE_SMOOTH); + assert(OldRenderTarget == nullptr); + if (TempRenderTexture != nullptr && + ((Windowed && TempRenderTexture != FinalWipeScreen) || GatheringWipeScreen || PixelDoubling)) + { + HWSurface *targetsurf; + if (TempRenderTexture->GetSurfaceLevel(0, &targetsurf)) + { + if (GetRenderTarget(0, &OldRenderTarget)) + { + SetRenderTarget(0, targetsurf); + } + delete targetsurf; + } + } + + SetTexture(0, FBTexture); + SetPaletteTexture(PaletteTexture, 256, BorderColor); + memset(Constant, 0, sizeof(Constant)); + SetAlphaBlend(0); + EnableAlphaTest(FALSE); + SetPixelShader(Shaders[SHADER_NormalColorPal]); + if (copy3d) + { + FBVERTEX verts[4]; + uint32_t color0, color1; + if (Accel2D) + { + if (realfixedcolormap == nullptr) + { + color0 = 0; + color1 = 0xFFFFFFF; + } + else + { + color0 = ColorValue(realfixedcolormap->ColorizeStart[0] / 2, + realfixedcolormap->ColorizeStart[1] / 2, realfixedcolormap->ColorizeStart[2] / 2, 0); + color1 = ColorValue(realfixedcolormap->ColorizeEnd[0] / 2, + realfixedcolormap->ColorizeEnd[1] / 2, realfixedcolormap->ColorizeEnd[2] / 2, 1); + SetPixelShader(Shaders[SHADER_SpecialColormapPal]); + } + } + else + { + color0 = 
FlashColor0; + color1 = FlashColor1; + } + CalcFullscreenCoords(verts, Accel2D, false, color0, color1); + DrawTriangleFans(2, verts); + } + SetPixelShader(Shaders[SHADER_NormalColorPal]); +} + +//========================================================================== +// +// OpenGLSWFrameBuffer :: DrawLetterbox +// +// Draws the black bars at the top and bottom of the screen for letterboxed +// modes. +// +//========================================================================== + +void OpenGLSWFrameBuffer::DrawLetterbox() +{ + if (LBOffsetI != 0) + { + glEnable(GL_SCISSOR_TEST); + glClearColor(0.0f, 0.0f, 0.0f, 1.0f); + glScissor(0, 0, Width, LBOffsetI); + glClear(GL_COLOR_BUFFER_BIT); + glScissor(0, Height + LBOffsetI, Width, TrueHeight - Height + LBOffsetI); + glClear(GL_COLOR_BUFFER_BIT); + glDisable(GL_SCISSOR_TEST); + } +} + +//========================================================================== +// +// OpenGLSWFrameBuffer :: DoWindowedGamma +// +// Draws the render target texture to the real back buffer using a gamma- +// correcting pixel shader. +// +//========================================================================== + +void OpenGLSWFrameBuffer::DoWindowedGamma() +{ + if (OldRenderTarget != nullptr) + { + FBVERTEX verts[4]; + + CalcFullscreenCoords(verts, false, true, 0, 0xFFFFFFFF); + SetRenderTarget(0, OldRenderTarget); + SetTexture(0, TempRenderTexture); + SetPixelShader(Windowed && GammaShader ? GammaShader : Shaders[SHADER_NormalColor]); + SetAlphaBlend(0); + EnableAlphaTest(FALSE); + DrawTriangleFans(2, verts); + delete OldRenderTarget; + OldRenderTarget = nullptr; + } +} + +//========================================================================== +// +// OpenGLSWFrameBuffer :: UpdateGammaTexture +// +// Updates the gamma texture used by the PS14 shader. We only use the first +// half of the texture so that we needn't worry about imprecision causing +// it to grab from the border. 
+// +//========================================================================== + +void OpenGLSWFrameBuffer::UpdateGammaTexture(float igamma) +{ + LockedRect lockrect; + + if (GammaTexture != nullptr && GammaTexture->LockRect(&lockrect, nullptr, false)) + { + uint8_t *pix = (uint8_t *)lockrect.pBits; + for (int i = 0; i <= 128; ++i) + { + pix[i * 4 + 2] = pix[i * 4 + 1] = pix[i * 4] = uint8_t(255.f * powf(i / 128.f, igamma)); + pix[i * 4 + 3] = 255; + } + GammaTexture->UnlockRect(); + } +} + +void OpenGLSWFrameBuffer::UploadPalette() +{ + LockedRect lockrect; + + if (SkipAt < 0) + { + SkipAt = 256; + } + if (PaletteTexture->LockRect(&lockrect, nullptr, false)) + { + uint8_t *pix = (uint8_t *)lockrect.pBits; + int i; + + for (i = 0; i < SkipAt; ++i, pix += 4) + { + pix[0] = SourcePalette[i].b; + pix[1] = SourcePalette[i].g; + pix[2] = SourcePalette[i].r; + pix[3] = (i == 0 ? 0 : 255); + // To let masked textures work, the first palette entry's alpha is 0. + } + pix += 4; + for (; i < 255; ++i, pix += 4) + { + pix[0] = SourcePalette[i].b; + pix[1] = SourcePalette[i].g; + pix[2] = SourcePalette[i].r; + pix[3] = 255; + } + PaletteTexture->UnlockRect(); + BorderColor = ColorXRGB(SourcePalette[255].r, SourcePalette[255].g, SourcePalette[255].b); + } +} + +PalEntry *OpenGLSWFrameBuffer::GetPalette() +{ + return SourcePalette; +} + +void OpenGLSWFrameBuffer::UpdatePalette() +{ + NeedPalUpdate = true; +} + +bool OpenGLSWFrameBuffer::SetGamma(float gamma) +{ + LOG1("SetGamma %g\n", gamma); + Gamma = gamma; + NeedGammaUpdate = true; + return true; +} + +bool OpenGLSWFrameBuffer::SetFlash(PalEntry rgb, int amount) +{ + FlashColor = rgb; + FlashAmount = amount; + + // Fill in the constants for the pixel shader to do linear interpolation between the palette and the flash: + float r = rgb.r / 255.f, g = rgb.g / 255.f, b = rgb.b / 255.f, a = amount / 256.f; + FlashColor0 = ColorValue(r * a, g * a, b * a, 0); + a = 1 - a; + FlashColor1 = ColorValue(a, a, a, 1); + return true; +} 
+ +void OpenGLSWFrameBuffer::GetFlash(PalEntry &rgb, int &amount) +{ + rgb = FlashColor; + amount = FlashAmount; +} + +void OpenGLSWFrameBuffer::GetFlashedPalette(PalEntry pal[256]) +{ + memcpy(pal, SourcePalette, 256 * sizeof(PalEntry)); + if (FlashAmount) + { + DoBlending(pal, pal, 256, FlashColor.r, FlashColor.g, FlashColor.b, FlashAmount); + } +} + +void OpenGLSWFrameBuffer::SetVSync(bool vsync) +{ + if (VSync != vsync) + { + VSync = vsync; + Reset(); + } +} + +void OpenGLSWFrameBuffer::NewRefreshRate() +{ + if (!Windowed) + { + Reset(); + } +} + +void OpenGLSWFrameBuffer::Blank() +{ +} + +void OpenGLSWFrameBuffer::SetBlendingRect(int x1, int y1, int x2, int y2) +{ + BlendingRect.left = x1; + BlendingRect.top = y1; + BlendingRect.right = x2; + BlendingRect.bottom = y2; +} + +//========================================================================== +// +// OpenGLSWFrameBuffer :: GetScreenshotBuffer +// +// Returns a pointer into a surface holding the current screen data. +// +//========================================================================== + +void OpenGLSWFrameBuffer::GetScreenshotBuffer(const uint8_t *&buffer, int &pitch, ESSType &color_type) +{ + LockedRect lrect; + + if (!Accel2D) + { + Super::GetScreenshotBuffer(buffer, pitch, color_type); + return; + } + buffer = nullptr; + if ((ScreenshotTexture = GetCurrentScreen()) != nullptr) + { + if (!ScreenshotTexture->GetSurfaceLevel(0, &ScreenshotSurface)) + { + delete ScreenshotTexture; + ScreenshotTexture = nullptr; + } + else if (!ScreenshotSurface->LockRect(&lrect, nullptr, false)) + { + delete ScreenshotSurface; + ScreenshotSurface = nullptr; + delete ScreenshotTexture; + ScreenshotTexture = nullptr; + } + else + { + buffer = (const uint8_t *)lrect.pBits; + pitch = lrect.Pitch; + color_type = SS_BGRA; + } + } +} + +//========================================================================== +// +// OpenGLSWFrameBuffer :: ReleaseScreenshotBuffer +// 
+//========================================================================== + +void OpenGLSWFrameBuffer::ReleaseScreenshotBuffer() +{ + if (LockCount > 0) + { + Super::ReleaseScreenshotBuffer(); + } + if (ScreenshotSurface != nullptr) + { + ScreenshotSurface->UnlockRect(); + delete ScreenshotSurface; + ScreenshotSurface = nullptr; + } + SafeRelease(ScreenshotTexture); +} + +//========================================================================== +// +// OpenGLSWFrameBuffer :: GetCurrentScreen +// +// Returns a texture containing the pixels currently visible on-screen. +// +//========================================================================== + +OpenGLSWFrameBuffer::HWTexture *OpenGLSWFrameBuffer::GetCurrentScreen() +{ + HWTexture *tex; + HWSurface *surf; + bool hr; + + if (FrontCopySurface == nullptr) + { + return nullptr; + } + + hr = CreateTexture(FBWidth, FBHeight, 1, GL_RGBA8, &tex); + + if (!hr) + { + return nullptr; + } + if (!tex->GetSurfaceLevel(0, &surf)) + { + delete tex; + return nullptr; + } + + // Video -> System memory : use GetRenderTargetData + GetRenderTargetData(FrontCopySurface, surf); + delete surf; + + if (!hr) + { + delete tex; + return nullptr; + } + return tex; +} + +/**************************************************************************/ +/* 2D Stuff */ +/**************************************************************************/ + +//========================================================================== +// +// OpenGLSWFrameBuffer :: DrawPackedTextures +// +// DEBUG: Draws the texture atlases to the screen, starting with the +// 1-based packnum. Ignores atlases that are flagged for use by one +// texture only. 
//
// Each atlas is shown as a 256x256 tile: a yellow frame, one translucent
// coloured quad per allocated box, then the atlas texture itself. Tiles are
// laid out left to right, top to bottom, until the screen is full.
//
//==========================================================================

void OpenGLSWFrameBuffer::DrawPackedTextures(int packnum)
{
	// Colours cycled through to tell neighbouring boxes apart.
	uint32_t empty_colors[8] =
	{
		0x50FF0000, 0x5000FF00, 0x500000FF, 0x50FFFF00,
		0x50FF00FF, 0x5000FFFF, 0x50FF8000, 0x500080FF
	};
	Atlas *pack;
	int x = 8, y = 8;

	// A packnum of zero or less draws nothing.
	if (packnum <= 0)
	{
		return;
	}
	pack = Atlases;
	// Find the first texture atlas that is an actual atlas.
	while (pack != nullptr && pack->OneUse)
	{ // Skip textures that aren't used as atlases
		pack = pack->Next;
	}
	// Skip however many atlases we would have otherwise drawn
	// until packnum - 1 real atlases have been skipped.
	while (pack != nullptr && packnum != 1)
	{
		if (!pack->OneUse)
		{ // Skip textures that aren't used as atlases
			packnum--;
		}
		pack = pack->Next;
	}
	// Draw atlases until we run out of room on the screen.
	while (pack != nullptr)
	{
		if (pack->OneUse)
		{ // Skip textures that aren't used as atlases
			pack = pack->Next;
			continue;
		}

		// Yellow frame one pixel outside the tile.
		AddColorOnlyRect(x - 1, y - 1 - LBOffsetI, 258, 258, ColorXRGB(255, 255, 0));
		int back = 0;
		for (PackedTexture *box = pack->UsedList; box != nullptr; box = box->Next)
		{
			// Scale the box from atlas space into the 256x256 tile.
			AddColorOnlyQuad(
				x + box->Area.left * 256 / pack->Width,
				y + box->Area.top * 256 / pack->Height,
				(box->Area.right - box->Area.left) * 256 / pack->Width,
				(box->Area.bottom - box->Area.top) * 256 / pack->Height, empty_colors[back]);
			back = (back + 1) & 7;
		}
		//	AddColorOnlyQuad(x, y-LBOffsetI, 256, 256, ColorARGB(180,0,0,0));

		// NOTE(review): presumably this makes room for one more quad in the
		// batch before it is written below; confirm against CheckQuadBatch().
		CheckQuadBatch();

		BufferedTris *quad = &QuadExtra[QuadBatchPos];
		FBVERTEX *vert = &VertexData[VertexPos];

		quad->Group1 = 0;
		// Single channel atlases go through the palette shader.
		if (pack->Format == GL_R8/* && !tex->IsGray*/)
		{
			quad->Flags = BQF_WrapUV | BQF_GamePalette/* | BQF_DisableAlphaTest*/;
			quad->ShaderNum = BQS_PalTex;
		}
		else
		{
			quad->Flags = BQF_WrapUV/* | BQF_DisableAlphaTest*/;
			quad->ShaderNum = BQS_Plain;
		}
		quad->Palette = nullptr;
		quad->Texture = pack->Tex;
		quad->NumVerts = 4;
		quad->NumTris = 2;

		// Tile corners, offset by half a pixel.
		float x0 = float(x) - 0.5f;
		float y0 = float(y) - 0.5f;
		float x1 = x0 + 256.f;
		float y1 = y0 + 256.f;

		// Top left
		vert[0].x = x0;
		vert[0].y = y0;
		vert[0].z = 0;
		vert[0].rhw = 1;
		vert[0].color0 = 0;
		vert[0].color1 = 0xFFFFFFFF;
		vert[0].tu = 0;
		vert[0].tv = 0;

		// Top right
		vert[1].x = x1;
		vert[1].y = y0;
		vert[1].z = 0;
		vert[1].rhw = 1;
		vert[1].color0 = 0;
		vert[1].color1 = 0xFFFFFFFF;
		vert[1].tu = 1;
		vert[1].tv = 0;

		// Bottom right
		vert[2].x = x1;
		vert[2].y = y1;
		vert[2].z = 0;
		vert[2].rhw = 1;
		vert[2].color0 = 0;
		vert[2].color1 = 0xFFFFFFFF;
		vert[2].tu = 1;
		vert[2].tv = 1;

		// Bottom left
		vert[3].x = x0;
		vert[3].y = y1;
		vert[3].z = 0;
		vert[3].rhw = 1;
		vert[3].color0 = 0;
		vert[3].color1 = 0xFFFFFFFF;
		vert[3].tu = 0;
		vert[3].tv = 1;

		// Two triangles: (0, 1, 2) and (0, 2, 3).
		IndexData[IndexPos] = VertexPos;
		IndexData[IndexPos + 1] = VertexPos + 1;
		IndexData[IndexPos + 2] = VertexPos + 2;
		IndexData[IndexPos + 3] = VertexPos;
		IndexData[IndexPos + 4] = VertexPos + 2;
		IndexData[IndexPos + 5] = VertexPos + 3;

		QuadBatchPos++;
		VertexPos += 4;
		IndexPos += 6;

		// Advance to the next tile, wrapping to a new row at the right edge
		// and stopping once the bottom of the screen is reached.
		x += 256 + 8;
		if (x > Width - 256)
		{
			x = 8;
			y += 256 + 8;
			if (y > TrueHeight - 256)
			{
				return;
			}
		}
		pack = pack->Next;
	}
}

//==========================================================================
//
// OpenGLSWFrameBuffer :: AllocPackedTexture
//
// Finds space to pack an image inside a texture atlas and returns it.
// Large images and those that need to wrap always get their own textures.
//
//==========================================================================

OpenGLSWFrameBuffer::PackedTexture *OpenGLSWFrameBuffer::AllocPackedTexture(int w, int h, bool wrapping, int format)
{
	Atlas *pack;
	Rect box;
	bool padded;

	// The - 2 to account for padding
	if (w > 256 - 2 || h > 256 - 2 || wrapping)
	{ // Create a new texture atlas.
		pack = new Atlas(this, w, h, format);
		pack->OneUse = true;
		box = pack->Packer.Insert(w, h);
		padded = false;
	}
	else
	{ // Try to find space in an existing texture atlas.
		w += 2; // Add padding
		h += 2;
		for (pack = Atlases; pack != nullptr; pack = pack->Next)
		{
			// Use the first atlas it fits in.
			if (pack->Format == format)
			{
				box = pack->Packer.Insert(w, h);
				// A zero width result is treated as "did not fit".
				if (box.width != 0)
				{
					break;
				}
			}
		}
		if (pack == nullptr)
		{ // Create a new texture atlas.
			pack = new Atlas(this, DEF_ATLAS_WIDTH, DEF_ATLAS_HEIGHT, format);
			box = pack->Packer.Insert(w, h);
		}
		padded = true;
	}
	assert(box.width != 0 && box.height != 0);
	return pack->AllocateImage(box, padded);
}

//==========================================================================
//
// Atlas Constructor
//
// Appends the new atlas to the frame buffer's atlas list and creates its
// w x h texture in the given format.
//
//==========================================================================

OpenGLSWFrameBuffer::Atlas::Atlas(OpenGLSWFrameBuffer *fb, int w, int h, int format)
	: Packer(w, h, true)
{
	Tex = nullptr;
	Format = format;
	UsedList = nullptr;
	OneUse = false;
	Width = 0;
	Height = 0;
	Next = nullptr;

	// Attach to the end of the atlas list
	Atlas **prev = &fb->Atlases;
	while (*prev != nullptr)
	{
		prev = &((*prev)->Next);
	}
	*prev = this;

	// The result is not checked here; Tex stays nullptr if creation fails.
	fb->CreateTexture(w, h, 1, format, &Tex);
	Width = w;
	Height = h;
}

//==========================================================================
//
// Atlas Destructor
//
// Releases the texture and deletes every box still on the used list. The
// atlas is not removed from the frame buffer's atlas list here.
//
//==========================================================================

OpenGLSWFrameBuffer::Atlas::~Atlas()
{
	PackedTexture *box, *next;

	SafeRelease(Tex);
	for (box = UsedList; box != nullptr; box = next)
	{
		next = box->Next;
		delete box;
	}
}

//==========================================================================
//
// Atlas :: AllocateImage
//
// Moves the box from the empty list to the used list, sizing it to the
// requested dimensions and adding additional boxes to the empty list
if +// needed. +// +// The passed box *MUST* be in this texture atlas's empty list. +// +//========================================================================== + +OpenGLSWFrameBuffer::PackedTexture *OpenGLSWFrameBuffer::Atlas::AllocateImage(const Rect &rect, bool padded) +{ + PackedTexture *box = new PackedTexture; + + box->Owner = this; + box->Area.left = rect.x; + box->Area.top = rect.y; + box->Area.right = rect.x + rect.width; + box->Area.bottom = rect.y + rect.height; + + box->Left = float(box->Area.left + padded) / Width; + box->Right = float(box->Area.right - padded) / Width; + box->Top = float(box->Area.top + padded) / Height; + box->Bottom = float(box->Area.bottom - padded) / Height; + + box->Padded = padded; + + // Add it to the used list. + box->Next = UsedList; + if (box->Next != nullptr) + { + box->Next->Prev = &box->Next; + } + UsedList = box; + box->Prev = &UsedList; + + return box; +} + +//========================================================================== +// +// Atlas :: FreeBox +// +// Removes a box from the used list and deletes it. Space is returned to the +// waste list. Once all boxes for this atlas are freed, the entire bin +// packer is reinitialized for maximum efficiency. 
+// +//========================================================================== + +void OpenGLSWFrameBuffer::Atlas::FreeBox(OpenGLSWFrameBuffer::PackedTexture *box) +{ + *(box->Prev) = box->Next; + if (box->Next != nullptr) + { + box->Next->Prev = box->Prev; + } + Rect waste; + waste.x = box->Area.left; + waste.y = box->Area.top; + waste.width = box->Area.right - box->Area.left; + waste.height = box->Area.bottom - box->Area.top; + box->Owner->Packer.AddWaste(waste); + delete box; + if (UsedList == nullptr) + { + Packer.Init(Width, Height, true); + } +} + +//========================================================================== +// +// OpenGLTex Constructor +// +//========================================================================== + +OpenGLSWFrameBuffer::OpenGLTex::OpenGLTex(FTexture *tex, OpenGLSWFrameBuffer *fb, bool wrapping) +{ + // Attach to the texture list for the OpenGLSWFrameBuffer + Next = fb->Textures; + if (Next != nullptr) + { + Next->Prev = &Next; + } + Prev = &fb->Textures; + fb->Textures = this; + + GameTex = tex; + Box = nullptr; + IsGray = false; + + Create(fb, wrapping); +} + +//========================================================================== +// +// OpenGLTex Destructor +// +//========================================================================== + +OpenGLSWFrameBuffer::OpenGLTex::~OpenGLTex() +{ + if (Box != nullptr) + { + Box->Owner->FreeBox(Box); + Box = nullptr; + } + // Detach from the texture list + *Prev = Next; + if (Next != nullptr) + { + Next->Prev = Prev; + } + // Remove link from the game texture + if (GameTex != nullptr) + { + GameTex->Native = nullptr; + } +} + +//========================================================================== +// +// OpenGLTex :: CheckWrapping +// +// Returns true if the texture is compatible with the specified wrapping +// mode. 
+//
+//==========================================================================
+
+bool OpenGLSWFrameBuffer::OpenGLTex::CheckWrapping(bool wrapping)
+{
+	// If it doesn't need to wrap, then it works.
+	if (!wrapping)
+	{
+		return true;
+	}
+	// If it needs to wrap, then it can't be packed inside another texture.
+	// NOTE(review): Box is dereferenced without a null check; presumably
+	// this is only reached after a successful Create() - confirm.
+	return Box->Owner->OneUse;
+}
+
+//==========================================================================
+//
+// OpenGLTex :: Create
+//
+// Creates an HWTexture for the texture and copies the image data
+// to it. Note that unlike FTexture, this image is row-major.
+//
+// Returns false if no packed texture could be allocated or if Update()
+// failed. In both cases Box is nullptr on return.
+//
+//==========================================================================
+
+bool OpenGLSWFrameBuffer::OpenGLTex::Create(OpenGLSWFrameBuffer *fb, bool wrapping)
+{
+	// Callers are expected to arrive here without a box. Release builds
+	// (where assert compiles away) free a stale box instead.
+	assert(Box == nullptr);
+	if (Box != nullptr)
+	{
+		Box->Owner->FreeBox(Box);
+	}
+
+	Box = fb->AllocPackedTexture(GameTex->GetWidth(), GameTex->GetHeight(), wrapping, GetTexFormat());
+
+	if (Box == nullptr)
+	{
+		return false;
+	}
+	if (!Update())
+	{
+		// The upload failed, so give the freshly allocated box back.
+		Box->Owner->FreeBox(Box);
+		Box = nullptr;
+		return false;
+	}
+	return true;
+}
+
+//==========================================================================
+//
+// OpenGLTex :: Update
+//
+// Copies image data from the underlying FTexture to the OpenGL texture.
+//
+// Locks the box's area of the owning texture, lets the game texture fill
+// it and, for padded boxes, zeroes the one-texel border around the image.
+// Returns false if the area could not be locked.
+//
+//==========================================================================
+
+bool OpenGLSWFrameBuffer::OpenGLTex::Update()
+{
+	LockedRect lrect;
+	LTRBRect rect;
+	uint8_t *dest;
+
+	assert(Box != nullptr);
+	assert(Box->Owner != nullptr);
+	assert(Box->Owner->Tex != nullptr);
+	assert(GameTex != nullptr);
+
+	int format = Box->Owner->Tex->Format;
+
+	rect = Box->Area;
+	if (!Box->Owner->Tex->LockRect(&lrect, &rect, false))
+	{
+		return false;
+	}
+	dest = (uint8_t *)lrect.pBits;
+	if (Box->Padded)
+	{
+		// Skip the top padding row and the left padding texel
+		// (1 byte for GL_R8, 4 bytes otherwise).
+		dest += lrect.Pitch + (format == GL_R8 ? 1 : 4);
+	}
+	GameTex->FillBuffer(dest, lrect.Pitch, GameTex->GetHeight(), ToTexFmt(format));
+	if (Box->Padded)
+	{
+		// Clear top padding row.
+		dest = (uint8_t *)lrect.pBits;
+		// Row length in bytes: image width plus one padding texel per side.
+		int numbytes = GameTex->GetWidth() + 2;
+		if (format != GL_R8)
+		{
+			numbytes <<= 2;
+		}
+		memset(dest, 0, numbytes);
+		dest += lrect.Pitch;
+		// Clear left and right padding columns.
+		if (format == GL_R8)
+		{
+			for (int y = Box->Area.bottom - Box->Area.top - 2; y > 0; --y)
+			{
+				dest[0] = 0;
+				dest[numbytes - 1] = 0;
+				dest += lrect.Pitch;
+			}
+		}
+		else
+		{
+			for (int y = Box->Area.bottom - Box->Area.top - 2; y > 0; --y)
+			{
+				*(uint32_t *)dest = 0;
+				*(uint32_t *)(dest + numbytes - 4) = 0;
+				dest += lrect.Pitch;
+			}
+		}
+		// Clear bottom padding row.
+		memset(dest, 0, numbytes);
+	}
+	Box->Owner->Tex->UnlockRect();
+	return true;
+}
+
+//==========================================================================
+//
+// OpenGLTex :: GetTexFormat
+//
+// Returns the texture format that would best fit this texture.
+//
+// As a side effect IsGray is reset on every call and set again only for
+// TEX_Gray.
+//
+//==========================================================================
+
+int OpenGLSWFrameBuffer::OpenGLTex::GetTexFormat()
+{
+	FTextureFormat fmt = GameTex->GetFormat();
+
+	IsGray = false;
+
+	switch (fmt)
+	{
+	case TEX_Pal:	return GL_R8;
+	case TEX_Gray:	IsGray = true; return GL_R8;
+	case TEX_RGB:	return GL_RGBA8;
+	case TEX_DXT1:	return GL_COMPRESSED_RGB_S3TC_DXT1_EXT;
+	// NOTE(review): TEX_DXT2 is paired with the RGBA DXT1 format here and
+	// ToTexFmt() maps that format back to TEX_DXT2 - confirm this is intended.
+	case TEX_DXT2:	return GL_COMPRESSED_RGBA_S3TC_DXT1_EXT;
+	case TEX_DXT3:	return GL_COMPRESSED_RGBA_S3TC_DXT3_EXT;
+	case TEX_DXT4:	return GL_COMPRESSED_RGBA_S3TC_DXT5_EXT; // Doesn't exist in OpenGL. Closest match is DXT5.
+	case TEX_DXT5:	return GL_COMPRESSED_RGBA_S3TC_DXT5_EXT;
+	default:		I_FatalError("GameTex->GetFormat() returned invalid format.");
+	}
+	return GL_R8;
+}
+
+//==========================================================================
+//
+// OpenGLTex :: ToTexFmt
+//
+// Converts an OpenGL internal format constant to something the FTexture system
+// understands.
+//
+//==========================================================================
+
+FTextureFormat OpenGLSWFrameBuffer::OpenGLTex::ToTexFmt(int fmt)
+{
+	// GL_R8 is shared by paletted and grayscale images; IsGray tells
+	// them apart.
+	switch (fmt)
+	{
+	case GL_R8:	return IsGray ? TEX_Gray : TEX_Pal;
+	case GL_RGBA8:	return TEX_RGB;
+	case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:	return TEX_DXT1;
+	case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:	return TEX_DXT2;
+	case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:	return TEX_DXT3;
+	case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:	return TEX_DXT5;
+	default:
+		assert(0);	// LOL WUT?
+		return TEX_Pal;
+	}
+}
+
+//==========================================================================
+//
+// OpenGLPal Constructor
+//
+// Links the palette at the head of the framebuffer's palette list and
+// creates a one-row GL_RGBA8 texture whose width is the entry count
+// rounded up to a power of two. Tex is nullptr afterwards if either the
+// texture creation or the initial Update() failed.
+//
+//==========================================================================
+
+OpenGLSWFrameBuffer::OpenGLPal::OpenGLPal(FRemapTable *remap, OpenGLSWFrameBuffer *fb)
+	: Tex(nullptr), Remap(remap)
+{
+	int count;
+
+	// Attach to the palette list for the OpenGLSWFrameBuffer
+	Next = fb->Palettes;
+	if (Next != nullptr)
+	{
+		Next->Prev = &Next;
+	}
+	Prev = &fb->Palettes;
+	fb->Palettes = this;
+
+	int pow2count;
+
+	// Round up to the nearest power of 2.
+	for (pow2count = 1; pow2count < remap->NumEntries; pow2count <<= 1)
+	{
+	}
+	count = pow2count;
+	DoColorSkip = false;
+
+	BorderColor = 0;
+	RoundedPaletteSize = count;
+	if (fb->CreateTexture(count, 1, 1, GL_RGBA8, &Tex))
+	{
+		if (!Update())
+		{
+			delete Tex;
+			Tex = nullptr;
+		}
+	}
+}
+
+//==========================================================================
+//
+// OpenGLPal Destructor
+//
+// Releases the texture, unlinks the palette from the list and clears
+// the remap table's Native pointer.
+//
+//==========================================================================
+
+OpenGLSWFrameBuffer::OpenGLPal::~OpenGLPal()
+{
+	SafeRelease(Tex);
+	// Detach from the palette list
+	*Prev = Next;
+	if (Next != nullptr)
+	{
+		Next->Prev = Prev;
+	}
+	// Remove link from the remap table
+	if (Remap != nullptr)
+	{
+		Remap->Native = nullptr;
+	}
+}
+
+//==========================================================================
+//
+// OpenGLPal :: Update
+//
+// Copies the palette to the texture.
+//
+// Returns false if the texture could not be locked. BorderColor is set
+// from the last entry of the remap table, or to 0 for an empty table.
+//
+//==========================================================================
+
+bool OpenGLSWFrameBuffer::OpenGLPal::Update()
+{
+	LockedRect lrect;
+	uint32_t *buff;
+	const PalEntry *pal;
+	int skipat, i;
+
+	assert(Tex != nullptr);
+
+	if (!Tex->LockRect(&lrect, nullptr, 0))
+	{
+		return false;
+	}
+	buff = (uint32_t *)lrect.pBits;
+	pal = Remap->Palette;
+
+	// See explanation in UploadPalette() for skipat rationale.
+	skipat = MIN(Remap->NumEntries, DoColorSkip ? 256 - 8 : 256);
+
+	for (i = 0; i < skipat; ++i)
+	{
+		buff[i] = ColorARGB(pal[i].a, pal[i].r, pal[i].g, pal[i].b);
+	}
+	// Entries past the skip point take their color from the previous
+	// entry. The texel at index skipat itself is not written here.
+	for (++i; i < Remap->NumEntries; ++i)
+	{
+		buff[i] = ColorARGB(pal[i].a, pal[i - 1].r, pal[i - 1].g, pal[i - 1].b);
+	}
+
+	// i has run past the end of the table by now (the ++i above always
+	// executes), so indexing pal with it reads out of bounds. Take the
+	// border from the last valid entry instead.
+	if (Remap->NumEntries > 0)
+	{
+		const PalEntry &last = pal[Remap->NumEntries - 1];
+		BorderColor = ColorARGB(last.a, last.r, last.g, last.b);
+	}
+	else
+	{
+		BorderColor = 0;
+	}
+
+	Tex->UnlockRect();
+	return true;
+}
+
+//==========================================================================
+//
+// OpenGLSWFrameBuffer :: Begin2D
+//
+// Begins 2D mode drawing operations.
In particular, DrawTexture is +// rerouted to use Direct3D instead of the software renderer. +// +//========================================================================== + +bool OpenGLSWFrameBuffer::Begin2D(bool copy3d) +{ + if (!Accel2D) + { + return false; + } + if (In2D) + { + return true; + } + In2D = 2 - copy3d; + Update(); + In2D = 3; + + return true; +} + +//========================================================================== +// +// OpenGLSWFrameBuffer :: DrawBlendingRect +// +// Call after Begin2D to blend the 3D view. +// +//========================================================================== + +void OpenGLSWFrameBuffer::DrawBlendingRect() +{ + if (!In2D || !Accel2D) + { + return; + } + Dim(FlashColor, FlashAmount / 256.f, viewwindowx, viewwindowy, viewwidth, viewheight); +} + +//========================================================================== +// +// OpenGLSWFrameBuffer :: CreateTexture +// +// Returns a native texture that wraps a FTexture. +// +//========================================================================== + +FNativeTexture *OpenGLSWFrameBuffer::CreateTexture(FTexture *gametex, bool wrapping) +{ + OpenGLTex *tex = new OpenGLTex(gametex, this, wrapping); + if (tex->Box == nullptr) + { + delete tex; + return nullptr; + } + return tex; +} + +//========================================================================== +// +// OpenGLSWFrameBuffer :: CreatePalette +// +// Returns a native texture that contains a palette. +// +//========================================================================== + +FNativePalette *OpenGLSWFrameBuffer::CreatePalette(FRemapTable *remap) +{ + OpenGLPal *tex = new OpenGLPal(remap, this); + if (tex->Tex == nullptr) + { + delete tex; + return nullptr; + } + return tex; +} + +//========================================================================== +// +// OpenGLSWFrameBuffer :: Clear +// +// Fills the specified region with a color. 
+//
+//==========================================================================
+
+void OpenGLSWFrameBuffer::Clear(int left, int top, int right, int bottom, int palcolor, uint32 color)
+{
+	// Outside accelerated 2D mode the base class does the work.
+	if (In2D < 2)
+	{
+		Super::Clear(left, top, right, bottom, palcolor, color);
+		return;
+	}
+	if (!InScene)
+	{
+		return;
+	}
+	if (palcolor >= 0 && color == 0)
+	{
+		// Only a palette index was given: look up its color.
+		color = GPalette.BaseColors[palcolor];
+	}
+	else if (APART(color) < 255)
+	{
+		// A translucent color is drawn as a dim instead.
+		Dim(color, APART(color) / 255.f, left, top, right - left, bottom - top);
+		return;
+	}
+	// Force full alpha for the opaque fill.
+	AddColorOnlyQuad(left, top, right - left, bottom - top, color | 0xFF000000);
+}
+
+//==========================================================================
+//
+// OpenGLSWFrameBuffer :: Dim
+//
+// Blends a color over a rectangle. Amounts of zero or less do nothing
+// and amounts above one are clamped to one.
+//
+//==========================================================================
+
+void OpenGLSWFrameBuffer::Dim(PalEntry color, float amount, int x1, int y1, int w, int h)
+{
+	if (amount <= 0)
+	{
+		return;
+	}
+	if (In2D < 2)
+	{
+		Super::Dim(color, amount, x1, y1, w, h);
+		return;
+	}
+	if (!InScene)
+	{
+		return;
+	}
+	if (amount > 1)
+	{
+		amount = 1;
+	}
+	// The amount becomes the alpha byte of the quad's color. It is OR'ed
+	// in, so alpha bits already set in color are kept.
+	AddColorOnlyQuad(x1, y1, w, h, color | (int(amount * 255) << 24));
+}
+
+//==========================================================================
+//
+// OpenGLSWFrameBuffer :: BeginLineBatch
+//
+// Flushes pending quads, locks the vertex buffer and switches the batch
+// type to lines. Does nothing if a line batch is already open.
+//
+//==========================================================================
+
+void OpenGLSWFrameBuffer::BeginLineBatch()
+{
+	if (In2D < 2 || !InScene || BatchType == BATCH_Lines)
+	{
+		return;
+	}
+	EndQuadBatch();		// Make sure all quads have been drawn first.
+	VertexData = VertexBuffer->Lock();
+	VertexPos = 0;
+	BatchType = BATCH_Lines;
+}
+
+//==========================================================================
+//
+// OpenGLSWFrameBuffer :: EndLineBatch
+//
+// Unlocks the vertex buffer and draws the batched lines, if any.
+//
+//==========================================================================
+
+void OpenGLSWFrameBuffer::EndLineBatch()
+{
+	if (In2D < 2 || !InScene || BatchType != BATCH_Lines)
+	{
+		return;
+	}
+	VertexBuffer->Unlock();
+	if (VertexPos > 0)
+	{
+		SetPixelShader(Shaders[SHADER_VertexColor]);
+		SetAlphaBlend(GL_FUNC_ADD, GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
+		SetStreamSource(VertexBuffer);
+		// Two vertices per line.
+		DrawLineList(VertexPos / 2);
+	}
+	VertexPos = -1;
+	BatchType = BATCH_None;
+}
+
+//==========================================================================
+//
+// OpenGLSWFrameBuffer :: DrawLine
+//
+// Appends one line to the current line batch, starting or flushing the
+// batch as needed. The color parameter is used as the vertex color;
+// palcolor is only passed on to the base class outside 2D mode.
+//
+//==========================================================================
+
+void OpenGLSWFrameBuffer::DrawLine(int x0, int y0, int x1, int y1, int palcolor, uint32 color)
+{
+	if (In2D < 2)
+	{
+		Super::DrawLine(x0, y0, x1, y1, palcolor, color);
+		return;
+	}
+	if (!InScene)
+	{
+		return;
+	}
+	if (BatchType != BATCH_Lines)
+	{
+		BeginLineBatch();
+	}
+	if (VertexPos == NUM_VERTS)
+	{ // Flush the buffer and refill it.
+		EndLineBatch();
+		BeginLineBatch();
+	}
+	// Add the endpoints to the vertex buffer.
+	VertexData[VertexPos].x = float(x0);
+	VertexData[VertexPos].y = float(y0) + LBOffset;
+	VertexData[VertexPos].z = 0;
+	VertexData[VertexPos].rhw = 1;
+	VertexData[VertexPos].color0 = color;
+	VertexData[VertexPos].color1 = 0;
+	VertexData[VertexPos].tu = 0;
+	VertexData[VertexPos].tv = 0;
+
+	VertexData[VertexPos + 1].x = float(x1);
+	VertexData[VertexPos + 1].y = float(y1) + LBOffset;
+	VertexData[VertexPos + 1].z = 0;
+	VertexData[VertexPos + 1].rhw = 1;
+	VertexData[VertexPos + 1].color0 = color;
+	VertexData[VertexPos + 1].color1 = 0;
+	VertexData[VertexPos + 1].tu = 0;
+	VertexData[VertexPos + 1].tv = 0;
+
+	VertexPos += 2;
+}
+
+//==========================================================================
+//
+// OpenGLSWFrameBuffer :: DrawPixel
+//
+// Draws a single point immediately. Any open batch is flushed first,
+// so this is not batched itself.
+//
+//==========================================================================
+
+void OpenGLSWFrameBuffer::DrawPixel(int x, int y, int palcolor, uint32 color)
+{
+	if (In2D < 2)
+	{
+		Super::DrawPixel(x, y, palcolor, color);
+		return;
+	}
+	if (!InScene)
+	{
+		return;
+	}
+	FBVERTEX pt =
+	{
+		float(x), float(y), 0, 1, color
+	};
+	EndBatch();		// Draw out any batched operations.
+	SetPixelShader(Shaders[SHADER_VertexColor]);
+	SetAlphaBlend(GL_FUNC_ADD, GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
+	DrawPoints(1, &pt);
+}
+
+//==========================================================================
+//
+// OpenGLSWFrameBuffer :: DrawTextureParms
+//
+// If not in 2D mode, just call the normal software version.
+// If in 2D mode, then use OpenGL calls to perform the drawing.
+//
+//==========================================================================
+
+void OpenGLSWFrameBuffer::DrawTextureParms(FTexture *img, DrawParms &parms)
+{
+	if (In2D < 2)
+	{
+		Super::DrawTextureParms(img, parms);
+		return;
+	}
+	if (!InScene)
+	{
+		return;
+	}
+
+	OpenGLTex *tex = static_cast<OpenGLTex *>(img->GetNative(false));
+
+	if (tex == nullptr)
+	{
+		assert(tex != nullptr);
+		return;
+	}
+
+	CheckQuadBatch();
+
+	// Destination rectangle in screen space and the matching texture
+	// coordinates taken from the packed box.
+	double xscale = parms.destwidth / parms.texwidth;
+	double yscale = parms.destheight / parms.texheight;
+	double x0 = parms.x - parms.left * xscale;
+	double y0 = parms.y - parms.top * yscale;
+	double x1 = x0 + parms.destwidth;
+	double y1 = y0 + parms.destheight;
+	float u0 = tex->Box->Left;
+	float v0 = tex->Box->Top;
+	float u1 = tex->Box->Right;
+	float v1 = tex->Box->Bottom;
+	double uscale = 1.f / tex->Box->Owner->Width;
+	bool scissoring = false;
+	FBVERTEX *vert;
+	float yoffs;
+
+	if (parms.flipX)
+	{
+		swapvalues(u0, u1);
+	}
+	// Narrow both the rectangle and the texture coordinates to the
+	// requested source window.
+	if (parms.windowleft > 0 || parms.windowright < parms.texwidth)
+	{
+		double wi = MIN(parms.windowright, parms.texwidth);
+		x0 += parms.windowleft * xscale;
+		u0 = float(u0 + parms.windowleft * uscale);
+		x1 -= (parms.texwidth - wi) * xscale;
+		u1 = float(u1 - (parms.texwidth - wi) * uscale);
+	}
+
+#if 0
+	float vscale = 1.f / tex->Box->Owner->Height / yscale;
+	if (y0 < parms.uclip)
+	{
+		v0 += (float(parms.uclip) - y0) * vscale;
+		y0 = float(parms.uclip);
+	}
+	if (y1 > parms.dclip)
+	{
+		v1 -= (y1 - float(parms.dclip)) * vscale;
+		y1 = float(parms.dclip);
+	}
+	if (x0 < parms.lclip)
+	{
+		u0 += float(parms.lclip - x0) * uscale / xscale * 2;
+		x0 = float(parms.lclip);
+	}
+	if (x1 > parms.rclip)
+	{
+		u1 -= (x1 - parms.rclip) * uscale / xscale * 2;
+		x1 = float(parms.rclip);
+	}
+#else
+	// Use a scissor test because the math above introduces some jitter
+	// that is noticeable at low resolutions. Unfortunately, this means this
+	// quad has to be in a batch by itself.
+	if (y0 < parms.uclip || y1 > parms.dclip || x0 < parms.lclip || x1 > parms.rclip)
+	{
+		scissoring = true;
+		if (QuadBatchPos > 0)
+		{
+			EndQuadBatch();
+			BeginQuadBatch();
+		}
+		glEnable(GL_SCISSOR_TEST);
+		// NOTE(review): glScissor measures y from the bottom of the window;
+		// confirm that uclip is the right origin for the projection in use.
+		glScissor(parms.lclip, parms.uclip + LBOffsetI, parms.rclip - parms.lclip, parms.dclip - parms.uclip);
+	}
+#endif
+	// Filtering is forced off, so the BQF_Bilinear branch below never fires.
+	parms.bilinear = false;
+
+	uint32_t color0, color1;
+	BufferedTris *quad = &QuadExtra[QuadBatchPos];
+
+	if (!SetStyle(tex, parms, color0, color1, *quad))
+	{
+		// Nothing to draw, but a scissor enabled above must still be undone.
+		goto done;
+	}
+
+	quad->Texture = tex->Box->Owner->Tex;
+	if (parms.bilinear)
+	{
+		quad->Flags |= BQF_Bilinear;
+	}
+	quad->NumTris = 2;
+	quad->NumVerts = 4;
+
+	yoffs = GatheringWipeScreen ? 0.5f : 0.5f - LBOffset;
+
+#if 0
+	// Coordinates are truncated to integers, because that's effectively
+	// what the software renderer does. The hardware will instead round
+	// to nearest, it seems.
+	x0 = floorf(x0) - 0.5f;
+	y0 = floorf(y0) - yoffs;
+	x1 = floorf(x1) - 0.5f;
+	y1 = floorf(y1) - yoffs;
+#else
+	x0 = x0 - 0.5f;
+	y0 = y0 - yoffs;
+	x1 = x1 - 0.5f;
+	y1 = y1 - yoffs;
+#endif
+
+	vert = &VertexData[VertexPos];
+
+	// Fill the vertex buffer.
+	vert[0].x = float(x0);
+	vert[0].y = float(y0);
+	vert[0].z = 0;
+	vert[0].rhw = 1;
+	vert[0].color0 = color0;
+	vert[0].color1 = color1;
+	vert[0].tu = u0;
+	vert[0].tv = v0;
+
+	vert[1].x = float(x1);
+	vert[1].y = float(y0);
+	vert[1].z = 0;
+	vert[1].rhw = 1;
+	vert[1].color0 = color0;
+	vert[1].color1 = color1;
+	vert[1].tu = u1;
+	vert[1].tv = v0;
+
+	vert[2].x = float(x1);
+	vert[2].y = float(y1);
+	vert[2].z = 0;
+	vert[2].rhw = 1;
+	vert[2].color0 = color0;
+	vert[2].color1 = color1;
+	vert[2].tu = u1;
+	vert[2].tv = v1;
+
+	vert[3].x = float(x0);
+	vert[3].y = float(y1);
+	vert[3].z = 0;
+	vert[3].rhw = 1;
+	vert[3].color0 = color0;
+	vert[3].color1 = color1;
+	vert[3].tu = u0;
+	vert[3].tv = v1;
+
+	// Fill the vertex index buffer.
+	IndexData[IndexPos] = VertexPos;
+	IndexData[IndexPos + 1] = VertexPos + 1;
+	IndexData[IndexPos + 2] = VertexPos + 2;
+	IndexData[IndexPos + 3] = VertexPos;
+	IndexData[IndexPos + 4] = VertexPos + 2;
+	IndexData[IndexPos + 5] = VertexPos + 3;
+
+	// Batch the quad.
+	QuadBatchPos++;
+	VertexPos += 4;
+	IndexPos += 6;
+done:
+	if (scissoring)
+	{
+		EndQuadBatch();
+		glDisable(GL_SCISSOR_TEST);
+	}
+}
+
+//==========================================================================
+//
+// OpenGLSWFrameBuffer :: FlatFill
+//
+// Fills an area with a repeating copy of the texture.
+//
+// With local_origin set the tiling starts at the rectangle's top left
+// corner, otherwise at the screen origin.
+//
+//==========================================================================
+
+void OpenGLSWFrameBuffer::FlatFill(int left, int top, int right, int bottom, FTexture *src, bool local_origin)
+{
+	if (In2D < 2)
+	{
+		Super::FlatFill(left, top, right, bottom, src, local_origin);
+		return;
+	}
+	if (!InScene)
+	{
+		return;
+	}
+	OpenGLTex *tex = static_cast<OpenGLTex *>(src->GetNative(true));
+	if (tex == nullptr)
+	{
+		return;
+	}
+	float yoffs = GatheringWipeScreen ? 0.5f : 0.5f - LBOffset;
+	float x0 = float(left);
+	float y0 = float(top);
+	float x1 = float(right);
+	float y1 = float(bottom);
+	// Texture coordinates are in units of whole textures, so values
+	// beyond 1 repeat the image (BQF_WrapUV is set below).
+	float itw = 1.f / float(src->GetWidth());
+	float ith = 1.f / float(src->GetHeight());
+	float xo = local_origin ? x0 : 0;
+	float yo = local_origin ? y0 : 0;
+	float u0 = (x0 - xo) * itw;
+	float v0 = (y0 - yo) * ith;
+	float u1 = (x1 - xo) * itw;
+	float v1 = (y1 - yo) * ith;
+	x0 -= 0.5f;
+	y0 -= yoffs;
+	x1 -= 0.5f;
+	y1 -= yoffs;
+
+	CheckQuadBatch();
+
+	BufferedTris *quad = &QuadExtra[QuadBatchPos];
+	FBVERTEX *vert = &VertexData[VertexPos];
+
+	// NOTE(review): BlendOp, SrcBlend and DestBlend are not assigned here;
+	// presumably Group1 = 0 clears them - confirm against BufferedTris.
+	quad->Group1 = 0;
+	if (tex->GetTexFormat() == GL_R8 && !tex->IsGray)
+	{
+		quad->Flags = BQF_WrapUV | BQF_GamePalette; // | BQF_DisableAlphaTest;
+		quad->ShaderNum = BQS_PalTex;
+	}
+	else
+	{
+		quad->Flags = BQF_WrapUV; // | BQF_DisableAlphaTest;
+		quad->ShaderNum = BQS_Plain;
+	}
+	quad->Palette = nullptr;
+	quad->Texture = tex->Box->Owner->Tex;
+	quad->NumVerts = 4;
+	quad->NumTris = 2;
+
+	vert[0].x = x0;
+	vert[0].y = y0;
+	vert[0].z = 0;
+	vert[0].rhw = 1;
+	vert[0].color0 = 0;
+	vert[0].color1 = 0xFFFFFFFF;
+	vert[0].tu = u0;
+	vert[0].tv = v0;
+
+	vert[1].x = x1;
+	vert[1].y = y0;
+	vert[1].z = 0;
+	vert[1].rhw = 1;
+	vert[1].color0 = 0;
+	vert[1].color1 = 0xFFFFFFFF;
+	vert[1].tu = u1;
+	vert[1].tv = v0;
+
+	vert[2].x = x1;
+	vert[2].y = y1;
+	vert[2].z = 0;
+	vert[2].rhw = 1;
+	vert[2].color0 = 0;
+	vert[2].color1 = 0xFFFFFFFF;
+	vert[2].tu = u1;
+	vert[2].tv = v1;
+
+	vert[3].x = x0;
+	vert[3].y = y1;
+	vert[3].z = 0;
+	vert[3].rhw = 1;
+	vert[3].color0 = 0;
+	vert[3].color1 = 0xFFFFFFFF;
+	vert[3].tu = u0;
+	vert[3].tv = v1;
+
+	IndexData[IndexPos] = VertexPos;
+	IndexData[IndexPos + 1] = VertexPos + 1;
+	IndexData[IndexPos + 2] = VertexPos + 2;
+	IndexData[IndexPos + 3] = VertexPos;
+	IndexData[IndexPos + 4] = VertexPos + 2;
+	IndexData[IndexPos + 5] = VertexPos + 3;
+
+	QuadBatchPos++;
+	VertexPos += 4;
+	IndexPos += 6;
+}
+
+//==========================================================================
+//
+// OpenGLSWFrameBuffer :: FillSimplePoly
+//
+// Here, "simple" means that a simple triangle fan can draw it.
+//
+//==========================================================================
+
+void OpenGLSWFrameBuffer::FillSimplePoly(FTexture *texture, FVector2 *points, int npoints,
+	double originx, double originy, double scalex, double scaley,
+	DAngle rotation, FDynamicColormap *colormap, int lightlevel)
+{
+	// Use an equation similar to player sprites to determine shade
+	double fadelevel = clamp((LIGHT2SHADE(lightlevel) / 65536. - 12) / NUMCOLORMAPS, 0.0, 1.0);
+
+	BufferedTris *quad;
+	FBVERTEX *verts;
+	OpenGLTex *tex;
+	float yoffs, uscale, vscale;
+	int i, ipos;
+	uint32_t color0, color1;
+	float ox, oy;
+	float cosrot, sinrot;
+	bool dorotate = rotation != 0;
+
+	if (npoints < 3)
+	{ // This is no polygon.
+		return;
+	}
+	if (In2D < 2)
+	{
+		Super::FillSimplePoly(texture, points, npoints, originx, originy, scalex, scaley, rotation, colormap, lightlevel);
+		return;
+	}
+	if (!InScene)
+	{
+		return;
+	}
+	tex = static_cast<OpenGLTex *>(texture->GetNative(true));
+	if (tex == nullptr)
+	{
+		return;
+	}
+
+	cosrot = (float)cos(rotation.Radians());
+	sinrot = (float)sin(rotation.Radians());
+
+	// A fan over npoints vertices consists of npoints - 2 triangles.
+	CheckQuadBatch(npoints - 2, npoints);
+	quad = &QuadExtra[QuadBatchPos];
+	verts = &VertexData[VertexPos];
+
+	color0 = 0;
+	color1 = 0xFFFFFFFF;
+
+	quad->Group1 = 0;
+	if (tex->GetTexFormat() == GL_R8 && !tex->IsGray)
+	{
+		quad->Flags = BQF_WrapUV | BQF_GamePalette | BQF_DisableAlphaTest;
+		quad->ShaderNum = BQS_PalTex;
+		if (colormap != nullptr)
+		{
+			if (colormap->Desaturate != 0)
+			{
+				quad->Flags |= BQF_Desaturated;
+			}
+			quad->ShaderNum = BQS_InGameColormap;
+			quad->Desat = colormap->Desaturate;
+			color0 = ColorARGB(255, colormap->Color.r, colormap->Color.g, colormap->Color.b);
+			color1 = ColorARGB(uint32_t((1 - fadelevel) * 255),
+				uint32_t(colormap->Fade.r * fadelevel),
+				uint32_t(colormap->Fade.g * fadelevel),
+				uint32_t(colormap->Fade.b * fadelevel));
+		}
+	}
+	else
+	{
+		quad->Flags = BQF_WrapUV | BQF_DisableAlphaTest;
+		quad->ShaderNum = BQS_Plain;
+	}
+	quad->Palette = nullptr;
+	quad->Texture = tex->Box->Owner->Tex;
+	quad->NumVerts = npoints;
+	quad->NumTris = npoints - 2;
+
+	yoffs = GatheringWipeScreen ? 0 : LBOffset;
+	uscale = float(1.f / (texture->GetScaledWidth() * scalex));
+	vscale = float(1.f / (texture->GetScaledHeight() * scaley));
+	ox = float(originx);
+	oy = float(originy);
+
+	for (i = 0; i < npoints; ++i)
+	{
+		verts[i].x = points[i].X;
+		verts[i].y = points[i].Y + yoffs;
+		verts[i].z = 0;
+		verts[i].rhw = 1;
+		verts[i].color0 = color0;
+		verts[i].color1 = color1;
+		// Texture coordinates are relative to the origin and rotated
+		// about it before scaling.
+		float u = points[i].X - 0.5f - ox;
+		float v = points[i].Y - 0.5f - oy;
+		if (dorotate)
+		{
+			float t = u;
+			u = t * cosrot - v * sinrot;
+			v = v * cosrot + t * sinrot;
+		}
+		verts[i].tu = u * uscale;
+		verts[i].tv = v * vscale;
+	}
+	// Triangle fan: every triangle shares the first vertex.
+	for (ipos = IndexPos, i = 2; i < npoints; ++i, ipos += 3)
+	{
+		IndexData[ipos] = VertexPos;
+		IndexData[ipos + 1] = VertexPos + i - 1;
+		IndexData[ipos + 2] = VertexPos + i;
+	}
+
+	QuadBatchPos++;
+	VertexPos += npoints;
+	IndexPos = ipos;
+}
+
+//==========================================================================
+//
+// OpenGLSWFrameBuffer :: AddColorOnlyQuad
+//
+// Adds a single-color, untextured quad to the batch.
+//
+// Blending is only set up when the color's alpha byte is not 0xFF.
+//
+//==========================================================================
+
+void OpenGLSWFrameBuffer::AddColorOnlyQuad(int left, int top, int width, int height, uint32_t color)
+{
+	BufferedTris *quad;
+	FBVERTEX *verts;
+
+	CheckQuadBatch();
+	quad = &QuadExtra[QuadBatchPos];
+	verts = &VertexData[VertexPos];
+
+	float x = float(left) - 0.5f;
+	float y = float(top) - 0.5f + (GatheringWipeScreen ? 0 : LBOffset);
+
+	// NOTE(review): for opaque colors BlendOp is left to whatever
+	// Group1 = 0 makes of it; confirm against the BufferedTris layout.
+	quad->Group1 = 0;
+	quad->ShaderNum = BQS_ColorOnly;
+	if ((color & 0xFF000000) != 0xFF000000)
+	{
+		quad->BlendOp = GL_FUNC_ADD;
+		quad->SrcBlend = GL_SRC_ALPHA;
+		quad->DestBlend = GL_ONE_MINUS_SRC_ALPHA;
+	}
+	quad->Palette = nullptr;
+	quad->Texture = nullptr;
+	quad->NumVerts = 4;
+	quad->NumTris = 2;
+
+	verts[0].x = x;
+	verts[0].y = y;
+	verts[0].z = 0;
+	verts[0].rhw = 1;
+	verts[0].color0 = color;
+	verts[0].color1 = 0;
+	verts[0].tu = 0;
+	verts[0].tv = 0;
+
+	verts[1].x = x + width;
+	verts[1].y = y;
+	verts[1].z = 0;
+	verts[1].rhw = 1;
+	verts[1].color0 = color;
+	verts[1].color1 = 0;
+	verts[1].tu = 0;
+	verts[1].tv = 0;
+
+	verts[2].x = x + width;
+	verts[2].y = y + height;
+	verts[2].z = 0;
+	verts[2].rhw = 1;
+	verts[2].color0 = color;
+	verts[2].color1 = 0;
+	verts[2].tu = 0;
+	verts[2].tv = 0;
+
+	verts[3].x = x;
+	verts[3].y = y + height;
+	verts[3].z = 0;
+	verts[3].rhw = 1;
+	verts[3].color0 = color;
+	verts[3].color1 = 0;
+	verts[3].tu = 0;
+	verts[3].tv = 0;
+
+	IndexData[IndexPos] = VertexPos;
+	IndexData[IndexPos + 1] = VertexPos + 1;
+	IndexData[IndexPos + 2] = VertexPos + 2;
+	IndexData[IndexPos + 3] = VertexPos;
+	IndexData[IndexPos + 4] = VertexPos + 2;
+	IndexData[IndexPos + 5] = VertexPos + 3;
+
+	QuadBatchPos++;
+	VertexPos += 4;
+	IndexPos += 6;
+}
+
+//==========================================================================
+//
+// OpenGLSWFrameBuffer :: AddColorOnlyRect
+//
+// Like AddColorOnlyQuad, except it's hollow.
+//
+//==========================================================================
+
+void OpenGLSWFrameBuffer::AddColorOnlyRect(int left, int top, int width, int height, uint32_t color)
+{
+	// Four one-pixel-thick edges, each stopping one pixel short so that
+	// no corner is drawn twice.
+	const int edgew = width - 1;
+	const int edgeh = height - 1;
+
+	AddColorOnlyQuad(left, top, edgew, 1, color);	// top
+	AddColorOnlyQuad(left + edgew, top, 1, edgeh, color);	// right
+	AddColorOnlyQuad(left + 1, top + edgeh, edgew, 1, color);	// bottom
+	AddColorOnlyQuad(left, top + 1, 1, edgeh, color);	// left
+}
+
+//==========================================================================
+//
+// OpenGLSWFrameBuffer :: CheckQuadBatch
+//
+// Make sure there's enough room in the batch for one more set of triangles.
+//
+//==========================================================================
+
+void OpenGLSWFrameBuffer::CheckQuadBatch(int numtris, int numverts)
+{
+	if (BatchType == BATCH_Lines)
+	{
+		// Lines are pending: draw them before switching to quads.
+		EndLineBatch();
+	}
+	else
+	{
+		const bool batchfull = QuadBatchPos == MAX_QUAD_BATCH;
+		const bool vertsfull = VertexPos + numverts > NUM_VERTS;
+		const bool indexfull = IndexPos + numtris * 3 > NUM_INDEXES;
+		if (batchfull || vertsfull || indexfull)
+		{
+			EndQuadBatch();
+		}
+	}
+	// A negative position means that no quad batch is open.
+	if (QuadBatchPos < 0)
+	{
+		BeginQuadBatch();
+	}
+}
+
+//==========================================================================
+//
+// OpenGLSWFrameBuffer :: BeginQuadBatch
+//
+// Locks the vertex buffer for quads and sets the cursor to 0.
+//
+//==========================================================================
+
+void OpenGLSWFrameBuffer::BeginQuadBatch()
+{
+	const bool canstart = In2D >= 2 && InScene && QuadBatchPos < 0;
+	if (!canstart)
+	{
+		return;
+	}
+	EndLineBatch();		// Make sure all lines have been drawn first.
+	VertexData = VertexBuffer->Lock();
+	IndexData = IndexBuffer->Lock();
+	QuadBatchPos = 0;
+	VertexPos = 0;
+	IndexPos = 0;
+	BatchType = BATCH_Quads;
+}
+
+//==========================================================================
+//
+// OpenGLSWFrameBuffer :: EndQuadBatch
+//
+// Draws all the quads that have been batched up.
+//
+//==========================================================================
+
+void OpenGLSWFrameBuffer::EndQuadBatch()
+{
+	if (In2D < 2 || !InScene || BatchType != BATCH_Quads)
+	{
+		return;
+	}
+	BatchType = BATCH_None;
+	VertexBuffer->Unlock();
+	IndexBuffer->Unlock();
+	if (QuadBatchPos == 0)
+	{
+		// Nothing was batched; just mark the batch as closed.
+		QuadBatchPos = -1;
+		VertexPos = -1;
+		IndexPos = -1;
+		return;
+	}
+	SetStreamSource(VertexBuffer);
+	SetIndices(IndexBuffer);
+	bool uv_wrapped = false;
+	bool uv_should_wrap;
+	int indexpos, vertpos;
+
+	// indexpos and vertpos run through the buffers as the batched entries
+	// are consumed; i is advanced to j at the end of each pass.
+	indexpos = vertpos = 0;
+	for (int i = 0; i < QuadBatchPos; )
+	{
+		const BufferedTris *quad = &QuadExtra[i];
+		int j;
+
+		int startindex = indexpos;
+		int startvertex = vertpos;
+
+		indexpos += quad->NumTris * 3;
+		vertpos += quad->NumVerts;
+
+		// Quads with matching parameters should be done with a single
+		// draw call.
+		for (j = i + 1; j < QuadBatchPos; ++j)
+		{
+			const BufferedTris *q2 = &QuadExtra[j];
+			if (quad->Texture != q2->Texture ||
+				quad->Group1 != q2->Group1 ||
+				quad->Palette != q2->Palette)
+			{
+				break;
+			}
+			if (quad->ShaderNum == BQS_InGameColormap && (quad->Flags & BQF_Desaturated) && quad->Desat != q2->Desat)
+			{
+				break;
+			}
+			indexpos += q2->NumTris * 3;
+			vertpos += q2->NumVerts;
+		}
+
+		// Set the palette (if one)
+		if ((quad->Flags & BQF_Paletted) == BQF_GamePalette)
+		{
+			SetPaletteTexture(PaletteTexture, 256, BorderColor);
+		}
+		else if ((quad->Flags & BQF_Paletted) == BQF_CustomPalette)
+		{
+			assert(quad->Palette != nullptr);
+			SetPaletteTexture(quad->Palette->Tex, quad->Palette->RoundedPaletteSize, quad->Palette->BorderColor);
+		}
+
+		// Set the alpha blending
+		SetAlphaBlend(quad->BlendOp, quad->SrcBlend, quad->DestBlend);
+
+		// Set the alpha test
+		EnableAlphaTest(!(quad->Flags & BQF_DisableAlphaTest));
+
+		// Set the pixel shader
+		if (quad->ShaderNum == BQS_PalTex)
+		{
+			SetPixelShader(Shaders[(quad->Flags & BQF_InvertSource) ?
+				SHADER_NormalColorPalInv : SHADER_NormalColorPal]);
+		}
+		else if (quad->ShaderNum == BQS_Plain)
+		{
+			SetPixelShader(Shaders[(quad->Flags & BQF_InvertSource) ?
+				SHADER_NormalColorInv : SHADER_NormalColor]);
+		}
+		else if (quad->ShaderNum == BQS_RedToAlpha)
+		{
+			SetPixelShader(Shaders[(quad->Flags & BQF_InvertSource) ?
+				SHADER_RedToAlphaInv : SHADER_RedToAlpha]);
+		}
+		else if (quad->ShaderNum == BQS_ColorOnly)
+		{
+			SetPixelShader(Shaders[SHADER_VertexColor]);
+		}
+		else if (quad->ShaderNum == BQS_SpecialColormap)
+		{
+			int select;
+
+			// The paletted variant is the entry right after the plain one.
+			select = !!(quad->Flags & BQF_Paletted);
+			SetPixelShader(Shaders[SHADER_SpecialColormap + select]);
+		}
+		else if (quad->ShaderNum == BQS_InGameColormap)
+		{
+			int select;
+
+			// The variant index is built from three flag bits:
+			// desaturated (1), inverted source (2) and paletted (4).
+			select = !!(quad->Flags & BQF_Desaturated);
+			select |= !!(quad->Flags & BQF_InvertSource) << 1;
+			select |= !!(quad->Flags & BQF_Paletted) << 2;
+			if (quad->Flags & BQF_Desaturated)
+			{
+				SetConstant(PSCONST_Desaturation, quad->Desat / 255.f, (255 - quad->Desat) / 255.f, 0, 0);
+			}
+			SetPixelShader(Shaders[SHADER_InGameColormap + select]);
+		}
+
+		// Set the texture clamp addressing mode
+		uv_should_wrap = !!(quad->Flags & BQF_WrapUV);
+		if (uv_wrapped != uv_should_wrap)
+		{
+			uint32_t mode = uv_should_wrap ? GL_REPEAT : GL_CLAMP_TO_EDGE;
+			uv_wrapped = uv_should_wrap;
+			SetSamplerWrapS(0, mode);
+			SetSamplerWrapT(0, mode);
+		}
+
+		// Set the texture
+		if (quad->Texture != nullptr)
+		{
+			SetTexture(0, quad->Texture);
+		}
+
+		// Draw the quad
+		DrawTriangleList(
+			startvertex,	// MinIndex
+			vertpos - startvertex,	// NumVertices
+			startindex,	// StartIndex
+			(indexpos - startindex) / 3	// PrimitiveCount
+			/*4 * i, 4 * (j - i), 6 * i, 2 * (j - i)*/);
+		i = j;
+	}
+	// Leave the sampler in clamping mode for whatever draws next.
+	if (uv_wrapped)
+	{
+		SetSamplerWrapS(0, GL_CLAMP_TO_EDGE);
+		SetSamplerWrapT(0, GL_CLAMP_TO_EDGE);
+	}
+	QuadBatchPos = -1;
+	VertexPos = -1;
+	IndexPos = -1;
+}
+
+//==========================================================================
+//
+// OpenGLSWFrameBuffer :: EndBatch
+//
+// Draws whichever type of primitive is currently being batched.
+//
+//==========================================================================
+
+void OpenGLSWFrameBuffer::EndBatch()
+{
+	if (BatchType == BATCH_Quads)
+	{
+		EndQuadBatch();
+	}
+	else if (BatchType == BATCH_Lines)
+	{
+		EndLineBatch();
+	}
+}
+
+//==========================================================================
+//
+// OpenGLSWFrameBuffer :: SetStyle
+//
+// Patterned after R_SetPatchStyle.
+//
+//==========================================================================
+
+bool OpenGLSWFrameBuffer::SetStyle(OpenGLTex *tex, DrawParms &parms, uint32_t &color0, uint32_t &color1, BufferedTris &quad)
+{
+	// Fills in the blend state, flags, shader and palette of quad and the
+	// two vertex colors from the draw parameters. Returns false if the
+	// style's blend op is STYLEOP_None, i.e. nothing should be drawn.
+	// parms.fillcolor and parms.colorOverlay may be modified.
+	int fmt = tex->GetTexFormat();
+	FRenderStyle style = parms.style;
+	float alpha;
+
+	if (style.Flags & STYLEF_TransSoulsAlpha)
+	{
+		alpha = transsouls;
+	}
+	else if (style.Flags & STYLEF_Alpha1)
+	{
+		alpha = 1;
+	}
+	else
+	{
+		alpha = clamp(parms.Alpha, 0.f, 1.f);
+	}
+
+	style.CheckFuzz();
+	if (style.BlendOp == STYLEOP_Shadow)
+	{
+		style = LegacyRenderStyles[STYLE_TranslucentStencil];
+		alpha = 0.3f;
+		parms.fillcolor = 0;
+	}
+
+	// FIXME: Fuzz effect is not written
+	if (style.BlendOp == STYLEOP_FuzzOrAdd || style.BlendOp == STYLEOP_Fuzz)
+	{
+		style.BlendOp = STYLEOP_Add;
+	}
+	else if (style.BlendOp == STYLEOP_FuzzOrSub)
+	{
+		style.BlendOp = STYLEOP_Sub;
+	}
+	else if (style.BlendOp == STYLEOP_FuzzOrRevSub)
+	{
+		style.BlendOp = STYLEOP_RevSub;
+	}
+
+	quad.Palette = nullptr;
+	quad.Flags = 0;
+	quad.Desat = 0;
+
+	switch (style.BlendOp)
+	{
+	default:
+	case STYLEOP_Add:	quad.BlendOp = GL_FUNC_ADD;	break;
+	case STYLEOP_Sub:	quad.BlendOp = GL_FUNC_SUBTRACT;	break;
+	case STYLEOP_RevSub:	quad.BlendOp = GL_FUNC_REVERSE_SUBTRACT;	break;
+	case STYLEOP_None:	return false;
+	}
+	quad.SrcBlend = GetStyleAlpha(style.SrcAlpha);
+	quad.DestBlend = GetStyleAlpha(style.DestAlpha);
+
+	if (style.Flags & STYLEF_InvertOverlay)
+	{
+		// Only the overlay color is inverted, not the overlay alpha.
+		parms.colorOverlay = ColorARGB(APART(parms.colorOverlay),
+			255 - RPART(parms.colorOverlay), 255 - GPART(parms.colorOverlay),
+			255 - BPART(parms.colorOverlay));
+	}
+
+	SetColorOverlay(parms.colorOverlay, alpha, color0, color1);
+
+	if (style.Flags & STYLEF_ColorIsFixed)
+	{
+		if (style.Flags & STYLEF_InvertSource)
+		{ // Since the source color is a constant, we can invert it now
+		  // without spending time doing it in the shader.
+			parms.fillcolor = ColorXRGB(255 - RPART(parms.fillcolor),
+				255 - GPART(parms.fillcolor), 255 - BPART(parms.fillcolor));
+		}
+		// Set up the color mod to replace the color from the image data.
+		color0 = (color0 & ColorRGBA(0, 0, 0, 255)) | (parms.fillcolor & ColorRGBA(255, 255, 255, 0));
+		color1 &= ColorRGBA(0, 0, 0, 255);
+
+		if (style.Flags & STYLEF_RedIsAlpha)
+		{
+			// Note that if the source texture is paletted, the palette is ignored.
+			quad.Flags = 0;
+			quad.ShaderNum = BQS_RedToAlpha;
+		}
+		else if (fmt == GL_R8)
+		{
+			quad.Flags = BQF_GamePalette;
+			quad.ShaderNum = BQS_PalTex;
+		}
+		else
+		{
+			quad.Flags = 0;
+			quad.ShaderNum = BQS_Plain;
+		}
+	}
+	else
+	{
+		if (style.Flags & STYLEF_RedIsAlpha)
+		{
+			quad.Flags = 0;
+			quad.ShaderNum = BQS_RedToAlpha;
+		}
+		else if (fmt == GL_R8)
+		{
+			if (parms.remap != nullptr)
+			{
+				quad.Flags = BQF_CustomPalette;
+				quad.Palette = reinterpret_cast<OpenGLPal *>(parms.remap->GetNative());
+				quad.ShaderNum = BQS_PalTex;
+			}
+			else if (tex->IsGray)
+			{
+				quad.Flags = 0;
+				quad.ShaderNum = BQS_Plain;
+			}
+			else
+			{
+				quad.Flags = BQF_GamePalette;
+				quad.ShaderNum = BQS_PalTex;
+			}
+		}
+		else
+		{
+			quad.Flags = 0;
+			quad.ShaderNum = BQS_Plain;
+		}
+		if (style.Flags & STYLEF_InvertSource)
+		{
+			quad.Flags |= BQF_InvertSource;
+		}
+
+		if (parms.specialcolormap != nullptr)
+		{ // Emulate an invulnerability or similar colormap.
+			float *start, *end;
+			start = parms.specialcolormap->ColorizeStart;
+			end = parms.specialcolormap->ColorizeEnd;
+			if (quad.Flags & BQF_InvertSource)
+			{
+				// Inverting the source equals swapping the ramp's ends.
+				quad.Flags &= ~BQF_InvertSource;
+				swapvalues(start, end);
+			}
+			quad.ShaderNum = BQS_SpecialColormap;
+			color0 = ColorRGBA(uint32_t(start[0] / 2 * 255), uint32_t(start[1] / 2 * 255), uint32_t(start[2] / 2 * 255), color0 >> 24);
+			color1 = ColorRGBA(uint32_t(end[0] / 2 * 255), uint32_t(end[1] / 2 * 255), uint32_t(end[2] / 2 * 255), color1 >> 24);
+		}
+		else if (parms.colormapstyle != nullptr)
+		{ // Emulate the fading from an in-game colormap (colorized, faded, and desaturated)
+			if (parms.colormapstyle->Desaturate != 0)
+			{
+				quad.Flags |= BQF_Desaturated;
+			}
+			quad.ShaderNum = BQS_InGameColormap;
+			quad.Desat = parms.colormapstyle->Desaturate;
+			color0 = ColorARGB(color1 >> 24,
+				parms.colormapstyle->Color.r,
+				parms.colormapstyle->Color.g,
+				parms.colormapstyle->Color.b);
+			double fadelevel = parms.colormapstyle->FadeLevel;
+			color1 = ColorARGB(uint32_t((1 - fadelevel) * 255),
+				uint32_t(parms.colormapstyle->Fade.r * fadelevel),
+				uint32_t(parms.colormapstyle->Fade.g * fadelevel),
+				uint32_t(parms.colormapstyle->Fade.b * fadelevel));
+		}
+	}
+
+	// For unmasked images, force the alpha from the image data to be ignored.
+	if (!parms.masked && quad.ShaderNum != BQS_InGameColormap)
+	{
+		color0 = (color0 & ColorRGBA(255, 255, 255, 0)) | ColorValue(0, 0, 0, alpha);
+		color1 &= ColorRGBA(255, 255, 255, 0);
+
+		// If our alpha is one and we are doing normal adding, then we can turn the blend off completely.
+		if (quad.BlendOp == GL_FUNC_ADD &&
+			((alpha == 1 && quad.SrcBlend == GL_SRC_ALPHA) || quad.SrcBlend == GL_ONE) &&
+			((alpha == 1 && quad.DestBlend == GL_ONE_MINUS_SRC_ALPHA) || quad.DestBlend == GL_ZERO))
+		{
+			quad.BlendOp = 0;
+		}
+		quad.Flags |= BQF_DisableAlphaTest;
+	}
+	return true;
+}
+
+int OpenGLSWFrameBuffer::GetStyleAlpha(int type)
+{
+	// Maps a STYLEALPHA_* value to the matching GL blend factor.
+	// Unknown values map to GL_ZERO.
+	switch (type)
+	{
+	case STYLEALPHA_Zero:	return GL_ZERO;
+	case STYLEALPHA_One:	return GL_ONE;
+	case STYLEALPHA_Src:	return GL_SRC_ALPHA;
+	case STYLEALPHA_InvSrc:	return GL_ONE_MINUS_SRC_ALPHA;
+	default:	return GL_ZERO;
+	}
+}
+
+
+void OpenGLSWFrameBuffer::SetColorOverlay(uint32_t color, float alpha, uint32_t &color0, uint32_t &color1)
+{
+	// Derives the two vertex colors from an overlay color and an alpha.
+	// Without overlay alpha, color0 is 0 and color1 is white with the
+	// given alpha.
+	if (APART(color) != 0)
+	{
+		// Rescale the overlay alpha from 0..255 to 0..256 so that the
+		// products below can be shifted instead of divided.
+		int a = APART(color) * 256 / 255;
+		color0 = ColorRGBA(
+			(RPART(color) * a) >> 8,
+			(GPART(color) * a) >> 8,
+			(BPART(color) * a) >> 8,
+			0);
+		a = 256 - a;
+		color1 = ColorRGBA(a, a, a, int(alpha * 255));
+	}
+	else
+	{
+		color0 = 0;
+		color1 = ColorValue(1, 1, 1, alpha);
+	}
+}
+
+void OpenGLSWFrameBuffer::EnableAlphaTest(BOOL enabled)
+{
+	// Only the cached flag is updated; the GL call is commented out.
+	if (enabled != AlphaTestEnabled)
+	{
+		AlphaTestEnabled = enabled;
+		//glEnable(GL_ALPHA_TEST); // To do: move to shader as this is only in the compatibility profile
+	}
+}
+
+void OpenGLSWFrameBuffer::SetAlphaBlend(int op, int srcblend, int destblend)
+{
+	// Applies a blend state, skipping GL calls for values that match the
+	// cached ones. An op of 0 turns blending off.
+	if (op == 0)
+	{ // Disable alpha blend
+		if (AlphaBlendEnabled)
+		{
+			AlphaBlendEnabled = FALSE;
+			glDisable(GL_BLEND);
+		}
+	}
+	else
+	{ // Enable alpha blend
+		// The factors are deliberately not asserted to be non-zero:
+		// GL_ZERO is 0 and GetStyleAlpha() returns it as a valid factor.
+
+		if (!AlphaBlendEnabled)
+		{
+			AlphaBlendEnabled = TRUE;
+			glEnable(GL_BLEND);
+		}
+		if (AlphaBlendOp != op)
+		{
+			AlphaBlendOp = op;
+			glBlendEquation(op);
+		}
+		if (AlphaSrcBlend != srcblend || AlphaDestBlend != destblend)
+		{
+			AlphaSrcBlend = srcblend;
+			AlphaDestBlend = destblend;
+			glBlendFunc(srcblend, destblend);
+		}
+	}
+}
+
+void OpenGLSWFrameBuffer::SetConstant(int cnum, float r, float g, float b, float a)
+{
+	if (Constant[cnum][0]
!= r || + Constant[cnum][1] != g || + Constant[cnum][2] != b || + Constant[cnum][3] != a) + { + Constant[cnum][0] = r; + Constant[cnum][1] = g; + Constant[cnum][2] = b; + Constant[cnum][3] = a; + SetPixelShaderConstantF(cnum, Constant[cnum], 1); + } +} + +void OpenGLSWFrameBuffer::SetPixelShader(HWPixelShader *shader) +{ + if (CurPixelShader != shader) + { + CurPixelShader = shader; + SetHWPixelShader(shader); + } +} + +void OpenGLSWFrameBuffer::SetTexture(int tnum, HWTexture *texture) +{ + assert(unsigned(tnum) < countof(Texture)); + if (texture) + { + if (Texture[tnum] != texture || SamplerWrapS[tnum] != texture->WrapS || SamplerWrapT[tnum] != texture->WrapT) + { + Texture[tnum] = texture; + glActiveTexture(GL_TEXTURE0 + tnum); + glBindTexture(GL_TEXTURE_2D, texture->Handle); + if (Texture[tnum]->WrapS != SamplerWrapS[tnum]) + { + Texture[tnum]->WrapS = SamplerWrapS[tnum]; + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, SamplerWrapS[tnum]); + } + if (Texture[tnum]->WrapT != SamplerWrapT[tnum]) + { + Texture[tnum]->WrapT = SamplerWrapT[tnum]; + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, SamplerWrapT[tnum]); + } + } + } + else if (Texture[tnum] != texture) + { + Texture[tnum] = texture; + glActiveTexture(GL_TEXTURE0 + tnum); + glBindTexture(GL_TEXTURE_2D, 0); + } +} + +void OpenGLSWFrameBuffer::SetSamplerWrapS(int tnum, int mode) +{ + assert(unsigned(tnum) < countof(Texture)); + if (Texture[tnum] && SamplerWrapS[tnum] != mode) + { + SamplerWrapS[tnum] = mode; + Texture[tnum]->WrapS = mode; + glActiveTexture(GL_TEXTURE0 + tnum); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, SamplerWrapS[tnum]); + } +} + +void OpenGLSWFrameBuffer::SetSamplerWrapT(int tnum, int mode) +{ + assert(unsigned(tnum) < countof(Texture)); + if (Texture[tnum] && SamplerWrapT[tnum] != mode) + { + SamplerWrapT[tnum] = mode; + Texture[tnum]->WrapT = mode; + glActiveTexture(GL_TEXTURE0 + tnum); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, SamplerWrapT[tnum]); + } +} + +void 
OpenGLSWFrameBuffer::SetPaletteTexture(HWTexture *texture, int count, uint32_t border_color)
+{
+	// The pixel shader receives color indexes in the range [0.0,1.0].
+	// The palette texture is also addressed in the range [0.0,1.0],
+	// HOWEVER the coordinate 1.0 is the right edge of the texture and
+	// not actually the texture itself. We need to scale and shift
+	// the palette indexes so they lie exactly in the center of each
+	// texel. For a normal palette with 256 entries, that means the
+	// range we use should be [0.5,255.5], adjusted so the coordinate
+	// is still within [0.0,1.0].
+	//
+	// The constant register c2 is used to hold the multiplier in the
+	// x part and the adder in the y part.
+	float fcount = 1 / float(count);
+	SetConstant(PSCONST_PaletteMod, 255 * fcount, 0.5f * fcount, 0, 0);
+	SetTexture(1, texture);
+}
diff --git a/src/gl/system/gl_swframebuffer.h b/src/gl/system/gl_swframebuffer.h
new file mode 100644
index 0000000000..30e7715abc
--- /dev/null
+++ b/src/gl/system/gl_swframebuffer.h
@@ -0,0 +1,470 @@
+#ifndef __GL_SWFRAMEBUFFER
+#define __GL_SWFRAMEBUFFER
+
+#ifdef _WIN32
+#include "win32iface.h"
+#include "win32gliface.h"
+#endif
+
+#include "SkylineBinPack.h"
+
+#include <memory> // NOTE(review): header name was lost in extraction; <memory> presumed -- confirm against the upstream commit
+
+class FGLDebug;
+
+#ifdef _WIN32
+class OpenGLSWFrameBuffer : public Win32GLFrameBuffer
+{
+	typedef Win32GLFrameBuffer Super;
+	DECLARE_CLASS(OpenGLSWFrameBuffer, Win32GLFrameBuffer)
+#else
+#include "sdlglvideo.h"
+class OpenGLSWFrameBuffer : public SDLGLFB // name must match DECLARE_CLASS and the constructors declared below
+{
+//	typedef SDLGLFB Super;	//[C]commented, DECLARE_CLASS defines this in linux
+	DECLARE_CLASS(OpenGLSWFrameBuffer, SDLGLFB)
+#endif
+
+
+public:
+
+	explicit OpenGLSWFrameBuffer() {}
+	OpenGLSWFrameBuffer(void *hMonitor, int width, int height, int bits, int refreshHz, bool fullscreen);
+	~OpenGLSWFrameBuffer();
+
+	bool IsValid() override;
+	bool Lock(bool buffered) override;
+	void Unlock() override;
+	void Update() override;
+	PalEntry *GetPalette() override;
+	void GetFlashedPalette(PalEntry 
palette[256]) override; + void UpdatePalette() override; + bool SetGamma(float gamma) override; + bool SetFlash(PalEntry rgb, int amount) override; + void GetFlash(PalEntry &rgb, int &amount) override; + int GetPageCount() override; + bool IsFullscreen() override; + void PaletteChanged() override; + int QueryNewPalette() override; + void Blank() override; + bool PaintToWindow() override; + void SetVSync(bool vsync) override; + void NewRefreshRate() override; + void GetScreenshotBuffer(const uint8_t *&buffer, int &pitch, ESSType &color_type) override; + void ReleaseScreenshotBuffer() override; + void SetBlendingRect(int x1, int y1, int x2, int y2) override; + bool Begin2D(bool copy3d) override; + void DrawBlendingRect() override; + FNativeTexture *CreateTexture(FTexture *gametex, bool wrapping) override; + FNativePalette *CreatePalette(FRemapTable *remap) override; + void DrawTextureParms(FTexture *img, DrawParms &parms) override; + void Clear(int left, int top, int right, int bottom, int palcolor, uint32 color) override; + void Dim(PalEntry color, float amount, int x1, int y1, int w, int h) override; + void FlatFill(int left, int top, int right, int bottom, FTexture *src, bool local_origin) override; + void DrawLine(int x0, int y0, int x1, int y1, int palColor, uint32 realcolor) override; + void DrawPixel(int x, int y, int palcolor, uint32 rgbcolor) override; + void FillSimplePoly(FTexture *tex, FVector2 *points, int npoints, double originx, double originy, double scalex, double scaley, DAngle rotation, FDynamicColormap *colormap, int lightlevel) override; + //bool WipeStartScreen(int type) override; + //void WipeEndScreen() override; + //bool WipeDo(int ticks) override; + //void WipeCleanup() override; + bool Is8BitMode() override { return false; } + int GetTrueHeight() override { return TrueHeight; } + +private: + struct FBVERTEX + { + FLOAT x, y, z, rhw; + uint32_t color0, color1; + FLOAT tu, tv; + }; + //#define D3DFVF_FBVERTEX (D3DFVF_XYZRHW | D3DFVF_DIFFUSE | 
D3DFVF_SPECULAR | D3DFVF_TEX1) + + struct GammaRamp + { + uint16_t red[256], green[256], blue[256]; + }; + + struct LockedRect + { + int Pitch; + void *pBits; + }; + + struct LTRBRect + { + int left, top, right, bottom; + }; + + class HWSurface + { + public: + bool LockRect(LockedRect *outRect, LTRBRect *srcRect, bool discard) { outRect->Pitch = 0; outRect->pBits = nullptr; return false; } + void UnlockRect() { } + }; + + class HWTexture + { + public: + bool LockRect(LockedRect *outRect, LTRBRect *srcRect, bool discard) { outRect->Pitch = 0; outRect->pBits = nullptr; return false; } + void UnlockRect() { } + bool GetSurfaceLevel(int level, HWSurface **outSurface) { *outSurface = nullptr; return false; } + + int Handle = 0; + int WrapS = 0; + int WrapT = 0; + int Format = 0; + }; + + class HWVertexBuffer + { + public: + FBVERTEX *Lock() { return nullptr; } + void Unlock() { } + }; + + class HWIndexBuffer + { + public: + uint16_t *Lock() { return nullptr; } + void Unlock() { } + }; + + class HWPixelShader + { + public: + }; + + bool CreatePixelShader(const void *src, HWPixelShader **outShader) { *outShader = nullptr; return false; } + bool CreateVertexBuffer(int size, HWVertexBuffer **outVertexBuffer) { *outVertexBuffer = nullptr; return false; } + bool CreateIndexBuffer(int size, HWIndexBuffer **outIndexBuffer) { *outIndexBuffer = nullptr; return false; } + bool CreateOffscreenPlainSurface(int width, int height, int format, HWSurface **outSurface) { *outSurface = nullptr; return false; } + bool CreateTexture(int width, int height, int levels, int format, HWTexture **outTexture) { *outTexture = nullptr; return false; } + bool CreateRenderTarget(int width, int height, int format, HWSurface **outSurface) { *outSurface = nullptr; return false; } + bool GetBackBuffer(HWSurface **outSurface) { *outSurface = nullptr; return false; } + bool GetRenderTarget(int index, HWSurface **outSurface) { *outSurface = nullptr; return false; } + void GetRenderTargetData(HWSurface *a, 
HWSurface *b) { }
+	void ColorFill(HWSurface *surface, float red, float green, float blue) { }
+	void StretchRect(HWSurface *src, const LTRBRect *srcrect, HWSurface *dest) { }
+	bool SetRenderTarget(int index, HWSurface *surface) { return true; }
+	void SetGammaRamp(const GammaRamp *ramp) { }
+	void SetPixelShaderConstantF(int uniformIndex, const float *data, int vec4fcount) { }
+	void SetHWPixelShader(HWPixelShader *shader) { }
+	void SetStreamSource(HWVertexBuffer *vertexBuffer) { }
+	void SetIndices(HWIndexBuffer *indexBuffer) { }
+	void DrawTriangleFans(int count, const FBVERTEX *vertices) { }
+	void DrawPoints(int count, const FBVERTEX *vertices) { }
+	void DrawLineList(int count) { }
+	void DrawTriangleList(int minIndex, int numVertices, int startIndex, int primitiveCount) { }
+	void Present() { }
+
+	static uint32_t ColorARGB(uint32_t a, uint32_t r, uint32_t g, uint32_t b) { return ((a & 0xff) << 24) | ((r & 0xff) << 16) | ((g & 0xff) << 8) | ((b) & 0xff); } // packs 0xAARRGGBB
+	static uint32_t ColorRGBA(uint32_t r, uint32_t g, uint32_t b, uint32_t a) { return ColorARGB(a, r, g, b); } // same packing as ColorARGB; arguments in r,g,b,a order as ColorValue() passes them
+	static uint32_t ColorXRGB(uint32_t r, uint32_t g, uint32_t b) { return ColorARGB(0xff, r, g, b); } // opaque color
+	static uint32_t ColorValue(float r, float g, float b, float a) { return ColorRGBA((uint32_t)(r * 255.0f), (uint32_t)(g * 255.0f), (uint32_t)(b * 255.0f), (uint32_t)(a * 255.0f)); } // each channel is scaled by 255 before packing
+
+	// The number of points for the vertex buffer.
+	enum { NUM_VERTS = 10240 };
+
+	// The number of indices for the index buffer.
+	enum { NUM_INDEXES = ((NUM_VERTS * 6) / 4) };
+
+	// The number of quads we can batch together.
+	enum { MAX_QUAD_BATCH = (NUM_INDEXES / 6) };
+
+	// The default size for a texture atlas.
+ enum { DEF_ATLAS_WIDTH = 512 }; + enum { DEF_ATLAS_HEIGHT = 512 }; + + // TYPES ------------------------------------------------------------------- + + struct Atlas; + + struct PackedTexture + { + Atlas *Owner; + + PackedTexture **Prev, *Next; + + // Pixels this image covers + LTRBRect Area; + + // Texture coordinates for this image + float Left, Top, Right, Bottom; + + // Texture has extra space on the border? + bool Padded; + }; + + struct Atlas + { + Atlas(OpenGLSWFrameBuffer *fb, int width, int height, int format); + ~Atlas(); + + PackedTexture *AllocateImage(const Rect &rect, bool padded); + void FreeBox(PackedTexture *box); + + SkylineBinPack Packer; + Atlas *Next; + HWTexture *Tex; + int Format; + PackedTexture *UsedList; // Boxes that contain images + int Width, Height; + bool OneUse; + }; + + class OpenGLTex : public FNativeTexture + { + public: + OpenGLTex(FTexture *tex, OpenGLSWFrameBuffer *fb, bool wrapping); + ~OpenGLTex(); + + FTexture *GameTex; + PackedTexture *Box; + + OpenGLTex **Prev; + OpenGLTex *Next; + + bool IsGray; + + bool Create(OpenGLSWFrameBuffer *fb, bool wrapping); + bool Update(); + bool CheckWrapping(bool wrapping); + int GetTexFormat(); + FTextureFormat ToTexFmt(int fmt); + }; + + class OpenGLPal : public FNativePalette + { + public: + OpenGLPal(FRemapTable *remap, OpenGLSWFrameBuffer *fb); + ~OpenGLPal(); + + OpenGLPal **Prev; + OpenGLPal *Next; + + HWTexture *Tex; + uint32_t BorderColor; + bool DoColorSkip; + + bool Update(); + + FRemapTable *Remap; + int RoundedPaletteSize; + }; + + // Flags for a buffered quad + enum + { + BQF_GamePalette = 1, + BQF_CustomPalette = 7, + BQF_Paletted = 7, + BQF_Bilinear = 8, + BQF_WrapUV = 16, + BQF_InvertSource = 32, + BQF_DisableAlphaTest = 64, + BQF_Desaturated = 128, + }; + + // Shaders for a buffered quad + enum + { + BQS_PalTex, + BQS_Plain, + BQS_RedToAlpha, + BQS_ColorOnly, + BQS_SpecialColormap, + BQS_InGameColormap, + }; + + struct PackedTexture; + struct Atlas; + + struct BufferedTris 
+ { + union + { + struct + { + uint8_t Flags; + uint8_t ShaderNum : 4; + int BlendOp; + int SrcBlend, DestBlend; + }; + uint32_t Group1; + }; + uint8_t Desat; + OpenGLPal *Palette; + HWTexture *Texture; + uint16_t NumVerts; // Number of _unique_ vertices used by this set. + uint16_t NumTris; // Number of triangles used by this set. + }; + + enum + { + PSCONST_Desaturation = 1, + PSCONST_PaletteMod = 2, + PSCONST_Weights = 6, + PSCONST_Gamma = 7, + }; + enum + { + SHADER_NormalColor, + SHADER_NormalColorPal, + SHADER_NormalColorInv, + SHADER_NormalColorPalInv, + + SHADER_RedToAlpha, + SHADER_RedToAlphaInv, + + SHADER_VertexColor, + + SHADER_SpecialColormap, + SHADER_SpecialColormapPal, + + SHADER_InGameColormap, + SHADER_InGameColormapDesat, + SHADER_InGameColormapInv, + SHADER_InGameColormapInvDesat, + SHADER_InGameColormapPal, + SHADER_InGameColormapPalDesat, + SHADER_InGameColormapPalInv, + SHADER_InGameColormapPalInvDesat, + + SHADER_BurnWipe, + SHADER_GammaCorrection, + + NUM_SHADERS + }; + static const char *const ShaderNames[NUM_SHADERS]; + + void Flip(); + void SetInitialState(); + bool CreateResources(); + void ReleaseResources(); + bool LoadShaders(); + void CreateBlockSurfaces(); + bool CreateFBTexture(); + bool CreatePaletteTexture(); + bool CreateGammaTexture(); + bool CreateVertexes(); + void UploadPalette(); + void UpdateGammaTexture(float igamma); + void CalcFullscreenCoords(FBVERTEX verts[4], bool viewarea_only, bool can_double, uint32_t color0, uint32_t color1) const; + bool Reset(); + HWTexture *GetCurrentScreen(); + void ReleaseDefaultPoolItems(); + void KillNativePals(); + void KillNativeTexs(); + PackedTexture *AllocPackedTexture(int width, int height, bool wrapping, int format); + void DrawPackedTextures(int packnum); + void DrawLetterbox(); + void Draw3DPart(bool copy3d); + bool SetStyle(OpenGLTex *tex, DrawParms &parms, uint32_t &color0, uint32_t &color1, BufferedTris &quad); + static int GetStyleAlpha(int type); + static void 
SetColorOverlay(uint32_t color, float alpha, uint32_t &color0, uint32_t &color1);
+	void DoWindowedGamma();
+	void AddColorOnlyQuad(int left, int top, int width, int height, uint32_t color);
+	void AddColorOnlyRect(int left, int top, int width, int height, uint32_t color);
+	void CheckQuadBatch(int numtris = 2, int numverts = 4);
+	void BeginQuadBatch();
+	void EndQuadBatch();
+	void BeginLineBatch();
+	void EndLineBatch();
+	void EndBatch();
+	void CopyNextFrontBuffer();
+
+	// State
+	void EnableAlphaTest(BOOL enabled);
+	void SetAlphaBlend(int op, int srcblend = 0, int destblend = 0);
+	void SetConstant(int cnum, float r, float g, float b, float a);
+	void SetPixelShader(HWPixelShader *shader);
+	void SetTexture(int tnum, HWTexture *texture);
+	void SetSamplerWrapS(int tnum, int mode);
+	void SetSamplerWrapT(int tnum, int mode);
+	void SetPaletteTexture(HWTexture *texture, int count, uint32_t border_color);
+
+	template<typename T> static void SafeRelease(T &x) { if (x != nullptr) { delete x; x = nullptr; } } // deletes x and nulls the caller's pointer
+
+	BOOL AlphaTestEnabled;
+	BOOL AlphaBlendEnabled;
+	int AlphaBlendOp;
+	int AlphaSrcBlend;
+	int AlphaDestBlend;
+	float Constant[3][4];
+	uint32_t CurBorderColor;
+	HWPixelShader *CurPixelShader;
+	HWTexture *Texture[5];
+	int SamplerWrapS[5], SamplerWrapT[5];
+
+	PalEntry SourcePalette[256];
+	uint32_t BorderColor;
+	uint32_t FlashColor0, FlashColor1;
+	PalEntry FlashColor;
+	int FlashAmount;
+	int TrueHeight;
+	int PixelDoubling;
+	int SkipAt;
+	int LBOffsetI;
+	int RenderTextureToggle;
+	int CurrRenderTexture;
+	float LBOffset;
+	float Gamma;
+	bool UpdatePending;
+	bool NeedPalUpdate;
+	bool NeedGammaUpdate;
+	int FBWidth, FBHeight;
+	bool VSync;
+	LTRBRect BlendingRect;
+	int In2D;
+	bool InScene;
+	bool GatheringWipeScreen;
+	bool AALines;
+	uint8_t BlockNum;
+	OpenGLPal *Palettes;
+	OpenGLTex *Textures;
+	Atlas *Atlases;
+
+	HWTexture *FBTexture;
+	HWTexture *TempRenderTexture, *RenderTexture[2];
+	HWTexture *PaletteTexture;
+	HWTexture *GammaTexture;
+	
HWTexture *ScreenshotTexture; + HWSurface *ScreenshotSurface; + HWSurface *FrontCopySurface; + + HWVertexBuffer *VertexBuffer; + FBVERTEX *VertexData; + HWIndexBuffer *IndexBuffer; + uint16_t *IndexData; + BufferedTris *QuadExtra; + int VertexPos; + int IndexPos; + int QuadBatchPos; + enum { BATCH_None, BATCH_Quads, BATCH_Lines } BatchType; + + HWPixelShader *Shaders[NUM_SHADERS]; + HWPixelShader *GammaShader; + + HWSurface *BlockSurface[2]; + HWSurface *OldRenderTarget; + HWTexture *InitialWipeScreen, *FinalWipeScreen; + + class Wiper + { + public: + virtual ~Wiper(); + virtual bool Run(int ticks, OpenGLSWFrameBuffer *fb) = 0; + + //void DrawScreen(OpenGLSWFrameBuffer *fb, HWTexture *tex, int blendop = 0, uint32_t color0 = 0, uint32_t color1 = 0xFFFFFFF); + }; + + class Wiper_Melt; friend class Wiper_Melt; + class Wiper_Burn; friend class Wiper_Burn; + class Wiper_Crossfade; friend class Wiper_Crossfade; + + Wiper *ScreenWipe; +}; + + +#endif //__GL_SWFRAMEBUFFER diff --git a/src/textures/textures.h b/src/textures/textures.h index 477c39ecc6..af72dabc0d 100644 --- a/src/textures/textures.h +++ b/src/textures/textures.h @@ -292,6 +292,7 @@ public: static void FlipNonSquareBlockRemap (BYTE *blockto, const BYTE *blockfrom, int x, int y, int srcpitch, const BYTE *remap); friend class D3DTex; + friend class OpenGLSWFrameBuffer; public: From 1fab0cc5142e83958e70334ff6efc3395554c789 Mon Sep 17 00:00:00 2001 From: raa-eruanna Date: Mon, 26 Sep 2016 07:24:37 -0400 Subject: [PATCH 174/912] - Added: PlayerPawn property "Player.ViewBob" which acts as a MoveBob/StillBob multiplier. 
--- src/d_player.h | 3 +++ src/p_pspr.cpp | 5 +++-- src/p_user.cpp | 8 +++++--- src/thingdef/thingdef_properties.cpp | 20 ++++++++++++++++++++ wadsrc/static/actors/shared/player.txt | 1 + 5 files changed, 32 insertions(+), 5 deletions(-) diff --git a/src/d_player.h b/src/d_player.h index e944d67348..7f57837c70 100644 --- a/src/d_player.h +++ b/src/d_player.h @@ -168,6 +168,9 @@ public: // [CW] Fades for when you are being damaged. PalEntry DamageFade; + // [SP] ViewBob Multiplier + double ViewBob; + bool UpdateWaterLevel (bool splash); bool ResetAirSupply (bool playgasp = true); diff --git a/src/p_pspr.cpp b/src/p_pspr.cpp index fab7e38d8d..e97d2c0208 100644 --- a/src/p_pspr.cpp +++ b/src/p_pspr.cpp @@ -582,8 +582,9 @@ void P_BobWeapon (player_t *player, float *x, float *y, double ticfrac) if (curbob != 0) { - float bobx = float(player->bob * Rangex); - float boby = float(player->bob * Rangey); + //[SP] Added in decorate player.viewbob checks + float bobx = float(player->bob * Rangex * (float)player->mo->ViewBob); + float boby = float(player->bob * Rangey * (float)player->mo->ViewBob); switch (bobstyle) { case AWeapon::BobNormal: diff --git a/src/p_user.cpp b/src/p_user.cpp index 0f3b6a516d..845cf9a5f4 100644 --- a/src/p_user.cpp +++ b/src/p_user.cpp @@ -651,7 +651,8 @@ void APlayerPawn::Serialize(FSerializer &arc) ("fallingscreammaxn", FallingScreamMaxSpeed, def->FallingScreamMaxSpeed) ("userange", UseRange, def->UseRange) ("aircapacity", AirCapacity, def->AirCapacity) - ("viewheight", ViewHeight, def->ViewHeight); + ("viewheight", ViewHeight, def->ViewHeight) + ("viewbob", ViewBob, def->ViewBob); } //=========================================================================== @@ -1844,11 +1845,12 @@ void P_CalcHeight (player_t *player) return; } + //[SP] Added (x*player->mo->ViewBob) to allow DECORATE changes to view bobbing speed. if (still) { if (player->health > 0) { - angle = level.time / (120 * TICRATE / 35.) 
* 360.; + angle = level.time / (120 * TICRATE / 35.) * 360. * player->mo->ViewBob; bob = player->userinfo.GetStillBob() * angle.Sin(); } else @@ -1858,7 +1860,7 @@ void P_CalcHeight (player_t *player) } else { - angle = level.time / (20 * TICRATE / 35.) * 360.; + angle = level.time / (20 * TICRATE / 35.) * 360. * player->mo->ViewBob; bob = player->bob * angle.Sin() * (player->mo->waterlevel > 1 ? 0.25f : 0.5f); } diff --git a/src/thingdef/thingdef_properties.cpp b/src/thingdef/thingdef_properties.cpp index d60cda823b..5b014bf28b 100644 --- a/src/thingdef/thingdef_properties.cpp +++ b/src/thingdef/thingdef_properties.cpp @@ -2945,6 +2945,26 @@ DEFINE_CLASS_PROPERTY_PREFIX(player, weaponslot, ISsssssssssssssssssssssssssssss } } +//========================================================================== +// +// [SP] Player.Viewbob +// +//========================================================================== +DEFINE_CLASS_PROPERTY_PREFIX(player, viewbob, F, PlayerPawn) +{ + PROP_DOUBLE_PARM(z, 0); + // [SP] Hard limits. This is to prevent terrywads from making players sick. + // Remember - this messes with a user option who probably has it set a + // certain way for a reason. I think a 1.5 limit is pretty generous, but + // it may be safe to increase it. I really need opinions from people who + // could be affected by this. 
+ if (z < 0.0 || z > 1.5) + { + I_Error("ViewBob must be between 0.0 and 1.5."); + } + defaults->ViewBob = z; +} + //========================================================================== // //========================================================================== diff --git a/wadsrc/static/actors/shared/player.txt b/wadsrc/static/actors/shared/player.txt index ac5ceb0a7a..08d9a252ac 100644 --- a/wadsrc/static/actors/shared/player.txt +++ b/wadsrc/static/actors/shared/player.txt @@ -33,6 +33,7 @@ Actor PlayerPawn : Actor native Player.MugShotMaxHealth 0 Player.FlechetteType "ArtiPoisonBag3" Player.AirCapacity 1 + Player.ViewBob 1 Obituary "$OB_MPDEFAULT" } From 40df46f94e250000f471870229931a00a027b32f Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 10 Oct 2016 07:39:02 +0200 Subject: [PATCH 175/912] Implement OpenGL versions of GPU objects --- src/gl/system/gl_swframebuffer.cpp | 313 +++++++++++++++++++++++++---- src/gl/system/gl_swframebuffer.h | 75 ++++--- 2 files changed, 327 insertions(+), 61 deletions(-) diff --git a/src/gl/system/gl_swframebuffer.cpp b/src/gl/system/gl_swframebuffer.cpp index e0ff2be477..e03f1fd9c5 100644 --- a/src/gl/system/gl_swframebuffer.cpp +++ b/src/gl/system/gl_swframebuffer.cpp @@ -92,30 +92,30 @@ IMPLEMENT_CLASS(OpenGLSWFrameBuffer) const char *const OpenGLSWFrameBuffer::ShaderNames[OpenGLSWFrameBuffer::NUM_SHADERS] = { - "NormalColor.fp", - "NormalColorPal.fp", - "NormalColorInv.fp", - "NormalColorPalInv.fp", + "NormalColor", + "NormalColorPal", + "NormalColorInv", + "NormalColorPalInv", - "RedToAlpha.fp", - "RedToAlphaInv.fp", + "RedToAlpha", + "RedToAlphaInv", - "VertexColor.fp", + "VertexColor", - "SpecialColormap.fp", - "SpecialColorMapPal.fp", + "SpecialColormap", + "SpecialColorMapPal", - "InGameColormap.fp", - "InGameColormapDesat.fp", - "InGameColormapInv.fp", - "InGameColormapInvDesat.fp", - "InGameColormapPal.fp", - "InGameColormapPalDesat.fp", - "InGameColormapPalInv.fp", - "InGameColormapPalInvDesat.fp", 
+	"InGameColormap",
+	"InGameColormapDesat",
+	"InGameColormapInv",
+	"InGameColormapInvDesat",
+	"InGameColormapPal",
+	"InGameColormapPalDesat",
+	"InGameColormapPalInv",
+	"InGameColormapPalInvDesat",
 
-	"BurnWipe.fp",
-	"GammaCorrection.fp",
+	"BurnWipe",
+	"GammaCorrection",
 };
 
 OpenGLSWFrameBuffer::OpenGLSWFrameBuffer(void *hMonitor, int width, int height, int bits, int refreshHz, bool fullscreen) :
@@ -209,6 +209,256 @@ OpenGLSWFrameBuffer::~OpenGLSWFrameBuffer()
 	delete[] QuadExtra;
 }
 
+bool OpenGLSWFrameBuffer::CreatePixelShader(const void *vertexsrc, const void *fragmentsrc, HWPixelShader **outShader)
+{
+	auto shader = std::make_unique<HWPixelShader>(); // dropped (and *outShader set to nullptr) on any failure below
+
+	shader->Program = glCreateProgram();
+	shader->VertexShader = glCreateShader(GL_VERTEX_SHADER);
+	shader->FragmentShader = glCreateShader(GL_FRAGMENT_SHADER);
+
+	{
+		int lengths[1] = { (int)strlen((const char*)vertexsrc) };
+		const char *sources[1] = { (const char*)vertexsrc };
+		glShaderSource(shader->VertexShader, 1, sources, lengths);
+		glCompileShader(shader->VertexShader);
+	}
+
+	{
+		int lengths[1] = { (int)strlen((const char*)fragmentsrc) };
+		const char *sources[1] = { (const char*)fragmentsrc };
+		glShaderSource(shader->FragmentShader, 1, sources, lengths);
+		glCompileShader(shader->FragmentShader);
+	}
+
+	GLint status = 0;
+	glGetShaderiv(shader->VertexShader, GL_COMPILE_STATUS, &status);
+	if (status != GL_FALSE) glGetShaderiv(shader->FragmentShader, GL_COMPILE_STATUS, &status); // only query the fragment stage if the vertex stage compiled
+	if (status == GL_FALSE)
+	{
+		*outShader = nullptr;
+		return false;
+	}
+
+	glAttachShader(shader->Program, shader->VertexShader);
+	glAttachShader(shader->Program, shader->FragmentShader);
+	glBindFragDataLocation(shader->Program, 0, "FragColor");
+	glBindAttribLocation(shader->Program, 0, "Position"); // attribute bindings only take effect at link time, so set them before glLinkProgram
+	glBindAttribLocation(shader->Program, 1, "Color0");
+	glBindAttribLocation(shader->Program, 2, "Color1");
+	glBindAttribLocation(shader->Program, 3, "TexCoord");
+	glLinkProgram(shader->Program);
+	glGetProgramiv(shader->Program, GL_LINK_STATUS, &status);
+	if (status == GL_FALSE)
+	{
+		*outShader = nullptr;
+		return false;
+	}
+
+	*outShader = shader.release();
+	return true;
+}
+
+bool OpenGLSWFrameBuffer::CreateVertexBuffer(int size, HWVertexBuffer **outVertexBuffer)
+{
+	auto obj = std::make_unique<HWVertexBuffer>();
+
+	GLint oldBinding = 0;
+	glGetIntegerv(GL_VERTEX_ARRAY_BINDING, &oldBinding); // saved so the caller's vertex array binding can be restored
+
+	glGenVertexArrays(1, (GLuint*)&obj->VertexArray);
+	glGenBuffers(1, (GLuint*)&obj->Buffer);
+	glBindVertexArray(obj->VertexArray);
+	glBindBuffer(GL_ARRAY_BUFFER, obj->Buffer);
+	glBufferData(GL_ARRAY_BUFFER, size, nullptr, GL_STREAM_DRAW);
+	glEnableVertexAttribArray(0);
+	glEnableVertexAttribArray(1);
+	glEnableVertexAttribArray(2);
+	glEnableVertexAttribArray(3);
+	glVertexAttribPointer(0, 4, GL_FLOAT, GL_FALSE, sizeof(FBVERTEX), (const GLvoid*)offsetof(FBVERTEX, x));
+	glVertexAttribPointer(1, 4, GL_UNSIGNED_BYTE, GL_TRUE, sizeof(FBVERTEX), (const GLvoid*)offsetof(FBVERTEX, color0));
+	glVertexAttribPointer(2, 4, GL_UNSIGNED_BYTE, GL_TRUE, sizeof(FBVERTEX), (const GLvoid*)offsetof(FBVERTEX, color1));
+	glVertexAttribPointer(3, 2, GL_FLOAT, GL_FALSE, sizeof(FBVERTEX), (const GLvoid*)offsetof(FBVERTEX, tu));
+
+	glBindBuffer(GL_ARRAY_BUFFER, 0);
+	glBindVertexArray(oldBinding);
+
+	*outVertexBuffer = obj.release();
+	return true;
+}
+
+bool OpenGLSWFrameBuffer::CreateIndexBuffer(int size, HWIndexBuffer **outIndexBuffer)
+{
+	auto obj = std::make_unique<HWIndexBuffer>();
+
+	GLint oldBinding = 0;
+	glGetIntegerv(GL_ELEMENT_ARRAY_BUFFER_BINDING, &oldBinding); // saved so the caller's index buffer binding can be restored
+
+	glGenBuffers(1, (GLuint*)&obj->Buffer);
+	glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, obj->Buffer);
+	glBufferData(GL_ELEMENT_ARRAY_BUFFER, size, nullptr, GL_STREAM_DRAW);
+
+	glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, oldBinding);
+
+	*outIndexBuffer = obj.release();
+	return true;
+}
+
+bool OpenGLSWFrameBuffer::CreateTexture(int width, int height, int levels, int format, HWTexture **outTexture)
+{
+	auto obj = std::make_unique<HWTexture>();
+
+	GLint oldBinding = 0;
+	
glGetIntegerv(GL_TEXTURE_BINDING_2D, &oldBinding); + + glGenTextures(1, (GLuint*)&obj->Texture); + glBindTexture(GL_TEXTURE_2D, obj->Texture); + GLenum srcformat; + switch (format) + { + case GL_R8: srcformat = GL_RED; break; + case GL_RGBA8: srcformat = GL_RGBA; break; + case GL_COMPRESSED_RGB_S3TC_DXT1_EXT: srcformat = GL_RGB; break; + case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT: srcformat = GL_RGBA; break; + case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT: srcformat = GL_RGBA; break; + case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT: srcformat = GL_RGBA; break; + default: + I_FatalError("Unknown format passed to CreateTexture"); + return false; + } + glTexImage2D(GL_TEXTURE_2D, 0, format, width, height, 0, srcformat, GL_UNSIGNED_BYTE, nullptr); + + glBindTexture(GL_TEXTURE_2D, oldBinding); + + *outTexture = obj.release(); + return true; +} + +bool OpenGLSWFrameBuffer::CreateOffscreenPlainSurface(int width, int height, int format, HWSurface **outSurface) { *outSurface = nullptr; return false; } +bool OpenGLSWFrameBuffer::CreateRenderTarget(int width, int height, int format, HWSurface **outSurface) { *outSurface = nullptr; return false; } +bool OpenGLSWFrameBuffer::GetBackBuffer(HWSurface **outSurface) { *outSurface = nullptr; return false; } +bool OpenGLSWFrameBuffer::GetRenderTarget(int index, HWSurface **outSurface) { *outSurface = nullptr; return false; } +void OpenGLSWFrameBuffer::GetRenderTargetData(HWSurface *a, HWSurface *b) { } + +void OpenGLSWFrameBuffer::ColorFill(HWSurface *surface, float red, float green, float blue) { } +void OpenGLSWFrameBuffer::StretchRect(HWSurface *src, const LTRBRect *srcrect, HWSurface *dest) { } +bool OpenGLSWFrameBuffer::SetRenderTarget(int index, HWSurface *surface) { return true; } +void OpenGLSWFrameBuffer::SetGammaRamp(const GammaRamp *ramp) { } + +void OpenGLSWFrameBuffer::SetPixelShaderConstantF(int uniformIndex, const float *data, int vec4fcount) +{ + glUniform4fv(uniformIndex, vec4fcount, data); +} + +void 
OpenGLSWFrameBuffer::SetHWPixelShader(HWPixelShader *shader) +{ + if (shader) + glUseProgram(shader->Program); + else + glUseProgram(0); +} + +void OpenGLSWFrameBuffer::SetStreamSource(HWVertexBuffer *vertexBuffer) +{ + if (vertexBuffer) + glBindVertexArray(vertexBuffer->VertexArray); + else + glBindVertexArray(0); +} + +void OpenGLSWFrameBuffer::SetIndices(HWIndexBuffer *indexBuffer) +{ + if (indexBuffer) + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, indexBuffer->Buffer); + else + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); +} + +void OpenGLSWFrameBuffer::DrawTriangleFans(int count, const FBVERTEX *vertices) +{ + GLint oldBinding = 0; + glGetIntegerv(GL_VERTEX_ARRAY_BINDING, &oldBinding); + + if (!StreamVertexBuffer) + { + StreamVertexBuffer = std::make_unique(); + glGenVertexArrays(1, (GLuint*)&StreamVertexBuffer->VertexArray); + glGenBuffers(1, (GLuint*)&StreamVertexBuffer->Buffer); + glBindVertexArray(StreamVertexBuffer->VertexArray); + glBindBuffer(GL_ARRAY_BUFFER, StreamVertexBuffer->Buffer); + glBufferData(GL_ARRAY_BUFFER, count * sizeof(FBVERTEX), vertices, GL_STREAM_DRAW); + glEnableVertexAttribArray(0); + glEnableVertexAttribArray(1); + glEnableVertexAttribArray(2); + glEnableVertexAttribArray(3); + glVertexAttribPointer(0, 4, GL_FLOAT, GL_FALSE, sizeof(FBVERTEX), (const GLvoid*)offsetof(FBVERTEX, x)); + glVertexAttribPointer(1, 4, GL_UNSIGNED_BYTE, GL_TRUE, sizeof(FBVERTEX), (const GLvoid*)offsetof(FBVERTEX, color0)); + glVertexAttribPointer(2, 4, GL_UNSIGNED_BYTE, GL_TRUE, sizeof(FBVERTEX), (const GLvoid*)offsetof(FBVERTEX, color1)); + glVertexAttribPointer(3, 2, GL_FLOAT, GL_FALSE, sizeof(FBVERTEX), (const GLvoid*)offsetof(FBVERTEX, tu)); + } + else + { + glBindVertexArray(StreamVertexBuffer->VertexArray); + glBindBuffer(GL_ARRAY_BUFFER, StreamVertexBuffer->Buffer); + glBufferData(GL_ARRAY_BUFFER, count * sizeof(FBVERTEX), vertices, GL_STREAM_DRAW); + } + + glDrawArrays(GL_TRIANGLE_FAN, 0, count); + + glBindBuffer(GL_ARRAY_BUFFER, 0); + 
glBindVertexArray(oldBinding); +} + +void OpenGLSWFrameBuffer::DrawPoints(int count, const FBVERTEX *vertices) +{ + GLint oldBinding = 0; + glGetIntegerv(GL_VERTEX_ARRAY_BINDING, &oldBinding); + + if (!StreamVertexBuffer) + { + StreamVertexBuffer = std::make_unique(); + glGenVertexArrays(1, (GLuint*)&StreamVertexBuffer->VertexArray); + glGenBuffers(1, (GLuint*)&StreamVertexBuffer->Buffer); + glBindVertexArray(StreamVertexBuffer->VertexArray); + glBindBuffer(GL_ARRAY_BUFFER, StreamVertexBuffer->Buffer); + glBufferData(GL_ARRAY_BUFFER, count * sizeof(FBVERTEX), vertices, GL_STREAM_DRAW); + glEnableVertexAttribArray(0); + glEnableVertexAttribArray(1); + glEnableVertexAttribArray(2); + glEnableVertexAttribArray(3); + glVertexAttribPointer(0, 4, GL_FLOAT, GL_FALSE, sizeof(FBVERTEX), (const GLvoid*)offsetof(FBVERTEX, x)); + glVertexAttribPointer(1, 4, GL_UNSIGNED_BYTE, GL_TRUE, sizeof(FBVERTEX), (const GLvoid*)offsetof(FBVERTEX, color0)); + glVertexAttribPointer(2, 4, GL_UNSIGNED_BYTE, GL_TRUE, sizeof(FBVERTEX), (const GLvoid*)offsetof(FBVERTEX, color1)); + glVertexAttribPointer(3, 2, GL_FLOAT, GL_FALSE, sizeof(FBVERTEX), (const GLvoid*)offsetof(FBVERTEX, tu)); + } + else + { + glBindVertexArray(StreamVertexBuffer->VertexArray); + glBindBuffer(GL_ARRAY_BUFFER, StreamVertexBuffer->Buffer); + glBufferData(GL_ARRAY_BUFFER, count * sizeof(FBVERTEX), vertices, GL_STREAM_DRAW); + } + + glDrawArrays(GL_POINTS, 0, count); + + glBindBuffer(GL_ARRAY_BUFFER, 0); + glBindVertexArray(oldBinding); +} + +void OpenGLSWFrameBuffer::DrawLineList(int count) +{ + glDrawArrays(GL_LINES, 0, count); +} + +void OpenGLSWFrameBuffer::DrawTriangleList(int minIndex, int numVertices, int startIndex, int primitiveCount) +{ + glDrawRangeElements(GL_TRIANGLES, minIndex, minIndex + numVertices - 1, primitiveCount * 3, GL_UNSIGNED_SHORT, (const void*)(startIndex * sizeof(uint16_t))); +} + +void OpenGLSWFrameBuffer::Present() +{ + SwapBuffers(); +} + 
//========================================================================== // // OpenGLSWFrameBuffer :: SetInitialState @@ -274,7 +524,6 @@ bool OpenGLSWFrameBuffer::CreateResources() { return false; } - CreateGammaTexture(); CreateBlockSurfaces(); return true; } @@ -293,7 +542,7 @@ bool OpenGLSWFrameBuffer::LoadShaders() static const char models[][4] = { "30/", "20/", "14/" }; FString shaderdir, shaderpath; unsigned model, i; - int lump; + int lump, lumpvert; // We determine the best available model simply by trying them all in // order of decreasing preference. @@ -304,12 +553,13 @@ bool OpenGLSWFrameBuffer::LoadShaders() for (i = 0; i < NUM_SHADERS; ++i) { shaderpath = shaderdir; - shaderpath += ShaderNames[i]; - lump = Wads.CheckNumForFullName(shaderpath); - if (lump >= 0) + lump = Wads.CheckNumForFullName(shaderpath + ShaderNames[i] + ".fp"); + lumpvert = Wads.CheckNumForFullName(shaderpath + ShaderNames[i] + ".vp"); + if (lump >= 0 && lumpvert >= 0) { FMemLump data = Wads.ReadLump(lump); - if (!CreatePixelShader((uint32_t *)data.GetMem(), &Shaders[i]) && i < SHADER_BurnWipe) + FMemLump datavert = Wads.ReadLump(lumpvert); + if (!CreatePixelShader(datavert.GetMem(), data.GetMem(), &Shaders[i]) && i < SHADER_BurnWipe) { break; } @@ -533,17 +783,6 @@ bool OpenGLSWFrameBuffer::CreatePaletteTexture() return true; } -//========================================================================== -// -// OpenGLSWFrameBuffer :: CreateGammaTexture -// -//========================================================================== - -bool OpenGLSWFrameBuffer::CreateGammaTexture() -{ - return false; -} - //========================================================================== // // OpenGLSWFrameBuffer :: CreateVertexes @@ -3293,7 +3532,7 @@ void OpenGLSWFrameBuffer::SetTexture(int tnum, HWTexture *texture) { Texture[tnum] = texture; glActiveTexture(GL_TEXTURE0 + tnum); - glBindTexture(GL_TEXTURE_2D, texture->Handle); + glBindTexture(GL_TEXTURE_2D, texture->Texture); if 
(Texture[tnum]->WrapS != SamplerWrapS[tnum]) { Texture[tnum]->WrapS = SamplerWrapS[tnum]; diff --git a/src/gl/system/gl_swframebuffer.h b/src/gl/system/gl_swframebuffer.h index 30e7715abc..14ed2ec131 100644 --- a/src/gl/system/gl_swframebuffer.h +++ b/src/gl/system/gl_swframebuffer.h @@ -110,7 +110,7 @@ private: void UnlockRect() { } bool GetSurfaceLevel(int level, HWSurface **outSurface) { *outSurface = nullptr; return false; } - int Handle = 0; + int Texture = 0; int WrapS = 0; int WrapT = 0; int Format = 0; @@ -119,44 +119,70 @@ private: class HWVertexBuffer { public: + ~HWVertexBuffer() + { + if (Buffer != 0) glDeleteVertexArrays(1, (GLuint*)&VertexArray); + if (Buffer != 0) glDeleteBuffers(1, (GLuint*)&Buffer); + } + FBVERTEX *Lock() { return nullptr; } void Unlock() { } + + int Buffer = 0; + int VertexArray = 0; }; class HWIndexBuffer { public: + ~HWIndexBuffer() + { + if (Buffer != 0) glDeleteBuffers(1, (GLuint*)&Buffer); + } + uint16_t *Lock() { return nullptr; } void Unlock() { } + + int Buffer = 0; }; class HWPixelShader { public: + ~HWPixelShader() + { + if (Program != 0) glDeleteProgram(Program); + if (VertexShader != 0) glDeleteShader(VertexShader); + if (FragmentShader != 0) glDeleteShader(FragmentShader); + } + + int Program = 0; + int VertexShader = 0; + int FragmentShader = 0; }; - bool CreatePixelShader(const void *src, HWPixelShader **outShader) { *outShader = nullptr; return false; } - bool CreateVertexBuffer(int size, HWVertexBuffer **outVertexBuffer) { *outVertexBuffer = nullptr; return false; } - bool CreateIndexBuffer(int size, HWIndexBuffer **outIndexBuffer) { *outIndexBuffer = nullptr; return false; } - bool CreateOffscreenPlainSurface(int width, int height, int format, HWSurface **outSurface) { *outSurface = nullptr; return false; } - bool CreateTexture(int width, int height, int levels, int format, HWTexture **outTexture) { *outTexture = nullptr; return false; } - bool CreateRenderTarget(int width, int height, int format, HWSurface 
**outSurface) { *outSurface = nullptr; return false; } - bool GetBackBuffer(HWSurface **outSurface) { *outSurface = nullptr; return false; } - bool GetRenderTarget(int index, HWSurface **outSurface) { *outSurface = nullptr; return false; } - void GetRenderTargetData(HWSurface *a, HWSurface *b) { } - void ColorFill(HWSurface *surface, float red, float green, float blue) { } - void StretchRect(HWSurface *src, const LTRBRect *srcrect, HWSurface *dest) { } - bool SetRenderTarget(int index, HWSurface *surface) { return true; } - void SetGammaRamp(const GammaRamp *ramp) { } - void SetPixelShaderConstantF(int uniformIndex, const float *data, int vec4fcount) { } - void SetHWPixelShader(HWPixelShader *shader) { } - void SetStreamSource(HWVertexBuffer *vertexBuffer) { } - void SetIndices(HWIndexBuffer *indexBuffer) { } - void DrawTriangleFans(int count, const FBVERTEX *vertices) { } - void DrawPoints(int count, const FBVERTEX *vertices) { } - void DrawLineList(int count) { } - void DrawTriangleList(int minIndex, int numVertices, int startIndex, int primitiveCount) { } - void Present() { } + bool CreatePixelShader(const void *vertexsrc, const void *fragmentsrc, HWPixelShader **outShader); + bool CreateVertexBuffer(int size, HWVertexBuffer **outVertexBuffer); + bool CreateIndexBuffer(int size, HWIndexBuffer **outIndexBuffer); + bool CreateOffscreenPlainSurface(int width, int height, int format, HWSurface **outSurface); + bool CreateTexture(int width, int height, int levels, int format, HWTexture **outTexture); + bool CreateRenderTarget(int width, int height, int format, HWSurface **outSurface); + bool GetBackBuffer(HWSurface **outSurface); + bool GetRenderTarget(int index, HWSurface **outSurface); + void GetRenderTargetData(HWSurface *a, HWSurface *b); + void ColorFill(HWSurface *surface, float red, float green, float blue); + void StretchRect(HWSurface *src, const LTRBRect *srcrect, HWSurface *dest); + bool SetRenderTarget(int index, HWSurface *surface); + void 
SetGammaRamp(const GammaRamp *ramp); + void SetPixelShaderConstantF(int uniformIndex, const float *data, int vec4fcount); + void SetHWPixelShader(HWPixelShader *shader); + void SetStreamSource(HWVertexBuffer *vertexBuffer); + void SetIndices(HWIndexBuffer *indexBuffer); + void DrawTriangleFans(int count, const FBVERTEX *vertices); + void DrawPoints(int count, const FBVERTEX *vertices); + void DrawLineList(int count); + void DrawTriangleList(int minIndex, int numVertices, int startIndex, int primitiveCount); + void Present(); static uint32_t ColorARGB(uint32_t a, uint32_t r, uint32_t g, uint32_t b) { return ((a & 0xff) << 24) | ((r & 0xff) << 16) | ((g & 0xff) << 8) | ((b) & 0xff); } static uint32_t ColorRGBA(uint32_t a, uint32_t r, uint32_t g, uint32_t b) { return ColorARGB(a, r, g, b); } @@ -346,7 +372,6 @@ private: void CreateBlockSurfaces(); bool CreateFBTexture(); bool CreatePaletteTexture(); - bool CreateGammaTexture(); bool CreateVertexes(); void UploadPalette(); void UpdateGammaTexture(float igamma); @@ -386,6 +411,8 @@ private: template static void SafeRelease(T &x) { if (x != nullptr) { delete x; x = nullptr; } } + std::unique_ptr StreamVertexBuffer; + BOOL AlphaTestEnabled; BOOL AlphaBlendEnabled; int AlphaBlendOp; From f4308b3184ea1f5ad15f3883c60be7cf2f6515bc Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 10 Oct 2016 21:03:55 +0200 Subject: [PATCH 176/912] Add glsl shader version of the d3d9 shaders and get enough of it working for it to boot without errors --- src/gl/system/gl_swframebuffer.cpp | 616 ++++++++++--------------- src/gl/system/gl_swframebuffer.h | 99 ++-- src/win32/hardware.cpp | 5 +- src/win32/win32gliface.cpp | 6 +- wadsrc/static/shaders/glsl/swshader.fp | 146 ++++++ wadsrc/static/shaders/glsl/swshader.vp | 17 + 6 files changed, 435 insertions(+), 454 deletions(-) create mode 100644 wadsrc/static/shaders/glsl/swshader.fp create mode 100644 wadsrc/static/shaders/glsl/swshader.vp diff --git a/src/gl/system/gl_swframebuffer.cpp 
b/src/gl/system/gl_swframebuffer.cpp index e03f1fd9c5..895508d8b1 100644 --- a/src/gl/system/gl_swframebuffer.cpp +++ b/src/gl/system/gl_swframebuffer.cpp @@ -71,7 +71,6 @@ #include "gl/utility/gl_templates.h" #include "gl/gl_functions.h" -CVAR(Bool, gl_antilag, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG) CVAR(Int, gl_showpacks, 0, 0) #ifndef WIN32 // Defined in fb_d3d9 for Windows CVAR(Bool, vid_hwaalines, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG) @@ -88,63 +87,66 @@ EXTERN_CVAR(Int, vid_refreshrate) extern cycle_t BlitCycles; +void gl_LoadExtensions(); + IMPLEMENT_CLASS(OpenGLSWFrameBuffer) -const char *const OpenGLSWFrameBuffer::ShaderNames[OpenGLSWFrameBuffer::NUM_SHADERS] = +const char *const OpenGLSWFrameBuffer::ShaderDefines[OpenGLSWFrameBuffer::NUM_SHADERS] = { - "NormalColor", - "NormalColorPal", - "NormalColorInv", - "NormalColorPalInv", + "#define ENORMALCOLOR\n#define PALTEX 0\n#define DINVERT 0", // NormalColor + "#define ENORMALCOLOR\n#define PALTEX 1\n#define INVERT 0", // NormalColorPal + "#define ENORMALCOLOR\n#define PALTEX 0\n#define INVERT 1", // NormalColorInv + "#define ENORMALCOLOR\n#define PALTEX 1\n#define INVERT 1", // NormalColorPalInv - "RedToAlpha", - "RedToAlphaInv", + "#define EREDTOALPHA\n#define INVERT 0", // RedToAlpha + "#define EREDTOALPHA\n#define INVERT 1", // RedToAlphaInv - "VertexColor", + "#define EVERTEXCOLOR", // VertexColor - "SpecialColormap", - "SpecialColorMapPal", + "#define ESPECIALCOLORMAP\n#define PALTEX 0\n#define INVERT 0", // SpecialColormap + "#define ESPECIALCOLORMAP\n#define PALTEX 1\n#define INVERT 0", // SpecialColorMapPal - "InGameColormap", - "InGameColormapDesat", - "InGameColormapInv", - "InGameColormapInvDesat", - "InGameColormapPal", - "InGameColormapPalDesat", - "InGameColormapPalInv", - "InGameColormapPalInvDesat", + "#define EINGAMECOLORMAP\n#define PALTEX 0\n#define INVERT 0\n#define DESAT 0", // InGameColormap + "#define EINGAMECOLORMAP\n#define PALTEX 0\n#define INVERT 0\n#define DESAT 1", // 
InGameColormapDesat + "#define EINGAMECOLORMAP\n#define PALTEX 0\n#define INVERT 1\n#define DESAT 0", // InGameColormapInv + "#define EINGAMECOLORMAP\n#define PALTEX 0\n#define INVERT 1\n#define DESAT 1", // InGameColormapInvDesat + "#define EINGAMECOLORMAP\n#define PALTEX 1\n#define INVERT 0\n#define DESAT 0", // InGameColormapPal + "#define EINGAMECOLORMAP\n#define PALTEX 1\n#define INVERT 0\n#define DESAT 1", // InGameColormapPalDesat + "#define EINGAMECOLORMAP\n#define PALTEX 1\n#define INVERT 1\n#define DESAT 0", // InGameColormapPalInv + "#define EINGAMECOLORMAP\n#define PALTEX 1\n#define INVERT 1\n#define DESAT 1", // InGameColormapPalInvDesat - "BurnWipe", - "GammaCorrection", + "#define EBURNWIPE", // BurnWipe + "#define EGAMMACORRECTION", // GammaCorrection }; OpenGLSWFrameBuffer::OpenGLSWFrameBuffer(void *hMonitor, int width, int height, int bits, int refreshHz, bool fullscreen) : Super(hMonitor, width, height, bits, refreshHz, fullscreen) { + // To do: this needs to cooperate with the same static in OpenGLFrameBuffer::InitializeState + static bool first = true; + if (first) + { + ogl_LoadFunctions(); + } + gl_LoadExtensions(); + Super::InitializeState(); + // SetVSync needs to be at the very top to workaround a bug in Nvidia's OpenGL driver. // If wglSwapIntervalEXT is called after glBindFramebuffer in a frame the setting is not changed! 
- SetVSync(vid_vsync); + //SetVSync(vid_vsync); VertexBuffer = nullptr; IndexBuffer = nullptr; FBTexture = nullptr; - TempRenderTexture = nullptr; - RenderTexture[0] = nullptr; - RenderTexture[1] = nullptr; InitialWipeScreen = nullptr; ScreenshotTexture = nullptr; - ScreenshotSurface = nullptr; FinalWipeScreen = nullptr; PaletteTexture = nullptr; - GammaTexture = nullptr; - FrontCopySurface = nullptr; for (int i = 0; i < NUM_SHADERS; ++i) { Shaders[i] = nullptr; } GammaShader = nullptr; - BlockSurface[0] = nullptr; - BlockSurface[1] = nullptr; VSync = vid_vsync; BlendingRect.left = 0; BlendingRect.top = 0; @@ -160,9 +162,6 @@ OpenGLSWFrameBuffer::OpenGLSWFrameBuffer(void *hMonitor, int width, int height, QuadExtra = new BufferedTris[MAX_QUAD_BATCH]; Atlases = nullptr; PixelDoubling = 0; - SkipAt = -1; - CurrRenderTexture = 0; - RenderTextureToggle = 0; Gamma = 1.0; FlashColor0 = 0; @@ -180,7 +179,7 @@ OpenGLSWFrameBuffer::OpenGLSWFrameBuffer(void *hMonitor, int width, int height, memcpy(SourcePalette, GPalette.BaseColors, sizeof(PalEntry) * 256); - Windowed = !(static_cast(Video)->GoFullscreen(fullscreen)); + //Windowed = !(static_cast(Video)->GoFullscreen(fullscreen)); TrueHeight = height; /*if (fullscreen) @@ -209,7 +208,56 @@ OpenGLSWFrameBuffer::~OpenGLSWFrameBuffer() delete[] QuadExtra; } -bool OpenGLSWFrameBuffer::CreatePixelShader(const void *vertexsrc, const void *fragmentsrc, HWPixelShader **outShader) +OpenGLSWFrameBuffer::HWTexture::~HWTexture() +{ + if (Texture != 0) glDeleteTextures(1, (GLuint*)&Texture); + if (Buffer != 0) glDeleteBuffers(1, (GLuint*)&Buffer); +} + +OpenGLSWFrameBuffer::HWVertexBuffer::~HWVertexBuffer() +{ + if (VertexArray != 0) glDeleteVertexArrays(1, (GLuint*)&VertexArray); + if (Buffer != 0) glDeleteBuffers(1, (GLuint*)&Buffer); +} + +OpenGLSWFrameBuffer::FBVERTEX *OpenGLSWFrameBuffer::HWVertexBuffer::Lock() +{ + glBindBuffer(GL_ARRAY_BUFFER, Buffer); + return (FBVERTEX*)glMapBuffer(GL_ARRAY_BUFFER, GL_WRITE_ONLY); +} + +void 
OpenGLSWFrameBuffer::HWVertexBuffer::Unlock() +{ + glUnmapBuffer(GL_ARRAY_BUFFER); + glBindBuffer(GL_ARRAY_BUFFER, 0); +} + +OpenGLSWFrameBuffer::HWIndexBuffer::~HWIndexBuffer() +{ + if (Buffer != 0) glDeleteBuffers(1, (GLuint*)&Buffer); +} + +uint16_t *OpenGLSWFrameBuffer::HWIndexBuffer::Lock() +{ + glGetIntegerv(GL_ELEMENT_ARRAY_BUFFER_BINDING, &LockedOldBinding); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, Buffer); + return (uint16_t*)glMapBuffer(GL_ELEMENT_ARRAY_BUFFER, GL_WRITE_ONLY); +} + +void OpenGLSWFrameBuffer::HWIndexBuffer::Unlock() +{ + glUnmapBuffer(GL_ELEMENT_ARRAY_BUFFER); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, LockedOldBinding); +} + +OpenGLSWFrameBuffer::HWPixelShader::~HWPixelShader() +{ + if (Program != 0) glDeleteProgram(Program); + if (VertexShader != 0) glDeleteShader(VertexShader); + if (FragmentShader != 0) glDeleteShader(FragmentShader); +} + +bool OpenGLSWFrameBuffer::CreatePixelShader(FString vertexsrc, FString fragmentsrc, const FString &defines, HWPixelShader **outShader) { auto shader = std::make_unique(); @@ -217,25 +265,34 @@ bool OpenGLSWFrameBuffer::CreatePixelShader(const void *vertexsrc, const void *f shader->VertexShader = glCreateShader(GL_VERTEX_SHADER); shader->FragmentShader = glCreateShader(GL_FRAGMENT_SHADER); + vertexsrc = "#version 130\n" + defines + "\n#line 0\n" + vertexsrc; + fragmentsrc = "#version 130\n" + defines + "\n#line 0\n" + fragmentsrc; + { - int lengths[1] = { (int)strlen((const char*)vertexsrc) }; - const char *sources[1] = { (const char*)vertexsrc }; + int lengths[1] = { (int)vertexsrc.Len() }; + const char *sources[1] = { vertexsrc.GetChars() }; glShaderSource(shader->VertexShader, 1, sources, lengths); glCompileShader(shader->VertexShader); } { - int lengths[1] = { (int)strlen((const char*)fragmentsrc) }; - const char *sources[1] = { (const char*)fragmentsrc }; + int lengths[1] = { (int)fragmentsrc.Len() }; + const char *sources[1] = { fragmentsrc.GetChars() }; glShaderSource(shader->FragmentShader, 1, 
sources, lengths); glCompileShader(shader->FragmentShader); } GLint status = 0; + int errorShader = shader->VertexShader; glGetShaderiv(shader->VertexShader, GL_COMPILE_STATUS, &status); - if (status != GL_FALSE) glGetShaderiv(shader->FragmentShader, GL_COMPILE_STATUS, &status); + if (status != GL_FALSE) { errorShader = shader->FragmentShader; glGetShaderiv(shader->FragmentShader, GL_COMPILE_STATUS, &status); } if (status == GL_FALSE) { + static char buffer[10000]; + GLsizei length = 0; + buffer[0] = 0; + glGetShaderInfoLog(errorShader, 10000, &length, buffer); + *outShader = nullptr; return false; } @@ -308,6 +365,8 @@ bool OpenGLSWFrameBuffer::CreateTexture(int width, int height, int levels, int f { auto obj = std::make_unique(); + obj->Format = format; + GLint oldBinding = 0; glGetIntegerv(GL_TEXTURE_BINDING_2D, &oldBinding); @@ -317,7 +376,7 @@ bool OpenGLSWFrameBuffer::CreateTexture(int width, int height, int levels, int f switch (format) { case GL_R8: srcformat = GL_RED; break; - case GL_RGBA8: srcformat = GL_RGBA; break; + case GL_RGBA8: srcformat = GL_BGRA; break; case GL_COMPRESSED_RGB_S3TC_DXT1_EXT: srcformat = GL_RGB; break; case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT: srcformat = GL_RGBA; break; case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT: srcformat = GL_RGBA; break; @@ -327,6 +386,8 @@ bool OpenGLSWFrameBuffer::CreateTexture(int width, int height, int levels, int f return false; } glTexImage2D(GL_TEXTURE_2D, 0, format, width, height, 0, srcformat, GL_UNSIGNED_BYTE, nullptr); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); glBindTexture(GL_TEXTURE_2D, oldBinding); @@ -334,16 +395,9 @@ bool OpenGLSWFrameBuffer::CreateTexture(int width, int height, int levels, int f return true; } -bool OpenGLSWFrameBuffer::CreateOffscreenPlainSurface(int width, int height, int format, HWSurface **outSurface) { *outSurface = nullptr; return false; } -bool 
OpenGLSWFrameBuffer::CreateRenderTarget(int width, int height, int format, HWSurface **outSurface) { *outSurface = nullptr; return false; } -bool OpenGLSWFrameBuffer::GetBackBuffer(HWSurface **outSurface) { *outSurface = nullptr; return false; } -bool OpenGLSWFrameBuffer::GetRenderTarget(int index, HWSurface **outSurface) { *outSurface = nullptr; return false; } -void OpenGLSWFrameBuffer::GetRenderTargetData(HWSurface *a, HWSurface *b) { } - -void OpenGLSWFrameBuffer::ColorFill(HWSurface *surface, float red, float green, float blue) { } -void OpenGLSWFrameBuffer::StretchRect(HWSurface *src, const LTRBRect *srcrect, HWSurface *dest) { } -bool OpenGLSWFrameBuffer::SetRenderTarget(int index, HWSurface *surface) { return true; } -void OpenGLSWFrameBuffer::SetGammaRamp(const GammaRamp *ramp) { } +void OpenGLSWFrameBuffer::SetGammaRamp(const GammaRamp *ramp) +{ +} void OpenGLSWFrameBuffer::SetPixelShaderConstantF(int uniformIndex, const float *data, int vec4fcount) { @@ -487,7 +541,6 @@ void OpenGLSWFrameBuffer::SetInitialState() NeedGammaUpdate = true; NeedPalUpdate = true; - OldRenderTarget = nullptr; // This constant is used for grayscaling weights (.xyz) and color inversion (.w) float weights[4] = { 77 / 256.f, 143 / 256.f, 37 / 256.f, 1 }; @@ -524,7 +577,6 @@ bool OpenGLSWFrameBuffer::CreateResources() { return false; } - CreateBlockSurfaces(); return true; } @@ -539,42 +591,36 @@ bool OpenGLSWFrameBuffer::CreateResources() bool OpenGLSWFrameBuffer::LoadShaders() { - static const char models[][4] = { "30/", "20/", "14/" }; + int lumpvert = Wads.CheckNumForFullName("shaders/glsl/swshader.vp"); + int lumpfrag = Wads.CheckNumForFullName("shaders/glsl/swshader.fp"); + if (lumpvert < 0 || lumpfrag < 0) + return false; + + FString vertsource = Wads.ReadLump(lumpvert).GetString(); + FString fragsource = Wads.ReadLump(lumpfrag).GetString(); + FString shaderdir, shaderpath; - unsigned model, i; - int lump, lumpvert; + unsigned int i; // We determine the best available model 
simply by trying them all in // order of decreasing preference. - for (model = 0; model < countof(models); ++model) + for (i = 0; i < NUM_SHADERS; ++i) { - shaderdir = "shaders/gl/sm"; - shaderdir += models[model]; - for (i = 0; i < NUM_SHADERS; ++i) + shaderpath = shaderdir; + if (!CreatePixelShader(vertsource, fragsource, ShaderDefines[i], &Shaders[i]) && i < SHADER_BurnWipe) { - shaderpath = shaderdir; - lump = Wads.CheckNumForFullName(shaderpath + ShaderNames[i] + ".fp"); - lumpvert = Wads.CheckNumForFullName(shaderpath + ShaderNames[i] + ".vp"); - if (lump >= 0 && lumpvert >= 0) - { - FMemLump data = Wads.ReadLump(lump); - FMemLump datavert = Wads.ReadLump(lumpvert); - if (!CreatePixelShader(datavert.GetMem(), data.GetMem(), &Shaders[i]) && i < SHADER_BurnWipe) - { - break; - } - } - } - if (i == NUM_SHADERS) - { // Success! - return true; - } - // Failure. Release whatever managed to load (which is probably nothing.) - for (i = 0; i < NUM_SHADERS; ++i) - { - SafeRelease(Shaders[i]); + break; } } + if (i == NUM_SHADERS) + { // Success! + return true; + } + // Failure. Release whatever managed to load (which is probably nothing.) 
+ for (i = 0; i < NUM_SHADERS; ++i) + { + SafeRelease(Shaders[i]); + } return false; } @@ -590,7 +636,6 @@ void OpenGLSWFrameBuffer::ReleaseResources() KillNativeTexs(); KillNativePals(); ReleaseDefaultPoolItems(); - SafeRelease(ScreenshotSurface); SafeRelease(ScreenshotTexture); SafeRelease(PaletteTexture); for (int i = 0; i < NUM_SHADERS; ++i) @@ -616,14 +661,9 @@ void OpenGLSWFrameBuffer::ReleaseDefaultPoolItems() { SafeRelease(FBTexture); SafeRelease(FinalWipeScreen); - SafeRelease(RenderTexture[0]); - SafeRelease(RenderTexture[1]); SafeRelease(InitialWipeScreen); SafeRelease(VertexBuffer); SafeRelease(IndexBuffer); - SafeRelease(BlockSurface[0]); - SafeRelease(BlockSurface[1]); - SafeRelease(FrontCopySurface); } bool OpenGLSWFrameBuffer::Reset() @@ -633,32 +673,10 @@ bool OpenGLSWFrameBuffer::Reset() { return false; } - CreateBlockSurfaces(); SetInitialState(); return true; } -//========================================================================== -// -// OpenGLSWFrameBuffer :: CreateBlockSurfaces -// -// Create blocking surfaces for antilag. It's okay if these can't be -// created; antilag just won't work. -// -//========================================================================== - -void OpenGLSWFrameBuffer::CreateBlockSurfaces() -{ - BlockNum = 0; - if (CreateOffscreenPlainSurface(16, 16, GL_RGBA8, &BlockSurface[0])) - { - if (!CreateOffscreenPlainSurface(16, 16, GL_RGBA8, &BlockSurface[1])) - { - SafeRelease(BlockSurface[0]); - } - } -} - //========================================================================== // // OpenGLSWFrameBuffer :: KillNativePals @@ -691,80 +709,11 @@ void OpenGLSWFrameBuffer::KillNativeTexs() } } -//========================================================================== -// -// OpenGLSWFrameBuffer :: CreateFBTexture -// -// Creates the "Framebuffer" texture. With the advent of hardware-assisted -// 2D, this is something of a misnomer now. 
The FBTexture is only used for -// uploading the software 3D image to video memory so that it can be drawn -// to the real frame buffer. -// -// It also creates the TempRenderTexture, since this seemed like a -// convenient place to do so. -// -//========================================================================== - bool OpenGLSWFrameBuffer::CreateFBTexture() { - if (!CreateTexture(Width, Height, 1, GL_R8, &FBTexture)) - { - int pow2width, pow2height, i; - - for (i = 1; i < Width; i <<= 1) {} pow2width = i; - for (i = 1; i < Height; i <<= 1) {} pow2height = i; - - if (!CreateTexture(pow2width, pow2height, 1, GL_R8, &FBTexture)) - { - return false; - } - else - { - FBWidth = pow2width; - FBHeight = pow2height; - } - } - else - { - FBWidth = Width; - FBHeight = Height; - } - RenderTextureToggle = 0; - RenderTexture[0] = nullptr; - RenderTexture[1] = nullptr; - if (!CreateTexture(FBWidth, FBHeight, 1, GL_RGBA8, &RenderTexture[0])) - { - return false; - } - if (Windowed || PixelDoubling) - { - // Windowed or pixel doubling: Create another render texture so we can flip between them. - RenderTextureToggle = 1; - if (!CreateTexture(FBWidth, FBHeight, 1, GL_RGBA8, &RenderTexture[1])) - { - return false; - } - } - else - { - // Fullscreen and not pixel doubling: Create a render target to have the back buffer copied to. - if (!CreateRenderTarget(Width, Height, GL_RGBA8, &FrontCopySurface)) - { - return false; - } - } - // Initialize the TempRenderTextures to black. 
- for (int i = 0; i <= RenderTextureToggle; ++i) - { - HWSurface *surf; - if (RenderTexture[i]->GetSurfaceLevel(0, &surf)) - { - ColorFill(surf, 0.0f, 0.0f, 0.0f); - delete surf; - } - } - TempRenderTexture = RenderTexture[0]; - CurrRenderTexture = 0; + CreateTexture(Width, Height, 1, GL_R8, &FBTexture); + FBWidth = Width; + FBHeight = Height; return true; } @@ -814,7 +763,7 @@ bool OpenGLSWFrameBuffer::CreateVertexes() void OpenGLSWFrameBuffer::CalcFullscreenCoords(FBVERTEX verts[4], bool viewarea_only, bool can_double, uint32_t color0, uint32_t color1) const { - float offset = OldRenderTarget != nullptr ? 0 : LBOffset; + float offset = LBOffset;//OldRenderTarget != nullptr ? 0 : LBOffset; float top = offset - 0.5f; float texright = float(Width) / float(FBWidth); float texbot = float(Height) / float(FBHeight); @@ -1042,18 +991,6 @@ void OpenGLSWFrameBuffer::Update() LOG("SetGammaRamp\n"); SetGammaRamp(&ramp); } - else - { - if (igamma != 1) - { - UpdateGammaTexture(igamma); - GammaShader = Shaders[SHADER_GammaCorrection]; - } - else - { - GammaShader = nullptr; - } - } psgamma[2] = psgamma[1] = psgamma[0] = igamma; psgamma[3] = 0.5; // For SM14 version SetPixelShaderConstantF(PSCONST_Gamma, psgamma, 1); @@ -1094,21 +1031,6 @@ void OpenGLSWFrameBuffer::Flip() DrawLetterbox(); DoWindowedGamma(); - CopyNextFrontBuffer(); - - // Attempt to counter input lag. - if (gl_antilag && BlockSurface[0] != nullptr) - { - LockedRect lr; - volatile int dummy; - ColorFill(BlockSurface[BlockNum], 0.0f, 0x20/255.0f, 0x50/255.0f); - BlockNum ^= 1; - if (BlockSurface[BlockNum]->LockRect(&lr, nullptr, false)) - { - dummy = *(int *)lr.pBits; - BlockSurface[BlockNum]->UnlockRect(); - } - } // Limiting the frame rate is as simple as waiting for the timer to signal this event. if (FPSLimitEvent != nullptr) { @@ -1117,13 +1039,6 @@ void OpenGLSWFrameBuffer::Flip() Present(); InScene = false; - if (RenderTextureToggle) - { - // Flip the TempRenderTexture to the other one now. 
- CurrRenderTexture ^= RenderTextureToggle; - TempRenderTexture = RenderTexture[CurrRenderTexture]; - } - if (Windowed) { int clientWidth = GetClientWidth(); @@ -1143,42 +1058,6 @@ void OpenGLSWFrameBuffer::Flip() } } -//========================================================================== -// -// OpenGLSWFrameBuffer :: CopyNextFrontBuffer -// -// Duplicates the contents of the back buffer that will become the front -// buffer upon Present into FrontCopySurface so that we can get the -// contents of the display without wasting time in GetFrontBufferData(). -// -//========================================================================== - -void OpenGLSWFrameBuffer::CopyNextFrontBuffer() -{ - HWSurface *backbuff; - - if (Windowed || PixelDoubling) - { - // Windowed mode or pixel doubling: TempRenderTexture has what we want - SafeRelease(FrontCopySurface); - if (TempRenderTexture->GetSurfaceLevel(0, &backbuff)) - { - FrontCopySurface = backbuff; - } - } - else - { - // Fullscreen, not pixel doubled: The back buffer has what we want, - // but it might be letter boxed. 
- if (GetBackBuffer(&backbuff)) - { - LTRBRect srcrect = { 0, LBOffsetI, Width, LBOffsetI + Height }; - StretchRect(backbuff, &srcrect, FrontCopySurface); - delete backbuff; - } - } -} - //========================================================================== // // OpenGLSWFrameBuffer :: PaintToWindow @@ -1207,50 +1086,49 @@ void OpenGLSWFrameBuffer::Draw3DPart(bool copy3d) { if (copy3d) { - LTRBRect texrect = { 0, 0, Width, Height }; - LockedRect lockrect; - - if ((FBWidth == Width && FBHeight == Height && - FBTexture->LockRect(&lockrect, nullptr, true)) || - FBTexture->LockRect(&lockrect, &texrect, false)) + if (FBTexture->Buffer == 0) { - if (lockrect.Pitch == Pitch && Pitch == Width) + glGenBuffers(1, (GLuint*)&FBTexture->Buffer); + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, FBTexture->Buffer); + glBufferData(GL_PIXEL_UNPACK_BUFFER, Width * Height, nullptr, GL_STREAM_DRAW); + } + else + { + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, FBTexture->Buffer); + } + + uint8_t *dest = (uint8_t*)glMapBuffer(GL_PIXEL_UNPACK_BUFFER, GL_WRITE_ONLY); + if (dest) + { + if (Pitch == Width) { - memcpy(lockrect.pBits, MemBuffer, Width * Height); + memcpy(dest, MemBuffer, Width * Height); } else { - uint8_t *dest = (uint8_t *)lockrect.pBits; uint8_t *src = MemBuffer; for (int y = 0; y < Height; y++) { memcpy(dest, src, Width); - dest += lockrect.Pitch; + dest += Width; src += Pitch; } } - FBTexture->UnlockRect(); + glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER); + GLint oldBinding = 0; + glGetIntegerv(GL_TEXTURE_BINDING_2D, &oldBinding); + glBindTexture(GL_TEXTURE_2D, FBTexture->Texture); + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, Width, Height, GL_R8, GL_UNSIGNED_BYTE, 0); + glBindTexture(GL_TEXTURE_2D, oldBinding); } + + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); } InScene = true; if (vid_hwaalines) glEnable(GL_LINE_SMOOTH); else glDisable(GL_LINE_SMOOTH); - assert(OldRenderTarget == nullptr); - if (TempRenderTexture != nullptr && - ((Windowed && TempRenderTexture != FinalWipeScreen) || 
GatheringWipeScreen || PixelDoubling)) - { - HWSurface *targetsurf; - if (TempRenderTexture->GetSurfaceLevel(0, &targetsurf)) - { - if (GetRenderTarget(0, &OldRenderTarget)) - { - SetRenderTarget(0, targetsurf); - } - delete targetsurf; - } - } SetTexture(0, FBTexture); SetPaletteTexture(PaletteTexture, 256, BorderColor); @@ -1271,10 +1149,8 @@ void OpenGLSWFrameBuffer::Draw3DPart(bool copy3d) } else { - color0 = ColorValue(realfixedcolormap->ColorizeStart[0] / 2, - realfixedcolormap->ColorizeStart[1] / 2, realfixedcolormap->ColorizeStart[2] / 2, 0); - color1 = ColorValue(realfixedcolormap->ColorizeEnd[0] / 2, - realfixedcolormap->ColorizeEnd[1] / 2, realfixedcolormap->ColorizeEnd[2] / 2, 1); + color0 = ColorValue(realfixedcolormap->ColorizeStart[0] / 2, realfixedcolormap->ColorizeStart[1] / 2, realfixedcolormap->ColorizeStart[2] / 2, 0); + color1 = ColorValue(realfixedcolormap->ColorizeEnd[0] / 2, realfixedcolormap->ColorizeEnd[1] / 2, realfixedcolormap->ColorizeEnd[2] / 2, 1); SetPixelShader(Shaders[SHADER_SpecialColormapPal]); } } @@ -1323,7 +1199,7 @@ void OpenGLSWFrameBuffer::DrawLetterbox() void OpenGLSWFrameBuffer::DoWindowedGamma() { - if (OldRenderTarget != nullptr) + /*if (OldRenderTarget != nullptr) { FBVERTEX verts[4]; @@ -1336,49 +1212,28 @@ void OpenGLSWFrameBuffer::DoWindowedGamma() DrawTriangleFans(2, verts); delete OldRenderTarget; OldRenderTarget = nullptr; - } -} - -//========================================================================== -// -// OpenGLSWFrameBuffer :: UpdateGammaTexture -// -// Updates the gamma texture used by the PS14 shader. We only use the first -// half of the texture so that we needn't worry about imprecision causing -// it to grab from the border. 
-// -//========================================================================== - -void OpenGLSWFrameBuffer::UpdateGammaTexture(float igamma) -{ - LockedRect lockrect; - - if (GammaTexture != nullptr && GammaTexture->LockRect(&lockrect, nullptr, false)) - { - uint8_t *pix = (uint8_t *)lockrect.pBits; - for (int i = 0; i <= 128; ++i) - { - pix[i * 4 + 2] = pix[i * 4 + 1] = pix[i * 4] = uint8_t(255.f * powf(i / 128.f, igamma)); - pix[i * 4 + 3] = 255; - } - GammaTexture->UnlockRect(); - } + }*/ } void OpenGLSWFrameBuffer::UploadPalette() { - LockedRect lockrect; - - if (SkipAt < 0) + if (PaletteTexture->Buffer == 0) { - SkipAt = 256; + glGenBuffers(1, (GLuint*)&PaletteTexture->Buffer); + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, PaletteTexture->Buffer); + glBufferData(GL_PIXEL_UNPACK_BUFFER, 256 * 4, nullptr, GL_STREAM_DRAW); } - if (PaletteTexture->LockRect(&lockrect, nullptr, false)) + else + { + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, PaletteTexture->Buffer); + } + + uint8_t *pix = (uint8_t *)glMapBuffer(GL_PIXEL_UNPACK_BUFFER, GL_WRITE_ONLY); + if (pix) { - uint8_t *pix = (uint8_t *)lockrect.pBits; int i; - for (i = 0; i < SkipAt; ++i, pix += 4) + for (i = 0; i < 256; ++i, pix += 4) { pix[0] = SourcePalette[i].b; pix[1] = SourcePalette[i].g; @@ -1394,9 +1249,16 @@ void OpenGLSWFrameBuffer::UploadPalette() pix[2] = SourcePalette[i].r; pix[3] = 255; } - PaletteTexture->UnlockRect(); + glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER); + GLint oldBinding = 0; + glGetIntegerv(GL_TEXTURE_BINDING_2D, &oldBinding); + glBindTexture(GL_TEXTURE_2D, PaletteTexture->Texture); + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 256, 1, GL_BGRA, GL_UNSIGNED_BYTE, 0); + glBindTexture(GL_TEXTURE_2D, oldBinding); BorderColor = ColorXRGB(SourcePalette[255].r, SourcePalette[255].g, SourcePalette[255].b); } + + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); } PalEntry *OpenGLSWFrameBuffer::GetPalette() @@ -1484,6 +1346,8 @@ void OpenGLSWFrameBuffer::SetBlendingRect(int x1, int y1, int x2, int y2) void 
OpenGLSWFrameBuffer::GetScreenshotBuffer(const uint8_t *&buffer, int &pitch, ESSType &color_type) { + Super::GetScreenshotBuffer(buffer, pitch, color_type); + /* LockedRect lrect; if (!Accel2D) @@ -1513,6 +1377,7 @@ void OpenGLSWFrameBuffer::GetScreenshotBuffer(const uint8_t *&buffer, int &pitch color_type = SS_BGRA; } } + */ } //========================================================================== @@ -1527,58 +1392,9 @@ void OpenGLSWFrameBuffer::ReleaseScreenshotBuffer() { Super::ReleaseScreenshotBuffer(); } - if (ScreenshotSurface != nullptr) - { - ScreenshotSurface->UnlockRect(); - delete ScreenshotSurface; - ScreenshotSurface = nullptr; - } SafeRelease(ScreenshotTexture); } -//========================================================================== -// -// OpenGLSWFrameBuffer :: GetCurrentScreen -// -// Returns a texture containing the pixels currently visible on-screen. -// -//========================================================================== - -OpenGLSWFrameBuffer::HWTexture *OpenGLSWFrameBuffer::GetCurrentScreen() -{ - HWTexture *tex; - HWSurface *surf; - bool hr; - - if (FrontCopySurface == nullptr) - { - return nullptr; - } - - hr = CreateTexture(FBWidth, FBHeight, 1, GL_RGBA8, &tex); - - if (!hr) - { - return nullptr; - } - if (!tex->GetSurfaceLevel(0, &surf)) - { - delete tex; - return nullptr; - } - - // Video -> System memory : use GetRenderTargetData - GetRenderTargetData(FrontCopySurface, surf); - delete surf; - - if (!hr) - { - delete tex; - return nullptr; - } - return tex; -} - /**************************************************************************/ /* 2D Stuff */ /**************************************************************************/ @@ -2013,7 +1829,6 @@ bool OpenGLSWFrameBuffer::OpenGLTex::Create(OpenGLSWFrameBuffer *fb, bool wrappi bool OpenGLSWFrameBuffer::OpenGLTex::Update() { - LockedRect lrect; LTRBRect rect; uint8_t *dest; @@ -2025,27 +1840,44 @@ bool OpenGLSWFrameBuffer::OpenGLTex::Update() int format = 
Box->Owner->Tex->Format; rect = Box->Area; - if (!Box->Owner->Tex->LockRect(&lrect, &rect, false)) + + if (Box->Owner->Tex->Buffer == 0) + glGenBuffers(1, (GLuint*)&Box->Owner->Tex->Buffer); + + int bytesPerPixel = 4; + switch (format) + { + case GL_R8: bytesPerPixel = 1; break; + case GL_RGBA8: bytesPerPixel = 4; break; + default: return false; + } + + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, Box->Owner->Tex->Buffer); + glBufferData(GL_PIXEL_UNPACK_BUFFER, (rect.right - rect.left) * (rect.bottom - rect.top) * bytesPerPixel, nullptr, GL_STREAM_DRAW); + + int pitch = (rect.right - rect.left) * bytesPerPixel; + uint8_t *bits = (uint8_t *)glMapBuffer(GL_PIXEL_UNPACK_BUFFER, GL_WRITE_ONLY); + dest = bits; + if (!dest) { return false; } - dest = (uint8_t *)lrect.pBits; if (Box->Padded) { - dest += lrect.Pitch + (format == GL_R8 ? 1 : 4); + dest += pitch + (format == GL_R8 ? 1 : 4); } - GameTex->FillBuffer(dest, lrect.Pitch, GameTex->GetHeight(), ToTexFmt(format)); + GameTex->FillBuffer(dest, pitch, GameTex->GetHeight(), ToTexFmt(format)); if (Box->Padded) { // Clear top padding row. - dest = (uint8_t *)lrect.pBits; + dest = bits; int numbytes = GameTex->GetWidth() + 2; if (format != GL_R8) { numbytes <<= 2; } memset(dest, 0, numbytes); - dest += lrect.Pitch; + dest += pitch; // Clear left and right padding columns. if (format == GL_R8) { @@ -2053,7 +1885,7 @@ bool OpenGLSWFrameBuffer::OpenGLTex::Update() { dest[0] = 0; dest[numbytes - 1] = 0; - dest += lrect.Pitch; + dest += pitch; } } else @@ -2062,13 +1894,20 @@ bool OpenGLSWFrameBuffer::OpenGLTex::Update() { *(uint32_t *)dest = 0; *(uint32_t *)(dest + numbytes - 4) = 0; - dest += lrect.Pitch; + dest += pitch; } } // Clear bottom padding row. 
memset(dest, 0, numbytes); } - Box->Owner->Tex->UnlockRect(); + + glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER); + GLint oldBinding = 0; + glGetIntegerv(GL_TEXTURE_BINDING_2D, &oldBinding); + glBindTexture(GL_TEXTURE_2D, Box->Owner->Tex->Texture); + glTexSubImage2D(GL_TEXTURE_2D, 0, rect.left, rect.top, rect.right - rect.left, rect.bottom - rect.top, format == GL_RGBA8 ? GL_BGRA : GL_RED, GL_UNSIGNED_BYTE, 0); + glBindTexture(GL_TEXTURE_2D, oldBinding); + glBindTexture(GL_PIXEL_UNPACK_BUFFER, 0); return true; } @@ -2199,18 +2038,28 @@ OpenGLSWFrameBuffer::OpenGLPal::~OpenGLPal() bool OpenGLSWFrameBuffer::OpenGLPal::Update() { - LockedRect lrect; uint32_t *buff; const PalEntry *pal; int skipat, i; assert(Tex != nullptr); - if (!Tex->LockRect(&lrect, nullptr, 0)) + if (Tex->Buffer == 0) + { + glGenBuffers(1, (GLuint*)&Tex->Buffer); + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, Tex->Buffer); + glBufferData(GL_PIXEL_UNPACK_BUFFER, Remap->NumEntries * 4, nullptr, GL_STREAM_DRAW); + } + else + { + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, Tex->Buffer); + } + + buff = (uint32_t *)glMapBuffer(GL_PIXEL_UNPACK_BUFFER, GL_WRITE_ONLY); + if (buff == nullptr) { return false; } - buff = (uint32_t *)lrect.pBits; pal = Remap->Palette; // See explanation in UploadPalette() for skipat rationale. 
@@ -2226,7 +2075,14 @@ bool OpenGLSWFrameBuffer::OpenGLPal::Update() } BorderColor = ColorARGB(pal[i].a, pal[i - 1].r, pal[i - 1].g, pal[i - 1].b); - Tex->UnlockRect(); + glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER); + GLint oldBinding = 0; + glGetIntegerv(GL_TEXTURE_BINDING_2D, &oldBinding); + glBindTexture(GL_TEXTURE_2D, Tex->Texture); + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, Remap->NumEntries, 1, GL_BGRA, GL_UNSIGNED_BYTE, 0); + glBindTexture(GL_TEXTURE_2D, oldBinding); + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); + return true; } diff --git a/src/gl/system/gl_swframebuffer.h b/src/gl/system/gl_swframebuffer.h index 14ed2ec131..eed9b6f7f4 100644 --- a/src/gl/system/gl_swframebuffer.h +++ b/src/gl/system/gl_swframebuffer.h @@ -85,32 +85,18 @@ private: uint16_t red[256], green[256], blue[256]; }; - struct LockedRect - { - int Pitch; - void *pBits; - }; - struct LTRBRect { int left, top, right, bottom; }; - class HWSurface - { - public: - bool LockRect(LockedRect *outRect, LTRBRect *srcRect, bool discard) { outRect->Pitch = 0; outRect->pBits = nullptr; return false; } - void UnlockRect() { } - }; - class HWTexture { public: - bool LockRect(LockedRect *outRect, LTRBRect *srcRect, bool discard) { outRect->Pitch = 0; outRect->pBits = nullptr; return false; } - void UnlockRect() { } - bool GetSurfaceLevel(int level, HWSurface **outSurface) { *outSurface = nullptr; return false; } + ~HWTexture(); int Texture = 0; + int Buffer = 0; int WrapS = 0; int WrapT = 0; int Format = 0; @@ -119,60 +105,43 @@ private: class HWVertexBuffer { public: - ~HWVertexBuffer() - { - if (Buffer != 0) glDeleteVertexArrays(1, (GLuint*)&VertexArray); - if (Buffer != 0) glDeleteBuffers(1, (GLuint*)&Buffer); - } + ~HWVertexBuffer(); - FBVERTEX *Lock() { return nullptr; } - void Unlock() { } + FBVERTEX *Lock(); + void Unlock(); - int Buffer = 0; int VertexArray = 0; + int Buffer = 0; }; class HWIndexBuffer { public: - ~HWIndexBuffer() - { - if (Buffer != 0) glDeleteBuffers(1, (GLuint*)&Buffer); - } + 
~HWIndexBuffer(); - uint16_t *Lock() { return nullptr; } - void Unlock() { } + uint16_t *Lock(); + void Unlock(); int Buffer = 0; + + private: + int LockedOldBinding = 0; }; class HWPixelShader { public: - ~HWPixelShader() - { - if (Program != 0) glDeleteProgram(Program); - if (VertexShader != 0) glDeleteShader(VertexShader); - if (FragmentShader != 0) glDeleteShader(FragmentShader); - } + ~HWPixelShader(); int Program = 0; int VertexShader = 0; int FragmentShader = 0; }; - bool CreatePixelShader(const void *vertexsrc, const void *fragmentsrc, HWPixelShader **outShader); + bool CreatePixelShader(FString vertexsrc, FString fragmentsrc, const FString &defines, HWPixelShader **outShader); bool CreateVertexBuffer(int size, HWVertexBuffer **outVertexBuffer); bool CreateIndexBuffer(int size, HWIndexBuffer **outIndexBuffer); - bool CreateOffscreenPlainSurface(int width, int height, int format, HWSurface **outSurface); bool CreateTexture(int width, int height, int levels, int format, HWTexture **outTexture); - bool CreateRenderTarget(int width, int height, int format, HWSurface **outSurface); - bool GetBackBuffer(HWSurface **outSurface); - bool GetRenderTarget(int index, HWSurface **outSurface); - void GetRenderTargetData(HWSurface *a, HWSurface *b); - void ColorFill(HWSurface *surface, float red, float green, float blue); - void StretchRect(HWSurface *src, const LTRBRect *srcrect, HWSurface *dest); - bool SetRenderTarget(int index, HWSurface *surface); void SetGammaRamp(const GammaRamp *ramp); void SetPixelShaderConstantF(int uniformIndex, const float *data, int vec4fcount); void SetHWPixelShader(HWPixelShader *shader); @@ -362,19 +331,17 @@ private: NUM_SHADERS }; - static const char *const ShaderNames[NUM_SHADERS]; + static const char *const ShaderDefines[NUM_SHADERS]; void Flip(); void SetInitialState(); bool CreateResources(); void ReleaseResources(); bool LoadShaders(); - void CreateBlockSurfaces(); bool CreateFBTexture(); bool CreatePaletteTexture(); bool 
CreateVertexes(); void UploadPalette(); - void UpdateGammaTexture(float igamma); void CalcFullscreenCoords(FBVERTEX verts[4], bool viewarea_only, bool can_double, uint32_t color0, uint32_t color1) const; bool Reset(); HWTexture *GetCurrentScreen(); @@ -397,7 +364,6 @@ private: void BeginLineBatch(); void EndLineBatch(); void EndBatch(); - void CopyNextFrontBuffer(); // State void EnableAlphaTest(BOOL enabled); @@ -431,10 +397,7 @@ private: int FlashAmount; int TrueHeight; int PixelDoubling; - int SkipAt; int LBOffsetI; - int RenderTextureToggle; - int CurrRenderTexture; float LBOffset; float Gamma; bool UpdatePending; @@ -448,34 +411,28 @@ private: bool GatheringWipeScreen; bool AALines; uint8_t BlockNum; - OpenGLPal *Palettes; - OpenGLTex *Textures; - Atlas *Atlases; + OpenGLPal *Palettes = nullptr; + OpenGLTex *Textures = nullptr; + Atlas *Atlases = nullptr; - HWTexture *FBTexture; - HWTexture *TempRenderTexture, *RenderTexture[2]; - HWTexture *PaletteTexture; - HWTexture *GammaTexture; - HWTexture *ScreenshotTexture; - HWSurface *ScreenshotSurface; - HWSurface *FrontCopySurface; + HWTexture *FBTexture = nullptr; + HWTexture *PaletteTexture = nullptr; + HWTexture *ScreenshotTexture = nullptr; - HWVertexBuffer *VertexBuffer; - FBVERTEX *VertexData; - HWIndexBuffer *IndexBuffer; - uint16_t *IndexData; - BufferedTris *QuadExtra; + HWVertexBuffer *VertexBuffer = nullptr; + FBVERTEX *VertexData = nullptr; + HWIndexBuffer *IndexBuffer = nullptr; + uint16_t *IndexData = nullptr; + BufferedTris *QuadExtra = nullptr; int VertexPos; int IndexPos; int QuadBatchPos; enum { BATCH_None, BATCH_Quads, BATCH_Lines } BatchType; HWPixelShader *Shaders[NUM_SHADERS]; - HWPixelShader *GammaShader; + HWPixelShader *GammaShader = nullptr; - HWSurface *BlockSurface[2]; - HWSurface *OldRenderTarget; - HWTexture *InitialWipeScreen, *FinalWipeScreen; + HWTexture *InitialWipeScreen = nullptr, *FinalWipeScreen = nullptr; class Wiper { diff --git a/src/win32/hardware.cpp 
b/src/win32/hardware.cpp index 787c0a4f38..2065a9072d 100644 --- a/src/win32/hardware.cpp +++ b/src/win32/hardware.cpp @@ -137,8 +137,9 @@ void I_InitGraphics () ticker.SetGenericRepDefault (val, CVAR_Bool); //currentrenderer = vid_renderer; - if (currentrenderer==1) Video = gl_CreateVideo(); - else Video = new Win32Video (0); + Video = gl_CreateVideo(); + //if (currentrenderer==1) Video = gl_CreateVideo(); + //else Video = new Win32Video (0); if (Video == NULL) I_FatalError ("Failed to initialize display"); diff --git a/src/win32/win32gliface.cpp b/src/win32/win32gliface.cpp index 5d68d84278..14d49c69bb 100644 --- a/src/win32/win32gliface.cpp +++ b/src/win32/win32gliface.cpp @@ -25,6 +25,7 @@ #include "gl/renderer/gl_renderer.h" #include "gl/system/gl_framebuffer.h" +#include "gl/system/gl_swframebuffer.h" extern "C" { _declspec(dllexport) DWORD NvOptimusEnablement = 0x00000001; @@ -386,7 +387,10 @@ DFrameBuffer *Win32GLVideo::CreateFrameBuffer(int width, int height, bool fs, DF //old->GetFlash(flashColor, flashAmount); delete old; } - fb = new OpenGLFrameBuffer(m_hMonitor, m_DisplayWidth, m_DisplayHeight, m_DisplayBits, m_DisplayHz, fs); + if (vid_renderer == 1) + fb = new OpenGLFrameBuffer(m_hMonitor, m_DisplayWidth, m_DisplayHeight, m_DisplayBits, m_DisplayHz, fs); + else + fb = new OpenGLSWFrameBuffer(m_hMonitor, m_DisplayWidth, m_DisplayHeight, m_DisplayBits, m_DisplayHz, fs); return fb; } diff --git a/wadsrc/static/shaders/glsl/swshader.fp b/wadsrc/static/shaders/glsl/swshader.fp new file mode 100644 index 0000000000..22364f4605 --- /dev/null +++ b/wadsrc/static/shaders/glsl/swshader.fp @@ -0,0 +1,146 @@ + +in vec4 PixelColor0; +in vec4 PixelColor1; +in vec4 PixelTexCoord0; + +out vec4 FragColor; + +uniform sampler2D Image;// : register(s0); +uniform sampler2D Palette;// : register(s1); +uniform sampler2D NewScreen;// : register(s0); +uniform sampler2D Burn;// : register(s1); + +uniform vec4 Desaturation;// : register(c1); // { Desat, 1 - Desat } +uniform 
vec4 PaletteMod;// : register(c2); +uniform vec4 Weights;// : register(c6); // RGB->Gray weighting { 77/256.0, 143/256.0, 37/256.0, 1 } +uniform vec4 Gamma;// : register(c7); + +vec4 TextureLookup(vec2 tex_coord) +{ +#if PALTEX + float index = texture(Image, tex_coord).x; + index = index * PaletteMod.x + PaletteMod.y; + return texture(Palette, vec2(index, 0.5)); +#else + return texture(Image, tex_coord); +#endif +} + +vec4 Invert(vec4 rgb) +{ +#if INVERT + rgb.rgb = Weights.www - rgb.xyz; +#endif + return rgb; +} + +float Grayscale(vec4 rgb) +{ + return dot(rgb.rgb, Weights.rgb); +} + +vec4 SampleTexture(vec2 tex_coord) +{ + return Invert(TextureLookup(tex_coord)); +} + +// Normal color calculation for most drawing modes. + +vec4 NormalColor(vec2 tex_coord, vec4 Flash, vec4 InvFlash) +{ + return Flash + SampleTexture(tex_coord) * InvFlash; +} + +// Copy the red channel to the alpha channel. Pays no attention to palettes. + +vec4 RedToAlpha(vec2 tex_coord, vec4 Flash, vec4 InvFlash) +{ + vec4 color = Invert(texture(Image, tex_coord)); + color.a = color.r; + return Flash + color * InvFlash; +} + +// Just return the value of c0. + +vec4 VertexColor(vec4 color) +{ + return color; +} + +// Emulate one of the special colormaps. (Invulnerability, gold, etc.) + +vec4 SpecialColormap(vec2 tex_coord, vec4 start, vec4 end) +{ + vec4 color = SampleTexture(tex_coord); + vec4 range = end - start; + // We can't store values greater than 1.0 in a color register, so we multiply + // the final result by 2 and expect the caller to divide the start and end by 2. + color.rgb = 2 * (start + Grayscale(color) * range).rgb; + // Duplicate alpha semantics of NormalColor. + color.a = start.a + color.a * end.a; + return color; +} + +// In-game colormap effect: fade to a particular color and multiply by another, with +// optional desaturation of the original color. Desaturation is stored in c1. +// Fade level is packed into fade.a. Fade.rgb has been premultiplied by alpha. 
+// Overall alpha is in color.a. +vec4 InGameColormap(vec2 tex_coord, vec4 color, vec4 fade) +{ + vec4 rgb = SampleTexture(tex_coord); + + // Desaturate +#if DESAT + vec3 intensity; + intensity.rgb = vec3(Grayscale(rgb) * Desaturation.x); + rgb.rgb = intensity.rgb + rgb.rgb * Desaturation.y; +#endif + + // Fade + rgb.rgb = rgb.rgb * fade.aaa + fade.rgb; + + // Shade and Alpha + rgb = rgb * color; + + return rgb; +} + +// Windowed gamma correction. + +vec4 GammaCorrection(vec2 tex_coord) +{ + vec4 color = texture(Image, tex_coord); + color.rgb = pow(color.rgb, Gamma.rgb); + return color; +} + +// The burn wipe effect. + +vec4 BurnWipe(vec4 coord) +{ + vec4 color = texture(NewScreen, coord.xy); + vec4 alpha = texture(Burn, coord.zw); + color.a = alpha.r * 2; + return color; +} + +void main() +{ +#if defined(ENORMALCOLOR) + FragColor = NormalColor(PixelTexCoord0.xy, PixelColor0, PixelColor1); +#elif defined(EREDTOALPHA) + FragColor = RedToAlpha(PixelTexCoord0.xy, PixelColor0, PixelColor1); +#elif defined(EVERTEXCOLOR) + FragColor = VertexColor(PixelColor0); +#elif defined(ESPECIALCOLORMAP) + FragColor = SpecialColormap(PixelTexCoord0.xy, PixelColor0, PixelColor1); +#elif defined(EINGAMECOLORMAP) + FragColor = InGameColormap(PixelTexCoord0.xy, PixelColor0, PixelColor1); +#elif defined(EBURNWIPE) + FragColor = BurnWipe(PixelTexCoord0); +#elif defined(EGAMMACORRECTION) + FragColor = GammaCorrection(PixelTexCoord0.xy); +#else + #error Entry point define is missing +#endif +} diff --git a/wadsrc/static/shaders/glsl/swshader.vp b/wadsrc/static/shaders/glsl/swshader.vp new file mode 100644 index 0000000000..4d00296cba --- /dev/null +++ b/wadsrc/static/shaders/glsl/swshader.vp @@ -0,0 +1,17 @@ + +in vec4 AttrPosition; +in vec4 AttrColor0; +in vec4 AttrColor1; +in vec4 AttrTexCoord0; + +out vec4 PixelColor0; +out vec4 PixelColor1; +out vec4 PixelTexCoord0; + +void main() +{ + gl_Position = AttrPosition; + PixelColor0 = AttrColor0; + PixelColor1 = AttrColor1; + PixelTexCoord0 = 
AttrTexCoord0; +} From 682b040b978f84367eb3aa559b8add3cd265fbbd Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 11 Oct 2016 00:03:46 +0200 Subject: [PATCH 177/912] OpenGL based software renderer hardware accel now works for most things --- src/gl/system/gl_swframebuffer.cpp | 84 ++++++++++++++++++++------ src/gl/system/gl_swframebuffer.h | 36 +++++++---- wadsrc/static/shaders/glsl/swshader.fp | 16 ++--- wadsrc/static/shaders/glsl/swshader.vp | 7 ++- 4 files changed, 99 insertions(+), 44 deletions(-) diff --git a/src/gl/system/gl_swframebuffer.cpp b/src/gl/system/gl_swframebuffer.cpp index 895508d8b1..1ff48d13b6 100644 --- a/src/gl/system/gl_swframebuffer.cpp +++ b/src/gl/system/gl_swframebuffer.cpp @@ -70,6 +70,7 @@ #include "gl/utility/gl_clock.h" #include "gl/utility/gl_templates.h" #include "gl/gl_functions.h" +#include "gl_debug.h" CVAR(Int, gl_showpacks, 0, 0) #ifndef WIN32 // Defined in fb_d3d9 for Windows @@ -131,6 +132,9 @@ OpenGLSWFrameBuffer::OpenGLSWFrameBuffer(void *hMonitor, int width, int height, gl_LoadExtensions(); Super::InitializeState(); + Debug = std::make_shared(); + Debug->Update(); + // SetVSync needs to be at the very top to workaround a bug in Nvidia's OpenGL driver. // If wglSwapIntervalEXT is called after glBindFramebuffer in a frame the setting is not changed! 
//SetVSync(vid_vsync); @@ -160,6 +164,7 @@ OpenGLSWFrameBuffer::OpenGLSWFrameBuffer(void *hMonitor, int width, int height, ScreenWipe = nullptr; InScene = false; QuadExtra = new BufferedTris[MAX_QUAD_BATCH]; + memset(QuadExtra, 0, sizeof(BufferedTris) * MAX_QUAD_BATCH); Atlases = nullptr; PixelDoubling = 0; @@ -288,10 +293,10 @@ bool OpenGLSWFrameBuffer::CreatePixelShader(FString vertexsrc, FString fragments if (status != GL_FALSE) { errorShader = shader->FragmentShader; glGetShaderiv(shader->FragmentShader, GL_COMPILE_STATUS, &status); } if (status == GL_FALSE) { - static char buffer[10000]; + /*static char buffer[10000]; GLsizei length = 0; buffer[0] = 0; - glGetShaderInfoLog(errorShader, 10000, &length, buffer); + glGetShaderInfoLog(errorShader, 10000, &length, buffer);*/ *outShader = nullptr; return false; @@ -307,10 +312,19 @@ bool OpenGLSWFrameBuffer::CreatePixelShader(FString vertexsrc, FString fragments *outShader = nullptr; return false; } - glBindAttribLocation(shader->Program, 0, "Position"); - glBindAttribLocation(shader->Program, 1, "Color0"); - glBindAttribLocation(shader->Program, 2, "Color1"); - glBindAttribLocation(shader->Program, 3, "TexCoord"); + glBindAttribLocation(shader->Program, 0, "AttrPosition"); + glBindAttribLocation(shader->Program, 1, "AttrColor0"); + glBindAttribLocation(shader->Program, 2, "AttrColor1"); + glBindAttribLocation(shader->Program, 3, "AttrTexCoord0"); + + shader->ConstantLocations[PSCONST_Desaturation] = glGetUniformLocation(shader->Program, "Desaturation"); + shader->ConstantLocations[PSCONST_PaletteMod] = glGetUniformLocation(shader->Program, "PaletteMod"); + shader->ConstantLocations[PSCONST_Weights] = glGetUniformLocation(shader->Program, "Weights"); + shader->ConstantLocations[PSCONST_Gamma] = glGetUniformLocation(shader->Program, "Gamma"); + shader->ImageLocation = glGetUniformLocation(shader->Program, "Image"); + shader->PaletteLocation = glGetUniformLocation(shader->Program, "Palette"); + 
shader->NewScreenLocation = glGetUniformLocation(shader->Program, "NewScreen"); + shader->BurnLocation = glGetUniformLocation(shader->Program, "Burn"); *outShader = shader.release(); return true; @@ -361,7 +375,7 @@ bool OpenGLSWFrameBuffer::CreateIndexBuffer(int size, HWIndexBuffer **outIndexBu return true; } -bool OpenGLSWFrameBuffer::CreateTexture(int width, int height, int levels, int format, HWTexture **outTexture) +bool OpenGLSWFrameBuffer::CreateTexture(const FString &name, int width, int height, int levels, int format, HWTexture **outTexture) { auto obj = std::make_unique(); @@ -389,6 +403,8 @@ bool OpenGLSWFrameBuffer::CreateTexture(int width, int height, int levels, int f glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + FGLDebug::LabelObject(GL_TEXTURE, obj->Texture, name); + glBindTexture(GL_TEXTURE_2D, oldBinding); *outTexture = obj.release(); @@ -401,15 +417,32 @@ void OpenGLSWFrameBuffer::SetGammaRamp(const GammaRamp *ramp) void OpenGLSWFrameBuffer::SetPixelShaderConstantF(int uniformIndex, const float *data, int vec4fcount) { - glUniform4fv(uniformIndex, vec4fcount, data); + assert(uniformIndex < 4 && vec4fcount == 1); // This emulation of d3d9 only works for very simple stuff + for (int i = 0; i < 4; i++) + ShaderConstants[uniformIndex * 4 + i] = data[i]; + if (CurrentShader && CurrentShader->ConstantLocations[uniformIndex] != -1) + glUniform4fv(CurrentShader->ConstantLocations[uniformIndex], vec4fcount, data); } void OpenGLSWFrameBuffer::SetHWPixelShader(HWPixelShader *shader) { - if (shader) - glUseProgram(shader->Program); - else - glUseProgram(0); + if (shader != CurrentShader) + { + if (shader) + { + glUseProgram(shader->Program); + for (int i = 0; i < 4; i++) + { + if (shader->ConstantLocations[i] != -1) + glUniform4fv(shader->ConstantLocations[i], 1, &ShaderConstants[i * 4]); + } + } + else + { + glUseProgram(0); + } + } + CurrentShader = shader; } void 
OpenGLSWFrameBuffer::SetStreamSource(HWVertexBuffer *vertexBuffer) @@ -430,6 +463,8 @@ void OpenGLSWFrameBuffer::SetIndices(HWIndexBuffer *indexBuffer) void OpenGLSWFrameBuffer::DrawTriangleFans(int count, const FBVERTEX *vertices) { + count = 2 + count; + GLint oldBinding = 0; glGetIntegerv(GL_VERTEX_ARRAY_BINDING, &oldBinding); @@ -500,7 +535,7 @@ void OpenGLSWFrameBuffer::DrawPoints(int count, const FBVERTEX *vertices) void OpenGLSWFrameBuffer::DrawLineList(int count) { - glDrawArrays(GL_LINES, 0, count); + glDrawArrays(GL_LINES, 0, count * 2); } void OpenGLSWFrameBuffer::DrawTriangleList(int minIndex, int numVertices, int startIndex, int primitiveCount) @@ -511,6 +546,8 @@ void OpenGLSWFrameBuffer::DrawTriangleList(int minIndex, int numVertices, int st void OpenGLSWFrameBuffer::Present() { SwapBuffers(); + glViewport(0, 0, GetClientWidth(), GetClientHeight()); + Debug->Update(); } //========================================================================== @@ -611,6 +648,13 @@ bool OpenGLSWFrameBuffer::LoadShaders() { break; } + + glUseProgram(Shaders[i]->Program); + if (Shaders[i]->ImageLocation != -1) glUniform1i(Shaders[i]->ImageLocation, 0); + if (Shaders[i]->PaletteLocation != -1) glUniform1i(Shaders[i]->PaletteLocation, 1); + if (Shaders[i]->NewScreenLocation != -1) glUniform1i(Shaders[i]->NewScreenLocation, 0); + if (Shaders[i]->BurnLocation != -1) glUniform1i(Shaders[i]->BurnLocation, 1); + glUseProgram(0); } if (i == NUM_SHADERS) { // Success! 
@@ -711,7 +755,7 @@ void OpenGLSWFrameBuffer::KillNativeTexs() bool OpenGLSWFrameBuffer::CreateFBTexture() { - CreateTexture(Width, Height, 1, GL_R8, &FBTexture); + CreateTexture("FBTexture", Width, Height, 1, GL_R8, &FBTexture); FBWidth = Width; FBHeight = Height; return true; @@ -725,7 +769,7 @@ bool OpenGLSWFrameBuffer::CreateFBTexture() bool OpenGLSWFrameBuffer::CreatePaletteTexture() { - if (!CreateTexture(256, 1, 1, GL_RGBA8, &PaletteTexture)) + if (!CreateTexture("PaletteTexture", 256, 1, 1, GL_RGBA8, &PaletteTexture)) { return false; } @@ -1118,7 +1162,7 @@ void OpenGLSWFrameBuffer::Draw3DPart(bool copy3d) GLint oldBinding = 0; glGetIntegerv(GL_TEXTURE_BINDING_2D, &oldBinding); glBindTexture(GL_TEXTURE_2D, FBTexture->Texture); - glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, Width, Height, GL_R8, GL_UNSIGNED_BYTE, 0); + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, Width, Height, GL_RED, GL_UNSIGNED_BYTE, 0); glBindTexture(GL_TEXTURE_2D, oldBinding); } @@ -1623,7 +1667,7 @@ OpenGLSWFrameBuffer::Atlas::Atlas(OpenGLSWFrameBuffer *fb, int w, int h, int for } *prev = this; - fb->CreateTexture(w, h, 1, format, &Tex); + fb->CreateTexture("Atlas", w, h, 1, format, &Tex); Width = w; Height = h; } @@ -1907,7 +1951,7 @@ bool OpenGLSWFrameBuffer::OpenGLTex::Update() glBindTexture(GL_TEXTURE_2D, Box->Owner->Tex->Texture); glTexSubImage2D(GL_TEXTURE_2D, 0, rect.left, rect.top, rect.right - rect.left, rect.bottom - rect.top, format == GL_RGBA8 ? 
GL_BGRA : GL_RED, GL_UNSIGNED_BYTE, 0); glBindTexture(GL_TEXTURE_2D, oldBinding); - glBindTexture(GL_PIXEL_UNPACK_BUFFER, 0); + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); return true; } @@ -1996,7 +2040,7 @@ OpenGLSWFrameBuffer::OpenGLPal::OpenGLPal(FRemapTable *remap, OpenGLSWFrameBuffe BorderColor = 0; RoundedPaletteSize = count; - if (fb->CreateTexture(count, 1, 1, GL_RGBA8, &Tex)) + if (fb->CreateTexture("Pal", count, 1, 1, GL_RGBA8, &Tex)) { if (!Update()) { @@ -3312,7 +3356,7 @@ void OpenGLSWFrameBuffer::SetColorOverlay(uint32_t color, float alpha, uint32_t } } -void OpenGLSWFrameBuffer::EnableAlphaTest(BOOL enabled) +void OpenGLSWFrameBuffer::EnableAlphaTest(bool enabled) { if (enabled != AlphaTestEnabled) { diff --git a/src/gl/system/gl_swframebuffer.h b/src/gl/system/gl_swframebuffer.h index eed9b6f7f4..2c50b2e97a 100644 --- a/src/gl/system/gl_swframebuffer.h +++ b/src/gl/system/gl_swframebuffer.h @@ -136,12 +136,18 @@ private: int Program = 0; int VertexShader = 0; int FragmentShader = 0; + + int ConstantLocations[4]; + int ImageLocation = -1; + int PaletteLocation = -1; + int NewScreenLocation = -1; + int BurnLocation = -1; }; bool CreatePixelShader(FString vertexsrc, FString fragmentsrc, const FString &defines, HWPixelShader **outShader); bool CreateVertexBuffer(int size, HWVertexBuffer **outVertexBuffer); bool CreateIndexBuffer(int size, HWIndexBuffer **outIndexBuffer); - bool CreateTexture(int width, int height, int levels, int format, HWTexture **outTexture); + bool CreateTexture(const FString &name, int width, int height, int levels, int format, HWTexture **outTexture); void SetGammaRamp(const GammaRamp *ramp); void SetPixelShaderConstantF(int uniformIndex, const float *data, int vec4fcount); void SetHWPixelShader(HWPixelShader *shader); @@ -154,7 +160,7 @@ private: void Present(); static uint32_t ColorARGB(uint32_t a, uint32_t r, uint32_t g, uint32_t b) { return ((a & 0xff) << 24) | ((r & 0xff) << 16) | ((g & 0xff) << 8) | ((b) & 0xff); } - static 
uint32_t ColorRGBA(uint32_t a, uint32_t r, uint32_t g, uint32_t b) { return ColorARGB(a, r, g, b); } + static uint32_t ColorRGBA(uint32_t r, uint32_t g, uint32_t b, uint32_t a) { return ColorARGB(a, r, g, b); } static uint32_t ColorXRGB(uint32_t r, uint32_t g, uint32_t b) { return ColorARGB(0xff, r, g, b); } static uint32_t ColorValue(float r, float g, float b, float a) { return ColorRGBA((uint32_t)(r * 255.0f), (uint32_t)(g * 255.0f), (uint32_t)(b * 255.0f), (uint32_t)(a * 255.0f)); } @@ -297,10 +303,10 @@ private: enum { - PSCONST_Desaturation = 1, - PSCONST_PaletteMod = 2, - PSCONST_Weights = 6, - PSCONST_Gamma = 7, + PSCONST_Desaturation = 0, + PSCONST_PaletteMod = 1, + PSCONST_Weights = 2, + PSCONST_Gamma = 3, }; enum { @@ -366,7 +372,7 @@ private: void EndBatch(); // State - void EnableAlphaTest(BOOL enabled); + void EnableAlphaTest(bool enabled); void SetAlphaBlend(int op, int srcblend = 0, int destblend = 0); void SetConstant(int cnum, float r, float g, float b, float a); void SetPixelShader(HWPixelShader *shader); @@ -377,13 +383,17 @@ private: template static void SafeRelease(T &x) { if (x != nullptr) { delete x; x = nullptr; } } - std::unique_ptr StreamVertexBuffer; + std::shared_ptr Debug; - BOOL AlphaTestEnabled; - BOOL AlphaBlendEnabled; - int AlphaBlendOp; - int AlphaSrcBlend; - int AlphaDestBlend; + std::unique_ptr StreamVertexBuffer; + float ShaderConstants[16]; + HWPixelShader *CurrentShader = nullptr; + + bool AlphaTestEnabled = false; + bool AlphaBlendEnabled = false; + int AlphaBlendOp = 0; + int AlphaSrcBlend = 0; + int AlphaDestBlend = 0; float Constant[3][4]; uint32_t CurBorderColor; HWPixelShader *CurPixelShader; diff --git a/wadsrc/static/shaders/glsl/swshader.fp b/wadsrc/static/shaders/glsl/swshader.fp index 22364f4605..639ea92e5e 100644 --- a/wadsrc/static/shaders/glsl/swshader.fp +++ b/wadsrc/static/shaders/glsl/swshader.fp @@ -5,15 +5,15 @@ in vec4 PixelTexCoord0; out vec4 FragColor; -uniform sampler2D Image;// : register(s0); -uniform 
sampler2D Palette;// : register(s1); -uniform sampler2D NewScreen;// : register(s0); -uniform sampler2D Burn;// : register(s1); +uniform sampler2D Image; +uniform sampler2D Palette; +uniform sampler2D NewScreen; +uniform sampler2D Burn; -uniform vec4 Desaturation;// : register(c1); // { Desat, 1 - Desat } -uniform vec4 PaletteMod;// : register(c2); -uniform vec4 Weights;// : register(c6); // RGB->Gray weighting { 77/256.0, 143/256.0, 37/256.0, 1 } -uniform vec4 Gamma;// : register(c7); +uniform vec4 Desaturation; // { Desat, 1 - Desat } +uniform vec4 PaletteMod; +uniform vec4 Weights; // RGB->Gray weighting { 77/256.0, 143/256.0, 37/256.0, 1 } +uniform vec4 Gamma; vec4 TextureLookup(vec2 tex_coord) { diff --git a/wadsrc/static/shaders/glsl/swshader.vp b/wadsrc/static/shaders/glsl/swshader.vp index 4d00296cba..35254a0420 100644 --- a/wadsrc/static/shaders/glsl/swshader.vp +++ b/wadsrc/static/shaders/glsl/swshader.vp @@ -10,8 +10,9 @@ out vec4 PixelTexCoord0; void main() { - gl_Position = AttrPosition; - PixelColor0 = AttrColor0; - PixelColor1 = AttrColor1; + gl_Position = vec4(AttrPosition.xy / vec2(1920*2,1080*2) * 2.0 - 1.0, 1.0, 1.0); + gl_Position.y = -gl_Position.y; + PixelColor0 = AttrColor0.bgra; + PixelColor1 = AttrColor1.bgra; PixelTexCoord0 = AttrTexCoord0; } From 01dc9de8d15cf5ea43b5742673fac926e4bd5223 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 11 Oct 2016 10:27:18 +0200 Subject: [PATCH 178/912] Misc bug fixes and performance improvements --- src/gl/system/gl_swframebuffer.cpp | 112 +++++++++++++++---------- src/gl/system/gl_swframebuffer.h | 59 ++++++++----- src/win32/hardware.cpp | 10 ++- wadsrc/static/shaders/glsl/swshader.vp | 4 +- 4 files changed, 113 insertions(+), 72 deletions(-) diff --git a/src/gl/system/gl_swframebuffer.cpp b/src/gl/system/gl_swframebuffer.cpp index 1ff48d13b6..d2f2e3aee6 100644 --- a/src/gl/system/gl_swframebuffer.cpp +++ b/src/gl/system/gl_swframebuffer.cpp @@ -130,14 +130,14 @@ 
OpenGLSWFrameBuffer::OpenGLSWFrameBuffer(void *hMonitor, int width, int height, ogl_LoadFunctions(); } gl_LoadExtensions(); - Super::InitializeState(); - - Debug = std::make_shared(); - Debug->Update(); + InitializeState(); // SetVSync needs to be at the very top to workaround a bug in Nvidia's OpenGL driver. // If wglSwapIntervalEXT is called after glBindFramebuffer in a frame the setting is not changed! - //SetVSync(vid_vsync); + Super::SetVSync(vid_vsync); + + Debug = std::make_shared(); + Debug->Update(); VertexBuffer = nullptr; IndexBuffer = nullptr; @@ -216,7 +216,7 @@ OpenGLSWFrameBuffer::~OpenGLSWFrameBuffer() OpenGLSWFrameBuffer::HWTexture::~HWTexture() { if (Texture != 0) glDeleteTextures(1, (GLuint*)&Texture); - if (Buffer != 0) glDeleteBuffers(1, (GLuint*)&Buffer); + if (Buffers[0] != 0) glDeleteBuffers(2, (GLuint*)Buffers); } OpenGLSWFrameBuffer::HWVertexBuffer::~HWVertexBuffer() @@ -228,7 +228,7 @@ OpenGLSWFrameBuffer::HWVertexBuffer::~HWVertexBuffer() OpenGLSWFrameBuffer::FBVERTEX *OpenGLSWFrameBuffer::HWVertexBuffer::Lock() { glBindBuffer(GL_ARRAY_BUFFER, Buffer); - return (FBVERTEX*)glMapBuffer(GL_ARRAY_BUFFER, GL_WRITE_ONLY); + return (FBVERTEX*)glMapBufferRange(GL_ARRAY_BUFFER, 0, Size, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT); } void OpenGLSWFrameBuffer::HWVertexBuffer::Unlock() @@ -246,7 +246,7 @@ uint16_t *OpenGLSWFrameBuffer::HWIndexBuffer::Lock() { glGetIntegerv(GL_ELEMENT_ARRAY_BUFFER_BINDING, &LockedOldBinding); glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, Buffer); - return (uint16_t*)glMapBuffer(GL_ELEMENT_ARRAY_BUFFER, GL_WRITE_ONLY); + return (uint16_t*)glMapBufferRange(GL_ELEMENT_ARRAY_BUFFER, 0, Size, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT); } void OpenGLSWFrameBuffer::HWIndexBuffer::Unlock() @@ -321,6 +321,7 @@ bool OpenGLSWFrameBuffer::CreatePixelShader(FString vertexsrc, FString fragments shader->ConstantLocations[PSCONST_PaletteMod] = 
glGetUniformLocation(shader->Program, "PaletteMod"); shader->ConstantLocations[PSCONST_Weights] = glGetUniformLocation(shader->Program, "Weights"); shader->ConstantLocations[PSCONST_Gamma] = glGetUniformLocation(shader->Program, "Gamma"); + shader->ConstantLocations[PSCONST_ScreenSize] = glGetUniformLocation(shader->Program, "ScreenSize"); shader->ImageLocation = glGetUniformLocation(shader->Program, "Image"); shader->PaletteLocation = glGetUniformLocation(shader->Program, "Palette"); shader->NewScreenLocation = glGetUniformLocation(shader->Program, "NewScreen"); @@ -334,6 +335,8 @@ bool OpenGLSWFrameBuffer::CreateVertexBuffer(int size, HWVertexBuffer **outVerte { auto obj = std::make_unique(); + obj->Size = size; + GLint oldBinding = 0; glGetIntegerv(GL_VERTEX_ARRAY_BINDING, &oldBinding); @@ -341,6 +344,7 @@ bool OpenGLSWFrameBuffer::CreateVertexBuffer(int size, HWVertexBuffer **outVerte glGenBuffers(1, (GLuint*)&obj->Buffer); glBindVertexArray(obj->VertexArray); glBindBuffer(GL_ARRAY_BUFFER, obj->Buffer); + FGLDebug::LabelObject(GL_BUFFER, obj->Buffer, "VertexBuffer"); glBufferData(GL_ARRAY_BUFFER, size, nullptr, GL_STREAM_DRAW); glEnableVertexAttribArray(0); glEnableVertexAttribArray(1); @@ -362,11 +366,14 @@ bool OpenGLSWFrameBuffer::CreateIndexBuffer(int size, HWIndexBuffer **outIndexBu { auto obj = std::make_unique(); + obj->Size = size; + GLint oldBinding = 0; glGetIntegerv(GL_ELEMENT_ARRAY_BUFFER_BINDING, &oldBinding); glGenBuffers(1, (GLuint*)&obj->Buffer); glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, obj->Buffer); + FGLDebug::LabelObject(GL_BUFFER, obj->Buffer, "IndexBuffer"); glBufferData(GL_ELEMENT_ARRAY_BUFFER, size, nullptr, GL_STREAM_DRAW); glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, oldBinding); @@ -417,7 +424,7 @@ void OpenGLSWFrameBuffer::SetGammaRamp(const GammaRamp *ramp) void OpenGLSWFrameBuffer::SetPixelShaderConstantF(int uniformIndex, const float *data, int vec4fcount) { - assert(uniformIndex < 4 && vec4fcount == 1); // This emulation of d3d9 only works 
for very simple stuff + assert(uniformIndex < NumPSCONST && vec4fcount == 1); // This emulation of d3d9 only works for very simple stuff for (int i = 0; i < 4; i++) ShaderConstants[uniformIndex * 4 + i] = data[i]; if (CurrentShader && CurrentShader->ConstantLocations[uniformIndex] != -1) @@ -431,7 +438,7 @@ void OpenGLSWFrameBuffer::SetHWPixelShader(HWPixelShader *shader) if (shader) { glUseProgram(shader->Program); - for (int i = 0; i < 4; i++) + for (int i = 0; i < NumPSCONST; i++) { if (shader->ConstantLocations[i] != -1) glUniform4fv(shader->ConstantLocations[i], 1, &ShaderConstants[i * 4]); @@ -547,6 +554,8 @@ void OpenGLSWFrameBuffer::Present() { SwapBuffers(); glViewport(0, 0, GetClientWidth(), GetClientHeight()); + float screensize[4] = { (float)GetClientWidth(), (float)GetClientHeight(), 1.0f, 1.0f }; + SetPixelShaderConstantF(PSCONST_ScreenSize, screensize, 1); Debug->Update(); } @@ -561,7 +570,7 @@ void OpenGLSWFrameBuffer::Present() void OpenGLSWFrameBuffer::SetInitialState() { - AlphaBlendEnabled = FALSE; + AlphaBlendEnabled = false; AlphaBlendOp = GL_FUNC_ADD; AlphaSrcBlend = 0; AlphaDestBlend = 0; @@ -583,7 +592,10 @@ void OpenGLSWFrameBuffer::SetInitialState() float weights[4] = { 77 / 256.f, 143 / 256.f, 37 / 256.f, 1 }; SetPixelShaderConstantF(PSCONST_Weights, weights, 1); - AlphaTestEnabled = FALSE; + float screensize[4] = { (float)GetClientWidth(), (float)GetClientHeight(), 1.0f, 1.0f }; + SetPixelShaderConstantF(PSCONST_ScreenSize, screensize, 1); + + AlphaTestEnabled = false; CurBorderColor = 0; @@ -1043,6 +1055,7 @@ void OpenGLSWFrameBuffer::Update() if (NeedPalUpdate) { UploadPalette(); + NeedPalUpdate = false; } BlitCycles.Reset(); @@ -1075,11 +1088,6 @@ void OpenGLSWFrameBuffer::Flip() DrawLetterbox(); DoWindowedGamma(); - // Limiting the frame rate is as simple as waiting for the timer to signal this event. 
- if (FPSLimitEvent != nullptr) - { - WaitForSingleObject(FPSLimitEvent, 1000); - } Present(); InScene = false; @@ -1130,18 +1138,21 @@ void OpenGLSWFrameBuffer::Draw3DPart(bool copy3d) { if (copy3d) { - if (FBTexture->Buffer == 0) + if (FBTexture->Buffers[0] == 0) { - glGenBuffers(1, (GLuint*)&FBTexture->Buffer); - glBindBuffer(GL_PIXEL_UNPACK_BUFFER, FBTexture->Buffer); + glGenBuffers(2, (GLuint*)FBTexture->Buffers); + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, FBTexture->Buffers[0]); + glBufferData(GL_PIXEL_UNPACK_BUFFER, Width * Height, nullptr, GL_STREAM_DRAW); + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, FBTexture->Buffers[1]); glBufferData(GL_PIXEL_UNPACK_BUFFER, Width * Height, nullptr, GL_STREAM_DRAW); } else { - glBindBuffer(GL_PIXEL_UNPACK_BUFFER, FBTexture->Buffer); + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, FBTexture->Buffers[FBTexture->CurrentBuffer]); + FBTexture->CurrentBuffer = (FBTexture->CurrentBuffer + 1) & 1; } - uint8_t *dest = (uint8_t*)glMapBuffer(GL_PIXEL_UNPACK_BUFFER, GL_WRITE_ONLY); + uint8_t *dest = (uint8_t*)glMapBufferRange(GL_PIXEL_UNPACK_BUFFER, 0, Width * Height, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT); if (dest) { if (Pitch == Width) @@ -1178,7 +1189,7 @@ void OpenGLSWFrameBuffer::Draw3DPart(bool copy3d) SetPaletteTexture(PaletteTexture, 256, BorderColor); memset(Constant, 0, sizeof(Constant)); SetAlphaBlend(0); - EnableAlphaTest(FALSE); + EnableAlphaTest(false); SetPixelShader(Shaders[SHADER_NormalColorPal]); if (copy3d) { @@ -1252,7 +1263,7 @@ void OpenGLSWFrameBuffer::DoWindowedGamma() SetTexture(0, TempRenderTexture); SetPixelShader(Windowed && GammaShader ? 
GammaShader : Shaders[SHADER_NormalColor]); SetAlphaBlend(0); - EnableAlphaTest(FALSE); + EnableAlphaTest(false); DrawTriangleFans(2, verts); delete OldRenderTarget; OldRenderTarget = nullptr; @@ -1261,18 +1272,21 @@ void OpenGLSWFrameBuffer::DoWindowedGamma() void OpenGLSWFrameBuffer::UploadPalette() { - if (PaletteTexture->Buffer == 0) + if (PaletteTexture->Buffers[0] == 0) { - glGenBuffers(1, (GLuint*)&PaletteTexture->Buffer); - glBindBuffer(GL_PIXEL_UNPACK_BUFFER, PaletteTexture->Buffer); + glGenBuffers(2, (GLuint*)PaletteTexture->Buffers); + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, PaletteTexture->Buffers[0]); + glBufferData(GL_PIXEL_UNPACK_BUFFER, 256 * 4, nullptr, GL_STREAM_DRAW); + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, PaletteTexture->Buffers[1]); glBufferData(GL_PIXEL_UNPACK_BUFFER, 256 * 4, nullptr, GL_STREAM_DRAW); } else { - glBindBuffer(GL_PIXEL_UNPACK_BUFFER, PaletteTexture->Buffer); + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, PaletteTexture->Buffers[PaletteTexture->CurrentBuffer]); + PaletteTexture->CurrentBuffer = (PaletteTexture->CurrentBuffer + 1) & 1; } - uint8_t *pix = (uint8_t *)glMapBuffer(GL_PIXEL_UNPACK_BUFFER, GL_WRITE_ONLY); + uint8_t *pix = (uint8_t*)glMapBufferRange(GL_PIXEL_UNPACK_BUFFER, 0, 256 * 4, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT); if (pix) { int i; @@ -1358,6 +1372,7 @@ void OpenGLSWFrameBuffer::SetVSync(bool vsync) VSync = vsync; Reset(); } + Super::SetVSync(vsync); } void OpenGLSWFrameBuffer::NewRefreshRate() @@ -1510,7 +1525,7 @@ void OpenGLSWFrameBuffer::DrawPackedTextures(int packnum) BufferedTris *quad = &QuadExtra[QuadBatchPos]; FBVERTEX *vert = &VertexData[VertexPos]; - quad->Group1 = 0; + quad->ClearSetup(); if (pack->Format == GL_R8/* && !tex->IsGray*/) { quad->Flags = BQF_WrapUV | BQF_GamePalette/* | BQF_DisableAlphaTest*/; @@ -1885,8 +1900,8 @@ bool OpenGLSWFrameBuffer::OpenGLTex::Update() rect = Box->Area; - if (Box->Owner->Tex->Buffer == 0) - glGenBuffers(1, 
(GLuint*)&Box->Owner->Tex->Buffer); + if (Box->Owner->Tex->Buffers[0] == 0) + glGenBuffers(2, (GLuint*)Box->Owner->Tex->Buffers); int bytesPerPixel = 4; switch (format) @@ -1896,11 +1911,13 @@ bool OpenGLSWFrameBuffer::OpenGLTex::Update() default: return false; } - glBindBuffer(GL_PIXEL_UNPACK_BUFFER, Box->Owner->Tex->Buffer); - glBufferData(GL_PIXEL_UNPACK_BUFFER, (rect.right - rect.left) * (rect.bottom - rect.top) * bytesPerPixel, nullptr, GL_STREAM_DRAW); + int buffersize = (rect.right - rect.left) * (rect.bottom - rect.top) * bytesPerPixel; + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, Box->Owner->Tex->Buffers[Box->Owner->Tex->CurrentBuffer]); + glBufferData(GL_PIXEL_UNPACK_BUFFER, buffersize, nullptr, GL_STREAM_DRAW); + Box->Owner->Tex->CurrentBuffer = (Box->Owner->Tex->CurrentBuffer + 1) & 1; int pitch = (rect.right - rect.left) * bytesPerPixel; - uint8_t *bits = (uint8_t *)glMapBuffer(GL_PIXEL_UNPACK_BUFFER, GL_WRITE_ONLY); + uint8_t *bits = (uint8_t *)glMapBufferRange(GL_PIXEL_UNPACK_BUFFER, 0, buffersize, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT); dest = bits; if (!dest) { @@ -2088,18 +2105,21 @@ bool OpenGLSWFrameBuffer::OpenGLPal::Update() assert(Tex != nullptr); - if (Tex->Buffer == 0) + if (Tex->Buffers[0] == 0) { - glGenBuffers(1, (GLuint*)&Tex->Buffer); - glBindBuffer(GL_PIXEL_UNPACK_BUFFER, Tex->Buffer); + glGenBuffers(2, (GLuint*)Tex->Buffers); + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, Tex->Buffers[0]); + glBufferData(GL_PIXEL_UNPACK_BUFFER, Remap->NumEntries * 4, nullptr, GL_STREAM_DRAW); + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, Tex->Buffers[1]); glBufferData(GL_PIXEL_UNPACK_BUFFER, Remap->NumEntries * 4, nullptr, GL_STREAM_DRAW); } else { - glBindBuffer(GL_PIXEL_UNPACK_BUFFER, Tex->Buffer); + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, Tex->Buffers[Tex->CurrentBuffer]); + Tex->CurrentBuffer = (Tex->CurrentBuffer + 1) & 1; } - buff = (uint32_t *)glMapBuffer(GL_PIXEL_UNPACK_BUFFER, GL_WRITE_ONLY); + buff = (uint32_t 
*)glMapBufferRange(GL_PIXEL_UNPACK_BUFFER, 0, Remap->NumEntries * 4, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT); if (buff == nullptr) { return false; @@ -2625,7 +2645,7 @@ void OpenGLSWFrameBuffer::FlatFill(int left, int top, int right, int bottom, FTe BufferedTris *quad = &QuadExtra[QuadBatchPos]; FBVERTEX *vert = &VertexData[VertexPos]; - quad->Group1 = 0; + quad->ClearSetup(); if (tex->GetTexFormat() == GL_R8 && !tex->IsGray) { quad->Flags = BQF_WrapUV | BQF_GamePalette; // | BQF_DisableAlphaTest; @@ -2743,7 +2763,7 @@ void OpenGLSWFrameBuffer::FillSimplePoly(FTexture *texture, FVector2 *points, in color0 = 0; color1 = 0xFFFFFFFF; - quad->Group1 = 0; + quad->ClearSetup(); if (tex->GetTexFormat() == GL_R8 && !tex->IsGray) { quad->Flags = BQF_WrapUV | BQF_GamePalette | BQF_DisableAlphaTest; @@ -2830,7 +2850,7 @@ void OpenGLSWFrameBuffer::AddColorOnlyQuad(int left, int top, int width, int hei float x = float(left) - 0.5f; float y = float(top) - 0.5f + (GatheringWipeScreen ? 
0 : LBOffset); - quad->Group1 = 0; + quad->ClearSetup(); quad->ShaderNum = BQS_ColorOnly; if ((color & 0xFF000000) != 0xFF000000) { @@ -3004,7 +3024,7 @@ void OpenGLSWFrameBuffer::EndQuadBatch() { const BufferedTris *q2 = &QuadExtra[j]; if (quad->Texture != q2->Texture || - quad->Group1 != q2->Group1 || + !quad->IsSameSetup(*q2) || quad->Palette != q2->Palette) { break; @@ -3371,7 +3391,7 @@ void OpenGLSWFrameBuffer::SetAlphaBlend(int op, int srcblend, int destblend) { // Disable alpha blend if (AlphaBlendEnabled) { - AlphaBlendEnabled = FALSE; + AlphaBlendEnabled = false; glDisable(GL_BLEND); } } @@ -3382,7 +3402,7 @@ void OpenGLSWFrameBuffer::SetAlphaBlend(int op, int srcblend, int destblend) if (!AlphaBlendEnabled) { - AlphaBlendEnabled = TRUE; + AlphaBlendEnabled = true; glEnable(GL_BLEND); } if (AlphaBlendOp != op) diff --git a/src/gl/system/gl_swframebuffer.h b/src/gl/system/gl_swframebuffer.h index 2c50b2e97a..fca5c0839b 100644 --- a/src/gl/system/gl_swframebuffer.h +++ b/src/gl/system/gl_swframebuffer.h @@ -78,7 +78,16 @@ private: uint32_t color0, color1; FLOAT tu, tv; }; - //#define D3DFVF_FBVERTEX (D3DFVF_XYZRHW | D3DFVF_DIFFUSE | D3DFVF_SPECULAR | D3DFVF_TEX1) + + enum + { + PSCONST_Desaturation, + PSCONST_PaletteMod, + PSCONST_Weights, + PSCONST_Gamma, + PSCONST_ScreenSize, + NumPSCONST + }; struct GammaRamp { @@ -93,10 +102,12 @@ private: class HWTexture { public: + HWTexture() { Buffers[0] = 0; Buffers[1] = 0; } ~HWTexture(); int Texture = 0; - int Buffer = 0; + int Buffers[2]; + int CurrentBuffer = 0; int WrapS = 0; int WrapT = 0; int Format = 0; @@ -112,6 +123,7 @@ private: int VertexArray = 0; int Buffer = 0; + int Size = 0; }; class HWIndexBuffer @@ -123,6 +135,7 @@ private: void Unlock(); int Buffer = 0; + int Size = 0; private: int LockedOldBinding = 0; @@ -137,7 +150,7 @@ private: int VertexShader = 0; int FragmentShader = 0; - int ConstantLocations[4]; + int ConstantLocations[NumPSCONST]; int ImageLocation = -1; int PaletteLocation = -1; int 
NewScreenLocation = -1; @@ -283,31 +296,33 @@ private: struct BufferedTris { - union - { - struct - { - uint8_t Flags; - uint8_t ShaderNum : 4; - int BlendOp; - int SrcBlend, DestBlend; - }; - uint32_t Group1; - }; + uint8_t Flags; + uint8_t ShaderNum; + int BlendOp; + int SrcBlend; + int DestBlend; + uint8_t Desat; OpenGLPal *Palette; HWTexture *Texture; uint16_t NumVerts; // Number of _unique_ vertices used by this set. uint16_t NumTris; // Number of triangles used by this set. + + void ClearSetup() + { + Flags = 0; + ShaderNum = 0; + BlendOp = 0; + SrcBlend = 0; + DestBlend = 0; + } + + bool IsSameSetup(const BufferedTris &other) const + { + return Flags == other.Flags && ShaderNum == other.ShaderNum && BlendOp == other.BlendOp && SrcBlend == other.SrcBlend && DestBlend == other.DestBlend; + } }; - enum - { - PSCONST_Desaturation = 0, - PSCONST_PaletteMod = 1, - PSCONST_Weights = 2, - PSCONST_Gamma = 3, - }; enum { SHADER_NormalColor, @@ -386,7 +401,7 @@ private: std::shared_ptr Debug; std::unique_ptr StreamVertexBuffer; - float ShaderConstants[16]; + float ShaderConstants[NumPSCONST * 4]; HWPixelShader *CurrentShader = nullptr; bool AlphaTestEnabled = false; diff --git a/src/win32/hardware.cpp b/src/win32/hardware.cpp index 2065a9072d..1d6e654774 100644 --- a/src/win32/hardware.cpp +++ b/src/win32/hardware.cpp @@ -136,10 +136,14 @@ void I_InitGraphics () val.Bool = !!Args->CheckParm ("-devparm"); ticker.SetGenericRepDefault (val, CVAR_Bool); - //currentrenderer = vid_renderer; +//#define USE_D3D9_VIDEO +#ifdef USE_D3D9_VIDEO + if (currentrenderer == 1) Video = gl_CreateVideo(); + else Video = new Win32Video(0); +#else + currentrenderer = vid_renderer; Video = gl_CreateVideo(); - //if (currentrenderer==1) Video = gl_CreateVideo(); - //else Video = new Win32Video (0); +#endif if (Video == NULL) I_FatalError ("Failed to initialize display"); diff --git a/wadsrc/static/shaders/glsl/swshader.vp b/wadsrc/static/shaders/glsl/swshader.vp index 35254a0420..a95be1e1ac 
100644 --- a/wadsrc/static/shaders/glsl/swshader.vp +++ b/wadsrc/static/shaders/glsl/swshader.vp @@ -8,9 +8,11 @@ out vec4 PixelColor0; out vec4 PixelColor1; out vec4 PixelTexCoord0; +uniform vec4 ScreenSize; + void main() { - gl_Position = vec4(AttrPosition.xy / vec2(1920*2,1080*2) * 2.0 - 1.0, 1.0, 1.0); + gl_Position = vec4(AttrPosition.xy / ScreenSize.xy * 2.0 - 1.0, 1.0, 1.0); gl_Position.y = -gl_Position.y; PixelColor0 = AttrColor0.bgra; PixelColor1 = AttrColor1.bgra; From 7911302ad85d204a8e286ce2282665e5474a9f33 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 11 Oct 2016 13:09:32 +0200 Subject: [PATCH 179/912] Hook in gamma shader --- src/gl/system/gl_swframebuffer.cpp | 104 ++++++++++++++++++++--------- src/gl/system/gl_swframebuffer.h | 15 ++++- src/win32/hardware.cpp | 1 - 3 files changed, 86 insertions(+), 34 deletions(-) diff --git a/src/gl/system/gl_swframebuffer.cpp b/src/gl/system/gl_swframebuffer.cpp index d2f2e3aee6..4f8c257424 100644 --- a/src/gl/system/gl_swframebuffer.cpp +++ b/src/gl/system/gl_swframebuffer.cpp @@ -150,7 +150,6 @@ OpenGLSWFrameBuffer::OpenGLSWFrameBuffer(void *hMonitor, int width, int height, { Shaders[i] = nullptr; } - GammaShader = nullptr; VSync = vid_vsync; BlendingRect.left = 0; BlendingRect.top = 0; @@ -213,6 +212,12 @@ OpenGLSWFrameBuffer::~OpenGLSWFrameBuffer() delete[] QuadExtra; } +OpenGLSWFrameBuffer::HWFrameBuffer::~HWFrameBuffer() +{ + if (Framebuffer != 0) glDeleteFramebuffers(1, (GLuint*)&Framebuffer); + delete Texture; +} + OpenGLSWFrameBuffer::HWTexture::~HWTexture() { if (Texture != 0) glDeleteTextures(1, (GLuint*)&Texture); @@ -262,6 +267,43 @@ OpenGLSWFrameBuffer::HWPixelShader::~HWPixelShader() if (FragmentShader != 0) glDeleteShader(FragmentShader); } +bool OpenGLSWFrameBuffer::CreateFrameBuffer(const FString &name, int width, int height, HWFrameBuffer **outFramebuffer) +{ + auto fb = std::make_unique(); + + if (!CreateTexture(name, width, height, 1, GL_RGBA16F, &fb->Texture)) + { + 
outFramebuffer = nullptr; + return false; + } + + glGenFramebuffers(1, (GLuint*)&fb->Framebuffer); + + GLint oldFramebufferBinding = 0, oldTextureBinding = 0; + glGetIntegerv(GL_FRAMEBUFFER_BINDING, &oldFramebufferBinding); + glGetIntegerv(GL_TEXTURE_BINDING_2D, &oldTextureBinding); + + glBindFramebuffer(GL_FRAMEBUFFER, fb->Framebuffer); + FGLDebug::LabelObject(GL_FRAMEBUFFER, fb->Framebuffer, name); + + glBindTexture(GL_TEXTURE_2D, fb->Texture->Texture); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, fb->Texture->Texture, 0); + + GLenum result = glCheckFramebufferStatus(GL_FRAMEBUFFER); + + glBindFramebuffer(GL_FRAMEBUFFER, oldFramebufferBinding); + glBindTexture(GL_TEXTURE_2D, oldTextureBinding); + + if (result != GL_FRAMEBUFFER_COMPLETE) + { + outFramebuffer = nullptr; + return false; + } + + *outFramebuffer = fb.release(); + return true; +} + bool OpenGLSWFrameBuffer::CreatePixelShader(FString vertexsrc, FString fragmentsrc, const FString &defines, HWPixelShader **outShader) { auto shader = std::make_unique(); @@ -398,6 +440,7 @@ bool OpenGLSWFrameBuffer::CreateTexture(const FString &name, int width, int heig { case GL_R8: srcformat = GL_RED; break; case GL_RGBA8: srcformat = GL_BGRA; break; + case GL_RGBA16F: srcformat = GL_RGBA; break; case GL_COMPRESSED_RGB_S3TC_DXT1_EXT: srcformat = GL_RGB; break; case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT: srcformat = GL_RGBA; break; case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT: srcformat = GL_RGBA; break; @@ -552,11 +595,28 @@ void OpenGLSWFrameBuffer::DrawTriangleList(int minIndex, int numVertices, int st void OpenGLSWFrameBuffer::Present() { + glBindFramebuffer(GL_FRAMEBUFFER, 0); + + FBVERTEX verts[4]; + + CalcFullscreenCoords(verts, false, true, 0, 0xFFFFFFFF); + for (int i = 0; i < 4; i++) + verts[i].tv = 1.0f - verts[i].tv; + SetTexture(0, OutputFB->Texture); + SetPixelShader(Shaders[SHADER_GammaCorrection]); + SetAlphaBlend(0); + EnableAlphaTest(false); + DrawTriangleFans(2, verts); + 
SwapBuffers(); + Debug->Update(); + glViewport(0, 0, GetClientWidth(), GetClientHeight()); + float screensize[4] = { (float)GetClientWidth(), (float)GetClientHeight(), 1.0f, 1.0f }; SetPixelShaderConstantF(PSCONST_ScreenSize, screensize, 1); - Debug->Update(); + + glBindFramebuffer(GL_FRAMEBUFFER, OutputFB->Framebuffer); } //========================================================================== @@ -617,6 +677,11 @@ bool OpenGLSWFrameBuffer::CreateResources() { return false; } + + if (!CreateFrameBuffer("OutputFB", Width, Height, &OutputFB)) + return false; + glBindFramebuffer(GL_FRAMEBUFFER, OutputFB->Framebuffer); + if (!CreateFBTexture() || !CreatePaletteTexture()) { @@ -698,7 +763,6 @@ void OpenGLSWFrameBuffer::ReleaseResources() { SafeRelease(Shaders[i]); } - GammaShader = nullptr; if (ScreenWipe != nullptr) { delete ScreenWipe; @@ -720,11 +784,17 @@ void OpenGLSWFrameBuffer::ReleaseDefaultPoolItems() SafeRelease(InitialWipeScreen); SafeRelease(VertexBuffer); SafeRelease(IndexBuffer); + SafeRelease(OutputFB); } bool OpenGLSWFrameBuffer::Reset() { ReleaseDefaultPoolItems(); + + if (!CreateFrameBuffer("OutputFB", Width, Height, &OutputFB)) + return false; + glBindFramebuffer(GL_FRAMEBUFFER, OutputFB->Framebuffer); + if (!CreateFBTexture() || !CreateVertexes()) { return false; @@ -1086,7 +1156,6 @@ void OpenGLSWFrameBuffer::Flip() assert(InScene); DrawLetterbox(); - DoWindowedGamma(); Present(); InScene = false; @@ -1243,33 +1312,6 @@ void OpenGLSWFrameBuffer::DrawLetterbox() } } -//========================================================================== -// -// OpenGLSWFrameBuffer :: DoWindowedGamma -// -// Draws the render target texture to the real back buffer using a gamma- -// correcting pixel shader. 
-// -//========================================================================== - -void OpenGLSWFrameBuffer::DoWindowedGamma() -{ - /*if (OldRenderTarget != nullptr) - { - FBVERTEX verts[4]; - - CalcFullscreenCoords(verts, false, true, 0, 0xFFFFFFFF); - SetRenderTarget(0, OldRenderTarget); - SetTexture(0, TempRenderTexture); - SetPixelShader(Windowed && GammaShader ? GammaShader : Shaders[SHADER_NormalColor]); - SetAlphaBlend(0); - EnableAlphaTest(false); - DrawTriangleFans(2, verts); - delete OldRenderTarget; - OldRenderTarget = nullptr; - }*/ -} - void OpenGLSWFrameBuffer::UploadPalette() { if (PaletteTexture->Buffers[0] == 0) diff --git a/src/gl/system/gl_swframebuffer.h b/src/gl/system/gl_swframebuffer.h index fca5c0839b..716c77b9ca 100644 --- a/src/gl/system/gl_swframebuffer.h +++ b/src/gl/system/gl_swframebuffer.h @@ -113,6 +113,16 @@ private: int Format = 0; }; + class HWFrameBuffer + { + public: + ~HWFrameBuffer(); + + int Framebuffer = 0; + HWTexture *Texture = nullptr; + }; + + class HWVertexBuffer { public: @@ -157,6 +167,7 @@ private: int BurnLocation = -1; }; + bool CreateFrameBuffer(const FString &name, int width, int height, HWFrameBuffer **outFramebuffer); bool CreatePixelShader(FString vertexsrc, FString fragmentsrc, const FString &defines, HWPixelShader **outShader); bool CreateVertexBuffer(int size, HWVertexBuffer **outVertexBuffer); bool CreateIndexBuffer(int size, HWIndexBuffer **outIndexBuffer); @@ -376,7 +387,6 @@ private: bool SetStyle(OpenGLTex *tex, DrawParms &parms, uint32_t &color0, uint32_t &color1, BufferedTris &quad); static int GetStyleAlpha(int type); static void SetColorOverlay(uint32_t color, float alpha, uint32_t &color0, uint32_t &color1); - void DoWindowedGamma(); void AddColorOnlyQuad(int left, int top, int width, int height, uint32_t color); void AddColorOnlyRect(int left, int top, int width, int height, uint32_t color); void CheckQuadBatch(int numtris = 2, int numverts = 4); @@ -404,6 +414,8 @@ private: float 
ShaderConstants[NumPSCONST * 4]; HWPixelShader *CurrentShader = nullptr; + HWFrameBuffer *OutputFB = nullptr; + bool AlphaTestEnabled = false; bool AlphaBlendEnabled = false; int AlphaBlendOp = 0; @@ -455,7 +467,6 @@ private: enum { BATCH_None, BATCH_Quads, BATCH_Lines } BatchType; HWPixelShader *Shaders[NUM_SHADERS]; - HWPixelShader *GammaShader = nullptr; HWTexture *InitialWipeScreen = nullptr, *FinalWipeScreen = nullptr; diff --git a/src/win32/hardware.cpp b/src/win32/hardware.cpp index 1d6e654774..095edc47e0 100644 --- a/src/win32/hardware.cpp +++ b/src/win32/hardware.cpp @@ -141,7 +141,6 @@ void I_InitGraphics () if (currentrenderer == 1) Video = gl_CreateVideo(); else Video = new Win32Video(0); #else - currentrenderer = vid_renderer; Video = gl_CreateVideo(); #endif From 3c7d6234cb8e357761aaca8a358ff9ce3fa6e664 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 11 Oct 2016 14:37:57 +0200 Subject: [PATCH 180/912] Add wipes --- src/CMakeLists.txt | 1 + src/gl/system/gl_swframebuffer.cpp | 59 ++- src/gl/system/gl_swframebuffer.h | 26 +- src/gl/system/gl_swwipe.cpp | 592 +++++++++++++++++++++++++ wadsrc/static/shaders/glsl/swshader.vp | 2 + 5 files changed, 669 insertions(+), 11 deletions(-) create mode 100644 src/gl/system/gl_swwipe.cpp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index dd241a3d7b..97bdfe81b0 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1152,6 +1152,7 @@ set( FASTMATH_SOURCES gl/system/gl_interface.cpp gl/system/gl_framebuffer.cpp gl/system/gl_swframebuffer.cpp + gl/system/gl_swwipe.cpp gl/system/gl_debug.cpp gl/system/gl_menu.cpp gl/system/gl_wipe.cpp diff --git a/src/gl/system/gl_swframebuffer.cpp b/src/gl/system/gl_swframebuffer.cpp index 4f8c257424..40d76aab17 100644 --- a/src/gl/system/gl_swframebuffer.cpp +++ b/src/gl/system/gl_swframebuffer.cpp @@ -461,6 +461,28 @@ bool OpenGLSWFrameBuffer::CreateTexture(const FString &name, int width, int heig return true; } +OpenGLSWFrameBuffer::HWTexture 
*OpenGLSWFrameBuffer::CopyCurrentScreen() +{ + auto obj = std::make_unique(); + obj->Format = GL_RGBA16F; + + GLint oldBinding = 0; + glGetIntegerv(GL_TEXTURE_BINDING_2D, &oldBinding); + + glGenTextures(1, (GLuint*)&obj->Texture); + glBindTexture(GL_TEXTURE_2D, obj->Texture); + + glCopyTexImage2D(GL_TEXTURE_2D, 0, obj->Format, 0, 0, Width, Height, 0); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + + FGLDebug::LabelObject(GL_TEXTURE, obj->Texture, "CopyCurrentScreen"); + + glBindTexture(GL_TEXTURE_2D, oldBinding); + + return obj.release(); +} + void OpenGLSWFrameBuffer::SetGammaRamp(const GammaRamp *ramp) { } @@ -548,6 +570,39 @@ void OpenGLSWFrameBuffer::DrawTriangleFans(int count, const FBVERTEX *vertices) glBindVertexArray(oldBinding); } +void OpenGLSWFrameBuffer::DrawTriangleFans(int count, const BURNVERTEX *vertices) +{ + count = 2 + count; + + GLint oldBinding = 0; + glGetIntegerv(GL_VERTEX_ARRAY_BINDING, &oldBinding); + + if (!StreamVertexBufferBurn) + { + StreamVertexBufferBurn = std::make_unique(); + glGenVertexArrays(1, (GLuint*)&StreamVertexBufferBurn->VertexArray); + glGenBuffers(1, (GLuint*)&StreamVertexBufferBurn->Buffer); + glBindVertexArray(StreamVertexBufferBurn->VertexArray); + glBindBuffer(GL_ARRAY_BUFFER, StreamVertexBufferBurn->Buffer); + glBufferData(GL_ARRAY_BUFFER, count * sizeof(BURNVERTEX), vertices, GL_STREAM_DRAW); + glEnableVertexAttribArray(0); + glEnableVertexAttribArray(1); + glVertexAttribPointer(0, 4, GL_FLOAT, GL_FALSE, sizeof(BURNVERTEX), (const GLvoid*)offsetof(BURNVERTEX, x)); + glVertexAttribPointer(1, 4, GL_FLOAT, GL_FALSE, sizeof(BURNVERTEX), (const GLvoid*)offsetof(BURNVERTEX, tu0)); + } + else + { + glBindVertexArray(StreamVertexBufferBurn->VertexArray); + glBindBuffer(GL_ARRAY_BUFFER, StreamVertexBufferBurn->Buffer); + glBufferData(GL_ARRAY_BUFFER, count * sizeof(BURNVERTEX), vertices, GL_STREAM_DRAW); + } + + 
glDrawArrays(GL_TRIANGLE_FAN, 0, count); + + glBindBuffer(GL_ARRAY_BUFFER, 0); + glBindVertexArray(oldBinding); +} + void OpenGLSWFrameBuffer::DrawPoints(int count, const FBVERTEX *vertices) { GLint oldBinding = 0; @@ -600,8 +655,8 @@ void OpenGLSWFrameBuffer::Present() FBVERTEX verts[4]; CalcFullscreenCoords(verts, false, true, 0, 0xFFFFFFFF); - for (int i = 0; i < 4; i++) - verts[i].tv = 1.0f - verts[i].tv; + //for (int i = 0; i < 4; i++) + // verts[i].tv = 1.0f - verts[i].tv; SetTexture(0, OutputFB->Texture); SetPixelShader(Shaders[SHADER_GammaCorrection]); SetAlphaBlend(0); diff --git a/src/gl/system/gl_swframebuffer.h b/src/gl/system/gl_swframebuffer.h index 716c77b9ca..1cf0288ce3 100644 --- a/src/gl/system/gl_swframebuffer.h +++ b/src/gl/system/gl_swframebuffer.h @@ -64,19 +64,26 @@ public: void DrawLine(int x0, int y0, int x1, int y1, int palColor, uint32 realcolor) override; void DrawPixel(int x, int y, int palcolor, uint32 rgbcolor) override; void FillSimplePoly(FTexture *tex, FVector2 *points, int npoints, double originx, double originy, double scalex, double scaley, DAngle rotation, FDynamicColormap *colormap, int lightlevel) override; - //bool WipeStartScreen(int type) override; - //void WipeEndScreen() override; - //bool WipeDo(int ticks) override; - //void WipeCleanup() override; + bool WipeStartScreen(int type) override; + void WipeEndScreen() override; + bool WipeDo(int ticks) override; + void WipeCleanup() override; bool Is8BitMode() override { return false; } int GetTrueHeight() override { return TrueHeight; } private: struct FBVERTEX { - FLOAT x, y, z, rhw; + float x, y, z, rhw; uint32_t color0, color1; - FLOAT tu, tv; + float tu, tv; + }; + + struct BURNVERTEX + { + float x, y, z, rhw; + float tu0, tv0; + float tu1, tv1; }; enum @@ -178,6 +185,7 @@ private: void SetStreamSource(HWVertexBuffer *vertexBuffer); void SetIndices(HWIndexBuffer *indexBuffer); void DrawTriangleFans(int count, const FBVERTEX *vertices); + void DrawTriangleFans(int count, 
const BURNVERTEX *vertices); void DrawPoints(int count, const FBVERTEX *vertices); void DrawLineList(int count); void DrawTriangleList(int minIndex, int numVertices, int startIndex, int primitiveCount); @@ -376,7 +384,7 @@ private: void UploadPalette(); void CalcFullscreenCoords(FBVERTEX verts[4], bool viewarea_only, bool can_double, uint32_t color0, uint32_t color1) const; bool Reset(); - HWTexture *GetCurrentScreen(); + HWTexture *CopyCurrentScreen(); void ReleaseDefaultPoolItems(); void KillNativePals(); void KillNativeTexs(); @@ -410,7 +418,7 @@ private: std::shared_ptr Debug; - std::unique_ptr StreamVertexBuffer; + std::unique_ptr StreamVertexBuffer, StreamVertexBufferBurn; float ShaderConstants[NumPSCONST * 4]; HWPixelShader *CurrentShader = nullptr; @@ -476,7 +484,7 @@ private: virtual ~Wiper(); virtual bool Run(int ticks, OpenGLSWFrameBuffer *fb) = 0; - //void DrawScreen(OpenGLSWFrameBuffer *fb, HWTexture *tex, int blendop = 0, uint32_t color0 = 0, uint32_t color1 = 0xFFFFFFF); + void DrawScreen(OpenGLSWFrameBuffer *fb, HWTexture *tex, int blendop = 0, uint32_t color0 = 0, uint32_t color1 = 0xFFFFFFF); }; class Wiper_Melt; friend class Wiper_Melt; diff --git a/src/gl/system/gl_swwipe.cpp b/src/gl/system/gl_swwipe.cpp new file mode 100644 index 0000000000..2f36272b6b --- /dev/null +++ b/src/gl/system/gl_swwipe.cpp @@ -0,0 +1,592 @@ +/* +** gl_swwipe.cpp +** Implements the different screen wipes using OpenGL calls. +** +**--------------------------------------------------------------------------- +** Copyright 1998-2008 Randy Heit +** All rights reserved. +** +** Redistribution and use in source and binary forms, with or without +** modification, are permitted provided that the following conditions +** are met: +** +** 1. Redistributions of source code must retain the above copyright +** notice, this list of conditions and the following disclaimer. +** 2. 
Redistributions in binary form must reproduce the above copyright +** notice, this list of conditions and the following disclaimer in the +** documentation and/or other materials provided with the distribution. +** 3. The name of the author may not be used to endorse or promote products +** derived from this software without specific prior written permission. +** +** THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +** IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +** OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +** IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +** INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +** NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +** DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +** THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +** (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +** THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**--------------------------------------------------------------------------- +** +*/ + +// HEADER FILES ------------------------------------------------------------ + +#include "gl/system/gl_system.h" +#include "files.h" +#include "m_swap.h" +#include "v_video.h" +#include "doomstat.h" +#include "m_png.h" +#include "m_crc32.h" +#include "vectors.h" +#include "v_palette.h" +#include "templates.h" + +#include "c_dispatch.h" +#include "templates.h" +#include "i_system.h" +#include "i_video.h" +#include "i_input.h" +#include "v_pfx.h" +#include "stats.h" +#include "doomerrors.h" +#include "r_main.h" +#include "r_data/r_translate.h" +#include "f_wipe.h" +#include "sbar.h" +#include "w_wad.h" +#include "r_data/colormaps.h" + +#include "gl/system/gl_interface.h" +#include "gl/system/gl_swframebuffer.h" +#include "gl/data/gl_data.h" +#include "gl/utility/gl_clock.h" +#include "gl/utility/gl_templates.h" +#include "gl/gl_functions.h" +#include "gl_debug.h" +#include "m_random.h" + +class OpenGLSWFrameBuffer::Wiper_Crossfade : public OpenGLSWFrameBuffer::Wiper +{ +public: + Wiper_Crossfade(); + bool Run(int ticks, OpenGLSWFrameBuffer *fb); + +private: + int Clock; +}; + +class OpenGLSWFrameBuffer::Wiper_Melt : public OpenGLSWFrameBuffer::Wiper +{ +public: + Wiper_Melt(); + bool Run(int ticks, OpenGLSWFrameBuffer *fb); + +private: + // Match the strip sizes that oldschool Doom used. 
+ static const int WIDTH = 160, HEIGHT = 200; + int y[WIDTH]; +}; + +class OpenGLSWFrameBuffer::Wiper_Burn : public OpenGLSWFrameBuffer::Wiper +{ +public: + Wiper_Burn(OpenGLSWFrameBuffer *fb); + ~Wiper_Burn(); + bool Run(int ticks, OpenGLSWFrameBuffer *fb); + +private: + static const int WIDTH = 64, HEIGHT = 64; + uint8_t BurnArray[WIDTH * (HEIGHT + 5)]; + HWTexture *BurnTexture; + int Density; + int BurnTime; +}; + +//========================================================================== +// +// OpenGLSWFrameBuffer :: WipeStartScreen +// +// Called before the current screen has started rendering. This needs to +// save what was drawn the previous frame so that it can be animated into +// what gets drawn this frame. +// +// The previous frame is captured with CopyCurrentScreen(), which uses +// glCopyTexImage2D to copy the current framebuffer into a new texture. +// +// The same path is used in both fullscreen and windowed mode, so there is +// no need for GetFrontBufferData() or the TempRenderTexture here; the copy +// is stored in InitialWipeScreen until WipeCleanup() releases it. +// +//========================================================================== + +bool OpenGLSWFrameBuffer::WipeStartScreen(int type) +{ + if (!Accel2D) + { + return Super::WipeStartScreen(type); + } + + switch (type) + { + case wipe_Melt: + ScreenWipe = new Wiper_Melt; + break; + + case wipe_Burn: + ScreenWipe = new Wiper_Burn(this); + break; + + case wipe_Fade: + ScreenWipe = new Wiper_Crossfade; + break; + + default: + return false; + } + + InitialWipeScreen = CopyCurrentScreen(); + + // Make even fullscreen mode render to the TempRenderTexture, so + // we can have a copy of the new screen readily available. + GatheringWipeScreen = true; + return true; +} + +//========================================================================== +// +// OpenGLSWFrameBuffer :: WipeEndScreen +// +// The screen we want to animate to has just been drawn. 
This function is +// called in place of Update(), so it has not been Presented yet. +// +//========================================================================== + +void OpenGLSWFrameBuffer::WipeEndScreen() +{ + if (!Accel2D) + { + Super::WipeEndScreen(); + return; + } + + // Don't do anything if there is no starting point. + if (InitialWipeScreen == NULL) + { + return; + } + + // If the whole screen was drawn without 2D accel, get it into + // video memory now. + if (!In2D) + { + Begin2D(true); + } + + EndBatch(); // Make sure all batched primitives have been drawn. + + FinalWipeScreen = CopyCurrentScreen(); + + // At this point, InitialWipeScreen holds the screen we are wiping from. + // FinalWipeScreen holds the screen we are wiping to; it is a separate + // texture created by the CopyCurrentScreen() call above. +} + +//========================================================================== +// +// OpenGLSWFrameBuffer :: WipeDo +// +// Perform the actual wipe animation. The number of tics since the last +// time this function was called is passed in. Returns true when the wipe +// is over. The first time this function has been called, the screen is +// still locked from before and EndScene() still has not been called. +// Successive times need to call BeginScene(). +// +//========================================================================== + +bool OpenGLSWFrameBuffer::WipeDo(int ticks) +{ + if (!Accel2D) + { + return Super::WipeDo(ticks); + } + + // Sanity checks. + if (InitialWipeScreen == NULL || FinalWipeScreen == NULL) + { + return true; + } + if (GatheringWipeScreen) + { // This is the first time we've been called for this wipe. + GatheringWipeScreen = false; + } + else + { // This is the second or later time we've been called for this wipe. 
+ InScene = true; + } + + In2D = 3; + + EnableAlphaTest(false); + bool done = ScreenWipe->Run(ticks, this); + DrawLetterbox(); + return done; +} + +//========================================================================== +// +// OpenGLSWFrameBuffer :: WipeCleanup +// +// Release any resources that were specifically created for the wipe. +// +//========================================================================== + +void OpenGLSWFrameBuffer::WipeCleanup() +{ + if (ScreenWipe != NULL) + { + delete ScreenWipe; + ScreenWipe = NULL; + } + SafeRelease( InitialWipeScreen ); + SafeRelease( FinalWipeScreen ); + GatheringWipeScreen = false; + if (!Accel2D) + { + Super::WipeCleanup(); + return; + } +} + +//========================================================================== +// +// OpenGLSWFrameBuffer :: Wiper Destructor +// +//========================================================================== + +OpenGLSWFrameBuffer::Wiper::~Wiper() +{ +} + +//========================================================================== +// +// OpenGLSWFrameBuffer :: Wiper :: DrawScreen +// +// Draw either the initial or target screen completely to the screen. 
+// +//========================================================================== + +void OpenGLSWFrameBuffer::Wiper::DrawScreen(OpenGLSWFrameBuffer *fb, HWTexture *tex, + int blendop, uint32_t color0, uint32_t color1) +{ + FBVERTEX verts[4]; + + fb->CalcFullscreenCoords(verts, false, false, color0, color1); + fb->SetTexture(0, tex); + fb->SetAlphaBlend(blendop, GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); + fb->SetPixelShader(fb->Shaders[SHADER_NormalColor]); + fb->DrawTriangleFans(2, verts); +} + +// WIPE: CROSSFADE --------------------------------------------------------- + +//========================================================================== +// +// OpenGLSWFrameBuffer :: Wiper_Crossfade Constructor +// +//========================================================================== + +OpenGLSWFrameBuffer::Wiper_Crossfade::Wiper_Crossfade() +: Clock(0) +{ +} + +//========================================================================== +// +// OpenGLSWFrameBuffer :: Wiper_Crossfade :: Run +// +// Fades the old screen into the new one over 32 ticks. +// +//========================================================================== + +bool OpenGLSWFrameBuffer::Wiper_Crossfade::Run(int ticks, OpenGLSWFrameBuffer *fb) +{ + Clock += ticks; + + // Put the initial screen back to the buffer. + DrawScreen(fb, fb->InitialWipeScreen); + + // Draw the new screen on top of it. 
+ DrawScreen(fb, fb->FinalWipeScreen, GL_FUNC_ADD, ColorValue(0,0,0,Clock / 32.f), ColorRGBA(255,255,255,0)); + + return Clock >= 32; +} + +// WIPE: MELT -------------------------------------------------------------- + +//========================================================================== +// +// OpenGLSWFrameBuffer :: Wiper_Melt Constructor +// +//========================================================================== + +OpenGLSWFrameBuffer::Wiper_Melt::Wiper_Melt() +{ + int i, r; + + // setup initial column positions + // (y<0 => not ready to scroll yet) + y[0] = -(M_Random() & 15); + for (i = 1; i < WIDTH; ++i) + { + r = (M_Random()%3) - 1; + y[i] = clamp(y[i-1] + r, -15, 0); + } +} + +//========================================================================== +// +// OpenGLSWFrameBuffer :: Wiper_Melt :: Run +// +// Melts the old screen away in vertical strips to reveal the new one. +// +//========================================================================== + +bool OpenGLSWFrameBuffer::Wiper_Melt::Run(int ticks, OpenGLSWFrameBuffer *fb) +{ + // Draw the new screen on the bottom. + DrawScreen(fb, fb->FinalWipeScreen); + + int i, dy; + int fbwidth = fb->Width; + int fbheight = fb->Height; + bool done = true; + + // Copy the old screen in vertical strips on top of the new one. + while (ticks--) + { + done = true; + for (i = 0; i < WIDTH; i++) + { + if (y[i] < 0) + { + y[i]++; + done = false; + } + else if (y[i] < HEIGHT) + { + dy = (y[i] < 16) ? y[i]+1 : 8; + y[i] = MIN(y[i] + dy, HEIGHT); + done = false; + } + if (ticks == 0) + { // Only draw for the final tick. 
+ RECT rect; + POINT dpt; + + dpt.x = i * fbwidth / WIDTH; + dpt.y = MAX(0, y[i] * fbheight / HEIGHT); + rect.left = dpt.x; + rect.top = 0; + rect.right = (i + 1) * fbwidth / WIDTH; + rect.bottom = fbheight - dpt.y; + if (rect.bottom > rect.top) + { + fb->CheckQuadBatch(); + + BufferedTris *quad = &fb->QuadExtra[fb->QuadBatchPos]; + FBVERTEX *vert = &fb->VertexData[fb->VertexPos]; + WORD *index = &fb->IndexData[fb->IndexPos]; + + quad->ClearSetup(); + quad->Flags = BQF_DisableAlphaTest; + quad->ShaderNum = BQS_Plain; + quad->Palette = NULL; + quad->Texture = fb->InitialWipeScreen; + quad->NumVerts = 4; + quad->NumTris = 2; + + // Fill the vertex buffer. + float u0 = rect.left / float(fb->FBWidth); + float v0 = 0; + float u1 = rect.right / float(fb->FBWidth); + float v1 = (rect.bottom - rect.top) / float(fb->FBHeight); + + float x0 = float(rect.left) - 0.5f; + float x1 = float(rect.right) - 0.5f; + float y0 = float(dpt.y + fb->LBOffsetI) - 0.5f; + float y1 = float(fbheight + fb->LBOffsetI) - 0.5f; + + vert[0].x = x0; + vert[0].y = y0; + vert[0].z = 0; + vert[0].rhw = 1; + vert[0].color0 = 0; + vert[0].color1 = 0xFFFFFFF; + vert[0].tu = u0; + vert[0].tv = v0; + + vert[1].x = x1; + vert[1].y = y0; + vert[1].z = 0; + vert[1].rhw = 1; + vert[1].color0 = 0; + vert[1].color1 = 0xFFFFFFF; + vert[1].tu = u1; + vert[1].tv = v0; + + vert[2].x = x1; + vert[2].y = y1; + vert[2].z = 0; + vert[2].rhw = 1; + vert[2].color0 = 0; + vert[2].color1 = 0xFFFFFFF; + vert[2].tu = u1; + vert[2].tv = v1; + + vert[3].x = x0; + vert[3].y = y1; + vert[3].z = 0; + vert[3].rhw = 1; + vert[3].color0 = 0; + vert[3].color1 = 0xFFFFFFF; + vert[3].tu = u0; + vert[3].tv = v1; + + // Fill the vertex index buffer. + index[0] = fb->VertexPos; + index[1] = fb->VertexPos + 1; + index[2] = fb->VertexPos + 2; + index[3] = fb->VertexPos; + index[4] = fb->VertexPos + 2; + index[5] = fb->VertexPos + 3; + + // Batch the quad. 
+ fb->QuadBatchPos++; + fb->VertexPos += 4; + fb->IndexPos += 6; + } + } + } + } + fb->EndQuadBatch(); + return done; +} + +// WIPE: BURN -------------------------------------------------------------- + +//========================================================================== +// +// OpenGLSWFrameBuffer :: Wiper_Burn Constructor +// +//========================================================================== + +OpenGLSWFrameBuffer::Wiper_Burn::Wiper_Burn(OpenGLSWFrameBuffer *fb) +{ + Density = 4; + BurnTime = 0; + memset(BurnArray, 0, sizeof(BurnArray)); + if (fb->Shaders[SHADER_BurnWipe] == NULL || !fb->CreateTexture("BurnWipe", WIDTH, HEIGHT, 1, GL_R8, &BurnTexture)) + { + BurnTexture = NULL; + } +} + +//========================================================================== +// +// OpenGLSWFrameBuffer :: Wiper_Burn Destructor +// +//========================================================================== + +OpenGLSWFrameBuffer::Wiper_Burn::~Wiper_Burn() +{ + SafeRelease( BurnTexture ); +} + +//========================================================================== +// +// OpenGLSWFrameBuffer :: Wiper_Burn :: Run +// +//========================================================================== + +bool OpenGLSWFrameBuffer::Wiper_Burn::Run(int ticks, OpenGLSWFrameBuffer *fb) +{ + bool done; + + BurnTime += ticks; + ticks *= 2; + + // Make the fire burn + done = false; + while (!done && ticks--) + { + Density = wipe_CalcBurn(BurnArray, WIDTH, HEIGHT, Density); + done = (Density < 0); + } + + // Update the burn texture with the new burn data + + if (BurnTexture->Buffers[0] == 0) + { + glGenBuffers(2, (GLuint*)BurnTexture->Buffers); + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, BurnTexture->Buffers[0]); + glBufferData(GL_PIXEL_UNPACK_BUFFER, WIDTH * HEIGHT, nullptr, GL_STREAM_DRAW); + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, BurnTexture->Buffers[1]); + glBufferData(GL_PIXEL_UNPACK_BUFFER, WIDTH * HEIGHT, nullptr, GL_STREAM_DRAW); + } + else + { + 
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, BurnTexture->Buffers[BurnTexture->CurrentBuffer]); + BurnTexture->CurrentBuffer = (BurnTexture->CurrentBuffer + 1) & 1; + } + + uint8_t *dest = (uint8_t*)glMapBufferRange(GL_PIXEL_UNPACK_BUFFER, 0, WIDTH * HEIGHT, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT); + if (dest) + { + memcpy(dest, BurnArray, WIDTH * HEIGHT); + glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER); + + GLint oldBinding = 0; + glGetIntegerv(GL_TEXTURE_BINDING_2D, &oldBinding); + glBindTexture(GL_TEXTURE_2D, BurnTexture->Texture); + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, WIDTH, HEIGHT, GL_RED, GL_UNSIGNED_BYTE, 0); + glBindTexture(GL_TEXTURE_2D, oldBinding); + + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); + } + + // Put the initial screen back to the buffer. + DrawScreen(fb, fb->InitialWipeScreen); + + // Burn the new screen on top of it. + float top = fb->LBOffset - 0.5f; + float right = float(fb->Width) - 0.5f; + float bot = float(fb->Height) + top; + float texright = float(fb->Width) / float(fb->FBWidth); + float texbot = float(fb->Height) / float(fb->FBHeight); + + BURNVERTEX verts[4] = + { + { -0.5f, top, 0.5f, 1.f, 0.f, 0.f, 0, 0 }, + { right, top, 0.5f, 1.f, texright, 0.f, 1, 0 }, + { right, bot, 0.5f, 1.f, texright, texbot, 1, 1 }, + { -0.5f, bot, 0.5f, 1.f, 0.f, texbot, 0, 1 } + }; + + fb->SetTexture(0, fb->FinalWipeScreen); + fb->SetTexture(1, BurnTexture); + fb->SetAlphaBlend(GL_FUNC_ADD, GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); + fb->SetPixelShader(fb->Shaders[SHADER_BurnWipe]); + glActiveTexture(GL_TEXTURE1); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + fb->DrawTriangleFans(2, verts); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glActiveTexture(GL_TEXTURE0); + + // The fire may not always stabilize, so the wipe is forced to end + // after an arbitrary maximum time. 
+ return done || (BurnTime > 40); +} diff --git a/wadsrc/static/shaders/glsl/swshader.vp b/wadsrc/static/shaders/glsl/swshader.vp index a95be1e1ac..a317025aaf 100644 --- a/wadsrc/static/shaders/glsl/swshader.vp +++ b/wadsrc/static/shaders/glsl/swshader.vp @@ -13,7 +13,9 @@ uniform vec4 ScreenSize; void main() { gl_Position = vec4(AttrPosition.xy / ScreenSize.xy * 2.0 - 1.0, 1.0, 1.0); +#if defined(EGAMMACORRECTION) gl_Position.y = -gl_Position.y; +#endif PixelColor0 = AttrColor0.bgra; PixelColor1 = AttrColor1.bgra; PixelTexCoord0 = AttrTexCoord0; From f37ee3a024ca08a76182e4b1168c4e97affa9821 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 11 Oct 2016 15:43:12 +0200 Subject: [PATCH 181/912] Add bgra support to OpenGLSWFrameBuffer --- src/gl/system/gl_framebuffer.cpp | 2 +- src/gl/system/gl_swframebuffer.cpp | 43 ++++++++++++++++++++---------- src/gl/system/gl_swframebuffer.h | 2 +- src/posix/cocoa/i_video.mm | 4 +-- src/posix/cocoa/sdlglvideo.h | 2 +- src/posix/sdl/sdlglvideo.cpp | 4 +-- src/posix/sdl/sdlglvideo.h | 2 +- src/win32/win32gliface.cpp | 8 +++--- src/win32/win32gliface.h | 4 +-- 9 files changed, 44 insertions(+), 27 deletions(-) diff --git a/src/gl/system/gl_framebuffer.cpp b/src/gl/system/gl_framebuffer.cpp index 6264352f6c..57e0cd679f 100644 --- a/src/gl/system/gl_framebuffer.cpp +++ b/src/gl/system/gl_framebuffer.cpp @@ -79,7 +79,7 @@ CUSTOM_CVAR(Int, vid_hwgamma, 2, CVAR_ARCHIVE | CVAR_GLOBALCONFIG | CVAR_NOINITC //========================================================================== OpenGLFrameBuffer::OpenGLFrameBuffer(void *hMonitor, int width, int height, int bits, int refreshHz, bool fullscreen) : - Super(hMonitor, width, height, bits, refreshHz, fullscreen) + Super(hMonitor, width, height, bits, refreshHz, fullscreen, false) { // SetVSync needs to be at the very top to workaround a bug in Nvidia's OpenGL driver. // If wglSwapIntervalEXT is called after glBindFramebuffer in a frame the setting is not changed! 
diff --git a/src/gl/system/gl_swframebuffer.cpp b/src/gl/system/gl_swframebuffer.cpp index 40d76aab17..286066fc96 100644 --- a/src/gl/system/gl_swframebuffer.cpp +++ b/src/gl/system/gl_swframebuffer.cpp @@ -120,8 +120,8 @@ const char *const OpenGLSWFrameBuffer::ShaderDefines[OpenGLSWFrameBuffer::NUM_SH "#define EGAMMACORRECTION", // GammaCorrection }; -OpenGLSWFrameBuffer::OpenGLSWFrameBuffer(void *hMonitor, int width, int height, int bits, int refreshHz, bool fullscreen) : - Super(hMonitor, width, height, bits, refreshHz, fullscreen) +OpenGLSWFrameBuffer::OpenGLSWFrameBuffer(void *hMonitor, int width, int height, int bits, int refreshHz, bool fullscreen, bool bgra) : + Super(hMonitor, width, height, bits, refreshHz, fullscreen, bgra) { // To do: this needs to cooperate with the same static in OpenGLFrameBuffer::InitializeState static bool first = true; @@ -892,7 +892,7 @@ void OpenGLSWFrameBuffer::KillNativeTexs() bool OpenGLSWFrameBuffer::CreateFBTexture() { - CreateTexture("FBTexture", Width, Height, 1, GL_R8, &FBTexture); + CreateTexture("FBTexture", Width, Height, 1, IsBgra() ? GL_RGBA8 : GL_R8, &FBTexture); FBWidth = Width; FBHeight = Height; return true; @@ -1262,13 +1262,16 @@ void OpenGLSWFrameBuffer::Draw3DPart(bool copy3d) { if (copy3d) { + int pixelsize = IsBgra() ? 
4 : 1; + int size = Width * Height * pixelsize; + if (FBTexture->Buffers[0] == 0) { glGenBuffers(2, (GLuint*)FBTexture->Buffers); glBindBuffer(GL_PIXEL_UNPACK_BUFFER, FBTexture->Buffers[0]); - glBufferData(GL_PIXEL_UNPACK_BUFFER, Width * Height, nullptr, GL_STREAM_DRAW); + glBufferData(GL_PIXEL_UNPACK_BUFFER, size, nullptr, GL_STREAM_DRAW); glBindBuffer(GL_PIXEL_UNPACK_BUFFER, FBTexture->Buffers[1]); - glBufferData(GL_PIXEL_UNPACK_BUFFER, Width * Height, nullptr, GL_STREAM_DRAW); + glBufferData(GL_PIXEL_UNPACK_BUFFER, size, nullptr, GL_STREAM_DRAW); } else { @@ -1276,28 +1279,31 @@ void OpenGLSWFrameBuffer::Draw3DPart(bool copy3d) FBTexture->CurrentBuffer = (FBTexture->CurrentBuffer + 1) & 1; } - uint8_t *dest = (uint8_t*)glMapBufferRange(GL_PIXEL_UNPACK_BUFFER, 0, Width * Height, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT); + uint8_t *dest = (uint8_t*)glMapBufferRange(GL_PIXEL_UNPACK_BUFFER, 0, size, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT); if (dest) { if (Pitch == Width) { - memcpy(dest, MemBuffer, Width * Height); + memcpy(dest, MemBuffer, Width * Height * pixelsize); } else { uint8_t *src = MemBuffer; for (int y = 0; y < Height; y++) { - memcpy(dest, src, Width); - dest += Width; - src += Pitch; + memcpy(dest, src, Width * pixelsize); + dest += Width * pixelsize; + src += Pitch * pixelsize; } } glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER); GLint oldBinding = 0; glGetIntegerv(GL_TEXTURE_BINDING_2D, &oldBinding); glBindTexture(GL_TEXTURE_2D, FBTexture->Texture); - glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, Width, Height, GL_RED, GL_UNSIGNED_BYTE, 0); + if (IsBgra()) + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, Width, Height, GL_BGRA, GL_UNSIGNED_BYTE, 0); + else + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, Width, Height, GL_RED, GL_UNSIGNED_BYTE, 0); glBindTexture(GL_TEXTURE_2D, oldBinding); } @@ -1314,7 +1320,10 @@ void OpenGLSWFrameBuffer::Draw3DPart(bool copy3d) memset(Constant, 0, sizeof(Constant)); 
SetAlphaBlend(0); EnableAlphaTest(false); - SetPixelShader(Shaders[SHADER_NormalColorPal]); + if (IsBgra()) + SetPixelShader(Shaders[SHADER_NormalColor]); + else + SetPixelShader(Shaders[SHADER_NormalColorPal]); if (copy3d) { FBVERTEX verts[4]; @@ -1330,7 +1339,10 @@ void OpenGLSWFrameBuffer::Draw3DPart(bool copy3d) { color0 = ColorValue(realfixedcolormap->ColorizeStart[0] / 2, realfixedcolormap->ColorizeStart[1] / 2, realfixedcolormap->ColorizeStart[2] / 2, 0); color1 = ColorValue(realfixedcolormap->ColorizeEnd[0] / 2, realfixedcolormap->ColorizeEnd[1] / 2, realfixedcolormap->ColorizeEnd[2] / 2, 1); - SetPixelShader(Shaders[SHADER_SpecialColormapPal]); + if (IsBgra()) + SetPixelShader(Shaders[SHADER_SpecialColormap]); + else + SetPixelShader(Shaders[SHADER_SpecialColormapPal]); } } else @@ -1341,7 +1353,10 @@ void OpenGLSWFrameBuffer::Draw3DPart(bool copy3d) CalcFullscreenCoords(verts, Accel2D, false, color0, color1); DrawTriangleFans(2, verts); } - SetPixelShader(Shaders[SHADER_NormalColorPal]); + if (IsBgra()) + SetPixelShader(Shaders[SHADER_NormalColor]); + else + SetPixelShader(Shaders[SHADER_NormalColorPal]); } //========================================================================== diff --git a/src/gl/system/gl_swframebuffer.h b/src/gl/system/gl_swframebuffer.h index 1cf0288ce3..96e3313891 100644 --- a/src/gl/system/gl_swframebuffer.h +++ b/src/gl/system/gl_swframebuffer.h @@ -29,7 +29,7 @@ class OpenGLFrameBuffer : public SDLGLFB public: explicit OpenGLSWFrameBuffer() {} - OpenGLSWFrameBuffer(void *hMonitor, int width, int height, int bits, int refreshHz, bool fullscreen); + OpenGLSWFrameBuffer(void *hMonitor, int width, int height, int bits, int refreshHz, bool fullscreen, bool bgra); ~OpenGLSWFrameBuffer(); bool IsValid() override; diff --git a/src/posix/cocoa/i_video.mm b/src/posix/cocoa/i_video.mm index 7dc7a6eed9..8c773fce90 100644 --- a/src/posix/cocoa/i_video.mm +++ b/src/posix/cocoa/i_video.mm @@ -1126,8 +1126,8 @@ void CocoaFrameBuffer::Flip() 
// --------------------------------------------------------------------------- -SDLGLFB::SDLGLFB(void*, const int width, const int height, int, int, const bool fullscreen) -: DFrameBuffer(width, height, false) +SDLGLFB::SDLGLFB(void*, const int width, const int height, int, int, const bool fullscreen, bool bgra) +: DFrameBuffer(width, height, bgra) , m_lock(-1) , m_isUpdatePending(false) { diff --git a/src/posix/cocoa/sdlglvideo.h b/src/posix/cocoa/sdlglvideo.h index fcbf23f2ad..43f2c9c439 100644 --- a/src/posix/cocoa/sdlglvideo.h +++ b/src/posix/cocoa/sdlglvideo.h @@ -52,7 +52,7 @@ class SDLGLFB : public DFrameBuffer { public: // This must have the same parameters as the Windows version, even if they are not used! - SDLGLFB(void *hMonitor, int width, int height, int, int, bool fullscreen); + SDLGLFB(void *hMonitor, int width, int height, int, int, bool fullscreen, bool bgra); ~SDLGLFB(); virtual bool Lock(bool buffered = true); diff --git a/src/posix/sdl/sdlglvideo.cpp b/src/posix/sdl/sdlglvideo.cpp index 86371bd491..58cfc9a234 100644 --- a/src/posix/sdl/sdlglvideo.cpp +++ b/src/posix/sdl/sdlglvideo.cpp @@ -314,8 +314,8 @@ bool SDLGLVideo::InitHardware (bool allowsoftware, int multisample) // FrameBuffer implementation ----------------------------------------------- -SDLGLFB::SDLGLFB (void *, int width, int height, int, int, bool fullscreen) - : DFrameBuffer (width, height, false) +SDLGLFB::SDLGLFB (void *, int width, int height, int, int, bool fullscreen, bool bgra) + : DFrameBuffer (width, height, bgra) { int i; diff --git a/src/posix/sdl/sdlglvideo.h b/src/posix/sdl/sdlglvideo.h index 3b84f83c4e..01e70caac3 100644 --- a/src/posix/sdl/sdlglvideo.h +++ b/src/posix/sdl/sdlglvideo.h @@ -39,7 +39,7 @@ class SDLGLFB : public DFrameBuffer DECLARE_CLASS(SDLGLFB, DFrameBuffer) public: // this must have the same parameters as the Windows version, even if they are not used! 
- SDLGLFB (void *hMonitor, int width, int height, int, int, bool fullscreen); + SDLGLFB (void *hMonitor, int width, int height, int, int, bool fullscreen, bool bgra); ~SDLGLFB (); void ForceBuffering (bool force); diff --git a/src/win32/win32gliface.cpp b/src/win32/win32gliface.cpp index d46f45a02f..0079271a3e 100644 --- a/src/win32/win32gliface.cpp +++ b/src/win32/win32gliface.cpp @@ -381,7 +381,8 @@ DFrameBuffer *Win32GLVideo::CreateFrameBuffer(int width, int height, bool bgra, fb->m_Height == m_DisplayHeight && fb->m_Bits == m_DisplayBits && fb->m_RefreshHz == m_DisplayHz && - fb->m_Fullscreen == fs) + fb->m_Fullscreen == fs && + fb->m_Bgra == bgra) { return old; } @@ -391,7 +392,7 @@ DFrameBuffer *Win32GLVideo::CreateFrameBuffer(int width, int height, bool bgra, if (vid_renderer == 1) fb = new OpenGLFrameBuffer(m_hMonitor, m_DisplayWidth, m_DisplayHeight, m_DisplayBits, m_DisplayHz, fs); else - fb = new OpenGLSWFrameBuffer(m_hMonitor, m_DisplayWidth, m_DisplayHeight, m_DisplayBits, m_DisplayHz, fs); + fb = new OpenGLSWFrameBuffer(m_hMonitor, m_DisplayWidth, m_DisplayHeight, m_DisplayBits, m_DisplayHz, fs, bgra); return fb; } @@ -867,13 +868,14 @@ IMPLEMENT_ABSTRACT_CLASS(Win32GLFrameBuffer) // //========================================================================== -Win32GLFrameBuffer::Win32GLFrameBuffer(void *hMonitor, int width, int height, int bits, int refreshHz, bool fullscreen) : BaseWinFB(width, height, false) +Win32GLFrameBuffer::Win32GLFrameBuffer(void *hMonitor, int width, int height, int bits, int refreshHz, bool fullscreen, bool bgra) : BaseWinFB(width, height, bgra) { m_Width = width; m_Height = height; m_Bits = bits; m_RefreshHz = refreshHz; m_Fullscreen = fullscreen; + m_Bgra = bgra; m_Lock=0; RECT r; diff --git a/src/win32/win32gliface.h b/src/win32/win32gliface.h index d16c0dcc36..fbaf34bd57 100644 --- a/src/win32/win32gliface.h +++ b/src/win32/win32gliface.h @@ -101,7 +101,7 @@ public: Win32GLFrameBuffer() {} // Actually, hMonitor is a 
HMONITOR, but it's passed as a void * as there // look to be some cross-platform bits in the way. - Win32GLFrameBuffer(void *hMonitor, int width, int height, int bits, int refreshHz, bool fullscreen); + Win32GLFrameBuffer(void *hMonitor, int width, int height, int bits, int refreshHz, bool fullscreen, bool bgra); virtual ~Win32GLFrameBuffer(); @@ -143,7 +143,7 @@ protected: float m_Gamma, m_Brightness, m_Contrast; WORD m_origGamma[768]; BOOL m_supportsGamma; - bool m_Fullscreen; + bool m_Fullscreen, m_Bgra; int m_Width, m_Height, m_Bits, m_RefreshHz; int m_Lock; char m_displayDeviceNameBuffer[CCHDEVICENAME]; From 5e458866bc46c52e19de9fe9b7b75bdf34864dbe Mon Sep 17 00:00:00 2001 From: raa-eruanna Date: Tue, 11 Oct 2016 17:10:54 -0400 Subject: [PATCH 182/912] - Added CVAR handling for vid_used3d - allows to switch software mode canvas between OpenGL and Direct3D upon restart. --- src/win32/hardware.cpp | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/src/win32/hardware.cpp b/src/win32/hardware.cpp index cf39975fd4..da1ced10fa 100644 --- a/src/win32/hardware.cpp +++ b/src/win32/hardware.cpp @@ -69,8 +69,16 @@ FRenderer *gl_CreateInterface(); void I_RestartRenderer(); int currentrenderer = -1; +int currentcanvas = -1; bool changerenderer; +// Software OpenGL canvas +CUSTOM_CVAR(Bool, vid_used3d, false, CVAR_ARCHIVE | CVAR_GLOBALCONFIG | CVAR_NOINITCALL) +{ + if (self != currentcanvas) + Printf("You must restart " GAMENAME " for this change to take effect.\n"); +} + // [ZDoomGL] CUSTOM_CVAR (Int, vid_renderer, 0, CVAR_ARCHIVE | CVAR_GLOBALCONFIG | CVAR_NOINITCALL) { @@ -137,13 +145,13 @@ void I_InitGraphics () val.Bool = !!Args->CheckParm ("-devparm"); ticker.SetGenericRepDefault (val, CVAR_Bool); -//#define USE_D3D9_VIDEO -#ifdef USE_D3D9_VIDEO - if (currentrenderer == 1) Video = gl_CreateVideo(); - else Video = new Win32Video(0); -#else - Video = gl_CreateVideo(); -#endif + if (currentcanvas == 1) // Software Canvas: 1 = D3D 
or DirectDraw, 0 = OpenGL + if (currentrenderer == 1) + Video = gl_CreateVideo(); + else + Video = new Win32Video(0); + else + Video = gl_CreateVideo(); if (Video == NULL) I_FatalError ("Failed to initialize display"); @@ -161,6 +169,7 @@ static void I_DeleteRenderer() void I_CreateRenderer() { currentrenderer = vid_renderer; + currentcanvas = vid_used3d; if (Renderer == NULL) { if (currentrenderer==1) Renderer = gl_CreateInterface(); From 13271cb967779605d59d917891167625e34eab6f Mon Sep 17 00:00:00 2001 From: raa-eruanna Date: Tue, 11 Oct 2016 17:42:46 -0400 Subject: [PATCH 183/912] - Created menus for all this render-switching insanity. --- wadsrc/static/language.enu | 12 ++++++++++++ wadsrc/static/menudef.txt | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+) diff --git a/wadsrc/static/language.enu b/wadsrc/static/language.enu index a662b719ec..c603864a7a 100644 --- a/wadsrc/static/language.enu +++ b/wadsrc/static/language.enu @@ -2145,6 +2145,12 @@ MODMNU_QUALITY = "Quality"; MODMNU_VOLUMERAMPING = "Volume ramping"; MODMNU_CHIPOMATIC = "Chip-o-matic"; +// Renderer Options +RNDMNU_TITLE = "CHANGE RENDERER"; +RNDMNU_RENDERER = "Hardware Acceleration"; +RNDMNU_TRUECOLOR = "Software Truecolor Mode"; +RNDMNU_CANVAS = "Software Canvas"; + // Video Options VIDMNU_TITLE = "VIDEO MODE"; VIDMNU_FULLSCREEN = "Fullscreen"; @@ -2152,6 +2158,7 @@ VIDMNU_HIDPI = "Retina/HiDPI support"; VIDMNU_ASPECTRATIO = "Aspect ratio"; VIDMNU_FORCEASPECT = "Force aspect ratio"; VIDMNU_5X4ASPECTRATIO = "Enable 5:4 aspect ratio"; +VIDMNU_CHANGERENDER = "Change Rendering Output"; VIDMNU_ENTERTEXT = "Press ENTER to set mode"; VIDMNU_TESTTEXT1 = "T to test mode for 5 seconds"; VIDMNU_TESTTEXT2 = "Please wait 5 seconds..."; @@ -2300,6 +2307,11 @@ OPTVAL_WARNINGS = "Warnings"; OPTVAL_NOTIFICATIONS = "Notifications"; OPTVAL_EVERYTHING = "Everything"; OPTVAL_FULLSCREENONLY = "Fullscreen only"; +OPTVAL_GL = "OpenGL"; +OPTVAL_D3D = "Direct3D"; +OPTVAL_HWPOLY = 
"OpenGL-Accelerated"; +OPTVAL_SWDOOM = "Doom Software Renderer"; + // Colors C_BRICK = "\cabrick"; C_TAN = "\cbtan"; diff --git a/wadsrc/static/menudef.txt b/wadsrc/static/menudef.txt index cbe3fcb970..b26c4c4a56 100644 --- a/wadsrc/static/menudef.txt +++ b/wadsrc/static/menudef.txt @@ -1734,6 +1734,41 @@ OptionMenu ModReplayerOptions // the foo_dumb preferences in foobar2000. } +/*======================================= + * + * Change Renderer Menu + * + *=======================================*/ + +OptionValue "PolyDoom" +{ + 0, "$OPTVAL_SWDOOM" + 1, "$OPTVAL_HWPOLY" +} + +OptionValue "D3DGL" +{ + 0, "$OPTVAL_GL" + 1, "$OPTVAL_D3D" +} + +OptionValue "GLD3D" +{ + 0, "$OPTVAL_D3D" + 1, "$OPTVAL_GL" +} + +OptionMenu RendererMenu +{ + Title "$RNDMNU_TITLE" + Option "$RNDMNU_RENDERER", "vid_renderer", "PolyDoom" + Option "$RNDMNU_TRUECOLOR", "swtruecolor", "OnOff" + IfOption(Windows) + { + Option "$RNDMNU_CANVAS", "vid_used3d", "D3DGL" + } +} + /*======================================= * * Video mode menu @@ -1782,6 +1817,7 @@ OptionMenu VideoModeMenu Option "$VIDMNU_ASPECTRATIO", "menu_screenratios", "Ratios" Option "$VIDMNU_FORCEASPECT", "vid_aspect", "ForceRatios" Option "$VIDMNU_5X4ASPECTRATIO", "vid_tft", "YesNo" + Submenu "$VIDMNU_CHANGERENDER", "RendererMenu" StaticText " " ScreenResolution "res_0" ScreenResolution "res_1" From 698b05ee699b9c5afbfa0d7835f8a7a791109c49 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 12 Oct 2016 07:34:07 +0200 Subject: [PATCH 184/912] Fix pixel center and letter box issues --- src/gl/system/gl_swframebuffer.cpp | 203 +++++++++++++---------------- src/gl/system/gl_swframebuffer.h | 7 +- src/gl/system/gl_swwipe.cpp | 32 ++--- 3 files changed, 105 insertions(+), 137 deletions(-) diff --git a/src/gl/system/gl_swframebuffer.cpp b/src/gl/system/gl_swframebuffer.cpp index 40d76aab17..252153b54d 100644 --- a/src/gl/system/gl_swframebuffer.cpp +++ b/src/gl/system/gl_swframebuffer.cpp @@ -153,8 +153,8 @@ 
OpenGLSWFrameBuffer::OpenGLSWFrameBuffer(void *hMonitor, int width, int height, VSync = vid_vsync; BlendingRect.left = 0; BlendingRect.top = 0; - BlendingRect.right = FBWidth; - BlendingRect.bottom = FBHeight; + BlendingRect.right = Width; + BlendingRect.bottom = Height; In2D = 0; Palettes = nullptr; Textures = nullptr; @@ -186,21 +186,6 @@ OpenGLSWFrameBuffer::OpenGLSWFrameBuffer(void *hMonitor, int width, int height, //Windowed = !(static_cast(Video)->GoFullscreen(fullscreen)); TrueHeight = height; - /*if (fullscreen) - { - for (Win32Video::ModeInfo *mode = static_cast(Video)->m_Modes; mode != nullptr; mode = mode->next) - { - if (mode->width == Width && mode->height == Height) - { - TrueHeight = mode->realheight; - PixelDoubling = mode->doubling; - break; - } - } - }*/ - // Offset from top of screen to top of letterboxed screen - LBOffsetI = (TrueHeight - Height) / 2; - LBOffset = float(LBOffsetI); CreateResources(); SetInitialState(); @@ -650,28 +635,39 @@ void OpenGLSWFrameBuffer::DrawTriangleList(int minIndex, int numVertices, int st void OpenGLSWFrameBuffer::Present() { - glBindFramebuffer(GL_FRAMEBUFFER, 0); + int clientWidth = GetClientWidth(); + int clientHeight = GetClientHeight(); + if (clientWidth > 0 && clientHeight > 0) + { + glBindFramebuffer(GL_FRAMEBUFFER, 0); + glViewport(0, 0, clientWidth, clientHeight); - FBVERTEX verts[4]; + float scale = MIN(clientWidth / (float)Width, clientHeight / (float)Height); + int letterboxWidth = (int)round(Width * scale); + int letterboxHeight = (int)round(Height * scale); + int letterboxX = (clientWidth - letterboxWidth) / 2; + int letterboxY = (clientHeight - letterboxHeight) / 2; - CalcFullscreenCoords(verts, false, true, 0, 0xFFFFFFFF); - //for (int i = 0; i < 4; i++) - // verts[i].tv = 1.0f - verts[i].tv; - SetTexture(0, OutputFB->Texture); - SetPixelShader(Shaders[SHADER_GammaCorrection]); - SetAlphaBlend(0); - EnableAlphaTest(false); - DrawTriangleFans(2, verts); + DrawLetterbox(letterboxX, letterboxY, 
letterboxWidth, letterboxHeight); + glViewport(letterboxX, letterboxY, letterboxWidth, letterboxHeight); + + FBVERTEX verts[4]; + CalcFullscreenCoords(verts, false, 0, 0xFFFFFFFF); + SetTexture(0, OutputFB->Texture); + SetPixelShader(Shaders[SHADER_GammaCorrection]); + SetAlphaBlend(0); + EnableAlphaTest(false); + DrawTriangleFans(2, verts); + } SwapBuffers(); Debug->Update(); - glViewport(0, 0, GetClientWidth(), GetClientHeight()); - - float screensize[4] = { (float)GetClientWidth(), (float)GetClientHeight(), 1.0f, 1.0f }; + float screensize[4] = { (float)Width, (float)Height, 1.0f, 1.0f }; SetPixelShaderConstantF(PSCONST_ScreenSize, screensize, 1); glBindFramebuffer(GL_FRAMEBUFFER, OutputFB->Framebuffer); + glViewport(0, 0, Width, Height); } //========================================================================== @@ -707,7 +703,7 @@ void OpenGLSWFrameBuffer::SetInitialState() float weights[4] = { 77 / 256.f, 143 / 256.f, 37 / 256.f, 1 }; SetPixelShaderConstantF(PSCONST_Weights, weights, 1); - float screensize[4] = { (float)GetClientWidth(), (float)GetClientHeight(), 1.0f, 1.0f }; + float screensize[4] = { (float)Width, (float)Height, 1.0f, 1.0f }; SetPixelShaderConstantF(PSCONST_ScreenSize, screensize, 1); AlphaTestEnabled = false; @@ -846,14 +842,14 @@ bool OpenGLSWFrameBuffer::Reset() { ReleaseDefaultPoolItems(); - if (!CreateFrameBuffer("OutputFB", Width, Height, &OutputFB)) - return false; - glBindFramebuffer(GL_FRAMEBUFFER, OutputFB->Framebuffer); - - if (!CreateFBTexture() || !CreateVertexes()) + if (!CreateFrameBuffer("OutputFB", Width, Height, &OutputFB) || !CreateFBTexture() || !CreateVertexes()) { return false; } + + glBindFramebuffer(GL_FRAMEBUFFER, OutputFB->Framebuffer); + glViewport(0, 0, Width, Height); + SetInitialState(); return true; } @@ -892,10 +888,7 @@ void OpenGLSWFrameBuffer::KillNativeTexs() bool OpenGLSWFrameBuffer::CreateFBTexture() { - CreateTexture("FBTexture", Width, Height, 1, GL_R8, &FBTexture); - FBWidth = Width; - FBHeight = 
Height; - return true; + return CreateTexture("FBTexture", Width, Height, 1, GL_R8, &FBTexture); } //========================================================================== @@ -906,11 +899,7 @@ bool OpenGLSWFrameBuffer::CreateFBTexture() bool OpenGLSWFrameBuffer::CreatePaletteTexture() { - if (!CreateTexture("PaletteTexture", 256, 1, 1, GL_RGBA8, &PaletteTexture)) - { - return false; - } - return true; + return CreateTexture("PaletteTexture", 256, 1, 1, GL_RGBA8, &PaletteTexture); } //========================================================================== @@ -942,35 +931,31 @@ bool OpenGLSWFrameBuffer::CreateVertexes() // //========================================================================== -void OpenGLSWFrameBuffer::CalcFullscreenCoords(FBVERTEX verts[4], bool viewarea_only, bool can_double, uint32_t color0, uint32_t color1) const +void OpenGLSWFrameBuffer::CalcFullscreenCoords(FBVERTEX verts[4], bool viewarea_only, uint32_t color0, uint32_t color1) const { - float offset = LBOffset;//OldRenderTarget != nullptr ? 
0 : LBOffset; - float top = offset - 0.5f; - float texright = float(Width) / float(FBWidth); - float texbot = float(Height) / float(FBHeight); float mxl, mxr, myt, myb, tmxl, tmxr, tmyt, tmyb; if (viewarea_only) { // Just calculate vertices for the viewarea/BlendingRect - mxl = float(BlendingRect.left) - 0.5f; - mxr = float(BlendingRect.right) - 0.5f; - myt = float(BlendingRect.top) + top; - myb = float(BlendingRect.bottom) + top; - tmxl = float(BlendingRect.left) / float(Width) * texright; - tmxr = float(BlendingRect.right) / float(Width) * texright; - tmyt = float(BlendingRect.top) / float(Height) * texbot; - tmyb = float(BlendingRect.bottom) / float(Height) * texbot; + mxl = float(BlendingRect.left); + mxr = float(BlendingRect.right); + myt = float(BlendingRect.top); + myb = float(BlendingRect.bottom); + tmxl = float(BlendingRect.left) / float(Width); + tmxr = float(BlendingRect.right) / float(Width); + tmyt = float(BlendingRect.top) / float(Height); + tmyb = float(BlendingRect.bottom) / float(Height); } else { // Calculate vertices for the whole screen - mxl = -0.5f; - mxr = float(Width << (can_double ? PixelDoubling : 0)) - 0.5f; - myt = top; - myb = float(Height << (can_double ? 
PixelDoubling : 0)) + top; + mxl = 0.0f; + mxr = float(Width); + myt = 0.0f; + myb = float(Height); tmxl = 0; - tmxr = texright; + tmxr = 1.0f; tmyt = 0; - tmyb = texbot; + tmyb = 1.0f; } //{ mxl, myt, 0, 1, 0, 0xFFFFFFFF, tmxl, tmyt }, @@ -1210,8 +1195,6 @@ void OpenGLSWFrameBuffer::Flip() { assert(InScene); - DrawLetterbox(); - Present(); InScene = false; @@ -1225,8 +1208,6 @@ void OpenGLSWFrameBuffer::Flip() TrueHeight = Height; PixelDoubling = 0; - LBOffsetI = 0; - LBOffset = 0.0f; Reset(); V_OutputResized(Width, Height); @@ -1338,7 +1319,7 @@ void OpenGLSWFrameBuffer::Draw3DPart(bool copy3d) color0 = FlashColor0; color1 = FlashColor1; } - CalcFullscreenCoords(verts, Accel2D, false, color0, color1); + CalcFullscreenCoords(verts, Accel2D, color0, color1); DrawTriangleFans(2, verts); } SetPixelShader(Shaders[SHADER_NormalColorPal]); @@ -1353,18 +1334,36 @@ void OpenGLSWFrameBuffer::Draw3DPart(bool copy3d) // //========================================================================== -void OpenGLSWFrameBuffer::DrawLetterbox() +void OpenGLSWFrameBuffer::DrawLetterbox(int x, int y, int width, int height) { - if (LBOffsetI != 0) + int clientWidth = GetClientWidth(); + int clientHeight = GetClientHeight(); + if (clientWidth == 0 || clientHeight == 0) + return; + + glClearColor(0.0f, 0.0f, 0.0f, 1.0f); + glEnable(GL_SCISSOR_TEST); + if (x > 0) { - glEnable(GL_SCISSOR_TEST); - glClearColor(0.0f, 0.0f, 0.0f, 1.0f); - glScissor(0, 0, Width, LBOffsetI); + glScissor(0, 0, clientWidth, x); glClear(GL_COLOR_BUFFER_BIT); - glScissor(0, Height + LBOffsetI, Width, TrueHeight - Height + LBOffsetI); - glClear(GL_COLOR_BUFFER_BIT); - glDisable(GL_SCISSOR_TEST); } + if (clientHeight - x - height > 0) + { + glScissor(0, x + height, clientWidth, clientHeight - x - height); + glClear(GL_COLOR_BUFFER_BIT); + } + if (y > 0) + { + glScissor(0, x, y, height); + glClear(GL_COLOR_BUFFER_BIT); + } + if (clientWidth - y - width > 0) + { + glScissor(y + width, x, clientWidth - y - width, 
height); + glClear(GL_COLOR_BUFFER_BIT); + } + glDisable(GL_SCISSOR_TEST); } void OpenGLSWFrameBuffer::UploadPalette() @@ -1604,7 +1603,7 @@ void OpenGLSWFrameBuffer::DrawPackedTextures(int packnum) continue; } - AddColorOnlyRect(x - 1, y - 1 - LBOffsetI, 258, 258, ColorXRGB(255, 255, 0)); + AddColorOnlyRect(x - 1, y - 1, 258, 258, ColorXRGB(255, 255, 0)); int back = 0; for (PackedTexture *box = pack->UsedList; box != nullptr; box = box->Next) { @@ -1638,8 +1637,8 @@ void OpenGLSWFrameBuffer::DrawPackedTextures(int packnum) quad->NumVerts = 4; quad->NumTris = 2; - float x0 = float(x) - 0.5f; - float y0 = float(y) - 0.5f; + float x0 = float(x); + float y0 = float(y); float x1 = x0 + 256.f; float y1 = y0 + 256.f; @@ -1695,7 +1694,7 @@ void OpenGLSWFrameBuffer::DrawPackedTextures(int packnum) { x = 8; y += 256 + 8; - if (y > TrueHeight - 256) + if (y > Height - 256) { return; } @@ -2457,7 +2456,7 @@ void OpenGLSWFrameBuffer::DrawLine(int x0, int y0, int x1, int y1, int palcolor, } // Add the endpoints to the vertex buffer. 
VertexData[VertexPos].x = float(x0); - VertexData[VertexPos].y = float(y0) + LBOffset; + VertexData[VertexPos].y = float(y0); VertexData[VertexPos].z = 0; VertexData[VertexPos].rhw = 1; VertexData[VertexPos].color0 = color; @@ -2466,7 +2465,7 @@ void OpenGLSWFrameBuffer::DrawLine(int x0, int y0, int x1, int y1, int palcolor, VertexData[VertexPos].tv = 0; VertexData[VertexPos + 1].x = float(x1); - VertexData[VertexPos + 1].y = float(y1) + LBOffset; + VertexData[VertexPos + 1].y = float(y1); VertexData[VertexPos + 1].z = 0; VertexData[VertexPos + 1].rhw = 1; VertexData[VertexPos + 1].color0 = color; @@ -2548,7 +2547,6 @@ void OpenGLSWFrameBuffer::DrawTextureParms(FTexture *img, DrawParms &parms) double uscale = 1.f / tex->Box->Owner->Width; bool scissoring = false; FBVERTEX *vert; - float yoffs; if (parms.flipX) { @@ -2598,7 +2596,7 @@ void OpenGLSWFrameBuffer::DrawTextureParms(FTexture *img, DrawParms &parms) BeginQuadBatch(); } glEnable(GL_SCISSOR_TEST); - glScissor(parms.lclip, parms.uclip + LBOffsetI, parms.rclip - parms.lclip, parms.dclip - parms.uclip); + glScissor(parms.lclip, parms.uclip, parms.rclip - parms.lclip, parms.dclip - parms.uclip); } #endif parms.bilinear = false; @@ -2619,23 +2617,6 @@ void OpenGLSWFrameBuffer::DrawTextureParms(FTexture *img, DrawParms &parms) quad->NumTris = 2; quad->NumVerts = 4; - yoffs = GatheringWipeScreen ? 0.5f : 0.5f - LBOffset; - -#if 0 - // Coordinates are truncated to integers, because that's effectively - // what the software renderer does. The hardware will instead round - // to nearest, it seems. - x0 = floorf(x0) - 0.5f; - y0 = floorf(y0) - yoffs; - x1 = floorf(x1) - 0.5f; - y1 = floorf(y1) - yoffs; -#else - x0 = x0 - 0.5f; - y0 = y0 - yoffs; - x1 = x1 - 0.5f; - y1 = y1 - yoffs; -#endif - vert = &VertexData[VertexPos]; // Fill the vertex buffer. @@ -2719,7 +2700,6 @@ void OpenGLSWFrameBuffer::FlatFill(int left, int top, int right, int bottom, FTe { return; } - float yoffs = GatheringWipeScreen ? 
0.5f : 0.5f - LBOffset; float x0 = float(left); float y0 = float(top); float x1 = float(right); @@ -2732,10 +2712,6 @@ void OpenGLSWFrameBuffer::FlatFill(int left, int top, int right, int bottom, FTe float v0 = (y0 - yo) * ith; float u1 = (x1 - xo) * itw; float v1 = (y1 - yo) * ith; - x0 -= 0.5f; - y0 -= yoffs; - x1 -= 0.5f; - y1 -= yoffs; CheckQuadBatch(); @@ -2824,7 +2800,7 @@ void OpenGLSWFrameBuffer::FillSimplePoly(FTexture *texture, FVector2 *points, in BufferedTris *quad; FBVERTEX *verts; OpenGLTex *tex; - float yoffs, uscale, vscale; + float uscale, vscale; int i, ipos; uint32_t color0, color1; float ox, oy; @@ -2890,7 +2866,6 @@ void OpenGLSWFrameBuffer::FillSimplePoly(FTexture *texture, FVector2 *points, in quad->NumVerts = npoints; quad->NumTris = npoints - 2; - yoffs = GatheringWipeScreen ? 0 : LBOffset; uscale = float(1.f / (texture->GetScaledWidth() * scalex)); vscale = float(1.f / (texture->GetScaledHeight() * scaley)); ox = float(originx); @@ -2899,13 +2874,13 @@ void OpenGLSWFrameBuffer::FillSimplePoly(FTexture *texture, FVector2 *points, in for (i = 0; i < npoints; ++i) { verts[i].x = points[i].X; - verts[i].y = points[i].Y + yoffs; + verts[i].y = points[i].Y; verts[i].z = 0; verts[i].rhw = 1; verts[i].color0 = color0; verts[i].color1 = color1; - float u = points[i].X - 0.5f - ox; - float v = points[i].Y - 0.5f - oy; + float u = points[i].X - ox; + float v = points[i].Y - oy; if (dorotate) { float t = u; @@ -2944,8 +2919,8 @@ void OpenGLSWFrameBuffer::AddColorOnlyQuad(int left, int top, int width, int hei quad = &QuadExtra[QuadBatchPos]; verts = &VertexData[VertexPos]; - float x = float(left) - 0.5f; - float y = float(top) - 0.5f + (GatheringWipeScreen ? 
0 : LBOffset); + float x = float(left); + float y = float(top); quad->ClearSetup(); quad->ShaderNum = BQS_ColorOnly; diff --git a/src/gl/system/gl_swframebuffer.h b/src/gl/system/gl_swframebuffer.h index 1cf0288ce3..5876c34c6d 100644 --- a/src/gl/system/gl_swframebuffer.h +++ b/src/gl/system/gl_swframebuffer.h @@ -382,7 +382,7 @@ private: bool CreatePaletteTexture(); bool CreateVertexes(); void UploadPalette(); - void CalcFullscreenCoords(FBVERTEX verts[4], bool viewarea_only, bool can_double, uint32_t color0, uint32_t color1) const; + void CalcFullscreenCoords(FBVERTEX verts[4], bool viewarea_only, uint32_t color0, uint32_t color1) const; bool Reset(); HWTexture *CopyCurrentScreen(); void ReleaseDefaultPoolItems(); @@ -390,7 +390,7 @@ private: void KillNativeTexs(); PackedTexture *AllocPackedTexture(int width, int height, bool wrapping, int format); void DrawPackedTextures(int packnum); - void DrawLetterbox(); + void DrawLetterbox(int x, int y, int width, int height); void Draw3DPart(bool copy3d); bool SetStyle(OpenGLTex *tex, DrawParms &parms, uint32_t &color0, uint32_t &color1, BufferedTris &quad); static int GetStyleAlpha(int type); @@ -442,13 +442,10 @@ private: int FlashAmount; int TrueHeight; int PixelDoubling; - int LBOffsetI; - float LBOffset; float Gamma; bool UpdatePending; bool NeedPalUpdate; bool NeedGammaUpdate; - int FBWidth, FBHeight; bool VSync; LTRBRect BlendingRect; int In2D; diff --git a/src/gl/system/gl_swwipe.cpp b/src/gl/system/gl_swwipe.cpp index 2f36272b6b..148a087c5a 100644 --- a/src/gl/system/gl_swwipe.cpp +++ b/src/gl/system/gl_swwipe.cpp @@ -232,7 +232,6 @@ bool OpenGLSWFrameBuffer::WipeDo(int ticks) EnableAlphaTest(false); bool done = ScreenWipe->Run(ticks, this); - DrawLetterbox(); return done; } @@ -284,7 +283,7 @@ void OpenGLSWFrameBuffer::Wiper::DrawScreen(OpenGLSWFrameBuffer *fb, HWTexture * { FBVERTEX verts[4]; - fb->CalcFullscreenCoords(verts, false, false, color0, color1); + fb->CalcFullscreenCoords(verts, false, color0, 
color1); fb->SetTexture(0, tex); fb->SetAlphaBlend(blendop, GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); fb->SetPixelShader(fb->Shaders[SHADER_NormalColor]); @@ -410,15 +409,15 @@ bool OpenGLSWFrameBuffer::Wiper_Melt::Run(int ticks, OpenGLSWFrameBuffer *fb) quad->NumTris = 2; // Fill the vertex buffer. - float u0 = rect.left / float(fb->FBWidth); + float u0 = rect.left / float(fb->Width); float v0 = 0; - float u1 = rect.right / float(fb->FBWidth); - float v1 = (rect.bottom - rect.top) / float(fb->FBHeight); + float u1 = rect.right / float(fb->Width); + float v1 = (rect.bottom - rect.top) / float(fb->Height); - float x0 = float(rect.left) - 0.5f; - float x1 = float(rect.right) - 0.5f; - float y0 = float(dpt.y + fb->LBOffsetI) - 0.5f; - float y1 = float(fbheight + fb->LBOffsetI) - 0.5f; + float x0 = float(rect.left); + float x1 = float(rect.right); + float y0 = float(dpt.y); + float y1 = float(fbheight); vert[0].x = x0; vert[0].y = y0; @@ -562,18 +561,15 @@ bool OpenGLSWFrameBuffer::Wiper_Burn::Run(int ticks, OpenGLSWFrameBuffer *fb) DrawScreen(fb, fb->InitialWipeScreen); // Burn the new screen on top of it. 
- float top = fb->LBOffset - 0.5f; - float right = float(fb->Width) - 0.5f; - float bot = float(fb->Height) + top; - float texright = float(fb->Width) / float(fb->FBWidth); - float texbot = float(fb->Height) / float(fb->FBHeight); + float right = float(fb->Width); + float bot = float(fb->Height); BURNVERTEX verts[4] = { - { -0.5f, top, 0.5f, 1.f, 0.f, 0.f, 0, 0 }, - { right, top, 0.5f, 1.f, texright, 0.f, 1, 0 }, - { right, bot, 0.5f, 1.f, texright, texbot, 1, 1 }, - { -0.5f, bot, 0.5f, 1.f, 0.f, texbot, 0, 1 } + { 0.f, 0.f, 0.f, 1.f, 0.f, 0.f, 0, 0 }, + { right, 0.f, 0.f, 1.f, 1.f, 0.f, 1, 0 }, + { right, bot, 0.f, 1.f, 1.f, 1.f, 1, 1 }, + { 0.f, bot, 0.f, 1.f, 0.f, 1.f, 0, 1 } }; fb->SetTexture(0, fb->FinalWipeScreen); From 052f7900c23cd361a99e88461ed9e8b23db58eea Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 12 Oct 2016 08:04:42 +0200 Subject: [PATCH 185/912] Fix swapped x and y in DrawLetterbox --- src/gl/system/gl_swframebuffer.cpp | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/src/gl/system/gl_swframebuffer.cpp b/src/gl/system/gl_swframebuffer.cpp index 252153b54d..cffee9b915 100644 --- a/src/gl/system/gl_swframebuffer.cpp +++ b/src/gl/system/gl_swframebuffer.cpp @@ -1343,24 +1343,24 @@ void OpenGLSWFrameBuffer::DrawLetterbox(int x, int y, int width, int height) glClearColor(0.0f, 0.0f, 0.0f, 1.0f); glEnable(GL_SCISSOR_TEST); - if (x > 0) - { - glScissor(0, 0, clientWidth, x); - glClear(GL_COLOR_BUFFER_BIT); - } - if (clientHeight - x - height > 0) - { - glScissor(0, x + height, clientWidth, clientHeight - x - height); - glClear(GL_COLOR_BUFFER_BIT); - } if (y > 0) { - glScissor(0, x, y, height); + glScissor(0, 0, clientWidth, y); glClear(GL_COLOR_BUFFER_BIT); } - if (clientWidth - y - width > 0) + if (clientHeight - y - height > 0) { - glScissor(y + width, x, clientWidth - y - width, height); + glScissor(0, y + height, clientWidth, clientHeight - y - height); + glClear(GL_COLOR_BUFFER_BIT); + } + if 
(x > 0) + { + glScissor(0, y, x, height); + glClear(GL_COLOR_BUFFER_BIT); + } + if (clientWidth - x - width > 0) + { + glScissor(x + width, y, clientWidth - x - width, height); glClear(GL_COLOR_BUFFER_BIT); } glDisable(GL_SCISSOR_TEST); From c5ebfd9e65309744e6cb6e78fafef5d2b33834be Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 12 Oct 2016 08:05:31 +0200 Subject: [PATCH 186/912] Add vid_max_width and vid_max_height --- src/gl/system/gl_swframebuffer.cpp | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/gl/system/gl_swframebuffer.cpp b/src/gl/system/gl_swframebuffer.cpp index 78e1a5943c..344aa827a0 100644 --- a/src/gl/system/gl_swframebuffer.cpp +++ b/src/gl/system/gl_swframebuffer.cpp @@ -86,6 +86,15 @@ EXTERN_CVAR(Bool, vid_vsync) EXTERN_CVAR(Float, transsouls) EXTERN_CVAR(Int, vid_refreshrate) +CVAR(Int, vid_max_width, 0, CVAR_ARCHIVE | CVAR_GLOBALCONFIG) +CVAR(Int, vid_max_height, 0, CVAR_ARCHIVE | CVAR_GLOBALCONFIG) + +namespace +{ + int ClampWidth(int width) { return (vid_max_width == 0 || width < vid_max_width) ? width : vid_max_width; } + int ClampHeight(int height) { return (vid_max_height == 0 || height < vid_max_height) ? 
height : vid_max_height; } +} + extern cycle_t BlitCycles; void gl_LoadExtensions(); @@ -121,7 +130,7 @@ const char *const OpenGLSWFrameBuffer::ShaderDefines[OpenGLSWFrameBuffer::NUM_SH }; OpenGLSWFrameBuffer::OpenGLSWFrameBuffer(void *hMonitor, int width, int height, int bits, int refreshHz, bool fullscreen, bool bgra) : - Super(hMonitor, width, height, bits, refreshHz, fullscreen, bgra) + Super(hMonitor, ClampWidth(width), ClampHeight(height), bits, refreshHz, fullscreen, bgra) { // To do: this needs to cooperate with the same static in OpenGLFrameBuffer::InitializeState static bool first = true; @@ -1200,8 +1209,8 @@ void OpenGLSWFrameBuffer::Flip() if (Windowed) { - int clientWidth = GetClientWidth(); - int clientHeight = GetClientHeight(); + int clientWidth = ClampWidth(GetClientWidth()); + int clientHeight = ClampHeight(GetClientHeight()); if (clientWidth > 0 && clientHeight > 0 && (Width != clientWidth || Height != clientHeight)) { Resize(clientWidth, clientHeight); From 27b432a9307811566706dbc2d5f5e07754b6caed Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 12 Oct 2016 13:25:05 +0200 Subject: [PATCH 187/912] Improve crash handling in drawers --- src/r_compiler/llvmdrawers.h | 7 +++ src/r_draw_rgba.cpp | 52 ++++++++++++++++ src/r_draw_rgba.h | 1 + src/r_drawt_rgba.cpp | 20 +++++++ src/r_thread.cpp | 111 ++++++++++++++++++++++++++++------- src/r_thread.h | 17 +++++- src/win32/i_crash.cpp | 34 +++++++++++ 7 files changed, 220 insertions(+), 22 deletions(-) diff --git a/src/r_compiler/llvmdrawers.h b/src/r_compiler/llvmdrawers.h index 60a6c799ac..afb3cadf1b 100644 --- a/src/r_compiler/llvmdrawers.h +++ b/src/r_compiler/llvmdrawers.h @@ -41,6 +41,13 @@ struct DrawWallArgs simple_shade = 1, nearest_filter = 2 }; + + FString ToString() + { + FString info; + info.Format("dest_y = %i, count = %i, flags = %i", dest_y, count, flags); + return info; + } }; struct DrawSpanArgs diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 953a078580..eb840fa3b9 
100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -104,6 +104,8 @@ public: LLVMDrawers::Instance()->DrawSpan(&args); } + FString DebugInfo() override { return "DrawSpanLLVMCommand"; } + protected: DrawSpanArgs args; @@ -247,6 +249,11 @@ public: WorkerThreadData d = ThreadData(thread); LLVMDrawers::Instance()->vlinec4(&args, &d); } + + FString DebugInfo() override + { + return "DrawWall4LLVMCommand\n" + args.ToString(); + } }; class DrawWall1LLVMCommand : public DrawerCommand @@ -301,6 +308,11 @@ public: WorkerThreadData d = ThreadData(thread); LLVMDrawers::Instance()->vlinec1(&args, &d); } + + FString DebugInfo() override + { + return "DrawWall1LLVMCommand\n" + args.ToString(); + } }; class DrawColumnLLVMCommand : public DrawerCommand @@ -318,6 +330,11 @@ protected: return d; } + FString DebugInfo() override + { + return "DrawColumnLLVMCommand"; + } + public: DrawColumnLLVMCommand() { @@ -510,6 +527,11 @@ public: *dest = 0xff000000 | (red << 16) | (green << 8) | blue; } } + + FString DebugInfo() override + { + return "DrawFuzzColumnRGBACommand"; + } }; class FillSpanRGBACommand : public DrawerCommand @@ -544,6 +566,11 @@ public: for (int i = 0; i < count; i++) dest[i] = color; } + + FString DebugInfo() override + { + return "FillSpanRGBACommand"; + } }; ///////////////////////////////////////////////////////////////////////////// @@ -663,6 +690,11 @@ public: dy--; } } + + FString DebugInfo() override + { + return "DrawSlabRGBACommand"; + } }; ///////////////////////////////////////////////////////////////////////////// @@ -737,6 +769,11 @@ public: dest[x] = 0xff000000 | (red << 16) | (green << 8) | blue; } while (++x <= x2); } + + FString DebugInfo() override + { + return "DrawFogBoundaryLineRGBACommand"; + } }; class DrawTiltedSpanRGBACommand : public DrawerCommand @@ -886,6 +923,11 @@ public: count--; } } + + FString DebugInfo() override + { + return "DrawTiltedSpanRGBACommand"; + } }; class DrawColoredSpanRGBACommand : public DrawerCommand @@ -925,6 
+967,11 @@ public: for (int i = 0; i < count; i++) dest[i] = color; } + + FString DebugInfo() override + { + return "DrawColoredSpanRGBACommand"; + } }; class FillTransColumnRGBACommand : public DrawerCommand @@ -992,6 +1039,11 @@ public: dest += spacing; } } + + FString DebugInfo() override + { + return "FillTransColumnRGBACommand"; + } }; ApplySpecialColormapRGBACommand::ApplySpecialColormapRGBACommand(FSpecialColormap *colormap, DFrameBuffer *screen) diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index d3ad0613ab..253315f149 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -137,6 +137,7 @@ class ApplySpecialColormapRGBACommand : public DrawerCommand public: ApplySpecialColormapRGBACommand(FSpecialColormap *colormap, DFrameBuffer *screen); void Execute(DrawerThread *thread) override; + FString DebugInfo() override { return "ApplySpecialColormapRGBACommand"; } }; template diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp index 18ae228e4e..b4f70592ee 100644 --- a/src/r_drawt_rgba.cpp +++ b/src/r_drawt_rgba.cpp @@ -104,6 +104,11 @@ public: WorkerThreadData d = ThreadData(thread); LLVMDrawers::Instance()->DrawColumnRt1(&args, &d); } + + FString DebugInfo() override + { + return "DrawColumnRt1LLVMCommand"; + } }; #define DECLARE_DRAW_COMMAND(name, func, base) \ @@ -158,6 +163,11 @@ public: { thread->dc_temp_rgba = buff == NULL ? 
thread->dc_temp_rgbabuff_rgba : (uint32_t*)buff; } + + FString DebugInfo() override + { + return "RtInitColsRGBACommand"; + } }; template @@ -233,6 +243,11 @@ public: dest += 32; } while (--count); } + + FString DebugInfo() override + { + return "DrawColumnHorizRGBACommand"; + } }; class FillColumnHorizRGBACommand : public DrawerCommand @@ -278,6 +293,11 @@ public: dest += 8; } while (--count); } + + FString DebugInfo() override + { + return "FillColumnHorizRGBACommand"; + } }; ///////////////////////////////////////////////////////////////////////////// diff --git a/src/r_thread.cpp b/src/r_thread.cpp index dec0b8c6ca..4f10bd8bb7 100644 --- a/src/r_thread.cpp +++ b/src/r_thread.cpp @@ -97,26 +97,50 @@ void DrawerCommandQueue::Finish() thread.core = 0; thread.num_cores = (int)(queue->threads.size() + 1); - for (int pass = 0; pass < queue->num_passes; pass++) + struct TryCatchData { - thread.pass_start_y = pass * queue->rows_in_pass; - thread.pass_end_y = (pass + 1) * queue->rows_in_pass; - if (pass + 1 == queue->num_passes) - thread.pass_end_y = MAX(thread.pass_end_y, MAXHEIGHT); + DrawerCommandQueue *queue; + DrawerThread *thread; + size_t command_index; + } data; - size_t size = queue->active_commands.size(); - for (size_t i = 0; i < size; i++) + data.queue = queue; + data.thread = &thread; + data.command_index = 0; + VectoredTryCatch(&data, + [](void *data) + { + TryCatchData *d = (TryCatchData*)data; + + for (int pass = 0; pass < d->queue->num_passes; pass++) { - auto &command = queue->active_commands[i]; - command->Execute(&thread); + d->thread->pass_start_y = pass * d->queue->rows_in_pass; + d->thread->pass_end_y = (pass + 1) * d->queue->rows_in_pass; + if (pass + 1 == d->queue->num_passes) + d->thread->pass_end_y = MAX(d->thread->pass_end_y, MAXHEIGHT); + + size_t size = d->queue->active_commands.size(); + for (d->command_index = 0; d->command_index < size; d->command_index++) + { + auto &command = d->queue->active_commands[d->command_index]; + 
command->Execute(d->thread); + } } - } + }, + [](void *data) + { + TryCatchData *d = (TryCatchData*)data; + ReportFatalError(d->queue->active_commands[d->command_index], true); + }); // Wait for everyone to finish: std::unique_lock end_lock(queue->end_mutex); queue->end_condition.wait(end_lock, [&]() { return queue->finished_threads == queue->threads.size(); }); + if (!queue->thread_error.IsEmpty()) + I_FatalError("Fatal drawer error: %s", queue->thread_error.GetChars()); + // Clean up batch: for (auto &command : queue->active_commands) @@ -157,20 +181,42 @@ void DrawerCommandQueue::StartThreads() start_lock.unlock(); // Do the work: - for (int pass = 0; pass < queue->num_passes; pass++) - { - thread->pass_start_y = pass * queue->rows_in_pass; - thread->pass_end_y = (pass + 1) * queue->rows_in_pass; - if (pass + 1 == queue->num_passes) - thread->pass_end_y = MAX(thread->pass_end_y, MAXHEIGHT); - size_t size = queue->active_commands.size(); - for (size_t i = 0; i < size; i++) + struct TryCatchData + { + DrawerCommandQueue *queue; + DrawerThread *thread; + size_t command_index; + } data; + + data.queue = queue; + data.thread = thread; + data.command_index = 0; + VectoredTryCatch(&data, + [](void *data) + { + TryCatchData *d = (TryCatchData*)data; + + for (int pass = 0; pass < d->queue->num_passes; pass++) { - auto &command = queue->active_commands[i]; - command->Execute(thread); + d->thread->pass_start_y = pass * d->queue->rows_in_pass; + d->thread->pass_end_y = (pass + 1) * d->queue->rows_in_pass; + if (pass + 1 == d->queue->num_passes) + d->thread->pass_end_y = MAX(d->thread->pass_end_y, MAXHEIGHT); + + size_t size = d->queue->active_commands.size(); + for (d->command_index = 0; d->command_index < size; d->command_index++) + { + auto &command = d->queue->active_commands[d->command_index]; + command->Execute(d->thread); + } } - } + }, + [](void *data) + { + TryCatchData *d = (TryCatchData*)data; + ReportFatalError(d->queue->active_commands[d->command_index], true); 
+ }); // Notify main thread that we finished: std::unique_lock end_lock(queue->end_mutex); @@ -194,3 +240,26 @@ void DrawerCommandQueue::StopThreads() lock.lock(); shutdown_flag = false; } + +void DrawerCommandQueue::ReportFatalError(DrawerCommand *command, bool worker_thread) +{ + if (worker_thread) + { + std::unique_lock end_lock(Instance()->end_mutex); + if (Instance()->thread_error.IsEmpty()) + Instance()->thread_error = command->DebugInfo(); + } + else + { + I_FatalError("Fatal drawer error: %s", command->DebugInfo().GetChars()); + } +} + +#ifndef WIN32 + +void VectoredTryCatch(void *data, void(*tryBlock)(void *data), void(*catchBlock)(void *data)) +{ + tryBlock(data); +} + +#endif diff --git a/src/r_thread.h b/src/r_thread.h index 312c5ad226..5bb4132401 100644 --- a/src/r_thread.h +++ b/src/r_thread.h @@ -79,8 +79,11 @@ public: } virtual void Execute(DrawerThread *thread) = 0; + virtual FString DebugInfo() = 0; }; +void VectoredTryCatch(void *data, void(*tryBlock)(void *data), void(*catchBlock)(void *data)); + // Manages queueing up commands and executing them on worker threads class DrawerCommandQueue { @@ -101,6 +104,7 @@ class DrawerCommandQueue std::mutex end_mutex; std::condition_variable end_condition; size_t finished_threads = 0; + FString thread_error; int threaded_render = 0; DrawerThread single_core_thread; @@ -112,6 +116,7 @@ class DrawerCommandQueue void Finish(); static DrawerCommandQueue *Instance(); + static void ReportFatalError(DrawerCommand *command, bool worker_thread); DrawerCommandQueue(); ~DrawerCommandQueue(); @@ -128,7 +133,17 @@ public: if (queue->threaded_render == 0 || !r_multithreaded) { T command(std::forward(args)...); - command.Execute(&queue->single_core_thread); + VectoredTryCatch(&command, + [](void *data) + { + T *c = (T*)data; + c->Execute(&Instance()->single_core_thread); + }, + [](void *data) + { + T *c = (T*)data; + ReportFatalError(c, false); + }); } else { diff --git a/src/win32/i_crash.cpp b/src/win32/i_crash.cpp 
index d4804ec0e9..373f902c55 100644 --- a/src/win32/i_crash.cpp +++ b/src/win32/i_crash.cpp @@ -3399,3 +3399,37 @@ void DisplayCrashLog () } CloseTarFiles (); } + +///////////////////////////////////////////////////////////////////////////// + +namespace +{ + bool __declspec(thread) DrawerExceptionSetJumpResult; + CONTEXT __declspec(thread) DrawerExceptionSetJumpContext; + PVOID __declspec(thread) DrawerExceptionHandlerHandle; + + LONG WINAPI DrawerExceptionHandler(_EXCEPTION_POINTERS *exceptionInfo) + { + //RtlRestoreContext(&DrawerExceptionSetJumpContext, exceptionInfo->ExceptionRecord); + *exceptionInfo->ContextRecord = DrawerExceptionSetJumpContext; + return EXCEPTION_CONTINUE_EXECUTION; + } +} + +void VectoredTryCatch(void *data, void(*tryBlock)(void *data), void(*catchBlock)(void *data)) +{ + DrawerExceptionSetJumpResult = false; + RtlCaptureContext(&DrawerExceptionSetJumpContext); + if (DrawerExceptionSetJumpResult) + { + RemoveVectoredExceptionHandler(DrawerExceptionHandlerHandle); + catchBlock(data); + } + else + { + DrawerExceptionSetJumpResult = true; + DrawerExceptionHandlerHandle = AddVectoredExceptionHandler(1, DrawerExceptionHandler); + tryBlock(data); + RemoveVectoredExceptionHandler(DrawerExceptionHandlerHandle); + } +} From b5b96ee222d0d7e3e2c71589bd58d3634238f513 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 12 Oct 2016 13:49:12 +0200 Subject: [PATCH 188/912] Add a little bit more debug information --- src/r_compiler/llvmdrawers.h | 14 ++++++++++++++ src/r_draw_rgba.cpp | 7 +++++-- src/r_drawt_rgba.cpp | 2 +- 3 files changed, 20 insertions(+), 3 deletions(-) diff --git a/src/r_compiler/llvmdrawers.h b/src/r_compiler/llvmdrawers.h index afb3cadf1b..7f5f6658e2 100644 --- a/src/r_compiler/llvmdrawers.h +++ b/src/r_compiler/llvmdrawers.h @@ -83,6 +83,13 @@ struct DrawSpanArgs simple_shade = 1, nearest_filter = 2 }; + + FString ToString() + { + FString info; + info.Format("x1 = %i, x2 = %i, y = %i, flags = %i", x1, x2, y, flags); + return 
info; + } }; struct DrawColumnArgs @@ -117,6 +124,13 @@ struct DrawColumnArgs { simple_shade = 1 }; + + FString ToString() + { + FString info; + info.Format("dest_y = %i, count = %i, flags = %i", dest_y, count, flags); + return info; + } }; class LLVMDrawers diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index eb840fa3b9..5114b95f06 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -104,7 +104,10 @@ public: LLVMDrawers::Instance()->DrawSpan(&args); } - FString DebugInfo() override { return "DrawSpanLLVMCommand"; } + FString DebugInfo() override + { + return "DrawSpanLLVMCommand\n" + args.ToString(); + } protected: DrawSpanArgs args; @@ -332,7 +335,7 @@ protected: FString DebugInfo() override { - return "DrawColumnLLVMCommand"; + return "DrawColumnLLVMCommand\n" + args.ToString(); } public: diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp index b4f70592ee..bf957df3de 100644 --- a/src/r_drawt_rgba.cpp +++ b/src/r_drawt_rgba.cpp @@ -107,7 +107,7 @@ public: FString DebugInfo() override { - return "DrawColumnRt1LLVMCommand"; + return "DrawColumnRt1LLVMCommand\n" + args.ToString(); } }; From fbbdd403f19eb93c4c7747d1b000c7bea3677ec6 Mon Sep 17 00:00:00 2001 From: raa-eruanna Date: Wed, 12 Oct 2016 14:37:57 -0400 Subject: [PATCH 189/912] - Implemented r_linearsky for dual-layer skies --- src/r_plane.cpp | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/src/r_plane.cpp b/src/r_plane.cpp index cfc30e1c7b..f016c9e042 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -888,9 +888,19 @@ static const BYTE *R_GetOneSkyColumn (FTexture *fronttex, int x) // Get a column of sky when there are two overlapping sky textures static const BYTE *R_GetTwoSkyColumns (FTexture *fronttex, int x) { - DWORD ang = (skyangle + xtoviewangle[x]) ^ skyflip; - DWORD angle1 = (DWORD)((UMulScale16(ang, frontcyl) + frontpos) >> FRACBITS); - DWORD angle2 = (DWORD)((UMulScale16(ang, backcyl) + backpos) >> FRACBITS); + DWORD ang, angle1, 
angle2 = (DWORD)((UMulScale16(ang, backcyl) + backpos) >> FRACBITS); + + if (r_linearsky) + { + angle_t xangle = (angle_t)((0.5 - x / (double)viewwidth) * FocalTangent * ANGLE_90); + ang = (skyangle + xangle) ^ skyflip; + } + else + { + ang = (skyangle + xtoviewangle[x]) ^ skyflip; + } + angle1 = (DWORD)((UMulScale16(ang, frontcyl) + frontpos) >> FRACBITS); + angle2 = (DWORD)((UMulScale16(ang, backcyl) + backpos) >> FRACBITS); // Check if this column has already been built. If so, there's // no reason to waste time building it again. From d96ec6c311ba5abf867844c931426f9de2b0f730 Mon Sep 17 00:00:00 2001 From: raa-eruanna Date: Thu, 13 Oct 2016 02:12:48 -0400 Subject: [PATCH 190/912] - Expand truecolor sky buffer from 4 columns to 2048 columns. 2048 is probably the max we'll ever need, even with 4k, 8k, or higher resolutions, since the algorithm checks for repeat columns. --- src/r_plane.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/r_plane.cpp b/src/r_plane.cpp index f016c9e042..a3facc3423 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -852,10 +852,11 @@ extern FTexture *rw_pic; // since the most anyone can ever see of the sky is 500 pixels. // We need 4 skybufs because wallscan can draw up to 4 columns at a time. 
// Need two versions - one for true color and one for palette +#define MAXSKYBUF 2048 static BYTE skybuf[4][512]; -static uint32_t skybuf_bgra[4][512]; +static uint32_t skybuf_bgra[MAXSKYBUF][512]; static DWORD lastskycol[4]; -static DWORD lastskycol_bgra[4]; +static DWORD lastskycol_bgra[MAXSKYBUF]; static int skycolplace; static int skycolplace_bgra; @@ -945,7 +946,7 @@ static const BYTE *R_GetTwoSkyColumns (FTexture *fronttex, int x) else { //return R_GetOneSkyColumn(fronttex, x); - for (i = 0; i < 4; ++i) + for (i = 0; i < MAXSKYBUF; ++i) { if (lastskycol_bgra[i] == skycol) { @@ -955,7 +956,7 @@ static const BYTE *R_GetTwoSkyColumns (FTexture *fronttex, int x) lastskycol_bgra[skycolplace_bgra] = skycol; uint32_t *composite = skybuf_bgra[skycolplace_bgra]; - skycolplace_bgra = (skycolplace_bgra + 1) & 3; + skycolplace_bgra = (skycolplace_bgra + 1) % MAXSKYBUF; // The ordering of the following code has been tuned to allow VC++ to optimize // it well. In particular, this arrangement lets it keep count in a register From 5d08a812404b463ce6b2008f8b74092a2b501c6e Mon Sep 17 00:00:00 2001 From: raa-eruanna Date: Thu, 13 Oct 2016 02:37:38 -0400 Subject: [PATCH 191/912] - fixed: compiler warning with bad copy-paste - fixed: speed up sky compositing in truecolor mode with large buffers - only the last 4 buffers used are checked. 
--- src/r_plane.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/r_plane.cpp b/src/r_plane.cpp index a3facc3423..d69e8a5c4b 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -889,7 +889,7 @@ static const BYTE *R_GetOneSkyColumn (FTexture *fronttex, int x) // Get a column of sky when there are two overlapping sky textures static const BYTE *R_GetTwoSkyColumns (FTexture *fronttex, int x) { - DWORD ang, angle1, angle2 = (DWORD)((UMulScale16(ang, backcyl) + backpos) >> FRACBITS); + DWORD ang, angle1, angle2; if (r_linearsky) { @@ -946,11 +946,12 @@ static const BYTE *R_GetTwoSkyColumns (FTexture *fronttex, int x) else { //return R_GetOneSkyColumn(fronttex, x); - for (i = 0; i < MAXSKYBUF; ++i) + for (i = skycolplace_bgra - 4; i < skycolplace_bgra; ++i) { - if (lastskycol_bgra[i] == skycol) + int ic = (i % MAXSKYBUF); // i "checker" - can wrap around the ends of the array + if (lastskycol_bgra[ic] == skycol) { - return (BYTE*)(skybuf_bgra[i]); + return (BYTE*)(skybuf_bgra[ic]); } } From 3a133946610332bec615f444fff75efaaecf14aa Mon Sep 17 00:00:00 2001 From: raa-eruanna Date: Thu, 13 Oct 2016 02:47:07 -0400 Subject: [PATCH 192/912] - Changed max sky buffer to 3072 from 2048 --- src/r_plane.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/r_plane.cpp b/src/r_plane.cpp index d69e8a5c4b..c4d72b1a09 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -852,7 +852,7 @@ extern FTexture *rw_pic; // since the most anyone can ever see of the sky is 500 pixels. // We need 4 skybufs because wallscan can draw up to 4 columns at a time. 
// Need two versions - one for true color and one for palette -#define MAXSKYBUF 2048 +#define MAXSKYBUF 3072 static BYTE skybuf[4][512]; static uint32_t skybuf_bgra[MAXSKYBUF][512]; static DWORD lastskycol[4]; From d1d443497f0b81818de836c47bef34fec95ffa43 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 13 Oct 2016 11:16:51 +0200 Subject: [PATCH 193/912] Fix weird access violation in some of the drawers --- src/r_compiler/llvm_include.h | 1 + src/r_compiler/llvmdrawers.cpp | 129 ++++++++++++++++++--------- src/r_compiler/ssa/ssa_float_ptr.cpp | 8 +- src/r_compiler/ssa/ssa_int_ptr.cpp | 8 +- src/r_compiler/ssa/ssa_ubyte_ptr.cpp | 8 +- src/r_compiler/ssa/ssa_vec4f_ptr.cpp | 8 +- 6 files changed, 104 insertions(+), 58 deletions(-) diff --git a/src/r_compiler/llvm_include.h b/src/r_compiler/llvm_include.h index d1550f38a5..adad2827d9 100644 --- a/src/r_compiler/llvm_include.h +++ b/src/r_compiler/llvm_include.h @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include diff --git a/src/r_compiler/llvmdrawers.cpp b/src/r_compiler/llvmdrawers.cpp index 9391e7e201..8dcb73a2c6 100644 --- a/src/r_compiler/llvmdrawers.cpp +++ b/src/r_compiler/llvmdrawers.cpp @@ -18,27 +18,25 @@ class LLVMProgram { public: LLVMProgram(); - ~LLVMProgram(); + void CreateEE(); + std::string DumpModule(); void StopLogFatalErrors(); template Func *GetProcAddress(const char *name) { return reinterpret_cast(PointerToFunction(name)); } llvm::LLVMContext &context() { return *mContext; } - llvm::Module *module() { return mModule; } + llvm::Module *module() { return mModule.get(); } llvm::ExecutionEngine *engine() { return mEngine.get(); } - llvm::legacy::PassManager *modulePassManager() { return mModulePassManager.get(); } - llvm::legacy::FunctionPassManager *functionPassManager() { return mFunctionPassManager.get(); } private: void *PointerToFunction(const char *name); + llvm::TargetMachine *machine = nullptr; std::unique_ptr mContext; - llvm::Module *mModule; + 
std::unique_ptr mModule; std::unique_ptr mEngine; - std::unique_ptr mModulePassManager; - std::unique_ptr mFunctionPassManager; }; class LLVMDrawersImpl : public LLVMDrawers @@ -143,8 +141,7 @@ LLVMDrawersImpl::LLVMDrawersImpl() CodegenDrawWall("tmvline1_revsubclamp", DrawWallVariant::RevSubClamp, 1); CodegenDrawWall("tmvline4_revsubclamp", DrawWallVariant::RevSubClamp, 4); - mProgram.modulePassManager()->run(*mProgram.module()); - mProgram.engine()->finalizeObject(); + mProgram.CreateEE(); FillColumn = mProgram.GetProcAddress("FillColumn"); FillColumnAdd = mProgram.GetProcAddress("FillColumnAdd"); @@ -205,6 +202,37 @@ LLVMDrawersImpl::LLVMDrawersImpl() tmvline1_revsubclamp = mProgram.GetProcAddress("tmvline1_revsubclamp"); tmvline4_revsubclamp = mProgram.GetProcAddress("tmvline4_revsubclamp"); +#if 0 + std::vector foo(1024 * 4); + std::vector boo(256 * 256 * 4); + DrawColumnArgs args = { 0 }; + WorkerThreadData thread = { 0 }; + thread.core = 0; + thread.num_cores = 1; + thread.pass_start_y = 0; + thread.pass_end_y = 3600; + thread.temp = foo.data(); + foo[125 * 4] = 1234; + foo[126 * 4] = 1234; + for (int i = 0; i < 16; i++) + boo[i] = i; + args.dest = boo.data() + 4; + args.dest_y = 125; + args.pitch = 256; + args.count = 1; + args.texturefrac = 0; + args.flags = 0; + args.iscale = 252769; + args.light = 256; + args.color = 4279179008; + args.srcalpha = 12; + args.destalpha = 256; + args.light_red = 192; + args.light_green = 256; + args.light_blue = 128; + DrawColumnRt4AddClamp(&args, &thread); +#endif + mProgram.StopLogFatalErrors(); } @@ -225,8 +253,6 @@ void LLVMDrawersImpl::CodegenDrawColumn(const char *name, DrawColumnVariant vari if (llvm::verifyFunction(*function.func)) I_FatalError("verifyFunction failed for " __FUNCTION__); - - mProgram.functionPassManager()->run(*function.func); } void LLVMDrawersImpl::CodegenDrawSpan(const char *name, DrawSpanVariant variant) @@ -245,8 +271,6 @@ void LLVMDrawersImpl::CodegenDrawSpan(const char *name, DrawSpanVariant 
variant) if (llvm::verifyFunction(*function.func)) I_FatalError("verifyFunction failed for " __FUNCTION__); - - mProgram.functionPassManager()->run(*function.func); } void LLVMDrawersImpl::CodegenDrawWall(const char *name, DrawWallVariant variant, int columns) @@ -266,8 +290,6 @@ void LLVMDrawersImpl::CodegenDrawWall(const char *name, DrawWallVariant variant, if (llvm::verifyFunction(*function.func)) I_FatalError("verifyFunction failed for " __FUNCTION__); - - mProgram.functionPassManager()->run(*function.func); } llvm::Type *LLVMDrawersImpl::GetDrawColumnArgsStruct(llvm::LLVMContext &context) @@ -298,7 +320,7 @@ llvm::Type *LLVMDrawersImpl::GetDrawColumnArgsStruct(llvm::LLVMContext &context) elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_blue; elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t desaturate; elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t flags; - return llvm::StructType::get(context, elements, false)->getPointerTo(); + return llvm::StructType::create(context, elements, "DrawColumnArgs", false)->getPointerTo(); } llvm::Type *LLVMDrawersImpl::GetDrawSpanArgsStruct(llvm::LLVMContext &context) @@ -329,7 +351,7 @@ llvm::Type *LLVMDrawersImpl::GetDrawSpanArgsStruct(llvm::LLVMContext &context) elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_blue; elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t desaturate; elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t flags; - return llvm::StructType::get(context, elements, false)->getPointerTo(); + return llvm::StructType::create(context, elements, "DrawSpanArgs", false)->getPointerTo(); } llvm::Type *LLVMDrawersImpl::GetDrawWallArgsStruct(llvm::LLVMContext &context) @@ -350,7 +372,7 @@ llvm::Type *LLVMDrawersImpl::GetDrawWallArgsStruct(llvm::LLVMContext &context) elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_blue; elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t 
desaturate; elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t flags; - return llvm::StructType::get(context, elements, false)->getPointerTo(); + return llvm::StructType::create(context, elements, "DrawWallArgs", false)->getPointerTo(); } llvm::Type *LLVMDrawersImpl::GetWorkerThreadDataStruct(llvm::LLVMContext &context) @@ -359,7 +381,7 @@ llvm::Type *LLVMDrawersImpl::GetWorkerThreadDataStruct(llvm::LLVMContext &contex for (int i = 0; i < 4; i++) elements.push_back(llvm::Type::getInt32Ty(context)); elements.push_back(llvm::Type::getInt8PtrTy(context)); - return llvm::StructType::get(context, elements, false)->getPointerTo(); + return llvm::StructType::create(context, elements, "ThreadData", false)->getPointerTo(); } ///////////////////////////////////////////////////////////////////////////// @@ -406,29 +428,28 @@ LLVMProgram::LLVMProgram() I_FatalError("Could not find LLVM target: %s", errorstring.c_str()); TargetOptions opt; - auto relocModel = Optional(Reloc::Static); - TargetMachine *machine = target->createTargetMachine(targetTriple, cpuName, cpuFeaturesStr, opt, relocModel, CodeModel::Default, CodeGenOpt::Aggressive); + auto relocModel = Optional(); + machine = target->createTargetMachine(targetTriple, cpuName, cpuFeaturesStr, opt, relocModel, CodeModel::JITDefault, CodeGenOpt::Aggressive); if (!machine) I_FatalError("Could not create LLVM target machine"); mContext = std::make_unique(); - auto moduleOwner = std::make_unique("render", context()); - mModule = moduleOwner.get(); + mModule = std::make_unique("render", context()); mModule->setTargetTriple(targetTriple); mModule->setDataLayout(machine->createDataLayout()); - EngineBuilder engineBuilder(std::move(moduleOwner)); - engineBuilder.setErrorStr(&errorstring); - engineBuilder.setOptLevel(CodeGenOpt::Aggressive); - engineBuilder.setRelocationModel(Reloc::Static); - engineBuilder.setEngineKind(EngineKind::JIT); - mEngine.reset(engineBuilder.create(machine)); - if (!mEngine) - 
I_FatalError("Could not create LLVM execution engine: %s", errorstring.c_str()); +} - mModulePassManager = std::make_unique(); - mFunctionPassManager = std::make_unique(mModule); +void LLVMProgram::CreateEE() +{ + using namespace llvm; + + legacy::FunctionPassManager PerFunctionPasses(mModule.get()); + legacy::PassManager PerModulePasses; + + PerFunctionPasses.add(createTargetTransformInfoWrapperPass(machine->getTargetIRAnalysis())); + PerModulePasses.add(createTargetTransformInfoWrapperPass(machine->getTargetIRAnalysis())); PassManagerBuilder passManagerBuilder; passManagerBuilder.OptLevel = 3; @@ -437,22 +458,46 @@ LLVMProgram::LLVMProgram() passManagerBuilder.SLPVectorize = true; passManagerBuilder.LoopVectorize = true; passManagerBuilder.LoadCombine = true; - passManagerBuilder.populateModulePassManager(*mModulePassManager.get()); - passManagerBuilder.populateFunctionPassManager(*mFunctionPassManager.get()); + passManagerBuilder.populateModulePassManager(PerModulePasses); + passManagerBuilder.populateFunctionPassManager(PerFunctionPasses); + + // Run function passes: + PerFunctionPasses.doInitialization(); + for (llvm::Function &func : *mModule.get()) + { + if (!func.isDeclaration()) + PerFunctionPasses.run(func); + } + PerFunctionPasses.doFinalization(); + + // Run module passes: + PerModulePasses.run(*mModule.get()); + + std::string errorstring; + + EngineBuilder engineBuilder(std::move(mModule)); + engineBuilder.setErrorStr(&errorstring); + engineBuilder.setOptLevel(CodeGenOpt::Aggressive); + engineBuilder.setRelocationModel(Reloc::Static); + engineBuilder.setEngineKind(EngineKind::JIT); + mEngine.reset(engineBuilder.create(machine)); + if (!mEngine) + I_FatalError("Could not create LLVM execution engine: %s", errorstring.c_str()); + + mEngine->finalizeObject(); } -LLVMProgram::~LLVMProgram() +std::string LLVMProgram::DumpModule() { - mEngine.reset(); - mContext.reset(); + std::string str; + llvm::raw_string_ostream stream(str); + mModule->print(stream, 
nullptr, false, true); + return stream.str(); } void *LLVMProgram::PointerToFunction(const char *name) { - llvm::Function *function = mModule->getFunction(name); - if (!function) - return nullptr; - return mEngine->getPointerToFunction(function); + return reinterpret_cast(mEngine->getFunctionAddress(name)); } void LLVMProgram::StopLogFatalErrors() diff --git a/src/r_compiler/ssa/ssa_float_ptr.cpp b/src/r_compiler/ssa/ssa_float_ptr.cpp index f694be15d1..9937328f68 100644 --- a/src/r_compiler/ssa/ssa_float_ptr.cpp +++ b/src/r_compiler/ssa/ssa_float_ptr.cpp @@ -34,7 +34,7 @@ SSAFloat SSAFloatPtr::load(bool constantScopeDomain) const SSAVec4f SSAFloatPtr::load_vec4f(bool constantScopeDomain) const { llvm::PointerType *m4xfloattypeptr = llvm::VectorType::get(llvm::Type::getFloatTy(SSAScope::context()), 4)->getPointerTo(); - auto loadInst = SSAScope::builder().CreateLoad(SSAScope::builder().CreateBitCast(v, m4xfloattypeptr, SSAScope::hint()), false, SSAScope::hint()); + auto loadInst = SSAScope::builder().CreateAlignedLoad(SSAScope::builder().CreateBitCast(v, m4xfloattypeptr, SSAScope::hint()), 16, false, SSAScope::hint()); if (constantScopeDomain) loadInst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list()); return SSAVec4f::from_llvm(loadInst); @@ -43,7 +43,7 @@ SSAVec4f SSAFloatPtr::load_vec4f(bool constantScopeDomain) const SSAVec4f SSAFloatPtr::load_unaligned_vec4f(bool constantScopeDomain) const { llvm::PointerType *m4xfloattypeptr = llvm::VectorType::get(llvm::Type::getFloatTy(SSAScope::context()), 4)->getPointerTo(); - auto loadInst = SSAScope::builder().CreateAlignedLoad(SSAScope::builder().CreateBitCast(v, m4xfloattypeptr, SSAScope::hint()), 4, false, SSAScope::hint()); + auto loadInst = SSAScope::builder().CreateAlignedLoad(SSAScope::builder().CreateBitCast(v, m4xfloattypeptr, SSAScope::hint()), 1, false, SSAScope::hint()); if (constantScopeDomain) loadInst->setMetadata(llvm::LLVMContext::MD_alias_scope, 
SSAScope::constant_scope_list()); return SSAVec4f::from_llvm(loadInst); @@ -58,13 +58,13 @@ void SSAFloatPtr::store(const SSAFloat &new_value) void SSAFloatPtr::store_vec4f(const SSAVec4f &new_value) { llvm::PointerType *m4xfloattypeptr = llvm::VectorType::get(llvm::Type::getFloatTy(SSAScope::context()), 4)->getPointerTo(); - auto inst = SSAScope::builder().CreateStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xfloattypeptr, SSAScope::hint())); + auto inst = SSAScope::builder().CreateAlignedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xfloattypeptr, SSAScope::hint()), 16); inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list()); } void SSAFloatPtr::store_unaligned_vec4f(const SSAVec4f &new_value) { llvm::PointerType *m4xfloattypeptr = llvm::VectorType::get(llvm::Type::getFloatTy(SSAScope::context()), 4)->getPointerTo(); - auto inst = SSAScope::builder().CreateAlignedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xfloattypeptr, SSAScope::hint()), 4); + auto inst = SSAScope::builder().CreateAlignedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xfloattypeptr, SSAScope::hint()), 1); inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list()); } diff --git a/src/r_compiler/ssa/ssa_int_ptr.cpp b/src/r_compiler/ssa/ssa_int_ptr.cpp index d9441088e6..daef1e7ab4 100644 --- a/src/r_compiler/ssa/ssa_int_ptr.cpp +++ b/src/r_compiler/ssa/ssa_int_ptr.cpp @@ -34,7 +34,7 @@ SSAInt SSAIntPtr::load(bool constantScopeDomain) const SSAVec4i SSAIntPtr::load_vec4i(bool constantScopeDomain) const { llvm::PointerType *m4xint32typeptr = llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 4)->getPointerTo(); - auto loadInst = SSAScope::builder().CreateLoad(SSAScope::builder().CreateBitCast(v, m4xint32typeptr, SSAScope::hint()), false, SSAScope::hint()); + auto loadInst = SSAScope::builder().CreateAlignedLoad(SSAScope::builder().CreateBitCast(v, m4xint32typeptr, SSAScope::hint()), 16, 
false, SSAScope::hint()); if (constantScopeDomain) loadInst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list()); return SSAVec4i::from_llvm(loadInst); @@ -43,7 +43,7 @@ SSAVec4i SSAIntPtr::load_vec4i(bool constantScopeDomain) const SSAVec4i SSAIntPtr::load_unaligned_vec4i(bool constantScopeDomain) const { llvm::PointerType *m4xint32typeptr = llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 4)->getPointerTo(); - auto loadInst = SSAScope::builder().CreateAlignedLoad(SSAScope::builder().CreateBitCast(v, m4xint32typeptr, SSAScope::hint()), 4, false, SSAScope::hint()); + auto loadInst = SSAScope::builder().CreateAlignedLoad(SSAScope::builder().CreateBitCast(v, m4xint32typeptr, SSAScope::hint()), 1, false, SSAScope::hint()); if (constantScopeDomain) loadInst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list()); return SSAVec4i::from_llvm(loadInst); @@ -58,13 +58,13 @@ void SSAIntPtr::store(const SSAInt &new_value) void SSAIntPtr::store_vec4i(const SSAVec4i &new_value) { llvm::PointerType *m4xint32typeptr = llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 4)->getPointerTo(); - auto inst = SSAScope::builder().CreateStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xint32typeptr, SSAScope::hint())); + auto inst = SSAScope::builder().CreateAlignedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xint32typeptr, SSAScope::hint()), 16); inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list()); } void SSAIntPtr::store_unaligned_vec4i(const SSAVec4i &new_value) { llvm::PointerType *m4xint32typeptr = llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 4)->getPointerTo(); - auto inst = SSAScope::builder().CreateAlignedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xint32typeptr, SSAScope::hint()), 4); + auto inst = SSAScope::builder().CreateAlignedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xint32typeptr, 
SSAScope::hint()), 1); inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list()); } diff --git a/src/r_compiler/ssa/ssa_ubyte_ptr.cpp b/src/r_compiler/ssa/ssa_ubyte_ptr.cpp index 1ce4a6ae28..18bafc6891 100644 --- a/src/r_compiler/ssa/ssa_ubyte_ptr.cpp +++ b/src/r_compiler/ssa/ssa_ubyte_ptr.cpp @@ -43,7 +43,7 @@ SSAVec4i SSAUBytePtr::load_vec4ub(bool constantScopeDomain) const SSAVec16ub SSAUBytePtr::load_vec16ub(bool constantScopeDomain) const { llvm::PointerType *m16xint8typeptr = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 16)->getPointerTo(); - auto loadInst = SSAScope::builder().CreateLoad(SSAScope::builder().CreateBitCast(v, m16xint8typeptr, SSAScope::hint()), false, SSAScope::hint()); + auto loadInst = SSAScope::builder().CreateAlignedLoad(SSAScope::builder().CreateBitCast(v, m16xint8typeptr, SSAScope::hint()), 16, false, SSAScope::hint()); if (constantScopeDomain) loadInst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list()); return SSAVec16ub::from_llvm(loadInst); @@ -52,7 +52,7 @@ SSAVec16ub SSAUBytePtr::load_vec16ub(bool constantScopeDomain) const SSAVec16ub SSAUBytePtr::load_unaligned_vec16ub(bool constantScopeDomain) const { llvm::PointerType *m16xint8typeptr = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 16)->getPointerTo(); - auto loadInst = SSAScope::builder().CreateAlignedLoad(SSAScope::builder().CreateBitCast(v, m16xint8typeptr, SSAScope::hint()), 4, false, SSAScope::hint()); + auto loadInst = SSAScope::builder().CreateAlignedLoad(SSAScope::builder().CreateBitCast(v, m16xint8typeptr, SSAScope::hint()), 1, false, SSAScope::hint()); if (constantScopeDomain) loadInst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list()); return SSAVec16ub::from_llvm(loadInst); @@ -86,7 +86,7 @@ void SSAUBytePtr::store_vec4ub(const SSAVec4i &new_value) void SSAUBytePtr::store_vec16ub(const SSAVec16ub &new_value) { llvm::PointerType *m16xint8typeptr 
= llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 16)->getPointerTo(); - llvm::StoreInst *inst = SSAScope::builder().CreateStore(new_value.v, SSAScope::builder().CreateBitCast(v, m16xint8typeptr, SSAScope::hint())); + llvm::StoreInst *inst = SSAScope::builder().CreateAlignedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m16xint8typeptr, SSAScope::hint()), 16); inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list()); // The following generates _mm_stream_si128, maybe! @@ -97,6 +97,6 @@ void SSAUBytePtr::store_vec16ub(const SSAVec16ub &new_value) void SSAUBytePtr::store_unaligned_vec16ub(const SSAVec16ub &new_value) { llvm::PointerType *m16xint8typeptr = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 16)->getPointerTo(); - llvm::StoreInst *inst = SSAScope::builder().CreateAlignedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m16xint8typeptr, SSAScope::hint()), 4); + llvm::StoreInst *inst = SSAScope::builder().CreateAlignedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m16xint8typeptr, SSAScope::hint()), 1); inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list()); } diff --git a/src/r_compiler/ssa/ssa_vec4f_ptr.cpp b/src/r_compiler/ssa/ssa_vec4f_ptr.cpp index e8bac71f17..31c23f2bd2 100644 --- a/src/r_compiler/ssa/ssa_vec4f_ptr.cpp +++ b/src/r_compiler/ssa/ssa_vec4f_ptr.cpp @@ -25,7 +25,7 @@ SSAVec4fPtr SSAVec4fPtr::operator[](SSAInt index) const SSAVec4f SSAVec4fPtr::load(bool constantScopeDomain) const { - auto loadInst = SSAScope::builder().CreateLoad(v, false, SSAScope::hint()); + auto loadInst = SSAScope::builder().CreateAlignedLoad(v, 16, false, SSAScope::hint()); if (constantScopeDomain) loadInst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list()); return SSAVec4f::from_llvm(loadInst); @@ -33,7 +33,7 @@ SSAVec4f SSAVec4fPtr::load(bool constantScopeDomain) const SSAVec4f SSAVec4fPtr::load_unaligned(bool 
constantScopeDomain) const { - auto loadInst = SSAScope::builder().CreateAlignedLoad(v, 4, false, SSAScope::hint()); + auto loadInst = SSAScope::builder().CreateAlignedLoad(v, 1, false, SSAScope::hint()); if (constantScopeDomain) loadInst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list()); return SSAVec4f::from_llvm(loadInst); @@ -41,12 +41,12 @@ SSAVec4f SSAVec4fPtr::load_unaligned(bool constantScopeDomain) const void SSAVec4fPtr::store(const SSAVec4f &new_value) { - auto inst = SSAScope::builder().CreateStore(new_value.v, v, false); + auto inst = SSAScope::builder().CreateAlignedStore(new_value.v, v, 16, false); inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list()); } void SSAVec4fPtr::store_unaligned(const SSAVec4f &new_value) { - auto inst = SSAScope::builder().CreateAlignedStore(new_value.v, v, 4, false); + auto inst = SSAScope::builder().CreateAlignedStore(new_value.v, v, 1, false); inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list()); } From 366d494b181d3eb3f976e320bebbf061c89965c9 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 14 Oct 2016 08:33:59 +0200 Subject: [PATCH 194/912] Merged R_DrawMaskedColumnHoriz into R_DrawMaskedColumn --- src/r_draw.h | 2 +- src/r_drawt.cpp | 115 +++++++---------------------------------------- src/r_segs.cpp | 24 +++++----- src/r_things.cpp | 26 ++++++----- src/r_things.h | 4 +- src/v_draw.cpp | 6 +-- 6 files changed, 49 insertions(+), 128 deletions(-) diff --git a/src/r_draw.h b/src/r_draw.h index 204f2a4935..0708b8714f 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -143,7 +143,6 @@ extern void (*R_DrawSpanMaskedAddClamp)(void); // [RH] Span blit into an interleaved intermediate buffer extern void (*R_DrawColumnHoriz)(void); -void R_DrawMaskedColumnHoriz (const BYTE *column, const FTexture::Span *spans); // [RH] Initialize the above pointers void R_InitColumnDrawers (); @@ -225,6 +224,7 @@ extern void 
(*rt_tlaterevsubclamp4cols)(int sx, int yl, int yh); extern void (*rt_initcols)(BYTE *buffer); extern void (*rt_span_coverage)(int x, int start, int stop); +void rt_flip_posts(); void rt_draw4cols (int sx); // [RH] Preps the temporary horizontal buffer. diff --git a/src/r_drawt.cpp b/src/r_drawt.cpp index 43354bfd52..0baf6d38c9 100644 --- a/src/r_drawt.cpp +++ b/src/r_drawt.cpp @@ -839,6 +839,21 @@ void rt_tlaterevsubclamp4cols_c (int sx, int yl, int yh) rt_revsubclamp4cols(sx, yl, yh); } +// Reorder the posts so that they get drawn top-to-bottom instead of bottom-to-top. +void rt_flip_posts() +{ + unsigned int *front = horizspan[dc_x & 3]; + unsigned int *back = dc_ctspan[dc_x & 3] - 2; + + while (front < back) + { + swapvalues(front[0], back[0]); + swapvalues(front[1], back[1]); + front += 2; + back -= 2; + } +} + // Copies all spans in all four columns to the screen starting at sx. // sx should be dword-aligned. void rt_draw4cols (int sx) @@ -1115,103 +1130,3 @@ void R_FillColumnHorizP_C (void) dest += 8; } while (--count); } - -// Same as R_DrawMaskedColumn() except that it always uses R_DrawColumnHoriz(). - -void R_DrawMaskedColumnHoriz (const BYTE *column, const FTexture::Span *span) -{ - int pixelsize = r_swtruecolor ? 4 : 1; - int inputpixelsize = (r_swtruecolor && !drawer_needs_pal_input) ? 
4 : 1; - const fixed_t texturemid = FLOAT2FIXED(dc_texturemid); - while (span->Length != 0) - { - const int length = span->Length; - const int top = span->TopOffset; - - // calculate unclipped screen coordinates for post - dc_yl = xs_RoundToInt(sprtopscreen + spryscale * top); - dc_yh = xs_RoundToInt(sprtopscreen + spryscale * (top + length) - 1); - - if (sprflipvert) - { - swapvalues (dc_yl, dc_yh); - } - - if (dc_yh >= mfloorclip[dc_x]) - { - dc_yh = mfloorclip[dc_x] - 1; - } - if (dc_yl < mceilingclip[dc_x]) - { - dc_yl = mceilingclip[dc_x]; - } - - if (dc_yl <= dc_yh) - { - if (sprflipvert) - { - dc_texturefrac = (dc_yl*dc_iscale) - (top << FRACBITS) - - fixed_t(CenterY * dc_iscale) - texturemid; - const fixed_t maxfrac = length << FRACBITS; - while (dc_texturefrac >= maxfrac) - { - if (++dc_yl > dc_yh) - goto nextpost; - dc_texturefrac += dc_iscale; - } - fixed_t endfrac = dc_texturefrac + (dc_yh-dc_yl)*dc_iscale; - while (endfrac < 0) - { - if (--dc_yh < dc_yl) - goto nextpost; - endfrac -= dc_iscale; - } - } - else - { - dc_texturefrac = texturemid - (top << FRACBITS) - + (dc_yl*dc_iscale) - fixed_t((CenterY-1) * dc_iscale); - while (dc_texturefrac < 0) - { - if (++dc_yl > dc_yh) - goto nextpost; - dc_texturefrac += dc_iscale; - } - fixed_t endfrac = dc_texturefrac + (dc_yh-dc_yl)*dc_iscale; - const fixed_t maxfrac = length << FRACBITS; - if (dc_yh < mfloorclip[dc_x]-1 && endfrac < maxfrac - dc_iscale) - { - dc_yh++; - } - else while (endfrac >= maxfrac) - { - if (--dc_yh < dc_yl) - goto nextpost; - endfrac -= dc_iscale; - } - } - dc_source = column + top * inputpixelsize; - dc_dest = (ylookup[dc_yl] + dc_x) * pixelsize + dc_destorg; - dc_count = dc_yh - dc_yl + 1; - hcolfunc_pre (); - } -nextpost: - span++; - } - - if (sprflipvert) - { - unsigned int *front = horizspan[dc_x&3]; - unsigned int *back = dc_ctspan[dc_x&3] - 2; - - // Reorder the posts so that they get drawn top-to-bottom - // instead of bottom-to-top. 
- while (front < back) - { - swapvalues (front[0], back[0]); - swapvalues (front[1], back[1]); - front += 2; - back -= 2; - } - } -} diff --git a/src/r_segs.cpp b/src/r_segs.cpp index 92e6a447a4..cbff7d6e9b 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -173,7 +173,7 @@ CVAR(Bool, r_drawmirrors, true, 0) float *MaskedSWall; float MaskedScaleY; -static void BlastMaskedColumn (void (*blastfunc)(const BYTE *pixels, const FTexture::Span *spans), FTexture *tex) +static void BlastMaskedColumn (FTexture *tex, bool useRt) { // calculate lighting if (fixedcolormap == NULL && fixedlightlev < 0) @@ -202,7 +202,7 @@ static void BlastMaskedColumn (void (*blastfunc)(const BYTE *pixels, const FText pixels = (const BYTE *)tex->GetColumnBgra(maskedtexturecol[dc_x] >> FRACBITS, &spans); else pixels = tex->GetColumn(maskedtexturecol[dc_x] >> FRACBITS, &spans); - blastfunc (pixels, spans); + R_DrawMaskedColumn(pixels, spans, useRt); rw_light += rw_lightstep; spryscale += rw_scalestep; } @@ -445,7 +445,7 @@ void R_RenderMaskedSegRange (drawseg_t *ds, int x1, int x2) { for (dc_x = x1; dc_x < x2; ++dc_x) { - BlastMaskedColumn (R_DrawMaskedColumn, tex); + BlastMaskedColumn (tex, false); } } else @@ -460,24 +460,24 @@ void R_RenderMaskedSegRange (drawseg_t *ds, int x1, int x2) while ((dc_x < stop) && (dc_x & 3)) { - BlastMaskedColumn (R_DrawMaskedColumn, tex); + BlastMaskedColumn (tex, false); dc_x++; } while (dc_x < stop) { rt_initcols(nullptr); - BlastMaskedColumn (R_DrawMaskedColumnHoriz, tex); dc_x++; - BlastMaskedColumn (R_DrawMaskedColumnHoriz, tex); dc_x++; - BlastMaskedColumn (R_DrawMaskedColumnHoriz, tex); dc_x++; - BlastMaskedColumn (R_DrawMaskedColumnHoriz, tex); + BlastMaskedColumn (tex, true); dc_x++; + BlastMaskedColumn (tex, true); dc_x++; + BlastMaskedColumn (tex, true); dc_x++; + BlastMaskedColumn (tex, true); rt_draw4cols (dc_x - 3); dc_x++; } while (dc_x < x2) { - BlastMaskedColumn (R_DrawMaskedColumn, tex); + BlastMaskedColumn (tex, false); dc_x++; } } @@ -3218,7 
+3218,7 @@ static void R_RenderDecal (side_t *wall, DBaseDecal *decal, drawseg_t *clipper, { // calculate lighting R_SetColorMapLight(usecolormap, rw_light, wallshade); } - R_WallSpriteColumn (R_DrawMaskedColumn); + R_WallSpriteColumn (false); dc_x++; } @@ -3231,7 +3231,7 @@ static void R_RenderDecal (side_t *wall, DBaseDecal *decal, drawseg_t *clipper, rt_initcols(nullptr); for (int zz = 4; zz; --zz) { - R_WallSpriteColumn (R_DrawMaskedColumnHoriz); + R_WallSpriteColumn (true); dc_x++; } rt_draw4cols (dc_x - 4); @@ -3243,7 +3243,7 @@ static void R_RenderDecal (side_t *wall, DBaseDecal *decal, drawseg_t *clipper, { // calculate lighting R_SetColorMapLight(usecolormap, rw_light, wallshade); } - R_WallSpriteColumn (R_DrawMaskedColumn); + R_WallSpriteColumn (false); dc_x++; } } diff --git a/src/r_things.cpp b/src/r_things.cpp index dbe66da5e9..6119a1402c 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -250,7 +250,7 @@ double sprtopscreen; bool sprflipvert; -void R_DrawMaskedColumn (const BYTE *column, const FTexture::Span *span) +void R_DrawMaskedColumn (const BYTE *column, const FTexture::Span *span, bool useRt) { int pixelsize = r_swtruecolor ? 4 : 1; int inputpixelsize = (r_swtruecolor && !drawer_needs_pal_input) ? 
4 : 1; @@ -326,11 +326,17 @@ void R_DrawMaskedColumn (const BYTE *column, const FTexture::Span *span) dc_source = column + top * inputpixelsize; dc_dest = (ylookup[dc_yl] + dc_x) * pixelsize + dc_destorg; dc_count = dc_yh - dc_yl + 1; - colfunc (); + if (useRt) + hcolfunc_pre(); + else + colfunc (); } nextpost: span++; } + + if (sprflipvert && useRt) + rt_flip_posts(); } // [ZZ] @@ -476,7 +482,7 @@ void R_DrawVisSprite (vissprite_t *vis) pixels = tex->GetColumn (frac >> FRACBITS, &spans); if (ispsprite || !R_ClipSpriteColumnWithPortals(vis)) - R_DrawMaskedColumn (pixels, spans); + R_DrawMaskedColumn (pixels, spans, false); dc_x++; frac += xiscale; } @@ -492,7 +498,7 @@ void R_DrawVisSprite (vissprite_t *vis) pixels = tex->GetColumn (frac >> FRACBITS, &spans); if (ispsprite || !R_ClipSpriteColumnWithPortals(vis)) - R_DrawMaskedColumnHoriz (pixels, spans); + R_DrawMaskedColumn (pixels, spans, true); dc_x++; frac += xiscale; } @@ -507,7 +513,7 @@ void R_DrawVisSprite (vissprite_t *vis) pixels = tex->GetColumn (frac >> FRACBITS, &spans); if (ispsprite || !R_ClipSpriteColumnWithPortals(vis)) - R_DrawMaskedColumn (pixels, spans); + R_DrawMaskedColumn (pixels, spans, false); dc_x++; frac += xiscale; } @@ -617,7 +623,7 @@ void R_DrawWallSprite(vissprite_t *spr) R_SetColorMapLight(usecolormap, rw_light, shade); } if (!R_ClipSpriteColumnWithPortals(spr)) - R_WallSpriteColumn(R_DrawMaskedColumn); + R_WallSpriteColumn(false); dc_x++; } @@ -631,7 +637,7 @@ void R_DrawWallSprite(vissprite_t *spr) for (int zz = 4; zz; --zz) { if (!R_ClipSpriteColumnWithPortals(spr)) - R_WallSpriteColumn(R_DrawMaskedColumnHoriz); + R_WallSpriteColumn(true); dc_x++; } rt_draw4cols(dc_x - 4); @@ -644,14 +650,14 @@ void R_DrawWallSprite(vissprite_t *spr) R_SetColorMapLight(usecolormap, rw_light, shade); } if (!R_ClipSpriteColumnWithPortals(spr)) - R_WallSpriteColumn(R_DrawMaskedColumn); + R_WallSpriteColumn(false); dc_x++; } } R_FinishSetPatchStyle(); } -void R_WallSpriteColumn (void 
(*drawfunc)(const BYTE *column, const FTexture::Span *spans)) +void R_WallSpriteColumn (bool useRt) { float iscale = swall[dc_x] * MaskedScaleY; dc_iscale = FLOAT2FIXED(iscale); @@ -668,7 +674,7 @@ void R_WallSpriteColumn (void (*drawfunc)(const BYTE *column, const FTexture::Sp else column = WallSpriteTile->GetColumn (lwall[dc_x] >> FRACBITS, &spans); dc_texturefrac = 0; - drawfunc (column, spans); + R_DrawMaskedColumn(column, spans, useRt); rw_light += rw_lightstep; } diff --git a/src/r_things.h b/src/r_things.h index cbe34015f4..fa2db50f75 100644 --- a/src/r_things.h +++ b/src/r_things.h @@ -126,8 +126,8 @@ extern double pspriteyscale; extern FTexture *WallSpriteTile; -void R_DrawMaskedColumn (const BYTE *column, const FTexture::Span *spans); -void R_WallSpriteColumn (void (*drawfunc)(const BYTE *column, const FTexture::Span *spans)); +void R_DrawMaskedColumn (const BYTE *column, const FTexture::Span *spans, bool usrRt); +void R_WallSpriteColumn (bool useRt); void R_CacheSprite (spritedef_t *sprite); void R_SortVisSprites (int (*compare)(const void *, const void *), size_t first); diff --git a/src/v_draw.cpp b/src/v_draw.cpp index 1c23523d6d..a89a0865d2 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -310,7 +310,7 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) else pixels = img->GetColumn(frac >> FRACBITS, spanptr); - R_DrawMaskedColumn(pixels, spans); + R_DrawMaskedColumn(pixels, spans, false); dc_x++; frac += xiscale_i; } @@ -325,7 +325,7 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) else pixels = img->GetColumn(frac >> FRACBITS, spanptr); - R_DrawMaskedColumnHoriz(pixels, spans); + R_DrawMaskedColumn(pixels, spans, true); dc_x++; frac += xiscale_i; } @@ -339,7 +339,7 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) else pixels = img->GetColumn(frac >> FRACBITS, spanptr); - R_DrawMaskedColumn(pixels, spans); + R_DrawMaskedColumn(pixels, spans, false); dc_x++; frac += xiscale_i; } From 
f05e2337c22cebd6fa77d6a762471df3dbf9c265 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 14 Oct 2016 11:24:03 +0200 Subject: [PATCH 195/912] Fix SkyViewpoint skyboxes not being rendered --- src/r_plane.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/r_plane.cpp b/src/r_plane.cpp index ff23492ab9..07001bf1bb 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -1314,7 +1314,7 @@ void R_DrawPortals () vissprite_p = firstvissprite; visplaneStack.Pop (pl); - if (pl->Alpha > 0) + if (pl->Alpha > 0 && pl->picnum != skyflatnum) { R_DrawSinglePlane (pl, pl->Alpha, pl->Additive, true); } From f82195bc494eec48ebb05620e55ca6640ab4bc02 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 14 Oct 2016 12:10:11 +0200 Subject: [PATCH 196/912] Switch to true color for translated sprites --- .../fixedfunction/drawcolumncodegen.cpp | 23 +++++++++++-------- .../fixedfunction/drawcolumncodegen.h | 3 ++- src/r_draw.cpp | 5 +++- 3 files changed, 20 insertions(+), 11 deletions(-) diff --git a/src/r_compiler/fixedfunction/drawcolumncodegen.cpp b/src/r_compiler/fixedfunction/drawcolumncodegen.cpp index 721c85dd85..df35fd53c8 100644 --- a/src/r_compiler/fixedfunction/drawcolumncodegen.cpp +++ b/src/r_compiler/fixedfunction/drawcolumncodegen.cpp @@ -187,14 +187,14 @@ SSAVec4i DrawColumnCodegen::ProcessPixel(SSAInt sample_index, SSAVec4i bgcolor, case DrawColumnVariant::DrawRevSubClamp: return blend_revsub(Shade(Sample(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha); case DrawColumnVariant::DrawTranslated: - return blend_copy(ShadePal(TranslateSample(sample_index), isSimpleShade)); + return blend_copy(Shade(TranslateSample(sample_index), isSimpleShade)); case DrawColumnVariant::DrawTlatedAdd: case DrawColumnVariant::DrawAddClampTranslated: - return blend_add(ShadePal(TranslateSample(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha); + return blend_add(Shade(TranslateSample(sample_index), isSimpleShade), bgcolor, srcalpha, 
destalpha); case DrawColumnVariant::DrawSubClampTranslated: - return blend_sub(ShadePal(TranslateSample(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha); + return blend_sub(Shade(TranslateSample(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha); case DrawColumnVariant::DrawRevSubClampTranslated: - return blend_revsub(ShadePal(TranslateSample(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha); + return blend_revsub(Shade(TranslateSample(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha); case DrawColumnVariant::Fill: return blend_copy(color); case DrawColumnVariant::FillAdd: @@ -233,14 +233,14 @@ SSAVec4i DrawColumnCodegen::ProcessPixelPal(SSAInt sample_index, SSAVec4i bgcolo case DrawColumnVariant::DrawRevSubClamp: return blend_revsub(ShadePal(ColormapSample(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha); case DrawColumnVariant::DrawTranslated: - return blend_copy(ShadePal(TranslateSample(sample_index), isSimpleShade)); + return blend_copy(ShadePal(TranslateSamplePal(sample_index), isSimpleShade)); case DrawColumnVariant::DrawTlatedAdd: case DrawColumnVariant::DrawAddClampTranslated: - return blend_add(ShadePal(TranslateSample(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha); + return blend_add(ShadePal(TranslateSamplePal(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha); case DrawColumnVariant::DrawSubClampTranslated: - return blend_sub(ShadePal(TranslateSample(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha); + return blend_sub(ShadePal(TranslateSamplePal(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha); case DrawColumnVariant::DrawRevSubClampTranslated: - return blend_revsub(ShadePal(TranslateSample(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha); + return blend_revsub(ShadePal(TranslateSamplePal(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha); case DrawColumnVariant::Fill: return blend_copy(color); case DrawColumnVariant::FillAdd: @@ 
-267,7 +267,12 @@ SSAInt DrawColumnCodegen::ColormapSample(SSAInt sample_index) return colormap[source[sample_index].load(true).zext_int()].load(true).zext_int(); } -SSAInt DrawColumnCodegen::TranslateSample(SSAInt sample_index) +SSAVec4i DrawColumnCodegen::TranslateSample(SSAInt sample_index) +{ + return translation[source[sample_index].load(true).zext_int() * 4].load_vec4ub(true); +} + +SSAInt DrawColumnCodegen::TranslateSamplePal(SSAInt sample_index) { return translation[source[sample_index].load(true).zext_int()].load(true).zext_int(); } diff --git a/src/r_compiler/fixedfunction/drawcolumncodegen.h b/src/r_compiler/fixedfunction/drawcolumncodegen.h index ffba50a15a..5f2ad737de 100644 --- a/src/r_compiler/fixedfunction/drawcolumncodegen.h +++ b/src/r_compiler/fixedfunction/drawcolumncodegen.h @@ -42,7 +42,8 @@ private: SSAVec4i ProcessPixelPal(SSAInt sample_index, SSAVec4i bgcolor, DrawColumnVariant variant, bool isSimpleShade); SSAVec4i Sample(SSAInt frac); SSAInt ColormapSample(SSAInt frac); - SSAInt TranslateSample(SSAInt frac); + SSAVec4i TranslateSample(SSAInt frac); + SSAInt TranslateSamplePal(SSAInt frac); SSAVec4i Shade(SSAVec4i fgcolor, bool isSimpleShade); SSAVec4i ShadePal(SSAInt palIndex, bool isSimpleShade); bool IsPaletteInput(DrawColumnVariant variant); diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 11c7020b4b..03515c8a11 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -2695,7 +2695,10 @@ ESPSResult R_SetPatchStyle (FRenderStyle style, fixed_t alpha, int translation, FRemapTable *table = TranslationToTable(translation); if (table != NULL && !table->Inactive) { - dc_translation = table->Remap; + if (r_swtruecolor) + dc_translation = (BYTE*)table->Palette; + else + dc_translation = table->Remap; } } basecolormapsave = basecolormap; From 2a4a61d4d0393bb9ee3bd3c9c98021e41399945f Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 14 Oct 2016 15:23:23 +0200 Subject: [PATCH 197/912] Fix divide by zero if yscale is too small a number --- 
src/r_things.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/r_things.cpp b/src/r_things.cpp index ab75c15b02..99ca68b601 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -408,7 +408,7 @@ void R_DrawVisSprite (vissprite_t *vis) ESPSResult mode; bool ispsprite = (!vis->sector && vis->gpos != FVector3(0, 0, 0)); - if (vis->xscale == 0 || vis->yscale == 0) + if (vis->xscale == 0 || fabs(vis->yscale) < (1.0f / 32000.0f)) { // scaled to 0; can't see return; } From 6608678ad837324e1f01c26a6470dc8c6c2ce653 Mon Sep 17 00:00:00 2001 From: raa-eruanna Date: Fri, 14 Oct 2016 18:11:49 -0400 Subject: [PATCH 198/912] - Fullbright sprites and decals now defy sector colormaps as they do in GZDoom. This is controlled by the setting "r_fullbrightignoresectorcolor". --- src/r_things.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/r_things.cpp b/src/r_things.cpp index 86186721ca..bb8be4c6bd 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -100,6 +100,7 @@ EXTERN_CVAR (Bool, st_scale) EXTERN_CVAR(Bool, r_shadercolormaps) EXTERN_CVAR(Int, r_drawfuzz) EXTERN_CVAR(Bool, r_deathcamera); +CVAR(Bool, r_fullbrightignoresectorcolor, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); // // Sprite rotation 0 is facing the viewer, @@ -569,7 +570,7 @@ void R_DrawWallSprite(vissprite_t *spr) else if (fixedcolormap != NULL) R_SetColorMapLight(fixedcolormap, 0, 0); else if (!foggy && (spr->renderflags & RF_FULLBRIGHT)) - R_SetColorMapLight(usecolormap, 0, 0); + R_SetColorMapLight((r_fullbrightignoresectorcolor) ? 
&NormalLight : usecolormap, 0, 0); else calclighting = true; @@ -1088,7 +1089,8 @@ void R_ProjectSprite (AActor *thing, int fakeside, F3DFloor *fakefloor, F3DFloor vis->deltax = float(pos.X - ViewPos.X); vis->deltay = float(pos.Y - ViewPos.Y); vis->renderflags = renderflags; - if(thing->flags5 & MF5_BRIGHT) vis->renderflags |= RF_FULLBRIGHT; // kg3D + if(thing->flags5 & MF5_BRIGHT) + vis->renderflags |= RF_FULLBRIGHT; // kg3D vis->Style.RenderStyle = thing->RenderStyle; vis->FillColor = thing->fillcolor; vis->Translation = thing->Translation; // [RH] thing translation table @@ -1164,7 +1166,7 @@ void R_ProjectSprite (AActor *thing, int fakeside, F3DFloor *fakefloor, F3DFloor } else if (!foggy && ((renderflags & RF_FULLBRIGHT) || (thing->flags5 & MF5_BRIGHT))) { // full bright - vis->Style.BaseColormap = mybasecolormap; + vis->Style.BaseColormap = (r_fullbrightignoresectorcolor) ? &NormalLight : mybasecolormap; vis->Style.ColormapNum = 0; } else From 3122538006140bbaddecd66a9d861d5bc7c159b7 Mon Sep 17 00:00:00 2001 From: raa-eruanna Date: Fri, 14 Oct 2016 18:16:22 -0400 Subject: [PATCH 199/912] Added menu entry for "r_fullbrightignoresectorcolor" --- wadsrc/static/language.enu | 1 + wadsrc/static/menudef.txt | 1 + 2 files changed, 2 insertions(+) diff --git a/wadsrc/static/language.enu b/wadsrc/static/language.enu index c603864a7a..b1b790235f 100644 --- a/wadsrc/static/language.enu +++ b/wadsrc/static/language.enu @@ -1788,6 +1788,7 @@ DSPLYMNU_PALLETEHACK = "DirectDraw palette hack"; // Not used DSPLYMNU_ATTACHEDSURFACES = "Use attached surfaces"; // Not used DSPLYMNU_STRETCHSKY = "Stretch short skies"; DSPLYMNU_LINEARSKY = "Linear skies"; +DSPLYMNU_GZDFULLBRIGHT = "Emulate GZDoom FullBright"; DSPLYMNU_DRAWFUZZ = "Use fuzz effect"; DSPLYMNU_TRANSSOUL = "Lost Soul translucency"; DSPLYMNU_FAKECONTRAST = "Use fake contrast"; diff --git a/wadsrc/static/menudef.txt b/wadsrc/static/menudef.txt index b26c4c4a56..ceae53d822 100644 --- a/wadsrc/static/menudef.txt +++ 
b/wadsrc/static/menudef.txt @@ -702,6 +702,7 @@ OptionMenu "VideoOptions" Option "$DSPLYMNU_STRETCHSKY", "r_stretchsky", "OnOff" Option "$DSPLYMNU_LINEARSKY", "r_linearsky", "OnOff" + Option "$DSPLYMNU_GZDFULLBRIGHT", "r_fullbrightignoresectorcolor", "OnOff" Option "$DSPLYMNU_DRAWFUZZ", "r_drawfuzz", "Fuzziness" Slider "$DSPLYMNU_TRANSSOUL", "transsouls", 0.25, 1.0, 0.05, 2 Option "$DSPLYMNU_FAKECONTRAST", "r_fakecontrast", "Contrast" From 491a4e28c0161fa860fa7e632ad83bfac0bb3282 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 15 Oct 2016 15:04:14 +0200 Subject: [PATCH 200/912] Move true color sky drawing to its own drawers and change r_stretchsky to false as the new drawers can fade to a solid color --- src/CMakeLists.txt | 1 + .../fixedfunction/drawskycodegen.cpp | 127 +++++++++++++ src/r_compiler/fixedfunction/drawskycodegen.h | 39 ++++ src/r_compiler/llvmdrawers.cpp | 41 +++++ src/r_compiler/llvmdrawers.h | 28 +++ src/r_compiler/ssa/ssa_int.cpp | 5 + src/r_compiler/ssa/ssa_int.h | 1 + src/r_compiler/ssa/ssa_vec4i.cpp | 14 -- src/r_compiler/ssa/ssa_vec4i.h | 2 - src/r_draw.h | 5 + src/r_draw_rgba.cpp | 65 +++++++ src/r_plane.cpp | 169 ++++++++++++++++++ src/r_sky.cpp | 2 +- 13 files changed, 482 insertions(+), 17 deletions(-) create mode 100644 src/r_compiler/fixedfunction/drawskycodegen.cpp create mode 100644 src/r_compiler/fixedfunction/drawskycodegen.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 40aa5ae374..a900b6352e 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1459,6 +1459,7 @@ set (PCH_SOURCES r_compiler/fixedfunction/drawspancodegen.cpp r_compiler/fixedfunction/drawwallcodegen.cpp r_compiler/fixedfunction/drawcolumncodegen.cpp + r_compiler/fixedfunction/drawskycodegen.cpp r_data/sprites.cpp r_data/voxels.cpp r_data/renderstyle.cpp diff --git a/src/r_compiler/fixedfunction/drawskycodegen.cpp b/src/r_compiler/fixedfunction/drawskycodegen.cpp new file mode 100644 index 0000000000..f2ba148fdd --- /dev/null +++ 
b/src/r_compiler/fixedfunction/drawskycodegen.cpp @@ -0,0 +1,127 @@ + +#include "i_system.h" +#include "r_compiler/llvm_include.h" +#include "r_compiler/fixedfunction/drawskycodegen.h" +#include "r_compiler/ssa/ssa_function.h" +#include "r_compiler/ssa/ssa_scope.h" +#include "r_compiler/ssa/ssa_for_block.h" +#include "r_compiler/ssa/ssa_if_block.h" +#include "r_compiler/ssa/ssa_stack.h" +#include "r_compiler/ssa/ssa_function.h" +#include "r_compiler/ssa/ssa_struct_type.h" +#include "r_compiler/ssa/ssa_value.h" + +void DrawSkyCodegen::Generate(DrawSkyVariant variant, bool fourColumns, SSAValue args, SSAValue thread_data) +{ + dest = args[0][0].load(true); + source0[0] = args[0][1].load(true); + source0[1] = args[0][2].load(true); + source0[2] = args[0][3].load(true); + source0[3] = args[0][4].load(true); + source1[0] = args[0][5].load(true); + source1[1] = args[0][6].load(true); + source1[2] = args[0][7].load(true); + source1[3] = args[0][8].load(true); + pitch = args[0][9].load(true); + count = args[0][10].load(true); + dest_y = args[0][11].load(true); + texturefrac[0] = args[0][12].load(true); + texturefrac[1] = args[0][13].load(true); + texturefrac[2] = args[0][14].load(true); + texturefrac[3] = args[0][15].load(true); + iscale[0] = args[0][16].load(true); + iscale[1] = args[0][17].load(true); + iscale[2] = args[0][18].load(true); + iscale[3] = args[0][19].load(true); + textureheight0 = args[0][20].load(true); + textureheight1 = args[0][21].load(true); + top_color = SSAVec4i::unpack(args[0][22].load(true)); + bottom_color = SSAVec4i::unpack(args[0][23].load(true)); + + thread.core = thread_data[0][0].load(true); + thread.num_cores = thread_data[0][1].load(true); + thread.pass_start_y = thread_data[0][2].load(true); + thread.pass_end_y = thread_data[0][3].load(true); + + count = count_for_thread(dest_y, count, thread); + dest = dest_for_thread(dest_y, pitch, dest, thread); + + pitch = pitch * thread.num_cores; + + int numColumns = fourColumns ? 
4 : 1; + for (int i = 0; i < numColumns; i++) + { + stack_frac[i].store(texturefrac[i] + iscale[i] * skipped_by_thread(dest_y, thread)); + fracstep[i] = iscale[i] * thread.num_cores; + } + + Loop(variant, fourColumns); +} + +void DrawSkyCodegen::Loop(DrawSkyVariant variant, bool fourColumns) +{ + int numColumns = fourColumns ? 4 : 1; + + stack_index.store(SSAInt(0)); + { + SSAForBlock loop; + SSAInt index = stack_index.load(); + loop.loop_block(index < count); + + SSAInt frac[4]; + for (int i = 0; i < numColumns; i++) + frac[i] = stack_frac[i].load(); + + SSAInt offset = index * pitch * 4; + + if (fourColumns) + { + SSAVec4i colors[4]; + for (int i = 0; i < 4; i++) + colors[i] = FadeOut(frac[i], Sample(frac[i], i, variant)); + + SSAVec16ub color(SSAVec8s(colors[0], colors[1]), SSAVec8s(colors[2], colors[3])); + dest[offset].store_unaligned_vec16ub(color); + } + else + { + SSAVec4i color = FadeOut(frac[0], Sample(frac[0], 0, variant)); + dest[offset].store_vec4ub(color); + } + + stack_index.store(index.add(SSAInt(1), true, true)); + for (int i = 0; i < numColumns; i++) + stack_frac[i].store(frac[i] + fracstep[i]); + loop.end_block(); + } +} + +SSAVec4i DrawSkyCodegen::Sample(SSAInt frac, int index, DrawSkyVariant variant) +{ + SSAInt sample_index = (((frac << 8) >> FRACBITS) * textureheight0) >> FRACBITS; + if (variant == DrawSkyVariant::Single) + { + return source0[index][sample_index * 4].load_vec4ub(false); + } + else + { + SSAInt sample_index2 = SSAInt::MIN(sample_index, textureheight1); + SSAVec4i color0 = source0[index][sample_index * 4].load_vec4ub(false); + SSAVec4i color1 = source1[index][sample_index2 * 4].load_vec4ub(false); + return blend_alpha_blend(color0, color1); + } +} + +SSAVec4i DrawSkyCodegen::FadeOut(SSAInt frac, SSAVec4i color) +{ + int start_fade = 2; // How fast it should fade out + + SSAInt alpha_top = SSAInt::MAX(SSAInt::MIN(frac.ashr(16 - start_fade), SSAInt(256)), SSAInt(0)); + SSAInt alpha_bottom = SSAInt::MAX(SSAInt::MIN(((2 << 24) - 
frac).ashr(16 - start_fade), SSAInt(256)), SSAInt(0)); + SSAInt inv_alpha_top = 256 - alpha_top; + SSAInt inv_alpha_bottom = 256 - alpha_bottom; + + color = (color * alpha_top + top_color * inv_alpha_top) / 256; + color = (color * alpha_bottom + bottom_color * inv_alpha_bottom) / 256; + return color.insert(3, 255); +} diff --git a/src/r_compiler/fixedfunction/drawskycodegen.h b/src/r_compiler/fixedfunction/drawskycodegen.h new file mode 100644 index 0000000000..2b90b2a9d0 --- /dev/null +++ b/src/r_compiler/fixedfunction/drawskycodegen.h @@ -0,0 +1,39 @@ + +#pragma once + +#include "drawercodegen.h" + +enum class DrawSkyVariant +{ + Single, + Double +}; + +class DrawSkyCodegen : public DrawerCodegen +{ +public: + void Generate(DrawSkyVariant variant, bool fourColumns, SSAValue args, SSAValue thread_data); + +private: + void Loop(DrawSkyVariant variant, bool fourColumns); + SSAVec4i Sample(SSAInt frac, int index, DrawSkyVariant variant); + SSAVec4i FadeOut(SSAInt frac, SSAVec4i color); + + SSAStack stack_index, stack_frac[4]; + + SSAUBytePtr dest; + SSAUBytePtr source0[4]; + SSAUBytePtr source1[4]; + SSAInt pitch; + SSAInt count; + SSAInt dest_y; + SSAInt texturefrac[4]; + SSAInt iscale[4]; + SSAInt textureheight0; + SSAInt textureheight1; + SSAVec4i top_color; + SSAVec4i bottom_color; + SSAWorkerThread thread; + + SSAInt fracstep[4]; +}; diff --git a/src/r_compiler/llvmdrawers.cpp b/src/r_compiler/llvmdrawers.cpp index 8dcb73a2c6..4d4aeccbe7 100644 --- a/src/r_compiler/llvmdrawers.cpp +++ b/src/r_compiler/llvmdrawers.cpp @@ -4,6 +4,7 @@ #include "r_compiler/fixedfunction/drawspancodegen.h" #include "r_compiler/fixedfunction/drawwallcodegen.h" #include "r_compiler/fixedfunction/drawcolumncodegen.h" +#include "r_compiler/fixedfunction/drawskycodegen.h" #include "r_compiler/ssa/ssa_function.h" #include "r_compiler/ssa/ssa_scope.h" #include "r_compiler/ssa/ssa_for_block.h" @@ -48,10 +49,12 @@ private: void CodegenDrawColumn(const char *name, DrawColumnVariant variant, 
DrawColumnMethod method); void CodegenDrawSpan(const char *name, DrawSpanVariant variant); void CodegenDrawWall(const char *name, DrawWallVariant variant, int columns); + void CodegenDrawSky(const char *name, DrawSkyVariant variant, int columns); static llvm::Type *GetDrawColumnArgsStruct(llvm::LLVMContext &context); static llvm::Type *GetDrawSpanArgsStruct(llvm::LLVMContext &context); static llvm::Type *GetDrawWallArgsStruct(llvm::LLVMContext &context); + static llvm::Type *GetDrawSkyArgsStruct(llvm::LLVMContext &context); static llvm::Type *GetWorkerThreadDataStruct(llvm::LLVMContext &context); LLVMProgram mProgram; @@ -140,6 +143,10 @@ LLVMDrawersImpl::LLVMDrawersImpl() CodegenDrawWall("tmvline4_subclamp", DrawWallVariant::SubClamp, 4); CodegenDrawWall("tmvline1_revsubclamp", DrawWallVariant::RevSubClamp, 1); CodegenDrawWall("tmvline4_revsubclamp", DrawWallVariant::RevSubClamp, 4); + CodegenDrawSky("DrawSky1", DrawSkyVariant::Single, 1); + CodegenDrawSky("DrawSky4", DrawSkyVariant::Single, 4); + CodegenDrawSky("DrawDoubleSky1", DrawSkyVariant::Double, 1); + CodegenDrawSky("DrawDoubleSky4", DrawSkyVariant::Double, 4); mProgram.CreateEE(); @@ -201,6 +208,10 @@ LLVMDrawersImpl::LLVMDrawersImpl() tmvline4_subclamp = mProgram.GetProcAddress("tmvline4_subclamp"); tmvline1_revsubclamp = mProgram.GetProcAddress("tmvline1_revsubclamp"); tmvline4_revsubclamp = mProgram.GetProcAddress("tmvline4_revsubclamp"); + DrawSky1 = mProgram.GetProcAddress("DrawSky1"); + DrawSky4 = mProgram.GetProcAddress("DrawSky4"); + DrawDoubleSky1 = mProgram.GetProcAddress("DrawDoubleSky1"); + DrawDoubleSky4 = mProgram.GetProcAddress("DrawDoubleSky4"); #if 0 std::vector foo(1024 * 4); @@ -292,6 +303,25 @@ void LLVMDrawersImpl::CodegenDrawWall(const char *name, DrawWallVariant variant, I_FatalError("verifyFunction failed for " __FUNCTION__); } +void LLVMDrawersImpl::CodegenDrawSky(const char *name, DrawSkyVariant variant, int columns) +{ + llvm::IRBuilder<> builder(mProgram.context()); + SSAScope 
ssa_scope(&mProgram.context(), mProgram.module(), &builder); + + SSAFunction function(name); + function.add_parameter(GetDrawSkyArgsStruct(mProgram.context())); + function.add_parameter(GetWorkerThreadDataStruct(mProgram.context())); + function.create_public(); + + DrawSkyCodegen codegen; + codegen.Generate(variant, columns == 4, function.parameter(0), function.parameter(1)); + + builder.CreateRetVoid(); + + if (llvm::verifyFunction(*function.func)) + I_FatalError("verifyFunction failed for " __FUNCTION__); +} + llvm::Type *LLVMDrawersImpl::GetDrawColumnArgsStruct(llvm::LLVMContext &context) { std::vector elements; @@ -375,6 +405,17 @@ llvm::Type *LLVMDrawersImpl::GetDrawWallArgsStruct(llvm::LLVMContext &context) return llvm::StructType::create(context, elements, "DrawWallArgs", false)->getPointerTo(); } +llvm::Type *LLVMDrawersImpl::GetDrawSkyArgsStruct(llvm::LLVMContext &context) +{ + std::vector elements; + elements.push_back(llvm::Type::getInt8PtrTy(context)); + for (int i = 0; i < 8; i++) + elements.push_back(llvm::Type::getInt8PtrTy(context)); + for (int i = 0; i < 15; i++) + elements.push_back(llvm::Type::getInt32Ty(context)); + return llvm::StructType::create(context, elements, "DrawSkyArgs", false)->getPointerTo(); +} + llvm::Type *LLVMDrawersImpl::GetWorkerThreadDataStruct(llvm::LLVMContext &context) { std::vector elements; diff --git a/src/r_compiler/llvmdrawers.h b/src/r_compiler/llvmdrawers.h index 7f5f6658e2..64d73eeee3 100644 --- a/src/r_compiler/llvmdrawers.h +++ b/src/r_compiler/llvmdrawers.h @@ -133,6 +133,29 @@ struct DrawColumnArgs } }; +struct DrawSkyArgs +{ + uint32_t *dest; + const uint32_t *source0[4]; + const uint32_t *source1[4]; + int32_t pitch; + int32_t count; + int32_t dest_y; + uint32_t texturefrac[4]; + uint32_t iscale[4]; + uint32_t textureheight0; + uint32_t textureheight1; + uint32_t top_color; + uint32_t bottom_color; + + FString ToString() + { + FString info; + info.Format("dest_y = %i, count = %i", dest_y, count); + return 
info; + } +}; + class LLVMDrawers { public: @@ -203,6 +226,11 @@ public: void(*tmvline1_revsubclamp)(const DrawWallArgs *, const WorkerThreadData *) = nullptr; void(*tmvline4_revsubclamp)(const DrawWallArgs *, const WorkerThreadData *) = nullptr; + void(*DrawSky1)(const DrawSkyArgs *, const WorkerThreadData *) = nullptr; + void(*DrawSky4)(const DrawSkyArgs *, const WorkerThreadData *) = nullptr; + void(*DrawDoubleSky1)(const DrawSkyArgs *, const WorkerThreadData *) = nullptr; + void(*DrawDoubleSky4)(const DrawSkyArgs *, const WorkerThreadData *) = nullptr; + private: static LLVMDrawers *Singleton; }; diff --git a/src/r_compiler/ssa/ssa_int.cpp b/src/r_compiler/ssa/ssa_int.cpp index 1815985c5f..6f2afce7f1 100644 --- a/src/r_compiler/ssa/ssa_int.cpp +++ b/src/r_compiler/ssa/ssa_int.cpp @@ -47,6 +47,11 @@ SSAInt SSAInt::add(SSAInt b, bool no_unsigned_wrap, bool no_signed_wrap) return SSAInt::from_llvm(SSAScope::builder().CreateAdd(v, b.v, SSAScope::hint(), no_unsigned_wrap, no_signed_wrap)); } +SSAInt SSAInt::ashr(int bits) +{ + return SSAInt::from_llvm(SSAScope::builder().CreateAShr(v, bits, SSAScope::hint())); +} + SSAInt operator+(const SSAInt &a, const SSAInt &b) { return SSAInt::from_llvm(SSAScope::builder().CreateAdd(a.v, b.v, SSAScope::hint())); diff --git a/src/r_compiler/ssa/ssa_int.h b/src/r_compiler/ssa/ssa_int.h index e9ce978c47..ef71e064a2 100644 --- a/src/r_compiler/ssa/ssa_int.h +++ b/src/r_compiler/ssa/ssa_int.h @@ -20,6 +20,7 @@ public: static SSAInt MAX(SSAInt a, SSAInt b); SSAInt add(SSAInt b, bool no_unsigned_wrap, bool no_signed_wrap); + SSAInt ashr(int bits); llvm::Value *v; }; diff --git a/src/r_compiler/ssa/ssa_vec4i.cpp b/src/r_compiler/ssa/ssa_vec4i.cpp index 3be0ec194a..60d6161a58 100644 --- a/src/r_compiler/ssa/ssa_vec4i.cpp +++ b/src/r_compiler/ssa/ssa_vec4i.cpp @@ -171,20 +171,6 @@ SSAVec4i SSAVec4i::sqrt(SSAVec4i f) return SSAVec4i::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_sqrt_pd), f.v, 
SSAScope::hint())); } -/* -SSAVec4i SSAVec4i::min_sse41(SSAVec4i a, SSAVec4i b) -{ - llvm::Value *values[2] = { a.v, b.v }; - return SSAVec4i::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse41_pminsd), values, SSAScope::hint())); -} - -SSAVec4i SSAVec4i::max_sse41(SSAVec4i a, SSAVec4i b) -{ - llvm::Value *values[2] = { a.v, b.v }; - return SSAVec4i::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse41_pmaxsd), values, SSAScope::hint())); -} -*/ - SSAVec4i operator+(const SSAVec4i &a, const SSAVec4i &b) { return SSAVec4i::from_llvm(SSAScope::builder().CreateAdd(a.v, b.v, SSAScope::hint())); diff --git a/src/r_compiler/ssa/ssa_vec4i.h b/src/r_compiler/ssa/ssa_vec4i.h index f8ef92f1e2..420ab021e9 100644 --- a/src/r_compiler/ssa/ssa_vec4i.h +++ b/src/r_compiler/ssa/ssa_vec4i.h @@ -35,8 +35,6 @@ public: static SSAVec4i combinehi(SSAVec8s v0, SSAVec8s v1); static SSAVec4i combinelo(SSAVec8s v0, SSAVec8s v1); static SSAVec4i sqrt(SSAVec4i f); - //static SSAVec4i min_sse41(SSAVec4i a, SSAVec4i b); - //static SSAVec4i max_sse41(SSAVec4i a, SSAVec4i b); static SSAVec4i from_llvm(llvm::Value *v) { return SSAVec4i(v); } static llvm::Type *llvm_type(); diff --git a/src/r_draw.h b/src/r_draw.h index 0708b8714f..4c478a7262 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -381,6 +381,11 @@ void R_SetDSColorMapLight(FSWColormap *base_colormap, float light, int shade); void R_SetTranslationMap(lighttable_t *translation); +void R_DrawSingleSkyCol1(uint32_t solid_top, uint32_t solid_bottom); +void R_DrawSingleSkyCol4(uint32_t solid_top, uint32_t solid_bottom); +void R_DrawDoubleSkyCol1(uint32_t solid_top, uint32_t solid_bottom); +void R_DrawDoubleSkyCol4(uint32_t solid_top, uint32_t solid_bottom); + extern bool r_swtruecolor; EXTERN_CVAR(Bool, r_multithreaded); diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 5114b95f06..4b3d98c04b 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -377,6 
+377,47 @@ public: } }; +class DrawSkyLLVMCommand : public DrawerCommand +{ +protected: + DrawSkyArgs args; + + WorkerThreadData ThreadData(DrawerThread *thread) + { + WorkerThreadData d; + d.core = thread->core; + d.num_cores = thread->num_cores; + d.pass_start_y = thread->pass_start_y; + d.pass_end_y = thread->pass_end_y; + return d; + } + +public: + DrawSkyLLVMCommand(uint32_t solid_top, uint32_t solid_bottom) + { + args.dest = (uint32_t*)dc_dest; + args.dest_y = _dest_y; + args.count = dc_count; + args.pitch = dc_pitch; + for (int i = 0; i < 4; i++) + { + args.texturefrac[i] = vplce[i]; + args.iscale[i] = vince[i]; + args.source0[i] = (const uint32_t *)bufplce[i]; + args.source1[i] = (const uint32_t *)bufplce2[i]; + } + args.textureheight0 = bufheight[0]; + args.textureheight1 = bufheight[1]; + args.top_color = solid_top; + args.bottom_color = solid_bottom; + } + + FString DebugInfo() override + { + return "DrawSkyLLVMCommand\n" + args.ToString(); + } +}; + #define DECLARE_DRAW_COMMAND(name, func, base) \ class name##LLVMCommand : public base \ { \ @@ -416,6 +457,10 @@ DECLARE_DRAW_COMMAND(FillColumnAdd, FillColumnAdd, DrawColumnLLVMCommand); DECLARE_DRAW_COMMAND(FillColumnAddClamp, FillColumnAddClamp, DrawColumnLLVMCommand); DECLARE_DRAW_COMMAND(FillColumnSubClamp, FillColumnSubClamp, DrawColumnLLVMCommand); DECLARE_DRAW_COMMAND(FillColumnRevSubClamp, FillColumnRevSubClamp, DrawColumnLLVMCommand); +DECLARE_DRAW_COMMAND(DrawSingleSky1, DrawSky1, DrawSkyLLVMCommand); +DECLARE_DRAW_COMMAND(DrawSingleSky4, DrawSky4, DrawSkyLLVMCommand); +DECLARE_DRAW_COMMAND(DrawDoubleSky1, DrawDoubleSky1, DrawSkyLLVMCommand); +DECLARE_DRAW_COMMAND(DrawDoubleSky4, DrawDoubleSky4, DrawSkyLLVMCommand); ///////////////////////////////////////////////////////////////////////////// @@ -1203,6 +1248,26 @@ void ApplySpecialColormapRGBACommand::Execute(DrawerThread *thread) ///////////////////////////////////////////////////////////////////////////// +void R_DrawSingleSkyCol1(uint32_t 
solid_top, uint32_t solid_bottom) +{ + DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); +} + +void R_DrawSingleSkyCol4(uint32_t solid_top, uint32_t solid_bottom) +{ + DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); +} + +void R_DrawDoubleSkyCol1(uint32_t solid_top, uint32_t solid_bottom) +{ + DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); +} + +void R_DrawDoubleSkyCol4(uint32_t solid_top, uint32_t solid_bottom) +{ + DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); +} + void R_DrawColumn_rgba() { DrawerCommandQueue::QueueCommand(); diff --git a/src/r_plane.cpp b/src/r_plane.cpp index 0d87be4b9b..8f2a7f6308 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -985,8 +985,177 @@ static const BYTE *R_GetTwoSkyColumns (FTexture *fronttex, int x) } } +static void R_DrawSkyColumn(int start_x, int y1, int y2, int columns) +{ + uint32_t height = frontskytex->GetHeight(); + + for (int i = 0; i < columns; i++) + { + int x = start_x + i; + + int uv_fracbits = 24 - frontskytex->HeightBits; + double uv_stepd = skyiscale * frontskytex->Scale.Y; + double v = (skymid * frontskytex->Scale.Y + uv_stepd * (y1 - CenterY + 0.5)) / height; + v = v + 1.0f; + v *= height; + v *= (1 << uv_fracbits); + uint32_t uv_pos = (uint32_t)v; + uint32_t uv_step = xs_ToFixed(uv_fracbits, uv_stepd); + if (uv_step == 0) // To prevent divide by zero elsewhere + uv_step = 1; + + if (MirrorFlags & RF_XFLIP) + x = (viewwidth - x); + + DWORD ang, angle1, angle2; + + if (r_linearsky) + { + angle_t xangle = (angle_t)((0.5 - x / (double)viewwidth) * FocalTangent * ANGLE_90); + ang = (skyangle + xangle) ^ skyflip; + } + else + { + ang = (skyangle + xtoviewangle[x]) ^ skyflip; + } + angle1 = (DWORD)((UMulScale16(ang, frontcyl) + frontpos) >> FRACBITS); + angle2 = (DWORD)((UMulScale16(ang, backcyl) + backpos) >> FRACBITS); + + bufplce[i] = (const BYTE *)frontskytex->GetColumnBgra(angle1, nullptr); + bufplce2[i] = backskytex ? 
(const BYTE *)backskytex->GetColumnBgra(angle2, nullptr) : nullptr; + buftexturefracx[i] = 0; + vince[i] = uv_step; + vplce[i] = uv_pos; + } + + bufheight[0] = height; + bufheight[1] = backskytex ? backskytex->GetHeight() : height; + dc_dest = (ylookup[y1] + start_x) * 4 + dc_destorg; + dc_count = y2 - y1; + + // To do: figure out how GZDoom calculates the solid top and bottom colors + uint32_t solid_top = 0xff7f7f7f; + uint32_t solid_bottom = 0xff3f3f3f; + + if (columns == 4) + if (!backskytex) + R_DrawSingleSkyCol4(solid_top, solid_bottom); + else + R_DrawDoubleSkyCol4(solid_top, solid_bottom); + else + if (!backskytex) + R_DrawSingleSkyCol1(solid_top, solid_bottom); + else + R_DrawDoubleSkyCol1(solid_top, solid_bottom); +} + +static void R_DrawTruecolorSky(visplane_t *pl) +{ + R_SetColorMapLight(fixedcolormap, 0, 0); + palookupoffse[0] = dc_colormap; + palookupoffse[1] = dc_colormap; + palookupoffse[2] = dc_colormap; + palookupoffse[3] = dc_colormap; + palookuplight[0] = 0; + palookuplight[1] = 0; + palookuplight[2] = 0; + palookuplight[3] = 0; + setupvline(FRACBITS); + + int x1 = pl->left; + int x2 = pl->right; + short *uwal = (short *)pl->top; + short *dwal = (short *)pl->bottom; + + // Calculate where 4 column alignment begins and ends: + int aligned_x1 = clamp((x1 + 3) / 4 * 4, x1, x2); + int aligned_x2 = clamp(x2 / 4 * 4, x1, x2); + + // First unaligned columns: + for (int x = x1; x < aligned_x1; x++) + { + int y1 = uwal[x]; + int y2 = dwal[x]; + if (y2 <= y1) + continue; + + R_DrawSkyColumn(x, y1, y2, 1); + } + + // The aligned columns + for (int x = aligned_x1; x < aligned_x2; x += 4) + { + // Find y1, y2, light and uv values for four columns: + int y1[4] = { uwal[x], uwal[x + 1], uwal[x + 2], uwal[x + 3] }; + int y2[4] = { dwal[x], dwal[x + 1], dwal[x + 2], dwal[x + 3] }; + + // Figure out where we vertically can start and stop drawing 4 columns in one go + int middle_y1 = y1[0]; + int middle_y2 = y2[0]; + for (int i = 1; i < 4; i++) + { + middle_y1 = 
MAX(y1[i], middle_y1); + middle_y2 = MIN(y2[i], middle_y2); + } + + // If we got an empty column in our set we cannot draw 4 columns in one go: + bool empty_column_in_set = false; + for (int i = 0; i < 4; i++) + { + if (y2[i] <= y1[i]) + empty_column_in_set = true; + } + + if (empty_column_in_set || middle_y2 <= middle_y1) + { + for (int i = 0; i < 4; i++) + { + if (y2[i] <= y1[i]) + continue; + + R_DrawSkyColumn(x + i, y1[i], y2[i], 1); + } + continue; + } + + // Draw the first rows where not all 4 columns are active + for (int i = 0; i < 4; i++) + { + if (y1[i] < middle_y1) + R_DrawSkyColumn(x + i, y1[i], middle_y1, 1); + } + + // Draw the area where all 4 columns are active + R_DrawSkyColumn(x, middle_y1, middle_y2, 4); + + // Draw the last rows where not all 4 columns are active + for (int i = 0; i < 4; i++) + { + if (middle_y2 < y2[i]) + R_DrawSkyColumn(x + i, middle_y2, y2[i], 1); + } + } + + // The last unaligned columns: + for (int x = aligned_x2; x < x2; x++) + { + int y1 = uwal[x]; + int y2 = dwal[x]; + if (y2 <= y1) + continue; + + R_DrawSkyColumn(x, y1, y2, 1); + } +} + static void R_DrawSky (visplane_t *pl) { + if (r_swtruecolor) + { + R_DrawTruecolorSky(pl); + return; + } + int x; float swal; diff --git a/src/r_sky.cpp b/src/r_sky.cpp index 9ea44db481..cf3f37367f 100644 --- a/src/r_sky.cpp +++ b/src/r_sky.cpp @@ -49,7 +49,7 @@ fixed_t sky1cyl, sky2cyl; double sky1pos, sky2pos; // [RH] Stretch sky texture if not taller than 128 pixels? 
-CUSTOM_CVAR (Bool, r_stretchsky, true, CVAR_ARCHIVE) +CUSTOM_CVAR (Bool, r_stretchsky, false, CVAR_ARCHIVE) { R_InitSkyMap (); } From b3e4ba13ba4e0a96212d4c5f57a0e2b16765cee5 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 15 Oct 2016 21:53:45 +0200 Subject: [PATCH 201/912] Use GetSkyCapColor to pick the sky solid color --- src/r_plane.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/r_plane.cpp b/src/r_plane.cpp index 8f2a7f6308..4e18815ba9 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -1033,9 +1033,8 @@ static void R_DrawSkyColumn(int start_x, int y1, int y2, int columns) dc_dest = (ylookup[y1] + start_x) * 4 + dc_destorg; dc_count = y2 - y1; - // To do: figure out how GZDoom calculates the solid top and bottom colors - uint32_t solid_top = 0xff7f7f7f; - uint32_t solid_bottom = 0xff3f3f3f; + uint32_t solid_top = frontskytex->GetSkyCapColor(false); + uint32_t solid_bottom = frontskytex->GetSkyCapColor(true); if (columns == 4) if (!backskytex) From 1711f13eeb642a1080524a6b29fa2ddb27489b5c Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 16 Oct 2016 13:08:06 +0200 Subject: [PATCH 202/912] Fix off-by-one bug --- src/r_compiler/fixedfunction/drawskycodegen.cpp | 5 +++-- src/r_compiler/fixedfunction/drawskycodegen.h | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/r_compiler/fixedfunction/drawskycodegen.cpp b/src/r_compiler/fixedfunction/drawskycodegen.cpp index f2ba148fdd..f96d9fc1d5 100644 --- a/src/r_compiler/fixedfunction/drawskycodegen.cpp +++ b/src/r_compiler/fixedfunction/drawskycodegen.cpp @@ -34,7 +34,8 @@ void DrawSkyCodegen::Generate(DrawSkyVariant variant, bool fourColumns, SSAValue iscale[2] = args[0][18].load(true); iscale[3] = args[0][19].load(true); textureheight0 = args[0][20].load(true); - textureheight1 = args[0][21].load(true); + SSAInt textureheight1 = args[0][21].load(true); + maxtextureheight1 = textureheight1 - 1; top_color = 
SSAVec4i::unpack(args[0][22].load(true)); bottom_color = SSAVec4i::unpack(args[0][23].load(true)); @@ -105,7 +106,7 @@ SSAVec4i DrawSkyCodegen::Sample(SSAInt frac, int index, DrawSkyVariant variant) } else { - SSAInt sample_index2 = SSAInt::MIN(sample_index, textureheight1); + SSAInt sample_index2 = SSAInt::MIN(sample_index, maxtextureheight1); SSAVec4i color0 = source0[index][sample_index * 4].load_vec4ub(false); SSAVec4i color1 = source1[index][sample_index2 * 4].load_vec4ub(false); return blend_alpha_blend(color0, color1); diff --git a/src/r_compiler/fixedfunction/drawskycodegen.h b/src/r_compiler/fixedfunction/drawskycodegen.h index 2b90b2a9d0..a02f8dc2be 100644 --- a/src/r_compiler/fixedfunction/drawskycodegen.h +++ b/src/r_compiler/fixedfunction/drawskycodegen.h @@ -30,7 +30,7 @@ private: SSAInt texturefrac[4]; SSAInt iscale[4]; SSAInt textureheight0; - SSAInt textureheight1; + SSAInt maxtextureheight1; SSAVec4i top_color; SSAVec4i bottom_color; SSAWorkerThread thread; From 42138a6ab9f40d269a5b424e9732eb3c53df3832 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 16 Oct 2016 13:08:52 +0200 Subject: [PATCH 203/912] Fix hexen sky stretching --- src/r_plane.cpp | 63 +++++++++++++++++++++++++++---------------------- 1 file changed, 35 insertions(+), 28 deletions(-) diff --git a/src/r_plane.cpp b/src/r_plane.cpp index 4e18815ba9..cbe1a358f1 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -64,6 +64,8 @@ #pragma warning(disable:4244) #endif +CVAR(Bool, r_capsky, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); + //EXTERN_CVAR (Int, tx) //EXTERN_CVAR (Int, ty) @@ -985,25 +987,20 @@ static const BYTE *R_GetTwoSkyColumns (FTexture *fronttex, int x) } } -static void R_DrawSkyColumn(int start_x, int y1, int y2, int columns) +static void R_DrawSkyColumnStripe(int start_x, int y1, int y2, int columns, double scale, double texturemid, double yrepeat) { uint32_t height = frontskytex->GetHeight(); for (int i = 0; i < columns; i++) { + double uv_stepd = skyiscale * 
yrepeat; + double v = (texturemid + uv_stepd * (y1 - CenterY + 0.5)) / height + 1.0f; + double v_step = uv_stepd / height; + + uint32_t uv_pos = (uint32_t)(v * 0x01000000); + uint32_t uv_step = (uint32_t)(v_step * 0x01000000); + int x = start_x + i; - - int uv_fracbits = 24 - frontskytex->HeightBits; - double uv_stepd = skyiscale * frontskytex->Scale.Y; - double v = (skymid * frontskytex->Scale.Y + uv_stepd * (y1 - CenterY + 0.5)) / height; - v = v + 1.0f; - v *= height; - v *= (1 << uv_fracbits); - uint32_t uv_pos = (uint32_t)v; - uint32_t uv_step = xs_ToFixed(uv_fracbits, uv_stepd); - if (uv_step == 0) // To prevent divide by zero elsewhere - uv_step = 1; - if (MirrorFlags & RF_XFLIP) x = (viewwidth - x); @@ -1023,7 +1020,6 @@ static void R_DrawSkyColumn(int start_x, int y1, int y2, int columns) bufplce[i] = (const BYTE *)frontskytex->GetColumnBgra(angle1, nullptr); bufplce2[i] = backskytex ? (const BYTE *)backskytex->GetColumnBgra(angle2, nullptr) : nullptr; - buftexturefracx[i] = 0; vince[i] = uv_step; vplce[i] = uv_pos; } @@ -1048,18 +1044,30 @@ static void R_DrawSkyColumn(int start_x, int y1, int y2, int columns) R_DrawDoubleSkyCol1(solid_top, solid_bottom); } -static void R_DrawTruecolorSky(visplane_t *pl) +static void R_DrawSkyColumn(int start_x, int y1, int y2, int columns) { - R_SetColorMapLight(fixedcolormap, 0, 0); - palookupoffse[0] = dc_colormap; - palookupoffse[1] = dc_colormap; - palookupoffse[2] = dc_colormap; - palookupoffse[3] = dc_colormap; - palookuplight[0] = 0; - palookuplight[1] = 0; - palookuplight[2] = 0; - palookuplight[3] = 0; - setupvline(FRACBITS); + if (1 << frontskytex->HeightBits == frontskytex->GetHeight()) + { + double texturemid = skymid * frontskytex->Scale.Y; + R_DrawSkyColumnStripe(start_x, y1, y2, columns, frontskytex->Scale.Y, texturemid, frontskytex->Scale.Y); + } + else + { + double yrepeat = frontskytex->Scale.Y; + double scale = frontskytex->Scale.Y * skyscale; + double iscale = 1 / scale; + short drawheight = 
short(frontskytex->GetHeight() * scale); + double topfrac = fmod(skymid + iscale * (1 - CenterY), frontskytex->GetHeight()); + if (topfrac < 0) topfrac += frontskytex->GetHeight(); + double texturemid = topfrac - iscale * (1 - CenterY); + R_DrawSkyColumnStripe(start_x, y1, y2, columns, scale, texturemid, yrepeat); + } +} + +static void R_DrawCapSky(visplane_t *pl) +{ + if (!r_swtruecolor) + R_SetColorMapLight(fixedcolormap, 0, 0); int x1 = pl->left; int x2 = pl->right; @@ -1104,7 +1112,6 @@ static void R_DrawTruecolorSky(visplane_t *pl) if (y2[i] <= y1[i]) empty_column_in_set = true; } - if (empty_column_in_set || middle_y2 <= middle_y1) { for (int i = 0; i < 4; i++) @@ -1149,9 +1156,9 @@ static void R_DrawTruecolorSky(visplane_t *pl) static void R_DrawSky (visplane_t *pl) { - if (r_swtruecolor) + if (r_swtruecolor && r_capsky) { - R_DrawTruecolorSky(pl); + R_DrawCapSky(pl); return; } From 204c037017d2ed73e3550ffda484c7bc03db9179 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 16 Oct 2016 15:41:47 +0200 Subject: [PATCH 204/912] Fix blending modes --- .../fixedfunction/drawcolumncodegen.cpp | 19 +++++++++++++------ .../fixedfunction/drawercodegen.cpp | 3 ++- .../fixedfunction/drawwallcodegen.cpp | 6 +++--- 3 files changed, 18 insertions(+), 10 deletions(-) diff --git a/src/r_compiler/fixedfunction/drawcolumncodegen.cpp b/src/r_compiler/fixedfunction/drawcolumncodegen.cpp index df35fd53c8..71cdf8195c 100644 --- a/src/r_compiler/fixedfunction/drawcolumncodegen.cpp +++ b/src/r_compiler/fixedfunction/drawcolumncodegen.cpp @@ -168,6 +168,7 @@ bool DrawColumnCodegen::IsPaletteInput(DrawColumnVariant variant) SSAVec4i DrawColumnCodegen::ProcessPixel(SSAInt sample_index, SSAVec4i bgcolor, DrawColumnVariant variant, bool isSimpleShade) { SSAInt alpha, inv_alpha; + SSAVec4i fg; switch (variant) { default: @@ -177,24 +178,30 @@ SSAVec4i DrawColumnCodegen::ProcessPixel(SSAInt sample_index, SSAVec4i bgcolor, return blend_copy(Shade(Sample(sample_index), 
isSimpleShade)); case DrawColumnVariant::DrawAdd: case DrawColumnVariant::DrawAddClamp: - return blend_add(Shade(Sample(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha); + fg = Shade(Sample(sample_index), isSimpleShade); + return blend_add(fg, bgcolor, srcalpha, calc_blend_bgalpha(fg, destalpha)); case DrawColumnVariant::DrawShaded: alpha = SSAInt::MAX(SSAInt::MIN(ColormapSample(sample_index), SSAInt(64)), SSAInt(0)) * 4; inv_alpha = 256 - alpha; return blend_add(color, bgcolor, alpha, inv_alpha); case DrawColumnVariant::DrawSubClamp: - return blend_sub(Shade(Sample(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha); + fg = Shade(Sample(sample_index), isSimpleShade); + return blend_sub(fg, bgcolor, srcalpha, calc_blend_bgalpha(fg, destalpha)); case DrawColumnVariant::DrawRevSubClamp: - return blend_revsub(Shade(Sample(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha); + fg = Shade(Sample(sample_index), isSimpleShade); + return blend_revsub(fg, bgcolor, srcalpha, calc_blend_bgalpha(fg, destalpha)); case DrawColumnVariant::DrawTranslated: return blend_copy(Shade(TranslateSample(sample_index), isSimpleShade)); case DrawColumnVariant::DrawTlatedAdd: case DrawColumnVariant::DrawAddClampTranslated: - return blend_add(Shade(TranslateSample(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha); + fg = Shade(TranslateSample(sample_index), isSimpleShade); + return blend_add(fg, bgcolor, srcalpha, calc_blend_bgalpha(fg, destalpha)); case DrawColumnVariant::DrawSubClampTranslated: - return blend_sub(Shade(TranslateSample(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha); + fg = Shade(TranslateSample(sample_index), isSimpleShade); + return blend_sub(fg, bgcolor, srcalpha, calc_blend_bgalpha(fg, destalpha)); case DrawColumnVariant::DrawRevSubClampTranslated: - return blend_revsub(Shade(TranslateSample(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha); + fg = Shade(TranslateSample(sample_index), isSimpleShade); + 
return blend_revsub(fg, bgcolor, srcalpha, calc_blend_bgalpha(fg, destalpha)); case DrawColumnVariant::Fill: return blend_copy(color); case DrawColumnVariant::FillAdd: diff --git a/src/r_compiler/fixedfunction/drawercodegen.cpp b/src/r_compiler/fixedfunction/drawercodegen.cpp index 3916c29ab0..a4009c1f7e 100644 --- a/src/r_compiler/fixedfunction/drawercodegen.cpp +++ b/src/r_compiler/fixedfunction/drawercodegen.cpp @@ -55,8 +55,9 @@ SSAVec4i DrawerCodegen::shade_pal_index_advanced(SSAInt index, SSAInt light, con SSAVec4i DrawerCodegen::shade_bgra_simple(SSAVec4i color, SSAInt light) { + SSAInt alpha = color[3]; color = color * light / 256; - return color.insert(3, 255); + return color.insert(3, alpha); } SSAVec4i DrawerCodegen::shade_bgra_advanced(SSAVec4i color, SSAInt light, const SSAShadeConstants &constants) diff --git a/src/r_compiler/fixedfunction/drawwallcodegen.cpp b/src/r_compiler/fixedfunction/drawwallcodegen.cpp index 56d99e78ee..b820274efe 100644 --- a/src/r_compiler/fixedfunction/drawwallcodegen.cpp +++ b/src/r_compiler/fixedfunction/drawwallcodegen.cpp @@ -182,10 +182,10 @@ SSAVec4i DrawWallCodegen::Blend(SSAVec4i fg, SSAVec4i bg, DrawWallVariant varian return blend_alpha_blend(fg, bg); case DrawWallVariant::Add: case DrawWallVariant::AddClamp: - return blend_add(fg, bg, srcalpha, destalpha); + return blend_add(fg, bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); case DrawWallVariant::SubClamp: - return blend_sub(fg, bg, srcalpha, destalpha); + return blend_sub(fg, bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); case DrawWallVariant::RevSubClamp: - return blend_revsub(fg, bg, srcalpha, destalpha); + return blend_revsub(fg, bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); } } From 49e890f212fbf3299549ec521e7dfd5c3e9176c8 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 16 Oct 2016 15:48:34 +0200 Subject: [PATCH 205/912] Fix hexen sky cap offset --- src/r_plane.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/src/r_plane.cpp b/src/r_plane.cpp index cbe1a358f1..5dea6a03ad 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -994,7 +994,7 @@ static void R_DrawSkyColumnStripe(int start_x, int y1, int y2, int columns, doub for (int i = 0; i < columns; i++) { double uv_stepd = skyiscale * yrepeat; - double v = (texturemid + uv_stepd * (y1 - CenterY + 0.5)) / height + 1.0f; + double v = (texturemid + uv_stepd * (y1 - CenterY + 0.5)) / height; double v_step = uv_stepd / height; uint32_t uv_pos = (uint32_t)(v * 0x01000000); @@ -1048,7 +1048,7 @@ static void R_DrawSkyColumn(int start_x, int y1, int y2, int columns) { if (1 << frontskytex->HeightBits == frontskytex->GetHeight()) { - double texturemid = skymid * frontskytex->Scale.Y; + double texturemid = skymid * frontskytex->Scale.Y + frontskytex->GetHeight(); R_DrawSkyColumnStripe(start_x, y1, y2, columns, frontskytex->Scale.Y, texturemid, frontskytex->Scale.Y); } else From 2534e80a1915422042c2c2a0338f1d33d41857a5 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 16 Oct 2016 16:14:54 +0200 Subject: [PATCH 206/912] Fix vid_hw2d 0 rendering glitch --- src/r_draw.cpp | 29 +++++++++++++++++++---------- src/v_draw.cpp | 25 +++++++++++++++++++------ 2 files changed, 38 insertions(+), 16 deletions(-) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 03515c8a11..6b68cf81bc 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -2689,16 +2689,19 @@ ESPSResult R_SetPatchStyle (FRenderStyle style, fixed_t alpha, int translation, alpha = clamp (alpha, 0, OPAQUE); } - dc_translation = NULL; - if (translation != 0) + if (translation != -1) { - FRemapTable *table = TranslationToTable(translation); - if (table != NULL && !table->Inactive) + dc_translation = NULL; + if (translation != 0) { - if (r_swtruecolor) - dc_translation = (BYTE*)table->Palette; - else - dc_translation = table->Remap; + FRemapTable *table = TranslationToTable(translation); + if (table != NULL && !table->Inactive) + { + if (r_swtruecolor) + dc_translation = 
(BYTE*)table->Palette; + else + dc_translation = table->Remap; + } } } basecolormapsave = basecolormap; @@ -2801,10 +2804,11 @@ bool R_GetTransMaskDrawers (fixed_t (**tmvline1)(), void (**tmvline4)()) void R_SetTranslationMap(lighttable_t *translation) { - dc_fcolormap = nullptr; - dc_colormap = translation; if (r_swtruecolor) { + dc_fcolormap = nullptr; + dc_colormap = nullptr; + dc_translation = translation; dc_shade_constants.light_red = 256; dc_shade_constants.light_green = 256; dc_shade_constants.light_blue = 256; @@ -2817,6 +2821,11 @@ void R_SetTranslationMap(lighttable_t *translation) dc_shade_constants.simple_shade = true; dc_light = 0; } + else + { + dc_fcolormap = nullptr; + dc_colormap = translation; + } } void R_SetColorMapLight(FSWColormap *base_colormap, float light, int shade) diff --git a/src/v_draw.cpp b/src/v_draw.cpp index a89a0865d2..9218962963 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -169,13 +169,19 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) parms.colorOverlay = PalEntry(parms.colorOverlay).InverseColor(); } // Note that this overrides DTA_Translation in software, but not in hardware. 
- FDynamicColormap *colormap = GetSpecialLights(MAKERGB(255,255,255), - parms.colorOverlay & MAKEARGB(0,255,255,255), 0); - translation = &colormap->Maps[(APART(parms.colorOverlay)*NUMCOLORMAPS/255)*256]; + if (!r_swtruecolor) + { + FDynamicColormap *colormap = GetSpecialLights(MAKERGB(255, 255, 255), + parms.colorOverlay & MAKEARGB(0, 255, 255, 255), 0); + translation = &colormap->Maps[(APART(parms.colorOverlay)*NUMCOLORMAPS / 255) * 256]; + } } else if (parms.remap != NULL) { - translation = parms.remap->Remap; + if (r_swtruecolor) + translation = (const BYTE*)parms.remap->Palette; + else + translation = parms.remap->Remap; } if (translation != NULL) @@ -184,11 +190,18 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) } else { - R_SetTranslationMap(identitymap); + if (r_swtruecolor) + R_SetTranslationMap(nullptr); + else + R_SetTranslationMap(identitymap); } fixedcolormap = dc_fcolormap; - ESPSResult mode = R_SetPatchStyle (parms.style, parms.Alpha, 0, parms.fillcolor); + ESPSResult mode; + if (r_swtruecolor) + mode = R_SetPatchStyle(parms.style, parms.Alpha, -1, parms.fillcolor); + else + mode = R_SetPatchStyle(parms.style, parms.Alpha, 0, parms.fillcolor); BYTE *destorgsave = dc_destorg; dc_destorg = screen->GetBuffer(); From f81d0d3964b429c96fea5bba427a828ec1479f18 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 16 Oct 2016 22:40:08 +0200 Subject: [PATCH 207/912] macOS support and Intel driver bug fixes --- src/gl/system/gl_swframebuffer.cpp | 181 ++++++++++++++----------- src/gl/system/gl_swframebuffer.h | 20 +-- src/gl/system/gl_swwipe.cpp | 4 +- src/posix/cocoa/i_video.mm | 6 +- wadsrc/static/shaders/glsl/swshader.fp | 6 +- 5 files changed, 120 insertions(+), 97 deletions(-) diff --git a/src/gl/system/gl_swframebuffer.cpp b/src/gl/system/gl_swframebuffer.cpp index cffee9b915..affcb86f9c 100644 --- a/src/gl/system/gl_swframebuffer.cpp +++ b/src/gl/system/gl_swframebuffer.cpp @@ -75,46 +75,62 @@ CVAR(Int, gl_showpacks, 0, 0) #ifndef 
WIN32 // Defined in fb_d3d9 for Windows CVAR(Bool, vid_hwaalines, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG) +CUSTOM_CVAR(Bool, vid_hw2d, true, CVAR_NOINITCALL) +{ + V_SetBorderNeedRefresh(); + ST_SetNeedRefresh(); +} #else EXTERN_CVAR(Bool, vid_hwaalines) +EXTERN_CVAR(Bool, vid_hw2d) #endif -EXTERN_CVAR(Bool, vid_hw2d) EXTERN_CVAR(Bool, fullscreen) EXTERN_CVAR(Float, Gamma) EXTERN_CVAR(Bool, vid_vsync) EXTERN_CVAR(Float, transsouls) EXTERN_CVAR(Int, vid_refreshrate) +#ifdef WIN32 extern cycle_t BlitCycles; +#endif void gl_LoadExtensions(); +void gl_PrintStartupLog(); + +#ifndef WIN32 +// This has to be in this file because system headers conflict Doom headers +DFrameBuffer *CreateGLSWFrameBuffer(int width, int height, bool fullscreen) +{ + return new OpenGLSWFrameBuffer(NULL, width, height, 32, 60, fullscreen); +} +#endif IMPLEMENT_CLASS(OpenGLSWFrameBuffer) const char *const OpenGLSWFrameBuffer::ShaderDefines[OpenGLSWFrameBuffer::NUM_SHADERS] = { - "#define ENORMALCOLOR\n#define PALTEX 0\n#define DINVERT 0", // NormalColor - "#define ENORMALCOLOR\n#define PALTEX 1\n#define INVERT 0", // NormalColorPal - "#define ENORMALCOLOR\n#define PALTEX 0\n#define INVERT 1", // NormalColorInv - "#define ENORMALCOLOR\n#define PALTEX 1\n#define INVERT 1", // NormalColorPalInv + "#define ENORMALCOLOR", // NormalColor + "#define ENORMALCOLOR\n#define PALTEX", // NormalColorPal + "#define ENORMALCOLOR\n#define INVERT", // NormalColorInv + "#define ENORMALCOLOR\n#define PALTEX\n#define INVERT", // NormalColorPalInv - "#define EREDTOALPHA\n#define INVERT 0", // RedToAlpha - "#define EREDTOALPHA\n#define INVERT 1", // RedToAlphaInv + "#define EREDTOALPHA", // RedToAlpha + "#define EREDTOALPHA\n#define INVERT", // RedToAlphaInv "#define EVERTEXCOLOR", // VertexColor - "#define ESPECIALCOLORMAP\n#define PALTEX 0\n#define INVERT 0", // SpecialColormap - "#define ESPECIALCOLORMAP\n#define PALTEX 1\n#define INVERT 0", // SpecialColorMapPal + "#define ESPECIALCOLORMAP\n", // SpecialColormap + 
"#define ESPECIALCOLORMAP\n#define PALTEX", // SpecialColorMapPal - "#define EINGAMECOLORMAP\n#define PALTEX 0\n#define INVERT 0\n#define DESAT 0", // InGameColormap - "#define EINGAMECOLORMAP\n#define PALTEX 0\n#define INVERT 0\n#define DESAT 1", // InGameColormapDesat - "#define EINGAMECOLORMAP\n#define PALTEX 0\n#define INVERT 1\n#define DESAT 0", // InGameColormapInv - "#define EINGAMECOLORMAP\n#define PALTEX 0\n#define INVERT 1\n#define DESAT 1", // InGameColormapInvDesat - "#define EINGAMECOLORMAP\n#define PALTEX 1\n#define INVERT 0\n#define DESAT 0", // InGameColormapPal - "#define EINGAMECOLORMAP\n#define PALTEX 1\n#define INVERT 0\n#define DESAT 1", // InGameColormapPalDesat - "#define EINGAMECOLORMAP\n#define PALTEX 1\n#define INVERT 1\n#define DESAT 0", // InGameColormapPalInv - "#define EINGAMECOLORMAP\n#define PALTEX 1\n#define INVERT 1\n#define DESAT 1", // InGameColormapPalInvDesat + "#define EINGAMECOLORMAP", // InGameColormap + "#define EINGAMECOLORMAP\n#define DESAT", // InGameColormapDesat + "#define EINGAMECOLORMAP\n#define INVERT", // InGameColormapInv + "#define EINGAMECOLORMAP\n#define INVERT\n#define DESAT", // InGameColormapInvDesat + "#define EINGAMECOLORMAP\n#define PALTEX\n", // InGameColormapPal + "#define EINGAMECOLORMAP\n#define PALTEX\n#define DESAT", // InGameColormapPalDesat + "#define EINGAMECOLORMAP\n#define PALTEX\n#define INVERT", // InGameColormapPalInv + "#define EINGAMECOLORMAP\n#define PALTEX\n#define INVERT\n#define DESAT", // InGameColormapPalInvDesat "#define EBURNWIPE", // BurnWipe "#define EGAMMACORRECTION", // GammaCorrection @@ -131,6 +147,11 @@ OpenGLSWFrameBuffer::OpenGLSWFrameBuffer(void *hMonitor, int width, int height, } gl_LoadExtensions(); InitializeState(); + if (first) + { + gl_PrintStartupLog(); + first = false; + } // SetVSync needs to be at the very top to workaround a bug in Nvidia's OpenGL driver. // If wglSwapIntervalEXT is called after glBindFramebuffer in a frame the setting is not changed! 
@@ -197,6 +218,19 @@ OpenGLSWFrameBuffer::~OpenGLSWFrameBuffer() delete[] QuadExtra; } +void *OpenGLSWFrameBuffer::MapBuffer(int target, int size) +{ + if (glMapBufferRange) + { + return (FBVERTEX*)glMapBufferRange(target, 0, size, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT); + } + else + { + glBufferData(target, size, nullptr, GL_STREAM_DRAW); + return glMapBuffer(target, GL_WRITE_ONLY); + } +} + OpenGLSWFrameBuffer::HWFrameBuffer::~HWFrameBuffer() { if (Framebuffer != 0) glDeleteFramebuffers(1, (GLuint*)&Framebuffer); @@ -218,7 +252,7 @@ OpenGLSWFrameBuffer::HWVertexBuffer::~HWVertexBuffer() OpenGLSWFrameBuffer::FBVERTEX *OpenGLSWFrameBuffer::HWVertexBuffer::Lock() { glBindBuffer(GL_ARRAY_BUFFER, Buffer); - return (FBVERTEX*)glMapBufferRange(GL_ARRAY_BUFFER, 0, Size, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT); + return (FBVERTEX*)MapBuffer(GL_ARRAY_BUFFER, Size); } void OpenGLSWFrameBuffer::HWVertexBuffer::Unlock() @@ -236,7 +270,7 @@ uint16_t *OpenGLSWFrameBuffer::HWIndexBuffer::Lock() { glGetIntegerv(GL_ELEMENT_ARRAY_BUFFER_BINDING, &LockedOldBinding); glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, Buffer); - return (uint16_t*)glMapBufferRange(GL_ELEMENT_ARRAY_BUFFER, 0, Size, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT); + return (uint16_t*)MapBuffer(GL_ELEMENT_ARRAY_BUFFER, Size); } void OpenGLSWFrameBuffer::HWIndexBuffer::Unlock() @@ -281,6 +315,7 @@ bool OpenGLSWFrameBuffer::CreateFrameBuffer(const FString &name, int width, int if (result != GL_FRAMEBUFFER_COMPLETE) { + //Printf("Framebuffer is not complete"); outFramebuffer = nullptr; return false; } @@ -296,9 +331,15 @@ bool OpenGLSWFrameBuffer::CreatePixelShader(FString vertexsrc, FString fragments shader->Program = glCreateProgram(); shader->VertexShader = glCreateShader(GL_VERTEX_SHADER); shader->FragmentShader = glCreateShader(GL_FRAGMENT_SHADER); - - vertexsrc = "#version 130\n" + defines + "\n#line 0\n" + 
vertexsrc; - fragmentsrc = "#version 130\n" + defines + "\n#line 0\n" + fragmentsrc; + + int maxGlslVersion = 330; + int shaderVersion = MIN((int)round(gl.glslversion * 10) * 10, maxGlslVersion); + + FString prefix; + prefix.AppendFormat("#version %d\n%s\n#line 0\n", shaderVersion, defines.GetChars()); + + vertexsrc = prefix + vertexsrc; + fragmentsrc = prefix + fragmentsrc; { int lengths[1] = { (int)vertexsrc.Len() }; @@ -320,10 +361,11 @@ bool OpenGLSWFrameBuffer::CreatePixelShader(FString vertexsrc, FString fragments if (status != GL_FALSE) { errorShader = shader->FragmentShader; glGetShaderiv(shader->FragmentShader, GL_COMPILE_STATUS, &status); } if (status == GL_FALSE) { - /*static char buffer[10000]; + static char buffer[10000]; GLsizei length = 0; buffer[0] = 0; - glGetShaderInfoLog(errorShader, 10000, &length, buffer);*/ + glGetShaderInfoLog(errorShader, 10000, &length, buffer); + //Printf("Shader compile failed: %s", buffer); *outShader = nullptr; return false; @@ -332,17 +374,23 @@ bool OpenGLSWFrameBuffer::CreatePixelShader(FString vertexsrc, FString fragments glAttachShader(shader->Program, shader->VertexShader); glAttachShader(shader->Program, shader->FragmentShader); glBindFragDataLocation(shader->Program, 0, "FragColor"); - glLinkProgram(shader->Program); - glGetProgramiv(shader->Program, GL_LINK_STATUS, &status); - if (status == GL_FALSE) - { - *outShader = nullptr; - return false; - } glBindAttribLocation(shader->Program, 0, "AttrPosition"); glBindAttribLocation(shader->Program, 1, "AttrColor0"); glBindAttribLocation(shader->Program, 2, "AttrColor1"); glBindAttribLocation(shader->Program, 3, "AttrTexCoord0"); + glLinkProgram(shader->Program); + glGetProgramiv(shader->Program, GL_LINK_STATUS, &status); + if (status == GL_FALSE) + { + static char buffer[10000]; + GLsizei length = 0; + buffer[0] = 0; + glGetProgramInfoLog(shader->Program, 10000, &length, buffer); + //Printf("Shader compile failed: %s", buffer); + + *outShader = nullptr; + return 
false; + } shader->ConstantLocations[PSCONST_Desaturation] = glGetUniformLocation(shader->Program, "Desaturation"); shader->ConstantLocations[PSCONST_PaletteMod] = glGetUniformLocation(shader->Program, "PaletteMod"); @@ -767,8 +815,6 @@ bool OpenGLSWFrameBuffer::LoadShaders() FString shaderdir, shaderpath; unsigned int i; - // We determine the best available model simply by trying them all in - // order of decreasing preference. for (i = 0; i < NUM_SHADERS; ++i) { shaderpath = shaderdir; @@ -804,7 +850,9 @@ bool OpenGLSWFrameBuffer::LoadShaders() void OpenGLSWFrameBuffer::ReleaseResources() { +#ifdef WIN32 I_SaveWindowedPos(); +#endif KillNativeTexs(); KillNativePals(); ReleaseDefaultPoolItems(); @@ -1011,27 +1059,6 @@ int OpenGLSWFrameBuffer::GetPageCount() return 1; } -//========================================================================== -// -// OpenGLSWFrameBuffer :: PaletteChanged -// -//========================================================================== - -void OpenGLSWFrameBuffer::PaletteChanged() -{ -} - -//========================================================================== -// -// OpenGLSWFrameBuffer :: QueryNewPalette -// -//========================================================================== - -int OpenGLSWFrameBuffer::QueryNewPalette() -{ - return 0; -} - //========================================================================== // // OpenGLSWFrameBuffer :: IsValid @@ -1043,17 +1070,6 @@ bool OpenGLSWFrameBuffer::IsValid() return true; } -//========================================================================== -// -// OpenGLSWFrameBuffer :: IsFullscreen -// -//========================================================================== - -bool OpenGLSWFrameBuffer::IsFullscreen() -{ - return !Windowed; -} - //========================================================================== // // OpenGLSWFrameBuffer :: Lock @@ -1080,8 +1096,6 @@ bool OpenGLSWFrameBuffer::Lock(bool buffered) void OpenGLSWFrameBuffer::Unlock() { - 
LOG1("Unlock <%d>\n", LockCount); - if (LockCount == 0) { return; @@ -1146,7 +1160,7 @@ void OpenGLSWFrameBuffer::Update() NeedGammaUpdate = false; igamma = 1 / Gamma; - if (!Windowed) + if (IsFullscreen()) { GammaRamp ramp; @@ -1154,7 +1168,6 @@ void OpenGLSWFrameBuffer::Update() { ramp.blue[i] = ramp.green[i] = ramp.red[i] = uint16_t(65535.f * powf(i / 255.f, igamma)); } - LOG("SetGammaRamp\n"); SetGammaRamp(&ramp); } psgamma[2] = psgamma[1] = psgamma[0] = igamma; @@ -1168,8 +1181,10 @@ void OpenGLSWFrameBuffer::Update() NeedPalUpdate = false; } +#ifdef WIN32 BlitCycles.Reset(); BlitCycles.Clock(); +#endif LockCount = 0; Draw3DPart(In2D <= 1); @@ -1178,8 +1193,10 @@ void OpenGLSWFrameBuffer::Update() Flip(); } +#ifdef WIN32 BlitCycles.Unclock(); //LOG1 ("cycles = %d\n", BlitCycles); +#endif Buffer = nullptr; UpdatePending = false; @@ -1198,7 +1215,7 @@ void OpenGLSWFrameBuffer::Flip() Present(); InScene = false; - if (Windowed) + if (!IsFullscreen()) { int clientWidth = GetClientWidth(); int clientHeight = GetClientHeight(); @@ -1221,6 +1238,8 @@ void OpenGLSWFrameBuffer::Flip() // //========================================================================== +#ifdef WIN32 + bool OpenGLSWFrameBuffer::PaintToWindow() { if (LockCount != 0) @@ -1231,6 +1250,8 @@ bool OpenGLSWFrameBuffer::PaintToWindow() return true; } +#endif + //========================================================================== // // OpenGLSWFrameBuffer :: Draw3DPart @@ -1257,7 +1278,7 @@ void OpenGLSWFrameBuffer::Draw3DPart(bool copy3d) FBTexture->CurrentBuffer = (FBTexture->CurrentBuffer + 1) & 1; } - uint8_t *dest = (uint8_t*)glMapBufferRange(GL_PIXEL_UNPACK_BUFFER, 0, Width * Height, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT); + uint8_t *dest = (uint8_t*)MapBuffer(GL_PIXEL_UNPACK_BUFFER, Width * Height); if (dest) { if (Pitch == Width) @@ -1382,7 +1403,7 @@ void OpenGLSWFrameBuffer::UploadPalette() PaletteTexture->CurrentBuffer = 
(PaletteTexture->CurrentBuffer + 1) & 1; } - uint8_t *pix = (uint8_t*)glMapBufferRange(GL_PIXEL_UNPACK_BUFFER, 0, 256 * 4, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT); + uint8_t *pix = (uint8_t*)MapBuffer(GL_PIXEL_UNPACK_BUFFER, 256 * 4); if (pix) { int i; @@ -1427,7 +1448,6 @@ void OpenGLSWFrameBuffer::UpdatePalette() bool OpenGLSWFrameBuffer::SetGamma(float gamma) { - LOG1("SetGamma %g\n", gamma); Gamma = gamma; NeedGammaUpdate = true; return true; @@ -1473,16 +1493,12 @@ void OpenGLSWFrameBuffer::SetVSync(bool vsync) void OpenGLSWFrameBuffer::NewRefreshRate() { - if (!Windowed) + if (IsFullscreen()) { Reset(); } } -void OpenGLSWFrameBuffer::Blank() -{ -} - void OpenGLSWFrameBuffer::SetBlendingRect(int x1, int y1, int x2, int y2) { BlendingRect.left = x1; @@ -2013,7 +2029,7 @@ bool OpenGLSWFrameBuffer::OpenGLTex::Update() Box->Owner->Tex->CurrentBuffer = (Box->Owner->Tex->CurrentBuffer + 1) & 1; int pitch = (rect.right - rect.left) * bytesPerPixel; - uint8_t *bits = (uint8_t *)glMapBufferRange(GL_PIXEL_UNPACK_BUFFER, 0, buffersize, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT); + uint8_t *bits = (uint8_t *)MapBuffer(GL_PIXEL_UNPACK_BUFFER, buffersize); dest = bits; if (!dest) { @@ -2062,7 +2078,10 @@ bool OpenGLSWFrameBuffer::OpenGLTex::Update() GLint oldBinding = 0; glGetIntegerv(GL_TEXTURE_BINDING_2D, &oldBinding); glBindTexture(GL_TEXTURE_2D, Box->Owner->Tex->Texture); - glTexSubImage2D(GL_TEXTURE_2D, 0, rect.left, rect.top, rect.right - rect.left, rect.bottom - rect.top, format == GL_RGBA8 ? 
GL_BGRA : GL_RED, GL_UNSIGNED_BYTE, 0); + if (format == GL_RGBA8) + glTexSubImage2D(GL_TEXTURE_2D, 0, rect.left, rect.top, rect.right - rect.left, rect.bottom - rect.top, GL_BGRA, GL_UNSIGNED_BYTE, 0); + else + glTexSubImage2D(GL_TEXTURE_2D, 0, rect.left, rect.top, rect.right - rect.left, rect.bottom - rect.top, GL_RED, GL_UNSIGNED_BYTE, 0); glBindTexture(GL_TEXTURE_2D, oldBinding); glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); return true; @@ -2215,7 +2234,7 @@ bool OpenGLSWFrameBuffer::OpenGLPal::Update() Tex->CurrentBuffer = (Tex->CurrentBuffer + 1) & 1; } - buff = (uint32_t *)glMapBufferRange(GL_PIXEL_UNPACK_BUFFER, 0, Remap->NumEntries * 4, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT); + buff = (uint32_t *)MapBuffer(GL_PIXEL_UNPACK_BUFFER, Remap->NumEntries * 4); if (buff == nullptr) { return false; diff --git a/src/gl/system/gl_swframebuffer.h b/src/gl/system/gl_swframebuffer.h index 5876c34c6d..51c7ae134c 100644 --- a/src/gl/system/gl_swframebuffer.h +++ b/src/gl/system/gl_swframebuffer.h @@ -19,7 +19,7 @@ class OpenGLSWFrameBuffer : public Win32GLFrameBuffer DECLARE_CLASS(OpenGLSWFrameBuffer, Win32GLFrameBuffer) #else #include "sdlglvideo.h" -class OpenGLFrameBuffer : public SDLGLFB +class OpenGLSWFrameBuffer : public SDLGLFB { // typedef SDLGLFB Super; //[C]commented, DECLARE_CLASS defines this in linux DECLARE_CLASS(OpenGLSWFrameBuffer, SDLGLFB) @@ -32,6 +32,7 @@ public: OpenGLSWFrameBuffer(void *hMonitor, int width, int height, int bits, int refreshHz, bool fullscreen); ~OpenGLSWFrameBuffer(); + bool IsValid() override; bool Lock(bool buffered) override; void Unlock() override; @@ -43,11 +44,6 @@ public: bool SetFlash(PalEntry rgb, int amount) override; void GetFlash(PalEntry &rgb, int &amount) override; int GetPageCount() override; - bool IsFullscreen() override; - void PaletteChanged() override; - int QueryNewPalette() override; - void Blank() override; - bool PaintToWindow() override; void SetVSync(bool vsync) override; 
void NewRefreshRate() override; void GetScreenshotBuffer(const uint8_t *&buffer, int &pitch, ESSType &color_type) override; @@ -68,8 +64,15 @@ public: void WipeEndScreen() override; bool WipeDo(int ticks) override; void WipeCleanup() override; + +#ifdef WIN32 + void PaletteChanged() override { } + int QueryNewPalette() override { return 0; } + void Blank() override { } + bool PaintToWindow() override; bool Is8BitMode() override { return false; } int GetTrueHeight() override { return TrueHeight; } +#endif private: struct FBVERTEX @@ -195,6 +198,8 @@ private: static uint32_t ColorRGBA(uint32_t r, uint32_t g, uint32_t b, uint32_t a) { return ColorARGB(a, r, g, b); } static uint32_t ColorXRGB(uint32_t r, uint32_t g, uint32_t b) { return ColorARGB(0xff, r, g, b); } static uint32_t ColorValue(float r, float g, float b, float a) { return ColorRGBA((uint32_t)(r * 255.0f), (uint32_t)(g * 255.0f), (uint32_t)(b * 255.0f), (uint32_t)(a * 255.0f)); } + + static void *MapBuffer(int target, int size); // The number of points for the vertex buffer. enum { NUM_VERTS = 10240 }; @@ -310,9 +315,6 @@ private: BQS_InGameColormap, }; - struct PackedTexture; - struct Atlas; - struct BufferedTris { uint8_t Flags; diff --git a/src/gl/system/gl_swwipe.cpp b/src/gl/system/gl_swwipe.cpp index 148a087c5a..44c2203b49 100644 --- a/src/gl/system/gl_swwipe.cpp +++ b/src/gl/system/gl_swwipe.cpp @@ -383,8 +383,8 @@ bool OpenGLSWFrameBuffer::Wiper_Melt::Run(int ticks, OpenGLSWFrameBuffer *fb) } if (ticks == 0) { // Only draw for the final tick. 
- RECT rect; - POINT dpt; + LTRBRect rect; + struct Point { int x, y; } dpt; dpt.x = i * fbwidth / WIDTH; dpt.y = MAX(0, y[i] * fbheight / HEIGHT); diff --git a/src/posix/cocoa/i_video.mm b/src/posix/cocoa/i_video.mm index 08a563b257..6c61b11480 100644 --- a/src/posix/cocoa/i_video.mm +++ b/src/posix/cocoa/i_video.mm @@ -101,6 +101,7 @@ @end +DFrameBuffer *CreateGLSWFrameBuffer(int width, int height, bool fullscreen); EXTERN_CVAR(Bool, ticker ) EXTERN_CVAR(Bool, vid_vsync) @@ -483,7 +484,7 @@ NSOpenGLPixelFormat* CreatePixelFormat(const OpenGLProfile profile) attributes[i++] = NSOpenGLPFAAllowOfflineRenderers; } - if (NSAppKitVersionNumber >= AppKit10_7 && OpenGLProfile::Core == profile && 1 == vid_renderer) + if (NSAppKitVersionNumber >= AppKit10_7 && OpenGLProfile::Core == profile) { NSOpenGLPixelFormatAttribute profile = NSOpenGLProfileVersion3_2Core; const char* const glversion = Args->CheckValue("-glversion"); @@ -622,7 +623,8 @@ DFrameBuffer* CocoaVideo::CreateFrameBuffer(const int width, const int height, c } else { - fb = new CocoaFrameBuffer(width, height, fullscreen); + //fb = new CocoaFrameBuffer(width, height, fullscreen); + fb = CreateGLSWFrameBuffer(width, height, fullscreen); } fb->SetFlash(flashColor, flashAmount); diff --git a/wadsrc/static/shaders/glsl/swshader.fp b/wadsrc/static/shaders/glsl/swshader.fp index 639ea92e5e..01d539c88f 100644 --- a/wadsrc/static/shaders/glsl/swshader.fp +++ b/wadsrc/static/shaders/glsl/swshader.fp @@ -17,7 +17,7 @@ uniform vec4 Gamma; vec4 TextureLookup(vec2 tex_coord) { -#if PALTEX +#if defined(PALTEX) float index = texture(Image, tex_coord).x; index = index * PaletteMod.x + PaletteMod.y; return texture(Palette, vec2(index, 0.5)); @@ -28,7 +28,7 @@ vec4 TextureLookup(vec2 tex_coord) vec4 Invert(vec4 rgb) { -#if INVERT +#if defined(INVERT) rgb.rgb = Weights.www - rgb.xyz; #endif return rgb; @@ -90,7 +90,7 @@ vec4 InGameColormap(vec2 tex_coord, vec4 color, vec4 fade) vec4 rgb = SampleTexture(tex_coord); // Desaturate 
-#if DESAT +#if defined(DESAT) vec3 intensity; intensity.rgb = vec3(Grayscale(rgb) * Desaturation.x); rgb.rgb = intensity.rgb + rgb.rgb * Desaturation.y; From dfed525e18f80a73288e7ba6c6807faa550b0e86 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 16 Oct 2016 23:18:56 +0200 Subject: [PATCH 208/912] macOS warnings and build fix --- src/CMakeLists.txt | 4 ++-- src/r_segs.cpp | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index a900b6352e..89341cfa7b 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -272,8 +272,8 @@ if( NOT NO_OPENAL ) endif() endif() -set( LLVM_COMPONENTS core support asmparser asmprinter bitreader codegen passes ipo - irreader transformutils instrumentation profiledata debuginfocodeview runtimedyld +set( LLVM_COMPONENTS core support asmparser asmprinter bitreader codegen ipo + irreader transformutils instrumentation profiledata runtimedyld object instcombine linker analysis selectiondag scalaropts vectorize executionengine mc mcdisassembler mcparser mcjit target x86asmprinter x86info x86desc x86utils x86codegen ) diff --git a/src/r_segs.cpp b/src/r_segs.cpp index cbff7d6e9b..0f723f9be3 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -1142,7 +1142,7 @@ WallscanSampler::WallscanSampler(int y1, float swal, double yrepeat, fixed_t xof } else { - double magnitude = abs(uv_stepd * 2); + double magnitude = fabs(uv_stepd * 2); bool magnifying = magnitude < 1.0f; int mipmap_offset = 0; From d654301bc2dc27f11d75fa99e1546f4162f4f8bd Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 17 Oct 2016 00:19:07 +0200 Subject: [PATCH 209/912] macOS compile fixes and support for slightly older versions of LLVM (to allow Mac users to brew install llvm) --- src/r_compiler/fixedfunction/drawercodegen.h | 2 +- src/r_compiler/llvm_include.h | 15 +- src/r_compiler/llvmdrawers.cpp | 24 +- src/r_compiler/ssa/ssa_for_block.cpp | 2 + src/r_compiler/ssa/ssa_pixelformat4f.h | 28 - 
src/r_compiler/ssa/ssa_pixelformat4ub.h | 28 - .../ssa/ssa_pixelformat4ub_argb_rev.h | 35 -- src/r_compiler/ssa/ssa_pixelformat4ub_rev.h | 28 - src/r_compiler/ssa/ssa_pixels.h | 39 -- src/r_compiler/ssa/ssa_pixeltype.h | 498 ------------------ src/r_compiler/ssa/ssa_scope.cpp | 6 +- src/r_compiler/ssa/ssa_scope.h | 4 +- src/r_compiler/ssa/ssa_stack.h | 4 +- src/r_compiler/ssa/ssa_vec4i.cpp | 11 + 14 files changed, 55 insertions(+), 669 deletions(-) delete mode 100644 src/r_compiler/ssa/ssa_pixelformat4f.h delete mode 100644 src/r_compiler/ssa/ssa_pixelformat4ub.h delete mode 100644 src/r_compiler/ssa/ssa_pixelformat4ub_argb_rev.h delete mode 100644 src/r_compiler/ssa/ssa_pixelformat4ub_rev.h delete mode 100644 src/r_compiler/ssa/ssa_pixels.h delete mode 100644 src/r_compiler/ssa/ssa_pixeltype.h diff --git a/src/r_compiler/fixedfunction/drawercodegen.h b/src/r_compiler/fixedfunction/drawercodegen.h index 27dc6f21d4..98451e1503 100644 --- a/src/r_compiler/fixedfunction/drawercodegen.h +++ b/src/r_compiler/fixedfunction/drawercodegen.h @@ -13,8 +13,8 @@ #include "r_compiler/ssa/ssa_ubyte_ptr.h" #include "r_compiler/ssa/ssa_vec4f_ptr.h" #include "r_compiler/ssa/ssa_vec4i_ptr.h" -#include "r_compiler/ssa/ssa_pixels.h" #include "r_compiler/ssa/ssa_stack.h" +#include "r_compiler/ssa/ssa_bool.h" #include "r_compiler/ssa/ssa_barycentric_weight.h" #include "r_compiler/llvm_include.h" diff --git a/src/r_compiler/llvm_include.h b/src/r_compiler/llvm_include.h index adad2827d9..c51143c585 100644 --- a/src/r_compiler/llvm_include.h +++ b/src/r_compiler/llvm_include.h @@ -25,12 +25,19 @@ #endif +#ifdef __APPLE__ +#define __STDC_LIMIT_MACROS // DataTypes.h:57:3: error: "Must #define __STDC_LIMIT_MACROS before #including Support/DataTypes.h" +#define __STDC_CONSTANT_MACROS // DataTypes.h:61:3: error: "Must #define __STDC_CONSTANT_MACROS before " "#including Support/DataTypes.h" +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wredundant-move" +#endif + #include 
#include #include #include #include -#include +//#include #include #include #include @@ -43,8 +50,14 @@ #include #include #include +#include #include #include +#include + +#ifdef __APPLE__ +#pragma clang diagnostic pop +#endif #ifdef _MSC_VER diff --git a/src/r_compiler/llvmdrawers.cpp b/src/r_compiler/llvmdrawers.cpp index 4d4aeccbe7..cddf6ec9d4 100644 --- a/src/r_compiler/llvmdrawers.cpp +++ b/src/r_compiler/llvmdrawers.cpp @@ -263,7 +263,7 @@ void LLVMDrawersImpl::CodegenDrawColumn(const char *name, DrawColumnVariant vari builder.CreateRetVoid(); if (llvm::verifyFunction(*function.func)) - I_FatalError("verifyFunction failed for " __FUNCTION__); + I_FatalError("verifyFunction failed for CodegenDrawColumn()"); } void LLVMDrawersImpl::CodegenDrawSpan(const char *name, DrawSpanVariant variant) @@ -281,7 +281,7 @@ void LLVMDrawersImpl::CodegenDrawSpan(const char *name, DrawSpanVariant variant) builder.CreateRetVoid(); if (llvm::verifyFunction(*function.func)) - I_FatalError("verifyFunction failed for " __FUNCTION__); + I_FatalError("verifyFunction failed for CodegenDrawSpan()"); } void LLVMDrawersImpl::CodegenDrawWall(const char *name, DrawWallVariant variant, int columns) @@ -300,7 +300,7 @@ void LLVMDrawersImpl::CodegenDrawWall(const char *name, DrawWallVariant variant, builder.CreateRetVoid(); if (llvm::verifyFunction(*function.func)) - I_FatalError("verifyFunction failed for " __FUNCTION__); + I_FatalError("verifyFunction failed for CodegenDrawWall()"); } void LLVMDrawersImpl::CodegenDrawSky(const char *name, DrawSkyVariant variant, int columns) @@ -319,7 +319,7 @@ void LLVMDrawersImpl::CodegenDrawSky(const char *name, DrawSkyVariant variant, i builder.CreateRetVoid(); if (llvm::verifyFunction(*function.func)) - I_FatalError("verifyFunction failed for " __FUNCTION__); + I_FatalError("verifyFunction failed for CodegenDrawSky()"); } llvm::Type *LLVMDrawersImpl::GetDrawColumnArgsStruct(llvm::LLVMContext &context) @@ -469,7 +469,11 @@ LLVMProgram::LLVMProgram() 
I_FatalError("Could not find LLVM target: %s", errorstring.c_str()); TargetOptions opt; +#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 9) + Reloc::Model relocModel = Reloc::Default; +#else auto relocModel = Optional(); +#endif machine = target->createTargetMachine(targetTriple, cpuName, cpuFeaturesStr, opt, relocModel, CodeModel::JITDefault, CodeGenOpt::Aggressive); if (!machine) I_FatalError("Could not create LLVM target machine"); @@ -478,7 +482,11 @@ LLVMProgram::LLVMProgram() mModule = std::make_unique("render", context()); mModule->setTargetTriple(targetTriple); +#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 9) + mModule->setDataLayout(new DataLayout(*machine->getSubtargetImpl()->getDataLayout())); +#else mModule->setDataLayout(machine->createDataLayout()); +#endif } @@ -489,8 +497,10 @@ void LLVMProgram::CreateEE() legacy::FunctionPassManager PerFunctionPasses(mModule.get()); legacy::PassManager PerModulePasses; +#if LLVM_VERSION_MAJOR > 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 9) PerFunctionPasses.add(createTargetTransformInfoWrapperPass(machine->getTargetIRAnalysis())); PerModulePasses.add(createTargetTransformInfoWrapperPass(machine->getTargetIRAnalysis())); +#endif PassManagerBuilder passManagerBuilder; passManagerBuilder.OptLevel = 3; @@ -532,13 +542,17 @@ std::string LLVMProgram::DumpModule() { std::string str; llvm::raw_string_ostream stream(str); +#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 9) + mModule->print(stream, nullptr); +#else mModule->print(stream, nullptr, false, true); +#endif return stream.str(); } void *LLVMProgram::PointerToFunction(const char *name) { - return reinterpret_cast(mEngine->getFunctionAddress(name)); + return reinterpret_cast(mEngine->getFunctionAddress(name)); } void LLVMProgram::StopLogFatalErrors() diff --git a/src/r_compiler/ssa/ssa_for_block.cpp b/src/r_compiler/ssa/ssa_for_block.cpp index 
0f7e01e198..6039dab1f6 100644 --- a/src/r_compiler/ssa/ssa_for_block.cpp +++ b/src/r_compiler/ssa/ssa_for_block.cpp @@ -16,6 +16,7 @@ SSAForBlock::SSAForBlock() void SSAForBlock::loop_block(SSABool true_condition, int unroll_count) { auto branch = SSAScope::builder().CreateCondBr(true_condition.v, loop_basic_block, end_basic_block); +#if LLVM_VERSION_MAJOR > 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 9) if (unroll_count > 0) { using namespace llvm; @@ -29,6 +30,7 @@ void SSAForBlock::loop_block(SSABool true_condition, int unroll_count) auto md_loop = MDNode::getDistinct(SSAScope::context(), { md_unroll_enable, md_unroll_count }); branch->setMetadata(LLVMContext::MD_loop, md_loop); } +#endif SSAScope::builder().SetInsertPoint(loop_basic_block); } diff --git a/src/r_compiler/ssa/ssa_pixelformat4f.h b/src/r_compiler/ssa/ssa_pixelformat4f.h deleted file mode 100644 index 9cefb517b4..0000000000 --- a/src/r_compiler/ssa/ssa_pixelformat4f.h +++ /dev/null @@ -1,28 +0,0 @@ - -#pragma once - -#include "ssa_int.h" -#include "ssa_float_ptr.h" - -class SSAPixelFormat4f -{ -public: - SSAPixelFormat4f() { } - SSAPixelFormat4f(SSAFloatPtr pixels, SSAInt width, SSAInt height) : _pixels(pixels) { } - - SSAFloatPtr pixels() { return _pixels; } - SSAFloatPtr pixels() const { return _pixels; } - - SSAVec4f get4f(SSAInt index, bool constantScopeDomain) const - { - return _pixels[index * 4].load_vec4f(constantScopeDomain); - } - - void set4f(SSAInt index, const SSAVec4f &pixel) - { - _pixels[index * 4].store_vec4f(pixel); - } - -protected: - SSAFloatPtr _pixels; -}; diff --git a/src/r_compiler/ssa/ssa_pixelformat4ub.h b/src/r_compiler/ssa/ssa_pixelformat4ub.h deleted file mode 100644 index 91b04557c4..0000000000 --- a/src/r_compiler/ssa/ssa_pixelformat4ub.h +++ /dev/null @@ -1,28 +0,0 @@ - -#pragma once - -#include "ssa_int.h" -#include "ssa_ubyte_ptr.h" - -class SSAPixelFormat4ub -{ -public: - SSAPixelFormat4ub() { } - SSAPixelFormat4ub(SSAUBytePtr pixels, SSAInt width, 
SSAInt height) : _pixels(pixels) { } - - SSAUBytePtr pixels() { return _pixels; } - SSAUBytePtr pixels() const { return _pixels; } - - SSAVec4f get4f(SSAInt index, bool constantScopeDomain) const - { - return SSAVec4f(_pixels[index * 4].load_vec4ub(constantScopeDomain)) * (1.0f / 255.0f); - } - - void set4f(SSAInt index, const SSAVec4f &pixel) - { - _pixels[index * 4].store_vec4ub(SSAVec4i(pixel * 255.0f)); - } - -private: - SSAUBytePtr _pixels; -}; diff --git a/src/r_compiler/ssa/ssa_pixelformat4ub_argb_rev.h b/src/r_compiler/ssa/ssa_pixelformat4ub_argb_rev.h deleted file mode 100644 index 1f7e4eb0f4..0000000000 --- a/src/r_compiler/ssa/ssa_pixelformat4ub_argb_rev.h +++ /dev/null @@ -1,35 +0,0 @@ - -#pragma once - -#include "ssa_int.h" -#include "ssa_ubyte_ptr.h" - -class SSAPixelFormat4ub_argb_rev -{ -public: - SSAPixelFormat4ub_argb_rev() { } - SSAPixelFormat4ub_argb_rev(SSAUBytePtr pixels, SSAInt width, SSAInt height) : _pixels(pixels) { } - - SSAUBytePtr pixels() { return _pixels; } - SSAUBytePtr pixels() const { return _pixels; } -/* - void get4f(SSAInt index, SSAVec4f &out_pixel1, SSAVec4f &out_pixel2) const - { - SSAVec8s p = _pixels[index * 4].load_vec8s(); - out_pixel1 = SSAVec4f::shuffle(SSAVec4f(SSAVec4i::extendlo(p)) * (1.0f / 255.0f), 2, 1, 0, 3); - out_pixel2 = SSAVec4f::shuffle(SSAVec4f(SSAVec4i::extendhi(p)) * (1.0f / 255.0f), 2, 1, 0, 3); - } -*/ - SSAVec4f get4f(SSAInt index, bool constantScopeDomain) const - { - return SSAVec4f::shuffle(SSAVec4f(_pixels[index * 4].load_vec4ub(constantScopeDomain)) * (1.0f / 255.0f), 2, 1, 0, 3); - } - - void set4f(SSAInt index, const SSAVec4f &pixel) - { - _pixels[index * 4].store_vec4ub(SSAVec4i(SSAVec4f::shuffle(pixel * 255.0f, 2, 1, 0, 3))); - } - -public: - SSAUBytePtr _pixels; -}; diff --git a/src/r_compiler/ssa/ssa_pixelformat4ub_rev.h b/src/r_compiler/ssa/ssa_pixelformat4ub_rev.h deleted file mode 100644 index 9b50ec00fe..0000000000 --- a/src/r_compiler/ssa/ssa_pixelformat4ub_rev.h +++ /dev/null @@ -1,28 
+0,0 @@ - -#pragma once - -#include "ssa_int.h" -#include "ssa_ubyte_ptr.h" - -class SSAPixelFormat4ub_rev -{ -public: - SSAPixelFormat4ub_rev() { } - SSAPixelFormat4ub_rev(SSAUBytePtr pixels, SSAInt width, SSAInt height) : _pixels(pixels) { } - - SSAUBytePtr pixels() { return _pixels; } - SSAUBytePtr pixels() const { return _pixels; } - - SSAVec4f get4f(SSAInt index, bool constantScopeDomain) const - { - return SSAVec4f::shuffle(SSAVec4f(_pixels[index * 4].load_vec4ub(constantScopeDomain)) * (1.0f / 255.0f), 3, 2, 1, 0); - } - - void set4f(SSAInt index, const SSAVec4f &pixel) - { - _pixels[index * 4].store_vec4ub(SSAVec4i(SSAVec4f::shuffle(pixel * 255.0f, 3, 2, 1, 0))); - } - -public: - SSAUBytePtr _pixels; -}; diff --git a/src/r_compiler/ssa/ssa_pixels.h b/src/r_compiler/ssa/ssa_pixels.h deleted file mode 100644 index a4209d439a..0000000000 --- a/src/r_compiler/ssa/ssa_pixels.h +++ /dev/null @@ -1,39 +0,0 @@ - -#pragma once - -#include "ssa_ubyte.h" -#include "ssa_ubyte_ptr.h" -#include "ssa_float.h" -#include "ssa_float_ptr.h" -#include "ssa_int.h" -#include "ssa_pixeltype.h" -//#include "ssa_pixelformat1f.h" -//#include "ssa_pixelformat2f.h" -//#include "ssa_pixelformat3f.h" -#include "ssa_pixelformat4f.h" -//#include "ssa_pixelformat1ub.h" -//#include "ssa_pixelformat2ub.h" -//#include "ssa_pixelformat3ub.h" -//#include "ssa_pixelformat3ub_rev.h" -#include "ssa_pixelformat4ub.h" -//#include "ssa_pixelformat4ub_argb.h" -#include "ssa_pixelformat4ub_rev.h" -#include "ssa_pixelformat4ub_argb_rev.h" -//#include "ssa_pixelformat4ub_channel.h" - -//typedef SSAPixelType SSAPixels1f; -//typedef SSAPixelType SSAPixels2f; -//typedef SSAPixelType SSAPixels3f; -typedef SSAPixelType SSAPixels4f; - -//typedef SSAPixelType SSAPixels1ub; -//typedef SSAPixelType SSAPixels2ub; -//typedef SSAPixelType SSAPixels3ub; -typedef SSAPixelType SSAPixels4ub; -//typedef SSAPixelType SSAPixels4ub_argb; - -//typedef SSAPixelType SSAPixels3ub_rev; -typedef SSAPixelType SSAPixels4ub_rev; 
-typedef SSAPixelType SSAPixels4ub_argb_rev; - -//typedef SSAPixelType SSAPixels4ub_channel; diff --git a/src/r_compiler/ssa/ssa_pixeltype.h b/src/r_compiler/ssa/ssa_pixeltype.h deleted file mode 100644 index 8614f171d1..0000000000 --- a/src/r_compiler/ssa/ssa_pixeltype.h +++ /dev/null @@ -1,498 +0,0 @@ - -#pragma once - -#include "ssa_int.h" -#include "ssa_float.h" -#include "ssa_vec4f.h" -#include "ssa_bool.h" -#include "ssa_if_block.h" -#include "ssa_phi.h" - -template -class SSAPixelType : public PixelFormat -{ -public: - SSAPixelType() - { - } - - SSAPixelType(SSAInt width, SSAInt height, PixelType pixels) - : PixelFormat(pixels, width, height), _width(width), _height(height) - { - _width32 = SSAVec4i(_width); - SSAVec4i height32(_height); - _widthps = SSAVec4f(_width32); - _heightps = SSAVec4f(height32); - _width16 = SSAVec8s(_width32, _width32); - - _widthheight = SSAVec4i::shuffle(_width32, height32, 0, 0, 4, 4); - _widthheightps = SSAVec4i::shuffle(_widthps, _heightps, 0, 0, 4, 4); - } - - SSAInt width() const { return _width; } - SSAInt height() const { return _height; } - SSAInt size() const { return _width * _height; } - - SSABool in_bounds(SSAInt i) const { return i >= 0 && i < _width * _height; } - SSABool in_bounds(SSAInt x, SSAInt y) const { return x>= 0 && x < _width && y >= 0 && y < _height; } - //void throw_if_out_of_bounds(SSAInt i) const { if (!in_bounds(i)) throw clan::Exception("Out of bounds"); } - //void throw_if_out_of_bounds(SSAInt x, SSAInt y) const { if (!in_bounds(x, y)) throw clan::Exception("Out of bounds"); } - - SSAInt s_to_x(SSAFloat s) const { return round(s * SSAFloat(_width)); } - SSAInt t_to_y(SSAFloat t) const { return round(t * SSAFloat(_height)); } - SSAInt clamp_x(SSAInt x) const { return clamp(x, _width); } - SSAInt clamp_y(SSAInt y) const { return clamp(y, _height); } - SSAInt repeat_x(SSAInt x) const { return repeat(x,_width); } - SSAInt repeat_y(SSAInt y) const { return repeat(y, _height); } - SSAInt mirror_x(SSAInt x) 
const { return mirror(x, _width); } - SSAInt mirror_y(SSAInt y) const { return mirror(y, _height); } - - static SSAInt int_min(SSAInt a, SSAInt b) - { - SSAPhi phi; - SSAIfBlock branch; - branch.if_block(a <= b); - phi.add_incoming(a); - branch.else_block(); - phi.add_incoming(b); - branch.end_block(); - return phi.create(); - } - - static SSAInt int_max(SSAInt a, SSAInt b) - { - SSAPhi phi; - SSAIfBlock branch; - branch.if_block(a >= b); - phi.add_incoming(a); - branch.else_block(); - phi.add_incoming(b); - branch.end_block(); - return phi.create(); - } - - static SSAInt clamp(SSAInt v, SSAInt size) - { - return int_max(int_min(v, size - 1), 0); - } - - static SSAInt repeat(SSAInt v, SSAInt size) - { - SSAPhi phi; - SSAIfBlock branch; - branch.if_block(v >= 0); - phi.add_incoming(v % size); - branch.else_block(); - phi.add_incoming(size - 1 + v % size); - branch.end_block(); - return phi.create(); - } - - static SSAInt mirror(SSAInt v, SSAInt size) - { - SSAInt size2 = size * 2; - v = repeat(v, size2); - - SSAPhi phi; - SSAIfBlock branch; - branch.if_block(v < size); - phi.add_incoming(v); - branch.else_block(); - phi.add_incoming(size2 - v - 1); - branch.end_block(); - return phi.create(); - } - - static SSAInt round(SSAFloat v) - { - SSAPhi phi; - SSAIfBlock branch; - branch.if_block(v >= 0.0f); - phi.add_incoming(v + 0.5f); - branch.else_block(); - phi.add_incoming(v - 0.5f); - branch.end_block(); - return SSAInt(phi.create()); - } - - // To do: fix this: - static SSAInt int_floor(SSAFloat v) - { - return SSAInt(v); - } - static SSAFloat fract(SSAFloat v) { return v - SSAFloat(int_floor(v)); } - - SSAVec4f get4f(SSAInt x, SSAInt y) const { return PixelFormat::get4f(x + y * _width); } - void set4f(SSAInt x, SSAInt y, const SSAVec4f &pixel) { PixelFormat::set4f(x + y * _width, pixel); } - - SSAVec4f get_clamp4f(SSAInt x, SSAInt y) const { return get4f(clamp_x(x), clamp_y(y)); } - SSAVec4f get_repeat4f(SSAInt x, SSAInt y) const { return get4f(repeat_x(x), 
repeat_y(y)); } - SSAVec4f get_mirror4f(SSAInt x, SSAInt y) const { return get4f(mirror_x(x), mirror_y(y)); } - - SSAVec4f linear_interpolate4f(SSAFloat s, SSAFloat t, const SSAVec4f *samples) const - { - SSAFloat a = fract(s * SSAFloat(_width) - 0.5f); - SSAFloat b = fract(t * SSAFloat(_height) - 0.5f); - SSAFloat inv_a = 1.0f - a; - SSAFloat inv_b = 1.0f - b; - return - samples[0] * (inv_a * inv_b) + - samples[1] * (a * inv_b) + - samples[2] * (inv_a * b) + - samples[3] * (a * b); - } - - void gather_clamp4f(SSAFloat s, SSAFloat t, SSAVec4f *out_pixels) const - { - SSAInt x = int_floor(s * SSAFloat(_width) - 0.5f); - SSAInt y = int_floor(t * SSAFloat(_height) - 0.5f); - out_pixels[0] = get_clamp4f(x, y); - out_pixels[1] = get_clamp4f(x + 1, y); - out_pixels[2] = get_clamp4f(x, y + 1); - out_pixels[3] = get_clamp4f(x + 1, y + 1); - /* - SSAInt x0 = clamp_x(x); - SSAInt x1 = clamp_x(x + 1); - SSAInt y0 = clamp_y(y); - SSAInt y1 = clamp_y(y + 1); - SSAInt offset0 = y0 * _width; - SSAInt offset1 = y1 * _width; - SSAPhi phi0; - SSAPhi phi1; - SSAPhi phi2; - SSAPhi phi3; - SSAIfBlock if0; - if0.if_block(x0 + 1 == x1); - phi0.add_incoming(PixelFormat::get4f(x0 + offset0)); - phi1.add_incoming(PixelFormat::get4f(x1 + offset0)); - phi2.add_incoming(PixelFormat::get4f(x0 + offset1)); - phi3.add_incoming(PixelFormat::get4f(x1 + offset1)); - if0.else_block(); - phi0.add_incoming(PixelFormat::get4f(x0 + offset0)); - phi1.add_incoming(PixelFormat::get4f(x1 + offset0)); - phi2.add_incoming(PixelFormat::get4f(x0 + offset1)); - phi3.add_incoming(PixelFormat::get4f(x1 + offset1)); - if0.end_block(); - out_pixels[0] = phi0.create(); - out_pixels[1] = phi1.create(); - out_pixels[2] = phi2.create(); - out_pixels[3] = phi3.create(); - */ - } - - void gather_repeat4f(SSAFloat s, SSAFloat t, SSAVec4f *out_pixels) const - { - SSAInt x = int_floor(s * SSAFloat(_width) - 0.5f); - SSAInt y = int_floor(t * SSAFloat(_height) - 0.5f); - out_pixels[0] = get_repeat4f(x, y); - out_pixels[1] = 
get_repeat4f(x + 1, y); - out_pixels[2] = get_repeat4f(x, y + 1); - out_pixels[3] = get_repeat4f(x + 1, y + 1); - } - - void gather_mirror4f(SSAFloat s, SSAFloat t, SSAVec4f *out_pixels) const - { - SSAInt x = int_floor(s * SSAFloat(_width) - 0.5f); - SSAInt y = int_floor(t * SSAFloat(_height) - 0.5f); - out_pixels[0] = get_mirror4f(x, y); - out_pixels[1] = get_mirror4f(x + 1, y); - out_pixels[2] = get_mirror4f(x, y + 1); - out_pixels[3] = get_mirror4f(x + 1, y + 1); - } - - SSAVec4f nearest_clamp4f(SSAFloat s, SSAFloat t) const { return get_clamp4f(s_to_x(s), t_to_y(t)); } - SSAVec4f nearest_repeat4f(SSAFloat s, SSAFloat t) const { return get_repeat4f(s_to_x(s), t_to_y(t)); } - SSAVec4f nearest_mirror4f(SSAFloat s, SSAFloat t) const { return get_mirror4f(s_to_x(s), t_to_y(t)); } - - SSAVec4f linear_clamp4f(SSAFloat s, SSAFloat t) const - { - SSAVec4f samples[4]; - gather_clamp4f(s, t, samples); - return linear_interpolate4f(s, t, samples); - } - - SSAVec4f linear_repeat4f(SSAFloat s, SSAFloat t) const - { - SSAVec4f samples[4]; - gather_repeat4f(s, t, samples); - return linear_interpolate4f(s, t, samples); - } - - SSAVec4f linear_mirror4f(SSAFloat s, SSAFloat t) const - { - SSAVec4f samples[4]; - gather_mirror4f(s, t, samples); - return linear_interpolate4f(s, t, samples); - } - - ///////////////////////////////////////////////////////////////////////// - // Packed versions: - - SSAVec4i s_to_x(SSAVec4f s) const { return round(s * SSAVec4f(_width)); } - SSAVec4i t_to_y(SSAVec4f t) const { return round(t * SSAVec4f(_height)); } - SSAVec4i clamp_x(SSAVec4i x) const { return clamp(x, _width); } - SSAVec4i clamp_y(SSAVec4i y) const { return clamp(y, _height); } - SSAVec4i repeat_x(SSAVec4i x) const { return repeat(x,_width); } - SSAVec4i repeat_y(SSAVec4i y) const { return repeat(y, _height); } - SSAVec4i mirror_x(SSAVec4i x) const { return mirror(x, _width); } - SSAVec4i mirror_y(SSAVec4i y) const { return mirror(y, _height); } - - static SSAVec4i clamp(SSAVec4i v, 
SSAInt size) - { - return SSAVec4i::max_sse41(SSAVec4i::min_sse41(v, size - 1), 0); - } - - static SSAVec4i repeat(SSAVec4i v, SSAInt size) - { - return clamp(v, size); - /*SSAPhi phi; - SSAIfBlock branch; - branch.if_block(v >= 0); - phi.add_incoming(v % size); - branch.else_block(); - phi.add_incoming(size - 1 + v % size); - branch.end_block(); - return phi.create();*/ - } - - static SSAVec4i mirror(SSAVec4i v, SSAInt size) - { - return clamp(v, size); - /*SSAInt size2 = size * 2; - v = repeat(v, size2); - - SSAPhi phi; - SSAIfBlock branch; - branch.if_block(v < size); - phi.add_incoming(v); - branch.else_block(); - phi.add_incoming(size2 - v - 1); - branch.end_block(); - return phi.create();*/ - } - - static SSAVec4i round(SSAVec4f v) - { - // Maybe we should use the normal round SSE function (but that requires the rounding mode is set the round to nearest before the code runs) - SSAVec4i signbit = (SSAVec4i::bitcast(v) & 0x80000000); - SSAVec4f signed_half = SSAVec4f::bitcast(signbit | SSAVec4i::bitcast(SSAVec4f(0.5f))); - return v + signed_half; - } - - static SSAVec4i int_floor(SSAVec4f v) - { - return SSAVec4i(v) - (SSAVec4i::bitcast(v) >> 31); - } - - static SSAVec4f fract(SSAVec4f v) - { - // return v - SSAVec4f::floor_sse4(v); - return v - SSAVec4f(int_floor(v)); - } - - template - SSAVec4f nearest_helper4f(SSAVec4f s, SSAVec4f t, int index, WrapXFunctor wrap_x, WrapYFunctor wrap_y) const - { - SSAVec4i x = int_floor(s * _widthps - 0.5f); - SSAVec4i y = int_floor(t * _heightps - 0.5f); - SSAVec8s y16 = SSAVec8s(wrap_y(y), wrap_y(y)); - SSAVec8s offsethi = SSAVec8s::mulhi(y16, _width16); - SSAVec8s offsetlo = y16 * _width16; - SSAVec4i offset = SSAVec4i::combinelo(offsetlo, offsethi) + x; - return PixelFormat::get4f(offset[index]); - } - - SSAVec4f nearest_clamp4f(SSAVec4f s, SSAVec4f t, int index) const - { - struct WrapX { WrapX(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i v) { return self->clamp_x(v); } const SSAPixelType 
*self; }; - struct WrapY { WrapY(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i v) { return self->clamp_y(v); } const SSAPixelType *self; }; - return nearest_helper4f(s, t, index, WrapX(this), WrapY(this)); - /* - return nearest_helper4f( - s, t, index, - [this](SSAVec4i v) -> SSAVec4i { return clamp_x(v); }, - [this](SSAVec4i v) -> SSAVec4i { return clamp_y(v); }); - */ - } - - SSAVec4f nearest_repeat4f(SSAVec4f s, SSAVec4f t, int index) const - { - struct WrapX { WrapX(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i v) { return self->repeat_x(v); } const SSAPixelType *self; }; - struct WrapY { WrapY(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i v) { return self->repeat_y(v); } const SSAPixelType *self; }; - return nearest_helper4f(s, t, index, WrapX(this), WrapY(this)); - /* - return nearest_helper4f( - s, t, index, - [this](SSAVec4i v) -> SSAVec4i { return repeat_x(v); }, - [this](SSAVec4i v) -> SSAVec4i { return repeat_y(v); }); - */ - } - - SSAVec4f nearest_mirror4f(SSAVec4f s, SSAVec4f t, int index) const - { - struct WrapX { WrapX(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i v) { return self->mirror_x(v); } const SSAPixelType *self; }; - struct WrapY { WrapY(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i v) { return self->mirror_y(v); } const SSAPixelType *self; }; - return nearest_helper4f(s, t, index, WrapX(this), WrapY(this)); - /* - return nearest_helper4f( - s, t, index, - [this](SSAVec4i v) -> SSAVec4i { return mirror_x(v); }, - [this](SSAVec4i v) -> SSAVec4i { return mirror_y(v); }); - */ - } - - template - void gather_helper4f(SSAVec4f s, SSAVec4f t, int index, SSAVec4f *out_pixels, WrapXFunctor wrap_x, WrapYFunctor wrap_y) const - { - SSAVec4i x = int_floor(s * _widthps - 0.5f); - SSAVec4i y = int_floor(t * _heightps - 0.5f); - SSAVec8s y16 = SSAVec8s(wrap_y(y + 1), wrap_y(y)); - SSAVec8s offsethi = SSAVec8s::mulhi(y16, 
_width16); - SSAVec8s offsetlo = y16 * _width16; - SSAVec4i x0 = wrap_x(x); - SSAVec4i x1 = wrap_x(x + 1); - SSAVec4i line0 = SSAVec4i::combinehi(offsetlo, offsethi); - SSAVec4i line1 = SSAVec4i::combinelo(offsetlo, offsethi); - SSAVec4i offset0 = x0 + line0; - SSAVec4i offset1 = x1 + line0; - SSAVec4i offset2 = x0 + line1; - SSAVec4i offset3 = x1 + line1; - out_pixels[0] = PixelFormat::get4f(offset0[index]); - out_pixels[1] = PixelFormat::get4f(offset1[index]); - out_pixels[2] = PixelFormat::get4f(offset2[index]); - out_pixels[3] = PixelFormat::get4f(offset3[index]); - } - - void gather_clamp4f(SSAVec4f s, SSAVec4f t, int index, SSAVec4f *out_pixels) const - { - struct WrapX { WrapX(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i v) { return self->clamp_x(v); } const SSAPixelType *self; }; - struct WrapY { WrapY(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i v) { return self->clamp_y(v); } const SSAPixelType *self; }; - return gather_helper4f(s, t, index, out_pixels, WrapX(this), WrapY(this)); - /* - gather_helper4f( - s, t, index, out_pixels, - [this](SSAVec4i v) -> SSAVec4i { return clamp_x(v); }, - [this](SSAVec4i v) -> SSAVec4i { return clamp_y(v); }); - */ - } - - void gather_repeat4f(SSAVec4f s, SSAVec4f t, int index, SSAVec4f *out_pixels) const - { - struct WrapX { WrapX(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i v) { return self->repeat_x(v); } const SSAPixelType *self; }; - struct WrapY { WrapY(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i v) { return self->repeat_y(v); } const SSAPixelType *self; }; - return gather_helper4f(s, t, index, out_pixels, WrapX(this), WrapY(this)); - /* - gather_helper4f( - s, t, index, out_pixels, - [this](SSAVec4i v) -> SSAVec4i { return repeat_x(v); }, - [this](SSAVec4i v) -> SSAVec4i { return repeat_y(v); }); - */ - } - - void gather_mirror4f(SSAVec4f s, SSAVec4f t, int index, SSAVec4f *out_pixels) const - { - struct 
WrapX { WrapX(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i v) { return self->mirror_x(v); } const SSAPixelType *self; }; - struct WrapY { WrapY(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i v) { return self->mirror_y(v); } const SSAPixelType *self; }; - return gather_helper4f(s, t, index, out_pixels, WrapX(this), WrapY(this)); - /* - gather_helper4f( - s, t, index, out_pixels, - [this](SSAVec4i v) -> SSAVec4i { return mirror_x(v); }, - [this](SSAVec4i v) -> SSAVec4i { return mirror_y(v); }); - */ - } - - SSAVec4f linear_clamp4f(SSAVec4f s, SSAVec4f t, int index) const - { - SSAScopeHint hint("linearclamp"); - SSAVec4f samples[4]; - gather_clamp4f(s, t, index, samples); - return linear_interpolate4f(s, t, index, samples); - } - - SSAVec4f linear_repeat4f(SSAVec4f s, SSAVec4f t, int index) const - { - SSAVec4f samples[4]; - gather_repeat4f(s, t, index, samples); - return linear_interpolate4f(s, t, index, samples); - } - - SSAVec4f linear_mirror4f(SSAVec4f s, SSAVec4f t, int index) const - { - SSAVec4f samples[4]; - gather_mirror4f(s, t, index, samples); - return linear_interpolate4f(s, t, index, samples); - } - - SSAVec4f linear_interpolate4f(SSAVec4f s, SSAVec4f t, int index, const SSAVec4f *samples) const - { - SSAVec4f a = fract(s * _widthps - 0.5f); - SSAVec4f b = fract(t * _heightps - 0.5f); - SSAVec4f inv_a = 1.0f - a; - SSAVec4f inv_b = 1.0f - b; - return - samples[0] * SSAVec4f::shuffle(inv_a * inv_b, index, index, index, index) + - samples[1] * SSAVec4f::shuffle(a * inv_b, index, index, index, index) + - samples[2] * SSAVec4f::shuffle(inv_a * b, index, index, index, index) + - samples[3] * SSAVec4f::shuffle(a * b, index, index, index, index); - } - - ///////////////////////////////////////////////////////////////////////// - - SSAVec4i clamp(SSAVec4i sstt) const - { - return SSAVec4i::max_sse41(SSAVec4i::min_sse41(sstt, _widthheight - 1), 0); - } - - template - void gather_helper4f(SSAVec4f st, SSAVec4f 
*out_pixels, WrapFunctor wrap) const - { - SSAVec4f sstt = SSAVec4f::shuffle(st, 0, 0, 1, 1); - SSAVec4i xxyy = wrap(int_floor(sstt * _widthheightps - 0.5f) + SSAVec4i(0, 1, 0, 1)); - SSAVec4i xxoffset = SSAVec4f::shuffle(xxyy, xxyy * _width32, 0, 1, 6, 7); - SSAVec4i offsets = SSAVec4i::shuffle(xxoffset, 0, 1, 0, 1) + SSAVec4i::shuffle(xxoffset, 2, 2, 3, 3); - out_pixels[0] = PixelFormat::get4f(offsets[0]); - out_pixels[1] = PixelFormat::get4f(offsets[1]); - out_pixels[2] = PixelFormat::get4f(offsets[2]); - out_pixels[3] = PixelFormat::get4f(offsets[3]); - } - - void gather_clamp4f(SSAVec4f st, SSAVec4f *out_pixels) const - { - struct Wrap { Wrap(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i sstt) { return self->clamp(sstt); } const SSAPixelType *self; }; - return gather_helper4f(st, out_pixels, Wrap(this)); - } - - SSAVec4f linear_clamp4f(SSAVec4f st) const - { - SSAScopeHint hint("linearclamp"); - SSAVec4f samples[4]; - gather_clamp4f(st, samples); - return linear_interpolate4f(st, samples); - } - - SSAVec4f linear_interpolate4f(SSAVec4f st, const SSAVec4f *samples) const - { - SSAVec4f sstt = SSAVec4f::shuffle(st, 0, 0, 1, 1); - SSAVec4f aabb = fract(sstt * _widthheightps - 0.5f); - SSAVec4f inv_aabb = 1.0f - aabb; - SSAVec4f ab_inv_ab = SSAVec4f::shuffle(aabb, inv_aabb, 0, 2, 4, 6); - SSAVec4f ab__inv_a_b__inv_a_inv_b__a_invb = ab_inv_ab * SSAVec4f::shuffle(ab_inv_ab, 1, 2, 3, 0); - return - samples[0] * SSAVec4f::shuffle(ab__inv_a_b__inv_a_inv_b__a_invb, 2, 2, 2, 2) + - samples[1] * SSAVec4f::shuffle(ab__inv_a_b__inv_a_inv_b__a_invb, 3, 3, 3, 3) + - samples[2] * SSAVec4f::shuffle(ab__inv_a_b__inv_a_inv_b__a_invb, 1, 1, 1, 1) + - samples[3] * SSAVec4f::shuffle(ab__inv_a_b__inv_a_inv_b__a_invb, 0, 0, 0, 0); - } - -public: - SSAInt _width; - SSAInt _height; - SSAVec4i _width32; - SSAVec8s _width16; - SSAVec4f _widthps; - SSAVec4f _heightps; - - SSAVec4i _widthheight; - SSAVec4f _widthheightps; -}; diff --git 
a/src/r_compiler/ssa/ssa_scope.cpp b/src/r_compiler/ssa/ssa_scope.cpp index 520f301a4a..7c6f090fe7 100644 --- a/src/r_compiler/ssa/ssa_scope.cpp +++ b/src/r_compiler/ssa/ssa_scope.cpp @@ -41,12 +41,12 @@ llvm::Function *SSAScope::intrinsic(llvm::Intrinsic::ID id, llvm::ArrayRefgetParent()->getEntryBlock(); diff --git a/src/r_compiler/ssa/ssa_scope.h b/src/r_compiler/ssa/ssa_scope.h index c942a7c377..1c9dfb3629 100644 --- a/src/r_compiler/ssa/ssa_scope.h +++ b/src/r_compiler/ssa/ssa_scope.h @@ -12,8 +12,8 @@ public: static llvm::Module *module(); static llvm::IRBuilder<> &builder(); static llvm::Function *intrinsic(llvm::Intrinsic::ID id, llvm::ArrayRef parameter_types = llvm::ArrayRef()); - static llvm::Value *alloca(llvm::Type *type); - static llvm::Value *alloca(llvm::Type *type, SSAInt size); + static llvm::Value *alloc_stack(llvm::Type *type); + static llvm::Value *alloc_stack(llvm::Type *type, SSAInt size); static llvm::MDNode *constant_scope_list(); static const std::string &hint(); static void set_hint(const std::string &hint); diff --git a/src/r_compiler/ssa/ssa_stack.h b/src/r_compiler/ssa/ssa_stack.h index 435530be1f..d072f181e9 100644 --- a/src/r_compiler/ssa/ssa_stack.h +++ b/src/r_compiler/ssa/ssa_stack.h @@ -1,6 +1,8 @@ #pragma once +#include "ssa_scope.h" + template class SSAStack { @@ -8,7 +10,7 @@ public: SSAStack() : v(0) { - v = SSAScope::alloca(SSAVariable::llvm_type()); + v = SSAScope::alloc_stack(SSAVariable::llvm_type()); } SSAVariable load() const diff --git a/src/r_compiler/ssa/ssa_vec4i.cpp b/src/r_compiler/ssa/ssa_vec4i.cpp index 60d6161a58..ac36172f0f 100644 --- a/src/r_compiler/ssa/ssa_vec4i.cpp +++ b/src/r_compiler/ssa/ssa_vec4i.cpp @@ -55,10 +55,17 @@ SSAVec4i::SSAVec4i(SSAInt i0, SSAInt i1, SSAInt i2, SSAInt i3) std::vector constants; constants.resize(4, llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 0, true))); v = llvm::ConstantVector::get(constants); +#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && 
LLVM_VERSION_MINOR < 9) + v = SSAScope::builder().CreateInsertElement(v, i0.v, SSAInt(0).v, SSAScope::hint()); + v = SSAScope::builder().CreateInsertElement(v, i1.v, SSAInt(1).v, SSAScope::hint()); + v = SSAScope::builder().CreateInsertElement(v, i2.v, SSAInt(2).v, SSAScope::hint()); + v = SSAScope::builder().CreateInsertElement(v, i3.v, SSAInt(3).v, SSAScope::hint()); +#else v = SSAScope::builder().CreateInsertElement(v, i0.v, (uint64_t)0, SSAScope::hint()); v = SSAScope::builder().CreateInsertElement(v, i1.v, (uint64_t)1, SSAScope::hint()); v = SSAScope::builder().CreateInsertElement(v, i2.v, (uint64_t)2, SSAScope::hint()); v = SSAScope::builder().CreateInsertElement(v, i3.v, (uint64_t)3, SSAScope::hint()); +#endif } SSAVec4i::SSAVec4i(SSAVec4f f32) @@ -84,7 +91,11 @@ SSAVec4i SSAVec4i::insert(SSAInt index, SSAInt value) SSAVec4i SSAVec4i::insert(int index, SSAInt value) { +#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 9) + return SSAVec4i::from_llvm(SSAScope::builder().CreateInsertElement(v, value.v, SSAInt(index).v, SSAScope::hint())); +#else return SSAVec4i::from_llvm(SSAScope::builder().CreateInsertElement(v, value.v, index, SSAScope::hint())); +#endif } SSAVec4i SSAVec4i::insert(int index, int value) From f8f710d9e27eb0649546af6d65069a82b61b6fc9 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 17 Oct 2016 01:17:48 +0200 Subject: [PATCH 210/912] Fix linker errors on Windows --- src/CMakeLists.txt | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 89341cfa7b..9a5aec83b3 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -272,15 +272,16 @@ if( NOT NO_OPENAL ) endif() endif() -set( LLVM_COMPONENTS core support asmparser asmprinter bitreader codegen ipo - irreader transformutils instrumentation profiledata runtimedyld - object instcombine linker analysis selectiondag scalaropts vectorize executionengine - mc mcdisassembler mcparser mcjit 
target x86asmprinter x86info x86desc x86utils x86codegen ) # Path where it looks for the LLVM compiled files on Windows set( LLVM_PRECOMPILED_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../llvm" ) if( NOT WIN32 ) + set( LLVM_COMPONENTS core support asmparser asmprinter bitreader codegen ipo + irreader transformutils instrumentation profiledata runtimedyld + object instcombine linker analysis selectiondag scalaropts vectorize executionengine + mc mcdisassembler mcparser mcjit target x86asmprinter x86info x86desc x86utils x86codegen ) + # Example LLVM_DIR folder: C:/Development/Environment/Src/llvm-3.9.0/build/lib/cmake/llvm find_package(LLVM REQUIRED CONFIG) message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}") @@ -289,6 +290,11 @@ if( NOT WIN32 ) include_directories( ${LLVM_INCLUDE_DIRS} ) set( ZDOOM_LIBS ${ZDOOM_LIBS} ${llvm_libs} ) else() + set( LLVM_COMPONENTS core support asmparser asmprinter bitreader codegen passes ipo + irreader transformutils instrumentation profiledata debuginfocodeview runtimedyld + object instcombine linker analysis selectiondag scalaropts vectorize executionengine + mc mcdisassembler mcparser mcjit target x86asmprinter x86info x86desc x86utils x86codegen ) + include_directories( "${LLVM_PRECOMPILED_DIR}/include" ) if( X64 ) include_directories( "${LLVM_PRECOMPILED_DIR}/64bit-include" ) From 931d2410566dd54355ac92849b3018366ee5f92e Mon Sep 17 00:00:00 2001 From: raa-eruanna Date: Mon, 17 Oct 2016 01:13:58 -0400 Subject: [PATCH 211/912] - Change "vid_used3d" to true by default, in Windows. 
--- src/win32/hardware.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/win32/hardware.cpp b/src/win32/hardware.cpp index da1ced10fa..d9de965664 100644 --- a/src/win32/hardware.cpp +++ b/src/win32/hardware.cpp @@ -73,7 +73,7 @@ int currentcanvas = -1; bool changerenderer; // Software OpenGL canvas -CUSTOM_CVAR(Bool, vid_used3d, false, CVAR_ARCHIVE | CVAR_GLOBALCONFIG | CVAR_NOINITCALL) +CUSTOM_CVAR(Bool, vid_used3d, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG | CVAR_NOINITCALL) { if (self != currentcanvas) Printf("You must restart " GAMENAME " for this change to take effect.\n"); From c9a9e93c66cd875ed30a7df408ed8486f612c118 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 17 Oct 2016 11:54:03 +0200 Subject: [PATCH 212/912] Fix warning C4805: '!=': unsafe mix of type 'bool' and type 'int' in operation --- src/win32/hardware.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/win32/hardware.cpp b/src/win32/hardware.cpp index d9de965664..e67d11842c 100644 --- a/src/win32/hardware.cpp +++ b/src/win32/hardware.cpp @@ -75,7 +75,7 @@ bool changerenderer; // Software OpenGL canvas CUSTOM_CVAR(Bool, vid_used3d, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG | CVAR_NOINITCALL) { - if (self != currentcanvas) + if ((self ? 
1 : 0) != currentcanvas) Printf("You must restart " GAMENAME " for this change to take effect.\n"); } From af937366d1208e4c5cfa5163ac176796d3b231cc Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 17 Oct 2016 12:36:01 +0200 Subject: [PATCH 213/912] Added DetectRangeError function --- src/r_draw.cpp | 1 + src/r_draw.h | 1 + src/r_draw_rgba.cpp | 8 ++++++++ src/r_drawt_rgba.cpp | 2 ++ src/r_main.cpp | 1 + src/r_thread.h | 24 ++++++++++++++++++++++++ src/v_draw.cpp | 3 +++ 7 files changed, 40 insertions(+) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 6b68cf81bc..9d7160d012 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -64,6 +64,7 @@ BYTE* viewimage; extern "C" { int ylookup[MAXHEIGHT]; BYTE *dc_destorg; +int dc_destheight; } int scaledviewwidth; diff --git a/src/r_draw.h b/src/r_draw.h index 4c478a7262..2b664897d8 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -76,6 +76,7 @@ extern "C" const BYTE* dc_source2; extern "C" uint32_t dc_texturefracx; extern "C" BYTE *dc_dest, *dc_destorg; +extern "C" int dc_destheight; extern "C" int dc_count; extern "C" DWORD vplce[4]; diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 4b3d98c04b..a2c4fb9aef 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -245,6 +245,8 @@ public: args.flags |= DrawWallArgs::simple_shade; if (args.source2[0] == nullptr) args.flags |= DrawWallArgs::nearest_filter; + + DetectRangeError(args.dest, args.dest_y, args.count); } void Execute(DrawerThread *thread) override @@ -304,6 +306,8 @@ public: args.flags |= DrawWallArgs::simple_shade; if (args.source2[0] == nullptr) args.flags |= DrawWallArgs::nearest_filter; + + DetectRangeError(args.dest, args.dest_y, args.count); } void Execute(DrawerThread *thread) override @@ -368,6 +372,8 @@ public: args.flags = 0; if (dc_shade_constants.simple_shade) args.flags |= DrawColumnArgs::simple_shade; + + DetectRangeError(args.dest, args.dest_y, args.count); } void Execute(DrawerThread *thread) override @@ -410,6 +416,8 @@ 
public: args.textureheight1 = bufheight[1]; args.top_color = solid_top; args.bottom_color = solid_bottom; + + DetectRangeError(args.dest, args.dest_y, args.count); } FString DebugInfo() override diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp index bf957df3de..a3849b9af9 100644 --- a/src/r_drawt_rgba.cpp +++ b/src/r_drawt_rgba.cpp @@ -97,6 +97,8 @@ public: args.flags = 0; if (dc_shade_constants.simple_shade) args.flags |= DrawColumnArgs::simple_shade; + + DetectRangeError(args.dest, args.dest_y, args.count); } void Execute(DrawerThread *thread) override diff --git a/src/r_main.cpp b/src/r_main.cpp index ba3c4e846b..6840730466 100644 --- a/src/r_main.cpp +++ b/src/r_main.cpp @@ -823,6 +823,7 @@ void R_SetupBuffer () #endif } dc_destorg = lineptr; + dc_destheight = RenderTarget->GetHeight() - viewwindowy; for (int i = 0; i < RenderTarget->GetHeight(); i++) { ylookup[i] = i * pitch; diff --git a/src/r_thread.h b/src/r_thread.h index 5bb4132401..63c4424420 100644 --- a/src/r_thread.h +++ b/src/r_thread.h @@ -72,6 +72,30 @@ class DrawerCommand protected: int _dest_y; + void DetectRangeError(uint32_t *&dest, int &dest_y, int &count) + { +#if defined(_MSC_VER) && defined(_DEBUG) + if (dest_y < 0 || count < 0 || dest_y + count > dc_destheight) + __debugbreak(); // Buffer overrun detected! 
+#endif + + if (dest_y < 0) + { + count += dest_y; + dest_y = 0; + dest = (uint32_t*)dc_destorg; + } + else if (dest_y >= dc_destheight) + { + dest_y = 0; + count = 0; + } + + if (count < 0 || count > MAXHEIGHT) count = 0; + if (dest_y + count >= dc_destheight) + count = dc_destheight - dest_y; + } + public: DrawerCommand() { diff --git a/src/v_draw.cpp b/src/v_draw.cpp index 9218962963..1a3262eab2 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -204,7 +204,9 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) mode = R_SetPatchStyle(parms.style, parms.Alpha, 0, parms.fillcolor); BYTE *destorgsave = dc_destorg; + int destheightsave = dc_destheight; dc_destorg = screen->GetBuffer(); + dc_destheight = screen->GetHeight(); if (dc_destorg == NULL) { I_FatalError("Attempt to write to buffer of hardware canvas"); @@ -362,6 +364,7 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) R_FinishSetPatchStyle (); dc_destorg = destorgsave; + dc_destheight = destheightsave; if (ticdup != 0 && menuactive == MENU_Off) { From f32a2547bc365fdd40158d65fa87e518182424d5 Mon Sep 17 00:00:00 2001 From: raa-eruanna Date: Mon, 17 Oct 2016 16:05:14 -0400 Subject: [PATCH 214/912] - Fixed LLVM compile for LLVM 3.8 (Ubuntu) --- src/r_compiler/llvmdrawers.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/r_compiler/llvmdrawers.cpp b/src/r_compiler/llvmdrawers.cpp index cddf6ec9d4..d5078a6c0f 100644 --- a/src/r_compiler/llvmdrawers.cpp +++ b/src/r_compiler/llvmdrawers.cpp @@ -482,7 +482,7 @@ LLVMProgram::LLVMProgram() mModule = std::make_unique("render", context()); mModule->setTargetTriple(targetTriple); -#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 9) +#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 8) mModule->setDataLayout(new DataLayout(*machine->getSubtargetImpl()->getDataLayout())); #else mModule->setDataLayout(machine->createDataLayout()); @@ -497,7 +497,7 @@ 
void LLVMProgram::CreateEE() legacy::FunctionPassManager PerFunctionPasses(mModule.get()); legacy::PassManager PerModulePasses; -#if LLVM_VERSION_MAJOR > 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 9) +#if LLVM_VERSION_MAJOR > 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 8) PerFunctionPasses.add(createTargetTransformInfoWrapperPass(machine->getTargetIRAnalysis())); PerModulePasses.add(createTargetTransformInfoWrapperPass(machine->getTargetIRAnalysis())); #endif @@ -542,7 +542,7 @@ std::string LLVMProgram::DumpModule() { std::string str; llvm::raw_string_ostream stream(str); -#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 9) +#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 8) mModule->print(stream, nullptr); #else mModule->print(stream, nullptr, false, true); From df3f5ae882c888c12ed73323292f5237538d108c Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 18 Oct 2016 01:16:36 +0200 Subject: [PATCH 215/912] Use OpenGL to allocate the canvas buffer to avoid a costly memcpy --- src/gl/system/gl_swframebuffer.cpp | 110 +++++++++++++++++++---------- src/gl/system/gl_swframebuffer.h | 4 ++ 2 files changed, 77 insertions(+), 37 deletions(-) diff --git a/src/gl/system/gl_swframebuffer.cpp b/src/gl/system/gl_swframebuffer.cpp index a239f9d575..b6d163044c 100644 --- a/src/gl/system/gl_swframebuffer.cpp +++ b/src/gl/system/gl_swframebuffer.cpp @@ -1093,7 +1093,24 @@ bool OpenGLSWFrameBuffer::Lock(bool buffered) } assert(!In2D); Accel2D = vid_hw2d; - Buffer = MemBuffer; + if (UseMappedMemBuffer) + { + if (!MappedMemBuffer) + { + BindFBBuffer(); + + MappedMemBuffer = glMapBuffer(GL_PIXEL_UNPACK_BUFFER, GL_READ_WRITE); + Pitch = Width; + if (MappedMemBuffer == nullptr) + return true; + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); + } + Buffer = (uint8_t*)MappedMemBuffer; + } + else + { + Buffer = MemBuffer; + } return false; } @@ -1269,54 +1286,73 @@ bool OpenGLSWFrameBuffer::PaintToWindow() // 
//========================================================================== +void OpenGLSWFrameBuffer::BindFBBuffer() +{ + int usage = UseMappedMemBuffer ? GL_DYNAMIC_DRAW : GL_STREAM_DRAW; + + int pixelsize = IsBgra() ? 4 : 1; + int size = Width * Height * pixelsize; + + if (FBTexture->Buffers[0] == 0) + { + glGenBuffers(2, (GLuint*)FBTexture->Buffers); + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, FBTexture->Buffers[1]); + glBufferData(GL_PIXEL_UNPACK_BUFFER, size, nullptr, usage); + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, FBTexture->Buffers[0]); + glBufferData(GL_PIXEL_UNPACK_BUFFER, size, nullptr, usage); + } + else + { + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, FBTexture->Buffers[FBTexture->CurrentBuffer]); + } +} + void OpenGLSWFrameBuffer::Draw3DPart(bool copy3d) { if (copy3d) { - int pixelsize = IsBgra() ? 4 : 1; - int size = Width * Height * pixelsize; + BindFBBuffer(); + FBTexture->CurrentBuffer = (FBTexture->CurrentBuffer + 1) & 1; - if (FBTexture->Buffers[0] == 0) + if (!UseMappedMemBuffer) { - glGenBuffers(2, (GLuint*)FBTexture->Buffers); - glBindBuffer(GL_PIXEL_UNPACK_BUFFER, FBTexture->Buffers[0]); - glBufferData(GL_PIXEL_UNPACK_BUFFER, size, nullptr, GL_STREAM_DRAW); - glBindBuffer(GL_PIXEL_UNPACK_BUFFER, FBTexture->Buffers[1]); - glBufferData(GL_PIXEL_UNPACK_BUFFER, size, nullptr, GL_STREAM_DRAW); - } - else - { - glBindBuffer(GL_PIXEL_UNPACK_BUFFER, FBTexture->Buffers[FBTexture->CurrentBuffer]); - FBTexture->CurrentBuffer = (FBTexture->CurrentBuffer + 1) & 1; - } + int pixelsize = IsBgra() ? 
4 : 1; + int size = Width * Height * pixelsize; - uint8_t *dest = (uint8_t*)MapBuffer(GL_PIXEL_UNPACK_BUFFER, size); - if (dest) - { - if (Pitch == Width) + uint8_t *dest = (uint8_t*)MapBuffer(GL_PIXEL_UNPACK_BUFFER, size); + if (dest) { - memcpy(dest, MemBuffer, Width * Height * pixelsize); - } - else - { - uint8_t *src = MemBuffer; - for (int y = 0; y < Height; y++) + if (Pitch == Width) { - memcpy(dest, src, Width * pixelsize); - dest += Width * pixelsize; - src += Pitch * pixelsize; + memcpy(dest, MemBuffer, Width * Height * pixelsize); } + else + { + uint8_t *src = MemBuffer; + for (int y = 0; y < Height; y++) + { + memcpy(dest, src, Width * pixelsize); + dest += Width * pixelsize; + src += Pitch * pixelsize; + } + } + glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER); } - glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER); - GLint oldBinding = 0; - glGetIntegerv(GL_TEXTURE_BINDING_2D, &oldBinding); - glBindTexture(GL_TEXTURE_2D, FBTexture->Texture); - if (IsBgra()) - glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, Width, Height, GL_BGRA, GL_UNSIGNED_BYTE, 0); - else - glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, Width, Height, GL_RED, GL_UNSIGNED_BYTE, 0); - glBindTexture(GL_TEXTURE_2D, oldBinding); } + else if (MappedMemBuffer) + { + glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER); + MappedMemBuffer = nullptr; + } + + GLint oldBinding = 0; + glGetIntegerv(GL_TEXTURE_BINDING_2D, &oldBinding); + glBindTexture(GL_TEXTURE_2D, FBTexture->Texture); + if (IsBgra()) + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, Width, Height, GL_BGRA, GL_UNSIGNED_BYTE, 0); + else + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, Width, Height, GL_RED, GL_UNSIGNED_BYTE, 0); + glBindTexture(GL_TEXTURE_2D, oldBinding); glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); } diff --git a/src/gl/system/gl_swframebuffer.h b/src/gl/system/gl_swframebuffer.h index 315101fde3..226d8cca72 100644 --- a/src/gl/system/gl_swframebuffer.h +++ b/src/gl/system/gl_swframebuffer.h @@ -194,6 +194,10 @@ private: void DrawTriangleList(int minIndex, int numVertices, int 
startIndex, int primitiveCount); void Present(); + void BindFBBuffer(); + void *MappedMemBuffer = nullptr; + bool UseMappedMemBuffer = true; + static uint32_t ColorARGB(uint32_t a, uint32_t r, uint32_t g, uint32_t b) { return ((a & 0xff) << 24) | ((r & 0xff) << 16) | ((g & 0xff) << 8) | ((b) & 0xff); } static uint32_t ColorRGBA(uint32_t r, uint32_t g, uint32_t b, uint32_t a) { return ColorARGB(a, r, g, b); } static uint32_t ColorXRGB(uint32_t r, uint32_t g, uint32_t b) { return ColorARGB(0xff, r, g, b); } From 79b14bbc0854f9f813e6478b8db6f3b4db737154 Mon Sep 17 00:00:00 2001 From: raa-eruanna Date: Tue, 18 Oct 2016 11:38:35 -0400 Subject: [PATCH 216/912] - Updated version.h for beta tag --- src/version.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/version.h b/src/version.h index c7a95e66ef..99caa0ad7d 100644 --- a/src/version.h +++ b/src/version.h @@ -41,12 +41,12 @@ const char *GetVersionString(); /** Lots of different version numbers **/ -#define VERSIONSTR "0.0pre" +#define VERSIONSTR "0.1beta" // The version as seen in the Windows resource -#define RC_FILEVERSION 0,0,9999,0 -#define RC_PRODUCTVERSION 0,0,9999,0 -#define RC_PRODUCTVERSION2 "0.0pre" +#define RC_FILEVERSION 0,1,0,0 +#define RC_PRODUCTVERSION 0,1,0,0 +#define RC_PRODUCTVERSION2 "0.1beta" // Version identifier for network games. 
// Bump it every time you do a release unless you're certain you From 694cae054cb088784221e7df29e17337d3ba1d57 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 18 Oct 2016 18:23:56 +0200 Subject: [PATCH 217/912] Add some experimental draw triangle code --- src/r_draw_rgba.cpp | 249 +++++++++++++++++++++++++++++++++++++++++++ src/r_swrenderer.cpp | 5 + 2 files changed, 254 insertions(+) diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index a2c4fb9aef..cb1eb3fc05 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -1256,6 +1256,255 @@ void ApplySpecialColormapRGBACommand::Execute(DrawerThread *thread) ///////////////////////////////////////////////////////////////////////////// +struct TriVertex +{ + enum { NumVarying = 3 }; + float x, y, z, w; + float varying[NumVarying]; +}; + +float gradx(float x0, float y0, float x1, float y1, float x2, float y2, float c0, float c1, float c2) +{ + float top = (c1 - c2) * (y0 - y2) - (c0 - c2) * (y1 - y2); + float bottom = (x1 - x2) * (y0 - y2) - (x0 - x2) * (y1 - y2); + return top / bottom; +} + +float grady(float x0, float y0, float x1, float y1, float x2, float y2, float c0, float c1, float c2) +{ + float top = (c1 - c2) * (x0 - x2) - (c0 - c2) * (x1 - x2); + float bottom = -((x1 - x2) * (y0 - y2) - (x0 - x2) * (y1 - y2)); + return top / bottom; +} + +void triangle(uint32_t *dest, int pitch, const TriVertex &v1, const TriVertex &v2, const TriVertex &v3) +{ + // 28.4 fixed-point coordinates + const int Y1 = (int)round(16.0f * v1.y); + const int Y2 = (int)round(16.0f * v2.y); + const int Y3 = (int)round(16.0f * v3.y); + + const int X1 = (int)round(16.0f * v1.x); + const int X2 = (int)round(16.0f * v2.x); + const int X3 = (int)round(16.0f * v3.x); + + // Deltas + const int DX12 = X1 - X2; + const int DX23 = X2 - X3; + const int DX31 = X3 - X1; + + const int DY12 = Y1 - Y2; + const int DY23 = Y2 - Y3; + const int DY31 = Y3 - Y1; + + // Fixed-point deltas + const int FDX12 = DX12 << 4; + const int FDX23 = 
DX23 << 4; + const int FDX31 = DX31 << 4; + + const int FDY12 = DY12 << 4; + const int FDY23 = DY23 << 4; + const int FDY31 = DY31 << 4; + + // Bounding rectangle + int minx = (MIN(MIN(X1, X2), X3) + 0xF) >> 4; + int maxx = (MAX(MAX(X1, X2), X3) + 0xF) >> 4; + int miny = (MIN(MIN(Y1, Y2), Y3) + 0xF) >> 4; + int maxy = (MAX(MAX(Y1, Y2), Y3) + 0xF) >> 4; + + // Block size, standard 8x8 (must be power of two) + const int q = 8; + + // Start in corner of 8x8 block + minx &= ~(q - 1); + miny &= ~(q - 1); + + dest += miny * pitch; + + // Half-edge constants + int C1 = DY12 * X1 - DX12 * Y1; + int C2 = DY23 * X2 - DX23 * Y2; + int C3 = DY31 * X3 - DX31 * Y3; + + // Correct for fill convention + if (DY12 < 0 || (DY12 == 0 && DX12 > 0)) C1++; + if (DY23 < 0 || (DY23 == 0 && DX23 > 0)) C2++; + if (DY31 < 0 || (DY31 == 0 && DX31 > 0)) C3++; + + // Gradients + float gradWX = gradx(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + float gradWY = grady(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + float startW = v1.w + gradWX * (minx - v1.x) + gradWY * (miny - v1.y); + float gradVaryingX[TriVertex::NumVarying], gradVaryingY[TriVertex::NumVarying], startVarying[TriVertex::NumVarying]; + for (int i = 0; i < TriVertex::NumVarying; i++) + { + gradVaryingX[i] = gradx(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + gradVaryingY[i] = grady(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + startVarying[i] = v1.varying[i] * v1.w + gradVaryingX[i] * (minx - v1.x) + gradVaryingY[i] * (miny - v1.y); + } + + // Loop through blocks + for (int y = miny; y < maxy; y += q) + { + for (int x = minx; x < maxx; x += q) + { + // Corners of block + int x0 = x << 4; + int x1 = (x + q - 1) << 4; + int y0 = y << 4; + int y1 = (y + q - 1) << 4; + + // Evaluate half-space functions + bool a00 = C1 + DX12 * y0 - DY12 * x0 > 0; + bool a10 = C1 + DX12 * y0 - DY12 * x1 > 0; + bool 
a01 = C1 + DX12 * y1 - DY12 * x0 > 0; + bool a11 = C1 + DX12 * y1 - DY12 * x1 > 0; + int a = (a00 << 0) | (a10 << 1) | (a01 << 2) | (a11 << 3); + + bool b00 = C2 + DX23 * y0 - DY23 * x0 > 0; + bool b10 = C2 + DX23 * y0 - DY23 * x1 > 0; + bool b01 = C2 + DX23 * y1 - DY23 * x0 > 0; + bool b11 = C2 + DX23 * y1 - DY23 * x1 > 0; + int b = (b00 << 0) | (b10 << 1) | (b01 << 2) | (b11 << 3); + + bool c00 = C3 + DX31 * y0 - DY31 * x0 > 0; + bool c10 = C3 + DX31 * y0 - DY31 * x1 > 0; + bool c01 = C3 + DX31 * y1 - DY31 * x0 > 0; + bool c11 = C3 + DX31 * y1 - DY31 * x1 > 0; + int c = (c00 << 0) | (c10 << 1) | (c01 << 2) | (c11 << 3); + + // Skip block when outside an edge + if (a == 0x0 || b == 0x0 || c == 0x0) continue; + + // Calculate varying variables for affine block + float offx0 = (x - minx) + 0.5f; + float offy0 = (y - miny) + 0.5f; + float offx1 = offx0 + q; + float offy1 = offy0 + q; + float rcpWTL = 1.0f / (startW + offx0 * gradWX + offy0 * gradWY); + float rcpWTR = 1.0f / (startW + offx1 * gradWX + offy0 * gradWY); + float rcpWBL = 1.0f / (startW + offx0 * gradWX + offy1 * gradWY); + float rcpWBR = 1.0f / (startW + offx1 * gradWX + offy1 * gradWY); + float varyingTL[TriVertex::NumVarying]; + float varyingTR[TriVertex::NumVarying]; + float varyingBL[TriVertex::NumVarying]; + float varyingBR[TriVertex::NumVarying]; + for (int i = 0; i < TriVertex::NumVarying; i++) + { + varyingTL[i] = (startVarying[i] + offx0 * gradVaryingX[i] + offy0 * gradVaryingY[i]) * rcpWTL; + varyingTR[i] = (startVarying[i] + offx1 * gradVaryingX[i] + offy0 * gradVaryingY[i]) * rcpWTR; + varyingBL[i] = ((startVarying[i] + offx0 * gradVaryingX[i] + offy1 * gradVaryingY[i]) * rcpWBL - varyingTL[i]) * (1.0f / q); + varyingBR[i] = ((startVarying[i] + offx1 * gradVaryingX[i] + offy1 * gradVaryingY[i]) * rcpWBR - varyingTR[i]) * (1.0f / q); + } + + uint32_t *buffer = dest; + + // Accept whole block when totally covered + if (a == 0xF && b == 0xF && c == 0xF) + { + for (int iy = 0; iy < q; iy++) + { + 
float varying[TriVertex::NumVarying], varyingStep[TriVertex::NumVarying]; + for (int i = 0; i < TriVertex::NumVarying; i++) + { + varying[i] = varyingTL[i] + varyingBL[i] * iy; + varyingStep[i] = (varyingTR[i] + varyingBR[i] * iy - varying[i]) * (1.0f / q); + } + + for (int ix = x; ix < x + q; ix++) + { + uint32_t red = (uint32_t)clamp(varying[0] * 255.0f + 0.5f, 0.0f, 255.0f); + uint32_t green = (uint32_t)clamp(varying[1] * 255.0f + 0.5f, 0.0f, 255.0f); + uint32_t blue = (uint32_t)clamp(varying[2] * 255.0f + 0.5f, 0.0f, 255.0f); + + buffer[ix] = 0xff000000 | (red << 16) | (green << 8) | blue; + + for (int i = 0; i < TriVertex::NumVarying; i++) + varying[i] += varyingStep[i]; + } + + buffer += pitch; + } + } + else // Partially covered block + { + int CY1 = C1 + DX12 * y0 - DY12 * x0; + int CY2 = C2 + DX23 * y0 - DY23 * x0; + int CY3 = C3 + DX31 * y0 - DY31 * x0; + + for (int iy = 0; iy < q; iy++) + { + int CX1 = CY1; + int CX2 = CY2; + int CX3 = CY3; + + float varying[TriVertex::NumVarying], varyingStep[TriVertex::NumVarying]; + for (int i = 0; i < TriVertex::NumVarying; i++) + { + varying[i] = varyingTL[i] + varyingBL[i] * iy; + varyingStep[i] = (varyingTR[i] + varyingBR[i] * iy - varying[i]) * (1.0f / q); + } + + for (int ix = x; ix < x + q; ix++) + { + if (CX1 > 0 && CX2 > 0 && CX3 > 0) + { + uint32_t red = (uint32_t)clamp(varying[0] * 255.0f + 0.5f, 0.0f, 255.0f); + uint32_t green = (uint32_t)clamp(varying[1] * 255.0f + 0.5f, 0.0f, 255.0f); + uint32_t blue = (uint32_t)clamp(varying[2] * 255.0f + 0.5f, 0.0f, 255.0f); + + buffer[ix] = 0xff000000 | (red << 16) | (green << 8) | blue; + } + + for (int i = 0; i < TriVertex::NumVarying; i++) + varying[i] += varyingStep[i]; + + CX1 -= FDY12; + CX2 -= FDY23; + CX3 -= FDY31; + } + + CY1 += FDX12; + CY2 += FDX23; + CY3 += FDX31; + + buffer += pitch; + } + } + } + + dest += q * pitch; + } +} + +void R_DrawTriangle() +{ + TriVertex trivert[6]; + + trivert[0].x = 100; + trivert[0].y = 350; + trivert[0].w = 1.0f; + 
trivert[0].varying[0] = 0.0f; + trivert[0].varying[1] = 1.0f; + trivert[0].varying[2] = 0.0f; + trivert[1].x = 400; + trivert[1].y = 500; + trivert[1].w = 1.0f; + trivert[1].varying[0] = 1.0f; + trivert[1].varying[1] = 0.0f; + trivert[1].varying[2] = 0.0f; + trivert[2].x = 200; + trivert[2].y = 200; + trivert[2].w = 1.0f; + trivert[2].varying[0] = 0.0f; + trivert[2].varying[1] = 0.0f; + trivert[2].varying[2] = 1.0f; + + triangle((uint32_t*)dc_destorg, dc_pitch, trivert[0], trivert[1], trivert[2]); +} + +///////////////////////////////////////////////////////////////////////////// + void R_DrawSingleSkyCol1(uint32_t solid_top, uint32_t solid_bottom) { DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); diff --git a/src/r_swrenderer.cpp b/src/r_swrenderer.cpp index 372f23accc..49eb0ef277 100644 --- a/src/r_swrenderer.cpp +++ b/src/r_swrenderer.cpp @@ -47,10 +47,13 @@ EXTERN_CVAR(Bool, r_shadercolormaps) +CVAR(Bool, r_drawtriangle, false, 0) + void R_SWRSetWindow(int windowSize, int fullWidth, int fullHeight, int stHeight, float trueratio); void R_SetupColormap(player_t *); void R_SetupFreelook(); void R_InitRenderer(); +void R_DrawTriangle(); FSoftwareRenderer::FSoftwareRenderer() { @@ -192,6 +195,8 @@ void FSoftwareRenderer::RenderView(player_t *player) } R_EndDrawerCommands(); + if (r_swtruecolor && r_drawtriangle) + R_DrawTriangle(); } //========================================================================== From 226287875bf7b4cb517d2b200ef403abd301a8a2 Mon Sep 17 00:00:00 2001 From: raa-eruanna Date: Tue, 18 Oct 2016 14:07:44 -0400 Subject: [PATCH 218/912] Creating 0.2pre tag --- src/version.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/version.h b/src/version.h index 99caa0ad7d..837ef4eeee 100644 --- a/src/version.h +++ b/src/version.h @@ -41,17 +41,17 @@ const char *GetVersionString(); /** Lots of different version numbers **/ -#define VERSIONSTR "0.1beta" +#define VERSIONSTR "0.2pre" // The version as seen in the 
Windows resource -#define RC_FILEVERSION 0,1,0,0 -#define RC_PRODUCTVERSION 0,1,0,0 -#define RC_PRODUCTVERSION2 "0.1beta" +#define RC_FILEVERSION 0,1,9999,0 +#define RC_PRODUCTVERSION 0,1,9999,0 +#define RC_PRODUCTVERSION2 "0.2pre" // Version identifier for network games. // Bump it every time you do a release unless you're certain you // didn't change anything that will affect sync. -#define NETGAMEVERSION 232 +#define NETGAMEVERSION 233 // Version stored in the ini's [LastRun] section. // Bump it if you made some configuration change that you want to From 68efdf5cefcbad4588b59a143f6746ec54b18341 Mon Sep 17 00:00:00 2001 From: raa-eruanna Date: Tue, 18 Oct 2016 18:21:22 -0400 Subject: [PATCH 219/912] - Fixed: Added linker flags for Linux to prevent crashes using LLVM. Using fix found by dpJudas. --- src/CMakeLists.txt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 9a5aec83b3..8ce02cdd23 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -610,6 +610,11 @@ if( ZD_CMAKE_COMPILER_IS_GNUCXX_COMPATIBLE ) set( CMAKE_EXE_LINKER_FLAGS "-stdlib=libc++ ${CMAKE_EXE_LINKER_FLAGS}" ) endif () + # Linux - add these flags for LLVM compatibility to prevent crashing + if ( UNIX AND NOT APPLE ) + set( CMAKE_EXE_LINKER_FLAGS "-Wl,--exclude-libs,ALL ${CMAKE_EXE_LINKER_FLAGS}" ) + endif() + # Remove extra warnings when using the official DirectX headers. # Also, TDM-GCC 4.4.0 no longer accepts glibc-style printf formats as valid, # which is a royal pain. The previous version I had been using was fine with them. 
From ef70ba09852f44e8116c4120956e8eb61187853b Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 19 Oct 2016 03:49:42 +0200 Subject: [PATCH 220/912] More triangle rendering --- src/r_draw_rgba.cpp | 229 +++++++++++++++++++++++++++++++++++++++----- 1 file changed, 205 insertions(+), 24 deletions(-) diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index cb1eb3fc05..7048c9be89 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -39,6 +39,7 @@ #include "r_plane.h" #include "r_draw_rgba.h" #include "r_compiler/llvmdrawers.h" +#include "gl/data/gl_matrix.h" #include "gi.h" #include "stats.h" @@ -1258,6 +1259,9 @@ void ApplySpecialColormapRGBACommand::Execute(DrawerThread *thread) struct TriVertex { + TriVertex() { } + TriVertex(float x, float y, float z, float w, float u, float v, float light) : x(x), y(y), z(z), w(w) { varying[0] = u; varying[1] = v; varying[2] = light; } + enum { NumVarying = 3 }; float x, y, z, w; float varying[NumVarying]; @@ -1307,10 +1311,12 @@ void triangle(uint32_t *dest, int pitch, const TriVertex &v1, const TriVertex &v const int FDY31 = DY31 << 4; // Bounding rectangle - int minx = (MIN(MIN(X1, X2), X3) + 0xF) >> 4; - int maxx = (MAX(MAX(X1, X2), X3) + 0xF) >> 4; - int miny = (MIN(MIN(Y1, Y2), Y3) + 0xF) >> 4; - int maxy = (MAX(MAX(Y1, Y2), Y3) + 0xF) >> 4; + int minx = MAX((MIN(MIN(X1, X2), X3) + 0xF) >> 4, 0); + int maxx = MIN((MAX(MAX(X1, X2), X3) + 0xF) >> 4, viewwidth); + int miny = MAX((MIN(MIN(Y1, Y2), Y3) + 0xF) >> 4, 0); + int maxy = MIN((MAX(MAX(Y1, Y2), Y3) + 0xF) >> 4, viewheight); + if (minx >= maxx || miny >= maxy) + return; // Block size, standard 8x8 (must be power of two) const int q = 8; @@ -1477,30 +1483,205 @@ void triangle(uint32_t *dest, int pitch, const TriVertex &v1, const TriVertex &v } } +bool cullhalfspace(float clipdistance1, float clipdistance2, float &t1, float &t2) +{ + if (clipdistance1 < 0.0f && clipdistance2 < 0.0f) + return true; + + if (clipdistance1 < 0.0f) + t1 = MAX(-clipdistance1 / 
(clipdistance2 - clipdistance1), t1); + + if (clipdistance2 < 0.0f) + t2 = MIN(1.0f - clipdistance2 / (clipdistance1 - clipdistance2), t2); + + return false; +} + +void clipedge(const TriVertex &v1, const TriVertex &v2, TriVertex *clippedvert, int &numclipvert) +{ + // Clip and cull so that the following is true for all vertices: + // -v.w <= v.x <= v.w + // -v.w <= v.y <= v.w + // -v.w <= v.z <= v.w + + float t1 = 0.0f, t2 = 1.0f; + bool culled = + cullhalfspace(v1.x + v1.w, v2.x + v2.w, t1, t2) || + cullhalfspace(v1.w - v1.x, v2.w - v2.x, t1, t2) || + cullhalfspace(v1.y + v1.w, v2.y + v2.w, t1, t2) || + cullhalfspace(v1.w - v1.y, v2.w - v2.y, t1, t2) || + cullhalfspace(v1.z + v1.w, v2.z + v2.w, t1, t2) || + cullhalfspace(v1.w - v1.z, v2.w - v2.z, t1, t2); + if (culled) + return; + + if (t1 == 0.0f) + { + clippedvert[numclipvert++] = v1; + } + else + { + auto &v = clippedvert[numclipvert++]; + v.x = v1.x * (1.0f - t1) + v2.x * t1; + v.y = v1.y * (1.0f - t1) + v2.y * t1; + v.z = v1.z * (1.0f - t1) + v2.z * t1; + v.w = v1.w * (1.0f - t1) + v2.w * t1; + for (int i = 0; i < TriVertex::NumVarying; i++) + v.varying[i] = v1.varying[i] * (1.0f - t1) + v2.varying[i] * t1; + } + + if (t2 != 1.0f) + { + auto &v = clippedvert[numclipvert++]; + v.x = v1.x * (1.0f - t2) + v2.x * t2; + v.y = v1.y * (1.0f - t2) + v2.y * t2; + v.z = v1.z * (1.0f - t2) + v2.z * t2; + v.w = v1.w * (1.0f - t2) + v2.w * t2; + for (int i = 0; i < TriVertex::NumVarying; i++) + v.varying[i] = v1.varying[i] * (1.0f - t2) + v2.varying[i] * t2; + } +} + void R_DrawTriangle() { - TriVertex trivert[6]; + TriVertex cube[6 * 6] = + { + {-1.0f, 1.0f, 1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, + { 1.0f, 1.0f, 1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, + { 1.0f, -1.0f, 1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, - trivert[0].x = 100; - trivert[0].y = 350; - trivert[0].w = 1.0f; - trivert[0].varying[0] = 0.0f; - trivert[0].varying[1] = 1.0f; - trivert[0].varying[2] = 0.0f; - trivert[1].x = 400; - trivert[1].y = 500; - trivert[1].w = 1.0f; - 
trivert[1].varying[0] = 1.0f; - trivert[1].varying[1] = 0.0f; - trivert[1].varying[2] = 0.0f; - trivert[2].x = 200; - trivert[2].y = 200; - trivert[2].w = 1.0f; - trivert[2].varying[0] = 0.0f; - trivert[2].varying[1] = 0.0f; - trivert[2].varying[2] = 1.0f; + { 1.0f, -1.0f, 1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, + {-1.0f, -1.0f, 1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, + {-1.0f, 1.0f, 1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, - triangle((uint32_t*)dc_destorg, dc_pitch, trivert[0], trivert[1], trivert[2]); + + { 1.0f, -1.0f, -1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, + { 1.0f, 1.0f, -1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, + {-1.0f, 1.0f, -1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, + + {-1.0f, 1.0f, -1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, + {-1.0f, -1.0f, -1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, + { 1.0f, -1.0f, -1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, + + + { 1.0f, 1.0f, -1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, + { 1.0f, 1.0f, 1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, + {-1.0f, 1.0f, 1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, + + {-1.0f, 1.0f, 1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, + {-1.0f, 1.0f, -1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, + { 1.0f, 1.0f, -1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, + + + {-1.0f, -1.0f, 1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, + { 1.0f, -1.0f, 1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, + { 1.0f, -1.0f, -1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, + + { 1.0f, -1.0f, -1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, + {-1.0f, -1.0f, -1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, + {-1.0f, -1.0f, 1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, + + + { 1.0f, -1.0f, 1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, + { 1.0f, 1.0f, 1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, + { 1.0f, 1.0f, -1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, + + { 1.0f, 1.0f, -1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, + { 1.0f, -1.0f, -1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, + { 1.0f, -1.0f, 1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, + + + {-1.0f, 1.0f, -1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, + {-1.0f, 1.0f, 1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, + {-1.0f, -1.0f, 1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, + + {-1.0f, -1.0f, 1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, + {-1.0f, -1.0f, -1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, + {-1.0f, 1.0f, -1.0f, 1.0f, 0.0f, 0.0f, 0.0f } + }; + + for 
(int i = 0; i < 6; i++) + { + cube[i * 6 + 0].varying[0] = 1.0f; + cube[i * 6 + 1].varying[1] = 1.0f; + cube[i * 6 + 2].varying[2] = 1.0f; + cube[i * 6 + 3].varying[2] = 1.0f; + cube[i * 6 + 4].varying[0] = 1.0f; + cube[i * 6 + 4].varying[1] = 1.0f; + cube[i * 6 + 4].varying[2] = 1.0f; + cube[i * 6 + 5].varying[0] = 1.0f; + } + + static float angle = 0.0f; + angle = fmod(angle + 0.5f, 360.0f); + VSMatrix objectToWorld(0); + objectToWorld.translate((float)ViewPos.X, (float)ViewPos.Y + 5.0f, (float)ViewPos.Z); + objectToWorld.rotate(angle, 0.57735f, 0.57735f, 0.57735f); + + TriVertex *vinput = cube; + for (int i = 0; i < 6 * 6 / 3; i++) + { + TriVertex vert[3]; + + // Vertex shader stuff: + for (int j = 0; j < 3; j++) + { + auto &v = vert[j]; + v = *(vinput++); + + // Apply object to world transform: + const float *matrix = objectToWorld.get(); + float vx = matrix[0 * 4 + 0] * v.x + matrix[1 * 4 + 0] * v.y + matrix[2 * 4 + 0] * v.z + matrix[3 * 4 + 0] * v.w; + float vy = matrix[0 * 4 + 1] * v.x + matrix[1 * 4 + 1] * v.y + matrix[2 * 4 + 1] * v.z + matrix[3 * 4 + 1] * v.w; + float vz = matrix[0 * 4 + 2] * v.x + matrix[1 * 4 + 2] * v.y + matrix[2 * 4 + 2] * v.z + matrix[3 * 4 + 2] * v.w; + float vw = matrix[0 * 4 + 3] * v.x + matrix[1 * 4 + 3] * v.y + matrix[2 * 4 + 3] * v.z + matrix[3 * 4 + 3] * v.w; + v.x = vx; + v.y = vy; + v.z = vz; + v.w = vw; + + // The software renderer world to clip transform: + double nearp = 5.0f; + double farp = 65536.f; + double tr_x = v.x - ViewPos.X; + double tr_y = v.y - ViewPos.Y; + double tr_z = v.z - ViewPos.Z; + double tx = tr_x * ViewSin - tr_y * ViewCos; + double tz = tr_x * ViewTanCos + tr_y * ViewTanSin; + v.x = (float)tx; + v.y = (float)tr_z; + v.z = (float)(-tz * (farp + nearp) / (nearp - farp) + (2.0f * farp * nearp) / (nearp - farp)); + v.w = (float)tz; + } + + // Cull, clip and generate additional vertices as needed + TriVertex clippedvert[6]; + int numclipvert = 0; + clipedge(vert[0], vert[1], clippedvert, numclipvert); + 
clipedge(vert[1], vert[2], clippedvert, numclipvert); + clipedge(vert[2], vert[0], clippedvert, numclipvert); + + // Map to 2D viewport: + for (int j = 0; j < numclipvert; j++) + { + auto &v = clippedvert[j]; + + // Calculate normalized device coordinates: + v.w = 1.0f / v.w; + v.x *= v.w; + v.y *= v.w; + v.z *= v.w; + + // Apply viewport scale to get screen coordinates: + v.x = (float)(CenterX + v.x * CenterX); + v.y = (float)(CenterY - v.y * InvZtoScale); + } + + for (int i = numclipvert; i > 1; i--) + { + triangle((uint32_t*)dc_destorg, dc_pitch, clippedvert[numclipvert - 1], clippedvert[i - 1], clippedvert[i - 2]); + } + } } ///////////////////////////////////////////////////////////////////////////// From 6a7bb43ce1853b95c970bae02d9581c6d4454123 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 19 Oct 2016 14:32:28 +0200 Subject: [PATCH 221/912] Fix clipping bug --- src/r_draw_rgba.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 7048c9be89..f8697bac48 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -1492,7 +1492,7 @@ bool cullhalfspace(float clipdistance1, float clipdistance2, float &t1, float &t t1 = MAX(-clipdistance1 / (clipdistance2 - clipdistance1), t1); if (clipdistance2 < 0.0f) - t2 = MIN(1.0f - clipdistance2 / (clipdistance1 - clipdistance2), t2); + t2 = MIN(1.0f + clipdistance2 / (clipdistance1 - clipdistance2), t2); return false; } From ea72152c312842d7b59e30a0286e69a6b0de05af Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 19 Oct 2016 15:43:10 +0200 Subject: [PATCH 222/912] Add doom style column clipping to triangle drawer --- src/r_draw_rgba.cpp | 45 ++++++++++++++++++++++++++++++++++++--------- 1 file changed, 36 insertions(+), 9 deletions(-) diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index f8697bac48..b078131661 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -1281,7 +1281,7 @@ float grady(float x0, float y0, float x1, 
float y1, float x2, float y2, float c0 return top / bottom; } -void triangle(uint32_t *dest, int pitch, const TriVertex &v1, const TriVertex &v2, const TriVertex &v3) +void triangle(uint32_t *dest, int pitch, const TriVertex &v1, const TriVertex &v2, const TriVertex &v3, int clipleft, int clipright, const short *cliptop, const short *clipbottom) { // 28.4 fixed-point coordinates const int Y1 = (int)round(16.0f * v1.y); @@ -1311,10 +1311,17 @@ void triangle(uint32_t *dest, int pitch, const TriVertex &v1, const TriVertex &v const int FDY31 = DY31 << 4; // Bounding rectangle - int minx = MAX((MIN(MIN(X1, X2), X3) + 0xF) >> 4, 0); - int maxx = MIN((MAX(MAX(X1, X2), X3) + 0xF) >> 4, viewwidth); - int miny = MAX((MIN(MIN(Y1, Y2), Y3) + 0xF) >> 4, 0); - int maxy = MIN((MAX(MAX(Y1, Y2), Y3) + 0xF) >> 4, viewheight); + int clipymin = cliptop[clipleft]; + int clipymax = clipbottom[clipleft]; + for (int i = clipleft + 1; i <= clipright; i++) + { + clipymin = MIN(clipymin, (int)cliptop[i]); + clipymax = MAX(clipymax, (int)clipbottom[i]); + } + int minx = MAX((MIN(MIN(X1, X2), X3) + 0xF) >> 4, clipleft); + int maxx = MIN((MAX(MAX(X1, X2), X3) + 0xF) >> 4, clipright); + int miny = MAX((MIN(MIN(Y1, Y2), Y3) + 0xF) >> 4, clipymin); + int maxy = MIN((MAX(MAX(Y1, Y2), Y3) + 0xF) >> 4, clipymax); if (minx >= maxx || miny >= maxy) return; @@ -1382,6 +1389,13 @@ void triangle(uint32_t *dest, int pitch, const TriVertex &v1, const TriVertex &v // Skip block when outside an edge if (a == 0x0 || b == 0x0 || c == 0x0) continue; + // Check if block needs clipping + int clipcount = 0; + for (int ix = 0; ix < q; ix++) + { + clipcount += (cliptop[x + ix] > y) || (clipbottom[x + ix] < y + q - 1); + } + // Calculate varying variables for affine block float offx0 = (x - minx) + 0.5f; float offy0 = (y - miny) + 0.5f; @@ -1406,7 +1420,7 @@ void triangle(uint32_t *dest, int pitch, const TriVertex &v1, const TriVertex &v uint32_t *buffer = dest; // Accept whole block when totally covered - if (a == 
0xF && b == 0xF && c == 0xF) + if (a == 0xF && b == 0xF && c == 0xF && clipcount == 0) { for (int iy = 0; iy < q; iy++) { @@ -1453,7 +1467,9 @@ void triangle(uint32_t *dest, int pitch, const TriVertex &v1, const TriVertex &v for (int ix = x; ix < x + q; ix++) { - if (CX1 > 0 && CX2 > 0 && CX3 > 0) + bool visible = (cliptop[ix] <= y + iy) && (clipbottom[ix] >= y + iy); + + if (CX1 > 0 && CX2 > 0 && CX3 > 0 && visible) { uint32_t red = (uint32_t)clamp(varying[0] * 255.0f + 0.5f, 0.0f, 255.0f); uint32_t green = (uint32_t)clamp(varying[1] * 255.0f + 0.5f, 0.0f, 255.0f); @@ -1544,6 +1560,16 @@ void clipedge(const TriVertex &v1, const TriVertex &v2, TriVertex *clippedvert, void R_DrawTriangle() { + int clipleft = 0; + int clipright = viewwidth - 1; + short cliptop[MAXWIDTH]; + short clipbottom[MAXWIDTH]; + for (int i = clipleft; i < clipright; i++) + { + cliptop[i] = (i - clipleft) / 4; + clipbottom[i] = viewheight - 1 - (i - clipleft) / 4; + } + TriVertex cube[6 * 6] = { {-1.0f, 1.0f, 1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, @@ -1615,8 +1641,9 @@ void R_DrawTriangle() static float angle = 0.0f; angle = fmod(angle + 0.5f, 360.0f); VSMatrix objectToWorld(0); - objectToWorld.translate((float)ViewPos.X, (float)ViewPos.Y + 5.0f, (float)ViewPos.Z); + objectToWorld.translate((float)ViewPos.X, (float)ViewPos.Y + 50.0f, (float)ViewPos.Z); objectToWorld.rotate(angle, 0.57735f, 0.57735f, 0.57735f); + objectToWorld.scale(10.0f, 10.0f, 10.0f); TriVertex *vinput = cube; for (int i = 0; i < 6 * 6 / 3; i++) @@ -1679,7 +1706,7 @@ void R_DrawTriangle() for (int i = numclipvert; i > 1; i--) { - triangle((uint32_t*)dc_destorg, dc_pitch, clippedvert[numclipvert - 1], clippedvert[i - 1], clippedvert[i - 2]); + triangle((uint32_t*)dc_destorg, dc_pitch, clippedvert[numclipvert - 1], clippedvert[i - 1], clippedvert[i - 2], clipleft, clipright, cliptop, clipbottom); } } } From d5865a46a0684420df276fb9d95ccc66eefb953e Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 19 Oct 2016 17:44:50 +0200 
Subject: [PATCH 223/912] Move triangle drawer into a command and change the sky code to use it if r_cubesky is enabled --- src/r_draw.h | 13 + src/r_draw_rgba.cpp | 803 +++++++++++++++++++++---------------------- src/r_plane.cpp | 99 +++++- src/r_swrenderer.cpp | 5 - src/r_thread.h | 3 + 5 files changed, 505 insertions(+), 418 deletions(-) diff --git a/src/r_draw.h b/src/r_draw.h index 2b664897d8..7b12c43bb1 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -387,6 +387,19 @@ void R_DrawSingleSkyCol4(uint32_t solid_top, uint32_t solid_bottom); void R_DrawDoubleSkyCol1(uint32_t solid_top, uint32_t solid_bottom); void R_DrawDoubleSkyCol4(uint32_t solid_top, uint32_t solid_bottom); +struct TriVertex +{ + TriVertex() { } + TriVertex(float x, float y, float z, float w, float u, float v, float light) : x(x), y(y), z(z), w(w) { varying[0] = u; varying[1] = v; varying[2] = light; } + + enum { NumVarying = 3 }; + float x, y, z, w; + float varying[NumVarying]; +}; + +class VSMatrix; +void R_DrawTriangles(const VSMatrix &objectToWorld, const TriVertex *vertices, int count, int clipleft, int clipright, const short *cliptop, const short *clipbottom); + extern bool r_swtruecolor; EXTERN_CVAR(Bool, r_multithreaded); diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index b078131661..9b15d06cef 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -1257,458 +1257,437 @@ void ApplySpecialColormapRGBACommand::Execute(DrawerThread *thread) ///////////////////////////////////////////////////////////////////////////// -struct TriVertex +class DrawTrianglesCommand : public DrawerCommand { - TriVertex() { } - TriVertex(float x, float y, float z, float w, float u, float v, float light) : x(x), y(y), z(z), w(w) { varying[0] = u; varying[1] = v; varying[2] = light; } - - enum { NumVarying = 3 }; - float x, y, z, w; - float varying[NumVarying]; -}; - -float gradx(float x0, float y0, float x1, float y1, float x2, float y2, float c0, float c1, float c2) -{ - float top = (c1 - c2) * 
(y0 - y2) - (c0 - c2) * (y1 - y2); - float bottom = (x1 - x2) * (y0 - y2) - (x0 - x2) * (y1 - y2); - return top / bottom; -} - -float grady(float x0, float y0, float x1, float y1, float x2, float y2, float c0, float c1, float c2) -{ - float top = (c1 - c2) * (x0 - x2) - (c0 - c2) * (x1 - x2); - float bottom = -((x1 - x2) * (y0 - y2) - (x0 - x2) * (y1 - y2)); - return top / bottom; -} - -void triangle(uint32_t *dest, int pitch, const TriVertex &v1, const TriVertex &v2, const TriVertex &v3, int clipleft, int clipright, const short *cliptop, const short *clipbottom) -{ - // 28.4 fixed-point coordinates - const int Y1 = (int)round(16.0f * v1.y); - const int Y2 = (int)round(16.0f * v2.y); - const int Y3 = (int)round(16.0f * v3.y); - - const int X1 = (int)round(16.0f * v1.x); - const int X2 = (int)round(16.0f * v2.x); - const int X3 = (int)round(16.0f * v3.x); - - // Deltas - const int DX12 = X1 - X2; - const int DX23 = X2 - X3; - const int DX31 = X3 - X1; - - const int DY12 = Y1 - Y2; - const int DY23 = Y2 - Y3; - const int DY31 = Y3 - Y1; - - // Fixed-point deltas - const int FDX12 = DX12 << 4; - const int FDX23 = DX23 << 4; - const int FDX31 = DX31 << 4; - - const int FDY12 = DY12 << 4; - const int FDY23 = DY23 << 4; - const int FDY31 = DY31 << 4; - - // Bounding rectangle - int clipymin = cliptop[clipleft]; - int clipymax = clipbottom[clipleft]; - for (int i = clipleft + 1; i <= clipright; i++) +public: + DrawTrianglesCommand(const VSMatrix &objectToWorld, const TriVertex *vertices, int count, int clipleft, int clipright, const short *clipdata) + : objectToWorld(objectToWorld), vertices(vertices), count(count), clipleft(clipleft), clipright(clipright), clipdata(clipdata) { - clipymin = MIN(clipymin, (int)cliptop[i]); - clipymax = MAX(clipymax, (int)clipbottom[i]); - } - int minx = MAX((MIN(MIN(X1, X2), X3) + 0xF) >> 4, clipleft); - int maxx = MIN((MAX(MAX(X1, X2), X3) + 0xF) >> 4, clipright); - int miny = MAX((MIN(MIN(Y1, Y2), Y3) + 0xF) >> 4, clipymin); - int maxy = 
MIN((MAX(MAX(Y1, Y2), Y3) + 0xF) >> 4, clipymax); - if (minx >= maxx || miny >= maxy) - return; - - // Block size, standard 8x8 (must be power of two) - const int q = 8; - - // Start in corner of 8x8 block - minx &= ~(q - 1); - miny &= ~(q - 1); - - dest += miny * pitch; - - // Half-edge constants - int C1 = DY12 * X1 - DX12 * Y1; - int C2 = DY23 * X2 - DX23 * Y2; - int C3 = DY31 * X3 - DX31 * Y3; - - // Correct for fill convention - if (DY12 < 0 || (DY12 == 0 && DX12 > 0)) C1++; - if (DY23 < 0 || (DY23 == 0 && DX23 > 0)) C2++; - if (DY31 < 0 || (DY31 == 0 && DX31 > 0)) C3++; - - // Gradients - float gradWX = gradx(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); - float gradWY = grady(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); - float startW = v1.w + gradWX * (minx - v1.x) + gradWY * (miny - v1.y); - float gradVaryingX[TriVertex::NumVarying], gradVaryingY[TriVertex::NumVarying], startVarying[TriVertex::NumVarying]; - for (int i = 0; i < TriVertex::NumVarying; i++) - { - gradVaryingX[i] = gradx(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); - gradVaryingY[i] = grady(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); - startVarying[i] = v1.varying[i] * v1.w + gradVaryingX[i] * (minx - v1.x) + gradVaryingY[i] * (miny - v1.y); } - // Loop through blocks - for (int y = miny; y < maxy; y += q) + void Execute(DrawerThread *thread) override { - for (int x = minx; x < maxx; x += q) + int cliplength = clipright - clipleft + 1; + for (int i = 0; i < cliplength; i++) { - // Corners of block - int x0 = x << 4; - int x1 = (x + q - 1) << 4; - int y0 = y << 4; - int y1 = (y + q - 1) << 4; + thread->triangle_clip_top[clipleft + i] = clipdata[i]; + thread->triangle_clip_bottom[clipleft + i] = clipdata[cliplength + i]; + } - // Evaluate half-space functions - bool a00 = C1 + DX12 * y0 - DY12 * x0 > 0; - bool a10 = C1 + DX12 * y0 - DY12 * x1 > 0; - bool a01 = 
C1 + DX12 * y1 - DY12 * x0 > 0; - bool a11 = C1 + DX12 * y1 - DY12 * x1 > 0; - int a = (a00 << 0) | (a10 << 1) | (a01 << 2) | (a11 << 3); + draw_triangles(objectToWorld, vertices, count, clipleft, clipright, thread->triangle_clip_top, thread->triangle_clip_bottom, thread); + } - bool b00 = C2 + DX23 * y0 - DY23 * x0 > 0; - bool b10 = C2 + DX23 * y0 - DY23 * x1 > 0; - bool b01 = C2 + DX23 * y1 - DY23 * x0 > 0; - bool b11 = C2 + DX23 * y1 - DY23 * x1 > 0; - int b = (b00 << 0) | (b10 << 1) | (b01 << 2) | (b11 << 3); + FString DebugInfo() override + { + return "DrawTrianglesCommand"; + } - bool c00 = C3 + DX31 * y0 - DY31 * x0 > 0; - bool c10 = C3 + DX31 * y0 - DY31 * x1 > 0; - bool c01 = C3 + DX31 * y1 - DY31 * x0 > 0; - bool c11 = C3 + DX31 * y1 - DY31 * x1 > 0; - int c = (c00 << 0) | (c10 << 1) | (c01 << 2) | (c11 << 3); +private: + float gradx(float x0, float y0, float x1, float y1, float x2, float y2, float c0, float c1, float c2) + { + float top = (c1 - c2) * (y0 - y2) - (c0 - c2) * (y1 - y2); + float bottom = (x1 - x2) * (y0 - y2) - (x0 - x2) * (y1 - y2); + return top / bottom; + } - // Skip block when outside an edge - if (a == 0x0 || b == 0x0 || c == 0x0) continue; + float grady(float x0, float y0, float x1, float y1, float x2, float y2, float c0, float c1, float c2) + { + float top = (c1 - c2) * (x0 - x2) - (c0 - c2) * (x1 - x2); + float bottom = -((x1 - x2) * (y0 - y2) - (x0 - x2) * (y1 - y2)); + return top / bottom; + } - // Check if block needs clipping - int clipcount = 0; - for (int ix = 0; ix < q; ix++) + void triangle(uint32_t *dest, int pitch, const TriVertex &v1, const TriVertex &v2, const TriVertex &v3, int clipleft, int clipright, const short *cliptop, const short *clipbottom, DrawerThread *thread) + { + // 28.4 fixed-point coordinates + const int Y1 = (int)round(16.0f * v1.y); + const int Y2 = (int)round(16.0f * v2.y); + const int Y3 = (int)round(16.0f * v3.y); + + const int X1 = (int)round(16.0f * v1.x); + const int X2 = (int)round(16.0f * v2.x); 
+ const int X3 = (int)round(16.0f * v3.x); + + // Deltas + const int DX12 = X1 - X2; + const int DX23 = X2 - X3; + const int DX31 = X3 - X1; + + const int DY12 = Y1 - Y2; + const int DY23 = Y2 - Y3; + const int DY31 = Y3 - Y1; + + // Fixed-point deltas + const int FDX12 = DX12 << 4; + const int FDX23 = DX23 << 4; + const int FDX31 = DX31 << 4; + + const int FDY12 = DY12 << 4; + const int FDY23 = DY23 << 4; + const int FDY31 = DY31 << 4; + + // Bounding rectangle + int clipymin = cliptop[clipleft]; + int clipymax = clipbottom[clipleft]; + for (int i = clipleft + 1; i <= clipright; i++) + { + clipymin = MIN(clipymin, (int)cliptop[i]); + clipymax = MAX(clipymax, (int)clipbottom[i]); + } + int minx = MAX((MIN(MIN(X1, X2), X3) + 0xF) >> 4, clipleft); + int maxx = MIN((MAX(MAX(X1, X2), X3) + 0xF) >> 4, clipright); + int miny = MAX((MIN(MIN(Y1, Y2), Y3) + 0xF) >> 4, clipymin); + int maxy = MIN((MAX(MAX(Y1, Y2), Y3) + 0xF) >> 4, clipymax); + if (minx >= maxx || miny >= maxy) + return; + + // Block size, standard 8x8 (must be power of two) + const int q = 8; + + // Start in corner of 8x8 block + minx &= ~(q - 1); + miny &= ~(q - 1); + + dest += miny * pitch; + + // Half-edge constants + int C1 = DY12 * X1 - DX12 * Y1; + int C2 = DY23 * X2 - DX23 * Y2; + int C3 = DY31 * X3 - DX31 * Y3; + + // Correct for fill convention + if (DY12 < 0 || (DY12 == 0 && DX12 > 0)) C1++; + if (DY23 < 0 || (DY23 == 0 && DX23 > 0)) C2++; + if (DY31 < 0 || (DY31 == 0 && DX31 > 0)) C3++; + + // Gradients + float gradWX = gradx(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + float gradWY = grady(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + float startW = v1.w + gradWX * (minx - v1.x) + gradWY * (miny - v1.y); + float gradVaryingX[TriVertex::NumVarying], gradVaryingY[TriVertex::NumVarying], startVarying[TriVertex::NumVarying]; + for (int i = 0; i < TriVertex::NumVarying; i++) + { + gradVaryingX[i] = gradx(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * 
v2.w, v3.varying[i] * v3.w); + gradVaryingY[i] = grady(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + startVarying[i] = v1.varying[i] * v1.w + gradVaryingX[i] * (minx - v1.x) + gradVaryingY[i] * (miny - v1.y); + } + + // Loop through blocks + for (int y = miny; y < maxy; y += q) + { + for (int x = minx; x < maxx; x += q) { - clipcount += (cliptop[x + ix] > y) || (clipbottom[x + ix] < y + q - 1); - } + // Corners of block + int x0 = x << 4; + int x1 = (x + q - 1) << 4; + int y0 = y << 4; + int y1 = (y + q - 1) << 4; - // Calculate varying variables for affine block - float offx0 = (x - minx) + 0.5f; - float offy0 = (y - miny) + 0.5f; - float offx1 = offx0 + q; - float offy1 = offy0 + q; - float rcpWTL = 1.0f / (startW + offx0 * gradWX + offy0 * gradWY); - float rcpWTR = 1.0f / (startW + offx1 * gradWX + offy0 * gradWY); - float rcpWBL = 1.0f / (startW + offx0 * gradWX + offy1 * gradWY); - float rcpWBR = 1.0f / (startW + offx1 * gradWX + offy1 * gradWY); - float varyingTL[TriVertex::NumVarying]; - float varyingTR[TriVertex::NumVarying]; - float varyingBL[TriVertex::NumVarying]; - float varyingBR[TriVertex::NumVarying]; - for (int i = 0; i < TriVertex::NumVarying; i++) - { - varyingTL[i] = (startVarying[i] + offx0 * gradVaryingX[i] + offy0 * gradVaryingY[i]) * rcpWTL; - varyingTR[i] = (startVarying[i] + offx1 * gradVaryingX[i] + offy0 * gradVaryingY[i]) * rcpWTR; - varyingBL[i] = ((startVarying[i] + offx0 * gradVaryingX[i] + offy1 * gradVaryingY[i]) * rcpWBL - varyingTL[i]) * (1.0f / q); - varyingBR[i] = ((startVarying[i] + offx1 * gradVaryingX[i] + offy1 * gradVaryingY[i]) * rcpWBR - varyingTR[i]) * (1.0f / q); - } + // Evaluate half-space functions + bool a00 = C1 + DX12 * y0 - DY12 * x0 > 0; + bool a10 = C1 + DX12 * y0 - DY12 * x1 > 0; + bool a01 = C1 + DX12 * y1 - DY12 * x0 > 0; + bool a11 = C1 + DX12 * y1 - DY12 * x1 > 0; + int a = (a00 << 0) | (a10 << 1) | (a01 << 2) | (a11 << 3); - uint32_t *buffer = 
dest; + bool b00 = C2 + DX23 * y0 - DY23 * x0 > 0; + bool b10 = C2 + DX23 * y0 - DY23 * x1 > 0; + bool b01 = C2 + DX23 * y1 - DY23 * x0 > 0; + bool b11 = C2 + DX23 * y1 - DY23 * x1 > 0; + int b = (b00 << 0) | (b10 << 1) | (b01 << 2) | (b11 << 3); - // Accept whole block when totally covered - if (a == 0xF && b == 0xF && c == 0xF && clipcount == 0) - { - for (int iy = 0; iy < q; iy++) + bool c00 = C3 + DX31 * y0 - DY31 * x0 > 0; + bool c10 = C3 + DX31 * y0 - DY31 * x1 > 0; + bool c01 = C3 + DX31 * y1 - DY31 * x0 > 0; + bool c11 = C3 + DX31 * y1 - DY31 * x1 > 0; + int c = (c00 << 0) | (c10 << 1) | (c01 << 2) | (c11 << 3); + + // Skip block when outside an edge + if (a == 0x0 || b == 0x0 || c == 0x0) continue; + + // Check if block needs clipping + int clipcount = 0; + for (int ix = 0; ix < q; ix++) { - float varying[TriVertex::NumVarying], varyingStep[TriVertex::NumVarying]; - for (int i = 0; i < TriVertex::NumVarying; i++) - { - varying[i] = varyingTL[i] + varyingBL[i] * iy; - varyingStep[i] = (varyingTR[i] + varyingBR[i] * iy - varying[i]) * (1.0f / q); - } - - for (int ix = x; ix < x + q; ix++) - { - uint32_t red = (uint32_t)clamp(varying[0] * 255.0f + 0.5f, 0.0f, 255.0f); - uint32_t green = (uint32_t)clamp(varying[1] * 255.0f + 0.5f, 0.0f, 255.0f); - uint32_t blue = (uint32_t)clamp(varying[2] * 255.0f + 0.5f, 0.0f, 255.0f); - - buffer[ix] = 0xff000000 | (red << 16) | (green << 8) | blue; - - for (int i = 0; i < TriVertex::NumVarying; i++) - varying[i] += varyingStep[i]; - } - - buffer += pitch; + clipcount += (cliptop[x + ix] > y) || (clipbottom[x + ix] < y + q - 1); } - } - else // Partially covered block - { - int CY1 = C1 + DX12 * y0 - DY12 * x0; - int CY2 = C2 + DX23 * y0 - DY23 * x0; - int CY3 = C3 + DX31 * y0 - DY31 * x0; - for (int iy = 0; iy < q; iy++) + // Calculate varying variables for affine block + float offx0 = (x - minx) + 0.5f; + float offy0 = (y - miny) + 0.5f; + float offx1 = offx0 + q; + float offy1 = offy0 + q; + float rcpWTL = 1.0f / (startW 
+ offx0 * gradWX + offy0 * gradWY); + float rcpWTR = 1.0f / (startW + offx1 * gradWX + offy0 * gradWY); + float rcpWBL = 1.0f / (startW + offx0 * gradWX + offy1 * gradWY); + float rcpWBR = 1.0f / (startW + offx1 * gradWX + offy1 * gradWY); + float varyingTL[TriVertex::NumVarying]; + float varyingTR[TriVertex::NumVarying]; + float varyingBL[TriVertex::NumVarying]; + float varyingBR[TriVertex::NumVarying]; + for (int i = 0; i < TriVertex::NumVarying; i++) { - int CX1 = CY1; - int CX2 = CY2; - int CX3 = CY3; + varyingTL[i] = (startVarying[i] + offx0 * gradVaryingX[i] + offy0 * gradVaryingY[i]) * rcpWTL; + varyingTR[i] = (startVarying[i] + offx1 * gradVaryingX[i] + offy0 * gradVaryingY[i]) * rcpWTR; + varyingBL[i] = ((startVarying[i] + offx0 * gradVaryingX[i] + offy1 * gradVaryingY[i]) * rcpWBL - varyingTL[i]) * (1.0f / q); + varyingBR[i] = ((startVarying[i] + offx1 * gradVaryingX[i] + offy1 * gradVaryingY[i]) * rcpWBR - varyingTR[i]) * (1.0f / q); + } - float varying[TriVertex::NumVarying], varyingStep[TriVertex::NumVarying]; - for (int i = 0; i < TriVertex::NumVarying; i++) + uint32_t *buffer = dest; + + // Accept whole block when totally covered + if (a == 0xF && b == 0xF && c == 0xF && clipcount == 0) + { + for (int iy = 0; iy < q; iy++) { - varying[i] = varyingTL[i] + varyingBL[i] * iy; - varyingStep[i] = (varyingTR[i] + varyingBR[i] * iy - varying[i]) * (1.0f / q); - } - - for (int ix = x; ix < x + q; ix++) - { - bool visible = (cliptop[ix] <= y + iy) && (clipbottom[ix] >= y + iy); - - if (CX1 > 0 && CX2 > 0 && CX3 > 0 && visible) + float varying[TriVertex::NumVarying], varyingStep[TriVertex::NumVarying]; + for (int i = 0; i < TriVertex::NumVarying; i++) { - uint32_t red = (uint32_t)clamp(varying[0] * 255.0f + 0.5f, 0.0f, 255.0f); - uint32_t green = (uint32_t)clamp(varying[1] * 255.0f + 0.5f, 0.0f, 255.0f); - uint32_t blue = (uint32_t)clamp(varying[2] * 255.0f + 0.5f, 0.0f, 255.0f); - - buffer[ix] = 0xff000000 | (red << 16) | (green << 8) | blue; + varying[i] = 
varyingTL[i] + varyingBL[i] * iy; + varyingStep[i] = (varyingTR[i] + varyingBR[i] * iy - varying[i]) * (1.0f / q); } - for (int i = 0; i < TriVertex::NumVarying; i++) - varying[i] += varyingStep[i]; + if (!thread->skipped_by_thread(y + iy)) + { + for (int ix = x; ix < x + q; ix++) + { + uint32_t red = (uint32_t)clamp(varying[0] * 255.0f + 0.5f, 0.0f, 255.0f); + uint32_t green = (uint32_t)clamp(varying[1] * 255.0f + 0.5f, 0.0f, 255.0f); + uint32_t blue = (uint32_t)clamp(varying[2] * 255.0f + 0.5f, 0.0f, 255.0f); - CX1 -= FDY12; - CX2 -= FDY23; - CX3 -= FDY31; + buffer[ix] = 0xff000000 | (red << 16) | (green << 8) | blue; + + for (int i = 0; i < TriVertex::NumVarying; i++) + varying[i] += varyingStep[i]; + } + } + + buffer += pitch; } + } + else // Partially covered block + { + int CY1 = C1 + DX12 * y0 - DY12 * x0; + int CY2 = C2 + DX23 * y0 - DY23 * x0; + int CY3 = C3 + DX31 * y0 - DY31 * x0; - CY1 += FDX12; - CY2 += FDX23; - CY3 += FDX31; + for (int iy = 0; iy < q; iy++) + { + int CX1 = CY1; + int CX2 = CY2; + int CX3 = CY3; - buffer += pitch; + float varying[TriVertex::NumVarying], varyingStep[TriVertex::NumVarying]; + for (int i = 0; i < TriVertex::NumVarying; i++) + { + varying[i] = varyingTL[i] + varyingBL[i] * iy; + varyingStep[i] = (varyingTR[i] + varyingBR[i] * iy - varying[i]) * (1.0f / q); + } + + if (!thread->skipped_by_thread(y + iy)) + { + for (int ix = x; ix < x + q; ix++) + { + bool visible = (cliptop[ix] <= y + iy) && (clipbottom[ix] >= y + iy); + + if (CX1 > 0 && CX2 > 0 && CX3 > 0 && visible) + { + uint32_t red = (uint32_t)clamp(varying[0] * 255.0f + 0.5f, 0.0f, 255.0f); + uint32_t green = (uint32_t)clamp(varying[1] * 255.0f + 0.5f, 0.0f, 255.0f); + uint32_t blue = (uint32_t)clamp(varying[2] * 255.0f + 0.5f, 0.0f, 255.0f); + + buffer[ix] = 0xff000000 | (red << 16) | (green << 8) | blue; + } + + for (int i = 0; i < TriVertex::NumVarying; i++) + varying[i] += varyingStep[i]; + + CX1 -= FDY12; + CX2 -= FDY23; + CX3 -= FDY31; + } + } + + CY1 += FDX12; 
+ CY2 += FDX23; + CY3 += FDX31; + + buffer += pitch; + } + } + } + + dest += q * pitch; + } + } + + bool cullhalfspace(float clipdistance1, float clipdistance2, float &t1, float &t2) + { + if (clipdistance1 < 0.0f && clipdistance2 < 0.0f) + return true; + + if (clipdistance1 < 0.0f) + t1 = MAX(-clipdistance1 / (clipdistance2 - clipdistance1), t1); + + if (clipdistance2 < 0.0f) + t2 = MIN(1.0f + clipdistance2 / (clipdistance1 - clipdistance2), t2); + + return false; + } + + void clipedge(const TriVertex &v1, const TriVertex &v2, TriVertex *clippedvert, int &numclipvert) + { + // Clip and cull so that the following is true for all vertices: + // -v.w <= v.x <= v.w + // -v.w <= v.y <= v.w + // -v.w <= v.z <= v.w + + float t1 = 0.0f, t2 = 1.0f; + bool culled = + cullhalfspace(v1.x + v1.w, v2.x + v2.w, t1, t2) || + cullhalfspace(v1.w - v1.x, v2.w - v2.x, t1, t2) || + cullhalfspace(v1.y + v1.w, v2.y + v2.w, t1, t2) || + cullhalfspace(v1.w - v1.y, v2.w - v2.y, t1, t2) || + cullhalfspace(v1.z + v1.w, v2.z + v2.w, t1, t2) || + cullhalfspace(v1.w - v1.z, v2.w - v2.z, t1, t2); + if (culled) + return; + + if (t1 == 0.0f) + { + clippedvert[numclipvert++] = v1; + } + else + { + auto &v = clippedvert[numclipvert++]; + v.x = v1.x * (1.0f - t1) + v2.x * t1; + v.y = v1.y * (1.0f - t1) + v2.y * t1; + v.z = v1.z * (1.0f - t1) + v2.z * t1; + v.w = v1.w * (1.0f - t1) + v2.w * t1; + for (int i = 0; i < TriVertex::NumVarying; i++) + v.varying[i] = v1.varying[i] * (1.0f - t1) + v2.varying[i] * t1; + } + + if (t2 != 1.0f) + { + auto &v = clippedvert[numclipvert++]; + v.x = v1.x * (1.0f - t2) + v2.x * t2; + v.y = v1.y * (1.0f - t2) + v2.y * t2; + v.z = v1.z * (1.0f - t2) + v2.z * t2; + v.w = v1.w * (1.0f - t2) + v2.w * t2; + for (int i = 0; i < TriVertex::NumVarying; i++) + v.varying[i] = v1.varying[i] * (1.0f - t2) + v2.varying[i] * t2; + } + } + + void draw_triangles(const VSMatrix &objectToWorld, const TriVertex *vinput, int vcount, int clipleft, int clipright, const short *cliptop, const 
short *clipbottom, DrawerThread *thread) + { + for (int i = 0; i < vcount / 3; i++) + { + TriVertex vert[3]; + + // Vertex shader stuff: + for (int j = 0; j < 3; j++) + { + auto &v = vert[j]; + v = *(vinput++); + + // Apply object to world transform: + const float *matrix = objectToWorld.get(); + float vx = matrix[0 * 4 + 0] * v.x + matrix[1 * 4 + 0] * v.y + matrix[2 * 4 + 0] * v.z + matrix[3 * 4 + 0] * v.w; + float vy = matrix[0 * 4 + 1] * v.x + matrix[1 * 4 + 1] * v.y + matrix[2 * 4 + 1] * v.z + matrix[3 * 4 + 1] * v.w; + float vz = matrix[0 * 4 + 2] * v.x + matrix[1 * 4 + 2] * v.y + matrix[2 * 4 + 2] * v.z + matrix[3 * 4 + 2] * v.w; + float vw = matrix[0 * 4 + 3] * v.x + matrix[1 * 4 + 3] * v.y + matrix[2 * 4 + 3] * v.z + matrix[3 * 4 + 3] * v.w; + v.x = vx; + v.y = vy; + v.z = vz; + v.w = vw; + + // The software renderer world to clip transform: + double nearp = 5.0f; + double farp = 65536.f; + double tr_x = v.x - ViewPos.X; + double tr_y = v.y - ViewPos.Y; + double tr_z = v.z - ViewPos.Z; + double tx = tr_x * ViewSin - tr_y * ViewCos; + double tz = tr_x * ViewTanCos + tr_y * ViewTanSin; + v.x = (float)tx; + v.y = (float)tr_z; + v.z = (float)(-tz * (farp + nearp) / (nearp - farp) + (2.0f * farp * nearp) / (nearp - farp)); + v.w = (float)tz; + } + + // Cull, clip and generate additional vertices as needed + TriVertex clippedvert[6]; + int numclipvert = 0; + clipedge(vert[0], vert[1], clippedvert, numclipvert); + clipedge(vert[1], vert[2], clippedvert, numclipvert); + clipedge(vert[2], vert[0], clippedvert, numclipvert); + + // Map to 2D viewport: + for (int j = 0; j < numclipvert; j++) + { + auto &v = clippedvert[j]; + + // Calculate normalized device coordinates: + v.w = 1.0f / v.w; + v.x *= v.w; + v.y *= v.w; + v.z *= v.w; + + // Apply viewport scale to get screen coordinates: + v.x = (float)(CenterX + v.x * CenterX); + v.y = (float)(CenterY - v.y * InvZtoScale); + } + + // Draw screen triangles + bool ccw = false; + if (ccw) + { + for (int i = numclipvert; i 
> 1; i--) + { + triangle((uint32_t*)dc_destorg, dc_pitch, clippedvert[numclipvert - 1], clippedvert[i - 1], clippedvert[i - 2], clipleft, clipright, cliptop, clipbottom, thread); + } + } + else + { + for (int i = 2; i < numclipvert; i++) + { + triangle((uint32_t*)dc_destorg, dc_pitch, clippedvert[0], clippedvert[i - 1], clippedvert[i], clipleft, clipright, cliptop, clipbottom, thread); } } } - - dest += q * pitch; } -} -bool cullhalfspace(float clipdistance1, float clipdistance2, float &t1, float &t2) + VSMatrix objectToWorld; + const TriVertex *vertices; + int count; + int clipleft; + int clipright; + const short *clipdata; +}; + +void R_DrawTriangles(const VSMatrix &objectToWorld, const TriVertex *vertices, int count, int clipleft, int clipright, const short *cliptop, const short *clipbottom) { - if (clipdistance1 < 0.0f && clipdistance2 < 0.0f) - return true; - - if (clipdistance1 < 0.0f) - t1 = MAX(-clipdistance1 / (clipdistance2 - clipdistance1), t1); - - if (clipdistance2 < 0.0f) - t2 = MIN(1.0f + clipdistance2 / (clipdistance1 - clipdistance2), t2); - - return false; -} - -void clipedge(const TriVertex &v1, const TriVertex &v2, TriVertex *clippedvert, int &numclipvert) -{ - // Clip and cull so that the following is true for all vertices: - // -v.w <= v.x <= v.w - // -v.w <= v.y <= v.w - // -v.w <= v.z <= v.w - - float t1 = 0.0f, t2 = 1.0f; - bool culled = - cullhalfspace(v1.x + v1.w, v2.x + v2.w, t1, t2) || - cullhalfspace(v1.w - v1.x, v2.w - v2.x, t1, t2) || - cullhalfspace(v1.y + v1.w, v2.y + v2.w, t1, t2) || - cullhalfspace(v1.w - v1.y, v2.w - v2.y, t1, t2) || - cullhalfspace(v1.z + v1.w, v2.z + v2.w, t1, t2) || - cullhalfspace(v1.w - v1.z, v2.w - v2.z, t1, t2); - if (culled) + if (clipright < clipleft || clipleft < 0 || clipright > MAXWIDTH) return; - if (t1 == 0.0f) + int cliplength = clipright - clipleft + 1; + short *clipdata = (short*)DrawerCommandQueue::AllocMemory(cliplength * 2 * sizeof(short)); + if (!clipdata) { - clippedvert[numclipvert++] = 
v1; - } - else - { - auto &v = clippedvert[numclipvert++]; - v.x = v1.x * (1.0f - t1) + v2.x * t1; - v.y = v1.y * (1.0f - t1) + v2.y * t1; - v.z = v1.z * (1.0f - t1) + v2.z * t1; - v.w = v1.w * (1.0f - t1) + v2.w * t1; - for (int i = 0; i < TriVertex::NumVarying; i++) - v.varying[i] = v1.varying[i] * (1.0f - t1) + v2.varying[i] * t1; + DrawerCommandQueue::WaitForWorkers(); + clipdata = (short*)DrawerCommandQueue::AllocMemory(cliplength * 2 * sizeof(short)); + if (!clipdata) + return; } - if (t2 != 1.0f) - { - auto &v = clippedvert[numclipvert++]; - v.x = v1.x * (1.0f - t2) + v2.x * t2; - v.y = v1.y * (1.0f - t2) + v2.y * t2; - v.z = v1.z * (1.0f - t2) + v2.z * t2; - v.w = v1.w * (1.0f - t2) + v2.w * t2; - for (int i = 0; i < TriVertex::NumVarying; i++) - v.varying[i] = v1.varying[i] * (1.0f - t2) + v2.varying[i] * t2; - } -} + for (int i = 0; i < cliplength; i++) + clipdata[i] = cliptop[clipleft + i]; + for (int i = 0; i < cliplength; i++) + clipdata[cliplength + i] = clipbottom[clipleft + i]; -void R_DrawTriangle() -{ - int clipleft = 0; - int clipright = viewwidth - 1; - short cliptop[MAXWIDTH]; - short clipbottom[MAXWIDTH]; - for (int i = clipleft; i < clipright; i++) - { - cliptop[i] = (i - clipleft) / 4; - clipbottom[i] = viewheight - 1 - (i - clipleft) / 4; - } - - TriVertex cube[6 * 6] = - { - {-1.0f, 1.0f, 1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, - { 1.0f, 1.0f, 1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, - { 1.0f, -1.0f, 1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, - - { 1.0f, -1.0f, 1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, - {-1.0f, -1.0f, 1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, - {-1.0f, 1.0f, 1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, - - - { 1.0f, -1.0f, -1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, - { 1.0f, 1.0f, -1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, - {-1.0f, 1.0f, -1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, - - {-1.0f, 1.0f, -1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, - {-1.0f, -1.0f, -1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, - { 1.0f, -1.0f, -1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, - - - { 1.0f, 1.0f, -1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, - { 1.0f, 1.0f, 1.0f, 1.0f, 
0.0f, 0.0f, 0.0f }, - {-1.0f, 1.0f, 1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, - - {-1.0f, 1.0f, 1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, - {-1.0f, 1.0f, -1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, - { 1.0f, 1.0f, -1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, - - - {-1.0f, -1.0f, 1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, - { 1.0f, -1.0f, 1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, - { 1.0f, -1.0f, -1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, - - { 1.0f, -1.0f, -1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, - {-1.0f, -1.0f, -1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, - {-1.0f, -1.0f, 1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, - - - { 1.0f, -1.0f, 1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, - { 1.0f, 1.0f, 1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, - { 1.0f, 1.0f, -1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, - - { 1.0f, 1.0f, -1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, - { 1.0f, -1.0f, -1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, - { 1.0f, -1.0f, 1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, - - - {-1.0f, 1.0f, -1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, - {-1.0f, 1.0f, 1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, - {-1.0f, -1.0f, 1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, - - {-1.0f, -1.0f, 1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, - {-1.0f, -1.0f, -1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, - {-1.0f, 1.0f, -1.0f, 1.0f, 0.0f, 0.0f, 0.0f } - }; - - for (int i = 0; i < 6; i++) - { - cube[i * 6 + 0].varying[0] = 1.0f; - cube[i * 6 + 1].varying[1] = 1.0f; - cube[i * 6 + 2].varying[2] = 1.0f; - cube[i * 6 + 3].varying[2] = 1.0f; - cube[i * 6 + 4].varying[0] = 1.0f; - cube[i * 6 + 4].varying[1] = 1.0f; - cube[i * 6 + 4].varying[2] = 1.0f; - cube[i * 6 + 5].varying[0] = 1.0f; - } - - static float angle = 0.0f; - angle = fmod(angle + 0.5f, 360.0f); - VSMatrix objectToWorld(0); - objectToWorld.translate((float)ViewPos.X, (float)ViewPos.Y + 50.0f, (float)ViewPos.Z); - objectToWorld.rotate(angle, 0.57735f, 0.57735f, 0.57735f); - objectToWorld.scale(10.0f, 10.0f, 10.0f); - - TriVertex *vinput = cube; - for (int i = 0; i < 6 * 6 / 3; i++) - { - TriVertex vert[3]; - - // Vertex shader stuff: - for (int j = 0; j < 3; j++) - { - auto &v = vert[j]; - v = *(vinput++); - - // Apply object to world transform: - const float 
*matrix = objectToWorld.get(); - float vx = matrix[0 * 4 + 0] * v.x + matrix[1 * 4 + 0] * v.y + matrix[2 * 4 + 0] * v.z + matrix[3 * 4 + 0] * v.w; - float vy = matrix[0 * 4 + 1] * v.x + matrix[1 * 4 + 1] * v.y + matrix[2 * 4 + 1] * v.z + matrix[3 * 4 + 1] * v.w; - float vz = matrix[0 * 4 + 2] * v.x + matrix[1 * 4 + 2] * v.y + matrix[2 * 4 + 2] * v.z + matrix[3 * 4 + 2] * v.w; - float vw = matrix[0 * 4 + 3] * v.x + matrix[1 * 4 + 3] * v.y + matrix[2 * 4 + 3] * v.z + matrix[3 * 4 + 3] * v.w; - v.x = vx; - v.y = vy; - v.z = vz; - v.w = vw; - - // The software renderer world to clip transform: - double nearp = 5.0f; - double farp = 65536.f; - double tr_x = v.x - ViewPos.X; - double tr_y = v.y - ViewPos.Y; - double tr_z = v.z - ViewPos.Z; - double tx = tr_x * ViewSin - tr_y * ViewCos; - double tz = tr_x * ViewTanCos + tr_y * ViewTanSin; - v.x = (float)tx; - v.y = (float)tr_z; - v.z = (float)(-tz * (farp + nearp) / (nearp - farp) + (2.0f * farp * nearp) / (nearp - farp)); - v.w = (float)tz; - } - - // Cull, clip and generate additional vertices as needed - TriVertex clippedvert[6]; - int numclipvert = 0; - clipedge(vert[0], vert[1], clippedvert, numclipvert); - clipedge(vert[1], vert[2], clippedvert, numclipvert); - clipedge(vert[2], vert[0], clippedvert, numclipvert); - - // Map to 2D viewport: - for (int j = 0; j < numclipvert; j++) - { - auto &v = clippedvert[j]; - - // Calculate normalized device coordinates: - v.w = 1.0f / v.w; - v.x *= v.w; - v.y *= v.w; - v.z *= v.w; - - // Apply viewport scale to get screen coordinates: - v.x = (float)(CenterX + v.x * CenterX); - v.y = (float)(CenterY - v.y * InvZtoScale); - } - - for (int i = numclipvert; i > 1; i--) - { - triangle((uint32_t*)dc_destorg, dc_pitch, clippedvert[numclipvert - 1], clippedvert[i - 1], clippedvert[i - 2], clipleft, clipright, cliptop, clipbottom); - } - } + DrawerCommandQueue::QueueCommand(objectToWorld, vertices, count, clipleft, clipright, clipdata); } 
///////////////////////////////////////////////////////////////////////////// diff --git a/src/r_plane.cpp b/src/r_plane.cpp index 5dea6a03ad..b8cda2fcb1 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -59,12 +59,14 @@ #include "v_palette.h" #include "r_data/colormaps.h" #include "r_draw_rgba.h" +#include "gl/data/gl_matrix.h" #ifdef _MSC_VER #pragma warning(disable:4244) #endif CVAR(Bool, r_capsky, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); +CVAR(Bool, r_cubesky, false, 0) //EXTERN_CVAR (Int, tx) //EXTERN_CVAR (Int, ty) @@ -1154,9 +1156,104 @@ static void R_DrawCapSky(visplane_t *pl) } } +static void R_DrawCubeSky(visplane_t *pl) +{ + int x1 = pl->left; + int x2 = pl->right; + short *uwal = (short *)pl->top; + short *dwal = (short *)pl->bottom; + + static TriVertex cube[6 * 6] = + { + { -1.0f, 1.0f, 1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, + { 1.0f, 1.0f, 1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, + { 1.0f, -1.0f, 1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, + + { 1.0f, -1.0f, 1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, + { -1.0f, -1.0f, 1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, + { -1.0f, 1.0f, 1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, + + + { 1.0f, -1.0f, -1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, + { 1.0f, 1.0f, -1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, + { -1.0f, 1.0f, -1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, + + { -1.0f, 1.0f, -1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, + { -1.0f, -1.0f, -1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, + { 1.0f, -1.0f, -1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, + + + { 1.0f, 1.0f, -1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, + { 1.0f, 1.0f, 1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, + { -1.0f, 1.0f, 1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, + + { -1.0f, 1.0f, 1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, + { -1.0f, 1.0f, -1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, + { 1.0f, 1.0f, -1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, + + + { -1.0f, -1.0f, 1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, + { 1.0f, -1.0f, 1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, + { 1.0f, -1.0f, -1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, + + { 1.0f, -1.0f, -1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, + { -1.0f, -1.0f, -1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, + { -1.0f, -1.0f, 1.0f, 1.0f, 0.0f, 0.0f, 
0.0f }, + + + { 1.0f, -1.0f, 1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, + { 1.0f, 1.0f, 1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, + { 1.0f, 1.0f, -1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, + + { 1.0f, 1.0f, -1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, + { 1.0f, -1.0f, -1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, + { 1.0f, -1.0f, 1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, + + + { -1.0f, 1.0f, -1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, + { -1.0f, 1.0f, 1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, + { -1.0f, -1.0f, 1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, + + { -1.0f, -1.0f, 1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, + { -1.0f, -1.0f, -1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, + { -1.0f, 1.0f, -1.0f, 1.0f, 0.0f, 0.0f, 0.0f } + }; + + static bool first_time = true; + if (first_time) + { + for (int i = 0; i < 6; i++) + { + cube[i * 6 + 0].varying[0] = 1.0f; + cube[i * 6 + 1].varying[1] = 1.0f; + cube[i * 6 + 2].varying[2] = 1.0f; + cube[i * 6 + 3].varying[2] = 1.0f; + cube[i * 6 + 4].varying[0] = 1.0f; + cube[i * 6 + 4].varying[1] = 1.0f; + cube[i * 6 + 4].varying[2] = 1.0f; + cube[i * 6 + 5].varying[0] = 1.0f; + } + first_time = false; + } + + //static float angle = 0.0f; + //angle = fmod(angle + 0.5f, 360.0f); + VSMatrix objectToWorld(0); + objectToWorld.translate((float)ViewPos.X, (float)ViewPos.Y, (float)ViewPos.Z); + //objectToWorld.rotate(angle, 0.57735f, 0.57735f, 0.57735f); + objectToWorld.scale(100.0f, 100.0f, 100.0f); + + R_DrawTriangles(objectToWorld, cube, 6 * 6, x1, x2 - 1, uwal, dwal); +} + static void R_DrawSky (visplane_t *pl) { - if (r_swtruecolor && r_capsky) + if (r_swtruecolor && r_cubesky) + { + R_DrawCubeSky(pl); + return; + } + else if (r_swtruecolor && r_capsky) { R_DrawCapSky(pl); return; diff --git a/src/r_swrenderer.cpp b/src/r_swrenderer.cpp index 49eb0ef277..372f23accc 100644 --- a/src/r_swrenderer.cpp +++ b/src/r_swrenderer.cpp @@ -47,13 +47,10 @@ EXTERN_CVAR(Bool, r_shadercolormaps) -CVAR(Bool, r_drawtriangle, false, 0) - void R_SWRSetWindow(int windowSize, int fullWidth, int fullHeight, int stHeight, float trueratio); void R_SetupColormap(player_t *); 
void R_SetupFreelook(); void R_InitRenderer(); -void R_DrawTriangle(); FSoftwareRenderer::FSoftwareRenderer() { @@ -195,8 +192,6 @@ void FSoftwareRenderer::RenderView(player_t *player) } R_EndDrawerCommands(); - if (r_swtruecolor && r_drawtriangle) - R_DrawTriangle(); } //========================================================================== diff --git a/src/r_thread.h b/src/r_thread.h index 63c4424420..a2cc8b9732 100644 --- a/src/r_thread.h +++ b/src/r_thread.h @@ -36,6 +36,9 @@ public: uint32_t dc_temp_rgbabuff_rgba[MAXHEIGHT * 4]; uint32_t *dc_temp_rgba; + short triangle_clip_top[MAXWIDTH]; + short triangle_clip_bottom[MAXWIDTH]; + // Checks if a line is rendered by this thread bool line_skipped_by_thread(int line) { From 4ccb69fa614ebb9a276a4e093022de44db31466c Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 19 Oct 2016 23:21:09 +0200 Subject: [PATCH 224/912] Palette version of sky drawers --- src/r_draw.cpp | 240 ++++++++++++++++++++++++++++++++++++++++++++ src/r_draw.h | 5 + src/r_draw_rgba.cpp | 8 +- src/r_plane.cpp | 47 +++++++-- 4 files changed, 285 insertions(+), 15 deletions(-) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 9d7160d012..45758ee610 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -2235,6 +2235,246 @@ void tmvline4_revsubclamp_C () } while (--count); } +void R_DrawSingleSkyCol1(uint32_t solid_top, uint32_t solid_bottom) +{ + uint8_t *dest = dc_dest; + int count = dc_count; + int pitch = dc_pitch; + const uint8_t *source0 = bufplce[0]; + int textureheight0 = bufheight[0]; + + int32_t frac = vplce[0]; + int32_t fracstep = vince[0]; + + int start_fade = 2; // How fast it should fade out + + int solid_top_r = RPART(solid_top); + int solid_top_g = GPART(solid_top); + int solid_top_b = BPART(solid_top); + int solid_bottom_r = RPART(solid_bottom); + int solid_bottom_g = GPART(solid_bottom); + int solid_bottom_b = BPART(solid_bottom); + + for (int index = 0; index < count; index++) + { + uint32_t sample_index = 
(((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; + uint8_t fg = source0[sample_index]; + + int alpha_top = MAX(MIN(frac >> (16 - start_fade), 256), 0); + int alpha_bottom = MAX(MIN(((2 << 24) - frac) >> (16 - start_fade), 256), 0); + + if (alpha_top == 256 && alpha_bottom == 256) + { + *dest = fg; + } + else + { + int inv_alpha_top = 256 - alpha_top; + int inv_alpha_bottom = 256 - alpha_bottom; + + const auto &c = GPalette.BaseColors[fg]; + int c_red = c.r; + int c_green = c.g; + int c_blue = c.b; + c_red = (c_red * alpha_top + solid_top_r * inv_alpha_top) >> 8; + c_green = (c_green * alpha_top + solid_top_g * inv_alpha_top) >> 8; + c_blue = (c_blue * alpha_top + solid_top_b * inv_alpha_top) >> 8; + c_red = (c_red * alpha_bottom + solid_bottom_r * inv_alpha_bottom) >> 8; + c_green = (c_green * alpha_bottom + solid_bottom_g * inv_alpha_bottom) >> 8; + c_blue = (c_blue * alpha_bottom + solid_bottom_b * inv_alpha_bottom) >> 8; + *dest = RGB32k.RGB[(c_red >> 3)][(c_green >> 3)][(c_blue >> 3)]; + } + + frac += fracstep; + dest += pitch; + } +} + +void R_DrawSingleSkyCol4(uint32_t solid_top, uint32_t solid_bottom) +{ + for (int col = 0; col < 4; col++) + { + uint8_t *dest = dc_dest + col; + int count = dc_count; + int pitch = dc_pitch; + const uint8_t *source0 = bufplce[col]; + int textureheight0 = bufheight[0]; + + int32_t frac = vplce[col]; + int32_t fracstep = vince[col]; + + int start_fade = 2; // How fast it should fade out + + int solid_top_r = RPART(solid_top); + int solid_top_g = GPART(solid_top); + int solid_top_b = BPART(solid_top); + int solid_bottom_r = RPART(solid_bottom); + int solid_bottom_g = GPART(solid_bottom); + int solid_bottom_b = BPART(solid_bottom); + + for (int index = 0; index < count; index++) + { + uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; + uint8_t fg = source0[sample_index]; + + int alpha_top = MAX(MIN(frac >> (16 - start_fade), 256), 0); + int alpha_bottom = MAX(MIN(((2 
<< 24) - frac) >> (16 - start_fade), 256), 0); + + if (alpha_top == 256 && alpha_bottom == 256) + { + *dest = fg; + } + else + { + int inv_alpha_top = 256 - alpha_top; + int inv_alpha_bottom = 256 - alpha_bottom; + + const auto &c = GPalette.BaseColors[fg]; + int c_red = c.r; + int c_green = c.g; + int c_blue = c.b; + c_red = (c_red * alpha_top + solid_top_r * inv_alpha_top) >> 8; + c_green = (c_green * alpha_top + solid_top_g * inv_alpha_top) >> 8; + c_blue = (c_blue * alpha_top + solid_top_b * inv_alpha_top) >> 8; + c_red = (c_red * alpha_bottom + solid_bottom_r * inv_alpha_bottom) >> 8; + c_green = (c_green * alpha_bottom + solid_bottom_g * inv_alpha_bottom) >> 8; + c_blue = (c_blue * alpha_bottom + solid_bottom_b * inv_alpha_bottom) >> 8; + *dest = RGB32k.RGB[(c_red >> 3)][(c_green >> 3)][(c_blue >> 3)]; + } + + frac += fracstep; + dest += pitch; + } + } +} + +void R_DrawDoubleSkyCol1(uint32_t solid_top, uint32_t solid_bottom) +{ + uint8_t *dest = dc_dest; + int count = dc_count; + int pitch = dc_pitch; + const uint8_t *source0 = bufplce[0]; + const uint8_t *source1 = bufplce2[0]; + int textureheight0 = bufheight[0]; + uint32_t maxtextureheight1 = bufheight[1] - 1; + + int32_t frac = vplce[0]; + int32_t fracstep = vince[0]; + + int start_fade = 2; // How fast it should fade out + + int solid_top_r = RPART(solid_top); + int solid_top_g = GPART(solid_top); + int solid_top_b = BPART(solid_top); + int solid_bottom_r = RPART(solid_bottom); + int solid_bottom_g = GPART(solid_bottom); + int solid_bottom_b = BPART(solid_bottom); + + for (int index = 0; index < count; index++) + { + uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; + uint8_t fg = source0[sample_index]; + if (fg == 0) + { + uint32_t sample_index2 = MIN(sample_index, maxtextureheight1); + fg = source1[sample_index2]; + } + + int alpha_top = MAX(MIN(frac >> (16 - start_fade), 256), 0); + int alpha_bottom = MAX(MIN(((2 << 24) - frac) >> (16 - start_fade), 256), 
0); + + if (alpha_top == 256 && alpha_bottom == 256) + { + *dest = fg; + } + else + { + int inv_alpha_top = 256 - alpha_top; + int inv_alpha_bottom = 256 - alpha_bottom; + + const auto &c = GPalette.BaseColors[fg]; + int c_red = c.r; + int c_green = c.g; + int c_blue = c.b; + c_red = (c_red * alpha_top + solid_top_r * inv_alpha_top) >> 8; + c_green = (c_green * alpha_top + solid_top_g * inv_alpha_top) >> 8; + c_blue = (c_blue * alpha_top + solid_top_b * inv_alpha_top) >> 8; + c_red = (c_red * alpha_bottom + solid_bottom_r * inv_alpha_bottom) >> 8; + c_green = (c_green * alpha_bottom + solid_bottom_g * inv_alpha_bottom) >> 8; + c_blue = (c_blue * alpha_bottom + solid_bottom_b * inv_alpha_bottom) >> 8; + *dest = RGB32k.RGB[(c_red >> 3)][(c_green >> 3)][(c_blue >> 3)]; + } + + frac += fracstep; + dest += pitch; + } +} + +void R_DrawDoubleSkyCol4(uint32_t solid_top, uint32_t solid_bottom) +{ + for (int col = 0; col < 4; col++) + { + uint8_t *dest = dc_dest + col; + int count = dc_count; + int pitch = dc_pitch; + const uint8_t *source0 = bufplce[col]; + const uint8_t *source1 = bufplce2[col]; + int textureheight0 = bufheight[0]; + uint32_t maxtextureheight1 = bufheight[1] - 1; + + int32_t frac = vplce[col]; + int32_t fracstep = vince[col]; + + int start_fade = 2; // How fast it should fade out + + int solid_top_r = RPART(solid_top); + int solid_top_g = GPART(solid_top); + int solid_top_b = BPART(solid_top); + int solid_bottom_r = RPART(solid_bottom); + int solid_bottom_g = GPART(solid_bottom); + int solid_bottom_b = BPART(solid_bottom); + + for (int index = 0; index < count; index++) + { + uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; + uint8_t fg = source0[sample_index]; + if (fg == 0) + { + uint32_t sample_index2 = MIN(sample_index, maxtextureheight1); + fg = source1[sample_index2]; + } + + int alpha_top = MAX(MIN(frac >> (16 - start_fade), 256), 0); + int alpha_bottom = MAX(MIN(((2 << 24) - frac) >> (16 - start_fade), 
256), 0); + + if (alpha_top == 256 && alpha_bottom == 256) + { + *dest = fg; + } + else + { + int inv_alpha_top = 256 - alpha_top; + int inv_alpha_bottom = 256 - alpha_bottom; + + const auto &c = GPalette.BaseColors[fg]; + int c_red = c.r; + int c_green = c.g; + int c_blue = c.b; + c_red = (c_red * alpha_top + solid_top_r * inv_alpha_top) >> 8; + c_green = (c_green * alpha_top + solid_top_g * inv_alpha_top) >> 8; + c_blue = (c_blue * alpha_top + solid_top_b * inv_alpha_top) >> 8; + c_red = (c_red * alpha_bottom + solid_bottom_r * inv_alpha_bottom) >> 8; + c_green = (c_green * alpha_bottom + solid_bottom_g * inv_alpha_bottom) >> 8; + c_blue = (c_blue * alpha_bottom + solid_bottom_b * inv_alpha_bottom) >> 8; + *dest = RGB32k.RGB[(c_red >> 3)][(c_green >> 3)][(c_blue >> 3)]; + } + + frac += fracstep; + dest += pitch; + } + } +} + //========================================================================== // // R_GetColumn diff --git a/src/r_draw.h b/src/r_draw.h index 7b12c43bb1..601962809b 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -387,6 +387,11 @@ void R_DrawSingleSkyCol4(uint32_t solid_top, uint32_t solid_bottom); void R_DrawDoubleSkyCol1(uint32_t solid_top, uint32_t solid_bottom); void R_DrawDoubleSkyCol4(uint32_t solid_top, uint32_t solid_bottom); +void R_DrawSingleSkyCol1_rgba(uint32_t solid_top, uint32_t solid_bottom); +void R_DrawSingleSkyCol4_rgba(uint32_t solid_top, uint32_t solid_bottom); +void R_DrawDoubleSkyCol1_rgba(uint32_t solid_top, uint32_t solid_bottom); +void R_DrawDoubleSkyCol4_rgba(uint32_t solid_top, uint32_t solid_bottom); + struct TriVertex { TriVertex() { } diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 9b15d06cef..2b9eb86e75 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -1692,22 +1692,22 @@ void R_DrawTriangles(const VSMatrix &objectToWorld, const TriVertex *vertices, i ///////////////////////////////////////////////////////////////////////////// -void R_DrawSingleSkyCol1(uint32_t solid_top, uint32_t 
solid_bottom) +void R_DrawSingleSkyCol1_rgba(uint32_t solid_top, uint32_t solid_bottom) { DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); } -void R_DrawSingleSkyCol4(uint32_t solid_top, uint32_t solid_bottom) +void R_DrawSingleSkyCol4_rgba(uint32_t solid_top, uint32_t solid_bottom) { DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); } -void R_DrawDoubleSkyCol1(uint32_t solid_top, uint32_t solid_bottom) +void R_DrawDoubleSkyCol1_rgba(uint32_t solid_top, uint32_t solid_bottom) { DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); } -void R_DrawDoubleSkyCol4(uint32_t solid_top, uint32_t solid_bottom) +void R_DrawDoubleSkyCol4_rgba(uint32_t solid_top, uint32_t solid_bottom) { DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); } diff --git a/src/r_plane.cpp b/src/r_plane.cpp index b8cda2fcb1..e8fc31ec1d 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -1020,30 +1020,55 @@ static void R_DrawSkyColumnStripe(int start_x, int y1, int y2, int columns, doub angle1 = (DWORD)((UMulScale16(ang, frontcyl) + frontpos) >> FRACBITS); angle2 = (DWORD)((UMulScale16(ang, backcyl) + backpos) >> FRACBITS); - bufplce[i] = (const BYTE *)frontskytex->GetColumnBgra(angle1, nullptr); - bufplce2[i] = backskytex ? (const BYTE *)backskytex->GetColumnBgra(angle2, nullptr) : nullptr; + if (r_swtruecolor) + { + bufplce[i] = (const BYTE *)frontskytex->GetColumnBgra(angle1, nullptr); + bufplce2[i] = backskytex ? (const BYTE *)backskytex->GetColumnBgra(angle2, nullptr) : nullptr; + } + else + { + bufplce[i] = (const BYTE *)frontskytex->GetColumn(angle1, nullptr); + bufplce2[i] = backskytex ? (const BYTE *)backskytex->GetColumn(angle2, nullptr) : nullptr; + } vince[i] = uv_step; vplce[i] = uv_pos; } bufheight[0] = height; bufheight[1] = backskytex ? backskytex->GetHeight() : height; - dc_dest = (ylookup[y1] + start_x) * 4 + dc_destorg; + int pixelsize = r_swtruecolor ? 
4 : 1; + dc_dest = (ylookup[y1] + start_x) * pixelsize + dc_destorg; dc_count = y2 - y1; uint32_t solid_top = frontskytex->GetSkyCapColor(false); uint32_t solid_bottom = frontskytex->GetSkyCapColor(true); - if (columns == 4) - if (!backskytex) - R_DrawSingleSkyCol4(solid_top, solid_bottom); + if (r_swtruecolor) + { + if (columns == 4) + if (!backskytex) + R_DrawSingleSkyCol4_rgba(solid_top, solid_bottom); + else + R_DrawDoubleSkyCol4_rgba(solid_top, solid_bottom); else - R_DrawDoubleSkyCol4(solid_top, solid_bottom); + if (!backskytex) + R_DrawSingleSkyCol1_rgba(solid_top, solid_bottom); + else + R_DrawDoubleSkyCol1_rgba(solid_top, solid_bottom); + } else - if (!backskytex) - R_DrawSingleSkyCol1(solid_top, solid_bottom); + { + if (columns == 4) + if (!backskytex) + R_DrawSingleSkyCol4(solid_top, solid_bottom); + else + R_DrawDoubleSkyCol4(solid_top, solid_bottom); else - R_DrawDoubleSkyCol1(solid_top, solid_bottom); + if (!backskytex) + R_DrawSingleSkyCol1(solid_top, solid_bottom); + else + R_DrawDoubleSkyCol1(solid_top, solid_bottom); + } } static void R_DrawSkyColumn(int start_x, int y1, int y2, int columns) @@ -1253,7 +1278,7 @@ static void R_DrawSky (visplane_t *pl) R_DrawCubeSky(pl); return; } - else if (r_swtruecolor && r_capsky) + else if (r_capsky) { R_DrawCapSky(pl); return; From 1c2dcad36e6b2d568225884f46f3257b820a73da Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 20 Oct 2016 15:16:02 +0200 Subject: [PATCH 225/912] Fix linear skies --- src/r_plane.cpp | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/r_plane.cpp b/src/r_plane.cpp index be8247d26d..3ba6feaf7d 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -1098,7 +1098,15 @@ static void R_DrawSkyColumnStripe(int start_x, int y1, int y2, int columns, doub DWORD ang, angle1, angle2; - ang = (skyangle + xtoviewangle[x]) ^ skyflip; + if (r_linearsky) + { + angle_t xangle = (angle_t)((0.5 - x / (double)viewwidth) * FocalTangent * ANGLE_90); + ang = (skyangle + 
xangle) ^ skyflip; + } + else + { + ang = (skyangle + xtoviewangle[x]) ^ skyflip; + } angle1 = (DWORD)((UMulScale16(ang, frontcyl) + frontpos) >> FRACBITS); angle2 = (DWORD)((UMulScale16(ang, backcyl) + backpos) >> FRACBITS); From 1cd27ca98a01bf9737c721750ffc4b0070aca848 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 20 Oct 2016 21:57:45 +0200 Subject: [PATCH 226/912] Change how the LLVM execution engine is constructed and print which target triple and CPU are being used --- src/r_compiler/llvmdrawers.cpp | 64 +++++++++++++++------------------- 1 file changed, 29 insertions(+), 35 deletions(-) diff --git a/src/r_compiler/llvmdrawers.cpp b/src/r_compiler/llvmdrawers.cpp index d5078a6c0f..c936d62603 100644 --- a/src/r_compiler/llvmdrawers.cpp +++ b/src/r_compiler/llvmdrawers.cpp @@ -446,10 +446,18 @@ LLVMProgram::LLVMProgram() InitializeNativeTarget(); InitializeNativeTargetAsmPrinter(); + mContext = std::make_unique(); + mModule = std::make_unique("render", context()); +} + +void LLVMProgram::CreateEE() +{ + using namespace llvm; + std::string errorstring; +#if 0 std::string targetTriple = sys::getProcessTriple(); - std::string cpuName = sys::getHostCPUName(); StringMap cpuFeatures; sys::getHostCPUFeatures(cpuFeatures); std::string cpuFeaturesStr; @@ -461,40 +469,32 @@ LLVMProgram::LLVMProgram() cpuFeaturesStr += it.getKey(); } - DPrintf(DMSG_SPAMMY, "LLVM target triple: %s\n", targetTriple.c_str()); - DPrintf(DMSG_SPAMMY, "LLVM CPU and features: %s, %s\n", cpuName.c_str(), cpuFeaturesStr.c_str()); - - const Target *target = TargetRegistry::lookupTarget(targetTriple, errorstring); - if (!target) - I_FatalError("Could not find LLVM target: %s", errorstring.c_str()); - - TargetOptions opt; -#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 9) - Reloc::Model relocModel = Reloc::Default; -#else - auto relocModel = Optional(); + Printf("LLVM CPU features: %s\n", cpuFeaturesStr.c_str()); #endif - machine = 
target->createTargetMachine(targetTriple, cpuName, cpuFeaturesStr, opt, relocModel, CodeModel::JITDefault, CodeGenOpt::Aggressive); + + llvm::Module *module = mModule.get(); + EngineBuilder engineBuilder(std::move(mModule)); + engineBuilder.setErrorStr(&errorstring); + engineBuilder.setOptLevel(CodeGenOpt::Aggressive); + engineBuilder.setEngineKind(EngineKind::JIT); + engineBuilder.setMCPU(sys::getHostCPUName()); + machine = engineBuilder.selectTarget(); if (!machine) I_FatalError("Could not create LLVM target machine"); - mContext = std::make_unique(); + std::string targetTriple = machine->getTargetTriple().getTriple(); + std::string cpuName = machine->getTargetCPU(); + Printf("LLVM target triple: %s\n", targetTriple.c_str()); + Printf("LLVM target CPU: %s\n", cpuName.c_str()); - mModule = std::make_unique("render", context()); - mModule->setTargetTriple(targetTriple); + module->setTargetTriple(targetTriple); #if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 8) - mModule->setDataLayout(new DataLayout(*machine->getSubtargetImpl()->getDataLayout())); + module->setDataLayout(new DataLayout(*machine->getSubtargetImpl()->getDataLayout())); #else - mModule->setDataLayout(machine->createDataLayout()); + module->setDataLayout(machine->createDataLayout()); #endif -} - -void LLVMProgram::CreateEE() -{ - using namespace llvm; - - legacy::FunctionPassManager PerFunctionPasses(mModule.get()); + legacy::FunctionPassManager PerFunctionPasses(module); legacy::PassManager PerModulePasses; #if LLVM_VERSION_MAJOR > 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 8) @@ -514,7 +514,7 @@ void LLVMProgram::CreateEE() // Run function passes: PerFunctionPasses.doInitialization(); - for (llvm::Function &func : *mModule.get()) + for (llvm::Function &func : *module) { if (!func.isDeclaration()) PerFunctionPasses.run(func); @@ -522,15 +522,9 @@ void LLVMProgram::CreateEE() PerFunctionPasses.doFinalization(); // Run module passes: - 
PerModulePasses.run(*mModule.get()); + PerModulePasses.run(*module); - std::string errorstring; - - EngineBuilder engineBuilder(std::move(mModule)); - engineBuilder.setErrorStr(&errorstring); - engineBuilder.setOptLevel(CodeGenOpt::Aggressive); - engineBuilder.setRelocationModel(Reloc::Static); - engineBuilder.setEngineKind(EngineKind::JIT); + // Create execution engine and generate machine code mEngine.reset(engineBuilder.create(machine)); if (!mEngine) I_FatalError("Could not create LLVM execution engine: %s", errorstring.c_str()); From b327a3312e021a186f6396f3eb0701b455a15375 Mon Sep 17 00:00:00 2001 From: raa-eruanna Date: Wed, 19 Oct 2016 02:02:37 -0400 Subject: [PATCH 227/912] - Fully Implemented GZDoom fullbright emulation --- src/r_bsp.cpp | 5 +++-- src/r_data/colormaps.cpp | 4 ++++ src/r_data/colormaps.h | 1 + src/r_main.cpp | 7 +++++++ src/r_segs.cpp | 11 ++++++----- src/r_things.cpp | 17 +++++++++++------ 6 files changed, 32 insertions(+), 13 deletions(-) diff --git a/src/r_bsp.cpp b/src/r_bsp.cpp index 934d2d3e54..8d423b3b31 100644 --- a/src/r_bsp.cpp +++ b/src/r_bsp.cpp @@ -105,6 +105,7 @@ TArray WallPortals(1000); // note: this array needs to go away as subsector_t *InSubsector; CVAR (Bool, r_drawflat, false, 0) // [RH] Don't texture segs? +EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor); void R_StoreWallRange (int start, int stop); @@ -1108,7 +1109,7 @@ void R_Subsector (subsector_t *sub) } else { - basecolormap = frontsector->ColorMap; + basecolormap = (r_fullbrightignoresectorcolor && fixedlightlev >= 0) ? &FullNormalLight : frontsector->ColorMap; } portal = frontsector->ValidatePortal(sector_t::ceiling); @@ -1142,7 +1143,7 @@ void R_Subsector (subsector_t *sub) } else { - basecolormap = frontsector->ColorMap; + basecolormap = (r_fullbrightignoresectorcolor && fixedlightlev >= 0) ? 
&FullNormalLight : frontsector->ColorMap; } // killough 3/7/98: Add (x,y) offsets to flats, add deep water check diff --git a/src/r_data/colormaps.cpp b/src/r_data/colormaps.cpp index 3bfc89b4b5..16ac0291ea 100644 --- a/src/r_data/colormaps.cpp +++ b/src/r_data/colormaps.cpp @@ -59,6 +59,7 @@ static bool R_CheckForFixedLights(const BYTE *colormaps); extern "C" { FDynamicColormap NormalLight; +FDynamicColormap FullNormalLight; //[SP] Emulate GZDoom brightness } bool NormalLightHasFixedLights; @@ -547,6 +548,9 @@ void R_InitColormaps () NormalLight.Color = PalEntry (255, 255, 255); NormalLight.Fade = 0; NormalLight.Maps = realcolormaps.Maps; + FullNormalLight.Color = PalEntry (255, 255, 255); + FullNormalLight.Fade = 0; + FullNormalLight.Maps = realcolormaps.Maps; NormalLightHasFixedLights = R_CheckForFixedLights(realcolormaps.Maps); numfakecmaps = fakecmaps.Size(); diff --git a/src/r_data/colormaps.h b/src/r_data/colormaps.h index ca15748930..8b0bf187bf 100644 --- a/src/r_data/colormaps.h +++ b/src/r_data/colormaps.h @@ -89,6 +89,7 @@ extern BYTE DesaturateColormap[31][256]; extern "C" { extern FDynamicColormap NormalLight; +extern FDynamicColormap FullNormalLight; } extern bool NormalLightHasFixedLights; diff --git a/src/r_main.cpp b/src/r_main.cpp index 6840730466..9a582616e7 100644 --- a/src/r_main.cpp +++ b/src/r_main.cpp @@ -451,6 +451,8 @@ void R_CopyStackedViewParameters() // //========================================================================== +EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor) + void R_SetupColormap(player_t *player) { realfixedcolormap = NULL; @@ -477,6 +479,11 @@ void R_SetupColormap(player_t *player) else if (player->fixedlightlevel >= 0 && player->fixedlightlevel < NUMCOLORMAPS) { fixedlightlev = player->fixedlightlevel * 256; + // [SP] Emulate GZDoom's light-amp goggles. 
+ if (r_fullbrightignoresectorcolor && fixedlightlev >= 0) + { + fixedcolormap = &FullNormalLight; + } } } // [RH] Inverse light for shooting the Sigil diff --git a/src/r_segs.cpp b/src/r_segs.cpp index 0f723f9be3..859bc8f1bd 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -58,6 +58,7 @@ CVAR(Bool, r_np2, true, 0) +EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor); //CVAR (Int, ty, 8, 0) //CVAR (Int, tx, 8, 0) @@ -318,7 +319,7 @@ void R_RenderMaskedSegRange (drawseg_t *ds, int x1, int x2) rw_scalestep = ds->iscalestep; if (fixedlightlev >= 0) - R_SetColorMapLight(basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); + R_SetColorMapLight((r_fullbrightignoresectorcolor) ? &FullNormalLight : basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); else if (fixedcolormap != NULL) R_SetColorMapLight(fixedcolormap, 0, 0); @@ -635,7 +636,7 @@ void R_RenderFakeWall(drawseg_t *ds, int x1, int x2, F3DFloor *rover) } if (fixedlightlev >= 0) - R_SetColorMapLight(basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); + R_SetColorMapLight((r_fullbrightignoresectorcolor) ? &FullNormalLight : basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); else if (fixedcolormap != NULL) R_SetColorMapLight(fixedcolormap, 0, 0); @@ -1762,7 +1763,7 @@ void R_RenderSegLoop () fixed_t xoffset = rw_offset; if (fixedlightlev >= 0) - R_SetColorMapLight(basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); + R_SetColorMapLight((r_fullbrightignoresectorcolor) ? &FullNormalLight : basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); else if (fixedcolormap != NULL) R_SetColorMapLight(fixedcolormap, 0, 0); @@ -3161,11 +3162,11 @@ static void R_RenderDecal (side_t *wall, DBaseDecal *decal, drawseg_t *clipper, rw_light = rw_lightleft + (x1 - WallC.sx1) * rw_lightstep; if (fixedlightlev >= 0) - R_SetColorMapLight(usecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); + R_SetColorMapLight((r_fullbrightignoresectorcolor) ? 
&FullNormalLight : usecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); else if (fixedcolormap != NULL) R_SetColorMapLight(fixedcolormap, 0, 0); else if (!foggy && (decal->RenderFlags & RF_FULLBRIGHT)) - R_SetColorMapLight(usecolormap, 0, 0); + R_SetColorMapLight((r_fullbrightignoresectorcolor) ? &FullNormalLight : usecolormap, 0, 0); else calclighting = true; diff --git a/src/r_things.cpp b/src/r_things.cpp index bb8be4c6bd..95ffc3703a 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -570,7 +570,7 @@ void R_DrawWallSprite(vissprite_t *spr) else if (fixedcolormap != NULL) R_SetColorMapLight(fixedcolormap, 0, 0); else if (!foggy && (spr->renderflags & RF_FULLBRIGHT)) - R_SetColorMapLight((r_fullbrightignoresectorcolor) ? &NormalLight : usecolormap, 0, 0); + R_SetColorMapLight((r_fullbrightignoresectorcolor) ? &FullNormalLight : usecolormap, 0, 0); else calclighting = true; @@ -1166,7 +1166,7 @@ void R_ProjectSprite (AActor *thing, int fakeside, F3DFloor *fakefloor, F3DFloor } else if (!foggy && ((renderflags & RF_FULLBRIGHT) || (thing->flags5 & MF5_BRIGHT))) { // full bright - vis->Style.BaseColormap = (r_fullbrightignoresectorcolor) ? &NormalLight : mybasecolormap; + vis->Style.BaseColormap = (r_fullbrightignoresectorcolor) ? &FullNormalLight : mybasecolormap; vis->Style.ColormapNum = 0; } else @@ -1489,12 +1489,12 @@ void R_DrawPSprite(DPSprite *pspr, AActor *owner, float bobx, float boby, double } if (fixedlightlev >= 0) { - vis->Style.BaseColormap = mybasecolormap; + vis->Style.BaseColormap = (r_fullbrightignoresectorcolor) ? &FullNormalLight : mybasecolormap; vis->Style.ColormapNum = fixedlightlev >> COLORMAPSHIFT; } else if (!foggy && pspr->GetState()->GetFullbright()) { // full bright - vis->Style.BaseColormap = mybasecolormap; // [RH] use basecolormap + vis->Style.BaseColormap = (r_fullbrightignoresectorcolor) ? 
&FullNormalLight : mybasecolormap; // [RH] use basecolormap vis->Style.ColormapNum = 0; } else @@ -1546,6 +1546,11 @@ void R_DrawPSprite(DPSprite *pspr, AActor *owner, float bobx, float boby, double { noaccel = true; } + // [SP] If emulating GZDoom fullbright, disable acceleration + if (r_fullbrightignoresectorcolor && fixedlightlev >= 0) + mybasecolormap = &FullNormalLight; + if (r_fullbrightignoresectorcolor && !foggy && pspr->GetState()->GetFullbright()) + mybasecolormap = &FullNormalLight; colormap_to_use = mybasecolormap; } else @@ -2088,7 +2093,7 @@ void R_DrawSprite (vissprite_t *spr) } else if (!foggy && (spr->renderflags & RF_FULLBRIGHT)) { // full bright - spr->Style.BaseColormap = mybasecolormap; + spr->Style.BaseColormap = (r_fullbrightignoresectorcolor) ? &FullNormalLight : mybasecolormap; spr->Style.ColormapNum = 0; } else @@ -2652,7 +2657,7 @@ void R_ProjectParticle (particle_t *particle, const sector_t *sector, int shade, } else if (particle->bright) { - vis->Style.BaseColormap = map; + vis->Style.BaseColormap = (r_fullbrightignoresectorcolor) ? 
&FullNormalLight : map; vis->Style.ColormapNum = 0; } else From ec9dd3f5afe605a6dedfc24468a01bfae18e4032 Mon Sep 17 00:00:00 2001 From: raa-eruanna Date: Fri, 21 Oct 2016 07:53:32 -0400 Subject: [PATCH 228/912] - Fixed compiler errors with r_fullbrightignoresectorcolor merge --- src/r_data/colormaps.cpp | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/src/r_data/colormaps.cpp b/src/r_data/colormaps.cpp index f2f84a8eab..d64327e796 100644 --- a/src/r_data/colormaps.cpp +++ b/src/r_data/colormaps.cpp @@ -457,11 +457,6 @@ void R_DeinitColormaps () SpecialColormaps.Clear(); fakecmaps.Clear(); delete[] realcolormaps.Maps; - if (realfbcolormaps != NULL) - { - delete[] realfbcolormaps; - realfbcolormaps = NULL; - } delete[] realfbcolormaps.Maps; FreeSpecialLights(); } @@ -554,10 +549,10 @@ void R_InitColormaps () } // [SP] Create a copy of the colormap - if (!realfbcolormaps) + if (!realfbcolormaps.Maps) { - realfbcolormaps = new BYTE[256*NUMCOLORMAPS*fakecmaps.Size()]; - memcpy(realfbcolormaps, realcolormaps, 256*NUMCOLORMAPS*fakecmaps.Size()); + realfbcolormaps.Maps = new BYTE[256*NUMCOLORMAPS*fakecmaps.Size()]; + memcpy(realfbcolormaps.Maps, realcolormaps.Maps, 256*NUMCOLORMAPS*fakecmaps.Size()); } NormalLight.Color = PalEntry (255, 255, 255); From 16fded31e1c29732272e4cba1f924547a05f20fc Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Fri, 21 Oct 2016 17:53:21 -0400 Subject: [PATCH 229/912] - Show current renderer on startup. This is for diagnostic purposes when we ask people for their startup logs so we know what key settings are being used when there is a problem. 
--- src/win32/hardware.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/win32/hardware.cpp b/src/win32/hardware.cpp index e67d11842c..b7c74fbd02 100644 --- a/src/win32/hardware.cpp +++ b/src/win32/hardware.cpp @@ -170,6 +170,16 @@ void I_CreateRenderer() { currentrenderer = vid_renderer; currentcanvas = vid_used3d; + if (currentrenderer == 1) + Printf("Renderer: OpenGL\n"); + else if (currentcanvas == 0) + Printf("Renderer: Software on OpenGL\n"); + else if (currentcanvas == 1 && vid_forceddraw == false) + Printf("Renderer: Software on Direct3D\n"); + else if (currentcanvas == 1) + Printf("Renderer: Software on DirectDraw\n"); + else + Printf("Renderer: Unknown\n"); if (Renderer == NULL) { if (currentrenderer==1) Renderer = gl_CreateInterface(); From 11a98bec7e265260402bfded2a9967f42834f1e5 Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Fri, 21 Oct 2016 18:18:48 -0400 Subject: [PATCH 230/912] - fixed: Compiler error in hardware.cpp (vid_forceddraw) --- src/win32/hardware.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/win32/hardware.cpp b/src/win32/hardware.cpp index b7c74fbd02..609f56647d 100644 --- a/src/win32/hardware.cpp +++ b/src/win32/hardware.cpp @@ -53,6 +53,7 @@ EXTERN_CVAR (Bool, ticker) EXTERN_CVAR (Bool, fullscreen) EXTERN_CVAR (Bool, swtruecolor) EXTERN_CVAR (Float, vid_winscale) +EXTERN_CVAR (Bool, vid_forceddraw) CVAR(Int, win_x, -1, CVAR_ARCHIVE | CVAR_GLOBALCONFIG) CVAR(Int, win_y, -1, CVAR_ARCHIVE | CVAR_GLOBALCONFIG) From 9ee34254c86380b13256268ce6acc89d39210b63 Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Fri, 21 Oct 2016 19:26:05 -0400 Subject: [PATCH 231/912] - fixed: Crashing when using the "restart" ccmd --- src/r_data/colormaps.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/r_data/colormaps.cpp b/src/r_data/colormaps.cpp index d64327e796..659470a234 100644 --- a/src/r_data/colormaps.cpp +++ b/src/r_data/colormaps.cpp @@ -457,7 +457,11 @@ void 
R_DeinitColormaps () SpecialColormaps.Clear(); fakecmaps.Clear(); delete[] realcolormaps.Maps; - delete[] realfbcolormaps.Maps; + if (realfbcolormaps.Maps) + { + delete[] realfbcolormaps.Maps; + realfbcolormaps.Maps = nullptr; + } FreeSpecialLights(); } From edf2556ea3d92d75484c0de5f7aefb7d97652d25 Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Sun, 23 Oct 2016 06:06:59 -0400 Subject: [PATCH 232/912] - Implemented sv_singleplayerrespawn --- src/g_game.cpp | 3 ++- src/g_level.cpp | 4 +++- src/p_mobj.cpp | 5 +++-- src/p_user.cpp | 5 ++++- 4 files changed, 12 insertions(+), 5 deletions(-) diff --git a/src/g_game.cpp b/src/g_game.cpp index 48bda24f5f..b0c8775fc5 100644 --- a/src/g_game.cpp +++ b/src/g_game.cpp @@ -1655,9 +1655,10 @@ static void G_QueueBody (AActor *body) // // G_DoReborn // +EXTERN_CVAR(Bool, sv_singleplayerrespawn) void G_DoReborn (int playernum, bool freshbot) { - if (!multiplayer && !(level.flags2 & LEVEL2_ALLOWRESPAWN)) + if (!multiplayer && !(level.flags2 & LEVEL2_ALLOWRESPAWN) && !sv_singleplayerrespawn) { if (BackupSaveName.Len() > 0 && FileExists (BackupSaveName.GetChars())) { // Load game from the last point it was saved diff --git a/src/g_level.cpp b/src/g_level.cpp index 4c4b56324a..2a1e0e70ca 100644 --- a/src/g_level.cpp +++ b/src/g_level.cpp @@ -526,6 +526,8 @@ static bool unloading; // //========================================================================== +EXTERN_CVAR(Bool, sv_singleplayerrespawn) + void G_ChangeLevel(const char *levelname, int position, int flags, int nextSkill) { level_info_t *nextinfo = NULL; @@ -634,7 +636,7 @@ void G_ChangeLevel(const char *levelname, int position, int flags, int nextSkill // If this is co-op, respawn any dead players now so they can // keep their inventory on the next map. 
- if ((multiplayer || level.flags2 & LEVEL2_ALLOWRESPAWN) && !deathmatch && player->playerstate == PST_DEAD) + if ((multiplayer || level.flags2 & LEVEL2_ALLOWRESPAWN || sv_singleplayerrespawn) && !deathmatch && player->playerstate == PST_DEAD) { // Copied from the end of P_DeathThink [[ player->cls = NULL; // Force a new class if the player is using a random class diff --git a/src/p_mobj.cpp b/src/p_mobj.cpp index 8f926ea77f..3e20118784 100644 --- a/src/p_mobj.cpp +++ b/src/p_mobj.cpp @@ -1,4 +1,4 @@ -// Emacs style mode select -*- C++ -*- +// Emacs style mode select -*- C++ -*- //----------------------------------------------------------------------------- // // $Id:$ @@ -4495,6 +4495,7 @@ void AActor::AdjustFloorClip () // Most of the player structure stays unchanged between levels. // EXTERN_CVAR (Bool, chasedemo) +EXTERN_CVAR(Bool, sv_singleplayerrespawn) extern bool demonew; @@ -4682,7 +4683,7 @@ APlayerPawn *P_SpawnPlayer (FPlayerStart *mthing, int playernum, int flags) { // Give all cards in death match mode. 
p->mo->GiveDeathmatchInventory (); } - else if ((multiplayer || (level.flags2 & LEVEL2_ALLOWRESPAWN)) && state == PST_REBORN && oldactor != NULL) + else if ((multiplayer || (level.flags2 & LEVEL2_ALLOWRESPAWN) || sv_singleplayerrespawn) && state == PST_REBORN && oldactor != NULL) { // Special inventory handling for respawning in coop p->mo->FilterCoopRespawnInventory (oldactor); } diff --git a/src/p_user.cpp b/src/p_user.cpp index dec769025e..450d794b6f 100644 --- a/src/p_user.cpp +++ b/src/p_user.cpp @@ -68,6 +68,7 @@ static FRandom pr_skullpop ("SkullPop"); // Variables for prediction CVAR (Bool, cl_noprediction, false, CVAR_ARCHIVE|CVAR_GLOBALCONFIG) CVAR(Bool, cl_predict_specials, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG) +CVAR(Bool, sv_singleplayerrespawn, false, CVAR_SERVERINFO | CVAR_LATCH) CUSTOM_CVAR(Float, cl_predict_lerpscale, 0.05f, CVAR_ARCHIVE | CVAR_GLOBALCONFIG) { @@ -2212,7 +2213,9 @@ void P_DeathThink (player_t *player) if (level.time >= player->respawn_time || ((player->cmd.ucmd.buttons & BT_USE) && player->Bot == NULL)) { player->cls = NULL; // Force a new class if the player is using a random class - player->playerstate = (multiplayer || (level.flags2 & LEVEL2_ALLOWRESPAWN)) ? PST_REBORN : PST_ENTER; + player->playerstate = + (multiplayer || (level.flags2 & LEVEL2_ALLOWRESPAWN) || sv_singleplayerrespawn) + ? 
PST_REBORN : PST_ENTER; if (player->mo->special1 > 2) { player->mo->special1 = 0; From a39807eb9aee0d97403fbf4a12843896e1c47e4c Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Sun, 23 Oct 2016 06:06:59 -0400 Subject: [PATCH 233/912] - Implemented sv_singleplayerrespawn --- src/g_game.cpp | 3 ++- src/g_level.cpp | 4 +++- src/p_mobj.cpp | 5 +++-- src/p_user.cpp | 5 ++++- 4 files changed, 12 insertions(+), 5 deletions(-) diff --git a/src/g_game.cpp b/src/g_game.cpp index 48bda24f5f..b0c8775fc5 100644 --- a/src/g_game.cpp +++ b/src/g_game.cpp @@ -1655,9 +1655,10 @@ static void G_QueueBody (AActor *body) // // G_DoReborn // +EXTERN_CVAR(Bool, sv_singleplayerrespawn) void G_DoReborn (int playernum, bool freshbot) { - if (!multiplayer && !(level.flags2 & LEVEL2_ALLOWRESPAWN)) + if (!multiplayer && !(level.flags2 & LEVEL2_ALLOWRESPAWN) && !sv_singleplayerrespawn) { if (BackupSaveName.Len() > 0 && FileExists (BackupSaveName.GetChars())) { // Load game from the last point it was saved diff --git a/src/g_level.cpp b/src/g_level.cpp index 119cde378d..d3a8c4015a 100644 --- a/src/g_level.cpp +++ b/src/g_level.cpp @@ -526,6 +526,8 @@ static bool unloading; // //========================================================================== +EXTERN_CVAR(Bool, sv_singleplayerrespawn) + void G_ChangeLevel(const char *levelname, int position, int flags, int nextSkill) { level_info_t *nextinfo = NULL; @@ -634,7 +636,7 @@ void G_ChangeLevel(const char *levelname, int position, int flags, int nextSkill // If this is co-op, respawn any dead players now so they can // keep their inventory on the next map. 
- if ((multiplayer || level.flags2 & LEVEL2_ALLOWRESPAWN) && !deathmatch && player->playerstate == PST_DEAD) + if ((multiplayer || level.flags2 & LEVEL2_ALLOWRESPAWN || sv_singleplayerrespawn) && !deathmatch && player->playerstate == PST_DEAD) { // Copied from the end of P_DeathThink [[ player->cls = NULL; // Force a new class if the player is using a random class diff --git a/src/p_mobj.cpp b/src/p_mobj.cpp index 8f926ea77f..3e20118784 100644 --- a/src/p_mobj.cpp +++ b/src/p_mobj.cpp @@ -1,4 +1,4 @@ -// Emacs style mode select -*- C++ -*- +// Emacs style mode select -*- C++ -*- //----------------------------------------------------------------------------- // // $Id:$ @@ -4495,6 +4495,7 @@ void AActor::AdjustFloorClip () // Most of the player structure stays unchanged between levels. // EXTERN_CVAR (Bool, chasedemo) +EXTERN_CVAR(Bool, sv_singleplayerrespawn) extern bool demonew; @@ -4682,7 +4683,7 @@ APlayerPawn *P_SpawnPlayer (FPlayerStart *mthing, int playernum, int flags) { // Give all cards in death match mode. 
p->mo->GiveDeathmatchInventory (); } - else if ((multiplayer || (level.flags2 & LEVEL2_ALLOWRESPAWN)) && state == PST_REBORN && oldactor != NULL) + else if ((multiplayer || (level.flags2 & LEVEL2_ALLOWRESPAWN) || sv_singleplayerrespawn) && state == PST_REBORN && oldactor != NULL) { // Special inventory handling for respawning in coop p->mo->FilterCoopRespawnInventory (oldactor); } diff --git a/src/p_user.cpp b/src/p_user.cpp index e487120429..6ac39c2a5c 100644 --- a/src/p_user.cpp +++ b/src/p_user.cpp @@ -68,6 +68,7 @@ static FRandom pr_skullpop ("SkullPop"); // Variables for prediction CVAR (Bool, cl_noprediction, false, CVAR_ARCHIVE|CVAR_GLOBALCONFIG) CVAR(Bool, cl_predict_specials, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG) +CVAR(Bool, sv_singleplayerrespawn, false, CVAR_SERVERINFO | CVAR_LATCH) CUSTOM_CVAR(Float, cl_predict_lerpscale, 0.05f, CVAR_ARCHIVE | CVAR_GLOBALCONFIG) { @@ -2211,7 +2212,9 @@ void P_DeathThink (player_t *player) if (level.time >= player->respawn_time || ((player->cmd.ucmd.buttons & BT_USE) && player->Bot == NULL)) { player->cls = NULL; // Force a new class if the player is using a random class - player->playerstate = (multiplayer || (level.flags2 & LEVEL2_ALLOWRESPAWN)) ? PST_REBORN : PST_ENTER; + player->playerstate = + (multiplayer || (level.flags2 & LEVEL2_ALLOWRESPAWN) || sv_singleplayerrespawn) + ? PST_REBORN : PST_ENTER; if (player->mo->special1 > 2) { player->mo->special1 = 0; From 1e9d3b1917d8c877d0307efa9694b30964a8de76 Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Sun, 23 Oct 2016 08:14:54 -0400 Subject: [PATCH 234/912] - Putting the CVAR definition right in the middle of prediction stuff probably wasn't the best idea. 
--- src/p_user.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/p_user.cpp b/src/p_user.cpp index 6ac39c2a5c..b7bd4c2ec4 100644 --- a/src/p_user.cpp +++ b/src/p_user.cpp @@ -65,10 +65,12 @@ static FRandom pr_skullpop ("SkullPop"); // [RH] # of ticks to complete a turn180 #define TURN180_TICKS ((TICRATE / 4) + 1) +// [SP] Allows respawn in single player +CVAR(Bool, sv_singleplayerrespawn, false, CVAR_SERVERINFO | CVAR_LATCH) + // Variables for prediction CVAR (Bool, cl_noprediction, false, CVAR_ARCHIVE|CVAR_GLOBALCONFIG) CVAR(Bool, cl_predict_specials, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG) -CVAR(Bool, sv_singleplayerrespawn, false, CVAR_SERVERINFO | CVAR_LATCH) CUSTOM_CVAR(Float, cl_predict_lerpscale, 0.05f, CVAR_ARCHIVE | CVAR_GLOBALCONFIG) { From 4934fc50705e075fd6f8b3909d6af380d1ef6e2e Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Sun, 23 Oct 2016 08:19:29 -0400 Subject: [PATCH 235/912] - Removed duplicate definition from ZDoom merge --- src/p_user.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/p_user.cpp b/src/p_user.cpp index b54a0ea625..232214bfd2 100644 --- a/src/p_user.cpp +++ b/src/p_user.cpp @@ -71,7 +71,6 @@ CVAR(Bool, sv_singleplayerrespawn, false, CVAR_SERVERINFO | CVAR_LATCH) // Variables for prediction CVAR (Bool, cl_noprediction, false, CVAR_ARCHIVE|CVAR_GLOBALCONFIG) CVAR(Bool, cl_predict_specials, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG) -CVAR(Bool, sv_singleplayerrespawn, false, CVAR_SERVERINFO | CVAR_LATCH) CUSTOM_CVAR(Float, cl_predict_lerpscale, 0.05f, CVAR_ARCHIVE | CVAR_GLOBALCONFIG) { From 86f36b3081cd3d96f86d43c7cfda367cac7350ba Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 23 Oct 2016 17:43:18 +0200 Subject: [PATCH 236/912] Add function returning the generated assembly for a given llvm module --- src/r_compiler/llvmdrawers.cpp | 89 +++++++++++++++++++++++++++------- 1 file changed, 72 insertions(+), 17 deletions(-) diff --git a/src/r_compiler/llvmdrawers.cpp 
b/src/r_compiler/llvmdrawers.cpp index c936d62603..ba1fb4efcc 100644 --- a/src/r_compiler/llvmdrawers.cpp +++ b/src/r_compiler/llvmdrawers.cpp @@ -20,7 +20,9 @@ class LLVMProgram public: LLVMProgram(); + void CreateModule(); void CreateEE(); + std::string GenerateAssembly(std::string cpuName); std::string DumpModule(); void StopLogFatalErrors(); @@ -85,6 +87,8 @@ LLVMDrawers *LLVMDrawers::Instance() LLVMDrawersImpl::LLVMDrawersImpl() { + mProgram.CreateModule(); + CodegenDrawColumn("FillColumn", DrawColumnVariant::Fill, DrawColumnMethod::Normal); CodegenDrawColumn("FillColumnAdd", DrawColumnVariant::FillAdd, DrawColumnMethod::Normal); CodegenDrawColumn("FillColumnAddClamp", DrawColumnVariant::FillAddClamp, DrawColumnMethod::Normal); @@ -447,7 +451,11 @@ LLVMProgram::LLVMProgram() InitializeNativeTargetAsmPrinter(); mContext = std::make_unique(); - mModule = std::make_unique("render", context()); +} + +void LLVMProgram::CreateModule() +{ + mModule = std::make_unique("render", context()); } void LLVMProgram::CreateEE() @@ -456,22 +464,6 @@ void LLVMProgram::CreateEE() std::string errorstring; -#if 0 - std::string targetTriple = sys::getProcessTriple(); - StringMap cpuFeatures; - sys::getHostCPUFeatures(cpuFeatures); - std::string cpuFeaturesStr; - for (const auto &it : cpuFeatures) - { - if (!cpuFeaturesStr.empty()) - cpuFeaturesStr.push_back(' '); - cpuFeaturesStr.push_back(it.getValue() ? 
'+' : '-'); - cpuFeaturesStr += it.getKey(); - } - - Printf("LLVM CPU features: %s\n", cpuFeaturesStr.c_str()); -#endif - llvm::Module *module = mModule.get(); EngineBuilder engineBuilder(std::move(mModule)); engineBuilder.setErrorStr(&errorstring); @@ -532,6 +524,69 @@ void LLVMProgram::CreateEE() mEngine->finalizeObject(); } +std::string LLVMProgram::GenerateAssembly(std::string cpuName) +{ + using namespace llvm; + + std::string errorstring; + + llvm::Module *module = mModule.get(); + EngineBuilder engineBuilder(std::move(mModule)); + engineBuilder.setErrorStr(&errorstring); + engineBuilder.setOptLevel(CodeGenOpt::Aggressive); + engineBuilder.setEngineKind(EngineKind::JIT); + engineBuilder.setMCPU(cpuName); + machine = engineBuilder.selectTarget(); + if (!machine) + I_FatalError("Could not create LLVM target machine"); + + std::string targetTriple = machine->getTargetTriple().getTriple(); + + module->setTargetTriple(targetTriple); +#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 8) + module->setDataLayout(new DataLayout(*machine->getSubtargetImpl()->getDataLayout())); +#else + module->setDataLayout(machine->createDataLayout()); +#endif + + legacy::FunctionPassManager PerFunctionPasses(module); + legacy::PassManager PerModulePasses; + +#if LLVM_VERSION_MAJOR > 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 8) + PerFunctionPasses.add(createTargetTransformInfoWrapperPass(machine->getTargetIRAnalysis())); + PerModulePasses.add(createTargetTransformInfoWrapperPass(machine->getTargetIRAnalysis())); +#endif + + SmallString<16*1024> str; + raw_svector_ostream stream(str); + machine->addPassesToEmitFile(PerModulePasses, stream, TargetMachine::CGFT_AssemblyFile); + PerModulePasses.add(createPrintMIRPass(stream)); + + PassManagerBuilder passManagerBuilder; + passManagerBuilder.OptLevel = 3; + passManagerBuilder.SizeLevel = 0; + passManagerBuilder.Inliner = createFunctionInliningPass(); + passManagerBuilder.SLPVectorize = true; + 
passManagerBuilder.LoopVectorize = true; + passManagerBuilder.LoadCombine = true; + passManagerBuilder.populateModulePassManager(PerModulePasses); + passManagerBuilder.populateFunctionPassManager(PerFunctionPasses); + + // Run function passes: + PerFunctionPasses.doInitialization(); + for (llvm::Function &func : *module) + { + if (!func.isDeclaration()) + PerFunctionPasses.run(func); + } + PerFunctionPasses.doFinalization(); + + // Run module passes: + PerModulePasses.run(*module); + + return str.c_str(); +} + std::string LLVMProgram::DumpModule() { std::string str; From 08a90a13f4b8701f93698aea0174ed46e93ec971 Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Wed, 26 Oct 2016 00:38:59 -0400 Subject: [PATCH 237/912] - Moved renderer menu to options menu. Removed duplicate swtruecolor entry in "Truecolor Options" --- wadsrc/static/language.enu | 2 +- wadsrc/static/menudef.txt | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/wadsrc/static/language.enu b/wadsrc/static/language.enu index 3ac8d2ce88..ed17f587ac 100644 --- a/wadsrc/static/language.enu +++ b/wadsrc/static/language.enu @@ -1656,6 +1656,7 @@ OPTMNU_NETWORK = "Network Options"; OPTMNU_SOUND = "Sound Options"; OPTMNU_DISPLAY = "Display Options"; OPTMNU_VIDEO = "Set video mode"; +OPTMNU_CHANGERENDER = "Change Rendering Output"; OPTMNU_DEFAULTS = "Reset to defaults"; OPTMNU_RESETTOSAVED = "Reset to last saved"; OPTMNU_CONSOLE = "Go to console"; @@ -2159,7 +2160,6 @@ VIDMNU_HIDPI = "Retina/HiDPI support"; VIDMNU_ASPECTRATIO = "Aspect ratio"; VIDMNU_FORCEASPECT = "Force aspect ratio"; VIDMNU_5X4ASPECTRATIO = "Enable 5:4 aspect ratio"; -VIDMNU_CHANGERENDER = "Change Rendering Output"; VIDMNU_ENTERTEXT = "Press ENTER to set mode"; VIDMNU_TESTTEXT1 = "T to test mode for 5 seconds"; VIDMNU_TESTTEXT2 = "Please wait 5 seconds..."; diff --git a/wadsrc/static/menudef.txt b/wadsrc/static/menudef.txt index 8c1bb54817..42fb35fef5 100644 --- a/wadsrc/static/menudef.txt +++ 
b/wadsrc/static/menudef.txt @@ -345,6 +345,7 @@ OptionMenu "OptionsMenu" Submenu "$OPTMNU_SOUND", "SoundOptions" Submenu "$OPTMNU_DISPLAY", "VideoOptions" Submenu "$OPTMNU_VIDEO", "VideoModeMenu" + Submenu "$OPTMNU_CHANGERENDER", "RendererMenu" StaticText " " SafeCommand "$OPTMNU_DEFAULTS", "reset2defaults" SafeCommand "$OPTMNU_RESETTOSAVED", "reset2saved" @@ -670,7 +671,7 @@ OptionMenu "TrueColorOptions" Title "$TCMNU_TITLE" Option "$TCMNU_MULTITHREADED", "r_multithreaded", "OnOff" StaticText " " - Option "$TCMNU_TRUECOLOR", "swtruecolor", "OnOff" + //Option "$TCMNU_TRUECOLOR", "swtruecolor", "OnOff" Option "$TCMNU_MINFILTER", "r_minfilter", "OnOff" Option "$TCMNU_MAGFILTER", "r_magfilter", "OnOff" Option "$TCMNU_MIPMAP", "r_mipmap", "OnOff" @@ -1825,7 +1826,6 @@ OptionMenu VideoModeMenu Option "$VIDMNU_ASPECTRATIO", "menu_screenratios", "Ratios" Option "$VIDMNU_FORCEASPECT", "vid_aspect", "ForceRatios" Option "$VIDMNU_5X4ASPECTRATIO", "vid_tft", "YesNo" - Submenu "$VIDMNU_CHANGERENDER", "RendererMenu" StaticText " " ScreenResolution "res_0" ScreenResolution "res_1" From 2838e1b5de8f96604e36c0e3d6c87afb63f4124a Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 26 Oct 2016 07:21:19 +0200 Subject: [PATCH 238/912] Compile fix for macOS --- src/r_compiler/llvm_include.h | 4 ++++ src/r_compiler/llvmdrawers.cpp | 14 +++++++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/src/r_compiler/llvm_include.h b/src/r_compiler/llvm_include.h index c51143c585..75952d1f9d 100644 --- a/src/r_compiler/llvm_include.h +++ b/src/r_compiler/llvm_include.h @@ -55,6 +55,10 @@ #include #include +#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 8) +#include +#endif + #ifdef __APPLE__ #pragma clang diagnostic pop #endif diff --git a/src/r_compiler/llvmdrawers.cpp b/src/r_compiler/llvmdrawers.cpp index ba1fb4efcc..2f8609f68c 100644 --- a/src/r_compiler/llvmdrawers.cpp +++ b/src/r_compiler/llvmdrawers.cpp @@ -474,7 +474,11 @@ void 
LLVMProgram::CreateEE() if (!machine) I_FatalError("Could not create LLVM target machine"); +#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 8) + std::string targetTriple = machine->getTargetTriple(); +#else std::string targetTriple = machine->getTargetTriple().getTriple(); +#endif std::string cpuName = machine->getTargetCPU(); Printf("LLVM target triple: %s\n", targetTriple.c_str()); Printf("LLVM target CPU: %s\n", cpuName.c_str()); @@ -540,7 +544,11 @@ std::string LLVMProgram::GenerateAssembly(std::string cpuName) if (!machine) I_FatalError("Could not create LLVM target machine"); +#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 8) + std::string targetTriple = machine->getTargetTriple(); +#else std::string targetTriple = machine->getTargetTriple().getTriple(); +#endif module->setTargetTriple(targetTriple); #if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 8) @@ -558,9 +566,13 @@ std::string LLVMProgram::GenerateAssembly(std::string cpuName) #endif SmallString<16*1024> str; +#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 8) + raw_svector_ostream vecstream(str); + formatted_raw_ostream stream(vecstream); +#else raw_svector_ostream stream(str); +#endif machine->addPassesToEmitFile(PerModulePasses, stream, TargetMachine::CGFT_AssemblyFile); - PerModulePasses.add(createPrintMIRPass(stream)); PassManagerBuilder passManagerBuilder; passManagerBuilder.OptLevel = 3; From 767c3a2edd9693ea62940797fc0a584af25eb9bd Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 28 Oct 2016 01:55:59 +0200 Subject: [PATCH 239/912] Fix dancing sprites due to texture coordinate calculations not taking pixel centers into account --- src/r_things.cpp | 67 +++++++++--------------------------------------- 1 file changed, 12 insertions(+), 55 deletions(-) diff --git a/src/r_things.cpp b/src/r_things.cpp index 95ffc3703a..2156266688 100644 --- a/src/r_things.cpp +++ 
b/src/r_things.cpp @@ -255,17 +255,15 @@ void R_DrawMaskedColumn (const BYTE *column, const FTexture::Span *span, bool us { int pixelsize = r_swtruecolor ? 4 : 1; int inputpixelsize = (r_swtruecolor && !drawer_needs_pal_input) ? 4 : 1; - const fixed_t centeryfrac = FLOAT2FIXED(CenterY); - const fixed_t texturemid = FLOAT2FIXED(dc_texturemid); while (span->Length != 0) { const int length = span->Length; const int top = span->TopOffset; // calculate unclipped screen coordinates for post - dc_yl = xs_RoundToInt(sprtopscreen + spryscale * top); - dc_yh = xs_RoundToInt(sprtopscreen + spryscale * (top + length)) - 1; - + dc_yl = (int)(sprtopscreen + spryscale * top); + dc_yh = (int)(sprtopscreen + spryscale * (top + length)) - 1; + if (sprflipvert) { swapvalues (dc_yl, dc_yh); @@ -282,49 +280,8 @@ void R_DrawMaskedColumn (const BYTE *column, const FTexture::Span *span, bool us if (dc_yl <= dc_yh) { - if (sprflipvert) - { - dc_texturefrac = (dc_yl*dc_iscale) - (top << FRACBITS) - - FixedMul (centeryfrac, dc_iscale) - texturemid; - const fixed_t maxfrac = length << FRACBITS; - while (dc_texturefrac >= maxfrac) - { - if (++dc_yl > dc_yh) - goto nextpost; - dc_texturefrac += dc_iscale; - } - fixed_t endfrac = dc_texturefrac + (dc_yh-dc_yl)*dc_iscale; - while (endfrac < 0) - { - if (--dc_yh < dc_yl) - goto nextpost; - endfrac -= dc_iscale; - } - } - else - { - dc_texturefrac = texturemid - (top << FRACBITS) - + (dc_yl*dc_iscale) - FixedMul (centeryfrac-FRACUNIT, dc_iscale); - while (dc_texturefrac < 0) - { - if (++dc_yl > dc_yh) - goto nextpost; - dc_texturefrac += dc_iscale; - } - fixed_t endfrac = dc_texturefrac + (dc_yh-dc_yl)*dc_iscale; - const fixed_t maxfrac = length << FRACBITS; - if (dc_yh < mfloorclip[dc_x]-1 && endfrac < maxfrac - dc_iscale) - { - dc_yh++; - } - else while (endfrac >= maxfrac) - { - if (--dc_yh < dc_yl) - goto nextpost; - endfrac -= dc_iscale; - } - } - dc_source = column + top * inputpixelsize; + dc_texturefrac = FLOAT2FIXED((dc_yl + 0.5 - 
sprtopscreen) / spryscale); + dc_source = column; dc_dest = (ylookup[dc_yl] + dc_x) * pixelsize + dc_destorg; dc_count = dc_yh - dc_yl + 1; if (useRt) @@ -332,7 +289,6 @@ void R_DrawMaskedColumn (const BYTE *column, const FTexture::Span *span, bool us else colfunc (); } -nextpost: span++; } @@ -1006,21 +962,23 @@ void R_ProjectSprite (AActor *thing, int fakeside, F3DFloor *fakefloor, F3DFloor const double thingxscalemul = spriteScale.X / tex->Scale.X; tx -= ((renderflags & RF_XFLIP) ? (tex->GetWidth() - tex->LeftOffset - 1) : tex->LeftOffset) * thingxscalemul; - x1 = centerx + xs_RoundToInt(tx * xscale); + double dtx1 = tx * xscale; + x1 = centerx + xs_RoundToInt(dtx1); // off the right side? if (x1 >= WindowRight) return; tx += tex->GetWidth() * thingxscalemul; - x2 = centerx + xs_RoundToInt(tx * xscale); + double dtx2 = tx * xscale; + x2 = centerx + xs_RoundToInt(dtx2); // off the left side or too small? if ((x2 < WindowLeft || x2 <= x1)) return; xscale = spriteScale.X * xscale / tex->Scale.X; - iscale = (tex->GetWidth() << FRACBITS) / (x2 - x1); + iscale = (fixed_t)(tex->GetWidth() / (dtx2 - dtx1) * FRACUNIT); double yscale = spriteScale.Y / tex->Scale.Y; @@ -1047,9 +1005,8 @@ void R_ProjectSprite (AActor *thing, int fakeside, F3DFloor *fakefloor, F3DFloor vis->startfrac = 0; vis->xiscale = iscale; } - - if (vis->x1 > x1) - vis->startfrac += vis->xiscale * (vis->x1 - x1); + + vis->startfrac += (fixed_t)(vis->xiscale * (vis->x1 - centerx - dtx1 + 0.5 * thingxscalemul)); } else { From 54c94f6e58841717424cbe655cc2e7924be9a685 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 28 Oct 2016 02:41:14 +0200 Subject: [PATCH 240/912] Fix pinkie having the blues --- src/r_drawt_rgba.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp index a3849b9af9..0a213418bd 100644 --- a/src/r_drawt_rgba.cpp +++ b/src/r_drawt_rgba.cpp @@ -258,14 +258,14 @@ class FillColumnHorizRGBACommand : public DrawerCommand 
int _yl; int _yh; int _count; - int _color; + uint32_t _color; public: FillColumnHorizRGBACommand() { _x = dc_x; _count = dc_count; - _color = dc_color; + _color = GPalette.BaseColors[dc_color].d | (uint32_t)0xff000000; _yl = dc_yl; _yh = dc_yh; } @@ -273,7 +273,7 @@ public: void Execute(DrawerThread *thread) override { int count = _count; - int color = _color; + uint32_t color = _color; uint32_t *dest; if (count <= 0) From ea9b45d988d3c35811c4a204154da380b17a9fb7 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 29 Oct 2016 06:50:09 +0200 Subject: [PATCH 241/912] Change read access violations to be non-fatal --- src/r_thread.cpp | 35 +++++++++++++------ src/r_thread.h | 9 ++--- src/win32/i_crash.cpp | 80 +++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 107 insertions(+), 17 deletions(-) diff --git a/src/r_thread.cpp b/src/r_thread.cpp index 4f10bd8bb7..4d0a2100cb 100644 --- a/src/r_thread.cpp +++ b/src/r_thread.cpp @@ -127,10 +127,10 @@ void DrawerCommandQueue::Finish() } } }, - [](void *data) + [](void *data, const char *reason, bool fatal) { TryCatchData *d = (TryCatchData*)data; - ReportFatalError(d->queue->active_commands[d->command_index], true); + ReportDrawerError(d->queue->active_commands[d->command_index], true, reason, fatal); }); // Wait for everyone to finish: @@ -139,7 +139,14 @@ void DrawerCommandQueue::Finish() queue->end_condition.wait(end_lock, [&]() { return queue->finished_threads == queue->threads.size(); }); if (!queue->thread_error.IsEmpty()) - I_FatalError("Fatal drawer error: %s", queue->thread_error.GetChars()); + { + static bool first = true; + if (queue->thread_error_fatal) + I_FatalError("%s", queue->thread_error.GetChars()); + else if (first) + Printf("%s\n", queue->thread_error.GetChars()); + first = false; + } // Clean up batch: @@ -212,10 +219,10 @@ void DrawerCommandQueue::StartThreads() } } }, - [](void *data) + [](void *data, const char *reason, bool fatal) { TryCatchData *d = (TryCatchData*)data; - 
ReportFatalError(d->queue->active_commands[d->command_index], true); + ReportDrawerError(d->queue->active_commands[d->command_index], true, reason, fatal); }); // Notify main thread that we finished: @@ -241,23 +248,31 @@ void DrawerCommandQueue::StopThreads() shutdown_flag = false; } -void DrawerCommandQueue::ReportFatalError(DrawerCommand *command, bool worker_thread) +void DrawerCommandQueue::ReportDrawerError(DrawerCommand *command, bool worker_thread, const char *reason, bool fatal) { if (worker_thread) { std::unique_lock end_lock(Instance()->end_mutex); - if (Instance()->thread_error.IsEmpty()) - Instance()->thread_error = command->DebugInfo(); + if (Instance()->thread_error.IsEmpty() || (!Instance()->thread_error_fatal && fatal)) + { + Instance()->thread_error = reason + (FString)": " + command->DebugInfo(); + Instance()->thread_error_fatal = fatal; + } } else { - I_FatalError("Fatal drawer error: %s", command->DebugInfo().GetChars()); + static bool first = true; + if (fatal) + I_FatalError("%s: %s", reason, command->DebugInfo().GetChars()); + else if (first) + Printf("%s: %s\n", reason, command->DebugInfo().GetChars()); + first = false; } } #ifndef WIN32 -void VectoredTryCatch(void *data, void(*tryBlock)(void *data), void(*catchBlock)(void *data)) +void VectoredTryCatch(void *data, void(*tryBlock)(void *data), void(*catchBlock)(void *data, const char *reason, bool fatal)) { tryBlock(data); } diff --git a/src/r_thread.h b/src/r_thread.h index a2cc8b9732..e0aca2a016 100644 --- a/src/r_thread.h +++ b/src/r_thread.h @@ -109,7 +109,7 @@ public: virtual FString DebugInfo() = 0; }; -void VectoredTryCatch(void *data, void(*tryBlock)(void *data), void(*catchBlock)(void *data)); +void VectoredTryCatch(void *data, void(*tryBlock)(void *data), void(*catchBlock)(void *data, const char *reason, bool fatal)); // Manages queueing up commands and executing them on worker threads class DrawerCommandQueue @@ -132,6 +132,7 @@ class DrawerCommandQueue std::condition_variable 
end_condition; size_t finished_threads = 0; FString thread_error; + bool thread_error_fatal = false; int threaded_render = 0; DrawerThread single_core_thread; @@ -143,7 +144,7 @@ class DrawerCommandQueue void Finish(); static DrawerCommandQueue *Instance(); - static void ReportFatalError(DrawerCommand *command, bool worker_thread); + static void ReportDrawerError(DrawerCommand *command, bool worker_thread, const char *reason, bool fatal); DrawerCommandQueue(); ~DrawerCommandQueue(); @@ -166,10 +167,10 @@ public: T *c = (T*)data; c->Execute(&Instance()->single_core_thread); }, - [](void *data) + [](void *data, const char *reason, bool fatal) { T *c = (T*)data; - ReportFatalError(c, false); + ReportDrawerError(c, false, reason, fatal); }); } else diff --git a/src/win32/i_crash.cpp b/src/win32/i_crash.cpp index 373f902c55..c80d74f8a2 100644 --- a/src/win32/i_crash.cpp +++ b/src/win32/i_crash.cpp @@ -3407,23 +3407,97 @@ namespace bool __declspec(thread) DrawerExceptionSetJumpResult; CONTEXT __declspec(thread) DrawerExceptionSetJumpContext; PVOID __declspec(thread) DrawerExceptionHandlerHandle; + char __declspec(thread) *DrawerExceptionReason; + bool __declspec(thread) DrawerExceptionFatal; LONG WINAPI DrawerExceptionHandler(_EXCEPTION_POINTERS *exceptionInfo) { - //RtlRestoreContext(&DrawerExceptionSetJumpContext, exceptionInfo->ExceptionRecord); *exceptionInfo->ContextRecord = DrawerExceptionSetJumpContext; + + DrawerExceptionFatal = false; + switch (exceptionInfo->ExceptionRecord->ExceptionCode) + { + default: DrawerExceptionReason = "Unknown exception code"; break; + case EXCEPTION_ARRAY_BOUNDS_EXCEEDED: DrawerExceptionReason = "Array bounds exceeded"; break; + case EXCEPTION_BREAKPOINT: DrawerExceptionReason = "Breakpoint"; break; + case EXCEPTION_DATATYPE_MISALIGNMENT: DrawerExceptionReason = "Datatype misalignment"; break; + case EXCEPTION_FLT_DENORMAL_OPERAND: DrawerExceptionReason = "Float denormal operand"; break; + case EXCEPTION_FLT_DIVIDE_BY_ZERO: 
DrawerExceptionReason = "Float divide by zero"; break; + case EXCEPTION_FLT_INEXACT_RESULT: DrawerExceptionReason = "Float inexact result"; break; + case EXCEPTION_FLT_INVALID_OPERATION: DrawerExceptionReason = "Float invalid operation"; break; + case EXCEPTION_FLT_OVERFLOW: DrawerExceptionReason = "Float overflow"; break; + case EXCEPTION_FLT_STACK_CHECK: DrawerExceptionReason = "Float stack check"; break; + case EXCEPTION_FLT_UNDERFLOW: DrawerExceptionReason = "Float underflow"; break; + case EXCEPTION_INT_DIVIDE_BY_ZERO: DrawerExceptionReason = "Int divide by zero"; break; + case EXCEPTION_INT_OVERFLOW: DrawerExceptionReason = "Int overflow"; break; + case EXCEPTION_INVALID_DISPOSITION: DrawerExceptionReason = "Invalid disposition"; break; + case EXCEPTION_NONCONTINUABLE_EXCEPTION: DrawerExceptionReason = "Noncontinuable exception"; break; + case EXCEPTION_PRIV_INSTRUCTION: DrawerExceptionReason = "Priv instruction"; break; + case EXCEPTION_SINGLE_STEP: DrawerExceptionReason = "Single step"; break; + case EXCEPTION_STACK_OVERFLOW: DrawerExceptionReason = "Stack overflow"; break; + + case EXCEPTION_ILLEGAL_INSTRUCTION: + DrawerExceptionReason = "Illegal instruction"; + DrawerExceptionFatal = true; + break; + + case EXCEPTION_ACCESS_VIOLATION: + if (exceptionInfo->ExceptionRecord->ExceptionInformation[0] == 0) + { + DrawerExceptionReason = "Read access violation"; + } + else if (exceptionInfo->ExceptionRecord->ExceptionInformation[0] == 1) + { + DrawerExceptionReason = "Write access violation"; + DrawerExceptionFatal = true; + } + else if (exceptionInfo->ExceptionRecord->ExceptionInformation[0] == 8) + { + DrawerExceptionReason = "User-mode data execution prevention (DEP) violation"; + DrawerExceptionFatal = true; + } + else + { + DrawerExceptionReason = "Unknown access violation"; + DrawerExceptionFatal = true; + } + break; + + case EXCEPTION_IN_PAGE_ERROR: + if (exceptionInfo->ExceptionRecord->ExceptionInformation[0] == 0) + { + DrawerExceptionReason = "In page 
read error"; + } + else if (exceptionInfo->ExceptionRecord->ExceptionInformation[0] == 1) + { + DrawerExceptionReason = "In page write error"; + DrawerExceptionFatal = true; + } + else if (exceptionInfo->ExceptionRecord->ExceptionInformation[0] == 8) + { + DrawerExceptionReason = "In page user-mode data execution prevention (DEP) error"; + DrawerExceptionFatal = true; + } + else + { + DrawerExceptionReason = "Unknown in page read error"; + DrawerExceptionFatal = true; + } + break; + } + return EXCEPTION_CONTINUE_EXECUTION; } } -void VectoredTryCatch(void *data, void(*tryBlock)(void *data), void(*catchBlock)(void *data)) +void VectoredTryCatch(void *data, void(*tryBlock)(void *data), void(*catchBlock)(void *data, const char *reason, bool fatal)) { DrawerExceptionSetJumpResult = false; RtlCaptureContext(&DrawerExceptionSetJumpContext); if (DrawerExceptionSetJumpResult) { RemoveVectoredExceptionHandler(DrawerExceptionHandlerHandle); - catchBlock(data); + catchBlock(data, DrawerExceptionReason, DrawerExceptionFatal); } else { From ea44a445afd06f4d9642767ef01368b69322cdf4 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 29 Oct 2016 07:20:16 +0200 Subject: [PATCH 242/912] Improve drawer debug info details --- src/r_compiler/llvmdrawers.h | 4 ++-- src/r_draw_rgba.cpp | 26 +++++++++++++------------- src/r_drawt_rgba.cpp | 8 ++++---- 3 files changed, 19 insertions(+), 19 deletions(-) diff --git a/src/r_compiler/llvmdrawers.h b/src/r_compiler/llvmdrawers.h index 64d73eeee3..e7386678ed 100644 --- a/src/r_compiler/llvmdrawers.h +++ b/src/r_compiler/llvmdrawers.h @@ -45,7 +45,7 @@ struct DrawWallArgs FString ToString() { FString info; - info.Format("dest_y = %i, count = %i, flags = %i", dest_y, count, flags); + info.Format("dest_y = %i, count = %i, flags = %i, texturefrac[0] = %u, textureheight[0] = %u", dest_y, count, flags, texturefrac[0], textureheight[0]); return info; } }; @@ -128,7 +128,7 @@ struct DrawColumnArgs FString ToString() { FString info; - 
info.Format("dest_y = %i, count = %i, flags = %i", dest_y, count, flags); + info.Format("dest_y = %i, count = %i, flags = %i, iscale = %i (%f), texturefrac = %i (%f)", dest_y, count, flags, iscale, ((fixed_t)iscale) / (float)FRACUNIT, texturefrac, ((fixed_t)texturefrac) / (float)FRACUNIT); return info; } }; diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 2b9eb86e75..1b46489a5e 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -107,7 +107,7 @@ public: FString DebugInfo() override { - return "DrawSpanLLVMCommand\n" + args.ToString(); + return "DrawSpan\n" + args.ToString(); } protected: @@ -258,7 +258,7 @@ public: FString DebugInfo() override { - return "DrawWall4LLVMCommand\n" + args.ToString(); + return "DrawWall4\n" + args.ToString(); } }; @@ -319,7 +319,7 @@ public: FString DebugInfo() override { - return "DrawWall1LLVMCommand\n" + args.ToString(); + return "DrawWall1\n" + args.ToString(); } }; @@ -340,7 +340,7 @@ protected: FString DebugInfo() override { - return "DrawColumnLLVMCommand\n" + args.ToString(); + return "DrawColumn\n" + args.ToString(); } public: @@ -423,7 +423,7 @@ public: FString DebugInfo() override { - return "DrawSkyLLVMCommand\n" + args.ToString(); + return "DrawSky\n" + args.ToString(); } }; @@ -587,7 +587,7 @@ public: FString DebugInfo() override { - return "DrawFuzzColumnRGBACommand"; + return "DrawFuzzColumn"; } }; @@ -626,7 +626,7 @@ public: FString DebugInfo() override { - return "FillSpanRGBACommand"; + return "FillSpan"; } }; @@ -750,7 +750,7 @@ public: FString DebugInfo() override { - return "DrawSlabRGBACommand"; + return "DrawSlab"; } }; @@ -829,7 +829,7 @@ public: FString DebugInfo() override { - return "DrawFogBoundaryLineRGBACommand"; + return "DrawFogBoundaryLine"; } }; @@ -983,7 +983,7 @@ public: FString DebugInfo() override { - return "DrawTiltedSpanRGBACommand"; + return "DrawTiltedSpan"; } }; @@ -1027,7 +1027,7 @@ public: FString DebugInfo() override { - return "DrawColoredSpanRGBACommand"; + return 
"DrawColoredSpan"; } }; @@ -1099,7 +1099,7 @@ public: FString DebugInfo() override { - return "FillTransColumnRGBACommand"; + return "FillTransColumn"; } }; @@ -1279,7 +1279,7 @@ public: FString DebugInfo() override { - return "DrawTrianglesCommand"; + return "DrawTriangles"; } private: diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp index 0a213418bd..2963d2fc17 100644 --- a/src/r_drawt_rgba.cpp +++ b/src/r_drawt_rgba.cpp @@ -109,7 +109,7 @@ public: FString DebugInfo() override { - return "DrawColumnRt1LLVMCommand\n" + args.ToString(); + return "DrawColumnRt\n" + args.ToString(); } }; @@ -168,7 +168,7 @@ public: FString DebugInfo() override { - return "RtInitColsRGBACommand"; + return "RtInitCols"; } }; @@ -248,7 +248,7 @@ public: FString DebugInfo() override { - return "DrawColumnHorizRGBACommand"; + return "DrawColumnHoriz"; } }; @@ -298,7 +298,7 @@ public: FString DebugInfo() override { - return "FillColumnHorizRGBACommand"; + return "FillColumnHoriz"; } }; From 09961abeea0871d97dbcae067c2b58167fa86a4b Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Sat, 29 Oct 2016 21:44:30 -0400 Subject: [PATCH 243/912] - Version 0.1.1 tag --- src/version.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/version.h b/src/version.h index 837ef4eeee..3ff0d7fa98 100644 --- a/src/version.h +++ b/src/version.h @@ -41,17 +41,17 @@ const char *GetVersionString(); /** Lots of different version numbers **/ -#define VERSIONSTR "0.2pre" +#define VERSIONSTR "0.1.1" // The version as seen in the Windows resource -#define RC_FILEVERSION 0,1,9999,0 -#define RC_PRODUCTVERSION 0,1,9999,0 -#define RC_PRODUCTVERSION2 "0.2pre" +#define RC_FILEVERSION 0,1,1,0 +#define RC_PRODUCTVERSION 0,1,1,0 +#define RC_PRODUCTVERSION2 "0.1.1" // Version identifier for network games. // Bump it every time you do a release unless you're certain you // didn't change anything that will affect sync. 
-#define NETGAMEVERSION 233 +#define NETGAMEVERSION 234 // Version stored in the ini's [LastRun] section. // Bump it if you made some configuration change that you want to From e2fa16c99e7ecf4f6d95ce1584edc8f1cc1fa4ce Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Sat, 29 Oct 2016 21:51:43 -0400 Subject: [PATCH 244/912] Actually - changed our minds - this is going to be 1.0. :) --- src/version.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/version.h b/src/version.h index 3ff0d7fa98..8d88b6ed11 100644 --- a/src/version.h +++ b/src/version.h @@ -41,12 +41,12 @@ const char *GetVersionString(); /** Lots of different version numbers **/ -#define VERSIONSTR "0.1.1" +#define VERSIONSTR "1.0" // The version as seen in the Windows resource -#define RC_FILEVERSION 0,1,1,0 -#define RC_PRODUCTVERSION 0,1,1,0 -#define RC_PRODUCTVERSION2 "0.1.1" +#define RC_FILEVERSION 1,0,0,0 +#define RC_PRODUCTVERSION 1,0,0,0 +#define RC_PRODUCTVERSION2 "1.0" // Version identifier for network games. // Bump it every time you do a release unless you're certain you From 6ea0baac3aa00df85360d95cf8718f1431bfc4cf Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Sat, 29 Oct 2016 22:49:34 -0400 Subject: [PATCH 245/912] - Next "pre" tag. --- src/version.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/version.h b/src/version.h index 8d88b6ed11..37f3b1dd41 100644 --- a/src/version.h +++ b/src/version.h @@ -41,12 +41,12 @@ const char *GetVersionString(); /** Lots of different version numbers **/ -#define VERSIONSTR "1.0" +#define VERSIONSTR "1.1pre" // The version as seen in the Windows resource -#define RC_FILEVERSION 1,0,0,0 -#define RC_PRODUCTVERSION 1,0,0,0 -#define RC_PRODUCTVERSION2 "1.0" +#define RC_FILEVERSION 1,0,9999,0 +#define RC_PRODUCTVERSION 1,0,9999,0 +#define RC_PRODUCTVERSION2 "1.1pre" // Version identifier for network games. 
// Bump it every time you do a release unless you're certain you From a7d06ddd77586a6b3dd9434245aefdae4ae73d67 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 30 Oct 2016 08:18:55 +0100 Subject: [PATCH 246/912] Add texturing to triangle drawer --- src/r_draw.h | 2 +- src/r_draw_rgba.cpp | 71 ++++++++++++++++------------ src/r_plane.cpp | 111 ++++++++++++++++++-------------------------- 3 files changed, 89 insertions(+), 95 deletions(-) diff --git a/src/r_draw.h b/src/r_draw.h index 601962809b..2fa662a95e 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -403,7 +403,7 @@ struct TriVertex }; class VSMatrix; -void R_DrawTriangles(const VSMatrix &objectToWorld, const TriVertex *vertices, int count, int clipleft, int clipright, const short *cliptop, const short *clipbottom); +void R_DrawTriangles(const VSMatrix &objectToWorld, const TriVertex *vertices, int count, int clipleft, int clipright, const short *cliptop, const short *clipbottom, FTexture *texture); extern bool r_swtruecolor; diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 1b46489a5e..c65718de22 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -1260,8 +1260,8 @@ void ApplySpecialColormapRGBACommand::Execute(DrawerThread *thread) class DrawTrianglesCommand : public DrawerCommand { public: - DrawTrianglesCommand(const VSMatrix &objectToWorld, const TriVertex *vertices, int count, int clipleft, int clipright, const short *clipdata) - : objectToWorld(objectToWorld), vertices(vertices), count(count), clipleft(clipleft), clipright(clipright), clipdata(clipdata) + DrawTrianglesCommand(const VSMatrix &transform, const TriVertex *vertices, int count, int clipleft, int clipright, const short *clipdata, const uint32_t *texturePixels, int textureWidth, int textureHeight) + : transform(transform), vertices(vertices), count(count), clipleft(clipleft), clipright(clipright), clipdata(clipdata), texturePixels(texturePixels), textureWidth(textureWidth), textureHeight(textureHeight) { } @@ 
-1274,7 +1274,7 @@ public: thread->triangle_clip_bottom[clipleft + i] = clipdata[cliplength + i]; } - draw_triangles(objectToWorld, vertices, count, clipleft, clipright, thread->triangle_clip_top, thread->triangle_clip_bottom, thread); + draw_triangles(transform, vertices, count, clipleft, clipright, thread->triangle_clip_top, thread->triangle_clip_bottom, thread); } FString DebugInfo() override @@ -1337,7 +1337,7 @@ private: int minx = MAX((MIN(MIN(X1, X2), X3) + 0xF) >> 4, clipleft); int maxx = MIN((MAX(MAX(X1, X2), X3) + 0xF) >> 4, clipright); int miny = MAX((MIN(MIN(Y1, Y2), Y3) + 0xF) >> 4, clipymin); - int maxy = MIN((MAX(MAX(Y1, Y2), Y3) + 0xF) >> 4, clipymax); + int maxy = MIN((MAX(MAX(Y1, Y2), Y3) + 0xF) >> 4, clipymax - 1); if (minx >= maxx || miny >= maxy) return; @@ -1407,9 +1407,9 @@ private: // Check if block needs clipping int clipcount = 0; - for (int ix = 0; ix < q; ix++) + for (int ix = x; ix < x + q; ix++) { - clipcount += (cliptop[x + ix] > y) || (clipbottom[x + ix] < y + q - 1); + clipcount += (clipleft > ix) || (clipright < ix) || (cliptop[ix] > y) || (clipbottom[ix] <= y + q - 1); } // Calculate varying variables for affine block @@ -1451,11 +1451,15 @@ private: { for (int ix = x; ix < x + q; ix++) { - uint32_t red = (uint32_t)clamp(varying[0] * 255.0f + 0.5f, 0.0f, 255.0f); - uint32_t green = (uint32_t)clamp(varying[1] * 255.0f + 0.5f, 0.0f, 255.0f); - uint32_t blue = (uint32_t)clamp(varying[2] * 255.0f + 0.5f, 0.0f, 255.0f); + uint32_t ufrac = (uint32_t)((varying[0] - floor(varying[0])) * 0x100000000LL); + uint32_t vfrac = (uint32_t)((varying[1] - floor(varying[1])) * 0x100000000LL); + //uint32_t light = (uint32_t)clamp(varying[2] * 255.0f + 0.5f, 0.0f, 255.0f); - buffer[ix] = 0xff000000 | (red << 16) | (green << 8) | blue; + uint32_t upos = ((ufrac >> 16) * textureWidth) >> 16; + uint32_t vpos = ((vfrac >> 16) * textureHeight) >> 16; + uint32_t uvoffset = upos * textureHeight + vpos; + + buffer[ix] = texturePixels[uvoffset]; for (int i = 
0; i < TriVertex::NumVarying; i++) varying[i] += varyingStep[i]; @@ -1488,15 +1492,19 @@ private: { for (int ix = x; ix < x + q; ix++) { - bool visible = (cliptop[ix] <= y + iy) && (clipbottom[ix] >= y + iy); + bool visible = ix >= clipleft && ix <= clipright && (cliptop[ix] <= y + iy) && (clipbottom[ix] > y + iy); if (CX1 > 0 && CX2 > 0 && CX3 > 0 && visible) { - uint32_t red = (uint32_t)clamp(varying[0] * 255.0f + 0.5f, 0.0f, 255.0f); - uint32_t green = (uint32_t)clamp(varying[1] * 255.0f + 0.5f, 0.0f, 255.0f); - uint32_t blue = (uint32_t)clamp(varying[2] * 255.0f + 0.5f, 0.0f, 255.0f); + uint32_t ufrac = (uint32_t)((varying[0] - floor(varying[0])) * 0x100000000LL); + uint32_t vfrac = (uint32_t)((varying[1] - floor(varying[1])) * 0x100000000LL); + //uint32_t light = (uint32_t)clamp(varying[2] * 255.0f + 0.5f, 0.0f, 255.0f); - buffer[ix] = 0xff000000 | (red << 16) | (green << 8) | blue; + uint32_t upos = ((ufrac >> 16) * textureWidth) >> 16; + uint32_t vpos = ((vfrac >> 16) * textureHeight) >> 16; + uint32_t uvoffset = upos * textureHeight + vpos; + + buffer[ix] = texturePixels[uvoffset]; } for (int i = 0; i < TriVertex::NumVarying; i++) @@ -1523,13 +1531,15 @@ private: bool cullhalfspace(float clipdistance1, float clipdistance2, float &t1, float &t2) { - if (clipdistance1 < 0.0f && clipdistance2 < 0.0f) + float d1 = clipdistance1 * (1.0f - t1) + clipdistance2 * t1; + float d2 = clipdistance1 * (1.0f - t2) + clipdistance2 * t2; + if (d1 < 0.0f && d2 < 0.0f) return true; - if (clipdistance1 < 0.0f) + if (d1 < 0.0f) t1 = MAX(-clipdistance1 / (clipdistance2 - clipdistance1), t1); - if (clipdistance2 < 0.0f) + if (d2 < 0.0f) t2 = MIN(1.0f + clipdistance2 / (clipdistance1 - clipdistance2), t2); return false; @@ -1580,7 +1590,7 @@ private: } } - void draw_triangles(const VSMatrix &objectToWorld, const TriVertex *vinput, int vcount, int clipleft, int clipright, const short *cliptop, const short *clipbottom, DrawerThread *thread) + void draw_triangles(const VSMatrix 
&transform, const TriVertex *vinput, int vcount, int clipleft, int clipright, const short *cliptop, const short *clipbottom, DrawerThread *thread) { for (int i = 0; i < vcount / 3; i++) { @@ -1592,8 +1602,8 @@ private: auto &v = vert[j]; v = *(vinput++); - // Apply object to world transform: - const float *matrix = objectToWorld.get(); + // Apply transform to get world coordinates: + const float *matrix = transform.get(); float vx = matrix[0 * 4 + 0] * v.x + matrix[1 * 4 + 0] * v.y + matrix[2 * 4 + 0] * v.z + matrix[3 * 4 + 0] * v.w; float vy = matrix[0 * 4 + 1] * v.x + matrix[1 * 4 + 1] * v.y + matrix[2 * 4 + 1] * v.z + matrix[3 * 4 + 1] * v.w; float vz = matrix[0 * 4 + 2] * v.x + matrix[1 * 4 + 2] * v.y + matrix[2 * 4 + 2] * v.z + matrix[3 * 4 + 2] * v.w; @@ -1611,9 +1621,9 @@ private: double tr_z = v.z - ViewPos.Z; double tx = tr_x * ViewSin - tr_y * ViewCos; double tz = tr_x * ViewTanCos + tr_y * ViewTanSin; - v.x = (float)tx; - v.y = (float)tr_z; - v.z = (float)(-tz * (farp + nearp) / (nearp - farp) + (2.0f * farp * nearp) / (nearp - farp)); + v.x = (float)tx * 0.5f; + v.y = (float)tr_z * 0.5f; + v.z = (float)((-tz * (farp + nearp) / (nearp - farp) + (2.0f * farp * nearp) / (nearp - farp))); v.w = (float)tz; } @@ -1636,8 +1646,8 @@ private: v.z *= v.w; // Apply viewport scale to get screen coordinates: - v.x = (float)(CenterX + v.x * CenterX); - v.y = (float)(CenterY - v.y * InvZtoScale); + v.x = (float)(CenterX + v.x * 2.0f * CenterX); + v.y = (float)(CenterY - v.y * 2.0f * InvZtoScale); } // Draw screen triangles @@ -1659,15 +1669,18 @@ private: } } - VSMatrix objectToWorld; + VSMatrix transform; const TriVertex *vertices; int count; int clipleft; int clipright; const short *clipdata; + const uint32_t *texturePixels; + int textureWidth; + int textureHeight; }; -void R_DrawTriangles(const VSMatrix &objectToWorld, const TriVertex *vertices, int count, int clipleft, int clipright, const short *cliptop, const short *clipbottom) +void R_DrawTriangles(const 
VSMatrix &transform, const TriVertex *vertices, int count, int clipleft, int clipright, const short *cliptop, const short *clipbottom, FTexture *texture) { if (clipright < clipleft || clipleft < 0 || clipright > MAXWIDTH) return; @@ -1687,7 +1700,7 @@ void R_DrawTriangles(const VSMatrix &objectToWorld, const TriVertex *vertices, i for (int i = 0; i < cliplength; i++) clipdata[cliplength + i] = clipbottom[clipleft + i]; - DrawerCommandQueue::QueueCommand(objectToWorld, vertices, count, clipleft, clipright, clipdata); + DrawerCommandQueue::QueueCommand(transform, vertices, count, clipleft, clipright, clipdata, texture->GetPixelsBgra(), texture->GetWidth(), texture->GetHeight()); } ///////////////////////////////////////////////////////////////////////////// diff --git a/src/r_plane.cpp b/src/r_plane.cpp index 3ba6feaf7d..6c1cecdb58 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -998,85 +998,66 @@ static void R_DrawCubeSky(visplane_t *pl) static TriVertex cube[6 * 6] = { - { -1.0f, 1.0f, 1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, - { 1.0f, 1.0f, 1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, - { 1.0f, -1.0f, 1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, + // Top + { -1.0f, 1.0f, 0.6f, 1.0f, 1.0f, 0.0f, 1.0f }, + { 1.0f, 1.0f, 0.6f, 1.0f, 0.0f, 0.0f, 1.0f }, + { 1.0f, -1.0f, 0.6f, 1.0f, 0.0f, 0.1f, 1.0f }, - { 1.0f, -1.0f, 1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, - { -1.0f, -1.0f, 1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, - { -1.0f, 1.0f, 1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, + { 1.0f, -1.0f, 0.6f, 1.0f, 0.0f, 0.1f, 1.0f }, + { -1.0f, -1.0f, 0.6f, 1.0f, 1.0f, 0.1f, 1.0f }, + { -1.0f, 1.0f, 0.6f, 1.0f, 1.0f, 0.0f, 1.0f }, + // Bottom + { 1.0f, -1.0f, -1.0f, 1.0f, 1.0f, 0.9f, 1.0f }, + { 1.0f, 1.0f, -1.0f, 1.0f, 1.0f, 1.0f, 1.0f }, + { -1.0f, 1.0f, -1.0f, 1.0f, 0.0f, 1.0f, 1.0f }, - { 1.0f, -1.0f, -1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, - { 1.0f, 1.0f, -1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, - { -1.0f, 1.0f, -1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, + { -1.0f, 1.0f, -1.0f, 1.0f, 0.0f, 1.0f, 1.0f }, + { -1.0f, -1.0f, -1.0f, 1.0f, 0.0f, 0.9f, 1.0f }, + 
{ 1.0f, -1.0f, -1.0f, 1.0f, 1.0f, 0.9f, 1.0f }, - { -1.0f, 1.0f, -1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, - { -1.0f, -1.0f, -1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, - { 1.0f, -1.0f, -1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, + // Front + { 1.0f, 1.0f, -1.0f, 1.0f, 1.0f, 2.0f, 1.0f }, + { 1.0f, 1.0f, 0.6f, 1.0f, 1.0f, 0.0f, 1.0f }, + { -1.0f, 1.0f, 0.6f, 1.0f, 0.0f, 0.0f, 1.0f }, + { -1.0f, 1.0f, 0.6f, 1.0f, 0.0f, 0.0f, 1.0f }, + { -1.0f, 1.0f, -1.0f, 1.0f, 0.0f, 2.0f, 1.0f }, + { 1.0f, 1.0f, -1.0f, 1.0f, 1.0f, 2.0f, 1.0f }, - { 1.0f, 1.0f, -1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, - { 1.0f, 1.0f, 1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, - { -1.0f, 1.0f, 1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, + // Back + { -1.0f, -1.0f, 0.6f, 1.0f, 1.0f, 0.0f, 1.0f }, + { 1.0f, -1.0f, 0.6f, 1.0f, 0.0f, 0.0f, 1.0f }, + { 1.0f, -1.0f, -1.0f, 1.0f, 0.0f, 2.0f, 1.0f }, - { -1.0f, 1.0f, 1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, - { -1.0f, 1.0f, -1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, - { 1.0f, 1.0f, -1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, + { 1.0f, -1.0f, -1.0f, 1.0f, 0.0f, 2.0f, 1.0f }, + { -1.0f, -1.0f, -1.0f, 1.0f, 1.0f, 2.0f, 1.0f }, + { -1.0f, -1.0f, 0.6f, 1.0f, 1.0f, 0.0f, 1.0f }, + // Right + { 1.0f, -1.0f, 0.6f, 1.0f, 1.0f, 0.0f, 1.0f }, + { 1.0f, 1.0f, 0.6f, 1.0f, 0.0f, 0.0f, 1.0f }, + { 1.0f, 1.0f, -1.0f, 1.0f, 0.0f, 2.0f, 1.0f }, - { -1.0f, -1.0f, 1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, - { 1.0f, -1.0f, 1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, - { 1.0f, -1.0f, -1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, + { 1.0f, 1.0f, -1.0f, 1.0f, 0.0f, 2.0f, 1.0f }, + { 1.0f, -1.0f, -1.0f, 1.0f, 1.0f, 2.0f, 1.0f }, + { 1.0f, -1.0f, 0.6f, 1.0f, 1.0f, 0.0f, 1.0f }, - { 1.0f, -1.0f, -1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, - { -1.0f, -1.0f, -1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, - { -1.0f, -1.0f, 1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, + // Left + { -1.0f, 1.0f, -1.0f, 1.0f, 1.0f, 2.0f, 1.0f }, + { -1.0f, 1.0f, 0.6f, 1.0f, 1.0f, 0.0f, 1.0f }, + { -1.0f, -1.0f, 0.6f, 1.0f, 0.0f, 0.0f, 1.0f }, - - { 1.0f, -1.0f, 1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, - { 1.0f, 1.0f, 1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, - { 1.0f, 1.0f, -1.0f, 1.0f, 
0.0f, 0.0f, 0.0f }, - - { 1.0f, 1.0f, -1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, - { 1.0f, -1.0f, -1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, - { 1.0f, -1.0f, 1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, - - - { -1.0f, 1.0f, -1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, - { -1.0f, 1.0f, 1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, - { -1.0f, -1.0f, 1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, - - { -1.0f, -1.0f, 1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, - { -1.0f, -1.0f, -1.0f, 1.0f, 0.0f, 0.0f, 0.0f }, - { -1.0f, 1.0f, -1.0f, 1.0f, 0.0f, 0.0f, 0.0f } + { -1.0f, -1.0f, 0.6f, 1.0f, 0.0f, 0.0f, 1.0f }, + { -1.0f, -1.0f, -1.0f, 1.0f, 0.0f, 2.0f, 1.0f }, + { -1.0f, 1.0f, -1.0f, 1.0f, 1.0f, 2.0f, 1.0f } }; - static bool first_time = true; - if (first_time) - { - for (int i = 0; i < 6; i++) - { - cube[i * 6 + 0].varying[0] = 1.0f; - cube[i * 6 + 1].varying[1] = 1.0f; - cube[i * 6 + 2].varying[2] = 1.0f; - cube[i * 6 + 3].varying[2] = 1.0f; - cube[i * 6 + 4].varying[0] = 1.0f; - cube[i * 6 + 4].varying[1] = 1.0f; - cube[i * 6 + 4].varying[2] = 1.0f; - cube[i * 6 + 5].varying[0] = 1.0f; - } - first_time = false; - } + VSMatrix transform(0); + transform.translate((float)ViewPos.X, (float)ViewPos.Y, (float)ViewPos.Z); + transform.scale(1000.0f, 1000.0f, 1000.0f); - //static float angle = 0.0f; - //angle = fmod(angle + 0.5f, 360.0f); - VSMatrix objectToWorld(0); - objectToWorld.translate((float)ViewPos.X, (float)ViewPos.Y, (float)ViewPos.Z); - //objectToWorld.rotate(angle, 0.57735f, 0.57735f, 0.57735f); - objectToWorld.scale(100.0f, 100.0f, 100.0f); - - R_DrawTriangles(objectToWorld, cube, 6 * 6, x1, x2 - 1, uwal, dwal); + R_DrawTriangles(transform, cube, 6 * 6, x1, x2 - 1, uwal, dwal, frontskytex); } static void R_DrawSkyColumnStripe(int start_x, int y1, int y2, int columns, double scale, double texturemid, double yrepeat) From 4c5723c10a391edb1bfad512b64f094cecf906cd Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Mon, 31 Oct 2016 00:29:15 -0400 Subject: [PATCH 247/912] - Implemented "mpmap", "mpopen", and "mprecordmap". 
All 3 commands do exactly as their non-"mp" counterparts do, except they turn on multiplayer (botmode) emulation before doing so. --- src/doomstat.h | 3 ++ src/g_game.cpp | 1 + src/g_level.cpp | 116 +++++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 119 insertions(+), 1 deletion(-) diff --git a/src/doomstat.h b/src/doomstat.h index bbb323c7e3..70869b5a2c 100644 --- a/src/doomstat.h +++ b/src/doomstat.h @@ -71,6 +71,9 @@ extern bool netgame; // Bot game? Like netgame, but doesn't involve network communication. extern bool multiplayer; +// [SP] MPMap implementation - invokes fake multiplayer without bots +extern bool multiplayernext; + // Flag: true only if started as net deathmatch. EXTERN_CVAR (Int, deathmatch) diff --git a/src/g_game.cpp b/src/g_game.cpp index b0c8775fc5..5d7b1c1535 100644 --- a/src/g_game.cpp +++ b/src/g_game.cpp @@ -164,6 +164,7 @@ bool viewactive; bool netgame; // only true if packets are broadcast bool multiplayer; +bool multiplayernext; // [SP] MPMap implementation player_t players[MAXPLAYERS]; bool playeringame[MAXPLAYERS]; diff --git a/src/g_level.cpp b/src/g_level.cpp index 2a1e0e70ca..4b05188f4b 100644 --- a/src/g_level.cpp +++ b/src/g_level.cpp @@ -268,6 +268,119 @@ CCMD (open) } } +//========================================================================== +// +// +//========================================================================== + +CCMD (mpmap) +{ + if (netgame) + { + Printf ("Use " TEXTCOLOR_BOLD "changemap" TEXTCOLOR_NORMAL " instead. 
" TEXTCOLOR_BOLD "Map" + TEXTCOLOR_NORMAL " is for single-player only.\n"); + return; + } + if (argv.argc() > 1) + { + try + { + if (!P_CheckMapData(argv[1])) + { + Printf ("No map %s\n", argv[1]); + } + else + { + multiplayernext = true; + G_DeferedInitNew (argv[1]); + } + } + catch(CRecoverableError &error) + { + if (error.GetMessage()) + Printf("%s", error.GetMessage()); + } + } + else + { + Printf ("Usage: map \n"); + } +} + +//========================================================================== +// +// +//========================================================================== + +CCMD(mprecordmap) +{ + if (netgame) + { + Printf("You cannot record a new game while in a netgame."); + return; + } + if (argv.argc() > 2) + { + try + { + if (!P_CheckMapData(argv[2])) + { + Printf("No map %s\n", argv[2]); + } + else + { + multiplayernext = true; + G_DeferedInitNew(argv[2]); + gameaction = ga_recordgame; + newdemoname = argv[1]; + newdemomap = argv[2]; + } + } + catch (CRecoverableError &error) + { + if (error.GetMessage()) + Printf("%s", error.GetMessage()); + } + } + else + { + Printf("Usage: recordmap \n"); + } +} + +//========================================================================== +// +// +//========================================================================== + +CCMD (mpopen) +{ + if (netgame) + { + Printf ("You cannot use open in multiplayer games.\n"); + return; + } + if (argv.argc() > 1) + { + d_mapname = "file:"; + d_mapname += argv[1]; + if (!P_CheckMapData(d_mapname)) + { + Printf ("No map %s\n", d_mapname.GetChars()); + } + else + { + gameaction = ga_newgame2; + d_skill = -1; + multiplayernext = true; + } + } + else + { + Printf ("Usage: open \n"); + } +} + //========================================================================== // @@ -293,7 +406,8 @@ void G_NewInit () G_ClearSnapshots (); ST_SetNeedRefresh(); netgame = false; - multiplayer = false; + multiplayer = multiplayernext; + multiplayernext = false; if (demoplayback) { 
C_RestoreCVars (); From 49cccfae4290a51bf5e84ef2871337ba53b57cbe Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Mon, 31 Oct 2016 00:42:02 -0400 Subject: [PATCH 248/912] - Added initial state for "multiplayernext" to prevent possible initialization errors. --- src/g_game.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/g_game.cpp b/src/g_game.cpp index 5d7b1c1535..2c827e6391 100644 --- a/src/g_game.cpp +++ b/src/g_game.cpp @@ -164,7 +164,7 @@ bool viewactive; bool netgame; // only true if packets are broadcast bool multiplayer; -bool multiplayernext; // [SP] MPMap implementation +bool multiplayernext = false; // [SP] MPMap implementation player_t players[MAXPLAYERS]; bool playeringame[MAXPLAYERS]; From bc319dce1d284fb0e374c30d2d6b58aae747d1a8 Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Mon, 31 Oct 2016 00:29:15 -0400 Subject: [PATCH 249/912] - Corrected syntax error messages for mpmap, mpopen, mprecordmap - Added initial state for "multiplayernext" to prevent possible initialization errors. - Implemented "mpmap", "mpopen", and "mprecordmap". All 3 commands do exactly as their non-"mp" counterparts do, except they turn on multiplayer (botmode) emulation before doing so. --- src/doomstat.h | 3 ++ src/g_game.cpp | 1 + src/g_level.cpp | 116 +++++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 119 insertions(+), 1 deletion(-) diff --git a/src/doomstat.h b/src/doomstat.h index bbb323c7e3..70869b5a2c 100644 --- a/src/doomstat.h +++ b/src/doomstat.h @@ -71,6 +71,9 @@ extern bool netgame; // Bot game? Like netgame, but doesn't involve network communication. extern bool multiplayer; +// [SP] MPMap implementation - invokes fake multiplayer without bots +extern bool multiplayernext; + // Flag: true only if started as net deathmatch. 
EXTERN_CVAR (Int, deathmatch) diff --git a/src/g_game.cpp b/src/g_game.cpp index b0c8775fc5..2c827e6391 100644 --- a/src/g_game.cpp +++ b/src/g_game.cpp @@ -164,6 +164,7 @@ bool viewactive; bool netgame; // only true if packets are broadcast bool multiplayer; +bool multiplayernext = false; // [SP] MPMap implementation player_t players[MAXPLAYERS]; bool playeringame[MAXPLAYERS]; diff --git a/src/g_level.cpp b/src/g_level.cpp index d3a8c4015a..05a85ba63b 100644 --- a/src/g_level.cpp +++ b/src/g_level.cpp @@ -268,6 +268,119 @@ CCMD (open) } } +//========================================================================== +// +// +//========================================================================== + +CCMD (mpmap) +{ + if (netgame) + { + Printf ("Use " TEXTCOLOR_BOLD "changemap" TEXTCOLOR_NORMAL " instead. " TEXTCOLOR_BOLD "Map" + TEXTCOLOR_NORMAL " is for single-player only.\n"); + return; + } + if (argv.argc() > 1) + { + try + { + if (!P_CheckMapData(argv[1])) + { + Printf ("No map %s\n", argv[1]); + } + else + { + multiplayernext = true; + G_DeferedInitNew (argv[1]); + } + } + catch(CRecoverableError &error) + { + if (error.GetMessage()) + Printf("%s", error.GetMessage()); + } + } + else + { + Printf ("Usage: mpmap \n"); + } +} + +//========================================================================== +// +// +//========================================================================== + +CCMD(mprecordmap) +{ + if (netgame) + { + Printf("You cannot record a new game while in a netgame."); + return; + } + if (argv.argc() > 2) + { + try + { + if (!P_CheckMapData(argv[2])) + { + Printf("No map %s\n", argv[2]); + } + else + { + multiplayernext = true; + G_DeferedInitNew(argv[2]); + gameaction = ga_recordgame; + newdemoname = argv[1]; + newdemomap = argv[2]; + } + } + catch (CRecoverableError &error) + { + if (error.GetMessage()) + Printf("%s", error.GetMessage()); + } + } + else + { + Printf("Usage: mprecordmap \n"); + } +} + 
+//========================================================================== +// +// +//========================================================================== + +CCMD (mpopen) +{ + if (netgame) + { + Printf ("You cannot use open in multiplayer games.\n"); + return; + } + if (argv.argc() > 1) + { + d_mapname = "file:"; + d_mapname += argv[1]; + if (!P_CheckMapData(d_mapname)) + { + Printf ("No map %s\n", d_mapname.GetChars()); + } + else + { + gameaction = ga_newgame2; + d_skill = -1; + multiplayernext = true; + } + } + else + { + Printf ("Usage: mpopen \n"); + } +} + //========================================================================== // @@ -293,7 +406,8 @@ void G_NewInit () G_ClearSnapshots (); ST_SetNeedRefresh(); netgame = false; - multiplayer = false; + multiplayer = multiplayernext; + multiplayernext = false; if (demoplayback) { C_RestoreCVars (); From a1ba371eede7d365ae5139866f4685cd8faa86ba Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 31 Oct 2016 06:09:46 +0100 Subject: [PATCH 250/912] Added triangle drawer to the software renderer --- src/CMakeLists.txt | 1 + src/r_triangle.cpp | 715 +++++++++++++++++++++++++++++++++++++++++++++ src/r_triangle.h | 96 ++++++ 3 files changed, 812 insertions(+) create mode 100644 src/r_triangle.cpp create mode 100644 src/r_triangle.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 09e2201434..bb00af82d3 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -936,6 +936,7 @@ set( FASTMATH_PCH_SOURCES r_segs.cpp r_sky.cpp r_things.cpp + r_triangle.cpp s_advsound.cpp s_environment.cpp s_playlist.cpp diff --git a/src/r_triangle.cpp b/src/r_triangle.cpp new file mode 100644 index 0000000000..429f4a62ab --- /dev/null +++ b/src/r_triangle.cpp @@ -0,0 +1,715 @@ +/* +** Triangle drawers +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. 
In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. +** +*/ + +#include +#include "templates.h" +#include "doomdef.h" +#include "i_system.h" +#include "w_wad.h" +#include "r_local.h" +#include "v_video.h" +#include "doomstat.h" +#include "st_stuff.h" +#include "g_game.h" +#include "g_level.h" +#include "r_data/r_translate.h" +#include "v_palette.h" +#include "r_data/colormaps.h" +#include "r_triangle.h" + +void TriangleDrawer::draw(const TriMatrix &objectToWorld, const TriVertex *vinput, int vcount, bool ccw, int clipleft, int clipright, const short *cliptop, const short *clipbottom, FTexture *texture) +{ + draw_any(objectToWorld, vinput, vcount, ccw, clipleft, clipright, cliptop, clipbottom, texture, 0, &ScreenTriangleDrawer::draw); +} + +void TriangleDrawer::fill(const TriMatrix &objectToWorld, const TriVertex *vinput, int vcount, bool ccw, int clipleft, int clipright, const short *cliptop, const short *clipbottom, int solidcolor) +{ + draw_any(objectToWorld, vinput, vcount, ccw, clipleft, clipright, cliptop, clipbottom, nullptr, solidcolor, &ScreenTriangleDrawer::fill); +} + +void TriangleDrawer::draw_any(const TriMatrix &objectToWorld, const TriVertex *vinput, int vcount, bool ccw, int clipleft, int clipright, const short *cliptop, const short 
*clipbottom, FTexture *texture, int solidcolor, void(*drawfunc)(const ScreenTriangleDrawerArgs *)) +{ + for (int i = 0; i < vcount / 3; i++) + { + TriVertex vert[3]; + + // Vertex shader stuff: + for (int j = 0; j < 3; j++) + { + auto &v = vert[j]; + v = *(vinput++); + + // Apply transform to get world coordinates: + v = objectToWorld * v; + + // The software renderer world to clip transform: + double nearp = 5.0f; + double farp = 65536.f; + double tr_x = v.x - ViewPos.X; + double tr_y = v.y - ViewPos.Y; + double tr_z = v.z - ViewPos.Z; + double tx = tr_x * ViewSin - tr_y * ViewCos; + double tz = tr_x * ViewTanCos + tr_y * ViewTanSin; + v.x = (float)tx * 0.5f; + v.y = (float)tr_z * 0.5f; + v.z = (float)((-tz * (farp + nearp) / (nearp - farp) + (2.0f * farp * nearp) / (nearp - farp))); + v.w = (float)tz; + } + + // Cull, clip and generate additional vertices as needed + TriVertex clippedvert[6]; + int numclipvert = 0; + clipedge(vert[0], vert[1], clippedvert, numclipvert); + clipedge(vert[1], vert[2], clippedvert, numclipvert); + clipedge(vert[2], vert[0], clippedvert, numclipvert); + + // Map to 2D viewport: + for (int j = 0; j < numclipvert; j++) + { + auto &v = clippedvert[j]; + + // Calculate normalized device coordinates: + v.w = 1.0f / v.w; + v.x *= v.w; + v.y *= v.w; + v.z *= v.w; + + // Apply viewport scale to get screen coordinates: + v.x = (float)(CenterX + v.x * 2.0f * CenterX); + v.y = (float)(CenterY - v.y * 2.0f * InvZtoScale); + } + + // Draw screen triangles + + ScreenTriangleDrawerArgs args; + args.dest = dc_destorg; + args.pitch = dc_pitch; + args.clipleft = clipleft; + args.clipright = clipright; + args.cliptop = cliptop; + args.clipbottom = clipbottom; + if (texture) + { + args.textureWidth = texture->GetWidth(); + args.textureHeight = texture->GetHeight(); + args.texturePixels = texture->GetPixels(); + } + else + { + args.textureWidth = 0; + args.textureHeight = 0; + args.texturePixels = nullptr; + } + args.solidcolor = solidcolor; + + if (ccw) 
+ { + for (int i = numclipvert; i > 1; i--) + { + args.v1 = &clippedvert[numclipvert - 1]; + args.v2 = &clippedvert[i - 1]; + args.v3 = &clippedvert[i - 2]; + drawfunc(&args); + } + } + else + { + for (int i = 2; i < numclipvert; i++) + { + args.v1 = &clippedvert[0]; + args.v2 = &clippedvert[i - 1]; + args.v3 = &clippedvert[i]; + drawfunc(&args); + } + } + } +} + +bool TriangleDrawer::cullhalfspace(float clipdistance1, float clipdistance2, float &t1, float &t2) +{ + float d1 = clipdistance1 * (1.0f - t1) + clipdistance2 * t1; + float d2 = clipdistance1 * (1.0f - t2) + clipdistance2 * t2; + if (d1 < 0.0f && d2 < 0.0f) + return true; + + if (d1 < 0.0f) + t1 = MAX(-clipdistance1 / (clipdistance2 - clipdistance1), t1); + + if (d2 < 0.0f) + t2 = MIN(1.0f + clipdistance2 / (clipdistance1 - clipdistance2), t2); + + return false; +} + +void TriangleDrawer::clipedge(const TriVertex &v1, const TriVertex &v2, TriVertex *clippedvert, int &numclipvert) +{ + // Clip and cull so that the following is true for all vertices: + // -v.w <= v.x <= v.w + // -v.w <= v.y <= v.w + // -v.w <= v.z <= v.w + + float t1 = 0.0f, t2 = 1.0f; + bool culled = + cullhalfspace(v1.x + v1.w, v2.x + v2.w, t1, t2) || + cullhalfspace(v1.w - v1.x, v2.w - v2.x, t1, t2) || + cullhalfspace(v1.y + v1.w, v2.y + v2.w, t1, t2) || + cullhalfspace(v1.w - v1.y, v2.w - v2.y, t1, t2) || + cullhalfspace(v1.z + v1.w, v2.z + v2.w, t1, t2) || + cullhalfspace(v1.w - v1.z, v2.w - v2.z, t1, t2); + if (culled) + return; + + if (t1 == 0.0f) + { + clippedvert[numclipvert++] = v1; + } + else + { + auto &v = clippedvert[numclipvert++]; + v.x = v1.x * (1.0f - t1) + v2.x * t1; + v.y = v1.y * (1.0f - t1) + v2.y * t1; + v.z = v1.z * (1.0f - t1) + v2.z * t1; + v.w = v1.w * (1.0f - t1) + v2.w * t1; + for (int i = 0; i < TriVertex::NumVarying; i++) + v.varying[i] = v1.varying[i] * (1.0f - t1) + v2.varying[i] * t1; + } + + if (t2 != 1.0f) + { + auto &v = clippedvert[numclipvert++]; + v.x = v1.x * (1.0f - t2) + v2.x * t2; + v.y = v1.y * 
(1.0f - t2) + v2.y * t2; + v.z = v1.z * (1.0f - t2) + v2.z * t2; + v.w = v1.w * (1.0f - t2) + v2.w * t2; + for (int i = 0; i < TriVertex::NumVarying; i++) + v.varying[i] = v1.varying[i] * (1.0f - t2) + v2.varying[i] * t2; + } +} + +///////////////////////////////////////////////////////////////////////////// + +void ScreenTriangleDrawer::draw(const ScreenTriangleDrawerArgs *args) +{ + uint8_t *dest = args->dest; + int pitch = args->pitch; + const TriVertex &v1 = *args->v1; + const TriVertex &v2 = *args->v2; + const TriVertex &v3 = *args->v3; + int clipleft = args->clipleft; + int clipright = args->clipright; + const short *cliptop = args->cliptop; + const short *clipbottom = args->clipbottom; + const uint8_t *texturePixels = args->texturePixels; + int textureWidth = args->textureWidth; + int textureHeight = args->textureHeight; + + // 28.4 fixed-point coordinates + const int Y1 = (int)round(16.0f * v1.y); + const int Y2 = (int)round(16.0f * v2.y); + const int Y3 = (int)round(16.0f * v3.y); + + const int X1 = (int)round(16.0f * v1.x); + const int X2 = (int)round(16.0f * v2.x); + const int X3 = (int)round(16.0f * v3.x); + + // Deltas + const int DX12 = X1 - X2; + const int DX23 = X2 - X3; + const int DX31 = X3 - X1; + + const int DY12 = Y1 - Y2; + const int DY23 = Y2 - Y3; + const int DY31 = Y3 - Y1; + + // Fixed-point deltas + const int FDX12 = DX12 << 4; + const int FDX23 = DX23 << 4; + const int FDX31 = DX31 << 4; + + const int FDY12 = DY12 << 4; + const int FDY23 = DY23 << 4; + const int FDY31 = DY31 << 4; + + // Bounding rectangle + int clipymin = cliptop[clipleft]; + int clipymax = clipbottom[clipleft]; + for (int i = clipleft + 1; i <= clipright; i++) + { + clipymin = MIN(clipymin, (int)cliptop[i]); + clipymax = MAX(clipymax, (int)clipbottom[i]); + } + int minx = MAX((MIN(MIN(X1, X2), X3) + 0xF) >> 4, clipleft); + int maxx = MIN((MAX(MAX(X1, X2), X3) + 0xF) >> 4, clipright); + int miny = MAX((MIN(MIN(Y1, Y2), Y3) + 0xF) >> 4, clipymin); + int maxy = 
MIN((MAX(MAX(Y1, Y2), Y3) + 0xF) >> 4, clipymax - 1); + if (minx >= maxx || miny >= maxy) + return; + + // Block size, standard 8x8 (must be power of two) + const int q = 8; + + // Start in corner of 8x8 block + minx &= ~(q - 1); + miny &= ~(q - 1); + + dest += miny * pitch; + + // Half-edge constants + int C1 = DY12 * X1 - DX12 * Y1; + int C2 = DY23 * X2 - DX23 * Y2; + int C3 = DY31 * X3 - DX31 * Y3; + + // Correct for fill convention + if (DY12 < 0 || (DY12 == 0 && DX12 > 0)) C1++; + if (DY23 < 0 || (DY23 == 0 && DX23 > 0)) C2++; + if (DY31 < 0 || (DY31 == 0 && DX31 > 0)) C3++; + + // Gradients + float gradWX = gradx(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + float gradWY = grady(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + float startW = v1.w + gradWX * (minx - v1.x) + gradWY * (miny - v1.y); + float gradVaryingX[TriVertex::NumVarying], gradVaryingY[TriVertex::NumVarying], startVarying[TriVertex::NumVarying]; + for (int i = 0; i < TriVertex::NumVarying; i++) + { + gradVaryingX[i] = gradx(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + gradVaryingY[i] = grady(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + startVarying[i] = v1.varying[i] * v1.w + gradVaryingX[i] * (minx - v1.x) + gradVaryingY[i] * (miny - v1.y); + } + + // Loop through blocks + for (int y = miny; y < maxy; y += q) + { + for (int x = minx; x < maxx; x += q) + { + // Corners of block + int x0 = x << 4; + int x1 = (x + q - 1) << 4; + int y0 = y << 4; + int y1 = (y + q - 1) << 4; + + // Evaluate half-space functions + bool a00 = C1 + DX12 * y0 - DY12 * x0 > 0; + bool a10 = C1 + DX12 * y0 - DY12 * x1 > 0; + bool a01 = C1 + DX12 * y1 - DY12 * x0 > 0; + bool a11 = C1 + DX12 * y1 - DY12 * x1 > 0; + int a = (a00 << 0) | (a10 << 1) | (a01 << 2) | (a11 << 3); + + bool b00 = C2 + DX23 * y0 - DY23 * x0 > 0; + bool b10 = C2 + DX23 * y0 - DY23 * x1 > 0; + bool b01 = C2 + 
DX23 * y1 - DY23 * x0 > 0; + bool b11 = C2 + DX23 * y1 - DY23 * x1 > 0; + int b = (b00 << 0) | (b10 << 1) | (b01 << 2) | (b11 << 3); + + bool c00 = C3 + DX31 * y0 - DY31 * x0 > 0; + bool c10 = C3 + DX31 * y0 - DY31 * x1 > 0; + bool c01 = C3 + DX31 * y1 - DY31 * x0 > 0; + bool c11 = C3 + DX31 * y1 - DY31 * x1 > 0; + int c = (c00 << 0) | (c10 << 1) | (c01 << 2) | (c11 << 3); + + // Skip block when outside an edge + if (a == 0x0 || b == 0x0 || c == 0x0) continue; + + // Check if block needs clipping + int clipcount = 0; + for (int ix = x; ix < x + q; ix++) + { + clipcount += (clipleft > ix) || (clipright < ix) || (cliptop[ix] > y) || (clipbottom[ix] <= y + q - 1); + } + + // Calculate varying variables for affine block + float offx0 = (x - minx) + 0.5f; + float offy0 = (y - miny) + 0.5f; + float offx1 = offx0 + q; + float offy1 = offy0 + q; + float rcpWTL = 1.0f / (startW + offx0 * gradWX + offy0 * gradWY); + float rcpWTR = 1.0f / (startW + offx1 * gradWX + offy0 * gradWY); + float rcpWBL = 1.0f / (startW + offx0 * gradWX + offy1 * gradWY); + float rcpWBR = 1.0f / (startW + offx1 * gradWX + offy1 * gradWY); + float varyingTL[TriVertex::NumVarying]; + float varyingTR[TriVertex::NumVarying]; + float varyingBL[TriVertex::NumVarying]; + float varyingBR[TriVertex::NumVarying]; + for (int i = 0; i < TriVertex::NumVarying; i++) + { + varyingTL[i] = (startVarying[i] + offx0 * gradVaryingX[i] + offy0 * gradVaryingY[i]) * rcpWTL; + varyingTR[i] = (startVarying[i] + offx1 * gradVaryingX[i] + offy0 * gradVaryingY[i]) * rcpWTR; + varyingBL[i] = ((startVarying[i] + offx0 * gradVaryingX[i] + offy1 * gradVaryingY[i]) * rcpWBL - varyingTL[i]) * (1.0f / q); + varyingBR[i] = ((startVarying[i] + offx1 * gradVaryingX[i] + offy1 * gradVaryingY[i]) * rcpWBR - varyingTR[i]) * (1.0f / q); + } + + uint8_t *buffer = dest; + + // Accept whole block when totally covered + if (a == 0xF && b == 0xF && c == 0xF && clipcount == 0) + { + for (int iy = 0; iy < q; iy++) + { + float 
varying[TriVertex::NumVarying], varyingStep[TriVertex::NumVarying]; + for (int i = 0; i < TriVertex::NumVarying; i++) + { + varying[i] = varyingTL[i] + varyingBL[i] * iy; + varyingStep[i] = (varyingTR[i] + varyingBR[i] * iy - varying[i]) * (1.0f / q); + } + + for (int ix = x; ix < x + q; ix++) + { + uint32_t ufrac = (uint32_t)((varying[0] - floor(varying[0])) * 0x100000000LL); + uint32_t vfrac = (uint32_t)((varying[1] - floor(varying[1])) * 0x100000000LL); + //uint32_t light = (uint32_t)clamp(varying[2] * 255.0f + 0.5f, 0.0f, 255.0f); + + uint32_t upos = ((ufrac >> 16) * textureWidth) >> 16; + uint32_t vpos = ((vfrac >> 16) * textureHeight) >> 16; + uint32_t uvoffset = upos * textureHeight + vpos; + + buffer[ix] = texturePixels[uvoffset]; + + for (int i = 0; i < TriVertex::NumVarying; i++) + varying[i] += varyingStep[i]; + } + + buffer += pitch; + } + } + else // Partially covered block + { + int CY1 = C1 + DX12 * y0 - DY12 * x0; + int CY2 = C2 + DX23 * y0 - DY23 * x0; + int CY3 = C3 + DX31 * y0 - DY31 * x0; + + for (int iy = 0; iy < q; iy++) + { + int CX1 = CY1; + int CX2 = CY2; + int CX3 = CY3; + + float varying[TriVertex::NumVarying], varyingStep[TriVertex::NumVarying]; + for (int i = 0; i < TriVertex::NumVarying; i++) + { + varying[i] = varyingTL[i] + varyingBL[i] * iy; + varyingStep[i] = (varyingTR[i] + varyingBR[i] * iy - varying[i]) * (1.0f / q); + } + + for (int ix = x; ix < x + q; ix++) + { + bool visible = ix >= clipleft && ix <= clipright && (cliptop[ix] <= y + iy) && (clipbottom[ix] > y + iy); + + if (CX1 > 0 && CX2 > 0 && CX3 > 0 && visible) + { + uint32_t ufrac = (uint32_t)((varying[0] - floor(varying[0])) * 0x100000000LL); + uint32_t vfrac = (uint32_t)((varying[1] - floor(varying[1])) * 0x100000000LL); + //uint32_t light = (uint32_t)clamp(varying[2] * 255.0f + 0.5f, 0.0f, 255.0f); + + uint32_t upos = ((ufrac >> 16) * textureWidth) >> 16; + uint32_t vpos = ((vfrac >> 16) * textureHeight) >> 16; + uint32_t uvoffset = upos * textureHeight + vpos; + + 
buffer[ix] = texturePixels[uvoffset]; + } + + for (int i = 0; i < TriVertex::NumVarying; i++) + varying[i] += varyingStep[i]; + + CX1 -= FDY12; + CX2 -= FDY23; + CX3 -= FDY31; + } + + CY1 += FDX12; + CY2 += FDX23; + CY3 += FDX31; + + buffer += pitch; + } + } + } + + dest += q * pitch; + } +} + +void ScreenTriangleDrawer::fill(const ScreenTriangleDrawerArgs *args) +{ + uint8_t *dest = args->dest; + int pitch = args->pitch; + const TriVertex &v1 = *args->v1; + const TriVertex &v2 = *args->v2; + const TriVertex &v3 = *args->v3; + int clipleft = args->clipleft; + int clipright = args->clipright; + const short *cliptop = args->cliptop; + const short *clipbottom = args->clipbottom; + int solidcolor = args->solidcolor; + + // 28.4 fixed-point coordinates + const int Y1 = (int)round(16.0f * v1.y); + const int Y2 = (int)round(16.0f * v2.y); + const int Y3 = (int)round(16.0f * v3.y); + + const int X1 = (int)round(16.0f * v1.x); + const int X2 = (int)round(16.0f * v2.x); + const int X3 = (int)round(16.0f * v3.x); + + // Deltas + const int DX12 = X1 - X2; + const int DX23 = X2 - X3; + const int DX31 = X3 - X1; + + const int DY12 = Y1 - Y2; + const int DY23 = Y2 - Y3; + const int DY31 = Y3 - Y1; + + // Fixed-point deltas + const int FDX12 = DX12 << 4; + const int FDX23 = DX23 << 4; + const int FDX31 = DX31 << 4; + + const int FDY12 = DY12 << 4; + const int FDY23 = DY23 << 4; + const int FDY31 = DY31 << 4; + + // Bounding rectangle + int clipymin = cliptop[clipleft]; + int clipymax = clipbottom[clipleft]; + for (int i = clipleft + 1; i <= clipright; i++) + { + clipymin = MIN(clipymin, (int)cliptop[i]); + clipymax = MAX(clipymax, (int)clipbottom[i]); + } + int minx = MAX((MIN(MIN(X1, X2), X3) + 0xF) >> 4, clipleft); + int maxx = MIN((MAX(MAX(X1, X2), X3) + 0xF) >> 4, clipright); + int miny = MAX((MIN(MIN(Y1, Y2), Y3) + 0xF) >> 4, clipymin); + int maxy = MIN((MAX(MAX(Y1, Y2), Y3) + 0xF) >> 4, clipymax - 1); + if (minx >= maxx || miny >= maxy) + return; + + // Block size, standard 
8x8 (must be power of two) + const int q = 8; + + // Start in corner of 8x8 block + minx &= ~(q - 1); + miny &= ~(q - 1); + + dest += miny * pitch; + + // Half-edge constants + int C1 = DY12 * X1 - DX12 * Y1; + int C2 = DY23 * X2 - DX23 * Y2; + int C3 = DY31 * X3 - DX31 * Y3; + + // Correct for fill convention + if (DY12 < 0 || (DY12 == 0 && DX12 > 0)) C1++; + if (DY23 < 0 || (DY23 == 0 && DX23 > 0)) C2++; + if (DY31 < 0 || (DY31 == 0 && DX31 > 0)) C3++; + + // Loop through blocks + for (int y = miny; y < maxy; y += q) + { + for (int x = minx; x < maxx; x += q) + { + // Corners of block + int x0 = x << 4; + int x1 = (x + q - 1) << 4; + int y0 = y << 4; + int y1 = (y + q - 1) << 4; + + // Evaluate half-space functions + bool a00 = C1 + DX12 * y0 - DY12 * x0 > 0; + bool a10 = C1 + DX12 * y0 - DY12 * x1 > 0; + bool a01 = C1 + DX12 * y1 - DY12 * x0 > 0; + bool a11 = C1 + DX12 * y1 - DY12 * x1 > 0; + int a = (a00 << 0) | (a10 << 1) | (a01 << 2) | (a11 << 3); + + bool b00 = C2 + DX23 * y0 - DY23 * x0 > 0; + bool b10 = C2 + DX23 * y0 - DY23 * x1 > 0; + bool b01 = C2 + DX23 * y1 - DY23 * x0 > 0; + bool b11 = C2 + DX23 * y1 - DY23 * x1 > 0; + int b = (b00 << 0) | (b10 << 1) | (b01 << 2) | (b11 << 3); + + bool c00 = C3 + DX31 * y0 - DY31 * x0 > 0; + bool c10 = C3 + DX31 * y0 - DY31 * x1 > 0; + bool c01 = C3 + DX31 * y1 - DY31 * x0 > 0; + bool c11 = C3 + DX31 * y1 - DY31 * x1 > 0; + int c = (c00 << 0) | (c10 << 1) | (c01 << 2) | (c11 << 3); + + // Skip block when outside an edge + if (a == 0x0 || b == 0x0 || c == 0x0) continue; + + // Check if block needs clipping + int clipcount = 0; + for (int ix = x; ix < x + q; ix++) + { + clipcount += (clipleft > ix) || (clipright < ix) || (cliptop[ix] > y) || (clipbottom[ix] <= y + q - 1); + } + + uint8_t *buffer = dest; + + // Accept whole block when totally covered + if (a == 0xF && b == 0xF && c == 0xF && clipcount == 0) + { + for (int iy = 0; iy < q; iy++) + { + for (int ix = x; ix < x + q; ix++) + { + buffer[ix] = solidcolor; + } + 
+ buffer += pitch; + } + } + else // Partially covered block + { + int CY1 = C1 + DX12 * y0 - DY12 * x0; + int CY2 = C2 + DX23 * y0 - DY23 * x0; + int CY3 = C3 + DX31 * y0 - DY31 * x0; + + for (int iy = 0; iy < q; iy++) + { + int CX1 = CY1; + int CX2 = CY2; + int CX3 = CY3; + + for (int ix = x; ix < x + q; ix++) + { + bool visible = ix >= clipleft && ix <= clipright && (cliptop[ix] <= y + iy) && (clipbottom[ix] > y + iy); + + if (CX1 > 0 && CX2 > 0 && CX3 > 0 && visible) + { + buffer[ix] = solidcolor; + } + + CX1 -= FDY12; + CX2 -= FDY23; + CX3 -= FDY31; + } + + CY1 += FDX12; + CY2 += FDX23; + CY3 += FDX31; + + buffer += pitch; + } + } + } + + dest += q * pitch; + } +} + +float ScreenTriangleDrawer::gradx(float x0, float y0, float x1, float y1, float x2, float y2, float c0, float c1, float c2) +{ + float top = (c1 - c2) * (y0 - y2) - (c0 - c2) * (y1 - y2); + float bottom = (x1 - x2) * (y0 - y2) - (x0 - x2) * (y1 - y2); + return top / bottom; +} + +float ScreenTriangleDrawer::grady(float x0, float y0, float x1, float y1, float x2, float y2, float c0, float c1, float c2) +{ + float top = (c1 - c2) * (x0 - x2) - (c0 - c2) * (x1 - x2); + float bottom = -((x1 - x2) * (y0 - y2) - (x0 - x2) * (y1 - y2)); + return top / bottom; +} + +///////////////////////////////////////////////////////////////////////////// + +TriMatrix TriMatrix::null() +{ + TriMatrix m; + memset(m.matrix, 0, sizeof(m.matrix)); + return m; +} + +TriMatrix TriMatrix::identity() +{ + TriMatrix m = null(); + m.matrix[0] = 1.0f; + m.matrix[5] = 1.0f; + m.matrix[10] = 1.0f; + m.matrix[15] = 1.0f; + return m; +} + +TriMatrix TriMatrix::translate(float x, float y, float z) +{ + TriMatrix m = identity(); + m.matrix[0 + 3 * 4] = x; + m.matrix[1 + 3 * 4] = y; + m.matrix[2 + 3 * 4] = z; + return m; +} + +TriMatrix TriMatrix::scale(float x, float y, float z) +{ + TriMatrix m = null(); + m.matrix[0 + 0 * 4] = x; + m.matrix[1 + 1 * 4] = y; + m.matrix[2 + 2 * 4] = z; + m.matrix[3 + 3 * 4] = 1; + return m; +} + 
+TriMatrix TriMatrix::rotate(float angle, float x, float y, float z) +{ + float c = cosf(angle); + float s = sinf(angle); + TriMatrix m = null(); + m.matrix[0 + 0 * 4] = (x*x*(1.0f - c) + c); + m.matrix[0 + 1 * 4] = (x*y*(1.0f - c) - z*s); + m.matrix[0 + 2 * 4] = (x*z*(1.0f - c) + y*s); + m.matrix[1 + 0 * 4] = (y*x*(1.0f - c) + z*s); + m.matrix[1 + 1 * 4] = (y*y*(1.0f - c) + c); + m.matrix[1 + 2 * 4] = (y*z*(1.0f - c) - x*s); + m.matrix[2 + 0 * 4] = (x*z*(1.0f - c) - y*s); + m.matrix[2 + 1 * 4] = (y*z*(1.0f - c) + x*s); + m.matrix[2 + 2 * 4] = (z*z*(1.0f - c) + c); + m.matrix[3 + 3 * 4] = 1.0f; + return m; +} + +TriMatrix TriMatrix::operator*(const TriMatrix &mult) const +{ + TriMatrix result; + for (int x = 0; x < 4; x++) + { + for (int y = 0; y < 4; y++) + { + result.matrix[x + y * 4] = + matrix[0 * 4 + x] * mult.matrix[y * 4 + 0] + + matrix[1 * 4 + x] * mult.matrix[y * 4 + 1] + + matrix[2 * 4 + x] * mult.matrix[y * 4 + 2] + + matrix[3 * 4 + x] * mult.matrix[y * 4 + 3]; + } + } + return result; +} + +TriVertex TriMatrix::operator*(TriVertex v) const +{ + float vx = matrix[0 * 4 + 0] * v.x + matrix[1 * 4 + 0] * v.y + matrix[2 * 4 + 0] * v.z + matrix[3 * 4 + 0] * v.w; + float vy = matrix[0 * 4 + 1] * v.x + matrix[1 * 4 + 1] * v.y + matrix[2 * 4 + 1] * v.z + matrix[3 * 4 + 1] * v.w; + float vz = matrix[0 * 4 + 2] * v.x + matrix[1 * 4 + 2] * v.y + matrix[2 * 4 + 2] * v.z + matrix[3 * 4 + 2] * v.w; + float vw = matrix[0 * 4 + 3] * v.x + matrix[1 * 4 + 3] * v.y + matrix[2 * 4 + 3] * v.z + matrix[3 * 4 + 3] * v.w; + v.x = vx; + v.y = vy; + v.z = vz; + v.w = vw; + return v; +} diff --git a/src/r_triangle.h b/src/r_triangle.h new file mode 100644 index 0000000000..2cd98bdd6e --- /dev/null +++ b/src/r_triangle.h @@ -0,0 +1,96 @@ +/* +** Triangle drawers +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. 
In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. +** +*/ + + +#ifndef __R_TRIANGLE__ +#define __R_TRIANGLE__ + +#include "r_draw.h" + +class FTexture; +struct ScreenTriangleDrawerArgs; + +struct TriVertex +{ + TriVertex() { } + TriVertex(float x, float y, float z, float w, float u, float v, float light) : x(x), y(y), z(z), w(w) { varying[0] = u; varying[1] = v; varying[2] = light; } + + enum { NumVarying = 3 }; + float x, y, z, w; + float varying[NumVarying]; +}; + +struct TriMatrix +{ + static TriMatrix null(); + static TriMatrix identity(); + static TriMatrix translate(float x, float y, float z); + static TriMatrix scale(float x, float y, float z); + static TriMatrix rotate(float angle, float x, float y, float z); + + TriVertex operator*(TriVertex v) const; + TriMatrix operator*(const TriMatrix &m) const; + + float matrix[16]; +}; + +class TriangleDrawer +{ +public: + static void draw(const TriMatrix &objectToWorld, const TriVertex *vinput, int vcount, bool ccw, int clipleft, int clipright, const short *cliptop, const short *clipbottom, FTexture *texture); + static void fill(const TriMatrix &objectToWorld, const TriVertex *vinput, int vcount, bool ccw, int clipleft, int clipright, const short *cliptop, const short *clipbottom, int solidcolor); + 
+private: + static void draw_any(const TriMatrix &objectToWorld, const TriVertex *vinput, int vcount, bool ccw, int clipleft, int clipright, const short *cliptop, const short *clipbottom, FTexture *texture, int solidcolor, void(*drawfunc)(const ScreenTriangleDrawerArgs *)); + static bool cullhalfspace(float clipdistance1, float clipdistance2, float &t1, float &t2); + static void clipedge(const TriVertex &v1, const TriVertex &v2, TriVertex *clippedvert, int &numclipvert); +}; + +struct ScreenTriangleDrawerArgs +{ + uint8_t *dest; + int pitch; + TriVertex *v1; + TriVertex *v2; + TriVertex *v3; + int clipleft; + int clipright; + const short *cliptop; + const short *clipbottom; + const uint8_t *texturePixels; + int textureWidth; + int textureHeight; + int solidcolor; +}; + +class ScreenTriangleDrawer +{ +public: + static void draw(const ScreenTriangleDrawerArgs *args); + static void fill(const ScreenTriangleDrawerArgs *args); + +private: + static float gradx(float x0, float y0, float x1, float y1, float x2, float y2, float c0, float c1, float c2); + static float grady(float x0, float y0, float x1, float y1, float x2, float y2, float c0, float c1, float c2); +}; + +#endif From 7a9c674f83379ec8ea9b714afd049660a58c06f5 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 31 Oct 2016 08:30:08 +0100 Subject: [PATCH 251/912] Add triangle fan and strip --- src/r_triangle.cpp | 203 ++++++++++++++++++++++++++------------------- src/r_triangle.h | 15 +++- 2 files changed, 130 insertions(+), 88 deletions(-) diff --git a/src/r_triangle.cpp b/src/r_triangle.cpp index 429f4a62ab..b6e7b1b2b6 100644 --- a/src/r_triangle.cpp +++ b/src/r_triangle.cpp @@ -36,110 +36,143 @@ #include "r_data/colormaps.h" #include "r_triangle.h" -void TriangleDrawer::draw(const TriMatrix &objectToWorld, const TriVertex *vinput, int vcount, bool ccw, int clipleft, int clipright, const short *cliptop, const short *clipbottom, FTexture *texture) +void TriangleDrawer::draw(const TriMatrix &objectToWorld, 
const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, const short *cliptop, const short *clipbottom, FTexture *texture) { - draw_any(objectToWorld, vinput, vcount, ccw, clipleft, clipright, cliptop, clipbottom, texture, 0, &ScreenTriangleDrawer::draw); + draw_arrays(objectToWorld, vinput, vcount, mode, ccw, clipleft, clipright, cliptop, clipbottom, texture, 0, &ScreenTriangleDrawer::draw); } -void TriangleDrawer::fill(const TriMatrix &objectToWorld, const TriVertex *vinput, int vcount, bool ccw, int clipleft, int clipright, const short *cliptop, const short *clipbottom, int solidcolor) +void TriangleDrawer::fill(const TriMatrix &objectToWorld, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, const short *cliptop, const short *clipbottom, int solidcolor) { - draw_any(objectToWorld, vinput, vcount, ccw, clipleft, clipright, cliptop, clipbottom, nullptr, solidcolor, &ScreenTriangleDrawer::fill); + draw_arrays(objectToWorld, vinput, vcount, mode, ccw, clipleft, clipright, cliptop, clipbottom, nullptr, solidcolor, &ScreenTriangleDrawer::fill); } -void TriangleDrawer::draw_any(const TriMatrix &objectToWorld, const TriVertex *vinput, int vcount, bool ccw, int clipleft, int clipright, const short *cliptop, const short *clipbottom, FTexture *texture, int solidcolor, void(*drawfunc)(const ScreenTriangleDrawerArgs *)) +void TriangleDrawer::draw_arrays(const TriMatrix &objectToWorld, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, const short *cliptop, const short *clipbottom, FTexture *texture, int solidcolor, void(*drawfunc)(const ScreenTriangleDrawerArgs *)) { - for (int i = 0; i < vcount / 3; i++) + if (vcount < 3) + return; + + ScreenTriangleDrawerArgs args; + args.dest = dc_destorg; + args.pitch = dc_pitch; + args.clipleft = clipleft; + args.clipright = clipright; + args.cliptop = cliptop; + args.clipbottom = clipbottom; + if 
(texture) { - TriVertex vert[3]; + args.textureWidth = texture->GetWidth(); + args.textureHeight = texture->GetHeight(); + args.texturePixels = texture->GetPixels(); + } + else + { + args.textureWidth = 0; + args.textureHeight = 0; + args.texturePixels = nullptr; + } + args.solidcolor = solidcolor; - // Vertex shader stuff: - for (int j = 0; j < 3; j++) + TriVertex vert[3]; + if (mode == TriangleDrawMode::Normal) + { + for (int i = 0; i < vcount / 3; i++) { - auto &v = vert[j]; - v = *(vinput++); - - // Apply transform to get world coordinates: - v = objectToWorld * v; - - // The software renderer world to clip transform: - double nearp = 5.0f; - double farp = 65536.f; - double tr_x = v.x - ViewPos.X; - double tr_y = v.y - ViewPos.Y; - double tr_z = v.z - ViewPos.Z; - double tx = tr_x * ViewSin - tr_y * ViewCos; - double tz = tr_x * ViewTanCos + tr_y * ViewTanSin; - v.x = (float)tx * 0.5f; - v.y = (float)tr_z * 0.5f; - v.z = (float)((-tz * (farp + nearp) / (nearp - farp) + (2.0f * farp * nearp) / (nearp - farp))); - v.w = (float)tz; + for (int j = 0; j < 3; j++) + vert[j] = shade_vertex(objectToWorld, *(vinput++)); + draw_shaded_triangle(vert, ccw, &args, drawfunc); } - - // Cull, clip and generate additional vertices as needed - TriVertex clippedvert[6]; - int numclipvert = 0; - clipedge(vert[0], vert[1], clippedvert, numclipvert); - clipedge(vert[1], vert[2], clippedvert, numclipvert); - clipedge(vert[2], vert[0], clippedvert, numclipvert); - - // Map to 2D viewport: - for (int j = 0; j < numclipvert; j++) + } + else if (mode == TriangleDrawMode::Fan) + { + vert[0] = shade_vertex(objectToWorld, *(vinput++)); + vert[1] = shade_vertex(objectToWorld, *(vinput++)); + for (int i = 2; i < vcount; i++) { - auto &v = clippedvert[j]; - - // Calculate normalized device coordinates: - v.w = 1.0f / v.w; - v.x *= v.w; - v.y *= v.w; - v.z *= v.w; - - // Apply viewport scale to get screen coordinates: - v.x = (float)(CenterX + v.x * 2.0f * CenterX); - v.y = (float)(CenterY - 
v.y * 2.0f * InvZtoScale); + vert[2] = shade_vertex(objectToWorld, *(vinput++)); + draw_shaded_triangle(vert, ccw, &args, drawfunc); + vert[1] = vert[2]; } - - // Draw screen triangles - - ScreenTriangleDrawerArgs args; - args.dest = dc_destorg; - args.pitch = dc_pitch; - args.clipleft = clipleft; - args.clipright = clipright; - args.cliptop = cliptop; - args.clipbottom = clipbottom; - if (texture) + } + else // TriangleDrawMode::Strip + { + vert[0] = shade_vertex(objectToWorld, *(vinput++)); + vert[1] = shade_vertex(objectToWorld, *(vinput++)); + for (int i = 2; i < vcount; i++) { - args.textureWidth = texture->GetWidth(); - args.textureHeight = texture->GetHeight(); - args.texturePixels = texture->GetPixels(); + vert[2] = shade_vertex(objectToWorld, *(vinput++)); + draw_shaded_triangle(vert, ccw, &args, drawfunc); + vert[0] = vert[1]; + vert[1] = vert[2]; + ccw = !ccw; } - else - { - args.textureWidth = 0; - args.textureHeight = 0; - args.texturePixels = nullptr; - } - args.solidcolor = solidcolor; + } +} - if (ccw) +TriVertex TriangleDrawer::shade_vertex(const TriMatrix &objectToWorld, TriVertex v) +{ + // Apply transform to get world coordinates: + v = objectToWorld * v; + + // The software renderer world to clip transform: + double nearp = 5.0f; + double farp = 65536.f; + double tr_x = v.x - ViewPos.X; + double tr_y = v.y - ViewPos.Y; + double tr_z = v.z - ViewPos.Z; + double tx = tr_x * ViewSin - tr_y * ViewCos; + double tz = tr_x * ViewTanCos + tr_y * ViewTanSin; + v.x = (float)tx * 0.5f; + v.y = (float)tr_z * 0.5f; + v.z = (float)((-tz * (farp + nearp) / (nearp - farp) + (2.0f * farp * nearp) / (nearp - farp))); + v.w = (float)tz; + + return v; +} + +void TriangleDrawer::draw_shaded_triangle(const TriVertex *vert, bool ccw, ScreenTriangleDrawerArgs *args, void(*drawfunc)(const ScreenTriangleDrawerArgs *)) +{ + // Cull, clip and generate additional vertices as needed + TriVertex clippedvert[6]; + int numclipvert = 0; + clipedge(vert[0], vert[1], clippedvert, 
numclipvert); + clipedge(vert[1], vert[2], clippedvert, numclipvert); + clipedge(vert[2], vert[0], clippedvert, numclipvert); + + // Map to 2D viewport: + for (int j = 0; j < numclipvert; j++) + { + auto &v = clippedvert[j]; + + // Calculate normalized device coordinates: + v.w = 1.0f / v.w; + v.x *= v.w; + v.y *= v.w; + v.z *= v.w; + + // Apply viewport scale to get screen coordinates: + v.x = (float)(CenterX + v.x * 2.0f * CenterX); + v.y = (float)(CenterY - v.y * 2.0f * InvZtoScale); + } + + // Draw screen triangles + if (ccw) + { + for (int i = numclipvert; i > 1; i--) { - for (int i = numclipvert; i > 1; i--) - { - args.v1 = &clippedvert[numclipvert - 1]; - args.v2 = &clippedvert[i - 1]; - args.v3 = &clippedvert[i - 2]; - drawfunc(&args); - } + args->v1 = &clippedvert[numclipvert - 1]; + args->v2 = &clippedvert[i - 1]; + args->v3 = &clippedvert[i - 2]; + drawfunc(args); } - else + } + else + { + for (int i = 2; i < numclipvert; i++) { - for (int i = 2; i < numclipvert; i++) - { - args.v1 = &clippedvert[0]; - args.v2 = &clippedvert[i - 1]; - args.v3 = &clippedvert[i]; - drawfunc(&args); - } + args->v1 = &clippedvert[0]; + args->v2 = &clippedvert[i - 1]; + args->v3 = &clippedvert[i]; + drawfunc(args); } } } diff --git a/src/r_triangle.h b/src/r_triangle.h index 2cd98bdd6e..f1ff220b66 100644 --- a/src/r_triangle.h +++ b/src/r_triangle.h @@ -53,14 +53,23 @@ struct TriMatrix float matrix[16]; }; +enum class TriangleDrawMode +{ + Normal, + Fan, + Strip +}; + class TriangleDrawer { public: - static void draw(const TriMatrix &objectToWorld, const TriVertex *vinput, int vcount, bool ccw, int clipleft, int clipright, const short *cliptop, const short *clipbottom, FTexture *texture); - static void fill(const TriMatrix &objectToWorld, const TriVertex *vinput, int vcount, bool ccw, int clipleft, int clipright, const short *cliptop, const short *clipbottom, int solidcolor); + static void draw(const TriMatrix &objectToWorld, const TriVertex *vinput, int vcount, 
TriangleDrawMode mode, bool ccw, int clipleft, int clipright, const short *cliptop, const short *clipbottom, FTexture *texture); + static void fill(const TriMatrix &objectToWorld, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, const short *cliptop, const short *clipbottom, int solidcolor); private: - static void draw_any(const TriMatrix &objectToWorld, const TriVertex *vinput, int vcount, bool ccw, int clipleft, int clipright, const short *cliptop, const short *clipbottom, FTexture *texture, int solidcolor, void(*drawfunc)(const ScreenTriangleDrawerArgs *)); + static TriVertex shade_vertex(const TriMatrix &objectToWorld, TriVertex v); + static void draw_arrays(const TriMatrix &objectToWorld, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, const short *cliptop, const short *clipbottom, FTexture *texture, int solidcolor, void(*drawfunc)(const ScreenTriangleDrawerArgs *)); + static void draw_shaded_triangle(const TriVertex *vertices, bool ccw, ScreenTriangleDrawerArgs *args, void(*drawfunc)(const ScreenTriangleDrawerArgs *)); static bool cullhalfspace(float clipdistance1, float clipdistance2, float &t1, float &t2); static void clipedge(const TriVertex &v1, const TriVertex &v2, TriVertex *clippedvert, int &numclipvert); }; From cdf8264dc3e77f6681ef187565be7961bf341125 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 31 Oct 2016 08:30:39 +0100 Subject: [PATCH 252/912] Cube and dome sky modes --- src/r_plane.cpp | 250 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 250 insertions(+) diff --git a/src/r_plane.cpp b/src/r_plane.cpp index 07efb84b4d..023de80d34 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -58,6 +58,7 @@ #include "r_3dfloors.h" #include "v_palette.h" #include "r_data/colormaps.h" +#include "r_triangle.h" #ifdef _MSC_VER #pragma warning(disable:4244) @@ -900,6 +901,245 @@ static const BYTE *R_GetTwoSkyColumns (FTexture 
*fronttex, int x) return composite; } +static void R_DrawCubeSky(visplane_t *pl) +{ + int x1 = pl->left; + int x2 = pl->right; + short *uwal = (short *)pl->top; + short *dwal = (short *)pl->bottom; + + static TriVertex cube[6 * 6] = + { + // Top + { -1.0f, 1.0f, 0.6f, 1.0f, 1.0f, 0.0f, 1.0f }, + { 1.0f, 1.0f, 0.6f, 1.0f, 0.0f, 0.0f, 1.0f }, + { 1.0f, -1.0f, 0.6f, 1.0f, 0.0f, 0.1f, 1.0f }, + + { 1.0f, -1.0f, 0.6f, 1.0f, 0.0f, 0.1f, 1.0f }, + { -1.0f, -1.0f, 0.6f, 1.0f, 1.0f, 0.1f, 1.0f }, + { -1.0f, 1.0f, 0.6f, 1.0f, 1.0f, 0.0f, 1.0f }, + + // Bottom + { 1.0f, -1.0f, -1.0f, 1.0f, 1.0f, 0.9f, 1.0f }, + { 1.0f, 1.0f, -1.0f, 1.0f, 1.0f, 1.0f, 1.0f }, + { -1.0f, 1.0f, -1.0f, 1.0f, 0.0f, 1.0f, 1.0f }, + + { -1.0f, 1.0f, -1.0f, 1.0f, 0.0f, 1.0f, 1.0f }, + { -1.0f, -1.0f, -1.0f, 1.0f, 0.0f, 0.9f, 1.0f }, + { 1.0f, -1.0f, -1.0f, 1.0f, 1.0f, 0.9f, 1.0f }, + + // Front + { 1.0f, 1.0f, -1.0f, 1.0f, 1.0f, 2.0f, 1.0f }, + { 1.0f, 1.0f, 0.6f, 1.0f, 1.0f, 0.0f, 1.0f }, + { -1.0f, 1.0f, 0.6f, 1.0f, 0.0f, 0.0f, 1.0f }, + + { -1.0f, 1.0f, 0.6f, 1.0f, 0.0f, 0.0f, 1.0f }, + { -1.0f, 1.0f, -1.0f, 1.0f, 0.0f, 2.0f, 1.0f }, + { 1.0f, 1.0f, -1.0f, 1.0f, 1.0f, 2.0f, 1.0f }, + + // Back + { -1.0f, -1.0f, 0.6f, 1.0f, 1.0f, 0.0f, 1.0f }, + { 1.0f, -1.0f, 0.6f, 1.0f, 0.0f, 0.0f, 1.0f }, + { 1.0f, -1.0f, -1.0f, 1.0f, 0.0f, 2.0f, 1.0f }, + + { 1.0f, -1.0f, -1.0f, 1.0f, 0.0f, 2.0f, 1.0f }, + { -1.0f, -1.0f, -1.0f, 1.0f, 1.0f, 2.0f, 1.0f }, + { -1.0f, -1.0f, 0.6f, 1.0f, 1.0f, 0.0f, 1.0f }, + + // Right + { 1.0f, -1.0f, 0.6f, 1.0f, 1.0f, 0.0f, 1.0f }, + { 1.0f, 1.0f, 0.6f, 1.0f, 0.0f, 0.0f, 1.0f }, + { 1.0f, 1.0f, -1.0f, 1.0f, 0.0f, 2.0f, 1.0f }, + + { 1.0f, 1.0f, -1.0f, 1.0f, 0.0f, 2.0f, 1.0f }, + { 1.0f, -1.0f, -1.0f, 1.0f, 1.0f, 2.0f, 1.0f }, + { 1.0f, -1.0f, 0.6f, 1.0f, 1.0f, 0.0f, 1.0f }, + + // Left + { -1.0f, 1.0f, -1.0f, 1.0f, 1.0f, 2.0f, 1.0f }, + { -1.0f, 1.0f, 0.6f, 1.0f, 1.0f, 0.0f, 1.0f }, + { -1.0f, -1.0f, 0.6f, 1.0f, 0.0f, 0.0f, 1.0f }, + + { -1.0f, -1.0f, 0.6f, 1.0f, 0.0f, 0.0f, 1.0f 
}, + { -1.0f, -1.0f, -1.0f, 1.0f, 0.0f, 2.0f, 1.0f }, + { -1.0f, 1.0f, -1.0f, 1.0f, 1.0f, 2.0f, 1.0f } + }; + + TriMatrix objectToWorld = TriMatrix::translate((float)ViewPos.X, (float)ViewPos.Y, (float)ViewPos.Z) * TriMatrix::scale(1000.0f, 1000.0f, 1000.0f); + + uint32_t solid_top = frontskytex->GetSkyCapColor(false); + uint32_t solid_bottom = frontskytex->GetSkyCapColor(true); + + solid_top = RGB32k.RGB[(RPART(solid_top) >> 3)][(GPART(solid_top) >> 3)][(BPART(solid_top) >> 3)]; + solid_bottom = RGB32k.RGB[(RPART(solid_bottom) >> 3)][(GPART(solid_bottom) >> 3)][(BPART(solid_bottom) >> 3)]; + + TriangleDrawer::fill(objectToWorld, cube, 6, TriangleDrawMode::Normal, false, x1, x2 - 1, uwal, dwal, solid_top); + TriangleDrawer::fill(objectToWorld, cube + 6, 6, TriangleDrawMode::Normal, false, x1, x2 - 1, uwal, dwal, solid_bottom); + TriangleDrawer::draw(objectToWorld, cube + 2 * 6, 4 * 6, TriangleDrawMode::Normal, false, x1, x2 - 1, uwal, dwal, frontskytex); +} + +namespace +{ + class SkyDome + { + public: + SkyDome() { CreateDome(); } + void Render(visplane_t *pl); + + private: + TArray mVertices; + TArray mPrimStart; + int mRows, mColumns; + + void SkyVertex(int r, int c, bool yflip); + void CreateSkyHemisphere(bool zflip); + void CreateDome(); + void RenderRow(int row, visplane_t *pl); + void RenderCapColorRow(int row, bool bottomCap, visplane_t *pl); + + TriVertex SetVertex(float xx, float yy, float zz, float uu = 0, float vv = 0); + TriVertex SetVertexXYZ(float xx, float yy, float zz, float uu = 0, float vv = 0); + }; + + TriVertex SkyDome::SetVertex(float xx, float yy, float zz, float uu, float vv) + { + TriVertex v; + v.x = xx; + v.y = yy; + v.z = zz; + v.w = 1.0f; + v.varying[0] = uu; + v.varying[1] = vv; + v.varying[2] = 1.0f; + return v; + } + + TriVertex SkyDome::SetVertexXYZ(float xx, float yy, float zz, float uu, float vv) + { + TriVertex v; + v.x = xx; + v.y = zz; + v.z = yy; + v.w = 1.0f; + v.varying[0] = uu; + v.varying[1] = vv; + v.varying[2] = 1.0f; + 
return v; + } + + void SkyDome::SkyVertex(int r, int c, bool zflip) + { + static const FAngle maxSideAngle = 60.f; + static const float scale = 10000.; + + FAngle topAngle = (c / (float)mColumns * 360.f); + FAngle sideAngle = maxSideAngle * (mRows - r) / mRows; + float height = sideAngle.Sin(); + float realRadius = scale * sideAngle.Cos(); + FVector2 pos = topAngle.ToVector(realRadius); + float z = (!zflip) ? scale * height : -scale * height; + + float u, v; + //uint32_t color = r == 0 ? 0xffffff : 0xffffffff; + + // And the texture coordinates. + if (!zflip) // Flipped Y is for the lower hemisphere. + { + u = (-c / (float)mColumns); + v = (r / (float)mRows); + } + else + { + u = (-c / (float)mColumns); + v = 1.0f + ((mRows - r) / (float)mRows); + } + + if (r != 4) z += 300; + + // And finally the vertex. + TriVertex vert; + vert = SetVertexXYZ(-pos.X, z - 1.f, pos.Y, u * 4.0f, v + 0.5f/*, color*/); + mVertices.Push(vert); + } + + void SkyDome::CreateSkyHemisphere(bool zflip) + { + int r, c; + + mPrimStart.Push(mVertices.Size()); + + for (c = 0; c < mColumns; c++) + { + SkyVertex(1, c, zflip); + } + + // The total number of triangles per hemisphere can be calculated + // as follows: rows * columns * 2 + 2 (for the top cap). 
+ for (r = 0; r < mRows; r++) + { + mPrimStart.Push(mVertices.Size()); + for (c = 0; c <= mColumns; c++) + { + SkyVertex(r + zflip, c, zflip); + SkyVertex(r + 1 - zflip, c, zflip); + } + } + } + + void SkyDome::CreateDome() + { + mColumns = 128; + mRows = 4; + CreateSkyHemisphere(false); + CreateSkyHemisphere(true); + mPrimStart.Push(mVertices.Size()); + } + + void SkyDome::RenderRow(int row, visplane_t *pl) + { + int x1 = pl->left; + int x2 = pl->right; + short *uwal = (short *)pl->top; + short *dwal = (short *)pl->bottom; + TriMatrix objectToWorld = TriMatrix::translate((float)ViewPos.X, (float)ViewPos.Y, (float)ViewPos.Z); + TriangleDrawer::draw(objectToWorld, &mVertices[mPrimStart[row]], mPrimStart[row + 1] - mPrimStart[row], TriangleDrawMode::Strip, false, x1, x2 - 1, uwal, dwal, frontskytex); + } + + void SkyDome::RenderCapColorRow(int row, bool bottomCap, visplane_t *pl) + { + uint32_t solid = frontskytex->GetSkyCapColor(bottomCap); + solid = RGB32k.RGB[(RPART(solid) >> 3)][(GPART(solid) >> 3)][(BPART(solid) >> 3)]; + + int x1 = pl->left; + int x2 = pl->right; + short *uwal = (short *)pl->top; + short *dwal = (short *)pl->bottom; + TriMatrix objectToWorld = TriMatrix::translate((float)ViewPos.X, (float)ViewPos.Y, (float)ViewPos.Z); + TriangleDrawer::fill(objectToWorld, &mVertices[mPrimStart[row]], mPrimStart[row + 1] - mPrimStart[row], TriangleDrawMode::Fan, bottomCap, x1, x2 - 1, uwal, dwal, solid); + } + + void SkyDome::Render(visplane_t *pl) + { + int rc = mRows + 1; + + // No need to draw this as the software renderer can't look that high anyway + //RenderCapColorRow(0, false, pl); + //RenderCapColorRow(rc, true, pl); + + for (int i = 1; i <= mRows; i++) + { + RenderRow(i, pl); + RenderRow(rc + i, pl); + } + } +} + +static void R_DrawDomeSky(visplane_t *pl) +{ + static SkyDome skydome; + skydome.Render(pl); +} + static void R_DrawSkyColumnStripe(int start_x, int y1, int y2, int columns, double scale, double texturemid, double yrepeat) { uint32_t height = 
frontskytex->GetHeight(); @@ -1064,6 +1304,16 @@ static void R_DrawSky (visplane_t *pl) R_DrawCapSky(pl); return; } + else if (r_skymode == 3) + { + R_DrawCubeSky(pl); + return; + } + else if (r_skymode == 4) + { + R_DrawDomeSky(pl); + return; + } int x; float swal; From 429aed85e15fee727fa7ae8d63f43b24a262b29d Mon Sep 17 00:00:00 2001 From: Marisa Heit Date: Tue, 1 Nov 2016 00:18:56 -0500 Subject: [PATCH 253/912] Fixed: Masked midtexture yscale incorrectly used the texture's X scale --- src/r_segs.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/r_segs.cpp b/src/r_segs.cpp index dc8a8fb83e..8c00faa7f6 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -2495,7 +2495,7 @@ void R_StoreWallRange (int start, int stop) lwal = (fixed_t *)(openings + ds_p->maskedtexturecol); swal = (float *)(openings + ds_p->swall); FTexture *pic = TexMan(sidedef->GetTexture(side_t::mid), true); - double yscale = pic->Scale.X * sidedef->GetTextureYScale(side_t::mid); + double yscale = pic->Scale.Y * sidedef->GetTextureYScale(side_t::mid); fixed_t xoffset = FLOAT2FIXED(sidedef->GetTextureXOffset(side_t::mid)); if (pic->bWorldPanning) From 3102668dfa40dcb66a1d851caff315144643314a Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Tue, 1 Nov 2016 03:21:19 -0400 Subject: [PATCH 254/912] - Removed mp### commands, extended map, recordmap, and open commands to accept "dm/coop" as an extra parameter - to open maps in multiplayer mode with the respective ruleset. 
--- src/g_level.cpp | 151 +++++++++++------------------------------------- 1 file changed, 35 insertions(+), 116 deletions(-) diff --git a/src/g_level.cpp b/src/g_level.cpp index 7588473a37..ebcb000acf 100644 --- a/src/g_level.cpp +++ b/src/g_level.cpp @@ -90,6 +90,8 @@ #include "g_hub.h" +#include + void STAT_StartNewGame(const char *lev); void STAT_ChangeLevel(const char *newl); @@ -181,6 +183,16 @@ CCMD (map) } else { + if (argv.argc() > 2 && strcmp(argv[2], "coop") == 0) + { + deathmatch = false; + multiplayernext = true; + } + else if (argv.argc() > 2 && strcmp(argv[2], "dm") == 0) + { + deathmatch = true; + multiplayernext = true; + } G_DeferedInitNew (argv[1]); } } @@ -192,7 +204,7 @@ CCMD (map) } else { - Printf ("Usage: map \n"); + Printf ("Usage: map [coop|dm]\n"); } } @@ -218,6 +230,16 @@ CCMD(recordmap) } else { + if (argv.argc() > 3 && strcmp(argv[3], "coop") == 0) + { + deathmatch = false; + multiplayernext = true; + } + else if (argv.argc() > 3 && strcmp(argv[3], "dm") == 0) + { + deathmatch = true; + multiplayernext = true; + } G_DeferedInitNew(argv[2]); gameaction = ga_recordgame; newdemoname = argv[1]; @@ -232,7 +254,7 @@ CCMD(recordmap) } else { - Printf("Usage: recordmap \n"); + Printf("Usage: recordmap [coop|dm]\n"); } } @@ -258,126 +280,23 @@ CCMD (open) } else { + if (argv.argc() > 2 && strcmp(argv[2], "coop") == 0) + { + deathmatch = false; + multiplayernext = true; + } + else if (argv.argc() > 2 && strcmp(argv[2], "dm") == 0) + { + deathmatch = true; + multiplayernext = true; + } gameaction = ga_newgame2; d_skill = -1; } } else { - Printf ("Usage: open \n"); - } -} - -//========================================================================== -// -// -//========================================================================== - -CCMD (mpmap) -{ - if (netgame) - { - Printf ("Use " TEXTCOLOR_BOLD "changemap" TEXTCOLOR_NORMAL " instead. 
" TEXTCOLOR_BOLD "Map" - TEXTCOLOR_NORMAL " is for single-player only.\n"); - return; - } - if (argv.argc() > 1) - { - try - { - if (!P_CheckMapData(argv[1])) - { - Printf ("No map %s\n", argv[1]); - } - else - { - multiplayernext = true; - G_DeferedInitNew (argv[1]); - } - } - catch(CRecoverableError &error) - { - if (error.GetMessage()) - Printf("%s", error.GetMessage()); - } - } - else - { - Printf ("Usage: mpmap \n"); - } -} - -//========================================================================== -// -// -//========================================================================== - -CCMD(mprecordmap) -{ - if (netgame) - { - Printf("You cannot record a new game while in a netgame."); - return; - } - if (argv.argc() > 2) - { - try - { - if (!P_CheckMapData(argv[2])) - { - Printf("No map %s\n", argv[2]); - } - else - { - multiplayernext = true; - G_DeferedInitNew(argv[2]); - gameaction = ga_recordgame; - newdemoname = argv[1]; - newdemomap = argv[2]; - } - } - catch (CRecoverableError &error) - { - if (error.GetMessage()) - Printf("%s", error.GetMessage()); - } - } - else - { - Printf("Usage: mprecordmap \n"); - } -} - -//========================================================================== -// -// -//========================================================================== - -CCMD (mpopen) -{ - if (netgame) - { - Printf ("You cannot use open in multiplayer games.\n"); - return; - } - if (argv.argc() > 1) - { - d_mapname = "file:"; - d_mapname += argv[1]; - if (!P_CheckMapData(d_mapname)) - { - Printf ("No map %s\n", d_mapname.GetChars()); - } - else - { - gameaction = ga_newgame2; - d_skill = -1; - multiplayernext = true; - } - } - else - { - Printf ("Usage: mpopen \n"); + Printf ("Usage: open [coop|dm]\n"); } } From 33637dda23564facb946b4d893bf3b808a3d9ed7 Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Tue, 1 Nov 2016 09:47:01 -0400 Subject: [PATCH 255/912] - fixed: map commands will now take capslock DM/COOP as arguments --- 
src/doomstat.h | 2 +- src/g_game.cpp | 2 +- src/g_level.cpp | 12 ++++++------ 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/doomstat.h b/src/doomstat.h index 70869b5a2c..22ee4fdb49 100644 --- a/src/doomstat.h +++ b/src/doomstat.h @@ -71,7 +71,7 @@ extern bool netgame; // Bot game? Like netgame, but doesn't involve network communication. extern bool multiplayer; -// [SP] MPMap implementation - invokes fake multiplayer without bots +// [SP] Map dm/coop implementation - invokes fake multiplayer without bots extern bool multiplayernext; // Flag: true only if started as net deathmatch. diff --git a/src/g_game.cpp b/src/g_game.cpp index 2c827e6391..ec79d59e60 100644 --- a/src/g_game.cpp +++ b/src/g_game.cpp @@ -164,7 +164,7 @@ bool viewactive; bool netgame; // only true if packets are broadcast bool multiplayer; -bool multiplayernext = false; // [SP] MPMap implementation +bool multiplayernext = false; // [SP] Map coop/dm implementation player_t players[MAXPLAYERS]; bool playeringame[MAXPLAYERS]; diff --git a/src/g_level.cpp b/src/g_level.cpp index ebcb000acf..3136a254d4 100644 --- a/src/g_level.cpp +++ b/src/g_level.cpp @@ -183,12 +183,12 @@ CCMD (map) } else { - if (argv.argc() > 2 && strcmp(argv[2], "coop") == 0) + if (argv.argc() > 2 && stricmp(argv[2], "coop") == 0) { deathmatch = false; multiplayernext = true; } - else if (argv.argc() > 2 && strcmp(argv[2], "dm") == 0) + else if (argv.argc() > 2 && stricmp(argv[2], "dm") == 0) { deathmatch = true; multiplayernext = true; @@ -230,12 +230,12 @@ CCMD(recordmap) } else { - if (argv.argc() > 3 && strcmp(argv[3], "coop") == 0) + if (argv.argc() > 3 && stricmp(argv[3], "coop") == 0) { deathmatch = false; multiplayernext = true; } - else if (argv.argc() > 3 && strcmp(argv[3], "dm") == 0) + else if (argv.argc() > 3 && stricmp(argv[3], "dm") == 0) { deathmatch = true; multiplayernext = true; @@ -280,12 +280,12 @@ CCMD (open) } else { - if (argv.argc() > 2 && strcmp(argv[2], "coop") == 0) + if (argv.argc() 
> 2 && stricmp(argv[2], "coop") == 0) { deathmatch = false; multiplayernext = true; } - else if (argv.argc() > 2 && strcmp(argv[2], "dm") == 0) + else if (argv.argc() > 2 && stricmp(argv[2], "dm") == 0) { deathmatch = true; multiplayernext = true; From bfb8886e93864b0e076a7774b3ce8add44abb8fb Mon Sep 17 00:00:00 2001 From: Christoph Oelckers Date: Tue, 1 Nov 2016 13:33:18 +0100 Subject: [PATCH 256/912] - fixed: RapidJSON in ASCII mode cannot handle extended 8 bit character sets and will produce broken data if the input contains some. This means we need to perform the conversion to UTF-8 on ZDoom's side and run RapidJSON in UTF-8 mode. Daedalus triggers this with a 0x85 character which in Windows CP 1252 is the ellipsis (...) The converter will assume ISO-8859-1, though, but cannot do anything with these characters because they map to the font being used here. --- src/p_acs.cpp | 4 ++ src/serializer.cpp | 173 ++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 160 insertions(+), 17 deletions(-) diff --git a/src/p_acs.cpp b/src/p_acs.cpp index f36aaaf69a..d82e6590c4 100644 --- a/src/p_acs.cpp +++ b/src/p_acs.cpp @@ -791,6 +791,10 @@ void ACSStringPool::WriteStrings(FSerializer &file, const char *key) const { if (file.BeginObject(nullptr)) { + if (i == 430) + { + int a = 0; + } file("index", i) ("string", entry->Str) ("lockcount", entry->LockCount) diff --git a/src/serializer.cpp b/src/serializer.cpp index 49226ce801..dec81e86fe 100644 --- a/src/serializer.cpp +++ b/src/serializer.cpp @@ -67,6 +67,143 @@ char nulspace[1024 * 1024 * 4]; bool save_full = false; // for testing. Should be removed afterward. 
+int utf8_encode(int32_t codepoint, char *buffer, int *size) +{ + if (codepoint < 0) + return -1; + else if (codepoint < 0x80) + { + buffer[0] = (char)codepoint; + *size = 1; + } + else if (codepoint < 0x800) + { + buffer[0] = 0xC0 + ((codepoint & 0x7C0) >> 6); + buffer[1] = 0x80 + ((codepoint & 0x03F)); + *size = 2; + } + else if (codepoint < 0x10000) + { + buffer[0] = 0xE0 + ((codepoint & 0xF000) >> 12); + buffer[1] = 0x80 + ((codepoint & 0x0FC0) >> 6); + buffer[2] = 0x80 + ((codepoint & 0x003F)); + *size = 3; + } + else if (codepoint <= 0x10FFFF) + { + buffer[0] = 0xF0 + ((codepoint & 0x1C0000) >> 18); + buffer[1] = 0x80 + ((codepoint & 0x03F000) >> 12); + buffer[2] = 0x80 + ((codepoint & 0x000FC0) >> 6); + buffer[3] = 0x80 + ((codepoint & 0x00003F)); + *size = 4; + } + else + return -1; + + return 0; +} + +int utf8_decode(const char *src, int *size) +{ + int c = src[0] & 255; + int r; + + *size = 1; + if ((c & 0x80) == 0) + { + return c; + } + + int c1 = src[1] & 255; + + if ((c & 0xE0) == 0xC0) + { + r = ((c & 0x1F) << 6) | c1; + if (r >= 128) + { + *size = 2; + return r; + } + return -1; + } + + int c2 = src[2] & 255; + + if ((c & 0xF0) == 0xE0) + { + r = ((c & 0x0F) << 12) | (c1 << 6) | c2; + if (r >= 2048 && (r < 55296 || r > 57343)) + { + *size = 3; + return r; + } + return -1; + } + + int c3 = src[3] & 255; + + if ((c & 0xF8) == 0xF0) + { + r = ((c & 0x07) << 18) | (c1 << 12) | (c2 << 6) | c3; + if (r >= 65536 && r <= 1114111) + { + *size = 4; + return r; + } + } + return -1; +} + +static TArray out; +static const char *StringToUnicode(const char *cc, int size = -1) +{ + int ch; + const char *c = cc; + int count = 0; + int count1 = 0; + out.Clear(); + while (ch = (*c++) & 255) + { + count1++; + if (ch >= 128) + { + if (ch < 0x800) count += 2; + else count += 3; + // The source cannot contain 4-byte chars. + } + else count++; + if (count1 == size && size > 0) break; + } + if (count == count1) return cc; // string is pure ASCII. 
+ // we need to convert + out.Resize(count + 1); + out.Last() = 0; + c = cc; + int i = 0; + while (ch = (*c++) & 255) + { + utf8_encode(ch, &out[i], &count1); + i += count1; + } + return &out[0]; +} + +static const char *UnicodeToString(const char *cc) +{ + out.Resize((unsigned)strlen(cc) + 1); + int ndx = 0; + while (*cc != 0) + { + int size; + int c = utf8_decode(cc, &size); + if (c < 0 || c > 255) c = '?'; + out[ndx++] = c; + cc += size; + } + out[ndx] = 0; + return &out[0]; +} + //========================================================================== // // @@ -99,8 +236,8 @@ struct FJSONObject struct FWriter { - typedef rapidjson::Writer > Writer; - typedef rapidjson::PrettyWriter > PrettyWriter; + typedef rapidjson::Writer > Writer; + typedef rapidjson::PrettyWriter > PrettyWriter; Writer *mWriter1; PrettyWriter *mWriter2; @@ -173,14 +310,16 @@ struct FWriter void String(const char *k) { + k = StringToUnicode(k); if (mWriter1) mWriter1->String(k); else if (mWriter2) mWriter2->String(k); } void String(const char *k, int size) { - if (mWriter1) mWriter1->String(k, size); - else if (mWriter2) mWriter2->String(k, size); + k = StringToUnicode(k, size); + if (mWriter1) mWriter1->String(k); + else if (mWriter2) mWriter2->String(k); } void Bool(bool k) @@ -602,7 +741,7 @@ FSerializer &FSerializer::Args(const char *key, int *args, int *defargs, int spe } else if (i == 0 && aval.IsString()) { - args[i] = -FName(aval.GetString()); + args[i] = -FName(UnicodeToString(aval.GetString())); } else { @@ -654,7 +793,7 @@ FSerializer &FSerializer::ScriptNum(const char *key, int &num) } else if (val->IsString()) { - num = -FName(val->GetString()); + num = -FName(UnicodeToString(val->GetString())); } else { @@ -709,7 +848,7 @@ FSerializer &FSerializer::Sprite(const char *key, int32_t &spritenum, int32_t *d { if (val->IsString()) { - uint32_t name = *reinterpret_cast(val->GetString()); + uint32_t name = *reinterpret_cast(UnicodeToString(val->GetString())); for (auto hint = 
NumStdSprites; hint-- != 0; ) { if (sprites[hint].dwName == name) @@ -747,7 +886,7 @@ FSerializer &FSerializer::StringPtr(const char *key, const char *&charptr) { if (val->IsString()) { - charptr = val->GetString(); + charptr = UnicodeToString(val->GetString()); } else { @@ -1403,7 +1542,7 @@ FSerializer &Serialize(FSerializer &arc, const char *key, FTextureID &value, FTe assert(nameval.IsString() && typeval.IsInt()); if (nameval.IsString() && typeval.IsInt()) { - value = TexMan.GetTexture(nameval.GetString(), typeval.GetInt()); + value = TexMan.GetTexture(UnicodeToString(nameval.GetString()), typeval.GetInt()); } else { @@ -1553,7 +1692,7 @@ FSerializer &Serialize(FSerializer &arc, const char *key, FName &value, FName *d assert(val->IsString()); if (val->IsString()) { - value = val->GetString(); + value = UnicodeToString(val->GetString()); } else { @@ -1638,7 +1777,7 @@ FSerializer &Serialize(FSerializer &arc, const char *key, FSoundID &sid, FSoundI assert(val->IsString() || val->IsNull()); if (val->IsString()) { - sid = val->GetString(); + sid = UnicodeToString(val->GetString()); } else if (val->IsNull()) { @@ -1687,7 +1826,7 @@ template<> FSerializer &Serialize(FSerializer &arc, const char *key, PClassActor assert(val->IsString() || val->IsNull()); if (val->IsString()) { - clst = PClass::FindActor(val->GetString()); + clst = PClass::FindActor(UnicodeToString(val->GetString())); } else if (val->IsNull()) { @@ -1735,7 +1874,7 @@ template<> FSerializer &Serialize(FSerializer &arc, const char *key, PClass *&cl { if (val->IsString()) { - clst = PClass::FindClass(val->GetString()); + clst = PClass::FindClass(UnicodeToString(val->GetString())); } else if (val->IsNull()) { @@ -1810,7 +1949,7 @@ FSerializer &Serialize(FSerializer &arc, const char *key, FState *&state, FState assert(cls.IsString() && ndx.IsUint()); if (cls.IsString() && ndx.IsUint()) { - PClassActor *clas = PClass::FindActor(cls.GetString()); + PClassActor *clas = 
PClass::FindActor(UnicodeToString(cls.GetString())); if (clas && ndx.GetUint() < (unsigned)clas->NumOwnedStates) { state = clas->OwnedStates + ndx.GetUint(); @@ -1932,7 +2071,7 @@ template<> FSerializer &Serialize(FSerializer &arc, const char *key, FString *&p } else if (val->IsString()) { - pstr = AActor::mStringPropertyData.Alloc(val->GetString()); + pstr = AActor::mStringPropertyData.Alloc(UnicodeToString(val->GetString())); } else { @@ -1974,7 +2113,7 @@ FSerializer &Serialize(FSerializer &arc, const char *key, FString &pstr, FString } else if (val->IsString()) { - pstr = val->GetString(); + pstr = UnicodeToString(val->GetString()); } else { @@ -2023,7 +2162,7 @@ template<> FSerializer &Serialize(FSerializer &arc, const char *key, char *&pstr } else if (val->IsString()) { - pstr = copystring(val->GetString()); + pstr = copystring(UnicodeToString(val->GetString())); } else { From 48229837ed0c09ee57fa7bedf53d671c1e202067 Mon Sep 17 00:00:00 2001 From: Christoph Oelckers Date: Tue, 1 Nov 2016 13:36:33 +0100 Subject: [PATCH 257/912] - fixed: ACSStringPool::AddString did not check for NULL pointers as input. Let's just map them to the empty string, that's a lot better than crashing. --- src/p_acs.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/p_acs.cpp b/src/p_acs.cpp index d82e6590c4..9281bfad0e 100644 --- a/src/p_acs.cpp +++ b/src/p_acs.cpp @@ -375,6 +375,7 @@ void ACSStringPool::Clear() int ACSStringPool::AddString(const char *str) { + if (str == nullptr) str = ""; size_t len = strlen(str); unsigned int h = SuperFastHash(str, len); unsigned int bucketnum = h % NUM_BUCKETS; From f619e8ece11341f6dc90eb7cd62e26362095004a Mon Sep 17 00:00:00 2001 From: Christoph Oelckers Date: Tue, 1 Nov 2016 13:48:56 +0100 Subject: [PATCH 258/912] - fixed numeric output precision for a few sliders. 
--- wadsrc/static/menudef.txt | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/wadsrc/static/menudef.txt b/wadsrc/static/menudef.txt index 42fb35fef5..06acc6fdfa 100644 --- a/wadsrc/static/menudef.txt +++ b/wadsrc/static/menudef.txt @@ -694,8 +694,8 @@ OptionMenu "VideoOptions" Option "$DSPLYMNU_VSYNC", "vid_vsync", "OnOff" Option "$DSPLYMNU_CAPFPS", "cl_capfps", "OffOn" - Slider "$DSPLYMNU_BLOODFADE", "blood_fade_scalar", 0.0, 1.0, 0.05, 1 - Slider "$DSPLYMNU_PICKUPFADE", "pickup_fade_scalar", 0.0, 1.0, 0.05, 1 + Slider "$DSPLYMNU_BLOODFADE", "blood_fade_scalar", 0.0, 1.0, 0.05, 2 + Slider "$DSPLYMNU_PICKUPFADE", "pickup_fade_scalar", 0.0, 1.0, 0.05, 2 Option "$DSPLYMNU_COLUMNMETHOD", "r_columnmethod", "ColumnMethods" StaticText " " @@ -728,7 +728,7 @@ OptionMenu "VideoOptions" ColorPicker "$DSPLYMNU_DIMCOLOR", "dimcolor" Slider "$DSPLYMNU_MOVEBOB", "movebob", 0, 1.0, 0.05, 2 Slider "$DSPLYMNU_STILLBOB", "stillbob", 0, 1.0, 0.05, 2 - Slider "$DSPLYMNU_BOBSPEED", "wbobspeed", 0, 2.0, 0.1, 2 + Slider "$DSPLYMNU_BOBSPEED", "wbobspeed", 0, 2.0, 0.1 } @@ -1270,7 +1270,7 @@ OptionMenu GameplayOptions Title "$GMPLYMNU_TITLE" //Indent 222 Option "$GMPLYMNU_TEAMPLAY", "teamplay", "OnOff" - Slider "$GMPLYMNU_TEAMDAMAGE", "teamdamage", 0, 1, 0.05 + Slider "$GMPLYMNU_TEAMDAMAGE", "teamdamage", 0, 1, 0.05,2 StaticText " " Option "$GMPLYMNU_SMARTAUTOAIM", "sv_smartaim", "SmartAim" StaticText " " @@ -1855,7 +1855,7 @@ OptionMenu NetworkOptions StaticText "$NETMNU_LOCALOPTIONS", 1 Option "$NETMNU_MOVEPREDICTION", "cl_noprediction", "OffOn" Option "$NETMNU_LINESPECIALPREDICTION", "cl_predict_specials", "OnOff" - Slider "$NETMNU_PREDICTIONLERPSCALE", "cl_predict_lerpscale", 0.0, 0.5, 0.05 + Slider "$NETMNU_PREDICTIONLERPSCALE", "cl_predict_lerpscale", 0.0, 0.5, 0.05, 2 Slider "$NETMNU_LERPTHRESHOLD", "cl_predict_lerpthreshold", 0.1, 16.0, 0.1 StaticText " " StaticText "$NETMNU_HOSTOPTIONS", 1 From 578bf9b09c97d6d4b59e58104e958aff98930050 Mon Sep 17 
00:00:00 2001 From: nashmuhandes Date: Sun, 16 Oct 2016 02:50:21 +0800 Subject: [PATCH 259/912] Added "local" parameters to A_PlaySound and ACS PlaySound --- src/p_acs.cpp | 7 ++++--- src/p_actionfunctions.cpp | 5 +++-- src/s_sound.cpp | 26 ++++++++++++++++++++++++++ src/s_sound.h | 3 +++ wadsrc/static/actors/actor.txt | 2 +- 5 files changed, 37 insertions(+), 6 deletions(-) diff --git a/src/p_acs.cpp b/src/p_acs.cpp index 9281bfad0e..1a60a0c4fb 100644 --- a/src/p_acs.cpp +++ b/src/p_acs.cpp @@ -5350,7 +5350,7 @@ int DLevelScript::CallFunction(int argCount, int funcIndex, SDWORD *args) case ACSF_PlaySound: case ACSF_PlayActorSound: - // PlaySound(tid, "SoundName", channel, volume, looping, attenuation) + // PlaySound(tid, "SoundName", channel, volume, looping, attenuation, local) { FSoundID sid; @@ -5371,6 +5371,7 @@ int DLevelScript::CallFunction(int argCount, int funcIndex, SDWORD *args) float vol = argCount > 3 ? ACSToFloat(args[3]) : 1.f; INTBOOL looping = argCount > 4 ? args[4] : false; float atten = argCount > 5 ? ACSToFloat(args[5]) : ATTN_NORM; + INTBOOL local = argCount > 6 ? 
args[6] : false; if (args[0] == 0) { @@ -5387,11 +5388,11 @@ doplaysound: if (funcIndex == ACSF_PlayActorSound) { if (!looping) { - S_Sound(spot, chan, sid, vol, atten); + S_PlaySound(spot, chan, sid, vol, atten, local); } else if (!S_IsActorPlayingSomething(spot, chan & 7, sid)) { - S_Sound(spot, chan | CHAN_LOOP, sid, vol, atten); + S_PlaySound(spot, chan | CHAN_LOOP, sid, vol, atten, local); } } } diff --git a/src/p_actionfunctions.cpp b/src/p_actionfunctions.cpp index c6bc8215b0..73cade49a4 100644 --- a/src/p_actionfunctions.cpp +++ b/src/p_actionfunctions.cpp @@ -1032,16 +1032,17 @@ DEFINE_ACTION_FUNCTION_PARAMS(AActor, A_PlaySound) PARAM_FLOAT_OPT (volume) { volume = 1; } PARAM_BOOL_OPT (looping) { looping = false; } PARAM_FLOAT_OPT (attenuation) { attenuation = ATTN_NORM; } + PARAM_BOOL_OPT (local) { local = false; } if (!looping) { - S_Sound (self, channel, soundid, (float)volume, (float)attenuation); + S_PlaySound(self, channel, soundid, (float)volume, (float)attenuation, local); } else { if (!S_IsActorPlayingSomething (self, channel&7, soundid)) { - S_Sound (self, channel | CHAN_LOOP, soundid, (float)volume, (float)attenuation); + S_PlaySound(self, channel | CHAN_LOOP, soundid, (float)volume, (float)attenuation, local); } } return 0; diff --git a/src/s_sound.cpp b/src/s_sound.cpp index 524b121756..a51a7101be 100644 --- a/src/s_sound.cpp +++ b/src/s_sound.cpp @@ -1301,6 +1301,32 @@ void S_Sound (const sector_t *sec, int channel, FSoundID sfxid, float volume, fl S_StartSound (NULL, sec, NULL, NULL, channel, sfxid, volume, attenuation); } +//========================================================================== +// +// S_PlaySound - Subfunction used by ACS and DECORATE +// +// Has a local parameter to make the sound audible only to the source +// +//========================================================================== + +void S_PlaySound(AActor *a, int chan, FSoundID sid, float vol, float atten, bool local) +{ + if (a == nullptr) + return; + + if 
(!local) + { + S_Sound(a, chan, sid, vol, atten); + } + else + { + if (a->CheckLocalView(consoleplayer)) + { + S_Sound(chan, sid, vol, ATTN_NONE); + } + } +} + //========================================================================== // // S_LoadSound diff --git a/src/s_sound.h b/src/s_sound.h index 9b917e25c8..d6d2a5403e 100644 --- a/src/s_sound.h +++ b/src/s_sound.h @@ -241,6 +241,9 @@ void S_Sound (const FPolyObj *poly, int channel, FSoundID sfxid, float volume, f void S_Sound (const sector_t *sec, int channel, FSoundID sfxid, float volume, float attenuation); void S_Sound(const DVector3 &pos, int channel, FSoundID sfxid, float volume, float attenuation); +// [Nash] Used by ACS and DECORATE +void S_PlaySound(AActor *a, int chan, FSoundID sid, float vol, float atten, bool local); + // sound channels // channel 0 never willingly overrides // other channels (1-7) always override a playing sound on that channel diff --git a/wadsrc/static/actors/actor.txt b/wadsrc/static/actors/actor.txt index eecc843d39..71e82f613d 100644 --- a/wadsrc/static/actors/actor.txt +++ b/wadsrc/static/actors/actor.txt @@ -191,7 +191,7 @@ ACTOR Actor native //: Thinker action native A_ComboAttack(); action native A_BulletAttack(); action native A_WolfAttack(int flags = 0, sound whattoplay = "weapons/pistol", float snipe = 1.0, int maxdamage = 64, int blocksize = 128, int pointblank = 2, int longrange = 4, float runspeed = 160.0, class pufftype = "BulletPuff"); - action native A_PlaySound(sound whattoplay = "weapons/pistol", int slot = CHAN_BODY, float volume = 1.0, bool looping = false, float attenuation = ATTN_NORM); + action native A_PlaySound(sound whattoplay = "weapons/pistol", int slot = CHAN_BODY, float volume = 1.0, bool looping = false, float attenuation = ATTN_NORM, bool local = false); native void A_PlayWeaponSound(sound whattoplay); action native A_FLoopActiveSound(); action native A_LoopActiveSound(); From 477fb6d676d31ec7704be646a180f38088dd698d Mon Sep 17 00:00:00 2001 
From: Magnus Norddahl Date: Tue, 1 Nov 2016 21:44:33 +0100 Subject: [PATCH 260/912] Added missing copyrights --- .../fixedfunction/drawcolumncodegen.cpp | 21 +++++++++++++++++++ .../fixedfunction/drawcolumncodegen.h | 21 +++++++++++++++++++ .../fixedfunction/drawercodegen.cpp | 21 +++++++++++++++++++ src/r_compiler/fixedfunction/drawercodegen.h | 21 +++++++++++++++++++ .../fixedfunction/drawskycodegen.cpp | 21 +++++++++++++++++++ src/r_compiler/fixedfunction/drawskycodegen.h | 21 +++++++++++++++++++ .../fixedfunction/drawspancodegen.cpp | 21 +++++++++++++++++++ .../fixedfunction/drawspancodegen.h | 21 +++++++++++++++++++ .../fixedfunction/drawwallcodegen.cpp | 21 +++++++++++++++++++ .../fixedfunction/drawwallcodegen.h | 21 +++++++++++++++++++ src/r_compiler/llvm_include.h | 21 +++++++++++++++++++ src/r_compiler/llvmdrawers.cpp | 21 +++++++++++++++++++ src/r_compiler/llvmdrawers.h | 21 +++++++++++++++++++ src/r_compiler/ssa/ssa_barycentric_weight.h | 21 +++++++++++++++++++ src/r_compiler/ssa/ssa_bool.cpp | 21 +++++++++++++++++++ src/r_compiler/ssa/ssa_bool.h | 21 +++++++++++++++++++ src/r_compiler/ssa/ssa_float.cpp | 21 +++++++++++++++++++ src/r_compiler/ssa/ssa_float.h | 21 +++++++++++++++++++ src/r_compiler/ssa/ssa_float_ptr.cpp | 21 +++++++++++++++++++ src/r_compiler/ssa/ssa_float_ptr.h | 21 +++++++++++++++++++ src/r_compiler/ssa/ssa_for_block.cpp | 21 +++++++++++++++++++ src/r_compiler/ssa/ssa_for_block.h | 21 +++++++++++++++++++ src/r_compiler/ssa/ssa_function.cpp | 21 +++++++++++++++++++ src/r_compiler/ssa/ssa_function.h | 21 +++++++++++++++++++ src/r_compiler/ssa/ssa_if_block.cpp | 21 +++++++++++++++++++ src/r_compiler/ssa/ssa_if_block.h | 21 +++++++++++++++++++ src/r_compiler/ssa/ssa_int.cpp | 21 +++++++++++++++++++ src/r_compiler/ssa/ssa_int.h | 21 +++++++++++++++++++ src/r_compiler/ssa/ssa_int_ptr.cpp | 21 +++++++++++++++++++ src/r_compiler/ssa/ssa_int_ptr.h | 21 +++++++++++++++++++ src/r_compiler/ssa/ssa_phi.h | 21 +++++++++++++++++++ 
src/r_compiler/ssa/ssa_scope.cpp | 21 +++++++++++++++++++ src/r_compiler/ssa/ssa_scope.h | 21 +++++++++++++++++++ src/r_compiler/ssa/ssa_short.cpp | 21 +++++++++++++++++++ src/r_compiler/ssa/ssa_short.h | 21 +++++++++++++++++++ src/r_compiler/ssa/ssa_stack.h | 21 +++++++++++++++++++ src/r_compiler/ssa/ssa_struct_type.cpp | 21 +++++++++++++++++++ src/r_compiler/ssa/ssa_struct_type.h | 21 +++++++++++++++++++ src/r_compiler/ssa/ssa_ubyte.cpp | 21 +++++++++++++++++++ src/r_compiler/ssa/ssa_ubyte.h | 21 +++++++++++++++++++ src/r_compiler/ssa/ssa_ubyte_ptr.cpp | 21 +++++++++++++++++++ src/r_compiler/ssa/ssa_ubyte_ptr.h | 21 +++++++++++++++++++ src/r_compiler/ssa/ssa_value.cpp | 21 +++++++++++++++++++ src/r_compiler/ssa/ssa_value.h | 21 +++++++++++++++++++ src/r_compiler/ssa/ssa_vec16ub.cpp | 21 +++++++++++++++++++ src/r_compiler/ssa/ssa_vec16ub.h | 21 +++++++++++++++++++ src/r_compiler/ssa/ssa_vec4f.cpp | 21 +++++++++++++++++++ src/r_compiler/ssa/ssa_vec4f.h | 21 +++++++++++++++++++ src/r_compiler/ssa/ssa_vec4f_ptr.cpp | 21 +++++++++++++++++++ src/r_compiler/ssa/ssa_vec4f_ptr.h | 21 +++++++++++++++++++ src/r_compiler/ssa/ssa_vec4i.cpp | 21 +++++++++++++++++++ src/r_compiler/ssa/ssa_vec4i.h | 21 +++++++++++++++++++ src/r_compiler/ssa/ssa_vec4i_ptr.cpp | 21 +++++++++++++++++++ src/r_compiler/ssa/ssa_vec4i_ptr.h | 21 +++++++++++++++++++ src/r_compiler/ssa/ssa_vec8s.cpp | 21 +++++++++++++++++++ src/r_compiler/ssa/ssa_vec8s.h | 21 +++++++++++++++++++ src/r_swrenderer2.cpp | 21 +++++++++++++++++++ src/r_swrenderer2.h | 21 +++++++++++++++++++ src/r_thread.cpp | 21 +++++++++++++++++++ src/r_thread.h | 21 +++++++++++++++++++ 60 files changed, 1260 insertions(+) diff --git a/src/r_compiler/fixedfunction/drawcolumncodegen.cpp b/src/r_compiler/fixedfunction/drawcolumncodegen.cpp index 71cdf8195c..0ed58b9434 100644 --- a/src/r_compiler/fixedfunction/drawcolumncodegen.cpp +++ b/src/r_compiler/fixedfunction/drawcolumncodegen.cpp @@ -1,3 +1,24 @@ +/* +** DrawColumn code generation +** 
Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. +** +*/ #include "i_system.h" #include "r_compiler/llvm_include.h" diff --git a/src/r_compiler/fixedfunction/drawcolumncodegen.h b/src/r_compiler/fixedfunction/drawcolumncodegen.h index 5f2ad737de..ba453a076c 100644 --- a/src/r_compiler/fixedfunction/drawcolumncodegen.h +++ b/src/r_compiler/fixedfunction/drawcolumncodegen.h @@ -1,3 +1,24 @@ +/* +** DrawColumn code generation +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. 
Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. +** +*/ #pragma once diff --git a/src/r_compiler/fixedfunction/drawercodegen.cpp b/src/r_compiler/fixedfunction/drawercodegen.cpp index a4009c1f7e..812ccafd75 100644 --- a/src/r_compiler/fixedfunction/drawercodegen.cpp +++ b/src/r_compiler/fixedfunction/drawercodegen.cpp @@ -1,3 +1,24 @@ +/* +** Drawer code generation +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. +** +*/ #include "i_system.h" #include "r_compiler/llvm_include.h" diff --git a/src/r_compiler/fixedfunction/drawercodegen.h b/src/r_compiler/fixedfunction/drawercodegen.h index 98451e1503..4dda370fe9 100644 --- a/src/r_compiler/fixedfunction/drawercodegen.h +++ b/src/r_compiler/fixedfunction/drawercodegen.h @@ -1,3 +1,24 @@ +/* +** Drawer code generation +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. 
In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. +** +*/ #pragma once diff --git a/src/r_compiler/fixedfunction/drawskycodegen.cpp b/src/r_compiler/fixedfunction/drawskycodegen.cpp index f96d9fc1d5..ae5b450323 100644 --- a/src/r_compiler/fixedfunction/drawskycodegen.cpp +++ b/src/r_compiler/fixedfunction/drawskycodegen.cpp @@ -1,3 +1,24 @@ +/* +** DrawSky code generation +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. 
This notice may not be removed or altered from any source distribution. +** +*/ #include "i_system.h" #include "r_compiler/llvm_include.h" diff --git a/src/r_compiler/fixedfunction/drawskycodegen.h b/src/r_compiler/fixedfunction/drawskycodegen.h index a02f8dc2be..aaf4bdfebd 100644 --- a/src/r_compiler/fixedfunction/drawskycodegen.h +++ b/src/r_compiler/fixedfunction/drawskycodegen.h @@ -1,3 +1,24 @@ +/* +** DrawSky code generation +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. +** +*/ #pragma once diff --git a/src/r_compiler/fixedfunction/drawspancodegen.cpp b/src/r_compiler/fixedfunction/drawspancodegen.cpp index 4404456ab8..fac8411cc8 100644 --- a/src/r_compiler/fixedfunction/drawspancodegen.cpp +++ b/src/r_compiler/fixedfunction/drawspancodegen.cpp @@ -1,3 +1,24 @@ +/* +** DrawSpan code generation +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. 
+** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. +** +*/ #include "i_system.h" #include "r_compiler/llvm_include.h" diff --git a/src/r_compiler/fixedfunction/drawspancodegen.h b/src/r_compiler/fixedfunction/drawspancodegen.h index 20869ac2ff..9ef3edd38f 100644 --- a/src/r_compiler/fixedfunction/drawspancodegen.h +++ b/src/r_compiler/fixedfunction/drawspancodegen.h @@ -1,3 +1,24 @@ +/* +** DrawSpan code generation +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. 
+** +*/ #pragma once diff --git a/src/r_compiler/fixedfunction/drawwallcodegen.cpp b/src/r_compiler/fixedfunction/drawwallcodegen.cpp index b820274efe..7c042f9be0 100644 --- a/src/r_compiler/fixedfunction/drawwallcodegen.cpp +++ b/src/r_compiler/fixedfunction/drawwallcodegen.cpp @@ -1,3 +1,24 @@ +/* +** DrawWall code generation +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. +** +*/ #include "i_system.h" #include "r_compiler/llvm_include.h" diff --git a/src/r_compiler/fixedfunction/drawwallcodegen.h b/src/r_compiler/fixedfunction/drawwallcodegen.h index 0e1cce5fcf..6591fb9b6a 100644 --- a/src/r_compiler/fixedfunction/drawwallcodegen.h +++ b/src/r_compiler/fixedfunction/drawwallcodegen.h @@ -1,3 +1,24 @@ +/* +** DrawWall code generation +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. 
+** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. +** +*/ #pragma once diff --git a/src/r_compiler/llvm_include.h b/src/r_compiler/llvm_include.h index 75952d1f9d..fa23f0c0cc 100644 --- a/src/r_compiler/llvm_include.h +++ b/src/r_compiler/llvm_include.h @@ -1,3 +1,24 @@ +/* +** LLVM includes +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. 
+** +*/ #pragma once diff --git a/src/r_compiler/llvmdrawers.cpp b/src/r_compiler/llvmdrawers.cpp index 2f8609f68c..4ac2bf00f2 100644 --- a/src/r_compiler/llvmdrawers.cpp +++ b/src/r_compiler/llvmdrawers.cpp @@ -1,3 +1,24 @@ +/* +** LLVM code generated drawers +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. +** +*/ #include "i_system.h" #include "r_compiler/llvm_include.h" diff --git a/src/r_compiler/llvmdrawers.h b/src/r_compiler/llvmdrawers.h index e7386678ed..5cbd05e4aa 100644 --- a/src/r_compiler/llvmdrawers.h +++ b/src/r_compiler/llvmdrawers.h @@ -1,3 +1,24 @@ +/* +** LLVM code generated drawers +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. 
The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. +** +*/ #pragma once diff --git a/src/r_compiler/ssa/ssa_barycentric_weight.h b/src/r_compiler/ssa/ssa_barycentric_weight.h index 52117ccc69..1a07d6c75a 100644 --- a/src/r_compiler/ssa/ssa_barycentric_weight.h +++ b/src/r_compiler/ssa/ssa_barycentric_weight.h @@ -1,3 +1,24 @@ +/* +** SSA barycentric weight and viewport calculations +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. 
+** +*/ #pragma once diff --git a/src/r_compiler/ssa/ssa_bool.cpp b/src/r_compiler/ssa/ssa_bool.cpp index bfd9ba5abf..c3c4c72b46 100644 --- a/src/r_compiler/ssa/ssa_bool.cpp +++ b/src/r_compiler/ssa/ssa_bool.cpp @@ -1,3 +1,24 @@ +/* +** SSA boolean +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. +** +*/ #include "r_compiler/llvm_include.h" #include "ssa_bool.h" diff --git a/src/r_compiler/ssa/ssa_bool.h b/src/r_compiler/ssa/ssa_bool.h index 2ef79e49b7..df8b66d513 100644 --- a/src/r_compiler/ssa/ssa_bool.h +++ b/src/r_compiler/ssa/ssa_bool.h @@ -1,3 +1,24 @@ +/* +** SSA boolean +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. 
The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. +** +*/ #pragma once diff --git a/src/r_compiler/ssa/ssa_float.cpp b/src/r_compiler/ssa/ssa_float.cpp index 4ec5c516df..07f18fb569 100644 --- a/src/r_compiler/ssa/ssa_float.cpp +++ b/src/r_compiler/ssa/ssa_float.cpp @@ -1,3 +1,24 @@ +/* +** SSA float32 +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. 
+** +*/ #include "r_compiler/llvm_include.h" #include "ssa_float.h" diff --git a/src/r_compiler/ssa/ssa_float.h b/src/r_compiler/ssa/ssa_float.h index 0edbcfcba8..8f2f22ba61 100644 --- a/src/r_compiler/ssa/ssa_float.h +++ b/src/r_compiler/ssa/ssa_float.h @@ -1,3 +1,24 @@ +/* +** SSA float32 +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. +** +*/ #pragma once diff --git a/src/r_compiler/ssa/ssa_float_ptr.cpp b/src/r_compiler/ssa/ssa_float_ptr.cpp index 9937328f68..5f97a75c5a 100644 --- a/src/r_compiler/ssa/ssa_float_ptr.cpp +++ b/src/r_compiler/ssa/ssa_float_ptr.cpp @@ -1,3 +1,24 @@ +/* +** SSA float32 pointer +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. 
The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. +** +*/ #include "r_compiler/llvm_include.h" #include "ssa_float_ptr.h" diff --git a/src/r_compiler/ssa/ssa_float_ptr.h b/src/r_compiler/ssa/ssa_float_ptr.h index a9953eb813..66e462539a 100644 --- a/src/r_compiler/ssa/ssa_float_ptr.h +++ b/src/r_compiler/ssa/ssa_float_ptr.h @@ -1,3 +1,24 @@ +/* +** SSA float32 pointer +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. 
+** +*/ #pragma once diff --git a/src/r_compiler/ssa/ssa_for_block.cpp b/src/r_compiler/ssa/ssa_for_block.cpp index 6039dab1f6..45ffdd9a39 100644 --- a/src/r_compiler/ssa/ssa_for_block.cpp +++ b/src/r_compiler/ssa/ssa_for_block.cpp @@ -1,3 +1,24 @@ +/* +** LLVM for loop branching +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. +** +*/ #include "r_compiler/llvm_include.h" #include "ssa_for_block.h" diff --git a/src/r_compiler/ssa/ssa_for_block.h b/src/r_compiler/ssa/ssa_for_block.h index 9dddef4d61..b65fb1c8bd 100644 --- a/src/r_compiler/ssa/ssa_for_block.h +++ b/src/r_compiler/ssa/ssa_for_block.h @@ -1,3 +1,24 @@ +/* +** LLVM for loop branching +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. 
The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. +** +*/ #pragma once diff --git a/src/r_compiler/ssa/ssa_function.cpp b/src/r_compiler/ssa/ssa_function.cpp index a326beaf76..751932e157 100644 --- a/src/r_compiler/ssa/ssa_function.cpp +++ b/src/r_compiler/ssa/ssa_function.cpp @@ -1,3 +1,24 @@ +/* +** LLVM function +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. 
+** +*/ #include "r_compiler/llvm_include.h" #include "ssa_function.h" diff --git a/src/r_compiler/ssa/ssa_function.h b/src/r_compiler/ssa/ssa_function.h index f1969c35b5..faa8b03fe8 100644 --- a/src/r_compiler/ssa/ssa_function.h +++ b/src/r_compiler/ssa/ssa_function.h @@ -1,3 +1,24 @@ +/* +** LLVM function +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. +** +*/ #pragma once diff --git a/src/r_compiler/ssa/ssa_if_block.cpp b/src/r_compiler/ssa/ssa_if_block.cpp index 7187a0759c..c95d7e3500 100644 --- a/src/r_compiler/ssa/ssa_if_block.cpp +++ b/src/r_compiler/ssa/ssa_if_block.cpp @@ -1,3 +1,24 @@ +/* +** LLVM if statement branching +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. 
The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. +** +*/ #include "r_compiler/llvm_include.h" #include "ssa_if_block.h" diff --git a/src/r_compiler/ssa/ssa_if_block.h b/src/r_compiler/ssa/ssa_if_block.h index 4f0c8a26bb..e46276575c 100644 --- a/src/r_compiler/ssa/ssa_if_block.h +++ b/src/r_compiler/ssa/ssa_if_block.h @@ -1,3 +1,24 @@ +/* +** LLVM if statement branching +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. 
+** +*/ #pragma once diff --git a/src/r_compiler/ssa/ssa_int.cpp b/src/r_compiler/ssa/ssa_int.cpp index 6f2afce7f1..78ea015956 100644 --- a/src/r_compiler/ssa/ssa_int.cpp +++ b/src/r_compiler/ssa/ssa_int.cpp @@ -1,3 +1,24 @@ +/* +** SSA int32 +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. +** +*/ #include "r_compiler/llvm_include.h" #include "ssa_int.h" diff --git a/src/r_compiler/ssa/ssa_int.h b/src/r_compiler/ssa/ssa_int.h index ef71e064a2..a8700f80bf 100644 --- a/src/r_compiler/ssa/ssa_int.h +++ b/src/r_compiler/ssa/ssa_int.h @@ -1,3 +1,24 @@ +/* +** SSA int32 +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. 
If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. +** +*/ #pragma once diff --git a/src/r_compiler/ssa/ssa_int_ptr.cpp b/src/r_compiler/ssa/ssa_int_ptr.cpp index daef1e7ab4..f8acd1a191 100644 --- a/src/r_compiler/ssa/ssa_int_ptr.cpp +++ b/src/r_compiler/ssa/ssa_int_ptr.cpp @@ -1,3 +1,24 @@ +/* +** SSA int32 pointer +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. +** +*/ #include "r_compiler/llvm_include.h" #include "ssa_int_ptr.h" diff --git a/src/r_compiler/ssa/ssa_int_ptr.h b/src/r_compiler/ssa/ssa_int_ptr.h index 9685283651..04075c15eb 100644 --- a/src/r_compiler/ssa/ssa_int_ptr.h +++ b/src/r_compiler/ssa/ssa_int_ptr.h @@ -1,3 +1,24 @@ +/* +** SSA int32 pointer +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. 
In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. +** +*/ #pragma once diff --git a/src/r_compiler/ssa/ssa_phi.h b/src/r_compiler/ssa/ssa_phi.h index 89cbc8cf05..66f233b6df 100644 --- a/src/r_compiler/ssa/ssa_phi.h +++ b/src/r_compiler/ssa/ssa_phi.h @@ -1,3 +1,24 @@ +/* +** SSA phi node +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. 
+** +*/ #pragma once diff --git a/src/r_compiler/ssa/ssa_scope.cpp b/src/r_compiler/ssa/ssa_scope.cpp index 7c6f090fe7..64b7fad051 100644 --- a/src/r_compiler/ssa/ssa_scope.cpp +++ b/src/r_compiler/ssa/ssa_scope.cpp @@ -1,3 +1,24 @@ +/* +** SSA scope data +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. +** +*/ #include "r_compiler/llvm_include.h" #include "ssa_scope.h" diff --git a/src/r_compiler/ssa/ssa_scope.h b/src/r_compiler/ssa/ssa_scope.h index 1c9dfb3629..0b0228558f 100644 --- a/src/r_compiler/ssa/ssa_scope.h +++ b/src/r_compiler/ssa/ssa_scope.h @@ -1,3 +1,24 @@ +/* +** SSA scope data +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. 
The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. +** +*/ #pragma once diff --git a/src/r_compiler/ssa/ssa_short.cpp b/src/r_compiler/ssa/ssa_short.cpp index 017f3002a2..8fe3f24e64 100644 --- a/src/r_compiler/ssa/ssa_short.cpp +++ b/src/r_compiler/ssa/ssa_short.cpp @@ -1,3 +1,24 @@ +/* +** SSA int16 +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. 
+** +*/ #include "r_compiler/llvm_include.h" #include "ssa_short.h" diff --git a/src/r_compiler/ssa/ssa_short.h b/src/r_compiler/ssa/ssa_short.h index 4a53434026..efb782b85e 100644 --- a/src/r_compiler/ssa/ssa_short.h +++ b/src/r_compiler/ssa/ssa_short.h @@ -1,3 +1,24 @@ +/* +** SSA int16 +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. +** +*/ #pragma once diff --git a/src/r_compiler/ssa/ssa_stack.h b/src/r_compiler/ssa/ssa_stack.h index d072f181e9..24c807a8a5 100644 --- a/src/r_compiler/ssa/ssa_stack.h +++ b/src/r_compiler/ssa/ssa_stack.h @@ -1,3 +1,24 @@ +/* +** LLVM stack variable +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. 
The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. +** +*/ #pragma once diff --git a/src/r_compiler/ssa/ssa_struct_type.cpp b/src/r_compiler/ssa/ssa_struct_type.cpp index d4ae2acb1c..e97fd6d56c 100644 --- a/src/r_compiler/ssa/ssa_struct_type.cpp +++ b/src/r_compiler/ssa/ssa_struct_type.cpp @@ -1,3 +1,24 @@ +/* +** LLVM struct +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. 
+** +*/ #include "r_compiler/llvm_include.h" #include "ssa_struct_type.h" diff --git a/src/r_compiler/ssa/ssa_struct_type.h b/src/r_compiler/ssa/ssa_struct_type.h index 67b056b325..f21dc92c30 100644 --- a/src/r_compiler/ssa/ssa_struct_type.h +++ b/src/r_compiler/ssa/ssa_struct_type.h @@ -1,3 +1,24 @@ +/* +** LLVM struct +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. +** +*/ #pragma once diff --git a/src/r_compiler/ssa/ssa_ubyte.cpp b/src/r_compiler/ssa/ssa_ubyte.cpp index 6fe9c3bb16..628973ff2b 100644 --- a/src/r_compiler/ssa/ssa_ubyte.cpp +++ b/src/r_compiler/ssa/ssa_ubyte.cpp @@ -1,3 +1,24 @@ +/* +** SSA uint8 +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. 
The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. +** +*/ #include "r_compiler/llvm_include.h" #include "ssa_ubyte.h" diff --git a/src/r_compiler/ssa/ssa_ubyte.h b/src/r_compiler/ssa/ssa_ubyte.h index 41ed3939be..ef1390162d 100644 --- a/src/r_compiler/ssa/ssa_ubyte.h +++ b/src/r_compiler/ssa/ssa_ubyte.h @@ -1,3 +1,24 @@ +/* +** SSA uint8 +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. 
+** +*/ #pragma once diff --git a/src/r_compiler/ssa/ssa_ubyte_ptr.cpp b/src/r_compiler/ssa/ssa_ubyte_ptr.cpp index 18bafc6891..5c0de68e31 100644 --- a/src/r_compiler/ssa/ssa_ubyte_ptr.cpp +++ b/src/r_compiler/ssa/ssa_ubyte_ptr.cpp @@ -1,3 +1,24 @@ +/* +** SSA uint8 pointer +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. +** +*/ #include "r_compiler/llvm_include.h" #include "ssa_ubyte_ptr.h" diff --git a/src/r_compiler/ssa/ssa_ubyte_ptr.h b/src/r_compiler/ssa/ssa_ubyte_ptr.h index 167a5877d6..b4567597e2 100644 --- a/src/r_compiler/ssa/ssa_ubyte_ptr.h +++ b/src/r_compiler/ssa/ssa_ubyte_ptr.h @@ -1,3 +1,24 @@ +/* +** SSA uint8 pointer +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. 
The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. +** +*/ #pragma once diff --git a/src/r_compiler/ssa/ssa_value.cpp b/src/r_compiler/ssa/ssa_value.cpp index 65f9da15d6..33e293e6ad 100644 --- a/src/r_compiler/ssa/ssa_value.cpp +++ b/src/r_compiler/ssa/ssa_value.cpp @@ -1,3 +1,24 @@ +/* +** SSA value +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. 
+** +*/ #include "r_compiler/llvm_include.h" #include "ssa_value.h" diff --git a/src/r_compiler/ssa/ssa_value.h b/src/r_compiler/ssa/ssa_value.h index d0d73043c9..1df94b267e 100644 --- a/src/r_compiler/ssa/ssa_value.h +++ b/src/r_compiler/ssa/ssa_value.h @@ -1,3 +1,24 @@ +/* +** SSA value +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. +** +*/ #pragma once diff --git a/src/r_compiler/ssa/ssa_vec16ub.cpp b/src/r_compiler/ssa/ssa_vec16ub.cpp index 4a077382eb..f3ad73294b 100644 --- a/src/r_compiler/ssa/ssa_vec16ub.cpp +++ b/src/r_compiler/ssa/ssa_vec16ub.cpp @@ -1,3 +1,24 @@ +/* +** SSA vec16 uint8 +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. 
The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. +** +*/ #include "r_compiler/llvm_include.h" #include "ssa_vec16ub.h" diff --git a/src/r_compiler/ssa/ssa_vec16ub.h b/src/r_compiler/ssa/ssa_vec16ub.h index 8f48c0c490..42f4ef3ee8 100644 --- a/src/r_compiler/ssa/ssa_vec16ub.h +++ b/src/r_compiler/ssa/ssa_vec16ub.h @@ -1,3 +1,24 @@ +/* +** SSA vec16 uint8 +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. 
+** +*/ #pragma once diff --git a/src/r_compiler/ssa/ssa_vec4f.cpp b/src/r_compiler/ssa/ssa_vec4f.cpp index dc6f9a716b..5e3c37c2da 100644 --- a/src/r_compiler/ssa/ssa_vec4f.cpp +++ b/src/r_compiler/ssa/ssa_vec4f.cpp @@ -1,3 +1,24 @@ +/* +** SSA vec4 float +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. +** +*/ #include "r_compiler/llvm_include.h" #include "ssa_vec4f.h" diff --git a/src/r_compiler/ssa/ssa_vec4f.h b/src/r_compiler/ssa/ssa_vec4f.h index 6d4ae63352..d8a2c5d012 100644 --- a/src/r_compiler/ssa/ssa_vec4f.h +++ b/src/r_compiler/ssa/ssa_vec4f.h @@ -1,3 +1,24 @@ +/* +** SSA vec4 float +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. 
The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. +** +*/ #pragma once diff --git a/src/r_compiler/ssa/ssa_vec4f_ptr.cpp b/src/r_compiler/ssa/ssa_vec4f_ptr.cpp index 31c23f2bd2..37f97faffb 100644 --- a/src/r_compiler/ssa/ssa_vec4f_ptr.cpp +++ b/src/r_compiler/ssa/ssa_vec4f_ptr.cpp @@ -1,3 +1,24 @@ +/* +** SSA vec4 float pointer +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. 
+** +*/ #include "r_compiler/llvm_include.h" #include "ssa_vec4f_ptr.h" diff --git a/src/r_compiler/ssa/ssa_vec4f_ptr.h b/src/r_compiler/ssa/ssa_vec4f_ptr.h index 15192352a9..46f31ef6da 100644 --- a/src/r_compiler/ssa/ssa_vec4f_ptr.h +++ b/src/r_compiler/ssa/ssa_vec4f_ptr.h @@ -1,3 +1,24 @@ +/* +** SSA vec4 float pointer +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. +** +*/ #pragma once diff --git a/src/r_compiler/ssa/ssa_vec4i.cpp b/src/r_compiler/ssa/ssa_vec4i.cpp index ac36172f0f..17abaf37d1 100644 --- a/src/r_compiler/ssa/ssa_vec4i.cpp +++ b/src/r_compiler/ssa/ssa_vec4i.cpp @@ -1,3 +1,24 @@ +/* +** SSA vec4 int32 +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. 
The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. +** +*/ #include "r_compiler/llvm_include.h" #include "ssa_vec4i.h" diff --git a/src/r_compiler/ssa/ssa_vec4i.h b/src/r_compiler/ssa/ssa_vec4i.h index 420ab021e9..2035b54572 100644 --- a/src/r_compiler/ssa/ssa_vec4i.h +++ b/src/r_compiler/ssa/ssa_vec4i.h @@ -1,3 +1,24 @@ +/* +** SSA vec4 int32 +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. 
+** +*/ #pragma once diff --git a/src/r_compiler/ssa/ssa_vec4i_ptr.cpp b/src/r_compiler/ssa/ssa_vec4i_ptr.cpp index f75ccd43fa..793d88fb55 100644 --- a/src/r_compiler/ssa/ssa_vec4i_ptr.cpp +++ b/src/r_compiler/ssa/ssa_vec4i_ptr.cpp @@ -1,3 +1,24 @@ +/* +** SSA vec4 int32 pointer +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. +** +*/ #include "r_compiler/llvm_include.h" #include "ssa_vec4i_ptr.h" diff --git a/src/r_compiler/ssa/ssa_vec4i_ptr.h b/src/r_compiler/ssa/ssa_vec4i_ptr.h index 257b4e34f2..e4d8134bb5 100644 --- a/src/r_compiler/ssa/ssa_vec4i_ptr.h +++ b/src/r_compiler/ssa/ssa_vec4i_ptr.h @@ -1,3 +1,24 @@ +/* +** SSA vec4 int32 pointer +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. 
The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. +** +*/ #pragma once diff --git a/src/r_compiler/ssa/ssa_vec8s.cpp b/src/r_compiler/ssa/ssa_vec8s.cpp index 6016b551fe..51bb5adca9 100644 --- a/src/r_compiler/ssa/ssa_vec8s.cpp +++ b/src/r_compiler/ssa/ssa_vec8s.cpp @@ -1,3 +1,24 @@ +/* +** SSA vec8 int16 +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. 
+** +*/ #include "r_compiler/llvm_include.h" #include "ssa_vec8s.h" diff --git a/src/r_compiler/ssa/ssa_vec8s.h b/src/r_compiler/ssa/ssa_vec8s.h index 40263773b4..be320804ec 100644 --- a/src/r_compiler/ssa/ssa_vec8s.h +++ b/src/r_compiler/ssa/ssa_vec8s.h @@ -1,3 +1,24 @@ +/* +** SSA vec8 int16 +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. +** +*/ #pragma once diff --git a/src/r_swrenderer2.cpp b/src/r_swrenderer2.cpp index f7fb6afb90..8fcf90ac98 100644 --- a/src/r_swrenderer2.cpp +++ b/src/r_swrenderer2.cpp @@ -1,3 +1,24 @@ +/* +** Experimental Doom software renderer +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. 
If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. +** +*/ #include #include "templates.h" diff --git a/src/r_swrenderer2.h b/src/r_swrenderer2.h index 1004b66bd3..eaa734c875 100644 --- a/src/r_swrenderer2.h +++ b/src/r_swrenderer2.h @@ -1,3 +1,24 @@ +/* +** Experimental Doom software renderer +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. +** +*/ #pragma once diff --git a/src/r_thread.cpp b/src/r_thread.cpp index 4d0a2100cb..bbb3faf3f9 100644 --- a/src/r_thread.cpp +++ b/src/r_thread.cpp @@ -1,3 +1,24 @@ +/* +** Renderer multithreading framework +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. 
+** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. +** +*/ #include #include "templates.h" diff --git a/src/r_thread.h b/src/r_thread.h index e0aca2a016..3271d80506 100644 --- a/src/r_thread.h +++ b/src/r_thread.h @@ -1,3 +1,24 @@ +/* +** Renderer multithreading framework +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. 
+** +*/ #pragma once From 5814270980c027a5a21fe311e2fb61a9a67b86c6 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 1 Nov 2016 22:58:53 +0100 Subject: [PATCH 261/912] Fix wrapping and scaling issue for the U texture coordinate for sprites --- src/r_things.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/r_things.cpp b/src/r_things.cpp index cad434fa5c..6ef4e8027a 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -954,15 +954,14 @@ void R_ProjectSprite (AActor *thing, int fakeside, F3DFloor *fakefloor, F3DFloor return; tx += tex->GetWidth() * thingxscalemul; - double dtx2 = tx * xscale; - x2 = centerx + xs_RoundToInt(dtx2); + x2 = centerx + xs_RoundToInt(tx * xscale); // off the left side or too small? if ((x2 < WindowLeft || x2 <= x1)) return; xscale = spriteScale.X * xscale / tex->Scale.X; - iscale = (fixed_t)(tex->GetWidth() / (dtx2 - dtx1) * FRACUNIT); + iscale = (fixed_t)(FRACUNIT / xscale); // Round towards zero to avoid wrapping in edge cases double yscale = spriteScale.Y / tex->Scale.Y; @@ -990,7 +989,7 @@ void R_ProjectSprite (AActor *thing, int fakeside, F3DFloor *fakefloor, F3DFloor vis->xiscale = iscale; } - vis->startfrac += (fixed_t)(vis->xiscale * (vis->x1 - centerx - dtx1 + 0.5 * thingxscalemul)); + vis->startfrac += (fixed_t)(vis->xiscale * (vis->x1 - centerx + 0.5 - dtx1)); } else { From 356830a8c836647154784e62823a8ce5a1d4f35b Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 3 Nov 2016 12:02:07 +0100 Subject: [PATCH 262/912] Add CPU misdetection workaround for the Pentium G840 and a llvm_cpu CVAR that can force codegen to a specific CPU architecture --- src/r_compiler/llvmdrawers.cpp | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/src/r_compiler/llvmdrawers.cpp b/src/r_compiler/llvmdrawers.cpp index 4ac2bf00f2..ce5024bba4 100644 --- a/src/r_compiler/llvmdrawers.cpp +++ b/src/r_compiler/llvmdrawers.cpp @@ -35,6 +35,14 @@ #include 
"r_compiler/ssa/ssa_struct_type.h" #include "r_compiler/ssa/ssa_value.h" #include "r_compiler/ssa/ssa_barycentric_weight.h" +#include "x86.h" +#include "c_cvars.h" +#include "version.h" + +CUSTOM_CVAR(String, llvm_cpu, "auto", CVAR_ARCHIVE | CVAR_NOINITCALL) +{ + Printf("You must restart " GAMENAME " for this change to take effect.\n"); +} class LLVMProgram { @@ -485,12 +493,22 @@ void LLVMProgram::CreateEE() std::string errorstring; + std::string mcpu = sys::getHostCPUName(); + if (std::string(CPU.CPUString).find("G840") && mcpu == "sandybridge") + mcpu = "westmere"; // Pentium G840 is misdetected as a sandy bridge CPU + + if (stricmp(llvm_cpu, "auto") != 0) + { + mcpu = llvm_cpu; + Printf("Overriding LLVM CPU target to %s\n", mcpu.c_str()); + } + llvm::Module *module = mModule.get(); EngineBuilder engineBuilder(std::move(mModule)); engineBuilder.setErrorStr(&errorstring); engineBuilder.setOptLevel(CodeGenOpt::Aggressive); engineBuilder.setEngineKind(EngineKind::JIT); - engineBuilder.setMCPU(sys::getHostCPUName()); + engineBuilder.setMCPU(mcpu); machine = engineBuilder.selectTarget(); if (!machine) I_FatalError("Could not create LLVM target machine"); From 90c8b3370a0e0915e512705c5752e734c2b05eb5 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 3 Nov 2016 12:51:07 +0100 Subject: [PATCH 263/912] Fix mipmap sampling bug --- src/r_draw.cpp | 2 +- src/r_segs.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 45758ee610..0a2c8553ee 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -1073,7 +1073,7 @@ extern "C" BYTE *ds_curcolormap, *ds_cursource, *ds_curtiltedsource; void R_SetSpanSource(FTexture *tex) { ds_source = r_swtruecolor ? 
(const BYTE*)tex->GetPixelsBgra() : tex->GetPixels(); - ds_source_mipmapped = tex->Mipmapped(); + ds_source_mipmapped = tex->Mipmapped() && tex->GetWidth() > 1 && tex->GetHeight() > 1; #ifdef X86_ASM if (!r_swtruecolor && ds_cursource != ds_source) { diff --git a/src/r_segs.cpp b/src/r_segs.cpp index 108463e389..a8267a663c 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -1159,12 +1159,12 @@ WallscanSampler::WallscanSampler(int y1, float swal, double yrepeat, fixed_t xof int mipmap_offset = 0; int mip_width = texture->GetWidth(); int mip_height = texture->GetHeight(); - if (r_mipmap && texture->Mipmapped()) + if (r_mipmap && texture->Mipmapped() && mip_width > 1 && mip_height > 1) { uint32_t xpos = (uint32_t)((((uint64_t)xoffset) << FRACBITS) / mip_width); double texture_bias = 1.7f; double level = MAX(magnitude - 3.0, 0.0); - while (level > texture_bias) + while (level > texture_bias && mip_width > 1 && mip_height > 1) { mipmap_offset += mip_width * mip_height; level *= 0.5f; From 12a1755edf1587fd8ee095aaa5d8978af01f37cc Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 4 Nov 2016 01:39:53 +0100 Subject: [PATCH 264/912] Move texture info to R_DrawMaskedColumn and add bounds checking --- src/r_segs.cpp | 8 +------ src/r_things.cpp | 60 ++++++++++++++++++++++++++---------------------- src/r_things.h | 2 +- src/v_draw.cpp | 42 +++------------------------------ 4 files changed, 37 insertions(+), 75 deletions(-) diff --git a/src/r_segs.cpp b/src/r_segs.cpp index a8267a663c..e1925f66a1 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -197,13 +197,7 @@ static void BlastMaskedColumn (FTexture *tex, bool useRt) // when forming multipatched textures (see r_data.c). 
// draw the texture - const FTexture::Span *spans; - const BYTE *pixels; - if (r_swtruecolor && !drawer_needs_pal_input) - pixels = (const BYTE *)tex->GetColumnBgra(maskedtexturecol[dc_x] >> FRACBITS, &spans); - else - pixels = tex->GetColumn(maskedtexturecol[dc_x] >> FRACBITS, &spans); - R_DrawMaskedColumn(pixels, spans, useRt); + R_DrawMaskedColumn(tex, maskedtexturecol[dc_x], useRt); rw_light += rw_lightstep; spryscale += rw_scalestep; } diff --git a/src/r_things.cpp b/src/r_things.cpp index bde2bfc662..a04676492d 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -251,9 +251,27 @@ double sprtopscreen; bool sprflipvert; -void R_DrawMaskedColumn (const BYTE *column, const FTexture::Span *span, bool useRt) +void R_DrawMaskedColumn (FTexture *tex, fixed_t col, bool useRt, bool unmasked) { + const FTexture::Span *span; + const BYTE *column; + if (r_swtruecolor && !drawer_needs_pal_input) + column = (const BYTE *)tex->GetColumnBgra(col >> FRACBITS, &span); + else + column = tex->GetColumn(col >> FRACBITS, &span); + + FTexture::Span unmaskedSpan[2]; + if (unmasked) + { + span = unmaskedSpan; + unmaskedSpan[0].TopOffset = 0; + unmaskedSpan[0].Length = tex->GetHeight(); + unmaskedSpan[1].TopOffset = 0; + unmaskedSpan[1].Length = 0; + } + int pixelsize = r_swtruecolor ? 
4 : 1; + while (span->Length != 0) { const int length = span->Length; @@ -283,6 +301,15 @@ void R_DrawMaskedColumn (const BYTE *column, const FTexture::Span *span, bool us dc_source = column; dc_dest = (ylookup[dc_yl] + dc_x) * pixelsize + dc_destorg; dc_count = dc_yh - dc_yl + 1; + + fixed_t maxfrac = ((top + length) << FRACBITS) - 1; + dc_texturefrac = MAX(dc_texturefrac, 0); + dc_texturefrac = MIN(dc_texturefrac, maxfrac); + if (dc_iscale > 0) + dc_count = MIN(dc_count, (maxfrac - dc_texturefrac + dc_iscale - 1) / dc_iscale); + else if (dc_iscale < 0) + dc_count = MIN(dc_count, (dc_texturefrac - dc_iscale) / (-dc_iscale)); + if (useRt) hcolfunc_pre(); else @@ -364,8 +391,6 @@ static inline bool R_ClipSpriteColumnWithPortals(vissprite_t* spr) // void R_DrawVisSprite (vissprite_t *vis) { - const BYTE *pixels; - const FTexture::Span *spans; fixed_t frac; FTexture *tex; int x2, stop4; @@ -432,13 +457,8 @@ void R_DrawVisSprite (vissprite_t *vis) { while ((dc_x < stop4) && (dc_x & 3)) { - if (r_swtruecolor && !drawer_needs_pal_input) - pixels = (const BYTE *)tex->GetColumnBgra (frac >> FRACBITS, &spans); - else - pixels = tex->GetColumn (frac >> FRACBITS, &spans); - if (ispsprite || !R_ClipSpriteColumnWithPortals(vis)) - R_DrawMaskedColumn (pixels, spans, false); + R_DrawMaskedColumn (tex, frac, false); dc_x++; frac += xiscale; } @@ -448,13 +468,8 @@ void R_DrawVisSprite (vissprite_t *vis) rt_initcols(nullptr); for (int zz = 4; zz; --zz) { - if (r_swtruecolor && !drawer_needs_pal_input) - pixels = (const BYTE *)tex->GetColumnBgra (frac >> FRACBITS, &spans); - else - pixels = tex->GetColumn (frac >> FRACBITS, &spans); - if (ispsprite || !R_ClipSpriteColumnWithPortals(vis)) - R_DrawMaskedColumn (pixels, spans, true); + R_DrawMaskedColumn (tex, frac, true); dc_x++; frac += xiscale; } @@ -463,13 +478,8 @@ void R_DrawVisSprite (vissprite_t *vis) while (dc_x < x2) { - if (r_swtruecolor && !drawer_needs_pal_input) - pixels = (const BYTE *)tex->GetColumnBgra (frac >> 
FRACBITS, &spans); - else - pixels = tex->GetColumn (frac >> FRACBITS, &spans); - if (ispsprite || !R_ClipSpriteColumnWithPortals(vis)) - R_DrawMaskedColumn (pixels, spans, false); + R_DrawMaskedColumn (tex, frac, false); dc_x++; frac += xiscale; } @@ -623,14 +633,8 @@ void R_WallSpriteColumn (bool useRt) else sprtopscreen = CenterY - dc_texturemid * spryscale; - const BYTE *column; - const FTexture::Span *spans; - if (r_swtruecolor && !drawer_needs_pal_input) - column = (const BYTE *)WallSpriteTile->GetColumnBgra (lwall[dc_x] >> FRACBITS, &spans); - else - column = WallSpriteTile->GetColumn (lwall[dc_x] >> FRACBITS, &spans); dc_texturefrac = 0; - R_DrawMaskedColumn(column, spans, useRt); + R_DrawMaskedColumn(WallSpriteTile, lwall[dc_x], useRt); rw_light += rw_lightstep; } diff --git a/src/r_things.h b/src/r_things.h index e8ffbf5ca4..e354898924 100644 --- a/src/r_things.h +++ b/src/r_things.h @@ -126,7 +126,7 @@ extern double pspriteyscale; extern FTexture *WallSpriteTile; -void R_DrawMaskedColumn (const BYTE *column, const FTexture::Span *spans, bool useRt); +void R_DrawMaskedColumn (FTexture *texture, fixed_t column, bool useRt, bool unmasked = false); void R_WallSpriteColumn (bool useRt); void R_CacheSprite (spritedef_t *sprite); diff --git a/src/v_draw.cpp b/src/v_draw.cpp index ddcbb381c5..2397fc48cf 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -133,8 +133,6 @@ void DCanvas::DrawTexture (FTexture *img, double x, double y, int tags_first, .. 
void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) { #ifndef NO_SWRENDER - FTexture::Span unmaskedSpan[2]; - const FTexture::Span **spanptr, *spans; static short bottomclipper[MAXWIDTH], topclipper[MAXWIDTH]; const BYTE *translation = NULL; @@ -144,15 +142,6 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) R_InitColumnDrawers(); } - if (parms.masked) - { - spanptr = &spans; - } - else - { - spanptr = NULL; - } - if (APART(parms.colorOverlay) != 0) { // The software renderer cannot invert the source without inverting the overlay @@ -217,18 +206,8 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) if (mode != DontDraw) { - const BYTE *pixels; int stop4; - if (spanptr == NULL) - { // Create a single span for forced unmasked images - spans = unmaskedSpan; - unmaskedSpan[0].TopOffset = 0; - unmaskedSpan[0].Length = img->GetHeight(); - unmaskedSpan[1].TopOffset = 0; - unmaskedSpan[1].Length = 0; - } - double centeryback = CenterY; CenterY = 0; @@ -320,12 +299,7 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) { while ((dc_x < stop4) && (dc_x & 3)) { - if (r_swtruecolor && !drawer_needs_pal_input) - pixels = (const BYTE *)img->GetColumnBgra(frac >> FRACBITS, spanptr); - else - pixels = img->GetColumn(frac >> FRACBITS, spanptr); - - R_DrawMaskedColumn(pixels, spans, false); + R_DrawMaskedColumn(img, frac, false, !parms.masked); dc_x++; frac += xiscale_i; } @@ -335,12 +309,7 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) rt_initcols(nullptr); for (int zz = 4; zz; --zz) { - if (r_swtruecolor && !drawer_needs_pal_input) - pixels = (const BYTE *)img->GetColumnBgra(frac >> FRACBITS, spanptr); - else - pixels = img->GetColumn(frac >> FRACBITS, spanptr); - - R_DrawMaskedColumn(pixels, spans, true); + R_DrawMaskedColumn(img, frac, true, !parms.masked); dc_x++; frac += xiscale_i; } @@ -349,12 +318,7 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) while (dc_x < x2_i) { - if 
(r_swtruecolor && !drawer_needs_pal_input) - pixels = (const BYTE *)img->GetColumnBgra(frac >> FRACBITS, spanptr); - else - pixels = img->GetColumn(frac >> FRACBITS, spanptr); - - R_DrawMaskedColumn(pixels, spans, false); + R_DrawMaskedColumn(img, frac, false, !parms.masked); dc_x++; frac += xiscale_i; } From 4b18530047085cf480b873198dda3010037c4854 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 5 Nov 2016 11:29:50 +0100 Subject: [PATCH 265/912] Add linear filtering to column drawers and fix offsetting bug with wall/span linear filters --- .../fixedfunction/drawcolumncodegen.cpp | 142 +++++++++++++----- .../fixedfunction/drawcolumncodegen.h | 13 +- .../fixedfunction/drawercodegen.cpp | 42 ------ src/r_compiler/fixedfunction/drawercodegen.h | 4 - .../fixedfunction/drawspancodegen.cpp | 28 +++- .../fixedfunction/drawspancodegen.h | 1 + .../fixedfunction/drawwallcodegen.cpp | 25 ++- .../fixedfunction/drawwallcodegen.h | 1 + src/r_compiler/llvmdrawers.cpp | 3 + src/r_compiler/llvmdrawers.h | 6 +- src/r_draw.cpp | 6 +- src/r_draw_rgba.cpp | 5 + src/r_drawt_rgba.cpp | 3 + src/r_segs.cpp | 1 + src/r_things.cpp | 125 +++++++++++++++ 15 files changed, 310 insertions(+), 95 deletions(-) diff --git a/src/r_compiler/fixedfunction/drawcolumncodegen.cpp b/src/r_compiler/fixedfunction/drawcolumncodegen.cpp index 0ed58b9434..45a75cdcb8 100644 --- a/src/r_compiler/fixedfunction/drawcolumncodegen.cpp +++ b/src/r_compiler/fixedfunction/drawcolumncodegen.cpp @@ -36,30 +36,35 @@ void DrawColumnCodegen::Generate(DrawColumnVariant variant, DrawColumnMethod met { dest = args[0][0].load(true); source = args[0][1].load(true); - colormap = args[0][2].load(true); - translation = args[0][3].load(true); - basecolors = args[0][4].load(true); - pitch = args[0][5].load(true); - count = args[0][6].load(true); - dest_y = args[0][7].load(true); + source2 = args[0][2].load(true); + colormap = args[0][3].load(true); + translation = args[0][4].load(true); + basecolors = args[0][5].load(true); 
+ pitch = args[0][6].load(true); + count = args[0][7].load(true); + dest_y = args[0][8].load(true); if (method == DrawColumnMethod::Normal) - iscale = args[0][8].load(true); - texturefrac = args[0][9].load(true); - light = args[0][10].load(true); - color = SSAVec4i::unpack(args[0][11].load(true)); - srccolor = SSAVec4i::unpack(args[0][12].load(true)); - srcalpha = args[0][13].load(true); - destalpha = args[0][14].load(true); - SSAShort light_alpha = args[0][15].load(true); - SSAShort light_red = args[0][16].load(true); - SSAShort light_green = args[0][17].load(true); - SSAShort light_blue = args[0][18].load(true); - SSAShort fade_alpha = args[0][19].load(true); - SSAShort fade_red = args[0][20].load(true); - SSAShort fade_green = args[0][21].load(true); - SSAShort fade_blue = args[0][22].load(true); - SSAShort desaturate = args[0][23].load(true); - SSAInt flags = args[0][24].load(true); + { + iscale = args[0][9].load(true); + texturefracx = args[0][10].load(true); + textureheight = args[0][11].load(true); + } + texturefrac = args[0][12].load(true); + light = args[0][13].load(true); + color = SSAVec4i::unpack(args[0][14].load(true)); + srccolor = SSAVec4i::unpack(args[0][15].load(true)); + srcalpha = args[0][16].load(true); + destalpha = args[0][17].load(true); + SSAShort light_alpha = args[0][18].load(true); + SSAShort light_red = args[0][19].load(true); + SSAShort light_green = args[0][20].load(true); + SSAShort light_blue = args[0][21].load(true); + SSAShort fade_alpha = args[0][22].load(true); + SSAShort fade_red = args[0][23].load(true); + SSAShort fade_green = args[0][24].load(true); + SSAShort fade_blue = args[0][25].load(true); + SSAShort desaturate = args[0][26].load(true); + SSAInt flags = args[0][27].load(true); shade_constants.light = SSAVec4i(light_blue.zext_int(), light_green.zext_int(), light_red.zext_int(), light_alpha.zext_int()); shade_constants.fade = SSAVec4i(fade_blue.zext_int(), fade_green.zext_int(), fade_red.zext_int(), 
fade_alpha.zext_int()); shade_constants.desaturate = desaturate.zext_int(); @@ -71,6 +76,7 @@ void DrawColumnCodegen::Generate(DrawColumnVariant variant, DrawColumnMethod met thread.temp = thread_data[0][4].load(true); is_simple_shade = (flags & DrawColumnArgs::simple_shade) == SSAInt(DrawColumnArgs::simple_shade); + is_nearest_filter = (flags & DrawColumnArgs::nearest_filter) == SSAInt(DrawColumnArgs::nearest_filter); count = count_for_thread(dest_y, count, thread); dest = dest_for_thread(dest_y, pitch, dest, thread); @@ -79,21 +85,40 @@ void DrawColumnCodegen::Generate(DrawColumnVariant variant, DrawColumnMethod met { stack_frac.store(texturefrac + iscale * skipped_by_thread(dest_y, thread)); iscale = iscale * thread.num_cores; + one = (1 << 30) / textureheight; + + SSAIfBlock branch; + branch.if_block(is_simple_shade); + LoopShade(variant, method, true); + branch.else_block(); + LoopShade(variant, method, false); + branch.end_block(); } else { source = thread.temp[((dest_y + skipped_by_thread(dest_y, thread)) * 4 + texturefrac) * 4]; - } + SSAIfBlock branch; + branch.if_block(is_simple_shade); + Loop(variant, method, true, true); + branch.else_block(); + Loop(variant, method, false, true); + branch.end_block(); + } +} + +void DrawColumnCodegen::LoopShade(DrawColumnVariant variant, DrawColumnMethod method, bool isSimpleShade) +{ SSAIfBlock branch; - branch.if_block(is_simple_shade); - Loop(variant, method, true); + branch.if_block(is_nearest_filter); + Loop(variant, method, isSimpleShade, true); branch.else_block(); - Loop(variant, method, false); + stack_frac.store(stack_frac.load() - (one >> 1)); + Loop(variant, method, isSimpleShade, false); branch.end_block(); } -void DrawColumnCodegen::Loop(DrawColumnVariant variant, DrawColumnMethod method, bool isSimpleShade) +void DrawColumnCodegen::Loop(DrawColumnVariant variant, DrawColumnMethod method, bool isSimpleShade, bool isNearestFilter) { SSAInt sincr; if (method != DrawColumnMethod::Normal) @@ -109,9 +134,10 @@ 
void DrawColumnCodegen::Loop(DrawColumnVariant variant, DrawColumnMethod method, if (method == DrawColumnMethod::Normal) { frac = stack_frac.load(); - sample_index = frac >> FRACBITS; - if (!IsPaletteInput(variant)) - sample_index = sample_index * 4; + if (IsPaletteInput(variant)) + sample_index = frac >> FRACBITS; + else + sample_index = frac; } else { @@ -140,7 +166,7 @@ void DrawColumnCodegen::Loop(DrawColumnVariant variant, DrawColumnMethod method, SSAVec4i outcolor[4]; for (int i = 0; i < numColumns; i++) - outcolor[i] = ProcessPixel(sample_index + i * 4, bgcolor[i], variant, isSimpleShade); + outcolor[i] = ProcessPixel(sample_index + i * 4, bgcolor[i], variant, method, isSimpleShade, isNearestFilter); if (numColumns == 4) { @@ -186,7 +212,7 @@ bool DrawColumnCodegen::IsPaletteInput(DrawColumnVariant variant) } } -SSAVec4i DrawColumnCodegen::ProcessPixel(SSAInt sample_index, SSAVec4i bgcolor, DrawColumnVariant variant, bool isSimpleShade) +SSAVec4i DrawColumnCodegen::ProcessPixel(SSAInt sample_index, SSAVec4i bgcolor, DrawColumnVariant variant, DrawColumnMethod method, bool isSimpleShade, bool isNearestFilter) { SSAInt alpha, inv_alpha; SSAVec4i fg; @@ -194,22 +220,22 @@ SSAVec4i DrawColumnCodegen::ProcessPixel(SSAInt sample_index, SSAVec4i bgcolor, { default: case DrawColumnVariant::DrawCopy: - return blend_copy(Sample(sample_index)); + return blend_copy(Sample(sample_index, method, isNearestFilter)); case DrawColumnVariant::Draw: - return blend_copy(Shade(Sample(sample_index), isSimpleShade)); + return blend_copy(Shade(Sample(sample_index, method, isNearestFilter), isSimpleShade)); case DrawColumnVariant::DrawAdd: case DrawColumnVariant::DrawAddClamp: - fg = Shade(Sample(sample_index), isSimpleShade); + fg = Shade(Sample(sample_index, method, isNearestFilter), isSimpleShade); return blend_add(fg, bgcolor, srcalpha, calc_blend_bgalpha(fg, destalpha)); case DrawColumnVariant::DrawShaded: alpha = SSAInt::MAX(SSAInt::MIN(ColormapSample(sample_index), 
SSAInt(64)), SSAInt(0)) * 4; inv_alpha = 256 - alpha; return blend_add(color, bgcolor, alpha, inv_alpha); case DrawColumnVariant::DrawSubClamp: - fg = Shade(Sample(sample_index), isSimpleShade); + fg = Shade(Sample(sample_index, method, isNearestFilter), isSimpleShade); return blend_sub(fg, bgcolor, srcalpha, calc_blend_bgalpha(fg, destalpha)); case DrawColumnVariant::DrawRevSubClamp: - fg = Shade(Sample(sample_index), isSimpleShade); + fg = Shade(Sample(sample_index, method, isNearestFilter), isSimpleShade); return blend_revsub(fg, bgcolor, srcalpha, calc_blend_bgalpha(fg, destalpha)); case DrawColumnVariant::DrawTranslated: return blend_copy(Shade(TranslateSample(sample_index), isSimpleShade)); @@ -285,9 +311,45 @@ SSAVec4i DrawColumnCodegen::ProcessPixelPal(SSAInt sample_index, SSAVec4i bgcolo } } -SSAVec4i DrawColumnCodegen::Sample(SSAInt sample_index) +SSAVec4i DrawColumnCodegen::Sample(SSAInt frac, DrawColumnMethod method, bool isNearestFilter) { - return source[sample_index].load_vec4ub(true); + if (method == DrawColumnMethod::Normal) + { + if (isNearestFilter) + { + SSAInt sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; + return source[sample_index * 4].load_vec4ub(false); + } + else + { + return SampleLinear(source, source2, texturefracx, frac, one, textureheight); + } + } + else + { + return source[frac].load_vec4ub(true); + } +} + +SSAVec4i DrawColumnCodegen::SampleLinear(SSAUBytePtr col0, SSAUBytePtr col1, SSAInt texturefracx, SSAInt texturefracy, SSAInt one, SSAInt height) +{ + // Clamp to edge + SSAInt frac_y0 = (SSAInt::MAX(SSAInt::MIN(texturefracy, SSAInt((1 << 30) - 1)), SSAInt(0)) >> (FRACBITS - 2)) * height; + SSAInt frac_y1 = (SSAInt::MAX(SSAInt::MIN(texturefracy + one, SSAInt((1 << 30) - 1)), SSAInt(0)) >> (FRACBITS - 2)) * height; + SSAInt y0 = frac_y0 >> FRACBITS; + SSAInt y1 = frac_y1 >> FRACBITS; + + SSAVec4i p00 = col0[y0 * 4].load_vec4ub(true); + SSAVec4i p01 = col0[y1 * 4].load_vec4ub(true); + SSAVec4i p10 = 
col1[y0 * 4].load_vec4ub(true); + SSAVec4i p11 = col1[y1 * 4].load_vec4ub(true); + + SSAInt inv_b = texturefracx; + SSAInt inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; + SSAInt a = 16 - inv_a; + SSAInt b = 16 - inv_b; + + return (p00 * (a * b) + p01 * (inv_a * b) + p10 * (a * inv_b) + p11 * (inv_a * inv_b) + 127) >> 8; } SSAInt DrawColumnCodegen::ColormapSample(SSAInt sample_index) diff --git a/src/r_compiler/fixedfunction/drawcolumncodegen.h b/src/r_compiler/fixedfunction/drawcolumncodegen.h index ba453a076c..2c44edc5ce 100644 --- a/src/r_compiler/fixedfunction/drawcolumncodegen.h +++ b/src/r_compiler/fixedfunction/drawcolumncodegen.h @@ -58,10 +58,12 @@ public: void Generate(DrawColumnVariant variant, DrawColumnMethod method, SSAValue args, SSAValue thread_data); private: - void Loop(DrawColumnVariant variant, DrawColumnMethod method, bool isSimpleShade); - SSAVec4i ProcessPixel(SSAInt sample_index, SSAVec4i bgcolor, DrawColumnVariant variant, bool isSimpleShade); + void LoopShade(DrawColumnVariant variant, DrawColumnMethod method, bool isSimpleShade); + void Loop(DrawColumnVariant variant, DrawColumnMethod method, bool isSimpleShade, bool isNearestFilter); + SSAVec4i ProcessPixel(SSAInt sample_index, SSAVec4i bgcolor, DrawColumnVariant variant, DrawColumnMethod method, bool isSimpleShade, bool isNearestFilter); SSAVec4i ProcessPixelPal(SSAInt sample_index, SSAVec4i bgcolor, DrawColumnVariant variant, bool isSimpleShade); - SSAVec4i Sample(SSAInt frac); + SSAVec4i Sample(SSAInt frac, DrawColumnMethod method, bool isNearestFilter); + SSAVec4i SampleLinear(SSAUBytePtr col0, SSAUBytePtr col1, SSAInt texturefracx, SSAInt texturefracy, SSAInt one, SSAInt height); SSAInt ColormapSample(SSAInt frac); SSAVec4i TranslateSample(SSAInt frac); SSAInt TranslateSamplePal(SSAInt frac); @@ -73,6 +75,7 @@ private: SSAUBytePtr dest; SSAUBytePtr source; + SSAUBytePtr source2; SSAUBytePtr colormap; SSAUBytePtr translation; SSAUBytePtr basecolors; @@ -80,6 +83,9 @@ private: SSAInt 
count; SSAInt dest_y; SSAInt iscale; + SSAInt texturefracx; + SSAInt textureheight; + SSAInt one; SSAInt texturefrac; SSAInt light; SSAVec4i color; @@ -87,6 +93,7 @@ private: SSAInt srcalpha; SSAInt destalpha; SSABool is_simple_shade; + SSABool is_nearest_filter; SSAShadeConstants shade_constants; SSAWorkerThread thread; }; diff --git a/src/r_compiler/fixedfunction/drawercodegen.cpp b/src/r_compiler/fixedfunction/drawercodegen.cpp index 812ccafd75..761023ceff 100644 --- a/src/r_compiler/fixedfunction/drawercodegen.cpp +++ b/src/r_compiler/fixedfunction/drawercodegen.cpp @@ -139,45 +139,3 @@ SSAInt DrawerCodegen::calc_blend_bgalpha(SSAVec4i fg, SSAInt destalpha) SSAInt inv_alpha = 256 - alpha; return (destalpha * alpha + 256 * inv_alpha + 128) >> 8; } - -SSAVec4i DrawerCodegen::sample_linear(SSAUBytePtr col0, SSAUBytePtr col1, SSAInt texturefracx, SSAInt texturefracy, SSAInt one, SSAInt height) -{ - SSAInt frac_y0 = (texturefracy >> FRACBITS) * height; - SSAInt frac_y1 = ((texturefracy + one) >> FRACBITS) * height; - SSAInt y0 = frac_y0 >> FRACBITS; - SSAInt y1 = frac_y1 >> FRACBITS; - - SSAVec4i p00 = col0[y0 * 4].load_vec4ub(true); - SSAVec4i p01 = col0[y1 * 4].load_vec4ub(true); - SSAVec4i p10 = col1[y0 * 4].load_vec4ub(true); - SSAVec4i p11 = col1[y1 * 4].load_vec4ub(true); - - SSAInt inv_b = texturefracx; - SSAInt a = (frac_y1 >> (FRACBITS - 4)) & 15; - SSAInt inv_a = 16 - a; - SSAInt b = 16 - inv_b; - - return (p00 * (a * b) + p01 * (inv_a * b) + p10 * (a * inv_b) + p11 * (inv_a * inv_b) + 127) >> 8; -} - -SSAVec4i DrawerCodegen::sample_linear(SSAUBytePtr texture, SSAInt xfrac, SSAInt yfrac, SSAInt xbits, SSAInt ybits) -{ - SSAInt xshift = (32 - xbits); - SSAInt yshift = (32 - ybits); - SSAInt xmask = (SSAInt(1) << xshift) - 1; - SSAInt ymask = (SSAInt(1) << yshift) - 1; - SSAInt x = xfrac >> xbits; - SSAInt y = yfrac >> ybits; - - SSAVec4i p00 = texture[((y & ymask) + ((x & xmask) << yshift)) * 4].load_vec4ub(true); - SSAVec4i p01 = texture[(((y + 1) & ymask) 
+ ((x & xmask) << yshift)) * 4].load_vec4ub(true); - SSAVec4i p10 = texture[((y & ymask) + (((x + 1) & xmask) << yshift)) * 4].load_vec4ub(true); - SSAVec4i p11 = texture[(((y + 1) & ymask) + (((x + 1) & xmask) << yshift)) * 4].load_vec4ub(true); - - SSAInt inv_b = (xfrac >> (xbits - 4)) & 15; - SSAInt inv_a = (yfrac >> (ybits - 4)) & 15; - SSAInt a = 16 - inv_a; - SSAInt b = 16 - inv_b; - - return (p00 * (a * b) + p01 * (inv_a * b) + p10 * (a * inv_b) + p11 * (inv_a * inv_b) + 127) >> 8; -} diff --git a/src/r_compiler/fixedfunction/drawercodegen.h b/src/r_compiler/fixedfunction/drawercodegen.h index 4dda370fe9..5de52dca1c 100644 --- a/src/r_compiler/fixedfunction/drawercodegen.h +++ b/src/r_compiler/fixedfunction/drawercodegen.h @@ -88,8 +88,4 @@ public: // Calculates the final alpha values to be used when combined with the source texture alpha channel SSAInt calc_blend_bgalpha(SSAVec4i fg, SSAInt destalpha); - - // SampleBgra - SSAVec4i sample_linear(SSAUBytePtr col0, SSAUBytePtr col1, SSAInt texturefracx, SSAInt texturefracy, SSAInt one, SSAInt height); - SSAVec4i sample_linear(SSAUBytePtr texture, SSAInt xfrac, SSAInt yfrac, SSAInt xbits, SSAInt ybits); }; diff --git a/src/r_compiler/fixedfunction/drawspancodegen.cpp b/src/r_compiler/fixedfunction/drawspancodegen.cpp index fac8411cc8..0c6bd683f1 100644 --- a/src/r_compiler/fixedfunction/drawspancodegen.cpp +++ b/src/r_compiler/fixedfunction/drawspancodegen.cpp @@ -89,6 +89,8 @@ void DrawSpanCodegen::LoopShade(DrawSpanVariant variant, bool isSimpleShade) branch.if_block(is_nearest_filter); LoopFilter(variant, isSimpleShade, true); branch.else_block(); + stack_xfrac.store(stack_xfrac.load() - (SSAInt(1) << (31 - xbits))); + stack_yfrac.store(stack_yfrac.load() - (SSAInt(1) << (31 - ybits))); LoopFilter(variant, isSimpleShade, false); branch.end_block(); } @@ -187,15 +189,37 @@ SSAVec4i DrawSpanCodegen::Sample(SSAInt xfrac, SSAInt yfrac, bool isNearestFilte { if (is64x64) { - return sample_linear(source, xfrac, 
yfrac, SSAInt(26), SSAInt(26)); + return SampleLinear(source, xfrac, yfrac, SSAInt(26), SSAInt(26)); } else { - return sample_linear(source, xfrac, yfrac, 32 - xbits, 32 - ybits); + return SampleLinear(source, xfrac, yfrac, 32 - xbits, 32 - ybits); } } } +SSAVec4i DrawSpanCodegen::SampleLinear(SSAUBytePtr texture, SSAInt xfrac, SSAInt yfrac, SSAInt xbits, SSAInt ybits) +{ + SSAInt xshift = (32 - xbits); + SSAInt yshift = (32 - ybits); + SSAInt xmask = (SSAInt(1) << xshift) - 1; + SSAInt ymask = (SSAInt(1) << yshift) - 1; + SSAInt x = xfrac >> xbits; + SSAInt y = yfrac >> ybits; + + SSAVec4i p00 = texture[((y & ymask) + ((x & xmask) << yshift)) * 4].load_vec4ub(true); + SSAVec4i p01 = texture[(((y + 1) & ymask) + ((x & xmask) << yshift)) * 4].load_vec4ub(true); + SSAVec4i p10 = texture[((y & ymask) + (((x + 1) & xmask) << yshift)) * 4].load_vec4ub(true); + SSAVec4i p11 = texture[(((y + 1) & ymask) + (((x + 1) & xmask) << yshift)) * 4].load_vec4ub(true); + + SSAInt inv_b = (xfrac >> (xbits - 4)) & 15; + SSAInt inv_a = (yfrac >> (ybits - 4)) & 15; + SSAInt a = 16 - inv_a; + SSAInt b = 16 - inv_b; + + return (p00 * (a * b) + p01 * (inv_a * b) + p10 * (a * inv_b) + p11 * (inv_a * inv_b) + 127) >> 8; +} + SSAVec4i DrawSpanCodegen::Shade(SSAVec4i fg, bool isSimpleShade) { if (isSimpleShade) diff --git a/src/r_compiler/fixedfunction/drawspancodegen.h b/src/r_compiler/fixedfunction/drawspancodegen.h index 9ef3edd38f..48c86040bd 100644 --- a/src/r_compiler/fixedfunction/drawspancodegen.h +++ b/src/r_compiler/fixedfunction/drawspancodegen.h @@ -45,6 +45,7 @@ private: SSAInt Loop4x(DrawSpanVariant variant, bool isSimpleShade, bool isNearestFilter, bool is64x64); void Loop(SSAInt start, DrawSpanVariant variant, bool isSimpleShade, bool isNearestFilter, bool is64x64); SSAVec4i Sample(SSAInt xfrac, SSAInt yfrac, bool isNearestFilter, bool is64x64); + SSAVec4i SampleLinear(SSAUBytePtr texture, SSAInt xfrac, SSAInt yfrac, SSAInt xbits, SSAInt ybits); SSAVec4i Shade(SSAVec4i fg, 
bool isSimpleShade); SSAVec4i Blend(SSAVec4i fg, SSAVec4i bg, DrawSpanVariant variant); diff --git a/src/r_compiler/fixedfunction/drawwallcodegen.cpp b/src/r_compiler/fixedfunction/drawwallcodegen.cpp index 7c042f9be0..ec30a8298e 100644 --- a/src/r_compiler/fixedfunction/drawwallcodegen.cpp +++ b/src/r_compiler/fixedfunction/drawwallcodegen.cpp @@ -117,6 +117,9 @@ void DrawWallCodegen::LoopShade(DrawWallVariant variant, bool fourColumns, bool branch.if_block(is_nearest_filter); Loop(variant, fourColumns, isSimpleShade, true); branch.else_block(); + int numColumns = fourColumns ? 4 : 1; + for (int i = 0; i < numColumns; i++) + stack_frac[i].store(stack_frac[i].load() - (one[i] / 2)); Loop(variant, fourColumns, isSimpleShade, false); branch.end_block(); } @@ -180,10 +183,30 @@ SSAVec4i DrawWallCodegen::Sample(SSAInt frac, int index, bool isNearestFilter) } else { - return sample_linear(source[index], source2[index], texturefracx[index], frac, one[index], textureheight[index]); + return SampleLinear(source[index], source2[index], texturefracx[index], frac, one[index], textureheight[index]); } } +SSAVec4i DrawWallCodegen::SampleLinear(SSAUBytePtr col0, SSAUBytePtr col1, SSAInt texturefracx, SSAInt texturefracy, SSAInt one, SSAInt height) +{ + SSAInt frac_y0 = (texturefracy >> FRACBITS) * height; + SSAInt frac_y1 = ((texturefracy + one) >> FRACBITS) * height; + SSAInt y0 = frac_y0 >> FRACBITS; + SSAInt y1 = frac_y1 >> FRACBITS; + + SSAVec4i p00 = col0[y0 * 4].load_vec4ub(true); + SSAVec4i p01 = col0[y1 * 4].load_vec4ub(true); + SSAVec4i p10 = col1[y0 * 4].load_vec4ub(true); + SSAVec4i p11 = col1[y1 * 4].load_vec4ub(true); + + SSAInt inv_b = texturefracx; + SSAInt a = (frac_y1 >> (FRACBITS - 4)) & 15; + SSAInt inv_a = 16 - a; + SSAInt b = 16 - inv_b; + + return (p00 * (a * b) + p01 * (inv_a * b) + p10 * (a * inv_b) + p11 * (inv_a * inv_b) + 127) >> 8; +} + SSAVec4i DrawWallCodegen::Shade(SSAVec4i fg, int index, bool isSimpleShade) { if (isSimpleShade) diff --git 
a/src/r_compiler/fixedfunction/drawwallcodegen.h b/src/r_compiler/fixedfunction/drawwallcodegen.h index 6591fb9b6a..4a1599063c 100644 --- a/src/r_compiler/fixedfunction/drawwallcodegen.h +++ b/src/r_compiler/fixedfunction/drawwallcodegen.h @@ -43,6 +43,7 @@ private: void LoopShade(DrawWallVariant variant, bool fourColumns, bool isSimpleShade); void Loop(DrawWallVariant variant, bool fourColumns, bool isSimpleShade, bool isNearestFilter); SSAVec4i Sample(SSAInt frac, int index, bool isNearestFilter); + SSAVec4i SampleLinear(SSAUBytePtr col0, SSAUBytePtr col1, SSAInt texturefracx, SSAInt texturefracy, SSAInt one, SSAInt height); SSAVec4i Shade(SSAVec4i fg, int index, bool isSimpleShade); SSAVec4i Blend(SSAVec4i fg, SSAVec4i bg, DrawWallVariant variant); diff --git a/src/r_compiler/llvmdrawers.cpp b/src/r_compiler/llvmdrawers.cpp index ce5024bba4..c390197999 100644 --- a/src/r_compiler/llvmdrawers.cpp +++ b/src/r_compiler/llvmdrawers.cpp @@ -360,6 +360,7 @@ llvm::Type *LLVMDrawersImpl::GetDrawColumnArgsStruct(llvm::LLVMContext &context) std::vector elements; elements.push_back(llvm::Type::getInt8PtrTy(context)); // uint32_t *dest; elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint32_t *source; + elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint32_t *source2; elements.push_back(llvm::Type::getInt8PtrTy(context)); // uint8_t *colormap; elements.push_back(llvm::Type::getInt8PtrTy(context)); // uint8_t *translation; elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint32_t *basecolors; @@ -367,6 +368,8 @@ llvm::Type *LLVMDrawersImpl::GetDrawColumnArgsStruct(llvm::LLVMContext &context) elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t count; elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t dest_y; elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t iscale; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t texturefracx; + 
elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t textureheight; elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t texturefrac; elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t light; elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t color; diff --git a/src/r_compiler/llvmdrawers.h b/src/r_compiler/llvmdrawers.h index 5cbd05e4aa..b2978cabfa 100644 --- a/src/r_compiler/llvmdrawers.h +++ b/src/r_compiler/llvmdrawers.h @@ -117,6 +117,7 @@ struct DrawColumnArgs { uint32_t *dest; const uint8_t *source; + const uint8_t *source2; uint8_t *colormap; uint8_t *translation; const uint32_t *basecolors; @@ -124,6 +125,8 @@ struct DrawColumnArgs int32_t count; int32_t dest_y; uint32_t iscale; + uint32_t texturefracx; + uint32_t textureheight; uint32_t texturefrac; uint32_t light; uint32_t color; @@ -143,7 +146,8 @@ struct DrawColumnArgs uint32_t flags; enum Flags { - simple_shade = 1 + simple_shade = 1, + nearest_filter = 2 }; FString ToString() diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 0a2c8553ee..0ff047238d 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -2973,7 +2973,8 @@ ESPSResult R_SetPatchStyle (FRenderStyle style, fixed_t alpha, int translation, { R_SetColorMapLight(basecolormap, 0, 0); } - return r_columnmethod ? DoDraw1 : DoDraw0; + bool active_columnmethod = r_columnmethod && !r_swtruecolor; + return active_columnmethod ? DoDraw1 : DoDraw0; } fglevel = GetAlpha(style.SrcAlpha, alpha); @@ -3006,7 +3007,8 @@ ESPSResult R_SetPatchStyle (FRenderStyle style, fixed_t alpha, int translation, { return DontDraw; } - return r_columnmethod ? DoDraw1 : DoDraw0; + bool active_columnmethod = r_columnmethod && !r_swtruecolor; + return active_columnmethod ? 
DoDraw1 : DoDraw0; } void R_FinishSetPatchStyle () diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index c65718de22..2f1f6d17ae 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -348,6 +348,7 @@ public: { args.dest = (uint32_t*)dc_dest; args.source = dc_source; + args.source2 = dc_source2; args.colormap = dc_colormap; args.translation = dc_translation; args.basecolors = (const uint32_t *)GPalette.BaseColors; @@ -355,6 +356,8 @@ public: args.count = dc_count; args.dest_y = _dest_y; args.iscale = dc_iscale; + args.texturefracx = dc_texturefracx; + args.textureheight = dc_textureheight; args.texturefrac = dc_texturefrac; args.light = LightBgra::calc_light_multiplier(dc_light); args.color = LightBgra::shade_pal_index_simple(dc_color, args.light); @@ -373,6 +376,8 @@ public: args.flags = 0; if (dc_shade_constants.simple_shade) args.flags |= DrawColumnArgs::simple_shade; + if (args.source2 == nullptr) + args.flags |= DrawWallArgs::nearest_filter; DetectRangeError(args.dest, args.dest_y, args.count); } diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp index 2963d2fc17..04f216c0c3 100644 --- a/src/r_drawt_rgba.cpp +++ b/src/r_drawt_rgba.cpp @@ -72,6 +72,7 @@ public: { args.dest = (uint32_t*)dc_destorg + ylookup[yl] + sx; args.source = nullptr; + args.source2 = nullptr; args.colormap = dc_colormap; args.translation = dc_translation; args.basecolors = (const uint32_t *)GPalette.BaseColors; @@ -97,6 +98,8 @@ public: args.flags = 0; if (dc_shade_constants.simple_shade) args.flags |= DrawColumnArgs::simple_shade; + if (args.source2 == nullptr) + args.flags |= DrawWallArgs::nearest_filter; DetectRangeError(args.dest, args.dest_y, args.count); } diff --git a/src/r_segs.cpp b/src/r_segs.cpp index e1925f66a1..ea3debb18e 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -1183,6 +1183,7 @@ WallscanSampler::WallscanSampler(int y1, float swal, double yrepeat, fixed_t xof } else { + xoffset -= FRACUNIT / 2; int tx0 = (xoffset >> FRACBITS) % mip_width; if (tx0 
< 0) tx0 += mip_width; diff --git a/src/r_things.cpp b/src/r_things.cpp index a04676492d..ef52088483 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -251,8 +251,133 @@ double sprtopscreen; bool sprflipvert; +void R_DrawMaskedColumnBgra(FTexture *tex, fixed_t col, bool useRt, bool unmasked) +{ + fixed_t saved_iscale = dc_iscale; // Save this because we need to modify it for mipmaps + + // Normalize to 0-1 range: + double uv_stepd = FIXED2DBL(dc_iscale); + double v_step = uv_stepd / tex->GetHeight(); + + // Convert to uint32: + dc_iscale = (uint32_t)(v_step * (1 << 30)); + + // Texture mipmap and filter selection: + fixed_t xoffset = col; + double magnitude = fabs(uv_stepd * 2); + bool magnifying = magnitude < 1.0f; + + int mipmap_offset = 0; + int mip_width = tex->GetWidth(); + int mip_height = tex->GetHeight(); + if (r_mipmap && tex->Mipmapped() && mip_width > 1 && mip_height > 1) + { + uint32_t xpos = (uint32_t)((((uint64_t)xoffset) << FRACBITS) / mip_width); + double texture_bias = 1.7f; + double level = MAX(magnitude - 3.0, 0.0); + while (level > texture_bias && mip_width > 1 && mip_height > 1) + { + mipmap_offset += mip_width * mip_height; + level *= 0.5f; + mip_width = MAX(mip_width >> 1, 1); + mip_height = MAX(mip_height >> 1, 1); + } + xoffset = (xpos >> FRACBITS) * mip_width; + } + + const uint32_t *pixels = tex->GetPixelsBgra() + mipmap_offset; + + bool filter_nearest = (magnifying && !r_magfilter) || (!magnifying && !r_minfilter); + if (filter_nearest) + { + xoffset = MAX(MIN(xoffset, (mip_width << FRACBITS) - 1), 0); + + int tx = xoffset >> FRACBITS; + dc_source = (BYTE*)(pixels + tx * mip_height); + dc_source2 = nullptr; + dc_textureheight = mip_height; + dc_texturefracx = 0; + } + else + { + xoffset = MAX(MIN(xoffset - (FRACUNIT / 2), (mip_width << FRACBITS) - 1), 0); + + int tx0 = xoffset >> FRACBITS; + int tx1 = MIN(tx0 + 1, mip_width - 1); + dc_source = (BYTE*)(pixels + tx0 * mip_height); + dc_source2 = (BYTE*)(pixels + tx1 * mip_height); + 
dc_textureheight = mip_height; + dc_texturefracx = (xoffset >> (FRACBITS - 4)) & 15; + } + + // Grab the posts we need to draw + const FTexture::Span *span; + tex->GetColumnBgra(col >> FRACBITS, &span); + FTexture::Span unmaskedSpan[2]; + if (unmasked) + { + span = unmaskedSpan; + unmaskedSpan[0].TopOffset = 0; + unmaskedSpan[0].Length = tex->GetHeight(); + unmaskedSpan[1].TopOffset = 0; + unmaskedSpan[1].Length = 0; + } + + // Draw each span post + while (span->Length != 0) + { + const int length = span->Length; + const int top = span->TopOffset; + + // calculate unclipped screen coordinates for post + dc_yl = (int)(sprtopscreen + spryscale * top + 0.5); + dc_yh = (int)(sprtopscreen + spryscale * (top + length) + 0.5) - 1; + + if (sprflipvert) + { + swapvalues(dc_yl, dc_yh); + } + + if (dc_yh >= mfloorclip[dc_x]) + { + dc_yh = mfloorclip[dc_x] - 1; + } + if (dc_yl < mceilingclip[dc_x]) + { + dc_yl = mceilingclip[dc_x]; + } + + if (dc_yl <= dc_yh) + { + dc_dest = (ylookup[dc_yl] + dc_x) * 4 + dc_destorg; + dc_count = dc_yh - dc_yl + 1; + + double v = ((dc_yl + 0.5 - sprtopscreen) / spryscale) / tex->GetHeight(); + dc_texturefrac = (uint32_t)(v * (1 << 30)); + + if (useRt) + hcolfunc_pre(); + else + colfunc(); + } + span++; + } + + dc_iscale = saved_iscale; + + if (sprflipvert && useRt) + rt_flip_posts(); +} + void R_DrawMaskedColumn (FTexture *tex, fixed_t col, bool useRt, bool unmasked) { + // Handle the linear filtered version in a different function to reduce chances of merge conflicts from zdoom. 
+ if (r_swtruecolor && !drawer_needs_pal_input && !useRt) // To do: add support to R_DrawColumnHoriz_rgba + { + R_DrawMaskedColumnBgra(tex, col, useRt, unmasked); + return; + } + const FTexture::Span *span; const BYTE *column; if (r_swtruecolor && !drawer_needs_pal_input) From d084f775467f31c875ba9b22e8a7257c8d4fedbf Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 5 Nov 2016 16:12:59 +0100 Subject: [PATCH 266/912] Fix mipmap generation bug Fix crash due to dc_source2 not always being set Add r_lod_bias to control mipmap selection Improve LOD calculations to take the U texture coordinate into account --- src/r_draw.h | 1 + src/r_draw_rgba.cpp | 5 ++++- src/r_draw_rgba.h | 1 + src/r_drawt_rgba.cpp | 2 +- src/r_segs.cpp | 36 +++++++++++++++++++++++++----------- src/r_things.cpp | 18 ++++++++++++------ src/textures/texture.cpp | 12 ++++++------ 7 files changed, 50 insertions(+), 25 deletions(-) diff --git a/src/r_draw.h b/src/r_draw.h index 2fa662a95e..005897de6d 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -411,5 +411,6 @@ EXTERN_CVAR(Bool, r_multithreaded); EXTERN_CVAR(Bool, r_magfilter); EXTERN_CVAR(Bool, r_minfilter); EXTERN_CVAR(Bool, r_mipmap); +EXTERN_CVAR(Float, r_lod_bias); #endif diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 2f1f6d17ae..34c79b3aab 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -60,6 +60,9 @@ CVAR(Bool, r_minfilter, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); // Use mipmapped textures CVAR(Bool, r_mipmap, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); +// Level of detail texture bias +CVAR(Float, r_lod_bias, -1.5, 0); // To do: add CVAR_ARCHIVE | CVAR_GLOBALCONFIG when a good default has been decided + ///////////////////////////////////////////////////////////////////////////// class DrawSpanLLVMCommand : public DrawerCommand @@ -377,7 +380,7 @@ public: if (dc_shade_constants.simple_shade) args.flags |= DrawColumnArgs::simple_shade; if (args.source2 == nullptr) - args.flags |= DrawWallArgs::nearest_filter; + 
args.flags |= DrawColumnArgs::nearest_filter; DetectRangeError(args.dest, args.dest_y, args.count); } diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index 253315f149..4aa1a02aff 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -34,6 +34,7 @@ struct FSpecialColormap; EXTERN_CVAR(Bool, r_mipmap) +EXTERN_CVAR(Float, r_lod_bias) ///////////////////////////////////////////////////////////////////////////// // Drawer functions: diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp index 04f216c0c3..29a315a8c0 100644 --- a/src/r_drawt_rgba.cpp +++ b/src/r_drawt_rgba.cpp @@ -99,7 +99,7 @@ public: if (dc_shade_constants.simple_shade) args.flags |= DrawColumnArgs::simple_shade; if (args.source2 == nullptr) - args.flags |= DrawWallArgs::nearest_filter; + args.flags |= DrawColumnArgs::nearest_filter; DetectRangeError(args.dest, args.dest_y, args.count); } diff --git a/src/r_segs.cpp b/src/r_segs.cpp index ea3debb18e..adbd5a3f90 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -1070,7 +1070,7 @@ EXTERN_CVAR(Bool, r_mipmap) struct WallscanSampler { WallscanSampler() { } - WallscanSampler(int y1, float swal, double yrepeat, fixed_t xoffset, FTexture *texture, const BYTE*(*getcol)(FTexture *texture, int x)); + WallscanSampler(int y1, float swal, double yrepeat, fixed_t xoffset, double xmagnitude, FTexture *texture, const BYTE*(*getcol)(FTexture *texture, int x)); uint32_t uv_pos; uint32_t uv_step; @@ -1082,8 +1082,10 @@ struct WallscanSampler uint32_t height; }; -WallscanSampler::WallscanSampler(int y1, float swal, double yrepeat, fixed_t xoffset, FTexture *texture, const BYTE*(*getcol)(FTexture *texture, int x)) +WallscanSampler::WallscanSampler(int y1, float swal, double yrepeat, fixed_t xoffset, double xmagnitude, FTexture *texture, const BYTE*(*getcol)(FTexture *texture, int x)) { + xoffset += FLOAT2FIXED(xmagnitude * 0.5); + if (!r_swtruecolor) { height = texture->GetHeight(); @@ -1147,8 +1149,11 @@ WallscanSampler::WallscanSampler(int y1, float swal, 
double yrepeat, fixed_t xof } else { - double magnitude = fabs(uv_stepd * 2); - bool magnifying = magnitude < 1.0f; + double ymagnitude = fabs(uv_stepd); + double magnitude = MAX(ymagnitude, xmagnitude); + double min_lod = -1000.0; + double lod = MAX(log2(magnitude) + r_lod_bias, min_lod); + bool magnifying = lod < 0.0f; int mipmap_offset = 0; int mip_width = texture->GetWidth(); @@ -1156,12 +1161,12 @@ WallscanSampler::WallscanSampler(int y1, float swal, double yrepeat, fixed_t xof if (r_mipmap && texture->Mipmapped() && mip_width > 1 && mip_height > 1) { uint32_t xpos = (uint32_t)((((uint64_t)xoffset) << FRACBITS) / mip_width); - double texture_bias = 1.7f; - double level = MAX(magnitude - 3.0, 0.0); - while (level > texture_bias && mip_width > 1 && mip_height > 1) + + int level = (int)lod; + while (level > 0 && mip_width > 1 && mip_height > 1) { mipmap_offset += mip_width * mip_height; - level *= 0.5f; + level--; mip_width = MAX(mip_width >> 1, 1); mip_height = MAX(mip_height >> 1, 1); } @@ -1411,6 +1416,8 @@ void wallscan_any( int aligned_x1 = clamp((x1 + 3) / 4 * 4, x1, x2); int aligned_x2 = clamp(x2 / 4 * 4, x1, x2); + double xmagnitude = 1.0; + // First unaligned columns: for (int x = x1; x < aligned_x1; x++, light += rw_lightstep) { @@ -1422,7 +1429,9 @@ void wallscan_any( if (!fixed) R_SetColorMapLight(basecolormap, light, wallshade); - WallscanSampler sampler(y1, swal[x], yrepeat, lwal[x] + xoffset, rw_pic, getcol); + if (x + 1 < x2) xmagnitude = fabs(FIXED2DBL(lwal[x + 1]) - FIXED2DBL(lwal[x])); + + WallscanSampler sampler(y1, swal[x], yrepeat, lwal[x] + xoffset, xmagnitude, rw_pic, getcol); wallscan_drawcol1(x, y1, y2, sampler, draw1column); } @@ -1442,7 +1451,10 @@ void wallscan_any( WallscanSampler sampler[4]; for (int i = 0; i < 4; i++) - sampler[i] = WallscanSampler(y1[i], swal[x + i], yrepeat, lwal[x + i] + xoffset, rw_pic, getcol); + { + if (x + i + 1 < x2) xmagnitude = fabs(FIXED2DBL(lwal[x + i + 1]) - FIXED2DBL(lwal[x + i])); + sampler[i] = 
WallscanSampler(y1[i], swal[x + i], yrepeat, lwal[x + i] + xoffset, xmagnitude, rw_pic, getcol); + } // Figure out where we vertically can start and stop drawing 4 columns in one go int middle_y1 = y1[0]; @@ -1529,7 +1541,9 @@ void wallscan_any( if (!fixed) R_SetColorMapLight(basecolormap, light, wallshade); - WallscanSampler sampler(y1, swal[x], yrepeat, lwal[x] + xoffset, rw_pic, getcol); + if (x + 1 < x2) xmagnitude = fabs(FIXED2DBL(lwal[x + 1]) - FIXED2DBL(lwal[x])); + + WallscanSampler sampler(y1, swal[x], yrepeat, lwal[x] + xoffset, xmagnitude, rw_pic, getcol); wallscan_drawcol1(x, y1, y2, sampler, draw1column); } diff --git a/src/r_things.cpp b/src/r_things.cpp index ef52088483..4f6d5fcc2a 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -264,8 +264,13 @@ void R_DrawMaskedColumnBgra(FTexture *tex, fixed_t col, bool useRt, bool unmaske // Texture mipmap and filter selection: fixed_t xoffset = col; - double magnitude = fabs(uv_stepd * 2); - bool magnifying = magnitude < 1.0f; + + double xmagnitude = 1.0; // To do: pass this into R_DrawMaskedColumn + double ymagnitude = fabs(uv_stepd); + double magnitude = MAX(ymagnitude, xmagnitude); + double min_lod = -1000.0; + double lod = MAX(log2(magnitude) + r_lod_bias, min_lod); + bool magnifying = lod < 0.0f; int mipmap_offset = 0; int mip_width = tex->GetWidth(); @@ -273,12 +278,11 @@ void R_DrawMaskedColumnBgra(FTexture *tex, fixed_t col, bool useRt, bool unmaske if (r_mipmap && tex->Mipmapped() && mip_width > 1 && mip_height > 1) { uint32_t xpos = (uint32_t)((((uint64_t)xoffset) << FRACBITS) / mip_width); - double texture_bias = 1.7f; - double level = MAX(magnitude - 3.0, 0.0); - while (level > texture_bias && mip_width > 1 && mip_height > 1) + int level = (int)lod; + while (level > 0 && mip_width > 1 && mip_height > 1) { mipmap_offset += mip_width * mip_height; - level *= 0.5f; + level--; mip_width = MAX(mip_width >> 1, 1); mip_height = MAX(mip_height >> 1, 1); } @@ -424,6 +428,7 @@ void R_DrawMaskedColumn 
(FTexture *tex, fixed_t col, bool useRt, bool unmasked) { dc_texturefrac = FLOAT2FIXED((dc_yl + 0.5 - sprtopscreen) / spryscale); dc_source = column; + dc_source2 = nullptr; dc_dest = (ylookup[dc_yl] + dc_x) * pixelsize + dc_destorg; dc_count = dc_yh - dc_yl + 1; @@ -3215,6 +3220,7 @@ void R_DrawVoxel(const FVector3 &globalpos, FAngle viewangle, dc_x = lxt + x; rt_initcols(OffscreenColorBuffer + (dc_x & ~3) * OffscreenBufferHeight); dc_source = col; + dc_source2 = nullptr; dc_texturefrac = yplc[xxl]; hcolfunc_pre(); } diff --git a/src/textures/texture.cpp b/src/textures/texture.cpp index 5774a0048c..01906ac7b9 100644 --- a/src/textures/texture.cpp +++ b/src/textures/texture.cpp @@ -456,7 +456,7 @@ void FTexture::GenerateBgraMipmaps() Color4f src11 = src[sy1 + sx1 * srch]; Color4f c = (src00 + src01 + src10 + src11) * 0.25f; - dest[y + x * h] = src00; + dest[y + x * h] = c; } } @@ -483,7 +483,7 @@ void FTexture::GenerateBgraMipmaps() smoothed[y + x * h] = c; } } - float k = 0.04f; + float k = 0.08f; for (int j = 0; j < w * h; j++) dest[j] = dest[j] + (dest[j] - smoothed[j]) * k; @@ -502,10 +502,10 @@ void FTexture::GenerateBgraMipmaps() int h = MAX(Height >> i, 1); for (int j = 0; j < w * h; j++) { - uint32_t a = (uint32_t)clamp(powf(src[j].a, 1.0f / 2.2f) * 255.0f + 0.5f, 0.0f, 255.0f); - uint32_t r = (uint32_t)clamp(powf(src[j].r, 1.0f / 2.2f) * 255.0f + 0.5f, 0.0f, 255.0f); - uint32_t g = (uint32_t)clamp(powf(src[j].g, 1.0f / 2.2f) * 255.0f + 0.5f, 0.0f, 255.0f); - uint32_t b = (uint32_t)clamp(powf(src[j].b, 1.0f / 2.2f) * 255.0f + 0.5f, 0.0f, 255.0f); + uint32_t a = (uint32_t)clamp(powf(MAX(src[j].a, 0.0f), 1.0f / 2.2f) * 255.0f + 0.5f, 0.0f, 255.0f); + uint32_t r = (uint32_t)clamp(powf(MAX(src[j].r, 0.0f), 1.0f / 2.2f) * 255.0f + 0.5f, 0.0f, 255.0f); + uint32_t g = (uint32_t)clamp(powf(MAX(src[j].g, 0.0f), 1.0f / 2.2f) * 255.0f + 0.5f, 0.0f, 255.0f); + uint32_t b = (uint32_t)clamp(powf(MAX(src[j].b, 0.0f), 1.0f / 2.2f) * 255.0f + 0.5f, 0.0f, 255.0f); dest[j] = 
(a << 24) | (r << 16) | (g << 8) | b; } src += w * h; From 70d4097b3449f14bb7792fb60f6a1280de57eba4 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 6 Nov 2016 07:33:17 +0100 Subject: [PATCH 267/912] Fix clamping bug when r_mipmap is off --- src/r_things.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/r_things.cpp b/src/r_things.cpp index 4f6d5fcc2a..8f91b17408 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -275,9 +275,9 @@ void R_DrawMaskedColumnBgra(FTexture *tex, fixed_t col, bool useRt, bool unmaske int mipmap_offset = 0; int mip_width = tex->GetWidth(); int mip_height = tex->GetHeight(); + uint32_t xpos = (uint32_t)((((uint64_t)xoffset) << FRACBITS) / mip_width); if (r_mipmap && tex->Mipmapped() && mip_width > 1 && mip_height > 1) { - uint32_t xpos = (uint32_t)((((uint64_t)xoffset) << FRACBITS) / mip_width); int level = (int)lod; while (level > 0 && mip_width > 1 && mip_height > 1) { @@ -286,8 +286,8 @@ void R_DrawMaskedColumnBgra(FTexture *tex, fixed_t col, bool useRt, bool unmaske mip_width = MAX(mip_width >> 1, 1); mip_height = MAX(mip_height >> 1, 1); } - xoffset = (xpos >> FRACBITS) * mip_width; } + xoffset = (xpos >> FRACBITS) * mip_width; const uint32_t *pixels = tex->GetPixelsBgra() + mipmap_offset; From aad2cde332dcf07cbb5a56d935495627ae8d3cd2 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 6 Nov 2016 07:37:18 +0100 Subject: [PATCH 268/912] Oops, forced all sandy bridge CPUs to westmere instead of just Pentium G840 --- src/r_compiler/llvmdrawers.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/r_compiler/llvmdrawers.cpp b/src/r_compiler/llvmdrawers.cpp index c390197999..871e3c5cab 100644 --- a/src/r_compiler/llvmdrawers.cpp +++ b/src/r_compiler/llvmdrawers.cpp @@ -497,7 +497,7 @@ void LLVMProgram::CreateEE() std::string errorstring; std::string mcpu = sys::getHostCPUName(); - if (std::string(CPU.CPUString).find("G840") && mcpu == "sandybridge") + if 
(std::string(CPU.CPUString).find("G840") != std::string::npos && mcpu == "sandybridge") mcpu = "westmere"; // Pentium G840 is misdetected as a sandy bridge CPU if (stricmp(llvm_cpu, "auto") != 0) From 3a7532fd9bd702858431d8ecaffaf6cbab93bc1c Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 6 Nov 2016 11:39:28 +0100 Subject: [PATCH 269/912] Improve flat mipmap selection --- src/r_draw.cpp | 1 + src/r_draw.h | 1 + src/r_draw_rgba.cpp | 17 ++++++----------- src/r_plane.cpp | 10 ++++++++++ 4 files changed, 18 insertions(+), 11 deletions(-) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 0ff047238d..bd53889a1f 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -1046,6 +1046,7 @@ dsfixed_t ds_xstep; dsfixed_t ds_ystep; int ds_xbits; int ds_ybits; +double ds_lod; // start of a floor/ceiling tile image const BYTE* ds_source; diff --git a/src/r_draw.h b/src/r_draw.h index 005897de6d..196e26beab 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -316,6 +316,7 @@ extern "C" dsfixed_t ds_ystep; extern "C" int ds_xbits; extern "C" int ds_ybits; extern "C" fixed_t ds_alpha; +extern "C" double ds_lod; // start of a 64*64 tile image extern "C" const BYTE* ds_source; diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 34c79b3aab..a649572ccc 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -97,7 +97,7 @@ public: args.flags = 0; if (ds_shade_constants.simple_shade) args.flags |= DrawSpanArgs::simple_shade; - if (!sampler_setup(args.source, args.xbits, args.ybits, args.xstep, args.ystep, ds_source_mipmapped)) + if (!sampler_setup(args.source, args.xbits, args.ybits, ds_source_mipmapped)) args.flags |= DrawSpanArgs::nearest_filter; } @@ -117,18 +117,13 @@ protected: DrawSpanArgs args; private: - inline static bool sampler_setup(const uint32_t * &source, int &xbits, int &ybits, fixed_t xstep, fixed_t ystep, bool mipmapped) + inline static bool sampler_setup(const uint32_t * &source, int &xbits, int &ybits, bool mipmapped) { - // Is this a magfilter or 
minfilter? - fixed_t xmagnitude = abs(xstep) >> (32 - xbits - FRACBITS); - fixed_t ymagnitude = abs(ystep) >> (32 - ybits - FRACBITS); - fixed_t magnitude = (xmagnitude + ymagnitude) * 2 + (1 << (FRACBITS - 1)); - bool magnifying = (magnitude >> FRACBITS == 0); - + bool magnifying = ds_lod < 0.0f; if (r_mipmap && mipmapped) { - int level = magnitude >> (FRACBITS + 1); - while (level != 0) + int level = (int)ds_lod; + while (level > 0) { if (xbits <= 2 || ybits <= 2) break; @@ -136,7 +131,7 @@ private: source += (1 << (xbits)) * (1 << (ybits)); xbits -= 1; ybits -= 1; - level >>= 1; + level--; } } diff --git a/src/r_plane.cpp b/src/r_plane.cpp index 21cea7bcfd..96225729b8 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -245,6 +245,16 @@ void R_MapPlane (int y, int x1) ds_yfrac = 0; } + if (r_swtruecolor) + { + double distance2 = planeheight * yslope[(y + 1 < viewheight) ? y + 1 : y - 1]; + double xmagnitude = fabs(ystepscale * (distance2 - distance) * FocalLengthX); + double ymagnitude = fabs(xstepscale * (distance2 - distance) * FocalLengthX); + double magnitude = MAX(ymagnitude, xmagnitude); + double min_lod = -1000.0; + ds_lod = MAX(log2(magnitude) + r_lod_bias, min_lod); + } + if (plane_shade) { // Determine lighting based on the span's distance from the viewer. From 9f9e4ea19d6a82dd728993116cbae09ecf91918e Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Sun, 6 Nov 2016 08:58:06 -0500 Subject: [PATCH 270/912] - Added randi's "bottomclip" parameter to dpJudas's OpenGLSWFrameBuffer::FillSimplePoly implementation. The code refused to compile because of the new changes. Note that this is not tested on Linux or Mac, and will likely need changes there as well. 
--- src/gl/system/gl_swframebuffer.cpp | 4 ++-- src/gl/system/gl_swframebuffer.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/gl/system/gl_swframebuffer.cpp b/src/gl/system/gl_swframebuffer.cpp index b6d163044c..0d82aa2217 100644 --- a/src/gl/system/gl_swframebuffer.cpp +++ b/src/gl/system/gl_swframebuffer.cpp @@ -2871,7 +2871,7 @@ void OpenGLSWFrameBuffer::FlatFill(int left, int top, int right, int bottom, FTe void OpenGLSWFrameBuffer::FillSimplePoly(FTexture *texture, FVector2 *points, int npoints, double originx, double originy, double scalex, double scaley, - DAngle rotation, FDynamicColormap *colormap, int lightlevel) + DAngle rotation, FDynamicColormap *colormap, int lightlevel, int bottomclip) { // Use an equation similar to player sprites to determine shade double fadelevel = clamp((LIGHT2SHADE(lightlevel) / 65536. - 12) / NUMCOLORMAPS, 0.0, 1.0); @@ -2892,7 +2892,7 @@ void OpenGLSWFrameBuffer::FillSimplePoly(FTexture *texture, FVector2 *points, in } if (In2D < 2) { - Super::FillSimplePoly(texture, points, npoints, originx, originy, scalex, scaley, rotation, colormap, lightlevel); + Super::FillSimplePoly(texture, points, npoints, originx, originy, scalex, scaley, rotation, colormap, lightlevel, bottomclip); return; } if (!InScene) diff --git a/src/gl/system/gl_swframebuffer.h b/src/gl/system/gl_swframebuffer.h index 226d8cca72..a4ba4e4267 100644 --- a/src/gl/system/gl_swframebuffer.h +++ b/src/gl/system/gl_swframebuffer.h @@ -59,7 +59,7 @@ public: void FlatFill(int left, int top, int right, int bottom, FTexture *src, bool local_origin) override; void DrawLine(int x0, int y0, int x1, int y1, int palColor, uint32 realcolor) override; void DrawPixel(int x, int y, int palcolor, uint32 rgbcolor) override; - void FillSimplePoly(FTexture *tex, FVector2 *points, int npoints, double originx, double originy, double scalex, double scaley, DAngle rotation, FDynamicColormap *colormap, int lightlevel) override; + void FillSimplePoly(FTexture 
*tex, FVector2 *points, int npoints, double originx, double originy, double scalex, double scaley, DAngle rotation, FDynamicColormap *colormap, int lightlevel, int bottomclip) override; bool WipeStartScreen(int type) override; void WipeEndScreen() override; bool WipeDo(int ticks) override; From 3ea9d7cf0464ac5a3e4d7010f2087448357cc86c Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 7 Nov 2016 03:34:59 +0100 Subject: [PATCH 271/912] Move software renderer transform to TriMatrix --- src/r_plane.cpp | 15 +++++--- src/r_triangle.cpp | 88 +++++++++++++++++++++++++++++----------------- src/r_triangle.h | 12 ++++--- 3 files changed, 74 insertions(+), 41 deletions(-) diff --git a/src/r_plane.cpp b/src/r_plane.cpp index 023de80d34..5176d2c366 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -966,6 +966,9 @@ static void R_DrawCubeSky(visplane_t *pl) }; TriMatrix objectToWorld = TriMatrix::translate((float)ViewPos.X, (float)ViewPos.Y, (float)ViewPos.Z) * TriMatrix::scale(1000.0f, 1000.0f, 1000.0f); + TriMatrix objectToClip = TriMatrix::viewToClip() * TriMatrix::worldToView() * objectToWorld; + //TriMatrix objectToWorld = TriMatrix::scale(1000.0f, 1000.0f, 1000.0f); + //TriMatrix objectToClip = TriMatrix::viewToClip() * objectToWorld; uint32_t solid_top = frontskytex->GetSkyCapColor(false); uint32_t solid_bottom = frontskytex->GetSkyCapColor(true); @@ -973,9 +976,9 @@ static void R_DrawCubeSky(visplane_t *pl) solid_top = RGB32k.RGB[(RPART(solid_top) >> 3)][(GPART(solid_top) >> 3)][(BPART(solid_top) >> 3)]; solid_bottom = RGB32k.RGB[(RPART(solid_bottom) >> 3)][(GPART(solid_bottom) >> 3)][(BPART(solid_bottom) >> 3)]; - TriangleDrawer::fill(objectToWorld, cube, 6, TriangleDrawMode::Normal, false, x1, x2 - 1, uwal, dwal, solid_top); - TriangleDrawer::fill(objectToWorld, cube + 6, 6, TriangleDrawMode::Normal, false, x1, x2 - 1, uwal, dwal, solid_bottom); - TriangleDrawer::draw(objectToWorld, cube + 2 * 6, 4 * 6, TriangleDrawMode::Normal, false, x1, x2 - 1, uwal, 
dwal, frontskytex); + TriangleDrawer::fill(objectToClip, cube, 6, TriangleDrawMode::Normal, false, x1, x2 - 1, uwal, dwal, solid_top); + TriangleDrawer::fill(objectToClip, cube + 6, 6, TriangleDrawMode::Normal, false, x1, x2 - 1, uwal, dwal, solid_bottom); + TriangleDrawer::draw(objectToClip, cube + 2 * 6, 4 * 6, TriangleDrawMode::Normal, false, x1, x2 - 1, uwal, dwal, frontskytex); } namespace @@ -1102,7 +1105,8 @@ namespace short *uwal = (short *)pl->top; short *dwal = (short *)pl->bottom; TriMatrix objectToWorld = TriMatrix::translate((float)ViewPos.X, (float)ViewPos.Y, (float)ViewPos.Z); - TriangleDrawer::draw(objectToWorld, &mVertices[mPrimStart[row]], mPrimStart[row + 1] - mPrimStart[row], TriangleDrawMode::Strip, false, x1, x2 - 1, uwal, dwal, frontskytex); + TriMatrix objectToClip = TriMatrix::viewToClip() * TriMatrix::worldToView() * objectToWorld; + TriangleDrawer::draw(objectToClip, &mVertices[mPrimStart[row]], mPrimStart[row + 1] - mPrimStart[row], TriangleDrawMode::Strip, false, x1, x2 - 1, uwal, dwal, frontskytex); } void SkyDome::RenderCapColorRow(int row, bool bottomCap, visplane_t *pl) @@ -1115,7 +1119,8 @@ namespace short *uwal = (short *)pl->top; short *dwal = (short *)pl->bottom; TriMatrix objectToWorld = TriMatrix::translate((float)ViewPos.X, (float)ViewPos.Y, (float)ViewPos.Z); - TriangleDrawer::fill(objectToWorld, &mVertices[mPrimStart[row]], mPrimStart[row + 1] - mPrimStart[row], TriangleDrawMode::Fan, bottomCap, x1, x2 - 1, uwal, dwal, solid); + TriMatrix objectToClip = TriMatrix::viewToClip() * TriMatrix::worldToView() * objectToWorld; + TriangleDrawer::fill(objectToClip, &mVertices[mPrimStart[row]], mPrimStart[row + 1] - mPrimStart[row], TriangleDrawMode::Fan, bottomCap, x1, x2 - 1, uwal, dwal, solid); } void SkyDome::Render(visplane_t *pl) diff --git a/src/r_triangle.cpp b/src/r_triangle.cpp index b6e7b1b2b6..b039ed1ffe 100644 --- a/src/r_triangle.cpp +++ b/src/r_triangle.cpp @@ -36,17 +36,17 @@ #include "r_data/colormaps.h" #include 
"r_triangle.h" -void TriangleDrawer::draw(const TriMatrix &objectToWorld, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, const short *cliptop, const short *clipbottom, FTexture *texture) +void TriangleDrawer::draw(const TriMatrix &objectToClip, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, const short *cliptop, const short *clipbottom, FTexture *texture) { - draw_arrays(objectToWorld, vinput, vcount, mode, ccw, clipleft, clipright, cliptop, clipbottom, texture, 0, &ScreenTriangleDrawer::draw); + draw_arrays(objectToClip, vinput, vcount, mode, ccw, clipleft, clipright, cliptop, clipbottom, texture, 0, &ScreenTriangleDrawer::draw); } -void TriangleDrawer::fill(const TriMatrix &objectToWorld, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, const short *cliptop, const short *clipbottom, int solidcolor) +void TriangleDrawer::fill(const TriMatrix &objectToClip, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, const short *cliptop, const short *clipbottom, int solidcolor) { - draw_arrays(objectToWorld, vinput, vcount, mode, ccw, clipleft, clipright, cliptop, clipbottom, nullptr, solidcolor, &ScreenTriangleDrawer::fill); + draw_arrays(objectToClip, vinput, vcount, mode, ccw, clipleft, clipright, cliptop, clipbottom, nullptr, solidcolor, &ScreenTriangleDrawer::fill); } -void TriangleDrawer::draw_arrays(const TriMatrix &objectToWorld, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, const short *cliptop, const short *clipbottom, FTexture *texture, int solidcolor, void(*drawfunc)(const ScreenTriangleDrawerArgs *)) +void TriangleDrawer::draw_arrays(const TriMatrix &objectToClip, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, const short *cliptop, const short *clipbottom, 
FTexture *texture, int solidcolor, void(*drawfunc)(const ScreenTriangleDrawerArgs *)) { if (vcount < 3) return; @@ -78,28 +78,28 @@ void TriangleDrawer::draw_arrays(const TriMatrix &objectToWorld, const TriVertex for (int i = 0; i < vcount / 3; i++) { for (int j = 0; j < 3; j++) - vert[j] = shade_vertex(objectToWorld, *(vinput++)); + vert[j] = shade_vertex(objectToClip, *(vinput++)); draw_shaded_triangle(vert, ccw, &args, drawfunc); } } else if (mode == TriangleDrawMode::Fan) { - vert[0] = shade_vertex(objectToWorld, *(vinput++)); - vert[1] = shade_vertex(objectToWorld, *(vinput++)); + vert[0] = shade_vertex(objectToClip, *(vinput++)); + vert[1] = shade_vertex(objectToClip, *(vinput++)); for (int i = 2; i < vcount; i++) { - vert[2] = shade_vertex(objectToWorld, *(vinput++)); + vert[2] = shade_vertex(objectToClip, *(vinput++)); draw_shaded_triangle(vert, ccw, &args, drawfunc); vert[1] = vert[2]; } } else // TriangleDrawMode::Strip { - vert[0] = shade_vertex(objectToWorld, *(vinput++)); - vert[1] = shade_vertex(objectToWorld, *(vinput++)); + vert[0] = shade_vertex(objectToClip, *(vinput++)); + vert[1] = shade_vertex(objectToClip, *(vinput++)); for (int i = 2; i < vcount; i++) { - vert[2] = shade_vertex(objectToWorld, *(vinput++)); + vert[2] = shade_vertex(objectToClip, *(vinput++)); draw_shaded_triangle(vert, ccw, &args, drawfunc); vert[0] = vert[1]; vert[1] = vert[2]; @@ -108,25 +108,10 @@ void TriangleDrawer::draw_arrays(const TriMatrix &objectToWorld, const TriVertex } } -TriVertex TriangleDrawer::shade_vertex(const TriMatrix &objectToWorld, TriVertex v) +TriVertex TriangleDrawer::shade_vertex(const TriMatrix &objectToClip, TriVertex v) { - // Apply transform to get world coordinates: - v = objectToWorld * v; - - // The software renderer world to clip transform: - double nearp = 5.0f; - double farp = 65536.f; - double tr_x = v.x - ViewPos.X; - double tr_y = v.y - ViewPos.Y; - double tr_z = v.z - ViewPos.Z; - double tx = tr_x * ViewSin - tr_y * ViewCos; - double tz 
= tr_x * ViewTanCos + tr_y * ViewTanSin; - v.x = (float)tx * 0.5f; - v.y = (float)tr_z * 0.5f; - v.z = (float)((-tz * (farp + nearp) / (nearp - farp) + (2.0f * farp * nearp) / (nearp - farp))); - v.w = (float)tz; - - return v; + // Apply transform to get clip coordinates: + return objectToClip * v; } void TriangleDrawer::draw_shaded_triangle(const TriVertex *vert, bool ccw, ScreenTriangleDrawerArgs *args, void(*drawfunc)(const ScreenTriangleDrawerArgs *)) @@ -150,8 +135,8 @@ void TriangleDrawer::draw_shaded_triangle(const TriVertex *vert, bool ccw, Scree v.z *= v.w; // Apply viewport scale to get screen coordinates: - v.x = (float)(CenterX + v.x * 2.0f * CenterX); - v.y = (float)(CenterY - v.y * 2.0f * InvZtoScale); + v.x = viewwidth * (1.0f + v.x) * 0.5f; + v.y = viewheight * (1.0f - v.y) * 0.5f; } // Draw screen triangles @@ -717,6 +702,45 @@ TriMatrix TriMatrix::rotate(float angle, float x, float y, float z) return m; } +TriMatrix TriMatrix::frustum(float left, float right, float bottom, float top, float near, float far) +{ + float a = (right + left) / (right - left); + float b = (top + bottom) / (top - bottom); + float c = -(far + near) / (far - near); + float d = -(2.0f * far) / (far - near); + TriMatrix m = null(); + m.matrix[0 + 0 * 4] = 2.0f * near / (right - left); + m.matrix[1 + 1 * 4] = 2.0f * near / (top - bottom); + m.matrix[0 + 2 * 4] = a; + m.matrix[1 + 2 * 4] = b; + m.matrix[2 + 2 * 4] = c; + m.matrix[2 + 3 * 4] = d; + m.matrix[3 + 2 * 4] = -1; + return m; +} + +TriMatrix TriMatrix::worldToView() +{ + TriMatrix m = null(); + m.matrix[0 + 0 * 4] = (float)ViewSin; + m.matrix[0 + 1 * 4] = (float)-ViewCos; + m.matrix[1 + 2 * 4] = 1.0f; + m.matrix[2 + 0 * 4] = (float)-ViewCos; + m.matrix[2 + 1 * 4] = (float)-ViewSin; + m.matrix[3 + 3 * 4] = 1.0f; + return m * translate((float)-ViewPos.X, (float)-ViewPos.Y, (float)-ViewPos.Z); +} + +TriMatrix TriMatrix::viewToClip() +{ + float near = 5.0f; + float far = 65536.0f; + float width = (float)(FocalTangent * 
near); + float top = (float)(CenterY / InvZtoScale * near); + float bottom = (float)(top - viewheight / InvZtoScale * near); + return frustum(-width, width, bottom, top, near, far); +} + TriMatrix TriMatrix::operator*(const TriMatrix &mult) const { TriMatrix result; diff --git a/src/r_triangle.h b/src/r_triangle.h index f1ff220b66..05cfada980 100644 --- a/src/r_triangle.h +++ b/src/r_triangle.h @@ -46,6 +46,10 @@ struct TriMatrix static TriMatrix translate(float x, float y, float z); static TriMatrix scale(float x, float y, float z); static TriMatrix rotate(float angle, float x, float y, float z); + static TriMatrix frustum(float left, float right, float bottom, float top, float near, float far); + + static TriMatrix worldToView(); // Software renderer world to view space transform + static TriMatrix viewToClip(); // Software renderer shearing projection TriVertex operator*(TriVertex v) const; TriMatrix operator*(const TriMatrix &m) const; @@ -63,12 +67,12 @@ enum class TriangleDrawMode class TriangleDrawer { public: - static void draw(const TriMatrix &objectToWorld, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, const short *cliptop, const short *clipbottom, FTexture *texture); - static void fill(const TriMatrix &objectToWorld, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, const short *cliptop, const short *clipbottom, int solidcolor); + static void draw(const TriMatrix &objectToClip, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, const short *cliptop, const short *clipbottom, FTexture *texture); + static void fill(const TriMatrix &objectToClip, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, const short *cliptop, const short *clipbottom, int solidcolor); private: - static TriVertex shade_vertex(const TriMatrix &objectToWorld, TriVertex v); - static void 
draw_arrays(const TriMatrix &objectToWorld, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, const short *cliptop, const short *clipbottom, FTexture *texture, int solidcolor, void(*drawfunc)(const ScreenTriangleDrawerArgs *)); + static TriVertex shade_vertex(const TriMatrix &objectToClip, TriVertex v); + static void draw_arrays(const TriMatrix &objectToClip, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, const short *cliptop, const short *clipbottom, FTexture *texture, int solidcolor, void(*drawfunc)(const ScreenTriangleDrawerArgs *)); static void draw_shaded_triangle(const TriVertex *vertices, bool ccw, ScreenTriangleDrawerArgs *args, void(*drawfunc)(const ScreenTriangleDrawerArgs *)); static bool cullhalfspace(float clipdistance1, float clipdistance2, float &t1, float &t2); static void clipedge(const TriVertex &v1, const TriVertex &v2, TriVertex *clippedvert, int &numclipvert); From 92be8f401c11a9471fa0fd36b8f80fb829d59b31 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 7 Nov 2016 04:27:55 +0100 Subject: [PATCH 272/912] Remove old triangle version --- src/r_draw.h | 13 -------- src/r_draw_rgba.cpp | 2 ++ src/r_plane.cpp | 79 +-------------------------------------------- 3 files changed, 3 insertions(+), 91 deletions(-) diff --git a/src/r_draw.h b/src/r_draw.h index 196e26beab..7556575bd1 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -393,19 +393,6 @@ void R_DrawSingleSkyCol4_rgba(uint32_t solid_top, uint32_t solid_bottom); void R_DrawDoubleSkyCol1_rgba(uint32_t solid_top, uint32_t solid_bottom); void R_DrawDoubleSkyCol4_rgba(uint32_t solid_top, uint32_t solid_bottom); -struct TriVertex -{ - TriVertex() { } - TriVertex(float x, float y, float z, float w, float u, float v, float light) : x(x), y(y), z(z), w(w) { varying[0] = u; varying[1] = v; varying[2] = light; } - - enum { NumVarying = 3 }; - float x, y, z, w; - float varying[NumVarying]; -}; - 
-class VSMatrix; -void R_DrawTriangles(const VSMatrix &objectToWorld, const TriVertex *vertices, int count, int clipleft, int clipright, const short *cliptop, const short *clipbottom, FTexture *texture); - extern bool r_swtruecolor; EXTERN_CVAR(Bool, r_multithreaded); diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index a649572ccc..452de79621 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -1260,6 +1260,7 @@ void ApplySpecialColormapRGBACommand::Execute(DrawerThread *thread) ///////////////////////////////////////////////////////////////////////////// +#if 0 class DrawTrianglesCommand : public DrawerCommand { public: @@ -1705,6 +1706,7 @@ void R_DrawTriangles(const VSMatrix &transform, const TriVertex *vertices, int c DrawerCommandQueue::QueueCommand(transform, vertices, count, clipleft, clipright, clipdata, texture->GetPixelsBgra(), texture->GetWidth(), texture->GetHeight()); } +#endif ///////////////////////////////////////////////////////////////////////////// diff --git a/src/r_plane.cpp b/src/r_plane.cpp index 96225729b8..f39360b59f 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -66,7 +66,6 @@ #endif EXTERN_CVAR(Int, r_skymode) -CVAR(Bool, r_cubesky, false, 0) //EXTERN_CVAR (Int, tx) //EXTERN_CVAR (Int, ty) @@ -1016,77 +1015,6 @@ static const BYTE *R_GetTwoSkyColumns (FTexture *fronttex, int x) } } -static void R_DrawCubeSky(visplane_t *pl) -{ - int x1 = pl->left; - int x2 = pl->right; - short *uwal = (short *)pl->top; - short *dwal = (short *)pl->bottom; - - static TriVertex cube[6 * 6] = - { - // Top - { -1.0f, 1.0f, 0.6f, 1.0f, 1.0f, 0.0f, 1.0f }, - { 1.0f, 1.0f, 0.6f, 1.0f, 0.0f, 0.0f, 1.0f }, - { 1.0f, -1.0f, 0.6f, 1.0f, 0.0f, 0.1f, 1.0f }, - - { 1.0f, -1.0f, 0.6f, 1.0f, 0.0f, 0.1f, 1.0f }, - { -1.0f, -1.0f, 0.6f, 1.0f, 1.0f, 0.1f, 1.0f }, - { -1.0f, 1.0f, 0.6f, 1.0f, 1.0f, 0.0f, 1.0f }, - - // Bottom - { 1.0f, -1.0f, -1.0f, 1.0f, 1.0f, 0.9f, 1.0f }, - { 1.0f, 1.0f, -1.0f, 1.0f, 1.0f, 1.0f, 1.0f }, - { -1.0f, 1.0f, -1.0f, 
1.0f, 0.0f, 1.0f, 1.0f }, - - { -1.0f, 1.0f, -1.0f, 1.0f, 0.0f, 1.0f, 1.0f }, - { -1.0f, -1.0f, -1.0f, 1.0f, 0.0f, 0.9f, 1.0f }, - { 1.0f, -1.0f, -1.0f, 1.0f, 1.0f, 0.9f, 1.0f }, - - // Front - { 1.0f, 1.0f, -1.0f, 1.0f, 1.0f, 2.0f, 1.0f }, - { 1.0f, 1.0f, 0.6f, 1.0f, 1.0f, 0.0f, 1.0f }, - { -1.0f, 1.0f, 0.6f, 1.0f, 0.0f, 0.0f, 1.0f }, - - { -1.0f, 1.0f, 0.6f, 1.0f, 0.0f, 0.0f, 1.0f }, - { -1.0f, 1.0f, -1.0f, 1.0f, 0.0f, 2.0f, 1.0f }, - { 1.0f, 1.0f, -1.0f, 1.0f, 1.0f, 2.0f, 1.0f }, - - // Back - { -1.0f, -1.0f, 0.6f, 1.0f, 1.0f, 0.0f, 1.0f }, - { 1.0f, -1.0f, 0.6f, 1.0f, 0.0f, 0.0f, 1.0f }, - { 1.0f, -1.0f, -1.0f, 1.0f, 0.0f, 2.0f, 1.0f }, - - { 1.0f, -1.0f, -1.0f, 1.0f, 0.0f, 2.0f, 1.0f }, - { -1.0f, -1.0f, -1.0f, 1.0f, 1.0f, 2.0f, 1.0f }, - { -1.0f, -1.0f, 0.6f, 1.0f, 1.0f, 0.0f, 1.0f }, - - // Right - { 1.0f, -1.0f, 0.6f, 1.0f, 1.0f, 0.0f, 1.0f }, - { 1.0f, 1.0f, 0.6f, 1.0f, 0.0f, 0.0f, 1.0f }, - { 1.0f, 1.0f, -1.0f, 1.0f, 0.0f, 2.0f, 1.0f }, - - { 1.0f, 1.0f, -1.0f, 1.0f, 0.0f, 2.0f, 1.0f }, - { 1.0f, -1.0f, -1.0f, 1.0f, 1.0f, 2.0f, 1.0f }, - { 1.0f, -1.0f, 0.6f, 1.0f, 1.0f, 0.0f, 1.0f }, - - // Left - { -1.0f, 1.0f, -1.0f, 1.0f, 1.0f, 2.0f, 1.0f }, - { -1.0f, 1.0f, 0.6f, 1.0f, 1.0f, 0.0f, 1.0f }, - { -1.0f, -1.0f, 0.6f, 1.0f, 0.0f, 0.0f, 1.0f }, - - { -1.0f, -1.0f, 0.6f, 1.0f, 0.0f, 0.0f, 1.0f }, - { -1.0f, -1.0f, -1.0f, 1.0f, 0.0f, 2.0f, 1.0f }, - { -1.0f, 1.0f, -1.0f, 1.0f, 1.0f, 2.0f, 1.0f } - }; - - VSMatrix transform(0); - transform.translate((float)ViewPos.X, (float)ViewPos.Y, (float)ViewPos.Z); - transform.scale(1000.0f, 1000.0f, 1000.0f); - - R_DrawTriangles(transform, cube, 6 * 6, x1, x2 - 1, uwal, dwal, frontskytex); -} - static void R_DrawSkyColumnStripe(int start_x, int y1, int y2, int columns, double scale, double texturemid, double yrepeat) { uint32_t height = frontskytex->GetHeight(); @@ -1279,12 +1207,7 @@ static void R_DrawCapSky(visplane_t *pl) static void R_DrawSky (visplane_t *pl) { - if (r_swtruecolor && r_cubesky) - { - 
R_DrawCubeSky(pl); - return; - } - else if (r_skymode == 2) + if (r_skymode == 2) { R_DrawCapSky(pl); return; From 47cc110498f6db86a857d018b847139d5767157a Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 7 Nov 2016 05:24:17 +0100 Subject: [PATCH 273/912] Add true color triangle drawer --- src/r_draw_rgba.cpp | 450 -------------------------------------- src/r_triangle.cpp | 513 ++++++++++++++++++++++++++++++++++++++++++-- src/r_triangle.h | 39 +++- 3 files changed, 525 insertions(+), 477 deletions(-) diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 452de79621..b7be3ceed3 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -1260,456 +1260,6 @@ void ApplySpecialColormapRGBACommand::Execute(DrawerThread *thread) ///////////////////////////////////////////////////////////////////////////// -#if 0 -class DrawTrianglesCommand : public DrawerCommand -{ -public: - DrawTrianglesCommand(const VSMatrix &transform, const TriVertex *vertices, int count, int clipleft, int clipright, const short *clipdata, const uint32_t *texturePixels, int textureWidth, int textureHeight) - : transform(transform), vertices(vertices), count(count), clipleft(clipleft), clipright(clipright), clipdata(clipdata), texturePixels(texturePixels), textureWidth(textureWidth), textureHeight(textureHeight) - { - } - - void Execute(DrawerThread *thread) override - { - int cliplength = clipright - clipleft + 1; - for (int i = 0; i < cliplength; i++) - { - thread->triangle_clip_top[clipleft + i] = clipdata[i]; - thread->triangle_clip_bottom[clipleft + i] = clipdata[cliplength + i]; - } - - draw_triangles(transform, vertices, count, clipleft, clipright, thread->triangle_clip_top, thread->triangle_clip_bottom, thread); - } - - FString DebugInfo() override - { - return "DrawTriangles"; - } - -private: - float gradx(float x0, float y0, float x1, float y1, float x2, float y2, float c0, float c1, float c2) - { - float top = (c1 - c2) * (y0 - y2) - (c0 - c2) * (y1 - y2); - float bottom = 
(x1 - x2) * (y0 - y2) - (x0 - x2) * (y1 - y2); - return top / bottom; - } - - float grady(float x0, float y0, float x1, float y1, float x2, float y2, float c0, float c1, float c2) - { - float top = (c1 - c2) * (x0 - x2) - (c0 - c2) * (x1 - x2); - float bottom = -((x1 - x2) * (y0 - y2) - (x0 - x2) * (y1 - y2)); - return top / bottom; - } - - void triangle(uint32_t *dest, int pitch, const TriVertex &v1, const TriVertex &v2, const TriVertex &v3, int clipleft, int clipright, const short *cliptop, const short *clipbottom, DrawerThread *thread) - { - // 28.4 fixed-point coordinates - const int Y1 = (int)round(16.0f * v1.y); - const int Y2 = (int)round(16.0f * v2.y); - const int Y3 = (int)round(16.0f * v3.y); - - const int X1 = (int)round(16.0f * v1.x); - const int X2 = (int)round(16.0f * v2.x); - const int X3 = (int)round(16.0f * v3.x); - - // Deltas - const int DX12 = X1 - X2; - const int DX23 = X2 - X3; - const int DX31 = X3 - X1; - - const int DY12 = Y1 - Y2; - const int DY23 = Y2 - Y3; - const int DY31 = Y3 - Y1; - - // Fixed-point deltas - const int FDX12 = DX12 << 4; - const int FDX23 = DX23 << 4; - const int FDX31 = DX31 << 4; - - const int FDY12 = DY12 << 4; - const int FDY23 = DY23 << 4; - const int FDY31 = DY31 << 4; - - // Bounding rectangle - int clipymin = cliptop[clipleft]; - int clipymax = clipbottom[clipleft]; - for (int i = clipleft + 1; i <= clipright; i++) - { - clipymin = MIN(clipymin, (int)cliptop[i]); - clipymax = MAX(clipymax, (int)clipbottom[i]); - } - int minx = MAX((MIN(MIN(X1, X2), X3) + 0xF) >> 4, clipleft); - int maxx = MIN((MAX(MAX(X1, X2), X3) + 0xF) >> 4, clipright); - int miny = MAX((MIN(MIN(Y1, Y2), Y3) + 0xF) >> 4, clipymin); - int maxy = MIN((MAX(MAX(Y1, Y2), Y3) + 0xF) >> 4, clipymax - 1); - if (minx >= maxx || miny >= maxy) - return; - - // Block size, standard 8x8 (must be power of two) - const int q = 8; - - // Start in corner of 8x8 block - minx &= ~(q - 1); - miny &= ~(q - 1); - - dest += miny * pitch; - - // Half-edge constants 
- int C1 = DY12 * X1 - DX12 * Y1; - int C2 = DY23 * X2 - DX23 * Y2; - int C3 = DY31 * X3 - DX31 * Y3; - - // Correct for fill convention - if (DY12 < 0 || (DY12 == 0 && DX12 > 0)) C1++; - if (DY23 < 0 || (DY23 == 0 && DX23 > 0)) C2++; - if (DY31 < 0 || (DY31 == 0 && DX31 > 0)) C3++; - - // Gradients - float gradWX = gradx(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); - float gradWY = grady(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); - float startW = v1.w + gradWX * (minx - v1.x) + gradWY * (miny - v1.y); - float gradVaryingX[TriVertex::NumVarying], gradVaryingY[TriVertex::NumVarying], startVarying[TriVertex::NumVarying]; - for (int i = 0; i < TriVertex::NumVarying; i++) - { - gradVaryingX[i] = gradx(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); - gradVaryingY[i] = grady(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); - startVarying[i] = v1.varying[i] * v1.w + gradVaryingX[i] * (minx - v1.x) + gradVaryingY[i] * (miny - v1.y); - } - - // Loop through blocks - for (int y = miny; y < maxy; y += q) - { - for (int x = minx; x < maxx; x += q) - { - // Corners of block - int x0 = x << 4; - int x1 = (x + q - 1) << 4; - int y0 = y << 4; - int y1 = (y + q - 1) << 4; - - // Evaluate half-space functions - bool a00 = C1 + DX12 * y0 - DY12 * x0 > 0; - bool a10 = C1 + DX12 * y0 - DY12 * x1 > 0; - bool a01 = C1 + DX12 * y1 - DY12 * x0 > 0; - bool a11 = C1 + DX12 * y1 - DY12 * x1 > 0; - int a = (a00 << 0) | (a10 << 1) | (a01 << 2) | (a11 << 3); - - bool b00 = C2 + DX23 * y0 - DY23 * x0 > 0; - bool b10 = C2 + DX23 * y0 - DY23 * x1 > 0; - bool b01 = C2 + DX23 * y1 - DY23 * x0 > 0; - bool b11 = C2 + DX23 * y1 - DY23 * x1 > 0; - int b = (b00 << 0) | (b10 << 1) | (b01 << 2) | (b11 << 3); - - bool c00 = C3 + DX31 * y0 - DY31 * x0 > 0; - bool c10 = C3 + DX31 * y0 - DY31 * x1 > 0; - bool c01 = C3 + DX31 * y1 - DY31 * x0 > 0; - bool c11 = C3 + DX31 * y1 - DY31 
* x1 > 0; - int c = (c00 << 0) | (c10 << 1) | (c01 << 2) | (c11 << 3); - - // Skip block when outside an edge - if (a == 0x0 || b == 0x0 || c == 0x0) continue; - - // Check if block needs clipping - int clipcount = 0; - for (int ix = x; ix < x + q; ix++) - { - clipcount += (clipleft > ix) || (clipright < ix) || (cliptop[ix] > y) || (clipbottom[ix] <= y + q - 1); - } - - // Calculate varying variables for affine block - float offx0 = (x - minx) + 0.5f; - float offy0 = (y - miny) + 0.5f; - float offx1 = offx0 + q; - float offy1 = offy0 + q; - float rcpWTL = 1.0f / (startW + offx0 * gradWX + offy0 * gradWY); - float rcpWTR = 1.0f / (startW + offx1 * gradWX + offy0 * gradWY); - float rcpWBL = 1.0f / (startW + offx0 * gradWX + offy1 * gradWY); - float rcpWBR = 1.0f / (startW + offx1 * gradWX + offy1 * gradWY); - float varyingTL[TriVertex::NumVarying]; - float varyingTR[TriVertex::NumVarying]; - float varyingBL[TriVertex::NumVarying]; - float varyingBR[TriVertex::NumVarying]; - for (int i = 0; i < TriVertex::NumVarying; i++) - { - varyingTL[i] = (startVarying[i] + offx0 * gradVaryingX[i] + offy0 * gradVaryingY[i]) * rcpWTL; - varyingTR[i] = (startVarying[i] + offx1 * gradVaryingX[i] + offy0 * gradVaryingY[i]) * rcpWTR; - varyingBL[i] = ((startVarying[i] + offx0 * gradVaryingX[i] + offy1 * gradVaryingY[i]) * rcpWBL - varyingTL[i]) * (1.0f / q); - varyingBR[i] = ((startVarying[i] + offx1 * gradVaryingX[i] + offy1 * gradVaryingY[i]) * rcpWBR - varyingTR[i]) * (1.0f / q); - } - - uint32_t *buffer = dest; - - // Accept whole block when totally covered - if (a == 0xF && b == 0xF && c == 0xF && clipcount == 0) - { - for (int iy = 0; iy < q; iy++) - { - float varying[TriVertex::NumVarying], varyingStep[TriVertex::NumVarying]; - for (int i = 0; i < TriVertex::NumVarying; i++) - { - varying[i] = varyingTL[i] + varyingBL[i] * iy; - varyingStep[i] = (varyingTR[i] + varyingBR[i] * iy - varying[i]) * (1.0f / q); - } - - if (!thread->skipped_by_thread(y + iy)) - { - for (int ix = x; ix 
< x + q; ix++) - { - uint32_t ufrac = (uint32_t)((varying[0] - floor(varying[0])) * 0x100000000LL); - uint32_t vfrac = (uint32_t)((varying[1] - floor(varying[1])) * 0x100000000LL); - //uint32_t light = (uint32_t)clamp(varying[2] * 255.0f + 0.5f, 0.0f, 255.0f); - - uint32_t upos = ((ufrac >> 16) * textureWidth) >> 16; - uint32_t vpos = ((vfrac >> 16) * textureHeight) >> 16; - uint32_t uvoffset = upos * textureHeight + vpos; - - buffer[ix] = texturePixels[uvoffset]; - - for (int i = 0; i < TriVertex::NumVarying; i++) - varying[i] += varyingStep[i]; - } - } - - buffer += pitch; - } - } - else // Partially covered block - { - int CY1 = C1 + DX12 * y0 - DY12 * x0; - int CY2 = C2 + DX23 * y0 - DY23 * x0; - int CY3 = C3 + DX31 * y0 - DY31 * x0; - - for (int iy = 0; iy < q; iy++) - { - int CX1 = CY1; - int CX2 = CY2; - int CX3 = CY3; - - float varying[TriVertex::NumVarying], varyingStep[TriVertex::NumVarying]; - for (int i = 0; i < TriVertex::NumVarying; i++) - { - varying[i] = varyingTL[i] + varyingBL[i] * iy; - varyingStep[i] = (varyingTR[i] + varyingBR[i] * iy - varying[i]) * (1.0f / q); - } - - if (!thread->skipped_by_thread(y + iy)) - { - for (int ix = x; ix < x + q; ix++) - { - bool visible = ix >= clipleft && ix <= clipright && (cliptop[ix] <= y + iy) && (clipbottom[ix] > y + iy); - - if (CX1 > 0 && CX2 > 0 && CX3 > 0 && visible) - { - uint32_t ufrac = (uint32_t)((varying[0] - floor(varying[0])) * 0x100000000LL); - uint32_t vfrac = (uint32_t)((varying[1] - floor(varying[1])) * 0x100000000LL); - //uint32_t light = (uint32_t)clamp(varying[2] * 255.0f + 0.5f, 0.0f, 255.0f); - - uint32_t upos = ((ufrac >> 16) * textureWidth) >> 16; - uint32_t vpos = ((vfrac >> 16) * textureHeight) >> 16; - uint32_t uvoffset = upos * textureHeight + vpos; - - buffer[ix] = texturePixels[uvoffset]; - } - - for (int i = 0; i < TriVertex::NumVarying; i++) - varying[i] += varyingStep[i]; - - CX1 -= FDY12; - CX2 -= FDY23; - CX3 -= FDY31; - } - } - - CY1 += FDX12; - CY2 += FDX23; - CY3 += 
FDX31; - - buffer += pitch; - } - } - } - - dest += q * pitch; - } - } - - bool cullhalfspace(float clipdistance1, float clipdistance2, float &t1, float &t2) - { - float d1 = clipdistance1 * (1.0f - t1) + clipdistance2 * t1; - float d2 = clipdistance1 * (1.0f - t2) + clipdistance2 * t2; - if (d1 < 0.0f && d2 < 0.0f) - return true; - - if (d1 < 0.0f) - t1 = MAX(-clipdistance1 / (clipdistance2 - clipdistance1), t1); - - if (d2 < 0.0f) - t2 = MIN(1.0f + clipdistance2 / (clipdistance1 - clipdistance2), t2); - - return false; - } - - void clipedge(const TriVertex &v1, const TriVertex &v2, TriVertex *clippedvert, int &numclipvert) - { - // Clip and cull so that the following is true for all vertices: - // -v.w <= v.x <= v.w - // -v.w <= v.y <= v.w - // -v.w <= v.z <= v.w - - float t1 = 0.0f, t2 = 1.0f; - bool culled = - cullhalfspace(v1.x + v1.w, v2.x + v2.w, t1, t2) || - cullhalfspace(v1.w - v1.x, v2.w - v2.x, t1, t2) || - cullhalfspace(v1.y + v1.w, v2.y + v2.w, t1, t2) || - cullhalfspace(v1.w - v1.y, v2.w - v2.y, t1, t2) || - cullhalfspace(v1.z + v1.w, v2.z + v2.w, t1, t2) || - cullhalfspace(v1.w - v1.z, v2.w - v2.z, t1, t2); - if (culled) - return; - - if (t1 == 0.0f) - { - clippedvert[numclipvert++] = v1; - } - else - { - auto &v = clippedvert[numclipvert++]; - v.x = v1.x * (1.0f - t1) + v2.x * t1; - v.y = v1.y * (1.0f - t1) + v2.y * t1; - v.z = v1.z * (1.0f - t1) + v2.z * t1; - v.w = v1.w * (1.0f - t1) + v2.w * t1; - for (int i = 0; i < TriVertex::NumVarying; i++) - v.varying[i] = v1.varying[i] * (1.0f - t1) + v2.varying[i] * t1; - } - - if (t2 != 1.0f) - { - auto &v = clippedvert[numclipvert++]; - v.x = v1.x * (1.0f - t2) + v2.x * t2; - v.y = v1.y * (1.0f - t2) + v2.y * t2; - v.z = v1.z * (1.0f - t2) + v2.z * t2; - v.w = v1.w * (1.0f - t2) + v2.w * t2; - for (int i = 0; i < TriVertex::NumVarying; i++) - v.varying[i] = v1.varying[i] * (1.0f - t2) + v2.varying[i] * t2; - } - } - - void draw_triangles(const VSMatrix &transform, const TriVertex *vinput, int vcount, int 
clipleft, int clipright, const short *cliptop, const short *clipbottom, DrawerThread *thread) - { - for (int i = 0; i < vcount / 3; i++) - { - TriVertex vert[3]; - - // Vertex shader stuff: - for (int j = 0; j < 3; j++) - { - auto &v = vert[j]; - v = *(vinput++); - - // Apply transform to get world coordinates: - const float *matrix = transform.get(); - float vx = matrix[0 * 4 + 0] * v.x + matrix[1 * 4 + 0] * v.y + matrix[2 * 4 + 0] * v.z + matrix[3 * 4 + 0] * v.w; - float vy = matrix[0 * 4 + 1] * v.x + matrix[1 * 4 + 1] * v.y + matrix[2 * 4 + 1] * v.z + matrix[3 * 4 + 1] * v.w; - float vz = matrix[0 * 4 + 2] * v.x + matrix[1 * 4 + 2] * v.y + matrix[2 * 4 + 2] * v.z + matrix[3 * 4 + 2] * v.w; - float vw = matrix[0 * 4 + 3] * v.x + matrix[1 * 4 + 3] * v.y + matrix[2 * 4 + 3] * v.z + matrix[3 * 4 + 3] * v.w; - v.x = vx; - v.y = vy; - v.z = vz; - v.w = vw; - - // The software renderer world to clip transform: - double nearp = 5.0f; - double farp = 65536.f; - double tr_x = v.x - ViewPos.X; - double tr_y = v.y - ViewPos.Y; - double tr_z = v.z - ViewPos.Z; - double tx = tr_x * ViewSin - tr_y * ViewCos; - double tz = tr_x * ViewTanCos + tr_y * ViewTanSin; - v.x = (float)tx * 0.5f; - v.y = (float)tr_z * 0.5f; - v.z = (float)((-tz * (farp + nearp) / (nearp - farp) + (2.0f * farp * nearp) / (nearp - farp))); - v.w = (float)tz; - } - - // Cull, clip and generate additional vertices as needed - TriVertex clippedvert[6]; - int numclipvert = 0; - clipedge(vert[0], vert[1], clippedvert, numclipvert); - clipedge(vert[1], vert[2], clippedvert, numclipvert); - clipedge(vert[2], vert[0], clippedvert, numclipvert); - - // Map to 2D viewport: - for (int j = 0; j < numclipvert; j++) - { - auto &v = clippedvert[j]; - - // Calculate normalized device coordinates: - v.w = 1.0f / v.w; - v.x *= v.w; - v.y *= v.w; - v.z *= v.w; - - // Apply viewport scale to get screen coordinates: - v.x = (float)(CenterX + v.x * 2.0f * CenterX); - v.y = (float)(CenterY - v.y * 2.0f * InvZtoScale); - } - - // 
Draw screen triangles - bool ccw = false; - if (ccw) - { - for (int i = numclipvert; i > 1; i--) - { - triangle((uint32_t*)dc_destorg, dc_pitch, clippedvert[numclipvert - 1], clippedvert[i - 1], clippedvert[i - 2], clipleft, clipright, cliptop, clipbottom, thread); - } - } - else - { - for (int i = 2; i < numclipvert; i++) - { - triangle((uint32_t*)dc_destorg, dc_pitch, clippedvert[0], clippedvert[i - 1], clippedvert[i], clipleft, clipright, cliptop, clipbottom, thread); - } - } - } - } - - VSMatrix transform; - const TriVertex *vertices; - int count; - int clipleft; - int clipright; - const short *clipdata; - const uint32_t *texturePixels; - int textureWidth; - int textureHeight; -}; - -void R_DrawTriangles(const VSMatrix &transform, const TriVertex *vertices, int count, int clipleft, int clipright, const short *cliptop, const short *clipbottom, FTexture *texture) -{ - if (clipright < clipleft || clipleft < 0 || clipright > MAXWIDTH) - return; - - int cliplength = clipright - clipleft + 1; - short *clipdata = (short*)DrawerCommandQueue::AllocMemory(cliplength * 2 * sizeof(short)); - if (!clipdata) - { - DrawerCommandQueue::WaitForWorkers(); - clipdata = (short*)DrawerCommandQueue::AllocMemory(cliplength * 2 * sizeof(short)); - if (!clipdata) - return; - } - - for (int i = 0; i < cliplength; i++) - clipdata[i] = cliptop[clipleft + i]; - for (int i = 0; i < cliplength; i++) - clipdata[cliplength + i] = clipbottom[clipleft + i]; - - DrawerCommandQueue::QueueCommand(transform, vertices, count, clipleft, clipright, clipdata, texture->GetPixelsBgra(), texture->GetWidth(), texture->GetHeight()); -} -#endif - -///////////////////////////////////////////////////////////////////////////// - void R_DrawSingleSkyCol1_rgba(uint32_t solid_top, uint32_t solid_bottom) { DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); diff --git a/src/r_triangle.cpp b/src/r_triangle.cpp index b039ed1ffe..59a4464076 100644 --- a/src/r_triangle.cpp +++ b/src/r_triangle.cpp @@ -38,15 
+38,44 @@ void TriangleDrawer::draw(const TriMatrix &objectToClip, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, const short *cliptop, const short *clipbottom, FTexture *texture) { - draw_arrays(objectToClip, vinput, vcount, mode, ccw, clipleft, clipright, cliptop, clipbottom, texture, 0, &ScreenTriangleDrawer::draw); + if (r_swtruecolor) + queue_arrays(objectToClip, vinput, vcount, mode, ccw, clipleft, clipright, cliptop, clipbottom, (const uint8_t*)texture->GetPixelsBgra(), texture->GetWidth(), texture->GetHeight(), 0); + else + draw_arrays(objectToClip, vinput, vcount, mode, ccw, clipleft, clipright, cliptop, clipbottom, texture->GetPixels(), texture->GetWidth(), texture->GetHeight(), 0, nullptr, &ScreenTriangleDrawer::draw); } void TriangleDrawer::fill(const TriMatrix &objectToClip, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, const short *cliptop, const short *clipbottom, int solidcolor) { - draw_arrays(objectToClip, vinput, vcount, mode, ccw, clipleft, clipright, cliptop, clipbottom, nullptr, solidcolor, &ScreenTriangleDrawer::fill); + if (r_swtruecolor) + queue_arrays(objectToClip, vinput, vcount, mode, ccw, clipleft, clipright, cliptop, clipbottom, nullptr, 0, 0, solidcolor); + else + draw_arrays(objectToClip, vinput, vcount, mode, ccw, clipleft, clipright, cliptop, clipbottom, nullptr, 0, 0, solidcolor, nullptr, &ScreenTriangleDrawer::fill); } -void TriangleDrawer::draw_arrays(const TriMatrix &objectToClip, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, const short *cliptop, const short *clipbottom, FTexture *texture, int solidcolor, void(*drawfunc)(const ScreenTriangleDrawerArgs *)) +void TriangleDrawer::queue_arrays(const TriMatrix &objectToClip, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, const short *cliptop, const short *clipbottom, const 
uint8_t *texturePixels, int textureWidth, int textureHeight, int solidcolor) +{ + if (clipright < clipleft || clipleft < 0 || clipright > MAXWIDTH) + return; + + int cliplength = clipright - clipleft + 1; + short *clipdata = (short*)DrawerCommandQueue::AllocMemory(cliplength * 2 * sizeof(short)); + if (!clipdata) + { + DrawerCommandQueue::WaitForWorkers(); + clipdata = (short*)DrawerCommandQueue::AllocMemory(cliplength * 2 * sizeof(short)); + if (!clipdata) + return; + } + + for (int i = 0; i < cliplength; i++) + clipdata[i] = cliptop[clipleft + i]; + for (int i = 0; i < cliplength; i++) + clipdata[cliplength + i] = clipbottom[clipleft + i]; + + DrawerCommandQueue::QueueCommand(objectToClip, vinput, vcount, mode, ccw, clipleft, clipright, clipdata, texturePixels, textureWidth, textureHeight, solidcolor); +} + +void TriangleDrawer::draw_arrays(const TriMatrix &objectToClip, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, const short *cliptop, const short *clipbottom, const uint8_t *texturePixels, int textureWidth, int textureHeight, int solidcolor, DrawerThread *thread, void(*drawfunc)(const ScreenTriangleDrawerArgs *, DrawerThread *)) { if (vcount < 3) return; @@ -58,18 +87,9 @@ void TriangleDrawer::draw_arrays(const TriMatrix &objectToClip, const TriVertex args.clipright = clipright; args.cliptop = cliptop; args.clipbottom = clipbottom; - if (texture) - { - args.textureWidth = texture->GetWidth(); - args.textureHeight = texture->GetHeight(); - args.texturePixels = texture->GetPixels(); - } - else - { - args.textureWidth = 0; - args.textureHeight = 0; - args.texturePixels = nullptr; - } + args.texturePixels = texturePixels; + args.textureWidth = textureWidth; + args.textureHeight = textureHeight; args.solidcolor = solidcolor; TriVertex vert[3]; @@ -79,7 +99,7 @@ void TriangleDrawer::draw_arrays(const TriMatrix &objectToClip, const TriVertex { for (int j = 0; j < 3; j++) vert[j] = shade_vertex(objectToClip, 
*(vinput++)); - draw_shaded_triangle(vert, ccw, &args, drawfunc); + draw_shaded_triangle(vert, ccw, &args, thread, drawfunc); } } else if (mode == TriangleDrawMode::Fan) @@ -89,7 +109,7 @@ void TriangleDrawer::draw_arrays(const TriMatrix &objectToClip, const TriVertex for (int i = 2; i < vcount; i++) { vert[2] = shade_vertex(objectToClip, *(vinput++)); - draw_shaded_triangle(vert, ccw, &args, drawfunc); + draw_shaded_triangle(vert, ccw, &args, thread, drawfunc); vert[1] = vert[2]; } } @@ -100,7 +120,7 @@ void TriangleDrawer::draw_arrays(const TriMatrix &objectToClip, const TriVertex for (int i = 2; i < vcount; i++) { vert[2] = shade_vertex(objectToClip, *(vinput++)); - draw_shaded_triangle(vert, ccw, &args, drawfunc); + draw_shaded_triangle(vert, ccw, &args, thread, drawfunc); vert[0] = vert[1]; vert[1] = vert[2]; ccw = !ccw; @@ -114,7 +134,7 @@ TriVertex TriangleDrawer::shade_vertex(const TriMatrix &objectToClip, TriVertex return objectToClip * v; } -void TriangleDrawer::draw_shaded_triangle(const TriVertex *vert, bool ccw, ScreenTriangleDrawerArgs *args, void(*drawfunc)(const ScreenTriangleDrawerArgs *)) +void TriangleDrawer::draw_shaded_triangle(const TriVertex *vert, bool ccw, ScreenTriangleDrawerArgs *args, DrawerThread *thread, void(*drawfunc)(const ScreenTriangleDrawerArgs *, DrawerThread *)) { // Cull, clip and generate additional vertices as needed TriVertex clippedvert[6]; @@ -147,7 +167,7 @@ void TriangleDrawer::draw_shaded_triangle(const TriVertex *vert, bool ccw, Scree args->v1 = &clippedvert[numclipvert - 1]; args->v2 = &clippedvert[i - 1]; args->v3 = &clippedvert[i - 2]; - drawfunc(args); + drawfunc(args, thread); } } else @@ -157,7 +177,7 @@ void TriangleDrawer::draw_shaded_triangle(const TriVertex *vert, bool ccw, Scree args->v1 = &clippedvert[0]; args->v2 = &clippedvert[i - 1]; args->v3 = &clippedvert[i]; - drawfunc(args); + drawfunc(args, thread); } } } @@ -225,7 +245,7 @@ void TriangleDrawer::clipedge(const TriVertex &v1, const TriVertex &v2, 
TriVerte ///////////////////////////////////////////////////////////////////////////// -void ScreenTriangleDrawer::draw(const ScreenTriangleDrawerArgs *args) +void ScreenTriangleDrawer::draw(const ScreenTriangleDrawerArgs *args, DrawerThread *thread) { uint8_t *dest = args->dest; int pitch = args->pitch; @@ -464,7 +484,7 @@ void ScreenTriangleDrawer::draw(const ScreenTriangleDrawerArgs *args) } } -void ScreenTriangleDrawer::fill(const ScreenTriangleDrawerArgs *args) +void ScreenTriangleDrawer::fill(const ScreenTriangleDrawerArgs *args, DrawerThread *thread) { uint8_t *dest = args->dest; int pitch = args->pitch; @@ -632,6 +652,425 @@ void ScreenTriangleDrawer::fill(const ScreenTriangleDrawerArgs *args) } } +void ScreenTriangleDrawer::draw32(const ScreenTriangleDrawerArgs *args, DrawerThread *thread) +{ + uint32_t *dest = (uint32_t *)args->dest; + int pitch = args->pitch; + const TriVertex &v1 = *args->v1; + const TriVertex &v2 = *args->v2; + const TriVertex &v3 = *args->v3; + int clipleft = args->clipleft; + int clipright = args->clipright; + const short *cliptop = args->cliptop; + const short *clipbottom = args->clipbottom; + const uint32_t *texturePixels = (const uint32_t *)args->texturePixels; + int textureWidth = args->textureWidth; + int textureHeight = args->textureHeight; + + // 28.4 fixed-point coordinates + const int Y1 = (int)round(16.0f * v1.y); + const int Y2 = (int)round(16.0f * v2.y); + const int Y3 = (int)round(16.0f * v3.y); + + const int X1 = (int)round(16.0f * v1.x); + const int X2 = (int)round(16.0f * v2.x); + const int X3 = (int)round(16.0f * v3.x); + + // Deltas + const int DX12 = X1 - X2; + const int DX23 = X2 - X3; + const int DX31 = X3 - X1; + + const int DY12 = Y1 - Y2; + const int DY23 = Y2 - Y3; + const int DY31 = Y3 - Y1; + + // Fixed-point deltas + const int FDX12 = DX12 << 4; + const int FDX23 = DX23 << 4; + const int FDX31 = DX31 << 4; + + const int FDY12 = DY12 << 4; + const int FDY23 = DY23 << 4; + const int FDY31 = DY31 << 4; + + // 
Bounding rectangle + int clipymin = cliptop[clipleft]; + int clipymax = clipbottom[clipleft]; + for (int i = clipleft + 1; i <= clipright; i++) + { + clipymin = MIN(clipymin, (int)cliptop[i]); + clipymax = MAX(clipymax, (int)clipbottom[i]); + } + int minx = MAX((MIN(MIN(X1, X2), X3) + 0xF) >> 4, clipleft); + int maxx = MIN((MAX(MAX(X1, X2), X3) + 0xF) >> 4, clipright); + int miny = MAX((MIN(MIN(Y1, Y2), Y3) + 0xF) >> 4, clipymin); + int maxy = MIN((MAX(MAX(Y1, Y2), Y3) + 0xF) >> 4, clipymax - 1); + if (minx >= maxx || miny >= maxy) + return; + + // Block size, standard 8x8 (must be power of two) + const int q = 8; + + // Start in corner of 8x8 block + minx &= ~(q - 1); + miny &= ~(q - 1); + + dest += miny * pitch; + + // Half-edge constants + int C1 = DY12 * X1 - DX12 * Y1; + int C2 = DY23 * X2 - DX23 * Y2; + int C3 = DY31 * X3 - DX31 * Y3; + + // Correct for fill convention + if (DY12 < 0 || (DY12 == 0 && DX12 > 0)) C1++; + if (DY23 < 0 || (DY23 == 0 && DX23 > 0)) C2++; + if (DY31 < 0 || (DY31 == 0 && DX31 > 0)) C3++; + + // Gradients + float gradWX = gradx(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + float gradWY = grady(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + float startW = v1.w + gradWX * (minx - v1.x) + gradWY * (miny - v1.y); + float gradVaryingX[TriVertex::NumVarying], gradVaryingY[TriVertex::NumVarying], startVarying[TriVertex::NumVarying]; + for (int i = 0; i < TriVertex::NumVarying; i++) + { + gradVaryingX[i] = gradx(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + gradVaryingY[i] = grady(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + startVarying[i] = v1.varying[i] * v1.w + gradVaryingX[i] * (minx - v1.x) + gradVaryingY[i] * (miny - v1.y); + } + + // Loop through blocks + for (int y = miny; y < maxy; y += q) + { + for (int x = minx; x < maxx; x += q) + { + // Corners of block + int x0 = x << 4; + int x1 = (x + q - 
1) << 4; + int y0 = y << 4; + int y1 = (y + q - 1) << 4; + + // Evaluate half-space functions + bool a00 = C1 + DX12 * y0 - DY12 * x0 > 0; + bool a10 = C1 + DX12 * y0 - DY12 * x1 > 0; + bool a01 = C1 + DX12 * y1 - DY12 * x0 > 0; + bool a11 = C1 + DX12 * y1 - DY12 * x1 > 0; + int a = (a00 << 0) | (a10 << 1) | (a01 << 2) | (a11 << 3); + + bool b00 = C2 + DX23 * y0 - DY23 * x0 > 0; + bool b10 = C2 + DX23 * y0 - DY23 * x1 > 0; + bool b01 = C2 + DX23 * y1 - DY23 * x0 > 0; + bool b11 = C2 + DX23 * y1 - DY23 * x1 > 0; + int b = (b00 << 0) | (b10 << 1) | (b01 << 2) | (b11 << 3); + + bool c00 = C3 + DX31 * y0 - DY31 * x0 > 0; + bool c10 = C3 + DX31 * y0 - DY31 * x1 > 0; + bool c01 = C3 + DX31 * y1 - DY31 * x0 > 0; + bool c11 = C3 + DX31 * y1 - DY31 * x1 > 0; + int c = (c00 << 0) | (c10 << 1) | (c01 << 2) | (c11 << 3); + + // Skip block when outside an edge + if (a == 0x0 || b == 0x0 || c == 0x0) continue; + + // Check if block needs clipping + int clipcount = 0; + for (int ix = x; ix < x + q; ix++) + { + clipcount += (clipleft > ix) || (clipright < ix) || (cliptop[ix] > y) || (clipbottom[ix] <= y + q - 1); + } + + // Calculate varying variables for affine block + float offx0 = (x - minx) + 0.5f; + float offy0 = (y - miny) + 0.5f; + float offx1 = offx0 + q; + float offy1 = offy0 + q; + float rcpWTL = 1.0f / (startW + offx0 * gradWX + offy0 * gradWY); + float rcpWTR = 1.0f / (startW + offx1 * gradWX + offy0 * gradWY); + float rcpWBL = 1.0f / (startW + offx0 * gradWX + offy1 * gradWY); + float rcpWBR = 1.0f / (startW + offx1 * gradWX + offy1 * gradWY); + float varyingTL[TriVertex::NumVarying]; + float varyingTR[TriVertex::NumVarying]; + float varyingBL[TriVertex::NumVarying]; + float varyingBR[TriVertex::NumVarying]; + for (int i = 0; i < TriVertex::NumVarying; i++) + { + varyingTL[i] = (startVarying[i] + offx0 * gradVaryingX[i] + offy0 * gradVaryingY[i]) * rcpWTL; + varyingTR[i] = (startVarying[i] + offx1 * gradVaryingX[i] + offy0 * gradVaryingY[i]) * rcpWTR; + varyingBL[i] = 
((startVarying[i] + offx0 * gradVaryingX[i] + offy1 * gradVaryingY[i]) * rcpWBL - varyingTL[i]) * (1.0f / q); + varyingBR[i] = ((startVarying[i] + offx1 * gradVaryingX[i] + offy1 * gradVaryingY[i]) * rcpWBR - varyingTR[i]) * (1.0f / q); + } + + uint32_t *buffer = dest; + + // Accept whole block when totally covered + if (a == 0xF && b == 0xF && c == 0xF && clipcount == 0) + { + for (int iy = 0; iy < q; iy++) + { + float varying[TriVertex::NumVarying], varyingStep[TriVertex::NumVarying]; + for (int i = 0; i < TriVertex::NumVarying; i++) + { + varying[i] = varyingTL[i] + varyingBL[i] * iy; + varyingStep[i] = (varyingTR[i] + varyingBR[i] * iy - varying[i]) * (1.0f / q); + } + + if (!thread->skipped_by_thread(y + iy)) + { + for (int ix = x; ix < x + q; ix++) + { + uint32_t ufrac = (uint32_t)((varying[0] - floor(varying[0])) * 0x100000000LL); + uint32_t vfrac = (uint32_t)((varying[1] - floor(varying[1])) * 0x100000000LL); + //uint32_t light = (uint32_t)clamp(varying[2] * 255.0f + 0.5f, 0.0f, 255.0f); + + uint32_t upos = ((ufrac >> 16) * textureWidth) >> 16; + uint32_t vpos = ((vfrac >> 16) * textureHeight) >> 16; + uint32_t uvoffset = upos * textureHeight + vpos; + + buffer[ix] = texturePixels[uvoffset]; + + for (int i = 0; i < TriVertex::NumVarying; i++) + varying[i] += varyingStep[i]; + } + } + + buffer += pitch; + } + } + else // Partially covered block + { + int CY1 = C1 + DX12 * y0 - DY12 * x0; + int CY2 = C2 + DX23 * y0 - DY23 * x0; + int CY3 = C3 + DX31 * y0 - DY31 * x0; + + for (int iy = 0; iy < q; iy++) + { + int CX1 = CY1; + int CX2 = CY2; + int CX3 = CY3; + + float varying[TriVertex::NumVarying], varyingStep[TriVertex::NumVarying]; + for (int i = 0; i < TriVertex::NumVarying; i++) + { + varying[i] = varyingTL[i] + varyingBL[i] * iy; + varyingStep[i] = (varyingTR[i] + varyingBR[i] * iy - varying[i]) * (1.0f / q); + } + + if (!thread->skipped_by_thread(y + iy)) + { + for (int ix = x; ix < x + q; ix++) + { + bool visible = ix >= clipleft && ix <= clipright && 
(cliptop[ix] <= y + iy) && (clipbottom[ix] > y + iy); + + if (CX1 > 0 && CX2 > 0 && CX3 > 0 && visible) + { + uint32_t ufrac = (uint32_t)((varying[0] - floor(varying[0])) * 0x100000000LL); + uint32_t vfrac = (uint32_t)((varying[1] - floor(varying[1])) * 0x100000000LL); + //uint32_t light = (uint32_t)clamp(varying[2] * 255.0f + 0.5f, 0.0f, 255.0f); + + uint32_t upos = ((ufrac >> 16) * textureWidth) >> 16; + uint32_t vpos = ((vfrac >> 16) * textureHeight) >> 16; + uint32_t uvoffset = upos * textureHeight + vpos; + + buffer[ix] = texturePixels[uvoffset]; + } + + for (int i = 0; i < TriVertex::NumVarying; i++) + varying[i] += varyingStep[i]; + + CX1 -= FDY12; + CX2 -= FDY23; + CX3 -= FDY31; + } + } + + CY1 += FDX12; + CY2 += FDX23; + CY3 += FDX31; + + buffer += pitch; + } + } + } + + dest += q * pitch; + } +} + +void ScreenTriangleDrawer::fill32(const ScreenTriangleDrawerArgs *args, DrawerThread *thread) +{ + uint32_t *dest = (uint32_t *)args->dest; + int pitch = args->pitch; + const TriVertex &v1 = *args->v1; + const TriVertex &v2 = *args->v2; + const TriVertex &v3 = *args->v3; + int clipleft = args->clipleft; + int clipright = args->clipright; + const short *cliptop = args->cliptop; + const short *clipbottom = args->clipbottom; + int solidcolor = args->solidcolor; + + // 28.4 fixed-point coordinates + const int Y1 = (int)round(16.0f * v1.y); + const int Y2 = (int)round(16.0f * v2.y); + const int Y3 = (int)round(16.0f * v3.y); + + const int X1 = (int)round(16.0f * v1.x); + const int X2 = (int)round(16.0f * v2.x); + const int X3 = (int)round(16.0f * v3.x); + + // Deltas + const int DX12 = X1 - X2; + const int DX23 = X2 - X3; + const int DX31 = X3 - X1; + + const int DY12 = Y1 - Y2; + const int DY23 = Y2 - Y3; + const int DY31 = Y3 - Y1; + + // Fixed-point deltas + const int FDX12 = DX12 << 4; + const int FDX23 = DX23 << 4; + const int FDX31 = DX31 << 4; + + const int FDY12 = DY12 << 4; + const int FDY23 = DY23 << 4; + const int FDY31 = DY31 << 4; + + // Bounding 
rectangle + int clipymin = cliptop[clipleft]; + int clipymax = clipbottom[clipleft]; + for (int i = clipleft + 1; i <= clipright; i++) + { + clipymin = MIN(clipymin, (int)cliptop[i]); + clipymax = MAX(clipymax, (int)clipbottom[i]); + } + int minx = MAX((MIN(MIN(X1, X2), X3) + 0xF) >> 4, clipleft); + int maxx = MIN((MAX(MAX(X1, X2), X3) + 0xF) >> 4, clipright); + int miny = MAX((MIN(MIN(Y1, Y2), Y3) + 0xF) >> 4, clipymin); + int maxy = MIN((MAX(MAX(Y1, Y2), Y3) + 0xF) >> 4, clipymax - 1); + if (minx >= maxx || miny >= maxy) + return; + + // Block size, standard 8x8 (must be power of two) + const int q = 8; + + // Start in corner of 8x8 block + minx &= ~(q - 1); + miny &= ~(q - 1); + + dest += miny * pitch; + + // Half-edge constants + int C1 = DY12 * X1 - DX12 * Y1; + int C2 = DY23 * X2 - DX23 * Y2; + int C3 = DY31 * X3 - DX31 * Y3; + + // Correct for fill convention + if (DY12 < 0 || (DY12 == 0 && DX12 > 0)) C1++; + if (DY23 < 0 || (DY23 == 0 && DX23 > 0)) C2++; + if (DY31 < 0 || (DY31 == 0 && DX31 > 0)) C3++; + + // Loop through blocks + for (int y = miny; y < maxy; y += q) + { + for (int x = minx; x < maxx; x += q) + { + // Corners of block + int x0 = x << 4; + int x1 = (x + q - 1) << 4; + int y0 = y << 4; + int y1 = (y + q - 1) << 4; + + // Evaluate half-space functions + bool a00 = C1 + DX12 * y0 - DY12 * x0 > 0; + bool a10 = C1 + DX12 * y0 - DY12 * x1 > 0; + bool a01 = C1 + DX12 * y1 - DY12 * x0 > 0; + bool a11 = C1 + DX12 * y1 - DY12 * x1 > 0; + int a = (a00 << 0) | (a10 << 1) | (a01 << 2) | (a11 << 3); + + bool b00 = C2 + DX23 * y0 - DY23 * x0 > 0; + bool b10 = C2 + DX23 * y0 - DY23 * x1 > 0; + bool b01 = C2 + DX23 * y1 - DY23 * x0 > 0; + bool b11 = C2 + DX23 * y1 - DY23 * x1 > 0; + int b = (b00 << 0) | (b10 << 1) | (b01 << 2) | (b11 << 3); + + bool c00 = C3 + DX31 * y0 - DY31 * x0 > 0; + bool c10 = C3 + DX31 * y0 - DY31 * x1 > 0; + bool c01 = C3 + DX31 * y1 - DY31 * x0 > 0; + bool c11 = C3 + DX31 * y1 - DY31 * x1 > 0; + int c = (c00 << 0) | (c10 << 1) | 
(c01 << 2) | (c11 << 3); + + // Skip block when outside an edge + if (a == 0x0 || b == 0x0 || c == 0x0) continue; + + // Check if block needs clipping + int clipcount = 0; + for (int ix = x; ix < x + q; ix++) + { + clipcount += (clipleft > ix) || (clipright < ix) || (cliptop[ix] > y) || (clipbottom[ix] <= y + q - 1); + } + + uint32_t *buffer = dest; + + // Accept whole block when totally covered + if (a == 0xF && b == 0xF && c == 0xF && clipcount == 0) + { + for (int iy = 0; iy < q; iy++) + { + if (!thread->skipped_by_thread(y + iy)) + { + for (int ix = x; ix < x + q; ix++) + { + buffer[ix] = solidcolor; + } + } + + buffer += pitch; + } + } + else // Partially covered block + { + int CY1 = C1 + DX12 * y0 - DY12 * x0; + int CY2 = C2 + DX23 * y0 - DY23 * x0; + int CY3 = C3 + DX31 * y0 - DY31 * x0; + + for (int iy = 0; iy < q; iy++) + { + int CX1 = CY1; + int CX2 = CY2; + int CX3 = CY3; + + if (!thread->skipped_by_thread(y + iy)) + { + for (int ix = x; ix < x + q; ix++) + { + bool visible = ix >= clipleft && ix <= clipright && (cliptop[ix] <= y + iy) && (clipbottom[ix] > y + iy); + + if (CX1 > 0 && CX2 > 0 && CX3 > 0 && visible) + { + buffer[ix] = solidcolor; + } + + CX1 -= FDY12; + CX2 -= FDY23; + CX3 -= FDY31; + } + } + + CY1 += FDX12; + CY2 += FDX23; + CY3 += FDX31; + + buffer += pitch; + } + } + } + + dest += q * pitch; + } +} + float ScreenTriangleDrawer::gradx(float x0, float y0, float x1, float y1, float x2, float y2, float c0, float c1, float c2) { float top = (c1 - c2) * (y0 - y2) - (c0 - c2) * (y1 - y2); @@ -770,3 +1209,31 @@ TriVertex TriMatrix::operator*(TriVertex v) const v.w = vw; return v; } + +///////////////////////////////////////////////////////////////////////////// + +DrawTrianglesCommand::DrawTrianglesCommand(const TriMatrix &objectToClip, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, const short *clipdata, const uint8_t *texturePixels, int textureWidth, int textureHeight, int solidcolor) + : 
objectToClip(objectToClip), vinput(vinput), vcount(vcount), mode(mode), ccw(ccw), clipleft(clipleft), clipright(clipright), clipdata(clipdata), texturePixels(texturePixels), textureWidth(textureWidth), textureHeight(textureHeight), solidcolor(solidcolor) +{ +} + +void DrawTrianglesCommand::Execute(DrawerThread *thread) +{ + int cliplength = clipright - clipleft + 1; + for (int i = 0; i < cliplength; i++) + { + thread->triangle_clip_top[clipleft + i] = clipdata[i]; + thread->triangle_clip_bottom[clipleft + i] = clipdata[cliplength + i]; + } + + TriangleDrawer::draw_arrays( + objectToClip, vinput, vcount, mode, ccw, + clipleft, clipright, thread->triangle_clip_top, thread->triangle_clip_bottom, + texturePixels, textureWidth, textureHeight, solidcolor, + thread, texturePixels ? ScreenTriangleDrawer::draw32 : ScreenTriangleDrawer::fill32); +} + +FString DrawTrianglesCommand::DebugInfo() +{ + return "DrawTriangles"; +} diff --git a/src/r_triangle.h b/src/r_triangle.h index 05cfada980..8335f1f936 100644 --- a/src/r_triangle.h +++ b/src/r_triangle.h @@ -25,6 +25,7 @@ #define __R_TRIANGLE__ #include "r_draw.h" +#include "r_thread.h" class FTexture; struct ScreenTriangleDrawerArgs; @@ -72,10 +73,14 @@ public: private: static TriVertex shade_vertex(const TriMatrix &objectToClip, TriVertex v); - static void draw_arrays(const TriMatrix &objectToClip, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, const short *cliptop, const short *clipbottom, FTexture *texture, int solidcolor, void(*drawfunc)(const ScreenTriangleDrawerArgs *)); - static void draw_shaded_triangle(const TriVertex *vertices, bool ccw, ScreenTriangleDrawerArgs *args, void(*drawfunc)(const ScreenTriangleDrawerArgs *)); + static void draw_arrays(const TriMatrix &objectToClip, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, const short *cliptop, const short *clipbottom, const uint8_t *texturePixels, int 
textureWidth, int textureHeight, int solidcolor, DrawerThread *thread, void(*drawfunc)(const ScreenTriangleDrawerArgs *, DrawerThread *)); + static void draw_shaded_triangle(const TriVertex *vertices, bool ccw, ScreenTriangleDrawerArgs *args, DrawerThread *thread, void(*drawfunc)(const ScreenTriangleDrawerArgs *, DrawerThread *)); static bool cullhalfspace(float clipdistance1, float clipdistance2, float &t1, float &t2); static void clipedge(const TriVertex &v1, const TriVertex &v2, TriVertex *clippedvert, int &numclipvert); + + static void queue_arrays(const TriMatrix &objectToClip, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, const short *cliptop, const short *clipbottom, const uint8_t *texturePixels, int textureWidth, int textureHeight, int solidcolor); + + friend class DrawTrianglesCommand; }; struct ScreenTriangleDrawerArgs @@ -98,12 +103,38 @@ struct ScreenTriangleDrawerArgs class ScreenTriangleDrawer { public: - static void draw(const ScreenTriangleDrawerArgs *args); - static void fill(const ScreenTriangleDrawerArgs *args); + static void draw(const ScreenTriangleDrawerArgs *args, DrawerThread *thread); + static void fill(const ScreenTriangleDrawerArgs *args, DrawerThread *thread); + + static void draw32(const ScreenTriangleDrawerArgs *args, DrawerThread *thread); + static void fill32(const ScreenTriangleDrawerArgs *args, DrawerThread *thread); private: static float gradx(float x0, float y0, float x1, float y1, float x2, float y2, float c0, float c1, float c2); static float grady(float x0, float y0, float x1, float y1, float x2, float y2, float c0, float c1, float c2); }; +class DrawTrianglesCommand : public DrawerCommand +{ +public: + DrawTrianglesCommand(const TriMatrix &objectToClip, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, const short *clipdata, const uint8_t *texturePixels, int textureWidth, int textureHeight, int solidcolor); + + void 
Execute(DrawerThread *thread) override; + FString DebugInfo() override; + +private: + TriMatrix objectToClip; + const TriVertex *vinput; + int vcount; + TriangleDrawMode mode; + bool ccw; + int clipleft; + int clipright; + const short *clipdata; + const uint8_t *texturePixels; + int textureWidth; + int textureHeight; + int solidcolor; +}; + #endif From 237f54f4be930874cce88a7c8097b75b8b9e7dab Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 7 Nov 2016 09:25:12 +0100 Subject: [PATCH 274/912] Toying with triangle drawer and GL BSP nodes --- src/CMakeLists.txt | 1 + src/r_main.cpp | 17 +- src/r_poly.cpp | 852 +++++++++++++++++++++++++++++++++++++++++++++ src/r_poly.h | 140 ++++++++ src/r_triangle.cpp | 22 ++ src/r_triangle.h | 2 + 6 files changed, 1030 insertions(+), 4 deletions(-) create mode 100644 src/r_poly.cpp create mode 100644 src/r_poly.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 723b954127..91553e275f 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1051,6 +1051,7 @@ set( NOT_COMPILED_SOURCE_FILES set( FASTMATH_PCH_SOURCES r_swrenderer.cpp r_swrenderer2.cpp + r_poly.cpp r_3dfloors.cpp r_bsp.cpp r_draw.cpp diff --git a/src/r_main.cpp b/src/r_main.cpp index dfbcf2e712..5983e538b9 100644 --- a/src/r_main.cpp +++ b/src/r_main.cpp @@ -60,8 +60,17 @@ #include "r_data/colormaps.h" #include "p_maputl.h" #include "r_swrenderer2.h" +#include "r_poly.h" +#include "p_setup.h" +#include "version.h" -CVAR(Bool, r_newrenderer, 0, 0); +CUSTOM_CVAR(Bool, r_newrenderer, 0, CVAR_NOINITCALL) +{ + if (self == 1 && !hasglnodes) + { + Printf("No GL BSP detected. 
You must enable automap texturing and then restart the map\n"); + } +} // MACROS ------------------------------------------------------------------ @@ -907,13 +916,13 @@ void R_RenderActorView (AActor *actor, bool dontmaplines) // Link the polyobjects right before drawing the scene to reduce the amounts of calls to this function PO_LinkToSubsectors(); InSubsector = NULL; - if (!r_newrenderer || !r_swtruecolor) + if (!r_newrenderer) { R_RenderBSPNode(nodes + numnodes - 1); // The head node is the last node output. } else { - RenderBsp bsp; + RenderPolyBsp bsp; bsp.Render(); } R_3D_ResetClip(); // reset clips (floor/ceiling) @@ -925,7 +934,7 @@ void R_RenderActorView (AActor *actor, bool dontmaplines) if (viewactive) { PlaneCycles.Clock(); - if (!r_newrenderer || !r_swtruecolor) + if (!r_newrenderer) { R_DrawPlanes(); R_DrawPortals(); diff --git a/src/r_poly.cpp b/src/r_poly.cpp new file mode 100644 index 0000000000..a1b099f632 --- /dev/null +++ b/src/r_poly.cpp @@ -0,0 +1,852 @@ +/* +** Experimental Doom software renderer +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. 
+** +*/ + +#include +#include "templates.h" +#include "doomdef.h" +#include "sbar.h" +#include "r_data/r_translate.h" +#include "r_poly.h" +#include "r_draw.h" +#include "r_plane.h" // for yslope +#include "r_sky.h" // for skyflatnum +#include "r_things.h" // for pspritexscale + +EXTERN_CVAR(Bool, r_drawplayersprites) +EXTERN_CVAR(Bool, r_deathcamera) +EXTERN_CVAR(Bool, st_scale) + +namespace +{ + short cliptop[MAXWIDTH], clipbottom[MAXWIDTH]; +} + +///////////////////////////////////////////////////////////////////////////// + +void RenderPolyBsp::Render() +{ + PolyVertexBuffer::Clear(); + + for (int i = 0; i < viewwidth; i++) + { + cliptop[i] = 0; + clipbottom[i] = viewheight; + } + + // Perspective correct: + float ratio = WidescreenRatio; + float fovratio = (WidescreenRatio >= 1.3f) ? 1.333333f : ratio; + float fovy = (float)(2 * DAngle::ToDegrees(atan(tan(FieldOfView.Radians() / 2) / fovratio)).Degrees); + TriMatrix worldToView = + TriMatrix::scale(1.0f, (float)YaspectMul, 1.0f) * + TriMatrix::rotate((float)ViewPitch.Radians(), 1.0f, 0.0f, 0.0f) * + TriMatrix::rotate((float)(ViewAngle - 90).Radians(), 0.0f, -1.0f, 0.0f) * + TriMatrix::swapYZ() * + TriMatrix::translate((float)-ViewPos.X, (float)-ViewPos.Y, (float)-ViewPos.Z); + worldToClip = TriMatrix::perspective(fovy, ratio, 5.0, 65535.0f) * worldToView; + + // Y shearing like the Doom renderer: + //worldToClip = TriMatrix::viewToClip() * TriMatrix::worldToView(); + + // Cull front to back (ok, so we dont cull yet, but we should during this!): + if (numnodes == 0) + PvsSectors.push_back(subsectors); // RenderSubsector(subsectors); + else + RenderNode(nodes + numnodes - 1); // The head node is the last node output. 
+ + // Render back to front (we don't have a zbuffer at the moment, sniff!): + for (auto it = PvsSectors.rbegin(); it != PvsSectors.rend(); ++it) + RenderSubsector(*it); + + RenderPlayerSprites(); + RenderScreenSprites(); // To do: should be called by FSoftwareRenderer::DrawRemainingPlayerSprites instead of here +} + +void RenderPolyBsp::RenderScreenSprites() +{ + for (auto &sprite : ScreenSprites) + sprite.Render(); +} + +void RenderPolyBsp::RenderSubsector(subsector_t *sub) +{ + sector_t *frontsector = sub->sector; + frontsector->MoreFlags |= SECF_DRAWN; + + for (uint32_t i = 0; i < sub->numlines; i++) + { + seg_t *line = &sub->firstline[i]; + if (line->sidedef == NULL || !(line->sidedef->Flags & WALLF_POLYOBJ)) + AddLine(line, frontsector); + } + + TriVertex *floorVertices = PolyVertexBuffer::GetVertices(sub->numlines); + TriVertex *ceilVertices = PolyVertexBuffer::GetVertices(sub->numlines); + if (floorVertices == nullptr || ceilVertices == nullptr) + return; + + for (uint32_t i = 0; i < sub->numlines; i++) + { + seg_t *line = &sub->firstline[i]; + int j = sub->numlines - 1 - i; + ceilVertices[j].x = (float)line->v1->fPos().X; + ceilVertices[j].y = (float)line->v1->fPos().Y; + ceilVertices[j].z = (float)frontsector->ceilingplane.ZatPoint(line->v1); + ceilVertices[j].w = 1.0f; + ceilVertices[j].varying[0] = ceilVertices[j].x / 64.0f; + ceilVertices[j].varying[1] = ceilVertices[j].y / 64.0f; + ceilVertices[j].varying[2] = 1.0f; + + floorVertices[i].x = (float)line->v1->fPos().X; + floorVertices[i].y = (float)line->v1->fPos().Y; + floorVertices[i].z = (float)frontsector->floorplane.ZatPoint(line->v1); + floorVertices[i].w = 1.0f; + floorVertices[i].varying[0] = floorVertices[i].x / 64.0f; + floorVertices[i].varying[1] = floorVertices[i].y / 64.0f; + floorVertices[i].varying[2] = 1.0f; + } + + FTexture *floortex = TexMan(frontsector->GetTexture(sector_t::floor)); + if (floortex->UseType != FTexture::TEX_Null) + TriangleDrawer::draw(worldToClip, floorVertices, 
sub->numlines, TriangleDrawMode::Fan, true, 0, viewwidth, cliptop, clipbottom, floortex); + + FTexture *ceiltex = TexMan(frontsector->GetTexture(sector_t::ceiling)); + if (ceiltex->UseType != FTexture::TEX_Null) + TriangleDrawer::draw(worldToClip, ceilVertices, sub->numlines, TriangleDrawMode::Fan, true, 0, viewwidth, cliptop, clipbottom, ceiltex); + + /*for (AActor *thing = sub->sector->thinglist; thing != nullptr; thing = thing->snext) + { + if ((thing->renderflags & RF_SPRITETYPEMASK) == RF_WALLSPRITE) + AddWallSprite(thing); + else + AddSprite(thing); + }*/ +} + +void RenderPolyBsp::AddLine(seg_t *line, sector_t *frontsector) +{ + // Reject lines not facing viewer + DVector2 pt1 = line->v1->fPos() - ViewPos; + DVector2 pt2 = line->v2->fPos() - ViewPos; + if (pt1.Y * (pt1.X - pt2.X) + pt1.X * (pt2.Y - pt1.Y) >= 0) + return; + + double frontceilz1 = frontsector->ceilingplane.ZatPoint(line->v1); + double frontfloorz1 = frontsector->floorplane.ZatPoint(line->v1); + double frontceilz2 = frontsector->ceilingplane.ZatPoint(line->v2); + double frontfloorz2 = frontsector->floorplane.ZatPoint(line->v2); + + //VisiblePlaneKey ceilingPlaneKey(frontsector->GetTexture(sector_t::ceiling), frontsector->ColorMap, frontsector->lightlevel, frontsector->ceilingplane, frontsector->planes[sector_t::ceiling].xform); + //VisiblePlaneKey floorPlaneKey(frontsector->GetTexture(sector_t::floor), frontsector->ColorMap, frontsector->lightlevel, frontsector->floorplane, frontsector->planes[sector_t::floor].xform); + + RenderPolyWall wall; + wall.Line = line; + wall.Colormap = frontsector->ColorMap; + wall.Masked = false; + + if (line->backsector == nullptr) + { + wall.SetCoords(line->v1->fPos(), line->v2->fPos(), frontceilz1, frontfloorz1, frontceilz2, frontfloorz2); + wall.TopZ = frontceilz1; + wall.BottomZ = frontfloorz1; + wall.UnpeggedCeil = frontceilz1; + wall.Texpart = side_t::mid; + wall.Render(worldToClip); + } + else + { + sector_t *backsector = (line->backsector != 
line->frontsector) ? line->backsector : line->frontsector; + + double backceilz1 = backsector->ceilingplane.ZatPoint(line->v1); + double backfloorz1 = backsector->floorplane.ZatPoint(line->v1); + double backceilz2 = backsector->ceilingplane.ZatPoint(line->v2); + double backfloorz2 = backsector->floorplane.ZatPoint(line->v2); + + double topceilz1 = frontceilz1; + double topceilz2 = frontceilz2; + double topfloorz1 = MIN(backceilz1, frontceilz1); + double topfloorz2 = MIN(backceilz2, frontceilz2); + double bottomceilz1 = MAX(frontfloorz1, backfloorz1); + double bottomceilz2 = MAX(frontfloorz2, backfloorz2); + double bottomfloorz1 = frontfloorz1; + double bottomfloorz2 = frontfloorz2; + double middleceilz1 = topfloorz1; + double middleceilz2 = topfloorz2; + double middlefloorz1 = MIN(bottomceilz1, middleceilz1); + double middlefloorz2 = MIN(bottomceilz2, middleceilz2); + + bool bothSkyCeiling = false;// frontsector->GetTexture(sector_t::ceiling) == skyflatnum && backsector->GetTexture(sector_t::ceiling) == skyflatnum; + bool bothSkyFloor = false;// frontsector->GetTexture(sector_t::floor) == skyflatnum && backsector->GetTexture(sector_t::floor) == skyflatnum; + + if ((topceilz1 > topfloorz1 || topceilz2 > topfloorz2) && !bothSkyCeiling && line->sidedef) + { + wall.SetCoords(line->v1->fPos(), line->v2->fPos(), topceilz1, topfloorz1, topceilz2, topfloorz2); + wall.TopZ = topceilz1; + wall.BottomZ = topfloorz1; + wall.UnpeggedCeil = topceilz1; + wall.Texpart = side_t::top; + wall.Render(worldToClip); + } + + if ((bottomfloorz1 < bottomceilz1 || bottomfloorz2 < bottomceilz2) && !bothSkyFloor && line->sidedef) + { + wall.SetCoords(line->v1->fPos(), line->v2->fPos(), bottomceilz1, bottomfloorz1, bottomceilz2, bottomfloorz2); + wall.TopZ = bottomceilz1; + wall.BottomZ = bottomfloorz1; + wall.UnpeggedCeil = topceilz1; + wall.Texpart = side_t::bottom; + wall.Render(worldToClip); + } + + if (line->sidedef) + { + FTexture *midtex = TexMan(line->sidedef->GetTexture(side_t::mid), 
true); + if (midtex && midtex->UseType != FTexture::TEX_Null) + { + wall.SetCoords(line->v1->fPos(), line->v2->fPos(), middleceilz1, middlefloorz1, middleceilz2, middlefloorz2); + wall.TopZ = middleceilz1; + wall.BottomZ = middlefloorz1; + wall.UnpeggedCeil = topceilz1; + wall.Texpart = side_t::mid; + wall.Masked = true; + wall.Render(worldToClip); + } + } + } +} + +void RenderPolyBsp::RenderNode(void *node) +{ + while (!((size_t)node & 1)) // Keep going until found a subsector + { + node_t *bsp = (node_t *)node; + + // Decide which side the view point is on. + int side = PointOnSide(ViewPos, bsp); + + // Recursively divide front space (toward the viewer). + RenderNode(bsp->children[side]); + + // Possibly divide back space (away from the viewer). + side ^= 1; + if (!CheckBBox(bsp->bbox[side])) + return; + + node = bsp->children[side]; + } + PvsSectors.push_back((subsector_t *)((BYTE *)node - 1)); + //RenderSubsector((subsector_t *)((BYTE *)node - 1)); +} + +void RenderPolyBsp::RenderPlayerSprites() +{ + if (!r_drawplayersprites || + !camera || + !camera->player || + (players[consoleplayer].cheats & CF_CHASECAM) || + (r_deathcamera && camera->health <= 0)) + return; + + float bobx, boby; + P_BobWeapon(camera->player, &bobx, &boby, r_TicFracF); + + // Interpolate the main weapon layer once so as to be able to add it to other layers. + double wx, wy; + DPSprite *weapon = camera->player->FindPSprite(PSP_WEAPON); + if (weapon) + { + if (weapon->firstTic) + { + wx = weapon->x; + wy = weapon->y; + } + else + { + wx = weapon->oldx + (weapon->x - weapon->oldx) * r_TicFracF; + wy = weapon->oldy + (weapon->y - weapon->oldy) * r_TicFracF; + } + } + else + { + wx = 0; + wy = 0; + } + + for (DPSprite *sprite = camera->player->psprites; sprite != nullptr; sprite = sprite->GetNext()) + { + // [RH] Don't draw the targeter's crosshair if the player already has a crosshair set. 
+ // It's possible this psprite's caller is now null but the layer itself hasn't been destroyed + // because it didn't tick yet (if we typed 'take all' while in the console for example). + // In this case let's simply not draw it to avoid crashing. + if ((sprite->GetID() != PSP_TARGETCENTER || CrosshairImage == nullptr) && sprite->GetCaller() != nullptr) + { + RenderPlayerSprite(sprite, camera, bobx, boby, wx, wy, r_TicFracF); + } + } +} + +void RenderPolyBsp::RenderPlayerSprite(DPSprite *sprite, AActor *owner, float bobx, float boby, double wx, double wy, double ticfrac) +{ + // decide which patch to use + if ((unsigned)sprite->GetSprite() >= (unsigned)sprites.Size()) + { + DPrintf(DMSG_ERROR, "RenderPlayerSprite: invalid sprite number %i\n", sprite->GetSprite()); + return; + } + + spritedef_t *def = &sprites[sprite->GetSprite()]; + if (sprite->GetFrame() >= def->numframes) + { + DPrintf(DMSG_ERROR, "RenderPlayerSprite: invalid sprite frame %i : %i\n", sprite->GetSprite(), sprite->GetFrame()); + return; + } + + spriteframe_t *frame = &SpriteFrames[def->spriteframes + sprite->GetFrame()]; + FTextureID picnum = frame->Texture[0]; + bool flip = (frame->Flip & 1) != 0; + + FTexture *tex = TexMan(picnum); + if (tex->UseType == FTexture::TEX_Null) + return; + + // Can't interpolate the first tic. 
+ if (sprite->firstTic) + { + sprite->firstTic = false; + sprite->oldx = sprite->x; + sprite->oldy = sprite->y; + } + + double sx = sprite->oldx + (sprite->x - sprite->oldx) * ticfrac; + double sy = sprite->oldy + (sprite->y - sprite->oldy) * ticfrac; + + if (sprite->Flags & PSPF_ADDBOB) + { + sx += bobx; + sy += boby; + } + + if (sprite->Flags & PSPF_ADDWEAPON && sprite->GetID() != PSP_WEAPON) + { + sx += wx; + sy += wy; + } + + // calculate edges of the shape + double tx = sx - BaseXCenter; + + tx -= tex->GetScaledLeftOffset(); + int x1 = xs_RoundToInt(CenterX + tx * pspritexscale); + + // off the right side + if (x1 > viewwidth) + return; + + tx += tex->GetScaledWidth(); + int x2 = xs_RoundToInt(CenterX + tx * pspritexscale); + + // off the left side + if (x2 <= 0) + return; + + double texturemid = (BaseYCenter - sy) * tex->Scale.Y + tex->TopOffset; + + // Adjust PSprite for fullscreen views + if (camera->player && (RenderTarget != screen || viewheight == RenderTarget->GetHeight() || (RenderTarget->GetWidth() > (BaseXCenter * 2) && !st_scale))) + { + AWeapon *weapon = dyn_cast(sprite->GetCaller()); + if (weapon != nullptr && weapon->YAdjust != 0) + { + if (RenderTarget != screen || viewheight == RenderTarget->GetHeight()) + { + texturemid -= weapon->YAdjust; + } + else + { + texturemid -= StatusBar->GetDisplacement() * weapon->YAdjust; + } + } + } + + // Move the weapon down for 1280x1024. 
+ if (sprite->GetID() < PSP_TARGETCENTER) + { + texturemid -= AspectPspriteOffset(WidescreenRatio); + } + + int clipped_x1 = MAX(x1, 0); + int clipped_x2 = MIN(x2, viewwidth); + double xscale = pspritexscale / tex->Scale.X; + double yscale = pspriteyscale / tex->Scale.Y; + uint32_t translation = 0; // [RH] Use default colors + + double xiscale, startfrac; + if (flip) + { + xiscale = -pspritexiscale * tex->Scale.X; + startfrac = 1; + } + else + { + xiscale = pspritexiscale * tex->Scale.X; + startfrac = 0; + } + + if (clipped_x1 > x1) + startfrac += xiscale * (clipped_x1 - x1); + + bool noaccel = false; + + FDynamicColormap *basecolormap = viewsector->ColorMap; + FDynamicColormap *colormap_to_use = basecolormap; + + visstyle_t visstyle; + visstyle.ColormapNum = 0; + visstyle.BaseColormap = basecolormap; + visstyle.Alpha = 0; + visstyle.RenderStyle = STYLE_Normal; + + bool foggy = false; + int actualextralight = foggy ? 0 : extralight << 4; + int spriteshade = LIGHT2SHADE(owner->Sector->lightlevel + actualextralight); + double minz = double((2048 * 4) / double(1 << 20)); + visstyle.ColormapNum = GETPALOOKUP(r_SpriteVisibility / minz, spriteshade); + + if (sprite->GetID() < PSP_TARGETCENTER) + { + // Lots of complicated style and noaccel stuff + } + + // Check for hardware-assisted 2D. If it's available, and this sprite is not + // fuzzy, don't draw it until after the switch to 2D mode. 
+ if (!noaccel && RenderTarget == screen && (DFrameBuffer *)screen->Accel2D) + { + FRenderStyle style = visstyle.RenderStyle; + style.CheckFuzz(); + if (style.BlendOp != STYLEOP_Fuzz) + { + PolyScreenSprite screenSprite; + screenSprite.Pic = tex; + screenSprite.X1 = viewwindowx + x1; + screenSprite.Y1 = viewwindowy + viewheight / 2 - texturemid * yscale - 0.5; + screenSprite.Width = tex->GetWidth() * xscale; + screenSprite.Height = tex->GetHeight() * yscale; + screenSprite.Translation = TranslationToTable(translation); + screenSprite.Flip = xiscale < 0; + screenSprite.visstyle = visstyle; + screenSprite.Colormap = colormap_to_use; + ScreenSprites.push_back(screenSprite); + return; + } + } + + //R_DrawVisSprite(vis); +} + +int RenderPolyBsp::PointOnSide(const DVector2 &pos, const node_t *node) +{ + return DMulScale32(FLOAT2FIXED(pos.Y) - node->y, node->dx, node->x - FLOAT2FIXED(pos.X), node->dy) > 0; +} + +bool RenderPolyBsp::CheckBBox(float *bspcoord) +{ + static const int checkcoord[12][4] = + { + { 3,0,2,1 }, + { 3,0,2,0 }, + { 3,1,2,0 }, + { 0 }, + { 2,0,2,1 }, + { 0,0,0,0 }, + { 3,1,3,0 }, + { 0 }, + { 2,0,3,1 }, + { 2,1,3,1 }, + { 2,1,3,0 } + }; + + int boxx; + int boxy; + int boxpos; + + double x1, y1, x2, y2; + double rx1, ry1, rx2, ry2; + int sx1, sx2; + + // Find the corners of the box + // that define the edges from current viewpoint. + if (ViewPos.X <= bspcoord[BOXLEFT]) + boxx = 0; + else if (ViewPos.X < bspcoord[BOXRIGHT]) + boxx = 1; + else + boxx = 2; + + if (ViewPos.Y >= bspcoord[BOXTOP]) + boxy = 0; + else if (ViewPos.Y > bspcoord[BOXBOTTOM]) + boxy = 1; + else + boxy = 2; + + boxpos = (boxy << 2) + boxx; + if (boxpos == 5) + return true; + + x1 = bspcoord[checkcoord[boxpos][0]] - ViewPos.X; + y1 = bspcoord[checkcoord[boxpos][1]] - ViewPos.Y; + x2 = bspcoord[checkcoord[boxpos][2]] - ViewPos.X; + y2 = bspcoord[checkcoord[boxpos][3]] - ViewPos.Y; + + // check clip list for an open space + + // Sitting on a line? 
+ if (y1 * (x1 - x2) + x1 * (y2 - y1) >= -EQUAL_EPSILON) + return true; + + rx1 = x1 * ViewSin - y1 * ViewCos; + rx2 = x2 * ViewSin - y2 * ViewCos; + ry1 = x1 * ViewTanCos + y1 * ViewTanSin; + ry2 = x2 * ViewTanCos + y2 * ViewTanSin; + + /*if (MirrorFlags & RF_XFLIP) + { + double t = -rx1; + rx1 = -rx2; + rx2 = t; + swapvalues(ry1, ry2); + }*/ + + if (rx1 >= -ry1) + { + if (rx1 > ry1) return false; // left edge is off the right side + if (ry1 == 0) return false; + sx1 = xs_RoundToInt(CenterX + rx1 * CenterX / ry1); + } + else + { + if (rx2 < -ry2) return false; // wall is off the left side + if (rx1 - rx2 - ry2 + ry1 == 0) return false; // wall does not intersect view volume + sx1 = 0; + } + + if (rx2 <= ry2) + { + if (rx2 < -ry2) return false; // right edge is off the left side + if (ry2 == 0) return false; + sx2 = xs_RoundToInt(CenterX + rx2 * CenterX / ry2); + } + else + { + if (rx1 > ry1) return false; // wall is off the right side + if (ry2 - ry1 - rx2 + rx1 == 0) return false; // wall does not intersect view volume + sx2 = viewwidth; + } + + // Find the first clippost that touches the source post + // (adjacent pixels are touching). + + // Does not cross a pixel. 
+ if (sx2 <= sx1) + return false; + + return true; +} + +///////////////////////////////////////////////////////////////////////////// + +void RenderPolyWall::Render(const TriMatrix &worldToClip) +{ + FTexture *tex = GetTexture(); + if (!tex) + return; + + PolyWallTextureCoords texcoords(tex, Line, Texpart, TopZ, BottomZ, UnpeggedCeil); + + TriVertex *vertices = PolyVertexBuffer::GetVertices(4); + if (!vertices) + return; + + vertices[0].x = (float)v1.X; + vertices[0].y = (float)v1.Y; + vertices[0].z = (float)ceil1; + vertices[0].w = 1.0f; + vertices[0].varying[0] = (float)texcoords.u1; + vertices[0].varying[1] = (float)texcoords.v1; + vertices[0].varying[2] = 1.0f; + + vertices[1].x = (float)v2.X; + vertices[1].y = (float)v2.Y; + vertices[1].z = (float)ceil2; + vertices[1].w = 1.0f; + vertices[1].varying[0] = (float)texcoords.u2; + vertices[1].varying[1] = (float)texcoords.v1; + vertices[1].varying[2] = 1.0f; + + vertices[2].x = (float)v2.X; + vertices[2].y = (float)v2.Y; + vertices[2].z = (float)floor2; + vertices[2].w = 1.0f; + vertices[2].varying[0] = (float)texcoords.u2; + vertices[2].varying[1] = (float)texcoords.v2; + vertices[2].varying[2] = 1.0f; + + vertices[3].x = (float)v1.X; + vertices[3].y = (float)v1.Y; + vertices[3].z = (float)floor1; + vertices[3].w = 1.0f; + vertices[3].varying[0] = (float)texcoords.u1; + vertices[3].varying[1] = (float)texcoords.v2; + vertices[3].varying[2] = 1.0f; + + TriangleDrawer::draw(worldToClip, vertices, 4, TriangleDrawMode::Fan, true, 0, viewwidth, cliptop, clipbottom, tex); +} + +FTexture *RenderPolyWall::GetTexture() +{ + FTexture *tex = TexMan(Line->sidedef->GetTexture(Texpart), true); + if (tex == nullptr || tex->UseType == FTexture::TEX_Null) + return nullptr; + else + return tex; +} + +int RenderPolyWall::GetShade() +{ + if (fixedlightlev >= 0 || fixedcolormap) + { + return 0; + } + else + { + bool foggy = false; + int actualextralight = foggy ? 
0 : extralight << 4; + int shade = LIGHT2SHADE(Line->sidedef->GetLightLevel(foggy, Line->frontsector->lightlevel) + actualextralight); + return shade; + } +} + +/* +float RenderPolyWall::GetLight(short x) +{ + if (fixedlightlev >= 0 || fixedcolormap) + return 0.0f; + else + return (float)(r_WallVisibility / Coords.Z(x)); +} +*/ + +///////////////////////////////////////////////////////////////////////////// + +PolyWallTextureCoords::PolyWallTextureCoords(FTexture *tex, const seg_t *line, side_t::ETexpart texpart, double topz, double bottomz, double unpeggedceil) +{ + CalcU(tex, line, texpart); + CalcV(tex, line, texpart, topz, bottomz, unpeggedceil); +} + +void PolyWallTextureCoords::CalcU(FTexture *tex, const seg_t *line, side_t::ETexpart texpart) +{ + double lineLength = line->sidedef->TexelLength; + double lineStart = 0.0; + + bool entireSegment = ((line->linedef->v1 == line->v1) && (line->linedef->v2 == line->v2) || (line->linedef->v2 == line->v1) && (line->linedef->v1 == line->v2)); + if (!entireSegment) + { + lineLength = (line->v2->fPos() - line->v1->fPos()).Length(); + lineStart = (line->v1->fPos() - line->linedef->v1->fPos()).Length(); + } + + int texWidth = tex->GetWidth(); + double uscale = line->sidedef->GetTextureXScale(texpart) * tex->Scale.X; + u1 = lineStart + line->sidedef->GetTextureXOffset(texpart); + u2 = u1 + lineLength; + u1 *= uscale; + u2 *= uscale; + u1 /= texWidth; + u2 /= texWidth; +} + +void PolyWallTextureCoords::CalcV(FTexture *tex, const seg_t *line, side_t::ETexpart texpart, double topz, double bottomz, double unpeggedceil) +{ + double vscale = line->sidedef->GetTextureYScale(texpart) * tex->Scale.Y; + + double yoffset = line->sidedef->GetTextureYOffset(texpart); + if (tex->bWorldPanning) + yoffset *= vscale; + + switch (texpart) + { + default: + case side_t::mid: + CalcVMidPart(tex, line, topz, bottomz, vscale, yoffset); + break; + case side_t::top: + CalcVTopPart(tex, line, topz, bottomz, vscale, yoffset); + break; + case 
side_t::bottom: + CalcVBottomPart(tex, line, topz, bottomz, unpeggedceil, vscale, yoffset); + break; + } + + int texHeight = tex->GetHeight(); + v1 /= texHeight; + v2 /= texHeight; +} + +void PolyWallTextureCoords::CalcVTopPart(FTexture *tex, const seg_t *line, double topz, double bottomz, double vscale, double yoffset) +{ + bool pegged = (line->linedef->flags & ML_DONTPEGTOP) == 0; + if (pegged) // bottom to top + { + int texHeight = tex->GetHeight(); + v1 = -yoffset; + v2 = v1 + (topz - bottomz); + v1 *= vscale; + v2 *= vscale; + v1 = texHeight - v1; + v2 = texHeight - v2; + std::swap(v1, v2); + } + else // top to bottom + { + v1 = yoffset; + v2 = v1 + (topz - bottomz); + v1 *= vscale; + v2 *= vscale; + } +} + +void PolyWallTextureCoords::CalcVMidPart(FTexture *tex, const seg_t *line, double topz, double bottomz, double vscale, double yoffset) +{ + bool pegged = (line->linedef->flags & ML_DONTPEGBOTTOM) == 0; + if (pegged) // top to bottom + { + v1 = yoffset; + v2 = v1 + (topz - bottomz); + v1 *= vscale; + v2 *= vscale; + } + else // bottom to top + { + int texHeight = tex->GetHeight(); + v1 = yoffset; + v2 = v1 + (topz - bottomz); + v1 *= vscale; + v2 *= vscale; + v1 = texHeight - v1; + v2 = texHeight - v2; + std::swap(v1, v2); + } +} + +void PolyWallTextureCoords::CalcVBottomPart(FTexture *tex, const seg_t *line, double topz, double bottomz, double unpeggedceil, double vscale, double yoffset) +{ + bool pegged = (line->linedef->flags & ML_DONTPEGBOTTOM) == 0; + if (pegged) // top to bottom + { + v1 = yoffset; + v2 = v1 + (topz - bottomz); + v1 *= vscale; + v2 *= vscale; + } + else + { + v1 = yoffset + (unpeggedceil - topz); + v2 = v1 + (topz - bottomz); + v1 *= vscale; + v2 *= vscale; + } +} + +///////////////////////////////////////////////////////////////////////////// +
+// Draws this queued screen sprite with screen->DrawTexture, clipped to the
+// view window rectangle. Exactly one of three lighting descriptions is filled
+// in and handed to DrawTexture: a special colormap, a colour overlay built
+// from the colormap's fade colour, or a full FColormapStyle.
+void PolyScreenSprite::Render()
+{
+	FSpecialColormap *special = nullptr;
+	FColormapStyle colormapstyle;
+	PalEntry overlay = 0;
+	bool usecolormapstyle = false;
+
+	// A BaseColormap that points into the SpecialColormaps array is treated
+	// as a special colormap and passed through as-is.
+	if (visstyle.BaseColormap >= &SpecialColormaps[0] &&
+		visstyle.BaseColormap < &SpecialColormaps[SpecialColormaps.Size()])
+	{
+		// The cast needs an explicit target type; it is the type of `special`.
+		special = static_cast<FSpecialColormap*>(visstyle.BaseColormap);
+	}
+	else if (Colormap->Color == PalEntry(255, 255, 255) &&
+		Colormap->Desaturate == 0)
+	{
+		// White, non-desaturated colormap: use the fade colour as an overlay
+		// whose alpha is ColormapNum scaled from [0, NUMCOLORMAPS] to [0, 255].
+		overlay = Colormap->Fade;
+		overlay.a = BYTE(visstyle.ColormapNum * 255 / NUMCOLORMAPS);
+	}
+	else
+	{
+		// Anything else is described with a complete colormap style.
+		usecolormapstyle = true;
+		colormapstyle.Color = Colormap->Color;
+		colormapstyle.Fade = Colormap->Fade;
+		colormapstyle.Desaturate = Colormap->Desaturate;
+		colormapstyle.FadeLevel = visstyle.ColormapNum / float(NUMCOLORMAPS);
+	}
+
+	screen->DrawTexture(Pic,
+		X1,
+		Y1,
+		DTA_DestWidthF, Width,
+		DTA_DestHeightF, Height,
+		DTA_Translation, Translation,
+		DTA_FlipX, Flip,
+		DTA_TopOffset, 0,
+		DTA_LeftOffset, 0,
+		DTA_ClipLeft, viewwindowx,
+		DTA_ClipTop, viewwindowy,
+		DTA_ClipRight, viewwindowx + viewwidth,
+		DTA_ClipBottom, viewwindowy + viewheight,
+		DTA_AlphaF, visstyle.Alpha,
+		DTA_RenderStyle, visstyle.RenderStyle,
+		DTA_FillColor, FillColor,
+		DTA_SpecialColormap, special,
+		DTA_ColorOverlay, overlay.d,
+		DTA_ColormapStyle, usecolormapstyle ? &colormapstyle : nullptr,
+		TAG_DONE);
+}
+
+///////////////////////////////////////////////////////////////////////////// + +namespace +{ + int NextBufferVertex = 0; +} + +TriVertex *PolyVertexBuffer::GetVertices(int count) +{ + enum { VertexBufferSize = 16 * 1024 }; + static TriVertex Vertex[VertexBufferSize]; + + if (NextBufferVertex + count > VertexBufferSize) + return nullptr; + TriVertex *v = Vertex + NextBufferVertex; + NextBufferVertex += count; + return v; +} + +void PolyVertexBuffer::Clear() +{ + NextBufferVertex = 0; +} diff --git a/src/r_poly.h b/src/r_poly.h new file mode 100644 index 0000000000..48bd2985a8 --- /dev/null +++ b/src/r_poly.h @@ -0,0 +1,140 @@ +/* +** Experimental Doom software renderer +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty.
In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. +** +*/ + +#pragma once + +#include +#include +#include +#include +#include "doomdata.h" +#include "r_utility.h" +#include "r_main.h" +#include "r_triangle.h" + +// DScreen accelerated sprite to be rendered +class PolyScreenSprite +{ +public: + void Render(); + + FTexture *Pic = nullptr; + double X1 = 0.0; + double Y1 = 0.0; + double Width = 0.0; + double Height = 0.0; + FRemapTable *Translation = nullptr; + bool Flip = false; + visstyle_t visstyle; + uint32_t FillColor = 0; + FDynamicColormap *Colormap = nullptr; +}; + +// Renders a GL BSP tree in a scene +class RenderPolyBsp +{ +public: + void Render(); + void RenderScreenSprites(); + +private: + void RenderNode(void *node); + void RenderSubsector(subsector_t *sub); + void AddLine(seg_t *line, sector_t *frontsector); + + void RenderPlayerSprites(); + void RenderPlayerSprite(DPSprite *sprite, AActor *owner, float bobx, float boby, double wx, double wy, double ticfrac); + + int PointOnSide(const DVector2 &pos, const node_t *node); + + // Checks BSP node/subtree bounding box. + // Returns true if some part of the bbox might be visible. 
+ bool CheckBBox(float *bspcoord); + + std::vector PvsSectors; + TriMatrix worldToClip; + + std::vector ScreenSprites; + + const int BaseXCenter = 160; + const int BaseYCenter = 100; +}; + +class RenderPolyWall +{ +public: + void Render(const TriMatrix &worldToClip); + + void SetCoords(const DVector2 &v1, const DVector2 &v2, double ceil1, double floor1, double ceil2, double floor2) + { + this->v1 = v1; + this->v2 = v2; + this->ceil1 = ceil1; + this->floor1 = floor1; + this->ceil2 = ceil2; + this->floor2 = floor2; + } + + DVector2 v1; + DVector2 v2; + double ceil1 = 0.0; + double floor1 = 0.0; + double ceil2 = 0.0; + double floor2 = 0.0; + + const seg_t *Line = nullptr; + side_t::ETexpart Texpart = side_t::mid; + double TopZ = 0.0; + double BottomZ = 0.0; + double UnpeggedCeil = 0.0; + FSWColormap *Colormap = nullptr; + bool Masked = false; + +private: + FTexture *GetTexture(); + int GetShade(); + //float GetLight(short x); +}; + +// Texture coordinates for a wall +class PolyWallTextureCoords +{ +public: + PolyWallTextureCoords(FTexture *tex, const seg_t *line, side_t::ETexpart texpart, double topz, double bottomz, double unpeggedceil); + + double u1, u2; + double v1, v2; + +private: + void CalcU(FTexture *tex, const seg_t *line, side_t::ETexpart texpart); + void CalcV(FTexture *tex, const seg_t *line, side_t::ETexpart texpart, double topz, double bottomz, double unpeggedceil); + void CalcVTopPart(FTexture *tex, const seg_t *line, double topz, double bottomz, double vscale, double yoffset); + void CalcVMidPart(FTexture *tex, const seg_t *line, double topz, double bottomz, double vscale, double yoffset); + void CalcVBottomPart(FTexture *tex, const seg_t *line, double topz, double bottomz, double unpeggedceil, double vscale, double yoffset); +}; + +class PolyVertexBuffer +{ +public: + static TriVertex *GetVertices(int count); + static void Clear(); +}; diff --git a/src/r_triangle.cpp b/src/r_triangle.cpp index 59a4464076..653a46f31e 100644 --- a/src/r_triangle.cpp +++ 
b/src/r_triangle.cpp @@ -1141,6 +1141,28 @@ TriMatrix TriMatrix::rotate(float angle, float x, float y, float z) return m; } +TriMatrix TriMatrix::swapYZ() +{ + TriMatrix m = null(); + m.matrix[0 + 0 * 4] = 1.0f; + m.matrix[1 + 2 * 4] = 1.0f; + m.matrix[2 + 1 * 4] = -1.0f; + m.matrix[3 + 3 * 4] = 1.0f; + return m; +} + +TriMatrix TriMatrix::perspective(float fovy, float aspect, float z_near, float z_far) +{ + float f = (float)(1.0 / tan(fovy * M_PI / 360.0)); + TriMatrix m = null(); + m.matrix[0 + 0 * 4] = f / aspect; + m.matrix[1 + 1 * 4] = f; + m.matrix[2 + 2 * 4] = (z_far + z_near) / (z_near - z_far); + m.matrix[2 + 3 * 4] = (2.0f * z_far * z_near) / (z_near - z_far); + m.matrix[3 + 2 * 4] = -1.0f; + return m; +} + TriMatrix TriMatrix::frustum(float left, float right, float bottom, float top, float near, float far) { float a = (right + left) / (right - left); diff --git a/src/r_triangle.h b/src/r_triangle.h index 8335f1f936..98fa25db05 100644 --- a/src/r_triangle.h +++ b/src/r_triangle.h @@ -47,6 +47,8 @@ struct TriMatrix static TriMatrix translate(float x, float y, float z); static TriMatrix scale(float x, float y, float z); static TriMatrix rotate(float angle, float x, float y, float z); + static TriMatrix swapYZ(); + static TriMatrix perspective(float fovy, float aspect, float near, float far); static TriMatrix frustum(float left, float right, float bottom, float top, float near, float far); static TriMatrix worldToView(); // Software renderer world to view space transform From b9d6fc68566d63e8faaffa3f03987d7bbd31a40d Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 8 Nov 2016 02:48:14 +0100 Subject: [PATCH 275/912] Add some light and transparency --- src/r_poly.cpp | 65 ++++++++++++++++++++++++++++------------------ src/r_poly.h | 4 +-- src/r_triangle.cpp | 22 +++++++++++++--- 3 files changed, 60 insertions(+), 31 deletions(-) diff --git a/src/r_poly.cpp b/src/r_poly.cpp index a1b099f632..7bb54712f1 100644 --- a/src/r_poly.cpp +++ b/src/r_poly.cpp @@ 
-62,7 +62,7 @@ void RenderPolyBsp::Render() TriMatrix::rotate((float)(ViewAngle - 90).Radians(), 0.0f, -1.0f, 0.0f) * TriMatrix::swapYZ() * TriMatrix::translate((float)-ViewPos.X, (float)-ViewPos.Y, (float)-ViewPos.Z); - worldToClip = TriMatrix::perspective(fovy, ratio, 5.0, 65535.0f) * worldToView; + worldToClip = TriMatrix::perspective(fovy, ratio, 5.0f, 65535.0f) * worldToView; // Y shearing like the Doom renderer: //worldToClip = TriMatrix::viewToClip() * TriMatrix::worldToView(); @@ -87,6 +87,36 @@ void RenderPolyBsp::RenderScreenSprites() sprite.Render(); } +TriVertex RenderPolyBsp::PlaneVertex(vertex_t *v1, sector_t *sector, const secplane_t &plane) +{ + TriVertex v; + v.x = (float)v1->fPos().X; + v.y = (float)v1->fPos().Y; + v.z = (float)plane.ZatPoint(v1); + v.w = 1.0f; + v.varying[0] = v.x / 64.0f; + v.varying[1] = v.y / 64.0f; + + if (fixedlightlev >= 0) + v.varying[2] = fixedlightlev / 255.0f; + else if (fixedcolormap) + v.varying[2] = 1.0f; + else + v.varying[2] = sector->lightlevel / 255.0f; + + /* + double vis = r_FloorVisibility / (plane.Zat0() - ViewPos.Z); + if (fixedlightlev >= 0) + R_SetDSColorMapLight(sector->ColorMap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); + else if (fixedcolormap) + R_SetDSColorMapLight(fixedcolormap, 0, 0); + else + R_SetDSColorMapLight(sector->ColorMap, (float)(vis * fabs(CenterY - y)), LIGHT2SHADE(sector->lightlevel)); + */ + + return v; +} + void RenderPolyBsp::RenderSubsector(subsector_t *sub) { sector_t *frontsector = sub->sector; @@ -107,22 +137,8 @@ void RenderPolyBsp::RenderSubsector(subsector_t *sub) for (uint32_t i = 0; i < sub->numlines; i++) { seg_t *line = &sub->firstline[i]; - int j = sub->numlines - 1 - i; - ceilVertices[j].x = (float)line->v1->fPos().X; - ceilVertices[j].y = (float)line->v1->fPos().Y; - ceilVertices[j].z = (float)frontsector->ceilingplane.ZatPoint(line->v1); - ceilVertices[j].w = 1.0f; - ceilVertices[j].varying[0] = ceilVertices[j].x / 64.0f; - ceilVertices[j].varying[1] = ceilVertices[j].y / 
64.0f; - ceilVertices[j].varying[2] = 1.0f; - - floorVertices[i].x = (float)line->v1->fPos().X; - floorVertices[i].y = (float)line->v1->fPos().Y; - floorVertices[i].z = (float)frontsector->floorplane.ZatPoint(line->v1); - floorVertices[i].w = 1.0f; - floorVertices[i].varying[0] = floorVertices[i].x / 64.0f; - floorVertices[i].varying[1] = floorVertices[i].y / 64.0f; - floorVertices[i].varying[2] = 1.0f; + ceilVertices[sub->numlines - 1 - i] = PlaneVertex(line->v1, frontsector, frontsector->ceilingplane); + floorVertices[i] = PlaneVertex(line->v1, frontsector, frontsector->floorplane); } FTexture *floortex = TexMan(frontsector->GetTexture(sector_t::floor)); @@ -592,7 +608,7 @@ void RenderPolyWall::Render(const TriMatrix &worldToClip) vertices[0].w = 1.0f; vertices[0].varying[0] = (float)texcoords.u1; vertices[0].varying[1] = (float)texcoords.v1; - vertices[0].varying[2] = 1.0f; + vertices[0].varying[2] = GetLightLevel() / 255.0f; vertices[1].x = (float)v2.X; vertices[1].y = (float)v2.Y; @@ -600,7 +616,7 @@ void RenderPolyWall::Render(const TriMatrix &worldToClip) vertices[1].w = 1.0f; vertices[1].varying[0] = (float)texcoords.u2; vertices[1].varying[1] = (float)texcoords.v1; - vertices[1].varying[2] = 1.0f; + vertices[1].varying[2] = GetLightLevel() / 255.0f; vertices[2].x = (float)v2.X; vertices[2].y = (float)v2.Y; @@ -608,7 +624,7 @@ void RenderPolyWall::Render(const TriMatrix &worldToClip) vertices[2].w = 1.0f; vertices[2].varying[0] = (float)texcoords.u2; vertices[2].varying[1] = (float)texcoords.v2; - vertices[2].varying[2] = 1.0f; + vertices[2].varying[2] = GetLightLevel() / 255.0f; vertices[3].x = (float)v1.X; vertices[3].y = (float)v1.Y; @@ -616,7 +632,7 @@ void RenderPolyWall::Render(const TriMatrix &worldToClip) vertices[3].w = 1.0f; vertices[3].varying[0] = (float)texcoords.u1; vertices[3].varying[1] = (float)texcoords.v2; - vertices[3].varying[2] = 1.0f; + vertices[3].varying[2] = GetLightLevel() / 255.0f; TriangleDrawer::draw(worldToClip, vertices, 4, 
TriangleDrawMode::Fan, true, 0, viewwidth, cliptop, clipbottom, tex); } @@ -630,18 +646,17 @@ FTexture *RenderPolyWall::GetTexture() return tex; } -int RenderPolyWall::GetShade() +int RenderPolyWall::GetLightLevel() { if (fixedlightlev >= 0 || fixedcolormap) { - return 0; + return 255; } else { bool foggy = false; int actualextralight = foggy ? 0 : extralight << 4; - int shade = LIGHT2SHADE(Line->sidedef->GetLightLevel(foggy, Line->frontsector->lightlevel) + actualextralight); - return shade; + return Line->sidedef->GetLightLevel(foggy, Line->frontsector->lightlevel) + actualextralight; } } diff --git a/src/r_poly.h b/src/r_poly.h index 48bd2985a8..2ec9a82744 100644 --- a/src/r_poly.h +++ b/src/r_poly.h @@ -60,6 +60,7 @@ private: void RenderNode(void *node); void RenderSubsector(subsector_t *sub); void AddLine(seg_t *line, sector_t *frontsector); + TriVertex PlaneVertex(vertex_t *v1, sector_t *sector, const secplane_t &plane); void RenderPlayerSprites(); void RenderPlayerSprite(DPSprite *sprite, AActor *owner, float bobx, float boby, double wx, double wy, double ticfrac); @@ -111,8 +112,7 @@ public: private: FTexture *GetTexture(); - int GetShade(); - //float GetLight(short x); + int GetLightLevel(); }; // Texture coordinates for a wall diff --git a/src/r_triangle.cpp b/src/r_triangle.cpp index 653a46f31e..26e3933239 100644 --- a/src/r_triangle.cpp +++ b/src/r_triangle.cpp @@ -821,13 +821,20 @@ void ScreenTriangleDrawer::draw32(const ScreenTriangleDrawerArgs *args, DrawerTh { uint32_t ufrac = (uint32_t)((varying[0] - floor(varying[0])) * 0x100000000LL); uint32_t vfrac = (uint32_t)((varying[1] - floor(varying[1])) * 0x100000000LL); - //uint32_t light = (uint32_t)clamp(varying[2] * 255.0f + 0.5f, 0.0f, 255.0f); + uint32_t light = (uint32_t)clamp(varying[2] * 256.0f + 0.5f, 0.0f, 256.0f); uint32_t upos = ((ufrac >> 16) * textureWidth) >> 16; uint32_t vpos = ((vfrac >> 16) * textureHeight) >> 16; uint32_t uvoffset = upos * textureHeight + vpos; - buffer[ix] = 
texturePixels[uvoffset]; + uint32_t fg = texturePixels[uvoffset]; + uint32_t fg_red = (RPART(fg) * light) >> 8; + uint32_t fg_green = (GPART(fg) * light) >> 8; + uint32_t fg_blue = (BPART(fg) * light) >> 8; + uint32_t fg_alpha = APART(fg); + + if (fg_alpha > 127) + buffer[ix] = 0xff000000 | (fg_red << 16) | (fg_green << 8) | fg_blue; for (int i = 0; i < TriVertex::NumVarying; i++) varying[i] += varyingStep[i]; @@ -866,13 +873,20 @@ void ScreenTriangleDrawer::draw32(const ScreenTriangleDrawerArgs *args, DrawerTh { uint32_t ufrac = (uint32_t)((varying[0] - floor(varying[0])) * 0x100000000LL); uint32_t vfrac = (uint32_t)((varying[1] - floor(varying[1])) * 0x100000000LL); - //uint32_t light = (uint32_t)clamp(varying[2] * 255.0f + 0.5f, 0.0f, 255.0f); + uint32_t light = (uint32_t)clamp(varying[2] * 256.0f + 0.5f, 0.0f, 256.0f); uint32_t upos = ((ufrac >> 16) * textureWidth) >> 16; uint32_t vpos = ((vfrac >> 16) * textureHeight) >> 16; uint32_t uvoffset = upos * textureHeight + vpos; - buffer[ix] = texturePixels[uvoffset]; + uint32_t fg = texturePixels[uvoffset]; + uint32_t fg_red = (RPART(fg) * light) >> 8; + uint32_t fg_green = (GPART(fg) * light) >> 8; + uint32_t fg_blue = (BPART(fg) * light) >> 8; + uint32_t fg_alpha = APART(fg); + + if (fg_alpha > 127) + buffer[ix] = 0xff000000 | (fg_red << 16) | (fg_green << 8) | fg_blue; } for (int i = 0; i < TriVertex::NumVarying; i++) From d0f0500f0ddd196c7ffec2d1ce2024c73faa5de9 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 8 Nov 2016 05:17:29 +0100 Subject: [PATCH 276/912] Draw things --- src/r_poly.cpp | 251 ++++++++++++++++++++++++++++++++++++++++++++++++- src/r_poly.h | 6 ++ 2 files changed, 255 insertions(+), 2 deletions(-) diff --git a/src/r_poly.cpp b/src/r_poly.cpp index 7bb54712f1..e9edacae16 100644 --- a/src/r_poly.cpp +++ b/src/r_poly.cpp @@ -149,13 +149,13 @@ void RenderPolyBsp::RenderSubsector(subsector_t *sub) if (ceiltex->UseType != FTexture::TEX_Null) TriangleDrawer::draw(worldToClip, ceilVertices, 
sub->numlines, TriangleDrawMode::Fan, true, 0, viewwidth, cliptop, clipbottom, ceiltex); - /*for (AActor *thing = sub->sector->thinglist; thing != nullptr; thing = thing->snext) + for (AActor *thing = sub->sector->thinglist; thing != nullptr; thing = thing->snext) { if ((thing->renderflags & RF_SPRITETYPEMASK) == RF_WALLSPRITE) AddWallSprite(thing); else AddSprite(thing); - }*/ + } } void RenderPolyBsp::AddLine(seg_t *line, sector_t *frontsector) @@ -250,6 +250,253 @@ void RenderPolyBsp::AddLine(seg_t *line, sector_t *frontsector) } } +bool RenderPolyBsp::IsThingCulled(AActor *thing) +{ + FIntCVar *cvar = thing->GetClass()->distancecheck; + if (cvar != nullptr && *cvar >= 0) + { + double dist = (thing->Pos() - ViewPos).LengthSquared(); + double check = (double)**cvar; + if (dist >= check * check) + return true; + } + + // Don't waste time projecting sprites that are definitely not visible. + if (thing == nullptr || + (thing->renderflags & RF_INVISIBLE) || + !thing->RenderStyle.IsVisible(thing->Alpha) || + !thing->IsVisibleToPlayer()) + { + return true; + } + + return false; +} + +void RenderPolyBsp::AddSprite(AActor *thing) +{ + if (IsThingCulled(thing)) + return; + + DVector3 pos = thing->InterpolatedPosition(r_TicFracF); + pos.Z += thing->GetBobOffset(r_TicFracF); + + bool flipTextureX = false; + FTexture *tex = GetSpriteTexture(thing, flipTextureX); + if (tex == nullptr) + return; + DVector2 spriteScale = thing->Scale; + double thingxscalemul = spriteScale.X / tex->Scale.X; + double thingyscalemul = spriteScale.Y / tex->Scale.Y; + + if (flipTextureX) + pos.X -= (tex->GetWidth() - tex->LeftOffset) * thingxscalemul; + else + pos.X -= tex->LeftOffset * thingxscalemul; + + //pos.Z -= tex->TopOffset * thingyscalemul; + pos.Z -= (tex->GetHeight() - tex->TopOffset) * thingyscalemul + thing->Floorclip; + + double spriteHalfWidth = thingxscalemul * tex->GetWidth() * 0.5; + double spriteHeight = thingyscalemul * tex->GetHeight(); + + pos.X += spriteHalfWidth; + + DVector2 
points[2] = + { + { pos.X - ViewSin * spriteHalfWidth, pos.Y + ViewCos * spriteHalfWidth }, + { pos.X + ViewSin * spriteHalfWidth, pos.Y - ViewCos * spriteHalfWidth } + }; + + //double depth = 1.0; + //visstyle_t visstyle = GetSpriteVisStyle(thing, depth); + // Rumor has it that AlterWeaponSprite needs to be called with visstyle passed in somewhere around here.. + //R_SetColorMapLight(visstyle.BaseColormap, 0, visstyle.ColormapNum << FRACBITS); + + TriVertex *vertices = PolyVertexBuffer::GetVertices(4); + if (!vertices) + return; + + bool foggy = false; + int actualextralight = foggy ? 0 : extralight << 4; + + std::pair offsets[4] = + { + { 0.0f, 1.0f }, + { 1.0f, 1.0f }, + { 1.0f, 0.0f }, + { 0.0f, 0.0f }, + }; + + for (int i = 0; i < 4; i++) + { + auto &p = (i == 0 || i == 3) ? points[0] : points[1]; + + vertices[i].x = (float)p.X; + vertices[i].y = (float)p.Y; + vertices[i].z = (float)(pos.Z + spriteHeight * offsets[i].second); + vertices[i].w = 1.0f; + vertices[i].varying[0] = (float)(offsets[i].first * tex->Scale.X); + vertices[i].varying[1] = (float)((1.0f - offsets[i].second) * tex->Scale.Y); + if (flipTextureX) + vertices[i].varying[0] = 1.0f - vertices[i].varying[0]; + vertices[i].varying[2] = (thing->Sector->lightlevel + actualextralight) / 255.0f; + } + + TriangleDrawer::draw(worldToClip, vertices, 4, TriangleDrawMode::Fan, true, 0, viewwidth, cliptop, clipbottom, tex); + TriangleDrawer::draw(worldToClip, vertices, 4, TriangleDrawMode::Fan, false, 0, viewwidth, cliptop, clipbottom, tex); +} + +void RenderPolyBsp::AddWallSprite(AActor *thing) +{ + if (IsThingCulled(thing)) + return; +} + +visstyle_t RenderPolyBsp::GetSpriteVisStyle(AActor *thing, double z) +{ + visstyle_t visstyle; + + bool foggy = false; + int actualextralight = foggy ? 
0 : extralight << 4; + int spriteshade = LIGHT2SHADE(thing->Sector->lightlevel + actualextralight); + + visstyle.RenderStyle = thing->RenderStyle; + visstyle.Alpha = float(thing->Alpha); + visstyle.ColormapNum = 0; + + // The software renderer cannot invert the source without inverting the overlay + // too. That means if the source is inverted, we need to do the reverse of what + // the invert overlay flag says to do. + bool invertcolormap = (visstyle.RenderStyle.Flags & STYLEF_InvertOverlay) != 0; + + if (visstyle.RenderStyle.Flags & STYLEF_InvertSource) + { + invertcolormap = !invertcolormap; + } + + FDynamicColormap *mybasecolormap = thing->Sector->ColorMap; + + // Sprites that are added to the scene must fade to black. + if (visstyle.RenderStyle == LegacyRenderStyles[STYLE_Add] && mybasecolormap->Fade != 0) + { + mybasecolormap = GetSpecialLights(mybasecolormap->Color, 0, mybasecolormap->Desaturate); + } + + if (visstyle.RenderStyle.Flags & STYLEF_FadeToBlack) + { + if (invertcolormap) + { // Fade to white + mybasecolormap = GetSpecialLights(mybasecolormap->Color, MAKERGB(255, 255, 255), mybasecolormap->Desaturate); + invertcolormap = false; + } + else + { // Fade to black + mybasecolormap = GetSpecialLights(mybasecolormap->Color, MAKERGB(0, 0, 0), mybasecolormap->Desaturate); + } + } + + // get light level + if (fixedcolormap != NULL) + { // fixed map + visstyle.BaseColormap = fixedcolormap; + visstyle.ColormapNum = 0; + } + else + { + if (invertcolormap) + { + mybasecolormap = GetSpecialLights(mybasecolormap->Color, mybasecolormap->Fade.InverseColor(), mybasecolormap->Desaturate); + } + if (fixedlightlev >= 0) + { + visstyle.BaseColormap = mybasecolormap; + visstyle.ColormapNum = fixedlightlev >> COLORMAPSHIFT; + } + else if (!foggy && ((thing->renderflags & RF_FULLBRIGHT) || (thing->flags5 & MF5_BRIGHT))) + { // full bright + visstyle.BaseColormap = mybasecolormap; + visstyle.ColormapNum = 0; + } + else + { // diminished light + double minz = double((2048 * 
4) / double(1 << 20)); + visstyle.ColormapNum = GETPALOOKUP(r_SpriteVisibility / MAX(z, minz), spriteshade); + visstyle.BaseColormap = mybasecolormap; + } + } + + return visstyle; +} + +FTexture *RenderPolyBsp::GetSpriteTexture(AActor *thing, /*out*/ bool &flipX) +{ + flipX = false; + if (thing->picnum.isValid()) + { + FTexture *tex = TexMan(thing->picnum); + if (tex->UseType == FTexture::TEX_Null) + { + return nullptr; + } + + if (tex->Rotations != 0xFFFF) + { + // choose a different rotation based on player view + spriteframe_t *sprframe = &SpriteFrames[tex->Rotations]; + DVector3 pos = thing->InterpolatedPosition(r_TicFracF); + pos.Z += thing->GetBobOffset(r_TicFracF); + DAngle ang = (pos - ViewPos).Angle(); + angle_t rot; + if (sprframe->Texture[0] == sprframe->Texture[1]) + { + rot = (ang - thing->Angles.Yaw + 45.0 / 2 * 9).BAMs() >> 28; + } + else + { + rot = (ang - thing->Angles.Yaw + (45.0 / 2 * 9 - 180.0 / 16)).BAMs() >> 28; + } + flipX = (sprframe->Flip & (1 << rot)) != 0; + tex = TexMan[sprframe->Texture[rot]]; // Do not animate the rotation + } + return tex; + } + else + { + // decide which texture to use for the sprite + int spritenum = thing->sprite; + if (spritenum >= (signed)sprites.Size() || spritenum < 0) + return nullptr; + + spritedef_t *sprdef = &sprites[spritenum]; + if (thing->frame >= sprdef->numframes) + { + // If there are no frames at all for this sprite, don't draw it. 
+ return nullptr; + } + else + { + //picnum = SpriteFrames[sprdef->spriteframes + thing->frame].Texture[0]; + // choose a different rotation based on player view + spriteframe_t *sprframe = &SpriteFrames[sprdef->spriteframes + thing->frame]; + DVector3 pos = thing->InterpolatedPosition(r_TicFracF); + pos.Z += thing->GetBobOffset(r_TicFracF); + DAngle ang = (pos - ViewPos).Angle(); + angle_t rot; + if (sprframe->Texture[0] == sprframe->Texture[1]) + { + rot = (ang - thing->Angles.Yaw + 45.0 / 2 * 9).BAMs() >> 28; + } + else + { + rot = (ang - thing->Angles.Yaw + (45.0 / 2 * 9 - 180.0 / 16)).BAMs() >> 28; + } + flipX = (sprframe->Flip & (1 << rot)) != 0; + return TexMan[sprframe->Texture[rot]]; // Do not animate the rotation + } + } +} + void RenderPolyBsp::RenderNode(void *node) { while (!((size_t)node & 1)) // Keep going until found a subsector diff --git a/src/r_poly.h b/src/r_poly.h index 2ec9a82744..1baaeddfba 100644 --- a/src/r_poly.h +++ b/src/r_poly.h @@ -62,6 +62,12 @@ private: void AddLine(seg_t *line, sector_t *frontsector); TriVertex PlaneVertex(vertex_t *v1, sector_t *sector, const secplane_t &plane); + void AddSprite(AActor *thing); + void AddWallSprite(AActor *thing); + bool IsThingCulled(AActor *thing); + visstyle_t GetSpriteVisStyle(AActor *thing, double z); + FTexture *GetSpriteTexture(AActor *thing, /*out*/ bool &flipX); + void RenderPlayerSprites(); void RenderPlayerSprite(DPSprite *sprite, AActor *owner, float bobx, float boby, double wx, double wy, double ticfrac); From 2e2d6da00fd12f38649d7ea691d44c12777b60bf Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 8 Nov 2016 05:48:45 +0100 Subject: [PATCH 277/912] Only draw thing if its in the subsector --- src/r_poly.cpp | 22 ++++++++++++++++++---- src/r_poly.h | 4 ++-- 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/src/r_poly.cpp b/src/r_poly.cpp index e9edacae16..1844986929 100644 --- a/src/r_poly.cpp +++ b/src/r_poly.cpp @@ -152,9 +152,9 @@ void 
RenderPolyBsp::RenderSubsector(subsector_t *sub) for (AActor *thing = sub->sector->thinglist; thing != nullptr; thing = thing->snext) { if ((thing->renderflags & RF_SPRITETYPEMASK) == RF_WALLSPRITE) - AddWallSprite(thing); + AddWallSprite(thing, sub); else - AddSprite(thing); + AddSprite(thing, sub); } } @@ -273,7 +273,7 @@ bool RenderPolyBsp::IsThingCulled(AActor *thing) return false; } -void RenderPolyBsp::AddSprite(AActor *thing) +void RenderPolyBsp::AddSprite(AActor *thing, subsector_t *sub) { if (IsThingCulled(thing)) return; @@ -308,6 +308,20 @@ void RenderPolyBsp::AddSprite(AActor *thing) { pos.X + ViewSin * spriteHalfWidth, pos.Y - ViewCos * spriteHalfWidth } }; + // Is this sprite inside? (To do: clip the points) + for (int i = 0; i < 2; i++) + { + for (uint32_t i = 0; i < sub->numlines; i++) + { + seg_t *line = &sub->firstline[i]; + double nx = line->v1->fY() - line->v2->fY(); + double ny = line->v2->fX() - line->v1->fX(); + double d = -(line->v1->fX() * nx + line->v1->fY() * ny); + if (pos.X * nx + pos.Y * ny + d > 0.0) + return; + } + } + //double depth = 1.0; //visstyle_t visstyle = GetSpriteVisStyle(thing, depth); // Rumor has it that AlterWeaponSprite needs to be called with visstyle passed in somewhere around here.. 
@@ -347,7 +361,7 @@ void RenderPolyBsp::AddSprite(AActor *thing) TriangleDrawer::draw(worldToClip, vertices, 4, TriangleDrawMode::Fan, false, 0, viewwidth, cliptop, clipbottom, tex); } -void RenderPolyBsp::AddWallSprite(AActor *thing) +void RenderPolyBsp::AddWallSprite(AActor *thing, subsector_t *sub) { if (IsThingCulled(thing)) return; diff --git a/src/r_poly.h b/src/r_poly.h index 1baaeddfba..cd1cd12152 100644 --- a/src/r_poly.h +++ b/src/r_poly.h @@ -62,8 +62,8 @@ private: void AddLine(seg_t *line, sector_t *frontsector); TriVertex PlaneVertex(vertex_t *v1, sector_t *sector, const secplane_t &plane); - void AddSprite(AActor *thing); - void AddWallSprite(AActor *thing); + void AddSprite(AActor *thing, subsector_t *sub); + void AddWallSprite(AActor *thing, subsector_t *sub); bool IsThingCulled(AActor *thing); visstyle_t GetSpriteVisStyle(AActor *thing, double z); FTexture *GetSpriteTexture(AActor *thing, /*out*/ bool &flipX); From dffba5267d8064e6bafab9e754a8e8f8ab709dbe Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Tue, 8 Nov 2016 00:44:15 -0500 Subject: [PATCH 278/912] - Full freelook is now enabled if r_newrenderer is set to true. --- src/r_swrenderer.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/r_swrenderer.cpp b/src/r_swrenderer.cpp index 372f23accc..ad9752db97 100644 --- a/src/r_swrenderer.cpp +++ b/src/r_swrenderer.cpp @@ -250,9 +250,12 @@ void FSoftwareRenderer::DrawRemainingPlayerSprites() #define MAX_DN_ANGLE 56 // Max looking down angle #define MAX_UP_ANGLE 32 // Max looking up angle +EXTERN_CVAR(Bool, r_newrenderer) // [SP] dpJudas's new renderer +EXTERN_CVAR(Float, maxviewpitch) // [SP] CVAR from GZDoom + int FSoftwareRenderer::GetMaxViewPitch(bool down) { - return down ? MAX_DN_ANGLE : MAX_UP_ANGLE; + return (r_newrenderer) ? int(maxviewpitch) : (down ? 
MAX_DN_ANGLE : MAX_UP_ANGLE); } //========================================================================== From cb4b74e0c34a8eb2b0eba43a8fec8ec14452f368 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 8 Nov 2016 14:28:58 +0100 Subject: [PATCH 279/912] Double render speed of triangle drawer in the poly version by removing sprite clip and assigning whole blocks to threads --- src/CMakeLists.txt | 1 + src/r_poly.cpp | 20 +- src/r_poly.h | 2 +- src/r_poly_triangle.cpp | 1054 +++++++++++++++++++++++++++++++++++++++ src/r_poly_triangle.h | 104 ++++ 5 files changed, 1164 insertions(+), 17 deletions(-) create mode 100644 src/r_poly_triangle.cpp create mode 100644 src/r_poly_triangle.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 91553e275f..e6ee761b07 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1052,6 +1052,7 @@ set( FASTMATH_PCH_SOURCES r_swrenderer.cpp r_swrenderer2.cpp r_poly.cpp + r_poly_triangle.cpp r_3dfloors.cpp r_bsp.cpp r_draw.cpp diff --git a/src/r_poly.cpp b/src/r_poly.cpp index 1844986929..6542afcfb8 100644 --- a/src/r_poly.cpp +++ b/src/r_poly.cpp @@ -35,23 +35,12 @@ EXTERN_CVAR(Bool, r_drawplayersprites) EXTERN_CVAR(Bool, r_deathcamera) EXTERN_CVAR(Bool, st_scale) -namespace -{ - short cliptop[MAXWIDTH], clipbottom[MAXWIDTH]; -} - ///////////////////////////////////////////////////////////////////////////// void RenderPolyBsp::Render() { PolyVertexBuffer::Clear(); - for (int i = 0; i < viewwidth; i++) - { - cliptop[i] = 0; - clipbottom[i] = viewheight; - } - // Perspective correct: float ratio = WidescreenRatio; float fovratio = (WidescreenRatio >= 1.3f) ? 
1.333333f : ratio; @@ -143,11 +132,11 @@ void RenderPolyBsp::RenderSubsector(subsector_t *sub) FTexture *floortex = TexMan(frontsector->GetTexture(sector_t::floor)); if (floortex->UseType != FTexture::TEX_Null) - TriangleDrawer::draw(worldToClip, floorVertices, sub->numlines, TriangleDrawMode::Fan, true, 0, viewwidth, cliptop, clipbottom, floortex); + PolyTriangleDrawer::draw(worldToClip, floorVertices, sub->numlines, TriangleDrawMode::Fan, true, 0, viewwidth, 0, viewheight, floortex); FTexture *ceiltex = TexMan(frontsector->GetTexture(sector_t::ceiling)); if (ceiltex->UseType != FTexture::TEX_Null) - TriangleDrawer::draw(worldToClip, ceilVertices, sub->numlines, TriangleDrawMode::Fan, true, 0, viewwidth, cliptop, clipbottom, ceiltex); + PolyTriangleDrawer::draw(worldToClip, ceilVertices, sub->numlines, TriangleDrawMode::Fan, true, 0, viewwidth, 0, viewheight, ceiltex); for (AActor *thing = sub->sector->thinglist; thing != nullptr; thing = thing->snext) { @@ -357,8 +346,7 @@ void RenderPolyBsp::AddSprite(AActor *thing, subsector_t *sub) vertices[i].varying[2] = (thing->Sector->lightlevel + actualextralight) / 255.0f; } - TriangleDrawer::draw(worldToClip, vertices, 4, TriangleDrawMode::Fan, true, 0, viewwidth, cliptop, clipbottom, tex); - TriangleDrawer::draw(worldToClip, vertices, 4, TriangleDrawMode::Fan, false, 0, viewwidth, cliptop, clipbottom, tex); + PolyTriangleDrawer::draw(worldToClip, vertices, 4, TriangleDrawMode::Fan, true, 0, viewwidth, 0, viewheight, tex); } void RenderPolyBsp::AddWallSprite(AActor *thing, subsector_t *sub) @@ -895,7 +883,7 @@ void RenderPolyWall::Render(const TriMatrix &worldToClip) vertices[3].varying[1] = (float)texcoords.v2; vertices[3].varying[2] = GetLightLevel() / 255.0f; - TriangleDrawer::draw(worldToClip, vertices, 4, TriangleDrawMode::Fan, true, 0, viewwidth, cliptop, clipbottom, tex); + PolyTriangleDrawer::draw(worldToClip, vertices, 4, TriangleDrawMode::Fan, true, 0, viewwidth, 0, viewheight, tex); } FTexture 
*RenderPolyWall::GetTexture() diff --git a/src/r_poly.h b/src/r_poly.h index cd1cd12152..7a143cf675 100644 --- a/src/r_poly.h +++ b/src/r_poly.h @@ -29,7 +29,7 @@ #include "doomdata.h" #include "r_utility.h" #include "r_main.h" -#include "r_triangle.h" +#include "r_poly_triangle.h" // DScreen accelerated sprite to be rendered class PolyScreenSprite diff --git a/src/r_poly_triangle.cpp b/src/r_poly_triangle.cpp new file mode 100644 index 0000000000..73c33d0243 --- /dev/null +++ b/src/r_poly_triangle.cpp @@ -0,0 +1,1054 @@ +/* +** Triangle drawers +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. 
+** +*/ + +#include +#include "templates.h" +#include "doomdef.h" +#include "i_system.h" +#include "w_wad.h" +#include "r_local.h" +#include "v_video.h" +#include "doomstat.h" +#include "st_stuff.h" +#include "g_game.h" +#include "g_level.h" +#include "r_data/r_translate.h" +#include "v_palette.h" +#include "r_data/colormaps.h" +#include "r_poly_triangle.h" + +void PolyTriangleDrawer::draw(const TriMatrix &objectToClip, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, int cliptop, int clipbottom, FTexture *texture) +{ + if (r_swtruecolor) + queue_arrays(objectToClip, vinput, vcount, mode, ccw, clipleft, clipright, cliptop, clipbottom, (const uint8_t*)texture->GetPixelsBgra(), texture->GetWidth(), texture->GetHeight(), 0); + else + draw_arrays(objectToClip, vinput, vcount, mode, ccw, clipleft, clipright, cliptop, clipbottom, texture->GetPixels(), texture->GetWidth(), texture->GetHeight(), 0, nullptr, &ScreenPolyTriangleDrawer::draw); +} + +void PolyTriangleDrawer::fill(const TriMatrix &objectToClip, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, int cliptop, int clipbottom, int solidcolor) +{ + if (r_swtruecolor) + queue_arrays(objectToClip, vinput, vcount, mode, ccw, clipleft, clipright, cliptop, clipbottom, nullptr, 0, 0, solidcolor); + else + draw_arrays(objectToClip, vinput, vcount, mode, ccw, clipleft, clipright, cliptop, clipbottom, nullptr, 0, 0, solidcolor, nullptr, &ScreenPolyTriangleDrawer::fill); +} + +void PolyTriangleDrawer::queue_arrays(const TriMatrix &objectToClip, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, int cliptop, int clipbottom, const uint8_t *texturePixels, int textureWidth, int textureHeight, int solidcolor) +{ + if (clipright < clipleft || clipleft < 0 || clipright > MAXWIDTH || clipbottom < cliptop || cliptop < 0 || clipbottom > MAXHEIGHT) + return; + + 
DrawerCommandQueue::QueueCommand(objectToClip, vinput, vcount, mode, ccw, clipleft, clipright, cliptop, clipbottom, texturePixels, textureWidth, textureHeight, solidcolor); +} + +void PolyTriangleDrawer::draw_arrays(const TriMatrix &objectToClip, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, int cliptop, int clipbottom, const uint8_t *texturePixels, int textureWidth, int textureHeight, int solidcolor, DrawerThread *thread, void(*drawfunc)(const ScreenPolyTriangleDrawerArgs *, DrawerThread *)) +{ + if (vcount < 3) + return; + + ScreenPolyTriangleDrawerArgs args; + args.dest = dc_destorg; + args.pitch = dc_pitch; + args.clipleft = clipleft; + args.clipright = clipright; + args.cliptop = cliptop; + args.clipbottom = clipbottom; + args.texturePixels = texturePixels; + args.textureWidth = textureWidth; + args.textureHeight = textureHeight; + args.solidcolor = solidcolor; + + TriVertex vert[3]; + if (mode == TriangleDrawMode::Normal) + { + for (int i = 0; i < vcount / 3; i++) + { + for (int j = 0; j < 3; j++) + vert[j] = shade_vertex(objectToClip, *(vinput++)); + draw_shaded_triangle(vert, ccw, &args, thread, drawfunc); + } + } + else if (mode == TriangleDrawMode::Fan) + { + vert[0] = shade_vertex(objectToClip, *(vinput++)); + vert[1] = shade_vertex(objectToClip, *(vinput++)); + for (int i = 2; i < vcount; i++) + { + vert[2] = shade_vertex(objectToClip, *(vinput++)); + draw_shaded_triangle(vert, ccw, &args, thread, drawfunc); + vert[1] = vert[2]; + } + } + else // TriangleDrawMode::Strip + { + vert[0] = shade_vertex(objectToClip, *(vinput++)); + vert[1] = shade_vertex(objectToClip, *(vinput++)); + for (int i = 2; i < vcount; i++) + { + vert[2] = shade_vertex(objectToClip, *(vinput++)); + draw_shaded_triangle(vert, ccw, &args, thread, drawfunc); + vert[0] = vert[1]; + vert[1] = vert[2]; + ccw = !ccw; + } + } +} + +TriVertex PolyTriangleDrawer::shade_vertex(const TriMatrix &objectToClip, TriVertex v) +{ + // Apply 
transform to get clip coordinates: + return objectToClip * v; +} + +void PolyTriangleDrawer::draw_shaded_triangle(const TriVertex *vert, bool ccw, ScreenPolyTriangleDrawerArgs *args, DrawerThread *thread, void(*drawfunc)(const ScreenPolyTriangleDrawerArgs *, DrawerThread *)) +{ + // Cull, clip and generate additional vertices as needed + TriVertex clippedvert[6]; + int numclipvert = 0; + clipedge(vert[0], vert[1], clippedvert, numclipvert); + clipedge(vert[1], vert[2], clippedvert, numclipvert); + clipedge(vert[2], vert[0], clippedvert, numclipvert); + + // Map to 2D viewport: + for (int j = 0; j < numclipvert; j++) + { + auto &v = clippedvert[j]; + + // Calculate normalized device coordinates: + v.w = 1.0f / v.w; + v.x *= v.w; + v.y *= v.w; + v.z *= v.w; + + // Apply viewport scale to get screen coordinates: + v.x = viewwidth * (1.0f + v.x) * 0.5f; + v.y = viewheight * (1.0f - v.y) * 0.5f; + } + + // Draw screen triangles + if (ccw) + { + for (int i = numclipvert; i > 1; i--) + { + args->v1 = &clippedvert[numclipvert - 1]; + args->v2 = &clippedvert[i - 1]; + args->v3 = &clippedvert[i - 2]; + drawfunc(args, thread); + } + } + else + { + for (int i = 2; i < numclipvert; i++) + { + args->v1 = &clippedvert[0]; + args->v2 = &clippedvert[i - 1]; + args->v3 = &clippedvert[i]; + drawfunc(args, thread); + } + } +} + +bool PolyTriangleDrawer::cullhalfspace(float clipdistance1, float clipdistance2, float &t1, float &t2) +{ + float d1 = clipdistance1 * (1.0f - t1) + clipdistance2 * t1; + float d2 = clipdistance1 * (1.0f - t2) + clipdistance2 * t2; + if (d1 < 0.0f && d2 < 0.0f) + return true; + + if (d1 < 0.0f) + t1 = MAX(-clipdistance1 / (clipdistance2 - clipdistance1), t1); + + if (d2 < 0.0f) + t2 = MIN(1.0f + clipdistance2 / (clipdistance1 - clipdistance2), t2); + + return false; +} + +void PolyTriangleDrawer::clipedge(const TriVertex &v1, const TriVertex &v2, TriVertex *clippedvert, int &numclipvert) +{ + // Clip and cull so that the following is true for all vertices: + 
// -v.w <= v.x <= v.w + // -v.w <= v.y <= v.w + // -v.w <= v.z <= v.w + + float t1 = 0.0f, t2 = 1.0f; + bool culled = + cullhalfspace(v1.x + v1.w, v2.x + v2.w, t1, t2) || + cullhalfspace(v1.w - v1.x, v2.w - v2.x, t1, t2) || + cullhalfspace(v1.y + v1.w, v2.y + v2.w, t1, t2) || + cullhalfspace(v1.w - v1.y, v2.w - v2.y, t1, t2) || + cullhalfspace(v1.z + v1.w, v2.z + v2.w, t1, t2) || + cullhalfspace(v1.w - v1.z, v2.w - v2.z, t1, t2); + if (culled) + return; + + if (t1 == 0.0f) + { + clippedvert[numclipvert++] = v1; + } + else + { + auto &v = clippedvert[numclipvert++]; + v.x = v1.x * (1.0f - t1) + v2.x * t1; + v.y = v1.y * (1.0f - t1) + v2.y * t1; + v.z = v1.z * (1.0f - t1) + v2.z * t1; + v.w = v1.w * (1.0f - t1) + v2.w * t1; + for (int i = 0; i < TriVertex::NumVarying; i++) + v.varying[i] = v1.varying[i] * (1.0f - t1) + v2.varying[i] * t1; + } + + if (t2 != 1.0f) + { + auto &v = clippedvert[numclipvert++]; + v.x = v1.x * (1.0f - t2) + v2.x * t2; + v.y = v1.y * (1.0f - t2) + v2.y * t2; + v.z = v1.z * (1.0f - t2) + v2.z * t2; + v.w = v1.w * (1.0f - t2) + v2.w * t2; + for (int i = 0; i < TriVertex::NumVarying; i++) + v.varying[i] = v1.varying[i] * (1.0f - t2) + v2.varying[i] * t2; + } +} + +///////////////////////////////////////////////////////////////////////////// + +void ScreenPolyTriangleDrawer::draw(const ScreenPolyTriangleDrawerArgs *args, DrawerThread *thread) +{ + uint8_t *dest = args->dest; + int pitch = args->pitch; + const TriVertex &v1 = *args->v1; + const TriVertex &v2 = *args->v2; + const TriVertex &v3 = *args->v3; + int clipleft = args->clipleft; + int clipright = args->clipright; + int cliptop = args->cliptop; + int clipbottom = args->clipbottom; + const uint8_t *texturePixels = args->texturePixels; + int textureWidth = args->textureWidth; + int textureHeight = args->textureHeight; + + // 28.4 fixed-point coordinates + const int Y1 = (int)round(16.0f * v1.y); + const int Y2 = (int)round(16.0f * v2.y); + const int Y3 = (int)round(16.0f * v3.y); + + const 
int X1 = (int)round(16.0f * v1.x); + const int X2 = (int)round(16.0f * v2.x); + const int X3 = (int)round(16.0f * v3.x); + + // Deltas + const int DX12 = X1 - X2; + const int DX23 = X2 - X3; + const int DX31 = X3 - X1; + + const int DY12 = Y1 - Y2; + const int DY23 = Y2 - Y3; + const int DY31 = Y3 - Y1; + + // Fixed-point deltas + const int FDX12 = DX12 << 4; + const int FDX23 = DX23 << 4; + const int FDX31 = DX31 << 4; + + const int FDY12 = DY12 << 4; + const int FDY23 = DY23 << 4; + const int FDY31 = DY31 << 4; + + // Bounding rectangle + int minx = MAX((MIN(MIN(X1, X2), X3) + 0xF) >> 4, clipleft); + int maxx = MIN((MAX(MAX(X1, X2), X3) + 0xF) >> 4, clipright - 1); + int miny = MAX((MIN(MIN(Y1, Y2), Y3) + 0xF) >> 4, cliptop); + int maxy = MIN((MAX(MAX(Y1, Y2), Y3) + 0xF) >> 4, clipbottom - 1); + if (minx >= maxx || miny >= maxy) + return; + + // Block size, standard 8x8 (must be power of two) + const int q = 8; + + // Start in corner of 8x8 block + minx &= ~(q - 1); + miny &= ~(q - 1); + + dest += miny * pitch; + + // Half-edge constants + int C1 = DY12 * X1 - DX12 * Y1; + int C2 = DY23 * X2 - DX23 * Y2; + int C3 = DY31 * X3 - DX31 * Y3; + + // Correct for fill convention + if (DY12 < 0 || (DY12 == 0 && DX12 > 0)) C1++; + if (DY23 < 0 || (DY23 == 0 && DX23 > 0)) C2++; + if (DY31 < 0 || (DY31 == 0 && DX31 > 0)) C3++; + + // Gradients + float gradWX = gradx(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + float gradWY = grady(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + float startW = v1.w + gradWX * (minx - v1.x) + gradWY * (miny - v1.y); + float gradVaryingX[TriVertex::NumVarying], gradVaryingY[TriVertex::NumVarying], startVarying[TriVertex::NumVarying]; + for (int i = 0; i < TriVertex::NumVarying; i++) + { + gradVaryingX[i] = gradx(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + gradVaryingY[i] = grady(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, 
v3.varying[i] * v3.w); + startVarying[i] = v1.varying[i] * v1.w + gradVaryingX[i] * (minx - v1.x) + gradVaryingY[i] * (miny - v1.y); + } + + // Loop through blocks + for (int y = miny; y < maxy; y += q, dest += q * pitch) + { + // Is this row of blocks done by this thread? + if (thread->skipped_by_thread(y / q)) continue; + + for (int x = minx; x < maxx; x += q) + { + // Corners of block + int x0 = x << 4; + int x1 = (x + q - 1) << 4; + int y0 = y << 4; + int y1 = (y + q - 1) << 4; + + // Evaluate half-space functions + bool a00 = C1 + DX12 * y0 - DY12 * x0 > 0; + bool a10 = C1 + DX12 * y0 - DY12 * x1 > 0; + bool a01 = C1 + DX12 * y1 - DY12 * x0 > 0; + bool a11 = C1 + DX12 * y1 - DY12 * x1 > 0; + int a = (a00 << 0) | (a10 << 1) | (a01 << 2) | (a11 << 3); + + bool b00 = C2 + DX23 * y0 - DY23 * x0 > 0; + bool b10 = C2 + DX23 * y0 - DY23 * x1 > 0; + bool b01 = C2 + DX23 * y1 - DY23 * x0 > 0; + bool b11 = C2 + DX23 * y1 - DY23 * x1 > 0; + int b = (b00 << 0) | (b10 << 1) | (b01 << 2) | (b11 << 3); + + bool c00 = C3 + DX31 * y0 - DY31 * x0 > 0; + bool c10 = C3 + DX31 * y0 - DY31 * x1 > 0; + bool c01 = C3 + DX31 * y1 - DY31 * x0 > 0; + bool c11 = C3 + DX31 * y1 - DY31 * x1 > 0; + int c = (c00 << 0) | (c10 << 1) | (c01 << 2) | (c11 << 3); + + // Skip block when outside an edge + if (a == 0x0 || b == 0x0 || c == 0x0) continue; + + // Check if block needs clipping + bool clipneeded = clipleft > x || clipright < (x + q) || cliptop > y || clipbottom < (y + q); + + // Calculate varying variables for affine block + float offx0 = (x - minx) + 0.5f; + float offy0 = (y - miny) + 0.5f; + float offx1 = offx0 + q; + float offy1 = offy0 + q; + float rcpWTL = 1.0f / (startW + offx0 * gradWX + offy0 * gradWY); + float rcpWTR = 1.0f / (startW + offx1 * gradWX + offy0 * gradWY); + float rcpWBL = 1.0f / (startW + offx0 * gradWX + offy1 * gradWY); + float rcpWBR = 1.0f / (startW + offx1 * gradWX + offy1 * gradWY); + float varyingTL[TriVertex::NumVarying]; + float 
varyingTR[TriVertex::NumVarying]; + float varyingBL[TriVertex::NumVarying]; + float varyingBR[TriVertex::NumVarying]; + for (int i = 0; i < TriVertex::NumVarying; i++) + { + varyingTL[i] = (startVarying[i] + offx0 * gradVaryingX[i] + offy0 * gradVaryingY[i]) * rcpWTL; + varyingTR[i] = (startVarying[i] + offx1 * gradVaryingX[i] + offy0 * gradVaryingY[i]) * rcpWTR; + varyingBL[i] = ((startVarying[i] + offx0 * gradVaryingX[i] + offy1 * gradVaryingY[i]) * rcpWBL - varyingTL[i]) * (1.0f / q); + varyingBR[i] = ((startVarying[i] + offx1 * gradVaryingX[i] + offy1 * gradVaryingY[i]) * rcpWBR - varyingTR[i]) * (1.0f / q); + } + + uint8_t *buffer = dest; + + // Accept whole block when totally covered + if (a == 0xF && b == 0xF && c == 0xF && !clipneeded) + { + for (int iy = 0; iy < q; iy++) + { + float varying[TriVertex::NumVarying], varyingStep[TriVertex::NumVarying]; + for (int i = 0; i < TriVertex::NumVarying; i++) + { + varying[i] = varyingTL[i] + varyingBL[i] * iy; + varyingStep[i] = (varyingTR[i] + varyingBR[i] * iy - varying[i]) * (1.0f / q); + } + + for (int ix = x; ix < x + q; ix++) + { + uint32_t ufrac = (uint32_t)((varying[0] - floor(varying[0])) * 0x100000000LL); + uint32_t vfrac = (uint32_t)((varying[1] - floor(varying[1])) * 0x100000000LL); + //uint32_t light = (uint32_t)clamp(varying[2] * 255.0f + 0.5f, 0.0f, 255.0f); + + uint32_t upos = ((ufrac >> 16) * textureWidth) >> 16; + uint32_t vpos = ((vfrac >> 16) * textureHeight) >> 16; + uint32_t uvoffset = upos * textureHeight + vpos; + + buffer[ix] = texturePixels[uvoffset]; + + for (int i = 0; i < TriVertex::NumVarying; i++) + varying[i] += varyingStep[i]; + } + + buffer += pitch; + } + } + else // Partially covered block + { + int CY1 = C1 + DX12 * y0 - DY12 * x0; + int CY2 = C2 + DX23 * y0 - DY23 * x0; + int CY3 = C3 + DX31 * y0 - DY31 * x0; + + for (int iy = 0; iy < q; iy++) + { + int CX1 = CY1; + int CX2 = CY2; + int CX3 = CY3; + + float varying[TriVertex::NumVarying], varyingStep[TriVertex::NumVarying]; + 
for (int i = 0; i < TriVertex::NumVarying; i++) + { + varying[i] = varyingTL[i] + varyingBL[i] * iy; + varyingStep[i] = (varyingTR[i] + varyingBR[i] * iy - varying[i]) * (1.0f / q); + } + + for (int ix = x; ix < x + q; ix++) + { + bool visible = ix >= clipleft && ix <= clipright && (cliptop <= y + iy) && (clipbottom > y + iy); + + if (CX1 > 0 && CX2 > 0 && CX3 > 0 && visible) + { + uint32_t ufrac = (uint32_t)((varying[0] - floor(varying[0])) * 0x100000000LL); + uint32_t vfrac = (uint32_t)((varying[1] - floor(varying[1])) * 0x100000000LL); + //uint32_t light = (uint32_t)clamp(varying[2] * 255.0f + 0.5f, 0.0f, 255.0f); + + uint32_t upos = ((ufrac >> 16) * textureWidth) >> 16; + uint32_t vpos = ((vfrac >> 16) * textureHeight) >> 16; + uint32_t uvoffset = upos * textureHeight + vpos; + + buffer[ix] = texturePixels[uvoffset]; + } + + for (int i = 0; i < TriVertex::NumVarying; i++) + varying[i] += varyingStep[i]; + + CX1 -= FDY12; + CX2 -= FDY23; + CX3 -= FDY31; + } + + CY1 += FDX12; + CY2 += FDX23; + CY3 += FDX31; + + buffer += pitch; + } + } + } + } +} + +void ScreenPolyTriangleDrawer::fill(const ScreenPolyTriangleDrawerArgs *args, DrawerThread *thread) +{ + uint8_t *dest = args->dest; + int pitch = args->pitch; + const TriVertex &v1 = *args->v1; + const TriVertex &v2 = *args->v2; + const TriVertex &v3 = *args->v3; + int clipleft = args->clipleft; + int clipright = args->clipright; + int cliptop = args->cliptop; + int clipbottom = args->clipbottom; + int solidcolor = args->solidcolor; + + // 28.4 fixed-point coordinates + const int Y1 = (int)round(16.0f * v1.y); + const int Y2 = (int)round(16.0f * v2.y); + const int Y3 = (int)round(16.0f * v3.y); + + const int X1 = (int)round(16.0f * v1.x); + const int X2 = (int)round(16.0f * v2.x); + const int X3 = (int)round(16.0f * v3.x); + + // Deltas + const int DX12 = X1 - X2; + const int DX23 = X2 - X3; + const int DX31 = X3 - X1; + + const int DY12 = Y1 - Y2; + const int DY23 = Y2 - Y3; + const int DY31 = Y3 - Y1; + + // 
Fixed-point deltas + const int FDX12 = DX12 << 4; + const int FDX23 = DX23 << 4; + const int FDX31 = DX31 << 4; + + const int FDY12 = DY12 << 4; + const int FDY23 = DY23 << 4; + const int FDY31 = DY31 << 4; + + // Bounding rectangle + int minx = MAX((MIN(MIN(X1, X2), X3) + 0xF) >> 4, clipleft); + int maxx = MIN((MAX(MAX(X1, X2), X3) + 0xF) >> 4, clipright - 1); + int miny = MAX((MIN(MIN(Y1, Y2), Y3) + 0xF) >> 4, cliptop); + int maxy = MIN((MAX(MAX(Y1, Y2), Y3) + 0xF) >> 4, clipbottom - 1); + if (minx >= maxx || miny >= maxy) + return; + + // Block size, standard 8x8 (must be power of two) + const int q = 8; + + // Start in corner of 8x8 block + minx &= ~(q - 1); + miny &= ~(q - 1); + + dest += miny * pitch; + + // Half-edge constants + int C1 = DY12 * X1 - DX12 * Y1; + int C2 = DY23 * X2 - DX23 * Y2; + int C3 = DY31 * X3 - DX31 * Y3; + + // Correct for fill convention + if (DY12 < 0 || (DY12 == 0 && DX12 > 0)) C1++; + if (DY23 < 0 || (DY23 == 0 && DX23 > 0)) C2++; + if (DY31 < 0 || (DY31 == 0 && DX31 > 0)) C3++; + + // Loop through blocks + for (int y = miny; y < maxy; y += q, dest += q * pitch) + { + // Is this row of blocks done by this thread? 
+ if (thread->skipped_by_thread(y / q)) continue; + + for (int x = minx; x < maxx; x += q) + { + // Corners of block + int x0 = x << 4; + int x1 = (x + q - 1) << 4; + int y0 = y << 4; + int y1 = (y + q - 1) << 4; + + // Evaluate half-space functions + bool a00 = C1 + DX12 * y0 - DY12 * x0 > 0; + bool a10 = C1 + DX12 * y0 - DY12 * x1 > 0; + bool a01 = C1 + DX12 * y1 - DY12 * x0 > 0; + bool a11 = C1 + DX12 * y1 - DY12 * x1 > 0; + int a = (a00 << 0) | (a10 << 1) | (a01 << 2) | (a11 << 3); + + bool b00 = C2 + DX23 * y0 - DY23 * x0 > 0; + bool b10 = C2 + DX23 * y0 - DY23 * x1 > 0; + bool b01 = C2 + DX23 * y1 - DY23 * x0 > 0; + bool b11 = C2 + DX23 * y1 - DY23 * x1 > 0; + int b = (b00 << 0) | (b10 << 1) | (b01 << 2) | (b11 << 3); + + bool c00 = C3 + DX31 * y0 - DY31 * x0 > 0; + bool c10 = C3 + DX31 * y0 - DY31 * x1 > 0; + bool c01 = C3 + DX31 * y1 - DY31 * x0 > 0; + bool c11 = C3 + DX31 * y1 - DY31 * x1 > 0; + int c = (c00 << 0) | (c10 << 1) | (c01 << 2) | (c11 << 3); + + // Skip block when outside an edge + if (a == 0x0 || b == 0x0 || c == 0x0) continue; + + // Check if block needs clipping + bool clipneeded = clipleft > x || clipright < (x + q) || cliptop > y || clipbottom < (y + q); + + uint8_t *buffer = dest; + + // Accept whole block when totally covered + if (a == 0xF && b == 0xF && c == 0xF && !clipneeded) + { + for (int iy = 0; iy < q; iy++) + { + for (int ix = x; ix < x + q; ix++) + { + buffer[ix] = solidcolor; + } + + buffer += pitch; + } + } + else // Partially covered block + { + int CY1 = C1 + DX12 * y0 - DY12 * x0; + int CY2 = C2 + DX23 * y0 - DY23 * x0; + int CY3 = C3 + DX31 * y0 - DY31 * x0; + + for (int iy = 0; iy < q; iy++) + { + int CX1 = CY1; + int CX2 = CY2; + int CX3 = CY3; + + for (int ix = x; ix < x + q; ix++) + { + bool visible = ix >= clipleft && ix <= clipright && (cliptop <= y + iy) && (clipbottom > y + iy); + + if (CX1 > 0 && CX2 > 0 && CX3 > 0 && visible) + { + buffer[ix] = solidcolor; + } + + CX1 -= FDY12; + CX2 -= FDY23; + CX3 -= FDY31; + 
} + + CY1 += FDX12; + CY2 += FDX23; + CY3 += FDX31; + + buffer += pitch; + } + } + } + } +} + +void ScreenPolyTriangleDrawer::draw32(const ScreenPolyTriangleDrawerArgs *args, DrawerThread *thread) +{ + uint32_t *dest = (uint32_t *)args->dest; + int pitch = args->pitch; + const TriVertex &v1 = *args->v1; + const TriVertex &v2 = *args->v2; + const TriVertex &v3 = *args->v3; + int clipleft = args->clipleft; + int clipright = args->clipright; + int cliptop = args->cliptop; + int clipbottom = args->clipbottom; + const uint32_t *texturePixels = (const uint32_t *)args->texturePixels; + int textureWidth = args->textureWidth; + int textureHeight = args->textureHeight; + + // 28.4 fixed-point coordinates + const int Y1 = (int)round(16.0f * v1.y); + const int Y2 = (int)round(16.0f * v2.y); + const int Y3 = (int)round(16.0f * v3.y); + + const int X1 = (int)round(16.0f * v1.x); + const int X2 = (int)round(16.0f * v2.x); + const int X3 = (int)round(16.0f * v3.x); + + // Deltas + const int DX12 = X1 - X2; + const int DX23 = X2 - X3; + const int DX31 = X3 - X1; + + const int DY12 = Y1 - Y2; + const int DY23 = Y2 - Y3; + const int DY31 = Y3 - Y1; + + // Fixed-point deltas + const int FDX12 = DX12 << 4; + const int FDX23 = DX23 << 4; + const int FDX31 = DX31 << 4; + + const int FDY12 = DY12 << 4; + const int FDY23 = DY23 << 4; + const int FDY31 = DY31 << 4; + + // Bounding rectangle + int minx = MAX((MIN(MIN(X1, X2), X3) + 0xF) >> 4, clipleft); + int maxx = MIN((MAX(MAX(X1, X2), X3) + 0xF) >> 4, clipright - 1); + int miny = MAX((MIN(MIN(Y1, Y2), Y3) + 0xF) >> 4, cliptop); + int maxy = MIN((MAX(MAX(Y1, Y2), Y3) + 0xF) >> 4, clipbottom - 1); + if (minx >= maxx || miny >= maxy) + return; + + // Block size, standard 8x8 (must be power of two) + const int q = 8; + + // Start in corner of 8x8 block + minx &= ~(q - 1); + miny &= ~(q - 1); + + dest += miny * pitch; + + // Half-edge constants + int C1 = DY12 * X1 - DX12 * Y1; + int C2 = DY23 * X2 - DX23 * Y2; + int C3 = DY31 * X3 - DX31 * 
Y3; + + // Correct for fill convention + if (DY12 < 0 || (DY12 == 0 && DX12 > 0)) C1++; + if (DY23 < 0 || (DY23 == 0 && DX23 > 0)) C2++; + if (DY31 < 0 || (DY31 == 0 && DX31 > 0)) C3++; + + // Gradients + float gradWX = gradx(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + float gradWY = grady(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + float startW = v1.w + gradWX * (minx - v1.x) + gradWY * (miny - v1.y); + float gradVaryingX[TriVertex::NumVarying], gradVaryingY[TriVertex::NumVarying], startVarying[TriVertex::NumVarying]; + for (int i = 0; i < TriVertex::NumVarying; i++) + { + gradVaryingX[i] = gradx(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + gradVaryingY[i] = grady(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + startVarying[i] = v1.varying[i] * v1.w + gradVaryingX[i] * (minx - v1.x) + gradVaryingY[i] * (miny - v1.y); + } + + // Loop through blocks + for (int y = miny; y < maxy; y += q, dest += q * pitch) + { + // Is this row of blocks done by this thread? 
+ if (thread->skipped_by_thread(y / q)) continue; + + for (int x = minx; x < maxx; x += q) + { + // Corners of block + int x0 = x << 4; + int x1 = (x + q - 1) << 4; + int y0 = y << 4; + int y1 = (y + q - 1) << 4; + + // Evaluate half-space functions + bool a00 = C1 + DX12 * y0 - DY12 * x0 > 0; + bool a10 = C1 + DX12 * y0 - DY12 * x1 > 0; + bool a01 = C1 + DX12 * y1 - DY12 * x0 > 0; + bool a11 = C1 + DX12 * y1 - DY12 * x1 > 0; + int a = (a00 << 0) | (a10 << 1) | (a01 << 2) | (a11 << 3); + + bool b00 = C2 + DX23 * y0 - DY23 * x0 > 0; + bool b10 = C2 + DX23 * y0 - DY23 * x1 > 0; + bool b01 = C2 + DX23 * y1 - DY23 * x0 > 0; + bool b11 = C2 + DX23 * y1 - DY23 * x1 > 0; + int b = (b00 << 0) | (b10 << 1) | (b01 << 2) | (b11 << 3); + + bool c00 = C3 + DX31 * y0 - DY31 * x0 > 0; + bool c10 = C3 + DX31 * y0 - DY31 * x1 > 0; + bool c01 = C3 + DX31 * y1 - DY31 * x0 > 0; + bool c11 = C3 + DX31 * y1 - DY31 * x1 > 0; + int c = (c00 << 0) | (c10 << 1) | (c01 << 2) | (c11 << 3); + + // Skip block when outside an edge + if (a == 0x0 || b == 0x0 || c == 0x0) continue; + + // Check if block needs clipping + bool clipneeded = clipleft > x || clipright < (x + q) || cliptop > y || clipbottom < (y + q); + + // Calculate varying variables for affine block + float offx0 = (x - minx) + 0.5f; + float offy0 = (y - miny) + 0.5f; + float offx1 = offx0 + q; + float offy1 = offy0 + q; + float rcpWTL = 1.0f / (startW + offx0 * gradWX + offy0 * gradWY); + float rcpWTR = 1.0f / (startW + offx1 * gradWX + offy0 * gradWY); + float rcpWBL = 1.0f / (startW + offx0 * gradWX + offy1 * gradWY); + float rcpWBR = 1.0f / (startW + offx1 * gradWX + offy1 * gradWY); + float varyingTL[TriVertex::NumVarying]; + float varyingTR[TriVertex::NumVarying]; + float varyingBL[TriVertex::NumVarying]; + float varyingBR[TriVertex::NumVarying]; + for (int i = 0; i < TriVertex::NumVarying; i++) + { + varyingTL[i] = (startVarying[i] + offx0 * gradVaryingX[i] + offy0 * gradVaryingY[i]) * rcpWTL; + varyingTR[i] = (startVarying[i] 
+ offx1 * gradVaryingX[i] + offy0 * gradVaryingY[i]) * rcpWTR; + varyingBL[i] = ((startVarying[i] + offx0 * gradVaryingX[i] + offy1 * gradVaryingY[i]) * rcpWBL - varyingTL[i]) * (1.0f / q); + varyingBR[i] = ((startVarying[i] + offx1 * gradVaryingX[i] + offy1 * gradVaryingY[i]) * rcpWBR - varyingTR[i]) * (1.0f / q); + } + + uint32_t *buffer = dest; + + // Accept whole block when totally covered + if (a == 0xF && b == 0xF && c == 0xF && !clipneeded) + { + for (int iy = 0; iy < q; iy++) + { + float varying[TriVertex::NumVarying], varyingStep[TriVertex::NumVarying]; + for (int i = 0; i < TriVertex::NumVarying; i++) + { + varying[i] = varyingTL[i] + varyingBL[i] * iy; + varyingStep[i] = (varyingTR[i] + varyingBR[i] * iy - varying[i]) * (1.0f / q); + } + + for (int ix = x; ix < x + q; ix++) + { + uint32_t ufrac = (uint32_t)((varying[0] - floor(varying[0])) * 0x100000000LL); + uint32_t vfrac = (uint32_t)((varying[1] - floor(varying[1])) * 0x100000000LL); + uint32_t light = (uint32_t)clamp(varying[2] * 256.0f + 0.5f, 0.0f, 256.0f); + + uint32_t upos = ((ufrac >> 16) * textureWidth) >> 16; + uint32_t vpos = ((vfrac >> 16) * textureHeight) >> 16; + uint32_t uvoffset = upos * textureHeight + vpos; + + uint32_t fg = texturePixels[uvoffset]; + uint32_t fg_red = (RPART(fg) * light) >> 8; + uint32_t fg_green = (GPART(fg) * light) >> 8; + uint32_t fg_blue = (BPART(fg) * light) >> 8; + uint32_t fg_alpha = APART(fg); + + if (fg_alpha > 127) + buffer[ix] = 0xff000000 | (fg_red << 16) | (fg_green << 8) | fg_blue; + + for (int i = 0; i < TriVertex::NumVarying; i++) + varying[i] += varyingStep[i]; + } + + buffer += pitch; + } + } + else // Partially covered block + { + int CY1 = C1 + DX12 * y0 - DY12 * x0; + int CY2 = C2 + DX23 * y0 - DY23 * x0; + int CY3 = C3 + DX31 * y0 - DY31 * x0; + + for (int iy = 0; iy < q; iy++) + { + int CX1 = CY1; + int CX2 = CY2; + int CX3 = CY3; + + float varying[TriVertex::NumVarying], varyingStep[TriVertex::NumVarying]; + for (int i = 0; i < 
TriVertex::NumVarying; i++) + { + varying[i] = varyingTL[i] + varyingBL[i] * iy; + varyingStep[i] = (varyingTR[i] + varyingBR[i] * iy - varying[i]) * (1.0f / q); + } + + for (int ix = x; ix < x + q; ix++) + { + bool visible = ix >= clipleft && ix <= clipright && (cliptop <= y + iy) && (clipbottom > y + iy); + + if (CX1 > 0 && CX2 > 0 && CX3 > 0 && visible) + { + uint32_t ufrac = (uint32_t)((varying[0] - floor(varying[0])) * 0x100000000LL); + uint32_t vfrac = (uint32_t)((varying[1] - floor(varying[1])) * 0x100000000LL); + uint32_t light = (uint32_t)clamp(varying[2] * 256.0f + 0.5f, 0.0f, 256.0f); + + uint32_t upos = ((ufrac >> 16) * textureWidth) >> 16; + uint32_t vpos = ((vfrac >> 16) * textureHeight) >> 16; + uint32_t uvoffset = upos * textureHeight + vpos; + + uint32_t fg = texturePixels[uvoffset]; + uint32_t fg_red = (RPART(fg) * light) >> 8; + uint32_t fg_green = (GPART(fg) * light) >> 8; + uint32_t fg_blue = (BPART(fg) * light) >> 8; + uint32_t fg_alpha = APART(fg); + + if (fg_alpha > 127) + buffer[ix] = 0xff000000 | (fg_red << 16) | (fg_green << 8) | fg_blue; + } + + for (int i = 0; i < TriVertex::NumVarying; i++) + varying[i] += varyingStep[i]; + + CX1 -= FDY12; + CX2 -= FDY23; + CX3 -= FDY31; + } + + CY1 += FDX12; + CY2 += FDX23; + CY3 += FDX31; + + buffer += pitch; + } + } + } + } +} + +void ScreenPolyTriangleDrawer::fill32(const ScreenPolyTriangleDrawerArgs *args, DrawerThread *thread) +{ + uint32_t *dest = (uint32_t *)args->dest; + int pitch = args->pitch; + const TriVertex &v1 = *args->v1; + const TriVertex &v2 = *args->v2; + const TriVertex &v3 = *args->v3; + int clipleft = args->clipleft; + int clipright = args->clipright; + int cliptop = args->cliptop; + int clipbottom = args->clipbottom; + int solidcolor = args->solidcolor; + + // 28.4 fixed-point coordinates + const int Y1 = (int)round(16.0f * v1.y); + const int Y2 = (int)round(16.0f * v2.y); + const int Y3 = (int)round(16.0f * v3.y); + + const int X1 = (int)round(16.0f * v1.x); + const int X2 = 
(int)round(16.0f * v2.x); + const int X3 = (int)round(16.0f * v3.x); + + // Deltas + const int DX12 = X1 - X2; + const int DX23 = X2 - X3; + const int DX31 = X3 - X1; + + const int DY12 = Y1 - Y2; + const int DY23 = Y2 - Y3; + const int DY31 = Y3 - Y1; + + // Fixed-point deltas + const int FDX12 = DX12 << 4; + const int FDX23 = DX23 << 4; + const int FDX31 = DX31 << 4; + + const int FDY12 = DY12 << 4; + const int FDY23 = DY23 << 4; + const int FDY31 = DY31 << 4; + + // Bounding rectangle + int minx = MAX((MIN(MIN(X1, X2), X3) + 0xF) >> 4, clipleft); + int maxx = MIN((MAX(MAX(X1, X2), X3) + 0xF) >> 4, clipright - 1); + int miny = MAX((MIN(MIN(Y1, Y2), Y3) + 0xF) >> 4, cliptop); + int maxy = MIN((MAX(MAX(Y1, Y2), Y3) + 0xF) >> 4, clipbottom - 1); + if (minx >= maxx || miny >= maxy) + return; + + // Block size, standard 8x8 (must be power of two) + const int q = 8; + + // Start in corner of 8x8 block + minx &= ~(q - 1); + miny &= ~(q - 1); + + dest += miny * pitch; + + // Half-edge constants + int C1 = DY12 * X1 - DX12 * Y1; + int C2 = DY23 * X2 - DX23 * Y2; + int C3 = DY31 * X3 - DX31 * Y3; + + // Correct for fill convention + if (DY12 < 0 || (DY12 == 0 && DX12 > 0)) C1++; + if (DY23 < 0 || (DY23 == 0 && DX23 > 0)) C2++; + if (DY31 < 0 || (DY31 == 0 && DX31 > 0)) C3++; + + // Loop through blocks + for (int y = miny; y < maxy; y += q, dest += q * pitch) + { + // Is this row of blocks done by this thread? 
+ if (thread->skipped_by_thread(y / q)) continue; + + for (int x = minx; x < maxx; x += q) + { + // Corners of block + int x0 = x << 4; + int x1 = (x + q - 1) << 4; + int y0 = y << 4; + int y1 = (y + q - 1) << 4; + + // Evaluate half-space functions + bool a00 = C1 + DX12 * y0 - DY12 * x0 > 0; + bool a10 = C1 + DX12 * y0 - DY12 * x1 > 0; + bool a01 = C1 + DX12 * y1 - DY12 * x0 > 0; + bool a11 = C1 + DX12 * y1 - DY12 * x1 > 0; + int a = (a00 << 0) | (a10 << 1) | (a01 << 2) | (a11 << 3); + + bool b00 = C2 + DX23 * y0 - DY23 * x0 > 0; + bool b10 = C2 + DX23 * y0 - DY23 * x1 > 0; + bool b01 = C2 + DX23 * y1 - DY23 * x0 > 0; + bool b11 = C2 + DX23 * y1 - DY23 * x1 > 0; + int b = (b00 << 0) | (b10 << 1) | (b01 << 2) | (b11 << 3); + + bool c00 = C3 + DX31 * y0 - DY31 * x0 > 0; + bool c10 = C3 + DX31 * y0 - DY31 * x1 > 0; + bool c01 = C3 + DX31 * y1 - DY31 * x0 > 0; + bool c11 = C3 + DX31 * y1 - DY31 * x1 > 0; + int c = (c00 << 0) | (c10 << 1) | (c01 << 2) | (c11 << 3); + + // Skip block when outside an edge + if (a == 0x0 || b == 0x0 || c == 0x0) continue; + + // Check if block needs clipping + bool clipneeded = clipleft > x || clipright < (x + q) || cliptop > y || clipbottom < (y + q); + + uint32_t *buffer = dest; + + // Accept whole block when totally covered + if (a == 0xF && b == 0xF && c == 0xF && !clipneeded) + { + for (int iy = 0; iy < q; iy++) + { + for (int ix = x; ix < x + q; ix++) + { + buffer[ix] = solidcolor; + } + + buffer += pitch; + } + } + else // Partially covered block + { + int CY1 = C1 + DX12 * y0 - DY12 * x0; + int CY2 = C2 + DX23 * y0 - DY23 * x0; + int CY3 = C3 + DX31 * y0 - DY31 * x0; + + for (int iy = 0; iy < q; iy++) + { + int CX1 = CY1; + int CX2 = CY2; + int CX3 = CY3; + + for (int ix = x; ix < x + q; ix++) + { + bool visible = ix >= clipleft && ix <= clipright && (cliptop <= y + iy) && (clipbottom > y + iy); + + if (CX1 > 0 && CX2 > 0 && CX3 > 0 && visible) + { + buffer[ix] = solidcolor; + } + + CX1 -= FDY12; + CX2 -= FDY23; + CX3 -= FDY31; + 
} + + CY1 += FDX12; + CY2 += FDX23; + CY3 += FDX31; + + buffer += pitch; + } + } + } + } +} + +float ScreenPolyTriangleDrawer::gradx(float x0, float y0, float x1, float y1, float x2, float y2, float c0, float c1, float c2) +{ + float top = (c1 - c2) * (y0 - y2) - (c0 - c2) * (y1 - y2); + float bottom = (x1 - x2) * (y0 - y2) - (x0 - x2) * (y1 - y2); + return top / bottom; +} + +float ScreenPolyTriangleDrawer::grady(float x0, float y0, float x1, float y1, float x2, float y2, float c0, float c1, float c2) +{ + float top = (c1 - c2) * (x0 - x2) - (c0 - c2) * (x1 - x2); + float bottom = -((x1 - x2) * (y0 - y2) - (x0 - x2) * (y1 - y2)); + return top / bottom; +} + +///////////////////////////////////////////////////////////////////////////// + +DrawPolyTrianglesCommand::DrawPolyTrianglesCommand(const TriMatrix &objectToClip, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, int cliptop, int clipbottom, const uint8_t *texturePixels, int textureWidth, int textureHeight, int solidcolor) + : objectToClip(objectToClip), vinput(vinput), vcount(vcount), mode(mode), ccw(ccw), clipleft(clipleft), clipright(clipright), cliptop(cliptop), clipbottom(clipbottom), texturePixels(texturePixels), textureWidth(textureWidth), textureHeight(textureHeight), solidcolor(solidcolor) +{ +} + +void DrawPolyTrianglesCommand::Execute(DrawerThread *thread) +{ + PolyTriangleDrawer::draw_arrays( + objectToClip, vinput, vcount, mode, ccw, + clipleft, clipright, cliptop, clipbottom, + texturePixels, textureWidth, textureHeight, solidcolor, + thread, texturePixels ? 
ScreenPolyTriangleDrawer::draw32 : ScreenPolyTriangleDrawer::fill32); +} + +FString DrawPolyTrianglesCommand::DebugInfo() +{ + return "DrawPolyTriangles"; +} diff --git a/src/r_poly_triangle.h b/src/r_poly_triangle.h new file mode 100644 index 0000000000..de9cd543ca --- /dev/null +++ b/src/r_poly_triangle.h @@ -0,0 +1,104 @@ +/* +** Triangle drawers +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. 
+** +*/ + + +#ifndef __R_POLY_TRIANGLE__ +#define __R_POLY_TRIANGLE__ + +#include "r_triangle.h" + +struct ScreenPolyTriangleDrawerArgs; + +class PolyTriangleDrawer +{ +public: + static void draw(const TriMatrix &objectToClip, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, int cliptop, int clipbottom, FTexture *texture); + static void fill(const TriMatrix &objectToClip, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, int cliptop, int clipbottom, int solidcolor); + +private: + static TriVertex shade_vertex(const TriMatrix &objectToClip, TriVertex v); + static void draw_arrays(const TriMatrix &objectToClip, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, int cliptop, int clipbottom, const uint8_t *texturePixels, int textureWidth, int textureHeight, int solidcolor, DrawerThread *thread, void(*drawfunc)(const ScreenPolyTriangleDrawerArgs *, DrawerThread *)); + static void draw_shaded_triangle(const TriVertex *vertices, bool ccw, ScreenPolyTriangleDrawerArgs *args, DrawerThread *thread, void(*drawfunc)(const ScreenPolyTriangleDrawerArgs *, DrawerThread *)); + static bool cullhalfspace(float clipdistance1, float clipdistance2, float &t1, float &t2); + static void clipedge(const TriVertex &v1, const TriVertex &v2, TriVertex *clippedvert, int &numclipvert); + + static void queue_arrays(const TriMatrix &objectToClip, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, int cliptop, int clipbottom, const uint8_t *texturePixels, int textureWidth, int textureHeight, int solidcolor); + + friend class DrawPolyTrianglesCommand; +}; + +struct ScreenPolyTriangleDrawerArgs +{ + uint8_t *dest; + int pitch; + TriVertex *v1; + TriVertex *v2; + TriVertex *v3; + int clipleft; + int clipright; + int cliptop; + int clipbottom; + const uint8_t *texturePixels; + int textureWidth; + int 
textureHeight; + int solidcolor; +}; + +class ScreenPolyTriangleDrawer +{ +public: + static void draw(const ScreenPolyTriangleDrawerArgs *args, DrawerThread *thread); + static void fill(const ScreenPolyTriangleDrawerArgs *args, DrawerThread *thread); + + static void draw32(const ScreenPolyTriangleDrawerArgs *args, DrawerThread *thread); + static void fill32(const ScreenPolyTriangleDrawerArgs *args, DrawerThread *thread); + +private: + static float gradx(float x0, float y0, float x1, float y1, float x2, float y2, float c0, float c1, float c2); + static float grady(float x0, float y0, float x1, float y1, float x2, float y2, float c0, float c1, float c2); +}; + +class DrawPolyTrianglesCommand : public DrawerCommand +{ +public: + DrawPolyTrianglesCommand(const TriMatrix &objectToClip, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, int cliptop, int clipbottom, const uint8_t *texturePixels, int textureWidth, int textureHeight, int solidcolor); + + void Execute(DrawerThread *thread) override; + FString DebugInfo() override; + +private: + TriMatrix objectToClip; + const TriVertex *vinput; + int vcount; + TriangleDrawMode mode; + bool ccw; + int clipleft; + int clipright; + int cliptop; + int clipbottom; + const uint8_t *texturePixels; + int textureWidth; + int textureHeight; + int solidcolor; +}; + +#endif From 6620d99fbb62fb741980323ba0a451f95b2a467f Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 8 Nov 2016 15:27:45 +0100 Subject: [PATCH 280/912] Fix frame buffer bounds and access errors --- src/r_poly.cpp | 1 + src/r_poly_triangle.cpp | 8 ++++---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/r_poly.cpp b/src/r_poly.cpp index 6542afcfb8..bd9003fab3 100644 --- a/src/r_poly.cpp +++ b/src/r_poly.cpp @@ -67,6 +67,7 @@ void RenderPolyBsp::Render() RenderSubsector(*it); RenderPlayerSprites(); + DrawerCommandQueue::WaitForWorkers(); RenderScreenSprites(); // To do: should be called by 
FSoftwareRenderer::DrawRemainingPlayerSprites instead of here } diff --git a/src/r_poly_triangle.cpp b/src/r_poly_triangle.cpp index 73c33d0243..d446485510 100644 --- a/src/r_poly_triangle.cpp +++ b/src/r_poly_triangle.cpp @@ -425,7 +425,7 @@ void ScreenPolyTriangleDrawer::draw(const ScreenPolyTriangleDrawerArgs *args, Dr for (int ix = x; ix < x + q; ix++) { - bool visible = ix >= clipleft && ix <= clipright && (cliptop <= y + iy) && (clipbottom > y + iy); + bool visible = ix >= clipleft && ix < clipright && (cliptop <= y + iy) && (clipbottom > y + iy); if (CX1 > 0 && CX2 > 0 && CX3 > 0 && visible) { @@ -594,7 +594,7 @@ void ScreenPolyTriangleDrawer::fill(const ScreenPolyTriangleDrawerArgs *args, Dr for (int ix = x; ix < x + q; ix++) { - bool visible = ix >= clipleft && ix <= clipright && (cliptop <= y + iy) && (clipbottom > y + iy); + bool visible = ix >= clipleft && ix < clipright && (cliptop <= y + iy) && (clipbottom > y + iy); if (CX1 > 0 && CX2 > 0 && CX3 > 0 && visible) { @@ -819,7 +819,7 @@ void ScreenPolyTriangleDrawer::draw32(const ScreenPolyTriangleDrawerArgs *args, for (int ix = x; ix < x + q; ix++) { - bool visible = ix >= clipleft && ix <= clipright && (cliptop <= y + iy) && (clipbottom > y + iy); + bool visible = ix >= clipleft && ix < clipright && (cliptop <= y + iy) && (clipbottom > y + iy); if (CX1 > 0 && CX2 > 0 && CX3 > 0 && visible) { @@ -995,7 +995,7 @@ void ScreenPolyTriangleDrawer::fill32(const ScreenPolyTriangleDrawerArgs *args, for (int ix = x; ix < x + q; ix++) { - bool visible = ix >= clipleft && ix <= clipright && (cliptop <= y + iy) && (clipbottom > y + iy); + bool visible = ix >= clipleft && ix < clipright && (cliptop <= y + iy) && (clipbottom > y + iy); if (CX1 > 0 && CX2 > 0 && CX3 > 0 && visible) { From 28d1cdc1ccfe9bd64816ecab0def9db07774abf7 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 8 Nov 2016 16:16:24 +0100 Subject: [PATCH 281/912] Add TriUniforms and move light to it --- src/r_plane.cpp | 97 
+++++++++++++++++++++++------------------ src/r_poly.cpp | 80 +++++++++++++++++++++------------ src/r_poly_triangle.cpp | 48 ++++++++++---------- src/r_poly_triangle.h | 15 ++++--- src/r_triangle.cpp | 48 ++++++++++---------- src/r_triangle.h | 43 ++++++++++++++---- 6 files changed, 194 insertions(+), 137 deletions(-) diff --git a/src/r_plane.cpp b/src/r_plane.cpp index 66738346fa..17728e80c8 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -1025,58 +1025,58 @@ static void R_DrawCubeSky(visplane_t *pl) static TriVertex cube[6 * 6] = { // Top - { -1.0f, 1.0f, 0.6f, 1.0f, 1.0f, 0.0f, 1.0f }, - { 1.0f, 1.0f, 0.6f, 1.0f, 0.0f, 0.0f, 1.0f }, - { 1.0f, -1.0f, 0.6f, 1.0f, 0.0f, 0.1f, 1.0f }, + { -1.0f, 1.0f, 0.6f, 1.0f, 1.0f, 0.0f }, + { 1.0f, 1.0f, 0.6f, 1.0f, 0.0f, 0.0f }, + { 1.0f, -1.0f, 0.6f, 1.0f, 0.0f, 0.1f }, - { 1.0f, -1.0f, 0.6f, 1.0f, 0.0f, 0.1f, 1.0f }, - { -1.0f, -1.0f, 0.6f, 1.0f, 1.0f, 0.1f, 1.0f }, - { -1.0f, 1.0f, 0.6f, 1.0f, 1.0f, 0.0f, 1.0f }, + { 1.0f, -1.0f, 0.6f, 1.0f, 0.0f, 0.1f }, + { -1.0f, -1.0f, 0.6f, 1.0f, 1.0f, 0.1f }, + { -1.0f, 1.0f, 0.6f, 1.0f, 1.0f, 0.0f }, // Bottom - { 1.0f, -1.0f, -1.0f, 1.0f, 1.0f, 0.9f, 1.0f }, - { 1.0f, 1.0f, -1.0f, 1.0f, 1.0f, 1.0f, 1.0f }, - { -1.0f, 1.0f, -1.0f, 1.0f, 0.0f, 1.0f, 1.0f }, + { 1.0f, -1.0f, -1.0f, 1.0f, 1.0f, 0.9f }, + { 1.0f, 1.0f, -1.0f, 1.0f, 1.0f, 1.0f }, + { -1.0f, 1.0f, -1.0f, 1.0f, 0.0f, 1.0f }, - { -1.0f, 1.0f, -1.0f, 1.0f, 0.0f, 1.0f, 1.0f }, - { -1.0f, -1.0f, -1.0f, 1.0f, 0.0f, 0.9f, 1.0f }, - { 1.0f, -1.0f, -1.0f, 1.0f, 1.0f, 0.9f, 1.0f }, + { -1.0f, 1.0f, -1.0f, 1.0f, 0.0f, 1.0f }, + { -1.0f, -1.0f, -1.0f, 1.0f, 0.0f, 0.9f }, + { 1.0f, -1.0f, -1.0f, 1.0f, 1.0f, 0.9f }, // Front - { 1.0f, 1.0f, -1.0f, 1.0f, 1.0f, 2.0f, 1.0f }, - { 1.0f, 1.0f, 0.6f, 1.0f, 1.0f, 0.0f, 1.0f }, - { -1.0f, 1.0f, 0.6f, 1.0f, 0.0f, 0.0f, 1.0f }, + { 1.0f, 1.0f, -1.0f, 1.0f, 1.0f, 2.0f }, + { 1.0f, 1.0f, 0.6f, 1.0f, 1.0f, 0.0f }, + { -1.0f, 1.0f, 0.6f, 1.0f, 0.0f, 0.0f }, - { -1.0f, 1.0f, 0.6f, 1.0f, 0.0f, 
0.0f, 1.0f }, - { -1.0f, 1.0f, -1.0f, 1.0f, 0.0f, 2.0f, 1.0f }, - { 1.0f, 1.0f, -1.0f, 1.0f, 1.0f, 2.0f, 1.0f }, + { -1.0f, 1.0f, 0.6f, 1.0f, 0.0f, 0.0f }, + { -1.0f, 1.0f, -1.0f, 1.0f, 0.0f, 2.0f }, + { 1.0f, 1.0f, -1.0f, 1.0f, 1.0f, 2.0f }, // Back - { -1.0f, -1.0f, 0.6f, 1.0f, 1.0f, 0.0f, 1.0f }, - { 1.0f, -1.0f, 0.6f, 1.0f, 0.0f, 0.0f, 1.0f }, - { 1.0f, -1.0f, -1.0f, 1.0f, 0.0f, 2.0f, 1.0f }, + { -1.0f, -1.0f, 0.6f, 1.0f, 1.0f, 0.0f }, + { 1.0f, -1.0f, 0.6f, 1.0f, 0.0f, 0.0f }, + { 1.0f, -1.0f, -1.0f, 1.0f, 0.0f, 2.0f }, - { 1.0f, -1.0f, -1.0f, 1.0f, 0.0f, 2.0f, 1.0f }, - { -1.0f, -1.0f, -1.0f, 1.0f, 1.0f, 2.0f, 1.0f }, - { -1.0f, -1.0f, 0.6f, 1.0f, 1.0f, 0.0f, 1.0f }, + { 1.0f, -1.0f, -1.0f, 1.0f, 0.0f, 2.0f }, + { -1.0f, -1.0f, -1.0f, 1.0f, 1.0f, 2.0f }, + { -1.0f, -1.0f, 0.6f, 1.0f, 1.0f, 0.0f }, // Right - { 1.0f, -1.0f, 0.6f, 1.0f, 1.0f, 0.0f, 1.0f }, - { 1.0f, 1.0f, 0.6f, 1.0f, 0.0f, 0.0f, 1.0f }, - { 1.0f, 1.0f, -1.0f, 1.0f, 0.0f, 2.0f, 1.0f }, + { 1.0f, -1.0f, 0.6f, 1.0f, 1.0f, 0.0f }, + { 1.0f, 1.0f, 0.6f, 1.0f, 0.0f, 0.0f }, + { 1.0f, 1.0f, -1.0f, 1.0f, 0.0f, 2.0f }, - { 1.0f, 1.0f, -1.0f, 1.0f, 0.0f, 2.0f, 1.0f }, - { 1.0f, -1.0f, -1.0f, 1.0f, 1.0f, 2.0f, 1.0f }, - { 1.0f, -1.0f, 0.6f, 1.0f, 1.0f, 0.0f, 1.0f }, + { 1.0f, 1.0f, -1.0f, 1.0f, 0.0f, 2.0f }, + { 1.0f, -1.0f, -1.0f, 1.0f, 1.0f, 2.0f }, + { 1.0f, -1.0f, 0.6f, 1.0f, 1.0f, 0.0f }, // Left - { -1.0f, 1.0f, -1.0f, 1.0f, 1.0f, 2.0f, 1.0f }, - { -1.0f, 1.0f, 0.6f, 1.0f, 1.0f, 0.0f, 1.0f }, - { -1.0f, -1.0f, 0.6f, 1.0f, 0.0f, 0.0f, 1.0f }, + { -1.0f, 1.0f, -1.0f, 1.0f, 1.0f, 2.0f }, + { -1.0f, 1.0f, 0.6f, 1.0f, 1.0f, 0.0f }, + { -1.0f, -1.0f, 0.6f, 1.0f, 0.0f, 0.0f }, - { -1.0f, -1.0f, 0.6f, 1.0f, 0.0f, 0.0f, 1.0f }, - { -1.0f, -1.0f, -1.0f, 1.0f, 0.0f, 2.0f, 1.0f }, - { -1.0f, 1.0f, -1.0f, 1.0f, 1.0f, 2.0f, 1.0f } + { -1.0f, -1.0f, 0.6f, 1.0f, 0.0f, 0.0f }, + { -1.0f, -1.0f, -1.0f, 1.0f, 0.0f, 2.0f }, + { -1.0f, 1.0f, -1.0f, 1.0f, 1.0f, 2.0f } }; TriMatrix objectToWorld = 
TriMatrix::translate((float)ViewPos.X, (float)ViewPos.Y, (float)ViewPos.Z) * TriMatrix::scale(1000.0f, 1000.0f, 1000.0f); @@ -1090,9 +1090,14 @@ static void R_DrawCubeSky(visplane_t *pl) solid_top = RGB32k.RGB[(RPART(solid_top) >> 3)][(GPART(solid_top) >> 3)][(BPART(solid_top) >> 3)]; solid_bottom = RGB32k.RGB[(RPART(solid_bottom) >> 3)][(GPART(solid_bottom) >> 3)][(BPART(solid_bottom) >> 3)]; - TriangleDrawer::fill(objectToClip, cube, 6, TriangleDrawMode::Normal, false, x1, x2 - 1, uwal, dwal, solid_top); - TriangleDrawer::fill(objectToClip, cube + 6, 6, TriangleDrawMode::Normal, false, x1, x2 - 1, uwal, dwal, solid_bottom); - TriangleDrawer::draw(objectToClip, cube + 2 * 6, 4 * 6, TriangleDrawMode::Normal, false, x1, x2 - 1, uwal, dwal, frontskytex); + TriUniforms uniforms; + uniforms.objectToClip = objectToClip; + uniforms.light = 256; + uniforms.flags = 0; + + TriangleDrawer::fill(uniforms, cube, 6, TriangleDrawMode::Normal, false, x1, x2 - 1, uwal, dwal, solid_top); + TriangleDrawer::fill(uniforms, cube + 6, 6, TriangleDrawMode::Normal, false, x1, x2 - 1, uwal, dwal, solid_bottom); + TriangleDrawer::draw(uniforms, cube + 2 * 6, 4 * 6, TriangleDrawMode::Normal, false, x1, x2 - 1, uwal, dwal, frontskytex); } namespace @@ -1127,7 +1132,6 @@ namespace v.w = 1.0f; v.varying[0] = uu; v.varying[1] = vv; - v.varying[2] = 1.0f; return v; } @@ -1140,7 +1144,6 @@ namespace v.w = 1.0f; v.varying[0] = uu; v.varying[1] = vv; - v.varying[2] = 1.0f; return v; } @@ -1220,7 +1223,11 @@ namespace short *dwal = (short *)pl->bottom; TriMatrix objectToWorld = TriMatrix::translate((float)ViewPos.X, (float)ViewPos.Y, (float)ViewPos.Z); TriMatrix objectToClip = TriMatrix::viewToClip() * TriMatrix::worldToView() * objectToWorld; - TriangleDrawer::draw(objectToClip, &mVertices[mPrimStart[row]], mPrimStart[row + 1] - mPrimStart[row], TriangleDrawMode::Strip, false, x1, x2 - 1, uwal, dwal, frontskytex); + TriUniforms uniforms; + uniforms.objectToClip = objectToClip; + uniforms.light = 
256; + uniforms.flags = 0; + TriangleDrawer::draw(uniforms, &mVertices[mPrimStart[row]], mPrimStart[row + 1] - mPrimStart[row], TriangleDrawMode::Strip, false, x1, x2 - 1, uwal, dwal, frontskytex); } void SkyDome::RenderCapColorRow(int row, bool bottomCap, visplane_t *pl) @@ -1234,7 +1241,11 @@ namespace short *dwal = (short *)pl->bottom; TriMatrix objectToWorld = TriMatrix::translate((float)ViewPos.X, (float)ViewPos.Y, (float)ViewPos.Z); TriMatrix objectToClip = TriMatrix::viewToClip() * TriMatrix::worldToView() * objectToWorld; - TriangleDrawer::fill(objectToClip, &mVertices[mPrimStart[row]], mPrimStart[row + 1] - mPrimStart[row], TriangleDrawMode::Fan, bottomCap, x1, x2 - 1, uwal, dwal, solid); + TriUniforms uniforms; + uniforms.objectToClip = objectToClip; + uniforms.light = 256; + uniforms.flags = 0; + TriangleDrawer::fill(uniforms, &mVertices[mPrimStart[row]], mPrimStart[row + 1] - mPrimStart[row], TriangleDrawMode::Fan, bottomCap, x1, x2 - 1, uwal, dwal, solid); } void SkyDome::Render(visplane_t *pl) diff --git a/src/r_poly.cpp b/src/r_poly.cpp index bd9003fab3..03052ab605 100644 --- a/src/r_poly.cpp +++ b/src/r_poly.cpp @@ -87,13 +87,6 @@ TriVertex RenderPolyBsp::PlaneVertex(vertex_t *v1, sector_t *sector, const secpl v.varying[0] = v.x / 64.0f; v.varying[1] = v.y / 64.0f; - if (fixedlightlev >= 0) - v.varying[2] = fixedlightlev / 255.0f; - else if (fixedcolormap) - v.varying[2] = 1.0f; - else - v.varying[2] = sector->lightlevel / 255.0f; - /* double vis = r_FloorVisibility / (plane.Zat0() - ViewPos.Z); if (fixedlightlev >= 0) @@ -119,25 +112,53 @@ void RenderPolyBsp::RenderSubsector(subsector_t *sub) AddLine(line, frontsector); } - TriVertex *floorVertices = PolyVertexBuffer::GetVertices(sub->numlines); - TriVertex *ceilVertices = PolyVertexBuffer::GetVertices(sub->numlines); - if (floorVertices == nullptr || ceilVertices == nullptr) - return; - - for (uint32_t i = 0; i < sub->numlines; i++) - { - seg_t *line = &sub->firstline[i]; - 
ceilVertices[sub->numlines - 1 - i] = PlaneVertex(line->v1, frontsector, frontsector->ceilingplane); - floorVertices[i] = PlaneVertex(line->v1, frontsector, frontsector->floorplane); - } - FTexture *floortex = TexMan(frontsector->GetTexture(sector_t::floor)); if (floortex->UseType != FTexture::TEX_Null) - PolyTriangleDrawer::draw(worldToClip, floorVertices, sub->numlines, TriangleDrawMode::Fan, true, 0, viewwidth, 0, viewheight, floortex); + { + TriVertex *vertices = PolyVertexBuffer::GetVertices(sub->numlines); + if (!vertices) + return; + + for (uint32_t i = 0; i < sub->numlines; i++) + { + seg_t *line = &sub->firstline[i]; + vertices[i] = PlaneVertex(line->v1, frontsector, frontsector->floorplane); + } + + TriUniforms uniforms; + uniforms.objectToClip = worldToClip; + uniforms.light = (uint32_t)(frontsector->lightlevel / 255.0f * 256.0f); + if (fixedlightlev >= 0) + uniforms.light = (uint32_t)(fixedlightlev / 255.0f * 256.0f); + else if (fixedcolormap) + uniforms.light = 256; + uniforms.flags = 0; + PolyTriangleDrawer::draw(uniforms, vertices, sub->numlines, TriangleDrawMode::Fan, true, 0, viewwidth, 0, viewheight, floortex); + } FTexture *ceiltex = TexMan(frontsector->GetTexture(sector_t::ceiling)); if (ceiltex->UseType != FTexture::TEX_Null) - PolyTriangleDrawer::draw(worldToClip, ceilVertices, sub->numlines, TriangleDrawMode::Fan, true, 0, viewwidth, 0, viewheight, ceiltex); + { + TriVertex *vertices = PolyVertexBuffer::GetVertices(sub->numlines); + if (!vertices) + return; + + for (uint32_t i = 0; i < sub->numlines; i++) + { + seg_t *line = &sub->firstline[i]; + vertices[sub->numlines - 1 - i] = PlaneVertex(line->v1, frontsector, frontsector->ceilingplane); + } + + TriUniforms uniforms; + uniforms.objectToClip = worldToClip; + uniforms.light = (uint32_t)(frontsector->lightlevel / 255.0f * 256.0f); + if (fixedlightlev >= 0) + uniforms.light = (uint32_t)(fixedlightlev / 255.0f * 256.0f); + else if (fixedcolormap) + uniforms.light = 256; + uniforms.flags = 0; + 
PolyTriangleDrawer::draw(uniforms, vertices, sub->numlines, TriangleDrawMode::Fan, true, 0, viewwidth, 0, viewheight, ceiltex); + } for (AActor *thing = sub->sector->thinglist; thing != nullptr; thing = thing->snext) { @@ -344,10 +365,13 @@ void RenderPolyBsp::AddSprite(AActor *thing, subsector_t *sub) vertices[i].varying[1] = (float)((1.0f - offsets[i].second) * tex->Scale.Y); if (flipTextureX) vertices[i].varying[0] = 1.0f - vertices[i].varying[0]; - vertices[i].varying[2] = (thing->Sector->lightlevel + actualextralight) / 255.0f; } - PolyTriangleDrawer::draw(worldToClip, vertices, 4, TriangleDrawMode::Fan, true, 0, viewwidth, 0, viewheight, tex); + TriUniforms uniforms; + uniforms.objectToClip = worldToClip; + uniforms.light = (uint32_t)((thing->Sector->lightlevel + actualextralight) / 255.0f * 256.0f); + uniforms.flags = 0; + PolyTriangleDrawer::draw(uniforms, vertices, 4, TriangleDrawMode::Fan, true, 0, viewwidth, 0, viewheight, tex); } void RenderPolyBsp::AddWallSprite(AActor *thing, subsector_t *sub) @@ -858,7 +882,6 @@ void RenderPolyWall::Render(const TriMatrix &worldToClip) vertices[0].w = 1.0f; vertices[0].varying[0] = (float)texcoords.u1; vertices[0].varying[1] = (float)texcoords.v1; - vertices[0].varying[2] = GetLightLevel() / 255.0f; vertices[1].x = (float)v2.X; vertices[1].y = (float)v2.Y; @@ -866,7 +889,6 @@ void RenderPolyWall::Render(const TriMatrix &worldToClip) vertices[1].w = 1.0f; vertices[1].varying[0] = (float)texcoords.u2; vertices[1].varying[1] = (float)texcoords.v1; - vertices[1].varying[2] = GetLightLevel() / 255.0f; vertices[2].x = (float)v2.X; vertices[2].y = (float)v2.Y; @@ -874,7 +896,6 @@ void RenderPolyWall::Render(const TriMatrix &worldToClip) vertices[2].w = 1.0f; vertices[2].varying[0] = (float)texcoords.u2; vertices[2].varying[1] = (float)texcoords.v2; - vertices[2].varying[2] = GetLightLevel() / 255.0f; vertices[3].x = (float)v1.X; vertices[3].y = (float)v1.Y; @@ -882,9 +903,12 @@ void RenderPolyWall::Render(const TriMatrix 
&worldToClip) vertices[3].w = 1.0f; vertices[3].varying[0] = (float)texcoords.u1; vertices[3].varying[1] = (float)texcoords.v2; - vertices[3].varying[2] = GetLightLevel() / 255.0f; - PolyTriangleDrawer::draw(worldToClip, vertices, 4, TriangleDrawMode::Fan, true, 0, viewwidth, 0, viewheight, tex); + TriUniforms uniforms; + uniforms.objectToClip = worldToClip; + uniforms.light = (uint32_t)(GetLightLevel() / 255.0f * 256.0f); + uniforms.flags = 0; + PolyTriangleDrawer::draw(uniforms, vertices, 4, TriangleDrawMode::Fan, true, 0, viewwidth, 0, viewheight, tex); } FTexture *RenderPolyWall::GetTexture() diff --git a/src/r_poly_triangle.cpp b/src/r_poly_triangle.cpp index d446485510..d779196773 100644 --- a/src/r_poly_triangle.cpp +++ b/src/r_poly_triangle.cpp @@ -36,31 +36,31 @@ #include "r_data/colormaps.h" #include "r_poly_triangle.h" -void PolyTriangleDrawer::draw(const TriMatrix &objectToClip, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, int cliptop, int clipbottom, FTexture *texture) +void PolyTriangleDrawer::draw(const TriUniforms &uniforms, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, int cliptop, int clipbottom, FTexture *texture) { if (r_swtruecolor) - queue_arrays(objectToClip, vinput, vcount, mode, ccw, clipleft, clipright, cliptop, clipbottom, (const uint8_t*)texture->GetPixelsBgra(), texture->GetWidth(), texture->GetHeight(), 0); + queue_arrays(uniforms, vinput, vcount, mode, ccw, clipleft, clipright, cliptop, clipbottom, (const uint8_t*)texture->GetPixelsBgra(), texture->GetWidth(), texture->GetHeight(), 0); else - draw_arrays(objectToClip, vinput, vcount, mode, ccw, clipleft, clipright, cliptop, clipbottom, texture->GetPixels(), texture->GetWidth(), texture->GetHeight(), 0, nullptr, &ScreenPolyTriangleDrawer::draw); + draw_arrays(uniforms, vinput, vcount, mode, ccw, clipleft, clipright, cliptop, clipbottom, texture->GetPixels(), texture->GetWidth(), 
texture->GetHeight(), 0, nullptr, &ScreenPolyTriangleDrawer::draw); } -void PolyTriangleDrawer::fill(const TriMatrix &objectToClip, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, int cliptop, int clipbottom, int solidcolor) +void PolyTriangleDrawer::fill(const TriUniforms &uniforms, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, int cliptop, int clipbottom, int solidcolor) { if (r_swtruecolor) - queue_arrays(objectToClip, vinput, vcount, mode, ccw, clipleft, clipright, cliptop, clipbottom, nullptr, 0, 0, solidcolor); + queue_arrays(uniforms, vinput, vcount, mode, ccw, clipleft, clipright, cliptop, clipbottom, nullptr, 0, 0, solidcolor); else - draw_arrays(objectToClip, vinput, vcount, mode, ccw, clipleft, clipright, cliptop, clipbottom, nullptr, 0, 0, solidcolor, nullptr, &ScreenPolyTriangleDrawer::fill); + draw_arrays(uniforms, vinput, vcount, mode, ccw, clipleft, clipright, cliptop, clipbottom, nullptr, 0, 0, solidcolor, nullptr, &ScreenPolyTriangleDrawer::fill); } -void PolyTriangleDrawer::queue_arrays(const TriMatrix &objectToClip, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, int cliptop, int clipbottom, const uint8_t *texturePixels, int textureWidth, int textureHeight, int solidcolor) +void PolyTriangleDrawer::queue_arrays(const TriUniforms &uniforms, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, int cliptop, int clipbottom, const uint8_t *texturePixels, int textureWidth, int textureHeight, int solidcolor) { if (clipright < clipleft || clipleft < 0 || clipright > MAXWIDTH || clipbottom < cliptop || cliptop < 0 || clipbottom > MAXHEIGHT) return; - DrawerCommandQueue::QueueCommand(objectToClip, vinput, vcount, mode, ccw, clipleft, clipright, cliptop, clipbottom, texturePixels, textureWidth, textureHeight, solidcolor); + 
DrawerCommandQueue::QueueCommand(uniforms, vinput, vcount, mode, ccw, clipleft, clipright, cliptop, clipbottom, texturePixels, textureWidth, textureHeight, solidcolor); } -void PolyTriangleDrawer::draw_arrays(const TriMatrix &objectToClip, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, int cliptop, int clipbottom, const uint8_t *texturePixels, int textureWidth, int textureHeight, int solidcolor, DrawerThread *thread, void(*drawfunc)(const ScreenPolyTriangleDrawerArgs *, DrawerThread *)) +void PolyTriangleDrawer::draw_arrays(const TriUniforms &uniforms, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, int cliptop, int clipbottom, const uint8_t *texturePixels, int textureWidth, int textureHeight, int solidcolor, DrawerThread *thread, void(*drawfunc)(const ScreenPolyTriangleDrawerArgs *, DrawerThread *)) { if (vcount < 3) return; @@ -76,6 +76,7 @@ void PolyTriangleDrawer::draw_arrays(const TriMatrix &objectToClip, const TriVer args.textureWidth = textureWidth; args.textureHeight = textureHeight; args.solidcolor = solidcolor; + args.uniforms = &uniforms; TriVertex vert[3]; if (mode == TriangleDrawMode::Normal) @@ -83,28 +84,28 @@ void PolyTriangleDrawer::draw_arrays(const TriMatrix &objectToClip, const TriVer for (int i = 0; i < vcount / 3; i++) { for (int j = 0; j < 3; j++) - vert[j] = shade_vertex(objectToClip, *(vinput++)); + vert[j] = shade_vertex(uniforms, *(vinput++)); draw_shaded_triangle(vert, ccw, &args, thread, drawfunc); } } else if (mode == TriangleDrawMode::Fan) { - vert[0] = shade_vertex(objectToClip, *(vinput++)); - vert[1] = shade_vertex(objectToClip, *(vinput++)); + vert[0] = shade_vertex(uniforms, *(vinput++)); + vert[1] = shade_vertex(uniforms, *(vinput++)); for (int i = 2; i < vcount; i++) { - vert[2] = shade_vertex(objectToClip, *(vinput++)); + vert[2] = shade_vertex(uniforms, *(vinput++)); draw_shaded_triangle(vert, ccw, &args, thread, drawfunc); 
vert[1] = vert[2]; } } else // TriangleDrawMode::Strip { - vert[0] = shade_vertex(objectToClip, *(vinput++)); - vert[1] = shade_vertex(objectToClip, *(vinput++)); + vert[0] = shade_vertex(uniforms, *(vinput++)); + vert[1] = shade_vertex(uniforms, *(vinput++)); for (int i = 2; i < vcount; i++) { - vert[2] = shade_vertex(objectToClip, *(vinput++)); + vert[2] = shade_vertex(uniforms, *(vinput++)); draw_shaded_triangle(vert, ccw, &args, thread, drawfunc); vert[0] = vert[1]; vert[1] = vert[2]; @@ -113,10 +114,10 @@ void PolyTriangleDrawer::draw_arrays(const TriMatrix &objectToClip, const TriVer } } -TriVertex PolyTriangleDrawer::shade_vertex(const TriMatrix &objectToClip, TriVertex v) +TriVertex PolyTriangleDrawer::shade_vertex(const TriUniforms &uniforms, TriVertex v) { // Apply transform to get clip coordinates: - return objectToClip * v; + return uniforms.objectToClip * v; } void PolyTriangleDrawer::draw_shaded_triangle(const TriVertex *vert, bool ccw, ScreenPolyTriangleDrawerArgs *args, DrawerThread *thread, void(*drawfunc)(const ScreenPolyTriangleDrawerArgs *, DrawerThread *)) @@ -389,7 +390,6 @@ void ScreenPolyTriangleDrawer::draw(const ScreenPolyTriangleDrawerArgs *args, Dr { uint32_t ufrac = (uint32_t)((varying[0] - floor(varying[0])) * 0x100000000LL); uint32_t vfrac = (uint32_t)((varying[1] - floor(varying[1])) * 0x100000000LL); - //uint32_t light = (uint32_t)clamp(varying[2] * 255.0f + 0.5f, 0.0f, 255.0f); uint32_t upos = ((ufrac >> 16) * textureWidth) >> 16; uint32_t vpos = ((vfrac >> 16) * textureHeight) >> 16; @@ -431,7 +431,6 @@ void ScreenPolyTriangleDrawer::draw(const ScreenPolyTriangleDrawerArgs *args, Dr { uint32_t ufrac = (uint32_t)((varying[0] - floor(varying[0])) * 0x100000000LL); uint32_t vfrac = (uint32_t)((varying[1] - floor(varying[1])) * 0x100000000LL); - //uint32_t light = (uint32_t)clamp(varying[2] * 255.0f + 0.5f, 0.0f, 255.0f); uint32_t upos = ((ufrac >> 16) * textureWidth) >> 16; uint32_t vpos = ((vfrac >> 16) * textureHeight) >> 16; @@ 
-631,6 +630,7 @@ void ScreenPolyTriangleDrawer::draw32(const ScreenPolyTriangleDrawerArgs *args, const uint32_t *texturePixels = (const uint32_t *)args->texturePixels; int textureWidth = args->textureWidth; int textureHeight = args->textureHeight; + uint32_t light = args->uniforms->light; // 28.4 fixed-point coordinates const int Y1 = (int)round(16.0f * v1.y); @@ -776,7 +776,6 @@ void ScreenPolyTriangleDrawer::draw32(const ScreenPolyTriangleDrawerArgs *args, { uint32_t ufrac = (uint32_t)((varying[0] - floor(varying[0])) * 0x100000000LL); uint32_t vfrac = (uint32_t)((varying[1] - floor(varying[1])) * 0x100000000LL); - uint32_t light = (uint32_t)clamp(varying[2] * 256.0f + 0.5f, 0.0f, 256.0f); uint32_t upos = ((ufrac >> 16) * textureWidth) >> 16; uint32_t vpos = ((vfrac >> 16) * textureHeight) >> 16; @@ -825,7 +824,6 @@ void ScreenPolyTriangleDrawer::draw32(const ScreenPolyTriangleDrawerArgs *args, { uint32_t ufrac = (uint32_t)((varying[0] - floor(varying[0])) * 0x100000000LL); uint32_t vfrac = (uint32_t)((varying[1] - floor(varying[1])) * 0x100000000LL); - uint32_t light = (uint32_t)clamp(varying[2] * 256.0f + 0.5f, 0.0f, 256.0f); uint32_t upos = ((ufrac >> 16) * textureWidth) >> 16; uint32_t vpos = ((vfrac >> 16) * textureHeight) >> 16; @@ -1034,15 +1032,15 @@ float ScreenPolyTriangleDrawer::grady(float x0, float y0, float x1, float y1, fl ///////////////////////////////////////////////////////////////////////////// -DrawPolyTrianglesCommand::DrawPolyTrianglesCommand(const TriMatrix &objectToClip, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, int cliptop, int clipbottom, const uint8_t *texturePixels, int textureWidth, int textureHeight, int solidcolor) - : objectToClip(objectToClip), vinput(vinput), vcount(vcount), mode(mode), ccw(ccw), clipleft(clipleft), clipright(clipright), cliptop(cliptop), clipbottom(clipbottom), texturePixels(texturePixels), textureWidth(textureWidth), textureHeight(textureHeight), 
solidcolor(solidcolor) +DrawPolyTrianglesCommand::DrawPolyTrianglesCommand(const TriUniforms &uniforms, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, int cliptop, int clipbottom, const uint8_t *texturePixels, int textureWidth, int textureHeight, int solidcolor) + : uniforms(uniforms), vinput(vinput), vcount(vcount), mode(mode), ccw(ccw), clipleft(clipleft), clipright(clipright), cliptop(cliptop), clipbottom(clipbottom), texturePixels(texturePixels), textureWidth(textureWidth), textureHeight(textureHeight), solidcolor(solidcolor) { } void DrawPolyTrianglesCommand::Execute(DrawerThread *thread) { PolyTriangleDrawer::draw_arrays( - objectToClip, vinput, vcount, mode, ccw, + uniforms, vinput, vcount, mode, ccw, clipleft, clipright, cliptop, clipbottom, texturePixels, textureWidth, textureHeight, solidcolor, thread, texturePixels ? ScreenPolyTriangleDrawer::draw32 : ScreenPolyTriangleDrawer::fill32); diff --git a/src/r_poly_triangle.h b/src/r_poly_triangle.h index de9cd543ca..cba5202b76 100644 --- a/src/r_poly_triangle.h +++ b/src/r_poly_triangle.h @@ -31,17 +31,17 @@ struct ScreenPolyTriangleDrawerArgs; class PolyTriangleDrawer { public: - static void draw(const TriMatrix &objectToClip, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, int cliptop, int clipbottom, FTexture *texture); - static void fill(const TriMatrix &objectToClip, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, int cliptop, int clipbottom, int solidcolor); + static void draw(const TriUniforms &uniforms, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, int cliptop, int clipbottom, FTexture *texture); + static void fill(const TriUniforms &uniforms, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, int cliptop, int clipbottom, int solidcolor); private: - 
static TriVertex shade_vertex(const TriMatrix &objectToClip, TriVertex v); - static void draw_arrays(const TriMatrix &objectToClip, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, int cliptop, int clipbottom, const uint8_t *texturePixels, int textureWidth, int textureHeight, int solidcolor, DrawerThread *thread, void(*drawfunc)(const ScreenPolyTriangleDrawerArgs *, DrawerThread *)); + static TriVertex shade_vertex(const TriUniforms &uniforms, TriVertex v); + static void draw_arrays(const TriUniforms &uniforms, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, int cliptop, int clipbottom, const uint8_t *texturePixels, int textureWidth, int textureHeight, int solidcolor, DrawerThread *thread, void(*drawfunc)(const ScreenPolyTriangleDrawerArgs *, DrawerThread *)); static void draw_shaded_triangle(const TriVertex *vertices, bool ccw, ScreenPolyTriangleDrawerArgs *args, DrawerThread *thread, void(*drawfunc)(const ScreenPolyTriangleDrawerArgs *, DrawerThread *)); static bool cullhalfspace(float clipdistance1, float clipdistance2, float &t1, float &t2); static void clipedge(const TriVertex &v1, const TriVertex &v2, TriVertex *clippedvert, int &numclipvert); - static void queue_arrays(const TriMatrix &objectToClip, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, int cliptop, int clipbottom, const uint8_t *texturePixels, int textureWidth, int textureHeight, int solidcolor); + static void queue_arrays(const TriUniforms &uniforms, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, int cliptop, int clipbottom, const uint8_t *texturePixels, int textureWidth, int textureHeight, int solidcolor); friend class DrawPolyTrianglesCommand; }; @@ -61,6 +61,7 @@ struct ScreenPolyTriangleDrawerArgs int textureWidth; int textureHeight; int solidcolor; + const TriUniforms *uniforms; }; class 
ScreenPolyTriangleDrawer @@ -80,13 +81,13 @@ private: class DrawPolyTrianglesCommand : public DrawerCommand { public: - DrawPolyTrianglesCommand(const TriMatrix &objectToClip, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, int cliptop, int clipbottom, const uint8_t *texturePixels, int textureWidth, int textureHeight, int solidcolor); + DrawPolyTrianglesCommand(const TriUniforms &uniforms, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, int cliptop, int clipbottom, const uint8_t *texturePixels, int textureWidth, int textureHeight, int solidcolor); void Execute(DrawerThread *thread) override; FString DebugInfo() override; private: - TriMatrix objectToClip; + TriUniforms uniforms; const TriVertex *vinput; int vcount; TriangleDrawMode mode; diff --git a/src/r_triangle.cpp b/src/r_triangle.cpp index 26e3933239..a95e6d0db6 100644 --- a/src/r_triangle.cpp +++ b/src/r_triangle.cpp @@ -36,23 +36,23 @@ #include "r_data/colormaps.h" #include "r_triangle.h" -void TriangleDrawer::draw(const TriMatrix &objectToClip, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, const short *cliptop, const short *clipbottom, FTexture *texture) +void TriangleDrawer::draw(const TriUniforms &uniforms, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, const short *cliptop, const short *clipbottom, FTexture *texture) { if (r_swtruecolor) - queue_arrays(objectToClip, vinput, vcount, mode, ccw, clipleft, clipright, cliptop, clipbottom, (const uint8_t*)texture->GetPixelsBgra(), texture->GetWidth(), texture->GetHeight(), 0); + queue_arrays(uniforms, vinput, vcount, mode, ccw, clipleft, clipright, cliptop, clipbottom, (const uint8_t*)texture->GetPixelsBgra(), texture->GetWidth(), texture->GetHeight(), 0); else - draw_arrays(objectToClip, vinput, vcount, mode, ccw, clipleft, clipright, cliptop, clipbottom, 
texture->GetPixels(), texture->GetWidth(), texture->GetHeight(), 0, nullptr, &ScreenTriangleDrawer::draw); + draw_arrays(uniforms, vinput, vcount, mode, ccw, clipleft, clipright, cliptop, clipbottom, texture->GetPixels(), texture->GetWidth(), texture->GetHeight(), 0, nullptr, &ScreenTriangleDrawer::draw); } -void TriangleDrawer::fill(const TriMatrix &objectToClip, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, const short *cliptop, const short *clipbottom, int solidcolor) +void TriangleDrawer::fill(const TriUniforms &uniforms, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, const short *cliptop, const short *clipbottom, int solidcolor) { if (r_swtruecolor) - queue_arrays(objectToClip, vinput, vcount, mode, ccw, clipleft, clipright, cliptop, clipbottom, nullptr, 0, 0, solidcolor); + queue_arrays(uniforms, vinput, vcount, mode, ccw, clipleft, clipright, cliptop, clipbottom, nullptr, 0, 0, solidcolor); else - draw_arrays(objectToClip, vinput, vcount, mode, ccw, clipleft, clipright, cliptop, clipbottom, nullptr, 0, 0, solidcolor, nullptr, &ScreenTriangleDrawer::fill); + draw_arrays(uniforms, vinput, vcount, mode, ccw, clipleft, clipright, cliptop, clipbottom, nullptr, 0, 0, solidcolor, nullptr, &ScreenTriangleDrawer::fill); } -void TriangleDrawer::queue_arrays(const TriMatrix &objectToClip, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, const short *cliptop, const short *clipbottom, const uint8_t *texturePixels, int textureWidth, int textureHeight, int solidcolor) +void TriangleDrawer::queue_arrays(const TriUniforms &uniforms, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, const short *cliptop, const short *clipbottom, const uint8_t *texturePixels, int textureWidth, int textureHeight, int solidcolor) { if (clipright < clipleft || clipleft < 0 || clipright > 
MAXWIDTH) return; @@ -72,10 +72,10 @@ void TriangleDrawer::queue_arrays(const TriMatrix &objectToClip, const TriVertex for (int i = 0; i < cliplength; i++) clipdata[cliplength + i] = clipbottom[clipleft + i]; - DrawerCommandQueue::QueueCommand(objectToClip, vinput, vcount, mode, ccw, clipleft, clipright, clipdata, texturePixels, textureWidth, textureHeight, solidcolor); + DrawerCommandQueue::QueueCommand(uniforms, vinput, vcount, mode, ccw, clipleft, clipright, clipdata, texturePixels, textureWidth, textureHeight, solidcolor); } -void TriangleDrawer::draw_arrays(const TriMatrix &objectToClip, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, const short *cliptop, const short *clipbottom, const uint8_t *texturePixels, int textureWidth, int textureHeight, int solidcolor, DrawerThread *thread, void(*drawfunc)(const ScreenTriangleDrawerArgs *, DrawerThread *)) +void TriangleDrawer::draw_arrays(const TriUniforms &uniforms, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, const short *cliptop, const short *clipbottom, const uint8_t *texturePixels, int textureWidth, int textureHeight, int solidcolor, DrawerThread *thread, void(*drawfunc)(const ScreenTriangleDrawerArgs *, DrawerThread *)) { if (vcount < 3) return; @@ -91,6 +91,7 @@ void TriangleDrawer::draw_arrays(const TriMatrix &objectToClip, const TriVertex args.textureWidth = textureWidth; args.textureHeight = textureHeight; args.solidcolor = solidcolor; + args.uniforms = &uniforms; TriVertex vert[3]; if (mode == TriangleDrawMode::Normal) @@ -98,28 +99,28 @@ void TriangleDrawer::draw_arrays(const TriMatrix &objectToClip, const TriVertex for (int i = 0; i < vcount / 3; i++) { for (int j = 0; j < 3; j++) - vert[j] = shade_vertex(objectToClip, *(vinput++)); + vert[j] = shade_vertex(uniforms, *(vinput++)); draw_shaded_triangle(vert, ccw, &args, thread, drawfunc); } } else if (mode == TriangleDrawMode::Fan) { - vert[0] = 
shade_vertex(objectToClip, *(vinput++)); - vert[1] = shade_vertex(objectToClip, *(vinput++)); + vert[0] = shade_vertex(uniforms, *(vinput++)); + vert[1] = shade_vertex(uniforms, *(vinput++)); for (int i = 2; i < vcount; i++) { - vert[2] = shade_vertex(objectToClip, *(vinput++)); + vert[2] = shade_vertex(uniforms, *(vinput++)); draw_shaded_triangle(vert, ccw, &args, thread, drawfunc); vert[1] = vert[2]; } } else // TriangleDrawMode::Strip { - vert[0] = shade_vertex(objectToClip, *(vinput++)); - vert[1] = shade_vertex(objectToClip, *(vinput++)); + vert[0] = shade_vertex(uniforms, *(vinput++)); + vert[1] = shade_vertex(uniforms, *(vinput++)); for (int i = 2; i < vcount; i++) { - vert[2] = shade_vertex(objectToClip, *(vinput++)); + vert[2] = shade_vertex(uniforms, *(vinput++)); draw_shaded_triangle(vert, ccw, &args, thread, drawfunc); vert[0] = vert[1]; vert[1] = vert[2]; @@ -128,10 +129,10 @@ void TriangleDrawer::draw_arrays(const TriMatrix &objectToClip, const TriVertex } } -TriVertex TriangleDrawer::shade_vertex(const TriMatrix &objectToClip, TriVertex v) +TriVertex TriangleDrawer::shade_vertex(const TriUniforms &uniforms, TriVertex v) { // Apply transform to get clip coordinates: - return objectToClip * v; + return uniforms.objectToClip * v; } void TriangleDrawer::draw_shaded_triangle(const TriVertex *vert, bool ccw, ScreenTriangleDrawerArgs *args, DrawerThread *thread, void(*drawfunc)(const ScreenTriangleDrawerArgs *, DrawerThread *)) @@ -412,7 +413,6 @@ void ScreenTriangleDrawer::draw(const ScreenTriangleDrawerArgs *args, DrawerThre { uint32_t ufrac = (uint32_t)((varying[0] - floor(varying[0])) * 0x100000000LL); uint32_t vfrac = (uint32_t)((varying[1] - floor(varying[1])) * 0x100000000LL); - //uint32_t light = (uint32_t)clamp(varying[2] * 255.0f + 0.5f, 0.0f, 255.0f); uint32_t upos = ((ufrac >> 16) * textureWidth) >> 16; uint32_t vpos = ((vfrac >> 16) * textureHeight) >> 16; @@ -454,7 +454,6 @@ void ScreenTriangleDrawer::draw(const ScreenTriangleDrawerArgs *args, 
DrawerThre { uint32_t ufrac = (uint32_t)((varying[0] - floor(varying[0])) * 0x100000000LL); uint32_t vfrac = (uint32_t)((varying[1] - floor(varying[1])) * 0x100000000LL); - //uint32_t light = (uint32_t)clamp(varying[2] * 255.0f + 0.5f, 0.0f, 255.0f); uint32_t upos = ((ufrac >> 16) * textureWidth) >> 16; uint32_t vpos = ((vfrac >> 16) * textureHeight) >> 16; @@ -666,6 +665,7 @@ void ScreenTriangleDrawer::draw32(const ScreenTriangleDrawerArgs *args, DrawerTh const uint32_t *texturePixels = (const uint32_t *)args->texturePixels; int textureWidth = args->textureWidth; int textureHeight = args->textureHeight; + uint32_t light = args->uniforms->light; // 28.4 fixed-point coordinates const int Y1 = (int)round(16.0f * v1.y); @@ -821,7 +821,6 @@ void ScreenTriangleDrawer::draw32(const ScreenTriangleDrawerArgs *args, DrawerTh { uint32_t ufrac = (uint32_t)((varying[0] - floor(varying[0])) * 0x100000000LL); uint32_t vfrac = (uint32_t)((varying[1] - floor(varying[1])) * 0x100000000LL); - uint32_t light = (uint32_t)clamp(varying[2] * 256.0f + 0.5f, 0.0f, 256.0f); uint32_t upos = ((ufrac >> 16) * textureWidth) >> 16; uint32_t vpos = ((vfrac >> 16) * textureHeight) >> 16; @@ -873,7 +872,6 @@ void ScreenTriangleDrawer::draw32(const ScreenTriangleDrawerArgs *args, DrawerTh { uint32_t ufrac = (uint32_t)((varying[0] - floor(varying[0])) * 0x100000000LL); uint32_t vfrac = (uint32_t)((varying[1] - floor(varying[1])) * 0x100000000LL); - uint32_t light = (uint32_t)clamp(varying[2] * 256.0f + 0.5f, 0.0f, 256.0f); uint32_t upos = ((ufrac >> 16) * textureWidth) >> 16; uint32_t vpos = ((vfrac >> 16) * textureHeight) >> 16; @@ -1248,8 +1246,8 @@ TriVertex TriMatrix::operator*(TriVertex v) const ///////////////////////////////////////////////////////////////////////////// -DrawTrianglesCommand::DrawTrianglesCommand(const TriMatrix &objectToClip, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, const short *clipdata, const uint8_t *texturePixels, 
int textureWidth, int textureHeight, int solidcolor) - : objectToClip(objectToClip), vinput(vinput), vcount(vcount), mode(mode), ccw(ccw), clipleft(clipleft), clipright(clipright), clipdata(clipdata), texturePixels(texturePixels), textureWidth(textureWidth), textureHeight(textureHeight), solidcolor(solidcolor) +DrawTrianglesCommand::DrawTrianglesCommand(const TriUniforms &uniforms, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, const short *clipdata, const uint8_t *texturePixels, int textureWidth, int textureHeight, int solidcolor) + : uniforms(uniforms), vinput(vinput), vcount(vcount), mode(mode), ccw(ccw), clipleft(clipleft), clipright(clipright), clipdata(clipdata), texturePixels(texturePixels), textureWidth(textureWidth), textureHeight(textureHeight), solidcolor(solidcolor) { } @@ -1263,7 +1261,7 @@ void DrawTrianglesCommand::Execute(DrawerThread *thread) } TriangleDrawer::draw_arrays( - objectToClip, vinput, vcount, mode, ccw, + uniforms, vinput, vcount, mode, ccw, clipleft, clipright, thread->triangle_clip_top, thread->triangle_clip_bottom, texturePixels, textureWidth, textureHeight, solidcolor, thread, texturePixels ? 
ScreenTriangleDrawer::draw32 : ScreenTriangleDrawer::fill32); diff --git a/src/r_triangle.h b/src/r_triangle.h index 98fa25db05..254cb5570f 100644 --- a/src/r_triangle.h +++ b/src/r_triangle.h @@ -33,9 +33,9 @@ struct ScreenTriangleDrawerArgs; struct TriVertex { TriVertex() { } - TriVertex(float x, float y, float z, float w, float u, float v, float light) : x(x), y(y), z(z), w(w) { varying[0] = u; varying[1] = v; varying[2] = light; } + TriVertex(float x, float y, float z, float w, float u, float v) : x(x), y(y), z(z), w(w) { varying[0] = u; varying[1] = v; } - enum { NumVarying = 3 }; + enum { NumVarying = 2 }; float x, y, z, w; float varying[NumVarying]; }; @@ -60,6 +60,30 @@ struct TriMatrix float matrix[16]; }; +struct TriUniforms +{ + uint32_t light; + + uint16_t light_alpha; + uint16_t light_red; + uint16_t light_green; + uint16_t light_blue; + uint16_t fade_alpha; + uint16_t fade_red; + uint16_t fade_green; + uint16_t fade_blue; + uint16_t desaturate; + uint32_t flags; + enum Flags + { + simple_shade = 1, + nearest_filter = 2, + diminishing_lighting = 4 + }; + + TriMatrix objectToClip; +}; + enum class TriangleDrawMode { Normal, @@ -70,17 +94,17 @@ enum class TriangleDrawMode class TriangleDrawer { public: - static void draw(const TriMatrix &objectToClip, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, const short *cliptop, const short *clipbottom, FTexture *texture); - static void fill(const TriMatrix &objectToClip, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, const short *cliptop, const short *clipbottom, int solidcolor); + static void draw(const TriUniforms &uniforms, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, const short *cliptop, const short *clipbottom, FTexture *texture); + static void fill(const TriUniforms &uniforms, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int 
clipleft, int clipright, const short *cliptop, const short *clipbottom, int solidcolor); private: - static TriVertex shade_vertex(const TriMatrix &objectToClip, TriVertex v); - static void draw_arrays(const TriMatrix &objectToClip, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, const short *cliptop, const short *clipbottom, const uint8_t *texturePixels, int textureWidth, int textureHeight, int solidcolor, DrawerThread *thread, void(*drawfunc)(const ScreenTriangleDrawerArgs *, DrawerThread *)); + static TriVertex shade_vertex(const TriUniforms &uniforms, TriVertex v); + static void draw_arrays(const TriUniforms &uniforms, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, const short *cliptop, const short *clipbottom, const uint8_t *texturePixels, int textureWidth, int textureHeight, int solidcolor, DrawerThread *thread, void(*drawfunc)(const ScreenTriangleDrawerArgs *, DrawerThread *)); static void draw_shaded_triangle(const TriVertex *vertices, bool ccw, ScreenTriangleDrawerArgs *args, DrawerThread *thread, void(*drawfunc)(const ScreenTriangleDrawerArgs *, DrawerThread *)); static bool cullhalfspace(float clipdistance1, float clipdistance2, float &t1, float &t2); static void clipedge(const TriVertex &v1, const TriVertex &v2, TriVertex *clippedvert, int &numclipvert); - static void queue_arrays(const TriMatrix &objectToClip, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, const short *cliptop, const short *clipbottom, const uint8_t *texturePixels, int textureWidth, int textureHeight, int solidcolor); + static void queue_arrays(const TriUniforms &uniforms, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, const short *cliptop, const short *clipbottom, const uint8_t *texturePixels, int textureWidth, int textureHeight, int solidcolor); friend class DrawTrianglesCommand; 
}; @@ -100,6 +124,7 @@ struct ScreenTriangleDrawerArgs int textureWidth; int textureHeight; int solidcolor; + const TriUniforms *uniforms; }; class ScreenTriangleDrawer @@ -119,13 +144,13 @@ private: class DrawTrianglesCommand : public DrawerCommand { public: - DrawTrianglesCommand(const TriMatrix &objectToClip, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, const short *clipdata, const uint8_t *texturePixels, int textureWidth, int textureHeight, int solidcolor); + DrawTrianglesCommand(const TriUniforms &uniforms, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, const short *clipdata, const uint8_t *texturePixels, int textureWidth, int textureHeight, int solidcolor); void Execute(DrawerThread *thread) override; FString DebugInfo() override; private: - TriMatrix objectToClip; + const TriUniforms uniforms; const TriVertex *vinput; int vcount; TriangleDrawMode mode; From 466274d7f6903bf0477d098723e6914b45001139 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 8 Nov 2016 16:46:15 +0100 Subject: [PATCH 282/912] Remove floats from the inner block loop --- src/r_poly_triangle.cpp | 52 +++++++++++++++++++++++++---------------- src/r_triangle.cpp | 39 +++++++++++++++++++------------ 2 files changed, 56 insertions(+), 35 deletions(-) diff --git a/src/r_poly_triangle.cpp b/src/r_poly_triangle.cpp index d779196773..1be35aa26f 100644 --- a/src/r_poly_triangle.cpp +++ b/src/r_poly_triangle.cpp @@ -379,17 +379,20 @@ void ScreenPolyTriangleDrawer::draw(const ScreenPolyTriangleDrawerArgs *args, Dr { for (int iy = 0; iy < q; iy++) { - float varying[TriVertex::NumVarying], varyingStep[TriVertex::NumVarying]; + uint32_t varying[TriVertex::NumVarying], varyingStep[TriVertex::NumVarying]; for (int i = 0; i < TriVertex::NumVarying; i++) { - varying[i] = varyingTL[i] + varyingBL[i] * iy; - varyingStep[i] = (varyingTR[i] + varyingBR[i] * iy - varying[i]) * (1.0f / q); + float pos 
= varyingTL[i] + varyingBL[i] * iy; + float step = (varyingTR[i] + varyingBR[i] * iy - varying[i]) * (1.0f / q); + + varying[i] = (uint32_t)((pos - floor(pos)) * 0x100000000LL); + varyingStep[i] = (uint32_t)(step * 0x100000000LL); } for (int ix = x; ix < x + q; ix++) { - uint32_t ufrac = (uint32_t)((varying[0] - floor(varying[0])) * 0x100000000LL); - uint32_t vfrac = (uint32_t)((varying[1] - floor(varying[1])) * 0x100000000LL); + uint32_t ufrac = varying[0]; + uint32_t vfrac = varying[1]; uint32_t upos = ((ufrac >> 16) * textureWidth) >> 16; uint32_t vpos = ((vfrac >> 16) * textureHeight) >> 16; @@ -416,11 +419,14 @@ void ScreenPolyTriangleDrawer::draw(const ScreenPolyTriangleDrawerArgs *args, Dr int CX2 = CY2; int CX3 = CY3; - float varying[TriVertex::NumVarying], varyingStep[TriVertex::NumVarying]; + uint32_t varying[TriVertex::NumVarying], varyingStep[TriVertex::NumVarying]; for (int i = 0; i < TriVertex::NumVarying; i++) { - varying[i] = varyingTL[i] + varyingBL[i] * iy; - varyingStep[i] = (varyingTR[i] + varyingBR[i] * iy - varying[i]) * (1.0f / q); + float pos = varyingTL[i] + varyingBL[i] * iy; + float step = (varyingTR[i] + varyingBR[i] * iy - pos) * (1.0f / q); + + varying[i] = (uint32_t)((pos - floor(pos)) * 0x100000000LL); + varyingStep[i] = (uint32_t)(step * 0x100000000LL); } for (int ix = x; ix < x + q; ix++) @@ -429,8 +435,8 @@ void ScreenPolyTriangleDrawer::draw(const ScreenPolyTriangleDrawerArgs *args, Dr if (CX1 > 0 && CX2 > 0 && CX3 > 0 && visible) { - uint32_t ufrac = (uint32_t)((varying[0] - floor(varying[0])) * 0x100000000LL); - uint32_t vfrac = (uint32_t)((varying[1] - floor(varying[1])) * 0x100000000LL); + uint32_t ufrac = varying[0]; + uint32_t vfrac = varying[1]; uint32_t upos = ((ufrac >> 16) * textureWidth) >> 16; uint32_t vpos = ((vfrac >> 16) * textureHeight) >> 16; @@ -765,17 +771,20 @@ void ScreenPolyTriangleDrawer::draw32(const ScreenPolyTriangleDrawerArgs *args, { for (int iy = 0; iy < q; iy++) { - float 
varying[TriVertex::NumVarying], varyingStep[TriVertex::NumVarying]; + uint32_t varying[TriVertex::NumVarying], varyingStep[TriVertex::NumVarying]; for (int i = 0; i < TriVertex::NumVarying; i++) { - varying[i] = varyingTL[i] + varyingBL[i] * iy; - varyingStep[i] = (varyingTR[i] + varyingBR[i] * iy - varying[i]) * (1.0f / q); + float pos = varyingTL[i] + varyingBL[i] * iy; + float step = (varyingTR[i] + varyingBR[i] * iy - pos) * (1.0f / q); + + varying[i] = (uint32_t)((pos - floor(pos)) * 0x100000000LL); + varyingStep[i] = (uint32_t)(step * 0x100000000LL); } for (int ix = x; ix < x + q; ix++) { - uint32_t ufrac = (uint32_t)((varying[0] - floor(varying[0])) * 0x100000000LL); - uint32_t vfrac = (uint32_t)((varying[1] - floor(varying[1])) * 0x100000000LL); + uint32_t ufrac = varying[0]; + uint32_t vfrac = varying[1]; uint32_t upos = ((ufrac >> 16) * textureWidth) >> 16; uint32_t vpos = ((vfrac >> 16) * textureHeight) >> 16; @@ -809,11 +818,14 @@ void ScreenPolyTriangleDrawer::draw32(const ScreenPolyTriangleDrawerArgs *args, int CX2 = CY2; int CX3 = CY3; - float varying[TriVertex::NumVarying], varyingStep[TriVertex::NumVarying]; + uint32_t varying[TriVertex::NumVarying], varyingStep[TriVertex::NumVarying]; for (int i = 0; i < TriVertex::NumVarying; i++) { - varying[i] = varyingTL[i] + varyingBL[i] * iy; - varyingStep[i] = (varyingTR[i] + varyingBR[i] * iy - varying[i]) * (1.0f / q); + float pos = varyingTL[i] + varyingBL[i] * iy; + float step = (varyingTR[i] + varyingBR[i] * iy - pos) * (1.0f / q); + + varying[i] = (uint32_t)((pos - floor(pos)) * 0x100000000LL); + varyingStep[i] = (uint32_t)(step * 0x100000000LL); } for (int ix = x; ix < x + q; ix++) @@ -822,8 +834,8 @@ void ScreenPolyTriangleDrawer::draw32(const ScreenPolyTriangleDrawerArgs *args, if (CX1 > 0 && CX2 > 0 && CX3 > 0 && visible) { - uint32_t ufrac = (uint32_t)((varying[0] - floor(varying[0])) * 0x100000000LL); - uint32_t vfrac = (uint32_t)((varying[1] - floor(varying[1])) * 0x100000000LL); + uint32_t 
ufrac = varying[0]; + uint32_t vfrac = varying[1]; uint32_t upos = ((ufrac >> 16) * textureWidth) >> 16; uint32_t vpos = ((vfrac >> 16) * textureHeight) >> 16; diff --git a/src/r_triangle.cpp b/src/r_triangle.cpp index a95e6d0db6..95e4e636e4 100644 --- a/src/r_triangle.cpp +++ b/src/r_triangle.cpp @@ -402,17 +402,20 @@ void ScreenTriangleDrawer::draw(const ScreenTriangleDrawerArgs *args, DrawerThre { for (int iy = 0; iy < q; iy++) { - float varying[TriVertex::NumVarying], varyingStep[TriVertex::NumVarying]; + uint32_t varying[TriVertex::NumVarying], varyingStep[TriVertex::NumVarying]; for (int i = 0; i < TriVertex::NumVarying; i++) { - varying[i] = varyingTL[i] + varyingBL[i] * iy; - varyingStep[i] = (varyingTR[i] + varyingBR[i] * iy - varying[i]) * (1.0f / q); + float pos = varyingTL[i] + varyingBL[i] * iy; + float step = (varyingTR[i] + varyingBR[i] * iy - pos) * (1.0f / q); + + varying[i] = (uint32_t)((pos - floor(pos)) * 0x100000000LL); + varyingStep[i] = (uint32_t)(step * 0x100000000LL); } for (int ix = x; ix < x + q; ix++) { - uint32_t ufrac = (uint32_t)((varying[0] - floor(varying[0])) * 0x100000000LL); - uint32_t vfrac = (uint32_t)((varying[1] - floor(varying[1])) * 0x100000000LL); + uint32_t ufrac = varying[0]; + uint32_t vfrac = varying[1]; uint32_t upos = ((ufrac >> 16) * textureWidth) >> 16; uint32_t vpos = ((vfrac >> 16) * textureHeight) >> 16; @@ -439,11 +442,14 @@ void ScreenTriangleDrawer::draw(const ScreenTriangleDrawerArgs *args, DrawerThre int CX2 = CY2; int CX3 = CY3; - float varying[TriVertex::NumVarying], varyingStep[TriVertex::NumVarying]; + uint32_t varying[TriVertex::NumVarying], varyingStep[TriVertex::NumVarying]; for (int i = 0; i < TriVertex::NumVarying; i++) { - varying[i] = varyingTL[i] + varyingBL[i] * iy; - varyingStep[i] = (varyingTR[i] + varyingBR[i] * iy - varying[i]) * (1.0f / q); + float pos = varyingTL[i] + varyingBL[i] * iy; + float step = (varyingTR[i] + varyingBR[i] * iy - pos) * (1.0f / q); + + varying[i] = (uint32_t)((pos 
- floor(pos)) * 0x100000000LL); + varyingStep[i] = (uint32_t)(step * 0x100000000LL); } for (int ix = x; ix < x + q; ix++) @@ -452,8 +458,8 @@ void ScreenTriangleDrawer::draw(const ScreenTriangleDrawerArgs *args, DrawerThre if (CX1 > 0 && CX2 > 0 && CX3 > 0 && visible) { - uint32_t ufrac = (uint32_t)((varying[0] - floor(varying[0])) * 0x100000000LL); - uint32_t vfrac = (uint32_t)((varying[1] - floor(varying[1])) * 0x100000000LL); + uint32_t ufrac = varying[0]; + uint32_t vfrac = varying[1]; uint32_t upos = ((ufrac >> 16) * textureWidth) >> 16; uint32_t vpos = ((vfrac >> 16) * textureHeight) >> 16; @@ -808,19 +814,22 @@ void ScreenTriangleDrawer::draw32(const ScreenTriangleDrawerArgs *args, DrawerTh { for (int iy = 0; iy < q; iy++) { - float varying[TriVertex::NumVarying], varyingStep[TriVertex::NumVarying]; + uint32_t varying[TriVertex::NumVarying], varyingStep[TriVertex::NumVarying]; for (int i = 0; i < TriVertex::NumVarying; i++) { - varying[i] = varyingTL[i] + varyingBL[i] * iy; - varyingStep[i] = (varyingTR[i] + varyingBR[i] * iy - varying[i]) * (1.0f / q); + float pos = varyingTL[i] + varyingBL[i] * iy; + float step = (varyingTR[i] + varyingBR[i] * iy - pos) * (1.0f / q); + + varying[i] = (uint32_t)((pos - floor(pos)) * 0x100000000LL); + varyingStep[i] = (uint32_t)(step * 0x100000000LL); } if (!thread->skipped_by_thread(y + iy)) { for (int ix = x; ix < x + q; ix++) { - uint32_t ufrac = (uint32_t)((varying[0] - floor(varying[0])) * 0x100000000LL); - uint32_t vfrac = (uint32_t)((varying[1] - floor(varying[1])) * 0x100000000LL); + uint32_t ufrac = varying[0]; + uint32_t vfrac = varying[1]; uint32_t upos = ((ufrac >> 16) * textureWidth) >> 16; uint32_t vpos = ((vfrac >> 16) * textureHeight) >> 16; From c1d496c17fac5a77e4d735779028ee5a90d75ff6 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 8 Nov 2016 17:38:19 +0100 Subject: [PATCH 283/912] A little bit of SSE to speed it slightly more --- src/r_poly_triangle.cpp | 33 +++++++++++++++++++++++++++++++++ 1 
file changed, 33 insertions(+) diff --git a/src/r_poly_triangle.cpp b/src/r_poly_triangle.cpp index 1be35aa26f..d20c776a4b 100644 --- a/src/r_poly_triangle.cpp +++ b/src/r_poly_triangle.cpp @@ -638,6 +638,10 @@ void ScreenPolyTriangleDrawer::draw32(const ScreenPolyTriangleDrawerArgs *args, int textureHeight = args->textureHeight; uint32_t light = args->uniforms->light; +#if !defined(NO_SSE) + __m128i mlight = _mm_set1_epi16(light); +#endif + // 28.4 fixed-point coordinates const int Y1 = (int)round(16.0f * v1.y); const int Y2 = (int)round(16.0f * v2.y); @@ -781,6 +785,7 @@ void ScreenPolyTriangleDrawer::draw32(const ScreenPolyTriangleDrawerArgs *args, varyingStep[i] = (uint32_t)(step * 0x100000000LL); } +#if NO_SSE for (int ix = x; ix < x + q; ix++) { uint32_t ufrac = varying[0]; @@ -802,6 +807,34 @@ void ScreenPolyTriangleDrawer::draw32(const ScreenPolyTriangleDrawerArgs *args, for (int i = 0; i < TriVertex::NumVarying; i++) varying[i] += varyingStep[i]; } +#else + for (int sse = 0; sse < q / 4; sse++) + { + uint32_t fg[4]; + for (int ix = 0; ix < 4; ix++) + { + uint32_t ufrac = varying[0]; + uint32_t vfrac = varying[1]; + uint32_t upos = ((ufrac >> 16) * textureWidth) >> 16; + uint32_t vpos = ((vfrac >> 16) * textureHeight) >> 16; + uint32_t uvoffset = upos * textureHeight + vpos; + fg[ix] = texturePixels[uvoffset]; + for (int i = 0; i < TriVertex::NumVarying; i++) + varying[i] += varyingStep[i]; + } + + __m128i mfg = _mm_loadu_si128((const __m128i*)fg); + __m128i mfg0 = _mm_unpacklo_epi8(mfg, _mm_setzero_si128()); + __m128i mfg1 = _mm_unpackhi_epi8(mfg, _mm_setzero_si128()); + __m128i mout0 = _mm_srli_epi16(_mm_mullo_epi16(mfg0, mlight), 8); + __m128i mout1 = _mm_srli_epi16(_mm_mullo_epi16(mfg1, mlight), 8); + __m128i mout = _mm_packus_epi16(mout0, mout1); + __m128i mmask0 = _mm_shufflehi_epi16(_mm_shufflelo_epi16(mfg0, _MM_SHUFFLE(3, 3, 3, 3)), _MM_SHUFFLE(3, 3, 3, 3)); + __m128i mmask1 = _mm_shufflehi_epi16(_mm_shufflelo_epi16(mfg1, _MM_SHUFFLE(3, 3, 3, 3)), 
_MM_SHUFFLE(3, 3, 3, 3)); + __m128i mmask = _mm_cmplt_epi8(_mm_packus_epi16(mmask0, mmask1), _mm_setzero_si128()); + _mm_maskmoveu_si128(mout, mmask, (char*)(&buffer[x + sse * 4])); + } +#endif buffer += pitch; } From 6ef8fe2da560b5178989fad7d379b5a81fb1fbcd Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 8 Nov 2016 18:08:13 +0100 Subject: [PATCH 284/912] Add diminishing lighting --- src/r_poly_triangle.cpp | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/src/r_poly_triangle.cpp b/src/r_poly_triangle.cpp index d20c776a4b..ffb8d82bb9 100644 --- a/src/r_poly_triangle.cpp +++ b/src/r_poly_triangle.cpp @@ -638,10 +638,6 @@ void ScreenPolyTriangleDrawer::draw32(const ScreenPolyTriangleDrawerArgs *args, int textureHeight = args->textureHeight; uint32_t light = args->uniforms->light; -#if !defined(NO_SSE) - __m128i mlight = _mm_set1_epi16(light); -#endif - // 28.4 fixed-point coordinates const int Y1 = (int)round(16.0f * v1.y); const int Y2 = (int)round(16.0f * v2.y); @@ -768,6 +764,16 @@ void ScreenPolyTriangleDrawer::draw32(const ScreenPolyTriangleDrawerArgs *args, varyingBR[i] = ((startVarying[i] + offx1 * gradVaryingX[i] + offy1 * gradVaryingY[i]) * rcpWBR - varyingTR[i]) * (1.0f / q); } + float globVis = 1706.0f; + float vis = globVis / rcpWTL; + float shade = 64.0f - (light * 255 / 256 + 12.0f) * 32.0f / 128.0f; + float lightscale = clamp((shade - MIN(24.0f, vis)) / 32.0f, 0.0f, 31.0f / 32.0f); + int diminishedlight = (int)clamp((1.0f - lightscale) * 256.0f + 0.5f, 0.0f, 256.0f); + +#if !defined(NO_SSE) + __m128i mlight = _mm_set1_epi16(diminishedlight); +#endif + uint32_t *buffer = dest; // Accept whole block when totally covered @@ -796,9 +802,9 @@ void ScreenPolyTriangleDrawer::draw32(const ScreenPolyTriangleDrawerArgs *args, uint32_t uvoffset = upos * textureHeight + vpos; uint32_t fg = texturePixels[uvoffset]; - uint32_t fg_red = (RPART(fg) * light) >> 8; - uint32_t fg_green = (GPART(fg) * light) >> 8; - 
uint32_t fg_blue = (BPART(fg) * light) >> 8; + uint32_t fg_red = (RPART(fg) * diminishedlight) >> 8; + uint32_t fg_green = (GPART(fg) * diminishedlight) >> 8; + uint32_t fg_blue = (BPART(fg) * diminishedlight) >> 8; uint32_t fg_alpha = APART(fg); if (fg_alpha > 127) @@ -875,9 +881,9 @@ void ScreenPolyTriangleDrawer::draw32(const ScreenPolyTriangleDrawerArgs *args, uint32_t uvoffset = upos * textureHeight + vpos; uint32_t fg = texturePixels[uvoffset]; - uint32_t fg_red = (RPART(fg) * light) >> 8; - uint32_t fg_green = (GPART(fg) * light) >> 8; - uint32_t fg_blue = (BPART(fg) * light) >> 8; + uint32_t fg_red = (RPART(fg) * diminishedlight) >> 8; + uint32_t fg_green = (GPART(fg) * diminishedlight) >> 8; + uint32_t fg_blue = (BPART(fg) * diminishedlight) >> 8; uint32_t fg_alpha = APART(fg); if (fg_alpha > 127) From 9413ea6edf72a2d478f3c5fad30abf014a3c7109 Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Tue, 8 Nov 2016 15:05:23 -0500 Subject: [PATCH 285/912] - Textured automap would crash due to an additional parameter in the software renderer to the FillSimplePoly function call, causing stack issues. 
--- src/gl/renderer/gl_renderer.h | 2 +- src/gl/system/gl_framebuffer.cpp | 2 +- src/gl/system/gl_framebuffer.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/gl/renderer/gl_renderer.h b/src/gl/renderer/gl_renderer.h index c671d74971..5610c034ef 100644 --- a/src/gl/renderer/gl_renderer.h +++ b/src/gl/renderer/gl_renderer.h @@ -216,7 +216,7 @@ public: void FillSimplePoly(FTexture *texture, FVector2 *points, int npoints, double originx, double originy, double scalex, double scaley, - DAngle rotation, FDynamicColormap *colormap, int lightlevel); + DAngle rotation, FDynamicColormap *colormap, int lightlevel, int bottomclip); int PTM_BestColor (const uint32 *pal_in, int r, int g, int b, int first, int num); diff --git a/src/gl/system/gl_framebuffer.cpp b/src/gl/system/gl_framebuffer.cpp index 57e0cd679f..a286d45686 100644 --- a/src/gl/system/gl_framebuffer.cpp +++ b/src/gl/system/gl_framebuffer.cpp @@ -464,7 +464,7 @@ void OpenGLFrameBuffer::Clear(int left, int top, int right, int bottom, int palc void OpenGLFrameBuffer::FillSimplePoly(FTexture *texture, FVector2 *points, int npoints, double originx, double originy, double scalex, double scaley, - DAngle rotation, FDynamicColormap *colormap, int lightlevel) + DAngle rotation, FDynamicColormap *colormap, int lightlevel, int bottomclip) { if (GLRenderer != nullptr && GLRenderer->m2DDrawer != nullptr && npoints >= 3) { diff --git a/src/gl/system/gl_framebuffer.h b/src/gl/system/gl_framebuffer.h index 5315bb0a9a..27c3d1cf97 100644 --- a/src/gl/system/gl_framebuffer.h +++ b/src/gl/system/gl_framebuffer.h @@ -71,7 +71,7 @@ public: void FillSimplePoly(FTexture *tex, FVector2 *points, int npoints, double originx, double originy, double scalex, double scaley, - DAngle rotation, FDynamicColormap *colormap, int lightlevel); + DAngle rotation, FDynamicColormap *colormap, int lightlevel, int bottomclip); FNativePalette *CreatePalette(FRemapTable *remap); From 7d3e8d14140f8654f31aa7ac78649d89f8f88659 Mon Sep 
17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 8 Nov 2016 23:08:25 +0100 Subject: [PATCH 286/912] Fix triangle drawer clipping bug --- src/r_poly_triangle.cpp | 139 +++++++++++++++++++++++++++------------- src/r_poly_triangle.h | 2 +- src/r_triangle.cpp | 135 +++++++++++++++++++++++++------------- src/r_triangle.h | 2 +- 4 files changed, 190 insertions(+), 88 deletions(-) diff --git a/src/r_poly_triangle.cpp b/src/r_poly_triangle.cpp index ffb8d82bb9..34bc74dca9 100644 --- a/src/r_poly_triangle.cpp +++ b/src/r_poly_triangle.cpp @@ -36,6 +36,10 @@ #include "r_data/colormaps.h" #include "r_poly_triangle.h" +#ifndef NO_SSE +#include +#endif + void PolyTriangleDrawer::draw(const TriUniforms &uniforms, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, int cliptop, int clipbottom, FTexture *texture) { if (r_swtruecolor) @@ -124,10 +128,8 @@ void PolyTriangleDrawer::draw_shaded_triangle(const TriVertex *vert, bool ccw, S { // Cull, clip and generate additional vertices as needed TriVertex clippedvert[6]; - int numclipvert = 0; - clipedge(vert[0], vert[1], clippedvert, numclipvert); - clipedge(vert[1], vert[2], clippedvert, numclipvert); - clipedge(vert[2], vert[0], clippedvert, numclipvert); + int numclipvert; + clipedge(vert, clippedvert, numclipvert); // Map to 2D viewport: for (int j = 0; j < numclipvert; j++) @@ -170,62 +172,113 @@ void PolyTriangleDrawer::draw_shaded_triangle(const TriVertex *vert, bool ccw, S bool PolyTriangleDrawer::cullhalfspace(float clipdistance1, float clipdistance2, float &t1, float &t2) { - float d1 = clipdistance1 * (1.0f - t1) + clipdistance2 * t1; - float d2 = clipdistance1 * (1.0f - t2) + clipdistance2 * t2; - if (d1 < 0.0f && d2 < 0.0f) + if (clipdistance1 < 0.0f && clipdistance2 < 0.0f) return true; - if (d1 < 0.0f) - t1 = MAX(-clipdistance1 / (clipdistance2 - clipdistance1), t1); + if (clipdistance1 < 0.0f) + t1 = MAX(-clipdistance1 / (clipdistance2 - clipdistance1), 0.0f); + else + 
t1 = 0.0f; - if (d2 < 0.0f) - t2 = MIN(1.0f + clipdistance2 / (clipdistance1 - clipdistance2), t2); + if (clipdistance2 < 0.0f) + t2 = MIN(1.0f + clipdistance2 / (clipdistance1 - clipdistance2), 1.0f); + else + t2 = 1.0f; return false; } -void PolyTriangleDrawer::clipedge(const TriVertex &v1, const TriVertex &v2, TriVertex *clippedvert, int &numclipvert) +void PolyTriangleDrawer::clipedge(const TriVertex *verts, TriVertex *clippedvert, int &numclipvert) { // Clip and cull so that the following is true for all vertices: // -v.w <= v.x <= v.w // -v.w <= v.y <= v.w // -v.w <= v.z <= v.w - - float t1 = 0.0f, t2 = 1.0f; - bool culled = - cullhalfspace(v1.x + v1.w, v2.x + v2.w, t1, t2) || - cullhalfspace(v1.w - v1.x, v2.w - v2.x, t1, t2) || - cullhalfspace(v1.y + v1.w, v2.y + v2.w, t1, t2) || - cullhalfspace(v1.w - v1.y, v2.w - v2.y, t1, t2) || - cullhalfspace(v1.z + v1.w, v2.z + v2.w, t1, t2) || - cullhalfspace(v1.w - v1.z, v2.w - v2.z, t1, t2); - if (culled) - return; - - if (t1 == 0.0f) + + // use barycentric weights while clipping vertices + float weights[6 * 3 * 2]; + for (int i = 0; i < 3; i++) { - clippedvert[numclipvert++] = v1; + weights[i * 3 + 0] = 0.0f; + weights[i * 3 + 1] = 0.0f; + weights[i * 3 + 2] = 0.0f; + weights[i * 3 + i] = 1.0f; } - else + + // halfspace clip distances + float clipdistance[6 * 3]; + for (int i = 0; i < 3; i++) { - auto &v = clippedvert[numclipvert++]; - v.x = v1.x * (1.0f - t1) + v2.x * t1; - v.y = v1.y * (1.0f - t1) + v2.y * t1; - v.z = v1.z * (1.0f - t1) + v2.z * t1; - v.w = v1.w * (1.0f - t1) + v2.w * t1; - for (int i = 0; i < TriVertex::NumVarying; i++) - v.varying[i] = v1.varying[i] * (1.0f - t1) + v2.varying[i] * t1; + const auto &v = verts[i]; + clipdistance[i * 6 + 0] = v.x + v.w; + clipdistance[i * 6 + 1] = v.w - v.x; + clipdistance[i * 6 + 2] = v.y + v.w; + clipdistance[i * 6 + 3] = v.w - v.y; + clipdistance[i * 6 + 4] = v.z + v.w; + clipdistance[i * 6 + 5] = v.w - v.z; } - - if (t2 != 1.0f) + + // Clip against each 
halfspace + float *input = weights; + float *output = weights + 6 * 3; + int inputverts = 3; + int outputverts = 0; + for (int p = 0; p < 6; p++) { - auto &v = clippedvert[numclipvert++]; - v.x = v1.x * (1.0f - t2) + v2.x * t2; - v.y = v1.y * (1.0f - t2) + v2.y * t2; - v.z = v1.z * (1.0f - t2) + v2.z * t2; - v.w = v1.w * (1.0f - t2) + v2.w * t2; - for (int i = 0; i < TriVertex::NumVarying; i++) - v.varying[i] = v1.varying[i] * (1.0f - t2) + v2.varying[i] * t2; + // Clip each edge + outputverts = 0; + for (int i = 0; i < inputverts; i++) + { + int j = (i + 1) % inputverts; + float clipdistance1 = + clipdistance[0 * 6 + p] * input[i * 3 + 0] + + clipdistance[1 * 6 + p] * input[i * 3 + 1] + + clipdistance[2 * 6 + p] * input[i * 3 + 2]; + + float clipdistance2 = + clipdistance[0 * 6 + p] * input[j * 3 + 0] + + clipdistance[1 * 6 + p] * input[j * 3 + 1] + + clipdistance[2 * 6 + p] * input[j * 3 + 2]; + + float t1, t2; + if (!cullhalfspace(clipdistance1, clipdistance2, t1, t2)) + { + // add t1 vertex + for (int k = 0; k < 3; k++) + output[outputverts * 3 + k] = input[i * 3 + k] * (1.0f - t1) + input[j * 3 + k] * t1; + outputverts++; + + if (t2 != 1.0f && t2 > t1) + { + // add t2 vertex + for (int k = 0; k < 3; k++) + output[outputverts * 3 + k] = input[i * 3 + k] * (1.0f - t2) + input[j * 3 + k] * t2; + outputverts++; + } + } + } + std::swap(input, output); + std::swap(inputverts, outputverts); + if (inputverts == 0) + break; + } + + // Convert barycentric weights to actual vertices + numclipvert = inputverts; + for (int i = 0; i < numclipvert; i++) + { + auto &v = clippedvert[i]; + memset(&v, 0, sizeof(TriVertex)); + for (int w = 0; w < 3; w++) + { + float weight = input[i * 3 + w]; + v.x += verts[w].x * weight; + v.y += verts[w].y * weight; + v.z += verts[w].z * weight; + v.w += verts[w].w * weight; + for (int iv = 0; iv < TriVertex::NumVarying; iv++) + v.varying[iv] += verts[w].varying[iv] * weight; + } } } diff --git a/src/r_poly_triangle.h b/src/r_poly_triangle.h 
index cba5202b76..79c8d4a7a1 100644 --- a/src/r_poly_triangle.h +++ b/src/r_poly_triangle.h @@ -39,7 +39,7 @@ private: static void draw_arrays(const TriUniforms &uniforms, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, int cliptop, int clipbottom, const uint8_t *texturePixels, int textureWidth, int textureHeight, int solidcolor, DrawerThread *thread, void(*drawfunc)(const ScreenPolyTriangleDrawerArgs *, DrawerThread *)); static void draw_shaded_triangle(const TriVertex *vertices, bool ccw, ScreenPolyTriangleDrawerArgs *args, DrawerThread *thread, void(*drawfunc)(const ScreenPolyTriangleDrawerArgs *, DrawerThread *)); static bool cullhalfspace(float clipdistance1, float clipdistance2, float &t1, float &t2); - static void clipedge(const TriVertex &v1, const TriVertex &v2, TriVertex *clippedvert, int &numclipvert); + static void clipedge(const TriVertex *verts, TriVertex *clippedvert, int &numclipvert); static void queue_arrays(const TriUniforms &uniforms, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, int cliptop, int clipbottom, const uint8_t *texturePixels, int textureWidth, int textureHeight, int solidcolor); diff --git a/src/r_triangle.cpp b/src/r_triangle.cpp index 95e4e636e4..340306feca 100644 --- a/src/r_triangle.cpp +++ b/src/r_triangle.cpp @@ -139,10 +139,8 @@ void TriangleDrawer::draw_shaded_triangle(const TriVertex *vert, bool ccw, Scree { // Cull, clip and generate additional vertices as needed TriVertex clippedvert[6]; - int numclipvert = 0; - clipedge(vert[0], vert[1], clippedvert, numclipvert); - clipedge(vert[1], vert[2], clippedvert, numclipvert); - clipedge(vert[2], vert[0], clippedvert, numclipvert); + int numclipvert; + clipedge(vert, clippedvert, numclipvert); // Map to 2D viewport: for (int j = 0; j < numclipvert; j++) @@ -185,62 +183,113 @@ void TriangleDrawer::draw_shaded_triangle(const TriVertex *vert, bool ccw, Scree bool 
TriangleDrawer::cullhalfspace(float clipdistance1, float clipdistance2, float &t1, float &t2) { - float d1 = clipdistance1 * (1.0f - t1) + clipdistance2 * t1; - float d2 = clipdistance1 * (1.0f - t2) + clipdistance2 * t2; - if (d1 < 0.0f && d2 < 0.0f) + if (clipdistance1 < 0.0f && clipdistance2 < 0.0f) return true; - if (d1 < 0.0f) - t1 = MAX(-clipdistance1 / (clipdistance2 - clipdistance1), t1); + if (clipdistance1 < 0.0f) + t1 = MAX(-clipdistance1 / (clipdistance2 - clipdistance1), 0.0f); + else + t1 = 0.0f; - if (d2 < 0.0f) - t2 = MIN(1.0f + clipdistance2 / (clipdistance1 - clipdistance2), t2); + if (clipdistance2 < 0.0f) + t2 = MIN(1.0f + clipdistance2 / (clipdistance1 - clipdistance2), 1.0f); + else + t2 = 1.0f; return false; } -void TriangleDrawer::clipedge(const TriVertex &v1, const TriVertex &v2, TriVertex *clippedvert, int &numclipvert) +void TriangleDrawer::clipedge(const TriVertex *verts, TriVertex *clippedvert, int &numclipvert) { // Clip and cull so that the following is true for all vertices: // -v.w <= v.x <= v.w // -v.w <= v.y <= v.w // -v.w <= v.z <= v.w - - float t1 = 0.0f, t2 = 1.0f; - bool culled = - cullhalfspace(v1.x + v1.w, v2.x + v2.w, t1, t2) || - cullhalfspace(v1.w - v1.x, v2.w - v2.x, t1, t2) || - cullhalfspace(v1.y + v1.w, v2.y + v2.w, t1, t2) || - cullhalfspace(v1.w - v1.y, v2.w - v2.y, t1, t2) || - cullhalfspace(v1.z + v1.w, v2.z + v2.w, t1, t2) || - cullhalfspace(v1.w - v1.z, v2.w - v2.z, t1, t2); - if (culled) - return; - - if (t1 == 0.0f) + + // use barycentric weights while clipping vertices + float weights[6 * 3 * 2]; + for (int i = 0; i < 3; i++) { - clippedvert[numclipvert++] = v1; + weights[i * 3 + 0] = 0.0f; + weights[i * 3 + 1] = 0.0f; + weights[i * 3 + 2] = 0.0f; + weights[i * 3 + i] = 1.0f; } - else + + // halfspace clip distances + float clipdistance[6 * 3]; + for (int i = 0; i < 3; i++) { - auto &v = clippedvert[numclipvert++]; - v.x = v1.x * (1.0f - t1) + v2.x * t1; - v.y = v1.y * (1.0f - t1) + v2.y * t1; - v.z = v1.z * 
(1.0f - t1) + v2.z * t1; - v.w = v1.w * (1.0f - t1) + v2.w * t1; - for (int i = 0; i < TriVertex::NumVarying; i++) - v.varying[i] = v1.varying[i] * (1.0f - t1) + v2.varying[i] * t1; + const auto &v = verts[i]; + clipdistance[i * 6 + 0] = v.x + v.w; + clipdistance[i * 6 + 1] = v.w - v.x; + clipdistance[i * 6 + 2] = v.y + v.w; + clipdistance[i * 6 + 3] = v.w - v.y; + clipdistance[i * 6 + 4] = v.z + v.w; + clipdistance[i * 6 + 5] = v.w - v.z; } - - if (t2 != 1.0f) + + // Clip against each halfspace + float *input = weights; + float *output = weights + 6 * 3; + int inputverts = 3; + int outputverts = 0; + for (int p = 0; p < 6; p++) { - auto &v = clippedvert[numclipvert++]; - v.x = v1.x * (1.0f - t2) + v2.x * t2; - v.y = v1.y * (1.0f - t2) + v2.y * t2; - v.z = v1.z * (1.0f - t2) + v2.z * t2; - v.w = v1.w * (1.0f - t2) + v2.w * t2; - for (int i = 0; i < TriVertex::NumVarying; i++) - v.varying[i] = v1.varying[i] * (1.0f - t2) + v2.varying[i] * t2; + // Clip each edge + outputverts = 0; + for (int i = 0; i < inputverts; i++) + { + int j = (i + 1) % inputverts; + float clipdistance1 = + clipdistance[0 * 6 + p] * input[i * 3 + 0] + + clipdistance[1 * 6 + p] * input[i * 3 + 1] + + clipdistance[2 * 6 + p] * input[i * 3 + 2]; + + float clipdistance2 = + clipdistance[0 * 6 + p] * input[j * 3 + 0] + + clipdistance[1 * 6 + p] * input[j * 3 + 1] + + clipdistance[2 * 6 + p] * input[j * 3 + 2]; + + float t1, t2; + if (!cullhalfspace(clipdistance1, clipdistance2, t1, t2)) + { + // add t1 vertex + for (int k = 0; k < 3; k++) + output[outputverts * 3 + k] = input[i * 3 + k] * (1.0f - t1) + input[j * 3 + k] * t1; + outputverts++; + + if (t2 != 1.0f && t2 > t1) + { + // add t2 vertex + for (int k = 0; k < 3; k++) + output[outputverts * 3 + k] = input[i * 3 + k] * (1.0f - t2) + input[j * 3 + k] * t2; + outputverts++; + } + } + } + std::swap(input, output); + std::swap(inputverts, outputverts); + if (inputverts == 0) + break; + } + + // Convert barycentric weights to actual vertices + 
numclipvert = inputverts; + for (int i = 0; i < numclipvert; i++) + { + auto &v = clippedvert[i]; + memset(&v, 0, sizeof(TriVertex)); + for (int w = 0; w < 3; w++) + { + float weight = input[i * 3 + w]; + v.x += verts[w].x * weight; + v.y += verts[w].y * weight; + v.z += verts[w].z * weight; + v.w += verts[w].w * weight; + for (int iv = 0; iv < TriVertex::NumVarying; iv++) + v.varying[iv] += verts[w].varying[iv] * weight; + } } } diff --git a/src/r_triangle.h b/src/r_triangle.h index 254cb5570f..82b1d2c4d9 100644 --- a/src/r_triangle.h +++ b/src/r_triangle.h @@ -102,7 +102,7 @@ private: static void draw_arrays(const TriUniforms &uniforms, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, const short *cliptop, const short *clipbottom, const uint8_t *texturePixels, int textureWidth, int textureHeight, int solidcolor, DrawerThread *thread, void(*drawfunc)(const ScreenTriangleDrawerArgs *, DrawerThread *)); static void draw_shaded_triangle(const TriVertex *vertices, bool ccw, ScreenTriangleDrawerArgs *args, DrawerThread *thread, void(*drawfunc)(const ScreenTriangleDrawerArgs *, DrawerThread *)); static bool cullhalfspace(float clipdistance1, float clipdistance2, float &t1, float &t2); - static void clipedge(const TriVertex &v1, const TriVertex &v2, TriVertex *clippedvert, int &numclipvert); + static void clipedge(const TriVertex *verts, TriVertex *clippedvert, int &numclipvert); static void queue_arrays(const TriUniforms &uniforms, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, const short *cliptop, const short *clipbottom, const uint8_t *texturePixels, int textureWidth, int textureHeight, int solidcolor); From 968150ceffbe9f133400c31fb706ea213b917d2c Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 9 Nov 2016 00:22:05 +0100 Subject: [PATCH 287/912] Seems there could be more than 6 vertices --- src/r_poly_triangle.cpp | 6 +++--- src/r_triangle.cpp | 6 +++--- 2 
files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/r_poly_triangle.cpp b/src/r_poly_triangle.cpp index 34bc74dca9..168b4378ad 100644 --- a/src/r_poly_triangle.cpp +++ b/src/r_poly_triangle.cpp @@ -127,7 +127,7 @@ TriVertex PolyTriangleDrawer::shade_vertex(const TriUniforms &uniforms, TriVerte void PolyTriangleDrawer::draw_shaded_triangle(const TriVertex *vert, bool ccw, ScreenPolyTriangleDrawerArgs *args, DrawerThread *thread, void(*drawfunc)(const ScreenPolyTriangleDrawerArgs *, DrawerThread *)) { // Cull, clip and generate additional vertices as needed - TriVertex clippedvert[6]; + TriVertex clippedvert[12]; int numclipvert; clipedge(vert, clippedvert, numclipvert); @@ -196,7 +196,7 @@ void PolyTriangleDrawer::clipedge(const TriVertex *verts, TriVertex *clippedvert // -v.w <= v.z <= v.w // use barycentric weights while clipping vertices - float weights[6 * 3 * 2]; + float weights[12 * 3 * 2]; for (int i = 0; i < 3; i++) { weights[i * 3 + 0] = 0.0f; @@ -220,7 +220,7 @@ void PolyTriangleDrawer::clipedge(const TriVertex *verts, TriVertex *clippedvert // Clip against each halfspace float *input = weights; - float *output = weights + 6 * 3; + float *output = weights + 12 * 3; int inputverts = 3; int outputverts = 0; for (int p = 0; p < 6; p++) diff --git a/src/r_triangle.cpp b/src/r_triangle.cpp index 340306feca..c158177707 100644 --- a/src/r_triangle.cpp +++ b/src/r_triangle.cpp @@ -138,7 +138,7 @@ TriVertex TriangleDrawer::shade_vertex(const TriUniforms &uniforms, TriVertex v) void TriangleDrawer::draw_shaded_triangle(const TriVertex *vert, bool ccw, ScreenTriangleDrawerArgs *args, DrawerThread *thread, void(*drawfunc)(const ScreenTriangleDrawerArgs *, DrawerThread *)) { // Cull, clip and generate additional vertices as needed - TriVertex clippedvert[6]; + TriVertex clippedvert[12]; int numclipvert; clipedge(vert, clippedvert, numclipvert); @@ -207,7 +207,7 @@ void TriangleDrawer::clipedge(const TriVertex *verts, TriVertex *clippedvert, in // -v.w <= 
v.z <= v.w // use barycentric weights while clipping vertices - float weights[6 * 3 * 2]; + float weights[12 * 3 * 2]; for (int i = 0; i < 3; i++) { weights[i * 3 + 0] = 0.0f; @@ -231,7 +231,7 @@ void TriangleDrawer::clipedge(const TriVertex *verts, TriVertex *clippedvert, in // Clip against each halfspace float *input = weights; - float *output = weights + 6 * 3; + float *output = weights + 12 * 3; int inputverts = 3; int outputverts = 0; for (int p = 0; p < 6; p++) From 95df9bf48e51990d71ea65e44a3d842efd8f9a8f Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 9 Nov 2016 00:35:41 +0100 Subject: [PATCH 288/912] Fix crash bug in 8 bit drawer --- src/r_poly_triangle.cpp | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/r_poly_triangle.cpp b/src/r_poly_triangle.cpp index 168b4378ad..a9c54bc743 100644 --- a/src/r_poly_triangle.cpp +++ b/src/r_poly_triangle.cpp @@ -369,7 +369,7 @@ void ScreenPolyTriangleDrawer::draw(const ScreenPolyTriangleDrawerArgs *args, Dr for (int y = miny; y < maxy; y += q, dest += q * pitch) { // Is this row of blocks done by this thread? 
- if (thread->skipped_by_thread(y / q)) continue; + if (thread && thread->skipped_by_thread(y / q)) continue; for (int x = minx; x < maxx; x += q) { @@ -436,7 +436,7 @@ void ScreenPolyTriangleDrawer::draw(const ScreenPolyTriangleDrawerArgs *args, Dr for (int i = 0; i < TriVertex::NumVarying; i++) { float pos = varyingTL[i] + varyingBL[i] * iy; - float step = (varyingTR[i] + varyingBR[i] * iy - varying[i]) * (1.0f / q); + float step = (varyingTR[i] + varyingBR[i] * iy - pos) * (1.0f / q); varying[i] = (uint32_t)((pos - floor(pos)) * 0x100000000LL); varyingStep[i] = (uint32_t)(step * 0x100000000LL); @@ -451,7 +451,8 @@ void ScreenPolyTriangleDrawer::draw(const ScreenPolyTriangleDrawerArgs *args, Dr uint32_t vpos = ((vfrac >> 16) * textureHeight) >> 16; uint32_t uvoffset = upos * textureHeight + vpos; - buffer[ix] = texturePixels[uvoffset]; + if (texturePixels[uvoffset] != 0) + buffer[ix] = texturePixels[uvoffset]; for (int i = 0; i < TriVertex::NumVarying; i++) varying[i] += varyingStep[i]; @@ -495,7 +496,8 @@ void ScreenPolyTriangleDrawer::draw(const ScreenPolyTriangleDrawerArgs *args, Dr uint32_t vpos = ((vfrac >> 16) * textureHeight) >> 16; uint32_t uvoffset = upos * textureHeight + vpos; - buffer[ix] = texturePixels[uvoffset]; + if (texturePixels[uvoffset] != 0) + buffer[ix] = texturePixels[uvoffset]; } for (int i = 0; i < TriVertex::NumVarying; i++) @@ -588,7 +590,7 @@ void ScreenPolyTriangleDrawer::fill(const ScreenPolyTriangleDrawerArgs *args, Dr for (int y = miny; y < maxy; y += q, dest += q * pitch) { // Is this row of blocks done by this thread? 
- if (thread->skipped_by_thread(y / q)) continue; + if (thread && thread->skipped_by_thread(y / q)) continue; for (int x = minx; x < maxx; x += q) { From cffdfdf7fdfe453050e67d0ca25b6fc1f7ca092b Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 9 Nov 2016 01:33:40 +0100 Subject: [PATCH 289/912] Cull bsp using solid segments --- src/r_poly.cpp | 133 +++++++++++++++++++++++++++++++++++++++++++++++-- src/r_poly.h | 13 +++++ 2 files changed, 142 insertions(+), 4 deletions(-) diff --git a/src/r_poly.cpp b/src/r_poly.cpp index 03052ab605..a823d94f7a 100644 --- a/src/r_poly.cpp +++ b/src/r_poly.cpp @@ -35,11 +35,17 @@ EXTERN_CVAR(Bool, r_drawplayersprites) EXTERN_CVAR(Bool, r_deathcamera) EXTERN_CVAR(Bool, st_scale) +CVAR(Bool, r_debug_cull, 0, 0) + ///////////////////////////////////////////////////////////////////////////// void RenderPolyBsp::Render() { PolyVertexBuffer::Clear(); + SolidSegments.clear(); + SolidSegments.reserve(MAXWIDTH / 2 + 2); + SolidSegments.push_back({ -0x7fff, 0 }); + SolidSegments.push_back({ viewwidth, 0x7fff }); // Perspective correct: float ratio = WidescreenRatio; @@ -63,8 +69,16 @@ void RenderPolyBsp::Render() RenderNode(nodes + numnodes - 1); // The head node is the last node output. 
// Render back to front (we don't have a zbuffer at the moment, sniff!): - for (auto it = PvsSectors.rbegin(); it != PvsSectors.rend(); ++it) - RenderSubsector(*it); + if (!r_debug_cull) + { + for (auto it = PvsSectors.rbegin(); it != PvsSectors.rend(); ++it) + RenderSubsector(*it); + } + else + { + for (auto it = PvsSectors.begin(); it != PvsSectors.end(); ++it) + RenderSubsector(*it); + } RenderPlayerSprites(); DrawerCommandQueue::WaitForWorkers(); @@ -543,8 +557,24 @@ void RenderPolyBsp::RenderNode(void *node) node = bsp->children[side]; } - PvsSectors.push_back((subsector_t *)((BYTE *)node - 1)); - //RenderSubsector((subsector_t *)((BYTE *)node - 1)); + + // Mark that we need to render this + subsector_t *sub = (subsector_t *)((BYTE *)node - 1); + PvsSectors.push_back(sub); + + // Update culling info for further bsp clipping + for (uint32_t i = 0; i < sub->numlines; i++) + { + seg_t *line = &sub->firstline[i]; + if ((line->sidedef == nullptr || !(line->sidedef->Flags & WALLF_POLYOBJ)) && line->backsector == nullptr) + { + int sx1, sx2; + if (GetSegmentRangeForLine(line->v1->fX(), line->v1->fY(), line->v2->fX(), line->v2->fY(), sx1, sx2)) + { + MarkSegmentCulled(sx1, sx2); + } + } + } } void RenderPolyBsp::RenderPlayerSprites() @@ -752,6 +782,52 @@ void RenderPolyBsp::RenderPlayerSprite(DPSprite *sprite, AActor *owner, float bo //R_DrawVisSprite(vis); } +bool RenderPolyBsp::IsSegmentCulled(int x1, int x2) const +{ + int next = 0; + while (SolidSegments[next].X2 <= x2) + next++; + return (x1 >= SolidSegments[next].X1 && x2 <= SolidSegments[next].X2); +} + +void RenderPolyBsp::MarkSegmentCulled(int x1, int x2) +{ + if (x1 >= x2) + return; + + int cur = 1; + while (true) + { + if (SolidSegments[cur].X1 <= x1 && SolidSegments[cur].X2 >= x2) // Already fully marked + { + break; + } + else if (cur + 1 != SolidSegments.size() && SolidSegments[cur].X2 >= x1 && SolidSegments[cur].X1 <= x2) // Merge segments + { + // Find last segment + int merge = cur; + while (merge + 2 
!= SolidSegments.size() && SolidSegments[merge + 1].X1 <= x2) + merge++; + + // Apply new merged range + SolidSegments[cur].X1 = MIN(SolidSegments[cur].X1, x1); + SolidSegments[cur].X2 = MAX(SolidSegments[merge].X2, x2); + + // Remove additional segments we merged with + if (merge > cur) + SolidSegments.erase(SolidSegments.begin() + (cur + 1), SolidSegments.begin() + (merge + 1)); + + break; + } + else if (SolidSegments[cur].X1 > x1) // Insert new segment + { + SolidSegments.insert(SolidSegments.begin() + cur, { x1, x2 }); + break; + } + cur++; + } +} + int RenderPolyBsp::PointOnSide(const DVector2 &pos, const node_t *node) { return DMulScale32(FLOAT2FIXED(pos.Y) - node->y, node->dx, node->x - FLOAT2FIXED(pos.X), node->dy) > 0; @@ -855,6 +931,55 @@ bool RenderPolyBsp::CheckBBox(float *bspcoord) // Find the first clippost that touches the source post // (adjacent pixels are touching). + // Does not cross a pixel. + if (sx2 <= sx1) + return false; + + return !IsSegmentCulled(sx1, sx2); +} + +bool RenderPolyBsp::GetSegmentRangeForLine(double x1, double y1, double x2, double y2, int &sx1, int &sx2) const +{ + x1 = x1 - ViewPos.X; + y1 = y1 - ViewPos.Y; + x2 = x2 - ViewPos.X; + y2 = y2 - ViewPos.Y; + + // Sitting on a line? 
+ if (y1 * (x1 - x2) + x1 * (y2 - y1) >= -EQUAL_EPSILON) + return false; + + double rx1 = x1 * ViewSin - y1 * ViewCos; + double rx2 = x2 * ViewSin - y2 * ViewCos; + double ry1 = x1 * ViewTanCos + y1 * ViewTanSin; + double ry2 = x2 * ViewTanCos + y2 * ViewTanSin; + + if (rx1 >= -ry1) + { + if (rx1 > ry1) return false; // left edge is off the right side + if (ry1 == 0) return false; + sx1 = xs_RoundToInt(CenterX + rx1 * CenterX / ry1); + } + else + { + if (rx2 < -ry2) return false; // wall is off the left side + if (rx1 - rx2 - ry2 + ry1 == 0) return false; // wall does not intersect view volume + sx1 = 0; + } + + if (rx2 <= ry2) + { + if (rx2 < -ry2) return false; // right edge is off the left side + if (ry2 == 0) return false; + sx2 = xs_RoundToInt(CenterX + rx2 * CenterX / ry2); + } + else + { + if (rx1 > ry1) return false; // wall is off the right side + if (ry2 - ry1 - rx2 + rx1 == 0) return false; // wall does not intersect view volume + sx2 = viewwidth; + } + // Does not cross a pixel. if (sx2 <= sx1) return false; diff --git a/src/r_poly.h b/src/r_poly.h index 7a143cf675..6248e02f7e 100644 --- a/src/r_poly.h +++ b/src/r_poly.h @@ -77,6 +77,11 @@ private: // Returns true if some part of the bbox might be visible. 
bool CheckBBox(float *bspcoord); + bool GetSegmentRangeForLine(double x1, double y1, double x2, double y2, int &sx1, int &sx2) const; + + void MarkSegmentCulled(int x1, int x2); + bool IsSegmentCulled(int x1, int x2) const; + std::vector PvsSectors; TriMatrix worldToClip; @@ -84,6 +89,14 @@ private: const int BaseXCenter = 160; const int BaseYCenter = 100; + + struct SolidSegment + { + SolidSegment(int x1, int x2) : X1(x1), X2(x2) { } + int X1, X2; + }; + + std::vector SolidSegments; }; class RenderPolyWall From dc0e6d40a48facd1743606b279db30e5ec002c21 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 9 Nov 2016 11:38:07 +0100 Subject: [PATCH 290/912] Distance sort sprites --- src/r_poly.cpp | 28 +++++++++++++++++++++++++++- src/r_poly.h | 24 ++++++++++++++++++++++++ 2 files changed, 51 insertions(+), 1 deletion(-) diff --git a/src/r_poly.cpp b/src/r_poly.cpp index a823d94f7a..978e0a1f48 100644 --- a/src/r_poly.cpp +++ b/src/r_poly.cpp @@ -47,6 +47,10 @@ void RenderPolyBsp::Render() SolidSegments.push_back({ -0x7fff, 0 }); SolidSegments.push_back({ viewwidth, 0x7fff }); + SectorSpriteRanges.clear(); + SectorSpriteRanges.resize(numsectors); + SortedSprites.clear(); + // Perspective correct: float ratio = WidescreenRatio; float fovratio = (WidescreenRatio >= 1.3f) ? 
1.333333f : ratio; @@ -174,8 +178,10 @@ void RenderPolyBsp::RenderSubsector(subsector_t *sub) PolyTriangleDrawer::draw(uniforms, vertices, sub->numlines, TriangleDrawMode::Fan, true, 0, viewwidth, 0, viewheight, ceiltex); } - for (AActor *thing = sub->sector->thinglist; thing != nullptr; thing = thing->snext) + SpriteRange sprites = GetSpritesForSector(sub->sector); + for (int i = 0; i < sprites.Count; i++) { + AActor *thing = SortedSprites[sprites.Start + i].Thing; if ((thing->renderflags & RF_SPRITETYPEMASK) == RF_WALLSPRITE) AddWallSprite(thing, sub); else @@ -183,6 +189,26 @@ void RenderPolyBsp::RenderSubsector(subsector_t *sub) } } +SpriteRange RenderPolyBsp::GetSpritesForSector(sector_t *sector) +{ + if (SectorSpriteRanges.size() < sector->sectornum || sector->sectornum < 0) + return SpriteRange(); + + auto &range = SectorSpriteRanges[sector->sectornum]; + if (range.Start == -1) + { + range.Start = (int)SortedSprites.size(); + range.Count = 0; + for (AActor *thing = sector->thinglist; thing != nullptr; thing = thing->snext) + { + SortedSprites.push_back({ thing, (thing->Pos() - ViewPos).LengthSquared() }); + range.Count++; + } + std::stable_sort(SortedSprites.begin() + range.Start, SortedSprites.begin() + range.Start + range.Count); + } + return range; +} + void RenderPolyBsp::AddLine(seg_t *line, sector_t *frontsector) { // Reject lines not facing viewer diff --git a/src/r_poly.h b/src/r_poly.h index 6248e02f7e..7e26f8ce67 100644 --- a/src/r_poly.h +++ b/src/r_poly.h @@ -49,6 +49,26 @@ public: FDynamicColormap *Colormap = nullptr; }; +// Used for sorting things by distance to the camera +class PolySortedSprite +{ +public: + PolySortedSprite(AActor *thing, double distanceSquared) : Thing(thing), DistanceSquared(distanceSquared) { } + bool operator<(const PolySortedSprite &other) const { return DistanceSquared > other.DistanceSquared; } + + AActor *Thing; + double DistanceSquared; +}; + +class SpriteRange +{ +public: + SpriteRange() = default; + 
SpriteRange(int start, int count) : Start(start), Count(count) { } + int Start = -1; + int Count = 0; +}; + // Renders a GL BSP tree in a scene class RenderPolyBsp { @@ -67,6 +87,7 @@ private: bool IsThingCulled(AActor *thing); visstyle_t GetSpriteVisStyle(AActor *thing, double z); FTexture *GetSpriteTexture(AActor *thing, /*out*/ bool &flipX); + SpriteRange GetSpritesForSector(sector_t *sector); void RenderPlayerSprites(); void RenderPlayerSprite(DPSprite *sprite, AActor *owner, float bobx, float boby, double wx, double wy, double ticfrac); @@ -85,6 +106,9 @@ private: std::vector PvsSectors; TriMatrix worldToClip; + std::vector SectorSpriteRanges; + std::vector SortedSprites; + std::vector ScreenSprites; const int BaseXCenter = 160; From 9820a6cb8883eae953fd6598597671d799bb4f12 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 9 Nov 2016 12:07:07 +0100 Subject: [PATCH 291/912] Request GL BSP instead of requiring automap texturing to be on --- src/r_main.cpp | 2 +- src/r_swrenderer.cpp | 5 +++++ src/r_swrenderer.h | 1 + 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/r_main.cpp b/src/r_main.cpp index 5983e538b9..25945012fc 100644 --- a/src/r_main.cpp +++ b/src/r_main.cpp @@ -68,7 +68,7 @@ CUSTOM_CVAR(Bool, r_newrenderer, 0, CVAR_NOINITCALL) { if (self == 1 && !hasglnodes) { - Printf("No GL BSP detected. You must enable automap texturing and then restart the map\n"); + Printf("No GL BSP detected. You must restart the map before rendering will be correct\n"); } } diff --git a/src/r_swrenderer.cpp b/src/r_swrenderer.cpp index ad9752db97..97a2076528 100644 --- a/src/r_swrenderer.cpp +++ b/src/r_swrenderer.cpp @@ -258,6 +258,11 @@ int FSoftwareRenderer::GetMaxViewPitch(bool down) return (r_newrenderer) ? int(maxviewpitch) : (down ? 
MAX_DN_ANGLE : MAX_UP_ANGLE); } +bool FSoftwareRenderer::RequireGLNodes() +{ + return r_newrenderer; +} + //========================================================================== // // OnModeSet diff --git a/src/r_swrenderer.h b/src/r_swrenderer.h index fc3ec25512..5b205b8522 100644 --- a/src/r_swrenderer.h +++ b/src/r_swrenderer.h @@ -28,6 +28,7 @@ struct FSoftwareRenderer : public FRenderer virtual void DrawRemainingPlayerSprites() override; virtual int GetMaxViewPitch(bool down) override; + bool RequireGLNodes() override; void OnModeSet () override; void ErrorCleanup () override; From 6c52e1e52bbd5bc8036211afd1b2b5b048df3dac Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 10 Nov 2016 05:01:33 +0100 Subject: [PATCH 292/912] Draw sky in a very expensive way --- src/r_poly.cpp | 157 +++++++++++++++++++++++++++++++++++++++++++++++-- src/r_poly.h | 21 +++++++ 2 files changed, 172 insertions(+), 6 deletions(-) diff --git a/src/r_poly.cpp b/src/r_poly.cpp index 978e0a1f48..01097fe224 100644 --- a/src/r_poly.cpp +++ b/src/r_poly.cpp @@ -72,6 +72,9 @@ void RenderPolyBsp::Render() else RenderNode(nodes + numnodes - 1); // The head node is the last node output. 
+ static PolySkyDome skydome; + skydome.Render(worldToClip); + // Render back to front (we don't have a zbuffer at the moment, sniff!): if (!r_debug_cull) { @@ -130,8 +133,9 @@ void RenderPolyBsp::RenderSubsector(subsector_t *sub) AddLine(line, frontsector); } - FTexture *floortex = TexMan(frontsector->GetTexture(sector_t::floor)); - if (floortex->UseType != FTexture::TEX_Null) + FTextureID floorpicnum = frontsector->GetTexture(sector_t::floor); + FTexture *floortex = TexMan(floorpicnum); + if (floortex->UseType != FTexture::TEX_Null && floorpicnum != skyflatnum) { TriVertex *vertices = PolyVertexBuffer::GetVertices(sub->numlines); if (!vertices) @@ -154,8 +158,9 @@ void RenderPolyBsp::RenderSubsector(subsector_t *sub) PolyTriangleDrawer::draw(uniforms, vertices, sub->numlines, TriangleDrawMode::Fan, true, 0, viewwidth, 0, viewheight, floortex); } - FTexture *ceiltex = TexMan(frontsector->GetTexture(sector_t::ceiling)); - if (ceiltex->UseType != FTexture::TEX_Null) + FTextureID ceilpicnum = frontsector->GetTexture(sector_t::ceiling); + FTexture *ceiltex = TexMan(ceilpicnum); + if (ceiltex->UseType != FTexture::TEX_Null && ceilpicnum != skyflatnum) { TriVertex *vertices = PolyVertexBuffer::GetVertices(sub->numlines); if (!vertices) @@ -261,8 +266,8 @@ void RenderPolyBsp::AddLine(seg_t *line, sector_t *frontsector) double middlefloorz1 = MIN(bottomceilz1, middleceilz1); double middlefloorz2 = MIN(bottomceilz2, middleceilz2); - bool bothSkyCeiling = false;// frontsector->GetTexture(sector_t::ceiling) == skyflatnum && backsector->GetTexture(sector_t::ceiling) == skyflatnum; - bool bothSkyFloor = false;// frontsector->GetTexture(sector_t::floor) == skyflatnum && backsector->GetTexture(sector_t::floor) == skyflatnum; + bool bothSkyCeiling = frontsector->GetTexture(sector_t::ceiling) == skyflatnum && backsector->GetTexture(sector_t::ceiling) == skyflatnum; + bool bothSkyFloor = frontsector->GetTexture(sector_t::floor) == skyflatnum && 
backsector->GetTexture(sector_t::floor) == skyflatnum; if ((topceilz1 > topfloorz1 || topceilz2 > topfloorz2) && !bothSkyCeiling && line->sidedef) { @@ -1290,3 +1295,143 @@ void PolyVertexBuffer::Clear() { NextBufferVertex = 0; } + +///////////////////////////////////////////////////////////////////////////// + +TriVertex PolySkyDome::SetVertex(float xx, float yy, float zz, float uu, float vv) +{ + TriVertex v; + v.x = xx; + v.y = yy; + v.z = zz; + v.w = 1.0f; + v.varying[0] = uu; + v.varying[1] = vv; + return v; +} + +TriVertex PolySkyDome::SetVertexXYZ(float xx, float yy, float zz, float uu, float vv) +{ + TriVertex v; + v.x = xx; + v.y = zz; + v.z = yy; + v.w = 1.0f; + v.varying[0] = uu; + v.varying[1] = vv; + return v; +} + +void PolySkyDome::SkyVertex(int r, int c, bool zflip) +{ + static const FAngle maxSideAngle = 60.f; + static const float scale = 10000.; + + FAngle topAngle = (c / (float)mColumns * 360.f); + FAngle sideAngle = maxSideAngle * (float)(mRows - r) / (float)mRows; + float height = sideAngle.Sin(); + float realRadius = scale * sideAngle.Cos(); + FVector2 pos = topAngle.ToVector(realRadius); + float z = (!zflip) ? scale * height : -scale * height; + + float u, v; + //uint32_t color = r == 0 ? 0xffffff : 0xffffffff; + + // And the texture coordinates. + if (!zflip) // Flipped Y is for the lower hemisphere. + { + u = (-c / (float)mColumns); + v = (r / (float)mRows); + } + else + { + u = (-c / (float)mColumns); + v = 1.0f + ((mRows - r) / (float)mRows); + } + + if (r != 4) z += 300; + + // And finally the vertex. + TriVertex vert; + vert = SetVertexXYZ(-pos.X, z - 1.f, pos.Y, u * 4.0f, v + 0.5f/*, color*/); + mVertices.Push(vert); +} + +void PolySkyDome::CreateSkyHemisphere(bool zflip) +{ + int r, c; + + mPrimStart.Push(mVertices.Size()); + + for (c = 0; c < mColumns; c++) + { + SkyVertex(1, c, zflip); + } + + // The total number of triangles per hemisphere can be calculated + // as follows: rows * columns * 2 + 2 (for the top cap). 
+ for (r = 0; r < mRows; r++) + { + mPrimStart.Push(mVertices.Size()); + for (c = 0; c <= mColumns; c++) + { + SkyVertex(r + zflip, c, zflip); + SkyVertex(r + 1 - zflip, c, zflip); + } + } +} + +void PolySkyDome::CreateDome() +{ + mColumns = 128; + mRows = 4; + CreateSkyHemisphere(false); + CreateSkyHemisphere(true); + mPrimStart.Push(mVertices.Size()); +} + +void PolySkyDome::RenderRow(const TriUniforms &uniforms, FTexture *skytex, int row) +{ + PolyTriangleDrawer::draw(uniforms, &mVertices[mPrimStart[row]], mPrimStart[row + 1] - mPrimStart[row], TriangleDrawMode::Strip, false, 0, viewwidth, 0, viewheight, skytex); +} + +void PolySkyDome::RenderCapColorRow(const TriUniforms &uniforms, FTexture *skytex, int row, bool bottomCap) +{ + uint32_t solid = skytex->GetSkyCapColor(bottomCap); + if (!r_swtruecolor) + solid = RGB32k.RGB[(RPART(solid) >> 3)][(GPART(solid) >> 3)][(BPART(solid) >> 3)]; + PolyTriangleDrawer::fill(uniforms, &mVertices[mPrimStart[row]], mPrimStart[row + 1] - mPrimStart[row], TriangleDrawMode::Fan, bottomCap, 0, viewwidth, 0, viewheight, solid); +} + +void PolySkyDome::Render(const TriMatrix &worldToClip) +{ + FTextureID sky1tex, sky2tex; + if ((level.flags & LEVEL_SWAPSKIES) && !(level.flags & LEVEL_DOUBLESKY)) + sky1tex = sky2texture; + else + sky1tex = sky1texture; + sky2tex = sky2texture; + + FTexture *frontskytex = TexMan(sky1tex, true); + FTexture *backskytex = nullptr; + if (level.flags & LEVEL_DOUBLESKY) + backskytex = TexMan(sky2tex, true); + + TriMatrix objectToWorld = TriMatrix::translate((float)ViewPos.X, (float)ViewPos.Y, (float)ViewPos.Z); + + TriUniforms uniforms; + uniforms.objectToClip = worldToClip * objectToWorld; + uniforms.light = 256; + uniforms.flags = 0; + + int rc = mRows + 1; + + RenderCapColorRow(uniforms, frontskytex, 0, false); + RenderCapColorRow(uniforms, frontskytex, rc, true); + + for (int i = 1; i <= mRows; i++) + { + RenderRow(uniforms, frontskytex, i); + RenderRow(uniforms, frontskytex, rc + i); + } +} diff --git 
a/src/r_poly.h b/src/r_poly.h index 7e26f8ce67..ad5fe7bdb6 100644 --- a/src/r_poly.h +++ b/src/r_poly.h @@ -181,3 +181,24 @@ public: static TriVertex *GetVertices(int count); static void Clear(); }; + +class PolySkyDome +{ +public: + PolySkyDome() { CreateDome(); } + void Render(const TriMatrix &worldToClip); + +private: + TArray mVertices; + TArray mPrimStart; + int mRows, mColumns; + + void SkyVertex(int r, int c, bool yflip); + void CreateSkyHemisphere(bool zflip); + void CreateDome(); + void RenderRow(const TriUniforms &uniforms, FTexture *skytex, int row); + void RenderCapColorRow(const TriUniforms &uniforms, FTexture *skytex, int row, bool bottomCap); + + TriVertex SetVertex(float xx, float yy, float zz, float uu = 0, float vv = 0); + TriVertex SetVertexXYZ(float xx, float yy, float zz, float uu = 0, float vv = 0); +}; From b94096ffe193320e81b0f2bb7a58ddf951b53f09 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 10 Nov 2016 05:30:33 +0100 Subject: [PATCH 293/912] Crash fixes --- src/r_poly.cpp | 21 ++++++++++++--------- src/r_poly_triangle.cpp | 8 ++++---- src/r_poly_triangle.h | 2 ++ src/r_triangle.cpp | 8 ++++---- src/r_triangle.h | 2 ++ 5 files changed, 24 insertions(+), 17 deletions(-) diff --git a/src/r_poly.cpp b/src/r_poly.cpp index 01097fe224..1d63630b3e 100644 --- a/src/r_poly.cpp +++ b/src/r_poly.cpp @@ -129,7 +129,7 @@ void RenderPolyBsp::RenderSubsector(subsector_t *sub) for (uint32_t i = 0; i < sub->numlines; i++) { seg_t *line = &sub->firstline[i]; - if (line->sidedef == NULL || !(line->sidedef->Flags & WALLF_POLYOBJ)) + if (line->sidedef == nullptr || !(line->sidedef->Flags & WALLF_POLYOBJ)) AddLine(line, frontsector); } @@ -237,12 +237,15 @@ void RenderPolyBsp::AddLine(seg_t *line, sector_t *frontsector) if (line->backsector == nullptr) { - wall.SetCoords(line->v1->fPos(), line->v2->fPos(), frontceilz1, frontfloorz1, frontceilz2, frontfloorz2); - wall.TopZ = frontceilz1; - wall.BottomZ = frontfloorz1; - wall.UnpeggedCeil = 
frontceilz1; - wall.Texpart = side_t::mid; - wall.Render(worldToClip); + if (line->sidedef) + { + wall.SetCoords(line->v1->fPos(), line->v2->fPos(), frontceilz1, frontfloorz1, frontceilz2, frontfloorz2); + wall.TopZ = frontceilz1; + wall.BottomZ = frontfloorz1; + wall.UnpeggedCeil = frontceilz1; + wall.Texpart = side_t::mid; + wall.Render(worldToClip); + } } else { @@ -469,7 +472,7 @@ visstyle_t RenderPolyBsp::GetSpriteVisStyle(AActor *thing, double z) } // get light level - if (fixedcolormap != NULL) + if (fixedcolormap != nullptr) { // fixed map visstyle.BaseColormap = fixedcolormap; visstyle.ColormapNum = 0; @@ -1268,7 +1271,7 @@ void PolyScreenSprite::Render() DTA_FillColor, FillColor, DTA_SpecialColormap, special, DTA_ColorOverlay, overlay.d, - DTA_ColormapStyle, usecolormapstyle ? &colormapstyle : NULL, + DTA_ColormapStyle, usecolormapstyle ? &colormapstyle : nullptr, TAG_DONE); } diff --git a/src/r_poly_triangle.cpp b/src/r_poly_triangle.cpp index a9c54bc743..4b599f931e 100644 --- a/src/r_poly_triangle.cpp +++ b/src/r_poly_triangle.cpp @@ -127,7 +127,7 @@ TriVertex PolyTriangleDrawer::shade_vertex(const TriUniforms &uniforms, TriVerte void PolyTriangleDrawer::draw_shaded_triangle(const TriVertex *vert, bool ccw, ScreenPolyTriangleDrawerArgs *args, DrawerThread *thread, void(*drawfunc)(const ScreenPolyTriangleDrawerArgs *, DrawerThread *)) { // Cull, clip and generate additional vertices as needed - TriVertex clippedvert[12]; + TriVertex clippedvert[max_additional_vertices]; int numclipvert; clipedge(vert, clippedvert, numclipvert); @@ -196,7 +196,7 @@ void PolyTriangleDrawer::clipedge(const TriVertex *verts, TriVertex *clippedvert // -v.w <= v.z <= v.w // use barycentric weights while clipping vertices - float weights[12 * 3 * 2]; + float weights[max_additional_vertices * 3 * 2]; for (int i = 0; i < 3; i++) { weights[i * 3 + 0] = 0.0f; @@ -220,7 +220,7 @@ void PolyTriangleDrawer::clipedge(const TriVertex *verts, TriVertex *clippedvert // Clip against each 
halfspace float *input = weights; - float *output = weights + 12 * 3; + float *output = weights + max_additional_vertices * 3; int inputverts = 3; int outputverts = 0; for (int p = 0; p < 6; p++) @@ -241,7 +241,7 @@ void PolyTriangleDrawer::clipedge(const TriVertex *verts, TriVertex *clippedvert clipdistance[2 * 6 + p] * input[j * 3 + 2]; float t1, t2; - if (!cullhalfspace(clipdistance1, clipdistance2, t1, t2)) + if (!cullhalfspace(clipdistance1, clipdistance2, t1, t2) && outputverts + 1 < max_additional_vertices) { // add t1 vertex for (int k = 0; k < 3; k++) diff --git a/src/r_poly_triangle.h b/src/r_poly_triangle.h index 79c8d4a7a1..a392c3c3a4 100644 --- a/src/r_poly_triangle.h +++ b/src/r_poly_triangle.h @@ -43,6 +43,8 @@ private: static void queue_arrays(const TriUniforms &uniforms, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, int cliptop, int clipbottom, const uint8_t *texturePixels, int textureWidth, int textureHeight, int solidcolor); + enum { max_additional_vertices = 16 }; + friend class DrawPolyTrianglesCommand; }; diff --git a/src/r_triangle.cpp b/src/r_triangle.cpp index c158177707..ff35cc7dae 100644 --- a/src/r_triangle.cpp +++ b/src/r_triangle.cpp @@ -138,7 +138,7 @@ TriVertex TriangleDrawer::shade_vertex(const TriUniforms &uniforms, TriVertex v) void TriangleDrawer::draw_shaded_triangle(const TriVertex *vert, bool ccw, ScreenTriangleDrawerArgs *args, DrawerThread *thread, void(*drawfunc)(const ScreenTriangleDrawerArgs *, DrawerThread *)) { // Cull, clip and generate additional vertices as needed - TriVertex clippedvert[12]; + TriVertex clippedvert[max_additional_vertices]; int numclipvert; clipedge(vert, clippedvert, numclipvert); @@ -207,7 +207,7 @@ void TriangleDrawer::clipedge(const TriVertex *verts, TriVertex *clippedvert, in // -v.w <= v.z <= v.w // use barycentric weights while clipping vertices - float weights[12 * 3 * 2]; + float weights[max_additional_vertices * 3 * 2]; for (int i = 0; i 
< 3; i++) { weights[i * 3 + 0] = 0.0f; @@ -231,7 +231,7 @@ void TriangleDrawer::clipedge(const TriVertex *verts, TriVertex *clippedvert, in // Clip against each halfspace float *input = weights; - float *output = weights + 12 * 3; + float *output = weights + max_additional_vertices * 3; int inputverts = 3; int outputverts = 0; for (int p = 0; p < 6; p++) @@ -252,7 +252,7 @@ void TriangleDrawer::clipedge(const TriVertex *verts, TriVertex *clippedvert, in clipdistance[2 * 6 + p] * input[j * 3 + 2]; float t1, t2; - if (!cullhalfspace(clipdistance1, clipdistance2, t1, t2)) + if (!cullhalfspace(clipdistance1, clipdistance2, t1, t2) && outputverts + 1 < max_additional_vertices) { // add t1 vertex for (int k = 0; k < 3; k++) diff --git a/src/r_triangle.h b/src/r_triangle.h index 82b1d2c4d9..915f06f590 100644 --- a/src/r_triangle.h +++ b/src/r_triangle.h @@ -106,6 +106,8 @@ private: static void queue_arrays(const TriUniforms &uniforms, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, const short *cliptop, const short *clipbottom, const uint8_t *texturePixels, int textureWidth, int textureHeight, int solidcolor); + enum { max_additional_vertices = 16 }; + friend class DrawTrianglesCommand; }; From 204d09efde967be92076cd9795d71a558629b2d2 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 10 Nov 2016 08:08:37 +0100 Subject: [PATCH 294/912] Add a stencil buffer --- src/r_main.cpp | 2 +- src/r_poly.cpp | 12 ++--- src/r_poly.h | 44 +++++++++-------- src/r_poly_triangle.h | 111 ++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 141 insertions(+), 28 deletions(-) diff --git a/src/r_main.cpp b/src/r_main.cpp index 25945012fc..149353a874 100644 --- a/src/r_main.cpp +++ b/src/r_main.cpp @@ -922,7 +922,7 @@ void R_RenderActorView (AActor *actor, bool dontmaplines) } else { - RenderPolyBsp bsp; + static RenderPolyBsp bsp; bsp.Render(); } R_3D_ResetClip(); // reset clips (floor/ceiling) diff --git a/src/r_poly.cpp 
b/src/r_poly.cpp index 1d63630b3e..1cfe5c0858 100644 --- a/src/r_poly.cpp +++ b/src/r_poly.cpp @@ -41,15 +41,19 @@ CVAR(Bool, r_debug_cull, 0, 0) void RenderPolyBsp::Render() { + // Setup working buffers PolyVertexBuffer::Clear(); SolidSegments.clear(); SolidSegments.reserve(MAXWIDTH / 2 + 2); SolidSegments.push_back({ -0x7fff, 0 }); SolidSegments.push_back({ viewwidth, 0x7fff }); - SectorSpriteRanges.clear(); SectorSpriteRanges.resize(numsectors); SortedSprites.clear(); + PvsSectors.clear(); + SectorSpriteRanges.clear(); + ScreenSprites.clear(); + PolyStencilBuffer::Instance()->Clear(viewwidth, viewheight, 0); // Perspective correct: float ratio = WidescreenRatio; @@ -66,13 +70,12 @@ void RenderPolyBsp::Render() // Y shearing like the Doom renderer: //worldToClip = TriMatrix::viewToClip() * TriMatrix::worldToView(); - // Cull front to back (ok, so we dont cull yet, but we should during this!): + // Cull front to back if (numnodes == 0) PvsSectors.push_back(subsectors); // RenderSubsector(subsectors); else RenderNode(nodes + numnodes - 1); // The head node is the last node output. 
- static PolySkyDome skydome; skydome.Render(worldToClip); // Render back to front (we don't have a zbuffer at the moment, sniff!): @@ -227,9 +230,6 @@ void RenderPolyBsp::AddLine(seg_t *line, sector_t *frontsector) double frontceilz2 = frontsector->ceilingplane.ZatPoint(line->v2); double frontfloorz2 = frontsector->floorplane.ZatPoint(line->v2); - //VisiblePlaneKey ceilingPlaneKey(frontsector->GetTexture(sector_t::ceiling), frontsector->ColorMap, frontsector->lightlevel, frontsector->ceilingplane, frontsector->planes[sector_t::ceiling].xform); - //VisiblePlaneKey floorPlaneKey(frontsector->GetTexture(sector_t::floor), frontsector->ColorMap, frontsector->lightlevel, frontsector->floorplane, frontsector->planes[sector_t::floor].xform); - RenderPolyWall wall; wall.Line = line; wall.Colormap = frontsector->ColorMap; diff --git a/src/r_poly.h b/src/r_poly.h index ad5fe7bdb6..36cddfa303 100644 --- a/src/r_poly.h +++ b/src/r_poly.h @@ -69,6 +69,27 @@ public: int Count = 0; }; +class PolySkyDome +{ +public: + PolySkyDome() { CreateDome(); } + void Render(const TriMatrix &worldToClip); + +private: + TArray mVertices; + TArray mPrimStart; + int mRows, mColumns; + + void SkyVertex(int r, int c, bool yflip); + void CreateSkyHemisphere(bool zflip); + void CreateDome(); + void RenderRow(const TriUniforms &uniforms, FTexture *skytex, int row); + void RenderCapColorRow(const TriUniforms &uniforms, FTexture *skytex, int row, bool bottomCap); + + TriVertex SetVertex(float xx, float yy, float zz, float uu = 0, float vv = 0); + TriVertex SetVertexXYZ(float xx, float yy, float zz, float uu = 0, float vv = 0); +}; + // Renders a GL BSP tree in a scene class RenderPolyBsp { @@ -121,6 +142,8 @@ private: }; std::vector SolidSegments; + + PolySkyDome skydome; }; class RenderPolyWall @@ -181,24 +204,3 @@ public: static TriVertex *GetVertices(int count); static void Clear(); }; - -class PolySkyDome -{ -public: - PolySkyDome() { CreateDome(); } - void Render(const TriMatrix &worldToClip); - 
-private: - TArray mVertices; - TArray mPrimStart; - int mRows, mColumns; - - void SkyVertex(int r, int c, bool yflip); - void CreateSkyHemisphere(bool zflip); - void CreateDome(); - void RenderRow(const TriUniforms &uniforms, FTexture *skytex, int row); - void RenderCapColorRow(const TriUniforms &uniforms, FTexture *skytex, int row, bool bottomCap); - - TriVertex SetVertex(float xx, float yy, float zz, float uu = 0, float vv = 0); - TriVertex SetVertexXYZ(float xx, float yy, float zz, float uu = 0, float vv = 0); -}; diff --git a/src/r_poly_triangle.h b/src/r_poly_triangle.h index a392c3c3a4..f18361caac 100644 --- a/src/r_poly_triangle.h +++ b/src/r_poly_triangle.h @@ -48,6 +48,117 @@ private: friend class DrawPolyTrianglesCommand; }; +// 8x8 block of stencil values, plus a mask indicating if values are the same for early out stencil testing +class PolyStencilBlock +{ +public: + PolyStencilBlock(int block, uint8_t *values, uint32_t *masks) : Values(values + block * 64), ValueMask(masks[block]) + { + } + + void Set(int x, int y, uint8_t value) + { + if (ValueMask == 0xffffffff) + { + if (Values[0] == value) + return; + + for (int i = 1; i < 8 * 8 + 4 * 4 + 2 * 2 + 1; i++) + Values[i] = Values[0]; + } + + if (Values[x + y * 8] == value) + return; + + Values[x + y * 8] = value; + + int leveloffset = 0; + for (int i = 1; i < 4; i++) + { + int iy = i + 3; + + x >>= 1; + y >>= 1; + + bool same = + Values[(x << i) + (y << iy)] != value || + Values[((x + 1) << i) + (y << iy)] != value || + Values[(x << i) + ((y + 1) << iy)] != value || + Values[((x + 1) << i) + ((y + 1) << iy)] != value; + + int levelbit = 1 << (leveloffset + x + y * (8 >> i)); + + if (same) + ValueMask = ValueMask & ~levelbit; + else + ValueMask = ValueMask | levelbit; + } + + if (Values[0] != value || Values[4] != value || Values[4 * 8] != value || Values[4 * 8 + 4] != value) + ValueMask = ValueMask & ~(1 << 22); + else + ValueMask = ValueMask | (1 << 22); + } + + uint8_t Get(int x, int y) const + { + 
if (ValueMask == 0xffffffff) + return Values[0]; + else + return Values[x + y * 8]; + } + + void Clear(uint8_t value) + { + Values[0] = value; + ValueMask = 0xffffffff; + } + +private: + uint8_t *Values; // [8 * 8]; + uint32_t &ValueMask; // 4 * 4 + 2 * 2 + 1 bits indicating is Values are the same +}; + +class PolyStencilBuffer +{ +public: + static PolyStencilBuffer *Instance() + { + static PolyStencilBuffer buffer; + return &buffer; + } + + void Clear(int newwidth, int newheight, uint8_t stencil_value = 0) + { + width = newwidth; + height = newheight; + int count = BlockWidth() * BlockHeight(); + values.resize(count * 64); + masks.resize(count); + + uint8_t *v = Values(); + uint32_t *m = Masks(); + for (int i = 0; i < count; i++) + { + PolyStencilBlock block(i, v, m); + block.Clear(stencil_value); + } + } + + int Width() const { return width; } + int Height() const { return height; } + int BlockWidth() const { return (width + 7) / 8; } + int BlockHeight() const { return (height + 7) / 8; } + uint8_t *Values() { return values.data(); } + uint32_t *Masks() { return masks.data(); } + +private: + int width; + int height; + std::vector values; + std::vector masks; +}; + struct ScreenPolyTriangleDrawerArgs { uint8_t *dest; From 049ceecca51d77f866964fa5a574ca4fe66ce3e2 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 10 Nov 2016 10:44:35 +0100 Subject: [PATCH 295/912] Toying with stencils --- src/r_poly.cpp | 42 +++++--- src/r_poly.h | 1 + src/r_poly_triangle.cpp | 230 ++++++++++++++++++++++++++++++++++++---- src/r_poly_triangle.h | 37 +++++-- 4 files changed, 262 insertions(+), 48 deletions(-) diff --git a/src/r_poly.cpp b/src/r_poly.cpp index 1cfe5c0858..473e51c4d4 100644 --- a/src/r_poly.cpp +++ b/src/r_poly.cpp @@ -51,7 +51,6 @@ void RenderPolyBsp::Render() SectorSpriteRanges.resize(numsectors); SortedSprites.clear(); PvsSectors.clear(); - SectorSpriteRanges.clear(); ScreenSprites.clear(); PolyStencilBuffer::Instance()->Clear(viewwidth, viewheight, 0); @@ 
-76,10 +75,8 @@ void RenderPolyBsp::Render() else RenderNode(nodes + numnodes - 1); // The head node is the last node output. - skydome.Render(worldToClip); - - // Render back to front (we don't have a zbuffer at the moment, sniff!): - if (!r_debug_cull) + // Render front to back using the stencil buffer + if (r_debug_cull) { for (auto it = PvsSectors.rbegin(); it != PvsSectors.rend(); ++it) RenderSubsector(*it); @@ -90,6 +87,8 @@ void RenderPolyBsp::Render() RenderSubsector(*it); } + skydome.Render(worldToClip); + RenderPlayerSprites(); DrawerCommandQueue::WaitForWorkers(); RenderScreenSprites(); // To do: should be called by FSoftwareRenderer::DrawRemainingPlayerSprites instead of here @@ -138,7 +137,7 @@ void RenderPolyBsp::RenderSubsector(subsector_t *sub) FTextureID floorpicnum = frontsector->GetTexture(sector_t::floor); FTexture *floortex = TexMan(floorpicnum); - if (floortex->UseType != FTexture::TEX_Null && floorpicnum != skyflatnum) + if (floortex->UseType != FTexture::TEX_Null) { TriVertex *vertices = PolyVertexBuffer::GetVertices(sub->numlines); if (!vertices) @@ -158,12 +157,16 @@ void RenderPolyBsp::RenderSubsector(subsector_t *sub) else if (fixedcolormap) uniforms.light = 256; uniforms.flags = 0; - PolyTriangleDrawer::draw(uniforms, vertices, sub->numlines, TriangleDrawMode::Fan, true, 0, viewwidth, 0, viewheight, floortex); + + bool isSky = floorpicnum == skyflatnum; + if (!isSky) + PolyTriangleDrawer::draw(uniforms, vertices, sub->numlines, TriangleDrawMode::Fan, true, 0, viewwidth, 0, viewheight, floortex, 0); + PolyTriangleDrawer::stencil(uniforms, vertices, sub->numlines, TriangleDrawMode::Fan, true, 0, viewwidth, 0, viewheight, 0, isSky ? 
255 : 254); } FTextureID ceilpicnum = frontsector->GetTexture(sector_t::ceiling); FTexture *ceiltex = TexMan(ceilpicnum); - if (ceiltex->UseType != FTexture::TEX_Null && ceilpicnum != skyflatnum) + if (ceiltex->UseType != FTexture::TEX_Null) { TriVertex *vertices = PolyVertexBuffer::GetVertices(sub->numlines); if (!vertices) @@ -183,7 +186,11 @@ void RenderPolyBsp::RenderSubsector(subsector_t *sub) else if (fixedcolormap) uniforms.light = 256; uniforms.flags = 0; - PolyTriangleDrawer::draw(uniforms, vertices, sub->numlines, TriangleDrawMode::Fan, true, 0, viewwidth, 0, viewheight, ceiltex); + + bool isSky = ceilpicnum == skyflatnum; + if (!isSky) + PolyTriangleDrawer::draw(uniforms, vertices, sub->numlines, TriangleDrawMode::Fan, true, 0, viewwidth, 0, viewheight, ceiltex, 0); + PolyTriangleDrawer::stencil(uniforms, vertices, sub->numlines, TriangleDrawMode::Fan, true, 0, viewwidth, 0, viewheight, 0, isSky ? 255 : 254); } SpriteRange sprites = GetSpritesForSector(sub->sector); @@ -272,23 +279,25 @@ void RenderPolyBsp::AddLine(seg_t *line, sector_t *frontsector) bool bothSkyCeiling = frontsector->GetTexture(sector_t::ceiling) == skyflatnum && backsector->GetTexture(sector_t::ceiling) == skyflatnum; bool bothSkyFloor = frontsector->GetTexture(sector_t::floor) == skyflatnum && backsector->GetTexture(sector_t::floor) == skyflatnum; - if ((topceilz1 > topfloorz1 || topceilz2 > topfloorz2) && !bothSkyCeiling && line->sidedef) + if ((topceilz1 > topfloorz1 || topceilz2 > topfloorz2) && line->sidedef) { wall.SetCoords(line->v1->fPos(), line->v2->fPos(), topceilz1, topfloorz1, topceilz2, topfloorz2); wall.TopZ = topceilz1; wall.BottomZ = topfloorz1; wall.UnpeggedCeil = topceilz1; wall.Texpart = side_t::top; + wall.IsSky = bothSkyCeiling; wall.Render(worldToClip); } - if ((bottomfloorz1 < bottomceilz1 || bottomfloorz2 < bottomceilz2) && !bothSkyFloor && line->sidedef) + if ((bottomfloorz1 < bottomceilz1 || bottomfloorz2 < bottomceilz2) && line->sidedef) { 
wall.SetCoords(line->v1->fPos(), line->v2->fPos(), bottomceilz1, bottomfloorz2, bottomceilz2, bottomfloorz2); wall.TopZ = bottomceilz1; wall.BottomZ = bottomfloorz2; wall.UnpeggedCeil = topceilz1; wall.Texpart = side_t::bottom; + wall.IsSky = bothSkyFloor; wall.Render(worldToClip); } @@ -419,7 +428,7 @@ void RenderPolyBsp::AddSprite(AActor *thing, subsector_t *sub) uniforms.objectToClip = worldToClip; uniforms.light = (uint32_t)((thing->Sector->lightlevel + actualextralight) / 255.0f * 256.0f); uniforms.flags = 0; - PolyTriangleDrawer::draw(uniforms, vertices, 4, TriangleDrawMode::Fan, true, 0, viewwidth, 0, viewheight, tex); + PolyTriangleDrawer::draw(uniforms, vertices, 4, TriangleDrawMode::Fan, true, 0, viewwidth, 0, viewheight, tex, 254); } void RenderPolyBsp::AddWallSprite(AActor *thing, subsector_t *sub) @@ -1067,7 +1076,10 @@ void RenderPolyWall::Render(const TriMatrix &worldToClip) uniforms.objectToClip = worldToClip; uniforms.light = (uint32_t)(GetLightLevel() / 255.0f * 256.0f); uniforms.flags = 0; - PolyTriangleDrawer::draw(uniforms, vertices, 4, TriangleDrawMode::Fan, true, 0, viewwidth, 0, viewheight, tex); + + if (!IsSky) + PolyTriangleDrawer::draw(uniforms, vertices, 4, TriangleDrawMode::Fan, true, 0, viewwidth, 0, viewheight, tex, 0); + PolyTriangleDrawer::stencil(uniforms, vertices, 4, TriangleDrawMode::Fan, true, 0, viewwidth, 0, viewheight, 0, IsSky ? 
255 : 254); } FTexture *RenderPolyWall::GetTexture() @@ -1395,7 +1407,7 @@ void PolySkyDome::CreateDome() void PolySkyDome::RenderRow(const TriUniforms &uniforms, FTexture *skytex, int row) { - PolyTriangleDrawer::draw(uniforms, &mVertices[mPrimStart[row]], mPrimStart[row + 1] - mPrimStart[row], TriangleDrawMode::Strip, false, 0, viewwidth, 0, viewheight, skytex); + PolyTriangleDrawer::draw(uniforms, &mVertices[mPrimStart[row]], mPrimStart[row + 1] - mPrimStart[row], TriangleDrawMode::Strip, false, 0, viewwidth, 0, viewheight, skytex, 255); } void PolySkyDome::RenderCapColorRow(const TriUniforms &uniforms, FTexture *skytex, int row, bool bottomCap) @@ -1403,7 +1415,7 @@ void PolySkyDome::RenderCapColorRow(const TriUniforms &uniforms, FTexture *skyte uint32_t solid = skytex->GetSkyCapColor(bottomCap); if (!r_swtruecolor) solid = RGB32k.RGB[(RPART(solid) >> 3)][(GPART(solid) >> 3)][(BPART(solid) >> 3)]; - PolyTriangleDrawer::fill(uniforms, &mVertices[mPrimStart[row]], mPrimStart[row + 1] - mPrimStart[row], TriangleDrawMode::Fan, bottomCap, 0, viewwidth, 0, viewheight, solid); + PolyTriangleDrawer::fill(uniforms, &mVertices[mPrimStart[row]], mPrimStart[row + 1] - mPrimStart[row], TriangleDrawMode::Fan, bottomCap, 0, viewwidth, 0, viewheight, solid, 255); } void PolySkyDome::Render(const TriMatrix &worldToClip) diff --git a/src/r_poly.h b/src/r_poly.h index 36cddfa303..d4af108e2b 100644 --- a/src/r_poly.h +++ b/src/r_poly.h @@ -175,6 +175,7 @@ public: double UnpeggedCeil = 0.0; FSWColormap *Colormap = nullptr; bool Masked = false; + bool IsSky = false; private: FTexture *GetTexture(); diff --git a/src/r_poly_triangle.cpp b/src/r_poly_triangle.cpp index 4b599f931e..7fb9aafa40 100644 --- a/src/r_poly_triangle.cpp +++ b/src/r_poly_triangle.cpp @@ -40,31 +40,39 @@ #include #endif -void PolyTriangleDrawer::draw(const TriUniforms &uniforms, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, int cliptop, int clipbottom, FTexture 
*texture) +void PolyTriangleDrawer::draw(const TriUniforms &uniforms, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, int cliptop, int clipbottom, FTexture *texture, int stenciltestvalue) { if (r_swtruecolor) - queue_arrays(uniforms, vinput, vcount, mode, ccw, clipleft, clipright, cliptop, clipbottom, (const uint8_t*)texture->GetPixelsBgra(), texture->GetWidth(), texture->GetHeight(), 0); + queue_arrays(uniforms, vinput, vcount, mode, ccw, clipleft, clipright, cliptop, clipbottom, (const uint8_t*)texture->GetPixelsBgra(), texture->GetWidth(), texture->GetHeight(), 0, stenciltestvalue, stenciltestvalue); else - draw_arrays(uniforms, vinput, vcount, mode, ccw, clipleft, clipright, cliptop, clipbottom, texture->GetPixels(), texture->GetWidth(), texture->GetHeight(), 0, nullptr, &ScreenPolyTriangleDrawer::draw); + draw_arrays(uniforms, vinput, vcount, mode, ccw, clipleft, clipright, cliptop, clipbottom, texture->GetPixels(), texture->GetWidth(), texture->GetHeight(), 0, stenciltestvalue, stenciltestvalue, nullptr, &ScreenPolyTriangleDrawer::draw); } -void PolyTriangleDrawer::fill(const TriUniforms &uniforms, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, int cliptop, int clipbottom, int solidcolor) +void PolyTriangleDrawer::fill(const TriUniforms &uniforms, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, int cliptop, int clipbottom, int solidcolor, int stenciltestvalue) { if (r_swtruecolor) - queue_arrays(uniforms, vinput, vcount, mode, ccw, clipleft, clipright, cliptop, clipbottom, nullptr, 0, 0, solidcolor); + queue_arrays(uniforms, vinput, vcount, mode, ccw, clipleft, clipright, cliptop, clipbottom, nullptr, 0, 0, solidcolor, stenciltestvalue, stenciltestvalue); else - draw_arrays(uniforms, vinput, vcount, mode, ccw, clipleft, clipright, cliptop, clipbottom, nullptr, 0, 0, solidcolor, nullptr, 
&ScreenPolyTriangleDrawer::fill); + draw_arrays(uniforms, vinput, vcount, mode, ccw, clipleft, clipright, cliptop, clipbottom, nullptr, 0, 0, solidcolor, stenciltestvalue, stenciltestvalue, nullptr, &ScreenPolyTriangleDrawer::fill); } -void PolyTriangleDrawer::queue_arrays(const TriUniforms &uniforms, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, int cliptop, int clipbottom, const uint8_t *texturePixels, int textureWidth, int textureHeight, int solidcolor) +void PolyTriangleDrawer::stencil(const TriUniforms &uniforms, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, int cliptop, int clipbottom, int stenciltestvalue, int stencilwritevalue) +{ + if (r_swtruecolor) + queue_arrays(uniforms, vinput, vcount, mode, ccw, clipleft, clipright, cliptop, clipbottom, nullptr, 0, 0, 0xbeef, stenciltestvalue, stencilwritevalue); + else + draw_arrays(uniforms, vinput, vcount, mode, ccw, clipleft, clipright, cliptop, clipbottom, nullptr, 0, 0, 0, stenciltestvalue, stencilwritevalue, nullptr, &ScreenPolyTriangleDrawer::stencil); +} + +void PolyTriangleDrawer::queue_arrays(const TriUniforms &uniforms, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, int cliptop, int clipbottom, const uint8_t *texturePixels, int textureWidth, int textureHeight, int solidcolor, int stenciltestvalue, int stencilwritevalue) { if (clipright < clipleft || clipleft < 0 || clipright > MAXWIDTH || clipbottom < cliptop || cliptop < 0 || clipbottom > MAXHEIGHT) return; - DrawerCommandQueue::QueueCommand(uniforms, vinput, vcount, mode, ccw, clipleft, clipright, cliptop, clipbottom, texturePixels, textureWidth, textureHeight, solidcolor); + DrawerCommandQueue::QueueCommand(uniforms, vinput, vcount, mode, ccw, clipleft, clipright, cliptop, clipbottom, texturePixels, textureWidth, textureHeight, solidcolor, stenciltestvalue, stencilwritevalue); } -void 
PolyTriangleDrawer::draw_arrays(const TriUniforms &uniforms, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, int cliptop, int clipbottom, const uint8_t *texturePixels, int textureWidth, int textureHeight, int solidcolor, DrawerThread *thread, void(*drawfunc)(const ScreenPolyTriangleDrawerArgs *, DrawerThread *)) +void PolyTriangleDrawer::draw_arrays(const TriUniforms &uniforms, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, int cliptop, int clipbottom, const uint8_t *texturePixels, int textureWidth, int textureHeight, int solidcolor, int stenciltestvalue, int stencilwritevalue, DrawerThread *thread, void(*drawfunc)(const ScreenPolyTriangleDrawerArgs *, DrawerThread *)) { if (vcount < 3) return; @@ -81,6 +89,11 @@ void PolyTriangleDrawer::draw_arrays(const TriUniforms &uniforms, const TriVerte args.textureHeight = textureHeight; args.solidcolor = solidcolor; args.uniforms = &uniforms; + args.stencilTestValue = stenciltestvalue; + args.stencilWriteValue = stencilwritevalue; + args.stencilPitch = PolyStencilBuffer::Instance()->BlockWidth(); + args.stencilValues = PolyStencilBuffer::Instance()->Values(); + args.stencilMasks = PolyStencilBuffer::Instance()->Masks(); TriVertex vert[3]; if (mode == TriangleDrawMode::Normal) @@ -677,6 +690,158 @@ void ScreenPolyTriangleDrawer::fill(const ScreenPolyTriangleDrawerArgs *args, Dr } } +void ScreenPolyTriangleDrawer::stencil(const ScreenPolyTriangleDrawerArgs *args, DrawerThread *thread) +{ + const TriVertex &v1 = *args->v1; + const TriVertex &v2 = *args->v2; + const TriVertex &v3 = *args->v3; + int clipleft = args->clipleft; + int clipright = args->clipright; + int cliptop = args->cliptop; + int clipbottom = args->clipbottom; + int solidcolor = args->solidcolor; + uint8_t *stencilValues = args->stencilValues; + uint32_t *stencilMasks = args->stencilMasks; + int stencilPitch = args->stencilPitch; + uint8_t stencilTestValue = 
args->stencilTestValue; + uint8_t stencilWriteValue = args->stencilWriteValue; + + // 28.4 fixed-point coordinates + const int Y1 = (int)round(16.0f * v1.y); + const int Y2 = (int)round(16.0f * v2.y); + const int Y3 = (int)round(16.0f * v3.y); + + const int X1 = (int)round(16.0f * v1.x); + const int X2 = (int)round(16.0f * v2.x); + const int X3 = (int)round(16.0f * v3.x); + + // Deltas + const int DX12 = X1 - X2; + const int DX23 = X2 - X3; + const int DX31 = X3 - X1; + + const int DY12 = Y1 - Y2; + const int DY23 = Y2 - Y3; + const int DY31 = Y3 - Y1; + + // Fixed-point deltas + const int FDX12 = DX12 << 4; + const int FDX23 = DX23 << 4; + const int FDX31 = DX31 << 4; + + const int FDY12 = DY12 << 4; + const int FDY23 = DY23 << 4; + const int FDY31 = DY31 << 4; + + // Bounding rectangle + int minx = MAX((MIN(MIN(X1, X2), X3) + 0xF) >> 4, clipleft); + int maxx = MIN((MAX(MAX(X1, X2), X3) + 0xF) >> 4, clipright - 1); + int miny = MAX((MIN(MIN(Y1, Y2), Y3) + 0xF) >> 4, cliptop); + int maxy = MIN((MAX(MAX(Y1, Y2), Y3) + 0xF) >> 4, clipbottom - 1); + if (minx >= maxx || miny >= maxy) + return; + + // Block size, standard 8x8 (must be power of two) + const int q = 8; + + // Start in corner of 8x8 block + minx &= ~(q - 1); + miny &= ~(q - 1); + + // Half-edge constants + int C1 = DY12 * X1 - DX12 * Y1; + int C2 = DY23 * X2 - DX23 * Y2; + int C3 = DY31 * X3 - DX31 * Y3; + + // Correct for fill convention + if (DY12 < 0 || (DY12 == 0 && DX12 > 0)) C1++; + if (DY23 < 0 || (DY23 == 0 && DX23 > 0)) C2++; + if (DY31 < 0 || (DY31 == 0 && DX31 > 0)) C3++; + + // Loop through blocks + for (int y = miny; y < maxy; y += q) + { + // Is this row of blocks done by this thread? 
+ if (thread && thread->skipped_by_thread(y / q)) continue; + + for (int x = minx; x < maxx; x += q) + { + // Corners of block + int x0 = x << 4; + int x1 = (x + q - 1) << 4; + int y0 = y << 4; + int y1 = (y + q - 1) << 4; + + // Evaluate half-space functions + bool a00 = C1 + DX12 * y0 - DY12 * x0 > 0; + bool a10 = C1 + DX12 * y0 - DY12 * x1 > 0; + bool a01 = C1 + DX12 * y1 - DY12 * x0 > 0; + bool a11 = C1 + DX12 * y1 - DY12 * x1 > 0; + int a = (a00 << 0) | (a10 << 1) | (a01 << 2) | (a11 << 3); + + bool b00 = C2 + DX23 * y0 - DY23 * x0 > 0; + bool b10 = C2 + DX23 * y0 - DY23 * x1 > 0; + bool b01 = C2 + DX23 * y1 - DY23 * x0 > 0; + bool b11 = C2 + DX23 * y1 - DY23 * x1 > 0; + int b = (b00 << 0) | (b10 << 1) | (b01 << 2) | (b11 << 3); + + bool c00 = C3 + DX31 * y0 - DY31 * x0 > 0; + bool c10 = C3 + DX31 * y0 - DY31 * x1 > 0; + bool c01 = C3 + DX31 * y1 - DY31 * x0 > 0; + bool c11 = C3 + DX31 * y1 - DY31 * x1 > 0; + int c = (c00 << 0) | (c10 << 1) | (c01 << 2) | (c11 << 3); + + // Skip block when outside an edge + if (a == 0x0 || b == 0x0 || c == 0x0) continue; + + // Check if block needs clipping + bool clipneeded = clipleft > x || clipright < (x + q) || cliptop > y || clipbottom < (y + q); + + PolyStencilBlock stencil(x / 8 + y / 8 * stencilPitch, stencilValues, stencilMasks); + + // Accept whole block when totally covered + if (a == 0xF && b == 0xF && c == 0xF && !clipneeded && stencil.IsSingleValue()) + { + // Reject whole block if the stencil test fails + if (stencil.Get(0, 0) != stencilTestValue) + continue; + stencil.Clear(stencilWriteValue); + } + else // Partially covered block + { + int CY1 = C1 + DX12 * y0 - DY12 * x0; + int CY2 = C2 + DX23 * y0 - DY23 * x0; + int CY3 = C3 + DX31 * y0 - DY31 * x0; + + for (int iy = 0; iy < q; iy++) + { + int CX1 = CY1; + int CX2 = CY2; + int CX3 = CY3; + + for (int ix = 0; ix < q; ix++) + { + bool visible = (ix + x >= clipleft) && (ix + x < clipright) && (cliptop <= y + iy) && (clipbottom > y + iy); + + if (CX1 > 0 && CX2 
> 0 && CX3 > 0 && visible && stencil.Get(ix, iy) == stencilTestValue) + { + stencil.Set(ix, iy, stencilWriteValue); + } + + CX1 -= FDY12; + CX2 -= FDY23; + CX3 -= FDY31; + } + + CY1 += FDX12; + CY2 += FDX23; + CY3 += FDX31; + } + } + } + } +} + void ScreenPolyTriangleDrawer::draw32(const ScreenPolyTriangleDrawerArgs *args, DrawerThread *thread) { uint32_t *dest = (uint32_t *)args->dest; @@ -692,6 +857,10 @@ void ScreenPolyTriangleDrawer::draw32(const ScreenPolyTriangleDrawerArgs *args, int textureWidth = args->textureWidth; int textureHeight = args->textureHeight; uint32_t light = args->uniforms->light; + uint8_t *stencilValues = args->stencilValues; + uint32_t *stencilMasks = args->stencilMasks; + int stencilPitch = args->stencilPitch; + uint8_t stencilTestValue = args->stencilTestValue; // 28.4 fixed-point coordinates const int Y1 = (int)round(16.0f * v1.y); @@ -831,9 +1000,15 @@ void ScreenPolyTriangleDrawer::draw32(const ScreenPolyTriangleDrawerArgs *args, uint32_t *buffer = dest; + PolyStencilBlock stencil(x / 8 + y / 8 * stencilPitch, stencilValues, stencilMasks); + // Accept whole block when totally covered - if (a == 0xF && b == 0xF && c == 0xF && !clipneeded) + if (a == 0xF && b == 0xF && c == 0xF && !clipneeded && stencil.IsSingleValue()) { + // Reject whole block if the stencil test fails + if (stencil.Get(0, 0) != stencilTestValue) + continue; + for (int iy = 0; iy < q; iy++) { uint32_t varying[TriVertex::NumVarying], varyingStep[TriVertex::NumVarying]; @@ -922,11 +1097,11 @@ void ScreenPolyTriangleDrawer::draw32(const ScreenPolyTriangleDrawerArgs *args, varyingStep[i] = (uint32_t)(step * 0x100000000LL); } - for (int ix = x; ix < x + q; ix++) + for (int ix = 0; ix < q; ix++) { - bool visible = ix >= clipleft && ix < clipright && (cliptop <= y + iy) && (clipbottom > y + iy); + bool visible = (ix + x >= clipleft) && (ix + x < clipright) && (cliptop <= y + iy) && (clipbottom > y + iy); - if (CX1 > 0 && CX2 > 0 && CX3 > 0 && visible) + if (CX1 > 0 && CX2 > 
0 && CX3 > 0 && visible && stencil.Get(ix, iy) == stencilTestValue) { uint32_t ufrac = varying[0]; uint32_t vfrac = varying[1]; @@ -942,7 +1117,7 @@ void ScreenPolyTriangleDrawer::draw32(const ScreenPolyTriangleDrawerArgs *args, uint32_t fg_alpha = APART(fg); if (fg_alpha > 127) - buffer[ix] = 0xff000000 | (fg_red << 16) | (fg_green << 8) | fg_blue; + buffer[ix + x] = 0xff000000 | (fg_red << 16) | (fg_green << 8) | fg_blue; } for (int i = 0; i < TriVertex::NumVarying; i++) @@ -976,6 +1151,10 @@ void ScreenPolyTriangleDrawer::fill32(const ScreenPolyTriangleDrawerArgs *args, int cliptop = args->cliptop; int clipbottom = args->clipbottom; int solidcolor = args->solidcolor; + uint8_t *stencilValues = args->stencilValues; + uint32_t *stencilMasks = args->stencilMasks; + int stencilPitch = args->stencilPitch; + uint8_t stencilTestValue = args->stencilTestValue; // 28.4 fixed-point coordinates const int Y1 = (int)round(16.0f * v1.y); @@ -1072,9 +1251,15 @@ void ScreenPolyTriangleDrawer::fill32(const ScreenPolyTriangleDrawerArgs *args, uint32_t *buffer = dest; + PolyStencilBlock stencil(x / 8 + y / 8 * stencilPitch, stencilValues, stencilMasks); + // Accept whole block when totally covered - if (a == 0xF && b == 0xF && c == 0xF && !clipneeded) + if (a == 0xF && b == 0xF && c == 0xF && !clipneeded && stencil.IsSingleValue()) { + // Reject whole block if the stencil test fails + if (stencil.Get(0, 0) != stencilTestValue) + continue; + for (int iy = 0; iy < q; iy++) { for (int ix = x; ix < x + q; ix++) @@ -1097,13 +1282,13 @@ void ScreenPolyTriangleDrawer::fill32(const ScreenPolyTriangleDrawerArgs *args, int CX2 = CY2; int CX3 = CY3; - for (int ix = x; ix < x + q; ix++) + for (int ix = 0; ix < q; ix++) { - bool visible = ix >= clipleft && ix < clipright && (cliptop <= y + iy) && (clipbottom > y + iy); + bool visible = (ix + x >= clipleft) && (ix + x < clipright) && (cliptop <= y + iy) && (clipbottom > y + iy); - if (CX1 > 0 && CX2 > 0 && CX3 > 0 && visible) + if (CX1 > 0 && 
CX2 > 0 && CX3 > 0 && visible && stencil.Get(ix, iy) == stencilTestValue) { - buffer[ix] = solidcolor; + buffer[ix + x] = solidcolor; } CX1 -= FDY12; @@ -1138,8 +1323,8 @@ float ScreenPolyTriangleDrawer::grady(float x0, float y0, float x1, float y1, fl ///////////////////////////////////////////////////////////////////////////// -DrawPolyTrianglesCommand::DrawPolyTrianglesCommand(const TriUniforms &uniforms, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, int cliptop, int clipbottom, const uint8_t *texturePixels, int textureWidth, int textureHeight, int solidcolor) - : uniforms(uniforms), vinput(vinput), vcount(vcount), mode(mode), ccw(ccw), clipleft(clipleft), clipright(clipright), cliptop(cliptop), clipbottom(clipbottom), texturePixels(texturePixels), textureWidth(textureWidth), textureHeight(textureHeight), solidcolor(solidcolor) +DrawPolyTrianglesCommand::DrawPolyTrianglesCommand(const TriUniforms &uniforms, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, int cliptop, int clipbottom, const uint8_t *texturePixels, int textureWidth, int textureHeight, int solidcolor, int stenciltestvalue, int stencilwritevalue) + : uniforms(uniforms), vinput(vinput), vcount(vcount), mode(mode), ccw(ccw), clipleft(clipleft), clipright(clipright), cliptop(cliptop), clipbottom(clipbottom), texturePixels(texturePixels), textureWidth(textureWidth), textureHeight(textureHeight), solidcolor(solidcolor), stenciltestvalue(stenciltestvalue), stencilwritevalue(stencilwritevalue) { } @@ -1149,7 +1334,8 @@ void DrawPolyTrianglesCommand::Execute(DrawerThread *thread) uniforms, vinput, vcount, mode, ccw, clipleft, clipright, cliptop, clipbottom, texturePixels, textureWidth, textureHeight, solidcolor, - thread, texturePixels ? ScreenPolyTriangleDrawer::draw32 : ScreenPolyTriangleDrawer::fill32); + stenciltestvalue, stencilwritevalue, + thread, texturePixels ? 
ScreenPolyTriangleDrawer::draw32 : solidcolor != 0xbeef ? ScreenPolyTriangleDrawer::fill32 : ScreenPolyTriangleDrawer::stencil); } FString DrawPolyTrianglesCommand::DebugInfo() diff --git a/src/r_poly_triangle.h b/src/r_poly_triangle.h index f18361caac..9313001690 100644 --- a/src/r_poly_triangle.h +++ b/src/r_poly_triangle.h @@ -31,17 +31,18 @@ struct ScreenPolyTriangleDrawerArgs; class PolyTriangleDrawer { public: - static void draw(const TriUniforms &uniforms, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, int cliptop, int clipbottom, FTexture *texture); - static void fill(const TriUniforms &uniforms, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, int cliptop, int clipbottom, int solidcolor); + static void draw(const TriUniforms &uniforms, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, int cliptop, int clipbottom, FTexture *texture, int stenciltestvalue); + static void fill(const TriUniforms &uniforms, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, int cliptop, int clipbottom, int solidcolor, int stenciltestvalue); + static void stencil(const TriUniforms &uniforms, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, int cliptop, int clipbottom, int stenciltestvalue, int stencilwritevalue); private: static TriVertex shade_vertex(const TriUniforms &uniforms, TriVertex v); - static void draw_arrays(const TriUniforms &uniforms, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, int cliptop, int clipbottom, const uint8_t *texturePixels, int textureWidth, int textureHeight, int solidcolor, DrawerThread *thread, void(*drawfunc)(const ScreenPolyTriangleDrawerArgs *, DrawerThread *)); + static void draw_arrays(const TriUniforms &uniforms, const TriVertex *vinput, int 
vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, int cliptop, int clipbottom, const uint8_t *texturePixels, int textureWidth, int textureHeight, int solidcolor, int stenciltestvalue, int stencilwritevalue, DrawerThread *thread, void(*drawfunc)(const ScreenPolyTriangleDrawerArgs *, DrawerThread *)); static void draw_shaded_triangle(const TriVertex *vertices, bool ccw, ScreenPolyTriangleDrawerArgs *args, DrawerThread *thread, void(*drawfunc)(const ScreenPolyTriangleDrawerArgs *, DrawerThread *)); static bool cullhalfspace(float clipdistance1, float clipdistance2, float &t1, float &t2); static void clipedge(const TriVertex *verts, TriVertex *clippedvert, int &numclipvert); - static void queue_arrays(const TriUniforms &uniforms, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, int cliptop, int clipbottom, const uint8_t *texturePixels, int textureWidth, int textureHeight, int solidcolor); + static void queue_arrays(const TriUniforms &uniforms, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, int cliptop, int clipbottom, const uint8_t *texturePixels, int textureWidth, int textureHeight, int solidcolor, int stenciltestvalue, int stencilwritevalue); enum { max_additional_vertices = 16 }; @@ -75,16 +76,14 @@ public: int leveloffset = 0; for (int i = 1; i < 4; i++) { - int iy = i + 3; - x >>= 1; y >>= 1; bool same = - Values[(x << i) + (y << iy)] != value || - Values[((x + 1) << i) + (y << iy)] != value || - Values[(x << i) + ((y + 1) << iy)] != value || - Values[((x + 1) << i) + ((y + 1) << iy)] != value; + Values[(x << i) + (y << i) * 8] != value || + Values[((x + 1) << i) + (y << i) * 8] != value || + Values[(x << i) + ((y + 1) << i) * 8] != value || + Values[((x + 1) << i) + ((y + 1) << i) * 8] != value; int levelbit = 1 << (leveloffset + x + y * (8 >> i)); @@ -92,6 +91,8 @@ public: ValueMask = ValueMask & ~levelbit; else ValueMask = ValueMask | 
levelbit; + + leveloffset += (8 >> leveloffset) * (8 >> leveloffset); } if (Values[0] != value || Values[4] != value || Values[4 * 8] != value || Values[4 * 8 + 4] != value) @@ -114,6 +115,11 @@ public: ValueMask = 0xffffffff; } + bool IsSingleValue() const + { + return ValueMask == 0xffffffff; + } + private: uint8_t *Values; // [8 * 8]; uint32_t &ValueMask; // 4 * 4 + 2 * 2 + 1 bits indicating is Values are the same @@ -175,6 +181,11 @@ struct ScreenPolyTriangleDrawerArgs int textureHeight; int solidcolor; const TriUniforms *uniforms; + uint8_t *stencilValues; + uint32_t *stencilMasks; + int stencilPitch; + uint8_t stencilTestValue; + uint8_t stencilWriteValue; }; class ScreenPolyTriangleDrawer @@ -183,6 +194,8 @@ public: static void draw(const ScreenPolyTriangleDrawerArgs *args, DrawerThread *thread); static void fill(const ScreenPolyTriangleDrawerArgs *args, DrawerThread *thread); + static void stencil(const ScreenPolyTriangleDrawerArgs *args, DrawerThread *thread); + static void draw32(const ScreenPolyTriangleDrawerArgs *args, DrawerThread *thread); static void fill32(const ScreenPolyTriangleDrawerArgs *args, DrawerThread *thread); @@ -194,7 +207,7 @@ private: class DrawPolyTrianglesCommand : public DrawerCommand { public: - DrawPolyTrianglesCommand(const TriUniforms &uniforms, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, int cliptop, int clipbottom, const uint8_t *texturePixels, int textureWidth, int textureHeight, int solidcolor); + DrawPolyTrianglesCommand(const TriUniforms &uniforms, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, int cliptop, int clipbottom, const uint8_t *texturePixels, int textureWidth, int textureHeight, int solidcolor, int stenciltestvalue, int stencilwritevalue); void Execute(DrawerThread *thread) override; FString DebugInfo() override; @@ -213,6 +226,8 @@ private: int textureWidth; int textureHeight; int solidcolor; + int 
stenciltestvalue; + int stencilwritevalue; }; #endif From 373038231c371b8090b5b0685daac535c3e5cf66 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 10 Nov 2016 13:58:03 +0100 Subject: [PATCH 296/912] More stencil sky stuff --- src/r_poly.cpp | 55 ++++++++++++++++++++++++++++------------- src/r_poly.h | 11 ++++++--- src/r_poly_triangle.cpp | 1 + src/r_poly_triangle.h | 27 ++++++++++++++++++++ src/r_triangle.h | 1 + 5 files changed, 75 insertions(+), 20 deletions(-) diff --git a/src/r_poly.cpp b/src/r_poly.cpp index 473e51c4d4..a1da1f6ab6 100644 --- a/src/r_poly.cpp +++ b/src/r_poly.cpp @@ -53,6 +53,8 @@ void RenderPolyBsp::Render() PvsSectors.clear(); ScreenSprites.clear(); PolyStencilBuffer::Instance()->Clear(viewwidth, viewheight, 0); + PolySubsectorGBuffer::Instance()->Resize(dc_pitch, viewheight); + NextSubsectorDepth = 0; // Perspective correct: float ratio = WidescreenRatio; @@ -75,7 +77,7 @@ void RenderPolyBsp::Render() else RenderNode(nodes + numnodes - 1); // The head node is the last node output. 
- // Render front to back using the stencil buffer + // Render front to back if (r_debug_cull) { for (auto it = PvsSectors.rbegin(); it != PvsSectors.rend(); ++it) @@ -128,17 +130,21 @@ void RenderPolyBsp::RenderSubsector(subsector_t *sub) sector_t *frontsector = sub->sector; frontsector->MoreFlags |= SECF_DRAWN; + uint32_t subsectorDepth = NextSubsectorDepth++; + for (uint32_t i = 0; i < sub->numlines; i++) { seg_t *line = &sub->firstline[i]; if (line->sidedef == nullptr || !(line->sidedef->Flags & WALLF_POLYOBJ)) - AddLine(line, frontsector); + AddLine(line, frontsector, subsectorDepth); } FTextureID floorpicnum = frontsector->GetTexture(sector_t::floor); FTexture *floortex = TexMan(floorpicnum); if (floortex->UseType != FTexture::TEX_Null) { + bool isSky = floorpicnum == skyflatnum; + TriVertex *vertices = PolyVertexBuffer::GetVertices(sub->numlines); if (!vertices) return; @@ -157,17 +163,22 @@ void RenderPolyBsp::RenderSubsector(subsector_t *sub) else if (fixedcolormap) uniforms.light = 256; uniforms.flags = 0; - - bool isSky = floorpicnum == skyflatnum; + uniforms.subsectorDepth = isSky ? SkySubsectorDepth : subsectorDepth; if (!isSky) + { PolyTriangleDrawer::draw(uniforms, vertices, sub->numlines, TriangleDrawMode::Fan, true, 0, viewwidth, 0, viewheight, floortex, 0); - PolyTriangleDrawer::stencil(uniforms, vertices, sub->numlines, TriangleDrawMode::Fan, true, 0, viewwidth, 0, viewheight, 0, isSky ? 
255 : 254); + PolyTriangleDrawer::stencil(uniforms, vertices, sub->numlines, TriangleDrawMode::Fan, true, 0, viewwidth, 0, viewheight, 0, 1); + } + else + PolyTriangleDrawer::fill(uniforms, vertices, sub->numlines, TriangleDrawMode::Fan, true, 0, viewwidth, 0, viewheight, 0, 0); } FTextureID ceilpicnum = frontsector->GetTexture(sector_t::ceiling); FTexture *ceiltex = TexMan(ceilpicnum); if (ceiltex->UseType != FTexture::TEX_Null) { + bool isSky = ceilpicnum == skyflatnum; + TriVertex *vertices = PolyVertexBuffer::GetVertices(sub->numlines); if (!vertices) return; @@ -186,11 +197,14 @@ void RenderPolyBsp::RenderSubsector(subsector_t *sub) else if (fixedcolormap) uniforms.light = 256; uniforms.flags = 0; - - bool isSky = ceilpicnum == skyflatnum; + uniforms.subsectorDepth = isSky ? SkySubsectorDepth : subsectorDepth; if (!isSky) + { PolyTriangleDrawer::draw(uniforms, vertices, sub->numlines, TriangleDrawMode::Fan, true, 0, viewwidth, 0, viewheight, ceiltex, 0); - PolyTriangleDrawer::stencil(uniforms, vertices, sub->numlines, TriangleDrawMode::Fan, true, 0, viewwidth, 0, viewheight, 0, isSky ? 
255 : 254); + PolyTriangleDrawer::stencil(uniforms, vertices, sub->numlines, TriangleDrawMode::Fan, true, 0, viewwidth, 0, viewheight, 0, 1); + } + else + PolyTriangleDrawer::fill(uniforms, vertices, sub->numlines, TriangleDrawMode::Fan, true, 0, viewwidth, 0, viewheight, 0, 0); } SpriteRange sprites = GetSpritesForSector(sub->sector); @@ -198,9 +212,9 @@ void RenderPolyBsp::RenderSubsector(subsector_t *sub) { AActor *thing = SortedSprites[sprites.Start + i].Thing; if ((thing->renderflags & RF_SPRITETYPEMASK) == RF_WALLSPRITE) - AddWallSprite(thing, sub); + AddWallSprite(thing, sub, subsectorDepth); else - AddSprite(thing, sub); + AddSprite(thing, sub, subsectorDepth); } } @@ -224,7 +238,7 @@ SpriteRange RenderPolyBsp::GetSpritesForSector(sector_t *sector) return range; } -void RenderPolyBsp::AddLine(seg_t *line, sector_t *frontsector) +void RenderPolyBsp::AddLine(seg_t *line, sector_t *frontsector, uint32_t subsectorDepth) { // Reject lines not facing viewer DVector2 pt1 = line->v1->fPos() - ViewPos; @@ -241,6 +255,7 @@ void RenderPolyBsp::AddLine(seg_t *line, sector_t *frontsector) wall.Line = line; wall.Colormap = frontsector->ColorMap; wall.Masked = false; + wall.SubsectorDepth = subsectorDepth; if (line->backsector == nullptr) { @@ -341,7 +356,7 @@ bool RenderPolyBsp::IsThingCulled(AActor *thing) return false; } -void RenderPolyBsp::AddSprite(AActor *thing, subsector_t *sub) +void RenderPolyBsp::AddSprite(AActor *thing, subsector_t *sub, uint32_t subsectorDepth) { if (IsThingCulled(thing)) return; @@ -428,10 +443,11 @@ void RenderPolyBsp::AddSprite(AActor *thing, subsector_t *sub) uniforms.objectToClip = worldToClip; uniforms.light = (uint32_t)((thing->Sector->lightlevel + actualextralight) / 255.0f * 256.0f); uniforms.flags = 0; - PolyTriangleDrawer::draw(uniforms, vertices, 4, TriangleDrawMode::Fan, true, 0, viewwidth, 0, viewheight, tex, 254); + uniforms.subsectorDepth = subsectorDepth; + PolyTriangleDrawer::draw(uniforms, vertices, 4, 
TriangleDrawMode::Fan, true, 0, viewwidth, 0, viewheight, tex, 0); } -void RenderPolyBsp::AddWallSprite(AActor *thing, subsector_t *sub) +void RenderPolyBsp::AddWallSprite(AActor *thing, subsector_t *sub, uint32_t subsectorDepth) { if (IsThingCulled(thing)) return; @@ -1076,10 +1092,15 @@ void RenderPolyWall::Render(const TriMatrix &worldToClip) uniforms.objectToClip = worldToClip; uniforms.light = (uint32_t)(GetLightLevel() / 255.0f * 256.0f); uniforms.flags = 0; + uniforms.subsectorDepth = IsSky ? RenderPolyBsp::SkySubsectorDepth : SubsectorDepth; if (!IsSky) + { PolyTriangleDrawer::draw(uniforms, vertices, 4, TriangleDrawMode::Fan, true, 0, viewwidth, 0, viewheight, tex, 0); - PolyTriangleDrawer::stencil(uniforms, vertices, 4, TriangleDrawMode::Fan, true, 0, viewwidth, 0, viewheight, 0, IsSky ? 255 : 254); + PolyTriangleDrawer::stencil(uniforms, vertices, 4, TriangleDrawMode::Fan, true, 0, viewwidth, 0, viewheight, 0, 1); + } + else + PolyTriangleDrawer::fill(uniforms, vertices, 4, TriangleDrawMode::Fan, true, 0, viewwidth, 0, viewheight, 0, 0); } FTexture *RenderPolyWall::GetTexture() @@ -1407,7 +1428,7 @@ void PolySkyDome::CreateDome() void PolySkyDome::RenderRow(const TriUniforms &uniforms, FTexture *skytex, int row) { - PolyTriangleDrawer::draw(uniforms, &mVertices[mPrimStart[row]], mPrimStart[row + 1] - mPrimStart[row], TriangleDrawMode::Strip, false, 0, viewwidth, 0, viewheight, skytex, 255); + PolyTriangleDrawer::draw(uniforms, &mVertices[mPrimStart[row]], mPrimStart[row + 1] - mPrimStart[row], TriangleDrawMode::Strip, false, 0, viewwidth, 0, viewheight, skytex, 0); } void PolySkyDome::RenderCapColorRow(const TriUniforms &uniforms, FTexture *skytex, int row, bool bottomCap) @@ -1415,7 +1436,7 @@ void PolySkyDome::RenderCapColorRow(const TriUniforms &uniforms, FTexture *skyte uint32_t solid = skytex->GetSkyCapColor(bottomCap); if (!r_swtruecolor) solid = RGB32k.RGB[(RPART(solid) >> 3)][(GPART(solid) >> 3)][(BPART(solid) >> 3)]; - 
PolyTriangleDrawer::fill(uniforms, &mVertices[mPrimStart[row]], mPrimStart[row + 1] - mPrimStart[row], TriangleDrawMode::Fan, bottomCap, 0, viewwidth, 0, viewheight, solid, 255); + PolyTriangleDrawer::fill(uniforms, &mVertices[mPrimStart[row]], mPrimStart[row + 1] - mPrimStart[row], TriangleDrawMode::Fan, bottomCap, 0, viewwidth, 0, viewheight, solid, 0); } void PolySkyDome::Render(const TriMatrix &worldToClip) diff --git a/src/r_poly.h b/src/r_poly.h index d4af108e2b..c312f313bf 100644 --- a/src/r_poly.h +++ b/src/r_poly.h @@ -97,14 +97,16 @@ public: void Render(); void RenderScreenSprites(); + static const uint32_t SkySubsectorDepth = 0xffffffff; + private: void RenderNode(void *node); void RenderSubsector(subsector_t *sub); - void AddLine(seg_t *line, sector_t *frontsector); + void AddLine(seg_t *line, sector_t *frontsector, uint32_t subsectorDepth); TriVertex PlaneVertex(vertex_t *v1, sector_t *sector, const secplane_t &plane); - void AddSprite(AActor *thing, subsector_t *sub); - void AddWallSprite(AActor *thing, subsector_t *sub); + void AddSprite(AActor *thing, subsector_t *sub, uint32_t subsectorDepth); + void AddWallSprite(AActor *thing, subsector_t *sub, uint32_t subsectorDepth); bool IsThingCulled(AActor *thing); visstyle_t GetSpriteVisStyle(AActor *thing, double z); FTexture *GetSpriteTexture(AActor *thing, /*out*/ bool &flipX); @@ -125,6 +127,8 @@ private: bool IsSegmentCulled(int x1, int x2) const; std::vector PvsSectors; + uint32_t NextSubsectorDepth = 0; + TriMatrix worldToClip; std::vector SectorSpriteRanges; @@ -176,6 +180,7 @@ public: FSWColormap *Colormap = nullptr; bool Masked = false; bool IsSky = false; + uint32_t SubsectorDepth = 0; private: FTexture *GetTexture(); diff --git a/src/r_poly_triangle.cpp b/src/r_poly_triangle.cpp index 7fb9aafa40..57165736dd 100644 --- a/src/r_poly_triangle.cpp +++ b/src/r_poly_triangle.cpp @@ -94,6 +94,7 @@ void PolyTriangleDrawer::draw_arrays(const TriUniforms &uniforms, const TriVerte args.stencilPitch = 
PolyStencilBuffer::Instance()->BlockWidth(); args.stencilValues = PolyStencilBuffer::Instance()->Values(); args.stencilMasks = PolyStencilBuffer::Instance()->Masks(); + args.subsectorGBuffer = PolySubsectorGBuffer::Instance()->Values(); TriVertex vert[3]; if (mode == TriangleDrawMode::Normal) diff --git a/src/r_poly_triangle.h b/src/r_poly_triangle.h index 9313001690..b0b22f8d0f 100644 --- a/src/r_poly_triangle.h +++ b/src/r_poly_triangle.h @@ -125,6 +125,32 @@ private: uint32_t &ValueMask; // 4 * 4 + 2 * 2 + 1 bits indicating is Values are the same }; +class PolySubsectorGBuffer +{ +public: + static PolySubsectorGBuffer *Instance() + { + static PolySubsectorGBuffer buffer; + return &buffer; + } + + void Resize(int newwidth, int newheight) + { + width = newwidth; + height = newheight; + values.resize(width * height); + } + + int Width() const { return width; } + int Height() const { return height; } + uint32_t *Values() { return values.data(); } + +private: + int width; + int height; + std::vector values; +}; + class PolyStencilBuffer { public: @@ -186,6 +212,7 @@ struct ScreenPolyTriangleDrawerArgs int stencilPitch; uint8_t stencilTestValue; uint8_t stencilWriteValue; + uint32_t *subsectorGBuffer; }; class ScreenPolyTriangleDrawer diff --git a/src/r_triangle.h b/src/r_triangle.h index 915f06f590..a3bdca5ed0 100644 --- a/src/r_triangle.h +++ b/src/r_triangle.h @@ -63,6 +63,7 @@ struct TriMatrix struct TriUniforms { uint32_t light; + uint32_t subsectorDepth; uint16_t light_alpha; uint16_t light_red; From 529a93b680cc7fda4a5116d2c23e0320d6fc1279 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 11 Nov 2016 10:15:27 +0100 Subject: [PATCH 297/912] Sky rendering fixes --- src/r_poly.cpp | 284 +++++++++++++++++++++++++++++++------------------ src/r_poly.h | 6 +- 2 files changed, 185 insertions(+), 105 deletions(-) diff --git a/src/r_poly.cpp b/src/r_poly.cpp index a1da1f6ab6..a58cb86a5a 100644 --- a/src/r_poly.cpp +++ b/src/r_poly.cpp @@ -73,9 +73,17 @@ void 
RenderPolyBsp::Render() // Cull front to back if (numnodes == 0) - PvsSectors.push_back(subsectors); // RenderSubsector(subsectors); + { + PvsSectors.push_back(subsectors); + MaxCeilingHeight = subsectors->sector->ceilingplane.Zat0(); + MinFloorHeight = subsectors->sector->floorplane.Zat0(); + } else + { + MaxCeilingHeight = 0.0; + MinFloorHeight = 0.0; RenderNode(nodes + numnodes - 1); // The head node is the last node output. + } // Render front to back if (r_debug_cull) @@ -102,29 +110,6 @@ void RenderPolyBsp::RenderScreenSprites() sprite.Render(); } -TriVertex RenderPolyBsp::PlaneVertex(vertex_t *v1, sector_t *sector, const secplane_t &plane) -{ - TriVertex v; - v.x = (float)v1->fPos().X; - v.y = (float)v1->fPos().Y; - v.z = (float)plane.ZatPoint(v1); - v.w = 1.0f; - v.varying[0] = v.x / 64.0f; - v.varying[1] = v.y / 64.0f; - - /* - double vis = r_FloorVisibility / (plane.Zat0() - ViewPos.Z); - if (fixedlightlev >= 0) - R_SetDSColorMapLight(sector->ColorMap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); - else if (fixedcolormap) - R_SetDSColorMapLight(fixedcolormap, 0, 0); - else - R_SetDSColorMapLight(sector->ColorMap, (float)(vis * fabs(CenterY - y)), LIGHT2SHADE(sector->lightlevel)); - */ - - return v; -} - void RenderPolyBsp::RenderSubsector(subsector_t *sub) { sector_t *frontsector = sub->sector; @@ -139,72 +124,10 @@ void RenderPolyBsp::RenderSubsector(subsector_t *sub) AddLine(line, frontsector, subsectorDepth); } - FTextureID floorpicnum = frontsector->GetTexture(sector_t::floor); - FTexture *floortex = TexMan(floorpicnum); - if (floortex->UseType != FTexture::TEX_Null) + if (sub->sector->CenterFloor() != sub->sector->CenterCeiling()) { - bool isSky = floorpicnum == skyflatnum; - - TriVertex *vertices = PolyVertexBuffer::GetVertices(sub->numlines); - if (!vertices) - return; - - for (uint32_t i = 0; i < sub->numlines; i++) - { - seg_t *line = &sub->firstline[i]; - vertices[i] = PlaneVertex(line->v1, frontsector, frontsector->floorplane); - } - - TriUniforms 
uniforms; - uniforms.objectToClip = worldToClip; - uniforms.light = (uint32_t)(frontsector->lightlevel / 255.0f * 256.0f); - if (fixedlightlev >= 0) - uniforms.light = (uint32_t)(fixedlightlev / 255.0f * 256.0f); - else if (fixedcolormap) - uniforms.light = 256; - uniforms.flags = 0; - uniforms.subsectorDepth = isSky ? SkySubsectorDepth : subsectorDepth; - if (!isSky) - { - PolyTriangleDrawer::draw(uniforms, vertices, sub->numlines, TriangleDrawMode::Fan, true, 0, viewwidth, 0, viewheight, floortex, 0); - PolyTriangleDrawer::stencil(uniforms, vertices, sub->numlines, TriangleDrawMode::Fan, true, 0, viewwidth, 0, viewheight, 0, 1); - } - else - PolyTriangleDrawer::fill(uniforms, vertices, sub->numlines, TriangleDrawMode::Fan, true, 0, viewwidth, 0, viewheight, 0, 0); - } - - FTextureID ceilpicnum = frontsector->GetTexture(sector_t::ceiling); - FTexture *ceiltex = TexMan(ceilpicnum); - if (ceiltex->UseType != FTexture::TEX_Null) - { - bool isSky = ceilpicnum == skyflatnum; - - TriVertex *vertices = PolyVertexBuffer::GetVertices(sub->numlines); - if (!vertices) - return; - - for (uint32_t i = 0; i < sub->numlines; i++) - { - seg_t *line = &sub->firstline[i]; - vertices[sub->numlines - 1 - i] = PlaneVertex(line->v1, frontsector, frontsector->ceilingplane); - } - - TriUniforms uniforms; - uniforms.objectToClip = worldToClip; - uniforms.light = (uint32_t)(frontsector->lightlevel / 255.0f * 256.0f); - if (fixedlightlev >= 0) - uniforms.light = (uint32_t)(fixedlightlev / 255.0f * 256.0f); - else if (fixedcolormap) - uniforms.light = 256; - uniforms.flags = 0; - uniforms.subsectorDepth = isSky ? 
SkySubsectorDepth : subsectorDepth; - if (!isSky) - { - PolyTriangleDrawer::draw(uniforms, vertices, sub->numlines, TriangleDrawMode::Fan, true, 0, viewwidth, 0, viewheight, ceiltex, 0); - PolyTriangleDrawer::stencil(uniforms, vertices, sub->numlines, TriangleDrawMode::Fan, true, 0, viewwidth, 0, viewheight, 0, 1); - } - else - PolyTriangleDrawer::fill(uniforms, vertices, sub->numlines, TriangleDrawMode::Fan, true, 0, viewwidth, 0, viewheight, 0, 0); + RenderPlane(sub, subsectorDepth, true); + RenderPlane(sub, subsectorDepth, false); } SpriteRange sprites = GetSpritesForSector(sub->sector); @@ -218,6 +141,166 @@ void RenderPolyBsp::RenderSubsector(subsector_t *sub) } } +void RenderPolyBsp::RenderPlane(subsector_t *sub, uint32_t subsectorDepth, bool ceiling) +{ + sector_t *frontsector = sub->sector; + + FTextureID picnum = frontsector->GetTexture(ceiling ? sector_t::ceiling : sector_t::floor); + FTexture *tex = TexMan(picnum); + if (tex->UseType == FTexture::TEX_Null) + return; + + bool isSky = picnum == skyflatnum; + double skyHeight = ceiling ? MaxCeilingHeight : MinFloorHeight; + + TriUniforms uniforms; + uniforms.objectToClip = worldToClip; + uniforms.light = (uint32_t)(frontsector->lightlevel / 255.0f * 256.0f); + if (fixedlightlev >= 0) + uniforms.light = (uint32_t)(fixedlightlev / 255.0f * 256.0f); + else if (fixedcolormap) + uniforms.light = 256; + uniforms.flags = 0; + uniforms.subsectorDepth = isSky ? 
SkySubsectorDepth : subsectorDepth; + + /* + double vis = r_FloorVisibility / (plane.Zat0() - ViewPos.Z); + if (fixedlightlev >= 0) + R_SetDSColorMapLight(sector->ColorMap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); + else if (fixedcolormap) + R_SetDSColorMapLight(fixedcolormap, 0, 0); + else + R_SetDSColorMapLight(sector->ColorMap, (float)(vis * fabs(CenterY - y)), LIGHT2SHADE(sector->lightlevel)); + */ + + TriVertex *vertices = PolyVertexBuffer::GetVertices(sub->numlines); + if (!vertices) + return; + + if (ceiling) + { + for (uint32_t i = 0; i < sub->numlines; i++) + { + seg_t *line = &sub->firstline[i]; + vertices[sub->numlines - 1 - i] = PlaneVertex(line->v1, frontsector, isSky ? skyHeight : frontsector->ceilingplane.ZatPoint(line->v1)); + } + } + else + { + for (uint32_t i = 0; i < sub->numlines; i++) + { + seg_t *line = &sub->firstline[i]; + vertices[i] = PlaneVertex(line->v1, frontsector, isSky ? skyHeight : frontsector->floorplane.ZatPoint(line->v1)); + } + } + + if (!isSky) + { + PolyTriangleDrawer::draw(uniforms, vertices, sub->numlines, TriangleDrawMode::Fan, true, 0, viewwidth, 0, viewheight, tex, 0); + PolyTriangleDrawer::stencil(uniforms, vertices, sub->numlines, TriangleDrawMode::Fan, true, 0, viewwidth, 0, viewheight, 0, 1); + } + else + { + PolyTriangleDrawer::stencil(uniforms, vertices, sub->numlines, TriangleDrawMode::Fan, true, 0, viewwidth, 0, viewheight, 0, 255); + + for (uint32_t i = 0; i < sub->numlines; i++) + { + TriVertex *wallvert = PolyVertexBuffer::GetVertices(4); + if (!wallvert) + return; + + seg_t *line = &sub->firstline[i]; + + bool closedSky = false; + if (line->backsector) + { + sector_t *backsector = (line->backsector != line->frontsector) ? 
line->backsector : line->frontsector; + + double frontceilz1 = frontsector->ceilingplane.ZatPoint(line->v1); + double frontfloorz1 = frontsector->floorplane.ZatPoint(line->v1); + double frontceilz2 = frontsector->ceilingplane.ZatPoint(line->v2); + double frontfloorz2 = frontsector->floorplane.ZatPoint(line->v2); + + double backceilz1 = backsector->ceilingplane.ZatPoint(line->v1); + double backfloorz1 = backsector->floorplane.ZatPoint(line->v1); + double backceilz2 = backsector->ceilingplane.ZatPoint(line->v2); + double backfloorz2 = backsector->floorplane.ZatPoint(line->v2); + + double topceilz1 = frontceilz1; + double topceilz2 = frontceilz2; + double topfloorz1 = MIN(backceilz1, frontceilz1); + double topfloorz2 = MIN(backceilz2, frontceilz2); + double bottomceilz1 = MAX(frontfloorz1, backfloorz1); + double bottomceilz2 = MAX(frontfloorz2, backfloorz2); + double bottomfloorz1 = frontfloorz1; + double bottomfloorz2 = frontfloorz2; + double middleceilz1 = topfloorz1; + double middleceilz2 = topfloorz2; + double middlefloorz1 = MIN(bottomceilz1, middleceilz1); + double middlefloorz2 = MIN(bottomceilz2, middleceilz2); + + bool bothSkyCeiling = frontsector->GetTexture(sector_t::ceiling) == skyflatnum && backsector->GetTexture(sector_t::ceiling) == skyflatnum; + bool bothSkyFloor = frontsector->GetTexture(sector_t::floor) == skyflatnum && backsector->GetTexture(sector_t::floor) == skyflatnum; + + bool closedSector = backceilz1 == backfloorz1 && backceilz2 == backfloorz2; + closedSky = (ceiling && bothSkyCeiling && closedSector) || (!ceiling && bothSkyFloor && closedSector); + if (!closedSky) + { + bool topwall = (topceilz1 > topfloorz1 || topceilz2 > topfloorz2) && line->sidedef && !bothSkyCeiling; + bool bottomwall = (bottomfloorz1 < bottomceilz1 || bottomfloorz2 < bottomceilz2) && line->sidedef && !bothSkyFloor; + if ((ceiling && !topwall) || (!ceiling && !bottomwall)) + continue; + } + } + + if (ceiling) + { + wallvert[0] = PlaneVertex(line->v1, frontsector, 
skyHeight); + wallvert[1] = PlaneVertex(line->v2, frontsector, skyHeight); + if (!closedSky) + { + wallvert[2] = PlaneVertex(line->v2, frontsector, frontsector->ceilingplane.ZatPoint(line->v2)); + wallvert[3] = PlaneVertex(line->v1, frontsector, frontsector->ceilingplane.ZatPoint(line->v1)); + } + else + { + wallvert[2] = PlaneVertex(line->v2, frontsector, frontsector->floorplane.ZatPoint(line->v2)); + wallvert[3] = PlaneVertex(line->v1, frontsector, frontsector->floorplane.ZatPoint(line->v1)); + } + } + else + { + if (!closedSky) + { + wallvert[0] = PlaneVertex(line->v1, frontsector, frontsector->floorplane.ZatPoint(line->v1)); + wallvert[1] = PlaneVertex(line->v2, frontsector, frontsector->floorplane.ZatPoint(line->v2)); + } + else + { + wallvert[0] = PlaneVertex(line->v1, frontsector, frontsector->ceilingplane.ZatPoint(line->v1)); + wallvert[1] = PlaneVertex(line->v2, frontsector, frontsector->ceilingplane.ZatPoint(line->v2)); + } + wallvert[2] = PlaneVertex(line->v2, frontsector, skyHeight); + wallvert[3] = PlaneVertex(line->v1, frontsector, skyHeight); + } + + PolyTriangleDrawer::stencil(uniforms, wallvert, 4, TriangleDrawMode::Fan, true, 0, viewwidth, 0, viewheight, 0, 255); + } + } +} + +TriVertex RenderPolyBsp::PlaneVertex(vertex_t *v1, sector_t *sector, double height) +{ + TriVertex v; + v.x = (float)v1->fPos().X; + v.y = (float)v1->fPos().Y; + v.z = (float)height; + v.w = 1.0f; + v.varying[0] = v.x / 64.0f; + v.varying[1] = 1.0f - v.y / 64.0f; + return v; +} + SpriteRange RenderPolyBsp::GetSpritesForSector(sector_t *sector) { if (SectorSpriteRanges.size() < sector->sectornum || sector->sectornum < 0) @@ -294,25 +377,23 @@ void RenderPolyBsp::AddLine(seg_t *line, sector_t *frontsector, uint32_t subsect bool bothSkyCeiling = frontsector->GetTexture(sector_t::ceiling) == skyflatnum && backsector->GetTexture(sector_t::ceiling) == skyflatnum; bool bothSkyFloor = frontsector->GetTexture(sector_t::floor) == skyflatnum && backsector->GetTexture(sector_t::floor) 
== skyflatnum; - if ((topceilz1 > topfloorz1 || topceilz2 > topfloorz2) && line->sidedef) + if ((topceilz1 > topfloorz1 || topceilz2 > topfloorz2) && line->sidedef && !bothSkyCeiling) { wall.SetCoords(line->v1->fPos(), line->v2->fPos(), topceilz1, topfloorz1, topceilz2, topfloorz2); wall.TopZ = topceilz1; wall.BottomZ = topfloorz1; wall.UnpeggedCeil = topceilz1; wall.Texpart = side_t::top; - wall.IsSky = bothSkyCeiling; wall.Render(worldToClip); } - if ((bottomfloorz1 < bottomceilz1 || bottomfloorz2 < bottomceilz2) && line->sidedef) + if ((bottomfloorz1 < bottomceilz1 || bottomfloorz2 < bottomceilz2) && line->sidedef && !bothSkyFloor) { wall.SetCoords(line->v1->fPos(), line->v2->fPos(), bottomceilz1, bottomfloorz2, bottomceilz2, bottomfloorz2); wall.TopZ = bottomceilz1; wall.BottomZ = bottomfloorz2; wall.UnpeggedCeil = topceilz1; wall.Texpart = side_t::bottom; - wall.IsSky = bothSkyFloor; wall.Render(worldToClip); } @@ -619,6 +700,8 @@ void RenderPolyBsp::RenderNode(void *node) // Mark that we need to render this subsector_t *sub = (subsector_t *)((BYTE *)node - 1); + MaxCeilingHeight = MAX(MaxCeilingHeight, sub->sector->ceilingplane.Zat0()); + MinFloorHeight = MIN(MinFloorHeight, sub->sector->floorplane.Zat0()); PvsSectors.push_back(sub); // Update culling info for further bsp clipping @@ -1092,15 +1175,10 @@ void RenderPolyWall::Render(const TriMatrix &worldToClip) uniforms.objectToClip = worldToClip; uniforms.light = (uint32_t)(GetLightLevel() / 255.0f * 256.0f); uniforms.flags = 0; - uniforms.subsectorDepth = IsSky ? 
RenderPolyBsp::SkySubsectorDepth : SubsectorDepth; + uniforms.subsectorDepth = SubsectorDepth; - if (!IsSky) - { - PolyTriangleDrawer::draw(uniforms, vertices, 4, TriangleDrawMode::Fan, true, 0, viewwidth, 0, viewheight, tex, 0); - PolyTriangleDrawer::stencil(uniforms, vertices, 4, TriangleDrawMode::Fan, true, 0, viewwidth, 0, viewheight, 0, 1); - } - else - PolyTriangleDrawer::fill(uniforms, vertices, 4, TriangleDrawMode::Fan, true, 0, viewwidth, 0, viewheight, 0, 0); + PolyTriangleDrawer::draw(uniforms, vertices, 4, TriangleDrawMode::Fan, true, 0, viewwidth, 0, viewheight, tex, 0); + PolyTriangleDrawer::stencil(uniforms, vertices, 4, TriangleDrawMode::Fan, true, 0, viewwidth, 0, viewheight, 0, 1); } FTexture *RenderPolyWall::GetTexture() @@ -1428,7 +1506,7 @@ void PolySkyDome::CreateDome() void PolySkyDome::RenderRow(const TriUniforms &uniforms, FTexture *skytex, int row) { - PolyTriangleDrawer::draw(uniforms, &mVertices[mPrimStart[row]], mPrimStart[row + 1] - mPrimStart[row], TriangleDrawMode::Strip, false, 0, viewwidth, 0, viewheight, skytex, 0); + PolyTriangleDrawer::draw(uniforms, &mVertices[mPrimStart[row]], mPrimStart[row + 1] - mPrimStart[row], TriangleDrawMode::Strip, false, 0, viewwidth, 0, viewheight, skytex, 255); } void PolySkyDome::RenderCapColorRow(const TriUniforms &uniforms, FTexture *skytex, int row, bool bottomCap) @@ -1436,7 +1514,7 @@ void PolySkyDome::RenderCapColorRow(const TriUniforms &uniforms, FTexture *skyte uint32_t solid = skytex->GetSkyCapColor(bottomCap); if (!r_swtruecolor) solid = RGB32k.RGB[(RPART(solid) >> 3)][(GPART(solid) >> 3)][(BPART(solid) >> 3)]; - PolyTriangleDrawer::fill(uniforms, &mVertices[mPrimStart[row]], mPrimStart[row + 1] - mPrimStart[row], TriangleDrawMode::Fan, bottomCap, 0, viewwidth, 0, viewheight, solid, 0); + PolyTriangleDrawer::fill(uniforms, &mVertices[mPrimStart[row]], mPrimStart[row + 1] - mPrimStart[row], TriangleDrawMode::Fan, bottomCap, 0, viewwidth, 0, viewheight, solid, 255); } void 
PolySkyDome::Render(const TriMatrix &worldToClip) diff --git a/src/r_poly.h b/src/r_poly.h index c312f313bf..55a3d16cf0 100644 --- a/src/r_poly.h +++ b/src/r_poly.h @@ -102,8 +102,9 @@ public: private: void RenderNode(void *node); void RenderSubsector(subsector_t *sub); + void RenderPlane(subsector_t *sub, uint32_t subsectorDepth, bool ceiling); void AddLine(seg_t *line, sector_t *frontsector, uint32_t subsectorDepth); - TriVertex PlaneVertex(vertex_t *v1, sector_t *sector, const secplane_t &plane); + TriVertex PlaneVertex(vertex_t *v1, sector_t *sector, double height); void AddSprite(AActor *thing, subsector_t *sub, uint32_t subsectorDepth); void AddWallSprite(AActor *thing, subsector_t *sub, uint32_t subsectorDepth); @@ -128,6 +129,8 @@ private: std::vector PvsSectors; uint32_t NextSubsectorDepth = 0; + double MaxCeilingHeight = 0.0; + double MinFloorHeight = 0.0; TriMatrix worldToClip; @@ -179,7 +182,6 @@ public: double UnpeggedCeil = 0.0; FSWColormap *Colormap = nullptr; bool Masked = false; - bool IsSky = false; uint32_t SubsectorDepth = 0; private: From 6989b7037e6bf2ee3c92d8688005abb1a0dd527d Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 11 Nov 2016 10:25:40 +0100 Subject: [PATCH 298/912] Fix stencil write bug --- src/r_poly_triangle.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/r_poly_triangle.h b/src/r_poly_triangle.h index b0b22f8d0f..f0a93eb8c4 100644 --- a/src/r_poly_triangle.h +++ b/src/r_poly_triangle.h @@ -64,7 +64,7 @@ public: if (Values[0] == value) return; - for (int i = 1; i < 8 * 8 + 4 * 4 + 2 * 2 + 1; i++) + for (int i = 1; i < 8 * 8; i++) Values[i] = Values[0]; } From 5a9d4ee9d90e8f8e7122f2171abd7c3832694952 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 11 Nov 2016 18:24:59 +0100 Subject: [PATCH 299/912] Simplify argument passing --- src/r_poly.cpp | 95 +++++++++++++++++++++++++++++++++------- src/r_poly.h | 4 +- src/r_poly_triangle.cpp | 96 +++++++++++++++++------------------------ 
src/r_poly_triangle.h | 64 +++++++++++++++++---------- 4 files changed, 164 insertions(+), 95 deletions(-) diff --git a/src/r_poly.cpp b/src/r_poly.cpp index a58cb86a5a..c641702e1c 100644 --- a/src/r_poly.cpp +++ b/src/r_poly.cpp @@ -194,14 +194,29 @@ void RenderPolyBsp::RenderPlane(subsector_t *sub, uint32_t subsectorDepth, bool } } + PolyDrawArgs args; + args.uniforms = uniforms; + args.vinput = vertices; + args.vcount = sub->numlines; + args.mode = TriangleDrawMode::Fan; + args.ccw = true; + args.clipleft = 0; + args.cliptop = 0; + args.clipright = viewwidth; + args.clipbottom = viewheight; + args.stenciltestvalue = 0; + args.stencilwritevalue = 1; + if (!isSky) { - PolyTriangleDrawer::draw(uniforms, vertices, sub->numlines, TriangleDrawMode::Fan, true, 0, viewwidth, 0, viewheight, tex, 0); - PolyTriangleDrawer::stencil(uniforms, vertices, sub->numlines, TriangleDrawMode::Fan, true, 0, viewwidth, 0, viewheight, 0, 1); + args.SetTexture(tex); + PolyTriangleDrawer::draw(args, PolyDrawVariant::Draw); + PolyTriangleDrawer::draw(args, PolyDrawVariant::Stencil); } else { - PolyTriangleDrawer::stencil(uniforms, vertices, sub->numlines, TriangleDrawMode::Fan, true, 0, viewwidth, 0, viewheight, 0, 255); + args.stencilwritevalue = 255; + PolyTriangleDrawer::draw(args, PolyDrawVariant::Stencil); for (uint32_t i = 0; i < sub->numlines; i++) { @@ -284,7 +299,9 @@ void RenderPolyBsp::RenderPlane(subsector_t *sub, uint32_t subsectorDepth, bool wallvert[3] = PlaneVertex(line->v1, frontsector, skyHeight); } - PolyTriangleDrawer::stencil(uniforms, wallvert, 4, TriangleDrawMode::Fan, true, 0, viewwidth, 0, viewheight, 0, 255); + args.vinput = wallvert; + args.vcount = 4; + PolyTriangleDrawer::draw(args, PolyDrawVariant::Stencil); } } } @@ -525,7 +542,21 @@ void RenderPolyBsp::AddSprite(AActor *thing, subsector_t *sub, uint32_t subsecto uniforms.light = (uint32_t)((thing->Sector->lightlevel + actualextralight) / 255.0f * 256.0f); uniforms.flags = 0; uniforms.subsectorDepth = 
subsectorDepth; - PolyTriangleDrawer::draw(uniforms, vertices, 4, TriangleDrawMode::Fan, true, 0, viewwidth, 0, viewheight, tex, 0); + + PolyDrawArgs args; + args.uniforms = uniforms; + args.vinput = vertices; + args.vcount = 4; + args.mode = TriangleDrawMode::Fan; + args.ccw = true; + args.clipleft = 0; + args.cliptop = 0; + args.clipright = viewwidth; + args.clipbottom = viewheight; + args.stenciltestvalue = 0; + args.stencilwritevalue = 1; + args.SetTexture(tex); + PolyTriangleDrawer::draw(args, PolyDrawVariant::Draw); } void RenderPolyBsp::AddWallSprite(AActor *thing, subsector_t *sub, uint32_t subsectorDepth) @@ -1177,8 +1208,22 @@ void RenderPolyWall::Render(const TriMatrix &worldToClip) uniforms.flags = 0; uniforms.subsectorDepth = SubsectorDepth; - PolyTriangleDrawer::draw(uniforms, vertices, 4, TriangleDrawMode::Fan, true, 0, viewwidth, 0, viewheight, tex, 0); - PolyTriangleDrawer::stencil(uniforms, vertices, 4, TriangleDrawMode::Fan, true, 0, viewwidth, 0, viewheight, 0, 1); + PolyDrawArgs args; + args.uniforms = uniforms; + args.vinput = vertices; + args.vcount = 4; + args.mode = TriangleDrawMode::Fan; + args.ccw = true; + args.clipleft = 0; + args.cliptop = 0; + args.clipright = viewwidth; + args.clipbottom = viewheight; + args.stenciltestvalue = 0; + args.stencilwritevalue = 1; + args.SetTexture(tex); + + PolyTriangleDrawer::draw(args, PolyDrawVariant::Draw); + PolyTriangleDrawer::draw(args, PolyDrawVariant::Stencil); } FTexture *RenderPolyWall::GetTexture() @@ -1504,17 +1549,27 @@ void PolySkyDome::CreateDome() mPrimStart.Push(mVertices.Size()); } -void PolySkyDome::RenderRow(const TriUniforms &uniforms, FTexture *skytex, int row) +void PolySkyDome::RenderRow(PolyDrawArgs &args, int row) { - PolyTriangleDrawer::draw(uniforms, &mVertices[mPrimStart[row]], mPrimStart[row + 1] - mPrimStart[row], TriangleDrawMode::Strip, false, 0, viewwidth, 0, viewheight, skytex, 255); + args.vinput = &mVertices[mPrimStart[row]]; + args.vcount = mPrimStart[row + 1] - 
mPrimStart[row]; + args.mode = TriangleDrawMode::Strip; + args.ccw = false; + PolyTriangleDrawer::draw(args, PolyDrawVariant::Draw); } -void PolySkyDome::RenderCapColorRow(const TriUniforms &uniforms, FTexture *skytex, int row, bool bottomCap) +void PolySkyDome::RenderCapColorRow(PolyDrawArgs &args, FTexture *skytex, int row, bool bottomCap) { uint32_t solid = skytex->GetSkyCapColor(bottomCap); if (!r_swtruecolor) solid = RGB32k.RGB[(RPART(solid) >> 3)][(GPART(solid) >> 3)][(BPART(solid) >> 3)]; - PolyTriangleDrawer::fill(uniforms, &mVertices[mPrimStart[row]], mPrimStart[row + 1] - mPrimStart[row], TriangleDrawMode::Fan, bottomCap, 0, viewwidth, 0, viewheight, solid, 255); + + args.vinput = &mVertices[mPrimStart[row]]; + args.vcount = mPrimStart[row + 1] - mPrimStart[row]; + args.mode = TriangleDrawMode::Fan; + args.ccw = bottomCap; + args.solidcolor = solid; + PolyTriangleDrawer::draw(args, PolyDrawVariant::Fill); } void PolySkyDome::Render(const TriMatrix &worldToClip) @@ -1540,12 +1595,22 @@ void PolySkyDome::Render(const TriMatrix &worldToClip) int rc = mRows + 1; - RenderCapColorRow(uniforms, frontskytex, 0, false); - RenderCapColorRow(uniforms, frontskytex, rc, true); + PolyDrawArgs args; + args.uniforms = uniforms; + args.clipleft = 0; + args.cliptop = 0; + args.clipright = viewwidth; + args.clipbottom = viewheight; + args.stenciltestvalue = 255; + args.stencilwritevalue = 1; + args.SetTexture(frontskytex); + + RenderCapColorRow(args, frontskytex, 0, false); + RenderCapColorRow(args, frontskytex, rc, true); for (int i = 1; i <= mRows; i++) { - RenderRow(uniforms, frontskytex, i); - RenderRow(uniforms, frontskytex, rc + i); + RenderRow(args, i); + RenderRow(args, rc + i); } } diff --git a/src/r_poly.h b/src/r_poly.h index 55a3d16cf0..18997c01d4 100644 --- a/src/r_poly.h +++ b/src/r_poly.h @@ -83,8 +83,8 @@ private: void SkyVertex(int r, int c, bool yflip); void CreateSkyHemisphere(bool zflip); void CreateDome(); - void RenderRow(const TriUniforms &uniforms, 
FTexture *skytex, int row); - void RenderCapColorRow(const TriUniforms &uniforms, FTexture *skytex, int row, bool bottomCap); + void RenderRow(PolyDrawArgs &args, int row); + void RenderCapColorRow(PolyDrawArgs &args, FTexture *skytex, int row, bool bottomCap); TriVertex SetVertex(float xx, float yy, float zz, float uu = 0, float vv = 0); TriVertex SetVertexXYZ(float xx, float yy, float zz, float uu = 0, float vv = 0); diff --git a/src/r_poly_triangle.cpp b/src/r_poly_triangle.cpp index 57165736dd..075b4432fe 100644 --- a/src/r_poly_triangle.cpp +++ b/src/r_poly_triangle.cpp @@ -40,90 +40,79 @@ #include #endif -void PolyTriangleDrawer::draw(const TriUniforms &uniforms, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, int cliptop, int clipbottom, FTexture *texture, int stenciltestvalue) +void PolyTriangleDrawer::draw(const PolyDrawArgs &args, PolyDrawVariant variant) { if (r_swtruecolor) - queue_arrays(uniforms, vinput, vcount, mode, ccw, clipleft, clipright, cliptop, clipbottom, (const uint8_t*)texture->GetPixelsBgra(), texture->GetWidth(), texture->GetHeight(), 0, stenciltestvalue, stenciltestvalue); + DrawerCommandQueue::QueueCommand(args, variant); else - draw_arrays(uniforms, vinput, vcount, mode, ccw, clipleft, clipright, cliptop, clipbottom, texture->GetPixels(), texture->GetWidth(), texture->GetHeight(), 0, stenciltestvalue, stenciltestvalue, nullptr, &ScreenPolyTriangleDrawer::draw); + draw_arrays(args, variant, nullptr); } -void PolyTriangleDrawer::fill(const TriUniforms &uniforms, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, int cliptop, int clipbottom, int solidcolor, int stenciltestvalue) +void PolyTriangleDrawer::draw_arrays(const PolyDrawArgs &drawargs, PolyDrawVariant variant, DrawerThread *thread) { - if (r_swtruecolor) - queue_arrays(uniforms, vinput, vcount, mode, ccw, clipleft, clipright, cliptop, clipbottom, nullptr, 0, 0, solidcolor, 
stenciltestvalue, stenciltestvalue); - else - draw_arrays(uniforms, vinput, vcount, mode, ccw, clipleft, clipright, cliptop, clipbottom, nullptr, 0, 0, solidcolor, stenciltestvalue, stenciltestvalue, nullptr, &ScreenPolyTriangleDrawer::fill); -} - -void PolyTriangleDrawer::stencil(const TriUniforms &uniforms, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, int cliptop, int clipbottom, int stenciltestvalue, int stencilwritevalue) -{ - if (r_swtruecolor) - queue_arrays(uniforms, vinput, vcount, mode, ccw, clipleft, clipright, cliptop, clipbottom, nullptr, 0, 0, 0xbeef, stenciltestvalue, stencilwritevalue); - else - draw_arrays(uniforms, vinput, vcount, mode, ccw, clipleft, clipright, cliptop, clipbottom, nullptr, 0, 0, 0, stenciltestvalue, stencilwritevalue, nullptr, &ScreenPolyTriangleDrawer::stencil); -} - -void PolyTriangleDrawer::queue_arrays(const TriUniforms &uniforms, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, int cliptop, int clipbottom, const uint8_t *texturePixels, int textureWidth, int textureHeight, int solidcolor, int stenciltestvalue, int stencilwritevalue) -{ - if (clipright < clipleft || clipleft < 0 || clipright > MAXWIDTH || clipbottom < cliptop || cliptop < 0 || clipbottom > MAXHEIGHT) + if (drawargs.vcount < 3) return; - DrawerCommandQueue::QueueCommand(uniforms, vinput, vcount, mode, ccw, clipleft, clipright, cliptop, clipbottom, texturePixels, textureWidth, textureHeight, solidcolor, stenciltestvalue, stencilwritevalue); -} - -void PolyTriangleDrawer::draw_arrays(const TriUniforms &uniforms, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, int cliptop, int clipbottom, const uint8_t *texturePixels, int textureWidth, int textureHeight, int solidcolor, int stenciltestvalue, int stencilwritevalue, DrawerThread *thread, void(*drawfunc)(const ScreenPolyTriangleDrawerArgs *, DrawerThread *)) -{ - 
if (vcount < 3) - return; + void(*drawfunc)(const ScreenPolyTriangleDrawerArgs *, DrawerThread *); + switch (variant) + { + default: + case PolyDrawVariant::Draw: drawfunc = r_swtruecolor ? ScreenPolyTriangleDrawer::draw32 : ScreenPolyTriangleDrawer::draw; break; + case PolyDrawVariant::Fill: drawfunc = r_swtruecolor ? ScreenPolyTriangleDrawer::fill32 : ScreenPolyTriangleDrawer::fill; break; + case PolyDrawVariant::Stencil: drawfunc = ScreenPolyTriangleDrawer::stencil; break; + } ScreenPolyTriangleDrawerArgs args; args.dest = dc_destorg; args.pitch = dc_pitch; - args.clipleft = clipleft; - args.clipright = clipright; - args.cliptop = cliptop; - args.clipbottom = clipbottom; - args.texturePixels = texturePixels; - args.textureWidth = textureWidth; - args.textureHeight = textureHeight; - args.solidcolor = solidcolor; - args.uniforms = &uniforms; - args.stencilTestValue = stenciltestvalue; - args.stencilWriteValue = stencilwritevalue; + args.clipleft = drawargs.clipleft; + args.clipright = drawargs.clipright; + args.cliptop = drawargs.cliptop; + args.clipbottom = drawargs.clipbottom; + args.texturePixels = drawargs.texturePixels; + args.textureWidth = drawargs.textureWidth; + args.textureHeight = drawargs.textureHeight; + args.solidcolor = drawargs.solidcolor; + args.uniforms = &drawargs.uniforms; + args.stencilTestValue = drawargs.stenciltestvalue; + args.stencilWriteValue = drawargs.stencilwritevalue; args.stencilPitch = PolyStencilBuffer::Instance()->BlockWidth(); args.stencilValues = PolyStencilBuffer::Instance()->Values(); args.stencilMasks = PolyStencilBuffer::Instance()->Masks(); args.subsectorGBuffer = PolySubsectorGBuffer::Instance()->Values(); + bool ccw = drawargs.ccw; + const TriVertex *vinput = drawargs.vinput; + int vcount = drawargs.vcount; + TriVertex vert[3]; - if (mode == TriangleDrawMode::Normal) + if (drawargs.mode == TriangleDrawMode::Normal) { for (int i = 0; i < vcount / 3; i++) { for (int j = 0; j < 3; j++) - vert[j] = shade_vertex(uniforms, 
*(vinput++)); + vert[j] = shade_vertex(drawargs.uniforms, *(vinput++)); draw_shaded_triangle(vert, ccw, &args, thread, drawfunc); } } - else if (mode == TriangleDrawMode::Fan) + else if (drawargs.mode == TriangleDrawMode::Fan) { - vert[0] = shade_vertex(uniforms, *(vinput++)); - vert[1] = shade_vertex(uniforms, *(vinput++)); + vert[0] = shade_vertex(drawargs.uniforms, *(vinput++)); + vert[1] = shade_vertex(drawargs.uniforms, *(vinput++)); for (int i = 2; i < vcount; i++) { - vert[2] = shade_vertex(uniforms, *(vinput++)); + vert[2] = shade_vertex(drawargs.uniforms, *(vinput++)); draw_shaded_triangle(vert, ccw, &args, thread, drawfunc); vert[1] = vert[2]; } } else // TriangleDrawMode::Strip { - vert[0] = shade_vertex(uniforms, *(vinput++)); - vert[1] = shade_vertex(uniforms, *(vinput++)); + vert[0] = shade_vertex(drawargs.uniforms, *(vinput++)); + vert[1] = shade_vertex(drawargs.uniforms, *(vinput++)); for (int i = 2; i < vcount; i++) { - vert[2] = shade_vertex(uniforms, *(vinput++)); + vert[2] = shade_vertex(drawargs.uniforms, *(vinput++)); draw_shaded_triangle(vert, ccw, &args, thread, drawfunc); vert[0] = vert[1]; vert[1] = vert[2]; @@ -1324,19 +1313,14 @@ float ScreenPolyTriangleDrawer::grady(float x0, float y0, float x1, float y1, fl ///////////////////////////////////////////////////////////////////////////// -DrawPolyTrianglesCommand::DrawPolyTrianglesCommand(const TriUniforms &uniforms, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, int cliptop, int clipbottom, const uint8_t *texturePixels, int textureWidth, int textureHeight, int solidcolor, int stenciltestvalue, int stencilwritevalue) - : uniforms(uniforms), vinput(vinput), vcount(vcount), mode(mode), ccw(ccw), clipleft(clipleft), clipright(clipright), cliptop(cliptop), clipbottom(clipbottom), texturePixels(texturePixels), textureWidth(textureWidth), textureHeight(textureHeight), solidcolor(solidcolor), stenciltestvalue(stenciltestvalue), 
stencilwritevalue(stencilwritevalue) +DrawPolyTrianglesCommand::DrawPolyTrianglesCommand(const PolyDrawArgs &args, PolyDrawVariant variant) + : args(args), variant(variant) { } void DrawPolyTrianglesCommand::Execute(DrawerThread *thread) { - PolyTriangleDrawer::draw_arrays( - uniforms, vinput, vcount, mode, ccw, - clipleft, clipright, cliptop, clipbottom, - texturePixels, textureWidth, textureHeight, solidcolor, - stenciltestvalue, stencilwritevalue, - thread, texturePixels ? ScreenPolyTriangleDrawer::draw32 : solidcolor != 0xbeef ? ScreenPolyTriangleDrawer::fill32 : ScreenPolyTriangleDrawer::stencil); + PolyTriangleDrawer::draw_arrays(args, variant, thread); } FString DrawPolyTrianglesCommand::DebugInfo() diff --git a/src/r_poly_triangle.h b/src/r_poly_triangle.h index f0a93eb8c4..d6e1d33413 100644 --- a/src/r_poly_triangle.h +++ b/src/r_poly_triangle.h @@ -28,22 +28,55 @@ struct ScreenPolyTriangleDrawerArgs; +enum class PolyDrawVariant +{ + Draw, + Fill, + Stencil +}; + +class PolyDrawArgs +{ +public: + TriUniforms uniforms; + const TriVertex *vinput = nullptr; + int vcount = 0; + TriangleDrawMode mode = TriangleDrawMode::Normal; + bool ccw = false; + int clipleft = 0; + int clipright = 0; + int cliptop = 0; + int clipbottom = 0; + const uint8_t *texturePixels = nullptr; + int textureWidth = 0; + int textureHeight = 0; + int solidcolor = 0; + int stenciltestvalue = 0; + int stencilwritevalue = 0; + + void SetTexture(FTexture *texture) + { + textureWidth = texture->GetWidth(); + textureHeight = texture->GetHeight(); + if (r_swtruecolor) + texturePixels = (const uint8_t *)texture->GetPixelsBgra(); + else + texturePixels = texture->GetPixels(); + } +}; + class PolyTriangleDrawer { public: - static void draw(const TriUniforms &uniforms, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, int cliptop, int clipbottom, FTexture *texture, int stenciltestvalue); - static void fill(const TriUniforms &uniforms, const TriVertex 
*vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, int cliptop, int clipbottom, int solidcolor, int stenciltestvalue); - static void stencil(const TriUniforms &uniforms, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, int cliptop, int clipbottom, int stenciltestvalue, int stencilwritevalue); + static void draw(const PolyDrawArgs &args, PolyDrawVariant variant); private: static TriVertex shade_vertex(const TriUniforms &uniforms, TriVertex v); - static void draw_arrays(const TriUniforms &uniforms, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, int cliptop, int clipbottom, const uint8_t *texturePixels, int textureWidth, int textureHeight, int solidcolor, int stenciltestvalue, int stencilwritevalue, DrawerThread *thread, void(*drawfunc)(const ScreenPolyTriangleDrawerArgs *, DrawerThread *)); + static void draw_arrays(const PolyDrawArgs &args, PolyDrawVariant variant, DrawerThread *thread); static void draw_shaded_triangle(const TriVertex *vertices, bool ccw, ScreenPolyTriangleDrawerArgs *args, DrawerThread *thread, void(*drawfunc)(const ScreenPolyTriangleDrawerArgs *, DrawerThread *)); static bool cullhalfspace(float clipdistance1, float clipdistance2, float &t1, float &t2); static void clipedge(const TriVertex *verts, TriVertex *clippedvert, int &numclipvert); - static void queue_arrays(const TriUniforms &uniforms, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, int cliptop, int clipbottom, const uint8_t *texturePixels, int textureWidth, int textureHeight, int solidcolor, int stenciltestvalue, int stencilwritevalue); - enum { max_additional_vertices = 16 }; friend class DrawPolyTrianglesCommand; @@ -234,27 +267,14 @@ private: class DrawPolyTrianglesCommand : public DrawerCommand { public: - DrawPolyTrianglesCommand(const TriUniforms &uniforms, const TriVertex *vinput, int vcount, 
TriangleDrawMode mode, bool ccw, int clipleft, int clipright, int cliptop, int clipbottom, const uint8_t *texturePixels, int textureWidth, int textureHeight, int solidcolor, int stenciltestvalue, int stencilwritevalue); + DrawPolyTrianglesCommand(const PolyDrawArgs &args, PolyDrawVariant variant); void Execute(DrawerThread *thread) override; FString DebugInfo() override; private: - TriUniforms uniforms; - const TriVertex *vinput; - int vcount; - TriangleDrawMode mode; - bool ccw; - int clipleft; - int clipright; - int cliptop; - int clipbottom; - const uint8_t *texturePixels; - int textureWidth; - int textureHeight; - int solidcolor; - int stenciltestvalue; - int stencilwritevalue; + PolyDrawArgs args; + PolyDrawVariant variant; }; #endif From 3cc5cec5a0e61df82d618361be31d7dc9a06fb27 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 11 Nov 2016 19:26:28 +0100 Subject: [PATCH 300/912] Hook up subsector gbuffer --- src/r_poly.cpp | 33 +++-- src/r_poly.h | 13 +- src/r_poly_triangle.cpp | 287 +++++++++++++++++++++++++++++++++++++++- src/r_poly_triangle.h | 12 +- 4 files changed, 327 insertions(+), 18 deletions(-) diff --git a/src/r_poly.cpp b/src/r_poly.cpp index c641702e1c..94805c9c0c 100644 --- a/src/r_poly.cpp +++ b/src/r_poly.cpp @@ -50,6 +50,7 @@ void RenderPolyBsp::Render() SectorSpriteRanges.clear(); SectorSpriteRanges.resize(numsectors); SortedSprites.clear(); + SubsectoredSprites.clear(); PvsSectors.clear(); ScreenSprites.clear(); PolyStencilBuffer::Instance()->Clear(viewwidth, viewheight, 0); @@ -99,6 +100,8 @@ void RenderPolyBsp::Render() skydome.Render(worldToClip); + RenderSprites(); + RenderPlayerSprites(); DrawerCommandQueue::WaitForWorkers(); RenderScreenSprites(); // To do: should be called by FSoftwareRenderer::DrawRemainingPlayerSprites instead of here @@ -117,6 +120,12 @@ void RenderPolyBsp::RenderSubsector(subsector_t *sub) uint32_t subsectorDepth = NextSubsectorDepth++; + if (sub->sector->CenterFloor() != sub->sector->CenterCeiling()) + { 
+ RenderPlane(sub, subsectorDepth, true); + RenderPlane(sub, subsectorDepth, false); + } + for (uint32_t i = 0; i < sub->numlines; i++) { seg_t *line = &sub->firstline[i]; @@ -124,20 +133,23 @@ void RenderPolyBsp::RenderSubsector(subsector_t *sub) AddLine(line, frontsector, subsectorDepth); } - if (sub->sector->CenterFloor() != sub->sector->CenterCeiling()) - { - RenderPlane(sub, subsectorDepth, true); - RenderPlane(sub, subsectorDepth, false); - } - SpriteRange sprites = GetSpritesForSector(sub->sector); for (int i = 0; i < sprites.Count; i++) { AActor *thing = SortedSprites[sprites.Start + i].Thing; - if ((thing->renderflags & RF_SPRITETYPEMASK) == RF_WALLSPRITE) - AddWallSprite(thing, sub, subsectorDepth); + SubsectoredSprites.push_back({ thing, sub, subsectorDepth }); + } +} + +void RenderPolyBsp::RenderSprites() +{ + for (auto it = SubsectoredSprites.rbegin(); it != SubsectoredSprites.rend(); ++it) + { + auto &spr = *it; + if ((spr.thing->renderflags & RF_SPRITETYPEMASK) == RF_WALLSPRITE) + AddWallSprite(spr.thing, spr.sub, spr.subsectorDepth); else - AddSprite(thing, sub, subsectorDepth); + AddSprite(spr.thing, spr.sub, spr.subsectorDepth); } } @@ -556,7 +568,7 @@ void RenderPolyBsp::AddSprite(AActor *thing, subsector_t *sub, uint32_t subsecto args.stenciltestvalue = 0; args.stencilwritevalue = 1; args.SetTexture(tex); - PolyTriangleDrawer::draw(args, PolyDrawVariant::Draw); + PolyTriangleDrawer::draw(args, PolyDrawVariant::DrawSubsector); } void RenderPolyBsp::AddWallSprite(AActor *thing, subsector_t *sub, uint32_t subsectorDepth) @@ -1592,6 +1604,7 @@ void PolySkyDome::Render(const TriMatrix &worldToClip) uniforms.objectToClip = worldToClip * objectToWorld; uniforms.light = 256; uniforms.flags = 0; + uniforms.subsectorDepth = RenderPolyBsp::SkySubsectorDepth; int rc = mRows + 1; diff --git a/src/r_poly.h b/src/r_poly.h index 18997c01d4..b957aad593 100644 --- a/src/r_poly.h +++ b/src/r_poly.h @@ -54,12 +54,21 @@ class PolySortedSprite { public: 
PolySortedSprite(AActor *thing, double distanceSquared) : Thing(thing), DistanceSquared(distanceSquared) { } - bool operator<(const PolySortedSprite &other) const { return DistanceSquared > other.DistanceSquared; } + bool operator<(const PolySortedSprite &other) const { return DistanceSquared < other.DistanceSquared; } AActor *Thing; double DistanceSquared; }; +class PolySubsectoredSprite +{ +public: + PolySubsectoredSprite(AActor *thing, subsector_t *sub, uint32_t subsectorDepth) : thing(thing), sub(sub), subsectorDepth(subsectorDepth) { } + AActor *thing; + subsector_t *sub; + uint32_t subsectorDepth; +}; + class SpriteRange { public: @@ -106,6 +115,7 @@ private: void AddLine(seg_t *line, sector_t *frontsector, uint32_t subsectorDepth); TriVertex PlaneVertex(vertex_t *v1, sector_t *sector, double height); + void RenderSprites(); void AddSprite(AActor *thing, subsector_t *sub, uint32_t subsectorDepth); void AddWallSprite(AActor *thing, subsector_t *sub, uint32_t subsectorDepth); bool IsThingCulled(AActor *thing); @@ -136,6 +146,7 @@ private: std::vector SectorSpriteRanges; std::vector SortedSprites; + std::vector SubsectoredSprites; std::vector ScreenSprites; diff --git a/src/r_poly_triangle.cpp b/src/r_poly_triangle.cpp index 075b4432fe..fb1874be29 100644 --- a/src/r_poly_triangle.cpp +++ b/src/r_poly_triangle.cpp @@ -59,6 +59,7 @@ void PolyTriangleDrawer::draw_arrays(const PolyDrawArgs &drawargs, PolyDrawVaria default: case PolyDrawVariant::Draw: drawfunc = r_swtruecolor ? ScreenPolyTriangleDrawer::draw32 : ScreenPolyTriangleDrawer::draw; break; case PolyDrawVariant::Fill: drawfunc = r_swtruecolor ? ScreenPolyTriangleDrawer::fill32 : ScreenPolyTriangleDrawer::fill; break; + case PolyDrawVariant::DrawSubsector: drawfunc = r_swtruecolor ? 
ScreenPolyTriangleDrawer::drawsubsector32 : ScreenPolyTriangleDrawer::draw; break; case PolyDrawVariant::Stencil: drawfunc = ScreenPolyTriangleDrawer::stencil; break; } @@ -846,11 +847,13 @@ void ScreenPolyTriangleDrawer::draw32(const ScreenPolyTriangleDrawerArgs *args, const uint32_t *texturePixels = (const uint32_t *)args->texturePixels; int textureWidth = args->textureWidth; int textureHeight = args->textureHeight; - uint32_t light = args->uniforms->light; uint8_t *stencilValues = args->stencilValues; uint32_t *stencilMasks = args->stencilMasks; int stencilPitch = args->stencilPitch; uint8_t stencilTestValue = args->stencilTestValue; + uint32_t light = args->uniforms->light; + uint32_t subsector = args->uniforms->subsectorDepth; + uint32_t *subsectorGBuffer = args->subsectorGBuffer; // 28.4 fixed-point coordinates const int Y1 = (int)round(16.0f * v1.y); @@ -895,6 +898,7 @@ void ScreenPolyTriangleDrawer::draw32(const ScreenPolyTriangleDrawerArgs *args, miny &= ~(q - 1); dest += miny * pitch; + subsectorGBuffer += miny * pitch; // Half-edge constants int C1 = DY12 * X1 - DX12 * Y1; @@ -919,7 +923,7 @@ void ScreenPolyTriangleDrawer::draw32(const ScreenPolyTriangleDrawerArgs *args, } // Loop through blocks - for (int y = miny; y < maxy; y += q, dest += q * pitch) + for (int y = miny; y < maxy; y += q, dest += q * pitch, subsectorGBuffer += q * pitch) { // Is this row of blocks done by this thread? 
if (thread->skipped_by_thread(y / q)) continue; @@ -989,6 +993,7 @@ void ScreenPolyTriangleDrawer::draw32(const ScreenPolyTriangleDrawerArgs *args, #endif uint32_t *buffer = dest; + uint32_t *subsectorbuffer = subsectorGBuffer; PolyStencilBlock stencil(x / 8 + y / 8 * stencilPitch, stencilValues, stencilMasks); @@ -1028,7 +1033,10 @@ void ScreenPolyTriangleDrawer::draw32(const ScreenPolyTriangleDrawerArgs *args, uint32_t fg_alpha = APART(fg); if (fg_alpha > 127) + { buffer[ix] = 0xff000000 | (fg_red << 16) | (fg_green << 8) | fg_blue; + subsectorbuffer[ix] = subsector; + } for (int i = 0; i < TriVertex::NumVarying; i++) varying[i] += varyingStep[i]; @@ -1059,10 +1067,14 @@ void ScreenPolyTriangleDrawer::draw32(const ScreenPolyTriangleDrawerArgs *args, __m128i mmask1 = _mm_shufflehi_epi16(_mm_shufflelo_epi16(mfg1, _MM_SHUFFLE(3, 3, 3, 3)), _MM_SHUFFLE(3, 3, 3, 3)); __m128i mmask = _mm_cmplt_epi8(_mm_packus_epi16(mmask0, mmask1), _mm_setzero_si128()); _mm_maskmoveu_si128(mout, mmask, (char*)(&buffer[x + sse * 4])); + + __m128i msubsector = _mm_set1_epi32(subsector); + _mm_maskmoveu_si128(msubsector, mmask, (char*)(&subsectorbuffer[x + sse * 4])); } #endif buffer += pitch; + subsectorbuffer += pitch; } } else // Partially covered block @@ -1106,6 +1118,276 @@ void ScreenPolyTriangleDrawer::draw32(const ScreenPolyTriangleDrawerArgs *args, uint32_t fg_blue = (BPART(fg) * diminishedlight) >> 8; uint32_t fg_alpha = APART(fg); + if (fg_alpha > 127) + { + buffer[ix + x] = 0xff000000 | (fg_red << 16) | (fg_green << 8) | fg_blue; + subsectorbuffer[ix + x] = subsector; + } + } + + for (int i = 0; i < TriVertex::NumVarying; i++) + varying[i] += varyingStep[i]; + + CX1 -= FDY12; + CX2 -= FDY23; + CX3 -= FDY31; + } + + CY1 += FDX12; + CY2 += FDX23; + CY3 += FDX31; + + buffer += pitch; + subsectorbuffer += pitch; + } + } + } + } +} + +void ScreenPolyTriangleDrawer::drawsubsector32(const ScreenPolyTriangleDrawerArgs *args, DrawerThread *thread) +{ + uint32_t *dest = (uint32_t 
*)args->dest; + int pitch = args->pitch; + const TriVertex &v1 = *args->v1; + const TriVertex &v2 = *args->v2; + const TriVertex &v3 = *args->v3; + int clipleft = args->clipleft; + int clipright = args->clipright; + int cliptop = args->cliptop; + int clipbottom = args->clipbottom; + const uint32_t *texturePixels = (const uint32_t *)args->texturePixels; + int textureWidth = args->textureWidth; + int textureHeight = args->textureHeight; + uint32_t light = args->uniforms->light; + uint32_t subsector = args->uniforms->subsectorDepth; + uint32_t *subsectorGBuffer = args->subsectorGBuffer; + + // 28.4 fixed-point coordinates + const int Y1 = (int)round(16.0f * v1.y); + const int Y2 = (int)round(16.0f * v2.y); + const int Y3 = (int)round(16.0f * v3.y); + + const int X1 = (int)round(16.0f * v1.x); + const int X2 = (int)round(16.0f * v2.x); + const int X3 = (int)round(16.0f * v3.x); + + // Deltas + const int DX12 = X1 - X2; + const int DX23 = X2 - X3; + const int DX31 = X3 - X1; + + const int DY12 = Y1 - Y2; + const int DY23 = Y2 - Y3; + const int DY31 = Y3 - Y1; + + // Fixed-point deltas + const int FDX12 = DX12 << 4; + const int FDX23 = DX23 << 4; + const int FDX31 = DX31 << 4; + + const int FDY12 = DY12 << 4; + const int FDY23 = DY23 << 4; + const int FDY31 = DY31 << 4; + + // Bounding rectangle + int minx = MAX((MIN(MIN(X1, X2), X3) + 0xF) >> 4, clipleft); + int maxx = MIN((MAX(MAX(X1, X2), X3) + 0xF) >> 4, clipright - 1); + int miny = MAX((MIN(MIN(Y1, Y2), Y3) + 0xF) >> 4, cliptop); + int maxy = MIN((MAX(MAX(Y1, Y2), Y3) + 0xF) >> 4, clipbottom - 1); + if (minx >= maxx || miny >= maxy) + return; + + // Block size, standard 8x8 (must be power of two) + const int q = 8; + + // Start in corner of 8x8 block + minx &= ~(q - 1); + miny &= ~(q - 1); + + dest += miny * pitch; + subsectorGBuffer += miny * pitch; + + // Half-edge constants + int C1 = DY12 * X1 - DX12 * Y1; + int C2 = DY23 * X2 - DX23 * Y2; + int C3 = DY31 * X3 - DX31 * Y3; + + // Correct for fill convention + if 
(DY12 < 0 || (DY12 == 0 && DX12 > 0)) C1++; + if (DY23 < 0 || (DY23 == 0 && DX23 > 0)) C2++; + if (DY31 < 0 || (DY31 == 0 && DX31 > 0)) C3++; + + // Gradients + float gradWX = gradx(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + float gradWY = grady(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + float startW = v1.w + gradWX * (minx - v1.x) + gradWY * (miny - v1.y); + float gradVaryingX[TriVertex::NumVarying], gradVaryingY[TriVertex::NumVarying], startVarying[TriVertex::NumVarying]; + for (int i = 0; i < TriVertex::NumVarying; i++) + { + gradVaryingX[i] = gradx(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + gradVaryingY[i] = grady(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + startVarying[i] = v1.varying[i] * v1.w + gradVaryingX[i] * (minx - v1.x) + gradVaryingY[i] * (miny - v1.y); + } + + // Loop through blocks + for (int y = miny; y < maxy; y += q, dest += q * pitch, subsectorGBuffer += q * pitch) + { + // Is this row of blocks done by this thread? 
+ if (thread->skipped_by_thread(y / q)) continue; + + for (int x = minx; x < maxx; x += q) + { + // Corners of block + int x0 = x << 4; + int x1 = (x + q - 1) << 4; + int y0 = y << 4; + int y1 = (y + q - 1) << 4; + + // Evaluate half-space functions + bool a00 = C1 + DX12 * y0 - DY12 * x0 > 0; + bool a10 = C1 + DX12 * y0 - DY12 * x1 > 0; + bool a01 = C1 + DX12 * y1 - DY12 * x0 > 0; + bool a11 = C1 + DX12 * y1 - DY12 * x1 > 0; + int a = (a00 << 0) | (a10 << 1) | (a01 << 2) | (a11 << 3); + + bool b00 = C2 + DX23 * y0 - DY23 * x0 > 0; + bool b10 = C2 + DX23 * y0 - DY23 * x1 > 0; + bool b01 = C2 + DX23 * y1 - DY23 * x0 > 0; + bool b11 = C2 + DX23 * y1 - DY23 * x1 > 0; + int b = (b00 << 0) | (b10 << 1) | (b01 << 2) | (b11 << 3); + + bool c00 = C3 + DX31 * y0 - DY31 * x0 > 0; + bool c10 = C3 + DX31 * y0 - DY31 * x1 > 0; + bool c01 = C3 + DX31 * y1 - DY31 * x0 > 0; + bool c11 = C3 + DX31 * y1 - DY31 * x1 > 0; + int c = (c00 << 0) | (c10 << 1) | (c01 << 2) | (c11 << 3); + + // Skip block when outside an edge + if (a == 0x0 || b == 0x0 || c == 0x0) continue; + + // Check if block needs clipping + bool clipneeded = clipleft > x || clipright < (x + q) || cliptop > y || clipbottom < (y + q); + + // Calculate varying variables for affine block + float offx0 = (x - minx) + 0.5f; + float offy0 = (y - miny) + 0.5f; + float offx1 = offx0 + q; + float offy1 = offy0 + q; + float rcpWTL = 1.0f / (startW + offx0 * gradWX + offy0 * gradWY); + float rcpWTR = 1.0f / (startW + offx1 * gradWX + offy0 * gradWY); + float rcpWBL = 1.0f / (startW + offx0 * gradWX + offy1 * gradWY); + float rcpWBR = 1.0f / (startW + offx1 * gradWX + offy1 * gradWY); + float varyingTL[TriVertex::NumVarying]; + float varyingTR[TriVertex::NumVarying]; + float varyingBL[TriVertex::NumVarying]; + float varyingBR[TriVertex::NumVarying]; + for (int i = 0; i < TriVertex::NumVarying; i++) + { + varyingTL[i] = (startVarying[i] + offx0 * gradVaryingX[i] + offy0 * gradVaryingY[i]) * rcpWTL; + varyingTR[i] = (startVarying[i] 
+ offx1 * gradVaryingX[i] + offy0 * gradVaryingY[i]) * rcpWTR; + varyingBL[i] = ((startVarying[i] + offx0 * gradVaryingX[i] + offy1 * gradVaryingY[i]) * rcpWBL - varyingTL[i]) * (1.0f / q); + varyingBR[i] = ((startVarying[i] + offx1 * gradVaryingX[i] + offy1 * gradVaryingY[i]) * rcpWBR - varyingTR[i]) * (1.0f / q); + } + + float globVis = 1706.0f; + float vis = globVis / rcpWTL; + float shade = 64.0f - (light * 255 / 256 + 12.0f) * 32.0f / 128.0f; + float lightscale = clamp((shade - MIN(24.0f, vis)) / 32.0f, 0.0f, 31.0f / 32.0f); + int diminishedlight = (int)clamp((1.0f - lightscale) * 256.0f + 0.5f, 0.0f, 256.0f); + +#if !defined(NO_SSE) + __m128i mlight = _mm_set1_epi16(diminishedlight); +#endif + + uint32_t *buffer = dest; + uint32_t *subsectorbuffer = subsectorGBuffer; + + // Accept whole block when totally covered + if (a == 0xF && b == 0xF && c == 0xF && !clipneeded) + { + for (int iy = 0; iy < q; iy++) + { + uint32_t varying[TriVertex::NumVarying], varyingStep[TriVertex::NumVarying]; + for (int i = 0; i < TriVertex::NumVarying; i++) + { + float pos = varyingTL[i] + varyingBL[i] * iy; + float step = (varyingTR[i] + varyingBR[i] * iy - pos) * (1.0f / q); + + varying[i] = (uint32_t)((pos - floor(pos)) * 0x100000000LL); + varyingStep[i] = (uint32_t)(step * 0x100000000LL); + } + + for (int ix = x; ix < x + q; ix++) + { + if (subsectorbuffer[ix] >= subsector) + { + uint32_t ufrac = varying[0]; + uint32_t vfrac = varying[1]; + + uint32_t upos = ((ufrac >> 16) * textureWidth) >> 16; + uint32_t vpos = ((vfrac >> 16) * textureHeight) >> 16; + uint32_t uvoffset = upos * textureHeight + vpos; + + uint32_t fg = texturePixels[uvoffset]; + uint32_t fg_red = (RPART(fg) * diminishedlight) >> 8; + uint32_t fg_green = (GPART(fg) * diminishedlight) >> 8; + uint32_t fg_blue = (BPART(fg) * diminishedlight) >> 8; + uint32_t fg_alpha = APART(fg); + + if (fg_alpha > 127) + buffer[ix] = 0xff000000 | (fg_red << 16) | (fg_green << 8) | fg_blue; + } + + for (int i = 0; i < 
TriVertex::NumVarying; i++) + varying[i] += varyingStep[i]; + } + + buffer += pitch; + subsectorbuffer += pitch; + } + } + else // Partially covered block + { + int CY1 = C1 + DX12 * y0 - DY12 * x0; + int CY2 = C2 + DX23 * y0 - DY23 * x0; + int CY3 = C3 + DX31 * y0 - DY31 * x0; + + for (int iy = 0; iy < q; iy++) + { + int CX1 = CY1; + int CX2 = CY2; + int CX3 = CY3; + + uint32_t varying[TriVertex::NumVarying], varyingStep[TriVertex::NumVarying]; + for (int i = 0; i < TriVertex::NumVarying; i++) + { + float pos = varyingTL[i] + varyingBL[i] * iy; + float step = (varyingTR[i] + varyingBR[i] * iy - pos) * (1.0f / q); + + varying[i] = (uint32_t)((pos - floor(pos)) * 0x100000000LL); + varyingStep[i] = (uint32_t)(step * 0x100000000LL); + } + + for (int ix = 0; ix < q; ix++) + { + bool visible = (ix + x >= clipleft) && (ix + x < clipright) && (cliptop <= y + iy) && (clipbottom > y + iy); + + if (CX1 > 0 && CX2 > 0 && CX3 > 0 && visible && subsectorbuffer[ix + x] >= subsector) + { + uint32_t ufrac = varying[0]; + uint32_t vfrac = varying[1]; + + uint32_t upos = ((ufrac >> 16) * textureWidth) >> 16; + uint32_t vpos = ((vfrac >> 16) * textureHeight) >> 16; + uint32_t uvoffset = upos * textureHeight + vpos; + + uint32_t fg = texturePixels[uvoffset]; + uint32_t fg_red = (RPART(fg) * diminishedlight) >> 8; + uint32_t fg_green = (GPART(fg) * diminishedlight) >> 8; + uint32_t fg_blue = (BPART(fg) * diminishedlight) >> 8; + uint32_t fg_alpha = APART(fg); + if (fg_alpha > 127) buffer[ix + x] = 0xff000000 | (fg_red << 16) | (fg_green << 8) | fg_blue; } @@ -1123,6 +1405,7 @@ void ScreenPolyTriangleDrawer::draw32(const ScreenPolyTriangleDrawerArgs *args, CY3 += FDX31; buffer += pitch; + subsectorbuffer += pitch; } } } diff --git a/src/r_poly_triangle.h b/src/r_poly_triangle.h index d6e1d33413..0861e7186e 100644 --- a/src/r_poly_triangle.h +++ b/src/r_poly_triangle.h @@ -32,7 +32,8 @@ enum class PolyDrawVariant { Draw, Fill, - Stencil + DrawSubsector, + Stencil, }; class PolyDrawArgs 
@@ -50,9 +51,9 @@ public: const uint8_t *texturePixels = nullptr; int textureWidth = 0; int textureHeight = 0; - int solidcolor = 0; - int stenciltestvalue = 0; - int stencilwritevalue = 0; + uint32_t solidcolor = 0; + uint8_t stenciltestvalue = 0; + uint8_t stencilwritevalue = 0; void SetTexture(FTexture *texture) { @@ -238,7 +239,7 @@ struct ScreenPolyTriangleDrawerArgs const uint8_t *texturePixels; int textureWidth; int textureHeight; - int solidcolor; + uint32_t solidcolor; const TriUniforms *uniforms; uint8_t *stencilValues; uint32_t *stencilMasks; @@ -257,6 +258,7 @@ public: static void stencil(const ScreenPolyTriangleDrawerArgs *args, DrawerThread *thread); static void draw32(const ScreenPolyTriangleDrawerArgs *args, DrawerThread *thread); + static void drawsubsector32(const ScreenPolyTriangleDrawerArgs *args, DrawerThread *thread); static void fill32(const ScreenPolyTriangleDrawerArgs *args, DrawerThread *thread); private: From 09384208716417dd4796b935c0620a2c12b0584c Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 11 Nov 2016 19:54:27 +0100 Subject: [PATCH 301/912] Make triangle drawers compatible with LLVMDrawers --- src/r_compiler/llvmdrawers.h | 79 ++++++++++++++++++++++++++++++++++++ src/r_poly_triangle.cpp | 39 ++++++++++-------- src/r_poly_triangle.h | 40 ++++-------------- src/r_triangle.cpp | 29 ++++++++----- src/r_triangle.h | 68 ++++--------------------------- 5 files changed, 135 insertions(+), 120 deletions(-) diff --git a/src/r_compiler/llvmdrawers.h b/src/r_compiler/llvmdrawers.h index b2978cabfa..abf9598e3d 100644 --- a/src/r_compiler/llvmdrawers.h +++ b/src/r_compiler/llvmdrawers.h @@ -181,6 +181,85 @@ struct DrawSkyArgs } }; +struct TriVertex +{ + TriVertex() { } + TriVertex(float x, float y, float z, float w, float u, float v) : x(x), y(y), z(z), w(w) { varying[0] = u; varying[1] = v; } + + enum { NumVarying = 2 }; + float x, y, z, w; + float varying[NumVarying]; +}; + +struct TriMatrix +{ + static TriMatrix null(); + static 
TriMatrix identity(); + static TriMatrix translate(float x, float y, float z); + static TriMatrix scale(float x, float y, float z); + static TriMatrix rotate(float angle, float x, float y, float z); + static TriMatrix swapYZ(); + static TriMatrix perspective(float fovy, float aspect, float near, float far); + static TriMatrix frustum(float left, float right, float bottom, float top, float near, float far); + + static TriMatrix worldToView(); // Software renderer world to view space transform + static TriMatrix viewToClip(); // Software renderer shearing projection + + TriVertex operator*(TriVertex v) const; + TriMatrix operator*(const TriMatrix &m) const; + + float matrix[16]; +}; + +struct TriUniforms +{ + uint32_t light; + uint32_t subsectorDepth; + + uint16_t light_alpha; + uint16_t light_red; + uint16_t light_green; + uint16_t light_blue; + uint16_t fade_alpha; + uint16_t fade_red; + uint16_t fade_green; + uint16_t fade_blue; + uint16_t desaturate; + uint32_t flags; + enum Flags + { + simple_shade = 1, + nearest_filter = 2, + diminishing_lighting = 4 + }; + + TriMatrix objectToClip; +}; + +struct ScreenPolyTriangleDrawerArgs +{ + uint8_t *dest; + int pitch; + TriVertex *v1; + TriVertex *v2; + TriVertex *v3; + int clipleft; + int clipright; + int cliptop; + int clipbottom; + const uint8_t *texturePixels; + int textureWidth; + int textureHeight; + uint32_t solidcolor; + const TriUniforms *uniforms; + uint8_t *stencilValues; + uint32_t *stencilMasks; + int stencilPitch; + uint8_t stencilTestValue; + uint8_t stencilWriteValue; + uint32_t *subsectorGBuffer; +}; + class LLVMDrawers { public: diff --git a/src/r_poly_triangle.cpp b/src/r_poly_triangle.cpp index fb1874be29..181b011a4a 100644 --- a/src/r_poly_triangle.cpp +++ b/src/r_poly_triangle.cpp @@ -48,12 +48,12 @@ void PolyTriangleDrawer::draw(const PolyDrawArgs &args, PolyDrawVariant variant) draw_arrays(args, variant, nullptr); } -void PolyTriangleDrawer::draw_arrays(const PolyDrawArgs &drawargs, PolyDrawVariant 
variant, DrawerThread *thread) +void PolyTriangleDrawer::draw_arrays(const PolyDrawArgs &drawargs, PolyDrawVariant variant, WorkerThreadData *thread) { if (drawargs.vcount < 3) return; - void(*drawfunc)(const ScreenPolyTriangleDrawerArgs *, DrawerThread *); + void(*drawfunc)(const ScreenPolyTriangleDrawerArgs *, WorkerThreadData *); switch (variant) { default: @@ -128,7 +128,7 @@ TriVertex PolyTriangleDrawer::shade_vertex(const TriUniforms &uniforms, TriVerte return uniforms.objectToClip * v; } -void PolyTriangleDrawer::draw_shaded_triangle(const TriVertex *vert, bool ccw, ScreenPolyTriangleDrawerArgs *args, DrawerThread *thread, void(*drawfunc)(const ScreenPolyTriangleDrawerArgs *, DrawerThread *)) +void PolyTriangleDrawer::draw_shaded_triangle(const TriVertex *vert, bool ccw, ScreenPolyTriangleDrawerArgs *args, WorkerThreadData *thread, void(*drawfunc)(const ScreenPolyTriangleDrawerArgs *, WorkerThreadData *)) { // Cull, clip and generate additional vertices as needed TriVertex clippedvert[max_additional_vertices]; @@ -288,7 +288,7 @@ void PolyTriangleDrawer::clipedge(const TriVertex *verts, TriVertex *clippedvert ///////////////////////////////////////////////////////////////////////////// -void ScreenPolyTriangleDrawer::draw(const ScreenPolyTriangleDrawerArgs *args, DrawerThread *thread) +void ScreenPolyTriangleDrawer::draw(const ScreenPolyTriangleDrawerArgs *args, WorkerThreadData *thread) { uint8_t *dest = args->dest; int pitch = args->pitch; @@ -373,7 +373,7 @@ void ScreenPolyTriangleDrawer::draw(const ScreenPolyTriangleDrawerArgs *args, Dr for (int y = miny; y < maxy; y += q, dest += q * pitch) { // Is this row of blocks done by this thread? 
- if (thread && thread->skipped_by_thread(y / q)) continue; + if (thread && ((y / q) % thread->num_cores != thread->core)) continue; for (int x = minx; x < maxx; x += q) { @@ -523,7 +523,7 @@ void ScreenPolyTriangleDrawer::draw(const ScreenPolyTriangleDrawerArgs *args, Dr } } -void ScreenPolyTriangleDrawer::fill(const ScreenPolyTriangleDrawerArgs *args, DrawerThread *thread) +void ScreenPolyTriangleDrawer::fill(const ScreenPolyTriangleDrawerArgs *args, WorkerThreadData *thread) { uint8_t *dest = args->dest; int pitch = args->pitch; @@ -594,7 +594,7 @@ void ScreenPolyTriangleDrawer::fill(const ScreenPolyTriangleDrawerArgs *args, Dr for (int y = miny; y < maxy; y += q, dest += q * pitch) { // Is this row of blocks done by this thread? - if (thread && thread->skipped_by_thread(y / q)) continue; + if (thread && ((y / q) % thread->num_cores != thread->core)) continue; for (int x = minx; x < maxx; x += q) { @@ -681,7 +681,7 @@ void ScreenPolyTriangleDrawer::fill(const ScreenPolyTriangleDrawerArgs *args, Dr } } -void ScreenPolyTriangleDrawer::stencil(const ScreenPolyTriangleDrawerArgs *args, DrawerThread *thread) +void ScreenPolyTriangleDrawer::stencil(const ScreenPolyTriangleDrawerArgs *args, WorkerThreadData *thread) { const TriVertex &v1 = *args->v1; const TriVertex &v2 = *args->v2; @@ -753,7 +753,7 @@ void ScreenPolyTriangleDrawer::stencil(const ScreenPolyTriangleDrawerArgs *args, for (int y = miny; y < maxy; y += q) { // Is this row of blocks done by this thread? 
- if (thread && thread->skipped_by_thread(y / q)) continue; + if (thread && ((y / q) % thread->num_cores != thread->core)) continue; for (int x = minx; x < maxx; x += q) { @@ -833,7 +833,7 @@ void ScreenPolyTriangleDrawer::stencil(const ScreenPolyTriangleDrawerArgs *args, } } -void ScreenPolyTriangleDrawer::draw32(const ScreenPolyTriangleDrawerArgs *args, DrawerThread *thread) +void ScreenPolyTriangleDrawer::draw32(const ScreenPolyTriangleDrawerArgs *args, WorkerThreadData *thread) { uint32_t *dest = (uint32_t *)args->dest; int pitch = args->pitch; @@ -926,7 +926,7 @@ void ScreenPolyTriangleDrawer::draw32(const ScreenPolyTriangleDrawerArgs *args, for (int y = miny; y < maxy; y += q, dest += q * pitch, subsectorGBuffer += q * pitch) { // Is this row of blocks done by this thread? - if (thread->skipped_by_thread(y / q)) continue; + if ((y / q) % thread->num_cores != thread->core) continue; for (int x = minx; x < maxx; x += q) { @@ -1145,7 +1145,7 @@ void ScreenPolyTriangleDrawer::draw32(const ScreenPolyTriangleDrawerArgs *args, } } -void ScreenPolyTriangleDrawer::drawsubsector32(const ScreenPolyTriangleDrawerArgs *args, DrawerThread *thread) +void ScreenPolyTriangleDrawer::drawsubsector32(const ScreenPolyTriangleDrawerArgs *args, WorkerThreadData *thread) { uint32_t *dest = (uint32_t *)args->dest; int pitch = args->pitch; @@ -1234,7 +1234,7 @@ void ScreenPolyTriangleDrawer::drawsubsector32(const ScreenPolyTriangleDrawerArg for (int y = miny; y < maxy; y += q, dest += q * pitch, subsectorGBuffer += q * pitch) { // Is this row of blocks done by this thread? 
- if (thread->skipped_by_thread(y / q)) continue; + if ((y / q) % thread->num_cores != thread->core) continue; for (int x = minx; x < maxx; x += q) { @@ -1412,7 +1412,7 @@ void ScreenPolyTriangleDrawer::drawsubsector32(const ScreenPolyTriangleDrawerArg } } -void ScreenPolyTriangleDrawer::fill32(const ScreenPolyTriangleDrawerArgs *args, DrawerThread *thread) +void ScreenPolyTriangleDrawer::fill32(const ScreenPolyTriangleDrawerArgs *args, WorkerThreadData *thread) { uint32_t *dest = (uint32_t *)args->dest; int pitch = args->pitch; @@ -1487,7 +1487,7 @@ void ScreenPolyTriangleDrawer::fill32(const ScreenPolyTriangleDrawerArgs *args, for (int y = miny; y < maxy; y += q, dest += q * pitch) { // Is this row of blocks done by this thread? - if (thread->skipped_by_thread(y / q)) continue; + if ((y / q) % thread->num_cores != thread->core) continue; for (int x = minx; x < maxx; x += q) { @@ -1603,7 +1603,14 @@ DrawPolyTrianglesCommand::DrawPolyTrianglesCommand(const PolyDrawArgs &args, Pol void DrawPolyTrianglesCommand::Execute(DrawerThread *thread) { - PolyTriangleDrawer::draw_arrays(args, variant, thread); + WorkerThreadData thread_data; + thread_data.core = thread->core; + thread_data.num_cores = thread->num_cores; + thread_data.pass_start_y = thread->pass_start_y; + thread_data.pass_end_y = thread->pass_end_y; + thread_data.temp = thread->dc_temp_rgba; + + PolyTriangleDrawer::draw_arrays(args, variant, &thread_data); } FString DrawPolyTrianglesCommand::DebugInfo() diff --git a/src/r_poly_triangle.h b/src/r_poly_triangle.h index 0861e7186e..f0ffd18655 100644 --- a/src/r_poly_triangle.h +++ b/src/r_poly_triangle.h @@ -73,8 +73,8 @@ public: private: static TriVertex shade_vertex(const TriUniforms &uniforms, TriVertex v); - static void draw_arrays(const PolyDrawArgs &args, PolyDrawVariant variant, DrawerThread *thread); - static void draw_shaded_triangle(const TriVertex *vertices, bool ccw, ScreenPolyTriangleDrawerArgs *args, DrawerThread *thread, void(*drawfunc)(const 
ScreenPolyTriangleDrawerArgs *, DrawerThread *)); + static void draw_arrays(const PolyDrawArgs &args, PolyDrawVariant variant, WorkerThreadData *thread); + static void draw_shaded_triangle(const TriVertex *vertices, bool ccw, ScreenPolyTriangleDrawerArgs *args, WorkerThreadData *thread, void(*drawfunc)(const ScreenPolyTriangleDrawerArgs *, WorkerThreadData *)); static bool cullhalfspace(float clipdistance1, float clipdistance2, float &t1, float &t2); static void clipedge(const TriVertex *verts, TriVertex *clippedvert, int &numclipvert); @@ -225,41 +225,17 @@ private: std::vector masks; }; -struct ScreenPolyTriangleDrawerArgs -{ - uint8_t *dest; - int pitch; - TriVertex *v1; - TriVertex *v2; - TriVertex *v3; - int clipleft; - int clipright; - int cliptop; - int clipbottom; - const uint8_t *texturePixels; - int textureWidth; - int textureHeight; - uint32_t solidcolor; - const TriUniforms *uniforms; - uint8_t *stencilValues; - uint32_t *stencilMasks; - int stencilPitch; - uint8_t stencilTestValue; - uint8_t stencilWriteValue; - uint32_t *subsectorGBuffer; -}; - class ScreenPolyTriangleDrawer { public: - static void draw(const ScreenPolyTriangleDrawerArgs *args, DrawerThread *thread); - static void fill(const ScreenPolyTriangleDrawerArgs *args, DrawerThread *thread); + static void draw(const ScreenPolyTriangleDrawerArgs *args, WorkerThreadData *thread); + static void fill(const ScreenPolyTriangleDrawerArgs *args, WorkerThreadData *thread); - static void stencil(const ScreenPolyTriangleDrawerArgs *args, DrawerThread *thread); + static void stencil(const ScreenPolyTriangleDrawerArgs *args, WorkerThreadData *thread); - static void draw32(const ScreenPolyTriangleDrawerArgs *args, DrawerThread *thread); - static void drawsubsector32(const ScreenPolyTriangleDrawerArgs *args, DrawerThread *thread); - static void fill32(const ScreenPolyTriangleDrawerArgs *args, DrawerThread *thread); + static void draw32(const ScreenPolyTriangleDrawerArgs *args, WorkerThreadData *thread); + 
static void drawsubsector32(const ScreenPolyTriangleDrawerArgs *args, WorkerThreadData *thread); + static void fill32(const ScreenPolyTriangleDrawerArgs *args, WorkerThreadData *thread); private: static float gradx(float x0, float y0, float x1, float y1, float x2, float y2, float c0, float c1, float c2); diff --git a/src/r_triangle.cpp b/src/r_triangle.cpp index ff35cc7dae..13a8ca292a 100644 --- a/src/r_triangle.cpp +++ b/src/r_triangle.cpp @@ -75,7 +75,7 @@ void TriangleDrawer::queue_arrays(const TriUniforms &uniforms, const TriVertex * DrawerCommandQueue::QueueCommand(uniforms, vinput, vcount, mode, ccw, clipleft, clipright, clipdata, texturePixels, textureWidth, textureHeight, solidcolor); } -void TriangleDrawer::draw_arrays(const TriUniforms &uniforms, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, const short *cliptop, const short *clipbottom, const uint8_t *texturePixels, int textureWidth, int textureHeight, int solidcolor, DrawerThread *thread, void(*drawfunc)(const ScreenTriangleDrawerArgs *, DrawerThread *)) +void TriangleDrawer::draw_arrays(const TriUniforms &uniforms, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, const short *cliptop, const short *clipbottom, const uint8_t *texturePixels, int textureWidth, int textureHeight, int solidcolor, WorkerThreadData *thread, void(*drawfunc)(const ScreenTriangleDrawerArgs *, WorkerThreadData *)) { if (vcount < 3) return; @@ -135,7 +135,7 @@ TriVertex TriangleDrawer::shade_vertex(const TriUniforms &uniforms, TriVertex v) return uniforms.objectToClip * v; } -void TriangleDrawer::draw_shaded_triangle(const TriVertex *vert, bool ccw, ScreenTriangleDrawerArgs *args, DrawerThread *thread, void(*drawfunc)(const ScreenTriangleDrawerArgs *, DrawerThread *)) +void TriangleDrawer::draw_shaded_triangle(const TriVertex *vert, bool ccw, ScreenTriangleDrawerArgs *args, WorkerThreadData *thread, void(*drawfunc)(const 
ScreenTriangleDrawerArgs *, WorkerThreadData *)) { // Cull, clip and generate additional vertices as needed TriVertex clippedvert[max_additional_vertices]; @@ -295,7 +295,7 @@ void TriangleDrawer::clipedge(const TriVertex *verts, TriVertex *clippedvert, in ///////////////////////////////////////////////////////////////////////////// -void ScreenTriangleDrawer::draw(const ScreenTriangleDrawerArgs *args, DrawerThread *thread) +void ScreenTriangleDrawer::draw(const ScreenTriangleDrawerArgs *args, WorkerThreadData *thread) { uint8_t *dest = args->dest; int pitch = args->pitch; @@ -538,7 +538,7 @@ void ScreenTriangleDrawer::draw(const ScreenTriangleDrawerArgs *args, DrawerThre } } -void ScreenTriangleDrawer::fill(const ScreenTriangleDrawerArgs *args, DrawerThread *thread) +void ScreenTriangleDrawer::fill(const ScreenTriangleDrawerArgs *args, WorkerThreadData *thread) { uint8_t *dest = args->dest; int pitch = args->pitch; @@ -706,7 +706,7 @@ void ScreenTriangleDrawer::fill(const ScreenTriangleDrawerArgs *args, DrawerThre } } -void ScreenTriangleDrawer::draw32(const ScreenTriangleDrawerArgs *args, DrawerThread *thread) +void ScreenTriangleDrawer::draw32(const ScreenTriangleDrawerArgs *args, WorkerThreadData *thread) { uint32_t *dest = (uint32_t *)args->dest; int pitch = args->pitch; @@ -873,7 +873,7 @@ void ScreenTriangleDrawer::draw32(const ScreenTriangleDrawerArgs *args, DrawerTh varyingStep[i] = (uint32_t)(step * 0x100000000LL); } - if (!thread->skipped_by_thread(y + iy)) + if ((y + iy) % thread->num_cores == thread->core) { for (int ix = x; ix < x + q; ix++) { @@ -920,7 +920,7 @@ void ScreenTriangleDrawer::draw32(const ScreenTriangleDrawerArgs *args, DrawerTh varyingStep[i] = (varyingTR[i] + varyingBR[i] * iy - varying[i]) * (1.0f / q); } - if (!thread->skipped_by_thread(y + iy)) + if ((y + iy) % thread->num_cores == thread->core) { for (int ix = x; ix < x + q; ix++) { @@ -967,7 +967,7 @@ void ScreenTriangleDrawer::draw32(const ScreenTriangleDrawerArgs *args, DrawerTh 
} } -void ScreenTriangleDrawer::fill32(const ScreenTriangleDrawerArgs *args, DrawerThread *thread) +void ScreenTriangleDrawer::fill32(const ScreenTriangleDrawerArgs *args, WorkerThreadData *thread) { uint32_t *dest = (uint32_t *)args->dest; int pitch = args->pitch; @@ -1088,7 +1088,7 @@ void ScreenTriangleDrawer::fill32(const ScreenTriangleDrawerArgs *args, DrawerTh { for (int iy = 0; iy < q; iy++) { - if (!thread->skipped_by_thread(y + iy)) + if ((y + iy) % thread->num_cores == thread->core) { for (int ix = x; ix < x + q; ix++) { @@ -1111,7 +1111,7 @@ void ScreenTriangleDrawer::fill32(const ScreenTriangleDrawerArgs *args, DrawerTh int CX2 = CY2; int CX3 = CY3; - if (!thread->skipped_by_thread(y + iy)) + if ((y + iy) % thread->num_cores == thread->core) { for (int ix = x; ix < x + q; ix++) { @@ -1318,11 +1318,18 @@ void DrawTrianglesCommand::Execute(DrawerThread *thread) thread->triangle_clip_bottom[clipleft + i] = clipdata[cliplength + i]; } + WorkerThreadData thread_data; + thread_data.core = thread->core; + thread_data.num_cores = thread->num_cores; + thread_data.pass_start_y = thread->pass_start_y; + thread_data.pass_end_y = thread->pass_end_y; + thread_data.temp = thread->dc_temp_rgba; + TriangleDrawer::draw_arrays( uniforms, vinput, vcount, mode, ccw, clipleft, clipright, thread->triangle_clip_top, thread->triangle_clip_bottom, texturePixels, textureWidth, textureHeight, solidcolor, - thread, texturePixels ? ScreenTriangleDrawer::draw32 : ScreenTriangleDrawer::fill32); + &thread_data, texturePixels ? 
ScreenTriangleDrawer::draw32 : ScreenTriangleDrawer::fill32); } FString DrawTrianglesCommand::DebugInfo() diff --git a/src/r_triangle.h b/src/r_triangle.h index a3bdca5ed0..6ec413d3f2 100644 --- a/src/r_triangle.h +++ b/src/r_triangle.h @@ -26,65 +26,11 @@ #include "r_draw.h" #include "r_thread.h" +#include "r_compiler/llvmdrawers.h" class FTexture; struct ScreenTriangleDrawerArgs; -struct TriVertex -{ - TriVertex() { } - TriVertex(float x, float y, float z, float w, float u, float v) : x(x), y(y), z(z), w(w) { varying[0] = u; varying[1] = v; } - - enum { NumVarying = 2 }; - float x, y, z, w; - float varying[NumVarying]; -}; - -struct TriMatrix -{ - static TriMatrix null(); - static TriMatrix identity(); - static TriMatrix translate(float x, float y, float z); - static TriMatrix scale(float x, float y, float z); - static TriMatrix rotate(float angle, float x, float y, float z); - static TriMatrix swapYZ(); - static TriMatrix perspective(float fovy, float aspect, float near, float far); - static TriMatrix frustum(float left, float right, float bottom, float top, float near, float far); - - static TriMatrix worldToView(); // Software renderer world to view space transform - static TriMatrix viewToClip(); // Software renderer shearing projection - - TriVertex operator*(TriVertex v) const; - TriMatrix operator*(const TriMatrix &m) const; - - float matrix[16]; -}; - -struct TriUniforms -{ - uint32_t light; - uint32_t subsectorDepth; - - uint16_t light_alpha; - uint16_t light_red; - uint16_t light_green; - uint16_t light_blue; - uint16_t fade_alpha; - uint16_t fade_red; - uint16_t fade_green; - uint16_t fade_blue; - uint16_t desaturate; - uint32_t flags; - enum Flags - { - simple_shade = 1, - nearest_filter = 2, - diminishing_lighting = 4 - }; - - TriMatrix objectToClip; -}; - enum class TriangleDrawMode { Normal, @@ -100,8 +46,8 @@ public: private: static TriVertex shade_vertex(const TriUniforms &uniforms, TriVertex v); - static void draw_arrays(const TriUniforms 
&uniforms, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, const short *cliptop, const short *clipbottom, const uint8_t *texturePixels, int textureWidth, int textureHeight, int solidcolor, DrawerThread *thread, void(*drawfunc)(const ScreenTriangleDrawerArgs *, DrawerThread *)); - static void draw_shaded_triangle(const TriVertex *vertices, bool ccw, ScreenTriangleDrawerArgs *args, DrawerThread *thread, void(*drawfunc)(const ScreenTriangleDrawerArgs *, DrawerThread *)); + static void draw_arrays(const TriUniforms &uniforms, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, const short *cliptop, const short *clipbottom, const uint8_t *texturePixels, int textureWidth, int textureHeight, int solidcolor, WorkerThreadData *thread, void(*drawfunc)(const ScreenTriangleDrawerArgs *, WorkerThreadData *)); + static void draw_shaded_triangle(const TriVertex *vertices, bool ccw, ScreenTriangleDrawerArgs *args, WorkerThreadData *thread, void(*drawfunc)(const ScreenTriangleDrawerArgs *, WorkerThreadData *)); static bool cullhalfspace(float clipdistance1, float clipdistance2, float &t1, float &t2); static void clipedge(const TriVertex *verts, TriVertex *clippedvert, int &numclipvert); @@ -133,11 +79,11 @@ struct ScreenTriangleDrawerArgs class ScreenTriangleDrawer { public: - static void draw(const ScreenTriangleDrawerArgs *args, DrawerThread *thread); - static void fill(const ScreenTriangleDrawerArgs *args, DrawerThread *thread); + static void draw(const ScreenTriangleDrawerArgs *args, WorkerThreadData *thread); + static void fill(const ScreenTriangleDrawerArgs *args, WorkerThreadData *thread); - static void draw32(const ScreenTriangleDrawerArgs *args, DrawerThread *thread); - static void fill32(const ScreenTriangleDrawerArgs *args, DrawerThread *thread); + static void draw32(const ScreenTriangleDrawerArgs *args, WorkerThreadData *thread); + static void fill32(const 
ScreenTriangleDrawerArgs *args, WorkerThreadData *thread); private: static float gradx(float x0, float y0, float x1, float y1, float x2, float y2, float c0, float c1, float c2); From 667f35bfcea985d2668aaa71b983c201deda2eae Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 11 Nov 2016 20:12:09 +0100 Subject: [PATCH 302/912] Declare structs to LLVM --- src/r_compiler/llvmdrawers.cpp | 65 ++++++++++++++++++++++++++++++++++ src/r_compiler/llvmdrawers.h | 18 +++++----- src/r_poly_triangle.cpp | 18 +++++----- src/r_poly_triangle.h | 16 ++++----- 4 files changed, 91 insertions(+), 26 deletions(-) diff --git a/src/r_compiler/llvmdrawers.cpp b/src/r_compiler/llvmdrawers.cpp index 871e3c5cab..aa9a7b4fa3 100644 --- a/src/r_compiler/llvmdrawers.cpp +++ b/src/r_compiler/llvmdrawers.cpp @@ -87,6 +87,10 @@ private: static llvm::Type *GetDrawWallArgsStruct(llvm::LLVMContext &context); static llvm::Type *GetDrawSkyArgsStruct(llvm::LLVMContext &context); static llvm::Type *GetWorkerThreadDataStruct(llvm::LLVMContext &context); + static llvm::Type *GetTriVertexStruct(llvm::LLVMContext &context); + static llvm::Type *GetTriMatrixStruct(llvm::LLVMContext &context); + static llvm::Type *GetTriUniformsStruct(llvm::LLVMContext &context); + static llvm::Type *GetTriDrawTriangleArgs(llvm::LLVMContext &context); LLVMProgram mProgram; }; @@ -461,6 +465,67 @@ llvm::Type *LLVMDrawersImpl::GetWorkerThreadDataStruct(llvm::LLVMContext &contex return llvm::StructType::create(context, elements, "ThreadData", false)->getPointerTo(); } +llvm::Type *LLVMDrawersImpl::GetTriVertexStruct(llvm::LLVMContext &context) +{ + std::vector elements; + for (int i = 0; i < 6; i++) + elements.push_back(llvm::Type::getFloatTy(context)); + return llvm::StructType::create(context, elements, "TriVertex", false)->getPointerTo(); +} + +llvm::Type *LLVMDrawersImpl::GetTriMatrixStruct(llvm::LLVMContext &context) +{ + std::vector elements; + for (int i = 0; i < 4 * 4; i++) + 
elements.push_back(llvm::Type::getFloatTy(context)); + return llvm::StructType::create(context, elements, "TriMatrix", false)->getPointerTo(); +} + +llvm::Type *LLVMDrawersImpl::GetTriUniformsStruct(llvm::LLVMContext &context) +{ + std::vector elements; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t light; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t subsectorDepth; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_alpha; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_red; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_green; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_blue; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_alpha; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_red; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_green; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_blue; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t desaturate; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t flags; + elements.push_back(GetTriMatrixStruct(context)); // TriMatrix objectToClip + return llvm::StructType::create(context, elements, "TriUniforms", false)->getPointerTo(); +} + +llvm::Type *LLVMDrawersImpl::GetTriDrawTriangleArgs(llvm::LLVMContext &context) +{ + std::vector elements; + elements.push_back(llvm::Type::getInt8PtrTy(context)); // uint8_t *dest; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t pitch; + elements.push_back(GetTriVertexStruct(context)->getPointerTo()); // TriVertex *v1; + elements.push_back(GetTriVertexStruct(context)->getPointerTo()); // TriVertex *v2; + elements.push_back(GetTriVertexStruct(context)->getPointerTo()); // TriVertex *v3; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t clipleft; + 
elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t clipright; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t cliptop; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t clipbottom; + elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint8_t *texturePixels; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t textureWidth; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t textureHeight; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t solidcolor; + elements.push_back(GetTriUniformsStruct(context)->getPointerTo()); // const TriUniforms *uniforms; + elements.push_back(llvm::Type::getInt8PtrTy(context)); // uint8_t *stencilValues; + elements.push_back(llvm::Type::getInt32PtrTy(context)); // uint32_t *stencilMasks; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t stencilPitch; + elements.push_back(llvm::Type::getInt8Ty(context)); // uint8_t stencilTestValue; + elements.push_back(llvm::Type::getInt8Ty(context)); // uint8_t stencilWriteValue; + elements.push_back(llvm::Type::getInt32PtrTy(context)); // uint32_t *subsectorGBuffer; + return llvm::StructType::create(context, elements, "TriDrawTriangle", false)->getPointerTo(); +} + ///////////////////////////////////////////////////////////////////////////// namespace { static bool LogFatalErrors = false; } diff --git a/src/r_compiler/llvmdrawers.h b/src/r_compiler/llvmdrawers.h index abf9598e3d..d0ee3dcb76 100644 --- a/src/r_compiler/llvmdrawers.h +++ b/src/r_compiler/llvmdrawers.h @@ -236,25 +236,25 @@ struct TriUniforms TriMatrix objectToClip; }; -struct ScreenPolyTriangleDrawerArgs +struct TriDrawTriangleArgs { uint8_t *dest; - int pitch; + int32_t pitch; TriVertex *v1; TriVertex *v2; TriVertex *v3; - int clipleft; - int clipright; - int cliptop; - int clipbottom; + int32_t clipleft; + int32_t clipright; + int32_t cliptop; + int32_t clipbottom; const uint8_t *texturePixels; - int textureWidth; - 
int textureHeight; + uint32_t textureWidth; + uint32_t textureHeight; uint32_t solidcolor; const TriUniforms *uniforms; uint8_t *stencilValues; uint32_t *stencilMasks; - int stencilPitch; + int32_t stencilPitch; uint8_t stencilTestValue; uint8_t stencilWriteValue; uint32_t *subsectorGBuffer; diff --git a/src/r_poly_triangle.cpp b/src/r_poly_triangle.cpp index 181b011a4a..27177fa3e2 100644 --- a/src/r_poly_triangle.cpp +++ b/src/r_poly_triangle.cpp @@ -53,7 +53,7 @@ void PolyTriangleDrawer::draw_arrays(const PolyDrawArgs &drawargs, PolyDrawVaria if (drawargs.vcount < 3) return; - void(*drawfunc)(const ScreenPolyTriangleDrawerArgs *, WorkerThreadData *); + void(*drawfunc)(const TriDrawTriangleArgs *, WorkerThreadData *); switch (variant) { default: @@ -63,7 +63,7 @@ void PolyTriangleDrawer::draw_arrays(const PolyDrawArgs &drawargs, PolyDrawVaria case PolyDrawVariant::Stencil: drawfunc = ScreenPolyTriangleDrawer::stencil; break; } - ScreenPolyTriangleDrawerArgs args; + TriDrawTriangleArgs args; args.dest = dc_destorg; args.pitch = dc_pitch; args.clipleft = drawargs.clipleft; @@ -128,7 +128,7 @@ TriVertex PolyTriangleDrawer::shade_vertex(const TriUniforms &uniforms, TriVerte return uniforms.objectToClip * v; } -void PolyTriangleDrawer::draw_shaded_triangle(const TriVertex *vert, bool ccw, ScreenPolyTriangleDrawerArgs *args, WorkerThreadData *thread, void(*drawfunc)(const ScreenPolyTriangleDrawerArgs *, WorkerThreadData *)) +void PolyTriangleDrawer::draw_shaded_triangle(const TriVertex *vert, bool ccw, TriDrawTriangleArgs *args, WorkerThreadData *thread, void(*drawfunc)(const TriDrawTriangleArgs *, WorkerThreadData *)) { // Cull, clip and generate additional vertices as needed TriVertex clippedvert[max_additional_vertices]; @@ -288,7 +288,7 @@ void PolyTriangleDrawer::clipedge(const TriVertex *verts, TriVertex *clippedvert ///////////////////////////////////////////////////////////////////////////// -void ScreenPolyTriangleDrawer::draw(const ScreenPolyTriangleDrawerArgs 
*args, WorkerThreadData *thread) +void ScreenPolyTriangleDrawer::draw(const TriDrawTriangleArgs *args, WorkerThreadData *thread) { uint8_t *dest = args->dest; int pitch = args->pitch; @@ -523,7 +523,7 @@ void ScreenPolyTriangleDrawer::draw(const ScreenPolyTriangleDrawerArgs *args, Wo } } -void ScreenPolyTriangleDrawer::fill(const ScreenPolyTriangleDrawerArgs *args, WorkerThreadData *thread) +void ScreenPolyTriangleDrawer::fill(const TriDrawTriangleArgs *args, WorkerThreadData *thread) { uint8_t *dest = args->dest; int pitch = args->pitch; @@ -681,7 +681,7 @@ void ScreenPolyTriangleDrawer::fill(const ScreenPolyTriangleDrawerArgs *args, Wo } } -void ScreenPolyTriangleDrawer::stencil(const ScreenPolyTriangleDrawerArgs *args, WorkerThreadData *thread) +void ScreenPolyTriangleDrawer::stencil(const TriDrawTriangleArgs *args, WorkerThreadData *thread) { const TriVertex &v1 = *args->v1; const TriVertex &v2 = *args->v2; @@ -833,7 +833,7 @@ void ScreenPolyTriangleDrawer::stencil(const ScreenPolyTriangleDrawerArgs *args, } } -void ScreenPolyTriangleDrawer::draw32(const ScreenPolyTriangleDrawerArgs *args, WorkerThreadData *thread) +void ScreenPolyTriangleDrawer::draw32(const TriDrawTriangleArgs *args, WorkerThreadData *thread) { uint32_t *dest = (uint32_t *)args->dest; int pitch = args->pitch; @@ -1145,7 +1145,7 @@ void ScreenPolyTriangleDrawer::draw32(const ScreenPolyTriangleDrawerArgs *args, } } -void ScreenPolyTriangleDrawer::drawsubsector32(const ScreenPolyTriangleDrawerArgs *args, WorkerThreadData *thread) +void ScreenPolyTriangleDrawer::drawsubsector32(const TriDrawTriangleArgs *args, WorkerThreadData *thread) { uint32_t *dest = (uint32_t *)args->dest; int pitch = args->pitch; @@ -1412,7 +1412,7 @@ void ScreenPolyTriangleDrawer::drawsubsector32(const ScreenPolyTriangleDrawerArg } } -void ScreenPolyTriangleDrawer::fill32(const ScreenPolyTriangleDrawerArgs *args, WorkerThreadData *thread) +void ScreenPolyTriangleDrawer::fill32(const TriDrawTriangleArgs *args, 
WorkerThreadData *thread) { uint32_t *dest = (uint32_t *)args->dest; int pitch = args->pitch; diff --git a/src/r_poly_triangle.h b/src/r_poly_triangle.h index f0ffd18655..b66df78981 100644 --- a/src/r_poly_triangle.h +++ b/src/r_poly_triangle.h @@ -26,7 +26,7 @@ #include "r_triangle.h" -struct ScreenPolyTriangleDrawerArgs; +struct TriDrawTriangleArgs; enum class PolyDrawVariant { @@ -74,7 +74,7 @@ public: private: static TriVertex shade_vertex(const TriUniforms &uniforms, TriVertex v); static void draw_arrays(const PolyDrawArgs &args, PolyDrawVariant variant, WorkerThreadData *thread); - static void draw_shaded_triangle(const TriVertex *vertices, bool ccw, ScreenPolyTriangleDrawerArgs *args, WorkerThreadData *thread, void(*drawfunc)(const ScreenPolyTriangleDrawerArgs *, WorkerThreadData *)); + static void draw_shaded_triangle(const TriVertex *vertices, bool ccw, TriDrawTriangleArgs *args, WorkerThreadData *thread, void(*drawfunc)(const TriDrawTriangleArgs *, WorkerThreadData *)); static bool cullhalfspace(float clipdistance1, float clipdistance2, float &t1, float &t2); static void clipedge(const TriVertex *verts, TriVertex *clippedvert, int &numclipvert); @@ -228,14 +228,14 @@ private: class ScreenPolyTriangleDrawer { public: - static void draw(const ScreenPolyTriangleDrawerArgs *args, WorkerThreadData *thread); - static void fill(const ScreenPolyTriangleDrawerArgs *args, WorkerThreadData *thread); + static void draw(const TriDrawTriangleArgs *args, WorkerThreadData *thread); + static void fill(const TriDrawTriangleArgs *args, WorkerThreadData *thread); - static void stencil(const ScreenPolyTriangleDrawerArgs *args, WorkerThreadData *thread); + static void stencil(const TriDrawTriangleArgs *args, WorkerThreadData *thread); - static void draw32(const ScreenPolyTriangleDrawerArgs *args, WorkerThreadData *thread); - static void drawsubsector32(const ScreenPolyTriangleDrawerArgs *args, WorkerThreadData *thread); - static void fill32(const ScreenPolyTriangleDrawerArgs 
*args, WorkerThreadData *thread); + static void draw32(const TriDrawTriangleArgs *args, WorkerThreadData *thread); + static void drawsubsector32(const TriDrawTriangleArgs *args, WorkerThreadData *thread); + static void fill32(const TriDrawTriangleArgs *args, WorkerThreadData *thread); private: static float gradx(float x0, float y0, float x1, float y1, float x2, float y2, float c0, float c1, float c2); From c852b6c5e9a9a69c0fb8e57d57aea65b82f57deb Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 12 Nov 2016 00:50:21 +0100 Subject: [PATCH 303/912] Add codegen for one of the triangle variants --- src/CMakeLists.txt | 1 + .../fixedfunction/drawtrianglecodegen.cpp | 483 ++++++++++++++++++ .../fixedfunction/drawtrianglecodegen.h | 115 +++++ src/r_compiler/llvmdrawers.cpp | 45 +- src/r_compiler/llvmdrawers.h | 16 + src/r_compiler/ssa/ssa_bool.cpp | 5 + src/r_compiler/ssa/ssa_bool.h | 2 + src/r_compiler/ssa/ssa_float.cpp | 30 ++ src/r_compiler/ssa/ssa_float.h | 5 + src/r_compiler/ssa/ssa_if_block.cpp | 6 + src/r_compiler/ssa/ssa_if_block.h | 1 + src/r_compiler/ssa/ssa_int.cpp | 12 +- src/r_compiler/ssa/ssa_int.h | 3 +- src/r_compiler/ssa/ssa_scope.cpp | 2 +- src/r_poly.cpp | 18 +- src/r_poly.h | 2 +- src/r_poly_triangle.cpp | 15 +- src/r_poly_triangle.h | 16 +- 18 files changed, 739 insertions(+), 38 deletions(-) create mode 100644 src/r_compiler/fixedfunction/drawtrianglecodegen.cpp create mode 100644 src/r_compiler/fixedfunction/drawtrianglecodegen.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index e6ee761b07..43a94f91af 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1487,6 +1487,7 @@ set (PCH_SOURCES r_compiler/fixedfunction/drawwallcodegen.cpp r_compiler/fixedfunction/drawcolumncodegen.cpp r_compiler/fixedfunction/drawskycodegen.cpp + r_compiler/fixedfunction/drawtrianglecodegen.cpp r_data/sprites.cpp r_data/voxels.cpp r_data/renderstyle.cpp diff --git a/src/r_compiler/fixedfunction/drawtrianglecodegen.cpp 
b/src/r_compiler/fixedfunction/drawtrianglecodegen.cpp new file mode 100644 index 0000000000..13fb40d048 --- /dev/null +++ b/src/r_compiler/fixedfunction/drawtrianglecodegen.cpp @@ -0,0 +1,483 @@ +/* +** DrawTriangle code generation +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. +** +*/ + +#include "i_system.h" +#include "r_compiler/llvm_include.h" +#include "r_compiler/fixedfunction/drawtrianglecodegen.h" +#include "r_compiler/ssa/ssa_function.h" +#include "r_compiler/ssa/ssa_scope.h" +#include "r_compiler/ssa/ssa_for_block.h" +#include "r_compiler/ssa/ssa_if_block.h" +#include "r_compiler/ssa/ssa_stack.h" +#include "r_compiler/ssa/ssa_function.h" +#include "r_compiler/ssa/ssa_struct_type.h" +#include "r_compiler/ssa/ssa_value.h" + +void DrawTriangleCodegen::Generate(TriDrawVariant variant, bool truecolor, SSAValue args, SSAValue thread_data) +{ + LoadArgs(variant, truecolor, args, thread_data); + Setup(variant, truecolor); + LoopBlockY(variant, truecolor); +} + +void DrawTriangleCodegen::Setup(TriDrawVariant variant, bool truecolor) +{ + int pixelsize = truecolor ? 
4 : 1; + + // 28.4 fixed-point coordinates + Y1 = SSAInt(SSAFloat::round(16.0f * v1.y), false); + Y2 = SSAInt(SSAFloat::round(16.0f * v2.y), false); + Y3 = SSAInt(SSAFloat::round(16.0f * v3.y), false); + + X1 = SSAInt(SSAFloat::round(16.0f * v1.x), false); + X2 = SSAInt(SSAFloat::round(16.0f * v2.x), false); + X3 = SSAInt(SSAFloat::round(16.0f * v3.x), false); + + // Deltas + DX12 = X1 - X2; + DX23 = X2 - X3; + DX31 = X3 - X1; + + DY12 = Y1 - Y2; + DY23 = Y2 - Y3; + DY31 = Y3 - Y1; + + // Fixed-point deltas + FDX12 = DX12 << 4; + FDX23 = DX23 << 4; + FDX31 = DX31 << 4; + + FDY12 = DY12 << 4; + FDY23 = DY23 << 4; + FDY31 = DY31 << 4; + + // Bounding rectangle + minx = SSAInt::MAX((SSAInt::MIN(SSAInt::MIN(X1, X2), X3) + 0xF) >> 4, clipleft); + maxx = SSAInt::MIN((SSAInt::MAX(SSAInt::MAX(X1, X2), X3) + 0xF) >> 4, clipright - 1); + miny = SSAInt::MAX((SSAInt::MIN(SSAInt::MIN(Y1, Y2), Y3) + 0xF) >> 4, cliptop); + maxy = SSAInt::MIN((SSAInt::MAX(SSAInt::MAX(Y1, Y2), Y3) + 0xF) >> 4, clipbottom - 1); + + SSAIfBlock if0; + if0.if_block(minx >= maxx || miny >= maxy); + if0.end_retvoid(); + + // Start in corner of 8x8 block + minx = minx & ~(q - 1); + miny = miny & ~(q - 1); + + dest = dest[miny * pitch * pixelsize]; + subsectorGBuffer = subsectorGBuffer[miny * pitch]; + + // Half-edge constants + C1 = DY12 * X1 - DX12 * Y1; + C2 = DY23 * X2 - DX23 * Y2; + C3 = DY31 * X3 - DX31 * Y3; + + // Correct for fill convention + SSAIfBlock if1; + if1.if_block(DY12 < SSAInt(0) || (DY12 == SSAInt(0) && DX12 > SSAInt(0))); + stack_C1.store(C1 + 1); + if1.else_block(); + stack_C1.store(C1); + if1.end_block(); + C1 = stack_C1.load(); + SSAIfBlock if2; + if2.if_block(DY23 < SSAInt(0) || (DY23 == SSAInt(0) && DX23 > SSAInt(0))); + stack_C2.store(C2 + 1); + if2.else_block(); + stack_C2.store(C2); + if2.end_block(); + C2 = stack_C2.load(); + SSAIfBlock if3; + if3.if_block(DY31 < SSAInt(0) || (DY31 == SSAInt(0) && DX31 > SSAInt(0))); + stack_C3.store(C3 + 1); + if3.else_block(); + 
stack_C3.store(C3); + if3.end_block(); + C3 = stack_C3.load(); + + // Gradients + gradWX = gradx(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + gradWY = grady(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + startW = v1.w + gradWX * (SSAFloat(minx) - v1.x) + gradWY * (SSAFloat(miny) - v1.y); + for (int i = 0; i < TriVertex::NumVarying; i++) + { + gradVaryingX[i] = gradx(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + gradVaryingY[i] = grady(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + startVarying[i] = v1.varying[i] * v1.w + gradVaryingX[i] * (SSAFloat(minx) - v1.x) + gradVaryingY[i] * (SSAFloat(miny) - v1.y); + } +} + +SSAFloat DrawTriangleCodegen::gradx(SSAFloat x0, SSAFloat y0, SSAFloat x1, SSAFloat y1, SSAFloat x2, SSAFloat y2, SSAFloat c0, SSAFloat c1, SSAFloat c2) +{ + SSAFloat top = (c1 - c2) * (y0 - y2) - (c0 - c2) * (y1 - y2); + SSAFloat bottom = (x1 - x2) * (y0 - y2) - (x0 - x2) * (y1 - y2); + return top / bottom; +} + +SSAFloat DrawTriangleCodegen::grady(SSAFloat x0, SSAFloat y0, SSAFloat x1, SSAFloat y1, SSAFloat x2, SSAFloat y2, SSAFloat c0, SSAFloat c1, SSAFloat c2) +{ + SSAFloat top = (c1 - c2) * (x0 - x2) - (c0 - c2) * (x1 - x2); + SSAFloat bottom = (x0 - x2) * (y1 - y2) - (x1 - x2) * (y0 - y2); + return top / bottom; +} + +void DrawTriangleCodegen::LoopBlockY(TriDrawVariant variant, bool truecolor) +{ + int pixelsize = truecolor ? 
4 : 1; + + stack_y.store(miny); + stack_dest.store(dest); + stack_subsectorGBuffer.store(subsectorGBuffer); + + SSAForBlock loop; + y = stack_y.load(); + dest = stack_dest.load(); + subsectorGBuffer = stack_subsectorGBuffer.load(); + loop.loop_block(y < maxy, 0); + { + SSAIfBlock branch; + branch.if_block((y / q) % thread.num_cores == thread.core); + { + LoopBlockX(variant, truecolor); + } + branch.end_block(); + + stack_dest.store(dest[q * pitch * pixelsize]); + stack_subsectorGBuffer.store(subsectorGBuffer[q * pitch]); + stack_y.store(y + q); + } + loop.end_block(); +} + +void DrawTriangleCodegen::LoopBlockX(TriDrawVariant variant, bool truecolor) +{ + stack_x.store(minx); + + SSAForBlock loop; + x = stack_x.load(); + loop.loop_block(x < maxx, 0); + { + // Corners of block + x0 = x << 4; + x1 = (x + q - 1) << 4; + y0 = y << 4; + y1 = (y + q - 1) << 4; + + // Evaluate half-space functions + SSABool a00 = C1 + DX12 * y0 - DY12 * x0 > SSAInt(0); + SSABool a10 = C1 + DX12 * y0 - DY12 * x1 > SSAInt(0); + SSABool a01 = C1 + DX12 * y1 - DY12 * x0 > SSAInt(0); + SSABool a11 = C1 + DX12 * y1 - DY12 * x1 > SSAInt(0); + + SSAInt a = (a00.zext_int() << 0) | (a10.zext_int() << 1) | (a01.zext_int() << 2) | (a11.zext_int() << 3); + + SSABool b00 = C2 + DX23 * y0 - DY23 * x0 > SSAInt(0); + SSABool b10 = C2 + DX23 * y0 - DY23 * x1 > SSAInt(0); + SSABool b01 = C2 + DX23 * y1 - DY23 * x0 > SSAInt(0); + SSABool b11 = C2 + DX23 * y1 - DY23 * x1 > SSAInt(0); + SSAInt b = (b00.zext_int() << 0) | (b10.zext_int() << 1) | (b01.zext_int() << 2) | (b11.zext_int() << 3); + + SSABool c00 = C3 + DX31 * y0 - DY31 * x0 > SSAInt(0); + SSABool c10 = C3 + DX31 * y0 - DY31 * x1 > SSAInt(0); + SSABool c01 = C3 + DX31 * y1 - DY31 * x0 > SSAInt(0); + SSABool c11 = C3 + DX31 * y1 - DY31 * x1 > SSAInt(0); + SSAInt c = (c00.zext_int() << 0) | (c10.zext_int() << 1) | (c01.zext_int() << 2) | (c11.zext_int() << 3); + + // Skip block when outside an edge + SSAIfBlock branch; + branch.if_block(!(a == SSAInt(0) 
|| b == SSAInt(0) || c == SSAInt(0))); + + // Check if block needs clipping + SSABool clipneeded = clipleft > x || clipright < (x + q) || cliptop > y || clipbottom < (y + q); + + // Calculate varying variables for affine block + SSAFloat offx0 = SSAFloat(x - minx) + 0.5f; + SSAFloat offy0 = SSAFloat(y - miny) + 0.5f; + SSAFloat offx1 = offx0 + SSAFloat(q); + SSAFloat offy1 = offy0 + SSAFloat(q); + SSAFloat rcpWTL = 1.0f / (startW + offx0 * gradWX + offy0 * gradWY); + SSAFloat rcpWTR = 1.0f / (startW + offx1 * gradWX + offy0 * gradWY); + SSAFloat rcpWBL = 1.0f / (startW + offx0 * gradWX + offy1 * gradWY); + SSAFloat rcpWBR = 1.0f / (startW + offx1 * gradWX + offy1 * gradWY); + for (int i = 0; i < TriVertex::NumVarying; i++) + { + varyingTL[i] = (startVarying[i] + offx0 * gradVaryingX[i] + offy0 * gradVaryingY[i]) * rcpWTL; + varyingTR[i] = (startVarying[i] + offx1 * gradVaryingX[i] + offy0 * gradVaryingY[i]) * rcpWTR; + varyingBL[i] = ((startVarying[i] + offx0 * gradVaryingX[i] + offy1 * gradVaryingY[i]) * rcpWBL - varyingTL[i]) * (1.0f / q); + varyingBR[i] = ((startVarying[i] + offx1 * gradVaryingX[i] + offy1 * gradVaryingY[i]) * rcpWBR - varyingTR[i]) * (1.0f / q); + } + + SSAFloat globVis = SSAFloat(1706.0f); + SSAFloat vis = globVis / rcpWTL; + SSAFloat shade = 64.0f - (SSAFloat(light * 255 / 256) + 12.0f) * 32.0f / 128.0f; + SSAFloat lightscale = SSAFloat::clamp((shade - SSAFloat::MIN(SSAFloat(24.0f), vis)) / 32.0f, SSAFloat(0.0f), SSAFloat(31.0f / 32.0f)); + diminishedlight = SSAInt(SSAFloat::clamp((1.0f - lightscale) * 256.0f + 0.5f, SSAFloat(0.0f), SSAFloat(256.0f)), false); + + // Accept whole block when totally covered + SSAIfBlock branch_covered; + branch_covered.if_block(a == SSAInt(0xF) && b == SSAInt(0xF) && c == SSAInt(0xF) && !clipneeded); + { + LoopFullBlock(variant, truecolor); + } + branch_covered.else_block(); + { + LoopPartialBlock(variant, truecolor); + } + branch_covered.end_block(); + + branch.end_block(); + + stack_x.store(x + q); + } + 
loop.end_block(); +} + +void DrawTriangleCodegen::LoopFullBlock(TriDrawVariant variant, bool truecolor) +{ + int pixelsize = truecolor ? 4 : 1; + + stack_iy.store(SSAInt(0)); + stack_buffer.store(dest); + stack_subsectorbuffer.store(subsectorGBuffer); + + SSAForBlock loopy; + SSAInt iy = stack_iy.load(); + SSAUBytePtr buffer = stack_buffer.load(); + SSAIntPtr subsectorbuffer = stack_subsectorbuffer.load(); + loopy.loop_block(iy < SSAInt(q), q); + { + SSAInt varyingStep[TriVertex::NumVarying]; + for (int i = 0; i < TriVertex::NumVarying; i++) + { + SSAFloat pos = varyingTL[i] + varyingBL[i] * SSAFloat(iy); + SSAFloat step = (varyingTR[i] + varyingBR[i] * SSAFloat(iy) - pos) * (1.0f / q); + + stack_varying[i].store(SSAInt((pos - SSAFloat::floor(pos)) * SSAFloat((float)0x100000000LL), true)); + varyingStep[i] = SSAInt(step * SSAFloat((float)0x100000000LL), true); + } + + stack_ix.store(x); + SSAForBlock loopx; + SSAInt ix = stack_ix.load(); + SSAInt varying[TriVertex::NumVarying]; + for (int i = 0; i < TriVertex::NumVarying; i++) + varying[i] = stack_varying[i].load(); + loopx.loop_block(ix < x + q, q); + { + SSAIfBlock branch; + branch.if_block(subsectorbuffer[ix].load(true) >= subsectorDepth); + { + if (truecolor) + ProcessPixel(buffer[ix * 4], subsectorbuffer[ix], varying, variant, truecolor); + else + ProcessPixel(buffer[ix], subsectorbuffer[ix], varying, variant, truecolor); + } + branch.end_block(); + + for (int i = 0; i < TriVertex::NumVarying; i++) + stack_varying[i].store(varying[i] + varyingStep[i]); + + stack_ix.store(ix + 1); + } + loopx.end_block(); + + stack_buffer.store(buffer[pitch * pixelsize]); + stack_subsectorbuffer.store(subsectorbuffer[pitch]); + stack_iy.store(iy + 1); + } + loopy.end_block(); +} + +void DrawTriangleCodegen::LoopPartialBlock(TriDrawVariant variant, bool truecolor) +{ + int pixelsize = truecolor ? 
4 : 1; + + stack_CY1.store(C1 + DX12 * y0 - DY12 * x0); + stack_CY2.store(C2 + DX23 * y0 - DY23 * x0); + stack_CY3.store(C3 + DX31 * y0 - DY31 * x0); + stack_iy.store(SSAInt(0)); + stack_buffer.store(dest); + stack_subsectorbuffer.store(subsectorGBuffer); + + SSAForBlock loopy; + SSAInt iy = stack_iy.load(); + SSAUBytePtr buffer = stack_buffer.load(); + SSAIntPtr subsectorbuffer = stack_subsectorbuffer.load(); + SSAInt CY1 = stack_CY1.load(); + SSAInt CY2 = stack_CY2.load(); + SSAInt CY3 = stack_CY3.load(); + loopy.loop_block(iy < SSAInt(q), q); + { + SSAInt varyingStep[TriVertex::NumVarying]; + for (int i = 0; i < TriVertex::NumVarying; i++) + { + SSAFloat pos = varyingTL[i] + varyingBL[i] * SSAFloat(iy); + SSAFloat step = (varyingTR[i] + varyingBR[i] * SSAFloat(iy) - pos) * (1.0f / q); + + stack_varying[i].store(SSAInt((pos - SSAFloat::floor(pos)) * SSAFloat((float)0x100000000LL), true)); + varyingStep[i] = SSAInt(step * SSAFloat((float)0x100000000LL), true); + } + + stack_CX1.store(CY1); + stack_CX2.store(CY2); + stack_CX3.store(CY3); + stack_ix.store(SSAInt(0)); + + SSAForBlock loopx; + SSAInt ix = stack_ix.load(); + SSAInt CX1 = stack_CX1.load(); + SSAInt CX2 = stack_CX2.load(); + SSAInt CX3 = stack_CX3.load(); + SSAInt varying[TriVertex::NumVarying]; + for (int i = 0; i < TriVertex::NumVarying; i++) + varying[i] = stack_varying[i].load(); + loopx.loop_block(ix < SSAInt(q), q); + { + SSABool visible = (ix + x >= clipleft) && (ix + x < clipright) && (cliptop <= y + iy) && (clipbottom > y + iy); + + SSAIfBlock branch; + branch.if_block(CX1 > SSAInt(0) && CX2 > SSAInt(0) && CX3 > SSAInt(0) && visible && subsectorbuffer[ix + x].load(true) >= subsectorDepth); + { + if (truecolor) + ProcessPixel(buffer[(ix + x) * 4], subsectorbuffer[ix + x], varying, variant, truecolor); + else + ProcessPixel(buffer[ix + x], subsectorbuffer[ix + x], varying, variant, truecolor); + } + branch.end_block(); + + for (int i = 0; i < TriVertex::NumVarying; i++) + 
stack_varying[i].store(varying[i] + varyingStep[i]); + + stack_CX1.store(CX1 - FDY12); + stack_CX2.store(CX2 - FDY23); + stack_CX3.store(CX3 - FDY31); + stack_ix.store(ix + 1); + } + loopx.end_block(); + + stack_CY1.store(CY1 + FDX12); + stack_CY2.store(CY2 + FDX23); + stack_CY3.store(CY3 + FDX31); + stack_buffer.store(buffer[pitch * pixelsize]); + stack_subsectorbuffer.store(subsectorbuffer[pitch]); + stack_iy.store(iy + 1); + } + loopy.end_block(); +} + +void DrawTriangleCodegen::ProcessPixel(SSAUBytePtr buffer, SSAIntPtr subsectorbuffer, SSAInt *varying, TriDrawVariant variant, bool truecolor) +{ + SSAInt ufrac = varying[0]; + SSAInt vfrac = varying[1]; + + SSAInt upos = ((ufrac >> 16) * textureWidth) >> 16; + SSAInt vpos = ((vfrac >> 16) * textureHeight) >> 16; + SSAInt uvoffset = upos * textureHeight + vpos; + + if (truecolor) + { + SSAVec4i fg = texturePixels[uvoffset * 4].load_vec4ub(true); + SSAInt fg_alpha = fg[3]; + fg = (fg * diminishedlight) >> 8; + fg.insert(3, fg_alpha); + + SSAIfBlock branch_transparency; + branch_transparency.if_block(fg_alpha > SSAInt(127)); + { + buffer.store_vec4ub(fg); + } + branch_transparency.end_block(); + } + else + { + SSAUByte palindex = texturePixels[uvoffset].load(true); + SSAIfBlock branch_transparency; + branch_transparency.if_block(!(palindex.zext_int() == SSAInt(0))); + { + buffer.store(palindex); + } + branch_transparency.end_block(); + } +} + +void DrawTriangleCodegen::LoadArgs(TriDrawVariant variant, bool truecolor, SSAValue args, SSAValue thread_data) +{ + dest = args[0][0].load(true); + pitch = args[0][1].load(true); + v1 = LoadTriVertex(args[0][2].load(true)); + v2 = LoadTriVertex(args[0][3].load(true)); + v3 = LoadTriVertex(args[0][4].load(true)); + clipleft = args[0][5].load(true); + clipright = args[0][6].load(true); + cliptop = args[0][7].load(true); + clipbottom = args[0][8].load(true); + texturePixels = args[0][9].load(true); + textureWidth = args[0][10].load(true); + textureHeight = 
args[0][11].load(true); + solidcolor = args[0][12].load(true); + LoadUniforms(args[0][13].load(true)); + stencilValues = args[0][14].load(true); + stencilMasks = args[0][15].load(true); + stencilPitch = args[0][16].load(true); + stencilTestValue = args[0][17].load(true); + stencilWriteValue = args[0][18].load(true); + subsectorGBuffer = args[0][19].load(true); + + thread.core = thread_data[0][0].load(true); + thread.num_cores = thread_data[0][1].load(true); +} + +SSATriVertex DrawTriangleCodegen::LoadTriVertex(SSAValue ptr) +{ + SSATriVertex v; + v.x = ptr[0][0].load(true); + v.y = ptr[0][1].load(true); + v.z = ptr[0][2].load(true); + v.w = ptr[0][3].load(true); + for (int i = 0; i < TriVertex::NumVarying; i++) + v.varying[i] = ptr[0][4 + i].load(true); + return v; +} + +void DrawTriangleCodegen::LoadUniforms(SSAValue uniforms) +{ + light = uniforms[0][0].load(true); + subsectorDepth = uniforms[0][1].load(true); + + SSAShort light_alpha = uniforms[0][2].load(true); + SSAShort light_red = uniforms[0][3].load(true); + SSAShort light_green = uniforms[0][4].load(true); + SSAShort light_blue = uniforms[0][5].load(true); + SSAShort fade_alpha = uniforms[0][6].load(true); + SSAShort fade_red = uniforms[0][7].load(true); + SSAShort fade_green = uniforms[0][8].load(true); + SSAShort fade_blue = uniforms[0][9].load(true); + SSAShort desaturate = uniforms[0][10].load(true); + SSAInt flags = uniforms[0][11].load(true); + shade_constants.light = SSAVec4i(light_blue.zext_int(), light_green.zext_int(), light_red.zext_int(), light_alpha.zext_int()); + shade_constants.fade = SSAVec4i(fade_blue.zext_int(), fade_green.zext_int(), fade_red.zext_int(), fade_alpha.zext_int()); + shade_constants.desaturate = desaturate.zext_int(); + + is_simple_shade = (flags & TriUniforms::simple_shade) == SSAInt(TriUniforms::simple_shade); + is_nearest_filter = (flags & TriUniforms::nearest_filter) == SSAInt(TriUniforms::nearest_filter); +} diff --git 
a/src/r_compiler/fixedfunction/drawtrianglecodegen.h b/src/r_compiler/fixedfunction/drawtrianglecodegen.h new file mode 100644 index 0000000000..08b848b919 --- /dev/null +++ b/src/r_compiler/fixedfunction/drawtrianglecodegen.h @@ -0,0 +1,115 @@ +/* +** DrawTriangle code generation +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. 
+** +*/ + +#pragma once + +#include "drawercodegen.h" + +struct SSATriVertex +{ + SSAFloat x, y, z, w; + SSAFloat varying[TriVertex::NumVarying]; +}; + +class DrawTriangleCodegen : public DrawerCodegen +{ +public: + void Generate(TriDrawVariant variant, bool truecolor, SSAValue args, SSAValue thread_data); + +private: + void LoadArgs(TriDrawVariant variant, bool truecolor, SSAValue args, SSAValue thread_data); + SSATriVertex LoadTriVertex(SSAValue v); + void LoadUniforms(SSAValue uniforms); + void Setup(TriDrawVariant variant, bool truecolor); + void LoopBlockY(TriDrawVariant variant, bool truecolor); + void LoopBlockX(TriDrawVariant variant, bool truecolor); + void LoopFullBlock(TriDrawVariant variant, bool truecolor); + void LoopPartialBlock(TriDrawVariant variant, bool truecolor); + + void ProcessPixel(SSAUBytePtr buffer, SSAIntPtr subsectorbuffer, SSAInt *varying, TriDrawVariant variant, bool truecolor); + + SSAFloat gradx(SSAFloat x0, SSAFloat y0, SSAFloat x1, SSAFloat y1, SSAFloat x2, SSAFloat y2, SSAFloat c0, SSAFloat c1, SSAFloat c2); + SSAFloat grady(SSAFloat x0, SSAFloat y0, SSAFloat x1, SSAFloat y1, SSAFloat x2, SSAFloat y2, SSAFloat c0, SSAFloat c1, SSAFloat c2); + + SSAStack stack_C1, stack_C2, stack_C3; + SSAStack stack_y; + SSAStack stack_dest; + SSAStack stack_subsectorGBuffer; + SSAStack stack_x; + SSAStack stack_buffer; + SSAStack stack_subsectorbuffer; + SSAStack stack_iy, stack_ix; + SSAStack stack_varying[TriVertex::NumVarying]; + SSAStack stack_CY1, stack_CY2, stack_CY3; + SSAStack stack_CX1, stack_CX2, stack_CX3; + + SSAUBytePtr dest; + SSAInt pitch; + SSATriVertex v1; + SSATriVertex v2; + SSATriVertex v3; + SSAInt clipleft; + SSAInt clipright; + SSAInt cliptop; + SSAInt clipbottom; + SSAUBytePtr texturePixels; + SSAInt textureWidth; + SSAInt textureHeight; + SSAInt solidcolor; + + SSAInt light; + SSAInt subsectorDepth; + SSAShadeConstants shade_constants; + SSABool is_simple_shade; + SSABool is_nearest_filter; + + SSAUBytePtr stencilValues; 
+ SSAIntPtr stencilMasks; + SSAInt stencilPitch; + SSAUByte stencilTestValue; + SSAUByte stencilWriteValue; + SSAIntPtr subsectorGBuffer; + + SSAWorkerThread thread; + + // Block size, standard 8x8 (must be power of two) + const int q = 8; + + SSAInt Y1, Y2, Y3; + SSAInt X1, X2, X3; + SSAInt DX12, DX23, DX31; + SSAInt DY12, DY23, DY31; + SSAInt FDX12, FDX23, FDX31; + SSAInt FDY12, FDY23, FDY31; + SSAInt minx, maxx, miny, maxy; + SSAInt C1, C2, C3; + SSAFloat gradWX, gradWY, startW; + SSAFloat gradVaryingX[TriVertex::NumVarying], gradVaryingY[TriVertex::NumVarying], startVarying[TriVertex::NumVarying]; + + SSAInt x, y; + SSAInt x0, x1, y0, y1; + SSAInt diminishedlight; + SSAFloat varyingTL[TriVertex::NumVarying]; + SSAFloat varyingTR[TriVertex::NumVarying]; + SSAFloat varyingBL[TriVertex::NumVarying]; + SSAFloat varyingBR[TriVertex::NumVarying]; +}; diff --git a/src/r_compiler/llvmdrawers.cpp b/src/r_compiler/llvmdrawers.cpp index aa9a7b4fa3..b3d893153e 100644 --- a/src/r_compiler/llvmdrawers.cpp +++ b/src/r_compiler/llvmdrawers.cpp @@ -26,6 +26,7 @@ #include "r_compiler/fixedfunction/drawwallcodegen.h" #include "r_compiler/fixedfunction/drawcolumncodegen.h" #include "r_compiler/fixedfunction/drawskycodegen.h" +#include "r_compiler/fixedfunction/drawtrianglecodegen.h" #include "r_compiler/ssa/ssa_function.h" #include "r_compiler/ssa/ssa_scope.h" #include "r_compiler/ssa/ssa_for_block.h" @@ -81,6 +82,7 @@ private: void CodegenDrawSpan(const char *name, DrawSpanVariant variant); void CodegenDrawWall(const char *name, DrawWallVariant variant, int columns); void CodegenDrawSky(const char *name, DrawSkyVariant variant, int columns); + void CodegenDrawTriangle(const char *name, TriDrawVariant variant, bool truecolor); static llvm::Type *GetDrawColumnArgsStruct(llvm::LLVMContext &context); static llvm::Type *GetDrawSpanArgsStruct(llvm::LLVMContext &context); @@ -184,6 +186,13 @@ LLVMDrawersImpl::LLVMDrawersImpl() CodegenDrawSky("DrawSky4", DrawSkyVariant::Single, 4); 
CodegenDrawSky("DrawDoubleSky1", DrawSkyVariant::Double, 1); CodegenDrawSky("DrawDoubleSky4", DrawSkyVariant::Double, 4); + CodegenDrawTriangle("TriDraw8", TriDrawVariant::Draw, false); + CodegenDrawTriangle("TriDraw32", TriDrawVariant::Draw, true); + CodegenDrawTriangle("TriDrawSubsector8", TriDrawVariant::DrawSubsector, false); + CodegenDrawTriangle("TriDrawSubsector32", TriDrawVariant::DrawSubsector, true); + CodegenDrawTriangle("TriFill8", TriDrawVariant::Fill, false); + CodegenDrawTriangle("TriFill32", TriDrawVariant::Fill, true); + CodegenDrawTriangle("TriStencil", TriDrawVariant::Stencil, false); mProgram.CreateEE(); @@ -249,6 +258,13 @@ LLVMDrawersImpl::LLVMDrawersImpl() DrawSky4 = mProgram.GetProcAddress("DrawSky4"); DrawDoubleSky1 = mProgram.GetProcAddress("DrawDoubleSky1"); DrawDoubleSky4 = mProgram.GetProcAddress("DrawDoubleSky4"); + TriDraw8 = mProgram.GetProcAddress("TriDraw8"); + TriDraw32 = mProgram.GetProcAddress("TriDraw32"); + TriDrawSubsector8 = mProgram.GetProcAddress("TriDrawSubsector8"); + TriDrawSubsector32 = mProgram.GetProcAddress("TriDrawSubsector32"); + TriFill8 = mProgram.GetProcAddress("TriFill8"); + TriFill32 = mProgram.GetProcAddress("TriFill32"); + TriStencil = mProgram.GetProcAddress("TriStencil"); #if 0 std::vector foo(1024 * 4); @@ -359,6 +375,25 @@ void LLVMDrawersImpl::CodegenDrawSky(const char *name, DrawSkyVariant variant, i I_FatalError("verifyFunction failed for CodegenDrawSky()"); } +void LLVMDrawersImpl::CodegenDrawTriangle(const char *name, TriDrawVariant variant, bool truecolor) +{ + llvm::IRBuilder<> builder(mProgram.context()); + SSAScope ssa_scope(&mProgram.context(), mProgram.module(), &builder); + + SSAFunction function(name); + function.add_parameter(GetTriDrawTriangleArgs(mProgram.context())); + function.add_parameter(GetWorkerThreadDataStruct(mProgram.context())); + function.create_public(); + + DrawTriangleCodegen codegen; + codegen.Generate(variant, truecolor, function.parameter(0), function.parameter(1)); + + 
builder.CreateRetVoid(); + + if (llvm::verifyFunction(*function.func)) + I_FatalError("verifyFunction failed for CodegenDrawTriangle()"); +} + llvm::Type *LLVMDrawersImpl::GetDrawColumnArgsStruct(llvm::LLVMContext &context) { std::vector elements; @@ -468,7 +503,7 @@ llvm::Type *LLVMDrawersImpl::GetWorkerThreadDataStruct(llvm::LLVMContext &contex llvm::Type *LLVMDrawersImpl::GetTriVertexStruct(llvm::LLVMContext &context) { std::vector elements; - for (int i = 0; i < 6; i++) + for (int i = 0; i < 4 + TriVertex::NumVarying; i++) elements.push_back(llvm::Type::getFloatTy(context)); return llvm::StructType::create(context, elements, "TriVertex", false)->getPointerTo(); } @@ -505,9 +540,9 @@ llvm::Type *LLVMDrawersImpl::GetTriDrawTriangleArgs(llvm::LLVMContext &context) std::vector elements; elements.push_back(llvm::Type::getInt8PtrTy(context)); // uint8_t *dest; elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t pitch; - elements.push_back(GetTriVertexStruct(context)->getPointerTo()); // TriVertex *v1; - elements.push_back(GetTriVertexStruct(context)->getPointerTo()); // TriVertex *v2; - elements.push_back(GetTriVertexStruct(context)->getPointerTo()); // TriVertex *v3; + elements.push_back(GetTriVertexStruct(context)); // TriVertex *v1; + elements.push_back(GetTriVertexStruct(context)); // TriVertex *v2; + elements.push_back(GetTriVertexStruct(context)); // TriVertex *v3; elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t clipleft; elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t clipright; elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t cliptop; @@ -516,7 +551,7 @@ llvm::Type *LLVMDrawersImpl::GetTriDrawTriangleArgs(llvm::LLVMContext &context) elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t textureWidth; elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t textureHeight; elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t solidcolor; - 
elements.push_back(GetTriUniformsStruct(context)->getPointerTo()); // const TriUniforms *uniforms; + elements.push_back(GetTriUniformsStruct(context)); // const TriUniforms *uniforms; elements.push_back(llvm::Type::getInt8PtrTy(context)); // uint8_t *stencilValues; elements.push_back(llvm::Type::getInt32PtrTy(context)); // uint32_t *stencilMasks; elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t stencilPitch; diff --git a/src/r_compiler/llvmdrawers.h b/src/r_compiler/llvmdrawers.h index d0ee3dcb76..f8b49e9be1 100644 --- a/src/r_compiler/llvmdrawers.h +++ b/src/r_compiler/llvmdrawers.h @@ -260,6 +260,14 @@ struct TriDrawTriangleArgs uint32_t *subsectorGBuffer; }; +enum class TriDrawVariant +{ + Draw, + Fill, + DrawSubsector, + Stencil, +}; + class LLVMDrawers { public: @@ -335,6 +343,14 @@ public: void(*DrawDoubleSky1)(const DrawSkyArgs *, const WorkerThreadData *) = nullptr; void(*DrawDoubleSky4)(const DrawSkyArgs *, const WorkerThreadData *) = nullptr; + void(*TriDraw8)(const TriDrawTriangleArgs *, WorkerThreadData *) = nullptr; + void(*TriDraw32)(const TriDrawTriangleArgs *, WorkerThreadData *) = nullptr; + void(*TriDrawSubsector8)(const TriDrawTriangleArgs *, WorkerThreadData *) = nullptr; + void(*TriDrawSubsector32)(const TriDrawTriangleArgs *, WorkerThreadData *) = nullptr; + void(*TriFill8)(const TriDrawTriangleArgs *, WorkerThreadData *) = nullptr; + void(*TriFill32)(const TriDrawTriangleArgs *, WorkerThreadData *) = nullptr; + void(*TriStencil)(const TriDrawTriangleArgs *, WorkerThreadData *) = nullptr; + private: static LLVMDrawers *Singleton; }; diff --git a/src/r_compiler/ssa/ssa_bool.cpp b/src/r_compiler/ssa/ssa_bool.cpp index c3c4c72b46..d23d9e8ae4 100644 --- a/src/r_compiler/ssa/ssa_bool.cpp +++ b/src/r_compiler/ssa/ssa_bool.cpp @@ -44,6 +44,11 @@ llvm::Type *SSABool::llvm_type() return llvm::Type::getInt1Ty(SSAScope::context()); } +SSAInt SSABool::zext_int() +{ + return SSAInt::from_llvm(SSAScope::builder().CreateZExt(v, 
SSAInt::llvm_type(), SSAScope::hint())); +} + SSABool operator&&(const SSABool &a, const SSABool &b) { return SSABool::from_llvm(SSAScope::builder().CreateAnd(a.v, b.v, SSAScope::hint())); diff --git a/src/r_compiler/ssa/ssa_bool.h b/src/r_compiler/ssa/ssa_bool.h index df8b66d513..4c04941d5e 100644 --- a/src/r_compiler/ssa/ssa_bool.h +++ b/src/r_compiler/ssa/ssa_bool.h @@ -37,6 +37,8 @@ public: static SSABool from_llvm(llvm::Value *v) { return SSABool(v); } static llvm::Type *llvm_type(); + SSAInt zext_int(); + llvm::Value *v; }; diff --git a/src/r_compiler/ssa/ssa_float.cpp b/src/r_compiler/ssa/ssa_float.cpp index 07f18fb569..77cef29ffa 100644 --- a/src/r_compiler/ssa/ssa_float.cpp +++ b/src/r_compiler/ssa/ssa_float.cpp @@ -24,6 +24,7 @@ #include "ssa_float.h" #include "ssa_int.h" #include "ssa_scope.h" +#include "ssa_bool.h" SSAFloat::SSAFloat() : v(0) @@ -111,6 +112,35 @@ SSAFloat SSAFloat::fma(SSAFloat a, SSAFloat b, SSAFloat c) return SSAFloat::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::fma, params), args, SSAScope::hint())); } +SSAFloat SSAFloat::round(SSAFloat val) +{ + std::vector params; + params.push_back(SSAFloat::llvm_type()); + return SSAFloat::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::round, params), val.v, SSAScope::hint())); +} + +SSAFloat SSAFloat::floor(SSAFloat val) +{ + std::vector params; + params.push_back(SSAFloat::llvm_type()); + return SSAFloat::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::floor, params), val.v, SSAScope::hint())); +} + +SSAFloat SSAFloat::MIN(SSAFloat a, SSAFloat b) +{ + return SSAFloat::from_llvm(SSAScope::builder().CreateSelect((a < b).v, a.v, b.v, SSAScope::hint())); +} + +SSAFloat SSAFloat::MAX(SSAFloat a, SSAFloat b) +{ + return SSAFloat::from_llvm(SSAScope::builder().CreateSelect((a > b).v, a.v, b.v, SSAScope::hint())); +} + +SSAFloat SSAFloat::clamp(SSAFloat a, SSAFloat b, SSAFloat c) +{ + return 
SSAFloat::MAX(SSAFloat::MIN(a, c), b); +} + SSAFloat operator+(const SSAFloat &a, const SSAFloat &b) { return SSAFloat::from_llvm(SSAScope::builder().CreateFAdd(a.v, b.v, SSAScope::hint())); diff --git a/src/r_compiler/ssa/ssa_float.h b/src/r_compiler/ssa/ssa_float.h index 8f2f22ba61..9386db0174 100644 --- a/src/r_compiler/ssa/ssa_float.h +++ b/src/r_compiler/ssa/ssa_float.h @@ -43,6 +43,11 @@ public: static SSAFloat exp(SSAFloat val); static SSAFloat log(SSAFloat val); static SSAFloat fma(SSAFloat a, SSAFloat b, SSAFloat c); + static SSAFloat round(SSAFloat val); + static SSAFloat floor(SSAFloat val); + static SSAFloat MIN(SSAFloat a, SSAFloat b); + static SSAFloat MAX(SSAFloat a, SSAFloat b); + static SSAFloat clamp(SSAFloat a, SSAFloat b, SSAFloat c); llvm::Value *v; }; diff --git a/src/r_compiler/ssa/ssa_if_block.cpp b/src/r_compiler/ssa/ssa_if_block.cpp index c95d7e3500..6d36faf49e 100644 --- a/src/r_compiler/ssa/ssa_if_block.cpp +++ b/src/r_compiler/ssa/ssa_if_block.cpp @@ -50,3 +50,9 @@ void SSAIfBlock::end_block() SSAScope::builder().CreateBr(end_basic_block); SSAScope::builder().SetInsertPoint(end_basic_block); } + +void SSAIfBlock::end_retvoid() +{ + SSAScope::builder().CreateRetVoid(); + SSAScope::builder().SetInsertPoint(end_basic_block); +} diff --git a/src/r_compiler/ssa/ssa_if_block.h b/src/r_compiler/ssa/ssa_if_block.h index e46276575c..b958883eb5 100644 --- a/src/r_compiler/ssa/ssa_if_block.h +++ b/src/r_compiler/ssa/ssa_if_block.h @@ -32,6 +32,7 @@ public: void if_block(SSABool true_condition); void else_block(); void end_block(); + void end_retvoid(); private: llvm::BasicBlock *if_basic_block; diff --git a/src/r_compiler/ssa/ssa_int.cpp b/src/r_compiler/ssa/ssa_int.cpp index 78ea015956..ac14af8b6c 100644 --- a/src/r_compiler/ssa/ssa_int.cpp +++ b/src/r_compiler/ssa/ssa_int.cpp @@ -37,10 +37,13 @@ SSAInt::SSAInt(int constant) v = llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, constant, true)); } -SSAInt::SSAInt(SSAFloat f) 
+SSAInt::SSAInt(SSAFloat f, bool uint) : v(0) { - v = SSAScope::builder().CreateFPToSI(f.v, llvm::Type::getInt32Ty(SSAScope::context()), SSAScope::hint()); + if (uint) + v = SSAScope::builder().CreateFPToUI(f.v, llvm::Type::getInt32Ty(SSAScope::context()), SSAScope::hint()); + else + v = SSAScope::builder().CreateFPToSI(f.v, llvm::Type::getInt32Ty(SSAScope::context()), SSAScope::hint()); } SSAInt::SSAInt(llvm::Value *v) @@ -63,6 +66,11 @@ SSAInt SSAInt::MAX(SSAInt a, SSAInt b) return SSAInt::from_llvm(SSAScope::builder().CreateSelect((a > b).v, a.v, b.v, SSAScope::hint())); } +SSAInt SSAInt::clamp(SSAInt a, SSAInt b, SSAInt c) +{ + return SSAInt::MAX(SSAInt::MIN(a, c), b); +} + SSAInt SSAInt::add(SSAInt b, bool no_unsigned_wrap, bool no_signed_wrap) { return SSAInt::from_llvm(SSAScope::builder().CreateAdd(v, b.v, SSAScope::hint(), no_unsigned_wrap, no_signed_wrap)); diff --git a/src/r_compiler/ssa/ssa_int.h b/src/r_compiler/ssa/ssa_int.h index a8700f80bf..a655e3d2e7 100644 --- a/src/r_compiler/ssa/ssa_int.h +++ b/src/r_compiler/ssa/ssa_int.h @@ -32,13 +32,14 @@ class SSAInt public: SSAInt(); explicit SSAInt(int constant); - SSAInt(SSAFloat f); + SSAInt(SSAFloat f, bool uint); explicit SSAInt(llvm::Value *v); static SSAInt from_llvm(llvm::Value *v) { return SSAInt(v); } static llvm::Type *llvm_type(); static SSAInt MIN(SSAInt a, SSAInt b); static SSAInt MAX(SSAInt a, SSAInt b); + static SSAInt clamp(SSAInt a, SSAInt b, SSAInt c); SSAInt add(SSAInt b, bool no_unsigned_wrap, bool no_signed_wrap); SSAInt ashr(int bits); diff --git a/src/r_compiler/ssa/ssa_scope.cpp b/src/r_compiler/ssa/ssa_scope.cpp index 64b7fad051..0dbf3aca0b 100644 --- a/src/r_compiler/ssa/ssa_scope.cpp +++ b/src/r_compiler/ssa/ssa_scope.cpp @@ -56,7 +56,7 @@ llvm::IRBuilder<> &SSAScope::builder() llvm::Function *SSAScope::intrinsic(llvm::Intrinsic::ID id, llvm::ArrayRef parameter_types) { - llvm::Function *func = module()->getFunction(llvm::Intrinsic::getName(id)); + llvm::Function *func = 
module()->getFunction(llvm::Intrinsic::getName(id, parameter_types)); if (func == 0) func = llvm::Function::Create(llvm::Intrinsic::getType(context(), id, parameter_types), llvm::Function::ExternalLinkage, llvm::Intrinsic::getName(id, parameter_types), module()); return func; diff --git a/src/r_poly.cpp b/src/r_poly.cpp index 94805c9c0c..2df1a93475 100644 --- a/src/r_poly.cpp +++ b/src/r_poly.cpp @@ -222,13 +222,13 @@ void RenderPolyBsp::RenderPlane(subsector_t *sub, uint32_t subsectorDepth, bool if (!isSky) { args.SetTexture(tex); - PolyTriangleDrawer::draw(args, PolyDrawVariant::Draw); - PolyTriangleDrawer::draw(args, PolyDrawVariant::Stencil); + PolyTriangleDrawer::draw(args, TriDrawVariant::Draw); + PolyTriangleDrawer::draw(args, TriDrawVariant::Stencil); } else { args.stencilwritevalue = 255; - PolyTriangleDrawer::draw(args, PolyDrawVariant::Stencil); + PolyTriangleDrawer::draw(args, TriDrawVariant::Stencil); for (uint32_t i = 0; i < sub->numlines; i++) { @@ -313,7 +313,7 @@ void RenderPolyBsp::RenderPlane(subsector_t *sub, uint32_t subsectorDepth, bool args.vinput = wallvert; args.vcount = 4; - PolyTriangleDrawer::draw(args, PolyDrawVariant::Stencil); + PolyTriangleDrawer::draw(args, TriDrawVariant::Stencil); } } } @@ -568,7 +568,7 @@ void RenderPolyBsp::AddSprite(AActor *thing, subsector_t *sub, uint32_t subsecto args.stenciltestvalue = 0; args.stencilwritevalue = 1; args.SetTexture(tex); - PolyTriangleDrawer::draw(args, PolyDrawVariant::DrawSubsector); + PolyTriangleDrawer::draw(args, TriDrawVariant::DrawSubsector); } void RenderPolyBsp::AddWallSprite(AActor *thing, subsector_t *sub, uint32_t subsectorDepth) @@ -1234,8 +1234,8 @@ void RenderPolyWall::Render(const TriMatrix &worldToClip) args.stencilwritevalue = 1; args.SetTexture(tex); - PolyTriangleDrawer::draw(args, PolyDrawVariant::Draw); - PolyTriangleDrawer::draw(args, PolyDrawVariant::Stencil); + PolyTriangleDrawer::draw(args, TriDrawVariant::Draw); + PolyTriangleDrawer::draw(args, 
TriDrawVariant::Stencil); } FTexture *RenderPolyWall::GetTexture() @@ -1567,7 +1567,7 @@ void PolySkyDome::RenderRow(PolyDrawArgs &args, int row) args.vcount = mPrimStart[row + 1] - mPrimStart[row]; args.mode = TriangleDrawMode::Strip; args.ccw = false; - PolyTriangleDrawer::draw(args, PolyDrawVariant::Draw); + PolyTriangleDrawer::draw(args, TriDrawVariant::Draw); } void PolySkyDome::RenderCapColorRow(PolyDrawArgs &args, FTexture *skytex, int row, bool bottomCap) @@ -1581,7 +1581,7 @@ void PolySkyDome::RenderCapColorRow(PolyDrawArgs &args, FTexture *skytex, int ro args.mode = TriangleDrawMode::Fan; args.ccw = bottomCap; args.solidcolor = solid; - PolyTriangleDrawer::draw(args, PolyDrawVariant::Fill); + PolyTriangleDrawer::draw(args, TriDrawVariant::Fill); } void PolySkyDome::Render(const TriMatrix &worldToClip) diff --git a/src/r_poly.h b/src/r_poly.h index b957aad593..a9ae5e33df 100644 --- a/src/r_poly.h +++ b/src/r_poly.h @@ -106,7 +106,7 @@ public: void Render(); void RenderScreenSprites(); - static const uint32_t SkySubsectorDepth = 0xffffffff; + static const uint32_t SkySubsectorDepth = 0x7fffffff; private: void RenderNode(void *node); diff --git a/src/r_poly_triangle.cpp b/src/r_poly_triangle.cpp index 27177fa3e2..ed1ed0b51b 100644 --- a/src/r_poly_triangle.cpp +++ b/src/r_poly_triangle.cpp @@ -40,7 +40,7 @@ #include #endif -void PolyTriangleDrawer::draw(const PolyDrawArgs &args, PolyDrawVariant variant) +void PolyTriangleDrawer::draw(const PolyDrawArgs &args, TriDrawVariant variant) { if (r_swtruecolor) DrawerCommandQueue::QueueCommand(args, variant); @@ -48,19 +48,20 @@ void PolyTriangleDrawer::draw(const PolyDrawArgs &args, PolyDrawVariant variant) draw_arrays(args, variant, nullptr); } -void PolyTriangleDrawer::draw_arrays(const PolyDrawArgs &drawargs, PolyDrawVariant variant, WorkerThreadData *thread) +void PolyTriangleDrawer::draw_arrays(const PolyDrawArgs &drawargs, TriDrawVariant variant, WorkerThreadData *thread) { if (drawargs.vcount < 3) return; + 
auto llvm = LLVMDrawers::Instance(); void(*drawfunc)(const TriDrawTriangleArgs *, WorkerThreadData *); switch (variant) { default: - case PolyDrawVariant::Draw: drawfunc = r_swtruecolor ? ScreenPolyTriangleDrawer::draw32 : ScreenPolyTriangleDrawer::draw; break; - case PolyDrawVariant::Fill: drawfunc = r_swtruecolor ? ScreenPolyTriangleDrawer::fill32 : ScreenPolyTriangleDrawer::fill; break; - case PolyDrawVariant::DrawSubsector: drawfunc = r_swtruecolor ? ScreenPolyTriangleDrawer::drawsubsector32 : ScreenPolyTriangleDrawer::draw; break; - case PolyDrawVariant::Stencil: drawfunc = ScreenPolyTriangleDrawer::stencil; break; + case TriDrawVariant::Draw: drawfunc = r_swtruecolor ? ScreenPolyTriangleDrawer::draw32 : ScreenPolyTriangleDrawer::draw; break; + case TriDrawVariant::Fill: drawfunc = r_swtruecolor ? ScreenPolyTriangleDrawer::fill32 : ScreenPolyTriangleDrawer::fill; break; + case TriDrawVariant::DrawSubsector: drawfunc = r_swtruecolor ? llvm->TriDrawSubsector32 : llvm->TriDrawSubsector8; break; + case TriDrawVariant::Stencil: drawfunc = ScreenPolyTriangleDrawer::stencil; break; } TriDrawTriangleArgs args; @@ -1596,7 +1597,7 @@ float ScreenPolyTriangleDrawer::grady(float x0, float y0, float x1, float y1, fl ///////////////////////////////////////////////////////////////////////////// -DrawPolyTrianglesCommand::DrawPolyTrianglesCommand(const PolyDrawArgs &args, PolyDrawVariant variant) +DrawPolyTrianglesCommand::DrawPolyTrianglesCommand(const PolyDrawArgs &args, TriDrawVariant variant) : args(args), variant(variant) { } diff --git a/src/r_poly_triangle.h b/src/r_poly_triangle.h index b66df78981..1996308ca0 100644 --- a/src/r_poly_triangle.h +++ b/src/r_poly_triangle.h @@ -28,14 +28,6 @@ struct TriDrawTriangleArgs; -enum class PolyDrawVariant -{ - Draw, - Fill, - DrawSubsector, - Stencil, -}; - class PolyDrawArgs { public: @@ -69,11 +61,11 @@ public: class PolyTriangleDrawer { public: - static void draw(const PolyDrawArgs &args, PolyDrawVariant variant); + static 
void draw(const PolyDrawArgs &args, TriDrawVariant variant); private: static TriVertex shade_vertex(const TriUniforms &uniforms, TriVertex v); - static void draw_arrays(const PolyDrawArgs &args, PolyDrawVariant variant, WorkerThreadData *thread); + static void draw_arrays(const PolyDrawArgs &args, TriDrawVariant variant, WorkerThreadData *thread); static void draw_shaded_triangle(const TriVertex *vertices, bool ccw, TriDrawTriangleArgs *args, WorkerThreadData *thread, void(*drawfunc)(const TriDrawTriangleArgs *, WorkerThreadData *)); static bool cullhalfspace(float clipdistance1, float clipdistance2, float &t1, float &t2); static void clipedge(const TriVertex *verts, TriVertex *clippedvert, int &numclipvert); @@ -245,14 +237,14 @@ private: class DrawPolyTrianglesCommand : public DrawerCommand { public: - DrawPolyTrianglesCommand(const PolyDrawArgs &args, PolyDrawVariant variant); + DrawPolyTrianglesCommand(const PolyDrawArgs &args, TriDrawVariant variant); void Execute(DrawerThread *thread) override; FString DebugInfo() override; private: PolyDrawArgs args; - PolyDrawVariant variant; + TriDrawVariant variant; }; #endif From eb4021b9978f5aea024951268ea9963c41b27ef1 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 12 Nov 2016 10:21:02 +0100 Subject: [PATCH 304/912] Codegen for all triangle variants --- .../fixedfunction/drawtrianglecodegen.cpp | 325 +++++++++++++----- .../fixedfunction/drawtrianglecodegen.h | 10 + src/r_compiler/llvmdrawers.h | 1 + src/r_compiler/ssa/ssa_bool.cpp | 39 +++ src/r_compiler/ssa/ssa_bool.h | 9 + src/r_compiler/ssa/ssa_int.cpp | 5 + src/r_compiler/ssa/ssa_int.h | 1 + src/r_poly.cpp | 2 +- src/r_poly_triangle.cpp | 6 +- src/r_poly_triangle.h | 4 +- 10 files changed, 319 insertions(+), 83 deletions(-) diff --git a/src/r_compiler/fixedfunction/drawtrianglecodegen.cpp b/src/r_compiler/fixedfunction/drawtrianglecodegen.cpp index 13fb40d048..5c8b9a7369 100644 --- a/src/r_compiler/fixedfunction/drawtrianglecodegen.cpp +++ 
b/src/r_compiler/fixedfunction/drawtrianglecodegen.cpp @@ -233,9 +233,17 @@ void DrawTriangleCodegen::LoopBlockX(TriDrawVariant variant, bool truecolor) SSAFloat lightscale = SSAFloat::clamp((shade - SSAFloat::MIN(SSAFloat(24.0f), vis)) / 32.0f, SSAFloat(0.0f), SSAFloat(31.0f / 32.0f)); diminishedlight = SSAInt(SSAFloat::clamp((1.0f - lightscale) * 256.0f + 0.5f, SSAFloat(0.0f), SSAFloat(256.0f)), false); + SetStencilBlock(x / 8 + y / 8 * stencilPitch); + + SSABool covered = a == SSAInt(0xF) && b == SSAInt(0xF) && c == SSAInt(0xF) && !clipneeded; + if (variant != TriDrawVariant::DrawSubsector) + { + covered = covered && StencilIsSingleValue(); + } + // Accept whole block when totally covered SSAIfBlock branch_covered; - branch_covered.if_block(a == SSAInt(0xF) && b == SSAInt(0xF) && c == SSAInt(0xF) && !clipneeded); + branch_covered.if_block(covered); { LoopFullBlock(variant, truecolor); } @@ -254,58 +262,86 @@ void DrawTriangleCodegen::LoopBlockX(TriDrawVariant variant, bool truecolor) void DrawTriangleCodegen::LoopFullBlock(TriDrawVariant variant, bool truecolor) { - int pixelsize = truecolor ? 
4 : 1; - - stack_iy.store(SSAInt(0)); - stack_buffer.store(dest); - stack_subsectorbuffer.store(subsectorGBuffer); - - SSAForBlock loopy; - SSAInt iy = stack_iy.load(); - SSAUBytePtr buffer = stack_buffer.load(); - SSAIntPtr subsectorbuffer = stack_subsectorbuffer.load(); - loopy.loop_block(iy < SSAInt(q), q); + SSAIfBlock branch_stenciltest; + if (variant != TriDrawVariant::DrawSubsector) { - SSAInt varyingStep[TriVertex::NumVarying]; - for (int i = 0; i < TriVertex::NumVarying; i++) - { - SSAFloat pos = varyingTL[i] + varyingBL[i] * SSAFloat(iy); - SSAFloat step = (varyingTR[i] + varyingBR[i] * SSAFloat(iy) - pos) * (1.0f / q); - - stack_varying[i].store(SSAInt((pos - SSAFloat::floor(pos)) * SSAFloat((float)0x100000000LL), true)); - varyingStep[i] = SSAInt(step * SSAFloat((float)0x100000000LL), true); - } - - stack_ix.store(x); - SSAForBlock loopx; - SSAInt ix = stack_ix.load(); - SSAInt varying[TriVertex::NumVarying]; - for (int i = 0; i < TriVertex::NumVarying; i++) - varying[i] = stack_varying[i].load(); - loopx.loop_block(ix < x + q, q); - { - SSAIfBlock branch; - branch.if_block(subsectorbuffer[ix].load(true) >= subsectorDepth); - { - if (truecolor) - ProcessPixel(buffer[ix * 4], subsectorbuffer[ix], varying, variant, truecolor); - else - ProcessPixel(buffer[ix], subsectorbuffer[ix], varying, variant, truecolor); - } - branch.end_block(); - - for (int i = 0; i < TriVertex::NumVarying; i++) - stack_varying[i].store(varying[i] + varyingStep[i]); - - stack_ix.store(ix + 1); - } - loopx.end_block(); - - stack_buffer.store(buffer[pitch * pixelsize]); - stack_subsectorbuffer.store(subsectorbuffer[pitch]); - stack_iy.store(iy + 1); + branch_stenciltest.if_block(StencilGetSingle() == stencilTestValue); + } + + if (variant == TriDrawVariant::Stencil) + { + StencilClear(stencilWriteValue); + } + else + { + int pixelsize = truecolor ? 
4 : 1; + + stack_iy.store(SSAInt(0)); + stack_buffer.store(dest); + stack_subsectorbuffer.store(subsectorGBuffer); + + SSAForBlock loopy; + SSAInt iy = stack_iy.load(); + SSAUBytePtr buffer = stack_buffer.load(); + SSAIntPtr subsectorbuffer = stack_subsectorbuffer.load(); + loopy.loop_block(iy < SSAInt(q), q); + { + SSAInt varyingStep[TriVertex::NumVarying]; + for (int i = 0; i < TriVertex::NumVarying; i++) + { + SSAFloat pos = varyingTL[i] + varyingBL[i] * SSAFloat(iy); + SSAFloat step = (varyingTR[i] + varyingBR[i] * SSAFloat(iy) - pos) * (1.0f / q); + + stack_varying[i].store(SSAInt((pos - SSAFloat::floor(pos)) * SSAFloat((float)0x100000000LL), true)); + varyingStep[i] = SSAInt(step * SSAFloat((float)0x100000000LL), true); + } + + stack_ix.store(x); + SSAForBlock loopx; + SSAInt ix = stack_ix.load(); + SSAInt varying[TriVertex::NumVarying]; + for (int i = 0; i < TriVertex::NumVarying; i++) + varying[i] = stack_varying[i].load(); + loopx.loop_block(ix < x + q, q); + { + if (variant == TriDrawVariant::DrawSubsector) + { + SSAIfBlock branch; + branch.if_block(subsectorbuffer[ix].load(true) >= subsectorDepth); + { + if (truecolor) + ProcessPixel(buffer[ix * 4], subsectorbuffer[ix], varying, variant, truecolor); + else + ProcessPixel(buffer[ix], subsectorbuffer[ix], varying, variant, truecolor); + } + branch.end_block(); + } + else + { + if (truecolor) + ProcessPixel(buffer[ix * 4], subsectorbuffer[ix], varying, variant, truecolor); + else + ProcessPixel(buffer[ix], subsectorbuffer[ix], varying, variant, truecolor); + } + + for (int i = 0; i < TriVertex::NumVarying; i++) + stack_varying[i].store(varying[i] + varyingStep[i]); + + stack_ix.store(ix + 1); + } + loopx.end_block(); + + stack_buffer.store(buffer[pitch * pixelsize]); + stack_subsectorbuffer.store(subsectorbuffer[pitch]); + stack_iy.store(iy + 1); + } + loopy.end_block(); + } + + if (variant != TriDrawVariant::DrawSubsector) + { + branch_stenciltest.end_block(); } - loopy.end_block(); } void 
DrawTriangleCodegen::LoopPartialBlock(TriDrawVariant variant, bool truecolor) @@ -354,14 +390,31 @@ void DrawTriangleCodegen::LoopPartialBlock(TriDrawVariant variant, bool truecolo loopx.loop_block(ix < SSAInt(q), q); { SSABool visible = (ix + x >= clipleft) && (ix + x < clipright) && (cliptop <= y + iy) && (clipbottom > y + iy); + SSABool covered = CX1 > SSAInt(0) && CX2 > SSAInt(0) && CX3 > SSAInt(0) && visible; + + if (variant == TriDrawVariant::DrawSubsector) + { + covered = covered && subsectorbuffer[ix + x].load(true) >= subsectorDepth; + } + else + { + covered = covered && StencilGet(ix, iy) == stencilTestValue; + } SSAIfBlock branch; - branch.if_block(CX1 > SSAInt(0) && CX2 > SSAInt(0) && CX3 > SSAInt(0) && visible && subsectorbuffer[ix + x].load(true) >= subsectorDepth); + branch.if_block(covered); { - if (truecolor) - ProcessPixel(buffer[(ix + x) * 4], subsectorbuffer[ix + x], varying, variant, truecolor); + if (variant == TriDrawVariant::Stencil) + { + StencilSet(ix, iy, stencilWriteValue); + } else - ProcessPixel(buffer[ix + x], subsectorbuffer[ix + x], varying, variant, truecolor); + { + if (truecolor) + ProcessPixel(buffer[(ix + x) * 4], subsectorbuffer[ix + x], varying, variant, truecolor); + else + ProcessPixel(buffer[ix + x], subsectorbuffer[ix + x], varying, variant, truecolor); + } } branch.end_block(); @@ -387,39 +440,157 @@ void DrawTriangleCodegen::LoopPartialBlock(TriDrawVariant variant, bool truecolo void DrawTriangleCodegen::ProcessPixel(SSAUBytePtr buffer, SSAIntPtr subsectorbuffer, SSAInt *varying, TriDrawVariant variant, bool truecolor) { - SSAInt ufrac = varying[0]; - SSAInt vfrac = varying[1]; - - SSAInt upos = ((ufrac >> 16) * textureWidth) >> 16; - SSAInt vpos = ((vfrac >> 16) * textureHeight) >> 16; - SSAInt uvoffset = upos * textureHeight + vpos; - - if (truecolor) + if (variant == TriDrawVariant::Fill) { - SSAVec4i fg = texturePixels[uvoffset * 4].load_vec4ub(true); - SSAInt fg_alpha = fg[3]; - fg = (fg * diminishedlight) >> 8; - 
fg.insert(3, fg_alpha); - - SSAIfBlock branch_transparency; - branch_transparency.if_block(fg_alpha > SSAInt(127)); + if (truecolor) { - buffer.store_vec4ub(fg); + buffer.store_vec4ub(SSAVec4i::unpack(solidcolor)); } - branch_transparency.end_block(); + else + { + //buffer.store(solidcolor); + } + subsectorbuffer.store(subsectorDepth); } else { - SSAUByte palindex = texturePixels[uvoffset].load(true); - SSAIfBlock branch_transparency; - branch_transparency.if_block(!(palindex.zext_int() == SSAInt(0))); + SSAInt ufrac = varying[0]; + SSAInt vfrac = varying[1]; + + SSAInt upos = ((ufrac >> 16) * textureWidth) >> 16; + SSAInt vpos = ((vfrac >> 16) * textureHeight) >> 16; + SSAInt uvoffset = upos * textureHeight + vpos; + + if (truecolor) { - buffer.store(palindex); + SSAVec4i fg = texturePixels[uvoffset * 4].load_vec4ub(true); + SSAInt fg_alpha = fg[3]; + fg = (fg * diminishedlight) >> 8; + fg.insert(3, fg_alpha); + + if (variant == TriDrawVariant::DrawMasked || variant == TriDrawVariant::DrawSubsector) + { + SSAIfBlock branch_transparency; + branch_transparency.if_block(fg_alpha > SSAInt(127)); + { + buffer.store_vec4ub(fg); + if (variant != TriDrawVariant::DrawSubsector) + subsectorbuffer.store(subsectorDepth); + } + branch_transparency.end_block(); + } + else + { + buffer.store_vec4ub(fg); + subsectorbuffer.store(subsectorDepth); + } + } + else + { + SSAUByte palindex = texturePixels[uvoffset].load(true); + + if (variant == TriDrawVariant::DrawMasked || variant == TriDrawVariant::DrawSubsector) + { + SSAIfBlock branch_transparency; + branch_transparency.if_block(!(palindex.zext_int() == SSAInt(0))); + { + buffer.store(palindex); + if (variant != TriDrawVariant::DrawSubsector) + subsectorbuffer.store(subsectorDepth); + } + branch_transparency.end_block(); + } + else + { + buffer.store(palindex); + subsectorbuffer.store(subsectorDepth); + } } - branch_transparency.end_block(); } } +void DrawTriangleCodegen::SetStencilBlock(SSAInt block) +{ + StencilBlock = 
stencilValues[block * 64]; + StencilBlockMask = stencilMasks[block]; +} + +void DrawTriangleCodegen::StencilSet(SSAInt x, SSAInt y, SSAUByte value) +{ + SSAInt mask = StencilBlockMask.load(false); + + SSAIfBlock branchNeedsUpdate; + branchNeedsUpdate.if_block(!(mask == SSAInt(0xffffffff) && StencilBlock[0].load(false) == value)); + + SSAIfBlock branchFirstSet; + branchFirstSet.if_block(mask == SSAInt(0xffffffff)); + { + SSAUByte val0 = StencilBlock[0].load(false); + for (int i = 1; i < 8 * 8; i++) + StencilBlock[i].store(val0); + } + branchFirstSet.end_block(); + + SSAIfBlock branchNeedsUpdate2; + branchNeedsUpdate2.if_block(!(StencilBlock[x + y * 8].load(false) == value)); + + StencilBlock[x + y * 8].store(value); + + SSAInt leveloffset = SSAInt(0); + for (int i = 1; i < 4; i++) + { + x = x >> 1; + y = y >> 1; + + SSABool differs = + !(StencilBlock[(x << i) + (y << i) * 8].load(false) == value && + StencilBlock[((x + 1) << i) + (y << i) * 8].load(false) == value && + StencilBlock[(x << i) + ((y + 1) << i) * 8].load(false) == value && + StencilBlock[((x + 1) << i) + ((y + 1) << i) * 8].load(false) == value); + + SSAInt levelbit = SSAInt(1) << (leveloffset + x + y * (8 >> i)); + + mask = differs.select(mask & ~levelbit, mask | levelbit); + + leveloffset = leveloffset + (SSAInt(8) >> leveloffset) * (SSAInt(8) >> leveloffset); + } + + SSABool differs = + !(StencilBlock[0].load(false) == value && + StencilBlock[4].load(false) == value && + StencilBlock[4 * 8].load(false) == value && + StencilBlock[4 * 8 + 4].load(false) == value); + + mask = differs.select(mask & ~(1 << 22), mask | (1 << 22)); + + StencilBlockMask.store(mask); + + branchNeedsUpdate2.end_block(); + branchNeedsUpdate.end_block(); +} + +SSAUByte DrawTriangleCodegen::StencilGet(SSAInt x, SSAInt y) +{ + SSABool oneValueBlock = StencilBlockMask.load(false) == SSAInt(0xffffffff); + return oneValueBlock.select(StencilBlock[0].load(false), StencilBlock[x + y * 8].load(false)); +} + +SSAUByte 
DrawTriangleCodegen::StencilGetSingle() +{ + return StencilBlock[0].load(false); +} + +void DrawTriangleCodegen::StencilClear(SSAUByte value) +{ + StencilBlock[0].store(value); + StencilBlockMask.store(SSAInt(0xffffffff)); +} + +SSABool DrawTriangleCodegen::StencilIsSingleValue() +{ + return StencilBlockMask.load(false) == SSAInt(0xffffffff); +} + void DrawTriangleCodegen::LoadArgs(TriDrawVariant variant, bool truecolor, SSAValue args, SSAValue thread_data) { dest = args[0][0].load(true); diff --git a/src/r_compiler/fixedfunction/drawtrianglecodegen.h b/src/r_compiler/fixedfunction/drawtrianglecodegen.h index 08b848b919..6f9a6c32ad 100644 --- a/src/r_compiler/fixedfunction/drawtrianglecodegen.h +++ b/src/r_compiler/fixedfunction/drawtrianglecodegen.h @@ -47,6 +47,13 @@ private: void ProcessPixel(SSAUBytePtr buffer, SSAIntPtr subsectorbuffer, SSAInt *varying, TriDrawVariant variant, bool truecolor); + void SetStencilBlock(SSAInt block); + void StencilSet(SSAInt x, SSAInt y, SSAUByte value); + void StencilClear(SSAUByte value); + SSAUByte StencilGet(SSAInt x, SSAInt y); + SSAUByte StencilGetSingle(); + SSABool StencilIsSingleValue(); + SSAFloat gradx(SSAFloat x0, SSAFloat y0, SSAFloat x1, SSAFloat y1, SSAFloat x2, SSAFloat y2, SSAFloat c0, SSAFloat c1, SSAFloat c2); SSAFloat grady(SSAFloat x0, SSAFloat y0, SSAFloat x1, SSAFloat y1, SSAFloat x2, SSAFloat y2, SSAFloat c0, SSAFloat c1, SSAFloat c2); @@ -112,4 +119,7 @@ private: SSAFloat varyingTR[TriVertex::NumVarying]; SSAFloat varyingBL[TriVertex::NumVarying]; SSAFloat varyingBR[TriVertex::NumVarying]; + + SSAUBytePtr StencilBlock; + SSAIntPtr StencilBlockMask; }; diff --git a/src/r_compiler/llvmdrawers.h b/src/r_compiler/llvmdrawers.h index f8b49e9be1..b68c23fe10 100644 --- a/src/r_compiler/llvmdrawers.h +++ b/src/r_compiler/llvmdrawers.h @@ -263,6 +263,7 @@ struct TriDrawTriangleArgs enum class TriDrawVariant { Draw, + DrawMasked, Fill, DrawSubsector, Stencil, diff --git a/src/r_compiler/ssa/ssa_bool.cpp 
b/src/r_compiler/ssa/ssa_bool.cpp index d23d9e8ae4..65cc25c90c 100644 --- a/src/r_compiler/ssa/ssa_bool.cpp +++ b/src/r_compiler/ssa/ssa_bool.cpp @@ -22,6 +22,8 @@ #include "r_compiler/llvm_include.h" #include "ssa_bool.h" +#include "ssa_ubyte.h" +#include "ssa_value.h" #include "ssa_scope.h" SSABool::SSABool() @@ -49,6 +51,16 @@ SSAInt SSABool::zext_int() return SSAInt::from_llvm(SSAScope::builder().CreateZExt(v, SSAInt::llvm_type(), SSAScope::hint())); } +SSAInt SSABool::select(SSAInt a, SSAInt b) +{ + return SSAValue::from_llvm(SSAScope::builder().CreateSelect(v, a.v, b.v, SSAScope::hint())); +} + +SSAUByte SSABool::select(SSAUByte a, SSAUByte b) +{ + return SSAValue::from_llvm(SSAScope::builder().CreateSelect(v, a.v, b.v, SSAScope::hint())); +} + SSABool operator&&(const SSABool &a, const SSABool &b) { return SSABool::from_llvm(SSAScope::builder().CreateAnd(a.v, b.v, SSAScope::hint())); @@ -91,6 +103,33 @@ SSABool operator>(const SSAInt &a, const SSAInt &b) ///////////////////////////////////////////////////////////////////////////// +SSABool operator<(const SSAUByte &a, const SSAUByte &b) +{ + return SSABool::from_llvm(SSAScope::builder().CreateICmpSLT(a.v, b.v, SSAScope::hint())); +} + +SSABool operator<=(const SSAUByte &a, const SSAUByte &b) +{ + return SSABool::from_llvm(SSAScope::builder().CreateICmpSLE(a.v, b.v, SSAScope::hint())); +} + +SSABool operator==(const SSAUByte &a, const SSAUByte &b) +{ + return SSABool::from_llvm(SSAScope::builder().CreateICmpEQ(a.v, b.v, SSAScope::hint())); +} + +SSABool operator>=(const SSAUByte &a, const SSAUByte &b) +{ + return SSABool::from_llvm(SSAScope::builder().CreateICmpSGE(a.v, b.v, SSAScope::hint())); +} + +SSABool operator>(const SSAUByte &a, const SSAUByte &b) +{ + return SSABool::from_llvm(SSAScope::builder().CreateICmpSGT(a.v, b.v, SSAScope::hint())); +} + +///////////////////////////////////////////////////////////////////////////// + SSABool operator<(const SSAFloat &a, const SSAFloat &b) { return 
SSABool::from_llvm(SSAScope::builder().CreateFCmpOLT(a.v, b.v, SSAScope::hint())); diff --git a/src/r_compiler/ssa/ssa_bool.h b/src/r_compiler/ssa/ssa_bool.h index 4c04941d5e..2ed6e7d4a6 100644 --- a/src/r_compiler/ssa/ssa_bool.h +++ b/src/r_compiler/ssa/ssa_bool.h @@ -23,6 +23,7 @@ #pragma once #include "ssa_int.h" +#include "ssa_ubyte.h" #include "ssa_float.h" namespace llvm { class Value; } @@ -38,6 +39,8 @@ public: static llvm::Type *llvm_type(); SSAInt zext_int(); + SSAInt select(SSAInt a, SSAInt b); + SSAUByte select(SSAUByte a, SSAUByte b); llvm::Value *v; }; @@ -53,6 +56,12 @@ SSABool operator==(const SSAInt &a, const SSAInt &b); SSABool operator>=(const SSAInt &a, const SSAInt &b); SSABool operator>(const SSAInt &a, const SSAInt &b); +SSABool operator<(const SSAUByte &a, const SSAUByte &b); +SSABool operator<=(const SSAUByte &a, const SSAUByte &b); +SSABool operator==(const SSAUByte &a, const SSAUByte &b); +SSABool operator>=(const SSAUByte &a, const SSAUByte &b); +SSABool operator>(const SSAUByte &a, const SSAUByte &b); + SSABool operator<(const SSAFloat &a, const SSAFloat &b); SSABool operator<=(const SSAFloat &a, const SSAFloat &b); SSABool operator==(const SSAFloat &a, const SSAFloat &b); diff --git a/src/r_compiler/ssa/ssa_int.cpp b/src/r_compiler/ssa/ssa_int.cpp index ac14af8b6c..01ad92e3f6 100644 --- a/src/r_compiler/ssa/ssa_int.cpp +++ b/src/r_compiler/ssa/ssa_int.cpp @@ -195,3 +195,8 @@ SSAInt operator|(const SSAInt &a, const SSAInt &b) { return SSAInt::from_llvm(SSAScope::builder().CreateOr(a.v, b.v, SSAScope::hint())); } + +SSAInt operator~(const SSAInt &a) +{ + return SSAInt::from_llvm(SSAScope::builder().CreateNot(a.v, SSAScope::hint())); +} diff --git a/src/r_compiler/ssa/ssa_int.h b/src/r_compiler/ssa/ssa_int.h index a655e3d2e7..ae6e6074fc 100644 --- a/src/r_compiler/ssa/ssa_int.h +++ b/src/r_compiler/ssa/ssa_int.h @@ -74,3 +74,4 @@ SSAInt operator&(const SSAInt &a, int b); SSAInt operator&(const SSAInt &a, const SSAInt &b); SSAInt 
operator|(const SSAInt &a, int b); SSAInt operator|(const SSAInt &a, const SSAInt &b); +SSAInt operator~(const SSAInt &a); diff --git a/src/r_poly.cpp b/src/r_poly.cpp index 2df1a93475..403adeec05 100644 --- a/src/r_poly.cpp +++ b/src/r_poly.cpp @@ -1524,7 +1524,7 @@ void PolySkyDome::SkyVertex(int r, int c, bool zflip) // And finally the vertex. TriVertex vert; - vert = SetVertexXYZ(-pos.X, z - 1.f, pos.Y, u * 4.0f, v + 0.5f/*, color*/); + vert = SetVertexXYZ(-pos.X, z - 1.f, pos.Y, u * 4.0f, v * 1.2f + 0.5f/*, color*/); mVertices.Push(vert); } diff --git a/src/r_poly_triangle.cpp b/src/r_poly_triangle.cpp index ed1ed0b51b..2b8c166dce 100644 --- a/src/r_poly_triangle.cpp +++ b/src/r_poly_triangle.cpp @@ -58,10 +58,10 @@ void PolyTriangleDrawer::draw_arrays(const PolyDrawArgs &drawargs, TriDrawVarian switch (variant) { default: - case TriDrawVariant::Draw: drawfunc = r_swtruecolor ? ScreenPolyTriangleDrawer::draw32 : ScreenPolyTriangleDrawer::draw; break; - case TriDrawVariant::Fill: drawfunc = r_swtruecolor ? ScreenPolyTriangleDrawer::fill32 : ScreenPolyTriangleDrawer::fill; break; + case TriDrawVariant::Draw: drawfunc = r_swtruecolor ? llvm->TriDraw32: llvm->TriDraw8; break; + case TriDrawVariant::Fill: drawfunc = r_swtruecolor ? llvm->TriFill32 : llvm->TriFill8; break; case TriDrawVariant::DrawSubsector: drawfunc = r_swtruecolor ? 
llvm->TriDrawSubsector32 : llvm->TriDrawSubsector8; break; - case TriDrawVariant::Stencil: drawfunc = ScreenPolyTriangleDrawer::stencil; break; + case TriDrawVariant::Stencil: drawfunc = ScreenPolyTriangleDrawer::stencil/*llvm->TriStencil*/; break; } TriDrawTriangleArgs args; diff --git a/src/r_poly_triangle.h b/src/r_poly_triangle.h index 1996308ca0..a21ff1d6db 100644 --- a/src/r_poly_triangle.h +++ b/src/r_poly_triangle.h @@ -105,7 +105,7 @@ public: x >>= 1; y >>= 1; - bool same = + bool differs = Values[(x << i) + (y << i) * 8] != value || Values[((x + 1) << i) + (y << i) * 8] != value || Values[(x << i) + ((y + 1) << i) * 8] != value || @@ -113,7 +113,7 @@ public: int levelbit = 1 << (leveloffset + x + y * (8 >> i)); - if (same) + if (differs) ValueMask = ValueMask & ~levelbit; else ValueMask = ValueMask | levelbit; From e3dc9c93b91e43229676c24800fef8ff5a020c17 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 12 Nov 2016 13:50:28 +0100 Subject: [PATCH 305/912] Use a simpler algorithm for the stencil buffer as the old one was too slow --- .../fixedfunction/drawtrianglecodegen.cpp | 52 ++++--------------- src/r_compiler/ssa/ssa_int.cpp | 6 +++ src/r_compiler/ssa/ssa_int.h | 3 ++ src/r_poly.cpp | 3 ++ src/r_poly_triangle.cpp | 13 ++++- src/r_poly_triangle.h | 52 ++++--------------- 6 files changed, 44 insertions(+), 85 deletions(-) diff --git a/src/r_compiler/fixedfunction/drawtrianglecodegen.cpp b/src/r_compiler/fixedfunction/drawtrianglecodegen.cpp index 5c8b9a7369..4819a28cff 100644 --- a/src/r_compiler/fixedfunction/drawtrianglecodegen.cpp +++ b/src/r_compiler/fixedfunction/drawtrianglecodegen.cpp @@ -520,75 +520,41 @@ void DrawTriangleCodegen::StencilSet(SSAInt x, SSAInt y, SSAUByte value) SSAInt mask = StencilBlockMask.load(false); SSAIfBlock branchNeedsUpdate; - branchNeedsUpdate.if_block(!(mask == SSAInt(0xffffffff) && StencilBlock[0].load(false) == value)); + branchNeedsUpdate.if_block(!(mask == (SSAInt(0xffffff00) | value.zext_int()))); 
SSAIfBlock branchFirstSet; - branchFirstSet.if_block(mask == SSAInt(0xffffffff)); + branchFirstSet.if_block((mask & SSAInt(0xffffff00)) == SSAInt(0xffffff00)); { - SSAUByte val0 = StencilBlock[0].load(false); - for (int i = 1; i < 8 * 8; i++) + SSAUByte val0 = mask.trunc_ubyte(); + for (int i = 0; i < 8 * 8; i++) StencilBlock[i].store(val0); + StencilBlockMask.store(SSAInt(0)); } branchFirstSet.end_block(); - SSAIfBlock branchNeedsUpdate2; - branchNeedsUpdate2.if_block(!(StencilBlock[x + y * 8].load(false) == value)); - StencilBlock[x + y * 8].store(value); - SSAInt leveloffset = SSAInt(0); - for (int i = 1; i < 4; i++) - { - x = x >> 1; - y = y >> 1; - - SSABool differs = - !(StencilBlock[(x << i) + (y << i) * 8].load(false) == value && - StencilBlock[((x + 1) << i) + (y << i) * 8].load(false) == value && - StencilBlock[(x << i) + ((y + 1) << i) * 8].load(false) == value && - StencilBlock[((x + 1) << i) + ((y + 1) << i) * 8].load(false) == value); - - SSAInt levelbit = SSAInt(1) << (leveloffset + x + y * (8 >> i)); - - mask = differs.select(mask & ~levelbit, mask | levelbit); - - leveloffset = leveloffset + (SSAInt(8) >> leveloffset) * (SSAInt(8) >> leveloffset); - } - - SSABool differs = - !(StencilBlock[0].load(false) == value && - StencilBlock[4].load(false) == value && - StencilBlock[4 * 8].load(false) == value && - StencilBlock[4 * 8 + 4].load(false) == value); - - mask = differs.select(mask & ~(1 << 22), mask | (1 << 22)); - - StencilBlockMask.store(mask); - - branchNeedsUpdate2.end_block(); branchNeedsUpdate.end_block(); } SSAUByte DrawTriangleCodegen::StencilGet(SSAInt x, SSAInt y) { - SSABool oneValueBlock = StencilBlockMask.load(false) == SSAInt(0xffffffff); - return oneValueBlock.select(StencilBlock[0].load(false), StencilBlock[x + y * 8].load(false)); + return StencilIsSingleValue().select(StencilBlockMask.load(false).trunc_ubyte(), StencilBlock[x + y * 8].load(false)); } SSAUByte DrawTriangleCodegen::StencilGetSingle() { - return 
StencilBlock[0].load(false); + return StencilBlockMask.load(false).trunc_ubyte(); } void DrawTriangleCodegen::StencilClear(SSAUByte value) { - StencilBlock[0].store(value); - StencilBlockMask.store(SSAInt(0xffffffff)); + StencilBlockMask.store(SSAInt(0xffffff00) | value.zext_int()); } SSABool DrawTriangleCodegen::StencilIsSingleValue() { - return StencilBlockMask.load(false) == SSAInt(0xffffffff); + return (StencilBlockMask.load(false) & SSAInt(0xffffff00)) == SSAInt(0xffffff00); } void DrawTriangleCodegen::LoadArgs(TriDrawVariant variant, bool truecolor, SSAValue args, SSAValue thread_data) diff --git a/src/r_compiler/ssa/ssa_int.cpp b/src/r_compiler/ssa/ssa_int.cpp index 01ad92e3f6..b0cd26821a 100644 --- a/src/r_compiler/ssa/ssa_int.cpp +++ b/src/r_compiler/ssa/ssa_int.cpp @@ -23,6 +23,7 @@ #include "r_compiler/llvm_include.h" #include "ssa_int.h" #include "ssa_float.h" +#include "ssa_ubyte.h" #include "ssa_bool.h" #include "ssa_scope.h" @@ -81,6 +82,11 @@ SSAInt SSAInt::ashr(int bits) return SSAInt::from_llvm(SSAScope::builder().CreateAShr(v, bits, SSAScope::hint())); } +SSAUByte SSAInt::trunc_ubyte() +{ + return SSAUByte::from_llvm(SSAScope::builder().CreateTrunc(v, SSAUByte::llvm_type(), SSAScope::hint())); +} + SSAInt operator+(const SSAInt &a, const SSAInt &b) { return SSAInt::from_llvm(SSAScope::builder().CreateAdd(a.v, b.v, SSAScope::hint())); diff --git a/src/r_compiler/ssa/ssa_int.h b/src/r_compiler/ssa/ssa_int.h index ae6e6074fc..dab8adcb99 100644 --- a/src/r_compiler/ssa/ssa_int.h +++ b/src/r_compiler/ssa/ssa_int.h @@ -26,6 +26,7 @@ namespace llvm { class Value; } namespace llvm { class Type; } class SSAFloat; +class SSAUByte; class SSAInt { @@ -44,6 +45,8 @@ public: SSAInt add(SSAInt b, bool no_unsigned_wrap, bool no_signed_wrap); SSAInt ashr(int bits); + SSAUByte trunc_ubyte(); + llvm::Value *v; }; diff --git a/src/r_poly.cpp b/src/r_poly.cpp index 403adeec05..a4141b94c1 100644 --- a/src/r_poly.cpp +++ b/src/r_poly.cpp @@ -41,6 +41,9 @@ CVAR(Bool, 
r_debug_cull, 0, 0) void RenderPolyBsp::Render() { + if (!r_swtruecolor) // Disable pal rendering for now + return; + // Setup working buffers PolyVertexBuffer::Clear(); SolidSegments.clear(); diff --git a/src/r_poly_triangle.cpp b/src/r_poly_triangle.cpp index 2b8c166dce..3d444d2ada 100644 --- a/src/r_poly_triangle.cpp +++ b/src/r_poly_triangle.cpp @@ -55,14 +55,25 @@ void PolyTriangleDrawer::draw_arrays(const PolyDrawArgs &drawargs, TriDrawVarian auto llvm = LLVMDrawers::Instance(); void(*drawfunc)(const TriDrawTriangleArgs *, WorkerThreadData *); +#if 1 switch (variant) { default: case TriDrawVariant::Draw: drawfunc = r_swtruecolor ? llvm->TriDraw32: llvm->TriDraw8; break; case TriDrawVariant::Fill: drawfunc = r_swtruecolor ? llvm->TriFill32 : llvm->TriFill8; break; case TriDrawVariant::DrawSubsector: drawfunc = r_swtruecolor ? llvm->TriDrawSubsector32 : llvm->TriDrawSubsector8; break; - case TriDrawVariant::Stencil: drawfunc = ScreenPolyTriangleDrawer::stencil/*llvm->TriStencil*/; break; + case TriDrawVariant::Stencil: drawfunc = llvm->TriStencil; break; } +#else + switch (variant) + { + default: + case TriDrawVariant::Draw: drawfunc = r_swtruecolor ? ScreenPolyTriangleDrawer::draw32 : ScreenPolyTriangleDrawer::draw; break; + case TriDrawVariant::Fill: drawfunc = r_swtruecolor ? ScreenPolyTriangleDrawer::fill32 : ScreenPolyTriangleDrawer::fill; break; + case TriDrawVariant::DrawSubsector: drawfunc = r_swtruecolor ? 
ScreenPolyTriangleDrawer::drawsubsector32 : llvm->TriDrawSubsector8; break; + case TriDrawVariant::Stencil: drawfunc = ScreenPolyTriangleDrawer::stencil; break; + } +#endif TriDrawTriangleArgs args; args.dest = dc_destorg; diff --git a/src/r_poly_triangle.h b/src/r_poly_triangle.h index a21ff1d6db..f3f63f8f11 100644 --- a/src/r_poly_triangle.h +++ b/src/r_poly_triangle.h @@ -85,70 +85,40 @@ public: void Set(int x, int y, uint8_t value) { - if (ValueMask == 0xffffffff) + if ((ValueMask & 0xffffff00) == 0xffffff00) { - if (Values[0] == value) + if ((ValueMask & 0xff) == value) return; - for (int i = 1; i < 8 * 8; i++) - Values[i] = Values[0]; + for (int i = 0; i < 8 * 8; i++) + Values[i] = (ValueMask & 0xff); + ValueMask = 0; } - if (Values[x + y * 8] == value) - return; - Values[x + y * 8] = value; - - int leveloffset = 0; - for (int i = 1; i < 4; i++) - { - x >>= 1; - y >>= 1; - - bool differs = - Values[(x << i) + (y << i) * 8] != value || - Values[((x + 1) << i) + (y << i) * 8] != value || - Values[(x << i) + ((y + 1) << i) * 8] != value || - Values[((x + 1) << i) + ((y + 1) << i) * 8] != value; - - int levelbit = 1 << (leveloffset + x + y * (8 >> i)); - - if (differs) - ValueMask = ValueMask & ~levelbit; - else - ValueMask = ValueMask | levelbit; - - leveloffset += (8 >> leveloffset) * (8 >> leveloffset); - } - - if (Values[0] != value || Values[4] != value || Values[4 * 8] != value || Values[4 * 8 + 4] != value) - ValueMask = ValueMask & ~(1 << 22); - else - ValueMask = ValueMask | (1 << 22); } uint8_t Get(int x, int y) const { - if (ValueMask == 0xffffffff) - return Values[0]; + if (IsSingleValue()) + return ValueMask & 0xff; else return Values[x + y * 8]; } void Clear(uint8_t value) { - Values[0] = value; - ValueMask = 0xffffffff; + ValueMask = 0xffffff00 | (uint32_t)value; } bool IsSingleValue() const { - return ValueMask == 0xffffffff; + return (ValueMask & 0xffffff00) == 0xffffff00; } private: - uint8_t *Values; // [8 * 8]; - uint32_t &ValueMask; // 4 * 4 
+ 2 * 2 + 1 bits indicating is Values are the same + uint8_t *Values; + uint32_t &ValueMask; }; class PolySubsectorGBuffer From 402227d6b6fee59d3358a42cdc852a0cd3f4b863 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 12 Nov 2016 14:55:14 +0100 Subject: [PATCH 306/912] Add some intersection tests useful for culling --- src/CMakeLists.txt | 1 + src/r_poly.cpp | 103 +++++----------- src/r_poly.h | 2 + src/r_poly_intersection.cpp | 235 ++++++++++++++++++++++++++++++++++++ src/r_poly_intersection.h | 167 +++++++++++++++++++++++++ 5 files changed, 437 insertions(+), 71 deletions(-) create mode 100644 src/r_poly_intersection.cpp create mode 100644 src/r_poly_intersection.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 43a94f91af..b2cf47945b 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1053,6 +1053,7 @@ set( FASTMATH_PCH_SOURCES r_swrenderer2.cpp r_poly.cpp r_poly_triangle.cpp + r_poly_intersection.cpp r_3dfloors.cpp r_bsp.cpp r_draw.cpp diff --git a/src/r_poly.cpp b/src/r_poly.cpp index a4141b94c1..e8a34544b7 100644 --- a/src/r_poly.cpp +++ b/src/r_poly.cpp @@ -75,6 +75,8 @@ void RenderPolyBsp::Render() // Y shearing like the Doom renderer: //worldToClip = TriMatrix::viewToClip() * TriMatrix::worldToView(); + frustumPlanes = FrustumPlanes(worldToClip); + // Cull front to back if (numnodes == 0) { @@ -1023,28 +1025,20 @@ int RenderPolyBsp::PointOnSide(const DVector2 &pos, const node_t *node) bool RenderPolyBsp::CheckBBox(float *bspcoord) { - static const int checkcoord[12][4] = - { - { 3,0,2,1 }, - { 3,0,2,0 }, - { 3,1,2,0 }, - { 0 }, - { 2,0,2,1 }, - { 0,0,0,0 }, - { 3,1,3,0 }, - { 0 }, - { 2,0,3,1 }, - { 2,1,3,1 }, - { 2,1,3,0 } - }; + // Start using a quick frustum AABB test: + + AxisAlignedBoundingBox aabb(Vec3f(bspcoord[BOXLEFT], bspcoord[BOXBOTTOM], -1000.0f), Vec3f(bspcoord[BOXRIGHT], bspcoord[BOXTOP], 1000.0f)); + auto result = IntersectionTest::frustum_aabb(frustumPlanes, aabb); + if (result == IntersectionTest::outside) + 
return false; + + // Occlusion test using solid segments (which seems to be quite broken, actually): int boxx; int boxy; int boxpos; double x1, y1, x2, y2; - double rx1, ry1, rx2, ry2; - int sx1, sx2; // Find the corners of the box // that define the edges from current viewpoint. @@ -1066,64 +1060,31 @@ bool RenderPolyBsp::CheckBBox(float *bspcoord) if (boxpos == 5) return true; - x1 = bspcoord[checkcoord[boxpos][0]] - ViewPos.X; - y1 = bspcoord[checkcoord[boxpos][1]] - ViewPos.Y; - x2 = bspcoord[checkcoord[boxpos][2]] - ViewPos.X; - y2 = bspcoord[checkcoord[boxpos][3]] - ViewPos.Y; + static const int checkcoord[12][4] = + { + { 3,0,2,1 }, + { 3,0,2,0 }, + { 3,1,2,0 }, + { 0 }, + { 2,0,2,1 }, + { 0,0,0,0 }, + { 3,1,3,0 }, + { 0 }, + { 2,0,3,1 }, + { 2,1,3,1 }, + { 2,1,3,0 } + }; - // check clip list for an open space + x1 = bspcoord[checkcoord[boxpos][0]]; + y1 = bspcoord[checkcoord[boxpos][1]]; + x2 = bspcoord[checkcoord[boxpos][2]]; + y2 = bspcoord[checkcoord[boxpos][3]]; - // Sitting on a line? 
- if (y1 * (x1 - x2) + x1 * (y2 - y1) >= -EQUAL_EPSILON) + int sx1, sx2; + if (GetSegmentRangeForLine(x1, y1, x2, y2, sx1, sx2)) + return !IsSegmentCulled(sx1, sx2); + else return true; - - rx1 = x1 * ViewSin - y1 * ViewCos; - rx2 = x2 * ViewSin - y2 * ViewCos; - ry1 = x1 * ViewTanCos + y1 * ViewTanSin; - ry2 = x2 * ViewTanCos + y2 * ViewTanSin; - - /*if (MirrorFlags & RF_XFLIP) - { - double t = -rx1; - rx1 = -rx2; - rx2 = t; - swapvalues(ry1, ry2); - }*/ - - if (rx1 >= -ry1) - { - if (rx1 > ry1) return false; // left edge is off the right side - if (ry1 == 0) return false; - sx1 = xs_RoundToInt(CenterX + rx1 * CenterX / ry1); - } - else - { - if (rx2 < -ry2) return false; // wall is off the left side - if (rx1 - rx2 - ry2 + ry1 == 0) return false; // wall does not intersect view volume - sx1 = 0; - } - - if (rx2 <= ry2) - { - if (rx2 < -ry2) return false; // right edge is off the left side - if (ry2 == 0) return false; - sx2 = xs_RoundToInt(CenterX + rx2 * CenterX / ry2); - } - else - { - if (rx1 > ry1) return false; // wall is off the right side - if (ry2 - ry1 - rx2 + rx1 == 0) return false; // wall does not intersect view volume - sx2 = viewwidth; - } - - // Find the first clippost that touches the source post - // (adjacent pixels are touching). - - // Does not cross a pixel. 
- if (sx2 <= sx1) - return false; - - return !IsSegmentCulled(sx1, sx2); } bool RenderPolyBsp::GetSegmentRangeForLine(double x1, double y1, double x2, double y2, int &sx1, int &sx2) const diff --git a/src/r_poly.h b/src/r_poly.h index a9ae5e33df..f1718f2661 100644 --- a/src/r_poly.h +++ b/src/r_poly.h @@ -30,6 +30,7 @@ #include "r_utility.h" #include "r_main.h" #include "r_poly_triangle.h" +#include "r_poly_intersection.h" // DScreen accelerated sprite to be rendered class PolyScreenSprite @@ -143,6 +144,7 @@ private: double MinFloorHeight = 0.0; TriMatrix worldToClip; + FrustumPlanes frustumPlanes; std::vector SectorSpriteRanges; std::vector SortedSprites; diff --git a/src/r_poly_intersection.cpp b/src/r_poly_intersection.cpp new file mode 100644 index 0000000000..5e7ad374b7 --- /dev/null +++ b/src/r_poly_intersection.cpp @@ -0,0 +1,235 @@ +/* +** Various 3D intersection tests +** Copyright (c) 1997-2015 The UICore Team +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. 
+** +*/ + +#include +#include "templates.h" +#include "doomdef.h" +#include "r_poly_intersection.h" + +IntersectionTest::Result IntersectionTest::plane_aabb(const Vec4f &plane, const AxisAlignedBoundingBox &aabb) +{ + Vec3f center = aabb.center(); + Vec3f extents = aabb.extents(); + float e = extents.x * std::abs(plane.x) + extents.y * std::abs(plane.y) + extents.z * std::abs(plane.z); + float s = center.x * plane.x + center.y * plane.y + center.z * plane.z + plane.w; + if (s - e > 0) + return inside; + else if (s + e < 0) + return outside; + else + return intersecting; +} + +IntersectionTest::Result IntersectionTest::plane_obb(const Vec4f &plane, const OrientedBoundingBox &obb) +{ + Vec3f n(plane); + float d = plane.w; + float e = obb.extents.x * std::abs(Vec3f::dot(obb.axis_x, n)) + obb.extents.y * std::abs(Vec3f::dot(obb.axis_y, n)) + obb.extents.z * std::abs(Vec3f::dot(obb.axis_z, n)); + float s = Vec3f::dot(obb.center, n) + d; + if (s - e > 0) + return inside; + else if (s + e < 0) + return outside; + else + return intersecting; +} + +IntersectionTest::OverlapResult IntersectionTest::sphere(const Vec3f &center1, float radius1, const Vec3f &center2, float radius2) +{ + Vec3f h = center1 - center2; + float square_distance = Vec3f::dot(h, h); + float radius_sum = radius1 + radius2; + if (square_distance > radius_sum * radius_sum) + return disjoint; + else + return overlap; +} + +IntersectionTest::OverlapResult IntersectionTest::sphere_aabb(const Vec3f &center, float radius, const AxisAlignedBoundingBox &aabb) +{ + Vec3f a = aabb.aabb_min - center; + Vec3f b = center - aabb.aabb_max; + a.x = std::max(a.x, 0.0f); + a.y = std::max(a.y, 0.0f); + a.z = std::max(a.z, 0.0f); + b.x = std::max(b.x, 0.0f); + b.y = std::max(b.y, 0.0f); + b.z = std::max(b.z, 0.0f); + Vec3f e = a + b; + float d = Vec3f::dot(e, e); + if (d > radius * radius) + return disjoint; + else + return overlap; +} + +IntersectionTest::OverlapResult IntersectionTest::aabb(const AxisAlignedBoundingBox &a, const 
AxisAlignedBoundingBox &b) +{ + if (a.aabb_min.x > b.aabb_max.x || b.aabb_min.x > a.aabb_max.x || + a.aabb_min.y > b.aabb_max.y || b.aabb_min.y > a.aabb_max.y || + a.aabb_min.z > b.aabb_max.z || b.aabb_min.z > a.aabb_max.z) + { + return disjoint; + } + else + { + return overlap; + } +} + +IntersectionTest::Result IntersectionTest::frustum_aabb(const FrustumPlanes &frustum, const AxisAlignedBoundingBox &box) +{ + bool is_intersecting = false; + // Test every frustum plane (as frustum_obb does): a box outside any single plane is outside the frustum. + for (int i = 0; i < 6; i++) + { + Result result = plane_aabb(frustum.planes[i], box); + if (result == outside) + return outside; + else if (result == intersecting) + is_intersecting = true; + } + if (is_intersecting) + return intersecting; + else + return inside; +} + +IntersectionTest::Result IntersectionTest::frustum_obb(const FrustumPlanes &frustum, const OrientedBoundingBox &box) +{ + bool is_intersecting = false; + for (int i = 0; i < 6; i++) + { + Result result = plane_obb(frustum.planes[i], box); + if (result == outside) + return outside; + else if (result == intersecting) + is_intersecting = true; + } + if (is_intersecting) + return intersecting; + else + return inside; +} + +IntersectionTest::OverlapResult IntersectionTest::ray_aabb(const Vec3f &ray_start, const Vec3f &ray_end, const AxisAlignedBoundingBox &aabb) +{ + Vec3f c = (ray_start + ray_end) * 0.5f; + Vec3f w = ray_end - c; + Vec3f h = aabb.extents(); + + c -= aabb.center(); + + Vec3f v(std::abs(w.x), std::abs(w.y), std::abs(w.z)); + + if (std::abs(c.x) > v.x + h.x || std::abs(c.y) > v.y + h.y || std::abs(c.z) > v.z + h.z) + return disjoint; + + if (std::abs(c.y * w.z - c.z * w.y) > h.y * v.z + h.z * v.y || + std::abs(c.x * w.z - c.z * w.x) > h.x * v.z + h.z * v.x || + std::abs(c.x * w.y - c.y * w.x) > h.x * v.y + h.y * v.x) + return disjoint; + + return overlap; +} + +///////////////////////////////////////////////////////////////////////////// + +FrustumPlanes::FrustumPlanes() +{ +} + +FrustumPlanes::FrustumPlanes(const Mat4f &world_to_projection) 
+{ + planes[0] = near_frustum_plane(world_to_projection); + planes[1] = far_frustum_plane(world_to_projection); + planes[2] = left_frustum_plane(world_to_projection); + planes[3] = right_frustum_plane(world_to_projection); + planes[4] = top_frustum_plane(world_to_projection); + planes[5] = bottom_frustum_plane(world_to_projection); +} + +Vec4f FrustumPlanes::left_frustum_plane(const Mat4f &m) +{ + Vec4f plane( + m.matrix[3 + 0 * 4] + m.matrix[0 + 0 * 4], + m.matrix[3 + 1 * 4] + m.matrix[0 + 1 * 4], + m.matrix[3 + 2 * 4] + m.matrix[0 + 2 * 4], + m.matrix[3 + 3 * 4] + m.matrix[0 + 3 * 4]); + plane /= plane.length3(); + return plane; +} + +Vec4f FrustumPlanes::right_frustum_plane(const Mat4f &m) +{ + Vec4f plane( + m.matrix[3 + 0 * 4] - m.matrix[0 + 0 * 4], + m.matrix[3 + 1 * 4] - m.matrix[0 + 1 * 4], + m.matrix[3 + 2 * 4] - m.matrix[0 + 2 * 4], + m.matrix[3 + 3 * 4] - m.matrix[0 + 3 * 4]); + plane /= plane.length3(); + return plane; +} + +Vec4f FrustumPlanes::top_frustum_plane(const Mat4f &m) +{ + Vec4f plane( + m.matrix[3 + 0 * 4] - m.matrix[1 + 0 * 4], + m.matrix[3 + 1 * 4] - m.matrix[1 + 1 * 4], + m.matrix[3 + 2 * 4] - m.matrix[1 + 2 * 4], + m.matrix[3 + 3 * 4] - m.matrix[1 + 3 * 4]); + plane /= plane.length3(); + return plane; +} + +Vec4f FrustumPlanes::bottom_frustum_plane(const Mat4f &m) +{ + Vec4f plane( + m.matrix[3 + 0 * 4] + m.matrix[1 + 0 * 4], + m.matrix[3 + 1 * 4] + m.matrix[1 + 1 * 4], + m.matrix[3 + 2 * 4] + m.matrix[1 + 2 * 4], + m.matrix[3 + 3 * 4] + m.matrix[1 + 3 * 4]); + plane /= plane.length3(); + return plane; +} + +Vec4f FrustumPlanes::near_frustum_plane(const Mat4f &m) +{ + Vec4f plane( + m.matrix[3 + 0 * 4] + m.matrix[2 + 0 * 4], + m.matrix[3 + 1 * 4] + m.matrix[2 + 1 * 4], + m.matrix[3 + 2 * 4] + m.matrix[2 + 2 * 4], + m.matrix[3 + 3 * 4] + m.matrix[2 + 3 * 4]); + plane /= plane.length3(); + return plane; +} + +Vec4f FrustumPlanes::far_frustum_plane(const Mat4f &m) +{ + Vec4f plane( + m.matrix[3 + 0 * 4] - m.matrix[2 + 0 * 4], + m.matrix[3 + 
1 * 4] - m.matrix[2 + 1 * 4], + m.matrix[3 + 2 * 4] - m.matrix[2 + 2 * 4], + m.matrix[3 + 3 * 4] - m.matrix[2 + 3 * 4]); + plane /= plane.length3(); + return plane; +} diff --git a/src/r_poly_intersection.h b/src/r_poly_intersection.h new file mode 100644 index 0000000000..16f30201c2 --- /dev/null +++ b/src/r_poly_intersection.h @@ -0,0 +1,167 @@ +/* +** Various 3D intersection tests +** Copyright (c) 1997-2015 The UICore Team +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. 
+** +*/ + +#pragma once + +#include "r_triangle.h" +#include + +class Vec4f +{ +public: + Vec4f() = default; + Vec4f(const Vec4f &) = default; + Vec4f(float x, float y, float z, float w) : x(x), y(y), z(z), w(w) { } + Vec4f(float v) : x(v), y(v), z(v), w(v) { } + + static float dot(const Vec4f &a, const Vec4f &b) { return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w; } + static float dot3(const Vec4f &a, const Vec4f &b) { return a.x * b.x + a.y * b.y + a.z * b.z; } + float length3() const { return std::sqrt(dot3(*this, *this)); } + float magnitude() const { return std::sqrt(dot(*this, *this)); } + + Vec4f &operator+=(const Vec4f &b) { *this = Vec4f(x + b.x, y + b.y, z + b.z, w + b.w); return *this; } + Vec4f &operator-=(const Vec4f &b) { *this = Vec4f(x - b.x, y - b.y, z - b.z, w - b.w); return *this; } + Vec4f &operator*=(const Vec4f &b) { *this = Vec4f(x * b.x, y * b.y, z * b.z, w * b.w); return *this; } + Vec4f &operator/=(const Vec4f &b) { *this = Vec4f(x / b.x, y / b.y, z / b.z, w / b.w); return *this; } + Vec4f &operator+=(float b) { *this = Vec4f(x + b, y + b, z + b, w + b); return *this; } + Vec4f &operator-=(float b) { *this = Vec4f(x - b, y - b, z - b, w - b); return *this; } + Vec4f &operator*=(float b) { *this = Vec4f(x * b, y * b, z * b, w * b); return *this; } + Vec4f &operator/=(float b) { *this = Vec4f(x / b, y / b, z / b, w / b); return *this; } + + float x, y, z, w; +}; + +class Vec3f +{ +public: + Vec3f() = default; + Vec3f(const Vec3f &) = default; + Vec3f(const Vec4f &v) : x(v.x), y(v.y), z(v.z) { } + Vec3f(float x, float y, float z) : x(x), y(y), z(z) { } + Vec3f(float v) : x(v), y(v), z(v) { } + + static float dot(const Vec3f &a, const Vec3f &b) { return a.x * b.x + a.y * b.y + a.z * b.z; } + float length() const { return std::sqrt(dot(*this, *this)); } + + Vec3f &operator+=(const Vec3f &b) { *this = Vec3f(x + b.x, y + b.y, z + b.z); return *this; } + Vec3f &operator-=(const Vec3f &b) { *this = Vec3f(x - b.x, y - b.y, z - b.z); return *this; 
} + Vec3f &operator*=(const Vec3f &b) { *this = Vec3f(x * b.x, y * b.y, z * b.z); return *this; } + Vec3f &operator/=(const Vec3f &b) { *this = Vec3f(x / b.x, y / b.y, z / b.z); return *this; } + Vec3f &operator+=(float b) { *this = Vec3f(x + b, y + b, z + b); return *this; } + Vec3f &operator-=(float b) { *this = Vec3f(x - b, y - b, z - b); return *this; } + Vec3f &operator*=(float b) { *this = Vec3f(x * b, y * b, z * b); return *this; } + Vec3f &operator/=(float b) { *this = Vec3f(x / b, y / b, z / b); return *this; } + + float x, y, z; +}; + +inline Vec3f operator+(const Vec3f &a, const Vec3f &b) { return Vec3f(a.x + b.x, a.y + b.y, a.z + b.z); } +inline Vec3f operator-(const Vec3f &a, const Vec3f &b) { return Vec3f(a.x - b.x, a.y - b.y, a.z - b.z); } +inline Vec3f operator*(const Vec3f &a, const Vec3f &b) { return Vec3f(a.x * b.x, a.y * b.y, a.z * b.z); } +inline Vec3f operator/(const Vec3f &a, const Vec3f &b) { return Vec3f(a.x / b.x, a.y / b.y, a.z / b.z); } + +inline Vec3f operator+(const Vec3f &a, float b) { return Vec3f(a.x + b, a.y + b, a.z + b); } +inline Vec3f operator-(const Vec3f &a, float b) { return Vec3f(a.x - b, a.y - b, a.z - b); } +inline Vec3f operator*(const Vec3f &a, float b) { return Vec3f(a.x * b, a.y * b, a.z * b); } +inline Vec3f operator/(const Vec3f &a, float b) { return Vec3f(a.x / b, a.y / b, a.z / b); } + +inline Vec3f operator+(float a, const Vec3f &b) { return Vec3f(a + b.x, a + b.y, a + b.z); } +inline Vec3f operator-(float a, const Vec3f &b) { return Vec3f(a - b.x, a - b.y, a - b.z); } +inline Vec3f operator*(float a, const Vec3f &b) { return Vec3f(a * b.x, a * b.y, a * b.z); } +inline Vec3f operator/(float a, const Vec3f &b) { return Vec3f(a / b.x, a / b.y, a / b.z); } + +typedef TriMatrix Mat4f; + +class AxisAlignedBoundingBox +{ +public: + AxisAlignedBoundingBox() : aabb_min(), aabb_max() {} + AxisAlignedBoundingBox(const Vec3f &aabb_min, const Vec3f &aabb_max) : aabb_min(aabb_min), aabb_max(aabb_max) { } + 
AxisAlignedBoundingBox(const AxisAlignedBoundingBox &aabb, const Vec3f &barycentric_min, const Vec3f &barycentric_max) + : aabb_min(mix(aabb.aabb_min, aabb.aabb_max, barycentric_min)), aabb_max(mix(aabb.aabb_min, aabb.aabb_max, barycentric_max)) { } + + Vec3f center() const { return (aabb_max + aabb_min) * 0.5f; } + Vec3f extents() const { return (aabb_max - aabb_min) * 0.5f; } + + Vec3f aabb_min; + Vec3f aabb_max; + +private: + template<typename A, typename B, typename C> + inline A mix(A a, B b, C mix) + { + return a * (C(1) - mix) + b * mix; + } +}; + +class OrientedBoundingBox +{ +public: + Vec3f center; + Vec3f extents; + Vec3f axis_x; + Vec3f axis_y; + Vec3f axis_z; +}; + +class FrustumPlanes +{ +public: + FrustumPlanes(); + explicit FrustumPlanes(const Mat4f &world_to_projection); + + Vec4f planes[6]; + +private: + static Vec4f left_frustum_plane(const Mat4f &matrix); + static Vec4f right_frustum_plane(const Mat4f &matrix); + static Vec4f top_frustum_plane(const Mat4f &matrix); + static Vec4f bottom_frustum_plane(const Mat4f &matrix); + static Vec4f near_frustum_plane(const Mat4f &matrix); + static Vec4f far_frustum_plane(const Mat4f &matrix); +}; + +class IntersectionTest +{ +public: + enum Result + { + outside, + inside, + intersecting, + }; + + enum OverlapResult + { + disjoint, + overlap + }; + + static Result plane_aabb(const Vec4f &plane, const AxisAlignedBoundingBox &aabb); + static Result plane_obb(const Vec4f &plane, const OrientedBoundingBox &obb); + static OverlapResult sphere(const Vec3f &center1, float radius1, const Vec3f &center2, float radius2); + static OverlapResult sphere_aabb(const Vec3f &center, float radius, const AxisAlignedBoundingBox &aabb); + static OverlapResult aabb(const AxisAlignedBoundingBox &a, const AxisAlignedBoundingBox &b); + static Result frustum_aabb(const FrustumPlanes &frustum, const AxisAlignedBoundingBox &box); + static Result frustum_obb(const FrustumPlanes &frustum, const OrientedBoundingBox &box); + static OverlapResult ray_aabb(const Vec3f &ray_start, const 
Vec3f &ray_end, const AxisAlignedBoundingBox &box); +}; From b16e6725d7067397a0a3a5b4531882d63cdfa3dd Mon Sep 17 00:00:00 2001 From: "alexey.lysiuk" Date: Sat, 12 Nov 2016 18:09:05 +0200 Subject: [PATCH 307/912] Added missing #include to fix build with GCC/Clang --- src/r_poly_intersection.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/r_poly_intersection.h b/src/r_poly_intersection.h index 16f30201c2..48ebef0314 100644 --- a/src/r_poly_intersection.h +++ b/src/r_poly_intersection.h @@ -24,6 +24,7 @@ #include "r_triangle.h" #include +#include class Vec4f { From af7a7ab7d71780fd171a22948dda56bc3caae4bc Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 13 Nov 2016 08:57:29 +0100 Subject: [PATCH 308/912] Remove use of the llvm.round intrinsic as it apparently doesn't work in some environments --- .../fixedfunction/drawtrianglecodegen.cpp | 19 +++++++++++++------ .../fixedfunction/drawtrianglecodegen.h | 1 + src/r_compiler/ssa/ssa_float.cpp | 3 +++ src/r_compiler/ssa/ssa_float.h | 2 +- 4 files changed, 18 insertions(+), 7 deletions(-) diff --git a/src/r_compiler/fixedfunction/drawtrianglecodegen.cpp b/src/r_compiler/fixedfunction/drawtrianglecodegen.cpp index 4819a28cff..013969bbbd 100644 --- a/src/r_compiler/fixedfunction/drawtrianglecodegen.cpp +++ b/src/r_compiler/fixedfunction/drawtrianglecodegen.cpp @@ -39,18 +39,25 @@ void DrawTriangleCodegen::Generate(TriDrawVariant variant, bool truecolor, SSAVa LoopBlockY(variant, truecolor); } +SSAInt DrawTriangleCodegen::FloatTo28_4(SSAFloat v) +{ + // SSAInt(SSAFloat::round(16.0f * v), false); + SSAInt a = SSAInt(v * 32.0f, false); + return (a + (a.ashr(31) | SSAInt(1))).ashr(1); +} + void DrawTriangleCodegen::Setup(TriDrawVariant variant, bool truecolor) { int pixelsize = truecolor ? 
4 : 1; // 28.4 fixed-point coordinates - Y1 = SSAInt(SSAFloat::round(16.0f * v1.y), false); - Y2 = SSAInt(SSAFloat::round(16.0f * v2.y), false); - Y3 = SSAInt(SSAFloat::round(16.0f * v3.y), false); + Y1 = FloatTo28_4(v1.y); + Y2 = FloatTo28_4(v2.y); + Y3 = FloatTo28_4(v3.y); - X1 = SSAInt(SSAFloat::round(16.0f * v1.x), false); - X2 = SSAInt(SSAFloat::round(16.0f * v2.x), false); - X3 = SSAInt(SSAFloat::round(16.0f * v3.x), false); + X1 = FloatTo28_4(v1.x); + X2 = FloatTo28_4(v2.x); + X3 = FloatTo28_4(v3.x); // Deltas DX12 = X1 - X2; diff --git a/src/r_compiler/fixedfunction/drawtrianglecodegen.h b/src/r_compiler/fixedfunction/drawtrianglecodegen.h index 6f9a6c32ad..eba1caf4f8 100644 --- a/src/r_compiler/fixedfunction/drawtrianglecodegen.h +++ b/src/r_compiler/fixedfunction/drawtrianglecodegen.h @@ -40,6 +40,7 @@ private: SSATriVertex LoadTriVertex(SSAValue v); void LoadUniforms(SSAValue uniforms); void Setup(TriDrawVariant variant, bool truecolor); + SSAInt FloatTo28_4(SSAFloat v); void LoopBlockY(TriDrawVariant variant, bool truecolor); void LoopBlockX(TriDrawVariant variant, bool truecolor); void LoopFullBlock(TriDrawVariant variant, bool truecolor); diff --git a/src/r_compiler/ssa/ssa_float.cpp b/src/r_compiler/ssa/ssa_float.cpp index 77cef29ffa..a45bee6132 100644 --- a/src/r_compiler/ssa/ssa_float.cpp +++ b/src/r_compiler/ssa/ssa_float.cpp @@ -112,12 +112,15 @@ SSAFloat SSAFloat::fma(SSAFloat a, SSAFloat b, SSAFloat c) return SSAFloat::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::fma, params), args, SSAScope::hint())); } +/* This intrinsic isn't always available.. 
SSAFloat SSAFloat::round(SSAFloat val) { + std::vector params; params.push_back(SSAFloat::llvm_type()); return SSAFloat::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::round, params), val.v, SSAScope::hint())); } +*/ SSAFloat SSAFloat::floor(SSAFloat val) { diff --git a/src/r_compiler/ssa/ssa_float.h b/src/r_compiler/ssa/ssa_float.h index 9386db0174..69fb81a759 100644 --- a/src/r_compiler/ssa/ssa_float.h +++ b/src/r_compiler/ssa/ssa_float.h @@ -43,7 +43,7 @@ public: static SSAFloat exp(SSAFloat val); static SSAFloat log(SSAFloat val); static SSAFloat fma(SSAFloat a, SSAFloat b, SSAFloat c); - static SSAFloat round(SSAFloat val); + //static SSAFloat round(SSAFloat val); static SSAFloat floor(SSAFloat val); static SSAFloat MIN(SSAFloat a, SSAFloat b); static SSAFloat MAX(SSAFloat a, SSAFloat b); From 27eb8e36aea92f32a7506ae97b4695820e239e25 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 13 Nov 2016 11:44:07 +0100 Subject: [PATCH 309/912] Fix solid segment culling --- src/r_poly.cpp | 72 +++++++++++++++++++++++--------------------------- src/r_poly.h | 1 + 2 files changed, 34 insertions(+), 39 deletions(-) diff --git a/src/r_poly.cpp b/src/r_poly.cpp index e8a34544b7..e54452e101 100644 --- a/src/r_poly.cpp +++ b/src/r_poly.cpp @@ -47,9 +47,9 @@ void RenderPolyBsp::Render() // Setup working buffers PolyVertexBuffer::Clear(); SolidSegments.clear(); - SolidSegments.reserve(MAXWIDTH / 2 + 2); - SolidSegments.push_back({ -0x7fff, 0 }); - SolidSegments.push_back({ viewwidth, 0x7fff }); + SolidSegments.reserve(SolidCullScale + 2); + SolidSegments.push_back({ -0x7fff, -SolidCullScale }); + SolidSegments.push_back({ SolidCullScale , 0x7fff }); SectorSpriteRanges.clear(); SectorSpriteRanges.resize(numsectors); SortedSprites.clear(); @@ -1027,12 +1027,12 @@ bool RenderPolyBsp::CheckBBox(float *bspcoord) { // Start using a quick frustum AABB test: - AxisAlignedBoundingBox aabb(Vec3f(bspcoord[BOXLEFT], bspcoord[BOXBOTTOM], -1000.0f), 
Vec3f(bspcoord[BOXRIGHT], bspcoord[BOXTOP], 1000.0f)); + AxisAlignedBoundingBox aabb(Vec3f(bspcoord[BOXLEFT], bspcoord[BOXBOTTOM], (float)ViewPos.Z - 1000.0f), Vec3f(bspcoord[BOXRIGHT], bspcoord[BOXTOP], (float)ViewPos.Z + 1000.0f)); auto result = IntersectionTest::frustum_aabb(frustumPlanes, aabb); if (result == IntersectionTest::outside) return false; - // Occlusion test using solid segments (which seems to be quite broken, actually): + // Occlusion test using solid segments: int boxx; int boxy; @@ -1089,51 +1089,45 @@ bool RenderPolyBsp::CheckBBox(float *bspcoord) bool RenderPolyBsp::GetSegmentRangeForLine(double x1, double y1, double x2, double y2, int &sx1, int &sx2) const { + double znear = 5.0; + + // Transform to 2D view space: x1 = x1 - ViewPos.X; y1 = y1 - ViewPos.Y; x2 = x2 - ViewPos.X; y2 = y2 - ViewPos.Y; - - // Sitting on a line? - if (y1 * (x1 - x2) + x1 * (y2 - y1) >= -EQUAL_EPSILON) - return false; - double rx1 = x1 * ViewSin - y1 * ViewCos; double rx2 = x2 * ViewSin - y2 * ViewCos; - double ry1 = x1 * ViewTanCos + y1 * ViewTanSin; - double ry2 = x2 * ViewTanCos + y2 * ViewTanSin; + double ry1 = x1 * ViewCos + y1 * ViewSin; + double ry2 = x2 * ViewCos + y2 * ViewSin; - if (rx1 >= -ry1) + // Cull if line is entirely behind view + if (ry1 < znear && ry2 < znear) return false; + + // Clip line, if needed + double t1 = 0.0f, t2 = 1.0f; + if (ry1 < znear) + t1 = clamp((znear - ry1) / (ry2 - ry1), 0.0, 1.0); + if (ry2 < znear) + t2 = clamp((znear - ry1) / (ry2 - ry1), 0.0, 1.0); + if (t1 != 0.0 || t2 != 1.0) { - if (rx1 > ry1) return false; // left edge is off the right side - if (ry1 == 0) return false; - sx1 = xs_RoundToInt(CenterX + rx1 * CenterX / ry1); - } - else - { - if (rx2 < -ry2) return false; // wall is off the left side - if (rx1 - rx2 - ry2 + ry1 == 0) return false; // wall does not intersect view volume - sx1 = 0; + double nx1 = rx1 * (1.0 - t1) + rx2 * t1; + double ny1 = ry1 * (1.0 - t1) + ry2 * t1; + double nx2 = rx1 * (1.0 - t2) + rx2 * 
t2; + double ny2 = ry1 * (1.0 - t2) + ry2 * t2; + rx1 = nx1; + rx2 = nx2; + ry1 = ny1; + ry2 = ny2; } - if (rx2 <= ry2) - { - if (rx2 < -ry2) return false; // right edge is off the left side - if (ry2 == 0) return false; - sx2 = xs_RoundToInt(CenterX + rx2 * CenterX / ry2); - } - else - { - if (rx1 > ry1) return false; // wall is off the right side - if (ry2 - ry1 - rx2 + rx1 == 0) return false; // wall does not intersect view volume - sx2 = viewwidth; - } + sx1 = (int)floor(clamp(rx1 / ry1 * (SolidCullScale / 3), (double)-SolidCullScale, (double)SolidCullScale)); + sx2 = (int)floor(clamp(rx2 / ry2 * (SolidCullScale / 3), (double)-SolidCullScale, (double)SolidCullScale)); - // Does not cross a pixel. - if (sx2 <= sx1) - return false; - - return true; + if (sx1 > sx2) + std::swap(sx1, sx2); + return sx1 != sx2; } ///////////////////////////////////////////////////////////////////////////// diff --git a/src/r_poly.h b/src/r_poly.h index f1718f2661..45cbcf8647 100644 --- a/src/r_poly.h +++ b/src/r_poly.h @@ -162,6 +162,7 @@ private: }; std::vector SolidSegments; + const int SolidCullScale = 3000; PolySkyDome skydome; }; From c914a7a7fea6a54c0ab3f5b5231978a512a278b7 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 13 Nov 2016 13:24:36 +0100 Subject: [PATCH 310/912] Cull walls --- src/r_poly.cpp | 22 ++++++++++++++++++---- src/r_poly.h | 3 +-- 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/src/r_poly.cpp b/src/r_poly.cpp index e54452e101..a34853e2ec 100644 --- a/src/r_poly.cpp +++ b/src/r_poly.cpp @@ -46,10 +46,7 @@ void RenderPolyBsp::Render() // Setup working buffers PolyVertexBuffer::Clear(); - SolidSegments.clear(); - SolidSegments.reserve(SolidCullScale + 2); - SolidSegments.push_back({ -0x7fff, -SolidCullScale }); - SolidSegments.push_back({ SolidCullScale , 0x7fff }); + ClearSolidSegments(); SectorSpriteRanges.clear(); SectorSpriteRanges.resize(numsectors); SortedSprites.clear(); @@ -92,6 +89,7 @@ void RenderPolyBsp::Render() } // Render 
front to back + ClearSolidSegments(); if (r_debug_cull) { for (auto it = PvsSectors.rbegin(); it != PvsSectors.rend(); ++it) @@ -363,6 +361,12 @@ void RenderPolyBsp::AddLine(seg_t *line, sector_t *frontsector, uint32_t subsect if (pt1.Y * (pt1.X - pt2.X) + pt1.X * (pt2.Y - pt1.Y) >= 0) return; + // Cull wall if not visible + int sx1, sx2; + bool hasSegmentRange = GetSegmentRangeForLine(line->v1->fX(), line->v1->fY(), line->v2->fX(), line->v2->fY(), sx1, sx2); + if (hasSegmentRange && IsSegmentCulled(sx1, sx2)) + return; + double frontceilz1 = frontsector->ceilingplane.ZatPoint(line->v1); double frontfloorz1 = frontsector->floorplane.ZatPoint(line->v1); double frontceilz2 = frontsector->ceilingplane.ZatPoint(line->v2); @@ -384,6 +388,8 @@ void RenderPolyBsp::AddLine(seg_t *line, sector_t *frontsector, uint32_t subsect wall.UnpeggedCeil = frontceilz1; wall.Texpart = side_t::mid; wall.Render(worldToClip); + if (hasSegmentRange) + MarkSegmentCulled(sx1, sx2); } } else @@ -972,6 +978,14 @@ void RenderPolyBsp::RenderPlayerSprite(DPSprite *sprite, AActor *owner, float bo //R_DrawVisSprite(vis); } +void RenderPolyBsp::ClearSolidSegments() +{ + SolidSegments.clear(); + SolidSegments.reserve(SolidCullScale + 2); + SolidSegments.push_back({ -0x7fff, -SolidCullScale }); + SolidSegments.push_back({ SolidCullScale , 0x7fff }); +} + bool RenderPolyBsp::IsSegmentCulled(int x1, int x2) const { int next = 0; diff --git a/src/r_poly.h b/src/r_poly.h index 45cbcf8647..b2a0244888 100644 --- a/src/r_poly.h +++ b/src/r_poly.h @@ -132,11 +132,10 @@ private: // Checks BSP node/subtree bounding box. // Returns true if some part of the bbox might be visible. 
bool CheckBBox(float *bspcoord); - bool GetSegmentRangeForLine(double x1, double y1, double x2, double y2, int &sx1, int &sx2) const; - void MarkSegmentCulled(int x1, int x2); bool IsSegmentCulled(int x1, int x2) const; + void ClearSolidSegments(); std::vector PvsSectors; uint32_t NextSubsectorDepth = 0; From 411eb579527f5ebc543ab11f24fea5383218bac5 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 13 Nov 2016 15:16:55 +0100 Subject: [PATCH 311/912] Fix rendering of masked walls --- src/r_poly.cpp | 36 +++++++++++++------- src/r_poly.h | 92 +++++++++++++++++++++++++++----------------------- 2 files changed, 73 insertions(+), 55 deletions(-) diff --git a/src/r_poly.cpp b/src/r_poly.cpp index a34853e2ec..94e27cd433 100644 --- a/src/r_poly.cpp +++ b/src/r_poly.cpp @@ -50,7 +50,7 @@ void RenderPolyBsp::Render() SectorSpriteRanges.clear(); SectorSpriteRanges.resize(numsectors); SortedSprites.clear(); - SubsectoredSprites.clear(); + TranslucentObjects.clear(); PvsSectors.clear(); ScreenSprites.clear(); PolyStencilBuffer::Instance()->Clear(viewwidth, viewheight, 0); @@ -103,7 +103,7 @@ void RenderPolyBsp::Render() skydome.Render(worldToClip); - RenderSprites(); + RenderTranslucent(); RenderPlayerSprites(); DrawerCommandQueue::WaitForWorkers(); @@ -140,19 +140,24 @@ void RenderPolyBsp::RenderSubsector(subsector_t *sub) for (int i = 0; i < sprites.Count; i++) { AActor *thing = SortedSprites[sprites.Start + i].Thing; - SubsectoredSprites.push_back({ thing, sub, subsectorDepth }); + TranslucentObjects.push_back({ thing, sub, subsectorDepth }); } + + TranslucentObjects.insert(TranslucentObjects.end(), TempTranslucentWalls.begin(), TempTranslucentWalls.end()); + TempTranslucentWalls.clear(); } -void RenderPolyBsp::RenderSprites() +void RenderPolyBsp::RenderTranslucent() { - for (auto it = SubsectoredSprites.rbegin(); it != SubsectoredSprites.rend(); ++it) + for (auto it = TranslucentObjects.rbegin(); it != TranslucentObjects.rend(); ++it) { - auto &spr = *it; - if 
((spr.thing->renderflags & RF_SPRITETYPEMASK) == RF_WALLSPRITE) - AddWallSprite(spr.thing, spr.sub, spr.subsectorDepth); + auto &obj = *it; + if (!obj.thing) + obj.wall.Render(worldToClip); + else if ((obj.thing->renderflags & RF_SPRITETYPEMASK) == RF_WALLSPRITE) + AddWallSprite(obj.thing, obj.sub, obj.subsectorDepth); else - AddSprite(spr.thing, spr.sub, spr.subsectorDepth); + AddSprite(obj.thing, obj.sub, obj.subsectorDepth); } } @@ -448,7 +453,7 @@ void RenderPolyBsp::AddLine(seg_t *line, sector_t *frontsector, uint32_t subsect wall.UnpeggedCeil = topceilz1; wall.Texpart = side_t::mid; wall.Masked = true; - wall.Render(worldToClip); + TempTranslucentWalls.push_back({ wall }); } } } @@ -1206,8 +1211,15 @@ void RenderPolyWall::Render(const TriMatrix &worldToClip) args.stencilwritevalue = 1; args.SetTexture(tex); - PolyTriangleDrawer::draw(args, TriDrawVariant::Draw); - PolyTriangleDrawer::draw(args, TriDrawVariant::Stencil); + if (!Masked) + { + PolyTriangleDrawer::draw(args, TriDrawVariant::Draw); + PolyTriangleDrawer::draw(args, TriDrawVariant::Stencil); + } + else + { + PolyTriangleDrawer::draw(args, TriDrawVariant::DrawSubsector); + } } FTexture *RenderPolyWall::GetTexture() diff --git a/src/r_poly.h b/src/r_poly.h index b2a0244888..6c50519043 100644 --- a/src/r_poly.h +++ b/src/r_poly.h @@ -50,6 +50,42 @@ public: FDynamicColormap *Colormap = nullptr; }; +class RenderPolyWall +{ +public: + void Render(const TriMatrix &worldToClip); + + void SetCoords(const DVector2 &v1, const DVector2 &v2, double ceil1, double floor1, double ceil2, double floor2) + { + this->v1 = v1; + this->v2 = v2; + this->ceil1 = ceil1; + this->floor1 = floor1; + this->ceil2 = ceil2; + this->floor2 = floor2; + } + + DVector2 v1; + DVector2 v2; + double ceil1 = 0.0; + double floor1 = 0.0; + double ceil2 = 0.0; + double floor2 = 0.0; + + const seg_t *Line = nullptr; + side_t::ETexpart Texpart = side_t::mid; + double TopZ = 0.0; + double BottomZ = 0.0; + double UnpeggedCeil = 0.0; + 
FSWColormap *Colormap = nullptr; + bool Masked = false; + uint32_t SubsectorDepth = 0; + +private: + FTexture *GetTexture(); + int GetLightLevel(); +}; + // Used for sorting things by distance to the camera class PolySortedSprite { @@ -61,13 +97,17 @@ public: double DistanceSquared; }; -class PolySubsectoredSprite +class PolyTranslucentObject { public: - PolySubsectoredSprite(AActor *thing, subsector_t *sub, uint32_t subsectorDepth) : thing(thing), sub(sub), subsectorDepth(subsectorDepth) { } - AActor *thing; - subsector_t *sub; - uint32_t subsectorDepth; + PolyTranslucentObject(AActor *thing, subsector_t *sub, uint32_t subsectorDepth) : thing(thing), sub(sub), subsectorDepth(subsectorDepth) { } + PolyTranslucentObject(RenderPolyWall wall) : wall(wall) { } + + AActor *thing = nullptr; + subsector_t *sub = nullptr; + uint32_t subsectorDepth = 0; + + RenderPolyWall wall; }; class SpriteRange @@ -116,7 +156,7 @@ private: void AddLine(seg_t *line, sector_t *frontsector, uint32_t subsectorDepth); TriVertex PlaneVertex(vertex_t *v1, sector_t *sector, double height); - void RenderSprites(); + void RenderTranslucent(); void AddSprite(AActor *thing, subsector_t *sub, uint32_t subsectorDepth); void AddWallSprite(AActor *thing, subsector_t *sub, uint32_t subsectorDepth); bool IsThingCulled(AActor *thing); @@ -147,7 +187,9 @@ private: std::vector SectorSpriteRanges; std::vector SortedSprites; - std::vector SubsectoredSprites; + std::vector TranslucentObjects; + + std::vector TempTranslucentWalls; std::vector ScreenSprites; @@ -166,42 +208,6 @@ private: PolySkyDome skydome; }; -class RenderPolyWall -{ -public: - void Render(const TriMatrix &worldToClip); - - void SetCoords(const DVector2 &v1, const DVector2 &v2, double ceil1, double floor1, double ceil2, double floor2) - { - this->v1 = v1; - this->v2 = v2; - this->ceil1 = ceil1; - this->floor1 = floor1; - this->ceil2 = ceil2; - this->floor2 = floor2; - } - - DVector2 v1; - DVector2 v2; - double ceil1 = 0.0; - double floor1 = 
0.0; - double ceil2 = 0.0; - double floor2 = 0.0; - - const seg_t *Line = nullptr; - side_t::ETexpart Texpart = side_t::mid; - double TopZ = 0.0; - double BottomZ = 0.0; - double UnpeggedCeil = 0.0; - FSWColormap *Colormap = nullptr; - bool Masked = false; - uint32_t SubsectorDepth = 0; - -private: - FTexture *GetTexture(); - int GetLightLevel(); -}; - // Texture coordinates for a wall class PolyWallTextureCoords { From f25579849af52cf999b0b825f2027de2759cd49f Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 13 Nov 2016 15:43:54 +0100 Subject: [PATCH 312/912] Minor variable optimization --- .../fixedfunction/drawtrianglecodegen.cpp | 29 +++++++------------ 1 file changed, 10 insertions(+), 19 deletions(-) diff --git a/src/r_compiler/fixedfunction/drawtrianglecodegen.cpp b/src/r_compiler/fixedfunction/drawtrianglecodegen.cpp index 013969bbbd..bd6b262680 100644 --- a/src/r_compiler/fixedfunction/drawtrianglecodegen.cpp +++ b/src/r_compiler/fixedfunction/drawtrianglecodegen.cpp @@ -284,8 +284,8 @@ void DrawTriangleCodegen::LoopFullBlock(TriDrawVariant variant, bool truecolor) int pixelsize = truecolor ? 
4 : 1; stack_iy.store(SSAInt(0)); - stack_buffer.store(dest); - stack_subsectorbuffer.store(subsectorGBuffer); + stack_buffer.store(dest[x * pixelsize]); + stack_subsectorbuffer.store(subsectorGBuffer[x]); SSAForBlock loopy; SSAInt iy = stack_iy.load(); @@ -303,32 +303,26 @@ void DrawTriangleCodegen::LoopFullBlock(TriDrawVariant variant, bool truecolor) varyingStep[i] = SSAInt(step * SSAFloat((float)0x100000000LL), true); } - stack_ix.store(x); + stack_ix.store(SSAInt(0)); SSAForBlock loopx; SSAInt ix = stack_ix.load(); SSAInt varying[TriVertex::NumVarying]; for (int i = 0; i < TriVertex::NumVarying; i++) varying[i] = stack_varying[i].load(); - loopx.loop_block(ix < x + q, q); + loopx.loop_block(ix < SSAInt(q), q); { if (variant == TriDrawVariant::DrawSubsector) { SSAIfBlock branch; branch.if_block(subsectorbuffer[ix].load(true) >= subsectorDepth); { - if (truecolor) - ProcessPixel(buffer[ix * 4], subsectorbuffer[ix], varying, variant, truecolor); - else - ProcessPixel(buffer[ix], subsectorbuffer[ix], varying, variant, truecolor); + ProcessPixel(buffer[ix * pixelsize], subsectorbuffer[ix], varying, variant, truecolor); } branch.end_block(); } else { - if (truecolor) - ProcessPixel(buffer[ix * 4], subsectorbuffer[ix], varying, variant, truecolor); - else - ProcessPixel(buffer[ix], subsectorbuffer[ix], varying, variant, truecolor); + ProcessPixel(buffer[ix * pixelsize], subsectorbuffer[ix], varying, variant, truecolor); } for (int i = 0; i < TriVertex::NumVarying; i++) @@ -359,8 +353,8 @@ void DrawTriangleCodegen::LoopPartialBlock(TriDrawVariant variant, bool truecolo stack_CY2.store(C2 + DX23 * y0 - DY23 * x0); stack_CY3.store(C3 + DX31 * y0 - DY31 * x0); stack_iy.store(SSAInt(0)); - stack_buffer.store(dest); - stack_subsectorbuffer.store(subsectorGBuffer); + stack_buffer.store(dest[x * pixelsize]); + stack_subsectorbuffer.store(subsectorGBuffer[x]); SSAForBlock loopy; SSAInt iy = stack_iy.load(); @@ -401,7 +395,7 @@ void 
DrawTriangleCodegen::LoopPartialBlock(TriDrawVariant variant, bool truecolo if (variant == TriDrawVariant::DrawSubsector) { - covered = covered && subsectorbuffer[ix + x].load(true) >= subsectorDepth; + covered = covered && subsectorbuffer[ix].load(true) >= subsectorDepth; } else { @@ -417,10 +411,7 @@ void DrawTriangleCodegen::LoopPartialBlock(TriDrawVariant variant, bool truecolo } else { - if (truecolor) - ProcessPixel(buffer[(ix + x) * 4], subsectorbuffer[ix + x], varying, variant, truecolor); - else - ProcessPixel(buffer[ix + x], subsectorbuffer[ix + x], varying, variant, truecolor); + ProcessPixel(buffer[ix * pixelsize], subsectorbuffer[ix], varying, variant, truecolor); } } branch.end_block(); From acb2e821f01ffb4d5264f3a6e977e8ae8ce0ebe8 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 13 Nov 2016 16:36:47 +0100 Subject: [PATCH 313/912] Evict floats from the block code (too bad it didn't make any difference to the speed) --- .../fixedfunction/drawtrianglecodegen.cpp | 32 +++++++++++-------- .../fixedfunction/drawtrianglecodegen.h | 8 ++--- 2 files changed, 22 insertions(+), 18 deletions(-) diff --git a/src/r_compiler/fixedfunction/drawtrianglecodegen.cpp b/src/r_compiler/fixedfunction/drawtrianglecodegen.cpp index bd6b262680..257b550cef 100644 --- a/src/r_compiler/fixedfunction/drawtrianglecodegen.cpp +++ b/src/r_compiler/fixedfunction/drawtrianglecodegen.cpp @@ -228,10 +228,20 @@ void DrawTriangleCodegen::LoopBlockX(TriDrawVariant variant, bool truecolor) SSAFloat rcpWBR = 1.0f / (startW + offx1 * gradWX + offy1 * gradWY); for (int i = 0; i < TriVertex::NumVarying; i++) { - varyingTL[i] = (startVarying[i] + offx0 * gradVaryingX[i] + offy0 * gradVaryingY[i]) * rcpWTL; - varyingTR[i] = (startVarying[i] + offx1 * gradVaryingX[i] + offy0 * gradVaryingY[i]) * rcpWTR; - varyingBL[i] = ((startVarying[i] + offx0 * gradVaryingX[i] + offy1 * gradVaryingY[i]) * rcpWBL - varyingTL[i]) * (1.0f / q); - varyingBR[i] = ((startVarying[i] + offx1 * gradVaryingX[i] 
+ offy1 * gradVaryingY[i]) * rcpWBR - varyingTR[i]) * (1.0f / q); + SSAFloat varyingTL = (startVarying[i] + offx0 * gradVaryingX[i] + offy0 * gradVaryingY[i]) * rcpWTL; + SSAFloat varyingTR = (startVarying[i] + offx1 * gradVaryingX[i] + offy0 * gradVaryingY[i]) * rcpWTR; + SSAFloat varyingBL = (startVarying[i] + offx0 * gradVaryingX[i] + offy1 * gradVaryingY[i]) * rcpWBL; + SSAFloat varyingBR = (startVarying[i] + offx1 * gradVaryingX[i] + offy1 * gradVaryingY[i]) * rcpWBR; + + SSAFloat pos = varyingTL; + SSAFloat stepPos = (varyingBL - varyingTL) * (1.0f / q); + SSAFloat startStepX = (varyingTR - varyingTL) * (1.0f / q); + SSAFloat incrStepX = (varyingBR - varyingBL) * (1.0f / q) - startStepX; + + varyingPos[i] = SSAInt(pos * SSAFloat((float)0x01000000), false); + varyingStepPos[i] = SSAInt(stepPos * SSAFloat((float)0x01000000), false); + varyingStartStepX[i] = SSAInt(startStepX * SSAFloat((float)0x01000000), false); + varyingIncrStepX[i] = SSAInt(incrStepX * SSAFloat((float)0x01000000), false); } SSAFloat globVis = SSAFloat(1706.0f); @@ -296,11 +306,8 @@ void DrawTriangleCodegen::LoopFullBlock(TriDrawVariant variant, bool truecolor) SSAInt varyingStep[TriVertex::NumVarying]; for (int i = 0; i < TriVertex::NumVarying; i++) { - SSAFloat pos = varyingTL[i] + varyingBL[i] * SSAFloat(iy); - SSAFloat step = (varyingTR[i] + varyingBR[i] * SSAFloat(iy) - pos) * (1.0f / q); - - stack_varying[i].store(SSAInt((pos - SSAFloat::floor(pos)) * SSAFloat((float)0x100000000LL), true)); - varyingStep[i] = SSAInt(step * SSAFloat((float)0x100000000LL), true); + stack_varying[i].store((varyingPos[i] + varyingStepPos[i] * iy) << 8); + varyingStep[i] = (varyingStartStepX[i] + varyingIncrStepX[i] * iy) << 8; } stack_ix.store(SSAInt(0)); @@ -368,11 +375,8 @@ void DrawTriangleCodegen::LoopPartialBlock(TriDrawVariant variant, bool truecolo SSAInt varyingStep[TriVertex::NumVarying]; for (int i = 0; i < TriVertex::NumVarying; i++) { - SSAFloat pos = varyingTL[i] + varyingBL[i] * SSAFloat(iy); 
- SSAFloat step = (varyingTR[i] + varyingBR[i] * SSAFloat(iy) - pos) * (1.0f / q); - - stack_varying[i].store(SSAInt((pos - SSAFloat::floor(pos)) * SSAFloat((float)0x100000000LL), true)); - varyingStep[i] = SSAInt(step * SSAFloat((float)0x100000000LL), true); + stack_varying[i].store((varyingPos[i] + varyingStepPos[i] * iy) << 8); + varyingStep[i] = (varyingStartStepX[i] + varyingIncrStepX[i] * iy) << 8; } stack_CX1.store(CY1); diff --git a/src/r_compiler/fixedfunction/drawtrianglecodegen.h b/src/r_compiler/fixedfunction/drawtrianglecodegen.h index eba1caf4f8..c9fb61e67c 100644 --- a/src/r_compiler/fixedfunction/drawtrianglecodegen.h +++ b/src/r_compiler/fixedfunction/drawtrianglecodegen.h @@ -116,10 +116,10 @@ private: SSAInt x, y; SSAInt x0, x1, y0, y1; SSAInt diminishedlight; - SSAFloat varyingTL[TriVertex::NumVarying]; - SSAFloat varyingTR[TriVertex::NumVarying]; - SSAFloat varyingBL[TriVertex::NumVarying]; - SSAFloat varyingBR[TriVertex::NumVarying]; + SSAInt varyingPos[TriVertex::NumVarying]; + SSAInt varyingStepPos[TriVertex::NumVarying]; + SSAInt varyingStartStepX[TriVertex::NumVarying]; + SSAInt varyingIncrStepX[TriVertex::NumVarying]; SSAUBytePtr StencilBlock; SSAIntPtr StencilBlockMask; From 1442f7fb7bfe4a5b693b3a7c4c228037dfb9479f Mon Sep 17 00:00:00 2001 From: Edoardo Prezioso Date: Sun, 13 Nov 2016 16:53:18 +0100 Subject: [PATCH 314/912] - Fixed Clang on Linux compilation. 
--- src/r_compiler/llvm_include.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/r_compiler/llvm_include.h b/src/r_compiler/llvm_include.h index fa23f0c0cc..ad58666e3d 100644 --- a/src/r_compiler/llvm_include.h +++ b/src/r_compiler/llvm_include.h @@ -46,7 +46,7 @@ #endif -#ifdef __APPLE__ +#if defined(__APPLE__) || defined(__clang__) #define __STDC_LIMIT_MACROS // DataTypes.h:57:3: error: "Must #define __STDC_LIMIT_MACROS before #including Support/DataTypes.h" #define __STDC_CONSTANT_MACROS // DataTypes.h:61:3: error: "Must #define __STDC_CONSTANT_MACROS before " "#including Support/DataTypes.h" #pragma clang diagnostic push @@ -80,7 +80,7 @@ #include #endif -#ifdef __APPLE__ +#if defined(__APPLE__) || defined(__clang__) #pragma clang diagnostic pop #endif From c06798bca1a51b1fcc40bba874353022669ad81a Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 13 Nov 2016 17:58:03 +0100 Subject: [PATCH 315/912] Adjust texture coordinates --- src/r_compiler/fixedfunction/drawtrianglecodegen.cpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/r_compiler/fixedfunction/drawtrianglecodegen.cpp b/src/r_compiler/fixedfunction/drawtrianglecodegen.cpp index 257b550cef..77076b0721 100644 --- a/src/r_compiler/fixedfunction/drawtrianglecodegen.cpp +++ b/src/r_compiler/fixedfunction/drawtrianglecodegen.cpp @@ -123,6 +123,12 @@ void DrawTriangleCodegen::Setup(TriDrawVariant variant, bool truecolor) C3 = stack_C3.load(); // Gradients + v1.x = SSAFloat(X1) * 0.0625f; + v2.x = SSAFloat(X2) * 0.0625f; + v3.x = SSAFloat(X3) * 0.0625f; + v1.y = SSAFloat(Y1) * 0.0625f; + v2.y = SSAFloat(Y2) * 0.0625f; + v3.y = SSAFloat(Y3) * 0.0625f; gradWX = gradx(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); gradWY = grady(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); startW = v1.w + gradWX * (SSAFloat(minx) - v1.x) + gradWY * (SSAFloat(miny) - v1.y); @@ -218,8 +224,8 @@ void DrawTriangleCodegen::LoopBlockX(TriDrawVariant 
variant, bool truecolor) SSABool clipneeded = clipleft > x || clipright < (x + q) || cliptop > y || clipbottom < (y + q); // Calculate varying variables for affine block - SSAFloat offx0 = SSAFloat(x - minx) + 0.5f; - SSAFloat offy0 = SSAFloat(y - miny) + 0.5f; + SSAFloat offx0 = SSAFloat(x - minx); + SSAFloat offy0 = SSAFloat(y - miny); SSAFloat offx1 = offx0 + SSAFloat(q); SSAFloat offy1 = offy0 + SSAFloat(q); SSAFloat rcpWTL = 1.0f / (startW + offx0 * gradWX + offy0 * gradWY); From 3673338644f9b4cfd372a332869896ac25287fdc Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 14 Nov 2016 14:19:48 +0100 Subject: [PATCH 316/912] Split r_poly into multiple files handling each aspect of rendering a scene --- src/CMakeLists.txt | 8 + src/r_main.cpp | 6 +- src/r_poly.cpp | 1482 +---------------------------------- src/r_poly.h | 159 +--- src/r_poly_cull.cpp | 259 ++++++ src/r_poly_cull.h | 61 ++ src/r_poly_particle.cpp | 33 + src/r_poly_particle.h | 29 + src/r_poly_plane.cpp | 196 +++++ src/r_poly_plane.h | 34 + src/r_poly_playersprite.cpp | 299 +++++++ src/r_poly_playersprite.h | 59 ++ src/r_poly_sky.cpp | 193 +++++ src/r_poly_sky.h | 45 ++ src/r_poly_sprite.cpp | 301 +++++++ src/r_poly_sprite.h | 37 + src/r_poly_wall.cpp | 334 ++++++++ src/r_poly_wall.h | 82 ++ src/r_poly_wallsprite.cpp | 35 + src/r_poly_wallsprite.h | 31 + src/r_swrenderer.cpp | 15 +- 21 files changed, 2107 insertions(+), 1591 deletions(-) create mode 100644 src/r_poly_cull.cpp create mode 100644 src/r_poly_cull.h create mode 100644 src/r_poly_particle.cpp create mode 100644 src/r_poly_particle.h create mode 100644 src/r_poly_plane.cpp create mode 100644 src/r_poly_plane.h create mode 100644 src/r_poly_playersprite.cpp create mode 100644 src/r_poly_playersprite.h create mode 100644 src/r_poly_sky.cpp create mode 100644 src/r_poly_sky.h create mode 100644 src/r_poly_sprite.cpp create mode 100644 src/r_poly_sprite.h create mode 100644 src/r_poly_wall.cpp create mode 100644 src/r_poly_wall.h create 
mode 100644 src/r_poly_wallsprite.cpp create mode 100644 src/r_poly_wallsprite.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index afe89f568c..7933f80a70 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1070,6 +1070,14 @@ set( FASTMATH_PCH_SOURCES r_swrenderer.cpp r_swrenderer2.cpp r_poly.cpp + r_poly_cull.cpp + r_poly_particle.cpp + r_poly_plane.cpp + r_poly_playersprite.cpp + r_poly_wall.cpp + r_poly_wallsprite.cpp + r_poly_sprite.cpp + r_poly_sky.cpp r_poly_triangle.cpp r_poly_intersection.cpp r_3dfloors.cpp diff --git a/src/r_main.cpp b/src/r_main.cpp index 149353a874..1ba89eecd7 100644 --- a/src/r_main.cpp +++ b/src/r_main.cpp @@ -59,7 +59,6 @@ #include "v_font.h" #include "r_data/colormaps.h" #include "p_maputl.h" -#include "r_swrenderer2.h" #include "r_poly.h" #include "p_setup.h" #include "version.h" @@ -922,8 +921,7 @@ void R_RenderActorView (AActor *actor, bool dontmaplines) } else { - static RenderPolyBsp bsp; - bsp.Render(); + RenderPolyScene::Instance()->Render(); } R_3D_ResetClip(); // reset clips (floor/ceiling) camera->renderflags = savedflags; @@ -955,7 +953,7 @@ void R_RenderActorView (AActor *actor, bool dontmaplines) NetUpdate (); MaskedCycles.Clock(); - if (!r_newrenderer || !r_swtruecolor) + if (!r_newrenderer) R_DrawMasked (); MaskedCycles.Unclock(); diff --git a/src/r_poly.cpp b/src/r_poly.cpp index 94e27cd433..42cc84e5fe 100644 --- a/src/r_poly.cpp +++ b/src/r_poly.cpp @@ -1,5 +1,5 @@ /* -** Experimental Doom software renderer +** Polygon Doom software renderer ** Copyright (c) 2016 Magnus Norddahl ** ** This software is provided 'as-is', without any express or implied @@ -26,38 +26,27 @@ #include "sbar.h" #include "r_data/r_translate.h" #include "r_poly.h" -#include "r_draw.h" -#include "r_plane.h" // for yslope -#include "r_sky.h" // for skyflatnum -#include "r_things.h" // for pspritexscale - -EXTERN_CVAR(Bool, r_drawplayersprites) -EXTERN_CVAR(Bool, r_deathcamera) -EXTERN_CVAR(Bool, st_scale) CVAR(Bool, 
r_debug_cull, 0, 0) ///////////////////////////////////////////////////////////////////////////// -void RenderPolyBsp::Render() +void RenderPolyScene::Render() { if (!r_swtruecolor) // Disable pal rendering for now return; // Setup working buffers PolyVertexBuffer::Clear(); - ClearSolidSegments(); SectorSpriteRanges.clear(); SectorSpriteRanges.resize(numsectors); SortedSprites.clear(); TranslucentObjects.clear(); - PvsSectors.clear(); - ScreenSprites.clear(); PolyStencilBuffer::Instance()->Clear(viewwidth, viewheight, 0); PolySubsectorGBuffer::Instance()->Resize(dc_pitch, viewheight); NextSubsectorDepth = 0; - // Perspective correct: + // Setup perspective matrix: float ratio = WidescreenRatio; float fovratio = (WidescreenRatio >= 1.3f) ? 1.333333f : ratio; float fovy = (float)(2 * DAngle::ToDegrees(atan(tan(FieldOfView.Radians() / 2) / fovratio)).Degrees); @@ -67,56 +56,35 @@ void RenderPolyBsp::Render() TriMatrix::rotate((float)(ViewAngle - 90).Radians(), 0.0f, -1.0f, 0.0f) * TriMatrix::swapYZ() * TriMatrix::translate((float)-ViewPos.X, (float)-ViewPos.Y, (float)-ViewPos.Z); - worldToClip = TriMatrix::perspective(fovy, ratio, 5.0f, 65535.0f) * worldToView; + WorldToClip = TriMatrix::perspective(fovy, ratio, 5.0f, 65535.0f) * worldToView; - // Y shearing like the Doom renderer: - //worldToClip = TriMatrix::viewToClip() * TriMatrix::worldToView(); - - frustumPlanes = FrustumPlanes(worldToClip); - - // Cull front to back - if (numnodes == 0) - { - PvsSectors.push_back(subsectors); - MaxCeilingHeight = subsectors->sector->ceilingplane.Zat0(); - MinFloorHeight = subsectors->sector->floorplane.Zat0(); - } - else - { - MaxCeilingHeight = 0.0; - MinFloorHeight = 0.0; - RenderNode(nodes + numnodes - 1); // The head node is the last node output. 
- } - - // Render front to back - ClearSolidSegments(); + Cull.CullScene(WorldToClip); if (r_debug_cull) { - for (auto it = PvsSectors.rbegin(); it != PvsSectors.rend(); ++it) + for (auto it = Cull.PvsSectors.rbegin(); it != Cull.PvsSectors.rend(); ++it) RenderSubsector(*it); } else { - for (auto it = PvsSectors.begin(); it != PvsSectors.end(); ++it) + for (auto it = Cull.PvsSectors.begin(); it != Cull.PvsSectors.end(); ++it) RenderSubsector(*it); } - skydome.Render(worldToClip); + skydome.Render(WorldToClip); RenderTranslucent(); - RenderPlayerSprites(); + PlayerSprites.Render(); DrawerCommandQueue::WaitForWorkers(); - RenderScreenSprites(); // To do: should be called by FSoftwareRenderer::DrawRemainingPlayerSprites instead of here + RenderRemainingPlayerSprites(); // To do: should be called by FSoftwareRenderer::DrawRemainingPlayerSprites instead of here } -void RenderPolyBsp::RenderScreenSprites() +void RenderPolyScene::RenderRemainingPlayerSprites() { - for (auto &sprite : ScreenSprites) - sprite.Render(); + PlayerSprites.RenderRemainingSprites(); } -void RenderPolyBsp::RenderSubsector(subsector_t *sub) +void RenderPolyScene::RenderSubsector(subsector_t *sub) { sector_t *frontsector = sub->sector; frontsector->MoreFlags |= SECF_DRAWN; @@ -125,15 +93,16 @@ void RenderPolyBsp::RenderSubsector(subsector_t *sub) if (sub->sector->CenterFloor() != sub->sector->CenterCeiling()) { - RenderPlane(sub, subsectorDepth, true); - RenderPlane(sub, subsectorDepth, false); + RenderPolyPlane plane; + plane.Render(WorldToClip, sub, subsectorDepth, true, Cull.MaxCeilingHeight); + plane.Render(WorldToClip, sub, subsectorDepth, false, Cull.MinFloorHeight); } for (uint32_t i = 0; i < sub->numlines; i++) { seg_t *line = &sub->firstline[i]; if (line->sidedef == nullptr || !(line->sidedef->Flags & WALLF_POLYOBJ)) - AddLine(line, frontsector, subsectorDepth); + RenderLine(line, frontsector, subsectorDepth); } SpriteRange sprites = GetSpritesForSector(sub->sector); @@ -143,202 +112,11 @@ 
void RenderPolyBsp::RenderSubsector(subsector_t *sub) TranslucentObjects.push_back({ thing, sub, subsectorDepth }); } - TranslucentObjects.insert(TranslucentObjects.end(), TempTranslucentWalls.begin(), TempTranslucentWalls.end()); - TempTranslucentWalls.clear(); + TranslucentObjects.insert(TranslucentObjects.end(), SubsectorTranslucentWalls.begin(), SubsectorTranslucentWalls.end()); + SubsectorTranslucentWalls.clear(); } -void RenderPolyBsp::RenderTranslucent() -{ - for (auto it = TranslucentObjects.rbegin(); it != TranslucentObjects.rend(); ++it) - { - auto &obj = *it; - if (!obj.thing) - obj.wall.Render(worldToClip); - else if ((obj.thing->renderflags & RF_SPRITETYPEMASK) == RF_WALLSPRITE) - AddWallSprite(obj.thing, obj.sub, obj.subsectorDepth); - else - AddSprite(obj.thing, obj.sub, obj.subsectorDepth); - } -} - -void RenderPolyBsp::RenderPlane(subsector_t *sub, uint32_t subsectorDepth, bool ceiling) -{ - sector_t *frontsector = sub->sector; - - FTextureID picnum = frontsector->GetTexture(ceiling ? sector_t::ceiling : sector_t::floor); - FTexture *tex = TexMan(picnum); - if (tex->UseType == FTexture::TEX_Null) - return; - - bool isSky = picnum == skyflatnum; - double skyHeight = ceiling ? MaxCeilingHeight : MinFloorHeight; - - TriUniforms uniforms; - uniforms.objectToClip = worldToClip; - uniforms.light = (uint32_t)(frontsector->lightlevel / 255.0f * 256.0f); - if (fixedlightlev >= 0) - uniforms.light = (uint32_t)(fixedlightlev / 255.0f * 256.0f); - else if (fixedcolormap) - uniforms.light = 256; - uniforms.flags = 0; - uniforms.subsectorDepth = isSky ? 
SkySubsectorDepth : subsectorDepth; - - /* - double vis = r_FloorVisibility / (plane.Zat0() - ViewPos.Z); - if (fixedlightlev >= 0) - R_SetDSColorMapLight(sector->ColorMap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); - else if (fixedcolormap) - R_SetDSColorMapLight(fixedcolormap, 0, 0); - else - R_SetDSColorMapLight(sector->ColorMap, (float)(vis * fabs(CenterY - y)), LIGHT2SHADE(sector->lightlevel)); - */ - - TriVertex *vertices = PolyVertexBuffer::GetVertices(sub->numlines); - if (!vertices) - return; - - if (ceiling) - { - for (uint32_t i = 0; i < sub->numlines; i++) - { - seg_t *line = &sub->firstline[i]; - vertices[sub->numlines - 1 - i] = PlaneVertex(line->v1, frontsector, isSky ? skyHeight : frontsector->ceilingplane.ZatPoint(line->v1)); - } - } - else - { - for (uint32_t i = 0; i < sub->numlines; i++) - { - seg_t *line = &sub->firstline[i]; - vertices[i] = PlaneVertex(line->v1, frontsector, isSky ? skyHeight : frontsector->floorplane.ZatPoint(line->v1)); - } - } - - PolyDrawArgs args; - args.uniforms = uniforms; - args.vinput = vertices; - args.vcount = sub->numlines; - args.mode = TriangleDrawMode::Fan; - args.ccw = true; - args.clipleft = 0; - args.cliptop = 0; - args.clipright = viewwidth; - args.clipbottom = viewheight; - args.stenciltestvalue = 0; - args.stencilwritevalue = 1; - - if (!isSky) - { - args.SetTexture(tex); - PolyTriangleDrawer::draw(args, TriDrawVariant::Draw); - PolyTriangleDrawer::draw(args, TriDrawVariant::Stencil); - } - else - { - args.stencilwritevalue = 255; - PolyTriangleDrawer::draw(args, TriDrawVariant::Stencil); - - for (uint32_t i = 0; i < sub->numlines; i++) - { - TriVertex *wallvert = PolyVertexBuffer::GetVertices(4); - if (!wallvert) - return; - - seg_t *line = &sub->firstline[i]; - - bool closedSky = false; - if (line->backsector) - { - sector_t *backsector = (line->backsector != line->frontsector) ? 
line->backsector : line->frontsector; - - double frontceilz1 = frontsector->ceilingplane.ZatPoint(line->v1); - double frontfloorz1 = frontsector->floorplane.ZatPoint(line->v1); - double frontceilz2 = frontsector->ceilingplane.ZatPoint(line->v2); - double frontfloorz2 = frontsector->floorplane.ZatPoint(line->v2); - - double backceilz1 = backsector->ceilingplane.ZatPoint(line->v1); - double backfloorz1 = backsector->floorplane.ZatPoint(line->v1); - double backceilz2 = backsector->ceilingplane.ZatPoint(line->v2); - double backfloorz2 = backsector->floorplane.ZatPoint(line->v2); - - double topceilz1 = frontceilz1; - double topceilz2 = frontceilz2; - double topfloorz1 = MIN(backceilz1, frontceilz1); - double topfloorz2 = MIN(backceilz2, frontceilz2); - double bottomceilz1 = MAX(frontfloorz1, backfloorz1); - double bottomceilz2 = MAX(frontfloorz2, backfloorz2); - double bottomfloorz1 = frontfloorz1; - double bottomfloorz2 = frontfloorz2; - double middleceilz1 = topfloorz1; - double middleceilz2 = topfloorz2; - double middlefloorz1 = MIN(bottomceilz1, middleceilz1); - double middlefloorz2 = MIN(bottomceilz2, middleceilz2); - - bool bothSkyCeiling = frontsector->GetTexture(sector_t::ceiling) == skyflatnum && backsector->GetTexture(sector_t::ceiling) == skyflatnum; - bool bothSkyFloor = frontsector->GetTexture(sector_t::floor) == skyflatnum && backsector->GetTexture(sector_t::floor) == skyflatnum; - - bool closedSector = backceilz1 == backfloorz1 && backceilz2 == backfloorz2; - closedSky = (ceiling && bothSkyCeiling && closedSector) || (!ceiling && bothSkyFloor && closedSector); - if (!closedSky) - { - bool topwall = (topceilz1 > topfloorz1 || topceilz2 > topfloorz2) && line->sidedef && !bothSkyCeiling; - bool bottomwall = (bottomfloorz1 < bottomceilz1 || bottomfloorz2 < bottomceilz2) && line->sidedef && !bothSkyFloor; - if ((ceiling && !topwall) || (!ceiling && !bottomwall)) - continue; - } - } - - if (ceiling) - { - wallvert[0] = PlaneVertex(line->v1, frontsector, 
skyHeight); - wallvert[1] = PlaneVertex(line->v2, frontsector, skyHeight); - if (!closedSky) - { - wallvert[2] = PlaneVertex(line->v2, frontsector, frontsector->ceilingplane.ZatPoint(line->v2)); - wallvert[3] = PlaneVertex(line->v1, frontsector, frontsector->ceilingplane.ZatPoint(line->v1)); - } - else - { - wallvert[2] = PlaneVertex(line->v2, frontsector, frontsector->floorplane.ZatPoint(line->v2)); - wallvert[3] = PlaneVertex(line->v1, frontsector, frontsector->floorplane.ZatPoint(line->v1)); - } - } - else - { - if (!closedSky) - { - wallvert[0] = PlaneVertex(line->v1, frontsector, frontsector->floorplane.ZatPoint(line->v1)); - wallvert[1] = PlaneVertex(line->v2, frontsector, frontsector->floorplane.ZatPoint(line->v2)); - } - else - { - wallvert[0] = PlaneVertex(line->v1, frontsector, frontsector->ceilingplane.ZatPoint(line->v1)); - wallvert[1] = PlaneVertex(line->v2, frontsector, frontsector->ceilingplane.ZatPoint(line->v2)); - } - wallvert[2] = PlaneVertex(line->v2, frontsector, skyHeight); - wallvert[3] = PlaneVertex(line->v1, frontsector, skyHeight); - } - - args.vinput = wallvert; - args.vcount = 4; - PolyTriangleDrawer::draw(args, TriDrawVariant::Stencil); - } - } -} - -TriVertex RenderPolyBsp::PlaneVertex(vertex_t *v1, sector_t *sector, double height) -{ - TriVertex v; - v.x = (float)v1->fPos().X; - v.y = (float)v1->fPos().Y; - v.z = (float)height; - v.w = 1.0f; - v.varying[0] = v.x / 64.0f; - v.varying[1] = 1.0f - v.y / 64.0f; - return v; -} - -SpriteRange RenderPolyBsp::GetSpritesForSector(sector_t *sector) +SpriteRange RenderPolyScene::GetSpritesForSector(sector_t *sector) { if (SectorSpriteRanges.size() < sector->sectornum || sector->sectornum < 0) return SpriteRange(); @@ -358,7 +136,7 @@ SpriteRange RenderPolyBsp::GetSpritesForSector(sector_t *sector) return range; } -void RenderPolyBsp::AddLine(seg_t *line, sector_t *frontsector, uint32_t subsectorDepth) +void RenderPolyScene::RenderLine(seg_t *line, sector_t *frontsector, uint32_t subsectorDepth) 
{ // Reject lines not facing viewer DVector2 pt1 = line->v1->fPos() - ViewPos; @@ -368,1063 +146,44 @@ void RenderPolyBsp::AddLine(seg_t *line, sector_t *frontsector, uint32_t subsect // Cull wall if not visible int sx1, sx2; - bool hasSegmentRange = GetSegmentRangeForLine(line->v1->fX(), line->v1->fY(), line->v2->fX(), line->v2->fY(), sx1, sx2); - if (hasSegmentRange && IsSegmentCulled(sx1, sx2)) + bool hasSegmentRange = Cull.GetSegmentRangeForLine(line->v1->fX(), line->v1->fY(), line->v2->fX(), line->v2->fY(), sx1, sx2); + if (hasSegmentRange && Cull.IsSegmentCulled(sx1, sx2)) return; - double frontceilz1 = frontsector->ceilingplane.ZatPoint(line->v1); - double frontfloorz1 = frontsector->floorplane.ZatPoint(line->v1); - double frontceilz2 = frontsector->ceilingplane.ZatPoint(line->v2); - double frontfloorz2 = frontsector->floorplane.ZatPoint(line->v2); - - RenderPolyWall wall; - wall.Line = line; - wall.Colormap = frontsector->ColorMap; - wall.Masked = false; - wall.SubsectorDepth = subsectorDepth; - - if (line->backsector == nullptr) + // Render wall, and update culling info if its an occlusion blocker + if (RenderPolyWall::RenderLine(WorldToClip, line, frontsector, subsectorDepth, SubsectorTranslucentWalls)) { - if (line->sidedef) - { - wall.SetCoords(line->v1->fPos(), line->v2->fPos(), frontceilz1, frontfloorz1, frontceilz2, frontfloorz2); - wall.TopZ = frontceilz1; - wall.BottomZ = frontfloorz1; - wall.UnpeggedCeil = frontceilz1; - wall.Texpart = side_t::mid; - wall.Render(worldToClip); - if (hasSegmentRange) - MarkSegmentCulled(sx1, sx2); - } - } - else - { - sector_t *backsector = (line->backsector != line->frontsector) ? 
line->backsector : line->frontsector; - - double backceilz1 = backsector->ceilingplane.ZatPoint(line->v1); - double backfloorz1 = backsector->floorplane.ZatPoint(line->v1); - double backceilz2 = backsector->ceilingplane.ZatPoint(line->v2); - double backfloorz2 = backsector->floorplane.ZatPoint(line->v2); - - double topceilz1 = frontceilz1; - double topceilz2 = frontceilz2; - double topfloorz1 = MIN(backceilz1, frontceilz1); - double topfloorz2 = MIN(backceilz2, frontceilz2); - double bottomceilz1 = MAX(frontfloorz1, backfloorz1); - double bottomceilz2 = MAX(frontfloorz2, backfloorz2); - double bottomfloorz1 = frontfloorz1; - double bottomfloorz2 = frontfloorz2; - double middleceilz1 = topfloorz1; - double middleceilz2 = topfloorz2; - double middlefloorz1 = MIN(bottomceilz1, middleceilz1); - double middlefloorz2 = MIN(bottomceilz2, middleceilz2); - - bool bothSkyCeiling = frontsector->GetTexture(sector_t::ceiling) == skyflatnum && backsector->GetTexture(sector_t::ceiling) == skyflatnum; - bool bothSkyFloor = frontsector->GetTexture(sector_t::floor) == skyflatnum && backsector->GetTexture(sector_t::floor) == skyflatnum; - - if ((topceilz1 > topfloorz1 || topceilz2 > topfloorz2) && line->sidedef && !bothSkyCeiling) - { - wall.SetCoords(line->v1->fPos(), line->v2->fPos(), topceilz1, topfloorz1, topceilz2, topfloorz2); - wall.TopZ = topceilz1; - wall.BottomZ = topfloorz1; - wall.UnpeggedCeil = topceilz1; - wall.Texpart = side_t::top; - wall.Render(worldToClip); - } - - if ((bottomfloorz1 < bottomceilz1 || bottomfloorz2 < bottomceilz2) && line->sidedef && !bothSkyFloor) - { - wall.SetCoords(line->v1->fPos(), line->v2->fPos(), bottomceilz1, bottomfloorz2, bottomceilz2, bottomfloorz2); - wall.TopZ = bottomceilz1; - wall.BottomZ = bottomfloorz2; - wall.UnpeggedCeil = topceilz1; - wall.Texpart = side_t::bottom; - wall.Render(worldToClip); - } - - if (line->sidedef) - { - FTexture *midtex = TexMan(line->sidedef->GetTexture(side_t::mid), true); - if (midtex && midtex->UseType 
!= FTexture::TEX_Null) - { - wall.SetCoords(line->v1->fPos(), line->v2->fPos(), middleceilz1, middlefloorz1, middleceilz2, middlefloorz2); - wall.TopZ = middleceilz1; - wall.BottomZ = middlefloorz1; - wall.UnpeggedCeil = topceilz1; - wall.Texpart = side_t::mid; - wall.Masked = true; - TempTranslucentWalls.push_back({ wall }); - } - } + if (hasSegmentRange) + Cull.MarkSegmentCulled(sx1, sx2); } } -bool RenderPolyBsp::IsThingCulled(AActor *thing) +void RenderPolyScene::RenderTranslucent() { - FIntCVar *cvar = thing->GetClass()->distancecheck; - if (cvar != nullptr && *cvar >= 0) + for (auto it = TranslucentObjects.rbegin(); it != TranslucentObjects.rend(); ++it) { - double dist = (thing->Pos() - ViewPos).LengthSquared(); - double check = (double)**cvar; - if (dist >= check * check) - return true; - } - - // Don't waste time projecting sprites that are definitely not visible. - if (thing == nullptr || - (thing->renderflags & RF_INVISIBLE) || - !thing->RenderStyle.IsVisible(thing->Alpha) || - !thing->IsVisibleToPlayer()) - { - return true; - } - - return false; -} - -void RenderPolyBsp::AddSprite(AActor *thing, subsector_t *sub, uint32_t subsectorDepth) -{ - if (IsThingCulled(thing)) - return; - - DVector3 pos = thing->InterpolatedPosition(r_TicFracF); - pos.Z += thing->GetBobOffset(r_TicFracF); - - bool flipTextureX = false; - FTexture *tex = GetSpriteTexture(thing, flipTextureX); - if (tex == nullptr) - return; - DVector2 spriteScale = thing->Scale; - double thingxscalemul = spriteScale.X / tex->Scale.X; - double thingyscalemul = spriteScale.Y / tex->Scale.Y; - - if (flipTextureX) - pos.X -= (tex->GetWidth() - tex->LeftOffset) * thingxscalemul; - else - pos.X -= tex->LeftOffset * thingxscalemul; - - //pos.Z -= tex->TopOffset * thingyscalemul; - pos.Z -= (tex->GetHeight() - tex->TopOffset) * thingyscalemul + thing->Floorclip; - - double spriteHalfWidth = thingxscalemul * tex->GetWidth() * 0.5; - double spriteHeight = thingyscalemul * tex->GetHeight(); - - pos.X += 
spriteHalfWidth; - - DVector2 points[2] = - { - { pos.X - ViewSin * spriteHalfWidth, pos.Y + ViewCos * spriteHalfWidth }, - { pos.X + ViewSin * spriteHalfWidth, pos.Y - ViewCos * spriteHalfWidth } - }; - - // Is this sprite inside? (To do: clip the points) - for (int i = 0; i < 2; i++) - { - for (uint32_t i = 0; i < sub->numlines; i++) + auto &obj = *it; + if (!obj.thing) { - seg_t *line = &sub->firstline[i]; - double nx = line->v1->fY() - line->v2->fY(); - double ny = line->v2->fX() - line->v1->fX(); - double d = -(line->v1->fX() * nx + line->v1->fY() * ny); - if (pos.X * nx + pos.Y * ny + d > 0.0) - return; + obj.wall.Render(WorldToClip); } - } - - //double depth = 1.0; - //visstyle_t visstyle = GetSpriteVisStyle(thing, depth); - // Rumor has it that AlterWeaponSprite needs to be called with visstyle passed in somewhere around here.. - //R_SetColorMapLight(visstyle.BaseColormap, 0, visstyle.ColormapNum << FRACBITS); - - TriVertex *vertices = PolyVertexBuffer::GetVertices(4); - if (!vertices) - return; - - bool foggy = false; - int actualextralight = foggy ? 0 : extralight << 4; - - std::pair offsets[4] = - { - { 0.0f, 1.0f }, - { 1.0f, 1.0f }, - { 1.0f, 0.0f }, - { 0.0f, 0.0f }, - }; - - for (int i = 0; i < 4; i++) - { - auto &p = (i == 0 || i == 3) ? 
points[0] : points[1]; - - vertices[i].x = (float)p.X; - vertices[i].y = (float)p.Y; - vertices[i].z = (float)(pos.Z + spriteHeight * offsets[i].second); - vertices[i].w = 1.0f; - vertices[i].varying[0] = (float)(offsets[i].first * tex->Scale.X); - vertices[i].varying[1] = (float)((1.0f - offsets[i].second) * tex->Scale.Y); - if (flipTextureX) - vertices[i].varying[0] = 1.0f - vertices[i].varying[0]; - } - - TriUniforms uniforms; - uniforms.objectToClip = worldToClip; - uniforms.light = (uint32_t)((thing->Sector->lightlevel + actualextralight) / 255.0f * 256.0f); - uniforms.flags = 0; - uniforms.subsectorDepth = subsectorDepth; - - PolyDrawArgs args; - args.uniforms = uniforms; - args.vinput = vertices; - args.vcount = 4; - args.mode = TriangleDrawMode::Fan; - args.ccw = true; - args.clipleft = 0; - args.cliptop = 0; - args.clipright = viewwidth; - args.clipbottom = viewheight; - args.stenciltestvalue = 0; - args.stencilwritevalue = 1; - args.SetTexture(tex); - PolyTriangleDrawer::draw(args, TriDrawVariant::DrawSubsector); -} - -void RenderPolyBsp::AddWallSprite(AActor *thing, subsector_t *sub, uint32_t subsectorDepth) -{ - if (IsThingCulled(thing)) - return; -} - -visstyle_t RenderPolyBsp::GetSpriteVisStyle(AActor *thing, double z) -{ - visstyle_t visstyle; - - bool foggy = false; - int actualextralight = foggy ? 0 : extralight << 4; - int spriteshade = LIGHT2SHADE(thing->Sector->lightlevel + actualextralight); - - visstyle.RenderStyle = thing->RenderStyle; - visstyle.Alpha = float(thing->Alpha); - visstyle.ColormapNum = 0; - - // The software renderer cannot invert the source without inverting the overlay - // too. That means if the source is inverted, we need to do the reverse of what - // the invert overlay flag says to do. 
- bool invertcolormap = (visstyle.RenderStyle.Flags & STYLEF_InvertOverlay) != 0; - - if (visstyle.RenderStyle.Flags & STYLEF_InvertSource) - { - invertcolormap = !invertcolormap; - } - - FDynamicColormap *mybasecolormap = thing->Sector->ColorMap; - - // Sprites that are added to the scene must fade to black. - if (visstyle.RenderStyle == LegacyRenderStyles[STYLE_Add] && mybasecolormap->Fade != 0) - { - mybasecolormap = GetSpecialLights(mybasecolormap->Color, 0, mybasecolormap->Desaturate); - } - - if (visstyle.RenderStyle.Flags & STYLEF_FadeToBlack) - { - if (invertcolormap) - { // Fade to white - mybasecolormap = GetSpecialLights(mybasecolormap->Color, MAKERGB(255, 255, 255), mybasecolormap->Desaturate); - invertcolormap = false; - } - else - { // Fade to black - mybasecolormap = GetSpecialLights(mybasecolormap->Color, MAKERGB(0, 0, 0), mybasecolormap->Desaturate); - } - } - - // get light level - if (fixedcolormap != nullptr) - { // fixed map - visstyle.BaseColormap = fixedcolormap; - visstyle.ColormapNum = 0; - } - else - { - if (invertcolormap) + else if ((obj.thing->renderflags & RF_SPRITETYPEMASK) == RF_WALLSPRITE) { - mybasecolormap = GetSpecialLights(mybasecolormap->Color, mybasecolormap->Fade.InverseColor(), mybasecolormap->Desaturate); - } - if (fixedlightlev >= 0) - { - visstyle.BaseColormap = mybasecolormap; - visstyle.ColormapNum = fixedlightlev >> COLORMAPSHIFT; - } - else if (!foggy && ((thing->renderflags & RF_FULLBRIGHT) || (thing->flags5 & MF5_BRIGHT))) - { // full bright - visstyle.BaseColormap = mybasecolormap; - visstyle.ColormapNum = 0; - } - else - { // diminished light - double minz = double((2048 * 4) / double(1 << 20)); - visstyle.ColormapNum = GETPALOOKUP(r_SpriteVisibility / MAX(z, minz), spriteshade); - visstyle.BaseColormap = mybasecolormap; - } - } - - return visstyle; -} - -FTexture *RenderPolyBsp::GetSpriteTexture(AActor *thing, /*out*/ bool &flipX) -{ - flipX = false; - if (thing->picnum.isValid()) - { - FTexture *tex = 
TexMan(thing->picnum); - if (tex->UseType == FTexture::TEX_Null) - { - return nullptr; - } - - if (tex->Rotations != 0xFFFF) - { - // choose a different rotation based on player view - spriteframe_t *sprframe = &SpriteFrames[tex->Rotations]; - DVector3 pos = thing->InterpolatedPosition(r_TicFracF); - pos.Z += thing->GetBobOffset(r_TicFracF); - DAngle ang = (pos - ViewPos).Angle(); - angle_t rot; - if (sprframe->Texture[0] == sprframe->Texture[1]) - { - rot = (ang - thing->Angles.Yaw + 45.0 / 2 * 9).BAMs() >> 28; - } - else - { - rot = (ang - thing->Angles.Yaw + (45.0 / 2 * 9 - 180.0 / 16)).BAMs() >> 28; - } - flipX = (sprframe->Flip & (1 << rot)) != 0; - tex = TexMan[sprframe->Texture[rot]]; // Do not animate the rotation - } - return tex; - } - else - { - // decide which texture to use for the sprite - int spritenum = thing->sprite; - if (spritenum >= (signed)sprites.Size() || spritenum < 0) - return nullptr; - - spritedef_t *sprdef = &sprites[spritenum]; - if (thing->frame >= sprdef->numframes) - { - // If there are no frames at all for this sprite, don't draw it. 
- return nullptr; + RenderPolyWallSprite wallspr; + wallspr.Render(obj.thing, obj.sub, obj.subsectorDepth); } else { - //picnum = SpriteFrames[sprdef->spriteframes + thing->frame].Texture[0]; - // choose a different rotation based on player view - spriteframe_t *sprframe = &SpriteFrames[sprdef->spriteframes + thing->frame]; - DVector3 pos = thing->InterpolatedPosition(r_TicFracF); - pos.Z += thing->GetBobOffset(r_TicFracF); - DAngle ang = (pos - ViewPos).Angle(); - angle_t rot; - if (sprframe->Texture[0] == sprframe->Texture[1]) - { - rot = (ang - thing->Angles.Yaw + 45.0 / 2 * 9).BAMs() >> 28; - } - else - { - rot = (ang - thing->Angles.Yaw + (45.0 / 2 * 9 - 180.0 / 16)).BAMs() >> 28; - } - flipX = (sprframe->Flip & (1 << rot)) != 0; - return TexMan[sprframe->Texture[rot]]; // Do not animate the rotation + RenderPolySprite spr; + spr.Render(WorldToClip, obj.thing, obj.sub, obj.subsectorDepth); } } } -void RenderPolyBsp::RenderNode(void *node) +RenderPolyScene *RenderPolyScene::Instance() { - while (!((size_t)node & 1)) // Keep going until found a subsector - { - node_t *bsp = (node_t *)node; - - // Decide which side the view point is on. - int side = PointOnSide(ViewPos, bsp); - - // Recursively divide front space (toward the viewer). - RenderNode(bsp->children[side]); - - // Possibly divide back space (away from the viewer). 
- side ^= 1; - if (!CheckBBox(bsp->bbox[side])) - return; - - node = bsp->children[side]; - } - - // Mark that we need to render this - subsector_t *sub = (subsector_t *)((BYTE *)node - 1); - MaxCeilingHeight = MAX(MaxCeilingHeight, sub->sector->ceilingplane.Zat0()); - MinFloorHeight = MIN(MinFloorHeight, sub->sector->floorplane.Zat0()); - PvsSectors.push_back(sub); - - // Update culling info for further bsp clipping - for (uint32_t i = 0; i < sub->numlines; i++) - { - seg_t *line = &sub->firstline[i]; - if ((line->sidedef == nullptr || !(line->sidedef->Flags & WALLF_POLYOBJ)) && line->backsector == nullptr) - { - int sx1, sx2; - if (GetSegmentRangeForLine(line->v1->fX(), line->v1->fY(), line->v2->fX(), line->v2->fY(), sx1, sx2)) - { - MarkSegmentCulled(sx1, sx2); - } - } - } -} - -void RenderPolyBsp::RenderPlayerSprites() -{ - if (!r_drawplayersprites || - !camera || - !camera->player || - (players[consoleplayer].cheats & CF_CHASECAM) || - (r_deathcamera && camera->health <= 0)) - return; - - float bobx, boby; - P_BobWeapon(camera->player, &bobx, &boby, r_TicFracF); - - // Interpolate the main weapon layer once so as to be able to add it to other layers. - double wx, wy; - DPSprite *weapon = camera->player->FindPSprite(PSP_WEAPON); - if (weapon) - { - if (weapon->firstTic) - { - wx = weapon->x; - wy = weapon->y; - } - else - { - wx = weapon->oldx + (weapon->x - weapon->oldx) * r_TicFracF; - wy = weapon->oldy + (weapon->y - weapon->oldy) * r_TicFracF; - } - } - else - { - wx = 0; - wy = 0; - } - - for (DPSprite *sprite = camera->player->psprites; sprite != nullptr; sprite = sprite->GetNext()) - { - // [RH] Don't draw the targeter's crosshair if the player already has a crosshair set. - // It's possible this psprite's caller is now null but the layer itself hasn't been destroyed - // because it didn't tick yet (if we typed 'take all' while in the console for example). - // In this case let's simply not draw it to avoid crashing. 
- if ((sprite->GetID() != PSP_TARGETCENTER || CrosshairImage == nullptr) && sprite->GetCaller() != nullptr) - { - RenderPlayerSprite(sprite, camera, bobx, boby, wx, wy, r_TicFracF); - } - } -} - -void RenderPolyBsp::RenderPlayerSprite(DPSprite *sprite, AActor *owner, float bobx, float boby, double wx, double wy, double ticfrac) -{ - // decide which patch to use - if ((unsigned)sprite->GetSprite() >= (unsigned)sprites.Size()) - { - DPrintf(DMSG_ERROR, "RenderPlayerSprite: invalid sprite number %i\n", sprite->GetSprite()); - return; - } - - spritedef_t *def = &sprites[sprite->GetSprite()]; - if (sprite->GetFrame() >= def->numframes) - { - DPrintf(DMSG_ERROR, "RenderPlayerSprite: invalid sprite frame %i : %i\n", sprite->GetSprite(), sprite->GetFrame()); - return; - } - - spriteframe_t *frame = &SpriteFrames[def->spriteframes + sprite->GetFrame()]; - FTextureID picnum = frame->Texture[0]; - bool flip = (frame->Flip & 1) != 0; - - FTexture *tex = TexMan(picnum); - if (tex->UseType == FTexture::TEX_Null) - return; - - // Can't interpolate the first tic. 
- if (sprite->firstTic) - { - sprite->firstTic = false; - sprite->oldx = sprite->x; - sprite->oldy = sprite->y; - } - - double sx = sprite->oldx + (sprite->x - sprite->oldx) * ticfrac; - double sy = sprite->oldy + (sprite->y - sprite->oldy) * ticfrac; - - if (sprite->Flags & PSPF_ADDBOB) - { - sx += bobx; - sy += boby; - } - - if (sprite->Flags & PSPF_ADDWEAPON && sprite->GetID() != PSP_WEAPON) - { - sx += wx; - sy += wy; - } - - // calculate edges of the shape - double tx = sx - BaseXCenter; - - tx -= tex->GetScaledLeftOffset(); - int x1 = xs_RoundToInt(CenterX + tx * pspritexscale); - - // off the right side - if (x1 > viewwidth) - return; - - tx += tex->GetScaledWidth(); - int x2 = xs_RoundToInt(CenterX + tx * pspritexscale); - - // off the left side - if (x2 <= 0) - return; - - double texturemid = (BaseYCenter - sy) * tex->Scale.Y + tex->TopOffset; - - // Adjust PSprite for fullscreen views - if (camera->player && (RenderTarget != screen || viewheight == RenderTarget->GetHeight() || (RenderTarget->GetWidth() > (BaseXCenter * 2) && !st_scale))) - { - AWeapon *weapon = dyn_cast(sprite->GetCaller()); - if (weapon != nullptr && weapon->YAdjust != 0) - { - if (RenderTarget != screen || viewheight == RenderTarget->GetHeight()) - { - texturemid -= weapon->YAdjust; - } - else - { - texturemid -= StatusBar->GetDisplacement() * weapon->YAdjust; - } - } - } - - // Move the weapon down for 1280x1024. 
- if (sprite->GetID() < PSP_TARGETCENTER) - { - texturemid -= AspectPspriteOffset(WidescreenRatio); - } - - int clipped_x1 = MAX(x1, 0); - int clipped_x2 = MIN(x2, viewwidth); - double xscale = pspritexscale / tex->Scale.X; - double yscale = pspriteyscale / tex->Scale.Y; - uint32_t translation = 0; // [RH] Use default colors - - double xiscale, startfrac; - if (flip) - { - xiscale = -pspritexiscale * tex->Scale.X; - startfrac = 1; - } - else - { - xiscale = pspritexiscale * tex->Scale.X; - startfrac = 0; - } - - if (clipped_x1 > x1) - startfrac += xiscale * (clipped_x1 - x1); - - bool noaccel = false; - - FDynamicColormap *basecolormap = viewsector->ColorMap; - FDynamicColormap *colormap_to_use = basecolormap; - - visstyle_t visstyle; - visstyle.ColormapNum = 0; - visstyle.BaseColormap = basecolormap; - visstyle.Alpha = 0; - visstyle.RenderStyle = STYLE_Normal; - - bool foggy = false; - int actualextralight = foggy ? 0 : extralight << 4; - int spriteshade = LIGHT2SHADE(owner->Sector->lightlevel + actualextralight); - double minz = double((2048 * 4) / double(1 << 20)); - visstyle.ColormapNum = GETPALOOKUP(r_SpriteVisibility / minz, spriteshade); - - if (sprite->GetID() < PSP_TARGETCENTER) - { - // Lots of complicated style and noaccel stuff - } - - // Check for hardware-assisted 2D. If it's available, and this sprite is not - // fuzzy, don't draw it until after the switch to 2D mode. 
- if (!noaccel && RenderTarget == screen && (DFrameBuffer *)screen->Accel2D) - { - FRenderStyle style = visstyle.RenderStyle; - style.CheckFuzz(); - if (style.BlendOp != STYLEOP_Fuzz) - { - PolyScreenSprite screenSprite; - screenSprite.Pic = tex; - screenSprite.X1 = viewwindowx + x1; - screenSprite.Y1 = viewwindowy + viewheight / 2 - texturemid * yscale - 0.5; - screenSprite.Width = tex->GetWidth() * xscale; - screenSprite.Height = tex->GetHeight() * yscale; - screenSprite.Translation = TranslationToTable(translation); - screenSprite.Flip = xiscale < 0; - screenSprite.visstyle = visstyle; - screenSprite.Colormap = colormap_to_use; - ScreenSprites.push_back(screenSprite); - return; - } - } - - //R_DrawVisSprite(vis); -} - -void RenderPolyBsp::ClearSolidSegments() -{ - SolidSegments.clear(); - SolidSegments.reserve(SolidCullScale + 2); - SolidSegments.push_back({ -0x7fff, -SolidCullScale }); - SolidSegments.push_back({ SolidCullScale , 0x7fff }); -} - -bool RenderPolyBsp::IsSegmentCulled(int x1, int x2) const -{ - int next = 0; - while (SolidSegments[next].X2 <= x2) - next++; - return (x1 >= SolidSegments[next].X1 && x2 <= SolidSegments[next].X2); -} - -void RenderPolyBsp::MarkSegmentCulled(int x1, int x2) -{ - if (x1 >= x2) - return; - - int cur = 1; - while (true) - { - if (SolidSegments[cur].X1 <= x1 && SolidSegments[cur].X2 >= x2) // Already fully marked - { - break; - } - else if (cur + 1 != SolidSegments.size() && SolidSegments[cur].X2 >= x1 && SolidSegments[cur].X1 <= x2) // Merge segments - { - // Find last segment - int merge = cur; - while (merge + 2 != SolidSegments.size() && SolidSegments[merge + 1].X1 <= x2) - merge++; - - // Apply new merged range - SolidSegments[cur].X1 = MIN(SolidSegments[cur].X1, x1); - SolidSegments[cur].X2 = MAX(SolidSegments[merge].X2, x2); - - // Remove additional segments we merged with - if (merge > cur) - SolidSegments.erase(SolidSegments.begin() + (cur + 1), SolidSegments.begin() + (merge + 1)); - - break; - } - else if 
(SolidSegments[cur].X1 > x1) // Insert new segment - { - SolidSegments.insert(SolidSegments.begin() + cur, { x1, x2 }); - break; - } - cur++; - } -} - -int RenderPolyBsp::PointOnSide(const DVector2 &pos, const node_t *node) -{ - return DMulScale32(FLOAT2FIXED(pos.Y) - node->y, node->dx, node->x - FLOAT2FIXED(pos.X), node->dy) > 0; -} - -bool RenderPolyBsp::CheckBBox(float *bspcoord) -{ - // Start using a quick frustum AABB test: - - AxisAlignedBoundingBox aabb(Vec3f(bspcoord[BOXLEFT], bspcoord[BOXBOTTOM], (float)ViewPos.Z - 1000.0f), Vec3f(bspcoord[BOXRIGHT], bspcoord[BOXTOP], (float)ViewPos.Z + 1000.0f)); - auto result = IntersectionTest::frustum_aabb(frustumPlanes, aabb); - if (result == IntersectionTest::outside) - return false; - - // Occlusion test using solid segments: - - int boxx; - int boxy; - int boxpos; - - double x1, y1, x2, y2; - - // Find the corners of the box - // that define the edges from current viewpoint. - if (ViewPos.X <= bspcoord[BOXLEFT]) - boxx = 0; - else if (ViewPos.X < bspcoord[BOXRIGHT]) - boxx = 1; - else - boxx = 2; - - if (ViewPos.Y >= bspcoord[BOXTOP]) - boxy = 0; - else if (ViewPos.Y > bspcoord[BOXBOTTOM]) - boxy = 1; - else - boxy = 2; - - boxpos = (boxy << 2) + boxx; - if (boxpos == 5) - return true; - - static const int checkcoord[12][4] = - { - { 3,0,2,1 }, - { 3,0,2,0 }, - { 3,1,2,0 }, - { 0 }, - { 2,0,2,1 }, - { 0,0,0,0 }, - { 3,1,3,0 }, - { 0 }, - { 2,0,3,1 }, - { 2,1,3,1 }, - { 2,1,3,0 } - }; - - x1 = bspcoord[checkcoord[boxpos][0]]; - y1 = bspcoord[checkcoord[boxpos][1]]; - x2 = bspcoord[checkcoord[boxpos][2]]; - y2 = bspcoord[checkcoord[boxpos][3]]; - - int sx1, sx2; - if (GetSegmentRangeForLine(x1, y1, x2, y2, sx1, sx2)) - return !IsSegmentCulled(sx1, sx2); - else - return true; -} - -bool RenderPolyBsp::GetSegmentRangeForLine(double x1, double y1, double x2, double y2, int &sx1, int &sx2) const -{ - double znear = 5.0; - - // Transform to 2D view space: - x1 = x1 - ViewPos.X; - y1 = y1 - ViewPos.Y; - x2 = x2 - 
ViewPos.X; - y2 = y2 - ViewPos.Y; - double rx1 = x1 * ViewSin - y1 * ViewCos; - double rx2 = x2 * ViewSin - y2 * ViewCos; - double ry1 = x1 * ViewCos + y1 * ViewSin; - double ry2 = x2 * ViewCos + y2 * ViewSin; - - // Cull if line is entirely behind view - if (ry1 < znear && ry2 < znear) return false; - - // Clip line, if needed - double t1 = 0.0f, t2 = 1.0f; - if (ry1 < znear) - t1 = clamp((znear - ry1) / (ry2 - ry1), 0.0, 1.0); - if (ry2 < znear) - t2 = clamp((znear - ry1) / (ry2 - ry1), 0.0, 1.0); - if (t1 != 0.0 || t2 != 1.0) - { - double nx1 = rx1 * (1.0 - t1) + rx2 * t1; - double ny1 = ry1 * (1.0 - t1) + ry2 * t1; - double nx2 = rx1 * (1.0 - t2) + rx2 * t2; - double ny2 = ry1 * (1.0 - t2) + ry2 * t2; - rx1 = nx1; - rx2 = nx2; - ry1 = ny1; - ry2 = ny2; - } - - sx1 = (int)floor(clamp(rx1 / ry1 * (SolidCullScale / 3), (double)-SolidCullScale, (double)SolidCullScale)); - sx2 = (int)floor(clamp(rx2 / ry2 * (SolidCullScale / 3), (double)-SolidCullScale, (double)SolidCullScale)); - - if (sx1 > sx2) - std::swap(sx1, sx2); - return sx1 != sx2; -} - -///////////////////////////////////////////////////////////////////////////// - -void RenderPolyWall::Render(const TriMatrix &worldToClip) -{ - FTexture *tex = GetTexture(); - if (!tex) - return; - - PolyWallTextureCoords texcoords(tex, Line, Texpart, TopZ, BottomZ, UnpeggedCeil); - - TriVertex *vertices = PolyVertexBuffer::GetVertices(4); - if (!vertices) - return; - - vertices[0].x = (float)v1.X; - vertices[0].y = (float)v1.Y; - vertices[0].z = (float)ceil1; - vertices[0].w = 1.0f; - vertices[0].varying[0] = (float)texcoords.u1; - vertices[0].varying[1] = (float)texcoords.v1; - - vertices[1].x = (float)v2.X; - vertices[1].y = (float)v2.Y; - vertices[1].z = (float)ceil2; - vertices[1].w = 1.0f; - vertices[1].varying[0] = (float)texcoords.u2; - vertices[1].varying[1] = (float)texcoords.v1; - - vertices[2].x = (float)v2.X; - vertices[2].y = (float)v2.Y; - vertices[2].z = (float)floor2; - vertices[2].w = 1.0f; - 
vertices[2].varying[0] = (float)texcoords.u2; - vertices[2].varying[1] = (float)texcoords.v2; - - vertices[3].x = (float)v1.X; - vertices[3].y = (float)v1.Y; - vertices[3].z = (float)floor1; - vertices[3].w = 1.0f; - vertices[3].varying[0] = (float)texcoords.u1; - vertices[3].varying[1] = (float)texcoords.v2; - - TriUniforms uniforms; - uniforms.objectToClip = worldToClip; - uniforms.light = (uint32_t)(GetLightLevel() / 255.0f * 256.0f); - uniforms.flags = 0; - uniforms.subsectorDepth = SubsectorDepth; - - PolyDrawArgs args; - args.uniforms = uniforms; - args.vinput = vertices; - args.vcount = 4; - args.mode = TriangleDrawMode::Fan; - args.ccw = true; - args.clipleft = 0; - args.cliptop = 0; - args.clipright = viewwidth; - args.clipbottom = viewheight; - args.stenciltestvalue = 0; - args.stencilwritevalue = 1; - args.SetTexture(tex); - - if (!Masked) - { - PolyTriangleDrawer::draw(args, TriDrawVariant::Draw); - PolyTriangleDrawer::draw(args, TriDrawVariant::Stencil); - } - else - { - PolyTriangleDrawer::draw(args, TriDrawVariant::DrawSubsector); - } -} - -FTexture *RenderPolyWall::GetTexture() -{ - FTexture *tex = TexMan(Line->sidedef->GetTexture(Texpart), true); - if (tex == nullptr || tex->UseType == FTexture::TEX_Null) - return nullptr; - else - return tex; -} - -int RenderPolyWall::GetLightLevel() -{ - if (fixedlightlev >= 0 || fixedcolormap) - { - return 255; - } - else - { - bool foggy = false; - int actualextralight = foggy ? 
0 : extralight << 4; - return Line->sidedef->GetLightLevel(foggy, Line->frontsector->lightlevel) + actualextralight; - } -} - -/* -float RenderPolyWall::GetLight(short x) -{ - if (fixedlightlev >= 0 || fixedcolormap) - return 0.0f; - else - return (float)(r_WallVisibility / Coords.Z(x)); -} -*/ - -///////////////////////////////////////////////////////////////////////////// - -PolyWallTextureCoords::PolyWallTextureCoords(FTexture *tex, const seg_t *line, side_t::ETexpart texpart, double topz, double bottomz, double unpeggedceil) -{ - CalcU(tex, line, texpart); - CalcV(tex, line, texpart, topz, bottomz, unpeggedceil); -} - -void PolyWallTextureCoords::CalcU(FTexture *tex, const seg_t *line, side_t::ETexpart texpart) -{ - double lineLength = line->sidedef->TexelLength; - double lineStart = 0.0; - - bool entireSegment = ((line->linedef->v1 == line->v1) && (line->linedef->v2 == line->v2) || (line->linedef->v2 == line->v1) && (line->linedef->v1 == line->v2)); - if (!entireSegment) - { - lineLength = (line->v2->fPos() - line->v1->fPos()).Length(); - lineStart = (line->v1->fPos() - line->linedef->v1->fPos()).Length(); - } - - int texWidth = tex->GetWidth(); - double uscale = line->sidedef->GetTextureXScale(texpart) * tex->Scale.X; - u1 = lineStart + line->sidedef->GetTextureXOffset(texpart); - u2 = u1 + lineLength; - u1 *= uscale; - u2 *= uscale; - u1 /= texWidth; - u2 /= texWidth; -} - -void PolyWallTextureCoords::CalcV(FTexture *tex, const seg_t *line, side_t::ETexpart texpart, double topz, double bottomz, double unpeggedceil) -{ - double vscale = line->sidedef->GetTextureYScale(texpart) * tex->Scale.Y; - - double yoffset = line->sidedef->GetTextureYOffset(texpart); - if (tex->bWorldPanning) - yoffset *= vscale; - - switch (texpart) - { - default: - case side_t::mid: - CalcVMidPart(tex, line, topz, bottomz, vscale, yoffset); - break; - case side_t::top: - CalcVTopPart(tex, line, topz, bottomz, vscale, yoffset); - break; - case side_t::bottom: - CalcVBottomPart(tex, 
line, topz, bottomz, unpeggedceil, vscale, yoffset); - break; - } - - int texHeight = tex->GetHeight(); - v1 /= texHeight; - v2 /= texHeight; -} - -void PolyWallTextureCoords::CalcVTopPart(FTexture *tex, const seg_t *line, double topz, double bottomz, double vscale, double yoffset) -{ - bool pegged = (line->linedef->flags & ML_DONTPEGTOP) == 0; - if (pegged) // bottom to top - { - int texHeight = tex->GetHeight(); - v1 = -yoffset; - v2 = v1 + (topz - bottomz); - v1 *= vscale; - v2 *= vscale; - v1 = texHeight - v1; - v2 = texHeight - v2; - std::swap(v1, v2); - } - else // top to bottom - { - v1 = yoffset; - v2 = v1 + (topz - bottomz); - v1 *= vscale; - v2 *= vscale; - } -} - -void PolyWallTextureCoords::CalcVMidPart(FTexture *tex, const seg_t *line, double topz, double bottomz, double vscale, double yoffset) -{ - bool pegged = (line->linedef->flags & ML_DONTPEGBOTTOM) == 0; - if (pegged) // top to bottom - { - v1 = yoffset; - v2 = v1 + (topz - bottomz); - v1 *= vscale; - v2 *= vscale; - } - else // bottom to top - { - int texHeight = tex->GetHeight(); - v1 = yoffset; - v2 = v1 + (topz - bottomz); - v1 *= vscale; - v2 *= vscale; - v1 = texHeight - v1; - v2 = texHeight - v2; - std::swap(v1, v2); - } -} - -void PolyWallTextureCoords::CalcVBottomPart(FTexture *tex, const seg_t *line, double topz, double bottomz, double unpeggedceil, double vscale, double yoffset) -{ - bool pegged = (line->linedef->flags & ML_DONTPEGBOTTOM) == 0; - if (pegged) // top to bottom - { - v1 = yoffset; - v2 = v1 + (topz - bottomz); - v1 *= vscale; - v2 *= vscale; - } - else - { - v1 = yoffset + (unpeggedceil - topz); - v2 = v1 + (topz - bottomz); - v1 *= vscale; - v2 *= vscale; - } -} - -///////////////////////////////////////////////////////////////////////////// - -void PolyScreenSprite::Render() -{ - FSpecialColormap *special = nullptr; - FColormapStyle colormapstyle; - PalEntry overlay = 0; - bool usecolormapstyle = false; - if (visstyle.BaseColormap >= &SpecialColormaps[0] && - 
visstyle.BaseColormap < &SpecialColormaps[SpecialColormaps.Size()]) - { - special = static_cast(visstyle.BaseColormap); - } - else if (Colormap->Color == PalEntry(255, 255, 255) && - Colormap->Desaturate == 0) - { - overlay = Colormap->Fade; - overlay.a = BYTE(visstyle.ColormapNum * 255 / NUMCOLORMAPS); - } - else - { - usecolormapstyle = true; - colormapstyle.Color = Colormap->Color; - colormapstyle.Fade = Colormap->Fade; - colormapstyle.Desaturate = Colormap->Desaturate; - colormapstyle.FadeLevel = visstyle.ColormapNum / float(NUMCOLORMAPS); - } - - screen->DrawTexture(Pic, - X1, - Y1, - DTA_DestWidthF, Width, - DTA_DestHeightF, Height, - DTA_Translation, Translation, - DTA_FlipX, Flip, - DTA_TopOffset, 0, - DTA_LeftOffset, 0, - DTA_ClipLeft, viewwindowx, - DTA_ClipTop, viewwindowy, - DTA_ClipRight, viewwindowx + viewwidth, - DTA_ClipBottom, viewwindowy + viewheight, - DTA_AlphaF, visstyle.Alpha, - DTA_RenderStyle, visstyle.RenderStyle, - DTA_FillColor, FillColor, - DTA_SpecialColormap, special, - DTA_ColorOverlay, overlay.d, - DTA_ColormapStyle, usecolormapstyle ? 
&colormapstyle : nullptr, - TAG_DONE); + static RenderPolyScene scene; + return &scene; } ///////////////////////////////////////////////////////////////////////////// @@ -1450,164 +209,3 @@ void PolyVertexBuffer::Clear() { NextBufferVertex = 0; } - -///////////////////////////////////////////////////////////////////////////// - -TriVertex PolySkyDome::SetVertex(float xx, float yy, float zz, float uu, float vv) -{ - TriVertex v; - v.x = xx; - v.y = yy; - v.z = zz; - v.w = 1.0f; - v.varying[0] = uu; - v.varying[1] = vv; - return v; -} - -TriVertex PolySkyDome::SetVertexXYZ(float xx, float yy, float zz, float uu, float vv) -{ - TriVertex v; - v.x = xx; - v.y = zz; - v.z = yy; - v.w = 1.0f; - v.varying[0] = uu; - v.varying[1] = vv; - return v; -} - -void PolySkyDome::SkyVertex(int r, int c, bool zflip) -{ - static const FAngle maxSideAngle = 60.f; - static const float scale = 10000.; - - FAngle topAngle = (c / (float)mColumns * 360.f); - FAngle sideAngle = maxSideAngle * (float)(mRows - r) / (float)mRows; - float height = sideAngle.Sin(); - float realRadius = scale * sideAngle.Cos(); - FVector2 pos = topAngle.ToVector(realRadius); - float z = (!zflip) ? scale * height : -scale * height; - - float u, v; - //uint32_t color = r == 0 ? 0xffffff : 0xffffffff; - - // And the texture coordinates. - if (!zflip) // Flipped Y is for the lower hemisphere. - { - u = (-c / (float)mColumns); - v = (r / (float)mRows); - } - else - { - u = (-c / (float)mColumns); - v = 1.0f + ((mRows - r) / (float)mRows); - } - - if (r != 4) z += 300; - - // And finally the vertex. 
- TriVertex vert; - vert = SetVertexXYZ(-pos.X, z - 1.f, pos.Y, u * 4.0f, v * 1.2f + 0.5f/*, color*/); - mVertices.Push(vert); -} - -void PolySkyDome::CreateSkyHemisphere(bool zflip) -{ - int r, c; - - mPrimStart.Push(mVertices.Size()); - - for (c = 0; c < mColumns; c++) - { - SkyVertex(1, c, zflip); - } - - // The total number of triangles per hemisphere can be calculated - // as follows: rows * columns * 2 + 2 (for the top cap). - for (r = 0; r < mRows; r++) - { - mPrimStart.Push(mVertices.Size()); - for (c = 0; c <= mColumns; c++) - { - SkyVertex(r + zflip, c, zflip); - SkyVertex(r + 1 - zflip, c, zflip); - } - } -} - -void PolySkyDome::CreateDome() -{ - mColumns = 128; - mRows = 4; - CreateSkyHemisphere(false); - CreateSkyHemisphere(true); - mPrimStart.Push(mVertices.Size()); -} - -void PolySkyDome::RenderRow(PolyDrawArgs &args, int row) -{ - args.vinput = &mVertices[mPrimStart[row]]; - args.vcount = mPrimStart[row + 1] - mPrimStart[row]; - args.mode = TriangleDrawMode::Strip; - args.ccw = false; - PolyTriangleDrawer::draw(args, TriDrawVariant::Draw); -} - -void PolySkyDome::RenderCapColorRow(PolyDrawArgs &args, FTexture *skytex, int row, bool bottomCap) -{ - uint32_t solid = skytex->GetSkyCapColor(bottomCap); - if (!r_swtruecolor) - solid = RGB32k.RGB[(RPART(solid) >> 3)][(GPART(solid) >> 3)][(BPART(solid) >> 3)]; - - args.vinput = &mVertices[mPrimStart[row]]; - args.vcount = mPrimStart[row + 1] - mPrimStart[row]; - args.mode = TriangleDrawMode::Fan; - args.ccw = bottomCap; - args.solidcolor = solid; - PolyTriangleDrawer::draw(args, TriDrawVariant::Fill); -} - -void PolySkyDome::Render(const TriMatrix &worldToClip) -{ - FTextureID sky1tex, sky2tex; - if ((level.flags & LEVEL_SWAPSKIES) && !(level.flags & LEVEL_DOUBLESKY)) - sky1tex = sky2texture; - else - sky1tex = sky1texture; - sky2tex = sky2texture; - - FTexture *frontskytex = TexMan(sky1tex, true); - FTexture *backskytex = nullptr; - if (level.flags & LEVEL_DOUBLESKY) - backskytex = TexMan(sky2tex, true); 
- - TriMatrix objectToWorld = TriMatrix::translate((float)ViewPos.X, (float)ViewPos.Y, (float)ViewPos.Z); - - TriUniforms uniforms; - uniforms.objectToClip = worldToClip * objectToWorld; - uniforms.light = 256; - uniforms.flags = 0; - uniforms.subsectorDepth = RenderPolyBsp::SkySubsectorDepth; - - int rc = mRows + 1; - - PolyDrawArgs args; - args.uniforms = uniforms; - args.clipleft = 0; - args.cliptop = 0; - args.clipright = viewwidth; - args.clipbottom = viewheight; - args.stenciltestvalue = 255; - args.stencilwritevalue = 1; - args.SetTexture(frontskytex); - - RenderCapColorRow(args, frontskytex, 0, false); - RenderCapColorRow(args, frontskytex, rc, true); - - for (int i = 1; i <= mRows; i++) - { - RenderRow(args, i); - RenderRow(args, rc + i); - } -} diff --git a/src/r_poly.h b/src/r_poly.h index 6c50519043..b182a582dd 100644 --- a/src/r_poly.h +++ b/src/r_poly.h @@ -1,5 +1,5 @@ /* -** Experimental Doom software renderer +** Polygon Doom software renderer ** Copyright (c) 2016 Magnus Norddahl ** ** This software is provided 'as-is', without any express or implied @@ -31,60 +31,13 @@ #include "r_main.h" #include "r_poly_triangle.h" #include "r_poly_intersection.h" - -// DScreen accelerated sprite to be rendered -class PolyScreenSprite -{ -public: - void Render(); - - FTexture *Pic = nullptr; - double X1 = 0.0; - double Y1 = 0.0; - double Width = 0.0; - double Height = 0.0; - FRemapTable *Translation = nullptr; - bool Flip = false; - visstyle_t visstyle; - uint32_t FillColor = 0; - FDynamicColormap *Colormap = nullptr; -}; - -class RenderPolyWall -{ -public: - void Render(const TriMatrix &worldToClip); - - void SetCoords(const DVector2 &v1, const DVector2 &v2, double ceil1, double floor1, double ceil2, double floor2) - { - this->v1 = v1; - this->v2 = v2; - this->ceil1 = ceil1; - this->floor1 = floor1; - this->ceil2 = ceil2; - this->floor2 = floor2; - } - - DVector2 v1; - DVector2 v2; - double ceil1 = 0.0; - double floor1 = 0.0; - double ceil2 = 0.0; - double 
floor2 = 0.0; - - const seg_t *Line = nullptr; - side_t::ETexpart Texpart = side_t::mid; - double TopZ = 0.0; - double BottomZ = 0.0; - double UnpeggedCeil = 0.0; - FSWColormap *Colormap = nullptr; - bool Masked = false; - uint32_t SubsectorDepth = 0; - -private: - FTexture *GetTexture(); - int GetLightLevel(); -}; +#include "r_poly_wall.h" +#include "r_poly_sprite.h" +#include "r_poly_wallsprite.h" +#include "r_poly_playersprite.h" +#include "r_poly_plane.h" +#include "r_poly_sky.h" +#include "r_poly_cull.h" // Used for sorting things by distance to the camera class PolySortedSprite @@ -119,110 +72,34 @@ public: int Count = 0; }; -class PolySkyDome -{ -public: - PolySkyDome() { CreateDome(); } - void Render(const TriMatrix &worldToClip); - -private: - TArray mVertices; - TArray mPrimStart; - int mRows, mColumns; - - void SkyVertex(int r, int c, bool yflip); - void CreateSkyHemisphere(bool zflip); - void CreateDome(); - void RenderRow(PolyDrawArgs &args, int row); - void RenderCapColorRow(PolyDrawArgs &args, FTexture *skytex, int row, bool bottomCap); - - TriVertex SetVertex(float xx, float yy, float zz, float uu = 0, float vv = 0); - TriVertex SetVertexXYZ(float xx, float yy, float zz, float uu = 0, float vv = 0); -}; - -// Renders a GL BSP tree in a scene -class RenderPolyBsp +// Renders a scene +class RenderPolyScene { public: void Render(); - void RenderScreenSprites(); + void RenderRemainingPlayerSprites(); static const uint32_t SkySubsectorDepth = 0x7fffffff; + static RenderPolyScene *Instance(); + private: - void RenderNode(void *node); void RenderSubsector(subsector_t *sub); - void RenderPlane(subsector_t *sub, uint32_t subsectorDepth, bool ceiling); - void AddLine(seg_t *line, sector_t *frontsector, uint32_t subsectorDepth); - TriVertex PlaneVertex(vertex_t *v1, sector_t *sector, double height); + void RenderLine(seg_t *line, sector_t *frontsector, uint32_t subsectorDepth); void RenderTranslucent(); - void AddSprite(AActor *thing, subsector_t *sub, 
uint32_t subsectorDepth); - void AddWallSprite(AActor *thing, subsector_t *sub, uint32_t subsectorDepth); - bool IsThingCulled(AActor *thing); - visstyle_t GetSpriteVisStyle(AActor *thing, double z); - FTexture *GetSpriteTexture(AActor *thing, /*out*/ bool &flipX); SpriteRange GetSpritesForSector(sector_t *sector); - void RenderPlayerSprites(); - void RenderPlayerSprite(DPSprite *sprite, AActor *owner, float bobx, float boby, double wx, double wy, double ticfrac); - - int PointOnSide(const DVector2 &pos, const node_t *node); - - // Checks BSP node/subtree bounding box. - // Returns true if some part of the bbox might be visible. - bool CheckBBox(float *bspcoord); - bool GetSegmentRangeForLine(double x1, double y1, double x2, double y2, int &sx1, int &sx2) const; - void MarkSegmentCulled(int x1, int x2); - bool IsSegmentCulled(int x1, int x2) const; - void ClearSolidSegments(); - - std::vector PvsSectors; + TriMatrix WorldToClip; + PolyCull Cull; uint32_t NextSubsectorDepth = 0; - double MaxCeilingHeight = 0.0; - double MinFloorHeight = 0.0; - - TriMatrix worldToClip; - FrustumPlanes frustumPlanes; - std::vector SectorSpriteRanges; std::vector SortedSprites; std::vector TranslucentObjects; - - std::vector TempTranslucentWalls; - - std::vector ScreenSprites; - - const int BaseXCenter = 160; - const int BaseYCenter = 100; - - struct SolidSegment - { - SolidSegment(int x1, int x2) : X1(x1), X2(x2) { } - int X1, X2; - }; - - std::vector SolidSegments; - const int SolidCullScale = 3000; + std::vector SubsectorTranslucentWalls; PolySkyDome skydome; -}; - -// Texture coordinates for a wall -class PolyWallTextureCoords -{ -public: - PolyWallTextureCoords(FTexture *tex, const seg_t *line, side_t::ETexpart texpart, double topz, double bottomz, double unpeggedceil); - - double u1, u2; - double v1, v2; - -private: - void CalcU(FTexture *tex, const seg_t *line, side_t::ETexpart texpart); - void CalcV(FTexture *tex, const seg_t *line, side_t::ETexpart texpart, double topz, double 
bottomz, double unpeggedceil); - void CalcVTopPart(FTexture *tex, const seg_t *line, double topz, double bottomz, double vscale, double yoffset); - void CalcVMidPart(FTexture *tex, const seg_t *line, double topz, double bottomz, double vscale, double yoffset); - void CalcVBottomPart(FTexture *tex, const seg_t *line, double topz, double bottomz, double unpeggedceil, double vscale, double yoffset); + RenderPolyPlayerSprites PlayerSprites; }; class PolyVertexBuffer diff --git a/src/r_poly_cull.cpp b/src/r_poly_cull.cpp new file mode 100644 index 0000000000..3df07abf96 --- /dev/null +++ b/src/r_poly_cull.cpp @@ -0,0 +1,259 @@ +/* +** Potential visible set (PVS) handling +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. 
+**
+*/
+
+// NOTE(review): the header name of the first include was lost when this patch
+// text was extracted; <stdlib.h> is assumed -- confirm against the original commit.
+#include <stdlib.h>
+#include "templates.h"
+#include "doomdef.h"
+#include "sbar.h"
+#include "r_data/r_translate.h"
+#include "r_poly_cull.h"
+#include "r_poly.h"
+
+// Rebuilds the visible-subsector list for the current view.
+//
+// Resets the solid segment list, derives the frustum planes from worldToClip,
+// then fills PvsSectors, MaxCeilingHeight and MinFloorHeight. The solid segment
+// list is reset again before returning.
+void PolyCull::CullScene(const TriMatrix &worldToClip)
+{
+	ClearSolidSegments();
+	PvsSectors.clear();
+	frustumPlanes = FrustumPlanes(worldToClip);
+
+	// Cull front to back
+	if (numnodes == 0)
+	{
+		// No BSP nodes: the first subsector is the only entry.
+		PvsSectors.push_back(subsectors);
+		MaxCeilingHeight = subsectors->sector->ceilingplane.Zat0();
+		MinFloorHeight = subsectors->sector->floorplane.Zat0();
+	}
+	else
+	{
+		// The bounds start at 0.0 and are widened by CullNode with MAX/MIN, so
+		// MaxCeilingHeight never ends below 0 and MinFloorHeight never above 0.
+		MaxCeilingHeight = 0.0;
+		MinFloorHeight = 0.0;
+		CullNode(nodes + numnodes - 1);	// The head node is the last node output.
+	}
+
+	ClearSolidSegments();
+}
+
+// Walks a BSP subtree, nearest side first, and records every subsector reached.
+//
+// node is a tagged pointer: bit 0 clear means node_t, bit 0 set means a
+// subsector_t stored at (address - 1). The far side of a node is only entered
+// when CheckBBox() reports that its bounding box might be visible.
+void PolyCull::CullNode(void *node)
+{
+	while (!((size_t)node & 1))  // Keep going until a subsector is found
+	{
+		node_t *bsp = (node_t *)node;
+
+		// Decide which side the view point is on.
+		int side = PointOnSide(ViewPos, bsp);
+
+		// Recursively divide front space (toward the viewer).
+		CullNode(bsp->children[side]);
+
+		// Possibly divide back space (away from the viewer).
+		side ^= 1;
+		if (!CheckBBox(bsp->bbox[side]))
+			return;
+
+		node = bsp->children[side];
+	}
+
+	// Mark that we need to render this
+	subsector_t *sub = (subsector_t *)((BYTE *)node - 1);
+	MaxCeilingHeight = MAX(MaxCeilingHeight, sub->sector->ceilingplane.Zat0());
+	MinFloorHeight = MIN(MinFloorHeight, sub->sector->floorplane.Zat0());
+	PvsSectors.push_back(sub);
+
+	// Update culling info for further bsp clipping.
+	// Only segs without a back sector that are not flagged WALLF_POLYOBJ occlude.
+	for (uint32_t i = 0; i < sub->numlines; i++)
+	{
+		seg_t *line = &sub->firstline[i];
+		if ((line->sidedef == nullptr || !(line->sidedef->Flags & WALLF_POLYOBJ)) && line->backsector == nullptr)
+		{
+			int sx1, sx2;
+			if (GetSegmentRangeForLine(line->v1->fX(), line->v1->fY(), line->v2->fX(), line->v2->fY(), sx1, sx2))
+			{
+				MarkSegmentCulled(sx1, sx2);
+			}
+		}
+	}
+}
+
+// Resets the solid segment list to its two sentinel entries, which cover
+// everything left of -SolidCullScale and right of +SolidCullScale.
+void PolyCull::ClearSolidSegments()
+{
+	SolidSegments.clear();
+	SolidSegments.reserve(SolidCullScale + 2);
+	SolidSegments.push_back({ -0x7fff, -SolidCullScale });
+	SolidSegments.push_back({ SolidCullScale , 0x7fff });
+}
+
+// Returns true when the range [x1, x2] lies inside a single solid segment.
+//
+// The scan stops at the first segment whose X2 is greater than x2; it relies on
+// the right sentinel (X2 == 0x7fff) to terminate.
+bool PolyCull::IsSegmentCulled(int x1, int x2) const
+{
+	int next = 0;
+	while (SolidSegments[next].X2 <= x2)
+		next++;
+	return (x1 >= SolidSegments[next].X1 && x2 <= SolidSegments[next].X2);
+}
+
+// Adds the range [x1, x2] to the solid segment list, merging it with segments
+// it touches or overlaps. Empty or inverted ranges are ignored.
+//
+// The scan starts at index 1, so the left sentinel is never modified, and the
+// last entry (the right sentinel) is never merged into.
+void PolyCull::MarkSegmentCulled(int x1, int x2)
+{
+	if (x1 >= x2)
+		return;
+
+	int cur = 1;
+	while (true)
+	{
+		if (SolidSegments[cur].X1 <= x1 && SolidSegments[cur].X2 >= x2) // Already fully marked
+		{
+			break;
+		}
+		else if (cur + 1 != SolidSegments.size() && SolidSegments[cur].X2 >= x1 && SolidSegments[cur].X1 <= x2) // Merge segments
+		{
+			// Find last segment
+			int merge = cur;
+			while (merge + 2 != SolidSegments.size() && SolidSegments[merge + 1].X1 <= x2)
+				merge++;
+
+			// Apply new merged range
+			SolidSegments[cur].X1 = MIN(SolidSegments[cur].X1, x1);
+			SolidSegments[cur].X2 = MAX(SolidSegments[merge].X2, x2);
+
+			// Remove additional segments we merged with
+			if (merge > cur)
+				SolidSegments.erase(SolidSegments.begin() + (cur + 1), SolidSegments.begin() + (merge + 1));
+
+			break;
+		}
+		else if (SolidSegments[cur].X1 > x1) // Insert new segment
+		{
+			SolidSegments.insert(SolidSegments.begin() + cur, { x1, x2 });
+			break;
+		}
+		cur++;
+	}
+}
+
+// Returns 1 when the fixed-point expression below is positive for pos relative
+// to the node's partition line (x, y, dx, dy), otherwise 0. The result is used
+// as an index into node_t::children and node_t::bbox.
+int PolyCull::PointOnSide(const DVector2 &pos, const node_t *node)
+{
+	return DMulScale32(FLOAT2FIXED(pos.Y) - node->y, node->dx, node->x - FLOAT2FIXED(pos.X), node->dy) > 0;
+}
+
+// Checks BSP node/subtree bounding box.
+// Returns true if some part of the bbox might be visible.
+//
+// bspcoord is indexed with BOXTOP/BOXBOTTOM/BOXLEFT/BOXRIGHT.
+bool PolyCull::CheckBBox(float *bspcoord)
+{
+	// Start using a quick frustum AABB test.
+	// The box is given a fixed vertical extent of +/-1000 units around the view height.
+
+	AxisAlignedBoundingBox aabb(Vec3f(bspcoord[BOXLEFT], bspcoord[BOXBOTTOM], (float)ViewPos.Z - 1000.0f), Vec3f(bspcoord[BOXRIGHT], bspcoord[BOXTOP], (float)ViewPos.Z + 1000.0f));
+	auto result = IntersectionTest::frustum_aabb(frustumPlanes, aabb);
+	if (result == IntersectionTest::outside)
+		return false;
+
+	// Occlusion test using solid segments:
+
+	int boxx;
+	int boxy;
+	int boxpos;
+
+	double x1, y1, x2, y2;
+
+	// Find the corners of the box
+	// that define the edges from current viewpoint.
+	if (ViewPos.X <= bspcoord[BOXLEFT])
+		boxx = 0;
+	else if (ViewPos.X < bspcoord[BOXRIGHT])
+		boxx = 1;
+	else
+		boxx = 2;
+
+	if (ViewPos.Y >= bspcoord[BOXTOP])
+		boxy = 0;
+	else if (ViewPos.Y > bspcoord[BOXBOTTOM])
+		boxy = 1;
+	else
+		boxy = 2;
+
+	// boxpos 5 means the view point is inside the box.
+	boxpos = (boxy << 2) + boxx;
+	if (boxpos == 5)
+		return true;
+
+	// Indices into bspcoord for (x1, y1, x2, y2), one row per boxpos.
+	// Rows 3 and 7 are never selected because boxx is at most 2.
+	static const int checkcoord[12][4] =
+	{
+		{ 3,0,2,1 },
+		{ 3,0,2,0 },
+		{ 3,1,2,0 },
+		{ 0 },
+		{ 2,0,2,1 },
+		{ 0,0,0,0 },
+		{ 3,1,3,0 },
+		{ 0 },
+		{ 2,0,3,1 },
+		{ 2,1,3,1 },
+		{ 2,1,3,0 }
+	};
+
+	x1 = bspcoord[checkcoord[boxpos][0]];
+	y1 = bspcoord[checkcoord[boxpos][1]];
+	x2 = bspcoord[checkcoord[boxpos][2]];
+	y2 = bspcoord[checkcoord[boxpos][3]];
+
+	// If no usable range can be computed, report the box as possibly visible.
+	int sx1, sx2;
+	if (GetSegmentRangeForLine(x1, y1, x2, y2, sx1, sx2))
+		return !IsSegmentCulled(sx1, sx2);
+	else
+		return true;
+}
+
+// Projects the map line (x1, y1)-(x2, y2) onto the one-dimensional range used
+// by the solid segment list.
+//
+// On success sx1 <= sx2 hold the projected end points, clamped to
+// [-SolidCullScale, SolidCullScale]. Returns false when both end points are
+// nearer than znear along the view direction, or when the projected range is
+// empty (sx1 == sx2).
+bool PolyCull::GetSegmentRangeForLine(double x1, double y1, double x2, double y2, int &sx1, int &sx2) const
+{
+	double znear = 5.0;
+
+	// Transform to 2D view space:
+	x1 = x1 - ViewPos.X;
+	y1 = y1 - ViewPos.Y;
+	x2 = x2 - ViewPos.X;
+	y2 = y2 - ViewPos.Y;
+	double rx1 = x1 * ViewSin - y1 * ViewCos;
+	double rx2 = x2 * ViewSin - y2 * ViewCos;
+	double ry1 = x1 * ViewCos + y1 * ViewSin;
+	double ry2 = x2 * ViewCos + y2 * ViewSin;
+
+	// Cull if line is entirely behind view
+	if (ry1 < znear && ry2 < znear) return false;
+
+	// Clip line, if needed.
+	// Both cases use the same parameter: the point where the line crosses znear.
+	double t1 = 0.0f, t2 = 1.0f;
+	if (ry1 < znear)
+		t1 = clamp((znear - ry1) / (ry2 - ry1), 0.0, 1.0);
+	if (ry2 < znear)
+		t2 = clamp((znear - ry1) / (ry2 - ry1), 0.0, 1.0);
+	if (t1 != 0.0 || t2 != 1.0)
+	{
+		double nx1 = rx1 * (1.0 - t1) + rx2 * t1;
+		double ny1 = ry1 * (1.0 - t1) + ry2 * t1;
+		double nx2 = rx1 * (1.0 - t2) + rx2 * t2;
+		double ny2 = ry1 * (1.0 - t2) + ry2 * t2;
+		rx1 = nx1;
+		rx2 = nx2;
+		ry1 = ny1;
+		ry2 = ny2;
+	}
+
+	// Perspective divide; SolidCullScale / 3 is an integer division.
+	sx1 = (int)floor(clamp(rx1 / ry1 * (SolidCullScale / 3), (double)-SolidCullScale, (double)SolidCullScale));
+	sx2 = (int)floor(clamp(rx2 / ry2 * (SolidCullScale / 3), (double)-SolidCullScale, (double)SolidCullScale));
+
+	if (sx1 > sx2)
+		std::swap(sx1, sx2);
+	return sx1 != sx2;
+}
diff --git a/src/r_poly_cull.h b/src/r_poly_cull.h
new file mode 100644
index 0000000000..a79355f689
--- /dev/null
+++ b/src/r_poly_cull.h
@@ -0,0 +1,61 @@
+/*
+** Potential visible set (PVS) handling
+** Copyright (c) 2016 Magnus Norddahl
+**
+** This software is provided 'as-is', without any express or implied
+** warranty. In no event will the authors be held liable for any damages
+** arising from the use of this software.
+**
+** Permission is granted to anyone to use this software for any purpose,
+** including commercial applications, and to alter it and redistribute it
+** freely, subject to the following restrictions:
+**
+** 1. The origin of this software must not be misrepresented; you must not
+**    claim that you wrote the original software. If you use this software
+**    in a product, an acknowledgment in the product documentation would be
+**    appreciated but is not required.
+** 2.
Altered source versions must be plainly marked as such, and must not be
+**    misrepresented as being the original software.
+** 3. This notice may not be removed or altered from any source distribution.
+**
+*/
+
+#pragma once
+
+#include "r_poly_triangle.h"
+#include "r_poly_intersection.h"
+
+// Builds the list of subsectors to render for the current view and tracks
+// which parts of the view are already covered by solid segments.
+class PolyCull
+{
+public:
+	// Rebuilds PvsSectors, MaxCeilingHeight and MinFloorHeight.
+	void CullScene(const TriMatrix &worldToClip);
+
+	// Projects a map line to a range usable with the two functions below.
+	// Returns false when no usable range exists.
+	bool GetSegmentRangeForLine(double x1, double y1, double x2, double y2, int &sx1, int &sx2) const;
+	void MarkSegmentCulled(int x1, int x2);
+	bool IsSegmentCulled(int x1, int x2) const;
+
+	// Subsectors collected by the last CullScene call, in visiting order.
+	std::vector<subsector_t *> PvsSectors;
+	double MaxCeilingHeight = 0.0;
+	double MinFloorHeight = 0.0;
+
+private:
+	// Covered range [X1, X2].
+	struct SolidSegment
+	{
+		SolidSegment(int x1, int x2) : X1(x1), X2(x2) { }
+		int X1, X2;
+	};
+
+	void CullNode(void *node);
+	int PointOnSide(const DVector2 &pos, const node_t *node);
+
+	// Checks BSP node/subtree bounding box.
+	// Returns true if some part of the bbox might be visible.
+	bool CheckBBox(float *bspcoord);
+
+	void ClearSolidSegments();
+
+	std::vector<SolidSegment> SolidSegments;
+	// Segment coordinates are clamped to [-SolidCullScale, SolidCullScale].
+	const int SolidCullScale = 3000;
+
+	FrustumPlanes frustumPlanes;
+};
diff --git a/src/r_poly_particle.cpp b/src/r_poly_particle.cpp
new file mode 100644
index 0000000000..06b963e4ff
--- /dev/null
+++ b/src/r_poly_particle.cpp
@@ -0,0 +1,33 @@
+/*
+** Particle drawing
+** Copyright (c) 2016 Magnus Norddahl
+**
+** This software is provided 'as-is', without any express or implied
+** warranty. In no event will the authors be held liable for any damages
+** arising from the use of this software.
+**
+** Permission is granted to anyone to use this software for any purpose,
+** including commercial applications, and to alter it and redistribute it
+** freely, subject to the following restrictions:
+**
+** 1. The origin of this software must not be misrepresented; you must not
+**    claim that you wrote the original software.
If you use this software
+**    in a product, an acknowledgment in the product documentation would be
+**    appreciated but is not required.
+** 2. Altered source versions must be plainly marked as such, and must not be
+**    misrepresented as being the original software.
+** 3. This notice may not be removed or altered from any source distribution.
+**
+*/
+
+// NOTE(review): the header name of the first include was lost when this patch
+// text was extracted; <stdlib.h> is assumed -- confirm against the original commit.
+#include <stdlib.h>
+#include "templates.h"
+#include "doomdef.h"
+#include "sbar.h"
+#include "r_data/r_translate.h"
+#include "r_poly_particle.h"
+#include "r_poly.h"
+
+// Placeholder: currently draws nothing.
+void RenderPolyParticle::Render()
+{
+}
diff --git a/src/r_poly_particle.h b/src/r_poly_particle.h
new file mode 100644
index 0000000000..5573a7e482
--- /dev/null
+++ b/src/r_poly_particle.h
@@ -0,0 +1,29 @@
+/*
+** Handling drawing a particle
+** Copyright (c) 2016 Magnus Norddahl
+**
+** This software is provided 'as-is', without any express or implied
+** warranty. In no event will the authors be held liable for any damages
+** arising from the use of this software.
+**
+** Permission is granted to anyone to use this software for any purpose,
+** including commercial applications, and to alter it and redistribute it
+** freely, subject to the following restrictions:
+**
+** 1. The origin of this software must not be misrepresented; you must not
+**    claim that you wrote the original software. If you use this software
+**    in a product, an acknowledgment in the product documentation would be
+**    appreciated but is not required.
+** 2. Altered source versions must be plainly marked as such, and must not be
+**    misrepresented as being the original software.
+** 3. This notice may not be removed or altered from any source distribution.
+**
+*/
+
+#pragma once
+
+// Particle renderer; Render() is currently an empty placeholder.
+class RenderPolyParticle
+{
+public:
+	void Render();
+};
diff --git a/src/r_poly_plane.cpp b/src/r_poly_plane.cpp
new file mode 100644
index 0000000000..ac9622110b
--- /dev/null
+++ b/src/r_poly_plane.cpp
@@ -0,0 +1,196 @@
+/*
+** Handling drawing a plane (ceiling, floor)
+** Copyright (c) 2016 Magnus Norddahl
+**
+** This software is provided 'as-is', without any express or implied
+** warranty. In no event will the authors be held liable for any damages
+** arising from the use of this software.
+**
+** Permission is granted to anyone to use this software for any purpose,
+** including commercial applications, and to alter it and redistribute it
+** freely, subject to the following restrictions:
+**
+** 1. The origin of this software must not be misrepresented; you must not
+**    claim that you wrote the original software. If you use this software
+**    in a product, an acknowledgment in the product documentation would be
+**    appreciated but is not required.
+** 2. Altered source versions must be plainly marked as such, and must not be
+**    misrepresented as being the original software.
+** 3. This notice may not be removed or altered from any source distribution.
+**
+*/
+
+// NOTE(review): the header name of the first include was lost when this patch
+// text was extracted; <stdlib.h> is assumed -- confirm against the original commit.
+#include <stdlib.h>
+#include "templates.h"
+#include "doomdef.h"
+#include "sbar.h"
+#include "r_data/r_translate.h"
+#include "r_poly_plane.h"
+#include "r_poly.h"
+#include "r_sky.h" // for skyflatnum
+
+// Draws the floor or ceiling of one subsector as a triangle fan.
+//
+// worldToClip     transform applied to the vertices
+// sub             subsector whose plane is drawn
+// subsectorDepth  depth id stored in the uniforms for non-sky planes
+// ceiling         true for the ceiling plane, false for the floor plane
+// skyHeight       height used for the vertices of a sky plane
+//
+// Non-sky planes are drawn textured and then written to the stencil buffer
+// with value 1. Sky planes only write stencil value 255: first the plane at
+// skyHeight, then one quad per seg between skyHeight and the sector plane.
+// Returns early when the texture is a null texture or the vertex buffer has
+// no room left.
+void RenderPolyPlane::Render(const TriMatrix &worldToClip, subsector_t *sub, uint32_t subsectorDepth, bool ceiling, double skyHeight)
+{
+	sector_t *frontsector = sub->sector;
+
+	FTextureID picnum = frontsector->GetTexture(ceiling ? sector_t::ceiling : sector_t::floor);
+	FTexture *tex = TexMan(picnum);
+	if (tex->UseType == FTexture::TEX_Null)
+		return;
+
+	bool isSky = picnum == skyflatnum;
+
+	// Light is scaled from the 0..255 sector range to 0..256; a fixed light
+	// level or fixed colormap overrides the sector value.
+	TriUniforms uniforms;
+	uniforms.objectToClip = worldToClip;
+	uniforms.light = (uint32_t)(frontsector->lightlevel / 255.0f * 256.0f);
+	if (fixedlightlev >= 0)
+		uniforms.light = (uint32_t)(fixedlightlev / 255.0f * 256.0f);
+	else if (fixedcolormap)
+		uniforms.light = 256;
+	uniforms.flags = 0;
+	uniforms.subsectorDepth = isSky ? RenderPolyScene::SkySubsectorDepth : subsectorDepth;
+
+	TriVertex *vertices = PolyVertexBuffer::GetVertices(sub->numlines);
+	if (!vertices)
+		return;
+
+	if (ceiling)
+	{
+		// Ceiling vertices are stored in reverse seg order.
+		for (uint32_t i = 0; i < sub->numlines; i++)
+		{
+			seg_t *line = &sub->firstline[i];
+			vertices[sub->numlines - 1 - i] = PlaneVertex(line->v1, frontsector, isSky ? skyHeight : frontsector->ceilingplane.ZatPoint(line->v1));
+		}
+	}
+	else
+	{
+		for (uint32_t i = 0; i < sub->numlines; i++)
+		{
+			seg_t *line = &sub->firstline[i];
+			vertices[i] = PlaneVertex(line->v1, frontsector, isSky ? skyHeight : frontsector->floorplane.ZatPoint(line->v1));
+		}
+	}
+
+	PolyDrawArgs args;
+	args.uniforms = uniforms;
+	args.vinput = vertices;
+	args.vcount = sub->numlines;
+	args.mode = TriangleDrawMode::Fan;
+	args.ccw = true;
+	args.clipleft = 0;
+	args.cliptop = 0;
+	args.clipright = viewwidth;
+	args.clipbottom = viewheight;
+	args.stenciltestvalue = 0;
+	args.stencilwritevalue = 1;
+
+	if (!isSky)
+	{
+		args.SetTexture(tex);
+		PolyTriangleDrawer::draw(args, TriDrawVariant::Draw);
+		PolyTriangleDrawer::draw(args, TriDrawVariant::Stencil);
+	}
+	else
+	{
+		args.stencilwritevalue = 255;
+		PolyTriangleDrawer::draw(args, TriDrawVariant::Stencil);
+
+		for (uint32_t i = 0; i < sub->numlines; i++)
+		{
+			seg_t *line = &sub->firstline[i];
+
+			bool closedSky = false;
+			if (line->backsector)
+			{
+				sector_t *backsector = line->backsector;
+
+				double frontceilz1 = frontsector->ceilingplane.ZatPoint(line->v1);
+				double frontfloorz1 = frontsector->floorplane.ZatPoint(line->v1);
+				double frontceilz2 = frontsector->ceilingplane.ZatPoint(line->v2);
+				double frontfloorz2 = frontsector->floorplane.ZatPoint(line->v2);
+
+				double backceilz1 = backsector->ceilingplane.ZatPoint(line->v1);
+				double backfloorz1 = backsector->floorplane.ZatPoint(line->v1);
+				double backceilz2 = backsector->ceilingplane.ZatPoint(line->v2);
+				double backfloorz2 = backsector->floorplane.ZatPoint(line->v2);
+
+				double topceilz1 = frontceilz1;
+				double topceilz2 = frontceilz2;
+				double topfloorz1 = MIN(backceilz1, frontceilz1);
+				double topfloorz2 = MIN(backceilz2, frontceilz2);
+				double bottomceilz1 = MAX(frontfloorz1, backfloorz1);
+				double bottomceilz2 = MAX(frontfloorz2, backfloorz2);
+				double bottomfloorz1 = frontfloorz1;
+				double bottomfloorz2 = frontfloorz2;
+				double middleceilz1 = topfloorz1;
+				double middleceilz2 = topfloorz2;
+				double middlefloorz1 = MIN(bottomceilz1, middleceilz1);
+				double middlefloorz2 = MIN(bottomceilz2, middleceilz2);
+
+				bool bothSkyCeiling = frontsector->GetTexture(sector_t::ceiling) == skyflatnum && backsector->GetTexture(sector_t::ceiling) == skyflatnum;
+				bool bothSkyFloor = frontsector->GetTexture(sector_t::floor) == skyflatnum && backsector->GetTexture(sector_t::floor) == skyflatnum;
+
+				// A back sector with zero height at both ends of the seg is closed.
+				bool closedSector = backceilz1 == backfloorz1 && backceilz2 == backfloorz2;
+				closedSky = (ceiling && bothSkyCeiling && closedSector) || (!ceiling && bothSkyFloor && closedSector);
+				if (!closedSky)
+				{
+					// Two-sided segs only get a sky quad where an upper (ceiling)
+					// or lower (floor) wall part exists.
+					bool topwall = (topceilz1 > topfloorz1 || topceilz2 > topfloorz2) && line->sidedef && !bothSkyCeiling;
+					bool bottomwall = (bottomfloorz1 < bottomceilz1 || bottomfloorz2 < bottomceilz2) && line->sidedef && !bothSkyFloor;
+					if ((ceiling && !topwall) || (!ceiling && !bottomwall))
+						continue;
+				}
+			}
+
+			// Allocate only after the skip test so that skipped segs do not
+			// consume vertex buffer space.
+			TriVertex *wallvert = PolyVertexBuffer::GetVertices(4);
+			if (!wallvert)
+				return;
+
+			if (ceiling)
+			{
+				wallvert[0] = PlaneVertex(line->v1, frontsector, skyHeight);
+				wallvert[1] = PlaneVertex(line->v2, frontsector, skyHeight);
+				if (!closedSky)
+				{
+					wallvert[2] = PlaneVertex(line->v2, frontsector, frontsector->ceilingplane.ZatPoint(line->v2));
+					wallvert[3] = PlaneVertex(line->v1, frontsector, frontsector->ceilingplane.ZatPoint(line->v1));
+				}
+				else
+				{
+					// Closed sky: extend the quad all the way down to the floor.
+					wallvert[2] = PlaneVertex(line->v2, frontsector, frontsector->floorplane.ZatPoint(line->v2));
+					wallvert[3] = PlaneVertex(line->v1, frontsector, frontsector->floorplane.ZatPoint(line->v1));
+				}
+			}
+			else
+			{
+				if (!closedSky)
+				{
+					wallvert[0] = PlaneVertex(line->v1, frontsector, frontsector->floorplane.ZatPoint(line->v1));
+					wallvert[1] = PlaneVertex(line->v2, frontsector, frontsector->floorplane.ZatPoint(line->v2));
+				}
+				else
+				{
+					// Closed sky: extend the quad all the way up to the ceiling.
+					wallvert[0] = PlaneVertex(line->v1, frontsector, frontsector->ceilingplane.ZatPoint(line->v1));
+					wallvert[1] = PlaneVertex(line->v2, frontsector, frontsector->ceilingplane.ZatPoint(line->v2));
+				}
+				wallvert[2] = PlaneVertex(line->v2, frontsector, skyHeight);
+				wallvert[3] = PlaneVertex(line->v1, frontsector, skyHeight);
+			}
+
+			args.vinput = wallvert;
+			args.vcount = 4;
+			PolyTriangleDrawer::draw(args, TriDrawVariant::Stencil);
+		}
+	}
+}
+
+// Builds a vertex at the map position of v1 and the given height.
+// The texture coordinates are derived from the position (x / 64, 1 - y / 64).
+// The sector parameter is not used.
+TriVertex RenderPolyPlane::PlaneVertex(vertex_t *v1, sector_t *sector, double height)
+{
+	TriVertex v;
+	v.x = (float)v1->fPos().X;
+	v.y = (float)v1->fPos().Y;
+	v.z = (float)height;
+	v.w = 1.0f;
+	v.varying[0] = v.x / 64.0f;
+	v.varying[1] = 1.0f - v.y / 64.0f;
+	return v;
+}
diff --git a/src/r_poly_plane.h b/src/r_poly_plane.h
new file mode 100644
index 0000000000..b4f2087e82
--- /dev/null
+++ b/src/r_poly_plane.h
@@ -0,0 +1,34 @@
+/*
+** Handling drawing a plane (ceiling, floor)
+** Copyright (c) 2016 Magnus Norddahl
+**
+** This software is provided 'as-is', without any express or implied
+** warranty. In no event will the authors be held liable for any damages
+** arising from the use of this software.
+**
+** Permission is granted to anyone to use this software for any purpose,
+** including commercial applications, and to alter it and redistribute it
+** freely, subject to the following restrictions:
+**
+** 1. The origin of this software must not be misrepresented; you must not
+**    claim that you wrote the original software. If you use this software
+**    in a product, an acknowledgment in the product documentation would be
+**    appreciated but is not required.
+** 2. Altered source versions must be plainly marked as such, and must not be
+**    misrepresented as being the original software.
+** 3. This notice may not be removed or altered from any source distribution.
+**
+*/
+
+#pragma once
+
+#include "r_poly_triangle.h"
+
+// Draws the floor or ceiling plane of a subsector.
+class RenderPolyPlane
+{
+public:
+	void Render(const TriMatrix &worldToClip, subsector_t *sub, uint32_t subsectorDepth, bool ceiling, double skyHeight);
+
+private:
+	TriVertex PlaneVertex(vertex_t *v1, sector_t *sector, double height);
+};
diff --git a/src/r_poly_playersprite.cpp b/src/r_poly_playersprite.cpp
new file mode 100644
index 0000000000..1a657d1a21
--- /dev/null
+++ b/src/r_poly_playersprite.cpp
@@ -0,0 +1,299 @@
+/*
+** Handling drawing a player sprite
+** Copyright (c) 2016 Magnus Norddahl
+**
+** This software is provided 'as-is', without any express or implied
+** warranty. In no event will the authors be held liable for any damages
+** arising from the use of this software.
+**
+** Permission is granted to anyone to use this software for any purpose,
+** including commercial applications, and to alter it and redistribute it
+** freely, subject to the following restrictions:
+**
+** 1. The origin of this software must not be misrepresented; you must not
+**    claim that you wrote the original software. If you use this software
+**    in a product, an acknowledgment in the product documentation would be
+**    appreciated but is not required.
+** 2. Altered source versions must be plainly marked as such, and must not be
+**    misrepresented as being the original software.
+** 3. This notice may not be removed or altered from any source distribution.
+**
+*/
+
+// NOTE(review): the header name of the first include was lost when this patch
+// text was extracted; <stdlib.h> is assumed -- confirm against the original commit.
+#include <stdlib.h>
+#include "templates.h"
+#include "doomdef.h"
+#include "sbar.h"
+#include "r_data/r_translate.h"
+#include "r_poly_playersprite.h"
+#include "r_poly.h"
+#include "r_things.h" // for pspritexscale
+
+EXTERN_CVAR(Bool, r_drawplayersprites)
+EXTERN_CVAR(Bool, r_deathcamera)
+EXTERN_CVAR(Bool, st_scale)
+
+// Processes all player sprite layers of the camera's player.
+//
+// Clears ScreenSprites, then calls RenderSprite() for every layer that has a
+// caller. Nothing is processed when player sprites are disabled, there is no
+// camera player, the chase cam is active, or the death camera is showing.
+void RenderPolyPlayerSprites::Render()
+{
+	// In theory, everything in this function could be moved to RenderRemainingSprites.
+	// Just in case there's some hack elsewhere that relies on this happening as part
+	// of the main rendering we do it exactly as the old software renderer did.
+
+	ScreenSprites.clear();
+
+	if (!r_drawplayersprites ||
+		!camera ||
+		!camera->player ||
+		(players[consoleplayer].cheats & CF_CHASECAM) ||
+		(r_deathcamera && camera->health <= 0))
+		return;
+
+	float bobx, boby;
+	P_BobWeapon(camera->player, &bobx, &boby, r_TicFracF);
+
+	// Interpolate the main weapon layer once so as to be able to add it to other layers.
+	double wx, wy;
+	DPSprite *weapon = camera->player->FindPSprite(PSP_WEAPON);
+	if (weapon)
+	{
+		if (weapon->firstTic)
+		{
+			wx = weapon->x;
+			wy = weapon->y;
+		}
+		else
+		{
+			wx = weapon->oldx + (weapon->x - weapon->oldx) * r_TicFracF;
+			wy = weapon->oldy + (weapon->y - weapon->oldy) * r_TicFracF;
+		}
+	}
+	else
+	{
+		wx = 0;
+		wy = 0;
+	}
+
+	for (DPSprite *sprite = camera->player->psprites; sprite != nullptr; sprite = sprite->GetNext())
+	{
+		// [RH] Don't draw the targeter's crosshair if the player already has a crosshair set.
+		// It's possible this psprite's caller is now null but the layer itself hasn't been destroyed
+		// because it didn't tick yet (if we typed 'take all' while in the console for example).
+		// In this case let's simply not draw it to avoid crashing.
+		if ((sprite->GetID() != PSP_TARGETCENTER || CrosshairImage == nullptr) && sprite->GetCaller() != nullptr)
+		{
+			RenderSprite(sprite, camera, bobx, boby, wx, wy, r_TicFracF);
+		}
+	}
+}
+
+// Draws the screen sprites queued by Render().
+void RenderPolyPlayerSprites::RenderRemainingSprites()
+{
+	for (auto &sprite : ScreenSprites)
+		sprite.Render();
+}
+
+// Computes placement and lighting for one player sprite layer.
+//
+// sprite      layer to process
+// owner       actor whose sector light level is used
+// bobx, boby  weapon bob offsets, added when the layer has PSPF_ADDBOB
+// wx, wy      interpolated weapon layer position, added when the layer has
+//             PSPF_ADDWEAPON and is not the weapon layer itself
+// ticfrac     interpolation fraction between the old and current position
+//
+// The sprite is only queued (in ScreenSprites) when 2D acceleration is
+// available and the style is not fuzz; the software fallback is still
+// commented out, so otherwise nothing is drawn.
+void RenderPolyPlayerSprites::RenderSprite(DPSprite *sprite, AActor *owner, float bobx, float boby, double wx, double wy, double ticfrac)
+{
+	// decide which patch to use
+	if ((unsigned)sprite->GetSprite() >= (unsigned)sprites.Size())
+	{
+		DPrintf(DMSG_ERROR, "RenderPlayerSprite: invalid sprite number %i\n", sprite->GetSprite());
+		return;
+	}
+
+	spritedef_t *def = &sprites[sprite->GetSprite()];
+	if (sprite->GetFrame() >= def->numframes)
+	{
+		DPrintf(DMSG_ERROR, "RenderPlayerSprite: invalid sprite frame %i : %i\n", sprite->GetSprite(), sprite->GetFrame());
+		return;
+	}
+
+	spriteframe_t *frame = &SpriteFrames[def->spriteframes + sprite->GetFrame()];
+	FTextureID picnum = frame->Texture[0];
+	bool flip = (frame->Flip & 1) != 0;
+
+	FTexture *tex = TexMan(picnum);
+	if (tex->UseType == FTexture::TEX_Null)
+		return;
+
+	// Can't interpolate the first tic.
+	if (sprite->firstTic)
+	{
+		sprite->firstTic = false;
+		sprite->oldx = sprite->x;
+		sprite->oldy = sprite->y;
+	}
+
+	double sx = sprite->oldx + (sprite->x - sprite->oldx) * ticfrac;
+	double sy = sprite->oldy + (sprite->y - sprite->oldy) * ticfrac;
+
+	if (sprite->Flags & PSPF_ADDBOB)
+	{
+		sx += bobx;
+		sy += boby;
+	}
+
+	if (sprite->Flags & PSPF_ADDWEAPON && sprite->GetID() != PSP_WEAPON)
+	{
+		sx += wx;
+		sy += wy;
+	}
+
+	// calculate edges of the shape
+	double tx = sx - BaseXCenter;
+
+	tx -= tex->GetScaledLeftOffset();
+	int x1 = xs_RoundToInt(CenterX + tx * pspritexscale);
+
+	// off the right side
+	if (x1 > viewwidth)
+		return;
+
+	tx += tex->GetScaledWidth();
+	int x2 = xs_RoundToInt(CenterX + tx * pspritexscale);
+
+	// off the left side
+	if (x2 <= 0)
+		return;
+
+	double texturemid = (BaseYCenter - sy) * tex->Scale.Y + tex->TopOffset;
+
+	// Adjust PSprite for fullscreen views
+	if (camera->player && (RenderTarget != screen || viewheight == RenderTarget->GetHeight() || (RenderTarget->GetWidth() > (BaseXCenter * 2) && !st_scale)))
+	{
+		AWeapon *weapon = dyn_cast<AWeapon>(sprite->GetCaller());
+		if (weapon != nullptr && weapon->YAdjust != 0)
+		{
+			if (RenderTarget != screen || viewheight == RenderTarget->GetHeight())
+			{
+				texturemid -= weapon->YAdjust;
+			}
+			else
+			{
+				texturemid -= StatusBar->GetDisplacement() * weapon->YAdjust;
+			}
+		}
+	}
+
+	// Move the weapon down for 1280x1024.
+	if (sprite->GetID() < PSP_TARGETCENTER)
+	{
+		texturemid -= AspectPspriteOffset(WidescreenRatio);
+	}
+
+	int clipped_x1 = MAX(x1, 0);
+	int clipped_x2 = MIN(x2, viewwidth);
+	double xscale = pspritexscale / tex->Scale.X;
+	double yscale = pspriteyscale / tex->Scale.Y;
+	uint32_t translation = 0; // [RH] Use default colors
+
+	double xiscale, startfrac;
+	if (flip)
+	{
+		xiscale = -pspritexiscale * tex->Scale.X;
+		startfrac = 1;
+	}
+	else
+	{
+		xiscale = pspritexiscale * tex->Scale.X;
+		startfrac = 0;
+	}
+
+	if (clipped_x1 > x1)
+		startfrac += xiscale * (clipped_x1 - x1);
+
+	bool noaccel = false;
+
+	FDynamicColormap *basecolormap = viewsector->ColorMap;
+	FDynamicColormap *colormap_to_use = basecolormap;
+
+	visstyle_t visstyle;
+	visstyle.ColormapNum = 0;
+	visstyle.BaseColormap = basecolormap;
+	visstyle.Alpha = 0;
+	visstyle.RenderStyle = STYLE_Normal;
+
+	// foggy is hard-coded to false here, so extralight is always applied.
+	bool foggy = false;
+	int actualextralight = foggy ? 0 : extralight << 4;
+	int spriteshade = LIGHT2SHADE(owner->Sector->lightlevel + actualextralight);
+	double minz = double((2048 * 4) / double(1 << 20));
+	visstyle.ColormapNum = GETPALOOKUP(r_SpriteVisibility / minz, spriteshade);
+
+	if (sprite->GetID() < PSP_TARGETCENTER)
+	{
+		// Lots of complicated style and noaccel stuff
+	}
+
+	// Check for hardware-assisted 2D. If it's available, and this sprite is not
+	// fuzzy, don't draw it until after the switch to 2D mode.
+	if (!noaccel && RenderTarget == screen && (DFrameBuffer *)screen->Accel2D)
+	{
+		FRenderStyle style = visstyle.RenderStyle;
+		style.CheckFuzz();
+		if (style.BlendOp != STYLEOP_Fuzz)
+		{
+			PolyScreenSprite screenSprite;
+			screenSprite.Pic = tex;
+			screenSprite.X1 = viewwindowx + x1;
+			screenSprite.Y1 = viewwindowy + viewheight / 2 - texturemid * yscale - 0.5;
+			screenSprite.Width = tex->GetWidth() * xscale;
+			screenSprite.Height = tex->GetHeight() * yscale;
+			screenSprite.Translation = TranslationToTable(translation);
+			screenSprite.Flip = xiscale < 0;
+			screenSprite.visstyle = visstyle;
+			screenSprite.Colormap = colormap_to_use;
+			ScreenSprites.push_back(screenSprite);
+			return;
+		}
+	}
+
+	//R_DrawVisSprite(vis);
+}
+
+void PolyScreenSprite::Render()
+{
+	FSpecialColormap *special = nullptr;
+	FColormapStyle colormapstyle;
+	PalEntry overlay = 0;
+	bool usecolormapstyle = false;
+	if (visstyle.BaseColormap >= &SpecialColormaps[0] &&
+		visstyle.BaseColormap < &SpecialColormaps[SpecialColormaps.Size()])
+	{
+		special = static_cast<FSpecialColormap*>(visstyle.BaseColormap);
+	}
+	else if (Colormap->Color == PalEntry(255, 255, 255) &&
+		Colormap->Desaturate == 0)
+	{
+		overlay = Colormap->Fade;
+		overlay.a = BYTE(visstyle.ColormapNum * 255 / NUMCOLORMAPS);
+	}
+	else
+	{
+		usecolormapstyle = true;
+		colormapstyle.Color = Colormap->Color;
+		colormapstyle.Fade = Colormap->Fade;
+		colormapstyle.Desaturate = Colormap->Desaturate;
+		colormapstyle.FadeLevel = visstyle.ColormapNum / float(NUMCOLORMAPS);
+	}
+
+	screen->DrawTexture(Pic,
+		X1,
+		Y1,
+		DTA_DestWidthF, Width,
+		DTA_DestHeightF, Height,
+		DTA_Translation, Translation,
+		DTA_FlipX, Flip,
+		DTA_TopOffset, 0,
+		DTA_LeftOffset, 0,
+		DTA_ClipLeft, viewwindowx,
+		DTA_ClipTop, viewwindowy,
+		DTA_ClipRight, viewwindowx + viewwidth,
+		DTA_ClipBottom, viewwindowy + viewheight,
+		DTA_AlphaF, visstyle.Alpha,
+		DTA_RenderStyle, visstyle.RenderStyle,
+		DTA_FillColor, FillColor,
+		DTA_SpecialColormap, special,
+		DTA_ColorOverlay, overlay.d,
DTA_ColormapStyle, usecolormapstyle ? &colormapstyle : nullptr, + TAG_DONE); +} diff --git a/src/r_poly_playersprite.h b/src/r_poly_playersprite.h new file mode 100644 index 0000000000..e7384e2007 --- /dev/null +++ b/src/r_poly_playersprite.h @@ -0,0 +1,59 @@ +/* +** Handling drawing a player sprite +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. 
+** +*/ + +#pragma once + +class PolyScreenSprite; +class DPSprite; + +class RenderPolyPlayerSprites +{ +public: + void Render(); + void RenderRemainingSprites(); + +private: + void RenderSprite(DPSprite *sprite, AActor *owner, float bobx, float boby, double wx, double wy, double ticfrac); + + const int BaseXCenter = 160; + const int BaseYCenter = 100; + + std::vector ScreenSprites; +}; + +// DScreen accelerated sprite to be rendered +class PolyScreenSprite +{ +public: + void Render(); + + FTexture *Pic = nullptr; + double X1 = 0.0; + double Y1 = 0.0; + double Width = 0.0; + double Height = 0.0; + FRemapTable *Translation = nullptr; + bool Flip = false; + visstyle_t visstyle; + uint32_t FillColor = 0; + FDynamicColormap *Colormap = nullptr; +}; diff --git a/src/r_poly_sky.cpp b/src/r_poly_sky.cpp new file mode 100644 index 0000000000..03a4bfdb20 --- /dev/null +++ b/src/r_poly_sky.cpp @@ -0,0 +1,193 @@ +/* +** Sky dome rendering +** Copyright(C) 2003-2016 Christoph Oelckers +** All rights reserved. +** +** This program is free software: you can redistribute it and/or modify +** it under the terms of the GNU Lesser General Public License as published by +** the Free Software Foundation, either version 3 of the License, or +** (at your option) any later version. +** +** This program is distributed in the hope that it will be useful, +** but WITHOUT ANY WARRANTY; without even the implied warranty of +** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +** GNU Lesser General Public License for more details. +** +** You should have received a copy of the GNU Lesser General Public License +** along with this program. If not, see http:**www.gnu.org/licenses/ +** +** Loosely based on the JDoom sky and the ZDoomGL 0.66.2 sky. 
+*/ + +#include +#include "templates.h" +#include "doomdef.h" +#include "sbar.h" +#include "r_data/r_translate.h" +#include "r_poly_sky.h" +#include "r_poly.h" +#include "r_sky.h" // for skyflatnum + +PolySkyDome::PolySkyDome() +{ + CreateDome(); +} + +void PolySkyDome::Render(const TriMatrix &worldToClip) +{ + FTextureID sky1tex, sky2tex; + if ((level.flags & LEVEL_SWAPSKIES) && !(level.flags & LEVEL_DOUBLESKY)) + sky1tex = sky2texture; + else + sky1tex = sky1texture; + sky2tex = sky2texture; + + FTexture *frontskytex = TexMan(sky1tex, true); + FTexture *backskytex = nullptr; + if (level.flags & LEVEL_DOUBLESKY) + backskytex = TexMan(sky2tex, true); + + TriMatrix objectToWorld = TriMatrix::translate((float)ViewPos.X, (float)ViewPos.Y, (float)ViewPos.Z); + + TriUniforms uniforms; + uniforms.objectToClip = worldToClip * objectToWorld; + uniforms.light = 256; + uniforms.flags = 0; + uniforms.subsectorDepth = RenderPolyScene::SkySubsectorDepth; + + int rc = mRows + 1; + + PolyDrawArgs args; + args.uniforms = uniforms; + args.clipleft = 0; + args.cliptop = 0; + args.clipright = viewwidth; + args.clipbottom = viewheight; + args.stenciltestvalue = 255; + args.stencilwritevalue = 1; + args.SetTexture(frontskytex); + + RenderCapColorRow(args, frontskytex, 0, false); + RenderCapColorRow(args, frontskytex, rc, true); + + for (int i = 1; i <= mRows; i++) + { + RenderRow(args, i); + RenderRow(args, rc + i); + } +} + +void PolySkyDome::RenderRow(PolyDrawArgs &args, int row) +{ + args.vinput = &mVertices[mPrimStart[row]]; + args.vcount = mPrimStart[row + 1] - mPrimStart[row]; + args.mode = TriangleDrawMode::Strip; + args.ccw = false; + PolyTriangleDrawer::draw(args, TriDrawVariant::Draw); +} + +void PolySkyDome::RenderCapColorRow(PolyDrawArgs &args, FTexture *skytex, int row, bool bottomCap) +{ + uint32_t solid = skytex->GetSkyCapColor(bottomCap); + if (!r_swtruecolor) + solid = RGB32k.RGB[(RPART(solid) >> 3)][(GPART(solid) >> 3)][(BPART(solid) >> 3)]; + + args.vinput = 
&mVertices[mPrimStart[row]]; + args.vcount = mPrimStart[row + 1] - mPrimStart[row]; + args.mode = TriangleDrawMode::Fan; + args.ccw = bottomCap; + args.solidcolor = solid; + PolyTriangleDrawer::draw(args, TriDrawVariant::Fill); +} + +void PolySkyDome::CreateDome() +{ + mColumns = 128; + mRows = 4; + CreateSkyHemisphere(false); + CreateSkyHemisphere(true); + mPrimStart.Push(mVertices.Size()); +} + +void PolySkyDome::CreateSkyHemisphere(bool zflip) +{ + int r, c; + + mPrimStart.Push(mVertices.Size()); + + for (c = 0; c < mColumns; c++) + { + SkyVertex(1, c, zflip); + } + + // The total number of triangles per hemisphere can be calculated + // as follows: rows * columns * 2 + 2 (for the top cap). + for (r = 0; r < mRows; r++) + { + mPrimStart.Push(mVertices.Size()); + for (c = 0; c <= mColumns; c++) + { + SkyVertex(r + zflip, c, zflip); + SkyVertex(r + 1 - zflip, c, zflip); + } + } +} + +TriVertex PolySkyDome::SetVertex(float xx, float yy, float zz, float uu, float vv) +{ + TriVertex v; + v.x = xx; + v.y = yy; + v.z = zz; + v.w = 1.0f; + v.varying[0] = uu; + v.varying[1] = vv; + return v; +} + +TriVertex PolySkyDome::SetVertexXYZ(float xx, float yy, float zz, float uu, float vv) +{ + TriVertex v; + v.x = xx; + v.y = zz; + v.z = yy; + v.w = 1.0f; + v.varying[0] = uu; + v.varying[1] = vv; + return v; +} + +void PolySkyDome::SkyVertex(int r, int c, bool zflip) +{ + static const FAngle maxSideAngle = 60.f; + static const float scale = 10000.; + + FAngle topAngle = (c / (float)mColumns * 360.f); + FAngle sideAngle = maxSideAngle * (float)(mRows - r) / (float)mRows; + float height = sideAngle.Sin(); + float realRadius = scale * sideAngle.Cos(); + FVector2 pos = topAngle.ToVector(realRadius); + float z = (!zflip) ? scale * height : -scale * height; + + float u, v; + //uint32_t color = r == 0 ? 0xffffff : 0xffffffff; + + // And the texture coordinates. + if (!zflip) // Flipped Y is for the lower hemisphere. 
+ { + u = (-c / (float)mColumns); + v = (r / (float)mRows); + } + else + { + u = (-c / (float)mColumns); + v = 1.0f + ((mRows - r) / (float)mRows); + } + + if (r != 4) z += 300; + + // And finally the vertex. + TriVertex vert; + vert = SetVertexXYZ(-pos.X, z - 1.f, pos.Y, u * 4.0f, v * 1.2f + 0.5f/*, color*/); + mVertices.Push(vert); +} diff --git a/src/r_poly_sky.h b/src/r_poly_sky.h new file mode 100644 index 0000000000..dd4bd29cae --- /dev/null +++ b/src/r_poly_sky.h @@ -0,0 +1,45 @@ +/* +** Sky dome rendering +** Copyright(C) 2003-2016 Christoph Oelckers +** All rights reserved. +** +** This program is free software: you can redistribute it and/or modify +** it under the terms of the GNU Lesser General Public License as published by +** the Free Software Foundation, either version 3 of the License, or +** (at your option) any later version. +** +** This program is distributed in the hope that it will be useful, +** but WITHOUT ANY WARRANTY; without even the implied warranty of +** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +** GNU Lesser General Public License for more details. +** +** You should have received a copy of the GNU Lesser General Public License +** along with this program. If not, see http:**www.gnu.org/licenses/ +** +** Loosely based on the JDoom sky and the ZDoomGL 0.66.2 sky. 
+*/ + +#pragma once + +#include "r_poly_triangle.h" + +class PolySkyDome +{ +public: + PolySkyDome(); + void Render(const TriMatrix &worldToClip); + +private: + TArray mVertices; + TArray mPrimStart; + int mRows, mColumns; + + void SkyVertex(int r, int c, bool yflip); + void CreateSkyHemisphere(bool zflip); + void CreateDome(); + void RenderRow(PolyDrawArgs &args, int row); + void RenderCapColorRow(PolyDrawArgs &args, FTexture *skytex, int row, bool bottomCap); + + TriVertex SetVertex(float xx, float yy, float zz, float uu = 0, float vv = 0); + TriVertex SetVertexXYZ(float xx, float yy, float zz, float uu = 0, float vv = 0); +}; diff --git a/src/r_poly_sprite.cpp b/src/r_poly_sprite.cpp new file mode 100644 index 0000000000..b3a57ec473 --- /dev/null +++ b/src/r_poly_sprite.cpp @@ -0,0 +1,301 @@ +/* +** Handling drawing a sprite +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. 
+** +*/ + +#include +#include "templates.h" +#include "doomdef.h" +#include "sbar.h" +#include "r_data/r_translate.h" +#include "r_poly_sprite.h" +#include "r_poly.h" + +void RenderPolySprite::Render(const TriMatrix &worldToClip, AActor *thing, subsector_t *sub, uint32_t subsectorDepth) +{ + if (IsThingCulled(thing)) + return; + + DVector3 pos = thing->InterpolatedPosition(r_TicFracF); + pos.Z += thing->GetBobOffset(r_TicFracF); + + bool flipTextureX = false; + FTexture *tex = GetSpriteTexture(thing, flipTextureX); + if (tex == nullptr) + return; + DVector2 spriteScale = thing->Scale; + double thingxscalemul = spriteScale.X / tex->Scale.X; + double thingyscalemul = spriteScale.Y / tex->Scale.Y; + + if (flipTextureX) + pos.X -= (tex->GetWidth() - tex->LeftOffset) * thingxscalemul; + else + pos.X -= tex->LeftOffset * thingxscalemul; + + //pos.Z -= tex->TopOffset * thingyscalemul; + pos.Z -= (tex->GetHeight() - tex->TopOffset) * thingyscalemul + thing->Floorclip; + + double spriteHalfWidth = thingxscalemul * tex->GetWidth() * 0.5; + double spriteHeight = thingyscalemul * tex->GetHeight(); + + pos.X += spriteHalfWidth; + + DVector2 points[2] = + { + { pos.X - ViewSin * spriteHalfWidth, pos.Y + ViewCos * spriteHalfWidth }, + { pos.X + ViewSin * spriteHalfWidth, pos.Y - ViewCos * spriteHalfWidth } + }; + + // Is this sprite inside? (To do: clip the points) + for (int i = 0; i < 2; i++) + { + for (uint32_t i = 0; i < sub->numlines; i++) + { + seg_t *line = &sub->firstline[i]; + double nx = line->v1->fY() - line->v2->fY(); + double ny = line->v2->fX() - line->v1->fX(); + double d = -(line->v1->fX() * nx + line->v1->fY() * ny); + if (pos.X * nx + pos.Y * ny + d > 0.0) + return; + } + } + + //double depth = 1.0; + //visstyle_t visstyle = GetSpriteVisStyle(thing, depth); + // Rumor has it that AlterWeaponSprite needs to be called with visstyle passed in somewhere around here.. 
+ //R_SetColorMapLight(visstyle.BaseColormap, 0, visstyle.ColormapNum << FRACBITS); + + TriVertex *vertices = PolyVertexBuffer::GetVertices(4); + if (!vertices) + return; + + bool foggy = false; + int actualextralight = foggy ? 0 : extralight << 4; + + std::pair offsets[4] = + { + { 0.0f, 1.0f }, + { 1.0f, 1.0f }, + { 1.0f, 0.0f }, + { 0.0f, 0.0f }, + }; + + for (int i = 0; i < 4; i++) + { + auto &p = (i == 0 || i == 3) ? points[0] : points[1]; + + vertices[i].x = (float)p.X; + vertices[i].y = (float)p.Y; + vertices[i].z = (float)(pos.Z + spriteHeight * offsets[i].second); + vertices[i].w = 1.0f; + vertices[i].varying[0] = (float)(offsets[i].first * tex->Scale.X); + vertices[i].varying[1] = (float)((1.0f - offsets[i].second) * tex->Scale.Y); + if (flipTextureX) + vertices[i].varying[0] = 1.0f - vertices[i].varying[0]; + } + + TriUniforms uniforms; + uniforms.objectToClip = worldToClip; + uniforms.light = (uint32_t)((thing->Sector->lightlevel + actualextralight) / 255.0f * 256.0f); + uniforms.flags = 0; + uniforms.subsectorDepth = subsectorDepth; + + PolyDrawArgs args; + args.uniforms = uniforms; + args.vinput = vertices; + args.vcount = 4; + args.mode = TriangleDrawMode::Fan; + args.ccw = true; + args.clipleft = 0; + args.cliptop = 0; + args.clipright = viewwidth; + args.clipbottom = viewheight; + args.stenciltestvalue = 0; + args.stencilwritevalue = 1; + args.SetTexture(tex); + PolyTriangleDrawer::draw(args, TriDrawVariant::DrawSubsector); +} + +bool RenderPolySprite::IsThingCulled(AActor *thing) +{ + FIntCVar *cvar = thing->GetClass()->distancecheck; + if (cvar != nullptr && *cvar >= 0) + { + double dist = (thing->Pos() - ViewPos).LengthSquared(); + double check = (double)**cvar; + if (dist >= check * check) + return true; + } + + // Don't waste time projecting sprites that are definitely not visible. 
+ if (thing == nullptr || + (thing->renderflags & RF_INVISIBLE) || + !thing->RenderStyle.IsVisible(thing->Alpha) || + !thing->IsVisibleToPlayer()) + { + return true; + } + + return false; +} + +visstyle_t RenderPolySprite::GetSpriteVisStyle(AActor *thing, double z) +{ + visstyle_t visstyle; + + bool foggy = false; + int actualextralight = foggy ? 0 : extralight << 4; + int spriteshade = LIGHT2SHADE(thing->Sector->lightlevel + actualextralight); + + visstyle.RenderStyle = thing->RenderStyle; + visstyle.Alpha = float(thing->Alpha); + visstyle.ColormapNum = 0; + + // The software renderer cannot invert the source without inverting the overlay + // too. That means if the source is inverted, we need to do the reverse of what + // the invert overlay flag says to do. + bool invertcolormap = (visstyle.RenderStyle.Flags & STYLEF_InvertOverlay) != 0; + + if (visstyle.RenderStyle.Flags & STYLEF_InvertSource) + { + invertcolormap = !invertcolormap; + } + + FDynamicColormap *mybasecolormap = thing->Sector->ColorMap; + + // Sprites that are added to the scene must fade to black. 
+ if (visstyle.RenderStyle == LegacyRenderStyles[STYLE_Add] && mybasecolormap->Fade != 0) + { + mybasecolormap = GetSpecialLights(mybasecolormap->Color, 0, mybasecolormap->Desaturate); + } + + if (visstyle.RenderStyle.Flags & STYLEF_FadeToBlack) + { + if (invertcolormap) + { // Fade to white + mybasecolormap = GetSpecialLights(mybasecolormap->Color, MAKERGB(255, 255, 255), mybasecolormap->Desaturate); + invertcolormap = false; + } + else + { // Fade to black + mybasecolormap = GetSpecialLights(mybasecolormap->Color, MAKERGB(0, 0, 0), mybasecolormap->Desaturate); + } + } + + // get light level + if (fixedcolormap != nullptr) + { // fixed map + visstyle.BaseColormap = fixedcolormap; + visstyle.ColormapNum = 0; + } + else + { + if (invertcolormap) + { + mybasecolormap = GetSpecialLights(mybasecolormap->Color, mybasecolormap->Fade.InverseColor(), mybasecolormap->Desaturate); + } + if (fixedlightlev >= 0) + { + visstyle.BaseColormap = mybasecolormap; + visstyle.ColormapNum = fixedlightlev >> COLORMAPSHIFT; + } + else if (!foggy && ((thing->renderflags & RF_FULLBRIGHT) || (thing->flags5 & MF5_BRIGHT))) + { // full bright + visstyle.BaseColormap = mybasecolormap; + visstyle.ColormapNum = 0; + } + else + { // diminished light + double minz = double((2048 * 4) / double(1 << 20)); + visstyle.ColormapNum = GETPALOOKUP(r_SpriteVisibility / MAX(z, minz), spriteshade); + visstyle.BaseColormap = mybasecolormap; + } + } + + return visstyle; +} + +FTexture *RenderPolySprite::GetSpriteTexture(AActor *thing, /*out*/ bool &flipX) +{ + flipX = false; + if (thing->picnum.isValid()) + { + FTexture *tex = TexMan(thing->picnum); + if (tex->UseType == FTexture::TEX_Null) + { + return nullptr; + } + + if (tex->Rotations != 0xFFFF) + { + // choose a different rotation based on player view + spriteframe_t *sprframe = &SpriteFrames[tex->Rotations]; + DVector3 pos = thing->InterpolatedPosition(r_TicFracF); + pos.Z += thing->GetBobOffset(r_TicFracF); + DAngle ang = (pos - ViewPos).Angle(); + 
angle_t rot; + if (sprframe->Texture[0] == sprframe->Texture[1]) + { + rot = (ang - thing->Angles.Yaw + 45.0 / 2 * 9).BAMs() >> 28; + } + else + { + rot = (ang - thing->Angles.Yaw + (45.0 / 2 * 9 - 180.0 / 16)).BAMs() >> 28; + } + flipX = (sprframe->Flip & (1 << rot)) != 0; + tex = TexMan[sprframe->Texture[rot]]; // Do not animate the rotation + } + return tex; + } + else + { + // decide which texture to use for the sprite + int spritenum = thing->sprite; + if (spritenum >= (signed)sprites.Size() || spritenum < 0) + return nullptr; + + spritedef_t *sprdef = &sprites[spritenum]; + if (thing->frame >= sprdef->numframes) + { + // If there are no frames at all for this sprite, don't draw it. + return nullptr; + } + else + { + //picnum = SpriteFrames[sprdef->spriteframes + thing->frame].Texture[0]; + // choose a different rotation based on player view + spriteframe_t *sprframe = &SpriteFrames[sprdef->spriteframes + thing->frame]; + DVector3 pos = thing->InterpolatedPosition(r_TicFracF); + pos.Z += thing->GetBobOffset(r_TicFracF); + DAngle ang = (pos - ViewPos).Angle(); + angle_t rot; + if (sprframe->Texture[0] == sprframe->Texture[1]) + { + rot = (ang - thing->Angles.Yaw + 45.0 / 2 * 9).BAMs() >> 28; + } + else + { + rot = (ang - thing->Angles.Yaw + (45.0 / 2 * 9 - 180.0 / 16)).BAMs() >> 28; + } + flipX = (sprframe->Flip & (1 << rot)) != 0; + return TexMan[sprframe->Texture[rot]]; // Do not animate the rotation + } + } +} diff --git a/src/r_poly_sprite.h b/src/r_poly_sprite.h new file mode 100644 index 0000000000..d60710d049 --- /dev/null +++ b/src/r_poly_sprite.h @@ -0,0 +1,37 @@ +/* +** Handling drawing a sprite +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. 
// Draws a single actor sprite as a textured quad.
class RenderPolySprite
{
public:
	// Draws thing as a four-vertex fan whose horizontal extent is laid out
	// using ViewSin/ViewCos. Returns without drawing when the thing is culled,
	// has no texture, or its position lies outside sub.
	void Render(const TriMatrix &worldToClip, AActor *thing, subsector_t *sub, uint32_t subsectorDepth);

	// Returns true when thing should be skipped entirely (distance check,
	// invisibility flags, render style, or not visible to the player).
	static bool IsThingCulled(AActor *thing);

private:
	// Builds the visstyle_t (render style, alpha, colormap) for thing at depth z.
	visstyle_t GetSpriteVisStyle(AActor *thing, double z);
	// Returns the texture to draw for thing, or nullptr; flipX receives
	// whether the chosen frame is mirrored.
	FTexture *GetSpriteTexture(AActor *thing, /*out*/ bool &flipX);
};
Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. +** +*/ + +#include +#include "templates.h" +#include "doomdef.h" +#include "sbar.h" +#include "r_data/r_translate.h" +#include "r_poly_wall.h" +#include "r_poly.h" +#include "r_sky.h" // for skyflatnum + +bool RenderPolyWall::RenderLine(const TriMatrix &worldToClip, seg_t *line, sector_t *frontsector, uint32_t subsectorDepth, std::vector &translucentWallsOutput) +{ + double frontceilz1 = frontsector->ceilingplane.ZatPoint(line->v1); + double frontfloorz1 = frontsector->floorplane.ZatPoint(line->v1); + double frontceilz2 = frontsector->ceilingplane.ZatPoint(line->v2); + double frontfloorz2 = frontsector->floorplane.ZatPoint(line->v2); + + RenderPolyWall wall; + wall.Line = line; + wall.Colormap = frontsector->ColorMap; + wall.Masked = false; + wall.SubsectorDepth = subsectorDepth; + + if (line->backsector == nullptr) + { + if (line->sidedef) + { + wall.SetCoords(line->v1->fPos(), line->v2->fPos(), frontceilz1, frontfloorz1, frontceilz2, frontfloorz2); + wall.TopZ = frontceilz1; + wall.BottomZ = frontfloorz1; + wall.UnpeggedCeil = frontceilz1; + wall.Texpart = side_t::mid; + wall.Render(worldToClip); + return true; + } + } + else + { + sector_t *backsector = (line->backsector != line->frontsector) ? 
line->backsector : line->frontsector; + + double backceilz1 = backsector->ceilingplane.ZatPoint(line->v1); + double backfloorz1 = backsector->floorplane.ZatPoint(line->v1); + double backceilz2 = backsector->ceilingplane.ZatPoint(line->v2); + double backfloorz2 = backsector->floorplane.ZatPoint(line->v2); + + double topceilz1 = frontceilz1; + double topceilz2 = frontceilz2; + double topfloorz1 = MIN(backceilz1, frontceilz1); + double topfloorz2 = MIN(backceilz2, frontceilz2); + double bottomceilz1 = MAX(frontfloorz1, backfloorz1); + double bottomceilz2 = MAX(frontfloorz2, backfloorz2); + double bottomfloorz1 = frontfloorz1; + double bottomfloorz2 = frontfloorz2; + double middleceilz1 = topfloorz1; + double middleceilz2 = topfloorz2; + double middlefloorz1 = MIN(bottomceilz1, middleceilz1); + double middlefloorz2 = MIN(bottomceilz2, middleceilz2); + + bool bothSkyCeiling = frontsector->GetTexture(sector_t::ceiling) == skyflatnum && backsector->GetTexture(sector_t::ceiling) == skyflatnum; + + if ((topceilz1 > topfloorz1 || topceilz2 > topfloorz2) && line->sidedef && !bothSkyCeiling) + { + wall.SetCoords(line->v1->fPos(), line->v2->fPos(), topceilz1, topfloorz1, topceilz2, topfloorz2); + wall.TopZ = topceilz1; + wall.BottomZ = topfloorz1; + wall.UnpeggedCeil = topceilz1; + wall.Texpart = side_t::top; + wall.Render(worldToClip); + } + + if ((bottomfloorz1 < bottomceilz1 || bottomfloorz2 < bottomceilz2) && line->sidedef) + { + wall.SetCoords(line->v1->fPos(), line->v2->fPos(), bottomceilz1, bottomfloorz2, bottomceilz2, bottomfloorz2); + wall.TopZ = bottomceilz1; + wall.BottomZ = bottomfloorz2; + wall.UnpeggedCeil = topceilz1; + wall.Texpart = side_t::bottom; + wall.Render(worldToClip); + } + + if (line->sidedef) + { + FTexture *midtex = TexMan(line->sidedef->GetTexture(side_t::mid), true); + if (midtex && midtex->UseType != FTexture::TEX_Null) + { + wall.SetCoords(line->v1->fPos(), line->v2->fPos(), middleceilz1, middlefloorz1, middleceilz2, middlefloorz2); + wall.TopZ = 
middleceilz1; + wall.BottomZ = middlefloorz1; + wall.UnpeggedCeil = topceilz1; + wall.Texpart = side_t::mid; + wall.Masked = true; + translucentWallsOutput.push_back({ wall }); + } + } + } + return false; +} + +void RenderPolyWall::Render(const TriMatrix &worldToClip) +{ + FTexture *tex = GetTexture(); + if (!tex) + return; + + PolyWallTextureCoords texcoords(tex, Line, Texpart, TopZ, BottomZ, UnpeggedCeil); + + TriVertex *vertices = PolyVertexBuffer::GetVertices(4); + if (!vertices) + return; + + vertices[0].x = (float)v1.X; + vertices[0].y = (float)v1.Y; + vertices[0].z = (float)ceil1; + vertices[0].w = 1.0f; + vertices[0].varying[0] = (float)texcoords.u1; + vertices[0].varying[1] = (float)texcoords.v1; + + vertices[1].x = (float)v2.X; + vertices[1].y = (float)v2.Y; + vertices[1].z = (float)ceil2; + vertices[1].w = 1.0f; + vertices[1].varying[0] = (float)texcoords.u2; + vertices[1].varying[1] = (float)texcoords.v1; + + vertices[2].x = (float)v2.X; + vertices[2].y = (float)v2.Y; + vertices[2].z = (float)floor2; + vertices[2].w = 1.0f; + vertices[2].varying[0] = (float)texcoords.u2; + vertices[2].varying[1] = (float)texcoords.v2; + + vertices[3].x = (float)v1.X; + vertices[3].y = (float)v1.Y; + vertices[3].z = (float)floor1; + vertices[3].w = 1.0f; + vertices[3].varying[0] = (float)texcoords.u1; + vertices[3].varying[1] = (float)texcoords.v2; + + TriUniforms uniforms; + uniforms.objectToClip = worldToClip; + uniforms.light = (uint32_t)(GetLightLevel() / 255.0f * 256.0f); + uniforms.flags = 0; + uniforms.subsectorDepth = SubsectorDepth; + + PolyDrawArgs args; + args.uniforms = uniforms; + args.vinput = vertices; + args.vcount = 4; + args.mode = TriangleDrawMode::Fan; + args.ccw = true; + args.clipleft = 0; + args.cliptop = 0; + args.clipright = viewwidth; + args.clipbottom = viewheight; + args.stenciltestvalue = 0; + args.stencilwritevalue = 1; + args.SetTexture(tex); + + if (!Masked) + { + PolyTriangleDrawer::draw(args, TriDrawVariant::Draw); + 
PolyTriangleDrawer::draw(args, TriDrawVariant::Stencil); + } + else + { + PolyTriangleDrawer::draw(args, TriDrawVariant::DrawSubsector); + } +} + +FTexture *RenderPolyWall::GetTexture() +{ + FTexture *tex = TexMan(Line->sidedef->GetTexture(Texpart), true); + if (tex == nullptr || tex->UseType == FTexture::TEX_Null) + return nullptr; + else + return tex; +} + +int RenderPolyWall::GetLightLevel() +{ + if (fixedlightlev >= 0 || fixedcolormap) + { + return 255; + } + else + { + bool foggy = false; + int actualextralight = foggy ? 0 : extralight << 4; + return Line->sidedef->GetLightLevel(foggy, Line->frontsector->lightlevel) + actualextralight; + } +} + +///////////////////////////////////////////////////////////////////////////// + +PolyWallTextureCoords::PolyWallTextureCoords(FTexture *tex, const seg_t *line, side_t::ETexpart texpart, double topz, double bottomz, double unpeggedceil) +{ + CalcU(tex, line, texpart); + CalcV(tex, line, texpart, topz, bottomz, unpeggedceil); +} + +void PolyWallTextureCoords::CalcU(FTexture *tex, const seg_t *line, side_t::ETexpart texpart) +{ + double lineLength = line->sidedef->TexelLength; + double lineStart = 0.0; + + bool entireSegment = ((line->linedef->v1 == line->v1) && (line->linedef->v2 == line->v2) || (line->linedef->v2 == line->v1) && (line->linedef->v1 == line->v2)); + if (!entireSegment) + { + lineLength = (line->v2->fPos() - line->v1->fPos()).Length(); + lineStart = (line->v1->fPos() - line->linedef->v1->fPos()).Length(); + } + + int texWidth = tex->GetWidth(); + double uscale = line->sidedef->GetTextureXScale(texpart) * tex->Scale.X; + u1 = lineStart + line->sidedef->GetTextureXOffset(texpart); + u2 = u1 + lineLength; + u1 *= uscale; + u2 *= uscale; + u1 /= texWidth; + u2 /= texWidth; +} + +void PolyWallTextureCoords::CalcV(FTexture *tex, const seg_t *line, side_t::ETexpart texpart, double topz, double bottomz, double unpeggedceil) +{ + double vscale = line->sidedef->GetTextureYScale(texpart) * tex->Scale.Y; + + double 
yoffset = line->sidedef->GetTextureYOffset(texpart); + if (tex->bWorldPanning) + yoffset *= vscale; + + switch (texpart) + { + default: + case side_t::mid: + CalcVMidPart(tex, line, topz, bottomz, vscale, yoffset); + break; + case side_t::top: + CalcVTopPart(tex, line, topz, bottomz, vscale, yoffset); + break; + case side_t::bottom: + CalcVBottomPart(tex, line, topz, bottomz, unpeggedceil, vscale, yoffset); + break; + } + + int texHeight = tex->GetHeight(); + v1 /= texHeight; + v2 /= texHeight; +} + +void PolyWallTextureCoords::CalcVTopPart(FTexture *tex, const seg_t *line, double topz, double bottomz, double vscale, double yoffset) +{ + bool pegged = (line->linedef->flags & ML_DONTPEGTOP) == 0; + if (pegged) // bottom to top + { + int texHeight = tex->GetHeight(); + v1 = -yoffset; + v2 = v1 + (topz - bottomz); + v1 *= vscale; + v2 *= vscale; + v1 = texHeight - v1; + v2 = texHeight - v2; + std::swap(v1, v2); + } + else // top to bottom + { + v1 = yoffset; + v2 = v1 + (topz - bottomz); + v1 *= vscale; + v2 *= vscale; + } +} + +void PolyWallTextureCoords::CalcVMidPart(FTexture *tex, const seg_t *line, double topz, double bottomz, double vscale, double yoffset) +{ + bool pegged = (line->linedef->flags & ML_DONTPEGBOTTOM) == 0; + if (pegged) // top to bottom + { + v1 = yoffset; + v2 = v1 + (topz - bottomz); + v1 *= vscale; + v2 *= vscale; + } + else // bottom to top + { + int texHeight = tex->GetHeight(); + v1 = yoffset; + v2 = v1 + (topz - bottomz); + v1 *= vscale; + v2 *= vscale; + v1 = texHeight - v1; + v2 = texHeight - v2; + std::swap(v1, v2); + } +} + +void PolyWallTextureCoords::CalcVBottomPart(FTexture *tex, const seg_t *line, double topz, double bottomz, double unpeggedceil, double vscale, double yoffset) +{ + bool pegged = (line->linedef->flags & ML_DONTPEGBOTTOM) == 0; + if (pegged) // top to bottom + { + v1 = yoffset; + v2 = v1 + (topz - bottomz); + v1 *= vscale; + v2 *= vscale; + } + else + { + v1 = yoffset + (unpeggedceil - topz); + v2 = v1 + (topz - 
bottomz); + v1 *= vscale; + v2 *= vscale; + } +} diff --git a/src/r_poly_wall.h b/src/r_poly_wall.h new file mode 100644 index 0000000000..c215bb611e --- /dev/null +++ b/src/r_poly_wall.h @@ -0,0 +1,82 @@ +/* +** Handling drawing a wall +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. 
+** +*/ + +#pragma once + +#include "r_poly_triangle.h" + +class PolyTranslucentObject; + +class RenderPolyWall +{ +public: + static bool RenderLine(const TriMatrix &worldToClip, seg_t *line, sector_t *frontsector, uint32_t subsectorDepth, std::vector &translucentWallsOutput); + + void Render(const TriMatrix &worldToClip); + + void SetCoords(const DVector2 &v1, const DVector2 &v2, double ceil1, double floor1, double ceil2, double floor2) + { + this->v1 = v1; + this->v2 = v2; + this->ceil1 = ceil1; + this->floor1 = floor1; + this->ceil2 = ceil2; + this->floor2 = floor2; + } + + DVector2 v1; + DVector2 v2; + double ceil1 = 0.0; + double floor1 = 0.0; + double ceil2 = 0.0; + double floor2 = 0.0; + + const seg_t *Line = nullptr; + side_t::ETexpart Texpart = side_t::mid; + double TopZ = 0.0; + double BottomZ = 0.0; + double UnpeggedCeil = 0.0; + FSWColormap *Colormap = nullptr; + bool Masked = false; + uint32_t SubsectorDepth = 0; + +private: + FTexture *GetTexture(); + int GetLightLevel(); +}; + +// Texture coordinates for a wall +class PolyWallTextureCoords +{ +public: + PolyWallTextureCoords(FTexture *tex, const seg_t *line, side_t::ETexpart texpart, double topz, double bottomz, double unpeggedceil); + + double u1, u2; + double v1, v2; + +private: + void CalcU(FTexture *tex, const seg_t *line, side_t::ETexpart texpart); + void CalcV(FTexture *tex, const seg_t *line, side_t::ETexpart texpart, double topz, double bottomz, double unpeggedceil); + void CalcVTopPart(FTexture *tex, const seg_t *line, double topz, double bottomz, double vscale, double yoffset); + void CalcVMidPart(FTexture *tex, const seg_t *line, double topz, double bottomz, double vscale, double yoffset); + void CalcVBottomPart(FTexture *tex, const seg_t *line, double topz, double bottomz, double unpeggedceil, double vscale, double yoffset); +}; diff --git a/src/r_poly_wallsprite.cpp b/src/r_poly_wallsprite.cpp new file mode 100644 index 0000000000..86ddd52a9f --- /dev/null +++ b/src/r_poly_wallsprite.cpp 
@@ -0,0 +1,35 @@ +/* +** Handling drawing a sprite +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. +** +*/ + +#include +#include "templates.h" +#include "doomdef.h" +#include "sbar.h" +#include "r_data/r_translate.h" +#include "r_poly_wallsprite.h" +#include "r_poly.h" + +void RenderPolyWallSprite::Render(AActor *thing, subsector_t *sub, uint32_t subsectorDepth) +{ + if (RenderPolySprite::IsThingCulled(thing)) + return; +} diff --git a/src/r_poly_wallsprite.h b/src/r_poly_wallsprite.h new file mode 100644 index 0000000000..ce6917e58c --- /dev/null +++ b/src/r_poly_wallsprite.h @@ -0,0 +1,31 @@ +/* +** Handling drawing a wall sprite +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. 
The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. +** +*/ + +#pragma once + +#include "r_triangle.h" + +class RenderPolyWallSprite +{ +public: + void Render(AActor *thing, subsector_t *sub, uint32_t subsectorDepth); +}; diff --git a/src/r_swrenderer.cpp b/src/r_swrenderer.cpp index 97a2076528..3ddfcb5f28 100644 --- a/src/r_swrenderer.cpp +++ b/src/r_swrenderer.cpp @@ -44,8 +44,11 @@ #include "r_data/voxels.h" #include "r_draw_rgba.h" #include "r_compiler/llvmdrawers.h" +#include "r_poly.h" EXTERN_CVAR(Bool, r_shadercolormaps) +EXTERN_CVAR(Bool, r_newrenderer) // [SP] dpJudas's new renderer +EXTERN_CVAR(Float, maxviewpitch) // [SP] CVAR from GZDoom void R_SWRSetWindow(int windowSize, int fullWidth, int fullHeight, int stHeight, float trueratio); void R_SetupColormap(player_t *); @@ -239,7 +242,14 @@ void FSoftwareRenderer::WriteSavePic (player_t *player, FileWriter *file, int wi void FSoftwareRenderer::DrawRemainingPlayerSprites() { - R_DrawRemainingPlayerSprites(); + if (!r_newrenderer) + { + R_DrawRemainingPlayerSprites(); + } + else + { + RenderPolyScene::Instance()->RenderRemainingPlayerSprites(); + } } //=========================================================================== @@ -250,9 +260,6 @@ void FSoftwareRenderer::DrawRemainingPlayerSprites() #define MAX_DN_ANGLE 56 // Max looking down angle #define MAX_UP_ANGLE 32 // Max looking up angle -EXTERN_CVAR(Bool, r_newrenderer) // [SP] dpJudas's new renderer -EXTERN_CVAR(Float, maxviewpitch) // [SP] CVAR from GZDoom - int FSoftwareRenderer::GetMaxViewPitch(bool down) { return (r_newrenderer) ? 
int(maxviewpitch) : (down ? MAX_DN_ANGLE : MAX_UP_ANGLE); From e5f8af2216fa65b8dfb294aeaed56dc3098b792c Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 14 Nov 2016 23:22:38 +0100 Subject: [PATCH 317/912] Fix sky flood fill for E1M8 and Map11 --- src/r_poly_plane.cpp | 38 +++++++++++++------------------------- 1 file changed, 13 insertions(+), 25 deletions(-) diff --git a/src/r_poly_plane.cpp b/src/r_poly_plane.cpp index ac9622110b..9ca82cf28d 100644 --- a/src/r_poly_plane.cpp +++ b/src/r_poly_plane.cpp @@ -103,7 +103,8 @@ void RenderPolyPlane::Render(const TriMatrix &worldToClip, subsector_t *sub, uin seg_t *line = &sub->firstline[i]; - bool closedSky = false; + double skyBottomz1 = frontsector->ceilingplane.ZatPoint(line->v1); + double skyBottomz2 = frontsector->ceilingplane.ZatPoint(line->v2); if (line->backsector) { sector_t *backsector = (line->backsector != line->frontsector) ? line->backsector : line->frontsector; @@ -132,14 +133,17 @@ void RenderPolyPlane::Render(const TriMatrix &worldToClip, subsector_t *sub, uin double middlefloorz2 = MIN(bottomceilz2, middleceilz2); bool bothSkyCeiling = frontsector->GetTexture(sector_t::ceiling) == skyflatnum && backsector->GetTexture(sector_t::ceiling) == skyflatnum; - bool bothSkyFloor = frontsector->GetTexture(sector_t::floor) == skyflatnum && backsector->GetTexture(sector_t::floor) == skyflatnum; bool closedSector = backceilz1 == backfloorz1 && backceilz2 == backfloorz2; - closedSky = (ceiling && bothSkyCeiling && closedSector) || (!ceiling && bothSkyFloor && closedSector); - if (!closedSky) + if (ceiling && bothSkyCeiling && closedSector) + { + skyBottomz1 = middlefloorz1; + skyBottomz2 = middlefloorz2; + } + else { bool topwall = (topceilz1 > topfloorz1 || topceilz2 > topfloorz2) && line->sidedef && !bothSkyCeiling; - bool bottomwall = (bottomfloorz1 < bottomceilz1 || bottomfloorz2 < bottomceilz2) && line->sidedef && !bothSkyFloor; + bool bottomwall = (bottomfloorz1 < bottomceilz1 || bottomfloorz2 < 
bottomceilz2) && line->sidedef; if ((ceiling && !topwall) || (!ceiling && !bottomwall)) continue; } @@ -149,29 +153,13 @@ void RenderPolyPlane::Render(const TriMatrix &worldToClip, subsector_t *sub, uin { wallvert[0] = PlaneVertex(line->v1, frontsector, skyHeight); wallvert[1] = PlaneVertex(line->v2, frontsector, skyHeight); - if (!closedSky) - { - wallvert[2] = PlaneVertex(line->v2, frontsector, frontsector->ceilingplane.ZatPoint(line->v2)); - wallvert[3] = PlaneVertex(line->v1, frontsector, frontsector->ceilingplane.ZatPoint(line->v1)); - } - else - { - wallvert[2] = PlaneVertex(line->v2, frontsector, frontsector->floorplane.ZatPoint(line->v2)); - wallvert[3] = PlaneVertex(line->v1, frontsector, frontsector->floorplane.ZatPoint(line->v1)); - } + wallvert[2] = PlaneVertex(line->v2, frontsector, skyBottomz2); + wallvert[3] = PlaneVertex(line->v1, frontsector, skyBottomz1); } else { - if (!closedSky) - { - wallvert[0] = PlaneVertex(line->v1, frontsector, frontsector->floorplane.ZatPoint(line->v1)); - wallvert[1] = PlaneVertex(line->v2, frontsector, frontsector->floorplane.ZatPoint(line->v2)); - } - else - { - wallvert[0] = PlaneVertex(line->v1, frontsector, frontsector->ceilingplane.ZatPoint(line->v1)); - wallvert[1] = PlaneVertex(line->v2, frontsector, frontsector->ceilingplane.ZatPoint(line->v2)); - } + wallvert[0] = PlaneVertex(line->v1, frontsector, frontsector->floorplane.ZatPoint(line->v1)); + wallvert[1] = PlaneVertex(line->v2, frontsector, frontsector->floorplane.ZatPoint(line->v2)); wallvert[2] = PlaneVertex(line->v2, frontsector, skyHeight); wallvert[3] = PlaneVertex(line->v1, frontsector, skyHeight); } From f143b9b7b05168f15ec4c58faec8c6df936bb272 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 15 Nov 2016 12:31:57 +0100 Subject: [PATCH 318/912] Fix midtexture repeat and yoffset bug --- src/r_poly_wall.cpp | 54 +++++++++++++++++++++++++++++++++++---------- src/r_poly_wall.h | 12 ++-------- 2 files changed, 44 insertions(+), 22 deletions(-) diff 
--git a/src/r_poly_wall.cpp b/src/r_poly_wall.cpp index 815230052a..77742a8f82 100644 --- a/src/r_poly_wall.cpp +++ b/src/r_poly_wall.cpp @@ -117,6 +117,16 @@ bool RenderPolyWall::RenderLine(const TriMatrix &worldToClip, seg_t *line, secto return false; } +void RenderPolyWall::SetCoords(const DVector2 &v1, const DVector2 &v2, double ceil1, double floor1, double ceil2, double floor2) +{ + this->v1 = v1; + this->v2 = v2; + this->ceil1 = ceil1; + this->floor1 = floor1; + this->ceil2 = ceil2; + this->floor2 = floor2; +} + void RenderPolyWall::Render(const TriMatrix &worldToClip) { FTexture *tex = GetTexture(); @@ -157,6 +167,13 @@ void RenderPolyWall::Render(const TriMatrix &worldToClip) vertices[3].varying[0] = (float)texcoords.u1; vertices[3].varying[1] = (float)texcoords.v2; + // Masked walls clamp to the 0-1 range (no texture repeat) + if (Masked) + { + ClampHeight(vertices[0], vertices[3]); + ClampHeight(vertices[1], vertices[2]); + } + TriUniforms uniforms; uniforms.objectToClip = worldToClip; uniforms.light = (uint32_t)(GetLightLevel() / 255.0f * 256.0f); @@ -188,6 +205,26 @@ void RenderPolyWall::Render(const TriMatrix &worldToClip) } } +void RenderPolyWall::ClampHeight(TriVertex &v1, TriVertex &v2) +{ + float top = v1.z; + float bottom = v2.z; + float texv1 = v1.varying[1]; + float texv2 = v2.varying[1]; + float delta = (texv2 - texv1); + + float t1 = texv1 < 0.0f ? -texv1 / delta : 0.0f; + float t2 = texv2 > 1.0f ? 
(1.0f - texv1) / delta : 1.0f; + float inv_t1 = 1.0f - t1; + float inv_t2 = 1.0f - t2; + + v1.z = top * inv_t1 + bottom * t1; + v1.varying[1] = texv1 * inv_t1 + texv2 * t1; + + v2.z = top * inv_t2 + bottom * t2; + v2.varying[1] = texv1 * inv_t2 + texv2 * t2; +} + FTexture *RenderPolyWall::GetTexture() { FTexture *tex = TexMan(Line->sidedef->GetTexture(Texpart), true); @@ -296,21 +333,14 @@ void PolyWallTextureCoords::CalcVMidPart(FTexture *tex, const seg_t *line, doubl bool pegged = (line->linedef->flags & ML_DONTPEGBOTTOM) == 0; if (pegged) // top to bottom { - v1 = yoffset; - v2 = v1 + (topz - bottomz); - v1 *= vscale; - v2 *= vscale; + v1 = yoffset * vscale; + v2 = (yoffset + (topz - bottomz)) * vscale; } else // bottom to top { int texHeight = tex->GetHeight(); - v1 = yoffset; - v2 = v1 + (topz - bottomz); - v1 *= vscale; - v2 *= vscale; - v1 = texHeight - v1; - v2 = texHeight - v2; - std::swap(v1, v2); + v1 = texHeight - (-yoffset + (topz - bottomz)) * vscale; + v2 = texHeight + yoffset * vscale; } } @@ -326,7 +356,7 @@ void PolyWallTextureCoords::CalcVBottomPart(FTexture *tex, const seg_t *line, do } else { - v1 = yoffset + (unpeggedceil - topz); + v1 = -yoffset + (unpeggedceil - topz); v2 = v1 + (topz - bottomz); v1 *= vscale; v2 *= vscale; diff --git a/src/r_poly_wall.h b/src/r_poly_wall.h index c215bb611e..2de1f1c09b 100644 --- a/src/r_poly_wall.h +++ b/src/r_poly_wall.h @@ -31,18 +31,9 @@ class RenderPolyWall public: static bool RenderLine(const TriMatrix &worldToClip, seg_t *line, sector_t *frontsector, uint32_t subsectorDepth, std::vector &translucentWallsOutput); + void SetCoords(const DVector2 &v1, const DVector2 &v2, double ceil1, double floor1, double ceil2, double floor2); void Render(const TriMatrix &worldToClip); - void SetCoords(const DVector2 &v1, const DVector2 &v2, double ceil1, double floor1, double ceil2, double floor2) - { - this->v1 = v1; - this->v2 = v2; - this->ceil1 = ceil1; - this->floor1 = floor1; - this->ceil2 = ceil2; - 
this->floor2 = floor2; - } - DVector2 v1; DVector2 v2; double ceil1 = 0.0; @@ -60,6 +51,7 @@ public: uint32_t SubsectorDepth = 0; private: + void ClampHeight(TriVertex &v1, TriVertex &v2); FTexture *GetTexture(); int GetLightLevel(); }; From 5f1b82252302958922f8deb60e65f5e4e592bd92 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 15 Nov 2016 13:30:30 +0100 Subject: [PATCH 319/912] Mark rendered lines in the automap --- src/r_poly.cpp | 11 +++++++++-- src/r_poly.h | 2 +- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/src/r_poly.cpp b/src/r_poly.cpp index 42cc84e5fe..c95083c1db 100644 --- a/src/r_poly.cpp +++ b/src/r_poly.cpp @@ -102,7 +102,7 @@ void RenderPolyScene::RenderSubsector(subsector_t *sub) { seg_t *line = &sub->firstline[i]; if (line->sidedef == nullptr || !(line->sidedef->Flags & WALLF_POLYOBJ)) - RenderLine(line, frontsector, subsectorDepth); + RenderLine(sub, line, frontsector, subsectorDepth); } SpriteRange sprites = GetSpritesForSector(sub->sector); @@ -136,7 +136,7 @@ SpriteRange RenderPolyScene::GetSpritesForSector(sector_t *sector) return range; } -void RenderPolyScene::RenderLine(seg_t *line, sector_t *frontsector, uint32_t subsectorDepth) +void RenderPolyScene::RenderLine(subsector_t *sub, seg_t *line, sector_t *frontsector, uint32_t subsectorDepth) { // Reject lines not facing viewer DVector2 pt1 = line->v1->fPos() - ViewPos; @@ -150,6 +150,13 @@ void RenderPolyScene::RenderLine(seg_t *line, sector_t *frontsector, uint32_t su if (hasSegmentRange && Cull.IsSegmentCulled(sx1, sx2)) return; + // Tell automap we saw this + if (!r_dontmaplines && line->linedef) + { + line->linedef->flags |= ML_MAPPED; + sub->flags |= SSECF_DRAWN; + } + // Render wall, and update culling info if its an occlusion blocker if (RenderPolyWall::RenderLine(WorldToClip, line, frontsector, subsectorDepth, SubsectorTranslucentWalls)) { diff --git a/src/r_poly.h b/src/r_poly.h index b182a582dd..35105e4f96 100644 --- a/src/r_poly.h +++ b/src/r_poly.h @@ 
-85,7 +85,7 @@ public: private: void RenderSubsector(subsector_t *sub); - void RenderLine(seg_t *line, sector_t *frontsector, uint32_t subsectorDepth); + void RenderLine(subsector_t *sub, seg_t *line, sector_t *frontsector, uint32_t subsectorDepth); void RenderTranslucent(); SpriteRange GetSpritesForSector(sector_t *sector); From 3b397183ac20286156b46bc3e1a476e1c6a7339d Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 15 Nov 2016 15:15:01 +0100 Subject: [PATCH 320/912] Clamp segment range --- src/r_poly_cull.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/r_poly_cull.cpp b/src/r_poly_cull.cpp index 3df07abf96..2b97d71bba 100644 --- a/src/r_poly_cull.cpp +++ b/src/r_poly_cull.cpp @@ -102,6 +102,9 @@ void PolyCull::ClearSolidSegments() bool PolyCull::IsSegmentCulled(int x1, int x2) const { + x1 = clamp(x1, -0x7ffe, 0x7ffd); + x2 = clamp(x2, -0x7ffd, 0x7ffe); + int next = 0; while (SolidSegments[next].X2 <= x2) next++; @@ -113,6 +116,9 @@ void PolyCull::MarkSegmentCulled(int x1, int x2) if (x1 >= x2) return; + x1 = clamp(x1, -0x7ffe, 0x7ffd); + x2 = clamp(x2, -0x7ffd, 0x7ffe); + int cur = 1; while (true) { From ee15856786cacbbced069476bb1f4c7306ee2dd1 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 15 Nov 2016 15:15:26 +0100 Subject: [PATCH 321/912] Deep water support --- src/r_poly.cpp | 2 +- src/r_poly_plane.cpp | 45 +++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 43 insertions(+), 4 deletions(-) diff --git a/src/r_poly.cpp b/src/r_poly.cpp index c95083c1db..17006b2d0b 100644 --- a/src/r_poly.cpp +++ b/src/r_poly.cpp @@ -147,7 +147,7 @@ void RenderPolyScene::RenderLine(subsector_t *sub, seg_t *line, sector_t *fronts // Cull wall if not visible int sx1, sx2; bool hasSegmentRange = Cull.GetSegmentRangeForLine(line->v1->fX(), line->v1->fY(), line->v2->fX(), line->v2->fY(), sx1, sx2); - if (hasSegmentRange && Cull.IsSegmentCulled(sx1, sx2)) + if (!hasSegmentRange || Cull.IsSegmentCulled(sx1, sx2)) return; // Tell automap 
we saw this diff --git a/src/r_poly_plane.cpp b/src/r_poly_plane.cpp index 9ca82cf28d..e6c6dbe286 100644 --- a/src/r_poly_plane.cpp +++ b/src/r_poly_plane.cpp @@ -31,9 +31,48 @@ void RenderPolyPlane::Render(const TriMatrix &worldToClip, subsector_t *sub, uint32_t subsectorDepth, bool ceiling, double skyHeight) { - sector_t *frontsector = sub->sector; + sector_t *fakesector = sub->sector->heightsec; + if (fakesector && (fakesector == sub->sector || (fakesector->MoreFlags & SECF_IGNOREHEIGHTSEC) == SECF_IGNOREHEIGHTSEC)) + fakesector = nullptr; + + bool fakeflooronly = fakesector && (fakesector->MoreFlags & SECF_FAKEFLOORONLY) != SECF_FAKEFLOORONLY; + + FTextureID picnum; + bool ccw; + sector_t *frontsector; + if (ceiling && fakesector && ViewPos.Z < fakesector->floorplane.Zat0()) + { + picnum = fakesector->GetTexture(sector_t::ceiling); + ccw = false; + ceiling = false; + frontsector = fakesector; + } + else if (!ceiling && fakesector && ViewPos.Z >= fakesector->floorplane.Zat0()) + { + picnum = fakesector->GetTexture(sector_t::ceiling); + ccw = true; + frontsector = fakesector; + } + else if (ceiling && fakesector && ViewPos.Z > fakesector->ceilingplane.Zat0() && !fakeflooronly) + { + picnum = fakesector->GetTexture(sector_t::floor); + ccw = true; + frontsector = fakesector; + } + else if (!ceiling && fakesector && ViewPos.Z <= fakesector->ceilingplane.Zat0() && !fakeflooronly) + { + picnum = fakesector->GetTexture(sector_t::floor); + ccw = false; + ceiling = true; + frontsector = fakesector; + } + else + { + picnum = sub->sector->GetTexture(ceiling ? sector_t::ceiling : sector_t::floor); + ccw = true; + frontsector = sub->sector; + } - FTextureID picnum = frontsector->GetTexture(ceiling ? 
sector_t::ceiling : sector_t::floor); FTexture *tex = TexMan(picnum); if (tex->UseType == FTexture::TEX_Null) return; @@ -76,7 +115,7 @@ void RenderPolyPlane::Render(const TriMatrix &worldToClip, subsector_t *sub, uin args.vinput = vertices; args.vcount = sub->numlines; args.mode = TriangleDrawMode::Fan; - args.ccw = true; + args.ccw = ccw; args.clipleft = 0; args.cliptop = 0; args.clipright = viewwidth; From cfc977f9e6557952915bee88eec2b8c573fbf406 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 15 Nov 2016 18:14:18 +0100 Subject: [PATCH 322/912] Fix texture coordinate --- src/r_poly_wall.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/r_poly_wall.cpp b/src/r_poly_wall.cpp index 77742a8f82..aeb6985da0 100644 --- a/src/r_poly_wall.cpp +++ b/src/r_poly_wall.cpp @@ -356,7 +356,7 @@ void PolyWallTextureCoords::CalcVBottomPart(FTexture *tex, const seg_t *line, do } else { - v1 = -yoffset + (unpeggedceil - topz); + v1 = yoffset + (unpeggedceil - topz); v2 = v1 + (topz - bottomz); v1 *= vscale; v2 *= vscale; From a90a22e056afad1ce34bcbcee433d104cac8162a Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 15 Nov 2016 19:27:21 +0100 Subject: [PATCH 323/912] Fix segment culling bug --- src/r_poly_cull.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/r_poly_cull.cpp b/src/r_poly_cull.cpp index 2b97d71bba..89c59aa792 100644 --- a/src/r_poly_cull.cpp +++ b/src/r_poly_cull.cpp @@ -119,18 +119,18 @@ void PolyCull::MarkSegmentCulled(int x1, int x2) x1 = clamp(x1, -0x7ffe, 0x7ffd); x2 = clamp(x2, -0x7ffd, 0x7ffe); - int cur = 1; + int cur = 0; while (true) { if (SolidSegments[cur].X1 <= x1 && SolidSegments[cur].X2 >= x2) // Already fully marked { break; } - else if (cur + 1 != SolidSegments.size() && SolidSegments[cur].X2 >= x1 && SolidSegments[cur].X1 <= x2) // Merge segments + else if (SolidSegments[cur].X2 >= x1 && SolidSegments[cur].X1 <= x2) // Merge segments { // Find last segment int merge = cur; - 
while (merge + 2 != SolidSegments.size() && SolidSegments[merge + 1].X1 <= x2) + while (merge + 1 != SolidSegments.size() && SolidSegments[merge + 1].X1 <= x2) merge++; // Apply new merged range From 1b3d50951a471f2ce413f750d92811dbf7b9c4e2 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 15 Nov 2016 22:48:44 +0100 Subject: [PATCH 324/912] Fix sky floodfill bug --- src/r_poly_plane.cpp | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/src/r_poly_plane.cpp b/src/r_poly_plane.cpp index e6c6dbe286..3052ebfbc3 100644 --- a/src/r_poly_plane.cpp +++ b/src/r_poly_plane.cpp @@ -164,8 +164,6 @@ void RenderPolyPlane::Render(const TriMatrix &worldToClip, subsector_t *sub, uin double topfloorz2 = MIN(backceilz2, frontceilz2); double bottomceilz1 = MAX(frontfloorz1, backfloorz1); double bottomceilz2 = MAX(frontfloorz2, backfloorz2); - double bottomfloorz1 = frontfloorz1; - double bottomfloorz2 = frontfloorz2; double middleceilz1 = topfloorz1; double middleceilz2 = topfloorz2; double middlefloorz1 = MIN(bottomceilz1, middleceilz1); @@ -179,12 +177,9 @@ void RenderPolyPlane::Render(const TriMatrix &worldToClip, subsector_t *sub, uin skyBottomz1 = middlefloorz1; skyBottomz2 = middlefloorz2; } - else + else if (bothSkyCeiling) { - bool topwall = (topceilz1 > topfloorz1 || topceilz2 > topfloorz2) && line->sidedef && !bothSkyCeiling; - bool bottomwall = (bottomfloorz1 < bottomceilz1 || bottomfloorz2 < bottomceilz2) && line->sidedef; - if ((ceiling && !topwall) || (!ceiling && !bottomwall)) - continue; + continue; } } From a092a03c6dce483c67900310effdd622b8f19e80 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 15 Nov 2016 23:30:42 +0100 Subject: [PATCH 325/912] Some basic fixed light support --- .../fixedfunction/drawtrianglecodegen.cpp | 6 ++++-- src/r_compiler/fixedfunction/drawtrianglecodegen.h | 3 ++- src/r_compiler/llvmdrawers.h | 2 +- src/r_poly_plane.cpp | 4 +--- src/r_poly_sprite.cpp | 14 ++++++++++++-- src/r_poly_wall.cpp | 2 +- 6 
files changed, 21 insertions(+), 10 deletions(-) diff --git a/src/r_compiler/fixedfunction/drawtrianglecodegen.cpp b/src/r_compiler/fixedfunction/drawtrianglecodegen.cpp index 77076b0721..11bdad7e43 100644 --- a/src/r_compiler/fixedfunction/drawtrianglecodegen.cpp +++ b/src/r_compiler/fixedfunction/drawtrianglecodegen.cpp @@ -254,7 +254,8 @@ void DrawTriangleCodegen::LoopBlockX(TriDrawVariant variant, bool truecolor) SSAFloat vis = globVis / rcpWTL; SSAFloat shade = 64.0f - (SSAFloat(light * 255 / 256) + 12.0f) * 32.0f / 128.0f; SSAFloat lightscale = SSAFloat::clamp((shade - SSAFloat::MIN(SSAFloat(24.0f), vis)) / 32.0f, SSAFloat(0.0f), SSAFloat(31.0f / 32.0f)); - diminishedlight = SSAInt(SSAFloat::clamp((1.0f - lightscale) * 256.0f + 0.5f, SSAFloat(0.0f), SSAFloat(256.0f)), false); + SSAInt diminishedlight = SSAInt(SSAFloat::clamp((1.0f - lightscale) * 256.0f + 0.5f, SSAFloat(0.0f), SSAFloat(256.0f)), false); + currentlight = is_fixed_light.select(light, diminishedlight); SetStencilBlock(x / 8 + y / 8 * stencilPitch); @@ -473,7 +474,7 @@ void DrawTriangleCodegen::ProcessPixel(SSAUBytePtr buffer, SSAIntPtr subsectorbu { SSAVec4i fg = texturePixels[uvoffset * 4].load_vec4ub(true); SSAInt fg_alpha = fg[3]; - fg = (fg * diminishedlight) >> 8; + fg = (fg * currentlight) >> 8; fg.insert(3, fg_alpha); if (variant == TriDrawVariant::DrawMasked || variant == TriDrawVariant::DrawSubsector) @@ -625,4 +626,5 @@ void DrawTriangleCodegen::LoadUniforms(SSAValue uniforms) is_simple_shade = (flags & TriUniforms::simple_shade) == SSAInt(TriUniforms::simple_shade); is_nearest_filter = (flags & TriUniforms::nearest_filter) == SSAInt(TriUniforms::nearest_filter); + is_fixed_light = (flags & TriUniforms::fixed_light) == SSAInt(TriUniforms::fixed_light); } diff --git a/src/r_compiler/fixedfunction/drawtrianglecodegen.h b/src/r_compiler/fixedfunction/drawtrianglecodegen.h index c9fb61e67c..8452b250fb 100644 --- a/src/r_compiler/fixedfunction/drawtrianglecodegen.h +++ 
b/src/r_compiler/fixedfunction/drawtrianglecodegen.h @@ -89,6 +89,7 @@ private: SSAShadeConstants shade_constants; SSABool is_simple_shade; SSABool is_nearest_filter; + SSABool is_fixed_light; SSAUBytePtr stencilValues; SSAIntPtr stencilMasks; @@ -115,7 +116,7 @@ private: SSAInt x, y; SSAInt x0, x1, y0, y1; - SSAInt diminishedlight; + SSAInt currentlight; SSAInt varyingPos[TriVertex::NumVarying]; SSAInt varyingStepPos[TriVertex::NumVarying]; SSAInt varyingStartStepX[TriVertex::NumVarying]; diff --git a/src/r_compiler/llvmdrawers.h b/src/r_compiler/llvmdrawers.h index b68c23fe10..38f0f82d14 100644 --- a/src/r_compiler/llvmdrawers.h +++ b/src/r_compiler/llvmdrawers.h @@ -230,7 +230,7 @@ struct TriUniforms { simple_shade = 1, nearest_filter = 2, - diminishing_lighting = 4 + fixed_light = 4 }; TriMatrix objectToClip; diff --git a/src/r_poly_plane.cpp b/src/r_poly_plane.cpp index 3052ebfbc3..4ca6f49909 100644 --- a/src/r_poly_plane.cpp +++ b/src/r_poly_plane.cpp @@ -82,9 +82,7 @@ void RenderPolyPlane::Render(const TriMatrix &worldToClip, subsector_t *sub, uin TriUniforms uniforms; uniforms.objectToClip = worldToClip; uniforms.light = (uint32_t)(frontsector->lightlevel / 255.0f * 256.0f); - if (fixedlightlev >= 0) - uniforms.light = (uint32_t)(fixedlightlev / 255.0f * 256.0f); - else if (fixedcolormap) + if (fixedlightlev >= 0 || fixedcolormap) uniforms.light = 256; uniforms.flags = 0; uniforms.subsectorDepth = isSky ? 
RenderPolyScene::SkySubsectorDepth : subsectorDepth; diff --git a/src/r_poly_sprite.cpp b/src/r_poly_sprite.cpp index b3a57ec473..1701b24ef6 100644 --- a/src/r_poly_sprite.cpp +++ b/src/r_poly_sprite.cpp @@ -111,10 +111,20 @@ void RenderPolySprite::Render(const TriMatrix &worldToClip, AActor *thing, subse vertices[i].varying[0] = 1.0f - vertices[i].varying[0]; } + bool fullbrightSprite = ((thing->renderflags & RF_FULLBRIGHT) || (thing->flags5 & MF5_BRIGHT)); + TriUniforms uniforms; uniforms.objectToClip = worldToClip; - uniforms.light = (uint32_t)((thing->Sector->lightlevel + actualextralight) / 255.0f * 256.0f); - uniforms.flags = 0; + if (fullbrightSprite || fixedlightlev >= 0 || fixedcolormap) + { + uniforms.light = 256; + uniforms.flags = TriUniforms::fixed_light; + } + else + { + uniforms.light = (uint32_t)((thing->Sector->lightlevel + actualextralight) / 255.0f * 256.0f); + uniforms.flags = 0; + } uniforms.subsectorDepth = subsectorDepth; PolyDrawArgs args; diff --git a/src/r_poly_wall.cpp b/src/r_poly_wall.cpp index aeb6985da0..cd932214db 100644 --- a/src/r_poly_wall.cpp +++ b/src/r_poly_wall.cpp @@ -244,7 +244,7 @@ int RenderPolyWall::GetLightLevel() { bool foggy = false; int actualextralight = foggy ? 
0 : extralight << 4; - return Line->sidedef->GetLightLevel(foggy, Line->frontsector->lightlevel) + actualextralight; + return clamp(Line->sidedef->GetLightLevel(foggy, Line->frontsector->lightlevel) + actualextralight, 0, 255); } } From 449d4963ae23c20f3dc31fa1b6543a28e09e4946 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 16 Nov 2016 01:13:29 +0100 Subject: [PATCH 326/912] Mapping error workaround --- src/r_poly_wall.cpp | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/src/r_poly_wall.cpp b/src/r_poly_wall.cpp index cd932214db..6921b1a33b 100644 --- a/src/r_poly_wall.cpp +++ b/src/r_poly_wall.cpp @@ -229,9 +229,29 @@ FTexture *RenderPolyWall::GetTexture() { FTexture *tex = TexMan(Line->sidedef->GetTexture(Texpart), true); if (tex == nullptr || tex->UseType == FTexture::TEX_Null) - return nullptr; - else - return tex; + { + // Mapping error. Doom floodfills this with a plane. + // This code doesn't do that, but at least it uses the "right" texture.. 
+ + if (Line->linedef && Line->backsector && Line->linedef->sidedef[0] == Line->sidedef) + { + if (Texpart == side_t::top) + tex = TexMan(Line->linedef->backsector->GetTexture(sector_t::ceiling), true); + else if (Texpart == side_t::bottom) + tex = TexMan(Line->linedef->backsector->GetTexture(sector_t::floor), true); + } + if (Line->linedef && Line->backsector && Line->linedef->sidedef[1] == Line->sidedef) + { + if (Texpart == side_t::top) + tex = TexMan(Line->linedef->frontsector->GetTexture(sector_t::ceiling), true); + else if (Texpart == side_t::bottom) + tex = TexMan(Line->linedef->frontsector->GetTexture(sector_t::floor), true); + } + + if (tex == nullptr || tex->UseType == FTexture::TEX_Null) + return nullptr; + } + return tex; } int RenderPolyWall::GetLightLevel() From 9b9ed64360a22273d49faa1d8decaf598a4b469a Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Tue, 15 Nov 2016 19:37:30 -0500 Subject: [PATCH 327/912] - Implemented sv_overridegetcvar: This will override the return value for GetCVar checks for certain CVars marked with the CVAR_OVERRIDEGET flag. Instead of returning their true value, they only return defaults instead. - Implemented dummy CVar vid_renderer with a default value of 1. This allows mods not designed for the software renderer to run if sv_overridegetcvar is turned on. --- src/c_cvars.cpp | 7 +++++++ src/c_cvars.h | 1 + src/p_acs.cpp | 20 ++++++++++++++++++++ src/p_actionfunctions.cpp | 6 ++++++ 4 files changed, 34 insertions(+) diff --git a/src/c_cvars.cpp b/src/c_cvars.cpp index c8f1ee4c62..1af07e1761 100644 --- a/src/c_cvars.cpp +++ b/src/c_cvars.cpp @@ -52,6 +52,13 @@ #include "v_video.h" #include "colormatcher.h" +// [SP] This is a dummy CVAR that needs to be removed in child ports that implement the real version. +CVAR(Int, vid_renderer, 1, CVAR_GLOBALCONFIG | CVAR_OVERRIDEGET | CVAR_ARCHIVE) + +// [SP] Lets the player (arbitrator) choose whether to override GetCVar checks. +// Danger of desync? 
Can we just make it a client var? This probably *fixes* desyncs, actually... +CVAR(Bool, sv_overridegetcvar, true, CVAR_SERVERINFO | CVAR_GLOBALCONFIG | CVAR_ARCHIVE) + struct FLatchedValue { FBaseCVar *Variable; diff --git a/src/c_cvars.h b/src/c_cvars.h index be7676e895..d74d3b5c9a 100644 --- a/src/c_cvars.h +++ b/src/c_cvars.h @@ -63,6 +63,7 @@ enum CVAR_NOSAVE = 4096, // when used with CVAR_SERVERINFO, do not save var to savegame CVAR_MOD = 8192, // cvar was defined by a mod CVAR_IGNORE = 16384,// do not send cvar across the network/inaccesible from ACS (dummy mod cvar) + CVAR_OVERRIDEGET = 32768,// this cvar disguises its return value for GetCVAR }; union UCVarValue diff --git a/src/p_acs.cpp b/src/p_acs.cpp index 1a60a0c4fb..1d309307d8 100644 --- a/src/p_acs.cpp +++ b/src/p_acs.cpp @@ -4549,6 +4549,8 @@ static void DoSetCVar(FBaseCVar *cvar, int value, bool is_string, bool force=fal } } +EXTERN_CVAR(Bool, sv_overridegetcvar) + // Converts floating- to fixed-point as required. static int DoGetCVar(FBaseCVar *cvar, bool is_string) { @@ -4558,6 +4560,24 @@ static int DoGetCVar(FBaseCVar *cvar, bool is_string) { return 0; } + else if (sv_overridegetcvar && (cvar->GetFlags() & CVAR_OVERRIDEGET)) + { + if (is_string) + { + val = cvar->GetGenericRepDefault(CVAR_String); + return GlobalACSStrings.AddString(val.String); + } + else if (cvar->GetRealType() == CVAR_Float) + { + val = cvar->GetGenericRepDefault(CVAR_Float); + return DoubleToACS(val.Float); + } + else + { + val = cvar->GetGenericRepDefault(CVAR_Int); + return val.Int; + } + } else if (is_string) { val = cvar->GetGenericRep(CVAR_String); diff --git a/src/p_actionfunctions.cpp b/src/p_actionfunctions.cpp index 73cade49a4..df58524739 100644 --- a/src/p_actionfunctions.cpp +++ b/src/p_actionfunctions.cpp @@ -621,6 +621,8 @@ DEFINE_ACTION_FUNCTION_PARAMS(AActor, GetCrouchFactor) // //========================================================================== +EXTERN_CVAR(Bool, sv_overridegetcvar) + 
DEFINE_ACTION_FUNCTION_PARAMS(AActor, GetCVar) { if (numret > 0) @@ -634,6 +636,10 @@ DEFINE_ACTION_FUNCTION_PARAMS(AActor, GetCVar) { ret->SetFloat(0); } + else if (sv_overridegetcvar && (cvar->GetFlags() & CVAR_OVERRIDEGET)) + { + ret->SetFloat(cvar->GetGenericRepDefault(CVAR_Float).Float); + } else { ret->SetFloat(cvar->GetGenericRep(CVAR_Float).Float); From 813030efef234305b53b4032fa6d6dd8326dd7a1 Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Tue, 15 Nov 2016 19:52:40 -0500 Subject: [PATCH 328/912] - Fully implemented CVAR_OVERRIDEGET for vid_renderer from the ZDoom code submission. --- src/c_cvars.cpp | 3 --- src/posix/cocoa/i_video.mm | 2 +- src/posix/sdl/hardware.cpp | 2 +- src/win32/hardware.cpp | 2 +- 4 files changed, 3 insertions(+), 6 deletions(-) diff --git a/src/c_cvars.cpp b/src/c_cvars.cpp index 1af07e1761..18648db285 100644 --- a/src/c_cvars.cpp +++ b/src/c_cvars.cpp @@ -52,9 +52,6 @@ #include "v_video.h" #include "colormatcher.h" -// [SP] This is a dummy CVAR that needs to be removed in child ports that implement the real version. -CVAR(Int, vid_renderer, 1, CVAR_GLOBALCONFIG | CVAR_OVERRIDEGET | CVAR_ARCHIVE) - // [SP] Lets the player (arbitrator) choose whether to override GetCVar checks. // Danger of desync? Can we just make it a client var? This probably *fixes* desyncs, actually... 
CVAR(Bool, sv_overridegetcvar, true, CVAR_SERVERINFO | CVAR_GLOBALCONFIG | CVAR_ARCHIVE) diff --git a/src/posix/cocoa/i_video.mm b/src/posix/cocoa/i_video.mm index 08a563b257..edd7de5709 100644 --- a/src/posix/cocoa/i_video.mm +++ b/src/posix/cocoa/i_video.mm @@ -123,7 +123,7 @@ CUSTOM_CVAR(Bool, vid_autoswitch, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG | CVAR_ static int s_currentRenderer; -CUSTOM_CVAR(Int, vid_renderer, 1, CVAR_ARCHIVE | CVAR_GLOBALCONFIG | CVAR_NOINITCALL) +CUSTOM_CVAR(Int, vid_renderer, 1, CVAR_ARCHIVE | CVAR_GLOBALCONFIG | CVAR_NOINITCALL | CVAR_OVERRIDEGET) { // 0: Software renderer // 1: OpenGL renderer diff --git a/src/posix/sdl/hardware.cpp b/src/posix/sdl/hardware.cpp index f4ac13fc5e..5dcf62367d 100644 --- a/src/posix/sdl/hardware.cpp +++ b/src/posix/sdl/hardware.cpp @@ -64,7 +64,7 @@ void I_RestartRenderer(); int currentrenderer; // [ZDoomGL] -CUSTOM_CVAR (Int, vid_renderer, 1, CVAR_ARCHIVE | CVAR_GLOBALCONFIG | CVAR_NOINITCALL) +CUSTOM_CVAR (Int, vid_renderer, 1, CVAR_ARCHIVE | CVAR_GLOBALCONFIG | CVAR_NOINITCALL | CVAR_OVERRIDEGET) { // 0: Software renderer // 1: OpenGL renderer diff --git a/src/win32/hardware.cpp b/src/win32/hardware.cpp index 787c0a4f38..e0338c0dd9 100644 --- a/src/win32/hardware.cpp +++ b/src/win32/hardware.cpp @@ -71,7 +71,7 @@ int currentrenderer = -1; bool changerenderer; // [ZDoomGL] -CUSTOM_CVAR (Int, vid_renderer, 1, CVAR_ARCHIVE | CVAR_GLOBALCONFIG | CVAR_NOINITCALL) +CUSTOM_CVAR (Int, vid_renderer, 1, CVAR_ARCHIVE | CVAR_GLOBALCONFIG | CVAR_NOINITCALL | CVAR_OVERRIDEGET) { // 0: Software renderer // 1: OpenGL renderer From f457f0340c7e9c667c5bdf461e29aaa65c1ebd75 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 16 Nov 2016 11:18:40 +0100 Subject: [PATCH 329/912] Add 3D floor planes --- src/r_poly.cpp | 44 ++++++++------ src/r_poly.h | 3 + src/r_poly_plane.cpp | 141 +++++++++++++++++++++++++++++++++++++++---- src/r_poly_plane.h | 6 +- 4 files changed, 162 insertions(+), 32 deletions(-) diff --git 
a/src/r_poly.cpp b/src/r_poly.cpp index 17006b2d0b..179df248a4 100644 --- a/src/r_poly.cpp +++ b/src/r_poly.cpp @@ -36,7 +36,24 @@ void RenderPolyScene::Render() if (!r_swtruecolor) // Disable pal rendering for now return; - // Setup working buffers + ClearBuffers(); + SetupPerspectiveMatrix(); + Cull.CullScene(WorldToClip); + RenderSectors(); + skydome.Render(WorldToClip); + RenderTranslucent(); + PlayerSprites.Render(); + + DrawerCommandQueue::WaitForWorkers(); +} + +void RenderPolyScene::RenderRemainingPlayerSprites() +{ + PlayerSprites.RenderRemainingSprites(); +} + +void RenderPolyScene::ClearBuffers() +{ PolyVertexBuffer::Clear(); SectorSpriteRanges.clear(); SectorSpriteRanges.resize(numsectors); @@ -45,8 +62,10 @@ void RenderPolyScene::Render() PolyStencilBuffer::Instance()->Clear(viewwidth, viewheight, 0); PolySubsectorGBuffer::Instance()->Resize(dc_pitch, viewheight); NextSubsectorDepth = 0; +} - // Setup perspective matrix: +void RenderPolyScene::SetupPerspectiveMatrix() +{ float ratio = WidescreenRatio; float fovratio = (WidescreenRatio >= 1.3f) ? 
1.333333f : ratio; float fovy = (float)(2 * DAngle::ToDegrees(atan(tan(FieldOfView.Radians() / 2) / fovratio)).Degrees); @@ -57,8 +76,10 @@ void RenderPolyScene::Render() TriMatrix::swapYZ() * TriMatrix::translate((float)-ViewPos.X, (float)-ViewPos.Y, (float)-ViewPos.Z); WorldToClip = TriMatrix::perspective(fovy, ratio, 5.0f, 65535.0f) * worldToView; +} - Cull.CullScene(WorldToClip); +void RenderPolyScene::RenderSectors() +{ if (r_debug_cull) { for (auto it = Cull.PvsSectors.rbegin(); it != Cull.PvsSectors.rend(); ++it) @@ -69,19 +90,6 @@ void RenderPolyScene::Render() for (auto it = Cull.PvsSectors.begin(); it != Cull.PvsSectors.end(); ++it) RenderSubsector(*it); } - - skydome.Render(WorldToClip); - - RenderTranslucent(); - - PlayerSprites.Render(); - DrawerCommandQueue::WaitForWorkers(); - RenderRemainingPlayerSprites(); // To do: should be called by FSoftwareRenderer::DrawRemainingPlayerSprites instead of here -} - -void RenderPolyScene::RenderRemainingPlayerSprites() -{ - PlayerSprites.RenderRemainingSprites(); } void RenderPolyScene::RenderSubsector(subsector_t *sub) @@ -93,9 +101,7 @@ void RenderPolyScene::RenderSubsector(subsector_t *sub) if (sub->sector->CenterFloor() != sub->sector->CenterCeiling()) { - RenderPolyPlane plane; - plane.Render(WorldToClip, sub, subsectorDepth, true, Cull.MaxCeilingHeight); - plane.Render(WorldToClip, sub, subsectorDepth, false, Cull.MinFloorHeight); + RenderPolyPlane::RenderPlanes(WorldToClip, sub, subsectorDepth, Cull.MaxCeilingHeight, Cull.MinFloorHeight); } for (uint32_t i = 0; i < sub->numlines; i++) diff --git a/src/r_poly.h b/src/r_poly.h index 35105e4f96..4f7eca61dd 100644 --- a/src/r_poly.h +++ b/src/r_poly.h @@ -84,6 +84,9 @@ public: static RenderPolyScene *Instance(); private: + void ClearBuffers(); + void SetupPerspectiveMatrix(); + void RenderSectors(); void RenderSubsector(subsector_t *sub); void RenderLine(subsector_t *sub, seg_t *line, sector_t *frontsector, uint32_t subsectorDepth); diff --git 
a/src/r_poly_plane.cpp b/src/r_poly_plane.cpp index 4ca6f49909..5b0e912a80 100644 --- a/src/r_poly_plane.cpp +++ b/src/r_poly_plane.cpp @@ -29,6 +29,125 @@ #include "r_poly.h" #include "r_sky.h" // for skyflatnum +EXTERN_CVAR(Int, r_3dfloors) + +void RenderPolyPlane::RenderPlanes(const TriMatrix &worldToClip, subsector_t *sub, uint32_t subsectorDepth, double skyCeilingHeight, double skyFloorHeight) +{ + RenderPolyPlane plane; + + if (r_3dfloors) + { + auto frontsector = sub->sector; + auto &ffloors = frontsector->e->XFloor.ffloors; + + // 3D floor floors + for (int i = 0; i < (int)ffloors.Size(); i++) + { + F3DFloor *fakeFloor = ffloors[i]; + if (!(fakeFloor->flags & FF_EXISTS)) continue; + if (!fakeFloor->model) continue; + if (fakeFloor->bottom.plane->isSlope()) continue; + //if (!(fakeFloor->flags & FF_NOSHADE) || (fakeFloor->flags & (FF_RENDERPLANES | FF_RENDERSIDES))) + // R_3D_AddHeight(fakeFloor->top.plane, frontsector); + if (!(fakeFloor->flags & FF_RENDERPLANES)) continue; + if (fakeFloor->alpha == 0) continue; + if (fakeFloor->flags & FF_THISINSIDE && fakeFloor->flags & FF_INVERTSECTOR) continue; + //fakeFloor->alpha + + double fakeHeight = fakeFloor->top.plane->ZatPoint(frontsector->centerspot); + if (fakeHeight < ViewPos.Z && fakeHeight > frontsector->floorplane.ZatPoint(frontsector->centerspot)) + { + plane.Render3DFloor(worldToClip, sub, subsectorDepth, false, fakeFloor); + } + } + + // 3D floor ceilings + for (int i = 0; i < (int)ffloors.Size(); i++) + { + F3DFloor *fakeFloor = ffloors[i]; + if (!(fakeFloor->flags & FF_EXISTS)) continue; + if (!fakeFloor->model) continue; + if (fakeFloor->top.plane->isSlope()) continue; + //if (!(fakeFloor->flags & FF_NOSHADE) || (fakeFloor->flags & (FF_RENDERPLANES | FF_RENDERSIDES))) + // R_3D_AddHeight(fakeFloor->bottom.plane, frontsector); + if (!(fakeFloor->flags & FF_RENDERPLANES)) continue; + if (fakeFloor->alpha == 0) continue; + if (!(fakeFloor->flags & FF_THISINSIDE) && (fakeFloor->flags & (FF_SWIMMABLE | 
FF_INVERTSECTOR)) == (FF_SWIMMABLE | FF_INVERTSECTOR)) continue; + //fakeFloor->alpha + + double fakeHeight = fakeFloor->bottom.plane->ZatPoint(frontsector->centerspot); + if (fakeHeight > ViewPos.Z && fakeHeight < frontsector->ceilingplane.ZatPoint(frontsector->centerspot)) + { + plane.Render3DFloor(worldToClip, sub, subsectorDepth, true, fakeFloor); + } + } + } + + plane.Render(worldToClip, sub, subsectorDepth, true, skyCeilingHeight); + plane.Render(worldToClip, sub, subsectorDepth, false, skyFloorHeight); +} + +void RenderPolyPlane::Render3DFloor(const TriMatrix &worldToClip, subsector_t *sub, uint32_t subsectorDepth, bool ceiling, F3DFloor *fakeFloor) +{ + FTextureID picnum = ceiling ? *fakeFloor->bottom.texture : *fakeFloor->top.texture; + FTexture *tex = TexMan(picnum); + if (tex->UseType == FTexture::TEX_Null) + return; + + int lightlevel = 255; + if (fixedlightlev < 0 && sub->sector->e->XFloor.lightlist.Size()) + { + lightlist_t *light = P_GetPlaneLight(sub->sector, &sub->sector->ceilingplane, false); + basecolormap = light->extra_colormap; + lightlevel = *light->p_lightlevel; + } + + TriUniforms uniforms; + uniforms.objectToClip = worldToClip; + uniforms.light = (uint32_t)(lightlevel / 255.0f * 256.0f); + if (fixedlightlev >= 0 || fixedcolormap) + uniforms.light = 256; + uniforms.flags = 0; + uniforms.subsectorDepth = subsectorDepth; + + TriVertex *vertices = PolyVertexBuffer::GetVertices(sub->numlines); + if (!vertices) + return; + + if (ceiling) + { + for (uint32_t i = 0; i < sub->numlines; i++) + { + seg_t *line = &sub->firstline[i]; + vertices[sub->numlines - 1 - i] = PlaneVertex(line->v1, fakeFloor->bottom.plane->ZatPoint(line->v1)); + } + } + else + { + for (uint32_t i = 0; i < sub->numlines; i++) + { + seg_t *line = &sub->firstline[i]; + vertices[i] = PlaneVertex(line->v1, fakeFloor->top.plane->ZatPoint(line->v1)); + } + } + + PolyDrawArgs args; + args.uniforms = uniforms; + args.vinput = vertices; + args.vcount = sub->numlines; + args.mode = 
TriangleDrawMode::Fan; + args.ccw = true; + args.clipleft = 0; + args.cliptop = 0; + args.clipright = viewwidth; + args.clipbottom = viewheight; + args.stenciltestvalue = 0; + args.stencilwritevalue = 1; + args.SetTexture(tex); + PolyTriangleDrawer::draw(args, TriDrawVariant::Draw); + PolyTriangleDrawer::draw(args, TriDrawVariant::Stencil); +} + void RenderPolyPlane::Render(const TriMatrix &worldToClip, subsector_t *sub, uint32_t subsectorDepth, bool ceiling, double skyHeight) { sector_t *fakesector = sub->sector->heightsec; @@ -96,7 +215,7 @@ void RenderPolyPlane::Render(const TriMatrix &worldToClip, subsector_t *sub, uin for (uint32_t i = 0; i < sub->numlines; i++) { seg_t *line = &sub->firstline[i]; - vertices[sub->numlines - 1 - i] = PlaneVertex(line->v1, frontsector, isSky ? skyHeight : frontsector->ceilingplane.ZatPoint(line->v1)); + vertices[sub->numlines - 1 - i] = PlaneVertex(line->v1, isSky ? skyHeight : frontsector->ceilingplane.ZatPoint(line->v1)); } } else @@ -104,7 +223,7 @@ void RenderPolyPlane::Render(const TriMatrix &worldToClip, subsector_t *sub, uin for (uint32_t i = 0; i < sub->numlines; i++) { seg_t *line = &sub->firstline[i]; - vertices[i] = PlaneVertex(line->v1, frontsector, isSky ? skyHeight : frontsector->floorplane.ZatPoint(line->v1)); + vertices[i] = PlaneVertex(line->v1, isSky ? 
skyHeight : frontsector->floorplane.ZatPoint(line->v1)); } } @@ -183,17 +302,17 @@ void RenderPolyPlane::Render(const TriMatrix &worldToClip, subsector_t *sub, uin if (ceiling) { - wallvert[0] = PlaneVertex(line->v1, frontsector, skyHeight); - wallvert[1] = PlaneVertex(line->v2, frontsector, skyHeight); - wallvert[2] = PlaneVertex(line->v2, frontsector, skyBottomz2); - wallvert[3] = PlaneVertex(line->v1, frontsector, skyBottomz1); + wallvert[0] = PlaneVertex(line->v1, skyHeight); + wallvert[1] = PlaneVertex(line->v2, skyHeight); + wallvert[2] = PlaneVertex(line->v2, skyBottomz2); + wallvert[3] = PlaneVertex(line->v1, skyBottomz1); } else { - wallvert[0] = PlaneVertex(line->v1, frontsector, frontsector->floorplane.ZatPoint(line->v1)); - wallvert[1] = PlaneVertex(line->v2, frontsector, frontsector->floorplane.ZatPoint(line->v2)); - wallvert[2] = PlaneVertex(line->v2, frontsector, skyHeight); - wallvert[3] = PlaneVertex(line->v1, frontsector, skyHeight); + wallvert[0] = PlaneVertex(line->v1, frontsector->floorplane.ZatPoint(line->v1)); + wallvert[1] = PlaneVertex(line->v2, frontsector->floorplane.ZatPoint(line->v2)); + wallvert[2] = PlaneVertex(line->v2, skyHeight); + wallvert[3] = PlaneVertex(line->v1, skyHeight); } args.vinput = wallvert; @@ -203,7 +322,7 @@ void RenderPolyPlane::Render(const TriMatrix &worldToClip, subsector_t *sub, uin } } -TriVertex RenderPolyPlane::PlaneVertex(vertex_t *v1, sector_t *sector, double height) +TriVertex RenderPolyPlane::PlaneVertex(vertex_t *v1, double height) { TriVertex v; v.x = (float)v1->fPos().X; diff --git a/src/r_poly_plane.h b/src/r_poly_plane.h index b4f2087e82..fa786ead95 100644 --- a/src/r_poly_plane.h +++ b/src/r_poly_plane.h @@ -27,8 +27,10 @@ class RenderPolyPlane { public: - void Render(const TriMatrix &worldToClip, subsector_t *sub, uint32_t subsectorDepth, bool ceiling, double skyHeight); + static void RenderPlanes(const TriMatrix &worldToClip, subsector_t *sub, uint32_t subsectorDepth, double skyCeilingHeight, 
double skyFloorHeight); private: - TriVertex PlaneVertex(vertex_t *v1, sector_t *sector, double height); + void Render3DFloor(const TriMatrix &worldToClip, subsector_t *sub, uint32_t subsectorDepth, bool ceiling, F3DFloor *fakefloor); + void Render(const TriMatrix &worldToClip, subsector_t *sub, uint32_t subsectorDepth, bool ceiling, double skyHeight); + TriVertex PlaneVertex(vertex_t *v1, double height); }; From 712e03257369a9f884a20d3b34839e88da7fd17f Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 16 Nov 2016 19:44:11 +0100 Subject: [PATCH 330/912] Remove swrenderer2 --- src/CMakeLists.txt | 1 - src/r_swrenderer2.cpp | 1892 ----------------------------------------- src/r_swrenderer2.h | 366 -------- 3 files changed, 2259 deletions(-) delete mode 100644 src/r_swrenderer2.cpp delete mode 100644 src/r_swrenderer2.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 7933f80a70..baef29c213 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1068,7 +1068,6 @@ set( NOT_COMPILED_SOURCE_FILES set( FASTMATH_PCH_SOURCES r_swrenderer.cpp - r_swrenderer2.cpp r_poly.cpp r_poly_cull.cpp r_poly_particle.cpp diff --git a/src/r_swrenderer2.cpp b/src/r_swrenderer2.cpp deleted file mode 100644 index 8fcf90ac98..0000000000 --- a/src/r_swrenderer2.cpp +++ /dev/null @@ -1,1892 +0,0 @@ -/* -** Experimental Doom software renderer -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 'as-is', without any express or implied -** warranty. In no event will the authors be held liable for any damages -** arising from the use of this software. -** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. 
If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. -** 3. This notice may not be removed or altered from any source distribution. -** -*/ - -#include -#include "templates.h" -#include "doomdef.h" -#include "sbar.h" -#include "r_data/r_translate.h" -#include "r_swrenderer2.h" -#include "r_draw.h" -#include "r_plane.h" // for yslope -#include "r_sky.h" // for skyflatnum -#include "r_things.h" // for pspritexscale - -EXTERN_CVAR(Bool, r_drawplayersprites) -EXTERN_CVAR(Bool, r_deathcamera) -EXTERN_CVAR(Bool, st_scale) - -DVector3 ViewPosTransform::WorldToEye(const DVector3 &worldPoint) const -{ - double tr_x = worldPoint.X - ViewPos.X; - double tr_y = worldPoint.Y - ViewPos.Y; - double tr_z = worldPoint.Z - ViewPos.Z; - double tx = tr_x * ViewSin - tr_y * ViewCos; - double tz = tr_x * ViewTanCos + tr_y * ViewTanSin; - return DVector3(tx, tr_z, tz); -} - -DVector3 ViewPosTransform::EyeToViewport(const DVector3 &eyePoint) const -{ - double rcp_z = 1.0 / eyePoint.Z; - return DVector3(eyePoint.X * rcp_z, eyePoint.Y * rcp_z, rcp_z); -} - -DVector3 ViewPosTransform::ViewportToScreen(const DVector3 &viewportPoint) const -{ - return DVector3(CenterX + viewportPoint.X * CenterX, CenterY - viewportPoint.Y * InvZtoScale, viewportPoint.Z); -} - -double ViewPosTransform::ScreenXToEye(int x, double z) const -{ - return (x + 0.5 - CenterX) / CenterX * z; -} - -double ViewPosTransform::ScreenYToEye(int y, double z) const -{ - return -(y + 0.5 - CenterY) / InvZtoScale * z; -} - -///////////////////////////////////////////////////////////////////////////// - -WallCoords::WallCoords(const ViewPosTransform &transform, const DVector2 &v1, const DVector2 &v2, double ceil1, double floor1, double ceil2, double floor2) : Transform(transform) -{ - // Transform wall to eye space - 
DVector3 top1 = transform.WorldToEye(DVector3(v1, ceil1)); - DVector3 top2 = transform.WorldToEye(DVector3(v2, ceil2)); - DVector3 bottom1 = transform.WorldToEye(DVector3(v1, floor1)); - DVector3 bottom2 = transform.WorldToEye(DVector3(v2, floor2)); - - double clipNearZ = transform.NearZ(); - - // Is entire wall behind znear clipping plane? If so, wall is culled - if ((top1.Z < clipNearZ && top2.Z < clipNearZ)) - return; - - PlaneNormal = (top2 - top1) ^ (top1 - bottom1); - PlaneD = -(PlaneNormal | top1); - - // Clip wall to znear clipping plane - if (top1.Z < clipNearZ) - { - double t = (clipNearZ - top1.Z) / (top2.Z - top1.Z); - top1 = Mix(top1, top2, t); - bottom1 = Mix(bottom1, bottom2, t); - - VaryingXScale = 1.0 - t; - VaryingXOffset = t; - } - else if (top2.Z < clipNearZ) - { - double t = (clipNearZ - top1.Z) / (top2.Z - top1.Z); - top2 = Mix(top1, top2, t); - bottom2 = Mix(bottom1, bottom2, t); - VaryingXScale = t; - VaryingXOffset = 0.0; - } - - NearZ = MIN(top1.Z, top2.Z); - FarZ = MAX(top1.Z, top2.Z); - - // Transform to screen coordinates - ScreenTopLeft = transform.EyeToScreen(top1); - ScreenTopRight = transform.EyeToScreen(top2); - ScreenBottomLeft = transform.EyeToScreen(bottom1); - ScreenBottomRight = transform.EyeToScreen(bottom2); - if (ScreenTopLeft.X > ScreenTopRight.X) - { - std::swap(ScreenTopLeft, ScreenTopRight); - std::swap(ScreenBottomLeft, ScreenBottomRight); - } - - ScreenX1 = xs_RoundToInt(MAX(ScreenTopLeft.X, 0.0)); - ScreenX2 = xs_RoundToInt(MIN(ScreenTopRight.X, (double)viewwidth)); - ScreenY1 = xs_RoundToInt(MAX(MIN(ScreenTopLeft.Y, ScreenTopRight.Y), 0.0)); - ScreenY2 = xs_RoundToInt(MIN(MAX(ScreenBottomLeft.Y, ScreenBottomRight.Y), (double)viewheight)); - - // Cull if nothing of the wall is visible - if (ScreenX2 <= ScreenX1 || ScreenY2 <= ScreenY1) - return; - - RcpDeltaScreenX = 1.0 / (ScreenTopRight.X - ScreenTopLeft.X); - Culled = false; -} - -DVector3 WallCoords::Mix(const DVector3 &a, const DVector3 &b, double t) -{ - double 
invt = 1.0 - t; - return DVector3(a.X * invt + b.X * t, a.Y * invt + b.Y * t, a.Z * invt + b.Z * t); -} - -double WallCoords::Mix(double a, double b, double t) -{ - return a * (1.0 - t) + b * t; -} - -short WallCoords::Y1(int x) const -{ - double t = (x + 0.5 - ScreenTopLeft.X) * RcpDeltaScreenX; - return (short)MAX(xs_RoundToInt(Mix(ScreenTopLeft.Y, ScreenTopRight.Y, t)), 0); -} - -short WallCoords::Y2(int x) const -{ - double t = (x + 0.5 - ScreenBottomLeft.X) * RcpDeltaScreenX; - return (short)MIN(xs_RoundToInt(Mix(ScreenBottomLeft.Y, ScreenBottomRight.Y, t)), viewheight); -} - -double WallCoords::Z(int x) const -{ - double t = (x + 0.5 - ScreenTopLeft.X) * RcpDeltaScreenX; - double rcp_z = Mix(ScreenTopLeft.Z, ScreenTopRight.Z, t); - return 1.0 / rcp_z; -} - -double WallCoords::VaryingX(int x, double start, double end) const -{ - double t = (x + 0.5 - ScreenTopLeft.X) * RcpDeltaScreenX; - double rcp_z = Mix(ScreenTopLeft.Z, ScreenTopRight.Z, t); - double t2 = VaryingXOffset + t / rcp_z * ScreenTopRight.Z * VaryingXScale; - return Mix(start, end, t2); -} - -double WallCoords::VaryingY(int x, int y, double start, double end) const -{ - double t = (x + 0.5 - ScreenTopLeft.X) * RcpDeltaScreenX; - double y1 = Mix(ScreenTopLeft.Y, ScreenTopRight.Y, t); - double y2 = Mix(ScreenBottomLeft.Y, ScreenBottomRight.Y, t); - if (y1 == y2 || y1 == -y2) - return start; - double t2 = (y + 0.5 - y1) / (y2 - y1); - return Mix(start, end, t2); -} - -///////////////////////////////////////////////////////////////////////////// - -void RenderBsp::Render() -{ - Clip.Clear(0, viewwidth); - Planes.Clear(); - VisibleSprites.clear(); - ScreenSprites.clear(); - - if (numnodes == 0) - RenderSubsector(subsectors); - else - RenderNode(nodes + numnodes - 1); // The head node is the last node output. 
- - Planes.Render(); - - RenderMaskedObjects(); - RenderPlayerSprites(); - RenderScreenSprites(); // To do: should be called by FSoftwareRenderer::DrawRemainingPlayerSprites instead of here -} - -void RenderBsp::RenderScreenSprites() -{ - for (auto &sprite : ScreenSprites) - sprite.Render(); -} - -void RenderBsp::RenderMaskedObjects() -{ - Clip.DrawMaskedWall = [&](short x1, short x2, int drawIndex, const short *clipTop, const short *clipBottom) - { - VisibleMaskedWalls[drawIndex].RenderMasked(x1, x2, clipTop, clipBottom); - }; - - std::stable_sort(VisibleSprites.begin(), VisibleSprites.end(), [](const auto &a, const auto &b) { return a.EyePos.Z > b.EyePos.Z; }); - - for (auto &sprite : VisibleSprites) - sprite.Render(&Clip); - - Clip.RenderMaskedWalls(); -} - -void RenderBsp::RenderPlayerSprites() -{ - if (!r_drawplayersprites || - !camera || - !camera->player || - (players[consoleplayer].cheats & CF_CHASECAM) || - (r_deathcamera && camera->health <= 0)) - return; - - float bobx, boby; - P_BobWeapon(camera->player, &bobx, &boby, r_TicFracF); - - // Interpolate the main weapon layer once so as to be able to add it to other layers. - double wx, wy; - DPSprite *weapon = camera->player->FindPSprite(PSP_WEAPON); - if (weapon) - { - if (weapon->firstTic) - { - wx = weapon->x; - wy = weapon->y; - } - else - { - wx = weapon->oldx + (weapon->x - weapon->oldx) * r_TicFracF; - wy = weapon->oldy + (weapon->y - weapon->oldy) * r_TicFracF; - } - } - else - { - wx = 0; - wy = 0; - } - - for (DPSprite *sprite = camera->player->psprites; sprite != nullptr; sprite = sprite->GetNext()) - { - // [RH] Don't draw the targeter's crosshair if the player already has a crosshair set. - // It's possible this psprite's caller is now null but the layer itself hasn't been destroyed - // because it didn't tick yet (if we typed 'take all' while in the console for example). - // In this case let's simply not draw it to avoid crashing. 
- if ((sprite->GetID() != PSP_TARGETCENTER || CrosshairImage == nullptr) && sprite->GetCaller() != nullptr) - { - RenderPlayerSprite(sprite, camera, bobx, boby, wx, wy, r_TicFracF); - } - } -} - -void RenderBsp::RenderPlayerSprite(DPSprite *sprite, AActor *owner, float bobx, float boby, double wx, double wy, double ticfrac) -{ - // decide which patch to use - if ((unsigned)sprite->GetSprite() >= (unsigned)sprites.Size()) - { - DPrintf(DMSG_ERROR, "RenderPlayerSprite: invalid sprite number %i\n", sprite->GetSprite()); - return; - } - - spritedef_t *def = &sprites[sprite->GetSprite()]; - if (sprite->GetFrame() >= def->numframes) - { - DPrintf(DMSG_ERROR, "RenderPlayerSprite: invalid sprite frame %i : %i\n", sprite->GetSprite(), sprite->GetFrame()); - return; - } - - spriteframe_t *frame = &SpriteFrames[def->spriteframes + sprite->GetFrame()]; - FTextureID picnum = frame->Texture[0]; - bool flip = (frame->Flip & 1) != 0; - - FTexture *tex = TexMan(picnum); - if (tex->UseType == FTexture::TEX_Null) - return; - - // Can't interpolate the first tic. 
- if (sprite->firstTic) - { - sprite->firstTic = false; - sprite->oldx = sprite->x; - sprite->oldy = sprite->y; - } - - double sx = sprite->oldx + (sprite->x - sprite->oldx) * ticfrac; - double sy = sprite->oldy + (sprite->y - sprite->oldy) * ticfrac; - - if (sprite->Flags & PSPF_ADDBOB) - { - sx += bobx; - sy += boby; - } - - if (sprite->Flags & PSPF_ADDWEAPON && sprite->GetID() != PSP_WEAPON) - { - sx += wx; - sy += wy; - } - - // calculate edges of the shape - double tx = sx - BaseXCenter; - - tx -= tex->GetScaledLeftOffset(); - int x1 = xs_RoundToInt(CenterX + tx * pspritexscale); - - // off the right side - if (x1 > viewwidth) - return; - - tx += tex->GetScaledWidth(); - int x2 = xs_RoundToInt(CenterX + tx * pspritexscale); - - // off the left side - if (x2 <= 0) - return; - - double texturemid = (BaseYCenter - sy) * tex->Scale.Y + tex->TopOffset; - - // Adjust PSprite for fullscreen views - if (camera->player && (RenderTarget != screen || viewheight == RenderTarget->GetHeight() || (RenderTarget->GetWidth() > (BaseXCenter * 2) && !st_scale))) - { - AWeapon *weapon = dyn_cast(sprite->GetCaller()); - if (weapon != nullptr && weapon->YAdjust != 0) - { - if (RenderTarget != screen || viewheight == RenderTarget->GetHeight()) - { - texturemid -= weapon->YAdjust; - } - else - { - texturemid -= StatusBar->GetDisplacement() * weapon->YAdjust; - } - } - } - - // Move the weapon down for 1280x1024. 
- if (sprite->GetID() < PSP_TARGETCENTER) - { - texturemid -= AspectPspriteOffset(WidescreenRatio); - } - - int clipped_x1 = MAX(x1, 0); - int clipped_x2 = MIN(x2, viewwidth); - double xscale = pspritexscale / tex->Scale.X; - double yscale = pspriteyscale / tex->Scale.Y; - uint32_t translation = 0; // [RH] Use default colors - - double xiscale, startfrac; - if (flip) - { - xiscale = -pspritexiscale * tex->Scale.X; - startfrac = 1; - } - else - { - xiscale = pspritexiscale * tex->Scale.X; - startfrac = 0; - } - - if (clipped_x1 > x1) - startfrac += xiscale * (clipped_x1 - x1); - - bool noaccel = false; - - FDynamicColormap *basecolormap = viewsector->ColorMap; - FDynamicColormap *colormap_to_use = basecolormap; - - visstyle_t visstyle; - visstyle.ColormapNum = 0; - visstyle.BaseColormap = basecolormap; - visstyle.Alpha = 0; - visstyle.RenderStyle = STYLE_Normal; - - bool foggy = false; - int actualextralight = foggy ? 0 : extralight << 4; - int spriteshade = LIGHT2SHADE(owner->Sector->lightlevel + actualextralight); - double minz = double((2048 * 4) / double(1 << 20)); - visstyle.ColormapNum = GETPALOOKUP(r_SpriteVisibility / minz, spriteshade); - - if (sprite->GetID() < PSP_TARGETCENTER) - { - // Lots of complicated style and noaccel stuff - } - - // Check for hardware-assisted 2D. If it's available, and this sprite is not - // fuzzy, don't draw it until after the switch to 2D mode. 
- if (!noaccel && RenderTarget == screen && (DFrameBuffer *)screen->Accel2D) - { - FRenderStyle style = visstyle.RenderStyle; - style.CheckFuzz(); - if (style.BlendOp != STYLEOP_Fuzz) - { - ScreenSprite screenSprite; - screenSprite.Pic = tex; - screenSprite.X1 = viewwindowx + x1; - screenSprite.Y1 = viewwindowy + viewheight / 2 - texturemid * yscale - 0.5; - screenSprite.Width = tex->GetWidth() * xscale; - screenSprite.Height = tex->GetHeight() * yscale; - screenSprite.Translation = TranslationToTable(translation); - screenSprite.Flip = xiscale < 0; - screenSprite.visstyle = visstyle; - screenSprite.Colormap = colormap_to_use; - ScreenSprites.push_back(screenSprite); - return; - } - } - - //R_DrawVisSprite(vis); -} - -bool RenderBsp::IsThingCulled(AActor *thing) -{ - FIntCVar *cvar = thing->GetClass()->distancecheck; - if (cvar != nullptr && *cvar >= 0) - { - double dist = (thing->Pos() - ViewPos).LengthSquared(); - double check = (double)**cvar; - if (dist >= check * check) - return true; - } - - // Don't waste time projecting sprites that are definitely not visible. - if (thing == nullptr || - (thing->renderflags & RF_INVISIBLE) || - !thing->RenderStyle.IsVisible(thing->Alpha) || - !thing->IsVisibleToPlayer()) - { - return true; - } - - return false; -} - -void RenderBsp::AddSprite(AActor *thing) -{ - if (IsThingCulled(thing)) - return; - - DVector3 pos = thing->InterpolatedPosition(r_TicFracF); - pos.Z += thing->GetBobOffset(r_TicFracF); - - DVector3 eyePos = Transform.WorldToEye(pos); - - // thing is behind view plane? - if (eyePos.Z < Transform.NearZ()) - return; - - // too far off the side? 
- if (fabs(eyePos.X / 64) > eyePos.Z) - return; - - VisibleSprites.push_back({ thing, eyePos }); -} - -void RenderBsp::AddWallSprite(AActor *thing) -{ - if (IsThingCulled(thing)) - return; -} - -void RenderBsp::RenderSubsector(subsector_t *sub) -{ - sector_t *frontsector = sub->sector; - frontsector->MoreFlags |= SECF_DRAWN; - - for (AActor *thing = sub->sector->thinglist; thing != nullptr; thing = thing->snext) - { - if ((thing->renderflags & RF_SPRITETYPEMASK) == RF_WALLSPRITE) - AddWallSprite(thing); - else - AddSprite(thing); - } - - for (uint32_t i = 0; i < sub->numlines; i++) - { - seg_t *line = &sub->firstline[i]; - if (line->sidedef == NULL || !(line->sidedef->Flags & WALLF_POLYOBJ)) - AddLine(line, frontsector); - } -} - -void RenderBsp::AddLine(seg_t *line, sector_t *frontsector) -{ - // Reject lines not facing viewer - DVector2 pt1 = line->v1->fPos() - ViewPos; - DVector2 pt2 = line->v2->fPos() - ViewPos; - if (pt1.Y * (pt1.X - pt2.X) + pt1.X * (pt2.Y - pt1.Y) >= 0) - return; - - double frontceilz1 = frontsector->ceilingplane.ZatPoint(line->v1); - double frontfloorz1 = frontsector->floorplane.ZatPoint(line->v1); - double frontceilz2 = frontsector->ceilingplane.ZatPoint(line->v2); - double frontfloorz2 = frontsector->floorplane.ZatPoint(line->v2); - - WallCoords entireWall(Transform, line->v1->fPos(), line->v2->fPos(), frontceilz1, frontfloorz1, frontceilz2, frontfloorz2); - if (entireWall.Culled) - return; - - VisiblePlaneKey ceilingPlaneKey(frontsector->GetTexture(sector_t::ceiling), frontsector->ColorMap, frontsector->lightlevel, frontsector->ceilingplane, frontsector->planes[sector_t::ceiling].xform); - VisiblePlaneKey floorPlaneKey(frontsector->GetTexture(sector_t::floor), frontsector->ColorMap, frontsector->lightlevel, frontsector->floorplane, frontsector->planes[sector_t::floor].xform); - - RenderWall wall; - wall.Line = line; - wall.Colormap = frontsector->ColorMap; - wall.Masked = false; - - if (line->backsector == nullptr) - { - 
Planes.MarkCeilingPlane(ceilingPlaneKey, Clip, entireWall); - Planes.MarkFloorPlane(floorPlaneKey, Clip, entireWall); - - wall.Coords = entireWall; - wall.TopZ = frontceilz1; - wall.BottomZ = frontfloorz1; - wall.UnpeggedCeil = frontceilz1; - wall.Texpart = side_t::mid; - wall.Render(Clip); - - Clip.MarkSegmentCulled(entireWall, -1); - } - else - { - sector_t *backsector = (line->backsector != line->frontsector) ? line->backsector : line->frontsector; - - double backceilz1 = backsector->ceilingplane.ZatPoint(line->v1); - double backfloorz1 = backsector->floorplane.ZatPoint(line->v1); - double backceilz2 = backsector->ceilingplane.ZatPoint(line->v2); - double backfloorz2 = backsector->floorplane.ZatPoint(line->v2); - - double topceilz1 = frontceilz1; - double topceilz2 = frontceilz2; - double topfloorz1 = MIN(backceilz1, frontceilz1); - double topfloorz2 = MIN(backceilz2, frontceilz2); - double bottomceilz1 = MAX(frontfloorz1, backfloorz1); - double bottomceilz2 = MAX(frontfloorz2, backfloorz2); - double bottomfloorz1 = frontfloorz1; - double bottomfloorz2 = frontfloorz2; - double middleceilz1 = topfloorz1; - double middleceilz2 = topfloorz2; - double middlefloorz1 = MIN(bottomceilz1, middleceilz1); - double middlefloorz2 = MIN(bottomceilz2, middleceilz2); - - bool bothSkyCeiling = frontsector->GetTexture(sector_t::ceiling) == skyflatnum && backsector->GetTexture(sector_t::ceiling) == skyflatnum; - bool bothSkyFloor = frontsector->GetTexture(sector_t::floor) == skyflatnum && backsector->GetTexture(sector_t::floor) == skyflatnum; - - int maskedWallIndex = -1; - - if ((topceilz1 > topfloorz1 || topceilz2 > topfloorz2) && !bothSkyCeiling && line->sidedef) - { - WallCoords topwall(Transform, line->v1->fPos(), line->v2->fPos(), topceilz1, topfloorz1, topceilz2, topfloorz2); - if (!topwall.Culled) - { - wall.Coords = topwall; - wall.TopZ = topceilz1; - wall.BottomZ = topfloorz1; - wall.UnpeggedCeil = topceilz1; - wall.Texpart = side_t::top; - wall.Render(Clip); - } - } - 
- if ((bottomfloorz1 < bottomceilz1 || bottomfloorz2 < bottomceilz2) && !bothSkyFloor && line->sidedef) - { - WallCoords bottomwall(Transform, line->v1->fPos(), line->v2->fPos(), bottomceilz1, bottomfloorz2, bottomceilz2, bottomfloorz2); - if (!bottomwall.Culled) - { - wall.Coords = bottomwall; - wall.TopZ = bottomceilz1; - wall.BottomZ = bottomfloorz2; - wall.UnpeggedCeil = topceilz1; - wall.Texpart = side_t::bottom; - wall.Render(Clip); - } - } - - WallCoords midwall(Transform, line->v1->fPos(), line->v2->fPos(), middleceilz1, middlefloorz1, middleceilz2, middlefloorz2); - if (!midwall.Culled && line->sidedef) - { - FTexture *midtex = TexMan(line->sidedef->GetTexture(side_t::mid), true); - if (midtex && midtex->UseType != FTexture::TEX_Null) - { - DVector3 v1 = Transform.WorldToEye({ line->v1->fPos(), 0.0 }); - DVector3 v2 = Transform.WorldToEye({ line->v2->fPos(), 0.0 }); - wall.Coords = midwall; - wall.TopZ = middleceilz1; - wall.BottomZ = middlefloorz1; - wall.UnpeggedCeil = topceilz1; - wall.Texpart = side_t::mid; - wall.Masked = true; - - maskedWallIndex = (int)VisibleMaskedWalls.size(); - VisibleMaskedWalls.push_back(wall); - } - } - - if (!bothSkyCeiling && !bothSkyFloor) - { - Planes.MarkCeilingPlane(ceilingPlaneKey, Clip, entireWall); - Planes.MarkFloorPlane(floorPlaneKey, Clip, entireWall); - if (!midwall.Culled) - Clip.ClipVertical(midwall, maskedWallIndex); - else - Clip.MarkSegmentCulled(entireWall, maskedWallIndex); - } - else if (bothSkyCeiling) - { - Planes.MarkFloorPlane(floorPlaneKey, Clip, entireWall); - if (!midwall.Culled) - Clip.ClipBottom(midwall, maskedWallIndex); - else - Clip.MarkSegmentCulled(entireWall, maskedWallIndex); - } - else if (bothSkyFloor) - { - Planes.MarkCeilingPlane(ceilingPlaneKey, Clip, entireWall); - if (!midwall.Culled) - Clip.ClipTop(midwall, maskedWallIndex); - else - Clip.MarkSegmentCulled(entireWall, maskedWallIndex); - } - } -} - -void RenderBsp::RenderNode(void *node) -{ - while (!((size_t)node & 1)) // Keep 
going until found a subsector - { - node_t *bsp = (node_t *)node; - - // Decide which side the view point is on. - int side = PointOnSide(ViewPos, bsp); - - // Recursively divide front space (toward the viewer). - RenderNode(bsp->children[side]); - - // Possibly divide back space (away from the viewer). - side ^= 1; - if (!CheckBBox(bsp->bbox[side])) - return; - - node = bsp->children[side]; - } - RenderSubsector((subsector_t *)((BYTE *)node - 1)); -} - -int RenderBsp::PointOnSide(const DVector2 &pos, const node_t *node) -{ - return DMulScale32(FLOAT2FIXED(pos.Y) - node->y, node->dx, node->x - FLOAT2FIXED(pos.X), node->dy) > 0; -} - -bool RenderBsp::CheckBBox(float *bspcoord) -{ - static const int checkcoord[12][4] = - { - { 3,0,2,1 }, - { 3,0,2,0 }, - { 3,1,2,0 }, - { 0 }, - { 2,0,2,1 }, - { 0,0,0,0 }, - { 3,1,3,0 }, - { 0 }, - { 2,0,3,1 }, - { 2,1,3,1 }, - { 2,1,3,0 } - }; - - int boxx; - int boxy; - int boxpos; - - double x1, y1, x2, y2; - double rx1, ry1, rx2, ry2; - int sx1, sx2; - - // Find the corners of the box - // that define the edges from current viewpoint. - if (ViewPos.X <= bspcoord[BOXLEFT]) - boxx = 0; - else if (ViewPos.X < bspcoord[BOXRIGHT]) - boxx = 1; - else - boxx = 2; - - if (ViewPos.Y >= bspcoord[BOXTOP]) - boxy = 0; - else if (ViewPos.Y > bspcoord[BOXBOTTOM]) - boxy = 1; - else - boxy = 2; - - boxpos = (boxy << 2) + boxx; - if (boxpos == 5) - return true; - - x1 = bspcoord[checkcoord[boxpos][0]] - ViewPos.X; - y1 = bspcoord[checkcoord[boxpos][1]] - ViewPos.Y; - x2 = bspcoord[checkcoord[boxpos][2]] - ViewPos.X; - y2 = bspcoord[checkcoord[boxpos][3]] - ViewPos.Y; - - // check clip list for an open space - - // Sitting on a line? 
- if (y1 * (x1 - x2) + x1 * (y2 - y1) >= -EQUAL_EPSILON) - return true; - - rx1 = x1 * ViewSin - y1 * ViewCos; - rx2 = x2 * ViewSin - y2 * ViewCos; - ry1 = x1 * ViewTanCos + y1 * ViewTanSin; - ry2 = x2 * ViewTanCos + y2 * ViewTanSin; - - /*if (MirrorFlags & RF_XFLIP) - { - double t = -rx1; - rx1 = -rx2; - rx2 = t; - swapvalues(ry1, ry2); - }*/ - - if (rx1 >= -ry1) - { - if (rx1 > ry1) return false; // left edge is off the right side - if (ry1 == 0) return false; - sx1 = xs_RoundToInt(CenterX + rx1 * CenterX / ry1); - } - else - { - if (rx2 < -ry2) return false; // wall is off the left side - if (rx1 - rx2 - ry2 + ry1 == 0) return false; // wall does not intersect view volume - sx1 = 0; - } - - if (rx2 <= ry2) - { - if (rx2 < -ry2) return false; // right edge is off the left side - if (ry2 == 0) return false; - sx2 = xs_RoundToInt(CenterX + rx2 * CenterX / ry2); - } - else - { - if (rx1 > ry1) return false; // wall is off the right side - if (ry2 - ry1 - rx2 + rx1 == 0) return false; // wall does not intersect view volume - sx2 = viewwidth; - } - - // Find the first clippost that touches the source post - // (adjacent pixels are touching). - - // Does not cross a pixel. 
- if (sx2 <= sx1) - return false; - - return !Clip.IsSegmentCulled(sx1, sx2); -} - -///////////////////////////////////////////////////////////////////////////// - -WallTextureCoords::WallTextureCoords(FTexture *tex, const seg_t *line, side_t::ETexpart texpart, double topz, double bottomz, double unpeggedceil) -{ - CalcU(tex, line, texpart); - CalcV(tex, line, texpart, topz, bottomz, unpeggedceil); -} - -void WallTextureCoords::CalcU(FTexture *tex, const seg_t *line, side_t::ETexpart texpart) -{ - double lineLength = line->sidedef->TexelLength; - double lineStart = 0.0; - - bool entireSegment = ((line->linedef->v1 == line->v1) && (line->linedef->v2 == line->v2) || (line->linedef->v2 == line->v1) && (line->linedef->v1 == line->v2)); - if (!entireSegment) - { - lineLength = (line->v2->fPos() - line->v1->fPos()).Length(); - lineStart = (line->v1->fPos() - line->linedef->v1->fPos()).Length(); - } - - int texWidth = tex->GetWidth(); - double uscale = line->sidedef->GetTextureXScale(texpart) * tex->Scale.X; - u1 = lineStart + line->sidedef->GetTextureXOffset(texpart); - u2 = u1 + lineLength; - u1 *= uscale; - u2 *= uscale; - u1 /= texWidth; - u2 /= texWidth; -} - -void WallTextureCoords::CalcV(FTexture *tex, const seg_t *line, side_t::ETexpart texpart, double topz, double bottomz, double unpeggedceil) -{ - double vscale = line->sidedef->GetTextureYScale(texpart) * tex->Scale.Y; - - double yoffset = line->sidedef->GetTextureYOffset(texpart); - if (tex->bWorldPanning) - yoffset *= vscale; - - switch (texpart) - { - default: - case side_t::mid: - CalcVMidPart(tex, line, topz, bottomz, vscale, yoffset); - break; - case side_t::top: - CalcVTopPart(tex, line, topz, bottomz, vscale, yoffset); - break; - case side_t::bottom: - CalcVBottomPart(tex, line, topz, bottomz, unpeggedceil, vscale, yoffset); - break; - } - - int texHeight = tex->GetHeight(); - v1 /= texHeight; - v2 /= texHeight; -} - -void WallTextureCoords::CalcVTopPart(FTexture *tex, const seg_t *line, double topz, 
double bottomz, double vscale, double yoffset) -{ - bool pegged = (line->linedef->flags & ML_DONTPEGTOP) == 0; - if (pegged) // bottom to top - { - int texHeight = tex->GetHeight(); - v1 = -yoffset; - v2 = v1 + (topz - bottomz); - v1 *= vscale; - v2 *= vscale; - v1 = texHeight - v1; - v2 = texHeight - v2; - std::swap(v1, v2); - } - else // top to bottom - { - v1 = yoffset; - v2 = v1 + (topz - bottomz); - v1 *= vscale; - v2 *= vscale; - } -} - -void WallTextureCoords::CalcVMidPart(FTexture *tex, const seg_t *line, double topz, double bottomz, double vscale, double yoffset) -{ - bool pegged = (line->linedef->flags & ML_DONTPEGBOTTOM) == 0; - if (pegged) // top to bottom - { - v1 = yoffset; - v2 = v1 + (topz - bottomz); - v1 *= vscale; - v2 *= vscale; - } - else // bottom to top - { - int texHeight = tex->GetHeight(); - v1 = yoffset; - v2 = v1 + (topz - bottomz); - v1 *= vscale; - v2 *= vscale; - v1 = texHeight - v1; - v2 = texHeight - v2; - std::swap(v1, v2); - } -} - -void WallTextureCoords::CalcVBottomPart(FTexture *tex, const seg_t *line, double topz, double bottomz, double unpeggedceil, double vscale, double yoffset) -{ - bool pegged = (line->linedef->flags & ML_DONTPEGBOTTOM) == 0; - if (pegged) // top to bottom - { - v1 = yoffset; - v2 = v1 + (topz - bottomz); - v1 *= vscale; - v2 *= vscale; - } - else - { - v1 = yoffset + (unpeggedceil - topz); - v2 = v1 + (topz - bottomz); - v1 *= vscale; - v2 *= vscale; - } -} - -///////////////////////////////////////////////////////////////////////////// - -void RenderClipBuffer::Clear(short left, short right) -{ - SolidSegments.clear(); - SolidSegments.reserve(MAXWIDTH / 2 + 2); - SolidSegments.push_back({ -0x7fff, left }); - SolidSegments.push_back({ right, 0x7fff }); - - DrawSegments.clear(); - ClipValues.clear(); - - for (int x = left; x < right; x++) - { - Top[x] = 0; - Bottom[x] = viewheight; - } -} - -bool RenderClipBuffer::IsSegmentCulled(short x1, short x2) const -{ - int next = 0; - while (SolidSegments[next].X2 
<= x2) - next++; - return (x1 >= SolidSegments[next].X1 && x2 <= SolidSegments[next].X2); -} - -void RenderClipBuffer::MarkSegmentCulled(const WallCoords &wallCoords, int drawIndex) -{ - if (wallCoords.Culled) - return; - - VisibleSegmentsIterator it(*this, wallCoords.ScreenX1, wallCoords.ScreenX2); - while (it.Step()) - { - for (short x = it.X1; x < it.X2; x++) - { - Bottom[x] = Top[x]; - } - - AddDrawSegment(it.X1, it.X2, wallCoords, true, true, drawIndex); - } - - short x1 = wallCoords.ScreenX1; - short x2 = wallCoords.ScreenX2; - - if (x1 >= x2) - return; - - int cur = 1; - while (true) - { - if (SolidSegments[cur].X1 <= x1 && SolidSegments[cur].X2 >= x2) // Already fully marked - { - break; - } - else if (cur + 1 != SolidSegments.size() && SolidSegments[cur].X2 >= x1 && SolidSegments[cur].X1 <= x2) // Merge segments - { - // Find last segment - int merge = cur; - while (merge + 2 != SolidSegments.size() && SolidSegments[merge + 1].X1 <= x2) - merge++; - - // Apply new merged range - SolidSegments[cur].X1 = MIN(SolidSegments[cur].X1, x1); - SolidSegments[cur].X2 = MAX(SolidSegments[merge].X2, x2); - - // Remove additional segments we merged with - if (merge > cur) - SolidSegments.erase(SolidSegments.begin() + (cur + 1), SolidSegments.begin() + (merge + 1)); - - break; - } - else if (SolidSegments[cur].X1 > x1) // Insert new segment - { - SolidSegments.insert(SolidSegments.begin() + cur, { x1, x2 }); - break; - } - cur++; - } -} - -void RenderClipBuffer::ClipVertical(const WallCoords &wallCoords, int drawIndex) -{ - if (wallCoords.Culled) - return; - - VisibleSegmentsIterator it(*this, wallCoords.ScreenX1, wallCoords.ScreenX2); - while (it.Step()) - { - for (short x = it.X1; x < it.X2; x++) - { - Top[x] = MAX(wallCoords.Y1(x), Top[x]); - Bottom[x] = MIN(wallCoords.Y2(x), Bottom[x]); - } - AddDrawSegment(it.X1, it.X2, wallCoords, true, true, drawIndex); - } -} - -void RenderClipBuffer::ClipTop(const WallCoords &wallCoords, int drawIndex) -{ - if 
(wallCoords.Culled) - return; - - VisibleSegmentsIterator it(*this, wallCoords.ScreenX1, wallCoords.ScreenX2); - while (it.Step()) - { - for (short x = it.X1; x < it.X2; x++) - { - Top[x] = MAX(wallCoords.Y1(x), Top[x]); - } - AddDrawSegment(it.X1, it.X2, wallCoords, true, false, drawIndex); - } -} - -void RenderClipBuffer::ClipBottom(const WallCoords &wallCoords, int drawIndex) -{ - if (wallCoords.Culled) - return; - - VisibleSegmentsIterator it(*this, wallCoords.ScreenX1, wallCoords.ScreenX2); - while (it.Step()) - { - for (short x = it.X1; x < it.X2; x++) - { - Bottom[x] = MIN(wallCoords.Y2(x), Bottom[x]); - } - AddDrawSegment(it.X1, it.X2, wallCoords, false, true, drawIndex); - } -} - -void RenderClipBuffer::AddDrawSegment(short x1, short x2, const WallCoords &wall, bool clipTop, bool clipBottom, int drawIndex) -{ - if (drawIndex != -1) // DrawMaskedWall needs both clipping ranges - { - clipTop = true; - clipBottom = true; - } - - DrawSegment segment; - segment.X1 = x1; - segment.X2 = x2; - segment.ClipOffset = (int)ClipValues.size(); - segment.ClipTop = clipTop; - segment.ClipBottom = clipBottom; - segment.PlaneNormal = wall.PlaneNormal; - segment.PlaneD = wall.PlaneD; - segment.NearZ = wall.NearZ; - segment.FarZ = wall.FarZ; - segment.DrawIndex = drawIndex; - - if (clipTop) - { - ClipValues.insert(ClipValues.end(), Top + x1, Top + x2); - } - - if (clipBottom) - { - ClipValues.insert(ClipValues.end(), Bottom + x1, Bottom + x2); - } - - DrawSegments.push_back(segment); -} - -void RenderClipBuffer::SetupSpriteClip(short x1, short x2, const DVector3 &pos, bool wallSprite) -{ - for (int i = x1; i < x2; i++) - { - Top[i] = 0; - Bottom[i] = viewheight; - } - - for (auto it = DrawSegments.crbegin(); it != DrawSegments.crend(); ++it) - { - const auto &segment = *it; - - int r1 = MAX(segment.X1, x1); - int r2 = MIN(segment.X2, x2); - if (r2 <= r1) - continue; - - short *clipTop = ClipValues.data() + segment.ClipOffset; - short *clipBottom = segment.ClipTop ? 
clipTop + (segment.X2 - segment.X1) : clipTop; - - double side = (pos | segment.PlaneNormal) + segment.PlaneD; - bool segBehindSprite; - if (!wallSprite) - segBehindSprite = (segment.NearZ >= pos.Z) || (segment.FarZ >= pos.Z && side <= 0.0); - else - segBehindSprite = side <= 0.0; - - if (segBehindSprite) - { - if (segment.DrawIndex != -1 && DrawMaskedWall) - DrawMaskedWall(r1, r2, segment.DrawIndex, clipTop + (r1 - segment.X1), clipBottom + (r1 - segment.X1)); - - if (segment.ClipTop) - { - for (int i = r1 - segment.X1; i < r2 - segment.X1; i++) - clipTop[i] = 0; - } - - if (segment.ClipBottom) - { - for (int i = r1 - segment.X1; i < r2 - segment.X1; i++) - clipBottom[i] = 0; - } - } - else - { - if (segment.ClipTop) - { - for (int x = r1; x < r2; x++) - Top[x] = MAX(clipTop[x - segment.X1], Top[x]); - } - - if (segment.ClipBottom) - { - for (int x = r1; x < r2; x++) - Bottom[x] = MIN(clipBottom[x - segment.X1], Bottom[x]); - } - } - } -} - -void RenderClipBuffer::RenderMaskedWalls() -{ - for (int i = 0; i < viewwidth; i++) - { - Top[i] = 0; - Bottom[i] = viewheight; - } - - for (auto it = DrawSegments.crbegin(); it != DrawSegments.crend(); ++it) - { - const auto &segment = *it; - if (segment.DrawIndex != -1 && DrawMaskedWall) - { - short *clipTop = ClipValues.data() + segment.ClipOffset; - short *clipBottom = segment.ClipTop ? 
clipTop + (segment.X2 - segment.X1) : clipTop; - DrawMaskedWall(segment.X1, segment.X2, segment.DrawIndex, clipTop, clipBottom); - } - } -} - -///////////////////////////////////////////////////////////////////////////// - -VisibleSegmentsIterator::VisibleSegmentsIterator(const RenderClipBuffer &buffer, short startx, short endx) : SolidSegments(buffer.SolidSegments), endx(endx) -{ - X1 = startx; - X2 = startx; -} - -bool VisibleSegmentsIterator::Step() -{ - if (next == 0) - { - while (SolidSegments[next].X2 <= X1) - next++; - if (SolidSegments[next].X1 <= X1) - X1 = SolidSegments[next++].X2; - X2 = MIN(SolidSegments[next].X1, endx); - } - else if (X2 == SolidSegments[next].X1 && next + 1 != SolidSegments.size()) - { - X1 = SolidSegments[next++].X2; - X2 = MIN(SolidSegments[next].X1, endx); - } - else - { - X1 = X2; - } - - return X1 < X2; -} - -///////////////////////////////////////////////////////////////////////////// - -RenderVisiblePlane::RenderVisiblePlane(VisiblePlane *plane, FTexture *tex) -{ - const auto &key = plane->Key; - - double xscale = key.Transform.xScale * tex->Scale.X; - double yscale = key.Transform.yScale * tex->Scale.Y; - - double planeang = (key.Transform.Angle + key.Transform.baseAngle).Radians(); - double cosine = cos(planeang); - double sine = sin(planeang); - viewx = (key.Transform.xOffs + ViewPos.X * cosine - ViewPos.Y * sine) * xscale; - viewy = (key.Transform.yOffs - ViewPos.X * sine - ViewPos.Y * cosine) * yscale; - - // left to right mapping - planeang += (ViewAngle - 90).Radians(); - - // Scale will be unit scale at FocalLengthX (normally SCREENWIDTH/2) distance - double xstep = cos(planeang) / FocalLengthX; - double ystep = -sin(planeang) / FocalLengthX; - - // [RH] flip for mirrors - /*if (MirrorFlags & RF_XFLIP) - { - xstep = -xstep; - ystep = -ystep; - }*/ - - planeang += M_PI / 2; - cosine = cos(planeang); - sine = -sin(planeang); - double x = plane->Right - centerx - 0.5; - double rightxfrac = xscale * (cosine + x * xstep); - 
double rightyfrac = yscale * (sine + x * ystep); - x = plane->Left - centerx - 0.5; - double leftxfrac = xscale * (cosine + x * xstep); - double leftyfrac = yscale * (sine + x * ystep); - - basexfrac = rightxfrac; - baseyfrac = rightyfrac; - xstepscale = (rightxfrac - leftxfrac) / (plane->Right - plane->Left); - ystepscale = (rightyfrac - leftyfrac) / (plane->Right - plane->Left); - - planeheight = fabs(key.Plane.Zat0() - ViewPos.Z); -} - -void RenderVisiblePlane::Step() -{ - basexfrac -= xstepscale; - baseyfrac -= ystepscale; -} - -///////////////////////////////////////////////////////////////////////////// - -void RenderPlanes::Render() -{ - for (int i = 0; i < NumBuckets; i++) - { - VisiblePlane *plane = PlaneBuckets[i].get(); - while (plane) - { - RenderPlane(plane); - plane = plane->Next.get(); - } - } -} - -void RenderPlanes::RenderPlane(VisiblePlane *plane) -{ - FTexture *tex = TexMan(plane->Key.Picnum); - if (tex->UseType == FTexture::TEX_Null) - return; - - RenderVisiblePlane render(plane, tex); - - short spanend[MAXHEIGHT]; - int x = plane->Right - 1; - int t2 = plane->Top[x]; - int b2 = plane->Bottom[x]; - - if (b2 > t2) - { - clearbufshort(spanend + t2, b2 - t2, x); - } - - for (--x; x >= plane->Left; --x) - { - int t1 = plane->Top[x]; - int b1 = plane->Bottom[x]; - const int xr = x + 1; - int stop; - - // Draw any spans that have just closed - stop = MIN(t1, b2); - while (t2 < stop) - { - int y = t2++; - RenderSpan(y, xr, spanend[y], plane->Key, tex, render); - } - stop = MAX(b1, t2); - while (b2 > stop) - { - int y = --b2; - RenderSpan(y, xr, spanend[y], plane->Key, tex, render); - } - - // Mark any spans that have just opened - stop = MIN(t2, b1); - while (t1 < stop) - { - spanend[t1++] = x; - } - stop = MAX(b2, t2); - while (b1 > stop) - { - spanend[--b1] = x; - } - - t2 = plane->Top[x]; - b2 = plane->Bottom[x]; - render.Step(); - } - // Draw any spans that are still open - while (t2 < b2) - { - int y = --b2; - RenderSpan(y, plane->Left, 
spanend[y], plane->Key, tex, render); - } -} - -void RenderPlanes::RenderSpan(int y, int x1, int x2, const VisiblePlaneKey &key, FTexture *tex, const RenderVisiblePlane &renderInfo) -{ - if (key.Picnum != skyflatnum) - { - double distance = renderInfo.planeheight * yslope[y]; - - double u = distance * renderInfo.basexfrac + renderInfo.viewx; - double v = distance * renderInfo.baseyfrac + renderInfo.viewy; - double uscale = distance * renderInfo.xstepscale; - double vscale = distance * renderInfo.ystepscale; - - double vis = r_FloorVisibility / renderInfo.planeheight; - - if (fixedlightlev >= 0) - R_SetDSColorMapLight(key.ColorMap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); - else if (fixedcolormap) - R_SetDSColorMapLight(fixedcolormap, 0, 0); - else - R_SetDSColorMapLight(key.ColorMap, (float)(vis * fabs(CenterY - y)), LIGHT2SHADE(key.LightLevel)); - - ds_source = (const BYTE *)tex->GetPixelsBgra(); - ds_source_mipmapped = false; - ds_xbits = tex->WidthBits; - ds_ybits = tex->HeightBits; - ds_xfrac = (uint32_t)(u * (1 << (32 - ds_xbits))); - ds_yfrac = (uint32_t)(v * (1 << (32 - ds_ybits))); - ds_xstep = (uint32_t)(uscale * (1 << (32 - ds_xbits))); - ds_ystep = (uint32_t)(vscale * (1 << (32 - ds_ybits))); - ds_y = y; - ds_x1 = x1; - ds_x2 = x2; - R_DrawSpan(); - } - else - { - tex = TexMan(sky1texture, true); - - double xangle1 = ((0.5 - x1 / (double)viewwidth) * FocalTangent * 90.0); - double xangle2 = ((0.5 - x2 / (double)viewwidth) * FocalTangent * 90.0); - - double u1 = sky1pos + (ViewAngle.Degrees + xangle1) * sky1cyl / 360.0; - double u2 = sky1pos + (ViewAngle.Degrees + xangle2) * sky1cyl / 360.0; - double u = u1; - double v = (y - CenterY) * skyiscale + skytexturemid * tex->Scale.Y; - double uscale = (u2 - u1) / (x2 - x1); - double vscale = 0.0; - - if (fixedlightlev >= 0) - R_SetDSColorMapLight(key.ColorMap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); - else if (fixedcolormap) - R_SetDSColorMapLight(fixedcolormap, 0, 0); - else - R_SetDSColorMapLight(key.ColorMap, 0, 
0); - - ds_source = (const BYTE *)tex->GetPixelsBgra(); - ds_source_mipmapped = false; - ds_xbits = tex->WidthBits; - ds_ybits = tex->HeightBits; - ds_xfrac = (uint32_t)(u * (1 << (32 - ds_xbits))); - ds_yfrac = (uint32_t)(v * (1 << (32 - ds_ybits))); - ds_xstep = (uint32_t)(uscale * (1 << (32 - ds_xbits))); - ds_ystep = (uint32_t)(vscale * (1 << (32 - ds_ybits))); - ds_y = y; - ds_x1 = x1; - ds_x2 = x2; - R_DrawSpan(); - } -} - -void RenderPlanes::Clear() -{ - for (int i = 0; i < NumBuckets; i++) - { - std::unique_ptr plane = std::move(PlaneBuckets[i]); - while (plane) - { - std::unique_ptr next = std::move(plane->Next); - FreePlanes.push_back(std::move(plane)); - plane = std::move(next); - } - } -} - -void RenderPlanes::MarkCeilingPlane(const VisiblePlaneKey &key, const RenderClipBuffer &clip, const WallCoords &wallCoords) -{ - VisibleSegmentsIterator it(clip, wallCoords.ScreenX1, wallCoords.ScreenX2); - while (it.Step()) - { - VisiblePlane *plane = GetPlaneWithUnsetRange(key, it.X1, it.X2); - - for (short x = it.X1; x < it.X2; x++) - { - short walltop = MAX(wallCoords.Y1(x), clip.Top[x]); - short top = clip.Top[x]; - short bottom = MIN(walltop, clip.Bottom[x]); - if (top < bottom) - { - plane->Top[x] = top; - plane->Bottom[x] = bottom; - } - } - } -} - -void RenderPlanes::MarkFloorPlane(const VisiblePlaneKey &key, const RenderClipBuffer &clip, const WallCoords &wallCoords) -{ - VisibleSegmentsIterator it(clip, wallCoords.ScreenX1, wallCoords.ScreenX2); - while (it.Step()) - { - VisiblePlane *plane = GetPlaneWithUnsetRange(key, it.X1, it.X2); - - for (short x = it.X1; x < it.X2; x++) - { - short wallbottom = MIN(wallCoords.Y2(x), clip.Bottom[x]); - short top = MAX(wallbottom, clip.Top[x]); - short bottom = clip.Bottom[x]; - if (top < bottom) - { - plane->Top[x] = top; - plane->Bottom[x] = bottom; - } - } - } -} - -VisiblePlane *RenderPlanes::GetPlaneWithUnsetRange(const VisiblePlaneKey &key, int start, int stop) -{ - VisiblePlane *plane = GetPlane(key); - - int 
intrl, intrh; - int unionl, unionh; - - if (start < plane->Left) - { - intrl = plane->Left; - unionl = start; - } - else - { - unionl = plane->Left; - intrl = start; - } - - if (stop > plane->Right) - { - intrh = plane->Right; - unionh = stop; - } - else - { - unionh = plane->Right; - intrh = stop; - } - - // Verify that the entire range has unset values - int x = intrl; - while (x < intrh && plane->Top[x] == VisiblePlane::UnsetValue) - x++; - - if (x >= intrh) // They do. Use the current plane - { - plane->Left = unionl; - plane->Right = unionh; - return plane; - } - else // Create new plane and make sure it is found first - { - auto &bucket = PlaneBuckets[Hash(key)]; - std::unique_ptr newPlane = AllocPlane(key); - newPlane->Left = start; - newPlane->Right = stop; - newPlane->Next = std::move(bucket); - bucket = std::move(newPlane); - return bucket.get(); - } -} - -VisiblePlane *RenderPlanes::GetPlane(const VisiblePlaneKey &key) -{ - auto &bucket = PlaneBuckets[Hash(key)]; - VisiblePlane *plane = bucket.get(); - - while (plane != nullptr) - { - if (plane->Key == key) - return plane; - plane = plane->Next.get(); - } - - std::unique_ptr new_plane = AllocPlane(key); - new_plane->Next = std::move(bucket); - bucket = std::move(new_plane); - return bucket.get(); -} - -std::unique_ptr RenderPlanes::AllocPlane(const VisiblePlaneKey &key) -{ - if (!FreePlanes.empty()) - { - std::unique_ptr plane = std::move(FreePlanes.back()); - FreePlanes.pop_back(); - plane->Clear(key); - return std::move(plane); - } - else - { - return std::make_unique(key); - } -} - -///////////////////////////////////////////////////////////////////////////// - -void RenderWall::Render(const RenderClipBuffer &clip) -{ - FTexture *tex = GetTexture(); - if (!tex) - return; - int texWidth = tex->GetWidth(); - int texHeight = tex->GetHeight(); - - WallTextureCoords texcoords(tex, Line, Texpart, TopZ, BottomZ, UnpeggedCeil); - - VisibleSegmentsIterator it(clip, Coords.ScreenX1, Coords.ScreenX2); - while 
(it.Step()) - { - for (short x = it.X1; x < it.X2; x++) - { - short y1 = MAX(Coords.Y1(x), clip.Top[x]); - short y2 = MIN(Coords.Y2(x), clip.Bottom[x]); - if (y1 >= y2) - continue; - - double u = Coords.VaryingX(x, texcoords.u1, texcoords.u2); - double v1 = Coords.VaryingY(x, y1, texcoords.v1, texcoords.v2); - double v2 = Coords.VaryingY(x, y2, texcoords.v1, texcoords.v2); - - R_SetColorMapLight(Colormap, GetLight(x), GetShade()); - - dc_source = (const BYTE *)tex->GetColumnBgra((int)(u * texWidth), nullptr); - dc_source2 = nullptr; - dc_textureheight = texHeight; - dc_texturefrac = (uint32_t)(v1 * 0xffffffff); - dc_iscale = (uint32_t)((v2 - v1) / (y2 - y1) * 0xffffffff); - dc_dest = dc_destorg + (ylookup[y1] + x) * 4; - dc_count = y2 - y1; - dovline1(); - } - } -} - -void RenderWall::RenderMasked(short x1, short x2, const short *clipTop, const short *clipBottom) -{ - FTexture *tex = GetTexture(); - if (!tex) - return; - int texWidth = tex->GetWidth(); - int texHeight = tex->GetHeight(); - - WallTextureCoords texcoords(tex, Line, Texpart, TopZ, BottomZ, UnpeggedCeil); - - for (short x = x1; x < x2; x++) - { - short y1 = MAX(Coords.Y1(x), clipTop[x - x1]); - short y2 = MIN(Coords.Y2(x), clipBottom[x - x1]); - if (y1 >= y2) - continue; - - double u = Coords.VaryingX(x, texcoords.u1, texcoords.u2); - double v1 = Coords.VaryingY(x, y1, texcoords.v1, texcoords.v2); - double v2 = Coords.VaryingY(x, y2, texcoords.v1, texcoords.v2); - - R_SetColorMapLight(Colormap, GetLight(x), GetShade()); - - dc_source = (const BYTE *)tex->GetColumnBgra((int)(u * texWidth), nullptr); - dc_source2 = nullptr; - dc_textureheight = texHeight; - dc_texturefrac = (uint32_t)(v1 * 0xffffffff); - dc_iscale = (uint32_t)((v2 - v1) / (y2 - y1) * 0xffffffff); - dc_dest = dc_destorg + (ylookup[y1] + x) * 4; - dc_count = y2 - y1; - domvline1(); - } -} - -FTexture *RenderWall::GetTexture() -{ - FTexture *tex = TexMan(Line->sidedef->GetTexture(Texpart), true); - if (tex == nullptr || tex->UseType == 
FTexture::TEX_Null) - return nullptr; - else - return tex; -} - -int RenderWall::GetShade() -{ - if (fixedlightlev >= 0 || fixedcolormap) - { - return 0; - } - else - { - bool foggy = false; - int actualextralight = foggy ? 0 : extralight << 4; - int shade = LIGHT2SHADE(Line->sidedef->GetLightLevel(foggy, Line->frontsector->lightlevel) + actualextralight); - return shade; - } -} - -float RenderWall::GetLight(short x) -{ - if (fixedlightlev >= 0 || fixedcolormap) - return 0.0f; - else - return (float)(r_WallVisibility / Coords.Z(x)); -} - -///////////////////////////////////////////////////////////////////////////// - -VisibleSprite::VisibleSprite(AActor *actor, const DVector3 &eyePos) : Actor(actor), EyePos(eyePos) -{ -} - -void VisibleSprite::Render(RenderClipBuffer *clip) -{ - //if (MirrorFlags & RF_XFLIP) - // tx = -tx; - - bool flipTextureX = false; - FTexture *tex = GetSpriteTexture(Actor, flipTextureX); - DVector2 spriteScale = Actor->Scale; - - const double thingxscalemul = spriteScale.X / tex->Scale.X; - - double xscale = CenterX / EyePos.Z; - double yscale = spriteScale.Y / tex->Scale.Y;// spriteScale.Y / tex->Scale.Y * InvZtoScale / EyePos.Z; - - double tx; - if (flipTextureX) - { - tx = EyePos.X - (tex->GetWidth() - tex->LeftOffset - 1) * thingxscalemul; - } - else - { - tx = EyePos.X - tex->LeftOffset * thingxscalemul; - } - - double texturemid = tex->TopOffset + (EyePos.Y - Actor->Floorclip) / yscale; - double y = CenterY - texturemid * (InvZtoScale * yscale / EyePos.Z); - - int x1 = centerx + xs_RoundToInt(tx * xscale); - int x2 = centerx + xs_RoundToInt((tx + tex->GetWidth() * thingxscalemul) * xscale); - int y1 = xs_RoundToInt(y); - int y2 = xs_RoundToInt(y + (InvZtoScale * yscale / EyePos.Z) * tex->GetHeight()); - - xscale = spriteScale.X * xscale / tex->Scale.X; - - int clipped_x1 = clamp(x1, 0, viewwidth - 1); - int clipped_x2 = clamp(x2, 0, viewwidth - 1); - int clipped_y1 = clamp(y1, 0, viewheight - 1); - int clipped_y2 = clamp(y2, 0, 
viewheight - 1); - if (clipped_x1 >= clipped_x2 || clipped_y1 >= clipped_y2) - return; - - clip->SetupSpriteClip(clipped_x1, clipped_x2, EyePos, false); - - uint32_t texwidth = tex->GetWidth(); - uint32_t texheight = tex->GetHeight(); - - visstyle_t visstyle = GetSpriteVisStyle(Actor, EyePos.Z); - // Rumor has it that AlterWeaponSprite needs to be called with visstyle passed in somewhere around here.. - R_SetColorMapLight(visstyle.BaseColormap, 0, visstyle.ColormapNum << FRACBITS); - - for (int x = clipped_x1; x < clipped_x2; x++) - { - short top = MAX(clipped_y1, clip->Top[x]); - short bottom = MIN(clipped_y2, clip->Bottom[x]); - if (top < bottom) - { - float u = (x - x1) / (float)(x2 - x1); - float v = (top - y1) / (float)(y2 - y1); - if (flipTextureX) - u = 1.0f - u; - u = u - floor(u); - - dc_source = (const BYTE *)tex->GetColumnBgra((int)(u * texwidth), nullptr); - dc_source2 = nullptr; - dc_textureheight = texheight; - dc_texturefrac = (uint32_t)(v * 0xffffffff); - dc_iscale = 0xffffffff / (y2 - y1); - dc_dest = dc_destorg + (ylookup[top] + x) * 4; - dc_count = bottom - top; - domvline1(); - } - } -} - -visstyle_t VisibleSprite::GetSpriteVisStyle(AActor *thing, double z) -{ - visstyle_t visstyle; - - bool foggy = false; - int actualextralight = foggy ? 0 : extralight << 4; - int spriteshade = LIGHT2SHADE(thing->Sector->lightlevel + actualextralight); - - visstyle.RenderStyle = thing->RenderStyle; - visstyle.Alpha = float(thing->Alpha); - visstyle.ColormapNum = 0; - - // The software renderer cannot invert the source without inverting the overlay - // too. That means if the source is inverted, we need to do the reverse of what - // the invert overlay flag says to do. 
- bool invertcolormap = (visstyle.RenderStyle.Flags & STYLEF_InvertOverlay) != 0; - - if (visstyle.RenderStyle.Flags & STYLEF_InvertSource) - { - invertcolormap = !invertcolormap; - } - - FDynamicColormap *mybasecolormap = thing->Sector->ColorMap; - - // Sprites that are added to the scene must fade to black. - if (visstyle.RenderStyle == LegacyRenderStyles[STYLE_Add] && mybasecolormap->Fade != 0) - { - mybasecolormap = GetSpecialLights(mybasecolormap->Color, 0, mybasecolormap->Desaturate); - } - - if (visstyle.RenderStyle.Flags & STYLEF_FadeToBlack) - { - if (invertcolormap) - { // Fade to white - mybasecolormap = GetSpecialLights(mybasecolormap->Color, MAKERGB(255, 255, 255), mybasecolormap->Desaturate); - invertcolormap = false; - } - else - { // Fade to black - mybasecolormap = GetSpecialLights(mybasecolormap->Color, MAKERGB(0, 0, 0), mybasecolormap->Desaturate); - } - } - - // get light level - if (fixedcolormap != NULL) - { // fixed map - visstyle.BaseColormap = fixedcolormap; - visstyle.ColormapNum = 0; - } - else - { - if (invertcolormap) - { - mybasecolormap = GetSpecialLights(mybasecolormap->Color, mybasecolormap->Fade.InverseColor(), mybasecolormap->Desaturate); - } - if (fixedlightlev >= 0) - { - visstyle.BaseColormap = mybasecolormap; - visstyle.ColormapNum = fixedlightlev >> COLORMAPSHIFT; - } - else if (!foggy && ((thing->renderflags & RF_FULLBRIGHT) || (thing->flags5 & MF5_BRIGHT))) - { // full bright - visstyle.BaseColormap = mybasecolormap; - visstyle.ColormapNum = 0; - } - else - { // diminished light - double minz = double((2048 * 4) / double(1 << 20)); - visstyle.ColormapNum = GETPALOOKUP(r_SpriteVisibility / MAX(z, minz), spriteshade); - visstyle.BaseColormap = mybasecolormap; - } - } - - return visstyle; -} - -FTexture *VisibleSprite::GetSpriteTexture(AActor *thing, /*out*/ bool &flipX) -{ - flipX = false; - if (thing->picnum.isValid()) - { - FTexture *tex = TexMan(thing->picnum); - if (tex->UseType == FTexture::TEX_Null) - { - return 
nullptr; - } - - if (tex->Rotations != 0xFFFF) - { - // choose a different rotation based on player view - spriteframe_t *sprframe = &SpriteFrames[tex->Rotations]; - DVector3 pos = thing->InterpolatedPosition(r_TicFracF); - pos.Z += thing->GetBobOffset(r_TicFracF); - DAngle ang = (pos - ViewPos).Angle(); - angle_t rot; - if (sprframe->Texture[0] == sprframe->Texture[1]) - { - rot = (ang - thing->Angles.Yaw + 45.0 / 2 * 9).BAMs() >> 28; - } - else - { - rot = (ang - thing->Angles.Yaw + (45.0 / 2 * 9 - 180.0 / 16)).BAMs() >> 28; - } - flipX = (sprframe->Flip & (1 << rot)) != 0; - tex = TexMan[sprframe->Texture[rot]]; // Do not animate the rotation - } - return tex; - } - else - { - // decide which texture to use for the sprite - int spritenum = thing->sprite; - if (spritenum >= (signed)sprites.Size() || spritenum < 0) - return nullptr; - - spritedef_t *sprdef = &sprites[spritenum]; - if (thing->frame >= sprdef->numframes) - { - // If there are no frames at all for this sprite, don't draw it. 
- return nullptr; - } - else - { - //picnum = SpriteFrames[sprdef->spriteframes + thing->frame].Texture[0]; - // choose a different rotation based on player view - spriteframe_t *sprframe = &SpriteFrames[sprdef->spriteframes + thing->frame]; - DVector3 pos = thing->InterpolatedPosition(r_TicFracF); - pos.Z += thing->GetBobOffset(r_TicFracF); - DAngle ang = (pos - ViewPos).Angle(); - angle_t rot; - if (sprframe->Texture[0] == sprframe->Texture[1]) - { - rot = (ang - thing->Angles.Yaw + 45.0 / 2 * 9).BAMs() >> 28; - } - else - { - rot = (ang - thing->Angles.Yaw + (45.0 / 2 * 9 - 180.0 / 16)).BAMs() >> 28; - } - flipX = (sprframe->Flip & (1 << rot)) != 0; - return TexMan[sprframe->Texture[rot]]; // Do not animate the rotation - } - } -} - -///////////////////////////////////////////////////////////////////////////// - -void ScreenSprite::Render() -{ - FSpecialColormap *special = nullptr; - FColormapStyle colormapstyle; - PalEntry overlay = 0; - bool usecolormapstyle = false; - if (visstyle.BaseColormap >= &SpecialColormaps[0] && - visstyle.BaseColormap < &SpecialColormaps[SpecialColormaps.Size()]) - { - special = static_cast(visstyle.BaseColormap); - } - else if (Colormap->Color == PalEntry(255, 255, 255) && - Colormap->Desaturate == 0) - { - overlay = Colormap->Fade; - overlay.a = BYTE(visstyle.ColormapNum * 255 / NUMCOLORMAPS); - } - else - { - usecolormapstyle = true; - colormapstyle.Color = Colormap->Color; - colormapstyle.Fade = Colormap->Fade; - colormapstyle.Desaturate = Colormap->Desaturate; - colormapstyle.FadeLevel = visstyle.ColormapNum / float(NUMCOLORMAPS); - } - - screen->DrawTexture(Pic, - X1, - Y1, - DTA_DestWidthF, Width, - DTA_DestHeightF, Height, - DTA_Translation, Translation, - DTA_FlipX, Flip, - DTA_TopOffset, 0, - DTA_LeftOffset, 0, - DTA_ClipLeft, viewwindowx, - DTA_ClipTop, viewwindowy, - DTA_ClipRight, viewwindowx + viewwidth, - DTA_ClipBottom, viewwindowy + viewheight, - DTA_AlphaF, visstyle.Alpha, - DTA_RenderStyle, visstyle.RenderStyle, - 
DTA_FillColor, FillColor, - DTA_SpecialColormap, special, - DTA_ColorOverlay, overlay.d, - DTA_ColormapStyle, usecolormapstyle ? &colormapstyle : NULL, - TAG_DONE); -} diff --git a/src/r_swrenderer2.h b/src/r_swrenderer2.h deleted file mode 100644 index eaa734c875..0000000000 --- a/src/r_swrenderer2.h +++ /dev/null @@ -1,366 +0,0 @@ -/* -** Experimental Doom software renderer -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 'as-is', without any express or implied -** warranty. In no event will the authors be held liable for any damages -** arising from the use of this software. -** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. -** 3. This notice may not be removed or altered from any source distribution. -** -*/ - -#pragma once - -#include -#include -#include -#include -#include "doomdata.h" -#include "r_utility.h" -#include "r_main.h" - -// Transform for a view position and its viewport -// -// World space uses map coordinates in the XY plane. Z is up. -// Eye space means relative to viewer, Y is up and Z is into the screen. -// Viewport means in normalized device coordinates (-1 to 1 range with perspective division). 0,0 is in the center of the viewport and Y is still up. -// Screen means in final screen coordinates. 0,0 is the upper left corner and Y is down. Z is still 1/z. 
-// -class ViewPosTransform -{ -public: - DVector3 WorldToEye(const DVector3 &worldPoint) const; - DVector3 WorldToViewport(const DVector3 &worldPoint) const { return EyeToViewport(WorldToEye(worldPoint)); } - DVector3 WorldToScreen(const DVector3 &worldPoint) const { return EyeToScreen(WorldToEye(worldPoint)); } - - DVector3 EyeToViewport(const DVector3 &eyePoint) const; - DVector3 EyeToScreen(const DVector3 &eyePoint) const { return ViewportToScreen(EyeToViewport(eyePoint)); } - - DVector3 ViewportToScreen(const DVector3 &viewportPoint) const; - - double ScreenXToEye(int x, double z) const; - double ScreenYToEye(int y, double z) const; - - double NearZ() const { return 0.0078125; }; -}; - -// Screen space coordinates for a wall -class WallCoords -{ -public: - WallCoords() = default; - WallCoords(const ViewPosTransform &transform, const DVector2 &v1, const DVector2 &v2, double ceil1, double floor1, double ceil2, double floor2); - - // True if transform and clip culled the wall - bool Culled = true; - - // Plane for wall in eye space - DVector3 PlaneNormal; - double PlaneD = 0.0; - - // Z range of the wall in eye space - double NearZ = 0.0; - double FarZ = 0.0; - - // Screen space bounding box of the wall - int ScreenX1 = 0; - int ScreenX2 = 0; - int ScreenY1 = 0; - int ScreenY2 = 0; - - // Get the Y positions for the given column - short Y1(int x) const; - short Y2(int x) const; - - // Get the depth for a column - double Z(int x) const; - - // Perspective correct interpolation from start to end (used to calculate texture coordinates) - double VaryingX(int x, double start, double end) const; - double VaryingY(int x, int y, double start, double end) const; - -private: - static DVector3 Mix(const DVector3 &a, const DVector3 &b, double t); - static double Mix(double a, double b, double t); - - ViewPosTransform Transform; - DVector3 ScreenTopLeft; - DVector3 ScreenTopRight; - DVector3 ScreenBottomLeft; - DVector3 ScreenBottomRight; - double RcpDeltaScreenX = 0.0; - 
double VaryingXScale = 1.0; - double VaryingXOffset = 0.0; -}; - -// Texture coordinates for a wall -class WallTextureCoords -{ -public: - WallTextureCoords(FTexture *tex, const seg_t *line, side_t::ETexpart texpart, double topz, double bottomz, double unpeggedceil); - - double u1, u2; - double v1, v2; - -private: - void CalcU(FTexture *tex, const seg_t *line, side_t::ETexpart texpart); - void CalcV(FTexture *tex, const seg_t *line, side_t::ETexpart texpart, double topz, double bottomz, double unpeggedceil); - void CalcVTopPart(FTexture *tex, const seg_t *line, double topz, double bottomz, double vscale, double yoffset); - void CalcVMidPart(FTexture *tex, const seg_t *line, double topz, double bottomz, double vscale, double yoffset); - void CalcVBottomPart(FTexture *tex, const seg_t *line, double topz, double bottomz, double unpeggedceil, double vscale, double yoffset); -}; - -// Clipping buffers used during rendering -class RenderClipBuffer -{ -public: - void Clear(short left, short right); - void MarkSegmentCulled(const WallCoords &wallCoords, int drawIndex); - void ClipVertical(const WallCoords &wallCoords, int drawIndex); - void ClipTop(const WallCoords &wallCoords, int drawIndex); - void ClipBottom(const WallCoords &wallCoords, int drawIndex); - bool IsSegmentCulled(short x1, short x2) const; - - void SetupSpriteClip(short x1, short x2, const DVector3 &pos, bool wallSprite); - void RenderMaskedWalls(); - - short Top[MAXWIDTH]; - short Bottom[MAXWIDTH]; - - std::function DrawMaskedWall; - -private: - void AddDrawSegment(short x1, short x2, const WallCoords &wall, bool clipTop, bool clipBottom, int drawIndex); - - struct SolidSegment - { - SolidSegment(short x1, short x2) : X1(x1), X2(x2) { } - short X1, X2; - }; - - struct DrawSegment - { - short X1; - short X2; - int ClipOffset; - bool ClipTop; - bool ClipBottom; - DVector3 PlaneNormal; - double PlaneD; - double NearZ; - double FarZ; - int DrawIndex; - }; - - std::vector SolidSegments; - std::vector 
DrawSegments; - std::vector ClipValues; - - friend class VisibleSegmentsIterator; -}; - -// Walks the visible segments in a range -class VisibleSegmentsIterator -{ -public: - VisibleSegmentsIterator(const RenderClipBuffer &buffer, short startx, short endx); - bool Step(); - - short X1; - short X2; - -private: - const std::vector &SolidSegments; - short endx; - int next = 0; -}; - -// Class used to group sector ceilings/floors sharing common properties -class VisiblePlaneKey -{ -public: - VisiblePlaneKey() { } - VisiblePlaneKey(FTextureID picnum, FSWColormap *colormap, int lightlevel, secplane_t plane, const FTransform &xform) : Picnum(picnum), ColorMap(colormap), LightLevel(lightlevel), Plane(plane), Transform(xform) { } - - bool operator==(const VisiblePlaneKey &other) const - { - return Picnum == other.Picnum && LightLevel == other.LightLevel && Plane.fD() == other.Plane.fD(); - } - - FTextureID Picnum; - FSWColormap *ColorMap; - int LightLevel; - secplane_t Plane; - FTransform Transform; -}; - -// Visible plane to be rendered -class VisiblePlane -{ -public: - VisiblePlane(const VisiblePlaneKey &key) { Clear(key); } - - void Clear(const VisiblePlaneKey &key) - { - Key = key; - Left = viewwidth; - Right = 0; - for (int i = 0; i < MAXWIDTH; i++) - { - Top[i] = UnsetValue; - Bottom[i] = 0; - } - } - - VisiblePlaneKey Key; - - enum { UnsetValue = 0x7fff }; - short Left; - short Right; - short Top[MAXWIDTH]; - short Bottom[MAXWIDTH]; - std::unique_ptr Next; -}; - -class RenderVisiblePlane -{ -public: - RenderVisiblePlane(VisiblePlane *plane, FTexture *tex); - void Step(); - - double viewx; - double viewy; - double planeheight; - double basexfrac; - double baseyfrac; - double xstepscale; - double ystepscale; -}; - -// Tracks plane locations and renders them -class RenderPlanes -{ -public: - void Clear(); - void MarkCeilingPlane(const VisiblePlaneKey &key, const RenderClipBuffer &clip, const WallCoords &wallCoords); - void MarkFloorPlane(const VisiblePlaneKey &key, 
const RenderClipBuffer &clip, const WallCoords &wallCoords); - void Render(); - -private: - void RenderPlane(VisiblePlane *plane); - void RenderSpan(int y, int x1, int x2, const VisiblePlaneKey &key, FTexture *texture, const RenderVisiblePlane &renderInfo); - - VisiblePlane *GetPlaneWithUnsetRange(const VisiblePlaneKey &key, int x0, int x1); - VisiblePlane *GetPlane(const VisiblePlaneKey &key); - std::unique_ptr AllocPlane(const VisiblePlaneKey &key); - static uint32_t Hash(const VisiblePlaneKey &key) { return ((unsigned)((key.Picnum.GetIndex()) * 3 + (key.LightLevel) + (FLOAT2FIXED(key.Plane.fD())) * 7) & (NumBuckets - 1)); } - - enum { NumBuckets = 128 /* must be a power of 2 */ }; - std::unique_ptr PlaneBuckets[NumBuckets]; - std::vector> FreePlanes; -}; - -// Renders a wall texture -class RenderWall -{ -public: - void Render(const RenderClipBuffer &clip); - void RenderMasked(short x1, short x2, const short *clipTop, const short *clipBottom); - - WallCoords Coords; - const seg_t *Line; - side_t::ETexpart Texpart; - double TopZ; - double BottomZ; - double UnpeggedCeil; - FSWColormap *Colormap; - bool Masked; - -private: - FTexture *GetTexture(); - int GetShade(); - float GetLight(short x); -}; - -// Sprite thing to be rendered -class VisibleSprite -{ -public: - VisibleSprite(AActor *actor, const DVector3 &eyePos); - void Render(RenderClipBuffer *clip); - -private: - AActor *Actor; - DVector3 EyePos; - - FTexture *GetSpriteTexture(AActor *thing, /*out*/ bool &flipX); - visstyle_t GetSpriteVisStyle(AActor *thing, double z); - - friend class RenderBsp; // For sorting -}; - -// DScreen accelerated sprite to be rendered -class ScreenSprite -{ -public: - void Render(); - - FTexture *Pic = nullptr; - double X1 = 0.0; - double Y1 = 0.0; - double Width = 0.0; - double Height = 0.0; - FRemapTable *Translation = nullptr; - bool Flip = false; - visstyle_t visstyle; - uint32_t FillColor = 0; - FDynamicColormap *Colormap = nullptr; -}; - -// Renders a BSP tree in a scene 
-class RenderBsp -{ -public: - void Render(); - void RenderScreenSprites(); - -private: - void RenderNode(void *node); - void RenderSubsector(subsector_t *sub); - void AddLine(seg_t *line, sector_t *frontsector); - - void AddSprite(AActor *thing); - void AddWallSprite(AActor *thing); - bool IsThingCulled(AActor *thing); - void RenderMaskedObjects(); - - void RenderPlayerSprites(); - void RenderPlayerSprite(DPSprite *sprite, AActor *owner, float bobx, float boby, double wx, double wy, double ticfrac); - - int PointOnSide(const DVector2 &pos, const node_t *node); - - // Checks BSP node/subtree bounding box. - // Returns true if some part of the bbox might be visible. - bool CheckBBox(float *bspcoord); - - ViewPosTransform Transform; - RenderClipBuffer Clip; - RenderPlanes Planes; - std::vector VisibleSprites; - std::vector VisibleMaskedWalls; - std::vector ScreenSprites; - - const int BaseXCenter = 160; - const int BaseYCenter = 100; -}; From ae889acaea74e049cb2be670d5b57672c73b0234 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 16 Nov 2016 19:49:46 +0100 Subject: [PATCH 331/912] active_con_scale should respect uiscale --- src/c_console.cpp | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/c_console.cpp b/src/c_console.cpp index 3064abb0d2..68f4455428 100644 --- a/src/c_console.cpp +++ b/src/c_console.cpp @@ -158,10 +158,14 @@ int active_con_scale() int scale = con_scale; if (scale <= 0) { - scale = CleanXfac - 1; - if (scale <= 0) + scale = uiscale; + if (scale == 0) { - scale = 1; + scale = CleanXfac - 1; + if (scale <= 0) + { + scale = 1; + } } } return scale; From 59f7dedfb4e03690ef9228cce3874e424ef496f0 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 16 Nov 2016 22:31:49 +0100 Subject: [PATCH 332/912] Added wall sprite support. Unfortunately it would seem that a bullet hole is not a wall sprite.. 
--- src/r_poly.cpp | 2 +- src/r_poly_sprite.h | 2 +- src/r_poly_wallsprite.cpp | 96 ++++++++++++++++++++++++++++++++++++++- src/r_poly_wallsprite.h | 2 +- 4 files changed, 98 insertions(+), 4 deletions(-) diff --git a/src/r_poly.cpp b/src/r_poly.cpp index 179df248a4..919b2ff78e 100644 --- a/src/r_poly.cpp +++ b/src/r_poly.cpp @@ -183,7 +183,7 @@ void RenderPolyScene::RenderTranslucent() else if ((obj.thing->renderflags & RF_SPRITETYPEMASK) == RF_WALLSPRITE) { RenderPolyWallSprite wallspr; - wallspr.Render(obj.thing, obj.sub, obj.subsectorDepth); + wallspr.Render(WorldToClip, obj.thing, obj.sub, obj.subsectorDepth); } else { diff --git a/src/r_poly_sprite.h b/src/r_poly_sprite.h index d60710d049..085d9b5708 100644 --- a/src/r_poly_sprite.h +++ b/src/r_poly_sprite.h @@ -30,8 +30,8 @@ public: void Render(const TriMatrix &worldToClip, AActor *thing, subsector_t *sub, uint32_t subsectorDepth); static bool IsThingCulled(AActor *thing); + static FTexture *GetSpriteTexture(AActor *thing, /*out*/ bool &flipX); private: visstyle_t GetSpriteVisStyle(AActor *thing, double z); - FTexture *GetSpriteTexture(AActor *thing, /*out*/ bool &flipX); }; diff --git a/src/r_poly_wallsprite.cpp b/src/r_poly_wallsprite.cpp index 86ddd52a9f..63472e2977 100644 --- a/src/r_poly_wallsprite.cpp +++ b/src/r_poly_wallsprite.cpp @@ -28,8 +28,102 @@ #include "r_poly_wallsprite.h" #include "r_poly.h" -void RenderPolyWallSprite::Render(AActor *thing, subsector_t *sub, uint32_t subsectorDepth) +void RenderPolyWallSprite::Render(const TriMatrix &worldToClip, AActor *thing, subsector_t *sub, uint32_t subsectorDepth) { if (RenderPolySprite::IsThingCulled(thing)) return; + + DVector3 pos = thing->InterpolatedPosition(r_TicFracF); + pos.Z += thing->GetBobOffset(r_TicFracF); + + bool flipTextureX = false; + FTexture *tex = RenderPolySprite::GetSpriteTexture(thing, flipTextureX); + if (tex == nullptr) + return; + + DVector2 spriteScale = thing->Scale; + double thingxscalemul = spriteScale.X / tex->Scale.X; + 
double thingyscalemul = spriteScale.Y / tex->Scale.Y; + double spriteHeight = thingyscalemul * tex->GetHeight(); + + DAngle ang = thing->Angles.Yaw + 90; + double angcos = ang.Cos(); + double angsin = ang.Sin(); + + // Determine left and right edges of sprite. The sprite's angle is its normal, + // so the edges are 90 degrees each side of it. + double x2 = tex->GetScaledWidth() * spriteScale.X; + double x1 = tex->GetScaledLeftOffset() * spriteScale.X; + DVector2 left, right; + left.X = pos.X - x1 * angcos; + left.Y = pos.Y - x1 * angsin; + right.X = left.X + x2 * angcos; + right.Y = left.Y + x2 * angsin; + + //int scaled_to = tex->GetScaledTopOffset(); + //int scaled_bo = scaled_to - tex->GetScaledHeight(); + //gzt = pos.Z + scale.Y * scaled_to; + //gzb = pos.Z + scale.Y * scaled_bo; + + DVector2 points[2] = { right, left }; + + TriVertex *vertices = PolyVertexBuffer::GetVertices(4); + if (!vertices) + return; + + bool foggy = false; + int actualextralight = foggy ? 0 : extralight << 4; + + std::pair offsets[4] = + { + { 0.0f, 1.0f }, + { 1.0f, 1.0f }, + { 1.0f, 0.0f }, + { 0.0f, 0.0f }, + }; + + for (int i = 0; i < 4; i++) + { + auto &p = (i == 0 || i == 3) ?
points[0] : points[1]; + + vertices[i].x = (float)p.X; + vertices[i].y = (float)p.Y; + vertices[i].z = (float)(pos.Z + spriteHeight * offsets[i].second); + vertices[i].w = 1.0f; + vertices[i].varying[0] = (float)(offsets[i].first * tex->Scale.X); + vertices[i].varying[1] = (float)((1.0f - offsets[i].second) * tex->Scale.Y); + if (flipTextureX) + vertices[i].varying[0] = 1.0f - vertices[i].varying[0]; + } + + bool fullbrightSprite = ((thing->renderflags & RF_FULLBRIGHT) || (thing->flags5 & MF5_BRIGHT)); + + TriUniforms uniforms; + uniforms.objectToClip = worldToClip; + if (fullbrightSprite || fixedlightlev >= 0 || fixedcolormap) + { + uniforms.light = 256; + uniforms.flags = TriUniforms::fixed_light; + } + else + { + uniforms.light = (uint32_t)((thing->Sector->lightlevel + actualextralight) / 255.0f * 256.0f); + uniforms.flags = 0; + } + uniforms.subsectorDepth = subsectorDepth; + + PolyDrawArgs args; + args.uniforms = uniforms; + args.vinput = vertices; + args.vcount = 4; + args.mode = TriangleDrawMode::Fan; + args.ccw = true; + args.clipleft = 0; + args.cliptop = 0; + args.clipright = viewwidth; + args.clipbottom = viewheight; + args.stenciltestvalue = 0; + args.stencilwritevalue = 1; + args.SetTexture(tex); + PolyTriangleDrawer::draw(args, TriDrawVariant::DrawSubsector); } diff --git a/src/r_poly_wallsprite.h b/src/r_poly_wallsprite.h index ce6917e58c..dd4ba28dac 100644 --- a/src/r_poly_wallsprite.h +++ b/src/r_poly_wallsprite.h @@ -27,5 +27,5 @@ class RenderPolyWallSprite { public: - void Render(AActor *thing, subsector_t *sub, uint32_t subsectorDepth); + void Render(const TriMatrix &worldToClip, AActor *thing, subsector_t *sub, uint32_t subsectorDepth); }; From ff2ab61abc4a87ddbfc69b1b258e4f398908ff41 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 16 Nov 2016 23:34:05 +0100 Subject: [PATCH 333/912] Added decals --- src/CMakeLists.txt | 1 + src/r_poly_decal.cpp | 159 ++++++++++++++++++++++++++++++++++++++ src/r_poly_decal.h | 34 ++++++++ 
src/r_poly_wall.cpp | 3 + src/r_poly_wallsprite.cpp | 2 +- 5 files changed, 198 insertions(+), 1 deletion(-) create mode 100644 src/r_poly_decal.cpp create mode 100644 src/r_poly_decal.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index baef29c213..34e0e72d02 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1070,6 +1070,7 @@ set( FASTMATH_PCH_SOURCES r_swrenderer.cpp r_poly.cpp r_poly_cull.cpp + r_poly_decal.cpp r_poly_particle.cpp r_poly_plane.cpp r_poly_playersprite.cpp diff --git a/src/r_poly_decal.cpp b/src/r_poly_decal.cpp new file mode 100644 index 0000000000..af9c53a89e --- /dev/null +++ b/src/r_poly_decal.cpp @@ -0,0 +1,159 @@ +/* +** Handling drawing a decal +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. 
+** +*/ + +#include +#include "templates.h" +#include "doomdef.h" +#include "sbar.h" +#include "r_data/r_translate.h" +#include "r_poly_decal.h" +#include "r_poly.h" +#include "a_sharedglobal.h" + +void RenderPolyDecal::RenderWallDecals(const TriMatrix &worldToClip, const seg_t *line, uint32_t subsectorDepth) +{ + for (DBaseDecal *decal = line->sidedef->AttachedDecals; decal != nullptr; decal = decal->WallNext) + { + RenderPolyDecal render; + render.Render(worldToClip, decal, line, subsectorDepth); + } +} + +void RenderPolyDecal::Render(const TriMatrix &worldToClip, DBaseDecal *decal, const seg_t *line, uint32_t subsectorDepth) +{ + if (decal->RenderFlags & RF_INVISIBLE || !viewactive || !decal->PicNum.isValid()) + return; + + FTexture *tex = TexMan(decal->PicNum, true); + if (tex == nullptr || tex->UseType == FTexture::TEX_Null) + return; + + double edge_right = tex->GetWidth(); + double edge_left = tex->LeftOffset; + edge_right = (edge_right - edge_left) * decal->ScaleX; + edge_left *= decal->ScaleX; + + double dcx, dcy; + decal->GetXY(line->sidedef, dcx, dcy); + DVector2 decal_pos = { dcx, dcy }; + + DVector2 angvec = (line->v2->fPos() - line->v1->fPos()).Unit(); + DVector2 decal_left = decal_pos - edge_left * angvec; + DVector2 decal_right = decal_pos + edge_right * angvec; + + // Determine actor z + double zpos = decal->Z; + sector_t *front = line->frontsector; + sector_t *back = (line->backsector != nullptr) ? 
line->backsector : line->frontsector; + switch (decal->RenderFlags & RF_RELMASK) + { + default: + zpos = decal->Z; + break; + case RF_RELUPPER: + if (line->linedef->flags & ML_DONTPEGTOP) + zpos = decal->Z + front->GetPlaneTexZ(sector_t::ceiling); + else + zpos = decal->Z + back->GetPlaneTexZ(sector_t::ceiling); + break; + case RF_RELLOWER: + if (line->linedef->flags & ML_DONTPEGBOTTOM) + zpos = decal->Z + front->GetPlaneTexZ(sector_t::ceiling); + else + zpos = decal->Z + back->GetPlaneTexZ(sector_t::floor); + break; + case RF_RELMID: + if (line->linedef->flags & ML_DONTPEGBOTTOM) + zpos = decal->Z + front->GetPlaneTexZ(sector_t::floor); + else + zpos = decal->Z + front->GetPlaneTexZ(sector_t::ceiling); + } + + DVector2 spriteScale = { decal->ScaleX, decal->ScaleY }; + double thingxscalemul = spriteScale.X / tex->Scale.X; + double thingyscalemul = spriteScale.Y / tex->Scale.Y; + double spriteHeight = thingyscalemul * tex->GetHeight(); + + bool flipTextureX = (decal->RenderFlags & RF_XFLIP) == RF_XFLIP; + + DVector2 points[2] = { decal_left, decal_right }; + + TriVertex *vertices = PolyVertexBuffer::GetVertices(4); + if (!vertices) + return; + + bool foggy = false; + int actualextralight = foggy ? 0 : extralight << 4; + + std::pair offsets[4] = + { + { 0.0f, 1.0f }, + { 1.0f, 1.0f }, + { 1.0f, 0.0f }, + { 0.0f, 0.0f }, + }; + + for (int i = 0; i < 4; i++) + { + auto &p = (i == 0 || i == 3) ? 
points[0] : points[1]; + + vertices[i].x = (float)p.X; + vertices[i].y = (float)p.Y; + vertices[i].z = (float)(zpos + spriteHeight * offsets[i].second); + vertices[i].w = 1.0f; + vertices[i].varying[0] = (float)(offsets[i].first * tex->Scale.X); + vertices[i].varying[1] = (float)((1.0f - offsets[i].second) * tex->Scale.Y); + if (flipTextureX) + vertices[i].varying[0] = 1.0f - vertices[i].varying[0]; + } + + bool fullbrightSprite = (decal->RenderFlags & RF_FULLBRIGHT) == RF_FULLBRIGHT; + + TriUniforms uniforms; + uniforms.objectToClip = worldToClip; + if (fullbrightSprite || fixedlightlev >= 0 || fixedcolormap) + { + uniforms.light = 256; + uniforms.flags = TriUniforms::fixed_light; + } + else + { + uniforms.light = (uint32_t)((front->lightlevel + actualextralight) / 255.0f * 256.0f); + uniforms.flags = 0; + } + uniforms.subsectorDepth = subsectorDepth; + + PolyDrawArgs args; + args.uniforms = uniforms; + args.vinput = vertices; + args.vcount = 4; + args.mode = TriangleDrawMode::Fan; + args.ccw = true; + args.clipleft = 0; + args.cliptop = 0; + args.clipright = viewwidth; + args.clipbottom = viewheight; + args.stenciltestvalue = 0; + args.stencilwritevalue = 1; + args.SetTexture(tex); + PolyTriangleDrawer::draw(args, TriDrawVariant::DrawSubsector); +} diff --git a/src/r_poly_decal.h b/src/r_poly_decal.h new file mode 100644 index 0000000000..39b250abba --- /dev/null +++ b/src/r_poly_decal.h @@ -0,0 +1,34 @@ +/* +** Handling drawing a decal +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. 
The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. +** +*/ + +#pragma once + +#include "r_triangle.h" + +class RenderPolyDecal +{ +public: + static void RenderWallDecals(const TriMatrix &worldToClip, const seg_t *line, uint32_t subsectorDepth); + +private: + void Render(const TriMatrix &worldToClip, DBaseDecal *decal, const seg_t *line, uint32_t subsectorDepth); +}; diff --git a/src/r_poly_wall.cpp b/src/r_poly_wall.cpp index 6921b1a33b..2ed225cf20 100644 --- a/src/r_poly_wall.cpp +++ b/src/r_poly_wall.cpp @@ -26,6 +26,7 @@ #include "sbar.h" #include "r_data/r_translate.h" #include "r_poly_wall.h" +#include "r_poly_decal.h" #include "r_poly.h" #include "r_sky.h" // for skyflatnum @@ -203,6 +204,8 @@ void RenderPolyWall::Render(const TriMatrix &worldToClip) { PolyTriangleDrawer::draw(args, TriDrawVariant::DrawSubsector); } + + RenderPolyDecal::RenderWallDecals(worldToClip, Line, SubsectorDepth); } void RenderPolyWall::ClampHeight(TriVertex &v1, TriVertex &v2) diff --git a/src/r_poly_wallsprite.cpp b/src/r_poly_wallsprite.cpp index 63472e2977..c6b3a65690 100644 --- a/src/r_poly_wallsprite.cpp +++ b/src/r_poly_wallsprite.cpp @@ -65,7 +65,7 @@ void RenderPolyWallSprite::Render(const TriMatrix &worldToClip, AActor *thing, s //gzt = pos.Z + scale.Y * scaled_to; //gzb = pos.Z + scale.Y * scaled_bo; - DVector2 points[2] = { right, left }; + DVector2 points[2] = { left, right }; TriVertex *vertices = PolyVertexBuffer::GetVertices(4); if (!vertices) From 266924600aa33042d955d4c27c98ea52a132361c Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 17 Nov 2016 01:28:51 
+0100 Subject: [PATCH 334/912] Missing include guard --- src/p_effect.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/p_effect.h b/src/p_effect.h index 614702a49d..eb417d63c3 100644 --- a/src/p_effect.h +++ b/src/p_effect.h @@ -31,6 +31,8 @@ ** */ +#pragma once + #include "vectors.h" #define FX_ROCKET 0x00000001 From 511eb59479d13741aa6addc8b9d19b9e770d3822 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 17 Nov 2016 01:29:08 +0100 Subject: [PATCH 335/912] Add particle drawing --- .../fixedfunction/drawtrianglecodegen.cpp | 16 +++-- src/r_compiler/llvmdrawers.cpp | 4 ++ src/r_compiler/llvmdrawers.h | 3 + src/r_poly.cpp | 18 ++++- src/r_poly.h | 3 + src/r_poly_particle.cpp | 72 ++++++++++++++++++- src/r_poly_particle.h | 5 +- src/r_poly_triangle.cpp | 2 + 8 files changed, 113 insertions(+), 10 deletions(-) diff --git a/src/r_compiler/fixedfunction/drawtrianglecodegen.cpp b/src/r_compiler/fixedfunction/drawtrianglecodegen.cpp index 11bdad7e43..7368fa1210 100644 --- a/src/r_compiler/fixedfunction/drawtrianglecodegen.cpp +++ b/src/r_compiler/fixedfunction/drawtrianglecodegen.cpp @@ -260,7 +260,7 @@ void DrawTriangleCodegen::LoopBlockX(TriDrawVariant variant, bool truecolor) SetStencilBlock(x / 8 + y / 8 * stencilPitch); SSABool covered = a == SSAInt(0xF) && b == SSAInt(0xF) && c == SSAInt(0xF) && !clipneeded; - if (variant != TriDrawVariant::DrawSubsector) + if (variant != TriDrawVariant::DrawSubsector && variant != TriDrawVariant::FillSubsector) { covered = covered && StencilIsSingleValue(); } @@ -287,7 +287,7 @@ void DrawTriangleCodegen::LoopBlockX(TriDrawVariant variant, bool truecolor) void DrawTriangleCodegen::LoopFullBlock(TriDrawVariant variant, bool truecolor) { SSAIfBlock branch_stenciltest; - if (variant != TriDrawVariant::DrawSubsector) + if (variant != TriDrawVariant::DrawSubsector && variant != TriDrawVariant::FillSubsector) { branch_stenciltest.if_block(StencilGetSingle() == stencilTestValue); } @@ -325,7 +325,7 @@ void 
DrawTriangleCodegen::LoopFullBlock(TriDrawVariant variant, bool truecolor) varying[i] = stack_varying[i].load(); loopx.loop_block(ix < SSAInt(q), q); { - if (variant == TriDrawVariant::DrawSubsector) + if (variant == TriDrawVariant::DrawSubsector || variant == TriDrawVariant::FillSubsector) { SSAIfBlock branch; branch.if_block(subsectorbuffer[ix].load(true) >= subsectorDepth); @@ -353,7 +353,7 @@ void DrawTriangleCodegen::LoopFullBlock(TriDrawVariant variant, bool truecolor) loopy.end_block(); } - if (variant != TriDrawVariant::DrawSubsector) + if (variant != TriDrawVariant::DrawSubsector && variant != TriDrawVariant::FillSubsector) { branch_stenciltest.end_block(); } @@ -404,7 +404,7 @@ void DrawTriangleCodegen::LoopPartialBlock(TriDrawVariant variant, bool truecolo SSABool visible = (ix + x >= clipleft) && (ix + x < clipright) && (cliptop <= y + iy) && (clipbottom > y + iy); SSABool covered = CX1 > SSAInt(0) && CX2 > SSAInt(0) && CX3 > SSAInt(0) && visible; - if (variant == TriDrawVariant::DrawSubsector) + if (variant == TriDrawVariant::DrawSubsector || variant == TriDrawVariant::FillSubsector) { covered = covered && subsectorbuffer[ix].load(true) >= subsectorDepth; } @@ -449,7 +449,7 @@ void DrawTriangleCodegen::LoopPartialBlock(TriDrawVariant variant, bool truecolo void DrawTriangleCodegen::ProcessPixel(SSAUBytePtr buffer, SSAIntPtr subsectorbuffer, SSAInt *varying, TriDrawVariant variant, bool truecolor) { - if (variant == TriDrawVariant::Fill) + if (variant == TriDrawVariant::Fill || variant == TriDrawVariant::FillSubsector) { if (truecolor) { @@ -459,7 +459,9 @@ void DrawTriangleCodegen::ProcessPixel(SSAUBytePtr buffer, SSAIntPtr subsectorbu { //buffer.store(solidcolor); } - subsectorbuffer.store(subsectorDepth); + + if (variant != TriDrawVariant::FillSubsector) + subsectorbuffer.store(subsectorDepth); } else { diff --git a/src/r_compiler/llvmdrawers.cpp b/src/r_compiler/llvmdrawers.cpp index b3d893153e..e7b9f6e5c1 100644 --- a/src/r_compiler/llvmdrawers.cpp 
+++ b/src/r_compiler/llvmdrawers.cpp @@ -190,6 +190,8 @@ LLVMDrawersImpl::LLVMDrawersImpl() CodegenDrawTriangle("TriDraw32", TriDrawVariant::Draw, true); CodegenDrawTriangle("TriDrawSubsector8", TriDrawVariant::DrawSubsector, false); CodegenDrawTriangle("TriDrawSubsector32", TriDrawVariant::DrawSubsector, true); + CodegenDrawTriangle("TriFillSubsector8", TriDrawVariant::FillSubsector, false); + CodegenDrawTriangle("TriFillSubsector32", TriDrawVariant::FillSubsector, true); CodegenDrawTriangle("TriFill8", TriDrawVariant::Fill, false); CodegenDrawTriangle("TriFill32", TriDrawVariant::Fill, true); CodegenDrawTriangle("TriStencil", TriDrawVariant::Stencil, false); @@ -262,6 +264,8 @@ LLVMDrawersImpl::LLVMDrawersImpl() TriDraw32 = mProgram.GetProcAddress("TriDraw32"); TriDrawSubsector8 = mProgram.GetProcAddress("TriDrawSubsector8"); TriDrawSubsector32 = mProgram.GetProcAddress("TriDrawSubsector32"); + TriFillSubsector8 = mProgram.GetProcAddress("TriFillSubsector8"); + TriFillSubsector32 = mProgram.GetProcAddress("TriFillSubsector32"); TriFill8 = mProgram.GetProcAddress("TriFill8"); TriFill32 = mProgram.GetProcAddress("TriFill32"); TriStencil = mProgram.GetProcAddress("TriStencil"); diff --git a/src/r_compiler/llvmdrawers.h b/src/r_compiler/llvmdrawers.h index 38f0f82d14..e7a34b962d 100644 --- a/src/r_compiler/llvmdrawers.h +++ b/src/r_compiler/llvmdrawers.h @@ -266,6 +266,7 @@ enum class TriDrawVariant DrawMasked, Fill, DrawSubsector, + FillSubsector, Stencil, }; @@ -348,6 +349,8 @@ public: void(*TriDraw32)(const TriDrawTriangleArgs *, WorkerThreadData *) = nullptr; void(*TriDrawSubsector8)(const TriDrawTriangleArgs *, WorkerThreadData *) = nullptr; void(*TriDrawSubsector32)(const TriDrawTriangleArgs *, WorkerThreadData *) = nullptr; + void(*TriFillSubsector8)(const TriDrawTriangleArgs *, WorkerThreadData *) = nullptr; + void(*TriFillSubsector32)(const TriDrawTriangleArgs *, WorkerThreadData *) = nullptr; void(*TriFill8)(const TriDrawTriangleArgs *, WorkerThreadData *) 
= nullptr; void(*TriFill32)(const TriDrawTriangleArgs *, WorkerThreadData *) = nullptr; void(*TriStencil)(const TriDrawTriangleArgs *, WorkerThreadData *) = nullptr; diff --git a/src/r_poly.cpp b/src/r_poly.cpp index 919b2ff78e..d4f24be59c 100644 --- a/src/r_poly.cpp +++ b/src/r_poly.cpp @@ -111,6 +111,17 @@ void RenderPolyScene::RenderSubsector(subsector_t *sub) RenderLine(sub, line, frontsector, subsectorDepth); } + bool mainBSP = ((unsigned int)(sub - subsectors) < (unsigned int)numsubsectors); + if (mainBSP) + { + int subsectorIndex = (int)(sub - subsectors); + for (int i = ParticlesInSubsec[subsectorIndex]; i != NO_PARTICLE; i = Particles[i].snext) + { + particle_t *particle = Particles + i; + TranslucentObjects.push_back({ particle, sub, subsectorDepth }); + } + } + SpriteRange sprites = GetSpritesForSector(sub->sector); for (int i = 0; i < sprites.Count; i++) { @@ -176,7 +187,12 @@ void RenderPolyScene::RenderTranslucent() for (auto it = TranslucentObjects.rbegin(); it != TranslucentObjects.rend(); ++it) { auto &obj = *it; - if (!obj.thing) + if (obj.particle) + { + RenderPolyParticle spr; + spr.Render(WorldToClip, obj.particle, obj.sub, obj.subsectorDepth); + } + else if (!obj.thing) { obj.wall.Render(WorldToClip); } diff --git a/src/r_poly.h b/src/r_poly.h index 4f7eca61dd..f18f8a3cd9 100644 --- a/src/r_poly.h +++ b/src/r_poly.h @@ -35,6 +35,7 @@ #include "r_poly_sprite.h" #include "r_poly_wallsprite.h" #include "r_poly_playersprite.h" +#include "r_poly_particle.h" #include "r_poly_plane.h" #include "r_poly_sky.h" #include "r_poly_cull.h" @@ -53,9 +54,11 @@ public: class PolyTranslucentObject { public: + PolyTranslucentObject(particle_t *particle, subsector_t *sub, uint32_t subsectorDepth) : particle(particle), sub(sub), subsectorDepth(subsectorDepth) { } PolyTranslucentObject(AActor *thing, subsector_t *sub, uint32_t subsectorDepth) : thing(thing), sub(sub), subsectorDepth(subsectorDepth) { } PolyTranslucentObject(RenderPolyWall wall) : wall(wall) { } + 
particle_t *particle = nullptr; AActor *thing = nullptr; subsector_t *sub = nullptr; uint32_t subsectorDepth = 0; diff --git a/src/r_poly_particle.cpp b/src/r_poly_particle.cpp index 06b963e4ff..f0b2219886 100644 --- a/src/r_poly_particle.cpp +++ b/src/r_poly_particle.cpp @@ -28,6 +28,76 @@ #include "r_poly_particle.h" #include "r_poly.h" -void RenderPolyParticle::Render() +void RenderPolyParticle::Render(const TriMatrix &worldToClip, particle_t *particle, subsector_t *sub, uint32_t subsectorDepth) { + DVector3 pos = particle->Pos; + double psize = particle->size / 8.0; + double zpos = pos.Z; + + DVector2 points[2] = + { + { pos.X - ViewSin * psize, pos.Y + ViewCos * psize }, + { pos.X + ViewSin * psize, pos.Y - ViewCos * psize } + }; + + TriVertex *vertices = PolyVertexBuffer::GetVertices(4); + if (!vertices) + return; + + bool foggy = false; + int actualextralight = foggy ? 0 : extralight << 4; + + std::pair offsets[4] = + { + { 0.0f, 1.0f }, + { 1.0f, 1.0f }, + { 1.0f, 0.0f }, + { 0.0f, 0.0f }, + }; + + for (int i = 0; i < 4; i++) + { + auto &p = (i == 0 || i == 3) ? 
points[0] : points[1]; + + vertices[i].x = (float)p.X; + vertices[i].y = (float)p.Y; + vertices[i].z = (float)(zpos + psize * (2.0 * offsets[i].second - 1.0)); + vertices[i].w = 1.0f; + vertices[i].varying[0] = (float)(offsets[i].first); + vertices[i].varying[1] = (float)(1.0f - offsets[i].second); + } + + // int color = (particle->color >> 24) & 0xff; // pal index, I think + bool fullbrightSprite = particle->bright != 0; + + TriUniforms uniforms; + uniforms.objectToClip = worldToClip; + if (fullbrightSprite || fixedlightlev >= 0 || fixedcolormap) + { + uniforms.light = 256; + uniforms.flags = TriUniforms::fixed_light; + } + else + { + uniforms.light = (uint32_t)((sub->sector->lightlevel + actualextralight) / 255.0f * 256.0f); + uniforms.flags = 0; + } + uniforms.subsectorDepth = subsectorDepth; + + uint32_t alpha = particle->trans; + + PolyDrawArgs args; + args.uniforms = uniforms; + args.vinput = vertices; + args.vcount = 4; + args.mode = TriangleDrawMode::Fan; + args.ccw = true; + args.clipleft = 0; + args.cliptop = 0; + args.clipright = viewwidth; + args.clipbottom = viewheight; + args.stenciltestvalue = 0; + args.stencilwritevalue = 1; + args.solidcolor = (alpha << 24) | (particle->color & 0xffffff); + PolyTriangleDrawer::draw(args, TriDrawVariant::FillSubsector); } diff --git a/src/r_poly_particle.h b/src/r_poly_particle.h index 5573a7e482..f4218a310e 100644 --- a/src/r_poly_particle.h +++ b/src/r_poly_particle.h @@ -22,8 +22,11 @@ #pragma once +#include "r_poly_triangle.h" +#include "p_effect.h" + class RenderPolyParticle { public: - void Render(); + void Render(const TriMatrix &worldToClip, particle_t *particle, subsector_t *sub, uint32_t subsectorDepth); }; diff --git a/src/r_poly_triangle.cpp b/src/r_poly_triangle.cpp index 3d444d2ada..6426bae4ed 100644 --- a/src/r_poly_triangle.cpp +++ b/src/r_poly_triangle.cpp @@ -62,6 +62,7 @@ void PolyTriangleDrawer::draw_arrays(const PolyDrawArgs &drawargs, TriDrawVarian case TriDrawVariant::Draw: drawfunc = 
r_swtruecolor ? llvm->TriDraw32: llvm->TriDraw8; break; case TriDrawVariant::Fill: drawfunc = r_swtruecolor ? llvm->TriFill32 : llvm->TriFill8; break; case TriDrawVariant::DrawSubsector: drawfunc = r_swtruecolor ? llvm->TriDrawSubsector32 : llvm->TriDrawSubsector8; break; + case TriDrawVariant::FillSubsector: drawfunc = r_swtruecolor ? llvm->TriFillSubsector32 : llvm->TriFillSubsector8; break; case TriDrawVariant::Stencil: drawfunc = llvm->TriStencil; break; } #else @@ -69,6 +70,7 @@ void PolyTriangleDrawer::draw_arrays(const PolyDrawArgs &drawargs, TriDrawVarian { default: case TriDrawVariant::Draw: drawfunc = r_swtruecolor ? ScreenPolyTriangleDrawer::draw32 : ScreenPolyTriangleDrawer::draw; break; + case TriDrawVariant::FillSubsector: case TriDrawVariant::Fill: drawfunc = r_swtruecolor ? ScreenPolyTriangleDrawer::fill32 : ScreenPolyTriangleDrawer::fill; break; case TriDrawVariant::DrawSubsector: drawfunc = r_swtruecolor ? ScreenPolyTriangleDrawer::drawsubsector32 : llvm->TriDrawSubsector8; break; case TriDrawVariant::Stencil: drawfunc = ScreenPolyTriangleDrawer::stencil; break; From b02e3d9514af94c39b9125f7408c7e7754c2be8b Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 17 Nov 2016 14:28:14 +0100 Subject: [PATCH 336/912] Add shaded blend mode for the decals --- .../fixedfunction/drawtrianglecodegen.cpp | 34 +++++++++++++------ src/r_compiler/llvmdrawers.cpp | 4 +++ src/r_compiler/llvmdrawers.h | 3 ++ src/r_poly_decal.cpp | 4 ++- src/r_poly_triangle.cpp | 2 ++ 5 files changed, 36 insertions(+), 11 deletions(-) diff --git a/src/r_compiler/fixedfunction/drawtrianglecodegen.cpp b/src/r_compiler/fixedfunction/drawtrianglecodegen.cpp index 7368fa1210..6f9af51fc3 100644 --- a/src/r_compiler/fixedfunction/drawtrianglecodegen.cpp +++ b/src/r_compiler/fixedfunction/drawtrianglecodegen.cpp @@ -260,7 +260,7 @@ void DrawTriangleCodegen::LoopBlockX(TriDrawVariant variant, bool truecolor) SetStencilBlock(x / 8 + y / 8 * stencilPitch); SSABool covered = a == 
SSAInt(0xF) && b == SSAInt(0xF) && c == SSAInt(0xF) && !clipneeded; - if (variant != TriDrawVariant::DrawSubsector && variant != TriDrawVariant::FillSubsector) + if (variant != TriDrawVariant::DrawSubsector && variant != TriDrawVariant::DrawShadedSubsector && variant != TriDrawVariant::FillSubsector) { covered = covered && StencilIsSingleValue(); } @@ -287,7 +287,7 @@ void DrawTriangleCodegen::LoopBlockX(TriDrawVariant variant, bool truecolor) void DrawTriangleCodegen::LoopFullBlock(TriDrawVariant variant, bool truecolor) { SSAIfBlock branch_stenciltest; - if (variant != TriDrawVariant::DrawSubsector && variant != TriDrawVariant::FillSubsector) + if (variant != TriDrawVariant::DrawSubsector && variant != TriDrawVariant::DrawShadedSubsector && variant != TriDrawVariant::FillSubsector) { branch_stenciltest.if_block(StencilGetSingle() == stencilTestValue); } @@ -325,7 +325,7 @@ void DrawTriangleCodegen::LoopFullBlock(TriDrawVariant variant, bool truecolor) varying[i] = stack_varying[i].load(); loopx.loop_block(ix < SSAInt(q), q); { - if (variant == TriDrawVariant::DrawSubsector || variant == TriDrawVariant::FillSubsector) + if (variant == TriDrawVariant::DrawSubsector || variant == TriDrawVariant::DrawShadedSubsector || variant == TriDrawVariant::FillSubsector) { SSAIfBlock branch; branch.if_block(subsectorbuffer[ix].load(true) >= subsectorDepth); @@ -353,7 +353,7 @@ void DrawTriangleCodegen::LoopFullBlock(TriDrawVariant variant, bool truecolor) loopy.end_block(); } - if (variant != TriDrawVariant::DrawSubsector && variant != TriDrawVariant::FillSubsector) + if (variant != TriDrawVariant::DrawSubsector && variant != TriDrawVariant::DrawShadedSubsector && variant != TriDrawVariant::FillSubsector) { branch_stenciltest.end_block(); } @@ -404,7 +404,7 @@ void DrawTriangleCodegen::LoopPartialBlock(TriDrawVariant variant, bool truecolo SSABool visible = (ix + x >= clipleft) && (ix + x < clipright) && (cliptop <= y + iy) && (clipbottom > y + iy); SSABool covered = CX1 > 
SSAInt(0) && CX2 > SSAInt(0) && CX3 > SSAInt(0) && visible; - if (variant == TriDrawVariant::DrawSubsector || variant == TriDrawVariant::FillSubsector) + if (variant == TriDrawVariant::DrawSubsector || variant == TriDrawVariant::DrawShadedSubsector || variant == TriDrawVariant::FillSubsector) { covered = covered && subsectorbuffer[ix].load(true) >= subsectorDepth; } @@ -474,13 +474,13 @@ void DrawTriangleCodegen::ProcessPixel(SSAUBytePtr buffer, SSAIntPtr subsectorbu if (truecolor) { - SSAVec4i fg = texturePixels[uvoffset * 4].load_vec4ub(true); - SSAInt fg_alpha = fg[3]; - fg = (fg * currentlight) >> 8; - fg.insert(3, fg_alpha); - if (variant == TriDrawVariant::DrawMasked || variant == TriDrawVariant::DrawSubsector) { + SSAVec4i fg = texturePixels[uvoffset * 4].load_vec4ub(true); + SSAInt fg_alpha = fg[3]; + fg = (fg * currentlight) >> 8; + fg.insert(3, fg_alpha); + SSAIfBlock branch_transparency; branch_transparency.if_block(fg_alpha > SSAInt(127)); { @@ -490,8 +490,22 @@ void DrawTriangleCodegen::ProcessPixel(SSAUBytePtr buffer, SSAIntPtr subsectorbu } branch_transparency.end_block(); } + else if (variant == TriDrawVariant::DrawShadedSubsector) + { + SSAInt alpha = texturePixels[uvoffset * 4].load(true).zext_int(); + alpha = alpha + (alpha >> 7); // // 255 -> 256 + SSAInt inv_alpha = 256 - alpha; + + SSAVec4i bgcolor = buffer.load_vec4ub(false); + buffer.store_vec4ub(blend_add(shade_bgra_simple(SSAVec4i::unpack(solidcolor), currentlight), bgcolor, alpha, inv_alpha)); + } else { + SSAVec4i fg = texturePixels[uvoffset * 4].load_vec4ub(true); + SSAInt fg_alpha = fg[3]; + fg = (fg * currentlight) >> 8; + fg.insert(3, fg_alpha); + buffer.store_vec4ub(fg); subsectorbuffer.store(subsectorDepth); } diff --git a/src/r_compiler/llvmdrawers.cpp b/src/r_compiler/llvmdrawers.cpp index e7b9f6e5c1..458d90d1a3 100644 --- a/src/r_compiler/llvmdrawers.cpp +++ b/src/r_compiler/llvmdrawers.cpp @@ -190,6 +190,8 @@ LLVMDrawersImpl::LLVMDrawersImpl() CodegenDrawTriangle("TriDraw32", 
TriDrawVariant::Draw, true); CodegenDrawTriangle("TriDrawSubsector8", TriDrawVariant::DrawSubsector, false); CodegenDrawTriangle("TriDrawSubsector32", TriDrawVariant::DrawSubsector, true); + CodegenDrawTriangle("TriDrawShadedSubsector8", TriDrawVariant::DrawShadedSubsector, false); + CodegenDrawTriangle("TriDrawShadedSubsector32", TriDrawVariant::DrawShadedSubsector, true); CodegenDrawTriangle("TriFillSubsector8", TriDrawVariant::FillSubsector, false); CodegenDrawTriangle("TriFillSubsector32", TriDrawVariant::FillSubsector, true); CodegenDrawTriangle("TriFill8", TriDrawVariant::Fill, false); @@ -264,6 +266,8 @@ LLVMDrawersImpl::LLVMDrawersImpl() TriDraw32 = mProgram.GetProcAddress("TriDraw32"); TriDrawSubsector8 = mProgram.GetProcAddress("TriDrawSubsector8"); TriDrawSubsector32 = mProgram.GetProcAddress("TriDrawSubsector32"); + TriDrawShadedSubsector8 = mProgram.GetProcAddress("TriDrawShadedSubsector8"); + TriDrawShadedSubsector32 = mProgram.GetProcAddress("TriDrawShadedSubsector32"); TriFillSubsector8 = mProgram.GetProcAddress("TriFillSubsector8"); TriFillSubsector32 = mProgram.GetProcAddress("TriFillSubsector32"); TriFill8 = mProgram.GetProcAddress("TriFill8"); diff --git a/src/r_compiler/llvmdrawers.h b/src/r_compiler/llvmdrawers.h index e7a34b962d..9c3a3e45f5 100644 --- a/src/r_compiler/llvmdrawers.h +++ b/src/r_compiler/llvmdrawers.h @@ -266,6 +266,7 @@ enum class TriDrawVariant DrawMasked, Fill, DrawSubsector, + DrawShadedSubsector, FillSubsector, Stencil, }; @@ -349,6 +350,8 @@ public: void(*TriDraw32)(const TriDrawTriangleArgs *, WorkerThreadData *) = nullptr; void(*TriDrawSubsector8)(const TriDrawTriangleArgs *, WorkerThreadData *) = nullptr; void(*TriDrawSubsector32)(const TriDrawTriangleArgs *, WorkerThreadData *) = nullptr; + void(*TriDrawShadedSubsector8)(const TriDrawTriangleArgs *, WorkerThreadData *) = nullptr; + void(*TriDrawShadedSubsector32)(const TriDrawTriangleArgs *, WorkerThreadData *) = nullptr; void(*TriFillSubsector8)(const 
TriDrawTriangleArgs *, WorkerThreadData *) = nullptr; void(*TriFillSubsector32)(const TriDrawTriangleArgs *, WorkerThreadData *) = nullptr; void(*TriFill8)(const TriDrawTriangleArgs *, WorkerThreadData *) = nullptr; diff --git a/src/r_poly_decal.cpp b/src/r_poly_decal.cpp index af9c53a89e..011ce83e47 100644 --- a/src/r_poly_decal.cpp +++ b/src/r_poly_decal.cpp @@ -155,5 +155,7 @@ void RenderPolyDecal::Render(const TriMatrix &worldToClip, DBaseDecal *decal, co args.stenciltestvalue = 0; args.stencilwritevalue = 1; args.SetTexture(tex); - PolyTriangleDrawer::draw(args, TriDrawVariant::DrawSubsector); + args.solidcolor = decal->AlphaColor; + //mode = R_SetPatchStyle (decal->RenderStyle, (float)decal->Alpha, decal->Translation, decal->AlphaColor); + PolyTriangleDrawer::draw(args, TriDrawVariant::DrawShadedSubsector); } diff --git a/src/r_poly_triangle.cpp b/src/r_poly_triangle.cpp index 6426bae4ed..64611376b8 100644 --- a/src/r_poly_triangle.cpp +++ b/src/r_poly_triangle.cpp @@ -62,6 +62,7 @@ void PolyTriangleDrawer::draw_arrays(const PolyDrawArgs &drawargs, TriDrawVarian case TriDrawVariant::Draw: drawfunc = r_swtruecolor ? llvm->TriDraw32: llvm->TriDraw8; break; case TriDrawVariant::Fill: drawfunc = r_swtruecolor ? llvm->TriFill32 : llvm->TriFill8; break; case TriDrawVariant::DrawSubsector: drawfunc = r_swtruecolor ? llvm->TriDrawSubsector32 : llvm->TriDrawSubsector8; break; + case TriDrawVariant::DrawShadedSubsector: drawfunc = r_swtruecolor ? llvm->TriDrawShadedSubsector32 : llvm->TriDrawShadedSubsector8; break; case TriDrawVariant::FillSubsector: drawfunc = r_swtruecolor ? llvm->TriFillSubsector32 : llvm->TriFillSubsector8; break; case TriDrawVariant::Stencil: drawfunc = llvm->TriStencil; break; } @@ -72,6 +73,7 @@ void PolyTriangleDrawer::draw_arrays(const PolyDrawArgs &drawargs, TriDrawVarian case TriDrawVariant::Draw: drawfunc = r_swtruecolor ? 
ScreenPolyTriangleDrawer::draw32 : ScreenPolyTriangleDrawer::draw; break; case TriDrawVariant::FillSubsector: case TriDrawVariant::Fill: drawfunc = r_swtruecolor ? ScreenPolyTriangleDrawer::fill32 : ScreenPolyTriangleDrawer::fill; break; + case TriDrawVariant::DrawShadedSubsector: case TriDrawVariant::DrawSubsector: drawfunc = r_swtruecolor ? ScreenPolyTriangleDrawer::drawsubsector32 : llvm->TriDrawSubsector8; break; case TriDrawVariant::Stencil: drawfunc = ScreenPolyTriangleDrawer::stencil; break; } From 7447c49dc5c87639f1cf88c5bb7285ce44c08cbb Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Thu, 17 Nov 2016 02:50:54 -0500 Subject: [PATCH 337/912] - Added pixelratio support for the softpoly renderer. --- src/r_poly.cpp | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/src/r_poly.cpp b/src/r_poly.cpp index d4f24be59c..25fae638e5 100644 --- a/src/r_poly.cpp +++ b/src/r_poly.cpp @@ -26,8 +26,10 @@ #include "sbar.h" #include "r_data/r_translate.h" #include "r_poly.h" +#include "gl/data/gl_data.h" CVAR(Bool, r_debug_cull, 0, 0) +void InitGLRMapinfoData(); ///////////////////////////////////////////////////////////////////////////// @@ -66,13 +68,24 @@ void RenderPolyScene::ClearBuffers() void RenderPolyScene::SetupPerspectiveMatrix() { + static bool bDidSetup = false; + + if (!bDidSetup) + { + InitGLRMapinfoData(); + bDidSetup = true; + } + + float pixelstretch = (glset.pixelstretch) ? glset.pixelstretch : 1.2; + float ratio = WidescreenRatio; float fovratio = (WidescreenRatio >= 1.3f) ? 
1.333333f : ratio; float fovy = (float)(2 * DAngle::ToDegrees(atan(tan(FieldOfView.Radians() / 2) / fovratio)).Degrees); TriMatrix worldToView = - TriMatrix::scale(1.0f, (float)YaspectMul, 1.0f) * + TriMatrix::scale(1.0f, 1.2f, 1.0f) * TriMatrix::rotate((float)ViewPitch.Radians(), 1.0f, 0.0f, 0.0f) * TriMatrix::rotate((float)(ViewAngle - 90).Radians(), 0.0f, -1.0f, 0.0f) * + TriMatrix::scale(1.0f, pixelstretch, 1.0f) * TriMatrix::swapYZ() * TriMatrix::translate((float)-ViewPos.X, (float)-ViewPos.Y, (float)-ViewPos.Z); WorldToClip = TriMatrix::perspective(fovy, ratio, 5.0f, 65535.0f) * worldToView; From eac9367acee82c78c381c6d8ffe535a24a0c8fc4 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 17 Nov 2016 21:07:00 +0100 Subject: [PATCH 338/912] Fix perspective when status bar is visible --- src/r_poly.cpp | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/r_poly.cpp b/src/r_poly.cpp index 25fae638e5..dd2df0ee40 100644 --- a/src/r_poly.cpp +++ b/src/r_poly.cpp @@ -29,6 +29,7 @@ #include "gl/data/gl_data.h" CVAR(Bool, r_debug_cull, 0, 0) +EXTERN_CVAR(Int, screenblocks) void InitGLRMapinfoData(); ///////////////////////////////////////////////////////////////////////////// @@ -76,17 +77,25 @@ void RenderPolyScene::SetupPerspectiveMatrix() bDidSetup = true; } - float pixelstretch = (glset.pixelstretch) ? glset.pixelstretch : 1.2; + int height; + if (screenblocks >= 10) + { + height = SCREENHEIGHT; + } + else + { + height = (screenblocks*SCREENHEIGHT / 10) & ~7; + } + viewheight = height; // So viewheight was calculated incorrectly. That's just.. wonderful. float ratio = WidescreenRatio; float fovratio = (WidescreenRatio >= 1.3f) ? 
1.333333f : ratio; float fovy = (float)(2 * DAngle::ToDegrees(atan(tan(FieldOfView.Radians() / 2) / fovratio)).Degrees); TriMatrix worldToView = - TriMatrix::scale(1.0f, 1.2f, 1.0f) * TriMatrix::rotate((float)ViewPitch.Radians(), 1.0f, 0.0f, 0.0f) * TriMatrix::rotate((float)(ViewAngle - 90).Radians(), 0.0f, -1.0f, 0.0f) * - TriMatrix::scale(1.0f, pixelstretch, 1.0f) * TriMatrix::swapYZ() * + TriMatrix::scale(1.0f, 1.0f, glset.pixelstretch) * TriMatrix::translate((float)-ViewPos.X, (float)-ViewPos.Y, (float)-ViewPos.Z); WorldToClip = TriMatrix::perspective(fovy, ratio, 5.0f, 65535.0f) * worldToView; } From 6be80efa5194f3472afa6d596652099d6020bc0d Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 17 Nov 2016 21:19:50 +0100 Subject: [PATCH 339/912] Moved scale back to be after pitch - otherwise the bullets don't end up where you point --- src/r_poly.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/r_poly.cpp b/src/r_poly.cpp index dd2df0ee40..0c05a590ac 100644 --- a/src/r_poly.cpp +++ b/src/r_poly.cpp @@ -92,10 +92,10 @@ void RenderPolyScene::SetupPerspectiveMatrix() float fovratio = (WidescreenRatio >= 1.3f) ? 
1.333333f : ratio; float fovy = (float)(2 * DAngle::ToDegrees(atan(tan(FieldOfView.Radians() / 2) / fovratio)).Degrees); TriMatrix worldToView = + TriMatrix::scale(1.0f, glset.pixelstretch, 1.0f) * TriMatrix::rotate((float)ViewPitch.Radians(), 1.0f, 0.0f, 0.0f) * TriMatrix::rotate((float)(ViewAngle - 90).Radians(), 0.0f, -1.0f, 0.0f) * TriMatrix::swapYZ() * - TriMatrix::scale(1.0f, 1.0f, glset.pixelstretch) * TriMatrix::translate((float)-ViewPos.X, (float)-ViewPos.Y, (float)-ViewPos.Z); WorldToClip = TriMatrix::perspective(fovy, ratio, 5.0f, 65535.0f) * worldToView; } From aa0828567605ebe30703803fd1d8d711ff245b3b Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Thu, 17 Nov 2016 16:44:55 -0500 Subject: [PATCH 340/912] - Moved scaling before rotation again - this time, using a formula provided by Graf, the rendering pitch is now dynamically scaled based on the aspect ratio, causing pitch aiming to once again be correct. --- src/r_poly.cpp | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/r_poly.cpp b/src/r_poly.cpp index 0c05a590ac..ade89571ec 100644 --- a/src/r_poly.cpp +++ b/src/r_poly.cpp @@ -88,13 +88,21 @@ void RenderPolyScene::SetupPerspectiveMatrix() } viewheight = height; // So viewheight was calculated incorrectly. That's just.. wonderful. + // Code provided courtesy of Graf Zahl. Now we just have to plug it into the viewmatrix code... + // We have to scale the pitch to account for the pixel stretching, because the playsim doesn't know about this and treats it as 1:1. + double radPitch = ViewPitch.Normalized180().Radians(); + double angx = cos(radPitch); + double angy = sin(radPitch) * glset.pixelstretch; + double alen = sqrt(angx*angx + angy*angy); + //mAngles.Pitch = (float)RAD2DEG(asin(angy / alen)); + float ratio = WidescreenRatio; float fovratio = (WidescreenRatio >= 1.3f) ? 
1.333333f : ratio; float fovy = (float)(2 * DAngle::ToDegrees(atan(tan(FieldOfView.Radians() / 2) / fovratio)).Degrees); TriMatrix worldToView = - TriMatrix::scale(1.0f, glset.pixelstretch, 1.0f) * - TriMatrix::rotate((float)ViewPitch.Radians(), 1.0f, 0.0f, 0.0f) * + TriMatrix::rotate((float)asin(angy / alen), 1.0f, 0.0f, 0.0f) * TriMatrix::rotate((float)(ViewAngle - 90).Radians(), 0.0f, -1.0f, 0.0f) * + TriMatrix::scale(1.0f, glset.pixelstretch, 1.0f) * TriMatrix::swapYZ() * TriMatrix::translate((float)-ViewPos.X, (float)-ViewPos.Y, (float)-ViewPos.Z); WorldToClip = TriMatrix::perspective(fovy, ratio, 5.0f, 65535.0f) * worldToView; From dbb6c7ca27e7fd8017906356c73b50b6a903415a Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 18 Nov 2016 01:58:39 +0100 Subject: [PATCH 341/912] Add support for specifying a viewport out of screen bounds and fixed statusbar by doing just that! --- .../fixedfunction/drawtrianglecodegen.cpp | 16 ++--- src/r_poly.cpp | 13 ++-- src/r_poly_decal.cpp | 4 -- src/r_poly_particle.cpp | 4 -- src/r_poly_plane.cpp | 8 --- src/r_poly_sky.cpp | 4 -- src/r_poly_sprite.cpp | 4 -- src/r_poly_triangle.cpp | 66 ++++++++++++++----- src/r_poly_triangle.h | 9 +-- src/r_poly_wall.cpp | 4 -- src/r_poly_wallsprite.cpp | 4 -- 11 files changed, 68 insertions(+), 68 deletions(-) diff --git a/src/r_compiler/fixedfunction/drawtrianglecodegen.cpp b/src/r_compiler/fixedfunction/drawtrianglecodegen.cpp index 6f9af51fc3..8bf4779e21 100644 --- a/src/r_compiler/fixedfunction/drawtrianglecodegen.cpp +++ b/src/r_compiler/fixedfunction/drawtrianglecodegen.cpp @@ -78,10 +78,10 @@ void DrawTriangleCodegen::Setup(TriDrawVariant variant, bool truecolor) FDY31 = DY31 << 4; // Bounding rectangle - minx = SSAInt::MAX((SSAInt::MIN(SSAInt::MIN(X1, X2), X3) + 0xF) >> 4, clipleft); - maxx = SSAInt::MIN((SSAInt::MAX(SSAInt::MAX(X1, X2), X3) + 0xF) >> 4, clipright - 1); - miny = SSAInt::MAX((SSAInt::MIN(SSAInt::MIN(Y1, Y2), Y3) + 0xF) >> 4, cliptop); - maxy = 
SSAInt::MIN((SSAInt::MAX(SSAInt::MAX(Y1, Y2), Y3) + 0xF) >> 4, clipbottom - 1); + minx = SSAInt::MAX((SSAInt::MIN(SSAInt::MIN(X1, X2), X3) + 0xF).ashr(4), clipleft); + maxx = SSAInt::MIN((SSAInt::MAX(SSAInt::MAX(X1, X2), X3) + 0xF).ashr(4), clipright - 1); + miny = SSAInt::MAX((SSAInt::MIN(SSAInt::MIN(Y1, Y2), Y3) + 0xF).ashr(4), cliptop); + maxy = SSAInt::MIN((SSAInt::MAX(SSAInt::MAX(Y1, Y2), Y3) + 0xF).ashr(4), clipbottom - 1); SSAIfBlock if0; if0.if_block(minx >= maxx || miny >= maxy); @@ -221,7 +221,7 @@ void DrawTriangleCodegen::LoopBlockX(TriDrawVariant variant, bool truecolor) branch.if_block(!(a == SSAInt(0) || b == SSAInt(0) || c == SSAInt(0))); // Check if block needs clipping - SSABool clipneeded = clipleft > x || clipright < (x + q) || cliptop > y || clipbottom < (y + q); + SSABool clipneeded = x < clipleft || (x + q) > clipright || y < cliptop || (y + q) > clipbottom; // Calculate varying variables for affine block SSAFloat offx0 = SSAFloat(x - minx); @@ -401,7 +401,7 @@ void DrawTriangleCodegen::LoopPartialBlock(TriDrawVariant variant, bool truecolo varying[i] = stack_varying[i].load(); loopx.loop_block(ix < SSAInt(q), q); { - SSABool visible = (ix + x >= clipleft) && (ix + x < clipright) && (cliptop <= y + iy) && (clipbottom > y + iy); + SSABool visible = (ix + x >= clipleft) && (ix + x < clipright) && (iy + y >= cliptop) && (iy + y < clipbottom); SSABool covered = CX1 > SSAInt(0) && CX2 > SSAInt(0) && CX3 > SSAInt(0) && visible; if (variant == TriDrawVariant::DrawSubsector || variant == TriDrawVariant::DrawShadedSubsector || variant == TriDrawVariant::FillSubsector) @@ -589,9 +589,9 @@ void DrawTriangleCodegen::LoadArgs(TriDrawVariant variant, bool truecolor, SSAVa v1 = LoadTriVertex(args[0][2].load(true)); v2 = LoadTriVertex(args[0][3].load(true)); v3 = LoadTriVertex(args[0][4].load(true)); - clipleft = args[0][5].load(true); + clipleft = SSAInt(0);// args[0][5].load(true); clipright = args[0][6].load(true); - cliptop = args[0][7].load(true); + 
cliptop = SSAInt(0);// args[0][7].load(true); clipbottom = args[0][8].load(true); texturePixels = args[0][9].load(true); textureWidth = args[0][10].load(true); diff --git a/src/r_poly.cpp b/src/r_poly.cpp index ade89571ec..7a06a471e2 100644 --- a/src/r_poly.cpp +++ b/src/r_poly.cpp @@ -62,8 +62,8 @@ void RenderPolyScene::ClearBuffers() SectorSpriteRanges.resize(numsectors); SortedSprites.clear(); TranslucentObjects.clear(); - PolyStencilBuffer::Instance()->Clear(viewwidth, viewheight, 0); - PolySubsectorGBuffer::Instance()->Resize(dc_pitch, viewheight); + PolyStencilBuffer::Instance()->Clear(screen->GetWidth(), screen->GetHeight(), 0); + PolySubsectorGBuffer::Instance()->Resize(screen->GetPitch(), screen->GetHeight()); NextSubsectorDepth = 0; } @@ -79,14 +79,12 @@ void RenderPolyScene::SetupPerspectiveMatrix() int height; if (screenblocks >= 10) - { height = SCREENHEIGHT; - } else - { height = (screenblocks*SCREENHEIGHT / 10) & ~7; - } - viewheight = height; // So viewheight was calculated incorrectly. That's just.. wonderful. + + int bottom = SCREENHEIGHT - (height + viewwindowy - ((height - viewheight) / 2)); + PolyTriangleDrawer::set_viewport(viewwindowx, SCREENHEIGHT - bottom - height, viewwidth, height, screen); // Code provided courtesy of Graf Zahl. Now we just have to plug it into the viewmatrix code... // We have to scale the pitch to account for the pixel stretching, because the playsim doesn't know about this and treats it as 1:1. 
@@ -105,6 +103,7 @@ void RenderPolyScene::SetupPerspectiveMatrix() TriMatrix::scale(1.0f, glset.pixelstretch, 1.0f) * TriMatrix::swapYZ() * TriMatrix::translate((float)-ViewPos.X, (float)-ViewPos.Y, (float)-ViewPos.Z); + WorldToClip = TriMatrix::perspective(fovy, ratio, 5.0f, 65535.0f) * worldToView; } diff --git a/src/r_poly_decal.cpp b/src/r_poly_decal.cpp index 011ce83e47..0c6c427a84 100644 --- a/src/r_poly_decal.cpp +++ b/src/r_poly_decal.cpp @@ -148,10 +148,6 @@ void RenderPolyDecal::Render(const TriMatrix &worldToClip, DBaseDecal *decal, co args.vcount = 4; args.mode = TriangleDrawMode::Fan; args.ccw = true; - args.clipleft = 0; - args.cliptop = 0; - args.clipright = viewwidth; - args.clipbottom = viewheight; args.stenciltestvalue = 0; args.stencilwritevalue = 1; args.SetTexture(tex); diff --git a/src/r_poly_particle.cpp b/src/r_poly_particle.cpp index f0b2219886..be80300713 100644 --- a/src/r_poly_particle.cpp +++ b/src/r_poly_particle.cpp @@ -92,10 +92,6 @@ void RenderPolyParticle::Render(const TriMatrix &worldToClip, particle_t *partic args.vcount = 4; args.mode = TriangleDrawMode::Fan; args.ccw = true; - args.clipleft = 0; - args.cliptop = 0; - args.clipright = viewwidth; - args.clipbottom = viewheight; args.stenciltestvalue = 0; args.stencilwritevalue = 1; args.solidcolor = (alpha << 24) | (particle->color & 0xffffff); diff --git a/src/r_poly_plane.cpp b/src/r_poly_plane.cpp index 5b0e912a80..9a425598bb 100644 --- a/src/r_poly_plane.cpp +++ b/src/r_poly_plane.cpp @@ -137,10 +137,6 @@ void RenderPolyPlane::Render3DFloor(const TriMatrix &worldToClip, subsector_t *s args.vcount = sub->numlines; args.mode = TriangleDrawMode::Fan; args.ccw = true; - args.clipleft = 0; - args.cliptop = 0; - args.clipright = viewwidth; - args.clipbottom = viewheight; args.stenciltestvalue = 0; args.stencilwritevalue = 1; args.SetTexture(tex); @@ -233,10 +229,6 @@ void RenderPolyPlane::Render(const TriMatrix &worldToClip, subsector_t *sub, uin args.vcount = sub->numlines; 
args.mode = TriangleDrawMode::Fan; args.ccw = ccw; - args.clipleft = 0; - args.cliptop = 0; - args.clipright = viewwidth; - args.clipbottom = viewheight; args.stenciltestvalue = 0; args.stencilwritevalue = 1; diff --git a/src/r_poly_sky.cpp b/src/r_poly_sky.cpp index 03a4bfdb20..fc6963dbb8 100644 --- a/src/r_poly_sky.cpp +++ b/src/r_poly_sky.cpp @@ -59,10 +59,6 @@ void PolySkyDome::Render(const TriMatrix &worldToClip) PolyDrawArgs args; args.uniforms = uniforms; - args.clipleft = 0; - args.cliptop = 0; - args.clipright = viewwidth; - args.clipbottom = viewheight; args.stenciltestvalue = 255; args.stencilwritevalue = 1; args.SetTexture(frontskytex); diff --git a/src/r_poly_sprite.cpp b/src/r_poly_sprite.cpp index 1701b24ef6..e1c6fcc14b 100644 --- a/src/r_poly_sprite.cpp +++ b/src/r_poly_sprite.cpp @@ -133,10 +133,6 @@ void RenderPolySprite::Render(const TriMatrix &worldToClip, AActor *thing, subse args.vcount = 4; args.mode = TriangleDrawMode::Fan; args.ccw = true; - args.clipleft = 0; - args.cliptop = 0; - args.clipright = viewwidth; - args.clipbottom = viewheight; args.stenciltestvalue = 0; args.stencilwritevalue = 1; args.SetTexture(tex); diff --git a/src/r_poly_triangle.cpp b/src/r_poly_triangle.cpp index 64611376b8..d661fe0d7f 100644 --- a/src/r_poly_triangle.cpp +++ b/src/r_poly_triangle.cpp @@ -40,9 +40,41 @@ #include #endif +int PolyTriangleDrawer::viewport_x; +int PolyTriangleDrawer::viewport_y; +int PolyTriangleDrawer::viewport_width; +int PolyTriangleDrawer::viewport_height; +int PolyTriangleDrawer::dest_pitch; +int PolyTriangleDrawer::dest_width; +int PolyTriangleDrawer::dest_height; +uint8_t *PolyTriangleDrawer::dest; +bool PolyTriangleDrawer::dest_bgra; + +void PolyTriangleDrawer::set_viewport(int x, int y, int width, int height, DCanvas *canvas) +{ + dest = (uint8_t*)canvas->GetBuffer(); + dest_width = canvas->GetWidth(); + dest_height = canvas->GetHeight(); + dest_pitch = canvas->GetPitch(); + dest_bgra = canvas->IsBgra(); + + int offsetx = clamp(x, 
0, dest_width); + int offsety = clamp(y, 0, dest_height); + int pixelsize = dest_bgra ? 4 : 1; + + viewport_x = x - offsetx; + viewport_y = y - offsety; + viewport_width = width; + viewport_height = height; + + dest += (offsetx + offsety * dest_pitch) * pixelsize; + dest_width = clamp(viewport_x + viewport_width, 0, dest_width - offsetx); + dest_height = clamp(viewport_y + viewport_height, 0, dest_height - offsety); +} + void PolyTriangleDrawer::draw(const PolyDrawArgs &args, TriDrawVariant variant) { - if (r_swtruecolor) + if (dest_bgra) DrawerCommandQueue::QueueCommand(args, variant); else draw_arrays(args, variant, nullptr); @@ -59,33 +91,33 @@ void PolyTriangleDrawer::draw_arrays(const PolyDrawArgs &drawargs, TriDrawVarian switch (variant) { default: - case TriDrawVariant::Draw: drawfunc = r_swtruecolor ? llvm->TriDraw32: llvm->TriDraw8; break; - case TriDrawVariant::Fill: drawfunc = r_swtruecolor ? llvm->TriFill32 : llvm->TriFill8; break; - case TriDrawVariant::DrawSubsector: drawfunc = r_swtruecolor ? llvm->TriDrawSubsector32 : llvm->TriDrawSubsector8; break; - case TriDrawVariant::DrawShadedSubsector: drawfunc = r_swtruecolor ? llvm->TriDrawShadedSubsector32 : llvm->TriDrawShadedSubsector8; break; - case TriDrawVariant::FillSubsector: drawfunc = r_swtruecolor ? llvm->TriFillSubsector32 : llvm->TriFillSubsector8; break; + case TriDrawVariant::Draw: drawfunc = dest_bgra ? llvm->TriDraw32: llvm->TriDraw8; break; + case TriDrawVariant::Fill: drawfunc = dest_bgra ? llvm->TriFill32 : llvm->TriFill8; break; + case TriDrawVariant::DrawSubsector: drawfunc = dest_bgra ? llvm->TriDrawSubsector32 : llvm->TriDrawSubsector8; break; + case TriDrawVariant::DrawShadedSubsector: drawfunc = dest_bgra ? llvm->TriDrawShadedSubsector32 : llvm->TriDrawShadedSubsector8; break; + case TriDrawVariant::FillSubsector: drawfunc = dest_bgra ? 
llvm->TriFillSubsector32 : llvm->TriFillSubsector8; break; case TriDrawVariant::Stencil: drawfunc = llvm->TriStencil; break; } #else switch (variant) { default: - case TriDrawVariant::Draw: drawfunc = r_swtruecolor ? ScreenPolyTriangleDrawer::draw32 : ScreenPolyTriangleDrawer::draw; break; + case TriDrawVariant::Draw: drawfunc = dest_bgra ? ScreenPolyTriangleDrawer::draw32 : ScreenPolyTriangleDrawer::draw; break; case TriDrawVariant::FillSubsector: - case TriDrawVariant::Fill: drawfunc = r_swtruecolor ? ScreenPolyTriangleDrawer::fill32 : ScreenPolyTriangleDrawer::fill; break; + case TriDrawVariant::Fill: drawfunc = dest_bgra ? ScreenPolyTriangleDrawer::fill32 : ScreenPolyTriangleDrawer::fill; break; case TriDrawVariant::DrawShadedSubsector: - case TriDrawVariant::DrawSubsector: drawfunc = r_swtruecolor ? ScreenPolyTriangleDrawer::drawsubsector32 : llvm->TriDrawSubsector8; break; + case TriDrawVariant::DrawSubsector: drawfunc = dest_bgra ? ScreenPolyTriangleDrawer::drawsubsector32 : llvm->TriDrawSubsector8; break; case TriDrawVariant::Stencil: drawfunc = ScreenPolyTriangleDrawer::stencil; break; } #endif TriDrawTriangleArgs args; - args.dest = dc_destorg; - args.pitch = dc_pitch; - args.clipleft = drawargs.clipleft; - args.clipright = drawargs.clipright; - args.cliptop = drawargs.cliptop; - args.clipbottom = drawargs.clipbottom; + args.dest = dest; + args.pitch = dest_pitch; + args.clipleft = 0; + args.clipright = dest_width; + args.cliptop = 0; + args.clipbottom = dest_height; args.texturePixels = drawargs.texturePixels; args.textureWidth = drawargs.textureWidth; args.textureHeight = drawargs.textureHeight; @@ -163,8 +195,8 @@ void PolyTriangleDrawer::draw_shaded_triangle(const TriVertex *vert, bool ccw, T v.z *= v.w; // Apply viewport scale to get screen coordinates: - v.x = viewwidth * (1.0f + v.x) * 0.5f; - v.y = viewheight * (1.0f - v.y) * 0.5f; + v.x = viewport_x + viewport_width * (1.0f + v.x) * 0.5f; + v.y = viewport_y + viewport_height * (1.0f - v.y) * 
0.5f; } // Draw screen triangles diff --git a/src/r_poly_triangle.h b/src/r_poly_triangle.h index f3f63f8f11..8f2fcf3d04 100644 --- a/src/r_poly_triangle.h +++ b/src/r_poly_triangle.h @@ -36,10 +36,6 @@ public: int vcount = 0; TriangleDrawMode mode = TriangleDrawMode::Normal; bool ccw = false; - int clipleft = 0; - int clipright = 0; - int cliptop = 0; - int clipbottom = 0; const uint8_t *texturePixels = nullptr; int textureWidth = 0; int textureHeight = 0; @@ -61,6 +57,7 @@ public: class PolyTriangleDrawer { public: + static void set_viewport(int x, int y, int width, int height, DCanvas *canvas); static void draw(const PolyDrawArgs &args, TriDrawVariant variant); private: @@ -70,6 +67,10 @@ private: static bool cullhalfspace(float clipdistance1, float clipdistance2, float &t1, float &t2); static void clipedge(const TriVertex *verts, TriVertex *clippedvert, int &numclipvert); + static int viewport_x, viewport_y, viewport_width, viewport_height, dest_pitch, dest_width, dest_height; + static bool dest_bgra; + static uint8_t *dest; + enum { max_additional_vertices = 16 }; friend class DrawPolyTrianglesCommand; diff --git a/src/r_poly_wall.cpp b/src/r_poly_wall.cpp index 2ed225cf20..1f1908ab47 100644 --- a/src/r_poly_wall.cpp +++ b/src/r_poly_wall.cpp @@ -187,10 +187,6 @@ void RenderPolyWall::Render(const TriMatrix &worldToClip) args.vcount = 4; args.mode = TriangleDrawMode::Fan; args.ccw = true; - args.clipleft = 0; - args.cliptop = 0; - args.clipright = viewwidth; - args.clipbottom = viewheight; args.stenciltestvalue = 0; args.stencilwritevalue = 1; args.SetTexture(tex); diff --git a/src/r_poly_wallsprite.cpp b/src/r_poly_wallsprite.cpp index c6b3a65690..e2d1a9e86d 100644 --- a/src/r_poly_wallsprite.cpp +++ b/src/r_poly_wallsprite.cpp @@ -118,10 +118,6 @@ void RenderPolyWallSprite::Render(const TriMatrix &worldToClip, AActor *thing, s args.vcount = 4; args.mode = TriangleDrawMode::Fan; args.ccw = true; - args.clipleft = 0; - args.cliptop = 0; - args.clipright = 
viewwidth; - args.clipbottom = viewheight; args.stenciltestvalue = 0; args.stencilwritevalue = 1; args.SetTexture(tex); From 5d6ceb868e80450f2fc14c6e56bbc73953d597a1 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 18 Nov 2016 14:40:53 +0100 Subject: [PATCH 342/912] Move viewport stuff to its own function --- src/r_poly.cpp | 30 ++++++++++++++++++------------ src/r_poly.h | 1 + 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/src/r_poly.cpp b/src/r_poly.cpp index 7a06a471e2..4dc90027d0 100644 --- a/src/r_poly.cpp +++ b/src/r_poly.cpp @@ -40,6 +40,7 @@ void RenderPolyScene::Render() return; ClearBuffers(); + SetSceneViewport(); SetupPerspectiveMatrix(); Cull.CullScene(WorldToClip); RenderSectors(); @@ -67,6 +68,18 @@ void RenderPolyScene::ClearBuffers() NextSubsectorDepth = 0; } +void RenderPolyScene::SetSceneViewport() +{ + int height; + if (screenblocks >= 10) + height = SCREENHEIGHT; + else + height = (screenblocks*SCREENHEIGHT / 10) & ~7; + + int bottom = SCREENHEIGHT - (height + viewwindowy - ((height - viewheight) / 2)); + PolyTriangleDrawer::set_viewport(viewwindowx, SCREENHEIGHT - bottom - height, viewwidth, height, screen); +} + void RenderPolyScene::SetupPerspectiveMatrix() { static bool bDidSetup = false; @@ -77,29 +90,22 @@ void RenderPolyScene::SetupPerspectiveMatrix() bDidSetup = true; } - int height; - if (screenblocks >= 10) - height = SCREENHEIGHT; - else - height = (screenblocks*SCREENHEIGHT / 10) & ~7; - - int bottom = SCREENHEIGHT - (height + viewwindowy - ((height - viewheight) / 2)); - PolyTriangleDrawer::set_viewport(viewwindowx, SCREENHEIGHT - bottom - height, viewwidth, height, screen); - // Code provided courtesy of Graf Zahl. Now we just have to plug it into the viewmatrix code... // We have to scale the pitch to account for the pixel stretching, because the playsim doesn't know about this and treats it as 1:1. 
double radPitch = ViewPitch.Normalized180().Radians(); double angx = cos(radPitch); double angy = sin(radPitch) * glset.pixelstretch; double alen = sqrt(angx*angx + angy*angy); - //mAngles.Pitch = (float)RAD2DEG(asin(angy / alen)); + float adjustedPitch = (float)asin(angy / alen); + float adjustedViewAngle = (float)(ViewAngle - 90).Radians(); float ratio = WidescreenRatio; float fovratio = (WidescreenRatio >= 1.3f) ? 1.333333f : ratio; float fovy = (float)(2 * DAngle::ToDegrees(atan(tan(FieldOfView.Radians() / 2) / fovratio)).Degrees); + TriMatrix worldToView = - TriMatrix::rotate((float)asin(angy / alen), 1.0f, 0.0f, 0.0f) * - TriMatrix::rotate((float)(ViewAngle - 90).Radians(), 0.0f, -1.0f, 0.0f) * + TriMatrix::rotate(adjustedPitch, 1.0f, 0.0f, 0.0f) * + TriMatrix::rotate(adjustedViewAngle, 0.0f, -1.0f, 0.0f) * TriMatrix::scale(1.0f, glset.pixelstretch, 1.0f) * TriMatrix::swapYZ() * TriMatrix::translate((float)-ViewPos.X, (float)-ViewPos.Y, (float)-ViewPos.Z); diff --git a/src/r_poly.h b/src/r_poly.h index f18f8a3cd9..4be49f5321 100644 --- a/src/r_poly.h +++ b/src/r_poly.h @@ -88,6 +88,7 @@ public: private: void ClearBuffers(); + void SetSceneViewport(); void SetupPerspectiveMatrix(); void RenderSectors(); void RenderSubsector(subsector_t *sub); From 272fe7f754466bcd7dd940b39a11aed43a12962c Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 19 Nov 2016 02:53:32 +0100 Subject: [PATCH 343/912] Added blending modes to triangle codegen --- .../fixedfunction/drawtrianglecodegen.cpp | 222 ++++++++++-------- .../fixedfunction/drawtrianglecodegen.h | 26 +- src/r_compiler/llvmdrawers.cpp | 59 ++--- src/r_compiler/llvmdrawers.h | 50 ++-- src/r_poly_decal.cpp | 4 +- src/r_poly_particle.cpp | 5 +- src/r_poly_plane.cpp | 12 +- src/r_poly_sky.cpp | 6 +- src/r_poly_sprite.cpp | 2 +- src/r_poly_triangle.cpp | 42 ++-- src/r_poly_triangle.h | 11 +- src/r_poly_wall.cpp | 6 +- src/r_poly_wallsprite.cpp | 2 +- 13 files changed, 242 insertions(+), 205 deletions(-) diff --git 
a/src/r_compiler/fixedfunction/drawtrianglecodegen.cpp b/src/r_compiler/fixedfunction/drawtrianglecodegen.cpp index 8bf4779e21..21995bb636 100644 --- a/src/r_compiler/fixedfunction/drawtrianglecodegen.cpp +++ b/src/r_compiler/fixedfunction/drawtrianglecodegen.cpp @@ -32,11 +32,14 @@ #include "r_compiler/ssa/ssa_struct_type.h" #include "r_compiler/ssa/ssa_value.h" -void DrawTriangleCodegen::Generate(TriDrawVariant variant, bool truecolor, SSAValue args, SSAValue thread_data) +void DrawTriangleCodegen::Generate(TriDrawVariant variant, TriBlendMode blendmode, bool truecolor, SSAValue args, SSAValue thread_data) { - LoadArgs(variant, truecolor, args, thread_data); - Setup(variant, truecolor); - LoopBlockY(variant, truecolor); + this->variant = variant; + this->blendmode = blendmode; + this->truecolor = truecolor; + LoadArgs(args, thread_data); + Setup(); + LoopBlockY(); } SSAInt DrawTriangleCodegen::FloatTo28_4(SSAFloat v) @@ -46,7 +49,7 @@ SSAInt DrawTriangleCodegen::FloatTo28_4(SSAFloat v) return (a + (a.ashr(31) | SSAInt(1))).ashr(1); } -void DrawTriangleCodegen::Setup(TriDrawVariant variant, bool truecolor) +void DrawTriangleCodegen::Setup() { int pixelsize = truecolor ? 4 : 1; @@ -154,7 +157,7 @@ SSAFloat DrawTriangleCodegen::grady(SSAFloat x0, SSAFloat y0, SSAFloat x1, SSAFl return top / bottom; } -void DrawTriangleCodegen::LoopBlockY(TriDrawVariant variant, bool truecolor) +void DrawTriangleCodegen::LoopBlockY() { int pixelsize = truecolor ? 
4 : 1; @@ -171,7 +174,7 @@ void DrawTriangleCodegen::LoopBlockY(TriDrawVariant variant, bool truecolor) SSAIfBlock branch; branch.if_block((y / q) % thread.num_cores == thread.core); { - LoopBlockX(variant, truecolor); + LoopBlockX(); } branch.end_block(); @@ -182,7 +185,7 @@ void DrawTriangleCodegen::LoopBlockY(TriDrawVariant variant, bool truecolor) loop.end_block(); } -void DrawTriangleCodegen::LoopBlockX(TriDrawVariant variant, bool truecolor) +void DrawTriangleCodegen::LoopBlockX() { stack_x.store(minx); @@ -260,7 +263,7 @@ void DrawTriangleCodegen::LoopBlockX(TriDrawVariant variant, bool truecolor) SetStencilBlock(x / 8 + y / 8 * stencilPitch); SSABool covered = a == SSAInt(0xF) && b == SSAInt(0xF) && c == SSAInt(0xF) && !clipneeded; - if (variant != TriDrawVariant::DrawSubsector && variant != TriDrawVariant::DrawShadedSubsector && variant != TriDrawVariant::FillSubsector) + if (variant != TriDrawVariant::DrawSubsector && variant != TriDrawVariant::FillSubsector && variant != TriDrawVariant::FuzzSubsector) { covered = covered && StencilIsSingleValue(); } @@ -269,11 +272,11 @@ void DrawTriangleCodegen::LoopBlockX(TriDrawVariant variant, bool truecolor) SSAIfBlock branch_covered; branch_covered.if_block(covered); { - LoopFullBlock(variant, truecolor); + LoopFullBlock(); } branch_covered.else_block(); { - LoopPartialBlock(variant, truecolor); + LoopPartialBlock(); } branch_covered.end_block(); @@ -284,10 +287,10 @@ void DrawTriangleCodegen::LoopBlockX(TriDrawVariant variant, bool truecolor) loop.end_block(); } -void DrawTriangleCodegen::LoopFullBlock(TriDrawVariant variant, bool truecolor) +void DrawTriangleCodegen::LoopFullBlock() { SSAIfBlock branch_stenciltest; - if (variant != TriDrawVariant::DrawSubsector && variant != TriDrawVariant::DrawShadedSubsector && variant != TriDrawVariant::FillSubsector) + if (variant != TriDrawVariant::DrawSubsector && variant != TriDrawVariant::FillSubsector && variant != TriDrawVariant::FuzzSubsector) { 
branch_stenciltest.if_block(StencilGetSingle() == stencilTestValue); } @@ -325,18 +328,18 @@ void DrawTriangleCodegen::LoopFullBlock(TriDrawVariant variant, bool truecolor) varying[i] = stack_varying[i].load(); loopx.loop_block(ix < SSAInt(q), q); { - if (variant == TriDrawVariant::DrawSubsector || variant == TriDrawVariant::DrawShadedSubsector || variant == TriDrawVariant::FillSubsector) + if (variant == TriDrawVariant::DrawSubsector || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector) { SSAIfBlock branch; branch.if_block(subsectorbuffer[ix].load(true) >= subsectorDepth); { - ProcessPixel(buffer[ix * pixelsize], subsectorbuffer[ix], varying, variant, truecolor); + ProcessPixel(buffer[ix * pixelsize], subsectorbuffer[ix], varying); } branch.end_block(); } else { - ProcessPixel(buffer[ix * pixelsize], subsectorbuffer[ix], varying, variant, truecolor); + ProcessPixel(buffer[ix * pixelsize], subsectorbuffer[ix], varying); } for (int i = 0; i < TriVertex::NumVarying; i++) @@ -353,13 +356,13 @@ void DrawTriangleCodegen::LoopFullBlock(TriDrawVariant variant, bool truecolor) loopy.end_block(); } - if (variant != TriDrawVariant::DrawSubsector && variant != TriDrawVariant::DrawShadedSubsector && variant != TriDrawVariant::FillSubsector) + if (variant != TriDrawVariant::DrawSubsector && variant != TriDrawVariant::FillSubsector && variant != TriDrawVariant::FuzzSubsector) { branch_stenciltest.end_block(); } } -void DrawTriangleCodegen::LoopPartialBlock(TriDrawVariant variant, bool truecolor) +void DrawTriangleCodegen::LoopPartialBlock() { int pixelsize = truecolor ? 
4 : 1; @@ -404,7 +407,7 @@ void DrawTriangleCodegen::LoopPartialBlock(TriDrawVariant variant, bool truecolo SSABool visible = (ix + x >= clipleft) && (ix + x < clipright) && (iy + y >= cliptop) && (iy + y < clipbottom); SSABool covered = CX1 > SSAInt(0) && CX2 > SSAInt(0) && CX3 > SSAInt(0) && visible; - if (variant == TriDrawVariant::DrawSubsector || variant == TriDrawVariant::DrawShadedSubsector || variant == TriDrawVariant::FillSubsector) + if (variant == TriDrawVariant::DrawSubsector || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector) { covered = covered && subsectorbuffer[ix].load(true) >= subsectorDepth; } @@ -422,7 +425,7 @@ void DrawTriangleCodegen::LoopPartialBlock(TriDrawVariant variant, bool truecolo } else { - ProcessPixel(buffer[ix * pixelsize], subsectorbuffer[ix], varying, variant, truecolor); + ProcessPixel(buffer[ix * pixelsize], subsectorbuffer[ix], varying); } } branch.end_block(); @@ -447,91 +450,101 @@ void DrawTriangleCodegen::LoopPartialBlock(TriDrawVariant variant, bool truecolo loopy.end_block(); } -void DrawTriangleCodegen::ProcessPixel(SSAUBytePtr buffer, SSAIntPtr subsectorbuffer, SSAInt *varying, TriDrawVariant variant, bool truecolor) +SSAVec4i DrawTriangleCodegen::TranslateSample(SSAInt uvoffset) { - if (variant == TriDrawVariant::Fill || variant == TriDrawVariant::FillSubsector) + if (variant == TriDrawVariant::FillNormal || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector) + return translation[color * 4].load_vec4ub(true); + else + return translation[texturePixels[uvoffset].load(true).zext_int() * 4].load_vec4ub(true); +} + +SSAVec4i DrawTriangleCodegen::Sample(SSAInt uvoffset) +{ + if (variant == TriDrawVariant::FillNormal || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector) + return SSAVec4i::unpack(color); + else + return texturePixels[uvoffset * 4].load_vec4ub(true); +} + +void 
DrawTriangleCodegen::ProcessPixel(SSAUBytePtr buffer, SSAIntPtr subsectorbuffer, SSAInt *varying) +{ + SSAInt ufrac = varying[0]; + SSAInt vfrac = varying[1]; + + SSAInt upos = ((ufrac >> 16) * textureWidth) >> 16; + SSAInt vpos = ((vfrac >> 16) * textureHeight) >> 16; + SSAInt uvoffset = upos * textureHeight + vpos; + + if (truecolor) { - if (truecolor) + SSAVec4i fg; + SSAVec4i bg = buffer.load_vec4ub(false); + SSAInt alpha, inv_alpha; + SSAVec4i output; + + switch (blendmode) { - buffer.store_vec4ub(SSAVec4i::unpack(solidcolor)); - } - else - { - //buffer.store(solidcolor); + default: + case TriBlendMode::Copy: + fg = Sample(uvoffset); + output = blend_copy(shade_bgra_simple(fg, currentlight)); break; + case TriBlendMode::AlphaBlend: + fg = Sample(uvoffset); + output = blend_alpha_blend(shade_bgra_simple(fg, currentlight), bg); break; + case TriBlendMode::AddSolid: + fg = Sample(uvoffset); + output = blend_add(shade_bgra_simple(fg, currentlight), bg, srcalpha, destalpha); break; + case TriBlendMode::Add: + fg = Sample(uvoffset); + output = blend_add(shade_bgra_simple(fg, currentlight), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); break; + case TriBlendMode::Sub: + fg = Sample(uvoffset); + output = blend_sub(shade_bgra_simple(fg, currentlight), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); break; + case TriBlendMode::RevSub: + fg = Sample(uvoffset); + output = blend_revsub(shade_bgra_simple(fg, currentlight), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); break; + case TriBlendMode::Shaded: + fg = Sample(uvoffset); + alpha = fg[0]; + alpha = alpha + (alpha >> 7); // 255 -> 256 + inv_alpha = 256 - alpha; + output = blend_add(shade_bgra_simple(SSAVec4i::unpack(color), currentlight), bg, alpha, inv_alpha); + break; + case TriBlendMode::TranslateCopy: + fg = TranslateSample(uvoffset); + output = blend_copy(shade_bgra_simple(fg, currentlight)); + break; + case TriBlendMode::TranslateAdd: + fg = TranslateSample(uvoffset); + output = 
blend_add(shade_bgra_simple(fg, currentlight), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); + break; + case TriBlendMode::TranslateSub: + fg = TranslateSample(uvoffset); + output = blend_sub(shade_bgra_simple(fg, currentlight), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); + break; + case TriBlendMode::TranslateRevSub: + fg = TranslateSample(uvoffset); + output = blend_revsub(shade_bgra_simple(fg, currentlight), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); + break; } - if (variant != TriDrawVariant::FillSubsector) - subsectorbuffer.store(subsectorDepth); + buffer.store_vec4ub(output); } else { - SSAInt ufrac = varying[0]; - SSAInt vfrac = varying[1]; - - SSAInt upos = ((ufrac >> 16) * textureWidth) >> 16; - SSAInt vpos = ((vfrac >> 16) * textureHeight) >> 16; - SSAInt uvoffset = upos * textureHeight + vpos; - - if (truecolor) + if (variant == TriDrawVariant::FillNormal || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector) { - if (variant == TriDrawVariant::DrawMasked || variant == TriDrawVariant::DrawSubsector) - { - SSAVec4i fg = texturePixels[uvoffset * 4].load_vec4ub(true); - SSAInt fg_alpha = fg[3]; - fg = (fg * currentlight) >> 8; - fg.insert(3, fg_alpha); - - SSAIfBlock branch_transparency; - branch_transparency.if_block(fg_alpha > SSAInt(127)); - { - buffer.store_vec4ub(fg); - if (variant != TriDrawVariant::DrawSubsector) - subsectorbuffer.store(subsectorDepth); - } - branch_transparency.end_block(); - } - else if (variant == TriDrawVariant::DrawShadedSubsector) - { - SSAInt alpha = texturePixels[uvoffset * 4].load(true).zext_int(); - alpha = alpha + (alpha >> 7); // // 255 -> 256 - SSAInt inv_alpha = 256 - alpha; - - SSAVec4i bgcolor = buffer.load_vec4ub(false); - buffer.store_vec4ub(blend_add(shade_bgra_simple(SSAVec4i::unpack(solidcolor), currentlight), bgcolor, alpha, inv_alpha)); - } - else - { - SSAVec4i fg = texturePixels[uvoffset * 4].load_vec4ub(true); - SSAInt fg_alpha = fg[3]; - fg = (fg * 
currentlight) >> 8; - fg.insert(3, fg_alpha); - - buffer.store_vec4ub(fg); - subsectorbuffer.store(subsectorDepth); - } + buffer.store(color.trunc_ubyte()); } else { - SSAUByte palindex = texturePixels[uvoffset].load(true); - - if (variant == TriDrawVariant::DrawMasked || variant == TriDrawVariant::DrawSubsector) - { - SSAIfBlock branch_transparency; - branch_transparency.if_block(!(palindex.zext_int() == SSAInt(0))); - { - buffer.store(palindex); - if (variant != TriDrawVariant::DrawSubsector) - subsectorbuffer.store(subsectorDepth); - } - branch_transparency.end_block(); - } - else - { - buffer.store(palindex); - subsectorbuffer.store(subsectorDepth); - } + SSAUByte fg = texturePixels[uvoffset].load(true); + buffer.store(fg); } } + + if (variant != TriDrawVariant::DrawSubsector && variant != TriDrawVariant::FillSubsector && variant != TriDrawVariant::FuzzSubsector) + subsectorbuffer.store(subsectorDepth); } void DrawTriangleCodegen::SetStencilBlock(SSAInt block) @@ -582,7 +595,7 @@ SSABool DrawTriangleCodegen::StencilIsSingleValue() return (StencilBlockMask.load(false) & SSAInt(0xffffff00)) == SSAInt(0xffffff00); } -void DrawTriangleCodegen::LoadArgs(TriDrawVariant variant, bool truecolor, SSAValue args, SSAValue thread_data) +void DrawTriangleCodegen::LoadArgs(SSAValue args, SSAValue thread_data) { dest = args[0][0].load(true); pitch = args[0][1].load(true); @@ -596,7 +609,7 @@ void DrawTriangleCodegen::LoadArgs(TriDrawVariant variant, bool truecolor, SSAVa texturePixels = args[0][9].load(true); textureWidth = args[0][10].load(true); textureHeight = args[0][11].load(true); - solidcolor = args[0][12].load(true); + translation = args[0][12].load(true); LoadUniforms(args[0][13].load(true)); stencilValues = args[0][14].load(true); stencilMasks = args[0][15].load(true); @@ -625,17 +638,20 @@ void DrawTriangleCodegen::LoadUniforms(SSAValue uniforms) { light = uniforms[0][0].load(true); subsectorDepth = uniforms[0][1].load(true); + color = uniforms[0][2].load(true); + 
srcalpha = uniforms[0][3].load(true); + destalpha = uniforms[0][4].load(true); - SSAShort light_alpha = uniforms[0][2].load(true); - SSAShort light_red = uniforms[0][3].load(true); - SSAShort light_green = uniforms[0][4].load(true); - SSAShort light_blue = uniforms[0][5].load(true); - SSAShort fade_alpha = uniforms[0][6].load(true); - SSAShort fade_red = uniforms[0][7].load(true); - SSAShort fade_green = uniforms[0][8].load(true); - SSAShort fade_blue = uniforms[0][9].load(true); - SSAShort desaturate = uniforms[0][10].load(true); - SSAInt flags = uniforms[0][11].load(true); + SSAShort light_alpha = uniforms[0][5].load(true); + SSAShort light_red = uniforms[0][6].load(true); + SSAShort light_green = uniforms[0][7].load(true); + SSAShort light_blue = uniforms[0][8].load(true); + SSAShort fade_alpha = uniforms[0][9].load(true); + SSAShort fade_red = uniforms[0][10].load(true); + SSAShort fade_green = uniforms[0][11].load(true); + SSAShort fade_blue = uniforms[0][12].load(true); + SSAShort desaturate = uniforms[0][13].load(true); + SSAInt flags = uniforms[0][14].load(true); shade_constants.light = SSAVec4i(light_blue.zext_int(), light_green.zext_int(), light_red.zext_int(), light_alpha.zext_int()); shade_constants.fade = SSAVec4i(fade_blue.zext_int(), fade_green.zext_int(), fade_red.zext_int(), fade_alpha.zext_int()); shade_constants.desaturate = desaturate.zext_int(); diff --git a/src/r_compiler/fixedfunction/drawtrianglecodegen.h b/src/r_compiler/fixedfunction/drawtrianglecodegen.h index 8452b250fb..d5539ef22f 100644 --- a/src/r_compiler/fixedfunction/drawtrianglecodegen.h +++ b/src/r_compiler/fixedfunction/drawtrianglecodegen.h @@ -33,20 +33,23 @@ struct SSATriVertex class DrawTriangleCodegen : public DrawerCodegen { public: - void Generate(TriDrawVariant variant, bool truecolor, SSAValue args, SSAValue thread_data); + void Generate(TriDrawVariant variant, TriBlendMode blendmode, bool truecolor, SSAValue args, SSAValue thread_data); private: - void 
LoadArgs(TriDrawVariant variant, bool truecolor, SSAValue args, SSAValue thread_data); + void LoadArgs(SSAValue args, SSAValue thread_data); SSATriVertex LoadTriVertex(SSAValue v); void LoadUniforms(SSAValue uniforms); - void Setup(TriDrawVariant variant, bool truecolor); + void Setup(); SSAInt FloatTo28_4(SSAFloat v); - void LoopBlockY(TriDrawVariant variant, bool truecolor); - void LoopBlockX(TriDrawVariant variant, bool truecolor); - void LoopFullBlock(TriDrawVariant variant, bool truecolor); - void LoopPartialBlock(TriDrawVariant variant, bool truecolor); + void LoopBlockY(); + void LoopBlockX(); + void LoopFullBlock(); + void LoopPartialBlock(); - void ProcessPixel(SSAUBytePtr buffer, SSAIntPtr subsectorbuffer, SSAInt *varying, TriDrawVariant variant, bool truecolor); + void ProcessPixel(SSAUBytePtr buffer, SSAIntPtr subsectorbuffer, SSAInt *varying); + + SSAVec4i TranslateSample(SSAInt uvoffset); + SSAVec4i Sample(SSAInt uvoffset); void SetStencilBlock(SSAInt block); void StencilSet(SSAInt x, SSAInt y, SSAUByte value); @@ -58,6 +61,10 @@ private: SSAFloat gradx(SSAFloat x0, SSAFloat y0, SSAFloat x1, SSAFloat y1, SSAFloat x2, SSAFloat y2, SSAFloat c0, SSAFloat c1, SSAFloat c2); SSAFloat grady(SSAFloat x0, SSAFloat y0, SSAFloat x1, SSAFloat y1, SSAFloat x2, SSAFloat y2, SSAFloat c0, SSAFloat c1, SSAFloat c2); + TriDrawVariant variant; + TriBlendMode blendmode; + bool truecolor; + SSAStack stack_C1, stack_C2, stack_C3; SSAStack stack_y; SSAStack stack_dest; @@ -82,7 +89,8 @@ private: SSAUBytePtr texturePixels; SSAInt textureWidth; SSAInt textureHeight; - SSAInt solidcolor; + SSAUBytePtr translation; + SSAInt color, srcalpha, destalpha; SSAInt light; SSAInt subsectorDepth; diff --git a/src/r_compiler/llvmdrawers.cpp b/src/r_compiler/llvmdrawers.cpp index 458d90d1a3..d9a43d3d78 100644 --- a/src/r_compiler/llvmdrawers.cpp +++ b/src/r_compiler/llvmdrawers.cpp @@ -57,7 +57,7 @@ public: void StopLogFatalErrors(); template - Func *GetProcAddress(const char *name) { 
return reinterpret_cast(PointerToFunction(name)); } + Func *GetProcAddress(const std::string &name) { return reinterpret_cast(PointerToFunction(name.c_str())); } llvm::LLVMContext &context() { return *mContext; } llvm::Module *module() { return mModule.get(); } @@ -82,7 +82,7 @@ private: void CodegenDrawSpan(const char *name, DrawSpanVariant variant); void CodegenDrawWall(const char *name, DrawWallVariant variant, int columns); void CodegenDrawSky(const char *name, DrawSkyVariant variant, int columns); - void CodegenDrawTriangle(const char *name, TriDrawVariant variant, bool truecolor); + void CodegenDrawTriangle(const std::string &name, TriDrawVariant variant, TriBlendMode blendmode, bool truecolor); static llvm::Type *GetDrawColumnArgsStruct(llvm::LLVMContext &context); static llvm::Type *GetDrawSpanArgsStruct(llvm::LLVMContext &context); @@ -186,17 +186,18 @@ LLVMDrawersImpl::LLVMDrawersImpl() CodegenDrawSky("DrawSky4", DrawSkyVariant::Single, 4); CodegenDrawSky("DrawDoubleSky1", DrawSkyVariant::Double, 1); CodegenDrawSky("DrawDoubleSky4", DrawSkyVariant::Double, 4); - CodegenDrawTriangle("TriDraw8", TriDrawVariant::Draw, false); - CodegenDrawTriangle("TriDraw32", TriDrawVariant::Draw, true); - CodegenDrawTriangle("TriDrawSubsector8", TriDrawVariant::DrawSubsector, false); - CodegenDrawTriangle("TriDrawSubsector32", TriDrawVariant::DrawSubsector, true); - CodegenDrawTriangle("TriDrawShadedSubsector8", TriDrawVariant::DrawShadedSubsector, false); - CodegenDrawTriangle("TriDrawShadedSubsector32", TriDrawVariant::DrawShadedSubsector, true); - CodegenDrawTriangle("TriFillSubsector8", TriDrawVariant::FillSubsector, false); - CodegenDrawTriangle("TriFillSubsector32", TriDrawVariant::FillSubsector, true); - CodegenDrawTriangle("TriFill8", TriDrawVariant::Fill, false); - CodegenDrawTriangle("TriFill32", TriDrawVariant::Fill, true); - CodegenDrawTriangle("TriStencil", TriDrawVariant::Stencil, false); + for (int i = 0; i < NumTriBlendModes(); i++) + { + 
CodegenDrawTriangle("TriDraw8_" + std::to_string(i), TriDrawVariant::DrawNormal, (TriBlendMode)i, false); + CodegenDrawTriangle("TriDraw32_" + std::to_string(i), TriDrawVariant::DrawNormal, (TriBlendMode)i, true); + CodegenDrawTriangle("TriFill8_" + std::to_string(i), TriDrawVariant::FillNormal, (TriBlendMode)i, false); + CodegenDrawTriangle("TriFill32_" + std::to_string(i), TriDrawVariant::FillNormal, (TriBlendMode)i, true); + CodegenDrawTriangle("TriDrawSubsector8_" + std::to_string(i), TriDrawVariant::DrawSubsector, (TriBlendMode)i, false); + CodegenDrawTriangle("TriDrawSubsector32_" + std::to_string(i), TriDrawVariant::DrawSubsector, (TriBlendMode)i, true); + CodegenDrawTriangle("TriFillSubsector8_" + std::to_string(i), TriDrawVariant::FillSubsector, (TriBlendMode)i, false); + CodegenDrawTriangle("TriFillSubsector32_" + std::to_string(i), TriDrawVariant::FillSubsector, (TriBlendMode)i, true); + } + CodegenDrawTriangle("TriStencil", TriDrawVariant::Stencil, TriBlendMode::Copy, false); mProgram.CreateEE(); @@ -262,16 +263,17 @@ LLVMDrawersImpl::LLVMDrawersImpl() DrawSky4 = mProgram.GetProcAddress("DrawSky4"); DrawDoubleSky1 = mProgram.GetProcAddress("DrawDoubleSky1"); DrawDoubleSky4 = mProgram.GetProcAddress("DrawDoubleSky4"); - TriDraw8 = mProgram.GetProcAddress("TriDraw8"); - TriDraw32 = mProgram.GetProcAddress("TriDraw32"); - TriDrawSubsector8 = mProgram.GetProcAddress("TriDrawSubsector8"); - TriDrawSubsector32 = mProgram.GetProcAddress("TriDrawSubsector32"); - TriDrawShadedSubsector8 = mProgram.GetProcAddress("TriDrawShadedSubsector8"); - TriDrawShadedSubsector32 = mProgram.GetProcAddress("TriDrawShadedSubsector32"); - TriFillSubsector8 = mProgram.GetProcAddress("TriFillSubsector8"); - TriFillSubsector32 = mProgram.GetProcAddress("TriFillSubsector32"); - TriFill8 = mProgram.GetProcAddress("TriFill8"); - TriFill32 = mProgram.GetProcAddress("TriFill32"); + for (int i = 0; i < NumTriBlendModes(); i++) + { + 
TriDrawNormal8.push_back(mProgram.GetProcAddress("TriDraw8_" + std::to_string(i))); + TriDrawNormal32.push_back(mProgram.GetProcAddress("TriDraw32_" + std::to_string(i))); + TriFillNormal8.push_back(mProgram.GetProcAddress("TriFill8_" + std::to_string(i))); + TriFillNormal32.push_back(mProgram.GetProcAddress("TriFill32_" + std::to_string(i))); + TriDrawSubsector8.push_back(mProgram.GetProcAddress("TriDrawSubsector8_" + std::to_string(i))); + TriDrawSubsector32.push_back(mProgram.GetProcAddress("TriDrawSubsector32_" + std::to_string(i))); + TriFillSubsector8.push_back(mProgram.GetProcAddress("TriFillSubsector8_" + std::to_string(i))); + TriFillSubsector32.push_back(mProgram.GetProcAddress("TriFillSubsector32_" + std::to_string(i))); + } TriStencil = mProgram.GetProcAddress("TriStencil"); #if 0 @@ -383,7 +385,7 @@ void LLVMDrawersImpl::CodegenDrawSky(const char *name, DrawSkyVariant variant, i I_FatalError("verifyFunction failed for CodegenDrawSky()"); } -void LLVMDrawersImpl::CodegenDrawTriangle(const char *name, TriDrawVariant variant, bool truecolor) +void LLVMDrawersImpl::CodegenDrawTriangle(const std::string &name, TriDrawVariant variant, TriBlendMode blendmode, bool truecolor) { llvm::IRBuilder<> builder(mProgram.context()); SSAScope ssa_scope(&mProgram.context(), mProgram.module(), &builder); @@ -394,12 +396,12 @@ void LLVMDrawersImpl::CodegenDrawTriangle(const char *name, TriDrawVariant varia function.create_public(); DrawTriangleCodegen codegen; - codegen.Generate(variant, truecolor, function.parameter(0), function.parameter(1)); + codegen.Generate(variant, blendmode, truecolor, function.parameter(0), function.parameter(1)); builder.CreateRetVoid(); if (llvm::verifyFunction(*function.func)) - I_FatalError("verifyFunction failed for CodegenDrawTriangle()"); + I_FatalError("verifyFunction failed for CodegenDrawTriangle(%d, %d, %d)", (int)variant, (int)blendmode, (int)truecolor); } llvm::Type *LLVMDrawersImpl::GetDrawColumnArgsStruct(llvm::LLVMContext &context) 
@@ -529,6 +531,9 @@ llvm::Type *LLVMDrawersImpl::GetTriUniformsStruct(llvm::LLVMContext &context) std::vector elements; elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t light; elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t subsectorDepth; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t color; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t srcalpha; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t destalpha; elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_alpha; elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_red; elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_green; @@ -558,7 +563,7 @@ llvm::Type *LLVMDrawersImpl::GetTriDrawTriangleArgs(llvm::LLVMContext &context) elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint8_t *texturePixels; elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t textureWidth; elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t textureHeight; - elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t solidcolor; + elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint8_t *translation; elements.push_back(GetTriUniformsStruct(context)); // const TriUniforms *uniforms; elements.push_back(llvm::Type::getInt8PtrTy(context)); // uint8_t *stencilValues; elements.push_back(llvm::Type::getInt32PtrTy(context)); // uint32_t *stencilMasks; diff --git a/src/r_compiler/llvmdrawers.h b/src/r_compiler/llvmdrawers.h index 9c3a3e45f5..3b13962836 100644 --- a/src/r_compiler/llvmdrawers.h +++ b/src/r_compiler/llvmdrawers.h @@ -215,7 +215,9 @@ struct TriUniforms { uint32_t light; uint32_t subsectorDepth; - + uint32_t color; + uint32_t srcalpha; + uint32_t destalpha; uint16_t light_alpha; uint16_t light_red; uint16_t light_green; @@ -250,7 +252,7 @@ struct TriDrawTriangleArgs const uint8_t *texturePixels; uint32_t textureWidth; uint32_t 
textureHeight; - uint32_t solidcolor; + const uint8_t *translation; const TriUniforms *uniforms; uint8_t *stencilValues; uint32_t *stencilMasks; @@ -262,15 +264,31 @@ struct TriDrawTriangleArgs enum class TriDrawVariant { - Draw, - DrawMasked, - Fill, + DrawNormal, + FillNormal, DrawSubsector, - DrawShadedSubsector, FillSubsector, - Stencil, + FuzzSubsector, + Stencil }; +enum class TriBlendMode +{ + Copy, // blend_copy(shade(fg)) + AlphaBlend, // blend_alpha_blend(shade(fg), bg) + AddSolid, // blend_add(shade(fg), bg, srcalpha, destalpha) + Add, // blend_add(shade(fg), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)) + Sub, // blend_sub(shade(fg), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)) + RevSub, // blend_revsub(shade(fg), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)) + Shaded, // blend_add(color, bg, fg.a, 1 - fg.a) + TranslateCopy, // blend_copy(shade(translate(fg))) + TranslateAdd, // blend_add(shade(translate(fg)), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)) + TranslateSub, // blend_sub(shade(translate(fg)), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)) + TranslateRevSub // blend_revsub(shade(translate(fg)), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)) +}; + +inline int NumTriBlendModes() { return (int)TriBlendMode::TranslateRevSub + 1; } + class LLVMDrawers { public: @@ -346,16 +364,14 @@ public: void(*DrawDoubleSky1)(const DrawSkyArgs *, const WorkerThreadData *) = nullptr; void(*DrawDoubleSky4)(const DrawSkyArgs *, const WorkerThreadData *) = nullptr; - void(*TriDraw8)(const TriDrawTriangleArgs *, WorkerThreadData *) = nullptr; - void(*TriDraw32)(const TriDrawTriangleArgs *, WorkerThreadData *) = nullptr; - void(*TriDrawSubsector8)(const TriDrawTriangleArgs *, WorkerThreadData *) = nullptr; - void(*TriDrawSubsector32)(const TriDrawTriangleArgs *, WorkerThreadData *) = nullptr; - void(*TriDrawShadedSubsector8)(const TriDrawTriangleArgs *, WorkerThreadData *) = nullptr; - void(*TriDrawShadedSubsector32)(const TriDrawTriangleArgs *, 
WorkerThreadData *) = nullptr; - void(*TriFillSubsector8)(const TriDrawTriangleArgs *, WorkerThreadData *) = nullptr; - void(*TriFillSubsector32)(const TriDrawTriangleArgs *, WorkerThreadData *) = nullptr; - void(*TriFill8)(const TriDrawTriangleArgs *, WorkerThreadData *) = nullptr; - void(*TriFill32)(const TriDrawTriangleArgs *, WorkerThreadData *) = nullptr; + std::vector TriDrawNormal8; + std::vector TriDrawNormal32; + std::vector TriFillNormal8; + std::vector TriFillNormal32; + std::vector TriDrawSubsector8; + std::vector TriDrawSubsector32; + std::vector TriFillSubsector8; + std::vector TriFillSubsector32; void(*TriStencil)(const TriDrawTriangleArgs *, WorkerThreadData *) = nullptr; private: diff --git a/src/r_poly_decal.cpp b/src/r_poly_decal.cpp index 0c6c427a84..5641bfcd1c 100644 --- a/src/r_poly_decal.cpp +++ b/src/r_poly_decal.cpp @@ -141,6 +141,7 @@ void RenderPolyDecal::Render(const TriMatrix &worldToClip, DBaseDecal *decal, co uniforms.flags = 0; } uniforms.subsectorDepth = subsectorDepth; + uniforms.color = decal->AlphaColor; PolyDrawArgs args; args.uniforms = uniforms; @@ -151,7 +152,6 @@ void RenderPolyDecal::Render(const TriMatrix &worldToClip, DBaseDecal *decal, co args.stenciltestvalue = 0; args.stencilwritevalue = 1; args.SetTexture(tex); - args.solidcolor = decal->AlphaColor; //mode = R_SetPatchStyle (decal->RenderStyle, (float)decal->Alpha, decal->Translation, decal->AlphaColor); - PolyTriangleDrawer::draw(args, TriDrawVariant::DrawShadedSubsector); + PolyTriangleDrawer::draw(args, TriDrawVariant::DrawSubsector, TriBlendMode::Shaded); } diff --git a/src/r_poly_particle.cpp b/src/r_poly_particle.cpp index be80300713..e5797a1089 100644 --- a/src/r_poly_particle.cpp +++ b/src/r_poly_particle.cpp @@ -83,8 +83,8 @@ void RenderPolyParticle::Render(const TriMatrix &worldToClip, particle_t *partic uniforms.flags = 0; } uniforms.subsectorDepth = subsectorDepth; - uint32_t alpha = particle->trans; + uniforms.color = (alpha << 24) | (particle->color & 
0xffffff); PolyDrawArgs args; args.uniforms = uniforms; @@ -94,6 +94,5 @@ void RenderPolyParticle::Render(const TriMatrix &worldToClip, particle_t *partic args.ccw = true; args.stenciltestvalue = 0; args.stencilwritevalue = 1; - args.solidcolor = (alpha << 24) | (particle->color & 0xffffff); - PolyTriangleDrawer::draw(args, TriDrawVariant::FillSubsector); + PolyTriangleDrawer::draw(args, TriDrawVariant::FillSubsector, TriBlendMode::AlphaBlend); } diff --git a/src/r_poly_plane.cpp b/src/r_poly_plane.cpp index 9a425598bb..9005e32f15 100644 --- a/src/r_poly_plane.cpp +++ b/src/r_poly_plane.cpp @@ -140,8 +140,8 @@ void RenderPolyPlane::Render3DFloor(const TriMatrix &worldToClip, subsector_t *s args.stenciltestvalue = 0; args.stencilwritevalue = 1; args.SetTexture(tex); - PolyTriangleDrawer::draw(args, TriDrawVariant::Draw); - PolyTriangleDrawer::draw(args, TriDrawVariant::Stencil); + PolyTriangleDrawer::draw(args, TriDrawVariant::DrawNormal, TriBlendMode::Copy); + PolyTriangleDrawer::draw(args, TriDrawVariant::Stencil, TriBlendMode::Copy); } void RenderPolyPlane::Render(const TriMatrix &worldToClip, subsector_t *sub, uint32_t subsectorDepth, bool ceiling, double skyHeight) @@ -235,13 +235,13 @@ void RenderPolyPlane::Render(const TriMatrix &worldToClip, subsector_t *sub, uin if (!isSky) { args.SetTexture(tex); - PolyTriangleDrawer::draw(args, TriDrawVariant::Draw); - PolyTriangleDrawer::draw(args, TriDrawVariant::Stencil); + PolyTriangleDrawer::draw(args, TriDrawVariant::DrawNormal, TriBlendMode::Copy); + PolyTriangleDrawer::draw(args, TriDrawVariant::Stencil, TriBlendMode::Copy); } else { args.stencilwritevalue = 255; - PolyTriangleDrawer::draw(args, TriDrawVariant::Stencil); + PolyTriangleDrawer::draw(args, TriDrawVariant::Stencil, TriBlendMode::Copy); for (uint32_t i = 0; i < sub->numlines; i++) { @@ -309,7 +309,7 @@ void RenderPolyPlane::Render(const TriMatrix &worldToClip, subsector_t *sub, uin args.vinput = wallvert; args.vcount = 4; - 
PolyTriangleDrawer::draw(args, TriDrawVariant::Stencil); + PolyTriangleDrawer::draw(args, TriDrawVariant::Stencil, TriBlendMode::Copy); } } } diff --git a/src/r_poly_sky.cpp b/src/r_poly_sky.cpp index fc6963dbb8..b0b06f2625 100644 --- a/src/r_poly_sky.cpp +++ b/src/r_poly_sky.cpp @@ -79,7 +79,7 @@ void PolySkyDome::RenderRow(PolyDrawArgs &args, int row) args.vcount = mPrimStart[row + 1] - mPrimStart[row]; args.mode = TriangleDrawMode::Strip; args.ccw = false; - PolyTriangleDrawer::draw(args, TriDrawVariant::Draw); + PolyTriangleDrawer::draw(args, TriDrawVariant::DrawNormal, TriBlendMode::Copy); } void PolySkyDome::RenderCapColorRow(PolyDrawArgs &args, FTexture *skytex, int row, bool bottomCap) @@ -92,8 +92,8 @@ void PolySkyDome::RenderCapColorRow(PolyDrawArgs &args, FTexture *skytex, int ro args.vcount = mPrimStart[row + 1] - mPrimStart[row]; args.mode = TriangleDrawMode::Fan; args.ccw = bottomCap; - args.solidcolor = solid; - PolyTriangleDrawer::draw(args, TriDrawVariant::Fill); + args.uniforms.color = solid; + PolyTriangleDrawer::draw(args, TriDrawVariant::FillNormal, TriBlendMode::Copy); } void PolySkyDome::CreateDome() diff --git a/src/r_poly_sprite.cpp b/src/r_poly_sprite.cpp index e1c6fcc14b..43bdf40af8 100644 --- a/src/r_poly_sprite.cpp +++ b/src/r_poly_sprite.cpp @@ -136,7 +136,7 @@ void RenderPolySprite::Render(const TriMatrix &worldToClip, AActor *thing, subse args.stenciltestvalue = 0; args.stencilwritevalue = 1; args.SetTexture(tex); - PolyTriangleDrawer::draw(args, TriDrawVariant::DrawSubsector); + PolyTriangleDrawer::draw(args, TriDrawVariant::DrawSubsector, TriBlendMode::AlphaBlend); } bool RenderPolySprite::IsThingCulled(AActor *thing) diff --git a/src/r_poly_triangle.cpp b/src/r_poly_triangle.cpp index d661fe0d7f..07a4566dd1 100644 --- a/src/r_poly_triangle.cpp +++ b/src/r_poly_triangle.cpp @@ -72,44 +72,32 @@ void PolyTriangleDrawer::set_viewport(int x, int y, int width, int height, DCanv dest_height = clamp(viewport_y + viewport_height, 0, 
dest_height - offsety); } -void PolyTriangleDrawer::draw(const PolyDrawArgs &args, TriDrawVariant variant) +void PolyTriangleDrawer::draw(const PolyDrawArgs &args, TriDrawVariant variant, TriBlendMode blendmode) { if (dest_bgra) - DrawerCommandQueue::QueueCommand(args, variant); + DrawerCommandQueue::QueueCommand(args, variant, blendmode); else - draw_arrays(args, variant, nullptr); + draw_arrays(args, variant, blendmode, nullptr); } -void PolyTriangleDrawer::draw_arrays(const PolyDrawArgs &drawargs, TriDrawVariant variant, WorkerThreadData *thread) +void PolyTriangleDrawer::draw_arrays(const PolyDrawArgs &drawargs, TriDrawVariant variant, TriBlendMode blendmode, WorkerThreadData *thread) { if (drawargs.vcount < 3) return; auto llvm = LLVMDrawers::Instance(); void(*drawfunc)(const TriDrawTriangleArgs *, WorkerThreadData *); -#if 1 + int bmode = (int)blendmode; switch (variant) { default: - case TriDrawVariant::Draw: drawfunc = dest_bgra ? llvm->TriDraw32: llvm->TriDraw8; break; - case TriDrawVariant::Fill: drawfunc = dest_bgra ? llvm->TriFill32 : llvm->TriFill8; break; - case TriDrawVariant::DrawSubsector: drawfunc = dest_bgra ? llvm->TriDrawSubsector32 : llvm->TriDrawSubsector8; break; - case TriDrawVariant::DrawShadedSubsector: drawfunc = dest_bgra ? llvm->TriDrawShadedSubsector32 : llvm->TriDrawShadedSubsector8; break; - case TriDrawVariant::FillSubsector: drawfunc = dest_bgra ? llvm->TriFillSubsector32 : llvm->TriFillSubsector8; break; + case TriDrawVariant::DrawNormal: drawfunc = dest_bgra ? llvm->TriDrawNormal32[bmode] : llvm->TriDrawNormal8[bmode]; break; + case TriDrawVariant::FillNormal: drawfunc = dest_bgra ? llvm->TriFillNormal32[bmode] : llvm->TriFillNormal8[bmode]; break; + case TriDrawVariant::DrawSubsector: drawfunc = dest_bgra ? llvm->TriDrawSubsector32[bmode] : llvm->TriDrawSubsector8[bmode]; break; + case TriDrawVariant::FuzzSubsector: + case TriDrawVariant::FillSubsector: drawfunc = dest_bgra ? 
llvm->TriFillSubsector32[bmode] : llvm->TriFillSubsector8[bmode]; break; case TriDrawVariant::Stencil: drawfunc = llvm->TriStencil; break; } -#else - switch (variant) - { - default: - case TriDrawVariant::Draw: drawfunc = dest_bgra ? ScreenPolyTriangleDrawer::draw32 : ScreenPolyTriangleDrawer::draw; break; - case TriDrawVariant::FillSubsector: - case TriDrawVariant::Fill: drawfunc = dest_bgra ? ScreenPolyTriangleDrawer::fill32 : ScreenPolyTriangleDrawer::fill; break; - case TriDrawVariant::DrawShadedSubsector: - case TriDrawVariant::DrawSubsector: drawfunc = dest_bgra ? ScreenPolyTriangleDrawer::drawsubsector32 : llvm->TriDrawSubsector8; break; - case TriDrawVariant::Stencil: drawfunc = ScreenPolyTriangleDrawer::stencil; break; - } -#endif TriDrawTriangleArgs args; args.dest = dest; @@ -121,7 +109,7 @@ void PolyTriangleDrawer::draw_arrays(const PolyDrawArgs &drawargs, TriDrawVarian args.texturePixels = drawargs.texturePixels; args.textureWidth = drawargs.textureWidth; args.textureHeight = drawargs.textureHeight; - args.solidcolor = drawargs.solidcolor; + args.translation = drawargs.translation; args.uniforms = &drawargs.uniforms; args.stencilTestValue = drawargs.stenciltestvalue; args.stencilWriteValue = drawargs.stencilwritevalue; @@ -336,6 +324,7 @@ void PolyTriangleDrawer::clipedge(const TriVertex *verts, TriVertex *clippedvert ///////////////////////////////////////////////////////////////////////////// +#if 0 void ScreenPolyTriangleDrawer::draw(const TriDrawTriangleArgs *args, WorkerThreadData *thread) { uint8_t *dest = args->dest; @@ -1641,11 +1630,12 @@ float ScreenPolyTriangleDrawer::grady(float x0, float y0, float x1, float y1, fl float bottom = -((x1 - x2) * (y0 - y2) - (x0 - x2) * (y1 - y2)); return top / bottom; } +#endif ///////////////////////////////////////////////////////////////////////////// -DrawPolyTrianglesCommand::DrawPolyTrianglesCommand(const PolyDrawArgs &args, TriDrawVariant variant) - : args(args), variant(variant) 
+DrawPolyTrianglesCommand::DrawPolyTrianglesCommand(const PolyDrawArgs &args, TriDrawVariant variant, TriBlendMode blendmode) + : args(args), variant(variant), blendmode(blendmode) { } @@ -1658,7 +1648,7 @@ void DrawPolyTrianglesCommand::Execute(DrawerThread *thread) thread_data.pass_end_y = thread->pass_end_y; thread_data.temp = thread->dc_temp_rgba; - PolyTriangleDrawer::draw_arrays(args, variant, &thread_data); + PolyTriangleDrawer::draw_arrays(args, variant, blendmode, &thread_data); } FString DrawPolyTrianglesCommand::DebugInfo() diff --git a/src/r_poly_triangle.h b/src/r_poly_triangle.h index 8f2fcf3d04..cc8558985c 100644 --- a/src/r_poly_triangle.h +++ b/src/r_poly_triangle.h @@ -39,7 +39,7 @@ public: const uint8_t *texturePixels = nullptr; int textureWidth = 0; int textureHeight = 0; - uint32_t solidcolor = 0; + const uint8_t *translation = nullptr; uint8_t stenciltestvalue = 0; uint8_t stencilwritevalue = 0; @@ -58,11 +58,11 @@ class PolyTriangleDrawer { public: static void set_viewport(int x, int y, int width, int height, DCanvas *canvas); - static void draw(const PolyDrawArgs &args, TriDrawVariant variant); + static void draw(const PolyDrawArgs &args, TriDrawVariant variant, TriBlendMode blendmode); private: static TriVertex shade_vertex(const TriUniforms &uniforms, TriVertex v); - static void draw_arrays(const PolyDrawArgs &args, TriDrawVariant variant, WorkerThreadData *thread); + static void draw_arrays(const PolyDrawArgs &args, TriDrawVariant variant, TriBlendMode blendmode, WorkerThreadData *thread); static void draw_shaded_triangle(const TriVertex *vertices, bool ccw, TriDrawTriangleArgs *args, WorkerThreadData *thread, void(*drawfunc)(const TriDrawTriangleArgs *, WorkerThreadData *)); static bool cullhalfspace(float clipdistance1, float clipdistance2, float &t1, float &t2); static void clipedge(const TriVertex *verts, TriVertex *clippedvert, int &numclipvert); @@ -188,6 +188,7 @@ private: std::vector masks; }; +#if 0 class ScreenPolyTriangleDrawer 
{ public: @@ -204,11 +205,12 @@ private: static float gradx(float x0, float y0, float x1, float y1, float x2, float y2, float c0, float c1, float c2); static float grady(float x0, float y0, float x1, float y1, float x2, float y2, float c0, float c1, float c2); }; +#endif class DrawPolyTrianglesCommand : public DrawerCommand { public: - DrawPolyTrianglesCommand(const PolyDrawArgs &args, TriDrawVariant variant); + DrawPolyTrianglesCommand(const PolyDrawArgs &args, TriDrawVariant variant, TriBlendMode blendmode); void Execute(DrawerThread *thread) override; FString DebugInfo() override; @@ -216,6 +218,7 @@ public: private: PolyDrawArgs args; TriDrawVariant variant; + TriBlendMode blendmode; }; #endif diff --git a/src/r_poly_wall.cpp b/src/r_poly_wall.cpp index 1f1908ab47..e8bad52a77 100644 --- a/src/r_poly_wall.cpp +++ b/src/r_poly_wall.cpp @@ -193,12 +193,12 @@ void RenderPolyWall::Render(const TriMatrix &worldToClip) if (!Masked) { - PolyTriangleDrawer::draw(args, TriDrawVariant::Draw); - PolyTriangleDrawer::draw(args, TriDrawVariant::Stencil); + PolyTriangleDrawer::draw(args, TriDrawVariant::DrawNormal, TriBlendMode::Copy); + PolyTriangleDrawer::draw(args, TriDrawVariant::Stencil, TriBlendMode::Copy); } else { - PolyTriangleDrawer::draw(args, TriDrawVariant::DrawSubsector); + PolyTriangleDrawer::draw(args, TriDrawVariant::DrawSubsector, TriBlendMode::AlphaBlend); } RenderPolyDecal::RenderWallDecals(worldToClip, Line, SubsectorDepth); diff --git a/src/r_poly_wallsprite.cpp b/src/r_poly_wallsprite.cpp index e2d1a9e86d..ccebe5f0e1 100644 --- a/src/r_poly_wallsprite.cpp +++ b/src/r_poly_wallsprite.cpp @@ -121,5 +121,5 @@ void RenderPolyWallSprite::Render(const TriMatrix &worldToClip, AActor *thing, s args.stenciltestvalue = 0; args.stencilwritevalue = 1; args.SetTexture(tex); - PolyTriangleDrawer::draw(args, TriDrawVariant::DrawSubsector); + PolyTriangleDrawer::draw(args, TriDrawVariant::DrawSubsector, TriBlendMode::AlphaBlend); } From 
17ed585c1f1edc94fa3a0c695bbedf40447a95de Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 19 Nov 2016 12:30:58 +0100 Subject: [PATCH 344/912] Fix vsync not working --- src/gl/system/gl_swframebuffer.cpp | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/src/gl/system/gl_swframebuffer.cpp b/src/gl/system/gl_swframebuffer.cpp index 0d82aa2217..38d7d36791 100644 --- a/src/gl/system/gl_swframebuffer.cpp +++ b/src/gl/system/gl_swframebuffer.cpp @@ -1543,20 +1543,21 @@ void OpenGLSWFrameBuffer::GetFlashedPalette(PalEntry pal[256]) void OpenGLSWFrameBuffer::SetVSync(bool vsync) { - if (VSync != vsync) - { - VSync = vsync; - Reset(); - } + // Switch to the default frame buffer because Nvidia's driver associates the vsync state with the bound FB object. + GLint oldDrawFramebufferBinding = 0, oldReadFramebufferBinding = 0; + glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &oldDrawFramebufferBinding); + glGetIntegerv(GL_READ_FRAMEBUFFER_BINDING, &oldReadFramebufferBinding); + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); + glBindFramebuffer(GL_READ_FRAMEBUFFER, 0); + Super::SetVSync(vsync); + + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, oldDrawFramebufferBinding); + glBindFramebuffer(GL_READ_FRAMEBUFFER, oldReadFramebufferBinding); } void OpenGLSWFrameBuffer::NewRefreshRate() { - if (IsFullscreen()) - { - Reset(); - } } void OpenGLSWFrameBuffer::SetBlendingRect(int x1, int y1, int x2, int y2) From 114fda1ed5bf6445b25ed668d3a7c326f79327d8 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 19 Nov 2016 13:32:57 +0100 Subject: [PATCH 345/912] Added missing TranslateAlphaBlend and created a helper function for specifying the translation --- .../fixedfunction/drawtrianglecodegen.cpp | 4 ++++ src/r_compiler/llvmdrawers.h | 1 + src/r_poly_sprite.cpp | 9 +++++-- src/r_poly_triangle.h | 24 +++++++++++++++++++ 4 files changed, 36 insertions(+), 2 deletions(-) diff --git a/src/r_compiler/fixedfunction/drawtrianglecodegen.cpp 
b/src/r_compiler/fixedfunction/drawtrianglecodegen.cpp index 21995bb636..1ad98a8fb3 100644 --- a/src/r_compiler/fixedfunction/drawtrianglecodegen.cpp +++ b/src/r_compiler/fixedfunction/drawtrianglecodegen.cpp @@ -514,6 +514,10 @@ void DrawTriangleCodegen::ProcessPixel(SSAUBytePtr buffer, SSAIntPtr subsectorbu fg = TranslateSample(uvoffset); output = blend_copy(shade_bgra_simple(fg, currentlight)); break; + case TriBlendMode::TranslateAlphaBlend: + fg = TranslateSample(uvoffset); + output = blend_alpha_blend(shade_bgra_simple(fg, currentlight), bg); break; + break; case TriBlendMode::TranslateAdd: fg = TranslateSample(uvoffset); output = blend_add(shade_bgra_simple(fg, currentlight), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); diff --git a/src/r_compiler/llvmdrawers.h b/src/r_compiler/llvmdrawers.h index 3b13962836..e421537434 100644 --- a/src/r_compiler/llvmdrawers.h +++ b/src/r_compiler/llvmdrawers.h @@ -282,6 +282,7 @@ enum class TriBlendMode RevSub, // blend_revsub(shade(fg), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)) Shaded, // blend_add(color, bg, fg.a, 1 - fg.a) TranslateCopy, // blend_copy(shade(translate(fg))) + TranslateAlphaBlend, // blend_alpha_blend(shade(translate(fg)), bg) TranslateAdd, // blend_add(shade(translate(fg)), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)) TranslateSub, // blend_sub(shade(translate(fg)), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)) TranslateRevSub // blend_revsub(shade(translate(fg)), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)) diff --git a/src/r_poly_sprite.cpp b/src/r_poly_sprite.cpp index 43bdf40af8..ff102366aa 100644 --- a/src/r_poly_sprite.cpp +++ b/src/r_poly_sprite.cpp @@ -135,8 +135,13 @@ void RenderPolySprite::Render(const TriMatrix &worldToClip, AActor *thing, subse args.ccw = true; args.stenciltestvalue = 0; args.stencilwritevalue = 1; - args.SetTexture(tex); - PolyTriangleDrawer::draw(args, TriDrawVariant::DrawSubsector, TriBlendMode::AlphaBlend); + args.translation = nullptr; + 
args.SetTexture(tex, thing->Translation); + + if (args.translation) + PolyTriangleDrawer::draw(args, TriDrawVariant::DrawSubsector, TriBlendMode::TranslateAlphaBlend); + else + PolyTriangleDrawer::draw(args, TriDrawVariant::DrawSubsector, TriBlendMode::AlphaBlend); } bool RenderPolySprite::IsThingCulled(AActor *thing) diff --git a/src/r_poly_triangle.h b/src/r_poly_triangle.h index cc8558985c..ad00d1a671 100644 --- a/src/r_poly_triangle.h +++ b/src/r_poly_triangle.h @@ -25,6 +25,7 @@ #define __R_POLY_TRIANGLE__ #include "r_triangle.h" +#include "r_data/r_translate.h" struct TriDrawTriangleArgs; @@ -51,6 +52,29 @@ public: texturePixels = (const uint8_t *)texture->GetPixelsBgra(); else texturePixels = texture->GetPixels(); + translation = nullptr; + } + + void SetTexture(FTexture *texture, uint32_t translationID) + { + if (translationID != -1 && translationID != 0) + { + FRemapTable *table = TranslationToTable(translationID); + if (table != nullptr && !table->Inactive) + { + if (r_swtruecolor) + translation = (uint8_t*)table->Palette; + else + translation = table->Remap; + + textureWidth = texture->GetWidth(); + textureHeight = texture->GetHeight(); + texturePixels = texture->GetPixels(); + return; + } + } + + SetTexture(texture); } }; From d197ebca781bcf9e06b9a06a7cca39ba6d59da26 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 19 Nov 2016 14:07:39 +0100 Subject: [PATCH 346/912] Remove redundant line --- src/r_poly_sprite.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/r_poly_sprite.cpp b/src/r_poly_sprite.cpp index ff102366aa..8c74d0d1a3 100644 --- a/src/r_poly_sprite.cpp +++ b/src/r_poly_sprite.cpp @@ -135,7 +135,6 @@ void RenderPolySprite::Render(const TriMatrix &worldToClip, AActor *thing, subse args.ccw = true; args.stenciltestvalue = 0; args.stencilwritevalue = 1; - args.translation = nullptr; args.SetTexture(tex, thing->Translation); if (args.translation) From b4eb49678ab7c4ad25200acf249cbb8fcccf62b8 Mon Sep 17 00:00:00 2001 From: Magnus 
Norddahl Date: Sat, 19 Nov 2016 17:14:37 +0100 Subject: [PATCH 347/912] Cache the optimized bitcode (note: this adds LLVMBitWriter as a dependency) --- src/CMakeLists.txt | 4 +- src/r_compiler/llvm_include.h | 1 + src/r_compiler/llvmdrawers.cpp | 294 +++++++++++++++++++++------------ 3 files changed, 190 insertions(+), 109 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 34e0e72d02..8c2e362209 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -294,7 +294,7 @@ endif() set( LLVM_PRECOMPILED_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../llvm" ) if( NOT WIN32 ) - set( LLVM_COMPONENTS core support asmparser asmprinter bitreader codegen ipo + set( LLVM_COMPONENTS core support asmparser asmprinter bitreader bitwriter codegen ipo irreader transformutils instrumentation profiledata runtimedyld object instcombine linker analysis selectiondag scalaropts vectorize executionengine mc mcdisassembler mcparser mcjit target x86asmprinter x86info x86desc x86utils x86codegen ) @@ -307,7 +307,7 @@ if( NOT WIN32 ) include_directories( ${LLVM_INCLUDE_DIRS} ) set( ZDOOM_LIBS ${ZDOOM_LIBS} ${llvm_libs} ) else() - set( LLVM_COMPONENTS core support asmparser asmprinter bitreader codegen passes ipo + set( LLVM_COMPONENTS core support asmparser asmprinter bitreader bitwriter codegen passes ipo irreader transformutils instrumentation profiledata debuginfocodeview runtimedyld object instcombine linker analysis selectiondag scalaropts vectorize executionengine mc mcdisassembler mcparser mcjit target x86asmprinter x86info x86desc x86utils x86codegen ) diff --git a/src/r_compiler/llvm_include.h b/src/r_compiler/llvm_include.h index ad58666e3d..323eef0143 100644 --- a/src/r_compiler/llvm_include.h +++ b/src/r_compiler/llvm_include.h @@ -75,6 +75,7 @@ #include #include #include +#include #if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 8) #include diff --git a/src/r_compiler/llvmdrawers.cpp b/src/r_compiler/llvmdrawers.cpp index 
d9a43d3d78..a3432eae65 100644 --- a/src/r_compiler/llvmdrawers.cpp +++ b/src/r_compiler/llvmdrawers.cpp @@ -39,6 +39,7 @@ #include "x86.h" #include "c_cvars.h" #include "version.h" +#include "m_misc.h" CUSTOM_CVAR(String, llvm_cpu, "auto", CVAR_ARCHIVE | CVAR_NOINITCALL) { @@ -51,7 +52,9 @@ public: LLVMProgram(); void CreateModule(); - void CreateEE(); + std::string GetTargetCPU(); + bool LoadCachedModule(int version, std::string targetCPU); + void CreateEE(int version, std::string targetCPU, bool optimize); std::string GenerateAssembly(std::string cpuName); std::string DumpModule(); void StopLogFatalErrors(); @@ -64,6 +67,8 @@ public: llvm::ExecutionEngine *engine() { return mEngine.get(); } private: + void SaveCachedModule(llvm::Module *module, int version, std::string targetCPU); + FString GetDrawerCacheFilename(int version, FString cpu); void *PointerToFunction(const char *name); llvm::TargetMachine *machine = nullptr; @@ -122,84 +127,90 @@ LLVMDrawers *LLVMDrawers::Instance() LLVMDrawersImpl::LLVMDrawersImpl() { - mProgram.CreateModule(); - - CodegenDrawColumn("FillColumn", DrawColumnVariant::Fill, DrawColumnMethod::Normal); - CodegenDrawColumn("FillColumnAdd", DrawColumnVariant::FillAdd, DrawColumnMethod::Normal); - CodegenDrawColumn("FillColumnAddClamp", DrawColumnVariant::FillAddClamp, DrawColumnMethod::Normal); - CodegenDrawColumn("FillColumnSubClamp", DrawColumnVariant::FillSubClamp, DrawColumnMethod::Normal); - CodegenDrawColumn("FillColumnRevSubClamp", DrawColumnVariant::FillRevSubClamp, DrawColumnMethod::Normal); - CodegenDrawColumn("DrawColumn", DrawColumnVariant::Draw, DrawColumnMethod::Normal); - CodegenDrawColumn("DrawColumnAdd", DrawColumnVariant::DrawAdd, DrawColumnMethod::Normal); - CodegenDrawColumn("DrawColumnShaded", DrawColumnVariant::DrawShaded, DrawColumnMethod::Normal); - CodegenDrawColumn("DrawColumnAddClamp", DrawColumnVariant::DrawAddClamp, DrawColumnMethod::Normal); - CodegenDrawColumn("DrawColumnSubClamp", 
DrawColumnVariant::DrawSubClamp, DrawColumnMethod::Normal); - CodegenDrawColumn("DrawColumnRevSubClamp", DrawColumnVariant::DrawRevSubClamp, DrawColumnMethod::Normal); - CodegenDrawColumn("DrawColumnTranslated", DrawColumnVariant::DrawTranslated, DrawColumnMethod::Normal); - CodegenDrawColumn("DrawColumnTlatedAdd", DrawColumnVariant::DrawTlatedAdd, DrawColumnMethod::Normal); - CodegenDrawColumn("DrawColumnAddClampTranslated", DrawColumnVariant::DrawAddClampTranslated, DrawColumnMethod::Normal); - CodegenDrawColumn("DrawColumnSubClampTranslated", DrawColumnVariant::DrawSubClampTranslated, DrawColumnMethod::Normal); - CodegenDrawColumn("DrawColumnRevSubClampTranslated", DrawColumnVariant::DrawRevSubClampTranslated, DrawColumnMethod::Normal); - CodegenDrawColumn("DrawColumnRt1", DrawColumnVariant::Draw, DrawColumnMethod::Rt1); - CodegenDrawColumn("DrawColumnRt1Copy", DrawColumnVariant::DrawCopy, DrawColumnMethod::Rt1); - CodegenDrawColumn("DrawColumnRt1Add", DrawColumnVariant::DrawAdd, DrawColumnMethod::Rt1); - CodegenDrawColumn("DrawColumnRt1Shaded", DrawColumnVariant::DrawShaded, DrawColumnMethod::Rt1); - CodegenDrawColumn("DrawColumnRt1AddClamp", DrawColumnVariant::DrawAddClamp, DrawColumnMethod::Rt1); - CodegenDrawColumn("DrawColumnRt1SubClamp", DrawColumnVariant::DrawSubClamp, DrawColumnMethod::Rt1); - CodegenDrawColumn("DrawColumnRt1RevSubClamp", DrawColumnVariant::DrawRevSubClamp, DrawColumnMethod::Rt1); - CodegenDrawColumn("DrawColumnRt1Translated", DrawColumnVariant::DrawTranslated, DrawColumnMethod::Rt1); - CodegenDrawColumn("DrawColumnRt1TlatedAdd", DrawColumnVariant::DrawTlatedAdd, DrawColumnMethod::Rt1); - CodegenDrawColumn("DrawColumnRt1AddClampTranslated", DrawColumnVariant::DrawAddClampTranslated, DrawColumnMethod::Rt1); - CodegenDrawColumn("DrawColumnRt1SubClampTranslated", DrawColumnVariant::DrawSubClampTranslated, DrawColumnMethod::Rt1); - CodegenDrawColumn("DrawColumnRt1RevSubClampTranslated", DrawColumnVariant::DrawRevSubClampTranslated, 
DrawColumnMethod::Rt1); - CodegenDrawColumn("DrawColumnRt4", DrawColumnVariant::Draw, DrawColumnMethod::Rt4); - CodegenDrawColumn("DrawColumnRt4Copy", DrawColumnVariant::DrawCopy, DrawColumnMethod::Rt4); - CodegenDrawColumn("DrawColumnRt4Add", DrawColumnVariant::DrawAdd, DrawColumnMethod::Rt4); - CodegenDrawColumn("DrawColumnRt4Shaded", DrawColumnVariant::DrawShaded, DrawColumnMethod::Rt4); - CodegenDrawColumn("DrawColumnRt4AddClamp", DrawColumnVariant::DrawAddClamp, DrawColumnMethod::Rt4); - CodegenDrawColumn("DrawColumnRt4SubClamp", DrawColumnVariant::DrawSubClamp, DrawColumnMethod::Rt4); - CodegenDrawColumn("DrawColumnRt4RevSubClamp", DrawColumnVariant::DrawRevSubClamp, DrawColumnMethod::Rt4); - CodegenDrawColumn("DrawColumnRt4Translated", DrawColumnVariant::DrawTranslated, DrawColumnMethod::Rt4); - CodegenDrawColumn("DrawColumnRt4TlatedAdd", DrawColumnVariant::DrawTlatedAdd, DrawColumnMethod::Rt4); - CodegenDrawColumn("DrawColumnRt4AddClampTranslated", DrawColumnVariant::DrawAddClampTranslated, DrawColumnMethod::Rt4); - CodegenDrawColumn("DrawColumnRt4SubClampTranslated", DrawColumnVariant::DrawSubClampTranslated, DrawColumnMethod::Rt4); - CodegenDrawColumn("DrawColumnRt4RevSubClampTranslated", DrawColumnVariant::DrawRevSubClampTranslated, DrawColumnMethod::Rt4); - CodegenDrawSpan("DrawSpan", DrawSpanVariant::Opaque); - CodegenDrawSpan("DrawSpanMasked", DrawSpanVariant::Masked); - CodegenDrawSpan("DrawSpanTranslucent", DrawSpanVariant::Translucent); - CodegenDrawSpan("DrawSpanMaskedTranslucent", DrawSpanVariant::MaskedTranslucent); - CodegenDrawSpan("DrawSpanAddClamp", DrawSpanVariant::AddClamp); - CodegenDrawSpan("DrawSpanMaskedAddClamp", DrawSpanVariant::MaskedAddClamp); - CodegenDrawWall("vlinec1", DrawWallVariant::Opaque, 1); - CodegenDrawWall("vlinec4", DrawWallVariant::Opaque, 4); - CodegenDrawWall("mvlinec1", DrawWallVariant::Masked, 1); - CodegenDrawWall("mvlinec4", DrawWallVariant::Masked, 4); - CodegenDrawWall("tmvline1_add", DrawWallVariant::Add, 1); 
- CodegenDrawWall("tmvline4_add", DrawWallVariant::Add, 4); - CodegenDrawWall("tmvline1_addclamp", DrawWallVariant::AddClamp, 1); - CodegenDrawWall("tmvline4_addclamp", DrawWallVariant::AddClamp, 4); - CodegenDrawWall("tmvline1_subclamp", DrawWallVariant::SubClamp, 1); - CodegenDrawWall("tmvline4_subclamp", DrawWallVariant::SubClamp, 4); - CodegenDrawWall("tmvline1_revsubclamp", DrawWallVariant::RevSubClamp, 1); - CodegenDrawWall("tmvline4_revsubclamp", DrawWallVariant::RevSubClamp, 4); - CodegenDrawSky("DrawSky1", DrawSkyVariant::Single, 1); - CodegenDrawSky("DrawSky4", DrawSkyVariant::Single, 4); - CodegenDrawSky("DrawDoubleSky1", DrawSkyVariant::Double, 1); - CodegenDrawSky("DrawDoubleSky4", DrawSkyVariant::Double, 4); - for (int i = 0; i < NumTriBlendModes(); i++) + int version = 1; // Increment this number if the drawer codegen is modified (forces recreation of the module). + std::string targetCPU = mProgram.GetTargetCPU(); + bool loaded = mProgram.LoadCachedModule(version, targetCPU); + if (!loaded) { - CodegenDrawTriangle("TriDraw8_" + std::to_string(i), TriDrawVariant::DrawNormal, (TriBlendMode)i, false); - CodegenDrawTriangle("TriDraw32_" + std::to_string(i), TriDrawVariant::DrawNormal, (TriBlendMode)i, true); - CodegenDrawTriangle("TriFill8_" + std::to_string(i), TriDrawVariant::FillNormal, (TriBlendMode)i, false); - CodegenDrawTriangle("TriFill32_" + std::to_string(i), TriDrawVariant::FillNormal, (TriBlendMode)i, true); - CodegenDrawTriangle("TriDrawSubsector8_" + std::to_string(i), TriDrawVariant::DrawSubsector, (TriBlendMode)i, false); - CodegenDrawTriangle("TriDrawSubsector32_" + std::to_string(i), TriDrawVariant::DrawSubsector, (TriBlendMode)i, true); - CodegenDrawTriangle("TriFillSubsector8_" + std::to_string(i), TriDrawVariant::FillSubsector, (TriBlendMode)i, false); - CodegenDrawTriangle("TriFillSubsector32_" + std::to_string(i), TriDrawVariant::FillSubsector, (TriBlendMode)i, true); - } - CodegenDrawTriangle("TriStencil", TriDrawVariant::Stencil, 
TriBlendMode::Copy, false); + mProgram.CreateModule(); - mProgram.CreateEE(); + CodegenDrawColumn("FillColumn", DrawColumnVariant::Fill, DrawColumnMethod::Normal); + CodegenDrawColumn("FillColumnAdd", DrawColumnVariant::FillAdd, DrawColumnMethod::Normal); + CodegenDrawColumn("FillColumnAddClamp", DrawColumnVariant::FillAddClamp, DrawColumnMethod::Normal); + CodegenDrawColumn("FillColumnSubClamp", DrawColumnVariant::FillSubClamp, DrawColumnMethod::Normal); + CodegenDrawColumn("FillColumnRevSubClamp", DrawColumnVariant::FillRevSubClamp, DrawColumnMethod::Normal); + CodegenDrawColumn("DrawColumn", DrawColumnVariant::Draw, DrawColumnMethod::Normal); + CodegenDrawColumn("DrawColumnAdd", DrawColumnVariant::DrawAdd, DrawColumnMethod::Normal); + CodegenDrawColumn("DrawColumnShaded", DrawColumnVariant::DrawShaded, DrawColumnMethod::Normal); + CodegenDrawColumn("DrawColumnAddClamp", DrawColumnVariant::DrawAddClamp, DrawColumnMethod::Normal); + CodegenDrawColumn("DrawColumnSubClamp", DrawColumnVariant::DrawSubClamp, DrawColumnMethod::Normal); + CodegenDrawColumn("DrawColumnRevSubClamp", DrawColumnVariant::DrawRevSubClamp, DrawColumnMethod::Normal); + CodegenDrawColumn("DrawColumnTranslated", DrawColumnVariant::DrawTranslated, DrawColumnMethod::Normal); + CodegenDrawColumn("DrawColumnTlatedAdd", DrawColumnVariant::DrawTlatedAdd, DrawColumnMethod::Normal); + CodegenDrawColumn("DrawColumnAddClampTranslated", DrawColumnVariant::DrawAddClampTranslated, DrawColumnMethod::Normal); + CodegenDrawColumn("DrawColumnSubClampTranslated", DrawColumnVariant::DrawSubClampTranslated, DrawColumnMethod::Normal); + CodegenDrawColumn("DrawColumnRevSubClampTranslated", DrawColumnVariant::DrawRevSubClampTranslated, DrawColumnMethod::Normal); + CodegenDrawColumn("DrawColumnRt1", DrawColumnVariant::Draw, DrawColumnMethod::Rt1); + CodegenDrawColumn("DrawColumnRt1Copy", DrawColumnVariant::DrawCopy, DrawColumnMethod::Rt1); + CodegenDrawColumn("DrawColumnRt1Add", DrawColumnVariant::DrawAdd, 
DrawColumnMethod::Rt1); + CodegenDrawColumn("DrawColumnRt1Shaded", DrawColumnVariant::DrawShaded, DrawColumnMethod::Rt1); + CodegenDrawColumn("DrawColumnRt1AddClamp", DrawColumnVariant::DrawAddClamp, DrawColumnMethod::Rt1); + CodegenDrawColumn("DrawColumnRt1SubClamp", DrawColumnVariant::DrawSubClamp, DrawColumnMethod::Rt1); + CodegenDrawColumn("DrawColumnRt1RevSubClamp", DrawColumnVariant::DrawRevSubClamp, DrawColumnMethod::Rt1); + CodegenDrawColumn("DrawColumnRt1Translated", DrawColumnVariant::DrawTranslated, DrawColumnMethod::Rt1); + CodegenDrawColumn("DrawColumnRt1TlatedAdd", DrawColumnVariant::DrawTlatedAdd, DrawColumnMethod::Rt1); + CodegenDrawColumn("DrawColumnRt1AddClampTranslated", DrawColumnVariant::DrawAddClampTranslated, DrawColumnMethod::Rt1); + CodegenDrawColumn("DrawColumnRt1SubClampTranslated", DrawColumnVariant::DrawSubClampTranslated, DrawColumnMethod::Rt1); + CodegenDrawColumn("DrawColumnRt1RevSubClampTranslated", DrawColumnVariant::DrawRevSubClampTranslated, DrawColumnMethod::Rt1); + CodegenDrawColumn("DrawColumnRt4", DrawColumnVariant::Draw, DrawColumnMethod::Rt4); + CodegenDrawColumn("DrawColumnRt4Copy", DrawColumnVariant::DrawCopy, DrawColumnMethod::Rt4); + CodegenDrawColumn("DrawColumnRt4Add", DrawColumnVariant::DrawAdd, DrawColumnMethod::Rt4); + CodegenDrawColumn("DrawColumnRt4Shaded", DrawColumnVariant::DrawShaded, DrawColumnMethod::Rt4); + CodegenDrawColumn("DrawColumnRt4AddClamp", DrawColumnVariant::DrawAddClamp, DrawColumnMethod::Rt4); + CodegenDrawColumn("DrawColumnRt4SubClamp", DrawColumnVariant::DrawSubClamp, DrawColumnMethod::Rt4); + CodegenDrawColumn("DrawColumnRt4RevSubClamp", DrawColumnVariant::DrawRevSubClamp, DrawColumnMethod::Rt4); + CodegenDrawColumn("DrawColumnRt4Translated", DrawColumnVariant::DrawTranslated, DrawColumnMethod::Rt4); + CodegenDrawColumn("DrawColumnRt4TlatedAdd", DrawColumnVariant::DrawTlatedAdd, DrawColumnMethod::Rt4); + CodegenDrawColumn("DrawColumnRt4AddClampTranslated", 
DrawColumnVariant::DrawAddClampTranslated, DrawColumnMethod::Rt4); + CodegenDrawColumn("DrawColumnRt4SubClampTranslated", DrawColumnVariant::DrawSubClampTranslated, DrawColumnMethod::Rt4); + CodegenDrawColumn("DrawColumnRt4RevSubClampTranslated", DrawColumnVariant::DrawRevSubClampTranslated, DrawColumnMethod::Rt4); + CodegenDrawSpan("DrawSpan", DrawSpanVariant::Opaque); + CodegenDrawSpan("DrawSpanMasked", DrawSpanVariant::Masked); + CodegenDrawSpan("DrawSpanTranslucent", DrawSpanVariant::Translucent); + CodegenDrawSpan("DrawSpanMaskedTranslucent", DrawSpanVariant::MaskedTranslucent); + CodegenDrawSpan("DrawSpanAddClamp", DrawSpanVariant::AddClamp); + CodegenDrawSpan("DrawSpanMaskedAddClamp", DrawSpanVariant::MaskedAddClamp); + CodegenDrawWall("vlinec1", DrawWallVariant::Opaque, 1); + CodegenDrawWall("vlinec4", DrawWallVariant::Opaque, 4); + CodegenDrawWall("mvlinec1", DrawWallVariant::Masked, 1); + CodegenDrawWall("mvlinec4", DrawWallVariant::Masked, 4); + CodegenDrawWall("tmvline1_add", DrawWallVariant::Add, 1); + CodegenDrawWall("tmvline4_add", DrawWallVariant::Add, 4); + CodegenDrawWall("tmvline1_addclamp", DrawWallVariant::AddClamp, 1); + CodegenDrawWall("tmvline4_addclamp", DrawWallVariant::AddClamp, 4); + CodegenDrawWall("tmvline1_subclamp", DrawWallVariant::SubClamp, 1); + CodegenDrawWall("tmvline4_subclamp", DrawWallVariant::SubClamp, 4); + CodegenDrawWall("tmvline1_revsubclamp", DrawWallVariant::RevSubClamp, 1); + CodegenDrawWall("tmvline4_revsubclamp", DrawWallVariant::RevSubClamp, 4); + CodegenDrawSky("DrawSky1", DrawSkyVariant::Single, 1); + CodegenDrawSky("DrawSky4", DrawSkyVariant::Single, 4); + CodegenDrawSky("DrawDoubleSky1", DrawSkyVariant::Double, 1); + CodegenDrawSky("DrawDoubleSky4", DrawSkyVariant::Double, 4); + for (int i = 0; i < NumTriBlendModes(); i++) + { + CodegenDrawTriangle("TriDraw8_" + std::to_string(i), TriDrawVariant::DrawNormal, (TriBlendMode)i, false); + CodegenDrawTriangle("TriDraw32_" + std::to_string(i), 
TriDrawVariant::DrawNormal, (TriBlendMode)i, true); + CodegenDrawTriangle("TriFill8_" + std::to_string(i), TriDrawVariant::FillNormal, (TriBlendMode)i, false); + CodegenDrawTriangle("TriFill32_" + std::to_string(i), TriDrawVariant::FillNormal, (TriBlendMode)i, true); + CodegenDrawTriangle("TriDrawSubsector8_" + std::to_string(i), TriDrawVariant::DrawSubsector, (TriBlendMode)i, false); + CodegenDrawTriangle("TriDrawSubsector32_" + std::to_string(i), TriDrawVariant::DrawSubsector, (TriBlendMode)i, true); + CodegenDrawTriangle("TriFillSubsector8_" + std::to_string(i), TriDrawVariant::FillSubsector, (TriBlendMode)i, false); + CodegenDrawTriangle("TriFillSubsector32_" + std::to_string(i), TriDrawVariant::FillSubsector, (TriBlendMode)i, true); + } + CodegenDrawTriangle("TriStencil", TriDrawVariant::Stencil, TriBlendMode::Copy, false); + } + + mProgram.CreateEE(version, targetCPU, !loaded); FillColumn = mProgram.GetProcAddress("FillColumn"); FillColumnAdd = mProgram.GetProcAddress("FillColumnAdd"); @@ -603,12 +614,64 @@ void LLVMProgram::CreateModule() mModule = std::make_unique("render", context()); } -void LLVMProgram::CreateEE() +bool LLVMProgram::LoadCachedModule(int version, std::string targetCPU) +{ + FString filename = GetDrawerCacheFilename(version, targetCPU.c_str()); + FILE *file = fopen(filename, "rb"); + if (!file) + return false; + + bool success = false; + std::string data; + + fseek(file, 0, SEEK_END); + int length = ftell(file); + fseek(file, 0, SEEK_SET); + if (length > 0) + { + data.resize(length); + success = fread(&data[0], length, 1, file) == 1; + } + + fclose(file); + if (!success) + return false; + + auto result = llvm::parseBitcodeFile(llvm::MemoryBufferRef(data, filename.GetChars()), *mContext.get()); + if (!result) + return false; + + mModule = std::move(result.get()); + return true; +} + +void LLVMProgram::SaveCachedModule(llvm::Module *module, int version, std::string targetCPU) +{ + std::string str; + llvm::raw_string_ostream stream(str); + 
llvm::WriteBitcodeToFile(module, stream); + std::string data = stream.str(); + + FString filename = GetDrawerCacheFilename(version, targetCPU.c_str()); + FILE *file = fopen(filename, "wb"); + if (file) + { + fwrite(data.data(), data.size(), 1, file); + fclose(file); + } +} + +FString LLVMProgram::GetDrawerCacheFilename(int version, FString cpu) +{ + FString path = M_GetCachePath(true); + FString filename; + filename.Format("%s/LLVMDrawers-%d-%s.bc", path.GetChars(), version, cpu.GetChars()); + return filename; +} + +std::string LLVMProgram::GetTargetCPU() { using namespace llvm; - - std::string errorstring; - std::string mcpu = sys::getHostCPUName(); if (std::string(CPU.CPUString).find("G840") != std::string::npos && mcpu == "sandybridge") mcpu = "westmere"; // Pentium G840 is misdetected as a sandy bridge CPU @@ -618,13 +681,21 @@ void LLVMProgram::CreateEE() mcpu = llvm_cpu; Printf("Overriding LLVM CPU target to %s\n", mcpu.c_str()); } + return mcpu; +} + +void LLVMProgram::CreateEE(int version, std::string targetCPU, bool optimize) +{ + using namespace llvm; + + std::string errorstring; llvm::Module *module = mModule.get(); EngineBuilder engineBuilder(std::move(mModule)); engineBuilder.setErrorStr(&errorstring); engineBuilder.setOptLevel(CodeGenOpt::Aggressive); engineBuilder.setEngineKind(EngineKind::JIT); - engineBuilder.setMCPU(mcpu); + engineBuilder.setMCPU(targetCPU); machine = engineBuilder.selectTarget(); if (!machine) I_FatalError("Could not create LLVM target machine"); @@ -638,42 +709,51 @@ void LLVMProgram::CreateEE() Printf("LLVM target triple: %s\n", targetTriple.c_str()); Printf("LLVM target CPU: %s\n", cpuName.c_str()); - module->setTargetTriple(targetTriple); + if (optimize) + { + Printf("Optimizing drawers..\n"); + + module->setTargetTriple(targetTriple); #if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 8) - module->setDataLayout(new DataLayout(*machine->getSubtargetImpl()->getDataLayout())); + 
module->setDataLayout(new DataLayout(*machine->getSubtargetImpl()->getDataLayout())); #else - module->setDataLayout(machine->createDataLayout()); + module->setDataLayout(machine->createDataLayout()); #endif - legacy::FunctionPassManager PerFunctionPasses(module); - legacy::PassManager PerModulePasses; + legacy::FunctionPassManager PerFunctionPasses(module); + legacy::PassManager PerModulePasses; #if LLVM_VERSION_MAJOR > 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 8) - PerFunctionPasses.add(createTargetTransformInfoWrapperPass(machine->getTargetIRAnalysis())); - PerModulePasses.add(createTargetTransformInfoWrapperPass(machine->getTargetIRAnalysis())); + PerFunctionPasses.add(createTargetTransformInfoWrapperPass(machine->getTargetIRAnalysis())); + PerModulePasses.add(createTargetTransformInfoWrapperPass(machine->getTargetIRAnalysis())); #endif - PassManagerBuilder passManagerBuilder; - passManagerBuilder.OptLevel = 3; - passManagerBuilder.SizeLevel = 0; - passManagerBuilder.Inliner = createFunctionInliningPass(); - passManagerBuilder.SLPVectorize = true; - passManagerBuilder.LoopVectorize = true; - passManagerBuilder.LoadCombine = true; - passManagerBuilder.populateModulePassManager(PerModulePasses); - passManagerBuilder.populateFunctionPassManager(PerFunctionPasses); + PassManagerBuilder passManagerBuilder; + passManagerBuilder.OptLevel = 3; + passManagerBuilder.SizeLevel = 0; + passManagerBuilder.Inliner = createFunctionInliningPass(); + passManagerBuilder.SLPVectorize = true; + passManagerBuilder.LoopVectorize = true; + passManagerBuilder.LoadCombine = true; + passManagerBuilder.populateModulePassManager(PerModulePasses); + passManagerBuilder.populateFunctionPassManager(PerFunctionPasses); - // Run function passes: - PerFunctionPasses.doInitialization(); - for (llvm::Function &func : *module) - { - if (!func.isDeclaration()) - PerFunctionPasses.run(func); + // Run function passes: + PerFunctionPasses.doInitialization(); + for (llvm::Function &func : 
*module) + { + if (!func.isDeclaration()) + PerFunctionPasses.run(func); + } + PerFunctionPasses.doFinalization(); + + // Run module passes: + PerModulePasses.run(*module); + + SaveCachedModule(module, version, targetCPU); } - PerFunctionPasses.doFinalization(); - // Run module passes: - PerModulePasses.run(*module); + Printf("Compiling drawers..\n"); // Create execution engine and generate machine code mEngine.reset(engineBuilder.create(machine)); From 08c4f2ac18b40b6fd07a7d88aaebd9140555d8f2 Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Sat, 19 Nov 2016 17:08:08 -0500 Subject: [PATCH 348/912] - Added a tool to clear the LLVM cache, if ever there is corruption or it needs rebuilt. --- tools/DelLLVMCache.cmd | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 tools/DelLLVMCache.cmd diff --git a/tools/DelLLVMCache.cmd b/tools/DelLLVMCache.cmd new file mode 100644 index 0000000000..d7c3b1525e --- /dev/null +++ b/tools/DelLLVMCache.cmd @@ -0,0 +1,9 @@ +@echo off +if not exist %localappdata%\zdoom\cache\llvm* goto :eof +echo QZDoom's LLVM drawers may take some time to create at startup. Because of this, +echo the program uses a cache to temporarily store bitcode for faster startups. If +echo this cache is ever corrupted, this program has been created to solve the +echo problem. +echo. +echo Are you SURE you wish to destroy the cache? 
+del /p %localappdata%\zdoom\cache\llvm* From 845bf93c95bc69d5942ffa838d910fd04966005d Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 20 Nov 2016 01:29:19 +0100 Subject: [PATCH 349/912] Change triangle full block to use vectored load and store --- .../fixedfunction/drawtrianglecodegen.cpp | 242 ++++++++++-------- .../fixedfunction/drawtrianglecodegen.h | 3 +- src/r_compiler/llvmdrawers.cpp | 2 +- src/r_compiler/ssa/ssa_bool.cpp | 6 + src/r_compiler/ssa/ssa_bool.h | 3 + 5 files changed, 152 insertions(+), 104 deletions(-) diff --git a/src/r_compiler/fixedfunction/drawtrianglecodegen.cpp b/src/r_compiler/fixedfunction/drawtrianglecodegen.cpp index 1ad98a8fb3..fbbca7517a 100644 --- a/src/r_compiler/fixedfunction/drawtrianglecodegen.cpp +++ b/src/r_compiler/fixedfunction/drawtrianglecodegen.cpp @@ -303,57 +303,80 @@ void DrawTriangleCodegen::LoopFullBlock() { int pixelsize = truecolor ? 4 : 1; - stack_iy.store(SSAInt(0)); - stack_buffer.store(dest[x * pixelsize]); - stack_subsectorbuffer.store(subsectorGBuffer[x]); - - SSAForBlock loopy; - SSAInt iy = stack_iy.load(); - SSAUBytePtr buffer = stack_buffer.load(); - SSAIntPtr subsectorbuffer = stack_subsectorbuffer.load(); - loopy.loop_block(iy < SSAInt(q), q); + for (int iy = 0; iy < q; iy++) { + SSAUBytePtr buffer = dest[(x + iy * pitch) * pixelsize]; + SSAIntPtr subsectorbuffer = subsectorGBuffer[x + iy * pitch]; + + SSAInt varying[TriVertex::NumVarying]; SSAInt varyingStep[TriVertex::NumVarying]; for (int i = 0; i < TriVertex::NumVarying; i++) { - stack_varying[i].store((varyingPos[i] + varyingStepPos[i] * iy) << 8); + varying[i] = (varyingPos[i] + varyingStepPos[i] * iy) << 8; varyingStep[i] = (varyingStartStepX[i] + varyingIncrStepX[i] * iy) << 8; } - stack_ix.store(SSAInt(0)); - SSAForBlock loopx; - SSAInt ix = stack_ix.load(); - SSAInt varying[TriVertex::NumVarying]; - for (int i = 0; i < TriVertex::NumVarying; i++) - varying[i] = stack_varying[i].load(); - loopx.loop_block(ix < SSAInt(q), q); + for 
(int ix = 0; ix < q; ix += 4) { - if (variant == TriDrawVariant::DrawSubsector || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector) + SSAUBytePtr buf = buffer[ix * pixelsize]; + if (truecolor) { - SSAIfBlock branch; - branch.if_block(subsectorbuffer[ix].load(true) >= subsectorDepth); + SSAVec16ub pixels16 = buf.load_unaligned_vec16ub(false); + SSAVec8s pixels8hi = SSAVec8s::extendhi(pixels16); + SSAVec8s pixels8lo = SSAVec8s::extendlo(pixels16); + SSAVec4i pixels[4] = { - ProcessPixel(buffer[ix * pixelsize], subsectorbuffer[ix], varying); + SSAVec4i::extendlo(pixels8lo), + SSAVec4i::extendhi(pixels8lo), + SSAVec4i::extendlo(pixels8hi), + SSAVec4i::extendhi(pixels8hi) + }; + + for (int sse = 0; sse < 4; sse++) + { + if (variant == TriDrawVariant::DrawSubsector || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector) + { + SSABool subsectorTest = subsectorbuffer[ix].load(true) >= subsectorDepth; + pixels[sse] = subsectorTest.select(ProcessPixel32(pixels[sse], varying), pixels[sse]); + } + else + { + pixels[sse] = ProcessPixel32(pixels[sse], varying); + } + + for (int i = 0; i < TriVertex::NumVarying; i++) + varying[i] = varying[i] + varyingStep[i]; } - branch.end_block(); + + buf.store_unaligned_vec16ub(SSAVec16ub(SSAVec8s(pixels[0], pixels[1]), SSAVec8s(pixels[2], pixels[3]))); } else { - ProcessPixel(buffer[ix * pixelsize], subsectorbuffer[ix], varying); + SSAVec4i pixels = buf.load_vec4ub(false); + + for (int sse = 0; sse < 4; sse++) + { + if (variant == TriDrawVariant::DrawSubsector || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector) + { + SSABool subsectorTest = subsectorbuffer[ix].load(true) >= subsectorDepth; + pixels.insert(sse, subsectorTest.select(ProcessPixel8(pixels[sse], varying), pixels[sse])); + } + else + { + pixels.insert(sse, ProcessPixel8(pixels[sse], varying)); + } + + for (int i = 0; i < TriVertex::NumVarying; i++) + varying[i] = varying[i] + 
varyingStep[i]; + } + + buf.store_vec4ub(pixels); } - for (int i = 0; i < TriVertex::NumVarying; i++) - stack_varying[i].store(varying[i] + varyingStep[i]); - - stack_ix.store(ix + 1); + if (variant != TriDrawVariant::DrawSubsector && variant != TriDrawVariant::FillSubsector && variant != TriDrawVariant::FuzzSubsector) + subsectorbuffer[ix].store_unaligned_vec4i(SSAVec4i(subsectorDepth)); } - loopx.end_block(); - - stack_buffer.store(buffer[pitch * pixelsize]); - stack_subsectorbuffer.store(subsectorbuffer[pitch]); - stack_iy.store(iy + 1); } - loopy.end_block(); } if (variant != TriDrawVariant::DrawSubsector && variant != TriDrawVariant::FillSubsector && variant != TriDrawVariant::FuzzSubsector) @@ -425,7 +448,21 @@ void DrawTriangleCodegen::LoopPartialBlock() } else { - ProcessPixel(buffer[ix * pixelsize], subsectorbuffer[ix], varying); + SSAUBytePtr buf = buffer[ix * pixelsize]; + + if (truecolor) + { + SSAVec4i bg = buf.load_vec4ub(false); + buf.store_vec4ub(ProcessPixel32(bg, varying)); + } + else + { + SSAUByte bg = buf.load(false); + buf.store(ProcessPixel8(bg.zext_int(), varying).trunc_ubyte()); + } + + if (variant != TriDrawVariant::DrawSubsector && variant != TriDrawVariant::FillSubsector && variant != TriDrawVariant::FuzzSubsector) + subsectorbuffer[ix].store(subsectorDepth); } } branch.end_block(); @@ -466,7 +503,7 @@ SSAVec4i DrawTriangleCodegen::Sample(SSAInt uvoffset) return texturePixels[uvoffset * 4].load_vec4ub(true); } -void DrawTriangleCodegen::ProcessPixel(SSAUBytePtr buffer, SSAIntPtr subsectorbuffer, SSAInt *varying) +SSAVec4i DrawTriangleCodegen::ProcessPixel32(SSAVec4i bg, SSAInt *varying) { SSAInt ufrac = varying[0]; SSAInt vfrac = varying[1]; @@ -475,80 +512,81 @@ void DrawTriangleCodegen::ProcessPixel(SSAUBytePtr buffer, SSAIntPtr subsectorbu SSAInt vpos = ((vfrac >> 16) * textureHeight) >> 16; SSAInt uvoffset = upos * textureHeight + vpos; - if (truecolor) + SSAVec4i fg; + SSAInt alpha, inv_alpha; + SSAVec4i output; + + switch 
(blendmode) { - SSAVec4i fg; - SSAVec4i bg = buffer.load_vec4ub(false); - SSAInt alpha, inv_alpha; - SSAVec4i output; + default: + case TriBlendMode::Copy: + fg = Sample(uvoffset); + output = blend_copy(shade_bgra_simple(fg, currentlight)); break; + case TriBlendMode::AlphaBlend: + fg = Sample(uvoffset); + output = blend_alpha_blend(shade_bgra_simple(fg, currentlight), bg); break; + case TriBlendMode::AddSolid: + fg = Sample(uvoffset); + output = blend_add(shade_bgra_simple(fg, currentlight), bg, srcalpha, destalpha); break; + case TriBlendMode::Add: + fg = Sample(uvoffset); + output = blend_add(shade_bgra_simple(fg, currentlight), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); break; + case TriBlendMode::Sub: + fg = Sample(uvoffset); + output = blend_sub(shade_bgra_simple(fg, currentlight), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); break; + case TriBlendMode::RevSub: + fg = Sample(uvoffset); + output = blend_revsub(shade_bgra_simple(fg, currentlight), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); break; + case TriBlendMode::Shaded: + fg = Sample(uvoffset); + alpha = fg[0]; + alpha = alpha + (alpha >> 7); // 255 -> 256 + inv_alpha = 256 - alpha; + output = blend_add(shade_bgra_simple(SSAVec4i::unpack(color), currentlight), bg, alpha, inv_alpha); + break; + case TriBlendMode::TranslateCopy: + fg = TranslateSample(uvoffset); + output = blend_copy(shade_bgra_simple(fg, currentlight)); + break; + case TriBlendMode::TranslateAlphaBlend: + fg = TranslateSample(uvoffset); + output = blend_alpha_blend(shade_bgra_simple(fg, currentlight), bg); break; + break; + case TriBlendMode::TranslateAdd: + fg = TranslateSample(uvoffset); + output = blend_add(shade_bgra_simple(fg, currentlight), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); + break; + case TriBlendMode::TranslateSub: + fg = TranslateSample(uvoffset); + output = blend_sub(shade_bgra_simple(fg, currentlight), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); + break; + case 
TriBlendMode::TranslateRevSub: + fg = TranslateSample(uvoffset); + output = blend_revsub(shade_bgra_simple(fg, currentlight), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); + break; + } - switch (blendmode) - { - default: - case TriBlendMode::Copy: - fg = Sample(uvoffset); - output = blend_copy(shade_bgra_simple(fg, currentlight)); break; - case TriBlendMode::AlphaBlend: - fg = Sample(uvoffset); - output = blend_alpha_blend(shade_bgra_simple(fg, currentlight), bg); break; - case TriBlendMode::AddSolid: - fg = Sample(uvoffset); - output = blend_add(shade_bgra_simple(fg, currentlight), bg, srcalpha, destalpha); break; - case TriBlendMode::Add: - fg = Sample(uvoffset); - output = blend_add(shade_bgra_simple(fg, currentlight), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); break; - case TriBlendMode::Sub: - fg = Sample(uvoffset); - output = blend_sub(shade_bgra_simple(fg, currentlight), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); break; - case TriBlendMode::RevSub: - fg = Sample(uvoffset); - output = blend_revsub(shade_bgra_simple(fg, currentlight), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); break; - case TriBlendMode::Shaded: - fg = Sample(uvoffset); - alpha = fg[0]; - alpha = alpha + (alpha >> 7); // 255 -> 256 - inv_alpha = 256 - alpha; - output = blend_add(shade_bgra_simple(SSAVec4i::unpack(color), currentlight), bg, alpha, inv_alpha); - break; - case TriBlendMode::TranslateCopy: - fg = TranslateSample(uvoffset); - output = blend_copy(shade_bgra_simple(fg, currentlight)); - break; - case TriBlendMode::TranslateAlphaBlend: - fg = TranslateSample(uvoffset); - output = blend_alpha_blend(shade_bgra_simple(fg, currentlight), bg); break; - break; - case TriBlendMode::TranslateAdd: - fg = TranslateSample(uvoffset); - output = blend_add(shade_bgra_simple(fg, currentlight), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); - break; - case TriBlendMode::TranslateSub: - fg = TranslateSample(uvoffset); - output = blend_sub(shade_bgra_simple(fg, 
currentlight), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); - break; - case TriBlendMode::TranslateRevSub: - fg = TranslateSample(uvoffset); - output = blend_revsub(shade_bgra_simple(fg, currentlight), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); - break; - } + return output; +} - buffer.store_vec4ub(output); +SSAInt DrawTriangleCodegen::ProcessPixel8(SSAInt bg, SSAInt *varying) +{ + SSAInt ufrac = varying[0]; + SSAInt vfrac = varying[1]; + + SSAInt upos = ((ufrac >> 16) * textureWidth) >> 16; + SSAInt vpos = ((vfrac >> 16) * textureHeight) >> 16; + SSAInt uvoffset = upos * textureHeight + vpos; + + if (variant == TriDrawVariant::FillNormal || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector) + { + return color; } else { - if (variant == TriDrawVariant::FillNormal || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector) - { - buffer.store(color.trunc_ubyte()); - } - else - { - SSAUByte fg = texturePixels[uvoffset].load(true); - buffer.store(fg); - } + SSAUByte fg = texturePixels[uvoffset].load(true); + return fg.zext_int(); } - - if (variant != TriDrawVariant::DrawSubsector && variant != TriDrawVariant::FillSubsector && variant != TriDrawVariant::FuzzSubsector) - subsectorbuffer.store(subsectorDepth); } void DrawTriangleCodegen::SetStencilBlock(SSAInt block) diff --git a/src/r_compiler/fixedfunction/drawtrianglecodegen.h b/src/r_compiler/fixedfunction/drawtrianglecodegen.h index d5539ef22f..e4c22d49cd 100644 --- a/src/r_compiler/fixedfunction/drawtrianglecodegen.h +++ b/src/r_compiler/fixedfunction/drawtrianglecodegen.h @@ -46,7 +46,8 @@ private: void LoopFullBlock(); void LoopPartialBlock(); - void ProcessPixel(SSAUBytePtr buffer, SSAIntPtr subsectorbuffer, SSAInt *varying); + SSAVec4i ProcessPixel32(SSAVec4i bg, SSAInt *varying); + SSAInt ProcessPixel8(SSAInt bg, SSAInt *varying); SSAVec4i TranslateSample(SSAInt uvoffset); SSAVec4i Sample(SSAInt uvoffset); diff --git 
a/src/r_compiler/llvmdrawers.cpp b/src/r_compiler/llvmdrawers.cpp index a3432eae65..913e5d9b78 100644 --- a/src/r_compiler/llvmdrawers.cpp +++ b/src/r_compiler/llvmdrawers.cpp @@ -127,7 +127,7 @@ LLVMDrawers *LLVMDrawers::Instance() LLVMDrawersImpl::LLVMDrawersImpl() { - int version = 1; // Increment this number if the drawer codegen is modified (forces recreation of the module). + int version = 2; // Increment this number if the drawer codegen is modified (forces recreation of the module). std::string targetCPU = mProgram.GetTargetCPU(); bool loaded = mProgram.LoadCachedModule(version, targetCPU); if (!loaded) diff --git a/src/r_compiler/ssa/ssa_bool.cpp b/src/r_compiler/ssa/ssa_bool.cpp index 65cc25c90c..916350c59d 100644 --- a/src/r_compiler/ssa/ssa_bool.cpp +++ b/src/r_compiler/ssa/ssa_bool.cpp @@ -23,6 +23,7 @@ #include "r_compiler/llvm_include.h" #include "ssa_bool.h" #include "ssa_ubyte.h" +#include "ssa_vec4i.h" #include "ssa_value.h" #include "ssa_scope.h" @@ -61,6 +62,11 @@ SSAUByte SSABool::select(SSAUByte a, SSAUByte b) return SSAValue::from_llvm(SSAScope::builder().CreateSelect(v, a.v, b.v, SSAScope::hint())); } +SSAVec4i SSABool::select(SSAVec4i a, SSAVec4i b) +{ + return SSAValue::from_llvm(SSAScope::builder().CreateSelect(v, a.v, b.v, SSAScope::hint())); +} + SSABool operator&&(const SSABool &a, const SSABool &b) { return SSABool::from_llvm(SSAScope::builder().CreateAnd(a.v, b.v, SSAScope::hint())); diff --git a/src/r_compiler/ssa/ssa_bool.h b/src/r_compiler/ssa/ssa_bool.h index 2ed6e7d4a6..372c626c04 100644 --- a/src/r_compiler/ssa/ssa_bool.h +++ b/src/r_compiler/ssa/ssa_bool.h @@ -29,6 +29,8 @@ namespace llvm { class Value; } namespace llvm { class Type; } +class SSAVec4i; + class SSABool { public: @@ -41,6 +43,7 @@ public: SSAInt zext_int(); SSAInt select(SSAInt a, SSAInt b); SSAUByte select(SSAUByte a, SSAUByte b); + SSAVec4i select(SSAVec4i a, SSAVec4i b); llvm::Value *v; }; From f8efe394cc475722ae9e13fdabc61945480dcdbf Mon Sep 17 00:00:00 2001 
From: Magnus Norddahl Date: Sun, 20 Nov 2016 01:51:08 +0100 Subject: [PATCH 350/912] Remove unused code --- src/r_poly_triangle.cpp | 1314 --------------------------------------- src/r_poly_triangle.h | 76 +-- 2 files changed, 4 insertions(+), 1386 deletions(-) diff --git a/src/r_poly_triangle.cpp b/src/r_poly_triangle.cpp index 07a4566dd1..ad9f7ff952 100644 --- a/src/r_poly_triangle.cpp +++ b/src/r_poly_triangle.cpp @@ -36,10 +36,6 @@ #include "r_data/colormaps.h" #include "r_poly_triangle.h" -#ifndef NO_SSE -#include -#endif - int PolyTriangleDrawer::viewport_x; int PolyTriangleDrawer::viewport_y; int PolyTriangleDrawer::viewport_width; @@ -324,1316 +320,6 @@ void PolyTriangleDrawer::clipedge(const TriVertex *verts, TriVertex *clippedvert ///////////////////////////////////////////////////////////////////////////// -#if 0 -void ScreenPolyTriangleDrawer::draw(const TriDrawTriangleArgs *args, WorkerThreadData *thread) -{ - uint8_t *dest = args->dest; - int pitch = args->pitch; - const TriVertex &v1 = *args->v1; - const TriVertex &v2 = *args->v2; - const TriVertex &v3 = *args->v3; - int clipleft = args->clipleft; - int clipright = args->clipright; - int cliptop = args->cliptop; - int clipbottom = args->clipbottom; - const uint8_t *texturePixels = args->texturePixels; - int textureWidth = args->textureWidth; - int textureHeight = args->textureHeight; - - // 28.4 fixed-point coordinates - const int Y1 = (int)round(16.0f * v1.y); - const int Y2 = (int)round(16.0f * v2.y); - const int Y3 = (int)round(16.0f * v3.y); - - const int X1 = (int)round(16.0f * v1.x); - const int X2 = (int)round(16.0f * v2.x); - const int X3 = (int)round(16.0f * v3.x); - - // Deltas - const int DX12 = X1 - X2; - const int DX23 = X2 - X3; - const int DX31 = X3 - X1; - - const int DY12 = Y1 - Y2; - const int DY23 = Y2 - Y3; - const int DY31 = Y3 - Y1; - - // Fixed-point deltas - const int FDX12 = DX12 << 4; - const int FDX23 = DX23 << 4; - const int FDX31 = DX31 << 4; - - const int FDY12 = DY12 << 
4; - const int FDY23 = DY23 << 4; - const int FDY31 = DY31 << 4; - - // Bounding rectangle - int minx = MAX((MIN(MIN(X1, X2), X3) + 0xF) >> 4, clipleft); - int maxx = MIN((MAX(MAX(X1, X2), X3) + 0xF) >> 4, clipright - 1); - int miny = MAX((MIN(MIN(Y1, Y2), Y3) + 0xF) >> 4, cliptop); - int maxy = MIN((MAX(MAX(Y1, Y2), Y3) + 0xF) >> 4, clipbottom - 1); - if (minx >= maxx || miny >= maxy) - return; - - // Block size, standard 8x8 (must be power of two) - const int q = 8; - - // Start in corner of 8x8 block - minx &= ~(q - 1); - miny &= ~(q - 1); - - dest += miny * pitch; - - // Half-edge constants - int C1 = DY12 * X1 - DX12 * Y1; - int C2 = DY23 * X2 - DX23 * Y2; - int C3 = DY31 * X3 - DX31 * Y3; - - // Correct for fill convention - if (DY12 < 0 || (DY12 == 0 && DX12 > 0)) C1++; - if (DY23 < 0 || (DY23 == 0 && DX23 > 0)) C2++; - if (DY31 < 0 || (DY31 == 0 && DX31 > 0)) C3++; - - // Gradients - float gradWX = gradx(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); - float gradWY = grady(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); - float startW = v1.w + gradWX * (minx - v1.x) + gradWY * (miny - v1.y); - float gradVaryingX[TriVertex::NumVarying], gradVaryingY[TriVertex::NumVarying], startVarying[TriVertex::NumVarying]; - for (int i = 0; i < TriVertex::NumVarying; i++) - { - gradVaryingX[i] = gradx(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); - gradVaryingY[i] = grady(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); - startVarying[i] = v1.varying[i] * v1.w + gradVaryingX[i] * (minx - v1.x) + gradVaryingY[i] * (miny - v1.y); - } - - // Loop through blocks - for (int y = miny; y < maxy; y += q, dest += q * pitch) - { - // Is this row of blocks done by this thread? 
- if (thread && ((y / q) % thread->num_cores != thread->core)) continue; - - for (int x = minx; x < maxx; x += q) - { - // Corners of block - int x0 = x << 4; - int x1 = (x + q - 1) << 4; - int y0 = y << 4; - int y1 = (y + q - 1) << 4; - - // Evaluate half-space functions - bool a00 = C1 + DX12 * y0 - DY12 * x0 > 0; - bool a10 = C1 + DX12 * y0 - DY12 * x1 > 0; - bool a01 = C1 + DX12 * y1 - DY12 * x0 > 0; - bool a11 = C1 + DX12 * y1 - DY12 * x1 > 0; - int a = (a00 << 0) | (a10 << 1) | (a01 << 2) | (a11 << 3); - - bool b00 = C2 + DX23 * y0 - DY23 * x0 > 0; - bool b10 = C2 + DX23 * y0 - DY23 * x1 > 0; - bool b01 = C2 + DX23 * y1 - DY23 * x0 > 0; - bool b11 = C2 + DX23 * y1 - DY23 * x1 > 0; - int b = (b00 << 0) | (b10 << 1) | (b01 << 2) | (b11 << 3); - - bool c00 = C3 + DX31 * y0 - DY31 * x0 > 0; - bool c10 = C3 + DX31 * y0 - DY31 * x1 > 0; - bool c01 = C3 + DX31 * y1 - DY31 * x0 > 0; - bool c11 = C3 + DX31 * y1 - DY31 * x1 > 0; - int c = (c00 << 0) | (c10 << 1) | (c01 << 2) | (c11 << 3); - - // Skip block when outside an edge - if (a == 0x0 || b == 0x0 || c == 0x0) continue; - - // Check if block needs clipping - bool clipneeded = clipleft > x || clipright < (x + q) || cliptop > y || clipbottom < (y + q); - - // Calculate varying variables for affine block - float offx0 = (x - minx) + 0.5f; - float offy0 = (y - miny) + 0.5f; - float offx1 = offx0 + q; - float offy1 = offy0 + q; - float rcpWTL = 1.0f / (startW + offx0 * gradWX + offy0 * gradWY); - float rcpWTR = 1.0f / (startW + offx1 * gradWX + offy0 * gradWY); - float rcpWBL = 1.0f / (startW + offx0 * gradWX + offy1 * gradWY); - float rcpWBR = 1.0f / (startW + offx1 * gradWX + offy1 * gradWY); - float varyingTL[TriVertex::NumVarying]; - float varyingTR[TriVertex::NumVarying]; - float varyingBL[TriVertex::NumVarying]; - float varyingBR[TriVertex::NumVarying]; - for (int i = 0; i < TriVertex::NumVarying; i++) - { - varyingTL[i] = (startVarying[i] + offx0 * gradVaryingX[i] + offy0 * gradVaryingY[i]) * rcpWTL; - 
varyingTR[i] = (startVarying[i] + offx1 * gradVaryingX[i] + offy0 * gradVaryingY[i]) * rcpWTR; - varyingBL[i] = ((startVarying[i] + offx0 * gradVaryingX[i] + offy1 * gradVaryingY[i]) * rcpWBL - varyingTL[i]) * (1.0f / q); - varyingBR[i] = ((startVarying[i] + offx1 * gradVaryingX[i] + offy1 * gradVaryingY[i]) * rcpWBR - varyingTR[i]) * (1.0f / q); - } - - uint8_t *buffer = dest; - - // Accept whole block when totally covered - if (a == 0xF && b == 0xF && c == 0xF && !clipneeded) - { - for (int iy = 0; iy < q; iy++) - { - uint32_t varying[TriVertex::NumVarying], varyingStep[TriVertex::NumVarying]; - for (int i = 0; i < TriVertex::NumVarying; i++) - { - float pos = varyingTL[i] + varyingBL[i] * iy; - float step = (varyingTR[i] + varyingBR[i] * iy - pos) * (1.0f / q); - - varying[i] = (uint32_t)((pos - floor(pos)) * 0x100000000LL); - varyingStep[i] = (uint32_t)(step * 0x100000000LL); - } - - for (int ix = x; ix < x + q; ix++) - { - uint32_t ufrac = varying[0]; - uint32_t vfrac = varying[1]; - - uint32_t upos = ((ufrac >> 16) * textureWidth) >> 16; - uint32_t vpos = ((vfrac >> 16) * textureHeight) >> 16; - uint32_t uvoffset = upos * textureHeight + vpos; - - if (texturePixels[uvoffset] != 0) - buffer[ix] = texturePixels[uvoffset]; - - for (int i = 0; i < TriVertex::NumVarying; i++) - varying[i] += varyingStep[i]; - } - - buffer += pitch; - } - } - else // Partially covered block - { - int CY1 = C1 + DX12 * y0 - DY12 * x0; - int CY2 = C2 + DX23 * y0 - DY23 * x0; - int CY3 = C3 + DX31 * y0 - DY31 * x0; - - for (int iy = 0; iy < q; iy++) - { - int CX1 = CY1; - int CX2 = CY2; - int CX3 = CY3; - - uint32_t varying[TriVertex::NumVarying], varyingStep[TriVertex::NumVarying]; - for (int i = 0; i < TriVertex::NumVarying; i++) - { - float pos = varyingTL[i] + varyingBL[i] * iy; - float step = (varyingTR[i] + varyingBR[i] * iy - pos) * (1.0f / q); - - varying[i] = (uint32_t)((pos - floor(pos)) * 0x100000000LL); - varyingStep[i] = (uint32_t)(step * 0x100000000LL); - } - - for (int 
ix = x; ix < x + q; ix++) - { - bool visible = ix >= clipleft && ix < clipright && (cliptop <= y + iy) && (clipbottom > y + iy); - - if (CX1 > 0 && CX2 > 0 && CX3 > 0 && visible) - { - uint32_t ufrac = varying[0]; - uint32_t vfrac = varying[1]; - - uint32_t upos = ((ufrac >> 16) * textureWidth) >> 16; - uint32_t vpos = ((vfrac >> 16) * textureHeight) >> 16; - uint32_t uvoffset = upos * textureHeight + vpos; - - if (texturePixels[uvoffset] != 0) - buffer[ix] = texturePixels[uvoffset]; - } - - for (int i = 0; i < TriVertex::NumVarying; i++) - varying[i] += varyingStep[i]; - - CX1 -= FDY12; - CX2 -= FDY23; - CX3 -= FDY31; - } - - CY1 += FDX12; - CY2 += FDX23; - CY3 += FDX31; - - buffer += pitch; - } - } - } - } -} - -void ScreenPolyTriangleDrawer::fill(const TriDrawTriangleArgs *args, WorkerThreadData *thread) -{ - uint8_t *dest = args->dest; - int pitch = args->pitch; - const TriVertex &v1 = *args->v1; - const TriVertex &v2 = *args->v2; - const TriVertex &v3 = *args->v3; - int clipleft = args->clipleft; - int clipright = args->clipright; - int cliptop = args->cliptop; - int clipbottom = args->clipbottom; - int solidcolor = args->solidcolor; - - // 28.4 fixed-point coordinates - const int Y1 = (int)round(16.0f * v1.y); - const int Y2 = (int)round(16.0f * v2.y); - const int Y3 = (int)round(16.0f * v3.y); - - const int X1 = (int)round(16.0f * v1.x); - const int X2 = (int)round(16.0f * v2.x); - const int X3 = (int)round(16.0f * v3.x); - - // Deltas - const int DX12 = X1 - X2; - const int DX23 = X2 - X3; - const int DX31 = X3 - X1; - - const int DY12 = Y1 - Y2; - const int DY23 = Y2 - Y3; - const int DY31 = Y3 - Y1; - - // Fixed-point deltas - const int FDX12 = DX12 << 4; - const int FDX23 = DX23 << 4; - const int FDX31 = DX31 << 4; - - const int FDY12 = DY12 << 4; - const int FDY23 = DY23 << 4; - const int FDY31 = DY31 << 4; - - // Bounding rectangle - int minx = MAX((MIN(MIN(X1, X2), X3) + 0xF) >> 4, clipleft); - int maxx = MIN((MAX(MAX(X1, X2), X3) + 0xF) >> 4, 
clipright - 1); - int miny = MAX((MIN(MIN(Y1, Y2), Y3) + 0xF) >> 4, cliptop); - int maxy = MIN((MAX(MAX(Y1, Y2), Y3) + 0xF) >> 4, clipbottom - 1); - if (minx >= maxx || miny >= maxy) - return; - - // Block size, standard 8x8 (must be power of two) - const int q = 8; - - // Start in corner of 8x8 block - minx &= ~(q - 1); - miny &= ~(q - 1); - - dest += miny * pitch; - - // Half-edge constants - int C1 = DY12 * X1 - DX12 * Y1; - int C2 = DY23 * X2 - DX23 * Y2; - int C3 = DY31 * X3 - DX31 * Y3; - - // Correct for fill convention - if (DY12 < 0 || (DY12 == 0 && DX12 > 0)) C1++; - if (DY23 < 0 || (DY23 == 0 && DX23 > 0)) C2++; - if (DY31 < 0 || (DY31 == 0 && DX31 > 0)) C3++; - - // Loop through blocks - for (int y = miny; y < maxy; y += q, dest += q * pitch) - { - // Is this row of blocks done by this thread? - if (thread && ((y / q) % thread->num_cores != thread->core)) continue; - - for (int x = minx; x < maxx; x += q) - { - // Corners of block - int x0 = x << 4; - int x1 = (x + q - 1) << 4; - int y0 = y << 4; - int y1 = (y + q - 1) << 4; - - // Evaluate half-space functions - bool a00 = C1 + DX12 * y0 - DY12 * x0 > 0; - bool a10 = C1 + DX12 * y0 - DY12 * x1 > 0; - bool a01 = C1 + DX12 * y1 - DY12 * x0 > 0; - bool a11 = C1 + DX12 * y1 - DY12 * x1 > 0; - int a = (a00 << 0) | (a10 << 1) | (a01 << 2) | (a11 << 3); - - bool b00 = C2 + DX23 * y0 - DY23 * x0 > 0; - bool b10 = C2 + DX23 * y0 - DY23 * x1 > 0; - bool b01 = C2 + DX23 * y1 - DY23 * x0 > 0; - bool b11 = C2 + DX23 * y1 - DY23 * x1 > 0; - int b = (b00 << 0) | (b10 << 1) | (b01 << 2) | (b11 << 3); - - bool c00 = C3 + DX31 * y0 - DY31 * x0 > 0; - bool c10 = C3 + DX31 * y0 - DY31 * x1 > 0; - bool c01 = C3 + DX31 * y1 - DY31 * x0 > 0; - bool c11 = C3 + DX31 * y1 - DY31 * x1 > 0; - int c = (c00 << 0) | (c10 << 1) | (c01 << 2) | (c11 << 3); - - // Skip block when outside an edge - if (a == 0x0 || b == 0x0 || c == 0x0) continue; - - // Check if block needs clipping - bool clipneeded = clipleft > x || clipright < (x + q) 
|| cliptop > y || clipbottom < (y + q); - - uint8_t *buffer = dest; - - // Accept whole block when totally covered - if (a == 0xF && b == 0xF && c == 0xF && !clipneeded) - { - for (int iy = 0; iy < q; iy++) - { - for (int ix = x; ix < x + q; ix++) - { - buffer[ix] = solidcolor; - } - - buffer += pitch; - } - } - else // Partially covered block - { - int CY1 = C1 + DX12 * y0 - DY12 * x0; - int CY2 = C2 + DX23 * y0 - DY23 * x0; - int CY3 = C3 + DX31 * y0 - DY31 * x0; - - for (int iy = 0; iy < q; iy++) - { - int CX1 = CY1; - int CX2 = CY2; - int CX3 = CY3; - - for (int ix = x; ix < x + q; ix++) - { - bool visible = ix >= clipleft && ix < clipright && (cliptop <= y + iy) && (clipbottom > y + iy); - - if (CX1 > 0 && CX2 > 0 && CX3 > 0 && visible) - { - buffer[ix] = solidcolor; - } - - CX1 -= FDY12; - CX2 -= FDY23; - CX3 -= FDY31; - } - - CY1 += FDX12; - CY2 += FDX23; - CY3 += FDX31; - - buffer += pitch; - } - } - } - } -} - -void ScreenPolyTriangleDrawer::stencil(const TriDrawTriangleArgs *args, WorkerThreadData *thread) -{ - const TriVertex &v1 = *args->v1; - const TriVertex &v2 = *args->v2; - const TriVertex &v3 = *args->v3; - int clipleft = args->clipleft; - int clipright = args->clipright; - int cliptop = args->cliptop; - int clipbottom = args->clipbottom; - int solidcolor = args->solidcolor; - uint8_t *stencilValues = args->stencilValues; - uint32_t *stencilMasks = args->stencilMasks; - int stencilPitch = args->stencilPitch; - uint8_t stencilTestValue = args->stencilTestValue; - uint8_t stencilWriteValue = args->stencilWriteValue; - - // 28.4 fixed-point coordinates - const int Y1 = (int)round(16.0f * v1.y); - const int Y2 = (int)round(16.0f * v2.y); - const int Y3 = (int)round(16.0f * v3.y); - - const int X1 = (int)round(16.0f * v1.x); - const int X2 = (int)round(16.0f * v2.x); - const int X3 = (int)round(16.0f * v3.x); - - // Deltas - const int DX12 = X1 - X2; - const int DX23 = X2 - X3; - const int DX31 = X3 - X1; - - const int DY12 = Y1 - Y2; - const int DY23 = 
Y2 - Y3; - const int DY31 = Y3 - Y1; - - // Fixed-point deltas - const int FDX12 = DX12 << 4; - const int FDX23 = DX23 << 4; - const int FDX31 = DX31 << 4; - - const int FDY12 = DY12 << 4; - const int FDY23 = DY23 << 4; - const int FDY31 = DY31 << 4; - - // Bounding rectangle - int minx = MAX((MIN(MIN(X1, X2), X3) + 0xF) >> 4, clipleft); - int maxx = MIN((MAX(MAX(X1, X2), X3) + 0xF) >> 4, clipright - 1); - int miny = MAX((MIN(MIN(Y1, Y2), Y3) + 0xF) >> 4, cliptop); - int maxy = MIN((MAX(MAX(Y1, Y2), Y3) + 0xF) >> 4, clipbottom - 1); - if (minx >= maxx || miny >= maxy) - return; - - // Block size, standard 8x8 (must be power of two) - const int q = 8; - - // Start in corner of 8x8 block - minx &= ~(q - 1); - miny &= ~(q - 1); - - // Half-edge constants - int C1 = DY12 * X1 - DX12 * Y1; - int C2 = DY23 * X2 - DX23 * Y2; - int C3 = DY31 * X3 - DX31 * Y3; - - // Correct for fill convention - if (DY12 < 0 || (DY12 == 0 && DX12 > 0)) C1++; - if (DY23 < 0 || (DY23 == 0 && DX23 > 0)) C2++; - if (DY31 < 0 || (DY31 == 0 && DX31 > 0)) C3++; - - // Loop through blocks - for (int y = miny; y < maxy; y += q) - { - // Is this row of blocks done by this thread? 
- if (thread && ((y / q) % thread->num_cores != thread->core)) continue; - - for (int x = minx; x < maxx; x += q) - { - // Corners of block - int x0 = x << 4; - int x1 = (x + q - 1) << 4; - int y0 = y << 4; - int y1 = (y + q - 1) << 4; - - // Evaluate half-space functions - bool a00 = C1 + DX12 * y0 - DY12 * x0 > 0; - bool a10 = C1 + DX12 * y0 - DY12 * x1 > 0; - bool a01 = C1 + DX12 * y1 - DY12 * x0 > 0; - bool a11 = C1 + DX12 * y1 - DY12 * x1 > 0; - int a = (a00 << 0) | (a10 << 1) | (a01 << 2) | (a11 << 3); - - bool b00 = C2 + DX23 * y0 - DY23 * x0 > 0; - bool b10 = C2 + DX23 * y0 - DY23 * x1 > 0; - bool b01 = C2 + DX23 * y1 - DY23 * x0 > 0; - bool b11 = C2 + DX23 * y1 - DY23 * x1 > 0; - int b = (b00 << 0) | (b10 << 1) | (b01 << 2) | (b11 << 3); - - bool c00 = C3 + DX31 * y0 - DY31 * x0 > 0; - bool c10 = C3 + DX31 * y0 - DY31 * x1 > 0; - bool c01 = C3 + DX31 * y1 - DY31 * x0 > 0; - bool c11 = C3 + DX31 * y1 - DY31 * x1 > 0; - int c = (c00 << 0) | (c10 << 1) | (c01 << 2) | (c11 << 3); - - // Skip block when outside an edge - if (a == 0x0 || b == 0x0 || c == 0x0) continue; - - // Check if block needs clipping - bool clipneeded = clipleft > x || clipright < (x + q) || cliptop > y || clipbottom < (y + q); - - PolyStencilBlock stencil(x / 8 + y / 8 * stencilPitch, stencilValues, stencilMasks); - - // Accept whole block when totally covered - if (a == 0xF && b == 0xF && c == 0xF && !clipneeded && stencil.IsSingleValue()) - { - // Reject whole block if the stencil test fails - if (stencil.Get(0, 0) != stencilTestValue) - continue; - stencil.Clear(stencilWriteValue); - } - else // Partially covered block - { - int CY1 = C1 + DX12 * y0 - DY12 * x0; - int CY2 = C2 + DX23 * y0 - DY23 * x0; - int CY3 = C3 + DX31 * y0 - DY31 * x0; - - for (int iy = 0; iy < q; iy++) - { - int CX1 = CY1; - int CX2 = CY2; - int CX3 = CY3; - - for (int ix = 0; ix < q; ix++) - { - bool visible = (ix + x >= clipleft) && (ix + x < clipright) && (cliptop <= y + iy) && (clipbottom > y + iy); - - if 
(CX1 > 0 && CX2 > 0 && CX3 > 0 && visible && stencil.Get(ix, iy) == stencilTestValue) - { - stencil.Set(ix, iy, stencilWriteValue); - } - - CX1 -= FDY12; - CX2 -= FDY23; - CX3 -= FDY31; - } - - CY1 += FDX12; - CY2 += FDX23; - CY3 += FDX31; - } - } - } - } -} - -void ScreenPolyTriangleDrawer::draw32(const TriDrawTriangleArgs *args, WorkerThreadData *thread) -{ - uint32_t *dest = (uint32_t *)args->dest; - int pitch = args->pitch; - const TriVertex &v1 = *args->v1; - const TriVertex &v2 = *args->v2; - const TriVertex &v3 = *args->v3; - int clipleft = args->clipleft; - int clipright = args->clipright; - int cliptop = args->cliptop; - int clipbottom = args->clipbottom; - const uint32_t *texturePixels = (const uint32_t *)args->texturePixels; - int textureWidth = args->textureWidth; - int textureHeight = args->textureHeight; - uint8_t *stencilValues = args->stencilValues; - uint32_t *stencilMasks = args->stencilMasks; - int stencilPitch = args->stencilPitch; - uint8_t stencilTestValue = args->stencilTestValue; - uint32_t light = args->uniforms->light; - uint32_t subsector = args->uniforms->subsectorDepth; - uint32_t *subsectorGBuffer = args->subsectorGBuffer; - - // 28.4 fixed-point coordinates - const int Y1 = (int)round(16.0f * v1.y); - const int Y2 = (int)round(16.0f * v2.y); - const int Y3 = (int)round(16.0f * v3.y); - - const int X1 = (int)round(16.0f * v1.x); - const int X2 = (int)round(16.0f * v2.x); - const int X3 = (int)round(16.0f * v3.x); - - // Deltas - const int DX12 = X1 - X2; - const int DX23 = X2 - X3; - const int DX31 = X3 - X1; - - const int DY12 = Y1 - Y2; - const int DY23 = Y2 - Y3; - const int DY31 = Y3 - Y1; - - // Fixed-point deltas - const int FDX12 = DX12 << 4; - const int FDX23 = DX23 << 4; - const int FDX31 = DX31 << 4; - - const int FDY12 = DY12 << 4; - const int FDY23 = DY23 << 4; - const int FDY31 = DY31 << 4; - - // Bounding rectangle - int minx = MAX((MIN(MIN(X1, X2), X3) + 0xF) >> 4, clipleft); - int maxx = MIN((MAX(MAX(X1, X2), X3) + 0xF) 
>> 4, clipright - 1); - int miny = MAX((MIN(MIN(Y1, Y2), Y3) + 0xF) >> 4, cliptop); - int maxy = MIN((MAX(MAX(Y1, Y2), Y3) + 0xF) >> 4, clipbottom - 1); - if (minx >= maxx || miny >= maxy) - return; - - // Block size, standard 8x8 (must be power of two) - const int q = 8; - - // Start in corner of 8x8 block - minx &= ~(q - 1); - miny &= ~(q - 1); - - dest += miny * pitch; - subsectorGBuffer += miny * pitch; - - // Half-edge constants - int C1 = DY12 * X1 - DX12 * Y1; - int C2 = DY23 * X2 - DX23 * Y2; - int C3 = DY31 * X3 - DX31 * Y3; - - // Correct for fill convention - if (DY12 < 0 || (DY12 == 0 && DX12 > 0)) C1++; - if (DY23 < 0 || (DY23 == 0 && DX23 > 0)) C2++; - if (DY31 < 0 || (DY31 == 0 && DX31 > 0)) C3++; - - // Gradients - float gradWX = gradx(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); - float gradWY = grady(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); - float startW = v1.w + gradWX * (minx - v1.x) + gradWY * (miny - v1.y); - float gradVaryingX[TriVertex::NumVarying], gradVaryingY[TriVertex::NumVarying], startVarying[TriVertex::NumVarying]; - for (int i = 0; i < TriVertex::NumVarying; i++) - { - gradVaryingX[i] = gradx(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); - gradVaryingY[i] = grady(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); - startVarying[i] = v1.varying[i] * v1.w + gradVaryingX[i] * (minx - v1.x) + gradVaryingY[i] * (miny - v1.y); - } - - // Loop through blocks - for (int y = miny; y < maxy; y += q, dest += q * pitch, subsectorGBuffer += q * pitch) - { - // Is this row of blocks done by this thread? 
- if ((y / q) % thread->num_cores != thread->core) continue; - - for (int x = minx; x < maxx; x += q) - { - // Corners of block - int x0 = x << 4; - int x1 = (x + q - 1) << 4; - int y0 = y << 4; - int y1 = (y + q - 1) << 4; - - // Evaluate half-space functions - bool a00 = C1 + DX12 * y0 - DY12 * x0 > 0; - bool a10 = C1 + DX12 * y0 - DY12 * x1 > 0; - bool a01 = C1 + DX12 * y1 - DY12 * x0 > 0; - bool a11 = C1 + DX12 * y1 - DY12 * x1 > 0; - int a = (a00 << 0) | (a10 << 1) | (a01 << 2) | (a11 << 3); - - bool b00 = C2 + DX23 * y0 - DY23 * x0 > 0; - bool b10 = C2 + DX23 * y0 - DY23 * x1 > 0; - bool b01 = C2 + DX23 * y1 - DY23 * x0 > 0; - bool b11 = C2 + DX23 * y1 - DY23 * x1 > 0; - int b = (b00 << 0) | (b10 << 1) | (b01 << 2) | (b11 << 3); - - bool c00 = C3 + DX31 * y0 - DY31 * x0 > 0; - bool c10 = C3 + DX31 * y0 - DY31 * x1 > 0; - bool c01 = C3 + DX31 * y1 - DY31 * x0 > 0; - bool c11 = C3 + DX31 * y1 - DY31 * x1 > 0; - int c = (c00 << 0) | (c10 << 1) | (c01 << 2) | (c11 << 3); - - // Skip block when outside an edge - if (a == 0x0 || b == 0x0 || c == 0x0) continue; - - // Check if block needs clipping - bool clipneeded = clipleft > x || clipright < (x + q) || cliptop > y || clipbottom < (y + q); - - // Calculate varying variables for affine block - float offx0 = (x - minx) + 0.5f; - float offy0 = (y - miny) + 0.5f; - float offx1 = offx0 + q; - float offy1 = offy0 + q; - float rcpWTL = 1.0f / (startW + offx0 * gradWX + offy0 * gradWY); - float rcpWTR = 1.0f / (startW + offx1 * gradWX + offy0 * gradWY); - float rcpWBL = 1.0f / (startW + offx0 * gradWX + offy1 * gradWY); - float rcpWBR = 1.0f / (startW + offx1 * gradWX + offy1 * gradWY); - float varyingTL[TriVertex::NumVarying]; - float varyingTR[TriVertex::NumVarying]; - float varyingBL[TriVertex::NumVarying]; - float varyingBR[TriVertex::NumVarying]; - for (int i = 0; i < TriVertex::NumVarying; i++) - { - varyingTL[i] = (startVarying[i] + offx0 * gradVaryingX[i] + offy0 * gradVaryingY[i]) * rcpWTL; - varyingTR[i] = 
(startVarying[i] + offx1 * gradVaryingX[i] + offy0 * gradVaryingY[i]) * rcpWTR; - varyingBL[i] = ((startVarying[i] + offx0 * gradVaryingX[i] + offy1 * gradVaryingY[i]) * rcpWBL - varyingTL[i]) * (1.0f / q); - varyingBR[i] = ((startVarying[i] + offx1 * gradVaryingX[i] + offy1 * gradVaryingY[i]) * rcpWBR - varyingTR[i]) * (1.0f / q); - } - - float globVis = 1706.0f; - float vis = globVis / rcpWTL; - float shade = 64.0f - (light * 255 / 256 + 12.0f) * 32.0f / 128.0f; - float lightscale = clamp((shade - MIN(24.0f, vis)) / 32.0f, 0.0f, 31.0f / 32.0f); - int diminishedlight = (int)clamp((1.0f - lightscale) * 256.0f + 0.5f, 0.0f, 256.0f); - -#if !defined(NO_SSE) - __m128i mlight = _mm_set1_epi16(diminishedlight); -#endif - - uint32_t *buffer = dest; - uint32_t *subsectorbuffer = subsectorGBuffer; - - PolyStencilBlock stencil(x / 8 + y / 8 * stencilPitch, stencilValues, stencilMasks); - - // Accept whole block when totally covered - if (a == 0xF && b == 0xF && c == 0xF && !clipneeded && stencil.IsSingleValue()) - { - // Reject whole block if the stencil test fails - if (stencil.Get(0, 0) != stencilTestValue) - continue; - - for (int iy = 0; iy < q; iy++) - { - uint32_t varying[TriVertex::NumVarying], varyingStep[TriVertex::NumVarying]; - for (int i = 0; i < TriVertex::NumVarying; i++) - { - float pos = varyingTL[i] + varyingBL[i] * iy; - float step = (varyingTR[i] + varyingBR[i] * iy - pos) * (1.0f / q); - - varying[i] = (uint32_t)((pos - floor(pos)) * 0x100000000LL); - varyingStep[i] = (uint32_t)(step * 0x100000000LL); - } - -#if NO_SSE - for (int ix = x; ix < x + q; ix++) - { - uint32_t ufrac = varying[0]; - uint32_t vfrac = varying[1]; - - uint32_t upos = ((ufrac >> 16) * textureWidth) >> 16; - uint32_t vpos = ((vfrac >> 16) * textureHeight) >> 16; - uint32_t uvoffset = upos * textureHeight + vpos; - - uint32_t fg = texturePixels[uvoffset]; - uint32_t fg_red = (RPART(fg) * diminishedlight) >> 8; - uint32_t fg_green = (GPART(fg) * diminishedlight) >> 8; - uint32_t 
fg_blue = (BPART(fg) * diminishedlight) >> 8; - uint32_t fg_alpha = APART(fg); - - if (fg_alpha > 127) - { - buffer[ix] = 0xff000000 | (fg_red << 16) | (fg_green << 8) | fg_blue; - subsectorbuffer[ix] = subsector; - } - - for (int i = 0; i < TriVertex::NumVarying; i++) - varying[i] += varyingStep[i]; - } -#else - for (int sse = 0; sse < q / 4; sse++) - { - uint32_t fg[4]; - for (int ix = 0; ix < 4; ix++) - { - uint32_t ufrac = varying[0]; - uint32_t vfrac = varying[1]; - uint32_t upos = ((ufrac >> 16) * textureWidth) >> 16; - uint32_t vpos = ((vfrac >> 16) * textureHeight) >> 16; - uint32_t uvoffset = upos * textureHeight + vpos; - fg[ix] = texturePixels[uvoffset]; - for (int i = 0; i < TriVertex::NumVarying; i++) - varying[i] += varyingStep[i]; - } - - __m128i mfg = _mm_loadu_si128((const __m128i*)fg); - __m128i mfg0 = _mm_unpacklo_epi8(mfg, _mm_setzero_si128()); - __m128i mfg1 = _mm_unpackhi_epi8(mfg, _mm_setzero_si128()); - __m128i mout0 = _mm_srli_epi16(_mm_mullo_epi16(mfg0, mlight), 8); - __m128i mout1 = _mm_srli_epi16(_mm_mullo_epi16(mfg1, mlight), 8); - __m128i mout = _mm_packus_epi16(mout0, mout1); - __m128i mmask0 = _mm_shufflehi_epi16(_mm_shufflelo_epi16(mfg0, _MM_SHUFFLE(3, 3, 3, 3)), _MM_SHUFFLE(3, 3, 3, 3)); - __m128i mmask1 = _mm_shufflehi_epi16(_mm_shufflelo_epi16(mfg1, _MM_SHUFFLE(3, 3, 3, 3)), _MM_SHUFFLE(3, 3, 3, 3)); - __m128i mmask = _mm_cmplt_epi8(_mm_packus_epi16(mmask0, mmask1), _mm_setzero_si128()); - _mm_maskmoveu_si128(mout, mmask, (char*)(&buffer[x + sse * 4])); - - __m128i msubsector = _mm_set1_epi32(subsector); - _mm_maskmoveu_si128(msubsector, mmask, (char*)(&subsectorbuffer[x + sse * 4])); - } -#endif - - buffer += pitch; - subsectorbuffer += pitch; - } - } - else // Partially covered block - { - int CY1 = C1 + DX12 * y0 - DY12 * x0; - int CY2 = C2 + DX23 * y0 - DY23 * x0; - int CY3 = C3 + DX31 * y0 - DY31 * x0; - - for (int iy = 0; iy < q; iy++) - { - int CX1 = CY1; - int CX2 = CY2; - int CX3 = CY3; - - uint32_t 
varying[TriVertex::NumVarying], varyingStep[TriVertex::NumVarying]; - for (int i = 0; i < TriVertex::NumVarying; i++) - { - float pos = varyingTL[i] + varyingBL[i] * iy; - float step = (varyingTR[i] + varyingBR[i] * iy - pos) * (1.0f / q); - - varying[i] = (uint32_t)((pos - floor(pos)) * 0x100000000LL); - varyingStep[i] = (uint32_t)(step * 0x100000000LL); - } - - for (int ix = 0; ix < q; ix++) - { - bool visible = (ix + x >= clipleft) && (ix + x < clipright) && (cliptop <= y + iy) && (clipbottom > y + iy); - - if (CX1 > 0 && CX2 > 0 && CX3 > 0 && visible && stencil.Get(ix, iy) == stencilTestValue) - { - uint32_t ufrac = varying[0]; - uint32_t vfrac = varying[1]; - - uint32_t upos = ((ufrac >> 16) * textureWidth) >> 16; - uint32_t vpos = ((vfrac >> 16) * textureHeight) >> 16; - uint32_t uvoffset = upos * textureHeight + vpos; - - uint32_t fg = texturePixels[uvoffset]; - uint32_t fg_red = (RPART(fg) * diminishedlight) >> 8; - uint32_t fg_green = (GPART(fg) * diminishedlight) >> 8; - uint32_t fg_blue = (BPART(fg) * diminishedlight) >> 8; - uint32_t fg_alpha = APART(fg); - - if (fg_alpha > 127) - { - buffer[ix + x] = 0xff000000 | (fg_red << 16) | (fg_green << 8) | fg_blue; - subsectorbuffer[ix + x] = subsector; - } - } - - for (int i = 0; i < TriVertex::NumVarying; i++) - varying[i] += varyingStep[i]; - - CX1 -= FDY12; - CX2 -= FDY23; - CX3 -= FDY31; - } - - CY1 += FDX12; - CY2 += FDX23; - CY3 += FDX31; - - buffer += pitch; - subsectorbuffer += pitch; - } - } - } - } -} - -void ScreenPolyTriangleDrawer::drawsubsector32(const TriDrawTriangleArgs *args, WorkerThreadData *thread) -{ - uint32_t *dest = (uint32_t *)args->dest; - int pitch = args->pitch; - const TriVertex &v1 = *args->v1; - const TriVertex &v2 = *args->v2; - const TriVertex &v3 = *args->v3; - int clipleft = args->clipleft; - int clipright = args->clipright; - int cliptop = args->cliptop; - int clipbottom = args->clipbottom; - const uint32_t *texturePixels = (const uint32_t *)args->texturePixels; - int 
textureWidth = args->textureWidth; - int textureHeight = args->textureHeight; - uint32_t light = args->uniforms->light; - uint32_t subsector = args->uniforms->subsectorDepth; - uint32_t *subsectorGBuffer = args->subsectorGBuffer; - - // 28.4 fixed-point coordinates - const int Y1 = (int)round(16.0f * v1.y); - const int Y2 = (int)round(16.0f * v2.y); - const int Y3 = (int)round(16.0f * v3.y); - - const int X1 = (int)round(16.0f * v1.x); - const int X2 = (int)round(16.0f * v2.x); - const int X3 = (int)round(16.0f * v3.x); - - // Deltas - const int DX12 = X1 - X2; - const int DX23 = X2 - X3; - const int DX31 = X3 - X1; - - const int DY12 = Y1 - Y2; - const int DY23 = Y2 - Y3; - const int DY31 = Y3 - Y1; - - // Fixed-point deltas - const int FDX12 = DX12 << 4; - const int FDX23 = DX23 << 4; - const int FDX31 = DX31 << 4; - - const int FDY12 = DY12 << 4; - const int FDY23 = DY23 << 4; - const int FDY31 = DY31 << 4; - - // Bounding rectangle - int minx = MAX((MIN(MIN(X1, X2), X3) + 0xF) >> 4, clipleft); - int maxx = MIN((MAX(MAX(X1, X2), X3) + 0xF) >> 4, clipright - 1); - int miny = MAX((MIN(MIN(Y1, Y2), Y3) + 0xF) >> 4, cliptop); - int maxy = MIN((MAX(MAX(Y1, Y2), Y3) + 0xF) >> 4, clipbottom - 1); - if (minx >= maxx || miny >= maxy) - return; - - // Block size, standard 8x8 (must be power of two) - const int q = 8; - - // Start in corner of 8x8 block - minx &= ~(q - 1); - miny &= ~(q - 1); - - dest += miny * pitch; - subsectorGBuffer += miny * pitch; - - // Half-edge constants - int C1 = DY12 * X1 - DX12 * Y1; - int C2 = DY23 * X2 - DX23 * Y2; - int C3 = DY31 * X3 - DX31 * Y3; - - // Correct for fill convention - if (DY12 < 0 || (DY12 == 0 && DX12 > 0)) C1++; - if (DY23 < 0 || (DY23 == 0 && DX23 > 0)) C2++; - if (DY31 < 0 || (DY31 == 0 && DX31 > 0)) C3++; - - // Gradients - float gradWX = gradx(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); - float gradWY = grady(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); - float startW = v1.w + gradWX * (minx - 
v1.x) + gradWY * (miny - v1.y); - float gradVaryingX[TriVertex::NumVarying], gradVaryingY[TriVertex::NumVarying], startVarying[TriVertex::NumVarying]; - for (int i = 0; i < TriVertex::NumVarying; i++) - { - gradVaryingX[i] = gradx(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); - gradVaryingY[i] = grady(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); - startVarying[i] = v1.varying[i] * v1.w + gradVaryingX[i] * (minx - v1.x) + gradVaryingY[i] * (miny - v1.y); - } - - // Loop through blocks - for (int y = miny; y < maxy; y += q, dest += q * pitch, subsectorGBuffer += q * pitch) - { - // Is this row of blocks done by this thread? - if ((y / q) % thread->num_cores != thread->core) continue; - - for (int x = minx; x < maxx; x += q) - { - // Corners of block - int x0 = x << 4; - int x1 = (x + q - 1) << 4; - int y0 = y << 4; - int y1 = (y + q - 1) << 4; - - // Evaluate half-space functions - bool a00 = C1 + DX12 * y0 - DY12 * x0 > 0; - bool a10 = C1 + DX12 * y0 - DY12 * x1 > 0; - bool a01 = C1 + DX12 * y1 - DY12 * x0 > 0; - bool a11 = C1 + DX12 * y1 - DY12 * x1 > 0; - int a = (a00 << 0) | (a10 << 1) | (a01 << 2) | (a11 << 3); - - bool b00 = C2 + DX23 * y0 - DY23 * x0 > 0; - bool b10 = C2 + DX23 * y0 - DY23 * x1 > 0; - bool b01 = C2 + DX23 * y1 - DY23 * x0 > 0; - bool b11 = C2 + DX23 * y1 - DY23 * x1 > 0; - int b = (b00 << 0) | (b10 << 1) | (b01 << 2) | (b11 << 3); - - bool c00 = C3 + DX31 * y0 - DY31 * x0 > 0; - bool c10 = C3 + DX31 * y0 - DY31 * x1 > 0; - bool c01 = C3 + DX31 * y1 - DY31 * x0 > 0; - bool c11 = C3 + DX31 * y1 - DY31 * x1 > 0; - int c = (c00 << 0) | (c10 << 1) | (c01 << 2) | (c11 << 3); - - // Skip block when outside an edge - if (a == 0x0 || b == 0x0 || c == 0x0) continue; - - // Check if block needs clipping - bool clipneeded = clipleft > x || clipright < (x + q) || cliptop > y || clipbottom < (y + q); - - // Calculate varying variables for 
affine block - float offx0 = (x - minx) + 0.5f; - float offy0 = (y - miny) + 0.5f; - float offx1 = offx0 + q; - float offy1 = offy0 + q; - float rcpWTL = 1.0f / (startW + offx0 * gradWX + offy0 * gradWY); - float rcpWTR = 1.0f / (startW + offx1 * gradWX + offy0 * gradWY); - float rcpWBL = 1.0f / (startW + offx0 * gradWX + offy1 * gradWY); - float rcpWBR = 1.0f / (startW + offx1 * gradWX + offy1 * gradWY); - float varyingTL[TriVertex::NumVarying]; - float varyingTR[TriVertex::NumVarying]; - float varyingBL[TriVertex::NumVarying]; - float varyingBR[TriVertex::NumVarying]; - for (int i = 0; i < TriVertex::NumVarying; i++) - { - varyingTL[i] = (startVarying[i] + offx0 * gradVaryingX[i] + offy0 * gradVaryingY[i]) * rcpWTL; - varyingTR[i] = (startVarying[i] + offx1 * gradVaryingX[i] + offy0 * gradVaryingY[i]) * rcpWTR; - varyingBL[i] = ((startVarying[i] + offx0 * gradVaryingX[i] + offy1 * gradVaryingY[i]) * rcpWBL - varyingTL[i]) * (1.0f / q); - varyingBR[i] = ((startVarying[i] + offx1 * gradVaryingX[i] + offy1 * gradVaryingY[i]) * rcpWBR - varyingTR[i]) * (1.0f / q); - } - - float globVis = 1706.0f; - float vis = globVis / rcpWTL; - float shade = 64.0f - (light * 255 / 256 + 12.0f) * 32.0f / 128.0f; - float lightscale = clamp((shade - MIN(24.0f, vis)) / 32.0f, 0.0f, 31.0f / 32.0f); - int diminishedlight = (int)clamp((1.0f - lightscale) * 256.0f + 0.5f, 0.0f, 256.0f); - -#if !defined(NO_SSE) - __m128i mlight = _mm_set1_epi16(diminishedlight); -#endif - - uint32_t *buffer = dest; - uint32_t *subsectorbuffer = subsectorGBuffer; - - // Accept whole block when totally covered - if (a == 0xF && b == 0xF && c == 0xF && !clipneeded) - { - for (int iy = 0; iy < q; iy++) - { - uint32_t varying[TriVertex::NumVarying], varyingStep[TriVertex::NumVarying]; - for (int i = 0; i < TriVertex::NumVarying; i++) - { - float pos = varyingTL[i] + varyingBL[i] * iy; - float step = (varyingTR[i] + varyingBR[i] * iy - pos) * (1.0f / q); - - varying[i] = (uint32_t)((pos - floor(pos)) * 
0x100000000LL); - varyingStep[i] = (uint32_t)(step * 0x100000000LL); - } - - for (int ix = x; ix < x + q; ix++) - { - if (subsectorbuffer[ix] >= subsector) - { - uint32_t ufrac = varying[0]; - uint32_t vfrac = varying[1]; - - uint32_t upos = ((ufrac >> 16) * textureWidth) >> 16; - uint32_t vpos = ((vfrac >> 16) * textureHeight) >> 16; - uint32_t uvoffset = upos * textureHeight + vpos; - - uint32_t fg = texturePixels[uvoffset]; - uint32_t fg_red = (RPART(fg) * diminishedlight) >> 8; - uint32_t fg_green = (GPART(fg) * diminishedlight) >> 8; - uint32_t fg_blue = (BPART(fg) * diminishedlight) >> 8; - uint32_t fg_alpha = APART(fg); - - if (fg_alpha > 127) - buffer[ix] = 0xff000000 | (fg_red << 16) | (fg_green << 8) | fg_blue; - } - - for (int i = 0; i < TriVertex::NumVarying; i++) - varying[i] += varyingStep[i]; - } - - buffer += pitch; - subsectorbuffer += pitch; - } - } - else // Partially covered block - { - int CY1 = C1 + DX12 * y0 - DY12 * x0; - int CY2 = C2 + DX23 * y0 - DY23 * x0; - int CY3 = C3 + DX31 * y0 - DY31 * x0; - - for (int iy = 0; iy < q; iy++) - { - int CX1 = CY1; - int CX2 = CY2; - int CX3 = CY3; - - uint32_t varying[TriVertex::NumVarying], varyingStep[TriVertex::NumVarying]; - for (int i = 0; i < TriVertex::NumVarying; i++) - { - float pos = varyingTL[i] + varyingBL[i] * iy; - float step = (varyingTR[i] + varyingBR[i] * iy - pos) * (1.0f / q); - - varying[i] = (uint32_t)((pos - floor(pos)) * 0x100000000LL); - varyingStep[i] = (uint32_t)(step * 0x100000000LL); - } - - for (int ix = 0; ix < q; ix++) - { - bool visible = (ix + x >= clipleft) && (ix + x < clipright) && (cliptop <= y + iy) && (clipbottom > y + iy); - - if (CX1 > 0 && CX2 > 0 && CX3 > 0 && visible && subsectorbuffer[ix + x] >= subsector) - { - uint32_t ufrac = varying[0]; - uint32_t vfrac = varying[1]; - - uint32_t upos = ((ufrac >> 16) * textureWidth) >> 16; - uint32_t vpos = ((vfrac >> 16) * textureHeight) >> 16; - uint32_t uvoffset = upos * textureHeight + vpos; - - uint32_t fg = 
texturePixels[uvoffset]; - uint32_t fg_red = (RPART(fg) * diminishedlight) >> 8; - uint32_t fg_green = (GPART(fg) * diminishedlight) >> 8; - uint32_t fg_blue = (BPART(fg) * diminishedlight) >> 8; - uint32_t fg_alpha = APART(fg); - - if (fg_alpha > 127) - buffer[ix + x] = 0xff000000 | (fg_red << 16) | (fg_green << 8) | fg_blue; - } - - for (int i = 0; i < TriVertex::NumVarying; i++) - varying[i] += varyingStep[i]; - - CX1 -= FDY12; - CX2 -= FDY23; - CX3 -= FDY31; - } - - CY1 += FDX12; - CY2 += FDX23; - CY3 += FDX31; - - buffer += pitch; - subsectorbuffer += pitch; - } - } - } - } -} - -void ScreenPolyTriangleDrawer::fill32(const TriDrawTriangleArgs *args, WorkerThreadData *thread) -{ - uint32_t *dest = (uint32_t *)args->dest; - int pitch = args->pitch; - const TriVertex &v1 = *args->v1; - const TriVertex &v2 = *args->v2; - const TriVertex &v3 = *args->v3; - int clipleft = args->clipleft; - int clipright = args->clipright; - int cliptop = args->cliptop; - int clipbottom = args->clipbottom; - int solidcolor = args->solidcolor; - uint8_t *stencilValues = args->stencilValues; - uint32_t *stencilMasks = args->stencilMasks; - int stencilPitch = args->stencilPitch; - uint8_t stencilTestValue = args->stencilTestValue; - - // 28.4 fixed-point coordinates - const int Y1 = (int)round(16.0f * v1.y); - const int Y2 = (int)round(16.0f * v2.y); - const int Y3 = (int)round(16.0f * v3.y); - - const int X1 = (int)round(16.0f * v1.x); - const int X2 = (int)round(16.0f * v2.x); - const int X3 = (int)round(16.0f * v3.x); - - // Deltas - const int DX12 = X1 - X2; - const int DX23 = X2 - X3; - const int DX31 = X3 - X1; - - const int DY12 = Y1 - Y2; - const int DY23 = Y2 - Y3; - const int DY31 = Y3 - Y1; - - // Fixed-point deltas - const int FDX12 = DX12 << 4; - const int FDX23 = DX23 << 4; - const int FDX31 = DX31 << 4; - - const int FDY12 = DY12 << 4; - const int FDY23 = DY23 << 4; - const int FDY31 = DY31 << 4; - - // Bounding rectangle - int minx = MAX((MIN(MIN(X1, X2), X3) + 0xF) >> 
4, clipleft); - int maxx = MIN((MAX(MAX(X1, X2), X3) + 0xF) >> 4, clipright - 1); - int miny = MAX((MIN(MIN(Y1, Y2), Y3) + 0xF) >> 4, cliptop); - int maxy = MIN((MAX(MAX(Y1, Y2), Y3) + 0xF) >> 4, clipbottom - 1); - if (minx >= maxx || miny >= maxy) - return; - - // Block size, standard 8x8 (must be power of two) - const int q = 8; - - // Start in corner of 8x8 block - minx &= ~(q - 1); - miny &= ~(q - 1); - - dest += miny * pitch; - - // Half-edge constants - int C1 = DY12 * X1 - DX12 * Y1; - int C2 = DY23 * X2 - DX23 * Y2; - int C3 = DY31 * X3 - DX31 * Y3; - - // Correct for fill convention - if (DY12 < 0 || (DY12 == 0 && DX12 > 0)) C1++; - if (DY23 < 0 || (DY23 == 0 && DX23 > 0)) C2++; - if (DY31 < 0 || (DY31 == 0 && DX31 > 0)) C3++; - - // Loop through blocks - for (int y = miny; y < maxy; y += q, dest += q * pitch) - { - // Is this row of blocks done by this thread? - if ((y / q) % thread->num_cores != thread->core) continue; - - for (int x = minx; x < maxx; x += q) - { - // Corners of block - int x0 = x << 4; - int x1 = (x + q - 1) << 4; - int y0 = y << 4; - int y1 = (y + q - 1) << 4; - - // Evaluate half-space functions - bool a00 = C1 + DX12 * y0 - DY12 * x0 > 0; - bool a10 = C1 + DX12 * y0 - DY12 * x1 > 0; - bool a01 = C1 + DX12 * y1 - DY12 * x0 > 0; - bool a11 = C1 + DX12 * y1 - DY12 * x1 > 0; - int a = (a00 << 0) | (a10 << 1) | (a01 << 2) | (a11 << 3); - - bool b00 = C2 + DX23 * y0 - DY23 * x0 > 0; - bool b10 = C2 + DX23 * y0 - DY23 * x1 > 0; - bool b01 = C2 + DX23 * y1 - DY23 * x0 > 0; - bool b11 = C2 + DX23 * y1 - DY23 * x1 > 0; - int b = (b00 << 0) | (b10 << 1) | (b01 << 2) | (b11 << 3); - - bool c00 = C3 + DX31 * y0 - DY31 * x0 > 0; - bool c10 = C3 + DX31 * y0 - DY31 * x1 > 0; - bool c01 = C3 + DX31 * y1 - DY31 * x0 > 0; - bool c11 = C3 + DX31 * y1 - DY31 * x1 > 0; - int c = (c00 << 0) | (c10 << 1) | (c01 << 2) | (c11 << 3); - - // Skip block when outside an edge - if (a == 0x0 || b == 0x0 || c == 0x0) continue; - - // Check if block needs clipping - 
bool clipneeded = clipleft > x || clipright < (x + q) || cliptop > y || clipbottom < (y + q); - - uint32_t *buffer = dest; - - PolyStencilBlock stencil(x / 8 + y / 8 * stencilPitch, stencilValues, stencilMasks); - - // Accept whole block when totally covered - if (a == 0xF && b == 0xF && c == 0xF && !clipneeded && stencil.IsSingleValue()) - { - // Reject whole block if the stencil test fails - if (stencil.Get(0, 0) != stencilTestValue) - continue; - - for (int iy = 0; iy < q; iy++) - { - for (int ix = x; ix < x + q; ix++) - { - buffer[ix] = solidcolor; - } - - buffer += pitch; - } - } - else // Partially covered block - { - int CY1 = C1 + DX12 * y0 - DY12 * x0; - int CY2 = C2 + DX23 * y0 - DY23 * x0; - int CY3 = C3 + DX31 * y0 - DY31 * x0; - - for (int iy = 0; iy < q; iy++) - { - int CX1 = CY1; - int CX2 = CY2; - int CX3 = CY3; - - for (int ix = 0; ix < q; ix++) - { - bool visible = (ix + x >= clipleft) && (ix + x < clipright) && (cliptop <= y + iy) && (clipbottom > y + iy); - - if (CX1 > 0 && CX2 > 0 && CX3 > 0 && visible && stencil.Get(ix, iy) == stencilTestValue) - { - buffer[ix + x] = solidcolor; - } - - CX1 -= FDY12; - CX2 -= FDY23; - CX3 -= FDY31; - } - - CY1 += FDX12; - CY2 += FDX23; - CY3 += FDX31; - - buffer += pitch; - } - } - } - } -} - -float ScreenPolyTriangleDrawer::gradx(float x0, float y0, float x1, float y1, float x2, float y2, float c0, float c1, float c2) -{ - float top = (c1 - c2) * (y0 - y2) - (c0 - c2) * (y1 - y2); - float bottom = (x1 - x2) * (y0 - y2) - (x0 - x2) * (y1 - y2); - return top / bottom; -} - -float ScreenPolyTriangleDrawer::grady(float x0, float y0, float x1, float y1, float x2, float y2, float c0, float c1, float c2) -{ - float top = (c1 - c2) * (x0 - x2) - (c0 - c2) * (x1 - x2); - float bottom = -((x1 - x2) * (y0 - y2) - (x0 - x2) * (y1 - y2)); - return top / bottom; -} -#endif - -///////////////////////////////////////////////////////////////////////////// - DrawPolyTrianglesCommand::DrawPolyTrianglesCommand(const PolyDrawArgs 
&args, TriDrawVariant variant, TriBlendMode blendmode) : args(args), variant(variant), blendmode(blendmode) { diff --git a/src/r_poly_triangle.h b/src/r_poly_triangle.h index ad00d1a671..f0797460fd 100644 --- a/src/r_poly_triangle.h +++ b/src/r_poly_triangle.h @@ -20,9 +20,7 @@ ** */ - -#ifndef __R_POLY_TRIANGLE__ -#define __R_POLY_TRIANGLE__ +#pragma once #include "r_triangle.h" #include "r_data/r_translate.h" @@ -100,52 +98,6 @@ private: friend class DrawPolyTrianglesCommand; }; -// 8x8 block of stencil values, plus a mask indicating if values are the same for early out stencil testing -class PolyStencilBlock -{ -public: - PolyStencilBlock(int block, uint8_t *values, uint32_t *masks) : Values(values + block * 64), ValueMask(masks[block]) - { - } - - void Set(int x, int y, uint8_t value) - { - if ((ValueMask & 0xffffff00) == 0xffffff00) - { - if ((ValueMask & 0xff) == value) - return; - - for (int i = 0; i < 8 * 8; i++) - Values[i] = (ValueMask & 0xff); - ValueMask = 0; - } - - Values[x + y * 8] = value; - } - - uint8_t Get(int x, int y) const - { - if (IsSingleValue()) - return ValueMask & 0xff; - else - return Values[x + y * 8]; - } - - void Clear(uint8_t value) - { - ValueMask = 0xffffff00 | (uint32_t)value; - } - - bool IsSingleValue() const - { - return (ValueMask & 0xffffff00) == 0xffffff00; - } - -private: - uint8_t *Values; - uint32_t &ValueMask; -}; - class PolySubsectorGBuffer { public: @@ -193,8 +145,7 @@ public: uint32_t *m = Masks(); for (int i = 0; i < count; i++) { - PolyStencilBlock block(i, v, m); - block.Clear(stencil_value); + m[i] = 0xffffff00 | stencil_value; } } @@ -208,29 +159,12 @@ public: private: int width; int height; + + // 8x8 blocks of stencil values, plus a mask for each block indicating if values are the same for early out stencil testing std::vector values; std::vector masks; }; -#if 0 -class ScreenPolyTriangleDrawer -{ -public: - static void draw(const TriDrawTriangleArgs *args, WorkerThreadData *thread); - static void fill(const 
TriDrawTriangleArgs *args, WorkerThreadData *thread); - - static void stencil(const TriDrawTriangleArgs *args, WorkerThreadData *thread); - - static void draw32(const TriDrawTriangleArgs *args, WorkerThreadData *thread); - static void drawsubsector32(const TriDrawTriangleArgs *args, WorkerThreadData *thread); - static void fill32(const TriDrawTriangleArgs *args, WorkerThreadData *thread); - -private: - static float gradx(float x0, float y0, float x1, float y1, float x2, float y2, float c0, float c1, float c2); - static float grady(float x0, float y0, float x1, float y1, float x2, float y2, float c0, float c1, float c2); -}; -#endif - class DrawPolyTrianglesCommand : public DrawerCommand { public: @@ -244,5 +178,3 @@ private: TriDrawVariant variant; TriBlendMode blendmode; }; - -#endif From 7ac0cace7d1eed574de31e0a8b28517e759034a7 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 20 Nov 2016 02:07:55 +0100 Subject: [PATCH 351/912] Stop copying the matrix and remove r_triangle --- src/CMakeLists.txt | 1 - src/r_compiler/llvmdrawers.h | 2 - src/r_plane.cpp | 266 ------- src/r_poly_decal.cpp | 2 +- src/r_poly_decal.h | 2 +- src/r_poly_intersection.h | 2 +- src/r_poly_particle.cpp | 2 +- src/r_poly_plane.cpp | 4 +- src/r_poly_sky.cpp | 3 +- src/r_poly_sky.h | 1 + src/r_poly_sprite.cpp | 2 +- src/r_poly_triangle.cpp | 165 ++++- src/r_poly_triangle.h | 16 +- src/r_poly_wall.cpp | 2 +- src/r_poly_wallsprite.cpp | 2 +- src/r_poly_wallsprite.h | 2 +- src/r_triangle.cpp | 1338 ---------------------------------- src/r_triangle.h | 116 --- 18 files changed, 183 insertions(+), 1745 deletions(-) delete mode 100644 src/r_triangle.cpp delete mode 100644 src/r_triangle.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 8c2e362209..987f23f2fe 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1092,7 +1092,6 @@ set( FASTMATH_PCH_SOURCES r_segs.cpp r_sky.cpp r_things.cpp - r_triangle.cpp s_advsound.cpp s_environment.cpp s_playlist.cpp diff --git 
a/src/r_compiler/llvmdrawers.h b/src/r_compiler/llvmdrawers.h index e421537434..1db2f5b2a5 100644 --- a/src/r_compiler/llvmdrawers.h +++ b/src/r_compiler/llvmdrawers.h @@ -234,8 +234,6 @@ struct TriUniforms nearest_filter = 2, fixed_light = 4 }; - - TriMatrix objectToClip; }; struct TriDrawTriangleArgs diff --git a/src/r_plane.cpp b/src/r_plane.cpp index 17728e80c8..ca85470035 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -59,7 +59,6 @@ #include "v_palette.h" #include "r_data/colormaps.h" #include "r_draw_rgba.h" -#include "r_triangle.h" #ifdef _MSC_VER #pragma warning(disable:4244) @@ -1015,261 +1014,6 @@ static const BYTE *R_GetTwoSkyColumns (FTexture *fronttex, int x) } } -static void R_DrawCubeSky(visplane_t *pl) -{ - int x1 = pl->left; - int x2 = pl->right; - short *uwal = (short *)pl->top; - short *dwal = (short *)pl->bottom; - - static TriVertex cube[6 * 6] = - { - // Top - { -1.0f, 1.0f, 0.6f, 1.0f, 1.0f, 0.0f }, - { 1.0f, 1.0f, 0.6f, 1.0f, 0.0f, 0.0f }, - { 1.0f, -1.0f, 0.6f, 1.0f, 0.0f, 0.1f }, - - { 1.0f, -1.0f, 0.6f, 1.0f, 0.0f, 0.1f }, - { -1.0f, -1.0f, 0.6f, 1.0f, 1.0f, 0.1f }, - { -1.0f, 1.0f, 0.6f, 1.0f, 1.0f, 0.0f }, - - // Bottom - { 1.0f, -1.0f, -1.0f, 1.0f, 1.0f, 0.9f }, - { 1.0f, 1.0f, -1.0f, 1.0f, 1.0f, 1.0f }, - { -1.0f, 1.0f, -1.0f, 1.0f, 0.0f, 1.0f }, - - { -1.0f, 1.0f, -1.0f, 1.0f, 0.0f, 1.0f }, - { -1.0f, -1.0f, -1.0f, 1.0f, 0.0f, 0.9f }, - { 1.0f, -1.0f, -1.0f, 1.0f, 1.0f, 0.9f }, - - // Front - { 1.0f, 1.0f, -1.0f, 1.0f, 1.0f, 2.0f }, - { 1.0f, 1.0f, 0.6f, 1.0f, 1.0f, 0.0f }, - { -1.0f, 1.0f, 0.6f, 1.0f, 0.0f, 0.0f }, - - { -1.0f, 1.0f, 0.6f, 1.0f, 0.0f, 0.0f }, - { -1.0f, 1.0f, -1.0f, 1.0f, 0.0f, 2.0f }, - { 1.0f, 1.0f, -1.0f, 1.0f, 1.0f, 2.0f }, - - // Back - { -1.0f, -1.0f, 0.6f, 1.0f, 1.0f, 0.0f }, - { 1.0f, -1.0f, 0.6f, 1.0f, 0.0f, 0.0f }, - { 1.0f, -1.0f, -1.0f, 1.0f, 0.0f, 2.0f }, - - { 1.0f, -1.0f, -1.0f, 1.0f, 0.0f, 2.0f }, - { -1.0f, -1.0f, -1.0f, 1.0f, 1.0f, 2.0f }, - { -1.0f, -1.0f, 0.6f, 1.0f, 1.0f, 0.0f }, - - // 
Right - { 1.0f, -1.0f, 0.6f, 1.0f, 1.0f, 0.0f }, - { 1.0f, 1.0f, 0.6f, 1.0f, 0.0f, 0.0f }, - { 1.0f, 1.0f, -1.0f, 1.0f, 0.0f, 2.0f }, - - { 1.0f, 1.0f, -1.0f, 1.0f, 0.0f, 2.0f }, - { 1.0f, -1.0f, -1.0f, 1.0f, 1.0f, 2.0f }, - { 1.0f, -1.0f, 0.6f, 1.0f, 1.0f, 0.0f }, - - // Left - { -1.0f, 1.0f, -1.0f, 1.0f, 1.0f, 2.0f }, - { -1.0f, 1.0f, 0.6f, 1.0f, 1.0f, 0.0f }, - { -1.0f, -1.0f, 0.6f, 1.0f, 0.0f, 0.0f }, - - { -1.0f, -1.0f, 0.6f, 1.0f, 0.0f, 0.0f }, - { -1.0f, -1.0f, -1.0f, 1.0f, 0.0f, 2.0f }, - { -1.0f, 1.0f, -1.0f, 1.0f, 1.0f, 2.0f } - }; - - TriMatrix objectToWorld = TriMatrix::translate((float)ViewPos.X, (float)ViewPos.Y, (float)ViewPos.Z) * TriMatrix::scale(1000.0f, 1000.0f, 1000.0f); - TriMatrix objectToClip = TriMatrix::viewToClip() * TriMatrix::worldToView() * objectToWorld; - //TriMatrix objectToWorld = TriMatrix::scale(1000.0f, 1000.0f, 1000.0f); - //TriMatrix objectToClip = TriMatrix::viewToClip() * objectToWorld; - - uint32_t solid_top = frontskytex->GetSkyCapColor(false); - uint32_t solid_bottom = frontskytex->GetSkyCapColor(true); - - solid_top = RGB32k.RGB[(RPART(solid_top) >> 3)][(GPART(solid_top) >> 3)][(BPART(solid_top) >> 3)]; - solid_bottom = RGB32k.RGB[(RPART(solid_bottom) >> 3)][(GPART(solid_bottom) >> 3)][(BPART(solid_bottom) >> 3)]; - - TriUniforms uniforms; - uniforms.objectToClip = objectToClip; - uniforms.light = 256; - uniforms.flags = 0; - - TriangleDrawer::fill(uniforms, cube, 6, TriangleDrawMode::Normal, false, x1, x2 - 1, uwal, dwal, solid_top); - TriangleDrawer::fill(uniforms, cube + 6, 6, TriangleDrawMode::Normal, false, x1, x2 - 1, uwal, dwal, solid_bottom); - TriangleDrawer::draw(uniforms, cube + 2 * 6, 4 * 6, TriangleDrawMode::Normal, false, x1, x2 - 1, uwal, dwal, frontskytex); -} - -namespace -{ - class SkyDome - { - public: - SkyDome() { CreateDome(); } - void Render(visplane_t *pl); - - private: - TArray mVertices; - TArray mPrimStart; - int mRows, mColumns; - - void SkyVertex(int r, int c, bool yflip); - void 
CreateSkyHemisphere(bool zflip); - void CreateDome(); - void RenderRow(int row, visplane_t *pl); - void RenderCapColorRow(int row, bool bottomCap, visplane_t *pl); - - TriVertex SetVertex(float xx, float yy, float zz, float uu = 0, float vv = 0); - TriVertex SetVertexXYZ(float xx, float yy, float zz, float uu = 0, float vv = 0); - }; - - TriVertex SkyDome::SetVertex(float xx, float yy, float zz, float uu, float vv) - { - TriVertex v; - v.x = xx; - v.y = yy; - v.z = zz; - v.w = 1.0f; - v.varying[0] = uu; - v.varying[1] = vv; - return v; - } - - TriVertex SkyDome::SetVertexXYZ(float xx, float yy, float zz, float uu, float vv) - { - TriVertex v; - v.x = xx; - v.y = zz; - v.z = yy; - v.w = 1.0f; - v.varying[0] = uu; - v.varying[1] = vv; - return v; - } - - void SkyDome::SkyVertex(int r, int c, bool zflip) - { - static const FAngle maxSideAngle = 60.f; - static const float scale = 10000.; - - FAngle topAngle = (c / (float)mColumns * 360.f); - FAngle sideAngle = maxSideAngle * (mRows - r) / mRows; - float height = sideAngle.Sin(); - float realRadius = scale * sideAngle.Cos(); - FVector2 pos = topAngle.ToVector(realRadius); - float z = (!zflip) ? scale * height : -scale * height; - - float u, v; - //uint32_t color = r == 0 ? 0xffffff : 0xffffffff; - - // And the texture coordinates. - if (!zflip) // Flipped Y is for the lower hemisphere. - { - u = (-c / (float)mColumns); - v = (r / (float)mRows); - } - else - { - u = (-c / (float)mColumns); - v = 1.0f + ((mRows - r) / (float)mRows); - } - - if (r != 4) z += 300; - - // And finally the vertex. - TriVertex vert; - vert = SetVertexXYZ(-pos.X, z - 1.f, pos.Y, u * 4.0f, v + 0.5f/*, color*/); - mVertices.Push(vert); - } - - void SkyDome::CreateSkyHemisphere(bool zflip) - { - int r, c; - - mPrimStart.Push(mVertices.Size()); - - for (c = 0; c < mColumns; c++) - { - SkyVertex(1, c, zflip); - } - - // The total number of triangles per hemisphere can be calculated - // as follows: rows * columns * 2 + 2 (for the top cap). 
- for (r = 0; r < mRows; r++) - { - mPrimStart.Push(mVertices.Size()); - for (c = 0; c <= mColumns; c++) - { - SkyVertex(r + zflip, c, zflip); - SkyVertex(r + 1 - zflip, c, zflip); - } - } - } - - void SkyDome::CreateDome() - { - mColumns = 128; - mRows = 4; - CreateSkyHemisphere(false); - CreateSkyHemisphere(true); - mPrimStart.Push(mVertices.Size()); - } - - void SkyDome::RenderRow(int row, visplane_t *pl) - { - int x1 = pl->left; - int x2 = pl->right; - short *uwal = (short *)pl->top; - short *dwal = (short *)pl->bottom; - TriMatrix objectToWorld = TriMatrix::translate((float)ViewPos.X, (float)ViewPos.Y, (float)ViewPos.Z); - TriMatrix objectToClip = TriMatrix::viewToClip() * TriMatrix::worldToView() * objectToWorld; - TriUniforms uniforms; - uniforms.objectToClip = objectToClip; - uniforms.light = 256; - uniforms.flags = 0; - TriangleDrawer::draw(uniforms, &mVertices[mPrimStart[row]], mPrimStart[row + 1] - mPrimStart[row], TriangleDrawMode::Strip, false, x1, x2 - 1, uwal, dwal, frontskytex); - } - - void SkyDome::RenderCapColorRow(int row, bool bottomCap, visplane_t *pl) - { - uint32_t solid = frontskytex->GetSkyCapColor(bottomCap); - solid = RGB32k.RGB[(RPART(solid) >> 3)][(GPART(solid) >> 3)][(BPART(solid) >> 3)]; - - int x1 = pl->left; - int x2 = pl->right; - short *uwal = (short *)pl->top; - short *dwal = (short *)pl->bottom; - TriMatrix objectToWorld = TriMatrix::translate((float)ViewPos.X, (float)ViewPos.Y, (float)ViewPos.Z); - TriMatrix objectToClip = TriMatrix::viewToClip() * TriMatrix::worldToView() * objectToWorld; - TriUniforms uniforms; - uniforms.objectToClip = objectToClip; - uniforms.light = 256; - uniforms.flags = 0; - TriangleDrawer::fill(uniforms, &mVertices[mPrimStart[row]], mPrimStart[row + 1] - mPrimStart[row], TriangleDrawMode::Fan, bottomCap, x1, x2 - 1, uwal, dwal, solid); - } - - void SkyDome::Render(visplane_t *pl) - { - int rc = mRows + 1; - - // No need to draw this as the software renderer can't look that high anyway - 
//RenderCapColorRow(0, false, pl); - //RenderCapColorRow(rc, true, pl); - - for (int i = 1; i <= mRows; i++) - { - RenderRow(i, pl); - RenderRow(rc + i, pl); - } - } -} - -static void R_DrawDomeSky(visplane_t *pl) -{ - static SkyDome skydome; - skydome.Render(pl); -} - static void R_DrawSkyColumnStripe(int start_x, int y1, int y2, int columns, double scale, double texturemid, double yrepeat) { uint32_t height = frontskytex->GetHeight(); @@ -1467,16 +1211,6 @@ static void R_DrawSky (visplane_t *pl) R_DrawCapSky(pl); return; } - else if (r_skymode == 3) - { - R_DrawCubeSky(pl); - return; - } - else if (r_skymode == 4) - { - R_DrawDomeSky(pl); - return; - } int x; float swal; diff --git a/src/r_poly_decal.cpp b/src/r_poly_decal.cpp index 5641bfcd1c..52caae7e2e 100644 --- a/src/r_poly_decal.cpp +++ b/src/r_poly_decal.cpp @@ -129,7 +129,6 @@ void RenderPolyDecal::Render(const TriMatrix &worldToClip, DBaseDecal *decal, co bool fullbrightSprite = (decal->RenderFlags & RF_FULLBRIGHT) == RF_FULLBRIGHT; TriUniforms uniforms; - uniforms.objectToClip = worldToClip; if (fullbrightSprite || fixedlightlev >= 0 || fixedcolormap) { uniforms.light = 256; @@ -145,6 +144,7 @@ void RenderPolyDecal::Render(const TriMatrix &worldToClip, DBaseDecal *decal, co PolyDrawArgs args; args.uniforms = uniforms; + args.objectToClip = &worldToClip; args.vinput = vertices; args.vcount = 4; args.mode = TriangleDrawMode::Fan; diff --git a/src/r_poly_decal.h b/src/r_poly_decal.h index 39b250abba..61087a773e 100644 --- a/src/r_poly_decal.h +++ b/src/r_poly_decal.h @@ -22,7 +22,7 @@ #pragma once -#include "r_triangle.h" +#include "r_poly_triangle.h" class RenderPolyDecal { diff --git a/src/r_poly_intersection.h b/src/r_poly_intersection.h index 48ebef0314..b06bc12001 100644 --- a/src/r_poly_intersection.h +++ b/src/r_poly_intersection.h @@ -22,7 +22,7 @@ #pragma once -#include "r_triangle.h" +#include "r_poly_triangle.h" #include #include diff --git a/src/r_poly_particle.cpp b/src/r_poly_particle.cpp 
index e5797a1089..8448c8f32b 100644 --- a/src/r_poly_particle.cpp +++ b/src/r_poly_particle.cpp @@ -71,7 +71,6 @@ void RenderPolyParticle::Render(const TriMatrix &worldToClip, particle_t *partic bool fullbrightSprite = particle->bright != 0; TriUniforms uniforms; - uniforms.objectToClip = worldToClip; if (fullbrightSprite || fixedlightlev >= 0 || fixedcolormap) { uniforms.light = 256; @@ -88,6 +87,7 @@ void RenderPolyParticle::Render(const TriMatrix &worldToClip, particle_t *partic PolyDrawArgs args; args.uniforms = uniforms; + args.objectToClip = &worldToClip; args.vinput = vertices; args.vcount = 4; args.mode = TriangleDrawMode::Fan; diff --git a/src/r_poly_plane.cpp b/src/r_poly_plane.cpp index 9005e32f15..a68a4b5689 100644 --- a/src/r_poly_plane.cpp +++ b/src/r_poly_plane.cpp @@ -103,7 +103,6 @@ void RenderPolyPlane::Render3DFloor(const TriMatrix &worldToClip, subsector_t *s } TriUniforms uniforms; - uniforms.objectToClip = worldToClip; uniforms.light = (uint32_t)(lightlevel / 255.0f * 256.0f); if (fixedlightlev >= 0 || fixedcolormap) uniforms.light = 256; @@ -133,6 +132,7 @@ void RenderPolyPlane::Render3DFloor(const TriMatrix &worldToClip, subsector_t *s PolyDrawArgs args; args.uniforms = uniforms; + args.objectToClip = &worldToClip; args.vinput = vertices; args.vcount = sub->numlines; args.mode = TriangleDrawMode::Fan; @@ -195,7 +195,6 @@ void RenderPolyPlane::Render(const TriMatrix &worldToClip, subsector_t *sub, uin bool isSky = picnum == skyflatnum; TriUniforms uniforms; - uniforms.objectToClip = worldToClip; uniforms.light = (uint32_t)(frontsector->lightlevel / 255.0f * 256.0f); if (fixedlightlev >= 0 || fixedcolormap) uniforms.light = 256; @@ -225,6 +224,7 @@ void RenderPolyPlane::Render(const TriMatrix &worldToClip, subsector_t *sub, uin PolyDrawArgs args; args.uniforms = uniforms; + args.objectToClip = &worldToClip; args.vinput = vertices; args.vcount = sub->numlines; args.mode = TriangleDrawMode::Fan; diff --git a/src/r_poly_sky.cpp 
b/src/r_poly_sky.cpp index b0b06f2625..a60b3f460e 100644 --- a/src/r_poly_sky.cpp +++ b/src/r_poly_sky.cpp @@ -48,9 +48,9 @@ void PolySkyDome::Render(const TriMatrix &worldToClip) backskytex = TexMan(sky2tex, true); TriMatrix objectToWorld = TriMatrix::translate((float)ViewPos.X, (float)ViewPos.Y, (float)ViewPos.Z); + objectToClip = worldToClip * objectToWorld; TriUniforms uniforms; - uniforms.objectToClip = worldToClip * objectToWorld; uniforms.light = 256; uniforms.flags = 0; uniforms.subsectorDepth = RenderPolyScene::SkySubsectorDepth; @@ -59,6 +59,7 @@ void PolySkyDome::Render(const TriMatrix &worldToClip) PolyDrawArgs args; args.uniforms = uniforms; + args.objectToClip = &objectToClip; args.stenciltestvalue = 255; args.stencilwritevalue = 1; args.SetTexture(frontskytex); diff --git a/src/r_poly_sky.h b/src/r_poly_sky.h index dd4bd29cae..95ec27ed65 100644 --- a/src/r_poly_sky.h +++ b/src/r_poly_sky.h @@ -33,6 +33,7 @@ private: TArray mVertices; TArray mPrimStart; int mRows, mColumns; + TriMatrix objectToClip; void SkyVertex(int r, int c, bool yflip); void CreateSkyHemisphere(bool zflip); diff --git a/src/r_poly_sprite.cpp b/src/r_poly_sprite.cpp index 8c74d0d1a3..84ff8e2213 100644 --- a/src/r_poly_sprite.cpp +++ b/src/r_poly_sprite.cpp @@ -114,7 +114,6 @@ void RenderPolySprite::Render(const TriMatrix &worldToClip, AActor *thing, subse bool fullbrightSprite = ((thing->renderflags & RF_FULLBRIGHT) || (thing->flags5 & MF5_BRIGHT)); TriUniforms uniforms; - uniforms.objectToClip = worldToClip; if (fullbrightSprite || fixedlightlev >= 0 || fixedcolormap) { uniforms.light = 256; @@ -129,6 +128,7 @@ void RenderPolySprite::Render(const TriMatrix &worldToClip, AActor *thing, subse PolyDrawArgs args; args.uniforms = uniforms; + args.objectToClip = &worldToClip; args.vinput = vertices; args.vcount = 4; args.mode = TriangleDrawMode::Fan; diff --git a/src/r_poly_triangle.cpp b/src/r_poly_triangle.cpp index ad9f7ff952..5574251e61 100644 --- a/src/r_poly_triangle.cpp +++ 
b/src/r_poly_triangle.cpp @@ -124,28 +124,28 @@ void PolyTriangleDrawer::draw_arrays(const PolyDrawArgs &drawargs, TriDrawVarian for (int i = 0; i < vcount / 3; i++) { for (int j = 0; j < 3; j++) - vert[j] = shade_vertex(drawargs.uniforms, *(vinput++)); + vert[j] = shade_vertex(*drawargs.objectToClip, *(vinput++)); draw_shaded_triangle(vert, ccw, &args, thread, drawfunc); } } else if (drawargs.mode == TriangleDrawMode::Fan) { - vert[0] = shade_vertex(drawargs.uniforms, *(vinput++)); - vert[1] = shade_vertex(drawargs.uniforms, *(vinput++)); + vert[0] = shade_vertex(*drawargs.objectToClip, *(vinput++)); + vert[1] = shade_vertex(*drawargs.objectToClip, *(vinput++)); for (int i = 2; i < vcount; i++) { - vert[2] = shade_vertex(drawargs.uniforms, *(vinput++)); + vert[2] = shade_vertex(*drawargs.objectToClip, *(vinput++)); draw_shaded_triangle(vert, ccw, &args, thread, drawfunc); vert[1] = vert[2]; } } else // TriangleDrawMode::Strip { - vert[0] = shade_vertex(drawargs.uniforms, *(vinput++)); - vert[1] = shade_vertex(drawargs.uniforms, *(vinput++)); + vert[0] = shade_vertex(*drawargs.objectToClip, *(vinput++)); + vert[1] = shade_vertex(*drawargs.objectToClip, *(vinput++)); for (int i = 2; i < vcount; i++) { - vert[2] = shade_vertex(drawargs.uniforms, *(vinput++)); + vert[2] = shade_vertex(*drawargs.objectToClip, *(vinput++)); draw_shaded_triangle(vert, ccw, &args, thread, drawfunc); vert[0] = vert[1]; vert[1] = vert[2]; @@ -154,10 +154,10 @@ void PolyTriangleDrawer::draw_arrays(const PolyDrawArgs &drawargs, TriDrawVarian } } -TriVertex PolyTriangleDrawer::shade_vertex(const TriUniforms &uniforms, TriVertex v) +TriVertex PolyTriangleDrawer::shade_vertex(const TriMatrix &objectToClip, TriVertex v) { // Apply transform to get clip coordinates: - return uniforms.objectToClip * v; + return objectToClip * v; } void PolyTriangleDrawer::draw_shaded_triangle(const TriVertex *vert, bool ccw, TriDrawTriangleArgs *args, WorkerThreadData *thread, void(*drawfunc)(const 
TriDrawTriangleArgs *, WorkerThreadData *)) @@ -341,3 +341,150 @@ FString DrawPolyTrianglesCommand::DebugInfo() { return "DrawPolyTriangles"; } + +///////////////////////////////////////////////////////////////////////////// + +TriMatrix TriMatrix::null() +{ + TriMatrix m; + memset(m.matrix, 0, sizeof(m.matrix)); + return m; +} + +TriMatrix TriMatrix::identity() +{ + TriMatrix m = null(); + m.matrix[0] = 1.0f; + m.matrix[5] = 1.0f; + m.matrix[10] = 1.0f; + m.matrix[15] = 1.0f; + return m; +} + +TriMatrix TriMatrix::translate(float x, float y, float z) +{ + TriMatrix m = identity(); + m.matrix[0 + 3 * 4] = x; + m.matrix[1 + 3 * 4] = y; + m.matrix[2 + 3 * 4] = z; + return m; +} + +TriMatrix TriMatrix::scale(float x, float y, float z) +{ + TriMatrix m = null(); + m.matrix[0 + 0 * 4] = x; + m.matrix[1 + 1 * 4] = y; + m.matrix[2 + 2 * 4] = z; + m.matrix[3 + 3 * 4] = 1; + return m; +} + +TriMatrix TriMatrix::rotate(float angle, float x, float y, float z) +{ + float c = cosf(angle); + float s = sinf(angle); + TriMatrix m = null(); + m.matrix[0 + 0 * 4] = (x*x*(1.0f - c) + c); + m.matrix[0 + 1 * 4] = (x*y*(1.0f - c) - z*s); + m.matrix[0 + 2 * 4] = (x*z*(1.0f - c) + y*s); + m.matrix[1 + 0 * 4] = (y*x*(1.0f - c) + z*s); + m.matrix[1 + 1 * 4] = (y*y*(1.0f - c) + c); + m.matrix[1 + 2 * 4] = (y*z*(1.0f - c) - x*s); + m.matrix[2 + 0 * 4] = (x*z*(1.0f - c) - y*s); + m.matrix[2 + 1 * 4] = (y*z*(1.0f - c) + x*s); + m.matrix[2 + 2 * 4] = (z*z*(1.0f - c) + c); + m.matrix[3 + 3 * 4] = 1.0f; + return m; +} + +TriMatrix TriMatrix::swapYZ() +{ + TriMatrix m = null(); + m.matrix[0 + 0 * 4] = 1.0f; + m.matrix[1 + 2 * 4] = 1.0f; + m.matrix[2 + 1 * 4] = -1.0f; + m.matrix[3 + 3 * 4] = 1.0f; + return m; +} + +TriMatrix TriMatrix::perspective(float fovy, float aspect, float z_near, float z_far) +{ + float f = (float)(1.0 / tan(fovy * M_PI / 360.0)); + TriMatrix m = null(); + m.matrix[0 + 0 * 4] = f / aspect; + m.matrix[1 + 1 * 4] = f; + m.matrix[2 + 2 * 4] = (z_far + z_near) / (z_near - z_far); 
+ m.matrix[2 + 3 * 4] = (2.0f * z_far * z_near) / (z_near - z_far); + m.matrix[3 + 2 * 4] = -1.0f; + return m; +} + +TriMatrix TriMatrix::frustum(float left, float right, float bottom, float top, float near, float far) +{ + float a = (right + left) / (right - left); + float b = (top + bottom) / (top - bottom); + float c = -(far + near) / (far - near); + float d = -(2.0f * far) / (far - near); + TriMatrix m = null(); + m.matrix[0 + 0 * 4] = 2.0f * near / (right - left); + m.matrix[1 + 1 * 4] = 2.0f * near / (top - bottom); + m.matrix[0 + 2 * 4] = a; + m.matrix[1 + 2 * 4] = b; + m.matrix[2 + 2 * 4] = c; + m.matrix[2 + 3 * 4] = d; + m.matrix[3 + 2 * 4] = -1; + return m; +} + +TriMatrix TriMatrix::worldToView() +{ + TriMatrix m = null(); + m.matrix[0 + 0 * 4] = (float)ViewSin; + m.matrix[0 + 1 * 4] = (float)-ViewCos; + m.matrix[1 + 2 * 4] = 1.0f; + m.matrix[2 + 0 * 4] = (float)-ViewCos; + m.matrix[2 + 1 * 4] = (float)-ViewSin; + m.matrix[3 + 3 * 4] = 1.0f; + return m * translate((float)-ViewPos.X, (float)-ViewPos.Y, (float)-ViewPos.Z); +} + +TriMatrix TriMatrix::viewToClip() +{ + float near = 5.0f; + float far = 65536.0f; + float width = (float)(FocalTangent * near); + float top = (float)(CenterY / InvZtoScale * near); + float bottom = (float)(top - viewheight / InvZtoScale * near); + return frustum(-width, width, bottom, top, near, far); +} + +TriMatrix TriMatrix::operator*(const TriMatrix &mult) const +{ + TriMatrix result; + for (int x = 0; x < 4; x++) + { + for (int y = 0; y < 4; y++) + { + result.matrix[x + y * 4] = + matrix[0 * 4 + x] * mult.matrix[y * 4 + 0] + + matrix[1 * 4 + x] * mult.matrix[y * 4 + 1] + + matrix[2 * 4 + x] * mult.matrix[y * 4 + 2] + + matrix[3 * 4 + x] * mult.matrix[y * 4 + 3]; + } + } + return result; +} + +TriVertex TriMatrix::operator*(TriVertex v) const +{ + float vx = matrix[0 * 4 + 0] * v.x + matrix[1 * 4 + 0] * v.y + matrix[2 * 4 + 0] * v.z + matrix[3 * 4 + 0] * v.w; + float vy = matrix[0 * 4 + 1] * v.x + matrix[1 * 4 + 1] * v.y + 
matrix[2 * 4 + 1] * v.z + matrix[3 * 4 + 1] * v.w; + float vz = matrix[0 * 4 + 2] * v.x + matrix[1 * 4 + 2] * v.y + matrix[2 * 4 + 2] * v.z + matrix[3 * 4 + 2] * v.w; + float vw = matrix[0 * 4 + 3] * v.x + matrix[1 * 4 + 3] * v.y + matrix[2 * 4 + 3] * v.z + matrix[3 * 4 + 3] * v.w; + v.x = vx; + v.y = vy; + v.z = vz; + v.w = vw; + return v; +} diff --git a/src/r_poly_triangle.h b/src/r_poly_triangle.h index f0797460fd..db6719de90 100644 --- a/src/r_poly_triangle.h +++ b/src/r_poly_triangle.h @@ -22,15 +22,27 @@ #pragma once -#include "r_triangle.h" +#include "r_draw.h" +#include "r_thread.h" +#include "r_compiler/llvmdrawers.h" #include "r_data/r_translate.h" +class FTexture; + +enum class TriangleDrawMode +{ + Normal, + Fan, + Strip +}; + struct TriDrawTriangleArgs; class PolyDrawArgs { public: TriUniforms uniforms; + const TriMatrix *objectToClip = nullptr; const TriVertex *vinput = nullptr; int vcount = 0; TriangleDrawMode mode = TriangleDrawMode::Normal; @@ -83,7 +95,7 @@ public: static void draw(const PolyDrawArgs &args, TriDrawVariant variant, TriBlendMode blendmode); private: - static TriVertex shade_vertex(const TriUniforms &uniforms, TriVertex v); + static TriVertex shade_vertex(const TriMatrix &objectToClip, TriVertex v); static void draw_arrays(const PolyDrawArgs &args, TriDrawVariant variant, TriBlendMode blendmode, WorkerThreadData *thread); static void draw_shaded_triangle(const TriVertex *vertices, bool ccw, TriDrawTriangleArgs *args, WorkerThreadData *thread, void(*drawfunc)(const TriDrawTriangleArgs *, WorkerThreadData *)); static bool cullhalfspace(float clipdistance1, float clipdistance2, float &t1, float &t2); diff --git a/src/r_poly_wall.cpp b/src/r_poly_wall.cpp index e8bad52a77..f0eb33c32f 100644 --- a/src/r_poly_wall.cpp +++ b/src/r_poly_wall.cpp @@ -176,13 +176,13 @@ void RenderPolyWall::Render(const TriMatrix &worldToClip) } TriUniforms uniforms; - uniforms.objectToClip = worldToClip; uniforms.light = (uint32_t)(GetLightLevel() / 255.0f * 
256.0f); uniforms.flags = 0; uniforms.subsectorDepth = SubsectorDepth; PolyDrawArgs args; args.uniforms = uniforms; + args.objectToClip = &worldToClip; args.vinput = vertices; args.vcount = 4; args.mode = TriangleDrawMode::Fan; diff --git a/src/r_poly_wallsprite.cpp b/src/r_poly_wallsprite.cpp index ccebe5f0e1..cd331dcb52 100644 --- a/src/r_poly_wallsprite.cpp +++ b/src/r_poly_wallsprite.cpp @@ -99,7 +99,6 @@ void RenderPolyWallSprite::Render(const TriMatrix &worldToClip, AActor *thing, s bool fullbrightSprite = ((thing->renderflags & RF_FULLBRIGHT) || (thing->flags5 & MF5_BRIGHT)); TriUniforms uniforms; - uniforms.objectToClip = worldToClip; if (fullbrightSprite || fixedlightlev >= 0 || fixedcolormap) { uniforms.light = 256; @@ -114,6 +113,7 @@ void RenderPolyWallSprite::Render(const TriMatrix &worldToClip, AActor *thing, s PolyDrawArgs args; args.uniforms = uniforms; + args.objectToClip = &worldToClip; args.vinput = vertices; args.vcount = 4; args.mode = TriangleDrawMode::Fan; diff --git a/src/r_poly_wallsprite.h b/src/r_poly_wallsprite.h index dd4ba28dac..66a92b0333 100644 --- a/src/r_poly_wallsprite.h +++ b/src/r_poly_wallsprite.h @@ -22,7 +22,7 @@ #pragma once -#include "r_triangle.h" +#include "r_poly_triangle.h" class RenderPolyWallSprite { diff --git a/src/r_triangle.cpp b/src/r_triangle.cpp deleted file mode 100644 index 13a8ca292a..0000000000 --- a/src/r_triangle.cpp +++ /dev/null @@ -1,1338 +0,0 @@ -/* -** Triangle drawers -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 'as-is', without any express or implied -** warranty. In no event will the authors be held liable for any damages -** arising from the use of this software. -** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. 
The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. -** 3. This notice may not be removed or altered from any source distribution. -** -*/ - -#include -#include "templates.h" -#include "doomdef.h" -#include "i_system.h" -#include "w_wad.h" -#include "r_local.h" -#include "v_video.h" -#include "doomstat.h" -#include "st_stuff.h" -#include "g_game.h" -#include "g_level.h" -#include "r_data/r_translate.h" -#include "v_palette.h" -#include "r_data/colormaps.h" -#include "r_triangle.h" - -void TriangleDrawer::draw(const TriUniforms &uniforms, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, const short *cliptop, const short *clipbottom, FTexture *texture) -{ - if (r_swtruecolor) - queue_arrays(uniforms, vinput, vcount, mode, ccw, clipleft, clipright, cliptop, clipbottom, (const uint8_t*)texture->GetPixelsBgra(), texture->GetWidth(), texture->GetHeight(), 0); - else - draw_arrays(uniforms, vinput, vcount, mode, ccw, clipleft, clipright, cliptop, clipbottom, texture->GetPixels(), texture->GetWidth(), texture->GetHeight(), 0, nullptr, &ScreenTriangleDrawer::draw); -} - -void TriangleDrawer::fill(const TriUniforms &uniforms, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, const short *cliptop, const short *clipbottom, int solidcolor) -{ - if (r_swtruecolor) - queue_arrays(uniforms, vinput, vcount, mode, ccw, clipleft, clipright, cliptop, clipbottom, nullptr, 0, 0, solidcolor); - else - draw_arrays(uniforms, vinput, vcount, mode, ccw, clipleft, clipright, cliptop, clipbottom, nullptr, 0, 0, solidcolor, nullptr, &ScreenTriangleDrawer::fill); -} - -void 
TriangleDrawer::queue_arrays(const TriUniforms &uniforms, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, const short *cliptop, const short *clipbottom, const uint8_t *texturePixels, int textureWidth, int textureHeight, int solidcolor) -{ - if (clipright < clipleft || clipleft < 0 || clipright > MAXWIDTH) - return; - - int cliplength = clipright - clipleft + 1; - short *clipdata = (short*)DrawerCommandQueue::AllocMemory(cliplength * 2 * sizeof(short)); - if (!clipdata) - { - DrawerCommandQueue::WaitForWorkers(); - clipdata = (short*)DrawerCommandQueue::AllocMemory(cliplength * 2 * sizeof(short)); - if (!clipdata) - return; - } - - for (int i = 0; i < cliplength; i++) - clipdata[i] = cliptop[clipleft + i]; - for (int i = 0; i < cliplength; i++) - clipdata[cliplength + i] = clipbottom[clipleft + i]; - - DrawerCommandQueue::QueueCommand(uniforms, vinput, vcount, mode, ccw, clipleft, clipright, clipdata, texturePixels, textureWidth, textureHeight, solidcolor); -} - -void TriangleDrawer::draw_arrays(const TriUniforms &uniforms, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, const short *cliptop, const short *clipbottom, const uint8_t *texturePixels, int textureWidth, int textureHeight, int solidcolor, WorkerThreadData *thread, void(*drawfunc)(const ScreenTriangleDrawerArgs *, WorkerThreadData *)) -{ - if (vcount < 3) - return; - - ScreenTriangleDrawerArgs args; - args.dest = dc_destorg; - args.pitch = dc_pitch; - args.clipleft = clipleft; - args.clipright = clipright; - args.cliptop = cliptop; - args.clipbottom = clipbottom; - args.texturePixels = texturePixels; - args.textureWidth = textureWidth; - args.textureHeight = textureHeight; - args.solidcolor = solidcolor; - args.uniforms = &uniforms; - - TriVertex vert[3]; - if (mode == TriangleDrawMode::Normal) - { - for (int i = 0; i < vcount / 3; i++) - { - for (int j = 0; j < 3; j++) - vert[j] = shade_vertex(uniforms, 
*(vinput++)); - draw_shaded_triangle(vert, ccw, &args, thread, drawfunc); - } - } - else if (mode == TriangleDrawMode::Fan) - { - vert[0] = shade_vertex(uniforms, *(vinput++)); - vert[1] = shade_vertex(uniforms, *(vinput++)); - for (int i = 2; i < vcount; i++) - { - vert[2] = shade_vertex(uniforms, *(vinput++)); - draw_shaded_triangle(vert, ccw, &args, thread, drawfunc); - vert[1] = vert[2]; - } - } - else // TriangleDrawMode::Strip - { - vert[0] = shade_vertex(uniforms, *(vinput++)); - vert[1] = shade_vertex(uniforms, *(vinput++)); - for (int i = 2; i < vcount; i++) - { - vert[2] = shade_vertex(uniforms, *(vinput++)); - draw_shaded_triangle(vert, ccw, &args, thread, drawfunc); - vert[0] = vert[1]; - vert[1] = vert[2]; - ccw = !ccw; - } - } -} - -TriVertex TriangleDrawer::shade_vertex(const TriUniforms &uniforms, TriVertex v) -{ - // Apply transform to get clip coordinates: - return uniforms.objectToClip * v; -} - -void TriangleDrawer::draw_shaded_triangle(const TriVertex *vert, bool ccw, ScreenTriangleDrawerArgs *args, WorkerThreadData *thread, void(*drawfunc)(const ScreenTriangleDrawerArgs *, WorkerThreadData *)) -{ - // Cull, clip and generate additional vertices as needed - TriVertex clippedvert[max_additional_vertices]; - int numclipvert; - clipedge(vert, clippedvert, numclipvert); - - // Map to 2D viewport: - for (int j = 0; j < numclipvert; j++) - { - auto &v = clippedvert[j]; - - // Calculate normalized device coordinates: - v.w = 1.0f / v.w; - v.x *= v.w; - v.y *= v.w; - v.z *= v.w; - - // Apply viewport scale to get screen coordinates: - v.x = viewwidth * (1.0f + v.x) * 0.5f; - v.y = viewheight * (1.0f - v.y) * 0.5f; - } - - // Draw screen triangles - if (ccw) - { - for (int i = numclipvert; i > 1; i--) - { - args->v1 = &clippedvert[numclipvert - 1]; - args->v2 = &clippedvert[i - 1]; - args->v3 = &clippedvert[i - 2]; - drawfunc(args, thread); - } - } - else - { - for (int i = 2; i < numclipvert; i++) - { - args->v1 = &clippedvert[0]; - args->v2 = 
&clippedvert[i - 1]; - args->v3 = &clippedvert[i]; - drawfunc(args, thread); - } - } -} - -bool TriangleDrawer::cullhalfspace(float clipdistance1, float clipdistance2, float &t1, float &t2) -{ - if (clipdistance1 < 0.0f && clipdistance2 < 0.0f) - return true; - - if (clipdistance1 < 0.0f) - t1 = MAX(-clipdistance1 / (clipdistance2 - clipdistance1), 0.0f); - else - t1 = 0.0f; - - if (clipdistance2 < 0.0f) - t2 = MIN(1.0f + clipdistance2 / (clipdistance1 - clipdistance2), 1.0f); - else - t2 = 1.0f; - - return false; -} - -void TriangleDrawer::clipedge(const TriVertex *verts, TriVertex *clippedvert, int &numclipvert) -{ - // Clip and cull so that the following is true for all vertices: - // -v.w <= v.x <= v.w - // -v.w <= v.y <= v.w - // -v.w <= v.z <= v.w - - // use barycentric weights while clipping vertices - float weights[max_additional_vertices * 3 * 2]; - for (int i = 0; i < 3; i++) - { - weights[i * 3 + 0] = 0.0f; - weights[i * 3 + 1] = 0.0f; - weights[i * 3 + 2] = 0.0f; - weights[i * 3 + i] = 1.0f; - } - - // halfspace clip distances - float clipdistance[6 * 3]; - for (int i = 0; i < 3; i++) - { - const auto &v = verts[i]; - clipdistance[i * 6 + 0] = v.x + v.w; - clipdistance[i * 6 + 1] = v.w - v.x; - clipdistance[i * 6 + 2] = v.y + v.w; - clipdistance[i * 6 + 3] = v.w - v.y; - clipdistance[i * 6 + 4] = v.z + v.w; - clipdistance[i * 6 + 5] = v.w - v.z; - } - - // Clip against each halfspace - float *input = weights; - float *output = weights + max_additional_vertices * 3; - int inputverts = 3; - int outputverts = 0; - for (int p = 0; p < 6; p++) - { - // Clip each edge - outputverts = 0; - for (int i = 0; i < inputverts; i++) - { - int j = (i + 1) % inputverts; - float clipdistance1 = - clipdistance[0 * 6 + p] * input[i * 3 + 0] + - clipdistance[1 * 6 + p] * input[i * 3 + 1] + - clipdistance[2 * 6 + p] * input[i * 3 + 2]; - - float clipdistance2 = - clipdistance[0 * 6 + p] * input[j * 3 + 0] + - clipdistance[1 * 6 + p] * input[j * 3 + 1] + - clipdistance[2 * 6 
+ p] * input[j * 3 + 2]; - - float t1, t2; - if (!cullhalfspace(clipdistance1, clipdistance2, t1, t2) && outputverts + 1 < max_additional_vertices) - { - // add t1 vertex - for (int k = 0; k < 3; k++) - output[outputverts * 3 + k] = input[i * 3 + k] * (1.0f - t1) + input[j * 3 + k] * t1; - outputverts++; - - if (t2 != 1.0f && t2 > t1) - { - // add t2 vertex - for (int k = 0; k < 3; k++) - output[outputverts * 3 + k] = input[i * 3 + k] * (1.0f - t2) + input[j * 3 + k] * t2; - outputverts++; - } - } - } - std::swap(input, output); - std::swap(inputverts, outputverts); - if (inputverts == 0) - break; - } - - // Convert barycentric weights to actual vertices - numclipvert = inputverts; - for (int i = 0; i < numclipvert; i++) - { - auto &v = clippedvert[i]; - memset(&v, 0, sizeof(TriVertex)); - for (int w = 0; w < 3; w++) - { - float weight = input[i * 3 + w]; - v.x += verts[w].x * weight; - v.y += verts[w].y * weight; - v.z += verts[w].z * weight; - v.w += verts[w].w * weight; - for (int iv = 0; iv < TriVertex::NumVarying; iv++) - v.varying[iv] += verts[w].varying[iv] * weight; - } - } -} - -///////////////////////////////////////////////////////////////////////////// - -void ScreenTriangleDrawer::draw(const ScreenTriangleDrawerArgs *args, WorkerThreadData *thread) -{ - uint8_t *dest = args->dest; - int pitch = args->pitch; - const TriVertex &v1 = *args->v1; - const TriVertex &v2 = *args->v2; - const TriVertex &v3 = *args->v3; - int clipleft = args->clipleft; - int clipright = args->clipright; - const short *cliptop = args->cliptop; - const short *clipbottom = args->clipbottom; - const uint8_t *texturePixels = args->texturePixels; - int textureWidth = args->textureWidth; - int textureHeight = args->textureHeight; - - // 28.4 fixed-point coordinates - const int Y1 = (int)round(16.0f * v1.y); - const int Y2 = (int)round(16.0f * v2.y); - const int Y3 = (int)round(16.0f * v3.y); - - const int X1 = (int)round(16.0f * v1.x); - const int X2 = (int)round(16.0f * v2.x); - const 
int X3 = (int)round(16.0f * v3.x); - - // Deltas - const int DX12 = X1 - X2; - const int DX23 = X2 - X3; - const int DX31 = X3 - X1; - - const int DY12 = Y1 - Y2; - const int DY23 = Y2 - Y3; - const int DY31 = Y3 - Y1; - - // Fixed-point deltas - const int FDX12 = DX12 << 4; - const int FDX23 = DX23 << 4; - const int FDX31 = DX31 << 4; - - const int FDY12 = DY12 << 4; - const int FDY23 = DY23 << 4; - const int FDY31 = DY31 << 4; - - // Bounding rectangle - int clipymin = cliptop[clipleft]; - int clipymax = clipbottom[clipleft]; - for (int i = clipleft + 1; i <= clipright; i++) - { - clipymin = MIN(clipymin, (int)cliptop[i]); - clipymax = MAX(clipymax, (int)clipbottom[i]); - } - int minx = MAX((MIN(MIN(X1, X2), X3) + 0xF) >> 4, clipleft); - int maxx = MIN((MAX(MAX(X1, X2), X3) + 0xF) >> 4, clipright); - int miny = MAX((MIN(MIN(Y1, Y2), Y3) + 0xF) >> 4, clipymin); - int maxy = MIN((MAX(MAX(Y1, Y2), Y3) + 0xF) >> 4, clipymax - 1); - if (minx >= maxx || miny >= maxy) - return; - - // Block size, standard 8x8 (must be power of two) - const int q = 8; - - // Start in corner of 8x8 block - minx &= ~(q - 1); - miny &= ~(q - 1); - - dest += miny * pitch; - - // Half-edge constants - int C1 = DY12 * X1 - DX12 * Y1; - int C2 = DY23 * X2 - DX23 * Y2; - int C3 = DY31 * X3 - DX31 * Y3; - - // Correct for fill convention - if (DY12 < 0 || (DY12 == 0 && DX12 > 0)) C1++; - if (DY23 < 0 || (DY23 == 0 && DX23 > 0)) C2++; - if (DY31 < 0 || (DY31 == 0 && DX31 > 0)) C3++; - - // Gradients - float gradWX = gradx(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); - float gradWY = grady(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); - float startW = v1.w + gradWX * (minx - v1.x) + gradWY * (miny - v1.y); - float gradVaryingX[TriVertex::NumVarying], gradVaryingY[TriVertex::NumVarying], startVarying[TriVertex::NumVarying]; - for (int i = 0; i < TriVertex::NumVarying; i++) - { - gradVaryingX[i] = gradx(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, 
v3.varying[i] * v3.w); - gradVaryingY[i] = grady(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); - startVarying[i] = v1.varying[i] * v1.w + gradVaryingX[i] * (minx - v1.x) + gradVaryingY[i] * (miny - v1.y); - } - - // Loop through blocks - for (int y = miny; y < maxy; y += q) - { - for (int x = minx; x < maxx; x += q) - { - // Corners of block - int x0 = x << 4; - int x1 = (x + q - 1) << 4; - int y0 = y << 4; - int y1 = (y + q - 1) << 4; - - // Evaluate half-space functions - bool a00 = C1 + DX12 * y0 - DY12 * x0 > 0; - bool a10 = C1 + DX12 * y0 - DY12 * x1 > 0; - bool a01 = C1 + DX12 * y1 - DY12 * x0 > 0; - bool a11 = C1 + DX12 * y1 - DY12 * x1 > 0; - int a = (a00 << 0) | (a10 << 1) | (a01 << 2) | (a11 << 3); - - bool b00 = C2 + DX23 * y0 - DY23 * x0 > 0; - bool b10 = C2 + DX23 * y0 - DY23 * x1 > 0; - bool b01 = C2 + DX23 * y1 - DY23 * x0 > 0; - bool b11 = C2 + DX23 * y1 - DY23 * x1 > 0; - int b = (b00 << 0) | (b10 << 1) | (b01 << 2) | (b11 << 3); - - bool c00 = C3 + DX31 * y0 - DY31 * x0 > 0; - bool c10 = C3 + DX31 * y0 - DY31 * x1 > 0; - bool c01 = C3 + DX31 * y1 - DY31 * x0 > 0; - bool c11 = C3 + DX31 * y1 - DY31 * x1 > 0; - int c = (c00 << 0) | (c10 << 1) | (c01 << 2) | (c11 << 3); - - // Skip block when outside an edge - if (a == 0x0 || b == 0x0 || c == 0x0) continue; - - // Check if block needs clipping - int clipcount = 0; - for (int ix = x; ix < x + q; ix++) - { - clipcount += (clipleft > ix) || (clipright < ix) || (cliptop[ix] > y) || (clipbottom[ix] <= y + q - 1); - } - - // Calculate varying variables for affine block - float offx0 = (x - minx) + 0.5f; - float offy0 = (y - miny) + 0.5f; - float offx1 = offx0 + q; - float offy1 = offy0 + q; - float rcpWTL = 1.0f / (startW + offx0 * gradWX + offy0 * gradWY); - float rcpWTR = 1.0f / (startW + offx1 * gradWX + offy0 * gradWY); - float rcpWBL = 1.0f / (startW + offx0 * gradWX + offy1 * gradWY); - float rcpWBR = 1.0f / (startW + offx1 * gradWX + offy1 * 
gradWY); - float varyingTL[TriVertex::NumVarying]; - float varyingTR[TriVertex::NumVarying]; - float varyingBL[TriVertex::NumVarying]; - float varyingBR[TriVertex::NumVarying]; - for (int i = 0; i < TriVertex::NumVarying; i++) - { - varyingTL[i] = (startVarying[i] + offx0 * gradVaryingX[i] + offy0 * gradVaryingY[i]) * rcpWTL; - varyingTR[i] = (startVarying[i] + offx1 * gradVaryingX[i] + offy0 * gradVaryingY[i]) * rcpWTR; - varyingBL[i] = ((startVarying[i] + offx0 * gradVaryingX[i] + offy1 * gradVaryingY[i]) * rcpWBL - varyingTL[i]) * (1.0f / q); - varyingBR[i] = ((startVarying[i] + offx1 * gradVaryingX[i] + offy1 * gradVaryingY[i]) * rcpWBR - varyingTR[i]) * (1.0f / q); - } - - uint8_t *buffer = dest; - - // Accept whole block when totally covered - if (a == 0xF && b == 0xF && c == 0xF && clipcount == 0) - { - for (int iy = 0; iy < q; iy++) - { - uint32_t varying[TriVertex::NumVarying], varyingStep[TriVertex::NumVarying]; - for (int i = 0; i < TriVertex::NumVarying; i++) - { - float pos = varyingTL[i] + varyingBL[i] * iy; - float step = (varyingTR[i] + varyingBR[i] * iy - pos) * (1.0f / q); - - varying[i] = (uint32_t)((pos - floor(pos)) * 0x100000000LL); - varyingStep[i] = (uint32_t)(step * 0x100000000LL); - } - - for (int ix = x; ix < x + q; ix++) - { - uint32_t ufrac = varying[0]; - uint32_t vfrac = varying[1]; - - uint32_t upos = ((ufrac >> 16) * textureWidth) >> 16; - uint32_t vpos = ((vfrac >> 16) * textureHeight) >> 16; - uint32_t uvoffset = upos * textureHeight + vpos; - - buffer[ix] = texturePixels[uvoffset]; - - for (int i = 0; i < TriVertex::NumVarying; i++) - varying[i] += varyingStep[i]; - } - - buffer += pitch; - } - } - else // Partially covered block - { - int CY1 = C1 + DX12 * y0 - DY12 * x0; - int CY2 = C2 + DX23 * y0 - DY23 * x0; - int CY3 = C3 + DX31 * y0 - DY31 * x0; - - for (int iy = 0; iy < q; iy++) - { - int CX1 = CY1; - int CX2 = CY2; - int CX3 = CY3; - - uint32_t varying[TriVertex::NumVarying], varyingStep[TriVertex::NumVarying]; - for (int 
i = 0; i < TriVertex::NumVarying; i++) - { - float pos = varyingTL[i] + varyingBL[i] * iy; - float step = (varyingTR[i] + varyingBR[i] * iy - pos) * (1.0f / q); - - varying[i] = (uint32_t)((pos - floor(pos)) * 0x100000000LL); - varyingStep[i] = (uint32_t)(step * 0x100000000LL); - } - - for (int ix = x; ix < x + q; ix++) - { - bool visible = ix >= clipleft && ix <= clipright && (cliptop[ix] <= y + iy) && (clipbottom[ix] > y + iy); - - if (CX1 > 0 && CX2 > 0 && CX3 > 0 && visible) - { - uint32_t ufrac = varying[0]; - uint32_t vfrac = varying[1]; - - uint32_t upos = ((ufrac >> 16) * textureWidth) >> 16; - uint32_t vpos = ((vfrac >> 16) * textureHeight) >> 16; - uint32_t uvoffset = upos * textureHeight + vpos; - - buffer[ix] = texturePixels[uvoffset]; - } - - for (int i = 0; i < TriVertex::NumVarying; i++) - varying[i] += varyingStep[i]; - - CX1 -= FDY12; - CX2 -= FDY23; - CX3 -= FDY31; - } - - CY1 += FDX12; - CY2 += FDX23; - CY3 += FDX31; - - buffer += pitch; - } - } - } - - dest += q * pitch; - } -} - -void ScreenTriangleDrawer::fill(const ScreenTriangleDrawerArgs *args, WorkerThreadData *thread) -{ - uint8_t *dest = args->dest; - int pitch = args->pitch; - const TriVertex &v1 = *args->v1; - const TriVertex &v2 = *args->v2; - const TriVertex &v3 = *args->v3; - int clipleft = args->clipleft; - int clipright = args->clipright; - const short *cliptop = args->cliptop; - const short *clipbottom = args->clipbottom; - int solidcolor = args->solidcolor; - - // 28.4 fixed-point coordinates - const int Y1 = (int)round(16.0f * v1.y); - const int Y2 = (int)round(16.0f * v2.y); - const int Y3 = (int)round(16.0f * v3.y); - - const int X1 = (int)round(16.0f * v1.x); - const int X2 = (int)round(16.0f * v2.x); - const int X3 = (int)round(16.0f * v3.x); - - // Deltas - const int DX12 = X1 - X2; - const int DX23 = X2 - X3; - const int DX31 = X3 - X1; - - const int DY12 = Y1 - Y2; - const int DY23 = Y2 - Y3; - const int DY31 = Y3 - Y1; - - // Fixed-point deltas - const int FDX12 = DX12 
<< 4; - const int FDX23 = DX23 << 4; - const int FDX31 = DX31 << 4; - - const int FDY12 = DY12 << 4; - const int FDY23 = DY23 << 4; - const int FDY31 = DY31 << 4; - - // Bounding rectangle - int clipymin = cliptop[clipleft]; - int clipymax = clipbottom[clipleft]; - for (int i = clipleft + 1; i <= clipright; i++) - { - clipymin = MIN(clipymin, (int)cliptop[i]); - clipymax = MAX(clipymax, (int)clipbottom[i]); - } - int minx = MAX((MIN(MIN(X1, X2), X3) + 0xF) >> 4, clipleft); - int maxx = MIN((MAX(MAX(X1, X2), X3) + 0xF) >> 4, clipright); - int miny = MAX((MIN(MIN(Y1, Y2), Y3) + 0xF) >> 4, clipymin); - int maxy = MIN((MAX(MAX(Y1, Y2), Y3) + 0xF) >> 4, clipymax - 1); - if (minx >= maxx || miny >= maxy) - return; - - // Block size, standard 8x8 (must be power of two) - const int q = 8; - - // Start in corner of 8x8 block - minx &= ~(q - 1); - miny &= ~(q - 1); - - dest += miny * pitch; - - // Half-edge constants - int C1 = DY12 * X1 - DX12 * Y1; - int C2 = DY23 * X2 - DX23 * Y2; - int C3 = DY31 * X3 - DX31 * Y3; - - // Correct for fill convention - if (DY12 < 0 || (DY12 == 0 && DX12 > 0)) C1++; - if (DY23 < 0 || (DY23 == 0 && DX23 > 0)) C2++; - if (DY31 < 0 || (DY31 == 0 && DX31 > 0)) C3++; - - // Loop through blocks - for (int y = miny; y < maxy; y += q) - { - for (int x = minx; x < maxx; x += q) - { - // Corners of block - int x0 = x << 4; - int x1 = (x + q - 1) << 4; - int y0 = y << 4; - int y1 = (y + q - 1) << 4; - - // Evaluate half-space functions - bool a00 = C1 + DX12 * y0 - DY12 * x0 > 0; - bool a10 = C1 + DX12 * y0 - DY12 * x1 > 0; - bool a01 = C1 + DX12 * y1 - DY12 * x0 > 0; - bool a11 = C1 + DX12 * y1 - DY12 * x1 > 0; - int a = (a00 << 0) | (a10 << 1) | (a01 << 2) | (a11 << 3); - - bool b00 = C2 + DX23 * y0 - DY23 * x0 > 0; - bool b10 = C2 + DX23 * y0 - DY23 * x1 > 0; - bool b01 = C2 + DX23 * y1 - DY23 * x0 > 0; - bool b11 = C2 + DX23 * y1 - DY23 * x1 > 0; - int b = (b00 << 0) | (b10 << 1) | (b01 << 2) | (b11 << 3); - - bool c00 = C3 + DX31 * y0 - DY31 * x0 
> 0; - bool c10 = C3 + DX31 * y0 - DY31 * x1 > 0; - bool c01 = C3 + DX31 * y1 - DY31 * x0 > 0; - bool c11 = C3 + DX31 * y1 - DY31 * x1 > 0; - int c = (c00 << 0) | (c10 << 1) | (c01 << 2) | (c11 << 3); - - // Skip block when outside an edge - if (a == 0x0 || b == 0x0 || c == 0x0) continue; - - // Check if block needs clipping - int clipcount = 0; - for (int ix = x; ix < x + q; ix++) - { - clipcount += (clipleft > ix) || (clipright < ix) || (cliptop[ix] > y) || (clipbottom[ix] <= y + q - 1); - } - - uint8_t *buffer = dest; - - // Accept whole block when totally covered - if (a == 0xF && b == 0xF && c == 0xF && clipcount == 0) - { - for (int iy = 0; iy < q; iy++) - { - for (int ix = x; ix < x + q; ix++) - { - buffer[ix] = solidcolor; - } - - buffer += pitch; - } - } - else // Partially covered block - { - int CY1 = C1 + DX12 * y0 - DY12 * x0; - int CY2 = C2 + DX23 * y0 - DY23 * x0; - int CY3 = C3 + DX31 * y0 - DY31 * x0; - - for (int iy = 0; iy < q; iy++) - { - int CX1 = CY1; - int CX2 = CY2; - int CX3 = CY3; - - for (int ix = x; ix < x + q; ix++) - { - bool visible = ix >= clipleft && ix <= clipright && (cliptop[ix] <= y + iy) && (clipbottom[ix] > y + iy); - - if (CX1 > 0 && CX2 > 0 && CX3 > 0 && visible) - { - buffer[ix] = solidcolor; - } - - CX1 -= FDY12; - CX2 -= FDY23; - CX3 -= FDY31; - } - - CY1 += FDX12; - CY2 += FDX23; - CY3 += FDX31; - - buffer += pitch; - } - } - } - - dest += q * pitch; - } -} - -void ScreenTriangleDrawer::draw32(const ScreenTriangleDrawerArgs *args, WorkerThreadData *thread) -{ - uint32_t *dest = (uint32_t *)args->dest; - int pitch = args->pitch; - const TriVertex &v1 = *args->v1; - const TriVertex &v2 = *args->v2; - const TriVertex &v3 = *args->v3; - int clipleft = args->clipleft; - int clipright = args->clipright; - const short *cliptop = args->cliptop; - const short *clipbottom = args->clipbottom; - const uint32_t *texturePixels = (const uint32_t *)args->texturePixels; - int textureWidth = args->textureWidth; - int textureHeight = 
args->textureHeight; - uint32_t light = args->uniforms->light; - - // 28.4 fixed-point coordinates - const int Y1 = (int)round(16.0f * v1.y); - const int Y2 = (int)round(16.0f * v2.y); - const int Y3 = (int)round(16.0f * v3.y); - - const int X1 = (int)round(16.0f * v1.x); - const int X2 = (int)round(16.0f * v2.x); - const int X3 = (int)round(16.0f * v3.x); - - // Deltas - const int DX12 = X1 - X2; - const int DX23 = X2 - X3; - const int DX31 = X3 - X1; - - const int DY12 = Y1 - Y2; - const int DY23 = Y2 - Y3; - const int DY31 = Y3 - Y1; - - // Fixed-point deltas - const int FDX12 = DX12 << 4; - const int FDX23 = DX23 << 4; - const int FDX31 = DX31 << 4; - - const int FDY12 = DY12 << 4; - const int FDY23 = DY23 << 4; - const int FDY31 = DY31 << 4; - - // Bounding rectangle - int clipymin = cliptop[clipleft]; - int clipymax = clipbottom[clipleft]; - for (int i = clipleft + 1; i <= clipright; i++) - { - clipymin = MIN(clipymin, (int)cliptop[i]); - clipymax = MAX(clipymax, (int)clipbottom[i]); - } - int minx = MAX((MIN(MIN(X1, X2), X3) + 0xF) >> 4, clipleft); - int maxx = MIN((MAX(MAX(X1, X2), X3) + 0xF) >> 4, clipright); - int miny = MAX((MIN(MIN(Y1, Y2), Y3) + 0xF) >> 4, clipymin); - int maxy = MIN((MAX(MAX(Y1, Y2), Y3) + 0xF) >> 4, clipymax - 1); - if (minx >= maxx || miny >= maxy) - return; - - // Block size, standard 8x8 (must be power of two) - const int q = 8; - - // Start in corner of 8x8 block - minx &= ~(q - 1); - miny &= ~(q - 1); - - dest += miny * pitch; - - // Half-edge constants - int C1 = DY12 * X1 - DX12 * Y1; - int C2 = DY23 * X2 - DX23 * Y2; - int C3 = DY31 * X3 - DX31 * Y3; - - // Correct for fill convention - if (DY12 < 0 || (DY12 == 0 && DX12 > 0)) C1++; - if (DY23 < 0 || (DY23 == 0 && DX23 > 0)) C2++; - if (DY31 < 0 || (DY31 == 0 && DX31 > 0)) C3++; - - // Gradients - float gradWX = gradx(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); - float gradWY = grady(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); - float startW = v1.w + 
gradWX * (minx - v1.x) + gradWY * (miny - v1.y); - float gradVaryingX[TriVertex::NumVarying], gradVaryingY[TriVertex::NumVarying], startVarying[TriVertex::NumVarying]; - for (int i = 0; i < TriVertex::NumVarying; i++) - { - gradVaryingX[i] = gradx(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); - gradVaryingY[i] = grady(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); - startVarying[i] = v1.varying[i] * v1.w + gradVaryingX[i] * (minx - v1.x) + gradVaryingY[i] * (miny - v1.y); - } - - // Loop through blocks - for (int y = miny; y < maxy; y += q) - { - for (int x = minx; x < maxx; x += q) - { - // Corners of block - int x0 = x << 4; - int x1 = (x + q - 1) << 4; - int y0 = y << 4; - int y1 = (y + q - 1) << 4; - - // Evaluate half-space functions - bool a00 = C1 + DX12 * y0 - DY12 * x0 > 0; - bool a10 = C1 + DX12 * y0 - DY12 * x1 > 0; - bool a01 = C1 + DX12 * y1 - DY12 * x0 > 0; - bool a11 = C1 + DX12 * y1 - DY12 * x1 > 0; - int a = (a00 << 0) | (a10 << 1) | (a01 << 2) | (a11 << 3); - - bool b00 = C2 + DX23 * y0 - DY23 * x0 > 0; - bool b10 = C2 + DX23 * y0 - DY23 * x1 > 0; - bool b01 = C2 + DX23 * y1 - DY23 * x0 > 0; - bool b11 = C2 + DX23 * y1 - DY23 * x1 > 0; - int b = (b00 << 0) | (b10 << 1) | (b01 << 2) | (b11 << 3); - - bool c00 = C3 + DX31 * y0 - DY31 * x0 > 0; - bool c10 = C3 + DX31 * y0 - DY31 * x1 > 0; - bool c01 = C3 + DX31 * y1 - DY31 * x0 > 0; - bool c11 = C3 + DX31 * y1 - DY31 * x1 > 0; - int c = (c00 << 0) | (c10 << 1) | (c01 << 2) | (c11 << 3); - - // Skip block when outside an edge - if (a == 0x0 || b == 0x0 || c == 0x0) continue; - - // Check if block needs clipping - int clipcount = 0; - for (int ix = x; ix < x + q; ix++) - { - clipcount += (clipleft > ix) || (clipright < ix) || (cliptop[ix] > y) || (clipbottom[ix] <= y + q - 1); - } - - // Calculate varying variables for affine block - float offx0 = (x - minx) + 0.5f; - float offy0 = (y - 
miny) + 0.5f; - float offx1 = offx0 + q; - float offy1 = offy0 + q; - float rcpWTL = 1.0f / (startW + offx0 * gradWX + offy0 * gradWY); - float rcpWTR = 1.0f / (startW + offx1 * gradWX + offy0 * gradWY); - float rcpWBL = 1.0f / (startW + offx0 * gradWX + offy1 * gradWY); - float rcpWBR = 1.0f / (startW + offx1 * gradWX + offy1 * gradWY); - float varyingTL[TriVertex::NumVarying]; - float varyingTR[TriVertex::NumVarying]; - float varyingBL[TriVertex::NumVarying]; - float varyingBR[TriVertex::NumVarying]; - for (int i = 0; i < TriVertex::NumVarying; i++) - { - varyingTL[i] = (startVarying[i] + offx0 * gradVaryingX[i] + offy0 * gradVaryingY[i]) * rcpWTL; - varyingTR[i] = (startVarying[i] + offx1 * gradVaryingX[i] + offy0 * gradVaryingY[i]) * rcpWTR; - varyingBL[i] = ((startVarying[i] + offx0 * gradVaryingX[i] + offy1 * gradVaryingY[i]) * rcpWBL - varyingTL[i]) * (1.0f / q); - varyingBR[i] = ((startVarying[i] + offx1 * gradVaryingX[i] + offy1 * gradVaryingY[i]) * rcpWBR - varyingTR[i]) * (1.0f / q); - } - - uint32_t *buffer = dest; - - // Accept whole block when totally covered - if (a == 0xF && b == 0xF && c == 0xF && clipcount == 0) - { - for (int iy = 0; iy < q; iy++) - { - uint32_t varying[TriVertex::NumVarying], varyingStep[TriVertex::NumVarying]; - for (int i = 0; i < TriVertex::NumVarying; i++) - { - float pos = varyingTL[i] + varyingBL[i] * iy; - float step = (varyingTR[i] + varyingBR[i] * iy - pos) * (1.0f / q); - - varying[i] = (uint32_t)((pos - floor(pos)) * 0x100000000LL); - varyingStep[i] = (uint32_t)(step * 0x100000000LL); - } - - if ((y + iy) % thread->num_cores == thread->core) - { - for (int ix = x; ix < x + q; ix++) - { - uint32_t ufrac = varying[0]; - uint32_t vfrac = varying[1]; - - uint32_t upos = ((ufrac >> 16) * textureWidth) >> 16; - uint32_t vpos = ((vfrac >> 16) * textureHeight) >> 16; - uint32_t uvoffset = upos * textureHeight + vpos; - - uint32_t fg = texturePixels[uvoffset]; - uint32_t fg_red = (RPART(fg) * light) >> 8; - uint32_t fg_green = 
(GPART(fg) * light) >> 8; - uint32_t fg_blue = (BPART(fg) * light) >> 8; - uint32_t fg_alpha = APART(fg); - - if (fg_alpha > 127) - buffer[ix] = 0xff000000 | (fg_red << 16) | (fg_green << 8) | fg_blue; - - for (int i = 0; i < TriVertex::NumVarying; i++) - varying[i] += varyingStep[i]; - } - } - - buffer += pitch; - } - } - else // Partially covered block - { - int CY1 = C1 + DX12 * y0 - DY12 * x0; - int CY2 = C2 + DX23 * y0 - DY23 * x0; - int CY3 = C3 + DX31 * y0 - DY31 * x0; - - for (int iy = 0; iy < q; iy++) - { - int CX1 = CY1; - int CX2 = CY2; - int CX3 = CY3; - - float varying[TriVertex::NumVarying], varyingStep[TriVertex::NumVarying]; - for (int i = 0; i < TriVertex::NumVarying; i++) - { - varying[i] = varyingTL[i] + varyingBL[i] * iy; - varyingStep[i] = (varyingTR[i] + varyingBR[i] * iy - varying[i]) * (1.0f / q); - } - - if ((y + iy) % thread->num_cores == thread->core) - { - for (int ix = x; ix < x + q; ix++) - { - bool visible = ix >= clipleft && ix <= clipright && (cliptop[ix] <= y + iy) && (clipbottom[ix] > y + iy); - - if (CX1 > 0 && CX2 > 0 && CX3 > 0 && visible) - { - uint32_t ufrac = (uint32_t)((varying[0] - floor(varying[0])) * 0x100000000LL); - uint32_t vfrac = (uint32_t)((varying[1] - floor(varying[1])) * 0x100000000LL); - - uint32_t upos = ((ufrac >> 16) * textureWidth) >> 16; - uint32_t vpos = ((vfrac >> 16) * textureHeight) >> 16; - uint32_t uvoffset = upos * textureHeight + vpos; - - uint32_t fg = texturePixels[uvoffset]; - uint32_t fg_red = (RPART(fg) * light) >> 8; - uint32_t fg_green = (GPART(fg) * light) >> 8; - uint32_t fg_blue = (BPART(fg) * light) >> 8; - uint32_t fg_alpha = APART(fg); - - if (fg_alpha > 127) - buffer[ix] = 0xff000000 | (fg_red << 16) | (fg_green << 8) | fg_blue; - } - - for (int i = 0; i < TriVertex::NumVarying; i++) - varying[i] += varyingStep[i]; - - CX1 -= FDY12; - CX2 -= FDY23; - CX3 -= FDY31; - } - } - - CY1 += FDX12; - CY2 += FDX23; - CY3 += FDX31; - - buffer += pitch; - } - } - } - - dest += q * pitch; - } -} - 
-void ScreenTriangleDrawer::fill32(const ScreenTriangleDrawerArgs *args, WorkerThreadData *thread) -{ - uint32_t *dest = (uint32_t *)args->dest; - int pitch = args->pitch; - const TriVertex &v1 = *args->v1; - const TriVertex &v2 = *args->v2; - const TriVertex &v3 = *args->v3; - int clipleft = args->clipleft; - int clipright = args->clipright; - const short *cliptop = args->cliptop; - const short *clipbottom = args->clipbottom; - int solidcolor = args->solidcolor; - - // 28.4 fixed-point coordinates - const int Y1 = (int)round(16.0f * v1.y); - const int Y2 = (int)round(16.0f * v2.y); - const int Y3 = (int)round(16.0f * v3.y); - - const int X1 = (int)round(16.0f * v1.x); - const int X2 = (int)round(16.0f * v2.x); - const int X3 = (int)round(16.0f * v3.x); - - // Deltas - const int DX12 = X1 - X2; - const int DX23 = X2 - X3; - const int DX31 = X3 - X1; - - const int DY12 = Y1 - Y2; - const int DY23 = Y2 - Y3; - const int DY31 = Y3 - Y1; - - // Fixed-point deltas - const int FDX12 = DX12 << 4; - const int FDX23 = DX23 << 4; - const int FDX31 = DX31 << 4; - - const int FDY12 = DY12 << 4; - const int FDY23 = DY23 << 4; - const int FDY31 = DY31 << 4; - - // Bounding rectangle - int clipymin = cliptop[clipleft]; - int clipymax = clipbottom[clipleft]; - for (int i = clipleft + 1; i <= clipright; i++) - { - clipymin = MIN(clipymin, (int)cliptop[i]); - clipymax = MAX(clipymax, (int)clipbottom[i]); - } - int minx = MAX((MIN(MIN(X1, X2), X3) + 0xF) >> 4, clipleft); - int maxx = MIN((MAX(MAX(X1, X2), X3) + 0xF) >> 4, clipright); - int miny = MAX((MIN(MIN(Y1, Y2), Y3) + 0xF) >> 4, clipymin); - int maxy = MIN((MAX(MAX(Y1, Y2), Y3) + 0xF) >> 4, clipymax - 1); - if (minx >= maxx || miny >= maxy) - return; - - // Block size, standard 8x8 (must be power of two) - const int q = 8; - - // Start in corner of 8x8 block - minx &= ~(q - 1); - miny &= ~(q - 1); - - dest += miny * pitch; - - // Half-edge constants - int C1 = DY12 * X1 - DX12 * Y1; - int C2 = DY23 * X2 - DX23 * Y2; - int C3 = 
DY31 * X3 - DX31 * Y3; - - // Correct for fill convention - if (DY12 < 0 || (DY12 == 0 && DX12 > 0)) C1++; - if (DY23 < 0 || (DY23 == 0 && DX23 > 0)) C2++; - if (DY31 < 0 || (DY31 == 0 && DX31 > 0)) C3++; - - // Loop through blocks - for (int y = miny; y < maxy; y += q) - { - for (int x = minx; x < maxx; x += q) - { - // Corners of block - int x0 = x << 4; - int x1 = (x + q - 1) << 4; - int y0 = y << 4; - int y1 = (y + q - 1) << 4; - - // Evaluate half-space functions - bool a00 = C1 + DX12 * y0 - DY12 * x0 > 0; - bool a10 = C1 + DX12 * y0 - DY12 * x1 > 0; - bool a01 = C1 + DX12 * y1 - DY12 * x0 > 0; - bool a11 = C1 + DX12 * y1 - DY12 * x1 > 0; - int a = (a00 << 0) | (a10 << 1) | (a01 << 2) | (a11 << 3); - - bool b00 = C2 + DX23 * y0 - DY23 * x0 > 0; - bool b10 = C2 + DX23 * y0 - DY23 * x1 > 0; - bool b01 = C2 + DX23 * y1 - DY23 * x0 > 0; - bool b11 = C2 + DX23 * y1 - DY23 * x1 > 0; - int b = (b00 << 0) | (b10 << 1) | (b01 << 2) | (b11 << 3); - - bool c00 = C3 + DX31 * y0 - DY31 * x0 > 0; - bool c10 = C3 + DX31 * y0 - DY31 * x1 > 0; - bool c01 = C3 + DX31 * y1 - DY31 * x0 > 0; - bool c11 = C3 + DX31 * y1 - DY31 * x1 > 0; - int c = (c00 << 0) | (c10 << 1) | (c01 << 2) | (c11 << 3); - - // Skip block when outside an edge - if (a == 0x0 || b == 0x0 || c == 0x0) continue; - - // Check if block needs clipping - int clipcount = 0; - for (int ix = x; ix < x + q; ix++) - { - clipcount += (clipleft > ix) || (clipright < ix) || (cliptop[ix] > y) || (clipbottom[ix] <= y + q - 1); - } - - uint32_t *buffer = dest; - - // Accept whole block when totally covered - if (a == 0xF && b == 0xF && c == 0xF && clipcount == 0) - { - for (int iy = 0; iy < q; iy++) - { - if ((y + iy) % thread->num_cores == thread->core) - { - for (int ix = x; ix < x + q; ix++) - { - buffer[ix] = solidcolor; - } - } - - buffer += pitch; - } - } - else // Partially covered block - { - int CY1 = C1 + DX12 * y0 - DY12 * x0; - int CY2 = C2 + DX23 * y0 - DY23 * x0; - int CY3 = C3 + DX31 * y0 - DY31 * x0; - - for 
(int iy = 0; iy < q; iy++) - { - int CX1 = CY1; - int CX2 = CY2; - int CX3 = CY3; - - if ((y + iy) % thread->num_cores == thread->core) - { - for (int ix = x; ix < x + q; ix++) - { - bool visible = ix >= clipleft && ix <= clipright && (cliptop[ix] <= y + iy) && (clipbottom[ix] > y + iy); - - if (CX1 > 0 && CX2 > 0 && CX3 > 0 && visible) - { - buffer[ix] = solidcolor; - } - - CX1 -= FDY12; - CX2 -= FDY23; - CX3 -= FDY31; - } - } - - CY1 += FDX12; - CY2 += FDX23; - CY3 += FDX31; - - buffer += pitch; - } - } - } - - dest += q * pitch; - } -} - -float ScreenTriangleDrawer::gradx(float x0, float y0, float x1, float y1, float x2, float y2, float c0, float c1, float c2) -{ - float top = (c1 - c2) * (y0 - y2) - (c0 - c2) * (y1 - y2); - float bottom = (x1 - x2) * (y0 - y2) - (x0 - x2) * (y1 - y2); - return top / bottom; -} - -float ScreenTriangleDrawer::grady(float x0, float y0, float x1, float y1, float x2, float y2, float c0, float c1, float c2) -{ - float top = (c1 - c2) * (x0 - x2) - (c0 - c2) * (x1 - x2); - float bottom = -((x1 - x2) * (y0 - y2) - (x0 - x2) * (y1 - y2)); - return top / bottom; -} - -///////////////////////////////////////////////////////////////////////////// - -TriMatrix TriMatrix::null() -{ - TriMatrix m; - memset(m.matrix, 0, sizeof(m.matrix)); - return m; -} - -TriMatrix TriMatrix::identity() -{ - TriMatrix m = null(); - m.matrix[0] = 1.0f; - m.matrix[5] = 1.0f; - m.matrix[10] = 1.0f; - m.matrix[15] = 1.0f; - return m; -} - -TriMatrix TriMatrix::translate(float x, float y, float z) -{ - TriMatrix m = identity(); - m.matrix[0 + 3 * 4] = x; - m.matrix[1 + 3 * 4] = y; - m.matrix[2 + 3 * 4] = z; - return m; -} - -TriMatrix TriMatrix::scale(float x, float y, float z) -{ - TriMatrix m = null(); - m.matrix[0 + 0 * 4] = x; - m.matrix[1 + 1 * 4] = y; - m.matrix[2 + 2 * 4] = z; - m.matrix[3 + 3 * 4] = 1; - return m; -} - -TriMatrix TriMatrix::rotate(float angle, float x, float y, float z) -{ - float c = cosf(angle); - float s = sinf(angle); - TriMatrix m = 
null(); - m.matrix[0 + 0 * 4] = (x*x*(1.0f - c) + c); - m.matrix[0 + 1 * 4] = (x*y*(1.0f - c) - z*s); - m.matrix[0 + 2 * 4] = (x*z*(1.0f - c) + y*s); - m.matrix[1 + 0 * 4] = (y*x*(1.0f - c) + z*s); - m.matrix[1 + 1 * 4] = (y*y*(1.0f - c) + c); - m.matrix[1 + 2 * 4] = (y*z*(1.0f - c) - x*s); - m.matrix[2 + 0 * 4] = (x*z*(1.0f - c) - y*s); - m.matrix[2 + 1 * 4] = (y*z*(1.0f - c) + x*s); - m.matrix[2 + 2 * 4] = (z*z*(1.0f - c) + c); - m.matrix[3 + 3 * 4] = 1.0f; - return m; -} - -TriMatrix TriMatrix::swapYZ() -{ - TriMatrix m = null(); - m.matrix[0 + 0 * 4] = 1.0f; - m.matrix[1 + 2 * 4] = 1.0f; - m.matrix[2 + 1 * 4] = -1.0f; - m.matrix[3 + 3 * 4] = 1.0f; - return m; -} - -TriMatrix TriMatrix::perspective(float fovy, float aspect, float z_near, float z_far) -{ - float f = (float)(1.0 / tan(fovy * M_PI / 360.0)); - TriMatrix m = null(); - m.matrix[0 + 0 * 4] = f / aspect; - m.matrix[1 + 1 * 4] = f; - m.matrix[2 + 2 * 4] = (z_far + z_near) / (z_near - z_far); - m.matrix[2 + 3 * 4] = (2.0f * z_far * z_near) / (z_near - z_far); - m.matrix[3 + 2 * 4] = -1.0f; - return m; -} - -TriMatrix TriMatrix::frustum(float left, float right, float bottom, float top, float near, float far) -{ - float a = (right + left) / (right - left); - float b = (top + bottom) / (top - bottom); - float c = -(far + near) / (far - near); - float d = -(2.0f * far) / (far - near); - TriMatrix m = null(); - m.matrix[0 + 0 * 4] = 2.0f * near / (right - left); - m.matrix[1 + 1 * 4] = 2.0f * near / (top - bottom); - m.matrix[0 + 2 * 4] = a; - m.matrix[1 + 2 * 4] = b; - m.matrix[2 + 2 * 4] = c; - m.matrix[2 + 3 * 4] = d; - m.matrix[3 + 2 * 4] = -1; - return m; -} - -TriMatrix TriMatrix::worldToView() -{ - TriMatrix m = null(); - m.matrix[0 + 0 * 4] = (float)ViewSin; - m.matrix[0 + 1 * 4] = (float)-ViewCos; - m.matrix[1 + 2 * 4] = 1.0f; - m.matrix[2 + 0 * 4] = (float)-ViewCos; - m.matrix[2 + 1 * 4] = (float)-ViewSin; - m.matrix[3 + 3 * 4] = 1.0f; - return m * translate((float)-ViewPos.X, (float)-ViewPos.Y, 
(float)-ViewPos.Z); -} - -TriMatrix TriMatrix::viewToClip() -{ - float near = 5.0f; - float far = 65536.0f; - float width = (float)(FocalTangent * near); - float top = (float)(CenterY / InvZtoScale * near); - float bottom = (float)(top - viewheight / InvZtoScale * near); - return frustum(-width, width, bottom, top, near, far); -} - -TriMatrix TriMatrix::operator*(const TriMatrix &mult) const -{ - TriMatrix result; - for (int x = 0; x < 4; x++) - { - for (int y = 0; y < 4; y++) - { - result.matrix[x + y * 4] = - matrix[0 * 4 + x] * mult.matrix[y * 4 + 0] + - matrix[1 * 4 + x] * mult.matrix[y * 4 + 1] + - matrix[2 * 4 + x] * mult.matrix[y * 4 + 2] + - matrix[3 * 4 + x] * mult.matrix[y * 4 + 3]; - } - } - return result; -} - -TriVertex TriMatrix::operator*(TriVertex v) const -{ - float vx = matrix[0 * 4 + 0] * v.x + matrix[1 * 4 + 0] * v.y + matrix[2 * 4 + 0] * v.z + matrix[3 * 4 + 0] * v.w; - float vy = matrix[0 * 4 + 1] * v.x + matrix[1 * 4 + 1] * v.y + matrix[2 * 4 + 1] * v.z + matrix[3 * 4 + 1] * v.w; - float vz = matrix[0 * 4 + 2] * v.x + matrix[1 * 4 + 2] * v.y + matrix[2 * 4 + 2] * v.z + matrix[3 * 4 + 2] * v.w; - float vw = matrix[0 * 4 + 3] * v.x + matrix[1 * 4 + 3] * v.y + matrix[2 * 4 + 3] * v.z + matrix[3 * 4 + 3] * v.w; - v.x = vx; - v.y = vy; - v.z = vz; - v.w = vw; - return v; -} - -///////////////////////////////////////////////////////////////////////////// - -DrawTrianglesCommand::DrawTrianglesCommand(const TriUniforms &uniforms, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, const short *clipdata, const uint8_t *texturePixels, int textureWidth, int textureHeight, int solidcolor) - : uniforms(uniforms), vinput(vinput), vcount(vcount), mode(mode), ccw(ccw), clipleft(clipleft), clipright(clipright), clipdata(clipdata), texturePixels(texturePixels), textureWidth(textureWidth), textureHeight(textureHeight), solidcolor(solidcolor) -{ -} - -void DrawTrianglesCommand::Execute(DrawerThread *thread) -{ - int 
cliplength = clipright - clipleft + 1; - for (int i = 0; i < cliplength; i++) - { - thread->triangle_clip_top[clipleft + i] = clipdata[i]; - thread->triangle_clip_bottom[clipleft + i] = clipdata[cliplength + i]; - } - - WorkerThreadData thread_data; - thread_data.core = thread->core; - thread_data.num_cores = thread->num_cores; - thread_data.pass_start_y = thread->pass_start_y; - thread_data.pass_end_y = thread->pass_end_y; - thread_data.temp = thread->dc_temp_rgba; - - TriangleDrawer::draw_arrays( - uniforms, vinput, vcount, mode, ccw, - clipleft, clipright, thread->triangle_clip_top, thread->triangle_clip_bottom, - texturePixels, textureWidth, textureHeight, solidcolor, - &thread_data, texturePixels ? ScreenTriangleDrawer::draw32 : ScreenTriangleDrawer::fill32); -} - -FString DrawTrianglesCommand::DebugInfo() -{ - return "DrawTriangles"; -} diff --git a/src/r_triangle.h b/src/r_triangle.h deleted file mode 100644 index 6ec413d3f2..0000000000 --- a/src/r_triangle.h +++ /dev/null @@ -1,116 +0,0 @@ -/* -** Triangle drawers -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 'as-is', without any express or implied -** warranty. In no event will the authors be held liable for any damages -** arising from the use of this software. -** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. -** 3. This notice may not be removed or altered from any source distribution. 
-** -*/ - - -#ifndef __R_TRIANGLE__ -#define __R_TRIANGLE__ - -#include "r_draw.h" -#include "r_thread.h" -#include "r_compiler/llvmdrawers.h" - -class FTexture; -struct ScreenTriangleDrawerArgs; - -enum class TriangleDrawMode -{ - Normal, - Fan, - Strip -}; - -class TriangleDrawer -{ -public: - static void draw(const TriUniforms &uniforms, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, const short *cliptop, const short *clipbottom, FTexture *texture); - static void fill(const TriUniforms &uniforms, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, const short *cliptop, const short *clipbottom, int solidcolor); - -private: - static TriVertex shade_vertex(const TriUniforms &uniforms, TriVertex v); - static void draw_arrays(const TriUniforms &uniforms, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, const short *cliptop, const short *clipbottom, const uint8_t *texturePixels, int textureWidth, int textureHeight, int solidcolor, WorkerThreadData *thread, void(*drawfunc)(const ScreenTriangleDrawerArgs *, WorkerThreadData *)); - static void draw_shaded_triangle(const TriVertex *vertices, bool ccw, ScreenTriangleDrawerArgs *args, WorkerThreadData *thread, void(*drawfunc)(const ScreenTriangleDrawerArgs *, WorkerThreadData *)); - static bool cullhalfspace(float clipdistance1, float clipdistance2, float &t1, float &t2); - static void clipedge(const TriVertex *verts, TriVertex *clippedvert, int &numclipvert); - - static void queue_arrays(const TriUniforms &uniforms, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, const short *cliptop, const short *clipbottom, const uint8_t *texturePixels, int textureWidth, int textureHeight, int solidcolor); - - enum { max_additional_vertices = 16 }; - - friend class DrawTrianglesCommand; -}; - -struct ScreenTriangleDrawerArgs -{ - uint8_t 
*dest; - int pitch; - TriVertex *v1; - TriVertex *v2; - TriVertex *v3; - int clipleft; - int clipright; - const short *cliptop; - const short *clipbottom; - const uint8_t *texturePixels; - int textureWidth; - int textureHeight; - int solidcolor; - const TriUniforms *uniforms; -}; - -class ScreenTriangleDrawer -{ -public: - static void draw(const ScreenTriangleDrawerArgs *args, WorkerThreadData *thread); - static void fill(const ScreenTriangleDrawerArgs *args, WorkerThreadData *thread); - - static void draw32(const ScreenTriangleDrawerArgs *args, WorkerThreadData *thread); - static void fill32(const ScreenTriangleDrawerArgs *args, WorkerThreadData *thread); - -private: - static float gradx(float x0, float y0, float x1, float y1, float x2, float y2, float c0, float c1, float c2); - static float grady(float x0, float y0, float x1, float y1, float x2, float y2, float c0, float c1, float c2); -}; - -class DrawTrianglesCommand : public DrawerCommand -{ -public: - DrawTrianglesCommand(const TriUniforms &uniforms, const TriVertex *vinput, int vcount, TriangleDrawMode mode, bool ccw, int clipleft, int clipright, const short *clipdata, const uint8_t *texturePixels, int textureWidth, int textureHeight, int solidcolor); - - void Execute(DrawerThread *thread) override; - FString DebugInfo() override; - -private: - const TriUniforms uniforms; - const TriVertex *vinput; - int vcount; - TriangleDrawMode mode; - bool ccw; - int clipleft; - int clipright; - const short *clipdata; - const uint8_t *texturePixels; - int textureWidth; - int textureHeight; - int solidcolor; -}; - -#endif From 6761e8639a07cca24ca88f66be7e1be5edd2f27c Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 20 Nov 2016 04:06:21 +0100 Subject: [PATCH 352/912] Add palette support --- .../fixedfunction/drawtrianglecodegen.cpp | 46 +++++++++++++++---- .../fixedfunction/drawtrianglecodegen.h | 7 +++ src/r_compiler/llvmdrawers.cpp | 7 ++- src/r_compiler/llvmdrawers.h | 5 ++ src/r_poly.cpp | 9 ++-- 
src/r_poly_decal.cpp | 1 + src/r_poly_particle.cpp | 1 + src/r_poly_plane.cpp | 2 + src/r_poly_sky.cpp | 1 + src/r_poly_sprite.cpp | 1 + src/r_poly_triangle.cpp | 10 ++-- src/r_poly_triangle.h | 21 +++++++++ src/r_poly_wall.cpp | 1 + src/r_poly_wallsprite.cpp | 1 + 14 files changed, 94 insertions(+), 19 deletions(-) diff --git a/src/r_compiler/fixedfunction/drawtrianglecodegen.cpp b/src/r_compiler/fixedfunction/drawtrianglecodegen.cpp index fbbca7517a..c59992d3b8 100644 --- a/src/r_compiler/fixedfunction/drawtrianglecodegen.cpp +++ b/src/r_compiler/fixedfunction/drawtrianglecodegen.cpp @@ -258,7 +258,18 @@ void DrawTriangleCodegen::LoopBlockX() SSAFloat shade = 64.0f - (SSAFloat(light * 255 / 256) + 12.0f) * 32.0f / 128.0f; SSAFloat lightscale = SSAFloat::clamp((shade - SSAFloat::MIN(SSAFloat(24.0f), vis)) / 32.0f, SSAFloat(0.0f), SSAFloat(31.0f / 32.0f)); SSAInt diminishedlight = SSAInt(SSAFloat::clamp((1.0f - lightscale) * 256.0f + 0.5f, SSAFloat(0.0f), SSAFloat(256.0f)), false); - currentlight = is_fixed_light.select(light, diminishedlight); + + if (!truecolor) + { + SSAInt diminishedindex = SSAInt(lightscale * 32.0f, false); + SSAInt lightindex = SSAInt::MIN((256 - light) * 32 / 256, SSAInt(31)); + SSAInt colormapindex = is_fixed_light.select(lightindex, diminishedindex); + currentcolormap = Colormaps[colormapindex << 8]; + } + else + { + currentlight = is_fixed_light.select(light, diminishedlight); + } SetStencilBlock(x / 8 + y / 8 * stencilPitch); @@ -352,25 +363,32 @@ void DrawTriangleCodegen::LoopFullBlock() } else { - SSAVec4i pixels = buf.load_vec4ub(false); + SSAVec4i pixelsvec = buf.load_vec4ub(false); + SSAInt pixels[4] = + { + pixelsvec[0], + pixelsvec[1], + pixelsvec[2], + pixelsvec[3] + }; for (int sse = 0; sse < 4; sse++) { if (variant == TriDrawVariant::DrawSubsector || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector) { SSABool subsectorTest = subsectorbuffer[ix].load(true) >= subsectorDepth; - 
pixels.insert(sse, subsectorTest.select(ProcessPixel8(pixels[sse], varying), pixels[sse])); + pixels[sse] = subsectorTest.select(ProcessPixel8(pixels[sse], varying), pixels[sse]); } else { - pixels.insert(sse, ProcessPixel8(pixels[sse], varying)); + pixels[sse] = ProcessPixel8(pixels[sse], varying); } for (int i = 0; i < TriVertex::NumVarying; i++) varying[i] = varying[i] + varyingStep[i]; } - buf.store_vec4ub(pixels); + buf.store_vec4ub(SSAVec4i(pixels[0], pixels[1], pixels[2], pixels[3])); } if (variant != TriDrawVariant::DrawSubsector && variant != TriDrawVariant::FillSubsector && variant != TriDrawVariant::FuzzSubsector) @@ -580,12 +598,16 @@ SSAInt DrawTriangleCodegen::ProcessPixel8(SSAInt bg, SSAInt *varying) if (variant == TriDrawVariant::FillNormal || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector) { - return color; + return currentcolormap[color].load(true).zext_int(); } else { - SSAUByte fg = texturePixels[uvoffset].load(true); - return fg.zext_int(); + SSAInt index = texturePixels[uvoffset].load(true).zext_int(); + SSAInt fg = currentcolormap[index].load(true).zext_int(); + if (blendmode != TriBlendMode::AlphaBlend) + return fg; + else + return (index == SSAInt(0)).select(bg, fg); } } @@ -659,6 +681,14 @@ void DrawTriangleCodegen::LoadArgs(SSAValue args, SSAValue thread_data) stencilTestValue = args[0][17].load(true); stencilWriteValue = args[0][18].load(true); subsectorGBuffer = args[0][19].load(true); + if (!truecolor) + { + Colormaps = args[0][20].load(true); + RGB32k = args[0][21].load(true); + Col2RGB8 = args[0][22].load(true); + Col2RGB8_LessPrecision = args[0][23].load(true); + Col2RGB8_Inverse = args[0][24].load(true); + } thread.core = thread_data[0][0].load(true); thread.num_cores = thread_data[0][1].load(true); diff --git a/src/r_compiler/fixedfunction/drawtrianglecodegen.h b/src/r_compiler/fixedfunction/drawtrianglecodegen.h index e4c22d49cd..d2ff95e023 100644 --- 
a/src/r_compiler/fixedfunction/drawtrianglecodegen.h +++ b/src/r_compiler/fixedfunction/drawtrianglecodegen.h @@ -107,6 +107,12 @@ private: SSAUByte stencilWriteValue; SSAIntPtr subsectorGBuffer; + SSAUBytePtr Colormaps; + SSAUBytePtr RGB32k; + SSAIntPtr Col2RGB8; + SSAIntPtr Col2RGB8_LessPrecision; + SSAIntPtr Col2RGB8_Inverse; + SSAWorkerThread thread; // Block size, standard 8x8 (must be power of two) @@ -126,6 +132,7 @@ private: SSAInt x, y; SSAInt x0, x1, y0, y1; SSAInt currentlight; + SSAUBytePtr currentcolormap; SSAInt varyingPos[TriVertex::NumVarying]; SSAInt varyingStepPos[TriVertex::NumVarying]; SSAInt varyingStartStepX[TriVertex::NumVarying]; diff --git a/src/r_compiler/llvmdrawers.cpp b/src/r_compiler/llvmdrawers.cpp index 913e5d9b78..869a24c2ce 100644 --- a/src/r_compiler/llvmdrawers.cpp +++ b/src/r_compiler/llvmdrawers.cpp @@ -127,7 +127,7 @@ LLVMDrawers *LLVMDrawers::Instance() LLVMDrawersImpl::LLVMDrawersImpl() { - int version = 2; // Increment this number if the drawer codegen is modified (forces recreation of the module). + int version = 3; // Increment this number if the drawer codegen is modified (forces recreation of the module). 
std::string targetCPU = mProgram.GetTargetCPU(); bool loaded = mProgram.LoadCachedModule(version, targetCPU); if (!loaded) @@ -582,6 +582,11 @@ llvm::Type *LLVMDrawersImpl::GetTriDrawTriangleArgs(llvm::LLVMContext &context) elements.push_back(llvm::Type::getInt8Ty(context)); // uint8_t stencilTestValue; elements.push_back(llvm::Type::getInt8Ty(context)); // uint8_t stencilWriteValue; elements.push_back(llvm::Type::getInt32PtrTy(context)); // uint32_t *subsectorGBuffer; + elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint8_t *colormaps; + elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint8_t *RGB32k; + elements.push_back(llvm::Type::getInt32PtrTy(context)); // const uint32_t *Col2RGB8; + elements.push_back(llvm::Type::getInt32PtrTy(context)); // const uint32_t *Col2RGB8_LessPrecision; + elements.push_back(llvm::Type::getInt32PtrTy(context)); // const uint32_t *Col2RGB8_Inverse; return llvm::StructType::create(context, elements, "TriDrawTriangle", false)->getPointerTo(); } diff --git a/src/r_compiler/llvmdrawers.h b/src/r_compiler/llvmdrawers.h index 1db2f5b2a5..873f98c73c 100644 --- a/src/r_compiler/llvmdrawers.h +++ b/src/r_compiler/llvmdrawers.h @@ -258,6 +258,11 @@ struct TriDrawTriangleArgs uint8_t stencilTestValue; uint8_t stencilWriteValue; uint32_t *subsectorGBuffer; + const uint8_t *colormaps; + const uint8_t *RGB32k; + const uint32_t *Col2RGB8; + const uint32_t *Col2RGB8_LessPrecision; + const uint32_t *Col2RGB8_Inverse; }; enum class TriDrawVariant diff --git a/src/r_poly.cpp b/src/r_poly.cpp index 4dc90027d0..680d77c523 100644 --- a/src/r_poly.cpp +++ b/src/r_poly.cpp @@ -36,9 +36,6 @@ void InitGLRMapinfoData(); void RenderPolyScene::Render() { - if (!r_swtruecolor) // Disable pal rendering for now - return; - ClearBuffers(); SetSceneViewport(); SetupPerspectiveMatrix(); @@ -63,8 +60,8 @@ void RenderPolyScene::ClearBuffers() SectorSpriteRanges.resize(numsectors); SortedSprites.clear(); TranslucentObjects.clear(); - 
PolyStencilBuffer::Instance()->Clear(screen->GetWidth(), screen->GetHeight(), 0); - PolySubsectorGBuffer::Instance()->Resize(screen->GetPitch(), screen->GetHeight()); + PolyStencilBuffer::Instance()->Clear(RenderTarget->GetWidth(), RenderTarget->GetHeight(), 0); + PolySubsectorGBuffer::Instance()->Resize(RenderTarget->GetPitch(), RenderTarget->GetHeight()); NextSubsectorDepth = 0; } @@ -77,7 +74,7 @@ void RenderPolyScene::SetSceneViewport() height = (screenblocks*SCREENHEIGHT / 10) & ~7; int bottom = SCREENHEIGHT - (height + viewwindowy - ((height - viewheight) / 2)); - PolyTriangleDrawer::set_viewport(viewwindowx, SCREENHEIGHT - bottom - height, viewwidth, height, screen); + PolyTriangleDrawer::set_viewport(viewwindowx, SCREENHEIGHT - bottom - height, viewwidth, height, RenderTarget); } void RenderPolyScene::SetupPerspectiveMatrix() diff --git a/src/r_poly_decal.cpp b/src/r_poly_decal.cpp index 52caae7e2e..fa77b25a45 100644 --- a/src/r_poly_decal.cpp +++ b/src/r_poly_decal.cpp @@ -152,6 +152,7 @@ void RenderPolyDecal::Render(const TriMatrix &worldToClip, DBaseDecal *decal, co args.stenciltestvalue = 0; args.stencilwritevalue = 1; args.SetTexture(tex); + args.SetColormap(line->frontsector->ColorMap); //mode = R_SetPatchStyle (decal->RenderStyle, (float)decal->Alpha, decal->Translation, decal->AlphaColor); PolyTriangleDrawer::draw(args, TriDrawVariant::DrawSubsector, TriBlendMode::Shaded); } diff --git a/src/r_poly_particle.cpp b/src/r_poly_particle.cpp index 8448c8f32b..619b3e146c 100644 --- a/src/r_poly_particle.cpp +++ b/src/r_poly_particle.cpp @@ -94,5 +94,6 @@ void RenderPolyParticle::Render(const TriMatrix &worldToClip, particle_t *partic args.ccw = true; args.stenciltestvalue = 0; args.stencilwritevalue = 1; + args.SetColormap(sub->sector->ColorMap); PolyTriangleDrawer::draw(args, TriDrawVariant::FillSubsector, TriBlendMode::AlphaBlend); } diff --git a/src/r_poly_plane.cpp b/src/r_poly_plane.cpp index a68a4b5689..a64dfaaf57 100644 --- a/src/r_poly_plane.cpp 
+++ b/src/r_poly_plane.cpp @@ -140,6 +140,7 @@ void RenderPolyPlane::Render3DFloor(const TriMatrix &worldToClip, subsector_t *s args.stenciltestvalue = 0; args.stencilwritevalue = 1; args.SetTexture(tex); + args.SetColormap(sub->sector->ColorMap); PolyTriangleDrawer::draw(args, TriDrawVariant::DrawNormal, TriBlendMode::Copy); PolyTriangleDrawer::draw(args, TriDrawVariant::Stencil, TriBlendMode::Copy); } @@ -231,6 +232,7 @@ void RenderPolyPlane::Render(const TriMatrix &worldToClip, subsector_t *sub, uin args.ccw = ccw; args.stenciltestvalue = 0; args.stencilwritevalue = 1; + args.SetColormap(frontsector->ColorMap); if (!isSky) { diff --git a/src/r_poly_sky.cpp b/src/r_poly_sky.cpp index a60b3f460e..a8e7b23d6e 100644 --- a/src/r_poly_sky.cpp +++ b/src/r_poly_sky.cpp @@ -63,6 +63,7 @@ void PolySkyDome::Render(const TriMatrix &worldToClip) args.stenciltestvalue = 255; args.stencilwritevalue = 1; args.SetTexture(frontskytex); + args.SetColormap(&NormalLight); RenderCapColorRow(args, frontskytex, 0, false); RenderCapColorRow(args, frontskytex, rc, true); diff --git a/src/r_poly_sprite.cpp b/src/r_poly_sprite.cpp index 84ff8e2213..737707c74a 100644 --- a/src/r_poly_sprite.cpp +++ b/src/r_poly_sprite.cpp @@ -136,6 +136,7 @@ void RenderPolySprite::Render(const TriMatrix &worldToClip, AActor *thing, subse args.stenciltestvalue = 0; args.stencilwritevalue = 1; args.SetTexture(tex, thing->Translation); + args.SetColormap(sub->sector->ColorMap); if (args.translation) PolyTriangleDrawer::draw(args, TriDrawVariant::DrawSubsector, TriBlendMode::TranslateAlphaBlend); diff --git a/src/r_poly_triangle.cpp b/src/r_poly_triangle.cpp index 5574251e61..a00ea90eb3 100644 --- a/src/r_poly_triangle.cpp +++ b/src/r_poly_triangle.cpp @@ -70,10 +70,7 @@ void PolyTriangleDrawer::set_viewport(int x, int y, int width, int height, DCanv void PolyTriangleDrawer::draw(const PolyDrawArgs &args, TriDrawVariant variant, TriBlendMode blendmode) { - if (dest_bgra) - DrawerCommandQueue::QueueCommand(args, 
variant, blendmode); - else - draw_arrays(args, variant, blendmode, nullptr); + DrawerCommandQueue::QueueCommand(args, variant, blendmode); } void PolyTriangleDrawer::draw_arrays(const PolyDrawArgs &drawargs, TriDrawVariant variant, TriBlendMode blendmode, WorkerThreadData *thread) @@ -113,6 +110,11 @@ void PolyTriangleDrawer::draw_arrays(const PolyDrawArgs &drawargs, TriDrawVarian args.stencilValues = PolyStencilBuffer::Instance()->Values(); args.stencilMasks = PolyStencilBuffer::Instance()->Masks(); args.subsectorGBuffer = PolySubsectorGBuffer::Instance()->Values(); + args.colormaps = drawargs.colormaps; + args.RGB32k = RGB32k.All; + args.Col2RGB8 = (const uint32_t*)Col2RGB8; + args.Col2RGB8_Inverse = (const uint32_t*)Col2RGB8_Inverse; + args.Col2RGB8_LessPrecision = (const uint32_t*)Col2RGB8_LessPrecision; bool ccw = drawargs.ccw; const TriVertex *vinput = drawargs.vinput; diff --git a/src/r_poly_triangle.h b/src/r_poly_triangle.h index db6719de90..5736e2c9d2 100644 --- a/src/r_poly_triangle.h +++ b/src/r_poly_triangle.h @@ -26,6 +26,7 @@ #include "r_thread.h" #include "r_compiler/llvmdrawers.h" #include "r_data/r_translate.h" +#include "r_data/colormaps.h" class FTexture; @@ -53,6 +54,7 @@ public: const uint8_t *translation = nullptr; uint8_t stenciltestvalue = 0; uint8_t stencilwritevalue = 0; + const uint8_t *colormaps = nullptr; void SetTexture(FTexture *texture) { @@ -86,6 +88,25 @@ public: SetTexture(texture); } + + void SetColormap(FSWColormap *base_colormap) + { + uniforms.light_red = base_colormap->Color.r * 256 / 255; + uniforms.light_green = base_colormap->Color.g * 256 / 255; + uniforms.light_blue = base_colormap->Color.b * 256 / 255; + uniforms.light_alpha = base_colormap->Color.a * 256 / 255; + uniforms.fade_red = base_colormap->Fade.r; + uniforms.fade_green = base_colormap->Fade.g; + uniforms.fade_blue = base_colormap->Fade.b; + uniforms.fade_alpha = base_colormap->Fade.a; + uniforms.desaturate = MIN(abs(base_colormap->Desaturate), 255) * 255 / 
256; + bool simple_shade = (base_colormap->Color.d == 0x00ffffff && base_colormap->Fade.d == 0x00000000 && base_colormap->Desaturate == 0); + if (simple_shade) + uniforms.flags |= TriUniforms::simple_shade; + else + uniforms.flags &= ~TriUniforms::simple_shade; + colormaps = base_colormap->Maps; + } }; class PolyTriangleDrawer diff --git a/src/r_poly_wall.cpp b/src/r_poly_wall.cpp index f0eb33c32f..92c669ebf8 100644 --- a/src/r_poly_wall.cpp +++ b/src/r_poly_wall.cpp @@ -190,6 +190,7 @@ void RenderPolyWall::Render(const TriMatrix &worldToClip) args.stenciltestvalue = 0; args.stencilwritevalue = 1; args.SetTexture(tex); + args.SetColormap(Line->frontsector->ColorMap); if (!Masked) { diff --git a/src/r_poly_wallsprite.cpp b/src/r_poly_wallsprite.cpp index cd331dcb52..6ed009c333 100644 --- a/src/r_poly_wallsprite.cpp +++ b/src/r_poly_wallsprite.cpp @@ -121,5 +121,6 @@ void RenderPolyWallSprite::Render(const TriMatrix &worldToClip, AActor *thing, s args.stenciltestvalue = 0; args.stencilwritevalue = 1; args.SetTexture(tex); + args.SetColormap(sub->sector->ColorMap); PolyTriangleDrawer::draw(args, TriDrawVariant::DrawSubsector, TriBlendMode::AlphaBlend); } From bd9ec843dd8d8e9af66179b3a11de68f7e26bdad Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 20 Nov 2016 16:42:53 +0100 Subject: [PATCH 353/912] Add palette version of the blend modes --- .../fixedfunction/drawtrianglecodegen.cpp | 153 ++++++++++++++---- .../fixedfunction/drawtrianglecodegen.h | 13 +- src/r_compiler/llvmdrawers.cpp | 6 +- src/r_compiler/llvmdrawers.h | 4 +- src/r_poly_particle.cpp | 14 +- src/r_poly_triangle.cpp | 4 +- 6 files changed, 142 insertions(+), 52 deletions(-) diff --git a/src/r_compiler/fixedfunction/drawtrianglecodegen.cpp b/src/r_compiler/fixedfunction/drawtrianglecodegen.cpp index c59992d3b8..da6933149d 100644 --- a/src/r_compiler/fixedfunction/drawtrianglecodegen.cpp +++ b/src/r_compiler/fixedfunction/drawtrianglecodegen.cpp @@ -505,7 +505,7 @@ void 
DrawTriangleCodegen::LoopPartialBlock() loopy.end_block(); } -SSAVec4i DrawTriangleCodegen::TranslateSample(SSAInt uvoffset) +SSAVec4i DrawTriangleCodegen::TranslateSample32(SSAInt uvoffset) { if (variant == TriDrawVariant::FillNormal || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector) return translation[color * 4].load_vec4ub(true); @@ -513,7 +513,15 @@ SSAVec4i DrawTriangleCodegen::TranslateSample(SSAInt uvoffset) return translation[texturePixels[uvoffset].load(true).zext_int() * 4].load_vec4ub(true); } -SSAVec4i DrawTriangleCodegen::Sample(SSAInt uvoffset) +SSAInt DrawTriangleCodegen::TranslateSample8(SSAInt uvoffset) +{ + if (variant == TriDrawVariant::FillNormal || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector) + return translation[color].load(true).zext_int(); + else + return translation[texturePixels[uvoffset].load(true).zext_int()].load(true).zext_int(); +} + +SSAVec4i DrawTriangleCodegen::Sample32(SSAInt uvoffset) { if (variant == TriDrawVariant::FillNormal || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector) return SSAVec4i::unpack(color); @@ -521,6 +529,19 @@ SSAVec4i DrawTriangleCodegen::Sample(SSAInt uvoffset) return texturePixels[uvoffset * 4].load_vec4ub(true); } +SSAInt DrawTriangleCodegen::Sample8(SSAInt uvoffset) +{ + if (variant == TriDrawVariant::FillNormal || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector) + return color; + else + return texturePixels[uvoffset].load(true).zext_int(); +} + +SSAInt DrawTriangleCodegen::Shade8(SSAInt c) +{ + return currentcolormap[c].load(true).zext_int(); +} + SSAVec4i DrawTriangleCodegen::ProcessPixel32(SSAVec4i bg, SSAInt *varying) { SSAInt ufrac = varying[0]; @@ -538,48 +559,54 @@ SSAVec4i DrawTriangleCodegen::ProcessPixel32(SSAVec4i bg, SSAInt *varying) { default: case TriBlendMode::Copy: - fg = Sample(uvoffset); - output = blend_copy(shade_bgra_simple(fg, 
currentlight)); break; + fg = Sample32(uvoffset); + output = blend_copy(shade_bgra_simple(fg, currentlight)); + break; case TriBlendMode::AlphaBlend: - fg = Sample(uvoffset); - output = blend_alpha_blend(shade_bgra_simple(fg, currentlight), bg); break; + fg = Sample32(uvoffset); + output = blend_alpha_blend(shade_bgra_simple(fg, currentlight), bg); + break; case TriBlendMode::AddSolid: - fg = Sample(uvoffset); - output = blend_add(shade_bgra_simple(fg, currentlight), bg, srcalpha, destalpha); break; + fg = Sample32(uvoffset); + output = blend_add(shade_bgra_simple(fg, currentlight), bg, srcalpha, destalpha); + break; case TriBlendMode::Add: - fg = Sample(uvoffset); - output = blend_add(shade_bgra_simple(fg, currentlight), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); break; + fg = Sample32(uvoffset); + output = blend_add(shade_bgra_simple(fg, currentlight), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); + break; case TriBlendMode::Sub: - fg = Sample(uvoffset); - output = blend_sub(shade_bgra_simple(fg, currentlight), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); break; + fg = Sample32(uvoffset); + output = blend_sub(shade_bgra_simple(fg, currentlight), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); + break; case TriBlendMode::RevSub: - fg = Sample(uvoffset); - output = blend_revsub(shade_bgra_simple(fg, currentlight), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); break; + fg = Sample32(uvoffset); + output = blend_revsub(shade_bgra_simple(fg, currentlight), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); + break; case TriBlendMode::Shaded: - fg = Sample(uvoffset); + fg = Sample32(uvoffset); alpha = fg[0]; alpha = alpha + (alpha >> 7); // 255 -> 256 inv_alpha = 256 - alpha; output = blend_add(shade_bgra_simple(SSAVec4i::unpack(color), currentlight), bg, alpha, inv_alpha); break; case TriBlendMode::TranslateCopy: - fg = TranslateSample(uvoffset); + fg = TranslateSample32(uvoffset); output = blend_copy(shade_bgra_simple(fg, currentlight)); break; 
case TriBlendMode::TranslateAlphaBlend: - fg = TranslateSample(uvoffset); - output = blend_alpha_blend(shade_bgra_simple(fg, currentlight), bg); break; + fg = TranslateSample32(uvoffset); + output = blend_alpha_blend(shade_bgra_simple(fg, currentlight), bg); break; case TriBlendMode::TranslateAdd: - fg = TranslateSample(uvoffset); + fg = TranslateSample32(uvoffset); output = blend_add(shade_bgra_simple(fg, currentlight), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); break; case TriBlendMode::TranslateSub: - fg = TranslateSample(uvoffset); + fg = TranslateSample32(uvoffset); output = blend_sub(shade_bgra_simple(fg, currentlight), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); break; case TriBlendMode::TranslateRevSub: - fg = TranslateSample(uvoffset); + fg = TranslateSample32(uvoffset); output = blend_revsub(shade_bgra_simple(fg, currentlight), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); break; } @@ -587,6 +614,18 @@ SSAVec4i DrawTriangleCodegen::ProcessPixel32(SSAVec4i bg, SSAInt *varying) return output; } +SSAVec4i DrawTriangleCodegen::ToBgra(SSAInt index) +{ + SSAVec4i c = BaseColors[index * 4].load_vec4ub(true); + c = c.insert(3, 255); + return c; +} + +SSAInt DrawTriangleCodegen::ToPal8(SSAVec4i c) +{ + return RGB32k[((c[2] >> 3) * 32 + (c[1] >> 3)) * 32 + (c[0] >> 3)].load(true).zext_int(); +} + SSAInt DrawTriangleCodegen::ProcessPixel8(SSAInt bg, SSAInt *varying) { SSAInt ufrac = varying[0]; @@ -596,19 +635,65 @@ SSAInt DrawTriangleCodegen::ProcessPixel8(SSAInt bg, SSAInt *varying) SSAInt vpos = ((vfrac >> 16) * textureHeight) >> 16; SSAInt uvoffset = upos * textureHeight + vpos; - if (variant == TriDrawVariant::FillNormal || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector) + SSAVec4i fg; + SSAInt alpha, inv_alpha; + SSAInt output; + SSAInt palindex; + + switch (blendmode) { - return currentcolormap[color].load(true).zext_int(); - } - else - { - SSAInt index = texturePixels[uvoffset].load(true).zext_int(); - 
SSAInt fg = currentcolormap[index].load(true).zext_int(); - if (blendmode != TriBlendMode::AlphaBlend) - return fg; - else - return (index == SSAInt(0)).select(bg, fg); + default: + case TriBlendMode::Copy: + output = Shade8(Sample8(uvoffset)); + break; + case TriBlendMode::AlphaBlend: + palindex = Sample8(uvoffset); + output = (palindex == SSAInt(0)).select(bg, Shade8(palindex)); + break; + case TriBlendMode::AddSolid: + fg = ToBgra(Shade8(Sample8(uvoffset))); + output = ToPal8(blend_add(fg, ToBgra(bg), srcalpha, destalpha)); + break; + case TriBlendMode::Add: + fg = ToBgra(Shade8(Sample8(uvoffset))); + output = ToPal8(blend_add(fg, ToBgra(bg), srcalpha, calc_blend_bgalpha(fg, destalpha))); + break; + case TriBlendMode::Sub: + fg = ToBgra(Shade8(Sample8(uvoffset))); + output = ToPal8(blend_sub(fg, ToBgra(bg), srcalpha, calc_blend_bgalpha(fg, destalpha))); + break; + case TriBlendMode::RevSub: + fg = ToBgra(Shade8(Sample8(uvoffset))); + output = ToPal8(blend_revsub(fg, ToBgra(bg), srcalpha, calc_blend_bgalpha(fg, destalpha))); + break; + case TriBlendMode::Shaded: + alpha = Sample8(uvoffset); + alpha = alpha + (alpha >> 7); // 255 -> 256 + inv_alpha = 256 - alpha; + output = ToPal8(blend_add(ToBgra(Shade8(color)), ToBgra(bg), alpha, inv_alpha)); + break; + case TriBlendMode::TranslateCopy: + output = Shade8(TranslateSample8(uvoffset)); + break; + case TriBlendMode::TranslateAlphaBlend: + palindex = TranslateSample8(uvoffset); + output = (palindex == SSAInt(0)).select(bg, Shade8(palindex)); + break; + case TriBlendMode::TranslateAdd: + fg = ToBgra(Shade8(Sample8(uvoffset))); + output = ToPal8(blend_add(fg, ToBgra(bg), srcalpha, calc_blend_bgalpha(fg, destalpha))); + break; + case TriBlendMode::TranslateSub: + fg = ToBgra(Shade8(Sample8(uvoffset))); + output = ToPal8(blend_sub(fg, ToBgra(bg), srcalpha, calc_blend_bgalpha(fg, destalpha))); + break; + case TriBlendMode::TranslateRevSub: + fg = ToBgra(Shade8(Sample8(uvoffset))); + output = ToPal8(blend_revsub(fg, 
ToBgra(bg), srcalpha, calc_blend_bgalpha(fg, destalpha))); + break; } + + return output; } void DrawTriangleCodegen::SetStencilBlock(SSAInt block) @@ -685,9 +770,7 @@ void DrawTriangleCodegen::LoadArgs(SSAValue args, SSAValue thread_data) { Colormaps = args[0][20].load(true); RGB32k = args[0][21].load(true); - Col2RGB8 = args[0][22].load(true); - Col2RGB8_LessPrecision = args[0][23].load(true); - Col2RGB8_Inverse = args[0][24].load(true); + BaseColors = args[0][22].load(true); } thread.core = thread_data[0][0].load(true); diff --git a/src/r_compiler/fixedfunction/drawtrianglecodegen.h b/src/r_compiler/fixedfunction/drawtrianglecodegen.h index d2ff95e023..74358f99a0 100644 --- a/src/r_compiler/fixedfunction/drawtrianglecodegen.h +++ b/src/r_compiler/fixedfunction/drawtrianglecodegen.h @@ -49,8 +49,13 @@ private: SSAVec4i ProcessPixel32(SSAVec4i bg, SSAInt *varying); SSAInt ProcessPixel8(SSAInt bg, SSAInt *varying); - SSAVec4i TranslateSample(SSAInt uvoffset); - SSAVec4i Sample(SSAInt uvoffset); + SSAVec4i TranslateSample32(SSAInt uvoffset); + SSAInt TranslateSample8(SSAInt uvoffset); + SSAVec4i Sample32(SSAInt uvoffset); + SSAInt Sample8(SSAInt uvoffset); + SSAInt Shade8(SSAInt c); + SSAVec4i ToBgra(SSAInt index); + SSAInt ToPal8(SSAVec4i c); void SetStencilBlock(SSAInt block); void StencilSet(SSAInt x, SSAInt y, SSAUByte value); @@ -109,9 +114,7 @@ private: SSAUBytePtr Colormaps; SSAUBytePtr RGB32k; - SSAIntPtr Col2RGB8; - SSAIntPtr Col2RGB8_LessPrecision; - SSAIntPtr Col2RGB8_Inverse; + SSAUBytePtr BaseColors; SSAWorkerThread thread; diff --git a/src/r_compiler/llvmdrawers.cpp b/src/r_compiler/llvmdrawers.cpp index 869a24c2ce..dea72a1acf 100644 --- a/src/r_compiler/llvmdrawers.cpp +++ b/src/r_compiler/llvmdrawers.cpp @@ -127,7 +127,7 @@ LLVMDrawers *LLVMDrawers::Instance() LLVMDrawersImpl::LLVMDrawersImpl() { - int version = 3; // Increment this number if the drawer codegen is modified (forces recreation of the module). 
+ int version = 4; // Increment this number if the drawer codegen is modified (forces recreation of the module). std::string targetCPU = mProgram.GetTargetCPU(); bool loaded = mProgram.LoadCachedModule(version, targetCPU); if (!loaded) @@ -584,9 +584,7 @@ llvm::Type *LLVMDrawersImpl::GetTriDrawTriangleArgs(llvm::LLVMContext &context) elements.push_back(llvm::Type::getInt32PtrTy(context)); // uint32_t *subsectorGBuffer; elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint8_t *colormaps; elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint8_t *RGB32k; - elements.push_back(llvm::Type::getInt32PtrTy(context)); // const uint32_t *Col2RGB8; - elements.push_back(llvm::Type::getInt32PtrTy(context)); // const uint32_t *Col2RGB8_LessPrecision; - elements.push_back(llvm::Type::getInt32PtrTy(context)); // const uint32_t *Col2RGB8_Inverse; + elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint8_t *BaseColors; return llvm::StructType::create(context, elements, "TriDrawTriangle", false)->getPointerTo(); } diff --git a/src/r_compiler/llvmdrawers.h b/src/r_compiler/llvmdrawers.h index 873f98c73c..1cbafa11a6 100644 --- a/src/r_compiler/llvmdrawers.h +++ b/src/r_compiler/llvmdrawers.h @@ -260,9 +260,7 @@ struct TriDrawTriangleArgs uint32_t *subsectorGBuffer; const uint8_t *colormaps; const uint8_t *RGB32k; - const uint32_t *Col2RGB8; - const uint32_t *Col2RGB8_LessPrecision; - const uint32_t *Col2RGB8_Inverse; + const uint8_t *BaseColors; }; enum class TriDrawVariant diff --git a/src/r_poly_particle.cpp b/src/r_poly_particle.cpp index 619b3e146c..9b1dbaf1cc 100644 --- a/src/r_poly_particle.cpp +++ b/src/r_poly_particle.cpp @@ -82,8 +82,18 @@ void RenderPolyParticle::Render(const TriMatrix &worldToClip, particle_t *partic uniforms.flags = 0; } uniforms.subsectorDepth = subsectorDepth; - uint32_t alpha = particle->trans; - uniforms.color = (alpha << 24) | (particle->color & 0xffffff); + + if (r_swtruecolor) + { + uint32_t alpha = 
particle->trans; + uniforms.color = (alpha << 24) | (particle->color & 0xffffff); + } + else + { + uniforms.color = ((uint32_t)particle->color) >> 24; + uniforms.srcalpha = particle->trans; + uniforms.destalpha = 255 - particle->trans; + } PolyDrawArgs args; args.uniforms = uniforms; diff --git a/src/r_poly_triangle.cpp b/src/r_poly_triangle.cpp index a00ea90eb3..2ba3336d14 100644 --- a/src/r_poly_triangle.cpp +++ b/src/r_poly_triangle.cpp @@ -112,9 +112,7 @@ void PolyTriangleDrawer::draw_arrays(const PolyDrawArgs &drawargs, TriDrawVarian args.subsectorGBuffer = PolySubsectorGBuffer::Instance()->Values(); args.colormaps = drawargs.colormaps; args.RGB32k = RGB32k.All; - args.Col2RGB8 = (const uint32_t*)Col2RGB8; - args.Col2RGB8_Inverse = (const uint32_t*)Col2RGB8_Inverse; - args.Col2RGB8_LessPrecision = (const uint32_t*)Col2RGB8_LessPrecision; + args.BaseColors = (const uint8_t *)GPalette.BaseColors; bool ccw = drawargs.ccw; const TriVertex *vinput = drawargs.vinput; From ebd810d843330f30300201ec0a18276ea5f7af30 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 20 Nov 2016 16:43:13 +0100 Subject: [PATCH 354/912] Fix decal vertical position being wrong --- src/r_poly_decal.cpp | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/r_poly_decal.cpp b/src/r_poly_decal.cpp index fa77b25a45..5035b4529e 100644 --- a/src/r_poly_decal.cpp +++ b/src/r_poly_decal.cpp @@ -118,7 +118,7 @@ void RenderPolyDecal::Render(const TriMatrix &worldToClip, DBaseDecal *decal, co vertices[i].x = (float)p.X; vertices[i].y = (float)p.Y; - vertices[i].z = (float)(zpos + spriteHeight * offsets[i].second); + vertices[i].z = (float)(zpos + spriteHeight * offsets[i].second - spriteHeight * 0.5); vertices[i].w = 1.0f; vertices[i].varying[0] = (float)(offsets[i].first * tex->Scale.X); vertices[i].varying[1] = (float)((1.0f - offsets[i].second) * tex->Scale.Y); @@ -140,7 +140,15 @@ void RenderPolyDecal::Render(const TriMatrix &worldToClip, DBaseDecal *decal, co 
uniforms.flags = 0; } uniforms.subsectorDepth = subsectorDepth; - uniforms.color = decal->AlphaColor; + + if (r_swtruecolor) + { + uniforms.color = 0xff000000 | decal->AlphaColor; + } + else + { + uniforms.color = ((uint32_t)decal->AlphaColor) >> 24; + } PolyDrawArgs args; args.uniforms = uniforms; @@ -152,7 +160,7 @@ void RenderPolyDecal::Render(const TriMatrix &worldToClip, DBaseDecal *decal, co args.stenciltestvalue = 0; args.stencilwritevalue = 1; args.SetTexture(tex); - args.SetColormap(line->frontsector->ColorMap); + args.SetColormap(front->ColorMap); //mode = R_SetPatchStyle (decal->RenderStyle, (float)decal->Alpha, decal->Translation, decal->AlphaColor); PolyTriangleDrawer::draw(args, TriDrawVariant::DrawSubsector, TriBlendMode::Shaded); } From 4ed548a189b16676d1350e7429c1db05ac9b015f Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Sun, 20 Nov 2016 17:07:58 -0500 Subject: [PATCH 355/912] - Implemented some sprite blend modes. --- src/r_poly_sprite.cpp | 103 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 100 insertions(+), 3 deletions(-) diff --git a/src/r_poly_sprite.cpp b/src/r_poly_sprite.cpp index 737707c74a..6de63bbdcd 100644 --- a/src/r_poly_sprite.cpp +++ b/src/r_poly_sprite.cpp @@ -28,6 +28,9 @@ #include "r_poly_sprite.h" #include "r_poly.h" +EXTERN_CVAR(Float, transsouls) +EXTERN_CVAR(Int, r_drawfuzz) + void RenderPolySprite::Render(const TriMatrix &worldToClip, AActor *thing, subsector_t *sub, uint32_t subsectorDepth) { if (IsThingCulled(thing)) @@ -138,10 +141,104 @@ void RenderPolySprite::Render(const TriMatrix &worldToClip, AActor *thing, subse args.SetTexture(tex, thing->Translation); args.SetColormap(sub->sector->ColorMap); - if (args.translation) - PolyTriangleDrawer::draw(args, TriDrawVariant::DrawSubsector, TriBlendMode::TranslateAlphaBlend); + if (thing->RenderStyle == LegacyRenderStyles[STYLE_Normal] || + (r_drawfuzz == 0 && thing->RenderStyle == LegacyRenderStyles[STYLE_OptFuzzy])) + { + args.uniforms.destalpha = 0; 
+ args.uniforms.srcalpha = 256; + (args.translation) ? + PolyTriangleDrawer::draw(args, TriDrawVariant::DrawSubsector, TriBlendMode::TranslateAdd) : + PolyTriangleDrawer::draw(args, TriDrawVariant::DrawSubsector, TriBlendMode::Add); + } + else if (thing->RenderStyle == LegacyRenderStyles[STYLE_Add]) + { + args.uniforms.destalpha = (uint32_t)(1.0 * 256); + args.uniforms.srcalpha = (uint32_t)(thing->Alpha * 256); + (args.translation) ? + PolyTriangleDrawer::draw(args, TriDrawVariant::DrawSubsector, TriBlendMode::TranslateAdd) : + PolyTriangleDrawer::draw(args, TriDrawVariant::DrawSubsector, TriBlendMode::Add); + } + else if (thing->RenderStyle == LegacyRenderStyles[STYLE_Subtract]) + { + args.uniforms.destalpha = (uint32_t)(1.0 * 256); + args.uniforms.srcalpha = (uint32_t)(thing->Alpha * 256); + (args.translation) ? + PolyTriangleDrawer::draw(args, TriDrawVariant::DrawSubsector, TriBlendMode::TranslateSub) : + PolyTriangleDrawer::draw(args, TriDrawVariant::DrawSubsector, TriBlendMode::Sub); + } + else if (thing->RenderStyle == LegacyRenderStyles[STYLE_SoulTrans]) + { + args.uniforms.destalpha = (uint32_t)(256 - transsouls * 256); + args.uniforms.srcalpha = (uint32_t)(transsouls * 256); + (args.translation) ? + PolyTriangleDrawer::draw(args, TriDrawVariant::DrawSubsector, TriBlendMode::TranslateAdd) : + PolyTriangleDrawer::draw(args, TriDrawVariant::DrawSubsector, TriBlendMode::Add); + } + else if (thing->RenderStyle == LegacyRenderStyles[STYLE_Fuzzy] || + (r_drawfuzz == 2 && thing->RenderStyle == LegacyRenderStyles[STYLE_OptFuzzy])) + { // NYI - Fuzzy - for now, just a copy of "Shadow" + args.uniforms.destalpha = 160; + args.uniforms.srcalpha = 0; + (args.translation) ? 
+ PolyTriangleDrawer::draw(args, TriDrawVariant::DrawSubsector, TriBlendMode::TranslateAdd) : + PolyTriangleDrawer::draw(args, TriDrawVariant::DrawSubsector, TriBlendMode::Add); + + } + else if (thing->RenderStyle == LegacyRenderStyles[STYLE_Shadow] || + (r_drawfuzz == 1 && thing->RenderStyle == LegacyRenderStyles[STYLE_OptFuzzy])) + { + args.uniforms.destalpha = 160; + args.uniforms.srcalpha = 0; + (args.translation) ? + PolyTriangleDrawer::draw(args, TriDrawVariant::DrawSubsector, TriBlendMode::TranslateAdd) : + PolyTriangleDrawer::draw(args, TriDrawVariant::DrawSubsector, TriBlendMode::Add); + + } + else if (thing->RenderStyle == LegacyRenderStyles[STYLE_TranslucentStencil]) + { + // NYI + args.uniforms.destalpha = (uint32_t)(256 - thing->Alpha * 256); + args.uniforms.srcalpha = (uint32_t)(thing->Alpha * 256); + args.uniforms.color = 0xff000000 | thing->fillcolor; + (args.translation) ? + PolyTriangleDrawer::draw(args, TriDrawVariant::FillSubsector, TriBlendMode::TranslateAdd) : + PolyTriangleDrawer::draw(args, TriDrawVariant::FillSubsector, TriBlendMode::Add); + } + else if (thing->RenderStyle == LegacyRenderStyles[STYLE_AddStencil]) + { + // NYI + args.uniforms.destalpha = 256; + args.uniforms.srcalpha = (uint32_t)(thing->Alpha * 256); + args.uniforms.color = 0xff000000 | thing->fillcolor; + (args.translation) ? + PolyTriangleDrawer::draw(args, TriDrawVariant::FillSubsector, TriBlendMode::TranslateAdd) : + PolyTriangleDrawer::draw(args, TriDrawVariant::FillSubsector, TriBlendMode::Add); + } + else if (thing->RenderStyle == LegacyRenderStyles[STYLE_Shaded]) + { + args.uniforms.destalpha = 256; + args.uniforms.srcalpha = (uint32_t)(thing->Alpha * 256); + args.SetTexture(tex, false); + uniforms.flags |= TriUniforms::simple_shade; + PolyTriangleDrawer::draw(args, TriDrawVariant::DrawSubsector, TriBlendMode::Shaded); + } + else if (thing->RenderStyle == LegacyRenderStyles[STYLE_AddShaded]) + { // NYI? 
+ args.uniforms.destalpha = 256; + args.uniforms.srcalpha = (uint32_t)(thing->Alpha * 256); + args.SetTexture(tex, false); + uniforms.flags |= TriUniforms::simple_shade; + PolyTriangleDrawer::draw(args, TriDrawVariant::DrawSubsector, TriBlendMode::Shaded); + } else - PolyTriangleDrawer::draw(args, TriDrawVariant::DrawSubsector, TriBlendMode::AlphaBlend); + { + args.uniforms.destalpha = (uint32_t)(256 - thing->Alpha * 256); + args.uniforms.srcalpha = (uint32_t)(thing->Alpha * 256); + (args.translation) ? + PolyTriangleDrawer::draw(args, TriDrawVariant::DrawSubsector, TriBlendMode::TranslateAdd) : + PolyTriangleDrawer::draw(args, TriDrawVariant::DrawSubsector, TriBlendMode::Add); + } + } bool RenderPolySprite::IsThingCulled(AActor *thing) From dadee080e4b6ed0363bc2a09d1eeec90b2caa903 Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Sun, 20 Nov 2016 18:36:26 -0500 Subject: [PATCH 356/912] - Implemented Wall blend modes - fixed: set color for "shaded" modes - though it's still not yet properly implemented. 
--- src/r_poly_sprite.cpp | 2 ++ src/r_poly_wall.cpp | 7 ++++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/src/r_poly_sprite.cpp b/src/r_poly_sprite.cpp index 6de63bbdcd..e4738ab264 100644 --- a/src/r_poly_sprite.cpp +++ b/src/r_poly_sprite.cpp @@ -219,6 +219,7 @@ void RenderPolySprite::Render(const TriMatrix &worldToClip, AActor *thing, subse args.uniforms.destalpha = 256; args.uniforms.srcalpha = (uint32_t)(thing->Alpha * 256); args.SetTexture(tex, false); + args.uniforms.color = 0; uniforms.flags |= TriUniforms::simple_shade; PolyTriangleDrawer::draw(args, TriDrawVariant::DrawSubsector, TriBlendMode::Shaded); } @@ -227,6 +228,7 @@ void RenderPolySprite::Render(const TriMatrix &worldToClip, AActor *thing, subse args.uniforms.destalpha = 256; args.uniforms.srcalpha = (uint32_t)(thing->Alpha * 256); args.SetTexture(tex, false); + args.uniforms.color = 0; uniforms.flags |= TriUniforms::simple_shade; PolyTriangleDrawer::draw(args, TriDrawVariant::DrawSubsector, TriBlendMode::Shaded); } diff --git a/src/r_poly_wall.cpp b/src/r_poly_wall.cpp index 92c669ebf8..2cdcb30192 100644 --- a/src/r_poly_wall.cpp +++ b/src/r_poly_wall.cpp @@ -199,7 +199,12 @@ void RenderPolyWall::Render(const TriMatrix &worldToClip) } else { - PolyTriangleDrawer::draw(args, TriDrawVariant::DrawSubsector, TriBlendMode::AlphaBlend); + args.uniforms.destalpha = (Line->linedef->flags & ML_ADDTRANS) ? 
256 : (uint32_t)(256 - Line->linedef->alpha * 256); + args.uniforms.srcalpha = (uint32_t)(Line->linedef->alpha * 256); + if (args.uniforms.destalpha == 0 && args.uniforms.srcalpha == 256) + PolyTriangleDrawer::draw(args, TriDrawVariant::DrawSubsector, TriBlendMode::AlphaBlend); + else + PolyTriangleDrawer::draw(args, TriDrawVariant::DrawSubsector, TriBlendMode::Add); } RenderPolyDecal::RenderWallDecals(worldToClip, Line, SubsectorDepth); From 29005b5ce6b44436d8840d55790452a9609d918d Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 21 Nov 2016 01:40:08 +0100 Subject: [PATCH 357/912] Compile fix for older versions of LLVM --- src/r_compiler/llvmdrawers.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/r_compiler/llvmdrawers.cpp b/src/r_compiler/llvmdrawers.cpp index dea72a1acf..82cac4a184 100644 --- a/src/r_compiler/llvmdrawers.cpp +++ b/src/r_compiler/llvmdrawers.cpp @@ -644,7 +644,11 @@ bool LLVMProgram::LoadCachedModule(int version, std::string targetCPU) if (!result) return false; +#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 8) + mModule.reset(result.get()); +#else mModule = std::move(result.get()); +#endif return true; } From 144c7f413928e33447f24cc146b839429dab6d09 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 21 Nov 2016 01:56:54 +0100 Subject: [PATCH 358/912] Flip sub and revsub TC drawers in the old renderer --- src/r_draw.cpp | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index bd53889a1f..d04b1dcf6c 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -2521,6 +2521,9 @@ void R_InitColumnDrawers () dovline4_saved = dovline4; domvline4_saved = domvline4; } + + // This code intentionally flips sub and revsub for all the classic drawer functions + // because the renderer swapped sub and revsub! 
R_DrawColumnHoriz = R_DrawColumnHoriz_rgba; R_DrawColumn = R_DrawColumn_rgba; @@ -2537,16 +2540,16 @@ void R_InitColumnDrawers () R_FillColumn = R_FillColumn_rgba; R_FillAddColumn = R_FillAddColumn_rgba; R_FillAddClampColumn = R_FillAddClampColumn_rgba; - R_FillSubClampColumn = R_FillSubClampColumn_rgba; - R_FillRevSubClampColumn = R_FillRevSubClampColumn_rgba; + R_FillSubClampColumn = R_FillRevSubClampColumn_rgba; + R_FillRevSubClampColumn = R_FillSubClampColumn_rgba; R_DrawAddColumn = R_DrawAddColumn_rgba; R_DrawTlatedAddColumn = R_DrawTlatedAddColumn_rgba; R_DrawAddClampColumn = R_DrawAddClampColumn_rgba; R_DrawAddClampTranslatedColumn = R_DrawAddClampTranslatedColumn_rgba; - R_DrawSubClampColumn = R_DrawSubClampColumn_rgba; - R_DrawSubClampTranslatedColumn = R_DrawSubClampTranslatedColumn_rgba; - R_DrawRevSubClampColumn = R_DrawRevSubClampColumn_rgba; - R_DrawRevSubClampTranslatedColumn = R_DrawRevSubClampTranslatedColumn_rgba; + R_DrawSubClampColumn = R_DrawRevSubClampColumn_rgba; + R_DrawSubClampTranslatedColumn = R_DrawRevSubClampTranslatedColumn_rgba; + R_DrawRevSubClampColumn = R_DrawSubClampColumn_rgba; + R_DrawRevSubClampTranslatedColumn = R_DrawSubClampTranslatedColumn_rgba; R_FillSpan = R_FillSpan_rgba; R_DrawFogBoundary = R_DrawFogBoundary_rgba; R_FillColumnHoriz = R_FillColumnHoriz_rgba; @@ -2578,20 +2581,20 @@ void R_InitColumnDrawers () rt_add4cols = rt_add4cols_rgba; rt_addclamp1col = rt_addclamp1col_rgba; rt_addclamp4cols = rt_addclamp4cols_rgba; - rt_subclamp1col = rt_subclamp1col_rgba; - rt_revsubclamp1col = rt_revsubclamp1col_rgba; + rt_subclamp1col = rt_revsubclamp1col_rgba; + rt_revsubclamp1col = rt_subclamp1col_rgba; rt_tlate1col = rt_tlate1col_rgba; rt_tlateadd1col = rt_tlateadd1col_rgba; rt_tlateaddclamp1col = rt_tlateaddclamp1col_rgba; - rt_tlatesubclamp1col = rt_tlatesubclamp1col_rgba; - rt_tlaterevsubclamp1col = rt_tlaterevsubclamp1col_rgba; - rt_subclamp4cols = rt_subclamp4cols_rgba; - rt_revsubclamp4cols = rt_revsubclamp4cols_rgba; + 
rt_tlatesubclamp1col = rt_tlaterevsubclamp1col_rgba; + rt_tlaterevsubclamp1col = rt_tlatesubclamp1col_rgba; + rt_subclamp4cols = rt_revsubclamp4cols_rgba; + rt_revsubclamp4cols = rt_subclamp4cols_rgba; rt_tlate4cols = rt_tlate4cols_rgba; rt_tlateadd4cols = rt_tlateadd4cols_rgba; rt_tlateaddclamp4cols = rt_tlateaddclamp4cols_rgba; - rt_tlatesubclamp4cols = rt_tlatesubclamp4cols_rgba; - rt_tlaterevsubclamp4cols = rt_tlaterevsubclamp4cols_rgba; + rt_tlatesubclamp4cols = rt_tlaterevsubclamp4cols_rgba; + rt_tlaterevsubclamp4cols = rt_tlatesubclamp4cols_rgba; rt_initcols = rt_initcols_rgba; rt_span_coverage = rt_span_coverage_rgba; From 2ba2dc7611f1891ed7cf13f9aca6694004905de1 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 21 Nov 2016 03:49:55 +0100 Subject: [PATCH 359/912] Change shaded blend mode use srcalpha and destalpha. Fix blending error in add/sub/revsub --- .../fixedfunction/drawercodegen.cpp | 23 +++++- src/r_compiler/fixedfunction/drawercodegen.h | 1 + .../fixedfunction/drawtrianglecodegen.cpp | 11 +-- src/r_compiler/llvmdrawers.h | 2 +- src/r_poly_decal.cpp | 21 ++--- src/r_poly_sprite.cpp | 79 ++++++++----------- 6 files changed, 68 insertions(+), 69 deletions(-) diff --git a/src/r_compiler/fixedfunction/drawercodegen.cpp b/src/r_compiler/fixedfunction/drawercodegen.cpp index 761023ceff..65b726b934 100644 --- a/src/r_compiler/fixedfunction/drawercodegen.cpp +++ b/src/r_compiler/fixedfunction/drawercodegen.cpp @@ -107,18 +107,27 @@ SSAVec4i DrawerCodegen::blend_copy(SSAVec4i fg) SSAVec4i DrawerCodegen::blend_add(SSAVec4i fg, SSAVec4i bg, SSAInt srcalpha, SSAInt destalpha) { + SSAInt alpha = fg[3]; + alpha = alpha + (alpha >> 7); // 255 -> 256 + srcalpha = (alpha * srcalpha + 128) >> 8; SSAVec4i color = (fg * srcalpha + bg * destalpha) / 256; return color.insert(3, 255); } SSAVec4i DrawerCodegen::blend_sub(SSAVec4i fg, SSAVec4i bg, SSAInt srcalpha, SSAInt destalpha) { + SSAInt alpha = fg[3]; + alpha = alpha + (alpha >> 7); // 255 -> 256 + srcalpha = 
(alpha * srcalpha + 128) >> 8; SSAVec4i color = (bg * destalpha - fg * srcalpha) / 256; return color.insert(3, 255); } SSAVec4i DrawerCodegen::blend_revsub(SSAVec4i fg, SSAVec4i bg, SSAInt srcalpha, SSAInt destalpha) { + SSAInt alpha = fg[3]; + alpha = alpha + (alpha >> 7); // 255 -> 256 + srcalpha = (alpha * srcalpha + 128) >> 8; SSAVec4i color = (fg * srcalpha - bg * destalpha) / 256; return color.insert(3, 255); } @@ -126,7 +135,7 @@ SSAVec4i DrawerCodegen::blend_revsub(SSAVec4i fg, SSAVec4i bg, SSAInt srcalpha, SSAVec4i DrawerCodegen::blend_alpha_blend(SSAVec4i fg, SSAVec4i bg) { SSAInt alpha = fg[3]; - alpha = alpha + (alpha >> 7); // // 255 -> 256 + alpha = alpha + (alpha >> 7); // 255 -> 256 SSAInt inv_alpha = 256 - alpha; SSAVec4i color = (fg * alpha + bg * inv_alpha) / 256; return color.insert(3, 255); @@ -139,3 +148,15 @@ SSAInt DrawerCodegen::calc_blend_bgalpha(SSAVec4i fg, SSAInt destalpha) SSAInt inv_alpha = 256 - alpha; return (destalpha * alpha + 256 * inv_alpha + 128) >> 8; } + +SSAVec4i DrawerCodegen::blend_stencil(SSAVec4i stencilcolor, SSAInt fgalpha, SSAVec4i bg, SSAInt srcalpha, SSAInt destalpha) +{ + fgalpha = fgalpha + (fgalpha >> 7); // 255 -> 256 + SSAInt inv_fgalpha = 256 - fgalpha; + + srcalpha = (fgalpha * srcalpha + 128) >> 8; + destalpha = (destalpha * fgalpha + 256 * inv_fgalpha + 128) >> 8; + + SSAVec4i color = (stencilcolor * srcalpha + bg * destalpha) / 256; + return color.insert(3, 255); +} diff --git a/src/r_compiler/fixedfunction/drawercodegen.h b/src/r_compiler/fixedfunction/drawercodegen.h index 5de52dca1c..ef6e8d11ef 100644 --- a/src/r_compiler/fixedfunction/drawercodegen.h +++ b/src/r_compiler/fixedfunction/drawercodegen.h @@ -85,6 +85,7 @@ public: SSAVec4i blend_sub(SSAVec4i fg, SSAVec4i bg, SSAInt srcalpha, SSAInt destalpha); SSAVec4i blend_revsub(SSAVec4i fg, SSAVec4i bg, SSAInt srcalpha, SSAInt destalpha); SSAVec4i blend_alpha_blend(SSAVec4i fg, SSAVec4i bg); + SSAVec4i blend_stencil(SSAVec4i color, SSAInt fgalpha, 
SSAVec4i bg, SSAInt srcalpha, SSAInt destalpha); // Calculates the final alpha values to be used when combined with the source texture alpha channel SSAInt calc_blend_bgalpha(SSAVec4i fg, SSAInt destalpha); diff --git a/src/r_compiler/fixedfunction/drawtrianglecodegen.cpp b/src/r_compiler/fixedfunction/drawtrianglecodegen.cpp index da6933149d..4ee444067e 100644 --- a/src/r_compiler/fixedfunction/drawtrianglecodegen.cpp +++ b/src/r_compiler/fixedfunction/drawtrianglecodegen.cpp @@ -552,7 +552,6 @@ SSAVec4i DrawTriangleCodegen::ProcessPixel32(SSAVec4i bg, SSAInt *varying) SSAInt uvoffset = upos * textureHeight + vpos; SSAVec4i fg; - SSAInt alpha, inv_alpha; SSAVec4i output; switch (blendmode) @@ -584,10 +583,7 @@ SSAVec4i DrawTriangleCodegen::ProcessPixel32(SSAVec4i bg, SSAInt *varying) break; case TriBlendMode::Shaded: fg = Sample32(uvoffset); - alpha = fg[0]; - alpha = alpha + (alpha >> 7); // 255 -> 256 - inv_alpha = 256 - alpha; - output = blend_add(shade_bgra_simple(SSAVec4i::unpack(color), currentlight), bg, alpha, inv_alpha); + output = blend_stencil(shade_bgra_simple(SSAVec4i::unpack(color), currentlight), fg[3], bg, srcalpha, destalpha); break; case TriBlendMode::TranslateCopy: fg = TranslateSample32(uvoffset); @@ -667,10 +663,7 @@ SSAInt DrawTriangleCodegen::ProcessPixel8(SSAInt bg, SSAInt *varying) output = ToPal8(blend_revsub(fg, ToBgra(bg), srcalpha, calc_blend_bgalpha(fg, destalpha))); break; case TriBlendMode::Shaded: - alpha = Sample8(uvoffset); - alpha = alpha + (alpha >> 7); // 255 -> 256 - inv_alpha = 256 - alpha; - output = ToPal8(blend_add(ToBgra(Shade8(color)), ToBgra(bg), alpha, inv_alpha)); + output = ToPal8(blend_stencil(ToBgra(Shade8(color)), Sample8(uvoffset), ToBgra(bg), srcalpha, destalpha)); break; case TriBlendMode::TranslateCopy: output = Shade8(TranslateSample8(uvoffset)); diff --git a/src/r_compiler/llvmdrawers.h b/src/r_compiler/llvmdrawers.h index 1cbafa11a6..dbaef8b479 100644 --- a/src/r_compiler/llvmdrawers.h +++ 
b/src/r_compiler/llvmdrawers.h @@ -281,7 +281,7 @@ enum class TriBlendMode Add, // blend_add(shade(fg), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)) Sub, // blend_sub(shade(fg), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)) RevSub, // blend_revsub(shade(fg), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)) - Shaded, // blend_add(color, bg, fg.a, 1 - fg.a) + Shaded, // blend_stencil(shade(color), fg, bg, srcalpha, calc_blend_bgalpha(fg, destalpha)) TranslateCopy, // blend_copy(shade(translate(fg))) TranslateAlphaBlend, // blend_alpha_blend(shade(translate(fg)), bg) TranslateAdd, // blend_add(shade(translate(fg)), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)) diff --git a/src/r_poly_decal.cpp b/src/r_poly_decal.cpp index 5035b4529e..f23ca28631 100644 --- a/src/r_poly_decal.cpp +++ b/src/r_poly_decal.cpp @@ -128,30 +128,31 @@ void RenderPolyDecal::Render(const TriMatrix &worldToClip, DBaseDecal *decal, co bool fullbrightSprite = (decal->RenderFlags & RF_FULLBRIGHT) == RF_FULLBRIGHT; - TriUniforms uniforms; + PolyDrawArgs args; + args.uniforms.flags = 0; if (fullbrightSprite || fixedlightlev >= 0 || fixedcolormap) { - uniforms.light = 256; - uniforms.flags = TriUniforms::fixed_light; + args.uniforms.light = 256; + args.uniforms.flags |= TriUniforms::fixed_light; } else { - uniforms.light = (uint32_t)((front->lightlevel + actualextralight) / 255.0f * 256.0f); - uniforms.flags = 0; + args.uniforms.light = (uint32_t)((front->lightlevel + actualextralight) / 255.0f * 256.0f); } - uniforms.subsectorDepth = subsectorDepth; + args.uniforms.subsectorDepth = subsectorDepth; if (r_swtruecolor) { - uniforms.color = 0xff000000 | decal->AlphaColor; + args.uniforms.color = 0xff000000 | decal->AlphaColor; } else { - uniforms.color = ((uint32_t)decal->AlphaColor) >> 24; + args.uniforms.color = ((uint32_t)decal->AlphaColor) >> 24; } + + args.uniforms.srcalpha = 256; + args.uniforms.destalpha = 0; - PolyDrawArgs args; - args.uniforms = uniforms; args.objectToClip = 
&worldToClip; args.vinput = vertices; args.vcount = 4; diff --git a/src/r_poly_sprite.cpp b/src/r_poly_sprite.cpp index e4738ab264..bc032b91ca 100644 --- a/src/r_poly_sprite.cpp +++ b/src/r_poly_sprite.cpp @@ -116,21 +116,19 @@ void RenderPolySprite::Render(const TriMatrix &worldToClip, AActor *thing, subse bool fullbrightSprite = ((thing->renderflags & RF_FULLBRIGHT) || (thing->flags5 & MF5_BRIGHT)); - TriUniforms uniforms; + PolyDrawArgs args; + args.uniforms.flags = 0; if (fullbrightSprite || fixedlightlev >= 0 || fixedcolormap) { - uniforms.light = 256; - uniforms.flags = TriUniforms::fixed_light; + args.uniforms.light = 256; + args.uniforms.flags |= TriUniforms::fixed_light; } else { - uniforms.light = (uint32_t)((thing->Sector->lightlevel + actualextralight) / 255.0f * 256.0f); - uniforms.flags = 0; + args.uniforms.light = (uint32_t)((thing->Sector->lightlevel + actualextralight) / 255.0f * 256.0f); } - uniforms.subsectorDepth = subsectorDepth; + args.uniforms.subsectorDepth = subsectorDepth; - PolyDrawArgs args; - args.uniforms = uniforms; args.objectToClip = &worldToClip; args.vinput = vertices; args.vcount = 4; @@ -141,106 +139,91 @@ void RenderPolySprite::Render(const TriMatrix &worldToClip, AActor *thing, subse args.SetTexture(tex, thing->Translation); args.SetColormap(sub->sector->ColorMap); + TriBlendMode blendmode; + if (thing->RenderStyle == LegacyRenderStyles[STYLE_Normal] || (r_drawfuzz == 0 && thing->RenderStyle == LegacyRenderStyles[STYLE_OptFuzzy])) { args.uniforms.destalpha = 0; args.uniforms.srcalpha = 256; - (args.translation) ? - PolyTriangleDrawer::draw(args, TriDrawVariant::DrawSubsector, TriBlendMode::TranslateAdd) : - PolyTriangleDrawer::draw(args, TriDrawVariant::DrawSubsector, TriBlendMode::Add); + blendmode = args.translation ? 
TriBlendMode::TranslateAdd : TriBlendMode::Add; } else if (thing->RenderStyle == LegacyRenderStyles[STYLE_Add]) { args.uniforms.destalpha = (uint32_t)(1.0 * 256); args.uniforms.srcalpha = (uint32_t)(thing->Alpha * 256); - (args.translation) ? - PolyTriangleDrawer::draw(args, TriDrawVariant::DrawSubsector, TriBlendMode::TranslateAdd) : - PolyTriangleDrawer::draw(args, TriDrawVariant::DrawSubsector, TriBlendMode::Add); + blendmode = args.translation ? TriBlendMode::TranslateAdd : TriBlendMode::Add; } else if (thing->RenderStyle == LegacyRenderStyles[STYLE_Subtract]) { args.uniforms.destalpha = (uint32_t)(1.0 * 256); args.uniforms.srcalpha = (uint32_t)(thing->Alpha * 256); - (args.translation) ? - PolyTriangleDrawer::draw(args, TriDrawVariant::DrawSubsector, TriBlendMode::TranslateSub) : - PolyTriangleDrawer::draw(args, TriDrawVariant::DrawSubsector, TriBlendMode::Sub); + blendmode = args.translation ? TriBlendMode::TranslateSub : TriBlendMode::Sub; } else if (thing->RenderStyle == LegacyRenderStyles[STYLE_SoulTrans]) { args.uniforms.destalpha = (uint32_t)(256 - transsouls * 256); args.uniforms.srcalpha = (uint32_t)(transsouls * 256); - (args.translation) ? - PolyTriangleDrawer::draw(args, TriDrawVariant::DrawSubsector, TriBlendMode::TranslateAdd) : - PolyTriangleDrawer::draw(args, TriDrawVariant::DrawSubsector, TriBlendMode::Add); + blendmode = args.translation ? TriBlendMode::TranslateAdd : TriBlendMode::Add; } else if (thing->RenderStyle == LegacyRenderStyles[STYLE_Fuzzy] || (r_drawfuzz == 2 && thing->RenderStyle == LegacyRenderStyles[STYLE_OptFuzzy])) { // NYI - Fuzzy - for now, just a copy of "Shadow" args.uniforms.destalpha = 160; args.uniforms.srcalpha = 0; - (args.translation) ? - PolyTriangleDrawer::draw(args, TriDrawVariant::DrawSubsector, TriBlendMode::TranslateAdd) : - PolyTriangleDrawer::draw(args, TriDrawVariant::DrawSubsector, TriBlendMode::Add); - + blendmode = args.translation ? 
TriBlendMode::TranslateAdd : TriBlendMode::Add; } else if (thing->RenderStyle == LegacyRenderStyles[STYLE_Shadow] || (r_drawfuzz == 1 && thing->RenderStyle == LegacyRenderStyles[STYLE_OptFuzzy])) { args.uniforms.destalpha = 160; args.uniforms.srcalpha = 0; - (args.translation) ? - PolyTriangleDrawer::draw(args, TriDrawVariant::DrawSubsector, TriBlendMode::TranslateAdd) : - PolyTriangleDrawer::draw(args, TriDrawVariant::DrawSubsector, TriBlendMode::Add); - + blendmode = args.translation ? TriBlendMode::TranslateAdd : TriBlendMode::Add; } else if (thing->RenderStyle == LegacyRenderStyles[STYLE_TranslucentStencil]) { - // NYI args.uniforms.destalpha = (uint32_t)(256 - thing->Alpha * 256); args.uniforms.srcalpha = (uint32_t)(thing->Alpha * 256); args.uniforms.color = 0xff000000 | thing->fillcolor; - (args.translation) ? - PolyTriangleDrawer::draw(args, TriDrawVariant::FillSubsector, TriBlendMode::TranslateAdd) : - PolyTriangleDrawer::draw(args, TriDrawVariant::FillSubsector, TriBlendMode::Add); + blendmode = TriBlendMode::Shaded; } else if (thing->RenderStyle == LegacyRenderStyles[STYLE_AddStencil]) { - // NYI args.uniforms.destalpha = 256; - args.uniforms.srcalpha = (uint32_t)(thing->Alpha * 256); + args.uniforms.srcalpha = (uint32_t)(thing->Alpha * 256) * 2; // Don't know this needs to be multiplied by two.. args.uniforms.color = 0xff000000 | thing->fillcolor; - (args.translation) ? 
- PolyTriangleDrawer::draw(args, TriDrawVariant::FillSubsector, TriBlendMode::TranslateAdd) : - PolyTriangleDrawer::draw(args, TriDrawVariant::FillSubsector, TriBlendMode::Add); + blendmode = TriBlendMode::Shaded; } else if (thing->RenderStyle == LegacyRenderStyles[STYLE_Shaded]) { - args.uniforms.destalpha = 256; args.uniforms.srcalpha = (uint32_t)(thing->Alpha * 256); - args.SetTexture(tex, false); + args.uniforms.destalpha = 256 - args.uniforms.srcalpha; args.uniforms.color = 0; - uniforms.flags |= TriUniforms::simple_shade; - PolyTriangleDrawer::draw(args, TriDrawVariant::DrawSubsector, TriBlendMode::Shaded); + blendmode = TriBlendMode::Shaded; } else if (thing->RenderStyle == LegacyRenderStyles[STYLE_AddShaded]) - { // NYI? + { args.uniforms.destalpha = 256; args.uniforms.srcalpha = (uint32_t)(thing->Alpha * 256); - args.SetTexture(tex, false); args.uniforms.color = 0; - uniforms.flags |= TriUniforms::simple_shade; - PolyTriangleDrawer::draw(args, TriDrawVariant::DrawSubsector, TriBlendMode::Shaded); + blendmode = TriBlendMode::Shaded; } else { args.uniforms.destalpha = (uint32_t)(256 - thing->Alpha * 256); args.uniforms.srcalpha = (uint32_t)(thing->Alpha * 256); - (args.translation) ? - PolyTriangleDrawer::draw(args, TriDrawVariant::DrawSubsector, TriBlendMode::TranslateAdd) : - PolyTriangleDrawer::draw(args, TriDrawVariant::DrawSubsector, TriBlendMode::Add); + blendmode = args.translation ? 
TriBlendMode::TranslateAdd : TriBlendMode::Add; + } + + if (!r_swtruecolor) + { + uint32_t r = (args.uniforms.color >> 16) & 0xff; + uint32_t g = (args.uniforms.color >> 8) & 0xff; + uint32_t b = args.uniforms.color & 0xff; + args.uniforms.color = RGB32k.RGB[r >> 3][g >> 3][b >> 3]; } + PolyTriangleDrawer::draw(args, TriDrawVariant::DrawSubsector, blendmode); } bool RenderPolySprite::IsThingCulled(AActor *thing) From e7d64a380a69a988bb77d699866d8a3bee59ce39 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 21 Nov 2016 05:09:53 +0100 Subject: [PATCH 360/912] Fix AABB culling bug --- src/r_poly_cull.cpp | 84 ++++++++++++++++++------------------------- src/r_poly_triangle.h | 2 +- 2 files changed, 36 insertions(+), 50 deletions(-) diff --git a/src/r_poly_cull.cpp b/src/r_poly_cull.cpp index 89c59aa792..dff8b76889 100644 --- a/src/r_poly_cull.cpp +++ b/src/r_poly_cull.cpp @@ -130,7 +130,7 @@ void PolyCull::MarkSegmentCulled(int x1, int x2) { // Find last segment int merge = cur; - while (merge + 1 != SolidSegments.size() && SolidSegments[merge + 1].X1 <= x2) + while (merge + 1 != (int)SolidSegments.size() && SolidSegments[merge + 1].X1 <= x2) merge++; // Apply new merged range @@ -168,57 +168,43 @@ bool PolyCull::CheckBBox(float *bspcoord) // Occlusion test using solid segments: - int boxx; - int boxy; - int boxpos; - - double x1, y1, x2, y2; - - // Find the corners of the box - // that define the edges from current viewpoint. 
- if (ViewPos.X <= bspcoord[BOXLEFT]) - boxx = 0; - else if (ViewPos.X < bspcoord[BOXRIGHT]) - boxx = 1; - else - boxx = 2; - - if (ViewPos.Y >= bspcoord[BOXTOP]) - boxy = 0; - else if (ViewPos.Y > bspcoord[BOXBOTTOM]) - boxy = 1; - else - boxy = 2; - - boxpos = (boxy << 2) + boxx; - if (boxpos == 5) - return true; - - static const int checkcoord[12][4] = + static const int lines[4][4] = { - { 3,0,2,1 }, - { 3,0,2,0 }, - { 3,1,2,0 }, - { 0 }, - { 2,0,2,1 }, - { 0,0,0,0 }, - { 3,1,3,0 }, - { 0 }, - { 2,0,3,1 }, - { 2,1,3,1 }, - { 2,1,3,0 } + { BOXLEFT, BOXBOTTOM, BOXRIGHT, BOXBOTTOM }, + { BOXRIGHT, BOXBOTTOM, BOXRIGHT, BOXTOP }, + { BOXRIGHT, BOXTOP, BOXLEFT, BOXTOP }, + { BOXLEFT, BOXTOP, BOXLEFT, BOXBOTTOM } }; - x1 = bspcoord[checkcoord[boxpos][0]]; - y1 = bspcoord[checkcoord[boxpos][1]]; - x2 = bspcoord[checkcoord[boxpos][2]]; - y2 = bspcoord[checkcoord[boxpos][3]]; - - int sx1, sx2; - if (GetSegmentRangeForLine(x1, y1, x2, y2, sx1, sx2)) - return !IsSegmentCulled(sx1, sx2); - else - return true; + bool foundline = false; + int minsx1, maxsx2; + for (int i = 0; i < 4; i++) + { + int j = i < 3 ? 
i + 1 : 0; + int x1 = bspcoord[lines[i][0]]; + int y1 = bspcoord[lines[i][1]]; + int x2 = bspcoord[lines[i][2]]; + int y2 = bspcoord[lines[i][3]]; + int sx1, sx2; + if (GetSegmentRangeForLine(x1, y1, x2, y2, sx1, sx2)) + { + if (foundline) + { + minsx1 = MIN(minsx1, sx1); + maxsx2 = MAX(maxsx2, sx2); + } + else + { + minsx1 = sx1; + maxsx2 = sx2; + foundline = true; + } + } + } + if (!foundline) + return false; + + return !IsSegmentCulled(minsx1, maxsx2); } bool PolyCull::GetSegmentRangeForLine(double x1, double y1, double x2, double y2, int &sx1, int &sx2) const diff --git a/src/r_poly_triangle.h b/src/r_poly_triangle.h index 5736e2c9d2..9fd8b7a9a6 100644 --- a/src/r_poly_triangle.h +++ b/src/r_poly_triangle.h @@ -69,7 +69,7 @@ public: void SetTexture(FTexture *texture, uint32_t translationID) { - if (translationID != -1 && translationID != 0) + if (translationID != 0xffffffff && translationID != 0) { FRemapTable *table = TranslationToTable(translationID); if (table != nullptr && !table->Inactive) From 5e9c9c825dd13216e2ce832a9d220564d1c21429 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 21 Nov 2016 05:52:02 +0100 Subject: [PATCH 361/912] Add stencil blend mode as apparently the shaded mode uses indices whiles stencil uses mask/alpha.. 
--- .../fixedfunction/drawtrianglecodegen.cpp | 8 +++++++- src/r_compiler/llvmdrawers.cpp | 2 +- src/r_compiler/llvmdrawers.h | 3 ++- src/r_poly_decal.cpp | 8 ++++---- src/r_poly_sprite.cpp | 9 +++++++-- src/r_poly_triangle.h | 15 ++++++++++++--- 6 files changed, 33 insertions(+), 12 deletions(-) diff --git a/src/r_compiler/fixedfunction/drawtrianglecodegen.cpp b/src/r_compiler/fixedfunction/drawtrianglecodegen.cpp index 4ee444067e..3dca725448 100644 --- a/src/r_compiler/fixedfunction/drawtrianglecodegen.cpp +++ b/src/r_compiler/fixedfunction/drawtrianglecodegen.cpp @@ -581,10 +581,13 @@ SSAVec4i DrawTriangleCodegen::ProcessPixel32(SSAVec4i bg, SSAInt *varying) fg = Sample32(uvoffset); output = blend_revsub(shade_bgra_simple(fg, currentlight), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); break; - case TriBlendMode::Shaded: + case TriBlendMode::Stencil: fg = Sample32(uvoffset); output = blend_stencil(shade_bgra_simple(SSAVec4i::unpack(color), currentlight), fg[3], bg, srcalpha, destalpha); break; + case TriBlendMode::Shaded: + output = blend_stencil(shade_bgra_simple(SSAVec4i::unpack(color), currentlight), Sample8(uvoffset), bg, srcalpha, destalpha); + break; case TriBlendMode::TranslateCopy: fg = TranslateSample32(uvoffset); output = blend_copy(shade_bgra_simple(fg, currentlight)); @@ -662,6 +665,9 @@ SSAInt DrawTriangleCodegen::ProcessPixel8(SSAInt bg, SSAInt *varying) fg = ToBgra(Shade8(Sample8(uvoffset))); output = ToPal8(blend_revsub(fg, ToBgra(bg), srcalpha, calc_blend_bgalpha(fg, destalpha))); break; + case TriBlendMode::Stencil: + output = ToPal8(blend_stencil(ToBgra(Shade8(color)), (Sample8(uvoffset) == SSAInt(0)).select(SSAInt(0), SSAInt(256)), ToBgra(bg), srcalpha, destalpha)); + break; case TriBlendMode::Shaded: output = ToPal8(blend_stencil(ToBgra(Shade8(color)), Sample8(uvoffset), ToBgra(bg), srcalpha, destalpha)); break; diff --git a/src/r_compiler/llvmdrawers.cpp b/src/r_compiler/llvmdrawers.cpp index 82cac4a184..6745af8e4e 100644 --- 
a/src/r_compiler/llvmdrawers.cpp +++ b/src/r_compiler/llvmdrawers.cpp @@ -127,7 +127,7 @@ LLVMDrawers *LLVMDrawers::Instance() LLVMDrawersImpl::LLVMDrawersImpl() { - int version = 4; // Increment this number if the drawer codegen is modified (forces recreation of the module). + int version = 5; // Increment this number if the drawer codegen is modified (forces recreation of the module). std::string targetCPU = mProgram.GetTargetCPU(); bool loaded = mProgram.LoadCachedModule(version, targetCPU); if (!loaded) diff --git a/src/r_compiler/llvmdrawers.h b/src/r_compiler/llvmdrawers.h index dbaef8b479..6113b1d042 100644 --- a/src/r_compiler/llvmdrawers.h +++ b/src/r_compiler/llvmdrawers.h @@ -281,7 +281,8 @@ enum class TriBlendMode Add, // blend_add(shade(fg), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)) Sub, // blend_sub(shade(fg), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)) RevSub, // blend_revsub(shade(fg), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)) - Shaded, // blend_stencil(shade(color), fg, bg, srcalpha, calc_blend_bgalpha(fg, destalpha)) + Stencil, // blend_stencil(shade(color), fg.a, bg, srcalpha, calc_blend_bgalpha(fg, destalpha)) + Shaded, // blend_stencil(shade(color), fg.index, bg, srcalpha, calc_blend_bgalpha(fg, destalpha)) TranslateCopy, // blend_copy(shade(translate(fg))) TranslateAlphaBlend, // blend_alpha_blend(shade(translate(fg)), bg) TranslateAdd, // blend_add(shade(translate(fg)), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)) diff --git a/src/r_poly_decal.cpp b/src/r_poly_decal.cpp index f23ca28631..ef20c9de7b 100644 --- a/src/r_poly_decal.cpp +++ b/src/r_poly_decal.cpp @@ -130,6 +130,8 @@ void RenderPolyDecal::Render(const TriMatrix &worldToClip, DBaseDecal *decal, co PolyDrawArgs args; args.uniforms.flags = 0; + args.SetColormap(front->ColorMap); + args.SetTexture(tex, decal->Translation, true); if (fullbrightSprite || fixedlightlev >= 0 || fixedcolormap) { args.uniforms.light = 256; @@ -150,8 +152,8 @@ void 
RenderPolyDecal::Render(const TriMatrix &worldToClip, DBaseDecal *decal, co args.uniforms.color = ((uint32_t)decal->AlphaColor) >> 24; } - args.uniforms.srcalpha = 256; - args.uniforms.destalpha = 0; + args.uniforms.srcalpha = (uint32_t)(decal->Alpha * 256.0 + 0.5); + args.uniforms.destalpha = 256 - args.uniforms.srcalpha; args.objectToClip = &worldToClip; args.vinput = vertices; @@ -160,8 +162,6 @@ void RenderPolyDecal::Render(const TriMatrix &worldToClip, DBaseDecal *decal, co args.ccw = true; args.stenciltestvalue = 0; args.stencilwritevalue = 1; - args.SetTexture(tex); - args.SetColormap(front->ColorMap); //mode = R_SetPatchStyle (decal->RenderStyle, (float)decal->Alpha, decal->Translation, decal->AlphaColor); PolyTriangleDrawer::draw(args, TriDrawVariant::DrawSubsector, TriBlendMode::Shaded); } diff --git a/src/r_poly_sprite.cpp b/src/r_poly_sprite.cpp index bc032b91ca..38ddd46dfa 100644 --- a/src/r_poly_sprite.cpp +++ b/src/r_poly_sprite.cpp @@ -185,14 +185,14 @@ void RenderPolySprite::Render(const TriMatrix &worldToClip, AActor *thing, subse args.uniforms.destalpha = (uint32_t)(256 - thing->Alpha * 256); args.uniforms.srcalpha = (uint32_t)(thing->Alpha * 256); args.uniforms.color = 0xff000000 | thing->fillcolor; - blendmode = TriBlendMode::Shaded; + blendmode = TriBlendMode::Stencil; } else if (thing->RenderStyle == LegacyRenderStyles[STYLE_AddStencil]) { args.uniforms.destalpha = 256; args.uniforms.srcalpha = (uint32_t)(thing->Alpha * 256) * 2; // Don't know this needs to be multiplied by two.. args.uniforms.color = 0xff000000 | thing->fillcolor; - blendmode = TriBlendMode::Shaded; + blendmode = TriBlendMode::Stencil; } else if (thing->RenderStyle == LegacyRenderStyles[STYLE_Shaded]) { @@ -215,6 +215,11 @@ void RenderPolySprite::Render(const TriMatrix &worldToClip, AActor *thing, subse blendmode = args.translation ? 
TriBlendMode::TranslateAdd : TriBlendMode::Add; } + if (blendmode == TriBlendMode::Shaded) + { + args.SetTexture(tex, thing->Translation, true); + } + if (!r_swtruecolor) { uint32_t r = (args.uniforms.color >> 16) & 0xff; diff --git a/src/r_poly_triangle.h b/src/r_poly_triangle.h index 9fd8b7a9a6..7d58ed5ed2 100644 --- a/src/r_poly_triangle.h +++ b/src/r_poly_triangle.h @@ -67,7 +67,7 @@ public: translation = nullptr; } - void SetTexture(FTexture *texture, uint32_t translationID) + void SetTexture(FTexture *texture, uint32_t translationID, bool forcePal = false) { if (translationID != 0xffffffff && translationID != 0) { @@ -85,8 +85,17 @@ public: return; } } - - SetTexture(texture); + + if (forcePal) + { + textureWidth = texture->GetWidth(); + textureHeight = texture->GetHeight(); + texturePixels = texture->GetPixels(); + } + else + { + SetTexture(texture); + } } void SetColormap(FSWColormap *base_colormap) From fabac78ea877be05c26e3f5de880b3725c5ba83b Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 21 Nov 2016 15:19:24 +0100 Subject: [PATCH 362/912] Add poly renderer to the menus --- src/r_main.cpp | 8 ++++---- src/r_swrenderer.cpp | 8 ++++---- wadsrc/static/language.enu | 1 + wadsrc/static/menudef.txt | 1 + 4 files changed, 10 insertions(+), 8 deletions(-) diff --git a/src/r_main.cpp b/src/r_main.cpp index 1ba89eecd7..e0773718fa 100644 --- a/src/r_main.cpp +++ b/src/r_main.cpp @@ -63,7 +63,7 @@ #include "p_setup.h" #include "version.h" -CUSTOM_CVAR(Bool, r_newrenderer, 0, CVAR_NOINITCALL) +CUSTOM_CVAR(Bool, r_polyrenderer, 0, CVAR_ARCHIVE | CVAR_GLOBALCONFIG | CVAR_NOINITCALL) { if (self == 1 && !hasglnodes) { @@ -915,7 +915,7 @@ void R_RenderActorView (AActor *actor, bool dontmaplines) // Link the polyobjects right before drawing the scene to reduce the amounts of calls to this function PO_LinkToSubsectors(); InSubsector = NULL; - if (!r_newrenderer) + if (!r_polyrenderer) { R_RenderBSPNode(nodes + numnodes - 1); // The head node is the last node 
output. } @@ -932,7 +932,7 @@ void R_RenderActorView (AActor *actor, bool dontmaplines) if (viewactive) { PlaneCycles.Clock(); - if (!r_newrenderer) + if (!r_polyrenderer) { R_DrawPlanes(); R_DrawPortals(); @@ -953,7 +953,7 @@ void R_RenderActorView (AActor *actor, bool dontmaplines) NetUpdate (); MaskedCycles.Clock(); - if (!r_newrenderer) + if (!r_polyrenderer) R_DrawMasked (); MaskedCycles.Unclock(); diff --git a/src/r_swrenderer.cpp b/src/r_swrenderer.cpp index 3ddfcb5f28..708b465e1b 100644 --- a/src/r_swrenderer.cpp +++ b/src/r_swrenderer.cpp @@ -47,7 +47,7 @@ #include "r_poly.h" EXTERN_CVAR(Bool, r_shadercolormaps) -EXTERN_CVAR(Bool, r_newrenderer) // [SP] dpJudas's new renderer +EXTERN_CVAR(Bool, r_polyrenderer) // [SP] dpJudas's new renderer EXTERN_CVAR(Float, maxviewpitch) // [SP] CVAR from GZDoom void R_SWRSetWindow(int windowSize, int fullWidth, int fullHeight, int stHeight, float trueratio); @@ -242,7 +242,7 @@ void FSoftwareRenderer::WriteSavePic (player_t *player, FileWriter *file, int wi void FSoftwareRenderer::DrawRemainingPlayerSprites() { - if (!r_newrenderer) + if (!r_polyrenderer) { R_DrawRemainingPlayerSprites(); } @@ -262,12 +262,12 @@ void FSoftwareRenderer::DrawRemainingPlayerSprites() int FSoftwareRenderer::GetMaxViewPitch(bool down) { - return (r_newrenderer) ? int(maxviewpitch) : (down ? MAX_DN_ANGLE : MAX_UP_ANGLE); + return (r_polyrenderer) ? int(maxviewpitch) : (down ? 
MAX_DN_ANGLE : MAX_UP_ANGLE); } bool FSoftwareRenderer::RequireGLNodes() { - return r_newrenderer; + return r_polyrenderer; } //========================================================================== diff --git a/wadsrc/static/language.enu b/wadsrc/static/language.enu index 881cde31ff..177d635214 100644 --- a/wadsrc/static/language.enu +++ b/wadsrc/static/language.enu @@ -2152,6 +2152,7 @@ MODMNU_CHIPOMATIC = "Chip-o-matic"; RNDMNU_TITLE = "CHANGE RENDERER"; RNDMNU_RENDERER = "Hardware Acceleration"; RNDMNU_TRUECOLOR = "Software Truecolor Mode"; +RNDMNU_POLY = "Poly Renderer (experimental)"; RNDMNU_CANVAS = "Software Canvas"; // Video Options diff --git a/wadsrc/static/menudef.txt b/wadsrc/static/menudef.txt index 81c18a5223..1c9d42de15 100644 --- a/wadsrc/static/menudef.txt +++ b/wadsrc/static/menudef.txt @@ -1773,6 +1773,7 @@ OptionMenu RendererMenu Title "$RNDMNU_TITLE" Option "$RNDMNU_RENDERER", "vid_renderer", "PolyDoom" Option "$RNDMNU_TRUECOLOR", "swtruecolor", "OnOff" + Option "$RNDMNU_POLY", "r_polyrenderer", "OnOff" IfOption(Windows) { Option "$RNDMNU_CANVAS", "vid_used3d", "D3DGL" From 0108ff89d79be0404eea8b05b2ae726ed81a8c90 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 21 Nov 2016 15:20:07 +0100 Subject: [PATCH 363/912] Bug fixes --- src/r_poly_cull.cpp | 8 ++++---- src/r_poly_sprite.cpp | 2 +- src/r_poly_triangle.cpp | 35 ++++++++++++++++++++++++++++++++++- 3 files changed, 39 insertions(+), 6 deletions(-) diff --git a/src/r_poly_cull.cpp b/src/r_poly_cull.cpp index dff8b76889..79b8d151d9 100644 --- a/src/r_poly_cull.cpp +++ b/src/r_poly_cull.cpp @@ -181,10 +181,10 @@ bool PolyCull::CheckBBox(float *bspcoord) for (int i = 0; i < 4; i++) { int j = i < 3 ? 
i + 1 : 0; - int x1 = bspcoord[lines[i][0]]; - int y1 = bspcoord[lines[i][1]]; - int x2 = bspcoord[lines[i][2]]; - int y2 = bspcoord[lines[i][3]]; + float x1 = bspcoord[lines[i][0]]; + float y1 = bspcoord[lines[i][1]]; + float x2 = bspcoord[lines[i][2]]; + float y2 = bspcoord[lines[i][3]]; int sx1, sx2; if (GetSegmentRangeForLine(x1, y1, x2, y2, sx1, sx2)) { diff --git a/src/r_poly_sprite.cpp b/src/r_poly_sprite.cpp index 38ddd46dfa..87d52b98f1 100644 --- a/src/r_poly_sprite.cpp +++ b/src/r_poly_sprite.cpp @@ -190,7 +190,7 @@ void RenderPolySprite::Render(const TriMatrix &worldToClip, AActor *thing, subse else if (thing->RenderStyle == LegacyRenderStyles[STYLE_AddStencil]) { args.uniforms.destalpha = 256; - args.uniforms.srcalpha = (uint32_t)(thing->Alpha * 256) * 2; // Don't know this needs to be multiplied by two.. + args.uniforms.srcalpha = (uint32_t)(thing->Alpha * 256); args.uniforms.color = 0xff000000 | thing->fillcolor; blendmode = TriBlendMode::Stencil; } diff --git a/src/r_poly_triangle.cpp b/src/r_poly_triangle.cpp index 2ba3336d14..88ee02a36e 100644 --- a/src/r_poly_triangle.cpp +++ b/src/r_poly_triangle.cpp @@ -339,7 +339,40 @@ void DrawPolyTrianglesCommand::Execute(DrawerThread *thread) FString DrawPolyTrianglesCommand::DebugInfo() { - return "DrawPolyTriangles"; + FString variantstr; + switch (variant) + { + default: variantstr = "Unknown"; break; + case TriDrawVariant::DrawNormal: variantstr = "DrawNormal"; break; + case TriDrawVariant::FillNormal: variantstr = "FillNormal"; break; + case TriDrawVariant::DrawSubsector: variantstr = "DrawSubsector"; break; + case TriDrawVariant::FillSubsector: variantstr = "FillSubsector"; break; + case TriDrawVariant::FuzzSubsector: variantstr = "FuzzSubsector"; break; + case TriDrawVariant::Stencil: variantstr = "Stencil"; break; + } + + FString blendmodestr; + switch (blendmode) + { + default: blendmodestr = "Unknown"; break; + case TriBlendMode::Copy: blendmodestr = "Copy"; break; + case TriBlendMode::AlphaBlend: 
blendmodestr = "AlphaBlend"; break; + case TriBlendMode::AddSolid: blendmodestr = "AddSolid"; break; + case TriBlendMode::Add: blendmodestr = "Add"; break; + case TriBlendMode::Sub: blendmodestr = "Sub"; break; + case TriBlendMode::RevSub: blendmodestr = "RevSub"; break; + case TriBlendMode::Stencil: blendmodestr = "Stencil"; break; + case TriBlendMode::Shaded: blendmodestr = "Shaded"; break; + case TriBlendMode::TranslateCopy: blendmodestr = "TranslateCopy"; break; + case TriBlendMode::TranslateAlphaBlend: blendmodestr = "TranslateAlphaBlend"; break; + case TriBlendMode::TranslateAdd: blendmodestr = "TranslateAdd"; break; + case TriBlendMode::TranslateSub: blendmodestr = "TranslateSub"; break; + case TriBlendMode::TranslateRevSub: blendmodestr = "TranslateRevSub"; break; + } + + FString info; + info.Format("DrawPolyTriangles: variant = %s, blend mode = %s, color = %d", variantstr.GetChars(), blendmodestr.GetChars(), args.uniforms.color); + return info; } ///////////////////////////////////////////////////////////////////////////// From ddb0161f9c4448e198659ad53cd7e89c14bc0a89 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 21 Nov 2016 15:40:42 +0100 Subject: [PATCH 364/912] Sub pinkie crashes the pal drawer :( --- src/r_poly_sprite.cpp | 3 +++ src/r_poly_triangle.cpp | 4 +++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/r_poly_sprite.cpp b/src/r_poly_sprite.cpp index 87d52b98f1..453ab2d00b 100644 --- a/src/r_poly_sprite.cpp +++ b/src/r_poly_sprite.cpp @@ -226,6 +226,9 @@ void RenderPolySprite::Render(const TriMatrix &worldToClip, AActor *thing, subse uint32_t g = (args.uniforms.color >> 8) & 0xff; uint32_t b = args.uniforms.color & 0xff; args.uniforms.color = RGB32k.RGB[r >> 3][g >> 3][b >> 3]; + + if (blendmode == TriBlendMode::Sub) // Sub crashes in pal mode for some weird reason. 
+ blendmode = TriBlendMode::Add; } PolyTriangleDrawer::draw(args, TriDrawVariant::DrawSubsector, blendmode); diff --git a/src/r_poly_triangle.cpp b/src/r_poly_triangle.cpp index 88ee02a36e..367d3df409 100644 --- a/src/r_poly_triangle.cpp +++ b/src/r_poly_triangle.cpp @@ -371,7 +371,9 @@ FString DrawPolyTrianglesCommand::DebugInfo() } FString info; - info.Format("DrawPolyTriangles: variant = %s, blend mode = %s, color = %d", variantstr.GetChars(), blendmodestr.GetChars(), args.uniforms.color); + info.Format("DrawPolyTriangles: variant = %s, blend mode = %s, color = %d, light = %d, textureWidth = %d, textureHeight = %d, texture = %s, translation = %s, colormaps = %s", + variantstr.GetChars(), blendmodestr.GetChars(), args.uniforms.color, args.uniforms.light, args.textureWidth, args.textureHeight, + args.texturePixels ? "ptr" : "null", args.translation ? "ptr" : "null", args.colormaps ? "ptr" : "null"); return info; } From 7af504df586e0fe1ccc8af425c72e1ddf67ad765 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 21 Nov 2016 20:50:54 +0100 Subject: [PATCH 365/912] Draw 3d floor sides --- src/r_poly.cpp | 15 +++++++ src/r_poly_decal.cpp | 3 ++ src/r_poly_wall.cpp | 104 ++++++++++++++++++++++++++----------------- src/r_poly_wall.h | 17 ++++--- 4 files changed, 92 insertions(+), 47 deletions(-) diff --git a/src/r_poly.cpp b/src/r_poly.cpp index 680d77c523..04e5becdec 100644 --- a/src/r_poly.cpp +++ b/src/r_poly.cpp @@ -140,7 +140,9 @@ void RenderPolyScene::RenderSubsector(subsector_t *sub) { seg_t *line = &sub->firstline[i]; if (line->sidedef == nullptr || !(line->sidedef->Flags & WALLF_POLYOBJ)) + { RenderLine(sub, line, frontsector, subsectorDepth); + } } bool mainBSP = ((unsigned int)(sub - subsectors) < (unsigned int)numsubsectors); @@ -206,6 +208,19 @@ void RenderPolyScene::RenderLine(subsector_t *sub, seg_t *line, sector_t *fronts sub->flags |= SSECF_DRAWN; } + // Render 3D floor sides + if (line->backsector && frontsector->e && 
line->backsector->e->XFloor.ffloors.Size()) + { + for (unsigned int i = 0; i < line->backsector->e->XFloor.ffloors.Size(); i++) + { + F3DFloor *fakeFloor = line->backsector->e->XFloor.ffloors[i]; + if (!(fakeFloor->flags & FF_EXISTS)) continue; + if (!(fakeFloor->flags & FF_RENDERPLANES)) continue; + if (!fakeFloor->model) continue; + RenderPolyWall::Render3DFloorLine(WorldToClip, line, frontsector, subsectorDepth, fakeFloor, SubsectorTranslucentWalls); + } + } + // Render wall, and update culling info if its an occlusion blocker if (RenderPolyWall::RenderLine(WorldToClip, line, frontsector, subsectorDepth, SubsectorTranslucentWalls)) { diff --git a/src/r_poly_decal.cpp b/src/r_poly_decal.cpp index ef20c9de7b..206eb9862b 100644 --- a/src/r_poly_decal.cpp +++ b/src/r_poly_decal.cpp @@ -31,6 +31,9 @@ void RenderPolyDecal::RenderWallDecals(const TriMatrix &worldToClip, const seg_t *line, uint32_t subsectorDepth) { + if (line->linedef == nullptr && line->sidedef == nullptr) + return; + for (DBaseDecal *decal = line->sidedef->AttachedDecals; decal != nullptr; decal = decal->WallNext) { RenderPolyDecal render; diff --git a/src/r_poly_wall.cpp b/src/r_poly_wall.cpp index 2cdcb30192..075a6f2a24 100644 --- a/src/r_poly_wall.cpp +++ b/src/r_poly_wall.cpp @@ -32,17 +32,19 @@ bool RenderPolyWall::RenderLine(const TriMatrix &worldToClip, seg_t *line, sector_t *frontsector, uint32_t subsectorDepth, std::vector &translucentWallsOutput) { + RenderPolyWall wall; + wall.LineSeg = line; + wall.Line = line->linedef; + wall.Side = line->sidedef; + wall.Colormap = frontsector->ColorMap; + wall.Masked = false; + wall.SubsectorDepth = subsectorDepth; + double frontceilz1 = frontsector->ceilingplane.ZatPoint(line->v1); double frontfloorz1 = frontsector->floorplane.ZatPoint(line->v1); double frontceilz2 = frontsector->ceilingplane.ZatPoint(line->v2); double frontfloorz2 = frontsector->floorplane.ZatPoint(line->v2); - RenderPolyWall wall; - wall.Line = line; - wall.Colormap = 
frontsector->ColorMap; - wall.Masked = false; - wall.SubsectorDepth = subsectorDepth; - if (line->backsector == nullptr) { if (line->sidedef) @@ -118,6 +120,28 @@ bool RenderPolyWall::RenderLine(const TriMatrix &worldToClip, seg_t *line, secto return false; } +void RenderPolyWall::Render3DFloorLine(const TriMatrix &worldToClip, seg_t *line, sector_t *frontsector, uint32_t subsectorDepth, F3DFloor *fakeFloor, std::vector &translucentWallsOutput) +{ + double frontceilz1 = fakeFloor->top.plane->ZatPoint(line->v1); + double frontfloorz1 = fakeFloor->bottom.plane->ZatPoint(line->v1); + double frontceilz2 = fakeFloor->top.plane->ZatPoint(line->v2); + double frontfloorz2 = fakeFloor->bottom.plane->ZatPoint(line->v2); + + RenderPolyWall wall; + wall.LineSeg = line; + wall.Line = fakeFloor->master; + wall.Side = fakeFloor->master->sidedef[0]; + wall.Colormap = frontsector->ColorMap; + wall.Masked = false; + wall.SubsectorDepth = subsectorDepth; + wall.SetCoords(line->v1->fPos(), line->v2->fPos(), frontceilz1, frontfloorz1, frontceilz2, frontfloorz2); + wall.TopZ = frontceilz1; + wall.BottomZ = frontfloorz1; + wall.UnpeggedCeil = frontceilz1; + wall.Texpart = side_t::mid; + wall.Render(worldToClip); +} + void RenderPolyWall::SetCoords(const DVector2 &v1, const DVector2 &v2, double ceil1, double floor1, double ceil2, double floor2) { this->v1 = v1; @@ -134,7 +158,7 @@ void RenderPolyWall::Render(const TriMatrix &worldToClip) if (!tex) return; - PolyWallTextureCoords texcoords(tex, Line, Texpart, TopZ, BottomZ, UnpeggedCeil); + PolyWallTextureCoords texcoords(tex, LineSeg, Line, Side, Texpart, TopZ, BottomZ, UnpeggedCeil); TriVertex *vertices = PolyVertexBuffer::GetVertices(4); if (!vertices) @@ -199,15 +223,15 @@ void RenderPolyWall::Render(const TriMatrix &worldToClip) } else { - args.uniforms.destalpha = (Line->linedef->flags & ML_ADDTRANS) ? 
256 : (uint32_t)(256 - Line->linedef->alpha * 256); - args.uniforms.srcalpha = (uint32_t)(Line->linedef->alpha * 256); + args.uniforms.destalpha = (Line->flags & ML_ADDTRANS) ? 256 : (uint32_t)(256 - Line->alpha * 256); + args.uniforms.srcalpha = (uint32_t)(Line->alpha * 256); if (args.uniforms.destalpha == 0 && args.uniforms.srcalpha == 256) PolyTriangleDrawer::draw(args, TriDrawVariant::DrawSubsector, TriBlendMode::AlphaBlend); else PolyTriangleDrawer::draw(args, TriDrawVariant::DrawSubsector, TriBlendMode::Add); } - RenderPolyDecal::RenderWallDecals(worldToClip, Line, SubsectorDepth); + RenderPolyDecal::RenderWallDecals(worldToClip, LineSeg, SubsectorDepth); } void RenderPolyWall::ClampHeight(TriVertex &v1, TriVertex &v2) @@ -232,25 +256,25 @@ void RenderPolyWall::ClampHeight(TriVertex &v1, TriVertex &v2) FTexture *RenderPolyWall::GetTexture() { - FTexture *tex = TexMan(Line->sidedef->GetTexture(Texpart), true); + FTexture *tex = TexMan(Side->GetTexture(Texpart), true); if (tex == nullptr || tex->UseType == FTexture::TEX_Null) { // Mapping error. Doom floodfills this with a plane. // This code doesn't do that, but at least it uses the "right" texture.. 
- if (Line->linedef && Line->backsector && Line->linedef->sidedef[0] == Line->sidedef) + if (Line && Line->backsector && Line->sidedef[0] == Side) { if (Texpart == side_t::top) - tex = TexMan(Line->linedef->backsector->GetTexture(sector_t::ceiling), true); + tex = TexMan(Line->backsector->GetTexture(sector_t::ceiling), true); else if (Texpart == side_t::bottom) - tex = TexMan(Line->linedef->backsector->GetTexture(sector_t::floor), true); + tex = TexMan(Line->backsector->GetTexture(sector_t::floor), true); } - if (Line->linedef && Line->backsector && Line->linedef->sidedef[1] == Line->sidedef) + if (Line && Line->backsector && Line->sidedef[1] == Side) { if (Texpart == side_t::top) - tex = TexMan(Line->linedef->frontsector->GetTexture(sector_t::ceiling), true); + tex = TexMan(Line->frontsector->GetTexture(sector_t::ceiling), true); else if (Texpart == side_t::bottom) - tex = TexMan(Line->linedef->frontsector->GetTexture(sector_t::floor), true); + tex = TexMan(Line->frontsector->GetTexture(sector_t::floor), true); } if (tex == nullptr || tex->UseType == FTexture::TEX_Null) @@ -269,33 +293,33 @@ int RenderPolyWall::GetLightLevel() { bool foggy = false; int actualextralight = foggy ? 
0 : extralight << 4; - return clamp(Line->sidedef->GetLightLevel(foggy, Line->frontsector->lightlevel) + actualextralight, 0, 255); + return clamp(Side->GetLightLevel(foggy, LineSeg->frontsector->lightlevel) + actualextralight, 0, 255); } } ///////////////////////////////////////////////////////////////////////////// -PolyWallTextureCoords::PolyWallTextureCoords(FTexture *tex, const seg_t *line, side_t::ETexpart texpart, double topz, double bottomz, double unpeggedceil) +PolyWallTextureCoords::PolyWallTextureCoords(FTexture *tex, const seg_t *lineseg, const line_t *line, const side_t *side, side_t::ETexpart texpart, double topz, double bottomz, double unpeggedceil) { - CalcU(tex, line, texpart); - CalcV(tex, line, texpart, topz, bottomz, unpeggedceil); + CalcU(tex, lineseg, line, side, texpart); + CalcV(tex, line, side, texpart, topz, bottomz, unpeggedceil); } -void PolyWallTextureCoords::CalcU(FTexture *tex, const seg_t *line, side_t::ETexpart texpart) +void PolyWallTextureCoords::CalcU(FTexture *tex, const seg_t *lineseg, const line_t *line, const side_t *side, side_t::ETexpart texpart) { - double lineLength = line->sidedef->TexelLength; + double lineLength = side->TexelLength; double lineStart = 0.0; - bool entireSegment = ((line->linedef->v1 == line->v1) && (line->linedef->v2 == line->v2) || (line->linedef->v2 == line->v1) && (line->linedef->v1 == line->v2)); + bool entireSegment = ((lineseg->v1 == line->v1) && (lineseg->v2 == line->v2) || (lineseg->v2 == line->v1) && (lineseg->v1 == line->v2)); if (!entireSegment) { - lineLength = (line->v2->fPos() - line->v1->fPos()).Length(); - lineStart = (line->v1->fPos() - line->linedef->v1->fPos()).Length(); + lineLength = (lineseg->v2->fPos() - lineseg->v1->fPos()).Length(); + lineStart = (lineseg->v1->fPos() - lineseg->v1->fPos()).Length(); } int texWidth = tex->GetWidth(); - double uscale = line->sidedef->GetTextureXScale(texpart) * tex->Scale.X; - u1 = lineStart + line->sidedef->GetTextureXOffset(texpart); + double 
uscale = side->GetTextureXScale(texpart) * tex->Scale.X; + u1 = lineStart + side->GetTextureXOffset(texpart); u2 = u1 + lineLength; u1 *= uscale; u2 *= uscale; @@ -303,11 +327,11 @@ void PolyWallTextureCoords::CalcU(FTexture *tex, const seg_t *line, side_t::ETex u2 /= texWidth; } -void PolyWallTextureCoords::CalcV(FTexture *tex, const seg_t *line, side_t::ETexpart texpart, double topz, double bottomz, double unpeggedceil) +void PolyWallTextureCoords::CalcV(FTexture *tex, const line_t *line, const side_t *side, side_t::ETexpart texpart, double topz, double bottomz, double unpeggedceil) { - double vscale = line->sidedef->GetTextureYScale(texpart) * tex->Scale.Y; + double vscale = side->GetTextureYScale(texpart) * tex->Scale.Y; - double yoffset = line->sidedef->GetTextureYOffset(texpart); + double yoffset = side->GetTextureYOffset(texpart); if (tex->bWorldPanning) yoffset *= vscale; @@ -315,13 +339,13 @@ void PolyWallTextureCoords::CalcV(FTexture *tex, const seg_t *line, side_t::ETex { default: case side_t::mid: - CalcVMidPart(tex, line, topz, bottomz, vscale, yoffset); + CalcVMidPart(tex, line, side, topz, bottomz, vscale, yoffset); break; case side_t::top: - CalcVTopPart(tex, line, topz, bottomz, vscale, yoffset); + CalcVTopPart(tex, line, side, topz, bottomz, vscale, yoffset); break; case side_t::bottom: - CalcVBottomPart(tex, line, topz, bottomz, unpeggedceil, vscale, yoffset); + CalcVBottomPart(tex, line, side, topz, bottomz, unpeggedceil, vscale, yoffset); break; } @@ -330,9 +354,9 @@ void PolyWallTextureCoords::CalcV(FTexture *tex, const seg_t *line, side_t::ETex v2 /= texHeight; } -void PolyWallTextureCoords::CalcVTopPart(FTexture *tex, const seg_t *line, double topz, double bottomz, double vscale, double yoffset) +void PolyWallTextureCoords::CalcVTopPart(FTexture *tex, const line_t *line, const side_t *side, double topz, double bottomz, double vscale, double yoffset) { - bool pegged = (line->linedef->flags & ML_DONTPEGTOP) == 0; + bool pegged = (line->flags & 
ML_DONTPEGTOP) == 0; if (pegged) // bottom to top { int texHeight = tex->GetHeight(); @@ -353,9 +377,9 @@ void PolyWallTextureCoords::CalcVTopPart(FTexture *tex, const seg_t *line, doubl } } -void PolyWallTextureCoords::CalcVMidPart(FTexture *tex, const seg_t *line, double topz, double bottomz, double vscale, double yoffset) +void PolyWallTextureCoords::CalcVMidPart(FTexture *tex, const line_t *line, const side_t *side, double topz, double bottomz, double vscale, double yoffset) { - bool pegged = (line->linedef->flags & ML_DONTPEGBOTTOM) == 0; + bool pegged = (line->flags & ML_DONTPEGBOTTOM) == 0; if (pegged) // top to bottom { v1 = yoffset * vscale; @@ -369,9 +393,9 @@ void PolyWallTextureCoords::CalcVMidPart(FTexture *tex, const seg_t *line, doubl } } -void PolyWallTextureCoords::CalcVBottomPart(FTexture *tex, const seg_t *line, double topz, double bottomz, double unpeggedceil, double vscale, double yoffset) +void PolyWallTextureCoords::CalcVBottomPart(FTexture *tex, const line_t *line, const side_t *side, double topz, double bottomz, double unpeggedceil, double vscale, double yoffset) { - bool pegged = (line->linedef->flags & ML_DONTPEGBOTTOM) == 0; + bool pegged = (line->flags & ML_DONTPEGBOTTOM) == 0; if (pegged) // top to bottom { v1 = yoffset; diff --git a/src/r_poly_wall.h b/src/r_poly_wall.h index 2de1f1c09b..d897319114 100644 --- a/src/r_poly_wall.h +++ b/src/r_poly_wall.h @@ -30,6 +30,7 @@ class RenderPolyWall { public: static bool RenderLine(const TriMatrix &worldToClip, seg_t *line, sector_t *frontsector, uint32_t subsectorDepth, std::vector &translucentWallsOutput); + static void Render3DFloorLine(const TriMatrix &worldToClip, seg_t *line, sector_t *frontsector, uint32_t subsectorDepth, F3DFloor *fakeFloor, std::vector &translucentWallsOutput); void SetCoords(const DVector2 &v1, const DVector2 &v2, double ceil1, double floor1, double ceil2, double floor2); void Render(const TriMatrix &worldToClip); @@ -41,7 +42,9 @@ public: double ceil2 = 0.0; double 
floor2 = 0.0; - const seg_t *Line = nullptr; + const seg_t *LineSeg = nullptr; + const line_t *Line = nullptr; + const side_t *Side = nullptr; side_t::ETexpart Texpart = side_t::mid; double TopZ = 0.0; double BottomZ = 0.0; @@ -60,15 +63,15 @@ private: class PolyWallTextureCoords { public: - PolyWallTextureCoords(FTexture *tex, const seg_t *line, side_t::ETexpart texpart, double topz, double bottomz, double unpeggedceil); + PolyWallTextureCoords(FTexture *tex, const seg_t *lineseg, const line_t *line, const side_t *side, side_t::ETexpart texpart, double topz, double bottomz, double unpeggedceil); double u1, u2; double v1, v2; private: - void CalcU(FTexture *tex, const seg_t *line, side_t::ETexpart texpart); - void CalcV(FTexture *tex, const seg_t *line, side_t::ETexpart texpart, double topz, double bottomz, double unpeggedceil); - void CalcVTopPart(FTexture *tex, const seg_t *line, double topz, double bottomz, double vscale, double yoffset); - void CalcVMidPart(FTexture *tex, const seg_t *line, double topz, double bottomz, double vscale, double yoffset); - void CalcVBottomPart(FTexture *tex, const seg_t *line, double topz, double bottomz, double unpeggedceil, double vscale, double yoffset); + void CalcU(FTexture *tex, const seg_t *lineseg, const line_t *line, const side_t *side, side_t::ETexpart texpart); + void CalcV(FTexture *tex, const line_t *line, const side_t *side, side_t::ETexpart texpart, double topz, double bottomz, double unpeggedceil); + void CalcVTopPart(FTexture *tex, const line_t *line, const side_t *side, double topz, double bottomz, double vscale, double yoffset); + void CalcVMidPart(FTexture *tex, const line_t *line, const side_t *side, double topz, double bottomz, double vscale, double yoffset); + void CalcVBottomPart(FTexture *tex, const line_t *line, const side_t *side, double topz, double bottomz, double unpeggedceil, double vscale, double yoffset); }; From dacb985878d37ab4e993f93cde67b24ab05635c4 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl 
Date: Tue, 22 Nov 2016 01:59:28 +0100 Subject: [PATCH 366/912] Fix texture mapping bug --- src/r_poly_wall.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/r_poly_wall.cpp b/src/r_poly_wall.cpp index 075a6f2a24..63f35ccd58 100644 --- a/src/r_poly_wall.cpp +++ b/src/r_poly_wall.cpp @@ -314,7 +314,7 @@ void PolyWallTextureCoords::CalcU(FTexture *tex, const seg_t *lineseg, const lin if (!entireSegment) { lineLength = (lineseg->v2->fPos() - lineseg->v1->fPos()).Length(); - lineStart = (lineseg->v1->fPos() - lineseg->v1->fPos()).Length(); + lineStart = (lineseg->v1->fPos() - line->v1->fPos()).Length(); } int texWidth = tex->GetWidth(); From 7beac654716bea57230c04c5266863dfa3705499 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 23 Nov 2016 00:55:07 +0100 Subject: [PATCH 367/912] Add Linux support for GLSWFrameBuffer --- src/posix/sdl/hardware.cpp | 4 +++- src/posix/sdl/sdlglvideo.cpp | 9 ++++++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/src/posix/sdl/hardware.cpp b/src/posix/sdl/hardware.cpp index f4ac13fc5e..b8ca0ee21b 100644 --- a/src/posix/sdl/hardware.cpp +++ b/src/posix/sdl/hardware.cpp @@ -55,6 +55,8 @@ EXTERN_CVAR (Bool, ticker) EXTERN_CVAR (Bool, fullscreen) EXTERN_CVAR (Float, vid_winscale) +CVAR (Bool, vid_sdl, 0, 0); + IVideo *Video; extern int NewWidth, NewHeight, NewBits, DisplayBits; @@ -119,7 +121,7 @@ void I_InitGraphics () ticker.SetGenericRepDefault (val, CVAR_Bool); //currentrenderer = vid_renderer; - if (currentrenderer==1) Video = new SDLGLVideo(0); + if (currentrenderer==1 || vid_sdl==0) Video = new SDLGLVideo(0); else Video = new SDLVideo (0); if (Video == NULL) diff --git a/src/posix/sdl/sdlglvideo.cpp b/src/posix/sdl/sdlglvideo.cpp index d8c00f2363..ef9eef0b4d 100644 --- a/src/posix/sdl/sdlglvideo.cpp +++ b/src/posix/sdl/sdlglvideo.cpp @@ -52,6 +52,8 @@ EXTERN_CVAR (Int, vid_renderer) EXTERN_CVAR (Int, vid_maxfps) EXTERN_CVAR (Bool, cl_capfps) +DFrameBuffer *CreateGLSWFrameBuffer(int 
width, int height, bool fullscreen); + // PUBLIC DATA DEFINITIONS ------------------------------------------------- CUSTOM_CVAR(Bool, gl_debug, false, CVAR_ARCHIVE | CVAR_GLOBALCONFIG | CVAR_NOINITCALL) @@ -194,7 +196,12 @@ DFrameBuffer *SDLGLVideo::CreateFrameBuffer (int width, int height, bool fullscr // flashAmount = 0; } - SDLGLFB *fb = new OpenGLFrameBuffer (0, width, height, 32, 60, fullscreen); + SDLGLFB *fb; + if (vid_renderer == 1) + fb = new OpenGLFrameBuffer (0, width, height, 32, 60, fullscreen); + else + fb = (SDLGLFB*)CreateGLSWFrameBuffer (width, height, fullscreen); + retry = 0; // If we could not create the framebuffer, try again with slightly From ff738de279c2a19b79969e6d3e0de7ade7e3c983 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 23 Nov 2016 01:01:28 +0100 Subject: [PATCH 368/912] Bgra support --- src/posix/sdl/sdlglvideo.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/posix/sdl/sdlglvideo.cpp b/src/posix/sdl/sdlglvideo.cpp index a07f7c0f57..92773801f0 100644 --- a/src/posix/sdl/sdlglvideo.cpp +++ b/src/posix/sdl/sdlglvideo.cpp @@ -52,7 +52,7 @@ EXTERN_CVAR (Int, vid_renderer) EXTERN_CVAR (Int, vid_maxfps) EXTERN_CVAR (Bool, cl_capfps) -DFrameBuffer *CreateGLSWFrameBuffer(int width, int height, bool fullscreen); +DFrameBuffer *CreateGLSWFrameBuffer(int width, int height, bool bgra, bool fullscreen); // PUBLIC DATA DEFINITIONS ------------------------------------------------- @@ -200,7 +200,7 @@ DFrameBuffer *SDLGLVideo::CreateFrameBuffer (int width, int height, bool bgra, b if (vid_renderer == 1) fb = new OpenGLFrameBuffer (0, width, height, 32, 60, fullscreen); else - fb = (SDLGLFB*)CreateGLSWFrameBuffer (width, height, fullscreen); + fb = (SDLGLFB*)CreateGLSWFrameBuffer (width, height, bgra, fullscreen); retry = 0; From 91d180bec65cefbe271b54902f5c37839df9111e Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 23 Nov 2016 21:10:19 +0100 Subject: [PATCH 369/912] Fix render to texture --- 
src/r_poly.cpp | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/src/r_poly.cpp b/src/r_poly.cpp index 04e5becdec..d51d01f9ba 100644 --- a/src/r_poly.cpp +++ b/src/r_poly.cpp @@ -67,14 +67,21 @@ void RenderPolyScene::ClearBuffers() void RenderPolyScene::SetSceneViewport() { - int height; - if (screenblocks >= 10) - height = SCREENHEIGHT; - else - height = (screenblocks*SCREENHEIGHT / 10) & ~7; + if (RenderTarget == screen) // Rendering to screen + { + int height; + if (screenblocks >= 10) + height = SCREENHEIGHT; + else + height = (screenblocks*SCREENHEIGHT / 10) & ~7; - int bottom = SCREENHEIGHT - (height + viewwindowy - ((height - viewheight) / 2)); - PolyTriangleDrawer::set_viewport(viewwindowx, SCREENHEIGHT - bottom - height, viewwidth, height, RenderTarget); + int bottom = SCREENHEIGHT - (height + viewwindowy - ((height - viewheight) / 2)); + PolyTriangleDrawer::set_viewport(viewwindowx, SCREENHEIGHT - bottom - height, viewwidth, height, RenderTarget); + } + else // Rendering to camera texture + { + PolyTriangleDrawer::set_viewport(0, 0, RenderTarget->GetWidth(), RenderTarget->GetHeight(), RenderTarget); + } } void RenderPolyScene::SetupPerspectiveMatrix() From 8bd92a2c482f7c8302421eea373a2ad35c4c5ae2 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 23 Nov 2016 22:55:24 +0100 Subject: [PATCH 370/912] Fix interpolation bug in the affine texturing block --- .../fixedfunction/drawtrianglecodegen.cpp | 62 +++++++++++++------ .../fixedfunction/drawtrianglecodegen.h | 4 +- src/r_compiler/llvmdrawers.cpp | 2 +- 3 files changed, 47 insertions(+), 21 deletions(-) diff --git a/src/r_compiler/fixedfunction/drawtrianglecodegen.cpp b/src/r_compiler/fixedfunction/drawtrianglecodegen.cpp index 3dca725448..d8a436101c 100644 --- a/src/r_compiler/fixedfunction/drawtrianglecodegen.cpp +++ b/src/r_compiler/fixedfunction/drawtrianglecodegen.cpp @@ -242,15 +242,15 @@ void DrawTriangleCodegen::LoopBlockX() SSAFloat varyingBL = 
(startVarying[i] + offx0 * gradVaryingX[i] + offy1 * gradVaryingY[i]) * rcpWBL; SSAFloat varyingBR = (startVarying[i] + offx1 * gradVaryingX[i] + offy1 * gradVaryingY[i]) * rcpWBR; - SSAFloat pos = varyingTL; - SSAFloat stepPos = (varyingBL - varyingTL) * (1.0f / q); SSAFloat startStepX = (varyingTR - varyingTL) * (1.0f / q); - SSAFloat incrStepX = (varyingBR - varyingBL) * (1.0f / q) - startStepX; + SSAFloat endStepX = (varyingBR - varyingBL) * (1.0f / q); + SSAFloat incrStepX = (endStepX - startStepX) * (1.0f / q); + SSAFloat stepY = (varyingBL - varyingTL) * (1.0f / q); - varyingPos[i] = SSAInt(pos * SSAFloat((float)0x01000000), false); - varyingStepPos[i] = SSAInt(stepPos * SSAFloat((float)0x01000000), false); - varyingStartStepX[i] = SSAInt(startStepX * SSAFloat((float)0x01000000), false); - varyingIncrStepX[i] = SSAInt(incrStepX * SSAFloat((float)0x01000000), false); + varyingPos[i] = SSAInt(varyingTL * SSAFloat((float)0x01000000), false) << 8; + varyingStepY[i] = SSAInt(stepY * SSAFloat((float)0x01000000), false) << 8; + varyingStartStepX[i] = SSAInt(startStepX * SSAFloat((float)0x01000000), false) << 8; + varyingIncrStepX[i] = SSAInt(incrStepX * SSAFloat((float)0x01000000), false) << 8; } SSAFloat globVis = SSAFloat(1706.0f); @@ -314,18 +314,22 @@ void DrawTriangleCodegen::LoopFullBlock() { int pixelsize = truecolor ? 
4 : 1; + SSAInt varyingLine[TriVertex::NumVarying]; + SSAInt varyingStepX[TriVertex::NumVarying]; + for (int i = 0; i < TriVertex::NumVarying; i++) + { + varyingLine[i] = varyingPos[i]; + varyingStepX[i] = varyingStartStepX[i]; + } + for (int iy = 0; iy < q; iy++) { SSAUBytePtr buffer = dest[(x + iy * pitch) * pixelsize]; SSAIntPtr subsectorbuffer = subsectorGBuffer[x + iy * pitch]; SSAInt varying[TriVertex::NumVarying]; - SSAInt varyingStep[TriVertex::NumVarying]; for (int i = 0; i < TriVertex::NumVarying; i++) - { - varying[i] = (varyingPos[i] + varyingStepPos[i] * iy) << 8; - varyingStep[i] = (varyingStartStepX[i] + varyingIncrStepX[i] * iy) << 8; - } + varying[i] = varyingLine[i]; for (int ix = 0; ix < q; ix += 4) { @@ -356,7 +360,7 @@ void DrawTriangleCodegen::LoopFullBlock() } for (int i = 0; i < TriVertex::NumVarying; i++) - varying[i] = varying[i] + varyingStep[i]; + varying[i] = varying[i] + varyingStepX[i]; } buf.store_unaligned_vec16ub(SSAVec16ub(SSAVec8s(pixels[0], pixels[1]), SSAVec8s(pixels[2], pixels[3]))); @@ -385,7 +389,7 @@ void DrawTriangleCodegen::LoopFullBlock() } for (int i = 0; i < TriVertex::NumVarying; i++) - varying[i] = varying[i] + varyingStep[i]; + varying[i] = varying[i] + varyingStepX[i]; } buf.store_vec4ub(SSAVec4i(pixels[0], pixels[1], pixels[2], pixels[3])); @@ -394,6 +398,12 @@ void DrawTriangleCodegen::LoopFullBlock() if (variant != TriDrawVariant::DrawSubsector && variant != TriDrawVariant::FillSubsector && variant != TriDrawVariant::FuzzSubsector) subsectorbuffer[ix].store_unaligned_vec4i(SSAVec4i(subsectorDepth)); } + + for (int i = 0; i < TriVertex::NumVarying; i++) + { + varyingLine[i] = varyingLine[i] + varyingStepY[i]; + varyingStepX[i] = varyingStepX[i] + varyingIncrStepX[i]; + } } } @@ -413,6 +423,11 @@ void DrawTriangleCodegen::LoopPartialBlock() stack_iy.store(SSAInt(0)); stack_buffer.store(dest[x * pixelsize]); stack_subsectorbuffer.store(subsectorGBuffer[x]); + for (int i = 0; i < TriVertex::NumVarying; i++) + { + 
stack_varyingLine[i].store(varyingPos[i]); + stack_varyingStep[i].store(varyingStartStepX[i]); + } SSAForBlock loopy; SSAInt iy = stack_iy.load(); @@ -421,14 +436,17 @@ void DrawTriangleCodegen::LoopPartialBlock() SSAInt CY1 = stack_CY1.load(); SSAInt CY2 = stack_CY2.load(); SSAInt CY3 = stack_CY3.load(); + SSAInt varyingLine[TriVertex::NumVarying]; + SSAInt varyingStep[TriVertex::NumVarying]; + for (int i = 0; i < TriVertex::NumVarying; i++) + { + varyingLine[i] = stack_varyingLine[i].load(); + varyingStep[i] = stack_varyingStep[i].load(); + } loopy.loop_block(iy < SSAInt(q), q); { - SSAInt varyingStep[TriVertex::NumVarying]; for (int i = 0; i < TriVertex::NumVarying; i++) - { - stack_varying[i].store((varyingPos[i] + varyingStepPos[i] * iy) << 8); - varyingStep[i] = (varyingStartStepX[i] + varyingIncrStepX[i] * iy) << 8; - } + stack_varying[i].store(varyingLine[i]); stack_CX1.store(CY1); stack_CX2.store(CY2); @@ -495,6 +513,12 @@ void DrawTriangleCodegen::LoopPartialBlock() } loopx.end_block(); + for (int i = 0; i < TriVertex::NumVarying; i++) + { + stack_varyingLine[i].store(varyingLine[i] + varyingStepY[i]); + stack_varyingStep[i].store(varyingStep[i] + varyingIncrStepX[i]); + } + stack_CY1.store(CY1 + FDX12); stack_CY2.store(CY2 + FDX23); stack_CY3.store(CY3 + FDX31); diff --git a/src/r_compiler/fixedfunction/drawtrianglecodegen.h b/src/r_compiler/fixedfunction/drawtrianglecodegen.h index 74358f99a0..bb6df455eb 100644 --- a/src/r_compiler/fixedfunction/drawtrianglecodegen.h +++ b/src/r_compiler/fixedfunction/drawtrianglecodegen.h @@ -79,6 +79,8 @@ private: SSAStack stack_buffer; SSAStack stack_subsectorbuffer; SSAStack stack_iy, stack_ix; + SSAStack stack_varyingLine[TriVertex::NumVarying]; + SSAStack stack_varyingStep[TriVertex::NumVarying]; SSAStack stack_varying[TriVertex::NumVarying]; SSAStack stack_CY1, stack_CY2, stack_CY3; SSAStack stack_CX1, stack_CX2, stack_CX3; @@ -137,7 +139,7 @@ private: SSAInt currentlight; SSAUBytePtr currentcolormap; SSAInt 
varyingPos[TriVertex::NumVarying]; - SSAInt varyingStepPos[TriVertex::NumVarying]; + SSAInt varyingStepY[TriVertex::NumVarying]; SSAInt varyingStartStepX[TriVertex::NumVarying]; SSAInt varyingIncrStepX[TriVertex::NumVarying]; diff --git a/src/r_compiler/llvmdrawers.cpp b/src/r_compiler/llvmdrawers.cpp index 6745af8e4e..3abf732c0d 100644 --- a/src/r_compiler/llvmdrawers.cpp +++ b/src/r_compiler/llvmdrawers.cpp @@ -127,7 +127,7 @@ LLVMDrawers *LLVMDrawers::Instance() LLVMDrawersImpl::LLVMDrawersImpl() { - int version = 5; // Increment this number if the drawer codegen is modified (forces recreation of the module). + int version = 6; // Increment this number if the drawer codegen is modified (forces recreation of the module). std::string targetCPU = mProgram.GetTargetCPU(); bool loaded = mProgram.LoadCachedModule(version, targetCPU); if (!loaded) From 80a5f81f9a0dae20ea54d4a69c091ca20c7351b7 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 23 Nov 2016 23:29:53 +0100 Subject: [PATCH 371/912] Fix typo affecting walls on sloped floors --- src/r_poly_wall.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/r_poly_wall.cpp b/src/r_poly_wall.cpp index 63f35ccd58..a4dd62e4b2 100644 --- a/src/r_poly_wall.cpp +++ b/src/r_poly_wall.cpp @@ -94,7 +94,7 @@ bool RenderPolyWall::RenderLine(const TriMatrix &worldToClip, seg_t *line, secto if ((bottomfloorz1 < bottomceilz1 || bottomfloorz2 < bottomceilz2) && line->sidedef) { - wall.SetCoords(line->v1->fPos(), line->v2->fPos(), bottomceilz1, bottomfloorz2, bottomceilz2, bottomfloorz2); + wall.SetCoords(line->v1->fPos(), line->v2->fPos(), bottomceilz1, bottomfloorz1, bottomceilz2, bottomfloorz2); wall.TopZ = bottomceilz1; wall.BottomZ = bottomfloorz2; wall.UnpeggedCeil = topceilz1; From 0361e912cbe820965bf02c7c4773b2fc39f2c9a8 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 24 Nov 2016 01:01:02 +0100 Subject: [PATCH 372/912] Minor bug fix --- src/r_poly_cull.cpp | 22 ++++++++++++---------- 
src/r_poly_cull.h | 1 + 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/src/r_poly_cull.cpp b/src/r_poly_cull.cpp index 79b8d151d9..ff349690c0 100644 --- a/src/r_poly_cull.cpp +++ b/src/r_poly_cull.cpp @@ -35,18 +35,12 @@ void PolyCull::CullScene(const TriMatrix &worldToClip) frustumPlanes = FrustumPlanes(worldToClip); // Cull front to back + MaxCeilingHeight = 0.0; + MinFloorHeight = 0.0; if (numnodes == 0) - { - PvsSectors.push_back(subsectors); - MaxCeilingHeight = subsectors->sector->ceilingplane.Zat0(); - MinFloorHeight = subsectors->sector->floorplane.Zat0(); - } + CullSubsector(subsectors); else - { - MaxCeilingHeight = 0.0; - MinFloorHeight = 0.0; CullNode(nodes + numnodes - 1); // The head node is the last node output. - } ClearSolidSegments(); } @@ -65,16 +59,24 @@ void PolyCull::CullNode(void *node) // Possibly divide back space (away from the viewer). side ^= 1; + if (!CheckBBox(bsp->bbox[side])) return; node = bsp->children[side]; } - // Mark that we need to render this subsector_t *sub = (subsector_t *)((BYTE *)node - 1); + CullSubsector(sub); +} + +void PolyCull::CullSubsector(subsector_t *sub) +{ + // Update sky heights for the scene MaxCeilingHeight = MAX(MaxCeilingHeight, sub->sector->ceilingplane.Zat0()); MinFloorHeight = MIN(MinFloorHeight, sub->sector->floorplane.Zat0()); + + // Mark that we need to render this PvsSectors.push_back(sub); // Update culling info for further bsp clipping diff --git a/src/r_poly_cull.h b/src/r_poly_cull.h index a79355f689..e011ef5837 100644 --- a/src/r_poly_cull.h +++ b/src/r_poly_cull.h @@ -46,6 +46,7 @@ private: }; void CullNode(void *node); + void CullSubsector(subsector_t *sub); int PointOnSide(const DVector2 &pos, const node_t *node); // Checks BSP node/subtree bounding box. 
From 2316658dfc35a66a662782a992ba7157f9d1101f Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 24 Nov 2016 02:11:35 +0100 Subject: [PATCH 373/912] Fix kdizd blinking issue because the vertex buffer was too small --- src/r_poly.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/r_poly.cpp b/src/r_poly.cpp index d51d01f9ba..964ec2ae7d 100644 --- a/src/r_poly.cpp +++ b/src/r_poly.cpp @@ -278,7 +278,7 @@ namespace TriVertex *PolyVertexBuffer::GetVertices(int count) { - enum { VertexBufferSize = 16 * 1024 }; + enum { VertexBufferSize = 64 * 1024 }; static TriVertex Vertex[VertexBufferSize]; if (NextBufferVertex + count > VertexBufferSize) From 47aaf781c6c3321c8a8bf74fc8213d8bd4fcfb37 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 24 Nov 2016 05:51:37 +0100 Subject: [PATCH 374/912] Split RenderPolyScene into two to allow rendering from multiple viewpoints --- src/CMakeLists.txt | 1 + src/r_poly.cpp | 196 ++----------------------------------- src/r_poly.h | 69 +------------ src/r_poly_plane.cpp | 3 +- src/r_poly_portal.cpp | 210 ++++++++++++++++++++++++++++++++++++++++ src/r_poly_portal.h | 105 ++++++++++++++++++++ src/r_poly_sky.cpp | 4 +- src/r_poly_triangle.cpp | 24 +++++ src/r_poly_triangle.h | 7 ++ 9 files changed, 364 insertions(+), 255 deletions(-) create mode 100644 src/r_poly_portal.cpp create mode 100644 src/r_poly_portal.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index e53a2dbe9a..f1dcb88c11 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1061,6 +1061,7 @@ set( NOT_COMPILED_SOURCE_FILES set( FASTMATH_PCH_SOURCES r_swrenderer.cpp r_poly.cpp + r_poly_portal.cpp r_poly_cull.cpp r_poly_decal.cpp r_poly_particle.cpp diff --git a/src/r_poly.cpp b/src/r_poly.cpp index 964ec2ae7d..171d7e0ade 100644 --- a/src/r_poly.cpp +++ b/src/r_poly.cpp @@ -28,21 +28,26 @@ #include "r_poly.h" #include "gl/data/gl_data.h" -CVAR(Bool, r_debug_cull, 0, 0) EXTERN_CVAR(Int, screenblocks) void InitGLRMapinfoData(); 
///////////////////////////////////////////////////////////////////////////// +RenderPolyScene *RenderPolyScene::Instance() +{ + static RenderPolyScene scene; + return &scene; +} + void RenderPolyScene::Render() { ClearBuffers(); SetSceneViewport(); SetupPerspectiveMatrix(); - Cull.CullScene(WorldToClip); - RenderSectors(); - skydome.Render(WorldToClip); - RenderTranslucent(); + MainPortal.SetViewpoint(WorldToClip, 0); + MainPortal.Render(); + Skydome.Render(WorldToClip); + MainPortal.RenderTranslucent(); PlayerSprites.Render(); DrawerCommandQueue::WaitForWorkers(); @@ -56,13 +61,8 @@ void RenderPolyScene::RenderRemainingPlayerSprites() void RenderPolyScene::ClearBuffers() { PolyVertexBuffer::Clear(); - SectorSpriteRanges.clear(); - SectorSpriteRanges.resize(numsectors); - SortedSprites.clear(); - TranslucentObjects.clear(); PolyStencilBuffer::Instance()->Clear(RenderTarget->GetWidth(), RenderTarget->GetHeight(), 0); PolySubsectorGBuffer::Instance()->Resize(RenderTarget->GetPitch(), RenderTarget->GetHeight()); - NextSubsectorDepth = 0; } void RenderPolyScene::SetSceneViewport() @@ -116,179 +116,3 @@ void RenderPolyScene::SetupPerspectiveMatrix() WorldToClip = TriMatrix::perspective(fovy, ratio, 5.0f, 65535.0f) * worldToView; } - -void RenderPolyScene::RenderSectors() -{ - if (r_debug_cull) - { - for (auto it = Cull.PvsSectors.rbegin(); it != Cull.PvsSectors.rend(); ++it) - RenderSubsector(*it); - } - else - { - for (auto it = Cull.PvsSectors.begin(); it != Cull.PvsSectors.end(); ++it) - RenderSubsector(*it); - } -} - -void RenderPolyScene::RenderSubsector(subsector_t *sub) -{ - sector_t *frontsector = sub->sector; - frontsector->MoreFlags |= SECF_DRAWN; - - uint32_t subsectorDepth = NextSubsectorDepth++; - - if (sub->sector->CenterFloor() != sub->sector->CenterCeiling()) - { - RenderPolyPlane::RenderPlanes(WorldToClip, sub, subsectorDepth, Cull.MaxCeilingHeight, Cull.MinFloorHeight); - } - - for (uint32_t i = 0; i < sub->numlines; i++) - { - seg_t *line = 
&sub->firstline[i]; - if (line->sidedef == nullptr || !(line->sidedef->Flags & WALLF_POLYOBJ)) - { - RenderLine(sub, line, frontsector, subsectorDepth); - } - } - - bool mainBSP = ((unsigned int)(sub - subsectors) < (unsigned int)numsubsectors); - if (mainBSP) - { - int subsectorIndex = (int)(sub - subsectors); - for (int i = ParticlesInSubsec[subsectorIndex]; i != NO_PARTICLE; i = Particles[i].snext) - { - particle_t *particle = Particles + i; - TranslucentObjects.push_back({ particle, sub, subsectorDepth }); - } - } - - SpriteRange sprites = GetSpritesForSector(sub->sector); - for (int i = 0; i < sprites.Count; i++) - { - AActor *thing = SortedSprites[sprites.Start + i].Thing; - TranslucentObjects.push_back({ thing, sub, subsectorDepth }); - } - - TranslucentObjects.insert(TranslucentObjects.end(), SubsectorTranslucentWalls.begin(), SubsectorTranslucentWalls.end()); - SubsectorTranslucentWalls.clear(); -} - -SpriteRange RenderPolyScene::GetSpritesForSector(sector_t *sector) -{ - if (SectorSpriteRanges.size() < sector->sectornum || sector->sectornum < 0) - return SpriteRange(); - - auto &range = SectorSpriteRanges[sector->sectornum]; - if (range.Start == -1) - { - range.Start = (int)SortedSprites.size(); - range.Count = 0; - for (AActor *thing = sector->thinglist; thing != nullptr; thing = thing->snext) - { - SortedSprites.push_back({ thing, (thing->Pos() - ViewPos).LengthSquared() }); - range.Count++; - } - std::stable_sort(SortedSprites.begin() + range.Start, SortedSprites.begin() + range.Start + range.Count); - } - return range; -} - -void RenderPolyScene::RenderLine(subsector_t *sub, seg_t *line, sector_t *frontsector, uint32_t subsectorDepth) -{ - // Reject lines not facing viewer - DVector2 pt1 = line->v1->fPos() - ViewPos; - DVector2 pt2 = line->v2->fPos() - ViewPos; - if (pt1.Y * (pt1.X - pt2.X) + pt1.X * (pt2.Y - pt1.Y) >= 0) - return; - - // Cull wall if not visible - int sx1, sx2; - bool hasSegmentRange = Cull.GetSegmentRangeForLine(line->v1->fX(), 
line->v1->fY(), line->v2->fX(), line->v2->fY(), sx1, sx2); - if (!hasSegmentRange || Cull.IsSegmentCulled(sx1, sx2)) - return; - - // Tell automap we saw this - if (!r_dontmaplines && line->linedef) - { - line->linedef->flags |= ML_MAPPED; - sub->flags |= SSECF_DRAWN; - } - - // Render 3D floor sides - if (line->backsector && frontsector->e && line->backsector->e->XFloor.ffloors.Size()) - { - for (unsigned int i = 0; i < line->backsector->e->XFloor.ffloors.Size(); i++) - { - F3DFloor *fakeFloor = line->backsector->e->XFloor.ffloors[i]; - if (!(fakeFloor->flags & FF_EXISTS)) continue; - if (!(fakeFloor->flags & FF_RENDERPLANES)) continue; - if (!fakeFloor->model) continue; - RenderPolyWall::Render3DFloorLine(WorldToClip, line, frontsector, subsectorDepth, fakeFloor, SubsectorTranslucentWalls); - } - } - - // Render wall, and update culling info if its an occlusion blocker - if (RenderPolyWall::RenderLine(WorldToClip, line, frontsector, subsectorDepth, SubsectorTranslucentWalls)) - { - if (hasSegmentRange) - Cull.MarkSegmentCulled(sx1, sx2); - } -} - -void RenderPolyScene::RenderTranslucent() -{ - for (auto it = TranslucentObjects.rbegin(); it != TranslucentObjects.rend(); ++it) - { - auto &obj = *it; - if (obj.particle) - { - RenderPolyParticle spr; - spr.Render(WorldToClip, obj.particle, obj.sub, obj.subsectorDepth); - } - else if (!obj.thing) - { - obj.wall.Render(WorldToClip); - } - else if ((obj.thing->renderflags & RF_SPRITETYPEMASK) == RF_WALLSPRITE) - { - RenderPolyWallSprite wallspr; - wallspr.Render(WorldToClip, obj.thing, obj.sub, obj.subsectorDepth); - } - else - { - RenderPolySprite spr; - spr.Render(WorldToClip, obj.thing, obj.sub, obj.subsectorDepth); - } - } -} - -RenderPolyScene *RenderPolyScene::Instance() -{ - static RenderPolyScene scene; - return &scene; -} - -///////////////////////////////////////////////////////////////////////////// - -namespace -{ - int NextBufferVertex = 0; -} - -TriVertex *PolyVertexBuffer::GetVertices(int count) -{ - enum 
{ VertexBufferSize = 64 * 1024 }; - static TriVertex Vertex[VertexBufferSize]; - - if (NextBufferVertex + count > VertexBufferSize) - return nullptr; - TriVertex *v = Vertex + NextBufferVertex; - NextBufferVertex += count; - return v; -} - -void PolyVertexBuffer::Clear() -{ - NextBufferVertex = 0; -} diff --git a/src/r_poly.h b/src/r_poly.h index 4be49f5321..99e86307bb 100644 --- a/src/r_poly.h +++ b/src/r_poly.h @@ -29,51 +29,9 @@ #include "doomdata.h" #include "r_utility.h" #include "r_main.h" -#include "r_poly_triangle.h" -#include "r_poly_intersection.h" -#include "r_poly_wall.h" -#include "r_poly_sprite.h" -#include "r_poly_wallsprite.h" +#include "r_poly_portal.h" #include "r_poly_playersprite.h" -#include "r_poly_particle.h" -#include "r_poly_plane.h" #include "r_poly_sky.h" -#include "r_poly_cull.h" - -// Used for sorting things by distance to the camera -class PolySortedSprite -{ -public: - PolySortedSprite(AActor *thing, double distanceSquared) : Thing(thing), DistanceSquared(distanceSquared) { } - bool operator<(const PolySortedSprite &other) const { return DistanceSquared < other.DistanceSquared; } - - AActor *Thing; - double DistanceSquared; -}; - -class PolyTranslucentObject -{ -public: - PolyTranslucentObject(particle_t *particle, subsector_t *sub, uint32_t subsectorDepth) : particle(particle), sub(sub), subsectorDepth(subsectorDepth) { } - PolyTranslucentObject(AActor *thing, subsector_t *sub, uint32_t subsectorDepth) : thing(thing), sub(sub), subsectorDepth(subsectorDepth) { } - PolyTranslucentObject(RenderPolyWall wall) : wall(wall) { } - - particle_t *particle = nullptr; - AActor *thing = nullptr; - subsector_t *sub = nullptr; - uint32_t subsectorDepth = 0; - - RenderPolyWall wall; -}; - -class SpriteRange -{ -public: - SpriteRange() = default; - SpriteRange(int start, int count) : Start(start), Count(count) { } - int Start = -1; - int Count = 0; -}; // Renders a scene class RenderPolyScene @@ -82,36 +40,15 @@ public: void Render(); void 
RenderRemainingPlayerSprites(); - static const uint32_t SkySubsectorDepth = 0x7fffffff; - static RenderPolyScene *Instance(); private: void ClearBuffers(); void SetSceneViewport(); void SetupPerspectiveMatrix(); - void RenderSectors(); - void RenderSubsector(subsector_t *sub); - void RenderLine(subsector_t *sub, seg_t *line, sector_t *frontsector, uint32_t subsectorDepth); - - void RenderTranslucent(); - SpriteRange GetSpritesForSector(sector_t *sector); TriMatrix WorldToClip; - PolyCull Cull; - uint32_t NextSubsectorDepth = 0; - std::vector SectorSpriteRanges; - std::vector SortedSprites; - std::vector TranslucentObjects; - std::vector SubsectorTranslucentWalls; - - PolySkyDome skydome; + RenderPolyPortal MainPortal; + PolySkyDome Skydome; RenderPolyPlayerSprites PlayerSprites; }; - -class PolyVertexBuffer -{ -public: - static TriVertex *GetVertices(int count); - static void Clear(); -}; diff --git a/src/r_poly_plane.cpp b/src/r_poly_plane.cpp index a64dfaaf57..e68487e172 100644 --- a/src/r_poly_plane.cpp +++ b/src/r_poly_plane.cpp @@ -26,6 +26,7 @@ #include "sbar.h" #include "r_data/r_translate.h" #include "r_poly_plane.h" +#include "r_poly_portal.h" #include "r_poly.h" #include "r_sky.h" // for skyflatnum @@ -200,7 +201,7 @@ void RenderPolyPlane::Render(const TriMatrix &worldToClip, subsector_t *sub, uin if (fixedlightlev >= 0 || fixedcolormap) uniforms.light = 256; uniforms.flags = 0; - uniforms.subsectorDepth = isSky ? RenderPolyScene::SkySubsectorDepth : subsectorDepth; + uniforms.subsectorDepth = isSky ? 
RenderPolyPortal::SkySubsectorDepth : subsectorDepth; TriVertex *vertices = PolyVertexBuffer::GetVertices(sub->numlines); if (!vertices) diff --git a/src/r_poly_portal.cpp b/src/r_poly_portal.cpp new file mode 100644 index 0000000000..79468032ba --- /dev/null +++ b/src/r_poly_portal.cpp @@ -0,0 +1,210 @@ +/* +** Polygon Doom software renderer +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. 
+** +*/ + +#include +#include "templates.h" +#include "doomdef.h" +#include "sbar.h" +#include "r_data/r_translate.h" +#include "r_poly_portal.h" +#include "gl/data/gl_data.h" + +CVAR(Bool, r_debug_cull, 0, 0) + +///////////////////////////////////////////////////////////////////////////// + +void RenderPolyPortal::SetViewpoint(const TriMatrix &worldToClip, uint32_t stencilValue) +{ + WorldToClip = worldToClip; + StencilValue = stencilValue; +} + +void RenderPolyPortal::Render() +{ + ClearBuffers(); + Cull.CullScene(WorldToClip); + RenderSectors(); + for (auto &portal : Portals) + portal->Render(); +} + +void RenderPolyPortal::ClearBuffers() +{ + SectorSpriteRanges.clear(); + SectorSpriteRanges.resize(numsectors); + SortedSprites.clear(); + TranslucentObjects.clear(); + Portals.clear(); + NextSubsectorDepth = 0; +} + +void RenderPolyPortal::RenderSectors() +{ + if (r_debug_cull) + { + for (auto it = Cull.PvsSectors.rbegin(); it != Cull.PvsSectors.rend(); ++it) + RenderSubsector(*it); + } + else + { + for (auto it = Cull.PvsSectors.begin(); it != Cull.PvsSectors.end(); ++it) + RenderSubsector(*it); + } +} + +void RenderPolyPortal::RenderSubsector(subsector_t *sub) +{ + sector_t *frontsector = sub->sector; + frontsector->MoreFlags |= SECF_DRAWN; + + uint32_t subsectorDepth = NextSubsectorDepth++; + + if (sub->sector->CenterFloor() != sub->sector->CenterCeiling()) + { + RenderPolyPlane::RenderPlanes(WorldToClip, sub, subsectorDepth, Cull.MaxCeilingHeight, Cull.MinFloorHeight); + + FSectorPortal *ceilingPortal = frontsector->ValidatePortal(sector_t::ceiling); + FSectorPortal *floorPortal = frontsector->ValidatePortal(sector_t::floor); + } + + for (uint32_t i = 0; i < sub->numlines; i++) + { + seg_t *line = &sub->firstline[i]; + if (line->sidedef == nullptr || !(line->sidedef->Flags & WALLF_POLYOBJ)) + { + RenderLine(sub, line, frontsector, subsectorDepth); + } + } + + bool mainBSP = ((unsigned int)(sub - subsectors) < (unsigned int)numsubsectors); + if (mainBSP) + { + 
int subsectorIndex = (int)(sub - subsectors); + for (int i = ParticlesInSubsec[subsectorIndex]; i != NO_PARTICLE; i = Particles[i].snext) + { + particle_t *particle = Particles + i; + TranslucentObjects.push_back({ particle, sub, subsectorDepth }); + } + } + + SpriteRange sprites = GetSpritesForSector(sub->sector); + for (int i = 0; i < sprites.Count; i++) + { + AActor *thing = SortedSprites[sprites.Start + i].Thing; + TranslucentObjects.push_back({ thing, sub, subsectorDepth }); + } + + TranslucentObjects.insert(TranslucentObjects.end(), SubsectorTranslucentWalls.begin(), SubsectorTranslucentWalls.end()); + SubsectorTranslucentWalls.clear(); +} + +SpriteRange RenderPolyPortal::GetSpritesForSector(sector_t *sector) +{ + if (SectorSpriteRanges.size() < sector->sectornum || sector->sectornum < 0) + return SpriteRange(); + + auto &range = SectorSpriteRanges[sector->sectornum]; + if (range.Start == -1) + { + range.Start = (int)SortedSprites.size(); + range.Count = 0; + for (AActor *thing = sector->thinglist; thing != nullptr; thing = thing->snext) + { + SortedSprites.push_back({ thing, (thing->Pos() - ViewPos).LengthSquared() }); + range.Count++; + } + std::stable_sort(SortedSprites.begin() + range.Start, SortedSprites.begin() + range.Start + range.Count); + } + return range; +} + +void RenderPolyPortal::RenderLine(subsector_t *sub, seg_t *line, sector_t *frontsector, uint32_t subsectorDepth) +{ + // Reject lines not facing viewer + DVector2 pt1 = line->v1->fPos() - ViewPos; + DVector2 pt2 = line->v2->fPos() - ViewPos; + if (pt1.Y * (pt1.X - pt2.X) + pt1.X * (pt2.Y - pt1.Y) >= 0) + return; + + // Cull wall if not visible + int sx1, sx2; + bool hasSegmentRange = Cull.GetSegmentRangeForLine(line->v1->fX(), line->v1->fY(), line->v2->fX(), line->v2->fY(), sx1, sx2); + if (!hasSegmentRange || Cull.IsSegmentCulled(sx1, sx2)) + return; + + // Tell automap we saw this + if (!r_dontmaplines && line->linedef) + { + line->linedef->flags |= ML_MAPPED; + sub->flags |= SSECF_DRAWN; 
+ } + + // Render 3D floor sides + if (line->backsector && frontsector->e && line->backsector->e->XFloor.ffloors.Size()) + { + for (unsigned int i = 0; i < line->backsector->e->XFloor.ffloors.Size(); i++) + { + F3DFloor *fakeFloor = line->backsector->e->XFloor.ffloors[i]; + if (!(fakeFloor->flags & FF_EXISTS)) continue; + if (!(fakeFloor->flags & FF_RENDERPLANES)) continue; + if (!fakeFloor->model) continue; + RenderPolyWall::Render3DFloorLine(WorldToClip, line, frontsector, subsectorDepth, fakeFloor, SubsectorTranslucentWalls); + } + } + + // Render wall, and update culling info if its an occlusion blocker + if (RenderPolyWall::RenderLine(WorldToClip, line, frontsector, subsectorDepth, SubsectorTranslucentWalls)) + { + if (hasSegmentRange) + Cull.MarkSegmentCulled(sx1, sx2); + } +} + +void RenderPolyPortal::RenderTranslucent() +{ + for (auto it = Portals.rbegin(); it != Portals.rend(); ++it) + (*it)->RenderTranslucent(); + + for (auto it = TranslucentObjects.rbegin(); it != TranslucentObjects.rend(); ++it) + { + auto &obj = *it; + if (obj.particle) + { + RenderPolyParticle spr; + spr.Render(WorldToClip, obj.particle, obj.sub, obj.subsectorDepth); + } + else if (!obj.thing) + { + obj.wall.Render(WorldToClip); + } + else if ((obj.thing->renderflags & RF_SPRITETYPEMASK) == RF_WALLSPRITE) + { + RenderPolyWallSprite wallspr; + wallspr.Render(WorldToClip, obj.thing, obj.sub, obj.subsectorDepth); + } + else + { + RenderPolySprite spr; + spr.Render(WorldToClip, obj.thing, obj.sub, obj.subsectorDepth); + } + } +} diff --git a/src/r_poly_portal.h b/src/r_poly_portal.h new file mode 100644 index 0000000000..364f2dcfb4 --- /dev/null +++ b/src/r_poly_portal.h @@ -0,0 +1,105 @@ +/* +** Polygon Doom software renderer +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. 
+** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. +** +*/ + +#pragma once + +#include +#include +#include +#include +#include "doomdata.h" +#include "r_utility.h" +#include "r_main.h" +#include "r_poly_triangle.h" +#include "r_poly_intersection.h" +#include "r_poly_wall.h" +#include "r_poly_sprite.h" +#include "r_poly_wallsprite.h" +#include "r_poly_playersprite.h" +#include "r_poly_particle.h" +#include "r_poly_plane.h" +#include "r_poly_cull.h" + +// Used for sorting things by distance to the camera +class PolySortedSprite +{ +public: + PolySortedSprite(AActor *thing, double distanceSquared) : Thing(thing), DistanceSquared(distanceSquared) { } + bool operator<(const PolySortedSprite &other) const { return DistanceSquared < other.DistanceSquared; } + + AActor *Thing; + double DistanceSquared; +}; + +class PolyTranslucentObject +{ +public: + PolyTranslucentObject(particle_t *particle, subsector_t *sub, uint32_t subsectorDepth) : particle(particle), sub(sub), subsectorDepth(subsectorDepth) { } + PolyTranslucentObject(AActor *thing, subsector_t *sub, uint32_t subsectorDepth) : thing(thing), sub(sub), subsectorDepth(subsectorDepth) { } + PolyTranslucentObject(RenderPolyWall wall) : wall(wall) { } + + particle_t *particle = nullptr; + AActor *thing = nullptr; + subsector_t *sub = nullptr; + uint32_t subsectorDepth = 0; + + RenderPolyWall wall; +}; + 
+class SpriteRange +{ +public: + SpriteRange() = default; + SpriteRange(int start, int count) : Start(start), Count(count) { } + int Start = -1; + int Count = 0; +}; + +// Renders everything from a specific viewpoint +class RenderPolyPortal +{ +public: + void SetViewpoint(const TriMatrix &worldToClip, uint32_t stencilValue); + void Render(); + void RenderTranslucent(); + + static const uint32_t SkySubsectorDepth = 0x7fffffff; + +private: + void ClearBuffers(); + void RenderSectors(); + void RenderSubsector(subsector_t *sub); + void RenderLine(subsector_t *sub, seg_t *line, sector_t *frontsector, uint32_t subsectorDepth); + + SpriteRange GetSpritesForSector(sector_t *sector); + + TriMatrix WorldToClip; + uint32_t StencilValue = 0; + PolyCull Cull; + uint32_t NextSubsectorDepth = 0; + std::vector SectorSpriteRanges; + std::vector SortedSprites; + std::vector TranslucentObjects; + std::vector SubsectorTranslucentWalls; + + std::vector> Portals; +}; diff --git a/src/r_poly_sky.cpp b/src/r_poly_sky.cpp index a8e7b23d6e..6fc8170417 100644 --- a/src/r_poly_sky.cpp +++ b/src/r_poly_sky.cpp @@ -25,7 +25,7 @@ #include "sbar.h" #include "r_data/r_translate.h" #include "r_poly_sky.h" -#include "r_poly.h" +#include "r_poly_portal.h" #include "r_sky.h" // for skyflatnum PolySkyDome::PolySkyDome() @@ -53,7 +53,7 @@ void PolySkyDome::Render(const TriMatrix &worldToClip) TriUniforms uniforms; uniforms.light = 256; uniforms.flags = 0; - uniforms.subsectorDepth = RenderPolyScene::SkySubsectorDepth; + uniforms.subsectorDepth = RenderPolyPortal::SkySubsectorDepth; int rc = mRows + 1; diff --git a/src/r_poly_triangle.cpp b/src/r_poly_triangle.cpp index 367d3df409..9a389777ad 100644 --- a/src/r_poly_triangle.cpp +++ b/src/r_poly_triangle.cpp @@ -523,3 +523,27 @@ TriVertex TriMatrix::operator*(TriVertex v) const v.w = vw; return v; } + +///////////////////////////////////////////////////////////////////////////// + +namespace +{ + int NextBufferVertex = 0; +} + +TriVertex 
*PolyVertexBuffer::GetVertices(int count) +{ + enum { VertexBufferSize = 256 * 1024 }; + static TriVertex Vertex[VertexBufferSize]; + + if (NextBufferVertex + count > VertexBufferSize) + return nullptr; + TriVertex *v = Vertex + NextBufferVertex; + NextBufferVertex += count; + return v; +} + +void PolyVertexBuffer::Clear() +{ + NextBufferVertex = 0; +} diff --git a/src/r_poly_triangle.h b/src/r_poly_triangle.h index 7d58ed5ed2..8fef11e99a 100644 --- a/src/r_poly_triangle.h +++ b/src/r_poly_triangle.h @@ -220,3 +220,10 @@ private: TriDrawVariant variant; TriBlendMode blendmode; }; + +class PolyVertexBuffer +{ +public: + static TriVertex *GetVertices(int count); + static void Clear(); +}; From 33958ca5b2fbd5ed1e721d5f57519fc957e3ad07 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 24 Nov 2016 07:37:15 +0100 Subject: [PATCH 375/912] Add vid_vsync support to Linux target --- src/posix/sdl/sdlglvideo.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/posix/sdl/sdlglvideo.cpp b/src/posix/sdl/sdlglvideo.cpp index d8c00f2363..173a84d801 100644 --- a/src/posix/sdl/sdlglvideo.cpp +++ b/src/posix/sdl/sdlglvideo.cpp @@ -442,6 +442,16 @@ void SDLGLFB::SetVSync( bool vsync ) #if defined (__APPLE__) const GLint value = vsync ? 
1 : 0; CGLSetParameter( CGLGetCurrentContext(), kCGLCPSwapInterval, &value ); +#else + if (vsync) + { + if (SDL_GL_SetSwapInterval(-1) == -1) + SDL_GL_SetSwapInterval(1); + } + else + { + SDL_GL_SetSwapInterval(0); + } #endif } From cff72fb07251851928253da9943b69130edb7015 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 24 Nov 2016 08:23:50 +0100 Subject: [PATCH 376/912] Add stencil value to decal, particle, plane, port, sky, sprite, wall and wallsprite classes --- src/r_poly_decal.cpp | 10 +++++----- src/r_poly_decal.h | 4 ++-- src/r_poly_particle.cpp | 6 +++--- src/r_poly_particle.h | 2 +- src/r_poly_plane.cpp | 22 +++++++++++----------- src/r_poly_plane.h | 6 +++--- src/r_poly_portal.cpp | 12 ++++++------ src/r_poly_sky.cpp | 2 +- src/r_poly_sprite.cpp | 6 +++--- src/r_poly_sprite.h | 2 +- src/r_poly_wall.cpp | 10 +++++----- src/r_poly_wall.h | 5 +++-- src/r_poly_wallsprite.cpp | 6 +++--- src/r_poly_wallsprite.h | 2 +- 14 files changed, 48 insertions(+), 47 deletions(-) diff --git a/src/r_poly_decal.cpp b/src/r_poly_decal.cpp index 206eb9862b..ded64a0957 100644 --- a/src/r_poly_decal.cpp +++ b/src/r_poly_decal.cpp @@ -29,7 +29,7 @@ #include "r_poly.h" #include "a_sharedglobal.h" -void RenderPolyDecal::RenderWallDecals(const TriMatrix &worldToClip, const seg_t *line, uint32_t subsectorDepth) +void RenderPolyDecal::RenderWallDecals(const TriMatrix &worldToClip, const seg_t *line, uint32_t subsectorDepth, uint32_t stencilValue) { if (line->linedef == nullptr && line->sidedef == nullptr) return; @@ -37,11 +37,11 @@ void RenderPolyDecal::RenderWallDecals(const TriMatrix &worldToClip, const seg_t for (DBaseDecal *decal = line->sidedef->AttachedDecals; decal != nullptr; decal = decal->WallNext) { RenderPolyDecal render; - render.Render(worldToClip, decal, line, subsectorDepth); + render.Render(worldToClip, decal, line, subsectorDepth, stencilValue); } } -void RenderPolyDecal::Render(const TriMatrix &worldToClip, DBaseDecal *decal, const seg_t *line, uint32_t 
subsectorDepth) +void RenderPolyDecal::Render(const TriMatrix &worldToClip, DBaseDecal *decal, const seg_t *line, uint32_t subsectorDepth, uint32_t stencilValue) { if (decal->RenderFlags & RF_INVISIBLE || !viewactive || !decal->PicNum.isValid()) return; @@ -163,8 +163,8 @@ void RenderPolyDecal::Render(const TriMatrix &worldToClip, DBaseDecal *decal, co args.vcount = 4; args.mode = TriangleDrawMode::Fan; args.ccw = true; - args.stenciltestvalue = 0; - args.stencilwritevalue = 1; + args.stenciltestvalue = stencilValue; + args.stencilwritevalue = stencilValue; //mode = R_SetPatchStyle (decal->RenderStyle, (float)decal->Alpha, decal->Translation, decal->AlphaColor); PolyTriangleDrawer::draw(args, TriDrawVariant::DrawSubsector, TriBlendMode::Shaded); } diff --git a/src/r_poly_decal.h b/src/r_poly_decal.h index 61087a773e..c836a0cd73 100644 --- a/src/r_poly_decal.h +++ b/src/r_poly_decal.h @@ -27,8 +27,8 @@ class RenderPolyDecal { public: - static void RenderWallDecals(const TriMatrix &worldToClip, const seg_t *line, uint32_t subsectorDepth); + static void RenderWallDecals(const TriMatrix &worldToClip, const seg_t *line, uint32_t subsectorDepth, uint32_t stencilValue); private: - void Render(const TriMatrix &worldToClip, DBaseDecal *decal, const seg_t *line, uint32_t subsectorDepth); + void Render(const TriMatrix &worldToClip, DBaseDecal *decal, const seg_t *line, uint32_t subsectorDepth, uint32_t stencilValue); }; diff --git a/src/r_poly_particle.cpp b/src/r_poly_particle.cpp index 9b1dbaf1cc..e69803e1e2 100644 --- a/src/r_poly_particle.cpp +++ b/src/r_poly_particle.cpp @@ -28,7 +28,7 @@ #include "r_poly_particle.h" #include "r_poly.h" -void RenderPolyParticle::Render(const TriMatrix &worldToClip, particle_t *particle, subsector_t *sub, uint32_t subsectorDepth) +void RenderPolyParticle::Render(const TriMatrix &worldToClip, particle_t *particle, subsector_t *sub, uint32_t subsectorDepth, uint32_t stencilValue) { DVector3 pos = particle->Pos; double psize = particle->size 
/ 8.0; @@ -102,8 +102,8 @@ void RenderPolyParticle::Render(const TriMatrix &worldToClip, particle_t *partic args.vcount = 4; args.mode = TriangleDrawMode::Fan; args.ccw = true; - args.stenciltestvalue = 0; - args.stencilwritevalue = 1; + args.stenciltestvalue = stencilValue; + args.stencilwritevalue = stencilValue; args.SetColormap(sub->sector->ColorMap); PolyTriangleDrawer::draw(args, TriDrawVariant::FillSubsector, TriBlendMode::AlphaBlend); } diff --git a/src/r_poly_particle.h b/src/r_poly_particle.h index f4218a310e..3fbb2d2abd 100644 --- a/src/r_poly_particle.h +++ b/src/r_poly_particle.h @@ -28,5 +28,5 @@ class RenderPolyParticle { public: - void Render(const TriMatrix &worldToClip, particle_t *particle, subsector_t *sub, uint32_t subsectorDepth); + void Render(const TriMatrix &worldToClip, particle_t *particle, subsector_t *sub, uint32_t subsectorDepth, uint32_t stencilValue); }; diff --git a/src/r_poly_plane.cpp b/src/r_poly_plane.cpp index e68487e172..68a56c69bb 100644 --- a/src/r_poly_plane.cpp +++ b/src/r_poly_plane.cpp @@ -32,7 +32,7 @@ EXTERN_CVAR(Int, r_3dfloors) -void RenderPolyPlane::RenderPlanes(const TriMatrix &worldToClip, subsector_t *sub, uint32_t subsectorDepth, double skyCeilingHeight, double skyFloorHeight) +void RenderPolyPlane::RenderPlanes(const TriMatrix &worldToClip, subsector_t *sub, uint32_t subsectorDepth, uint32_t stencilValue, double skyCeilingHeight, double skyFloorHeight) { RenderPolyPlane plane; @@ -58,7 +58,7 @@ void RenderPolyPlane::RenderPlanes(const TriMatrix &worldToClip, subsector_t *su double fakeHeight = fakeFloor->top.plane->ZatPoint(frontsector->centerspot); if (fakeHeight < ViewPos.Z && fakeHeight > frontsector->floorplane.ZatPoint(frontsector->centerspot)) { - plane.Render3DFloor(worldToClip, sub, subsectorDepth, false, fakeFloor); + plane.Render3DFloor(worldToClip, sub, subsectorDepth, stencilValue, false, fakeFloor); } } @@ -79,16 +79,16 @@ void RenderPolyPlane::RenderPlanes(const TriMatrix &worldToClip, subsector_t 
*su double fakeHeight = fakeFloor->bottom.plane->ZatPoint(frontsector->centerspot); if (fakeHeight > ViewPos.Z && fakeHeight < frontsector->ceilingplane.ZatPoint(frontsector->centerspot)) { - plane.Render3DFloor(worldToClip, sub, subsectorDepth, true, fakeFloor); + plane.Render3DFloor(worldToClip, sub, subsectorDepth, stencilValue, true, fakeFloor); } } } - plane.Render(worldToClip, sub, subsectorDepth, true, skyCeilingHeight); - plane.Render(worldToClip, sub, subsectorDepth, false, skyFloorHeight); + plane.Render(worldToClip, sub, subsectorDepth, stencilValue, true, skyCeilingHeight); + plane.Render(worldToClip, sub, subsectorDepth, stencilValue, false, skyFloorHeight); } -void RenderPolyPlane::Render3DFloor(const TriMatrix &worldToClip, subsector_t *sub, uint32_t subsectorDepth, bool ceiling, F3DFloor *fakeFloor) +void RenderPolyPlane::Render3DFloor(const TriMatrix &worldToClip, subsector_t *sub, uint32_t subsectorDepth, uint32_t stencilValue, bool ceiling, F3DFloor *fakeFloor) { FTextureID picnum = ceiling ? 
*fakeFloor->bottom.texture : *fakeFloor->top.texture; FTexture *tex = TexMan(picnum); @@ -138,15 +138,15 @@ void RenderPolyPlane::Render3DFloor(const TriMatrix &worldToClip, subsector_t *s args.vcount = sub->numlines; args.mode = TriangleDrawMode::Fan; args.ccw = true; - args.stenciltestvalue = 0; - args.stencilwritevalue = 1; + args.stenciltestvalue = stencilValue; + args.stencilwritevalue = stencilValue + 1; args.SetTexture(tex); args.SetColormap(sub->sector->ColorMap); PolyTriangleDrawer::draw(args, TriDrawVariant::DrawNormal, TriBlendMode::Copy); PolyTriangleDrawer::draw(args, TriDrawVariant::Stencil, TriBlendMode::Copy); } -void RenderPolyPlane::Render(const TriMatrix &worldToClip, subsector_t *sub, uint32_t subsectorDepth, bool ceiling, double skyHeight) +void RenderPolyPlane::Render(const TriMatrix &worldToClip, subsector_t *sub, uint32_t subsectorDepth, uint32_t stencilValue, bool ceiling, double skyHeight) { sector_t *fakesector = sub->sector->heightsec; if (fakesector && (fakesector == sub->sector || (fakesector->MoreFlags & SECF_IGNOREHEIGHTSEC) == SECF_IGNOREHEIGHTSEC)) @@ -231,8 +231,8 @@ void RenderPolyPlane::Render(const TriMatrix &worldToClip, subsector_t *sub, uin args.vcount = sub->numlines; args.mode = TriangleDrawMode::Fan; args.ccw = ccw; - args.stenciltestvalue = 0; - args.stencilwritevalue = 1; + args.stenciltestvalue = stencilValue; + args.stencilwritevalue = stencilValue + 1; args.SetColormap(frontsector->ColorMap); if (!isSky) diff --git a/src/r_poly_plane.h b/src/r_poly_plane.h index fa786ead95..d36bb70dcb 100644 --- a/src/r_poly_plane.h +++ b/src/r_poly_plane.h @@ -27,10 +27,10 @@ class RenderPolyPlane { public: - static void RenderPlanes(const TriMatrix &worldToClip, subsector_t *sub, uint32_t subsectorDepth, double skyCeilingHeight, double skyFloorHeight); + static void RenderPlanes(const TriMatrix &worldToClip, subsector_t *sub, uint32_t subsectorDepth, uint32_t stencilValue, double skyCeilingHeight, double skyFloorHeight); private: - 
void Render3DFloor(const TriMatrix &worldToClip, subsector_t *sub, uint32_t subsectorDepth, bool ceiling, F3DFloor *fakefloor); - void Render(const TriMatrix &worldToClip, subsector_t *sub, uint32_t subsectorDepth, bool ceiling, double skyHeight); + void Render3DFloor(const TriMatrix &worldToClip, subsector_t *sub, uint32_t subsectorDepth, uint32_t stencilValue, bool ceiling, F3DFloor *fakefloor); + void Render(const TriMatrix &worldToClip, subsector_t *sub, uint32_t subsectorDepth, uint32_t stencilValue, bool ceiling, double skyHeight); TriVertex PlaneVertex(vertex_t *v1, double height); }; diff --git a/src/r_poly_portal.cpp b/src/r_poly_portal.cpp index 79468032ba..6b37d6424b 100644 --- a/src/r_poly_portal.cpp +++ b/src/r_poly_portal.cpp @@ -80,7 +80,7 @@ void RenderPolyPortal::RenderSubsector(subsector_t *sub) if (sub->sector->CenterFloor() != sub->sector->CenterCeiling()) { - RenderPolyPlane::RenderPlanes(WorldToClip, sub, subsectorDepth, Cull.MaxCeilingHeight, Cull.MinFloorHeight); + RenderPolyPlane::RenderPlanes(WorldToClip, sub, subsectorDepth, StencilValue, Cull.MaxCeilingHeight, Cull.MinFloorHeight); FSectorPortal *ceilingPortal = frontsector->ValidatePortal(sector_t::ceiling); FSectorPortal *floorPortal = frontsector->ValidatePortal(sector_t::floor); @@ -167,12 +167,12 @@ void RenderPolyPortal::RenderLine(subsector_t *sub, seg_t *line, sector_t *front if (!(fakeFloor->flags & FF_EXISTS)) continue; if (!(fakeFloor->flags & FF_RENDERPLANES)) continue; if (!fakeFloor->model) continue; - RenderPolyWall::Render3DFloorLine(WorldToClip, line, frontsector, subsectorDepth, fakeFloor, SubsectorTranslucentWalls); + RenderPolyWall::Render3DFloorLine(WorldToClip, line, frontsector, subsectorDepth, StencilValue, fakeFloor, SubsectorTranslucentWalls); } } // Render wall, and update culling info if its an occlusion blocker - if (RenderPolyWall::RenderLine(WorldToClip, line, frontsector, subsectorDepth, SubsectorTranslucentWalls)) + if 
(RenderPolyWall::RenderLine(WorldToClip, line, frontsector, subsectorDepth, StencilValue, SubsectorTranslucentWalls)) { if (hasSegmentRange) Cull.MarkSegmentCulled(sx1, sx2); @@ -190,7 +190,7 @@ void RenderPolyPortal::RenderTranslucent() if (obj.particle) { RenderPolyParticle spr; - spr.Render(WorldToClip, obj.particle, obj.sub, obj.subsectorDepth); + spr.Render(WorldToClip, obj.particle, obj.sub, obj.subsectorDepth, StencilValue + 1); } else if (!obj.thing) { @@ -199,12 +199,12 @@ void RenderPolyPortal::RenderTranslucent() else if ((obj.thing->renderflags & RF_SPRITETYPEMASK) == RF_WALLSPRITE) { RenderPolyWallSprite wallspr; - wallspr.Render(WorldToClip, obj.thing, obj.sub, obj.subsectorDepth); + wallspr.Render(WorldToClip, obj.thing, obj.sub, obj.subsectorDepth, StencilValue + 1); } else { RenderPolySprite spr; - spr.Render(WorldToClip, obj.thing, obj.sub, obj.subsectorDepth); + spr.Render(WorldToClip, obj.thing, obj.sub, obj.subsectorDepth, StencilValue + 1); } } } diff --git a/src/r_poly_sky.cpp b/src/r_poly_sky.cpp index 6fc8170417..ad8d4b861e 100644 --- a/src/r_poly_sky.cpp +++ b/src/r_poly_sky.cpp @@ -61,7 +61,7 @@ void PolySkyDome::Render(const TriMatrix &worldToClip) args.uniforms = uniforms; args.objectToClip = &objectToClip; args.stenciltestvalue = 255; - args.stencilwritevalue = 1; + args.stencilwritevalue = 255; args.SetTexture(frontskytex); args.SetColormap(&NormalLight); diff --git a/src/r_poly_sprite.cpp b/src/r_poly_sprite.cpp index 453ab2d00b..0bacd6eb2b 100644 --- a/src/r_poly_sprite.cpp +++ b/src/r_poly_sprite.cpp @@ -31,7 +31,7 @@ EXTERN_CVAR(Float, transsouls) EXTERN_CVAR(Int, r_drawfuzz) -void RenderPolySprite::Render(const TriMatrix &worldToClip, AActor *thing, subsector_t *sub, uint32_t subsectorDepth) +void RenderPolySprite::Render(const TriMatrix &worldToClip, AActor *thing, subsector_t *sub, uint32_t subsectorDepth, uint32_t stencilValue) { if (IsThingCulled(thing)) return; @@ -134,8 +134,8 @@ void RenderPolySprite::Render(const 
TriMatrix &worldToClip, AActor *thing, subse args.vcount = 4; args.mode = TriangleDrawMode::Fan; args.ccw = true; - args.stenciltestvalue = 0; - args.stencilwritevalue = 1; + args.stenciltestvalue = stencilValue; + args.stencilwritevalue = stencilValue; args.SetTexture(tex, thing->Translation); args.SetColormap(sub->sector->ColorMap); diff --git a/src/r_poly_sprite.h b/src/r_poly_sprite.h index 085d9b5708..e23d0d09da 100644 --- a/src/r_poly_sprite.h +++ b/src/r_poly_sprite.h @@ -27,7 +27,7 @@ class RenderPolySprite { public: - void Render(const TriMatrix &worldToClip, AActor *thing, subsector_t *sub, uint32_t subsectorDepth); + void Render(const TriMatrix &worldToClip, AActor *thing, subsector_t *sub, uint32_t subsectorDepth, uint32_t stencilValue); static bool IsThingCulled(AActor *thing); static FTexture *GetSpriteTexture(AActor *thing, /*out*/ bool &flipX); diff --git a/src/r_poly_wall.cpp b/src/r_poly_wall.cpp index a4dd62e4b2..f49908965f 100644 --- a/src/r_poly_wall.cpp +++ b/src/r_poly_wall.cpp @@ -30,7 +30,7 @@ #include "r_poly.h" #include "r_sky.h" // for skyflatnum -bool RenderPolyWall::RenderLine(const TriMatrix &worldToClip, seg_t *line, sector_t *frontsector, uint32_t subsectorDepth, std::vector &translucentWallsOutput) +bool RenderPolyWall::RenderLine(const TriMatrix &worldToClip, seg_t *line, sector_t *frontsector, uint32_t subsectorDepth, uint32_t stencilValue, std::vector &translucentWallsOutput) { RenderPolyWall wall; wall.LineSeg = line; @@ -120,7 +120,7 @@ bool RenderPolyWall::RenderLine(const TriMatrix &worldToClip, seg_t *line, secto return false; } -void RenderPolyWall::Render3DFloorLine(const TriMatrix &worldToClip, seg_t *line, sector_t *frontsector, uint32_t subsectorDepth, F3DFloor *fakeFloor, std::vector &translucentWallsOutput) +void RenderPolyWall::Render3DFloorLine(const TriMatrix &worldToClip, seg_t *line, sector_t *frontsector, uint32_t subsectorDepth, uint32_t stencilValue, F3DFloor *fakeFloor, std::vector &translucentWallsOutput) { 
double frontceilz1 = fakeFloor->top.plane->ZatPoint(line->v1); double frontfloorz1 = fakeFloor->bottom.plane->ZatPoint(line->v1); @@ -211,8 +211,8 @@ void RenderPolyWall::Render(const TriMatrix &worldToClip) args.vcount = 4; args.mode = TriangleDrawMode::Fan; args.ccw = true; - args.stenciltestvalue = 0; - args.stencilwritevalue = 1; + args.stenciltestvalue = StencilValue; + args.stencilwritevalue = StencilValue + 1; args.SetTexture(tex); args.SetColormap(Line->frontsector->ColorMap); @@ -231,7 +231,7 @@ void RenderPolyWall::Render(const TriMatrix &worldToClip) PolyTriangleDrawer::draw(args, TriDrawVariant::DrawSubsector, TriBlendMode::Add); } - RenderPolyDecal::RenderWallDecals(worldToClip, LineSeg, SubsectorDepth); + RenderPolyDecal::RenderWallDecals(worldToClip, LineSeg, SubsectorDepth, StencilValue); } void RenderPolyWall::ClampHeight(TriVertex &v1, TriVertex &v2) diff --git a/src/r_poly_wall.h b/src/r_poly_wall.h index d897319114..3d2f89f841 100644 --- a/src/r_poly_wall.h +++ b/src/r_poly_wall.h @@ -29,8 +29,8 @@ class PolyTranslucentObject; class RenderPolyWall { public: - static bool RenderLine(const TriMatrix &worldToClip, seg_t *line, sector_t *frontsector, uint32_t subsectorDepth, std::vector &translucentWallsOutput); - static void Render3DFloorLine(const TriMatrix &worldToClip, seg_t *line, sector_t *frontsector, uint32_t subsectorDepth, F3DFloor *fakeFloor, std::vector &translucentWallsOutput); + static bool RenderLine(const TriMatrix &worldToClip, seg_t *line, sector_t *frontsector, uint32_t subsectorDepth, uint32_t stencilValue, std::vector &translucentWallsOutput); + static void Render3DFloorLine(const TriMatrix &worldToClip, seg_t *line, sector_t *frontsector, uint32_t subsectorDepth, uint32_t stencilValue, F3DFloor *fakeFloor, std::vector &translucentWallsOutput); void SetCoords(const DVector2 &v1, const DVector2 &v2, double ceil1, double floor1, double ceil2, double floor2); void Render(const TriMatrix &worldToClip); @@ -52,6 +52,7 @@ public: 
FSWColormap *Colormap = nullptr; bool Masked = false; uint32_t SubsectorDepth = 0; + uint32_t StencilValue = 0; private: void ClampHeight(TriVertex &v1, TriVertex &v2); diff --git a/src/r_poly_wallsprite.cpp b/src/r_poly_wallsprite.cpp index 6ed009c333..553235957e 100644 --- a/src/r_poly_wallsprite.cpp +++ b/src/r_poly_wallsprite.cpp @@ -28,7 +28,7 @@ #include "r_poly_wallsprite.h" #include "r_poly.h" -void RenderPolyWallSprite::Render(const TriMatrix &worldToClip, AActor *thing, subsector_t *sub, uint32_t subsectorDepth) +void RenderPolyWallSprite::Render(const TriMatrix &worldToClip, AActor *thing, subsector_t *sub, uint32_t subsectorDepth, uint32_t stencilValue) { if (RenderPolySprite::IsThingCulled(thing)) return; @@ -118,8 +118,8 @@ void RenderPolyWallSprite::Render(const TriMatrix &worldToClip, AActor *thing, s args.vcount = 4; args.mode = TriangleDrawMode::Fan; args.ccw = true; - args.stenciltestvalue = 0; - args.stencilwritevalue = 1; + args.stenciltestvalue = stencilValue; + args.stencilwritevalue = stencilValue; args.SetTexture(tex); args.SetColormap(sub->sector->ColorMap); PolyTriangleDrawer::draw(args, TriDrawVariant::DrawSubsector, TriBlendMode::AlphaBlend); diff --git a/src/r_poly_wallsprite.h b/src/r_poly_wallsprite.h index 66a92b0333..2942d6994a 100644 --- a/src/r_poly_wallsprite.h +++ b/src/r_poly_wallsprite.h @@ -27,5 +27,5 @@ class RenderPolyWallSprite { public: - void Render(const TriMatrix &worldToClip, AActor *thing, subsector_t *sub, uint32_t subsectorDepth); + void Render(const TriMatrix &worldToClip, AActor *thing, subsector_t *sub, uint32_t subsectorDepth, uint32_t stencilValue); }; From 558a4bcdca5adf8edc0d00b51b7fca6caa1ef652 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 24 Nov 2016 23:08:36 +0100 Subject: [PATCH 377/912] Partial sky portal support --- src/r_poly_plane.cpp | 38 +++++++++++--- src/r_poly_plane.h | 6 ++- src/r_poly_portal.cpp | 117 +++++++++++++++++++++++++++++++++++++++--- src/r_poly_portal.h | 36 
++++++++++++- src/r_poly_wall.cpp | 2 + 5 files changed, 181 insertions(+), 18 deletions(-) diff --git a/src/r_poly_plane.cpp b/src/r_poly_plane.cpp index 68a56c69bb..57db62600d 100644 --- a/src/r_poly_plane.cpp +++ b/src/r_poly_plane.cpp @@ -32,7 +32,7 @@ EXTERN_CVAR(Int, r_3dfloors) -void RenderPolyPlane::RenderPlanes(const TriMatrix &worldToClip, subsector_t *sub, uint32_t subsectorDepth, uint32_t stencilValue, double skyCeilingHeight, double skyFloorHeight) +void RenderPolyPlane::RenderPlanes(const TriMatrix &worldToClip, subsector_t *sub, uint32_t subsectorDepth, uint32_t stencilValue, double skyCeilingHeight, double skyFloorHeight, std::vector> §orPortals) { RenderPolyPlane plane; @@ -84,8 +84,8 @@ void RenderPolyPlane::RenderPlanes(const TriMatrix &worldToClip, subsector_t *su } } - plane.Render(worldToClip, sub, subsectorDepth, stencilValue, true, skyCeilingHeight); - plane.Render(worldToClip, sub, subsectorDepth, stencilValue, false, skyFloorHeight); + plane.Render(worldToClip, sub, subsectorDepth, stencilValue, true, skyCeilingHeight, sectorPortals); + plane.Render(worldToClip, sub, subsectorDepth, stencilValue, false, skyFloorHeight, sectorPortals); } void RenderPolyPlane::Render3DFloor(const TriMatrix &worldToClip, subsector_t *sub, uint32_t subsectorDepth, uint32_t stencilValue, bool ceiling, F3DFloor *fakeFloor) @@ -146,8 +146,14 @@ void RenderPolyPlane::Render3DFloor(const TriMatrix &worldToClip, subsector_t *s PolyTriangleDrawer::draw(args, TriDrawVariant::Stencil, TriBlendMode::Copy); } -void RenderPolyPlane::Render(const TriMatrix &worldToClip, subsector_t *sub, uint32_t subsectorDepth, uint32_t stencilValue, bool ceiling, double skyHeight) +void RenderPolyPlane::Render(const TriMatrix &worldToClip, subsector_t *sub, uint32_t subsectorDepth, uint32_t stencilValue, bool ceiling, double skyHeight, std::vector> §orPortals) { + FSectorPortal *portal = sub->sector->ValidatePortal(ceiling ? 
sector_t::ceiling : sector_t::floor); + if (portal && sectorPortals.empty()) + { + sectorPortals.push_back(std::make_unique(portal, ceiling)); + } + sector_t *fakesector = sub->sector->heightsec; if (fakesector && (fakesector == sub->sector || (fakesector->MoreFlags & SECF_IGNOREHEIGHTSEC) == SECF_IGNOREHEIGHTSEC)) fakesector = nullptr; @@ -237,13 +243,29 @@ void RenderPolyPlane::Render(const TriMatrix &worldToClip, subsector_t *sub, uin if (!isSky) { - args.SetTexture(tex); - PolyTriangleDrawer::draw(args, TriDrawVariant::DrawNormal, TriBlendMode::Copy); - PolyTriangleDrawer::draw(args, TriDrawVariant::Stencil, TriBlendMode::Copy); + if (!portal) + { + args.SetTexture(tex); + PolyTriangleDrawer::draw(args, TriDrawVariant::DrawNormal, TriBlendMode::Copy); + PolyTriangleDrawer::draw(args, TriDrawVariant::Stencil, TriBlendMode::Copy); + } + else + { + args.stencilwritevalue = 252; + PolyTriangleDrawer::draw(args, TriDrawVariant::Stencil, TriBlendMode::Copy); + } } else { - args.stencilwritevalue = 255; + if (portal) + { + args.stencilwritevalue = 252; + } + else + { + args.stencilwritevalue = 255; + } + PolyTriangleDrawer::draw(args, TriDrawVariant::Stencil, TriBlendMode::Copy); for (uint32_t i = 0; i < sub->numlines; i++) diff --git a/src/r_poly_plane.h b/src/r_poly_plane.h index d36bb70dcb..6e418bf502 100644 --- a/src/r_poly_plane.h +++ b/src/r_poly_plane.h @@ -24,13 +24,15 @@ #include "r_poly_triangle.h" +class PolyDrawSectorPortal; + class RenderPolyPlane { public: - static void RenderPlanes(const TriMatrix &worldToClip, subsector_t *sub, uint32_t subsectorDepth, uint32_t stencilValue, double skyCeilingHeight, double skyFloorHeight); + static void RenderPlanes(const TriMatrix &worldToClip, subsector_t *sub, uint32_t subsectorDepth, uint32_t stencilValue, double skyCeilingHeight, double skyFloorHeight, std::vector> §orPortals); private: void Render3DFloor(const TriMatrix &worldToClip, subsector_t *sub, uint32_t subsectorDepth, uint32_t stencilValue, bool ceiling, 
F3DFloor *fakefloor); - void Render(const TriMatrix &worldToClip, subsector_t *sub, uint32_t subsectorDepth, uint32_t stencilValue, bool ceiling, double skyHeight); + void Render(const TriMatrix &worldToClip, subsector_t *sub, uint32_t subsectorDepth, uint32_t stencilValue, bool ceiling, double skyHeight, std::vector> §orPortals); TriVertex PlaneVertex(vertex_t *v1, double height); }; diff --git a/src/r_poly_portal.cpp b/src/r_poly_portal.cpp index 6b37d6424b..aad43986bf 100644 --- a/src/r_poly_portal.cpp +++ b/src/r_poly_portal.cpp @@ -29,9 +29,18 @@ #include "gl/data/gl_data.h" CVAR(Bool, r_debug_cull, 0, 0) +EXTERN_CVAR(Int, r_portal_recursions) ///////////////////////////////////////////////////////////////////////////// +RenderPolyPortal::RenderPolyPortal() +{ +} + +RenderPolyPortal::~RenderPolyPortal() +{ +} + void RenderPolyPortal::SetViewpoint(const TriMatrix &worldToClip, uint32_t stencilValue) { WorldToClip = worldToClip; @@ -43,7 +52,9 @@ void RenderPolyPortal::Render() ClearBuffers(); Cull.CullScene(WorldToClip); RenderSectors(); - for (auto &portal : Portals) + for (auto &portal : SectorPortals) + portal->Render(); + for (auto &portal : LinePortals) portal->Render(); } @@ -53,7 +64,8 @@ void RenderPolyPortal::ClearBuffers() SectorSpriteRanges.resize(numsectors); SortedSprites.clear(); TranslucentObjects.clear(); - Portals.clear(); + SectorPortals.clear(); + LinePortals.clear(); NextSubsectorDepth = 0; } @@ -80,10 +92,7 @@ void RenderPolyPortal::RenderSubsector(subsector_t *sub) if (sub->sector->CenterFloor() != sub->sector->CenterCeiling()) { - RenderPolyPlane::RenderPlanes(WorldToClip, sub, subsectorDepth, StencilValue, Cull.MaxCeilingHeight, Cull.MinFloorHeight); - - FSectorPortal *ceilingPortal = frontsector->ValidatePortal(sector_t::ceiling); - FSectorPortal *floorPortal = frontsector->ValidatePortal(sector_t::floor); + RenderPolyPlane::RenderPlanes(WorldToClip, sub, subsectorDepth, StencilValue, Cull.MaxCeilingHeight, Cull.MinFloorHeight, 
SectorPortals); } for (uint32_t i = 0; i < sub->numlines; i++) @@ -181,7 +190,10 @@ void RenderPolyPortal::RenderLine(subsector_t *sub, seg_t *line, sector_t *front void RenderPolyPortal::RenderTranslucent() { - for (auto it = Portals.rbegin(); it != Portals.rend(); ++it) + for (auto it = SectorPortals.rbegin(); it != SectorPortals.rend(); ++it) + (*it)->RenderTranslucent(); + + for (auto it = LinePortals.rbegin(); it != LinePortals.rend(); ++it) (*it)->RenderTranslucent(); for (auto it = TranslucentObjects.rbegin(); it != TranslucentObjects.rend(); ++it) @@ -208,3 +220,94 @@ void RenderPolyPortal::RenderTranslucent() } } } + +///////////////////////////////////////////////////////////////////////////// + +PolyDrawSectorPortal::PolyDrawSectorPortal(FSectorPortal *portal, bool ceiling) : Portal(portal), Ceiling(ceiling) +{ +} + +void PolyDrawSectorPortal::Render() +{ + if (Portal->mType != PORTS_SKYVIEWPOINT) + return; + + static int recursion = 0; + if (recursion >= 1/*r_portal_recursions*/) + return; + recursion++; + + int savedextralight = extralight; + DVector3 savedpos = ViewPos; + DAngle savedangle = ViewAngle; + double savedvisibility = R_GetVisibility(); + AActor *savedcamera = camera; + sector_t *savedsector = viewsector; + + // Don't let gun flashes brighten the sky box + ASkyViewpoint *sky = barrier_cast(Portal->mSkybox); + extralight = 0; + R_SetVisibility(sky->args[0] * 0.25f); + ViewPos = sky->InterpolatedPosition(r_TicFracF); + ViewAngle = savedangle + (sky->PrevAngles.Yaw + deltaangle(sky->PrevAngles.Yaw, sky->Angles.Yaw) * r_TicFracF); + + camera = nullptr; + viewsector = Portal->mDestination; + R_SetViewAngle(); + + // To do: get this information from RenderPolyScene instead of duplicating the code.. 
+ double radPitch = ViewPitch.Normalized180().Radians(); + double angx = cos(radPitch); + double angy = sin(radPitch) * glset.pixelstretch; + double alen = sqrt(angx*angx + angy*angy); + float adjustedPitch = (float)asin(angy / alen); + float adjustedViewAngle = (float)(ViewAngle - 90).Radians(); + float ratio = WidescreenRatio; + float fovratio = (WidescreenRatio >= 1.3f) ? 1.333333f : ratio; + float fovy = (float)(2 * DAngle::ToDegrees(atan(tan(FieldOfView.Radians() / 2) / fovratio)).Degrees); + TriMatrix worldToView = + TriMatrix::rotate(adjustedPitch, 1.0f, 0.0f, 0.0f) * + TriMatrix::rotate(adjustedViewAngle, 0.0f, -1.0f, 0.0f) * + TriMatrix::scale(1.0f, glset.pixelstretch, 1.0f) * + TriMatrix::swapYZ() * + TriMatrix::translate((float)-ViewPos.X, (float)-ViewPos.Y, (float)-ViewPos.Z); + TriMatrix worldToClip = TriMatrix::perspective(fovy, ratio, 5.0f, 65535.0f) * worldToView; + + RenderPortal.SetViewpoint(worldToClip, 252); + RenderPortal.Render(); + + camera = savedcamera; + viewsector = savedsector; + ViewPos = savedpos; + R_SetVisibility(savedvisibility); + extralight = savedextralight; + ViewAngle = savedangle; + R_SetViewAngle(); + + recursion--; +} + +void PolyDrawSectorPortal::RenderTranslucent() +{ + /*if (Portal->mType != PORTS_SKYVIEWPOINT) + return; + + RenderPortal.RenderTranslucent();*/ +} + +///////////////////////////////////////////////////////////////////////////// + +PolyDrawLinePortal::PolyDrawLinePortal(line_t *src, line_t *dest, bool mirror) : Src(src), Dest(dest) +{ + // To do: do what R_EnterPortal and PortalDrawseg does +} + +void PolyDrawLinePortal::Render() +{ + RenderPortal.Render(); +} + +void PolyDrawLinePortal::RenderTranslucent() +{ + RenderPortal.RenderTranslucent(); +} diff --git a/src/r_poly_portal.h b/src/r_poly_portal.h index 364f2dcfb4..34e4666f78 100644 --- a/src/r_poly_portal.h +++ b/src/r_poly_portal.h @@ -74,10 +74,15 @@ public: int Count = 0; }; +class PolyDrawSectorPortal; +class PolyDrawLinePortal; + // Renders 
everything from a specific viewpoint class RenderPolyPortal { public: + RenderPolyPortal(); + ~RenderPolyPortal(); void SetViewpoint(const TriMatrix &worldToClip, uint32_t stencilValue); void Render(); void RenderTranslucent(); @@ -101,5 +106,34 @@ private: std::vector TranslucentObjects; std::vector SubsectorTranslucentWalls; - std::vector> Portals; + std::vector> SectorPortals; + std::vector> LinePortals; +}; + +class PolyDrawSectorPortal +{ +public: + PolyDrawSectorPortal(FSectorPortal *portal, bool ceiling); + + void Render(); + void RenderTranslucent(); + +private: + FSectorPortal *Portal; + bool Ceiling; + RenderPolyPortal RenderPortal; +}; + +class PolyDrawLinePortal +{ +public: + PolyDrawLinePortal(line_t *src, line_t *dest, bool mirror); + + void Render(); + void RenderTranslucent(); + +private: + line_t *Src; + line_t *Dest; + RenderPolyPortal RenderPortal; }; diff --git a/src/r_poly_wall.cpp b/src/r_poly_wall.cpp index f49908965f..a417784d35 100644 --- a/src/r_poly_wall.cpp +++ b/src/r_poly_wall.cpp @@ -39,6 +39,7 @@ bool RenderPolyWall::RenderLine(const TriMatrix &worldToClip, seg_t *line, secto wall.Colormap = frontsector->ColorMap; wall.Masked = false; wall.SubsectorDepth = subsectorDepth; + wall.StencilValue = stencilValue; double frontceilz1 = frontsector->ceilingplane.ZatPoint(line->v1); double frontfloorz1 = frontsector->floorplane.ZatPoint(line->v1); @@ -134,6 +135,7 @@ void RenderPolyWall::Render3DFloorLine(const TriMatrix &worldToClip, seg_t *line wall.Colormap = frontsector->ColorMap; wall.Masked = false; wall.SubsectorDepth = subsectorDepth; + wall.StencilValue = stencilValue; wall.SetCoords(line->v1->fPos(), line->v2->fPos(), frontceilz1, frontfloorz1, frontceilz2, frontfloorz2); wall.TopZ = frontceilz1; wall.BottomZ = frontfloorz1; From 1def7b3eaea3dc34617f3fb8659ba8aeb21788dc Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 25 Nov 2016 01:08:25 +0100 Subject: [PATCH 378/912] More portal handling stuff --- src/r_poly_plane.cpp | 24 
++++++++-- src/r_poly_portal.cpp | 101 ++++++++++++++++++++++++++++++------------ src/r_poly_portal.h | 24 +++++++++- src/r_poly_wall.cpp | 2 +- 4 files changed, 118 insertions(+), 33 deletions(-) diff --git a/src/r_poly_plane.cpp b/src/r_poly_plane.cpp index 57db62600d..0c19d9832a 100644 --- a/src/r_poly_plane.cpp +++ b/src/r_poly_plane.cpp @@ -149,11 +149,24 @@ void RenderPolyPlane::Render3DFloor(const TriMatrix &worldToClip, subsector_t *s void RenderPolyPlane::Render(const TriMatrix &worldToClip, subsector_t *sub, uint32_t subsectorDepth, uint32_t stencilValue, bool ceiling, double skyHeight, std::vector> §orPortals) { FSectorPortal *portal = sub->sector->ValidatePortal(ceiling ? sector_t::ceiling : sector_t::floor); - if (portal && sectorPortals.empty()) + PolyDrawSectorPortal *polyportal = nullptr; + if (portal) { - sectorPortals.push_back(std::make_unique(portal, ceiling)); + for (auto &p : sectorPortals) + { + if (p->Portal == portal) // To do: what other criterias do we need to check for? 
+ { + polyportal = p.get(); + break; + } + } + if (!portal) + { + sectorPortals.push_back(std::make_unique(portal, ceiling)); + polyportal = sectorPortals.back().get(); + } } - + sector_t *fakesector = sub->sector->heightsec; if (fakesector && (fakesector == sub->sector || (fakesector->MoreFlags & SECF_IGNOREHEIGHTSEC) == SECF_IGNOREHEIGHTSEC)) fakesector = nullptr; @@ -253,6 +266,7 @@ void RenderPolyPlane::Render(const TriMatrix &worldToClip, subsector_t *sub, uin { args.stencilwritevalue = 252; PolyTriangleDrawer::draw(args, TriDrawVariant::Stencil, TriBlendMode::Copy); + polyportal->Shape.push_back({ args.vinput, args.vcount, args.ccw }); } } else @@ -260,6 +274,7 @@ void RenderPolyPlane::Render(const TriMatrix &worldToClip, subsector_t *sub, uin if (portal) { args.stencilwritevalue = 252; + polyportal->Shape.push_back({ args.vinput, args.vcount, args.ccw }); } else { @@ -335,6 +350,9 @@ void RenderPolyPlane::Render(const TriMatrix &worldToClip, subsector_t *sub, uin args.vinput = wallvert; args.vcount = 4; PolyTriangleDrawer::draw(args, TriDrawVariant::Stencil, TriBlendMode::Copy); + + if (portal) + polyportal->Shape.push_back({ args.vinput, args.vcount, args.ccw }); } } } diff --git a/src/r_poly_portal.cpp b/src/r_poly_portal.cpp index aad43986bf..e500e24d1a 100644 --- a/src/r_poly_portal.cpp +++ b/src/r_poly_portal.cpp @@ -128,7 +128,7 @@ void RenderPolyPortal::RenderSubsector(subsector_t *sub) SpriteRange RenderPolyPortal::GetSpritesForSector(sector_t *sector) { - if (SectorSpriteRanges.size() < sector->sectornum || sector->sectornum < 0) + if ((int)SectorSpriteRanges.size() < sector->sectornum || sector->sectornum < 0) return SpriteRange(); auto &range = SectorSpriteRanges[sector->sectornum]; @@ -191,10 +191,38 @@ void RenderPolyPortal::RenderLine(subsector_t *sub, seg_t *line, sector_t *front void RenderPolyPortal::RenderTranslucent() { for (auto it = SectorPortals.rbegin(); it != SectorPortals.rend(); ++it) + { (*it)->RenderTranslucent(); + + PolyDrawArgs 
args; + args.objectToClip = &WorldToClip; + args.stenciltestvalue = 253; + args.stencilwritevalue = 1; + for (const auto &verts : (*it)->Shape) + { + args.vinput = verts.Vertices; + args.vcount = verts.Count; + args.ccw = verts.Ccw; + PolyTriangleDrawer::draw(args, TriDrawVariant::Stencil, TriBlendMode::Copy); + } + } for (auto it = LinePortals.rbegin(); it != LinePortals.rend(); ++it) + { (*it)->RenderTranslucent(); + + PolyDrawArgs args; + args.objectToClip = &WorldToClip; + args.stenciltestvalue = 253; + args.stencilwritevalue = 1; + for (const auto &verts : (*it)->Shape) + { + args.vinput = verts.Vertices; + args.vcount = verts.Count; + args.ccw = verts.Ccw; + PolyTriangleDrawer::draw(args, TriDrawVariant::Stencil, TriBlendMode::Copy); + } + } for (auto it = TranslucentObjects.rbegin(); it != TranslucentObjects.rend(); ++it) { @@ -237,23 +265,7 @@ void PolyDrawSectorPortal::Render() return; recursion++; - int savedextralight = extralight; - DVector3 savedpos = ViewPos; - DAngle savedangle = ViewAngle; - double savedvisibility = R_GetVisibility(); - AActor *savedcamera = camera; - sector_t *savedsector = viewsector; - - // Don't let gun flashes brighten the sky box - ASkyViewpoint *sky = barrier_cast(Portal->mSkybox); - extralight = 0; - R_SetVisibility(sky->args[0] * 0.25f); - ViewPos = sky->InterpolatedPosition(r_TicFracF); - ViewAngle = savedangle + (sky->PrevAngles.Yaw + deltaangle(sky->PrevAngles.Yaw, sky->Angles.Yaw) * r_TicFracF); - - camera = nullptr; - viewsector = Portal->mDestination; - R_SetViewAngle(); + SaveGlobals(); // To do: get this information from RenderPolyScene instead of duplicating the code.. 
double radPitch = ViewPitch.Normalized180().Radians(); @@ -275,7 +287,50 @@ void PolyDrawSectorPortal::Render() RenderPortal.SetViewpoint(worldToClip, 252); RenderPortal.Render(); + + RestoreGlobals(); + + recursion--; +} +void PolyDrawSectorPortal::RenderTranslucent() +{ + if (Portal->mType != PORTS_SKYVIEWPOINT) + return; + + static int recursion = 0; + if (recursion >= 1/*r_portal_recursions*/) + return; + recursion++; + + RenderPortal.RenderTranslucent(); + + recursion--; +} + +void PolyDrawSectorPortal::SaveGlobals() +{ + int savedextralight = extralight; + DVector3 savedpos = ViewPos; + DAngle savedangle = ViewAngle; + double savedvisibility = R_GetVisibility(); + AActor *savedcamera = camera; + sector_t *savedsector = viewsector; + + // Don't let gun flashes brighten the sky box + ASkyViewpoint *sky = barrier_cast(Portal->mSkybox); + extralight = 0; + R_SetVisibility(sky->args[0] * 0.25f); + ViewPos = sky->InterpolatedPosition(r_TicFracF); + ViewAngle = savedangle + (sky->PrevAngles.Yaw + deltaangle(sky->PrevAngles.Yaw, sky->Angles.Yaw) * r_TicFracF); + + camera = nullptr; + viewsector = Portal->mDestination; + R_SetViewAngle(); +} + +void PolyDrawSectorPortal::RestoreGlobals() +{ camera = savedcamera; viewsector = savedsector; ViewPos = savedpos; @@ -283,16 +338,6 @@ void PolyDrawSectorPortal::Render() extralight = savedextralight; ViewAngle = savedangle; R_SetViewAngle(); - - recursion--; -} - -void PolyDrawSectorPortal::RenderTranslucent() -{ - /*if (Portal->mType != PORTS_SKYVIEWPOINT) - return; - - RenderPortal.RenderTranslucent();*/ } ///////////////////////////////////////////////////////////////////////////// diff --git a/src/r_poly_portal.h b/src/r_poly_portal.h index 34e4666f78..6042d5938d 100644 --- a/src/r_poly_portal.h +++ b/src/r_poly_portal.h @@ -110,6 +110,14 @@ private: std::vector> LinePortals; }; +struct PolyPortalVertexRange +{ + PolyPortalVertexRange(const TriVertex *vertices, int count, bool ccw) : Vertices(vertices), Count(count), 
Ccw(ccw) { } + const TriVertex *Vertices; + int Count; + bool Ccw; +}; + class PolyDrawSectorPortal { public: @@ -117,11 +125,23 @@ public: void Render(); void RenderTranslucent(); + + FSectorPortal *Portal; + std::vector Shape; private: - FSectorPortal *Portal; + void SaveGlobals(); + void RestoreGlobals(); + bool Ceiling; RenderPolyPortal RenderPortal; + + int savedextralight; + DVector3 savedpos; + DAngle savedangle; + double savedvisibility; + AActor *savedcamera; + sector_t *savedsector; }; class PolyDrawLinePortal @@ -132,6 +152,8 @@ public: void Render(); void RenderTranslucent(); + std::vector Shape; + private: line_t *Src; line_t *Dest; diff --git a/src/r_poly_wall.cpp b/src/r_poly_wall.cpp index a417784d35..8f3fb05d73 100644 --- a/src/r_poly_wall.cpp +++ b/src/r_poly_wall.cpp @@ -312,7 +312,7 @@ void PolyWallTextureCoords::CalcU(FTexture *tex, const seg_t *lineseg, const lin double lineLength = side->TexelLength; double lineStart = 0.0; - bool entireSegment = ((lineseg->v1 == line->v1) && (lineseg->v2 == line->v2) || (lineseg->v2 == line->v1) && (lineseg->v1 == line->v2)); + bool entireSegment = ((lineseg->v1 == line->v1) && (lineseg->v2 == line->v2)) || ((lineseg->v2 == line->v1) && (lineseg->v1 == line->v2)); if (!entireSegment) { lineLength = (lineseg->v2->fPos() - lineseg->v1->fPos()).Length(); From e642ed099a644ed052e3d52abc73133d31204946 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 25 Nov 2016 03:26:15 +0100 Subject: [PATCH 379/912] Fix palette blending --- .../fixedfunction/drawtrianglecodegen.cpp | 41 ++++++++++++++----- src/r_compiler/llvmdrawers.cpp | 2 +- 2 files changed, 31 insertions(+), 12 deletions(-) diff --git a/src/r_compiler/fixedfunction/drawtrianglecodegen.cpp b/src/r_compiler/fixedfunction/drawtrianglecodegen.cpp index d8a436101c..b34f347994 100644 --- a/src/r_compiler/fixedfunction/drawtrianglecodegen.cpp +++ b/src/r_compiler/fixedfunction/drawtrianglecodegen.cpp @@ -671,48 +671,67 @@ SSAInt 
DrawTriangleCodegen::ProcessPixel8(SSAInt bg, SSAInt *varying) break; case TriBlendMode::AlphaBlend: palindex = Sample8(uvoffset); - output = (palindex == SSAInt(0)).select(bg, Shade8(palindex)); + output = Shade8(palindex); + output = (palindex == SSAInt(0)).select(bg, output); break; case TriBlendMode::AddSolid: - fg = ToBgra(Shade8(Sample8(uvoffset))); + palindex = Sample8(uvoffset); + fg = ToBgra(Shade8(palindex)); output = ToPal8(blend_add(fg, ToBgra(bg), srcalpha, destalpha)); + output = (palindex == SSAInt(0)).select(bg, output); break; case TriBlendMode::Add: - fg = ToBgra(Shade8(Sample8(uvoffset))); + palindex = Sample8(uvoffset); + fg = ToBgra(Shade8(palindex)); output = ToPal8(blend_add(fg, ToBgra(bg), srcalpha, calc_blend_bgalpha(fg, destalpha))); + output = (palindex == SSAInt(0)).select(bg, output); break; case TriBlendMode::Sub: - fg = ToBgra(Shade8(Sample8(uvoffset))); + palindex = Sample8(uvoffset); + fg = ToBgra(Shade8(palindex)); output = ToPal8(blend_sub(fg, ToBgra(bg), srcalpha, calc_blend_bgalpha(fg, destalpha))); + output = (palindex == SSAInt(0)).select(bg, output); break; case TriBlendMode::RevSub: - fg = ToBgra(Shade8(Sample8(uvoffset))); + palindex = Sample8(uvoffset); + fg = ToBgra(Shade8(palindex)); output = ToPal8(blend_revsub(fg, ToBgra(bg), srcalpha, calc_blend_bgalpha(fg, destalpha))); + output = (palindex == SSAInt(0)).select(bg, output); break; case TriBlendMode::Stencil: output = ToPal8(blend_stencil(ToBgra(Shade8(color)), (Sample8(uvoffset) == SSAInt(0)).select(SSAInt(0), SSAInt(256)), ToBgra(bg), srcalpha, destalpha)); break; case TriBlendMode::Shaded: - output = ToPal8(blend_stencil(ToBgra(Shade8(color)), Sample8(uvoffset), ToBgra(bg), srcalpha, destalpha)); + palindex = Sample8(uvoffset); + output = ToPal8(blend_stencil(ToBgra(Shade8(color)), palindex, ToBgra(bg), srcalpha, destalpha)); break; case TriBlendMode::TranslateCopy: - output = Shade8(TranslateSample8(uvoffset)); + palindex = TranslateSample8(uvoffset); + output = 
Shade8(palindex); + output = (palindex == SSAInt(0)).select(bg, output); break; case TriBlendMode::TranslateAlphaBlend: palindex = TranslateSample8(uvoffset); - output = (palindex == SSAInt(0)).select(bg, Shade8(palindex)); + output = Shade8(palindex); + output = (palindex == SSAInt(0)).select(bg, output); break; case TriBlendMode::TranslateAdd: - fg = ToBgra(Shade8(Sample8(uvoffset))); + palindex = TranslateSample8(uvoffset); + fg = ToBgra(Shade8(palindex)); output = ToPal8(blend_add(fg, ToBgra(bg), srcalpha, calc_blend_bgalpha(fg, destalpha))); + output = (palindex == SSAInt(0)).select(bg, output); break; case TriBlendMode::TranslateSub: - fg = ToBgra(Shade8(Sample8(uvoffset))); + palindex = TranslateSample8(uvoffset); + fg = ToBgra(Shade8(palindex)); output = ToPal8(blend_sub(fg, ToBgra(bg), srcalpha, calc_blend_bgalpha(fg, destalpha))); + output = (palindex == SSAInt(0)).select(bg, output); break; case TriBlendMode::TranslateRevSub: - fg = ToBgra(Shade8(Sample8(uvoffset))); + palindex = TranslateSample8(uvoffset); + fg = ToBgra(Shade8(palindex)); output = ToPal8(blend_revsub(fg, ToBgra(bg), srcalpha, calc_blend_bgalpha(fg, destalpha))); + output = (palindex == SSAInt(0)).select(bg, output); break; } diff --git a/src/r_compiler/llvmdrawers.cpp b/src/r_compiler/llvmdrawers.cpp index 3abf732c0d..fc44f67bd4 100644 --- a/src/r_compiler/llvmdrawers.cpp +++ b/src/r_compiler/llvmdrawers.cpp @@ -127,7 +127,7 @@ LLVMDrawers *LLVMDrawers::Instance() LLVMDrawersImpl::LLVMDrawersImpl() { - int version = 6; // Increment this number if the drawer codegen is modified (forces recreation of the module). + int version = 7; // Increment this number if the drawer codegen is modified (forces recreation of the module). 
std::string targetCPU = mProgram.GetTargetCPU(); bool loaded = mProgram.LoadCachedModule(version, targetCPU); if (!loaded) From b2ad26d2d5868cf9d66de3dc8b093b5e03be982f Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 25 Nov 2016 07:44:51 +0100 Subject: [PATCH 380/912] Sealing off portal entrances for the transparency pass --- .../fixedfunction/drawtrianglecodegen.cpp | 40 ++++++++++++++----- src/r_compiler/llvmdrawers.cpp | 4 +- src/r_compiler/llvmdrawers.h | 4 +- src/r_compiler/ssa/ssa_bool.cpp | 5 +++ src/r_compiler/ssa/ssa_bool.h | 2 + src/r_poly_plane.cpp | 8 ++-- src/r_poly_portal.cpp | 20 ++++++---- src/r_poly_portal.h | 3 +- src/r_poly_triangle.cpp | 1 + 9 files changed, 61 insertions(+), 26 deletions(-) diff --git a/src/r_compiler/fixedfunction/drawtrianglecodegen.cpp b/src/r_compiler/fixedfunction/drawtrianglecodegen.cpp index b34f347994..09d3a618e9 100644 --- a/src/r_compiler/fixedfunction/drawtrianglecodegen.cpp +++ b/src/r_compiler/fixedfunction/drawtrianglecodegen.cpp @@ -273,11 +273,7 @@ void DrawTriangleCodegen::LoopBlockX() SetStencilBlock(x / 8 + y / 8 * stencilPitch); - SSABool covered = a == SSAInt(0xF) && b == SSAInt(0xF) && c == SSAInt(0xF) && !clipneeded; - if (variant != TriDrawVariant::DrawSubsector && variant != TriDrawVariant::FillSubsector && variant != TriDrawVariant::FuzzSubsector) - { - covered = covered && StencilIsSingleValue(); - } + SSABool covered = a == SSAInt(0xF) && b == SSAInt(0xF) && c == SSAInt(0xF) && !clipneeded && StencilIsSingleValue(); // Accept whole block when totally covered SSAIfBlock branch_covered; @@ -301,7 +297,11 @@ void DrawTriangleCodegen::LoopBlockX() void DrawTriangleCodegen::LoopFullBlock() { SSAIfBlock branch_stenciltest; - if (variant != TriDrawVariant::DrawSubsector && variant != TriDrawVariant::FillSubsector && variant != TriDrawVariant::FuzzSubsector) + if (variant == TriDrawVariant::DrawSubsector || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector || 
variant == TriDrawVariant::StencilClose) + { + branch_stenciltest.if_block(SSABool::compare_uge(StencilGetSingle(), stencilTestValue)); + } + else { branch_stenciltest.if_block(StencilGetSingle() == stencilTestValue); } @@ -310,6 +310,18 @@ void DrawTriangleCodegen::LoopFullBlock() { StencilClear(stencilWriteValue); } + else if (variant == TriDrawVariant::StencilClose) + { + StencilClear(stencilWriteValue); + for (int iy = 0; iy < q; iy++) + { + SSAIntPtr subsectorbuffer = subsectorGBuffer[x + iy * pitch]; + for (int ix = 0; ix < q; ix += 4) + { + subsectorbuffer[ix].store_unaligned_vec4i(SSAVec4i(subsectorDepth)); + } + } + } else { int pixelsize = truecolor ? 4 : 1; @@ -407,10 +419,7 @@ void DrawTriangleCodegen::LoopFullBlock() } } - if (variant != TriDrawVariant::DrawSubsector && variant != TriDrawVariant::FillSubsector && variant != TriDrawVariant::FuzzSubsector) - { - branch_stenciltest.end_block(); - } + branch_stenciltest.end_block(); } void DrawTriangleCodegen::LoopPartialBlock() @@ -468,7 +477,11 @@ void DrawTriangleCodegen::LoopPartialBlock() if (variant == TriDrawVariant::DrawSubsector || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector) { - covered = covered && subsectorbuffer[ix].load(true) >= subsectorDepth; + covered = covered && SSABool::compare_uge(StencilGet(ix, iy), stencilTestValue) && subsectorbuffer[ix].load(true) >= subsectorDepth; + } + else if (variant == TriDrawVariant::StencilClose) + { + covered = covered && SSABool::compare_uge(StencilGet(ix, iy), stencilTestValue); } else { @@ -482,6 +495,11 @@ void DrawTriangleCodegen::LoopPartialBlock() { StencilSet(ix, iy, stencilWriteValue); } + else if (variant == TriDrawVariant::StencilClose) + { + StencilSet(ix, iy, stencilWriteValue); + subsectorbuffer[ix].store(subsectorDepth); + } else { SSAUBytePtr buf = buffer[ix * pixelsize]; diff --git a/src/r_compiler/llvmdrawers.cpp b/src/r_compiler/llvmdrawers.cpp index fc44f67bd4..babb7c6e70 100644 --- 
a/src/r_compiler/llvmdrawers.cpp +++ b/src/r_compiler/llvmdrawers.cpp @@ -127,7 +127,7 @@ LLVMDrawers *LLVMDrawers::Instance() LLVMDrawersImpl::LLVMDrawersImpl() { - int version = 7; // Increment this number if the drawer codegen is modified (forces recreation of the module). + int version = 8; // Increment this number if the drawer codegen is modified (forces recreation of the module). std::string targetCPU = mProgram.GetTargetCPU(); bool loaded = mProgram.LoadCachedModule(version, targetCPU); if (!loaded) @@ -208,6 +208,7 @@ LLVMDrawersImpl::LLVMDrawersImpl() CodegenDrawTriangle("TriFillSubsector32_" + std::to_string(i), TriDrawVariant::FillSubsector, (TriBlendMode)i, true); } CodegenDrawTriangle("TriStencil", TriDrawVariant::Stencil, TriBlendMode::Copy, false); + CodegenDrawTriangle("TriStencilClose", TriDrawVariant::StencilClose, TriBlendMode::Copy, false); } mProgram.CreateEE(version, targetCPU, !loaded); @@ -286,6 +287,7 @@ LLVMDrawersImpl::LLVMDrawersImpl() TriFillSubsector32.push_back(mProgram.GetProcAddress("TriFillSubsector32_" + std::to_string(i))); } TriStencil = mProgram.GetProcAddress("TriStencil"); + TriStencilClose = mProgram.GetProcAddress("TriStencilClose"); #if 0 std::vector foo(1024 * 4); diff --git a/src/r_compiler/llvmdrawers.h b/src/r_compiler/llvmdrawers.h index 6113b1d042..4ce3a3a037 100644 --- a/src/r_compiler/llvmdrawers.h +++ b/src/r_compiler/llvmdrawers.h @@ -270,7 +270,8 @@ enum class TriDrawVariant DrawSubsector, FillSubsector, FuzzSubsector, - Stencil + Stencil, + StencilClose }; enum class TriBlendMode @@ -376,6 +377,7 @@ public: std::vector TriFillSubsector8; std::vector TriFillSubsector32; void(*TriStencil)(const TriDrawTriangleArgs *, WorkerThreadData *) = nullptr; + void(*TriStencilClose)(const TriDrawTriangleArgs *, WorkerThreadData *) = nullptr; private: static LLVMDrawers *Singleton; diff --git a/src/r_compiler/ssa/ssa_bool.cpp b/src/r_compiler/ssa/ssa_bool.cpp index 916350c59d..d6ecad4e84 100644 --- 
a/src/r_compiler/ssa/ssa_bool.cpp +++ b/src/r_compiler/ssa/ssa_bool.cpp @@ -67,6 +67,11 @@ SSAVec4i SSABool::select(SSAVec4i a, SSAVec4i b) return SSAValue::from_llvm(SSAScope::builder().CreateSelect(v, a.v, b.v, SSAScope::hint())); } +SSABool SSABool::compare_uge(const SSAUByte &a, const SSAUByte &b) +{ + return SSABool::from_llvm(SSAScope::builder().CreateICmpUGE(a.v, b.v, SSAScope::hint())); +} + SSABool operator&&(const SSABool &a, const SSABool &b) { return SSABool::from_llvm(SSAScope::builder().CreateAnd(a.v, b.v, SSAScope::hint())); diff --git a/src/r_compiler/ssa/ssa_bool.h b/src/r_compiler/ssa/ssa_bool.h index 372c626c04..9b8a564d85 100644 --- a/src/r_compiler/ssa/ssa_bool.h +++ b/src/r_compiler/ssa/ssa_bool.h @@ -45,6 +45,8 @@ public: SSAUByte select(SSAUByte a, SSAUByte b); SSAVec4i select(SSAVec4i a, SSAVec4i b); + static SSABool compare_uge(const SSAUByte &a, const SSAUByte &b); + llvm::Value *v; }; diff --git a/src/r_poly_plane.cpp b/src/r_poly_plane.cpp index 0c19d9832a..c8248bed66 100644 --- a/src/r_poly_plane.cpp +++ b/src/r_poly_plane.cpp @@ -160,7 +160,7 @@ void RenderPolyPlane::Render(const TriMatrix &worldToClip, subsector_t *sub, uin break; } } - if (!portal) + if (!polyportal) { sectorPortals.push_back(std::make_unique(portal, ceiling)); polyportal = sectorPortals.back().get(); @@ -266,7 +266,7 @@ void RenderPolyPlane::Render(const TriMatrix &worldToClip, subsector_t *sub, uin { args.stencilwritevalue = 252; PolyTriangleDrawer::draw(args, TriDrawVariant::Stencil, TriBlendMode::Copy); - polyportal->Shape.push_back({ args.vinput, args.vcount, args.ccw }); + polyportal->Shape.push_back({ args.vinput, args.vcount, args.ccw, subsectorDepth }); } } else @@ -274,7 +274,7 @@ void RenderPolyPlane::Render(const TriMatrix &worldToClip, subsector_t *sub, uin if (portal) { args.stencilwritevalue = 252; - polyportal->Shape.push_back({ args.vinput, args.vcount, args.ccw }); + polyportal->Shape.push_back({ args.vinput, args.vcount, args.ccw, subsectorDepth 
}); } else { @@ -352,7 +352,7 @@ void RenderPolyPlane::Render(const TriMatrix &worldToClip, subsector_t *sub, uin PolyTriangleDrawer::draw(args, TriDrawVariant::Stencil, TriBlendMode::Copy); if (portal) - polyportal->Shape.push_back({ args.vinput, args.vcount, args.ccw }); + polyportal->Shape.push_back({ args.vinput, args.vcount, args.ccw, subsectorDepth }); } } } diff --git a/src/r_poly_portal.cpp b/src/r_poly_portal.cpp index e500e24d1a..805195835d 100644 --- a/src/r_poly_portal.cpp +++ b/src/r_poly_portal.cpp @@ -196,6 +196,7 @@ void RenderPolyPortal::RenderTranslucent() PolyDrawArgs args; args.objectToClip = &WorldToClip; + args.mode = TriangleDrawMode::Fan; args.stenciltestvalue = 253; args.stencilwritevalue = 1; for (const auto &verts : (*it)->Shape) @@ -203,7 +204,8 @@ void RenderPolyPortal::RenderTranslucent() args.vinput = verts.Vertices; args.vcount = verts.Count; args.ccw = verts.Ccw; - PolyTriangleDrawer::draw(args, TriDrawVariant::Stencil, TriBlendMode::Copy); + args.uniforms.subsectorDepth = verts.SubsectorDepth; + PolyTriangleDrawer::draw(args, TriDrawVariant::StencilClose, TriBlendMode::Copy); } } @@ -213,6 +215,7 @@ void RenderPolyPortal::RenderTranslucent() PolyDrawArgs args; args.objectToClip = &WorldToClip; + args.mode = TriangleDrawMode::Fan; args.stenciltestvalue = 253; args.stencilwritevalue = 1; for (const auto &verts : (*it)->Shape) @@ -220,7 +223,8 @@ void RenderPolyPortal::RenderTranslucent() args.vinput = verts.Vertices; args.vcount = verts.Count; args.ccw = verts.Ccw; - PolyTriangleDrawer::draw(args, TriDrawVariant::Stencil, TriBlendMode::Copy); + args.uniforms.subsectorDepth = verts.SubsectorDepth; + PolyTriangleDrawer::draw(args, TriDrawVariant::StencilClose, TriBlendMode::Copy); } } @@ -310,12 +314,12 @@ void PolyDrawSectorPortal::RenderTranslucent() void PolyDrawSectorPortal::SaveGlobals() { - int savedextralight = extralight; - DVector3 savedpos = ViewPos; - DAngle savedangle = ViewAngle; - double savedvisibility = 
R_GetVisibility(); - AActor *savedcamera = camera; - sector_t *savedsector = viewsector; + savedextralight = extralight; + savedpos = ViewPos; + savedangle = ViewAngle; + savedvisibility = R_GetVisibility(); + savedcamera = camera; + savedsector = viewsector; // Don't let gun flashes brighten the sky box ASkyViewpoint *sky = barrier_cast(Portal->mSkybox); diff --git a/src/r_poly_portal.h b/src/r_poly_portal.h index 6042d5938d..8878f496d8 100644 --- a/src/r_poly_portal.h +++ b/src/r_poly_portal.h @@ -112,10 +112,11 @@ private: struct PolyPortalVertexRange { - PolyPortalVertexRange(const TriVertex *vertices, int count, bool ccw) : Vertices(vertices), Count(count), Ccw(ccw) { } + PolyPortalVertexRange(const TriVertex *vertices, int count, bool ccw, uint32_t subsectorDepth) : Vertices(vertices), Count(count), Ccw(ccw), SubsectorDepth(subsectorDepth) { } const TriVertex *Vertices; int Count; bool Ccw; + uint32_t SubsectorDepth; }; class PolyDrawSectorPortal diff --git a/src/r_poly_triangle.cpp b/src/r_poly_triangle.cpp index 9a389777ad..298c5f024b 100644 --- a/src/r_poly_triangle.cpp +++ b/src/r_poly_triangle.cpp @@ -90,6 +90,7 @@ void PolyTriangleDrawer::draw_arrays(const PolyDrawArgs &drawargs, TriDrawVarian case TriDrawVariant::FuzzSubsector: case TriDrawVariant::FillSubsector: drawfunc = dest_bgra ? 
llvm->TriFillSubsector32[bmode] : llvm->TriFillSubsector8[bmode]; break; case TriDrawVariant::Stencil: drawfunc = llvm->TriStencil; break; + case TriDrawVariant::StencilClose: drawfunc = llvm->TriStencilClose; break; } TriDrawTriangleArgs args; From 93af906a1d7a2ed552a3a4f9e460926a7b1f0933 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 25 Nov 2016 17:14:26 +0100 Subject: [PATCH 381/912] Add support for multiple portals --- src/r_poly.cpp | 3 ++- src/r_poly.h | 3 +++ src/r_poly_plane.cpp | 4 ++-- src/r_poly_portal.cpp | 24 +++++++++++++++--------- src/r_poly_portal.h | 2 ++ 5 files changed, 24 insertions(+), 12 deletions(-) diff --git a/src/r_poly.cpp b/src/r_poly.cpp index 171d7e0ade..13ecd26258 100644 --- a/src/r_poly.cpp +++ b/src/r_poly.cpp @@ -44,7 +44,7 @@ void RenderPolyScene::Render() ClearBuffers(); SetSceneViewport(); SetupPerspectiveMatrix(); - MainPortal.SetViewpoint(WorldToClip, 0); + MainPortal.SetViewpoint(WorldToClip, GetNextStencilValue()); MainPortal.Render(); Skydome.Render(WorldToClip); MainPortal.RenderTranslucent(); @@ -63,6 +63,7 @@ void RenderPolyScene::ClearBuffers() PolyVertexBuffer::Clear(); PolyStencilBuffer::Instance()->Clear(RenderTarget->GetWidth(), RenderTarget->GetHeight(), 0); PolySubsectorGBuffer::Instance()->Resize(RenderTarget->GetPitch(), RenderTarget->GetHeight()); + NextStencilValue = 0; } void RenderPolyScene::SetSceneViewport() diff --git a/src/r_poly.h b/src/r_poly.h index 99e86307bb..3bd9bb319a 100644 --- a/src/r_poly.h +++ b/src/r_poly.h @@ -41,6 +41,8 @@ public: void RenderRemainingPlayerSprites(); static RenderPolyScene *Instance(); + + uint32_t GetNextStencilValue() { uint32_t value = NextStencilValue; NextStencilValue += 2; return value; } private: void ClearBuffers(); @@ -51,4 +53,5 @@ private: RenderPolyPortal MainPortal; PolySkyDome Skydome; RenderPolyPlayerSprites PlayerSprites; + uint32_t NextStencilValue = 0; }; diff --git a/src/r_poly_plane.cpp b/src/r_poly_plane.cpp index c8248bed66..60b9d7569a 
100644 --- a/src/r_poly_plane.cpp +++ b/src/r_poly_plane.cpp @@ -264,7 +264,7 @@ void RenderPolyPlane::Render(const TriMatrix &worldToClip, subsector_t *sub, uin } else { - args.stencilwritevalue = 252; + args.stencilwritevalue = polyportal->StencilValue; PolyTriangleDrawer::draw(args, TriDrawVariant::Stencil, TriBlendMode::Copy); polyportal->Shape.push_back({ args.vinput, args.vcount, args.ccw, subsectorDepth }); } @@ -273,7 +273,7 @@ void RenderPolyPlane::Render(const TriMatrix &worldToClip, subsector_t *sub, uin { if (portal) { - args.stencilwritevalue = 252; + args.stencilwritevalue = polyportal->StencilValue; polyportal->Shape.push_back({ args.vinput, args.vcount, args.ccw, subsectorDepth }); } else diff --git a/src/r_poly_portal.cpp b/src/r_poly_portal.cpp index 805195835d..4f42ab77e0 100644 --- a/src/r_poly_portal.cpp +++ b/src/r_poly_portal.cpp @@ -26,6 +26,7 @@ #include "sbar.h" #include "r_data/r_translate.h" #include "r_poly_portal.h" +#include "r_poly.h" #include "gl/data/gl_data.h" CVAR(Bool, r_debug_cull, 0, 0) @@ -192,14 +193,15 @@ void RenderPolyPortal::RenderTranslucent() { for (auto it = SectorPortals.rbegin(); it != SectorPortals.rend(); ++it) { - (*it)->RenderTranslucent(); + auto &portal = *it; + portal->RenderTranslucent(); PolyDrawArgs args; args.objectToClip = &WorldToClip; args.mode = TriangleDrawMode::Fan; - args.stenciltestvalue = 253; - args.stencilwritevalue = 1; - for (const auto &verts : (*it)->Shape) + args.stenciltestvalue = portal->StencilValue + 1; + args.stencilwritevalue = StencilValue; + for (const auto &verts : portal->Shape) { args.vinput = verts.Vertices; args.vcount = verts.Count; @@ -211,14 +213,15 @@ void RenderPolyPortal::RenderTranslucent() for (auto it = LinePortals.rbegin(); it != LinePortals.rend(); ++it) { - (*it)->RenderTranslucent(); + auto &portal = *it; + portal->RenderTranslucent(); PolyDrawArgs args; args.objectToClip = &WorldToClip; args.mode = TriangleDrawMode::Fan; - args.stenciltestvalue = 253; - 
args.stencilwritevalue = 1; - for (const auto &verts : (*it)->Shape) + args.stenciltestvalue = portal->StencilValue + 1; + args.stencilwritevalue = StencilValue; + for (const auto &verts : portal->Shape) { args.vinput = verts.Vertices; args.vcount = verts.Count; @@ -257,6 +260,7 @@ void RenderPolyPortal::RenderTranslucent() PolyDrawSectorPortal::PolyDrawSectorPortal(FSectorPortal *portal, bool ceiling) : Portal(portal), Ceiling(ceiling) { + StencilValue = RenderPolyScene::Instance()->GetNextStencilValue(); } void PolyDrawSectorPortal::Render() @@ -289,7 +293,7 @@ void PolyDrawSectorPortal::Render() TriMatrix::translate((float)-ViewPos.X, (float)-ViewPos.Y, (float)-ViewPos.Z); TriMatrix worldToClip = TriMatrix::perspective(fovy, ratio, 5.0f, 65535.0f) * worldToView; - RenderPortal.SetViewpoint(worldToClip, 252); + RenderPortal.SetViewpoint(worldToClip, StencilValue); RenderPortal.Render(); RestoreGlobals(); @@ -349,6 +353,8 @@ void PolyDrawSectorPortal::RestoreGlobals() PolyDrawLinePortal::PolyDrawLinePortal(line_t *src, line_t *dest, bool mirror) : Src(src), Dest(dest) { // To do: do what R_EnterPortal and PortalDrawseg does + + StencilValue = RenderPolyScene::Instance()->GetNextStencilValue(); } void PolyDrawLinePortal::Render() diff --git a/src/r_poly_portal.h b/src/r_poly_portal.h index 8878f496d8..b910034a2c 100644 --- a/src/r_poly_portal.h +++ b/src/r_poly_portal.h @@ -128,6 +128,7 @@ public: void RenderTranslucent(); FSectorPortal *Portal; + uint32_t StencilValue = 0; std::vector Shape; private: @@ -153,6 +154,7 @@ public: void Render(); void RenderTranslucent(); + uint32_t StencilValue = 0; std::vector Shape; private: From 864358389e5face04f25e00b87dc49b8012065ff Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 25 Nov 2016 18:15:48 +0100 Subject: [PATCH 382/912] Implement portal recursion limit --- src/r_poly.cpp | 4 +- src/r_poly_portal.cpp | 112 ++++++++++++++++++++---------------------- src/r_poly_portal.h | 12 ++--- 3 files changed, 60 
insertions(+), 68 deletions(-) diff --git a/src/r_poly.cpp b/src/r_poly.cpp index 13ecd26258..7297741744 100644 --- a/src/r_poly.cpp +++ b/src/r_poly.cpp @@ -45,9 +45,9 @@ void RenderPolyScene::Render() SetSceneViewport(); SetupPerspectiveMatrix(); MainPortal.SetViewpoint(WorldToClip, GetNextStencilValue()); - MainPortal.Render(); + MainPortal.Render(0); Skydome.Render(WorldToClip); - MainPortal.RenderTranslucent(); + MainPortal.RenderTranslucent(0); PlayerSprites.Render(); DrawerCommandQueue::WaitForWorkers(); diff --git a/src/r_poly_portal.cpp b/src/r_poly_portal.cpp index 4f42ab77e0..0346a9d7db 100644 --- a/src/r_poly_portal.cpp +++ b/src/r_poly_portal.cpp @@ -48,15 +48,18 @@ void RenderPolyPortal::SetViewpoint(const TriMatrix &worldToClip, uint32_t stenc StencilValue = stencilValue; } -void RenderPolyPortal::Render() +void RenderPolyPortal::Render(int portalDepth) { ClearBuffers(); Cull.CullScene(WorldToClip); RenderSectors(); - for (auto &portal : SectorPortals) - portal->Render(); - for (auto &portal : LinePortals) - portal->Render(); + if (portalDepth < r_portal_recursions) + { + for (auto &portal : SectorPortals) + portal->Render(portalDepth + 1); + for (auto &portal : LinePortals) + portal->Render(portalDepth + 1); + } } void RenderPolyPortal::ClearBuffers() @@ -189,45 +192,48 @@ void RenderPolyPortal::RenderLine(subsector_t *sub, seg_t *line, sector_t *front } } -void RenderPolyPortal::RenderTranslucent() +void RenderPolyPortal::RenderTranslucent(int portalDepth) { - for (auto it = SectorPortals.rbegin(); it != SectorPortals.rend(); ++it) + if (portalDepth < r_portal_recursions) { - auto &portal = *it; - portal->RenderTranslucent(); - - PolyDrawArgs args; - args.objectToClip = &WorldToClip; - args.mode = TriangleDrawMode::Fan; - args.stenciltestvalue = portal->StencilValue + 1; - args.stencilwritevalue = StencilValue; - for (const auto &verts : portal->Shape) + for (auto it = SectorPortals.rbegin(); it != SectorPortals.rend(); ++it) { - args.vinput = 
verts.Vertices; - args.vcount = verts.Count; - args.ccw = verts.Ccw; - args.uniforms.subsectorDepth = verts.SubsectorDepth; - PolyTriangleDrawer::draw(args, TriDrawVariant::StencilClose, TriBlendMode::Copy); + auto &portal = *it; + portal->RenderTranslucent(portalDepth + 1); + + PolyDrawArgs args; + args.objectToClip = &WorldToClip; + args.mode = TriangleDrawMode::Fan; + args.stenciltestvalue = portal->StencilValue + 1; + args.stencilwritevalue = StencilValue; + for (const auto &verts : portal->Shape) + { + args.vinput = verts.Vertices; + args.vcount = verts.Count; + args.ccw = verts.Ccw; + args.uniforms.subsectorDepth = verts.SubsectorDepth; + PolyTriangleDrawer::draw(args, TriDrawVariant::StencilClose, TriBlendMode::Copy); + } } - } - for (auto it = LinePortals.rbegin(); it != LinePortals.rend(); ++it) - { - auto &portal = *it; - portal->RenderTranslucent(); - - PolyDrawArgs args; - args.objectToClip = &WorldToClip; - args.mode = TriangleDrawMode::Fan; - args.stenciltestvalue = portal->StencilValue + 1; - args.stencilwritevalue = StencilValue; - for (const auto &verts : portal->Shape) + for (auto it = LinePortals.rbegin(); it != LinePortals.rend(); ++it) { - args.vinput = verts.Vertices; - args.vcount = verts.Count; - args.ccw = verts.Ccw; - args.uniforms.subsectorDepth = verts.SubsectorDepth; - PolyTriangleDrawer::draw(args, TriDrawVariant::StencilClose, TriBlendMode::Copy); + auto &portal = *it; + portal->RenderTranslucent(portalDepth + 1); + + PolyDrawArgs args; + args.objectToClip = &WorldToClip; + args.mode = TriangleDrawMode::Fan; + args.stenciltestvalue = portal->StencilValue + 1; + args.stencilwritevalue = StencilValue; + for (const auto &verts : portal->Shape) + { + args.vinput = verts.Vertices; + args.vcount = verts.Count; + args.ccw = verts.Ccw; + args.uniforms.subsectorDepth = verts.SubsectorDepth; + PolyTriangleDrawer::draw(args, TriDrawVariant::StencilClose, TriBlendMode::Copy); + } } } @@ -263,16 +269,11 @@ 
PolyDrawSectorPortal::PolyDrawSectorPortal(FSectorPortal *portal, bool ceiling) StencilValue = RenderPolyScene::Instance()->GetNextStencilValue(); } -void PolyDrawSectorPortal::Render() +void PolyDrawSectorPortal::Render(int portalDepth) { if (Portal->mType != PORTS_SKYVIEWPOINT) return; - static int recursion = 0; - if (recursion >= 1/*r_portal_recursions*/) - return; - recursion++; - SaveGlobals(); // To do: get this information from RenderPolyScene instead of duplicating the code.. @@ -294,26 +295,17 @@ void PolyDrawSectorPortal::Render() TriMatrix worldToClip = TriMatrix::perspective(fovy, ratio, 5.0f, 65535.0f) * worldToView; RenderPortal.SetViewpoint(worldToClip, StencilValue); - RenderPortal.Render(); + RenderPortal.Render(portalDepth); RestoreGlobals(); - - recursion--; } -void PolyDrawSectorPortal::RenderTranslucent() +void PolyDrawSectorPortal::RenderTranslucent(int portalDepth) { if (Portal->mType != PORTS_SKYVIEWPOINT) return; - static int recursion = 0; - if (recursion >= 1/*r_portal_recursions*/) - return; - recursion++; - - RenderPortal.RenderTranslucent(); - - recursion--; + RenderPortal.RenderTranslucent(portalDepth); } void PolyDrawSectorPortal::SaveGlobals() @@ -357,12 +349,12 @@ PolyDrawLinePortal::PolyDrawLinePortal(line_t *src, line_t *dest, bool mirror) : StencilValue = RenderPolyScene::Instance()->GetNextStencilValue(); } -void PolyDrawLinePortal::Render() +void PolyDrawLinePortal::Render(int portalDepth) { - RenderPortal.Render(); + RenderPortal.Render(portalDepth); } -void PolyDrawLinePortal::RenderTranslucent() +void PolyDrawLinePortal::RenderTranslucent(int portalDepth) { - RenderPortal.RenderTranslucent(); + RenderPortal.RenderTranslucent(portalDepth); } diff --git a/src/r_poly_portal.h b/src/r_poly_portal.h index b910034a2c..a53a185e36 100644 --- a/src/r_poly_portal.h +++ b/src/r_poly_portal.h @@ -84,8 +84,8 @@ public: RenderPolyPortal(); ~RenderPolyPortal(); void SetViewpoint(const TriMatrix &worldToClip, uint32_t stencilValue); - 
void Render(); - void RenderTranslucent(); + void Render(int portalDepth); + void RenderTranslucent(int portalDepth); static const uint32_t SkySubsectorDepth = 0x7fffffff; @@ -124,8 +124,8 @@ class PolyDrawSectorPortal public: PolyDrawSectorPortal(FSectorPortal *portal, bool ceiling); - void Render(); - void RenderTranslucent(); + void Render(int portalDepth); + void RenderTranslucent(int portalDepth); FSectorPortal *Portal; uint32_t StencilValue = 0; @@ -151,8 +151,8 @@ class PolyDrawLinePortal public: PolyDrawLinePortal(line_t *src, line_t *dest, bool mirror); - void Render(); - void RenderTranslucent(); + void Render(int portalDepth); + void RenderTranslucent(int portalDepth); uint32_t StencilValue = 0; std::vector Shape; From eaa15f2b74bdacc62937a7d0e0967686821eca0f Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 25 Nov 2016 20:19:35 +0100 Subject: [PATCH 383/912] Render other sector portal types --- src/r_poly_cull.cpp | 6 ++++++ src/r_poly_plane.cpp | 2 ++ src/r_poly_portal.cpp | 36 ++++++++++++++++++++++++++++-------- 3 files changed, 36 insertions(+), 8 deletions(-) diff --git a/src/r_poly_cull.cpp b/src/r_poly_cull.cpp index ff349690c0..04d5c3c244 100644 --- a/src/r_poly_cull.cpp +++ b/src/r_poly_cull.cpp @@ -85,6 +85,12 @@ void PolyCull::CullSubsector(subsector_t *sub) seg_t *line = &sub->firstline[i]; if ((line->sidedef == nullptr || !(line->sidedef->Flags & WALLF_POLYOBJ)) && line->backsector == nullptr) { + // Skip lines not facing viewer + DVector2 pt1 = line->v1->fPos() - ViewPos; + DVector2 pt2 = line->v2->fPos() - ViewPos; + if (pt1.Y * (pt1.X - pt2.X) + pt1.X * (pt2.Y - pt1.Y) >= 0) + continue; + int sx1, sx2; if (GetSegmentRangeForLine(line->v1->fX(), line->v1->fY(), line->v2->fX(), line->v2->fY(), sx1, sx2)) { diff --git a/src/r_poly_plane.cpp b/src/r_poly_plane.cpp index 60b9d7569a..e64d085e78 100644 --- a/src/r_poly_plane.cpp +++ b/src/r_poly_plane.cpp @@ -150,6 +150,8 @@ void RenderPolyPlane::Render(const TriMatrix &worldToClip, 
subsector_t *sub, uin { FSectorPortal *portal = sub->sector->ValidatePortal(ceiling ? sector_t::ceiling : sector_t::floor); PolyDrawSectorPortal *polyportal = nullptr; + if (portal && (portal->mFlags & PORTSF_INSKYBOX) == PORTSF_INSKYBOX) // Do not recurse into portals we already recursed into + portal = nullptr; if (portal) { for (auto &p : sectorPortals) diff --git a/src/r_poly_portal.cpp b/src/r_poly_portal.cpp index 0346a9d7db..d6a67a3175 100644 --- a/src/r_poly_portal.cpp +++ b/src/r_poly_portal.cpp @@ -271,7 +271,7 @@ PolyDrawSectorPortal::PolyDrawSectorPortal(FSectorPortal *portal, bool ceiling) void PolyDrawSectorPortal::Render(int portalDepth) { - if (Portal->mType != PORTS_SKYVIEWPOINT) + if (Portal->mType == PORTS_HORIZON || Portal->mType == PORTS_PLANE) return; SaveGlobals(); @@ -302,10 +302,14 @@ void PolyDrawSectorPortal::Render(int portalDepth) void PolyDrawSectorPortal::RenderTranslucent(int portalDepth) { - if (Portal->mType != PORTS_SKYVIEWPOINT) + if (Portal->mType == PORTS_HORIZON || Portal->mType == PORTS_PLANE) return; + + SaveGlobals(); RenderPortal.RenderTranslucent(portalDepth); + + RestoreGlobals(); } void PolyDrawSectorPortal::SaveGlobals() @@ -317,20 +321,36 @@ void PolyDrawSectorPortal::SaveGlobals() savedcamera = camera; savedsector = viewsector; - // Don't let gun flashes brighten the sky box - ASkyViewpoint *sky = barrier_cast(Portal->mSkybox); - extralight = 0; - R_SetVisibility(sky->args[0] * 0.25f); - ViewPos = sky->InterpolatedPosition(r_TicFracF); - ViewAngle = savedangle + (sky->PrevAngles.Yaw + deltaangle(sky->PrevAngles.Yaw, sky->Angles.Yaw) * r_TicFracF); + if (Portal->mType == PORTS_SKYVIEWPOINT) + { + // Don't let gun flashes brighten the sky box + ASkyViewpoint *sky = barrier_cast(Portal->mSkybox); + extralight = 0; + R_SetVisibility(sky->args[0] * 0.25f); + ViewPos = sky->InterpolatedPosition(r_TicFracF); + ViewAngle = savedangle + (sky->PrevAngles.Yaw + deltaangle(sky->PrevAngles.Yaw, sky->Angles.Yaw) * r_TicFracF); + } 
+ else //if (Portal->mType == PORTS_STACKEDSECTORTHING || Portal->mType == PORTS_PORTAL || Portal->mType == PORTS_LINKEDPORTAL) + { + //extralight = pl->extralight; + //R_SetVisibility(pl->visibility); + ViewPos.X += Portal->mDisplacement.X; + ViewPos.Y += Portal->mDisplacement.Y; + } camera = nullptr; viewsector = Portal->mDestination; R_SetViewAngle(); + + Portal->mFlags |= PORTSF_INSKYBOX; + if (Portal->mPartner > 0) sectorPortals[Portal->mPartner].mFlags |= PORTSF_INSKYBOX; } void PolyDrawSectorPortal::RestoreGlobals() { + Portal->mFlags &= ~PORTSF_INSKYBOX; + if (Portal->mPartner > 0) sectorPortals[Portal->mPartner].mFlags &= ~PORTSF_INSKYBOX; + camera = savedcamera; viewsector = savedsector; ViewPos = savedpos; From cc3ac9ea0536ec201e964156d910d896d2cdd77c Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 25 Nov 2016 23:44:55 +0100 Subject: [PATCH 384/912] Line portal rendering --- src/r_poly_portal.cpp | 136 ++++++++++++++++++++++++++++++++++++++++-- src/r_poly_portal.h | 20 +++++-- src/r_poly_wall.cpp | 79 +++++++++++++++++++++++- src/r_poly_wall.h | 3 +- 4 files changed, 228 insertions(+), 10 deletions(-) diff --git a/src/r_poly_portal.cpp b/src/r_poly_portal.cpp index d6a67a3175..e4ce5c5476 100644 --- a/src/r_poly_portal.cpp +++ b/src/r_poly_portal.cpp @@ -23,6 +23,7 @@ #include #include "templates.h" #include "doomdef.h" +#include "p_maputl.h" #include "sbar.h" #include "r_data/r_translate.h" #include "r_poly_portal.h" @@ -32,6 +33,8 @@ CVAR(Bool, r_debug_cull, 0, 0) EXTERN_CVAR(Int, r_portal_recursions) +extern bool r_showviewer; + ///////////////////////////////////////////////////////////////////////////// RenderPolyPortal::RenderPolyPortal() @@ -185,7 +188,7 @@ void RenderPolyPortal::RenderLine(subsector_t *sub, seg_t *line, sector_t *front } // Render wall, and update culling info if its an occlusion blocker - if (RenderPolyWall::RenderLine(WorldToClip, line, frontsector, subsectorDepth, StencilValue, SubsectorTranslucentWalls)) + if 
(RenderPolyWall::RenderLine(WorldToClip, line, frontsector, subsectorDepth, StencilValue, SubsectorTranslucentWalls, LinePortals)) { if (hasSegmentRange) Cull.MarkSegmentCulled(sx1, sx2); @@ -362,19 +365,144 @@ void PolyDrawSectorPortal::RestoreGlobals() ///////////////////////////////////////////////////////////////////////////// -PolyDrawLinePortal::PolyDrawLinePortal(line_t *src, line_t *dest, bool mirror) : Src(src), Dest(dest) +PolyDrawLinePortal::PolyDrawLinePortal(FLinePortal *portal) : Portal(portal) +{ + StencilValue = RenderPolyScene::Instance()->GetNextStencilValue(); +} + +PolyDrawLinePortal::PolyDrawLinePortal(line_t *mirror) : Mirror(mirror) { - // To do: do what R_EnterPortal and PortalDrawseg does - StencilValue = RenderPolyScene::Instance()->GetNextStencilValue(); } void PolyDrawLinePortal::Render(int portalDepth) { + SaveGlobals(); + + // To do: get this information from RenderPolyScene instead of duplicating the code.. + double radPitch = ViewPitch.Normalized180().Radians(); + double angx = cos(radPitch); + double angy = sin(radPitch) * glset.pixelstretch; + double alen = sqrt(angx*angx + angy*angy); + float adjustedPitch = (float)asin(angy / alen); + float adjustedViewAngle = (float)(ViewAngle - 90).Radians(); + float ratio = WidescreenRatio; + float fovratio = (WidescreenRatio >= 1.3f) ? 
1.333333f : ratio; + float fovy = (float)(2 * DAngle::ToDegrees(atan(tan(FieldOfView.Radians() / 2) / fovratio)).Degrees); + TriMatrix worldToView = + TriMatrix::rotate(adjustedPitch, 1.0f, 0.0f, 0.0f) * + TriMatrix::rotate(adjustedViewAngle, 0.0f, -1.0f, 0.0f) * + TriMatrix::scale(1.0f, glset.pixelstretch, 1.0f) * + TriMatrix::swapYZ() * + TriMatrix::translate((float)-ViewPos.X, (float)-ViewPos.Y, (float)-ViewPos.Z); + TriMatrix worldToClip = TriMatrix::perspective(fovy, ratio, 5.0f, 65535.0f) * worldToView; + + RenderPortal.SetViewpoint(worldToClip, StencilValue); RenderPortal.Render(portalDepth); + + RestoreGlobals(); } void PolyDrawLinePortal::RenderTranslucent(int portalDepth) { + SaveGlobals(); RenderPortal.RenderTranslucent(portalDepth); + RestoreGlobals(); +} + +void PolyDrawLinePortal::SaveGlobals() +{ + savedextralight = extralight; + savedpos = ViewPos; + savedangle = ViewAngle; + savedcamera = camera; + savedsector = viewsector; + savedvisibility = camera ? camera->renderflags & RF_INVISIBLE : ActorRenderFlags::FromInt(0); + savedViewPath[0] = ViewPath[0]; + savedViewPath[1] = ViewPath[1]; + + if (Mirror) + { + DAngle startang = ViewAngle; + DVector3 startpos = ViewPos; + + vertex_t *v1 = Mirror->v1; + + // Reflect the current view behind the mirror. 
+ if (Mirror->Delta().X == 0) + { // vertical mirror + ViewPos.X = v1->fX() - startpos.X + v1->fX(); + } + else if (Mirror->Delta().Y == 0) + { // horizontal mirror + ViewPos.Y = v1->fY() - startpos.Y + v1->fY(); + } + else + { // any mirror + vertex_t *v2 = Mirror->v2; + + double dx = v2->fX() - v1->fX(); + double dy = v2->fY() - v1->fY(); + double x1 = v1->fX(); + double y1 = v1->fY(); + double x = startpos.X; + double y = startpos.Y; + + // the above two cases catch len == 0 + double r = ((x - x1)*dx + (y - y1)*dy) / (dx*dx + dy*dy); + + ViewPos.X = (x1 + r * dx) * 2 - x; + ViewPos.Y = (y1 + r * dy) * 2 - y; + } + ViewAngle = Mirror->Delta().Angle() * 2 - startang; + } + else + { + auto src = Portal->mOrigin; + auto dst = Portal->mDestination; + + P_TranslatePortalXY(src, ViewPos.X, ViewPos.Y); + P_TranslatePortalZ(src, ViewPos.Z); + P_TranslatePortalAngle(src, ViewAngle); + P_TranslatePortalXY(src, ViewPath[0].X, ViewPath[0].Y); + P_TranslatePortalXY(src, ViewPath[1].X, ViewPath[1].Y); + + if (!r_showviewer && camera && P_PointOnLineSidePrecise(ViewPath[0], dst) != P_PointOnLineSidePrecise(ViewPath[1], dst)) + { + double distp = (ViewPath[0] - ViewPath[1]).Length(); + if (distp > EQUAL_EPSILON) + { + double dist1 = (ViewPos - ViewPath[0]).Length(); + double dist2 = (ViewPos - ViewPath[1]).Length(); + + if (dist1 + dist2 < distp + 1) + { + camera->renderflags |= RF_INVISIBLE; + } + } + } + + /*if (Portal->mirror) + { + if (MirrorFlags & RF_XFLIP) MirrorFlags &= ~RF_XFLIP; + else MirrorFlags |= RF_XFLIP; + }*/ + } + + camera = nullptr; + //viewsector = Portal->mDestination; + R_SetViewAngle(); +} + +void PolyDrawLinePortal::RestoreGlobals() +{ + if (!savedvisibility && camera) camera->renderflags &= ~RF_INVISIBLE; + camera = savedcamera; + viewsector = savedsector; + ViewPos = savedpos; + extralight = savedextralight; + ViewAngle = savedangle; + ViewPath[0] = savedViewPath[0]; + ViewPath[1] = savedViewPath[1]; + R_SetViewAngle(); } diff --git 
a/src/r_poly_portal.h b/src/r_poly_portal.h index a53a185e36..e47277f322 100644 --- a/src/r_poly_portal.h +++ b/src/r_poly_portal.h @@ -127,7 +127,7 @@ public: void Render(int portalDepth); void RenderTranslucent(int portalDepth); - FSectorPortal *Portal; + FSectorPortal *Portal = nullptr; uint32_t StencilValue = 0; std::vector Shape; @@ -149,16 +149,28 @@ private: class PolyDrawLinePortal { public: - PolyDrawLinePortal(line_t *src, line_t *dest, bool mirror); + PolyDrawLinePortal(FLinePortal *portal); + PolyDrawLinePortal(line_t *mirror); void Render(int portalDepth); void RenderTranslucent(int portalDepth); + FLinePortal *Portal = nullptr; + line_t *Mirror = nullptr; uint32_t StencilValue = 0; std::vector Shape; private: - line_t *Src; - line_t *Dest; + void SaveGlobals(); + void RestoreGlobals(); + RenderPolyPortal RenderPortal; + + int savedextralight; + DVector3 savedpos; + DAngle savedangle; + AActor *savedcamera; + sector_t *savedsector; + ActorRenderFlags savedvisibility; + DVector3 savedViewPath[2]; }; diff --git a/src/r_poly_wall.cpp b/src/r_poly_wall.cpp index 8f3fb05d73..7484fd493a 100644 --- a/src/r_poly_wall.cpp +++ b/src/r_poly_wall.cpp @@ -23,6 +23,9 @@ #include #include "templates.h" #include "doomdef.h" +#include "doomstat.h" +#include "doomdata.h" +#include "p_lnspec.h" #include "sbar.h" #include "r_data/r_translate.h" #include "r_poly_wall.h" @@ -30,8 +33,82 @@ #include "r_poly.h" #include "r_sky.h" // for skyflatnum -bool RenderPolyWall::RenderLine(const TriMatrix &worldToClip, seg_t *line, sector_t *frontsector, uint32_t subsectorDepth, uint32_t stencilValue, std::vector &translucentWallsOutput) +EXTERN_CVAR(Bool, r_drawmirrors) + +bool RenderPolyWall::RenderLine(const TriMatrix &worldToClip, seg_t *line, sector_t *frontsector, uint32_t subsectorDepth, uint32_t stencilValue, std::vector &translucentWallsOutput, std::vector> &linePortals) { + PolyDrawLinePortal *polyportal = nullptr; + if (line->backsector == nullptr && line->sidedef == 
line->linedef->sidedef[0] && (line->linedef->special == Line_Mirror && r_drawmirrors)) + { + linePortals.push_back(std::make_unique(line->linedef)); + polyportal = linePortals.back().get(); + } + else if (line->linedef && line->linedef->isVisualPortal()) + { + FLinePortal *portal = line->linedef->getPortal(); + for (auto &p : linePortals) + { + if (p->Portal == portal) // To do: what other criterias do we need to check for? + { + polyportal = p.get(); + break; + } + } + if (!polyportal) + { + linePortals.push_back(std::make_unique(portal)); + polyportal = linePortals.back().get(); + } + } + + if (polyportal) + { + double ceil1 = frontsector->ceilingplane.ZatPoint(line->v1); + double floor1 = frontsector->floorplane.ZatPoint(line->v1); + double ceil2 = frontsector->ceilingplane.ZatPoint(line->v2); + double floor2 = frontsector->floorplane.ZatPoint(line->v2); + DVector2 v1 = line->v1->fPos(); + DVector2 v2 = line->v2->fPos(); + + TriVertex *vertices = PolyVertexBuffer::GetVertices(4); + if (!vertices) + return true; + + vertices[0].x = (float)v1.X; + vertices[0].y = (float)v1.Y; + vertices[0].z = (float)ceil1; + vertices[0].w = 1.0f; + + vertices[1].x = (float)v2.X; + vertices[1].y = (float)v2.Y; + vertices[1].z = (float)ceil2; + vertices[1].w = 1.0f; + + vertices[2].x = (float)v2.X; + vertices[2].y = (float)v2.Y; + vertices[2].z = (float)floor2; + vertices[2].w = 1.0f; + + vertices[3].x = (float)v1.X; + vertices[3].y = (float)v1.Y; + vertices[3].z = (float)floor1; + vertices[3].w = 1.0f; + + PolyDrawArgs args; + args.uniforms.flags = 0; + args.objectToClip = &worldToClip; + args.vinput = vertices; + args.vcount = 4; + args.mode = TriangleDrawMode::Fan; + args.ccw = true; + args.stenciltestvalue = stencilValue; + args.stencilwritevalue = polyportal->StencilValue; + PolyTriangleDrawer::draw(args, TriDrawVariant::Stencil, TriBlendMode::Copy); + + polyportal->Shape.push_back({ vertices, 4, true, subsectorDepth }); + return true; + } + RenderPolyWall wall; wall.LineSeg = 
line; wall.Line = line->linedef; diff --git a/src/r_poly_wall.h b/src/r_poly_wall.h index 3d2f89f841..31c236dd4d 100644 --- a/src/r_poly_wall.h +++ b/src/r_poly_wall.h @@ -25,11 +25,12 @@ #include "r_poly_triangle.h" class PolyTranslucentObject; +class PolyDrawLinePortal; class RenderPolyWall { public: - static bool RenderLine(const TriMatrix &worldToClip, seg_t *line, sector_t *frontsector, uint32_t subsectorDepth, uint32_t stencilValue, std::vector &translucentWallsOutput); + static bool RenderLine(const TriMatrix &worldToClip, seg_t *line, sector_t *frontsector, uint32_t subsectorDepth, uint32_t stencilValue, std::vector &translucentWallsOutput, std::vector> &linePortals); static void Render3DFloorLine(const TriMatrix &worldToClip, seg_t *line, sector_t *frontsector, uint32_t subsectorDepth, uint32_t stencilValue, F3DFloor *fakeFloor, std::vector &translucentWallsOutput); void SetCoords(const DVector2 &v1, const DVector2 &v2, double ceil1, double floor1, double ceil2, double floor2); From 383a59259a5158761e4a26f491965d62d74b8c85 Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Fri, 25 Nov 2016 21:02:18 -0500 Subject: [PATCH 385/912] Revert "Merge commit 'refs/pull/122/head' of https://github.com/coelckers/gzdoom" This reverts commit 71b4f57058eac07c38ce44b1eb057e8f40bd0701, reversing changes made to 449d4963ae23c20f3dc31fa1b6543a28e09e4946. --- src/c_cvars.cpp | 4 ---- src/c_cvars.h | 1 - src/p_acs.cpp | 20 -------------------- src/p_actionfunctions.cpp | 6 ------ src/posix/cocoa/i_video.mm | 2 +- src/posix/sdl/hardware.cpp | 2 +- src/win32/hardware.cpp | 2 +- 7 files changed, 3 insertions(+), 34 deletions(-) diff --git a/src/c_cvars.cpp b/src/c_cvars.cpp index 18648db285..c8f1ee4c62 100644 --- a/src/c_cvars.cpp +++ b/src/c_cvars.cpp @@ -52,10 +52,6 @@ #include "v_video.h" #include "colormatcher.h" -// [SP] Lets the player (arbitrator) choose whether to override GetCVar checks. -// Danger of desync? Can we just make it a client var? 
This probably *fixes* desyncs, actually... -CVAR(Bool, sv_overridegetcvar, true, CVAR_SERVERINFO | CVAR_GLOBALCONFIG | CVAR_ARCHIVE) - struct FLatchedValue { FBaseCVar *Variable; diff --git a/src/c_cvars.h b/src/c_cvars.h index d74d3b5c9a..be7676e895 100644 --- a/src/c_cvars.h +++ b/src/c_cvars.h @@ -63,7 +63,6 @@ enum CVAR_NOSAVE = 4096, // when used with CVAR_SERVERINFO, do not save var to savegame CVAR_MOD = 8192, // cvar was defined by a mod CVAR_IGNORE = 16384,// do not send cvar across the network/inaccesible from ACS (dummy mod cvar) - CVAR_OVERRIDEGET = 32768,// this cvar disguises its return value for GetCVAR }; union UCVarValue diff --git a/src/p_acs.cpp b/src/p_acs.cpp index 1d309307d8..1a60a0c4fb 100644 --- a/src/p_acs.cpp +++ b/src/p_acs.cpp @@ -4549,8 +4549,6 @@ static void DoSetCVar(FBaseCVar *cvar, int value, bool is_string, bool force=fal } } -EXTERN_CVAR(Bool, sv_overridegetcvar) - // Converts floating- to fixed-point as required. static int DoGetCVar(FBaseCVar *cvar, bool is_string) { @@ -4560,24 +4558,6 @@ static int DoGetCVar(FBaseCVar *cvar, bool is_string) { return 0; } - else if (sv_overridegetcvar && (cvar->GetFlags() & CVAR_OVERRIDEGET)) - { - if (is_string) - { - val = cvar->GetGenericRepDefault(CVAR_String); - return GlobalACSStrings.AddString(val.String); - } - else if (cvar->GetRealType() == CVAR_Float) - { - val = cvar->GetGenericRepDefault(CVAR_Float); - return DoubleToACS(val.Float); - } - else - { - val = cvar->GetGenericRepDefault(CVAR_Int); - return val.Int; - } - } else if (is_string) { val = cvar->GetGenericRep(CVAR_String); diff --git a/src/p_actionfunctions.cpp b/src/p_actionfunctions.cpp index df58524739..73cade49a4 100644 --- a/src/p_actionfunctions.cpp +++ b/src/p_actionfunctions.cpp @@ -621,8 +621,6 @@ DEFINE_ACTION_FUNCTION_PARAMS(AActor, GetCrouchFactor) // //========================================================================== -EXTERN_CVAR(Bool, sv_overridegetcvar) - DEFINE_ACTION_FUNCTION_PARAMS(AActor, GetCVar) 
{ if (numret > 0) @@ -636,10 +634,6 @@ DEFINE_ACTION_FUNCTION_PARAMS(AActor, GetCVar) { ret->SetFloat(0); } - else if (sv_overridegetcvar && (cvar->GetFlags() & CVAR_OVERRIDEGET)) - { - ret->SetFloat(cvar->GetGenericRepDefault(CVAR_Float).Float); - } else { ret->SetFloat(cvar->GetGenericRep(CVAR_Float).Float); diff --git a/src/posix/cocoa/i_video.mm b/src/posix/cocoa/i_video.mm index 8f8cdab844..5875e87b8b 100644 --- a/src/posix/cocoa/i_video.mm +++ b/src/posix/cocoa/i_video.mm @@ -135,7 +135,7 @@ CUSTOM_CVAR(Bool, vid_autoswitch, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG | CVAR_ static int s_currentRenderer; -CUSTOM_CVAR(Int, vid_renderer, 0, CVAR_ARCHIVE | CVAR_GLOBALCONFIG | CVAR_NOINITCALL | CVAR_OVERRIDEGET) +CUSTOM_CVAR(Int, vid_renderer, 0, CVAR_ARCHIVE | CVAR_GLOBALCONFIG | CVAR_NOINITCALL) { // 0: Software renderer // 1: OpenGL renderer diff --git a/src/posix/sdl/hardware.cpp b/src/posix/sdl/hardware.cpp index 362fadc542..8fbb7c08ac 100644 --- a/src/posix/sdl/hardware.cpp +++ b/src/posix/sdl/hardware.cpp @@ -67,7 +67,7 @@ void I_RestartRenderer(); int currentrenderer; // [ZDoomGL] -CUSTOM_CVAR (Int, vid_renderer, 0, CVAR_ARCHIVE | CVAR_GLOBALCONFIG | CVAR_NOINITCALL | CVAR_OVERRIDEGET) +CUSTOM_CVAR (Int, vid_renderer, 0, CVAR_ARCHIVE | CVAR_GLOBALCONFIG | CVAR_NOINITCALL) { // 0: Software renderer // 1: OpenGL renderer diff --git a/src/win32/hardware.cpp b/src/win32/hardware.cpp index cf1ec907e6..609f56647d 100644 --- a/src/win32/hardware.cpp +++ b/src/win32/hardware.cpp @@ -81,7 +81,7 @@ CUSTOM_CVAR(Bool, vid_used3d, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG | CVAR_NOIN } // [ZDoomGL] -CUSTOM_CVAR (Int, vid_renderer, 0, CVAR_ARCHIVE | CVAR_GLOBALCONFIG | CVAR_NOINITCALL | CVAR_OVERRIDEGET) +CUSTOM_CVAR (Int, vid_renderer, 0, CVAR_ARCHIVE | CVAR_GLOBALCONFIG | CVAR_NOINITCALL) { // 0: Software renderer // 1: OpenGL renderer From 62188d1ea58707d6600608a3ae90501fe5fe1f95 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 26 Nov 2016 07:06:10 +0100 Subject: 
[PATCH 386/912] Change the line portal shape to be determined by the same code that decides the shape of other walls --- src/r_poly_portal.cpp | 61 ++++++++++++++++++++---- src/r_poly_portal.h | 1 + src/r_poly_wall.cpp | 106 +++++++++++++++--------------------------- src/r_poly_wall.h | 1 + 4 files changed, 91 insertions(+), 78 deletions(-) diff --git a/src/r_poly_portal.cpp b/src/r_poly_portal.cpp index e4ce5c5476..715d475823 100644 --- a/src/r_poly_portal.cpp +++ b/src/r_poly_portal.cpp @@ -56,13 +56,7 @@ void RenderPolyPortal::Render(int portalDepth) ClearBuffers(); Cull.CullScene(WorldToClip); RenderSectors(); - if (portalDepth < r_portal_recursions) - { - for (auto &portal : SectorPortals) - portal->Render(portalDepth + 1); - for (auto &portal : LinePortals) - portal->Render(portalDepth + 1); - } + RenderPortals(portalDepth); } void RenderPolyPortal::ClearBuffers() @@ -195,6 +189,55 @@ void RenderPolyPortal::RenderLine(subsector_t *sub, seg_t *line, sector_t *front } } +void RenderPolyPortal::RenderPortals(int portalDepth) +{ + if (portalDepth < r_portal_recursions) + { + for (auto &portal : SectorPortals) + portal->Render(portalDepth + 1); + + for (auto &portal : LinePortals) + portal->Render(portalDepth + 1); + } + else // Fill with black + { + PolyDrawArgs args; + args.objectToClip = &WorldToClip; + args.mode = TriangleDrawMode::Fan; + args.uniforms.color = 0; + args.uniforms.light = 256; + args.uniforms.flags = TriUniforms::fixed_light; + + for (auto &portal : SectorPortals) + { + args.stenciltestvalue = portal->StencilValue; + args.stencilwritevalue = portal->StencilValue + 1; + for (const auto &verts : portal->Shape) + { + args.vinput = verts.Vertices; + args.vcount = verts.Count; + args.ccw = verts.Ccw; + args.uniforms.subsectorDepth = verts.SubsectorDepth; + PolyTriangleDrawer::draw(args, TriDrawVariant::FillNormal, TriBlendMode::Copy); + } + } + + for (auto &portal : LinePortals) + { + args.stenciltestvalue = portal->StencilValue; + 
args.stencilwritevalue = portal->StencilValue + 1; + for (const auto &verts : portal->Shape) + { + args.vinput = verts.Vertices; + args.vcount = verts.Count; + args.ccw = verts.Ccw; + args.uniforms.subsectorDepth = verts.SubsectorDepth; + PolyTriangleDrawer::draw(args, TriDrawVariant::FillNormal, TriBlendMode::Copy); + } + } + } +} + void RenderPolyPortal::RenderTranslucent(int portalDepth) { if (portalDepth < r_portal_recursions) @@ -236,7 +279,7 @@ void RenderPolyPortal::RenderTranslucent(int portalDepth) args.ccw = verts.Ccw; args.uniforms.subsectorDepth = verts.SubsectorDepth; PolyTriangleDrawer::draw(args, TriDrawVariant::StencilClose, TriBlendMode::Copy); - } + } } } @@ -490,7 +533,7 @@ void PolyDrawLinePortal::SaveGlobals() } camera = nullptr; - //viewsector = Portal->mDestination; + //viewsector = R_PointInSubsector(ViewPos)->sector; R_SetViewAngle(); } diff --git a/src/r_poly_portal.h b/src/r_poly_portal.h index e47277f322..56a125d2ca 100644 --- a/src/r_poly_portal.h +++ b/src/r_poly_portal.h @@ -91,6 +91,7 @@ public: private: void ClearBuffers(); + void RenderPortals(int portalDepth); void RenderSectors(); void RenderSubsector(subsector_t *sub); void RenderLine(subsector_t *sub, seg_t *line, sector_t *frontsector, uint32_t subsectorDepth); diff --git a/src/r_poly_wall.cpp b/src/r_poly_wall.cpp index 7484fd493a..d6ade7279a 100644 --- a/src/r_poly_wall.cpp +++ b/src/r_poly_wall.cpp @@ -61,54 +61,6 @@ bool RenderPolyWall::RenderLine(const TriMatrix &worldToClip, seg_t *line, secto } } - if (polyportal) - { - double ceil1 = frontsector->ceilingplane.ZatPoint(line->v1); - double floor1 = frontsector->floorplane.ZatPoint(line->v1); - double ceil2 = frontsector->ceilingplane.ZatPoint(line->v2); - double floor2 = frontsector->floorplane.ZatPoint(line->v2); - DVector2 v1 = line->v1->fPos(); - DVector2 v2 = line->v2->fPos(); - - TriVertex *vertices = PolyVertexBuffer::GetVertices(4); - if (!vertices) - return true; - - vertices[0].x = (float)v1.X; - vertices[0].y = 
(float)v1.Y; - vertices[0].z = (float)ceil1; - vertices[0].w = 1.0f; - - vertices[1].x = (float)v2.X; - vertices[1].y = (float)v2.Y; - vertices[1].z = (float)ceil2; - vertices[1].w = 1.0f; - - vertices[2].x = (float)v2.X; - vertices[2].y = (float)v2.Y; - vertices[2].z = (float)floor2; - vertices[2].w = 1.0f; - - vertices[3].x = (float)v1.X; - vertices[3].y = (float)v1.Y; - vertices[3].z = (float)floor1; - vertices[3].w = 1.0f; - - PolyDrawArgs args; - args.uniforms.flags = 0; - args.objectToClip = &worldToClip; - args.vinput = vertices; - args.vcount = 4; - args.mode = TriangleDrawMode::Fan; - args.ccw = true; - args.stenciltestvalue = stencilValue; - args.stencilwritevalue = polyportal->StencilValue; - PolyTriangleDrawer::draw(args, TriDrawVariant::Stencil, TriBlendMode::Copy); - - polyportal->Shape.push_back({ vertices, 4, true, subsectorDepth }); - return true; - } - RenderPolyWall wall; wall.LineSeg = line; wall.Line = line->linedef; @@ -132,6 +84,7 @@ bool RenderPolyWall::RenderLine(const TriMatrix &worldToClip, seg_t *line, secto wall.BottomZ = frontfloorz1; wall.UnpeggedCeil = frontceilz1; wall.Texpart = side_t::mid; + wall.Polyportal = polyportal; wall.Render(worldToClip); return true; } @@ -182,20 +135,25 @@ bool RenderPolyWall::RenderLine(const TriMatrix &worldToClip, seg_t *line, secto if (line->sidedef) { + wall.SetCoords(line->v1->fPos(), line->v2->fPos(), middleceilz1, middlefloorz1, middleceilz2, middlefloorz2); + wall.TopZ = middleceilz1; + wall.BottomZ = middlefloorz1; + wall.UnpeggedCeil = topceilz1; + wall.Texpart = side_t::mid; + wall.Masked = true; + FTexture *midtex = TexMan(line->sidedef->GetTexture(side_t::mid), true); if (midtex && midtex->UseType != FTexture::TEX_Null) - { - wall.SetCoords(line->v1->fPos(), line->v2->fPos(), middleceilz1, middlefloorz1, middleceilz2, middlefloorz2); - wall.TopZ = middleceilz1; - wall.BottomZ = middlefloorz1; - wall.UnpeggedCeil = topceilz1; - wall.Texpart = side_t::mid; - wall.Masked = true; 
translucentWallsOutput.push_back({ wall }); + + if (polyportal) + { + wall.Polyportal = polyportal; + wall.Render(worldToClip); } } } - return false; + return polyportal != nullptr; } void RenderPolyWall::Render3DFloorLine(const TriMatrix &worldToClip, seg_t *line, sector_t *frontsector, uint32_t subsectorDepth, uint32_t stencilValue, F3DFloor *fakeFloor, std::vector &translucentWallsOutput) @@ -234,11 +192,9 @@ void RenderPolyWall::SetCoords(const DVector2 &v1, const DVector2 &v2, double ce void RenderPolyWall::Render(const TriMatrix &worldToClip) { FTexture *tex = GetTexture(); - if (!tex) + if (!tex && !Polyportal) return; - PolyWallTextureCoords texcoords(tex, LineSeg, Line, Side, Texpart, TopZ, BottomZ, UnpeggedCeil); - TriVertex *vertices = PolyVertexBuffer::GetVertices(4); if (!vertices) return; @@ -247,29 +203,34 @@ void RenderPolyWall::Render(const TriMatrix &worldToClip) vertices[0].y = (float)v1.Y; vertices[0].z = (float)ceil1; vertices[0].w = 1.0f; - vertices[0].varying[0] = (float)texcoords.u1; - vertices[0].varying[1] = (float)texcoords.v1; vertices[1].x = (float)v2.X; vertices[1].y = (float)v2.Y; vertices[1].z = (float)ceil2; vertices[1].w = 1.0f; - vertices[1].varying[0] = (float)texcoords.u2; - vertices[1].varying[1] = (float)texcoords.v1; vertices[2].x = (float)v2.X; vertices[2].y = (float)v2.Y; vertices[2].z = (float)floor2; vertices[2].w = 1.0f; - vertices[2].varying[0] = (float)texcoords.u2; - vertices[2].varying[1] = (float)texcoords.v2; vertices[3].x = (float)v1.X; vertices[3].y = (float)v1.Y; vertices[3].z = (float)floor1; vertices[3].w = 1.0f; - vertices[3].varying[0] = (float)texcoords.u1; - vertices[3].varying[1] = (float)texcoords.v2; + + if (tex) + { + PolyWallTextureCoords texcoords(tex, LineSeg, Line, Side, Texpart, TopZ, BottomZ, UnpeggedCeil); + vertices[0].varying[0] = (float)texcoords.u1; + vertices[0].varying[1] = (float)texcoords.v1; + vertices[1].varying[0] = (float)texcoords.u2; + vertices[1].varying[1] = (float)texcoords.v1; 
+ vertices[2].varying[0] = (float)texcoords.u2; + vertices[2].varying[1] = (float)texcoords.v2; + vertices[3].varying[0] = (float)texcoords.u1; + vertices[3].varying[1] = (float)texcoords.v2; + } // Masked walls clamp to the 0-1 range (no texture repeat) if (Masked) @@ -292,10 +253,17 @@ void RenderPolyWall::Render(const TriMatrix &worldToClip) args.ccw = true; args.stenciltestvalue = StencilValue; args.stencilwritevalue = StencilValue + 1; - args.SetTexture(tex); + if (tex) + args.SetTexture(tex); args.SetColormap(Line->frontsector->ColorMap); - if (!Masked) + if (Polyportal) + { + args.stencilwritevalue = Polyportal->StencilValue; + PolyTriangleDrawer::draw(args, TriDrawVariant::Stencil, TriBlendMode::Copy); + Polyportal->Shape.push_back({ args.vinput, args.vcount, args.ccw, args.uniforms.subsectorDepth }); + } + else if (!Masked) { PolyTriangleDrawer::draw(args, TriDrawVariant::DrawNormal, TriBlendMode::Copy); PolyTriangleDrawer::draw(args, TriDrawVariant::Stencil, TriBlendMode::Copy); diff --git a/src/r_poly_wall.h b/src/r_poly_wall.h index 31c236dd4d..d1196df9f9 100644 --- a/src/r_poly_wall.h +++ b/src/r_poly_wall.h @@ -54,6 +54,7 @@ public: bool Masked = false; uint32_t SubsectorDepth = 0; uint32_t StencilValue = 0; + PolyDrawLinePortal *Polyportal = nullptr; private: void ClampHeight(TriVertex &v1, TriVertex &v2); From 70181f41466243ea31818b60a94f61a302402582 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 26 Nov 2016 07:35:04 +0100 Subject: [PATCH 387/912] Always access args.uniforms directly instead of first creating a TriUniforms variable --- src/r_poly_particle.cpp | 23 +++++++++++------------ src/r_poly_plane.cpp | 24 ++++++++++-------------- src/r_poly_sky.cpp | 9 +++------ src/r_poly_wall.cpp | 9 +++------ src/r_poly_wallsprite.cpp | 14 ++++++-------- 5 files changed, 33 insertions(+), 46 deletions(-) diff --git a/src/r_poly_particle.cpp b/src/r_poly_particle.cpp index e69803e1e2..8151d4888e 100644 --- a/src/r_poly_particle.cpp +++ 
b/src/r_poly_particle.cpp @@ -70,33 +70,32 @@ void RenderPolyParticle::Render(const TriMatrix &worldToClip, particle_t *partic // int color = (particle->color >> 24) & 0xff; // pal index, I think bool fullbrightSprite = particle->bright != 0; - TriUniforms uniforms; + PolyDrawArgs args; + if (fullbrightSprite || fixedlightlev >= 0 || fixedcolormap) { - uniforms.light = 256; - uniforms.flags = TriUniforms::fixed_light; + args.uniforms.light = 256; + args.uniforms.flags = TriUniforms::fixed_light; } else { - uniforms.light = (uint32_t)((sub->sector->lightlevel + actualextralight) / 255.0f * 256.0f); - uniforms.flags = 0; + args.uniforms.light = (uint32_t)((sub->sector->lightlevel + actualextralight) / 255.0f * 256.0f); + args.uniforms.flags = 0; } - uniforms.subsectorDepth = subsectorDepth; + args.uniforms.subsectorDepth = subsectorDepth; if (r_swtruecolor) { uint32_t alpha = particle->trans; - uniforms.color = (alpha << 24) | (particle->color & 0xffffff); + args.uniforms.color = (alpha << 24) | (particle->color & 0xffffff); } else { - uniforms.color = ((uint32_t)particle->color) >> 24; - uniforms.srcalpha = particle->trans; - uniforms.destalpha = 255 - particle->trans; + args.uniforms.color = ((uint32_t)particle->color) >> 24; + args.uniforms.srcalpha = particle->trans; + args.uniforms.destalpha = 255 - particle->trans; } - PolyDrawArgs args; - args.uniforms = uniforms; args.objectToClip = &worldToClip; args.vinput = vertices; args.vcount = 4; diff --git a/src/r_poly_plane.cpp b/src/r_poly_plane.cpp index e64d085e78..8d4067be02 100644 --- a/src/r_poly_plane.cpp +++ b/src/r_poly_plane.cpp @@ -103,12 +103,12 @@ void RenderPolyPlane::Render3DFloor(const TriMatrix &worldToClip, subsector_t *s lightlevel = *light->p_lightlevel; } - TriUniforms uniforms; - uniforms.light = (uint32_t)(lightlevel / 255.0f * 256.0f); + PolyDrawArgs args; + args.uniforms.light = (uint32_t)(lightlevel / 255.0f * 256.0f); if (fixedlightlev >= 0 || fixedcolormap) - uniforms.light = 256; - 
uniforms.flags = 0; - uniforms.subsectorDepth = subsectorDepth; + args.uniforms.light = 256; + args.uniforms.flags = 0; + args.uniforms.subsectorDepth = subsectorDepth; TriVertex *vertices = PolyVertexBuffer::GetVertices(sub->numlines); if (!vertices) @@ -131,8 +131,6 @@ void RenderPolyPlane::Render3DFloor(const TriMatrix &worldToClip, subsector_t *s } } - PolyDrawArgs args; - args.uniforms = uniforms; args.objectToClip = &worldToClip; args.vinput = vertices; args.vcount = sub->numlines; @@ -217,12 +215,12 @@ void RenderPolyPlane::Render(const TriMatrix &worldToClip, subsector_t *sub, uin bool isSky = picnum == skyflatnum; - TriUniforms uniforms; - uniforms.light = (uint32_t)(frontsector->lightlevel / 255.0f * 256.0f); + PolyDrawArgs args; + args.uniforms.light = (uint32_t)(frontsector->lightlevel / 255.0f * 256.0f); if (fixedlightlev >= 0 || fixedcolormap) - uniforms.light = 256; - uniforms.flags = 0; - uniforms.subsectorDepth = isSky ? RenderPolyPortal::SkySubsectorDepth : subsectorDepth; + args.uniforms.light = 256; + args.uniforms.flags = 0; + args.uniforms.subsectorDepth = isSky ? 
RenderPolyPortal::SkySubsectorDepth : subsectorDepth; TriVertex *vertices = PolyVertexBuffer::GetVertices(sub->numlines); if (!vertices) @@ -245,8 +243,6 @@ void RenderPolyPlane::Render(const TriMatrix &worldToClip, subsector_t *sub, uin } } - PolyDrawArgs args; - args.uniforms = uniforms; args.objectToClip = &worldToClip; args.vinput = vertices; args.vcount = sub->numlines; diff --git a/src/r_poly_sky.cpp b/src/r_poly_sky.cpp index ad8d4b861e..97f8203fac 100644 --- a/src/r_poly_sky.cpp +++ b/src/r_poly_sky.cpp @@ -50,15 +50,12 @@ void PolySkyDome::Render(const TriMatrix &worldToClip) TriMatrix objectToWorld = TriMatrix::translate((float)ViewPos.X, (float)ViewPos.Y, (float)ViewPos.Z); objectToClip = worldToClip * objectToWorld; - TriUniforms uniforms; - uniforms.light = 256; - uniforms.flags = 0; - uniforms.subsectorDepth = RenderPolyPortal::SkySubsectorDepth; - int rc = mRows + 1; PolyDrawArgs args; - args.uniforms = uniforms; + args.uniforms.light = 256; + args.uniforms.flags = 0; + args.uniforms.subsectorDepth = RenderPolyPortal::SkySubsectorDepth; args.objectToClip = &objectToClip; args.stenciltestvalue = 255; args.stencilwritevalue = 255; diff --git a/src/r_poly_wall.cpp b/src/r_poly_wall.cpp index d6ade7279a..6e09faa8a4 100644 --- a/src/r_poly_wall.cpp +++ b/src/r_poly_wall.cpp @@ -239,13 +239,10 @@ void RenderPolyWall::Render(const TriMatrix &worldToClip) ClampHeight(vertices[1], vertices[2]); } - TriUniforms uniforms; - uniforms.light = (uint32_t)(GetLightLevel() / 255.0f * 256.0f); - uniforms.flags = 0; - uniforms.subsectorDepth = SubsectorDepth; - PolyDrawArgs args; - args.uniforms = uniforms; + args.uniforms.light = (uint32_t)(GetLightLevel() / 255.0f * 256.0f); + args.uniforms.flags = 0; + args.uniforms.subsectorDepth = SubsectorDepth; args.objectToClip = &worldToClip; args.vinput = vertices; args.vcount = 4; diff --git a/src/r_poly_wallsprite.cpp b/src/r_poly_wallsprite.cpp index 553235957e..c8e0abcc43 100644 --- a/src/r_poly_wallsprite.cpp +++ 
b/src/r_poly_wallsprite.cpp @@ -98,21 +98,19 @@ void RenderPolyWallSprite::Render(const TriMatrix &worldToClip, AActor *thing, s bool fullbrightSprite = ((thing->renderflags & RF_FULLBRIGHT) || (thing->flags5 & MF5_BRIGHT)); - TriUniforms uniforms; + PolyDrawArgs args; if (fullbrightSprite || fixedlightlev >= 0 || fixedcolormap) { - uniforms.light = 256; - uniforms.flags = TriUniforms::fixed_light; + args.uniforms.light = 256; + args.uniforms.flags = TriUniforms::fixed_light; } else { - uniforms.light = (uint32_t)((thing->Sector->lightlevel + actualextralight) / 255.0f * 256.0f); - uniforms.flags = 0; + args.uniforms.light = (uint32_t)((thing->Sector->lightlevel + actualextralight) / 255.0f * 256.0f); + args.uniforms.flags = 0; } - uniforms.subsectorDepth = subsectorDepth; + args.uniforms.subsectorDepth = subsectorDepth; - PolyDrawArgs args; - args.uniforms = uniforms; args.objectToClip = &worldToClip; args.vinput = vertices; args.vcount = 4; From 8bbb63e91328d37a0f3df8240a7c089f25f03eb5 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 26 Nov 2016 09:01:58 +0100 Subject: [PATCH 388/912] Add portal plane clipping --- src/r_poly.cpp | 2 +- src/r_poly_cull.cpp | 12 +++++++++++- src/r_poly_cull.h | 3 ++- src/r_poly_intersection.h | 11 +++++++++++ src/r_poly_portal.cpp | 16 ++++++++++++---- src/r_poly_portal.h | 3 ++- 6 files changed, 39 insertions(+), 8 deletions(-) diff --git a/src/r_poly.cpp b/src/r_poly.cpp index 7297741744..b262ca01d5 100644 --- a/src/r_poly.cpp +++ b/src/r_poly.cpp @@ -44,7 +44,7 @@ void RenderPolyScene::Render() ClearBuffers(); SetSceneViewport(); SetupPerspectiveMatrix(); - MainPortal.SetViewpoint(WorldToClip, GetNextStencilValue()); + MainPortal.SetViewpoint(WorldToClip, Vec4f(0.0f), GetNextStencilValue()); MainPortal.Render(0); Skydome.Render(WorldToClip); MainPortal.RenderTranslucent(0); diff --git a/src/r_poly_cull.cpp b/src/r_poly_cull.cpp index 04d5c3c244..37ddf91037 100644 --- a/src/r_poly_cull.cpp +++ b/src/r_poly_cull.cpp @@ 
-28,11 +28,12 @@ #include "r_poly_cull.h" #include "r_poly.h" -void PolyCull::CullScene(const TriMatrix &worldToClip) +void PolyCull::CullScene(const TriMatrix &worldToClip, const Vec4f &portalClipPlane) { ClearSolidSegments(); PvsSectors.clear(); frustumPlanes = FrustumPlanes(worldToClip); + PortalClipPlane = portalClipPlane; // Cull front to back MaxCeilingHeight = 0.0; @@ -174,6 +175,11 @@ bool PolyCull::CheckBBox(float *bspcoord) if (result == IntersectionTest::outside) return false; + // Skip if its in front of the portal: + + if (PortalClipPlane != Vec4f(0.0f) && IntersectionTest::plane_aabb(PortalClipPlane, aabb) == IntersectionTest::outside) + return false; + // Occlusion test using solid segments: static const int lines[4][4] = @@ -219,6 +225,10 @@ bool PolyCull::GetSegmentRangeForLine(double x1, double y1, double x2, double y2 { double znear = 5.0; + // Cull if entirely behind the portal clip plane (tbd: should we clip the segment?) + if (Vec4f::dot(PortalClipPlane, Vec4f((float)x1, (float)y1, 0.0f, 1.0f)) < 0.0f && Vec4f::dot(PortalClipPlane, Vec4f((float)x2, (float)y2, 0.0f, 1.0f)) < 0.0f) + return false; + // Transform to 2D view space: x1 = x1 - ViewPos.X; y1 = y1 - ViewPos.Y; diff --git a/src/r_poly_cull.h b/src/r_poly_cull.h index e011ef5837..f1fe56ed53 100644 --- a/src/r_poly_cull.h +++ b/src/r_poly_cull.h @@ -28,7 +28,7 @@ class PolyCull { public: - void CullScene(const TriMatrix &worldToClip); + void CullScene(const TriMatrix &worldToClip, const Vec4f &portalClipPlane); bool GetSegmentRangeForLine(double x1, double y1, double x2, double y2, int &sx1, int &sx2) const; void MarkSegmentCulled(int x1, int x2); @@ -59,4 +59,5 @@ private: const int SolidCullScale = 3000; FrustumPlanes frustumPlanes; + Vec4f PortalClipPlane; }; diff --git a/src/r_poly_intersection.h b/src/r_poly_intersection.h index b06bc12001..2ce164e5e7 100644 --- a/src/r_poly_intersection.h +++ b/src/r_poly_intersection.h @@ -26,6 +26,8 @@ #include #include +class Vec3f; + class 
Vec4f { public: @@ -33,6 +35,7 @@ public: Vec4f(const Vec4f &) = default; Vec4f(float x, float y, float z, float w) : x(x), y(y), z(z), w(w) { } Vec4f(float v) : x(v), y(v), z(v), w(v) { } + Vec4f(const Vec3f &xyz, float w); static float dot(const Vec4f &a, const Vec4f &b) { return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w; } static float dot3(const Vec4f &a, const Vec4f &b) { return a.x * b.x + a.y * b.y + a.z * b.z; } @@ -51,6 +54,9 @@ public: float x, y, z, w; }; +inline bool operator==(const Vec4f &a, const Vec4f &b) { return a.x == b.x && a.y == b.y && a.z == b.z && a.w == b.w; } +inline bool operator!=(const Vec4f &a, const Vec4f &b) { return a.x != b.x || a.y != b.y || a.z != b.z || a.w != b.w; } + class Vec3f { public: @@ -75,6 +81,9 @@ public: float x, y, z; }; +inline bool operator==(const Vec3f &a, const Vec3f &b) { return a.x == b.x && a.y == b.y && a.z == b.z; } +inline bool operator!=(const Vec3f &a, const Vec3f &b) { return a.x != b.x || a.y != b.y || a.z != b.z; } + inline Vec3f operator+(const Vec3f &a, const Vec3f &b) { return Vec3f(a.x + b.x, a.y + b.y, a.z + b.z); } inline Vec3f operator-(const Vec3f &a, const Vec3f &b) { return Vec3f(a.x - b.x, a.y - b.y, a.z - b.z); } inline Vec3f operator*(const Vec3f &a, const Vec3f &b) { return Vec3f(a.x * b.x, a.y * b.y, a.z * b.z); } @@ -90,6 +99,8 @@ inline Vec3f operator-(float a, const Vec3f &b) { return Vec3f(a - b.x, a - b.y, inline Vec3f operator*(float a, const Vec3f &b) { return Vec3f(a * b.x, a * b.y, a * b.z); } inline Vec3f operator/(float a, const Vec3f &b) { return Vec3f(a / b.x, a / b.y, a / b.z); } +inline Vec4f::Vec4f(const Vec3f &xyz, float w) : x(xyz.x), y(xyz.y), z(xyz.z), w(w) { } + typedef TriMatrix Mat4f; class AxisAlignedBoundingBox diff --git a/src/r_poly_portal.cpp b/src/r_poly_portal.cpp index 715d475823..757285d772 100644 --- a/src/r_poly_portal.cpp +++ b/src/r_poly_portal.cpp @@ -45,16 +45,17 @@ RenderPolyPortal::~RenderPolyPortal() { } -void
RenderPolyPortal::SetViewpoint(const TriMatrix &worldToClip, uint32_t stencilValue) +void RenderPolyPortal::SetViewpoint(const TriMatrix &worldToClip, const Vec4f &portalPlane, uint32_t stencilValue) { WorldToClip = worldToClip; StencilValue = stencilValue; + PortalPlane = portalPlane; } void RenderPolyPortal::Render(int portalDepth) { ClearBuffers(); - Cull.CullScene(WorldToClip); + Cull.CullScene(WorldToClip, PortalPlane); RenderSectors(); RenderPortals(portalDepth); } @@ -340,7 +341,7 @@ void PolyDrawSectorPortal::Render(int portalDepth) TriMatrix::translate((float)-ViewPos.X, (float)-ViewPos.Y, (float)-ViewPos.Z); TriMatrix worldToClip = TriMatrix::perspective(fovy, ratio, 5.0f, 65535.0f) * worldToView; - RenderPortal.SetViewpoint(worldToClip, StencilValue); + RenderPortal.SetViewpoint(worldToClip, Vec4f(0.0f), StencilValue); RenderPortal.Render(portalDepth); RestoreGlobals(); @@ -440,7 +441,14 @@ void PolyDrawLinePortal::Render(int portalDepth) TriMatrix::translate((float)-ViewPos.X, (float)-ViewPos.Y, (float)-ViewPos.Z); TriMatrix worldToClip = TriMatrix::perspective(fovy, ratio, 5.0f, 65535.0f) * worldToView; - RenderPortal.SetViewpoint(worldToClip, StencilValue); + // Calculate plane clipping + DVector2 planePos = Portal->mDestination->v1->fPos(); + DVector2 planeNormal = (Portal->mDestination->v2->fPos() - Portal->mDestination->v1->fPos()).Rotated90CW(); + planeNormal.MakeUnit(); + double planeD = -(planeNormal | (planePos + planeNormal * 5.0)); + Vec4f portalPlane((float)planeNormal.X, (float)planeNormal.Y, 0.0f, (float)planeD); + + RenderPortal.SetViewpoint(worldToClip, portalPlane, StencilValue); RenderPortal.Render(portalDepth); RestoreGlobals(); diff --git a/src/r_poly_portal.h b/src/r_poly_portal.h index 56a125d2ca..e2b131b323 100644 --- a/src/r_poly_portal.h +++ b/src/r_poly_portal.h @@ -83,7 +83,7 @@ class RenderPolyPortal public: RenderPolyPortal(); ~RenderPolyPortal(); - void SetViewpoint(const TriMatrix &worldToClip, uint32_t stencilValue); + 
void SetViewpoint(const TriMatrix &worldToClip, const Vec4f &portalPlane, uint32_t stencilValue); void Render(int portalDepth); void RenderTranslucent(int portalDepth); @@ -99,6 +99,7 @@ private: SpriteRange GetSpritesForSector(sector_t *sector); TriMatrix WorldToClip; + Vec4f PortalPlane; uint32_t StencilValue = 0; PolyCull Cull; uint32_t NextSubsectorDepth = 0; From c37ce1fdbc57bb788ce7e2f950edc4a444cd58ed Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 26 Nov 2016 10:49:29 +0100 Subject: [PATCH 389/912] Add plane clipping to the triangle drawer --- src/r_compiler/llvmdrawers.h | 20 ---------- src/r_poly.cpp | 2 +- src/r_poly_cull.cpp | 2 +- src/r_poly_decal.cpp | 7 ++-- src/r_poly_decal.h | 6 ++- src/r_poly_particle.cpp | 3 +- src/r_poly_particle.h | 4 +- src/r_poly_plane.cpp | 16 ++++---- src/r_poly_plane.h | 7 ++-- src/r_poly_portal.cpp | 21 +++++----- src/r_poly_sky.cpp | 1 + src/r_poly_sprite.cpp | 4 +- src/r_poly_sprite.h | 4 +- src/r_poly_triangle.cpp | 74 ++++++++++++++++++++---------------- src/r_poly_triangle.h | 41 ++++++++++++++++++-- src/r_poly_wall.cpp | 19 ++++----- src/r_poly_wall.h | 7 ++-- src/r_poly_wallsprite.cpp | 3 +- src/r_poly_wallsprite.h | 4 +- 19 files changed, 146 insertions(+), 99 deletions(-) diff --git a/src/r_compiler/llvmdrawers.h b/src/r_compiler/llvmdrawers.h index 4ce3a3a037..c5eb96f4c9 100644 --- a/src/r_compiler/llvmdrawers.h +++ b/src/r_compiler/llvmdrawers.h @@ -191,26 +191,6 @@ struct TriVertex float varying[NumVarying]; }; -struct TriMatrix -{ - static TriMatrix null(); - static TriMatrix identity(); - static TriMatrix translate(float x, float y, float z); - static TriMatrix scale(float x, float y, float z); - static TriMatrix rotate(float angle, float x, float y, float z); - static TriMatrix swapYZ(); - static TriMatrix perspective(float fovy, float aspect, float near, float far); - static TriMatrix frustum(float left, float right, float bottom, float top, float near, float far); - - static TriMatrix 
worldToView(); // Software renderer world to view space transform - static TriMatrix viewToClip(); // Software renderer shearing projection - - TriVertex operator*(TriVertex v) const; - TriMatrix operator*(const TriMatrix &m) const; - - float matrix[16]; -}; - struct TriUniforms { uint32_t light; diff --git a/src/r_poly.cpp b/src/r_poly.cpp index b262ca01d5..186077806c 100644 --- a/src/r_poly.cpp +++ b/src/r_poly.cpp @@ -44,7 +44,7 @@ void RenderPolyScene::Render() ClearBuffers(); SetSceneViewport(); SetupPerspectiveMatrix(); - MainPortal.SetViewpoint(WorldToClip, Vec4f(0.0f), GetNextStencilValue()); + MainPortal.SetViewpoint(WorldToClip, Vec4f(0.0f, 0.0f, 0.0f, 1.0f), GetNextStencilValue()); MainPortal.Render(0); Skydome.Render(WorldToClip); MainPortal.RenderTranslucent(0); diff --git a/src/r_poly_cull.cpp b/src/r_poly_cull.cpp index 37ddf91037..9ee71c28a8 100644 --- a/src/r_poly_cull.cpp +++ b/src/r_poly_cull.cpp @@ -177,7 +177,7 @@ bool PolyCull::CheckBBox(float *bspcoord) // Skip if its in front of the portal: - if (PortalClipPlane != Vec4f(0.0f) && IntersectionTest::plane_aabb(PortalClipPlane, aabb) == IntersectionTest::outside) + if (IntersectionTest::plane_aabb(PortalClipPlane, aabb) == IntersectionTest::outside) return false; // Occlusion test using solid segments: diff --git a/src/r_poly_decal.cpp b/src/r_poly_decal.cpp index ded64a0957..9d47c014a0 100644 --- a/src/r_poly_decal.cpp +++ b/src/r_poly_decal.cpp @@ -29,7 +29,7 @@ #include "r_poly.h" #include "a_sharedglobal.h" -void RenderPolyDecal::RenderWallDecals(const TriMatrix &worldToClip, const seg_t *line, uint32_t subsectorDepth, uint32_t stencilValue) +void RenderPolyDecal::RenderWallDecals(const TriMatrix &worldToClip, const Vec4f &clipPlane, const seg_t *line, uint32_t subsectorDepth, uint32_t stencilValue) { if (line->linedef == nullptr && line->sidedef == nullptr) return; @@ -37,11 +37,11 @@ void RenderPolyDecal::RenderWallDecals(const TriMatrix &worldToClip, const seg_t for (DBaseDecal *decal = 
line->sidedef->AttachedDecals; decal != nullptr; decal = decal->WallNext) { RenderPolyDecal render; - render.Render(worldToClip, decal, line, subsectorDepth, stencilValue); + render.Render(worldToClip, clipPlane, decal, line, subsectorDepth, stencilValue); } } -void RenderPolyDecal::Render(const TriMatrix &worldToClip, DBaseDecal *decal, const seg_t *line, uint32_t subsectorDepth, uint32_t stencilValue) +void RenderPolyDecal::Render(const TriMatrix &worldToClip, const Vec4f &clipPlane, DBaseDecal *decal, const seg_t *line, uint32_t subsectorDepth, uint32_t stencilValue) { if (decal->RenderFlags & RF_INVISIBLE || !viewactive || !decal->PicNum.isValid()) return; @@ -166,5 +166,6 @@ void RenderPolyDecal::Render(const TriMatrix &worldToClip, DBaseDecal *decal, co args.stenciltestvalue = stencilValue; args.stencilwritevalue = stencilValue; //mode = R_SetPatchStyle (decal->RenderStyle, (float)decal->Alpha, decal->Translation, decal->AlphaColor); + args.SetClipPlane(clipPlane.x, clipPlane.y, clipPlane.z, clipPlane.w); PolyTriangleDrawer::draw(args, TriDrawVariant::DrawSubsector, TriBlendMode::Shaded); } diff --git a/src/r_poly_decal.h b/src/r_poly_decal.h index c836a0cd73..068887d90c 100644 --- a/src/r_poly_decal.h +++ b/src/r_poly_decal.h @@ -24,11 +24,13 @@ #include "r_poly_triangle.h" +class Vec4f; + class RenderPolyDecal { public: - static void RenderWallDecals(const TriMatrix &worldToClip, const seg_t *line, uint32_t subsectorDepth, uint32_t stencilValue); + static void RenderWallDecals(const TriMatrix &worldToClip, const Vec4f &clipPlane, const seg_t *line, uint32_t subsectorDepth, uint32_t stencilValue); private: - void Render(const TriMatrix &worldToClip, DBaseDecal *decal, const seg_t *line, uint32_t subsectorDepth, uint32_t stencilValue); + void Render(const TriMatrix &worldToClip, const Vec4f &clipPlane, DBaseDecal *decal, const seg_t *line, uint32_t subsectorDepth, uint32_t stencilValue); }; diff --git a/src/r_poly_particle.cpp b/src/r_poly_particle.cpp index 
8151d4888e..73e1819d4f 100644 --- a/src/r_poly_particle.cpp +++ b/src/r_poly_particle.cpp @@ -28,7 +28,7 @@ #include "r_poly_particle.h" #include "r_poly.h" -void RenderPolyParticle::Render(const TriMatrix &worldToClip, particle_t *particle, subsector_t *sub, uint32_t subsectorDepth, uint32_t stencilValue) +void RenderPolyParticle::Render(const TriMatrix &worldToClip, const Vec4f &clipPlane, particle_t *particle, subsector_t *sub, uint32_t subsectorDepth, uint32_t stencilValue) { DVector3 pos = particle->Pos; double psize = particle->size / 8.0; @@ -104,5 +104,6 @@ void RenderPolyParticle::Render(const TriMatrix &worldToClip, particle_t *partic args.stenciltestvalue = stencilValue; args.stencilwritevalue = stencilValue; args.SetColormap(sub->sector->ColorMap); + args.SetClipPlane(clipPlane.x, clipPlane.y, clipPlane.z, clipPlane.w); PolyTriangleDrawer::draw(args, TriDrawVariant::FillSubsector, TriBlendMode::AlphaBlend); } diff --git a/src/r_poly_particle.h b/src/r_poly_particle.h index 3fbb2d2abd..fd581fa83c 100644 --- a/src/r_poly_particle.h +++ b/src/r_poly_particle.h @@ -25,8 +25,10 @@ #include "r_poly_triangle.h" #include "p_effect.h" +class Vec4f; + class RenderPolyParticle { public: - void Render(const TriMatrix &worldToClip, particle_t *particle, subsector_t *sub, uint32_t subsectorDepth, uint32_t stencilValue); + void Render(const TriMatrix &worldToClip, const Vec4f &clipPlane, particle_t *particle, subsector_t *sub, uint32_t subsectorDepth, uint32_t stencilValue); }; diff --git a/src/r_poly_plane.cpp b/src/r_poly_plane.cpp index 8d4067be02..76ede69603 100644 --- a/src/r_poly_plane.cpp +++ b/src/r_poly_plane.cpp @@ -32,7 +32,7 @@ EXTERN_CVAR(Int, r_3dfloors) -void RenderPolyPlane::RenderPlanes(const TriMatrix &worldToClip, subsector_t *sub, uint32_t subsectorDepth, uint32_t stencilValue, double skyCeilingHeight, double skyFloorHeight, std::vector<std::unique_ptr<PolyDrawSectorPortal>> &sectorPortals) +void RenderPolyPlane::RenderPlanes(const TriMatrix &worldToClip, const Vec4f &clipPlane,
subsector_t *sub, uint32_t subsectorDepth, uint32_t stencilValue, double skyCeilingHeight, double skyFloorHeight, std::vector<std::unique_ptr<PolyDrawSectorPortal>> &sectorPortals) { RenderPolyPlane plane; @@ -58,7 +58,7 @@ void RenderPolyPlane::RenderPlanes(const TriMatrix &worldToClip, subsector_t *su double fakeHeight = fakeFloor->top.plane->ZatPoint(frontsector->centerspot); if (fakeHeight < ViewPos.Z && fakeHeight > frontsector->floorplane.ZatPoint(frontsector->centerspot)) { - plane.Render3DFloor(worldToClip, sub, subsectorDepth, stencilValue, false, fakeFloor); + plane.Render3DFloor(worldToClip, clipPlane, sub, subsectorDepth, stencilValue, false, fakeFloor); } } @@ -79,16 +79,16 @@ void RenderPolyPlane::RenderPlanes(const TriMatrix &worldToClip, subsector_t *su double fakeHeight = fakeFloor->bottom.plane->ZatPoint(frontsector->centerspot); if (fakeHeight > ViewPos.Z && fakeHeight < frontsector->ceilingplane.ZatPoint(frontsector->centerspot)) { - plane.Render3DFloor(worldToClip, sub, subsectorDepth, stencilValue, true, fakeFloor); + plane.Render3DFloor(worldToClip, clipPlane, sub, subsectorDepth, stencilValue, true, fakeFloor); } } } - plane.Render(worldToClip, sub, subsectorDepth, stencilValue, true, skyCeilingHeight, sectorPortals); - plane.Render(worldToClip, sub, subsectorDepth, stencilValue, false, skyFloorHeight, sectorPortals); + plane.Render(worldToClip, clipPlane, sub, subsectorDepth, stencilValue, true, skyCeilingHeight, sectorPortals); + plane.Render(worldToClip, clipPlane, sub, subsectorDepth, stencilValue, false, skyFloorHeight, sectorPortals); } -void RenderPolyPlane::Render3DFloor(const TriMatrix &worldToClip, subsector_t *sub, uint32_t subsectorDepth, uint32_t stencilValue, bool ceiling, F3DFloor *fakeFloor) +void RenderPolyPlane::Render3DFloor(const TriMatrix &worldToClip, const Vec4f &clipPlane, subsector_t *sub, uint32_t subsectorDepth, uint32_t stencilValue, bool ceiling, F3DFloor *fakeFloor) { FTextureID picnum = ceiling ?
*fakeFloor->bottom.texture : *fakeFloor->top.texture; FTexture *tex = TexMan(picnum); @@ -140,11 +140,12 @@ void RenderPolyPlane::Render3DFloor(const TriMatrix &worldToClip, subsector_t *s args.stencilwritevalue = stencilValue + 1; args.SetTexture(tex); args.SetColormap(sub->sector->ColorMap); + args.SetClipPlane(clipPlane.x, clipPlane.y, clipPlane.z, clipPlane.w); PolyTriangleDrawer::draw(args, TriDrawVariant::DrawNormal, TriBlendMode::Copy); PolyTriangleDrawer::draw(args, TriDrawVariant::Stencil, TriBlendMode::Copy); } -void RenderPolyPlane::Render(const TriMatrix &worldToClip, subsector_t *sub, uint32_t subsectorDepth, uint32_t stencilValue, bool ceiling, double skyHeight, std::vector<std::unique_ptr<PolyDrawSectorPortal>> &sectorPortals) +void RenderPolyPlane::Render(const TriMatrix &worldToClip, const Vec4f &clipPlane, subsector_t *sub, uint32_t subsectorDepth, uint32_t stencilValue, bool ceiling, double skyHeight, std::vector<std::unique_ptr<PolyDrawSectorPortal>> &sectorPortals) { FSectorPortal *portal = sub->sector->ValidatePortal(ceiling ? sector_t::ceiling : sector_t::floor); PolyDrawSectorPortal *polyportal = nullptr; @@ -251,6 +252,7 @@ void RenderPolyPlane::Render(const TriMatrix &worldToClip, subsector_t *sub, uin args.stenciltestvalue = stencilValue; args.stencilwritevalue = stencilValue + 1; args.SetColormap(frontsector->ColorMap); + args.SetClipPlane(clipPlane.x, clipPlane.y, clipPlane.z, clipPlane.w); if (!isSky) { diff --git a/src/r_poly_plane.h b/src/r_poly_plane.h index 6e418bf502..3006ce86b4 100644 --- a/src/r_poly_plane.h +++ b/src/r_poly_plane.h @@ -25,14 +25,15 @@ #include "r_poly_triangle.h" class PolyDrawSectorPortal; +class Vec4f; class RenderPolyPlane { public: - static void RenderPlanes(const TriMatrix &worldToClip, subsector_t *sub, uint32_t subsectorDepth, uint32_t stencilValue, double skyCeilingHeight, double skyFloorHeight, std::vector<std::unique_ptr<PolyDrawSectorPortal>> &sectorPortals); + static void RenderPlanes(const TriMatrix &worldToClip, const Vec4f &clipPlane, subsector_t *sub, uint32_t subsectorDepth, uint32_t stencilValue, double
skyCeilingHeight, double skyFloorHeight, std::vector> &sectorPortals); private: - void Render3DFloor(const TriMatrix &worldToClip, subsector_t *sub, uint32_t subsectorDepth, uint32_t stencilValue, bool ceiling, F3DFloor *fakefloor); - void Render(const TriMatrix &worldToClip, subsector_t *sub, uint32_t subsectorDepth, uint32_t stencilValue, bool ceiling, double skyHeight, std::vector> &sectorPortals); + void Render3DFloor(const TriMatrix &worldToClip, const Vec4f &clipPlane, subsector_t *sub, uint32_t subsectorDepth, uint32_t stencilValue, bool ceiling, F3DFloor *fakefloor); + void Render(const TriMatrix &worldToClip, const Vec4f &clipPlane, subsector_t *sub, uint32_t subsectorDepth, uint32_t stencilValue, bool ceiling, double skyHeight, std::vector> &sectorPortals); TriVertex PlaneVertex(vertex_t *v1, double height); }; diff --git a/src/r_poly_portal.cpp b/src/r_poly_portal.cpp index 757285d772..3257afc702 100644 --- a/src/r_poly_portal.cpp +++ b/src/r_poly_portal.cpp @@ -94,7 +94,7 @@ void RenderPolyPortal::RenderSubsector(subsector_t *sub) if (sub->sector->CenterFloor() != sub->sector->CenterCeiling()) { - RenderPolyPlane::RenderPlanes(WorldToClip, sub, subsectorDepth, StencilValue, Cull.MaxCeilingHeight, Cull.MinFloorHeight, SectorPortals); + RenderPolyPlane::RenderPlanes(WorldToClip, PortalPlane, sub, subsectorDepth, StencilValue, Cull.MaxCeilingHeight, Cull.MinFloorHeight, SectorPortals); } for (uint32_t i = 0; i < sub->numlines; i++) @@ -178,12 +178,12 @@ void RenderPolyPortal::RenderLine(subsector_t *sub, seg_t *line, sector_t *front if (!(fakeFloor->flags & FF_EXISTS)) continue; if (!(fakeFloor->flags & FF_RENDERPLANES)) continue; if (!fakeFloor->model) continue; - RenderPolyWall::Render3DFloorLine(WorldToClip, line, frontsector, subsectorDepth, StencilValue, fakeFloor, SubsectorTranslucentWalls); + RenderPolyWall::Render3DFloorLine(WorldToClip, PortalPlane, line, frontsector, subsectorDepth, StencilValue, fakeFloor, SubsectorTranslucentWalls); } } // Render wall, and 
update culling info if its an occlusion blocker - if (RenderPolyWall::RenderLine(WorldToClip, line, frontsector, subsectorDepth, StencilValue, SubsectorTranslucentWalls, LinePortals)) + if (RenderPolyWall::RenderLine(WorldToClip, PortalPlane, line, frontsector, subsectorDepth, StencilValue, SubsectorTranslucentWalls, LinePortals)) { if (hasSegmentRange) Cull.MarkSegmentCulled(sx1, sx2); @@ -208,6 +208,7 @@ void RenderPolyPortal::RenderPortals(int portalDepth) args.uniforms.color = 0; args.uniforms.light = 256; args.uniforms.flags = TriUniforms::fixed_light; + args.SetClipPlane(PortalPlane.x, PortalPlane.y, PortalPlane.z, PortalPlane.w); for (auto &portal : SectorPortals) { @@ -253,6 +254,7 @@ void RenderPolyPortal::RenderTranslucent(int portalDepth) args.mode = TriangleDrawMode::Fan; args.stenciltestvalue = portal->StencilValue + 1; args.stencilwritevalue = StencilValue; + args.SetClipPlane(PortalPlane.x, PortalPlane.y, PortalPlane.z, PortalPlane.w); for (const auto &verts : portal->Shape) { args.vinput = verts.Vertices; @@ -273,6 +275,7 @@ void RenderPolyPortal::RenderTranslucent(int portalDepth) args.mode = TriangleDrawMode::Fan; args.stenciltestvalue = portal->StencilValue + 1; args.stencilwritevalue = StencilValue; + args.SetClipPlane(PortalPlane.x, PortalPlane.y, PortalPlane.z, PortalPlane.w); for (const auto &verts : portal->Shape) { args.vinput = verts.Vertices; @@ -290,21 +293,21 @@ void RenderPolyPortal::RenderTranslucent(int portalDepth) if (obj.particle) { RenderPolyParticle spr; - spr.Render(WorldToClip, obj.particle, obj.sub, obj.subsectorDepth, StencilValue + 1); + spr.Render(WorldToClip, PortalPlane, obj.particle, obj.sub, obj.subsectorDepth, StencilValue + 1); } else if (!obj.thing) { - obj.wall.Render(WorldToClip); + obj.wall.Render(WorldToClip, PortalPlane); } else if ((obj.thing->renderflags & RF_SPRITETYPEMASK) == RF_WALLSPRITE) { RenderPolyWallSprite wallspr; - wallspr.Render(WorldToClip, obj.thing, obj.sub, obj.subsectorDepth, StencilValue + 
1); + wallspr.Render(WorldToClip, PortalPlane, obj.thing, obj.sub, obj.subsectorDepth, StencilValue + 1); } else { RenderPolySprite spr; - spr.Render(WorldToClip, obj.thing, obj.sub, obj.subsectorDepth, StencilValue + 1); + spr.Render(WorldToClip, PortalPlane, obj.thing, obj.sub, obj.subsectorDepth, StencilValue + 1); } } } @@ -341,7 +344,7 @@ void PolyDrawSectorPortal::Render(int portalDepth) TriMatrix::translate((float)-ViewPos.X, (float)-ViewPos.Y, (float)-ViewPos.Z); TriMatrix worldToClip = TriMatrix::perspective(fovy, ratio, 5.0f, 65535.0f) * worldToView; - RenderPortal.SetViewpoint(worldToClip, Vec4f(0.0f), StencilValue); + RenderPortal.SetViewpoint(worldToClip, Vec4f(0.0f, 0.0f, 0.0f, 1.0f), StencilValue); RenderPortal.Render(portalDepth); RestoreGlobals(); @@ -445,7 +448,7 @@ void PolyDrawLinePortal::Render(int portalDepth) DVector2 planePos = Portal->mDestination->v1->fPos(); DVector2 planeNormal = (Portal->mDestination->v2->fPos() - Portal->mDestination->v1->fPos()).Rotated90CW(); planeNormal.MakeUnit(); - double planeD = -(planeNormal | (planePos + planeNormal * 5.0)); + double planeD = -(planeNormal | (planePos + planeNormal * 0.001)); Vec4f portalPlane((float)planeNormal.X, (float)planeNormal.Y, 0.0f, (float)planeD); RenderPortal.SetViewpoint(worldToClip, portalPlane, StencilValue); diff --git a/src/r_poly_sky.cpp b/src/r_poly_sky.cpp index 97f8203fac..c0a219c799 100644 --- a/src/r_poly_sky.cpp +++ b/src/r_poly_sky.cpp @@ -61,6 +61,7 @@ void PolySkyDome::Render(const TriMatrix &worldToClip) args.stencilwritevalue = 255; args.SetTexture(frontskytex); args.SetColormap(&NormalLight); + args.SetClipPlane(0.0f, 0.0f, 0.0f, 0.0f); RenderCapColorRow(args, frontskytex, 0, false); RenderCapColorRow(args, frontskytex, rc, true); diff --git a/src/r_poly_sprite.cpp b/src/r_poly_sprite.cpp index 0bacd6eb2b..edb46c4609 100644 --- a/src/r_poly_sprite.cpp +++ b/src/r_poly_sprite.cpp @@ -27,11 +27,12 @@ #include "r_data/r_translate.h" #include "r_poly_sprite.h" 
#include "r_poly.h" +#include "r_poly_intersection.h" EXTERN_CVAR(Float, transsouls) EXTERN_CVAR(Int, r_drawfuzz) -void RenderPolySprite::Render(const TriMatrix &worldToClip, AActor *thing, subsector_t *sub, uint32_t subsectorDepth, uint32_t stencilValue) +void RenderPolySprite::Render(const TriMatrix &worldToClip, const Vec4f &clipPlane, AActor *thing, subsector_t *sub, uint32_t subsectorDepth, uint32_t stencilValue) { if (IsThingCulled(thing)) return; @@ -138,6 +139,7 @@ void RenderPolySprite::Render(const TriMatrix &worldToClip, AActor *thing, subse args.stencilwritevalue = stencilValue; args.SetTexture(tex, thing->Translation); args.SetColormap(sub->sector->ColorMap); + args.SetClipPlane(clipPlane.x, clipPlane.y, clipPlane.z, clipPlane.w); TriBlendMode blendmode; diff --git a/src/r_poly_sprite.h b/src/r_poly_sprite.h index e23d0d09da..04b0fcb942 100644 --- a/src/r_poly_sprite.h +++ b/src/r_poly_sprite.h @@ -24,10 +24,12 @@ #include "r_poly_triangle.h" +class Vec4f; + class RenderPolySprite { public: - void Render(const TriMatrix &worldToClip, AActor *thing, subsector_t *sub, uint32_t subsectorDepth, uint32_t stencilValue); + void Render(const TriMatrix &worldToClip, const Vec4f &clipPlane, AActor *thing, subsector_t *sub, uint32_t subsectorDepth, uint32_t stencilValue); static bool IsThingCulled(AActor *thing); static FTexture *GetSpriteTexture(AActor *thing, /*out*/ bool &flipX); diff --git a/src/r_poly_triangle.cpp b/src/r_poly_triangle.cpp index 298c5f024b..f08d060e8f 100644 --- a/src/r_poly_triangle.cpp +++ b/src/r_poly_triangle.cpp @@ -119,34 +119,34 @@ void PolyTriangleDrawer::draw_arrays(const PolyDrawArgs &drawargs, TriDrawVarian const TriVertex *vinput = drawargs.vinput; int vcount = drawargs.vcount; - TriVertex vert[3]; + ShadedTriVertex vert[3]; if (drawargs.mode == TriangleDrawMode::Normal) { for (int i = 0; i < vcount / 3; i++) { for (int j = 0; j < 3; j++) - vert[j] = shade_vertex(*drawargs.objectToClip, *(vinput++)); + vert[j] = 
shade_vertex(*drawargs.objectToClip, drawargs.clipPlane, *(vinput++)); draw_shaded_triangle(vert, ccw, &args, thread, drawfunc); } } else if (drawargs.mode == TriangleDrawMode::Fan) { - vert[0] = shade_vertex(*drawargs.objectToClip, *(vinput++)); - vert[1] = shade_vertex(*drawargs.objectToClip, *(vinput++)); + vert[0] = shade_vertex(*drawargs.objectToClip, drawargs.clipPlane, *(vinput++)); + vert[1] = shade_vertex(*drawargs.objectToClip, drawargs.clipPlane, *(vinput++)); for (int i = 2; i < vcount; i++) { - vert[2] = shade_vertex(*drawargs.objectToClip, *(vinput++)); + vert[2] = shade_vertex(*drawargs.objectToClip, drawargs.clipPlane, *(vinput++)); draw_shaded_triangle(vert, ccw, &args, thread, drawfunc); vert[1] = vert[2]; } } else // TriangleDrawMode::Strip { - vert[0] = shade_vertex(*drawargs.objectToClip, *(vinput++)); - vert[1] = shade_vertex(*drawargs.objectToClip, *(vinput++)); + vert[0] = shade_vertex(*drawargs.objectToClip, drawargs.clipPlane, *(vinput++)); + vert[1] = shade_vertex(*drawargs.objectToClip, drawargs.clipPlane, *(vinput++)); for (int i = 2; i < vcount; i++) { - vert[2] = shade_vertex(*drawargs.objectToClip, *(vinput++)); + vert[2] = shade_vertex(*drawargs.objectToClip, drawargs.clipPlane, *(vinput++)); draw_shaded_triangle(vert, ccw, &args, thread, drawfunc); vert[0] = vert[1]; vert[1] = vert[2]; @@ -155,13 +155,18 @@ void PolyTriangleDrawer::draw_arrays(const PolyDrawArgs &drawargs, TriDrawVarian } } -TriVertex PolyTriangleDrawer::shade_vertex(const TriMatrix &objectToClip, TriVertex v) +ShadedTriVertex PolyTriangleDrawer::shade_vertex(const TriMatrix &objectToClip, const float *clipPlane, const TriVertex &v) { // Apply transform to get clip coordinates: - return objectToClip * v; + ShadedTriVertex sv = objectToClip * v; + + // Calculate gl_ClipDistance[0] + sv.clipDistance0 = v.x * clipPlane[0] + v.y * clipPlane[1] + v.z * clipPlane[2] + v.w * clipPlane[3]; + + return sv; } -void PolyTriangleDrawer::draw_shaded_triangle(const TriVertex 
*vert, bool ccw, TriDrawTriangleArgs *args, WorkerThreadData *thread, void(*drawfunc)(const TriDrawTriangleArgs *, WorkerThreadData *)) +void PolyTriangleDrawer::draw_shaded_triangle(const ShadedTriVertex *vert, bool ccw, TriDrawTriangleArgs *args, WorkerThreadData *thread, void(*drawfunc)(const TriDrawTriangleArgs *, WorkerThreadData *)) { // Cull, clip and generate additional vertices as needed TriVertex clippedvert[max_additional_vertices]; @@ -225,7 +230,7 @@ bool PolyTriangleDrawer::cullhalfspace(float clipdistance1, float clipdistance2, return false; } -void PolyTriangleDrawer::clipedge(const TriVertex *verts, TriVertex *clippedvert, int &numclipvert) +void PolyTriangleDrawer::clipedge(const ShadedTriVertex *verts, TriVertex *clippedvert, int &numclipvert) { // Clip and cull so that the following is true for all vertices: // -v.w <= v.x <= v.w @@ -243,16 +248,18 @@ void PolyTriangleDrawer::clipedge(const TriVertex *verts, TriVertex *clippedvert } // halfspace clip distances - float clipdistance[6 * 3]; + static const int numclipdistances = 7; + float clipdistance[numclipdistances * 3]; for (int i = 0; i < 3; i++) { const auto &v = verts[i]; - clipdistance[i * 6 + 0] = v.x + v.w; - clipdistance[i * 6 + 1] = v.w - v.x; - clipdistance[i * 6 + 2] = v.y + v.w; - clipdistance[i * 6 + 3] = v.w - v.y; - clipdistance[i * 6 + 4] = v.z + v.w; - clipdistance[i * 6 + 5] = v.w - v.z; + clipdistance[i * numclipdistances + 0] = v.x + v.w; + clipdistance[i * numclipdistances + 1] = v.w - v.x; + clipdistance[i * numclipdistances + 2] = v.y + v.w; + clipdistance[i * numclipdistances + 3] = v.w - v.y; + clipdistance[i * numclipdistances + 4] = v.z + v.w; + clipdistance[i * numclipdistances + 5] = v.w - v.z; + clipdistance[i * numclipdistances + 6] = v.clipDistance0; } // Clip against each halfspace @@ -260,7 +267,7 @@ void PolyTriangleDrawer::clipedge(const TriVertex *verts, TriVertex *clippedvert float *output = weights + max_additional_vertices * 3; int inputverts = 3; int 
outputverts = 0; - for (int p = 0; p < 6; p++) + for (int p = 0; p < numclipdistances; p++) { // Clip each edge outputverts = 0; @@ -268,14 +275,14 @@ void PolyTriangleDrawer::clipedge(const TriVertex *verts, TriVertex *clippedvert { int j = (i + 1) % inputverts; float clipdistance1 = - clipdistance[0 * 6 + p] * input[i * 3 + 0] + - clipdistance[1 * 6 + p] * input[i * 3 + 1] + - clipdistance[2 * 6 + p] * input[i * 3 + 2]; + clipdistance[0 * numclipdistances + p] * input[i * 3 + 0] + + clipdistance[1 * numclipdistances + p] * input[i * 3 + 1] + + clipdistance[2 * numclipdistances + p] * input[i * 3 + 2]; float clipdistance2 = - clipdistance[0 * 6 + p] * input[j * 3 + 0] + - clipdistance[1 * 6 + p] * input[j * 3 + 1] + - clipdistance[2 * 6 + p] * input[j * 3 + 2]; + clipdistance[0 * numclipdistances + p] * input[j * 3 + 0] + + clipdistance[1 * numclipdistances + p] * input[j * 3 + 1] + + clipdistance[2 * numclipdistances + p] * input[j * 3 + 2]; float t1, t2; if (!cullhalfspace(clipdistance1, clipdistance2, t1, t2) && outputverts + 1 < max_additional_vertices) @@ -512,17 +519,20 @@ TriMatrix TriMatrix::operator*(const TriMatrix &mult) const return result; } -TriVertex TriMatrix::operator*(TriVertex v) const +ShadedTriVertex TriMatrix::operator*(TriVertex v) const { float vx = matrix[0 * 4 + 0] * v.x + matrix[1 * 4 + 0] * v.y + matrix[2 * 4 + 0] * v.z + matrix[3 * 4 + 0] * v.w; float vy = matrix[0 * 4 + 1] * v.x + matrix[1 * 4 + 1] * v.y + matrix[2 * 4 + 1] * v.z + matrix[3 * 4 + 1] * v.w; float vz = matrix[0 * 4 + 2] * v.x + matrix[1 * 4 + 2] * v.y + matrix[2 * 4 + 2] * v.z + matrix[3 * 4 + 2] * v.w; float vw = matrix[0 * 4 + 3] * v.x + matrix[1 * 4 + 3] * v.y + matrix[2 * 4 + 3] * v.z + matrix[3 * 4 + 3] * v.w; - v.x = vx; - v.y = vy; - v.z = vz; - v.w = vw; - return v; + ShadedTriVertex sv; + sv.x = vx; + sv.y = vy; + sv.z = vz; + sv.w = vw; + for (int i = 0; i < TriVertex::NumVarying; i++) + sv.varying[i] = v.varying[i]; + return sv; } 
///////////////////////////////////////////////////////////////////////////// diff --git a/src/r_poly_triangle.h b/src/r_poly_triangle.h index 8fef11e99a..270dda8948 100644 --- a/src/r_poly_triangle.h +++ b/src/r_poly_triangle.h @@ -38,6 +38,7 @@ enum class TriangleDrawMode }; struct TriDrawTriangleArgs; +struct TriMatrix; class PolyDrawArgs { @@ -55,6 +56,15 @@ public: uint8_t stenciltestvalue = 0; uint8_t stencilwritevalue = 0; const uint8_t *colormaps = nullptr; + float clipPlane[4]; + + void SetClipPlane(float a, float b, float c, float d) + { + clipPlane[0] = a; + clipPlane[1] = b; + clipPlane[2] = c; + clipPlane[3] = d; + } void SetTexture(FTexture *texture) { @@ -118,6 +128,31 @@ public: } }; +struct ShadedTriVertex : public TriVertex +{ + float clipDistance0; +}; + +struct TriMatrix +{ + static TriMatrix null(); + static TriMatrix identity(); + static TriMatrix translate(float x, float y, float z); + static TriMatrix scale(float x, float y, float z); + static TriMatrix rotate(float angle, float x, float y, float z); + static TriMatrix swapYZ(); + static TriMatrix perspective(float fovy, float aspect, float near, float far); + static TriMatrix frustum(float left, float right, float bottom, float top, float near, float far); + + static TriMatrix worldToView(); // Software renderer world to view space transform + static TriMatrix viewToClip(); // Software renderer shearing projection + + ShadedTriVertex operator*(TriVertex v) const; + TriMatrix operator*(const TriMatrix &m) const; + + float matrix[16]; +}; + class PolyTriangleDrawer { public: @@ -125,11 +160,11 @@ public: static void draw(const PolyDrawArgs &args, TriDrawVariant variant, TriBlendMode blendmode); private: - static TriVertex shade_vertex(const TriMatrix &objectToClip, TriVertex v); + static ShadedTriVertex shade_vertex(const TriMatrix &objectToClip, const float *clipPlane, const TriVertex &v); static void draw_arrays(const PolyDrawArgs &args, TriDrawVariant variant, TriBlendMode blendmode, 
WorkerThreadData *thread); - static void draw_shaded_triangle(const TriVertex *vertices, bool ccw, TriDrawTriangleArgs *args, WorkerThreadData *thread, void(*drawfunc)(const TriDrawTriangleArgs *, WorkerThreadData *)); + static void draw_shaded_triangle(const ShadedTriVertex *vertices, bool ccw, TriDrawTriangleArgs *args, WorkerThreadData *thread, void(*drawfunc)(const TriDrawTriangleArgs *, WorkerThreadData *)); static bool cullhalfspace(float clipdistance1, float clipdistance2, float &t1, float &t2); - static void clipedge(const TriVertex *verts, TriVertex *clippedvert, int &numclipvert); + static void clipedge(const ShadedTriVertex *verts, TriVertex *clippedvert, int &numclipvert); static int viewport_x, viewport_y, viewport_width, viewport_height, dest_pitch, dest_width, dest_height; static bool dest_bgra; diff --git a/src/r_poly_wall.cpp b/src/r_poly_wall.cpp index 6e09faa8a4..cd4d32077f 100644 --- a/src/r_poly_wall.cpp +++ b/src/r_poly_wall.cpp @@ -35,7 +35,7 @@ EXTERN_CVAR(Bool, r_drawmirrors) -bool RenderPolyWall::RenderLine(const TriMatrix &worldToClip, seg_t *line, sector_t *frontsector, uint32_t subsectorDepth, uint32_t stencilValue, std::vector &translucentWallsOutput, std::vector> &linePortals) +bool RenderPolyWall::RenderLine(const TriMatrix &worldToClip, const Vec4f &clipPlane, seg_t *line, sector_t *frontsector, uint32_t subsectorDepth, uint32_t stencilValue, std::vector &translucentWallsOutput, std::vector> &linePortals) { PolyDrawLinePortal *polyportal = nullptr; if (line->backsector == nullptr && line->sidedef == line->linedef->sidedef[0] && (line->linedef->special == Line_Mirror && r_drawmirrors)) @@ -85,7 +85,7 @@ bool RenderPolyWall::RenderLine(const TriMatrix &worldToClip, seg_t *line, secto wall.UnpeggedCeil = frontceilz1; wall.Texpart = side_t::mid; wall.Polyportal = polyportal; - wall.Render(worldToClip); + wall.Render(worldToClip, clipPlane); return true; } } @@ -120,7 +120,7 @@ bool RenderPolyWall::RenderLine(const TriMatrix 
&worldToClip, seg_t *line, secto wall.BottomZ = topfloorz1; wall.UnpeggedCeil = topceilz1; wall.Texpart = side_t::top; - wall.Render(worldToClip); + wall.Render(worldToClip, clipPlane); } if ((bottomfloorz1 < bottomceilz1 || bottomfloorz2 < bottomceilz2) && line->sidedef) @@ -130,7 +130,7 @@ bool RenderPolyWall::RenderLine(const TriMatrix &worldToClip, seg_t *line, secto wall.BottomZ = bottomfloorz2; wall.UnpeggedCeil = topceilz1; wall.Texpart = side_t::bottom; - wall.Render(worldToClip); + wall.Render(worldToClip, clipPlane); } if (line->sidedef) @@ -149,14 +149,14 @@ bool RenderPolyWall::RenderLine(const TriMatrix &worldToClip, seg_t *line, secto if (polyportal) { wall.Polyportal = polyportal; - wall.Render(worldToClip); + wall.Render(worldToClip, clipPlane); } } } return polyportal != nullptr; } -void RenderPolyWall::Render3DFloorLine(const TriMatrix &worldToClip, seg_t *line, sector_t *frontsector, uint32_t subsectorDepth, uint32_t stencilValue, F3DFloor *fakeFloor, std::vector &translucentWallsOutput) +void RenderPolyWall::Render3DFloorLine(const TriMatrix &worldToClip, const Vec4f &clipPlane, seg_t *line, sector_t *frontsector, uint32_t subsectorDepth, uint32_t stencilValue, F3DFloor *fakeFloor, std::vector &translucentWallsOutput) { double frontceilz1 = fakeFloor->top.plane->ZatPoint(line->v1); double frontfloorz1 = fakeFloor->bottom.plane->ZatPoint(line->v1); @@ -176,7 +176,7 @@ void RenderPolyWall::Render3DFloorLine(const TriMatrix &worldToClip, seg_t *line wall.BottomZ = frontfloorz1; wall.UnpeggedCeil = frontceilz1; wall.Texpart = side_t::mid; - wall.Render(worldToClip); + wall.Render(worldToClip, clipPlane); } void RenderPolyWall::SetCoords(const DVector2 &v1, const DVector2 &v2, double ceil1, double floor1, double ceil2, double floor2) @@ -189,7 +189,7 @@ void RenderPolyWall::SetCoords(const DVector2 &v1, const DVector2 &v2, double ce this->floor2 = floor2; } -void RenderPolyWall::Render(const TriMatrix &worldToClip) +void RenderPolyWall::Render(const 
TriMatrix &worldToClip, const Vec4f &clipPlane) { FTexture *tex = GetTexture(); if (!tex && !Polyportal) @@ -253,6 +253,7 @@ void RenderPolyWall::Render(const TriMatrix &worldToClip) if (tex) args.SetTexture(tex); args.SetColormap(Line->frontsector->ColorMap); + args.SetClipPlane(clipPlane.x, clipPlane.y, clipPlane.z, clipPlane.w); if (Polyportal) { @@ -275,7 +276,7 @@ void RenderPolyWall::Render(const TriMatrix &worldToClip) PolyTriangleDrawer::draw(args, TriDrawVariant::DrawSubsector, TriBlendMode::Add); } - RenderPolyDecal::RenderWallDecals(worldToClip, LineSeg, SubsectorDepth, StencilValue); + RenderPolyDecal::RenderWallDecals(worldToClip, clipPlane, LineSeg, SubsectorDepth, StencilValue); } void RenderPolyWall::ClampHeight(TriVertex &v1, TriVertex &v2) diff --git a/src/r_poly_wall.h b/src/r_poly_wall.h index d1196df9f9..bcecd5a05b 100644 --- a/src/r_poly_wall.h +++ b/src/r_poly_wall.h @@ -26,15 +26,16 @@ class PolyTranslucentObject; class PolyDrawLinePortal; +class Vec4f; class RenderPolyWall { public: - static bool RenderLine(const TriMatrix &worldToClip, seg_t *line, sector_t *frontsector, uint32_t subsectorDepth, uint32_t stencilValue, std::vector &translucentWallsOutput, std::vector> &linePortals); - static void Render3DFloorLine(const TriMatrix &worldToClip, seg_t *line, sector_t *frontsector, uint32_t subsectorDepth, uint32_t stencilValue, F3DFloor *fakeFloor, std::vector &translucentWallsOutput); + static bool RenderLine(const TriMatrix &worldToClip, const Vec4f &clipPlane, seg_t *line, sector_t *frontsector, uint32_t subsectorDepth, uint32_t stencilValue, std::vector &translucentWallsOutput, std::vector> &linePortals); + static void Render3DFloorLine(const TriMatrix &worldToClip, const Vec4f &clipPlane, seg_t *line, sector_t *frontsector, uint32_t subsectorDepth, uint32_t stencilValue, F3DFloor *fakeFloor, std::vector &translucentWallsOutput); void SetCoords(const DVector2 &v1, const DVector2 &v2, double ceil1, double floor1, double ceil2, double 
floor2); - void Render(const TriMatrix &worldToClip); + void Render(const TriMatrix &worldToClip, const Vec4f &clipPlane); DVector2 v1; DVector2 v2; diff --git a/src/r_poly_wallsprite.cpp b/src/r_poly_wallsprite.cpp index c8e0abcc43..351e24e364 100644 --- a/src/r_poly_wallsprite.cpp +++ b/src/r_poly_wallsprite.cpp @@ -28,7 +28,7 @@ #include "r_poly_wallsprite.h" #include "r_poly.h" -void RenderPolyWallSprite::Render(const TriMatrix &worldToClip, AActor *thing, subsector_t *sub, uint32_t subsectorDepth, uint32_t stencilValue) +void RenderPolyWallSprite::Render(const TriMatrix &worldToClip, const Vec4f &clipPlane, AActor *thing, subsector_t *sub, uint32_t subsectorDepth, uint32_t stencilValue) { if (RenderPolySprite::IsThingCulled(thing)) return; @@ -120,5 +120,6 @@ void RenderPolyWallSprite::Render(const TriMatrix &worldToClip, AActor *thing, s args.stencilwritevalue = stencilValue; args.SetTexture(tex); args.SetColormap(sub->sector->ColorMap); + args.SetClipPlane(clipPlane.x, clipPlane.y, clipPlane.z, clipPlane.w); PolyTriangleDrawer::draw(args, TriDrawVariant::DrawSubsector, TriBlendMode::AlphaBlend); } diff --git a/src/r_poly_wallsprite.h b/src/r_poly_wallsprite.h index 2942d6994a..51cf28de6d 100644 --- a/src/r_poly_wallsprite.h +++ b/src/r_poly_wallsprite.h @@ -24,8 +24,10 @@ #include "r_poly_triangle.h" +class Vec4f; + class RenderPolyWallSprite { public: - void Render(const TriMatrix &worldToClip, AActor *thing, subsector_t *sub, uint32_t subsectorDepth, uint32_t stencilValue); + void Render(const TriMatrix &worldToClip, const Vec4f &clipPlane, AActor *thing, subsector_t *sub, uint32_t subsectorDepth, uint32_t stencilValue); }; From 1a16776dff9c927363caa79b56d76be03786764b Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 27 Nov 2016 04:39:33 +0100 Subject: [PATCH 390/912] Plane texturing adjustments --- src/r_poly_plane.cpp | 148 +++++++++++++++++++++++++++++++------------ src/r_poly_plane.h | 16 ++++- 2 files changed, 123 insertions(+), 41 
deletions(-) diff --git a/src/r_poly_plane.cpp b/src/r_poly_plane.cpp index 76ede69603..b17b0ec2c3 100644 --- a/src/r_poly_plane.cpp +++ b/src/r_poly_plane.cpp @@ -103,6 +103,8 @@ void RenderPolyPlane::Render3DFloor(const TriMatrix &worldToClip, const Vec4f &c lightlevel = *light->p_lightlevel; } + UVTransform xform(ceiling ? fakeFloor->top.model->planes[sector_t::ceiling].xform : fakeFloor->top.model->planes[sector_t::floor].xform, tex); + PolyDrawArgs args; args.uniforms.light = (uint32_t)(lightlevel / 255.0f * 256.0f); if (fixedlightlev >= 0 || fixedcolormap) @@ -119,7 +121,7 @@ void RenderPolyPlane::Render3DFloor(const TriMatrix &worldToClip, const Vec4f &c for (uint32_t i = 0; i < sub->numlines; i++) { seg_t *line = &sub->firstline[i]; - vertices[sub->numlines - 1 - i] = PlaneVertex(line->v1, fakeFloor->bottom.plane->ZatPoint(line->v1)); + vertices[sub->numlines - 1 - i] = PlaneVertex(line->v1, fakeFloor->bottom.plane->ZatPoint(line->v1), xform); } } else @@ -127,7 +129,7 @@ void RenderPolyPlane::Render3DFloor(const TriMatrix &worldToClip, const Vec4f &c for (uint32_t i = 0; i < sub->numlines; i++) { seg_t *line = &sub->firstline[i]; - vertices[i] = PlaneVertex(line->v1, fakeFloor->top.plane->ZatPoint(line->v1)); + vertices[i] = PlaneVertex(line->v1, fakeFloor->top.plane->ZatPoint(line->v1), xform); } } @@ -172,36 +174,65 @@ void RenderPolyPlane::Render(const TriMatrix &worldToClip, const Vec4f &clipPlan if (fakesector && (fakesector == sub->sector || (fakesector->MoreFlags & SECF_IGNOREHEIGHTSEC) == SECF_IGNOREHEIGHTSEC)) fakesector = nullptr; - bool fakeflooronly = fakesector && (fakesector->MoreFlags & SECF_FAKEFLOORONLY) != SECF_FAKEFLOORONLY; + bool fakeflooronly = fakesector && (fakesector->MoreFlags & SECF_FAKEFLOORONLY) == SECF_FAKEFLOORONLY; FTextureID picnum; bool ccw; sector_t *frontsector; - if (ceiling && fakesector && ViewPos.Z < fakesector->floorplane.Zat0()) + if (fakesector) { - picnum = fakesector->GetTexture(sector_t::ceiling); - ccw = 
false; - ceiling = false; - frontsector = fakesector; - } - else if (!ceiling && fakesector && ViewPos.Z >= fakesector->floorplane.Zat0()) - { - picnum = fakesector->GetTexture(sector_t::ceiling); - ccw = true; - frontsector = fakesector; - } - else if (ceiling && fakesector && ViewPos.Z > fakesector->ceilingplane.Zat0() && !fakeflooronly) - { - picnum = fakesector->GetTexture(sector_t::floor); - ccw = true; - frontsector = fakesector; - } - else if (!ceiling && fakesector && ViewPos.Z <= fakesector->ceilingplane.Zat0() && !fakeflooronly) - { - picnum = fakesector->GetTexture(sector_t::floor); - ccw = false; - ceiling = true; - frontsector = fakesector; + // Floor and ceiling texture needs to be swapped sometimes? Why?? :( + + if (ViewPos.Z < fakesector->floorplane.Zat0()) // In water + { + if (ceiling) + { + picnum = fakesector->GetTexture(sector_t::ceiling); + ceiling = false; + frontsector = fakesector; + ccw = false; + } + else + { + picnum = fakesector->GetTexture(sector_t::floor); + frontsector = sub->sector; + ccw = true; + } + } + else if (ViewPos.Z >= fakesector->ceilingplane.Zat0() && !fakeflooronly) // In ceiling water + { + if (ceiling) + { + picnum = fakesector->GetTexture(sector_t::ceiling); + frontsector = sub->sector; + ccw = true; + } + else + { + picnum = fakesector->GetTexture(sector_t::floor); + frontsector = fakesector; + ccw = false; + ceiling = true; + } + } + else if (!ceiling) // Water surface + { + picnum = fakesector->GetTexture(sector_t::ceiling); + frontsector = fakesector; + ccw = true; + } + else if (!fakeflooronly) // Ceiling water surface + { + picnum = fakesector->GetTexture(sector_t::floor); + frontsector = fakesector; + ccw = true; + } + else // Upper ceiling + { + picnum = sub->sector->GetTexture(sector_t::ceiling); + ccw = true; + frontsector = sub->sector; + } } else { @@ -216,6 +247,8 @@ void RenderPolyPlane::Render(const TriMatrix &worldToClip, const Vec4f &clipPlan bool isSky = picnum == skyflatnum; + UVTransform 
transform(ceiling ? frontsector->planes[sector_t::ceiling].xform : frontsector->planes[sector_t::floor].xform, tex); + PolyDrawArgs args; args.uniforms.light = (uint32_t)(frontsector->lightlevel / 255.0f * 256.0f); if (fixedlightlev >= 0 || fixedcolormap) @@ -232,7 +265,7 @@ void RenderPolyPlane::Render(const TriMatrix &worldToClip, const Vec4f &clipPlan for (uint32_t i = 0; i < sub->numlines; i++) { seg_t *line = &sub->firstline[i]; - vertices[sub->numlines - 1 - i] = PlaneVertex(line->v1, isSky ? skyHeight : frontsector->ceilingplane.ZatPoint(line->v1)); + vertices[sub->numlines - 1 - i] = PlaneVertex(line->v1, isSky ? skyHeight : frontsector->ceilingplane.ZatPoint(line->v1), transform); } } else @@ -240,7 +273,7 @@ void RenderPolyPlane::Render(const TriMatrix &worldToClip, const Vec4f &clipPlan for (uint32_t i = 0; i < sub->numlines; i++) { seg_t *line = &sub->firstline[i]; - vertices[i] = PlaneVertex(line->v1, isSky ? skyHeight : frontsector->floorplane.ZatPoint(line->v1)); + vertices[i] = PlaneVertex(line->v1, isSky ? 
skyHeight : frontsector->floorplane.ZatPoint(line->v1), transform); } } @@ -334,17 +367,17 @@ void RenderPolyPlane::Render(const TriMatrix &worldToClip, const Vec4f &clipPlan if (ceiling) { - wallvert[0] = PlaneVertex(line->v1, skyHeight); - wallvert[1] = PlaneVertex(line->v2, skyHeight); - wallvert[2] = PlaneVertex(line->v2, skyBottomz2); - wallvert[3] = PlaneVertex(line->v1, skyBottomz1); + wallvert[0] = PlaneVertex(line->v1, skyHeight, transform); + wallvert[1] = PlaneVertex(line->v2, skyHeight, transform); + wallvert[2] = PlaneVertex(line->v2, skyBottomz2, transform); + wallvert[3] = PlaneVertex(line->v1, skyBottomz1, transform); } else { - wallvert[0] = PlaneVertex(line->v1, frontsector->floorplane.ZatPoint(line->v1)); - wallvert[1] = PlaneVertex(line->v2, frontsector->floorplane.ZatPoint(line->v2)); - wallvert[2] = PlaneVertex(line->v2, skyHeight); - wallvert[3] = PlaneVertex(line->v1, skyHeight); + wallvert[0] = PlaneVertex(line->v1, frontsector->floorplane.ZatPoint(line->v1), transform); + wallvert[1] = PlaneVertex(line->v2, frontsector->floorplane.ZatPoint(line->v2), transform); + wallvert[2] = PlaneVertex(line->v2, skyHeight, transform); + wallvert[3] = PlaneVertex(line->v1, skyHeight, transform); } args.vinput = wallvert; @@ -357,14 +390,49 @@ void RenderPolyPlane::Render(const TriMatrix &worldToClip, const Vec4f &clipPlan } } -TriVertex RenderPolyPlane::PlaneVertex(vertex_t *v1, double height) +TriVertex RenderPolyPlane::PlaneVertex(vertex_t *v1, double height, const UVTransform &transform) { TriVertex v; v.x = (float)v1->fPos().X; v.y = (float)v1->fPos().Y; v.z = (float)height; v.w = 1.0f; - v.varying[0] = v.x / 64.0f; - v.varying[1] = 1.0f - v.y / 64.0f; + v.varying[0] = transform.GetU(v.x, v.y); + v.varying[1] = transform.GetV(v.x, v.y); return v; } + +RenderPolyPlane::UVTransform::UVTransform(const FTransform &transform, FTexture *tex) +{ + if (tex) + { + xscale = (float)(transform.xScale * tex->Scale.X / tex->GetWidth()); + yscale = 
(float)(transform.yScale * tex->Scale.Y / tex->GetHeight()); + + double planeang = (transform.Angle + transform.baseAngle).Radians(); + cosine = (float)cos(planeang); + sine = (float)sin(planeang); + + xOffs = (float)transform.xOffs; + yOffs = (float)transform.yOffs; + } + else + { + xscale = 1.0f / 64.0f; + yscale = 1.0f / 64.0f; + cosine = 1.0f; + sine = 0.0f; + xOffs = 0.0f; + yOffs = 0.0f; + } +} + +float RenderPolyPlane::UVTransform::GetU(float x, float y) const +{ + return (xOffs + x * cosine - y * sine) * xscale; +} + +float RenderPolyPlane::UVTransform::GetV(float x, float y) const +{ + return (yOffs - x * sine - y * cosine) * yscale; +} diff --git a/src/r_poly_plane.h b/src/r_poly_plane.h index 3006ce86b4..396a9e4a88 100644 --- a/src/r_poly_plane.h +++ b/src/r_poly_plane.h @@ -33,7 +33,21 @@ public: static void RenderPlanes(const TriMatrix &worldToClip, const Vec4f &clipPlane, subsector_t *sub, uint32_t subsectorDepth, uint32_t stencilValue, double skyCeilingHeight, double skyFloorHeight, std::vector> &sectorPortals); private: + struct UVTransform + { + UVTransform(const FTransform &transform, FTexture *tex); + + float GetU(float x, float y) const; + float GetV(float x, float y) const; + + float xscale; + float yscale; + float cosine; + float sine; + float xOffs, yOffs; + }; + void Render3DFloor(const TriMatrix &worldToClip, const Vec4f &clipPlane, subsector_t *sub, uint32_t subsectorDepth, uint32_t stencilValue, bool ceiling, F3DFloor *fakefloor); void Render(const TriMatrix &worldToClip, const Vec4f &clipPlane, subsector_t *sub, uint32_t subsectorDepth, uint32_t stencilValue, bool ceiling, double skyHeight, std::vector> &sectorPortals); - TriVertex PlaneVertex(vertex_t *v1, double height); + TriVertex PlaneVertex(vertex_t *v1, double height, const UVTransform &transform); }; From e9e7839133a2e7784f87a47b4f8bf7e97f595b51 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 28 Nov 2016 17:31:56 +0100 Subject: [PATCH 391/912] Create drawergen tool --- 
.gitignore | 2 + src/CMakeLists.txt | 118 +-- src/r_compiler/llvmdrawers.cpp | 866 ------------------ src/r_draw_rgba.cpp | 22 +- src/r_drawers.cpp | 423 +++++++++ src/{r_compiler/llvmdrawers.h => r_drawers.h} | 44 +- src/r_drawt_rgba.cpp | 6 +- src/r_poly_triangle.cpp | 2 +- src/r_poly_triangle.h | 2 +- src/r_swrenderer.cpp | 4 +- tools/CMakeLists.txt | 1 + tools/drawergen/CMakeLists.txt | 133 +++ tools/drawergen/drawergen.cpp | 664 ++++++++++++++ .../fixedfunction/drawcolumncodegen.cpp | 21 +- .../fixedfunction/drawcolumncodegen.h | 0 .../fixedfunction/drawercodegen.cpp | 21 +- .../drawergen}/fixedfunction/drawercodegen.h | 35 +- .../fixedfunction/drawskycodegen.cpp | 21 +- .../drawergen}/fixedfunction/drawskycodegen.h | 0 .../fixedfunction/drawspancodegen.cpp | 21 +- .../fixedfunction/drawspancodegen.h | 0 .../fixedfunction/drawtrianglecodegen.cpp | 21 +- .../fixedfunction/drawtrianglecodegen.h | 0 .../fixedfunction/drawwallcodegen.cpp | 21 +- .../fixedfunction/drawwallcodegen.h | 0 .../drawergen}/llvm_include.h | 0 tools/drawergen/precomp.h | 5 + .../drawergen}/ssa/ssa_barycentric_weight.h | 0 .../drawergen}/ssa/ssa_bool.cpp | 2 +- .../drawergen}/ssa/ssa_bool.h | 0 .../drawergen}/ssa/ssa_float.cpp | 2 +- .../drawergen}/ssa/ssa_float.h | 0 .../drawergen}/ssa/ssa_float_ptr.cpp | 2 +- .../drawergen}/ssa/ssa_float_ptr.h | 0 .../drawergen}/ssa/ssa_for_block.cpp | 2 +- .../drawergen}/ssa/ssa_for_block.h | 0 .../drawergen}/ssa/ssa_function.cpp | 2 +- .../drawergen}/ssa/ssa_function.h | 0 .../drawergen}/ssa/ssa_if_block.cpp | 2 +- .../drawergen}/ssa/ssa_if_block.h | 0 .../drawergen}/ssa/ssa_int.cpp | 2 +- .../drawergen}/ssa/ssa_int.h | 0 .../drawergen}/ssa/ssa_int_ptr.cpp | 2 +- .../drawergen}/ssa/ssa_int_ptr.h | 0 .../drawergen}/ssa/ssa_phi.h | 0 .../drawergen}/ssa/ssa_scope.cpp | 2 +- .../drawergen}/ssa/ssa_scope.h | 0 .../drawergen}/ssa/ssa_short.cpp | 2 +- .../drawergen}/ssa/ssa_short.h | 0 .../drawergen}/ssa/ssa_stack.h | 0 .../drawergen}/ssa/ssa_struct_type.cpp | 
2 +- .../drawergen}/ssa/ssa_struct_type.h | 0 .../drawergen}/ssa/ssa_ubyte.cpp | 2 +- .../drawergen}/ssa/ssa_ubyte.h | 0 .../drawergen}/ssa/ssa_ubyte_ptr.cpp | 2 +- .../drawergen}/ssa/ssa_ubyte_ptr.h | 0 .../drawergen}/ssa/ssa_value.cpp | 2 +- .../drawergen}/ssa/ssa_value.h | 0 .../drawergen}/ssa/ssa_vec16ub.cpp | 2 +- .../drawergen}/ssa/ssa_vec16ub.h | 0 .../drawergen}/ssa/ssa_vec4f.cpp | 2 +- .../drawergen}/ssa/ssa_vec4f.h | 0 .../drawergen}/ssa/ssa_vec4f_ptr.cpp | 2 +- .../drawergen}/ssa/ssa_vec4f_ptr.h | 0 .../drawergen}/ssa/ssa_vec4i.cpp | 2 +- .../drawergen}/ssa/ssa_vec4i.h | 0 .../drawergen}/ssa/ssa_vec4i_ptr.cpp | 2 +- .../drawergen}/ssa/ssa_vec4i_ptr.h | 0 .../drawergen}/ssa/ssa_vec8s.cpp | 2 +- .../drawergen}/ssa/ssa_vec8s.h | 0 tools/drawergen/trustinfo.rc | 6 + tools/drawergen/trustinfo.txt | 16 + 72 files changed, 1407 insertions(+), 1108 deletions(-) delete mode 100644 src/r_compiler/llvmdrawers.cpp create mode 100644 src/r_drawers.cpp rename src/{r_compiler/llvmdrawers.h => r_drawers.h} (93%) create mode 100644 tools/drawergen/CMakeLists.txt create mode 100644 tools/drawergen/drawergen.cpp rename {src/r_compiler => tools/drawergen}/fixedfunction/drawcolumncodegen.cpp (96%) rename {src/r_compiler => tools/drawergen}/fixedfunction/drawcolumncodegen.h (100%) rename {src/r_compiler => tools/drawergen}/fixedfunction/drawercodegen.cpp (92%) rename {src/r_compiler => tools/drawergen}/fixedfunction/drawercodegen.h (81%) rename {src/r_compiler => tools/drawergen}/fixedfunction/drawskycodegen.cpp (91%) rename {src/r_compiler => tools/drawergen}/fixedfunction/drawskycodegen.h (100%) rename {src/r_compiler => tools/drawergen}/fixedfunction/drawspancodegen.cpp (94%) rename {src/r_compiler => tools/drawergen}/fixedfunction/drawspancodegen.h (100%) rename {src/r_compiler => tools/drawergen}/fixedfunction/drawtrianglecodegen.cpp (98%) rename {src/r_compiler => tools/drawergen}/fixedfunction/drawtrianglecodegen.h (100%) rename {src/r_compiler => 
tools/drawergen}/fixedfunction/drawwallcodegen.cpp (94%) rename {src/r_compiler => tools/drawergen}/fixedfunction/drawwallcodegen.h (100%) rename {src/r_compiler => tools/drawergen}/llvm_include.h (100%) create mode 100644 tools/drawergen/precomp.h rename {src/r_compiler => tools/drawergen}/ssa/ssa_barycentric_weight.h (100%) rename {src/r_compiler => tools/drawergen}/ssa/ssa_bool.cpp (99%) rename {src/r_compiler => tools/drawergen}/ssa/ssa_bool.h (100%) rename {src/r_compiler => tools/drawergen}/ssa/ssa_float.cpp (99%) rename {src/r_compiler => tools/drawergen}/ssa/ssa_float.h (100%) rename {src/r_compiler => tools/drawergen}/ssa/ssa_float_ptr.cpp (99%) rename {src/r_compiler => tools/drawergen}/ssa/ssa_float_ptr.h (100%) rename {src/r_compiler => tools/drawergen}/ssa/ssa_for_block.cpp (98%) rename {src/r_compiler => tools/drawergen}/ssa/ssa_for_block.h (100%) rename {src/r_compiler => tools/drawergen}/ssa/ssa_function.cpp (98%) rename {src/r_compiler => tools/drawergen}/ssa/ssa_function.h (100%) rename {src/r_compiler => tools/drawergen}/ssa/ssa_if_block.cpp (98%) rename {src/r_compiler => tools/drawergen}/ssa/ssa_if_block.h (100%) rename {src/r_compiler => tools/drawergen}/ssa/ssa_int.cpp (99%) rename {src/r_compiler => tools/drawergen}/ssa/ssa_int.h (100%) rename {src/r_compiler => tools/drawergen}/ssa/ssa_int_ptr.cpp (99%) rename {src/r_compiler => tools/drawergen}/ssa/ssa_int_ptr.h (100%) rename {src/r_compiler => tools/drawergen}/ssa/ssa_phi.h (100%) rename {src/r_compiler => tools/drawergen}/ssa/ssa_scope.cpp (98%) rename {src/r_compiler => tools/drawergen}/ssa/ssa_scope.h (100%) rename {src/r_compiler => tools/drawergen}/ssa/ssa_short.cpp (99%) rename {src/r_compiler => tools/drawergen}/ssa/ssa_short.h (100%) rename {src/r_compiler => tools/drawergen}/ssa/ssa_stack.h (100%) rename {src/r_compiler => tools/drawergen}/ssa/ssa_struct_type.cpp (97%) rename {src/r_compiler => tools/drawergen}/ssa/ssa_struct_type.h (100%) rename {src/r_compiler => 
tools/drawergen}/ssa/ssa_ubyte.cpp (98%) rename {src/r_compiler => tools/drawergen}/ssa/ssa_ubyte.h (100%) rename {src/r_compiler => tools/drawergen}/ssa/ssa_ubyte_ptr.cpp (99%) rename {src/r_compiler => tools/drawergen}/ssa/ssa_ubyte_ptr.h (100%) rename {src/r_compiler => tools/drawergen}/ssa/ssa_value.cpp (98%) rename {src/r_compiler => tools/drawergen}/ssa/ssa_value.h (100%) rename {src/r_compiler => tools/drawergen}/ssa/ssa_vec16ub.cpp (99%) rename {src/r_compiler => tools/drawergen}/ssa/ssa_vec16ub.h (100%) rename {src/r_compiler => tools/drawergen}/ssa/ssa_vec4f.cpp (99%) rename {src/r_compiler => tools/drawergen}/ssa/ssa_vec4f.h (100%) rename {src/r_compiler => tools/drawergen}/ssa/ssa_vec4f_ptr.cpp (98%) rename {src/r_compiler => tools/drawergen}/ssa/ssa_vec4f_ptr.h (100%) rename {src/r_compiler => tools/drawergen}/ssa/ssa_vec4i.cpp (99%) rename {src/r_compiler => tools/drawergen}/ssa/ssa_vec4i.h (100%) rename {src/r_compiler => tools/drawergen}/ssa/ssa_vec4i_ptr.cpp (98%) rename {src/r_compiler => tools/drawergen}/ssa/ssa_vec4i_ptr.h (100%) rename {src/r_compiler => tools/drawergen}/ssa/ssa_vec8s.cpp (99%) rename {src/r_compiler => tools/drawergen}/ssa/ssa_vec8s.h (100%) create mode 100644 tools/drawergen/trustinfo.rc create mode 100644 tools/drawergen/trustinfo.txt diff --git a/.gitignore b/.gitignore index 7cc9d98607..a001e38de8 100644 --- a/.gitignore +++ b/.gitignore @@ -48,3 +48,5 @@ /build_vc2015-64 /build /llvm +/src/r_drawersasm.obj +/src/r_drawersasm.o diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index d19951f9f1..458acf6ad9 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -293,47 +293,6 @@ if( NOT NO_OPENAL ) endif() endif() - -# Path where it looks for the LLVM compiled files on Windows -set( LLVM_PRECOMPILED_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../llvm" ) - -if( NOT WIN32 ) - set( LLVM_COMPONENTS core support asmparser asmprinter bitreader bitwriter codegen ipo - irreader transformutils instrumentation profiledata runtimedyld - 
object instcombine linker analysis selectiondag scalaropts vectorize executionengine - mc mcdisassembler mcparser mcjit target x86asmprinter x86info x86desc x86utils x86codegen ) - - # Example LLVM_DIR folder: C:/Development/Environment/Src/llvm-3.9.0/build/lib/cmake/llvm - find_package(LLVM REQUIRED CONFIG) - message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}") - message(STATUS "Using LLVMConfig.cmake in: ${LLVM_DIR}") - llvm_map_components_to_libnames( llvm_libs ${LLVM_COMPONENTS} ) - include_directories( ${LLVM_INCLUDE_DIRS} ) - set( ZDOOM_LIBS ${ZDOOM_LIBS} ${llvm_libs} ) -else() - set( LLVM_COMPONENTS core support asmparser asmprinter bitreader bitwriter codegen passes ipo - irreader transformutils instrumentation profiledata debuginfocodeview runtimedyld - object instcombine linker analysis selectiondag scalaropts vectorize executionengine - mc mcdisassembler mcparser mcjit target x86asmprinter x86info x86desc x86utils x86codegen ) - - include_directories( "${LLVM_PRECOMPILED_DIR}/include" ) - if( X64 ) - include_directories( "${LLVM_PRECOMPILED_DIR}/64bit-include" ) - set( llvm_libs_base "${LLVM_PRECOMPILED_DIR}/64bit-" ) - else() - include_directories( "${LLVM_PRECOMPILED_DIR}/32bit-include" ) - set( llvm_libs_base "${LLVM_PRECOMPILED_DIR}/32bit-" ) - endif() - foreach(buildtype IN ITEMS RELEASE DEBUG) - set( llvm_libs_${buildtype} "${llvm_libs_base}${buildtype}" ) - set( LLVM_${buildtype}_LIBS "" ) - foreach( llvm_module ${LLVM_COMPONENTS} ) - find_library( LLVM_${llvm_module}_LIBRARY_${buildtype} LLVM${llvm_module} PATHS ${llvm_libs_${buildtype}} ) - set( LLVM_${buildtype}_LIBS ${LLVM_${buildtype}_LIBS} ${LLVM_${llvm_module}_LIBRARY_${buildtype}} ) - endforeach( llvm_module ) - endforeach(buildtype) -endif() - if( NOT NO_FMOD ) # Search for FMOD include files if( NOT WIN32 ) @@ -644,11 +603,6 @@ if( ZD_CMAKE_COMPILER_IS_GNUCXX_COMPATIBLE ) set( CMAKE_EXE_LINKER_FLAGS "-stdlib=libc++ ${CMAKE_EXE_LINKER_FLAGS}" ) endif () - # Linux - add these flags for 
LLVM compatibility to prevent crashing - if ( UNIX AND NOT APPLE ) - set( CMAKE_EXE_LINKER_FLAGS "-Wl,--exclude-libs,ALL ${CMAKE_EXE_LINKER_FLAGS}" ) - endif() - # Remove extra warnings when using the official DirectX headers. # Also, TDM-GCC 4.4.0 no longer accepts glibc-style printf formats as valid, # which is a royal pain. The previous version I had been using was fine with them. @@ -719,6 +673,20 @@ add_custom_target( revision_check ALL WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} DEPENDS updaterevision ) +# Run drawer codegen tool + +if ( WIN32 ) + add_custom_target( drawergen_target ALL + COMMAND drawergen src/r_drawersasm.obj + WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} + DEPENDS drawergen ) +else() + add_custom_target( drawergen_target ALL + COMMAND drawergen src/r_drawersasm.o + WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} + DEPENDS drawergen ) +endif() + # Libraries ZDoom needs message( STATUS "Fluid synth libs: ${FLUIDSYNTH_LIBRARIES}" ) @@ -933,9 +901,6 @@ file( GLOB HEADER_FILES posix/*.h posix/cocoa/*.h posix/sdl/*.h - r_compiler/*.h - r_compiler/ssa/*.h - r_compiler/fixedfunction/*.h r_data/*.h resourcefiles/*.h sfmt/*.h @@ -1083,6 +1048,7 @@ set( FASTMATH_PCH_SOURCES r_draw_rgba.cpp r_drawt.cpp r_drawt_rgba.cpp + r_drawers.cpp r_thread.cpp r_main.cpp r_plane.cpp @@ -1485,33 +1451,6 @@ set (PCH_SOURCES fragglescript/t_spec.cpp fragglescript/t_variable.cpp fragglescript/t_cmd.cpp - r_compiler/llvmdrawers.cpp - r_compiler/ssa/ssa_bool.cpp - r_compiler/ssa/ssa_float.cpp - r_compiler/ssa/ssa_float_ptr.cpp - r_compiler/ssa/ssa_for_block.cpp - r_compiler/ssa/ssa_function.cpp - r_compiler/ssa/ssa_if_block.cpp - r_compiler/ssa/ssa_int.cpp - r_compiler/ssa/ssa_int_ptr.cpp - r_compiler/ssa/ssa_short.cpp - r_compiler/ssa/ssa_scope.cpp - r_compiler/ssa/ssa_struct_type.cpp - r_compiler/ssa/ssa_ubyte.cpp - r_compiler/ssa/ssa_ubyte_ptr.cpp - r_compiler/ssa/ssa_value.cpp - r_compiler/ssa/ssa_vec4f.cpp - r_compiler/ssa/ssa_vec4f_ptr.cpp - r_compiler/ssa/ssa_vec4i.cpp - 
r_compiler/ssa/ssa_vec4i_ptr.cpp - r_compiler/ssa/ssa_vec8s.cpp - r_compiler/ssa/ssa_vec16ub.cpp - r_compiler/fixedfunction/drawercodegen.cpp - r_compiler/fixedfunction/drawspancodegen.cpp - r_compiler/fixedfunction/drawwallcodegen.cpp - r_compiler/fixedfunction/drawcolumncodegen.cpp - r_compiler/fixedfunction/drawskycodegen.cpp - r_compiler/fixedfunction/drawtrianglecodegen.cpp r_data/sprites.cpp r_data/voxels.cpp r_data/renderstyle.cpp @@ -1528,6 +1467,16 @@ set (PCH_SOURCES ) enable_precompiled_headers( g_pch.h PCH_SOURCES ) +if ( WIN32 ) + set (CODEGENOBJ_SOURCES + r_drawersasm.obj + ) +else() + set (CODEGENOBJ_SOURCES + r_drawersasm.o + ) +endif() + add_executable( zdoom WIN32 MACOSX_BUNDLE ${HEADER_FILES} ${NOT_COMPILED_SOURCE_FILES} @@ -1557,8 +1506,11 @@ add_executable( zdoom WIN32 MACOSX_BUNDLE math/tanh.c math/fastsin.cpp zzautozend.cpp + r_drawersasm.obj ) +set_source_files_properties( ${CODEGENOBJ_SOURCES} PROPERTIES EXTERNAL_OBJECT true GENERATED true) + set_source_files_properties( ${FASTMATH_SOURCES} PROPERTIES COMPILE_FLAGS ${ZD_FASTMATH_FLAG} ) set_source_files_properties( xlat/parse_xlat.cpp PROPERTIES OBJECT_DEPENDS "${CMAKE_CURRENT_BINARY_DIR}/xlat_parser.c" ) set_source_files_properties( sc_man.cpp PROPERTIES OBJECT_DEPENDS "${CMAKE_CURRENT_BINARY_DIR}/sc_man_scanner.h" ) @@ -1579,15 +1531,6 @@ endif() target_link_libraries( zdoom ${ZDOOM_LIBS} gdtoa dumb lzma ) -if( WIN32 ) - foreach(debuglib ${LLVM_DEBUG_LIBS}) - target_link_libraries( zdoom debug ${debuglib} ) - endforeach(debuglib) - foreach(releaselib ${LLVM_RELEASE_LIBS}) - target_link_libraries( zdoom optimized ${releaselib} ) - endforeach(releaselib) -endif() - include_directories( . g_doom g_heretic @@ -1608,7 +1551,7 @@ include_directories( . 
${CMAKE_BINARY_DIR}/gdtoa ${SYSTEM_SOURCES_DIR} ) -add_dependencies( zdoom revision_check ) +add_dependencies( zdoom revision_check drawergen_target ) # Due to some quirks, we need to do this in this order if( NOT ZDOOM_OUTPUT_OLDSTYLE ) @@ -1750,9 +1693,6 @@ source_group("Render Data\\Resource Headers" REGULAR_EXPRESSION "^${CMAKE_CURREN source_group("Render Data\\Resource Sources" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/r_data/.+\\.cpp$") source_group("Render Data\\Textures" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/textures/.+") source_group("Render Interface" FILES r_defs.h r_renderer.h r_sky.cpp r_sky.h r_state.h r_utility.cpp r_utility.h) -source_group("Render Compiler" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/r_compiler/.+") -source_group("Render Compiler\\SSA" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/r_compiler/ssa/.+") -source_group("Render Compiler\\Fixed Function" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/r_compiler/fixedfunction/.+") source_group("Resource Files" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/resourcefiles/.+") source_group("POSIX Files" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/posix/.+") source_group("Cocoa Files" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/posix/cocoa/.+") diff --git a/src/r_compiler/llvmdrawers.cpp b/src/r_compiler/llvmdrawers.cpp deleted file mode 100644 index babb7c6e70..0000000000 --- a/src/r_compiler/llvmdrawers.cpp +++ /dev/null @@ -1,866 +0,0 @@ -/* -** LLVM code generated drawers -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 'as-is', without any express or implied -** warranty. In no event will the authors be held liable for any damages -** arising from the use of this software. -** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. 
The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. -** 3. This notice may not be removed or altered from any source distribution. -** -*/ - -#include "i_system.h" -#include "r_compiler/llvm_include.h" -#include "r_compiler/fixedfunction/drawspancodegen.h" -#include "r_compiler/fixedfunction/drawwallcodegen.h" -#include "r_compiler/fixedfunction/drawcolumncodegen.h" -#include "r_compiler/fixedfunction/drawskycodegen.h" -#include "r_compiler/fixedfunction/drawtrianglecodegen.h" -#include "r_compiler/ssa/ssa_function.h" -#include "r_compiler/ssa/ssa_scope.h" -#include "r_compiler/ssa/ssa_for_block.h" -#include "r_compiler/ssa/ssa_if_block.h" -#include "r_compiler/ssa/ssa_stack.h" -#include "r_compiler/ssa/ssa_function.h" -#include "r_compiler/ssa/ssa_struct_type.h" -#include "r_compiler/ssa/ssa_value.h" -#include "r_compiler/ssa/ssa_barycentric_weight.h" -#include "x86.h" -#include "c_cvars.h" -#include "version.h" -#include "m_misc.h" - -CUSTOM_CVAR(String, llvm_cpu, "auto", CVAR_ARCHIVE | CVAR_NOINITCALL) -{ - Printf("You must restart " GAMENAME " for this change to take effect.\n"); -} - -class LLVMProgram -{ -public: - LLVMProgram(); - - void CreateModule(); - std::string GetTargetCPU(); - bool LoadCachedModule(int version, std::string targetCPU); - void CreateEE(int version, std::string targetCPU, bool optimize); - std::string GenerateAssembly(std::string cpuName); - std::string DumpModule(); - void StopLogFatalErrors(); - - template - Func *GetProcAddress(const std::string &name) { return reinterpret_cast(PointerToFunction(name.c_str())); } - - llvm::LLVMContext &context() { return *mContext; } - llvm::Module *module() { return 
mModule.get(); } - llvm::ExecutionEngine *engine() { return mEngine.get(); } - -private: - void SaveCachedModule(llvm::Module *module, int version, std::string targetCPU); - FString GetDrawerCacheFilename(int version, FString cpu); - void *PointerToFunction(const char *name); - - llvm::TargetMachine *machine = nullptr; - std::unique_ptr mContext; - std::unique_ptr mModule; - std::unique_ptr mEngine; -}; - -class LLVMDrawersImpl : public LLVMDrawers -{ -public: - LLVMDrawersImpl(); - -private: - void CodegenDrawColumn(const char *name, DrawColumnVariant variant, DrawColumnMethod method); - void CodegenDrawSpan(const char *name, DrawSpanVariant variant); - void CodegenDrawWall(const char *name, DrawWallVariant variant, int columns); - void CodegenDrawSky(const char *name, DrawSkyVariant variant, int columns); - void CodegenDrawTriangle(const std::string &name, TriDrawVariant variant, TriBlendMode blendmode, bool truecolor); - - static llvm::Type *GetDrawColumnArgsStruct(llvm::LLVMContext &context); - static llvm::Type *GetDrawSpanArgsStruct(llvm::LLVMContext &context); - static llvm::Type *GetDrawWallArgsStruct(llvm::LLVMContext &context); - static llvm::Type *GetDrawSkyArgsStruct(llvm::LLVMContext &context); - static llvm::Type *GetWorkerThreadDataStruct(llvm::LLVMContext &context); - static llvm::Type *GetTriVertexStruct(llvm::LLVMContext &context); - static llvm::Type *GetTriMatrixStruct(llvm::LLVMContext &context); - static llvm::Type *GetTriUniformsStruct(llvm::LLVMContext &context); - static llvm::Type *GetTriDrawTriangleArgs(llvm::LLVMContext &context); - - LLVMProgram mProgram; -}; - -///////////////////////////////////////////////////////////////////////////// - -LLVMDrawers *LLVMDrawers::Singleton = nullptr; - -void LLVMDrawers::Create() -{ - if (!Singleton) - Singleton = new LLVMDrawersImpl(); -} - -void LLVMDrawers::Destroy() -{ - delete Singleton; - Singleton = nullptr; -} - -LLVMDrawers *LLVMDrawers::Instance() -{ - return Singleton; -} - 
-///////////////////////////////////////////////////////////////////////////// - -LLVMDrawersImpl::LLVMDrawersImpl() -{ - int version = 8; // Increment this number if the drawer codegen is modified (forces recreation of the module). - std::string targetCPU = mProgram.GetTargetCPU(); - bool loaded = mProgram.LoadCachedModule(version, targetCPU); - if (!loaded) - { - mProgram.CreateModule(); - - CodegenDrawColumn("FillColumn", DrawColumnVariant::Fill, DrawColumnMethod::Normal); - CodegenDrawColumn("FillColumnAdd", DrawColumnVariant::FillAdd, DrawColumnMethod::Normal); - CodegenDrawColumn("FillColumnAddClamp", DrawColumnVariant::FillAddClamp, DrawColumnMethod::Normal); - CodegenDrawColumn("FillColumnSubClamp", DrawColumnVariant::FillSubClamp, DrawColumnMethod::Normal); - CodegenDrawColumn("FillColumnRevSubClamp", DrawColumnVariant::FillRevSubClamp, DrawColumnMethod::Normal); - CodegenDrawColumn("DrawColumn", DrawColumnVariant::Draw, DrawColumnMethod::Normal); - CodegenDrawColumn("DrawColumnAdd", DrawColumnVariant::DrawAdd, DrawColumnMethod::Normal); - CodegenDrawColumn("DrawColumnShaded", DrawColumnVariant::DrawShaded, DrawColumnMethod::Normal); - CodegenDrawColumn("DrawColumnAddClamp", DrawColumnVariant::DrawAddClamp, DrawColumnMethod::Normal); - CodegenDrawColumn("DrawColumnSubClamp", DrawColumnVariant::DrawSubClamp, DrawColumnMethod::Normal); - CodegenDrawColumn("DrawColumnRevSubClamp", DrawColumnVariant::DrawRevSubClamp, DrawColumnMethod::Normal); - CodegenDrawColumn("DrawColumnTranslated", DrawColumnVariant::DrawTranslated, DrawColumnMethod::Normal); - CodegenDrawColumn("DrawColumnTlatedAdd", DrawColumnVariant::DrawTlatedAdd, DrawColumnMethod::Normal); - CodegenDrawColumn("DrawColumnAddClampTranslated", DrawColumnVariant::DrawAddClampTranslated, DrawColumnMethod::Normal); - CodegenDrawColumn("DrawColumnSubClampTranslated", DrawColumnVariant::DrawSubClampTranslated, DrawColumnMethod::Normal); - CodegenDrawColumn("DrawColumnRevSubClampTranslated", 
DrawColumnVariant::DrawRevSubClampTranslated, DrawColumnMethod::Normal); - CodegenDrawColumn("DrawColumnRt1", DrawColumnVariant::Draw, DrawColumnMethod::Rt1); - CodegenDrawColumn("DrawColumnRt1Copy", DrawColumnVariant::DrawCopy, DrawColumnMethod::Rt1); - CodegenDrawColumn("DrawColumnRt1Add", DrawColumnVariant::DrawAdd, DrawColumnMethod::Rt1); - CodegenDrawColumn("DrawColumnRt1Shaded", DrawColumnVariant::DrawShaded, DrawColumnMethod::Rt1); - CodegenDrawColumn("DrawColumnRt1AddClamp", DrawColumnVariant::DrawAddClamp, DrawColumnMethod::Rt1); - CodegenDrawColumn("DrawColumnRt1SubClamp", DrawColumnVariant::DrawSubClamp, DrawColumnMethod::Rt1); - CodegenDrawColumn("DrawColumnRt1RevSubClamp", DrawColumnVariant::DrawRevSubClamp, DrawColumnMethod::Rt1); - CodegenDrawColumn("DrawColumnRt1Translated", DrawColumnVariant::DrawTranslated, DrawColumnMethod::Rt1); - CodegenDrawColumn("DrawColumnRt1TlatedAdd", DrawColumnVariant::DrawTlatedAdd, DrawColumnMethod::Rt1); - CodegenDrawColumn("DrawColumnRt1AddClampTranslated", DrawColumnVariant::DrawAddClampTranslated, DrawColumnMethod::Rt1); - CodegenDrawColumn("DrawColumnRt1SubClampTranslated", DrawColumnVariant::DrawSubClampTranslated, DrawColumnMethod::Rt1); - CodegenDrawColumn("DrawColumnRt1RevSubClampTranslated", DrawColumnVariant::DrawRevSubClampTranslated, DrawColumnMethod::Rt1); - CodegenDrawColumn("DrawColumnRt4", DrawColumnVariant::Draw, DrawColumnMethod::Rt4); - CodegenDrawColumn("DrawColumnRt4Copy", DrawColumnVariant::DrawCopy, DrawColumnMethod::Rt4); - CodegenDrawColumn("DrawColumnRt4Add", DrawColumnVariant::DrawAdd, DrawColumnMethod::Rt4); - CodegenDrawColumn("DrawColumnRt4Shaded", DrawColumnVariant::DrawShaded, DrawColumnMethod::Rt4); - CodegenDrawColumn("DrawColumnRt4AddClamp", DrawColumnVariant::DrawAddClamp, DrawColumnMethod::Rt4); - CodegenDrawColumn("DrawColumnRt4SubClamp", DrawColumnVariant::DrawSubClamp, DrawColumnMethod::Rt4); - CodegenDrawColumn("DrawColumnRt4RevSubClamp", DrawColumnVariant::DrawRevSubClamp, 
DrawColumnMethod::Rt4); - CodegenDrawColumn("DrawColumnRt4Translated", DrawColumnVariant::DrawTranslated, DrawColumnMethod::Rt4); - CodegenDrawColumn("DrawColumnRt4TlatedAdd", DrawColumnVariant::DrawTlatedAdd, DrawColumnMethod::Rt4); - CodegenDrawColumn("DrawColumnRt4AddClampTranslated", DrawColumnVariant::DrawAddClampTranslated, DrawColumnMethod::Rt4); - CodegenDrawColumn("DrawColumnRt4SubClampTranslated", DrawColumnVariant::DrawSubClampTranslated, DrawColumnMethod::Rt4); - CodegenDrawColumn("DrawColumnRt4RevSubClampTranslated", DrawColumnVariant::DrawRevSubClampTranslated, DrawColumnMethod::Rt4); - CodegenDrawSpan("DrawSpan", DrawSpanVariant::Opaque); - CodegenDrawSpan("DrawSpanMasked", DrawSpanVariant::Masked); - CodegenDrawSpan("DrawSpanTranslucent", DrawSpanVariant::Translucent); - CodegenDrawSpan("DrawSpanMaskedTranslucent", DrawSpanVariant::MaskedTranslucent); - CodegenDrawSpan("DrawSpanAddClamp", DrawSpanVariant::AddClamp); - CodegenDrawSpan("DrawSpanMaskedAddClamp", DrawSpanVariant::MaskedAddClamp); - CodegenDrawWall("vlinec1", DrawWallVariant::Opaque, 1); - CodegenDrawWall("vlinec4", DrawWallVariant::Opaque, 4); - CodegenDrawWall("mvlinec1", DrawWallVariant::Masked, 1); - CodegenDrawWall("mvlinec4", DrawWallVariant::Masked, 4); - CodegenDrawWall("tmvline1_add", DrawWallVariant::Add, 1); - CodegenDrawWall("tmvline4_add", DrawWallVariant::Add, 4); - CodegenDrawWall("tmvline1_addclamp", DrawWallVariant::AddClamp, 1); - CodegenDrawWall("tmvline4_addclamp", DrawWallVariant::AddClamp, 4); - CodegenDrawWall("tmvline1_subclamp", DrawWallVariant::SubClamp, 1); - CodegenDrawWall("tmvline4_subclamp", DrawWallVariant::SubClamp, 4); - CodegenDrawWall("tmvline1_revsubclamp", DrawWallVariant::RevSubClamp, 1); - CodegenDrawWall("tmvline4_revsubclamp", DrawWallVariant::RevSubClamp, 4); - CodegenDrawSky("DrawSky1", DrawSkyVariant::Single, 1); - CodegenDrawSky("DrawSky4", DrawSkyVariant::Single, 4); - CodegenDrawSky("DrawDoubleSky1", DrawSkyVariant::Double, 1); - 
CodegenDrawSky("DrawDoubleSky4", DrawSkyVariant::Double, 4); - for (int i = 0; i < NumTriBlendModes(); i++) - { - CodegenDrawTriangle("TriDraw8_" + std::to_string(i), TriDrawVariant::DrawNormal, (TriBlendMode)i, false); - CodegenDrawTriangle("TriDraw32_" + std::to_string(i), TriDrawVariant::DrawNormal, (TriBlendMode)i, true); - CodegenDrawTriangle("TriFill8_" + std::to_string(i), TriDrawVariant::FillNormal, (TriBlendMode)i, false); - CodegenDrawTriangle("TriFill32_" + std::to_string(i), TriDrawVariant::FillNormal, (TriBlendMode)i, true); - CodegenDrawTriangle("TriDrawSubsector8_" + std::to_string(i), TriDrawVariant::DrawSubsector, (TriBlendMode)i, false); - CodegenDrawTriangle("TriDrawSubsector32_" + std::to_string(i), TriDrawVariant::DrawSubsector, (TriBlendMode)i, true); - CodegenDrawTriangle("TriFillSubsector8_" + std::to_string(i), TriDrawVariant::FillSubsector, (TriBlendMode)i, false); - CodegenDrawTriangle("TriFillSubsector32_" + std::to_string(i), TriDrawVariant::FillSubsector, (TriBlendMode)i, true); - } - CodegenDrawTriangle("TriStencil", TriDrawVariant::Stencil, TriBlendMode::Copy, false); - CodegenDrawTriangle("TriStencilClose", TriDrawVariant::StencilClose, TriBlendMode::Copy, false); - } - - mProgram.CreateEE(version, targetCPU, !loaded); - - FillColumn = mProgram.GetProcAddress("FillColumn"); - FillColumnAdd = mProgram.GetProcAddress("FillColumnAdd"); - FillColumnAddClamp = mProgram.GetProcAddress("FillColumnAddClamp"); - FillColumnSubClamp = mProgram.GetProcAddress("FillColumnSubClamp"); - FillColumnRevSubClamp = mProgram.GetProcAddress("FillColumnRevSubClamp"); - DrawColumn = mProgram.GetProcAddress("DrawColumn"); - DrawColumnAdd = mProgram.GetProcAddress("DrawColumnAdd"); - DrawColumnShaded = mProgram.GetProcAddress("DrawColumnShaded"); - DrawColumnAddClamp = mProgram.GetProcAddress("DrawColumnAddClamp"); - DrawColumnSubClamp = mProgram.GetProcAddress("DrawColumnSubClamp"); - DrawColumnRevSubClamp = mProgram.GetProcAddress("DrawColumnRevSubClamp"); 
- DrawColumnTranslated = mProgram.GetProcAddress("DrawColumnTranslated"); - DrawColumnTlatedAdd = mProgram.GetProcAddress("DrawColumnTlatedAdd"); - DrawColumnAddClampTranslated = mProgram.GetProcAddress("DrawColumnAddClampTranslated"); - DrawColumnSubClampTranslated = mProgram.GetProcAddress("DrawColumnSubClampTranslated"); - DrawColumnRevSubClampTranslated = mProgram.GetProcAddress("DrawColumnRevSubClampTranslated"); - DrawColumnRt1 = mProgram.GetProcAddress("DrawColumnRt1"); - DrawColumnRt1Copy = mProgram.GetProcAddress("DrawColumnRt1Copy"); - DrawColumnRt1Add = mProgram.GetProcAddress("DrawColumnRt1Add"); - DrawColumnRt1Shaded = mProgram.GetProcAddress("DrawColumnRt1Shaded"); - DrawColumnRt1AddClamp = mProgram.GetProcAddress("DrawColumnRt1AddClamp"); - DrawColumnRt1SubClamp = mProgram.GetProcAddress("DrawColumnRt1SubClamp"); - DrawColumnRt1RevSubClamp = mProgram.GetProcAddress("DrawColumnRt1RevSubClamp"); - DrawColumnRt1Translated = mProgram.GetProcAddress("DrawColumnRt1Translated"); - DrawColumnRt1TlatedAdd = mProgram.GetProcAddress("DrawColumnRt1TlatedAdd"); - DrawColumnRt1AddClampTranslated = mProgram.GetProcAddress("DrawColumnRt1AddClampTranslated"); - DrawColumnRt1SubClampTranslated = mProgram.GetProcAddress("DrawColumnRt1SubClampTranslated"); - DrawColumnRt1RevSubClampTranslated = mProgram.GetProcAddress("DrawColumnRt1RevSubClampTranslated"); - DrawColumnRt4 = mProgram.GetProcAddress("DrawColumnRt4"); - DrawColumnRt4Copy = mProgram.GetProcAddress("DrawColumnRt4Copy"); - DrawColumnRt4Add = mProgram.GetProcAddress("DrawColumnRt4Add"); - DrawColumnRt4Shaded = mProgram.GetProcAddress("DrawColumnRt4Shaded"); - DrawColumnRt4AddClamp = mProgram.GetProcAddress("DrawColumnRt4AddClamp"); - DrawColumnRt4SubClamp = mProgram.GetProcAddress("DrawColumnRt4SubClamp"); - DrawColumnRt4RevSubClamp = mProgram.GetProcAddress("DrawColumnRt4RevSubClamp"); - DrawColumnRt4Translated = mProgram.GetProcAddress("DrawColumnRt4Translated"); - DrawColumnRt4TlatedAdd = 
mProgram.GetProcAddress("DrawColumnRt4TlatedAdd"); - DrawColumnRt4AddClampTranslated = mProgram.GetProcAddress("DrawColumnRt4AddClampTranslated"); - DrawColumnRt4SubClampTranslated = mProgram.GetProcAddress("DrawColumnRt4SubClampTranslated"); - DrawColumnRt4RevSubClampTranslated = mProgram.GetProcAddress("DrawColumnRt4RevSubClampTranslated"); - DrawSpan = mProgram.GetProcAddress("DrawSpan"); - DrawSpanMasked = mProgram.GetProcAddress("DrawSpanMasked"); - DrawSpanTranslucent = mProgram.GetProcAddress("DrawSpanTranslucent"); - DrawSpanMaskedTranslucent = mProgram.GetProcAddress("DrawSpanMaskedTranslucent"); - DrawSpanAddClamp = mProgram.GetProcAddress("DrawSpanAddClamp"); - DrawSpanMaskedAddClamp = mProgram.GetProcAddress("DrawSpanMaskedAddClamp"); - vlinec1 = mProgram.GetProcAddress("vlinec1"); - vlinec4 = mProgram.GetProcAddress("vlinec4"); - mvlinec1 = mProgram.GetProcAddress("mvlinec1"); - mvlinec4 = mProgram.GetProcAddress("mvlinec4"); - tmvline1_add = mProgram.GetProcAddress("tmvline1_add"); - tmvline4_add = mProgram.GetProcAddress("tmvline4_add"); - tmvline1_addclamp = mProgram.GetProcAddress("tmvline1_addclamp"); - tmvline4_addclamp = mProgram.GetProcAddress("tmvline4_addclamp"); - tmvline1_subclamp = mProgram.GetProcAddress("tmvline1_subclamp"); - tmvline4_subclamp = mProgram.GetProcAddress("tmvline4_subclamp"); - tmvline1_revsubclamp = mProgram.GetProcAddress("tmvline1_revsubclamp"); - tmvline4_revsubclamp = mProgram.GetProcAddress("tmvline4_revsubclamp"); - DrawSky1 = mProgram.GetProcAddress("DrawSky1"); - DrawSky4 = mProgram.GetProcAddress("DrawSky4"); - DrawDoubleSky1 = mProgram.GetProcAddress("DrawDoubleSky1"); - DrawDoubleSky4 = mProgram.GetProcAddress("DrawDoubleSky4"); - for (int i = 0; i < NumTriBlendModes(); i++) - { - TriDrawNormal8.push_back(mProgram.GetProcAddress("TriDraw8_" + std::to_string(i))); - TriDrawNormal32.push_back(mProgram.GetProcAddress("TriDraw32_" + std::to_string(i))); - 
TriFillNormal8.push_back(mProgram.GetProcAddress("TriFill8_" + std::to_string(i))); - TriFillNormal32.push_back(mProgram.GetProcAddress("TriFill32_" + std::to_string(i))); - TriDrawSubsector8.push_back(mProgram.GetProcAddress("TriDrawSubsector8_" + std::to_string(i))); - TriDrawSubsector32.push_back(mProgram.GetProcAddress("TriDrawSubsector32_" + std::to_string(i))); - TriFillSubsector8.push_back(mProgram.GetProcAddress("TriFillSubsector8_" + std::to_string(i))); - TriFillSubsector32.push_back(mProgram.GetProcAddress("TriFillSubsector32_" + std::to_string(i))); - } - TriStencil = mProgram.GetProcAddress("TriStencil"); - TriStencilClose = mProgram.GetProcAddress("TriStencilClose"); - -#if 0 - std::vector foo(1024 * 4); - std::vector boo(256 * 256 * 4); - DrawColumnArgs args = { 0 }; - WorkerThreadData thread = { 0 }; - thread.core = 0; - thread.num_cores = 1; - thread.pass_start_y = 0; - thread.pass_end_y = 3600; - thread.temp = foo.data(); - foo[125 * 4] = 1234; - foo[126 * 4] = 1234; - for (int i = 0; i < 16; i++) - boo[i] = i; - args.dest = boo.data() + 4; - args.dest_y = 125; - args.pitch = 256; - args.count = 1; - args.texturefrac = 0; - args.flags = 0; - args.iscale = 252769; - args.light = 256; - args.color = 4279179008; - args.srcalpha = 12; - args.destalpha = 256; - args.light_red = 192; - args.light_green = 256; - args.light_blue = 128; - DrawColumnRt4AddClamp(&args, &thread); -#endif - - mProgram.StopLogFatalErrors(); -} - -void LLVMDrawersImpl::CodegenDrawColumn(const char *name, DrawColumnVariant variant, DrawColumnMethod method) -{ - llvm::IRBuilder<> builder(mProgram.context()); - SSAScope ssa_scope(&mProgram.context(), mProgram.module(), &builder); - - SSAFunction function(name); - function.add_parameter(GetDrawColumnArgsStruct(mProgram.context())); - function.add_parameter(GetWorkerThreadDataStruct(mProgram.context())); - function.create_public(); - - DrawColumnCodegen codegen; - codegen.Generate(variant, method, function.parameter(0), 
function.parameter(1)); - - builder.CreateRetVoid(); - - if (llvm::verifyFunction(*function.func)) - I_FatalError("verifyFunction failed for CodegenDrawColumn()"); -} - -void LLVMDrawersImpl::CodegenDrawSpan(const char *name, DrawSpanVariant variant) -{ - llvm::IRBuilder<> builder(mProgram.context()); - SSAScope ssa_scope(&mProgram.context(), mProgram.module(), &builder); - - SSAFunction function(name); - function.add_parameter(GetDrawSpanArgsStruct(mProgram.context())); - function.create_public(); - - DrawSpanCodegen codegen; - codegen.Generate(variant, function.parameter(0)); - - builder.CreateRetVoid(); - - if (llvm::verifyFunction(*function.func)) - I_FatalError("verifyFunction failed for CodegenDrawSpan()"); -} - -void LLVMDrawersImpl::CodegenDrawWall(const char *name, DrawWallVariant variant, int columns) -{ - llvm::IRBuilder<> builder(mProgram.context()); - SSAScope ssa_scope(&mProgram.context(), mProgram.module(), &builder); - - SSAFunction function(name); - function.add_parameter(GetDrawWallArgsStruct(mProgram.context())); - function.add_parameter(GetWorkerThreadDataStruct(mProgram.context())); - function.create_public(); - - DrawWallCodegen codegen; - codegen.Generate(variant, columns == 4, function.parameter(0), function.parameter(1)); - - builder.CreateRetVoid(); - - if (llvm::verifyFunction(*function.func)) - I_FatalError("verifyFunction failed for CodegenDrawWall()"); -} - -void LLVMDrawersImpl::CodegenDrawSky(const char *name, DrawSkyVariant variant, int columns) -{ - llvm::IRBuilder<> builder(mProgram.context()); - SSAScope ssa_scope(&mProgram.context(), mProgram.module(), &builder); - - SSAFunction function(name); - function.add_parameter(GetDrawSkyArgsStruct(mProgram.context())); - function.add_parameter(GetWorkerThreadDataStruct(mProgram.context())); - function.create_public(); - - DrawSkyCodegen codegen; - codegen.Generate(variant, columns == 4, function.parameter(0), function.parameter(1)); - - builder.CreateRetVoid(); - - if 
(llvm::verifyFunction(*function.func)) - I_FatalError("verifyFunction failed for CodegenDrawSky()"); -} - -void LLVMDrawersImpl::CodegenDrawTriangle(const std::string &name, TriDrawVariant variant, TriBlendMode blendmode, bool truecolor) -{ - llvm::IRBuilder<> builder(mProgram.context()); - SSAScope ssa_scope(&mProgram.context(), mProgram.module(), &builder); - - SSAFunction function(name); - function.add_parameter(GetTriDrawTriangleArgs(mProgram.context())); - function.add_parameter(GetWorkerThreadDataStruct(mProgram.context())); - function.create_public(); - - DrawTriangleCodegen codegen; - codegen.Generate(variant, blendmode, truecolor, function.parameter(0), function.parameter(1)); - - builder.CreateRetVoid(); - - if (llvm::verifyFunction(*function.func)) - I_FatalError("verifyFunction failed for CodegenDrawTriangle(%d, %d, %d)", (int)variant, (int)blendmode, (int)truecolor); -} - -llvm::Type *LLVMDrawersImpl::GetDrawColumnArgsStruct(llvm::LLVMContext &context) -{ - std::vector elements; - elements.push_back(llvm::Type::getInt8PtrTy(context)); // uint32_t *dest; - elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint32_t *source; - elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint32_t *source2; - elements.push_back(llvm::Type::getInt8PtrTy(context)); // uint8_t *colormap; - elements.push_back(llvm::Type::getInt8PtrTy(context)); // uint8_t *translation; - elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint32_t *basecolors; - elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t pitch; - elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t count; - elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t dest_y; - elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t iscale; - elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t texturefracx; - elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t textureheight; - 
elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t texturefrac; - elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t light; - elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t color; - elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t srccolor; - elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t srcalpha; - elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t destalpha; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_alpha; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_red; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_green; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_blue; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_alpha; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_red; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_green; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_blue; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t desaturate; - elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t flags; - return llvm::StructType::create(context, elements, "DrawColumnArgs", false)->getPointerTo(); -} - -llvm::Type *LLVMDrawersImpl::GetDrawSpanArgsStruct(llvm::LLVMContext &context) -{ - std::vector elements; - elements.push_back(llvm::Type::getInt8PtrTy(context)); // uint8_t *destorg; - elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint32_t *source; - elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t destpitch; - elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t xfrac; - elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t yfrac; - elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t xstep; - elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t ystep; - 
elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t x1; - elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t x2; - elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t y; - elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t xbits; - elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t ybits; - elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t light; - elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t srcalpha; - elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t destalpha; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_alpha; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_red; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_green; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_blue; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_alpha; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_red; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_green; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_blue; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t desaturate; - elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t flags; - return llvm::StructType::create(context, elements, "DrawSpanArgs", false)->getPointerTo(); -} - -llvm::Type *LLVMDrawersImpl::GetDrawWallArgsStruct(llvm::LLVMContext &context) -{ - std::vector elements; - elements.push_back(llvm::Type::getInt8PtrTy(context)); - for (int i = 0; i < 8; i++) - elements.push_back(llvm::Type::getInt8PtrTy(context)); - for (int i = 0; i < 25; i++) - elements.push_back(llvm::Type::getInt32Ty(context)); - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_alpha; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_red; - 
elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_green; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_blue; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_alpha; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_red; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_green; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_blue; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t desaturate; - elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t flags; - return llvm::StructType::create(context, elements, "DrawWallArgs", false)->getPointerTo(); -} - -llvm::Type *LLVMDrawersImpl::GetDrawSkyArgsStruct(llvm::LLVMContext &context) -{ - std::vector elements; - elements.push_back(llvm::Type::getInt8PtrTy(context)); - for (int i = 0; i < 8; i++) - elements.push_back(llvm::Type::getInt8PtrTy(context)); - for (int i = 0; i < 15; i++) - elements.push_back(llvm::Type::getInt32Ty(context)); - return llvm::StructType::create(context, elements, "DrawSkyArgs", false)->getPointerTo(); -} - -llvm::Type *LLVMDrawersImpl::GetWorkerThreadDataStruct(llvm::LLVMContext &context) -{ - std::vector elements; - for (int i = 0; i < 4; i++) - elements.push_back(llvm::Type::getInt32Ty(context)); - elements.push_back(llvm::Type::getInt8PtrTy(context)); - return llvm::StructType::create(context, elements, "ThreadData", false)->getPointerTo(); -} - -llvm::Type *LLVMDrawersImpl::GetTriVertexStruct(llvm::LLVMContext &context) -{ - std::vector elements; - for (int i = 0; i < 4 + TriVertex::NumVarying; i++) - elements.push_back(llvm::Type::getFloatTy(context)); - return llvm::StructType::create(context, elements, "TriVertex", false)->getPointerTo(); -} - -llvm::Type *LLVMDrawersImpl::GetTriMatrixStruct(llvm::LLVMContext &context) -{ - std::vector elements; - for (int i = 0; i < 4 * 4; i++) - 
elements.push_back(llvm::Type::getFloatTy(context)); - return llvm::StructType::create(context, elements, "TriMatrix", false)->getPointerTo(); -} - -llvm::Type *LLVMDrawersImpl::GetTriUniformsStruct(llvm::LLVMContext &context) -{ - std::vector elements; - elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t light; - elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t subsectorDepth; - elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t color; - elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t srcalpha; - elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t destalpha; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_alpha; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_red; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_green; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_blue; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_alpha; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_red; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_green; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_blue; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t desaturate; - elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t flags; - elements.push_back(GetTriMatrixStruct(context)); // TriMatrix objectToClip - return llvm::StructType::create(context, elements, "TriUniforms", false)->getPointerTo(); -} - -llvm::Type *LLVMDrawersImpl::GetTriDrawTriangleArgs(llvm::LLVMContext &context) -{ - std::vector elements; - elements.push_back(llvm::Type::getInt8PtrTy(context)); // uint8_t *dest; - elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t pitch; - elements.push_back(GetTriVertexStruct(context)); // TriVertex *v1; - elements.push_back(GetTriVertexStruct(context)); // TriVertex *v2; - 
elements.push_back(GetTriVertexStruct(context)); // TriVertex *v3; - elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t clipleft; - elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t clipright; - elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t cliptop; - elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t clipbottom; - elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint8_t *texturePixels; - elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t textureWidth; - elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t textureHeight; - elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint8_t *translation; - elements.push_back(GetTriUniformsStruct(context)); // const TriUniforms *uniforms; - elements.push_back(llvm::Type::getInt8PtrTy(context)); // uint8_t *stencilValues; - elements.push_back(llvm::Type::getInt32PtrTy(context)); // uint32_t *stencilMasks; - elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t stencilPitch; - elements.push_back(llvm::Type::getInt8Ty(context)); // uint8_t stencilTestValue; - elements.push_back(llvm::Type::getInt8Ty(context)); // uint8_t stencilWriteValue; - elements.push_back(llvm::Type::getInt32PtrTy(context)); // uint32_t *subsectorGBuffer; - elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint8_t *colormaps; - elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint8_t *RGB32k; - elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint8_t *BaseColors; - return llvm::StructType::create(context, elements, "TriDrawTriangle", false)->getPointerTo(); -} - -///////////////////////////////////////////////////////////////////////////// - -namespace { static bool LogFatalErrors = false; } - -LLVMProgram::LLVMProgram() -{ - using namespace llvm; - - // We have to extra careful about this because both LLVM and ZDoom made - // the very unwise decision to hook atexit. 
To top it off, LLVM decided - // to log something in the atexit handler.. - LogFatalErrors = true; - - install_fatal_error_handler([](void *user_data, const std::string& reason, bool gen_crash_diag) { - if (LogFatalErrors) - I_FatalError("LLVM fatal error: %s", reason.c_str()); - }); - - InitializeNativeTarget(); - InitializeNativeTargetAsmPrinter(); - - mContext = std::make_unique(); -} - -void LLVMProgram::CreateModule() -{ - mModule = std::make_unique("render", context()); -} - -bool LLVMProgram::LoadCachedModule(int version, std::string targetCPU) -{ - FString filename = GetDrawerCacheFilename(version, targetCPU.c_str()); - FILE *file = fopen(filename, "rb"); - if (!file) - return false; - - bool success = false; - std::string data; - - fseek(file, 0, SEEK_END); - int length = ftell(file); - fseek(file, 0, SEEK_SET); - if (length > 0) - { - data.resize(length); - success = fread(&data[0], length, 1, file) == 1; - } - - fclose(file); - if (!success) - return false; - - auto result = llvm::parseBitcodeFile(llvm::MemoryBufferRef(data, filename.GetChars()), *mContext.get()); - if (!result) - return false; - -#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 8) - mModule.reset(result.get()); -#else - mModule = std::move(result.get()); -#endif - return true; -} - -void LLVMProgram::SaveCachedModule(llvm::Module *module, int version, std::string targetCPU) -{ - std::string str; - llvm::raw_string_ostream stream(str); - llvm::WriteBitcodeToFile(module, stream); - std::string data = stream.str(); - - FString filename = GetDrawerCacheFilename(version, targetCPU.c_str()); - FILE *file = fopen(filename, "wb"); - if (file) - { - fwrite(data.data(), data.size(), 1, file); - fclose(file); - } -} - -FString LLVMProgram::GetDrawerCacheFilename(int version, FString cpu) -{ - FString path = M_GetCachePath(true); - FString filename; - filename.Format("%s/LLVMDrawers-%d-%s.bc", path.GetChars(), version, cpu.GetChars()); - return filename; -} - 
-std::string LLVMProgram::GetTargetCPU() -{ - using namespace llvm; - std::string mcpu = sys::getHostCPUName(); - if (std::string(CPU.CPUString).find("G840") != std::string::npos && mcpu == "sandybridge") - mcpu = "westmere"; // Pentium G840 is misdetected as a sandy bridge CPU - - if (stricmp(llvm_cpu, "auto") != 0) - { - mcpu = llvm_cpu; - Printf("Overriding LLVM CPU target to %s\n", mcpu.c_str()); - } - return mcpu; -} - -void LLVMProgram::CreateEE(int version, std::string targetCPU, bool optimize) -{ - using namespace llvm; - - std::string errorstring; - - llvm::Module *module = mModule.get(); - EngineBuilder engineBuilder(std::move(mModule)); - engineBuilder.setErrorStr(&errorstring); - engineBuilder.setOptLevel(CodeGenOpt::Aggressive); - engineBuilder.setEngineKind(EngineKind::JIT); - engineBuilder.setMCPU(targetCPU); - machine = engineBuilder.selectTarget(); - if (!machine) - I_FatalError("Could not create LLVM target machine"); - -#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 8) - std::string targetTriple = machine->getTargetTriple(); -#else - std::string targetTriple = machine->getTargetTriple().getTriple(); -#endif - std::string cpuName = machine->getTargetCPU(); - Printf("LLVM target triple: %s\n", targetTriple.c_str()); - Printf("LLVM target CPU: %s\n", cpuName.c_str()); - - if (optimize) - { - Printf("Optimizing drawers..\n"); - - module->setTargetTriple(targetTriple); -#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 8) - module->setDataLayout(new DataLayout(*machine->getSubtargetImpl()->getDataLayout())); -#else - module->setDataLayout(machine->createDataLayout()); -#endif - - legacy::FunctionPassManager PerFunctionPasses(module); - legacy::PassManager PerModulePasses; - -#if LLVM_VERSION_MAJOR > 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 8) - PerFunctionPasses.add(createTargetTransformInfoWrapperPass(machine->getTargetIRAnalysis())); - 
PerModulePasses.add(createTargetTransformInfoWrapperPass(machine->getTargetIRAnalysis())); -#endif - - PassManagerBuilder passManagerBuilder; - passManagerBuilder.OptLevel = 3; - passManagerBuilder.SizeLevel = 0; - passManagerBuilder.Inliner = createFunctionInliningPass(); - passManagerBuilder.SLPVectorize = true; - passManagerBuilder.LoopVectorize = true; - passManagerBuilder.LoadCombine = true; - passManagerBuilder.populateModulePassManager(PerModulePasses); - passManagerBuilder.populateFunctionPassManager(PerFunctionPasses); - - // Run function passes: - PerFunctionPasses.doInitialization(); - for (llvm::Function &func : *module) - { - if (!func.isDeclaration()) - PerFunctionPasses.run(func); - } - PerFunctionPasses.doFinalization(); - - // Run module passes: - PerModulePasses.run(*module); - - SaveCachedModule(module, version, targetCPU); - } - - Printf("Compiling drawers..\n"); - - // Create execution engine and generate machine code - mEngine.reset(engineBuilder.create(machine)); - if (!mEngine) - I_FatalError("Could not create LLVM execution engine: %s", errorstring.c_str()); - - mEngine->finalizeObject(); -} - -std::string LLVMProgram::GenerateAssembly(std::string cpuName) -{ - using namespace llvm; - - std::string errorstring; - - llvm::Module *module = mModule.get(); - EngineBuilder engineBuilder(std::move(mModule)); - engineBuilder.setErrorStr(&errorstring); - engineBuilder.setOptLevel(CodeGenOpt::Aggressive); - engineBuilder.setEngineKind(EngineKind::JIT); - engineBuilder.setMCPU(cpuName); - machine = engineBuilder.selectTarget(); - if (!machine) - I_FatalError("Could not create LLVM target machine"); - -#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 8) - std::string targetTriple = machine->getTargetTriple(); -#else - std::string targetTriple = machine->getTargetTriple().getTriple(); -#endif - - module->setTargetTriple(targetTriple); -#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 8) - 
module->setDataLayout(new DataLayout(*machine->getSubtargetImpl()->getDataLayout())); -#else - module->setDataLayout(machine->createDataLayout()); -#endif - - legacy::FunctionPassManager PerFunctionPasses(module); - legacy::PassManager PerModulePasses; - -#if LLVM_VERSION_MAJOR > 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 8) - PerFunctionPasses.add(createTargetTransformInfoWrapperPass(machine->getTargetIRAnalysis())); - PerModulePasses.add(createTargetTransformInfoWrapperPass(machine->getTargetIRAnalysis())); -#endif - - SmallString<16*1024> str; -#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 8) - raw_svector_ostream vecstream(str); - formatted_raw_ostream stream(vecstream); -#else - raw_svector_ostream stream(str); -#endif - machine->addPassesToEmitFile(PerModulePasses, stream, TargetMachine::CGFT_AssemblyFile); - - PassManagerBuilder passManagerBuilder; - passManagerBuilder.OptLevel = 3; - passManagerBuilder.SizeLevel = 0; - passManagerBuilder.Inliner = createFunctionInliningPass(); - passManagerBuilder.SLPVectorize = true; - passManagerBuilder.LoopVectorize = true; - passManagerBuilder.LoadCombine = true; - passManagerBuilder.populateModulePassManager(PerModulePasses); - passManagerBuilder.populateFunctionPassManager(PerFunctionPasses); - - // Run function passes: - PerFunctionPasses.doInitialization(); - for (llvm::Function &func : *module) - { - if (!func.isDeclaration()) - PerFunctionPasses.run(func); - } - PerFunctionPasses.doFinalization(); - - // Run module passes: - PerModulePasses.run(*module); - - return str.c_str(); -} - -std::string LLVMProgram::DumpModule() -{ - std::string str; - llvm::raw_string_ostream stream(str); -#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 8) - mModule->print(stream, nullptr); -#else - mModule->print(stream, nullptr, false, true); -#endif - return stream.str(); -} - -void *LLVMProgram::PointerToFunction(const char *name) -{ - return 
reinterpret_cast(mEngine->getFunctionAddress(name)); -} - -void LLVMProgram::StopLogFatalErrors() -{ - LogFatalErrors = false; -} diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index b7be3ceed3..716b30c0d3 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -38,7 +38,7 @@ #include "r_data/colormaps.h" #include "r_plane.h" #include "r_draw_rgba.h" -#include "r_compiler/llvmdrawers.h" +#include "r_drawers.h" #include "gl/data/gl_matrix.h" #include "gi.h" @@ -105,7 +105,7 @@ public: { if (thread->skipped_by_thread(args.y)) return; - LLVMDrawers::Instance()->DrawSpan(&args); + Drawers::Instance()->DrawSpan(&args); } FString DebugInfo() override @@ -146,7 +146,7 @@ public: { if (thread->skipped_by_thread(args.y)) return; - LLVMDrawers::Instance()->DrawSpanMasked(&args); + Drawers::Instance()->DrawSpanMasked(&args); } }; @@ -157,7 +157,7 @@ public: { if (thread->skipped_by_thread(args.y)) return; - LLVMDrawers::Instance()->DrawSpanTranslucent(&args); + Drawers::Instance()->DrawSpanTranslucent(&args); } }; @@ -168,7 +168,7 @@ public: { if (thread->skipped_by_thread(args.y)) return; - LLVMDrawers::Instance()->DrawSpanMaskedTranslucent(&args); + Drawers::Instance()->DrawSpanMaskedTranslucent(&args); } }; @@ -179,7 +179,7 @@ public: { if (thread->skipped_by_thread(args.y)) return; - LLVMDrawers::Instance()->DrawSpanAddClamp(&args); + Drawers::Instance()->DrawSpanAddClamp(&args); } }; @@ -190,7 +190,7 @@ public: { if (thread->skipped_by_thread(args.y)) return; - LLVMDrawers::Instance()->DrawSpanMaskedAddClamp(&args); + Drawers::Instance()->DrawSpanMaskedAddClamp(&args); } }; @@ -251,7 +251,7 @@ public: void Execute(DrawerThread *thread) override { WorkerThreadData d = ThreadData(thread); - LLVMDrawers::Instance()->vlinec4(&args, &d); + Drawers::Instance()->vlinec4(&args, &d); } FString DebugInfo() override @@ -312,7 +312,7 @@ public: void Execute(DrawerThread *thread) override { WorkerThreadData d = ThreadData(thread); - 
LLVMDrawers::Instance()->vlinec1(&args, &d); + Drawers::Instance()->vlinec1(&args, &d); } FString DebugInfo() override @@ -383,7 +383,7 @@ public: void Execute(DrawerThread *thread) override { WorkerThreadData d = ThreadData(thread); - LLVMDrawers::Instance()->DrawColumn(&args, &d); + Drawers::Instance()->DrawColumn(&args, &d); } }; @@ -438,7 +438,7 @@ public: \ void Execute(DrawerThread *thread) override \ { \ WorkerThreadData d = ThreadData(thread); \ - LLVMDrawers::Instance()->func(&args, &d); \ + Drawers::Instance()->func(&args, &d); \ } \ }; diff --git a/src/r_drawers.cpp b/src/r_drawers.cpp new file mode 100644 index 0000000000..6d4aaa055d --- /dev/null +++ b/src/r_drawers.cpp @@ -0,0 +1,423 @@ +/* +** LLVM code generated drawers +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. 
+** +*/ + +#include "i_system.h" +#include "r_drawers.h" +#include "x86.h" +#include "c_cvars.h" +#include "version.h" +#include "m_misc.h" + +CUSTOM_CVAR(String, llvm_cpu, "auto", CVAR_ARCHIVE | CVAR_NOINITCALL) +{ + Printf("You must restart " GAMENAME " for this change to take effect.\n"); +} + +///////////////////////////////////////////////////////////////////////////// + +extern "C" +{ + void DrawColumn_SSE2(const DrawColumnArgs *, const WorkerThreadData *); + void DrawColumnAdd_SSE2(const DrawColumnArgs *, const WorkerThreadData *); + void DrawColumnShaded_SSE2(const DrawColumnArgs *, const WorkerThreadData *); + void DrawColumnAddClamp_SSE2(const DrawColumnArgs *, const WorkerThreadData *); + void DrawColumnSubClamp_SSE2(const DrawColumnArgs *, const WorkerThreadData *); + void DrawColumnRevSubClamp_SSE2(const DrawColumnArgs *, const WorkerThreadData *); + void DrawColumnTranslated_SSE2(const DrawColumnArgs *, const WorkerThreadData *); + void DrawColumnTlatedAdd_SSE2(const DrawColumnArgs *, const WorkerThreadData *); + void DrawColumnAddClampTranslated_SSE2(const DrawColumnArgs *, const WorkerThreadData *); + void DrawColumnSubClampTranslated_SSE2(const DrawColumnArgs *, const WorkerThreadData *); + void DrawColumnRevSubClampTranslated_SSE2(const DrawColumnArgs *, const WorkerThreadData *); + void FillColumn_SSE2(const DrawColumnArgs *, const WorkerThreadData *); + void FillColumnAdd_SSE2(const DrawColumnArgs *, const WorkerThreadData *); + void FillColumnAddClamp_SSE2(const DrawColumnArgs *, const WorkerThreadData *); + void FillColumnSubClamp_SSE2(const DrawColumnArgs *, const WorkerThreadData *); + void FillColumnRevSubClamp_SSE2(const DrawColumnArgs *, const WorkerThreadData *); + void DrawColumnRt1_SSE2(const DrawColumnArgs *, const WorkerThreadData *); + void DrawColumnRt1Copy_SSE2(const DrawColumnArgs *, const WorkerThreadData *); + void DrawColumnRt1Add_SSE2(const DrawColumnArgs *, const WorkerThreadData *); + void DrawColumnRt1Shaded_SSE2(const 
DrawColumnArgs *, const WorkerThreadData *); + void DrawColumnRt1AddClamp_SSE2(const DrawColumnArgs *, const WorkerThreadData *); + void DrawColumnRt1SubClamp_SSE2(const DrawColumnArgs *, const WorkerThreadData *); + void DrawColumnRt1RevSubClamp_SSE2(const DrawColumnArgs *, const WorkerThreadData *); + void DrawColumnRt1Translated_SSE2(const DrawColumnArgs *, const WorkerThreadData *); + void DrawColumnRt1TlatedAdd_SSE2(const DrawColumnArgs *, const WorkerThreadData *); + void DrawColumnRt1AddClampTranslated_SSE2(const DrawColumnArgs *, const WorkerThreadData *); + void DrawColumnRt1SubClampTranslated_SSE2(const DrawColumnArgs *, const WorkerThreadData *); + void DrawColumnRt1RevSubClampTranslated_SSE2(const DrawColumnArgs *, const WorkerThreadData *); + void DrawColumnRt4_SSE2(const DrawColumnArgs *, const WorkerThreadData *); + void DrawColumnRt4Copy_SSE2(const DrawColumnArgs *, const WorkerThreadData *); + void DrawColumnRt4Add_SSE2(const DrawColumnArgs *, const WorkerThreadData *); + void DrawColumnRt4Shaded_SSE2(const DrawColumnArgs *, const WorkerThreadData *); + void DrawColumnRt4AddClamp_SSE2(const DrawColumnArgs *, const WorkerThreadData *); + void DrawColumnRt4SubClamp_SSE2(const DrawColumnArgs *, const WorkerThreadData *); + void DrawColumnRt4RevSubClamp_SSE2(const DrawColumnArgs *, const WorkerThreadData *); + void DrawColumnRt4Translated_SSE2(const DrawColumnArgs *, const WorkerThreadData *); + void DrawColumnRt4TlatedAdd_SSE2(const DrawColumnArgs *, const WorkerThreadData *); + void DrawColumnRt4AddClampTranslated_SSE2(const DrawColumnArgs *, const WorkerThreadData *); + void DrawColumnRt4SubClampTranslated_SSE2(const DrawColumnArgs *, const WorkerThreadData *); + void DrawColumnRt4RevSubClampTranslated_SSE2(const DrawColumnArgs *, const WorkerThreadData *); + void DrawSpan_SSE2(const DrawSpanArgs *); + void DrawSpanMasked_SSE2(const DrawSpanArgs *); + void DrawSpanTranslucent_SSE2(const DrawSpanArgs *); + void DrawSpanMaskedTranslucent_SSE2(const 
DrawSpanArgs *); + void DrawSpanAddClamp_SSE2(const DrawSpanArgs *); + void DrawSpanMaskedAddClamp_SSE2(const DrawSpanArgs *); + void vlinec1_SSE2(const DrawWallArgs *, const WorkerThreadData *); + void vlinec4_SSE2(const DrawWallArgs *, const WorkerThreadData *); + void mvlinec1_SSE2(const DrawWallArgs *, const WorkerThreadData *); + void mvlinec4_SSE2(const DrawWallArgs *, const WorkerThreadData *); + void tmvline1_add_SSE2(const DrawWallArgs *, const WorkerThreadData *); + void tmvline4_add_SSE2(const DrawWallArgs *, const WorkerThreadData *); + void tmvline1_addclamp_SSE2(const DrawWallArgs *, const WorkerThreadData *); + void tmvline4_addclamp_SSE2(const DrawWallArgs *, const WorkerThreadData *); + void tmvline1_subclamp_SSE2(const DrawWallArgs *, const WorkerThreadData *); + void tmvline4_subclamp_SSE2(const DrawWallArgs *, const WorkerThreadData *); + void tmvline1_revsubclamp_SSE2(const DrawWallArgs *, const WorkerThreadData *); + void tmvline4_revsubclamp_SSE2(const DrawWallArgs *, const WorkerThreadData *); + void DrawSky1_SSE2(const DrawSkyArgs *, const WorkerThreadData *); + void DrawSky4_SSE2(const DrawSkyArgs *, const WorkerThreadData *); + void DrawDoubleSky1_SSE2(const DrawSkyArgs *, const WorkerThreadData *); + void DrawDoubleSky4_SSE2(const DrawSkyArgs *, const WorkerThreadData *); + void TriDrawNormal8_0_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriDrawNormal8_1_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriDrawNormal8_2_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriDrawNormal8_3_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriDrawNormal8_4_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriDrawNormal8_5_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriDrawNormal8_6_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriDrawNormal8_7_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriDrawNormal8_8_SSE2(const 
TriDrawTriangleArgs *, WorkerThreadData *); + void TriDrawNormal8_9_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriDrawNormal8_10_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriDrawNormal8_11_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriDrawNormal8_12_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriDrawNormal32_0_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriDrawNormal32_1_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriDrawNormal32_2_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriDrawNormal32_3_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriDrawNormal32_4_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriDrawNormal32_5_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriDrawNormal32_6_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriDrawNormal32_7_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriDrawNormal32_8_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriDrawNormal32_9_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriDrawNormal32_10_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriDrawNormal32_11_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriDrawNormal32_12_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriFillNormal8_0_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriFillNormal8_1_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriFillNormal8_2_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriFillNormal8_3_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriFillNormal8_4_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriFillNormal8_5_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriFillNormal8_6_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriFillNormal8_7_SSE2(const 
TriDrawTriangleArgs *, WorkerThreadData *); + void TriFillNormal8_8_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriFillNormal8_9_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriFillNormal8_10_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriFillNormal8_11_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriFillNormal8_12_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriFillNormal32_0_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriFillNormal32_1_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriFillNormal32_2_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriFillNormal32_3_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriFillNormal32_4_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriFillNormal32_5_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriFillNormal32_6_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriFillNormal32_7_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriFillNormal32_8_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriFillNormal32_9_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriFillNormal32_10_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriFillNormal32_11_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriFillNormal32_12_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriDrawSubsector8_0_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriDrawSubsector8_1_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriDrawSubsector8_2_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriDrawSubsector8_3_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriDrawSubsector8_4_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriDrawSubsector8_5_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void 
TriDrawSubsector8_6_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriDrawSubsector8_7_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriDrawSubsector8_8_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriDrawSubsector8_9_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriDrawSubsector8_10_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriDrawSubsector8_11_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriDrawSubsector8_12_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriDrawSubsector32_0_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriDrawSubsector32_1_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriDrawSubsector32_2_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriDrawSubsector32_3_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriDrawSubsector32_4_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriDrawSubsector32_5_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriDrawSubsector32_6_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriDrawSubsector32_7_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriDrawSubsector32_8_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriDrawSubsector32_9_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriDrawSubsector32_10_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriDrawSubsector32_11_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriDrawSubsector32_12_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriFillSubsector8_0_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriFillSubsector8_1_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriFillSubsector8_2_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriFillSubsector8_3_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void 
TriFillSubsector8_4_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriFillSubsector8_5_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriFillSubsector8_6_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriFillSubsector8_7_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriFillSubsector8_8_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriFillSubsector8_9_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriFillSubsector8_10_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriFillSubsector8_11_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriFillSubsector8_12_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriFillSubsector32_0_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriFillSubsector32_1_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriFillSubsector32_2_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriFillSubsector32_3_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriFillSubsector32_4_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriFillSubsector32_5_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriFillSubsector32_6_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriFillSubsector32_7_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriFillSubsector32_8_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriFillSubsector32_9_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriFillSubsector32_10_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriFillSubsector32_11_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriFillSubsector32_12_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriStencil_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriStencilClose_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); +} + 
+///////////////////////////////////////////////////////////////////////////// + +Drawers *Drawers::Instance() +{ + static Drawers drawers; + static bool firstcall = true; + + if (!firstcall) + return &drawers; + + drawers.DrawColumn = DrawColumn_SSE2; + drawers.DrawColumnAdd = DrawColumnAdd_SSE2; + drawers.DrawColumnShaded = DrawColumnShaded_SSE2; + drawers.DrawColumnAddClamp = DrawColumnAddClamp_SSE2; + drawers.DrawColumnSubClamp = DrawColumnSubClamp_SSE2; + drawers.DrawColumnRevSubClamp = DrawColumnRevSubClamp_SSE2; + drawers.DrawColumnTranslated = DrawColumnTranslated_SSE2; + drawers.DrawColumnTlatedAdd = DrawColumnTlatedAdd_SSE2; + drawers.DrawColumnAddClampTranslated = DrawColumnAddClampTranslated_SSE2; + drawers.DrawColumnSubClampTranslated = DrawColumnSubClampTranslated_SSE2; + drawers.DrawColumnRevSubClampTranslated = DrawColumnRevSubClampTranslated_SSE2; + drawers.FillColumn = FillColumn_SSE2; + drawers.FillColumnAdd = FillColumnAdd_SSE2; + drawers.FillColumnAddClamp = FillColumnAddClamp_SSE2; + drawers.FillColumnSubClamp = FillColumnSubClamp_SSE2; + drawers.FillColumnRevSubClamp = FillColumnRevSubClamp_SSE2; + drawers.DrawColumnRt1 = DrawColumnRt1_SSE2; + drawers.DrawColumnRt1Copy = DrawColumnRt1Copy_SSE2; + drawers.DrawColumnRt1Add = DrawColumnRt1Add_SSE2; + drawers.DrawColumnRt1Shaded = DrawColumnRt1Shaded_SSE2; + drawers.DrawColumnRt1AddClamp = DrawColumnRt1AddClamp_SSE2; + drawers.DrawColumnRt1SubClamp = DrawColumnRt1SubClamp_SSE2; + drawers.DrawColumnRt1RevSubClamp = DrawColumnRt1RevSubClamp_SSE2; + drawers.DrawColumnRt1Translated = DrawColumnRt1Translated_SSE2; + drawers.DrawColumnRt1TlatedAdd = DrawColumnRt1TlatedAdd_SSE2; + drawers.DrawColumnRt1AddClampTranslated = DrawColumnRt1AddClampTranslated_SSE2; + drawers.DrawColumnRt1SubClampTranslated = DrawColumnRt1SubClampTranslated_SSE2; + drawers.DrawColumnRt1RevSubClampTranslated = DrawColumnRt1RevSubClampTranslated_SSE2; + drawers.DrawColumnRt4 = DrawColumnRt4_SSE2; + drawers.DrawColumnRt4Copy = 
DrawColumnRt4Copy_SSE2; + drawers.DrawColumnRt4Add = DrawColumnRt4Add_SSE2; + drawers.DrawColumnRt4Shaded = DrawColumnRt4Shaded_SSE2; + drawers.DrawColumnRt4AddClamp = DrawColumnRt4AddClamp_SSE2; + drawers.DrawColumnRt4SubClamp = DrawColumnRt4SubClamp_SSE2; + drawers.DrawColumnRt4RevSubClamp = DrawColumnRt4RevSubClamp_SSE2; + drawers.DrawColumnRt4Translated = DrawColumnRt4Translated_SSE2; + drawers.DrawColumnRt4TlatedAdd = DrawColumnRt4TlatedAdd_SSE2; + drawers.DrawColumnRt4AddClampTranslated = DrawColumnRt4AddClampTranslated_SSE2; + drawers.DrawColumnRt4SubClampTranslated = DrawColumnRt4SubClampTranslated_SSE2; + drawers.DrawColumnRt4RevSubClampTranslated = DrawColumnRt4RevSubClampTranslated_SSE2; + drawers.DrawSpan = DrawSpan_SSE2; + drawers.DrawSpanMasked = DrawSpanMasked_SSE2; + drawers.DrawSpanTranslucent = DrawSpanTranslucent_SSE2; + drawers.DrawSpanMaskedTranslucent = DrawSpanMaskedTranslucent_SSE2; + drawers.DrawSpanAddClamp = DrawSpanAddClamp_SSE2; + drawers.DrawSpanMaskedAddClamp = DrawSpanMaskedAddClamp_SSE2; + drawers.vlinec1 = vlinec1_SSE2; + drawers.vlinec4 = vlinec4_SSE2; + drawers.mvlinec1 = mvlinec1_SSE2; + drawers.mvlinec4 = mvlinec4_SSE2; + drawers.tmvline1_add = tmvline1_add_SSE2; + drawers.tmvline4_add = tmvline4_add_SSE2; + drawers.tmvline1_addclamp = tmvline1_addclamp_SSE2; + drawers.tmvline4_addclamp = tmvline4_addclamp_SSE2; + drawers.tmvline1_subclamp = tmvline1_subclamp_SSE2; + drawers.tmvline4_subclamp = tmvline4_subclamp_SSE2; + drawers.tmvline1_revsubclamp = tmvline1_revsubclamp_SSE2; + drawers.tmvline4_revsubclamp = tmvline4_revsubclamp_SSE2; + drawers.DrawSky1 = DrawSky1_SSE2; + drawers.DrawSky4 = DrawSky4_SSE2; + drawers.DrawDoubleSky1 = DrawDoubleSky1_SSE2; + drawers.DrawDoubleSky4 = DrawDoubleSky4_SSE2; + drawers.TriDrawNormal8.push_back(TriDrawNormal8_0_SSE2); + drawers.TriDrawNormal8.push_back(TriDrawNormal8_1_SSE2); + drawers.TriDrawNormal8.push_back(TriDrawNormal8_2_SSE2); + 
drawers.TriDrawNormal8.push_back(TriDrawNormal8_3_SSE2); + drawers.TriDrawNormal8.push_back(TriDrawNormal8_4_SSE2); + drawers.TriDrawNormal8.push_back(TriDrawNormal8_5_SSE2); + drawers.TriDrawNormal8.push_back(TriDrawNormal8_6_SSE2); + drawers.TriDrawNormal8.push_back(TriDrawNormal8_7_SSE2); + drawers.TriDrawNormal8.push_back(TriDrawNormal8_8_SSE2); + drawers.TriDrawNormal8.push_back(TriDrawNormal8_9_SSE2); + drawers.TriDrawNormal8.push_back(TriDrawNormal8_10_SSE2); + drawers.TriDrawNormal8.push_back(TriDrawNormal8_11_SSE2); + drawers.TriDrawNormal8.push_back(TriDrawNormal8_12_SSE2); + drawers.TriDrawNormal32.push_back(TriDrawNormal32_0_SSE2); + drawers.TriDrawNormal32.push_back(TriDrawNormal32_1_SSE2); + drawers.TriDrawNormal32.push_back(TriDrawNormal32_2_SSE2); + drawers.TriDrawNormal32.push_back(TriDrawNormal32_3_SSE2); + drawers.TriDrawNormal32.push_back(TriDrawNormal32_4_SSE2); + drawers.TriDrawNormal32.push_back(TriDrawNormal32_5_SSE2); + drawers.TriDrawNormal32.push_back(TriDrawNormal32_6_SSE2); + drawers.TriDrawNormal32.push_back(TriDrawNormal32_7_SSE2); + drawers.TriDrawNormal32.push_back(TriDrawNormal32_8_SSE2); + drawers.TriDrawNormal32.push_back(TriDrawNormal32_9_SSE2); + drawers.TriDrawNormal32.push_back(TriDrawNormal32_10_SSE2); + drawers.TriDrawNormal32.push_back(TriDrawNormal32_11_SSE2); + drawers.TriDrawNormal32.push_back(TriDrawNormal32_12_SSE2); + drawers.TriFillNormal8.push_back(TriFillNormal8_0_SSE2); + drawers.TriFillNormal8.push_back(TriFillNormal8_1_SSE2); + drawers.TriFillNormal8.push_back(TriFillNormal8_2_SSE2); + drawers.TriFillNormal8.push_back(TriFillNormal8_3_SSE2); + drawers.TriFillNormal8.push_back(TriFillNormal8_4_SSE2); + drawers.TriFillNormal8.push_back(TriFillNormal8_5_SSE2); + drawers.TriFillNormal8.push_back(TriFillNormal8_6_SSE2); + drawers.TriFillNormal8.push_back(TriFillNormal8_7_SSE2); + drawers.TriFillNormal8.push_back(TriFillNormal8_8_SSE2); + drawers.TriFillNormal8.push_back(TriFillNormal8_9_SSE2); + 
drawers.TriFillNormal8.push_back(TriFillNormal8_10_SSE2); + drawers.TriFillNormal8.push_back(TriFillNormal8_11_SSE2); + drawers.TriFillNormal8.push_back(TriFillNormal8_12_SSE2); + drawers.TriFillNormal32.push_back(TriFillNormal32_0_SSE2); + drawers.TriFillNormal32.push_back(TriFillNormal32_1_SSE2); + drawers.TriFillNormal32.push_back(TriFillNormal32_2_SSE2); + drawers.TriFillNormal32.push_back(TriFillNormal32_3_SSE2); + drawers.TriFillNormal32.push_back(TriFillNormal32_4_SSE2); + drawers.TriFillNormal32.push_back(TriFillNormal32_5_SSE2); + drawers.TriFillNormal32.push_back(TriFillNormal32_6_SSE2); + drawers.TriFillNormal32.push_back(TriFillNormal32_7_SSE2); + drawers.TriFillNormal32.push_back(TriFillNormal32_8_SSE2); + drawers.TriFillNormal32.push_back(TriFillNormal32_9_SSE2); + drawers.TriFillNormal32.push_back(TriFillNormal32_10_SSE2); + drawers.TriFillNormal32.push_back(TriFillNormal32_11_SSE2); + drawers.TriFillNormal32.push_back(TriFillNormal32_12_SSE2); + drawers.TriDrawSubsector8.push_back(TriDrawSubsector8_0_SSE2); + drawers.TriDrawSubsector8.push_back(TriDrawSubsector8_1_SSE2); + drawers.TriDrawSubsector8.push_back(TriDrawSubsector8_2_SSE2); + drawers.TriDrawSubsector8.push_back(TriDrawSubsector8_3_SSE2); + drawers.TriDrawSubsector8.push_back(TriDrawSubsector8_4_SSE2); + drawers.TriDrawSubsector8.push_back(TriDrawSubsector8_5_SSE2); + drawers.TriDrawSubsector8.push_back(TriDrawSubsector8_6_SSE2); + drawers.TriDrawSubsector8.push_back(TriDrawSubsector8_7_SSE2); + drawers.TriDrawSubsector8.push_back(TriDrawSubsector8_8_SSE2); + drawers.TriDrawSubsector8.push_back(TriDrawSubsector8_9_SSE2); + drawers.TriDrawSubsector8.push_back(TriDrawSubsector8_10_SSE2); + drawers.TriDrawSubsector8.push_back(TriDrawSubsector8_11_SSE2); + drawers.TriDrawSubsector8.push_back(TriDrawSubsector8_12_SSE2); + drawers.TriDrawSubsector32.push_back(TriDrawSubsector32_0_SSE2); + drawers.TriDrawSubsector32.push_back(TriDrawSubsector32_1_SSE2); + 
drawers.TriDrawSubsector32.push_back(TriDrawSubsector32_2_SSE2); + drawers.TriDrawSubsector32.push_back(TriDrawSubsector32_3_SSE2); + drawers.TriDrawSubsector32.push_back(TriDrawSubsector32_4_SSE2); + drawers.TriDrawSubsector32.push_back(TriDrawSubsector32_5_SSE2); + drawers.TriDrawSubsector32.push_back(TriDrawSubsector32_6_SSE2); + drawers.TriDrawSubsector32.push_back(TriDrawSubsector32_7_SSE2); + drawers.TriDrawSubsector32.push_back(TriDrawSubsector32_8_SSE2); + drawers.TriDrawSubsector32.push_back(TriDrawSubsector32_9_SSE2); + drawers.TriDrawSubsector32.push_back(TriDrawSubsector32_10_SSE2); + drawers.TriDrawSubsector32.push_back(TriDrawSubsector32_11_SSE2); + drawers.TriDrawSubsector32.push_back(TriDrawSubsector32_12_SSE2); + drawers.TriFillSubsector8.push_back(TriFillSubsector8_0_SSE2); + drawers.TriFillSubsector8.push_back(TriFillSubsector8_1_SSE2); + drawers.TriFillSubsector8.push_back(TriFillSubsector8_2_SSE2); + drawers.TriFillSubsector8.push_back(TriFillSubsector8_3_SSE2); + drawers.TriFillSubsector8.push_back(TriFillSubsector8_4_SSE2); + drawers.TriFillSubsector8.push_back(TriFillSubsector8_5_SSE2); + drawers.TriFillSubsector8.push_back(TriFillSubsector8_6_SSE2); + drawers.TriFillSubsector8.push_back(TriFillSubsector8_7_SSE2); + drawers.TriFillSubsector8.push_back(TriFillSubsector8_8_SSE2); + drawers.TriFillSubsector8.push_back(TriFillSubsector8_9_SSE2); + drawers.TriFillSubsector8.push_back(TriFillSubsector8_10_SSE2); + drawers.TriFillSubsector8.push_back(TriFillSubsector8_11_SSE2); + drawers.TriFillSubsector8.push_back(TriFillSubsector8_12_SSE2); + drawers.TriFillSubsector32.push_back(TriFillSubsector32_0_SSE2); + drawers.TriFillSubsector32.push_back(TriFillSubsector32_1_SSE2); + drawers.TriFillSubsector32.push_back(TriFillSubsector32_2_SSE2); + drawers.TriFillSubsector32.push_back(TriFillSubsector32_3_SSE2); + drawers.TriFillSubsector32.push_back(TriFillSubsector32_4_SSE2); + drawers.TriFillSubsector32.push_back(TriFillSubsector32_5_SSE2); + 
drawers.TriFillSubsector32.push_back(TriFillSubsector32_6_SSE2); + drawers.TriFillSubsector32.push_back(TriFillSubsector32_7_SSE2); + drawers.TriFillSubsector32.push_back(TriFillSubsector32_8_SSE2); + drawers.TriFillSubsector32.push_back(TriFillSubsector32_9_SSE2); + drawers.TriFillSubsector32.push_back(TriFillSubsector32_10_SSE2); + drawers.TriFillSubsector32.push_back(TriFillSubsector32_11_SSE2); + drawers.TriFillSubsector32.push_back(TriFillSubsector32_12_SSE2); + drawers.TriStencil = TriStencil_SSE2; + drawers.TriStencilClose = TriStencilClose_SSE2; + + firstcall = false; + return &drawers; +} + +Drawers::Drawers() +{ + // To do: setup pointers +} + +FString DrawWallArgs::ToString() +{ + FString info; + info.Format("dest_y = %i, count = %i, flags = %i, texturefrac[0] = %u, textureheight[0] = %u", dest_y, count, flags, texturefrac[0], textureheight[0]); + return info; +} + +FString DrawSpanArgs::ToString() +{ + FString info; + info.Format("x1 = %i, x2 = %i, y = %i, flags = %i", x1, x2, y, flags); + return info; +} + +FString DrawColumnArgs::ToString() +{ + FString info; + info.Format("dest_y = %i, count = %i, flags = %i, iscale = %i (%f), texturefrac = %i (%f)", dest_y, count, flags, iscale, ((fixed_t)iscale) / (float)FRACUNIT, texturefrac, ((fixed_t)texturefrac) / (float)FRACUNIT); + return info; +} + +FString DrawSkyArgs::ToString() +{ + FString info; + info.Format("dest_y = %i, count = %i", dest_y, count); + return info; +} diff --git a/src/r_compiler/llvmdrawers.h b/src/r_drawers.h similarity index 93% rename from src/r_compiler/llvmdrawers.h rename to src/r_drawers.h index c5eb96f4c9..17168b6f31 100644 --- a/src/r_compiler/llvmdrawers.h +++ b/src/r_drawers.h @@ -22,6 +22,11 @@ #pragma once +#include +#include + +class FString; + struct WorkerThreadData { int32_t core; @@ -63,12 +68,7 @@ struct DrawWallArgs nearest_filter = 2 }; - FString ToString() - { - FString info; - info.Format("dest_y = %i, count = %i, flags = %i, texturefrac[0] = %u, textureheight[0] 
= %u", dest_y, count, flags, texturefrac[0], textureheight[0]); - return info; - } + FString ToString(); }; struct DrawSpanArgs @@ -105,12 +105,7 @@ struct DrawSpanArgs nearest_filter = 2 }; - FString ToString() - { - FString info; - info.Format("x1 = %i, x2 = %i, y = %i, flags = %i", x1, x2, y, flags); - return info; - } + FString ToString(); }; struct DrawColumnArgs @@ -150,12 +145,7 @@ struct DrawColumnArgs nearest_filter = 2 }; - FString ToString() - { - FString info; - info.Format("dest_y = %i, count = %i, flags = %i, iscale = %i (%f), texturefrac = %i (%f)", dest_y, count, flags, iscale, ((fixed_t)iscale) / (float)FRACUNIT, texturefrac, ((fixed_t)texturefrac) / (float)FRACUNIT); - return info; - } + FString ToString(); }; struct DrawSkyArgs @@ -173,12 +163,7 @@ struct DrawSkyArgs uint32_t top_color; uint32_t bottom_color; - FString ToString() - { - FString info; - info.Format("dest_y = %i, count = %i", dest_y, count); - return info; - } + FString ToString(); }; struct TriVertex @@ -273,14 +258,12 @@ enum class TriBlendMode inline int NumTriBlendModes() { return (int)TriBlendMode::TranslateRevSub + 1; } -class LLVMDrawers +class Drawers { public: - virtual ~LLVMDrawers() { } + Drawers(); - static void Create(); - static void Destroy(); - static LLVMDrawers *Instance(); + static Drawers *Instance(); void(*DrawColumn)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; void(*DrawColumnAdd)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; @@ -358,7 +341,4 @@ public: std::vector TriFillSubsector32; void(*TriStencil)(const TriDrawTriangleArgs *, WorkerThreadData *) = nullptr; void(*TriStencilClose)(const TriDrawTriangleArgs *, WorkerThreadData *) = nullptr; - -private: - static LLVMDrawers *Singleton; }; diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp index 29a315a8c0..57de59eba3 100644 --- a/src/r_drawt_rgba.cpp +++ b/src/r_drawt_rgba.cpp @@ -43,7 +43,7 @@ #include "r_things.h" #include "v_video.h" #include "r_draw_rgba.h" -#include 
"r_compiler/llvmdrawers.h" +#include "r_drawers.h" extern unsigned int dc_tspans[4][MAXHEIGHT]; extern unsigned int *dc_ctspan[4]; @@ -107,7 +107,7 @@ public: void Execute(DrawerThread *thread) override { WorkerThreadData d = ThreadData(thread); - LLVMDrawers::Instance()->DrawColumnRt1(&args, &d); + Drawers::Instance()->DrawColumnRt1(&args, &d); } FString DebugInfo() override @@ -124,7 +124,7 @@ public: \ void Execute(DrawerThread *thread) override \ { \ WorkerThreadData d = ThreadData(thread); \ - LLVMDrawers::Instance()->func(&args, &d); \ + Drawers::Instance()->func(&args, &d); \ } \ }; diff --git a/src/r_poly_triangle.cpp b/src/r_poly_triangle.cpp index f08d060e8f..ed7ab55b99 100644 --- a/src/r_poly_triangle.cpp +++ b/src/r_poly_triangle.cpp @@ -78,7 +78,7 @@ void PolyTriangleDrawer::draw_arrays(const PolyDrawArgs &drawargs, TriDrawVarian if (drawargs.vcount < 3) return; - auto llvm = LLVMDrawers::Instance(); + auto llvm = Drawers::Instance(); void(*drawfunc)(const TriDrawTriangleArgs *, WorkerThreadData *); int bmode = (int)blendmode; switch (variant) diff --git a/src/r_poly_triangle.h b/src/r_poly_triangle.h index 270dda8948..1c07ce5bd0 100644 --- a/src/r_poly_triangle.h +++ b/src/r_poly_triangle.h @@ -24,7 +24,7 @@ #include "r_draw.h" #include "r_thread.h" -#include "r_compiler/llvmdrawers.h" +#include "r_drawers.h" #include "r_data/r_translate.h" #include "r_data/colormaps.h" diff --git a/src/r_swrenderer.cpp b/src/r_swrenderer.cpp index 708b465e1b..74a6da8232 100644 --- a/src/r_swrenderer.cpp +++ b/src/r_swrenderer.cpp @@ -43,7 +43,7 @@ #include "textures/textures.h" #include "r_data/voxels.h" #include "r_draw_rgba.h" -#include "r_compiler/llvmdrawers.h" +#include "r_drawers.h" #include "r_poly.h" EXTERN_CVAR(Bool, r_shadercolormaps) @@ -57,12 +57,10 @@ void R_InitRenderer(); FSoftwareRenderer::FSoftwareRenderer() { - LLVMDrawers::Create(); } FSoftwareRenderer::~FSoftwareRenderer() { - LLVMDrawers::Destroy(); } 
//========================================================================== diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt index 8a97243bb8..f4fe251e1b 100644 --- a/tools/CMakeLists.txt +++ b/tools/CMakeLists.txt @@ -7,5 +7,6 @@ if( WIN32 AND NOT CMAKE_SIZEOF_VOID_P MATCHES "8" ) endif() add_subdirectory( updaterevision ) add_subdirectory( zipdir ) +add_subdirectory( drawergen ) set( CROSS_EXPORTS ${CROSS_EXPORTS} PARENT_SCOPE ) diff --git a/tools/drawergen/CMakeLists.txt b/tools/drawergen/CMakeLists.txt new file mode 100644 index 0000000000..eef5a0a944 --- /dev/null +++ b/tools/drawergen/CMakeLists.txt @@ -0,0 +1,133 @@
+cmake_minimum_required( VERSION 2.8.7 )
+
+include(../../precompiled_headers.cmake)
+
+# Path where it looks for the LLVM compiled files on Windows
+set( LLVM_PRECOMPILED_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../../llvm" )
+
+if( NOT DRAWERGEN_LIBS )
+	set( DRAWERGEN_LIBS "" )
+endif()
+
+include_directories( . )
+
+file( GLOB HEADER_FILES
+	*.h
+	ssa/*.h
+	fixedfunction/*.h
+)
+
+if( NOT WIN32 )
+	set( LLVM_COMPONENTS core support asmparser asmprinter bitreader bitwriter codegen ipo
+		irreader transformutils instrumentation profiledata runtimedyld
+		object instcombine linker analysis selectiondag scalaropts vectorize executionengine
+		mc mcdisassembler mcparser mcjit target x86asmprinter x86info x86desc x86utils x86codegen )
+
+	# Example LLVM_DIR folder: C:/Development/Environment/Src/llvm-3.9.0/build/lib/cmake/llvm
+	find_package(LLVM REQUIRED CONFIG)
+	message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}")
+	message(STATUS "Using LLVMConfig.cmake in: ${LLVM_DIR}")
+	llvm_map_components_to_libnames( llvm_libs ${LLVM_COMPONENTS} )
+	include_directories( ${LLVM_INCLUDE_DIRS} )
+	set( DRAWERGEN_LIBS ${DRAWERGEN_LIBS} ${llvm_libs} )
+else()
+	set( LLVM_COMPONENTS core support asmparser asmprinter bitreader bitwriter codegen passes ipo
+		irreader transformutils instrumentation profiledata debuginfocodeview runtimedyld
+		object instcombine linker analysis selectiondag scalaropts vectorize executionengine
+		mc mcdisassembler mcparser mcjit target x86asmprinter x86info x86desc x86utils x86codegen )
+
+	include_directories( "${LLVM_PRECOMPILED_DIR}/include" )
+	if( X64 )
+		include_directories( "${LLVM_PRECOMPILED_DIR}/64bit-include" )
+		set( llvm_libs_base "${LLVM_PRECOMPILED_DIR}/64bit-" )
+	else()
+		include_directories( "${LLVM_PRECOMPILED_DIR}/32bit-include" )
+		set( llvm_libs_base "${LLVM_PRECOMPILED_DIR}/32bit-" )
+	endif()
+	foreach(buildtype IN ITEMS RELEASE DEBUG)
+		set( llvm_libs_${buildtype} "${llvm_libs_base}${buildtype}" )
+		set( LLVM_${buildtype}_LIBS "" )
+		foreach( llvm_module ${LLVM_COMPONENTS} )
+			find_library( LLVM_${llvm_module}_LIBRARY_${buildtype} LLVM${llvm_module} PATHS ${llvm_libs_${buildtype}} )
+			set( LLVM_${buildtype}_LIBS ${LLVM_${buildtype}_LIBS} ${LLVM_${llvm_module}_LIBRARY_${buildtype}} )
+		endforeach( llvm_module )
+	endforeach(buildtype)
+endif()
+
+if( WIN32 )
+	if( MSVC_VERSION GREATER 1399 )
+		# VC 8+ adds a manifest automatically to the executable. We need to
+		# merge ours with it.
+		set( MT_MERGE ON )
+	else()
+		set( TRUSTINFO trustinfo.rc )
+	endif()
+else( WIN32 )
+	set( TRUSTINFO "" )
+endif()
+
+set (SOURCES
+	drawergen.cpp
+	ssa/ssa_bool.cpp
+	ssa/ssa_float.cpp
+	ssa/ssa_float_ptr.cpp
+	ssa/ssa_for_block.cpp
+	ssa/ssa_function.cpp
+	ssa/ssa_if_block.cpp
+	ssa/ssa_int.cpp
+	ssa/ssa_int_ptr.cpp
+	ssa/ssa_short.cpp
+	ssa/ssa_scope.cpp
+	ssa/ssa_struct_type.cpp
+	ssa/ssa_ubyte.cpp
+	ssa/ssa_ubyte_ptr.cpp
+	ssa/ssa_value.cpp
+	ssa/ssa_vec4f.cpp
+	ssa/ssa_vec4f_ptr.cpp
+	ssa/ssa_vec4i.cpp
+	ssa/ssa_vec4i_ptr.cpp
+	ssa/ssa_vec8s.cpp
+	ssa/ssa_vec16ub.cpp
+	fixedfunction/drawercodegen.cpp
+	fixedfunction/drawspancodegen.cpp
+	fixedfunction/drawwallcodegen.cpp
+	fixedfunction/drawcolumncodegen.cpp
+	fixedfunction/drawskycodegen.cpp
+	fixedfunction/drawtrianglecodegen.cpp
+)
+enable_precompiled_headers( precomp.h SOURCES )
+
+# Linux - add these flags for LLVM compatibility to prevent crashing
+#if ( UNIX AND NOT APPLE )
+#	set( CMAKE_EXE_LINKER_FLAGS "-Wl,--exclude-libs,ALL ${CMAKE_EXE_LINKER_FLAGS}" )
+#endif()
+
+# The drawergen target only exists for native builds. Every command that names
+# the target has to stay inside this guard, otherwise a cross-compiling
+# configure stops with an error about a target that was never created.
+if( NOT CMAKE_CROSSCOMPILING )
+	add_executable( drawergen ${SOURCES} ${TRUSTINFO} ${HEADER_FILES} )
+	set( CROSS_EXPORTS ${CROSS_EXPORTS} drawergen PARENT_SCOPE )
+
+	if( MT_MERGE )
+		add_custom_command(TARGET drawergen POST_BUILD
+			COMMAND mt -inputresource:$<TARGET_FILE:drawergen> -manifest ${CMAKE_CURRENT_SOURCE_DIR}/trustinfo.txt -outputresource:$<TARGET_FILE:drawergen> -nologo
+			COMMENT "Embedding trustinfo into drawergen" )
+	endif()
+
+	target_link_libraries( drawergen ${DRAWERGEN_LIBS} )
+
+	if( WIN32 )
+		foreach(debuglib ${LLVM_DEBUG_LIBS})
+			target_link_libraries( drawergen debug ${debuglib} )
+		endforeach(debuglib)
+		foreach(releaselib ${LLVM_RELEASE_LIBS})
+			target_link_libraries( drawergen optimized ${releaselib} )
+		endforeach(releaselib)
+	endif()
+endif()
+
+#source_group("Render Compiler" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/r_compiler/.+")
+#source_group("Render Compiler\\SSA" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/r_compiler/ssa/.+")
+#source_group("Render Compiler\\Fixed Function" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/r_compiler/fixedfunction/.+")
+
+source_group("Compiler" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/.+\\.(cpp|h)$")
+source_group("Compiler\\SSA" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/ssa/.+")
+source_group("Compiler\\Fixed Function" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/fixedfunction/.+")
diff --git a/tools/drawergen/drawergen.cpp b/tools/drawergen/drawergen.cpp new file mode 100644 index 0000000000..dfe5fc11be --- /dev/null +++ b/tools/drawergen/drawergen.cpp @@ -0,0 +1,664 @@ +/* +** LLVM code generated drawers +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution.
+** +*/ + +#include "precomp.h" +#include "fixedfunction/drawspancodegen.h" +#include "fixedfunction/drawwallcodegen.h" +#include "fixedfunction/drawcolumncodegen.h" +#include "fixedfunction/drawskycodegen.h" +#include "fixedfunction/drawtrianglecodegen.h" +#include "ssa/ssa_function.h" +#include "ssa/ssa_scope.h" +#include "ssa/ssa_for_block.h" +#include "ssa/ssa_if_block.h" +#include "ssa/ssa_stack.h" +#include "ssa/ssa_function.h" +#include "ssa/ssa_struct_type.h" +#include "ssa/ssa_value.h" +#include "ssa/ssa_barycentric_weight.h" +#include + +class Exception : public std::exception +{ +public: + Exception(const std::string &message) : message(message) { } + const char *what() const override { return message.c_str(); } + +private: + std::string message; +}; + +class LLVMProgram +{ +public: + LLVMProgram(); + + void CreateModule(); + std::string GenerateAssembly(std::string cpuName); + std::vector GenerateObjectFile(std::string cpuName); + std::string DumpModule(); + + llvm::LLVMContext &context() { return *mContext; } + llvm::Module *module() { return mModule.get(); } + +private: + llvm::TargetMachine *machine = nullptr; + std::unique_ptr mContext; + std::unique_ptr mModule; +}; + +class LLVMDrawers +{ +public: + LLVMDrawers(const std::string &cpuName, const std::string namePostfix); + + std::vector ObjectFile; + +private: + void CodegenDrawColumn(const char *name, DrawColumnVariant variant, DrawColumnMethod method); + void CodegenDrawSpan(const char *name, DrawSpanVariant variant); + void CodegenDrawWall(const char *name, DrawWallVariant variant, int columns); + void CodegenDrawSky(const char *name, DrawSkyVariant variant, int columns); + void CodegenDrawTriangle(const std::string &name, TriDrawVariant variant, TriBlendMode blendmode, bool truecolor); + + static llvm::Type *GetDrawColumnArgsStruct(llvm::LLVMContext &context); + static llvm::Type *GetDrawSpanArgsStruct(llvm::LLVMContext &context); + static llvm::Type *GetDrawWallArgsStruct(llvm::LLVMContext 
&context); + static llvm::Type *GetDrawSkyArgsStruct(llvm::LLVMContext &context); + static llvm::Type *GetWorkerThreadDataStruct(llvm::LLVMContext &context); + static llvm::Type *GetTriVertexStruct(llvm::LLVMContext &context); + static llvm::Type *GetTriMatrixStruct(llvm::LLVMContext &context); + static llvm::Type *GetTriUniformsStruct(llvm::LLVMContext &context); + static llvm::Type *GetTriDrawTriangleArgs(llvm::LLVMContext &context); + + LLVMProgram mProgram; + std::string mNamePostfix; +}; + +///////////////////////////////////////////////////////////////////////////// + +LLVMDrawers::LLVMDrawers(const std::string &cpuName, const std::string namePostfix) : mNamePostfix(namePostfix) +{ + mProgram.CreateModule(); + + CodegenDrawColumn("FillColumn", DrawColumnVariant::Fill, DrawColumnMethod::Normal); + CodegenDrawColumn("FillColumnAdd", DrawColumnVariant::FillAdd, DrawColumnMethod::Normal); + CodegenDrawColumn("FillColumnAddClamp", DrawColumnVariant::FillAddClamp, DrawColumnMethod::Normal); + CodegenDrawColumn("FillColumnSubClamp", DrawColumnVariant::FillSubClamp, DrawColumnMethod::Normal); + CodegenDrawColumn("FillColumnRevSubClamp", DrawColumnVariant::FillRevSubClamp, DrawColumnMethod::Normal); + CodegenDrawColumn("DrawColumn", DrawColumnVariant::Draw, DrawColumnMethod::Normal); + CodegenDrawColumn("DrawColumnAdd", DrawColumnVariant::DrawAdd, DrawColumnMethod::Normal); + CodegenDrawColumn("DrawColumnShaded", DrawColumnVariant::DrawShaded, DrawColumnMethod::Normal); + CodegenDrawColumn("DrawColumnAddClamp", DrawColumnVariant::DrawAddClamp, DrawColumnMethod::Normal); + CodegenDrawColumn("DrawColumnSubClamp", DrawColumnVariant::DrawSubClamp, DrawColumnMethod::Normal); + CodegenDrawColumn("DrawColumnRevSubClamp", DrawColumnVariant::DrawRevSubClamp, DrawColumnMethod::Normal); + CodegenDrawColumn("DrawColumnTranslated", DrawColumnVariant::DrawTranslated, DrawColumnMethod::Normal); + CodegenDrawColumn("DrawColumnTlatedAdd", DrawColumnVariant::DrawTlatedAdd, 
DrawColumnMethod::Normal); + CodegenDrawColumn("DrawColumnAddClampTranslated", DrawColumnVariant::DrawAddClampTranslated, DrawColumnMethod::Normal); + CodegenDrawColumn("DrawColumnSubClampTranslated", DrawColumnVariant::DrawSubClampTranslated, DrawColumnMethod::Normal); + CodegenDrawColumn("DrawColumnRevSubClampTranslated", DrawColumnVariant::DrawRevSubClampTranslated, DrawColumnMethod::Normal); + CodegenDrawColumn("DrawColumnRt1", DrawColumnVariant::Draw, DrawColumnMethod::Rt1); + CodegenDrawColumn("DrawColumnRt1Copy", DrawColumnVariant::DrawCopy, DrawColumnMethod::Rt1); + CodegenDrawColumn("DrawColumnRt1Add", DrawColumnVariant::DrawAdd, DrawColumnMethod::Rt1); + CodegenDrawColumn("DrawColumnRt1Shaded", DrawColumnVariant::DrawShaded, DrawColumnMethod::Rt1); + CodegenDrawColumn("DrawColumnRt1AddClamp", DrawColumnVariant::DrawAddClamp, DrawColumnMethod::Rt1); + CodegenDrawColumn("DrawColumnRt1SubClamp", DrawColumnVariant::DrawSubClamp, DrawColumnMethod::Rt1); + CodegenDrawColumn("DrawColumnRt1RevSubClamp", DrawColumnVariant::DrawRevSubClamp, DrawColumnMethod::Rt1); + CodegenDrawColumn("DrawColumnRt1Translated", DrawColumnVariant::DrawTranslated, DrawColumnMethod::Rt1); + CodegenDrawColumn("DrawColumnRt1TlatedAdd", DrawColumnVariant::DrawTlatedAdd, DrawColumnMethod::Rt1); + CodegenDrawColumn("DrawColumnRt1AddClampTranslated", DrawColumnVariant::DrawAddClampTranslated, DrawColumnMethod::Rt1); + CodegenDrawColumn("DrawColumnRt1SubClampTranslated", DrawColumnVariant::DrawSubClampTranslated, DrawColumnMethod::Rt1); + CodegenDrawColumn("DrawColumnRt1RevSubClampTranslated", DrawColumnVariant::DrawRevSubClampTranslated, DrawColumnMethod::Rt1); + CodegenDrawColumn("DrawColumnRt4", DrawColumnVariant::Draw, DrawColumnMethod::Rt4); + CodegenDrawColumn("DrawColumnRt4Copy", DrawColumnVariant::DrawCopy, DrawColumnMethod::Rt4); + CodegenDrawColumn("DrawColumnRt4Add", DrawColumnVariant::DrawAdd, DrawColumnMethod::Rt4); + CodegenDrawColumn("DrawColumnRt4Shaded", 
DrawColumnVariant::DrawShaded, DrawColumnMethod::Rt4); + CodegenDrawColumn("DrawColumnRt4AddClamp", DrawColumnVariant::DrawAddClamp, DrawColumnMethod::Rt4); + CodegenDrawColumn("DrawColumnRt4SubClamp", DrawColumnVariant::DrawSubClamp, DrawColumnMethod::Rt4); + CodegenDrawColumn("DrawColumnRt4RevSubClamp", DrawColumnVariant::DrawRevSubClamp, DrawColumnMethod::Rt4); + CodegenDrawColumn("DrawColumnRt4Translated", DrawColumnVariant::DrawTranslated, DrawColumnMethod::Rt4); + CodegenDrawColumn("DrawColumnRt4TlatedAdd", DrawColumnVariant::DrawTlatedAdd, DrawColumnMethod::Rt4); + CodegenDrawColumn("DrawColumnRt4AddClampTranslated", DrawColumnVariant::DrawAddClampTranslated, DrawColumnMethod::Rt4); + CodegenDrawColumn("DrawColumnRt4SubClampTranslated", DrawColumnVariant::DrawSubClampTranslated, DrawColumnMethod::Rt4); + CodegenDrawColumn("DrawColumnRt4RevSubClampTranslated", DrawColumnVariant::DrawRevSubClampTranslated, DrawColumnMethod::Rt4); + CodegenDrawSpan("DrawSpan", DrawSpanVariant::Opaque); + CodegenDrawSpan("DrawSpanMasked", DrawSpanVariant::Masked); + CodegenDrawSpan("DrawSpanTranslucent", DrawSpanVariant::Translucent); + CodegenDrawSpan("DrawSpanMaskedTranslucent", DrawSpanVariant::MaskedTranslucent); + CodegenDrawSpan("DrawSpanAddClamp", DrawSpanVariant::AddClamp); + CodegenDrawSpan("DrawSpanMaskedAddClamp", DrawSpanVariant::MaskedAddClamp); + CodegenDrawWall("vlinec1", DrawWallVariant::Opaque, 1); + CodegenDrawWall("vlinec4", DrawWallVariant::Opaque, 4); + CodegenDrawWall("mvlinec1", DrawWallVariant::Masked, 1); + CodegenDrawWall("mvlinec4", DrawWallVariant::Masked, 4); + CodegenDrawWall("tmvline1_add", DrawWallVariant::Add, 1); + CodegenDrawWall("tmvline4_add", DrawWallVariant::Add, 4); + CodegenDrawWall("tmvline1_addclamp", DrawWallVariant::AddClamp, 1); + CodegenDrawWall("tmvline4_addclamp", DrawWallVariant::AddClamp, 4); + CodegenDrawWall("tmvline1_subclamp", DrawWallVariant::SubClamp, 1); + CodegenDrawWall("tmvline4_subclamp", DrawWallVariant::SubClamp, 
4); + CodegenDrawWall("tmvline1_revsubclamp", DrawWallVariant::RevSubClamp, 1); + CodegenDrawWall("tmvline4_revsubclamp", DrawWallVariant::RevSubClamp, 4); + CodegenDrawSky("DrawSky1", DrawSkyVariant::Single, 1); + CodegenDrawSky("DrawSky4", DrawSkyVariant::Single, 4); + CodegenDrawSky("DrawDoubleSky1", DrawSkyVariant::Double, 1); + CodegenDrawSky("DrawDoubleSky4", DrawSkyVariant::Double, 4); + for (int i = 0; i < NumTriBlendModes(); i++) + { + CodegenDrawTriangle("TriDrawNormal8_" + std::to_string(i), TriDrawVariant::DrawNormal, (TriBlendMode)i, false); + CodegenDrawTriangle("TriDrawNormal32_" + std::to_string(i), TriDrawVariant::DrawNormal, (TriBlendMode)i, true); + CodegenDrawTriangle("TriFillNormal8_" + std::to_string(i), TriDrawVariant::FillNormal, (TriBlendMode)i, false); + CodegenDrawTriangle("TriFillNormal32_" + std::to_string(i), TriDrawVariant::FillNormal, (TriBlendMode)i, true); + CodegenDrawTriangle("TriDrawSubsector8_" + std::to_string(i), TriDrawVariant::DrawSubsector, (TriBlendMode)i, false); + CodegenDrawTriangle("TriDrawSubsector32_" + std::to_string(i), TriDrawVariant::DrawSubsector, (TriBlendMode)i, true); + CodegenDrawTriangle("TriFillSubsector8_" + std::to_string(i), TriDrawVariant::FillSubsector, (TriBlendMode)i, false); + CodegenDrawTriangle("TriFillSubsector32_" + std::to_string(i), TriDrawVariant::FillSubsector, (TriBlendMode)i, true); + } + CodegenDrawTriangle("TriStencil", TriDrawVariant::Stencil, TriBlendMode::Copy, false); + CodegenDrawTriangle("TriStencilClose", TriDrawVariant::StencilClose, TriBlendMode::Copy, false); + + ObjectFile = mProgram.GenerateObjectFile(cpuName); +} + +void LLVMDrawers::CodegenDrawColumn(const char *name, DrawColumnVariant variant, DrawColumnMethod method) +{ + llvm::IRBuilder<> builder(mProgram.context()); + SSAScope ssa_scope(&mProgram.context(), mProgram.module(), &builder); + + SSAFunction function(name + mNamePostfix); + function.add_parameter(GetDrawColumnArgsStruct(mProgram.context())); + 
function.add_parameter(GetWorkerThreadDataStruct(mProgram.context())); + function.create_public(); + + DrawColumnCodegen codegen; + codegen.Generate(variant, method, function.parameter(0), function.parameter(1)); + + builder.CreateRetVoid(); + + if (llvm::verifyFunction(*function.func)) + throw Exception("verifyFunction failed for CodegenDrawColumn()"); +} + +void LLVMDrawers::CodegenDrawSpan(const char *name, DrawSpanVariant variant) +{ + llvm::IRBuilder<> builder(mProgram.context()); + SSAScope ssa_scope(&mProgram.context(), mProgram.module(), &builder); + + SSAFunction function(name + mNamePostfix); + function.add_parameter(GetDrawSpanArgsStruct(mProgram.context())); + function.create_public(); + + DrawSpanCodegen codegen; + codegen.Generate(variant, function.parameter(0)); + + builder.CreateRetVoid(); + + if (llvm::verifyFunction(*function.func)) + throw Exception("verifyFunction failed for CodegenDrawSpan()"); +} + +void LLVMDrawers::CodegenDrawWall(const char *name, DrawWallVariant variant, int columns) +{ + llvm::IRBuilder<> builder(mProgram.context()); + SSAScope ssa_scope(&mProgram.context(), mProgram.module(), &builder); + + SSAFunction function(name + mNamePostfix); + function.add_parameter(GetDrawWallArgsStruct(mProgram.context())); + function.add_parameter(GetWorkerThreadDataStruct(mProgram.context())); + function.create_public(); + + DrawWallCodegen codegen; + codegen.Generate(variant, columns == 4, function.parameter(0), function.parameter(1)); + + builder.CreateRetVoid(); + + if (llvm::verifyFunction(*function.func)) + throw Exception("verifyFunction failed for CodegenDrawWall()"); +} + +void LLVMDrawers::CodegenDrawSky(const char *name, DrawSkyVariant variant, int columns) +{ + llvm::IRBuilder<> builder(mProgram.context()); + SSAScope ssa_scope(&mProgram.context(), mProgram.module(), &builder); + + SSAFunction function(name + mNamePostfix); + function.add_parameter(GetDrawSkyArgsStruct(mProgram.context())); + 
function.add_parameter(GetWorkerThreadDataStruct(mProgram.context())); + function.create_public(); + + DrawSkyCodegen codegen; + codegen.Generate(variant, columns == 4, function.parameter(0), function.parameter(1)); + + builder.CreateRetVoid(); + + if (llvm::verifyFunction(*function.func)) + throw Exception("verifyFunction failed for CodegenDrawSky()"); +} + +void LLVMDrawers::CodegenDrawTriangle(const std::string &name, TriDrawVariant variant, TriBlendMode blendmode, bool truecolor) +{ + llvm::IRBuilder<> builder(mProgram.context()); + SSAScope ssa_scope(&mProgram.context(), mProgram.module(), &builder); + + SSAFunction function(name + mNamePostfix); + function.add_parameter(GetTriDrawTriangleArgs(mProgram.context())); + function.add_parameter(GetWorkerThreadDataStruct(mProgram.context())); + function.create_public(); + + DrawTriangleCodegen codegen; + codegen.Generate(variant, blendmode, truecolor, function.parameter(0), function.parameter(1)); + + builder.CreateRetVoid(); + + if (llvm::verifyFunction(*function.func)) + throw Exception(std::string("verifyFunction failed for CodegenDrawTriangle(") + std::to_string((int)variant) + ", " + std::to_string((int)blendmode) + ", " + std::to_string((int)truecolor) + ")"); +} + +llvm::Type *LLVMDrawers::GetDrawColumnArgsStruct(llvm::LLVMContext &context) +{ + std::vector elements; + elements.push_back(llvm::Type::getInt8PtrTy(context)); // uint32_t *dest; + elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint32_t *source; + elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint32_t *source2; + elements.push_back(llvm::Type::getInt8PtrTy(context)); // uint8_t *colormap; + elements.push_back(llvm::Type::getInt8PtrTy(context)); // uint8_t *translation; + elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint32_t *basecolors; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t pitch; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t count; + 
elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t dest_y; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t iscale; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t texturefracx; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t textureheight; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t texturefrac; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t light; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t color; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t srccolor; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t srcalpha; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t destalpha; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_alpha; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_red; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_green; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_blue; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_alpha; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_red; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_green; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_blue; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t desaturate; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t flags; + return llvm::StructType::create(context, elements, "DrawColumnArgs", false)->getPointerTo(); +} + +llvm::Type *LLVMDrawers::GetDrawSpanArgsStruct(llvm::LLVMContext &context) +{ + std::vector elements; + elements.push_back(llvm::Type::getInt8PtrTy(context)); // uint8_t *destorg; + elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint32_t *source; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t destpitch; + 
elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t xfrac; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t yfrac; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t xstep; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t ystep; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t x1; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t x2; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t y; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t xbits; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t ybits; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t light; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t srcalpha; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t destalpha; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_alpha; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_red; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_green; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_blue; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_alpha; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_red; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_green; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_blue; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t desaturate; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t flags; + return llvm::StructType::create(context, elements, "DrawSpanArgs", false)->getPointerTo(); +} + +llvm::Type *LLVMDrawers::GetDrawWallArgsStruct(llvm::LLVMContext &context) +{ + std::vector elements; + elements.push_back(llvm::Type::getInt8PtrTy(context)); + for (int i = 0; i < 8; i++) + 
elements.push_back(llvm::Type::getInt8PtrTy(context)); + for (int i = 0; i < 25; i++) + elements.push_back(llvm::Type::getInt32Ty(context)); + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_alpha; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_red; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_green; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_blue; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_alpha; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_red; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_green; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_blue; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t desaturate; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t flags; + return llvm::StructType::create(context, elements, "DrawWallArgs", false)->getPointerTo(); +} + +llvm::Type *LLVMDrawers::GetDrawSkyArgsStruct(llvm::LLVMContext &context) +{ + std::vector elements; + elements.push_back(llvm::Type::getInt8PtrTy(context)); + for (int i = 0; i < 8; i++) + elements.push_back(llvm::Type::getInt8PtrTy(context)); + for (int i = 0; i < 15; i++) + elements.push_back(llvm::Type::getInt32Ty(context)); + return llvm::StructType::create(context, elements, "DrawSkyArgs", false)->getPointerTo(); +} + +llvm::Type *LLVMDrawers::GetWorkerThreadDataStruct(llvm::LLVMContext &context) +{ + std::vector elements; + for (int i = 0; i < 4; i++) + elements.push_back(llvm::Type::getInt32Ty(context)); + elements.push_back(llvm::Type::getInt8PtrTy(context)); + return llvm::StructType::create(context, elements, "ThreadData", false)->getPointerTo(); +} + +llvm::Type *LLVMDrawers::GetTriVertexStruct(llvm::LLVMContext &context) +{ + std::vector elements; + for (int i = 0; i < 4 + TriVertex::NumVarying; i++) + 
elements.push_back(llvm::Type::getFloatTy(context)); + return llvm::StructType::create(context, elements, "TriVertex", false)->getPointerTo(); +} + +llvm::Type *LLVMDrawers::GetTriMatrixStruct(llvm::LLVMContext &context) +{ + std::vector elements; + for (int i = 0; i < 4 * 4; i++) + elements.push_back(llvm::Type::getFloatTy(context)); + return llvm::StructType::create(context, elements, "TriMatrix", false)->getPointerTo(); +} + +llvm::Type *LLVMDrawers::GetTriUniformsStruct(llvm::LLVMContext &context) +{ + std::vector elements; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t light; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t subsectorDepth; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t color; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t srcalpha; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t destalpha; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_alpha; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_red; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_green; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_blue; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_alpha; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_red; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_green; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_blue; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t desaturate; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t flags; + elements.push_back(GetTriMatrixStruct(context)); // TriMatrix objectToClip + return llvm::StructType::create(context, elements, "TriUniforms", false)->getPointerTo(); +} + +llvm::Type *LLVMDrawers::GetTriDrawTriangleArgs(llvm::LLVMContext &context) +{ + std::vector elements; + 
elements.push_back(llvm::Type::getInt8PtrTy(context)); // uint8_t *dest; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t pitch; + elements.push_back(GetTriVertexStruct(context)); // TriVertex *v1; + elements.push_back(GetTriVertexStruct(context)); // TriVertex *v2; + elements.push_back(GetTriVertexStruct(context)); // TriVertex *v3; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t clipleft; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t clipright; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t cliptop; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t clipbottom; + elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint8_t *texturePixels; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t textureWidth; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t textureHeight; + elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint8_t *translation; + elements.push_back(GetTriUniformsStruct(context)); // const TriUniforms *uniforms; + elements.push_back(llvm::Type::getInt8PtrTy(context)); // uint8_t *stencilValues; + elements.push_back(llvm::Type::getInt32PtrTy(context)); // uint32_t *stencilMasks; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t stencilPitch; + elements.push_back(llvm::Type::getInt8Ty(context)); // uint8_t stencilTestValue; + elements.push_back(llvm::Type::getInt8Ty(context)); // uint8_t stencilWriteValue; + elements.push_back(llvm::Type::getInt32PtrTy(context)); // uint32_t *subsectorGBuffer; + elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint8_t *colormaps; + elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint8_t *RGB32k; + elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint8_t *BaseColors; + return llvm::StructType::create(context, elements, "TriDrawTriangle", false)->getPointerTo(); +} + 
+///////////////////////////////////////////////////////////////////////////// + +LLVMProgram::LLVMProgram() +{ + mContext = std::make_unique(); +} + +void LLVMProgram::CreateModule() +{ + mModule = std::make_unique("render", context()); +} + +std::string LLVMProgram::GenerateAssembly(std::string cpuName) +{ + using namespace llvm; + + std::string errorstring; + + llvm::Module *module = mModule.get(); + EngineBuilder engineBuilder(std::move(mModule)); + engineBuilder.setErrorStr(&errorstring); + engineBuilder.setOptLevel(CodeGenOpt::Aggressive); + engineBuilder.setEngineKind(EngineKind::JIT); + engineBuilder.setMCPU(cpuName); + machine = engineBuilder.selectTarget(); + if (!machine) + throw Exception("Could not create LLVM target machine"); + +#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 8) + std::string targetTriple = machine->getTargetTriple(); +#else + std::string targetTriple = machine->getTargetTriple().getTriple(); +#endif + + module->setTargetTriple(targetTriple); +#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 8) + module->setDataLayout(new DataLayout(*machine->getSubtargetImpl()->getDataLayout())); +#else + module->setDataLayout(machine->createDataLayout()); +#endif + + legacy::FunctionPassManager PerFunctionPasses(module); + legacy::PassManager PerModulePasses; + +#if LLVM_VERSION_MAJOR > 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 8) + PerFunctionPasses.add(createTargetTransformInfoWrapperPass(machine->getTargetIRAnalysis())); + PerModulePasses.add(createTargetTransformInfoWrapperPass(machine->getTargetIRAnalysis())); +#endif + + SmallString<16 * 1024> str; +#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 8) + raw_svector_ostream vecstream(str); + formatted_raw_ostream stream(vecstream); +#else + raw_svector_ostream stream(str); +#endif + machine->addPassesToEmitFile(PerModulePasses, stream, TargetMachine::CGFT_AssemblyFile); + + PassManagerBuilder 
passManagerBuilder; + passManagerBuilder.OptLevel = 3; + passManagerBuilder.SizeLevel = 0; + passManagerBuilder.Inliner = createFunctionInliningPass(); + passManagerBuilder.SLPVectorize = true; + passManagerBuilder.LoopVectorize = true; + passManagerBuilder.LoadCombine = true; + passManagerBuilder.populateModulePassManager(PerModulePasses); + passManagerBuilder.populateFunctionPassManager(PerFunctionPasses); + + // Run function passes: + PerFunctionPasses.doInitialization(); + for (llvm::Function &func : *module) + { + if (!func.isDeclaration()) + PerFunctionPasses.run(func); + } + PerFunctionPasses.doFinalization(); + + // Run module passes: + PerModulePasses.run(*module); + + return str.c_str(); +} + +std::vector LLVMProgram::GenerateObjectFile(std::string cpuName) +{ + using namespace llvm; + + std::string errorstring; + + llvm::Module *module = mModule.get(); + EngineBuilder engineBuilder(std::move(mModule)); + engineBuilder.setErrorStr(&errorstring); + engineBuilder.setOptLevel(CodeGenOpt::Aggressive); + engineBuilder.setEngineKind(EngineKind::JIT); + engineBuilder.setMCPU(cpuName); + machine = engineBuilder.selectTarget(); + if (!machine) + throw Exception("Could not create LLVM target machine"); + +#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 8) + std::string targetTriple = machine->getTargetTriple(); +#else + std::string targetTriple = machine->getTargetTriple().getTriple(); +#endif + + module->setTargetTriple(targetTriple); +#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 8) + module->setDataLayout(new DataLayout(*machine->getSubtargetImpl()->getDataLayout())); +#else + module->setDataLayout(machine->createDataLayout()); +#endif + + legacy::FunctionPassManager PerFunctionPasses(module); + legacy::PassManager PerModulePasses; + +#if LLVM_VERSION_MAJOR > 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 8) + 
PerFunctionPasses.add(createTargetTransformInfoWrapperPass(machine->getTargetIRAnalysis())); + PerModulePasses.add(createTargetTransformInfoWrapperPass(machine->getTargetIRAnalysis())); +#endif + + SmallString<16 * 1024> str; +#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 8) + raw_svector_ostream vecstream(str); + formatted_raw_ostream stream(vecstream); +#else + raw_svector_ostream stream(str); +#endif + machine->addPassesToEmitFile(PerModulePasses, stream, TargetMachine::CGFT_ObjectFile); + + PassManagerBuilder passManagerBuilder; + passManagerBuilder.OptLevel = 3; + passManagerBuilder.SizeLevel = 0; + passManagerBuilder.Inliner = createFunctionInliningPass(); + passManagerBuilder.SLPVectorize = true; + passManagerBuilder.LoopVectorize = true; + passManagerBuilder.LoadCombine = true; + passManagerBuilder.populateModulePassManager(PerModulePasses); + passManagerBuilder.populateFunctionPassManager(PerFunctionPasses); + + // Run function passes: + PerFunctionPasses.doInitialization(); + for (llvm::Function &func : *module) + { + if (!func.isDeclaration()) + PerFunctionPasses.run(func); + } + PerFunctionPasses.doFinalization(); + + // Run module passes: + PerModulePasses.run(*module); + + // Return the resulting object file + std::vector data; + data.resize(str.size()); + memcpy(data.data(), str.data(), data.size()); + return data; +} + +std::string LLVMProgram::DumpModule() +{ + std::string str; + llvm::raw_string_ostream stream(str); +#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 8) + mModule->print(stream, nullptr); +#else + mModule->print(stream, nullptr, false, true); +#endif + return stream.str(); +} + +///////////////////////////////////////////////////////////////////////////// + +int main(int argc, char **argv) +{ + if (argc != 2) + { + std::cerr << "Usage: " << argv[0] << "" << std::endl; + return 1; + } + + llvm::install_fatal_error_handler([](void *user_data, const std::string& reason, bool 
gen_crash_diag) + { + std::cerr << "LLVM fatal error: " << reason; + exit(1); + }); + + llvm::InitializeNativeTarget(); + llvm::InitializeNativeTargetAsmPrinter(); + + std::string cpuName = "pentium4"; + std::cout << "Compiling drawer code for " << cpuName << ".." << std::endl; + + LLVMDrawers drawersSSE2(cpuName, "_SSE2"); + + FILE *file = fopen(argv[1], "wb"); + if (file == nullptr) + { + std::cerr << "Unable to open " << argv[1] << " for writing." << std::endl; + return 1; + } + + int result = fwrite(drawersSSE2.ObjectFile.data(), drawersSSE2.ObjectFile.size(), 1, file); + fclose(file); + + if (result != 1) + { + std::cerr << "Could not write data to " << argv[1] << std::endl; + return 1; + } + + //LLVMDrawers drawersSSE4("core2"); + //LLVMDrawers drawersAVX("sandybridge"); + //LLVMDrawers drawersAVX2("haswell"); + + return 0; +} diff --git a/src/r_compiler/fixedfunction/drawcolumncodegen.cpp b/tools/drawergen/fixedfunction/drawcolumncodegen.cpp similarity index 96% rename from src/r_compiler/fixedfunction/drawcolumncodegen.cpp rename to tools/drawergen/fixedfunction/drawcolumncodegen.cpp index 45a75cdcb8..0ce34c83c9 100644 --- a/src/r_compiler/fixedfunction/drawcolumncodegen.cpp +++ b/tools/drawergen/fixedfunction/drawcolumncodegen.cpp @@ -20,17 +20,16 @@ ** */ -#include "i_system.h" -#include "r_compiler/llvm_include.h" -#include "r_compiler/fixedfunction/drawcolumncodegen.h" -#include "r_compiler/ssa/ssa_function.h" -#include "r_compiler/ssa/ssa_scope.h" -#include "r_compiler/ssa/ssa_for_block.h" -#include "r_compiler/ssa/ssa_if_block.h" -#include "r_compiler/ssa/ssa_stack.h" -#include "r_compiler/ssa/ssa_function.h" -#include "r_compiler/ssa/ssa_struct_type.h" -#include "r_compiler/ssa/ssa_value.h" +#include "precomp.h" +#include "fixedfunction/drawcolumncodegen.h" +#include "ssa/ssa_function.h" +#include "ssa/ssa_scope.h" +#include "ssa/ssa_for_block.h" +#include "ssa/ssa_if_block.h" +#include "ssa/ssa_stack.h" +#include "ssa/ssa_function.h" +#include 
"ssa/ssa_struct_type.h" +#include "ssa/ssa_value.h" void DrawColumnCodegen::Generate(DrawColumnVariant variant, DrawColumnMethod method, SSAValue args, SSAValue thread_data) { diff --git a/src/r_compiler/fixedfunction/drawcolumncodegen.h b/tools/drawergen/fixedfunction/drawcolumncodegen.h similarity index 100% rename from src/r_compiler/fixedfunction/drawcolumncodegen.h rename to tools/drawergen/fixedfunction/drawcolumncodegen.h diff --git a/src/r_compiler/fixedfunction/drawercodegen.cpp b/tools/drawergen/fixedfunction/drawercodegen.cpp similarity index 92% rename from src/r_compiler/fixedfunction/drawercodegen.cpp rename to tools/drawergen/fixedfunction/drawercodegen.cpp index 65b726b934..c1d24f5940 100644 --- a/src/r_compiler/fixedfunction/drawercodegen.cpp +++ b/tools/drawergen/fixedfunction/drawercodegen.cpp @@ -20,17 +20,16 @@ ** */ -#include "i_system.h" -#include "r_compiler/llvm_include.h" -#include "r_compiler/fixedfunction/drawercodegen.h" -#include "r_compiler/ssa/ssa_function.h" -#include "r_compiler/ssa/ssa_scope.h" -#include "r_compiler/ssa/ssa_for_block.h" -#include "r_compiler/ssa/ssa_if_block.h" -#include "r_compiler/ssa/ssa_stack.h" -#include "r_compiler/ssa/ssa_function.h" -#include "r_compiler/ssa/ssa_struct_type.h" -#include "r_compiler/ssa/ssa_value.h" +#include "precomp.h" +#include "fixedfunction/drawercodegen.h" +#include "ssa/ssa_function.h" +#include "ssa/ssa_scope.h" +#include "ssa/ssa_for_block.h" +#include "ssa/ssa_if_block.h" +#include "ssa/ssa_stack.h" +#include "ssa/ssa_function.h" +#include "ssa/ssa_struct_type.h" +#include "ssa/ssa_value.h" SSABool DrawerCodegen::line_skipped_by_thread(SSAInt line, SSAWorkerThread thread) { diff --git a/src/r_compiler/fixedfunction/drawercodegen.h b/tools/drawergen/fixedfunction/drawercodegen.h similarity index 81% rename from src/r_compiler/fixedfunction/drawercodegen.h rename to tools/drawergen/fixedfunction/drawercodegen.h index ef6e8d11ef..8404f3acd9 100644 --- 
a/src/r_compiler/fixedfunction/drawercodegen.h +++ b/tools/drawergen/fixedfunction/drawercodegen.h @@ -22,22 +22,22 @@ #pragma once -#include "r_compiler/llvmdrawers.h" -#include "r_compiler/ssa/ssa_value.h" -#include "r_compiler/ssa/ssa_vec4f.h" -#include "r_compiler/ssa/ssa_vec4i.h" -#include "r_compiler/ssa/ssa_vec8s.h" -#include "r_compiler/ssa/ssa_vec16ub.h" -#include "r_compiler/ssa/ssa_int.h" -#include "r_compiler/ssa/ssa_int_ptr.h" -#include "r_compiler/ssa/ssa_short.h" -#include "r_compiler/ssa/ssa_ubyte_ptr.h" -#include "r_compiler/ssa/ssa_vec4f_ptr.h" -#include "r_compiler/ssa/ssa_vec4i_ptr.h" -#include "r_compiler/ssa/ssa_stack.h" -#include "r_compiler/ssa/ssa_bool.h" -#include "r_compiler/ssa/ssa_barycentric_weight.h" -#include "r_compiler/llvm_include.h" +#include "precomp.h" +#include "ssa/ssa_value.h" +#include "ssa/ssa_vec4f.h" +#include "ssa/ssa_vec4i.h" +#include "ssa/ssa_vec8s.h" +#include "ssa/ssa_vec16ub.h" +#include "ssa/ssa_int.h" +#include "ssa/ssa_int_ptr.h" +#include "ssa/ssa_short.h" +#include "ssa/ssa_ubyte_ptr.h" +#include "ssa/ssa_vec4f_ptr.h" +#include "ssa/ssa_vec4i_ptr.h" +#include "ssa/ssa_stack.h" +#include "ssa/ssa_bool.h" +#include "ssa/ssa_barycentric_weight.h" +#include "llvm_include.h" class SSAWorkerThread { @@ -90,3 +90,6 @@ public: // Calculates the final alpha values to be used when combined with the source texture alpha channel SSAInt calc_blend_bgalpha(SSAVec4i fg, SSAInt destalpha); }; + +#define FRACBITS 16 +#define FRACUNIT (1< + +1 RT_MANIFEST "trustinfo.txt" diff --git a/tools/drawergen/trustinfo.txt b/tools/drawergen/trustinfo.txt new file mode 100644 index 0000000000..5216df6503 --- /dev/null +++ b/tools/drawergen/trustinfo.txt @@ -0,0 +1,16 @@ + + + + + Drawergen for the ZDoom source build process. 
+ + + + + + + + From 09891291c2fdbc01052dba332ca33408bf8e506a Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 28 Nov 2016 17:34:32 +0100 Subject: [PATCH 392/912] Remove empty constructor --- src/r_drawers.cpp | 5 ----- src/r_drawers.h | 2 -- 2 files changed, 7 deletions(-) diff --git a/src/r_drawers.cpp b/src/r_drawers.cpp index 6d4aaa055d..b098e50a00 100644 --- a/src/r_drawers.cpp +++ b/src/r_drawers.cpp @@ -389,11 +389,6 @@ Drawers *Drawers::Instance() return &drawers; } -Drawers::Drawers() -{ - // To do: setup pointers -} - FString DrawWallArgs::ToString() { FString info; diff --git a/src/r_drawers.h b/src/r_drawers.h index 17168b6f31..bb541e99c9 100644 --- a/src/r_drawers.h +++ b/src/r_drawers.h @@ -261,8 +261,6 @@ inline int NumTriBlendModes() { return (int)TriBlendMode::TranslateRevSub + 1; } class Drawers { public: - Drawers(); - static Drawers *Instance(); void(*DrawColumn)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; From f808bcb8d21ab63c2b80b0c9bc05c1f21976db06 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 28 Nov 2016 23:57:55 +0100 Subject: [PATCH 393/912] Remove llvm_cpu cvar --- src/r_drawers.cpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/r_drawers.cpp b/src/r_drawers.cpp index b098e50a00..72241fe745 100644 --- a/src/r_drawers.cpp +++ b/src/r_drawers.cpp @@ -27,11 +27,6 @@ #include "version.h" #include "m_misc.h" -CUSTOM_CVAR(String, llvm_cpu, "auto", CVAR_ARCHIVE | CVAR_NOINITCALL) -{ - Printf("You must restart " GAMENAME " for this change to take effect.\n"); -} - ///////////////////////////////////////////////////////////////////////////// extern "C" From 1ac0fd1a90d9243f55d7edb8b446add9ab3a12ca Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 29 Nov 2016 01:19:20 +0100 Subject: [PATCH 394/912] Add a silly sleep --- tools/drawergen/drawergen.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/drawergen/drawergen.cpp b/tools/drawergen/drawergen.cpp index 
dfe5fc11be..8709ebb95a 100644 --- a/tools/drawergen/drawergen.cpp +++ b/tools/drawergen/drawergen.cpp @@ -617,7 +617,8 @@ std::string LLVMProgram::DumpModule() } ///////////////////////////////////////////////////////////////////////////// - +#include +#include int main(int argc, char **argv) { if (argc != 2) @@ -660,5 +661,8 @@ int main(int argc, char **argv) //LLVMDrawers drawersAVX("sandybridge"); //LLVMDrawers drawersAVX2("haswell"); + using namespace std::chrono_literals; + std::this_thread::sleep_for(2s); // Doh! Silly OS! + return 0; } From 37078f7fefa76430974b3bf15b08387ba53b9035 Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Mon, 28 Nov 2016 19:48:44 -0500 Subject: [PATCH 395/912] - Fixed: CMake now asks DrawerGen to drop its output into the build folder instead of the source folder. Fixes a linker error. --- src/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 458acf6ad9..d6714b4981 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -678,12 +678,12 @@ add_custom_target( revision_check ALL if ( WIN32 ) add_custom_target( drawergen_target ALL COMMAND drawergen src/r_drawersasm.obj - WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} + WORKING_DIRECTORY ${CMAKE_BINARY_DIR} DEPENDS drawergen ) else() add_custom_target( drawergen_target ALL COMMAND drawergen src/r_drawersasm.o - WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} + WORKING_DIRECTORY ${CMAKE_BINARY_DIR} DEPENDS drawergen ) endif() From 0bb179c0c2e9cff5b047c1ce566eb05468d04503 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 29 Nov 2016 01:55:45 +0100 Subject: [PATCH 396/912] Remove the silly sleep again --- tools/drawergen/drawergen.cpp | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/tools/drawergen/drawergen.cpp b/tools/drawergen/drawergen.cpp index 8709ebb95a..dfe5fc11be 100644 --- a/tools/drawergen/drawergen.cpp +++ b/tools/drawergen/drawergen.cpp @@ -617,8 +617,7 @@ std::string 
LLVMProgram::DumpModule() } ///////////////////////////////////////////////////////////////////////////// -#include -#include + int main(int argc, char **argv) { if (argc != 2) @@ -661,8 +660,5 @@ int main(int argc, char **argv) //LLVMDrawers drawersAVX("sandybridge"); //LLVMDrawers drawersAVX2("haswell"); - using namespace std::chrono_literals; - std::this_thread::sleep_for(2s); // Doh! Silly OS! - return 0; } From 17c4c4a3841f88c7f6a59befabd7808342ce3e9c Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Mon, 28 Nov 2016 20:29:57 -0500 Subject: [PATCH 397/912] - Fixed: Linker now searches for r_drawerasm.obj in the proper place. This probably broke on Mac and Linux. :( --- src/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index d6714b4981..1b84ed3a1a 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1506,7 +1506,7 @@ add_executable( zdoom WIN32 MACOSX_BUNDLE math/tanh.c math/fastsin.cpp zzautozend.cpp - r_drawersasm.obj + ${CMAKE_BINARY_DIR}/src/r_drawersasm.obj ) set_source_files_properties( ${CODEGENOBJ_SOURCES} PROPERTIES EXTERNAL_OBJECT true GENERATED true) From f4d5fb4c258e903231500c553e03f28fd56fcc4b Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 29 Nov 2016 03:32:24 +0100 Subject: [PATCH 398/912] Improve drawergen to only recompile the object file if its timestamp does not match --- tools/drawergen/drawergen.cpp | 48 ++++++++++++++++++- .../fixedfunction/drawcolumncodegen.cpp | 1 + .../drawergen/fixedfunction/drawercodegen.cpp | 1 + .../fixedfunction/drawskycodegen.cpp | 1 + .../fixedfunction/drawspancodegen.cpp | 1 + .../fixedfunction/drawtrianglecodegen.cpp | 1 + .../fixedfunction/drawwallcodegen.cpp | 1 + tools/drawergen/timestamp.h | 12 +++++ 8 files changed, 65 insertions(+), 1 deletion(-) create mode 100644 tools/drawergen/timestamp.h diff --git a/tools/drawergen/drawergen.cpp b/tools/drawergen/drawergen.cpp index dfe5fc11be..068fb164fa 100644 --- 
a/tools/drawergen/drawergen.cpp +++ b/tools/drawergen/drawergen.cpp @@ -21,6 +21,7 @@ */ #include "precomp.h" +#include "timestamp.h" #include "fixedfunction/drawspancodegen.h" #include "fixedfunction/drawwallcodegen.h" #include "fixedfunction/drawcolumncodegen.h" @@ -618,6 +619,18 @@ std::string LLVMProgram::DumpModule() ///////////////////////////////////////////////////////////////////////////// +std::string &AllTimestamps() +{ + static std::string timestamps; + return timestamps; +} + +void AddSourceFileTimestamp(const char *timestamp) +{ + if (!AllTimestamps().empty()) AllTimestamps().push_back(' '); + AllTimestamps() += timestamp; +} + int main(int argc, char **argv) { if (argc != 2) @@ -626,6 +639,25 @@ int main(int argc, char **argv) return 1; } + std::string timestamp_filename = argv[1] + std::string(".timestamp"); + + FILE *file = fopen(timestamp_filename.c_str(), "rb"); + if (file != nullptr) + { + char buffer[4096]; + int bytes_read = fread(buffer, 1, 4096, file); + fclose(file); + std::string last_timestamp; + if (bytes_read > 0) + last_timestamp = std::string(buffer, bytes_read); + + if (AllTimestamps() == last_timestamp) + { + std::cout << "Not recompiling drawers because the object file is already up to date." << std::endl; + exit(0); + } + } + llvm::install_fatal_error_handler([](void *user_data, const std::string& reason, bool gen_crash_diag) { std::cerr << "LLVM fatal error: " << reason; @@ -640,7 +672,7 @@ int main(int argc, char **argv) LLVMDrawers drawersSSE2(cpuName, "_SSE2"); - FILE *file = fopen(argv[1], "wb"); + file = fopen(argv[1], "wb"); if (file == nullptr) { std::cerr << "Unable to open " << argv[1] << " for writing." 
<< std::endl; @@ -656,6 +688,20 @@ int main(int argc, char **argv) return 1; } + file = fopen(timestamp_filename.c_str(), "wb"); + if (file == nullptr) + { + std::cerr << "Could not create timestamp file" << std::endl; + return 1; + } + result = fwrite(AllTimestamps().data(), AllTimestamps().length(), 1, file); + fclose(file); + if (result != 1) + { + std::cerr << "Could not write timestamp file" << std::endl; + return 1; + } + //LLVMDrawers drawersSSE4("core2"); //LLVMDrawers drawersAVX("sandybridge"); //LLVMDrawers drawersAVX2("haswell"); diff --git a/tools/drawergen/fixedfunction/drawcolumncodegen.cpp b/tools/drawergen/fixedfunction/drawcolumncodegen.cpp index 0ce34c83c9..177074dad3 100644 --- a/tools/drawergen/fixedfunction/drawcolumncodegen.cpp +++ b/tools/drawergen/fixedfunction/drawcolumncodegen.cpp @@ -21,6 +21,7 @@ */ #include "precomp.h" +#include "timestamp.h" #include "fixedfunction/drawcolumncodegen.h" #include "ssa/ssa_function.h" #include "ssa/ssa_scope.h" diff --git a/tools/drawergen/fixedfunction/drawercodegen.cpp b/tools/drawergen/fixedfunction/drawercodegen.cpp index c1d24f5940..02c6e302a6 100644 --- a/tools/drawergen/fixedfunction/drawercodegen.cpp +++ b/tools/drawergen/fixedfunction/drawercodegen.cpp @@ -21,6 +21,7 @@ */ #include "precomp.h" +#include "timestamp.h" #include "fixedfunction/drawercodegen.h" #include "ssa/ssa_function.h" #include "ssa/ssa_scope.h" diff --git a/tools/drawergen/fixedfunction/drawskycodegen.cpp b/tools/drawergen/fixedfunction/drawskycodegen.cpp index 57016a6202..3a05818703 100644 --- a/tools/drawergen/fixedfunction/drawskycodegen.cpp +++ b/tools/drawergen/fixedfunction/drawskycodegen.cpp @@ -21,6 +21,7 @@ */ #include "precomp.h" +#include "timestamp.h" #include "fixedfunction/drawskycodegen.h" #include "ssa/ssa_function.h" #include "ssa/ssa_scope.h" diff --git a/tools/drawergen/fixedfunction/drawspancodegen.cpp b/tools/drawergen/fixedfunction/drawspancodegen.cpp index 2e9fd0d857..c6aacc75a2 100644 --- 
a/tools/drawergen/fixedfunction/drawspancodegen.cpp +++ b/tools/drawergen/fixedfunction/drawspancodegen.cpp @@ -21,6 +21,7 @@ */ #include "precomp.h" +#include "timestamp.h" #include "fixedfunction/drawspancodegen.h" #include "ssa/ssa_function.h" #include "ssa/ssa_scope.h" diff --git a/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp b/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp index 8b7e8001ce..fc2c3db151 100644 --- a/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp +++ b/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp @@ -21,6 +21,7 @@ */ #include "precomp.h" +#include "timestamp.h" #include "fixedfunction/drawtrianglecodegen.h" #include "ssa/ssa_function.h" #include "ssa/ssa_scope.h" diff --git a/tools/drawergen/fixedfunction/drawwallcodegen.cpp b/tools/drawergen/fixedfunction/drawwallcodegen.cpp index 90ca16a61b..94b807f40f 100644 --- a/tools/drawergen/fixedfunction/drawwallcodegen.cpp +++ b/tools/drawergen/fixedfunction/drawwallcodegen.cpp @@ -21,6 +21,7 @@ */ #include "precomp.h" +#include "timestamp.h" #include "fixedfunction/drawwallcodegen.h" #include "ssa/ssa_function.h" #include "ssa/ssa_scope.h" diff --git a/tools/drawergen/timestamp.h b/tools/drawergen/timestamp.h new file mode 100644 index 0000000000..6dd11bcffd --- /dev/null +++ b/tools/drawergen/timestamp.h @@ -0,0 +1,12 @@ + +#pragma once + +void AddSourceFileTimestamp(const char *timestamp); + +namespace +{ + struct TimestampSourceFile + { + TimestampSourceFile() { AddSourceFileTimestamp(__TIME__); } + } timestamp; +} From 046f5f2b2e7f99d2fb0d7b4d338e444e9b10ee03 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 29 Nov 2016 13:53:02 +0100 Subject: [PATCH 399/912] Catch exceptions and write out their message --- tools/drawergen/drawergen.cpp | 142 ++++++++++++++++++---------------- 1 file changed, 75 insertions(+), 67 deletions(-) diff --git a/tools/drawergen/drawergen.cpp b/tools/drawergen/drawergen.cpp index 068fb164fa..8896e3f7b3 100644 --- 
a/tools/drawergen/drawergen.cpp +++ b/tools/drawergen/drawergen.cpp @@ -633,78 +633,86 @@ void AddSourceFileTimestamp(const char *timestamp) int main(int argc, char **argv) { - if (argc != 2) + try { - std::cerr << "Usage: " << argv[0] << "" << std::endl; - return 1; - } - - std::string timestamp_filename = argv[1] + std::string(".timestamp"); - - FILE *file = fopen(timestamp_filename.c_str(), "rb"); - if (file != nullptr) - { - char buffer[4096]; - int bytes_read = fread(buffer, 1, 4096, file); - fclose(file); - std::string last_timestamp; - if (bytes_read > 0) - last_timestamp = std::string(buffer, bytes_read); - - if (AllTimestamps() == last_timestamp) + if (argc != 2) { - std::cout << "Not recompiling drawers because the object file is already up to date." << std::endl; - exit(0); + std::cerr << "Usage: " << argv[0] << "" << std::endl; + return 1; } + + std::string timestamp_filename = argv[1] + std::string(".timestamp"); + + FILE *file = fopen(timestamp_filename.c_str(), "rb"); + if (file != nullptr) + { + char buffer[4096]; + int bytes_read = fread(buffer, 1, 4096, file); + fclose(file); + std::string last_timestamp; + if (bytes_read > 0) + last_timestamp = std::string(buffer, bytes_read); + + if (AllTimestamps() == last_timestamp) + { + std::cout << "Not recompiling drawers because the object file is already up to date." << std::endl; + exit(0); + } + } + + llvm::install_fatal_error_handler([](void *user_data, const std::string& reason, bool gen_crash_diag) + { + std::cerr << "LLVM fatal error: " << reason; + exit(1); + }); + + llvm::InitializeNativeTarget(); + llvm::InitializeNativeTargetAsmPrinter(); + + std::string cpuName = "pentium4"; + std::cout << "Compiling drawer code for " << cpuName << ".." << std::endl; + + LLVMDrawers drawersSSE2(cpuName, "_SSE2"); + + file = fopen(argv[1], "wb"); + if (file == nullptr) + { + std::cerr << "Unable to open " << argv[1] << " for writing." 
<< std::endl; + return 1; + } + + int result = fwrite(drawersSSE2.ObjectFile.data(), drawersSSE2.ObjectFile.size(), 1, file); + fclose(file); + + if (result != 1) + { + std::cerr << "Could not write data to " << argv[1] << std::endl; + return 1; + } + + file = fopen(timestamp_filename.c_str(), "wb"); + if (file == nullptr) + { + std::cerr << "Could not create timestamp file" << std::endl; + return 1; + } + result = fwrite(AllTimestamps().data(), AllTimestamps().length(), 1, file); + fclose(file); + if (result != 1) + { + std::cerr << "Could not write timestamp file" << std::endl; + return 1; + } + + //LLVMDrawers drawersSSE4("core2"); + //LLVMDrawers drawersAVX("sandybridge"); + //LLVMDrawers drawersAVX2("haswell"); + + return 0; } - - llvm::install_fatal_error_handler([](void *user_data, const std::string& reason, bool gen_crash_diag) + catch (const std::exception &e) { - std::cerr << "LLVM fatal error: " << reason; - exit(1); - }); - - llvm::InitializeNativeTarget(); - llvm::InitializeNativeTargetAsmPrinter(); - - std::string cpuName = "pentium4"; - std::cout << "Compiling drawer code for " << cpuName << ".." << std::endl; - - LLVMDrawers drawersSSE2(cpuName, "_SSE2"); - - file = fopen(argv[1], "wb"); - if (file == nullptr) - { - std::cerr << "Unable to open " << argv[1] << " for writing." 
<< std::endl; + std::cerr << e.what() << std::endl; return 1; } - - int result = fwrite(drawersSSE2.ObjectFile.data(), drawersSSE2.ObjectFile.size(), 1, file); - fclose(file); - - if (result != 1) - { - std::cerr << "Could not write data to " << argv[1] << std::endl; - return 1; - } - - file = fopen(timestamp_filename.c_str(), "wb"); - if (file == nullptr) - { - std::cerr << "Could not create timestamp file" << std::endl; - return 1; - } - result = fwrite(AllTimestamps().data(), AllTimestamps().length(), 1, file); - fclose(file); - if (result != 1) - { - std::cerr << "Could not write timestamp file" << std::endl; - return 1; - } - - //LLVMDrawers drawersSSE4("core2"); - //LLVMDrawers drawersAVX("sandybridge"); - //LLVMDrawers drawersAVX2("haswell"); - - return 0; } From b450ac50470c1b469e6a4b2d64f954d6bd6ba6ba Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 29 Nov 2016 13:54:06 +0100 Subject: [PATCH 400/912] Remove some multiplications and branching from the triangle drawer --- .../fixedfunction/drawtrianglecodegen.cpp | 140 ++++++++++-------- .../fixedfunction/drawtrianglecodegen.h | 11 +- 2 files changed, 90 insertions(+), 61 deletions(-) diff --git a/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp b/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp index fc2c3db151..8178a58ccd 100644 --- a/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp +++ b/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp @@ -134,12 +134,20 @@ void DrawTriangleCodegen::Setup() v3.y = SSAFloat(Y3) * 0.0625f; gradWX = gradx(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); gradWY = grady(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); - startW = v1.w + gradWX * (SSAFloat(minx) - v1.x) + gradWY * (SSAFloat(miny) - v1.y); + stack_posy_w.store(v1.w + gradWX * (SSAFloat(minx) - v1.x) + gradWY * (SSAFloat(miny) - v1.y)); for (int i = 0; i < TriVertex::NumVarying; i++) { gradVaryingX[i] = gradx(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, 
v2.varying[i] * v2.w, v3.varying[i] * v3.w); gradVaryingY[i] = grady(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); - startVarying[i] = v1.varying[i] * v1.w + gradVaryingX[i] * (SSAFloat(minx) - v1.x) + gradVaryingY[i] * (SSAFloat(miny) - v1.y); + stack_posy_varying[i].store(v1.varying[i] * v1.w + gradVaryingX[i] * (SSAFloat(minx) - v1.x) + gradVaryingY[i] * (SSAFloat(miny) - v1.y)); + } + + gradWX = gradWX * (float)q; + gradWY = gradWY * (float)q; + for (int i = 0; i < TriVertex::NumVarying; i++) + { + gradVaryingX[i] = gradVaryingX[i] * (float)q; + gradVaryingY[i] = gradVaryingY[i] * (float)q; } } @@ -161,26 +169,32 @@ void DrawTriangleCodegen::LoopBlockY() { int pixelsize = truecolor ? 4 : 1; - stack_y.store(miny); - stack_dest.store(dest); - stack_subsectorGBuffer.store(subsectorGBuffer); + SSAInt blocks_skipped = skipped_by_thread(miny / q, thread); + stack_y.store(miny + blocks_skipped * q); + stack_dest.store(dest[blocks_skipped * q * pitch * pixelsize]); + stack_subsectorGBuffer.store(subsectorGBuffer[blocks_skipped * q * pitch]); + stack_posy_w.store(stack_posy_w.load() + gradWY * blocks_skipped); + for (int i = 0; i < TriVertex::NumVarying; i++) + stack_posy_varying[i].store(stack_posy_varying[i].load() + gradVaryingY[i] * blocks_skipped); SSAForBlock loop; y = stack_y.load(); dest = stack_dest.load(); subsectorGBuffer = stack_subsectorGBuffer.load(); + posy_w = stack_posy_w.load(); + for (int i = 0; i < TriVertex::NumVarying; i++) + posy_varying[i] = stack_posy_varying[i].load(); loop.loop_block(y < maxy, 0); { - SSAIfBlock branch; - branch.if_block((y / q) % thread.num_cores == thread.core); - { - LoopBlockX(); - } - branch.end_block(); + LoopBlockX(); - stack_dest.store(dest[q * pitch * pixelsize]); - stack_subsectorGBuffer.store(subsectorGBuffer[q * pitch]); - stack_y.store(y + q); + stack_posy_w.store(posy_w + gradWY * thread.num_cores); + for (int i = 0; i < TriVertex::NumVarying; i++) + 
stack_posy_varying[i].store(posy_varying[i] + gradVaryingY[i] * thread.num_cores); + + stack_dest.store(dest[q * pitch * pixelsize * thread.num_cores]); + stack_subsectorGBuffer.store(subsectorGBuffer[q * pitch * thread.num_cores]); + stack_y.store(y + thread.num_cores * q); } loop.end_block(); } @@ -188,9 +202,15 @@ void DrawTriangleCodegen::LoopBlockY() void DrawTriangleCodegen::LoopBlockX() { stack_x.store(minx); + stack_posx_w.store(posy_w); + for (int i = 0; i < TriVertex::NumVarying; i++) + stack_posx_varying[i].store(stack_posy_varying[i].load()); SSAForBlock loop; x = stack_x.load(); + posx_w = stack_posx_w.load(); + for (int i = 0; i < TriVertex::NumVarying; i++) + posx_varying[i] = stack_posx_varying[i].load(); loop.loop_block(x < maxx, 0); { // Corners of block @@ -226,51 +246,7 @@ void DrawTriangleCodegen::LoopBlockX() // Check if block needs clipping SSABool clipneeded = x < clipleft || (x + q) > clipright || y < cliptop || (y + q) > clipbottom; - // Calculate varying variables for affine block - SSAFloat offx0 = SSAFloat(x - minx); - SSAFloat offy0 = SSAFloat(y - miny); - SSAFloat offx1 = offx0 + SSAFloat(q); - SSAFloat offy1 = offy0 + SSAFloat(q); - SSAFloat rcpWTL = 1.0f / (startW + offx0 * gradWX + offy0 * gradWY); - SSAFloat rcpWTR = 1.0f / (startW + offx1 * gradWX + offy0 * gradWY); - SSAFloat rcpWBL = 1.0f / (startW + offx0 * gradWX + offy1 * gradWY); - SSAFloat rcpWBR = 1.0f / (startW + offx1 * gradWX + offy1 * gradWY); - for (int i = 0; i < TriVertex::NumVarying; i++) - { - SSAFloat varyingTL = (startVarying[i] + offx0 * gradVaryingX[i] + offy0 * gradVaryingY[i]) * rcpWTL; - SSAFloat varyingTR = (startVarying[i] + offx1 * gradVaryingX[i] + offy0 * gradVaryingY[i]) * rcpWTR; - SSAFloat varyingBL = (startVarying[i] + offx0 * gradVaryingX[i] + offy1 * gradVaryingY[i]) * rcpWBL; - SSAFloat varyingBR = (startVarying[i] + offx1 * gradVaryingX[i] + offy1 * gradVaryingY[i]) * rcpWBR; - - SSAFloat startStepX = (varyingTR - varyingTL) * (1.0f / q); - 
SSAFloat endStepX = (varyingBR - varyingBL) * (1.0f / q); - SSAFloat incrStepX = (endStepX - startStepX) * (1.0f / q); - SSAFloat stepY = (varyingBL - varyingTL) * (1.0f / q); - - varyingPos[i] = SSAInt(varyingTL * SSAFloat((float)0x01000000), false) << 8; - varyingStepY[i] = SSAInt(stepY * SSAFloat((float)0x01000000), false) << 8; - varyingStartStepX[i] = SSAInt(startStepX * SSAFloat((float)0x01000000), false) << 8; - varyingIncrStepX[i] = SSAInt(incrStepX * SSAFloat((float)0x01000000), false) << 8; - } - - SSAFloat globVis = SSAFloat(1706.0f); - SSAFloat vis = globVis / rcpWTL; - SSAFloat shade = 64.0f - (SSAFloat(light * 255 / 256) + 12.0f) * 32.0f / 128.0f; - SSAFloat lightscale = SSAFloat::clamp((shade - SSAFloat::MIN(SSAFloat(24.0f), vis)) / 32.0f, SSAFloat(0.0f), SSAFloat(31.0f / 32.0f)); - SSAInt diminishedlight = SSAInt(SSAFloat::clamp((1.0f - lightscale) * 256.0f + 0.5f, SSAFloat(0.0f), SSAFloat(256.0f)), false); - - if (!truecolor) - { - SSAInt diminishedindex = SSAInt(lightscale * 32.0f, false); - SSAInt lightindex = SSAInt::MIN((256 - light) * 32 / 256, SSAInt(31)); - SSAInt colormapindex = is_fixed_light.select(lightindex, diminishedindex); - currentcolormap = Colormaps[colormapindex << 8]; - } - else - { - currentlight = is_fixed_light.select(light, diminishedlight); - } - + SetupAffineBlock(); SetStencilBlock(x / 8 + y / 8 * stencilPitch); SSABool covered = a == SSAInt(0xF) && b == SSAInt(0xF) && c == SSAInt(0xF) && !clipneeded && StencilIsSingleValue(); @@ -289,11 +265,55 @@ void DrawTriangleCodegen::LoopBlockX() branch.end_block(); + stack_posx_w.store(posx_w + gradWX); + for (int i = 0; i < TriVertex::NumVarying; i++) + stack_posx_varying[i].store(posx_varying[i] + gradVaryingX[i]); + stack_x.store(x + q); } loop.end_block(); } +void DrawTriangleCodegen::SetupAffineBlock() +{ + // Calculate varying variables for affine block + SSAVec4f rcpW = SSAVec4f::rcp(SSAVec4f(posx_w, posx_w + gradWX, posx_w + gradWY, posx_w + gradWX + gradWY)); + for (int i 
= 0; i < TriVertex::NumVarying; i++) + { + // Top left, top right, bottom left, bottom right: + SSAVec4f varying = SSAVec4f(posx_varying[i], posx_varying[i] + gradVaryingX[i], posx_varying[i] + gradVaryingY[i], posx_varying[i] + gradVaryingX[i] + gradVaryingY[i]) * rcpW; + + SSAFloat startStepX = (varying[1] - varying[0]) * (1.0f / q); + SSAFloat endStepX = (varying[3] - varying[2]) * (1.0f / q); + SSAFloat incrStepX = (endStepX - startStepX) * (1.0f / q); + SSAFloat stepY = (varying[2] - varying[0]) * (1.0f / q); + + SSAVec4i ints = SSAVec4i(SSAVec4f(varying[0], stepY, startStepX, incrStepX) * (float)0x01000000) << 8; + varyingPos[i] = ints[0]; + varyingStepY[i] = ints[1]; + varyingStartStepX[i] = ints[2]; + varyingIncrStepX[i] = ints[3]; + } + + SSAFloat globVis = SSAFloat(1706.0f); + SSAFloat vis = globVis / rcpW[0]; + SSAFloat shade = 64.0f - (SSAFloat(light * 255 / 256) + 12.0f) * 32.0f / 128.0f; + SSAFloat lightscale = SSAFloat::clamp((shade - SSAFloat::MIN(SSAFloat(24.0f), vis)) / 32.0f, SSAFloat(0.0f), SSAFloat(31.0f / 32.0f)); + SSAInt diminishedlight = SSAInt(SSAFloat::clamp((1.0f - lightscale) * 256.0f + 0.5f, SSAFloat(0.0f), SSAFloat(256.0f)), false); + + if (!truecolor) + { + SSAInt diminishedindex = SSAInt(lightscale * 32.0f, false); + SSAInt lightindex = SSAInt::MIN((256 - light) * 32 / 256, SSAInt(31)); + SSAInt colormapindex = is_fixed_light.select(lightindex, diminishedindex); + currentcolormap = Colormaps[colormapindex << 8]; + } + else + { + currentlight = is_fixed_light.select(light, diminishedlight); + } +} + void DrawTriangleCodegen::LoopFullBlock() { SSAIfBlock branch_stenciltest; @@ -835,6 +855,8 @@ void DrawTriangleCodegen::LoadArgs(SSAValue args, SSAValue thread_data) thread.core = thread_data[0][0].load(true); thread.num_cores = thread_data[0][1].load(true); + thread.pass_start_y = SSAInt(0); + thread.pass_end_y = SSAInt(32000); } SSATriVertex DrawTriangleCodegen::LoadTriVertex(SSAValue ptr) diff --git 
a/tools/drawergen/fixedfunction/drawtrianglecodegen.h b/tools/drawergen/fixedfunction/drawtrianglecodegen.h index bb6df455eb..3866e3566b 100644 --- a/tools/drawergen/fixedfunction/drawtrianglecodegen.h +++ b/tools/drawergen/fixedfunction/drawtrianglecodegen.h @@ -45,6 +45,7 @@ private: void LoopBlockX(); void LoopFullBlock(); void LoopPartialBlock(); + void SetupAffineBlock(); SSAVec4i ProcessPixel32(SSAVec4i bg, SSAInt *varying); SSAInt ProcessPixel8(SSAInt bg, SSAInt *varying); @@ -85,6 +86,9 @@ private: SSAStack stack_CY1, stack_CY2, stack_CY3; SSAStack stack_CX1, stack_CX2, stack_CX3; + SSAStack stack_posy_w, stack_posy_varying[TriVertex::NumVarying]; + SSAStack stack_posx_w, stack_posx_varying[TriVertex::NumVarying]; + SSAUBytePtr dest; SSAInt pitch; SSATriVertex v1; @@ -131,8 +135,8 @@ private: SSAInt FDY12, FDY23, FDY31; SSAInt minx, maxx, miny, maxy; SSAInt C1, C2, C3; - SSAFloat gradWX, gradWY, startW; - SSAFloat gradVaryingX[TriVertex::NumVarying], gradVaryingY[TriVertex::NumVarying], startVarying[TriVertex::NumVarying]; + SSAFloat gradWX, gradWY; + SSAFloat gradVaryingX[TriVertex::NumVarying], gradVaryingY[TriVertex::NumVarying]; SSAInt x, y; SSAInt x0, x1, y0, y1; @@ -143,6 +147,9 @@ private: SSAInt varyingStartStepX[TriVertex::NumVarying]; SSAInt varyingIncrStepX[TriVertex::NumVarying]; + SSAFloat posy_w, posy_varying[TriVertex::NumVarying]; + SSAFloat posx_w, posx_varying[TriVertex::NumVarying]; + SSAUBytePtr StencilBlock; SSAIntPtr StencilBlockMask; }; From 7474be628405fc0380f909e2d1020687ca1b28b5 Mon Sep 17 00:00:00 2001 From: Edoardo Prezioso Date: Tue, 29 Nov 2016 19:46:38 +0100 Subject: [PATCH 401/912] - Fixed GCC/Clang compiler errors and warnings. 
--- src/CMakeLists.txt | 2 +- tools/drawergen/CMakeLists.txt | 40 ++++++++++++++++++++++++++++ tools/drawergen/drawergen.cpp | 2 +- tools/drawergen/ssa/ssa_function.cpp | 2 +- 4 files changed, 43 insertions(+), 3 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 1b84ed3a1a..73a3074bb5 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1506,7 +1506,7 @@ add_executable( zdoom WIN32 MACOSX_BUNDLE math/tanh.c math/fastsin.cpp zzautozend.cpp - ${CMAKE_BINARY_DIR}/src/r_drawersasm.obj + ${CMAKE_BINARY_DIR}/src/${CODEGENOBJ_SOURCES} ) set_source_files_properties( ${CODEGENOBJ_SOURCES} PROPERTIES EXTERNAL_OBJECT true GENERATED true) diff --git a/tools/drawergen/CMakeLists.txt b/tools/drawergen/CMakeLists.txt index eef5a0a944..61944953dc 100644 --- a/tools/drawergen/CMakeLists.txt +++ b/tools/drawergen/CMakeLists.txt @@ -1,5 +1,7 @@ cmake_minimum_required( VERSION 2.8.7 ) +include( CheckCXXCompilerFlag ) + include(../../precompiled_headers.cmake) # Path where it looks for the LLVM compiled files on Windows @@ -54,6 +56,44 @@ else() endforeach(buildtype) endif() +if( ZD_CMAKE_COMPILER_IS_GNUCXX_COMPATIBLE ) + if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER "4.5") + set( CMAKE_C_FLAGS "-Wno-unused-result ${CMAKE_C_FLAGS}" ) + set( CMAKE_CXX_FLAGS "-Wno-unused-result ${CMAKE_CXX_FLAGS}" ) + endif() + if( CMAKE_CXX_COMPILER_ID STREQUAL "Clang" ) + if( APPLE OR CMAKE_CXX_COMPILER_VERSION VERSION_GREATER "3.6" ) + set( CMAKE_CXX_FLAGS "-Wno-inconsistent-missing-override ${CMAKE_CXX_FLAGS}" ) + endif() + endif() + set( CMAKE_C_FLAGS "-Wall -Wextra -Wno-unused -Wno-unused-parameter -Wno-missing-field-initializers -ffp-contract=off ${CMAKE_C_FLAGS}" ) + set( CMAKE_CXX_FLAGS "-Wall -Wextra -Wno-unused -Wno-unused-parameter -Wno-missing-field-initializers -ffp-contract=off ${CMAKE_CXX_FLAGS}" ) + + # Use the highest C++ standard available since VS2015 compiles with C++14 + # but we only require C++11. 
The recommended way to do this in CMake is to + # probably to use target_compile_features, but I don't feel like maintaining + # a list of features we use. + CHECK_CXX_COMPILER_FLAG( "-std=gnu++14" CAN_DO_CPP14 ) + if ( CAN_DO_CPP14 ) + set ( CMAKE_CXX_FLAGS "-std=gnu++14 ${CMAKE_CXX_FLAGS}" ) + else () + CHECK_CXX_COMPILER_FLAG( "-std=gnu++1y" CAN_DO_CPP1Y ) + if ( CAN_DO_CPP1Y ) + set ( CMAKE_CXX_FLAGS "-std=gnu++1y ${CMAKE_CXX_FLAGS}" ) + else () + CHECK_CXX_COMPILER_FLAG( "-std=gnu++11" CAN_DO_CPP11 ) + if ( CAN_DO_CPP11 ) + set ( CMAKE_CXX_FLAGS "-std=gnu++11 ${CMAKE_CXX_FLAGS}" ) + else () + CHECK_CXX_COMPILER_FLAG( "-std=gnu++0x" CAN_DO_CPP0X ) + if ( CAN_DO_CPP0X ) + set ( CMAKE_CXX_FLAGS "-std=gnu++0x ${CMAKE_CXX_FLAGS}" ) + endif () + endif () + endif () + endif () +endif() + if( WIN32 ) if( MSVC_VERSION GREATER 1399 ) # VC 8+ adds a manifest automatically to the executable. We need to diff --git a/tools/drawergen/drawergen.cpp b/tools/drawergen/drawergen.cpp index 8896e3f7b3..a37f2f26ed 100644 --- a/tools/drawergen/drawergen.cpp +++ b/tools/drawergen/drawergen.cpp @@ -42,7 +42,7 @@ class Exception : public std::exception { public: Exception(const std::string &message) : message(message) { } - const char *what() const override { return message.c_str(); } + const char *what() const noexcept override { return message.c_str(); } private: std::string message; diff --git a/tools/drawergen/ssa/ssa_function.cpp b/tools/drawergen/ssa/ssa_function.cpp index ef01cec441..e21b0b2299 100644 --- a/tools/drawergen/ssa/ssa_function.cpp +++ b/tools/drawergen/ssa/ssa_function.cpp @@ -27,7 +27,7 @@ #include "ssa_value.h" SSAFunction::SSAFunction(const std::string name) -: name(name), return_type(llvm::Type::getVoidTy(SSAScope::context())), func() +: func(), name(name), return_type(llvm::Type::getVoidTy(SSAScope::context())) { } From d837f9ab9348e72b17e5058ed8414b0d8fd481e7 Mon Sep 17 00:00:00 2001 From: "alexey.lysiuk" Date: Tue, 29 Nov 2016 22:25:13 +0200 Subject: [PATCH 
402/912] Fixed compilation of drawergen on macOS --- tools/drawergen/CMakeLists.txt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/drawergen/CMakeLists.txt b/tools/drawergen/CMakeLists.txt index 61944953dc..dc9fefb178 100644 --- a/tools/drawergen/CMakeLists.txt +++ b/tools/drawergen/CMakeLists.txt @@ -92,6 +92,11 @@ if( ZD_CMAKE_COMPILER_IS_GNUCXX_COMPATIBLE ) endif () endif () endif () + + if ( APPLE AND CMAKE_CXX_COMPILER_ID STREQUAL "Clang" ) + set( CMAKE_CXX_FLAGS "-stdlib=libc++ ${CMAKE_CXX_FLAGS}" ) + set( CMAKE_EXE_LINKER_FLAGS "-stdlib=libc++ ${CMAKE_EXE_LINKER_FLAGS}" ) + endif () endif() if( WIN32 ) From 00387a72113398814178f863ce904aeb69fb3fa3 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 29 Nov 2016 22:16:40 +0100 Subject: [PATCH 403/912] Add bounds check to palette upload buffer size --- src/gl/system/gl_swframebuffer.cpp | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/gl/system/gl_swframebuffer.cpp b/src/gl/system/gl_swframebuffer.cpp index affcb86f9c..5eba62d1ea 100644 --- a/src/gl/system/gl_swframebuffer.cpp +++ b/src/gl/system/gl_swframebuffer.cpp @@ -2224,9 +2224,9 @@ bool OpenGLSWFrameBuffer::OpenGLPal::Update() { glGenBuffers(2, (GLuint*)Tex->Buffers); glBindBuffer(GL_PIXEL_UNPACK_BUFFER, Tex->Buffers[0]); - glBufferData(GL_PIXEL_UNPACK_BUFFER, Remap->NumEntries * 4, nullptr, GL_STREAM_DRAW); + glBufferData(GL_PIXEL_UNPACK_BUFFER, RoundedPaletteSize * 4, nullptr, GL_STREAM_DRAW); glBindBuffer(GL_PIXEL_UNPACK_BUFFER, Tex->Buffers[1]); - glBufferData(GL_PIXEL_UNPACK_BUFFER, Remap->NumEntries * 4, nullptr, GL_STREAM_DRAW); + glBufferData(GL_PIXEL_UNPACK_BUFFER, RoundedPaletteSize * 4, nullptr, GL_STREAM_DRAW); } else { @@ -2234,7 +2234,9 @@ bool OpenGLSWFrameBuffer::OpenGLPal::Update() Tex->CurrentBuffer = (Tex->CurrentBuffer + 1) & 1; } - buff = (uint32_t *)MapBuffer(GL_PIXEL_UNPACK_BUFFER, Remap->NumEntries * 4); + int numEntries = MIN(Remap->NumEntries, RoundedPaletteSize); + + buff 
= (uint32_t *)MapBuffer(GL_PIXEL_UNPACK_BUFFER, numEntries * 4); if (buff == nullptr) { return false; @@ -2242,13 +2244,13 @@ bool OpenGLSWFrameBuffer::OpenGLPal::Update() pal = Remap->Palette; // See explanation in UploadPalette() for skipat rationale. - skipat = MIN(Remap->NumEntries, DoColorSkip ? 256 - 8 : 256); + skipat = MIN(numEntries, DoColorSkip ? 256 - 8 : 256); for (i = 0; i < skipat; ++i) { buff[i] = ColorARGB(pal[i].a, pal[i].r, pal[i].g, pal[i].b); } - for (++i; i < Remap->NumEntries; ++i) + for (++i; i < numEntries; ++i) { buff[i] = ColorARGB(pal[i].a, pal[i - 1].r, pal[i - 1].g, pal[i - 1].b); } @@ -2258,7 +2260,7 @@ bool OpenGLSWFrameBuffer::OpenGLPal::Update() GLint oldBinding = 0; glGetIntegerv(GL_TEXTURE_BINDING_2D, &oldBinding); glBindTexture(GL_TEXTURE_2D, Tex->Texture); - glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, Remap->NumEntries, 1, GL_BGRA, GL_UNSIGNED_BYTE, 0); + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, numEntries, 1, GL_BGRA, GL_UNSIGNED_BYTE, 0); glBindTexture(GL_TEXTURE_2D, oldBinding); glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); From cc94381366ef91887e6d0fad61567a4dc73cdb1d Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 30 Nov 2016 07:14:24 +0100 Subject: [PATCH 404/912] Fix flushing issue in drawergen --- tools/drawergen/drawergen.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/drawergen/drawergen.cpp b/tools/drawergen/drawergen.cpp index a37f2f26ed..a306f36af2 100644 --- a/tools/drawergen/drawergen.cpp +++ b/tools/drawergen/drawergen.cpp @@ -599,6 +599,10 @@ std::vector LLVMProgram::GenerateObjectFile(std::string cpuName) PerModulePasses.run(*module); // Return the resulting object file + stream.flush(); +#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 8) + vecstream.flush(); +#endif std::vector data; data.resize(str.size()); memcpy(data.data(), str.data(), data.size()); From 6cc33553c23dbd9f7922201558b5560bfb438e50 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 30 Nov 
2016 07:48:40 +0100 Subject: [PATCH 405/912] Change object file generation to not use the JIT engine builder --- tools/drawergen/drawergen.cpp | 134 +++++++++++++--------------------- 1 file changed, 51 insertions(+), 83 deletions(-) diff --git a/tools/drawergen/drawergen.cpp b/tools/drawergen/drawergen.cpp index a306f36af2..bd3854d540 100644 --- a/tools/drawergen/drawergen.cpp +++ b/tools/drawergen/drawergen.cpp @@ -54,8 +54,7 @@ public: LLVMProgram(); void CreateModule(); - std::string GenerateAssembly(std::string cpuName); - std::vector GenerateObjectFile(std::string cpuName); + std::vector GenerateObjectFile(const std::string &triple, const std::string &cpuName, const std::string &features); std::string DumpModule(); llvm::LLVMContext &context() { return *mContext; } @@ -70,7 +69,7 @@ private: class LLVMDrawers { public: - LLVMDrawers(const std::string &cpuName, const std::string namePostfix); + LLVMDrawers(const std::string &triple, const std::string &cpuName, const std::string &features, const std::string namePostfix); std::vector ObjectFile; @@ -97,7 +96,7 @@ private: ///////////////////////////////////////////////////////////////////////////// -LLVMDrawers::LLVMDrawers(const std::string &cpuName, const std::string namePostfix) : mNamePostfix(namePostfix) +LLVMDrawers::LLVMDrawers(const std::string &triple, const std::string &cpuName, const std::string &features, const std::string namePostfix) : mNamePostfix(namePostfix) { mProgram.CreateModule(); @@ -177,7 +176,7 @@ LLVMDrawers::LLVMDrawers(const std::string &cpuName, const std::string namePostf CodegenDrawTriangle("TriStencil", TriDrawVariant::Stencil, TriBlendMode::Copy, false); CodegenDrawTriangle("TriStencilClose", TriDrawVariant::StencilClose, TriBlendMode::Copy, false); - ObjectFile = mProgram.GenerateObjectFile(cpuName); + ObjectFile = mProgram.GenerateObjectFile(triple, cpuName, features); } void LLVMDrawers::CodegenDrawColumn(const char *name, DrawColumnVariant variant, DrawColumnMethod method) @@ 
-459,92 +458,57 @@ void LLVMProgram::CreateModule() mModule = std::make_unique("render", context()); } -std::string LLVMProgram::GenerateAssembly(std::string cpuName) +std::vector LLVMProgram::GenerateObjectFile(const std::string &triple, const std::string &cpuName, const std::string &features) { using namespace llvm; std::string errorstring; llvm::Module *module = mModule.get(); - EngineBuilder engineBuilder(std::move(mModule)); - engineBuilder.setErrorStr(&errorstring); - engineBuilder.setOptLevel(CodeGenOpt::Aggressive); - engineBuilder.setEngineKind(EngineKind::JIT); - engineBuilder.setMCPU(cpuName); - machine = engineBuilder.selectTarget(); - if (!machine) - throw Exception("Could not create LLVM target machine"); -#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 8) - std::string targetTriple = machine->getTargetTriple(); -#else - std::string targetTriple = machine->getTargetTriple().getTriple(); -#endif + const Target *target = TargetRegistry::lookupTarget(triple, errorstring); + Optional relocationModel = Reloc::PIC_; + CodeModel::Model codeModel = CodeModel::Model::Default; - module->setTargetTriple(targetTriple); -#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 8) - module->setDataLayout(new DataLayout(*machine->getSubtargetImpl()->getDataLayout())); -#else - module->setDataLayout(machine->createDataLayout()); -#endif + TargetOptions options; + options.LessPreciseFPMADOption = true; + options.AllowFPOpFusion = FPOpFusion::Fast; + options.Reciprocals = TargetRecip({ "all" }); + options.UnsafeFPMath = true; + options.NoInfsFPMath = true; + options.NoNaNsFPMath = true; + options.HonorSignDependentRoundingFPMathOption = false; + options.NoZerosInBSS = false; + options.GuaranteedTailCallOpt = false; + options.StackAlignmentOverride = 0; + options.StackSymbolOrdering = true; + options.UseInitArray = true; + options.DataSections = false; + options.FunctionSections = false; + options.UniqueSectionNames = 
true; + options.EmulatedTLS = false; + options.ExceptionModel = ExceptionHandling::None; + options.JTType = JumpTable::Single; // Create a single table for all jumptable functions + options.ThreadModel = ThreadModel::POSIX; + options.EABIVersion = EABI::Default; + options.DebuggerTuning = DebuggerKind::Default; + options.DisableIntegratedAS = false; + options.MCOptions.SanitizeAddress = false; + options.MCOptions.MCRelaxAll = false; // relax all fixups in the emitted object file + options.MCOptions.MCIncrementalLinkerCompatible = false; + options.MCOptions.DwarfVersion = 0; + options.MCOptions.ShowMCInst = false; + options.MCOptions.ABIName = ""; + options.MCOptions.MCFatalWarnings = false; + options.MCOptions.MCNoWarn = false; + options.MCOptions.ShowMCEncoding = false; // Show encoding in .s output + options.MCOptions.MCUseDwarfDirectory = false; + options.MCOptions.AsmVerbose = true; + options.MCOptions.PreserveAsmComments = true; - legacy::FunctionPassManager PerFunctionPasses(module); - legacy::PassManager PerModulePasses; + CodeGenOpt::Level optimizationLevel = CodeGenOpt::Aggressive; + machine = target->createTargetMachine(triple, cpuName, features, options, relocationModel, codeModel, optimizationLevel); -#if LLVM_VERSION_MAJOR > 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 8) - PerFunctionPasses.add(createTargetTransformInfoWrapperPass(machine->getTargetIRAnalysis())); - PerModulePasses.add(createTargetTransformInfoWrapperPass(machine->getTargetIRAnalysis())); -#endif - - SmallString<16 * 1024> str; -#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 8) - raw_svector_ostream vecstream(str); - formatted_raw_ostream stream(vecstream); -#else - raw_svector_ostream stream(str); -#endif - machine->addPassesToEmitFile(PerModulePasses, stream, TargetMachine::CGFT_AssemblyFile); - - PassManagerBuilder passManagerBuilder; - passManagerBuilder.OptLevel = 3; - passManagerBuilder.SizeLevel = 0; - passManagerBuilder.Inliner = 
createFunctionInliningPass(); - passManagerBuilder.SLPVectorize = true; - passManagerBuilder.LoopVectorize = true; - passManagerBuilder.LoadCombine = true; - passManagerBuilder.populateModulePassManager(PerModulePasses); - passManagerBuilder.populateFunctionPassManager(PerFunctionPasses); - - // Run function passes: - PerFunctionPasses.doInitialization(); - for (llvm::Function &func : *module) - { - if (!func.isDeclaration()) - PerFunctionPasses.run(func); - } - PerFunctionPasses.doFinalization(); - - // Run module passes: - PerModulePasses.run(*module); - - return str.c_str(); -} - -std::vector LLVMProgram::GenerateObjectFile(std::string cpuName) -{ - using namespace llvm; - - std::string errorstring; - - llvm::Module *module = mModule.get(); - EngineBuilder engineBuilder(std::move(mModule)); - engineBuilder.setErrorStr(&errorstring); - engineBuilder.setOptLevel(CodeGenOpt::Aggressive); - engineBuilder.setEngineKind(EngineKind::JIT); - engineBuilder.setMCPU(cpuName); - machine = engineBuilder.selectTarget(); - if (!machine) - throw Exception("Could not create LLVM target machine"); #if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 8) std::string targetTriple = machine->getTargetTriple(); @@ -599,8 +563,8 @@ std::vector LLVMProgram::GenerateObjectFile(std::string cpuName) PerModulePasses.run(*module); // Return the resulting object file - stream.flush(); #if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 8) + stream.flush(); vecstream.flush(); #endif std::vector data; @@ -673,10 +637,14 @@ int main(int argc, char **argv) llvm::InitializeNativeTarget(); llvm::InitializeNativeTargetAsmPrinter(); + std::string triple = llvm::sys::getDefaultTargetTriple(); + std::cout << "Target triple is " << triple << std::endl; + std::string cpuName = "pentium4"; + std::string features; std::cout << "Compiling drawer code for " << cpuName << ".." 
<< std::endl; - LLVMDrawers drawersSSE2(cpuName, "_SSE2"); + LLVMDrawers drawersSSE2(triple, cpuName, features, "_SSE2"); file = fopen(argv[1], "wb"); if (file == nullptr) From 03282c957b28a24b1bc90f430f041a85e2dd9a62 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 30 Nov 2016 07:49:04 +0100 Subject: [PATCH 406/912] Add a new blend mode for sprites --- src/r_drawers.cpp | 6 ++++++ src/r_drawers.h | 5 +++-- src/r_poly_sprite.cpp | 6 ++++++ src/r_poly_triangle.cpp | 1 + tools/drawergen/fixedfunction/drawercodegen.cpp | 7 +++++++ tools/drawergen/fixedfunction/drawercodegen.h | 1 + tools/drawergen/fixedfunction/drawtrianglecodegen.cpp | 10 ++++++++++ 7 files changed, 34 insertions(+), 2 deletions(-) diff --git a/src/r_drawers.cpp b/src/r_drawers.cpp index 72241fe745..f94f409c45 100644 --- a/src/r_drawers.cpp +++ b/src/r_drawers.cpp @@ -119,6 +119,7 @@ extern "C" void TriDrawNormal32_10_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); void TriDrawNormal32_11_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); void TriDrawNormal32_12_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriDrawNormal32_13_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); void TriFillNormal8_0_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); void TriFillNormal8_1_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); void TriFillNormal8_2_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); @@ -145,6 +146,7 @@ extern "C" void TriFillNormal32_10_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); void TriFillNormal32_11_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); void TriFillNormal32_12_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriFillNormal32_13_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); void TriDrawSubsector8_0_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); void TriDrawSubsector8_1_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); void TriDrawSubsector8_2_SSE2(const TriDrawTriangleArgs *, 
WorkerThreadData *); @@ -158,6 +160,7 @@ extern "C" void TriDrawSubsector8_10_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); void TriDrawSubsector8_11_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); void TriDrawSubsector8_12_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriDrawSubsector8_13_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); void TriDrawSubsector32_0_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); void TriDrawSubsector32_1_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); void TriDrawSubsector32_2_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); @@ -171,6 +174,7 @@ extern "C" void TriDrawSubsector32_10_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); void TriDrawSubsector32_11_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); void TriDrawSubsector32_12_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriDrawSubsector32_13_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); void TriFillSubsector8_0_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); void TriFillSubsector8_1_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); void TriFillSubsector8_2_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); @@ -184,6 +188,7 @@ extern "C" void TriFillSubsector8_10_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); void TriFillSubsector8_11_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); void TriFillSubsector8_12_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriFillSubsector8_13_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); void TriFillSubsector32_0_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); void TriFillSubsector32_1_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); void TriFillSubsector32_2_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); @@ -197,6 +202,7 @@ extern "C" void TriFillSubsector32_10_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); void TriFillSubsector32_11_SSE2(const TriDrawTriangleArgs *, 
WorkerThreadData *); void TriFillSubsector32_12_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriFillSubsector32_13_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); void TriStencil_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); void TriStencilClose_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); } diff --git a/src/r_drawers.h b/src/r_drawers.h index bb541e99c9..e986181abe 100644 --- a/src/r_drawers.h +++ b/src/r_drawers.h @@ -253,10 +253,11 @@ enum class TriBlendMode TranslateAlphaBlend, // blend_alpha_blend(shade(translate(fg)), bg) TranslateAdd, // blend_add(shade(translate(fg)), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)) TranslateSub, // blend_sub(shade(translate(fg)), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)) - TranslateRevSub // blend_revsub(shade(translate(fg)), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)) + TranslateRevSub,// blend_revsub(shade(translate(fg)), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)) + AddSrcColorOneMinusSrcColor // glBlendMode(GL_SRC_COLOR, GL_ONE_MINUS_SRC_COLOR) used by GZDoom's fullbright additive sprites }; -inline int NumTriBlendModes() { return (int)TriBlendMode::TranslateRevSub + 1; } +inline int NumTriBlendModes() { return (int)TriBlendMode::AddSrcColorOneMinusSrcColor + 1; } class Drawers { diff --git a/src/r_poly_sprite.cpp b/src/r_poly_sprite.cpp index edb46c4609..eb1328b485 100644 --- a/src/r_poly_sprite.cpp +++ b/src/r_poly_sprite.cpp @@ -150,6 +150,12 @@ void RenderPolySprite::Render(const TriMatrix &worldToClip, const Vec4f &clipPla args.uniforms.srcalpha = 256; blendmode = args.translation ? 
TriBlendMode::TranslateAdd : TriBlendMode::Add; } + else if (thing->RenderStyle == LegacyRenderStyles[STYLE_Add] && fullbrightSprite && thing->Alpha == 1.0 && args.translation == nullptr) + { + args.uniforms.destalpha = 256; + args.uniforms.srcalpha = 256; + blendmode = TriBlendMode::AddSrcColorOneMinusSrcColor; + } else if (thing->RenderStyle == LegacyRenderStyles[STYLE_Add]) { args.uniforms.destalpha = (uint32_t)(1.0 * 256); diff --git a/src/r_poly_triangle.cpp b/src/r_poly_triangle.cpp index ed7ab55b99..a3398df2b1 100644 --- a/src/r_poly_triangle.cpp +++ b/src/r_poly_triangle.cpp @@ -376,6 +376,7 @@ FString DrawPolyTrianglesCommand::DebugInfo() case TriBlendMode::TranslateAdd: blendmodestr = "TranslateAdd"; break; case TriBlendMode::TranslateSub: blendmodestr = "TranslateSub"; break; case TriBlendMode::TranslateRevSub: blendmodestr = "TranslateRevSub"; break; + case TriBlendMode::AddSrcColorOneMinusSrcColor: blendmodestr = "AddSrcColorOneMinusSrcColor"; break; } FString info; diff --git a/tools/drawergen/fixedfunction/drawercodegen.cpp b/tools/drawergen/fixedfunction/drawercodegen.cpp index 02c6e302a6..a4ad1a9410 100644 --- a/tools/drawergen/fixedfunction/drawercodegen.cpp +++ b/tools/drawergen/fixedfunction/drawercodegen.cpp @@ -160,3 +160,10 @@ SSAVec4i DrawerCodegen::blend_stencil(SSAVec4i stencilcolor, SSAInt fgalpha, SSA SSAVec4i color = (stencilcolor * srcalpha + bg * destalpha) / 256; return color.insert(3, 255); } + +SSAVec4i DrawerCodegen::blend_add_srccolor_oneminussrccolor(SSAVec4i fg, SSAVec4i bg) +{ + SSAVec4i fgcolor = fg + (fg >> 7); // 255 -> 256 + SSAVec4i inv_fgcolor = SSAVec4i(256) - fgcolor; + return fg + (bg * inv_fgcolor + 128) >> 8; +} diff --git a/tools/drawergen/fixedfunction/drawercodegen.h b/tools/drawergen/fixedfunction/drawercodegen.h index 8404f3acd9..d1931e99cd 100644 --- a/tools/drawergen/fixedfunction/drawercodegen.h +++ b/tools/drawergen/fixedfunction/drawercodegen.h @@ -86,6 +86,7 @@ public: SSAVec4i blend_revsub(SSAVec4i fg, 
SSAVec4i bg, SSAInt srcalpha, SSAInt destalpha); SSAVec4i blend_alpha_blend(SSAVec4i fg, SSAVec4i bg); SSAVec4i blend_stencil(SSAVec4i color, SSAInt fgalpha, SSAVec4i bg, SSAInt srcalpha, SSAInt destalpha); + SSAVec4i blend_add_srccolor_oneminussrccolor(SSAVec4i fg, SSAVec4i bg); // Calculates the final alpha values to be used when combined with the source texture alpha channel SSAInt calc_blend_bgalpha(SSAVec4i fg, SSAInt destalpha); diff --git a/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp b/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp index 8178a58ccd..b53a5f7913 100644 --- a/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp +++ b/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp @@ -670,6 +670,10 @@ SSAVec4i DrawTriangleCodegen::ProcessPixel32(SSAVec4i bg, SSAInt *varying) fg = TranslateSample32(uvoffset); output = blend_revsub(shade_bgra_simple(fg, currentlight), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); break; + case TriBlendMode::AddSrcColorOneMinusSrcColor: + fg = Sample32(uvoffset); + output = blend_add_srccolor_oneminussrccolor(shade_bgra_simple(fg, currentlight), bg); + break; } return output; @@ -771,6 +775,12 @@ SSAInt DrawTriangleCodegen::ProcessPixel8(SSAInt bg, SSAInt *varying) output = ToPal8(blend_revsub(fg, ToBgra(bg), srcalpha, calc_blend_bgalpha(fg, destalpha))); output = (palindex == SSAInt(0)).select(bg, output); break; + case TriBlendMode::AddSrcColorOneMinusSrcColor: + palindex = Sample8(uvoffset); + fg = ToBgra(Shade8(palindex)); + output = ToPal8(blend_add_srccolor_oneminussrccolor(fg, ToBgra(bg))); + output = (palindex == SSAInt(0)).select(bg, output); + break; } return output; From 8ad2f4508216d741791157ac22d02ba0e002d66e Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 30 Nov 2016 08:10:04 +0100 Subject: [PATCH 407/912] Fix compile errors for older LLVM --- tools/drawergen/drawergen.cpp | 37 ++++++++++++++++++++++++++--------- 1 file changed, 28 insertions(+), 9 deletions(-) diff --git 
a/tools/drawergen/drawergen.cpp b/tools/drawergen/drawergen.cpp index bd3854d540..db4d896ff0 100644 --- a/tools/drawergen/drawergen.cpp +++ b/tools/drawergen/drawergen.cpp @@ -467,13 +467,18 @@ std::vector LLVMProgram::GenerateObjectFile(const std::string &triple, llvm::Module *module = mModule.get(); const Target *target = TargetRegistry::lookupTarget(triple, errorstring); + +#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 8) + Reloc::Model relocationModel = Reloc::PIC_; +#else Optional relocationModel = Reloc::PIC_; +#endif + CodeModel::Model codeModel = CodeModel::Model::Default; TargetOptions options; options.LessPreciseFPMADOption = true; options.AllowFPOpFusion = FPOpFusion::Fast; - options.Reciprocals = TargetRecip({ "all" }); options.UnsafeFPMath = true; options.NoInfsFPMath = true; options.NoNaNsFPMath = true; @@ -481,30 +486,34 @@ std::vector LLVMProgram::GenerateObjectFile(const std::string &triple, options.NoZerosInBSS = false; options.GuaranteedTailCallOpt = false; options.StackAlignmentOverride = 0; - options.StackSymbolOrdering = true; options.UseInitArray = true; options.DataSections = false; options.FunctionSections = false; - options.UniqueSectionNames = true; - options.EmulatedTLS = false; - options.ExceptionModel = ExceptionHandling::None; options.JTType = JumpTable::Single; // Create a single table for all jumptable functions options.ThreadModel = ThreadModel::POSIX; - options.EABIVersion = EABI::Default; - options.DebuggerTuning = DebuggerKind::Default; options.DisableIntegratedAS = false; options.MCOptions.SanitizeAddress = false; options.MCOptions.MCRelaxAll = false; // relax all fixups in the emitted object file - options.MCOptions.MCIncrementalLinkerCompatible = false; options.MCOptions.DwarfVersion = 0; options.MCOptions.ShowMCInst = false; options.MCOptions.ABIName = ""; options.MCOptions.MCFatalWarnings = false; - options.MCOptions.MCNoWarn = false; options.MCOptions.ShowMCEncoding = false; // Show 
encoding in .s output options.MCOptions.MCUseDwarfDirectory = false; options.MCOptions.AsmVerbose = true; + +#if LLVM_VERSION_MAJOR > 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 8) + options.Reciprocals = TargetRecip({ "all" }); + options.StackSymbolOrdering = true; + options.UniqueSectionNames = true; + options.EmulatedTLS = false; + options.ExceptionModel = ExceptionHandling::None; + options.EABIVersion = EABI::Default; + options.DebuggerTuning = DebuggerKind::Default; + options.MCOptions.MCIncrementalLinkerCompatible = false; + options.MCOptions.MCNoWarn = false; options.MCOptions.PreserveAsmComments = true; +#endif CodeGenOpt::Level optimizationLevel = CodeGenOpt::Aggressive; machine = target->createTargetMachine(triple, cpuName, features, options, relocationModel, codeModel, optimizationLevel); @@ -638,6 +647,16 @@ int main(int argc, char **argv) llvm::InitializeNativeTargetAsmPrinter(); std::string triple = llvm::sys::getDefaultTargetTriple(); + +#ifdef __APPLE__ + // Target triple is x86_64-apple-darwin15.6.0 + auto pos = triple.find("-apple-darwin"); + if (pos != std::string::npos) + { + triple = triple.substr(0, pos) + "-apple-darwin10.11.0"; + } +#endif + std::cout << "Target triple is " << triple << std::endl; std::string cpuName = "pentium4"; From a7a779dd311e2095dfe8a1898353b5fd730af0a5 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 30 Nov 2016 08:32:05 +0100 Subject: [PATCH 408/912] Fix race condition --- src/r_drawers.cpp | 347 +++++++++++++++++++++++----------------------- src/r_drawers.h | 3 + 2 files changed, 175 insertions(+), 175 deletions(-) diff --git a/src/r_drawers.cpp b/src/r_drawers.cpp index f94f409c45..ab45ad939c 100644 --- a/src/r_drawers.cpp +++ b/src/r_drawers.cpp @@ -209,184 +209,181 @@ extern "C" ///////////////////////////////////////////////////////////////////////////// +Drawers::Drawers() +{ + DrawColumn = DrawColumn_SSE2; + DrawColumnAdd = DrawColumnAdd_SSE2; + DrawColumnShaded = DrawColumnShaded_SSE2; + 
DrawColumnAddClamp = DrawColumnAddClamp_SSE2; + DrawColumnSubClamp = DrawColumnSubClamp_SSE2; + DrawColumnRevSubClamp = DrawColumnRevSubClamp_SSE2; + DrawColumnTranslated = DrawColumnTranslated_SSE2; + DrawColumnTlatedAdd = DrawColumnTlatedAdd_SSE2; + DrawColumnAddClampTranslated = DrawColumnAddClampTranslated_SSE2; + DrawColumnSubClampTranslated = DrawColumnSubClampTranslated_SSE2; + DrawColumnRevSubClampTranslated = DrawColumnRevSubClampTranslated_SSE2; + FillColumn = FillColumn_SSE2; + FillColumnAdd = FillColumnAdd_SSE2; + FillColumnAddClamp = FillColumnAddClamp_SSE2; + FillColumnSubClamp = FillColumnSubClamp_SSE2; + FillColumnRevSubClamp = FillColumnRevSubClamp_SSE2; + DrawColumnRt1 = DrawColumnRt1_SSE2; + DrawColumnRt1Copy = DrawColumnRt1Copy_SSE2; + DrawColumnRt1Add = DrawColumnRt1Add_SSE2; + DrawColumnRt1Shaded = DrawColumnRt1Shaded_SSE2; + DrawColumnRt1AddClamp = DrawColumnRt1AddClamp_SSE2; + DrawColumnRt1SubClamp = DrawColumnRt1SubClamp_SSE2; + DrawColumnRt1RevSubClamp = DrawColumnRt1RevSubClamp_SSE2; + DrawColumnRt1Translated = DrawColumnRt1Translated_SSE2; + DrawColumnRt1TlatedAdd = DrawColumnRt1TlatedAdd_SSE2; + DrawColumnRt1AddClampTranslated = DrawColumnRt1AddClampTranslated_SSE2; + DrawColumnRt1SubClampTranslated = DrawColumnRt1SubClampTranslated_SSE2; + DrawColumnRt1RevSubClampTranslated = DrawColumnRt1RevSubClampTranslated_SSE2; + DrawColumnRt4 = DrawColumnRt4_SSE2; + DrawColumnRt4Copy = DrawColumnRt4Copy_SSE2; + DrawColumnRt4Add = DrawColumnRt4Add_SSE2; + DrawColumnRt4Shaded = DrawColumnRt4Shaded_SSE2; + DrawColumnRt4AddClamp = DrawColumnRt4AddClamp_SSE2; + DrawColumnRt4SubClamp = DrawColumnRt4SubClamp_SSE2; + DrawColumnRt4RevSubClamp = DrawColumnRt4RevSubClamp_SSE2; + DrawColumnRt4Translated = DrawColumnRt4Translated_SSE2; + DrawColumnRt4TlatedAdd = DrawColumnRt4TlatedAdd_SSE2; + DrawColumnRt4AddClampTranslated = DrawColumnRt4AddClampTranslated_SSE2; + DrawColumnRt4SubClampTranslated = DrawColumnRt4SubClampTranslated_SSE2; + 
DrawColumnRt4RevSubClampTranslated = DrawColumnRt4RevSubClampTranslated_SSE2; + DrawSpan = DrawSpan_SSE2; + DrawSpanMasked = DrawSpanMasked_SSE2; + DrawSpanTranslucent = DrawSpanTranslucent_SSE2; + DrawSpanMaskedTranslucent = DrawSpanMaskedTranslucent_SSE2; + DrawSpanAddClamp = DrawSpanAddClamp_SSE2; + DrawSpanMaskedAddClamp = DrawSpanMaskedAddClamp_SSE2; + vlinec1 = vlinec1_SSE2; + vlinec4 = vlinec4_SSE2; + mvlinec1 = mvlinec1_SSE2; + mvlinec4 = mvlinec4_SSE2; + tmvline1_add = tmvline1_add_SSE2; + tmvline4_add = tmvline4_add_SSE2; + tmvline1_addclamp = tmvline1_addclamp_SSE2; + tmvline4_addclamp = tmvline4_addclamp_SSE2; + tmvline1_subclamp = tmvline1_subclamp_SSE2; + tmvline4_subclamp = tmvline4_subclamp_SSE2; + tmvline1_revsubclamp = tmvline1_revsubclamp_SSE2; + tmvline4_revsubclamp = tmvline4_revsubclamp_SSE2; + DrawSky1 = DrawSky1_SSE2; + DrawSky4 = DrawSky4_SSE2; + DrawDoubleSky1 = DrawDoubleSky1_SSE2; + DrawDoubleSky4 = DrawDoubleSky4_SSE2; + TriDrawNormal8.push_back(TriDrawNormal8_0_SSE2); + TriDrawNormal8.push_back(TriDrawNormal8_1_SSE2); + TriDrawNormal8.push_back(TriDrawNormal8_2_SSE2); + TriDrawNormal8.push_back(TriDrawNormal8_3_SSE2); + TriDrawNormal8.push_back(TriDrawNormal8_4_SSE2); + TriDrawNormal8.push_back(TriDrawNormal8_5_SSE2); + TriDrawNormal8.push_back(TriDrawNormal8_6_SSE2); + TriDrawNormal8.push_back(TriDrawNormal8_7_SSE2); + TriDrawNormal8.push_back(TriDrawNormal8_8_SSE2); + TriDrawNormal8.push_back(TriDrawNormal8_9_SSE2); + TriDrawNormal8.push_back(TriDrawNormal8_10_SSE2); + TriDrawNormal8.push_back(TriDrawNormal8_11_SSE2); + TriDrawNormal8.push_back(TriDrawNormal8_12_SSE2); + TriDrawNormal32.push_back(TriDrawNormal32_0_SSE2); + TriDrawNormal32.push_back(TriDrawNormal32_1_SSE2); + TriDrawNormal32.push_back(TriDrawNormal32_2_SSE2); + TriDrawNormal32.push_back(TriDrawNormal32_3_SSE2); + TriDrawNormal32.push_back(TriDrawNormal32_4_SSE2); + TriDrawNormal32.push_back(TriDrawNormal32_5_SSE2); + TriDrawNormal32.push_back(TriDrawNormal32_6_SSE2); 
+ TriDrawNormal32.push_back(TriDrawNormal32_7_SSE2); + TriDrawNormal32.push_back(TriDrawNormal32_8_SSE2); + TriDrawNormal32.push_back(TriDrawNormal32_9_SSE2); + TriDrawNormal32.push_back(TriDrawNormal32_10_SSE2); + TriDrawNormal32.push_back(TriDrawNormal32_11_SSE2); + TriDrawNormal32.push_back(TriDrawNormal32_12_SSE2); + TriFillNormal8.push_back(TriFillNormal8_0_SSE2); + TriFillNormal8.push_back(TriFillNormal8_1_SSE2); + TriFillNormal8.push_back(TriFillNormal8_2_SSE2); + TriFillNormal8.push_back(TriFillNormal8_3_SSE2); + TriFillNormal8.push_back(TriFillNormal8_4_SSE2); + TriFillNormal8.push_back(TriFillNormal8_5_SSE2); + TriFillNormal8.push_back(TriFillNormal8_6_SSE2); + TriFillNormal8.push_back(TriFillNormal8_7_SSE2); + TriFillNormal8.push_back(TriFillNormal8_8_SSE2); + TriFillNormal8.push_back(TriFillNormal8_9_SSE2); + TriFillNormal8.push_back(TriFillNormal8_10_SSE2); + TriFillNormal8.push_back(TriFillNormal8_11_SSE2); + TriFillNormal8.push_back(TriFillNormal8_12_SSE2); + TriFillNormal32.push_back(TriFillNormal32_0_SSE2); + TriFillNormal32.push_back(TriFillNormal32_1_SSE2); + TriFillNormal32.push_back(TriFillNormal32_2_SSE2); + TriFillNormal32.push_back(TriFillNormal32_3_SSE2); + TriFillNormal32.push_back(TriFillNormal32_4_SSE2); + TriFillNormal32.push_back(TriFillNormal32_5_SSE2); + TriFillNormal32.push_back(TriFillNormal32_6_SSE2); + TriFillNormal32.push_back(TriFillNormal32_7_SSE2); + TriFillNormal32.push_back(TriFillNormal32_8_SSE2); + TriFillNormal32.push_back(TriFillNormal32_9_SSE2); + TriFillNormal32.push_back(TriFillNormal32_10_SSE2); + TriFillNormal32.push_back(TriFillNormal32_11_SSE2); + TriFillNormal32.push_back(TriFillNormal32_12_SSE2); + TriDrawSubsector8.push_back(TriDrawSubsector8_0_SSE2); + TriDrawSubsector8.push_back(TriDrawSubsector8_1_SSE2); + TriDrawSubsector8.push_back(TriDrawSubsector8_2_SSE2); + TriDrawSubsector8.push_back(TriDrawSubsector8_3_SSE2); + TriDrawSubsector8.push_back(TriDrawSubsector8_4_SSE2); + 
TriDrawSubsector8.push_back(TriDrawSubsector8_5_SSE2); + TriDrawSubsector8.push_back(TriDrawSubsector8_6_SSE2); + TriDrawSubsector8.push_back(TriDrawSubsector8_7_SSE2); + TriDrawSubsector8.push_back(TriDrawSubsector8_8_SSE2); + TriDrawSubsector8.push_back(TriDrawSubsector8_9_SSE2); + TriDrawSubsector8.push_back(TriDrawSubsector8_10_SSE2); + TriDrawSubsector8.push_back(TriDrawSubsector8_11_SSE2); + TriDrawSubsector8.push_back(TriDrawSubsector8_12_SSE2); + TriDrawSubsector32.push_back(TriDrawSubsector32_0_SSE2); + TriDrawSubsector32.push_back(TriDrawSubsector32_1_SSE2); + TriDrawSubsector32.push_back(TriDrawSubsector32_2_SSE2); + TriDrawSubsector32.push_back(TriDrawSubsector32_3_SSE2); + TriDrawSubsector32.push_back(TriDrawSubsector32_4_SSE2); + TriDrawSubsector32.push_back(TriDrawSubsector32_5_SSE2); + TriDrawSubsector32.push_back(TriDrawSubsector32_6_SSE2); + TriDrawSubsector32.push_back(TriDrawSubsector32_7_SSE2); + TriDrawSubsector32.push_back(TriDrawSubsector32_8_SSE2); + TriDrawSubsector32.push_back(TriDrawSubsector32_9_SSE2); + TriDrawSubsector32.push_back(TriDrawSubsector32_10_SSE2); + TriDrawSubsector32.push_back(TriDrawSubsector32_11_SSE2); + TriDrawSubsector32.push_back(TriDrawSubsector32_12_SSE2); + TriFillSubsector8.push_back(TriFillSubsector8_0_SSE2); + TriFillSubsector8.push_back(TriFillSubsector8_1_SSE2); + TriFillSubsector8.push_back(TriFillSubsector8_2_SSE2); + TriFillSubsector8.push_back(TriFillSubsector8_3_SSE2); + TriFillSubsector8.push_back(TriFillSubsector8_4_SSE2); + TriFillSubsector8.push_back(TriFillSubsector8_5_SSE2); + TriFillSubsector8.push_back(TriFillSubsector8_6_SSE2); + TriFillSubsector8.push_back(TriFillSubsector8_7_SSE2); + TriFillSubsector8.push_back(TriFillSubsector8_8_SSE2); + TriFillSubsector8.push_back(TriFillSubsector8_9_SSE2); + TriFillSubsector8.push_back(TriFillSubsector8_10_SSE2); + TriFillSubsector8.push_back(TriFillSubsector8_11_SSE2); + TriFillSubsector8.push_back(TriFillSubsector8_12_SSE2); + 
TriFillSubsector32.push_back(TriFillSubsector32_0_SSE2); + TriFillSubsector32.push_back(TriFillSubsector32_1_SSE2); + TriFillSubsector32.push_back(TriFillSubsector32_2_SSE2); + TriFillSubsector32.push_back(TriFillSubsector32_3_SSE2); + TriFillSubsector32.push_back(TriFillSubsector32_4_SSE2); + TriFillSubsector32.push_back(TriFillSubsector32_5_SSE2); + TriFillSubsector32.push_back(TriFillSubsector32_6_SSE2); + TriFillSubsector32.push_back(TriFillSubsector32_7_SSE2); + TriFillSubsector32.push_back(TriFillSubsector32_8_SSE2); + TriFillSubsector32.push_back(TriFillSubsector32_9_SSE2); + TriFillSubsector32.push_back(TriFillSubsector32_10_SSE2); + TriFillSubsector32.push_back(TriFillSubsector32_11_SSE2); + TriFillSubsector32.push_back(TriFillSubsector32_12_SSE2); + TriStencil = TriStencil_SSE2; + TriStencilClose = TriStencilClose_SSE2; +} + Drawers *Drawers::Instance() { static Drawers drawers; - static bool firstcall = true; - - if (!firstcall) - return &drawers; - - drawers.DrawColumn = DrawColumn_SSE2; - drawers.DrawColumnAdd = DrawColumnAdd_SSE2; - drawers.DrawColumnShaded = DrawColumnShaded_SSE2; - drawers.DrawColumnAddClamp = DrawColumnAddClamp_SSE2; - drawers.DrawColumnSubClamp = DrawColumnSubClamp_SSE2; - drawers.DrawColumnRevSubClamp = DrawColumnRevSubClamp_SSE2; - drawers.DrawColumnTranslated = DrawColumnTranslated_SSE2; - drawers.DrawColumnTlatedAdd = DrawColumnTlatedAdd_SSE2; - drawers.DrawColumnAddClampTranslated = DrawColumnAddClampTranslated_SSE2; - drawers.DrawColumnSubClampTranslated = DrawColumnSubClampTranslated_SSE2; - drawers.DrawColumnRevSubClampTranslated = DrawColumnRevSubClampTranslated_SSE2; - drawers.FillColumn = FillColumn_SSE2; - drawers.FillColumnAdd = FillColumnAdd_SSE2; - drawers.FillColumnAddClamp = FillColumnAddClamp_SSE2; - drawers.FillColumnSubClamp = FillColumnSubClamp_SSE2; - drawers.FillColumnRevSubClamp = FillColumnRevSubClamp_SSE2; - drawers.DrawColumnRt1 = DrawColumnRt1_SSE2; - drawers.DrawColumnRt1Copy = DrawColumnRt1Copy_SSE2; 
- drawers.DrawColumnRt1Add = DrawColumnRt1Add_SSE2; - drawers.DrawColumnRt1Shaded = DrawColumnRt1Shaded_SSE2; - drawers.DrawColumnRt1AddClamp = DrawColumnRt1AddClamp_SSE2; - drawers.DrawColumnRt1SubClamp = DrawColumnRt1SubClamp_SSE2; - drawers.DrawColumnRt1RevSubClamp = DrawColumnRt1RevSubClamp_SSE2; - drawers.DrawColumnRt1Translated = DrawColumnRt1Translated_SSE2; - drawers.DrawColumnRt1TlatedAdd = DrawColumnRt1TlatedAdd_SSE2; - drawers.DrawColumnRt1AddClampTranslated = DrawColumnRt1AddClampTranslated_SSE2; - drawers.DrawColumnRt1SubClampTranslated = DrawColumnRt1SubClampTranslated_SSE2; - drawers.DrawColumnRt1RevSubClampTranslated = DrawColumnRt1RevSubClampTranslated_SSE2; - drawers.DrawColumnRt4 = DrawColumnRt4_SSE2; - drawers.DrawColumnRt4Copy = DrawColumnRt4Copy_SSE2; - drawers.DrawColumnRt4Add = DrawColumnRt4Add_SSE2; - drawers.DrawColumnRt4Shaded = DrawColumnRt4Shaded_SSE2; - drawers.DrawColumnRt4AddClamp = DrawColumnRt4AddClamp_SSE2; - drawers.DrawColumnRt4SubClamp = DrawColumnRt4SubClamp_SSE2; - drawers.DrawColumnRt4RevSubClamp = DrawColumnRt4RevSubClamp_SSE2; - drawers.DrawColumnRt4Translated = DrawColumnRt4Translated_SSE2; - drawers.DrawColumnRt4TlatedAdd = DrawColumnRt4TlatedAdd_SSE2; - drawers.DrawColumnRt4AddClampTranslated = DrawColumnRt4AddClampTranslated_SSE2; - drawers.DrawColumnRt4SubClampTranslated = DrawColumnRt4SubClampTranslated_SSE2; - drawers.DrawColumnRt4RevSubClampTranslated = DrawColumnRt4RevSubClampTranslated_SSE2; - drawers.DrawSpan = DrawSpan_SSE2; - drawers.DrawSpanMasked = DrawSpanMasked_SSE2; - drawers.DrawSpanTranslucent = DrawSpanTranslucent_SSE2; - drawers.DrawSpanMaskedTranslucent = DrawSpanMaskedTranslucent_SSE2; - drawers.DrawSpanAddClamp = DrawSpanAddClamp_SSE2; - drawers.DrawSpanMaskedAddClamp = DrawSpanMaskedAddClamp_SSE2; - drawers.vlinec1 = vlinec1_SSE2; - drawers.vlinec4 = vlinec4_SSE2; - drawers.mvlinec1 = mvlinec1_SSE2; - drawers.mvlinec4 = mvlinec4_SSE2; - drawers.tmvline1_add = tmvline1_add_SSE2; - 
drawers.tmvline4_add = tmvline4_add_SSE2; - drawers.tmvline1_addclamp = tmvline1_addclamp_SSE2; - drawers.tmvline4_addclamp = tmvline4_addclamp_SSE2; - drawers.tmvline1_subclamp = tmvline1_subclamp_SSE2; - drawers.tmvline4_subclamp = tmvline4_subclamp_SSE2; - drawers.tmvline1_revsubclamp = tmvline1_revsubclamp_SSE2; - drawers.tmvline4_revsubclamp = tmvline4_revsubclamp_SSE2; - drawers.DrawSky1 = DrawSky1_SSE2; - drawers.DrawSky4 = DrawSky4_SSE2; - drawers.DrawDoubleSky1 = DrawDoubleSky1_SSE2; - drawers.DrawDoubleSky4 = DrawDoubleSky4_SSE2; - drawers.TriDrawNormal8.push_back(TriDrawNormal8_0_SSE2); - drawers.TriDrawNormal8.push_back(TriDrawNormal8_1_SSE2); - drawers.TriDrawNormal8.push_back(TriDrawNormal8_2_SSE2); - drawers.TriDrawNormal8.push_back(TriDrawNormal8_3_SSE2); - drawers.TriDrawNormal8.push_back(TriDrawNormal8_4_SSE2); - drawers.TriDrawNormal8.push_back(TriDrawNormal8_5_SSE2); - drawers.TriDrawNormal8.push_back(TriDrawNormal8_6_SSE2); - drawers.TriDrawNormal8.push_back(TriDrawNormal8_7_SSE2); - drawers.TriDrawNormal8.push_back(TriDrawNormal8_8_SSE2); - drawers.TriDrawNormal8.push_back(TriDrawNormal8_9_SSE2); - drawers.TriDrawNormal8.push_back(TriDrawNormal8_10_SSE2); - drawers.TriDrawNormal8.push_back(TriDrawNormal8_11_SSE2); - drawers.TriDrawNormal8.push_back(TriDrawNormal8_12_SSE2); - drawers.TriDrawNormal32.push_back(TriDrawNormal32_0_SSE2); - drawers.TriDrawNormal32.push_back(TriDrawNormal32_1_SSE2); - drawers.TriDrawNormal32.push_back(TriDrawNormal32_2_SSE2); - drawers.TriDrawNormal32.push_back(TriDrawNormal32_3_SSE2); - drawers.TriDrawNormal32.push_back(TriDrawNormal32_4_SSE2); - drawers.TriDrawNormal32.push_back(TriDrawNormal32_5_SSE2); - drawers.TriDrawNormal32.push_back(TriDrawNormal32_6_SSE2); - drawers.TriDrawNormal32.push_back(TriDrawNormal32_7_SSE2); - drawers.TriDrawNormal32.push_back(TriDrawNormal32_8_SSE2); - drawers.TriDrawNormal32.push_back(TriDrawNormal32_9_SSE2); - drawers.TriDrawNormal32.push_back(TriDrawNormal32_10_SSE2); - 
drawers.TriDrawNormal32.push_back(TriDrawNormal32_11_SSE2); - drawers.TriDrawNormal32.push_back(TriDrawNormal32_12_SSE2); - drawers.TriFillNormal8.push_back(TriFillNormal8_0_SSE2); - drawers.TriFillNormal8.push_back(TriFillNormal8_1_SSE2); - drawers.TriFillNormal8.push_back(TriFillNormal8_2_SSE2); - drawers.TriFillNormal8.push_back(TriFillNormal8_3_SSE2); - drawers.TriFillNormal8.push_back(TriFillNormal8_4_SSE2); - drawers.TriFillNormal8.push_back(TriFillNormal8_5_SSE2); - drawers.TriFillNormal8.push_back(TriFillNormal8_6_SSE2); - drawers.TriFillNormal8.push_back(TriFillNormal8_7_SSE2); - drawers.TriFillNormal8.push_back(TriFillNormal8_8_SSE2); - drawers.TriFillNormal8.push_back(TriFillNormal8_9_SSE2); - drawers.TriFillNormal8.push_back(TriFillNormal8_10_SSE2); - drawers.TriFillNormal8.push_back(TriFillNormal8_11_SSE2); - drawers.TriFillNormal8.push_back(TriFillNormal8_12_SSE2); - drawers.TriFillNormal32.push_back(TriFillNormal32_0_SSE2); - drawers.TriFillNormal32.push_back(TriFillNormal32_1_SSE2); - drawers.TriFillNormal32.push_back(TriFillNormal32_2_SSE2); - drawers.TriFillNormal32.push_back(TriFillNormal32_3_SSE2); - drawers.TriFillNormal32.push_back(TriFillNormal32_4_SSE2); - drawers.TriFillNormal32.push_back(TriFillNormal32_5_SSE2); - drawers.TriFillNormal32.push_back(TriFillNormal32_6_SSE2); - drawers.TriFillNormal32.push_back(TriFillNormal32_7_SSE2); - drawers.TriFillNormal32.push_back(TriFillNormal32_8_SSE2); - drawers.TriFillNormal32.push_back(TriFillNormal32_9_SSE2); - drawers.TriFillNormal32.push_back(TriFillNormal32_10_SSE2); - drawers.TriFillNormal32.push_back(TriFillNormal32_11_SSE2); - drawers.TriFillNormal32.push_back(TriFillNormal32_12_SSE2); - drawers.TriDrawSubsector8.push_back(TriDrawSubsector8_0_SSE2); - drawers.TriDrawSubsector8.push_back(TriDrawSubsector8_1_SSE2); - drawers.TriDrawSubsector8.push_back(TriDrawSubsector8_2_SSE2); - drawers.TriDrawSubsector8.push_back(TriDrawSubsector8_3_SSE2); - 
drawers.TriDrawSubsector8.push_back(TriDrawSubsector8_4_SSE2); - drawers.TriDrawSubsector8.push_back(TriDrawSubsector8_5_SSE2); - drawers.TriDrawSubsector8.push_back(TriDrawSubsector8_6_SSE2); - drawers.TriDrawSubsector8.push_back(TriDrawSubsector8_7_SSE2); - drawers.TriDrawSubsector8.push_back(TriDrawSubsector8_8_SSE2); - drawers.TriDrawSubsector8.push_back(TriDrawSubsector8_9_SSE2); - drawers.TriDrawSubsector8.push_back(TriDrawSubsector8_10_SSE2); - drawers.TriDrawSubsector8.push_back(TriDrawSubsector8_11_SSE2); - drawers.TriDrawSubsector8.push_back(TriDrawSubsector8_12_SSE2); - drawers.TriDrawSubsector32.push_back(TriDrawSubsector32_0_SSE2); - drawers.TriDrawSubsector32.push_back(TriDrawSubsector32_1_SSE2); - drawers.TriDrawSubsector32.push_back(TriDrawSubsector32_2_SSE2); - drawers.TriDrawSubsector32.push_back(TriDrawSubsector32_3_SSE2); - drawers.TriDrawSubsector32.push_back(TriDrawSubsector32_4_SSE2); - drawers.TriDrawSubsector32.push_back(TriDrawSubsector32_5_SSE2); - drawers.TriDrawSubsector32.push_back(TriDrawSubsector32_6_SSE2); - drawers.TriDrawSubsector32.push_back(TriDrawSubsector32_7_SSE2); - drawers.TriDrawSubsector32.push_back(TriDrawSubsector32_8_SSE2); - drawers.TriDrawSubsector32.push_back(TriDrawSubsector32_9_SSE2); - drawers.TriDrawSubsector32.push_back(TriDrawSubsector32_10_SSE2); - drawers.TriDrawSubsector32.push_back(TriDrawSubsector32_11_SSE2); - drawers.TriDrawSubsector32.push_back(TriDrawSubsector32_12_SSE2); - drawers.TriFillSubsector8.push_back(TriFillSubsector8_0_SSE2); - drawers.TriFillSubsector8.push_back(TriFillSubsector8_1_SSE2); - drawers.TriFillSubsector8.push_back(TriFillSubsector8_2_SSE2); - drawers.TriFillSubsector8.push_back(TriFillSubsector8_3_SSE2); - drawers.TriFillSubsector8.push_back(TriFillSubsector8_4_SSE2); - drawers.TriFillSubsector8.push_back(TriFillSubsector8_5_SSE2); - drawers.TriFillSubsector8.push_back(TriFillSubsector8_6_SSE2); - drawers.TriFillSubsector8.push_back(TriFillSubsector8_7_SSE2); - 
drawers.TriFillSubsector8.push_back(TriFillSubsector8_8_SSE2); - drawers.TriFillSubsector8.push_back(TriFillSubsector8_9_SSE2); - drawers.TriFillSubsector8.push_back(TriFillSubsector8_10_SSE2); - drawers.TriFillSubsector8.push_back(TriFillSubsector8_11_SSE2); - drawers.TriFillSubsector8.push_back(TriFillSubsector8_12_SSE2); - drawers.TriFillSubsector32.push_back(TriFillSubsector32_0_SSE2); - drawers.TriFillSubsector32.push_back(TriFillSubsector32_1_SSE2); - drawers.TriFillSubsector32.push_back(TriFillSubsector32_2_SSE2); - drawers.TriFillSubsector32.push_back(TriFillSubsector32_3_SSE2); - drawers.TriFillSubsector32.push_back(TriFillSubsector32_4_SSE2); - drawers.TriFillSubsector32.push_back(TriFillSubsector32_5_SSE2); - drawers.TriFillSubsector32.push_back(TriFillSubsector32_6_SSE2); - drawers.TriFillSubsector32.push_back(TriFillSubsector32_7_SSE2); - drawers.TriFillSubsector32.push_back(TriFillSubsector32_8_SSE2); - drawers.TriFillSubsector32.push_back(TriFillSubsector32_9_SSE2); - drawers.TriFillSubsector32.push_back(TriFillSubsector32_10_SSE2); - drawers.TriFillSubsector32.push_back(TriFillSubsector32_11_SSE2); - drawers.TriFillSubsector32.push_back(TriFillSubsector32_12_SSE2); - drawers.TriStencil = TriStencil_SSE2; - drawers.TriStencilClose = TriStencilClose_SSE2; - - firstcall = false; return &drawers; } diff --git a/src/r_drawers.h b/src/r_drawers.h index e986181abe..47a145b5a5 100644 --- a/src/r_drawers.h +++ b/src/r_drawers.h @@ -340,4 +340,7 @@ public: std::vector TriFillSubsector32; void(*TriStencil)(const TriDrawTriangleArgs *, WorkerThreadData *) = nullptr; void(*TriStencilClose)(const TriDrawTriangleArgs *, WorkerThreadData *) = nullptr; + +private: + Drawers(); }; From 34bf5819574b082ce1f89f60e941c7f955ecb946 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 30 Nov 2016 08:51:39 +0100 Subject: [PATCH 409/912] Fix new blend mode --- src/r_drawers.cpp | 10 ++++++++++ tools/drawergen/fixedfunction/drawercodegen.cpp | 2 +- 2 files changed, 11 
insertions(+), 1 deletion(-) diff --git a/src/r_drawers.cpp b/src/r_drawers.cpp index ab45ad939c..66cbdd7501 100644 --- a/src/r_drawers.cpp +++ b/src/r_drawers.cpp @@ -106,6 +106,7 @@ extern "C" void TriDrawNormal8_10_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); void TriDrawNormal8_11_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); void TriDrawNormal8_12_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriDrawNormal8_13_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); void TriDrawNormal32_0_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); void TriDrawNormal32_1_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); void TriDrawNormal32_2_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); @@ -133,6 +134,7 @@ extern "C" void TriFillNormal8_10_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); void TriFillNormal8_11_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); void TriFillNormal8_12_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriFillNormal8_13_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); void TriFillNormal32_0_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); void TriFillNormal32_1_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); void TriFillNormal32_2_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); @@ -286,6 +288,7 @@ Drawers::Drawers() TriDrawNormal8.push_back(TriDrawNormal8_10_SSE2); TriDrawNormal8.push_back(TriDrawNormal8_11_SSE2); TriDrawNormal8.push_back(TriDrawNormal8_12_SSE2); + TriDrawNormal8.push_back(TriDrawNormal8_13_SSE2); TriDrawNormal32.push_back(TriDrawNormal32_0_SSE2); TriDrawNormal32.push_back(TriDrawNormal32_1_SSE2); TriDrawNormal32.push_back(TriDrawNormal32_2_SSE2); @@ -299,6 +302,7 @@ Drawers::Drawers() TriDrawNormal32.push_back(TriDrawNormal32_10_SSE2); TriDrawNormal32.push_back(TriDrawNormal32_11_SSE2); TriDrawNormal32.push_back(TriDrawNormal32_12_SSE2); + TriDrawNormal32.push_back(TriDrawNormal32_13_SSE2); 
TriFillNormal8.push_back(TriFillNormal8_0_SSE2); TriFillNormal8.push_back(TriFillNormal8_1_SSE2); TriFillNormal8.push_back(TriFillNormal8_2_SSE2); @@ -312,6 +316,7 @@ Drawers::Drawers() TriFillNormal8.push_back(TriFillNormal8_10_SSE2); TriFillNormal8.push_back(TriFillNormal8_11_SSE2); TriFillNormal8.push_back(TriFillNormal8_12_SSE2); + TriFillNormal8.push_back(TriFillNormal8_13_SSE2); TriFillNormal32.push_back(TriFillNormal32_0_SSE2); TriFillNormal32.push_back(TriFillNormal32_1_SSE2); TriFillNormal32.push_back(TriFillNormal32_2_SSE2); @@ -325,6 +330,7 @@ Drawers::Drawers() TriFillNormal32.push_back(TriFillNormal32_10_SSE2); TriFillNormal32.push_back(TriFillNormal32_11_SSE2); TriFillNormal32.push_back(TriFillNormal32_12_SSE2); + TriFillNormal32.push_back(TriFillNormal32_13_SSE2); TriDrawSubsector8.push_back(TriDrawSubsector8_0_SSE2); TriDrawSubsector8.push_back(TriDrawSubsector8_1_SSE2); TriDrawSubsector8.push_back(TriDrawSubsector8_2_SSE2); @@ -338,6 +344,7 @@ Drawers::Drawers() TriDrawSubsector8.push_back(TriDrawSubsector8_10_SSE2); TriDrawSubsector8.push_back(TriDrawSubsector8_11_SSE2); TriDrawSubsector8.push_back(TriDrawSubsector8_12_SSE2); + TriDrawSubsector8.push_back(TriDrawSubsector8_13_SSE2); TriDrawSubsector32.push_back(TriDrawSubsector32_0_SSE2); TriDrawSubsector32.push_back(TriDrawSubsector32_1_SSE2); TriDrawSubsector32.push_back(TriDrawSubsector32_2_SSE2); @@ -351,6 +358,7 @@ Drawers::Drawers() TriDrawSubsector32.push_back(TriDrawSubsector32_10_SSE2); TriDrawSubsector32.push_back(TriDrawSubsector32_11_SSE2); TriDrawSubsector32.push_back(TriDrawSubsector32_12_SSE2); + TriDrawSubsector32.push_back(TriDrawSubsector32_13_SSE2); TriFillSubsector8.push_back(TriFillSubsector8_0_SSE2); TriFillSubsector8.push_back(TriFillSubsector8_1_SSE2); TriFillSubsector8.push_back(TriFillSubsector8_2_SSE2); @@ -364,6 +372,7 @@ Drawers::Drawers() TriFillSubsector8.push_back(TriFillSubsector8_10_SSE2); TriFillSubsector8.push_back(TriFillSubsector8_11_SSE2); 
TriFillSubsector8.push_back(TriFillSubsector8_12_SSE2); + TriFillSubsector8.push_back(TriFillSubsector8_13_SSE2); TriFillSubsector32.push_back(TriFillSubsector32_0_SSE2); TriFillSubsector32.push_back(TriFillSubsector32_1_SSE2); TriFillSubsector32.push_back(TriFillSubsector32_2_SSE2); @@ -377,6 +386,7 @@ Drawers::Drawers() TriFillSubsector32.push_back(TriFillSubsector32_10_SSE2); TriFillSubsector32.push_back(TriFillSubsector32_11_SSE2); TriFillSubsector32.push_back(TriFillSubsector32_12_SSE2); + TriFillSubsector32.push_back(TriFillSubsector32_13_SSE2); TriStencil = TriStencil_SSE2; TriStencilClose = TriStencilClose_SSE2; } diff --git a/tools/drawergen/fixedfunction/drawercodegen.cpp b/tools/drawergen/fixedfunction/drawercodegen.cpp index a4ad1a9410..ccff40ac04 100644 --- a/tools/drawergen/fixedfunction/drawercodegen.cpp +++ b/tools/drawergen/fixedfunction/drawercodegen.cpp @@ -165,5 +165,5 @@ SSAVec4i DrawerCodegen::blend_add_srccolor_oneminussrccolor(SSAVec4i fg, SSAVec4 { SSAVec4i fgcolor = fg + (fg >> 7); // 255 -> 256 SSAVec4i inv_fgcolor = SSAVec4i(256) - fgcolor; - return fg + (bg * inv_fgcolor + 128) >> 8; + return fg + ((bg * inv_fgcolor + 128) >> 8); } From 837d8f44d1a8c95ac87b5042c4d2ac5c793235ae Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 30 Nov 2016 09:08:49 +0100 Subject: [PATCH 410/912] The precision of the SSE2 rcp opcode was a little bit too low --- tools/drawergen/fixedfunction/drawtrianglecodegen.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp b/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp index b53a5f7913..2671ac0cf7 100644 --- a/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp +++ b/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp @@ -204,7 +204,7 @@ void DrawTriangleCodegen::LoopBlockX() stack_x.store(minx); stack_posx_w.store(posy_w); for (int i = 0; i < TriVertex::NumVarying; i++) - 
stack_posx_varying[i].store(stack_posy_varying[i].load()); + stack_posx_varying[i].store(posy_varying[i]); SSAForBlock loop; x = stack_x.load(); @@ -277,7 +277,7 @@ void DrawTriangleCodegen::LoopBlockX() void DrawTriangleCodegen::SetupAffineBlock() { // Calculate varying variables for affine block - SSAVec4f rcpW = SSAVec4f::rcp(SSAVec4f(posx_w, posx_w + gradWX, posx_w + gradWY, posx_w + gradWX + gradWY)); + SSAVec4f rcpW = SSAVec4f(1.0f) / SSAVec4f(posx_w, posx_w + gradWX, posx_w + gradWY, posx_w + gradWX + gradWY); for (int i = 0; i < TriVertex::NumVarying; i++) { // Top left, top right, bottom left, bottom right: From 3872e39afc35339d3a6d99e076aeef07b8a5bc04 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 30 Nov 2016 12:56:37 +0100 Subject: [PATCH 411/912] Simplify affine texturing a bit --- .../fixedfunction/drawtrianglecodegen.cpp | 133 +++++++----------- .../fixedfunction/drawtrianglecodegen.h | 15 +- 2 files changed, 60 insertions(+), 88 deletions(-) diff --git a/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp b/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp index 2671ac0cf7..4184dc5362 100644 --- a/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp +++ b/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp @@ -143,12 +143,8 @@ void DrawTriangleCodegen::Setup() } gradWX = gradWX * (float)q; - gradWY = gradWY * (float)q; for (int i = 0; i < TriVertex::NumVarying; i++) - { gradVaryingX[i] = gradVaryingX[i] * (float)q; - gradVaryingY[i] = gradVaryingY[i] * (float)q; - } } SSAFloat DrawTriangleCodegen::gradx(SSAFloat x0, SSAFloat y0, SSAFloat x1, SSAFloat y1, SSAFloat x2, SSAFloat y2, SSAFloat c0, SSAFloat c1, SSAFloat c2) @@ -173,9 +169,9 @@ void DrawTriangleCodegen::LoopBlockY() stack_y.store(miny + blocks_skipped * q); stack_dest.store(dest[blocks_skipped * q * pitch * pixelsize]); stack_subsectorGBuffer.store(subsectorGBuffer[blocks_skipped * q * pitch]); - stack_posy_w.store(stack_posy_w.load() + gradWY * blocks_skipped); + 
stack_posy_w.store(stack_posy_w.load() + gradWY * (q * blocks_skipped)); for (int i = 0; i < TriVertex::NumVarying; i++) - stack_posy_varying[i].store(stack_posy_varying[i].load() + gradVaryingY[i] * blocks_skipped); + stack_posy_varying[i].store(stack_posy_varying[i].load() + gradVaryingY[i] * (blocks_skipped * q)); SSAForBlock loop; y = stack_y.load(); @@ -188,9 +184,9 @@ void DrawTriangleCodegen::LoopBlockY() { LoopBlockX(); - stack_posy_w.store(posy_w + gradWY * thread.num_cores); + stack_posy_w.store(posy_w + gradWY * (q * thread.num_cores)); for (int i = 0; i < TriVertex::NumVarying; i++) - stack_posy_varying[i].store(posy_varying[i] + gradVaryingY[i] * thread.num_cores); + stack_posy_varying[i].store(posy_varying[i] + gradVaryingY[i] * (q * thread.num_cores)); stack_dest.store(dest[q * pitch * pixelsize * thread.num_cores]); stack_subsectorGBuffer.store(subsectorGBuffer[q * pitch * thread.num_cores]); @@ -246,7 +242,24 @@ void DrawTriangleCodegen::LoopBlockX() // Check if block needs clipping SSABool clipneeded = x < clipleft || (x + q) > clipright || y < cliptop || (y + q) > clipbottom; - SetupAffineBlock(); + SSAFloat globVis = SSAFloat(1706.0f); + SSAFloat vis = globVis * posx_w; + SSAFloat shade = 64.0f - (SSAFloat(light * 255 / 256) + 12.0f) * 32.0f / 128.0f; + SSAFloat lightscale = SSAFloat::clamp((shade - SSAFloat::MIN(SSAFloat(24.0f), vis)) / 32.0f, SSAFloat(0.0f), SSAFloat(31.0f / 32.0f)); + SSAInt diminishedlight = SSAInt(SSAFloat::clamp((1.0f - lightscale) * 256.0f + 0.5f, SSAFloat(0.0f), SSAFloat(256.0f)), false); + + if (!truecolor) + { + SSAInt diminishedindex = SSAInt(lightscale * 32.0f, false); + SSAInt lightindex = SSAInt::MIN((256 - light) * 32 / 256, SSAInt(31)); + SSAInt colormapindex = is_fixed_light.select(lightindex, diminishedindex); + currentcolormap = Colormaps[colormapindex << 8]; + } + else + { + currentlight = is_fixed_light.select(light, diminishedlight); + } + SetStencilBlock(x / 8 + y / 8 * stencilPitch); SSABool covered = a 
== SSAInt(0xF) && b == SSAInt(0xF) && c == SSAInt(0xF) && !clipneeded && StencilIsSingleValue(); @@ -276,41 +289,15 @@ void DrawTriangleCodegen::LoopBlockX() void DrawTriangleCodegen::SetupAffineBlock() { - // Calculate varying variables for affine block - SSAVec4f rcpW = SSAVec4f(1.0f) / SSAVec4f(posx_w, posx_w + gradWX, posx_w + gradWY, posx_w + gradWX + gradWY); + SSAFloat rcpW0 = (float)0x01000000 / AffineW; + SSAFloat rcpW1 = (float)0x01000000 / (AffineW + gradWX); + for (int i = 0; i < TriVertex::NumVarying; i++) { - // Top left, top right, bottom left, bottom right: - SSAVec4f varying = SSAVec4f(posx_varying[i], posx_varying[i] + gradVaryingX[i], posx_varying[i] + gradVaryingY[i], posx_varying[i] + gradVaryingX[i] + gradVaryingY[i]) * rcpW; - - SSAFloat startStepX = (varying[1] - varying[0]) * (1.0f / q); - SSAFloat endStepX = (varying[3] - varying[2]) * (1.0f / q); - SSAFloat incrStepX = (endStepX - startStepX) * (1.0f / q); - SSAFloat stepY = (varying[2] - varying[0]) * (1.0f / q); - - SSAVec4i ints = SSAVec4i(SSAVec4f(varying[0], stepY, startStepX, incrStepX) * (float)0x01000000) << 8; - varyingPos[i] = ints[0]; - varyingStepY[i] = ints[1]; - varyingStartStepX[i] = ints[2]; - varyingIncrStepX[i] = ints[3]; - } - - SSAFloat globVis = SSAFloat(1706.0f); - SSAFloat vis = globVis / rcpW[0]; - SSAFloat shade = 64.0f - (SSAFloat(light * 255 / 256) + 12.0f) * 32.0f / 128.0f; - SSAFloat lightscale = SSAFloat::clamp((shade - SSAFloat::MIN(SSAFloat(24.0f), vis)) / 32.0f, SSAFloat(0.0f), SSAFloat(31.0f / 32.0f)); - SSAInt diminishedlight = SSAInt(SSAFloat::clamp((1.0f - lightscale) * 256.0f + 0.5f, SSAFloat(0.0f), SSAFloat(256.0f)), false); - - if (!truecolor) - { - SSAInt diminishedindex = SSAInt(lightscale * 32.0f, false); - SSAInt lightindex = SSAInt::MIN((256 - light) * 32 / 256, SSAInt(31)); - SSAInt colormapindex = is_fixed_light.select(lightindex, diminishedindex); - currentcolormap = Colormaps[colormapindex << 8]; - } - else - { - currentlight = 
is_fixed_light.select(light, diminishedlight); + AffineVaryingPosX[i] = SSAInt(AffineVaryingPosY[i] * rcpW0, false); + AffineVaryingStepX[i] = (SSAInt((AffineVaryingPosY[i] + gradVaryingX[i]) * rcpW1, false) - AffineVaryingPosX[i]) / q; + AffineVaryingPosX[i] = AffineVaryingPosX[i] << 8; + AffineVaryingStepX[i] = AffineVaryingStepX[i] << 8; } } @@ -346,22 +333,16 @@ void DrawTriangleCodegen::LoopFullBlock() { int pixelsize = truecolor ? 4 : 1; - SSAInt varyingLine[TriVertex::NumVarying]; - SSAInt varyingStepX[TriVertex::NumVarying]; + AffineW = posx_w; for (int i = 0; i < TriVertex::NumVarying; i++) - { - varyingLine[i] = varyingPos[i]; - varyingStepX[i] = varyingStartStepX[i]; - } + AffineVaryingPosY[i] = posx_varying[i]; for (int iy = 0; iy < q; iy++) { SSAUBytePtr buffer = dest[(x + iy * pitch) * pixelsize]; SSAIntPtr subsectorbuffer = subsectorGBuffer[x + iy * pitch]; - SSAInt varying[TriVertex::NumVarying]; - for (int i = 0; i < TriVertex::NumVarying; i++) - varying[i] = varyingLine[i]; + SetupAffineBlock(); for (int ix = 0; ix < q; ix += 4) { @@ -384,15 +365,15 @@ void DrawTriangleCodegen::LoopFullBlock() if (variant == TriDrawVariant::DrawSubsector || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector) { SSABool subsectorTest = subsectorbuffer[ix].load(true) >= subsectorDepth; - pixels[sse] = subsectorTest.select(ProcessPixel32(pixels[sse], varying), pixels[sse]); + pixels[sse] = subsectorTest.select(ProcessPixel32(pixels[sse], AffineVaryingPosX), pixels[sse]); } else { - pixels[sse] = ProcessPixel32(pixels[sse], varying); + pixels[sse] = ProcessPixel32(pixels[sse], AffineVaryingPosX); } for (int i = 0; i < TriVertex::NumVarying; i++) - varying[i] = varying[i] + varyingStepX[i]; + AffineVaryingPosX[i] = AffineVaryingPosX[i] + AffineVaryingStepX[i]; } buf.store_unaligned_vec16ub(SSAVec16ub(SSAVec8s(pixels[0], pixels[1]), SSAVec8s(pixels[2], pixels[3]))); @@ -413,15 +394,15 @@ void DrawTriangleCodegen::LoopFullBlock() if 
(variant == TriDrawVariant::DrawSubsector || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector) { SSABool subsectorTest = subsectorbuffer[ix].load(true) >= subsectorDepth; - pixels[sse] = subsectorTest.select(ProcessPixel8(pixels[sse], varying), pixels[sse]); + pixels[sse] = subsectorTest.select(ProcessPixel8(pixels[sse], AffineVaryingPosX), pixels[sse]); } else { - pixels[sse] = ProcessPixel8(pixels[sse], varying); + pixels[sse] = ProcessPixel8(pixels[sse], AffineVaryingPosX); } for (int i = 0; i < TriVertex::NumVarying; i++) - varying[i] = varying[i] + varyingStepX[i]; + AffineVaryingPosX[i] = AffineVaryingPosX[i] + AffineVaryingStepX[i]; } buf.store_vec4ub(SSAVec4i(pixels[0], pixels[1], pixels[2], pixels[3])); @@ -431,11 +412,9 @@ void DrawTriangleCodegen::LoopFullBlock() subsectorbuffer[ix].store_unaligned_vec4i(SSAVec4i(subsectorDepth)); } + AffineW = AffineW + gradWY; for (int i = 0; i < TriVertex::NumVarying; i++) - { - varyingLine[i] = varyingLine[i] + varyingStepY[i]; - varyingStepX[i] = varyingStepX[i] + varyingIncrStepX[i]; - } + AffineVaryingPosY[i] = AffineVaryingPosY[i] + gradVaryingY[i]; } } @@ -452,10 +431,10 @@ void DrawTriangleCodegen::LoopPartialBlock() stack_iy.store(SSAInt(0)); stack_buffer.store(dest[x * pixelsize]); stack_subsectorbuffer.store(subsectorGBuffer[x]); + stack_AffineW.store(posx_w); for (int i = 0; i < TriVertex::NumVarying; i++) { - stack_varyingLine[i].store(varyingPos[i]); - stack_varyingStep[i].store(varyingStartStepX[i]); + stack_AffineVaryingPosY[i].store(posx_varying[i]); } SSAForBlock loopy; @@ -465,17 +444,15 @@ void DrawTriangleCodegen::LoopPartialBlock() SSAInt CY1 = stack_CY1.load(); SSAInt CY2 = stack_CY2.load(); SSAInt CY3 = stack_CY3.load(); - SSAInt varyingLine[TriVertex::NumVarying]; - SSAInt varyingStep[TriVertex::NumVarying]; + AffineW = stack_AffineW.load(); for (int i = 0; i < TriVertex::NumVarying; i++) - { - varyingLine[i] = stack_varyingLine[i].load(); - varyingStep[i] = 
stack_varyingStep[i].load(); - } + AffineVaryingPosY[i] = stack_AffineVaryingPosY[i].load(); loopy.loop_block(iy < SSAInt(q), q); { + SetupAffineBlock(); + for (int i = 0; i < TriVertex::NumVarying; i++) - stack_varying[i].store(varyingLine[i]); + stack_AffineVaryingPosX[i].store(AffineVaryingPosX[i]); stack_CX1.store(CY1); stack_CX2.store(CY2); @@ -487,9 +464,8 @@ void DrawTriangleCodegen::LoopPartialBlock() SSAInt CX1 = stack_CX1.load(); SSAInt CX2 = stack_CX2.load(); SSAInt CX3 = stack_CX3.load(); - SSAInt varying[TriVertex::NumVarying]; for (int i = 0; i < TriVertex::NumVarying; i++) - varying[i] = stack_varying[i].load(); + AffineVaryingPosX[i] = stack_AffineVaryingPosX[i].load(); loopx.loop_block(ix < SSAInt(q), q); { SSABool visible = (ix + x >= clipleft) && (ix + x < clipright) && (iy + y >= cliptop) && (iy + y < clipbottom); @@ -527,12 +503,12 @@ void DrawTriangleCodegen::LoopPartialBlock() if (truecolor) { SSAVec4i bg = buf.load_vec4ub(false); - buf.store_vec4ub(ProcessPixel32(bg, varying)); + buf.store_vec4ub(ProcessPixel32(bg, AffineVaryingPosX)); } else { SSAUByte bg = buf.load(false); - buf.store(ProcessPixel8(bg.zext_int(), varying).trunc_ubyte()); + buf.store(ProcessPixel8(bg.zext_int(), AffineVaryingPosX).trunc_ubyte()); } if (variant != TriDrawVariant::DrawSubsector && variant != TriDrawVariant::FillSubsector && variant != TriDrawVariant::FuzzSubsector) @@ -542,7 +518,7 @@ void DrawTriangleCodegen::LoopPartialBlock() branch.end_block(); for (int i = 0; i < TriVertex::NumVarying; i++) - stack_varying[i].store(varying[i] + varyingStep[i]); + stack_AffineVaryingPosX[i].store(AffineVaryingPosX[i] + AffineVaryingStepX[i]); stack_CX1.store(CX1 - FDY12); stack_CX2.store(CX2 - FDY23); @@ -551,12 +527,9 @@ void DrawTriangleCodegen::LoopPartialBlock() } loopx.end_block(); + stack_AffineW.store(AffineW + gradWY); for (int i = 0; i < TriVertex::NumVarying; i++) - { - stack_varyingLine[i].store(varyingLine[i] + varyingStepY[i]); - 
stack_varyingStep[i].store(varyingStep[i] + varyingIncrStepX[i]); - } - + stack_AffineVaryingPosY[i].store(AffineVaryingPosY[i] + gradVaryingY[i]); stack_CY1.store(CY1 + FDX12); stack_CY2.store(CY2 + FDX23); stack_CY3.store(CY3 + FDX31); diff --git a/tools/drawergen/fixedfunction/drawtrianglecodegen.h b/tools/drawergen/fixedfunction/drawtrianglecodegen.h index 3866e3566b..bc894f1a21 100644 --- a/tools/drawergen/fixedfunction/drawtrianglecodegen.h +++ b/tools/drawergen/fixedfunction/drawtrianglecodegen.h @@ -80,14 +80,13 @@ private: SSAStack stack_buffer; SSAStack stack_subsectorbuffer; SSAStack stack_iy, stack_ix; - SSAStack stack_varyingLine[TriVertex::NumVarying]; - SSAStack stack_varyingStep[TriVertex::NumVarying]; - SSAStack stack_varying[TriVertex::NumVarying]; SSAStack stack_CY1, stack_CY2, stack_CY3; SSAStack stack_CX1, stack_CX2, stack_CX3; - SSAStack stack_posy_w, stack_posy_varying[TriVertex::NumVarying]; SSAStack stack_posx_w, stack_posx_varying[TriVertex::NumVarying]; + SSAStack stack_AffineW; + SSAStack stack_AffineVaryingPosY[TriVertex::NumVarying]; + SSAStack stack_AffineVaryingPosX[TriVertex::NumVarying]; SSAUBytePtr dest; SSAInt pitch; @@ -142,10 +141,10 @@ private: SSAInt x0, x1, y0, y1; SSAInt currentlight; SSAUBytePtr currentcolormap; - SSAInt varyingPos[TriVertex::NumVarying]; - SSAInt varyingStepY[TriVertex::NumVarying]; - SSAInt varyingStartStepX[TriVertex::NumVarying]; - SSAInt varyingIncrStepX[TriVertex::NumVarying]; + SSAFloat AffineW; + SSAFloat AffineVaryingPosY[TriVertex::NumVarying]; + SSAInt AffineVaryingPosX[TriVertex::NumVarying]; + SSAInt AffineVaryingStepX[TriVertex::NumVarying]; SSAFloat posy_w, posy_varying[TriVertex::NumVarying]; SSAFloat posx_w, posx_varying[TriVertex::NumVarying]; From fdbf2ab5e93ab1531381a8deca2078a6442fd3fb Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 30 Nov 2016 17:37:32 +0100 Subject: [PATCH 412/912] Fix compile errors for LLVM 3.8 --- tools/drawergen/drawergen.cpp | 4 ++-- 1 file changed, 2 
insertions(+), 2 deletions(-) diff --git a/tools/drawergen/drawergen.cpp b/tools/drawergen/drawergen.cpp index db4d896ff0..c31b0229d2 100644 --- a/tools/drawergen/drawergen.cpp +++ b/tools/drawergen/drawergen.cpp @@ -468,7 +468,7 @@ std::vector LLVMProgram::GenerateObjectFile(const std::string &triple, const Target *target = TargetRegistry::lookupTarget(triple, errorstring); -#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 8) +#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 9) Reloc::Model relocationModel = Reloc::PIC_; #else Optional relocationModel = Reloc::PIC_; @@ -502,7 +502,7 @@ std::vector LLVMProgram::GenerateObjectFile(const std::string &triple, options.MCOptions.MCUseDwarfDirectory = false; options.MCOptions.AsmVerbose = true; -#if LLVM_VERSION_MAJOR > 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 8) +#if LLVM_VERSION_MAJOR > 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 9) options.Reciprocals = TargetRecip({ "all" }); options.StackSymbolOrdering = true; options.UniqueSectionNames = true; From 1bed6feadce05b5f41624f5eb6925feae7a43d02 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 1 Dec 2016 00:42:14 +0100 Subject: [PATCH 413/912] Detach poly renderer from r_main --- src/r_main.cpp | 28 ++++------------------ src/r_poly.cpp | 57 ++++++++++++++++++++++++++++++++++++++++++-- src/r_poly.h | 6 ++++- src/r_swrenderer.cpp | 41 ++++++++++++++++++++++++++++--- 4 files changed, 102 insertions(+), 30 deletions(-) diff --git a/src/r_main.cpp b/src/r_main.cpp index e0773718fa..5c7ab39bcb 100644 --- a/src/r_main.cpp +++ b/src/r_main.cpp @@ -59,18 +59,9 @@ #include "v_font.h" #include "r_data/colormaps.h" #include "p_maputl.h" -#include "r_poly.h" #include "p_setup.h" #include "version.h" -CUSTOM_CVAR(Bool, r_polyrenderer, 0, CVAR_ARCHIVE | CVAR_GLOBALCONFIG | CVAR_NOINITCALL) -{ - if (self == 1 && !hasglnodes) - { - Printf("No GL BSP detected. 
You must restart the map before rendering will be correct\n"); - } -} - // MACROS ------------------------------------------------------------------ #if 0 @@ -915,14 +906,7 @@ void R_RenderActorView (AActor *actor, bool dontmaplines) // Link the polyobjects right before drawing the scene to reduce the amounts of calls to this function PO_LinkToSubsectors(); InSubsector = NULL; - if (!r_polyrenderer) - { - R_RenderBSPNode(nodes + numnodes - 1); // The head node is the last node output. - } - else - { - RenderPolyScene::Instance()->Render(); - } + R_RenderBSPNode(nodes + numnodes - 1); // The head node is the last node output. R_3D_ResetClip(); // reset clips (floor/ceiling) camera->renderflags = savedflags; WallCycles.Unclock(); @@ -932,11 +916,8 @@ void R_RenderActorView (AActor *actor, bool dontmaplines) if (viewactive) { PlaneCycles.Clock(); - if (!r_polyrenderer) - { - R_DrawPlanes(); - R_DrawPortals(); - } + R_DrawPlanes(); + R_DrawPortals(); PlaneCycles.Unclock(); // [RH] Walk through mirrors @@ -953,8 +934,7 @@ void R_RenderActorView (AActor *actor, bool dontmaplines) NetUpdate (); MaskedCycles.Clock(); - if (!r_polyrenderer) - R_DrawMasked (); + R_DrawMasked (); MaskedCycles.Unclock(); NetUpdate (); diff --git a/src/r_poly.cpp b/src/r_poly.cpp index 186077806c..3809c56391 100644 --- a/src/r_poly.cpp +++ b/src/r_poly.cpp @@ -25,11 +25,15 @@ #include "doomdef.h" #include "sbar.h" #include "r_data/r_translate.h" +#include "r_data/r_interpolate.h" #include "r_poly.h" #include "gl/data/gl_data.h" +#include "d_net.h" +#include "po_man.h" EXTERN_CVAR(Int, screenblocks) void InitGLRMapinfoData(); +extern bool r_showviewer; ///////////////////////////////////////////////////////////////////////////// @@ -39,8 +43,50 @@ RenderPolyScene *RenderPolyScene::Instance() return &scene; } -void RenderPolyScene::Render() +void RenderPolyScene::RenderViewToCanvas(AActor *actor, DCanvas *canvas, int x, int y, int width, int height, bool dontmaplines) { + const bool 
savedviewactive = viewactive; + const bool savedoutputformat = r_swtruecolor; + + viewwidth = width; + RenderTarget = canvas; + bRenderingToCanvas = true; + R_SetWindow(12, width, height, height, true); + viewwindowx = x; + viewwindowy = y; + viewactive = true; + r_swtruecolor = canvas->IsBgra(); + + canvas->Lock(true); + + RenderActorView(actor, dontmaplines); + + canvas->Unlock(); + + RenderTarget = screen; + bRenderingToCanvas = false; + R_ExecuteSetViewSize(); + viewactive = savedviewactive; + r_swtruecolor = savedoutputformat; +} + +void RenderPolyScene::RenderActorView(AActor *actor, bool dontmaplines) +{ + NetUpdate(); + + r_dontmaplines = dontmaplines; + + P_FindParticleSubsectors(); + PO_LinkToSubsectors(); + R_SetupFrame(actor); + + ActorRenderFlags savedflags = camera->renderflags; + // Never draw the player unless in chasecam mode + if (!r_showviewer) + camera->renderflags |= RF_INVISIBLE; + + R_BeginDrawerCommands(); + ClearBuffers(); SetSceneViewport(); SetupPerspectiveMatrix(); @@ -50,7 +96,14 @@ void RenderPolyScene::Render() MainPortal.RenderTranslucent(0); PlayerSprites.Render(); - DrawerCommandQueue::WaitForWorkers(); + camera->renderflags = savedflags; + interpolator.RestoreInterpolations (); + + NetUpdate(); + + R_EndDrawerCommands(); + + NetUpdate(); } void RenderPolyScene::RenderRemainingPlayerSprites() diff --git a/src/r_poly.h b/src/r_poly.h index 3bd9bb319a..d67e395708 100644 --- a/src/r_poly.h +++ b/src/r_poly.h @@ -33,11 +33,15 @@ #include "r_poly_playersprite.h" #include "r_poly_sky.h" +class AActor; +class DCanvas; + // Renders a scene class RenderPolyScene { public: - void Render(); + void RenderViewToCanvas(AActor *actor, DCanvas *canvas, int x, int y, int width, int height, bool dontmaplines); + void RenderActorView(AActor *actor, bool dontmaplines); void RenderRemainingPlayerSprites(); static RenderPolyScene *Instance(); diff --git a/src/r_swrenderer.cpp b/src/r_swrenderer.cpp index 74a6da8232..0f30fc3dff 100644 --- 
a/src/r_swrenderer.cpp +++ b/src/r_swrenderer.cpp @@ -45,11 +45,19 @@ #include "r_draw_rgba.h" #include "r_drawers.h" #include "r_poly.h" +#include "p_setup.h" EXTERN_CVAR(Bool, r_shadercolormaps) -EXTERN_CVAR(Bool, r_polyrenderer) // [SP] dpJudas's new renderer EXTERN_CVAR(Float, maxviewpitch) // [SP] CVAR from GZDoom +CUSTOM_CVAR(Bool, r_polyrenderer, 0, CVAR_ARCHIVE | CVAR_GLOBALCONFIG | CVAR_NOINITCALL) +{ + if (self == 1 && !hasglnodes) + { + Printf("No GL BSP detected. You must restart the map before rendering will be correct\n"); + } +} + void R_SWRSetWindow(int windowSize, int fullWidth, int fullHeight, int stHeight, float trueratio); void R_SetupColormap(player_t *); void R_SetupFreelook(); @@ -175,6 +183,27 @@ void FSoftwareRenderer::Precache(BYTE *texhitlist, TMap &act void FSoftwareRenderer::RenderView(player_t *player) { + if (r_polyrenderer) + { + bool saved_swtruecolor = r_swtruecolor; + r_swtruecolor = screen->IsBgra(); + + RenderPolyScene::Instance()->RenderActorView(player->mo, false); + FCanvasTextureInfo::UpdateAll(); + + // Apply special colormap if the target cannot do it + if (realfixedcolormap && r_swtruecolor && !(r_shadercolormaps && screen->Accel2D)) + { + R_BeginDrawerCommands(); + DrawerCommandQueue::QueueCommand(realfixedcolormap, screen); + R_EndDrawerCommands(); + } + + r_swtruecolor = saved_swtruecolor; + + return; + } + if (r_swtruecolor != screen->IsBgra()) { r_swtruecolor = screen->IsBgra(); @@ -223,7 +252,10 @@ void FSoftwareRenderer::WriteSavePic (player_t *player, FileWriter *file, int wi // Take a snapshot of the player's view pic->ObjectFlags |= OF_Fixed; pic->Lock (); - R_RenderViewToCanvas (player->mo, pic, 0, 0, width, height); + if (r_polyrenderer) + RenderPolyScene::Instance()->RenderViewToCanvas(player->mo, pic, 0, 0, width, height, true); + else + R_RenderViewToCanvas (player->mo, pic, 0, 0, width, height); screen->GetFlashedPalette (palette); M_CreatePNG (file, pic->GetBuffer(), palette, SS_PAL, width, height, 
pic->GetPitch()); pic->Unlock (); @@ -373,7 +405,10 @@ void FSoftwareRenderer::RenderTextureView (FCanvasTexture *tex, AActor *viewpoin DAngle savedfov = FieldOfView; R_SetFOV ((double)fov); - R_RenderViewToCanvas (viewpoint, Canvas, 0, 0, tex->GetWidth(), tex->GetHeight(), tex->bFirstUpdate); + if (r_polyrenderer) + RenderPolyScene::Instance()->RenderViewToCanvas(viewpoint, Canvas, 0, 0, tex->GetWidth(), tex->GetHeight(), tex->bFirstUpdate); + else + R_RenderViewToCanvas (viewpoint, Canvas, 0, 0, tex->GetWidth(), tex->GetHeight(), tex->bFirstUpdate); R_SetFOV (savedfov); if (Canvas->IsBgra()) From 9416d436fed692b5c5d49b9faf992b203fb356c2 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 1 Dec 2016 02:38:32 +0100 Subject: [PATCH 414/912] Move software renderer into its own namespace to isolate its globals and make any access explicit. This reveals the places in the code where they are being accessed where they shouldn't and prevents accidental usage. --- src/gl/system/gl_swframebuffer.cpp | 7 ++-- src/r_3dfloors.cpp | 8 +++- src/r_3dfloors.h | 8 +++- src/r_bsp.cpp | 10 ++++- src/r_bsp.h | 8 +++- src/r_defs.h | 2 +- src/r_draw.cpp | 14 +++++-- src/r_draw.h | 19 ++++++---- src/r_draw_rgba.cpp | 15 +++++--- src/r_draw_rgba.h | 5 +++ src/r_drawt.cpp | 5 +++ src/r_drawt_rgba.cpp | 5 +++ src/r_main.cpp | 61 +++++++++++++++--------------- src/r_main.h | 12 ++++-- src/r_plane.cpp | 14 ++++--- src/r_plane.h | 5 +++ src/r_poly.cpp | 12 +++--- src/r_poly_decal.cpp | 4 +- src/r_poly_particle.cpp | 4 +- src/r_poly_plane.cpp | 10 ++--- src/r_poly_playersprite.cpp | 14 +++---- src/r_poly_portal.cpp | 10 ++--- src/r_poly_sky.cpp | 2 +- src/r_poly_sprite.cpp | 14 +++---- src/r_poly_triangle.cpp | 4 +- src/r_poly_triangle.h | 4 +- src/r_poly_wall.cpp | 4 +- src/r_poly_wallsprite.cpp | 2 +- src/r_segs.cpp | 29 ++++---------- src/r_segs.h | 5 +++ src/r_state.h | 2 +- src/r_swrenderer.cpp | 7 ++++ src/r_things.cpp | 25 +++++++----- src/r_things.h | 9 ++++- src/r_thread.h | 14 
++++--- src/v_draw.cpp | 6 ++- src/win32/fb_d3d9.cpp | 9 ++--- 37 files changed, 231 insertions(+), 157 deletions(-) diff --git a/src/gl/system/gl_swframebuffer.cpp b/src/gl/system/gl_swframebuffer.cpp index 4b10326a43..421fe92781 100644 --- a/src/gl/system/gl_swframebuffer.cpp +++ b/src/gl/system/gl_swframebuffer.cpp @@ -1377,15 +1377,16 @@ void OpenGLSWFrameBuffer::Draw3DPart(bool copy3d) uint32_t color0, color1; if (Accel2D) { - if (realfixedcolormap == nullptr) + auto &map = swrenderer::realfixedcolormap; + if (map == nullptr) { color0 = 0; color1 = 0xFFFFFFF; } else { - color0 = ColorValue(realfixedcolormap->ColorizeStart[0] / 2, realfixedcolormap->ColorizeStart[1] / 2, realfixedcolormap->ColorizeStart[2] / 2, 0); - color1 = ColorValue(realfixedcolormap->ColorizeEnd[0] / 2, realfixedcolormap->ColorizeEnd[1] / 2, realfixedcolormap->ColorizeEnd[2] / 2, 1); + color0 = ColorValue(map->ColorizeStart[0] / 2, map->ColorizeStart[1] / 2, map->ColorizeStart[2] / 2, 0); + color1 = ColorValue(map->ColorizeEnd[0] / 2, map->ColorizeEnd[1] / 2, map->ColorizeEnd[2] / 2, 1); if (IsBgra()) SetPixelShader(Shaders[SHADER_SpecialColormap]); else diff --git a/src/r_3dfloors.cpp b/src/r_3dfloors.cpp index 61a23187d4..4289b78433 100644 --- a/src/r_3dfloors.cpp +++ b/src/r_3dfloors.cpp @@ -15,6 +15,11 @@ #include "c_cvars.h" #include "r_3dfloors.h" +CVAR(Int, r_3dfloors, true, 0); + +namespace swrenderer +{ + // external variables int fake3D; F3DFloor *fakeFloor; @@ -28,8 +33,6 @@ HeightLevel *height_cur = NULL; int CurrentMirror = 0; int CurrentSkybox = 0; -CVAR(Int, r_3dfloors, true, 0); - // private variables int height_max = -1; TArray toplist; @@ -160,3 +163,4 @@ void R_3D_LeaveSkybox() CurrentSkybox--; } +} diff --git a/src/r_3dfloors.h b/src/r_3dfloors.h index cacb974443..a703ae19a4 100644 --- a/src/r_3dfloors.h +++ b/src/r_3dfloors.h @@ -3,6 +3,11 @@ #include "p_3dfloors.h" +EXTERN_CVAR(Int, r_3dfloors); + +namespace swrenderer +{ + // special types struct HeightLevel @@ -57,7 
+62,6 @@ extern HeightLevel *height_top; extern HeightLevel *height_cur; extern int CurrentMirror; extern int CurrentSkybox; -EXTERN_CVAR(Int, r_3dfloors); // functions void R_3D_DeleteHeights(); @@ -67,4 +71,6 @@ void R_3D_ResetClip(); void R_3D_EnterSkybox(); void R_3D_LeaveSkybox(); +} + #endif diff --git a/src/r_bsp.cpp b/src/r_bsp.cpp index 8d423b3b31..92581f01b3 100644 --- a/src/r_bsp.cpp +++ b/src/r_bsp.cpp @@ -58,6 +58,12 @@ #include "po_man.h" #include "r_data/colormaps.h" +CVAR (Bool, r_drawflat, false, 0) // [RH] Don't texture segs? +EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor); + +namespace swrenderer +{ + seg_t* curline; side_t* sidedef; line_t* linedef; @@ -104,8 +110,6 @@ TArray WallPortals(1000); // note: this array needs to go away as subsector_t *InSubsector; -CVAR (Bool, r_drawflat, false, 0) // [RH] Don't texture segs? -EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor); void R_StoreWallRange (int start, int stop); @@ -1396,3 +1400,5 @@ void R_RenderBSPNode (void *node) } R_Subsector ((subsector_t *)((BYTE *)node - 1)); } + +} diff --git a/src/r_bsp.h b/src/r_bsp.h index 48ca7565bb..e4b518e75d 100644 --- a/src/r_bsp.h +++ b/src/r_bsp.h @@ -27,6 +27,11 @@ #include #include "r_defs.h" +EXTERN_CVAR (Bool, r_drawflat) // [RH] Don't texture segs? + +namespace swrenderer +{ + // The 3072 below is just an arbitrary value picked to avoid // drawing lines the player is too close to that would overflow // the texture calculations. @@ -109,8 +114,6 @@ extern WORD MirrorFlags; typedef void (*drawfunc_t) (int start, int stop); -EXTERN_CVAR (Bool, r_drawflat) // [RH] Don't texture segs? - // BSP? 
void R_ClearClipSegs (short left, short right); void R_ClearDrawSegs (); @@ -119,5 +122,6 @@ void R_RenderBSPNode (void *node); // killough 4/13/98: fake floors/ceilings for deep water / fake ceilings: sector_t *R_FakeFlat(sector_t *, sector_t *, int *, int *, bool); +} #endif diff --git a/src/r_defs.h b/src/r_defs.h index 97552fb524..8b176da917 100644 --- a/src/r_defs.h +++ b/src/r_defs.h @@ -59,7 +59,7 @@ enum SIL_BOTH }; -extern size_t MaxDrawSegs; +namespace swrenderer { extern size_t MaxDrawSegs; } struct FDisplacement; // diff --git a/src/r_draw.cpp b/src/r_draw.cpp index d04b1dcf6c..c1e96b329e 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -47,6 +47,14 @@ #undef RANGECHECK +EXTERN_CVAR (Int, r_drawfuzz) +EXTERN_CVAR (Bool, r_drawtrans) +EXTERN_CVAR (Float, transsouls) +EXTERN_CVAR (Int, r_columnmethod) + +namespace swrenderer +{ + // status bar height at bottom of screen // [RH] status bar position at bottom of screen extern int ST_Y; @@ -194,7 +202,6 @@ FDynamicColormap identitycolormap; bool drawer_needs_pal_input; -EXTERN_CVAR (Int, r_columnmethod) void R_InitShadeMaps() { @@ -2734,9 +2741,6 @@ void R_InitColumnDrawers () } // [RH] Choose column drawers in a single place -EXTERN_CVAR (Int, r_drawfuzz) -EXTERN_CVAR (Bool, r_drawtrans) -EXTERN_CVAR (Float, transsouls) static FDynamicColormap *basecolormapsave; @@ -3122,3 +3126,5 @@ void R_SetDSColorMapLight(FSWColormap *base_colormap, float light, int shade) ds_colormap = base_colormap->Maps + (GETPALOOKUP(light, shade) << COLORMAPSHIFT); } } + +} diff --git a/src/r_draw.h b/src/r_draw.h index 7556575bd1..d312ac8fbc 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -25,14 +25,23 @@ #include "r_defs.h" +struct FSWColormap; + +EXTERN_CVAR(Bool, r_multithreaded); +EXTERN_CVAR(Bool, r_magfilter); +EXTERN_CVAR(Bool, r_minfilter); +EXTERN_CVAR(Bool, r_mipmap); +EXTERN_CVAR(Float, r_lod_bias); + +namespace swrenderer +{ + // Spectre/Invisibility. 
#define FUZZTABLE 50 extern "C" int fuzzoffset[FUZZTABLE + 1]; // [RH] +1 for the assembly routine extern "C" int fuzzpos; extern "C" int fuzzviewheight; -struct FSWColormap; - struct ShadeConstants { uint16_t light_alpha; @@ -395,10 +404,6 @@ void R_DrawDoubleSkyCol4_rgba(uint32_t solid_top, uint32_t solid_bottom); extern bool r_swtruecolor; -EXTERN_CVAR(Bool, r_multithreaded); -EXTERN_CVAR(Bool, r_magfilter); -EXTERN_CVAR(Bool, r_minfilter); -EXTERN_CVAR(Bool, r_mipmap); -EXTERN_CVAR(Float, r_lod_bias); +} #endif diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 716b30c0d3..61e0a8932f 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -46,11 +46,6 @@ #include "x86.h" #include -extern "C" short spanend[MAXHEIGHT]; -extern float rw_light; -extern float rw_lightstep; -extern int wallshade; - // Use linear filtering when scaling up CVAR(Bool, r_magfilter, false, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); @@ -63,6 +58,14 @@ CVAR(Bool, r_mipmap, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); // Level of detail texture bias CVAR(Float, r_lod_bias, -1.5, 0); // To do: add CVAR_ARCHIVE | CVAR_GLOBALCONFIG when a good default has been decided +namespace swrenderer +{ + +extern "C" short spanend[MAXHEIGHT]; +extern float rw_light; +extern float rw_lightstep; +extern int wallshade; + ///////////////////////////////////////////////////////////////////////////// class DrawSpanLLVMCommand : public DrawerCommand @@ -1622,3 +1625,5 @@ void R_DrawFogBoundary_rgba(int x1, int x2, short *uclip, short *dclip) R_DrawFogBoundarySection_rgba(t2, b2, x1); } } + +} diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index 4aa1a02aff..8e66b7a720 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -36,6 +36,9 @@ struct FSpecialColormap; EXTERN_CVAR(Bool, r_mipmap) EXTERN_CVAR(Float, r_lod_bias) +namespace swrenderer +{ + ///////////////////////////////////////////////////////////////////////////// // Drawer functions: @@ -285,4 +288,6 @@ public: } }; +} + #endif diff --git 
a/src/r_drawt.cpp b/src/r_drawt.cpp index 0baf6d38c9..d87c8f4c09 100644 --- a/src/r_drawt.cpp +++ b/src/r_drawt.cpp @@ -47,6 +47,9 @@ #include "r_things.h" #include "v_video.h" +namespace swrenderer +{ + // I should have commented this stuff better. // // dc_temp is the buffer R_DrawColumnHoriz writes into. @@ -1130,3 +1133,5 @@ void R_FillColumnHorizP_C (void) dest += 8; } while (--count); } + +} diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp index 57de59eba3..98d27adecf 100644 --- a/src/r_drawt_rgba.cpp +++ b/src/r_drawt_rgba.cpp @@ -45,6 +45,9 @@ #include "r_draw_rgba.h" #include "r_drawers.h" +namespace swrenderer +{ + extern unsigned int dc_tspans[4][MAXHEIGHT]; extern unsigned int *dc_ctspan[4]; extern unsigned int *horizspan[4]; @@ -506,3 +509,5 @@ void R_FillColumnHoriz_rgba (void) DrawerCommandQueue::QueueCommand(); } + +} diff --git a/src/r_main.cpp b/src/r_main.cpp index 5c7ab39bcb..b236dc0e7d 100644 --- a/src/r_main.cpp +++ b/src/r_main.cpp @@ -62,6 +62,34 @@ #include "p_setup.h" #include "version.h" +CVAR (String, r_viewsize, "", CVAR_NOSET) +CVAR (Bool, r_shadercolormaps, true, CVAR_ARCHIVE) + +CUSTOM_CVAR (Int, r_columnmethod, 1, CVAR_ARCHIVE|CVAR_GLOBALCONFIG) +{ + if (self != 0 && self != 1) + { + self = 1; + } + else + { // Trigger the change + setsizeneeded = true; + } +} + +CVAR(Int, r_portal_recursions, 4, CVAR_ARCHIVE) +CVAR(Bool, r_highlight_portals, false, CVAR_ARCHIVE) + +EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor) + +extern cycle_t WallCycles, PlaneCycles, MaskedCycles, WallScanCycles; +extern cycle_t FrameCycles; + +extern bool r_showviewer; + +namespace swrenderer +{ + // MACROS ------------------------------------------------------------------ #if 0 @@ -91,7 +119,6 @@ extern short *openings; extern bool r_fakingunderwater; extern "C" int fuzzviewheight; extern subsector_t *InSubsector; -extern bool r_showviewer; // PRIVATE DATA DECLARATIONS ----------------------------------------------- @@ -103,9 +130,6 @@ bool 
r_dontmaplines; // PUBLIC DATA DEFINITIONS ------------------------------------------------- -CVAR (String, r_viewsize, "", CVAR_NOSET) -CVAR (Bool, r_shadercolormaps, true, CVAR_ARCHIVE) - bool r_swtruecolor; double r_BaseVisibility; @@ -366,26 +390,6 @@ void R_SWRSetWindow(int windowSize, int fullWidth, int fullHeight, int stHeight, R_SetVisibility(R_GetVisibility()); } -//========================================================================== -// -// CVAR r_columnmethod -// -// Selects which version of the seg renderers to use. -// -//========================================================================== - -CUSTOM_CVAR (Int, r_columnmethod, 1, CVAR_ARCHIVE|CVAR_GLOBALCONFIG) -{ - if (self != 0 && self != 1) - { - self = 1; - } - else - { // Trigger the change - setsizeneeded = true; - } -} - //========================================================================== // // R_Init @@ -450,8 +454,6 @@ void R_CopyStackedViewParameters() // //========================================================================== -EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor) - void R_SetupColormap(player_t *player) { realfixedcolormap = NULL; @@ -569,9 +571,6 @@ void R_SetupFreelook() // //========================================================================== -CVAR(Int, r_portal_recursions, 4, CVAR_ARCHIVE) -CVAR(Bool, r_highlight_portals, false, CVAR_ARCHIVE) - void R_HighlightPortal (PortalDrawseg* pds) { // [ZZ] NO OVERFLOW CHECKS HERE @@ -1023,8 +1022,6 @@ void R_MultiresInit () // Displays statistics about rendering times // //========================================================================== -extern cycle_t WallCycles, PlaneCycles, MaskedCycles, WallScanCycles; -extern cycle_t FrameCycles; ADD_STAT (fps) { @@ -1104,3 +1101,5 @@ CCMD (clearscancycles) bestscancycles = HUGE_VAL; } #endif + +} diff --git a/src/r_main.h b/src/r_main.h index 6a802e7993..f4d0a144ab 100644 --- a/src/r_main.h +++ b/src/r_main.h @@ -28,6 +28,13 @@ #include "v_palette.h" 
#include "r_data/colormaps.h" +extern double ViewCos; +extern double ViewSin; +extern int viewwindowx; +extern int viewwindowy; + +namespace swrenderer +{ typedef BYTE lighttable_t; // This could be wider for >8 bit display. @@ -35,16 +42,12 @@ typedef BYTE lighttable_t; // This could be wider for >8 bit display. // POV related. // extern bool bRenderingToCanvas; -extern double ViewCos; -extern double ViewSin; extern fixed_t viewingrangerecip; extern double FocalLengthX, FocalLengthY; extern double InvZtoScale; extern double WallTMapScale2; -extern int viewwindowx; -extern int viewwindowy; extern double CenterX; extern double CenterY; @@ -152,5 +155,6 @@ extern DAngle stacked_angle; extern void R_CopyStackedViewParameters(); +} #endif // __R_MAIN_H__ diff --git a/src/r_plane.cpp b/src/r_plane.cpp index ca85470035..da58ad16c6 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -64,10 +64,14 @@ #pragma warning(disable:4244) #endif +CVAR(Bool, r_linearsky, false, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); +CVAR(Bool, tilt, false, 0); +CVAR(Bool, r_skyboxes, true, 0) + EXTERN_CVAR(Int, r_skymode) -//EXTERN_CVAR (Int, tx) -//EXTERN_CVAR (Int, ty) +namespace swrenderer +{ extern subsector_t *InSubsector; @@ -889,7 +893,6 @@ static DWORD lastskycol_bgra[MAXSKYBUF]; static int skycolplace; static int skycolplace_bgra; -CVAR(Bool, r_linearsky, false, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); // Get a column of sky when there is only one sky texture. static const BYTE *R_GetOneSkyColumn (FTexture *fronttex, int x) @@ -1312,8 +1315,6 @@ static void R_DrawSkyStriped (visplane_t *pl) // //========================================================================== -CVAR (Bool, tilt, false, 0); -//CVAR (Int, pa, 0, 0) int R_DrawPlanes () { @@ -1450,7 +1451,6 @@ void R_DrawSinglePlane (visplane_t *pl, fixed_t alpha, bool additive, bool maske // 9. Put the camera back where it was to begin with. 
// //========================================================================== -CVAR (Bool, r_skyboxes, true, 0) static int numskyboxes; void R_DrawPortals () @@ -2190,3 +2190,5 @@ bool R_PlaneInitData () return true; } + +} diff --git a/src/r_plane.h b/src/r_plane.h index b199d34776..5e91a4b0be 100644 --- a/src/r_plane.h +++ b/src/r_plane.h @@ -27,6 +27,9 @@ class ASkyViewpoint; +namespace swrenderer +{ + // // The infamous visplane // @@ -121,4 +124,6 @@ bool R_PlaneInitData (void); extern visplane_t* floorplane; extern visplane_t* ceilingplane; +} + #endif // __R_PLANE_H__ diff --git a/src/r_poly.cpp b/src/r_poly.cpp index 3809c56391..cadf92adf6 100644 --- a/src/r_poly.cpp +++ b/src/r_poly.cpp @@ -46,16 +46,16 @@ RenderPolyScene *RenderPolyScene::Instance() void RenderPolyScene::RenderViewToCanvas(AActor *actor, DCanvas *canvas, int x, int y, int width, int height, bool dontmaplines) { const bool savedviewactive = viewactive; - const bool savedoutputformat = r_swtruecolor; + const bool savedoutputformat = swrenderer::r_swtruecolor; viewwidth = width; RenderTarget = canvas; - bRenderingToCanvas = true; + swrenderer::bRenderingToCanvas = true; R_SetWindow(12, width, height, height, true); viewwindowx = x; viewwindowy = y; viewactive = true; - r_swtruecolor = canvas->IsBgra(); + swrenderer::r_swtruecolor = canvas->IsBgra(); canvas->Lock(true); @@ -64,17 +64,17 @@ void RenderPolyScene::RenderViewToCanvas(AActor *actor, DCanvas *canvas, int x, canvas->Unlock(); RenderTarget = screen; - bRenderingToCanvas = false; + swrenderer::bRenderingToCanvas = false; R_ExecuteSetViewSize(); viewactive = savedviewactive; - r_swtruecolor = savedoutputformat; + swrenderer::r_swtruecolor = savedoutputformat; } void RenderPolyScene::RenderActorView(AActor *actor, bool dontmaplines) { NetUpdate(); - r_dontmaplines = dontmaplines; + //swrenderer::r_dontmaplines = dontmaplines; P_FindParticleSubsectors(); PO_LinkToSubsectors(); diff --git a/src/r_poly_decal.cpp b/src/r_poly_decal.cpp 
index 9d47c014a0..e91e25b582 100644 --- a/src/r_poly_decal.cpp +++ b/src/r_poly_decal.cpp @@ -135,7 +135,7 @@ void RenderPolyDecal::Render(const TriMatrix &worldToClip, const Vec4f &clipPlan args.uniforms.flags = 0; args.SetColormap(front->ColorMap); args.SetTexture(tex, decal->Translation, true); - if (fullbrightSprite || fixedlightlev >= 0 || fixedcolormap) + if (fullbrightSprite || swrenderer::fixedlightlev >= 0 || swrenderer::fixedcolormap) { args.uniforms.light = 256; args.uniforms.flags |= TriUniforms::fixed_light; @@ -146,7 +146,7 @@ void RenderPolyDecal::Render(const TriMatrix &worldToClip, const Vec4f &clipPlan } args.uniforms.subsectorDepth = subsectorDepth; - if (r_swtruecolor) + if (swrenderer::r_swtruecolor) { args.uniforms.color = 0xff000000 | decal->AlphaColor; } diff --git a/src/r_poly_particle.cpp b/src/r_poly_particle.cpp index 73e1819d4f..0d31614d22 100644 --- a/src/r_poly_particle.cpp +++ b/src/r_poly_particle.cpp @@ -72,7 +72,7 @@ void RenderPolyParticle::Render(const TriMatrix &worldToClip, const Vec4f &clipP PolyDrawArgs args; - if (fullbrightSprite || fixedlightlev >= 0 || fixedcolormap) + if (fullbrightSprite || swrenderer::fixedlightlev >= 0 || swrenderer::fixedcolormap) { args.uniforms.light = 256; args.uniforms.flags = TriUniforms::fixed_light; @@ -84,7 +84,7 @@ void RenderPolyParticle::Render(const TriMatrix &worldToClip, const Vec4f &clipP } args.uniforms.subsectorDepth = subsectorDepth; - if (r_swtruecolor) + if (swrenderer::r_swtruecolor) { uint32_t alpha = particle->trans; args.uniforms.color = (alpha << 24) | (particle->color & 0xffffff); diff --git a/src/r_poly_plane.cpp b/src/r_poly_plane.cpp index b17b0ec2c3..41898fa8b8 100644 --- a/src/r_poly_plane.cpp +++ b/src/r_poly_plane.cpp @@ -28,7 +28,7 @@ #include "r_poly_plane.h" #include "r_poly_portal.h" #include "r_poly.h" -#include "r_sky.h" // for skyflatnum +#include "r_sky.h" EXTERN_CVAR(Int, r_3dfloors) @@ -96,10 +96,10 @@ void RenderPolyPlane::Render3DFloor(const TriMatrix 
&worldToClip, const Vec4f &c return; int lightlevel = 255; - if (fixedlightlev < 0 && sub->sector->e->XFloor.lightlist.Size()) + if (swrenderer::fixedlightlev < 0 && sub->sector->e->XFloor.lightlist.Size()) { lightlist_t *light = P_GetPlaneLight(sub->sector, &sub->sector->ceilingplane, false); - basecolormap = light->extra_colormap; + swrenderer::basecolormap = light->extra_colormap; lightlevel = *light->p_lightlevel; } @@ -107,7 +107,7 @@ void RenderPolyPlane::Render3DFloor(const TriMatrix &worldToClip, const Vec4f &c PolyDrawArgs args; args.uniforms.light = (uint32_t)(lightlevel / 255.0f * 256.0f); - if (fixedlightlev >= 0 || fixedcolormap) + if (swrenderer::fixedlightlev >= 0 || swrenderer::fixedcolormap) args.uniforms.light = 256; args.uniforms.flags = 0; args.uniforms.subsectorDepth = subsectorDepth; @@ -251,7 +251,7 @@ void RenderPolyPlane::Render(const TriMatrix &worldToClip, const Vec4f &clipPlan PolyDrawArgs args; args.uniforms.light = (uint32_t)(frontsector->lightlevel / 255.0f * 256.0f); - if (fixedlightlev >= 0 || fixedcolormap) + if (swrenderer::fixedlightlev >= 0 || swrenderer::fixedcolormap) args.uniforms.light = 256; args.uniforms.flags = 0; args.uniforms.subsectorDepth = isSky ? 
RenderPolyPortal::SkySubsectorDepth : subsectorDepth; diff --git a/src/r_poly_playersprite.cpp b/src/r_poly_playersprite.cpp index 1a657d1a21..381f2e2033 100644 --- a/src/r_poly_playersprite.cpp +++ b/src/r_poly_playersprite.cpp @@ -143,14 +143,14 @@ void RenderPolyPlayerSprites::RenderSprite(DPSprite *sprite, AActor *owner, floa double tx = sx - BaseXCenter; tx -= tex->GetScaledLeftOffset(); - int x1 = xs_RoundToInt(CenterX + tx * pspritexscale); + int x1 = xs_RoundToInt(swrenderer::CenterX + tx * swrenderer::pspritexscale); // off the right side if (x1 > viewwidth) return; tx += tex->GetScaledWidth(); - int x2 = xs_RoundToInt(CenterX + tx * pspritexscale); + int x2 = xs_RoundToInt(swrenderer::CenterX + tx * swrenderer::pspritexscale); // off the left side if (x2 <= 0) @@ -183,19 +183,19 @@ void RenderPolyPlayerSprites::RenderSprite(DPSprite *sprite, AActor *owner, floa int clipped_x1 = MAX(x1, 0); int clipped_x2 = MIN(x2, viewwidth); - double xscale = pspritexscale / tex->Scale.X; - double yscale = pspriteyscale / tex->Scale.Y; + double xscale = swrenderer::pspritexscale / tex->Scale.X; + double yscale = swrenderer::pspriteyscale / tex->Scale.Y; uint32_t translation = 0; // [RH] Use default colors double xiscale, startfrac; if (flip) { - xiscale = -pspritexiscale * tex->Scale.X; + xiscale = -swrenderer::pspritexiscale * tex->Scale.X; startfrac = 1; } else { - xiscale = pspritexiscale * tex->Scale.X; + xiscale = swrenderer::pspritexiscale * tex->Scale.X; startfrac = 0; } @@ -217,7 +217,7 @@ void RenderPolyPlayerSprites::RenderSprite(DPSprite *sprite, AActor *owner, floa int actualextralight = foggy ? 
0 : extralight << 4; int spriteshade = LIGHT2SHADE(owner->Sector->lightlevel + actualextralight); double minz = double((2048 * 4) / double(1 << 20)); - visstyle.ColormapNum = GETPALOOKUP(r_SpriteVisibility / minz, spriteshade); + visstyle.ColormapNum = GETPALOOKUP(swrenderer::r_SpriteVisibility / minz, spriteshade); if (sprite->GetID() < PSP_TARGETCENTER) { diff --git a/src/r_poly_portal.cpp b/src/r_poly_portal.cpp index 3257afc702..5c4dbc89fd 100644 --- a/src/r_poly_portal.cpp +++ b/src/r_poly_portal.cpp @@ -163,7 +163,7 @@ void RenderPolyPortal::RenderLine(subsector_t *sub, seg_t *line, sector_t *front return; // Tell automap we saw this - if (!r_dontmaplines && line->linedef) + if (!swrenderer::r_dontmaplines && line->linedef) { line->linedef->flags |= ML_MAPPED; sub->flags |= SSECF_DRAWN; @@ -367,7 +367,7 @@ void PolyDrawSectorPortal::SaveGlobals() savedextralight = extralight; savedpos = ViewPos; savedangle = ViewAngle; - savedvisibility = R_GetVisibility(); + savedvisibility = swrenderer::R_GetVisibility(); savedcamera = camera; savedsector = viewsector; @@ -376,14 +376,14 @@ void PolyDrawSectorPortal::SaveGlobals() // Don't let gun flashes brighten the sky box ASkyViewpoint *sky = barrier_cast(Portal->mSkybox); extralight = 0; - R_SetVisibility(sky->args[0] * 0.25f); + swrenderer::R_SetVisibility(sky->args[0] * 0.25f); ViewPos = sky->InterpolatedPosition(r_TicFracF); ViewAngle = savedangle + (sky->PrevAngles.Yaw + deltaangle(sky->PrevAngles.Yaw, sky->Angles.Yaw) * r_TicFracF); } else //if (Portal->mType == PORTS_STACKEDSECTORTHING || Portal->mType == PORTS_PORTAL || Portal->mType == PORTS_LINKEDPORTAL) { //extralight = pl->extralight; - //R_SetVisibility(pl->visibility); + //swrenderer::R_SetVisibility(pl->visibility); ViewPos.X += Portal->mDisplacement.X; ViewPos.Y += Portal->mDisplacement.Y; } @@ -404,7 +404,7 @@ void PolyDrawSectorPortal::RestoreGlobals() camera = savedcamera; viewsector = savedsector; ViewPos = savedpos; - 
R_SetVisibility(savedvisibility); + swrenderer::R_SetVisibility(savedvisibility); extralight = savedextralight; ViewAngle = savedangle; R_SetViewAngle(); diff --git a/src/r_poly_sky.cpp b/src/r_poly_sky.cpp index c0a219c799..a19e0d40b3 100644 --- a/src/r_poly_sky.cpp +++ b/src/r_poly_sky.cpp @@ -85,7 +85,7 @@ void PolySkyDome::RenderRow(PolyDrawArgs &args, int row) void PolySkyDome::RenderCapColorRow(PolyDrawArgs &args, FTexture *skytex, int row, bool bottomCap) { uint32_t solid = skytex->GetSkyCapColor(bottomCap); - if (!r_swtruecolor) + if (!swrenderer::r_swtruecolor) solid = RGB32k.RGB[(RPART(solid) >> 3)][(GPART(solid) >> 3)][(BPART(solid) >> 3)]; args.vinput = &mVertices[mPrimStart[row]]; diff --git a/src/r_poly_sprite.cpp b/src/r_poly_sprite.cpp index eb1328b485..46c90adc26 100644 --- a/src/r_poly_sprite.cpp +++ b/src/r_poly_sprite.cpp @@ -119,7 +119,7 @@ void RenderPolySprite::Render(const TriMatrix &worldToClip, const Vec4f &clipPla PolyDrawArgs args; args.uniforms.flags = 0; - if (fullbrightSprite || fixedlightlev >= 0 || fixedcolormap) + if (fullbrightSprite || swrenderer::fixedlightlev >= 0 || swrenderer::fixedcolormap) { args.uniforms.light = 256; args.uniforms.flags |= TriUniforms::fixed_light; @@ -228,7 +228,7 @@ void RenderPolySprite::Render(const TriMatrix &worldToClip, const Vec4f &clipPla args.SetTexture(tex, thing->Translation, true); } - if (!r_swtruecolor) + if (!swrenderer::r_swtruecolor) { uint32_t r = (args.uniforms.color >> 16) & 0xff; uint32_t g = (args.uniforms.color >> 8) & 0xff; @@ -309,9 +309,9 @@ visstyle_t RenderPolySprite::GetSpriteVisStyle(AActor *thing, double z) } // get light level - if (fixedcolormap != nullptr) + if (swrenderer::fixedcolormap != nullptr) { // fixed map - visstyle.BaseColormap = fixedcolormap; + visstyle.BaseColormap = swrenderer::fixedcolormap; visstyle.ColormapNum = 0; } else @@ -320,10 +320,10 @@ visstyle_t RenderPolySprite::GetSpriteVisStyle(AActor *thing, double z) { mybasecolormap = 
GetSpecialLights(mybasecolormap->Color, mybasecolormap->Fade.InverseColor(), mybasecolormap->Desaturate); } - if (fixedlightlev >= 0) + if (swrenderer::fixedlightlev >= 0) { visstyle.BaseColormap = mybasecolormap; - visstyle.ColormapNum = fixedlightlev >> COLORMAPSHIFT; + visstyle.ColormapNum = swrenderer::fixedlightlev >> COLORMAPSHIFT; } else if (!foggy && ((thing->renderflags & RF_FULLBRIGHT) || (thing->flags5 & MF5_BRIGHT))) { // full bright @@ -333,7 +333,7 @@ visstyle_t RenderPolySprite::GetSpriteVisStyle(AActor *thing, double z) else { // diminished light double minz = double((2048 * 4) / double(1 << 20)); - visstyle.ColormapNum = GETPALOOKUP(r_SpriteVisibility / MAX(z, minz), spriteshade); + visstyle.ColormapNum = GETPALOOKUP(swrenderer::r_SpriteVisibility / MAX(z, minz), spriteshade); visstyle.BaseColormap = mybasecolormap; } } diff --git a/src/r_poly_triangle.cpp b/src/r_poly_triangle.cpp index a3398df2b1..4b6e1c344a 100644 --- a/src/r_poly_triangle.cpp +++ b/src/r_poly_triangle.cpp @@ -498,8 +498,8 @@ TriMatrix TriMatrix::viewToClip() float near = 5.0f; float far = 65536.0f; float width = (float)(FocalTangent * near); - float top = (float)(CenterY / InvZtoScale * near); - float bottom = (float)(top - viewheight / InvZtoScale * near); + float top = (float)(swrenderer::CenterY / swrenderer::InvZtoScale * near); + float bottom = (float)(top - viewheight / swrenderer::InvZtoScale * near); return frustum(-width, width, bottom, top, near, far); } diff --git a/src/r_poly_triangle.h b/src/r_poly_triangle.h index 1c07ce5bd0..46948310fe 100644 --- a/src/r_poly_triangle.h +++ b/src/r_poly_triangle.h @@ -70,7 +70,7 @@ public: { textureWidth = texture->GetWidth(); textureHeight = texture->GetHeight(); - if (r_swtruecolor) + if (swrenderer::r_swtruecolor) texturePixels = (const uint8_t *)texture->GetPixelsBgra(); else texturePixels = texture->GetPixels(); @@ -84,7 +84,7 @@ public: FRemapTable *table = TranslationToTable(translationID); if (table != nullptr && 
!table->Inactive) { - if (r_swtruecolor) + if (swrenderer::r_swtruecolor) translation = (uint8_t*)table->Palette; else translation = table->Remap; diff --git a/src/r_poly_wall.cpp b/src/r_poly_wall.cpp index cd4d32077f..4dd24887d9 100644 --- a/src/r_poly_wall.cpp +++ b/src/r_poly_wall.cpp @@ -31,7 +31,7 @@ #include "r_poly_wall.h" #include "r_poly_decal.h" #include "r_poly.h" -#include "r_sky.h" // for skyflatnum +#include "r_sky.h" EXTERN_CVAR(Bool, r_drawmirrors) @@ -330,7 +330,7 @@ FTexture *RenderPolyWall::GetTexture() int RenderPolyWall::GetLightLevel() { - if (fixedlightlev >= 0 || fixedcolormap) + if (swrenderer::fixedlightlev >= 0 || swrenderer::fixedcolormap) { return 255; } diff --git a/src/r_poly_wallsprite.cpp b/src/r_poly_wallsprite.cpp index 351e24e364..0521fb06e3 100644 --- a/src/r_poly_wallsprite.cpp +++ b/src/r_poly_wallsprite.cpp @@ -99,7 +99,7 @@ void RenderPolyWallSprite::Render(const TriMatrix &worldToClip, const Vec4f &cli bool fullbrightSprite = ((thing->renderflags & RF_FULLBRIGHT) || (thing->flags5 & MF5_BRIGHT)); PolyDrawArgs args; - if (fullbrightSprite || fixedlightlev >= 0 || fixedcolormap) + if (fullbrightSprite || swrenderer::fixedlightlev >= 0 || swrenderer::fixedcolormap) { args.uniforms.light = 256; args.uniforms.flags = TriUniforms::fixed_light; diff --git a/src/r_segs.cpp b/src/r_segs.cpp index 175768aecd..0e59ef0f74 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -56,12 +56,14 @@ #define WALLYREPEAT 8 - CVAR(Bool, r_np2, true, 0) +CVAR(Bool, r_fogboundary, true, 0) +CVAR(Bool, r_drawmirrors, true, 0) EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor); +EXTERN_CVAR(Bool, r_mipmap) -//CVAR (Int, ty, 8, 0) -//CVAR (Int, tx, 8, 0) +namespace swrenderer +{ #define HEIGHTBITS 12 #define HEIGHTSHIFT (FRACBITS-HEIGHTBITS) @@ -142,16 +144,6 @@ void wallscan_np2(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t static void wallscan_np2_ds(drawseg_t *ds, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, 
double yrepeat); static void call_wallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, bool mask); -//============================================================================= -// -// CVAR r_fogboundary -// -// If true, makes fog look more "real" by shading the walls separating two -// sectors with different fog. -//============================================================================= - -CVAR(Bool, r_fogboundary, true, 0) - inline bool IsFogBoundary (sector_t *front, sector_t *back) { return r_fogboundary && fixedcolormap == NULL && front->ColorMap->Fade && @@ -159,14 +151,6 @@ inline bool IsFogBoundary (sector_t *front, sector_t *back) (front->GetTexture(sector_t::ceiling) != skyflatnum || back->GetTexture(sector_t::ceiling) != skyflatnum); } -//============================================================================= -// -// CVAR r_drawmirrors -// -// Set to false to disable rendering of mirrors -//============================================================================= - -CVAR(Bool, r_drawmirrors, true, 0) // // R_RenderMaskedSegRange @@ -1065,7 +1049,6 @@ void R_RenderFakeWallRange (drawseg_t *ds, int x1, int x2) return; } -EXTERN_CVAR(Bool, r_mipmap) struct WallscanSampler { @@ -3162,3 +3145,5 @@ static void R_RenderDecal (side_t *wall, DBaseDecal *decal, drawseg_t *clipper, done: WallC = savecoord; } + +} diff --git a/src/r_segs.h b/src/r_segs.h index 1fc428c964..7d34a78f40 100644 --- a/src/r_segs.h +++ b/src/r_segs.h @@ -23,6 +23,9 @@ #ifndef __R_SEGS_H__ #define __R_SEGS_H__ +namespace swrenderer +{ + struct drawseg_t; void R_RenderMaskedSegRange (drawseg_t *ds, int x1, int x2); @@ -70,4 +73,6 @@ extern int CurrentPortalUniq; extern bool CurrentPortalInSkybox; extern TArray WallPortals; +} + #endif diff --git a/src/r_state.h b/src/r_state.h index b66ad57eb7..cd4aee4be3 100644 --- a/src/r_state.h +++ b/src/r_state.h @@ -80,7 +80,7 @@ extern int numgamesubsectors; extern AActor* camera; // [RH] 
camera instead of viewplayer extern sector_t* viewsector; // [RH] keep track of sector viewing from -extern angle_t xtoviewangle[MAXWIDTH+1]; +namespace swrenderer { extern angle_t xtoviewangle[MAXWIDTH+1]; } extern DAngle FieldOfView; int R_FindSkin (const char *name, int pclass); // [RH] Find a skin diff --git a/src/r_swrenderer.cpp b/src/r_swrenderer.cpp index 0f30fc3dff..56820e5361 100644 --- a/src/r_swrenderer.cpp +++ b/src/r_swrenderer.cpp @@ -58,11 +58,18 @@ CUSTOM_CVAR(Bool, r_polyrenderer, 0, CVAR_ARCHIVE | CVAR_GLOBALCONFIG | CVAR_NOI } } +namespace swrenderer +{ + void R_SWRSetWindow(int windowSize, int fullWidth, int fullHeight, int stHeight, float trueratio); void R_SetupColormap(player_t *); void R_SetupFreelook(); void R_InitRenderer(); +} + +using namespace swrenderer; + FSoftwareRenderer::FSoftwareRenderer() { } diff --git a/src/r_things.cpp b/src/r_things.cpp index 8f91b17408..659ad916aa 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -66,6 +66,19 @@ #include "p_local.h" #include "p_maputl.h" +EXTERN_CVAR(Bool, st_scale) +EXTERN_CVAR(Bool, r_shadercolormaps) +EXTERN_CVAR(Int, r_drawfuzz) +EXTERN_CVAR(Bool, r_deathcamera); +EXTERN_CVAR(Bool, r_drawplayersprites) +EXTERN_CVAR(Bool, r_drawvoxels) + +CVAR(Bool, r_fullbrightignoresectorcolor, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); +CVAR(Bool, r_splitsprites, true, CVAR_ARCHIVE) + +namespace swrenderer +{ + // [RH] A c-buffer. Used for keeping track of offscreen voxel spans. struct FCoverageBuffer @@ -96,12 +109,6 @@ extern float MaskedScaleY; #define BASEXCENTER (160) #define BASEYCENTER (100) -EXTERN_CVAR (Bool, st_scale) -EXTERN_CVAR(Bool, r_shadercolormaps) -EXTERN_CVAR(Int, r_drawfuzz) -EXTERN_CVAR(Bool, r_deathcamera); -CVAR(Bool, r_fullbrightignoresectorcolor, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); - // // Sprite rotation 0 is facing the viewer, // rotation 1 is one angle turn CLOCKWISE around the axis. 
@@ -133,8 +140,6 @@ FTexture *WallSpriteTile; short zeroarray[MAXWIDTH]; short screenheightarray[MAXWIDTH]; -EXTERN_CVAR (Bool, r_drawplayersprites) -EXTERN_CVAR (Bool, r_drawvoxels) // // INITIALIZATION FUNCTIONS @@ -1903,8 +1908,6 @@ static int sd_comparex (const void *arg1, const void *arg2) return (*(drawseg_t **)arg2)->x2 - (*(drawseg_t **)arg1)->x2; } -CVAR (Bool, r_splitsprites, true, CVAR_ARCHIVE) - // Split up vissprites that intersect drawsegs void R_SplitVisSprites () { @@ -3446,3 +3449,5 @@ void R_CheckOffscreenBuffer(int width, int height, bool spansonly) OffscreenBufferWidth = width; OffscreenBufferHeight = height; } + +} diff --git a/src/r_things.h b/src/r_things.h index e354898924..0e036d633e 100644 --- a/src/r_things.h +++ b/src/r_things.h @@ -25,6 +25,12 @@ #include "r_bsp.h" +struct particle_t; +struct FVoxel; + +namespace swrenderer +{ + // A vissprite_t is a thing // that will be drawn during a refresh. // I.e. a sprite object that is partly visible. @@ -94,8 +100,6 @@ struct vissprite_t vissprite_t() {} }; -struct particle_t; - extern void(*R_DrawParticle)(vissprite_t *); void R_DrawParticle_C (vissprite_t *); void R_DrawParticle_rgba (vissprite_t *); @@ -148,5 +152,6 @@ void R_DrawVoxel(const FVector3 &viewpos, FAngle viewangle, void R_ClipVisSprite (vissprite_t *vis, int xl, int xh); +} #endif diff --git a/src/r_thread.h b/src/r_thread.h index 3271d80506..d80fd02031 100644 --- a/src/r_thread.h +++ b/src/r_thread.h @@ -99,7 +99,7 @@ protected: void DetectRangeError(uint32_t *&dest, int &dest_y, int &count) { #if defined(_MSC_VER) && defined(_DEBUG) - if (dest_y < 0 || count < 0 || dest_y + count > dc_destheight) + if (dest_y < 0 || count < 0 || dest_y + count > swrenderer::dc_destheight) __debugbreak(); // Buffer overrun detected! 
#endif @@ -107,24 +107,26 @@ protected: { count += dest_y; dest_y = 0; - dest = (uint32_t*)dc_destorg; + dest = (uint32_t*)swrenderer::dc_destorg; } - else if (dest_y >= dc_destheight) + else if (dest_y >= swrenderer::dc_destheight) { dest_y = 0; count = 0; } if (count < 0 || count > MAXHEIGHT) count = 0; - if (dest_y + count >= dc_destheight) - count = dc_destheight - dest_y; + if (dest_y + count >= swrenderer::dc_destheight) + count = swrenderer::dc_destheight - dest_y; } public: DrawerCommand() { - _dest_y = static_cast((dc_dest - dc_destorg) / (dc_pitch * 4)); + _dest_y = static_cast((swrenderer::dc_dest - swrenderer::dc_destorg) / (swrenderer::dc_pitch * 4)); } + + virtual ~DrawerCommand() { } virtual void Execute(DrawerThread *thread) = 0; virtual FString DebugInfo() = 0; diff --git a/src/v_draw.cpp b/src/v_draw.cpp index 05cfdd0dc2..985238071b 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -133,6 +133,8 @@ void DCanvas::DrawTexture (FTexture *img, double x, double y, int tags_first, .. 
void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) { #ifndef NO_SWRENDER + using namespace swrenderer; + static short bottomclipper[MAXWIDTH], topclipper[MAXWIDTH]; const BYTE *translation = NULL; @@ -1026,7 +1028,7 @@ void DCanvas::PUTTRANSDOT (int xx, int yy, int basecolor, int level) { uint32_t *spot = (uint32_t*)GetBuffer() + oldyyshifted + xx; - uint32_t fg = LightBgra::shade_pal_index_simple(basecolor, LightBgra::calc_light_multiplier(0)); + uint32_t fg = swrenderer::LightBgra::shade_pal_index_simple(basecolor, swrenderer::LightBgra::calc_light_multiplier(0)); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -1359,6 +1361,8 @@ void DCanvas::FillSimplePoly(FTexture *tex, FVector2 *points, int npoints, FDynamicColormap *colormap, int lightlevel, int bottomclip) { #ifndef NO_SWRENDER + using namespace swrenderer; + // Use an equation similar to player sprites to determine shade fixed_t shade = LIGHT2SHADE(lightlevel) - 12*FRACUNIT; float topy, boty, leftx, rightx; diff --git a/src/win32/fb_d3d9.cpp b/src/win32/fb_d3d9.cpp index 3ede15b395..5b8670088a 100644 --- a/src/win32/fb_d3d9.cpp +++ b/src/win32/fb_d3d9.cpp @@ -1405,17 +1405,16 @@ void D3DFB::Draw3DPart(bool copy3d) D3DCOLOR color0, color1; if (Accel2D) { - if (realfixedcolormap == NULL) + auto &map = swrenderer::realfixedcolormap; + if (map == NULL) { color0 = 0; color1 = 0xFFFFFFF; } else { - color0 = D3DCOLOR_COLORVALUE(realfixedcolormap->ColorizeStart[0]/2, - realfixedcolormap->ColorizeStart[1]/2, realfixedcolormap->ColorizeStart[2]/2, 0); - color1 = D3DCOLOR_COLORVALUE(realfixedcolormap->ColorizeEnd[0]/2, - realfixedcolormap->ColorizeEnd[1]/2, realfixedcolormap->ColorizeEnd[2]/2, 1); + color0 = D3DCOLOR_COLORVALUE(map->ColorizeStart[0]/2, map->ColorizeStart[1]/2, map->ColorizeStart[2]/2, 0); + color1 = D3DCOLOR_COLORVALUE(map->ColorizeEnd[0]/2, map->ColorizeEnd[1]/2, map->ColorizeEnd[2]/2, 1); if (IsBgra()) 
SetPixelShader(Shaders[SHADER_SpecialColormap]); else From 3890c80b1a1031b2a5b1f90d3b31e428081f6f15 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 1 Dec 2016 08:59:47 +0100 Subject: [PATCH 415/912] Fix compile error for msvc --- src/r_main.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/r_main.h b/src/r_main.h index f4d0a144ab..5908667c8c 100644 --- a/src/r_main.h +++ b/src/r_main.h @@ -33,11 +33,11 @@ extern double ViewSin; extern int viewwindowx; extern int viewwindowy; +typedef BYTE lighttable_t; // This could be wider for >8 bit display. + namespace swrenderer { -typedef BYTE lighttable_t; // This could be wider for >8 bit display. - // // POV related. // From 2203da0c9360611060ee495966ebf29b7479caf2 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 1 Dec 2016 09:00:24 +0100 Subject: [PATCH 416/912] Minor simplification of the light calculation --- tools/drawergen/fixedfunction/drawtrianglecodegen.cpp | 9 +++++---- tools/drawergen/fixedfunction/drawtrianglecodegen.h | 1 + 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp b/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp index 4184dc5362..fd4809e5d6 100644 --- a/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp +++ b/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp @@ -145,6 +145,8 @@ void DrawTriangleCodegen::Setup() gradWX = gradWX * (float)q; for (int i = 0; i < TriVertex::NumVarying; i++) gradVaryingX[i] = gradVaryingX[i] * (float)q; + + shade = 64.0f - (SSAFloat(light * 255 / 256) + 12.0f) * 32.0f / 128.0f; } SSAFloat DrawTriangleCodegen::gradx(SSAFloat x0, SSAFloat y0, SSAFloat x1, SSAFloat y1, SSAFloat x2, SSAFloat y2, SSAFloat c0, SSAFloat c1, SSAFloat c2) @@ -244,13 +246,12 @@ void DrawTriangleCodegen::LoopBlockX() SSAFloat globVis = SSAFloat(1706.0f); SSAFloat vis = globVis * posx_w; - SSAFloat shade = 64.0f - (SSAFloat(light * 255 / 256) + 12.0f) * 32.0f / 128.0f; - SSAFloat 
lightscale = SSAFloat::clamp((shade - SSAFloat::MIN(SSAFloat(24.0f), vis)) / 32.0f, SSAFloat(0.0f), SSAFloat(31.0f / 32.0f)); - SSAInt diminishedlight = SSAInt(SSAFloat::clamp((1.0f - lightscale) * 256.0f + 0.5f, SSAFloat(0.0f), SSAFloat(256.0f)), false); + SSAInt lightscale = SSAInt(SSAFloat::clamp((shade - SSAFloat::MIN(SSAFloat(24.0f), vis)) / 32.0f, SSAFloat(0.0f), SSAFloat(31.0f / 32.0f)) * 256.0f, true); + SSAInt diminishedlight = 256 - lightscale; if (!truecolor) { - SSAInt diminishedindex = SSAInt(lightscale * 32.0f, false); + SSAInt diminishedindex = lightscale / 8; SSAInt lightindex = SSAInt::MIN((256 - light) * 32 / 256, SSAInt(31)); SSAInt colormapindex = is_fixed_light.select(lightindex, diminishedindex); currentcolormap = Colormaps[colormapindex << 8]; diff --git a/tools/drawergen/fixedfunction/drawtrianglecodegen.h b/tools/drawergen/fixedfunction/drawtrianglecodegen.h index bc894f1a21..584260a4d0 100644 --- a/tools/drawergen/fixedfunction/drawtrianglecodegen.h +++ b/tools/drawergen/fixedfunction/drawtrianglecodegen.h @@ -136,6 +136,7 @@ private: SSAInt C1, C2, C3; SSAFloat gradWX, gradWY; SSAFloat gradVaryingX[TriVertex::NumVarying], gradVaryingY[TriVertex::NumVarying]; + SSAFloat shade; SSAInt x, y; SSAInt x0, x1, y0, y1; From c89e023bd5bd6b1ec4f2462c7fd50c5e476c20c0 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 1 Dec 2016 09:31:43 +0100 Subject: [PATCH 417/912] Remove unused left/top clip --- .../fixedfunction/drawtrianglecodegen.cpp | 14 ++++++-------- .../drawergen/fixedfunction/drawtrianglecodegen.h | 2 -- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp b/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp index fd4809e5d6..69aacbbef4 100644 --- a/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp +++ b/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp @@ -81,9 +81,9 @@ void DrawTriangleCodegen::Setup() FDY31 = DY31 << 4; // Bounding rectangle - minx = 
SSAInt::MAX((SSAInt::MIN(SSAInt::MIN(X1, X2), X3) + 0xF).ashr(4), clipleft); + minx = SSAInt::MAX((SSAInt::MIN(SSAInt::MIN(X1, X2), X3) + 0xF).ashr(4), SSAInt(0)); maxx = SSAInt::MIN((SSAInt::MAX(SSAInt::MAX(X1, X2), X3) + 0xF).ashr(4), clipright - 1); - miny = SSAInt::MAX((SSAInt::MIN(SSAInt::MIN(Y1, Y2), Y3) + 0xF).ashr(4), cliptop); + miny = SSAInt::MAX((SSAInt::MIN(SSAInt::MIN(Y1, Y2), Y3) + 0xF).ashr(4), SSAInt(0)); maxy = SSAInt::MIN((SSAInt::MAX(SSAInt::MAX(Y1, Y2), Y3) + 0xF).ashr(4), clipbottom - 1); SSAIfBlock if0; @@ -242,7 +242,7 @@ void DrawTriangleCodegen::LoopBlockX() branch.if_block(!(a == SSAInt(0) || b == SSAInt(0) || c == SSAInt(0))); // Check if block needs clipping - SSABool clipneeded = x < clipleft || (x + q) > clipright || y < cliptop || (y + q) > clipbottom; + SSABool clipneeded = (x + q) > clipright || (y + q) > clipbottom; SSAFloat globVis = SSAFloat(1706.0f); SSAFloat vis = globVis * posx_w; @@ -253,12 +253,12 @@ void DrawTriangleCodegen::LoopBlockX() { SSAInt diminishedindex = lightscale / 8; SSAInt lightindex = SSAInt::MIN((256 - light) * 32 / 256, SSAInt(31)); - SSAInt colormapindex = is_fixed_light.select(lightindex, diminishedindex); + SSAInt colormapindex = (!is_fixed_light).select(diminishedindex, lightindex); currentcolormap = Colormaps[colormapindex << 8]; } else { - currentlight = is_fixed_light.select(light, diminishedlight); + currentlight = (!is_fixed_light).select(diminishedlight, light); } SetStencilBlock(x / 8 + y / 8 * stencilPitch); @@ -469,7 +469,7 @@ void DrawTriangleCodegen::LoopPartialBlock() AffineVaryingPosX[i] = stack_AffineVaryingPosX[i].load(); loopx.loop_block(ix < SSAInt(q), q); { - SSABool visible = (ix + x >= clipleft) && (ix + x < clipright) && (iy + y >= cliptop) && (iy + y < clipbottom); + SSABool visible = (ix + x < clipright) && (iy + y < clipbottom); SSABool covered = CX1 > SSAInt(0) && CX2 > SSAInt(0) && CX3 > SSAInt(0) && visible; if (variant == TriDrawVariant::DrawSubsector || variant == 
TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector) @@ -815,9 +815,7 @@ void DrawTriangleCodegen::LoadArgs(SSAValue args, SSAValue thread_data) v1 = LoadTriVertex(args[0][2].load(true)); v2 = LoadTriVertex(args[0][3].load(true)); v3 = LoadTriVertex(args[0][4].load(true)); - clipleft = SSAInt(0);// args[0][5].load(true); clipright = args[0][6].load(true); - cliptop = SSAInt(0);// args[0][7].load(true); clipbottom = args[0][8].load(true); texturePixels = args[0][9].load(true); textureWidth = args[0][10].load(true); diff --git a/tools/drawergen/fixedfunction/drawtrianglecodegen.h b/tools/drawergen/fixedfunction/drawtrianglecodegen.h index 584260a4d0..31df77b954 100644 --- a/tools/drawergen/fixedfunction/drawtrianglecodegen.h +++ b/tools/drawergen/fixedfunction/drawtrianglecodegen.h @@ -93,9 +93,7 @@ private: SSATriVertex v1; SSATriVertex v2; SSATriVertex v3; - SSAInt clipleft; SSAInt clipright; - SSAInt cliptop; SSAInt clipbottom; SSAUBytePtr texturePixels; SSAInt textureWidth; From e3b3b7a4ce39587ae595c38fbe5fe225c0c311fb Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 1 Dec 2016 10:52:53 +0100 Subject: [PATCH 418/912] Masked store experiment (which turned out to be much slower) --- .../fixedfunction/drawtrianglecodegen.cpp | 125 ++++++++++++++++++ tools/drawergen/ssa/ssa_float_ptr.cpp | 2 +- tools/drawergen/ssa/ssa_int_ptr.cpp | 22 ++- tools/drawergen/ssa/ssa_int_ptr.h | 3 + tools/drawergen/ssa/ssa_ubyte_ptr.cpp | 52 +++++++- tools/drawergen/ssa/ssa_ubyte_ptr.h | 4 + tools/drawergen/ssa/ssa_vec4f_ptr.cpp | 2 +- 7 files changed, 206 insertions(+), 4 deletions(-) diff --git a/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp b/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp index 69aacbbef4..e29e2a1223 100644 --- a/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp +++ b/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp @@ -541,6 +541,131 @@ void DrawTriangleCodegen::LoopPartialBlock() loopy.end_block(); } +#if 0 +void 
DrawTriangleCodegen::LoopMaskedStoreBlock() +{ + if (variant == TriDrawVariant::Stencil) + { + } + else if (variant == TriDrawVariant::StencilClose) + { + } + else + { + int pixelsize = truecolor ? 4 : 1; + + AffineW = posx_w; + for (int i = 0; i < TriVertex::NumVarying; i++) + AffineVaryingPosY[i] = posx_varying[i]; + + SSAInt CY1 = C1 + DX12 * y0 - DY12 * x0; + SSAInt CY2 = C2 + DX23 * y0 - DY23 * x0; + SSAInt CY3 = C3 + DX31 * y0 - DY31 * x0; + + for (int iy = 0; iy < q; iy++) + { + SSAUBytePtr buffer = dest[(x + iy * pitch) * pixelsize]; + SSAIntPtr subsectorbuffer = subsectorGBuffer[x + iy * pitch]; + + SetupAffineBlock(); + + SSAInt CX1 = CY1; + SSAInt CX2 = CY2; + SSAInt CX3 = CY3; + + for (int ix = 0; ix < q; ix += 4) + { + SSABool covered[4]; + for (int maskindex = 0; maskindex < 4; maskindex++) + { + covered[maskindex] = CX1 > SSAInt(0) && CX2 > SSAInt(0) && CX3 > SSAInt(0); + + if (variant == TriDrawVariant::DrawSubsector || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector) + { + auto xx = SSAInt(ix + maskindex); + auto yy = SSAInt(iy); + covered[maskindex] = covered[maskindex] && SSABool::compare_uge(StencilGet(xx, yy), stencilTestValue) && subsectorbuffer[ix + maskindex].load(true) >= subsectorDepth; + } + else if (variant == TriDrawVariant::StencilClose) + { + auto xx = SSAInt(ix + maskindex); + auto yy = SSAInt(iy); + covered[maskindex] = covered[maskindex] && SSABool::compare_uge(StencilGet(xx, yy), stencilTestValue); + } + else + { + auto xx = SSAInt(ix + maskindex); + auto yy = SSAInt(iy); + covered[maskindex] = covered[maskindex] && StencilGet(xx, yy) == stencilTestValue; + } + + CX1 = CX1 - FDY12; + CX2 = CX2 - FDY23; + CX3 = CX3 - FDY31; + } + + SSAUBytePtr buf = buffer[ix * pixelsize]; + if (truecolor) + { + SSAVec16ub pixels16 = buf.load_unaligned_vec16ub(false); + SSAVec8s pixels8hi = SSAVec8s::extendhi(pixels16); + SSAVec8s pixels8lo = SSAVec8s::extendlo(pixels16); + SSAVec4i pixels[4] = + { + 
SSAVec4i::extendlo(pixels8lo), + SSAVec4i::extendhi(pixels8lo), + SSAVec4i::extendlo(pixels8hi), + SSAVec4i::extendhi(pixels8hi) + }; + + for (int sse = 0; sse < 4; sse++) + { + pixels[sse] = ProcessPixel32(pixels[sse], AffineVaryingPosX); + + for (int i = 0; i < TriVertex::NumVarying; i++) + AffineVaryingPosX[i] = AffineVaryingPosX[i] + AffineVaryingStepX[i]; + } + + buf.store_masked_vec16ub(SSAVec16ub(SSAVec8s(pixels[0], pixels[1]), SSAVec8s(pixels[2], pixels[3])), covered); + } + else + { + SSAVec4i pixelsvec = buf.load_vec4ub(false); + SSAInt pixels[4] = + { + pixelsvec[0], + pixelsvec[1], + pixelsvec[2], + pixelsvec[3] + }; + + for (int sse = 0; sse < 4; sse++) + { + pixels[sse] = ProcessPixel8(pixels[sse], AffineVaryingPosX); + + for (int i = 0; i < TriVertex::NumVarying; i++) + AffineVaryingPosX[i] = AffineVaryingPosX[i] + AffineVaryingStepX[i]; + } + + buf.store_masked_vec4ub(SSAVec4i(pixels[0], pixels[1], pixels[2], pixels[3]), covered); + } + + if (variant != TriDrawVariant::DrawSubsector && variant != TriDrawVariant::FillSubsector && variant != TriDrawVariant::FuzzSubsector) + subsectorbuffer[ix].store_masked_vec4i(SSAVec4i(subsectorDepth), covered); + } + + AffineW = AffineW + gradWY; + for (int i = 0; i < TriVertex::NumVarying; i++) + AffineVaryingPosY[i] = AffineVaryingPosY[i] + gradVaryingY[i]; + + CY1 = CY1 + FDX12; + CY2 = CY2 + FDX23; + CY3 = CY3 + FDX31; + } + } +} +#endif + SSAVec4i DrawTriangleCodegen::TranslateSample32(SSAInt uvoffset) { if (variant == TriDrawVariant::FillNormal || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector) diff --git a/tools/drawergen/ssa/ssa_float_ptr.cpp b/tools/drawergen/ssa/ssa_float_ptr.cpp index 50507884b0..731fbbef8e 100644 --- a/tools/drawergen/ssa/ssa_float_ptr.cpp +++ b/tools/drawergen/ssa/ssa_float_ptr.cpp @@ -86,6 +86,6 @@ void SSAFloatPtr::store_vec4f(const SSAVec4f &new_value) void SSAFloatPtr::store_unaligned_vec4f(const SSAVec4f &new_value) { llvm::PointerType 
*m4xfloattypeptr = llvm::VectorType::get(llvm::Type::getFloatTy(SSAScope::context()), 4)->getPointerTo(); - auto inst = SSAScope::builder().CreateAlignedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xfloattypeptr, SSAScope::hint()), 1); + auto inst = SSAScope::builder().CreateAlignedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xfloattypeptr, SSAScope::hint()), 4); inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list()); } diff --git a/tools/drawergen/ssa/ssa_int_ptr.cpp b/tools/drawergen/ssa/ssa_int_ptr.cpp index 9b8b44501f..5f60f73589 100644 --- a/tools/drawergen/ssa/ssa_int_ptr.cpp +++ b/tools/drawergen/ssa/ssa_int_ptr.cpp @@ -23,6 +23,7 @@ #include "precomp.h" #include "ssa_int_ptr.h" #include "ssa_scope.h" +#include "ssa_bool.h" SSAIntPtr::SSAIntPtr() : v(0) @@ -86,6 +87,25 @@ void SSAIntPtr::store_vec4i(const SSAVec4i &new_value) void SSAIntPtr::store_unaligned_vec4i(const SSAVec4i &new_value) { llvm::PointerType *m4xint32typeptr = llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 4)->getPointerTo(); - auto inst = SSAScope::builder().CreateAlignedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xint32typeptr, SSAScope::hint()), 1); + auto inst = SSAScope::builder().CreateAlignedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xint32typeptr, SSAScope::hint()), 4); + inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list()); +} + +void SSAIntPtr::store_masked_vec4i(const SSAVec4i &new_value, SSABool mask[4]) +{ + // Create mask vector + std::vector maskconstants; + maskconstants.resize(4, llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(1, 0, false))); + llvm::Value *maskValue = llvm::ConstantVector::get(maskconstants); +#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 9) + for (int i = 0; i < 4; i++) + maskValue = SSAScope::builder().CreateInsertElement(maskValue, mask[i].v, SSAInt(i).v, SSAScope::hint()); +#else + 
for (int i = 0; i < 4; i++) + maskValue = SSAScope::builder().CreateInsertElement(maskValue, mask[i].v, (uint64_t)i, SSAScope::hint()); +#endif + + llvm::PointerType *m4xint32typeptr = llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 4)->getPointerTo(); + auto inst = SSAScope::builder().CreateMaskedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xint32typeptr, SSAScope::hint()), 1, maskValue); inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list()); } diff --git a/tools/drawergen/ssa/ssa_int_ptr.h b/tools/drawergen/ssa/ssa_int_ptr.h index 04075c15eb..da6ecf168b 100644 --- a/tools/drawergen/ssa/ssa_int_ptr.h +++ b/tools/drawergen/ssa/ssa_int_ptr.h @@ -29,6 +29,8 @@ namespace llvm { class Value; } namespace llvm { class Type; } +class SSABool; + class SSAIntPtr { public: @@ -44,6 +46,7 @@ public: void store(const SSAInt &new_value); void store_vec4i(const SSAVec4i &new_value); void store_unaligned_vec4i(const SSAVec4i &new_value); + void store_masked_vec4i(const SSAVec4i &new_value, SSABool mask[4]); llvm::Value *v; }; diff --git a/tools/drawergen/ssa/ssa_ubyte_ptr.cpp b/tools/drawergen/ssa/ssa_ubyte_ptr.cpp index c6b835a1cd..bde0b5b643 100644 --- a/tools/drawergen/ssa/ssa_ubyte_ptr.cpp +++ b/tools/drawergen/ssa/ssa_ubyte_ptr.cpp @@ -23,6 +23,7 @@ #include "precomp.h" #include "ssa_ubyte_ptr.h" #include "ssa_scope.h" +#include "ssa_bool.h" SSAUBytePtr::SSAUBytePtr() : v(0) @@ -104,6 +105,37 @@ void SSAUBytePtr::store_vec4ub(const SSAVec4i &new_value) inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list()); } +void SSAUBytePtr::store_masked_vec4ub(const SSAVec4i &new_value, SSABool mask[4]) +{ + // Store using saturate: + SSAVec8s v8s(new_value, new_value); + SSAVec16ub v16ub(v8s, v8s); + + // Create mask vector + std::vector maskconstants; + maskconstants.resize(4, llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(1, 0, false))); + llvm::Value *maskValue = 
llvm::ConstantVector::get(maskconstants); +#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 9) + for (int i = 0; i < 4; i++) + maskValue = SSAScope::builder().CreateInsertElement(maskValue, mask[i].v, SSAInt(i).v, SSAScope::hint()); +#else + for (int i = 0; i < 4; i++) + maskValue = SSAScope::builder().CreateInsertElement(maskValue, mask[i].v, (uint64_t)i, SSAScope::hint()); +#endif + + llvm::Type *m16xint8type = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 16); + llvm::PointerType *m4xint8typeptr = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 4)->getPointerTo(); + std::vector constants; + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 0))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 1))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 2))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 3))); + llvm::Value *shufflemask = llvm::ConstantVector::get(constants); + llvm::Value *val_vector = SSAScope::builder().CreateShuffleVector(v16ub.v, llvm::UndefValue::get(m16xint8type), shufflemask, SSAScope::hint()); + llvm::CallInst *inst = SSAScope::builder().CreateMaskedStore(val_vector, SSAScope::builder().CreateBitCast(v, m4xint8typeptr, SSAScope::hint()), 1, maskValue); + inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list()); +} + void SSAUBytePtr::store_vec16ub(const SSAVec16ub &new_value) { llvm::PointerType *m16xint8typeptr = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 16)->getPointerTo(); @@ -118,6 +150,24 @@ void SSAUBytePtr::store_vec16ub(const SSAVec16ub &new_value) void SSAUBytePtr::store_unaligned_vec16ub(const SSAVec16ub &new_value) { llvm::PointerType *m16xint8typeptr = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 16)->getPointerTo(); - llvm::StoreInst *inst = 
SSAScope::builder().CreateAlignedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m16xint8typeptr, SSAScope::hint()), 1); + llvm::StoreInst *inst = SSAScope::builder().CreateAlignedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m16xint8typeptr, SSAScope::hint()), 4); + inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list()); +} + +void SSAUBytePtr::store_masked_vec16ub(const SSAVec16ub &new_value, SSABool mask[4]) +{ + std::vector constants; + constants.resize(16, llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(1, 0, false))); + llvm::Value *maskValue = llvm::ConstantVector::get(constants); +#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 9) + for (int i = 0; i < 16; i++) + maskValue = SSAScope::builder().CreateInsertElement(maskValue, mask[i / 4].v, SSAInt(i).v, SSAScope::hint()); +#else + for (int i = 0; i < 16; i++) + maskValue = SSAScope::builder().CreateInsertElement(maskValue, mask[i / 4].v, (uint64_t)i, SSAScope::hint()); +#endif + + llvm::PointerType *m16xint8typeptr = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 16)->getPointerTo(); + llvm::CallInst *inst = SSAScope::builder().CreateMaskedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m16xint8typeptr, SSAScope::hint()), 1, maskValue); inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list()); } diff --git a/tools/drawergen/ssa/ssa_ubyte_ptr.h b/tools/drawergen/ssa/ssa_ubyte_ptr.h index b4567597e2..ba4fb5397e 100644 --- a/tools/drawergen/ssa/ssa_ubyte_ptr.h +++ b/tools/drawergen/ssa/ssa_ubyte_ptr.h @@ -31,6 +31,8 @@ namespace llvm { class Value; } namespace llvm { class Type; } +class SSABool; + class SSAUBytePtr { public: @@ -46,8 +48,10 @@ public: SSAVec16ub load_unaligned_vec16ub(bool constantScopeDomain) const; void store(const SSAUByte &new_value); void store_vec4ub(const SSAVec4i &new_value); + void store_masked_vec4ub(const SSAVec4i &new_value, SSABool mask[4]); void 
store_vec16ub(const SSAVec16ub &new_value); void store_unaligned_vec16ub(const SSAVec16ub &new_value); + void store_masked_vec16ub(const SSAVec16ub &new_value, SSABool mask[4]); llvm::Value *v; }; diff --git a/tools/drawergen/ssa/ssa_vec4f_ptr.cpp b/tools/drawergen/ssa/ssa_vec4f_ptr.cpp index 8cdee930db..866331f840 100644 --- a/tools/drawergen/ssa/ssa_vec4f_ptr.cpp +++ b/tools/drawergen/ssa/ssa_vec4f_ptr.cpp @@ -68,6 +68,6 @@ void SSAVec4fPtr::store(const SSAVec4f &new_value) void SSAVec4fPtr::store_unaligned(const SSAVec4f &new_value) { - auto inst = SSAScope::builder().CreateAlignedStore(new_value.v, v, 1, false); + auto inst = SSAScope::builder().CreateAlignedStore(new_value.v, v, 4, false); inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list()); } From 06362385d6eed88f69735734082fd55e12355c99 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 1 Dec 2016 11:21:14 +0100 Subject: [PATCH 419/912] Improve performance by 30% from using a better stencil block rejection test --- .../fixedfunction/drawtrianglecodegen.cpp | 32 +++++++++---------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp b/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp index e29e2a1223..772df0f7e2 100644 --- a/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp +++ b/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp @@ -236,10 +236,24 @@ void DrawTriangleCodegen::LoopBlockX() SSABool c01 = C3 + DX31 * y1 - DY31 * x0 > SSAInt(0); SSABool c11 = C3 + DX31 * y1 - DY31 * x1 > SSAInt(0); SSAInt c = (c00.zext_int() << 0) | (c10.zext_int() << 1) | (c01.zext_int() << 2) | (c11.zext_int() << 3); - + // Skip block when outside an edge + SSABool process_block = !(a == SSAInt(0) || b == SSAInt(0) || c == SSAInt(0)); + + SetStencilBlock(x / 8 + y / 8 * stencilPitch); + + // Stencil test the whole block, if possible + if (variant == TriDrawVariant::DrawSubsector || variant == 
TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector || variant == TriDrawVariant::StencilClose) + { + process_block = process_block && (!StencilIsSingleValue() || SSABool::compare_uge(StencilGetSingle(), stencilTestValue)); + } + else + { + process_block = process_block && (!StencilIsSingleValue() || StencilGetSingle() == stencilTestValue); + } + SSAIfBlock branch; - branch.if_block(!(a == SSAInt(0) || b == SSAInt(0) || c == SSAInt(0))); + branch.if_block(process_block); // Check if block needs clipping SSABool clipneeded = (x + q) > clipright || (y + q) > clipbottom; @@ -261,8 +275,6 @@ void DrawTriangleCodegen::LoopBlockX() currentlight = (!is_fixed_light).select(diminishedlight, light); } - SetStencilBlock(x / 8 + y / 8 * stencilPitch); - SSABool covered = a == SSAInt(0xF) && b == SSAInt(0xF) && c == SSAInt(0xF) && !clipneeded && StencilIsSingleValue(); // Accept whole block when totally covered @@ -304,16 +316,6 @@ void DrawTriangleCodegen::SetupAffineBlock() void DrawTriangleCodegen::LoopFullBlock() { - SSAIfBlock branch_stenciltest; - if (variant == TriDrawVariant::DrawSubsector || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector || variant == TriDrawVariant::StencilClose) - { - branch_stenciltest.if_block(SSABool::compare_uge(StencilGetSingle(), stencilTestValue)); - } - else - { - branch_stenciltest.if_block(StencilGetSingle() == stencilTestValue); - } - if (variant == TriDrawVariant::Stencil) { StencilClear(stencilWriteValue); @@ -418,8 +420,6 @@ void DrawTriangleCodegen::LoopFullBlock() AffineVaryingPosY[i] = AffineVaryingPosY[i] + gradVaryingY[i]; } } - - branch_stenciltest.end_block(); } void DrawTriangleCodegen::LoopPartialBlock() From fc16f6bbbc13acb888c40043ccded712d4069f9a Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 1 Dec 2016 13:31:42 +0100 Subject: [PATCH 420/912] Further improve early stencil rejection test by restoring stencil blocks to a single value if possible --- 
.../fixedfunction/drawtrianglecodegen.cpp | 126 ++++++++++++------ .../fixedfunction/drawtrianglecodegen.h | 6 +- tools/drawergen/ssa/ssa_bool.cpp | 5 +- tools/drawergen/ssa/ssa_bool.h | 2 +- 4 files changed, 94 insertions(+), 45 deletions(-) diff --git a/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp b/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp index 772df0f7e2..df86908214 100644 --- a/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp +++ b/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp @@ -285,7 +285,31 @@ void DrawTriangleCodegen::LoopBlockX() } branch_covered.else_block(); { - LoopPartialBlock(); + SSAIfBlock branch_covered_stencil; + branch_covered_stencil.if_block(StencilIsSingleValue()); + { + SSABool stenciltestpass; + if (variant == TriDrawVariant::DrawSubsector || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector || variant == TriDrawVariant::StencilClose) + { + stenciltestpass = SSABool::compare_uge(StencilGetSingle(), stencilTestValue); + } + else + { + stenciltestpass = StencilGetSingle() == stencilTestValue; + } + + SSAIfBlock branch_stenciltestpass; + branch_stenciltestpass.if_block(stenciltestpass); + { + LoopPartialBlock(true); + } + branch_stenciltestpass.end_block(); + } + branch_covered_stencil.else_block(); + { + LoopPartialBlock(false); + } + branch_covered_stencil.end_block(); } branch_covered.end_block(); @@ -422,10 +446,26 @@ void DrawTriangleCodegen::LoopFullBlock() } } -void DrawTriangleCodegen::LoopPartialBlock() +void DrawTriangleCodegen::LoopPartialBlock(bool isSingleStencilValue) { int pixelsize = truecolor ? 
4 : 1; + if (variant == TriDrawVariant::Stencil || variant == TriDrawVariant::StencilClose) + { + if (isSingleStencilValue) + { + SSAInt stencilMask = StencilBlockMask.load(false); + SSAUByte val0 = stencilMask.trunc_ubyte(); + for (int i = 0; i < 8 * 8; i++) + StencilBlock[i].store(val0); + StencilBlockMask.store(SSAInt(0)); + } + + SSAUByte lastStencilValue = StencilBlock[0].load(false); + stack_stencilblock_restored.store(SSABool(true)); + stack_stencilblock_lastval.store(lastStencilValue); + } + stack_CY1.store(C1 + DX12 * y0 - DY12 * x0); stack_CY2.store(C2 + DX23 * y0 - DY23 * x0); stack_CY3.store(C3 + DX31 * y0 - DY31 * x0); @@ -461,6 +501,13 @@ void DrawTriangleCodegen::LoopPartialBlock() stack_ix.store(SSAInt(0)); SSAForBlock loopx; + SSABool stencilblock_restored; + SSAUByte lastStencilValue; + if (variant == TriDrawVariant::Stencil || variant == TriDrawVariant::StencilClose) + { + stencilblock_restored = stack_stencilblock_restored.load(); + lastStencilValue = stack_stencilblock_lastval.load(); + } SSAInt ix = stack_ix.load(); SSAInt CX1 = stack_CX1.load(); SSAInt CX2 = stack_CX2.load(); @@ -472,17 +519,26 @@ void DrawTriangleCodegen::LoopPartialBlock() SSABool visible = (ix + x < clipright) && (iy + y < clipbottom); SSABool covered = CX1 > SSAInt(0) && CX2 > SSAInt(0) && CX3 > SSAInt(0) && visible; - if (variant == TriDrawVariant::DrawSubsector || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector) + if (!isSingleStencilValue) { - covered = covered && SSABool::compare_uge(StencilGet(ix, iy), stencilTestValue) && subsectorbuffer[ix].load(true) >= subsectorDepth; + SSAUByte stencilValue = StencilBlock[ix + iy * 8].load(false); + + if (variant == TriDrawVariant::DrawSubsector || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector) + { + covered = covered && SSABool::compare_uge(stencilValue, stencilTestValue) && subsectorbuffer[ix].load(true) >= subsectorDepth; + } + else if (variant == 
TriDrawVariant::StencilClose) + { + covered = covered && SSABool::compare_uge(stencilValue, stencilTestValue); + } + else + { + covered = covered && stencilValue == stencilTestValue; + } } - else if (variant == TriDrawVariant::StencilClose) + else if (variant == TriDrawVariant::DrawSubsector || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector) { - covered = covered && SSABool::compare_uge(StencilGet(ix, iy), stencilTestValue); - } - else - { - covered = covered && StencilGet(ix, iy) == stencilTestValue; + covered = covered && subsectorbuffer[ix].load(true) >= subsectorDepth; } SSAIfBlock branch; @@ -490,11 +546,11 @@ void DrawTriangleCodegen::LoopPartialBlock() { if (variant == TriDrawVariant::Stencil) { - StencilSet(ix, iy, stencilWriteValue); + StencilBlock[ix + iy * 8].store(stencilWriteValue); } else if (variant == TriDrawVariant::StencilClose) { - StencilSet(ix, iy, stencilWriteValue); + StencilBlock[ix + iy * 8].store(stencilWriteValue); subsectorbuffer[ix].store(subsectorDepth); } else @@ -518,6 +574,13 @@ void DrawTriangleCodegen::LoopPartialBlock() } branch.end_block(); + if (variant == TriDrawVariant::Stencil || variant == TriDrawVariant::StencilClose) + { + SSAUByte newStencilValue = StencilBlock[ix + iy * 8].load(false); + stack_stencilblock_restored.store(stencilblock_restored && newStencilValue == lastStencilValue); + stack_stencilblock_lastval.store(newStencilValue); + } + for (int i = 0; i < TriVertex::NumVarying; i++) stack_AffineVaryingPosX[i].store(AffineVaryingPosX[i] + AffineVaryingStepX[i]); @@ -539,6 +602,18 @@ void DrawTriangleCodegen::LoopPartialBlock() stack_iy.store(iy + 1); } loopy.end_block(); + + if (variant == TriDrawVariant::Stencil || variant == TriDrawVariant::StencilClose) + { + SSAIfBlock branch; + SSABool restored = stack_stencilblock_restored.load(); + branch.if_block(restored); + { + SSAUByte lastStencilValue = stack_stencilblock_lastval.load(); + StencilClear(lastStencilValue); + } + 
branch.end_block(); + } } #if 0 @@ -891,33 +966,6 @@ void DrawTriangleCodegen::SetStencilBlock(SSAInt block) StencilBlockMask = stencilMasks[block]; } -void DrawTriangleCodegen::StencilSet(SSAInt x, SSAInt y, SSAUByte value) -{ - SSAInt mask = StencilBlockMask.load(false); - - SSAIfBlock branchNeedsUpdate; - branchNeedsUpdate.if_block(!(mask == (SSAInt(0xffffff00) | value.zext_int()))); - - SSAIfBlock branchFirstSet; - branchFirstSet.if_block((mask & SSAInt(0xffffff00)) == SSAInt(0xffffff00)); - { - SSAUByte val0 = mask.trunc_ubyte(); - for (int i = 0; i < 8 * 8; i++) - StencilBlock[i].store(val0); - StencilBlockMask.store(SSAInt(0)); - } - branchFirstSet.end_block(); - - StencilBlock[x + y * 8].store(value); - - branchNeedsUpdate.end_block(); -} - -SSAUByte DrawTriangleCodegen::StencilGet(SSAInt x, SSAInt y) -{ - return StencilIsSingleValue().select(StencilBlockMask.load(false).trunc_ubyte(), StencilBlock[x + y * 8].load(false)); -} - SSAUByte DrawTriangleCodegen::StencilGetSingle() { return StencilBlockMask.load(false).trunc_ubyte(); diff --git a/tools/drawergen/fixedfunction/drawtrianglecodegen.h b/tools/drawergen/fixedfunction/drawtrianglecodegen.h index 31df77b954..0559efd236 100644 --- a/tools/drawergen/fixedfunction/drawtrianglecodegen.h +++ b/tools/drawergen/fixedfunction/drawtrianglecodegen.h @@ -44,7 +44,7 @@ private: void LoopBlockY(); void LoopBlockX(); void LoopFullBlock(); - void LoopPartialBlock(); + void LoopPartialBlock(bool isSingleStencilValue); void SetupAffineBlock(); SSAVec4i ProcessPixel32(SSAVec4i bg, SSAInt *varying); @@ -59,9 +59,7 @@ private: SSAInt ToPal8(SSAVec4i c); void SetStencilBlock(SSAInt block); - void StencilSet(SSAInt x, SSAInt y, SSAUByte value); void StencilClear(SSAUByte value); - SSAUByte StencilGet(SSAInt x, SSAInt y); SSAUByte StencilGetSingle(); SSABool StencilIsSingleValue(); @@ -87,6 +85,8 @@ private: SSAStack stack_AffineW; SSAStack stack_AffineVaryingPosY[TriVertex::NumVarying]; SSAStack 
stack_AffineVaryingPosX[TriVertex::NumVarying]; + SSAStack stack_stencilblock_restored; + SSAStack stack_stencilblock_lastval; SSAUBytePtr dest; SSAInt pitch; diff --git a/tools/drawergen/ssa/ssa_bool.cpp b/tools/drawergen/ssa/ssa_bool.cpp index 38cec124fb..7ce99b827a 100644 --- a/tools/drawergen/ssa/ssa_bool.cpp +++ b/tools/drawergen/ssa/ssa_bool.cpp @@ -31,12 +31,13 @@ SSABool::SSABool() : v(0) { } -/* + SSABool::SSABool(bool constant) : v(0) { + v = llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(1, constant, false)); } -*/ + SSABool::SSABool(llvm::Value *v) : v(v) { diff --git a/tools/drawergen/ssa/ssa_bool.h b/tools/drawergen/ssa/ssa_bool.h index 9b8a564d85..728c7b7e8e 100644 --- a/tools/drawergen/ssa/ssa_bool.h +++ b/tools/drawergen/ssa/ssa_bool.h @@ -35,7 +35,7 @@ class SSABool { public: SSABool(); - //SSABool(bool constant); + explicit SSABool(bool constant); explicit SSABool(llvm::Value *v); static SSABool from_llvm(llvm::Value *v) { return SSABool(v); } static llvm::Type *llvm_type(); From 71350f2c17f011a722279130fff402606c728f39 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 2 Dec 2016 08:12:01 +0100 Subject: [PATCH 421/912] Sprite line clipping by subsector --- src/r_poly_portal.cpp | 91 ++++++++++++++++++++++++++++++------------- src/r_poly_portal.h | 44 ++++++++------------- src/r_poly_sprite.cpp | 57 ++++++++++++++++----------- src/r_poly_sprite.h | 1 + 4 files changed, 118 insertions(+), 75 deletions(-) diff --git a/src/r_poly_portal.cpp b/src/r_poly_portal.cpp index 5c4dbc89fd..9637d739c3 100644 --- a/src/r_poly_portal.cpp +++ b/src/r_poly_portal.cpp @@ -62,9 +62,8 @@ void RenderPolyPortal::Render(int portalDepth) void RenderPolyPortal::ClearBuffers() { - SectorSpriteRanges.clear(); - SectorSpriteRanges.resize(numsectors); - SortedSprites.clear(); + SeenSectors.clear(); + SubsectorDepths.clear(); TranslucentObjects.clear(); SectorPortals.clear(); LinePortals.clear(); @@ -117,35 +116,61 @@ void 
RenderPolyPortal::RenderSubsector(subsector_t *sub) } } - SpriteRange sprites = GetSpritesForSector(sub->sector); - for (int i = 0; i < sprites.Count; i++) - { - AActor *thing = SortedSprites[sprites.Start + i].Thing; - TranslucentObjects.push_back({ thing, sub, subsectorDepth }); - } - - TranslucentObjects.insert(TranslucentObjects.end(), SubsectorTranslucentWalls.begin(), SubsectorTranslucentWalls.end()); - SubsectorTranslucentWalls.clear(); + SeenSectors.insert(sub->sector); + SubsectorDepths[sub] = subsectorDepth; } -SpriteRange RenderPolyPortal::GetSpritesForSector(sector_t *sector) +void RenderPolyPortal::RenderSprite(AActor *thing, double sortDistance, const DVector2 &left, const DVector2 &right) { - if ((int)SectorSpriteRanges.size() < sector->sectornum || sector->sectornum < 0) - return SpriteRange(); + if (numnodes == 0) + RenderSprite(thing, sortDistance, left, right, 0.0, 1.0, subsectors); + else + RenderSprite(thing, sortDistance, left, right, 0.0, 1.0, nodes + numnodes - 1); // The head node is the last node output. 
+} - auto &range = SectorSpriteRanges[sector->sectornum]; - if (range.Start == -1) +void RenderPolyPortal::RenderSprite(AActor *thing, double sortDistance, DVector2 left, DVector2 right, double t1, double t2, void *node) +{ + while (!((size_t)node & 1)) // Keep going until found a subsector { - range.Start = (int)SortedSprites.size(); - range.Count = 0; - for (AActor *thing = sector->thinglist; thing != nullptr; thing = thing->snext) + node_t *bsp = (node_t *)node; + + DVector2 planePos(FIXED2DBL(bsp->x), FIXED2DBL(bsp->y)); + DVector2 planeNormal = DVector2(FIXED2DBL(-bsp->dy), FIXED2DBL(bsp->dx)); + double planeD = planeNormal | planePos; + + int sideLeft = (left | planeNormal) > planeD; + int sideRight = (right | planeNormal) > planeD; + + if (sideLeft != sideRight) { - SortedSprites.push_back({ thing, (thing->Pos() - ViewPos).LengthSquared() }); - range.Count++; + double dotLeft = planeNormal | left; + double dotRight = planeNormal | right; + double t = (planeD - dotLeft) / (dotRight - dotLeft); + + DVector2 mid = left * (1.0 - t) + right * t; + double tmid = t1 * (1.0 - t) + t2 * t; + + if (sideLeft == 0) + { + RenderSprite(thing, sortDistance, mid, right, tmid, t2, bsp->children[sideRight]); + right = mid; + t2 = tmid; + } + else + { + RenderSprite(thing, sortDistance, left, mid, t1, tmid, bsp->children[sideLeft]); + left = mid; + t1 = tmid; + } } - std::stable_sort(SortedSprites.begin() + range.Start, SortedSprites.begin() + range.Start + range.Count); + node = bsp->children[sideLeft]; } - return range; + + subsector_t *sub = (subsector_t *)((BYTE *)node - 1); + + auto it = SubsectorDepths.find(sub); + if (it != SubsectorDepths.end()) + TranslucentObjects.push_back({ thing, sub, it->second, sortDistance, (float)t1, (float)t2 }); } void RenderPolyPortal::RenderLine(subsector_t *sub, seg_t *line, sector_t *frontsector, uint32_t subsectorDepth) @@ -178,12 +203,12 @@ void RenderPolyPortal::RenderLine(subsector_t *sub, seg_t *line, sector_t *front if 
(!(fakeFloor->flags & FF_EXISTS)) continue; if (!(fakeFloor->flags & FF_RENDERPLANES)) continue; if (!fakeFloor->model) continue; - RenderPolyWall::Render3DFloorLine(WorldToClip, PortalPlane, line, frontsector, subsectorDepth, StencilValue, fakeFloor, SubsectorTranslucentWalls); + RenderPolyWall::Render3DFloorLine(WorldToClip, PortalPlane, line, frontsector, subsectorDepth, StencilValue, fakeFloor, TranslucentObjects); } } // Render wall, and update culling info if its an occlusion blocker - if (RenderPolyWall::RenderLine(WorldToClip, PortalPlane, line, frontsector, subsectorDepth, StencilValue, SubsectorTranslucentWalls, LinePortals)) + if (RenderPolyWall::RenderLine(WorldToClip, PortalPlane, line, frontsector, subsectorDepth, StencilValue, TranslucentObjects, LinePortals)) { if (hasSegmentRange) Cull.MarkSegmentCulled(sx1, sx2); @@ -287,6 +312,20 @@ void RenderPolyPortal::RenderTranslucent(int portalDepth) } } + for (sector_t *sector : SeenSectors) + { + for (AActor *thing = sector->thinglist; thing != nullptr; thing = thing->snext) + { + DVector2 left, right; + if (!RenderPolySprite::GetLine(thing, left, right)) + continue; + double distanceSquared = (thing->Pos() - ViewPos).LengthSquared(); + RenderSprite(thing, distanceSquared, left, right); + } + } + + std::stable_sort(TranslucentObjects.begin(), TranslucentObjects.end()); + for (auto it = TranslucentObjects.rbegin(); it != TranslucentObjects.rend(); ++it) { auto &obj = *it; diff --git a/src/r_poly_portal.h b/src/r_poly_portal.h index e2b131b323..8623f86861 100644 --- a/src/r_poly_portal.h +++ b/src/r_poly_portal.h @@ -38,40 +38,31 @@ #include "r_poly_particle.h" #include "r_poly_plane.h" #include "r_poly_cull.h" - -// Used for sorting things by distance to the camera -class PolySortedSprite -{ -public: - PolySortedSprite(AActor *thing, double distanceSquared) : Thing(thing), DistanceSquared(distanceSquared) { } - bool operator<(const PolySortedSprite &other) const { return DistanceSquared < 
other.DistanceSquared; } - - AActor *Thing; - double DistanceSquared; -}; +#include +#include class PolyTranslucentObject { public: PolyTranslucentObject(particle_t *particle, subsector_t *sub, uint32_t subsectorDepth) : particle(particle), sub(sub), subsectorDepth(subsectorDepth) { } - PolyTranslucentObject(AActor *thing, subsector_t *sub, uint32_t subsectorDepth) : thing(thing), sub(sub), subsectorDepth(subsectorDepth) { } + PolyTranslucentObject(AActor *thing, subsector_t *sub, uint32_t subsectorDepth, double dist, float t1, float t2) : thing(thing), sub(sub), subsectorDepth(subsectorDepth), DistanceSquared(dist), SpriteLeft(t1), SpriteRight(t2) { } PolyTranslucentObject(RenderPolyWall wall) : wall(wall) { } + bool operator<(const PolyTranslucentObject &other) const + { + return subsectorDepth != other.subsectorDepth ? subsectorDepth < other.subsectorDepth : DistanceSquared < other.DistanceSquared; + } + particle_t *particle = nullptr; AActor *thing = nullptr; subsector_t *sub = nullptr; - uint32_t subsectorDepth = 0; RenderPolyWall wall; -}; - -class SpriteRange -{ -public: - SpriteRange() = default; - SpriteRange(int start, int count) : Start(start), Count(count) { } - int Start = -1; - int Count = 0; + + uint32_t subsectorDepth = 0; + double DistanceSquared = 1.e6; + + float SpriteLeft = 0.0f, SpriteRight = 1.0f; }; class PolyDrawSectorPortal; @@ -95,18 +86,17 @@ private: void RenderSectors(); void RenderSubsector(subsector_t *sub); void RenderLine(subsector_t *sub, seg_t *line, sector_t *frontsector, uint32_t subsectorDepth); - - SpriteRange GetSpritesForSector(sector_t *sector); + void RenderSprite(AActor *thing, double sortDistance, const DVector2 &left, const DVector2 &right); + void RenderSprite(AActor *thing, double sortDistance, DVector2 left, DVector2 right, double t1, double t2, void *node); TriMatrix WorldToClip; Vec4f PortalPlane; uint32_t StencilValue = 0; PolyCull Cull; uint32_t NextSubsectorDepth = 0; - std::vector SectorSpriteRanges; - 
std::vector SortedSprites; + std::set SeenSectors; + std::unordered_map SubsectorDepths; std::vector TranslucentObjects; - std::vector SubsectorTranslucentWalls; std::vector> SectorPortals; std::vector> LinePortals; diff --git a/src/r_poly_sprite.cpp b/src/r_poly_sprite.cpp index 46c90adc26..5d4a9ba6cb 100644 --- a/src/r_poly_sprite.cpp +++ b/src/r_poly_sprite.cpp @@ -32,11 +32,43 @@ EXTERN_CVAR(Float, transsouls) EXTERN_CVAR(Int, r_drawfuzz) -void RenderPolySprite::Render(const TriMatrix &worldToClip, const Vec4f &clipPlane, AActor *thing, subsector_t *sub, uint32_t subsectorDepth, uint32_t stencilValue) +bool RenderPolySprite::GetLine(AActor *thing, DVector2 &left, DVector2 &right) { if (IsThingCulled(thing)) - return; + return false; + DVector3 pos = thing->InterpolatedPosition(r_TicFracF); + + bool flipTextureX = false; + FTexture *tex = GetSpriteTexture(thing, flipTextureX); + if (tex == nullptr) + return false; + + DVector2 spriteScale = thing->Scale; + double thingxscalemul = spriteScale.X / tex->Scale.X; + double thingyscalemul = spriteScale.Y / tex->Scale.Y; + + if (flipTextureX) + pos.X -= (tex->GetWidth() - tex->LeftOffset) * thingxscalemul; + else + pos.X -= tex->LeftOffset * thingxscalemul; + + double spriteHalfWidth = thingxscalemul * tex->GetWidth() * 0.5; + double spriteHeight = thingyscalemul * tex->GetHeight(); + + pos.X += spriteHalfWidth; + + left = DVector2(pos.X - ViewSin * spriteHalfWidth, pos.Y + ViewCos * spriteHalfWidth); + right = DVector2(pos.X + ViewSin * spriteHalfWidth, pos.Y - ViewCos * spriteHalfWidth); + return true; +} + +void RenderPolySprite::Render(const TriMatrix &worldToClip, const Vec4f &clipPlane, AActor *thing, subsector_t *sub, uint32_t subsectorDepth, uint32_t stencilValue) +{ + DVector2 points[2]; + if (!GetLine(thing, points[0], points[1])) + return; + DVector3 pos = thing->InterpolatedPosition(r_TicFracF); pos.Z += thing->GetBobOffset(r_TicFracF); @@ -44,6 +76,7 @@ void RenderPolySprite::Render(const TriMatrix 
&worldToClip, const Vec4f &clipPla FTexture *tex = GetSpriteTexture(thing, flipTextureX); if (tex == nullptr) return; + DVector2 spriteScale = thing->Scale; double thingxscalemul = spriteScale.X / tex->Scale.X; double thingyscalemul = spriteScale.Y / tex->Scale.Y; @@ -61,26 +94,6 @@ void RenderPolySprite::Render(const TriMatrix &worldToClip, const Vec4f &clipPla pos.X += spriteHalfWidth; - DVector2 points[2] = - { - { pos.X - ViewSin * spriteHalfWidth, pos.Y + ViewCos * spriteHalfWidth }, - { pos.X + ViewSin * spriteHalfWidth, pos.Y - ViewCos * spriteHalfWidth } - }; - - // Is this sprite inside? (To do: clip the points) - for (int i = 0; i < 2; i++) - { - for (uint32_t i = 0; i < sub->numlines; i++) - { - seg_t *line = &sub->firstline[i]; - double nx = line->v1->fY() - line->v2->fY(); - double ny = line->v2->fX() - line->v1->fX(); - double d = -(line->v1->fX() * nx + line->v1->fY() * ny); - if (pos.X * nx + pos.Y * ny + d > 0.0) - return; - } - } - //double depth = 1.0; //visstyle_t visstyle = GetSpriteVisStyle(thing, depth); // Rumor has it that AlterWeaponSprite needs to be called with visstyle passed in somewhere around here.. 
diff --git a/src/r_poly_sprite.h b/src/r_poly_sprite.h index 04b0fcb942..8ce9caffdd 100644 --- a/src/r_poly_sprite.h +++ b/src/r_poly_sprite.h @@ -31,6 +31,7 @@ class RenderPolySprite public: void Render(const TriMatrix &worldToClip, const Vec4f &clipPlane, AActor *thing, subsector_t *sub, uint32_t subsectorDepth, uint32_t stencilValue); + static bool GetLine(AActor *thing, DVector2 &left, DVector2 &right); static bool IsThingCulled(AActor *thing); static FTexture *GetSpriteTexture(AActor *thing, /*out*/ bool &flipX); From 01008f0daa3c147f0d08c0b9d06021f7f7cfb779 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 2 Dec 2016 13:06:16 +0100 Subject: [PATCH 422/912] Fix sprite bsp clipping bug and wall sorting bug --- src/r_poly_portal.cpp | 2 +- src/r_poly_portal.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/r_poly_portal.cpp b/src/r_poly_portal.cpp index 9637d739c3..7d8c385dd0 100644 --- a/src/r_poly_portal.cpp +++ b/src/r_poly_portal.cpp @@ -158,7 +158,7 @@ void RenderPolyPortal::RenderSprite(AActor *thing, double sortDistance, DVector2 } else { - RenderSprite(thing, sortDistance, left, mid, t1, tmid, bsp->children[sideLeft]); + RenderSprite(thing, sortDistance, left, mid, t1, tmid, bsp->children[sideRight]); left = mid; t1 = tmid; } diff --git a/src/r_poly_portal.h b/src/r_poly_portal.h index 8623f86861..e983ebd46f 100644 --- a/src/r_poly_portal.h +++ b/src/r_poly_portal.h @@ -46,7 +46,7 @@ class PolyTranslucentObject public: PolyTranslucentObject(particle_t *particle, subsector_t *sub, uint32_t subsectorDepth) : particle(particle), sub(sub), subsectorDepth(subsectorDepth) { } PolyTranslucentObject(AActor *thing, subsector_t *sub, uint32_t subsectorDepth, double dist, float t1, float t2) : thing(thing), sub(sub), subsectorDepth(subsectorDepth), DistanceSquared(dist), SpriteLeft(t1), SpriteRight(t2) { } - PolyTranslucentObject(RenderPolyWall wall) : wall(wall) { } + PolyTranslucentObject(RenderPolyWall wall) : wall(wall), 
subsectorDepth(wall.SubsectorDepth), DistanceSquared(1.e6) { } bool operator<(const PolyTranslucentObject &other) const { @@ -60,7 +60,7 @@ public: RenderPolyWall wall; uint32_t subsectorDepth = 0; - double DistanceSquared = 1.e6; + double DistanceSquared = 0.0; float SpriteLeft = 0.0f, SpriteRight = 1.0f; }; From ce6e1e1e472e3b6cef3892007aa6f1170d0f9aec Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 2 Dec 2016 13:24:53 +0100 Subject: [PATCH 423/912] Fully hook up sprite clipping by subsector --- src/r_poly_portal.cpp | 17 ++++------------- src/r_poly_sprite.cpp | 20 +++++++++++++------- src/r_poly_sprite.h | 2 +- 3 files changed, 18 insertions(+), 21 deletions(-) diff --git a/src/r_poly_portal.cpp b/src/r_poly_portal.cpp index 7d8c385dd0..1047164cb4 100644 --- a/src/r_poly_portal.cpp +++ b/src/r_poly_portal.cpp @@ -150,18 +150,9 @@ void RenderPolyPortal::RenderSprite(AActor *thing, double sortDistance, DVector2 DVector2 mid = left * (1.0 - t) + right * t; double tmid = t1 * (1.0 - t) + t2 * t; - if (sideLeft == 0) - { - RenderSprite(thing, sortDistance, mid, right, tmid, t2, bsp->children[sideRight]); - right = mid; - t2 = tmid; - } - else - { - RenderSprite(thing, sortDistance, left, mid, t1, tmid, bsp->children[sideRight]); - left = mid; - t1 = tmid; - } + RenderSprite(thing, sortDistance, mid, right, tmid, t2, bsp->children[sideRight]); + right = mid; + t2 = tmid; } node = bsp->children[sideLeft]; } @@ -346,7 +337,7 @@ void RenderPolyPortal::RenderTranslucent(int portalDepth) else { RenderPolySprite spr; - spr.Render(WorldToClip, PortalPlane, obj.thing, obj.sub, obj.subsectorDepth, StencilValue + 1); + spr.Render(WorldToClip, PortalPlane, obj.thing, obj.sub, obj.subsectorDepth, StencilValue + 1, obj.SpriteLeft, obj.SpriteRight); } } } diff --git a/src/r_poly_sprite.cpp b/src/r_poly_sprite.cpp index 5d4a9ba6cb..c17bc5909f 100644 --- a/src/r_poly_sprite.cpp +++ b/src/r_poly_sprite.cpp @@ -63,10 +63,10 @@ bool RenderPolySprite::GetLine(AActor *thing, 
DVector2 &left, DVector2 &right) return true; } -void RenderPolySprite::Render(const TriMatrix &worldToClip, const Vec4f &clipPlane, AActor *thing, subsector_t *sub, uint32_t subsectorDepth, uint32_t stencilValue) +void RenderPolySprite::Render(const TriMatrix &worldToClip, const Vec4f &clipPlane, AActor *thing, subsector_t *sub, uint32_t subsectorDepth, uint32_t stencilValue, float t1, float t2) { - DVector2 points[2]; - if (!GetLine(thing, points[0], points[1])) + DVector2 line[2]; + if (!GetLine(thing, line[0], line[1])) return; DVector3 pos = thing->InterpolatedPosition(r_TicFracF); @@ -108,10 +108,16 @@ void RenderPolySprite::Render(const TriMatrix &worldToClip, const Vec4f &clipPla std::pair offsets[4] = { - { 0.0f, 1.0f }, - { 1.0f, 1.0f }, - { 1.0f, 0.0f }, - { 0.0f, 0.0f }, + { t1, 1.0f }, + { t2, 1.0f }, + { t2, 0.0f }, + { t1, 0.0f }, + }; + + DVector2 points[2] = + { + line[0] * (1.0 - t1) + line[1] * t1, + line[0] * (1.0 - t2) + line[1] * t2 }; for (int i = 0; i < 4; i++) diff --git a/src/r_poly_sprite.h b/src/r_poly_sprite.h index 8ce9caffdd..085845e506 100644 --- a/src/r_poly_sprite.h +++ b/src/r_poly_sprite.h @@ -29,7 +29,7 @@ class Vec4f; class RenderPolySprite { public: - void Render(const TriMatrix &worldToClip, const Vec4f &clipPlane, AActor *thing, subsector_t *sub, uint32_t subsectorDepth, uint32_t stencilValue); + void Render(const TriMatrix &worldToClip, const Vec4f &clipPlane, AActor *thing, subsector_t *sub, uint32_t subsectorDepth, uint32_t stencilValue, float t1, float t2); static bool GetLine(AActor *thing, DVector2 &left, DVector2 &right); static bool IsThingCulled(AActor *thing); From 04e981dba0382838b0af2b55366cd152e56d9d52 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 3 Dec 2016 05:02:34 +0100 Subject: [PATCH 424/912] Fix up/down culling issue --- src/r_poly_cull.cpp | 23 +++++++++++++++++------ src/r_poly_cull.h | 9 ++++++++- src/r_poly_portal.cpp | 8 ++++---- 3 files changed, 29 insertions(+), 11 deletions(-) diff 
--git a/src/r_poly_cull.cpp b/src/r_poly_cull.cpp index 9ee71c28a8..a688c26444 100644 --- a/src/r_poly_cull.cpp +++ b/src/r_poly_cull.cpp @@ -93,7 +93,7 @@ void PolyCull::CullSubsector(subsector_t *sub) continue; int sx1, sx2; - if (GetSegmentRangeForLine(line->v1->fX(), line->v1->fY(), line->v2->fX(), line->v2->fY(), sx1, sx2)) + if (GetSegmentRangeForLine(line->v1->fX(), line->v1->fY(), line->v2->fX(), line->v2->fY(), sx1, sx2) == LineSegmentRange::HasSegment) { MarkSegmentCulled(sx1, sx2); } @@ -200,7 +200,8 @@ bool PolyCull::CheckBBox(float *bspcoord) float x2 = bspcoord[lines[i][2]]; float y2 = bspcoord[lines[i][3]]; int sx1, sx2; - if (GetSegmentRangeForLine(x1, y1, x2, y2, sx1, sx2)) + LineSegmentRange result = GetSegmentRangeForLine(x1, y1, x2, y2, sx1, sx2); + if (result == LineSegmentRange::HasSegment) { if (foundline) { @@ -214,6 +215,10 @@ bool PolyCull::CheckBBox(float *bspcoord) foundline = true; } } + else if (result == LineSegmentRange::AlwaysVisible) + { + return true; + } } if (!foundline) return false; @@ -221,13 +226,14 @@ bool PolyCull::CheckBBox(float *bspcoord) return !IsSegmentCulled(minsx1, maxsx2); } -bool PolyCull::GetSegmentRangeForLine(double x1, double y1, double x2, double y2, int &sx1, int &sx2) const +LineSegmentRange PolyCull::GetSegmentRangeForLine(double x1, double y1, double x2, double y2, int &sx1, int &sx2) const { double znear = 5.0; + double updownnear = -400.0; // Cull if entirely behind the portal clip plane (tbd: should we clip the segment?) 
if (Vec4f::dot(PortalClipPlane, Vec4f((float)x1, (float)y1, 0.0f, 1.0f)) < 0.0f && Vec4f::dot(PortalClipPlane, Vec4f((float)x2, (float)y2, 0.0f, 1.0f)) < 0.0f) - return false; + return LineSegmentRange::NotVisible; // Transform to 2D view space: x1 = x1 - ViewPos.X; @@ -239,8 +245,13 @@ bool PolyCull::GetSegmentRangeForLine(double x1, double y1, double x2, double y2 double ry1 = x1 * ViewCos + y1 * ViewSin; double ry2 = x2 * ViewCos + y2 * ViewSin; + // Is it potentially visible when looking straight up or down? + if (!(ry1 < updownnear && ry2 < updownnear) && !(ry1 > znear && ry2 > znear)) + return LineSegmentRange::AlwaysVisible; + // Cull if line is entirely behind view - if (ry1 < znear && ry2 < znear) return false; + if (ry1 < znear && ry2 < znear) + return LineSegmentRange::NotVisible; // Clip line, if needed double t1 = 0.0f, t2 = 1.0f; @@ -265,5 +276,5 @@ bool PolyCull::GetSegmentRangeForLine(double x1, double y1, double x2, double y2 if (sx1 > sx2) std::swap(sx1, sx2); - return sx1 != sx2; + return (sx1 != sx2) ? 
LineSegmentRange::HasSegment : LineSegmentRange::AlwaysVisible; } diff --git a/src/r_poly_cull.h b/src/r_poly_cull.h index f1fe56ed53..fe3cd9f5d3 100644 --- a/src/r_poly_cull.h +++ b/src/r_poly_cull.h @@ -25,12 +25,19 @@ #include "r_poly_triangle.h" #include "r_poly_intersection.h" +enum class LineSegmentRange +{ + NotVisible, + HasSegment, + AlwaysVisible +}; + class PolyCull { public: void CullScene(const TriMatrix &worldToClip, const Vec4f &portalClipPlane); - bool GetSegmentRangeForLine(double x1, double y1, double x2, double y2, int &sx1, int &sx2) const; + LineSegmentRange GetSegmentRangeForLine(double x1, double y1, double x2, double y2, int &sx1, int &sx2) const; void MarkSegmentCulled(int x1, int x2); bool IsSegmentCulled(int x1, int x2) const; diff --git a/src/r_poly_portal.cpp b/src/r_poly_portal.cpp index 1047164cb4..b5553665c2 100644 --- a/src/r_poly_portal.cpp +++ b/src/r_poly_portal.cpp @@ -174,12 +174,12 @@ void RenderPolyPortal::RenderLine(subsector_t *sub, seg_t *line, sector_t *front // Cull wall if not visible int sx1, sx2; - bool hasSegmentRange = Cull.GetSegmentRangeForLine(line->v1->fX(), line->v1->fY(), line->v2->fX(), line->v2->fY(), sx1, sx2); - if (!hasSegmentRange || Cull.IsSegmentCulled(sx1, sx2)) + LineSegmentRange segmentRange = Cull.GetSegmentRangeForLine(line->v1->fX(), line->v1->fY(), line->v2->fX(), line->v2->fY(), sx1, sx2); + if (segmentRange == LineSegmentRange::NotVisible || (segmentRange == LineSegmentRange::HasSegment && Cull.IsSegmentCulled(sx1, sx2))) return; // Tell automap we saw this - if (!swrenderer::r_dontmaplines && line->linedef) + if (!swrenderer::r_dontmaplines && line->linedef && segmentRange != LineSegmentRange::AlwaysVisible) { line->linedef->flags |= ML_MAPPED; sub->flags |= SSECF_DRAWN; @@ -201,7 +201,7 @@ void RenderPolyPortal::RenderLine(subsector_t *sub, seg_t *line, sector_t *front // Render wall, and update culling info if its an occlusion blocker if (RenderPolyWall::RenderLine(WorldToClip, PortalPlane, 
line, frontsector, subsectorDepth, StencilValue, TranslucentObjects, LinePortals)) { - if (hasSegmentRange) + if (segmentRange == LineSegmentRange::HasSegment) Cull.MarkSegmentCulled(sx1, sx2); } } From dbacb75617585c504443b15f89bde8ed727a9e96 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 3 Dec 2016 06:58:06 +0100 Subject: [PATCH 425/912] Capped sky rendering --- src/r_drawers.cpp | 16 ++++++++++ src/r_drawers.h | 5 +-- src/r_poly_sky.cpp | 27 ++++++---------- src/r_poly_sky.h | 3 +- .../fixedfunction/drawtrianglecodegen.cpp | 31 +++++++++++++++---- .../fixedfunction/drawtrianglecodegen.h | 2 ++ 6 files changed, 56 insertions(+), 28 deletions(-) diff --git a/src/r_drawers.cpp b/src/r_drawers.cpp index 66cbdd7501..acbe71de45 100644 --- a/src/r_drawers.cpp +++ b/src/r_drawers.cpp @@ -107,6 +107,7 @@ extern "C" void TriDrawNormal8_11_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); void TriDrawNormal8_12_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); void TriDrawNormal8_13_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriDrawNormal8_14_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); void TriDrawNormal32_0_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); void TriDrawNormal32_1_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); void TriDrawNormal32_2_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); @@ -121,6 +122,7 @@ extern "C" void TriDrawNormal32_11_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); void TriDrawNormal32_12_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); void TriDrawNormal32_13_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriDrawNormal32_14_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); void TriFillNormal8_0_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); void TriFillNormal8_1_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); void TriFillNormal8_2_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); @@ -135,6 +137,7 @@ extern "C" void 
TriFillNormal8_11_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); void TriFillNormal8_12_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); void TriFillNormal8_13_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriFillNormal8_14_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); void TriFillNormal32_0_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); void TriFillNormal32_1_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); void TriFillNormal32_2_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); @@ -149,6 +152,7 @@ extern "C" void TriFillNormal32_11_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); void TriFillNormal32_12_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); void TriFillNormal32_13_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriFillNormal32_14_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); void TriDrawSubsector8_0_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); void TriDrawSubsector8_1_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); void TriDrawSubsector8_2_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); @@ -163,6 +167,7 @@ extern "C" void TriDrawSubsector8_11_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); void TriDrawSubsector8_12_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); void TriDrawSubsector8_13_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriDrawSubsector8_14_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); void TriDrawSubsector32_0_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); void TriDrawSubsector32_1_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); void TriDrawSubsector32_2_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); @@ -177,6 +182,7 @@ extern "C" void TriDrawSubsector32_11_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); void TriDrawSubsector32_12_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); void TriDrawSubsector32_13_SSE2(const TriDrawTriangleArgs *, 
WorkerThreadData *); + void TriDrawSubsector32_14_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); void TriFillSubsector8_0_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); void TriFillSubsector8_1_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); void TriFillSubsector8_2_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); @@ -191,6 +197,7 @@ extern "C" void TriFillSubsector8_11_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); void TriFillSubsector8_12_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); void TriFillSubsector8_13_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriFillSubsector8_14_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); void TriFillSubsector32_0_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); void TriFillSubsector32_1_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); void TriFillSubsector32_2_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); @@ -205,6 +212,7 @@ extern "C" void TriFillSubsector32_11_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); void TriFillSubsector32_12_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); void TriFillSubsector32_13_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriFillSubsector32_14_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); void TriStencil_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); void TriStencilClose_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); } @@ -289,6 +297,7 @@ Drawers::Drawers() TriDrawNormal8.push_back(TriDrawNormal8_11_SSE2); TriDrawNormal8.push_back(TriDrawNormal8_12_SSE2); TriDrawNormal8.push_back(TriDrawNormal8_13_SSE2); + TriDrawNormal8.push_back(TriDrawNormal8_14_SSE2); TriDrawNormal32.push_back(TriDrawNormal32_0_SSE2); TriDrawNormal32.push_back(TriDrawNormal32_1_SSE2); TriDrawNormal32.push_back(TriDrawNormal32_2_SSE2); @@ -303,6 +312,7 @@ Drawers::Drawers() TriDrawNormal32.push_back(TriDrawNormal32_11_SSE2); TriDrawNormal32.push_back(TriDrawNormal32_12_SSE2); 
TriDrawNormal32.push_back(TriDrawNormal32_13_SSE2); + TriDrawNormal32.push_back(TriDrawNormal32_14_SSE2); TriFillNormal8.push_back(TriFillNormal8_0_SSE2); TriFillNormal8.push_back(TriFillNormal8_1_SSE2); TriFillNormal8.push_back(TriFillNormal8_2_SSE2); @@ -317,6 +327,7 @@ Drawers::Drawers() TriFillNormal8.push_back(TriFillNormal8_11_SSE2); TriFillNormal8.push_back(TriFillNormal8_12_SSE2); TriFillNormal8.push_back(TriFillNormal8_13_SSE2); + TriFillNormal8.push_back(TriFillNormal8_14_SSE2); TriFillNormal32.push_back(TriFillNormal32_0_SSE2); TriFillNormal32.push_back(TriFillNormal32_1_SSE2); TriFillNormal32.push_back(TriFillNormal32_2_SSE2); @@ -331,6 +342,7 @@ Drawers::Drawers() TriFillNormal32.push_back(TriFillNormal32_11_SSE2); TriFillNormal32.push_back(TriFillNormal32_12_SSE2); TriFillNormal32.push_back(TriFillNormal32_13_SSE2); + TriFillNormal32.push_back(TriFillNormal32_14_SSE2); TriDrawSubsector8.push_back(TriDrawSubsector8_0_SSE2); TriDrawSubsector8.push_back(TriDrawSubsector8_1_SSE2); TriDrawSubsector8.push_back(TriDrawSubsector8_2_SSE2); @@ -345,6 +357,7 @@ Drawers::Drawers() TriDrawSubsector8.push_back(TriDrawSubsector8_11_SSE2); TriDrawSubsector8.push_back(TriDrawSubsector8_12_SSE2); TriDrawSubsector8.push_back(TriDrawSubsector8_13_SSE2); + TriDrawSubsector8.push_back(TriDrawSubsector8_14_SSE2); TriDrawSubsector32.push_back(TriDrawSubsector32_0_SSE2); TriDrawSubsector32.push_back(TriDrawSubsector32_1_SSE2); TriDrawSubsector32.push_back(TriDrawSubsector32_2_SSE2); @@ -359,6 +372,7 @@ Drawers::Drawers() TriDrawSubsector32.push_back(TriDrawSubsector32_11_SSE2); TriDrawSubsector32.push_back(TriDrawSubsector32_12_SSE2); TriDrawSubsector32.push_back(TriDrawSubsector32_13_SSE2); + TriDrawSubsector32.push_back(TriDrawSubsector32_14_SSE2); TriFillSubsector8.push_back(TriFillSubsector8_0_SSE2); TriFillSubsector8.push_back(TriFillSubsector8_1_SSE2); TriFillSubsector8.push_back(TriFillSubsector8_2_SSE2); @@ -373,6 +387,7 @@ Drawers::Drawers() 
TriFillSubsector8.push_back(TriFillSubsector8_11_SSE2); TriFillSubsector8.push_back(TriFillSubsector8_12_SSE2); TriFillSubsector8.push_back(TriFillSubsector8_13_SSE2); + TriFillSubsector8.push_back(TriFillSubsector8_14_SSE2); TriFillSubsector32.push_back(TriFillSubsector32_0_SSE2); TriFillSubsector32.push_back(TriFillSubsector32_1_SSE2); TriFillSubsector32.push_back(TriFillSubsector32_2_SSE2); @@ -387,6 +402,7 @@ Drawers::Drawers() TriFillSubsector32.push_back(TriFillSubsector32_11_SSE2); TriFillSubsector32.push_back(TriFillSubsector32_12_SSE2); TriFillSubsector32.push_back(TriFillSubsector32_13_SSE2); + TriFillSubsector32.push_back(TriFillSubsector32_14_SSE2); TriStencil = TriStencil_SSE2; TriStencilClose = TriStencilClose_SSE2; } diff --git a/src/r_drawers.h b/src/r_drawers.h index 47a145b5a5..ebe5d90029 100644 --- a/src/r_drawers.h +++ b/src/r_drawers.h @@ -254,10 +254,11 @@ enum class TriBlendMode TranslateAdd, // blend_add(shade(translate(fg)), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)) TranslateSub, // blend_sub(shade(translate(fg)), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)) TranslateRevSub,// blend_revsub(shade(translate(fg)), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)) - AddSrcColorOneMinusSrcColor // glBlendMode(GL_SRC_COLOR, GL_ONE_MINUS_SRC_COLOR) used by GZDoom's fullbright additive sprites + AddSrcColorOneMinusSrcColor, // glBlendMode(GL_SRC_COLOR, GL_ONE_MINUS_SRC_COLOR) used by GZDoom's fullbright additive sprites + Skycap // Fade to sky color when the V texture coordinate go beyond the [-1, 1] range }; -inline int NumTriBlendModes() { return (int)TriBlendMode::AddSrcColorOneMinusSrcColor + 1; } +inline int NumTriBlendModes() { return (int)TriBlendMode::Skycap + 1; } class Drawers { diff --git a/src/r_poly_sky.cpp b/src/r_poly_sky.cpp index a19e0d40b3..2331548b31 100644 --- a/src/r_poly_sky.cpp +++ b/src/r_poly_sky.cpp @@ -66,20 +66,24 @@ void PolySkyDome::Render(const TriMatrix &worldToClip) RenderCapColorRow(args, frontskytex, 0, 
false); RenderCapColorRow(args, frontskytex, rc, true); + uint32_t topcapcolor = frontskytex->GetSkyCapColor(false); + uint32_t bottomcapcolor = frontskytex->GetSkyCapColor(true); + for (int i = 1; i <= mRows; i++) { - RenderRow(args, i); - RenderRow(args, rc + i); + RenderRow(args, i, topcapcolor); + RenderRow(args, rc + i, bottomcapcolor); } } -void PolySkyDome::RenderRow(PolyDrawArgs &args, int row) +void PolySkyDome::RenderRow(PolyDrawArgs &args, int row, uint32_t capcolor) { args.vinput = &mVertices[mPrimStart[row]]; args.vcount = mPrimStart[row + 1] - mPrimStart[row]; args.mode = TriangleDrawMode::Strip; args.ccw = false; - PolyTriangleDrawer::draw(args, TriDrawVariant::DrawNormal, TriBlendMode::Copy); + args.uniforms.color = capcolor; + PolyTriangleDrawer::draw(args, TriDrawVariant::DrawNormal, TriBlendMode::Skycap); } void PolySkyDome::RenderCapColorRow(PolyDrawArgs &args, FTexture *skytex, int row, bool bottomCap) @@ -129,18 +133,6 @@ void PolySkyDome::CreateSkyHemisphere(bool zflip) } } -TriVertex PolySkyDome::SetVertex(float xx, float yy, float zz, float uu, float vv) -{ - TriVertex v; - v.x = xx; - v.y = yy; - v.z = zz; - v.w = 1.0f; - v.varying[0] = uu; - v.varying[1] = vv; - return v; -} - TriVertex PolySkyDome::SetVertexXYZ(float xx, float yy, float zz, float uu, float vv) { TriVertex v; @@ -166,7 +158,6 @@ void PolySkyDome::SkyVertex(int r, int c, bool zflip) float z = (!zflip) ? scale * height : -scale * height; float u, v; - //uint32_t color = r == 0 ? 0xffffff : 0xffffffff; // And the texture coordinates. if (!zflip) // Flipped Y is for the lower hemisphere. @@ -184,6 +175,6 @@ void PolySkyDome::SkyVertex(int r, int c, bool zflip) // And finally the vertex. 
TriVertex vert; - vert = SetVertexXYZ(-pos.X, z - 1.f, pos.Y, u * 4.0f, v * 1.2f + 0.5f/*, color*/); + vert = SetVertexXYZ(-pos.X, z - 1.f, pos.Y, u * 4.0f, v * 1.2f - 0.5f); mVertices.Push(vert); } diff --git a/src/r_poly_sky.h b/src/r_poly_sky.h index 95ec27ed65..1f5a655b9b 100644 --- a/src/r_poly_sky.h +++ b/src/r_poly_sky.h @@ -38,9 +38,8 @@ private: void SkyVertex(int r, int c, bool yflip); void CreateSkyHemisphere(bool zflip); void CreateDome(); - void RenderRow(PolyDrawArgs &args, int row); + void RenderRow(PolyDrawArgs &args, int row, uint32_t capcolor); void RenderCapColorRow(PolyDrawArgs &args, FTexture *skytex, int row, bool bottomCap); - TriVertex SetVertex(float xx, float yy, float zz, float uu = 0, float vv = 0); TriVertex SetVertexXYZ(float xx, float yy, float zz, float uu = 0, float vv = 0); }; diff --git a/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp b/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp index df86908214..75acffac9e 100644 --- a/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp +++ b/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp @@ -333,8 +333,6 @@ void DrawTriangleCodegen::SetupAffineBlock() { AffineVaryingPosX[i] = SSAInt(AffineVaryingPosY[i] * rcpW0, false); AffineVaryingStepX[i] = (SSAInt((AffineVaryingPosY[i] + gradVaryingX[i]) * rcpW1, false) - AffineVaryingPosX[i]) / q; - AffineVaryingPosX[i] = AffineVaryingPosX[i] << 8; - AffineVaryingStepX[i] = AffineVaryingStepX[i] << 8; } } @@ -780,8 +778,8 @@ SSAInt DrawTriangleCodegen::Shade8(SSAInt c) SSAVec4i DrawTriangleCodegen::ProcessPixel32(SSAVec4i bg, SSAInt *varying) { - SSAInt ufrac = varying[0]; - SSAInt vfrac = varying[1]; + SSAInt ufrac = varying[0] << 8; + SSAInt vfrac = varying[1] << 8; SSAInt upos = ((ufrac >> 16) * textureWidth) >> 16; SSAInt vpos = ((vfrac >> 16) * textureHeight) >> 16; @@ -848,6 +846,10 @@ SSAVec4i DrawTriangleCodegen::ProcessPixel32(SSAVec4i bg, SSAInt *varying) fg = Sample32(uvoffset); output = 
blend_add_srccolor_oneminussrccolor(shade_bgra_simple(fg, currentlight), bg); break; + case TriBlendMode::Skycap: + fg = Sample32(uvoffset); + output = FadeOut(varying[1], fg); + break; } return output; @@ -867,8 +869,8 @@ SSAInt DrawTriangleCodegen::ToPal8(SSAVec4i c) SSAInt DrawTriangleCodegen::ProcessPixel8(SSAInt bg, SSAInt *varying) { - SSAInt ufrac = varying[0]; - SSAInt vfrac = varying[1]; + SSAInt ufrac = varying[0] << 8; + SSAInt vfrac = varying[1] << 8; SSAInt upos = ((ufrac >> 16) * textureWidth) >> 16; SSAInt vpos = ((vfrac >> 16) * textureHeight) >> 16; @@ -955,11 +957,28 @@ SSAInt DrawTriangleCodegen::ProcessPixel8(SSAInt bg, SSAInt *varying) output = ToPal8(blend_add_srccolor_oneminussrccolor(fg, ToBgra(bg))); output = (palindex == SSAInt(0)).select(bg, output); break; + case TriBlendMode::Skycap: + fg = ToBgra(Sample8(uvoffset)); + output = ToPal8(FadeOut(varying[1], fg)); + break; } return output; } +SSAVec4i DrawTriangleCodegen::FadeOut(SSAInt frac, SSAVec4i fg) +{ + int start_fade = 2; // How fast it should fade out + + SSAInt alpha_top = SSAInt::MAX(SSAInt::MIN(frac.ashr(16 - start_fade), SSAInt(256)), SSAInt(0)); + SSAInt alpha_bottom = SSAInt::MAX(SSAInt::MIN(((2 << 24) - frac).ashr(16 - start_fade), SSAInt(256)), SSAInt(0)); + SSAInt alpha = SSAInt::MIN(alpha_top, alpha_bottom); + SSAInt inv_alpha = 256 - alpha; + + fg = (fg * alpha + SSAVec4i::unpack(color) * inv_alpha) / 256; + return fg.insert(3, 255); +} + void DrawTriangleCodegen::SetStencilBlock(SSAInt block) { StencilBlock = stencilValues[block * 64]; diff --git a/tools/drawergen/fixedfunction/drawtrianglecodegen.h b/tools/drawergen/fixedfunction/drawtrianglecodegen.h index 0559efd236..1f22cf0a8f 100644 --- a/tools/drawergen/fixedfunction/drawtrianglecodegen.h +++ b/tools/drawergen/fixedfunction/drawtrianglecodegen.h @@ -58,6 +58,8 @@ private: SSAVec4i ToBgra(SSAInt index); SSAInt ToPal8(SSAVec4i c); + SSAVec4i FadeOut(SSAInt frac, SSAVec4i color); + void SetStencilBlock(SSAInt block); 
void StencilClear(SSAUByte value); SSAUByte StencilGetSingle(); From fc2ae9ecc3e30ae3356abcf5b053202f3a0b4663 Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Sat, 3 Dec 2016 02:18:29 -0500 Subject: [PATCH 426/912] Delete DelLLVMCache.cmd This file is now vestigial and no longer needed. --- tools/DelLLVMCache.cmd | 9 --------- 1 file changed, 9 deletions(-) delete mode 100644 tools/DelLLVMCache.cmd diff --git a/tools/DelLLVMCache.cmd b/tools/DelLLVMCache.cmd deleted file mode 100644 index d7c3b1525e..0000000000 --- a/tools/DelLLVMCache.cmd +++ /dev/null @@ -1,9 +0,0 @@ -@echo off -if not exist %localappdata%\zdoom\cache\llvm* goto :eof -echo QZDoom's LLVM drawers may take some time to create at startup. Because of this, -echo the program uses a cache to temporarily store bitcode for faster startups. If -echo this cache is ever corrupted, this program has been created to solve the -echo problem. -echo. -echo Are you SURE you wish to destroy the cache? -del /p %localappdata%\zdoom\cache\llvm* From e09c7d239f57b8bebdc4a5f9046bfcb26ffac7cc Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 3 Dec 2016 14:58:56 +0100 Subject: [PATCH 427/912] Adjust class naming to closer match the GZDoom renderer and split portal rendering part to its own file --- src/CMakeLists.txt | 1 + src/r_poly.cpp | 16 +- src/r_poly.h | 7 +- src/r_poly_plane.cpp | 2 +- src/r_poly_portal.cpp | 320 +-------------------------------------- src/r_poly_portal.h | 84 +---------- src/r_poly_scene.cpp | 341 ++++++++++++++++++++++++++++++++++++++++++ src/r_poly_scene.h | 103 +++++++++++++ src/r_poly_sky.cpp | 2 +- src/r_swrenderer.cpp | 8 +- 10 files changed, 470 insertions(+), 414 deletions(-) create mode 100644 src/r_poly_scene.cpp create mode 100644 src/r_poly_scene.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 73a3074bb5..ea07dec7b4 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1030,6 +1030,7 @@ set( NOT_COMPILED_SOURCE_FILES set( FASTMATH_PCH_SOURCES 
r_swrenderer.cpp r_poly.cpp + r_poly_scene.cpp r_poly_portal.cpp r_poly_cull.cpp r_poly_decal.cpp diff --git a/src/r_poly.cpp b/src/r_poly.cpp index cadf92adf6..4ea047925e 100644 --- a/src/r_poly.cpp +++ b/src/r_poly.cpp @@ -37,13 +37,13 @@ extern bool r_showviewer; ///////////////////////////////////////////////////////////////////////////// -RenderPolyScene *RenderPolyScene::Instance() +PolyRenderer *PolyRenderer::Instance() { - static RenderPolyScene scene; + static PolyRenderer scene; return &scene; } -void RenderPolyScene::RenderViewToCanvas(AActor *actor, DCanvas *canvas, int x, int y, int width, int height, bool dontmaplines) +void PolyRenderer::RenderViewToCanvas(AActor *actor, DCanvas *canvas, int x, int y, int width, int height, bool dontmaplines) { const bool savedviewactive = viewactive; const bool savedoutputformat = swrenderer::r_swtruecolor; @@ -70,7 +70,7 @@ void RenderPolyScene::RenderViewToCanvas(AActor *actor, DCanvas *canvas, int x, swrenderer::r_swtruecolor = savedoutputformat; } -void RenderPolyScene::RenderActorView(AActor *actor, bool dontmaplines) +void PolyRenderer::RenderActorView(AActor *actor, bool dontmaplines) { NetUpdate(); @@ -106,12 +106,12 @@ void RenderPolyScene::RenderActorView(AActor *actor, bool dontmaplines) NetUpdate(); } -void RenderPolyScene::RenderRemainingPlayerSprites() +void PolyRenderer::RenderRemainingPlayerSprites() { PlayerSprites.RenderRemainingSprites(); } -void RenderPolyScene::ClearBuffers() +void PolyRenderer::ClearBuffers() { PolyVertexBuffer::Clear(); PolyStencilBuffer::Instance()->Clear(RenderTarget->GetWidth(), RenderTarget->GetHeight(), 0); @@ -119,7 +119,7 @@ void RenderPolyScene::ClearBuffers() NextStencilValue = 0; } -void RenderPolyScene::SetSceneViewport() +void PolyRenderer::SetSceneViewport() { if (RenderTarget == screen) // Rendering to screen { @@ -138,7 +138,7 @@ void RenderPolyScene::SetSceneViewport() } } -void RenderPolyScene::SetupPerspectiveMatrix() +void 
PolyRenderer::SetupPerspectiveMatrix() { static bool bDidSetup = false; diff --git a/src/r_poly.h b/src/r_poly.h index d67e395708..bc108a8a13 100644 --- a/src/r_poly.h +++ b/src/r_poly.h @@ -36,15 +36,14 @@ class AActor; class DCanvas; -// Renders a scene -class RenderPolyScene +class PolyRenderer { public: void RenderViewToCanvas(AActor *actor, DCanvas *canvas, int x, int y, int width, int height, bool dontmaplines); void RenderActorView(AActor *actor, bool dontmaplines); void RenderRemainingPlayerSprites(); - static RenderPolyScene *Instance(); + static PolyRenderer *Instance(); uint32_t GetNextStencilValue() { uint32_t value = NextStencilValue; NextStencilValue += 2; return value; } @@ -54,7 +53,7 @@ private: void SetupPerspectiveMatrix(); TriMatrix WorldToClip; - RenderPolyPortal MainPortal; + RenderPolyScene MainPortal; PolySkyDome Skydome; RenderPolyPlayerSprites PlayerSprites; uint32_t NextStencilValue = 0; diff --git a/src/r_poly_plane.cpp b/src/r_poly_plane.cpp index 41898fa8b8..24b00af000 100644 --- a/src/r_poly_plane.cpp +++ b/src/r_poly_plane.cpp @@ -254,7 +254,7 @@ void RenderPolyPlane::Render(const TriMatrix &worldToClip, const Vec4f &clipPlan if (swrenderer::fixedlightlev >= 0 || swrenderer::fixedcolormap) args.uniforms.light = 256; args.uniforms.flags = 0; - args.uniforms.subsectorDepth = isSky ? RenderPolyPortal::SkySubsectorDepth : subsectorDepth; + args.uniforms.subsectorDepth = isSky ? 
RenderPolyScene::SkySubsectorDepth : subsectorDepth; TriVertex *vertices = PolyVertexBuffer::GetVertices(sub->numlines); if (!vertices) diff --git a/src/r_poly_portal.cpp b/src/r_poly_portal.cpp index b5553665c2..1bb6007795 100644 --- a/src/r_poly_portal.cpp +++ b/src/r_poly_portal.cpp @@ -30,323 +30,13 @@ #include "r_poly.h" #include "gl/data/gl_data.h" -CVAR(Bool, r_debug_cull, 0, 0) -EXTERN_CVAR(Int, r_portal_recursions) - extern bool r_showviewer; ///////////////////////////////////////////////////////////////////////////// -RenderPolyPortal::RenderPolyPortal() -{ -} - -RenderPolyPortal::~RenderPolyPortal() -{ -} - -void RenderPolyPortal::SetViewpoint(const TriMatrix &worldToClip, const Vec4f &portalPlane, uint32_t stencilValue) -{ - WorldToClip = worldToClip; - StencilValue = stencilValue; - PortalPlane = portalPlane; -} - -void RenderPolyPortal::Render(int portalDepth) -{ - ClearBuffers(); - Cull.CullScene(WorldToClip, PortalPlane); - RenderSectors(); - RenderPortals(portalDepth); -} - -void RenderPolyPortal::ClearBuffers() -{ - SeenSectors.clear(); - SubsectorDepths.clear(); - TranslucentObjects.clear(); - SectorPortals.clear(); - LinePortals.clear(); - NextSubsectorDepth = 0; -} - -void RenderPolyPortal::RenderSectors() -{ - if (r_debug_cull) - { - for (auto it = Cull.PvsSectors.rbegin(); it != Cull.PvsSectors.rend(); ++it) - RenderSubsector(*it); - } - else - { - for (auto it = Cull.PvsSectors.begin(); it != Cull.PvsSectors.end(); ++it) - RenderSubsector(*it); - } -} - -void RenderPolyPortal::RenderSubsector(subsector_t *sub) -{ - sector_t *frontsector = sub->sector; - frontsector->MoreFlags |= SECF_DRAWN; - - uint32_t subsectorDepth = NextSubsectorDepth++; - - if (sub->sector->CenterFloor() != sub->sector->CenterCeiling()) - { - RenderPolyPlane::RenderPlanes(WorldToClip, PortalPlane, sub, subsectorDepth, StencilValue, Cull.MaxCeilingHeight, Cull.MinFloorHeight, SectorPortals); - } - - for (uint32_t i = 0; i < sub->numlines; i++) - { - seg_t *line = 
&sub->firstline[i]; - if (line->sidedef == nullptr || !(line->sidedef->Flags & WALLF_POLYOBJ)) - { - RenderLine(sub, line, frontsector, subsectorDepth); - } - } - - bool mainBSP = ((unsigned int)(sub - subsectors) < (unsigned int)numsubsectors); - if (mainBSP) - { - int subsectorIndex = (int)(sub - subsectors); - for (int i = ParticlesInSubsec[subsectorIndex]; i != NO_PARTICLE; i = Particles[i].snext) - { - particle_t *particle = Particles + i; - TranslucentObjects.push_back({ particle, sub, subsectorDepth }); - } - } - - SeenSectors.insert(sub->sector); - SubsectorDepths[sub] = subsectorDepth; -} - -void RenderPolyPortal::RenderSprite(AActor *thing, double sortDistance, const DVector2 &left, const DVector2 &right) -{ - if (numnodes == 0) - RenderSprite(thing, sortDistance, left, right, 0.0, 1.0, subsectors); - else - RenderSprite(thing, sortDistance, left, right, 0.0, 1.0, nodes + numnodes - 1); // The head node is the last node output. -} - -void RenderPolyPortal::RenderSprite(AActor *thing, double sortDistance, DVector2 left, DVector2 right, double t1, double t2, void *node) -{ - while (!((size_t)node & 1)) // Keep going until found a subsector - { - node_t *bsp = (node_t *)node; - - DVector2 planePos(FIXED2DBL(bsp->x), FIXED2DBL(bsp->y)); - DVector2 planeNormal = DVector2(FIXED2DBL(-bsp->dy), FIXED2DBL(bsp->dx)); - double planeD = planeNormal | planePos; - - int sideLeft = (left | planeNormal) > planeD; - int sideRight = (right | planeNormal) > planeD; - - if (sideLeft != sideRight) - { - double dotLeft = planeNormal | left; - double dotRight = planeNormal | right; - double t = (planeD - dotLeft) / (dotRight - dotLeft); - - DVector2 mid = left * (1.0 - t) + right * t; - double tmid = t1 * (1.0 - t) + t2 * t; - - RenderSprite(thing, sortDistance, mid, right, tmid, t2, bsp->children[sideRight]); - right = mid; - t2 = tmid; - } - node = bsp->children[sideLeft]; - } - - subsector_t *sub = (subsector_t *)((BYTE *)node - 1); - - auto it = SubsectorDepths.find(sub); - 
if (it != SubsectorDepths.end()) - TranslucentObjects.push_back({ thing, sub, it->second, sortDistance, (float)t1, (float)t2 }); -} - -void RenderPolyPortal::RenderLine(subsector_t *sub, seg_t *line, sector_t *frontsector, uint32_t subsectorDepth) -{ - // Reject lines not facing viewer - DVector2 pt1 = line->v1->fPos() - ViewPos; - DVector2 pt2 = line->v2->fPos() - ViewPos; - if (pt1.Y * (pt1.X - pt2.X) + pt1.X * (pt2.Y - pt1.Y) >= 0) - return; - - // Cull wall if not visible - int sx1, sx2; - LineSegmentRange segmentRange = Cull.GetSegmentRangeForLine(line->v1->fX(), line->v1->fY(), line->v2->fX(), line->v2->fY(), sx1, sx2); - if (segmentRange == LineSegmentRange::NotVisible || (segmentRange == LineSegmentRange::HasSegment && Cull.IsSegmentCulled(sx1, sx2))) - return; - - // Tell automap we saw this - if (!swrenderer::r_dontmaplines && line->linedef && segmentRange != LineSegmentRange::AlwaysVisible) - { - line->linedef->flags |= ML_MAPPED; - sub->flags |= SSECF_DRAWN; - } - - // Render 3D floor sides - if (line->backsector && frontsector->e && line->backsector->e->XFloor.ffloors.Size()) - { - for (unsigned int i = 0; i < line->backsector->e->XFloor.ffloors.Size(); i++) - { - F3DFloor *fakeFloor = line->backsector->e->XFloor.ffloors[i]; - if (!(fakeFloor->flags & FF_EXISTS)) continue; - if (!(fakeFloor->flags & FF_RENDERPLANES)) continue; - if (!fakeFloor->model) continue; - RenderPolyWall::Render3DFloorLine(WorldToClip, PortalPlane, line, frontsector, subsectorDepth, StencilValue, fakeFloor, TranslucentObjects); - } - } - - // Render wall, and update culling info if its an occlusion blocker - if (RenderPolyWall::RenderLine(WorldToClip, PortalPlane, line, frontsector, subsectorDepth, StencilValue, TranslucentObjects, LinePortals)) - { - if (segmentRange == LineSegmentRange::HasSegment) - Cull.MarkSegmentCulled(sx1, sx2); - } -} - -void RenderPolyPortal::RenderPortals(int portalDepth) -{ - if (portalDepth < r_portal_recursions) - { - for (auto &portal : 
SectorPortals) - portal->Render(portalDepth + 1); - - for (auto &portal : LinePortals) - portal->Render(portalDepth + 1); - } - else // Fill with black - { - PolyDrawArgs args; - args.objectToClip = &WorldToClip; - args.mode = TriangleDrawMode::Fan; - args.uniforms.color = 0; - args.uniforms.light = 256; - args.uniforms.flags = TriUniforms::fixed_light; - args.SetClipPlane(PortalPlane.x, PortalPlane.y, PortalPlane.z, PortalPlane.w); - - for (auto &portal : SectorPortals) - { - args.stenciltestvalue = portal->StencilValue; - args.stencilwritevalue = portal->StencilValue + 1; - for (const auto &verts : portal->Shape) - { - args.vinput = verts.Vertices; - args.vcount = verts.Count; - args.ccw = verts.Ccw; - args.uniforms.subsectorDepth = verts.SubsectorDepth; - PolyTriangleDrawer::draw(args, TriDrawVariant::FillNormal, TriBlendMode::Copy); - } - } - - for (auto &portal : LinePortals) - { - args.stenciltestvalue = portal->StencilValue; - args.stencilwritevalue = portal->StencilValue + 1; - for (const auto &verts : portal->Shape) - { - args.vinput = verts.Vertices; - args.vcount = verts.Count; - args.ccw = verts.Ccw; - args.uniforms.subsectorDepth = verts.SubsectorDepth; - PolyTriangleDrawer::draw(args, TriDrawVariant::FillNormal, TriBlendMode::Copy); - } - } - } -} - -void RenderPolyPortal::RenderTranslucent(int portalDepth) -{ - if (portalDepth < r_portal_recursions) - { - for (auto it = SectorPortals.rbegin(); it != SectorPortals.rend(); ++it) - { - auto &portal = *it; - portal->RenderTranslucent(portalDepth + 1); - - PolyDrawArgs args; - args.objectToClip = &WorldToClip; - args.mode = TriangleDrawMode::Fan; - args.stenciltestvalue = portal->StencilValue + 1; - args.stencilwritevalue = StencilValue; - args.SetClipPlane(PortalPlane.x, PortalPlane.y, PortalPlane.z, PortalPlane.w); - for (const auto &verts : portal->Shape) - { - args.vinput = verts.Vertices; - args.vcount = verts.Count; - args.ccw = verts.Ccw; - args.uniforms.subsectorDepth = verts.SubsectorDepth; - 
PolyTriangleDrawer::draw(args, TriDrawVariant::StencilClose, TriBlendMode::Copy); - } - } - - for (auto it = LinePortals.rbegin(); it != LinePortals.rend(); ++it) - { - auto &portal = *it; - portal->RenderTranslucent(portalDepth + 1); - - PolyDrawArgs args; - args.objectToClip = &WorldToClip; - args.mode = TriangleDrawMode::Fan; - args.stenciltestvalue = portal->StencilValue + 1; - args.stencilwritevalue = StencilValue; - args.SetClipPlane(PortalPlane.x, PortalPlane.y, PortalPlane.z, PortalPlane.w); - for (const auto &verts : portal->Shape) - { - args.vinput = verts.Vertices; - args.vcount = verts.Count; - args.ccw = verts.Ccw; - args.uniforms.subsectorDepth = verts.SubsectorDepth; - PolyTriangleDrawer::draw(args, TriDrawVariant::StencilClose, TriBlendMode::Copy); - } - } - } - - for (sector_t *sector : SeenSectors) - { - for (AActor *thing = sector->thinglist; thing != nullptr; thing = thing->snext) - { - DVector2 left, right; - if (!RenderPolySprite::GetLine(thing, left, right)) - continue; - double distanceSquared = (thing->Pos() - ViewPos).LengthSquared(); - RenderSprite(thing, distanceSquared, left, right); - } - } - - std::stable_sort(TranslucentObjects.begin(), TranslucentObjects.end()); - - for (auto it = TranslucentObjects.rbegin(); it != TranslucentObjects.rend(); ++it) - { - auto &obj = *it; - if (obj.particle) - { - RenderPolyParticle spr; - spr.Render(WorldToClip, PortalPlane, obj.particle, obj.sub, obj.subsectorDepth, StencilValue + 1); - } - else if (!obj.thing) - { - obj.wall.Render(WorldToClip, PortalPlane); - } - else if ((obj.thing->renderflags & RF_SPRITETYPEMASK) == RF_WALLSPRITE) - { - RenderPolyWallSprite wallspr; - wallspr.Render(WorldToClip, PortalPlane, obj.thing, obj.sub, obj.subsectorDepth, StencilValue + 1); - } - else - { - RenderPolySprite spr; - spr.Render(WorldToClip, PortalPlane, obj.thing, obj.sub, obj.subsectorDepth, StencilValue + 1, obj.SpriteLeft, obj.SpriteRight); - } - } -} - 
-///////////////////////////////////////////////////////////////////////////// - PolyDrawSectorPortal::PolyDrawSectorPortal(FSectorPortal *portal, bool ceiling) : Portal(portal), Ceiling(ceiling) { - StencilValue = RenderPolyScene::Instance()->GetNextStencilValue(); + StencilValue = PolyRenderer::Instance()->GetNextStencilValue(); } void PolyDrawSectorPortal::Render(int portalDepth) @@ -356,7 +46,7 @@ void PolyDrawSectorPortal::Render(int portalDepth) SaveGlobals(); - // To do: get this information from RenderPolyScene instead of duplicating the code.. + // To do: get this information from PolyRenderer instead of duplicating the code.. double radPitch = ViewPitch.Normalized180().Radians(); double angx = cos(radPitch); double angy = sin(radPitch) * glset.pixelstretch; @@ -444,19 +134,19 @@ void PolyDrawSectorPortal::RestoreGlobals() PolyDrawLinePortal::PolyDrawLinePortal(FLinePortal *portal) : Portal(portal) { - StencilValue = RenderPolyScene::Instance()->GetNextStencilValue(); + StencilValue = PolyRenderer::Instance()->GetNextStencilValue(); } PolyDrawLinePortal::PolyDrawLinePortal(line_t *mirror) : Mirror(mirror) { - StencilValue = RenderPolyScene::Instance()->GetNextStencilValue(); + StencilValue = PolyRenderer::Instance()->GetNextStencilValue(); } void PolyDrawLinePortal::Render(int portalDepth) { SaveGlobals(); - // To do: get this information from RenderPolyScene instead of duplicating the code.. + // To do: get this information from PolyRenderer instead of duplicating the code.. 
double radPitch = ViewPitch.Normalized180().Radians(); double angx = cos(radPitch); double angy = sin(radPitch) * glset.pixelstretch; diff --git a/src/r_poly_portal.h b/src/r_poly_portal.h index e983ebd46f..7835bb042f 100644 --- a/src/r_poly_portal.h +++ b/src/r_poly_portal.h @@ -22,85 +22,7 @@ #pragma once -#include -#include -#include -#include -#include "doomdata.h" -#include "r_utility.h" -#include "r_main.h" -#include "r_poly_triangle.h" -#include "r_poly_intersection.h" -#include "r_poly_wall.h" -#include "r_poly_sprite.h" -#include "r_poly_wallsprite.h" -#include "r_poly_playersprite.h" -#include "r_poly_particle.h" -#include "r_poly_plane.h" -#include "r_poly_cull.h" -#include -#include - -class PolyTranslucentObject -{ -public: - PolyTranslucentObject(particle_t *particle, subsector_t *sub, uint32_t subsectorDepth) : particle(particle), sub(sub), subsectorDepth(subsectorDepth) { } - PolyTranslucentObject(AActor *thing, subsector_t *sub, uint32_t subsectorDepth, double dist, float t1, float t2) : thing(thing), sub(sub), subsectorDepth(subsectorDepth), DistanceSquared(dist), SpriteLeft(t1), SpriteRight(t2) { } - PolyTranslucentObject(RenderPolyWall wall) : wall(wall), subsectorDepth(wall.SubsectorDepth), DistanceSquared(1.e6) { } - - bool operator<(const PolyTranslucentObject &other) const - { - return subsectorDepth != other.subsectorDepth ? 
subsectorDepth < other.subsectorDepth : DistanceSquared < other.DistanceSquared; - } - - particle_t *particle = nullptr; - AActor *thing = nullptr; - subsector_t *sub = nullptr; - - RenderPolyWall wall; - - uint32_t subsectorDepth = 0; - double DistanceSquared = 0.0; - - float SpriteLeft = 0.0f, SpriteRight = 1.0f; -}; - -class PolyDrawSectorPortal; -class PolyDrawLinePortal; - -// Renders everything from a specific viewpoint -class RenderPolyPortal -{ -public: - RenderPolyPortal(); - ~RenderPolyPortal(); - void SetViewpoint(const TriMatrix &worldToClip, const Vec4f &portalPlane, uint32_t stencilValue); - void Render(int portalDepth); - void RenderTranslucent(int portalDepth); - - static const uint32_t SkySubsectorDepth = 0x7fffffff; - -private: - void ClearBuffers(); - void RenderPortals(int portalDepth); - void RenderSectors(); - void RenderSubsector(subsector_t *sub); - void RenderLine(subsector_t *sub, seg_t *line, sector_t *frontsector, uint32_t subsectorDepth); - void RenderSprite(AActor *thing, double sortDistance, const DVector2 &left, const DVector2 &right); - void RenderSprite(AActor *thing, double sortDistance, DVector2 left, DVector2 right, double t1, double t2, void *node); - - TriMatrix WorldToClip; - Vec4f PortalPlane; - uint32_t StencilValue = 0; - PolyCull Cull; - uint32_t NextSubsectorDepth = 0; - std::set SeenSectors; - std::unordered_map SubsectorDepths; - std::vector TranslucentObjects; - - std::vector> SectorPortals; - std::vector> LinePortals; -}; +#include "r_poly_scene.h" struct PolyPortalVertexRange { @@ -128,7 +50,7 @@ private: void RestoreGlobals(); bool Ceiling; - RenderPolyPortal RenderPortal; + RenderPolyScene RenderPortal; int savedextralight; DVector3 savedpos; @@ -156,7 +78,7 @@ private: void SaveGlobals(); void RestoreGlobals(); - RenderPolyPortal RenderPortal; + RenderPolyScene RenderPortal; int savedextralight; DVector3 savedpos; diff --git a/src/r_poly_scene.cpp b/src/r_poly_scene.cpp new file mode 100644 index 
0000000000..7304000a67 --- /dev/null +++ b/src/r_poly_scene.cpp @@ -0,0 +1,341 @@ +/* +** Polygon Doom software renderer +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. 
+** +*/ + +#include +#include "templates.h" +#include "doomdef.h" +#include "p_maputl.h" +#include "sbar.h" +#include "r_data/r_translate.h" +#include "r_poly_scene.h" +#include "r_poly.h" +#include "gl/data/gl_data.h" + +CVAR(Bool, r_debug_cull, 0, 0) +EXTERN_CVAR(Int, r_portal_recursions) + +///////////////////////////////////////////////////////////////////////////// + +RenderPolyScene::RenderPolyScene() +{ +} + +RenderPolyScene::~RenderPolyScene() +{ +} + +void RenderPolyScene::SetViewpoint(const TriMatrix &worldToClip, const Vec4f &portalPlane, uint32_t stencilValue) +{ + WorldToClip = worldToClip; + StencilValue = stencilValue; + PortalPlane = portalPlane; +} + +void RenderPolyScene::Render(int portalDepth) +{ + ClearBuffers(); + Cull.CullScene(WorldToClip, PortalPlane); + RenderSectors(); + RenderPortals(portalDepth); +} + +void RenderPolyScene::ClearBuffers() +{ + SeenSectors.clear(); + SubsectorDepths.clear(); + TranslucentObjects.clear(); + SectorPortals.clear(); + LinePortals.clear(); + NextSubsectorDepth = 0; +} + +void RenderPolyScene::RenderSectors() +{ + if (r_debug_cull) + { + for (auto it = Cull.PvsSectors.rbegin(); it != Cull.PvsSectors.rend(); ++it) + RenderSubsector(*it); + } + else + { + for (auto it = Cull.PvsSectors.begin(); it != Cull.PvsSectors.end(); ++it) + RenderSubsector(*it); + } +} + +void RenderPolyScene::RenderSubsector(subsector_t *sub) +{ + sector_t *frontsector = sub->sector; + frontsector->MoreFlags |= SECF_DRAWN; + + uint32_t subsectorDepth = NextSubsectorDepth++; + + if (sub->sector->CenterFloor() != sub->sector->CenterCeiling()) + { + RenderPolyPlane::RenderPlanes(WorldToClip, PortalPlane, sub, subsectorDepth, StencilValue, Cull.MaxCeilingHeight, Cull.MinFloorHeight, SectorPortals); + } + + for (uint32_t i = 0; i < sub->numlines; i++) + { + seg_t *line = &sub->firstline[i]; + if (line->sidedef == nullptr || !(line->sidedef->Flags & WALLF_POLYOBJ)) + { + RenderLine(sub, line, frontsector, subsectorDepth); + } + } + + bool 
mainBSP = ((unsigned int)(sub - subsectors) < (unsigned int)numsubsectors); + if (mainBSP) + { + int subsectorIndex = (int)(sub - subsectors); + for (int i = ParticlesInSubsec[subsectorIndex]; i != NO_PARTICLE; i = Particles[i].snext) + { + particle_t *particle = Particles + i; + TranslucentObjects.push_back({ particle, sub, subsectorDepth }); + } + } + + SeenSectors.insert(sub->sector); + SubsectorDepths[sub] = subsectorDepth; +} + +void RenderPolyScene::RenderSprite(AActor *thing, double sortDistance, const DVector2 &left, const DVector2 &right) +{ + if (numnodes == 0) + RenderSprite(thing, sortDistance, left, right, 0.0, 1.0, subsectors); + else + RenderSprite(thing, sortDistance, left, right, 0.0, 1.0, nodes + numnodes - 1); // The head node is the last node output. +} + +void RenderPolyScene::RenderSprite(AActor *thing, double sortDistance, DVector2 left, DVector2 right, double t1, double t2, void *node) +{ + while (!((size_t)node & 1)) // Keep going until found a subsector + { + node_t *bsp = (node_t *)node; + + DVector2 planePos(FIXED2DBL(bsp->x), FIXED2DBL(bsp->y)); + DVector2 planeNormal = DVector2(FIXED2DBL(-bsp->dy), FIXED2DBL(bsp->dx)); + double planeD = planeNormal | planePos; + + int sideLeft = (left | planeNormal) > planeD; + int sideRight = (right | planeNormal) > planeD; + + if (sideLeft != sideRight) + { + double dotLeft = planeNormal | left; + double dotRight = planeNormal | right; + double t = (planeD - dotLeft) / (dotRight - dotLeft); + + DVector2 mid = left * (1.0 - t) + right * t; + double tmid = t1 * (1.0 - t) + t2 * t; + + RenderSprite(thing, sortDistance, mid, right, tmid, t2, bsp->children[sideRight]); + right = mid; + t2 = tmid; + } + node = bsp->children[sideLeft]; + } + + subsector_t *sub = (subsector_t *)((BYTE *)node - 1); + + auto it = SubsectorDepths.find(sub); + if (it != SubsectorDepths.end()) + TranslucentObjects.push_back({ thing, sub, it->second, sortDistance, (float)t1, (float)t2 }); +} + +void 
RenderPolyScene::RenderLine(subsector_t *sub, seg_t *line, sector_t *frontsector, uint32_t subsectorDepth) +{ + // Reject lines not facing viewer + DVector2 pt1 = line->v1->fPos() - ViewPos; + DVector2 pt2 = line->v2->fPos() - ViewPos; + if (pt1.Y * (pt1.X - pt2.X) + pt1.X * (pt2.Y - pt1.Y) >= 0) + return; + + // Cull wall if not visible + int sx1, sx2; + LineSegmentRange segmentRange = Cull.GetSegmentRangeForLine(line->v1->fX(), line->v1->fY(), line->v2->fX(), line->v2->fY(), sx1, sx2); + if (segmentRange == LineSegmentRange::NotVisible || (segmentRange == LineSegmentRange::HasSegment && Cull.IsSegmentCulled(sx1, sx2))) + return; + + // Tell automap we saw this + if (!swrenderer::r_dontmaplines && line->linedef && segmentRange != LineSegmentRange::AlwaysVisible) + { + line->linedef->flags |= ML_MAPPED; + sub->flags |= SSECF_DRAWN; + } + + // Render 3D floor sides + if (line->backsector && frontsector->e && line->backsector->e->XFloor.ffloors.Size()) + { + for (unsigned int i = 0; i < line->backsector->e->XFloor.ffloors.Size(); i++) + { + F3DFloor *fakeFloor = line->backsector->e->XFloor.ffloors[i]; + if (!(fakeFloor->flags & FF_EXISTS)) continue; + if (!(fakeFloor->flags & FF_RENDERPLANES)) continue; + if (!fakeFloor->model) continue; + RenderPolyWall::Render3DFloorLine(WorldToClip, PortalPlane, line, frontsector, subsectorDepth, StencilValue, fakeFloor, TranslucentObjects); + } + } + + // Render wall, and update culling info if its an occlusion blocker + if (RenderPolyWall::RenderLine(WorldToClip, PortalPlane, line, frontsector, subsectorDepth, StencilValue, TranslucentObjects, LinePortals)) + { + if (segmentRange == LineSegmentRange::HasSegment) + Cull.MarkSegmentCulled(sx1, sx2); + } +} + +void RenderPolyScene::RenderPortals(int portalDepth) +{ + if (portalDepth < r_portal_recursions) + { + for (auto &portal : SectorPortals) + portal->Render(portalDepth + 1); + + for (auto &portal : LinePortals) + portal->Render(portalDepth + 1); + } + else // Fill with black + 
{ + PolyDrawArgs args; + args.objectToClip = &WorldToClip; + args.mode = TriangleDrawMode::Fan; + args.uniforms.color = 0; + args.uniforms.light = 256; + args.uniforms.flags = TriUniforms::fixed_light; + args.SetClipPlane(PortalPlane.x, PortalPlane.y, PortalPlane.z, PortalPlane.w); + + for (auto &portal : SectorPortals) + { + args.stenciltestvalue = portal->StencilValue; + args.stencilwritevalue = portal->StencilValue + 1; + for (const auto &verts : portal->Shape) + { + args.vinput = verts.Vertices; + args.vcount = verts.Count; + args.ccw = verts.Ccw; + args.uniforms.subsectorDepth = verts.SubsectorDepth; + PolyTriangleDrawer::draw(args, TriDrawVariant::FillNormal, TriBlendMode::Copy); + } + } + + for (auto &portal : LinePortals) + { + args.stenciltestvalue = portal->StencilValue; + args.stencilwritevalue = portal->StencilValue + 1; + for (const auto &verts : portal->Shape) + { + args.vinput = verts.Vertices; + args.vcount = verts.Count; + args.ccw = verts.Ccw; + args.uniforms.subsectorDepth = verts.SubsectorDepth; + PolyTriangleDrawer::draw(args, TriDrawVariant::FillNormal, TriBlendMode::Copy); + } + } + } +} + +void RenderPolyScene::RenderTranslucent(int portalDepth) +{ + if (portalDepth < r_portal_recursions) + { + for (auto it = SectorPortals.rbegin(); it != SectorPortals.rend(); ++it) + { + auto &portal = *it; + portal->RenderTranslucent(portalDepth + 1); + + PolyDrawArgs args; + args.objectToClip = &WorldToClip; + args.mode = TriangleDrawMode::Fan; + args.stenciltestvalue = portal->StencilValue + 1; + args.stencilwritevalue = StencilValue; + args.SetClipPlane(PortalPlane.x, PortalPlane.y, PortalPlane.z, PortalPlane.w); + for (const auto &verts : portal->Shape) + { + args.vinput = verts.Vertices; + args.vcount = verts.Count; + args.ccw = verts.Ccw; + args.uniforms.subsectorDepth = verts.SubsectorDepth; + PolyTriangleDrawer::draw(args, TriDrawVariant::StencilClose, TriBlendMode::Copy); + } + } + + for (auto it = LinePortals.rbegin(); it != LinePortals.rend(); 
++it) + { + auto &portal = *it; + portal->RenderTranslucent(portalDepth + 1); + + PolyDrawArgs args; + args.objectToClip = &WorldToClip; + args.mode = TriangleDrawMode::Fan; + args.stenciltestvalue = portal->StencilValue + 1; + args.stencilwritevalue = StencilValue; + args.SetClipPlane(PortalPlane.x, PortalPlane.y, PortalPlane.z, PortalPlane.w); + for (const auto &verts : portal->Shape) + { + args.vinput = verts.Vertices; + args.vcount = verts.Count; + args.ccw = verts.Ccw; + args.uniforms.subsectorDepth = verts.SubsectorDepth; + PolyTriangleDrawer::draw(args, TriDrawVariant::StencilClose, TriBlendMode::Copy); + } + } + } + + for (sector_t *sector : SeenSectors) + { + for (AActor *thing = sector->thinglist; thing != nullptr; thing = thing->snext) + { + DVector2 left, right; + if (!RenderPolySprite::GetLine(thing, left, right)) + continue; + double distanceSquared = (thing->Pos() - ViewPos).LengthSquared(); + RenderSprite(thing, distanceSquared, left, right); + } + } + + std::stable_sort(TranslucentObjects.begin(), TranslucentObjects.end()); + + for (auto it = TranslucentObjects.rbegin(); it != TranslucentObjects.rend(); ++it) + { + auto &obj = *it; + if (obj.particle) + { + RenderPolyParticle spr; + spr.Render(WorldToClip, PortalPlane, obj.particle, obj.sub, obj.subsectorDepth, StencilValue + 1); + } + else if (!obj.thing) + { + obj.wall.Render(WorldToClip, PortalPlane); + } + else if ((obj.thing->renderflags & RF_SPRITETYPEMASK) == RF_WALLSPRITE) + { + RenderPolyWallSprite wallspr; + wallspr.Render(WorldToClip, PortalPlane, obj.thing, obj.sub, obj.subsectorDepth, StencilValue + 1); + } + else + { + RenderPolySprite spr; + spr.Render(WorldToClip, PortalPlane, obj.thing, obj.sub, obj.subsectorDepth, StencilValue + 1, obj.SpriteLeft, obj.SpriteRight); + } + } +} diff --git a/src/r_poly_scene.h b/src/r_poly_scene.h new file mode 100644 index 0000000000..1e3037b124 --- /dev/null +++ b/src/r_poly_scene.h @@ -0,0 +1,103 @@ +/* +** Polygon Doom software renderer +** 
Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. +** +*/ + +#pragma once + +#include +#include +#include +#include +#include "doomdata.h" +#include "r_utility.h" +#include "r_main.h" +#include "r_poly_triangle.h" +#include "r_poly_intersection.h" +#include "r_poly_wall.h" +#include "r_poly_sprite.h" +#include "r_poly_wallsprite.h" +#include "r_poly_playersprite.h" +#include "r_poly_particle.h" +#include "r_poly_plane.h" +#include "r_poly_cull.h" +#include +#include + +class PolyTranslucentObject +{ +public: + PolyTranslucentObject(particle_t *particle, subsector_t *sub, uint32_t subsectorDepth) : particle(particle), sub(sub), subsectorDepth(subsectorDepth) { } + PolyTranslucentObject(AActor *thing, subsector_t *sub, uint32_t subsectorDepth, double dist, float t1, float t2) : thing(thing), sub(sub), subsectorDepth(subsectorDepth), DistanceSquared(dist), SpriteLeft(t1), SpriteRight(t2) { } + PolyTranslucentObject(RenderPolyWall wall) : wall(wall), subsectorDepth(wall.SubsectorDepth), DistanceSquared(1.e6) { } + + bool operator<(const PolyTranslucentObject &other) const + { + return subsectorDepth != other.subsectorDepth ? 
subsectorDepth < other.subsectorDepth : DistanceSquared < other.DistanceSquared; + } + + particle_t *particle = nullptr; + AActor *thing = nullptr; + subsector_t *sub = nullptr; + + RenderPolyWall wall; + + uint32_t subsectorDepth = 0; + double DistanceSquared = 0.0; + + float SpriteLeft = 0.0f, SpriteRight = 1.0f; +}; + +class PolyDrawSectorPortal; +class PolyDrawLinePortal; + +// Renders everything from a specific viewpoint +class RenderPolyScene +{ +public: + RenderPolyScene(); + ~RenderPolyScene(); + void SetViewpoint(const TriMatrix &worldToClip, const Vec4f &portalPlane, uint32_t stencilValue); + void Render(int portalDepth); + void RenderTranslucent(int portalDepth); + + static const uint32_t SkySubsectorDepth = 0x7fffffff; + +private: + void ClearBuffers(); + void RenderPortals(int portalDepth); + void RenderSectors(); + void RenderSubsector(subsector_t *sub); + void RenderLine(subsector_t *sub, seg_t *line, sector_t *frontsector, uint32_t subsectorDepth); + void RenderSprite(AActor *thing, double sortDistance, const DVector2 &left, const DVector2 &right); + void RenderSprite(AActor *thing, double sortDistance, DVector2 left, DVector2 right, double t1, double t2, void *node); + + TriMatrix WorldToClip; + Vec4f PortalPlane; + uint32_t StencilValue = 0; + PolyCull Cull; + uint32_t NextSubsectorDepth = 0; + std::set SeenSectors; + std::unordered_map SubsectorDepths; + std::vector TranslucentObjects; + + std::vector> SectorPortals; + std::vector> LinePortals; +}; diff --git a/src/r_poly_sky.cpp b/src/r_poly_sky.cpp index 2331548b31..dafe1f7ec8 100644 --- a/src/r_poly_sky.cpp +++ b/src/r_poly_sky.cpp @@ -55,7 +55,7 @@ void PolySkyDome::Render(const TriMatrix &worldToClip) PolyDrawArgs args; args.uniforms.light = 256; args.uniforms.flags = 0; - args.uniforms.subsectorDepth = RenderPolyPortal::SkySubsectorDepth; + args.uniforms.subsectorDepth = RenderPolyScene::SkySubsectorDepth; args.objectToClip = &objectToClip; args.stenciltestvalue = 255; 
args.stencilwritevalue = 255; diff --git a/src/r_swrenderer.cpp b/src/r_swrenderer.cpp index 56820e5361..0c49254545 100644 --- a/src/r_swrenderer.cpp +++ b/src/r_swrenderer.cpp @@ -195,7 +195,7 @@ void FSoftwareRenderer::RenderView(player_t *player) bool saved_swtruecolor = r_swtruecolor; r_swtruecolor = screen->IsBgra(); - RenderPolyScene::Instance()->RenderActorView(player->mo, false); + PolyRenderer::Instance()->RenderActorView(player->mo, false); FCanvasTextureInfo::UpdateAll(); // Apply special colormap if the target cannot do it @@ -260,7 +260,7 @@ void FSoftwareRenderer::WriteSavePic (player_t *player, FileWriter *file, int wi pic->ObjectFlags |= OF_Fixed; pic->Lock (); if (r_polyrenderer) - RenderPolyScene::Instance()->RenderViewToCanvas(player->mo, pic, 0, 0, width, height, true); + PolyRenderer::Instance()->RenderViewToCanvas(player->mo, pic, 0, 0, width, height, true); else R_RenderViewToCanvas (player->mo, pic, 0, 0, width, height); screen->GetFlashedPalette (palette); @@ -285,7 +285,7 @@ void FSoftwareRenderer::DrawRemainingPlayerSprites() } else { - RenderPolyScene::Instance()->RenderRemainingPlayerSprites(); + PolyRenderer::Instance()->RenderRemainingPlayerSprites(); } } @@ -413,7 +413,7 @@ void FSoftwareRenderer::RenderTextureView (FCanvasTexture *tex, AActor *viewpoin DAngle savedfov = FieldOfView; R_SetFOV ((double)fov); if (r_polyrenderer) - RenderPolyScene::Instance()->RenderViewToCanvas(viewpoint, Canvas, 0, 0, tex->GetWidth(), tex->GetHeight(), tex->bFirstUpdate); + PolyRenderer::Instance()->RenderViewToCanvas(viewpoint, Canvas, 0, 0, tex->GetWidth(), tex->GetHeight(), tex->bFirstUpdate); else R_RenderViewToCanvas (viewpoint, Canvas, 0, 0, tex->GetWidth(), tex->GetHeight(), tex->bFirstUpdate); R_SetFOV (savedfov); From 36f168040edc49bdfe9ee8cd7fcda8db5c32b967 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 4 Dec 2016 03:12:40 +0100 Subject: [PATCH 428/912] Fix null pointer crash bug --- src/r_poly_wall.cpp | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/src/r_poly_wall.cpp b/src/r_poly_wall.cpp index 4dd24887d9..384529da08 100644 --- a/src/r_poly_wall.cpp +++ b/src/r_poly_wall.cpp @@ -38,7 +38,7 @@ EXTERN_CVAR(Bool, r_drawmirrors) bool RenderPolyWall::RenderLine(const TriMatrix &worldToClip, const Vec4f &clipPlane, seg_t *line, sector_t *frontsector, uint32_t subsectorDepth, uint32_t stencilValue, std::vector &translucentWallsOutput, std::vector> &linePortals) { PolyDrawLinePortal *polyportal = nullptr; - if (line->backsector == nullptr && line->sidedef == line->linedef->sidedef[0] && (line->linedef->special == Line_Mirror && r_drawmirrors)) + if (line->backsector == nullptr && line->linedef && line->sidedef == line->linedef->sidedef[0] && (line->linedef->special == Line_Mirror && r_drawmirrors)) { linePortals.push_back(std::make_unique(line->linedef)); polyportal = linePortals.back().get(); From 2e1e2028124c9bbe106fc9033fcdd78800da8ef2 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 4 Dec 2016 05:52:13 +0100 Subject: [PATCH 429/912] Add linear filtering --- .../fixedfunction/drawtrianglecodegen.cpp | 154 ++++++++++++------ .../fixedfunction/drawtrianglecodegen.h | 9 +- 2 files changed, 109 insertions(+), 54 deletions(-) diff --git a/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp b/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp index 75acffac9e..3806d5253d 100644 --- a/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp +++ b/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp @@ -334,6 +334,9 @@ void DrawTriangleCodegen::SetupAffineBlock() AffineVaryingPosX[i] = SSAInt(AffineVaryingPosY[i] * rcpW0, false); AffineVaryingStepX[i] = (SSAInt((AffineVaryingPosY[i] + gradVaryingX[i]) * rcpW1, false) - AffineVaryingPosX[i]) / q; } + + // Min filter = linear, Mag filter = nearest: + AffineLinear = (gradVaryingX[0] / AffineW) > SSAFloat(1.0f) || (gradVaryingX[0] / AffineW) < SSAFloat(-1.0f); } void DrawTriangleCodegen::LoopFullBlock() @@ -739,32 +742,97 
@@ void DrawTriangleCodegen::LoopMaskedStoreBlock() } #endif -SSAVec4i DrawTriangleCodegen::TranslateSample32(SSAInt uvoffset) +SSAVec4i DrawTriangleCodegen::TranslateSample32(SSAInt *varying) { + SSAInt ufrac = varying[0] << 8; + SSAInt vfrac = varying[1] << 8; + + SSAInt upos = ((ufrac >> 16) * textureWidth) >> 16; + SSAInt vpos = ((vfrac >> 16) * textureHeight) >> 16; + SSAInt uvoffset = upos * textureHeight + vpos; + if (variant == TriDrawVariant::FillNormal || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector) return translation[color * 4].load_vec4ub(true); else return translation[texturePixels[uvoffset].load(true).zext_int() * 4].load_vec4ub(true); } -SSAInt DrawTriangleCodegen::TranslateSample8(SSAInt uvoffset) +SSAInt DrawTriangleCodegen::TranslateSample8(SSAInt *varying) { + SSAInt ufrac = varying[0] << 8; + SSAInt vfrac = varying[1] << 8; + + SSAInt upos = ((ufrac >> 16) * textureWidth) >> 16; + SSAInt vpos = ((vfrac >> 16) * textureHeight) >> 16; + SSAInt uvoffset = upos * textureHeight + vpos; + if (variant == TriDrawVariant::FillNormal || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector) return translation[color].load(true).zext_int(); else return translation[texturePixels[uvoffset].load(true).zext_int()].load(true).zext_int(); } -SSAVec4i DrawTriangleCodegen::Sample32(SSAInt uvoffset) +SSAVec4i DrawTriangleCodegen::Sample32(SSAInt *varying) { if (variant == TriDrawVariant::FillNormal || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector) return SSAVec4i::unpack(color); - else - return texturePixels[uvoffset * 4].load_vec4ub(true); + + SSAInt ufrac = varying[0] << 8; + SSAInt vfrac = varying[1] << 8; + + SSAVec4i nearest; + SSAVec4i linear; + + { + SSAInt upos = ((ufrac >> 16) * textureWidth) >> 16; + SSAInt vpos = ((vfrac >> 16) * textureHeight) >> 16; + SSAInt uvoffset = upos * textureHeight + vpos; + + nearest = texturePixels[uvoffset * 
4].load_vec4ub(true); + } + + { + SSAInt uone = (SSAInt(0x01000000) / textureWidth) << 8; + SSAInt vone = (SSAInt(0x01000000) / textureHeight) << 8; + + ufrac = ufrac - (uone >> 1); + vfrac = vfrac - (vone >> 1); + + SSAInt frac_x0 = (ufrac >> FRACBITS) * textureWidth; + SSAInt frac_x1 = ((ufrac + uone) >> FRACBITS) * textureWidth; + SSAInt frac_y0 = (vfrac >> FRACBITS) * textureHeight; + SSAInt frac_y1 = ((vfrac + vone) >> FRACBITS) * textureHeight; + + SSAInt x0 = frac_x0 >> FRACBITS; + SSAInt x1 = frac_x1 >> FRACBITS; + SSAInt y0 = frac_y0 >> FRACBITS; + SSAInt y1 = frac_y1 >> FRACBITS; + + SSAVec4i p00 = texturePixels[(x0 * textureHeight + y0) * 4].load_vec4ub(true); + SSAVec4i p01 = texturePixels[(x0 * textureHeight + y1) * 4].load_vec4ub(true); + SSAVec4i p10 = texturePixels[(x1 * textureHeight + y0) * 4].load_vec4ub(true); + SSAVec4i p11 = texturePixels[(x1 * textureHeight + y1) * 4].load_vec4ub(true); + + SSAInt inv_b = (frac_x1 >> (FRACBITS - 4)) & 15; + SSAInt inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; + SSAInt a = 16 - inv_a; + SSAInt b = 16 - inv_b; + + linear = (p00 * (a * b) + p01 * (inv_a * b) + p10 * (a * inv_b) + p11 * (inv_a * inv_b) + 127) >> 8; + } + + return AffineLinear.select(linear, nearest); } -SSAInt DrawTriangleCodegen::Sample8(SSAInt uvoffset) +SSAInt DrawTriangleCodegen::Sample8(SSAInt *varying) { + SSAInt ufrac = varying[0] << 8; + SSAInt vfrac = varying[1] << 8; + + SSAInt upos = ((ufrac >> 16) * textureWidth) >> 16; + SSAInt vpos = ((vfrac >> 16) * textureHeight) >> 16; + SSAInt uvoffset = upos * textureHeight + vpos; + if (variant == TriDrawVariant::FillNormal || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector) return color; else @@ -778,13 +846,6 @@ SSAInt DrawTriangleCodegen::Shade8(SSAInt c) SSAVec4i DrawTriangleCodegen::ProcessPixel32(SSAVec4i bg, SSAInt *varying) { - SSAInt ufrac = varying[0] << 8; - SSAInt vfrac = varying[1] << 8; - - SSAInt upos = ((ufrac >> 16) * textureWidth) >> 16; - 
SSAInt vpos = ((vfrac >> 16) * textureHeight) >> 16; - SSAInt uvoffset = upos * textureHeight + vpos; - SSAVec4i fg; SSAVec4i output; @@ -792,62 +853,62 @@ SSAVec4i DrawTriangleCodegen::ProcessPixel32(SSAVec4i bg, SSAInt *varying) { default: case TriBlendMode::Copy: - fg = Sample32(uvoffset); + fg = Sample32(varying); output = blend_copy(shade_bgra_simple(fg, currentlight)); break; case TriBlendMode::AlphaBlend: - fg = Sample32(uvoffset); + fg = Sample32(varying); output = blend_alpha_blend(shade_bgra_simple(fg, currentlight), bg); break; case TriBlendMode::AddSolid: - fg = Sample32(uvoffset); + fg = Sample32(varying); output = blend_add(shade_bgra_simple(fg, currentlight), bg, srcalpha, destalpha); break; case TriBlendMode::Add: - fg = Sample32(uvoffset); + fg = Sample32(varying); output = blend_add(shade_bgra_simple(fg, currentlight), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); break; case TriBlendMode::Sub: - fg = Sample32(uvoffset); + fg = Sample32(varying); output = blend_sub(shade_bgra_simple(fg, currentlight), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); break; case TriBlendMode::RevSub: - fg = Sample32(uvoffset); + fg = Sample32(varying); output = blend_revsub(shade_bgra_simple(fg, currentlight), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); break; case TriBlendMode::Stencil: - fg = Sample32(uvoffset); + fg = Sample32(varying); output = blend_stencil(shade_bgra_simple(SSAVec4i::unpack(color), currentlight), fg[3], bg, srcalpha, destalpha); break; case TriBlendMode::Shaded: - output = blend_stencil(shade_bgra_simple(SSAVec4i::unpack(color), currentlight), Sample8(uvoffset), bg, srcalpha, destalpha); + output = blend_stencil(shade_bgra_simple(SSAVec4i::unpack(color), currentlight), Sample8(varying), bg, srcalpha, destalpha); break; case TriBlendMode::TranslateCopy: - fg = TranslateSample32(uvoffset); + fg = TranslateSample32(varying); output = blend_copy(shade_bgra_simple(fg, currentlight)); break; case TriBlendMode::TranslateAlphaBlend: - fg = 
TranslateSample32(uvoffset); + fg = TranslateSample32(varying); output = blend_alpha_blend(shade_bgra_simple(fg, currentlight), bg); break; case TriBlendMode::TranslateAdd: - fg = TranslateSample32(uvoffset); + fg = TranslateSample32(varying); output = blend_add(shade_bgra_simple(fg, currentlight), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); break; case TriBlendMode::TranslateSub: - fg = TranslateSample32(uvoffset); + fg = TranslateSample32(varying); output = blend_sub(shade_bgra_simple(fg, currentlight), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); break; case TriBlendMode::TranslateRevSub: - fg = TranslateSample32(uvoffset); + fg = TranslateSample32(varying); output = blend_revsub(shade_bgra_simple(fg, currentlight), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); break; case TriBlendMode::AddSrcColorOneMinusSrcColor: - fg = Sample32(uvoffset); + fg = Sample32(varying); output = blend_add_srccolor_oneminussrccolor(shade_bgra_simple(fg, currentlight), bg); break; case TriBlendMode::Skycap: - fg = Sample32(uvoffset); + fg = Sample32(varying); output = FadeOut(varying[1], fg); break; } @@ -869,13 +930,6 @@ SSAInt DrawTriangleCodegen::ToPal8(SSAVec4i c) SSAInt DrawTriangleCodegen::ProcessPixel8(SSAInt bg, SSAInt *varying) { - SSAInt ufrac = varying[0] << 8; - SSAInt vfrac = varying[1] << 8; - - SSAInt upos = ((ufrac >> 16) * textureWidth) >> 16; - SSAInt vpos = ((vfrac >> 16) * textureHeight) >> 16; - SSAInt uvoffset = upos * textureHeight + vpos; - SSAVec4i fg; SSAInt alpha, inv_alpha; SSAInt output; @@ -885,80 +939,80 @@ SSAInt DrawTriangleCodegen::ProcessPixel8(SSAInt bg, SSAInt *varying) { default: case TriBlendMode::Copy: - output = Shade8(Sample8(uvoffset)); + output = Shade8(Sample8(varying)); break; case TriBlendMode::AlphaBlend: - palindex = Sample8(uvoffset); + palindex = Sample8(varying); output = Shade8(palindex); output = (palindex == SSAInt(0)).select(bg, output); break; case TriBlendMode::AddSolid: - palindex = Sample8(uvoffset); + 
palindex = Sample8(varying); fg = ToBgra(Shade8(palindex)); output = ToPal8(blend_add(fg, ToBgra(bg), srcalpha, destalpha)); output = (palindex == SSAInt(0)).select(bg, output); break; case TriBlendMode::Add: - palindex = Sample8(uvoffset); + palindex = Sample8(varying); fg = ToBgra(Shade8(palindex)); output = ToPal8(blend_add(fg, ToBgra(bg), srcalpha, calc_blend_bgalpha(fg, destalpha))); output = (palindex == SSAInt(0)).select(bg, output); break; case TriBlendMode::Sub: - palindex = Sample8(uvoffset); + palindex = Sample8(varying); fg = ToBgra(Shade8(palindex)); output = ToPal8(blend_sub(fg, ToBgra(bg), srcalpha, calc_blend_bgalpha(fg, destalpha))); output = (palindex == SSAInt(0)).select(bg, output); break; case TriBlendMode::RevSub: - palindex = Sample8(uvoffset); + palindex = Sample8(varying); fg = ToBgra(Shade8(palindex)); output = ToPal8(blend_revsub(fg, ToBgra(bg), srcalpha, calc_blend_bgalpha(fg, destalpha))); output = (palindex == SSAInt(0)).select(bg, output); break; case TriBlendMode::Stencil: - output = ToPal8(blend_stencil(ToBgra(Shade8(color)), (Sample8(uvoffset) == SSAInt(0)).select(SSAInt(0), SSAInt(256)), ToBgra(bg), srcalpha, destalpha)); + output = ToPal8(blend_stencil(ToBgra(Shade8(color)), (Sample8(varying) == SSAInt(0)).select(SSAInt(0), SSAInt(256)), ToBgra(bg), srcalpha, destalpha)); break; case TriBlendMode::Shaded: - palindex = Sample8(uvoffset); + palindex = Sample8(varying); output = ToPal8(blend_stencil(ToBgra(Shade8(color)), palindex, ToBgra(bg), srcalpha, destalpha)); break; case TriBlendMode::TranslateCopy: - palindex = TranslateSample8(uvoffset); + palindex = TranslateSample8(varying); output = Shade8(palindex); output = (palindex == SSAInt(0)).select(bg, output); break; case TriBlendMode::TranslateAlphaBlend: - palindex = TranslateSample8(uvoffset); + palindex = TranslateSample8(varying); output = Shade8(palindex); output = (palindex == SSAInt(0)).select(bg, output); break; case TriBlendMode::TranslateAdd: - palindex = 
TranslateSample8(uvoffset); + palindex = TranslateSample8(varying); fg = ToBgra(Shade8(palindex)); output = ToPal8(blend_add(fg, ToBgra(bg), srcalpha, calc_blend_bgalpha(fg, destalpha))); output = (palindex == SSAInt(0)).select(bg, output); break; case TriBlendMode::TranslateSub: - palindex = TranslateSample8(uvoffset); + palindex = TranslateSample8(varying); fg = ToBgra(Shade8(palindex)); output = ToPal8(blend_sub(fg, ToBgra(bg), srcalpha, calc_blend_bgalpha(fg, destalpha))); output = (palindex == SSAInt(0)).select(bg, output); break; case TriBlendMode::TranslateRevSub: - palindex = TranslateSample8(uvoffset); + palindex = TranslateSample8(varying); fg = ToBgra(Shade8(palindex)); output = ToPal8(blend_revsub(fg, ToBgra(bg), srcalpha, calc_blend_bgalpha(fg, destalpha))); output = (palindex == SSAInt(0)).select(bg, output); break; case TriBlendMode::AddSrcColorOneMinusSrcColor: - palindex = Sample8(uvoffset); + palindex = Sample8(varying); fg = ToBgra(Shade8(palindex)); output = ToPal8(blend_add_srccolor_oneminussrccolor(fg, ToBgra(bg))); output = (palindex == SSAInt(0)).select(bg, output); break; case TriBlendMode::Skycap: - fg = ToBgra(Sample8(uvoffset)); + fg = ToBgra(Sample8(varying)); output = ToPal8(FadeOut(varying[1], fg)); break; } diff --git a/tools/drawergen/fixedfunction/drawtrianglecodegen.h b/tools/drawergen/fixedfunction/drawtrianglecodegen.h index 1f22cf0a8f..81a5e57d27 100644 --- a/tools/drawergen/fixedfunction/drawtrianglecodegen.h +++ b/tools/drawergen/fixedfunction/drawtrianglecodegen.h @@ -50,10 +50,10 @@ private: SSAVec4i ProcessPixel32(SSAVec4i bg, SSAInt *varying); SSAInt ProcessPixel8(SSAInt bg, SSAInt *varying); - SSAVec4i TranslateSample32(SSAInt uvoffset); - SSAInt TranslateSample8(SSAInt uvoffset); - SSAVec4i Sample32(SSAInt uvoffset); - SSAInt Sample8(SSAInt uvoffset); + SSAVec4i TranslateSample32(SSAInt *varying); + SSAInt TranslateSample8(SSAInt *varying); + SSAVec4i Sample32(SSAInt *varying); + SSAInt Sample8(SSAInt *varying); SSAInt 
Shade8(SSAInt c); SSAVec4i ToBgra(SSAInt index); SSAInt ToPal8(SSAVec4i c); @@ -143,6 +143,7 @@ private: SSAInt currentlight; SSAUBytePtr currentcolormap; SSAFloat AffineW; + SSABool AffineLinear; SSAFloat AffineVaryingPosY[TriVertex::NumVarying]; SSAInt AffineVaryingPosX[TriVertex::NumVarying]; SSAInt AffineVaryingStepX[TriVertex::NumVarying]; From c59925085c5a9f59293abd96d2f51f962e2f3d8b Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Sun, 4 Dec 2016 10:05:01 -0500 Subject: [PATCH 430/912] - Removal of the ASM functions resulted in failed compile when NO_ASM is set. --- src/r_draw.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/r_draw.h b/src/r_draw.h index 06227167bf..290256d58a 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -247,6 +247,8 @@ extern void (*R_DrawFogBoundary)(int x1, int x2, short *uclip, short *dclip); void R_DrawFogBoundary_C (int x1, int x2, short *uclip, short *dclip); +void R_DrawColumnHorizP_C(void); + #ifdef X86_ASM extern "C" void R_DrawColumnP_Unrolled (void); @@ -258,8 +260,6 @@ extern "C" void R_DrawFuzzColumnP_ASM (void); extern "C" void R_DrawSpanP_ASM (void); extern "C" void R_DrawSpanMaskedP_ASM (void); -void R_DrawColumnHorizP_C(void); - #else void R_DrawColumnP_C (void); From 86c1bbcdeb8706bb3d92004f6d0f0c35dd830240 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 4 Dec 2016 18:19:01 +0100 Subject: [PATCH 431/912] Split drawergen file into more files --- tools/drawergen/CMakeLists.txt | 2 + tools/drawergen/drawergen.cpp | 575 +------------------------------- tools/drawergen/exception.h | 36 ++ tools/drawergen/llvmdrawers.cpp | 376 +++++++++++++++++++++ tools/drawergen/llvmdrawers.h | 68 ++++ tools/drawergen/llvmprogram.cpp | 171 ++++++++++ tools/drawergen/llvmprogram.h | 41 +++ 7 files changed, 696 insertions(+), 573 deletions(-) create mode 100644 tools/drawergen/exception.h create mode 100644 tools/drawergen/llvmdrawers.cpp create mode 100644 tools/drawergen/llvmdrawers.h create 
mode 100644 tools/drawergen/llvmprogram.cpp create mode 100644 tools/drawergen/llvmprogram.h diff --git a/tools/drawergen/CMakeLists.txt b/tools/drawergen/CMakeLists.txt index dc9fefb178..d330e799a1 100644 --- a/tools/drawergen/CMakeLists.txt +++ b/tools/drawergen/CMakeLists.txt @@ -113,6 +113,8 @@ endif() set (SOURCES drawergen.cpp + llvmprogram.cpp + llvmdrawers.cpp ssa/ssa_bool.cpp ssa/ssa_float.cpp ssa/ssa_float_ptr.cpp diff --git a/tools/drawergen/drawergen.cpp b/tools/drawergen/drawergen.cpp index c31b0229d2..ecf87062e1 100644 --- a/tools/drawergen/drawergen.cpp +++ b/tools/drawergen/drawergen.cpp @@ -22,579 +22,8 @@ #include "precomp.h" #include "timestamp.h" -#include "fixedfunction/drawspancodegen.h" -#include "fixedfunction/drawwallcodegen.h" -#include "fixedfunction/drawcolumncodegen.h" -#include "fixedfunction/drawskycodegen.h" -#include "fixedfunction/drawtrianglecodegen.h" -#include "ssa/ssa_function.h" -#include "ssa/ssa_scope.h" -#include "ssa/ssa_for_block.h" -#include "ssa/ssa_if_block.h" -#include "ssa/ssa_stack.h" -#include "ssa/ssa_function.h" -#include "ssa/ssa_struct_type.h" -#include "ssa/ssa_value.h" -#include "ssa/ssa_barycentric_weight.h" -#include - -class Exception : public std::exception -{ -public: - Exception(const std::string &message) : message(message) { } - const char *what() const noexcept override { return message.c_str(); } - -private: - std::string message; -}; - -class LLVMProgram -{ -public: - LLVMProgram(); - - void CreateModule(); - std::vector GenerateObjectFile(const std::string &triple, const std::string &cpuName, const std::string &features); - std::string DumpModule(); - - llvm::LLVMContext &context() { return *mContext; } - llvm::Module *module() { return mModule.get(); } - -private: - llvm::TargetMachine *machine = nullptr; - std::unique_ptr mContext; - std::unique_ptr mModule; -}; - -class LLVMDrawers -{ -public: - LLVMDrawers(const std::string &triple, const std::string &cpuName, const std::string &features, 
const std::string namePostfix); - - std::vector ObjectFile; - -private: - void CodegenDrawColumn(const char *name, DrawColumnVariant variant, DrawColumnMethod method); - void CodegenDrawSpan(const char *name, DrawSpanVariant variant); - void CodegenDrawWall(const char *name, DrawWallVariant variant, int columns); - void CodegenDrawSky(const char *name, DrawSkyVariant variant, int columns); - void CodegenDrawTriangle(const std::string &name, TriDrawVariant variant, TriBlendMode blendmode, bool truecolor); - - static llvm::Type *GetDrawColumnArgsStruct(llvm::LLVMContext &context); - static llvm::Type *GetDrawSpanArgsStruct(llvm::LLVMContext &context); - static llvm::Type *GetDrawWallArgsStruct(llvm::LLVMContext &context); - static llvm::Type *GetDrawSkyArgsStruct(llvm::LLVMContext &context); - static llvm::Type *GetWorkerThreadDataStruct(llvm::LLVMContext &context); - static llvm::Type *GetTriVertexStruct(llvm::LLVMContext &context); - static llvm::Type *GetTriMatrixStruct(llvm::LLVMContext &context); - static llvm::Type *GetTriUniformsStruct(llvm::LLVMContext &context); - static llvm::Type *GetTriDrawTriangleArgs(llvm::LLVMContext &context); - - LLVMProgram mProgram; - std::string mNamePostfix; -}; - -///////////////////////////////////////////////////////////////////////////// - -LLVMDrawers::LLVMDrawers(const std::string &triple, const std::string &cpuName, const std::string &features, const std::string namePostfix) : mNamePostfix(namePostfix) -{ - mProgram.CreateModule(); - - CodegenDrawColumn("FillColumn", DrawColumnVariant::Fill, DrawColumnMethod::Normal); - CodegenDrawColumn("FillColumnAdd", DrawColumnVariant::FillAdd, DrawColumnMethod::Normal); - CodegenDrawColumn("FillColumnAddClamp", DrawColumnVariant::FillAddClamp, DrawColumnMethod::Normal); - CodegenDrawColumn("FillColumnSubClamp", DrawColumnVariant::FillSubClamp, DrawColumnMethod::Normal); - CodegenDrawColumn("FillColumnRevSubClamp", DrawColumnVariant::FillRevSubClamp, DrawColumnMethod::Normal); - 
CodegenDrawColumn("DrawColumn", DrawColumnVariant::Draw, DrawColumnMethod::Normal); - CodegenDrawColumn("DrawColumnAdd", DrawColumnVariant::DrawAdd, DrawColumnMethod::Normal); - CodegenDrawColumn("DrawColumnShaded", DrawColumnVariant::DrawShaded, DrawColumnMethod::Normal); - CodegenDrawColumn("DrawColumnAddClamp", DrawColumnVariant::DrawAddClamp, DrawColumnMethod::Normal); - CodegenDrawColumn("DrawColumnSubClamp", DrawColumnVariant::DrawSubClamp, DrawColumnMethod::Normal); - CodegenDrawColumn("DrawColumnRevSubClamp", DrawColumnVariant::DrawRevSubClamp, DrawColumnMethod::Normal); - CodegenDrawColumn("DrawColumnTranslated", DrawColumnVariant::DrawTranslated, DrawColumnMethod::Normal); - CodegenDrawColumn("DrawColumnTlatedAdd", DrawColumnVariant::DrawTlatedAdd, DrawColumnMethod::Normal); - CodegenDrawColumn("DrawColumnAddClampTranslated", DrawColumnVariant::DrawAddClampTranslated, DrawColumnMethod::Normal); - CodegenDrawColumn("DrawColumnSubClampTranslated", DrawColumnVariant::DrawSubClampTranslated, DrawColumnMethod::Normal); - CodegenDrawColumn("DrawColumnRevSubClampTranslated", DrawColumnVariant::DrawRevSubClampTranslated, DrawColumnMethod::Normal); - CodegenDrawColumn("DrawColumnRt1", DrawColumnVariant::Draw, DrawColumnMethod::Rt1); - CodegenDrawColumn("DrawColumnRt1Copy", DrawColumnVariant::DrawCopy, DrawColumnMethod::Rt1); - CodegenDrawColumn("DrawColumnRt1Add", DrawColumnVariant::DrawAdd, DrawColumnMethod::Rt1); - CodegenDrawColumn("DrawColumnRt1Shaded", DrawColumnVariant::DrawShaded, DrawColumnMethod::Rt1); - CodegenDrawColumn("DrawColumnRt1AddClamp", DrawColumnVariant::DrawAddClamp, DrawColumnMethod::Rt1); - CodegenDrawColumn("DrawColumnRt1SubClamp", DrawColumnVariant::DrawSubClamp, DrawColumnMethod::Rt1); - CodegenDrawColumn("DrawColumnRt1RevSubClamp", DrawColumnVariant::DrawRevSubClamp, DrawColumnMethod::Rt1); - CodegenDrawColumn("DrawColumnRt1Translated", DrawColumnVariant::DrawTranslated, DrawColumnMethod::Rt1); - 
CodegenDrawColumn("DrawColumnRt1TlatedAdd", DrawColumnVariant::DrawTlatedAdd, DrawColumnMethod::Rt1); - CodegenDrawColumn("DrawColumnRt1AddClampTranslated", DrawColumnVariant::DrawAddClampTranslated, DrawColumnMethod::Rt1); - CodegenDrawColumn("DrawColumnRt1SubClampTranslated", DrawColumnVariant::DrawSubClampTranslated, DrawColumnMethod::Rt1); - CodegenDrawColumn("DrawColumnRt1RevSubClampTranslated", DrawColumnVariant::DrawRevSubClampTranslated, DrawColumnMethod::Rt1); - CodegenDrawColumn("DrawColumnRt4", DrawColumnVariant::Draw, DrawColumnMethod::Rt4); - CodegenDrawColumn("DrawColumnRt4Copy", DrawColumnVariant::DrawCopy, DrawColumnMethod::Rt4); - CodegenDrawColumn("DrawColumnRt4Add", DrawColumnVariant::DrawAdd, DrawColumnMethod::Rt4); - CodegenDrawColumn("DrawColumnRt4Shaded", DrawColumnVariant::DrawShaded, DrawColumnMethod::Rt4); - CodegenDrawColumn("DrawColumnRt4AddClamp", DrawColumnVariant::DrawAddClamp, DrawColumnMethod::Rt4); - CodegenDrawColumn("DrawColumnRt4SubClamp", DrawColumnVariant::DrawSubClamp, DrawColumnMethod::Rt4); - CodegenDrawColumn("DrawColumnRt4RevSubClamp", DrawColumnVariant::DrawRevSubClamp, DrawColumnMethod::Rt4); - CodegenDrawColumn("DrawColumnRt4Translated", DrawColumnVariant::DrawTranslated, DrawColumnMethod::Rt4); - CodegenDrawColumn("DrawColumnRt4TlatedAdd", DrawColumnVariant::DrawTlatedAdd, DrawColumnMethod::Rt4); - CodegenDrawColumn("DrawColumnRt4AddClampTranslated", DrawColumnVariant::DrawAddClampTranslated, DrawColumnMethod::Rt4); - CodegenDrawColumn("DrawColumnRt4SubClampTranslated", DrawColumnVariant::DrawSubClampTranslated, DrawColumnMethod::Rt4); - CodegenDrawColumn("DrawColumnRt4RevSubClampTranslated", DrawColumnVariant::DrawRevSubClampTranslated, DrawColumnMethod::Rt4); - CodegenDrawSpan("DrawSpan", DrawSpanVariant::Opaque); - CodegenDrawSpan("DrawSpanMasked", DrawSpanVariant::Masked); - CodegenDrawSpan("DrawSpanTranslucent", DrawSpanVariant::Translucent); - CodegenDrawSpan("DrawSpanMaskedTranslucent", 
DrawSpanVariant::MaskedTranslucent); - CodegenDrawSpan("DrawSpanAddClamp", DrawSpanVariant::AddClamp); - CodegenDrawSpan("DrawSpanMaskedAddClamp", DrawSpanVariant::MaskedAddClamp); - CodegenDrawWall("vlinec1", DrawWallVariant::Opaque, 1); - CodegenDrawWall("vlinec4", DrawWallVariant::Opaque, 4); - CodegenDrawWall("mvlinec1", DrawWallVariant::Masked, 1); - CodegenDrawWall("mvlinec4", DrawWallVariant::Masked, 4); - CodegenDrawWall("tmvline1_add", DrawWallVariant::Add, 1); - CodegenDrawWall("tmvline4_add", DrawWallVariant::Add, 4); - CodegenDrawWall("tmvline1_addclamp", DrawWallVariant::AddClamp, 1); - CodegenDrawWall("tmvline4_addclamp", DrawWallVariant::AddClamp, 4); - CodegenDrawWall("tmvline1_subclamp", DrawWallVariant::SubClamp, 1); - CodegenDrawWall("tmvline4_subclamp", DrawWallVariant::SubClamp, 4); - CodegenDrawWall("tmvline1_revsubclamp", DrawWallVariant::RevSubClamp, 1); - CodegenDrawWall("tmvline4_revsubclamp", DrawWallVariant::RevSubClamp, 4); - CodegenDrawSky("DrawSky1", DrawSkyVariant::Single, 1); - CodegenDrawSky("DrawSky4", DrawSkyVariant::Single, 4); - CodegenDrawSky("DrawDoubleSky1", DrawSkyVariant::Double, 1); - CodegenDrawSky("DrawDoubleSky4", DrawSkyVariant::Double, 4); - for (int i = 0; i < NumTriBlendModes(); i++) - { - CodegenDrawTriangle("TriDrawNormal8_" + std::to_string(i), TriDrawVariant::DrawNormal, (TriBlendMode)i, false); - CodegenDrawTriangle("TriDrawNormal32_" + std::to_string(i), TriDrawVariant::DrawNormal, (TriBlendMode)i, true); - CodegenDrawTriangle("TriFillNormal8_" + std::to_string(i), TriDrawVariant::FillNormal, (TriBlendMode)i, false); - CodegenDrawTriangle("TriFillNormal32_" + std::to_string(i), TriDrawVariant::FillNormal, (TriBlendMode)i, true); - CodegenDrawTriangle("TriDrawSubsector8_" + std::to_string(i), TriDrawVariant::DrawSubsector, (TriBlendMode)i, false); - CodegenDrawTriangle("TriDrawSubsector32_" + std::to_string(i), TriDrawVariant::DrawSubsector, (TriBlendMode)i, true); - CodegenDrawTriangle("TriFillSubsector8_" + 
std::to_string(i), TriDrawVariant::FillSubsector, (TriBlendMode)i, false); - CodegenDrawTriangle("TriFillSubsector32_" + std::to_string(i), TriDrawVariant::FillSubsector, (TriBlendMode)i, true); - } - CodegenDrawTriangle("TriStencil", TriDrawVariant::Stencil, TriBlendMode::Copy, false); - CodegenDrawTriangle("TriStencilClose", TriDrawVariant::StencilClose, TriBlendMode::Copy, false); - - ObjectFile = mProgram.GenerateObjectFile(triple, cpuName, features); -} - -void LLVMDrawers::CodegenDrawColumn(const char *name, DrawColumnVariant variant, DrawColumnMethod method) -{ - llvm::IRBuilder<> builder(mProgram.context()); - SSAScope ssa_scope(&mProgram.context(), mProgram.module(), &builder); - - SSAFunction function(name + mNamePostfix); - function.add_parameter(GetDrawColumnArgsStruct(mProgram.context())); - function.add_parameter(GetWorkerThreadDataStruct(mProgram.context())); - function.create_public(); - - DrawColumnCodegen codegen; - codegen.Generate(variant, method, function.parameter(0), function.parameter(1)); - - builder.CreateRetVoid(); - - if (llvm::verifyFunction(*function.func)) - throw Exception("verifyFunction failed for CodegenDrawColumn()"); -} - -void LLVMDrawers::CodegenDrawSpan(const char *name, DrawSpanVariant variant) -{ - llvm::IRBuilder<> builder(mProgram.context()); - SSAScope ssa_scope(&mProgram.context(), mProgram.module(), &builder); - - SSAFunction function(name + mNamePostfix); - function.add_parameter(GetDrawSpanArgsStruct(mProgram.context())); - function.create_public(); - - DrawSpanCodegen codegen; - codegen.Generate(variant, function.parameter(0)); - - builder.CreateRetVoid(); - - if (llvm::verifyFunction(*function.func)) - throw Exception("verifyFunction failed for CodegenDrawSpan()"); -} - -void LLVMDrawers::CodegenDrawWall(const char *name, DrawWallVariant variant, int columns) -{ - llvm::IRBuilder<> builder(mProgram.context()); - SSAScope ssa_scope(&mProgram.context(), mProgram.module(), &builder); - - SSAFunction function(name + 
mNamePostfix); - function.add_parameter(GetDrawWallArgsStruct(mProgram.context())); - function.add_parameter(GetWorkerThreadDataStruct(mProgram.context())); - function.create_public(); - - DrawWallCodegen codegen; - codegen.Generate(variant, columns == 4, function.parameter(0), function.parameter(1)); - - builder.CreateRetVoid(); - - if (llvm::verifyFunction(*function.func)) - throw Exception("verifyFunction failed for CodegenDrawWall()"); -} - -void LLVMDrawers::CodegenDrawSky(const char *name, DrawSkyVariant variant, int columns) -{ - llvm::IRBuilder<> builder(mProgram.context()); - SSAScope ssa_scope(&mProgram.context(), mProgram.module(), &builder); - - SSAFunction function(name + mNamePostfix); - function.add_parameter(GetDrawSkyArgsStruct(mProgram.context())); - function.add_parameter(GetWorkerThreadDataStruct(mProgram.context())); - function.create_public(); - - DrawSkyCodegen codegen; - codegen.Generate(variant, columns == 4, function.parameter(0), function.parameter(1)); - - builder.CreateRetVoid(); - - if (llvm::verifyFunction(*function.func)) - throw Exception("verifyFunction failed for CodegenDrawSky()"); -} - -void LLVMDrawers::CodegenDrawTriangle(const std::string &name, TriDrawVariant variant, TriBlendMode blendmode, bool truecolor) -{ - llvm::IRBuilder<> builder(mProgram.context()); - SSAScope ssa_scope(&mProgram.context(), mProgram.module(), &builder); - - SSAFunction function(name + mNamePostfix); - function.add_parameter(GetTriDrawTriangleArgs(mProgram.context())); - function.add_parameter(GetWorkerThreadDataStruct(mProgram.context())); - function.create_public(); - - DrawTriangleCodegen codegen; - codegen.Generate(variant, blendmode, truecolor, function.parameter(0), function.parameter(1)); - - builder.CreateRetVoid(); - - if (llvm::verifyFunction(*function.func)) - throw Exception(std::string("verifyFunction failed for CodegenDrawTriangle(") + std::to_string((int)variant) + ", " + std::to_string((int)blendmode) + ", " + 
std::to_string((int)truecolor) + ")"); -} - -llvm::Type *LLVMDrawers::GetDrawColumnArgsStruct(llvm::LLVMContext &context) -{ - std::vector elements; - elements.push_back(llvm::Type::getInt8PtrTy(context)); // uint32_t *dest; - elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint32_t *source; - elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint32_t *source2; - elements.push_back(llvm::Type::getInt8PtrTy(context)); // uint8_t *colormap; - elements.push_back(llvm::Type::getInt8PtrTy(context)); // uint8_t *translation; - elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint32_t *basecolors; - elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t pitch; - elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t count; - elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t dest_y; - elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t iscale; - elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t texturefracx; - elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t textureheight; - elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t texturefrac; - elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t light; - elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t color; - elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t srccolor; - elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t srcalpha; - elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t destalpha; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_alpha; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_red; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_green; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_blue; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_alpha; - 
elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_red; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_green; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_blue; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t desaturate; - elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t flags; - return llvm::StructType::create(context, elements, "DrawColumnArgs", false)->getPointerTo(); -} - -llvm::Type *LLVMDrawers::GetDrawSpanArgsStruct(llvm::LLVMContext &context) -{ - std::vector elements; - elements.push_back(llvm::Type::getInt8PtrTy(context)); // uint8_t *destorg; - elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint32_t *source; - elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t destpitch; - elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t xfrac; - elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t yfrac; - elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t xstep; - elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t ystep; - elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t x1; - elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t x2; - elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t y; - elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t xbits; - elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t ybits; - elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t light; - elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t srcalpha; - elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t destalpha; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_alpha; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_red; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_green; - elements.push_back(llvm::Type::getInt16Ty(context)); // 
uint16_t light_blue; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_alpha; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_red; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_green; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_blue; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t desaturate; - elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t flags; - return llvm::StructType::create(context, elements, "DrawSpanArgs", false)->getPointerTo(); -} - -llvm::Type *LLVMDrawers::GetDrawWallArgsStruct(llvm::LLVMContext &context) -{ - std::vector elements; - elements.push_back(llvm::Type::getInt8PtrTy(context)); - for (int i = 0; i < 8; i++) - elements.push_back(llvm::Type::getInt8PtrTy(context)); - for (int i = 0; i < 25; i++) - elements.push_back(llvm::Type::getInt32Ty(context)); - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_alpha; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_red; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_green; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_blue; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_alpha; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_red; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_green; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_blue; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t desaturate; - elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t flags; - return llvm::StructType::create(context, elements, "DrawWallArgs", false)->getPointerTo(); -} - -llvm::Type *LLVMDrawers::GetDrawSkyArgsStruct(llvm::LLVMContext &context) -{ - std::vector elements; - elements.push_back(llvm::Type::getInt8PtrTy(context)); - for (int i = 0; i < 8; i++) - 
elements.push_back(llvm::Type::getInt8PtrTy(context)); - for (int i = 0; i < 15; i++) - elements.push_back(llvm::Type::getInt32Ty(context)); - return llvm::StructType::create(context, elements, "DrawSkyArgs", false)->getPointerTo(); -} - -llvm::Type *LLVMDrawers::GetWorkerThreadDataStruct(llvm::LLVMContext &context) -{ - std::vector elements; - for (int i = 0; i < 4; i++) - elements.push_back(llvm::Type::getInt32Ty(context)); - elements.push_back(llvm::Type::getInt8PtrTy(context)); - return llvm::StructType::create(context, elements, "ThreadData", false)->getPointerTo(); -} - -llvm::Type *LLVMDrawers::GetTriVertexStruct(llvm::LLVMContext &context) -{ - std::vector elements; - for (int i = 0; i < 4 + TriVertex::NumVarying; i++) - elements.push_back(llvm::Type::getFloatTy(context)); - return llvm::StructType::create(context, elements, "TriVertex", false)->getPointerTo(); -} - -llvm::Type *LLVMDrawers::GetTriMatrixStruct(llvm::LLVMContext &context) -{ - std::vector elements; - for (int i = 0; i < 4 * 4; i++) - elements.push_back(llvm::Type::getFloatTy(context)); - return llvm::StructType::create(context, elements, "TriMatrix", false)->getPointerTo(); -} - -llvm::Type *LLVMDrawers::GetTriUniformsStruct(llvm::LLVMContext &context) -{ - std::vector elements; - elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t light; - elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t subsectorDepth; - elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t color; - elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t srcalpha; - elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t destalpha; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_alpha; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_red; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_green; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_blue; - 
elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_alpha; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_red; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_green; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_blue; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t desaturate; - elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t flags; - elements.push_back(GetTriMatrixStruct(context)); // TriMatrix objectToClip - return llvm::StructType::create(context, elements, "TriUniforms", false)->getPointerTo(); -} - -llvm::Type *LLVMDrawers::GetTriDrawTriangleArgs(llvm::LLVMContext &context) -{ - std::vector elements; - elements.push_back(llvm::Type::getInt8PtrTy(context)); // uint8_t *dest; - elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t pitch; - elements.push_back(GetTriVertexStruct(context)); // TriVertex *v1; - elements.push_back(GetTriVertexStruct(context)); // TriVertex *v2; - elements.push_back(GetTriVertexStruct(context)); // TriVertex *v3; - elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t clipleft; - elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t clipright; - elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t cliptop; - elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t clipbottom; - elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint8_t *texturePixels; - elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t textureWidth; - elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t textureHeight; - elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint8_t *translation; - elements.push_back(GetTriUniformsStruct(context)); // const TriUniforms *uniforms; - elements.push_back(llvm::Type::getInt8PtrTy(context)); // uint8_t *stencilValues; - elements.push_back(llvm::Type::getInt32PtrTy(context)); // uint32_t *stencilMasks; - 
elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t stencilPitch; - elements.push_back(llvm::Type::getInt8Ty(context)); // uint8_t stencilTestValue; - elements.push_back(llvm::Type::getInt8Ty(context)); // uint8_t stencilWriteValue; - elements.push_back(llvm::Type::getInt32PtrTy(context)); // uint32_t *subsectorGBuffer; - elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint8_t *colormaps; - elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint8_t *RGB32k; - elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint8_t *BaseColors; - return llvm::StructType::create(context, elements, "TriDrawTriangle", false)->getPointerTo(); -} - -///////////////////////////////////////////////////////////////////////////// - -LLVMProgram::LLVMProgram() -{ - mContext = std::make_unique(); -} - -void LLVMProgram::CreateModule() -{ - mModule = std::make_unique("render", context()); -} - -std::vector LLVMProgram::GenerateObjectFile(const std::string &triple, const std::string &cpuName, const std::string &features) -{ - using namespace llvm; - - std::string errorstring; - - llvm::Module *module = mModule.get(); - - const Target *target = TargetRegistry::lookupTarget(triple, errorstring); - -#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 9) - Reloc::Model relocationModel = Reloc::PIC_; -#else - Optional relocationModel = Reloc::PIC_; -#endif - - CodeModel::Model codeModel = CodeModel::Model::Default; - - TargetOptions options; - options.LessPreciseFPMADOption = true; - options.AllowFPOpFusion = FPOpFusion::Fast; - options.UnsafeFPMath = true; - options.NoInfsFPMath = true; - options.NoNaNsFPMath = true; - options.HonorSignDependentRoundingFPMathOption = false; - options.NoZerosInBSS = false; - options.GuaranteedTailCallOpt = false; - options.StackAlignmentOverride = 0; - options.UseInitArray = true; - options.DataSections = false; - options.FunctionSections = false; - options.JTType = JumpTable::Single; 
// Create a single table for all jumptable functions - options.ThreadModel = ThreadModel::POSIX; - options.DisableIntegratedAS = false; - options.MCOptions.SanitizeAddress = false; - options.MCOptions.MCRelaxAll = false; // relax all fixups in the emitted object file - options.MCOptions.DwarfVersion = 0; - options.MCOptions.ShowMCInst = false; - options.MCOptions.ABIName = ""; - options.MCOptions.MCFatalWarnings = false; - options.MCOptions.ShowMCEncoding = false; // Show encoding in .s output - options.MCOptions.MCUseDwarfDirectory = false; - options.MCOptions.AsmVerbose = true; - -#if LLVM_VERSION_MAJOR > 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 9) - options.Reciprocals = TargetRecip({ "all" }); - options.StackSymbolOrdering = true; - options.UniqueSectionNames = true; - options.EmulatedTLS = false; - options.ExceptionModel = ExceptionHandling::None; - options.EABIVersion = EABI::Default; - options.DebuggerTuning = DebuggerKind::Default; - options.MCOptions.MCIncrementalLinkerCompatible = false; - options.MCOptions.MCNoWarn = false; - options.MCOptions.PreserveAsmComments = true; -#endif - - CodeGenOpt::Level optimizationLevel = CodeGenOpt::Aggressive; - machine = target->createTargetMachine(triple, cpuName, features, options, relocationModel, codeModel, optimizationLevel); - - -#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 8) - std::string targetTriple = machine->getTargetTriple(); -#else - std::string targetTriple = machine->getTargetTriple().getTriple(); -#endif - - module->setTargetTriple(targetTriple); -#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 8) - module->setDataLayout(new DataLayout(*machine->getSubtargetImpl()->getDataLayout())); -#else - module->setDataLayout(machine->createDataLayout()); -#endif - - legacy::FunctionPassManager PerFunctionPasses(module); - legacy::PassManager PerModulePasses; - -#if LLVM_VERSION_MAJOR > 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR 
>= 8) - PerFunctionPasses.add(createTargetTransformInfoWrapperPass(machine->getTargetIRAnalysis())); - PerModulePasses.add(createTargetTransformInfoWrapperPass(machine->getTargetIRAnalysis())); -#endif - - SmallString<16 * 1024> str; -#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 8) - raw_svector_ostream vecstream(str); - formatted_raw_ostream stream(vecstream); -#else - raw_svector_ostream stream(str); -#endif - machine->addPassesToEmitFile(PerModulePasses, stream, TargetMachine::CGFT_ObjectFile); - - PassManagerBuilder passManagerBuilder; - passManagerBuilder.OptLevel = 3; - passManagerBuilder.SizeLevel = 0; - passManagerBuilder.Inliner = createFunctionInliningPass(); - passManagerBuilder.SLPVectorize = true; - passManagerBuilder.LoopVectorize = true; - passManagerBuilder.LoadCombine = true; - passManagerBuilder.populateModulePassManager(PerModulePasses); - passManagerBuilder.populateFunctionPassManager(PerFunctionPasses); - - // Run function passes: - PerFunctionPasses.doInitialization(); - for (llvm::Function &func : *module) - { - if (!func.isDeclaration()) - PerFunctionPasses.run(func); - } - PerFunctionPasses.doFinalization(); - - // Run module passes: - PerModulePasses.run(*module); - - // Return the resulting object file -#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 8) - stream.flush(); - vecstream.flush(); -#endif - std::vector data; - data.resize(str.size()); - memcpy(data.data(), str.data(), data.size()); - return data; -} - -std::string LLVMProgram::DumpModule() -{ - std::string str; - llvm::raw_string_ostream stream(str); -#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 8) - mModule->print(stream, nullptr); -#else - mModule->print(stream, nullptr, false, true); -#endif - return stream.str(); -} - -///////////////////////////////////////////////////////////////////////////// +#include "exception.h" +#include "llvmdrawers.h" std::string &AllTimestamps() { 
diff --git a/tools/drawergen/exception.h b/tools/drawergen/exception.h new file mode 100644 index 0000000000..22d3c18fbb --- /dev/null +++ b/tools/drawergen/exception.h @@ -0,0 +1,36 @@ +/* +** LLVM code generated drawers +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. +** +*/ + +#pragma once + +#include +#include + +class Exception : public std::exception +{ +public: + Exception(const std::string &message) : message(message) { } + const char *what() const noexcept override { return message.c_str(); } + +private: + std::string message; +}; diff --git a/tools/drawergen/llvmdrawers.cpp b/tools/drawergen/llvmdrawers.cpp new file mode 100644 index 0000000000..49eeb5a46a --- /dev/null +++ b/tools/drawergen/llvmdrawers.cpp @@ -0,0 +1,376 @@ +/* +** LLVM code generated drawers +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. 
+** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. +** +*/ + +#include "precomp.h" +#include "timestamp.h" +#include "llvmdrawers.h" +#include "exception.h" + +LLVMDrawers::LLVMDrawers(const std::string &triple, const std::string &cpuName, const std::string &features, const std::string namePostfix) : mNamePostfix(namePostfix) +{ + mProgram.CreateModule(); + + CodegenDrawColumn("FillColumn", DrawColumnVariant::Fill, DrawColumnMethod::Normal); + CodegenDrawColumn("FillColumnAdd", DrawColumnVariant::FillAdd, DrawColumnMethod::Normal); + CodegenDrawColumn("FillColumnAddClamp", DrawColumnVariant::FillAddClamp, DrawColumnMethod::Normal); + CodegenDrawColumn("FillColumnSubClamp", DrawColumnVariant::FillSubClamp, DrawColumnMethod::Normal); + CodegenDrawColumn("FillColumnRevSubClamp", DrawColumnVariant::FillRevSubClamp, DrawColumnMethod::Normal); + CodegenDrawColumn("DrawColumn", DrawColumnVariant::Draw, DrawColumnMethod::Normal); + CodegenDrawColumn("DrawColumnAdd", DrawColumnVariant::DrawAdd, DrawColumnMethod::Normal); + CodegenDrawColumn("DrawColumnShaded", DrawColumnVariant::DrawShaded, DrawColumnMethod::Normal); + CodegenDrawColumn("DrawColumnAddClamp", DrawColumnVariant::DrawAddClamp, DrawColumnMethod::Normal); + CodegenDrawColumn("DrawColumnSubClamp", DrawColumnVariant::DrawSubClamp, DrawColumnMethod::Normal); + 
CodegenDrawColumn("DrawColumnRevSubClamp", DrawColumnVariant::DrawRevSubClamp, DrawColumnMethod::Normal); + CodegenDrawColumn("DrawColumnTranslated", DrawColumnVariant::DrawTranslated, DrawColumnMethod::Normal); + CodegenDrawColumn("DrawColumnTlatedAdd", DrawColumnVariant::DrawTlatedAdd, DrawColumnMethod::Normal); + CodegenDrawColumn("DrawColumnAddClampTranslated", DrawColumnVariant::DrawAddClampTranslated, DrawColumnMethod::Normal); + CodegenDrawColumn("DrawColumnSubClampTranslated", DrawColumnVariant::DrawSubClampTranslated, DrawColumnMethod::Normal); + CodegenDrawColumn("DrawColumnRevSubClampTranslated", DrawColumnVariant::DrawRevSubClampTranslated, DrawColumnMethod::Normal); + CodegenDrawColumn("DrawColumnRt1", DrawColumnVariant::Draw, DrawColumnMethod::Rt1); + CodegenDrawColumn("DrawColumnRt1Copy", DrawColumnVariant::DrawCopy, DrawColumnMethod::Rt1); + CodegenDrawColumn("DrawColumnRt1Add", DrawColumnVariant::DrawAdd, DrawColumnMethod::Rt1); + CodegenDrawColumn("DrawColumnRt1Shaded", DrawColumnVariant::DrawShaded, DrawColumnMethod::Rt1); + CodegenDrawColumn("DrawColumnRt1AddClamp", DrawColumnVariant::DrawAddClamp, DrawColumnMethod::Rt1); + CodegenDrawColumn("DrawColumnRt1SubClamp", DrawColumnVariant::DrawSubClamp, DrawColumnMethod::Rt1); + CodegenDrawColumn("DrawColumnRt1RevSubClamp", DrawColumnVariant::DrawRevSubClamp, DrawColumnMethod::Rt1); + CodegenDrawColumn("DrawColumnRt1Translated", DrawColumnVariant::DrawTranslated, DrawColumnMethod::Rt1); + CodegenDrawColumn("DrawColumnRt1TlatedAdd", DrawColumnVariant::DrawTlatedAdd, DrawColumnMethod::Rt1); + CodegenDrawColumn("DrawColumnRt1AddClampTranslated", DrawColumnVariant::DrawAddClampTranslated, DrawColumnMethod::Rt1); + CodegenDrawColumn("DrawColumnRt1SubClampTranslated", DrawColumnVariant::DrawSubClampTranslated, DrawColumnMethod::Rt1); + CodegenDrawColumn("DrawColumnRt1RevSubClampTranslated", DrawColumnVariant::DrawRevSubClampTranslated, DrawColumnMethod::Rt1); + CodegenDrawColumn("DrawColumnRt4", 
DrawColumnVariant::Draw, DrawColumnMethod::Rt4); + CodegenDrawColumn("DrawColumnRt4Copy", DrawColumnVariant::DrawCopy, DrawColumnMethod::Rt4); + CodegenDrawColumn("DrawColumnRt4Add", DrawColumnVariant::DrawAdd, DrawColumnMethod::Rt4); + CodegenDrawColumn("DrawColumnRt4Shaded", DrawColumnVariant::DrawShaded, DrawColumnMethod::Rt4); + CodegenDrawColumn("DrawColumnRt4AddClamp", DrawColumnVariant::DrawAddClamp, DrawColumnMethod::Rt4); + CodegenDrawColumn("DrawColumnRt4SubClamp", DrawColumnVariant::DrawSubClamp, DrawColumnMethod::Rt4); + CodegenDrawColumn("DrawColumnRt4RevSubClamp", DrawColumnVariant::DrawRevSubClamp, DrawColumnMethod::Rt4); + CodegenDrawColumn("DrawColumnRt4Translated", DrawColumnVariant::DrawTranslated, DrawColumnMethod::Rt4); + CodegenDrawColumn("DrawColumnRt4TlatedAdd", DrawColumnVariant::DrawTlatedAdd, DrawColumnMethod::Rt4); + CodegenDrawColumn("DrawColumnRt4AddClampTranslated", DrawColumnVariant::DrawAddClampTranslated, DrawColumnMethod::Rt4); + CodegenDrawColumn("DrawColumnRt4SubClampTranslated", DrawColumnVariant::DrawSubClampTranslated, DrawColumnMethod::Rt4); + CodegenDrawColumn("DrawColumnRt4RevSubClampTranslated", DrawColumnVariant::DrawRevSubClampTranslated, DrawColumnMethod::Rt4); + CodegenDrawSpan("DrawSpan", DrawSpanVariant::Opaque); + CodegenDrawSpan("DrawSpanMasked", DrawSpanVariant::Masked); + CodegenDrawSpan("DrawSpanTranslucent", DrawSpanVariant::Translucent); + CodegenDrawSpan("DrawSpanMaskedTranslucent", DrawSpanVariant::MaskedTranslucent); + CodegenDrawSpan("DrawSpanAddClamp", DrawSpanVariant::AddClamp); + CodegenDrawSpan("DrawSpanMaskedAddClamp", DrawSpanVariant::MaskedAddClamp); + CodegenDrawWall("vlinec1", DrawWallVariant::Opaque, 1); + CodegenDrawWall("vlinec4", DrawWallVariant::Opaque, 4); + CodegenDrawWall("mvlinec1", DrawWallVariant::Masked, 1); + CodegenDrawWall("mvlinec4", DrawWallVariant::Masked, 4); + CodegenDrawWall("tmvline1_add", DrawWallVariant::Add, 1); + CodegenDrawWall("tmvline4_add", DrawWallVariant::Add, 4); 
+ CodegenDrawWall("tmvline1_addclamp", DrawWallVariant::AddClamp, 1); + CodegenDrawWall("tmvline4_addclamp", DrawWallVariant::AddClamp, 4); + CodegenDrawWall("tmvline1_subclamp", DrawWallVariant::SubClamp, 1); + CodegenDrawWall("tmvline4_subclamp", DrawWallVariant::SubClamp, 4); + CodegenDrawWall("tmvline1_revsubclamp", DrawWallVariant::RevSubClamp, 1); + CodegenDrawWall("tmvline4_revsubclamp", DrawWallVariant::RevSubClamp, 4); + CodegenDrawSky("DrawSky1", DrawSkyVariant::Single, 1); + CodegenDrawSky("DrawSky4", DrawSkyVariant::Single, 4); + CodegenDrawSky("DrawDoubleSky1", DrawSkyVariant::Double, 1); + CodegenDrawSky("DrawDoubleSky4", DrawSkyVariant::Double, 4); + for (int i = 0; i < NumTriBlendModes(); i++) + { + CodegenDrawTriangle("TriDrawNormal8_" + std::to_string(i), TriDrawVariant::DrawNormal, (TriBlendMode)i, false); + CodegenDrawTriangle("TriDrawNormal32_" + std::to_string(i), TriDrawVariant::DrawNormal, (TriBlendMode)i, true); + CodegenDrawTriangle("TriFillNormal8_" + std::to_string(i), TriDrawVariant::FillNormal, (TriBlendMode)i, false); + CodegenDrawTriangle("TriFillNormal32_" + std::to_string(i), TriDrawVariant::FillNormal, (TriBlendMode)i, true); + CodegenDrawTriangle("TriDrawSubsector8_" + std::to_string(i), TriDrawVariant::DrawSubsector, (TriBlendMode)i, false); + CodegenDrawTriangle("TriDrawSubsector32_" + std::to_string(i), TriDrawVariant::DrawSubsector, (TriBlendMode)i, true); + CodegenDrawTriangle("TriFillSubsector8_" + std::to_string(i), TriDrawVariant::FillSubsector, (TriBlendMode)i, false); + CodegenDrawTriangle("TriFillSubsector32_" + std::to_string(i), TriDrawVariant::FillSubsector, (TriBlendMode)i, true); + } + CodegenDrawTriangle("TriStencil", TriDrawVariant::Stencil, TriBlendMode::Copy, false); + CodegenDrawTriangle("TriStencilClose", TriDrawVariant::StencilClose, TriBlendMode::Copy, false); + + ObjectFile = mProgram.GenerateObjectFile(triple, cpuName, features); +} + +void LLVMDrawers::CodegenDrawColumn(const char *name, 
DrawColumnVariant variant, DrawColumnMethod method) +{ + llvm::IRBuilder<> builder(mProgram.context()); + SSAScope ssa_scope(&mProgram.context(), mProgram.module(), &builder); + + SSAFunction function(name + mNamePostfix); + function.add_parameter(GetDrawColumnArgsStruct(mProgram.context())); + function.add_parameter(GetWorkerThreadDataStruct(mProgram.context())); + function.create_public(); + + DrawColumnCodegen codegen; + codegen.Generate(variant, method, function.parameter(0), function.parameter(1)); + + builder.CreateRetVoid(); + + if (llvm::verifyFunction(*function.func)) + throw Exception("verifyFunction failed for CodegenDrawColumn()"); +} + +void LLVMDrawers::CodegenDrawSpan(const char *name, DrawSpanVariant variant) +{ + llvm::IRBuilder<> builder(mProgram.context()); + SSAScope ssa_scope(&mProgram.context(), mProgram.module(), &builder); + + SSAFunction function(name + mNamePostfix); + function.add_parameter(GetDrawSpanArgsStruct(mProgram.context())); + function.create_public(); + + DrawSpanCodegen codegen; + codegen.Generate(variant, function.parameter(0)); + + builder.CreateRetVoid(); + + if (llvm::verifyFunction(*function.func)) + throw Exception("verifyFunction failed for CodegenDrawSpan()"); +} + +void LLVMDrawers::CodegenDrawWall(const char *name, DrawWallVariant variant, int columns) +{ + llvm::IRBuilder<> builder(mProgram.context()); + SSAScope ssa_scope(&mProgram.context(), mProgram.module(), &builder); + + SSAFunction function(name + mNamePostfix); + function.add_parameter(GetDrawWallArgsStruct(mProgram.context())); + function.add_parameter(GetWorkerThreadDataStruct(mProgram.context())); + function.create_public(); + + DrawWallCodegen codegen; + codegen.Generate(variant, columns == 4, function.parameter(0), function.parameter(1)); + + builder.CreateRetVoid(); + + if (llvm::verifyFunction(*function.func)) + throw Exception("verifyFunction failed for CodegenDrawWall()"); +} + +void LLVMDrawers::CodegenDrawSky(const char *name, DrawSkyVariant 
variant, int columns) +{ + llvm::IRBuilder<> builder(mProgram.context()); + SSAScope ssa_scope(&mProgram.context(), mProgram.module(), &builder); + + SSAFunction function(name + mNamePostfix); + function.add_parameter(GetDrawSkyArgsStruct(mProgram.context())); + function.add_parameter(GetWorkerThreadDataStruct(mProgram.context())); + function.create_public(); + + DrawSkyCodegen codegen; + codegen.Generate(variant, columns == 4, function.parameter(0), function.parameter(1)); + + builder.CreateRetVoid(); + + if (llvm::verifyFunction(*function.func)) + throw Exception("verifyFunction failed for CodegenDrawSky()"); +} + +void LLVMDrawers::CodegenDrawTriangle(const std::string &name, TriDrawVariant variant, TriBlendMode blendmode, bool truecolor) +{ + llvm::IRBuilder<> builder(mProgram.context()); + SSAScope ssa_scope(&mProgram.context(), mProgram.module(), &builder); + + SSAFunction function(name + mNamePostfix); + function.add_parameter(GetTriDrawTriangleArgs(mProgram.context())); + function.add_parameter(GetWorkerThreadDataStruct(mProgram.context())); + function.create_public(); + + DrawTriangleCodegen codegen; + codegen.Generate(variant, blendmode, truecolor, function.parameter(0), function.parameter(1)); + + builder.CreateRetVoid(); + + if (llvm::verifyFunction(*function.func)) + throw Exception(std::string("verifyFunction failed for CodegenDrawTriangle(") + std::to_string((int)variant) + ", " + std::to_string((int)blendmode) + ", " + std::to_string((int)truecolor) + ")"); +} + +llvm::Type *LLVMDrawers::GetDrawColumnArgsStruct(llvm::LLVMContext &context) +{ + std::vector elements; + elements.push_back(llvm::Type::getInt8PtrTy(context)); // uint32_t *dest; + elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint32_t *source; + elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint32_t *source2; + elements.push_back(llvm::Type::getInt8PtrTy(context)); // uint8_t *colormap; + elements.push_back(llvm::Type::getInt8PtrTy(context)); // uint8_t 
*translation; + elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint32_t *basecolors; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t pitch; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t count; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t dest_y; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t iscale; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t texturefracx; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t textureheight; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t texturefrac; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t light; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t color; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t srccolor; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t srcalpha; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t destalpha; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_alpha; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_red; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_green; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_blue; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_alpha; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_red; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_green; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_blue; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t desaturate; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t flags; + return llvm::StructType::create(context, elements, "DrawColumnArgs", false)->getPointerTo(); +} + +llvm::Type *LLVMDrawers::GetDrawSpanArgsStruct(llvm::LLVMContext &context) +{ + std::vector 
elements; + elements.push_back(llvm::Type::getInt8PtrTy(context)); // uint8_t *destorg; + elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint32_t *source; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t destpitch; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t xfrac; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t yfrac; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t xstep; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t ystep; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t x1; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t x2; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t y; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t xbits; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t ybits; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t light; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t srcalpha; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t destalpha; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_alpha; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_red; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_green; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_blue; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_alpha; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_red; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_green; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_blue; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t desaturate; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t flags; + return llvm::StructType::create(context, elements, "DrawSpanArgs", 
false)->getPointerTo(); +} + +llvm::Type *LLVMDrawers::GetDrawWallArgsStruct(llvm::LLVMContext &context) +{ + std::vector elements; + elements.push_back(llvm::Type::getInt8PtrTy(context)); + for (int i = 0; i < 8; i++) + elements.push_back(llvm::Type::getInt8PtrTy(context)); + for (int i = 0; i < 25; i++) + elements.push_back(llvm::Type::getInt32Ty(context)); + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_alpha; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_red; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_green; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_blue; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_alpha; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_red; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_green; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_blue; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t desaturate; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t flags; + return llvm::StructType::create(context, elements, "DrawWallArgs", false)->getPointerTo(); +} + +llvm::Type *LLVMDrawers::GetDrawSkyArgsStruct(llvm::LLVMContext &context) +{ + std::vector elements; + elements.push_back(llvm::Type::getInt8PtrTy(context)); + for (int i = 0; i < 8; i++) + elements.push_back(llvm::Type::getInt8PtrTy(context)); + for (int i = 0; i < 15; i++) + elements.push_back(llvm::Type::getInt32Ty(context)); + return llvm::StructType::create(context, elements, "DrawSkyArgs", false)->getPointerTo(); +} + +llvm::Type *LLVMDrawers::GetWorkerThreadDataStruct(llvm::LLVMContext &context) +{ + std::vector elements; + for (int i = 0; i < 4; i++) + elements.push_back(llvm::Type::getInt32Ty(context)); + elements.push_back(llvm::Type::getInt8PtrTy(context)); + return llvm::StructType::create(context, elements, "ThreadData", 
false)->getPointerTo(); +} + +llvm::Type *LLVMDrawers::GetTriVertexStruct(llvm::LLVMContext &context) +{ + std::vector elements; + for (int i = 0; i < 4 + TriVertex::NumVarying; i++) + elements.push_back(llvm::Type::getFloatTy(context)); + return llvm::StructType::create(context, elements, "TriVertex", false)->getPointerTo(); +} + +llvm::Type *LLVMDrawers::GetTriMatrixStruct(llvm::LLVMContext &context) +{ + std::vector elements; + for (int i = 0; i < 4 * 4; i++) + elements.push_back(llvm::Type::getFloatTy(context)); + return llvm::StructType::create(context, elements, "TriMatrix", false)->getPointerTo(); +} + +llvm::Type *LLVMDrawers::GetTriUniformsStruct(llvm::LLVMContext &context) +{ + std::vector elements; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t light; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t subsectorDepth; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t color; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t srcalpha; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t destalpha; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_alpha; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_red; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_green; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_blue; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_alpha; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_red; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_green; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_blue; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t desaturate; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t flags; + elements.push_back(GetTriMatrixStruct(context)); // TriMatrix objectToClip + return 
llvm::StructType::create(context, elements, "TriUniforms", false)->getPointerTo(); +} + +llvm::Type *LLVMDrawers::GetTriDrawTriangleArgs(llvm::LLVMContext &context) +{ + std::vector elements; + elements.push_back(llvm::Type::getInt8PtrTy(context)); // uint8_t *dest; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t pitch; + elements.push_back(GetTriVertexStruct(context)); // TriVertex *v1; + elements.push_back(GetTriVertexStruct(context)); // TriVertex *v2; + elements.push_back(GetTriVertexStruct(context)); // TriVertex *v3; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t clipleft; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t clipright; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t cliptop; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t clipbottom; + elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint8_t *texturePixels; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t textureWidth; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t textureHeight; + elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint8_t *translation; + elements.push_back(GetTriUniformsStruct(context)); // const TriUniforms *uniforms; + elements.push_back(llvm::Type::getInt8PtrTy(context)); // uint8_t *stencilValues; + elements.push_back(llvm::Type::getInt32PtrTy(context)); // uint32_t *stencilMasks; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t stencilPitch; + elements.push_back(llvm::Type::getInt8Ty(context)); // uint8_t stencilTestValue; + elements.push_back(llvm::Type::getInt8Ty(context)); // uint8_t stencilWriteValue; + elements.push_back(llvm::Type::getInt32PtrTy(context)); // uint32_t *subsectorGBuffer; + elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint8_t *colormaps; + elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint8_t *RGB32k; + 
elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint8_t *BaseColors; + return llvm::StructType::create(context, elements, "TriDrawTriangle", false)->getPointerTo(); +} diff --git a/tools/drawergen/llvmdrawers.h b/tools/drawergen/llvmdrawers.h new file mode 100644 index 0000000000..3eef605df7 --- /dev/null +++ b/tools/drawergen/llvmdrawers.h @@ -0,0 +1,68 @@ +/* +** LLVM code generated drawers +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. 
+** +*/ + +#pragma once + +#include "fixedfunction/drawspancodegen.h" +#include "fixedfunction/drawwallcodegen.h" +#include "fixedfunction/drawcolumncodegen.h" +#include "fixedfunction/drawskycodegen.h" +#include "fixedfunction/drawtrianglecodegen.h" +#include "ssa/ssa_function.h" +#include "ssa/ssa_scope.h" +#include "ssa/ssa_for_block.h" +#include "ssa/ssa_if_block.h" +#include "ssa/ssa_stack.h" +#include "ssa/ssa_function.h" +#include "ssa/ssa_struct_type.h" +#include "ssa/ssa_value.h" +#include "ssa/ssa_barycentric_weight.h" +#include "llvmprogram.h" +#include + +class LLVMDrawers +{ +public: + LLVMDrawers(const std::string &triple, const std::string &cpuName, const std::string &features, const std::string namePostfix); + + std::vector ObjectFile; + +private: + void CodegenDrawColumn(const char *name, DrawColumnVariant variant, DrawColumnMethod method); + void CodegenDrawSpan(const char *name, DrawSpanVariant variant); + void CodegenDrawWall(const char *name, DrawWallVariant variant, int columns); + void CodegenDrawSky(const char *name, DrawSkyVariant variant, int columns); + void CodegenDrawTriangle(const std::string &name, TriDrawVariant variant, TriBlendMode blendmode, bool truecolor); + + static llvm::Type *GetDrawColumnArgsStruct(llvm::LLVMContext &context); + static llvm::Type *GetDrawSpanArgsStruct(llvm::LLVMContext &context); + static llvm::Type *GetDrawWallArgsStruct(llvm::LLVMContext &context); + static llvm::Type *GetDrawSkyArgsStruct(llvm::LLVMContext &context); + static llvm::Type *GetWorkerThreadDataStruct(llvm::LLVMContext &context); + static llvm::Type *GetTriVertexStruct(llvm::LLVMContext &context); + static llvm::Type *GetTriMatrixStruct(llvm::LLVMContext &context); + static llvm::Type *GetTriUniformsStruct(llvm::LLVMContext &context); + static llvm::Type *GetTriDrawTriangleArgs(llvm::LLVMContext &context); + + LLVMProgram mProgram; + std::string mNamePostfix; +}; diff --git a/tools/drawergen/llvmprogram.cpp b/tools/drawergen/llvmprogram.cpp 
new file mode 100644 index 0000000000..84094eda63 --- /dev/null +++ b/tools/drawergen/llvmprogram.cpp @@ -0,0 +1,171 @@ +/* +** LLVM code generated drawers +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. 
+** +*/ + +#include "precomp.h" +#include "timestamp.h" +#include "llvmprogram.h" + +LLVMProgram::LLVMProgram() +{ + mContext = std::make_unique<llvm::LLVMContext>(); +} + +void LLVMProgram::CreateModule() +{ + mModule = std::make_unique<llvm::Module>("render", context()); +} + +std::vector<uint8_t> LLVMProgram::GenerateObjectFile(const std::string &triple, const std::string &cpuName, const std::string &features) +{ + using namespace llvm; + + std::string errorstring; + + llvm::Module *module = mModule.get(); + + const Target *target = TargetRegistry::lookupTarget(triple, errorstring); + +#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 9) + Reloc::Model relocationModel = Reloc::PIC_; +#else + Optional<Reloc::Model> relocationModel = Reloc::PIC_; +#endif + + CodeModel::Model codeModel = CodeModel::Model::Default; + + TargetOptions options; + options.LessPreciseFPMADOption = true; + options.AllowFPOpFusion = FPOpFusion::Fast; + options.UnsafeFPMath = true; + options.NoInfsFPMath = true; + options.NoNaNsFPMath = true; + options.HonorSignDependentRoundingFPMathOption = false; + options.NoZerosInBSS = false; + options.GuaranteedTailCallOpt = false; + options.StackAlignmentOverride = 0; + options.UseInitArray = true; + options.DataSections = false; + options.FunctionSections = false; + options.JTType = JumpTable::Single; // Create a single table for all jumptable functions + options.ThreadModel = ThreadModel::POSIX; + options.DisableIntegratedAS = false; + options.MCOptions.SanitizeAddress = false; + options.MCOptions.MCRelaxAll = false; // relax all fixups in the emitted object file + options.MCOptions.DwarfVersion = 0; + options.MCOptions.ShowMCInst = false; + options.MCOptions.ABIName = ""; + options.MCOptions.MCFatalWarnings = false; + options.MCOptions.ShowMCEncoding = false; // Show encoding in .s output + options.MCOptions.MCUseDwarfDirectory = false; + options.MCOptions.AsmVerbose = true; + +#if LLVM_VERSION_MAJOR > 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 9) + 
options.Reciprocals = TargetRecip({ "all" }); + options.StackSymbolOrdering = true; + options.UniqueSectionNames = true; + options.EmulatedTLS = false; + options.ExceptionModel = ExceptionHandling::None; + options.EABIVersion = EABI::Default; + options.DebuggerTuning = DebuggerKind::Default; + options.MCOptions.MCIncrementalLinkerCompatible = false; + options.MCOptions.MCNoWarn = false; + options.MCOptions.PreserveAsmComments = true; +#endif + + CodeGenOpt::Level optimizationLevel = CodeGenOpt::Aggressive; + machine = target->createTargetMachine(triple, cpuName, features, options, relocationModel, codeModel, optimizationLevel); + + +#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 8) + std::string targetTriple = machine->getTargetTriple(); +#else + std::string targetTriple = machine->getTargetTriple().getTriple(); +#endif + + module->setTargetTriple(targetTriple); +#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 8) + module->setDataLayout(new DataLayout(*machine->getSubtargetImpl()->getDataLayout())); +#else + module->setDataLayout(machine->createDataLayout()); +#endif + + legacy::FunctionPassManager PerFunctionPasses(module); + legacy::PassManager PerModulePasses; + +#if LLVM_VERSION_MAJOR > 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 8) + PerFunctionPasses.add(createTargetTransformInfoWrapperPass(machine->getTargetIRAnalysis())); + PerModulePasses.add(createTargetTransformInfoWrapperPass(machine->getTargetIRAnalysis())); +#endif + + SmallString<16 * 1024> str; +#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 8) + raw_svector_ostream vecstream(str); + formatted_raw_ostream stream(vecstream); +#else + raw_svector_ostream stream(str); +#endif + machine->addPassesToEmitFile(PerModulePasses, stream, TargetMachine::CGFT_ObjectFile); + + PassManagerBuilder passManagerBuilder; + passManagerBuilder.OptLevel = 3; + passManagerBuilder.SizeLevel = 0; + 
passManagerBuilder.Inliner = createFunctionInliningPass(); + passManagerBuilder.SLPVectorize = true; + passManagerBuilder.LoopVectorize = true; + passManagerBuilder.LoadCombine = true; + passManagerBuilder.populateModulePassManager(PerModulePasses); + passManagerBuilder.populateFunctionPassManager(PerFunctionPasses); + + // Run function passes: + PerFunctionPasses.doInitialization(); + for (llvm::Function &func : *module) + { + if (!func.isDeclaration()) + PerFunctionPasses.run(func); + } + PerFunctionPasses.doFinalization(); + + // Run module passes: + PerModulePasses.run(*module); + + // Return the resulting object file +#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 8) + stream.flush(); + vecstream.flush(); +#endif + std::vector<uint8_t> data; + data.resize(str.size()); + memcpy(data.data(), str.data(), data.size()); + return data; +} + +std::string LLVMProgram::DumpModule() +{ + std::string str; + llvm::raw_string_ostream stream(str); +#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 8) + mModule->print(stream, nullptr); +#else + mModule->print(stream, nullptr, false, true); +#endif + return stream.str(); +} diff --git a/tools/drawergen/llvmprogram.h b/tools/drawergen/llvmprogram.h new file mode 100644 index 0000000000..30cf33d5ac --- /dev/null +++ b/tools/drawergen/llvmprogram.h @@ -0,0 +1,41 @@ +/* +** LLVM code generated drawers +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. 
If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. +** +*/ + +#pragma once + +class LLVMProgram +{ +public: + LLVMProgram(); + + void CreateModule(); + std::vector<uint8_t> GenerateObjectFile(const std::string &triple, const std::string &cpuName, const std::string &features); + std::string DumpModule(); + + llvm::LLVMContext &context() { return *mContext; } + llvm::Module *module() { return mModule.get(); } + +private: + llvm::TargetMachine *machine = nullptr; + std::unique_ptr<llvm::LLVMContext> mContext; + std::unique_ptr<llvm::Module> mModule; +}; From 28f9216e7bdc58473ae34ce015b74087f8604bf7 Mon Sep 17 00:00:00 2001 From: Christoph Oelckers Date: Sun, 4 Dec 2016 16:57:10 +0100 Subject: [PATCH 432/912] - fixed a few prototypes. --- src/r_draw.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/r_draw.h b/src/r_draw.h index 290256d58a..f4f7525570 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -252,10 +252,8 @@ void R_DrawColumnHorizP_C(void); #ifdef X86_ASM extern "C" void R_DrawColumnP_Unrolled (void); -extern "C" void R_DrawColumnHorizP_ASM (void); extern "C" void R_DrawColumnP_ASM (void); extern "C" void R_DrawFuzzColumnP_ASM (void); - void R_DrawTranslatedColumnP_C (void); void R_DrawShadedColumnP_C (void); extern "C" void R_DrawSpanP_ASM (void); extern "C" void R_DrawSpanMaskedP_ASM (void); @@ -264,13 +262,14 @@ extern "C" void R_DrawSpanMaskedP_ASM (void); void R_DrawColumnP_C (void); void R_DrawFuzzColumnP_C (void); -void R_DrawTranslatedColumnP_C (void); void R_DrawShadedColumnP_C (void); void R_DrawSpanP_C (void); void R_DrawSpanMaskedP_C (void); #endif +void R_DrawColumnHorizP_C(void); +void R_DrawTranslatedColumnP_C(void); void R_DrawSpanTranslucentP_C (void); void 
R_DrawSpanMaskedTranslucentP_C (void); From 659abc945173a011a968d74a4cff48ececc608bd Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Sun, 4 Dec 2016 19:18:13 -0500 Subject: [PATCH 433/912] - Removed vestigial prototype. --- src/r_draw.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/r_draw.h b/src/r_draw.h index f4f7525570..17365f3f5b 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -247,8 +247,6 @@ extern void (*R_DrawFogBoundary)(int x1, int x2, short *uclip, short *dclip); void R_DrawFogBoundary_C (int x1, int x2, short *uclip, short *dclip); -void R_DrawColumnHorizP_C(void); - #ifdef X86_ASM extern "C" void R_DrawColumnP_Unrolled (void); From 2f3a6da295056e5b1cd0329e3c399ea4832b6343 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 5 Dec 2016 04:05:55 +0100 Subject: [PATCH 434/912] Fix buffer overrun --- src/gl/system/gl_swframebuffer.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/gl/system/gl_swframebuffer.cpp b/src/gl/system/gl_swframebuffer.cpp index 421fe92781..6b3caa81ef 100644 --- a/src/gl/system/gl_swframebuffer.cpp +++ b/src/gl/system/gl_swframebuffer.cpp @@ -2316,7 +2316,11 @@ bool OpenGLSWFrameBuffer::OpenGLPal::Update() { buff[i] = ColorARGB(pal[i].a, pal[i - 1].r, pal[i - 1].g, pal[i - 1].b); } - BorderColor = ColorARGB(pal[i].a, pal[i - 1].r, pal[i - 1].g, pal[i - 1].b); + if (numEntries > 1) + { + i = numEntries - 1; + BorderColor = ColorARGB(pal[i].a, pal[i - 1].r, pal[i - 1].g, pal[i - 1].b); + } glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER); GLint oldBinding = 0; From 7ffab207cba5b1218231ef54fde16af785aa3792 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 5 Dec 2016 09:22:45 +0100 Subject: [PATCH 435/912] Move rgba drawers to be dispatched by r_draw_tc --- src/r_draw_rgba.cpp | 958 ++++++---------------------- src/r_draw_rgba.h | 658 +++++++++++++++----- src/r_draw_tc.cpp | 1411 ++++++++++++++++++++++++++++++++++++++++++ src/r_draw_tc.h | 239 +++++++ src/r_drawt_rgba.cpp | 365 
++--------- src/r_plane.cpp | 38 +- 6 files changed, 2391 insertions(+), 1278 deletions(-) create mode 100644 src/r_draw_tc.cpp create mode 100644 src/r_draw_tc.h diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 699352a6b0..bbedb2622a 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -60,18 +60,14 @@ CVAR(Float, r_lod_bias, -1.5, 0); // To do: add CVAR_ARCHIVE | CVAR_GLOBALCONFIG namespace swrenderer { + extern "C" short spanend[MAXHEIGHT]; + extern float rw_light; + extern float rw_lightstep; + extern int wallshade; -extern "C" short spanend[MAXHEIGHT]; -extern float rw_light; -extern float rw_lightstep; -extern int wallshade; + ///////////////////////////////////////////////////////////////////////////// -///////////////////////////////////////////////////////////////////////////// - -class DrawSpanLLVMCommand : public DrawerCommand -{ -public: - DrawSpanLLVMCommand() + DrawSpanLLVMCommand::DrawSpanLLVMCommand() { using namespace drawerargs; @@ -106,23 +102,19 @@ public: args.flags |= DrawSpanArgs::nearest_filter; } - void Execute(DrawerThread *thread) override + void DrawSpanLLVMCommand::Execute(DrawerThread *thread) { if (thread->skipped_by_thread(args.y)) return; Drawers::Instance()->DrawSpan(&args); } - FString DebugInfo() override + FString DrawSpanLLVMCommand::DebugInfo() { return "DrawSpan\n" + args.ToString(); } -protected: - DrawSpanArgs args; - -private: - inline static bool sampler_setup(const uint32_t * &source, int &xbits, int &ybits, bool mipmapped) + bool DrawSpanLLVMCommand::sampler_setup(const uint32_t * &source, int &xbits, int &ybits, bool mipmapped) { using namespace drawerargs; @@ -144,71 +136,47 @@ private: return (magnifying && r_magfilter) || (!magnifying && r_minfilter); } -}; -class DrawSpanMaskedLLVMCommand : public DrawSpanLLVMCommand -{ -public: - void Execute(DrawerThread *thread) override + ///////////////////////////////////////////////////////////////////////////// + + void 
DrawSpanMaskedLLVMCommand::Execute(DrawerThread *thread) { if (thread->skipped_by_thread(args.y)) return; Drawers::Instance()->DrawSpanMasked(&args); } -}; -class DrawSpanTranslucentLLVMCommand : public DrawSpanLLVMCommand -{ -public: - void Execute(DrawerThread *thread) override + void DrawSpanTranslucentLLVMCommand::Execute(DrawerThread *thread) { if (thread->skipped_by_thread(args.y)) return; Drawers::Instance()->DrawSpanTranslucent(&args); } -}; -class DrawSpanMaskedTranslucentLLVMCommand : public DrawSpanLLVMCommand -{ -public: - void Execute(DrawerThread *thread) override + void DrawSpanMaskedTranslucentLLVMCommand::Execute(DrawerThread *thread) { if (thread->skipped_by_thread(args.y)) return; Drawers::Instance()->DrawSpanMaskedTranslucent(&args); } -}; -class DrawSpanAddClampLLVMCommand : public DrawSpanLLVMCommand -{ -public: - void Execute(DrawerThread *thread) override + void DrawSpanAddClampLLVMCommand::Execute(DrawerThread *thread) { if (thread->skipped_by_thread(args.y)) return; Drawers::Instance()->DrawSpanAddClamp(&args); } -}; -class DrawSpanMaskedAddClampLLVMCommand : public DrawSpanLLVMCommand -{ -public: - void Execute(DrawerThread *thread) override + void DrawSpanMaskedAddClampLLVMCommand::Execute(DrawerThread *thread) { if (thread->skipped_by_thread(args.y)) return; Drawers::Instance()->DrawSpanMaskedAddClamp(&args); } -}; -///////////////////////////////////////////////////////////////////////////// + ///////////////////////////////////////////////////////////////////////////// -class DrawWall4LLVMCommand : public DrawerCommand -{ -protected: - DrawWallArgs args; - - WorkerThreadData ThreadData(DrawerThread *thread) + WorkerThreadData DrawWall4LLVMCommand::ThreadData(DrawerThread *thread) { WorkerThreadData d; d.core = thread->core; @@ -218,8 +186,7 @@ protected: return d; } -public: - DrawWall4LLVMCommand() + DrawWall4LLVMCommand::DrawWall4LLVMCommand() { using namespace drawerargs; @@ -257,24 +224,20 @@ public: DetectRangeError(args.dest, 
args.dest_y, args.count); } - void Execute(DrawerThread *thread) override + void DrawWall4LLVMCommand::Execute(DrawerThread *thread) { WorkerThreadData d = ThreadData(thread); Drawers::Instance()->vlinec4(&args, &d); } - FString DebugInfo() override + FString DrawWall4LLVMCommand::DebugInfo() { return "DrawWall4\n" + args.ToString(); } -}; -class DrawWall1LLVMCommand : public DrawerCommand -{ -protected: - DrawWallArgs args; + ///////////////////////////////////////////////////////////////////////////// - WorkerThreadData ThreadData(DrawerThread *thread) + WorkerThreadData DrawWall1LLVMCommand::ThreadData(DrawerThread *thread) { WorkerThreadData d; d.core = thread->core; @@ -284,8 +247,7 @@ protected: return d; } -public: - DrawWall1LLVMCommand() + DrawWall1LLVMCommand::DrawWall1LLVMCommand() { using namespace drawerargs; @@ -320,24 +282,20 @@ public: DetectRangeError(args.dest, args.dest_y, args.count); } - void Execute(DrawerThread *thread) override + void DrawWall1LLVMCommand::Execute(DrawerThread *thread) { WorkerThreadData d = ThreadData(thread); Drawers::Instance()->vlinec1(&args, &d); } - FString DebugInfo() override + FString DrawWall1LLVMCommand::DebugInfo() { return "DrawWall1\n" + args.ToString(); } -}; -class DrawColumnLLVMCommand : public DrawerCommand -{ -protected: - DrawColumnArgs args; + ///////////////////////////////////////////////////////////////////////////// - WorkerThreadData ThreadData(DrawerThread *thread) + WorkerThreadData DrawColumnLLVMCommand::ThreadData(DrawerThread *thread) { WorkerThreadData d; d.core = thread->core; @@ -347,13 +305,12 @@ protected: return d; } - FString DebugInfo() override + FString DrawColumnLLVMCommand::DebugInfo() { return "DrawColumn\n" + args.ToString(); } -public: - DrawColumnLLVMCommand() + DrawColumnLLVMCommand::DrawColumnLLVMCommand() { using namespace drawerargs; @@ -393,19 +350,15 @@ public: DetectRangeError(args.dest, args.dest_y, args.count); } - void Execute(DrawerThread *thread) override + void 
DrawColumnLLVMCommand::Execute(DrawerThread *thread) { WorkerThreadData d = ThreadData(thread); Drawers::Instance()->DrawColumn(&args, &d); } -}; -class DrawSkyLLVMCommand : public DrawerCommand -{ -protected: - DrawSkyArgs args; + ///////////////////////////////////////////////////////////////////////////// - WorkerThreadData ThreadData(DrawerThread *thread) + WorkerThreadData DrawSkyLLVMCommand::ThreadData(DrawerThread *thread) { WorkerThreadData d; d.core = thread->core; @@ -415,8 +368,7 @@ protected: return d; } -public: - DrawSkyLLVMCommand(uint32_t solid_top, uint32_t solid_bottom) + DrawSkyLLVMCommand::DrawSkyLLVMCommand(uint32_t solid_top, uint32_t solid_bottom) { using namespace drawerargs; @@ -439,70 +391,14 @@ public: DetectRangeError(args.dest, args.dest_y, args.count); } - FString DebugInfo() override + FString DrawSkyLLVMCommand::DebugInfo() { return "DrawSky\n" + args.ToString(); } -}; -#define DECLARE_DRAW_COMMAND(name, func, base) \ -class name##LLVMCommand : public base \ -{ \ -public: \ - using base::base; \ - void Execute(DrawerThread *thread) override \ - { \ - WorkerThreadData d = ThreadData(thread); \ - Drawers::Instance()->func(&args, &d); \ - } \ -}; + ///////////////////////////////////////////////////////////////////////////// -//DECLARE_DRAW_COMMAND(name, func, DrawSpanLLVMCommand); - -DECLARE_DRAW_COMMAND(DrawWallMasked4, mvlinec4, DrawWall4LLVMCommand); -DECLARE_DRAW_COMMAND(DrawWallAdd4, tmvline4_add, DrawWall4LLVMCommand); -DECLARE_DRAW_COMMAND(DrawWallAddClamp4, tmvline4_addclamp, DrawWall4LLVMCommand); -DECLARE_DRAW_COMMAND(DrawWallSubClamp4, tmvline4_subclamp, DrawWall4LLVMCommand); -DECLARE_DRAW_COMMAND(DrawWallRevSubClamp4, tmvline4_revsubclamp, DrawWall4LLVMCommand); -DECLARE_DRAW_COMMAND(DrawWallMasked1, mvlinec1, DrawWall1LLVMCommand); -DECLARE_DRAW_COMMAND(DrawWallAdd1, tmvline1_add, DrawWall1LLVMCommand); -DECLARE_DRAW_COMMAND(DrawWallAddClamp1, tmvline1_addclamp, DrawWall1LLVMCommand); 
-DECLARE_DRAW_COMMAND(DrawWallSubClamp1, tmvline1_subclamp, DrawWall1LLVMCommand); -DECLARE_DRAW_COMMAND(DrawWallRevSubClamp1, tmvline1_revsubclamp, DrawWall1LLVMCommand); -DECLARE_DRAW_COMMAND(DrawColumnAdd, DrawColumnAdd, DrawColumnLLVMCommand); -DECLARE_DRAW_COMMAND(DrawColumnTranslated, DrawColumnTranslated, DrawColumnLLVMCommand); -DECLARE_DRAW_COMMAND(DrawColumnTlatedAdd, DrawColumnTlatedAdd, DrawColumnLLVMCommand); -DECLARE_DRAW_COMMAND(DrawColumnShaded, DrawColumnShaded, DrawColumnLLVMCommand); -DECLARE_DRAW_COMMAND(DrawColumnAddClamp, DrawColumnAddClamp, DrawColumnLLVMCommand); -DECLARE_DRAW_COMMAND(DrawColumnAddClampTranslated, DrawColumnAddClampTranslated, DrawColumnLLVMCommand); -DECLARE_DRAW_COMMAND(DrawColumnSubClamp, DrawColumnSubClamp, DrawColumnLLVMCommand); -DECLARE_DRAW_COMMAND(DrawColumnSubClampTranslated, DrawColumnSubClampTranslated, DrawColumnLLVMCommand); -DECLARE_DRAW_COMMAND(DrawColumnRevSubClamp, DrawColumnRevSubClamp, DrawColumnLLVMCommand); -DECLARE_DRAW_COMMAND(DrawColumnRevSubClampTranslated, DrawColumnRevSubClampTranslated, DrawColumnLLVMCommand); -DECLARE_DRAW_COMMAND(FillColumn, FillColumn, DrawColumnLLVMCommand); -DECLARE_DRAW_COMMAND(FillColumnAdd, FillColumnAdd, DrawColumnLLVMCommand); -DECLARE_DRAW_COMMAND(FillColumnAddClamp, FillColumnAddClamp, DrawColumnLLVMCommand); -DECLARE_DRAW_COMMAND(FillColumnSubClamp, FillColumnSubClamp, DrawColumnLLVMCommand); -DECLARE_DRAW_COMMAND(FillColumnRevSubClamp, FillColumnRevSubClamp, DrawColumnLLVMCommand); -DECLARE_DRAW_COMMAND(DrawSingleSky1, DrawSky1, DrawSkyLLVMCommand); -DECLARE_DRAW_COMMAND(DrawSingleSky4, DrawSky4, DrawSkyLLVMCommand); -DECLARE_DRAW_COMMAND(DrawDoubleSky1, DrawDoubleSky1, DrawSkyLLVMCommand); -DECLARE_DRAW_COMMAND(DrawDoubleSky4, DrawDoubleSky4, DrawSkyLLVMCommand); - -///////////////////////////////////////////////////////////////////////////// - -class DrawFuzzColumnRGBACommand : public DrawerCommand -{ - int _x; - int _yl; - int _yh; - BYTE * RESTRICT _destorg; - 
int _pitch; - int _fuzzpos; - int _fuzzviewheight; - -public: - DrawFuzzColumnRGBACommand() + DrawFuzzColumnRGBACommand::DrawFuzzColumnRGBACommand() { using namespace drawerargs; @@ -515,7 +411,7 @@ public: _fuzzviewheight = fuzzviewheight; } - void Execute(DrawerThread *thread) override + void DrawFuzzColumnRGBACommand::Execute(DrawerThread *thread) { int yl = MAX(_yl, 1); int yh = MIN(_yh, _fuzzviewheight); @@ -605,23 +501,14 @@ public: } } - FString DebugInfo() override + FString DrawFuzzColumnRGBACommand::DebugInfo() { return "DrawFuzzColumn"; } -}; -class FillSpanRGBACommand : public DrawerCommand -{ - int _x1; - int _x2; - int _y; - BYTE * RESTRICT _destorg; - fixed_t _light; - int _color; + ///////////////////////////////////////////////////////////////////////////// -public: - FillSpanRGBACommand() + FillSpanRGBACommand::FillSpanRGBACommand() { using namespace drawerargs; @@ -633,7 +520,7 @@ public: _color = ds_color; } - void Execute(DrawerThread *thread) override + void FillSpanRGBACommand::Execute(DrawerThread *thread) { if (thread->line_skipped_by_thread(_y)) return; @@ -646,30 +533,14 @@ public: dest[i] = color; } - FString DebugInfo() override + FString FillSpanRGBACommand::DebugInfo() { return "FillSpan"; } -}; -///////////////////////////////////////////////////////////////////////////// + ///////////////////////////////////////////////////////////////////////////// -class DrawSlabRGBACommand : public DrawerCommand -{ - int _dx; - fixed_t _v; - int _dy; - fixed_t _vi; - const BYTE *_voxelptr; - uint32_t *_p; - ShadeConstants _shade_constants; - const BYTE *_colormap; - fixed_t _light; - int _pitch; - int _start_y; - -public: - DrawSlabRGBACommand(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, BYTE *p, ShadeConstants shade_constants, const BYTE *colormap, fixed_t light) + DrawSlabRGBACommand::DrawSlabRGBACommand(int dx, fixed_t v, int dy, fixed_t vi, const uint8_t *vptr, uint8_t *p, ShadeConstants shade_constants, const uint8_t *colormap, 
fixed_t light) { using namespace drawerargs; @@ -687,16 +558,16 @@ public: assert(dx > 0); } - void Execute(DrawerThread *thread) override + void DrawSlabRGBACommand::Execute(DrawerThread *thread) { int dx = _dx; fixed_t v = _v; int dy = _dy; fixed_t vi = _vi; - const BYTE *vptr = _voxelptr; + const uint8_t *vptr = _voxelptr; uint32_t *p = _p; ShadeConstants shade_constants = _shade_constants; - const BYTE *colormap = _colormap; + const uint8_t *colormap = _colormap; uint32_t light = LightBgra::calc_light_multiplier(_light); int pitch = _pitch; int x; @@ -772,25 +643,14 @@ public: } } - FString DebugInfo() override + FString DrawSlabRGBACommand::DebugInfo() { return "DrawSlab"; } -}; -///////////////////////////////////////////////////////////////////////////// + ///////////////////////////////////////////////////////////////////////////// -class DrawFogBoundaryLineRGBACommand : public DrawerCommand -{ - int _y; - int _x; - int _x2; - BYTE * RESTRICT _destorg; - fixed_t _light; - ShadeConstants _shade_constants; - -public: - DrawFogBoundaryLineRGBACommand(int y, int x, int x2) + DrawFogBoundaryLineRGBACommand::DrawFogBoundaryLineRGBACommand(int y, int x, int x2) { using namespace drawerargs; @@ -803,7 +663,7 @@ public: _shade_constants = dc_shade_constants; } - void Execute(DrawerThread *thread) override + void DrawFogBoundaryLineRGBACommand::Execute(DrawerThread *thread) { if (thread->line_skipped_by_thread(_y)) return; @@ -853,34 +713,14 @@ public: } while (++x <= x2); } - FString DebugInfo() override + FString DrawFogBoundaryLineRGBACommand::DebugInfo() { return "DrawFogBoundaryLine"; } -}; -class DrawTiltedSpanRGBACommand : public DrawerCommand -{ - int _x1; - int _x2; - int _y; - BYTE * RESTRICT _destorg; - fixed_t _light; - ShadeConstants _shade_constants; - FVector3 _plane_sz; - FVector3 _plane_su; - FVector3 _plane_sv; - bool _plane_shade; - int _planeshade; - float _planelightfloat; - fixed_t _pviewx; - fixed_t _pviewy; - int _xbits; - int _ybits; - const 
uint32_t * RESTRICT _source; + ///////////////////////////////////////////////////////////////////////////// -public: - DrawTiltedSpanRGBACommand(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy) + DrawTiltedSpanRGBACommand::DrawTiltedSpanRGBACommand(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy) { using namespace drawerargs; @@ -903,7 +743,7 @@ public: _ybits = ds_ybits; } - void Execute(DrawerThread *thread) override + void DrawTiltedSpanRGBACommand::Execute(DrawerThread *thread) { if (thread->line_skipped_by_thread(_y)) return; @@ -1009,23 +849,14 @@ public: } } - FString DebugInfo() override + FString DrawTiltedSpanRGBACommand::DebugInfo() { return "DrawTiltedSpan"; } -}; -class DrawColoredSpanRGBACommand : public DrawerCommand -{ - int _y; - int _x1; - int _x2; - BYTE * RESTRICT _destorg; - fixed_t _light; - int _color; + ///////////////////////////////////////////////////////////////////////////// -public: - DrawColoredSpanRGBACommand(int y, int x1, int x2) + DrawColoredSpanRGBACommand::DrawColoredSpanRGBACommand(int y, int x1, int x2) { using namespace drawerargs; @@ -1038,7 +869,7 @@ public: _color = ds_color; } - void Execute(DrawerThread *thread) override + void DrawColoredSpanRGBACommand::Execute(DrawerThread *thread) { if (thread->line_skipped_by_thread(_y)) return; @@ -1055,25 +886,14 @@ public: dest[i] = color; } - FString DebugInfo() override + FString DrawColoredSpanRGBACommand::DebugInfo() { return "DrawColoredSpan"; } -}; -class FillTransColumnRGBACommand : public DrawerCommand -{ - int _x; - int _y1; - int _y2; - int _color; - int _a; - BYTE * RESTRICT _destorg; - int _pitch; - fixed_t _light; + 
///////////////////////////////////////////////////////////////////////////// -public: - FillTransColumnRGBACommand(int x, int y1, int y2, int color, int a) + FillTransColumnRGBACommand::FillTransColumnRGBACommand(int x, int y1, int y2, int color, int a) { using namespace drawerargs; @@ -1087,7 +907,7 @@ public: _pitch = dc_pitch; } - void Execute(DrawerThread *thread) override + void FillTransColumnRGBACommand::Execute(DrawerThread *thread) { int x = _x; int y1 = _y1; @@ -1129,553 +949,163 @@ public: } } - FString DebugInfo() override + FString FillTransColumnRGBACommand::DebugInfo() { return "FillTransColumn"; } -}; -ApplySpecialColormapRGBACommand::ApplySpecialColormapRGBACommand(FSpecialColormap *colormap, DFrameBuffer *screen) -{ - buffer = screen->GetBuffer(); - pitch = screen->GetPitch(); - width = screen->GetWidth(); - height = screen->GetHeight(); + ///////////////////////////////////////////////////////////////////////////// - start_red = (int)(colormap->ColorizeStart[0] * 255); - start_green = (int)(colormap->ColorizeStart[1] * 255); - start_blue = (int)(colormap->ColorizeStart[2] * 255); - end_red = (int)(colormap->ColorizeEnd[0] * 255); - end_green = (int)(colormap->ColorizeEnd[1] * 255); - end_blue = (int)(colormap->ColorizeEnd[2] * 255); -} + ApplySpecialColormapRGBACommand::ApplySpecialColormapRGBACommand(FSpecialColormap *colormap, DFrameBuffer *screen) + { + buffer = screen->GetBuffer(); + pitch = screen->GetPitch(); + width = screen->GetWidth(); + height = screen->GetHeight(); + + start_red = (int)(colormap->ColorizeStart[0] * 255); + start_green = (int)(colormap->ColorizeStart[1] * 255); + start_blue = (int)(colormap->ColorizeStart[2] * 255); + end_red = (int)(colormap->ColorizeEnd[0] * 255); + end_green = (int)(colormap->ColorizeEnd[1] * 255); + end_blue = (int)(colormap->ColorizeEnd[2] * 255); + } #ifdef NO_SSE -void ApplySpecialColormapRGBACommand::Execute(DrawerThread *thread) -{ - int y = thread->skipped_by_thread(0); - int count = 
thread->count_for_thread(0, height); - while (count > 0) + void ApplySpecialColormapRGBACommand::Execute(DrawerThread *thread) { - BYTE *pixels = buffer + y * pitch * 4; - for (int x = 0; x < width; x++) + int y = thread->skipped_by_thread(0); + int count = thread->count_for_thread(0, height); + while (count > 0) { - int fg_red = pixels[2]; - int fg_green = pixels[1]; - int fg_blue = pixels[0]; + uint8_t *pixels = buffer + y * pitch * 4; + for (int x = 0; x < width; x++) + { + int fg_red = pixels[2]; + int fg_green = pixels[1]; + int fg_blue = pixels[0]; - int gray = (fg_red * 77 + fg_green * 143 + fg_blue * 37) >> 8; - gray += (gray >> 7); // gray*=256/255 - int inv_gray = 256 - gray; + int gray = (fg_red * 77 + fg_green * 143 + fg_blue * 37) >> 8; + gray += (gray >> 7); // gray*=256/255 + int inv_gray = 256 - gray; - int red = clamp((start_red * inv_gray + end_red * gray) >> 8, 0, 255); - int green = clamp((start_green * inv_gray + end_green * gray) >> 8, 0, 255); - int blue = clamp((start_blue * inv_gray + end_blue * gray) >> 8, 0, 255); + int red = clamp((start_red * inv_gray + end_red * gray) >> 8, 0, 255); + int green = clamp((start_green * inv_gray + end_green * gray) >> 8, 0, 255); + int blue = clamp((start_blue * inv_gray + end_blue * gray) >> 8, 0, 255); - pixels[0] = (BYTE)blue; - pixels[1] = (BYTE)green; - pixels[2] = (BYTE)red; - pixels[3] = 0xff; + pixels[0] = (uint8_t)blue; + pixels[1] = (uint8_t)green; + pixels[2] = (uint8_t)red; + pixels[3] = 0xff; - pixels += 4; + pixels += 4; + } + y += thread->num_cores; + count--; } - y += thread->num_cores; - count--; } -} #else -void ApplySpecialColormapRGBACommand::Execute(DrawerThread *thread) -{ - int y = thread->skipped_by_thread(0); - int count = thread->count_for_thread(0, height); - __m128i gray_weight = _mm_set_epi16(256, 77, 143, 37, 256, 77, 143, 37); - __m128i start_end = _mm_set_epi16(255, start_red, start_green, start_blue, 255, end_red, end_green, end_blue); - while (count > 0) + void 
ApplySpecialColormapRGBACommand::Execute(DrawerThread *thread) { - BYTE *pixels = buffer + y * pitch * 4; - int sse_length = width / 4; - for (int x = 0; x < sse_length; x++) + int y = thread->skipped_by_thread(0); + int count = thread->count_for_thread(0, height); + __m128i gray_weight = _mm_set_epi16(256, 77, 143, 37, 256, 77, 143, 37); + __m128i start_end = _mm_set_epi16(255, start_red, start_green, start_blue, 255, end_red, end_green, end_blue); + while (count > 0) { - // Unpack to integers: - __m128i p = _mm_loadu_si128((const __m128i*)pixels); + uint8_t *pixels = buffer + y * pitch * 4; + int sse_length = width / 4; + for (int x = 0; x < sse_length; x++) + { + // Unpack to integers: + __m128i p = _mm_loadu_si128((const __m128i*)pixels); - __m128i p16_0 = _mm_unpacklo_epi8(p, _mm_setzero_si128()); - __m128i p16_1 = _mm_unpackhi_epi8(p, _mm_setzero_si128()); + __m128i p16_0 = _mm_unpacklo_epi8(p, _mm_setzero_si128()); + __m128i p16_1 = _mm_unpackhi_epi8(p, _mm_setzero_si128()); - // Add gray weighting to colors - __m128i mullo0 = _mm_mullo_epi16(p16_0, gray_weight); - __m128i mullo1 = _mm_mullo_epi16(p16_1, gray_weight); - __m128i p32_0 = _mm_unpacklo_epi16(mullo0, _mm_setzero_si128()); - __m128i p32_1 = _mm_unpackhi_epi16(mullo0, _mm_setzero_si128()); - __m128i p32_2 = _mm_unpacklo_epi16(mullo1, _mm_setzero_si128()); - __m128i p32_3 = _mm_unpackhi_epi16(mullo1, _mm_setzero_si128()); + // Add gray weighting to colors + __m128i mullo0 = _mm_mullo_epi16(p16_0, gray_weight); + __m128i mullo1 = _mm_mullo_epi16(p16_1, gray_weight); + __m128i p32_0 = _mm_unpacklo_epi16(mullo0, _mm_setzero_si128()); + __m128i p32_1 = _mm_unpackhi_epi16(mullo0, _mm_setzero_si128()); + __m128i p32_2 = _mm_unpacklo_epi16(mullo1, _mm_setzero_si128()); + __m128i p32_3 = _mm_unpackhi_epi16(mullo1, _mm_setzero_si128()); - // Transpose to get color components in individual vectors: - __m128 tmpx = _mm_castsi128_ps(p32_0); - __m128 tmpy = _mm_castsi128_ps(p32_1); - __m128 tmpz = 
_mm_castsi128_ps(p32_2); - __m128 tmpw = _mm_castsi128_ps(p32_3); - _MM_TRANSPOSE4_PS(tmpx, tmpy, tmpz, tmpw); - __m128i blue = _mm_castps_si128(tmpx); - __m128i green = _mm_castps_si128(tmpy); - __m128i red = _mm_castps_si128(tmpz); - __m128i alpha = _mm_castps_si128(tmpw); + // Transpose to get color components in individual vectors: + __m128 tmpx = _mm_castsi128_ps(p32_0); + __m128 tmpy = _mm_castsi128_ps(p32_1); + __m128 tmpz = _mm_castsi128_ps(p32_2); + __m128 tmpw = _mm_castsi128_ps(p32_3); + _MM_TRANSPOSE4_PS(tmpx, tmpy, tmpz, tmpw); + __m128i blue = _mm_castps_si128(tmpx); + __m128i green = _mm_castps_si128(tmpy); + __m128i red = _mm_castps_si128(tmpz); + __m128i alpha = _mm_castps_si128(tmpw); - // Calculate gray and 256-gray values: - __m128i gray = _mm_srli_epi32(_mm_add_epi32(_mm_add_epi32(red, green), blue), 8); - __m128i inv_gray = _mm_sub_epi32(_mm_set1_epi32(256), gray); + // Calculate gray and 256-gray values: + __m128i gray = _mm_srli_epi32(_mm_add_epi32(_mm_add_epi32(red, green), blue), 8); + __m128i inv_gray = _mm_sub_epi32(_mm_set1_epi32(256), gray); - // p32 = start * inv_gray + end * gray: - __m128i gray0 = _mm_shuffle_epi32(gray, _MM_SHUFFLE(0, 0, 0, 0)); - __m128i gray1 = _mm_shuffle_epi32(gray, _MM_SHUFFLE(1, 1, 1, 1)); - __m128i gray2 = _mm_shuffle_epi32(gray, _MM_SHUFFLE(2, 2, 2, 2)); - __m128i gray3 = _mm_shuffle_epi32(gray, _MM_SHUFFLE(3, 3, 3, 3)); - __m128i inv_gray0 = _mm_shuffle_epi32(inv_gray, _MM_SHUFFLE(0, 0, 0, 0)); - __m128i inv_gray1 = _mm_shuffle_epi32(inv_gray, _MM_SHUFFLE(1, 1, 1, 1)); - __m128i inv_gray2 = _mm_shuffle_epi32(inv_gray, _MM_SHUFFLE(2, 2, 2, 2)); - __m128i inv_gray3 = _mm_shuffle_epi32(inv_gray, _MM_SHUFFLE(3, 3, 3, 3)); - __m128i gray16_0 = _mm_packs_epi32(gray0, inv_gray0); - __m128i gray16_1 = _mm_packs_epi32(gray1, inv_gray1); - __m128i gray16_2 = _mm_packs_epi32(gray2, inv_gray2); - __m128i gray16_3 = _mm_packs_epi32(gray3, inv_gray3); - __m128i gray16_0_mullo = _mm_mullo_epi16(gray16_0, start_end); - 
__m128i gray16_1_mullo = _mm_mullo_epi16(gray16_1, start_end); - __m128i gray16_2_mullo = _mm_mullo_epi16(gray16_2, start_end); - __m128i gray16_3_mullo = _mm_mullo_epi16(gray16_3, start_end); - __m128i gray16_0_mulhi = _mm_mulhi_epi16(gray16_0, start_end); - __m128i gray16_1_mulhi = _mm_mulhi_epi16(gray16_1, start_end); - __m128i gray16_2_mulhi = _mm_mulhi_epi16(gray16_2, start_end); - __m128i gray16_3_mulhi = _mm_mulhi_epi16(gray16_3, start_end); - p32_0 = _mm_srli_epi32(_mm_add_epi32(_mm_unpacklo_epi16(gray16_0_mullo, gray16_0_mulhi), _mm_unpackhi_epi16(gray16_0_mullo, gray16_0_mulhi)), 8); - p32_1 = _mm_srli_epi32(_mm_add_epi32(_mm_unpacklo_epi16(gray16_1_mullo, gray16_1_mulhi), _mm_unpackhi_epi16(gray16_1_mullo, gray16_1_mulhi)), 8); - p32_2 = _mm_srli_epi32(_mm_add_epi32(_mm_unpacklo_epi16(gray16_2_mullo, gray16_2_mulhi), _mm_unpackhi_epi16(gray16_2_mullo, gray16_2_mulhi)), 8); - p32_3 = _mm_srli_epi32(_mm_add_epi32(_mm_unpacklo_epi16(gray16_3_mullo, gray16_3_mulhi), _mm_unpackhi_epi16(gray16_3_mullo, gray16_3_mulhi)), 8); + // p32 = start * inv_gray + end * gray: + __m128i gray0 = _mm_shuffle_epi32(gray, _MM_SHUFFLE(0, 0, 0, 0)); + __m128i gray1 = _mm_shuffle_epi32(gray, _MM_SHUFFLE(1, 1, 1, 1)); + __m128i gray2 = _mm_shuffle_epi32(gray, _MM_SHUFFLE(2, 2, 2, 2)); + __m128i gray3 = _mm_shuffle_epi32(gray, _MM_SHUFFLE(3, 3, 3, 3)); + __m128i inv_gray0 = _mm_shuffle_epi32(inv_gray, _MM_SHUFFLE(0, 0, 0, 0)); + __m128i inv_gray1 = _mm_shuffle_epi32(inv_gray, _MM_SHUFFLE(1, 1, 1, 1)); + __m128i inv_gray2 = _mm_shuffle_epi32(inv_gray, _MM_SHUFFLE(2, 2, 2, 2)); + __m128i inv_gray3 = _mm_shuffle_epi32(inv_gray, _MM_SHUFFLE(3, 3, 3, 3)); + __m128i gray16_0 = _mm_packs_epi32(gray0, inv_gray0); + __m128i gray16_1 = _mm_packs_epi32(gray1, inv_gray1); + __m128i gray16_2 = _mm_packs_epi32(gray2, inv_gray2); + __m128i gray16_3 = _mm_packs_epi32(gray3, inv_gray3); + __m128i gray16_0_mullo = _mm_mullo_epi16(gray16_0, start_end); + __m128i gray16_1_mullo = 
_mm_mullo_epi16(gray16_1, start_end); + __m128i gray16_2_mullo = _mm_mullo_epi16(gray16_2, start_end); + __m128i gray16_3_mullo = _mm_mullo_epi16(gray16_3, start_end); + __m128i gray16_0_mulhi = _mm_mulhi_epi16(gray16_0, start_end); + __m128i gray16_1_mulhi = _mm_mulhi_epi16(gray16_1, start_end); + __m128i gray16_2_mulhi = _mm_mulhi_epi16(gray16_2, start_end); + __m128i gray16_3_mulhi = _mm_mulhi_epi16(gray16_3, start_end); + p32_0 = _mm_srli_epi32(_mm_add_epi32(_mm_unpacklo_epi16(gray16_0_mullo, gray16_0_mulhi), _mm_unpackhi_epi16(gray16_0_mullo, gray16_0_mulhi)), 8); + p32_1 = _mm_srli_epi32(_mm_add_epi32(_mm_unpacklo_epi16(gray16_1_mullo, gray16_1_mulhi), _mm_unpackhi_epi16(gray16_1_mullo, gray16_1_mulhi)), 8); + p32_2 = _mm_srli_epi32(_mm_add_epi32(_mm_unpacklo_epi16(gray16_2_mullo, gray16_2_mulhi), _mm_unpackhi_epi16(gray16_2_mullo, gray16_2_mulhi)), 8); + p32_3 = _mm_srli_epi32(_mm_add_epi32(_mm_unpacklo_epi16(gray16_3_mullo, gray16_3_mulhi), _mm_unpackhi_epi16(gray16_3_mullo, gray16_3_mulhi)), 8); - p16_0 = _mm_packs_epi32(p32_0, p32_1); - p16_1 = _mm_packs_epi32(p32_2, p32_3); - p = _mm_packus_epi16(p16_0, p16_1); + p16_0 = _mm_packs_epi32(p32_0, p32_1); + p16_1 = _mm_packs_epi32(p32_2, p32_3); + p = _mm_packus_epi16(p16_0, p16_1); - _mm_storeu_si128((__m128i*)pixels, p); - pixels += 16; + _mm_storeu_si128((__m128i*)pixels, p); + pixels += 16; + } + + for (int x = sse_length * 4; x < width; x++) + { + int fg_red = pixels[2]; + int fg_green = pixels[1]; + int fg_blue = pixels[0]; + + int gray = (fg_red * 77 + fg_green * 143 + fg_blue * 37) >> 8; + gray += (gray >> 7); // gray*=256/255 + int inv_gray = 256 - gray; + + int red = clamp((start_red * inv_gray + end_red * gray) >> 8, 0, 255); + int green = clamp((start_green * inv_gray + end_green * gray) >> 8, 0, 255); + int blue = clamp((start_blue * inv_gray + end_blue * gray) >> 8, 0, 255); + + pixels[0] = (uint8_t)blue; + pixels[1] = (uint8_t)green; + pixels[2] = (uint8_t)red; + pixels[3] = 0xff; + + pixels 
+= 4; + } + + y += thread->num_cores; + count--; } - - for (int x = sse_length * 4; x < width; x++) - { - int fg_red = pixels[2]; - int fg_green = pixels[1]; - int fg_blue = pixels[0]; - - int gray = (fg_red * 77 + fg_green * 143 + fg_blue * 37) >> 8; - gray += (gray >> 7); // gray*=256/255 - int inv_gray = 256 - gray; - - int red = clamp((start_red * inv_gray + end_red * gray) >> 8, 0, 255); - int green = clamp((start_green * inv_gray + end_green * gray) >> 8, 0, 255); - int blue = clamp((start_blue * inv_gray + end_blue * gray) >> 8, 0, 255); - - pixels[0] = (BYTE)blue; - pixels[1] = (BYTE)green; - pixels[2] = (BYTE)red; - pixels[3] = 0xff; - - pixels += 4; - } - - y += thread->num_cores; - count--; } -} #endif -///////////////////////////////////////////////////////////////////////////// - -void R_DrawSingleSkyCol1_rgba(uint32_t solid_top, uint32_t solid_bottom) -{ - DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); -} - -void R_DrawSingleSkyCol4_rgba(uint32_t solid_top, uint32_t solid_bottom) -{ - DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); -} - -void R_DrawDoubleSkyCol1_rgba(uint32_t solid_top, uint32_t solid_bottom) -{ - DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); -} - -void R_DrawDoubleSkyCol4_rgba(uint32_t solid_top, uint32_t solid_bottom) -{ - DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); -} - -void R_DrawColumn_rgba() -{ - DrawerCommandQueue::QueueCommand(); -} - -void R_FillColumn_rgba() -{ - DrawerCommandQueue::QueueCommand(); -} - -void R_FillAddColumn_rgba() -{ - DrawerCommandQueue::QueueCommand(); -} - -void R_FillAddClampColumn_rgba() -{ - DrawerCommandQueue::QueueCommand(); -} - -void R_FillSubClampColumn_rgba() -{ - DrawerCommandQueue::QueueCommand(); -} - -void R_FillRevSubClampColumn_rgba() -{ - DrawerCommandQueue::QueueCommand(); -} - -void R_DrawFuzzColumn_rgba() -{ - using namespace drawerargs; - - DrawerCommandQueue::QueueCommand(); - - dc_yl = MAX(dc_yl, 1); - dc_yh = MIN(dc_yh, 
fuzzviewheight); - if (dc_yl <= dc_yh) - fuzzpos = (fuzzpos + dc_yh - dc_yl + 1) % FUZZTABLE; -} - -void R_DrawAddColumn_rgba() -{ - DrawerCommandQueue::QueueCommand(); -} - -void R_DrawTranslatedColumn_rgba() -{ - DrawerCommandQueue::QueueCommand(); -} - -void R_DrawTlatedAddColumn_rgba() -{ - DrawerCommandQueue::QueueCommand(); -} - -void R_DrawShadedColumn_rgba() -{ - DrawerCommandQueue::QueueCommand(); -} - -void R_DrawAddClampColumn_rgba() -{ - DrawerCommandQueue::QueueCommand(); -} - -void R_DrawAddClampTranslatedColumn_rgba() -{ - DrawerCommandQueue::QueueCommand(); -} - -void R_DrawSubClampColumn_rgba() -{ - DrawerCommandQueue::QueueCommand(); -} - -void R_DrawSubClampTranslatedColumn_rgba() -{ - DrawerCommandQueue::QueueCommand(); -} - -void R_DrawRevSubClampColumn_rgba() -{ - DrawerCommandQueue::QueueCommand(); -} - -void R_DrawRevSubClampTranslatedColumn_rgba() -{ - DrawerCommandQueue::QueueCommand(); -} - -void R_DrawSpan_rgba() -{ - DrawerCommandQueue::QueueCommand(); -} - -void R_DrawSpanMasked_rgba() -{ - DrawerCommandQueue::QueueCommand(); -} - -void R_DrawSpanTranslucent_rgba() -{ - DrawerCommandQueue::QueueCommand(); -} - -void R_DrawSpanMaskedTranslucent_rgba() -{ - DrawerCommandQueue::QueueCommand(); -} - -void R_DrawSpanAddClamp_rgba() -{ - DrawerCommandQueue::QueueCommand(); -} - -void R_DrawSpanMaskedAddClamp_rgba() -{ - DrawerCommandQueue::QueueCommand(); -} - -void R_FillSpan_rgba() -{ - DrawerCommandQueue::QueueCommand(); -} - -void R_DrawTiltedSpan_rgba(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy) -{ - DrawerCommandQueue::QueueCommand(y, x1, x2, plane_sz, plane_su, plane_sv, plane_shade, planeshade, planelightfloat, pviewx, pviewy); -} - -void R_DrawColoredSpan_rgba(int y, int x1, int x2) -{ - DrawerCommandQueue::QueueCommand(y, x1, x2); -} - -static ShadeConstants slab_rgba_shade_constants; 
-static const BYTE *slab_rgba_colormap; -static fixed_t slab_rgba_light; - -void R_SetupDrawSlab_rgba(FSWColormap *base_colormap, float light, int shade) -{ - slab_rgba_shade_constants.light_red = base_colormap->Color.r * 256 / 255; - slab_rgba_shade_constants.light_green = base_colormap->Color.g * 256 / 255; - slab_rgba_shade_constants.light_blue = base_colormap->Color.b * 256 / 255; - slab_rgba_shade_constants.light_alpha = base_colormap->Color.a * 256 / 255; - slab_rgba_shade_constants.fade_red = base_colormap->Fade.r; - slab_rgba_shade_constants.fade_green = base_colormap->Fade.g; - slab_rgba_shade_constants.fade_blue = base_colormap->Fade.b; - slab_rgba_shade_constants.fade_alpha = base_colormap->Fade.a; - slab_rgba_shade_constants.desaturate = MIN(abs(base_colormap->Desaturate), 255) * 255 / 256; - slab_rgba_shade_constants.simple_shade = (base_colormap->Color.d == 0x00ffffff && base_colormap->Fade.d == 0x00000000 && base_colormap->Desaturate == 0); - slab_rgba_colormap = base_colormap->Maps; - slab_rgba_light = LIGHTSCALE(light, shade); -} - -void R_DrawSlab_rgba(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, BYTE *p) -{ - DrawerCommandQueue::QueueCommand(dx, v, dy, vi, vptr, p, slab_rgba_shade_constants, slab_rgba_colormap, slab_rgba_light); -} - -DWORD vlinec1_rgba() -{ - using namespace drawerargs; - - DrawerCommandQueue::QueueCommand(); - return dc_texturefrac + dc_count * dc_iscale; -} - -void vlinec4_rgba() -{ - using namespace drawerargs; - - DrawerCommandQueue::QueueCommand(); - for (int i = 0; i < 4; i++) - vplce[i] += vince[i] * dc_count; -} - -DWORD mvlinec1_rgba() -{ - using namespace drawerargs; - - DrawerCommandQueue::QueueCommand(); - return dc_texturefrac + dc_count * dc_iscale; -} - -void mvlinec4_rgba() -{ - using namespace drawerargs; - - DrawerCommandQueue::QueueCommand(); - for (int i = 0; i < 4; i++) - vplce[i] += vince[i] * dc_count; -} - -fixed_t tmvline1_add_rgba() -{ - using namespace drawerargs; - - 
DrawerCommandQueue::QueueCommand(); - return dc_texturefrac + dc_count * dc_iscale; -} - -void tmvline4_add_rgba() -{ - using namespace drawerargs; - - DrawerCommandQueue::QueueCommand(); - for (int i = 0; i < 4; i++) - vplce[i] += vince[i] * dc_count; -} - -fixed_t tmvline1_addclamp_rgba() -{ - using namespace drawerargs; - - DrawerCommandQueue::QueueCommand(); - return dc_texturefrac + dc_count * dc_iscale; -} - -void tmvline4_addclamp_rgba() -{ - using namespace drawerargs; - - DrawerCommandQueue::QueueCommand(); - for (int i = 0; i < 4; i++) - vplce[i] += vince[i] * dc_count; -} - -fixed_t tmvline1_subclamp_rgba() -{ - using namespace drawerargs; - - DrawerCommandQueue::QueueCommand(); - return dc_texturefrac + dc_count * dc_iscale; -} - -void tmvline4_subclamp_rgba() -{ - using namespace drawerargs; - - DrawerCommandQueue::QueueCommand(); - for (int i = 0; i < 4; i++) - vplce[i] += vince[i] * dc_count; -} - -fixed_t tmvline1_revsubclamp_rgba() -{ - using namespace drawerargs; - - DrawerCommandQueue::QueueCommand(); - return dc_texturefrac + dc_count * dc_iscale; -} - -void tmvline4_revsubclamp_rgba() -{ - using namespace drawerargs; - - DrawerCommandQueue::QueueCommand(); - for (int i = 0; i < 4; i++) - vplce[i] += vince[i] * dc_count; -} - -void R_DrawFogBoundarySection_rgba(int y, int y2, int x1) -{ - for (; y < y2; ++y) - { - int x2 = spanend[y]; - DrawerCommandQueue::QueueCommand(y, x1, x2); - } -} - -void R_DrawFogBoundary_rgba(int x1, int x2, short *uclip, short *dclip) -{ - // To do: we do not need to create new spans when using rgba output - instead we should calculate light on a per pixel basis - - // This is essentially the same as R_MapVisPlane but with an extra step - // to create new horizontal spans whenever the light changes enough that - // we need to use a new colormap. 
- - double lightstep = rw_lightstep; - double light = rw_light + rw_lightstep*(x2 - x1 - 1); - int x = x2 - 1; - int t2 = uclip[x]; - int b2 = dclip[x]; - int rcolormap = GETPALOOKUP(light, wallshade); - int lcolormap; - BYTE *basecolormapdata = basecolormap->Maps; - - if (b2 > t2) - { - clearbufshort(spanend + t2, b2 - t2, x); - } - - R_SetColorMapLight(basecolormap, (float)light, wallshade); - - BYTE *fake_dc_colormap = basecolormap->Maps + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT); - - for (--x; x >= x1; --x) - { - int t1 = uclip[x]; - int b1 = dclip[x]; - const int xr = x + 1; - int stop; - - light -= rw_lightstep; - lcolormap = GETPALOOKUP(light, wallshade); - if (lcolormap != rcolormap) - { - if (t2 < b2 && rcolormap != 0) - { // Colormap 0 is always the identity map, so rendering it is - // just a waste of time. - R_DrawFogBoundarySection_rgba(t2, b2, xr); - } - if (t1 < t2) t2 = t1; - if (b1 > b2) b2 = b1; - if (t2 < b2) - { - clearbufshort(spanend + t2, b2 - t2, x); - } - rcolormap = lcolormap; - R_SetColorMapLight(basecolormap, (float)light, wallshade); - fake_dc_colormap = basecolormap->Maps + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT); - } - else - { - if (fake_dc_colormap != basecolormapdata) - { - stop = MIN(t1, b2); - while (t2 < stop) - { - int y = t2++; - DrawerCommandQueue::QueueCommand(y, xr, spanend[y]); - } - stop = MAX(b1, t2); - while (b2 > stop) - { - int y = --b2; - DrawerCommandQueue::QueueCommand(y, xr, spanend[y]); - } - } - else - { - t2 = MAX(t2, MIN(t1, b2)); - b2 = MIN(b2, MAX(b1, t2)); - } - - stop = MIN(t2, b1); - while (t1 < stop) - { - spanend[t1++] = x; - } - stop = MAX(b2, t2); - while (b1 > stop) - { - spanend[--b1] = x; - } - } - - t2 = uclip[x]; - b2 = dclip[x]; - } - if (t2 < b2 && rcolormap != 0) - { - R_DrawFogBoundarySection_rgba(t2, b2, x1); - } -} - } diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index 5d159164ef..0790740689 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -1,31 +1,31 @@ -// 
Emacs style mode select -*- C++ -*- -//----------------------------------------------------------------------------- -// -// $Id:$ -// -// Copyright (C) 1993-1996 by id Software, Inc. -// -// This source is available for distribution and/or modification -// only under the terms of the DOOM Source Code License as -// published by id Software. All rights reserved. -// -// The source is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// FITNESS FOR A PARTICULAR PURPOSE. See the DOOM Source Code License -// for more details. -// -// DESCRIPTION: -// System specific interface stuff. -// -//----------------------------------------------------------------------------- +/* +** Drawer commands for the RT family of drawers +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. 
+** +*/ - -#ifndef __R_DRAW_RGBA__ -#define __R_DRAW_RGBA__ +#pragma once #include "r_draw.h" #include "v_palette.h" #include "r_thread.h" +#include "r_drawers.h" #ifndef NO_SSE #include @@ -38,173 +38,511 @@ EXTERN_CVAR(Float, r_lod_bias) namespace swrenderer { + // Give the compiler a strong hint we want these functions inlined: + #ifndef FORCEINLINE + #if defined(_MSC_VER) + #define FORCEINLINE __forceinline + #elif defined(__GNUC__) + #define FORCEINLINE __attribute__((always_inline)) inline + #else + #define FORCEINLINE inline + #endif + #endif -///////////////////////////////////////////////////////////////////////////// -// Drawer commands: + // Promise compiler we have no aliasing of this pointer + #ifndef RESTRICT + #if defined(_MSC_VER) + #define RESTRICT __restrict + #elif defined(__GNUC__) + #define RESTRICT __restrict__ + #else + #define RESTRICT + #endif + #endif -class ApplySpecialColormapRGBACommand : public DrawerCommand -{ - BYTE *buffer; - int pitch; - int width; - int height; - int start_red; - int start_green; - int start_blue; - int end_red; - int end_green; - int end_blue; + #define DECLARE_DRAW_COMMAND(name, func, base) \ + class name##LLVMCommand : public base \ + { \ + public: \ + using base::base; \ + void Execute(DrawerThread *thread) override \ + { \ + WorkerThreadData d = ThreadData(thread); \ + Drawers::Instance()->func(&args, &d); \ + } \ + }; -public: - ApplySpecialColormapRGBACommand(FSpecialColormap *colormap, DFrameBuffer *screen); - void Execute(DrawerThread *thread) override; - FString DebugInfo() override { return "ApplySpecialColormapRGBACommand"; } -}; - -template -class DrawerBlendCommand : public CommandType -{ -public: - void Execute(DrawerThread *thread) override + class DrawSpanLLVMCommand : public DrawerCommand { - typename CommandType::LoopIterator loop(this, thread); - if (!loop) return; - BlendMode blend(*this, loop); - do + public: + DrawSpanLLVMCommand(); + + void Execute(DrawerThread *thread) override; + FString 
DebugInfo() override; + + protected: + DrawSpanArgs args; + + private: + inline static bool sampler_setup(const uint32_t * &source, int &xbits, int &ybits, bool mipmapped); + }; + + class DrawSpanMaskedLLVMCommand : public DrawSpanLLVMCommand + { + public: + void Execute(DrawerThread *thread) override; + }; + + class DrawSpanTranslucentLLVMCommand : public DrawSpanLLVMCommand + { + public: + void Execute(DrawerThread *thread) override; + }; + + class DrawSpanMaskedTranslucentLLVMCommand : public DrawSpanLLVMCommand + { + public: + void Execute(DrawerThread *thread) override; + }; + + class DrawSpanAddClampLLVMCommand : public DrawSpanLLVMCommand + { + public: + void Execute(DrawerThread *thread) override; + }; + + class DrawSpanMaskedAddClampLLVMCommand : public DrawSpanLLVMCommand + { + public: + void Execute(DrawerThread *thread) override; + }; + + class DrawWall4LLVMCommand : public DrawerCommand + { + protected: + DrawWallArgs args; + + WorkerThreadData ThreadData(DrawerThread *thread); + + public: + DrawWall4LLVMCommand(); + + void Execute(DrawerThread *thread) override; + FString DebugInfo() override; + }; + + class DrawWall1LLVMCommand : public DrawerCommand + { + protected: + DrawWallArgs args; + + WorkerThreadData ThreadData(DrawerThread *thread); + + public: + DrawWall1LLVMCommand(); + + void Execute(DrawerThread *thread) override; + FString DebugInfo() override; + }; + + class DrawColumnLLVMCommand : public DrawerCommand + { + protected: + DrawColumnArgs args; + + WorkerThreadData ThreadData(DrawerThread *thread); + FString DebugInfo() override; + + public: + DrawColumnLLVMCommand(); + + void Execute(DrawerThread *thread) override; + }; + + class DrawSkyLLVMCommand : public DrawerCommand + { + protected: + DrawSkyArgs args; + + WorkerThreadData ThreadData(DrawerThread *thread); + + public: + DrawSkyLLVMCommand(uint32_t solid_top, uint32_t solid_bottom); + FString DebugInfo() override; + }; + + DECLARE_DRAW_COMMAND(DrawWallMasked4, mvlinec4, 
DrawWall4LLVMCommand); + DECLARE_DRAW_COMMAND(DrawWallAdd4, tmvline4_add, DrawWall4LLVMCommand); + DECLARE_DRAW_COMMAND(DrawWallAddClamp4, tmvline4_addclamp, DrawWall4LLVMCommand); + DECLARE_DRAW_COMMAND(DrawWallSubClamp4, tmvline4_subclamp, DrawWall4LLVMCommand); + DECLARE_DRAW_COMMAND(DrawWallRevSubClamp4, tmvline4_revsubclamp, DrawWall4LLVMCommand); + DECLARE_DRAW_COMMAND(DrawWallMasked1, mvlinec1, DrawWall1LLVMCommand); + DECLARE_DRAW_COMMAND(DrawWallAdd1, tmvline1_add, DrawWall1LLVMCommand); + DECLARE_DRAW_COMMAND(DrawWallAddClamp1, tmvline1_addclamp, DrawWall1LLVMCommand); + DECLARE_DRAW_COMMAND(DrawWallSubClamp1, tmvline1_subclamp, DrawWall1LLVMCommand); + DECLARE_DRAW_COMMAND(DrawWallRevSubClamp1, tmvline1_revsubclamp, DrawWall1LLVMCommand); + DECLARE_DRAW_COMMAND(DrawColumnAdd, DrawColumnAdd, DrawColumnLLVMCommand); + DECLARE_DRAW_COMMAND(DrawColumnTranslated, DrawColumnTranslated, DrawColumnLLVMCommand); + DECLARE_DRAW_COMMAND(DrawColumnTlatedAdd, DrawColumnTlatedAdd, DrawColumnLLVMCommand); + DECLARE_DRAW_COMMAND(DrawColumnShaded, DrawColumnShaded, DrawColumnLLVMCommand); + DECLARE_DRAW_COMMAND(DrawColumnAddClamp, DrawColumnAddClamp, DrawColumnLLVMCommand); + DECLARE_DRAW_COMMAND(DrawColumnAddClampTranslated, DrawColumnAddClampTranslated, DrawColumnLLVMCommand); + DECLARE_DRAW_COMMAND(DrawColumnSubClamp, DrawColumnSubClamp, DrawColumnLLVMCommand); + DECLARE_DRAW_COMMAND(DrawColumnSubClampTranslated, DrawColumnSubClampTranslated, DrawColumnLLVMCommand); + DECLARE_DRAW_COMMAND(DrawColumnRevSubClamp, DrawColumnRevSubClamp, DrawColumnLLVMCommand); + DECLARE_DRAW_COMMAND(DrawColumnRevSubClampTranslated, DrawColumnRevSubClampTranslated, DrawColumnLLVMCommand); + DECLARE_DRAW_COMMAND(FillColumn, FillColumn, DrawColumnLLVMCommand); + DECLARE_DRAW_COMMAND(FillColumnAdd, FillColumnAdd, DrawColumnLLVMCommand); + DECLARE_DRAW_COMMAND(FillColumnAddClamp, FillColumnAddClamp, DrawColumnLLVMCommand); + DECLARE_DRAW_COMMAND(FillColumnSubClamp, FillColumnSubClamp, 
DrawColumnLLVMCommand); + DECLARE_DRAW_COMMAND(FillColumnRevSubClamp, FillColumnRevSubClamp, DrawColumnLLVMCommand); + DECLARE_DRAW_COMMAND(DrawSingleSky1, DrawSky1, DrawSkyLLVMCommand); + DECLARE_DRAW_COMMAND(DrawSingleSky4, DrawSky4, DrawSkyLLVMCommand); + DECLARE_DRAW_COMMAND(DrawDoubleSky1, DrawDoubleSky1, DrawSkyLLVMCommand); + DECLARE_DRAW_COMMAND(DrawDoubleSky4, DrawDoubleSky4, DrawSkyLLVMCommand); + + class DrawFuzzColumnRGBACommand : public DrawerCommand + { + int _x; + int _yl; + int _yh; + uint8_t * RESTRICT _destorg; + int _pitch; + int _fuzzpos; + int _fuzzviewheight; + + public: + DrawFuzzColumnRGBACommand(); + void Execute(DrawerThread *thread) override; + FString DebugInfo() override; + }; + + class FillSpanRGBACommand : public DrawerCommand + { + int _x1; + int _x2; + int _y; + uint8_t * RESTRICT _destorg; + fixed_t _light; + int _color; + + public: + FillSpanRGBACommand(); + void Execute(DrawerThread *thread) override; + FString DebugInfo() override; + }; + + class DrawSlabRGBACommand : public DrawerCommand + { + int _dx; + fixed_t _v; + int _dy; + fixed_t _vi; + const uint8_t *_voxelptr; + uint32_t *_p; + ShadeConstants _shade_constants; + const uint8_t *_colormap; + fixed_t _light; + int _pitch; + int _start_y; + + public: + DrawSlabRGBACommand(int dx, fixed_t v, int dy, fixed_t vi, const uint8_t *vptr, uint8_t *p, ShadeConstants shade_constants, const uint8_t *colormap, fixed_t light); + void Execute(DrawerThread *thread) override; + FString DebugInfo() override; + }; + + class DrawFogBoundaryLineRGBACommand : public DrawerCommand + { + int _y; + int _x; + int _x2; + uint8_t * RESTRICT _destorg; + fixed_t _light; + ShadeConstants _shade_constants; + + public: + DrawFogBoundaryLineRGBACommand(int y, int x, int x2); + void Execute(DrawerThread *thread) override; + FString DebugInfo() override; + }; + + class DrawTiltedSpanRGBACommand : public DrawerCommand + { + int _x1; + int _x2; + int _y; + uint8_t * RESTRICT _destorg; + fixed_t _light; + 
ShadeConstants _shade_constants; + FVector3 _plane_sz; + FVector3 _plane_su; + FVector3 _plane_sv; + bool _plane_shade; + int _planeshade; + float _planelightfloat; + fixed_t _pviewx; + fixed_t _pviewy; + int _xbits; + int _ybits; + const uint32_t * RESTRICT _source; + + public: + DrawTiltedSpanRGBACommand(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy); + void Execute(DrawerThread *thread) override; + FString DebugInfo() override; + }; + + class DrawColoredSpanRGBACommand : public DrawerCommand + { + int _y; + int _x1; + int _x2; + uint8_t * RESTRICT _destorg; + fixed_t _light; + int _color; + + public: + DrawColoredSpanRGBACommand(int y, int x1, int x2); + + void Execute(DrawerThread *thread) override; + FString DebugInfo() override; + }; + + class FillTransColumnRGBACommand : public DrawerCommand + { + int _x; + int _y1; + int _y2; + int _color; + int _a; + uint8_t * RESTRICT _destorg; + int _pitch; + fixed_t _light; + + public: + FillTransColumnRGBACommand(int x, int y1, int y2, int color, int a); + void Execute(DrawerThread *thread) override; + FString DebugInfo() override; + }; + + class ApplySpecialColormapRGBACommand : public DrawerCommand + { + uint8_t *buffer; + int pitch; + int width; + int height; + int start_red; + int start_green; + int start_blue; + int end_red; + int end_green; + int end_blue; + + public: + ApplySpecialColormapRGBACommand(FSpecialColormap *colormap, DFrameBuffer *screen); + void Execute(DrawerThread *thread) override; + FString DebugInfo() override { return "ApplySpecialColormapRGBACommand"; } + }; + + template + class DrawerBlendCommand : public CommandType + { + public: + void Execute(DrawerThread *thread) override { - blend.Blend(*this, loop); - } while (loop.next()); - } -}; + typename CommandType::LoopIterator loop(this, thread); + if (!loop) return; + BlendMode blend(*this, loop); + do + 
{ + blend.Blend(*this, loop); + } while (loop.next()); + } + }; -///////////////////////////////////////////////////////////////////////////// -// Pixel shading inline functions: - -// Give the compiler a strong hint we want these functions inlined: -#ifndef FORCEINLINE -#if defined(_MSC_VER) -#define FORCEINLINE __forceinline -#elif defined(__GNUC__) -#define FORCEINLINE __attribute__((always_inline)) inline -#else -#define FORCEINLINE inline -#endif -#endif - -// Promise compiler we have no aliasing of this pointer -#ifndef RESTRICT -#if defined(_MSC_VER) -#define RESTRICT __restrict -#elif defined(__GNUC__) -#define RESTRICT __restrict__ -#else -#define RESTRICT -#endif -#endif - -class LightBgra -{ -public: - // calculates the light constant passed to the shade_pal_index function - FORCEINLINE static uint32_t calc_light_multiplier(dsfixed_t light) + class DrawColumnRt1LLVMCommand : public DrawerCommand { - return 256 - (light >> (FRACBITS - 8)); - } + protected: + DrawColumnArgs args; + WorkerThreadData ThreadData(DrawerThread *thread); - // Calculates a ARGB8 color for the given palette index and light multiplier - FORCEINLINE static uint32_t shade_pal_index_simple(uint32_t index, uint32_t light) + public: + DrawColumnRt1LLVMCommand(int hx, int sx, int yl, int yh); + void Execute(DrawerThread *thread) override; + FString DebugInfo() override; + }; + + DECLARE_DRAW_COMMAND(DrawColumnRt1Copy, DrawColumnRt1Copy, DrawColumnRt1LLVMCommand); + DECLARE_DRAW_COMMAND(DrawColumnRt1Add, DrawColumnRt1Add, DrawColumnRt1LLVMCommand); + DECLARE_DRAW_COMMAND(DrawColumnRt1Shaded, DrawColumnRt1Shaded, DrawColumnRt1LLVMCommand); + DECLARE_DRAW_COMMAND(DrawColumnRt1AddClamp, DrawColumnRt1AddClamp, DrawColumnRt1LLVMCommand); + DECLARE_DRAW_COMMAND(DrawColumnRt1SubClamp, DrawColumnRt1SubClamp, DrawColumnRt1LLVMCommand); + DECLARE_DRAW_COMMAND(DrawColumnRt1RevSubClamp, DrawColumnRt1RevSubClamp, DrawColumnRt1LLVMCommand); + DECLARE_DRAW_COMMAND(DrawColumnRt1Translated, 
DrawColumnRt1Translated, DrawColumnRt1LLVMCommand); + DECLARE_DRAW_COMMAND(DrawColumnRt1TlatedAdd, DrawColumnRt1TlatedAdd, DrawColumnRt1LLVMCommand); + DECLARE_DRAW_COMMAND(DrawColumnRt1AddClampTranslated, DrawColumnRt1AddClampTranslated, DrawColumnRt1LLVMCommand); + DECLARE_DRAW_COMMAND(DrawColumnRt1SubClampTranslated, DrawColumnRt1SubClampTranslated, DrawColumnRt1LLVMCommand); + DECLARE_DRAW_COMMAND(DrawColumnRt1RevSubClampTranslated, DrawColumnRt1RevSubClampTranslated, DrawColumnRt1LLVMCommand); + DECLARE_DRAW_COMMAND(DrawColumnRt4, DrawColumnRt4, DrawColumnRt1LLVMCommand); + DECLARE_DRAW_COMMAND(DrawColumnRt4Copy, DrawColumnRt4Copy, DrawColumnRt1LLVMCommand); + DECLARE_DRAW_COMMAND(DrawColumnRt4Add, DrawColumnRt4Add, DrawColumnRt1LLVMCommand); + DECLARE_DRAW_COMMAND(DrawColumnRt4Shaded, DrawColumnRt4Shaded, DrawColumnRt1LLVMCommand); + DECLARE_DRAW_COMMAND(DrawColumnRt4AddClamp, DrawColumnRt4AddClamp, DrawColumnRt1LLVMCommand); + DECLARE_DRAW_COMMAND(DrawColumnRt4SubClamp, DrawColumnRt4SubClamp, DrawColumnRt1LLVMCommand); + DECLARE_DRAW_COMMAND(DrawColumnRt4RevSubClamp, DrawColumnRt4RevSubClamp, DrawColumnRt1LLVMCommand); + DECLARE_DRAW_COMMAND(DrawColumnRt4Translated, DrawColumnRt4Translated, DrawColumnRt1LLVMCommand); + DECLARE_DRAW_COMMAND(DrawColumnRt4TlatedAdd, DrawColumnRt4TlatedAdd, DrawColumnRt1LLVMCommand); + DECLARE_DRAW_COMMAND(DrawColumnRt4AddClampTranslated, DrawColumnRt4AddClampTranslated, DrawColumnRt1LLVMCommand); + DECLARE_DRAW_COMMAND(DrawColumnRt4SubClampTranslated, DrawColumnRt4SubClampTranslated, DrawColumnRt1LLVMCommand); + DECLARE_DRAW_COMMAND(DrawColumnRt4RevSubClampTranslated, DrawColumnRt4RevSubClampTranslated, DrawColumnRt1LLVMCommand); + + ///////////////////////////////////////////////////////////////////////////// + + class RtInitColsRGBACommand : public DrawerCommand { - const PalEntry &color = GPalette.BaseColors[index]; - uint32_t red = color.r; - uint32_t green = color.g; - uint32_t blue = color.b; + BYTE * RESTRICT buff; - red 
= red * light / 256; - green = green * light / 256; - blue = blue * light / 256; + public: + RtInitColsRGBACommand(BYTE *buff); + void Execute(DrawerThread *thread) override; + FString DebugInfo() override; + }; - return 0xff000000 | (red << 16) | (green << 8) | blue; - } - - // Calculates a ARGB8 color for the given palette index, light multiplier and dynamic colormap - FORCEINLINE static uint32_t shade_pal_index(uint32_t index, uint32_t light, const ShadeConstants &constants) + template + class DrawColumnHorizRGBACommand : public DrawerCommand { - const PalEntry &color = GPalette.BaseColors[index]; - uint32_t alpha = color.d & 0xff000000; - uint32_t red = color.r; - uint32_t green = color.g; - uint32_t blue = color.b; - if (constants.simple_shade) + int _count; + fixed_t _iscale; + fixed_t _texturefrac; + const InputPixelType * RESTRICT _source; + int _x; + int _yl; + int _yh; + + public: + DrawColumnHorizRGBACommand(); + void Execute(DrawerThread *thread) override; + FString DebugInfo() override; + }; + + class FillColumnHorizRGBACommand : public DrawerCommand + { + int _x; + int _yl; + int _yh; + int _count; + uint32_t _color; + + public: + FillColumnHorizRGBACommand(); + void Execute(DrawerThread *thread) override; + FString DebugInfo() override; + }; + + ///////////////////////////////////////////////////////////////////////////// + // Pixel shading inline functions: + + class LightBgra + { + public: + // calculates the light constant passed to the shade_pal_index function + FORCEINLINE static uint32_t calc_light_multiplier(dsfixed_t light) { + return 256 - (light >> (FRACBITS - 8)); + } + + // Calculates a ARGB8 color for the given palette index and light multiplier + FORCEINLINE static uint32_t shade_pal_index_simple(uint32_t index, uint32_t light) + { + const PalEntry &color = GPalette.BaseColors[index]; + uint32_t red = color.r; + uint32_t green = color.g; + uint32_t blue = color.b; + red = red * light / 256; green = green * light / 256; blue = blue * 
light / 256; + + return 0xff000000 | (red << 16) | (green << 8) | blue; } - else + + // Calculates a ARGB8 color for the given palette index, light multiplier and dynamic colormap + FORCEINLINE static uint32_t shade_pal_index(uint32_t index, uint32_t light, const ShadeConstants &constants) { - uint32_t inv_light = 256 - light; - uint32_t inv_desaturate = 256 - constants.desaturate; + const PalEntry &color = GPalette.BaseColors[index]; + uint32_t alpha = color.d & 0xff000000; + uint32_t red = color.r; + uint32_t green = color.g; + uint32_t blue = color.b; + if (constants.simple_shade) + { + red = red * light / 256; + green = green * light / 256; + blue = blue * light / 256; + } + else + { + uint32_t inv_light = 256 - light; + uint32_t inv_desaturate = 256 - constants.desaturate; - uint32_t intensity = ((red * 77 + green * 143 + blue * 37) >> 8) * constants.desaturate; + uint32_t intensity = ((red * 77 + green * 143 + blue * 37) >> 8) * constants.desaturate; - red = (red * inv_desaturate + intensity) / 256; - green = (green * inv_desaturate + intensity) / 256; - blue = (blue * inv_desaturate + intensity) / 256; + red = (red * inv_desaturate + intensity) / 256; + green = (green * inv_desaturate + intensity) / 256; + blue = (blue * inv_desaturate + intensity) / 256; - red = (constants.fade_red * inv_light + red * light) / 256; - green = (constants.fade_green * inv_light + green * light) / 256; - blue = (constants.fade_blue * inv_light + blue * light) / 256; + red = (constants.fade_red * inv_light + red * light) / 256; + green = (constants.fade_green * inv_light + green * light) / 256; + blue = (constants.fade_blue * inv_light + blue * light) / 256; - red = (red * constants.light_red) / 256; - green = (green * constants.light_green) / 256; - blue = (blue * constants.light_blue) / 256; + red = (red * constants.light_red) / 256; + green = (green * constants.light_green) / 256; + blue = (blue * constants.light_blue) / 256; + } + return alpha | (red << 16) | (green << 8) | 
blue; } - return alpha | (red << 16) | (green << 8) | blue; - } - FORCEINLINE static uint32_t shade_bgra_simple(uint32_t color, uint32_t light) - { - uint32_t red = RPART(color) * light / 256; - uint32_t green = GPART(color) * light / 256; - uint32_t blue = BPART(color) * light / 256; - return 0xff000000 | (red << 16) | (green << 8) | blue; - } - - FORCEINLINE static uint32_t shade_bgra(uint32_t color, uint32_t light, const ShadeConstants &constants) - { - uint32_t alpha = color & 0xff000000; - uint32_t red = (color >> 16) & 0xff; - uint32_t green = (color >> 8) & 0xff; - uint32_t blue = color & 0xff; - if (constants.simple_shade) + FORCEINLINE static uint32_t shade_bgra_simple(uint32_t color, uint32_t light) { - red = red * light / 256; - green = green * light / 256; - blue = blue * light / 256; + uint32_t red = RPART(color) * light / 256; + uint32_t green = GPART(color) * light / 256; + uint32_t blue = BPART(color) * light / 256; + return 0xff000000 | (red << 16) | (green << 8) | blue; } - else + + FORCEINLINE static uint32_t shade_bgra(uint32_t color, uint32_t light, const ShadeConstants &constants) { - uint32_t inv_light = 256 - light; - uint32_t inv_desaturate = 256 - constants.desaturate; + uint32_t alpha = color & 0xff000000; + uint32_t red = (color >> 16) & 0xff; + uint32_t green = (color >> 8) & 0xff; + uint32_t blue = color & 0xff; + if (constants.simple_shade) + { + red = red * light / 256; + green = green * light / 256; + blue = blue * light / 256; + } + else + { + uint32_t inv_light = 256 - light; + uint32_t inv_desaturate = 256 - constants.desaturate; - uint32_t intensity = ((red * 77 + green * 143 + blue * 37) >> 8) * constants.desaturate; + uint32_t intensity = ((red * 77 + green * 143 + blue * 37) >> 8) * constants.desaturate; - red = (red * inv_desaturate + intensity) / 256; - green = (green * inv_desaturate + intensity) / 256; - blue = (blue * inv_desaturate + intensity) / 256; + red = (red * inv_desaturate + intensity) / 256; + green = (green * 
inv_desaturate + intensity) / 256; + blue = (blue * inv_desaturate + intensity) / 256; - red = (constants.fade_red * inv_light + red * light) / 256; - green = (constants.fade_green * inv_light + green * light) / 256; - blue = (constants.fade_blue * inv_light + blue * light) / 256; + red = (constants.fade_red * inv_light + red * light) / 256; + green = (constants.fade_green * inv_light + green * light) / 256; + blue = (constants.fade_blue * inv_light + blue * light) / 256; - red = (red * constants.light_red) / 256; - green = (green * constants.light_green) / 256; - blue = (blue * constants.light_blue) / 256; + red = (red * constants.light_red) / 256; + green = (green * constants.light_green) / 256; + blue = (blue * constants.light_blue) / 256; + } + return alpha | (red << 16) | (green << 8) | blue; } - return alpha | (red << 16) | (green << 8) | blue; - } -}; - + }; } - -#endif diff --git a/src/r_draw_tc.cpp b/src/r_draw_tc.cpp new file mode 100644 index 0000000000..a531d4aa9c --- /dev/null +++ b/src/r_draw_tc.cpp @@ -0,0 +1,1411 @@ + +#include + +#include "templates.h" +#include "doomdef.h" +#include "i_system.h" +#include "w_wad.h" +#include "r_local.h" +#include "v_video.h" +#include "doomstat.h" +#include "st_stuff.h" +#include "g_game.h" +#include "g_level.h" +#include "r_data/r_translate.h" +#include "v_palette.h" +#include "r_data/colormaps.h" +#include "r_plane.h" +#include "r_draw_tc.h" +#include "r_draw_rgba.h" +#include "r_thread.h" + +namespace swrenderer +{ + // Needed by R_DrawFogBoundary (which probably shouldn't be part of this file) + extern "C" short spanend[MAXHEIGHT]; + extern float rw_light; + extern float rw_lightstep; + extern int wallshade; + + double dc_texturemid; + + int ylookup[MAXHEIGHT]; + uint8_t shadetables[NUMCOLORMAPS * 16 * 256]; + FDynamicColormap ShadeFakeColormap[16]; + uint8_t identitymap[256]; + FDynamicColormap identitycolormap; + int fuzzoffset[FUZZTABLE + 1]; + int fuzzpos; + int fuzzviewheight; + + namespace drawerargs + { 
+ int dc_pitch; + lighttable_t *dc_colormap; + FSWColormap *dc_fcolormap; + ShadeConstants dc_shade_constants; + fixed_t dc_light; + int dc_x; + int dc_yl; + int dc_yh; + fixed_t dc_iscale; + fixed_t dc_texturefrac; + uint32_t dc_textureheight; + int dc_color; + uint32_t dc_srccolor; + uint32_t dc_srccolor_bgra; + uint32_t *dc_srcblend; + uint32_t *dc_destblend; + fixed_t dc_srcalpha; + fixed_t dc_destalpha; + const uint8_t *dc_source; + const uint8_t *dc_source2; + uint32_t dc_texturefracx; + uint8_t *dc_translation; + uint8_t *dc_dest; + uint8_t *dc_destorg; + int dc_destheight; + int dc_count; + uint32_t vplce[4]; + uint32_t vince[4]; + uint8_t *palookupoffse[4]; + fixed_t palookuplight[4]; + const uint8_t *bufplce[4]; + const uint8_t *bufplce2[4]; + uint32_t buftexturefracx[4]; + uint32_t bufheight[4]; + int vlinebits; + int mvlinebits; + int tmvlinebits; + int ds_y; + int ds_x1; + int ds_x2; + lighttable_t * ds_colormap; + FSWColormap *ds_fcolormap; + ShadeConstants ds_shade_constants; + dsfixed_t ds_light; + dsfixed_t ds_xfrac; + dsfixed_t ds_yfrac; + dsfixed_t ds_xstep; + dsfixed_t ds_ystep; + int ds_xbits; + int ds_ybits; + fixed_t ds_alpha; + double ds_lod; + const uint8_t *ds_source; + bool ds_source_mipmapped; + int ds_color; + bool drawer_needs_pal_input; + unsigned int dc_tspans[4][MAXHEIGHT]; + unsigned int *dc_ctspan[4]; + unsigned int *horizspan[4]; + } + + void R_InitColumnDrawers() + { + colfunc = basecolfunc = R_DrawColumn; + fuzzcolfunc = R_DrawFuzzColumn; + transcolfunc = R_DrawTranslatedColumn; + spanfunc = R_DrawSpan; + hcolfunc_pre = R_DrawColumnHoriz; + hcolfunc_post1 = rt_map1col; + hcolfunc_post4 = rt_map4cols; + } + + void R_InitShadeMaps() + { + int i, j; + // set up shading tables for shaded columns + // 16 colormap sets, progressing from full alpha to minimum visible alpha + + uint8_t *table = shadetables; + + // Full alpha + for (i = 0; i < 16; ++i) + { + ShadeFakeColormap[i].Color = ~0u; + ShadeFakeColormap[i].Desaturate = ~0u; + 
ShadeFakeColormap[i].Next = NULL; + ShadeFakeColormap[i].Maps = table; + + for (j = 0; j < NUMCOLORMAPS; ++j) + { + int a = (NUMCOLORMAPS - j) * 256 / NUMCOLORMAPS * (16 - i); + for (int k = 0; k < 256; ++k) + { + uint8_t v = (((k + 2) * a) + 256) >> 14; + table[k] = MIN(v, 64); + } + table += 256; + } + } + for (i = 0; i < NUMCOLORMAPS * 16 * 256; ++i) + { + assert(shadetables[i] <= 64); + } + + // Set up a guaranteed identity map + for (i = 0; i < 256; ++i) + { + identitymap[i] = i; + } + } + + void R_InitFuzzTable(int fuzzoff) + { + /* + FUZZOFF,-FUZZOFF,FUZZOFF,-FUZZOFF,FUZZOFF,FUZZOFF,-FUZZOFF, + FUZZOFF,FUZZOFF,-FUZZOFF,FUZZOFF,FUZZOFF,FUZZOFF,-FUZZOFF, + FUZZOFF,FUZZOFF,FUZZOFF,-FUZZOFF,-FUZZOFF,-FUZZOFF,-FUZZOFF, + FUZZOFF,-FUZZOFF,-FUZZOFF,FUZZOFF,FUZZOFF,FUZZOFF,FUZZOFF,-FUZZOFF, + FUZZOFF,-FUZZOFF,FUZZOFF,FUZZOFF,-FUZZOFF,-FUZZOFF,FUZZOFF, + FUZZOFF,-FUZZOFF,-FUZZOFF,-FUZZOFF,-FUZZOFF,FUZZOFF,FUZZOFF, + FUZZOFF,FUZZOFF,-FUZZOFF,FUZZOFF,FUZZOFF,-FUZZOFF,FUZZOFF + */ + + static const int8_t fuzzinit[FUZZTABLE] = { + 1,-1, 1,-1, 1, 1,-1, + 1, 1,-1, 1, 1, 1,-1, + 1, 1, 1,-1,-1,-1,-1, + 1,-1,-1, 1, 1, 1, 1,-1, + 1,-1, 1, 1,-1,-1, 1, + 1,-1,-1,-1,-1, 1, 1, + 1, 1,-1, 1, 1,-1, 1 + }; + + for (int i = 0; i < FUZZTABLE; i++) + { + fuzzoffset[i] = fuzzinit[i] * fuzzoff; + } + } + + namespace + { + bool R_SetBlendFunc(int op, fixed_t fglevel, fixed_t bglevel, int flags) + { + using namespace drawerargs; + + // r_drawtrans is a seriously bad thing to turn off. I wonder if I should + // just remove it completely. 
+ if (!r_drawtrans || (op == STYLEOP_Add && fglevel == FRACUNIT && bglevel == 0 && !(flags & STYLEF_InvertSource))) + { + if (flags & STYLEF_ColorIsFixed) + { + colfunc = R_FillColumn; + hcolfunc_post1 = rt_copy1col; + hcolfunc_post4 = rt_copy4cols; + } + else if (dc_translation == NULL) + { + colfunc = basecolfunc; + hcolfunc_post1 = rt_map1col; + hcolfunc_post4 = rt_map4cols; + } + else + { + colfunc = transcolfunc; + hcolfunc_post1 = rt_tlate1col; + hcolfunc_post4 = rt_tlate4cols; + drawer_needs_pal_input = true; + } + return true; + } + if (flags & STYLEF_InvertSource) + { + dc_srcblend = Col2RGB8_Inverse[fglevel >> 10]; + dc_destblend = Col2RGB8_LessPrecision[bglevel >> 10]; + dc_srcalpha = fglevel; + dc_destalpha = bglevel; + } + else if (op == STYLEOP_Add && fglevel + bglevel <= FRACUNIT) + { + dc_srcblend = Col2RGB8[fglevel >> 10]; + dc_destblend = Col2RGB8[bglevel >> 10]; + dc_srcalpha = fglevel; + dc_destalpha = bglevel; + } + else + { + dc_srcblend = Col2RGB8_LessPrecision[fglevel >> 10]; + dc_destblend = Col2RGB8_LessPrecision[bglevel >> 10]; + dc_srcalpha = fglevel; + dc_destalpha = bglevel; + } + switch (op) + { + case STYLEOP_Add: + if (fglevel == 0 && bglevel == FRACUNIT) + { + return false; + } + if (fglevel + bglevel <= FRACUNIT) + { // Colors won't overflow when added + if (flags & STYLEF_ColorIsFixed) + { + colfunc = R_FillAddColumn; + hcolfunc_post1 = rt_add1col; + hcolfunc_post4 = rt_add4cols; + } + else if (dc_translation == NULL) + { + colfunc = R_DrawAddColumn; + hcolfunc_post1 = rt_add1col; + hcolfunc_post4 = rt_add4cols; + } + else + { + colfunc = R_DrawTlatedAddColumn; + hcolfunc_post1 = rt_tlateadd1col; + hcolfunc_post4 = rt_tlateadd4cols; + drawer_needs_pal_input = true; + } + } + else + { // Colors might overflow when added + if (flags & STYLEF_ColorIsFixed) + { + colfunc = R_FillAddClampColumn; + hcolfunc_post1 = rt_addclamp1col; + hcolfunc_post4 = rt_addclamp4cols; + } + else if (dc_translation == NULL) + { + colfunc = 
R_DrawAddClampColumn; + hcolfunc_post1 = rt_addclamp1col; + hcolfunc_post4 = rt_addclamp4cols; + } + else + { + colfunc = R_DrawAddClampTranslatedColumn; + hcolfunc_post1 = rt_tlateaddclamp1col; + hcolfunc_post4 = rt_tlateaddclamp4cols; + drawer_needs_pal_input = true; + } + } + return true; + + case STYLEOP_Sub: + if (flags & STYLEF_ColorIsFixed) + { + colfunc = R_FillSubClampColumn; + hcolfunc_post1 = rt_subclamp1col; + hcolfunc_post4 = rt_subclamp4cols; + } + else if (dc_translation == NULL) + { + colfunc = R_DrawSubClampColumn; + hcolfunc_post1 = rt_subclamp1col; + hcolfunc_post4 = rt_subclamp4cols; + } + else + { + colfunc = R_DrawSubClampTranslatedColumn; + hcolfunc_post1 = rt_tlatesubclamp1col; + hcolfunc_post4 = rt_tlatesubclamp4cols; + drawer_needs_pal_input = true; + } + return true; + + case STYLEOP_RevSub: + if (fglevel == 0 && bglevel == FRACUNIT) + { + return false; + } + if (flags & STYLEF_ColorIsFixed) + { + colfunc = R_FillRevSubClampColumn; + hcolfunc_post1 = rt_subclamp1col; + hcolfunc_post4 = rt_subclamp4cols; + } + else if (dc_translation == NULL) + { + colfunc = R_DrawRevSubClampColumn; + hcolfunc_post1 = rt_revsubclamp1col; + hcolfunc_post4 = rt_revsubclamp4cols; + } + else + { + colfunc = R_DrawRevSubClampTranslatedColumn; + hcolfunc_post1 = rt_tlaterevsubclamp1col; + hcolfunc_post4 = rt_tlaterevsubclamp4cols; + drawer_needs_pal_input = true; + } + return true; + + default: + return false; + } + } + + fixed_t GetAlpha(int type, fixed_t alpha) + { + switch (type) + { + case STYLEALPHA_Zero: return 0; + case STYLEALPHA_One: return OPAQUE; + case STYLEALPHA_Src: return alpha; + case STYLEALPHA_InvSrc: return OPAQUE - alpha; + default: return 0; + } + } + + FDynamicColormap *basecolormapsave; + } + + ESPSResult R_SetPatchStyle(FRenderStyle style, fixed_t alpha, int translation, uint32_t color) + { + using namespace drawerargs; + + fixed_t fglevel, bglevel; + + drawer_needs_pal_input = false; + + style.CheckFuzz(); + + if (style.BlendOp == 
STYLEOP_Shadow) + { + style = LegacyRenderStyles[STYLE_TranslucentStencil]; + alpha = TRANSLUC33; + color = 0; + } + + if (style.Flags & STYLEF_TransSoulsAlpha) + { + alpha = fixed_t(transsouls * OPAQUE); + } + else if (style.Flags & STYLEF_Alpha1) + { + alpha = FRACUNIT; + } + else + { + alpha = clamp(alpha, 0, OPAQUE); + } + + if (translation != -1) + { + dc_translation = NULL; + if (translation != 0) + { + FRemapTable *table = TranslationToTable(translation); + if (table != NULL && !table->Inactive) + { + if (r_swtruecolor) + dc_translation = (uint8_t*)table->Palette; + else + dc_translation = table->Remap; + } + } + } + basecolormapsave = basecolormap; + hcolfunc_pre = R_DrawColumnHoriz; + + // Check for special modes + if (style.BlendOp == STYLEOP_Fuzz) + { + colfunc = fuzzcolfunc; + return DoDraw0; + } + else if (style == LegacyRenderStyles[STYLE_Shaded]) + { + // Shaded drawer only gets 16 levels of alpha because it saves memory. + if ((alpha >>= 12) == 0) + return DontDraw; + colfunc = R_DrawShadedColumn; + hcolfunc_post1 = rt_shaded1col; + hcolfunc_post4 = rt_shaded4cols; + drawer_needs_pal_input = true; + dc_color = fixedcolormap ? fixedcolormap->Maps[APART(color)] : basecolormap->Maps[APART(color)]; + basecolormap = &ShadeFakeColormap[16 - alpha]; + if (fixedlightlev >= 0 && fixedcolormap == NULL) + { + R_SetColorMapLight(basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); + } + else + { + R_SetColorMapLight(basecolormap, 0, 0); + } + bool active_columnmethod = r_columnmethod && !r_swtruecolor; + return active_columnmethod ? DoDraw1 : DoDraw0; + } + + fglevel = GetAlpha(style.SrcAlpha, alpha); + bglevel = GetAlpha(style.DestAlpha, alpha); + + if (style.Flags & STYLEF_ColorIsFixed) + { + uint32_t x = fglevel >> 10; + uint32_t r = RPART(color); + uint32_t g = GPART(color); + uint32_t b = BPART(color); + // dc_color is used by the rt_* routines. It is indexed into dc_srcblend. 
+ dc_color = RGB32k.RGB[r >> 3][g >> 3][b >> 3]; + if (style.Flags & STYLEF_InvertSource) + { + r = 255 - r; + g = 255 - g; + b = 255 - b; + } + uint32_t alpha = clamp(fglevel >> (FRACBITS - 8), 0, 255); + dc_srccolor_bgra = (alpha << 24) | (r << 16) | (g << 8) | b; + // dc_srccolor is used by the R_Fill* routines. It is premultiplied + // with the alpha. + dc_srccolor = ((((r*x) >> 4) << 20) | ((g*x) >> 4) | ((((b)*x) >> 4) << 10)) & 0x3feffbff; + hcolfunc_pre = R_FillColumnHoriz; + R_SetColorMapLight(&identitycolormap, 0, 0); + } + + if (!R_SetBlendFunc(style.BlendOp, fglevel, bglevel, style.Flags)) + { + return DontDraw; + } + bool active_columnmethod = r_columnmethod && !r_swtruecolor; + return active_columnmethod ? DoDraw1 : DoDraw0; + } + + ESPSResult R_SetPatchStyle(FRenderStyle style, float alpha, int translation, uint32_t color) + { + return R_SetPatchStyle(style, FLOAT2FIXED(alpha), translation, color); + } + + void R_FinishSetPatchStyle() + { + basecolormap = basecolormapsave; + } + + const uint8_t *R_GetColumn(FTexture *tex, int col) + { + int width; + + // If the texture's width isn't a power of 2, then we need to make it a + // positive offset for proper clamping. 
+ if (col < 0 && (width = tex->GetWidth()) != (1 << tex->WidthBits)) + { + col = width + (col % width); + } + + if (r_swtruecolor) + return (const uint8_t *)tex->GetColumnBgra(col, nullptr); + else + return tex->GetColumn(col, nullptr); + } + + bool R_GetTransMaskDrawers(fixed_t(**tmvline1)(), void(**tmvline4)()) + { + if (colfunc == R_DrawAddColumn) + { + *tmvline1 = tmvline1_add; + *tmvline4 = tmvline4_add; + return true; + } + if (colfunc == R_DrawAddClampColumn) + { + *tmvline1 = tmvline1_addclamp; + *tmvline4 = tmvline4_addclamp; + return true; + } + if (colfunc == R_DrawSubClampColumn) + { + *tmvline1 = tmvline1_subclamp; + *tmvline4 = tmvline4_subclamp; + return true; + } + if (colfunc == R_DrawRevSubClampColumn) + { + *tmvline1 = tmvline1_revsubclamp; + *tmvline4 = tmvline4_revsubclamp; + return true; + } + return false; + } + + void setupvline(int fracbits) + { + drawerargs::vlinebits = fracbits; + } + + void setupmvline(int fracbits) + { + drawerargs::mvlinebits = fracbits; + } + + void setuptmvline(int fracbits) + { + drawerargs::tmvlinebits = fracbits; + } + + void R_SetColorMapLight(FSWColormap *base_colormap, float light, int shade) + { + using namespace drawerargs; + + dc_fcolormap = base_colormap; + if (r_swtruecolor) + { + dc_shade_constants.light_red = dc_fcolormap->Color.r * 256 / 255; + dc_shade_constants.light_green = dc_fcolormap->Color.g * 256 / 255; + dc_shade_constants.light_blue = dc_fcolormap->Color.b * 256 / 255; + dc_shade_constants.light_alpha = dc_fcolormap->Color.a * 256 / 255; + dc_shade_constants.fade_red = dc_fcolormap->Fade.r; + dc_shade_constants.fade_green = dc_fcolormap->Fade.g; + dc_shade_constants.fade_blue = dc_fcolormap->Fade.b; + dc_shade_constants.fade_alpha = dc_fcolormap->Fade.a; + dc_shade_constants.desaturate = MIN(abs(dc_fcolormap->Desaturate), 255) * 255 / 256; + dc_shade_constants.simple_shade = (dc_fcolormap->Color.d == 0x00ffffff && dc_fcolormap->Fade.d == 0x00000000 && dc_fcolormap->Desaturate == 0); + 
dc_colormap = base_colormap->Maps; + dc_light = LIGHTSCALE(light, shade); + } + else + { + dc_colormap = base_colormap->Maps + (GETPALOOKUP(light, shade) << COLORMAPSHIFT); + } + } + + void R_SetDSColorMapLight(FSWColormap *base_colormap, float light, int shade) + { + using namespace drawerargs; + + ds_fcolormap = base_colormap; + if (r_swtruecolor) + { + ds_shade_constants.light_red = ds_fcolormap->Color.r * 256 / 255; + ds_shade_constants.light_green = ds_fcolormap->Color.g * 256 / 255; + ds_shade_constants.light_blue = ds_fcolormap->Color.b * 256 / 255; + ds_shade_constants.light_alpha = ds_fcolormap->Color.a * 256 / 255; + ds_shade_constants.fade_red = ds_fcolormap->Fade.r; + ds_shade_constants.fade_green = ds_fcolormap->Fade.g; + ds_shade_constants.fade_blue = ds_fcolormap->Fade.b; + ds_shade_constants.fade_alpha = ds_fcolormap->Fade.a; + ds_shade_constants.desaturate = MIN(abs(ds_fcolormap->Desaturate), 255) * 255 / 256; + ds_shade_constants.simple_shade = (ds_fcolormap->Color.d == 0x00ffffff && ds_fcolormap->Fade.d == 0x00000000 && ds_fcolormap->Desaturate == 0); + ds_colormap = base_colormap->Maps; + ds_light = LIGHTSCALE(light, shade); + } + else + { + ds_colormap = base_colormap->Maps + (GETPALOOKUP(light, shade) << COLORMAPSHIFT); + } + } + + void R_SetTranslationMap(lighttable_t *translation) + { + using namespace drawerargs; + + if (r_swtruecolor) + { + dc_fcolormap = nullptr; + dc_colormap = nullptr; + dc_translation = translation; + dc_shade_constants.light_red = 256; + dc_shade_constants.light_green = 256; + dc_shade_constants.light_blue = 256; + dc_shade_constants.light_alpha = 256; + dc_shade_constants.fade_red = 0; + dc_shade_constants.fade_green = 0; + dc_shade_constants.fade_blue = 0; + dc_shade_constants.fade_alpha = 256; + dc_shade_constants.desaturate = 0; + dc_shade_constants.simple_shade = true; + dc_light = 0; + } + else + { + dc_fcolormap = nullptr; + dc_colormap = translation; + } + } + + void rt_initcols(uint8_t *buffer) + { + using 
namespace drawerargs; + + for (int y = 3; y >= 0; y--) + horizspan[y] = dc_ctspan[y] = &dc_tspans[y][0]; + + DrawerCommandQueue::QueueCommand(buffer); + } + + void rt_span_coverage(int x, int start, int stop) + { + using namespace drawerargs; + + unsigned int **tspan = &dc_ctspan[x & 3]; + (*tspan)[0] = start; + (*tspan)[1] = stop; + *tspan += 2; + } + + void rt_flip_posts() + { + using namespace drawerargs; + + unsigned int *front = horizspan[dc_x & 3]; + unsigned int *back = dc_ctspan[dc_x & 3] - 2; + + while (front < back) + { + swapvalues(front[0], back[0]); + swapvalues(front[1], back[1]); + front += 2; + back -= 2; + } + } + + void rt_draw4cols(int sx) + { + using namespace drawerargs; + + int x, bad; + unsigned int maxtop, minbot, minnexttop; + + // Place a dummy "span" in each column. These don't get + // drawn. They're just here to avoid special cases in the + // max/min calculations below. + for (x = 0; x < 4; ++x) + { + dc_ctspan[x][0] = screen->GetHeight()+1; + dc_ctspan[x][1] = screen->GetHeight(); + } + + for (;;) + { + // If a column is out of spans, mark it as such + bad = 0; + minnexttop = 0xffffffff; + for (x = 0; x < 4; ++x) + { + if (horizspan[x] >= dc_ctspan[x]) + { + bad |= 1 << x; + } + else if ((horizspan[x]+2)[0] < minnexttop) + { + minnexttop = (horizspan[x]+2)[0]; + } + } + // Once all columns are out of spans, we're done + if (bad == 15) + { + return; + } + + // Find the largest shared area for the spans in each column + maxtop = MAX (MAX (horizspan[0][0], horizspan[1][0]), + MAX (horizspan[2][0], horizspan[3][0])); + minbot = MIN (MIN (horizspan[0][1], horizspan[1][1]), + MIN (horizspan[2][1], horizspan[3][1])); + + // If there is no shared area with these spans, draw each span + // individually and advance to the next spans until we reach a shared area. + // However, only draw spans down to the highest span in the next set of + // spans. 
If we allow the entire height of a span to be drawn, it could + // prevent any more shared areas from being drawn in these four columns. + // + // Example: Suppose we have the following arrangement: + // A CD + // A CD + // B D + // B D + // aB D + // aBcD + // aBcD + // aBc + // + // If we draw the entire height of the spans, we end up drawing this first: + // A CD + // A CD + // B D + // B D + // B D + // B D + // B D + // B D + // B + // + // This leaves only the "a" and "c" columns to be drawn, and they are not + // part of a shared area, but if we can include B and D with them, we can + // get a shared area. So we cut off everything in the first set just + // above the "a" column and end up drawing this first: + // A CD + // A CD + // B D + // B D + // + // Then the next time through, we have the following arrangement with an + // easily shared area to draw: + // aB D + // aBcD + // aBcD + // aBc + if (bad != 0 || maxtop > minbot) + { + int drawcount = 0; + for (x = 0; x < 4; ++x) + { + if (!(bad & 1)) + { + if (horizspan[x][1] < minnexttop) + { + hcolfunc_post1 (x, sx+x, horizspan[x][0], horizspan[x][1]); + horizspan[x] += 2; + drawcount++; + } + else if (minnexttop > horizspan[x][0]) + { + hcolfunc_post1 (x, sx+x, horizspan[x][0], minnexttop-1); + horizspan[x][0] = minnexttop; + drawcount++; + } + } + bad >>= 1; + } + // Drawcount *should* always be non-zero. The reality is that some situations + // can make this not true. Unfortunately, I'm not sure what those situations are. + if (drawcount == 0) + { + return; + } + continue; + } + + // Draw any span fragments above the shared area. + for (x = 0; x < 4; ++x) + { + if (maxtop > horizspan[x][0]) + { + hcolfunc_post1 (x, sx+x, horizspan[x][0], maxtop-1); + } + } + + // Draw the shared area. + hcolfunc_post4 (sx, maxtop, minbot); + + // For each column, if part of the span is past the shared area, + // set its top to just below the shared area. Otherwise, advance + // to the next span in that column. 
+ for (x = 0; x < 4; ++x) + { + if (minbot < horizspan[x][1]) + { + horizspan[x][0] = minbot+1; + } + else + { + horizspan[x] += 2; + } + } + } + } + + void R_SetupSpanBits(FTexture *tex) + { + using namespace drawerargs; + + tex->GetWidth(); + ds_xbits = tex->WidthBits; + ds_ybits = tex->HeightBits; + if ((1 << ds_xbits) > tex->GetWidth()) + { + ds_xbits--; + } + if ((1 << ds_ybits) > tex->GetHeight()) + { + ds_ybits--; + } + } + + void R_SetSpanColormap(FDynamicColormap *colormap, int shade) + { + R_SetDSColorMapLight(colormap, 0, shade); + } + + void R_SetSpanSource(FTexture *tex) + { + using namespace drawerargs; + + ds_source = r_swtruecolor ? (const uint8_t*)tex->GetPixelsBgra() : tex->GetPixels(); + ds_source_mipmapped = tex->Mipmapped() && tex->GetWidth() > 1 && tex->GetHeight() > 1; + } + + ///////////////////////////////////////////////////////////////////////// + + void R_FillColumnHoriz() + { + using namespace drawerargs; + + if (dc_count <= 0) + return; + + int x = dc_x & 3; + unsigned int **span = &dc_ctspan[x]; + (*span)[0] = dc_yl; + (*span)[1] = dc_yh; + *span += 2; + + DrawerCommandQueue::QueueCommand(); + } + + void R_DrawColumnHoriz() + { + using namespace drawerargs; + + if (dc_count <= 0) + return; + + int x = dc_x & 3; + unsigned int **span = &dc_ctspan[x]; + (*span)[0] = dc_yl; + (*span)[1] = dc_yh; + *span += 2; + + if (drawer_needs_pal_input) + DrawerCommandQueue::QueueCommand>(); + else + DrawerCommandQueue::QueueCommand>(); + } + + // Copies one span at hx to the screen at sx. + void rt_copy1col(int hx, int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + } + + // Copies all four spans to the screen starting at sx. + void rt_copy4cols(int sx, int yl, int yh) + { + // To do: we could do this with SSE using __m128i + rt_copy1col(0, sx, yl, yh); + rt_copy1col(1, sx + 1, yl, yh); + rt_copy1col(2, sx + 2, yl, yh); + rt_copy1col(3, sx + 3, yl, yh); + } + + // Maps one span at hx to the screen at sx. 
+ void rt_map1col(int hx, int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + } + + // Maps all four spans to the screen starting at sx. + void rt_map4cols(int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + } + + // Translates one span at hx to the screen at sx. + void rt_tlate1col(int hx, int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + } + + // Translates all four spans to the screen starting at sx. + void rt_tlate4cols(int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + } + + // Adds one span at hx to the screen at sx without clamping. + void rt_add1col(int hx, int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + } + + // Adds all four spans to the screen starting at sx without clamping. + void rt_add4cols(int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + } + + // Translates and adds one span at hx to the screen at sx without clamping. + void rt_tlateadd1col(int hx, int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + } + + // Translates and adds all four spans to the screen starting at sx without clamping. + void rt_tlateadd4cols(int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + } + + // Shades one span at hx to the screen at sx. + void rt_shaded1col(int hx, int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + } + + // Shades all four spans to the screen starting at sx. + void rt_shaded4cols(int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + } + + // Adds one span at hx to the screen at sx with clamping. + void rt_addclamp1col(int hx, int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + } + + // Adds all four spans to the screen starting at sx with clamping. 
+ void rt_addclamp4cols(int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + } + + // Translates and adds one span at hx to the screen at sx with clamping. + void rt_tlateaddclamp1col(int hx, int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + } + + // Translates and adds all four spans to the screen starting at sx with clamping. + void rt_tlateaddclamp4cols(int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + } + + // Subtracts one span at hx to the screen at sx with clamping. + void rt_subclamp1col(int hx, int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + } + + // Subtracts all four spans to the screen starting at sx with clamping. + void rt_subclamp4cols(int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + } + + // Translates and subtracts one span at hx to the screen at sx with clamping. + void rt_tlatesubclamp1col(int hx, int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + } + + // Translates and subtracts all four spans to the screen starting at sx with clamping. + void rt_tlatesubclamp4cols(int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + } + + // Subtracts one span at hx from the screen at sx with clamping. + void rt_revsubclamp1col(int hx, int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + } + + // Subtracts all four spans from the screen starting at sx with clamping. + void rt_revsubclamp4cols(int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + } + + // Translates and subtracts one span at hx from the screen at sx with clamping. + void rt_tlaterevsubclamp1col(int hx, int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + } + + // Translates and subtracts all four spans from the screen starting at sx with clamping. 
+ void rt_tlaterevsubclamp4cols(int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + } + + uint32_t vlinec1() + { + using namespace drawerargs; + + DrawerCommandQueue::QueueCommand(); + return dc_texturefrac + dc_count * dc_iscale; + } + + void vlinec4() + { + using namespace drawerargs; + + DrawerCommandQueue::QueueCommand(); + for (int i = 0; i < 4; i++) + vplce[i] += vince[i] * dc_count; + } + + uint32_t mvlinec1() + { + using namespace drawerargs; + + DrawerCommandQueue::QueueCommand(); + return dc_texturefrac + dc_count * dc_iscale; + } + + void mvlinec4() + { + using namespace drawerargs; + + DrawerCommandQueue::QueueCommand(); + for (int i = 0; i < 4; i++) + vplce[i] += vince[i] * dc_count; + } + + fixed_t tmvline1_add() + { + using namespace drawerargs; + + DrawerCommandQueue::QueueCommand(); + return dc_texturefrac + dc_count * dc_iscale; + } + + void tmvline4_add() + { + using namespace drawerargs; + + DrawerCommandQueue::QueueCommand(); + for (int i = 0; i < 4; i++) + vplce[i] += vince[i] * dc_count; + } + + fixed_t tmvline1_addclamp() + { + using namespace drawerargs; + + DrawerCommandQueue::QueueCommand(); + return dc_texturefrac + dc_count * dc_iscale; + } + + void tmvline4_addclamp() + { + using namespace drawerargs; + + DrawerCommandQueue::QueueCommand(); + for (int i = 0; i < 4; i++) + vplce[i] += vince[i] * dc_count; + } + + fixed_t tmvline1_subclamp() + { + using namespace drawerargs; + + DrawerCommandQueue::QueueCommand(); + return dc_texturefrac + dc_count * dc_iscale; + } + + void tmvline4_subclamp() + { + using namespace drawerargs; + + DrawerCommandQueue::QueueCommand(); + for (int i = 0; i < 4; i++) + vplce[i] += vince[i] * dc_count; + } + + fixed_t tmvline1_revsubclamp() + { + using namespace drawerargs; + + DrawerCommandQueue::QueueCommand(); + return dc_texturefrac + dc_count * dc_iscale; + } + + void tmvline4_revsubclamp() + { + using namespace drawerargs; + + DrawerCommandQueue::QueueCommand(); + for (int i 
= 0; i < 4; i++) + vplce[i] += vince[i] * dc_count; + } + + void R_DrawSingleSkyCol1(uint32_t solid_top, uint32_t solid_bottom) + { + DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); + } + + void R_DrawSingleSkyCol4(uint32_t solid_top, uint32_t solid_bottom) + { + DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); + } + + void R_DrawDoubleSkyCol1(uint32_t solid_top, uint32_t solid_bottom) + { + DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); + } + + void R_DrawDoubleSkyCol4(uint32_t solid_top, uint32_t solid_bottom) + { + DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); + } + + void R_DrawColumn() + { + DrawerCommandQueue::QueueCommand(); + } + + void R_FillColumn() + { + DrawerCommandQueue::QueueCommand(); + } + + void R_FillAddColumn() + { + DrawerCommandQueue::QueueCommand(); + } + + void R_FillAddClampColumn() + { + DrawerCommandQueue::QueueCommand(); + } + + void R_FillSubClampColumn() + { + DrawerCommandQueue::QueueCommand(); + } + + void R_FillRevSubClampColumn() + { + DrawerCommandQueue::QueueCommand(); + } + + void R_DrawFuzzColumn() + { + using namespace drawerargs; + + DrawerCommandQueue::QueueCommand(); + + dc_yl = MAX(dc_yl, 1); + dc_yh = MIN(dc_yh, fuzzviewheight); + if (dc_yl <= dc_yh) + fuzzpos = (fuzzpos + dc_yh - dc_yl + 1) % FUZZTABLE; + } + + void R_DrawAddColumn() + { + DrawerCommandQueue::QueueCommand(); + } + + void R_DrawTranslatedColumn() + { + DrawerCommandQueue::QueueCommand(); + } + + void R_DrawTlatedAddColumn() + { + DrawerCommandQueue::QueueCommand(); + } + + void R_DrawShadedColumn() + { + DrawerCommandQueue::QueueCommand(); + } + + void R_DrawAddClampColumn() + { + DrawerCommandQueue::QueueCommand(); + } + + void R_DrawAddClampTranslatedColumn() + { + DrawerCommandQueue::QueueCommand(); + } + + void R_DrawSubClampColumn() + { + DrawerCommandQueue::QueueCommand(); + } + + void R_DrawSubClampTranslatedColumn() + { + DrawerCommandQueue::QueueCommand(); + } + + void R_DrawRevSubClampColumn() + { + 
DrawerCommandQueue::QueueCommand(); + } + + void R_DrawRevSubClampTranslatedColumn() + { + DrawerCommandQueue::QueueCommand(); + } + + void R_DrawSpan() + { + DrawerCommandQueue::QueueCommand(); + } + + void R_DrawSpanMasked() + { + DrawerCommandQueue::QueueCommand(); + } + + void R_DrawSpanTranslucent() + { + DrawerCommandQueue::QueueCommand(); + } + + void R_DrawSpanMaskedTranslucent() + { + DrawerCommandQueue::QueueCommand(); + } + + void R_DrawSpanAddClamp() + { + DrawerCommandQueue::QueueCommand(); + } + + void R_DrawSpanMaskedAddClamp() + { + DrawerCommandQueue::QueueCommand(); + } + + void R_FillSpan() + { + DrawerCommandQueue::QueueCommand(); + } + + void R_DrawTiltedSpan(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy) + { + DrawerCommandQueue::QueueCommand(y, x1, x2, plane_sz, plane_su, plane_sv, plane_shade, planeshade, planelightfloat, pviewx, pviewy); + } + + void R_DrawColoredSpan(int y, int x1, int x2) + { + DrawerCommandQueue::QueueCommand(y, x1, x2); + } + + namespace + { + ShadeConstants slab_rgba_shade_constants; + const uint8_t *slab_rgba_colormap; + fixed_t slab_rgba_light; + } + + void R_SetupDrawSlab(FSWColormap *base_colormap, float light, int shade) + { + slab_rgba_shade_constants.light_red = base_colormap->Color.r * 256 / 255; + slab_rgba_shade_constants.light_green = base_colormap->Color.g * 256 / 255; + slab_rgba_shade_constants.light_blue = base_colormap->Color.b * 256 / 255; + slab_rgba_shade_constants.light_alpha = base_colormap->Color.a * 256 / 255; + slab_rgba_shade_constants.fade_red = base_colormap->Fade.r; + slab_rgba_shade_constants.fade_green = base_colormap->Fade.g; + slab_rgba_shade_constants.fade_blue = base_colormap->Fade.b; + slab_rgba_shade_constants.fade_alpha = base_colormap->Fade.a; + slab_rgba_shade_constants.desaturate = MIN(abs(base_colormap->Desaturate), 255) * 255 / 256; + 
slab_rgba_shade_constants.simple_shade = (base_colormap->Color.d == 0x00ffffff && base_colormap->Fade.d == 0x00000000 && base_colormap->Desaturate == 0); + slab_rgba_colormap = base_colormap->Maps; + slab_rgba_light = LIGHTSCALE(light, shade); + } + + void R_DrawSlab(int dx, fixed_t v, int dy, fixed_t vi, const uint8_t *vptr, uint8_t *p) + { + DrawerCommandQueue::QueueCommand(dx, v, dy, vi, vptr, p, slab_rgba_shade_constants, slab_rgba_colormap, slab_rgba_light); + } + + void R_DrawFogBoundarySection(int y, int y2, int x1) + { + for (; y < y2; ++y) + { + int x2 = spanend[y]; + DrawerCommandQueue::QueueCommand(y, x1, x2); + } + } + + void R_DrawFogBoundary(int x1, int x2, short *uclip, short *dclip) + { + // This is essentially the same as R_MapVisPlane but with an extra step + // to create new horizontal spans whenever the light changes enough that + // we need to use a new colormap. + + double lightstep = rw_lightstep; + double light = rw_light + rw_lightstep*(x2 - x1 - 1); + int x = x2 - 1; + int t2 = uclip[x]; + int b2 = dclip[x]; + int rcolormap = GETPALOOKUP(light, wallshade); + int lcolormap; + uint8_t *basecolormapdata = basecolormap->Maps; + + if (b2 > t2) + { + clearbufshort(spanend + t2, b2 - t2, x); + } + + R_SetColorMapLight(basecolormap, (float)light, wallshade); + + uint8_t *fake_dc_colormap = basecolormap->Maps + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT); + + for (--x; x >= x1; --x) + { + int t1 = uclip[x]; + int b1 = dclip[x]; + const int xr = x + 1; + int stop; + + light -= rw_lightstep; + lcolormap = GETPALOOKUP(light, wallshade); + if (lcolormap != rcolormap) + { + if (t2 < b2 && rcolormap != 0) + { // Colormap 0 is always the identity map, so rendering it is + // just a waste of time. 
+ R_DrawFogBoundarySection(t2, b2, xr); + } + if (t1 < t2) t2 = t1; + if (b1 > b2) b2 = b1; + if (t2 < b2) + { + clearbufshort(spanend + t2, b2 - t2, x); + } + rcolormap = lcolormap; + R_SetColorMapLight(basecolormap, (float)light, wallshade); + fake_dc_colormap = basecolormap->Maps + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT); + } + else + { + if (fake_dc_colormap != basecolormapdata) + { + stop = MIN(t1, b2); + while (t2 < stop) + { + int y = t2++; + DrawerCommandQueue::QueueCommand(y, xr, spanend[y]); + } + stop = MAX(b1, t2); + while (b2 > stop) + { + int y = --b2; + DrawerCommandQueue::QueueCommand(y, xr, spanend[y]); + } + } + else + { + t2 = MAX(t2, MIN(t1, b2)); + b2 = MIN(b2, MAX(b1, t2)); + } + + stop = MIN(t2, b1); + while (t1 < stop) + { + spanend[t1++] = x; + } + stop = MAX(b2, t2); + while (b1 > stop) + { + spanend[--b1] = x; + } + } + + t2 = uclip[x]; + b2 = dclip[x]; + } + if (t2 < b2 && rcolormap != 0) + { + R_DrawFogBoundarySection(t2, b2, x1); + } + } + + void R_DrawParticle(vissprite_t *sprite) + { + if (r_swtruecolor) + R_DrawParticle_rgba(sprite); + else + R_DrawParticle_C(sprite); + } +} diff --git a/src/r_draw_tc.h b/src/r_draw_tc.h new file mode 100644 index 0000000000..8c1af58fcb --- /dev/null +++ b/src/r_draw_tc.h @@ -0,0 +1,239 @@ + +#pragma once + +#include "r_defs.h" + +struct FSWColormap; + +EXTERN_CVAR(Bool, r_multithreaded); +EXTERN_CVAR(Bool, r_magfilter); +EXTERN_CVAR(Bool, r_minfilter); +EXTERN_CVAR(Bool, r_mipmap); +EXTERN_CVAR(Float, r_lod_bias); +EXTERN_CVAR(Int, r_drawfuzz); +EXTERN_CVAR(Bool, r_drawtrans); +EXTERN_CVAR(Float, transsouls); +EXTERN_CVAR(Int, r_columnmethod); + +namespace swrenderer +{ + struct vissprite_t; + + struct ShadeConstants + { + uint16_t light_alpha; + uint16_t light_red; + uint16_t light_green; + uint16_t light_blue; + uint16_t fade_alpha; + uint16_t fade_red; + uint16_t fade_green; + uint16_t fade_blue; + uint16_t desaturate; + bool simple_shade; + }; + + extern double dc_texturemid; + + 
namespace drawerargs + { + extern int dc_pitch; + extern lighttable_t *dc_colormap; + extern FSWColormap *dc_fcolormap; + extern ShadeConstants dc_shade_constants; + extern fixed_t dc_light; + extern int dc_x; + extern int dc_yl; + extern int dc_yh; + extern fixed_t dc_iscale; + extern fixed_t dc_texturefrac; + extern uint32_t dc_textureheight; + extern int dc_color; + extern uint32_t dc_srccolor; + extern uint32_t dc_srccolor_bgra; + extern uint32_t *dc_srcblend; + extern uint32_t *dc_destblend; + extern fixed_t dc_srcalpha; + extern fixed_t dc_destalpha; + extern const uint8_t *dc_source; + extern const uint8_t *dc_source2; + extern uint32_t dc_texturefracx; + extern uint8_t *dc_translation; + extern uint8_t *dc_dest; + extern uint8_t *dc_destorg; + extern int dc_destheight; + extern int dc_count; + + extern bool drawer_needs_pal_input; + + extern uint32_t vplce[4]; + extern uint32_t vince[4]; + extern uint8_t *palookupoffse[4]; + extern fixed_t palookuplight[4]; + extern const uint8_t *bufplce[4]; + extern const uint8_t *bufplce2[4]; + extern uint32_t buftexturefracx[4]; + extern uint32_t bufheight[4]; + extern int vlinebits; + extern int mvlinebits; + extern int tmvlinebits; + + extern int ds_y; + extern int ds_x1; + extern int ds_x2; + extern lighttable_t * ds_colormap; + extern FSWColormap *ds_fcolormap; + extern ShadeConstants ds_shade_constants; + extern dsfixed_t ds_light; + extern dsfixed_t ds_xfrac; + extern dsfixed_t ds_yfrac; + extern dsfixed_t ds_xstep; + extern dsfixed_t ds_ystep; + extern int ds_xbits; + extern int ds_ybits; + extern fixed_t ds_alpha; + extern double ds_lod; + extern const uint8_t *ds_source; + extern bool ds_source_mipmapped; + extern int ds_color; + + extern unsigned int dc_tspans[4][MAXHEIGHT]; + extern unsigned int *dc_ctspan[4]; + extern unsigned int *horizspan[4]; + } + + extern int ylookup[MAXHEIGHT]; + extern uint8_t shadetables[/*NUMCOLORMAPS*16*256*/]; + extern FDynamicColormap ShadeFakeColormap[16]; + extern uint8_t 
identitymap[256]; + extern FDynamicColormap identitycolormap; + + // Spectre/Invisibility. + #define FUZZTABLE 50 + extern int fuzzoffset[FUZZTABLE + 1]; + extern int fuzzpos; + extern int fuzzviewheight; + + extern bool r_swtruecolor; + + void R_InitColumnDrawers(); + void R_InitShadeMaps(); + void R_InitFuzzTable(int fuzzoff); + + enum ESPSResult + { + DontDraw, // not useful to draw this + DoDraw0, // draw this as if r_columnmethod is 0 + DoDraw1, // draw this as if r_columnmethod is 1 + }; + + ESPSResult R_SetPatchStyle(FRenderStyle style, fixed_t alpha, int translation, uint32_t color); + ESPSResult R_SetPatchStyle(FRenderStyle style, float alpha, int translation, uint32_t color); + void R_FinishSetPatchStyle(); // Call this after finished drawing the current thing, in case its style was STYLE_Shade + bool R_GetTransMaskDrawers(fixed_t(**tmvline1)(), void(**tmvline4)()); + + const uint8_t *R_GetColumn(FTexture *tex, int col); + void wallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const uint8_t *(*getcol)(FTexture *tex, int col) = R_GetColumn); + void maskwallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const uint8_t *(*getcol)(FTexture *tex, int col) = R_GetColumn); + void transmaskwallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const uint8_t *(*getcol)(FTexture *tex, int col) = R_GetColumn); + + void rt_initcols(uint8_t *buffer = nullptr); + void rt_span_coverage(int x, int start, int stop); + void rt_draw4cols(int sx); + void rt_flip_posts(); + void rt_copy1col(int hx, int sx, int yl, int yh); + void rt_copy4cols(int sx, int yl, int yh); + void rt_shaded1col(int hx, int sx, int yl, int yh); + void rt_shaded4cols(int sx, int yl, int yh); + void rt_map1col(int hx, int sx, int yl, int yh); + void rt_add1col(int hx, int sx, int yl, int yh); + void rt_addclamp1col(int hx, int sx, int yl, int yh); + void rt_subclamp1col(int 
hx, int sx, int yl, int yh); + void rt_revsubclamp1col(int hx, int sx, int yl, int yh); + void rt_tlate1col(int hx, int sx, int yl, int yh); + void rt_tlateadd1col(int hx, int sx, int yl, int yh); + void rt_tlateaddclamp1col(int hx, int sx, int yl, int yh); + void rt_tlatesubclamp1col(int hx, int sx, int yl, int yh); + void rt_tlaterevsubclamp1col(int hx, int sx, int yl, int yh); + void rt_map4cols(int sx, int yl, int yh); + void rt_add4cols(int sx, int yl, int yh); + void rt_addclamp4cols(int sx, int yl, int yh); + void rt_subclamp4cols(int sx, int yl, int yh); + void rt_revsubclamp4cols(int sx, int yl, int yh); + void rt_tlate4cols(int sx, int yl, int yh); + void rt_tlateadd4cols(int sx, int yl, int yh); + void rt_tlateaddclamp4cols(int sx, int yl, int yh); + void rt_tlatesubclamp4cols(int sx, int yl, int yh); + void rt_tlaterevsubclamp4cols(int sx, int yl, int yh); + void R_DrawColumnHoriz(); + void R_DrawColumn(); + void R_DrawFuzzColumn(); + void R_DrawTranslatedColumn(); + void R_DrawShadedColumn(); + void R_FillColumn(); + void R_FillAddColumn(); + void R_FillAddClampColumn(); + void R_FillSubClampColumn(); + void R_FillRevSubClampColumn(); + void R_DrawAddColumn(); + void R_DrawTlatedAddColumn(); + void R_DrawAddClampColumn(); + void R_DrawAddClampTranslatedColumn(); + void R_DrawSubClampColumn(); + void R_DrawSubClampTranslatedColumn(); + void R_DrawRevSubClampColumn(); + void R_DrawRevSubClampTranslatedColumn(); + void R_DrawSpan(); + void R_DrawSpanMasked(); + void R_DrawSpanTranslucent(); + void R_DrawSpanMaskedTranslucent(); + void R_DrawSpanAddClamp(); + void R_DrawSpanMaskedAddClamp(); + void R_FillSpan(); + void R_DrawTiltedSpan(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy); + void R_DrawColoredSpan(int y, int x1, int x2); + void R_SetupDrawSlab(FSWColormap *base_colormap, float light, int shade); + void 
R_DrawSlab(int dx, fixed_t v, int dy, fixed_t vi, const uint8_t *vptr, uint8_t *p); + void R_DrawFogBoundary(int x1, int x2, short *uclip, short *dclip); + uint32_t vlinec1(); + void vlinec4(); + uint32_t mvlinec1(); + void mvlinec4(); + fixed_t tmvline1_add(); + void tmvline4_add(); + fixed_t tmvline1_addclamp(); + void tmvline4_addclamp(); + fixed_t tmvline1_subclamp(); + void tmvline4_subclamp(); + fixed_t tmvline1_revsubclamp(); + void tmvline4_revsubclamp(); + void R_FillColumnHoriz(); + void R_FillSpan(); + + inline uint32_t dovline1() { return vlinec1(); } + inline void dovline4() { vlinec4(); } + inline uint32_t domvline1() { return mvlinec1(); } + inline void domvline4() { mvlinec4(); } + + void setupvline(int fracbits); + void setupmvline(int fracbits); + void setuptmvline(int fracbits); + + void R_DrawSingleSkyCol1(uint32_t solid_top, uint32_t solid_bottom); + void R_DrawSingleSkyCol4(uint32_t solid_top, uint32_t solid_bottom); + void R_DrawDoubleSkyCol1(uint32_t solid_top, uint32_t solid_bottom); + void R_DrawDoubleSkyCol4(uint32_t solid_top, uint32_t solid_bottom); + + // Sets dc_colormap and dc_light to their appropriate values depending on the output format (pal vs true color) + void R_SetColorMapLight(FSWColormap *base_colormap, float light, int shade); + void R_SetDSColorMapLight(FSWColormap *base_colormap, float light, int shade); + void R_SetTranslationMap(lighttable_t *translation); + + void R_SetupSpanBits(FTexture *tex); + void R_SetSpanColormap(FDynamicColormap *colormap, int shade); + void R_SetSpanSource(FTexture *tex); + + void R_MapTiltedPlane(int y, int x1); + void R_MapColoredPlane(int y, int x1); + void R_DrawParticle(vissprite_t *); +} diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp index 539135afe9..d5aeed8a91 100644 --- a/src/r_drawt_rgba.cpp +++ b/src/r_drawt_rgba.cpp @@ -1,37 +1,23 @@ /* -** r_drawt_rgba.cpp -** Faster column drawers for modern processors, true color edition +** Drawer commands for the RT family of 
drawers +** Copyright (c) 2016 Magnus Norddahl ** -**--------------------------------------------------------------------------- -** Copyright 1998-2006 Randy Heit -** All rights reserved. +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. ** -** Redistribution and use in source and binary forms, with or without -** modification, are permitted provided that the following conditions -** are met: +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: ** -** 1. Redistributions of source code must retain the above copyright -** notice, this list of conditions and the following disclaimer. -** 2. Redistributions in binary form must reproduce the above copyright -** notice, this list of conditions and the following disclaimer in the -** documentation and/or other materials provided with the distribution. -** 3. The name of the author may not be used to endorse or promote products -** derived from this software without specific prior written permission. +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. ** -** THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR -** IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -** OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
-** IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, -** INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT -** NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -** DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -** THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -** (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF -** THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -**--------------------------------------------------------------------------- -** -** True color versions of the similar functions in r_drawt.cpp -** Please see r_drawt.cpp for a description of the globals used. */ #include "templates.h" @@ -47,15 +33,7 @@ namespace swrenderer { - -///////////////////////////////////////////////////////////////////////////// - -class DrawColumnRt1LLVMCommand : public DrawerCommand -{ -protected: - DrawColumnArgs args; - - WorkerThreadData ThreadData(DrawerThread *thread) + WorkerThreadData DrawColumnRt1LLVMCommand::ThreadData(DrawerThread *thread) { WorkerThreadData d; d.core = thread->core; @@ -66,8 +44,7 @@ protected: return d; } -public: - DrawColumnRt1LLVMCommand(int hx, int sx, int yl, int yh) + DrawColumnRt1LLVMCommand::DrawColumnRt1LLVMCommand(int hx, int sx, int yl, int yh) { using namespace drawerargs; @@ -105,90 +82,38 @@ public: DetectRangeError(args.dest, args.dest_y, args.count); } - void Execute(DrawerThread *thread) override + void DrawColumnRt1LLVMCommand::Execute(DrawerThread *thread) { WorkerThreadData d = ThreadData(thread); Drawers::Instance()->DrawColumnRt1(&args, &d); } - FString DebugInfo() override + FString DrawColumnRt1LLVMCommand::DebugInfo() { return "DrawColumnRt\n" + args.ToString(); } -}; -#define DECLARE_DRAW_COMMAND(name, func, base) \ -class name##LLVMCommand : public base \ -{ \ -public: \ - using base::base; \ - void Execute(DrawerThread *thread) override \ - { \ - WorkerThreadData d = 
ThreadData(thread); \ - Drawers::Instance()->func(&args, &d); \ - } \ -}; + ///////////////////////////////////////////////////////////////////////////// -DECLARE_DRAW_COMMAND(DrawColumnRt1Copy, DrawColumnRt1Copy, DrawColumnRt1LLVMCommand); -DECLARE_DRAW_COMMAND(DrawColumnRt1Add, DrawColumnRt1Add, DrawColumnRt1LLVMCommand); -DECLARE_DRAW_COMMAND(DrawColumnRt1Shaded, DrawColumnRt1Shaded, DrawColumnRt1LLVMCommand); -DECLARE_DRAW_COMMAND(DrawColumnRt1AddClamp, DrawColumnRt1AddClamp, DrawColumnRt1LLVMCommand); -DECLARE_DRAW_COMMAND(DrawColumnRt1SubClamp, DrawColumnRt1SubClamp, DrawColumnRt1LLVMCommand); -DECLARE_DRAW_COMMAND(DrawColumnRt1RevSubClamp, DrawColumnRt1RevSubClamp, DrawColumnRt1LLVMCommand); -DECLARE_DRAW_COMMAND(DrawColumnRt1Translated, DrawColumnRt1Translated, DrawColumnRt1LLVMCommand); -DECLARE_DRAW_COMMAND(DrawColumnRt1TlatedAdd, DrawColumnRt1TlatedAdd, DrawColumnRt1LLVMCommand); -DECLARE_DRAW_COMMAND(DrawColumnRt1AddClampTranslated, DrawColumnRt1AddClampTranslated, DrawColumnRt1LLVMCommand); -DECLARE_DRAW_COMMAND(DrawColumnRt1SubClampTranslated, DrawColumnRt1SubClampTranslated, DrawColumnRt1LLVMCommand); -DECLARE_DRAW_COMMAND(DrawColumnRt1RevSubClampTranslated, DrawColumnRt1RevSubClampTranslated, DrawColumnRt1LLVMCommand); -DECLARE_DRAW_COMMAND(DrawColumnRt4, DrawColumnRt4, DrawColumnRt1LLVMCommand); -DECLARE_DRAW_COMMAND(DrawColumnRt4Copy, DrawColumnRt4Copy, DrawColumnRt1LLVMCommand); -DECLARE_DRAW_COMMAND(DrawColumnRt4Add, DrawColumnRt4Add, DrawColumnRt1LLVMCommand); -DECLARE_DRAW_COMMAND(DrawColumnRt4Shaded, DrawColumnRt4Shaded, DrawColumnRt1LLVMCommand); -DECLARE_DRAW_COMMAND(DrawColumnRt4AddClamp, DrawColumnRt4AddClamp, DrawColumnRt1LLVMCommand); -DECLARE_DRAW_COMMAND(DrawColumnRt4SubClamp, DrawColumnRt4SubClamp, DrawColumnRt1LLVMCommand); -DECLARE_DRAW_COMMAND(DrawColumnRt4RevSubClamp, DrawColumnRt4RevSubClamp, DrawColumnRt1LLVMCommand); -DECLARE_DRAW_COMMAND(DrawColumnRt4Translated, DrawColumnRt4Translated, DrawColumnRt1LLVMCommand); 
-DECLARE_DRAW_COMMAND(DrawColumnRt4TlatedAdd, DrawColumnRt4TlatedAdd, DrawColumnRt1LLVMCommand); -DECLARE_DRAW_COMMAND(DrawColumnRt4AddClampTranslated, DrawColumnRt4AddClampTranslated, DrawColumnRt1LLVMCommand); -DECLARE_DRAW_COMMAND(DrawColumnRt4SubClampTranslated, DrawColumnRt4SubClampTranslated, DrawColumnRt1LLVMCommand); -DECLARE_DRAW_COMMAND(DrawColumnRt4RevSubClampTranslated, DrawColumnRt4RevSubClampTranslated, DrawColumnRt1LLVMCommand); - -///////////////////////////////////////////////////////////////////////////// - -class RtInitColsRGBACommand : public DrawerCommand -{ - BYTE * RESTRICT buff; - -public: - RtInitColsRGBACommand(BYTE *buff) + RtInitColsRGBACommand::RtInitColsRGBACommand(BYTE *buff) { this->buff = buff; } - void Execute(DrawerThread *thread) override + void RtInitColsRGBACommand::Execute(DrawerThread *thread) { thread->dc_temp_rgba = buff == NULL ? thread->dc_temp_rgbabuff_rgba : (uint32_t*)buff; } - FString DebugInfo() override + FString RtInitColsRGBACommand::DebugInfo() { return "RtInitCols"; } -}; -template -class DrawColumnHorizRGBACommand : public DrawerCommand -{ - int _count; - fixed_t _iscale; - fixed_t _texturefrac; - const InputPixelType * RESTRICT _source; - int _x; - int _yl; - int _yh; + ///////////////////////////////////////////////////////////////////////////// -public: - DrawColumnHorizRGBACommand() + template + DrawColumnHorizRGBACommand::DrawColumnHorizRGBACommand() { using namespace drawerargs; @@ -201,7 +126,8 @@ public: _yh = dc_yh; } - void Execute(DrawerThread *thread) override + template + void DrawColumnHorizRGBACommand::Execute(DrawerThread *thread) { int count = _count; uint32_t *dest; @@ -252,22 +178,19 @@ public: } while (--count); } - FString DebugInfo() override + template + FString DrawColumnHorizRGBACommand::DebugInfo() { return "DrawColumnHoriz"; } -}; -class FillColumnHorizRGBACommand : public DrawerCommand -{ - int _x; - int _yl; - int _yh; - int _count; - uint32_t _color; + // Generate code for the 
versions we use: + template class DrawColumnHorizRGBACommand; + template class DrawColumnHorizRGBACommand; -public: - FillColumnHorizRGBACommand() + ///////////////////////////////////////////////////////////////////////////// + + FillColumnHorizRGBACommand::FillColumnHorizRGBACommand() { using namespace drawerargs; @@ -278,7 +201,7 @@ public: _yh = dc_yh; } - void Execute(DrawerThread *thread) override + void FillColumnHorizRGBACommand::Execute(DrawerThread *thread) { int count = _count; uint32_t color = _color; @@ -304,220 +227,8 @@ public: } while (--count); } - FString DebugInfo() override + FString FillColumnHorizRGBACommand::DebugInfo() { return "FillColumnHoriz"; } -}; - -///////////////////////////////////////////////////////////////////////////// - -// Copies one span at hx to the screen at sx. -void rt_copy1col_rgba (int hx, int sx, int yl, int yh) -{ - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); -} - -// Copies all four spans to the screen starting at sx. -void rt_copy4cols_rgba (int sx, int yl, int yh) -{ - // To do: we could do this with SSE using __m128i - rt_copy1col_rgba(0, sx, yl, yh); - rt_copy1col_rgba(1, sx + 1, yl, yh); - rt_copy1col_rgba(2, sx + 2, yl, yh); - rt_copy1col_rgba(3, sx + 3, yl, yh); -} - -// Maps one span at hx to the screen at sx. -void rt_map1col_rgba (int hx, int sx, int yl, int yh) -{ - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); -} - -// Maps all four spans to the screen starting at sx. -void rt_map4cols_rgba (int sx, int yl, int yh) -{ - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); -} - -// Translates one span at hx to the screen at sx. -void rt_tlate1col_rgba (int hx, int sx, int yl, int yh) -{ - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); -} - -// Translates all four spans to the screen starting at sx. -void rt_tlate4cols_rgba (int sx, int yl, int yh) -{ - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); -} - -// Adds one span at hx to the screen at sx without clamping. 
-void rt_add1col_rgba (int hx, int sx, int yl, int yh) -{ - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); -} - -// Adds all four spans to the screen starting at sx without clamping. -void rt_add4cols_rgba (int sx, int yl, int yh) -{ - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); -} - -// Translates and adds one span at hx to the screen at sx without clamping. -void rt_tlateadd1col_rgba (int hx, int sx, int yl, int yh) -{ - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); -} - -// Translates and adds all four spans to the screen starting at sx without clamping. -void rt_tlateadd4cols_rgba(int sx, int yl, int yh) -{ - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); -} - -// Shades one span at hx to the screen at sx. -void rt_shaded1col_rgba (int hx, int sx, int yl, int yh) -{ - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); -} - -// Shades all four spans to the screen starting at sx. -void rt_shaded4cols_rgba (int sx, int yl, int yh) -{ - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); -} - -// Adds one span at hx to the screen at sx with clamping. -void rt_addclamp1col_rgba (int hx, int sx, int yl, int yh) -{ - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); -} - -// Adds all four spans to the screen starting at sx with clamping. -void rt_addclamp4cols_rgba (int sx, int yl, int yh) -{ - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); -} - -// Translates and adds one span at hx to the screen at sx with clamping. -void rt_tlateaddclamp1col_rgba (int hx, int sx, int yl, int yh) -{ - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); -} - -// Translates and adds all four spans to the screen starting at sx with clamping. -void rt_tlateaddclamp4cols_rgba (int sx, int yl, int yh) -{ - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); -} - -// Subtracts one span at hx to the screen at sx with clamping. 
-void rt_subclamp1col_rgba (int hx, int sx, int yl, int yh) -{ - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); -} - -// Subtracts all four spans to the screen starting at sx with clamping. -void rt_subclamp4cols_rgba (int sx, int yl, int yh) -{ - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); -} - -// Translates and subtracts one span at hx to the screen at sx with clamping. -void rt_tlatesubclamp1col_rgba (int hx, int sx, int yl, int yh) -{ - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); -} - -// Translates and subtracts all four spans to the screen starting at sx with clamping. -void rt_tlatesubclamp4cols_rgba (int sx, int yl, int yh) -{ - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); -} - -// Subtracts one span at hx from the screen at sx with clamping. -void rt_revsubclamp1col_rgba (int hx, int sx, int yl, int yh) -{ - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); -} - -// Subtracts all four spans from the screen starting at sx with clamping. -void rt_revsubclamp4cols_rgba (int sx, int yl, int yh) -{ - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); -} - -// Translates and subtracts one span at hx from the screen at sx with clamping. -void rt_tlaterevsubclamp1col_rgba (int hx, int sx, int yl, int yh) -{ - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); -} - -// Translates and subtracts all four spans from the screen starting at sx with clamping. -void rt_tlaterevsubclamp4cols_rgba (int sx, int yl, int yh) -{ - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); -} - -// Before each pass through a rendering loop that uses these routines, -// call this function to set up the span pointers. 
-void rt_initcols_rgba (BYTE *buff) -{ - using namespace drawerargs; - - for (int y = 3; y >= 0; y--) - horizspan[y] = dc_ctspan[y] = &dc_tspans[y][0]; - - DrawerCommandQueue::QueueCommand(buff); -} - -void rt_span_coverage_rgba(int x, int start, int stop) -{ - using namespace drawerargs; - - unsigned int **tspan = &dc_ctspan[x & 3]; - (*tspan)[0] = start; - (*tspan)[1] = stop; - *tspan += 2; -} - -// Stretches a column into a temporary buffer which is later -// drawn to the screen along with up to three other columns. -void R_DrawColumnHoriz_rgba (void) -{ - using namespace drawerargs; - - if (dc_count <= 0) - return; - - int x = dc_x & 3; - unsigned int **span = &dc_ctspan[x]; - (*span)[0] = dc_yl; - (*span)[1] = dc_yh; - *span += 2; - - if (drawer_needs_pal_input) - DrawerCommandQueue::QueueCommand>(); - else - DrawerCommandQueue::QueueCommand>(); -} - -// [RH] Just fills a column with a given color -void R_FillColumnHoriz_rgba (void) -{ - using namespace drawerargs; - - if (dc_count <= 0) - return; - - int x = dc_x & 3; - unsigned int **span = &dc_ctspan[x]; - (*span)[0] = dc_yl; - (*span)[1] = dc_yh; - *span += 2; - - DrawerCommandQueue::QueueCommand(); -} - } diff --git a/src/r_plane.cpp b/src/r_plane.cpp index 36c2c1da5d..c6c7d6d2f0 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -511,9 +511,9 @@ void R_MapTiltedPlane_C (int y, int x1) #endif } -void R_MapTiltedPlane_rgba (int y, int x1) +void R_MapTiltedPlane (int y, int x1) { - R_DrawTiltedSpan_rgba(y, x1, spanend[y], plane_sz, plane_su, plane_sv, plane_shade, planeshade, planelightfloat, pviewx, pviewy); + R_DrawTiltedSpan(y, x1, spanend[y], plane_sz, plane_su, plane_sv, plane_shade, planeshade, planelightfloat, pviewx, pviewy); } //========================================================================== @@ -527,9 +527,9 @@ void R_MapColoredPlane_C (int y, int x1) memset (ylookup[y] + x1 + dc_destorg, ds_color, spanend[y] - x1 + 1); } -void R_MapColoredPlane_rgba(int y, int x1) +void 
R_MapColoredPlane(int y, int x1) { - R_DrawColoredSpan_rgba(y, x1, spanend[y]); + R_DrawColoredSpan(y, x1, spanend[y]); } //========================================================================== @@ -1073,32 +1073,16 @@ static void R_DrawSkyColumnStripe(int start_x, int y1, int y2, int columns, doub uint32_t solid_top = frontskytex->GetSkyCapColor(false); uint32_t solid_bottom = frontskytex->GetSkyCapColor(true); - if (r_swtruecolor) - { - if (columns == 4) - if (!backskytex) - R_DrawSingleSkyCol4_rgba(solid_top, solid_bottom); - else - R_DrawDoubleSkyCol4_rgba(solid_top, solid_bottom); + if (columns == 4) + if (!backskytex) + R_DrawSingleSkyCol4(solid_top, solid_bottom); else - if (!backskytex) - R_DrawSingleSkyCol1_rgba(solid_top, solid_bottom); - else - R_DrawDoubleSkyCol1_rgba(solid_top, solid_bottom); - } + R_DrawDoubleSkyCol4(solid_top, solid_bottom); else - { - if (columns == 4) - if (!backskytex) - R_DrawSingleSkyCol4(solid_top, solid_bottom); - else - R_DrawDoubleSkyCol4(solid_top, solid_bottom); + if (!backskytex) + R_DrawSingleSkyCol1(solid_top, solid_bottom); else - if (!backskytex) - R_DrawSingleSkyCol1(solid_top, solid_bottom); - else - R_DrawDoubleSkyCol1(solid_top, solid_bottom); - } + R_DrawDoubleSkyCol1(solid_top, solid_bottom); } static void R_DrawSkyColumn(int start_x, int y1, int y2, int columns) From dfbd7fd2adefae9a36af210b27a471acf6f334c8 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 5 Dec 2016 10:44:24 +0100 Subject: [PATCH 436/912] Add drawer commands for pal mode --- src/CMakeLists.txt | 2 + src/r_draw_pal.cpp | 221 +++++++++++++++++++++++++++ src/r_draw_pal.h | 163 ++++++++++++++++++++ src/r_draw_tc.cpp | 365 +++++++++++++++++++++++++++++++++++--------- src/r_drawt_pal.cpp | 113 ++++++++++++++ src/r_things.cpp | 2 + 6 files changed, 795 insertions(+), 71 deletions(-) create mode 100644 src/r_draw_pal.cpp create mode 100644 src/r_draw_pal.h create mode 100644 src/r_drawt_pal.cpp diff --git a/src/CMakeLists.txt 
b/src/CMakeLists.txt index e62f4d4125..d026730480 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1046,6 +1046,8 @@ set( FASTMATH_PCH_SOURCES r_3dfloors.cpp r_bsp.cpp r_draw_tc.cpp + r_draw_pal.cpp + r_drawt_pal.cpp r_draw_rgba.cpp r_drawt_rgba.cpp r_drawers.cpp diff --git a/src/r_draw_pal.cpp b/src/r_draw_pal.cpp new file mode 100644 index 0000000000..b3ed97dfb0 --- /dev/null +++ b/src/r_draw_pal.cpp @@ -0,0 +1,221 @@ + +#include "templates.h" +#include "doomtype.h" +#include "doomdef.h" +#include "r_defs.h" +#include "r_draw.h" +#include "r_main.h" +#include "r_things.h" +#include "v_video.h" +#include "r_draw_pal.h" + +namespace swrenderer +{ + PalWall1Command::PalWall1Command() + { + } + + PalWall4Command::PalWall4Command() + { + } + + void DrawWall1PalCommand::Execute(DrawerThread *thread) + { + } + + void DrawWall4PalCommand::Execute(DrawerThread *thread) + { + } + + void DrawWallMasked1PalCommand::Execute(DrawerThread *thread) + { + } + + void DrawWallMasked4PalCommand::Execute(DrawerThread *thread) + { + } + + void DrawWallAdd1PalCommand::Execute(DrawerThread *thread) + { + } + + void DrawWallAdd4PalCommand::Execute(DrawerThread *thread) + { + } + + void DrawWallAddClamp1PalCommand::Execute(DrawerThread *thread) + { + } + + void DrawWallAddClamp4PalCommand::Execute(DrawerThread *thread) + { + } + + void DrawWallSubClamp1PalCommand::Execute(DrawerThread *thread) + { + } + + void DrawWallSubClamp4PalCommand::Execute(DrawerThread *thread) + { + } + + void DrawWallRevSubClamp1PalCommand::Execute(DrawerThread *thread) + { + } + + void DrawWallRevSubClamp4PalCommand::Execute(DrawerThread *thread) + { + } + + PalSkyCommand::PalSkyCommand(uint32_t solid_top, uint32_t solid_bottom) + { + } + + void DrawSingleSky1PalCommand::Execute(DrawerThread *thread) + { + } + + void DrawSingleSky4PalCommand::Execute(DrawerThread *thread) + { + } + + void DrawDoubleSky1PalCommand::Execute(DrawerThread *thread) + { + } + + void 
DrawDoubleSky4PalCommand::Execute(DrawerThread *thread) + { + } + + PalColumnCommand::PalColumnCommand() + { + } + + void DrawColumnPalCommand::Execute(DrawerThread *thread) + { + } + + void FillColumnPalCommand::Execute(DrawerThread *thread) + { + } + + void FillColumnAddPalCommand::Execute(DrawerThread *thread) + { + } + + void FillColumnAddClampPalCommand::Execute(DrawerThread *thread) + { + } + + void FillColumnSubClampPalCommand::Execute(DrawerThread *thread) + { + } + + void FillColumnRevSubClampPalCommand::Execute(DrawerThread *thread) + { + } + + void DrawColumnAddPalCommand::Execute(DrawerThread *thread) + { + } + + void DrawColumnTranslatedPalCommand::Execute(DrawerThread *thread) + { + } + + void DrawColumnTlatedAddPalCommand::Execute(DrawerThread *thread) + { + } + + void DrawColumnShadedPalCommand::Execute(DrawerThread *thread) + { + } + + void DrawColumnAddClampPalCommand::Execute(DrawerThread *thread) + { + } + + void DrawColumnAddClampTranslatedPalCommand::Execute(DrawerThread *thread) + { + } + + void DrawColumnSubClampPalCommand::Execute(DrawerThread *thread) + { + } + + void DrawColumnSubClampTranslatedPalCommand::Execute(DrawerThread *thread) + { + } + + void DrawColumnRevSubClampPalCommand::Execute(DrawerThread *thread) + { + } + + void DrawColumnRevSubClampTranslatedPalCommand::Execute(DrawerThread *thread) + { + } + + void DrawFuzzColumnPalCommand::Execute(DrawerThread *thread) + { + } + + void DrawSpanPalCommand::Execute(DrawerThread *thread) + { + } + + void DrawSpanMaskedPalCommand::Execute(DrawerThread *thread) + { + } + + void DrawSpanTranslucentPalCommand::Execute(DrawerThread *thread) + { + } + + void DrawSpanMaskedTranslucentPalCommand::Execute(DrawerThread *thread) + { + } + + void DrawSpanAddClampPalCommand::Execute(DrawerThread *thread) + { + } + + void DrawSpanMaskedAddClampPalCommand::Execute(DrawerThread *thread) + { + } + + void FillSpanPalCommand::Execute(DrawerThread *thread) + { + } + + 
DrawTiltedSpanPalCommand::DrawTiltedSpanPalCommand(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy) + { + } + + void DrawTiltedSpanPalCommand::Execute(DrawerThread *thread) + { + } + + DrawColoredSpanPalCommand::DrawColoredSpanPalCommand(int y, int x1, int x2) + { + } + + void DrawColoredSpanPalCommand::Execute(DrawerThread *thread) + { + } + + DrawSlabPalCommand::DrawSlabPalCommand(int dx, fixed_t v, int dy, fixed_t vi, const uint8_t *vptr, uint8_t *p, const uint8_t *colormap) + { + } + + void DrawSlabPalCommand::Execute(DrawerThread *thread) + { + } + + DrawFogBoundaryLinePalCommand::DrawFogBoundaryLinePalCommand(int y, int y2, int x1) + { + } + + void DrawFogBoundaryLinePalCommand::Execute(DrawerThread *thread) + { + } +} diff --git a/src/r_draw_pal.h b/src/r_draw_pal.h new file mode 100644 index 0000000000..3d540f8c60 --- /dev/null +++ b/src/r_draw_pal.h @@ -0,0 +1,163 @@ + +#pragma once + +#include "r_draw.h" +#include "v_palette.h" +#include "r_thread.h" + +namespace swrenderer +{ + class PalWall1Command : public DrawerCommand + { + public: + PalWall1Command(); + FString DebugInfo() override { return "PalWallCommand"; } + }; + + class PalWall4Command : public DrawerCommand + { + public: + PalWall4Command(); + FString DebugInfo() override { return "PalWallCommand"; } + }; + + class DrawWall1PalCommand : public PalWall1Command { public: void Execute(DrawerThread *thread) override; }; + class DrawWall4PalCommand : public PalWall4Command { public: void Execute(DrawerThread *thread) override; }; + class DrawWallMasked1PalCommand : public PalWall1Command { public: void Execute(DrawerThread *thread) override; }; + class DrawWallMasked4PalCommand : public PalWall4Command { public: void Execute(DrawerThread *thread) override; }; + class DrawWallAdd1PalCommand : public PalWall1Command { public: void Execute(DrawerThread *thread) 
override; }; + class DrawWallAdd4PalCommand : public PalWall4Command { public: void Execute(DrawerThread *thread) override; }; + class DrawWallAddClamp1PalCommand : public PalWall1Command { public: void Execute(DrawerThread *thread) override; }; + class DrawWallAddClamp4PalCommand : public PalWall4Command { public: void Execute(DrawerThread *thread) override; }; + class DrawWallSubClamp1PalCommand : public PalWall1Command { public: void Execute(DrawerThread *thread) override; }; + class DrawWallSubClamp4PalCommand : public PalWall4Command { public: void Execute(DrawerThread *thread) override; }; + class DrawWallRevSubClamp1PalCommand : public PalWall1Command { public: void Execute(DrawerThread *thread) override; }; + class DrawWallRevSubClamp4PalCommand : public PalWall4Command { public: void Execute(DrawerThread *thread) override; }; + + class PalSkyCommand : public DrawerCommand + { + public: + PalSkyCommand(uint32_t solid_top, uint32_t solid_bottom); + FString DebugInfo() override { return "PalSkyCommand"; } + }; + + class DrawSingleSky1PalCommand : public PalSkyCommand { public: using PalSkyCommand::PalSkyCommand; void Execute(DrawerThread *thread) override; }; + class DrawSingleSky4PalCommand : public PalSkyCommand { public: using PalSkyCommand::PalSkyCommand; void Execute(DrawerThread *thread) override; }; + class DrawDoubleSky1PalCommand : public PalSkyCommand { public: using PalSkyCommand::PalSkyCommand; void Execute(DrawerThread *thread) override; }; + class DrawDoubleSky4PalCommand : public PalSkyCommand { public: using PalSkyCommand::PalSkyCommand; void Execute(DrawerThread *thread) override; }; + + class PalColumnCommand : public DrawerCommand + { + public: + PalColumnCommand(); + FString DebugInfo() override { return "PalColumnCommand"; } + }; + + class DrawColumnPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; }; + class FillColumnPalCommand : public PalColumnCommand { public: void Execute(DrawerThread 
*thread) override; }; + class FillColumnAddPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; }; + class FillColumnAddClampPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; }; + class FillColumnSubClampPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; }; + class FillColumnRevSubClampPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; }; + class DrawColumnAddPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; }; + class DrawColumnTranslatedPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; }; + class DrawColumnTlatedAddPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; }; + class DrawColumnShadedPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; }; + class DrawColumnAddClampPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; }; + class DrawColumnAddClampTranslatedPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; }; + class DrawColumnSubClampPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; }; + class DrawColumnSubClampTranslatedPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; }; + class DrawColumnRevSubClampPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; }; + class DrawColumnRevSubClampTranslatedPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; }; + + class DrawFuzzColumnPalCommand : public DrawerCommand + { + public: + void Execute(DrawerThread *thread) override; + FString DebugInfo() override { return "DrawFuzzColumnPalCommand"; } + }; + + class PalSpanCommand : public DrawerCommand + { + 
public: + FString DebugInfo() override { return "PalSpanCommand"; } + }; + + class DrawSpanPalCommand : public PalSpanCommand { public: void Execute(DrawerThread *thread) override; }; + class DrawSpanMaskedPalCommand : public PalSpanCommand { public: void Execute(DrawerThread *thread) override; }; + class DrawSpanTranslucentPalCommand : public PalSpanCommand { public: void Execute(DrawerThread *thread) override; }; + class DrawSpanMaskedTranslucentPalCommand : public PalSpanCommand { public: void Execute(DrawerThread *thread) override; }; + class DrawSpanAddClampPalCommand : public PalSpanCommand { public: void Execute(DrawerThread *thread) override; }; + class DrawSpanMaskedAddClampPalCommand : public PalSpanCommand { public: void Execute(DrawerThread *thread) override; }; + class FillSpanPalCommand : public PalSpanCommand { public: void Execute(DrawerThread *thread) override; }; + + class DrawTiltedSpanPalCommand : public DrawerCommand + { + public: + DrawTiltedSpanPalCommand(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy); + void Execute(DrawerThread *thread) override; + FString DebugInfo() override { return "DrawTiltedSpanPalCommand"; } + }; + + class DrawColoredSpanPalCommand : public PalSpanCommand + { + public: + DrawColoredSpanPalCommand(int y, int x1, int x2); + void Execute(DrawerThread *thread) override; + FString DebugInfo() override { return "DrawColoredSpanPalCommand"; } + }; + + class DrawSlabPalCommand : public PalSpanCommand + { + public: + DrawSlabPalCommand(int dx, fixed_t v, int dy, fixed_t vi, const uint8_t *vptr, uint8_t *p, const uint8_t *colormap); + void Execute(DrawerThread *thread) override; + }; + + class DrawFogBoundaryLinePalCommand : public PalSpanCommand + { + public: + DrawFogBoundaryLinePalCommand(int y, int y2, int x1); + void Execute(DrawerThread *thread) override; + }; + + //class 
RtInitColsPalCommand : public DrawerCommand { public: void Execute(DrawerThread *thread) override; }; + //class DrawColumnHorizPalCommand : public DrawerCommand { public: void Execute(DrawerThread *thread) override; }; + class FillColumnHorizPalCommand : public DrawerCommand + { + public: + void Execute(DrawerThread *thread) override; + FString DebugInfo() override { return "FillColumnHorizPalCommand"; } + }; + + class PalRtCommand : public DrawerCommand + { + public: + PalRtCommand(int hx, int sx, int yl, int yh); + FString DebugInfo() override { return "PalRtCommand"; } + }; + + class DrawColumnRt1CopyPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + class DrawColumnRt1PalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + class DrawColumnRt4PalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + class DrawColumnRt1TranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + class DrawColumnRt4TranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + class DrawColumnRt1AddPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + class DrawColumnRt4AddPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + class DrawColumnRt1AddTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + class DrawColumnRt4AddTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + class DrawColumnRt1ShadedPalCommand : 
public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + class DrawColumnRt4ShadedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + class DrawColumnRt1AddClampPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + class DrawColumnRt4AddClampPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + class DrawColumnRt1AddClampTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + class DrawColumnRt4AddClampTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + class DrawColumnRt1SubClampPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + class DrawColumnRt4SubClampPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + class DrawColumnRt1SubClampTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + class DrawColumnRt4SubClampTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + class DrawColumnRt1RevSubClampPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + class DrawColumnRt4RevSubClampPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + class DrawColumnRt1RevSubClampTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread 
*thread) override; }; + class DrawColumnRt4RevSubClampTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; +} diff --git a/src/r_draw_tc.cpp b/src/r_draw_tc.cpp index a531d4aa9c..2326cab91e 100644 --- a/src/r_draw_tc.cpp +++ b/src/r_draw_tc.cpp @@ -17,6 +17,7 @@ #include "r_plane.h" #include "r_draw_tc.h" #include "r_draw_rgba.h" +#include "r_draw_pal.h" #include "r_thread.h" namespace swrenderer @@ -841,7 +842,10 @@ namespace swrenderer (*span)[1] = dc_yh; *span += 2; - DrawerCommandQueue::QueueCommand(); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); } void R_DrawColumnHoriz() @@ -857,7 +861,7 @@ namespace swrenderer (*span)[1] = dc_yh; *span += 2; - if (drawer_needs_pal_input) + if (drawer_needs_pal_input || !r_swtruecolor) DrawerCommandQueue::QueueCommand>(); else DrawerCommandQueue::QueueCommand>(); @@ -866,7 +870,10 @@ namespace swrenderer // Copies one span at hx to the screen at sx. void rt_copy1col(int hx, int sx, int yl, int yh) { - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + else + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Copies all four spans to the screen starting at sx. @@ -882,140 +889,210 @@ namespace swrenderer // Maps one span at hx to the screen at sx. void rt_map1col(int hx, int sx, int yl, int yh) { - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + else + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Maps all four spans to the screen starting at sx. void rt_map4cols(int sx, int yl, int yh) { - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + else + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); } // Translates one span at hx to the screen at sx. 
void rt_tlate1col(int hx, int sx, int yl, int yh) { - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + else + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Translates all four spans to the screen starting at sx. void rt_tlate4cols(int sx, int yl, int yh) { - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + else + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); } // Adds one span at hx to the screen at sx without clamping. void rt_add1col(int hx, int sx, int yl, int yh) { - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + else + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Adds all four spans to the screen starting at sx without clamping. void rt_add4cols(int sx, int yl, int yh) { - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + else + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); } // Translates and adds one span at hx to the screen at sx without clamping. void rt_tlateadd1col(int hx, int sx, int yl, int yh) { - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + else + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Translates and adds all four spans to the screen starting at sx without clamping. void rt_tlateadd4cols(int sx, int yl, int yh) { - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + else + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); } // Shades one span at hx to the screen at sx. 
void rt_shaded1col(int hx, int sx, int yl, int yh) { - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + else + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Shades all four spans to the screen starting at sx. void rt_shaded4cols(int sx, int yl, int yh) { - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + else + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); } // Adds one span at hx to the screen at sx with clamping. void rt_addclamp1col(int hx, int sx, int yl, int yh) { - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + else + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Adds all four spans to the screen starting at sx with clamping. void rt_addclamp4cols(int sx, int yl, int yh) { - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + else + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); } // Translates and adds one span at hx to the screen at sx with clamping. void rt_tlateaddclamp1col(int hx, int sx, int yl, int yh) { - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + else + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Translates and adds all four spans to the screen starting at sx with clamping. void rt_tlateaddclamp4cols(int sx, int yl, int yh) { - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + else + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); } // Subtracts one span at hx to the screen at sx with clamping. 
void rt_subclamp1col(int hx, int sx, int yl, int yh) { - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + else + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Subtracts all four spans to the screen starting at sx with clamping. void rt_subclamp4cols(int sx, int yl, int yh) { - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + else + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); } // Translates and subtracts one span at hx to the screen at sx with clamping. void rt_tlatesubclamp1col(int hx, int sx, int yl, int yh) { - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + else + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Translates and subtracts all four spans to the screen starting at sx with clamping. void rt_tlatesubclamp4cols(int sx, int yl, int yh) { - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + else + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); } // Subtracts one span at hx from the screen at sx with clamping. void rt_revsubclamp1col(int hx, int sx, int yl, int yh) { - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + else + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Subtracts all four spans from the screen starting at sx with clamping. void rt_revsubclamp4cols(int sx, int yl, int yh) { - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + else + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); } // Translates and subtracts one span at hx from the screen at sx with clamping. 
void rt_tlaterevsubclamp1col(int hx, int sx, int yl, int yh) { - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + else + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Translates and subtracts all four spans from the screen starting at sx with clamping. void rt_tlaterevsubclamp4cols(int sx, int yl, int yh) { - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + else + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); } uint32_t vlinec1() { using namespace drawerargs; - DrawerCommandQueue::QueueCommand(); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); + return dc_texturefrac + dc_count * dc_iscale; } @@ -1023,7 +1100,11 @@ namespace swrenderer { using namespace drawerargs; - DrawerCommandQueue::QueueCommand(); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); + for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } @@ -1032,7 +1113,11 @@ namespace swrenderer { using namespace drawerargs; - DrawerCommandQueue::QueueCommand(); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); + return dc_texturefrac + dc_count * dc_iscale; } @@ -1040,7 +1125,11 @@ namespace swrenderer { using namespace drawerargs; - DrawerCommandQueue::QueueCommand(); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); + for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } @@ -1049,7 +1138,11 @@ namespace swrenderer { using namespace drawerargs; - DrawerCommandQueue::QueueCommand(); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); + return dc_texturefrac + dc_count * dc_iscale; } @@ -1057,7 +1150,11 @@ namespace swrenderer { using namespace drawerargs; - 
DrawerCommandQueue::QueueCommand(); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); + for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } @@ -1066,7 +1163,11 @@ namespace swrenderer { using namespace drawerargs; - DrawerCommandQueue::QueueCommand(); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); + return dc_texturefrac + dc_count * dc_iscale; } @@ -1074,7 +1175,11 @@ namespace swrenderer { using namespace drawerargs; - DrawerCommandQueue::QueueCommand(); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); + for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } @@ -1083,7 +1188,11 @@ namespace swrenderer { using namespace drawerargs; - DrawerCommandQueue::QueueCommand(); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); + return dc_texturefrac + dc_count * dc_iscale; } @@ -1091,7 +1200,11 @@ namespace swrenderer { using namespace drawerargs; - DrawerCommandQueue::QueueCommand(); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); + for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } @@ -1100,7 +1213,11 @@ namespace swrenderer { using namespace drawerargs; - DrawerCommandQueue::QueueCommand(); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); + return dc_texturefrac + dc_count * dc_iscale; } @@ -1108,66 +1225,103 @@ namespace swrenderer { using namespace drawerargs; - DrawerCommandQueue::QueueCommand(); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); + for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } void R_DrawSingleSkyCol1(uint32_t solid_top, uint32_t solid_bottom) { - DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); + if (r_swtruecolor) 
+ DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); + else + DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); } void R_DrawSingleSkyCol4(uint32_t solid_top, uint32_t solid_bottom) { - DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); + else + DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); } void R_DrawDoubleSkyCol1(uint32_t solid_top, uint32_t solid_bottom) { - DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); + else + DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); } void R_DrawDoubleSkyCol4(uint32_t solid_top, uint32_t solid_bottom) { - DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); + else + DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); } void R_DrawColumn() { - DrawerCommandQueue::QueueCommand(); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); } void R_FillColumn() { - DrawerCommandQueue::QueueCommand(); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); } void R_FillAddColumn() { - DrawerCommandQueue::QueueCommand(); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); } void R_FillAddClampColumn() { - DrawerCommandQueue::QueueCommand(); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); } void R_FillSubClampColumn() { - DrawerCommandQueue::QueueCommand(); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); } void R_FillRevSubClampColumn() { - DrawerCommandQueue::QueueCommand(); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(); + else + 
DrawerCommandQueue::QueueCommand(); } void R_DrawFuzzColumn() { using namespace drawerargs; - DrawerCommandQueue::QueueCommand(); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); dc_yl = MAX(dc_yl, 1); dc_yh = MIN(dc_yh, fuzzviewheight); @@ -1177,97 +1331,154 @@ namespace swrenderer void R_DrawAddColumn() { - DrawerCommandQueue::QueueCommand(); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); } void R_DrawTranslatedColumn() { - DrawerCommandQueue::QueueCommand(); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); } void R_DrawTlatedAddColumn() { - DrawerCommandQueue::QueueCommand(); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); } void R_DrawShadedColumn() { - DrawerCommandQueue::QueueCommand(); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); } void R_DrawAddClampColumn() { - DrawerCommandQueue::QueueCommand(); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); } void R_DrawAddClampTranslatedColumn() { - DrawerCommandQueue::QueueCommand(); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); } void R_DrawSubClampColumn() { - DrawerCommandQueue::QueueCommand(); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); } void R_DrawSubClampTranslatedColumn() { - DrawerCommandQueue::QueueCommand(); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); } void R_DrawRevSubClampColumn() { - DrawerCommandQueue::QueueCommand(); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); } void R_DrawRevSubClampTranslatedColumn() { - 
DrawerCommandQueue::QueueCommand(); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); } void R_DrawSpan() { - DrawerCommandQueue::QueueCommand(); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); } void R_DrawSpanMasked() { - DrawerCommandQueue::QueueCommand(); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); } void R_DrawSpanTranslucent() { - DrawerCommandQueue::QueueCommand(); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); } void R_DrawSpanMaskedTranslucent() { - DrawerCommandQueue::QueueCommand(); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); } void R_DrawSpanAddClamp() { - DrawerCommandQueue::QueueCommand(); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); } void R_DrawSpanMaskedAddClamp() { - DrawerCommandQueue::QueueCommand(); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); } void R_FillSpan() { - DrawerCommandQueue::QueueCommand(); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); } void R_DrawTiltedSpan(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy) { - DrawerCommandQueue::QueueCommand(y, x1, x2, plane_sz, plane_su, plane_sv, plane_shade, planeshade, planelightfloat, pviewx, pviewy); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(y, x1, x2, plane_sz, plane_su, plane_sv, plane_shade, planeshade, planelightfloat, pviewx, pviewy); + else + DrawerCommandQueue::QueueCommand(y, x1, x2, plane_sz, plane_su, plane_sv, plane_shade, planeshade, planelightfloat, pviewx, pviewy); } void 
R_DrawColoredSpan(int y, int x1, int x2) { - DrawerCommandQueue::QueueCommand(y, x1, x2); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(y, x1, x2); + else + DrawerCommandQueue::QueueCommand(y, x1, x2); } namespace @@ -1295,7 +1506,10 @@ namespace swrenderer void R_DrawSlab(int dx, fixed_t v, int dy, fixed_t vi, const uint8_t *vptr, uint8_t *p) { - DrawerCommandQueue::QueueCommand(dx, v, dy, vi, vptr, p, slab_rgba_shade_constants, slab_rgba_colormap, slab_rgba_light); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(dx, v, dy, vi, vptr, p, slab_rgba_shade_constants, slab_rgba_colormap, slab_rgba_light); + else + DrawerCommandQueue::QueueCommand(dx, v, dy, vi, vptr, p, slab_rgba_colormap); } void R_DrawFogBoundarySection(int y, int y2, int x1) @@ -1303,7 +1517,10 @@ namespace swrenderer for (; y < y2; ++y) { int x2 = spanend[y]; - DrawerCommandQueue::QueueCommand(y, x1, x2); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(y, x1, x2); + else + DrawerCommandQueue::QueueCommand(y, x1, x2); } } @@ -1365,13 +1582,19 @@ namespace swrenderer while (t2 < stop) { int y = t2++; - DrawerCommandQueue::QueueCommand(y, xr, spanend[y]); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(y, xr, spanend[y]); + else + DrawerCommandQueue::QueueCommand(y, xr, spanend[y]); } stop = MAX(b1, t2); while (b2 > stop) { int y = --b2; - DrawerCommandQueue::QueueCommand(y, xr, spanend[y]); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(y, xr, spanend[y]); + else + DrawerCommandQueue::QueueCommand(y, xr, spanend[y]); } } else diff --git a/src/r_drawt_pal.cpp b/src/r_drawt_pal.cpp new file mode 100644 index 0000000000..b35046285f --- /dev/null +++ b/src/r_drawt_pal.cpp @@ -0,0 +1,113 @@ + +#include "templates.h" +#include "doomtype.h" +#include "doomdef.h" +#include "r_defs.h" +#include "r_draw.h" +#include "r_main.h" +#include "r_things.h" +#include "v_video.h" +#include "r_draw_pal.h" + +namespace swrenderer +{ + PalRtCommand::PalRtCommand(int hx, int sx, 
int yl, int yh) + { + } + + void FillColumnHorizPalCommand::Execute(DrawerThread *thread) + { + } + + void DrawColumnRt1CopyPalCommand::Execute(DrawerThread *thread) + { + } + + void DrawColumnRt1PalCommand::Execute(DrawerThread *thread) + { + } + + void DrawColumnRt4PalCommand::Execute(DrawerThread *thread) + { + } + + void DrawColumnRt1TranslatedPalCommand::Execute(DrawerThread *thread) + { + } + + void DrawColumnRt4TranslatedPalCommand::Execute(DrawerThread *thread) + { + } + + void DrawColumnRt1AddPalCommand::Execute(DrawerThread *thread) + { + } + + void DrawColumnRt4AddPalCommand::Execute(DrawerThread *thread) + { + } + + void DrawColumnRt1AddTranslatedPalCommand::Execute(DrawerThread *thread) + { + } + + void DrawColumnRt4AddTranslatedPalCommand::Execute(DrawerThread *thread) + { + } + + void DrawColumnRt1ShadedPalCommand::Execute(DrawerThread *thread) + { + } + + void DrawColumnRt4ShadedPalCommand::Execute(DrawerThread *thread) + { + } + + void DrawColumnRt1AddClampPalCommand::Execute(DrawerThread *thread) + { + } + + void DrawColumnRt4AddClampPalCommand::Execute(DrawerThread *thread) + { + } + + void DrawColumnRt1AddClampTranslatedPalCommand::Execute(DrawerThread *thread) + { + } + + void DrawColumnRt4AddClampTranslatedPalCommand::Execute(DrawerThread *thread) + { + } + + void DrawColumnRt1SubClampPalCommand::Execute(DrawerThread *thread) + { + } + + void DrawColumnRt4SubClampPalCommand::Execute(DrawerThread *thread) + { + } + + void DrawColumnRt1SubClampTranslatedPalCommand::Execute(DrawerThread *thread) + { + } + + void DrawColumnRt4SubClampTranslatedPalCommand::Execute(DrawerThread *thread) + { + } + + void DrawColumnRt1RevSubClampPalCommand::Execute(DrawerThread *thread) + { + } + + void DrawColumnRt4RevSubClampPalCommand::Execute(DrawerThread *thread) + { + } + + void DrawColumnRt1RevSubClampTranslatedPalCommand::Execute(DrawerThread *thread) + { + } + + void DrawColumnRt4RevSubClampTranslatedPalCommand::Execute(DrawerThread *thread) + { + } +} diff 
--git a/src/r_things.cpp b/src/r_things.cpp index 6869e83415..57e101feb2 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -2800,6 +2800,8 @@ void R_DrawParticle_C (vissprite_t *vis) R_DrawMaskedSegsBehindParticle (vis); + DrawerCommandQueue::WaitForWorkers(); + // vis->renderflags holds translucency level (0-255) { fixed_t fglevel, bglevel; From 645aed62d10e874df4210486bd65ab90d5f36584 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 5 Dec 2016 11:38:21 +0100 Subject: [PATCH 437/912] Add wall drawers --- src/r_draw_pal.cpp | 353 +++++++++++++++++++++++++++++++++++++++++++++ src/r_draw_pal.h | 28 ++++ 2 files changed, 381 insertions(+) diff --git a/src/r_draw_pal.cpp b/src/r_draw_pal.cpp index b3ed97dfb0..c4a05281b3 100644 --- a/src/r_draw_pal.cpp +++ b/src/r_draw_pal.cpp @@ -13,58 +13,411 @@ namespace swrenderer { PalWall1Command::PalWall1Command() { + using namespace drawerargs; + + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _colormap = dc_colormap; + _count = dc_count; + _source = dc_source; + _dest = dc_dest; + _vlinebits = vlinebits; + _mvlinebits = mvlinebits; + _tmvlinebits = tmvlinebits; + _pitch = dc_pitch; + _srcblend = dc_srcblend; + _destblend = dc_destblend; } PalWall4Command::PalWall4Command() { + using namespace drawerargs; + + _dest = dc_dest; + _count = dc_count; + _pitch = dc_pitch; + _vlinebits = vlinebits; + _mvlinebits = mvlinebits; + _tmvlinebits = tmvlinebits; + for (int col = 0; col < 4; col++) + { + _palookupoffse[col] = palookupoffse[col]; + _bufplce[col] = bufplce[col]; + _vince[col] = vince[col]; + _vplce[col] = vplce[col]; + } + _srcblend = dc_srcblend; + _destblend = dc_destblend; } void DrawWall1PalCommand::Execute(DrawerThread *thread) { + uint32_t fracstep = _iscale; + uint32_t frac = _texturefrac; + uint8_t *colormap = _colormap; + int count = _count; + const uint8_t *source = _source; + uint8_t *dest = _dest; + int bits = _vlinebits; + int pitch = _pitch; + + do + { + *dest = colormap[source[frac >> 
bits]]; + frac += fracstep; + dest += pitch; + } while (--count); } void DrawWall4PalCommand::Execute(DrawerThread *thread) { + uint8_t *dest = _dest; + int count = _count; + int bits = _vlinebits; + uint32_t place; + auto pal0 = _palookupoffse[0]; + auto pal1 = _palookupoffse[1]; + auto pal2 = _palookupoffse[2]; + auto pal3 = _palookupoffse[3]; + auto buf0 = _bufplce[0]; + auto buf1 = _bufplce[1]; + auto buf2 = _bufplce[2]; + auto buf3 = _bufplce[3]; + const auto vince0 = _vince[0]; + const auto vince1 = _vince[1]; + const auto vince2 = _vince[2]; + const auto vince3 = _vince[3]; + auto vplce0 = _vplce[0]; + auto vplce1 = _vplce[1]; + auto vplce2 = _vplce[2]; + auto vplce3 = _vplce[3]; + auto pitch = _pitch; + + do + { + dest[0] = pal0[buf0[(place = vplce0) >> bits]]; vplce0 = place + vince0; + dest[1] = pal1[buf1[(place = vplce1) >> bits]]; vplce1 = place + vince1; + dest[2] = pal2[buf2[(place = vplce2) >> bits]]; vplce2 = place + vince2; + dest[3] = pal3[buf3[(place = vplce3) >> bits]]; vplce3 = place + vince3; + dest += pitch; + } while (--count); } void DrawWallMasked1PalCommand::Execute(DrawerThread *thread) { + uint32_t fracstep = _iscale; + uint32_t frac = _texturefrac; + uint8_t *colormap = _colormap; + int count = _count; + const uint8_t *source = _source; + uint8_t *dest = _dest; + int bits = _mvlinebits; + int pitch = _pitch; + + do + { + uint8_t pix = source[frac >> bits]; + if (pix != 0) + { + *dest = colormap[pix]; + } + frac += fracstep; + dest += pitch; + } while (--count); } void DrawWallMasked4PalCommand::Execute(DrawerThread *thread) { + uint8_t *dest = _dest; + int count = _count; + int bits = _mvlinebits; + uint32_t place; + auto pal0 = _palookupoffse[0]; + auto pal1 = _palookupoffse[1]; + auto pal2 = _palookupoffse[2]; + auto pal3 = _palookupoffse[3]; + auto buf0 = _bufplce[0]; + auto buf1 = _bufplce[1]; + auto buf2 = _bufplce[2]; + auto buf3 = _bufplce[3]; + const auto vince0 = _vince[0]; + const auto vince1 = _vince[1]; + const auto vince2 
= _vince[2]; + const auto vince3 = _vince[3]; + auto vplce0 = _vplce[0]; + auto vplce1 = _vplce[1]; + auto vplce2 = _vplce[2]; + auto vplce3 = _vplce[3]; + auto pitch = _pitch; + + do + { + uint8_t pix; + + pix = buf0[(place = vplce0) >> bits]; if (pix) dest[0] = pal0[pix]; vplce0 = place + vince0; + pix = buf1[(place = vplce1) >> bits]; if (pix) dest[1] = pal1[pix]; vplce1 = place + vince1; + pix = buf2[(place = vplce2) >> bits]; if (pix) dest[2] = pal2[pix]; vplce2 = place + vince2; + pix = buf3[(place = vplce3) >> bits]; if (pix) dest[3] = pal3[pix]; vplce3 = place + vince3; + dest += pitch; + } while (--count); } void DrawWallAdd1PalCommand::Execute(DrawerThread *thread) { + uint32_t fracstep = _iscale; + uint32_t frac = _texturefrac; + uint8_t *colormap = _colormap; + int count = _count; + const uint8_t *source = _source; + uint8_t *dest = _dest; + int bits = _tmvlinebits; + int pitch = _pitch; + + uint32_t *fg2rgb = _srcblend; + uint32_t *bg2rgb = _destblend; + + do + { + uint8_t pix = source[frac >> bits]; + if (pix != 0) + { + uint32_t fg = fg2rgb[colormap[pix]]; + uint32_t bg = bg2rgb[*dest]; + fg = (fg + bg) | 0x1f07c1f; + *dest = RGB32k.All[fg & (fg >> 15)]; + } + frac += fracstep; + dest += pitch; + } while (--count); } void DrawWallAdd4PalCommand::Execute(DrawerThread *thread) { + uint8_t *dest = _dest; + int count = _count; + int bits = _tmvlinebits; + + uint32_t *fg2rgb = _srcblend; + uint32_t *bg2rgb = _destblend; + + uint32_t vplce[4] = { _vplce[0], _vplce[1], _vplce[2], _vplce[3] }; + + do + { + for (int i = 0; i < 4; ++i) + { + uint8_t pix = _bufplce[i][vplce[i] >> bits]; + if (pix != 0) + { + uint32_t fg = fg2rgb[_palookupoffse[i][pix]]; + uint32_t bg = bg2rgb[dest[i]]; + fg = (fg + bg) | 0x1f07c1f; + dest[i] = RGB32k.All[fg & (fg >> 15)]; + } + vplce[i] += _vince[i]; + } + dest += _pitch; + } while (--count); } void DrawWallAddClamp1PalCommand::Execute(DrawerThread *thread) { + uint32_t fracstep = _iscale; + uint32_t frac = _texturefrac; + 
uint8_t *colormap = _colormap; + int count = _count; + const uint8_t *source = _source; + uint8_t *dest = _dest; + int bits = _tmvlinebits; + int pitch = _pitch; + + uint32_t *fg2rgb = _srcblend; + uint32_t *bg2rgb = _destblend; + + do + { + uint8_t pix = source[frac >> bits]; + if (pix != 0) + { + uint32_t a = fg2rgb[colormap[pix]] + bg2rgb[*dest]; + uint32_t b = a; + + a |= 0x01f07c1f; + b &= 0x40100400; + a &= 0x3fffffff; + b = b - (b >> 5); + a |= b; + *dest = RGB32k.All[a & (a >> 15)]; + } + frac += fracstep; + dest += pitch; + } while (--count); } void DrawWallAddClamp4PalCommand::Execute(DrawerThread *thread) { + uint8_t *dest = _dest; + int count = _count; + int bits = _tmvlinebits; + + uint32_t *fg2rgb = _srcblend; + uint32_t *bg2rgb = _destblend; + + uint32_t vplce[4] = { _vplce[0], _vplce[1], _vplce[2], _vplce[3] }; + + do + { + for (int i = 0; i < 4; ++i) + { + uint8_t pix = _bufplce[i][vplce[i] >> bits]; + if (pix != 0) + { + uint32_t a = fg2rgb[_palookupoffse[i][pix]] + bg2rgb[dest[i]]; + uint32_t b = a; + + a |= 0x01f07c1f; + b &= 0x40100400; + a &= 0x3fffffff; + b = b - (b >> 5); + a |= b; + dest[i] = RGB32k.All[a & (a >> 15)]; + } + vplce[i] += _vince[i]; + } + dest += _pitch; + } while (--count); } void DrawWallSubClamp1PalCommand::Execute(DrawerThread *thread) { + uint32_t fracstep = _iscale; + uint32_t frac = _texturefrac; + uint8_t *colormap = _colormap; + int count = _count; + const uint8_t *source = _source; + uint8_t *dest = _dest; + int bits = _tmvlinebits; + int pitch = _pitch; + + uint32_t *fg2rgb = _srcblend; + uint32_t *bg2rgb = _destblend; + + do + { + uint8_t pix = source[frac >> bits]; + if (pix != 0) + { + uint32_t a = (fg2rgb[colormap[pix]] | 0x40100400) - bg2rgb[*dest]; + uint32_t b = a; + + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + *dest = RGB32k.All[a & (a >> 15)]; + } + frac += fracstep; + dest += pitch; + } while (--count); } void DrawWallSubClamp4PalCommand::Execute(DrawerThread *thread) { + uint8_t 
*dest = _dest; + int count = _count; + int bits = _tmvlinebits; + + uint32_t *fg2rgb = _srcblend; + uint32_t *bg2rgb = _destblend; + + uint32_t vplce[4] = { _vplce[0], _vplce[1], _vplce[2], _vplce[3] }; + + do + { + for (int i = 0; i < 4; ++i) + { + uint8_t pix = _bufplce[i][vplce[i] >> bits]; + if (pix != 0) + { + uint32_t a = (fg2rgb[_palookupoffse[i][pix]] | 0x40100400) - bg2rgb[dest[i]]; + uint32_t b = a; + + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + dest[i] = RGB32k.All[a & (a >> 15)]; + } + vplce[i] += _vince[i]; + } + dest += _pitch; + } while (--count); } void DrawWallRevSubClamp1PalCommand::Execute(DrawerThread *thread) { + uint32_t fracstep = _iscale; + uint32_t frac = _texturefrac; + uint8_t *colormap = _colormap; + int count = _count; + const uint8_t *source = _source; + uint8_t *dest = _dest; + int bits = _tmvlinebits; + int pitch = _pitch; + + uint32_t *fg2rgb = _srcblend; + uint32_t *bg2rgb = _destblend; + + do + { + uint8_t pix = source[frac >> bits]; + if (pix != 0) + { + uint32_t a = (bg2rgb[*dest] | 0x40100400) - fg2rgb[colormap[pix]]; + uint32_t b = a; + + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + *dest = RGB32k.All[a & (a >> 15)]; + } + frac += fracstep; + dest += pitch; + } while (--count); } void DrawWallRevSubClamp4PalCommand::Execute(DrawerThread *thread) { + uint8_t *dest = _dest; + int count = _count; + int bits = _tmvlinebits; + + uint32_t *fg2rgb = _srcblend; + uint32_t *bg2rgb = _destblend; + + uint32_t vplce[4] = { _vplce[0], _vplce[1], _vplce[2], _vplce[3] }; + + do + { + for (int i = 0; i < 4; ++i) + { + uint8_t pix = _bufplce[i][vplce[i] >> bits]; + if (pix != 0) + { + uint32_t a = (bg2rgb[dest[i]] | 0x40100400) - fg2rgb[_palookupoffse[i][pix]]; + uint32_t b = a; + + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + dest[i] = RGB32k.All[a & (a >> 15)]; + } + vplce[i] += _vince[i]; + } + dest += _pitch; + } while (--count); } PalSkyCommand::PalSkyCommand(uint32_t 
solid_top, uint32_t solid_bottom) diff --git a/src/r_draw_pal.h b/src/r_draw_pal.h index 3d540f8c60..ea56fb7c0b 100644 --- a/src/r_draw_pal.h +++ b/src/r_draw_pal.h @@ -12,6 +12,20 @@ namespace swrenderer public: PalWall1Command(); FString DebugInfo() override { return "PalWallCommand"; } + + protected: + uint32_t _iscale; + uint32_t _texturefrac; + uint8_t *_colormap; + int _count; + const uint8_t *_source; + uint8_t *_dest; + int _vlinebits; + int _mvlinebits; + int _tmvlinebits; + int _pitch; + uint32_t *_srcblend; + uint32_t *_destblend; }; class PalWall4Command : public DrawerCommand @@ -19,6 +33,20 @@ namespace swrenderer public: PalWall4Command(); FString DebugInfo() override { return "PalWallCommand"; } + + protected: + uint8_t *_dest; + int _count; + int _pitch; + int _vlinebits; + int _mvlinebits; + int _tmvlinebits; + uint8_t *_palookupoffse[4]; + const uint8_t *_bufplce[4]; + uint32_t _vince[4]; + uint32_t _vplce[4]; + uint32_t *_srcblend; + uint32_t *_destblend; }; class DrawWall1PalCommand : public PalWall1Command { public: void Execute(DrawerThread *thread) override; }; From 3b2d4fcff26e0db6bf1c15e987da6f4ddb3bb29f Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 5 Dec 2016 11:59:48 +0100 Subject: [PATCH 438/912] Added span drawers --- src/r_draw_pal.cpp | 415 +++++++++++++++++++++++++++++++++++++++++++++ src/r_draw_pal.h | 18 ++ 2 files changed, 433 insertions(+) diff --git a/src/r_draw_pal.cpp b/src/r_draw_pal.cpp index c4a05281b3..80d2fc5749 100644 --- a/src/r_draw_pal.cpp +++ b/src/r_draw_pal.cpp @@ -420,6 +420,8 @@ namespace swrenderer } while (--count); } + ///////////////////////////////////////////////////////////////////////// + PalSkyCommand::PalSkyCommand(uint32_t solid_top, uint32_t solid_bottom) { } @@ -440,6 +442,8 @@ namespace swrenderer { } + ///////////////////////////////////////////////////////////////////////// + PalColumnCommand::PalColumnCommand() { } @@ -512,34 +516,439 @@ namespace swrenderer { } + 
///////////////////////////////////////////////////////////////////////// + + PalSpanCommand::PalSpanCommand() + { + using namespace drawerargs; + + _source = ds_source; + _colormap = ds_colormap; + _xfrac = ds_xfrac; + _yfrac = ds_yfrac; + _y = ds_y; + _x1 = ds_x1; + _x2 = ds_x2; + _destorg = dc_destorg; + _xstep = ds_xstep; + _ystep = ds_ystep; + _xbits = ds_xbits; + _ybits = ds_ybits; + _srcblend = dc_srcblend; + _destblend = dc_destblend; + _color = ds_color; + } + void DrawSpanPalCommand::Execute(DrawerThread *thread) { + dsfixed_t xfrac; + dsfixed_t yfrac; + dsfixed_t xstep; + dsfixed_t ystep; + uint8_t *dest; + const uint8_t *source = _source; + const uint8_t *colormap = _colormap; + int count; + int spot; + + xfrac = _xfrac; + yfrac = _yfrac; + + dest = ylookup[_y] + _x1 + _destorg; + + count = _x2 - _x1 + 1; + + xstep = _xstep; + ystep = _ystep; + + if (_xbits == 6 && _ybits == 6) + { + // 64x64 is the most common case by far, so special case it. + do + { + // Current texture index in u,v. + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + + // Lookup pixel from flat texture tile, + // re-index using light/colormap. + *dest++ = colormap[source[spot]]; + + // Next step in u,v. + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + else + { + uint8_t yshift = 32 - _ybits; + uint8_t xshift = yshift - _xbits; + int xmask = ((1 << _xbits) - 1) << _ybits; + + do + { + // Current texture index in u,v. + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + + // Lookup pixel from flat texture tile, + // re-index using light/colormap. + *dest++ = colormap[source[spot]]; + + // Next step in u,v. 
+ xfrac += xstep; + yfrac += ystep; + } while (--count); + } } void DrawSpanMaskedPalCommand::Execute(DrawerThread *thread) { + dsfixed_t xfrac; + dsfixed_t yfrac; + dsfixed_t xstep; + dsfixed_t ystep; + BYTE *dest; + const BYTE *source = _source; + const BYTE *colormap = _colormap; + int count; + int spot; + + xfrac = _xfrac; + yfrac = _yfrac; + + dest = ylookup[_y] + _x1 + _destorg; + + count = _x2 - _x1 + 1; + + xstep = _xstep; + ystep = _ystep; + + if (_xbits == 6 && _ybits == 6) + { + // 64x64 is the most common case by far, so special case it. + do + { + int texdata; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + texdata = source[spot]; + if (texdata != 0) + { + *dest = colormap[texdata]; + } + dest++; + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + else + { + BYTE yshift = 32 - _ybits; + BYTE xshift = yshift - _xbits; + int xmask = ((1 << _xbits) - 1) << _ybits; + do + { + int texdata; + + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + texdata = source[spot]; + if (texdata != 0) + { + *dest = colormap[texdata]; + } + dest++; + xfrac += xstep; + yfrac += ystep; + } while (--count); + } } void DrawSpanTranslucentPalCommand::Execute(DrawerThread *thread) { + dsfixed_t xfrac; + dsfixed_t yfrac; + dsfixed_t xstep; + dsfixed_t ystep; + BYTE *dest; + const BYTE *source = _source; + const BYTE *colormap = _colormap; + int count; + int spot; + DWORD *fg2rgb = _srcblend; + DWORD *bg2rgb = _destblend; + + xfrac = _xfrac; + yfrac = _yfrac; + + dest = ylookup[_y] + _x1 + _destorg; + + count = _x2 - _x1 + 1; + + xstep = _xstep; + ystep = _ystep; + + if (_xbits == 6 && _ybits == 6) + { + // 64x64 is the most common case by far, so special case it. 
+ do + { + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + DWORD fg = colormap[source[spot]]; + DWORD bg = *dest; + fg = fg2rgb[fg]; + bg = bg2rgb[bg]; + fg = (fg + bg) | 0x1f07c1f; + *dest++ = RGB32k.All[fg & (fg >> 15)]; + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + else + { + BYTE yshift = 32 - _ybits; + BYTE xshift = yshift - _xbits; + int xmask = ((1 << _xbits) - 1) << _ybits; + do + { + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + DWORD fg = colormap[source[spot]]; + DWORD bg = *dest; + fg = fg2rgb[fg]; + bg = bg2rgb[bg]; + fg = (fg + bg) | 0x1f07c1f; + *dest++ = RGB32k.All[fg & (fg >> 15)]; + xfrac += xstep; + yfrac += ystep; + } while (--count); + } } void DrawSpanMaskedTranslucentPalCommand::Execute(DrawerThread *thread) { + dsfixed_t xfrac; + dsfixed_t yfrac; + dsfixed_t xstep; + dsfixed_t ystep; + BYTE *dest; + const BYTE *source = _source; + const BYTE *colormap = _colormap; + int count; + int spot; + DWORD *fg2rgb = _srcblend; + DWORD *bg2rgb = _destblend; + + xfrac = _xfrac; + yfrac = _yfrac; + + dest = ylookup[_y] + _x1 + _destorg; + + count = _x2 - _x1 + 1; + + xstep = _xstep; + ystep = _ystep; + + if (_xbits == 6 && _ybits == 6) + { + // 64x64 is the most common case by far, so special case it. 
+ do + { + BYTE texdata; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + texdata = source[spot]; + if (texdata != 0) + { + DWORD fg = colormap[texdata]; + DWORD bg = *dest; + fg = fg2rgb[fg]; + bg = bg2rgb[bg]; + fg = (fg + bg) | 0x1f07c1f; + *dest = RGB32k.All[fg & (fg >> 15)]; + } + dest++; + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + else + { + BYTE yshift = 32 - _ybits; + BYTE xshift = yshift - _xbits; + int xmask = ((1 << _xbits) - 1) << _ybits; + do + { + BYTE texdata; + + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + texdata = source[spot]; + if (texdata != 0) + { + DWORD fg = colormap[texdata]; + DWORD bg = *dest; + fg = fg2rgb[fg]; + bg = bg2rgb[bg]; + fg = (fg + bg) | 0x1f07c1f; + *dest = RGB32k.All[fg & (fg >> 15)]; + } + dest++; + xfrac += xstep; + yfrac += ystep; + } while (--count); + } } void DrawSpanAddClampPalCommand::Execute(DrawerThread *thread) { + dsfixed_t xfrac; + dsfixed_t yfrac; + dsfixed_t xstep; + dsfixed_t ystep; + BYTE *dest; + const BYTE *source = _source; + const BYTE *colormap = _colormap; + int count; + int spot; + DWORD *fg2rgb = _srcblend; + DWORD *bg2rgb = _destblend; + + xfrac = _xfrac; + yfrac = _yfrac; + + dest = ylookup[_y] + _x1 + _destorg; + + count = _x2 - _x1 + 1; + + xstep = _xstep; + ystep = _ystep; + + if (_xbits == 6 && _ybits == 6) + { + // 64x64 is the most common case by far, so special case it. 
+ do + { + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + DWORD a = fg2rgb[colormap[source[spot]]] + bg2rgb[*dest]; + DWORD b = a; + + a |= 0x01f07c1f; + b &= 0x40100400; + a &= 0x3fffffff; + b = b - (b >> 5); + a |= b; + *dest++ = RGB32k.All[a & (a >> 15)]; + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + else + { + BYTE yshift = 32 - _ybits; + BYTE xshift = yshift - _xbits; + int xmask = ((1 << _xbits) - 1) << _ybits; + do + { + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + DWORD a = fg2rgb[colormap[source[spot]]] + bg2rgb[*dest]; + DWORD b = a; + + a |= 0x01f07c1f; + b &= 0x40100400; + a &= 0x3fffffff; + b = b - (b >> 5); + a |= b; + *dest++ = RGB32k.All[a & (a >> 15)]; + xfrac += xstep; + yfrac += ystep; + } while (--count); + } } void DrawSpanMaskedAddClampPalCommand::Execute(DrawerThread *thread) { + dsfixed_t xfrac; + dsfixed_t yfrac; + dsfixed_t xstep; + dsfixed_t ystep; + BYTE *dest; + const BYTE *source = _source; + const BYTE *colormap = _colormap; + int count; + int spot; + DWORD *fg2rgb = _srcblend; + DWORD *bg2rgb = _destblend; + + xfrac = _xfrac; + yfrac = _yfrac; + + dest = ylookup[_y] + _x1 + _destorg; + + count = _x2 - _x1 + 1; + + xstep = _xstep; + ystep = _ystep; + + if (_xbits == 6 && _ybits == 6) + { + // 64x64 is the most common case by far, so special case it. 
+ do + { + BYTE texdata; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + texdata = source[spot]; + if (texdata != 0) + { + DWORD a = fg2rgb[colormap[texdata]] + bg2rgb[*dest]; + DWORD b = a; + + a |= 0x01f07c1f; + b &= 0x40100400; + a &= 0x3fffffff; + b = b - (b >> 5); + a |= b; + *dest = RGB32k.All[a & (a >> 15)]; + } + dest++; + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + else + { + BYTE yshift = 32 - _ybits; + BYTE xshift = yshift - _xbits; + int xmask = ((1 << _xbits) - 1) << _ybits; + do + { + BYTE texdata; + + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + texdata = source[spot]; + if (texdata != 0) + { + DWORD a = fg2rgb[colormap[texdata]] + bg2rgb[*dest]; + DWORD b = a; + + a |= 0x01f07c1f; + b &= 0x40100400; + a &= 0x3fffffff; + b = b - (b >> 5); + a |= b; + *dest = RGB32k.All[a & (a >> 15)]; + } + dest++; + xfrac += xstep; + yfrac += ystep; + } while (--count); + } } void FillSpanPalCommand::Execute(DrawerThread *thread) { + memset(ylookup[_y] + _x1 + _destorg, _color, _x2 - _x1 + 1); } + ///////////////////////////////////////////////////////////////////////// + DrawTiltedSpanPalCommand::DrawTiltedSpanPalCommand(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy) { } @@ -548,6 +957,8 @@ namespace swrenderer { } + ///////////////////////////////////////////////////////////////////////// + DrawColoredSpanPalCommand::DrawColoredSpanPalCommand(int y, int x1, int x2) { } @@ -556,6 +967,8 @@ namespace swrenderer { } + ///////////////////////////////////////////////////////////////////////// + DrawSlabPalCommand::DrawSlabPalCommand(int dx, fixed_t v, int dy, fixed_t vi, const uint8_t *vptr, uint8_t *p, const uint8_t *colormap) { } @@ -564,6 +977,8 @@ namespace swrenderer { } + ///////////////////////////////////////////////////////////////////////// + 
DrawFogBoundaryLinePalCommand::DrawFogBoundaryLinePalCommand(int y, int y2, int x1) { } diff --git a/src/r_draw_pal.h b/src/r_draw_pal.h index ea56fb7c0b..bb0e5d1f0b 100644 --- a/src/r_draw_pal.h +++ b/src/r_draw_pal.h @@ -108,7 +108,25 @@ namespace swrenderer class PalSpanCommand : public DrawerCommand { public: + PalSpanCommand(); FString DebugInfo() override { return "PalSpanCommand"; } + + protected: + const uint8_t *_source; + const uint8_t *_colormap; + dsfixed_t _xfrac; + dsfixed_t _yfrac; + int _y; + int _x1; + int _x2; + uint8_t *_destorg; + dsfixed_t _xstep; + dsfixed_t _ystep; + int _xbits; + int _ybits; + uint32_t *_srcblend; + uint32_t *_destblend; + int _color; }; class DrawSpanPalCommand : public PalSpanCommand { public: void Execute(DrawerThread *thread) override; }; From b755f489a341767d757c8078c4525aa917f68154 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 5 Dec 2016 12:01:03 +0100 Subject: [PATCH 439/912] Fix types --- src/r_draw_pal.cpp | 106 ++++++++++++++++++++++----------------------- 1 file changed, 53 insertions(+), 53 deletions(-) diff --git a/src/r_draw_pal.cpp b/src/r_draw_pal.cpp index 80d2fc5749..765891a87a 100644 --- a/src/r_draw_pal.cpp +++ b/src/r_draw_pal.cpp @@ -606,9 +606,9 @@ namespace swrenderer dsfixed_t yfrac; dsfixed_t xstep; dsfixed_t ystep; - BYTE *dest; - const BYTE *source = _source; - const BYTE *colormap = _colormap; + uint8_t *dest; + const uint8_t *source = _source; + const uint8_t *colormap = _colormap; int count; int spot; @@ -642,8 +642,8 @@ namespace swrenderer } else { - BYTE yshift = 32 - _ybits; - BYTE xshift = yshift - _xbits; + uint8_t yshift = 32 - _ybits; + uint8_t xshift = yshift - _xbits; int xmask = ((1 << _xbits) - 1) << _ybits; do { @@ -668,13 +668,13 @@ namespace swrenderer dsfixed_t yfrac; dsfixed_t xstep; dsfixed_t ystep; - BYTE *dest; - const BYTE *source = _source; - const BYTE *colormap = _colormap; + uint8_t *dest; + const uint8_t *source = _source; + const uint8_t *colormap = 
_colormap; int count; int spot; - DWORD *fg2rgb = _srcblend; - DWORD *bg2rgb = _destblend; + uint32_t *fg2rgb = _srcblend; + uint32_t *bg2rgb = _destblend; xfrac = _xfrac; yfrac = _yfrac; @@ -692,8 +692,8 @@ namespace swrenderer do { spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - DWORD fg = colormap[source[spot]]; - DWORD bg = *dest; + uint32_t fg = colormap[source[spot]]; + uint32_t bg = *dest; fg = fg2rgb[fg]; bg = bg2rgb[bg]; fg = (fg + bg) | 0x1f07c1f; @@ -704,14 +704,14 @@ namespace swrenderer } else { - BYTE yshift = 32 - _ybits; - BYTE xshift = yshift - _xbits; + uint8_t yshift = 32 - _ybits; + uint8_t xshift = yshift - _xbits; int xmask = ((1 << _xbits) - 1) << _ybits; do { spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - DWORD fg = colormap[source[spot]]; - DWORD bg = *dest; + uint32_t fg = colormap[source[spot]]; + uint32_t bg = *dest; fg = fg2rgb[fg]; bg = bg2rgb[bg]; fg = (fg + bg) | 0x1f07c1f; @@ -728,13 +728,13 @@ namespace swrenderer dsfixed_t yfrac; dsfixed_t xstep; dsfixed_t ystep; - BYTE *dest; - const BYTE *source = _source; - const BYTE *colormap = _colormap; + uint8_t *dest; + const uint8_t *source = _source; + const uint8_t *colormap = _colormap; int count; int spot; - DWORD *fg2rgb = _srcblend; - DWORD *bg2rgb = _destblend; + uint32_t *fg2rgb = _srcblend; + uint32_t *bg2rgb = _destblend; xfrac = _xfrac; yfrac = _yfrac; @@ -751,14 +751,14 @@ namespace swrenderer // 64x64 is the most common case by far, so special case it. 
do { - BYTE texdata; + uint8_t texdata; spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); texdata = source[spot]; if (texdata != 0) { - DWORD fg = colormap[texdata]; - DWORD bg = *dest; + uint32_t fg = colormap[texdata]; + uint32_t bg = *dest; fg = fg2rgb[fg]; bg = bg2rgb[bg]; fg = (fg + bg) | 0x1f07c1f; @@ -771,19 +771,19 @@ namespace swrenderer } else { - BYTE yshift = 32 - _ybits; - BYTE xshift = yshift - _xbits; + uint8_t yshift = 32 - _ybits; + uint8_t xshift = yshift - _xbits; int xmask = ((1 << _xbits) - 1) << _ybits; do { - BYTE texdata; + uint8_t texdata; spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); texdata = source[spot]; if (texdata != 0) { - DWORD fg = colormap[texdata]; - DWORD bg = *dest; + uint32_t fg = colormap[texdata]; + uint32_t bg = *dest; fg = fg2rgb[fg]; bg = bg2rgb[bg]; fg = (fg + bg) | 0x1f07c1f; @@ -802,13 +802,13 @@ namespace swrenderer dsfixed_t yfrac; dsfixed_t xstep; dsfixed_t ystep; - BYTE *dest; - const BYTE *source = _source; - const BYTE *colormap = _colormap; + uint8_t *dest; + const uint8_t *source = _source; + const uint8_t *colormap = _colormap; int count; int spot; - DWORD *fg2rgb = _srcblend; - DWORD *bg2rgb = _destblend; + uint32_t *fg2rgb = _srcblend; + uint32_t *bg2rgb = _destblend; xfrac = _xfrac; yfrac = _yfrac; @@ -826,8 +826,8 @@ namespace swrenderer do { spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - DWORD a = fg2rgb[colormap[source[spot]]] + bg2rgb[*dest]; - DWORD b = a; + uint32_t a = fg2rgb[colormap[source[spot]]] + bg2rgb[*dest]; + uint32_t b = a; a |= 0x01f07c1f; b &= 0x40100400; @@ -841,14 +841,14 @@ namespace swrenderer } else { - BYTE yshift = 32 - _ybits; - BYTE xshift = yshift - _xbits; + uint8_t yshift = 32 - _ybits; + uint8_t xshift = yshift - _xbits; int xmask = ((1 << _xbits) - 1) << _ybits; do { spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - DWORD a = fg2rgb[colormap[source[spot]]] + bg2rgb[*dest]; - DWORD b = a; + uint32_t a = 
fg2rgb[colormap[source[spot]]] + bg2rgb[*dest]; + uint32_t b = a; a |= 0x01f07c1f; b &= 0x40100400; @@ -868,13 +868,13 @@ namespace swrenderer dsfixed_t yfrac; dsfixed_t xstep; dsfixed_t ystep; - BYTE *dest; - const BYTE *source = _source; - const BYTE *colormap = _colormap; + uint8_t *dest; + const uint8_t *source = _source; + const uint8_t *colormap = _colormap; int count; int spot; - DWORD *fg2rgb = _srcblend; - DWORD *bg2rgb = _destblend; + uint32_t *fg2rgb = _srcblend; + uint32_t *bg2rgb = _destblend; xfrac = _xfrac; yfrac = _yfrac; @@ -891,14 +891,14 @@ namespace swrenderer // 64x64 is the most common case by far, so special case it. do { - BYTE texdata; + uint8_t texdata; spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); texdata = source[spot]; if (texdata != 0) { - DWORD a = fg2rgb[colormap[texdata]] + bg2rgb[*dest]; - DWORD b = a; + uint32_t a = fg2rgb[colormap[texdata]] + bg2rgb[*dest]; + uint32_t b = a; a |= 0x01f07c1f; b &= 0x40100400; @@ -914,19 +914,19 @@ namespace swrenderer } else { - BYTE yshift = 32 - _ybits; - BYTE xshift = yshift - _xbits; + uint8_t yshift = 32 - _ybits; + uint8_t xshift = yshift - _xbits; int xmask = ((1 << _xbits) - 1) << _ybits; do { - BYTE texdata; + uint8_t texdata; spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); texdata = source[spot]; if (texdata != 0) { - DWORD a = fg2rgb[colormap[texdata]] + bg2rgb[*dest]; - DWORD b = a; + uint32_t a = fg2rgb[colormap[texdata]] + bg2rgb[*dest]; + uint32_t b = a; a |= 0x01f07c1f; b &= 0x40100400; From bc66682860ca463429904940001b4aff5f2e0c6a Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 5 Dec 2016 12:16:10 +0100 Subject: [PATCH 440/912] Add sky drawers --- src/r_draw_pal.cpp | 386 ++++++++++++++++++++++++++++++++++++++++++++- src/r_draw_pal.h | 13 ++ 2 files changed, 398 insertions(+), 1 deletion(-) diff --git a/src/r_draw_pal.cpp b/src/r_draw_pal.cpp index 765891a87a..65bfaeb226 100644 --- a/src/r_draw_pal.cpp +++ b/src/r_draw_pal.cpp @@ -422,24 
+422,408 @@ namespace swrenderer ///////////////////////////////////////////////////////////////////////// - PalSkyCommand::PalSkyCommand(uint32_t solid_top, uint32_t solid_bottom) + PalSkyCommand::PalSkyCommand(uint32_t solid_top, uint32_t solid_bottom) : solid_top(solid_top), solid_bottom(solid_bottom) { + using namespace drawerargs; + + _dest = dc_dest; + _count = dc_count; + _pitch = dc_pitch; + for (int col = 0; col < 4; col++) + { + _bufplce[col] = bufplce[col]; + _bufplce2[col] = bufplce2[col]; + _bufheight[col] = bufheight[col]; + _vince[col] = vince[col]; + _vplce[col] = vplce[col]; + } } void DrawSingleSky1PalCommand::Execute(DrawerThread *thread) { + uint8_t *dest = _dest; + int count = _count; + int pitch = _pitch; + const uint8_t *source0 = _bufplce[0]; + int textureheight0 = _bufheight[0]; + + int32_t frac = _vplce[0]; + int32_t fracstep = _vince[0]; + + int start_fade = 2; // How fast it should fade out + + int solid_top_r = RPART(solid_top); + int solid_top_g = GPART(solid_top); + int solid_top_b = BPART(solid_top); + int solid_bottom_r = RPART(solid_bottom); + int solid_bottom_g = GPART(solid_bottom); + int solid_bottom_b = BPART(solid_bottom); + + for (int index = 0; index < count; index++) + { + uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; + uint8_t fg = source0[sample_index]; + + int alpha_top = MAX(MIN(frac >> (16 - start_fade), 256), 0); + int alpha_bottom = MAX(MIN(((2 << 24) - frac) >> (16 - start_fade), 256), 0); + + if (alpha_top == 256 && alpha_bottom == 256) + { + *dest = fg; + } + else + { + int inv_alpha_top = 256 - alpha_top; + int inv_alpha_bottom = 256 - alpha_bottom; + + const auto &c = GPalette.BaseColors[fg]; + int c_red = c.r; + int c_green = c.g; + int c_blue = c.b; + c_red = (c_red * alpha_top + solid_top_r * inv_alpha_top) >> 8; + c_green = (c_green * alpha_top + solid_top_g * inv_alpha_top) >> 8; + c_blue = (c_blue * alpha_top + solid_top_b * inv_alpha_top) >> 8; + c_red = 
(c_red * alpha_bottom + solid_bottom_r * inv_alpha_bottom) >> 8; + c_green = (c_green * alpha_bottom + solid_bottom_g * inv_alpha_bottom) >> 8; + c_blue = (c_blue * alpha_bottom + solid_bottom_b * inv_alpha_bottom) >> 8; + *dest = RGB32k.RGB[(c_red >> 3)][(c_green >> 3)][(c_blue >> 3)]; + } + + frac += fracstep; + dest += pitch; + } } void DrawSingleSky4PalCommand::Execute(DrawerThread *thread) { + uint8_t *dest = _dest; + int count = _count; + int pitch = _pitch; + const uint8_t *source0[4] = { _bufplce[0], _bufplce[1], _bufplce[2], _bufplce[3] }; + int textureheight0 = _bufheight[0]; + const uint32_t *palette = (const uint32_t *)GPalette.BaseColors; + int32_t frac[4] = { (int32_t)_vplce[0], (int32_t)_vplce[1], (int32_t)_vplce[2], (int32_t)_vplce[3] }; + int32_t fracstep[4] = { (int32_t)_vince[0], (int32_t)_vince[1], (int32_t)_vince[2], (int32_t)_vince[3] }; + uint8_t output[4]; + + int start_fade = 2; // How fast it should fade out + + int solid_top_r = RPART(solid_top); + int solid_top_g = GPART(solid_top); + int solid_top_b = BPART(solid_top); + int solid_bottom_r = RPART(solid_bottom); + int solid_bottom_g = GPART(solid_bottom); + int solid_bottom_b = BPART(solid_bottom); + uint32_t solid_top_fill = RGB32k.RGB[(solid_top_r >> 3)][(solid_top_g >> 3)][(solid_top_b >> 3)]; + uint32_t solid_bottom_fill = RGB32k.RGB[(solid_bottom_r >> 3)][(solid_bottom_g >> 3)][(solid_bottom_b >> 3)]; + solid_top_fill = (solid_top_fill << 24) | (solid_top_fill << 16) | (solid_top_fill << 8) | solid_top_fill; + solid_bottom_fill = (solid_bottom_fill << 24) | (solid_bottom_fill << 16) | (solid_bottom_fill << 8) | solid_bottom_fill; + + // Find bands for top solid color, top fade, center textured, bottom fade, bottom solid color: + int fade_length = (1 << (24 - start_fade)); + int start_fadetop_y = (-frac[0]) / fracstep[0]; + int end_fadetop_y = (fade_length - frac[0]) / fracstep[0]; + int start_fadebottom_y = ((2 << 24) - fade_length - frac[0]) / fracstep[0]; + int end_fadebottom_y = 
((2 << 24) - frac[0]) / fracstep[0]; + for (int col = 1; col < 4; col++) + { + start_fadetop_y = MIN(start_fadetop_y, (-frac[0]) / fracstep[0]); + end_fadetop_y = MAX(end_fadetop_y, (fade_length - frac[0]) / fracstep[0]); + start_fadebottom_y = MIN(start_fadebottom_y, ((2 << 24) - fade_length - frac[0]) / fracstep[0]); + end_fadebottom_y = MAX(end_fadebottom_y, ((2 << 24) - frac[0]) / fracstep[0]); + } + start_fadetop_y = clamp(start_fadetop_y, 0, count); + end_fadetop_y = clamp(end_fadetop_y, 0, count); + start_fadebottom_y = clamp(start_fadebottom_y, 0, count); + end_fadebottom_y = clamp(end_fadebottom_y, 0, count); + + // Top solid color: + for (int index = 0; index < start_fadetop_y; index++) + { + *((uint32_t*)dest) = solid_top_fill; + dest += pitch; + for (int col = 0; col < 4; col++) + frac[col] += fracstep[col]; + } + + // Top fade: + for (int index = start_fadetop_y; index < end_fadetop_y; index++) + { + for (int col = 0; col < 4; col++) + { + uint32_t sample_index = (((((uint32_t)frac[col]) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; + uint8_t fg = source0[col][sample_index]; + + uint32_t c = palette[fg]; + int alpha_top = MAX(MIN(frac[col] >> (16 - start_fade), 256), 0); + int inv_alpha_top = 256 - alpha_top; + int c_red = RPART(c); + int c_green = GPART(c); + int c_blue = BPART(c); + c_red = (c_red * alpha_top + solid_top_r * inv_alpha_top) >> 8; + c_green = (c_green * alpha_top + solid_top_g * inv_alpha_top) >> 8; + c_blue = (c_blue * alpha_top + solid_top_b * inv_alpha_top) >> 8; + output[col] = RGB32k.RGB[(c_red >> 3)][(c_green >> 3)][(c_blue >> 3)]; + + frac[col] += fracstep[col]; + } + *((uint32_t*)dest) = *((uint32_t*)output); + dest += pitch; + } + + // Textured center: + for (int index = end_fadetop_y; index < start_fadebottom_y; index++) + { + for (int col = 0; col < 4; col++) + { + uint32_t sample_index = (((((uint32_t)frac[col]) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; + output[col] = source0[col][sample_index]; + + frac[col] 
+= fracstep[col]; + } + + *((uint32_t*)dest) = *((uint32_t*)output); + dest += pitch; + } + + // Fade bottom: + for (int index = start_fadebottom_y; index < end_fadebottom_y; index++) + { + for (int col = 0; col < 4; col++) + { + uint32_t sample_index = (((((uint32_t)frac[col]) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; + uint8_t fg = source0[col][sample_index]; + + uint32_t c = palette[fg]; + int alpha_bottom = MAX(MIN(((2 << 24) - frac[col]) >> (16 - start_fade), 256), 0); + int inv_alpha_bottom = 256 - alpha_bottom; + int c_red = RPART(c); + int c_green = GPART(c); + int c_blue = BPART(c); + c_red = (c_red * alpha_bottom + solid_bottom_r * inv_alpha_bottom) >> 8; + c_green = (c_green * alpha_bottom + solid_bottom_g * inv_alpha_bottom) >> 8; + c_blue = (c_blue * alpha_bottom + solid_bottom_b * inv_alpha_bottom) >> 8; + output[col] = RGB32k.RGB[(c_red >> 3)][(c_green >> 3)][(c_blue >> 3)]; + + frac[col] += fracstep[col]; + } + *((uint32_t*)dest) = *((uint32_t*)output); + dest += pitch; + } + + // Bottom solid color: + for (int index = end_fadebottom_y; index < count; index++) + { + *((uint32_t*)dest) = solid_bottom_fill; + dest += pitch; + } } void DrawDoubleSky1PalCommand::Execute(DrawerThread *thread) { + uint8_t *dest = _dest; + int count = _count; + int pitch = _pitch; + const uint8_t *source0 = _bufplce[0]; + const uint8_t *source1 = _bufplce2[0]; + int textureheight0 = _bufheight[0]; + uint32_t maxtextureheight1 = _bufheight[1] - 1; + + int32_t frac = _vplce[0]; + int32_t fracstep = _vince[0]; + + int start_fade = 2; // How fast it should fade out + + int solid_top_r = RPART(solid_top); + int solid_top_g = GPART(solid_top); + int solid_top_b = BPART(solid_top); + int solid_bottom_r = RPART(solid_bottom); + int solid_bottom_g = GPART(solid_bottom); + int solid_bottom_b = BPART(solid_bottom); + + for (int index = 0; index < count; index++) + { + uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; + uint8_t fg = 
source0[sample_index]; + if (fg == 0) + { + uint32_t sample_index2 = MIN(sample_index, maxtextureheight1); + fg = source1[sample_index2]; + } + + int alpha_top = MAX(MIN(frac >> (16 - start_fade), 256), 0); + int alpha_bottom = MAX(MIN(((2 << 24) - frac) >> (16 - start_fade), 256), 0); + + if (alpha_top == 256 && alpha_bottom == 256) + { + *dest = fg; + } + else + { + int inv_alpha_top = 256 - alpha_top; + int inv_alpha_bottom = 256 - alpha_bottom; + + const auto &c = GPalette.BaseColors[fg]; + int c_red = c.r; + int c_green = c.g; + int c_blue = c.b; + c_red = (c_red * alpha_top + solid_top_r * inv_alpha_top) >> 8; + c_green = (c_green * alpha_top + solid_top_g * inv_alpha_top) >> 8; + c_blue = (c_blue * alpha_top + solid_top_b * inv_alpha_top) >> 8; + c_red = (c_red * alpha_bottom + solid_bottom_r * inv_alpha_bottom) >> 8; + c_green = (c_green * alpha_bottom + solid_bottom_g * inv_alpha_bottom) >> 8; + c_blue = (c_blue * alpha_bottom + solid_bottom_b * inv_alpha_bottom) >> 8; + *dest = RGB32k.RGB[(c_red >> 3)][(c_green >> 3)][(c_blue >> 3)]; + } + + frac += fracstep; + dest += pitch; + } } void DrawDoubleSky4PalCommand::Execute(DrawerThread *thread) { + uint8_t *dest = _dest; + int count = _count; + int pitch = _pitch; + const uint8_t *source0[4] = { _bufplce[0], _bufplce[1], _bufplce[2], _bufplce[3] }; + const uint8_t *source1[4] = { _bufplce2[0], _bufplce2[1], _bufplce2[2], _bufplce2[3] }; + int textureheight0 = _bufheight[0]; + uint32_t maxtextureheight1 = _bufheight[1] - 1; + const uint32_t *palette = (const uint32_t *)GPalette.BaseColors; + int32_t frac[4] = { (int32_t)_vplce[0], (int32_t)_vplce[1], (int32_t)_vplce[2], (int32_t)_vplce[3] }; + int32_t fracstep[4] = { (int32_t)_vince[0], (int32_t)_vince[1], (int32_t)_vince[2], (int32_t)_vince[3] }; + uint8_t output[4]; + + int start_fade = 2; // How fast it should fade out + + int solid_top_r = RPART(solid_top); + int solid_top_g = GPART(solid_top); + int solid_top_b = BPART(solid_top); + int solid_bottom_r = 
RPART(solid_bottom); + int solid_bottom_g = GPART(solid_bottom); + int solid_bottom_b = BPART(solid_bottom); + uint32_t solid_top_fill = RGB32k.RGB[(solid_top_r >> 3)][(solid_top_g >> 3)][(solid_top_b >> 3)]; + uint32_t solid_bottom_fill = RGB32k.RGB[(solid_bottom_r >> 3)][(solid_bottom_g >> 3)][(solid_bottom_b >> 3)]; + solid_top_fill = (solid_top_fill << 24) | (solid_top_fill << 16) | (solid_top_fill << 8) | solid_top_fill; + solid_bottom_fill = (solid_bottom_fill << 24) | (solid_bottom_fill << 16) | (solid_bottom_fill << 8) | solid_bottom_fill; + + // Find bands for top solid color, top fade, center textured, bottom fade, bottom solid color: + int fade_length = (1 << (24 - start_fade)); + int start_fadetop_y = (-frac[0]) / fracstep[0]; + int end_fadetop_y = (fade_length - frac[0]) / fracstep[0]; + int start_fadebottom_y = ((2 << 24) - fade_length - frac[0]) / fracstep[0]; + int end_fadebottom_y = ((2 << 24) - frac[0]) / fracstep[0]; + for (int col = 1; col < 4; col++) + { + start_fadetop_y = MIN(start_fadetop_y, (-frac[0]) / fracstep[0]); + end_fadetop_y = MAX(end_fadetop_y, (fade_length - frac[0]) / fracstep[0]); + start_fadebottom_y = MIN(start_fadebottom_y, ((2 << 24) - fade_length - frac[0]) / fracstep[0]); + end_fadebottom_y = MAX(end_fadebottom_y, ((2 << 24) - frac[0]) / fracstep[0]); + } + start_fadetop_y = clamp(start_fadetop_y, 0, count); + end_fadetop_y = clamp(end_fadetop_y, 0, count); + start_fadebottom_y = clamp(start_fadebottom_y, 0, count); + end_fadebottom_y = clamp(end_fadebottom_y, 0, count); + + // Top solid color: + for (int index = 0; index < start_fadetop_y; index++) + { + *((uint32_t*)dest) = solid_top_fill; + dest += pitch; + for (int col = 0; col < 4; col++) + frac[col] += fracstep[col]; + } + + // Top fade: + for (int index = start_fadetop_y; index < end_fadetop_y; index++) + { + for (int col = 0; col < 4; col++) + { + uint32_t sample_index = (((((uint32_t)frac[col]) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; + uint8_t fg = 
source0[col][sample_index]; + if (fg == 0) + { + uint32_t sample_index2 = MIN(sample_index, maxtextureheight1); + fg = source1[col][sample_index2]; + } + output[col] = fg; + + uint32_t c = palette[fg]; + int alpha_top = MAX(MIN(frac[col] >> (16 - start_fade), 256), 0); + int inv_alpha_top = 256 - alpha_top; + int c_red = RPART(c); + int c_green = GPART(c); + int c_blue = BPART(c); + c_red = (c_red * alpha_top + solid_top_r * inv_alpha_top) >> 8; + c_green = (c_green * alpha_top + solid_top_g * inv_alpha_top) >> 8; + c_blue = (c_blue * alpha_top + solid_top_b * inv_alpha_top) >> 8; + output[col] = RGB32k.RGB[(c_red >> 3)][(c_green >> 3)][(c_blue >> 3)]; + + frac[col] += fracstep[col]; + } + *((uint32_t*)dest) = *((uint32_t*)output); + dest += pitch; + } + + // Textured center: + for (int index = end_fadetop_y; index < start_fadebottom_y; index++) + { + for (int col = 0; col < 4; col++) + { + uint32_t sample_index = (((((uint32_t)frac[col]) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; + uint8_t fg = source0[col][sample_index]; + if (fg == 0) + { + uint32_t sample_index2 = MIN(sample_index, maxtextureheight1); + fg = source1[col][sample_index2]; + } + output[col] = fg; + + frac[col] += fracstep[col]; + } + + *((uint32_t*)dest) = *((uint32_t*)output); + dest += pitch; + } + + // Fade bottom: + for (int index = start_fadebottom_y; index < end_fadebottom_y; index++) + { + for (int col = 0; col < 4; col++) + { + uint32_t sample_index = (((((uint32_t)frac[col]) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; + uint8_t fg = source0[col][sample_index]; + if (fg == 0) + { + uint32_t sample_index2 = MIN(sample_index, maxtextureheight1); + fg = source1[col][sample_index2]; + } + output[col] = fg; + + uint32_t c = palette[fg]; + int alpha_bottom = MAX(MIN(((2 << 24) - frac[col]) >> (16 - start_fade), 256), 0); + int inv_alpha_bottom = 256 - alpha_bottom; + int c_red = RPART(c); + int c_green = GPART(c); + int c_blue = BPART(c); + c_red = (c_red * alpha_bottom + 
solid_bottom_r * inv_alpha_bottom) >> 8; + c_green = (c_green * alpha_bottom + solid_bottom_g * inv_alpha_bottom) >> 8; + c_blue = (c_blue * alpha_bottom + solid_bottom_b * inv_alpha_bottom) >> 8; + output[col] = RGB32k.RGB[(c_red >> 3)][(c_green >> 3)][(c_blue >> 3)]; + + frac[col] += fracstep[col]; + } + *((uint32_t*)dest) = *((uint32_t*)output); + dest += pitch; + } + + // Bottom solid color: + for (int index = end_fadebottom_y; index < count; index++) + { + *((uint32_t*)dest) = solid_bottom_fill; + dest += pitch; + } } ///////////////////////////////////////////////////////////////////////// diff --git a/src/r_draw_pal.h b/src/r_draw_pal.h index bb0e5d1f0b..2de04367d9 100644 --- a/src/r_draw_pal.h +++ b/src/r_draw_pal.h @@ -67,6 +67,19 @@ namespace swrenderer public: PalSkyCommand(uint32_t solid_top, uint32_t solid_bottom); FString DebugInfo() override { return "PalSkyCommand"; } + + protected: + uint32_t solid_top; + uint32_t solid_bottom; + + uint8_t *_dest; + int _count; + int _pitch; + const uint8_t *_bufplce[4]; + const uint8_t *_bufplce2[4]; + int _bufheight[4]; + uint32_t _vince[4]; + uint32_t _vplce[4]; }; class DrawSingleSky1PalCommand : public PalSkyCommand { public: using PalSkyCommand::PalSkyCommand; void Execute(DrawerThread *thread) override; }; From 2ca0238e2f48a416ecd9071448e5e6daecad622a Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 5 Dec 2016 12:41:16 +0100 Subject: [PATCH 441/912] Add column drawers --- src/r_draw_pal.cpp | 644 +++++++++++++++++++++++++++++++++++++++++++++ src/r_draw_pal.h | 22 ++ 2 files changed, 666 insertions(+) diff --git a/src/r_draw_pal.cpp b/src/r_draw_pal.cpp index 65bfaeb226..ea1ae96b80 100644 --- a/src/r_draw_pal.cpp +++ b/src/r_draw_pal.cpp @@ -9,6 +9,47 @@ #include "v_video.h" #include "r_draw_pal.h" +/* + [RH] This translucency algorithm is based on DOSDoom 0.65's, but uses + a 32k RGB table instead of an 8k one. 
At least on my machine, it's + slightly faster (probably because it uses only one shift instead of + two), and it looks considerably less green at the ends of the + translucency range. The extra size doesn't appear to be an issue. + + The following note is from DOSDoom 0.65: + + New translucency algorithm, by Erik Sandberg: + + Basically, we compute the red, green and blue values for each pixel, and + then use a RGB table to check which one of the palette colours that best + represents those RGB values. The RGB table is 8k big, with 4 R-bits, + 5 G-bits and 4 B-bits. A 4k table gives a bit too bad precision, and a 32k + table takes up more memory and results in more cache misses, so an 8k + table seemed to be quite ultimate. + + The computation of the RGB for each pixel is accelerated by using two + 1k tables for each translucency level. + The xth element of one of these tables contains the r, g and b values for + the colour x, weighted for the current translucency level (for example, + the weighted rgb values for background colour at 75% translucency are 1/4 + of the original rgb values). The rgb values are stored as three + low-precision fixed point values, packed into one long per colour: + Bit 0-4: Frac part of blue (5 bits) + Bit 5-8: Int part of blue (4 bits) + Bit 9-13: Frac part of red (5 bits) + Bit 14-17: Int part of red (4 bits) + Bit 18-22: Frac part of green (5 bits) + Bit 23-27: Int part of green (5 bits) + Bit 28-31: All zeros (4 bits) + + The point of this format is that the two colours now can be added, and + then be converted to a RGB table index very easily: First, we just set + all the frac bits and the four upper zero bits to 1. It's now possible + to get the RGB table index by anding the current value >> 5 with the + current value >> 19. When asm-optimised, this should be the fastest + algorithm that uses RGB tables. 
+*/ + namespace swrenderer { PalWall1Command::PalWall1Command() @@ -830,74 +871,677 @@ namespace swrenderer PalColumnCommand::PalColumnCommand() { + using namespace drawerargs; + + _count = dc_count; + _dest = dc_dest; + _pitch = dc_pitch; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _colormap = dc_colormap; + _source = dc_source; + _translation = dc_translation; + _color = dc_color; + _srcblend = dc_srcblend; + _destblend = dc_destblend; + _srccolor = dc_srccolor; } void DrawColumnPalCommand::Execute(DrawerThread *thread) { + int count; + BYTE *dest; + fixed_t frac; + fixed_t fracstep; + + count = _count; + + // Zero length, column does not exceed a pixel. + if (count <= 0) + return; + + // Framebuffer destination address. + dest = _dest; + + // Determine scaling, + // which is the only mapping to be done. + fracstep = _iscale; + frac = _texturefrac; + + { + // [RH] Get local copies of these variables so that the compiler + // has a better chance of optimizing this well. + const BYTE *colormap = _colormap; + const BYTE *source = _source; + int pitch = _pitch; + + // Inner loop that does the actual texture mapping, + // e.g. a DDA-lile scaling. + // This is as fast as it gets. + do + { + // Re-map color indices from wall texture column + // using a lighting/special effects LUT. 
+ *dest = colormap[source[frac >> FRACBITS]]; + + dest += pitch; + frac += fracstep; + + } while (--count); + } } void FillColumnPalCommand::Execute(DrawerThread *thread) { + int count; + BYTE *dest; + + count = _count; + + if (count <= 0) + return; + + dest = _dest; + + { + int pitch = _pitch; + BYTE color = _color; + + do + { + *dest = color; + dest += pitch; + } while (--count); + } } void FillColumnAddPalCommand::Execute(DrawerThread *thread) { + int count; + BYTE *dest; + + count = _count; + if (count <= 0) + return; + + dest = _dest; + DWORD *bg2rgb; + DWORD fg; + + bg2rgb = _destblend; + fg = _srccolor; + int pitch = _pitch; + + do + { + DWORD bg; + bg = (fg + bg2rgb[*dest]) | 0x1f07c1f; + *dest = RGB32k.All[bg & (bg >> 15)]; + dest += pitch; + } while (--count); + } void FillColumnAddClampPalCommand::Execute(DrawerThread *thread) { + int count; + BYTE *dest; + + count = _count; + if (count <= 0) + return; + + dest = _dest; + DWORD *bg2rgb; + DWORD fg; + + bg2rgb = _destblend; + fg = _srccolor; + int pitch = _pitch; + + do + { + DWORD a = fg + bg2rgb[*dest]; + DWORD b = a; + + a |= 0x01f07c1f; + b &= 0x40100400; + a &= 0x3fffffff; + b = b - (b >> 5); + a |= b; + *dest = RGB32k.All[a & (a >> 15)]; + dest += pitch; + } while (--count); } void FillColumnSubClampPalCommand::Execute(DrawerThread *thread) { + int count; + BYTE *dest; + + count = _count; + if (count <= 0) + return; + + dest = _dest; + DWORD *bg2rgb; + DWORD fg; + + bg2rgb = _destblend; + fg = _srccolor | 0x40100400; + int pitch = _pitch; + + do + { + DWORD a = fg - bg2rgb[*dest]; + DWORD b = a; + + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + *dest = RGB32k.All[a & (a >> 15)]; + dest += pitch; + } while (--count); } void FillColumnRevSubClampPalCommand::Execute(DrawerThread *thread) { + int count; + BYTE *dest; + + count = _count; + if (count <= 0) + return; + + dest = _dest; + DWORD *bg2rgb; + DWORD fg; + + bg2rgb = _destblend; + fg = _srccolor; + int pitch = _pitch; + + do 
+ { + DWORD a = (bg2rgb[*dest] | 0x40100400) - fg; + DWORD b = a; + + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + *dest = RGB32k.All[a & (a >> 15)]; + dest += pitch; + } while (--count); } void DrawColumnAddPalCommand::Execute(DrawerThread *thread) { + int count; + BYTE *dest; + fixed_t frac; + fixed_t fracstep; + + count = _count; + if (count <= 0) + return; + + dest = _dest; + + fracstep = _iscale; + frac = _texturefrac; + + { + DWORD *fg2rgb = _srcblend; + DWORD *bg2rgb = _destblend; + const BYTE *colormap = _colormap; + const BYTE *source = _source; + int pitch = _pitch; + + do + { + DWORD fg = colormap[source[frac >> FRACBITS]]; + DWORD bg = *dest; + + fg = fg2rgb[fg]; + bg = bg2rgb[bg]; + fg = (fg + bg) | 0x1f07c1f; + *dest = RGB32k.All[fg & (fg >> 15)]; + dest += pitch; + frac += fracstep; + } while (--count); + } } void DrawColumnTranslatedPalCommand::Execute(DrawerThread *thread) { + int count; + BYTE* dest; + fixed_t frac; + fixed_t fracstep; + + count = _count; + if (count <= 0) + return; + + dest = _dest; + + fracstep = _iscale; + frac = _texturefrac; + + { + // [RH] Local copies of global vars to improve compiler optimizations + const BYTE *colormap = _colormap; + const BYTE *translation = _translation; + const BYTE *source = _source; + int pitch = _pitch; + + do + { + *dest = colormap[translation[source[frac >> FRACBITS]]]; + dest += pitch; + + frac += fracstep; + } while (--count); + } } void DrawColumnTlatedAddPalCommand::Execute(DrawerThread *thread) { + int count; + BYTE *dest; + fixed_t frac; + fixed_t fracstep; + + count = _count; + if (count <= 0) + return; + + dest = _dest; + + fracstep = _iscale; + frac = _texturefrac; + + { + DWORD *fg2rgb = _srcblend; + DWORD *bg2rgb = _destblend; + const BYTE *translation = _translation; + const BYTE *colormap = _colormap; + const BYTE *source = _source; + int pitch = _pitch; + + do + { + DWORD fg = colormap[translation[source[frac >> FRACBITS]]]; + DWORD bg = *dest; + + fg = 
fg2rgb[fg]; + bg = bg2rgb[bg]; + fg = (fg + bg) | 0x1f07c1f; + *dest = RGB32k.All[fg & (fg >> 15)]; + dest += pitch; + frac += fracstep; + } while (--count); + } } void DrawColumnShadedPalCommand::Execute(DrawerThread *thread) { + int count; + BYTE *dest; + fixed_t frac, fracstep; + + count = _count; + + if (count <= 0) + return; + + dest = _dest; + + fracstep = _iscale; + frac = _texturefrac; + + { + const BYTE *source = _source; + const BYTE *colormap = _colormap; + int pitch = _pitch; + DWORD *fgstart = &Col2RGB8[0][_color]; + + do + { + DWORD val = colormap[source[frac >> FRACBITS]]; + DWORD fg = fgstart[val << 8]; + val = (Col2RGB8[64 - val][*dest] + fg) | 0x1f07c1f; + *dest = RGB32k.All[val & (val >> 15)]; + + dest += pitch; + frac += fracstep; + } while (--count); + } } void DrawColumnAddClampPalCommand::Execute(DrawerThread *thread) { + int count; + BYTE *dest; + fixed_t frac; + fixed_t fracstep; + + count = _count; + if (count <= 0) + return; + + dest = _dest; + + fracstep = _iscale; + frac = _texturefrac; + + { + const BYTE *colormap = _colormap; + const BYTE *source = _source; + int pitch = _pitch; + DWORD *fg2rgb = _srcblend; + DWORD *bg2rgb = _destblend; + + do + { + DWORD a = fg2rgb[colormap[source[frac >> FRACBITS]]] + bg2rgb[*dest]; + DWORD b = a; + + a |= 0x01f07c1f; + b &= 0x40100400; + a &= 0x3fffffff; + b = b - (b >> 5); + a |= b; + *dest = RGB32k.All[a & (a >> 15)]; + dest += pitch; + frac += fracstep; + } while (--count); + } } void DrawColumnAddClampTranslatedPalCommand::Execute(DrawerThread *thread) { + int count; + BYTE *dest; + fixed_t frac; + fixed_t fracstep; + + count = _count; + if (count <= 0) + return; + + dest = _dest; + + fracstep = _iscale; + frac = _texturefrac; + + { + const BYTE *translation = _translation; + const BYTE *colormap = _colormap; + const BYTE *source = _source; + int pitch = _pitch; + DWORD *fg2rgb = _srcblend; + DWORD *bg2rgb = _destblend; + + do + { + DWORD a = fg2rgb[colormap[translation[source[frac >> 
FRACBITS]]]] + bg2rgb[*dest]; + DWORD b = a; + + a |= 0x01f07c1f; + b &= 0x40100400; + a &= 0x3fffffff; + b = b - (b >> 5); + a |= b; + *dest = RGB32k.All[(a >> 15) & a]; + dest += pitch; + frac += fracstep; + } while (--count); + } } void DrawColumnSubClampPalCommand::Execute(DrawerThread *thread) { + int count; + BYTE *dest; + fixed_t frac; + fixed_t fracstep; + + count = _count; + if (count <= 0) + return; + + dest = _dest; + + fracstep = _iscale; + frac = _texturefrac; + + { + const BYTE *colormap = _colormap; + const BYTE *source = _source; + int pitch = _pitch; + DWORD *fg2rgb = _srcblend; + DWORD *bg2rgb = _destblend; + + do + { + DWORD a = (fg2rgb[colormap[source[frac >> FRACBITS]]] | 0x40100400) - bg2rgb[*dest]; + DWORD b = a; + + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + *dest = RGB32k.All[a & (a >> 15)]; + dest += pitch; + frac += fracstep; + } while (--count); + } } void DrawColumnSubClampTranslatedPalCommand::Execute(DrawerThread *thread) { + int count; + BYTE *dest; + fixed_t frac; + fixed_t fracstep; + + count = _count; + if (count <= 0) + return; + + dest = _dest; + + fracstep = _iscale; + frac = _texturefrac; + + { + const BYTE *translation = _translation; + const BYTE *colormap = _colormap; + const BYTE *source = _source; + int pitch = _pitch; + DWORD *fg2rgb = _srcblend; + DWORD *bg2rgb = _destblend; + + do + { + DWORD a = (fg2rgb[colormap[translation[source[frac >> FRACBITS]]]] | 0x40100400) - bg2rgb[*dest]; + DWORD b = a; + + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + *dest = RGB32k.All[(a >> 15) & a]; + dest += pitch; + frac += fracstep; + } while (--count); + } } void DrawColumnRevSubClampPalCommand::Execute(DrawerThread *thread) { + int count; + BYTE *dest; + fixed_t frac; + fixed_t fracstep; + + count = _count; + if (count <= 0) + return; + + dest = _dest; + + fracstep = _iscale; + frac = _texturefrac; + + { + const BYTE *colormap = _colormap; + const BYTE *source = _source; + int pitch = 
_pitch; + DWORD *fg2rgb = _srcblend; + DWORD *bg2rgb = _destblend; + + do + { + DWORD a = (bg2rgb[*dest] | 0x40100400) - fg2rgb[colormap[source[frac >> FRACBITS]]]; + DWORD b = a; + + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + *dest = RGB32k.All[a & (a >> 15)]; + dest += pitch; + frac += fracstep; + } while (--count); + } } void DrawColumnRevSubClampTranslatedPalCommand::Execute(DrawerThread *thread) { + int count; + BYTE *dest; + fixed_t frac; + fixed_t fracstep; + + count = _count; + if (count <= 0) + return; + + dest = _dest; + + fracstep = _iscale; + frac = _texturefrac; + + { + const BYTE *translation = _translation; + const BYTE *colormap = _colormap; + const BYTE *source = _source; + int pitch = _pitch; + DWORD *fg2rgb = _srcblend; + DWORD *bg2rgb = _destblend; + + do + { + DWORD a = (bg2rgb[*dest] | 0x40100400) - fg2rgb[colormap[translation[source[frac >> FRACBITS]]]]; + DWORD b = a; + + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + *dest = RGB32k.All[(a >> 15) & a]; + dest += pitch; + frac += fracstep; + } while (--count); + } + } + + ///////////////////////////////////////////////////////////////////////// + + DrawFuzzColumnPalCommand::DrawFuzzColumnPalCommand() + { + using namespace drawerargs; + + _yl = dc_yl; + _yh = dc_yh; + _x = dc_x; + _destorg = dc_destorg; + _pitch = dc_pitch; } void DrawFuzzColumnPalCommand::Execute(DrawerThread *thread) { + int count; + BYTE *dest; + + // Adjust borders. Low... + if (_yl == 0) + _yl = 1; + + // .. and high. + if (_yh > fuzzviewheight) + _yh = fuzzviewheight; + + count = _yh - _yl; + + // Zero length. + if (count < 0) + return; + + count++; + + dest = ylookup[_yl] + _x + _destorg; + + // colormap #6 is used for shading (of 0-31, a bit brighter than average) + { + // [RH] Make local copies of global vars to try and improve + // the optimizations made by the compiler. 
+ int pitch = _pitch; + int fuzz = fuzzpos; + int cnt; + BYTE *map = &NormalLight.Maps[6 * 256]; + + // [RH] Split this into three separate loops to minimize + // the number of times fuzzpos needs to be clamped. + if (fuzz) + { + cnt = MIN(FUZZTABLE - fuzz, count); + count -= cnt; + do + { + *dest = map[dest[fuzzoffset[fuzz++]]]; + dest += pitch; + } while (--cnt); + } + if (fuzz == FUZZTABLE || count > 0) + { + while (count >= FUZZTABLE) + { + fuzz = 0; + cnt = FUZZTABLE; + count -= FUZZTABLE; + do + { + *dest = map[dest[fuzzoffset[fuzz++]]]; + dest += pitch; + } while (--cnt); + } + fuzz = 0; + if (count > 0) + { + do + { + *dest = map[dest[fuzzoffset[fuzz++]]]; + dest += pitch; + } while (--count); + } + } + fuzzpos = fuzz; + } } ///////////////////////////////////////////////////////////////////////// diff --git a/src/r_draw_pal.h b/src/r_draw_pal.h index 2de04367d9..f1297a06bf 100644 --- a/src/r_draw_pal.h +++ b/src/r_draw_pal.h @@ -92,6 +92,20 @@ namespace swrenderer public: PalColumnCommand(); FString DebugInfo() override { return "PalColumnCommand"; } + + protected: + int _count; + uint8_t *_dest; + int _pitch; + fixed_t _iscale; + fixed_t _texturefrac; + const uint8_t *_colormap; + const uint8_t *_source; + const uint8_t *_translation; + int _color; + uint32_t *_srcblend; + uint32_t *_destblend; + uint32_t _srccolor; }; class DrawColumnPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; }; @@ -114,8 +128,16 @@ namespace swrenderer class DrawFuzzColumnPalCommand : public DrawerCommand { public: + DrawFuzzColumnPalCommand(); void Execute(DrawerThread *thread) override; FString DebugInfo() override { return "DrawFuzzColumnPalCommand"; } + + private: + int _yl; + int _yh; + int _x; + uint8_t *_destorg; + int _pitch; }; class PalSpanCommand : public DrawerCommand From d8a3174330b080b8b8bc2502d63ff300c79b95d7 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 5 Dec 2016 12:47:43 +0100 Subject: [PATCH 442/912] Thread 
awareness to the span drawers --- src/r_draw_pal.cpp | 21 +++++++++++++++++++++ src/r_thread.h | 5 ++++- 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/src/r_draw_pal.cpp b/src/r_draw_pal.cpp index ea1ae96b80..66b5dce068 100644 --- a/src/r_draw_pal.cpp +++ b/src/r_draw_pal.cpp @@ -1569,6 +1569,9 @@ namespace swrenderer void DrawSpanPalCommand::Execute(DrawerThread *thread) { + if (thread->skipped_by_thread(_dest_y)) + return; + dsfixed_t xfrac; dsfixed_t yfrac; dsfixed_t xstep; @@ -1630,6 +1633,9 @@ namespace swrenderer void DrawSpanMaskedPalCommand::Execute(DrawerThread *thread) { + if (thread->skipped_by_thread(_dest_y)) + return; + dsfixed_t xfrac; dsfixed_t yfrac; dsfixed_t xstep; @@ -1692,6 +1698,9 @@ namespace swrenderer void DrawSpanTranslucentPalCommand::Execute(DrawerThread *thread) { + if (thread->skipped_by_thread(_dest_y)) + return; + dsfixed_t xfrac; dsfixed_t yfrac; dsfixed_t xstep; @@ -1752,6 +1761,9 @@ namespace swrenderer void DrawSpanMaskedTranslucentPalCommand::Execute(DrawerThread *thread) { + if (thread->skipped_by_thread(_dest_y)) + return; + dsfixed_t xfrac; dsfixed_t yfrac; dsfixed_t xstep; @@ -1826,6 +1838,9 @@ namespace swrenderer void DrawSpanAddClampPalCommand::Execute(DrawerThread *thread) { + if (thread->skipped_by_thread(_dest_y)) + return; + dsfixed_t xfrac; dsfixed_t yfrac; dsfixed_t xstep; @@ -1892,6 +1907,9 @@ namespace swrenderer void DrawSpanMaskedAddClampPalCommand::Execute(DrawerThread *thread) { + if (thread->skipped_by_thread(_dest_y)) + return; + dsfixed_t xfrac; dsfixed_t yfrac; dsfixed_t xstep; @@ -1972,6 +1990,9 @@ namespace swrenderer void FillSpanPalCommand::Execute(DrawerThread *thread) { + if (thread->skipped_by_thread(_y)) + return; + memset(ylookup[_y] + _x1 + _destorg, _color, _x2 - _x1 + 1); } diff --git a/src/r_thread.h b/src/r_thread.h index 3077a095ff..3217e1904b 100644 --- a/src/r_thread.h +++ b/src/r_thread.h @@ -123,7 +123,10 @@ protected: public: DrawerCommand() { - _dest_y = 
static_cast((swrenderer::drawerargs::dc_dest - swrenderer::drawerargs::dc_destorg) / (swrenderer::drawerargs::dc_pitch * 4)); + if (swrenderer::r_swtruecolor) + _dest_y = static_cast((swrenderer::drawerargs::dc_dest - swrenderer::drawerargs::dc_destorg) / (swrenderer::drawerargs::dc_pitch * 4)); + else + _dest_y = static_cast((swrenderer::drawerargs::dc_dest - swrenderer::drawerargs::dc_destorg) / (swrenderer::drawerargs::dc_pitch)); } virtual ~DrawerCommand() { } From 246d1625e5912f9aa9765260cf52efea631198bc Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 5 Dec 2016 13:05:05 +0100 Subject: [PATCH 443/912] Make wall drawers thread aware --- src/r_draw_pal.cpp | 176 +++++++++++++++++++++++++++++++++++++++++---- src/r_thread.h | 3 +- 2 files changed, 163 insertions(+), 16 deletions(-) diff --git a/src/r_draw_pal.cpp b/src/r_draw_pal.cpp index 66b5dce068..0c45e629e0 100644 --- a/src/r_draw_pal.cpp +++ b/src/r_draw_pal.cpp @@ -102,6 +102,15 @@ namespace swrenderer int bits = _vlinebits; int pitch = _pitch; + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + dest = thread->dest_for_thread(_dest_y, pitch, dest); + frac += fracstep * thread->skipped_by_thread(_dest_y); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + do { *dest = colormap[source[frac >> bits]]; @@ -124,16 +133,32 @@ namespace swrenderer auto buf1 = _bufplce[1]; auto buf2 = _bufplce[2]; auto buf3 = _bufplce[3]; - const auto vince0 = _vince[0]; - const auto vince1 = _vince[1]; - const auto vince2 = _vince[2]; - const auto vince3 = _vince[3]; + auto vince0 = _vince[0]; + auto vince1 = _vince[1]; + auto vince2 = _vince[2]; + auto vince3 = _vince[3]; auto vplce0 = _vplce[0]; auto vplce1 = _vplce[1]; auto vplce2 = _vplce[2]; auto vplce3 = _vplce[3]; auto pitch = _pitch; + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + int skipped = thread->skipped_by_thread(_dest_y); + dest = thread->dest_for_thread(_dest_y, pitch, 
dest); + vplce0 += vince0 * skipped; + vplce1 += vince1 * skipped; + vplce2 += vince2 * skipped; + vplce3 += vince3 * skipped; + vince0 *= thread->num_cores; + vince1 *= thread->num_cores; + vince2 *= thread->num_cores; + vince3 *= thread->num_cores; + pitch *= thread->num_cores; + do { dest[0] = pal0[buf0[(place = vplce0) >> bits]]; vplce0 = place + vince0; @@ -155,6 +180,15 @@ namespace swrenderer int bits = _mvlinebits; int pitch = _pitch; + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + dest = thread->dest_for_thread(_dest_y, pitch, dest); + frac += fracstep * thread->skipped_by_thread(_dest_y); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + do { uint8_t pix = source[frac >> bits]; @@ -181,16 +215,32 @@ namespace swrenderer auto buf1 = _bufplce[1]; auto buf2 = _bufplce[2]; auto buf3 = _bufplce[3]; - const auto vince0 = _vince[0]; - const auto vince1 = _vince[1]; - const auto vince2 = _vince[2]; - const auto vince3 = _vince[3]; + auto vince0 = _vince[0]; + auto vince1 = _vince[1]; + auto vince2 = _vince[2]; + auto vince3 = _vince[3]; auto vplce0 = _vplce[0]; auto vplce1 = _vplce[1]; auto vplce2 = _vplce[2]; auto vplce3 = _vplce[3]; auto pitch = _pitch; + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + int skipped = thread->skipped_by_thread(_dest_y); + dest = thread->dest_for_thread(_dest_y, pitch, dest); + vplce0 += vince0 * skipped; + vplce1 += vince1 * skipped; + vplce2 += vince2 * skipped; + vplce3 += vince3 * skipped; + vince0 *= thread->num_cores; + vince1 *= thread->num_cores; + vince2 *= thread->num_cores; + vince3 *= thread->num_cores; + pitch *= thread->num_cores; + do { uint8_t pix; @@ -217,6 +267,15 @@ namespace swrenderer uint32_t *fg2rgb = _srcblend; uint32_t *bg2rgb = _destblend; + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + dest = thread->dest_for_thread(_dest_y, pitch, dest); + frac += fracstep * 
thread->skipped_by_thread(_dest_y); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + do { uint8_t pix = source[frac >> bits]; @@ -242,6 +301,21 @@ namespace swrenderer uint32_t *bg2rgb = _destblend; uint32_t vplce[4] = { _vplce[0], _vplce[1], _vplce[2], _vplce[3] }; + uint32_t vince[4] = { _vince[0], _vince[1], _vince[2], _vince[3] }; + + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + int pitch = _pitch; + int skipped = thread->skipped_by_thread(_dest_y); + dest = thread->dest_for_thread(_dest_y, pitch, dest); + for (int i = 0; i < 4; i++) + { + vplce[i] += vince[i] * skipped; + vince[i] *= thread->num_cores; + } + pitch *= thread->num_cores; do { @@ -255,9 +329,9 @@ namespace swrenderer fg = (fg + bg) | 0x1f07c1f; dest[i] = RGB32k.All[fg & (fg >> 15)]; } - vplce[i] += _vince[i]; + vplce[i] += vince[i]; } - dest += _pitch; + dest += pitch; } while (--count); } @@ -275,6 +349,15 @@ namespace swrenderer uint32_t *fg2rgb = _srcblend; uint32_t *bg2rgb = _destblend; + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + dest = thread->dest_for_thread(_dest_y, pitch, dest); + frac += fracstep * thread->skipped_by_thread(_dest_y); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + do { uint8_t pix = source[frac >> bits]; @@ -305,6 +388,21 @@ namespace swrenderer uint32_t *bg2rgb = _destblend; uint32_t vplce[4] = { _vplce[0], _vplce[1], _vplce[2], _vplce[3] }; + uint32_t vince[4] = { _vince[0], _vince[1], _vince[2], _vince[3] }; + + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + int pitch = _pitch; + int skipped = thread->skipped_by_thread(_dest_y); + dest = thread->dest_for_thread(_dest_y, pitch, dest); + for (int i = 0; i < 4; i++) + { + vplce[i] += vince[i] * skipped; + vince[i] *= thread->num_cores; + } + pitch *= thread->num_cores; do { @@ -323,9 +421,9 @@ namespace swrenderer a |= b; dest[i] = RGB32k.All[a & (a >> 15)]; } - vplce[i] += 
_vince[i]; + vplce[i] += vince[i]; } - dest += _pitch; + dest += pitch; } while (--count); } @@ -343,6 +441,15 @@ namespace swrenderer uint32_t *fg2rgb = _srcblend; uint32_t *bg2rgb = _destblend; + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + dest = thread->dest_for_thread(_dest_y, pitch, dest); + frac += fracstep * thread->skipped_by_thread(_dest_y); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + do { uint8_t pix = source[frac >> bits]; @@ -372,6 +479,21 @@ namespace swrenderer uint32_t *bg2rgb = _destblend; uint32_t vplce[4] = { _vplce[0], _vplce[1], _vplce[2], _vplce[3] }; + uint32_t vince[4] = { _vince[0], _vince[1], _vince[2], _vince[3] }; + + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + int pitch = _pitch; + int skipped = thread->skipped_by_thread(_dest_y); + dest = thread->dest_for_thread(_dest_y, pitch, dest); + for (int i = 0; i < 4; i++) + { + vplce[i] += vince[i] * skipped; + vince[i] *= thread->num_cores; + } + pitch *= thread->num_cores; do { @@ -389,9 +511,9 @@ namespace swrenderer a |= 0x01f07c1f; dest[i] = RGB32k.All[a & (a >> 15)]; } - vplce[i] += _vince[i]; + vplce[i] += vince[i]; } - dest += _pitch; + dest += pitch; } while (--count); } @@ -409,6 +531,15 @@ namespace swrenderer uint32_t *fg2rgb = _srcblend; uint32_t *bg2rgb = _destblend; + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + dest = thread->dest_for_thread(_dest_y, pitch, dest); + frac += fracstep * thread->skipped_by_thread(_dest_y); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + do { uint8_t pix = source[frac >> bits]; @@ -438,6 +569,21 @@ namespace swrenderer uint32_t *bg2rgb = _destblend; uint32_t vplce[4] = { _vplce[0], _vplce[1], _vplce[2], _vplce[3] }; + uint32_t vince[4] = { _vince[0], _vince[1], _vince[2], _vince[3] }; + + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + int pitch = _pitch; + int 
skipped = thread->skipped_by_thread(_dest_y); + dest = thread->dest_for_thread(_dest_y, pitch, dest); + for (int i = 0; i < 4; i++) + { + vplce[i] += vince[i] * skipped; + vince[i] *= thread->num_cores; + } + pitch *= thread->num_cores; do { @@ -455,7 +601,7 @@ namespace swrenderer a |= 0x01f07c1f; dest[i] = RGB32k.All[a & (a >> 15)]; } - vplce[i] += _vince[i]; + vplce[i] += vince[i]; } dest += _pitch; } while (--count); diff --git a/src/r_thread.h b/src/r_thread.h index 3217e1904b..29d971ad38 100644 --- a/src/r_thread.h +++ b/src/r_thread.h @@ -84,7 +84,8 @@ public: } // Calculate the dest address for the first line to be rendered by this thread - uint32_t *dest_for_thread(int first_line, int pitch, uint32_t *dest) + template + T *dest_for_thread(int first_line, int pitch, T *dest) { return dest + skipped_by_thread(first_line) * pitch; } From 836c7a5351c2ebee8fd5ef74a53cb203f57565e9 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 5 Dec 2016 13:23:30 +0100 Subject: [PATCH 444/912] Make sky drawers thread aware --- src/r_draw_pal.cpp | 72 +++++++++++++++++++++++++++++++++++++++------- 1 file changed, 62 insertions(+), 10 deletions(-) diff --git a/src/r_draw_pal.cpp b/src/r_draw_pal.cpp index 0c45e629e0..84f9772b6b 100644 --- a/src/r_draw_pal.cpp +++ b/src/r_draw_pal.cpp @@ -646,6 +646,16 @@ namespace swrenderer int solid_bottom_g = GPART(solid_bottom); int solid_bottom_b = BPART(solid_bottom); + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + int skipped = thread->skipped_by_thread(_dest_y); + dest = thread->dest_for_thread(_dest_y, pitch, dest); + frac += fracstep * skipped; + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + for (int index = 0; index < count; index++) { uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; @@ -724,17 +734,29 @@ namespace swrenderer start_fadebottom_y = clamp(start_fadebottom_y, 0, count); end_fadebottom_y = clamp(end_fadebottom_y, 0, 
count); + int skipped = thread->skipped_by_thread(_dest_y); + dest = thread->dest_for_thread(_dest_y, pitch, dest); + for (int col = 0; col < 4; col++) + { + frac[col] += fracstep[col] * skipped; + fracstep[col] *= thread->num_cores; + } + pitch *= thread->num_cores; + int num_cores = thread->num_cores; + int index = skipped; + // Top solid color: - for (int index = 0; index < start_fadetop_y; index++) + while (index < start_fadetop_y) { *((uint32_t*)dest) = solid_top_fill; dest += pitch; for (int col = 0; col < 4; col++) frac[col] += fracstep[col]; + index += num_cores; } // Top fade: - for (int index = start_fadetop_y; index < end_fadetop_y; index++) + while (index < end_fadetop_y) { for (int col = 0; col < 4; col++) { @@ -756,10 +778,11 @@ namespace swrenderer } *((uint32_t*)dest) = *((uint32_t*)output); dest += pitch; + index += num_cores; } // Textured center: - for (int index = end_fadetop_y; index < start_fadebottom_y; index++) + while (index < start_fadebottom_y) { for (int col = 0; col < 4; col++) { @@ -771,10 +794,11 @@ namespace swrenderer *((uint32_t*)dest) = *((uint32_t*)output); dest += pitch; + index += num_cores; } // Fade bottom: - for (int index = start_fadebottom_y; index < end_fadebottom_y; index++) + while (index < end_fadebottom_y) { for (int col = 0; col < 4; col++) { @@ -796,13 +820,15 @@ namespace swrenderer } *((uint32_t*)dest) = *((uint32_t*)output); dest += pitch; + index += num_cores; } // Bottom solid color: - for (int index = end_fadebottom_y; index < count; index++) + while (index < count) { *((uint32_t*)dest) = solid_bottom_fill; dest += pitch; + index += num_cores; } } @@ -828,6 +854,16 @@ namespace swrenderer int solid_bottom_g = GPART(solid_bottom); int solid_bottom_b = BPART(solid_bottom); + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + int skipped = thread->skipped_by_thread(_dest_y); + dest = thread->dest_for_thread(_dest_y, pitch, dest); + frac += fracstep * skipped; + fracstep *= 
thread->num_cores; + pitch *= thread->num_cores; + for (int index = 0; index < count; index++) { uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; @@ -913,17 +949,29 @@ namespace swrenderer start_fadebottom_y = clamp(start_fadebottom_y, 0, count); end_fadebottom_y = clamp(end_fadebottom_y, 0, count); + int skipped = thread->skipped_by_thread(_dest_y); + dest = thread->dest_for_thread(_dest_y, pitch, dest); + for (int col = 0; col < 4; col++) + { + frac[col] += fracstep[col] * skipped; + fracstep[col] *= thread->num_cores; + } + pitch *= thread->num_cores; + int num_cores = thread->num_cores; + int index = skipped; + // Top solid color: - for (int index = 0; index < start_fadetop_y; index++) + while (index < start_fadetop_y) { *((uint32_t*)dest) = solid_top_fill; dest += pitch; for (int col = 0; col < 4; col++) frac[col] += fracstep[col]; + index += num_cores; } // Top fade: - for (int index = start_fadetop_y; index < end_fadetop_y; index++) + while (index < end_fadetop_y) { for (int col = 0; col < 4; col++) { @@ -951,10 +999,11 @@ namespace swrenderer } *((uint32_t*)dest) = *((uint32_t*)output); dest += pitch; + index += num_cores; } // Textured center: - for (int index = end_fadetop_y; index < start_fadebottom_y; index++) + while (index < start_fadebottom_y) { for (int col = 0; col < 4; col++) { @@ -972,10 +1021,11 @@ namespace swrenderer *((uint32_t*)dest) = *((uint32_t*)output); dest += pitch; + index += num_cores; } // Fade bottom: - for (int index = start_fadebottom_y; index < end_fadebottom_y; index++) + while (index < end_fadebottom_y) { for (int col = 0; col < 4; col++) { @@ -1003,13 +1053,15 @@ namespace swrenderer } *((uint32_t*)dest) = *((uint32_t*)output); dest += pitch; + index += num_cores; } // Bottom solid color: - for (int index = end_fadebottom_y; index < count; index++) + while (index < count) { *((uint32_t*)dest) = solid_bottom_fill; dest += pitch; + index += num_cores; } } From 
6122d982b7a024ff001e0ded93d6251834e43e28 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 5 Dec 2016 13:47:30 +0100 Subject: [PATCH 445/912] Thread awareness to column drawers --- src/r_draw_pal.cpp | 648 +++++++++++++++++++++++++-------------------- 1 file changed, 356 insertions(+), 292 deletions(-) diff --git a/src/r_draw_pal.cpp b/src/r_draw_pal.cpp index 84f9772b6b..fbef5153e1 100644 --- a/src/r_draw_pal.cpp +++ b/src/r_draw_pal.cpp @@ -1088,16 +1088,12 @@ namespace swrenderer void DrawColumnPalCommand::Execute(DrawerThread *thread) { int count; - BYTE *dest; + uint8_t *dest; fixed_t frac; fixed_t fracstep; count = _count; - // Zero length, column does not exceed a pixel. - if (count <= 0) - return; - // Framebuffer destination address. dest = _dest; @@ -1106,73 +1102,84 @@ namespace swrenderer fracstep = _iscale; frac = _texturefrac; + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + int pitch = _pitch; + dest = thread->dest_for_thread(_dest_y, pitch, dest); + frac += fracstep * thread->skipped_by_thread(_dest_y); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + + // [RH] Get local copies of these variables so that the compiler + // has a better chance of optimizing this well. + const uint8_t *colormap = _colormap; + const uint8_t *source = _source; + + // Inner loop that does the actual texture mapping, + // e.g. a DDA-lile scaling. + // This is as fast as it gets. + do { - // [RH] Get local copies of these variables so that the compiler - // has a better chance of optimizing this well. - const BYTE *colormap = _colormap; - const BYTE *source = _source; - int pitch = _pitch; + // Re-map color indices from wall texture column + // using a lighting/special effects LUT. + *dest = colormap[source[frac >> FRACBITS]]; - // Inner loop that does the actual texture mapping, - // e.g. a DDA-lile scaling. - // This is as fast as it gets. 
- do - { - // Re-map color indices from wall texture column - // using a lighting/special effects LUT. - *dest = colormap[source[frac >> FRACBITS]]; + dest += pitch; + frac += fracstep; - dest += pitch; - frac += fracstep; - - } while (--count); - } + } while (--count); } void FillColumnPalCommand::Execute(DrawerThread *thread) { int count; - BYTE *dest; + uint8_t *dest; count = _count; + dest = _dest; + count = thread->count_for_thread(_dest_y, count); if (count <= 0) return; - dest = _dest; + int pitch = _pitch; + dest = thread->dest_for_thread(_dest_y, pitch, dest); + pitch *= thread->num_cores; + uint8_t color = _color; + do { - int pitch = _pitch; - BYTE color = _color; - - do - { - *dest = color; - dest += pitch; - } while (--count); - } + *dest = color; + dest += pitch; + } while (--count); } void FillColumnAddPalCommand::Execute(DrawerThread *thread) { int count; - BYTE *dest; + uint8_t *dest; count = _count; - if (count <= 0) - return; - dest = _dest; - DWORD *bg2rgb; - DWORD fg; + uint32_t *bg2rgb; + uint32_t fg; bg2rgb = _destblend; fg = _srccolor; int pitch = _pitch; + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + dest = thread->dest_for_thread(_dest_y, pitch, dest); + pitch *= thread->num_cores; + do { - DWORD bg; + uint32_t bg; bg = (fg + bg2rgb[*dest]) | 0x1f07c1f; *dest = RGB32k.All[bg & (bg >> 15)]; dest += pitch; @@ -1183,24 +1190,29 @@ namespace swrenderer void FillColumnAddClampPalCommand::Execute(DrawerThread *thread) { int count; - BYTE *dest; + uint8_t *dest; count = _count; - if (count <= 0) - return; dest = _dest; - DWORD *bg2rgb; - DWORD fg; + uint32_t *bg2rgb; + uint32_t fg; bg2rgb = _destblend; fg = _srccolor; int pitch = _pitch; + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + dest = thread->dest_for_thread(_dest_y, pitch, dest); + pitch *= thread->num_cores; + do { - DWORD a = fg + bg2rgb[*dest]; - DWORD b = a; + uint32_t a = fg + bg2rgb[*dest]; + uint32_t b = a; a 
|= 0x01f07c1f; b &= 0x40100400; @@ -1215,24 +1227,29 @@ namespace swrenderer void FillColumnSubClampPalCommand::Execute(DrawerThread *thread) { int count; - BYTE *dest; + uint8_t *dest; count = _count; - if (count <= 0) - return; dest = _dest; - DWORD *bg2rgb; - DWORD fg; + uint32_t *bg2rgb; + uint32_t fg; bg2rgb = _destblend; fg = _srccolor | 0x40100400; int pitch = _pitch; + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + dest = thread->dest_for_thread(_dest_y, pitch, dest); + pitch *= thread->num_cores; + do { - DWORD a = fg - bg2rgb[*dest]; - DWORD b = a; + uint32_t a = fg - bg2rgb[*dest]; + uint32_t b = a; b &= 0x40100400; b = b - (b >> 5); @@ -1246,24 +1263,31 @@ namespace swrenderer void FillColumnRevSubClampPalCommand::Execute(DrawerThread *thread) { int count; - BYTE *dest; + uint8_t *dest; count = _count; if (count <= 0) return; dest = _dest; - DWORD *bg2rgb; - DWORD fg; + uint32_t *bg2rgb; + uint32_t fg; bg2rgb = _destblend; fg = _srccolor; int pitch = _pitch; + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + dest = thread->dest_for_thread(_dest_y, pitch, dest); + pitch *= thread->num_cores; + do { - DWORD a = (bg2rgb[*dest] | 0x40100400) - fg; - DWORD b = a; + uint32_t a = (bg2rgb[*dest] | 0x40100400) - fg; + uint32_t b = a; b &= 0x40100400; b = b - (b >> 5); @@ -1277,385 +1301,425 @@ namespace swrenderer void DrawColumnAddPalCommand::Execute(DrawerThread *thread) { int count; - BYTE *dest; + uint8_t *dest; fixed_t frac; fixed_t fracstep; count = _count; - if (count <= 0) - return; - dest = _dest; fracstep = _iscale; frac = _texturefrac; + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + int pitch = _pitch; + dest = thread->dest_for_thread(_dest_y, pitch, dest); + frac += fracstep * thread->skipped_by_thread(_dest_y); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + + uint32_t *fg2rgb = _srcblend; + uint32_t *bg2rgb = _destblend; + const 
uint8_t *colormap = _colormap; + const uint8_t *source = _source; + + do { - DWORD *fg2rgb = _srcblend; - DWORD *bg2rgb = _destblend; - const BYTE *colormap = _colormap; - const BYTE *source = _source; - int pitch = _pitch; + uint32_t fg = colormap[source[frac >> FRACBITS]]; + uint32_t bg = *dest; - do - { - DWORD fg = colormap[source[frac >> FRACBITS]]; - DWORD bg = *dest; - - fg = fg2rgb[fg]; - bg = bg2rgb[bg]; - fg = (fg + bg) | 0x1f07c1f; - *dest = RGB32k.All[fg & (fg >> 15)]; - dest += pitch; - frac += fracstep; - } while (--count); - } + fg = fg2rgb[fg]; + bg = bg2rgb[bg]; + fg = (fg + bg) | 0x1f07c1f; + *dest = RGB32k.All[fg & (fg >> 15)]; + dest += pitch; + frac += fracstep; + } while (--count); } void DrawColumnTranslatedPalCommand::Execute(DrawerThread *thread) { int count; - BYTE* dest; + uint8_t* dest; fixed_t frac; fixed_t fracstep; count = _count; - if (count <= 0) - return; dest = _dest; fracstep = _iscale; frac = _texturefrac; + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + int pitch = _pitch; + dest = thread->dest_for_thread(_dest_y, pitch, dest); + frac += fracstep * thread->skipped_by_thread(_dest_y); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + + // [RH] Local copies of global vars to improve compiler optimizations + const uint8_t *colormap = _colormap; + const uint8_t *translation = _translation; + const uint8_t *source = _source; + + do { - // [RH] Local copies of global vars to improve compiler optimizations - const BYTE *colormap = _colormap; - const BYTE *translation = _translation; - const BYTE *source = _source; - int pitch = _pitch; + *dest = colormap[translation[source[frac >> FRACBITS]]]; + dest += pitch; - do - { - *dest = colormap[translation[source[frac >> FRACBITS]]]; - dest += pitch; - - frac += fracstep; - } while (--count); - } + frac += fracstep; + } while (--count); } void DrawColumnTlatedAddPalCommand::Execute(DrawerThread *thread) { int count; - BYTE *dest; + uint8_t 
*dest; fixed_t frac; fixed_t fracstep; count = _count; - if (count <= 0) - return; - dest = _dest; fracstep = _iscale; frac = _texturefrac; + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + int pitch = _pitch; + dest = thread->dest_for_thread(_dest_y, pitch, dest); + frac += fracstep * thread->skipped_by_thread(_dest_y); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + + uint32_t *fg2rgb = _srcblend; + uint32_t *bg2rgb = _destblend; + const uint8_t *translation = _translation; + const uint8_t *colormap = _colormap; + const uint8_t *source = _source; + + do { - DWORD *fg2rgb = _srcblend; - DWORD *bg2rgb = _destblend; - const BYTE *translation = _translation; - const BYTE *colormap = _colormap; - const BYTE *source = _source; - int pitch = _pitch; + uint32_t fg = colormap[translation[source[frac >> FRACBITS]]]; + uint32_t bg = *dest; - do - { - DWORD fg = colormap[translation[source[frac >> FRACBITS]]]; - DWORD bg = *dest; - - fg = fg2rgb[fg]; - bg = bg2rgb[bg]; - fg = (fg + bg) | 0x1f07c1f; - *dest = RGB32k.All[fg & (fg >> 15)]; - dest += pitch; - frac += fracstep; - } while (--count); - } + fg = fg2rgb[fg]; + bg = bg2rgb[bg]; + fg = (fg + bg) | 0x1f07c1f; + *dest = RGB32k.All[fg & (fg >> 15)]; + dest += pitch; + frac += fracstep; + } while (--count); } void DrawColumnShadedPalCommand::Execute(DrawerThread *thread) { int count; - BYTE *dest; + uint8_t *dest; fixed_t frac, fracstep; count = _count; - - if (count <= 0) - return; - dest = _dest; fracstep = _iscale; frac = _texturefrac; + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + int pitch = _pitch; + dest = thread->dest_for_thread(_dest_y, pitch, dest); + frac += fracstep * thread->skipped_by_thread(_dest_y); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + + const uint8_t *source = _source; + const uint8_t *colormap = _colormap; + uint32_t *fgstart = &Col2RGB8[0][_color]; + + do { - const BYTE *source = _source; - 
const BYTE *colormap = _colormap; - int pitch = _pitch; - DWORD *fgstart = &Col2RGB8[0][_color]; + uint32_t val = colormap[source[frac >> FRACBITS]]; + uint32_t fg = fgstart[val << 8]; + val = (Col2RGB8[64 - val][*dest] + fg) | 0x1f07c1f; + *dest = RGB32k.All[val & (val >> 15)]; - do - { - DWORD val = colormap[source[frac >> FRACBITS]]; - DWORD fg = fgstart[val << 8]; - val = (Col2RGB8[64 - val][*dest] + fg) | 0x1f07c1f; - *dest = RGB32k.All[val & (val >> 15)]; - - dest += pitch; - frac += fracstep; - } while (--count); - } + dest += pitch; + frac += fracstep; + } while (--count); } void DrawColumnAddClampPalCommand::Execute(DrawerThread *thread) { int count; - BYTE *dest; + uint8_t *dest; fixed_t frac; fixed_t fracstep; count = _count; - if (count <= 0) - return; - dest = _dest; fracstep = _iscale; frac = _texturefrac; + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + int pitch = _pitch; + dest = thread->dest_for_thread(_dest_y, pitch, dest); + frac += fracstep * thread->skipped_by_thread(_dest_y); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + + const uint8_t *colormap = _colormap; + const uint8_t *source = _source; + uint32_t *fg2rgb = _srcblend; + uint32_t *bg2rgb = _destblend; + + do { - const BYTE *colormap = _colormap; - const BYTE *source = _source; - int pitch = _pitch; - DWORD *fg2rgb = _srcblend; - DWORD *bg2rgb = _destblend; + uint32_t a = fg2rgb[colormap[source[frac >> FRACBITS]]] + bg2rgb[*dest]; + uint32_t b = a; - do - { - DWORD a = fg2rgb[colormap[source[frac >> FRACBITS]]] + bg2rgb[*dest]; - DWORD b = a; - - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - *dest = RGB32k.All[a & (a >> 15)]; - dest += pitch; - frac += fracstep; - } while (--count); - } + a |= 0x01f07c1f; + b &= 0x40100400; + a &= 0x3fffffff; + b = b - (b >> 5); + a |= b; + *dest = RGB32k.All[a & (a >> 15)]; + dest += pitch; + frac += fracstep; + } while (--count); } void 
DrawColumnAddClampTranslatedPalCommand::Execute(DrawerThread *thread) { int count; - BYTE *dest; + uint8_t *dest; fixed_t frac; fixed_t fracstep; count = _count; - if (count <= 0) - return; - dest = _dest; fracstep = _iscale; frac = _texturefrac; + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + int pitch = _pitch; + dest = thread->dest_for_thread(_dest_y, pitch, dest); + frac += fracstep * thread->skipped_by_thread(_dest_y); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + + const uint8_t *translation = _translation; + const uint8_t *colormap = _colormap; + const uint8_t *source = _source; + uint32_t *fg2rgb = _srcblend; + uint32_t *bg2rgb = _destblend; + + do { - const BYTE *translation = _translation; - const BYTE *colormap = _colormap; - const BYTE *source = _source; - int pitch = _pitch; - DWORD *fg2rgb = _srcblend; - DWORD *bg2rgb = _destblend; + uint32_t a = fg2rgb[colormap[translation[source[frac >> FRACBITS]]]] + bg2rgb[*dest]; + uint32_t b = a; - do - { - DWORD a = fg2rgb[colormap[translation[source[frac >> FRACBITS]]]] + bg2rgb[*dest]; - DWORD b = a; - - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - *dest = RGB32k.All[(a >> 15) & a]; - dest += pitch; - frac += fracstep; - } while (--count); - } + a |= 0x01f07c1f; + b &= 0x40100400; + a &= 0x3fffffff; + b = b - (b >> 5); + a |= b; + *dest = RGB32k.All[(a >> 15) & a]; + dest += pitch; + frac += fracstep; + } while (--count); } void DrawColumnSubClampPalCommand::Execute(DrawerThread *thread) { int count; - BYTE *dest; + uint8_t *dest; fixed_t frac; fixed_t fracstep; count = _count; - if (count <= 0) - return; - dest = _dest; fracstep = _iscale; frac = _texturefrac; + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + int pitch = _pitch; + dest = thread->dest_for_thread(_dest_y, pitch, dest); + frac += fracstep * thread->skipped_by_thread(_dest_y); + fracstep *= thread->num_cores; + pitch 
*= thread->num_cores; + + const uint8_t *colormap = _colormap; + const uint8_t *source = _source; + uint32_t *fg2rgb = _srcblend; + uint32_t *bg2rgb = _destblend; + + do { - const BYTE *colormap = _colormap; - const BYTE *source = _source; - int pitch = _pitch; - DWORD *fg2rgb = _srcblend; - DWORD *bg2rgb = _destblend; + uint32_t a = (fg2rgb[colormap[source[frac >> FRACBITS]]] | 0x40100400) - bg2rgb[*dest]; + uint32_t b = a; - do - { - DWORD a = (fg2rgb[colormap[source[frac >> FRACBITS]]] | 0x40100400) - bg2rgb[*dest]; - DWORD b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - *dest = RGB32k.All[a & (a >> 15)]; - dest += pitch; - frac += fracstep; - } while (--count); - } + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + *dest = RGB32k.All[a & (a >> 15)]; + dest += pitch; + frac += fracstep; + } while (--count); } void DrawColumnSubClampTranslatedPalCommand::Execute(DrawerThread *thread) { int count; - BYTE *dest; + uint8_t *dest; fixed_t frac; fixed_t fracstep; count = _count; - if (count <= 0) - return; - dest = _dest; fracstep = _iscale; frac = _texturefrac; + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + int pitch = _pitch; + dest = thread->dest_for_thread(_dest_y, pitch, dest); + frac += fracstep * thread->skipped_by_thread(_dest_y); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + + const uint8_t *translation = _translation; + const uint8_t *colormap = _colormap; + const uint8_t *source = _source; + uint32_t *fg2rgb = _srcblend; + uint32_t *bg2rgb = _destblend; + + do { - const BYTE *translation = _translation; - const BYTE *colormap = _colormap; - const BYTE *source = _source; - int pitch = _pitch; - DWORD *fg2rgb = _srcblend; - DWORD *bg2rgb = _destblend; + uint32_t a = (fg2rgb[colormap[translation[source[frac >> FRACBITS]]]] | 0x40100400) - bg2rgb[*dest]; + uint32_t b = a; - do - { - DWORD a = (fg2rgb[colormap[translation[source[frac >> FRACBITS]]]] | 
0x40100400) - bg2rgb[*dest]; - DWORD b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - *dest = RGB32k.All[(a >> 15) & a]; - dest += pitch; - frac += fracstep; - } while (--count); - } + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + *dest = RGB32k.All[(a >> 15) & a]; + dest += pitch; + frac += fracstep; + } while (--count); } void DrawColumnRevSubClampPalCommand::Execute(DrawerThread *thread) { int count; - BYTE *dest; + uint8_t *dest; fixed_t frac; fixed_t fracstep; count = _count; - if (count <= 0) - return; - dest = _dest; fracstep = _iscale; frac = _texturefrac; + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + int pitch = _pitch; + dest = thread->dest_for_thread(_dest_y, pitch, dest); + frac += fracstep * thread->skipped_by_thread(_dest_y); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + + const uint8_t *colormap = _colormap; + const uint8_t *source = _source; + uint32_t *fg2rgb = _srcblend; + uint32_t *bg2rgb = _destblend; + + do { - const BYTE *colormap = _colormap; - const BYTE *source = _source; - int pitch = _pitch; - DWORD *fg2rgb = _srcblend; - DWORD *bg2rgb = _destblend; + uint32_t a = (bg2rgb[*dest] | 0x40100400) - fg2rgb[colormap[source[frac >> FRACBITS]]]; + uint32_t b = a; - do - { - DWORD a = (bg2rgb[*dest] | 0x40100400) - fg2rgb[colormap[source[frac >> FRACBITS]]]; - DWORD b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - *dest = RGB32k.All[a & (a >> 15)]; - dest += pitch; - frac += fracstep; - } while (--count); - } + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + *dest = RGB32k.All[a & (a >> 15)]; + dest += pitch; + frac += fracstep; + } while (--count); } void DrawColumnRevSubClampTranslatedPalCommand::Execute(DrawerThread *thread) { int count; - BYTE *dest; + uint8_t *dest; fixed_t frac; fixed_t fracstep; count = _count; - if (count <= 0) - return; - dest = _dest; fracstep = _iscale; frac = 
_texturefrac; + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + int pitch = _pitch; + dest = thread->dest_for_thread(_dest_y, pitch, dest); + frac += fracstep * thread->skipped_by_thread(_dest_y); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + + const uint8_t *translation = _translation; + const uint8_t *colormap = _colormap; + const uint8_t *source = _source; + uint32_t *fg2rgb = _srcblend; + uint32_t *bg2rgb = _destblend; + + do { - const BYTE *translation = _translation; - const BYTE *colormap = _colormap; - const BYTE *source = _source; - int pitch = _pitch; - DWORD *fg2rgb = _srcblend; - DWORD *bg2rgb = _destblend; + uint32_t a = (bg2rgb[*dest] | 0x40100400) - fg2rgb[colormap[translation[source[frac >> FRACBITS]]]]; + uint32_t b = a; - do - { - DWORD a = (bg2rgb[*dest] | 0x40100400) - fg2rgb[colormap[translation[source[frac >> FRACBITS]]]]; - DWORD b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - *dest = RGB32k.All[(a >> 15) & a]; - dest += pitch; - frac += fracstep; - } while (--count); - } + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + *dest = RGB32k.All[(a >> 15) & a]; + dest += pitch; + frac += fracstep; + } while (--count); } ///////////////////////////////////////////////////////////////////////// @@ -1674,7 +1738,7 @@ namespace swrenderer void DrawFuzzColumnPalCommand::Execute(DrawerThread *thread) { int count; - BYTE *dest; + uint8_t *dest; // Adjust borders. Low... if (_yl == 0) @@ -1701,7 +1765,7 @@ namespace swrenderer int pitch = _pitch; int fuzz = fuzzpos; int cnt; - BYTE *map = &NormalLight.Maps[6 * 256]; + uint8_t *map = &NormalLight.Maps[6 * 256]; // [RH] Split this into three separate loops to minimize // the number of times fuzzpos needs to be clamped. 
@@ -1767,7 +1831,7 @@ namespace swrenderer void DrawSpanPalCommand::Execute(DrawerThread *thread) { - if (thread->skipped_by_thread(_dest_y)) + if (thread->skipped_by_thread(_y)) return; dsfixed_t xfrac; @@ -1831,7 +1895,7 @@ namespace swrenderer void DrawSpanMaskedPalCommand::Execute(DrawerThread *thread) { - if (thread->skipped_by_thread(_dest_y)) + if (thread->skipped_by_thread(_y)) return; dsfixed_t xfrac; @@ -1896,7 +1960,7 @@ namespace swrenderer void DrawSpanTranslucentPalCommand::Execute(DrawerThread *thread) { - if (thread->skipped_by_thread(_dest_y)) + if (thread->skipped_by_thread(_y)) return; dsfixed_t xfrac; @@ -1959,7 +2023,7 @@ namespace swrenderer void DrawSpanMaskedTranslucentPalCommand::Execute(DrawerThread *thread) { - if (thread->skipped_by_thread(_dest_y)) + if (thread->skipped_by_thread(_y)) return; dsfixed_t xfrac; @@ -2036,7 +2100,7 @@ namespace swrenderer void DrawSpanAddClampPalCommand::Execute(DrawerThread *thread) { - if (thread->skipped_by_thread(_dest_y)) + if (thread->skipped_by_thread(_y)) return; dsfixed_t xfrac; @@ -2105,7 +2169,7 @@ namespace swrenderer void DrawSpanMaskedAddClampPalCommand::Execute(DrawerThread *thread) { - if (thread->skipped_by_thread(_dest_y)) + if (thread->skipped_by_thread(_y)) return; dsfixed_t xfrac; From caa0deec105c085532650776ed3ea7f226a9971c Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Mon, 5 Dec 2016 16:05:33 -0500 Subject: [PATCH 446/912] - r_polyrenderer now notifies the gamesim on change of new pitch limits. --- src/r_swrenderer.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/r_swrenderer.cpp b/src/r_swrenderer.cpp index 0c49254545..a0f90f5ccb 100644 --- a/src/r_swrenderer.cpp +++ b/src/r_swrenderer.cpp @@ -56,6 +56,12 @@ CUSTOM_CVAR(Bool, r_polyrenderer, 0, CVAR_ARCHIVE | CVAR_GLOBALCONFIG | CVAR_NOI { Printf("No GL BSP detected. You must restart the map before rendering will be correct\n"); } + + if (usergame) + { + // [SP] Update pitch limits to the netgame/gamesim. 
+ players[consoleplayer].SendPitchLimits(); + } } namespace swrenderer From bd8b05e10ac90dcffe6d85879230a6dd7fb1480a Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 6 Dec 2016 05:07:37 +0100 Subject: [PATCH 447/912] Fix compile error --- src/r_main.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/r_main.cpp b/src/r_main.cpp index 1e314f1278..a8c7660994 100644 --- a/src/r_main.cpp +++ b/src/r_main.cpp @@ -117,7 +117,7 @@ static void R_ShutdownRenderer(); extern short *openings; extern bool r_fakingunderwater; -extern "C" int fuzzviewheight; +extern int fuzzviewheight; extern subsector_t *InSubsector; From f4172782ed9eaeebe7169fc74dd03b188959e8cf Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 6 Dec 2016 06:22:58 +0100 Subject: [PATCH 448/912] Fix compile error in debug builds --- src/r_thread.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/r_thread.h b/src/r_thread.h index 29d971ad38..1e48ff3074 100644 --- a/src/r_thread.h +++ b/src/r_thread.h @@ -54,11 +54,11 @@ public: int pass_start_y = 0; int pass_end_y = MAXHEIGHT; - uint32_t dc_temp_rgbabuff_rgba[MAXHEIGHT * 4]; - uint32_t *dc_temp_rgba; + uint8_t dc_temp_buff[MAXHEIGHT * 4]; + uint8_t *dc_temp = nullptr; - short triangle_clip_top[MAXWIDTH]; - short triangle_clip_bottom[MAXWIDTH]; + uint32_t dc_temp_rgbabuff_rgba[MAXHEIGHT * 4]; + uint32_t *dc_temp_rgba = nullptr; // Checks if a line is rendered by this thread bool line_skipped_by_thread(int line) @@ -100,7 +100,7 @@ protected: void DetectRangeError(uint32_t *&dest, int &dest_y, int &count) { #if defined(_MSC_VER) && defined(_DEBUG) - if (dest_y < 0 || count < 0 || dest_y + count > swrenderer::dc_destheight) + if (dest_y < 0 || count < 0 || dest_y + count > swrenderer::drawerargs::dc_destheight) __debugbreak(); // Buffer overrun detected! 
#endif From 6a3ae2ce45294a7f5440acdca8726c23de3cf663 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 6 Dec 2016 07:23:55 +0100 Subject: [PATCH 449/912] Add Rt drawers --- src/r_draw_pal.h | 66 +++- src/r_draw_tc.cpp | 91 ++++- src/r_drawt_pal.cpp | 801 ++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 904 insertions(+), 54 deletions(-) diff --git a/src/r_draw_pal.h b/src/r_draw_pal.h index f1297a06bf..f3b220f825 100644 --- a/src/r_draw_pal.h +++ b/src/r_draw_pal.h @@ -202,43 +202,85 @@ namespace swrenderer void Execute(DrawerThread *thread) override; }; - //class RtInitColsPalCommand : public DrawerCommand { public: void Execute(DrawerThread *thread) override; }; - //class DrawColumnHorizPalCommand : public DrawerCommand { public: void Execute(DrawerThread *thread) override; }; - class FillColumnHorizPalCommand : public DrawerCommand + class RtInitColsPalCommand : public DrawerCommand + { + public: + RtInitColsPalCommand(uint8_t *buff); + void Execute(DrawerThread *thread) override; + FString DebugInfo() override { return "RtInitColsPalCommand"; } + + private: + uint8_t *buff; + }; + + class PalColumnHorizCommand : public DrawerCommand + { + public: + PalColumnHorizCommand(); + + protected: + const uint8_t *_source; + fixed_t _iscale; + fixed_t _texturefrac; + int _count; + int _color; + int _x; + int _yl; + }; + + class DrawColumnHorizPalCommand : public PalColumnHorizCommand { public: void Execute(DrawerThread *thread) override; FString DebugInfo() override { return "FillColumnHorizPalCommand"; } }; - + + class FillColumnHorizPalCommand : public PalColumnHorizCommand + { + public: + void Execute(DrawerThread *thread) override; + FString DebugInfo() override { return "FillColumnHorizPalCommand"; } + }; + class PalRtCommand : public DrawerCommand { public: PalRtCommand(int hx, int sx, int yl, int yh); FString DebugInfo() override { return "PalRtCommand"; } + + protected: + int hx, sx, yl, yh; + uint8_t *_destorg; + int _pitch; + const uint8_t 
*_colormap; + const uint32_t *_srcblend; + const uint32_t *_destblend; + const uint8_t *_translation; + int _color; }; class DrawColumnRt1CopyPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + class DrawColumnRt4CopyPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; class DrawColumnRt1PalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; class DrawColumnRt4PalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; class DrawColumnRt1TranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; class DrawColumnRt4TranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; class DrawColumnRt1AddPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; class DrawColumnRt4AddPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; - class DrawColumnRt1AddTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; - class DrawColumnRt4AddTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + //class DrawColumnRt1AddTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + //class DrawColumnRt4AddTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; class DrawColumnRt1ShadedPalCommand : public 
PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; class DrawColumnRt4ShadedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; class DrawColumnRt1AddClampPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; class DrawColumnRt4AddClampPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; - class DrawColumnRt1AddClampTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; - class DrawColumnRt4AddClampTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + //class DrawColumnRt1AddClampTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + //class DrawColumnRt4AddClampTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; class DrawColumnRt1SubClampPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; class DrawColumnRt4SubClampPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; - class DrawColumnRt1SubClampTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; - class DrawColumnRt4SubClampTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + //class DrawColumnRt1SubClampTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread 
*thread) override; }; + //class DrawColumnRt4SubClampTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; class DrawColumnRt1RevSubClampPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; class DrawColumnRt4RevSubClampPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; - class DrawColumnRt1RevSubClampTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; - class DrawColumnRt4RevSubClampTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + //class DrawColumnRt1RevSubClampTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + //class DrawColumnRt4RevSubClampTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; } diff --git a/src/r_draw_tc.cpp b/src/r_draw_tc.cpp index 2326cab91e..77d4500064 100644 --- a/src/r_draw_tc.cpp +++ b/src/r_draw_tc.cpp @@ -622,7 +622,10 @@ namespace swrenderer for (int y = 3; y >= 0; y--) horizspan[y] = dc_ctspan[y] = &dc_tspans[y][0]; - DrawerCommandQueue::QueueCommand(buffer); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(buffer); + else + DrawerCommandQueue::QueueCommand(buffer); } void rt_span_coverage(int x, int start, int stop) @@ -861,10 +864,17 @@ namespace swrenderer (*span)[1] = dc_yh; *span += 2; - if (drawer_needs_pal_input || !r_swtruecolor) - DrawerCommandQueue::QueueCommand>(); + if (r_swtruecolor) + { + if (drawer_needs_pal_input) + DrawerCommandQueue::QueueCommand>(); + else + DrawerCommandQueue::QueueCommand>(); + } else - DrawerCommandQueue::QueueCommand>(); + { + 
DrawerCommandQueue::QueueCommand(); + } } // Copies one span at hx to the screen at sx. @@ -879,11 +889,18 @@ namespace swrenderer // Copies all four spans to the screen starting at sx. void rt_copy4cols(int sx, int yl, int yh) { - // To do: we could do this with SSE using __m128i - rt_copy1col(0, sx, yl, yh); - rt_copy1col(1, sx + 1, yl, yh); - rt_copy1col(2, sx + 2, yl, yh); - rt_copy1col(3, sx + 3, yl, yh); + if (r_swtruecolor) + { + // To do: we could do this with SSE using __m128i + rt_copy1col(0, sx, yl, yh); + rt_copy1col(1, sx + 1, yl, yh); + rt_copy1col(2, sx + 2, yl, yh); + rt_copy1col(3, sx + 3, yl, yh); + } + else + { + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + } } // Maps one span at hx to the screen at sx. @@ -944,18 +961,28 @@ namespace swrenderer void rt_tlateadd1col(int hx, int sx, int yl, int yh) { if (r_swtruecolor) + { DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + } else - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + { + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + rt_add1col(hx, sx, yl, yh); + } } // Translates and adds all four spans to the screen starting at sx without clamping. void rt_tlateadd4cols(int sx, int yl, int yh) { if (r_swtruecolor) + { DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + } else - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + { + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + rt_add4cols(sx, yl, yh); + } } // Shades one span at hx to the screen at sx. @@ -998,18 +1025,28 @@ namespace swrenderer void rt_tlateaddclamp1col(int hx, int sx, int yl, int yh) { if (r_swtruecolor) + { DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + } else - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + { + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + rt_addclamp1col(hx, sx, yl, yh); + } } // Translates and adds all four spans to the screen starting at sx with clamping. 
void rt_tlateaddclamp4cols(int sx, int yl, int yh) { if (r_swtruecolor) + { DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + } else - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + { + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + rt_addclamp4cols(sx, yl, yh); + } } // Subtracts one span at hx to the screen at sx with clamping. @@ -1034,18 +1071,28 @@ namespace swrenderer void rt_tlatesubclamp1col(int hx, int sx, int yl, int yh) { if (r_swtruecolor) + { DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + } else - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + { + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + rt_subclamp1col(hx, sx, yl, yh); + } } // Translates and subtracts all four spans to the screen starting at sx with clamping. void rt_tlatesubclamp4cols(int sx, int yl, int yh) { if (r_swtruecolor) + { DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + } else - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + { + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + rt_subclamp4cols(sx, yl, yh); + } } // Subtracts one span at hx from the screen at sx with clamping. @@ -1070,18 +1117,28 @@ namespace swrenderer void rt_tlaterevsubclamp1col(int hx, int sx, int yl, int yh) { if (r_swtruecolor) + { DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + } else - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + { + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + rt_revsubclamp1col(hx, sx, yl, yh); + } } // Translates and subtracts all four spans from the screen starting at sx with clamping. 
void rt_tlaterevsubclamp4cols(int sx, int yl, int yh) { if (r_swtruecolor) + { DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + } else - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + { + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + rt_revsubclamp4cols(sx, yl, yh); + } } uint32_t vlinec1() diff --git a/src/r_drawt_pal.cpp b/src/r_drawt_pal.cpp index b35046285f..b9f0c378d0 100644 --- a/src/r_drawt_pal.cpp +++ b/src/r_drawt_pal.cpp @@ -1,3 +1,42 @@ +/* +** r_drawt.cpp +** Faster column drawers for modern processors +** +**--------------------------------------------------------------------------- +** Copyright 1998-2006 Randy Heit +** All rights reserved. +** +** Redistribution and use in source and binary forms, with or without +** modification, are permitted provided that the following conditions +** are met: +** +** 1. Redistributions of source code must retain the above copyright +** notice, this list of conditions and the following disclaimer. +** 2. Redistributions in binary form must reproduce the above copyright +** notice, this list of conditions and the following disclaimer in the +** documentation and/or other materials provided with the distribution. +** 3. The name of the author may not be used to endorse or promote products +** derived from this software without specific prior written permission. +** +** THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +** IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +** OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+** IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +** INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +** NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +** DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +** THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +** (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +** THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**--------------------------------------------------------------------------- +** +** These functions stretch columns into a temporary buffer and then +** map them to the screen. On modern machines, this is faster than drawing +** them directly to the screen. +** +** Will I be able to even understand any of this if I come back to it later? +** Let's hope so. :-) +*/ #include "templates.h" #include "doomtype.h" @@ -9,105 +48,817 @@ #include "v_video.h" #include "r_draw_pal.h" +// I should have commented this stuff better. +// +// dc_temp is the buffer R_DrawColumnHoriz writes into. +// dc_tspans points into it. +// dc_ctspan points into dc_tspans. +// horizspan also points into dc_tspans. + +// dc_ctspan is advanced while drawing into dc_temp. +// horizspan is advanced up to dc_ctspan when drawing from dc_temp to the screen. + namespace swrenderer { - PalRtCommand::PalRtCommand(int hx, int sx, int yl, int yh) + RtInitColsPalCommand::RtInitColsPalCommand(uint8_t *buff) : buff(buff) { } + void RtInitColsPalCommand::Execute(DrawerThread *thread) + { + thread->dc_temp = buff == nullptr ? 
thread->dc_temp_buff : buff; + } + + ///////////////////////////////////////////////////////////////////// + + PalColumnHorizCommand::PalColumnHorizCommand() + { + using namespace drawerargs; + + _source = dc_source; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _count = dc_count; + _color = dc_color; + _x = dc_x; + _yl = dc_yl; + } + + void DrawColumnHorizPalCommand::Execute(DrawerThread *thread) + { + int count = _count; + uint8_t *dest; + fixed_t fracstep; + fixed_t frac; + + if (count <= 0) + return; + + { + int x = _x & 3; + dest = &thread->dc_temp[x + 4 * _yl]; + } + fracstep = _iscale; + frac = _texturefrac; + + const uint8_t *source = _source; + + if (count & 1) { + *dest = source[frac >> FRACBITS]; dest += 4; frac += fracstep; + } + if (count & 2) { + dest[0] = source[frac >> FRACBITS]; frac += fracstep; + dest[4] = source[frac >> FRACBITS]; frac += fracstep; + dest += 8; + } + if (count & 4) { + dest[0] = source[frac >> FRACBITS]; frac += fracstep; + dest[4] = source[frac >> FRACBITS]; frac += fracstep; + dest[8] = source[frac >> FRACBITS]; frac += fracstep; + dest[12] = source[frac >> FRACBITS]; frac += fracstep; + dest += 16; + } + count >>= 3; + if (!count) return; + + do + { + dest[0] = source[frac >> FRACBITS]; frac += fracstep; + dest[4] = source[frac >> FRACBITS]; frac += fracstep; + dest[8] = source[frac >> FRACBITS]; frac += fracstep; + dest[12] = source[frac >> FRACBITS]; frac += fracstep; + dest[16] = source[frac >> FRACBITS]; frac += fracstep; + dest[20] = source[frac >> FRACBITS]; frac += fracstep; + dest[24] = source[frac >> FRACBITS]; frac += fracstep; + dest[28] = source[frac >> FRACBITS]; frac += fracstep; + dest += 32; + } while (--count); + } + void FillColumnHorizPalCommand::Execute(DrawerThread *thread) { + int count = _count; + uint8_t color = _color; + uint8_t *dest; + + if (count <= 0) + return; + + { + int x = _x & 3; + dest = &thread->dc_temp[x + 4 * _yl]; + } + + if (count & 1) { + *dest = color; + dest += 4; + } + 
if (!(count >>= 1)) + return; + do { + dest[0] = color; dest[4] = color; + dest += 8; + } while (--count); + } + + ///////////////////////////////////////////////////////////////////// + + PalRtCommand::PalRtCommand(int hx, int sx, int yl, int yh) : hx(hx), sx(sx), yl(yl), yh(yh) + { + using namespace drawerargs; + + _destorg = dc_destorg; + _pitch = dc_pitch; + _colormap = dc_colormap; + _srcblend = dc_srcblend; + _destblend = dc_destblend; + _translation = dc_translation; + _color = dc_color; } void DrawColumnRt1CopyPalCommand::Execute(DrawerThread *thread) { + uint8_t *source; + uint8_t *dest; + int count; + int pitch; + + count = yh-yl; + if (count < 0) + return; + count++; + + dest = ylookup[yl] + sx + _destorg; + source = &thread->dc_temp[yl*4 + hx]; + pitch = _pitch; + + if (count & 1) { + *dest = *source; + source += 4; + dest += pitch; + } + if (count & 2) { + dest[0] = source[0]; + dest[pitch] = source[4]; + source += 8; + dest += pitch*2; + } + if (!(count >>= 2)) + return; + + do { + dest[0] = source[0]; + dest[pitch] = source[4]; + dest[pitch*2] = source[8]; + dest[pitch*3] = source[12]; + source += 16; + dest += pitch*4; + } while (--count); + } + + void DrawColumnRt4CopyPalCommand::Execute(DrawerThread *thread) + { + int *source; + int *dest; + int count; + int pitch; + + count = yh-yl; + if (count < 0) + return; + count++; + + dest = (int *)(ylookup[yl] + sx + _destorg); + source = (int *)(&thread->dc_temp[yl*4]); + pitch = _pitch/sizeof(int); + + if (count & 1) { + *dest = *source; + source += 4/sizeof(int); + dest += pitch; + } + if (!(count >>= 1)) + return; + + do { + dest[0] = source[0]; + dest[pitch] = source[4/sizeof(int)]; + source += 8/sizeof(int); + dest += pitch*2; + } while (--count); } void DrawColumnRt1PalCommand::Execute(DrawerThread *thread) { + const uint8_t *colormap; + uint8_t *source; + uint8_t *dest; + int count; + int pitch; + + count = yh-yl; + if (count < 0) + return; + count++; + + colormap = _colormap; + dest = ylookup[yl] 
+ sx + _destorg; + source = &thread->dc_temp[yl*4 + hx]; + pitch = _pitch; + + if (count & 1) { + *dest = colormap[*source]; + source += 4; + dest += pitch; + } + if (!(count >>= 1)) + return; + + do { + dest[0] = colormap[source[0]]; + dest[pitch] = colormap[source[4]]; + source += 8; + dest += pitch*2; + } while (--count); } void DrawColumnRt4PalCommand::Execute(DrawerThread *thread) { + const uint8_t *colormap; + uint8_t *source; + uint8_t *dest; + int count; + int pitch; + + count = yh-yl; + if (count < 0) + return; + count++; + + colormap = _colormap; + dest = ylookup[yl] + sx + _destorg; + source = &thread->dc_temp[yl*4]; + pitch = _pitch; + + if (count & 1) { + dest[0] = colormap[source[0]]; + dest[1] = colormap[source[1]]; + dest[2] = colormap[source[2]]; + dest[3] = colormap[source[3]]; + source += 4; + dest += pitch; + } + if (!(count >>= 1)) + return; + + do { + dest[0] = colormap[source[0]]; + dest[1] = colormap[source[1]]; + dest[2] = colormap[source[2]]; + dest[3] = colormap[source[3]]; + dest[pitch] = colormap[source[4]]; + dest[pitch+1] = colormap[source[5]]; + dest[pitch+2] = colormap[source[6]]; + dest[pitch+3] = colormap[source[7]]; + source += 8; + dest += pitch*2; + } while (--count); } void DrawColumnRt1TranslatedPalCommand::Execute(DrawerThread *thread) { + int count = yh - yl + 1; + uint8_t *source = &thread->dc_temp[yl*4 + hx]; + const uint8_t *translation = _translation; + + // Things we do to hit the compiler's optimizer with a clue bat: + // 1. Parallelism is explicitly spelled out by using a separate + // C instruction for each assembly instruction. GCC lets me + // have four temporaries, but VC++ spills to the stack with + // more than two. Two is probably optimal, anyway. + // 2. The results of the translation lookups are explicitly + // stored in byte-sized variables. This causes the VC++ code + // to use byte mov instructions in most cases; for apparently + // random reasons, it will use movzx for some places. 
GCC + // ignores this and uses movzx always. + + // Do 8 rows at a time. + for (int count8 = count >> 3; count8; --count8) + { + int c0, c1; + uint8_t b0, b1; + + c0 = source[0]; c1 = source[4]; + b0 = translation[c0]; b1 = translation[c1]; + source[0] = b0; source[4] = b1; + + c0 = source[8]; c1 = source[12]; + b0 = translation[c0]; b1 = translation[c1]; + source[8] = b0; source[12] = b1; + + c0 = source[16]; c1 = source[20]; + b0 = translation[c0]; b1 = translation[c1]; + source[16] = b0; source[20] = b1; + + c0 = source[24]; c1 = source[28]; + b0 = translation[c0]; b1 = translation[c1]; + source[24] = b0; source[28] = b1; + + source += 32; + } + // Finish by doing 1 row at a time. + for (count &= 7; count; --count, source += 4) + { + source[0] = translation[source[0]]; + } } void DrawColumnRt4TranslatedPalCommand::Execute(DrawerThread *thread) { + int count = yh - yl + 1; + uint8_t *source = &thread->dc_temp[yl*4]; + const uint8_t *translation = _translation; + int c0, c1; + uint8_t b0, b1; + + // Do 2 rows at a time. + for (int count8 = count >> 1; count8; --count8) + { + c0 = source[0]; c1 = source[1]; + b0 = translation[c0]; b1 = translation[c1]; + source[0] = b0; source[1] = b1; + + c0 = source[2]; c1 = source[3]; + b0 = translation[c0]; b1 = translation[c1]; + source[2] = b0; source[3] = b1; + + c0 = source[4]; c1 = source[5]; + b0 = translation[c0]; b1 = translation[c1]; + source[4] = b0; source[5] = b1; + + c0 = source[6]; c1 = source[7]; + b0 = translation[c0]; b1 = translation[c1]; + source[6] = b0; source[7] = b1; + + source += 8; + } + // Do the final row if count was odd. 
+ if (count & 1) + { + c0 = source[0]; c1 = source[1]; + b0 = translation[c0]; b1 = translation[c1]; + source[0] = b0; source[1] = b1; + + c0 = source[2]; c1 = source[3]; + b0 = translation[c0]; b1 = translation[c1]; + source[2] = b0; source[3] = b1; + } } void DrawColumnRt1AddPalCommand::Execute(DrawerThread *thread) { + const uint8_t *colormap; + uint8_t *source; + uint8_t *dest; + int count; + int pitch; + + count = yh-yl; + if (count < 0) + return; + count++; + + const uint32_t *fg2rgb = _srcblend; + const uint32_t *bg2rgb = _destblend; + dest = ylookup[yl] + sx + _destorg; + source = &thread->dc_temp[yl*4 + hx]; + pitch = _pitch; + colormap = _colormap; + + do { + uint32_t fg = colormap[*source]; + uint32_t bg = *dest; + + fg = fg2rgb[fg]; + bg = bg2rgb[bg]; + fg = (fg+bg) | 0x1f07c1f; + *dest = RGB32k.All[fg & (fg>>15)]; + source += 4; + dest += pitch; + } while (--count); } void DrawColumnRt4AddPalCommand::Execute(DrawerThread *thread) { - } + const uint8_t *colormap; + uint8_t *source; + uint8_t *dest; + int count; + int pitch; - void DrawColumnRt1AddTranslatedPalCommand::Execute(DrawerThread *thread) - { - } + count = yh-yl; + if (count < 0) + return; + count++; - void DrawColumnRt4AddTranslatedPalCommand::Execute(DrawerThread *thread) - { + const uint32_t *fg2rgb = _srcblend; + const uint32_t *bg2rgb = _destblend; + dest = ylookup[yl] + sx + _destorg; + source = &thread->dc_temp[yl*4]; + pitch = _pitch; + colormap = _colormap; + + do { + uint32_t fg = colormap[source[0]]; + uint32_t bg = dest[0]; + fg = fg2rgb[fg]; + bg = bg2rgb[bg]; + fg = (fg+bg) | 0x1f07c1f; + dest[0] = RGB32k.All[fg & (fg>>15)]; + + fg = colormap[source[1]]; + bg = dest[1]; + fg = fg2rgb[fg]; + bg = bg2rgb[bg]; + fg = (fg+bg) | 0x1f07c1f; + dest[1] = RGB32k.All[fg & (fg>>15)]; + + + fg = colormap[source[2]]; + bg = dest[2]; + fg = fg2rgb[fg]; + bg = bg2rgb[bg]; + fg = (fg+bg) | 0x1f07c1f; + dest[2] = RGB32k.All[fg & (fg>>15)]; + + fg = colormap[source[3]]; + bg = dest[3]; + fg = 
fg2rgb[fg]; + bg = bg2rgb[bg]; + fg = (fg+bg) | 0x1f07c1f; + dest[3] = RGB32k.All[fg & (fg>>15)]; + + source += 4; + dest += pitch; + } while (--count); } void DrawColumnRt1ShadedPalCommand::Execute(DrawerThread *thread) { + uint32_t *fgstart; + const uint8_t *colormap; + uint8_t *source; + uint8_t *dest; + int count; + int pitch; + + count = yh-yl; + if (count < 0) + return; + count++; + + fgstart = &Col2RGB8[0][_color]; + colormap = _colormap; + dest = ylookup[yl] + sx + _destorg; + source = &thread->dc_temp[yl*4 + hx]; + pitch = _pitch; + + do { + uint32_t val = colormap[*source]; + uint32_t fg = fgstart[val<<8]; + val = (Col2RGB8[64-val][*dest] + fg) | 0x1f07c1f; + *dest = RGB32k.All[val & (val>>15)]; + source += 4; + dest += pitch; + } while (--count); } void DrawColumnRt4ShadedPalCommand::Execute(DrawerThread *thread) { + uint32_t *fgstart; + const uint8_t *colormap; + uint8_t *source; + uint8_t *dest; + int count; + int pitch; + + count = yh-yl; + if (count < 0) + return; + count++; + + fgstart = &Col2RGB8[0][_color]; + colormap = _colormap; + dest = ylookup[yl] + sx + _destorg; + source = &thread->dc_temp[yl*4]; + pitch = _pitch; + + do { + uint32_t val; + + val = colormap[source[0]]; + val = (Col2RGB8[64-val][dest[0]] + fgstart[val<<8]) | 0x1f07c1f; + dest[0] = RGB32k.All[val & (val>>15)]; + + val = colormap[source[1]]; + val = (Col2RGB8[64-val][dest[1]] + fgstart[val<<8]) | 0x1f07c1f; + dest[1] = RGB32k.All[val & (val>>15)]; + + val = colormap[source[2]]; + val = (Col2RGB8[64-val][dest[2]] + fgstart[val<<8]) | 0x1f07c1f; + dest[2] = RGB32k.All[val & (val>>15)]; + + val = colormap[source[3]]; + val = (Col2RGB8[64-val][dest[3]] + fgstart[val<<8]) | 0x1f07c1f; + dest[3] = RGB32k.All[val & (val>>15)]; + + source += 4; + dest += pitch; + } while (--count); } void DrawColumnRt1AddClampPalCommand::Execute(DrawerThread *thread) { + const uint8_t *colormap; + uint8_t *source; + uint8_t *dest; + int count; + int pitch; + + count = yh-yl; + if (count < 0) + return; 
+ count++; + + const uint32_t *fg2rgb = _srcblend; + const uint32_t *bg2rgb = _destblend; + dest = ylookup[yl] + sx + _destorg; + source = &thread->dc_temp[yl*4 + hx]; + pitch = _pitch; + colormap = _colormap; + + do { + uint32_t a = fg2rgb[colormap[*source]] + bg2rgb[*dest]; + uint32_t b = a; + + a |= 0x01f07c1f; + b &= 0x40100400; + a &= 0x3fffffff; + b = b - (b >> 5); + a |= b; + *dest = RGB32k.All[(a>>15) & a]; + source += 4; + dest += pitch; + } while (--count); } void DrawColumnRt4AddClampPalCommand::Execute(DrawerThread *thread) { - } + const uint8_t *colormap; + uint8_t *source; + uint8_t *dest; + int count; + int pitch; - void DrawColumnRt1AddClampTranslatedPalCommand::Execute(DrawerThread *thread) - { - } + count = yh-yl; + if (count < 0) + return; + count++; - void DrawColumnRt4AddClampTranslatedPalCommand::Execute(DrawerThread *thread) - { + dest = ylookup[yl] + sx + _destorg; + source = &thread->dc_temp[yl*4]; + pitch = _pitch; + colormap = _colormap; + + const uint32_t *fg2rgb = _srcblend; + const uint32_t *bg2rgb = _destblend; + + do { + uint32_t a = fg2rgb[colormap[source[0]]] + bg2rgb[dest[0]]; + uint32_t b = a; + + a |= 0x01f07c1f; + b &= 0x40100400; + a &= 0x3fffffff; + b = b - (b >> 5); + a |= b; + dest[0] = RGB32k.All[(a>>15) & a]; + + a = fg2rgb[colormap[source[1]]] + bg2rgb[dest[1]]; + b = a; + a |= 0x01f07c1f; + b &= 0x40100400; + a &= 0x3fffffff; + b = b - (b >> 5); + a |= b; + dest[1] = RGB32k.All[(a>>15) & a]; + + a = fg2rgb[colormap[source[2]]] + bg2rgb[dest[2]]; + b = a; + a |= 0x01f07c1f; + b &= 0x40100400; + a &= 0x3fffffff; + b = b - (b >> 5); + a |= b; + dest[2] = RGB32k.All[(a>>15) & a]; + + a = fg2rgb[colormap[source[3]]] + bg2rgb[dest[3]]; + b = a; + a |= 0x01f07c1f; + b &= 0x40100400; + a &= 0x3fffffff; + b = b - (b >> 5); + a |= b; + dest[3] = RGB32k.All[(a>>15) & a]; + + source += 4; + dest += pitch; + } while (--count); } void DrawColumnRt1SubClampPalCommand::Execute(DrawerThread *thread) { + const uint8_t *colormap; + 
uint8_t *source; + uint8_t *dest; + int count; + int pitch; + + count = yh-yl; + if (count < 0) + return; + count++; + + const uint32_t *fg2rgb = _srcblend; + const uint32_t *bg2rgb = _destblend; + dest = ylookup[yl] + sx + _destorg; + source = &thread->dc_temp[yl*4 + hx]; + pitch = _pitch; + colormap = _colormap; + + do { + uint32_t a = (fg2rgb[colormap[*source]] | 0x40100400) - bg2rgb[*dest]; + uint32_t b = a; + + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + *dest = RGB32k.All[(a>>15) & a]; + source += 4; + dest += pitch; + } while (--count); } void DrawColumnRt4SubClampPalCommand::Execute(DrawerThread *thread) { - } + const uint8_t *colormap; + uint8_t *source; + uint8_t *dest; + int count; + int pitch; - void DrawColumnRt1SubClampTranslatedPalCommand::Execute(DrawerThread *thread) - { - } + count = yh-yl; + if (count < 0) + return; + count++; - void DrawColumnRt4SubClampTranslatedPalCommand::Execute(DrawerThread *thread) - { + const uint32_t *fg2rgb = _srcblend; + const uint32_t *bg2rgb = _destblend; + dest = ylookup[yl] + sx + _destorg; + source = &thread->dc_temp[yl*4]; + pitch = _pitch; + colormap = _colormap; + + do { + uint32_t a = (fg2rgb[colormap[source[0]]] | 0x40100400) - bg2rgb[dest[0]]; + uint32_t b = a; + + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + dest[0] = RGB32k.All[(a>>15) & a]; + + a = (fg2rgb[colormap[source[1]]] | 0x40100400) - bg2rgb[dest[1]]; + b = a; + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + dest[1] = RGB32k.All[(a>>15) & a]; + + a = (fg2rgb[colormap[source[2]]] | 0x40100400) - bg2rgb[dest[2]]; + b = a; + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + dest[2] = RGB32k.All[(a>>15) & a]; + + a = (fg2rgb[colormap[source[3]]] | 0x40100400) - bg2rgb[dest[3]]; + b = a; + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + dest[3] = RGB32k.All[(a>>15) & a]; + + source += 4; + dest += pitch; + } while (--count); } void 
DrawColumnRt1RevSubClampPalCommand::Execute(DrawerThread *thread) { + const uint8_t *colormap; + uint8_t *source; + uint8_t *dest; + int count; + int pitch; + + count = yh-yl; + if (count < 0) + return; + count++; + + const uint32_t *fg2rgb = _srcblend; + const uint32_t *bg2rgb = _destblend; + dest = ylookup[yl] + sx + _destorg; + source = &thread->dc_temp[yl*4 + hx]; + pitch = _pitch; + colormap = _colormap; + + do { + uint32_t a = (bg2rgb[*dest] | 0x40100400) - fg2rgb[colormap[*source]]; + uint32_t b = a; + + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + *dest = RGB32k.All[(a>>15) & a]; + source += 4; + dest += pitch; + } while (--count); } void DrawColumnRt4RevSubClampPalCommand::Execute(DrawerThread *thread) { - } + const uint8_t *colormap; + uint8_t *source; + uint8_t *dest; + int count; + int pitch; - void DrawColumnRt1RevSubClampTranslatedPalCommand::Execute(DrawerThread *thread) - { - } + count = yh-yl; + if (count < 0) + return; + count++; - void DrawColumnRt4RevSubClampTranslatedPalCommand::Execute(DrawerThread *thread) - { + const uint32_t *fg2rgb = _srcblend; + const uint32_t *bg2rgb = _destblend; + dest = ylookup[yl] + sx + _destorg; + source = &thread->dc_temp[yl*4]; + pitch = _pitch; + colormap = _colormap; + + do { + uint32_t a = (bg2rgb[dest[0]] | 0x40100400) - fg2rgb[colormap[source[0]]]; + uint32_t b = a; + + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + dest[0] = RGB32k.All[(a>>15) & a]; + + a = (bg2rgb[dest[1]] | 0x40100400) - fg2rgb[colormap[source[1]]]; + b = a; + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + dest[1] = RGB32k.All[(a>>15) & a]; + + a = (bg2rgb[dest[2]] | 0x40100400) - fg2rgb[colormap[source[2]]]; + b = a; + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + dest[2] = RGB32k.All[(a>>15) & a]; + + a = (bg2rgb[dest[3]] | 0x40100400) - fg2rgb[colormap[source[3]]]; + b = a; + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + dest[3] 
= RGB32k.All[(a>>15) & a]; + + source += 4; + dest += pitch; + } while (--count); } } From c16506bf5977aac3e53f763d9e4feb4d120664b7 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 6 Dec 2016 15:13:43 +0100 Subject: [PATCH 450/912] Add thread awareness to the rt family of drawers --- src/r_draw_tc.cpp | 10 +++ src/r_drawt_pal.cpp | 215 ++++++++++++++++++++++---------------------- src/r_thread.h | 6 ++ 3 files changed, 125 insertions(+), 106 deletions(-) diff --git a/src/r_draw_tc.cpp b/src/r_draw_tc.cpp index 77d4500064..ec9129a89c 100644 --- a/src/r_draw_tc.cpp +++ b/src/r_draw_tc.cpp @@ -925,18 +925,28 @@ namespace swrenderer void rt_tlate1col(int hx, int sx, int yl, int yh) { if (r_swtruecolor) + { DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + } else + { DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + rt_map1col(hx, sx, yl, yh); + } } // Translates all four spans to the screen starting at sx. void rt_tlate4cols(int sx, int yl, int yh) { if (r_swtruecolor) + { DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + } else + { DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + rt_map4cols(sx, yl, yh); + } } // Adds one span at hx to the screen at sx without clamping. 
diff --git a/src/r_drawt_pal.cpp b/src/r_drawt_pal.cpp index b9f0c378d0..3356592d25 100644 --- a/src/r_drawt_pal.cpp +++ b/src/r_drawt_pal.cpp @@ -91,18 +91,20 @@ namespace swrenderer fixed_t fracstep; fixed_t frac; + count = thread->count_for_thread(_yl, count); if (count <= 0) return; - { - int x = _x & 3; - dest = &thread->dc_temp[x + 4 * _yl]; - } fracstep = _iscale; frac = _texturefrac; const uint8_t *source = _source; + int x = _x & 3; + dest = &thread->dc_temp[x + thread->temp_line_for_thread(_yl) * 4]; + frac += fracstep * thread->skipped_by_thread(_yl); + fracstep *= thread->num_cores; + if (count & 1) { *dest = source[frac >> FRACBITS]; dest += 4; frac += fracstep; } @@ -141,13 +143,12 @@ namespace swrenderer uint8_t color = _color; uint8_t *dest; + count = thread->count_for_thread(_yl, count); if (count <= 0) return; - { - int x = _x & 3; - dest = &thread->dc_temp[x + 4 * _yl]; - } + int x = _x & 3; + dest = &thread->dc_temp[x + thread->temp_line_for_thread(_yl) * 4]; if (count & 1) { *dest = color; @@ -183,14 +184,15 @@ namespace swrenderer int count; int pitch; - count = yh-yl; - if (count < 0) - return; - count++; + count = yh - yl + 1; - dest = ylookup[yl] + sx + _destorg; - source = &thread->dc_temp[yl*4 + hx]; - pitch = _pitch; + count = thread->count_for_thread(yl, count); + if (count <= 0) + return; + + dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; + source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4 + hx]; + pitch = _pitch * thread->num_cores; if (count & 1) { *dest = *source; @@ -223,14 +225,15 @@ namespace swrenderer int count; int pitch; - count = yh-yl; - if (count < 0) - return; - count++; + count = yh - yl + 1; - dest = (int *)(ylookup[yl] + sx + _destorg); - source = (int *)(&thread->dc_temp[yl*4]); - pitch = _pitch/sizeof(int); + count = thread->count_for_thread(yl, count); + if (count <= 0) + return; + + dest = (int *)(ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg); + source = (int 
*)(&thread->dc_temp[thread->temp_line_for_thread(yl)*4]); + pitch = _pitch*thread->num_cores/sizeof(int); if (count & 1) { *dest = *source; @@ -256,15 +259,16 @@ namespace swrenderer int count; int pitch; - count = yh-yl; - if (count < 0) + count = yh - yl + 1; + + count = thread->count_for_thread(yl, count); + if (count <= 0) return; - count++; colormap = _colormap; - dest = ylookup[yl] + sx + _destorg; - source = &thread->dc_temp[yl*4 + hx]; - pitch = _pitch; + dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; + source = &thread->dc_temp[thread->temp_line_for_thread(yl) *4 + hx]; + pitch = _pitch*thread->num_cores; if (count & 1) { *dest = colormap[*source]; @@ -290,15 +294,16 @@ namespace swrenderer int count; int pitch; - count = yh-yl; - if (count < 0) + count = yh - yl + 1; + + count = thread->count_for_thread(yl, count); + if (count <= 0) return; - count++; colormap = _colormap; - dest = ylookup[yl] + sx + _destorg; - source = &thread->dc_temp[yl*4]; - pitch = _pitch; + dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; + source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4]; + pitch = _pitch*thread->num_cores; if (count & 1) { dest[0] = colormap[source[0]]; @@ -328,7 +333,11 @@ namespace swrenderer void DrawColumnRt1TranslatedPalCommand::Execute(DrawerThread *thread) { int count = yh - yl + 1; - uint8_t *source = &thread->dc_temp[yl*4 + hx]; + count = thread->count_for_thread(yl, count); + if (count <= 0) + return; + + uint8_t *source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4 + hx]; const uint8_t *translation = _translation; // Things we do to hit the compiler's optimizer with a clue bat: @@ -376,7 +385,11 @@ namespace swrenderer void DrawColumnRt4TranslatedPalCommand::Execute(DrawerThread *thread) { int count = yh - yl + 1; - uint8_t *source = &thread->dc_temp[yl*4]; + count = thread->count_for_thread(yl, count); + if (count <= 0) + return; + + uint8_t *source = 
&thread->dc_temp[thread->temp_line_for_thread(yl)*4]; const uint8_t *translation = _translation; int c0, c1; uint8_t b0, b1; @@ -420,19 +433,18 @@ namespace swrenderer const uint8_t *colormap; uint8_t *source; uint8_t *dest; - int count; int pitch; - count = yh-yl; - if (count < 0) + int count = yh - yl + 1; + count = thread->count_for_thread(yl, count); + if (count <= 0) return; - count++; const uint32_t *fg2rgb = _srcblend; const uint32_t *bg2rgb = _destblend; - dest = ylookup[yl] + sx + _destorg; - source = &thread->dc_temp[yl*4 + hx]; - pitch = _pitch; + dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; + source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4 + hx]; + pitch = _pitch * thread->num_cores; colormap = _colormap; do { @@ -453,19 +465,18 @@ namespace swrenderer const uint8_t *colormap; uint8_t *source; uint8_t *dest; - int count; int pitch; - count = yh-yl; - if (count < 0) + int count = yh - yl + 1; + count = thread->count_for_thread(yl, count); + if (count <= 0) return; - count++; const uint32_t *fg2rgb = _srcblend; const uint32_t *bg2rgb = _destblend; - dest = ylookup[yl] + sx + _destorg; - source = &thread->dc_temp[yl*4]; - pitch = _pitch; + dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; + source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4]; + pitch = _pitch * thread->num_cores; colormap = _colormap; do { @@ -509,19 +520,18 @@ namespace swrenderer const uint8_t *colormap; uint8_t *source; uint8_t *dest; - int count; int pitch; - count = yh-yl; - if (count < 0) + int count = yh - yl + 1; + count = thread->count_for_thread(yl, count); + if (count <= 0) return; - count++; fgstart = &Col2RGB8[0][_color]; colormap = _colormap; - dest = ylookup[yl] + sx + _destorg; - source = &thread->dc_temp[yl*4 + hx]; - pitch = _pitch; + dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; + source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4 + hx]; + pitch = _pitch * thread->num_cores; 
do { uint32_t val = colormap[*source]; @@ -539,19 +549,18 @@ namespace swrenderer const uint8_t *colormap; uint8_t *source; uint8_t *dest; - int count; int pitch; - count = yh-yl; - if (count < 0) + int count = yh - yl + 1; + count = thread->count_for_thread(yl, count); + if (count <= 0) return; - count++; fgstart = &Col2RGB8[0][_color]; colormap = _colormap; - dest = ylookup[yl] + sx + _destorg; - source = &thread->dc_temp[yl*4]; - pitch = _pitch; + dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; + source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4]; + pitch = _pitch * thread->num_cores; do { uint32_t val; @@ -582,19 +591,18 @@ namespace swrenderer const uint8_t *colormap; uint8_t *source; uint8_t *dest; - int count; int pitch; - count = yh-yl; - if (count < 0) + int count = yh - yl + 1; + count = thread->count_for_thread(yl, count); + if (count <= 0) return; - count++; const uint32_t *fg2rgb = _srcblend; const uint32_t *bg2rgb = _destblend; - dest = ylookup[yl] + sx + _destorg; - source = &thread->dc_temp[yl*4 + hx]; - pitch = _pitch; + dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; + source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4 + hx]; + pitch = _pitch * thread->num_cores; colormap = _colormap; do { @@ -617,17 +625,16 @@ namespace swrenderer const uint8_t *colormap; uint8_t *source; uint8_t *dest; - int count; int pitch; - count = yh-yl; - if (count < 0) + int count = yh - yl + 1; + count = thread->count_for_thread(yl, count); + if (count <= 0) return; - count++; - dest = ylookup[yl] + sx + _destorg; - source = &thread->dc_temp[yl*4]; - pitch = _pitch; + dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; + source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4]; + pitch = _pitch * thread->num_cores; colormap = _colormap; const uint32_t *fg2rgb = _srcblend; @@ -681,19 +688,18 @@ namespace swrenderer const uint8_t *colormap; uint8_t *source; uint8_t *dest; - int count; int 
pitch; - count = yh-yl; - if (count < 0) + int count = yh - yl + 1; + count = thread->count_for_thread(yl, count); + if (count <= 0) return; - count++; const uint32_t *fg2rgb = _srcblend; const uint32_t *bg2rgb = _destblend; - dest = ylookup[yl] + sx + _destorg; - source = &thread->dc_temp[yl*4 + hx]; - pitch = _pitch; + dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; + source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4 + hx]; + pitch = _pitch * thread->num_cores; colormap = _colormap; do { @@ -715,19 +721,18 @@ namespace swrenderer const uint8_t *colormap; uint8_t *source; uint8_t *dest; - int count; int pitch; - count = yh-yl; - if (count < 0) + int count = yh - yl + 1; + count = thread->count_for_thread(yl, count); + if (count <= 0) return; - count++; const uint32_t *fg2rgb = _srcblend; const uint32_t *bg2rgb = _destblend; - dest = ylookup[yl] + sx + _destorg; - source = &thread->dc_temp[yl*4]; - pitch = _pitch; + dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; + source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4]; + pitch = _pitch * thread->num_cores; colormap = _colormap; do { @@ -774,19 +779,18 @@ namespace swrenderer const uint8_t *colormap; uint8_t *source; uint8_t *dest; - int count; int pitch; - count = yh-yl; - if (count < 0) + int count = yh - yl + 1; + count = thread->count_for_thread(yl, count); + if (count <= 0) return; - count++; const uint32_t *fg2rgb = _srcblend; const uint32_t *bg2rgb = _destblend; - dest = ylookup[yl] + sx + _destorg; - source = &thread->dc_temp[yl*4 + hx]; - pitch = _pitch; + dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; + source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4 + hx]; + pitch = _pitch * thread->num_cores; colormap = _colormap; do { @@ -808,19 +812,18 @@ namespace swrenderer const uint8_t *colormap; uint8_t *source; uint8_t *dest; - int count; int pitch; - count = yh-yl; - if (count < 0) + int count = yh - yl + 1; + count = 
thread->count_for_thread(yl, count); + if (count <= 0) return; - count++; const uint32_t *fg2rgb = _srcblend; const uint32_t *bg2rgb = _destblend; - dest = ylookup[yl] + sx + _destorg; - source = &thread->dc_temp[yl*4]; - pitch = _pitch; + dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; + source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4]; + pitch = _pitch * thread->num_cores; colormap = _colormap; do { diff --git a/src/r_thread.h b/src/r_thread.h index 1e48ff3074..9a8a5c1b58 100644 --- a/src/r_thread.h +++ b/src/r_thread.h @@ -89,6 +89,12 @@ public: { return dest + skipped_by_thread(first_line) * pitch; } + + // The first line in the dc_temp buffer used this thread + int temp_line_for_thread(int first_line) + { + return (first_line + skipped_by_thread(first_line)) / num_cores; + } }; // Task to be executed by each worker thread From 6054db0d865abf7a0c32bdd347dcb68e4b541965 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 6 Dec 2016 15:29:04 +0100 Subject: [PATCH 451/912] Make puzzy pinky fuzzywuzzy about threads --- src/r_draw_pal.cpp | 117 +++++++++++++++++++++++---------------------- src/r_draw_pal.h | 2 + 2 files changed, 63 insertions(+), 56 deletions(-) diff --git a/src/r_draw_pal.cpp b/src/r_draw_pal.cpp index fbef5153e1..ab598957d0 100644 --- a/src/r_draw_pal.cpp +++ b/src/r_draw_pal.cpp @@ -1733,76 +1733,81 @@ namespace swrenderer _x = dc_x; _destorg = dc_destorg; _pitch = dc_pitch; + _fuzzpos = fuzzpos; + _fuzzviewheight = fuzzviewheight; } void DrawFuzzColumnPalCommand::Execute(DrawerThread *thread) { - int count; - uint8_t *dest; + int yl = MAX(_yl, 1); + int yh = MIN(_yh, _fuzzviewheight); - // Adjust borders. Low... - if (_yl == 0) - _yl = 1; - - // .. and high. - if (_yh > fuzzviewheight) - _yh = fuzzviewheight; - - count = _yh - _yl; + int count = thread->count_for_thread(yl, yh - yl + 1); // Zero length. 
- if (count < 0) + if (count <= 0) return; - count++; + uint8_t *map = &NormalLight.Maps[6 * 256]; - dest = ylookup[_yl] + _x + _destorg; + uint8_t *dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + _x + _destorg); - // colormap #6 is used for shading (of 0-31, a bit brighter than average) + int pitch = _pitch * thread->num_cores; + int fuzzstep = thread->num_cores; + int fuzz = (_fuzzpos + thread->skipped_by_thread(yl)) % FUZZTABLE; + + yl += thread->skipped_by_thread(yl); + + // Handle the case where we would go out of bounds at the top: + if (yl < fuzzstep) { - // [RH] Make local copies of global vars to try and improve - // the optimizations made by the compiler. - int pitch = _pitch; - int fuzz = fuzzpos; - int cnt; - uint8_t *map = &NormalLight.Maps[6 * 256]; + uint8_t *srcdest = dest + fuzzoffset[fuzz] * fuzzstep + pitch; + //assert(static_cast((srcdest - (uint8_t*)dc_destorg) / (_pitch)) < viewheight); - // [RH] Split this into three separate loops to minimize - // the number of times fuzzpos needs to be clamped. 
- if (fuzz) + *dest = map[*srcdest]; + dest += pitch; + fuzz += fuzzstep; + fuzz %= FUZZTABLE; + + count--; + if (count == 0) + return; + } + + bool lowerbounds = (yl + (count + fuzzstep - 1) * fuzzstep > _fuzzviewheight); + if (lowerbounds) + count--; + + // Fuzz where fuzzoffset stays within bounds + while (count > 0) + { + int available = (FUZZTABLE - fuzz); + int next_wrap = available / fuzzstep; + if (available % fuzzstep != 0) + next_wrap++; + + int cnt = MIN(count, next_wrap); + count -= cnt; + do { - cnt = MIN(FUZZTABLE - fuzz, count); - count -= cnt; - do - { - *dest = map[dest[fuzzoffset[fuzz++]]]; - dest += pitch; - } while (--cnt); - } - if (fuzz == FUZZTABLE || count > 0) - { - while (count >= FUZZTABLE) - { - fuzz = 0; - cnt = FUZZTABLE; - count -= FUZZTABLE; - do - { - *dest = map[dest[fuzzoffset[fuzz++]]]; - dest += pitch; - } while (--cnt); - } - fuzz = 0; - if (count > 0) - { - do - { - *dest = map[dest[fuzzoffset[fuzz++]]]; - dest += pitch; - } while (--count); - } - } - fuzzpos = fuzz; + uint8_t *srcdest = dest + fuzzoffset[fuzz] * fuzzstep; + //assert(static_cast((srcdest - (uint8_t*)dc_destorg) / (_pitch)) < viewheight); + + *dest = map[*srcdest]; + dest += pitch; + fuzz += fuzzstep; + } while (--cnt); + + fuzz %= FUZZTABLE; + } + + // Handle the case where we would go out of bounds at the bottom + if (lowerbounds) + { + uint8_t *srcdest = dest + fuzzoffset[fuzz] * fuzzstep - pitch; + //assert(static_cast((srcdest - (uint8_t*)dc_destorg) / (_pitch)) < viewheight); + + *dest = map[*srcdest]; } } diff --git a/src/r_draw_pal.h b/src/r_draw_pal.h index f3b220f825..d9fb2d4893 100644 --- a/src/r_draw_pal.h +++ b/src/r_draw_pal.h @@ -138,6 +138,8 @@ namespace swrenderer int _x; uint8_t *_destorg; int _pitch; + int _fuzzpos; + int _fuzzviewheight; }; class PalSpanCommand : public DrawerCommand From d8f805ddc92df236549b4e35d05790ea2a173941 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 6 Dec 2016 18:31:26 +0100 Subject: [PATCH 452/912] Added 
DrawSlabPalCommand and rewrote the actual for loop so nobody can say it came from build.. --- src/r_draw_pal.cpp | 30 ++++++++++++++++++++++++++++++ src/r_draw_pal.h | 11 +++++++++++ 2 files changed, 41 insertions(+) diff --git a/src/r_draw_pal.cpp b/src/r_draw_pal.cpp index ab598957d0..fa4b7df1d9 100644 --- a/src/r_draw_pal.cpp +++ b/src/r_draw_pal.cpp @@ -2286,11 +2286,41 @@ namespace swrenderer ///////////////////////////////////////////////////////////////////////// DrawSlabPalCommand::DrawSlabPalCommand(int dx, fixed_t v, int dy, fixed_t vi, const uint8_t *vptr, uint8_t *p, const uint8_t *colormap) + : _dx(dx), _v(v), _dy(dy), _vi(vi), _vptr(vptr), _p(p), _colormap(colormap) { + using namespace drawerargs; + _pitch = dc_pitch; + _start_y = static_cast((p - dc_destorg) / dc_pitch); } void DrawSlabPalCommand::Execute(DrawerThread *thread) { + int count = _dy; + uint8_t *dest = _p; + int pitch = _pitch; + int width = _dx; + const uint8_t *colormap = _colormap; + const uint8_t *source = _vptr; + fixed_t fracpos = _v; + fixed_t iscale = _vi; + + count = thread->count_for_thread(_start_y, count); + dest = thread->dest_for_thread(_start_y, pitch, dest); + fracpos += iscale * thread->skipped_by_thread(_start_y); + iscale *= thread->num_cores; + pitch *= thread->num_cores; + + while (count > 0) + { + uint8_t color = colormap[source[fracpos >> FRACBITS]]; + + for (int x = 0; x < width; x++) + dest[x] = color; + + dest += pitch; + fracpos += iscale; + count--; + } } ///////////////////////////////////////////////////////////////////////// diff --git a/src/r_draw_pal.h b/src/r_draw_pal.h index d9fb2d4893..ba9a953c3d 100644 --- a/src/r_draw_pal.h +++ b/src/r_draw_pal.h @@ -195,6 +195,17 @@ namespace swrenderer public: DrawSlabPalCommand(int dx, fixed_t v, int dy, fixed_t vi, const uint8_t *vptr, uint8_t *p, const uint8_t *colormap); void Execute(DrawerThread *thread) override; + + private: + int _dx; + fixed_t _v; + int _dy; + fixed_t _vi; + const uint8_t *_vptr; + uint8_t 
*_p; + const uint8_t *_colormap; + int _pitch; + int _start_y; }; class DrawFogBoundaryLinePalCommand : public PalSpanCommand From bec5d39ca89366155ef6f9d935538978a4bb799a Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 6 Dec 2016 18:58:39 +0100 Subject: [PATCH 453/912] ColoredSpan and FogBoundary drawers --- src/r_draw_pal.cpp | 24 ++++++++++++++++++++++-- src/r_draw_pal.h | 14 +++++++++++++- src/r_plane.cpp | 5 ----- src/r_plane.h | 5 ----- 4 files changed, 35 insertions(+), 13 deletions(-) diff --git a/src/r_draw_pal.cpp b/src/r_draw_pal.cpp index fa4b7df1d9..6eeba9ec75 100644 --- a/src/r_draw_pal.cpp +++ b/src/r_draw_pal.cpp @@ -2275,12 +2275,19 @@ namespace swrenderer ///////////////////////////////////////////////////////////////////////// - DrawColoredSpanPalCommand::DrawColoredSpanPalCommand(int y, int x1, int x2) + DrawColoredSpanPalCommand::DrawColoredSpanPalCommand(int y, int x1, int x2) : y(y), x1(x1), x2(x2) { + using namespace drawerargs; + color = ds_color; + destorg = dc_destorg; } void DrawColoredSpanPalCommand::Execute(DrawerThread *thread) { + if (thread->line_skipped_by_thread(y)) + return; + + memset(ylookup[y] + x1 + destorg, color, x2 - x1 + 1); } ///////////////////////////////////////////////////////////////////////// @@ -2325,11 +2332,24 @@ namespace swrenderer ///////////////////////////////////////////////////////////////////////// - DrawFogBoundaryLinePalCommand::DrawFogBoundaryLinePalCommand(int y, int y2, int x1) + DrawFogBoundaryLinePalCommand::DrawFogBoundaryLinePalCommand(int y, int x1, int x2) : y(y), x1(x1), x2(x2) { + using namespace drawerargs; + _colormap = dc_colormap; + _destorg = dc_destorg; } void DrawFogBoundaryLinePalCommand::Execute(DrawerThread *thread) { + if (thread->line_skipped_by_thread(y)) + return; + + const uint8_t *colormap = _colormap; + uint8_t *dest = ylookup[y] + _destorg; + int x = x1; + do + { + dest[x] = colormap[dest[x]]; + } while (++x <= x2); } } diff --git a/src/r_draw_pal.h 
b/src/r_draw_pal.h index ba9a953c3d..6fe775568d 100644 --- a/src/r_draw_pal.h +++ b/src/r_draw_pal.h @@ -188,6 +188,13 @@ namespace swrenderer DrawColoredSpanPalCommand(int y, int x1, int x2); void Execute(DrawerThread *thread) override; FString DebugInfo() override { return "DrawColoredSpanPalCommand"; } + + private: + int y; + int x1; + int x2; + int color; + uint8_t *destorg; }; class DrawSlabPalCommand : public PalSpanCommand @@ -211,8 +218,13 @@ namespace swrenderer class DrawFogBoundaryLinePalCommand : public PalSpanCommand { public: - DrawFogBoundaryLinePalCommand(int y, int y2, int x1); + DrawFogBoundaryLinePalCommand(int y, int x1, int x2); void Execute(DrawerThread *thread) override; + + private: + int y, x1, x2; + const uint8_t *_colormap; + uint8_t *_destorg; }; class RtInitColsPalCommand : public DrawerCommand diff --git a/src/r_plane.cpp b/src/r_plane.cpp index c6c7d6d2f0..3b026b293d 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -522,11 +522,6 @@ void R_MapTiltedPlane (int y, int x1) // //========================================================================== -void R_MapColoredPlane_C (int y, int x1) -{ - memset (ylookup[y] + x1 + dc_destorg, ds_color, spanend[y] - x1 + 1); -} - void R_MapColoredPlane(int y, int x1) { R_DrawColoredSpan(y, x1, spanend[y]); diff --git a/src/r_plane.h b/src/r_plane.h index 9141411af3..3a067b527e 100644 --- a/src/r_plane.h +++ b/src/r_plane.h @@ -96,11 +96,6 @@ void R_DrawNormalPlane (visplane_t *pl, double xscale, double yscale, fixed_t al void R_DrawTiltedPlane (visplane_t *pl, double xscale, double yscale, fixed_t alpha, bool additive, bool masked); void R_MapVisPlane (visplane_t *pl, void (*mapfunc)(int y, int x1)); -void R_MapTiltedPlane_C(int y, int x1); -void R_MapTiltedPlane_rgba(int y, int x); -void R_MapColoredPlane_C(int y, int x1); -void R_MapColoredPlane_rgba(int y, int x1); - visplane_t *R_FindPlane ( const secplane_t &height, FTextureID picnum, From 30ddcfbc8f4be132b16a4f826278fccda9ecd030 Mon 
Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 6 Dec 2016 19:44:28 +0100 Subject: [PATCH 454/912] Add tilted plane drawer --- src/r_draw_pal.cpp | 238 +++++++++++++++++++++++++++++++++++++++ src/r_draw_pal.h | 22 ++++ src/r_plane.cpp | 273 +-------------------------------------------- src/r_thread.h | 5 + 4 files changed, 266 insertions(+), 272 deletions(-) diff --git a/src/r_draw_pal.cpp b/src/r_draw_pal.cpp index 6eeba9ec75..810192b961 100644 --- a/src/r_draw_pal.cpp +++ b/src/r_draw_pal.cpp @@ -2266,11 +2266,249 @@ namespace swrenderer ///////////////////////////////////////////////////////////////////////// DrawTiltedSpanPalCommand::DrawTiltedSpanPalCommand(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy) + : y(y), x1(x1), x2(x2), plane_sz(plane_sz), plane_su(plane_su), plane_sv(plane_sv), plane_shade(plane_shade), planeshade(planeshade), planelightfloat(planelightfloat), pviewx(pviewx), pviewy(pviewy) { + using namespace drawerargs; + + _colormap = ds_colormap; + _destorg = dc_destorg; + _ybits = ds_ybits; + _xbits = ds_xbits; + _source = ds_source; + basecolormapdata = basecolormap->Maps; } void DrawTiltedSpanPalCommand::Execute(DrawerThread *thread) { + if (thread->line_skipped_by_thread(y)) + return; + + const uint8_t **tiltlighting = thread->tiltlighting; + + int width = x2 - x1; + double iz, uz, vz; + uint8_t *fb; + uint32_t u, v; + int i; + + iz = plane_sz[2] + plane_sz[1] * (centery - y) + plane_sz[0] * (x1 - centerx); + + // Lighting is simple. 
It's just linear interpolation from start to end + if (plane_shade) + { + uz = (iz + plane_sz[0] * width) * planelightfloat; + vz = iz * planelightfloat; + CalcTiltedLighting(vz, uz, width, thread); + } + else + { + for (int i = 0; i < width; ++i) + { + tiltlighting[i] = _colormap; + } + } + + uz = plane_su[2] + plane_su[1] * (centery - y) + plane_su[0] * (x1 - centerx); + vz = plane_sv[2] + plane_sv[1] * (centery - y) + plane_sv[0] * (x1 - centerx); + + fb = ylookup[y] + x1 + _destorg; + + uint8_t vshift = 32 - _ybits; + uint8_t ushift = vshift - _xbits; + int umask = ((1 << _xbits) - 1) << _ybits; + + #if 0 + // The "perfect" reference version of this routine. Pretty slow. + // Use it only to see how things are supposed to look. + i = 0; + do + { + double z = 1.f / iz; + + u = int64_t(uz*z) + pviewx; + v = int64_t(vz*z) + pviewy; + R_SetDSColorMapLight(tiltlighting[i], 0, 0); + fb[i++] = ds_colormap[ds_source[(v >> vshift) | ((u >> ushift) & umask)]]; + iz += plane_sz[0]; + uz += plane_su[0]; + vz += plane_sv[0]; + } while (--width >= 0); + #else + //#define SPANSIZE 32 + //#define INVSPAN 0.03125f + //#define SPANSIZE 8 + //#define INVSPAN 0.125f + #define SPANSIZE 16 + #define INVSPAN 0.0625f + + double startz = 1.f / iz; + double startu = uz*startz; + double startv = vz*startz; + double izstep, uzstep, vzstep; + + izstep = plane_sz[0] * SPANSIZE; + uzstep = plane_su[0] * SPANSIZE; + vzstep = plane_sv[0] * SPANSIZE; + x1 = 0; + width++; + + while (width >= SPANSIZE) + { + iz += izstep; + uz += uzstep; + vz += vzstep; + + double endz = 1.f / iz; + double endu = uz*endz; + double endv = vz*endz; + uint32_t stepu = (uint32_t)int64_t((endu - startu) * INVSPAN); + uint32_t stepv = (uint32_t)int64_t((endv - startv) * INVSPAN); + u = (uint32_t)(int64_t(startu) + pviewx); + v = (uint32_t)(int64_t(startv) + pviewy); + + for (i = SPANSIZE - 1; i >= 0; i--) + { + fb[x1] = *(tiltlighting[x1] + _source[(v >> vshift) | ((u >> ushift) & umask)]); + x1++; + u += stepu; + v += 
stepv; + } + startu = endu; + startv = endv; + width -= SPANSIZE; + } + if (width > 0) + { + if (width == 1) + { + u = (uint32_t)int64_t(startu); + v = (uint32_t)int64_t(startv); + fb[x1] = *(tiltlighting[x1] + _source[(v >> vshift) | ((u >> ushift) & umask)]); + } + else + { + double left = width; + iz += plane_sz[0] * left; + uz += plane_su[0] * left; + vz += plane_sv[0] * left; + + double endz = 1.f / iz; + double endu = uz*endz; + double endv = vz*endz; + left = 1.f / left; + uint32_t stepu = (uint32_t)int64_t((endu - startu) * left); + uint32_t stepv = (uint32_t)int64_t((endv - startv) * left); + u = (uint32_t)(int64_t(startu) + pviewx); + v = (uint32_t)(int64_t(startv) + pviewy); + + for (; width != 0; width--) + { + fb[x1] = *(tiltlighting[x1] + _source[(v >> vshift) | ((u >> ushift) & umask)]); + x1++; + u += stepu; + v += stepv; + } + } + } + #endif + } + + // Calculates the lighting for one row of a tilted plane. If the definition + // of GETPALOOKUP changes, this needs to change, too. + void DrawTiltedSpanPalCommand::CalcTiltedLighting(double lval, double lend, int width, DrawerThread *thread) + { + const uint8_t **tiltlighting = thread->tiltlighting; + + double lstep; + uint8_t *lightfiller; + int i = 0; + + if (width == 0 || lval == lend) + { // Constant lighting + lightfiller = basecolormapdata + (GETPALOOKUP(lval, planeshade) << COLORMAPSHIFT); + } + else + { + lstep = (lend - lval) / width; + if (lval >= MAXLIGHTVIS) + { // lval starts "too bright". + lightfiller = basecolormapdata + (GETPALOOKUP(lval, planeshade) << COLORMAPSHIFT); + for (; i <= width && lval >= MAXLIGHTVIS; ++i) + { + tiltlighting[i] = lightfiller; + lval += lstep; + } + } + if (lend >= MAXLIGHTVIS) + { // lend ends "too bright". 
+ lightfiller = basecolormapdata + (GETPALOOKUP(lend, planeshade) << COLORMAPSHIFT); + for (; width > i && lend >= MAXLIGHTVIS; --width) + { + tiltlighting[width] = lightfiller; + lend -= lstep; + } + } + if (width > 0) + { + lval = FIXED2DBL(planeshade) - lval; + lend = FIXED2DBL(planeshade) - lend; + lstep = (lend - lval) / width; + if (lstep < 0) + { // Going from dark to light + if (lval < 1.) + { // All bright + lightfiller = basecolormapdata; + } + else + { + if (lval >= NUMCOLORMAPS) + { // Starts beyond the dark end + uint8_t *clight = basecolormapdata + ((NUMCOLORMAPS - 1) << COLORMAPSHIFT); + while (lval >= NUMCOLORMAPS && i <= width) + { + tiltlighting[i++] = clight; + lval += lstep; + } + if (i > width) + return; + } + while (i <= width && lval >= 0) + { + tiltlighting[i++] = basecolormapdata + (xs_ToInt(lval) << COLORMAPSHIFT); + lval += lstep; + } + lightfiller = basecolormapdata; + } + } + else + { // Going from light to dark + if (lval >= (NUMCOLORMAPS - 1)) + { // All dark + lightfiller = basecolormapdata + ((NUMCOLORMAPS - 1) << COLORMAPSHIFT); + } + else + { + while (lval < 0 && i <= width) + { + tiltlighting[i++] = basecolormapdata; + lval += lstep; + } + if (i > width) + return; + while (i <= width && lval < (NUMCOLORMAPS - 1)) + { + tiltlighting[i++] = basecolormapdata + (xs_ToInt(lval) << COLORMAPSHIFT); + lval += lstep; + } + lightfiller = basecolormapdata + ((NUMCOLORMAPS - 1) << COLORMAPSHIFT); + } + } + } + } + for (; i <= width; i++) + { + tiltlighting[i] = lightfiller; + } } ///////////////////////////////////////////////////////////////////////// diff --git a/src/r_draw_pal.h b/src/r_draw_pal.h index 6fe775568d..93c24857b3 100644 --- a/src/r_draw_pal.h +++ b/src/r_draw_pal.h @@ -180,6 +180,28 @@ namespace swrenderer DrawTiltedSpanPalCommand(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy); void 
Execute(DrawerThread *thread) override; FString DebugInfo() override { return "DrawTiltedSpanPalCommand"; } + + private: + void CalcTiltedLighting(double lval, double lend, int width, DrawerThread *thread); + + int y; + int x1; + int x2; + FVector3 plane_sz; + FVector3 plane_su; + FVector3 plane_sv; + bool plane_shade; + int planeshade; + float planelightfloat; + fixed_t pviewx; + fixed_t pviewy; + + const uint8_t *_colormap; + uint8_t *_destorg; + int _ybits; + int _xbits; + const uint8_t *_source; + uint8_t *basecolormapdata; }; class DrawColoredSpanPalCommand : public PalSpanCommand diff --git a/src/r_plane.cpp b/src/r_plane.cpp index 3b026b293d..3769dda061 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -138,15 +138,12 @@ extern "C" { // spanend holds the end of a plane span in each screen row // short spanend[MAXHEIGHT]; -BYTE *tiltlighting[MAXWIDTH]; int planeshade; FVector3 plane_sz, plane_su, plane_sv; float planelightfloat; bool plane_shade; fixed_t pviewx, pviewy; - -void R_DrawTiltedPlane_ASM (int y, int x1); } float yslope[MAXHEIGHT]; @@ -154,13 +151,6 @@ static fixed_t xscale, yscale; static double xstepscale, ystepscale; static double basexfrac, baseyfrac; -#ifdef X86_ASM -extern "C" void R_SetSpanSource_ASM (const BYTE *flat); -extern "C" void R_SetSpanSize_ASM (int xbits, int ybits); -extern "C" void R_SetSpanColormap_ASM (BYTE *colormap); -extern "C" void R_SetTiltedSpanSource_ASM (const BYTE *flat); -extern "C" BYTE *ds_curcolormap, *ds_cursource, *ds_curtiltedsource; -#endif void R_DrawSinglePlane (visplane_t *, fixed_t alpha, bool additive, bool masked); //========================================================================== @@ -264,11 +254,6 @@ void R_MapPlane (int y, int x1) R_SetDSColorMapLight(basecolormap, GlobVis * fabs(CenterY - y), planeshade); } -#ifdef X86_ASM - if (!r_swtruecolor && ds_colormap != ds_curcolormap) - R_SetSpanColormap_ASM (ds_colormap); -#endif - ds_y = y; ds_x1 = x1; ds_x2 = x2; @@ -276,241 +261,12 @@ void 
R_MapPlane (int y, int x1) spanfunc (); } -//========================================================================== -// -// R_CalcTiltedLighting -// -// Calculates the lighting for one row of a tilted plane. If the definition -// of GETPALOOKUP changes, this needs to change, too. -// -//========================================================================== - -extern "C" { -void R_CalcTiltedLighting (double lval, double lend, int width) -{ - double lstep; - BYTE *lightfiller; - BYTE *basecolormapdata = basecolormap->Maps; - int i = 0; - - if (width == 0 || lval == lend) - { // Constant lighting - lightfiller = basecolormapdata + (GETPALOOKUP(lval, planeshade) << COLORMAPSHIFT); - } - else - { - lstep = (lend - lval) / width; - if (lval >= MAXLIGHTVIS) - { // lval starts "too bright". - lightfiller = basecolormapdata + (GETPALOOKUP(lval, planeshade) << COLORMAPSHIFT); - for (; i <= width && lval >= MAXLIGHTVIS; ++i) - { - tiltlighting[i] = lightfiller; - lval += lstep; - } - } - if (lend >= MAXLIGHTVIS) - { // lend ends "too bright". - lightfiller = basecolormapdata + (GETPALOOKUP(lend, planeshade) << COLORMAPSHIFT); - for (; width > i && lend >= MAXLIGHTVIS; --width) - { - tiltlighting[width] = lightfiller; - lend -= lstep; - } - } - if (width > 0) - { - lval = FIXED2DBL(planeshade) - lval; - lend = FIXED2DBL(planeshade) - lend; - lstep = (lend - lval) / width; - if (lstep < 0) - { // Going from dark to light - if (lval < 1.) 
- { // All bright - lightfiller = basecolormapdata; - } - else - { - if (lval >= NUMCOLORMAPS) - { // Starts beyond the dark end - BYTE *clight = basecolormapdata + ((NUMCOLORMAPS-1) << COLORMAPSHIFT); - while (lval >= NUMCOLORMAPS && i <= width) - { - tiltlighting[i++] = clight; - lval += lstep; - } - if (i > width) - return; - } - while (i <= width && lval >= 0) - { - tiltlighting[i++] = basecolormapdata + (xs_ToInt(lval) << COLORMAPSHIFT); - lval += lstep; - } - lightfiller = basecolormapdata; - } - } - else - { // Going from light to dark - if (lval >= (NUMCOLORMAPS-1)) - { // All dark - lightfiller = basecolormapdata + ((NUMCOLORMAPS-1) << COLORMAPSHIFT); - } - else - { - while (lval < 0 && i <= width) - { - tiltlighting[i++] = basecolormapdata; - lval += lstep; - } - if (i > width) - return; - while (i <= width && lval < (NUMCOLORMAPS-1)) - { - tiltlighting[i++] = basecolormapdata + (xs_ToInt(lval) << COLORMAPSHIFT); - lval += lstep; - } - lightfiller = basecolormapdata + ((NUMCOLORMAPS-1) << COLORMAPSHIFT); - } - } - } - } - for (; i <= width; i++) - { - tiltlighting[i] = lightfiller; - } -} -} // extern "C" - //========================================================================== // // R_MapTiltedPlane // //========================================================================== -void R_MapTiltedPlane_C (int y, int x1) -{ - int x2 = spanend[y]; - int width = x2 - x1; - double iz, uz, vz; - BYTE *fb; - DWORD u, v; - int i; - - iz = plane_sz[2] + plane_sz[1] * (centery - y) + plane_sz[0] * (x1 - centerx); - - // Lighting is simple. 
It's just linear interpolation from start to end - if (plane_shade) - { - uz = (iz + plane_sz[0] * width) * planelightfloat; - vz = iz * planelightfloat; - R_CalcTiltedLighting(vz, uz, width); - } - - uz = plane_su[2] + plane_su[1] * (centery - y) + plane_su[0] * (x1 - centerx); - vz = plane_sv[2] + plane_sv[1] * (centery - y) + plane_sv[0] * (x1 - centerx); - - fb = ylookup[y] + x1 + dc_destorg; - - BYTE vshift = 32 - ds_ybits; - BYTE ushift = vshift - ds_xbits; - int umask = ((1 << ds_xbits) - 1) << ds_ybits; - -#if 0 // The "perfect" reference version of this routine. Pretty slow. - // Use it only to see how things are supposed to look. - i = 0; - do - { - double z = 1.f/iz; - - u = SQWORD(uz*z) + pviewx; - v = SQWORD(vz*z) + pviewy; - R_SetDSColorMapLight(tiltlighting[i], 0, 0); - fb[i++] = ds_colormap[ds_source[(v >> vshift) | ((u >> ushift) & umask)]]; - iz += plane_sz[0]; - uz += plane_su[0]; - vz += plane_sv[0]; - } while (--width >= 0); -#else -//#define SPANSIZE 32 -//#define INVSPAN 0.03125f -//#define SPANSIZE 8 -//#define INVSPAN 0.125f -#define SPANSIZE 16 -#define INVSPAN 0.0625f - - double startz = 1.f/iz; - double startu = uz*startz; - double startv = vz*startz; - double izstep, uzstep, vzstep; - - izstep = plane_sz[0] * SPANSIZE; - uzstep = plane_su[0] * SPANSIZE; - vzstep = plane_sv[0] * SPANSIZE; - x1 = 0; - width++; - - while (width >= SPANSIZE) - { - iz += izstep; - uz += uzstep; - vz += vzstep; - - double endz = 1.f/iz; - double endu = uz*endz; - double endv = vz*endz; - DWORD stepu = SQWORD((endu - startu) * INVSPAN); - DWORD stepv = SQWORD((endv - startv) * INVSPAN); - u = SQWORD(startu) + pviewx; - v = SQWORD(startv) + pviewy; - - for (i = SPANSIZE-1; i >= 0; i--) - { - fb[x1] = *(tiltlighting[x1] + ds_source[(v >> vshift) | ((u >> ushift) & umask)]); - x1++; - u += stepu; - v += stepv; - } - startu = endu; - startv = endv; - width -= SPANSIZE; - } - if (width > 0) - { - if (width == 1) - { - u = SQWORD(startu); - v = SQWORD(startv); - 
fb[x1] = *(tiltlighting[x1] + ds_source[(v >> vshift) | ((u >> ushift) & umask)]); - } - else - { - double left = width; - iz += plane_sz[0] * left; - uz += plane_su[0] * left; - vz += plane_sv[0] * left; - - double endz = 1.f/iz; - double endu = uz*endz; - double endv = vz*endz; - left = 1.f/left; - DWORD stepu = SQWORD((endu - startu) * left); - DWORD stepv = SQWORD((endv - startv) * left); - u = SQWORD(startu) + pviewx; - v = SQWORD(startv) + pviewy; - - for (; width != 0; width--) - { - fb[x1] = *(tiltlighting[x1] + ds_source[(v >> vshift) | ((u >> ushift) & umask)]); - x1++; - u += stepu; - v += stepv; - } - } - } -#endif -} - void R_MapTiltedPlane (int y, int x1) { R_DrawTiltedSpan(y, x1, spanend[y], plane_sz, plane_su, plane_sv, plane_shade, planeshade, planelightfloat, pviewx, pviewy); @@ -1779,13 +1535,6 @@ void R_DrawSkyPlane (visplane_t *pl) void R_DrawNormalPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t alpha, bool additive, bool masked) { -#ifdef X86_ASM - if (!r_swtruecolor && ds_source != ds_cursource) - { - R_SetSpanSource_ASM (ds_source); - } -#endif - if (alpha <= 0) { return; @@ -2036,14 +1785,6 @@ void R_DrawTiltedPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t plane_shade = true; } - if (!plane_shade) - { - for (int i = 0; i < viewwidth; ++i) - { - tiltlighting[i] = ds_colormap; - } - } - // Hack in support for 1 x Z and Z x 1 texture sizes if (ds_ybits == 0) { @@ -2053,20 +1794,8 @@ void R_DrawTiltedPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t { plane_su[2] = plane_su[1] = plane_su[0] = 0; } -#if defined(X86_ASM) - if (!r_swtruecolor) - { - if (ds_source != ds_curtiltedsource) - R_SetTiltedSpanSource_ASM(ds_source); - R_MapVisPlane(pl, R_DrawTiltedPlane_ASM); - } - else - { - R_MapVisPlane(pl, R_MapTiltedPlane); - } -#else + R_MapVisPlane (pl, R_MapTiltedPlane); -#endif } //========================================================================== diff --git a/src/r_thread.h b/src/r_thread.h 
index 9a8a5c1b58..dbdbf1dded 100644 --- a/src/r_thread.h +++ b/src/r_thread.h @@ -54,12 +54,17 @@ public: int pass_start_y = 0; int pass_end_y = MAXHEIGHT; + // Working buffer used by Rt drawers uint8_t dc_temp_buff[MAXHEIGHT * 4]; uint8_t *dc_temp = nullptr; + // Working buffer used by Rt drawers, true color edition uint32_t dc_temp_rgbabuff_rgba[MAXHEIGHT * 4]; uint32_t *dc_temp_rgba = nullptr; + // Working buffer used by the tilted (sloped) span drawer + const uint8_t *tiltlighting[MAXWIDTH]; + // Checks if a line is rendered by this thread bool line_skipped_by_thread(int line) { From 7441521a9604be309ff5d8321cde0a45d64a59b1 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 6 Dec 2016 19:44:56 +0100 Subject: [PATCH 455/912] Remove comment about Build code as it is no longer correct (there's no build code left in that file) --- src/r_segs.cpp | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/r_segs.cpp b/src/r_segs.cpp index 44e6f906f3..efc894fc29 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -17,12 +17,6 @@ // DESCRIPTION: // All the clipping: columns, horizontal spans, sky columns. // -// This file contains some code from the Build Engine. -// -// "Build Engine & Tools" Copyright (c) 1993-1997 Ken Silverman -// Ken Silverman's official web site: "http://www.advsys.net/ken" -// See the included license file "BUILDLIC.TXT" for license info. 
-// //----------------------------------------------------------------------------- #include From 0083c3c5f0eadbf787bdc174475fdde5abbbeeb3 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 6 Dec 2016 21:41:41 +0100 Subject: [PATCH 456/912] Fix some typos --- src/r_draw_pal.cpp | 14 +++++++------- src/r_draw_pal.h | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/r_draw_pal.cpp b/src/r_draw_pal.cpp index 810192b961..0264dcbf9e 100644 --- a/src/r_draw_pal.cpp +++ b/src/r_draw_pal.cpp @@ -1836,7 +1836,7 @@ namespace swrenderer void DrawSpanPalCommand::Execute(DrawerThread *thread) { - if (thread->skipped_by_thread(_y)) + if (thread->line_skipped_by_thread(_y)) return; dsfixed_t xfrac; @@ -1900,7 +1900,7 @@ namespace swrenderer void DrawSpanMaskedPalCommand::Execute(DrawerThread *thread) { - if (thread->skipped_by_thread(_y)) + if (thread->line_skipped_by_thread(_y)) return; dsfixed_t xfrac; @@ -1965,7 +1965,7 @@ namespace swrenderer void DrawSpanTranslucentPalCommand::Execute(DrawerThread *thread) { - if (thread->skipped_by_thread(_y)) + if (thread->line_skipped_by_thread(_y)) return; dsfixed_t xfrac; @@ -2028,7 +2028,7 @@ namespace swrenderer void DrawSpanMaskedTranslucentPalCommand::Execute(DrawerThread *thread) { - if (thread->skipped_by_thread(_y)) + if (thread->line_skipped_by_thread(_y)) return; dsfixed_t xfrac; @@ -2105,7 +2105,7 @@ namespace swrenderer void DrawSpanAddClampPalCommand::Execute(DrawerThread *thread) { - if (thread->skipped_by_thread(_y)) + if (thread->line_skipped_by_thread(_y)) return; dsfixed_t xfrac; @@ -2174,7 +2174,7 @@ namespace swrenderer void DrawSpanMaskedAddClampPalCommand::Execute(DrawerThread *thread) { - if (thread->skipped_by_thread(_y)) + if (thread->line_skipped_by_thread(_y)) return; dsfixed_t xfrac; @@ -2257,7 +2257,7 @@ namespace swrenderer void FillSpanPalCommand::Execute(DrawerThread *thread) { - if (thread->skipped_by_thread(_y)) + if (thread->line_skipped_by_thread(_y)) return; 
memset(ylookup[_y] + _x1 + _destorg, _color, _x2 - _x1 + 1); diff --git a/src/r_draw_pal.h b/src/r_draw_pal.h index 93c24857b3..a2400d2759 100644 --- a/src/r_draw_pal.h +++ b/src/r_draw_pal.h @@ -279,7 +279,7 @@ namespace swrenderer { public: void Execute(DrawerThread *thread) override; - FString DebugInfo() override { return "FillColumnHorizPalCommand"; } + FString DebugInfo() override { return "DrawColumnHorizPalCommand"; } }; class FillColumnHorizPalCommand : public PalColumnHorizCommand From 3fd4d00a7724816f1c4c2238994ef79f813e0bc9 Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Tue, 6 Dec 2016 18:13:41 -0500 Subject: [PATCH 457/912] - 1.2pre tag --- src/version.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/version.h b/src/version.h index 37f3b1dd41..6e28224920 100644 --- a/src/version.h +++ b/src/version.h @@ -41,17 +41,17 @@ const char *GetVersionString(); /** Lots of different version numbers **/ -#define VERSIONSTR "1.1pre" +#define VERSIONSTR "1.2pre" // The version as seen in the Windows resource -#define RC_FILEVERSION 1,0,9999,0 -#define RC_PRODUCTVERSION 1,0,9999,0 -#define RC_PRODUCTVERSION2 "1.1pre" +#define RC_FILEVERSION 1,1,9999,0 +#define RC_PRODUCTVERSION 1,1,9999,0 +#define RC_PRODUCTVERSION2 "1.2pre" // Version identifier for network games. // Bump it every time you do a release unless you're certain you // didn't change anything that will affect sync. -#define NETGAMEVERSION 234 +#define NETGAMEVERSION 235 // Version stored in the ini's [LastRun] section. // Bump it if you made some configuration change that you want to From 8a23befd4d64d0c032061d7f8f8270fc10d3591c Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Tue, 6 Dec 2016 18:22:36 -0500 Subject: [PATCH 458/912] - Fixed abstract class macro in SDLGLFB class. 
--- src/posix/sdl/sdlglvideo.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/posix/sdl/sdlglvideo.cpp b/src/posix/sdl/sdlglvideo.cpp index 7e6dd89302..7858fdad98 100644 --- a/src/posix/sdl/sdlglvideo.cpp +++ b/src/posix/sdl/sdlglvideo.cpp @@ -29,7 +29,7 @@ // TYPES ------------------------------------------------------------------- -IMPLEMENT_ABSTRACT_CLASS(SDLGLFB, false, false, false, false) +IMPLEMENT_CLASS(SDLGLFB, true, false) struct MiniModeInfo { From c04e0d0df362cec661e9d8516d0bee73dfd05fc2 Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Tue, 6 Dec 2016 18:44:59 -0500 Subject: [PATCH 459/912] - fixed: Whoops. One of those danged macros got away. ;) --- src/gl/system/gl_swframebuffer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gl/system/gl_swframebuffer.cpp b/src/gl/system/gl_swframebuffer.cpp index 6b3caa81ef..5f142d30c7 100644 --- a/src/gl/system/gl_swframebuffer.cpp +++ b/src/gl/system/gl_swframebuffer.cpp @@ -115,7 +115,7 @@ DFrameBuffer *CreateGLSWFrameBuffer(int width, int height, bool bgra, bool fulls } #endif -IMPLEMENT_CLASS(OpenGLSWFrameBuffer) +IMPLEMENT_CLASS(OpenGLSWFrameBuffer, false, false) const char *const OpenGLSWFrameBuffer::ShaderDefines[OpenGLSWFrameBuffer::NUM_SHADERS] = { From e3e90e1a085c85513a53fafbfa3d0a9d1b7457a9 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 7 Dec 2016 18:28:06 +0100 Subject: [PATCH 460/912] Merge TC handling back into r_draw --- src/r_draw.cpp | 590 +++++++++++++--- src/r_draw.h | 39 +- src/r_draw_tc.cpp | 1701 --------------------------------------------- src/r_draw_tc.h | 239 ------- 4 files changed, 518 insertions(+), 2051 deletions(-) delete mode 100644 src/r_draw_tc.cpp delete mode 100644 src/r_draw_tc.h diff --git a/src/r_draw.cpp b/src/r_draw.cpp index a2bf412e8b..9a79779d8e 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -16,6 +16,7 @@ #include "r_data/colormaps.h" #include "r_plane.h" #include "r_draw.h" +#include 
"r_draw_rgba.h" #include "r_draw_pal.h" #include "r_thread.h" @@ -42,6 +43,9 @@ namespace swrenderer { int dc_pitch; lighttable_t *dc_colormap; + FSWColormap *dc_fcolormap; + ShadeConstants dc_shade_constants; + fixed_t dc_light; int dc_x; int dc_yl; int dc_yh; @@ -78,6 +82,8 @@ namespace swrenderer int ds_x1; int ds_x2; lighttable_t * ds_colormap; + FSWColormap *ds_fcolormap; + ShadeConstants ds_shade_constants; dsfixed_t ds_light; dsfixed_t ds_xfrac; dsfixed_t ds_yfrac; @@ -88,7 +94,9 @@ namespace swrenderer fixed_t ds_alpha; double ds_lod; const uint8_t *ds_source; + bool ds_source_mipmapped; int ds_color; + bool drawer_needs_pal_input; unsigned int dc_tspans[4][MAXHEIGHT]; unsigned int *dc_ctspan[4]; unsigned int *horizspan[4]; @@ -199,6 +207,7 @@ namespace swrenderer colfunc = transcolfunc; hcolfunc_post1 = rt_tlate1col; hcolfunc_post4 = rt_tlate4cols; + drawer_needs_pal_input = true; } return true; } @@ -249,6 +258,7 @@ namespace swrenderer colfunc = R_DrawTlatedAddColumn; hcolfunc_post1 = rt_tlateadd1col; hcolfunc_post4 = rt_tlateadd4cols; + drawer_needs_pal_input = true; } } else @@ -270,6 +280,7 @@ namespace swrenderer colfunc = R_DrawAddClampTranslatedColumn; hcolfunc_post1 = rt_tlateaddclamp1col; hcolfunc_post4 = rt_tlateaddclamp4cols; + drawer_needs_pal_input = true; } } return true; @@ -292,6 +303,7 @@ namespace swrenderer colfunc = R_DrawSubClampTranslatedColumn; hcolfunc_post1 = rt_tlatesubclamp1col; hcolfunc_post4 = rt_tlatesubclamp4cols; + drawer_needs_pal_input = true; } return true; @@ -317,6 +329,7 @@ namespace swrenderer colfunc = R_DrawRevSubClampTranslatedColumn; hcolfunc_post1 = rt_tlaterevsubclamp1col; hcolfunc_post4 = rt_tlaterevsubclamp4cols; + drawer_needs_pal_input = true; } return true; @@ -346,6 +359,8 @@ namespace swrenderer fixed_t fglevel, bglevel; + drawer_needs_pal_input = false; + style.CheckFuzz(); if (style.BlendOp == STYLEOP_Shadow) @@ -376,7 +391,10 @@ namespace swrenderer FRemapTable *table = 
TranslationToTable(translation); if (table != NULL && !table->Inactive) { - dc_translation = table->Remap; + if (r_swtruecolor) + dc_translation = (uint8_t*)table->Palette; + else + dc_translation = table->Remap; } } } @@ -397,13 +415,19 @@ namespace swrenderer colfunc = R_DrawShadedColumn; hcolfunc_post1 = rt_shaded1col; hcolfunc_post4 = rt_shaded4cols; - dc_color = fixedcolormap ? fixedcolormap[APART(color)] : basecolormap->Maps[APART(color)]; - dc_colormap = (basecolormap = &ShadeFakeColormap[16 - alpha])->Maps; + drawer_needs_pal_input = true; + dc_color = fixedcolormap ? fixedcolormap->Maps[APART(color)] : basecolormap->Maps[APART(color)]; + basecolormap = &ShadeFakeColormap[16 - alpha]; if (fixedlightlev >= 0 && fixedcolormap == NULL) { - dc_colormap += fixedlightlev; + R_SetColorMapLight(basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); } - return r_columnmethod ? DoDraw1 : DoDraw0; + else + { + R_SetColorMapLight(basecolormap, 0, 0); + } + bool active_columnmethod = r_columnmethod && !r_swtruecolor; + return active_columnmethod ? DoDraw1 : DoDraw0; } fglevel = GetAlpha(style.SrcAlpha, alpha); @@ -429,14 +453,15 @@ namespace swrenderer // with the alpha. dc_srccolor = ((((r*x) >> 4) << 20) | ((g*x) >> 4) | ((((b)*x) >> 4) << 10)) & 0x3feffbff; hcolfunc_pre = R_FillColumnHoriz; - R_SetColorMapLight(identitycolormap.Maps, 0, 0); + R_SetColorMapLight(&identitycolormap, 0, 0); } if (!R_SetBlendFunc(style.BlendOp, fglevel, bglevel, style.Flags)) { return DontDraw; } - return r_columnmethod ? DoDraw1 : DoDraw0; + bool active_columnmethod = r_columnmethod && !r_swtruecolor; + return active_columnmethod ? 
DoDraw1 : DoDraw0; } ESPSResult R_SetPatchStyle(FRenderStyle style, float alpha, int translation, uint32_t color) @@ -460,7 +485,10 @@ namespace swrenderer col = width + (col % width); } - return tex->GetColumn(col, nullptr); + if (r_swtruecolor) + return (const uint8_t *)tex->GetColumnBgra(col, nullptr); + else + return tex->GetColumn(col, nullptr); } bool R_GetTransMaskDrawers(fixed_t(**tmvline1)(), void(**tmvline4)()) @@ -507,25 +535,84 @@ namespace swrenderer drawerargs::tmvlinebits = fracbits; } - void R_SetColorMapLight(lighttable_t *base_colormap, float light, int shade) + void R_SetColorMapLight(FSWColormap *base_colormap, float light, int shade) { using namespace drawerargs; - dc_colormap = base_colormap + (GETPALOOKUP(light, shade) << COLORMAPSHIFT); + dc_fcolormap = base_colormap; + if (r_swtruecolor) + { + dc_shade_constants.light_red = dc_fcolormap->Color.r * 256 / 255; + dc_shade_constants.light_green = dc_fcolormap->Color.g * 256 / 255; + dc_shade_constants.light_blue = dc_fcolormap->Color.b * 256 / 255; + dc_shade_constants.light_alpha = dc_fcolormap->Color.a * 256 / 255; + dc_shade_constants.fade_red = dc_fcolormap->Fade.r; + dc_shade_constants.fade_green = dc_fcolormap->Fade.g; + dc_shade_constants.fade_blue = dc_fcolormap->Fade.b; + dc_shade_constants.fade_alpha = dc_fcolormap->Fade.a; + dc_shade_constants.desaturate = MIN(abs(dc_fcolormap->Desaturate), 255) * 255 / 256; + dc_shade_constants.simple_shade = (dc_fcolormap->Color.d == 0x00ffffff && dc_fcolormap->Fade.d == 0x00000000 && dc_fcolormap->Desaturate == 0); + dc_colormap = base_colormap->Maps; + dc_light = LIGHTSCALE(light, shade); + } + else + { + dc_colormap = base_colormap->Maps + (GETPALOOKUP(light, shade) << COLORMAPSHIFT); + } } - void R_SetDSColorMapLight(lighttable_t *base_colormap, float light, int shade) + void R_SetDSColorMapLight(FSWColormap *base_colormap, float light, int shade) { using namespace drawerargs; - ds_colormap = base_colormap + (GETPALOOKUP(light, shade) << 
COLORMAPSHIFT); + ds_fcolormap = base_colormap; + if (r_swtruecolor) + { + ds_shade_constants.light_red = ds_fcolormap->Color.r * 256 / 255; + ds_shade_constants.light_green = ds_fcolormap->Color.g * 256 / 255; + ds_shade_constants.light_blue = ds_fcolormap->Color.b * 256 / 255; + ds_shade_constants.light_alpha = ds_fcolormap->Color.a * 256 / 255; + ds_shade_constants.fade_red = ds_fcolormap->Fade.r; + ds_shade_constants.fade_green = ds_fcolormap->Fade.g; + ds_shade_constants.fade_blue = ds_fcolormap->Fade.b; + ds_shade_constants.fade_alpha = ds_fcolormap->Fade.a; + ds_shade_constants.desaturate = MIN(abs(ds_fcolormap->Desaturate), 255) * 255 / 256; + ds_shade_constants.simple_shade = (ds_fcolormap->Color.d == 0x00ffffff && ds_fcolormap->Fade.d == 0x00000000 && ds_fcolormap->Desaturate == 0); + ds_colormap = base_colormap->Maps; + ds_light = LIGHTSCALE(light, shade); + } + else + { + ds_colormap = base_colormap->Maps + (GETPALOOKUP(light, shade) << COLORMAPSHIFT); + } } void R_SetTranslationMap(lighttable_t *translation) { using namespace drawerargs; - dc_colormap = translation; + if (r_swtruecolor) + { + dc_fcolormap = nullptr; + dc_colormap = nullptr; + dc_translation = translation; + dc_shade_constants.light_red = 256; + dc_shade_constants.light_green = 256; + dc_shade_constants.light_blue = 256; + dc_shade_constants.light_alpha = 256; + dc_shade_constants.fade_red = 0; + dc_shade_constants.fade_green = 0; + dc_shade_constants.fade_blue = 0; + dc_shade_constants.fade_alpha = 256; + dc_shade_constants.desaturate = 0; + dc_shade_constants.simple_shade = true; + dc_light = 0; + } + else + { + dc_fcolormap = nullptr; + dc_colormap = translation; + } } void rt_initcols(uint8_t *buffer) @@ -535,7 +622,10 @@ namespace swrenderer for (int y = 3; y >= 0; y--) horizspan[y] = dc_ctspan[y] = &dc_tspans[y][0]; - DrawerCommandQueue::QueueCommand(buffer); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(buffer); + else + DrawerCommandQueue::QueueCommand(buffer); } void 
rt_span_coverage(int x, int start, int stop) @@ -727,18 +817,17 @@ namespace swrenderer } } - void R_SetSpanColormap(lighttable_t *colormap) + void R_SetSpanColormap(FDynamicColormap *colormap, int shade) { - using namespace drawerargs; - - ds_colormap = colormap; + R_SetDSColorMapLight(colormap, 0, shade); } void R_SetSpanSource(FTexture *tex) { using namespace drawerargs; - ds_source = tex->GetPixels(); + ds_source = r_swtruecolor ? (const uint8_t*)tex->GetPixelsBgra() : tex->GetPixels(); + ds_source_mipmapped = tex->Mipmapped() && tex->GetWidth() > 1 && tex->GetHeight() > 1; } ///////////////////////////////////////////////////////////////////////// @@ -756,7 +845,10 @@ namespace swrenderer (*span)[1] = dc_yh; *span += 2; - DrawerCommandQueue::QueueCommand(); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); } void R_DrawColumnHoriz() @@ -772,168 +864,301 @@ namespace swrenderer (*span)[1] = dc_yh; *span += 2; - DrawerCommandQueue::QueueCommand(); + if (r_swtruecolor) + { + if (drawer_needs_pal_input) + DrawerCommandQueue::QueueCommand>(); + else + DrawerCommandQueue::QueueCommand>(); + } + else + { + DrawerCommandQueue::QueueCommand(); + } } // Copies one span at hx to the screen at sx. void rt_copy1col(int hx, int sx, int yl, int yh) { - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + else + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Copies all four spans to the screen starting at sx. void rt_copy4cols(int sx, int yl, int yh) { - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + if (r_swtruecolor) + { + // To do: we could do this with SSE using __m128i + rt_copy1col(0, sx, yl, yh); + rt_copy1col(1, sx + 1, yl, yh); + rt_copy1col(2, sx + 2, yl, yh); + rt_copy1col(3, sx + 3, yl, yh); + } + else + { + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + } } // Maps one span at hx to the screen at sx. 
void rt_map1col(int hx, int sx, int yl, int yh) { - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + else + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Maps all four spans to the screen starting at sx. void rt_map4cols(int sx, int yl, int yh) { - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + else + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); } // Translates one span at hx to the screen at sx. void rt_tlate1col(int hx, int sx, int yl, int yh) { - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); - rt_map1col(hx, sx, yl, yh); + if (r_swtruecolor) + { + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + } + else + { + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + rt_map1col(hx, sx, yl, yh); + } } // Translates all four spans to the screen starting at sx. void rt_tlate4cols(int sx, int yl, int yh) { - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); - rt_map4cols(sx, yl, yh); + if (r_swtruecolor) + { + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + } + else + { + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + rt_map4cols(sx, yl, yh); + } } // Adds one span at hx to the screen at sx without clamping. void rt_add1col(int hx, int sx, int yl, int yh) { - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + else + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Adds all four spans to the screen starting at sx without clamping. void rt_add4cols(int sx, int yl, int yh) { - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + else + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); } // Translates and adds one span at hx to the screen at sx without clamping. 
void rt_tlateadd1col(int hx, int sx, int yl, int yh) { - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); - rt_add1col(hx, sx, yl, yh); + if (r_swtruecolor) + { + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + } + else + { + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + rt_add1col(hx, sx, yl, yh); + } } // Translates and adds all four spans to the screen starting at sx without clamping. void rt_tlateadd4cols(int sx, int yl, int yh) { - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); - rt_add4cols(sx, yl, yh); + if (r_swtruecolor) + { + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + } + else + { + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + rt_add4cols(sx, yl, yh); + } } // Shades one span at hx to the screen at sx. void rt_shaded1col(int hx, int sx, int yl, int yh) { - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + else + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Shades all four spans to the screen starting at sx. void rt_shaded4cols(int sx, int yl, int yh) { - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + else + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); } // Adds one span at hx to the screen at sx with clamping. void rt_addclamp1col(int hx, int sx, int yl, int yh) { - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + else + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Adds all four spans to the screen starting at sx with clamping. void rt_addclamp4cols(int sx, int yl, int yh) { - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + else + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); } // Translates and adds one span at hx to the screen at sx with clamping. 
void rt_tlateaddclamp1col(int hx, int sx, int yl, int yh) { - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); - rt_addclamp1col(hx, sx, yl, yh); + if (r_swtruecolor) + { + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + } + else + { + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + rt_addclamp1col(hx, sx, yl, yh); + } } // Translates and adds all four spans to the screen starting at sx with clamping. void rt_tlateaddclamp4cols(int sx, int yl, int yh) { - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); - rt_addclamp4cols(sx, yl, yh); + if (r_swtruecolor) + { + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + } + else + { + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + rt_addclamp4cols(sx, yl, yh); + } } // Subtracts one span at hx to the screen at sx with clamping. void rt_subclamp1col(int hx, int sx, int yl, int yh) { - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + else + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Subtracts all four spans to the screen starting at sx with clamping. void rt_subclamp4cols(int sx, int yl, int yh) { - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + else + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); } // Translates and subtracts one span at hx to the screen at sx with clamping. void rt_tlatesubclamp1col(int hx, int sx, int yl, int yh) { - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); - rt_subclamp1col(hx, sx, yl, yh); + if (r_swtruecolor) + { + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + } + else + { + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + rt_subclamp1col(hx, sx, yl, yh); + } } // Translates and subtracts all four spans to the screen starting at sx with clamping. 
void rt_tlatesubclamp4cols(int sx, int yl, int yh) { - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); - rt_subclamp4cols(sx, yl, yh); + if (r_swtruecolor) + { + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + } + else + { + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + rt_subclamp4cols(sx, yl, yh); + } } // Subtracts one span at hx from the screen at sx with clamping. void rt_revsubclamp1col(int hx, int sx, int yl, int yh) { - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + else + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Subtracts all four spans from the screen starting at sx with clamping. void rt_revsubclamp4cols(int sx, int yl, int yh) { - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + else + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); } // Translates and subtracts one span at hx from the screen at sx with clamping. void rt_tlaterevsubclamp1col(int hx, int sx, int yl, int yh) { - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); - rt_revsubclamp1col(hx, sx, yl, yh); + if (r_swtruecolor) + { + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + } + else + { + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + rt_revsubclamp1col(hx, sx, yl, yh); + } } // Translates and subtracts all four spans from the screen starting at sx with clamping. 
void rt_tlaterevsubclamp4cols(int sx, int yl, int yh) { - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); - rt_revsubclamp4cols(sx, yl, yh); + if (r_swtruecolor) + { + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + } + else + { + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + rt_revsubclamp4cols(sx, yl, yh); + } } uint32_t vlinec1() { using namespace drawerargs; - DrawerCommandQueue::QueueCommand(); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); return dc_texturefrac + dc_count * dc_iscale; } @@ -942,7 +1167,10 @@ namespace swrenderer { using namespace drawerargs; - DrawerCommandQueue::QueueCommand(); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; @@ -952,7 +1180,10 @@ namespace swrenderer { using namespace drawerargs; - DrawerCommandQueue::QueueCommand(); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); return dc_texturefrac + dc_count * dc_iscale; } @@ -961,7 +1192,10 @@ namespace swrenderer { using namespace drawerargs; - DrawerCommandQueue::QueueCommand(); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; @@ -971,7 +1205,10 @@ namespace swrenderer { using namespace drawerargs; - DrawerCommandQueue::QueueCommand(); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); return dc_texturefrac + dc_count * dc_iscale; } @@ -980,7 +1217,10 @@ namespace swrenderer { using namespace drawerargs; - DrawerCommandQueue::QueueCommand(); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; @@ -990,7 +1230,10 @@ namespace swrenderer { using namespace drawerargs; - 
DrawerCommandQueue::QueueCommand(); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); return dc_texturefrac + dc_count * dc_iscale; } @@ -999,7 +1242,10 @@ namespace swrenderer { using namespace drawerargs; - DrawerCommandQueue::QueueCommand(); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; @@ -1009,7 +1255,10 @@ namespace swrenderer { using namespace drawerargs; - DrawerCommandQueue::QueueCommand(); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); return dc_texturefrac + dc_count * dc_iscale; } @@ -1018,7 +1267,10 @@ namespace swrenderer { using namespace drawerargs; - DrawerCommandQueue::QueueCommand(); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; @@ -1028,7 +1280,10 @@ namespace swrenderer { using namespace drawerargs; - DrawerCommandQueue::QueueCommand(); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); return dc_texturefrac + dc_count * dc_iscale; } @@ -1037,7 +1292,10 @@ namespace swrenderer { using namespace drawerargs; - DrawerCommandQueue::QueueCommand(); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; @@ -1045,59 +1303,92 @@ namespace swrenderer void R_DrawSingleSkyCol1(uint32_t solid_top, uint32_t solid_bottom) { - DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); + else + DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); } void R_DrawSingleSkyCol4(uint32_t solid_top, uint32_t solid_bottom) { - DrawerCommandQueue::QueueCommand(solid_top, 
solid_bottom); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); + else + DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); } void R_DrawDoubleSkyCol1(uint32_t solid_top, uint32_t solid_bottom) { - DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); + else + DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); } void R_DrawDoubleSkyCol4(uint32_t solid_top, uint32_t solid_bottom) { - DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); + else + DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); } void R_DrawColumn() { - DrawerCommandQueue::QueueCommand(); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); } void R_FillColumn() { - DrawerCommandQueue::QueueCommand(); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); } void R_FillAddColumn() { - DrawerCommandQueue::QueueCommand(); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); } void R_FillAddClampColumn() { - DrawerCommandQueue::QueueCommand(); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); } void R_FillSubClampColumn() { - DrawerCommandQueue::QueueCommand(); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); } void R_FillRevSubClampColumn() { - DrawerCommandQueue::QueueCommand(); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); } void R_DrawFuzzColumn() { using namespace drawerargs; - DrawerCommandQueue::QueueCommand(); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); dc_yl = MAX(dc_yl, 1); dc_yh = MIN(dc_yh, 
fuzzviewheight); @@ -1107,112 +1398,185 @@ namespace swrenderer void R_DrawAddColumn() { - DrawerCommandQueue::QueueCommand(); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); } void R_DrawTranslatedColumn() { - DrawerCommandQueue::QueueCommand(); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); } void R_DrawTlatedAddColumn() { - DrawerCommandQueue::QueueCommand(); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); } void R_DrawShadedColumn() { - DrawerCommandQueue::QueueCommand(); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); } void R_DrawAddClampColumn() { - DrawerCommandQueue::QueueCommand(); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); } void R_DrawAddClampTranslatedColumn() { - DrawerCommandQueue::QueueCommand(); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); } void R_DrawSubClampColumn() { - DrawerCommandQueue::QueueCommand(); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); } void R_DrawSubClampTranslatedColumn() { - DrawerCommandQueue::QueueCommand(); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); } void R_DrawRevSubClampColumn() { - DrawerCommandQueue::QueueCommand(); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); } void R_DrawRevSubClampTranslatedColumn() { - DrawerCommandQueue::QueueCommand(); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); } void R_DrawSpan() { - DrawerCommandQueue::QueueCommand(); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); } 
void R_DrawSpanMasked() { - DrawerCommandQueue::QueueCommand(); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); } void R_DrawSpanTranslucent() { - DrawerCommandQueue::QueueCommand(); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); } void R_DrawSpanMaskedTranslucent() { - DrawerCommandQueue::QueueCommand(); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); } void R_DrawSpanAddClamp() { - DrawerCommandQueue::QueueCommand(); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); } void R_DrawSpanMaskedAddClamp() { - DrawerCommandQueue::QueueCommand(); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); } void R_FillSpan() { - DrawerCommandQueue::QueueCommand(); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); } void R_DrawTiltedSpan(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy) { - DrawerCommandQueue::QueueCommand(y, x1, x2, plane_sz, plane_su, plane_sv, plane_shade, planeshade, planelightfloat, pviewx, pviewy); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(y, x1, x2, plane_sz, plane_su, plane_sv, plane_shade, planeshade, planelightfloat, pviewx, pviewy); + else + DrawerCommandQueue::QueueCommand(y, x1, x2, plane_sz, plane_su, plane_sv, plane_shade, planeshade, planelightfloat, pviewx, pviewy); } void R_DrawColoredSpan(int y, int x1, int x2) { - DrawerCommandQueue::QueueCommand(y, x1, x2); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(y, x1, x2); + else + DrawerCommandQueue::QueueCommand(y, x1, x2); } namespace { - const uint8_t *slab_colormap; + ShadeConstants slab_rgba_shade_constants; + 
const uint8_t *slab_rgba_colormap; + fixed_t slab_rgba_light; } - void R_SetupDrawSlab(uint8_t *colormap) + void R_SetupDrawSlab(FSWColormap *base_colormap, float light, int shade) { - slab_colormap = colormap; + slab_rgba_shade_constants.light_red = base_colormap->Color.r * 256 / 255; + slab_rgba_shade_constants.light_green = base_colormap->Color.g * 256 / 255; + slab_rgba_shade_constants.light_blue = base_colormap->Color.b * 256 / 255; + slab_rgba_shade_constants.light_alpha = base_colormap->Color.a * 256 / 255; + slab_rgba_shade_constants.fade_red = base_colormap->Fade.r; + slab_rgba_shade_constants.fade_green = base_colormap->Fade.g; + slab_rgba_shade_constants.fade_blue = base_colormap->Fade.b; + slab_rgba_shade_constants.fade_alpha = base_colormap->Fade.a; + slab_rgba_shade_constants.desaturate = MIN(abs(base_colormap->Desaturate), 255) * 255 / 256; + slab_rgba_shade_constants.simple_shade = (base_colormap->Color.d == 0x00ffffff && base_colormap->Fade.d == 0x00000000 && base_colormap->Desaturate == 0); + slab_rgba_colormap = base_colormap->Maps; + slab_rgba_light = LIGHTSCALE(light, shade); } void R_DrawSlab(int dx, fixed_t v, int dy, fixed_t vi, const uint8_t *vptr, uint8_t *p) { - DrawerCommandQueue::QueueCommand(dx, v, dy, vi, vptr, p, slab_colormap); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(dx, v, dy, vi, vptr, p, slab_rgba_shade_constants, slab_rgba_colormap, slab_rgba_light); + else + DrawerCommandQueue::QueueCommand(dx, v, dy, vi, vptr, p, slab_rgba_colormap); } void R_DrawFogBoundarySection(int y, int y2, int x1) @@ -1220,7 +1584,10 @@ namespace swrenderer for (; y < y2; ++y) { int x2 = spanend[y]; - DrawerCommandQueue::QueueCommand(y, x1, x2); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(y, x1, x2); + else + DrawerCommandQueue::QueueCommand(y, x1, x2); } } @@ -1244,7 +1611,7 @@ namespace swrenderer clearbufshort(spanend + t2, b2 - t2, x); } - R_SetColorMapLight(basecolormap->Maps, (float)light, wallshade); + 
R_SetColorMapLight(basecolormap, (float)light, wallshade); uint8_t *fake_dc_colormap = basecolormap->Maps + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT); @@ -1271,7 +1638,7 @@ namespace swrenderer clearbufshort(spanend + t2, b2 - t2, x); } rcolormap = lcolormap; - R_SetColorMapLight(basecolormap->Maps, (float)light, wallshade); + R_SetColorMapLight(basecolormap, (float)light, wallshade); fake_dc_colormap = basecolormap->Maps + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT); } else @@ -1282,13 +1649,19 @@ namespace swrenderer while (t2 < stop) { int y = t2++; - DrawerCommandQueue::QueueCommand(y, xr, spanend[y]); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(y, xr, spanend[y]); + else + DrawerCommandQueue::QueueCommand(y, xr, spanend[y]); } stop = MAX(b1, t2); while (b2 > stop) { int y = --b2; - DrawerCommandQueue::QueueCommand(y, xr, spanend[y]); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(y, xr, spanend[y]); + else + DrawerCommandQueue::QueueCommand(y, xr, spanend[y]); } } else @@ -1320,6 +1693,9 @@ namespace swrenderer void R_DrawParticle(vissprite_t *sprite) { - R_DrawParticle_C(sprite); + if (r_swtruecolor) + R_DrawParticle_rgba(sprite); + else + R_DrawParticle_C(sprite); } } diff --git a/src/r_draw.h b/src/r_draw.h index 40b3328964..8c1af58fcb 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -3,7 +3,13 @@ #include "r_defs.h" +struct FSWColormap; + EXTERN_CVAR(Bool, r_multithreaded); +EXTERN_CVAR(Bool, r_magfilter); +EXTERN_CVAR(Bool, r_minfilter); +EXTERN_CVAR(Bool, r_mipmap); +EXTERN_CVAR(Float, r_lod_bias); EXTERN_CVAR(Int, r_drawfuzz); EXTERN_CVAR(Bool, r_drawtrans); EXTERN_CVAR(Float, transsouls); @@ -13,12 +19,29 @@ namespace swrenderer { struct vissprite_t; + struct ShadeConstants + { + uint16_t light_alpha; + uint16_t light_red; + uint16_t light_green; + uint16_t light_blue; + uint16_t fade_alpha; + uint16_t fade_red; + uint16_t fade_green; + uint16_t fade_blue; + uint16_t desaturate; + bool simple_shade; + }; + extern double 
dc_texturemid; namespace drawerargs { extern int dc_pitch; extern lighttable_t *dc_colormap; + extern FSWColormap *dc_fcolormap; + extern ShadeConstants dc_shade_constants; + extern fixed_t dc_light; extern int dc_x; extern int dc_yl; extern int dc_yh; @@ -41,6 +64,8 @@ namespace swrenderer extern int dc_destheight; extern int dc_count; + extern bool drawer_needs_pal_input; + extern uint32_t vplce[4]; extern uint32_t vince[4]; extern uint8_t *palookupoffse[4]; @@ -57,6 +82,8 @@ namespace swrenderer extern int ds_x1; extern int ds_x2; extern lighttable_t * ds_colormap; + extern FSWColormap *ds_fcolormap; + extern ShadeConstants ds_shade_constants; extern dsfixed_t ds_light; extern dsfixed_t ds_xfrac; extern dsfixed_t ds_yfrac; @@ -67,6 +94,7 @@ namespace swrenderer extern fixed_t ds_alpha; extern double ds_lod; extern const uint8_t *ds_source; + extern bool ds_source_mipmapped; extern int ds_color; extern unsigned int dc_tspans[4][MAXHEIGHT]; @@ -86,6 +114,8 @@ namespace swrenderer extern int fuzzpos; extern int fuzzviewheight; + extern bool r_swtruecolor; + void R_InitColumnDrawers(); void R_InitShadeMaps(); void R_InitFuzzTable(int fuzzoff); @@ -162,7 +192,7 @@ namespace swrenderer void R_FillSpan(); void R_DrawTiltedSpan(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy); void R_DrawColoredSpan(int y, int x1, int x2); - void R_SetupDrawSlab(uint8_t *colormap); + void R_SetupDrawSlab(FSWColormap *base_colormap, float light, int shade); void R_DrawSlab(int dx, fixed_t v, int dy, fixed_t vi, const uint8_t *vptr, uint8_t *p); void R_DrawFogBoundary(int x1, int x2, short *uclip, short *dclip); uint32_t vlinec1(); @@ -194,12 +224,13 @@ namespace swrenderer void R_DrawDoubleSkyCol1(uint32_t solid_top, uint32_t solid_bottom); void R_DrawDoubleSkyCol4(uint32_t solid_top, uint32_t solid_bottom); - void R_SetColorMapLight(lighttable_t 
*base_colormap, float light, int shade); - void R_SetDSColorMapLight(lighttable_t *base_colormap, float light, int shade); + // Sets dc_colormap and dc_light to their appropriate values depending on the output format (pal vs true color) + void R_SetColorMapLight(FSWColormap *base_colormap, float light, int shade); + void R_SetDSColorMapLight(FSWColormap *base_colormap, float light, int shade); void R_SetTranslationMap(lighttable_t *translation); void R_SetupSpanBits(FTexture *tex); - void R_SetSpanColormap(lighttable_t *colormap); + void R_SetSpanColormap(FDynamicColormap *colormap, int shade); void R_SetSpanSource(FTexture *tex); void R_MapTiltedPlane(int y, int x1); diff --git a/src/r_draw_tc.cpp b/src/r_draw_tc.cpp deleted file mode 100644 index ec9129a89c..0000000000 --- a/src/r_draw_tc.cpp +++ /dev/null @@ -1,1701 +0,0 @@ - -#include - -#include "templates.h" -#include "doomdef.h" -#include "i_system.h" -#include "w_wad.h" -#include "r_local.h" -#include "v_video.h" -#include "doomstat.h" -#include "st_stuff.h" -#include "g_game.h" -#include "g_level.h" -#include "r_data/r_translate.h" -#include "v_palette.h" -#include "r_data/colormaps.h" -#include "r_plane.h" -#include "r_draw_tc.h" -#include "r_draw_rgba.h" -#include "r_draw_pal.h" -#include "r_thread.h" - -namespace swrenderer -{ - // Needed by R_DrawFogBoundary (which probably shouldn't be part of this file) - extern "C" short spanend[MAXHEIGHT]; - extern float rw_light; - extern float rw_lightstep; - extern int wallshade; - - double dc_texturemid; - - int ylookup[MAXHEIGHT]; - uint8_t shadetables[NUMCOLORMAPS * 16 * 256]; - FDynamicColormap ShadeFakeColormap[16]; - uint8_t identitymap[256]; - FDynamicColormap identitycolormap; - int fuzzoffset[FUZZTABLE + 1]; - int fuzzpos; - int fuzzviewheight; - - namespace drawerargs - { - int dc_pitch; - lighttable_t *dc_colormap; - FSWColormap *dc_fcolormap; - ShadeConstants dc_shade_constants; - fixed_t dc_light; - int dc_x; - int dc_yl; - int dc_yh; - fixed_t 
dc_iscale; - fixed_t dc_texturefrac; - uint32_t dc_textureheight; - int dc_color; - uint32_t dc_srccolor; - uint32_t dc_srccolor_bgra; - uint32_t *dc_srcblend; - uint32_t *dc_destblend; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; - const uint8_t *dc_source; - const uint8_t *dc_source2; - uint32_t dc_texturefracx; - uint8_t *dc_translation; - uint8_t *dc_dest; - uint8_t *dc_destorg; - int dc_destheight; - int dc_count; - uint32_t vplce[4]; - uint32_t vince[4]; - uint8_t *palookupoffse[4]; - fixed_t palookuplight[4]; - const uint8_t *bufplce[4]; - const uint8_t *bufplce2[4]; - uint32_t buftexturefracx[4]; - uint32_t bufheight[4]; - int vlinebits; - int mvlinebits; - int tmvlinebits; - int ds_y; - int ds_x1; - int ds_x2; - lighttable_t * ds_colormap; - FSWColormap *ds_fcolormap; - ShadeConstants ds_shade_constants; - dsfixed_t ds_light; - dsfixed_t ds_xfrac; - dsfixed_t ds_yfrac; - dsfixed_t ds_xstep; - dsfixed_t ds_ystep; - int ds_xbits; - int ds_ybits; - fixed_t ds_alpha; - double ds_lod; - const uint8_t *ds_source; - bool ds_source_mipmapped; - int ds_color; - bool drawer_needs_pal_input; - unsigned int dc_tspans[4][MAXHEIGHT]; - unsigned int *dc_ctspan[4]; - unsigned int *horizspan[4]; - } - - void R_InitColumnDrawers() - { - colfunc = basecolfunc = R_DrawColumn; - fuzzcolfunc = R_DrawFuzzColumn; - transcolfunc = R_DrawTranslatedColumn; - spanfunc = R_DrawSpan; - hcolfunc_pre = R_DrawColumnHoriz; - hcolfunc_post1 = rt_map1col; - hcolfunc_post4 = rt_map4cols; - } - - void R_InitShadeMaps() - { - int i, j; - // set up shading tables for shaded columns - // 16 colormap sets, progressing from full alpha to minimum visible alpha - - uint8_t *table = shadetables; - - // Full alpha - for (i = 0; i < 16; ++i) - { - ShadeFakeColormap[i].Color = ~0u; - ShadeFakeColormap[i].Desaturate = ~0u; - ShadeFakeColormap[i].Next = NULL; - ShadeFakeColormap[i].Maps = table; - - for (j = 0; j < NUMCOLORMAPS; ++j) - { - int a = (NUMCOLORMAPS - j) * 256 / NUMCOLORMAPS * (16 - i); - for 
(int k = 0; k < 256; ++k) - { - uint8_t v = (((k + 2) * a) + 256) >> 14; - table[k] = MIN(v, 64); - } - table += 256; - } - } - for (i = 0; i < NUMCOLORMAPS * 16 * 256; ++i) - { - assert(shadetables[i] <= 64); - } - - // Set up a guaranteed identity map - for (i = 0; i < 256; ++i) - { - identitymap[i] = i; - } - } - - void R_InitFuzzTable(int fuzzoff) - { - /* - FUZZOFF,-FUZZOFF,FUZZOFF,-FUZZOFF,FUZZOFF,FUZZOFF,-FUZZOFF, - FUZZOFF,FUZZOFF,-FUZZOFF,FUZZOFF,FUZZOFF,FUZZOFF,-FUZZOFF, - FUZZOFF,FUZZOFF,FUZZOFF,-FUZZOFF,-FUZZOFF,-FUZZOFF,-FUZZOFF, - FUZZOFF,-FUZZOFF,-FUZZOFF,FUZZOFF,FUZZOFF,FUZZOFF,FUZZOFF,-FUZZOFF, - FUZZOFF,-FUZZOFF,FUZZOFF,FUZZOFF,-FUZZOFF,-FUZZOFF,FUZZOFF, - FUZZOFF,-FUZZOFF,-FUZZOFF,-FUZZOFF,-FUZZOFF,FUZZOFF,FUZZOFF, - FUZZOFF,FUZZOFF,-FUZZOFF,FUZZOFF,FUZZOFF,-FUZZOFF,FUZZOFF - */ - - static const int8_t fuzzinit[FUZZTABLE] = { - 1,-1, 1,-1, 1, 1,-1, - 1, 1,-1, 1, 1, 1,-1, - 1, 1, 1,-1,-1,-1,-1, - 1,-1,-1, 1, 1, 1, 1,-1, - 1,-1, 1, 1,-1,-1, 1, - 1,-1,-1,-1,-1, 1, 1, - 1, 1,-1, 1, 1,-1, 1 - }; - - for (int i = 0; i < FUZZTABLE; i++) - { - fuzzoffset[i] = fuzzinit[i] * fuzzoff; - } - } - - namespace - { - bool R_SetBlendFunc(int op, fixed_t fglevel, fixed_t bglevel, int flags) - { - using namespace drawerargs; - - // r_drawtrans is a seriously bad thing to turn off. I wonder if I should - // just remove it completely. 
- if (!r_drawtrans || (op == STYLEOP_Add && fglevel == FRACUNIT && bglevel == 0 && !(flags & STYLEF_InvertSource))) - { - if (flags & STYLEF_ColorIsFixed) - { - colfunc = R_FillColumn; - hcolfunc_post1 = rt_copy1col; - hcolfunc_post4 = rt_copy4cols; - } - else if (dc_translation == NULL) - { - colfunc = basecolfunc; - hcolfunc_post1 = rt_map1col; - hcolfunc_post4 = rt_map4cols; - } - else - { - colfunc = transcolfunc; - hcolfunc_post1 = rt_tlate1col; - hcolfunc_post4 = rt_tlate4cols; - drawer_needs_pal_input = true; - } - return true; - } - if (flags & STYLEF_InvertSource) - { - dc_srcblend = Col2RGB8_Inverse[fglevel >> 10]; - dc_destblend = Col2RGB8_LessPrecision[bglevel >> 10]; - dc_srcalpha = fglevel; - dc_destalpha = bglevel; - } - else if (op == STYLEOP_Add && fglevel + bglevel <= FRACUNIT) - { - dc_srcblend = Col2RGB8[fglevel >> 10]; - dc_destblend = Col2RGB8[bglevel >> 10]; - dc_srcalpha = fglevel; - dc_destalpha = bglevel; - } - else - { - dc_srcblend = Col2RGB8_LessPrecision[fglevel >> 10]; - dc_destblend = Col2RGB8_LessPrecision[bglevel >> 10]; - dc_srcalpha = fglevel; - dc_destalpha = bglevel; - } - switch (op) - { - case STYLEOP_Add: - if (fglevel == 0 && bglevel == FRACUNIT) - { - return false; - } - if (fglevel + bglevel <= FRACUNIT) - { // Colors won't overflow when added - if (flags & STYLEF_ColorIsFixed) - { - colfunc = R_FillAddColumn; - hcolfunc_post1 = rt_add1col; - hcolfunc_post4 = rt_add4cols; - } - else if (dc_translation == NULL) - { - colfunc = R_DrawAddColumn; - hcolfunc_post1 = rt_add1col; - hcolfunc_post4 = rt_add4cols; - } - else - { - colfunc = R_DrawTlatedAddColumn; - hcolfunc_post1 = rt_tlateadd1col; - hcolfunc_post4 = rt_tlateadd4cols; - drawer_needs_pal_input = true; - } - } - else - { // Colors might overflow when added - if (flags & STYLEF_ColorIsFixed) - { - colfunc = R_FillAddClampColumn; - hcolfunc_post1 = rt_addclamp1col; - hcolfunc_post4 = rt_addclamp4cols; - } - else if (dc_translation == NULL) - { - colfunc = 
R_DrawAddClampColumn; - hcolfunc_post1 = rt_addclamp1col; - hcolfunc_post4 = rt_addclamp4cols; - } - else - { - colfunc = R_DrawAddClampTranslatedColumn; - hcolfunc_post1 = rt_tlateaddclamp1col; - hcolfunc_post4 = rt_tlateaddclamp4cols; - drawer_needs_pal_input = true; - } - } - return true; - - case STYLEOP_Sub: - if (flags & STYLEF_ColorIsFixed) - { - colfunc = R_FillSubClampColumn; - hcolfunc_post1 = rt_subclamp1col; - hcolfunc_post4 = rt_subclamp4cols; - } - else if (dc_translation == NULL) - { - colfunc = R_DrawSubClampColumn; - hcolfunc_post1 = rt_subclamp1col; - hcolfunc_post4 = rt_subclamp4cols; - } - else - { - colfunc = R_DrawSubClampTranslatedColumn; - hcolfunc_post1 = rt_tlatesubclamp1col; - hcolfunc_post4 = rt_tlatesubclamp4cols; - drawer_needs_pal_input = true; - } - return true; - - case STYLEOP_RevSub: - if (fglevel == 0 && bglevel == FRACUNIT) - { - return false; - } - if (flags & STYLEF_ColorIsFixed) - { - colfunc = R_FillRevSubClampColumn; - hcolfunc_post1 = rt_subclamp1col; - hcolfunc_post4 = rt_subclamp4cols; - } - else if (dc_translation == NULL) - { - colfunc = R_DrawRevSubClampColumn; - hcolfunc_post1 = rt_revsubclamp1col; - hcolfunc_post4 = rt_revsubclamp4cols; - } - else - { - colfunc = R_DrawRevSubClampTranslatedColumn; - hcolfunc_post1 = rt_tlaterevsubclamp1col; - hcolfunc_post4 = rt_tlaterevsubclamp4cols; - drawer_needs_pal_input = true; - } - return true; - - default: - return false; - } - } - - fixed_t GetAlpha(int type, fixed_t alpha) - { - switch (type) - { - case STYLEALPHA_Zero: return 0; - case STYLEALPHA_One: return OPAQUE; - case STYLEALPHA_Src: return alpha; - case STYLEALPHA_InvSrc: return OPAQUE - alpha; - default: return 0; - } - } - - FDynamicColormap *basecolormapsave; - } - - ESPSResult R_SetPatchStyle(FRenderStyle style, fixed_t alpha, int translation, uint32_t color) - { - using namespace drawerargs; - - fixed_t fglevel, bglevel; - - drawer_needs_pal_input = false; - - style.CheckFuzz(); - - if (style.BlendOp == 
STYLEOP_Shadow) - { - style = LegacyRenderStyles[STYLE_TranslucentStencil]; - alpha = TRANSLUC33; - color = 0; - } - - if (style.Flags & STYLEF_TransSoulsAlpha) - { - alpha = fixed_t(transsouls * OPAQUE); - } - else if (style.Flags & STYLEF_Alpha1) - { - alpha = FRACUNIT; - } - else - { - alpha = clamp(alpha, 0, OPAQUE); - } - - if (translation != -1) - { - dc_translation = NULL; - if (translation != 0) - { - FRemapTable *table = TranslationToTable(translation); - if (table != NULL && !table->Inactive) - { - if (r_swtruecolor) - dc_translation = (uint8_t*)table->Palette; - else - dc_translation = table->Remap; - } - } - } - basecolormapsave = basecolormap; - hcolfunc_pre = R_DrawColumnHoriz; - - // Check for special modes - if (style.BlendOp == STYLEOP_Fuzz) - { - colfunc = fuzzcolfunc; - return DoDraw0; - } - else if (style == LegacyRenderStyles[STYLE_Shaded]) - { - // Shaded drawer only gets 16 levels of alpha because it saves memory. - if ((alpha >>= 12) == 0) - return DontDraw; - colfunc = R_DrawShadedColumn; - hcolfunc_post1 = rt_shaded1col; - hcolfunc_post4 = rt_shaded4cols; - drawer_needs_pal_input = true; - dc_color = fixedcolormap ? fixedcolormap->Maps[APART(color)] : basecolormap->Maps[APART(color)]; - basecolormap = &ShadeFakeColormap[16 - alpha]; - if (fixedlightlev >= 0 && fixedcolormap == NULL) - { - R_SetColorMapLight(basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); - } - else - { - R_SetColorMapLight(basecolormap, 0, 0); - } - bool active_columnmethod = r_columnmethod && !r_swtruecolor; - return active_columnmethod ? DoDraw1 : DoDraw0; - } - - fglevel = GetAlpha(style.SrcAlpha, alpha); - bglevel = GetAlpha(style.DestAlpha, alpha); - - if (style.Flags & STYLEF_ColorIsFixed) - { - uint32_t x = fglevel >> 10; - uint32_t r = RPART(color); - uint32_t g = GPART(color); - uint32_t b = BPART(color); - // dc_color is used by the rt_* routines. It is indexed into dc_srcblend. 
- dc_color = RGB32k.RGB[r >> 3][g >> 3][b >> 3]; - if (style.Flags & STYLEF_InvertSource) - { - r = 255 - r; - g = 255 - g; - b = 255 - b; - } - uint32_t alpha = clamp(fglevel >> (FRACBITS - 8), 0, 255); - dc_srccolor_bgra = (alpha << 24) | (r << 16) | (g << 8) | b; - // dc_srccolor is used by the R_Fill* routines. It is premultiplied - // with the alpha. - dc_srccolor = ((((r*x) >> 4) << 20) | ((g*x) >> 4) | ((((b)*x) >> 4) << 10)) & 0x3feffbff; - hcolfunc_pre = R_FillColumnHoriz; - R_SetColorMapLight(&identitycolormap, 0, 0); - } - - if (!R_SetBlendFunc(style.BlendOp, fglevel, bglevel, style.Flags)) - { - return DontDraw; - } - bool active_columnmethod = r_columnmethod && !r_swtruecolor; - return active_columnmethod ? DoDraw1 : DoDraw0; - } - - ESPSResult R_SetPatchStyle(FRenderStyle style, float alpha, int translation, uint32_t color) - { - return R_SetPatchStyle(style, FLOAT2FIXED(alpha), translation, color); - } - - void R_FinishSetPatchStyle() - { - basecolormap = basecolormapsave; - } - - const uint8_t *R_GetColumn(FTexture *tex, int col) - { - int width; - - // If the texture's width isn't a power of 2, then we need to make it a - // positive offset for proper clamping. 
- if (col < 0 && (width = tex->GetWidth()) != (1 << tex->WidthBits)) - { - col = width + (col % width); - } - - if (r_swtruecolor) - return (const uint8_t *)tex->GetColumnBgra(col, nullptr); - else - return tex->GetColumn(col, nullptr); - } - - bool R_GetTransMaskDrawers(fixed_t(**tmvline1)(), void(**tmvline4)()) - { - if (colfunc == R_DrawAddColumn) - { - *tmvline1 = tmvline1_add; - *tmvline4 = tmvline4_add; - return true; - } - if (colfunc == R_DrawAddClampColumn) - { - *tmvline1 = tmvline1_addclamp; - *tmvline4 = tmvline4_addclamp; - return true; - } - if (colfunc == R_DrawSubClampColumn) - { - *tmvline1 = tmvline1_subclamp; - *tmvline4 = tmvline4_subclamp; - return true; - } - if (colfunc == R_DrawRevSubClampColumn) - { - *tmvline1 = tmvline1_revsubclamp; - *tmvline4 = tmvline4_revsubclamp; - return true; - } - return false; - } - - void setupvline(int fracbits) - { - drawerargs::vlinebits = fracbits; - } - - void setupmvline(int fracbits) - { - drawerargs::mvlinebits = fracbits; - } - - void setuptmvline(int fracbits) - { - drawerargs::tmvlinebits = fracbits; - } - - void R_SetColorMapLight(FSWColormap *base_colormap, float light, int shade) - { - using namespace drawerargs; - - dc_fcolormap = base_colormap; - if (r_swtruecolor) - { - dc_shade_constants.light_red = dc_fcolormap->Color.r * 256 / 255; - dc_shade_constants.light_green = dc_fcolormap->Color.g * 256 / 255; - dc_shade_constants.light_blue = dc_fcolormap->Color.b * 256 / 255; - dc_shade_constants.light_alpha = dc_fcolormap->Color.a * 256 / 255; - dc_shade_constants.fade_red = dc_fcolormap->Fade.r; - dc_shade_constants.fade_green = dc_fcolormap->Fade.g; - dc_shade_constants.fade_blue = dc_fcolormap->Fade.b; - dc_shade_constants.fade_alpha = dc_fcolormap->Fade.a; - dc_shade_constants.desaturate = MIN(abs(dc_fcolormap->Desaturate), 255) * 255 / 256; - dc_shade_constants.simple_shade = (dc_fcolormap->Color.d == 0x00ffffff && dc_fcolormap->Fade.d == 0x00000000 && dc_fcolormap->Desaturate == 0); - 
dc_colormap = base_colormap->Maps; - dc_light = LIGHTSCALE(light, shade); - } - else - { - dc_colormap = base_colormap->Maps + (GETPALOOKUP(light, shade) << COLORMAPSHIFT); - } - } - - void R_SetDSColorMapLight(FSWColormap *base_colormap, float light, int shade) - { - using namespace drawerargs; - - ds_fcolormap = base_colormap; - if (r_swtruecolor) - { - ds_shade_constants.light_red = ds_fcolormap->Color.r * 256 / 255; - ds_shade_constants.light_green = ds_fcolormap->Color.g * 256 / 255; - ds_shade_constants.light_blue = ds_fcolormap->Color.b * 256 / 255; - ds_shade_constants.light_alpha = ds_fcolormap->Color.a * 256 / 255; - ds_shade_constants.fade_red = ds_fcolormap->Fade.r; - ds_shade_constants.fade_green = ds_fcolormap->Fade.g; - ds_shade_constants.fade_blue = ds_fcolormap->Fade.b; - ds_shade_constants.fade_alpha = ds_fcolormap->Fade.a; - ds_shade_constants.desaturate = MIN(abs(ds_fcolormap->Desaturate), 255) * 255 / 256; - ds_shade_constants.simple_shade = (ds_fcolormap->Color.d == 0x00ffffff && ds_fcolormap->Fade.d == 0x00000000 && ds_fcolormap->Desaturate == 0); - ds_colormap = base_colormap->Maps; - ds_light = LIGHTSCALE(light, shade); - } - else - { - ds_colormap = base_colormap->Maps + (GETPALOOKUP(light, shade) << COLORMAPSHIFT); - } - } - - void R_SetTranslationMap(lighttable_t *translation) - { - using namespace drawerargs; - - if (r_swtruecolor) - { - dc_fcolormap = nullptr; - dc_colormap = nullptr; - dc_translation = translation; - dc_shade_constants.light_red = 256; - dc_shade_constants.light_green = 256; - dc_shade_constants.light_blue = 256; - dc_shade_constants.light_alpha = 256; - dc_shade_constants.fade_red = 0; - dc_shade_constants.fade_green = 0; - dc_shade_constants.fade_blue = 0; - dc_shade_constants.fade_alpha = 256; - dc_shade_constants.desaturate = 0; - dc_shade_constants.simple_shade = true; - dc_light = 0; - } - else - { - dc_fcolormap = nullptr; - dc_colormap = translation; - } - } - - void rt_initcols(uint8_t *buffer) - { - using 
namespace drawerargs; - - for (int y = 3; y >= 0; y--) - horizspan[y] = dc_ctspan[y] = &dc_tspans[y][0]; - - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(buffer); - else - DrawerCommandQueue::QueueCommand(buffer); - } - - void rt_span_coverage(int x, int start, int stop) - { - using namespace drawerargs; - - unsigned int **tspan = &dc_ctspan[x & 3]; - (*tspan)[0] = start; - (*tspan)[1] = stop; - *tspan += 2; - } - - void rt_flip_posts() - { - using namespace drawerargs; - - unsigned int *front = horizspan[dc_x & 3]; - unsigned int *back = dc_ctspan[dc_x & 3] - 2; - - while (front < back) - { - swapvalues(front[0], back[0]); - swapvalues(front[1], back[1]); - front += 2; - back -= 2; - } - } - - void rt_draw4cols(int sx) - { - using namespace drawerargs; - - int x, bad; - unsigned int maxtop, minbot, minnexttop; - - // Place a dummy "span" in each column. These don't get - // drawn. They're just here to avoid special cases in the - // max/min calculations below. - for (x = 0; x < 4; ++x) - { - dc_ctspan[x][0] = screen->GetHeight()+1; - dc_ctspan[x][1] = screen->GetHeight(); - } - - for (;;) - { - // If a column is out of spans, mark it as such - bad = 0; - minnexttop = 0xffffffff; - for (x = 0; x < 4; ++x) - { - if (horizspan[x] >= dc_ctspan[x]) - { - bad |= 1 << x; - } - else if ((horizspan[x]+2)[0] < minnexttop) - { - minnexttop = (horizspan[x]+2)[0]; - } - } - // Once all columns are out of spans, we're done - if (bad == 15) - { - return; - } - - // Find the largest shared area for the spans in each column - maxtop = MAX (MAX (horizspan[0][0], horizspan[1][0]), - MAX (horizspan[2][0], horizspan[3][0])); - minbot = MIN (MIN (horizspan[0][1], horizspan[1][1]), - MIN (horizspan[2][1], horizspan[3][1])); - - // If there is no shared area with these spans, draw each span - // individually and advance to the next spans until we reach a shared area. - // However, only draw spans down to the highest span in the next set of - // spans. 
If we allow the entire height of a span to be drawn, it could - // prevent any more shared areas from being drawn in these four columns. - // - // Example: Suppose we have the following arrangement: - // A CD - // A CD - // B D - // B D - // aB D - // aBcD - // aBcD - // aBc - // - // If we draw the entire height of the spans, we end up drawing this first: - // A CD - // A CD - // B D - // B D - // B D - // B D - // B D - // B D - // B - // - // This leaves only the "a" and "c" columns to be drawn, and they are not - // part of a shared area, but if we can include B and D with them, we can - // get a shared area. So we cut off everything in the first set just - // above the "a" column and end up drawing this first: - // A CD - // A CD - // B D - // B D - // - // Then the next time through, we have the following arrangement with an - // easily shared area to draw: - // aB D - // aBcD - // aBcD - // aBc - if (bad != 0 || maxtop > minbot) - { - int drawcount = 0; - for (x = 0; x < 4; ++x) - { - if (!(bad & 1)) - { - if (horizspan[x][1] < minnexttop) - { - hcolfunc_post1 (x, sx+x, horizspan[x][0], horizspan[x][1]); - horizspan[x] += 2; - drawcount++; - } - else if (minnexttop > horizspan[x][0]) - { - hcolfunc_post1 (x, sx+x, horizspan[x][0], minnexttop-1); - horizspan[x][0] = minnexttop; - drawcount++; - } - } - bad >>= 1; - } - // Drawcount *should* always be non-zero. The reality is that some situations - // can make this not true. Unfortunately, I'm not sure what those situations are. - if (drawcount == 0) - { - return; - } - continue; - } - - // Draw any span fragments above the shared area. - for (x = 0; x < 4; ++x) - { - if (maxtop > horizspan[x][0]) - { - hcolfunc_post1 (x, sx+x, horizspan[x][0], maxtop-1); - } - } - - // Draw the shared area. - hcolfunc_post4 (sx, maxtop, minbot); - - // For each column, if part of the span is past the shared area, - // set its top to just below the shared area. Otherwise, advance - // to the next span in that column. 
- for (x = 0; x < 4; ++x) - { - if (minbot < horizspan[x][1]) - { - horizspan[x][0] = minbot+1; - } - else - { - horizspan[x] += 2; - } - } - } - } - - void R_SetupSpanBits(FTexture *tex) - { - using namespace drawerargs; - - tex->GetWidth(); - ds_xbits = tex->WidthBits; - ds_ybits = tex->HeightBits; - if ((1 << ds_xbits) > tex->GetWidth()) - { - ds_xbits--; - } - if ((1 << ds_ybits) > tex->GetHeight()) - { - ds_ybits--; - } - } - - void R_SetSpanColormap(FDynamicColormap *colormap, int shade) - { - R_SetDSColorMapLight(colormap, 0, shade); - } - - void R_SetSpanSource(FTexture *tex) - { - using namespace drawerargs; - - ds_source = r_swtruecolor ? (const uint8_t*)tex->GetPixelsBgra() : tex->GetPixels(); - ds_source_mipmapped = tex->Mipmapped() && tex->GetWidth() > 1 && tex->GetHeight() > 1; - } - - ///////////////////////////////////////////////////////////////////////// - - void R_FillColumnHoriz() - { - using namespace drawerargs; - - if (dc_count <= 0) - return; - - int x = dc_x & 3; - unsigned int **span = &dc_ctspan[x]; - (*span)[0] = dc_yl; - (*span)[1] = dc_yh; - *span += 2; - - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); - } - - void R_DrawColumnHoriz() - { - using namespace drawerargs; - - if (dc_count <= 0) - return; - - int x = dc_x & 3; - unsigned int **span = &dc_ctspan[x]; - (*span)[0] = dc_yl; - (*span)[1] = dc_yh; - *span += 2; - - if (r_swtruecolor) - { - if (drawer_needs_pal_input) - DrawerCommandQueue::QueueCommand>(); - else - DrawerCommandQueue::QueueCommand>(); - } - else - { - DrawerCommandQueue::QueueCommand(); - } - } - - // Copies one span at hx to the screen at sx. - void rt_copy1col(int hx, int sx, int yl, int yh) - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); - else - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); - } - - // Copies all four spans to the screen starting at sx. 
- void rt_copy4cols(int sx, int yl, int yh) - { - if (r_swtruecolor) - { - // To do: we could do this with SSE using __m128i - rt_copy1col(0, sx, yl, yh); - rt_copy1col(1, sx + 1, yl, yh); - rt_copy1col(2, sx + 2, yl, yh); - rt_copy1col(3, sx + 3, yl, yh); - } - else - { - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); - } - } - - // Maps one span at hx to the screen at sx. - void rt_map1col(int hx, int sx, int yl, int yh) - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); - else - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); - } - - // Maps all four spans to the screen starting at sx. - void rt_map4cols(int sx, int yl, int yh) - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); - else - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); - } - - // Translates one span at hx to the screen at sx. - void rt_tlate1col(int hx, int sx, int yl, int yh) - { - if (r_swtruecolor) - { - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); - } - else - { - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); - rt_map1col(hx, sx, yl, yh); - } - } - - // Translates all four spans to the screen starting at sx. - void rt_tlate4cols(int sx, int yl, int yh) - { - if (r_swtruecolor) - { - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); - } - else - { - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); - rt_map4cols(sx, yl, yh); - } - } - - // Adds one span at hx to the screen at sx without clamping. - void rt_add1col(int hx, int sx, int yl, int yh) - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); - else - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); - } - - // Adds all four spans to the screen starting at sx without clamping. - void rt_add4cols(int sx, int yl, int yh) - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); - else - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); - } - - // Translates and adds one span at hx to the screen at sx without clamping. 
- void rt_tlateadd1col(int hx, int sx, int yl, int yh) - { - if (r_swtruecolor) - { - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); - } - else - { - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); - rt_add1col(hx, sx, yl, yh); - } - } - - // Translates and adds all four spans to the screen starting at sx without clamping. - void rt_tlateadd4cols(int sx, int yl, int yh) - { - if (r_swtruecolor) - { - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); - } - else - { - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); - rt_add4cols(sx, yl, yh); - } - } - - // Shades one span at hx to the screen at sx. - void rt_shaded1col(int hx, int sx, int yl, int yh) - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); - else - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); - } - - // Shades all four spans to the screen starting at sx. - void rt_shaded4cols(int sx, int yl, int yh) - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); - else - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); - } - - // Adds one span at hx to the screen at sx with clamping. - void rt_addclamp1col(int hx, int sx, int yl, int yh) - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); - else - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); - } - - // Adds all four spans to the screen starting at sx with clamping. - void rt_addclamp4cols(int sx, int yl, int yh) - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); - else - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); - } - - // Translates and adds one span at hx to the screen at sx with clamping. - void rt_tlateaddclamp1col(int hx, int sx, int yl, int yh) - { - if (r_swtruecolor) - { - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); - } - else - { - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); - rt_addclamp1col(hx, sx, yl, yh); - } - } - - // Translates and adds all four spans to the screen starting at sx with clamping. 
- void rt_tlateaddclamp4cols(int sx, int yl, int yh) - { - if (r_swtruecolor) - { - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); - } - else - { - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); - rt_addclamp4cols(sx, yl, yh); - } - } - - // Subtracts one span at hx to the screen at sx with clamping. - void rt_subclamp1col(int hx, int sx, int yl, int yh) - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); - else - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); - } - - // Subtracts all four spans to the screen starting at sx with clamping. - void rt_subclamp4cols(int sx, int yl, int yh) - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); - else - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); - } - - // Translates and subtracts one span at hx to the screen at sx with clamping. - void rt_tlatesubclamp1col(int hx, int sx, int yl, int yh) - { - if (r_swtruecolor) - { - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); - } - else - { - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); - rt_subclamp1col(hx, sx, yl, yh); - } - } - - // Translates and subtracts all four spans to the screen starting at sx with clamping. - void rt_tlatesubclamp4cols(int sx, int yl, int yh) - { - if (r_swtruecolor) - { - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); - } - else - { - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); - rt_subclamp4cols(sx, yl, yh); - } - } - - // Subtracts one span at hx from the screen at sx with clamping. - void rt_revsubclamp1col(int hx, int sx, int yl, int yh) - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); - else - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); - } - - // Subtracts all four spans from the screen starting at sx with clamping. 
- void rt_revsubclamp4cols(int sx, int yl, int yh) - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); - else - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); - } - - // Translates and subtracts one span at hx from the screen at sx with clamping. - void rt_tlaterevsubclamp1col(int hx, int sx, int yl, int yh) - { - if (r_swtruecolor) - { - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); - } - else - { - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); - rt_revsubclamp1col(hx, sx, yl, yh); - } - } - - // Translates and subtracts all four spans from the screen starting at sx with clamping. - void rt_tlaterevsubclamp4cols(int sx, int yl, int yh) - { - if (r_swtruecolor) - { - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); - } - else - { - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); - rt_revsubclamp4cols(sx, yl, yh); - } - } - - uint32_t vlinec1() - { - using namespace drawerargs; - - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); - - return dc_texturefrac + dc_count * dc_iscale; - } - - void vlinec4() - { - using namespace drawerargs; - - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); - - for (int i = 0; i < 4; i++) - vplce[i] += vince[i] * dc_count; - } - - uint32_t mvlinec1() - { - using namespace drawerargs; - - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); - - return dc_texturefrac + dc_count * dc_iscale; - } - - void mvlinec4() - { - using namespace drawerargs; - - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); - - for (int i = 0; i < 4; i++) - vplce[i] += vince[i] * dc_count; - } - - fixed_t tmvline1_add() - { - using namespace drawerargs; - - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); - - return dc_texturefrac + dc_count * dc_iscale; - } - - void 
tmvline4_add() - { - using namespace drawerargs; - - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); - - for (int i = 0; i < 4; i++) - vplce[i] += vince[i] * dc_count; - } - - fixed_t tmvline1_addclamp() - { - using namespace drawerargs; - - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); - - return dc_texturefrac + dc_count * dc_iscale; - } - - void tmvline4_addclamp() - { - using namespace drawerargs; - - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); - - for (int i = 0; i < 4; i++) - vplce[i] += vince[i] * dc_count; - } - - fixed_t tmvline1_subclamp() - { - using namespace drawerargs; - - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); - - return dc_texturefrac + dc_count * dc_iscale; - } - - void tmvline4_subclamp() - { - using namespace drawerargs; - - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); - - for (int i = 0; i < 4; i++) - vplce[i] += vince[i] * dc_count; - } - - fixed_t tmvline1_revsubclamp() - { - using namespace drawerargs; - - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); - - return dc_texturefrac + dc_count * dc_iscale; - } - - void tmvline4_revsubclamp() - { - using namespace drawerargs; - - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); - - for (int i = 0; i < 4; i++) - vplce[i] += vince[i] * dc_count; - } - - void R_DrawSingleSkyCol1(uint32_t solid_top, uint32_t solid_bottom) - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); - else - DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); - } - - void R_DrawSingleSkyCol4(uint32_t solid_top, uint32_t solid_bottom) - { - if (r_swtruecolor) - 
DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); - else - DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); - } - - void R_DrawDoubleSkyCol1(uint32_t solid_top, uint32_t solid_bottom) - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); - else - DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); - } - - void R_DrawDoubleSkyCol4(uint32_t solid_top, uint32_t solid_bottom) - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); - else - DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); - } - - void R_DrawColumn() - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); - } - - void R_FillColumn() - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); - } - - void R_FillAddColumn() - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); - } - - void R_FillAddClampColumn() - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); - } - - void R_FillSubClampColumn() - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); - } - - void R_FillRevSubClampColumn() - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); - } - - void R_DrawFuzzColumn() - { - using namespace drawerargs; - - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); - - dc_yl = MAX(dc_yl, 1); - dc_yh = MIN(dc_yh, fuzzviewheight); - if (dc_yl <= dc_yh) - fuzzpos = (fuzzpos + dc_yh - dc_yl + 1) % FUZZTABLE; - } - - void R_DrawAddColumn() - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); - } - - void R_DrawTranslatedColumn() - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(); - else - 
DrawerCommandQueue::QueueCommand(); - } - - void R_DrawTlatedAddColumn() - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); - } - - void R_DrawShadedColumn() - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); - } - - void R_DrawAddClampColumn() - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); - } - - void R_DrawAddClampTranslatedColumn() - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); - } - - void R_DrawSubClampColumn() - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); - } - - void R_DrawSubClampTranslatedColumn() - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); - } - - void R_DrawRevSubClampColumn() - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); - } - - void R_DrawRevSubClampTranslatedColumn() - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); - } - - void R_DrawSpan() - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); - } - - void R_DrawSpanMasked() - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); - } - - void R_DrawSpanTranslucent() - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); - } - - void R_DrawSpanMaskedTranslucent() - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); - } - - void R_DrawSpanAddClamp() - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); - } - - void R_DrawSpanMaskedAddClamp() - { - if (r_swtruecolor) 
- DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); - } - - void R_FillSpan() - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); - } - - void R_DrawTiltedSpan(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy) - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(y, x1, x2, plane_sz, plane_su, plane_sv, plane_shade, planeshade, planelightfloat, pviewx, pviewy); - else - DrawerCommandQueue::QueueCommand(y, x1, x2, plane_sz, plane_su, plane_sv, plane_shade, planeshade, planelightfloat, pviewx, pviewy); - } - - void R_DrawColoredSpan(int y, int x1, int x2) - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(y, x1, x2); - else - DrawerCommandQueue::QueueCommand(y, x1, x2); - } - - namespace - { - ShadeConstants slab_rgba_shade_constants; - const uint8_t *slab_rgba_colormap; - fixed_t slab_rgba_light; - } - - void R_SetupDrawSlab(FSWColormap *base_colormap, float light, int shade) - { - slab_rgba_shade_constants.light_red = base_colormap->Color.r * 256 / 255; - slab_rgba_shade_constants.light_green = base_colormap->Color.g * 256 / 255; - slab_rgba_shade_constants.light_blue = base_colormap->Color.b * 256 / 255; - slab_rgba_shade_constants.light_alpha = base_colormap->Color.a * 256 / 255; - slab_rgba_shade_constants.fade_red = base_colormap->Fade.r; - slab_rgba_shade_constants.fade_green = base_colormap->Fade.g; - slab_rgba_shade_constants.fade_blue = base_colormap->Fade.b; - slab_rgba_shade_constants.fade_alpha = base_colormap->Fade.a; - slab_rgba_shade_constants.desaturate = MIN(abs(base_colormap->Desaturate), 255) * 255 / 256; - slab_rgba_shade_constants.simple_shade = (base_colormap->Color.d == 0x00ffffff && base_colormap->Fade.d == 0x00000000 && base_colormap->Desaturate == 0); - slab_rgba_colormap = base_colormap->Maps; - 
slab_rgba_light = LIGHTSCALE(light, shade); - } - - void R_DrawSlab(int dx, fixed_t v, int dy, fixed_t vi, const uint8_t *vptr, uint8_t *p) - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(dx, v, dy, vi, vptr, p, slab_rgba_shade_constants, slab_rgba_colormap, slab_rgba_light); - else - DrawerCommandQueue::QueueCommand(dx, v, dy, vi, vptr, p, slab_rgba_colormap); - } - - void R_DrawFogBoundarySection(int y, int y2, int x1) - { - for (; y < y2; ++y) - { - int x2 = spanend[y]; - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(y, x1, x2); - else - DrawerCommandQueue::QueueCommand(y, x1, x2); - } - } - - void R_DrawFogBoundary(int x1, int x2, short *uclip, short *dclip) - { - // This is essentially the same as R_MapVisPlane but with an extra step - // to create new horizontal spans whenever the light changes enough that - // we need to use a new colormap. - - double lightstep = rw_lightstep; - double light = rw_light + rw_lightstep*(x2 - x1 - 1); - int x = x2 - 1; - int t2 = uclip[x]; - int b2 = dclip[x]; - int rcolormap = GETPALOOKUP(light, wallshade); - int lcolormap; - uint8_t *basecolormapdata = basecolormap->Maps; - - if (b2 > t2) - { - clearbufshort(spanend + t2, b2 - t2, x); - } - - R_SetColorMapLight(basecolormap, (float)light, wallshade); - - uint8_t *fake_dc_colormap = basecolormap->Maps + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT); - - for (--x; x >= x1; --x) - { - int t1 = uclip[x]; - int b1 = dclip[x]; - const int xr = x + 1; - int stop; - - light -= rw_lightstep; - lcolormap = GETPALOOKUP(light, wallshade); - if (lcolormap != rcolormap) - { - if (t2 < b2 && rcolormap != 0) - { // Colormap 0 is always the identity map, so rendering it is - // just a waste of time. 
- R_DrawFogBoundarySection(t2, b2, xr); - } - if (t1 < t2) t2 = t1; - if (b1 > b2) b2 = b1; - if (t2 < b2) - { - clearbufshort(spanend + t2, b2 - t2, x); - } - rcolormap = lcolormap; - R_SetColorMapLight(basecolormap, (float)light, wallshade); - fake_dc_colormap = basecolormap->Maps + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT); - } - else - { - if (fake_dc_colormap != basecolormapdata) - { - stop = MIN(t1, b2); - while (t2 < stop) - { - int y = t2++; - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(y, xr, spanend[y]); - else - DrawerCommandQueue::QueueCommand(y, xr, spanend[y]); - } - stop = MAX(b1, t2); - while (b2 > stop) - { - int y = --b2; - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(y, xr, spanend[y]); - else - DrawerCommandQueue::QueueCommand(y, xr, spanend[y]); - } - } - else - { - t2 = MAX(t2, MIN(t1, b2)); - b2 = MIN(b2, MAX(b1, t2)); - } - - stop = MIN(t2, b1); - while (t1 < stop) - { - spanend[t1++] = x; - } - stop = MAX(b2, t2); - while (b1 > stop) - { - spanend[--b1] = x; - } - } - - t2 = uclip[x]; - b2 = dclip[x]; - } - if (t2 < b2 && rcolormap != 0) - { - R_DrawFogBoundarySection(t2, b2, x1); - } - } - - void R_DrawParticle(vissprite_t *sprite) - { - if (r_swtruecolor) - R_DrawParticle_rgba(sprite); - else - R_DrawParticle_C(sprite); - } -} diff --git a/src/r_draw_tc.h b/src/r_draw_tc.h deleted file mode 100644 index 8c1af58fcb..0000000000 --- a/src/r_draw_tc.h +++ /dev/null @@ -1,239 +0,0 @@ - -#pragma once - -#include "r_defs.h" - -struct FSWColormap; - -EXTERN_CVAR(Bool, r_multithreaded); -EXTERN_CVAR(Bool, r_magfilter); -EXTERN_CVAR(Bool, r_minfilter); -EXTERN_CVAR(Bool, r_mipmap); -EXTERN_CVAR(Float, r_lod_bias); -EXTERN_CVAR(Int, r_drawfuzz); -EXTERN_CVAR(Bool, r_drawtrans); -EXTERN_CVAR(Float, transsouls); -EXTERN_CVAR(Int, r_columnmethod); - -namespace swrenderer -{ - struct vissprite_t; - - struct ShadeConstants - { - uint16_t light_alpha; - uint16_t light_red; - uint16_t light_green; - uint16_t light_blue; - uint16_t 
fade_alpha; - uint16_t fade_red; - uint16_t fade_green; - uint16_t fade_blue; - uint16_t desaturate; - bool simple_shade; - }; - - extern double dc_texturemid; - - namespace drawerargs - { - extern int dc_pitch; - extern lighttable_t *dc_colormap; - extern FSWColormap *dc_fcolormap; - extern ShadeConstants dc_shade_constants; - extern fixed_t dc_light; - extern int dc_x; - extern int dc_yl; - extern int dc_yh; - extern fixed_t dc_iscale; - extern fixed_t dc_texturefrac; - extern uint32_t dc_textureheight; - extern int dc_color; - extern uint32_t dc_srccolor; - extern uint32_t dc_srccolor_bgra; - extern uint32_t *dc_srcblend; - extern uint32_t *dc_destblend; - extern fixed_t dc_srcalpha; - extern fixed_t dc_destalpha; - extern const uint8_t *dc_source; - extern const uint8_t *dc_source2; - extern uint32_t dc_texturefracx; - extern uint8_t *dc_translation; - extern uint8_t *dc_dest; - extern uint8_t *dc_destorg; - extern int dc_destheight; - extern int dc_count; - - extern bool drawer_needs_pal_input; - - extern uint32_t vplce[4]; - extern uint32_t vince[4]; - extern uint8_t *palookupoffse[4]; - extern fixed_t palookuplight[4]; - extern const uint8_t *bufplce[4]; - extern const uint8_t *bufplce2[4]; - extern uint32_t buftexturefracx[4]; - extern uint32_t bufheight[4]; - extern int vlinebits; - extern int mvlinebits; - extern int tmvlinebits; - - extern int ds_y; - extern int ds_x1; - extern int ds_x2; - extern lighttable_t * ds_colormap; - extern FSWColormap *ds_fcolormap; - extern ShadeConstants ds_shade_constants; - extern dsfixed_t ds_light; - extern dsfixed_t ds_xfrac; - extern dsfixed_t ds_yfrac; - extern dsfixed_t ds_xstep; - extern dsfixed_t ds_ystep; - extern int ds_xbits; - extern int ds_ybits; - extern fixed_t ds_alpha; - extern double ds_lod; - extern const uint8_t *ds_source; - extern bool ds_source_mipmapped; - extern int ds_color; - - extern unsigned int dc_tspans[4][MAXHEIGHT]; - extern unsigned int *dc_ctspan[4]; - extern unsigned int *horizspan[4]; - 
} - - extern int ylookup[MAXHEIGHT]; - extern uint8_t shadetables[/*NUMCOLORMAPS*16*256*/]; - extern FDynamicColormap ShadeFakeColormap[16]; - extern uint8_t identitymap[256]; - extern FDynamicColormap identitycolormap; - - // Spectre/Invisibility. - #define FUZZTABLE 50 - extern int fuzzoffset[FUZZTABLE + 1]; - extern int fuzzpos; - extern int fuzzviewheight; - - extern bool r_swtruecolor; - - void R_InitColumnDrawers(); - void R_InitShadeMaps(); - void R_InitFuzzTable(int fuzzoff); - - enum ESPSResult - { - DontDraw, // not useful to draw this - DoDraw0, // draw this as if r_columnmethod is 0 - DoDraw1, // draw this as if r_columnmethod is 1 - }; - - ESPSResult R_SetPatchStyle(FRenderStyle style, fixed_t alpha, int translation, uint32_t color); - ESPSResult R_SetPatchStyle(FRenderStyle style, float alpha, int translation, uint32_t color); - void R_FinishSetPatchStyle(); // Call this after finished drawing the current thing, in case its style was STYLE_Shade - bool R_GetTransMaskDrawers(fixed_t(**tmvline1)(), void(**tmvline4)()); - - const uint8_t *R_GetColumn(FTexture *tex, int col); - void wallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const uint8_t *(*getcol)(FTexture *tex, int col) = R_GetColumn); - void maskwallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const uint8_t *(*getcol)(FTexture *tex, int col) = R_GetColumn); - void transmaskwallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const uint8_t *(*getcol)(FTexture *tex, int col) = R_GetColumn); - - void rt_initcols(uint8_t *buffer = nullptr); - void rt_span_coverage(int x, int start, int stop); - void rt_draw4cols(int sx); - void rt_flip_posts(); - void rt_copy1col(int hx, int sx, int yl, int yh); - void rt_copy4cols(int sx, int yl, int yh); - void rt_shaded1col(int hx, int sx, int yl, int yh); - void rt_shaded4cols(int sx, int yl, int yh); - void rt_map1col(int hx, int 
sx, int yl, int yh); - void rt_add1col(int hx, int sx, int yl, int yh); - void rt_addclamp1col(int hx, int sx, int yl, int yh); - void rt_subclamp1col(int hx, int sx, int yl, int yh); - void rt_revsubclamp1col(int hx, int sx, int yl, int yh); - void rt_tlate1col(int hx, int sx, int yl, int yh); - void rt_tlateadd1col(int hx, int sx, int yl, int yh); - void rt_tlateaddclamp1col(int hx, int sx, int yl, int yh); - void rt_tlatesubclamp1col(int hx, int sx, int yl, int yh); - void rt_tlaterevsubclamp1col(int hx, int sx, int yl, int yh); - void rt_map4cols(int sx, int yl, int yh); - void rt_add4cols(int sx, int yl, int yh); - void rt_addclamp4cols(int sx, int yl, int yh); - void rt_subclamp4cols(int sx, int yl, int yh); - void rt_revsubclamp4cols(int sx, int yl, int yh); - void rt_tlate4cols(int sx, int yl, int yh); - void rt_tlateadd4cols(int sx, int yl, int yh); - void rt_tlateaddclamp4cols(int sx, int yl, int yh); - void rt_tlatesubclamp4cols(int sx, int yl, int yh); - void rt_tlaterevsubclamp4cols(int sx, int yl, int yh); - void R_DrawColumnHoriz(); - void R_DrawColumn(); - void R_DrawFuzzColumn(); - void R_DrawTranslatedColumn(); - void R_DrawShadedColumn(); - void R_FillColumn(); - void R_FillAddColumn(); - void R_FillAddClampColumn(); - void R_FillSubClampColumn(); - void R_FillRevSubClampColumn(); - void R_DrawAddColumn(); - void R_DrawTlatedAddColumn(); - void R_DrawAddClampColumn(); - void R_DrawAddClampTranslatedColumn(); - void R_DrawSubClampColumn(); - void R_DrawSubClampTranslatedColumn(); - void R_DrawRevSubClampColumn(); - void R_DrawRevSubClampTranslatedColumn(); - void R_DrawSpan(); - void R_DrawSpanMasked(); - void R_DrawSpanTranslucent(); - void R_DrawSpanMaskedTranslucent(); - void R_DrawSpanAddClamp(); - void R_DrawSpanMaskedAddClamp(); - void R_FillSpan(); - void R_DrawTiltedSpan(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t 
pviewx, fixed_t pviewy); - void R_DrawColoredSpan(int y, int x1, int x2); - void R_SetupDrawSlab(FSWColormap *base_colormap, float light, int shade); - void R_DrawSlab(int dx, fixed_t v, int dy, fixed_t vi, const uint8_t *vptr, uint8_t *p); - void R_DrawFogBoundary(int x1, int x2, short *uclip, short *dclip); - uint32_t vlinec1(); - void vlinec4(); - uint32_t mvlinec1(); - void mvlinec4(); - fixed_t tmvline1_add(); - void tmvline4_add(); - fixed_t tmvline1_addclamp(); - void tmvline4_addclamp(); - fixed_t tmvline1_subclamp(); - void tmvline4_subclamp(); - fixed_t tmvline1_revsubclamp(); - void tmvline4_revsubclamp(); - void R_FillColumnHoriz(); - void R_FillSpan(); - - inline uint32_t dovline1() { return vlinec1(); } - inline void dovline4() { vlinec4(); } - inline uint32_t domvline1() { return mvlinec1(); } - inline void domvline4() { mvlinec4(); } - - void setupvline(int fracbits); - void setupmvline(int fracbits); - void setuptmvline(int fracbits); - - void R_DrawSingleSkyCol1(uint32_t solid_top, uint32_t solid_bottom); - void R_DrawSingleSkyCol4(uint32_t solid_top, uint32_t solid_bottom); - void R_DrawDoubleSkyCol1(uint32_t solid_top, uint32_t solid_bottom); - void R_DrawDoubleSkyCol4(uint32_t solid_top, uint32_t solid_bottom); - - // Sets dc_colormap and dc_light to their appropriate values depending on the output format (pal vs true color) - void R_SetColorMapLight(FSWColormap *base_colormap, float light, int shade); - void R_SetDSColorMapLight(FSWColormap *base_colormap, float light, int shade); - void R_SetTranslationMap(lighttable_t *translation); - - void R_SetupSpanBits(FTexture *tex); - void R_SetSpanColormap(FDynamicColormap *colormap, int shade); - void R_SetSpanSource(FTexture *tex); - - void R_MapTiltedPlane(int y, int x1); - void R_MapColoredPlane(int y, int x1); - void R_DrawParticle(vissprite_t *); -} From 480b22c50eca98383fed42a938cb49d51d6af20d Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 7 Dec 2016 18:59:48 +0100 Subject: [PATCH 
461/912] Add more checks for FTexture::TEX_Null --- src/r_poly_sprite.cpp | 2 +- src/r_poly_wallsprite.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/r_poly_sprite.cpp b/src/r_poly_sprite.cpp index c17bc5909f..e5d8e0ce56 100644 --- a/src/r_poly_sprite.cpp +++ b/src/r_poly_sprite.cpp @@ -74,7 +74,7 @@ void RenderPolySprite::Render(const TriMatrix &worldToClip, const Vec4f &clipPla bool flipTextureX = false; FTexture *tex = GetSpriteTexture(thing, flipTextureX); - if (tex == nullptr) + if (tex == nullptr || tex->UseType == FTexture::TEX_Null) return; DVector2 spriteScale = thing->Scale; diff --git a/src/r_poly_wallsprite.cpp b/src/r_poly_wallsprite.cpp index 0521fb06e3..e7fe267b8b 100644 --- a/src/r_poly_wallsprite.cpp +++ b/src/r_poly_wallsprite.cpp @@ -38,7 +38,7 @@ void RenderPolyWallSprite::Render(const TriMatrix &worldToClip, const Vec4f &cli bool flipTextureX = false; FTexture *tex = RenderPolySprite::GetSpriteTexture(thing, flipTextureX); - if (tex == nullptr) + if (tex == nullptr || tex->UseType == FTexture::TEX_Null) return; DVector2 spriteScale = thing->Scale; From 066158be6993645d754a1f0e267ff015ab3fd102 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 7 Dec 2016 22:26:18 +0100 Subject: [PATCH 462/912] Fix mirror --- src/r_poly_portal.cpp | 34 +++++++++++++++++++++++----------- src/r_poly_portal.h | 2 +- src/r_poly_scene.cpp | 9 ++++++++- src/r_poly_triangle.cpp | 14 ++++++++++++-- src/r_poly_triangle.h | 4 +++- 5 files changed, 47 insertions(+), 16 deletions(-) diff --git a/src/r_poly_portal.cpp b/src/r_poly_portal.cpp index 1bb6007795..96df9daf09 100644 --- a/src/r_poly_portal.cpp +++ b/src/r_poly_portal.cpp @@ -162,11 +162,14 @@ void PolyDrawLinePortal::Render(int portalDepth) TriMatrix::scale(1.0f, glset.pixelstretch, 1.0f) * TriMatrix::swapYZ() * TriMatrix::translate((float)-ViewPos.X, (float)-ViewPos.Y, (float)-ViewPos.Z); + if (Mirror) + worldToView = TriMatrix::scale(-1.0f, 1.0f, 1.0f) * worldToView; TriMatrix 
worldToClip = TriMatrix::perspective(fovy, ratio, 5.0f, 65535.0f) * worldToView; // Calculate plane clipping - DVector2 planePos = Portal->mDestination->v1->fPos(); - DVector2 planeNormal = (Portal->mDestination->v2->fPos() - Portal->mDestination->v1->fPos()).Rotated90CW(); + line_t *clipLine = Portal ? Portal->mDestination : Mirror; + DVector2 planePos = clipLine->v1->fPos(); + DVector2 planeNormal = (clipLine->v2->fPos() - clipLine->v1->fPos()).Rotated90CW(); planeNormal.MakeUnit(); double planeD = -(planeNormal | (planePos + planeNormal * 0.001)); Vec4f portalPlane((float)planeNormal.X, (float)planeNormal.Y, 0.0f, (float)planeD); @@ -191,7 +194,7 @@ void PolyDrawLinePortal::SaveGlobals() savedangle = ViewAngle; savedcamera = camera; savedsector = viewsector; - savedvisibility = camera ? camera->renderflags & RF_INVISIBLE : ActorRenderFlags::FromInt(0); + savedinvisibility = camera ? (camera->renderflags & RF_INVISIBLE) == RF_INVISIBLE : false; savedViewPath[0] = ViewPath[0]; savedViewPath[1] = ViewPath[1]; @@ -229,6 +232,9 @@ void PolyDrawLinePortal::SaveGlobals() ViewPos.Y = (y1 + r * dy) * 2 - y; } ViewAngle = Mirror->Delta().Angle() * 2 - startang; + + if (camera) + camera->renderflags &= ~RF_INVISIBLE; } else { @@ -255,22 +261,25 @@ void PolyDrawLinePortal::SaveGlobals() } } } - - /*if (Portal->mirror) - { - if (MirrorFlags & RF_XFLIP) MirrorFlags &= ~RF_XFLIP; - else MirrorFlags |= RF_XFLIP; - }*/ } - camera = nullptr; + //camera = nullptr; //viewsector = R_PointInSubsector(ViewPos)->sector; R_SetViewAngle(); + + if (Mirror) + PolyTriangleDrawer::toggle_mirror(); } void PolyDrawLinePortal::RestoreGlobals() { - if (!savedvisibility && camera) camera->renderflags &= ~RF_INVISIBLE; + if (camera) + { + if (savedinvisibility) + camera->renderflags |= RF_INVISIBLE; + else + camera->renderflags &= ~RF_INVISIBLE; + } camera = savedcamera; viewsector = savedsector; ViewPos = savedpos; @@ -279,4 +288,7 @@ void PolyDrawLinePortal::RestoreGlobals() ViewPath[0] = 
savedViewPath[0]; ViewPath[1] = savedViewPath[1]; R_SetViewAngle(); + + if (Mirror) + PolyTriangleDrawer::toggle_mirror(); } diff --git a/src/r_poly_portal.h b/src/r_poly_portal.h index 7835bb042f..6cad98ff6a 100644 --- a/src/r_poly_portal.h +++ b/src/r_poly_portal.h @@ -85,6 +85,6 @@ private: DAngle savedangle; AActor *savedcamera; sector_t *savedsector; - ActorRenderFlags savedvisibility; + bool savedinvisibility; DVector3 savedViewPath[2]; }; diff --git a/src/r_poly_scene.cpp b/src/r_poly_scene.cpp index 7304000a67..ef2ef5f1b4 100644 --- a/src/r_poly_scene.cpp +++ b/src/r_poly_scene.cpp @@ -121,9 +121,16 @@ void RenderPolyScene::RenderSubsector(subsector_t *sub) void RenderPolyScene::RenderSprite(AActor *thing, double sortDistance, const DVector2 &left, const DVector2 &right) { if (numnodes == 0) - RenderSprite(thing, sortDistance, left, right, 0.0, 1.0, subsectors); + { + subsector_t *sub = subsectors; + auto it = SubsectorDepths.find(sub); + if (it != SubsectorDepths.end()) + TranslucentObjects.push_back({ thing, sub, it->second, sortDistance, 0.0f, 1.0f }); + } else + { RenderSprite(thing, sortDistance, left, right, 0.0, 1.0, nodes + numnodes - 1); // The head node is the last node output. 
+ } } void RenderPolyScene::RenderSprite(AActor *thing, double sortDistance, DVector2 left, DVector2 right, double t1, double t2, void *node) diff --git a/src/r_poly_triangle.cpp b/src/r_poly_triangle.cpp index 4b6e1c344a..45f244aabd 100644 --- a/src/r_poly_triangle.cpp +++ b/src/r_poly_triangle.cpp @@ -45,6 +45,7 @@ int PolyTriangleDrawer::dest_width; int PolyTriangleDrawer::dest_height; uint8_t *PolyTriangleDrawer::dest; bool PolyTriangleDrawer::dest_bgra; +bool PolyTriangleDrawer::mirror; void PolyTriangleDrawer::set_viewport(int x, int y, int width, int height, DCanvas *canvas) { @@ -66,11 +67,18 @@ void PolyTriangleDrawer::set_viewport(int x, int y, int width, int height, DCanv dest += (offsetx + offsety * dest_pitch) * pixelsize; dest_width = clamp(viewport_x + viewport_width, 0, dest_width - offsetx); dest_height = clamp(viewport_y + viewport_height, 0, dest_height - offsety); + + mirror = false; +} + +void PolyTriangleDrawer::toggle_mirror() +{ + mirror = !mirror; } void PolyTriangleDrawer::draw(const PolyDrawArgs &args, TriDrawVariant variant, TriBlendMode blendmode) { - DrawerCommandQueue::QueueCommand(args, variant, blendmode); + DrawerCommandQueue::QueueCommand(args, variant, blendmode, mirror); } void PolyTriangleDrawer::draw_arrays(const PolyDrawArgs &drawargs, TriDrawVariant variant, TriBlendMode blendmode, WorkerThreadData *thread) @@ -328,9 +336,11 @@ void PolyTriangleDrawer::clipedge(const ShadedTriVertex *verts, TriVertex *clipp ///////////////////////////////////////////////////////////////////////////// -DrawPolyTrianglesCommand::DrawPolyTrianglesCommand(const PolyDrawArgs &args, TriDrawVariant variant, TriBlendMode blendmode) +DrawPolyTrianglesCommand::DrawPolyTrianglesCommand(const PolyDrawArgs &args, TriDrawVariant variant, TriBlendMode blendmode, bool mirror) : args(args), variant(variant), blendmode(blendmode) { + if (mirror) + this->args.ccw = !this->args.ccw; } void DrawPolyTrianglesCommand::Execute(DrawerThread *thread) diff --git 
a/src/r_poly_triangle.h b/src/r_poly_triangle.h index 46948310fe..ecf05a9217 100644 --- a/src/r_poly_triangle.h +++ b/src/r_poly_triangle.h @@ -158,6 +158,7 @@ class PolyTriangleDrawer public: static void set_viewport(int x, int y, int width, int height, DCanvas *canvas); static void draw(const PolyDrawArgs &args, TriDrawVariant variant, TriBlendMode blendmode); + static void toggle_mirror(); private: static ShadedTriVertex shade_vertex(const TriMatrix &objectToClip, const float *clipPlane, const TriVertex &v); @@ -169,6 +170,7 @@ private: static int viewport_x, viewport_y, viewport_width, viewport_height, dest_pitch, dest_width, dest_height; static bool dest_bgra; static uint8_t *dest; + static bool mirror; enum { max_additional_vertices = 16 }; @@ -245,7 +247,7 @@ private: class DrawPolyTrianglesCommand : public DrawerCommand { public: - DrawPolyTrianglesCommand(const PolyDrawArgs &args, TriDrawVariant variant, TriBlendMode blendmode); + DrawPolyTrianglesCommand(const PolyDrawArgs &args, TriDrawVariant variant, TriBlendMode blendmode, bool mirror); void Execute(DrawerThread *thread) override; FString DebugInfo() override; From a1bb6e6b234fa673e5ab732bd845f3aa0273ebed Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 8 Dec 2016 10:29:52 +0100 Subject: [PATCH 463/912] Only render line portals once --- src/r_poly.cpp | 12 ++++++++++++ src/r_poly.h | 6 ++++++ src/r_poly_wall.cpp | 28 +++++++++++++++++----------- 3 files changed, 35 insertions(+), 11 deletions(-) diff --git a/src/r_poly.cpp b/src/r_poly.cpp index 4ea047925e..2e100b3955 100644 --- a/src/r_poly.cpp +++ b/src/r_poly.cpp @@ -117,6 +117,8 @@ void PolyRenderer::ClearBuffers() PolyStencilBuffer::Instance()->Clear(RenderTarget->GetWidth(), RenderTarget->GetHeight(), 0); PolySubsectorGBuffer::Instance()->Resize(RenderTarget->GetPitch(), RenderTarget->GetHeight()); NextStencilValue = 0; + SeenLinePortals.clear(); + SeenMirrors.clear(); } void PolyRenderer::SetSceneViewport() @@ -170,3 +172,13 @@ void 
PolyRenderer::SetupPerspectiveMatrix() WorldToClip = TriMatrix::perspective(fovy, ratio, 5.0f, 65535.0f) * worldToView; } + +bool PolyRenderer::InsertSeenLinePortal(FLinePortal *portal) +{ + return SeenLinePortals.insert(portal).second; +} + +bool PolyRenderer::InsertSeenMirror(line_t *mirrorLine) +{ + return SeenMirrors.insert(mirrorLine).second; +} diff --git a/src/r_poly.h b/src/r_poly.h index bc108a8a13..f34106a4b2 100644 --- a/src/r_poly.h +++ b/src/r_poly.h @@ -47,6 +47,9 @@ public: uint32_t GetNextStencilValue() { uint32_t value = NextStencilValue; NextStencilValue += 2; return value; } + bool InsertSeenLinePortal(FLinePortal *portal); + bool InsertSeenMirror(line_t *mirrorLine); + private: void ClearBuffers(); void SetSceneViewport(); @@ -57,4 +60,7 @@ private: PolySkyDome Skydome; RenderPolyPlayerSprites PlayerSprites; uint32_t NextStencilValue = 0; + + std::set SeenLinePortals; + std::set SeenMirrors; }; diff --git a/src/r_poly_wall.cpp b/src/r_poly_wall.cpp index 384529da08..efc9530138 100644 --- a/src/r_poly_wall.cpp +++ b/src/r_poly_wall.cpp @@ -40,24 +40,30 @@ bool RenderPolyWall::RenderLine(const TriMatrix &worldToClip, const Vec4f &clipP PolyDrawLinePortal *polyportal = nullptr; if (line->backsector == nullptr && line->linedef && line->sidedef == line->linedef->sidedef[0] && (line->linedef->special == Line_Mirror && r_drawmirrors)) { - linePortals.push_back(std::make_unique(line->linedef)); - polyportal = linePortals.back().get(); + if (PolyRenderer::Instance()->InsertSeenMirror(line->linedef)) + { + linePortals.push_back(std::make_unique(line->linedef)); + polyportal = linePortals.back().get(); + } } else if (line->linedef && line->linedef->isVisualPortal()) { FLinePortal *portal = line->linedef->getPortal(); - for (auto &p : linePortals) + if (PolyRenderer::Instance()->InsertSeenLinePortal(portal)) { - if (p->Portal == portal) // To do: what other criterias do we need to check for? 
+ for (auto &p : linePortals) { - polyportal = p.get(); - break; + if (p->Portal == portal) // To do: what other criterias do we need to check for? + { + polyportal = p.get(); + break; + } + } + if (!polyportal) + { + linePortals.push_back(std::make_unique(portal)); + polyportal = linePortals.back().get(); } - } - if (!polyportal) - { - linePortals.push_back(std::make_unique(portal)); - polyportal = linePortals.back().get(); } } From dc82c19901819ffb239f13c3e7e9e7f1d3d04b54 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 8 Dec 2016 10:35:51 +0100 Subject: [PATCH 464/912] Fix sprites/translucent stuff being occluded by portal walls --- src/r_poly_scene.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/r_poly_scene.cpp b/src/r_poly_scene.cpp index ef2ef5f1b4..3b03507f17 100644 --- a/src/r_poly_scene.cpp +++ b/src/r_poly_scene.cpp @@ -274,7 +274,7 @@ void RenderPolyScene::RenderTranslucent(int portalDepth) args.objectToClip = &WorldToClip; args.mode = TriangleDrawMode::Fan; args.stenciltestvalue = portal->StencilValue + 1; - args.stencilwritevalue = StencilValue; + args.stencilwritevalue = StencilValue + 1; args.SetClipPlane(PortalPlane.x, PortalPlane.y, PortalPlane.z, PortalPlane.w); for (const auto &verts : portal->Shape) { @@ -295,7 +295,7 @@ void RenderPolyScene::RenderTranslucent(int portalDepth) args.objectToClip = &WorldToClip; args.mode = TriangleDrawMode::Fan; args.stenciltestvalue = portal->StencilValue + 1; - args.stencilwritevalue = StencilValue; + args.stencilwritevalue = StencilValue + 1; args.SetClipPlane(PortalPlane.x, PortalPlane.y, PortalPlane.z, PortalPlane.w); for (const auto &verts : portal->Shape) { From 2854e1fe03f847e39857b14c3efa8b4162c7a525 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 8 Dec 2016 13:07:52 +0100 Subject: [PATCH 465/912] Fix crash reporter for 64 bit (cherry picked from commit 3a0de8b0c714507823edaa24875bc97b39c76ab6) --- src/win32/i_main.cpp | 20 +++++++++++++++----- 1 file 
changed, 15 insertions(+), 5 deletions(-) diff --git a/src/win32/i_main.cpp b/src/win32/i_main.cpp index db92467bc3..0931aff99a 100644 --- a/src/win32/i_main.cpp +++ b/src/win32/i_main.cpp @@ -1184,6 +1184,11 @@ void CALLBACK ExitFatally (ULONG_PTR dummy) // //========================================================================== +namespace +{ + CONTEXT MainThreadContext; +} + LONG WINAPI CatchAllExceptions (LPEXCEPTION_POINTERS info) { #ifdef _DEBUG @@ -1208,11 +1213,7 @@ LONG WINAPI CatchAllExceptions (LPEXCEPTION_POINTERS info) // Otherwise, put the crashing thread to sleep and signal the main thread to clean up. if (GetCurrentThreadId() == MainThreadID) { -#ifndef _M_X64 - info->ContextRecord->Eip = (DWORD_PTR)ExitFatally; -#else - info->ContextRecord->Rip = (DWORD_PTR)ExitFatally; -#endif + *info->ContextRecord = MainThreadContext; } else { @@ -1304,6 +1305,15 @@ int WINAPI WinMain (HINSTANCE hInstance, HINSTANCE nothing, LPSTR cmdline, int n if (MainThread != INVALID_HANDLE_VALUE) { SetUnhandledExceptionFilter (CatchAllExceptions); + + static bool setJumpResult = false; + RtlCaptureContext(&MainThreadContext); + if (setJumpResult) + { + ExitFatally(0); + return 0; + } + setJumpResult = true; } #endif From f1df400cc7c7256530d00b94de37a23767f82b6b Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 9 Dec 2016 03:17:35 +0100 Subject: [PATCH 466/912] Add some portal segment culling and disable sector portals for now --- src/r_poly_cull.cpp | 15 ++++++++-- src/r_poly_cull.h | 5 ++-- src/r_poly_plane.cpp | 66 +++++++++++++++++++++++++++++++++++++++---- src/r_poly_plane.h | 5 ++-- src/r_poly_portal.cpp | 4 ++- src/r_poly_portal.h | 10 +++++++ src/r_poly_scene.cpp | 22 ++++++++++++--- src/r_poly_scene.h | 3 ++ src/r_poly_wall.cpp | 21 ++++++++------ src/r_poly_wall.h | 7 +++-- 10 files changed, 129 insertions(+), 29 deletions(-) diff --git a/src/r_poly_cull.cpp b/src/r_poly_cull.cpp index a688c26444..59b7c0c5a3 100644 --- a/src/r_poly_cull.cpp +++ 
b/src/r_poly_cull.cpp @@ -30,7 +30,6 @@ void PolyCull::CullScene(const TriMatrix &worldToClip, const Vec4f &portalClipPlane) { - ClearSolidSegments(); PvsSectors.clear(); frustumPlanes = FrustumPlanes(worldToClip); PortalClipPlane = portalClipPlane; @@ -42,8 +41,6 @@ void PolyCull::CullScene(const TriMatrix &worldToClip, const Vec4f &portalClipPl CullSubsector(subsectors); else CullNode(nodes + numnodes - 1); // The head node is the last node output. - - ClearSolidSegments(); } void PolyCull::CullNode(void *node) @@ -109,6 +106,18 @@ void PolyCull::ClearSolidSegments() SolidSegments.push_back({ SolidCullScale , 0x7fff }); } +void PolyCull::InvertSegments() +{ + TempInvertSolidSegments.swap(SolidSegments); + ClearSolidSegments(); + int x = -0x7fff; + for (const auto &segment : TempInvertSolidSegments) + { + MarkSegmentCulled(x, segment.X1 - 1); + x = segment.X2 + 1; + } +} + bool PolyCull::IsSegmentCulled(int x1, int x2) const { x1 = clamp(x1, -0x7ffe, 0x7ffd); diff --git a/src/r_poly_cull.h b/src/r_poly_cull.h index fe3cd9f5d3..4c0cfe314b 100644 --- a/src/r_poly_cull.h +++ b/src/r_poly_cull.h @@ -35,11 +35,13 @@ enum class LineSegmentRange class PolyCull { public: + void ClearSolidSegments(); void CullScene(const TriMatrix &worldToClip, const Vec4f &portalClipPlane); LineSegmentRange GetSegmentRangeForLine(double x1, double y1, double x2, double y2, int &sx1, int &sx2) const; void MarkSegmentCulled(int x1, int x2); bool IsSegmentCulled(int x1, int x2) const; + void InvertSegments(); std::vector PvsSectors; double MaxCeilingHeight = 0.0; @@ -60,9 +62,8 @@ private: // Returns true if some part of the bbox might be visible. 
bool CheckBBox(float *bspcoord); - void ClearSolidSegments(); - std::vector SolidSegments; + std::vector TempInvertSolidSegments; const int SolidCullScale = 3000; FrustumPlanes frustumPlanes; diff --git a/src/r_poly_plane.cpp b/src/r_poly_plane.cpp index 24b00af000..c7f011a760 100644 --- a/src/r_poly_plane.cpp +++ b/src/r_poly_plane.cpp @@ -32,7 +32,7 @@ EXTERN_CVAR(Int, r_3dfloors) -void RenderPolyPlane::RenderPlanes(const TriMatrix &worldToClip, const Vec4f &clipPlane, subsector_t *sub, uint32_t subsectorDepth, uint32_t stencilValue, double skyCeilingHeight, double skyFloorHeight, std::vector<std::unique_ptr<PolyDrawSectorPortal>> &sectorPortals) +void RenderPolyPlane::RenderPlanes(const TriMatrix &worldToClip, const Vec4f &clipPlane, PolyCull &cull, subsector_t *sub, uint32_t subsectorDepth, uint32_t stencilValue, double skyCeilingHeight, double skyFloorHeight, std::vector<std::unique_ptr<PolyDrawSectorPortal>> &sectorPortals) { RenderPolyPlane plane; @@ -84,8 +84,8 @@ void RenderPolyPlane::RenderPlanes(const TriMatrix &worldToClip, const Vec4f &cl } } - plane.Render(worldToClip, clipPlane, sub, subsectorDepth, stencilValue, true, skyCeilingHeight, sectorPortals); - plane.Render(worldToClip, clipPlane, sub, subsectorDepth, stencilValue, false, skyFloorHeight, sectorPortals); + plane.Render(worldToClip, clipPlane, cull, sub, subsectorDepth, stencilValue, true, skyCeilingHeight, sectorPortals); + plane.Render(worldToClip, clipPlane, cull, sub, subsectorDepth, stencilValue, false, skyFloorHeight, sectorPortals); } void RenderPolyPlane::Render3DFloor(const TriMatrix &worldToClip, const Vec4f &clipPlane, subsector_t *sub, uint32_t subsectorDepth, uint32_t stencilValue, bool ceiling, F3DFloor *fakeFloor) @@ -147,9 +147,10 @@ void RenderPolyPlane::Render3DFloor(const TriMatrix &worldToClip, const Vec4f &c PolyTriangleDrawer::draw(args, TriDrawVariant::Stencil, TriBlendMode::Copy); } -void RenderPolyPlane::Render(const TriMatrix &worldToClip, const Vec4f &clipPlane, subsector_t *sub, uint32_t subsectorDepth, uint32_t stencilValue, bool ceiling, double 
skyHeight, std::vector<std::unique_ptr<PolyDrawSectorPortal>> &sectorPortals) +void RenderPolyPlane::Render(const TriMatrix &worldToClip, const Vec4f &clipPlane, PolyCull &cull, subsector_t *sub, uint32_t subsectorDepth, uint32_t stencilValue, bool ceiling, double skyHeight, std::vector<std::unique_ptr<PolyDrawSectorPortal>> &sectorPortals) { - FSectorPortal *portal = sub->sector->ValidatePortal(ceiling ? sector_t::ceiling : sector_t::floor); + std::vector portalSegments; + FSectorPortal *portal = nullptr;// sub->sector->ValidatePortal(ceiling ? sector_t::ceiling : sector_t::floor); PolyDrawSectorPortal *polyportal = nullptr; if (portal && (portal->mFlags & PORTSF_INSKYBOX) == PORTSF_INSKYBOX) // Do not recurse into portals we already recursed into portal = nullptr; @@ -157,7 +158,7 @@ void RenderPolyPlane::Render(const TriMatrix &worldToClip, const Vec4f &clipPlan { for (auto &p : sectorPortals) { - if (p->Portal == portal) // To do: what other criterias do we need to check for? + if (p->Portal == portal) // To do: what other criteria do we need to check for? { polyportal = p.get(); break; @@ -168,6 +169,54 @@ void RenderPolyPlane::Render(const TriMatrix &worldToClip, const Vec4f &clipPlan sectorPortals.push_back(std::make_unique(portal, ceiling)); polyportal = sectorPortals.back().get(); } + + // Calculate portal clipping + + DVector2 v; + bool inside = true; + double vdist = 1.0e10; + + portalSegments.reserve(sub->numlines); + for (uint32_t i = 0; i < sub->numlines; i++) + { + seg_t *line = &sub->firstline[i]; + + DVector2 pt1 = line->v1->fPos() - ViewPos; + DVector2 pt2 = line->v2->fPos() - ViewPos; + if (pt1.Y * (pt1.X - pt2.X) + pt1.X * (pt2.Y - pt1.Y) >= 0) + inside = false; + + double dist = pt1.LengthSquared(); + if (dist < vdist) + { + v = line->v1->fPos(); + vdist = dist; + } + dist = pt2.LengthSquared(); + if (dist < vdist) + { + v = line->v2->fPos(); + vdist = dist; + } + + int sx1, sx2; + LineSegmentRange range = cull.GetSegmentRangeForLine(line->v1->fX(), line->v1->fY(), line->v2->fX(), line->v2->fY(), sx1, sx2); + if (range == 
LineSegmentRange::HasSegment) + portalSegments.push_back({ sx1, sx2 }); + } + + if (inside) + { + polyportal->PortalPlane = Vec4f(0.0f, 0.0f, 0.0f, 1.0f); + } + else if(polyportal->PortalPlane == Vec4f(0.0f) || Vec4f::dot(polyportal->PortalPlane, Vec4f((float)v.X, (float)v.Y, 0.0f, 1.0f)) > 0.0f) + { + DVector2 planePos = v; + DVector2 planeNormal = v - ViewPos; + planeNormal.MakeUnit(); + double planeD = -(planeNormal | (planePos + planeNormal * 0.001)); + polyportal->PortalPlane = Vec4f((float)planeNormal.X, (float)planeNormal.Y, 0.0f, (float)planeD); + } } sector_t *fakesector = sub->sector->heightsec; @@ -300,6 +349,7 @@ void RenderPolyPlane::Render(const TriMatrix &worldToClip, const Vec4f &clipPlan args.stencilwritevalue = polyportal->StencilValue; PolyTriangleDrawer::draw(args, TriDrawVariant::Stencil, TriBlendMode::Copy); polyportal->Shape.push_back({ args.vinput, args.vcount, args.ccw, subsectorDepth }); + polyportal->Segments.insert(polyportal->Segments.end(), portalSegments.begin(), portalSegments.end()); } } else @@ -308,6 +358,7 @@ void RenderPolyPlane::Render(const TriMatrix &worldToClip, const Vec4f &clipPlan { args.stencilwritevalue = polyportal->StencilValue; polyportal->Shape.push_back({ args.vinput, args.vcount, args.ccw, subsectorDepth }); + polyportal->Segments.insert(polyportal->Segments.end(), portalSegments.begin(), portalSegments.end()); } else { @@ -385,7 +436,10 @@ void RenderPolyPlane::Render(const TriMatrix &worldToClip, const Vec4f &clipPlan PolyTriangleDrawer::draw(args, TriDrawVariant::Stencil, TriBlendMode::Copy); if (portal) + { polyportal->Shape.push_back({ args.vinput, args.vcount, args.ccw, subsectorDepth }); + polyportal->Segments.insert(polyportal->Segments.end(), portalSegments.begin(), portalSegments.end()); + } } } } diff --git a/src/r_poly_plane.h b/src/r_poly_plane.h index 396a9e4a88..be307b4e23 100644 --- a/src/r_poly_plane.h +++ b/src/r_poly_plane.h @@ -25,12 +25,13 @@ #include "r_poly_triangle.h" class 
PolyDrawSectorPortal; +class PolyCull; class Vec4f; class RenderPolyPlane { public: - static void RenderPlanes(const TriMatrix &worldToClip, const Vec4f &clipPlane, subsector_t *sub, uint32_t subsectorDepth, uint32_t stencilValue, double skyCeilingHeight, double skyFloorHeight, std::vector<std::unique_ptr<PolyDrawSectorPortal>> &sectorPortals); + static void RenderPlanes(const TriMatrix &worldToClip, const Vec4f &clipPlane, PolyCull &cull, subsector_t *sub, uint32_t subsectorDepth, uint32_t stencilValue, double skyCeilingHeight, double skyFloorHeight, std::vector<std::unique_ptr<PolyDrawSectorPortal>> &sectorPortals); private: struct UVTransform @@ -48,6 +49,6 @@ private: }; void Render3DFloor(const TriMatrix &worldToClip, const Vec4f &clipPlane, subsector_t *sub, uint32_t subsectorDepth, uint32_t stencilValue, bool ceiling, F3DFloor *fakefloor); - void Render(const TriMatrix &worldToClip, const Vec4f &clipPlane, subsector_t *sub, uint32_t subsectorDepth, uint32_t stencilValue, bool ceiling, double skyHeight, std::vector<std::unique_ptr<PolyDrawSectorPortal>> &sectorPortals); + void Render(const TriMatrix &worldToClip, const Vec4f &clipPlane, PolyCull &cull, subsector_t *sub, uint32_t subsectorDepth, uint32_t stencilValue, bool ceiling, double skyHeight, std::vector<std::unique_ptr<PolyDrawSectorPortal>> &sectorPortals); TriVertex PlaneVertex(vertex_t *v1, double height, const UVTransform &transform); }; diff --git a/src/r_poly_portal.cpp b/src/r_poly_portal.cpp index 96df9daf09..580622c1fb 100644 --- a/src/r_poly_portal.cpp +++ b/src/r_poly_portal.cpp @@ -64,7 +64,8 @@ void PolyDrawSectorPortal::Render(int portalDepth) TriMatrix::translate((float)-ViewPos.X, (float)-ViewPos.Y, (float)-ViewPos.Z); TriMatrix worldToClip = TriMatrix::perspective(fovy, ratio, 5.0f, 65535.0f) * worldToView; - RenderPortal.SetViewpoint(worldToClip, Vec4f(0.0f, 0.0f, 0.0f, 1.0f), StencilValue); + RenderPortal.SetViewpoint(worldToClip, PortalPlane, StencilValue); + RenderPortal.SetPortalSegments(Segments); RenderPortal.Render(portalDepth); RestoreGlobals(); @@ -175,6 +176,7 @@ void PolyDrawLinePortal::Render(int portalDepth) Vec4f 
portalPlane((float)planeNormal.X, (float)planeNormal.Y, 0.0f, (float)planeD); RenderPortal.SetViewpoint(worldToClip, portalPlane, StencilValue); + RenderPortal.SetPortalSegments(Segments); RenderPortal.Render(portalDepth); RestoreGlobals(); diff --git a/src/r_poly_portal.h b/src/r_poly_portal.h index 6cad98ff6a..4f23567610 100644 --- a/src/r_poly_portal.h +++ b/src/r_poly_portal.h @@ -33,6 +33,13 @@ struct PolyPortalVertexRange uint32_t SubsectorDepth; }; +class PolyPortalSegment +{ +public: + PolyPortalSegment(int x1, int x2) : X1(x1), X2(x2) { } + int X1, X2; +}; + class PolyDrawSectorPortal { public: @@ -44,6 +51,8 @@ public: FSectorPortal *Portal = nullptr; uint32_t StencilValue = 0; std::vector Shape; + std::vector Segments; + Vec4f PortalPlane = Vec4f(0.0f); private: void SaveGlobals(); @@ -73,6 +82,7 @@ public: line_t *Mirror = nullptr; uint32_t StencilValue = 0; std::vector Shape; + std::vector Segments; private: void SaveGlobals(); diff --git a/src/r_poly_scene.cpp b/src/r_poly_scene.cpp index 3b03507f17..c0697f55ed 100644 --- a/src/r_poly_scene.cpp +++ b/src/r_poly_scene.cpp @@ -50,10 +50,24 @@ void RenderPolyScene::SetViewpoint(const TriMatrix &worldToClip, const Vec4f &po PortalPlane = portalPlane; } +void RenderPolyScene::SetPortalSegments(const std::vector &segments) +{ + Cull.ClearSolidSegments(); + for (const auto &segment : segments) + { + Cull.MarkSegmentCulled(segment.X1, segment.X2); + } + Cull.InvertSegments(); + PortalSegmentsAdded = true; +} + void RenderPolyScene::Render(int portalDepth) { ClearBuffers(); + if (!PortalSegmentsAdded) + Cull.ClearSolidSegments(); Cull.CullScene(WorldToClip, PortalPlane); + Cull.ClearSolidSegments(); RenderSectors(); RenderPortals(portalDepth); } @@ -91,7 +105,7 @@ void RenderPolyScene::RenderSubsector(subsector_t *sub) if (sub->sector->CenterFloor() != sub->sector->CenterCeiling()) { - RenderPolyPlane::RenderPlanes(WorldToClip, PortalPlane, sub, subsectorDepth, StencilValue, Cull.MaxCeilingHeight, 
Cull.MinFloorHeight, SectorPortals); + RenderPolyPlane::RenderPlanes(WorldToClip, PortalPlane, Cull, sub, subsectorDepth, StencilValue, Cull.MaxCeilingHeight, Cull.MinFloorHeight, SectorPortals); } for (uint32_t i = 0; i < sub->numlines; i++) @@ -199,12 +213,12 @@ void RenderPolyScene::RenderLine(subsector_t *sub, seg_t *line, sector_t *fronts if (!(fakeFloor->flags & FF_EXISTS)) continue; if (!(fakeFloor->flags & FF_RENDERPLANES)) continue; if (!fakeFloor->model) continue; - RenderPolyWall::Render3DFloorLine(WorldToClip, PortalPlane, line, frontsector, subsectorDepth, StencilValue, fakeFloor, TranslucentObjects); + RenderPolyWall::Render3DFloorLine(WorldToClip, PortalPlane, Cull, line, frontsector, subsectorDepth, StencilValue, fakeFloor, TranslucentObjects); } } // Render wall, and update culling info if its an occlusion blocker - if (RenderPolyWall::RenderLine(WorldToClip, PortalPlane, line, frontsector, subsectorDepth, StencilValue, TranslucentObjects, LinePortals)) + if (RenderPolyWall::RenderLine(WorldToClip, PortalPlane, Cull, line, frontsector, subsectorDepth, StencilValue, TranslucentObjects, LinePortals)) { if (segmentRange == LineSegmentRange::HasSegment) Cull.MarkSegmentCulled(sx1, sx2); @@ -332,7 +346,7 @@ void RenderPolyScene::RenderTranslucent(int portalDepth) } else if (!obj.thing) { - obj.wall.Render(WorldToClip, PortalPlane); + obj.wall.Render(WorldToClip, PortalPlane, Cull); } else if ((obj.thing->renderflags & RF_SPRITETYPEMASK) == RF_WALLSPRITE) { diff --git a/src/r_poly_scene.h b/src/r_poly_scene.h index 1e3037b124..00502e53ee 100644 --- a/src/r_poly_scene.h +++ b/src/r_poly_scene.h @@ -67,6 +67,7 @@ public: class PolyDrawSectorPortal; class PolyDrawLinePortal; +class PolyPortalSegment; // Renders everything from a specific viewpoint class RenderPolyScene @@ -75,6 +76,7 @@ public: RenderPolyScene(); ~RenderPolyScene(); void SetViewpoint(const TriMatrix &worldToClip, const Vec4f &portalPlane, uint32_t stencilValue); + void 
SetPortalSegments(const std::vector &segments); void Render(int portalDepth); void RenderTranslucent(int portalDepth); @@ -100,4 +102,5 @@ private: std::vector> SectorPortals; std::vector> LinePortals; + bool PortalSegmentsAdded = false; }; diff --git a/src/r_poly_wall.cpp b/src/r_poly_wall.cpp index efc9530138..bcb5bb16b2 100644 --- a/src/r_poly_wall.cpp +++ b/src/r_poly_wall.cpp @@ -35,7 +35,7 @@ EXTERN_CVAR(Bool, r_drawmirrors) -bool RenderPolyWall::RenderLine(const TriMatrix &worldToClip, const Vec4f &clipPlane, seg_t *line, sector_t *frontsector, uint32_t subsectorDepth, uint32_t stencilValue, std::vector &translucentWallsOutput, std::vector> &linePortals) +bool RenderPolyWall::RenderLine(const TriMatrix &worldToClip, const Vec4f &clipPlane, PolyCull &cull, seg_t *line, sector_t *frontsector, uint32_t subsectorDepth, uint32_t stencilValue, std::vector &translucentWallsOutput, std::vector> &linePortals) { PolyDrawLinePortal *polyportal = nullptr; if (line->backsector == nullptr && line->linedef && line->sidedef == line->linedef->sidedef[0] && (line->linedef->special == Line_Mirror && r_drawmirrors)) @@ -91,7 +91,7 @@ bool RenderPolyWall::RenderLine(const TriMatrix &worldToClip, const Vec4f &clipP wall.UnpeggedCeil = frontceilz1; wall.Texpart = side_t::mid; wall.Polyportal = polyportal; - wall.Render(worldToClip, clipPlane); + wall.Render(worldToClip, clipPlane, cull); return true; } } @@ -126,7 +126,7 @@ bool RenderPolyWall::RenderLine(const TriMatrix &worldToClip, const Vec4f &clipP wall.BottomZ = topfloorz1; wall.UnpeggedCeil = topceilz1; wall.Texpart = side_t::top; - wall.Render(worldToClip, clipPlane); + wall.Render(worldToClip, clipPlane, cull); } if ((bottomfloorz1 < bottomceilz1 || bottomfloorz2 < bottomceilz2) && line->sidedef) @@ -136,7 +136,7 @@ bool RenderPolyWall::RenderLine(const TriMatrix &worldToClip, const Vec4f &clipP wall.BottomZ = bottomfloorz2; wall.UnpeggedCeil = topceilz1; wall.Texpart = side_t::bottom; - wall.Render(worldToClip, 
clipPlane); + wall.Render(worldToClip, clipPlane, cull); } if (line->sidedef) @@ -155,14 +155,14 @@ bool RenderPolyWall::RenderLine(const TriMatrix &worldToClip, const Vec4f &clipP if (polyportal) { wall.Polyportal = polyportal; - wall.Render(worldToClip, clipPlane); + wall.Render(worldToClip, clipPlane, cull); } } } return polyportal != nullptr; } -void RenderPolyWall::Render3DFloorLine(const TriMatrix &worldToClip, const Vec4f &clipPlane, seg_t *line, sector_t *frontsector, uint32_t subsectorDepth, uint32_t stencilValue, F3DFloor *fakeFloor, std::vector &translucentWallsOutput) +void RenderPolyWall::Render3DFloorLine(const TriMatrix &worldToClip, const Vec4f &clipPlane, PolyCull &cull, seg_t *line, sector_t *frontsector, uint32_t subsectorDepth, uint32_t stencilValue, F3DFloor *fakeFloor, std::vector &translucentWallsOutput) { double frontceilz1 = fakeFloor->top.plane->ZatPoint(line->v1); double frontfloorz1 = fakeFloor->bottom.plane->ZatPoint(line->v1); @@ -182,7 +182,7 @@ void RenderPolyWall::Render3DFloorLine(const TriMatrix &worldToClip, const Vec4f wall.BottomZ = frontfloorz1; wall.UnpeggedCeil = frontceilz1; wall.Texpart = side_t::mid; - wall.Render(worldToClip, clipPlane); + wall.Render(worldToClip, clipPlane, cull); } void RenderPolyWall::SetCoords(const DVector2 &v1, const DVector2 &v2, double ceil1, double floor1, double ceil2, double floor2) @@ -195,7 +195,7 @@ void RenderPolyWall::SetCoords(const DVector2 &v1, const DVector2 &v2, double ce this->floor2 = floor2; } -void RenderPolyWall::Render(const TriMatrix &worldToClip, const Vec4f &clipPlane) +void RenderPolyWall::Render(const TriMatrix &worldToClip, const Vec4f &clipPlane, PolyCull &cull) { FTexture *tex = GetTexture(); if (!tex && !Polyportal) @@ -266,6 +266,11 @@ void RenderPolyWall::Render(const TriMatrix &worldToClip, const Vec4f &clipPlane args.stencilwritevalue = Polyportal->StencilValue; PolyTriangleDrawer::draw(args, TriDrawVariant::Stencil, TriBlendMode::Copy); 
Polyportal->Shape.push_back({ args.vinput, args.vcount, args.ccw, args.uniforms.subsectorDepth }); + + int sx1, sx2; + LineSegmentRange range = cull.GetSegmentRangeForLine(v1.X, v1.Y, v2.X, v2.Y, sx1, sx2); + if (range == LineSegmentRange::HasSegment) + Polyportal->Segments.push_back({ sx1, sx2 }); } else if (!Masked) { diff --git a/src/r_poly_wall.h b/src/r_poly_wall.h index bcecd5a05b..8443a174ad 100644 --- a/src/r_poly_wall.h +++ b/src/r_poly_wall.h @@ -26,16 +26,17 @@ class PolyTranslucentObject; class PolyDrawLinePortal; +class PolyCull; class Vec4f; class RenderPolyWall { public: - static bool RenderLine(const TriMatrix &worldToClip, const Vec4f &clipPlane, seg_t *line, sector_t *frontsector, uint32_t subsectorDepth, uint32_t stencilValue, std::vector &translucentWallsOutput, std::vector> &linePortals); - static void Render3DFloorLine(const TriMatrix &worldToClip, const Vec4f &clipPlane, seg_t *line, sector_t *frontsector, uint32_t subsectorDepth, uint32_t stencilValue, F3DFloor *fakeFloor, std::vector &translucentWallsOutput); + static bool RenderLine(const TriMatrix &worldToClip, const Vec4f &clipPlane, PolyCull &cull, seg_t *line, sector_t *frontsector, uint32_t subsectorDepth, uint32_t stencilValue, std::vector &translucentWallsOutput, std::vector> &linePortals); + static void Render3DFloorLine(const TriMatrix &worldToClip, const Vec4f &clipPlane, PolyCull &cull, seg_t *line, sector_t *frontsector, uint32_t subsectorDepth, uint32_t stencilValue, F3DFloor *fakeFloor, std::vector &translucentWallsOutput); void SetCoords(const DVector2 &v1, const DVector2 &v2, double ceil1, double floor1, double ceil2, double floor2); - void Render(const TriMatrix &worldToClip, const Vec4f &clipPlane); + void Render(const TriMatrix &worldToClip, const Vec4f &clipPlane, PolyCull &cull); DVector2 v1; DVector2 v2; From 6609403fbe739c52e7a6a83aaf7fd5495d6eebf5 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 9 Dec 2016 10:27:18 +0100 Subject: [PATCH 467/912] Fix null 
pointer crash in rt column drawers if rt_init is not called before the first draw --- src/r_thread.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/r_thread.h b/src/r_thread.h index 7962dfc208..e44d872c23 100644 --- a/src/r_thread.h +++ b/src/r_thread.h @@ -42,6 +42,12 @@ void R_EndDrawerCommands(); class DrawerThread { public: + DrawerThread() + { + dc_temp = dc_temp_buff; + dc_temp_rgba = dc_temp_rgbabuff_rgba; + } + std::thread thread; // Thread line index of this thread From 5d210c64e0d0146d4c5c8ea85c07f4e5198b0303 Mon Sep 17 00:00:00 2001 From: Christoph Oelckers Date: Fri, 9 Dec 2016 00:41:34 +0100 Subject: [PATCH 468/912] - got rid of ksgn. Because every bit of Build code that can be removed is a good thing. This was only used in two places, one of which could be done better, the other one in the voxel drawer setup now uses a local C-inline version. (cherry picked from commit 5536184beed578e2b9df9ae673b2e965af92b2c0) --- src/basicinlines.h | 6 ------ src/gccinlines.h | 14 -------------- src/mscinlines.h | 9 --------- src/p_user.cpp | 6 +----- src/r_things.cpp | 7 ++++++- 5 files changed, 7 insertions(+), 35 deletions(-) diff --git a/src/basicinlines.h b/src/basicinlines.h index cc562f2b8c..2f81a6c646 100644 --- a/src/basicinlines.h +++ b/src/basicinlines.h @@ -195,9 +195,3 @@ static __forceinline void clearbufshort (void *buff, unsigned int count, WORD cl } } -static __forceinline SDWORD ksgn (SDWORD a) -{ - if (a < 0) return -1; - else if (a > 0) return 1; - else return 0; -} diff --git a/src/gccinlines.h b/src/gccinlines.h index 6cad307f7c..7311e9695d 100644 --- a/src/gccinlines.h +++ b/src/gccinlines.h @@ -316,17 +316,3 @@ static inline void clearbufshort (void *buff, unsigned int count, WORD clear) :"cc"); } -static inline SDWORD ksgn (SDWORD a) -{ - SDWORD result, dummy; - - asm volatile - ("add %0,%0\n\t" - "sbb %1,%1\n\t" - "cmp %0,%1\n\t" - "adc $0,%1" - :"=r" (dummy), "=r" (result) - :"0" (a) - :"cc"); - return result; -} diff --git 
a/src/mscinlines.h b/src/mscinlines.h index a8dd5fea73..7befe5395f 100644 --- a/src/mscinlines.h +++ b/src/mscinlines.h @@ -339,13 +339,4 @@ __forceinline void clearbufshort (void *buff, unsigned int count, WORD clear) } } -__forceinline SDWORD ksgn (SDWORD a) -{ - __asm mov edx,a - __asm add edx,edx - __asm sbb eax,eax - __asm cmp eax,edx - __asm adc eax,0 -} - #pragma warning (default: 4035) diff --git a/src/p_user.cpp b/src/p_user.cpp index e894fa2012..8654009e1a 100644 --- a/src/p_user.cpp +++ b/src/p_user.cpp @@ -2610,11 +2610,7 @@ void P_PlayerThink (player_t *player) else if (cmd->ucmd.upmove != 0) { // Clamp the speed to some reasonable maximum. - int magnitude = abs (cmd->ucmd.upmove); - if (magnitude > 0x300) - { - cmd->ucmd.upmove = ksgn (cmd->ucmd.upmove) * 0x300; - } + cmd->ucmd.upmove = clamp(cmd->ucmd.upmove, -0x300, 0x300); if (player->mo->waterlevel >= 2 || (player->mo->flags2 & MF2_FLY) || (player->cheats & CF_NOCLIP2)) { player->mo->Vel.Z = player->mo->Speed * cmd->ucmd.upmove / 128.; diff --git a/src/r_things.cpp b/src/r_things.cpp index 57e101feb2..f5c75f012d 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -2907,6 +2907,11 @@ void R_DrawParticle_rgba(vissprite_t *vis) extern double BaseYaspectMul;; +inline int sgn(int v) +{ + return v < 0 ? -1 : v > 0 ? 1 : 0; +} + void R_DrawVoxel(const FVector3 &globalpos, FAngle viewangle, const FVector3 &dasprpos, DAngle dasprang, fixed_t daxscale, fixed_t dayscale, FVoxel *voxobj, @@ -3048,7 +3053,7 @@ void R_DrawVoxel(const FVector3 &globalpos, FAngle viewangle, xe += xi; ye += yi; } - i = ksgn(ys-backy)+ksgn(xs-backx)*3+4; + i = sgn(ys - backy) + sgn(xs - backx) * 3 + 4; switch(i) { case 6: case 7: x1 = 0; y1 = 0; break; From 695b08ed4bebb0bb8b08dc34151b06c0715ec983 Mon Sep 17 00:00:00 2001 From: Christoph Oelckers Date: Fri, 9 Dec 2016 10:31:17 +0100 Subject: [PATCH 469/912] - removed some unused functions from m_fixed.h. - removed the Build license note from r_bsp.cpp. 
This note was for code in R_AddLine which had been both refactored into several subfunctions and completely replaced with a floating point version. What is left is just some basic common math without any traits that resemble anything in Build. (cherry picked from commit 8748b9ef6defc5c8a2d9570da29afcc5dcac8bbf) --- src/m_fixed.h | 57 --------------------------------------------------- src/r_bsp.cpp | 6 ------ 2 files changed, 63 deletions(-) diff --git a/src/m_fixed.h b/src/m_fixed.h index 506b9702c8..4719ac4680 100644 --- a/src/m_fixed.h +++ b/src/m_fixed.h @@ -79,63 +79,6 @@ inline SDWORD SafeDivScale32 (SDWORD a, SDWORD b) #define FixedMul MulScale16 #define FixedDiv SafeDivScale16 -inline void qinterpolatedown16 (SDWORD *out, DWORD count, SDWORD val, SDWORD delta) -{ - if (count & 1) - { - out[0] = val >> 16; - val += delta; - } - count >>= 1; - while (count-- != 0) - { - int temp = val + delta; - out[0] = val >> 16; - val = temp + delta; - out[1] = temp >> 16; - out += 2; - } -} - -inline void qinterpolatedown16short (short *out, DWORD count, SDWORD val, SDWORD delta) -{ - if (count) - { - if ((size_t)out & 2) - { // align to dword boundary - *out++ = (short)(val >> 16); - count--; - val += delta; - } - DWORD *o2 = (DWORD *)out; - DWORD c2 = count>>1; - while (c2-- != 0) - { - SDWORD temp = val + delta; - *o2++ = (temp & 0xffff0000) | ((DWORD)val >> 16); - val = temp + delta; - } - if (count & 1) - { - *(short *)o2 = (short)(val >> 16); - } - } -} - - //returns num/den, dmval = num%den -inline SDWORD DivMod (SDWORD num, SDWORD den, SDWORD *dmval) -{ - *dmval = num % den; - return num / den; -} - - //returns num%den, dmval = num/den -inline SDWORD ModDiv (SDWORD num, SDWORD den, SDWORD *dmval) -{ - *dmval = num / den; - return num % den; -} - inline fixed_t FloatToFixed(double f) { return xs_Fix<16>::ToFix(f); diff --git a/src/r_bsp.cpp b/src/r_bsp.cpp index 8004f0dfc3..bb076d72f5 100644 --- a/src/r_bsp.cpp +++ b/src/r_bsp.cpp @@ -17,12 +17,6 @@ // DESCRIPTION: 
// BSP traversal, handling of LineSegs for rendering. // -// This file contains some code from the Build Engine. -// -// "Build Engine & Tools" Copyright (c) 1993-1997 Ken Silverman -// Ken Silverman's official web site: "http://www.advsys.net/ken" -// See the included license file "BUILDLIC.TXT" for license info. -// //----------------------------------------------------------------------------- From c61f30a6274c43041949e44ff1a6b9a7749c45cb Mon Sep 17 00:00:00 2001 From: Christoph Oelckers Date: Fri, 9 Dec 2016 12:48:10 +0100 Subject: [PATCH 470/912] - got rid of clearbuf. This was used in only 4 places, 3 of which could easily be replaced with a memset, and the fourth, in the Strife status bar, suffering from a pointless performance optimization, rendering the code unreadable - the code spent here per frame is utterly insignificant so clarity should win here. (cherry picked from commit 12a99c3f3cc75fa3b241d89a8ebe43c8d3c07809) --- src/basicinlines.h | 9 --------- src/g_strife/strife_sbar.cpp | 10 +++------- src/gccinlines.h | 13 ------------- src/mscinlines.h | 9 --------- src/p_maputl.cpp | 2 +- src/r_data/sprites.cpp | 4 ++-- 6 files changed, 6 insertions(+), 41 deletions(-) diff --git a/src/basicinlines.h b/src/basicinlines.h index 2f81a6c646..488cdf5526 100644 --- a/src/basicinlines.h +++ b/src/basicinlines.h @@ -177,15 +177,6 @@ static inline SDWORD DivScale30 (SDWORD a, SDWORD b) { return (SDWORD)(((SQWORD) static inline SDWORD DivScale31 (SDWORD a, SDWORD b) { return (SDWORD)(((SQWORD)a << 31) / b); } static inline SDWORD DivScale32 (SDWORD a, SDWORD b) { return (SDWORD)(((SQWORD)a << 32) / b); } -static __forceinline void clearbuf (void *buff, unsigned int count, SDWORD clear) -{ - SDWORD *b2 = (SDWORD *)buff; - for (unsigned int i = 0; i != count; ++i) - { - b2[i] = clear; - } -} - static __forceinline void clearbufshort (void *buff, unsigned int count, WORD clear) { SWORD *b2 = (SWORD *)buff; diff --git a/src/g_strife/strife_sbar.cpp 
b/src/g_strife/strife_sbar.cpp index 355910616d..ccf17f8f1f 100644 --- a/src/g_strife/strife_sbar.cpp +++ b/src/g_strife/strife_sbar.cpp @@ -169,14 +169,10 @@ void FHealthBar::MakeTexture () void FHealthBar::FillBar (int min, int max, BYTE light, BYTE dark) { -#ifdef __BIG_ENDIAN__ - SDWORD fill = (light << 24) | (dark << 16) | (light << 8) | dark; -#else - SDWORD fill = light | (dark << 8) | (light << 16) | (dark << 24); -#endif - if (max > min) + for (int i = min*2; i < max*2; i++) { - clearbuf (&Pixels[min*4], max - min, fill); + Pixels[i * 2] = light; + Pixels[i * 2 + 1] = dark; } } diff --git a/src/gccinlines.h b/src/gccinlines.h index 7311e9695d..54a2448c70 100644 --- a/src/gccinlines.h +++ b/src/gccinlines.h @@ -291,19 +291,6 @@ static inline SDWORD DivScale32 (SDWORD a, SDWORD b) return result; } -static inline void clearbuf (void *buff, int count, SDWORD clear) -{ - int dummy1, dummy2; - asm volatile - ("rep stosl" - :"=D" (dummy1), - "=c" (dummy2) - : "D" (buff), - "c" (count), - "a" (clear) - ); -} - static inline void clearbufshort (void *buff, unsigned int count, WORD clear) { asm volatile diff --git a/src/mscinlines.h b/src/mscinlines.h index 7befe5395f..1aded4e7b7 100644 --- a/src/mscinlines.h +++ b/src/mscinlines.h @@ -321,15 +321,6 @@ __forceinline SDWORD DivScale32 (SDWORD a, SDWORD b) __asm idiv b } -__forceinline void clearbuf (void *buff, unsigned int count, SDWORD clear) -{ - SDWORD *b2 = (SDWORD *)buff; - for (unsigned int i = 0; i != count; ++i) - { - b2[i] = clear; - } -} - __forceinline void clearbufshort (void *buff, unsigned int count, WORD clear) { SWORD *b2 = (SWORD *)buff; diff --git a/src/p_maputl.cpp b/src/p_maputl.cpp index 31b4125988..fdf66e969d 100644 --- a/src/p_maputl.cpp +++ b/src/p_maputl.cpp @@ -925,7 +925,7 @@ void FBlockThingsIterator::init(const FBoundingBox &box) void FBlockThingsIterator::ClearHash() { - clearbuf(Buckets, countof(Buckets), -1); + memset(Buckets, -1, sizeof(Buckets)); NumFixedHash = 0; DynHash.Clear(); } 
diff --git a/src/r_data/sprites.cpp b/src/r_data/sprites.cpp index 3e0cb80ccf..b384211fc6 100644 --- a/src/r_data/sprites.cpp +++ b/src/r_data/sprites.cpp @@ -270,7 +270,7 @@ void R_InitSpriteDefs () // Create a hash table to speed up the process smax = TexMan.NumTextures(); hashes = new Hasher[smax]; - clearbuf(hashes, sizeof(Hasher)*smax/4, -1); + memset(hashes, -1, sizeof(Hasher)*smax); for (i = 0; i < smax; ++i) { FTexture *tex = TexMan.ByIndex(i); @@ -285,7 +285,7 @@ void R_InitSpriteDefs () // Repeat, for voxels vmax = Wads.GetNumLumps(); vhashes = new VHasher[vmax]; - clearbuf(vhashes, sizeof(VHasher)*vmax/4, -1); + memset(vhashes, -1, sizeof(VHasher)*vmax); for (i = 0; i < vmax; ++i) { if (Wads.GetLumpNamespace(i) == ns_voxels) From cfd6bcec5ea628870de6e6fd1f26adf9ad7c4ac4 Mon Sep 17 00:00:00 2001 From: Christoph Oelckers Date: Fri, 9 Dec 2016 13:04:53 +0100 Subject: [PATCH 471/912] - more Build cleanup: Moved clearbufshort out of Build derived headers, removed the inline version for GCC 32 bit which was actually Build derived and renamed the C-version of this function, which does not originate from Build to 'fillshort'. - cleaned out a lot the SafeDivScale stuff in m_fixed.h. The only SafeDivScale variant still in use was #16 for FixedDiv, so all the SafeDivScale stuff has been removed and the 16 variant renamed to FixedDiv because that's the only form in which it is still being used. (2x in R_DrawVoxel and 1x in ACS's FixedDiv PCode.) - removed Build notice from m_fixed.h because aside from the inlines includes there is nothing here from Build anymore. 
(cherry picked from commit eab06ef0866b0c9ed84f94a8745e747116e2eba7) --- src/basicinlines.h | 9 ----- src/g_shared/a_lightning.cpp | 4 +- src/gccinlines.h | 13 ------- src/m_fixed.h | 73 +++++++----------------------------- src/mscinlines.h | 9 ----- src/p_effect.cpp | 2 +- src/r_bsp.cpp | 4 +- src/r_draw.cpp | 4 +- src/r_main.cpp | 4 +- src/r_plane.cpp | 10 ++--- src/r_segs.cpp | 12 +++--- src/r_things.cpp | 4 +- src/v_draw.cpp | 4 +- 13 files changed, 37 insertions(+), 115 deletions(-) diff --git a/src/basicinlines.h b/src/basicinlines.h index 488cdf5526..8492572818 100644 --- a/src/basicinlines.h +++ b/src/basicinlines.h @@ -177,12 +177,3 @@ static inline SDWORD DivScale30 (SDWORD a, SDWORD b) { return (SDWORD)(((SQWORD) static inline SDWORD DivScale31 (SDWORD a, SDWORD b) { return (SDWORD)(((SQWORD)a << 31) / b); } static inline SDWORD DivScale32 (SDWORD a, SDWORD b) { return (SDWORD)(((SQWORD)a << 32) / b); } -static __forceinline void clearbufshort (void *buff, unsigned int count, WORD clear) -{ - SWORD *b2 = (SWORD *)buff; - for (unsigned int i = 0; i != count; ++i) - { - b2[i] = clear; - } -} - diff --git a/src/g_shared/a_lightning.cpp b/src/g_shared/a_lightning.cpp index 3f44f2fddd..bbc6638123 100644 --- a/src/g_shared/a_lightning.cpp +++ b/src/g_shared/a_lightning.cpp @@ -23,7 +23,7 @@ DLightningThinker::DLightningThinker () NextLightningFlash = ((pr_lightning()&15)+5)*35; // don't flash at level start LightningLightLevels.Resize(numsectors); - clearbufshort(&LightningLightLevels[0], numsectors, SHRT_MAX); + fillshort(&LightningLightLevels[0], numsectors, SHRT_MAX); } DLightningThinker::~DLightningThinker () @@ -87,7 +87,7 @@ void DLightningThinker::LightningFlash () tempSec->SetLightLevel(LightningLightLevels[j]); } } - clearbufshort(&LightningLightLevels[0], numsectors, SHRT_MAX); + fillshort(&LightningLightLevels[0], numsectors, SHRT_MAX); level.flags &= ~LEVEL_SWAPSKIES; } return; diff --git a/src/gccinlines.h b/src/gccinlines.h index 
54a2448c70..eac1f3e6fb 100644 --- a/src/gccinlines.h +++ b/src/gccinlines.h @@ -290,16 +290,3 @@ static inline SDWORD DivScale32 (SDWORD a, SDWORD b) : "cc"); return result; } - -static inline void clearbufshort (void *buff, unsigned int count, WORD clear) -{ - asm volatile - ("shr $1,%%ecx\n\t" - "rep stosl\n\t" - "adc %%ecx,%%ecx\n\t" - "rep stosw" - :"=D" (buff), "=c" (count) - :"D" (buff), "c" (count), "a" (clear|(clear<<16)) - :"cc"); -} - diff --git a/src/m_fixed.h b/src/m_fixed.h index 4719ac4680..33d339608a 100644 --- a/src/m_fixed.h +++ b/src/m_fixed.h @@ -1,11 +1,3 @@ -// "Build Engine & Tools" Copyright (c) 1993-1997 Ken Silverman -// Ken Silverman's official web site: "http://www.advsys.net/ken" -// See the included license file "BUILDLIC.TXT" for license info. -// -// This file is based on pragmas.h from Ken Silverman's original Build -// source code release and contains routines that were originally -// inline assembly but are not now. - #ifndef __M_FIXED__ #define __M_FIXED__ @@ -20,64 +12,25 @@ #include "basicinlines.h" #endif -#include "xs_Float.h" - -#define MAKESAFEDIVSCALE(x) \ - inline SDWORD SafeDivScale##x (SDWORD a, SDWORD b) \ - { \ - if ((DWORD)abs(a) >> (31-x) >= (DWORD)abs (b)) \ - return (a^b)<0 ? 
FIXED_MIN : FIXED_MAX; \ - return DivScale##x (a, b); \ - } - -MAKESAFEDIVSCALE(1) -MAKESAFEDIVSCALE(2) -MAKESAFEDIVSCALE(3) -MAKESAFEDIVSCALE(4) -MAKESAFEDIVSCALE(5) -MAKESAFEDIVSCALE(6) -MAKESAFEDIVSCALE(7) -MAKESAFEDIVSCALE(8) -MAKESAFEDIVSCALE(9) -MAKESAFEDIVSCALE(10) -MAKESAFEDIVSCALE(11) -MAKESAFEDIVSCALE(12) -MAKESAFEDIVSCALE(13) -MAKESAFEDIVSCALE(14) -MAKESAFEDIVSCALE(15) -MAKESAFEDIVSCALE(16) -MAKESAFEDIVSCALE(17) -MAKESAFEDIVSCALE(18) -MAKESAFEDIVSCALE(19) -MAKESAFEDIVSCALE(20) -MAKESAFEDIVSCALE(21) -MAKESAFEDIVSCALE(22) -MAKESAFEDIVSCALE(23) -MAKESAFEDIVSCALE(24) -MAKESAFEDIVSCALE(25) -MAKESAFEDIVSCALE(26) -MAKESAFEDIVSCALE(27) -MAKESAFEDIVSCALE(28) -MAKESAFEDIVSCALE(29) -MAKESAFEDIVSCALE(30) -#undef MAKESAFEDIVSCALE - -inline SDWORD SafeDivScale31 (SDWORD a, SDWORD b) +__forceinline void fillshort(void *buff, unsigned int count, WORD clear) { - if ((DWORD)abs(a) >= (DWORD)abs (b)) - return (a^b)<0 ? FIXED_MIN : FIXED_MAX; - return DivScale31 (a, b); + SWORD *b2 = (SWORD *)buff; + for (unsigned int i = 0; i != count; ++i) + { + b2[i] = clear; + } } -inline SDWORD SafeDivScale32 (SDWORD a, SDWORD b) -{ - if ((DWORD)abs(a) >= (DWORD)abs (b) >> 1) - return (a^b)<0 ? FIXED_MIN : FIXED_MAX; - return DivScale32 (a, b); +#include "xs_Float.h" + +inline SDWORD FixedDiv (SDWORD a, SDWORD b) +{ + if ((DWORD)abs(a) >> (31-16) >= (DWORD)abs (b)) + return (a^b)<0 ? 
FIXED_MIN : FIXED_MAX; + return DivScale16 (a, b); } #define FixedMul MulScale16 -#define FixedDiv SafeDivScale16 inline fixed_t FloatToFixed(double f) { diff --git a/src/mscinlines.h b/src/mscinlines.h index 1aded4e7b7..6af2da1de7 100644 --- a/src/mscinlines.h +++ b/src/mscinlines.h @@ -321,13 +321,4 @@ __forceinline SDWORD DivScale32 (SDWORD a, SDWORD b) __asm idiv b } -__forceinline void clearbufshort (void *buff, unsigned int count, WORD clear) -{ - SWORD *b2 = (SWORD *)buff; - for (unsigned int i = 0; i != count; ++i) - { - b2[i] = clear; - } -} - #pragma warning (default: 4035) diff --git a/src/p_effect.cpp b/src/p_effect.cpp index 42e82d7405..827a457ee2 100644 --- a/src/p_effect.cpp +++ b/src/p_effect.cpp @@ -198,7 +198,7 @@ void P_FindParticleSubsectors () ParticlesInSubsec.Reserve (numsubsectors - ParticlesInSubsec.Size()); } - clearbufshort (&ParticlesInSubsec[0], numsubsectors, NO_PARTICLE); + fillshort (&ParticlesInSubsec[0], numsubsectors, NO_PARTICLE); if (!r_particles) { diff --git a/src/r_bsp.cpp b/src/r_bsp.cpp index bb076d72f5..230e16dccf 100644 --- a/src/r_bsp.cpp +++ b/src/r_bsp.cpp @@ -740,8 +740,8 @@ void R_AddLine (seg_t *line) if (line->linedef->special == Line_Horizon) { // Be aware: Line_Horizon does not work properly with sloped planes - clearbufshort (walltop+WallC.sx1, WallC.sx2 - WallC.sx1, centery); - clearbufshort (wallbottom+WallC.sx1, WallC.sx2 - WallC.sx1, centery); + fillshort (walltop+WallC.sx1, WallC.sx2 - WallC.sx1, centery); + fillshort (wallbottom+WallC.sx1, WallC.sx2 - WallC.sx1, centery); } else { diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 766ad8d7aa..b23bf40818 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -1641,7 +1641,7 @@ namespace swrenderer if (b2 > t2) { - clearbufshort(spanend + t2, b2 - t2, x); + fillshort(spanend + t2, b2 - t2, x); } R_SetColorMapLight(basecolormap, (float)light, wallshade); @@ -1668,7 +1668,7 @@ namespace swrenderer if (b1 > b2) b2 = b1; if (t2 < b2) { - clearbufshort(spanend + t2, 
b2 - t2, x); + fillshort(spanend + t2, b2 - t2, x); } rcolormap = lcolormap; R_SetColorMapLight(basecolormap, (float)light, wallshade); diff --git a/src/r_main.cpp b/src/r_main.cpp index 543b7cd59a..e79800e49f 100644 --- a/src/r_main.cpp +++ b/src/r_main.cpp @@ -377,7 +377,7 @@ void R_SWRSetWindow(int windowSize, int fullWidth, int fullHeight, int stHeight, pspritexiscale = 1 / pspritexscale; // thing clipping - clearbufshort (screenheightarray, viewwidth, (short)viewheight); + fillshort (screenheightarray, viewwidth, (short)viewheight); R_InitTextureMapping (); @@ -400,7 +400,7 @@ void R_InitRenderer() { atterm(R_ShutdownRenderer); // viewwidth / viewheight are set by the defaults - clearbufshort (zeroarray, MAXWIDTH, 0); + fillshort (zeroarray, MAXWIDTH, 0); R_InitPlanes (); R_InitShadeMaps(); diff --git a/src/r_plane.cpp b/src/r_plane.cpp index 3769dda061..f8893dcd75 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -328,9 +328,9 @@ void R_ClearPlanes (bool fullclear) } // opening / clipping determination - clearbufshort (floorclip, viewwidth, viewheight); + fillshort (floorclip, viewwidth, viewheight); // [RH] clip ceiling to console bottom - clearbufshort (ceilingclip, viewwidth, + fillshort (ceilingclip, viewwidth, !screen->Accel2D && ConBottom > viewwindowy && !bRenderingToCanvas ? 
(ConBottom - viewwindowy) : 0); @@ -503,7 +503,7 @@ visplane_t *R_FindPlane (const secplane_t &height, FTextureID picnum, int lightl check->MirrorFlags = MirrorFlags; check->CurrentSkybox = CurrentSkybox; - clearbufshort (check->top, viewwidth, 0x7fff); + fillshort (check->top, viewwidth, 0x7fff); return check; } @@ -588,7 +588,7 @@ visplane_t *R_CheckPlane (visplane_t *pl, int start, int stop) pl = new_pl; pl->left = start; pl->right = stop; - clearbufshort (pl->top, viewwidth, 0x7fff); + fillshort (pl->top, viewwidth, 0x7fff); } return pl; } @@ -1816,7 +1816,7 @@ void R_MapVisPlane (visplane_t *pl, void (*mapfunc)(int y, int x1)) if (b2 > t2) { - clearbufshort (spanend+t2, b2-t2, x); + fillshort (spanend+t2, b2-t2, x); } for (--x; x >= pl->left; --x) diff --git a/src/r_segs.cpp b/src/r_segs.cpp index efc894fc29..c24e7f7190 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -531,7 +531,7 @@ clearfog: } else { - clearbufshort(openings + ds->sprtopclip - ds->x1 + x1, x2 - x1, viewheight); + fillshort(openings + ds->sprtopclip - ds->x1 + x1, x2 - x1, viewheight); } } return; @@ -1881,8 +1881,8 @@ void R_RenderSegLoop () call_wallscan(x1, x2, walltop, wallbottom, swall, lwall, yscale, false); } } - clearbufshort (ceilingclip+x1, x2-x1, viewheight); - clearbufshort (floorclip+x1, x2-x1, 0xffff); + fillshort (ceilingclip+x1, x2-x1, viewheight); + fillshort (floorclip+x1, x2-x1, 0xffff); } else { // two sided line @@ -2405,7 +2405,7 @@ void R_StoreWallRange (int start, int stop) { ds_p->sprtopclip = R_NewOpening (stop - start); ds_p->sprbottomclip = R_NewOpening (stop - start); - clearbufshort (openings + ds_p->sprtopclip, stop-start, viewheight); + fillshort (openings + ds_p->sprtopclip, stop-start, viewheight); memset (openings + ds_p->sprbottomclip, -1, (stop-start)*sizeof(short)); ds_p->silhouette = SIL_BOTH; } @@ -2445,7 +2445,7 @@ void R_StoreWallRange (int start, int stop) if (doorclosed || (rw_backfz1 >= rw_frontcz1 && rw_backfz2 >= rw_frontcz2)) { // killough 
1/17/98, 2/8/98 ds_p->sprtopclip = R_NewOpening (stop - start); - clearbufshort (openings + ds_p->sprtopclip, stop - start, viewheight); + fillshort (openings + ds_p->sprtopclip, stop - start, viewheight); ds_p->silhouette |= SIL_TOP; } } @@ -2666,7 +2666,7 @@ int WallMostAny(short *mostbuf, double z1, double z2, const FWallCoords *wallc) } else if (y1 > viewheight && y2 > viewheight) // entire line is below screen { - clearbufshort(&mostbuf[wallc->sx1], wallc->sx2 - wallc->sx1, viewheight); + fillshort(&mostbuf[wallc->sx1], wallc->sx2 - wallc->sx1, viewheight); return 12; } diff --git a/src/r_things.cpp b/src/r_things.cpp index f5c75f012d..0a9d74c204 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -2481,11 +2481,11 @@ void R_DrawSprite (vissprite_t *spr) // for R_DrawVisVoxel(). if (x1 > 0) { - clearbufshort(cliptop, x1, viewheight); + fillshort(cliptop, x1, viewheight); } if (x2 < viewwidth - 1) { - clearbufshort(cliptop + x2, viewwidth - x2, viewheight); + fillshort(cliptop + x2, viewwidth - x2, viewheight); } int minvoxely = spr->gzt <= hzt ? 0 : xs_RoundToInt((spr->gzt - hzt) / spr->yscale); int maxvoxely = spr->gzb > hzb ? 
INT_MAX : xs_RoundToInt((spr->gzt - hzb) / spr->yscale); diff --git a/src/v_draw.cpp b/src/v_draw.cpp index 5c10406bf4..a56c63998a 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -236,13 +236,13 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) if (bottomclipper[0] != parms.dclip) { - clearbufshort(bottomclipper, screen->GetWidth(), (short)parms.dclip); + fillshort(bottomclipper, screen->GetWidth(), (short)parms.dclip); } if (parms.uclip != 0) { if (topclipper[0] != parms.uclip) { - clearbufshort(topclipper, screen->GetWidth(), (short)parms.uclip); + fillshort(topclipper, screen->GetWidth(), (short)parms.uclip); } mceilingclip = topclipper; } From 1ea559dea4d96cce7e38d2110b736efccba67225 Mon Sep 17 00:00:00 2001 From: Christoph Oelckers Date: Fri, 9 Dec 2016 13:27:30 +0100 Subject: [PATCH 472/912] - removed several unused fixed point math functions from the inlines headers. (cherry picked from commit 86909ecdb2c2a4623bb25408bd61554ac65397fc) --- src/basicinlines.h | 58 ------------------- src/gccinlines.h | 116 ------------------------------------- src/mscinlines.h | 138 --------------------------------------------- 3 files changed, 312 deletions(-) diff --git a/src/basicinlines.h b/src/basicinlines.h index 8492572818..135208b81c 100644 --- a/src/basicinlines.h +++ b/src/basicinlines.h @@ -20,11 +20,6 @@ static __forceinline SDWORD Scale (SDWORD a, SDWORD b, SDWORD c) return (SDWORD)(((SQWORD)a*b)/c); } -static __forceinline SDWORD MulScale (SDWORD a, SDWORD b, SDWORD c) -{ - return (SDWORD)(((SQWORD)a*b)>>c); -} - static __forceinline SDWORD MulScale1 (SDWORD a, SDWORD b) { return (SDWORD)(((SQWORD)a * b) >> 1); } static __forceinline SDWORD MulScale2 (SDWORD a, SDWORD b) { return (SDWORD)(((SQWORD)a * b) >> 2); } static __forceinline SDWORD MulScale3 (SDWORD a, SDWORD b) { return (SDWORD)(((SQWORD)a * b) >> 3); } @@ -60,11 +55,6 @@ static __forceinline SDWORD MulScale32 (SDWORD a, SDWORD b) { return (SDWORD)((( static __forceinline DWORD 
UMulScale16 (DWORD a, DWORD b) { return (DWORD)(((QWORD)a * b) >> 16); } -static __forceinline SDWORD DMulScale (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD s) -{ - return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d) >> s); -} - static __forceinline SDWORD DMulScale1 (SDWORD a, SDWORD b, SDWORD c, SDWORD d) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d) >> 1); } static __forceinline SDWORD DMulScale2 (SDWORD a, SDWORD b, SDWORD c, SDWORD d) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d) >> 2); } static __forceinline SDWORD DMulScale3 (SDWORD a, SDWORD b, SDWORD c, SDWORD d) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d) >> 3); } @@ -98,53 +88,6 @@ static __forceinline SDWORD DMulScale30 (SDWORD a, SDWORD b, SDWORD c, SDWORD d) static __forceinline SDWORD DMulScale31 (SDWORD a, SDWORD b, SDWORD c, SDWORD d) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d) >> 31); } static __forceinline SDWORD DMulScale32 (SDWORD a, SDWORD b, SDWORD c, SDWORD d) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d) >> 32); } -static __forceinline SDWORD TMulScale1 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 1); } -static __forceinline SDWORD TMulScale2 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 2); } -static __forceinline SDWORD TMulScale3 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 3); } -static __forceinline SDWORD TMulScale4 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 4); } -static __forceinline SDWORD TMulScale5 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 5); } -static __forceinline SDWORD TMulScale6 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d 
+ (SQWORD)e*f) >> 6); } -static __forceinline SDWORD TMulScale7 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 7); } -static __forceinline SDWORD TMulScale8 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 8); } -static __forceinline SDWORD TMulScale9 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 9); } -static __forceinline SDWORD TMulScale10 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 10); } -static __forceinline SDWORD TMulScale11 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 11); } -static __forceinline SDWORD TMulScale12 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 12); } -static __forceinline SDWORD TMulScale13 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 13); } -static __forceinline SDWORD TMulScale14 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 14); } -static __forceinline SDWORD TMulScale15 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 15); } -static __forceinline SDWORD TMulScale16 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 16); } -static __forceinline SDWORD TMulScale17 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 17); } -static __forceinline SDWORD TMulScale18 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD 
e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 18); } -static __forceinline SDWORD TMulScale19 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 19); } -static __forceinline SDWORD TMulScale20 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 20); } -static __forceinline SDWORD TMulScale21 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 21); } -static __forceinline SDWORD TMulScale22 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 22); } -static __forceinline SDWORD TMulScale23 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 23); } -static __forceinline SDWORD TMulScale24 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 24); } -static __forceinline SDWORD TMulScale25 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 25); } -static __forceinline SDWORD TMulScale26 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 26); } -static __forceinline SDWORD TMulScale27 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 27); } -static __forceinline SDWORD TMulScale28 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 28); } -static __forceinline SDWORD TMulScale29 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 29); } -static __forceinline 
SDWORD TMulScale30 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 30); } -static __forceinline SDWORD TMulScale31 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 31); } -static __forceinline SDWORD TMulScale32 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 32); } - -static __forceinline SDWORD BoundMulScale (SDWORD a, SDWORD b, SDWORD c) -{ - SQWORD x = ((SQWORD)a * b) >> c; - return x > 0x7FFFFFFFll ? 0x7FFFFFFF : - x < -0x80000000ll ? 0x80000000 : - (SDWORD)x; -} - -static inline SDWORD DivScale (SDWORD a, SDWORD b, SDWORD c) -{ - return (SDWORD)(((SQWORD)a << c) / b); -} - -static inline SDWORD DivScale1 (SDWORD a, SDWORD b) { return (SDWORD)(((SQWORD)a << 1) / b); } static inline SDWORD DivScale2 (SDWORD a, SDWORD b) { return (SDWORD)(((SQWORD)a << 2) / b); } static inline SDWORD DivScale3 (SDWORD a, SDWORD b) { return (SDWORD)(((SQWORD)a << 3) / b); } static inline SDWORD DivScale4 (SDWORD a, SDWORD b) { return (SDWORD)(((SQWORD)a << 4) / b); } @@ -175,5 +118,4 @@ static inline SDWORD DivScale28 (SDWORD a, SDWORD b) { return (SDWORD)(((SQWORD) static inline SDWORD DivScale29 (SDWORD a, SDWORD b) { return (SDWORD)(((SQWORD)a << 29) / b); } static inline SDWORD DivScale30 (SDWORD a, SDWORD b) { return (SDWORD)(((SQWORD)a << 30) / b); } static inline SDWORD DivScale31 (SDWORD a, SDWORD b) { return (SDWORD)(((SQWORD)a << 31) / b); } -static inline SDWORD DivScale32 (SDWORD a, SDWORD b) { return (SDWORD)(((SQWORD)a << 32) / b); } diff --git a/src/gccinlines.h b/src/gccinlines.h index eac1f3e6fb..925a0037a9 100644 --- a/src/gccinlines.h +++ b/src/gccinlines.h @@ -47,23 +47,6 @@ static inline SDWORD Scale (SDWORD a, SDWORD b, SDWORD c) return result; } -static inline SDWORD MulScale (SDWORD a, SDWORD b, SDWORD c) -{ - SDWORD result, dummy; - 
- asm volatile - ("imull %3\n\t" - "shrdl %b4,%1,%0" - : "=a,a,a,a" (result), - "=d,d,d,d" (dummy) - : "a,a,a,a" (a), - "m,r,m,r" (b), - "c,c,I,I" (c) - : "cc" - ); - return result; -} - #define MAKECONSTMulScale(s) \ static inline SDWORD MulScale##s (SDWORD a, SDWORD b) { return ((SQWORD)a * b) >> s; } @@ -143,92 +126,6 @@ MAKECONSTDMulScale(31) MAKECONSTDMulScale(32) #undef MAKECONSTDMulScale -#define MAKECONSTTMulScale(s) \ - static inline SDWORD TMulScale##s (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD ee) \ - { \ - return (((SQWORD)a * b) + ((SQWORD)c * d) + ((SQWORD)e * ee)) >> s; \ - } - -MAKECONSTTMulScale(1) -MAKECONSTTMulScale(2) -MAKECONSTTMulScale(3) -MAKECONSTTMulScale(4) -MAKECONSTTMulScale(5) -MAKECONSTTMulScale(6) -MAKECONSTTMulScale(7) -MAKECONSTTMulScale(8) -MAKECONSTTMulScale(9) -MAKECONSTTMulScale(10) -MAKECONSTTMulScale(11) -MAKECONSTTMulScale(12) -MAKECONSTTMulScale(13) -MAKECONSTTMulScale(14) -MAKECONSTTMulScale(15) -MAKECONSTTMulScale(16) -MAKECONSTTMulScale(17) -MAKECONSTTMulScale(18) -MAKECONSTTMulScale(19) -MAKECONSTTMulScale(20) -MAKECONSTTMulScale(21) -MAKECONSTTMulScale(22) -MAKECONSTTMulScale(23) -MAKECONSTTMulScale(24) -MAKECONSTTMulScale(25) -MAKECONSTTMulScale(26) -MAKECONSTTMulScale(27) -MAKECONSTTMulScale(28) -MAKECONSTTMulScale(29) -MAKECONSTTMulScale(30) -MAKECONSTTMulScale(31) -MAKECONSTTMulScale(32) -#undef MAKECONSTTMulScale - -static inline SDWORD BoundMulScale (SDWORD a, SDWORD b, SDWORD c) -{ - union { - long long big; - struct - { - int l, h; - }; - } u; - u.big = ((long long)a * b) >> c; - if ((u.h ^ u.l) < 0 || (unsigned int)(u.h+1) > 1) return (u.h >> 31) ^ 0x7fffffff; - return u.l; -} - -static inline SDWORD DivScale (SDWORD a, SDWORD b, SDWORD c) -{ - SDWORD result, dummy; - SDWORD lo = a << c; - SDWORD hi = a >> (-c); - - asm volatile - ("idivl %4" - :"=a" (result), - "=d" (dummy) - : "a" (lo), - "d" (hi), - "r" (b) - : "cc"); - return result; -} - -static inline SDWORD DivScale1 (SDWORD a, SDWORD b) 
-{ - SDWORD result, dummy; - - asm volatile - ("addl %%eax,%%eax\n\t" - "sbbl %%edx,%%edx\n\t" - "idivl %3" - :"=a,a" (result), - "=&d,d" (dummy) - : "a,a" (a), - "r,m" (b) - : "cc"); - return result; -} #define MAKECONSTDivScale(s) \ static inline SDWORD DivScale##s (SDWORD a, SDWORD b) \ @@ -277,16 +174,3 @@ MAKECONSTDivScale(30) MAKECONSTDivScale(31) #undef MAKECONSTDivScale -static inline SDWORD DivScale32 (SDWORD a, SDWORD b) -{ - SDWORD result = 0, dummy; - - asm volatile - ("idivl %3" - :"+a,a" (result), - "=d,d" (dummy) - : "d,d" (a), - "r,m" (b) - : "cc"); - return result; -} diff --git a/src/mscinlines.h b/src/mscinlines.h index 6af2da1de7..3d00e9cb03 100644 --- a/src/mscinlines.h +++ b/src/mscinlines.h @@ -27,14 +27,6 @@ __forceinline SDWORD Scale (SDWORD a, SDWORD b, SDWORD c) __asm idiv c } -__forceinline SDWORD MulScale (SDWORD a, SDWORD b, SDWORD c) -{ - __asm mov eax,a - __asm mov ecx,c - __asm imul b - __asm shrd eax,edx,cl -} - #define MAKECONSTMulScale(s) \ __forceinline SDWORD MulScale##s (SDWORD a, SDWORD b) \ { \ @@ -89,20 +81,6 @@ __forceinline DWORD UMulScale16(DWORD a, DWORD b) __asm shrd eax,edx,16 } -__forceinline SDWORD DMulScale (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD s) -{ - __asm mov eax,a - __asm imul b - __asm mov ebx,eax - __asm mov eax,c - __asm mov esi,edx - __asm mov ecx,s - __asm imul d - __asm add eax,ebx - __asm adc edx,esi - __asm shrd eax,edx,cl -} - #define MAKECONSTDMulScale(s) \ __forceinline SDWORD DMulScale##s (SDWORD a, SDWORD b, SDWORD c, SDWORD d) \ { \ @@ -163,115 +141,6 @@ __forceinline SDWORD DMulScale32 (SDWORD a, SDWORD b, SDWORD c, SDWORD d) __asm mov eax,edx } -#define MAKECONSTTMulScale(s) \ - __forceinline SDWORD TMulScale##s (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) \ - { \ - __asm mov eax,a \ - __asm imul b \ - __asm mov ebx,eax \ - __asm mov eax,d \ - __asm mov ecx,edx \ - __asm imul c \ - __asm add ebx,eax \ - __asm mov eax,e \ - __asm adc ecx,edx \ - __asm imul f \ - __asm 
add eax,ebx \ - __asm adc edx,ecx \ - __asm shrd eax,edx,s \ - } - -MAKECONSTTMulScale(1) -MAKECONSTTMulScale(2) -MAKECONSTTMulScale(3) -MAKECONSTTMulScale(4) -MAKECONSTTMulScale(5) -MAKECONSTTMulScale(6) -MAKECONSTTMulScale(7) -MAKECONSTTMulScale(8) -MAKECONSTTMulScale(9) -MAKECONSTTMulScale(10) -MAKECONSTTMulScale(11) -MAKECONSTTMulScale(12) -MAKECONSTTMulScale(13) -MAKECONSTTMulScale(14) -MAKECONSTTMulScale(15) -MAKECONSTTMulScale(16) -MAKECONSTTMulScale(17) -MAKECONSTTMulScale(18) -MAKECONSTTMulScale(19) -MAKECONSTTMulScale(20) -MAKECONSTTMulScale(21) -MAKECONSTTMulScale(22) -MAKECONSTTMulScale(23) -MAKECONSTTMulScale(24) -MAKECONSTTMulScale(25) -MAKECONSTTMulScale(26) -MAKECONSTTMulScale(27) -MAKECONSTTMulScale(28) -MAKECONSTTMulScale(29) -MAKECONSTTMulScale(30) -MAKECONSTTMulScale(31) -#undef MAKECONSTTMulScale - -__forceinline SDWORD TMulScale32 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) -{ - __asm mov eax,a - __asm imul b - __asm mov ebx,eax - __asm mov eax,c - __asm mov ecx,edx - __asm imul d - __asm add ebx,eax - __asm mov eax,e - __asm adc ecx,edx - __asm imul f - __asm add eax,ebx - __asm adc edx,ecx - __asm mov eax,edx -} - -__forceinline SDWORD BoundMulScale (SDWORD a, SDWORD b, SDWORD c) -{ - __asm mov eax,a - __asm imul b - __asm mov ebx,edx - __asm mov ecx,c - __asm shrd eax,edx,cl - __asm sar edx,cl - __asm xor edx,eax - __asm js checkit - __asm xor edx,eax - __asm jz skipboundit - __asm cmp edx,0xffffffff - __asm je skipboundit -checkit: - __asm mov eax,ebx - __asm sar eax,31 - __asm xor eax,0x7fffffff -skipboundit: - ; -} - -__forceinline SDWORD DivScale (SDWORD a, SDWORD b, SDWORD c) -{ - __asm mov eax,a - __asm mov ecx,c - __asm shl eax,cl - __asm mov edx,a - __asm neg cl - __asm sar edx,cl - __asm idiv b -} - -__forceinline SDWORD DivScale1 (SDWORD a, SDWORD b) -{ - __asm mov eax,a - __asm add eax,eax - __asm sbb edx,edx - __asm idiv b -} - #define MAKECONSTDivScale(s) \ __forceinline SDWORD DivScale##s (SDWORD a, SDWORD b) 
\ { \ @@ -314,11 +183,4 @@ MAKECONSTDivScale(30) MAKECONSTDivScale(31) #undef MAKECONSTDivScale -__forceinline SDWORD DivScale32 (SDWORD a, SDWORD b) -{ - __asm mov edx,a - __asm xor eax,eax - __asm idiv b -} - #pragma warning (default: 4035) From 15cf9d36a999fbb18e5ffc0ad1ec38c308a56b08 Mon Sep 17 00:00:00 2001 From: Christoph Oelckers Date: Fri, 9 Dec 2016 11:49:18 +0100 Subject: [PATCH 473/912] - moved wallscan code into its own file so that it can get an appropriate copyright notice and does not inflate an already large source file even more. (cherry picked from commit 34d551fe1f3653cbb9447f9169d20f5255a69b9a) # Conflicts: # src/r_segs.cpp --- src/CMakeLists.txt | 1 + src/r_draw.h | 2 - src/r_segs.cpp | 771 +++------------------------------------------ src/r_wallscan.cpp | 583 ++++++++++++++++++++++++++++++++++ 4 files changed, 621 insertions(+), 736 deletions(-) create mode 100644 src/r_wallscan.cpp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index a3c63bb0b6..5f04effb13 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -828,6 +828,7 @@ set( FASTMATH_PCH_SOURCES r_segs.cpp r_sky.cpp r_things.cpp + r_wallscan.cpp s_advsound.cpp s_environment.cpp s_playlist.cpp diff --git a/src/r_draw.h b/src/r_draw.h index 8c1af58fcb..e412146407 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -134,8 +134,6 @@ namespace swrenderer const uint8_t *R_GetColumn(FTexture *tex, int col); void wallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const uint8_t *(*getcol)(FTexture *tex, int col) = R_GetColumn); - void maskwallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const uint8_t *(*getcol)(FTexture *tex, int col) = R_GetColumn); - void transmaskwallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const uint8_t *(*getcol)(FTexture *tex, int col) = R_GetColumn); void rt_initcols(uint8_t *buffer = nullptr); void rt_span_coverage(int x, int 
start, int stop); diff --git a/src/r_segs.cpp b/src/r_segs.cpp index c24e7f7190..1880273f90 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -17,6 +17,12 @@ // DESCRIPTION: // All the clipping: columns, horizontal spans, sky columns. // +// This file contains some code from the Build Engine. +// +// "Build Engine & Tools" Copyright (c) 1993-1997 Ken Silverman +// Ken Silverman's official web site: "http://www.advsys.net/ken" +// See the included license file "BUILDLIC.TXT" for license info. +// //----------------------------------------------------------------------------- #include @@ -44,22 +50,24 @@ #include "r_plane.h" #include "r_segs.h" #include "r_3dfloors.h" -#include "r_draw.h" #include "v_palette.h" #include "r_data/colormaps.h" #define WALLYREPEAT 8 -CVAR(Bool, r_np2, true, 0) + CVAR(Bool, r_fogboundary, true, 0) CVAR(Bool, r_drawmirrors, true, 0) EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor); -EXTERN_CVAR(Bool, r_mipmap) -namespace swrenderer +namespace swrenderer { using namespace drawerargs; + void call_wallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, bool mask); + void wallscan_np2(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, double top, double bot, bool mask); + void wallscan_np2_ds(drawseg_t *ds, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat); + #define HEIGHTBITS 12 #define HEIGHTSHIFT (FRACBITS-HEIGHTBITS) @@ -135,9 +143,6 @@ static fixed_t *maskedtexturecol; static void R_RenderDecal (side_t *wall, DBaseDecal *first, drawseg_t *clipper, int pass); static void WallSpriteColumn (void (*drawfunc)(const BYTE *column, const FTexture::Span *spans)); -void wallscan_np2(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, double top, double bot, bool mask); -static void wallscan_np2_ds(drawseg_t *ds, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat); 
-static void call_wallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, bool mask); inline bool IsFogBoundary (sector_t *front, sector_t *back) { @@ -158,7 +163,7 @@ static void BlastMaskedColumn (FTexture *tex, bool useRt) // calculate lighting if (fixedcolormap == NULL && fixedlightlev < 0) { - R_SetColorMapLight(basecolormap, rw_light, wallshade); + dc_colormap = basecolormap->Maps + (GETPALOOKUP (rw_light, wallshade) << COLORMAPSHIFT); } dc_iscale = xs_Fix<16>::ToFix(MaskedSWall[dc_x] * MaskedScaleY); @@ -176,7 +181,9 @@ static void BlastMaskedColumn (FTexture *tex, bool useRt) // when forming multipatched textures (see r_data.c). // draw the texture - R_DrawMaskedColumn(tex, maskedtexturecol[dc_x], useRt); + const FTexture::Span *spans; + const BYTE *pixels = tex->GetColumn (maskedtexturecol[dc_x] >> FRACBITS, &spans); + R_DrawMaskedColumn(pixels, spans, useRt); rw_light += rw_lightstep; spryscale += rw_scalestep; } @@ -292,9 +299,9 @@ void R_RenderMaskedSegRange (drawseg_t *ds, int x1, int x2) rw_scalestep = ds->iscalestep; if (fixedlightlev >= 0) - R_SetColorMapLight((r_fullbrightignoresectorcolor) ? &FullNormalLight : basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); + dc_colormap = (r_fullbrightignoresectorcolor) ? 
(FullNormalLight.Maps + fixedlightlev) : (basecolormap->Maps + fixedlightlev); else if (fixedcolormap != NULL) - R_SetColorMapLight(fixedcolormap, 0, 0); + dc_colormap = fixedcolormap; // find positioning texheight = tex->GetScaledHeightDouble(); @@ -440,7 +447,7 @@ void R_RenderMaskedSegRange (drawseg_t *ds, int x1, int x2) while (dc_x < stop) { - rt_initcols(nullptr); + rt_initcols(); BlastMaskedColumn (tex, true); dc_x++; BlastMaskedColumn (tex, true); dc_x++; BlastMaskedColumn (tex, true); dc_x++; @@ -531,7 +538,7 @@ clearfog: } else { - fillshort(openings + ds->sprtopclip - ds->x1 + x1, x2 - x1, viewheight); + clearbufshort(openings + ds->sprtopclip - ds->x1 + x1, x2 - x1, viewheight); } } return; @@ -609,9 +616,9 @@ void R_RenderFakeWall(drawseg_t *ds, int x1, int x2, F3DFloor *rover) } if (fixedlightlev >= 0) - R_SetColorMapLight((r_fullbrightignoresectorcolor) ? &FullNormalLight : basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); + dc_colormap = (r_fullbrightignoresectorcolor) ? 
(FullNormalLight.Maps + fixedlightlev) : (basecolormap->Maps + fixedlightlev); else if (fixedcolormap != NULL) - R_SetColorMapLight(fixedcolormap, 0, 0); + dc_colormap = fixedcolormap; WallC.sz1 = ds->sz1; WallC.sz2 = ds->sz2; @@ -1044,710 +1051,6 @@ void R_RenderFakeWallRange (drawseg_t *ds, int x1, int x2) return; } - -struct WallscanSampler -{ - WallscanSampler() { } - WallscanSampler(int y1, float swal, double yrepeat, fixed_t xoffset, double xmagnitude, FTexture *texture, const BYTE*(*getcol)(FTexture *texture, int x)); - - uint32_t uv_pos; - uint32_t uv_step; - uint32_t uv_max; - - const BYTE *source; - const BYTE *source2; - uint32_t texturefracx; - uint32_t height; -}; - -WallscanSampler::WallscanSampler(int y1, float swal, double yrepeat, fixed_t xoffset, double xmagnitude, FTexture *texture, const BYTE*(*getcol)(FTexture *texture, int x)) -{ - xoffset += FLOAT2FIXED(xmagnitude * 0.5); - - if (!r_swtruecolor) - { - height = texture->GetHeight(); - - int uv_fracbits = 32 - texture->HeightBits; - if (uv_fracbits != 32) - { - uv_max = height << uv_fracbits; - - // Find start uv in [0-base_height[ range. - // Not using xs_ToFixed because it rounds the result and we need something that always rounds down to stay within the range. 
- double uv_stepd = swal * yrepeat; - double v = (dc_texturemid + uv_stepd * (y1 - CenterY + 0.5)) / height; - v = v - floor(v); - v *= height; - v *= (1 << uv_fracbits); - - uv_pos = (uint32_t)v; - uv_step = xs_ToFixed(uv_fracbits, uv_stepd); - if (uv_step == 0) // To prevent divide by zero elsewhere - uv_step = 1; - } - else - { // Hack for one pixel tall textures - uv_pos = 0; - uv_step = 0; - uv_max = 1; - } - - source = getcol(texture, xoffset >> FRACBITS); - source2 = nullptr; - texturefracx = 0; - } - else - { - // Normalize to 0-1 range: - double uv_stepd = swal * yrepeat; - double v = (dc_texturemid + uv_stepd * (y1 - CenterY + 0.5)) / texture->GetHeight(); - v = v - floor(v); - double v_step = uv_stepd / texture->GetHeight(); - - if (isnan(v) || isnan(v_step)) // this should never happen, but it apparently does.. - { - uv_stepd = 0.0; - v = 0.0; - v_step = 0.0; - } - - // Convert to uint32: - uv_pos = (uint32_t)(v * 0x100000000LL); - uv_step = (uint32_t)(v_step * 0x100000000LL); - uv_max = 0; - - // Texture mipmap and filter selection: - if (getcol != R_GetColumn) - { - source = getcol(texture, xoffset >> FRACBITS); - source2 = nullptr; - height = texture->GetHeight(); - texturefracx = 0; - } - else - { - double ymagnitude = fabs(uv_stepd); - double magnitude = MAX(ymagnitude, xmagnitude); - double min_lod = -1000.0; - double lod = MAX(log2(magnitude) + r_lod_bias, min_lod); - bool magnifying = lod < 0.0f; - - int mipmap_offset = 0; - int mip_width = texture->GetWidth(); - int mip_height = texture->GetHeight(); - if (r_mipmap && texture->Mipmapped() && mip_width > 1 && mip_height > 1) - { - uint32_t xpos = (uint32_t)((((uint64_t)xoffset) << FRACBITS) / mip_width); - - int level = (int)lod; - while (level > 0 && mip_width > 1 && mip_height > 1) - { - mipmap_offset += mip_width * mip_height; - level--; - mip_width = MAX(mip_width >> 1, 1); - mip_height = MAX(mip_height >> 1, 1); - } - xoffset = (xpos >> FRACBITS) * mip_width; - } - - const uint32_t *pixels 
= texture->GetPixelsBgra() + mipmap_offset; - - bool filter_nearest = (magnifying && !r_magfilter) || (!magnifying && !r_minfilter); - if (filter_nearest) - { - int tx = (xoffset >> FRACBITS) % mip_width; - if (tx < 0) - tx += mip_width; - source = (BYTE*)(pixels + tx * mip_height); - source2 = nullptr; - height = mip_height; - texturefracx = 0; - } - else - { - xoffset -= FRACUNIT / 2; - int tx0 = (xoffset >> FRACBITS) % mip_width; - if (tx0 < 0) - tx0 += mip_width; - int tx1 = (tx0 + 1) % mip_width; - source = (BYTE*)(pixels + tx0 * mip_height); - source2 = (BYTE*)(pixels + tx1 * mip_height); - height = mip_height; - texturefracx = (xoffset >> (FRACBITS - 4)) & 15; - } - } - } -} - -// Draw a column with support for non-power-of-two ranges -void wallscan_drawcol1(int x, int y1, int y2, WallscanSampler &sampler, DWORD(*draw1column)()) -{ - if (r_swtruecolor) - { - int count = y2 - y1; - - dc_source = sampler.source; - dc_source2 = sampler.source2; - dc_texturefracx = sampler.texturefracx; - dc_dest = (ylookup[y1] + x) * 4 + dc_destorg; - dc_count = count; - dc_iscale = sampler.uv_step; - dc_texturefrac = sampler.uv_pos; - dc_textureheight = sampler.height; - draw1column(); - - uint64_t step64 = sampler.uv_step; - uint64_t pos64 = sampler.uv_pos; - sampler.uv_pos = (uint32_t)(pos64 + step64 * count); - } - else - { - if (sampler.uv_max == 0 || sampler.uv_step == 0) // power of two - { - int count = y2 - y1; - - dc_source = sampler.source; - dc_source2 = sampler.source2; - dc_texturefracx = sampler.texturefracx; - dc_dest = (ylookup[y1] + x) + dc_destorg; - dc_count = count; - dc_iscale = sampler.uv_step; - dc_texturefrac = sampler.uv_pos; - draw1column(); - - uint64_t step64 = sampler.uv_step; - uint64_t pos64 = sampler.uv_pos; - sampler.uv_pos = (uint32_t)(pos64 + step64 * count); - } - else - { - uint32_t uv_pos = sampler.uv_pos; - - uint32_t left = y2 - y1; - while (left > 0) - { - uint32_t available = sampler.uv_max - uv_pos; - uint32_t next_uv_wrap = available 
/ sampler.uv_step; - if (available % sampler.uv_step != 0) - next_uv_wrap++; - uint32_t count = MIN(left, next_uv_wrap); - - dc_source = sampler.source; - dc_source2 = sampler.source2; - dc_texturefracx = sampler.texturefracx; - dc_dest = (ylookup[y1] + x) + dc_destorg; - dc_count = count; - dc_iscale = sampler.uv_step; - dc_texturefrac = uv_pos; - draw1column(); - - left -= count; - uv_pos += sampler.uv_step * count; - if (uv_pos >= sampler.uv_max) - uv_pos -= sampler.uv_max; - } - - sampler.uv_pos = uv_pos; - } - } -} - -// Draw four columns with support for non-power-of-two ranges -void wallscan_drawcol4(int x, int y1, int y2, WallscanSampler *sampler, void(*draw4columns)()) -{ - if (r_swtruecolor) - { - int count = y2 - y1; - for (int i = 0; i < 4; i++) - { - bufplce[i] = sampler[i].source; - bufplce2[i] = sampler[i].source2; - buftexturefracx[i] = sampler[i].texturefracx; - bufheight[i] = sampler[i].height; - vplce[i] = sampler[i].uv_pos; - vince[i] = sampler[i].uv_step; - - uint64_t step64 = sampler[i].uv_step; - uint64_t pos64 = sampler[i].uv_pos; - sampler[i].uv_pos = (uint32_t)(pos64 + step64 * count); - } - dc_dest = (ylookup[y1] + x) * 4 + dc_destorg; - dc_count = count; - draw4columns(); - } - else - { - if (sampler[0].uv_max == 0 || sampler[0].uv_step == 0) // power of two, no wrap handling needed - { - int count = y2 - y1; - for (int i = 0; i < 4; i++) - { - bufplce[i] = sampler[i].source; - bufplce2[i] = sampler[i].source2; - buftexturefracx[i] = sampler[i].texturefracx; - vplce[i] = sampler[i].uv_pos; - vince[i] = sampler[i].uv_step; - - uint64_t step64 = sampler[i].uv_step; - uint64_t pos64 = sampler[i].uv_pos; - sampler[i].uv_pos = (uint32_t)(pos64 + step64 * count); - } - dc_dest = (ylookup[y1] + x) + dc_destorg; - dc_count = count; - draw4columns(); - } - else - { - dc_dest = (ylookup[y1] + x) + dc_destorg; - for (int i = 0; i < 4; i++) - { - bufplce[i] = sampler[i].source; - bufplce2[i] = sampler[i].source2; - buftexturefracx[i] = 
sampler[i].texturefracx; - } - - uint32_t left = y2 - y1; - while (left > 0) - { - // Find which column wraps first - uint32_t count = left; - for (int i = 0; i < 4; i++) - { - uint32_t available = sampler[i].uv_max - sampler[i].uv_pos; - uint32_t next_uv_wrap = available / sampler[i].uv_step; - if (available % sampler[i].uv_step != 0) - next_uv_wrap++; - count = MIN(next_uv_wrap, count); - } - - // Draw until that column wraps - for (int i = 0; i < 4; i++) - { - vplce[i] = sampler[i].uv_pos; - vince[i] = sampler[i].uv_step; - } - dc_count = count; - draw4columns(); - - // Wrap the uv position - for (int i = 0; i < 4; i++) - { - sampler[i].uv_pos += sampler[i].uv_step * count; - if (sampler[i].uv_pos >= sampler[i].uv_max) - sampler[i].uv_pos -= sampler[i].uv_max; - } - - left -= count; - } - } - } -} - -typedef DWORD(*Draw1ColumnFuncPtr)(); -typedef void(*Draw4ColumnsFuncPtr)(); - -void wallscan_any( - int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, - const BYTE *(*getcol)(FTexture *tex, int x), - void(setupwallscan(int bits, Draw1ColumnFuncPtr &draw1, Draw4ColumnsFuncPtr &draw2))) -{ - if (rw_pic->UseType == FTexture::TEX_Null) - return; - - fixed_t xoffset = rw_offset; - - rw_pic->GetHeight(); // To ensure that rw_pic->HeightBits has been set - int fracbits = 32 - rw_pic->HeightBits; - if (fracbits == 32) - { // Hack for one pixel tall textures - fracbits = 0; - yrepeat = 0; - dc_texturemid = 0; - } - - DWORD(*draw1column)(); - void(*draw4columns)(); - setupwallscan(r_swtruecolor ? 
FRACBITS : fracbits, draw1column, draw4columns); - - bool fixed = (fixedcolormap != NULL || fixedlightlev >= 0); - if (fixed) - { - palookupoffse[0] = dc_colormap; - palookupoffse[1] = dc_colormap; - palookupoffse[2] = dc_colormap; - palookupoffse[3] = dc_colormap; - palookuplight[0] = 0; - palookuplight[1] = 0; - palookuplight[2] = 0; - palookuplight[3] = 0; - } - - if (fixedcolormap) - R_SetColorMapLight(fixedcolormap, 0, 0); - else - R_SetColorMapLight(basecolormap, 0, 0); - - float light = rw_light; - - // Calculate where 4 column alignment begins and ends: - int aligned_x1 = clamp((x1 + 3) / 4 * 4, x1, x2); - int aligned_x2 = clamp(x2 / 4 * 4, x1, x2); - - double xmagnitude = 1.0; - - // First unaligned columns: - for (int x = x1; x < aligned_x1; x++, light += rw_lightstep) - { - int y1 = uwal[x]; - int y2 = dwal[x]; - if (y2 <= y1) - continue; - - if (!fixed) - R_SetColorMapLight(basecolormap, light, wallshade); - - if (x + 1 < x2) xmagnitude = fabs(FIXED2DBL(lwal[x + 1]) - FIXED2DBL(lwal[x])); - - WallscanSampler sampler(y1, swal[x], yrepeat, lwal[x] + xoffset, xmagnitude, rw_pic, getcol); - wallscan_drawcol1(x, y1, y2, sampler, draw1column); - } - - // The aligned columns - for (int x = aligned_x1; x < aligned_x2; x += 4) - { - // Find y1, y2, light and uv values for four columns: - int y1[4] = { uwal[x], uwal[x + 1], uwal[x + 2], uwal[x + 3] }; - int y2[4] = { dwal[x], dwal[x + 1], dwal[x + 2], dwal[x + 3] }; - - float lights[4]; - for (int i = 0; i < 4; i++) - { - lights[i] = light; - light += rw_lightstep; - } - - WallscanSampler sampler[4]; - for (int i = 0; i < 4; i++) - { - if (x + i + 1 < x2) xmagnitude = fabs(FIXED2DBL(lwal[x + i + 1]) - FIXED2DBL(lwal[x + i])); - sampler[i] = WallscanSampler(y1[i], swal[x + i], yrepeat, lwal[x + i] + xoffset, xmagnitude, rw_pic, getcol); - } - - // Figure out where we vertically can start and stop drawing 4 columns in one go - int middle_y1 = y1[0]; - int middle_y2 = y2[0]; - for (int i = 1; i < 4; i++) - { - 
middle_y1 = MAX(y1[i], middle_y1); - middle_y2 = MIN(y2[i], middle_y2); - } - - // If we got an empty column in our set we cannot draw 4 columns in one go: - bool empty_column_in_set = false; - int bilinear_count = 0; - for (int i = 0; i < 4; i++) - { - if (y2[i] <= y1[i]) - empty_column_in_set = true; - if (sampler[i].source2) - bilinear_count++; - } - - if (empty_column_in_set || middle_y2 <= middle_y1 || (bilinear_count > 0 && bilinear_count < 4)) - { - for (int i = 0; i < 4; i++) - { - if (y2[i] <= y1[i]) - continue; - - if (!fixed) - R_SetColorMapLight(basecolormap, lights[i], wallshade); - wallscan_drawcol1(x + i, y1[i], y2[i], sampler[i], draw1column); - } - continue; - } - - // Draw the first rows where not all 4 columns are active - for (int i = 0; i < 4; i++) - { - if (!fixed) - R_SetColorMapLight(basecolormap, lights[i], wallshade); - - if (y1[i] < middle_y1) - wallscan_drawcol1(x + i, y1[i], middle_y1, sampler[i], draw1column); - } - - // Draw the area where all 4 columns are active - if (!fixed) - { - for (int i = 0; i < 4; i++) - { - if (r_swtruecolor) - { - palookupoffse[i] = basecolormap->Maps; - palookuplight[i] = LIGHTSCALE(lights[i], wallshade); - } - else - { - palookupoffse[i] = basecolormap->Maps + (GETPALOOKUP(lights[i], wallshade) << COLORMAPSHIFT); - palookuplight[i] = 0; - } - } - } - wallscan_drawcol4(x, middle_y1, middle_y2, sampler, draw4columns); - - // Draw the last rows where not all 4 columns are active - for (int i = 0; i < 4; i++) - { - if (!fixed) - R_SetColorMapLight(basecolormap, lights[i], wallshade); - - if (middle_y2 < y2[i]) - wallscan_drawcol1(x + i, middle_y2, y2[i], sampler[i], draw1column); - } - } - - // The last unaligned columns: - for (int x = aligned_x2; x < x2; x++, light += rw_lightstep) - { - int y1 = uwal[x]; - int y2 = dwal[x]; - if (y2 <= y1) - continue; - - if (!fixed) - R_SetColorMapLight(basecolormap, light, wallshade); - - if (x + 1 < x2) xmagnitude = fabs(FIXED2DBL(lwal[x + 1]) - FIXED2DBL(lwal[x])); - - 
WallscanSampler sampler(y1, swal[x], yrepeat, lwal[x] + xoffset, xmagnitude, rw_pic, getcol); - wallscan_drawcol1(x, y1, y2, sampler, draw1column); - } - - NetUpdate (); -} - -void wallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x)) -{ - wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, [](int bits, Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) - { - setupvline(bits); - line1 = dovline1; - line4 = dovline4; - }); -} - -void maskwallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x)) -{ - if (!rw_pic->bMasked) // Textures that aren't masked can use the faster wallscan. - { - wallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol); - } - else - { - wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, [](int bits, Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) - { - setupmvline(bits); - line1 = domvline1; - line4 = domvline4; - }); - } -} - -void transmaskwallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x)) -{ - static fixed_t(*tmvline1)(); - static void(*tmvline4)(); - if (!R_GetTransMaskDrawers(&tmvline1, &tmvline4)) - { - // The current translucency is unsupported, so draw with regular maskwallscan instead. 
- maskwallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol); - } - else - { - wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, [](int bits, Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) - { - setuptmvline(bits); - line1 = reinterpret_cast(tmvline1); - line4 = tmvline4; - }); - } -} - -void wallscan_striped (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat) -{ - FDynamicColormap *startcolormap = basecolormap; - int startshade = wallshade; - bool fogginess = foggy; - - short most1[MAXWIDTH], most2[MAXWIDTH], most3[MAXWIDTH]; - short *up, *down; - - up = uwal; - down = most1; - - assert(WallC.sx1 <= x1); - assert(WallC.sx2 >= x2); - - // kg3D - fake floors instead of zdoom light list - for (unsigned int i = 0; i < frontsector->e->XFloor.lightlist.Size(); i++) - { - int j = WallMost (most3, frontsector->e->XFloor.lightlist[i].plane, &WallC); - if (j != 3) - { - for (int j = x1; j < x2; ++j) - { - down[j] = clamp (most3[j], up[j], dwal[j]); - } - wallscan (x1, x2, up, down, swal, lwal, yrepeat); - up = down; - down = (down == most1) ? 
most2 : most1; - } - - lightlist_t *lit = &frontsector->e->XFloor.lightlist[i]; - basecolormap = lit->extra_colormap; - wallshade = LIGHT2SHADE(curline->sidedef->GetLightLevel(fogginess, - *lit->p_lightlevel, lit->lightsource != NULL) + r_actualextralight); - } - - wallscan (x1, x2, up, dwal, swal, lwal, yrepeat); - basecolormap = startcolormap; - wallshade = startshade; -} - -static void call_wallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, bool mask) -{ - if (mask) - { - if (colfunc == basecolfunc) - { - maskwallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat); - } - else - { - transmaskwallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat); - } - } - else - { - if (fixedcolormap != NULL || fixedlightlev >= 0 || !(frontsector->e && frontsector->e->XFloor.lightlist.Size())) - { - wallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat); - } - else - { - wallscan_striped(x1, x2, uwal, dwal, swal, lwal, yrepeat); - } - } -} - -//============================================================================= -// -// wallscan_np2 -// -// This is a wrapper around wallscan that helps it tile textures whose heights -// are not powers of 2. It divides the wall into texture-sized strips and calls -// wallscan for each of those. Since only one repetition of the texture fits -// in each strip, wallscan will not tile. 
-// -//============================================================================= - -void wallscan_np2(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, double top, double bot, bool mask) -{ - if (!r_np2) - { - call_wallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat, mask); - } - else - { - short most1[MAXWIDTH], most2[MAXWIDTH], most3[MAXWIDTH]; - short *up, *down; - double texheight = rw_pic->GetHeight(); - double partition; - double scaledtexheight = texheight / yrepeat; - - if (yrepeat >= 0) - { // normal orientation: draw strips from top to bottom - partition = top - fmod(top - dc_texturemid / yrepeat - ViewPos.Z, scaledtexheight); - if (partition == top) - { - partition -= scaledtexheight; - } - up = uwal; - down = most1; - dc_texturemid = (partition - ViewPos.Z) * yrepeat + texheight; - while (partition > bot) - { - int j = OWallMost(most3, partition - ViewPos.Z, &WallC); - if (j != 3) - { - for (int j = x1; j < x2; ++j) - { - down[j] = clamp(most3[j], up[j], dwal[j]); - } - call_wallscan(x1, x2, up, down, swal, lwal, yrepeat, mask); - up = down; - down = (down == most1) ? most2 : most1; - } - partition -= scaledtexheight; - dc_texturemid -= texheight; - } - call_wallscan(x1, x2, up, dwal, swal, lwal, yrepeat, mask); - } - else - { // upside down: draw strips from bottom to top - partition = bot - fmod(bot - dc_texturemid / yrepeat - ViewPos.Z, scaledtexheight); - up = most1; - down = dwal; - dc_texturemid = (partition - ViewPos.Z) * yrepeat + texheight; - while (partition < top) - { - int j = OWallMost(most3, partition - ViewPos.Z, &WallC); - if (j != 12) - { - for (int j = x1; j < x2; ++j) - { - up[j] = clamp(most3[j], uwal[j], down[j]); - } - call_wallscan(x1, x2, up, down, swal, lwal, yrepeat, mask); - down = up; - up = (up == most1) ? 
most2 : most1; - } - partition -= scaledtexheight; - dc_texturemid -= texheight; - } - call_wallscan(x1, x2, uwal, down, swal, lwal, yrepeat, mask); - } - } -} - -static void wallscan_np2_ds(drawseg_t *ds, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat) -{ - if (rw_pic->GetHeight() != 1 << rw_pic->HeightBits) - { - double frontcz1 = ds->curline->frontsector->ceilingplane.ZatPoint(ds->curline->v1); - double frontfz1 = ds->curline->frontsector->floorplane.ZatPoint(ds->curline->v1); - double frontcz2 = ds->curline->frontsector->ceilingplane.ZatPoint(ds->curline->v2); - double frontfz2 = ds->curline->frontsector->floorplane.ZatPoint(ds->curline->v2); - double top = MAX(frontcz1, frontcz2); - double bot = MIN(frontfz1, frontfz2); - if (fake3D & FAKE3D_CLIPTOP) - { - top = MIN(top, sclipTop); - } - if (fake3D & FAKE3D_CLIPBOTTOM) - { - bot = MAX(bot, sclipBottom); - } - wallscan_np2(x1, x2, uwal, dwal, swal, lwal, yrepeat, top, bot, true); - } - else - { - call_wallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat, true); - } -} - // // R_RenderSegLoop // Draws zero, one, or two textures for walls. @@ -1768,9 +1071,9 @@ void R_RenderSegLoop () fixed_t xoffset = rw_offset; if (fixedlightlev >= 0) - R_SetColorMapLight((r_fullbrightignoresectorcolor) ? &FullNormalLight : basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); + dc_colormap = (r_fullbrightignoresectorcolor) ? 
(FullNormalLight.Maps + fixedlightlev) : (basecolormap->Maps + fixedlightlev); else if (fixedcolormap != NULL) - R_SetColorMapLight(fixedcolormap, 0, 0); + dc_colormap = fixedcolormap; // clip wall to the floor and ceiling for (x = x1; x < x2; ++x) @@ -1881,8 +1184,8 @@ void R_RenderSegLoop () call_wallscan(x1, x2, walltop, wallbottom, swall, lwall, yscale, false); } } - fillshort (ceilingclip+x1, x2-x1, viewheight); - fillshort (floorclip+x1, x2-x1, 0xffff); + clearbufshort (ceilingclip+x1, x2-x1, viewheight); + clearbufshort (floorclip+x1, x2-x1, 0xffff); } else { // two sided line @@ -2405,7 +1708,7 @@ void R_StoreWallRange (int start, int stop) { ds_p->sprtopclip = R_NewOpening (stop - start); ds_p->sprbottomclip = R_NewOpening (stop - start); - fillshort (openings + ds_p->sprtopclip, stop-start, viewheight); + clearbufshort (openings + ds_p->sprtopclip, stop-start, viewheight); memset (openings + ds_p->sprbottomclip, -1, (stop-start)*sizeof(short)); ds_p->silhouette = SIL_BOTH; } @@ -2445,7 +1748,7 @@ void R_StoreWallRange (int start, int stop) if (doorclosed || (rw_backfz1 >= rw_frontcz1 && rw_backfz2 >= rw_frontcz2)) { // killough 1/17/98, 2/8/98 ds_p->sprtopclip = R_NewOpening (stop - start); - fillshort (openings + ds_p->sprtopclip, stop - start, viewheight); + clearbufshort (openings + ds_p->sprtopclip, stop - start, viewheight); ds_p->silhouette |= SIL_TOP; } } @@ -2666,7 +1969,7 @@ int WallMostAny(short *mostbuf, double z1, double z2, const FWallCoords *wallc) } else if (y1 > viewheight && y2 > viewheight) // entire line is below screen { - fillshort(&mostbuf[wallc->sx1], wallc->sx2 - wallc->sx1, viewheight); + clearbufshort(&mostbuf[wallc->sx1], wallc->sx2 - wallc->sx1, viewheight); return 12; } @@ -3037,11 +2340,11 @@ static void R_RenderDecal (side_t *wall, DBaseDecal *decal, drawseg_t *clipper, rw_light = rw_lightleft + (x1 - savecoord.sx1) * rw_lightstep; if (fixedlightlev >= 0) - R_SetColorMapLight((r_fullbrightignoresectorcolor) ? 
&FullNormalLight : usecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); + dc_colormap = (r_fullbrightignoresectorcolor) ? (FullNormalLight.Maps + fixedlightlev) : (usecolormap->Maps + fixedlightlev); else if (fixedcolormap != NULL) - R_SetColorMapLight(fixedcolormap, 0, 0); + dc_colormap = fixedcolormap; else if (!foggy && (decal->RenderFlags & RF_FULLBRIGHT)) - R_SetColorMapLight((r_fullbrightignoresectorcolor) ? &FullNormalLight : usecolormap, 0, 0); + dc_colormap = (r_fullbrightignoresectorcolor) ? FullNormalLight.Maps : usecolormap->Maps; else calclighting = true; @@ -3092,7 +2395,7 @@ static void R_RenderDecal (side_t *wall, DBaseDecal *decal, drawseg_t *clipper, { if (calclighting) { // calculate lighting - R_SetColorMapLight(usecolormap, rw_light, wallshade); + dc_colormap = usecolormap->Maps + (GETPALOOKUP (rw_light, wallshade) << COLORMAPSHIFT); } R_WallSpriteColumn (false); dc_x++; @@ -3102,9 +2405,9 @@ static void R_RenderDecal (side_t *wall, DBaseDecal *decal, drawseg_t *clipper, { if (calclighting) { // calculate lighting - R_SetColorMapLight(usecolormap, rw_light, wallshade); + dc_colormap = usecolormap->Maps + (GETPALOOKUP (rw_light, wallshade) << COLORMAPSHIFT); } - rt_initcols(nullptr); + rt_initcols(); for (int zz = 4; zz; --zz) { R_WallSpriteColumn (true); @@ -3117,7 +2420,7 @@ static void R_RenderDecal (side_t *wall, DBaseDecal *decal, drawseg_t *clipper, { if (calclighting) { // calculate lighting - R_SetColorMapLight(usecolormap, rw_light, wallshade); + dc_colormap = usecolormap->Maps + (GETPALOOKUP (rw_light, wallshade) << COLORMAPSHIFT); } R_WallSpriteColumn (false); dc_x++; @@ -3141,4 +2444,4 @@ done: WallC = savecoord; } -} +} \ No newline at end of file diff --git a/src/r_wallscan.cpp b/src/r_wallscan.cpp new file mode 100644 index 0000000000..c5bcb1d26c --- /dev/null +++ b/src/r_wallscan.cpp @@ -0,0 +1,583 @@ +/* +** Replacement for Build's wallscan free of any Build licensing issues. 
+** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. +** +*/ + +#include +#include + +#include "doomdef.h" +#include "doomstat.h" +#include "doomdata.h" + +#include "r_local.h" +#include "r_sky.h" +#include "v_video.h" + +#include "m_swap.h" +#include "a_sharedglobal.h" +#include "d_net.h" +#include "g_level.h" +#include "r_draw.h" +#include "r_bsp.h" +#include "r_plane.h" +#include "r_segs.h" +#include "r_3dfloors.h" +#include "v_palette.h" +#include "r_data/colormaps.h" + +namespace swrenderer +{ + using namespace drawerargs; + + extern FTexture *rw_pic; + extern int wallshade; + +struct WallscanSampler +{ + WallscanSampler() { } + WallscanSampler(int y1, float swal, double yrepeat, fixed_t xoffset, FTexture *texture, const BYTE*(*getcol)(FTexture *texture, int x)); + + uint32_t uv_pos; + uint32_t uv_step; + uint32_t uv_max; + + const BYTE *source; + uint32_t height; +}; + +WallscanSampler::WallscanSampler(int y1, float swal, double yrepeat, fixed_t xoffset, FTexture *texture, const BYTE*(*getcol)(FTexture *texture, int x)) +{ + height = texture->GetHeight(); + + int uv_fracbits = 32 - texture->HeightBits; + if (uv_fracbits 
!= 32) + { + uv_max = height << uv_fracbits; + + // Find start uv in [0-base_height[ range. + // Not using xs_ToFixed because it rounds the result and we need something that always rounds down to stay within the range. + double uv_stepd = swal * yrepeat; + double v = (dc_texturemid + uv_stepd * (y1 - CenterY + 0.5)) / height; + v = v - floor(v); + v *= height; + v *= (1 << uv_fracbits); + + uv_pos = (uint32_t)v; + uv_step = xs_ToFixed(uv_fracbits, uv_stepd); + if (uv_step == 0) // To prevent divide by zero elsewhere + uv_step = 1; + } + else + { // Hack for one pixel tall textures + uv_pos = 0; + uv_step = 0; + uv_max = 1; + } + + source = getcol(texture, xoffset >> FRACBITS); +} + +// Draw a column with support for non-power-of-two ranges +void wallscan_drawcol1(int x, int y1, int y2, WallscanSampler &sampler, DWORD(*draw1column)()) +{ + if (sampler.uv_max == 0 || sampler.uv_step == 0) // power of two + { + int count = y2 - y1; + + dc_source = sampler.source; + dc_dest = (ylookup[y1] + x) + dc_destorg; + dc_count = count; + dc_iscale = sampler.uv_step; + dc_texturefrac = sampler.uv_pos; + draw1column(); + + uint64_t step64 = sampler.uv_step; + uint64_t pos64 = sampler.uv_pos; + sampler.uv_pos = (uint32_t)(pos64 + step64 * count); + } + else + { + uint32_t uv_pos = sampler.uv_pos; + + uint32_t left = y2 - y1; + while (left > 0) + { + uint32_t available = sampler.uv_max - uv_pos; + uint32_t next_uv_wrap = available / sampler.uv_step; + if (available % sampler.uv_step != 0) + next_uv_wrap++; + uint32_t count = MIN(left, next_uv_wrap); + + dc_source = sampler.source; + dc_dest = (ylookup[y1] + x) + dc_destorg; + dc_count = count; + dc_iscale = sampler.uv_step; + dc_texturefrac = uv_pos; + draw1column(); + + left -= count; + uv_pos += sampler.uv_step * count; + if (uv_pos >= sampler.uv_max) + uv_pos -= sampler.uv_max; + } + + sampler.uv_pos = uv_pos; + } +} + +// Draw four columns with support for non-power-of-two ranges +void wallscan_drawcol4(int x, int y1, int y2, 
WallscanSampler *sampler, void(*draw4columns)()) +{ + if (sampler[0].uv_max == 0 || sampler[0].uv_step == 0) // power of two, no wrap handling needed + { + int count = y2 - y1; + for (int i = 0; i < 4; i++) + { + bufplce[i] = sampler[i].source; + vplce[i] = sampler[i].uv_pos; + vince[i] = sampler[i].uv_step; + + uint64_t step64 = sampler[i].uv_step; + uint64_t pos64 = sampler[i].uv_pos; + sampler[i].uv_pos = (uint32_t)(pos64 + step64 * count); + } + dc_dest = (ylookup[y1] + x) + dc_destorg; + dc_count = count; + draw4columns(); + } + else + { + dc_dest = (ylookup[y1] + x) + dc_destorg; + for (int i = 0; i < 4; i++) + { + bufplce[i] = sampler[i].source; + } + + uint32_t left = y2 - y1; + while (left > 0) + { + // Find which column wraps first + uint32_t count = left; + for (int i = 0; i < 4; i++) + { + uint32_t available = sampler[i].uv_max - sampler[i].uv_pos; + uint32_t next_uv_wrap = available / sampler[i].uv_step; + if (available % sampler[i].uv_step != 0) + next_uv_wrap++; + count = MIN(next_uv_wrap, count); + } + + // Draw until that column wraps + for (int i = 0; i < 4; i++) + { + vplce[i] = sampler[i].uv_pos; + vince[i] = sampler[i].uv_step; + } + dc_count = count; + draw4columns(); + + // Wrap the uv position + for (int i = 0; i < 4; i++) + { + sampler[i].uv_pos += sampler[i].uv_step * count; + if (sampler[i].uv_pos >= sampler[i].uv_max) + sampler[i].uv_pos -= sampler[i].uv_max; + } + + left -= count; + } + } +} + +typedef DWORD(*Draw1ColumnFuncPtr)(); +typedef void(*Draw4ColumnsFuncPtr)(); + +void wallscan_any( + int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, + const BYTE *(*getcol)(FTexture *tex, int x), + void(setupwallscan(int bits, Draw1ColumnFuncPtr &draw1, Draw4ColumnsFuncPtr &draw2))) +{ + if (rw_pic->UseType == FTexture::TEX_Null) + return; + + fixed_t xoffset = rw_offset; + + rw_pic->GetHeight(); // To ensure that rw_pic->HeightBits has been set + int fracbits = 32 - rw_pic->HeightBits; + if (fracbits == 32) 
+ { // Hack for one pixel tall textures + fracbits = 0; + yrepeat = 0; + dc_texturemid = 0; + } + + DWORD(*draw1column)(); + void(*draw4columns)(); + setupwallscan(fracbits, draw1column, draw4columns); + + bool fixed = (fixedcolormap != NULL || fixedlightlev >= 0); + if (fixed) + { + palookupoffse[0] = dc_colormap; + palookupoffse[1] = dc_colormap; + palookupoffse[2] = dc_colormap; + palookupoffse[3] = dc_colormap; + } + + if (fixedcolormap) + dc_colormap = fixedcolormap; + else + dc_colormap = basecolormap->Maps; + + float light = rw_light; + + // Calculate where 4 column alignment begins and ends: + int aligned_x1 = clamp((x1 + 3) / 4 * 4, x1, x2); + int aligned_x2 = clamp(x2 / 4 * 4, x1, x2); + + // First unaligned columns: + for (int x = x1; x < aligned_x1; x++, light += rw_lightstep) + { + int y1 = uwal[x]; + int y2 = dwal[x]; + if (y2 <= y1) + continue; + + if (!fixed) + dc_colormap = basecolormap->Maps + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT); + + WallscanSampler sampler(y1, swal[x], yrepeat, lwal[x] + xoffset, rw_pic, getcol); + wallscan_drawcol1(x, y1, y2, sampler, draw1column); + } + + // The aligned columns + for (int x = aligned_x1; x < aligned_x2; x += 4) + { + // Find y1, y2, light and uv values for four columns: + int y1[4] = { uwal[x], uwal[x + 1], uwal[x + 2], uwal[x + 3] }; + int y2[4] = { dwal[x], dwal[x + 1], dwal[x + 2], dwal[x + 3] }; + + float lights[4]; + for (int i = 0; i < 4; i++) + { + lights[i] = light; + light += rw_lightstep; + } + + WallscanSampler sampler[4]; + for (int i = 0; i < 4; i++) + sampler[i] = WallscanSampler(y1[i], swal[x + i], yrepeat, lwal[x + i] + xoffset, rw_pic, getcol); + + // Figure out where we vertically can start and stop drawing 4 columns in one go + int middle_y1 = y1[0]; + int middle_y2 = y2[0]; + for (int i = 1; i < 4; i++) + { + middle_y1 = MAX(y1[i], middle_y1); + middle_y2 = MIN(y2[i], middle_y2); + } + + // If we got an empty column in our set we cannot draw 4 columns in one go: + bool 
empty_column_in_set = false; + for (int i = 0; i < 4; i++) + { + if (y2[i] <= y1[i]) + empty_column_in_set = true; + } + + if (empty_column_in_set || middle_y2 <= middle_y1) + { + for (int i = 0; i < 4; i++) + { + if (y2[i] <= y1[i]) + continue; + + if (!fixed) + dc_colormap = basecolormap->Maps + (GETPALOOKUP(lights[i], wallshade) << COLORMAPSHIFT); + wallscan_drawcol1(x + i, y1[i], y2[i], sampler[i], draw1column); + } + continue; + } + + // Draw the first rows where not all 4 columns are active + for (int i = 0; i < 4; i++) + { + if (!fixed) + dc_colormap = basecolormap->Maps + (GETPALOOKUP(lights[i], wallshade) << COLORMAPSHIFT); + + if (y1[i] < middle_y1) + wallscan_drawcol1(x + i, y1[i], middle_y1, sampler[i], draw1column); + } + + // Draw the area where all 4 columns are active + if (!fixed) + { + for (int i = 0; i < 4; i++) + { + palookupoffse[i] = basecolormap->Maps + (GETPALOOKUP(lights[i], wallshade) << COLORMAPSHIFT); + } + } + wallscan_drawcol4(x, middle_y1, middle_y2, sampler, draw4columns); + + // Draw the last rows where not all 4 columns are active + for (int i = 0; i < 4; i++) + { + if (!fixed) + dc_colormap = basecolormap->Maps + (GETPALOOKUP(lights[i], wallshade) << COLORMAPSHIFT); + + if (middle_y2 < y2[i]) + wallscan_drawcol1(x + i, middle_y2, y2[i], sampler[i], draw1column); + } + } + + // The last unaligned columns: + for (int x = aligned_x2; x < x2; x++, light += rw_lightstep) + { + int y1 = uwal[x]; + int y2 = dwal[x]; + if (y2 <= y1) + continue; + + if (!fixed) + dc_colormap = basecolormap->Maps + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT); + + WallscanSampler sampler(y1, swal[x], yrepeat, lwal[x] + xoffset, rw_pic, getcol); + wallscan_drawcol1(x, y1, y2, sampler, draw1column); + } + + NetUpdate(); +} + +void wallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x)) +{ + wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, [](int bits, 
Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) + { + setupvline(bits); + line1 = dovline1; + line4 = dovline4; + }); +} + +void maskwallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x) = R_GetColumn) +{ + if (!rw_pic->bMasked) // Textures that aren't masked can use the faster wallscan. + { + wallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol); + } + else + { + wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, [](int bits, Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) + { + setupmvline(bits); + line1 = domvline1; + line4 = domvline4; + }); + } +} + +void transmaskwallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x) = R_GetColumn) +{ + static fixed_t(*tmvline1)(); + static void(*tmvline4)(); + if (!R_GetTransMaskDrawers(&tmvline1, &tmvline4)) + { + // The current translucency is unsupported, so draw with regular maskwallscan instead. 
+ maskwallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol); + } + else + { + wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, [](int bits, Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) + { + setuptmvline(bits); + line1 = reinterpret_cast(tmvline1); + line4 = tmvline4; + }); + } +} + +void wallscan_striped (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat) +{ + FDynamicColormap *startcolormap = basecolormap; + int startshade = wallshade; + bool fogginess = foggy; + + short most1[MAXWIDTH], most2[MAXWIDTH], most3[MAXWIDTH]; + short *up, *down; + + up = uwal; + down = most1; + + assert(WallC.sx1 <= x1); + assert(WallC.sx2 >= x2); + + // kg3D - fake floors instead of zdoom light list + for (unsigned int i = 0; i < frontsector->e->XFloor.lightlist.Size(); i++) + { + int j = WallMost (most3, frontsector->e->XFloor.lightlist[i].plane, &WallC); + if (j != 3) + { + for (int j = x1; j < x2; ++j) + { + down[j] = clamp (most3[j], up[j], dwal[j]); + } + wallscan (x1, x2, up, down, swal, lwal, yrepeat); + up = down; + down = (down == most1) ? 
most2 : most1; + } + + lightlist_t *lit = &frontsector->e->XFloor.lightlist[i]; + basecolormap = lit->extra_colormap; + wallshade = LIGHT2SHADE(curline->sidedef->GetLightLevel(fogginess, + *lit->p_lightlevel, lit->lightsource != NULL) + r_actualextralight); + } + + wallscan (x1, x2, up, dwal, swal, lwal, yrepeat); + basecolormap = startcolormap; + wallshade = startshade; +} + +void call_wallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, bool mask) +{ + if (mask) + { + if (colfunc == basecolfunc) + { + maskwallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat); + } + else + { + transmaskwallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat); + } + } + else + { + if (fixedcolormap != NULL || fixedlightlev >= 0 || !(frontsector->e && frontsector->e->XFloor.lightlist.Size())) + { + wallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat); + } + else + { + wallscan_striped(x1, x2, uwal, dwal, swal, lwal, yrepeat); + } + } +} + +//============================================================================= +// +// wallscan_np2 +// +// This is a wrapper around wallscan that helps it tile textures whose heights +// are not powers of 2. It divides the wall into texture-sized strips and calls +// wallscan for each of those. Since only one repetition of the texture fits +// in each strip, wallscan will not tile. 
+// +//============================================================================= + +void wallscan_np2(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, double top, double bot, bool mask) +{ + short most1[MAXWIDTH], most2[MAXWIDTH], most3[MAXWIDTH]; + short *up, *down; + double texheight = rw_pic->GetHeight(); + double partition; + double scaledtexheight = texheight / yrepeat; + + if (yrepeat >= 0) + { // normal orientation: draw strips from top to bottom + partition = top - fmod(top - dc_texturemid / yrepeat - ViewPos.Z, scaledtexheight); + if (partition == top) + { + partition -= scaledtexheight; + } + up = uwal; + down = most1; + dc_texturemid = (partition - ViewPos.Z) * yrepeat + texheight; + while (partition > bot) + { + int j = OWallMost(most3, partition - ViewPos.Z, &WallC); + if (j != 3) + { + for (int j = x1; j < x2; ++j) + { + down[j] = clamp(most3[j], up[j], dwal[j]); + } + call_wallscan(x1, x2, up, down, swal, lwal, yrepeat, mask); + up = down; + down = (down == most1) ? most2 : most1; + } + partition -= scaledtexheight; + dc_texturemid -= texheight; + } + call_wallscan(x1, x2, up, dwal, swal, lwal, yrepeat, mask); + } + else + { // upside down: draw strips from bottom to top + partition = bot - fmod(bot - dc_texturemid / yrepeat - ViewPos.Z, scaledtexheight); + up = most1; + down = dwal; + dc_texturemid = (partition - ViewPos.Z) * yrepeat + texheight; + while (partition < top) + { + int j = OWallMost(most3, partition - ViewPos.Z, &WallC); + if (j != 12) + { + for (int j = x1; j < x2; ++j) + { + up[j] = clamp(most3[j], uwal[j], down[j]); + } + call_wallscan(x1, x2, up, down, swal, lwal, yrepeat, mask); + down = up; + up = (up == most1) ? 
most2 : most1; + } + partition -= scaledtexheight; + dc_texturemid -= texheight; + } + call_wallscan(x1, x2, uwal, down, swal, lwal, yrepeat, mask); + } +} + +void wallscan_np2_ds(drawseg_t *ds, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat) +{ + if (rw_pic->GetHeight() != 1 << rw_pic->HeightBits) + { + double frontcz1 = ds->curline->frontsector->ceilingplane.ZatPoint(ds->curline->v1); + double frontfz1 = ds->curline->frontsector->floorplane.ZatPoint(ds->curline->v1); + double frontcz2 = ds->curline->frontsector->ceilingplane.ZatPoint(ds->curline->v2); + double frontfz2 = ds->curline->frontsector->floorplane.ZatPoint(ds->curline->v2); + double top = MAX(frontcz1, frontcz2); + double bot = MIN(frontfz1, frontfz2); + if (fake3D & FAKE3D_CLIPTOP) + { + top = MIN(top, sclipTop); + } + if (fake3D & FAKE3D_CLIPBOTTOM) + { + bot = MAX(bot, sclipBottom); + } + wallscan_np2(x1, x2, uwal, dwal, swal, lwal, yrepeat, top, bot, true); + } + else + { + call_wallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat, true); + } +} + + +} \ No newline at end of file From 2b5f9eebba1f508556d4c4f8787399c1d1525e0f Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Fri, 9 Dec 2016 08:41:59 -0500 Subject: [PATCH 474/912] - Correctly redid commit 34d551fe1f3653cbb9447f9169d20f5255a69b9a. 
--- src/r_wallscan.cpp | 454 +++++++++++++++++++++++++++++++-------------- 1 file changed, 314 insertions(+), 140 deletions(-) diff --git a/src/r_wallscan.cpp b/src/r_wallscan.cpp index c5bcb1d26c..8c5f04d54a 100644 --- a/src/r_wallscan.cpp +++ b/src/r_wallscan.cpp @@ -50,63 +50,157 @@ namespace swrenderer extern FTexture *rw_pic; extern int wallshade; + struct WallscanSampler { WallscanSampler() { } - WallscanSampler(int y1, float swal, double yrepeat, fixed_t xoffset, FTexture *texture, const BYTE*(*getcol)(FTexture *texture, int x)); + WallscanSampler(int y1, float swal, double yrepeat, fixed_t xoffset, double xmagnitude, FTexture *texture, const BYTE*(*getcol)(FTexture *texture, int x)); uint32_t uv_pos; uint32_t uv_step; uint32_t uv_max; const BYTE *source; + const BYTE *source2; + uint32_t texturefracx; uint32_t height; }; -WallscanSampler::WallscanSampler(int y1, float swal, double yrepeat, fixed_t xoffset, FTexture *texture, const BYTE*(*getcol)(FTexture *texture, int x)) +WallscanSampler::WallscanSampler(int y1, float swal, double yrepeat, fixed_t xoffset, double xmagnitude, FTexture *texture, const BYTE*(*getcol)(FTexture *texture, int x)) { - height = texture->GetHeight(); + xoffset += FLOAT2FIXED(xmagnitude * 0.5); - int uv_fracbits = 32 - texture->HeightBits; - if (uv_fracbits != 32) + if (!r_swtruecolor) { - uv_max = height << uv_fracbits; + height = texture->GetHeight(); - // Find start uv in [0-base_height[ range. - // Not using xs_ToFixed because it rounds the result and we need something that always rounds down to stay within the range. 
- double uv_stepd = swal * yrepeat; - double v = (dc_texturemid + uv_stepd * (y1 - CenterY + 0.5)) / height; - v = v - floor(v); - v *= height; - v *= (1 << uv_fracbits); + int uv_fracbits = 32 - texture->HeightBits; + if (uv_fracbits != 32) + { + uv_max = height << uv_fracbits; - uv_pos = (uint32_t)v; - uv_step = xs_ToFixed(uv_fracbits, uv_stepd); - if (uv_step == 0) // To prevent divide by zero elsewhere - uv_step = 1; + // Find start uv in [0-base_height[ range. + // Not using xs_ToFixed because it rounds the result and we need something that always rounds down to stay within the range. + double uv_stepd = swal * yrepeat; + double v = (dc_texturemid + uv_stepd * (y1 - CenterY + 0.5)) / height; + v = v - floor(v); + v *= height; + v *= (1 << uv_fracbits); + + uv_pos = (uint32_t)v; + uv_step = xs_ToFixed(uv_fracbits, uv_stepd); + if (uv_step == 0) // To prevent divide by zero elsewhere + uv_step = 1; + } + else + { // Hack for one pixel tall textures + uv_pos = 0; + uv_step = 0; + uv_max = 1; + } + + source = getcol(texture, xoffset >> FRACBITS); + source2 = nullptr; + texturefracx = 0; } else - { // Hack for one pixel tall textures - uv_pos = 0; - uv_step = 0; - uv_max = 1; - } + { + // Normalize to 0-1 range: + double uv_stepd = swal * yrepeat; + double v = (dc_texturemid + uv_stepd * (y1 - CenterY + 0.5)) / texture->GetHeight(); + v = v - floor(v); + double v_step = uv_stepd / texture->GetHeight(); - source = getcol(texture, xoffset >> FRACBITS); + if (isnan(v) || isnan(v_step)) // this should never happen, but it apparently does.. 
+ { + uv_stepd = 0.0; + v = 0.0; + v_step = 0.0; + } + + // Convert to uint32: + uv_pos = (uint32_t)(v * 0x100000000LL); + uv_step = (uint32_t)(v_step * 0x100000000LL); + uv_max = 0; + + // Texture mipmap and filter selection: + if (getcol != R_GetColumn) + { + source = getcol(texture, xoffset >> FRACBITS); + source2 = nullptr; + height = texture->GetHeight(); + texturefracx = 0; + } + else + { + double ymagnitude = fabs(uv_stepd); + double magnitude = MAX(ymagnitude, xmagnitude); + double min_lod = -1000.0; + double lod = MAX(log2(magnitude) + r_lod_bias, min_lod); + bool magnifying = lod < 0.0f; + + int mipmap_offset = 0; + int mip_width = texture->GetWidth(); + int mip_height = texture->GetHeight(); + if (r_mipmap && texture->Mipmapped() && mip_width > 1 && mip_height > 1) + { + uint32_t xpos = (uint32_t)((((uint64_t)xoffset) << FRACBITS) / mip_width); + + int level = (int)lod; + while (level > 0 && mip_width > 1 && mip_height > 1) + { + mipmap_offset += mip_width * mip_height; + level--; + mip_width = MAX(mip_width >> 1, 1); + mip_height = MAX(mip_height >> 1, 1); + } + xoffset = (xpos >> FRACBITS) * mip_width; + } + + const uint32_t *pixels = texture->GetPixelsBgra() + mipmap_offset; + + bool filter_nearest = (magnifying && !r_magfilter) || (!magnifying && !r_minfilter); + if (filter_nearest) + { + int tx = (xoffset >> FRACBITS) % mip_width; + if (tx < 0) + tx += mip_width; + source = (BYTE*)(pixels + tx * mip_height); + source2 = nullptr; + height = mip_height; + texturefracx = 0; + } + else + { + xoffset -= FRACUNIT / 2; + int tx0 = (xoffset >> FRACBITS) % mip_width; + if (tx0 < 0) + tx0 += mip_width; + int tx1 = (tx0 + 1) % mip_width; + source = (BYTE*)(pixels + tx0 * mip_height); + source2 = (BYTE*)(pixels + tx1 * mip_height); + height = mip_height; + texturefracx = (xoffset >> (FRACBITS - 4)) & 15; + } + } + } } // Draw a column with support for non-power-of-two ranges void wallscan_drawcol1(int x, int y1, int y2, WallscanSampler &sampler, 
DWORD(*draw1column)()) { - if (sampler.uv_max == 0 || sampler.uv_step == 0) // power of two + if (r_swtruecolor) { int count = y2 - y1; dc_source = sampler.source; - dc_dest = (ylookup[y1] + x) + dc_destorg; + dc_source2 = sampler.source2; + dc_texturefracx = sampler.texturefracx; + dc_dest = (ylookup[y1] + x) * 4 + dc_destorg; dc_count = count; dc_iscale = sampler.uv_step; dc_texturefrac = sampler.uv_pos; + dc_textureheight = sampler.height; draw1column(); uint64_t step64 = sampler.uv_step; @@ -115,43 +209,68 @@ void wallscan_drawcol1(int x, int y1, int y2, WallscanSampler &sampler, DWORD(*d } else { - uint32_t uv_pos = sampler.uv_pos; - - uint32_t left = y2 - y1; - while (left > 0) + if (sampler.uv_max == 0 || sampler.uv_step == 0) // power of two { - uint32_t available = sampler.uv_max - uv_pos; - uint32_t next_uv_wrap = available / sampler.uv_step; - if (available % sampler.uv_step != 0) - next_uv_wrap++; - uint32_t count = MIN(left, next_uv_wrap); + int count = y2 - y1; dc_source = sampler.source; + dc_source2 = sampler.source2; + dc_texturefracx = sampler.texturefracx; dc_dest = (ylookup[y1] + x) + dc_destorg; dc_count = count; dc_iscale = sampler.uv_step; - dc_texturefrac = uv_pos; + dc_texturefrac = sampler.uv_pos; draw1column(); - left -= count; - uv_pos += sampler.uv_step * count; - if (uv_pos >= sampler.uv_max) - uv_pos -= sampler.uv_max; + uint64_t step64 = sampler.uv_step; + uint64_t pos64 = sampler.uv_pos; + sampler.uv_pos = (uint32_t)(pos64 + step64 * count); } + else + { + uint32_t uv_pos = sampler.uv_pos; - sampler.uv_pos = uv_pos; + uint32_t left = y2 - y1; + while (left > 0) + { + uint32_t available = sampler.uv_max - uv_pos; + uint32_t next_uv_wrap = available / sampler.uv_step; + if (available % sampler.uv_step != 0) + next_uv_wrap++; + uint32_t count = MIN(left, next_uv_wrap); + + dc_source = sampler.source; + dc_source2 = sampler.source2; + dc_texturefracx = sampler.texturefracx; + dc_dest = (ylookup[y1] + x) + dc_destorg; + dc_count = count; 
+ dc_iscale = sampler.uv_step; + dc_texturefrac = uv_pos; + draw1column(); + + left -= count; + uv_pos += sampler.uv_step * count; + if (uv_pos >= sampler.uv_max) + uv_pos -= sampler.uv_max; + } + + sampler.uv_pos = uv_pos; + } } } // Draw four columns with support for non-power-of-two ranges void wallscan_drawcol4(int x, int y1, int y2, WallscanSampler *sampler, void(*draw4columns)()) { - if (sampler[0].uv_max == 0 || sampler[0].uv_step == 0) // power of two, no wrap handling needed + if (r_swtruecolor) { int count = y2 - y1; for (int i = 0; i < 4; i++) { bufplce[i] = sampler[i].source; + bufplce2[i] = sampler[i].source2; + buftexturefracx[i] = sampler[i].texturefracx; + bufheight[i] = sampler[i].height; vplce[i] = sampler[i].uv_pos; vince[i] = sampler[i].uv_step; @@ -159,50 +278,74 @@ void wallscan_drawcol4(int x, int y1, int y2, WallscanSampler *sampler, void(*dr uint64_t pos64 = sampler[i].uv_pos; sampler[i].uv_pos = (uint32_t)(pos64 + step64 * count); } - dc_dest = (ylookup[y1] + x) + dc_destorg; + dc_dest = (ylookup[y1] + x) * 4 + dc_destorg; dc_count = count; draw4columns(); } else { - dc_dest = (ylookup[y1] + x) + dc_destorg; - for (int i = 0; i < 4; i++) + if (sampler[0].uv_max == 0 || sampler[0].uv_step == 0) // power of two, no wrap handling needed { - bufplce[i] = sampler[i].source; - } - - uint32_t left = y2 - y1; - while (left > 0) - { - // Find which column wraps first - uint32_t count = left; - for (int i = 0; i < 4; i++) - { - uint32_t available = sampler[i].uv_max - sampler[i].uv_pos; - uint32_t next_uv_wrap = available / sampler[i].uv_step; - if (available % sampler[i].uv_step != 0) - next_uv_wrap++; - count = MIN(next_uv_wrap, count); - } - - // Draw until that column wraps + int count = y2 - y1; for (int i = 0; i < 4; i++) { + bufplce[i] = sampler[i].source; + bufplce2[i] = sampler[i].source2; + buftexturefracx[i] = sampler[i].texturefracx; vplce[i] = sampler[i].uv_pos; vince[i] = sampler[i].uv_step; + + uint64_t step64 = sampler[i].uv_step; + 
uint64_t pos64 = sampler[i].uv_pos; + sampler[i].uv_pos = (uint32_t)(pos64 + step64 * count); } + dc_dest = (ylookup[y1] + x) + dc_destorg; dc_count = count; draw4columns(); - - // Wrap the uv position + } + else + { + dc_dest = (ylookup[y1] + x) + dc_destorg; for (int i = 0; i < 4; i++) { - sampler[i].uv_pos += sampler[i].uv_step * count; - if (sampler[i].uv_pos >= sampler[i].uv_max) - sampler[i].uv_pos -= sampler[i].uv_max; + bufplce[i] = sampler[i].source; + bufplce2[i] = sampler[i].source2; + buftexturefracx[i] = sampler[i].texturefracx; } - left -= count; + uint32_t left = y2 - y1; + while (left > 0) + { + // Find which column wraps first + uint32_t count = left; + for (int i = 0; i < 4; i++) + { + uint32_t available = sampler[i].uv_max - sampler[i].uv_pos; + uint32_t next_uv_wrap = available / sampler[i].uv_step; + if (available % sampler[i].uv_step != 0) + next_uv_wrap++; + count = MIN(next_uv_wrap, count); + } + + // Draw until that column wraps + for (int i = 0; i < 4; i++) + { + vplce[i] = sampler[i].uv_pos; + vince[i] = sampler[i].uv_step; + } + dc_count = count; + draw4columns(); + + // Wrap the uv position + for (int i = 0; i < 4; i++) + { + sampler[i].uv_pos += sampler[i].uv_step * count; + if (sampler[i].uv_pos >= sampler[i].uv_max) + sampler[i].uv_pos -= sampler[i].uv_max; + } + + left -= count; + } } } } @@ -231,7 +374,7 @@ void wallscan_any( DWORD(*draw1column)(); void(*draw4columns)(); - setupwallscan(fracbits, draw1column, draw4columns); + setupwallscan(r_swtruecolor ? 
FRACBITS : fracbits, draw1column, draw4columns); bool fixed = (fixedcolormap != NULL || fixedlightlev >= 0); if (fixed) @@ -240,12 +383,16 @@ void wallscan_any( palookupoffse[1] = dc_colormap; palookupoffse[2] = dc_colormap; palookupoffse[3] = dc_colormap; + palookuplight[0] = 0; + palookuplight[1] = 0; + palookuplight[2] = 0; + palookuplight[3] = 0; } if (fixedcolormap) - dc_colormap = fixedcolormap; + R_SetColorMapLight(fixedcolormap, 0, 0); else - dc_colormap = basecolormap->Maps; + R_SetColorMapLight(basecolormap, 0, 0); float light = rw_light; @@ -253,6 +400,8 @@ void wallscan_any( int aligned_x1 = clamp((x1 + 3) / 4 * 4, x1, x2); int aligned_x2 = clamp(x2 / 4 * 4, x1, x2); + double xmagnitude = 1.0; + // First unaligned columns: for (int x = x1; x < aligned_x1; x++, light += rw_lightstep) { @@ -262,9 +411,11 @@ void wallscan_any( continue; if (!fixed) - dc_colormap = basecolormap->Maps + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT); + R_SetColorMapLight(basecolormap, light, wallshade); - WallscanSampler sampler(y1, swal[x], yrepeat, lwal[x] + xoffset, rw_pic, getcol); + if (x + 1 < x2) xmagnitude = fabs(FIXED2DBL(lwal[x + 1]) - FIXED2DBL(lwal[x])); + + WallscanSampler sampler(y1, swal[x], yrepeat, lwal[x] + xoffset, xmagnitude, rw_pic, getcol); wallscan_drawcol1(x, y1, y2, sampler, draw1column); } @@ -284,7 +435,10 @@ void wallscan_any( WallscanSampler sampler[4]; for (int i = 0; i < 4; i++) - sampler[i] = WallscanSampler(y1[i], swal[x + i], yrepeat, lwal[x + i] + xoffset, rw_pic, getcol); + { + if (x + i + 1 < x2) xmagnitude = fabs(FIXED2DBL(lwal[x + i + 1]) - FIXED2DBL(lwal[x + i])); + sampler[i] = WallscanSampler(y1[i], swal[x + i], yrepeat, lwal[x + i] + xoffset, xmagnitude, rw_pic, getcol); + } // Figure out where we vertically can start and stop drawing 4 columns in one go int middle_y1 = y1[0]; @@ -297,13 +451,16 @@ void wallscan_any( // If we got an empty column in our set we cannot draw 4 columns in one go: bool empty_column_in_set = false; + int 
bilinear_count = 0; for (int i = 0; i < 4; i++) { if (y2[i] <= y1[i]) empty_column_in_set = true; + if (sampler[i].source2) + bilinear_count++; } - if (empty_column_in_set || middle_y2 <= middle_y1) + if (empty_column_in_set || middle_y2 <= middle_y1 || (bilinear_count > 0 && bilinear_count < 4)) { for (int i = 0; i < 4; i++) { @@ -311,7 +468,7 @@ void wallscan_any( continue; if (!fixed) - dc_colormap = basecolormap->Maps + (GETPALOOKUP(lights[i], wallshade) << COLORMAPSHIFT); + R_SetColorMapLight(basecolormap, lights[i], wallshade); wallscan_drawcol1(x + i, y1[i], y2[i], sampler[i], draw1column); } continue; @@ -321,7 +478,7 @@ void wallscan_any( for (int i = 0; i < 4; i++) { if (!fixed) - dc_colormap = basecolormap->Maps + (GETPALOOKUP(lights[i], wallshade) << COLORMAPSHIFT); + R_SetColorMapLight(basecolormap, lights[i], wallshade); if (y1[i] < middle_y1) wallscan_drawcol1(x + i, y1[i], middle_y1, sampler[i], draw1column); @@ -332,7 +489,16 @@ void wallscan_any( { for (int i = 0; i < 4; i++) { - palookupoffse[i] = basecolormap->Maps + (GETPALOOKUP(lights[i], wallshade) << COLORMAPSHIFT); + if (r_swtruecolor) + { + palookupoffse[i] = basecolormap->Maps; + palookuplight[i] = LIGHTSCALE(lights[i], wallshade); + } + else + { + palookupoffse[i] = basecolormap->Maps + (GETPALOOKUP(lights[i], wallshade) << COLORMAPSHIFT); + palookuplight[i] = 0; + } } } wallscan_drawcol4(x, middle_y1, middle_y2, sampler, draw4columns); @@ -341,7 +507,7 @@ void wallscan_any( for (int i = 0; i < 4; i++) { if (!fixed) - dc_colormap = basecolormap->Maps + (GETPALOOKUP(lights[i], wallshade) << COLORMAPSHIFT); + R_SetColorMapLight(basecolormap, lights[i], wallshade); if (middle_y2 < y2[i]) wallscan_drawcol1(x + i, middle_y2, y2[i], sampler[i], draw1column); @@ -357,13 +523,15 @@ void wallscan_any( continue; if (!fixed) - dc_colormap = basecolormap->Maps + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT); + R_SetColorMapLight(basecolormap, light, wallshade); - WallscanSampler sampler(y1, 
swal[x], yrepeat, lwal[x] + xoffset, rw_pic, getcol); + if (x + 1 < x2) xmagnitude = fabs(FIXED2DBL(lwal[x + 1]) - FIXED2DBL(lwal[x])); + + WallscanSampler sampler(y1, swal[x], yrepeat, lwal[x] + xoffset, xmagnitude, rw_pic, getcol); wallscan_drawcol1(x, y1, y2, sampler, draw1column); } - NetUpdate(); + NetUpdate (); } void wallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x)) @@ -376,7 +544,7 @@ void wallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lw }); } -void maskwallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x) = R_GetColumn) +void maskwallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x)) { if (!rw_pic->bMasked) // Textures that aren't masked can use the faster wallscan. { @@ -393,7 +561,7 @@ void maskwallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t } } -void transmaskwallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x) = R_GetColumn) +void transmaskwallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x)) { static fixed_t(*tmvline1)(); static void(*tmvline4)(); @@ -454,7 +622,7 @@ void wallscan_striped (int x1, int x2, short *uwal, short *dwal, float *swal, fi wallshade = startshade; } -void call_wallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, bool mask) +static void call_wallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, bool mask) { if (mask) { @@ -493,67 +661,74 @@ void call_wallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_ void wallscan_np2(int x1, int x2, short *uwal, short 
*dwal, float *swal, fixed_t *lwal, double yrepeat, double top, double bot, bool mask) { - short most1[MAXWIDTH], most2[MAXWIDTH], most3[MAXWIDTH]; - short *up, *down; - double texheight = rw_pic->GetHeight(); - double partition; - double scaledtexheight = texheight / yrepeat; - - if (yrepeat >= 0) - { // normal orientation: draw strips from top to bottom - partition = top - fmod(top - dc_texturemid / yrepeat - ViewPos.Z, scaledtexheight); - if (partition == top) - { - partition -= scaledtexheight; - } - up = uwal; - down = most1; - dc_texturemid = (partition - ViewPos.Z) * yrepeat + texheight; - while (partition > bot) - { - int j = OWallMost(most3, partition - ViewPos.Z, &WallC); - if (j != 3) - { - for (int j = x1; j < x2; ++j) - { - down[j] = clamp(most3[j], up[j], dwal[j]); - } - call_wallscan(x1, x2, up, down, swal, lwal, yrepeat, mask); - up = down; - down = (down == most1) ? most2 : most1; - } - partition -= scaledtexheight; - dc_texturemid -= texheight; - } - call_wallscan(x1, x2, up, dwal, swal, lwal, yrepeat, mask); + if (!r_np2) + { + call_wallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat, mask); } else - { // upside down: draw strips from bottom to top - partition = bot - fmod(bot - dc_texturemid / yrepeat - ViewPos.Z, scaledtexheight); - up = most1; - down = dwal; - dc_texturemid = (partition - ViewPos.Z) * yrepeat + texheight; - while (partition < top) - { - int j = OWallMost(most3, partition - ViewPos.Z, &WallC); - if (j != 12) + { + short most1[MAXWIDTH], most2[MAXWIDTH], most3[MAXWIDTH]; + short *up, *down; + double texheight = rw_pic->GetHeight(); + double partition; + double scaledtexheight = texheight / yrepeat; + + if (yrepeat >= 0) + { // normal orientation: draw strips from top to bottom + partition = top - fmod(top - dc_texturemid / yrepeat - ViewPos.Z, scaledtexheight); + if (partition == top) { - for (int j = x1; j < x2; ++j) - { - up[j] = clamp(most3[j], uwal[j], down[j]); - } - call_wallscan(x1, x2, up, down, swal, lwal, yrepeat, mask); - 
down = up; - up = (up == most1) ? most2 : most1; + partition -= scaledtexheight; } - partition -= scaledtexheight; - dc_texturemid -= texheight; - } - call_wallscan(x1, x2, uwal, down, swal, lwal, yrepeat, mask); + up = uwal; + down = most1; + dc_texturemid = (partition - ViewPos.Z) * yrepeat + texheight; + while (partition > bot) + { + int j = OWallMost(most3, partition - ViewPos.Z, &WallC); + if (j != 3) + { + for (int j = x1; j < x2; ++j) + { + down[j] = clamp(most3[j], up[j], dwal[j]); + } + call_wallscan(x1, x2, up, down, swal, lwal, yrepeat, mask); + up = down; + down = (down == most1) ? most2 : most1; + } + partition -= scaledtexheight; + dc_texturemid -= texheight; + } + call_wallscan(x1, x2, up, dwal, swal, lwal, yrepeat, mask); + } + else + { // upside down: draw strips from bottom to top + partition = bot - fmod(bot - dc_texturemid / yrepeat - ViewPos.Z, scaledtexheight); + up = most1; + down = dwal; + dc_texturemid = (partition - ViewPos.Z) * yrepeat + texheight; + while (partition < top) + { + int j = OWallMost(most3, partition - ViewPos.Z, &WallC); + if (j != 12) + { + for (int j = x1; j < x2; ++j) + { + up[j] = clamp(most3[j], uwal[j], down[j]); + } + call_wallscan(x1, x2, up, down, swal, lwal, yrepeat, mask); + down = up; + up = (up == most1) ? 
most2 : most1; + } + partition -= scaledtexheight; + dc_texturemid -= texheight; + } + call_wallscan(x1, x2, uwal, down, swal, lwal, yrepeat, mask); + } } } -void wallscan_np2_ds(drawseg_t *ds, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat) +static void wallscan_np2_ds(drawseg_t *ds, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat) { if (rw_pic->GetHeight() != 1 << rw_pic->HeightBits) { @@ -579,5 +754,4 @@ void wallscan_np2_ds(drawseg_t *ds, int x1, int x2, short *uwal, short *dwal, fl } } - } \ No newline at end of file From 56f37185b2c6ebad8945a5a6133da59167299870 Mon Sep 17 00:00:00 2001 From: Christoph Oelckers Date: Fri, 9 Dec 2016 12:21:27 +0100 Subject: [PATCH 475/912] - let's be a bit clearer and not name non-Build stuff with Build terms. (cherry picked from commit f3389ff5b716d7c913c4a5fa8ee27687de520db4) --- src/CMakeLists.txt | 2 +- src/{r_wallscan.cpp => r_walldraw.cpp} | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) rename src/{r_wallscan.cpp => r_walldraw.cpp} (99%) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 5f04effb13..aa67b13d1a 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -828,7 +828,7 @@ set( FASTMATH_PCH_SOURCES r_segs.cpp r_sky.cpp r_things.cpp - r_wallscan.cpp + r_walldraw.cpp s_advsound.cpp s_environment.cpp s_playlist.cpp diff --git a/src/r_wallscan.cpp b/src/r_walldraw.cpp similarity index 99% rename from src/r_wallscan.cpp rename to src/r_walldraw.cpp index 8c5f04d54a..de039115a3 100644 --- a/src/r_wallscan.cpp +++ b/src/r_walldraw.cpp @@ -1,5 +1,5 @@ /* -** Replacement for Build's wallscan free of any Build licensing issues. 
+** Wall drawing stuff free of Build pollution ** Copyright (c) 2016 Magnus Norddahl ** ** This software is provided 'as-is', without any express or implied From d1379e26f5049d81267a3568bcabf15aae1f548b Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Fri, 9 Dec 2016 09:14:01 -0500 Subject: [PATCH 476/912] Revert "- moved wallscan code into its own file so that it can get an appropriate copyright notice and does not inflate an already large source file even more." This reverts commit 15cf9d36a999fbb18e5ffc0ad1ec38c308a56b08. --- src/CMakeLists.txt | 1 - src/r_draw.h | 2 + src/r_segs.cpp | 771 ++++++++++++++++++++++++++++++++++++++++++--- src/r_walldraw.cpp | 757 -------------------------------------------- 4 files changed, 736 insertions(+), 795 deletions(-) delete mode 100644 src/r_walldraw.cpp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index aa67b13d1a..a3c63bb0b6 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -828,7 +828,6 @@ set( FASTMATH_PCH_SOURCES r_segs.cpp r_sky.cpp r_things.cpp - r_walldraw.cpp s_advsound.cpp s_environment.cpp s_playlist.cpp diff --git a/src/r_draw.h b/src/r_draw.h index e412146407..8c1af58fcb 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -134,6 +134,8 @@ namespace swrenderer const uint8_t *R_GetColumn(FTexture *tex, int col); void wallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const uint8_t *(*getcol)(FTexture *tex, int col) = R_GetColumn); + void maskwallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const uint8_t *(*getcol)(FTexture *tex, int col) = R_GetColumn); + void transmaskwallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const uint8_t *(*getcol)(FTexture *tex, int col) = R_GetColumn); void rt_initcols(uint8_t *buffer = nullptr); void rt_span_coverage(int x, int start, int stop); diff --git a/src/r_segs.cpp b/src/r_segs.cpp index 1880273f90..c24e7f7190 100644 
--- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -17,12 +17,6 @@ // DESCRIPTION: // All the clipping: columns, horizontal spans, sky columns. // -// This file contains some code from the Build Engine. -// -// "Build Engine & Tools" Copyright (c) 1993-1997 Ken Silverman -// Ken Silverman's official web site: "http://www.advsys.net/ken" -// See the included license file "BUILDLIC.TXT" for license info. -// //----------------------------------------------------------------------------- #include @@ -50,24 +44,22 @@ #include "r_plane.h" #include "r_segs.h" #include "r_3dfloors.h" +#include "r_draw.h" #include "v_palette.h" #include "r_data/colormaps.h" #define WALLYREPEAT 8 - +CVAR(Bool, r_np2, true, 0) CVAR(Bool, r_fogboundary, true, 0) CVAR(Bool, r_drawmirrors, true, 0) EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor); +EXTERN_CVAR(Bool, r_mipmap) -namespace swrenderer +namespace swrenderer { using namespace drawerargs; - void call_wallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, bool mask); - void wallscan_np2(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, double top, double bot, bool mask); - void wallscan_np2_ds(drawseg_t *ds, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat); - #define HEIGHTBITS 12 #define HEIGHTSHIFT (FRACBITS-HEIGHTBITS) @@ -143,6 +135,9 @@ static fixed_t *maskedtexturecol; static void R_RenderDecal (side_t *wall, DBaseDecal *first, drawseg_t *clipper, int pass); static void WallSpriteColumn (void (*drawfunc)(const BYTE *column, const FTexture::Span *spans)); +void wallscan_np2(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, double top, double bot, bool mask); +static void wallscan_np2_ds(drawseg_t *ds, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat); +static void call_wallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double 
yrepeat, bool mask); inline bool IsFogBoundary (sector_t *front, sector_t *back) { @@ -163,7 +158,7 @@ static void BlastMaskedColumn (FTexture *tex, bool useRt) // calculate lighting if (fixedcolormap == NULL && fixedlightlev < 0) { - dc_colormap = basecolormap->Maps + (GETPALOOKUP (rw_light, wallshade) << COLORMAPSHIFT); + R_SetColorMapLight(basecolormap, rw_light, wallshade); } dc_iscale = xs_Fix<16>::ToFix(MaskedSWall[dc_x] * MaskedScaleY); @@ -181,9 +176,7 @@ static void BlastMaskedColumn (FTexture *tex, bool useRt) // when forming multipatched textures (see r_data.c). // draw the texture - const FTexture::Span *spans; - const BYTE *pixels = tex->GetColumn (maskedtexturecol[dc_x] >> FRACBITS, &spans); - R_DrawMaskedColumn(pixels, spans, useRt); + R_DrawMaskedColumn(tex, maskedtexturecol[dc_x], useRt); rw_light += rw_lightstep; spryscale += rw_scalestep; } @@ -299,9 +292,9 @@ void R_RenderMaskedSegRange (drawseg_t *ds, int x1, int x2) rw_scalestep = ds->iscalestep; if (fixedlightlev >= 0) - dc_colormap = (r_fullbrightignoresectorcolor) ? (FullNormalLight.Maps + fixedlightlev) : (basecolormap->Maps + fixedlightlev); + R_SetColorMapLight((r_fullbrightignoresectorcolor) ? 
&FullNormalLight : basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); else if (fixedcolormap != NULL) - dc_colormap = fixedcolormap; + R_SetColorMapLight(fixedcolormap, 0, 0); // find positioning texheight = tex->GetScaledHeightDouble(); @@ -447,7 +440,7 @@ void R_RenderMaskedSegRange (drawseg_t *ds, int x1, int x2) while (dc_x < stop) { - rt_initcols(); + rt_initcols(nullptr); BlastMaskedColumn (tex, true); dc_x++; BlastMaskedColumn (tex, true); dc_x++; BlastMaskedColumn (tex, true); dc_x++; @@ -538,7 +531,7 @@ clearfog: } else { - clearbufshort(openings + ds->sprtopclip - ds->x1 + x1, x2 - x1, viewheight); + fillshort(openings + ds->sprtopclip - ds->x1 + x1, x2 - x1, viewheight); } } return; @@ -616,9 +609,9 @@ void R_RenderFakeWall(drawseg_t *ds, int x1, int x2, F3DFloor *rover) } if (fixedlightlev >= 0) - dc_colormap = (r_fullbrightignoresectorcolor) ? (FullNormalLight.Maps + fixedlightlev) : (basecolormap->Maps + fixedlightlev); + R_SetColorMapLight((r_fullbrightignoresectorcolor) ? 
&FullNormalLight : basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); else if (fixedcolormap != NULL) - dc_colormap = fixedcolormap; + R_SetColorMapLight(fixedcolormap, 0, 0); WallC.sz1 = ds->sz1; WallC.sz2 = ds->sz2; @@ -1051,6 +1044,710 @@ void R_RenderFakeWallRange (drawseg_t *ds, int x1, int x2) return; } + +struct WallscanSampler +{ + WallscanSampler() { } + WallscanSampler(int y1, float swal, double yrepeat, fixed_t xoffset, double xmagnitude, FTexture *texture, const BYTE*(*getcol)(FTexture *texture, int x)); + + uint32_t uv_pos; + uint32_t uv_step; + uint32_t uv_max; + + const BYTE *source; + const BYTE *source2; + uint32_t texturefracx; + uint32_t height; +}; + +WallscanSampler::WallscanSampler(int y1, float swal, double yrepeat, fixed_t xoffset, double xmagnitude, FTexture *texture, const BYTE*(*getcol)(FTexture *texture, int x)) +{ + xoffset += FLOAT2FIXED(xmagnitude * 0.5); + + if (!r_swtruecolor) + { + height = texture->GetHeight(); + + int uv_fracbits = 32 - texture->HeightBits; + if (uv_fracbits != 32) + { + uv_max = height << uv_fracbits; + + // Find start uv in [0-base_height[ range. + // Not using xs_ToFixed because it rounds the result and we need something that always rounds down to stay within the range. 
+ double uv_stepd = swal * yrepeat; + double v = (dc_texturemid + uv_stepd * (y1 - CenterY + 0.5)) / height; + v = v - floor(v); + v *= height; + v *= (1 << uv_fracbits); + + uv_pos = (uint32_t)v; + uv_step = xs_ToFixed(uv_fracbits, uv_stepd); + if (uv_step == 0) // To prevent divide by zero elsewhere + uv_step = 1; + } + else + { // Hack for one pixel tall textures + uv_pos = 0; + uv_step = 0; + uv_max = 1; + } + + source = getcol(texture, xoffset >> FRACBITS); + source2 = nullptr; + texturefracx = 0; + } + else + { + // Normalize to 0-1 range: + double uv_stepd = swal * yrepeat; + double v = (dc_texturemid + uv_stepd * (y1 - CenterY + 0.5)) / texture->GetHeight(); + v = v - floor(v); + double v_step = uv_stepd / texture->GetHeight(); + + if (isnan(v) || isnan(v_step)) // this should never happen, but it apparently does.. + { + uv_stepd = 0.0; + v = 0.0; + v_step = 0.0; + } + + // Convert to uint32: + uv_pos = (uint32_t)(v * 0x100000000LL); + uv_step = (uint32_t)(v_step * 0x100000000LL); + uv_max = 0; + + // Texture mipmap and filter selection: + if (getcol != R_GetColumn) + { + source = getcol(texture, xoffset >> FRACBITS); + source2 = nullptr; + height = texture->GetHeight(); + texturefracx = 0; + } + else + { + double ymagnitude = fabs(uv_stepd); + double magnitude = MAX(ymagnitude, xmagnitude); + double min_lod = -1000.0; + double lod = MAX(log2(magnitude) + r_lod_bias, min_lod); + bool magnifying = lod < 0.0f; + + int mipmap_offset = 0; + int mip_width = texture->GetWidth(); + int mip_height = texture->GetHeight(); + if (r_mipmap && texture->Mipmapped() && mip_width > 1 && mip_height > 1) + { + uint32_t xpos = (uint32_t)((((uint64_t)xoffset) << FRACBITS) / mip_width); + + int level = (int)lod; + while (level > 0 && mip_width > 1 && mip_height > 1) + { + mipmap_offset += mip_width * mip_height; + level--; + mip_width = MAX(mip_width >> 1, 1); + mip_height = MAX(mip_height >> 1, 1); + } + xoffset = (xpos >> FRACBITS) * mip_width; + } + + const uint32_t *pixels 
= texture->GetPixelsBgra() + mipmap_offset; + + bool filter_nearest = (magnifying && !r_magfilter) || (!magnifying && !r_minfilter); + if (filter_nearest) + { + int tx = (xoffset >> FRACBITS) % mip_width; + if (tx < 0) + tx += mip_width; + source = (BYTE*)(pixels + tx * mip_height); + source2 = nullptr; + height = mip_height; + texturefracx = 0; + } + else + { + xoffset -= FRACUNIT / 2; + int tx0 = (xoffset >> FRACBITS) % mip_width; + if (tx0 < 0) + tx0 += mip_width; + int tx1 = (tx0 + 1) % mip_width; + source = (BYTE*)(pixels + tx0 * mip_height); + source2 = (BYTE*)(pixels + tx1 * mip_height); + height = mip_height; + texturefracx = (xoffset >> (FRACBITS - 4)) & 15; + } + } + } +} + +// Draw a column with support for non-power-of-two ranges +void wallscan_drawcol1(int x, int y1, int y2, WallscanSampler &sampler, DWORD(*draw1column)()) +{ + if (r_swtruecolor) + { + int count = y2 - y1; + + dc_source = sampler.source; + dc_source2 = sampler.source2; + dc_texturefracx = sampler.texturefracx; + dc_dest = (ylookup[y1] + x) * 4 + dc_destorg; + dc_count = count; + dc_iscale = sampler.uv_step; + dc_texturefrac = sampler.uv_pos; + dc_textureheight = sampler.height; + draw1column(); + + uint64_t step64 = sampler.uv_step; + uint64_t pos64 = sampler.uv_pos; + sampler.uv_pos = (uint32_t)(pos64 + step64 * count); + } + else + { + if (sampler.uv_max == 0 || sampler.uv_step == 0) // power of two + { + int count = y2 - y1; + + dc_source = sampler.source; + dc_source2 = sampler.source2; + dc_texturefracx = sampler.texturefracx; + dc_dest = (ylookup[y1] + x) + dc_destorg; + dc_count = count; + dc_iscale = sampler.uv_step; + dc_texturefrac = sampler.uv_pos; + draw1column(); + + uint64_t step64 = sampler.uv_step; + uint64_t pos64 = sampler.uv_pos; + sampler.uv_pos = (uint32_t)(pos64 + step64 * count); + } + else + { + uint32_t uv_pos = sampler.uv_pos; + + uint32_t left = y2 - y1; + while (left > 0) + { + uint32_t available = sampler.uv_max - uv_pos; + uint32_t next_uv_wrap = available 
/ sampler.uv_step; + if (available % sampler.uv_step != 0) + next_uv_wrap++; + uint32_t count = MIN(left, next_uv_wrap); + + dc_source = sampler.source; + dc_source2 = sampler.source2; + dc_texturefracx = sampler.texturefracx; + dc_dest = (ylookup[y1] + x) + dc_destorg; + dc_count = count; + dc_iscale = sampler.uv_step; + dc_texturefrac = uv_pos; + draw1column(); + + left -= count; + uv_pos += sampler.uv_step * count; + if (uv_pos >= sampler.uv_max) + uv_pos -= sampler.uv_max; + } + + sampler.uv_pos = uv_pos; + } + } +} + +// Draw four columns with support for non-power-of-two ranges +void wallscan_drawcol4(int x, int y1, int y2, WallscanSampler *sampler, void(*draw4columns)()) +{ + if (r_swtruecolor) + { + int count = y2 - y1; + for (int i = 0; i < 4; i++) + { + bufplce[i] = sampler[i].source; + bufplce2[i] = sampler[i].source2; + buftexturefracx[i] = sampler[i].texturefracx; + bufheight[i] = sampler[i].height; + vplce[i] = sampler[i].uv_pos; + vince[i] = sampler[i].uv_step; + + uint64_t step64 = sampler[i].uv_step; + uint64_t pos64 = sampler[i].uv_pos; + sampler[i].uv_pos = (uint32_t)(pos64 + step64 * count); + } + dc_dest = (ylookup[y1] + x) * 4 + dc_destorg; + dc_count = count; + draw4columns(); + } + else + { + if (sampler[0].uv_max == 0 || sampler[0].uv_step == 0) // power of two, no wrap handling needed + { + int count = y2 - y1; + for (int i = 0; i < 4; i++) + { + bufplce[i] = sampler[i].source; + bufplce2[i] = sampler[i].source2; + buftexturefracx[i] = sampler[i].texturefracx; + vplce[i] = sampler[i].uv_pos; + vince[i] = sampler[i].uv_step; + + uint64_t step64 = sampler[i].uv_step; + uint64_t pos64 = sampler[i].uv_pos; + sampler[i].uv_pos = (uint32_t)(pos64 + step64 * count); + } + dc_dest = (ylookup[y1] + x) + dc_destorg; + dc_count = count; + draw4columns(); + } + else + { + dc_dest = (ylookup[y1] + x) + dc_destorg; + for (int i = 0; i < 4; i++) + { + bufplce[i] = sampler[i].source; + bufplce2[i] = sampler[i].source2; + buftexturefracx[i] = 
sampler[i].texturefracx; + } + + uint32_t left = y2 - y1; + while (left > 0) + { + // Find which column wraps first + uint32_t count = left; + for (int i = 0; i < 4; i++) + { + uint32_t available = sampler[i].uv_max - sampler[i].uv_pos; + uint32_t next_uv_wrap = available / sampler[i].uv_step; + if (available % sampler[i].uv_step != 0) + next_uv_wrap++; + count = MIN(next_uv_wrap, count); + } + + // Draw until that column wraps + for (int i = 0; i < 4; i++) + { + vplce[i] = sampler[i].uv_pos; + vince[i] = sampler[i].uv_step; + } + dc_count = count; + draw4columns(); + + // Wrap the uv position + for (int i = 0; i < 4; i++) + { + sampler[i].uv_pos += sampler[i].uv_step * count; + if (sampler[i].uv_pos >= sampler[i].uv_max) + sampler[i].uv_pos -= sampler[i].uv_max; + } + + left -= count; + } + } + } +} + +typedef DWORD(*Draw1ColumnFuncPtr)(); +typedef void(*Draw4ColumnsFuncPtr)(); + +void wallscan_any( + int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, + const BYTE *(*getcol)(FTexture *tex, int x), + void(setupwallscan(int bits, Draw1ColumnFuncPtr &draw1, Draw4ColumnsFuncPtr &draw2))) +{ + if (rw_pic->UseType == FTexture::TEX_Null) + return; + + fixed_t xoffset = rw_offset; + + rw_pic->GetHeight(); // To ensure that rw_pic->HeightBits has been set + int fracbits = 32 - rw_pic->HeightBits; + if (fracbits == 32) + { // Hack for one pixel tall textures + fracbits = 0; + yrepeat = 0; + dc_texturemid = 0; + } + + DWORD(*draw1column)(); + void(*draw4columns)(); + setupwallscan(r_swtruecolor ? 
FRACBITS : fracbits, draw1column, draw4columns); + + bool fixed = (fixedcolormap != NULL || fixedlightlev >= 0); + if (fixed) + { + palookupoffse[0] = dc_colormap; + palookupoffse[1] = dc_colormap; + palookupoffse[2] = dc_colormap; + palookupoffse[3] = dc_colormap; + palookuplight[0] = 0; + palookuplight[1] = 0; + palookuplight[2] = 0; + palookuplight[3] = 0; + } + + if (fixedcolormap) + R_SetColorMapLight(fixedcolormap, 0, 0); + else + R_SetColorMapLight(basecolormap, 0, 0); + + float light = rw_light; + + // Calculate where 4 column alignment begins and ends: + int aligned_x1 = clamp((x1 + 3) / 4 * 4, x1, x2); + int aligned_x2 = clamp(x2 / 4 * 4, x1, x2); + + double xmagnitude = 1.0; + + // First unaligned columns: + for (int x = x1; x < aligned_x1; x++, light += rw_lightstep) + { + int y1 = uwal[x]; + int y2 = dwal[x]; + if (y2 <= y1) + continue; + + if (!fixed) + R_SetColorMapLight(basecolormap, light, wallshade); + + if (x + 1 < x2) xmagnitude = fabs(FIXED2DBL(lwal[x + 1]) - FIXED2DBL(lwal[x])); + + WallscanSampler sampler(y1, swal[x], yrepeat, lwal[x] + xoffset, xmagnitude, rw_pic, getcol); + wallscan_drawcol1(x, y1, y2, sampler, draw1column); + } + + // The aligned columns + for (int x = aligned_x1; x < aligned_x2; x += 4) + { + // Find y1, y2, light and uv values for four columns: + int y1[4] = { uwal[x], uwal[x + 1], uwal[x + 2], uwal[x + 3] }; + int y2[4] = { dwal[x], dwal[x + 1], dwal[x + 2], dwal[x + 3] }; + + float lights[4]; + for (int i = 0; i < 4; i++) + { + lights[i] = light; + light += rw_lightstep; + } + + WallscanSampler sampler[4]; + for (int i = 0; i < 4; i++) + { + if (x + i + 1 < x2) xmagnitude = fabs(FIXED2DBL(lwal[x + i + 1]) - FIXED2DBL(lwal[x + i])); + sampler[i] = WallscanSampler(y1[i], swal[x + i], yrepeat, lwal[x + i] + xoffset, xmagnitude, rw_pic, getcol); + } + + // Figure out where we vertically can start and stop drawing 4 columns in one go + int middle_y1 = y1[0]; + int middle_y2 = y2[0]; + for (int i = 1; i < 4; i++) + { + 
middle_y1 = MAX(y1[i], middle_y1); + middle_y2 = MIN(y2[i], middle_y2); + } + + // If we got an empty column in our set we cannot draw 4 columns in one go: + bool empty_column_in_set = false; + int bilinear_count = 0; + for (int i = 0; i < 4; i++) + { + if (y2[i] <= y1[i]) + empty_column_in_set = true; + if (sampler[i].source2) + bilinear_count++; + } + + if (empty_column_in_set || middle_y2 <= middle_y1 || (bilinear_count > 0 && bilinear_count < 4)) + { + for (int i = 0; i < 4; i++) + { + if (y2[i] <= y1[i]) + continue; + + if (!fixed) + R_SetColorMapLight(basecolormap, lights[i], wallshade); + wallscan_drawcol1(x + i, y1[i], y2[i], sampler[i], draw1column); + } + continue; + } + + // Draw the first rows where not all 4 columns are active + for (int i = 0; i < 4; i++) + { + if (!fixed) + R_SetColorMapLight(basecolormap, lights[i], wallshade); + + if (y1[i] < middle_y1) + wallscan_drawcol1(x + i, y1[i], middle_y1, sampler[i], draw1column); + } + + // Draw the area where all 4 columns are active + if (!fixed) + { + for (int i = 0; i < 4; i++) + { + if (r_swtruecolor) + { + palookupoffse[i] = basecolormap->Maps; + palookuplight[i] = LIGHTSCALE(lights[i], wallshade); + } + else + { + palookupoffse[i] = basecolormap->Maps + (GETPALOOKUP(lights[i], wallshade) << COLORMAPSHIFT); + palookuplight[i] = 0; + } + } + } + wallscan_drawcol4(x, middle_y1, middle_y2, sampler, draw4columns); + + // Draw the last rows where not all 4 columns are active + for (int i = 0; i < 4; i++) + { + if (!fixed) + R_SetColorMapLight(basecolormap, lights[i], wallshade); + + if (middle_y2 < y2[i]) + wallscan_drawcol1(x + i, middle_y2, y2[i], sampler[i], draw1column); + } + } + + // The last unaligned columns: + for (int x = aligned_x2; x < x2; x++, light += rw_lightstep) + { + int y1 = uwal[x]; + int y2 = dwal[x]; + if (y2 <= y1) + continue; + + if (!fixed) + R_SetColorMapLight(basecolormap, light, wallshade); + + if (x + 1 < x2) xmagnitude = fabs(FIXED2DBL(lwal[x + 1]) - FIXED2DBL(lwal[x])); + + 
WallscanSampler sampler(y1, swal[x], yrepeat, lwal[x] + xoffset, xmagnitude, rw_pic, getcol); + wallscan_drawcol1(x, y1, y2, sampler, draw1column); + } + + NetUpdate (); +} + +void wallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x)) +{ + wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, [](int bits, Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) + { + setupvline(bits); + line1 = dovline1; + line4 = dovline4; + }); +} + +void maskwallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x)) +{ + if (!rw_pic->bMasked) // Textures that aren't masked can use the faster wallscan. + { + wallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol); + } + else + { + wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, [](int bits, Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) + { + setupmvline(bits); + line1 = domvline1; + line4 = domvline4; + }); + } +} + +void transmaskwallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x)) +{ + static fixed_t(*tmvline1)(); + static void(*tmvline4)(); + if (!R_GetTransMaskDrawers(&tmvline1, &tmvline4)) + { + // The current translucency is unsupported, so draw with regular maskwallscan instead. 
+ maskwallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol); + } + else + { + wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, [](int bits, Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) + { + setuptmvline(bits); + line1 = reinterpret_cast(tmvline1); + line4 = tmvline4; + }); + } +} + +void wallscan_striped (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat) +{ + FDynamicColormap *startcolormap = basecolormap; + int startshade = wallshade; + bool fogginess = foggy; + + short most1[MAXWIDTH], most2[MAXWIDTH], most3[MAXWIDTH]; + short *up, *down; + + up = uwal; + down = most1; + + assert(WallC.sx1 <= x1); + assert(WallC.sx2 >= x2); + + // kg3D - fake floors instead of zdoom light list + for (unsigned int i = 0; i < frontsector->e->XFloor.lightlist.Size(); i++) + { + int j = WallMost (most3, frontsector->e->XFloor.lightlist[i].plane, &WallC); + if (j != 3) + { + for (int j = x1; j < x2; ++j) + { + down[j] = clamp (most3[j], up[j], dwal[j]); + } + wallscan (x1, x2, up, down, swal, lwal, yrepeat); + up = down; + down = (down == most1) ? 
most2 : most1; + } + + lightlist_t *lit = &frontsector->e->XFloor.lightlist[i]; + basecolormap = lit->extra_colormap; + wallshade = LIGHT2SHADE(curline->sidedef->GetLightLevel(fogginess, + *lit->p_lightlevel, lit->lightsource != NULL) + r_actualextralight); + } + + wallscan (x1, x2, up, dwal, swal, lwal, yrepeat); + basecolormap = startcolormap; + wallshade = startshade; +} + +static void call_wallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, bool mask) +{ + if (mask) + { + if (colfunc == basecolfunc) + { + maskwallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat); + } + else + { + transmaskwallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat); + } + } + else + { + if (fixedcolormap != NULL || fixedlightlev >= 0 || !(frontsector->e && frontsector->e->XFloor.lightlist.Size())) + { + wallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat); + } + else + { + wallscan_striped(x1, x2, uwal, dwal, swal, lwal, yrepeat); + } + } +} + +//============================================================================= +// +// wallscan_np2 +// +// This is a wrapper around wallscan that helps it tile textures whose heights +// are not powers of 2. It divides the wall into texture-sized strips and calls +// wallscan for each of those. Since only one repetition of the texture fits +// in each strip, wallscan will not tile. 
+// +//============================================================================= + +void wallscan_np2(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, double top, double bot, bool mask) +{ + if (!r_np2) + { + call_wallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat, mask); + } + else + { + short most1[MAXWIDTH], most2[MAXWIDTH], most3[MAXWIDTH]; + short *up, *down; + double texheight = rw_pic->GetHeight(); + double partition; + double scaledtexheight = texheight / yrepeat; + + if (yrepeat >= 0) + { // normal orientation: draw strips from top to bottom + partition = top - fmod(top - dc_texturemid / yrepeat - ViewPos.Z, scaledtexheight); + if (partition == top) + { + partition -= scaledtexheight; + } + up = uwal; + down = most1; + dc_texturemid = (partition - ViewPos.Z) * yrepeat + texheight; + while (partition > bot) + { + int j = OWallMost(most3, partition - ViewPos.Z, &WallC); + if (j != 3) + { + for (int j = x1; j < x2; ++j) + { + down[j] = clamp(most3[j], up[j], dwal[j]); + } + call_wallscan(x1, x2, up, down, swal, lwal, yrepeat, mask); + up = down; + down = (down == most1) ? most2 : most1; + } + partition -= scaledtexheight; + dc_texturemid -= texheight; + } + call_wallscan(x1, x2, up, dwal, swal, lwal, yrepeat, mask); + } + else + { // upside down: draw strips from bottom to top + partition = bot - fmod(bot - dc_texturemid / yrepeat - ViewPos.Z, scaledtexheight); + up = most1; + down = dwal; + dc_texturemid = (partition - ViewPos.Z) * yrepeat + texheight; + while (partition < top) + { + int j = OWallMost(most3, partition - ViewPos.Z, &WallC); + if (j != 12) + { + for (int j = x1; j < x2; ++j) + { + up[j] = clamp(most3[j], uwal[j], down[j]); + } + call_wallscan(x1, x2, up, down, swal, lwal, yrepeat, mask); + down = up; + up = (up == most1) ? 
most2 : most1; + } + partition -= scaledtexheight; + dc_texturemid -= texheight; + } + call_wallscan(x1, x2, uwal, down, swal, lwal, yrepeat, mask); + } + } +} + +static void wallscan_np2_ds(drawseg_t *ds, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat) +{ + if (rw_pic->GetHeight() != 1 << rw_pic->HeightBits) + { + double frontcz1 = ds->curline->frontsector->ceilingplane.ZatPoint(ds->curline->v1); + double frontfz1 = ds->curline->frontsector->floorplane.ZatPoint(ds->curline->v1); + double frontcz2 = ds->curline->frontsector->ceilingplane.ZatPoint(ds->curline->v2); + double frontfz2 = ds->curline->frontsector->floorplane.ZatPoint(ds->curline->v2); + double top = MAX(frontcz1, frontcz2); + double bot = MIN(frontfz1, frontfz2); + if (fake3D & FAKE3D_CLIPTOP) + { + top = MIN(top, sclipTop); + } + if (fake3D & FAKE3D_CLIPBOTTOM) + { + bot = MAX(bot, sclipBottom); + } + wallscan_np2(x1, x2, uwal, dwal, swal, lwal, yrepeat, top, bot, true); + } + else + { + call_wallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat, true); + } +} + // // R_RenderSegLoop // Draws zero, one, or two textures for walls. @@ -1071,9 +1768,9 @@ void R_RenderSegLoop () fixed_t xoffset = rw_offset; if (fixedlightlev >= 0) - dc_colormap = (r_fullbrightignoresectorcolor) ? (FullNormalLight.Maps + fixedlightlev) : (basecolormap->Maps + fixedlightlev); + R_SetColorMapLight((r_fullbrightignoresectorcolor) ? 
&FullNormalLight : basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); else if (fixedcolormap != NULL) - dc_colormap = fixedcolormap; + R_SetColorMapLight(fixedcolormap, 0, 0); // clip wall to the floor and ceiling for (x = x1; x < x2; ++x) @@ -1184,8 +1881,8 @@ void R_RenderSegLoop () call_wallscan(x1, x2, walltop, wallbottom, swall, lwall, yscale, false); } } - clearbufshort (ceilingclip+x1, x2-x1, viewheight); - clearbufshort (floorclip+x1, x2-x1, 0xffff); + fillshort (ceilingclip+x1, x2-x1, viewheight); + fillshort (floorclip+x1, x2-x1, 0xffff); } else { // two sided line @@ -1708,7 +2405,7 @@ void R_StoreWallRange (int start, int stop) { ds_p->sprtopclip = R_NewOpening (stop - start); ds_p->sprbottomclip = R_NewOpening (stop - start); - clearbufshort (openings + ds_p->sprtopclip, stop-start, viewheight); + fillshort (openings + ds_p->sprtopclip, stop-start, viewheight); memset (openings + ds_p->sprbottomclip, -1, (stop-start)*sizeof(short)); ds_p->silhouette = SIL_BOTH; } @@ -1748,7 +2445,7 @@ void R_StoreWallRange (int start, int stop) if (doorclosed || (rw_backfz1 >= rw_frontcz1 && rw_backfz2 >= rw_frontcz2)) { // killough 1/17/98, 2/8/98 ds_p->sprtopclip = R_NewOpening (stop - start); - clearbufshort (openings + ds_p->sprtopclip, stop - start, viewheight); + fillshort (openings + ds_p->sprtopclip, stop - start, viewheight); ds_p->silhouette |= SIL_TOP; } } @@ -1969,7 +2666,7 @@ int WallMostAny(short *mostbuf, double z1, double z2, const FWallCoords *wallc) } else if (y1 > viewheight && y2 > viewheight) // entire line is below screen { - clearbufshort(&mostbuf[wallc->sx1], wallc->sx2 - wallc->sx1, viewheight); + fillshort(&mostbuf[wallc->sx1], wallc->sx2 - wallc->sx1, viewheight); return 12; } @@ -2340,11 +3037,11 @@ static void R_RenderDecal (side_t *wall, DBaseDecal *decal, drawseg_t *clipper, rw_light = rw_lightleft + (x1 - savecoord.sx1) * rw_lightstep; if (fixedlightlev >= 0) - dc_colormap = (r_fullbrightignoresectorcolor) ? 
(FullNormalLight.Maps + fixedlightlev) : (usecolormap->Maps + fixedlightlev); + R_SetColorMapLight((r_fullbrightignoresectorcolor) ? &FullNormalLight : usecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); else if (fixedcolormap != NULL) - dc_colormap = fixedcolormap; + R_SetColorMapLight(fixedcolormap, 0, 0); else if (!foggy && (decal->RenderFlags & RF_FULLBRIGHT)) - dc_colormap = (r_fullbrightignoresectorcolor) ? FullNormalLight.Maps : usecolormap->Maps; + R_SetColorMapLight((r_fullbrightignoresectorcolor) ? &FullNormalLight : usecolormap, 0, 0); else calclighting = true; @@ -2395,7 +3092,7 @@ static void R_RenderDecal (side_t *wall, DBaseDecal *decal, drawseg_t *clipper, { if (calclighting) { // calculate lighting - dc_colormap = usecolormap->Maps + (GETPALOOKUP (rw_light, wallshade) << COLORMAPSHIFT); + R_SetColorMapLight(usecolormap, rw_light, wallshade); } R_WallSpriteColumn (false); dc_x++; @@ -2405,9 +3102,9 @@ static void R_RenderDecal (side_t *wall, DBaseDecal *decal, drawseg_t *clipper, { if (calclighting) { // calculate lighting - dc_colormap = usecolormap->Maps + (GETPALOOKUP (rw_light, wallshade) << COLORMAPSHIFT); + R_SetColorMapLight(usecolormap, rw_light, wallshade); } - rt_initcols(); + rt_initcols(nullptr); for (int zz = 4; zz; --zz) { R_WallSpriteColumn (true); @@ -2420,7 +3117,7 @@ static void R_RenderDecal (side_t *wall, DBaseDecal *decal, drawseg_t *clipper, { if (calclighting) { // calculate lighting - dc_colormap = usecolormap->Maps + (GETPALOOKUP (rw_light, wallshade) << COLORMAPSHIFT); + R_SetColorMapLight(usecolormap, rw_light, wallshade); } R_WallSpriteColumn (false); dc_x++; @@ -2444,4 +3141,4 @@ done: WallC = savecoord; } -} \ No newline at end of file +} diff --git a/src/r_walldraw.cpp b/src/r_walldraw.cpp deleted file mode 100644 index de039115a3..0000000000 --- a/src/r_walldraw.cpp +++ /dev/null @@ -1,757 +0,0 @@ -/* -** Wall drawing stuff free of Build pollution -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 
'as-is', without any express or implied -** warranty. In no event will the authors be held liable for any damages -** arising from the use of this software. -** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. -** 3. This notice may not be removed or altered from any source distribution. -** -*/ - -#include -#include - -#include "doomdef.h" -#include "doomstat.h" -#include "doomdata.h" - -#include "r_local.h" -#include "r_sky.h" -#include "v_video.h" - -#include "m_swap.h" -#include "a_sharedglobal.h" -#include "d_net.h" -#include "g_level.h" -#include "r_draw.h" -#include "r_bsp.h" -#include "r_plane.h" -#include "r_segs.h" -#include "r_3dfloors.h" -#include "v_palette.h" -#include "r_data/colormaps.h" - -namespace swrenderer -{ - using namespace drawerargs; - - extern FTexture *rw_pic; - extern int wallshade; - - -struct WallscanSampler -{ - WallscanSampler() { } - WallscanSampler(int y1, float swal, double yrepeat, fixed_t xoffset, double xmagnitude, FTexture *texture, const BYTE*(*getcol)(FTexture *texture, int x)); - - uint32_t uv_pos; - uint32_t uv_step; - uint32_t uv_max; - - const BYTE *source; - const BYTE *source2; - uint32_t texturefracx; - uint32_t height; -}; - -WallscanSampler::WallscanSampler(int y1, float swal, double yrepeat, fixed_t xoffset, double xmagnitude, FTexture *texture, const BYTE*(*getcol)(FTexture *texture, int x)) -{ - xoffset += FLOAT2FIXED(xmagnitude * 0.5); - - if (!r_swtruecolor) - { - height = 
texture->GetHeight(); - - int uv_fracbits = 32 - texture->HeightBits; - if (uv_fracbits != 32) - { - uv_max = height << uv_fracbits; - - // Find start uv in [0-base_height[ range. - // Not using xs_ToFixed because it rounds the result and we need something that always rounds down to stay within the range. - double uv_stepd = swal * yrepeat; - double v = (dc_texturemid + uv_stepd * (y1 - CenterY + 0.5)) / height; - v = v - floor(v); - v *= height; - v *= (1 << uv_fracbits); - - uv_pos = (uint32_t)v; - uv_step = xs_ToFixed(uv_fracbits, uv_stepd); - if (uv_step == 0) // To prevent divide by zero elsewhere - uv_step = 1; - } - else - { // Hack for one pixel tall textures - uv_pos = 0; - uv_step = 0; - uv_max = 1; - } - - source = getcol(texture, xoffset >> FRACBITS); - source2 = nullptr; - texturefracx = 0; - } - else - { - // Normalize to 0-1 range: - double uv_stepd = swal * yrepeat; - double v = (dc_texturemid + uv_stepd * (y1 - CenterY + 0.5)) / texture->GetHeight(); - v = v - floor(v); - double v_step = uv_stepd / texture->GetHeight(); - - if (isnan(v) || isnan(v_step)) // this should never happen, but it apparently does.. 
- { - uv_stepd = 0.0; - v = 0.0; - v_step = 0.0; - } - - // Convert to uint32: - uv_pos = (uint32_t)(v * 0x100000000LL); - uv_step = (uint32_t)(v_step * 0x100000000LL); - uv_max = 0; - - // Texture mipmap and filter selection: - if (getcol != R_GetColumn) - { - source = getcol(texture, xoffset >> FRACBITS); - source2 = nullptr; - height = texture->GetHeight(); - texturefracx = 0; - } - else - { - double ymagnitude = fabs(uv_stepd); - double magnitude = MAX(ymagnitude, xmagnitude); - double min_lod = -1000.0; - double lod = MAX(log2(magnitude) + r_lod_bias, min_lod); - bool magnifying = lod < 0.0f; - - int mipmap_offset = 0; - int mip_width = texture->GetWidth(); - int mip_height = texture->GetHeight(); - if (r_mipmap && texture->Mipmapped() && mip_width > 1 && mip_height > 1) - { - uint32_t xpos = (uint32_t)((((uint64_t)xoffset) << FRACBITS) / mip_width); - - int level = (int)lod; - while (level > 0 && mip_width > 1 && mip_height > 1) - { - mipmap_offset += mip_width * mip_height; - level--; - mip_width = MAX(mip_width >> 1, 1); - mip_height = MAX(mip_height >> 1, 1); - } - xoffset = (xpos >> FRACBITS) * mip_width; - } - - const uint32_t *pixels = texture->GetPixelsBgra() + mipmap_offset; - - bool filter_nearest = (magnifying && !r_magfilter) || (!magnifying && !r_minfilter); - if (filter_nearest) - { - int tx = (xoffset >> FRACBITS) % mip_width; - if (tx < 0) - tx += mip_width; - source = (BYTE*)(pixels + tx * mip_height); - source2 = nullptr; - height = mip_height; - texturefracx = 0; - } - else - { - xoffset -= FRACUNIT / 2; - int tx0 = (xoffset >> FRACBITS) % mip_width; - if (tx0 < 0) - tx0 += mip_width; - int tx1 = (tx0 + 1) % mip_width; - source = (BYTE*)(pixels + tx0 * mip_height); - source2 = (BYTE*)(pixels + tx1 * mip_height); - height = mip_height; - texturefracx = (xoffset >> (FRACBITS - 4)) & 15; - } - } - } -} - -// Draw a column with support for non-power-of-two ranges -void wallscan_drawcol1(int x, int y1, int y2, WallscanSampler &sampler, 
DWORD(*draw1column)()) -{ - if (r_swtruecolor) - { - int count = y2 - y1; - - dc_source = sampler.source; - dc_source2 = sampler.source2; - dc_texturefracx = sampler.texturefracx; - dc_dest = (ylookup[y1] + x) * 4 + dc_destorg; - dc_count = count; - dc_iscale = sampler.uv_step; - dc_texturefrac = sampler.uv_pos; - dc_textureheight = sampler.height; - draw1column(); - - uint64_t step64 = sampler.uv_step; - uint64_t pos64 = sampler.uv_pos; - sampler.uv_pos = (uint32_t)(pos64 + step64 * count); - } - else - { - if (sampler.uv_max == 0 || sampler.uv_step == 0) // power of two - { - int count = y2 - y1; - - dc_source = sampler.source; - dc_source2 = sampler.source2; - dc_texturefracx = sampler.texturefracx; - dc_dest = (ylookup[y1] + x) + dc_destorg; - dc_count = count; - dc_iscale = sampler.uv_step; - dc_texturefrac = sampler.uv_pos; - draw1column(); - - uint64_t step64 = sampler.uv_step; - uint64_t pos64 = sampler.uv_pos; - sampler.uv_pos = (uint32_t)(pos64 + step64 * count); - } - else - { - uint32_t uv_pos = sampler.uv_pos; - - uint32_t left = y2 - y1; - while (left > 0) - { - uint32_t available = sampler.uv_max - uv_pos; - uint32_t next_uv_wrap = available / sampler.uv_step; - if (available % sampler.uv_step != 0) - next_uv_wrap++; - uint32_t count = MIN(left, next_uv_wrap); - - dc_source = sampler.source; - dc_source2 = sampler.source2; - dc_texturefracx = sampler.texturefracx; - dc_dest = (ylookup[y1] + x) + dc_destorg; - dc_count = count; - dc_iscale = sampler.uv_step; - dc_texturefrac = uv_pos; - draw1column(); - - left -= count; - uv_pos += sampler.uv_step * count; - if (uv_pos >= sampler.uv_max) - uv_pos -= sampler.uv_max; - } - - sampler.uv_pos = uv_pos; - } - } -} - -// Draw four columns with support for non-power-of-two ranges -void wallscan_drawcol4(int x, int y1, int y2, WallscanSampler *sampler, void(*draw4columns)()) -{ - if (r_swtruecolor) - { - int count = y2 - y1; - for (int i = 0; i < 4; i++) - { - bufplce[i] = sampler[i].source; - bufplce2[i] = 
sampler[i].source2; - buftexturefracx[i] = sampler[i].texturefracx; - bufheight[i] = sampler[i].height; - vplce[i] = sampler[i].uv_pos; - vince[i] = sampler[i].uv_step; - - uint64_t step64 = sampler[i].uv_step; - uint64_t pos64 = sampler[i].uv_pos; - sampler[i].uv_pos = (uint32_t)(pos64 + step64 * count); - } - dc_dest = (ylookup[y1] + x) * 4 + dc_destorg; - dc_count = count; - draw4columns(); - } - else - { - if (sampler[0].uv_max == 0 || sampler[0].uv_step == 0) // power of two, no wrap handling needed - { - int count = y2 - y1; - for (int i = 0; i < 4; i++) - { - bufplce[i] = sampler[i].source; - bufplce2[i] = sampler[i].source2; - buftexturefracx[i] = sampler[i].texturefracx; - vplce[i] = sampler[i].uv_pos; - vince[i] = sampler[i].uv_step; - - uint64_t step64 = sampler[i].uv_step; - uint64_t pos64 = sampler[i].uv_pos; - sampler[i].uv_pos = (uint32_t)(pos64 + step64 * count); - } - dc_dest = (ylookup[y1] + x) + dc_destorg; - dc_count = count; - draw4columns(); - } - else - { - dc_dest = (ylookup[y1] + x) + dc_destorg; - for (int i = 0; i < 4; i++) - { - bufplce[i] = sampler[i].source; - bufplce2[i] = sampler[i].source2; - buftexturefracx[i] = sampler[i].texturefracx; - } - - uint32_t left = y2 - y1; - while (left > 0) - { - // Find which column wraps first - uint32_t count = left; - for (int i = 0; i < 4; i++) - { - uint32_t available = sampler[i].uv_max - sampler[i].uv_pos; - uint32_t next_uv_wrap = available / sampler[i].uv_step; - if (available % sampler[i].uv_step != 0) - next_uv_wrap++; - count = MIN(next_uv_wrap, count); - } - - // Draw until that column wraps - for (int i = 0; i < 4; i++) - { - vplce[i] = sampler[i].uv_pos; - vince[i] = sampler[i].uv_step; - } - dc_count = count; - draw4columns(); - - // Wrap the uv position - for (int i = 0; i < 4; i++) - { - sampler[i].uv_pos += sampler[i].uv_step * count; - if (sampler[i].uv_pos >= sampler[i].uv_max) - sampler[i].uv_pos -= sampler[i].uv_max; - } - - left -= count; - } - } - } -} - -typedef 
DWORD(*Draw1ColumnFuncPtr)(); -typedef void(*Draw4ColumnsFuncPtr)(); - -void wallscan_any( - int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, - const BYTE *(*getcol)(FTexture *tex, int x), - void(setupwallscan(int bits, Draw1ColumnFuncPtr &draw1, Draw4ColumnsFuncPtr &draw2))) -{ - if (rw_pic->UseType == FTexture::TEX_Null) - return; - - fixed_t xoffset = rw_offset; - - rw_pic->GetHeight(); // To ensure that rw_pic->HeightBits has been set - int fracbits = 32 - rw_pic->HeightBits; - if (fracbits == 32) - { // Hack for one pixel tall textures - fracbits = 0; - yrepeat = 0; - dc_texturemid = 0; - } - - DWORD(*draw1column)(); - void(*draw4columns)(); - setupwallscan(r_swtruecolor ? FRACBITS : fracbits, draw1column, draw4columns); - - bool fixed = (fixedcolormap != NULL || fixedlightlev >= 0); - if (fixed) - { - palookupoffse[0] = dc_colormap; - palookupoffse[1] = dc_colormap; - palookupoffse[2] = dc_colormap; - palookupoffse[3] = dc_colormap; - palookuplight[0] = 0; - palookuplight[1] = 0; - palookuplight[2] = 0; - palookuplight[3] = 0; - } - - if (fixedcolormap) - R_SetColorMapLight(fixedcolormap, 0, 0); - else - R_SetColorMapLight(basecolormap, 0, 0); - - float light = rw_light; - - // Calculate where 4 column alignment begins and ends: - int aligned_x1 = clamp((x1 + 3) / 4 * 4, x1, x2); - int aligned_x2 = clamp(x2 / 4 * 4, x1, x2); - - double xmagnitude = 1.0; - - // First unaligned columns: - for (int x = x1; x < aligned_x1; x++, light += rw_lightstep) - { - int y1 = uwal[x]; - int y2 = dwal[x]; - if (y2 <= y1) - continue; - - if (!fixed) - R_SetColorMapLight(basecolormap, light, wallshade); - - if (x + 1 < x2) xmagnitude = fabs(FIXED2DBL(lwal[x + 1]) - FIXED2DBL(lwal[x])); - - WallscanSampler sampler(y1, swal[x], yrepeat, lwal[x] + xoffset, xmagnitude, rw_pic, getcol); - wallscan_drawcol1(x, y1, y2, sampler, draw1column); - } - - // The aligned columns - for (int x = aligned_x1; x < aligned_x2; x += 4) - { - // Find y1, y2, 
light and uv values for four columns: - int y1[4] = { uwal[x], uwal[x + 1], uwal[x + 2], uwal[x + 3] }; - int y2[4] = { dwal[x], dwal[x + 1], dwal[x + 2], dwal[x + 3] }; - - float lights[4]; - for (int i = 0; i < 4; i++) - { - lights[i] = light; - light += rw_lightstep; - } - - WallscanSampler sampler[4]; - for (int i = 0; i < 4; i++) - { - if (x + i + 1 < x2) xmagnitude = fabs(FIXED2DBL(lwal[x + i + 1]) - FIXED2DBL(lwal[x + i])); - sampler[i] = WallscanSampler(y1[i], swal[x + i], yrepeat, lwal[x + i] + xoffset, xmagnitude, rw_pic, getcol); - } - - // Figure out where we vertically can start and stop drawing 4 columns in one go - int middle_y1 = y1[0]; - int middle_y2 = y2[0]; - for (int i = 1; i < 4; i++) - { - middle_y1 = MAX(y1[i], middle_y1); - middle_y2 = MIN(y2[i], middle_y2); - } - - // If we got an empty column in our set we cannot draw 4 columns in one go: - bool empty_column_in_set = false; - int bilinear_count = 0; - for (int i = 0; i < 4; i++) - { - if (y2[i] <= y1[i]) - empty_column_in_set = true; - if (sampler[i].source2) - bilinear_count++; - } - - if (empty_column_in_set || middle_y2 <= middle_y1 || (bilinear_count > 0 && bilinear_count < 4)) - { - for (int i = 0; i < 4; i++) - { - if (y2[i] <= y1[i]) - continue; - - if (!fixed) - R_SetColorMapLight(basecolormap, lights[i], wallshade); - wallscan_drawcol1(x + i, y1[i], y2[i], sampler[i], draw1column); - } - continue; - } - - // Draw the first rows where not all 4 columns are active - for (int i = 0; i < 4; i++) - { - if (!fixed) - R_SetColorMapLight(basecolormap, lights[i], wallshade); - - if (y1[i] < middle_y1) - wallscan_drawcol1(x + i, y1[i], middle_y1, sampler[i], draw1column); - } - - // Draw the area where all 4 columns are active - if (!fixed) - { - for (int i = 0; i < 4; i++) - { - if (r_swtruecolor) - { - palookupoffse[i] = basecolormap->Maps; - palookuplight[i] = LIGHTSCALE(lights[i], wallshade); - } - else - { - palookupoffse[i] = basecolormap->Maps + (GETPALOOKUP(lights[i], wallshade) << 
COLORMAPSHIFT); - palookuplight[i] = 0; - } - } - } - wallscan_drawcol4(x, middle_y1, middle_y2, sampler, draw4columns); - - // Draw the last rows where not all 4 columns are active - for (int i = 0; i < 4; i++) - { - if (!fixed) - R_SetColorMapLight(basecolormap, lights[i], wallshade); - - if (middle_y2 < y2[i]) - wallscan_drawcol1(x + i, middle_y2, y2[i], sampler[i], draw1column); - } - } - - // The last unaligned columns: - for (int x = aligned_x2; x < x2; x++, light += rw_lightstep) - { - int y1 = uwal[x]; - int y2 = dwal[x]; - if (y2 <= y1) - continue; - - if (!fixed) - R_SetColorMapLight(basecolormap, light, wallshade); - - if (x + 1 < x2) xmagnitude = fabs(FIXED2DBL(lwal[x + 1]) - FIXED2DBL(lwal[x])); - - WallscanSampler sampler(y1, swal[x], yrepeat, lwal[x] + xoffset, xmagnitude, rw_pic, getcol); - wallscan_drawcol1(x, y1, y2, sampler, draw1column); - } - - NetUpdate (); -} - -void wallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x)) -{ - wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, [](int bits, Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) - { - setupvline(bits); - line1 = dovline1; - line4 = dovline4; - }); -} - -void maskwallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x)) -{ - if (!rw_pic->bMasked) // Textures that aren't masked can use the faster wallscan. 
- { - wallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol); - } - else - { - wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, [](int bits, Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) - { - setupmvline(bits); - line1 = domvline1; - line4 = domvline4; - }); - } -} - -void transmaskwallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x)) -{ - static fixed_t(*tmvline1)(); - static void(*tmvline4)(); - if (!R_GetTransMaskDrawers(&tmvline1, &tmvline4)) - { - // The current translucency is unsupported, so draw with regular maskwallscan instead. - maskwallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol); - } - else - { - wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, [](int bits, Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) - { - setuptmvline(bits); - line1 = reinterpret_cast(tmvline1); - line4 = tmvline4; - }); - } -} - -void wallscan_striped (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat) -{ - FDynamicColormap *startcolormap = basecolormap; - int startshade = wallshade; - bool fogginess = foggy; - - short most1[MAXWIDTH], most2[MAXWIDTH], most3[MAXWIDTH]; - short *up, *down; - - up = uwal; - down = most1; - - assert(WallC.sx1 <= x1); - assert(WallC.sx2 >= x2); - - // kg3D - fake floors instead of zdoom light list - for (unsigned int i = 0; i < frontsector->e->XFloor.lightlist.Size(); i++) - { - int j = WallMost (most3, frontsector->e->XFloor.lightlist[i].plane, &WallC); - if (j != 3) - { - for (int j = x1; j < x2; ++j) - { - down[j] = clamp (most3[j], up[j], dwal[j]); - } - wallscan (x1, x2, up, down, swal, lwal, yrepeat); - up = down; - down = (down == most1) ? 
most2 : most1; - } - - lightlist_t *lit = &frontsector->e->XFloor.lightlist[i]; - basecolormap = lit->extra_colormap; - wallshade = LIGHT2SHADE(curline->sidedef->GetLightLevel(fogginess, - *lit->p_lightlevel, lit->lightsource != NULL) + r_actualextralight); - } - - wallscan (x1, x2, up, dwal, swal, lwal, yrepeat); - basecolormap = startcolormap; - wallshade = startshade; -} - -static void call_wallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, bool mask) -{ - if (mask) - { - if (colfunc == basecolfunc) - { - maskwallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat); - } - else - { - transmaskwallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat); - } - } - else - { - if (fixedcolormap != NULL || fixedlightlev >= 0 || !(frontsector->e && frontsector->e->XFloor.lightlist.Size())) - { - wallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat); - } - else - { - wallscan_striped(x1, x2, uwal, dwal, swal, lwal, yrepeat); - } - } -} - -//============================================================================= -// -// wallscan_np2 -// -// This is a wrapper around wallscan that helps it tile textures whose heights -// are not powers of 2. It divides the wall into texture-sized strips and calls -// wallscan for each of those. Since only one repetition of the texture fits -// in each strip, wallscan will not tile. 
-// -//============================================================================= - -void wallscan_np2(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, double top, double bot, bool mask) -{ - if (!r_np2) - { - call_wallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat, mask); - } - else - { - short most1[MAXWIDTH], most2[MAXWIDTH], most3[MAXWIDTH]; - short *up, *down; - double texheight = rw_pic->GetHeight(); - double partition; - double scaledtexheight = texheight / yrepeat; - - if (yrepeat >= 0) - { // normal orientation: draw strips from top to bottom - partition = top - fmod(top - dc_texturemid / yrepeat - ViewPos.Z, scaledtexheight); - if (partition == top) - { - partition -= scaledtexheight; - } - up = uwal; - down = most1; - dc_texturemid = (partition - ViewPos.Z) * yrepeat + texheight; - while (partition > bot) - { - int j = OWallMost(most3, partition - ViewPos.Z, &WallC); - if (j != 3) - { - for (int j = x1; j < x2; ++j) - { - down[j] = clamp(most3[j], up[j], dwal[j]); - } - call_wallscan(x1, x2, up, down, swal, lwal, yrepeat, mask); - up = down; - down = (down == most1) ? most2 : most1; - } - partition -= scaledtexheight; - dc_texturemid -= texheight; - } - call_wallscan(x1, x2, up, dwal, swal, lwal, yrepeat, mask); - } - else - { // upside down: draw strips from bottom to top - partition = bot - fmod(bot - dc_texturemid / yrepeat - ViewPos.Z, scaledtexheight); - up = most1; - down = dwal; - dc_texturemid = (partition - ViewPos.Z) * yrepeat + texheight; - while (partition < top) - { - int j = OWallMost(most3, partition - ViewPos.Z, &WallC); - if (j != 12) - { - for (int j = x1; j < x2; ++j) - { - up[j] = clamp(most3[j], uwal[j], down[j]); - } - call_wallscan(x1, x2, up, down, swal, lwal, yrepeat, mask); - down = up; - up = (up == most1) ? 
most2 : most1; - } - partition -= scaledtexheight; - dc_texturemid -= texheight; - } - call_wallscan(x1, x2, uwal, down, swal, lwal, yrepeat, mask); - } - } -} - -static void wallscan_np2_ds(drawseg_t *ds, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat) -{ - if (rw_pic->GetHeight() != 1 << rw_pic->HeightBits) - { - double frontcz1 = ds->curline->frontsector->ceilingplane.ZatPoint(ds->curline->v1); - double frontfz1 = ds->curline->frontsector->floorplane.ZatPoint(ds->curline->v1); - double frontcz2 = ds->curline->frontsector->ceilingplane.ZatPoint(ds->curline->v2); - double frontfz2 = ds->curline->frontsector->floorplane.ZatPoint(ds->curline->v2); - double top = MAX(frontcz1, frontcz2); - double bot = MIN(frontfz1, frontfz2); - if (fake3D & FAKE3D_CLIPTOP) - { - top = MIN(top, sclipTop); - } - if (fake3D & FAKE3D_CLIPBOTTOM) - { - bot = MAX(bot, sclipBottom); - } - wallscan_np2(x1, x2, uwal, dwal, swal, lwal, yrepeat, top, bot, true); - } - else - { - call_wallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat, true); - } -} - -} \ No newline at end of file From 7376414607177794ad9b5f1133daa256b9469ef4 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 10 Dec 2016 00:17:18 +0100 Subject: [PATCH 477/912] Added some missing static statements --- src/r_walldraw.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/r_walldraw.cpp b/src/r_walldraw.cpp index 0f2802b016..1570707ccc 100644 --- a/src/r_walldraw.cpp +++ b/src/r_walldraw.cpp @@ -552,7 +552,7 @@ WallSampler::WallSampler(int y1, float swal, double yrepeat, fixed_t xoffset, do } // Draw a column with support for non-power-of-two ranges -void Draw1Column(int x, int y1, int y2, WallSampler &sampler, DWORD(*draw1column)()) +static void Draw1Column(int x, int y1, int y2, WallSampler &sampler, DWORD(*draw1column)()) { if (r_swtruecolor) { @@ -625,7 +625,7 @@ void Draw1Column(int x, int y1, int y2, WallSampler &sampler, DWORD(*draw1column } // Draw four 
columns with support for non-power-of-two ranges -void Draw4Columns(int x, int y1, int y2, WallSampler *sampler, void(*draw4columns)()) +static void Draw4Columns(int x, int y1, int y2, WallSampler *sampler, void(*draw4columns)()) { if (r_swtruecolor) { @@ -718,7 +718,7 @@ void Draw4Columns(int x, int y1, int y2, WallSampler *sampler, void(*draw4column typedef DWORD(*Draw1ColumnFuncPtr)(); typedef void(*Draw4ColumnsFuncPtr)(); -void ProcessWallWorker( +static void ProcessWallWorker( int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x), void(setupProcessNormalWall(int bits, Draw1ColumnFuncPtr &draw1, Draw4ColumnsFuncPtr &draw2))) From 21341b92a5cc61f39d3e40419c2cc06571c502c0 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 11 Dec 2016 17:39:44 +0100 Subject: [PATCH 478/912] Experiment with splitting triangle block coverage from block drawing --- src/r_poly_triangle.cpp | 261 ++++++++++++++++++++++++++++++++++++++++ src/r_poly_triangle.h | 46 +++++++ 2 files changed, 307 insertions(+) diff --git a/src/r_poly_triangle.cpp b/src/r_poly_triangle.cpp index 45f244aabd..13c04d5ef9 100644 --- a/src/r_poly_triangle.cpp +++ b/src/r_poly_triangle.cpp @@ -92,6 +92,7 @@ void PolyTriangleDrawer::draw_arrays(const PolyDrawArgs &drawargs, TriDrawVarian switch (variant) { default: + //case TriDrawVariant::DrawNormal: drawfunc = &ScreenTriangle::DrawFunc; break; case TriDrawVariant::DrawNormal: drawfunc = dest_bgra ? llvm->TriDrawNormal32[bmode] : llvm->TriDrawNormal8[bmode]; break; case TriDrawVariant::FillNormal: drawfunc = dest_bgra ? llvm->TriFillNormal32[bmode] : llvm->TriFillNormal8[bmode]; break; case TriDrawVariant::DrawSubsector: drawfunc = dest_bgra ? 
llvm->TriDrawSubsector32[bmode] : llvm->TriDrawSubsector8[bmode]; break; @@ -569,3 +570,263 @@ void PolyVertexBuffer::Clear() { NextBufferVertex = 0; } + +///////////////////////////////////////////////////////////////////////////// + +void ScreenTriangle::Setup(const TriDrawTriangleArgs *args, WorkerThreadData *thread) +{ + const TriVertex &v1 = *args->v1; + const TriVertex &v2 = *args->v2; + const TriVertex &v3 = *args->v3; + int clipright = args->clipright; + int clipbottom = args->clipbottom; + + ScreenTriangleFullSpan *span = FullSpans; + ScreenTrianglePartialBlock *partial = PartialBlocks; + int curSpan = 0; + int curPartial = 0; + span->Length = 0; + + // 28.4 fixed-point coordinates + const int Y1 = (int)round(16.0f * v1.y); + const int Y2 = (int)round(16.0f * v2.y); + const int Y3 = (int)round(16.0f * v3.y); + + const int X1 = (int)round(16.0f * v1.x); + const int X2 = (int)round(16.0f * v2.x); + const int X3 = (int)round(16.0f * v3.x); + + // Deltas + const int DX12 = X1 - X2; + const int DX23 = X2 - X3; + const int DX31 = X3 - X1; + + const int DY12 = Y1 - Y2; + const int DY23 = Y2 - Y3; + const int DY31 = Y3 - Y1; + + // Fixed-point deltas + const int FDX12 = DX12 << 4; + const int FDX23 = DX23 << 4; + const int FDX31 = DX31 << 4; + + const int FDY12 = DY12 << 4; + const int FDY23 = DY23 << 4; + const int FDY31 = DY31 << 4; + + // Bounding rectangle + int minx = MAX((MIN(MIN(X1, X2), X3) + 0xF) >> 4, 0); + int maxx = MIN((MAX(MAX(X1, X2), X3) + 0xF) >> 4, clipright - 1); + int miny = MAX((MIN(MIN(Y1, Y2), Y3) + 0xF) >> 4, 0); + int maxy = MIN((MAX(MAX(Y1, Y2), Y3) + 0xF) >> 4, clipbottom - 1); + if (minx >= maxx || miny >= maxy) + return; + + // Block size, standard 8x8 (must be power of two) + const int q = 8; + + // Start in corner of 8x8 block + minx &= ~(q - 1); + miny &= ~(q - 1); + + // Half-edge constants + int C1 = DY12 * X1 - DX12 * Y1; + int C2 = DY23 * X2 - DX23 * Y2; + int C3 = DY31 * X3 - DX31 * Y3; + + // Correct for fill convention + if 
(DY12 < 0 || (DY12 == 0 && DX12 > 0)) C1++; + if (DY23 < 0 || (DY23 == 0 && DX23 > 0)) C2++; + if (DY31 < 0 || (DY31 == 0 && DX31 > 0)) C3++; + + // First block line for this thread + int core = thread->core; + int num_cores = thread->num_cores; + int core_skip = (num_cores - ((miny / q) - core) % num_cores) % num_cores; + miny += core_skip * q; + + // Loop through blocks + for (int y = miny; y < maxy; y += q * num_cores) + { + for (int x = minx; x < maxx; x += q) + { + // Corners of block + int x0 = x << 4; + int x1 = (x + q - 1) << 4; + int y0 = y << 4; + int y1 = (y + q - 1) << 4; + + // Evaluate half-space functions + bool a00 = C1 + DX12 * y0 - DY12 * x0 > 0; + bool a10 = C1 + DX12 * y0 - DY12 * x1 > 0; + bool a01 = C1 + DX12 * y1 - DY12 * x0 > 0; + bool a11 = C1 + DX12 * y1 - DY12 * x1 > 0; + int a = (a00 << 0) | (a10 << 1) | (a01 << 2) | (a11 << 3); + + bool b00 = C2 + DX23 * y0 - DY23 * x0 > 0; + bool b10 = C2 + DX23 * y0 - DY23 * x1 > 0; + bool b01 = C2 + DX23 * y1 - DY23 * x0 > 0; + bool b11 = C2 + DX23 * y1 - DY23 * x1 > 0; + int b = (b00 << 0) | (b10 << 1) | (b01 << 2) | (b11 << 3); + + bool c00 = C3 + DX31 * y0 - DY31 * x0 > 0; + bool c10 = C3 + DX31 * y0 - DY31 * x1 > 0; + bool c01 = C3 + DX31 * y1 - DY31 * x0 > 0; + bool c11 = C3 + DX31 * y1 - DY31 * x1 > 0; + int c = (c00 << 0) | (c10 << 1) | (c01 << 2) | (c11 << 3); + + // Skip block when outside an edge + if (a == 0x0 || b == 0x0 || c == 0x0) continue; + + // Accept whole block when totally covered + if (a == 0xF && b == 0xF && c == 0xF && x + q <= clipright && y + q <= clipbottom) + { + if (span->Length != 0) + { + span->Length++; + } + else + { + span->X = x; + span->Y = y; + span->Length = 1; + } + } + else // Partially covered block + { + int CY1 = C1 + DX12 * y0 - DY12 * x0; + int CY2 = C2 + DX23 * y0 - DY23 * x0; + int CY3 = C3 + DX31 * y0 - DY31 * x0; + + uint64_t mask = 0; + + for (int iy = 0; iy < q; iy++) + { + int CX1 = CY1; + int CX2 = CY2; + int CX3 = CY3; + + for (int ix = x; ix < x 
+ q; ix++) + { + bool covered = (CX1 > 0 && CX2 > 0 && CX3 > 0 && ix < clipright && iy < clipbottom); + mask <<= 1; + mask |= covered; + + CX1 -= FDY12; + CX2 -= FDY23; + CX3 -= FDY31; + } + + CY1 += FDX12; + CY2 += FDX23; + CY3 += FDX31; + } + + if (mask != 0xffffffffffffffffLL) + { + if (span->Length > 0) + { + curSpan++; + span++; + span->Length = 0; + } + + partial->X = x; + partial->Y = y; + partial->Mask0 = (uint32_t)(mask >> 32); + partial->Mask1 = (uint32_t)mask; + partial++; + curPartial++; + } + else if (span->Length != 0) + { + span->Length++; + } + else + { + span->X = x; + span->Y = y; + span->Length = 1; + } + } + } + + if (span->Length > 0) + { + curSpan++; + span++; + span->Length = 0; + } + } + + NumFullSpans = curSpan; + NumPartialBlocks = curPartial; +} + +void ScreenTriangle::Draw(const TriDrawTriangleArgs *args) +{ + for (int i = 0; i < NumFullSpans; i++) + { + const auto &span = FullSpans[i]; + + uint32_t *dest = (uint32_t*)args->dest + span.X + span.Y * args->pitch; + int pitch = args->pitch; + int width = span.Length * 8; + int height = 8; + for (int y = 0; y < height; y++) + { + for (int x = 0; x < width; x++) + { + dest[x] = 0xffffffff; + } + + dest += pitch; + } + } + + for (int i = 0; i < NumPartialBlocks; i++) + { + const auto &block = PartialBlocks[i]; + + uint32_t *dest = (uint32_t*)args->dest + block.X + block.Y * args->pitch; + int pitch = args->pitch; + uint32_t mask0 = block.Mask0; + uint32_t mask1 = block.Mask1; + for (int y = 0; y < 4; y++) + { + for (int x = 0; x < 8; x++) + { + if (mask0 & (1<<31)) + dest[x] = 0xffff0000; + mask0 <<= 1; + } + dest += pitch; + } + for (int y = 4; y < 8; y++) + { + for (int x = 0; x < 8; x++) + { + if (mask1 & (1<<31)) + dest[x] = 0xffff0000; + mask1 <<= 1; + } + dest += pitch; + } + } +} + +void ScreenTriangle::DrawFunc(const TriDrawTriangleArgs *args, WorkerThreadData *thread) +{ + static ScreenTriangle triangle[8]; + + triangle[thread->core].Setup(args, thread); + 
triangle[thread->core].Draw(args); +} + +ScreenTriangle::ScreenTriangle() +{ + FullSpansBuffer.resize(MAXWIDTH / 8 * (MAXHEIGHT / 8)); + PartialBlocksBuffer.resize(MAXWIDTH / 8 * (MAXHEIGHT / 8)); + FullSpans = FullSpansBuffer.data(); + PartialBlocks = PartialBlocksBuffer.data(); +} diff --git a/src/r_poly_triangle.h b/src/r_poly_triangle.h index ecf05a9217..28c903c0ce 100644 --- a/src/r_poly_triangle.h +++ b/src/r_poly_triangle.h @@ -264,3 +264,49 @@ public: static TriVertex *GetVertices(int count); static void Clear(); }; + +struct ScreenTriangleBlock +{ + float W; + float Varying[TriVertex::NumVarying]; +}; + +struct ScreenTriangleFullSpan +{ + uint16_t X; + uint16_t Y; + uint32_t Length; +}; + +struct ScreenTrianglePartialBlock +{ + uint16_t X; + uint16_t Y; + uint32_t Mask0; + uint32_t Mask1; +}; + +class ScreenTriangle +{ +public: + static void DrawFunc(const TriDrawTriangleArgs *args, WorkerThreadData *thread); + + ScreenTriangle(); + + void Setup(const TriDrawTriangleArgs *args, WorkerThreadData *thread); + void Draw(const TriDrawTriangleArgs *args); + + ScreenTriangleFullSpan *FullSpans; + ScreenTrianglePartialBlock *PartialBlocks; + int NumFullSpans; + int NumPartialBlocks; + int StartX; + int StartY; + ScreenTriangleBlock Start; + ScreenTriangleBlock GradientX; + ScreenTriangleBlock GradientY; + +private: + std::vector FullSpansBuffer; + std::vector PartialBlocksBuffer; +}; From b2bed88092a402b0da17257adc61c29cb9e4981f Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 12 Dec 2016 01:33:51 +0100 Subject: [PATCH 479/912] SSE speed test --- src/r_poly_triangle.cpp | 260 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 246 insertions(+), 14 deletions(-) diff --git a/src/r_poly_triangle.cpp b/src/r_poly_triangle.cpp index 13c04d5ef9..a8b0154af5 100644 --- a/src/r_poly_triangle.cpp +++ b/src/r_poly_triangle.cpp @@ -572,7 +572,223 @@ void PolyVertexBuffer::Clear() } ///////////////////////////////////////////////////////////////////////////// 
+#if 1 +void ScreenTriangle::Setup(const TriDrawTriangleArgs *args, WorkerThreadData *thread) +{ + const TriVertex &v1 = *args->v1; + const TriVertex &v2 = *args->v2; + const TriVertex &v3 = *args->v3; + int clipright = args->clipright; + int clipbottom = args->clipbottom; + ScreenTriangleFullSpan *span = FullSpans; + ScreenTrianglePartialBlock *partial = PartialBlocks; + span->Length = 0; + + // 28.4 fixed-point coordinates + const int Y1 = (int)round(16.0f * v1.y); + const int Y2 = (int)round(16.0f * v2.y); + const int Y3 = (int)round(16.0f * v3.y); + + const int X1 = (int)round(16.0f * v1.x); + const int X2 = (int)round(16.0f * v2.x); + const int X3 = (int)round(16.0f * v3.x); + + // Deltas + const int DX12 = X1 - X2; + const int DX23 = X2 - X3; + const int DX31 = X3 - X1; + + const int DY12 = Y1 - Y2; + const int DY23 = Y2 - Y3; + const int DY31 = Y3 - Y1; + + // Fixed-point deltas + const int FDX12 = DX12 << 4; + const int FDX23 = DX23 << 4; + const int FDX31 = DX31 << 4; + + const int FDY12 = DY12 << 4; + const int FDY23 = DY23 << 4; + const int FDY31 = DY31 << 4; + + // Bounding rectangle + int minx = MAX((MIN(MIN(X1, X2), X3) + 0xF) >> 4, 0); + int maxx = MIN((MAX(MAX(X1, X2), X3) + 0xF) >> 4, clipright - 1); + int miny = MAX((MIN(MIN(Y1, Y2), Y3) + 0xF) >> 4, 0); + int maxy = MIN((MAX(MAX(Y1, Y2), Y3) + 0xF) >> 4, clipbottom - 1); + if (minx >= maxx || miny >= maxy) + return; + + // Block size, standard 8x8 (must be power of two) + const int q = 8; + + // Start in corner of 8x8 block + minx &= ~(q - 1); + miny &= ~(q - 1); + + // Half-edge constants + int C1 = DY12 * X1 - DX12 * Y1; + int C2 = DY23 * X2 - DX23 * Y2; + int C3 = DY31 * X3 - DX31 * Y3; + + // Correct for fill convention + if (DY12 < 0 || (DY12 == 0 && DX12 > 0)) C1++; + if (DY23 < 0 || (DY23 == 0 && DX23 > 0)) C2++; + if (DY31 < 0 || (DY31 == 0 && DX31 > 0)) C3++; + + // First block line for this thread + int core = thread->core; + int num_cores = thread->num_cores; + int core_skip = 
(num_cores - ((miny / q) - core) % num_cores) % num_cores; + miny += core_skip * q; + + __m128i mC1 = _mm_set1_epi32(C1); + __m128i mC2 = _mm_set1_epi32(C2); + __m128i mC3 = _mm_set1_epi32(C3); + __m128i mDX12 = _mm_set1_epi32(DX12); + __m128i mDX23 = _mm_set1_epi32(DX23); + __m128i mDX31 = _mm_set1_epi32(DX31); + __m128i mDY12 = _mm_set1_epi32(DY12); + __m128i mDY23 = _mm_set1_epi32(DY23); + __m128i mDY31 = _mm_set1_epi32(DY31); + + // Loop through blocks + for (int y = miny; y < maxy; y += q * num_cores) + { + // Corners of block + int x0 = minx << 4; + int x1 = (minx + q - 1) << 4; + int y0 = y << 4; + int y1 = (y + q - 1) << 4; + + __m128i my0y1 = _mm_set_epi32(y0, y0, y1, y1); + __m128i mx0x1 = _mm_set_epi32(x0, x1, x0, x1); + __m128i mAxx = _mm_add_epi32(mC1, _mm_sub_epi32(_mm_mullo_epi32(mDX12, my0y1), _mm_mullo_epi32(mDY12, mx0x1))); + __m128i mBxx = _mm_add_epi32(mC2, _mm_sub_epi32(_mm_mullo_epi32(mDX23, my0y1), _mm_mullo_epi32(mDY23, mx0x1))); + __m128i mCxx = _mm_add_epi32(mC3, _mm_sub_epi32(_mm_mullo_epi32(mDX31, my0y1), _mm_mullo_epi32(mDY31, mx0x1))); + + for (int x = minx; x < maxx; x += q) + { + // Evaluate half-space functions + int a = _mm_movemask_epi8(_mm_cmpgt_epi32(mAxx, _mm_setzero_si128())); + int b = _mm_movemask_epi8(_mm_cmpgt_epi32(mBxx, _mm_setzero_si128())); + int c = _mm_movemask_epi8(_mm_cmpgt_epi32(mCxx, _mm_setzero_si128())); + + mAxx = _mm_sub_epi32(mAxx, _mm_slli_epi32(mDY12, 7)); + mBxx = _mm_sub_epi32(mBxx, _mm_slli_epi32(mDY23, 7)); + mCxx = _mm_sub_epi32(mCxx, _mm_slli_epi32(mDY31, 7)); + + // Skip block when outside an edge + if (a == 0 || b == 0 || c == 0) continue; + + // Accept whole block when totally covered + if (a == 0xffff && b == 0xffff && c == 0xffff && x + q <= clipright && y + q <= clipbottom) + { + if (span->Length != 0) + { + span->Length++; + } + else + { + span->X = x; + span->Y = y; + span->Length = 1; + } + } + else // Partially covered block + { + x0 = x << 4; + x1 = (x + q - 1) << 4; + int CY1 = C1 + DX12 
* y0 - DY12 * x0; + int CY2 = C2 + DX23 * y0 - DY23 * x0; + int CY3 = C3 + DX31 * y0 - DY31 * x0; + + uint32_t mask0 = 0; + uint32_t mask1 = 0; + + for (int iy = 0; iy < 4; iy++) + { + int CX1 = CY1; + int CX2 = CY2; + int CX3 = CY3; + + for (int ix = x; ix < x + q; ix++) + { + bool covered = (CX1 > 0 && CX2 > 0 && CX3 > 0 && ix < clipright && iy < clipbottom); + mask0 <<= 1; + mask0 |= (uint32_t)covered; + + CX1 -= FDY12; + CX2 -= FDY23; + CX3 -= FDY31; + } + + CY1 += FDX12; + CY2 += FDX23; + CY3 += FDX31; + } + + for (int iy = 4; iy < q; iy++) + { + int CX1 = CY1; + int CX2 = CY2; + int CX3 = CY3; + + for (int ix = x; ix < x + q; ix++) + { + bool covered = (CX1 > 0 && CX2 > 0 && CX3 > 0 && ix < clipright && iy < clipbottom); + mask1 <<= 1; + mask1 |= (uint32_t)covered; + + CX1 -= FDY12; + CX2 -= FDY23; + CX3 -= FDY31; + } + + CY1 += FDX12; + CY2 += FDX23; + CY3 += FDX31; + } + + if (mask0 != 0xffffffff || mask1 != 0xffffffff) + { + if (span->Length > 0) + { + span++; + span->Length = 0; + } + + partial->X = x; + partial->Y = y; + partial->Mask0 = mask0; + partial->Mask1 = mask1; + partial++; + } + else if (span->Length != 0) + { + span->Length++; + } + else + { + span->X = x; + span->Y = y; + span->Length = 1; + } + } + } + + if (span->Length > 0) + { + span++; + span->Length = 0; + } + } + + NumFullSpans = (int)(span - FullSpans); + NumPartialBlocks = (int)(partial - PartialBlocks); +} +#else void ScreenTriangle::Setup(const TriDrawTriangleArgs *args, WorkerThreadData *thread) { const TriVertex &v1 = *args->v1; @@ -583,8 +799,6 @@ void ScreenTriangle::Setup(const TriDrawTriangleArgs *args, WorkerThreadData *th ScreenTriangleFullSpan *span = FullSpans; ScreenTrianglePartialBlock *partial = PartialBlocks; - int curSpan = 0; - int curPartial = 0; span->Length = 0; // 28.4 fixed-point coordinates @@ -676,7 +890,15 @@ void ScreenTriangle::Setup(const TriDrawTriangleArgs *args, WorkerThreadData *th int c = (c00 << 0) | (c10 << 1) | (c01 << 2) | (c11 << 3); // Skip 
block when outside an edge - if (a == 0x0 || b == 0x0 || c == 0x0) continue; + if (a == 0x0 || b == 0x0 || c == 0x0) + { + if (span->Length != 0) + { + span++; + span->Length = 0; + } + continue; + } // Accept whole block when totally covered if (a == 0xF && b == 0xF && c == 0xF && x + q <= clipright && y + q <= clipbottom) @@ -710,7 +932,7 @@ void ScreenTriangle::Setup(const TriDrawTriangleArgs *args, WorkerThreadData *th { bool covered = (CX1 > 0 && CX2 > 0 && CX3 > 0 && ix < clipright && iy < clipbottom); mask <<= 1; - mask |= covered; + mask |= (uint64_t)covered; CX1 -= FDY12; CX2 -= FDY23; @@ -724,9 +946,8 @@ void ScreenTriangle::Setup(const TriDrawTriangleArgs *args, WorkerThreadData *th if (mask != 0xffffffffffffffffLL) { - if (span->Length > 0) + if (span->Length != 0) { - curSpan++; span++; span->Length = 0; } @@ -736,7 +957,6 @@ void ScreenTriangle::Setup(const TriDrawTriangleArgs *args, WorkerThreadData *th partial->Mask0 = (uint32_t)(mask >> 32); partial->Mask1 = (uint32_t)mask; partial++; - curPartial++; } else if (span->Length != 0) { @@ -751,20 +971,32 @@ void ScreenTriangle::Setup(const TriDrawTriangleArgs *args, WorkerThreadData *th } } - if (span->Length > 0) + if (span->Length != 0) { - curSpan++; span++; span->Length = 0; } } - NumFullSpans = curSpan; - NumPartialBlocks = curPartial; + NumFullSpans = (int)(span - FullSpans); + NumPartialBlocks = (int)(partial - PartialBlocks); } +#endif void ScreenTriangle::Draw(const TriDrawTriangleArgs *args) { + float r = args->v1->x / 255.0f; + float g = args->v1->y / 255.0f; + float b = args->v1->z / 255.0f; + r = (r - floor(r)) * 255; + g = (g - floor(g)) * 255; + b = (b - floor(b)) * 255; + + uint32_t red = (uint32_t)r; + uint32_t green = (uint32_t)g; + uint32_t blue = (uint32_t)b; + uint32_t solidcolor = 0xff000000 | (red << 16) | (green << 8) | blue; + for (int i = 0; i < NumFullSpans; i++) { const auto &span = FullSpans[i]; @@ -777,7 +1009,7 @@ void ScreenTriangle::Draw(const TriDrawTriangleArgs *args) 
{ for (int x = 0; x < width; x++) { - dest[x] = 0xffffffff; + dest[x] = solidcolor; } dest += pitch; @@ -797,7 +1029,7 @@ void ScreenTriangle::Draw(const TriDrawTriangleArgs *args) for (int x = 0; x < 8; x++) { if (mask0 & (1<<31)) - dest[x] = 0xffff0000; + dest[x] = solidcolor; mask0 <<= 1; } dest += pitch; @@ -807,7 +1039,7 @@ void ScreenTriangle::Draw(const TriDrawTriangleArgs *args) for (int x = 0; x < 8; x++) { if (mask1 & (1<<31)) - dest[x] = 0xffff0000; + dest[x] = solidcolor; mask1 <<= 1; } dest += pitch; From bee363d79a02caf368452dbf74fddaf4be943afb Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 12 Dec 2016 10:44:34 +0100 Subject: [PATCH 480/912] Add stencil testing --- src/r_poly_triangle.cpp | 116 ++++++++++++++++++++++++++++++---------- 1 file changed, 89 insertions(+), 27 deletions(-) diff --git a/src/r_poly_triangle.cpp b/src/r_poly_triangle.cpp index a8b0154af5..be1d5ea75f 100644 --- a/src/r_poly_triangle.cpp +++ b/src/r_poly_triangle.cpp @@ -35,6 +35,7 @@ #include "v_palette.h" #include "r_data/colormaps.h" #include "r_poly_triangle.h" +#include int PolyTriangleDrawer::viewport_x; int PolyTriangleDrawer::viewport_y; @@ -572,7 +573,7 @@ void PolyVertexBuffer::Clear() } ///////////////////////////////////////////////////////////////////////////// -#if 1 +#if 0 void ScreenTriangle::Setup(const TriDrawTriangleArgs *args, WorkerThreadData *thread) { const TriVertex &v1 = *args->v1; @@ -580,6 +581,11 @@ void ScreenTriangle::Setup(const TriDrawTriangleArgs *args, WorkerThreadData *th const TriVertex &v3 = *args->v3; int clipright = args->clipright; int clipbottom = args->clipbottom; + + int stencilPitch = args->stencilPitch; + uint8_t *stencilValues = args->stencilValues; + uint32_t *stencilMasks = args->stencilMasks; + uint8_t stencilTestValue = args->stencilTestValue; ScreenTriangleFullSpan *span = FullSpans; ScreenTrianglePartialBlock *partial = PartialBlocks; @@ -678,12 +684,27 @@ void ScreenTriangle::Setup(const TriDrawTriangleArgs *args, 
WorkerThreadData *th mAxx = _mm_sub_epi32(mAxx, _mm_slli_epi32(mDY12, 7)); mBxx = _mm_sub_epi32(mBxx, _mm_slli_epi32(mDY23, 7)); mCxx = _mm_sub_epi32(mCxx, _mm_slli_epi32(mDY31, 7)); + + // Stencil test the whole block, if possible + int block = x / 8 + y / 8 * stencilPitch; + uint8_t *stencilBlock = &stencilValues[block * 64]; + uint32_t *stencilBlockMask = &stencilMasks[block]; + bool blockIsSingleStencil = ((*stencilBlockMask) & 0xffffff00) == 0xffffff00; + bool skipBlock = blockIsSingleStencil && ((*stencilBlockMask) & 0xff) != stencilTestValue; // Skip block when outside an edge - if (a == 0 || b == 0 || c == 0) continue; + if (a == 0 || b == 0 || c == 0 || skipBlock) + { + if (span->Length != 0) + { + span++; + span->Length = 0; + } + continue; + } // Accept whole block when totally covered - if (a == 0xffff && b == 0xffff && c == 0xffff && x + q <= clipright && y + q <= clipbottom) + if (a == 0xffff && b == 0xffff && c == 0xffff && x + q <= clipright && y + q <= clipbottom && blockIsSingleStencil) { if (span->Length != 0) { @@ -713,9 +734,10 @@ void ScreenTriangle::Setup(const TriDrawTriangleArgs *args, WorkerThreadData *th int CX2 = CY2; int CX3 = CY3; - for (int ix = x; ix < x + q; ix++) + for (int ix = 0; ix < q; ix++) { - bool covered = (CX1 > 0 && CX2 > 0 && CX3 > 0 && ix < clipright && iy < clipbottom); + bool passStencilTest = blockIsSingleStencil || stencilBlock[ix + iy * q] == stencilTestValue; + bool covered = (CX1 > 0 && CX2 > 0 && CX3 > 0 && (x + ix) < clipright && (y + iy) < clipbottom && passStencilTest); mask0 <<= 1; mask0 |= (uint32_t)covered; @@ -735,9 +757,10 @@ void ScreenTriangle::Setup(const TriDrawTriangleArgs *args, WorkerThreadData *th int CX2 = CY2; int CX3 = CY3; - for (int ix = x; ix < x + q; ix++) + for (int ix = 0; ix < q; ix++) { - bool covered = (CX1 > 0 && CX2 > 0 && CX3 > 0 && ix < clipright && iy < clipbottom); + bool passStencilTest = blockIsSingleStencil || stencilBlock[ix + iy * q] == stencilTestValue; + bool covered = 
(CX1 > 0 && CX2 > 0 && CX3 > 0 && (x + ix) < clipright && (y + iy) < clipbottom && passStencilTest); mask1 <<= 1; mask1 |= (uint32_t)covered; @@ -797,6 +820,11 @@ void ScreenTriangle::Setup(const TriDrawTriangleArgs *args, WorkerThreadData *th int clipright = args->clipright; int clipbottom = args->clipbottom; + int stencilPitch = args->stencilPitch; + uint8_t *stencilValues = args->stencilValues; + uint32_t *stencilMasks = args->stencilMasks; + uint8_t stencilTestValue = args->stencilTestValue; + ScreenTriangleFullSpan *span = FullSpans; ScreenTrianglePartialBlock *partial = PartialBlocks; span->Length = 0; @@ -889,8 +917,15 @@ void ScreenTriangle::Setup(const TriDrawTriangleArgs *args, WorkerThreadData *th bool c11 = C3 + DX31 * y1 - DY31 * x1 > 0; int c = (c00 << 0) | (c10 << 1) | (c01 << 2) | (c11 << 3); + // Stencil test the whole block, if possible + int block = x / 8 + y / 8 * stencilPitch; + uint8_t *stencilBlock = &stencilValues[block * 64]; + uint32_t *stencilBlockMask = &stencilMasks[block]; + bool blockIsSingleStencil = ((*stencilBlockMask) & 0xffffff00) == 0xffffff00; + bool skipBlock = blockIsSingleStencil && ((*stencilBlockMask) & 0xff) != stencilTestValue; + // Skip block when outside an edge - if (a == 0x0 || b == 0x0 || c == 0x0) + if (a == 0 || b == 0 || c == 0 || skipBlock) { if (span->Length != 0) { @@ -899,9 +934,9 @@ void ScreenTriangle::Setup(const TriDrawTriangleArgs *args, WorkerThreadData *th } continue; } - + // Accept whole block when totally covered - if (a == 0xF && b == 0xF && c == 0xF && x + q <= clipright && y + q <= clipbottom) + if (a == 0xf && b == 0xf && c == 0xf && x + q <= clipright && y + q <= clipbottom && blockIsSingleStencil) { if (span->Length != 0) { @@ -916,46 +951,73 @@ void ScreenTriangle::Setup(const TriDrawTriangleArgs *args, WorkerThreadData *th } else // Partially covered block { + x0 = x << 4; + x1 = (x + q - 1) << 4; int CY1 = C1 + DX12 * y0 - DY12 * x0; int CY2 = C2 + DX23 * y0 - DY23 * x0; int CY3 = C3 + DX31 
* y0 - DY31 * x0; - - uint64_t mask = 0; - - for (int iy = 0; iy < q; iy++) + + uint32_t mask0 = 0; + uint32_t mask1 = 0; + + for (int iy = 0; iy < 4; iy++) { int CX1 = CY1; int CX2 = CY2; int CX3 = CY3; - - for (int ix = x; ix < x + q; ix++) + + for (int ix = 0; ix < q; ix++) { - bool covered = (CX1 > 0 && CX2 > 0 && CX3 > 0 && ix < clipright && iy < clipbottom); - mask <<= 1; - mask |= (uint64_t)covered; - + bool passStencilTest = blockIsSingleStencil || stencilBlock[ix + iy * q] == stencilTestValue; + bool covered = (CX1 > 0 && CX2 > 0 && CX3 > 0 && (x + ix) < clipright && (y + iy) < clipbottom && passStencilTest); + mask0 <<= 1; + mask0 |= (uint32_t)covered; + CX1 -= FDY12; CX2 -= FDY23; CX3 -= FDY31; } - + CY1 += FDX12; CY2 += FDX23; CY3 += FDX31; } - - if (mask != 0xffffffffffffffffLL) + + for (int iy = 4; iy < q; iy++) { - if (span->Length != 0) + int CX1 = CY1; + int CX2 = CY2; + int CX3 = CY3; + + for (int ix = 0; ix < q; ix++) + { + bool passStencilTest = blockIsSingleStencil || stencilBlock[ix + iy * q] == stencilTestValue; + bool covered = (CX1 > 0 && CX2 > 0 && CX3 > 0 && (x + ix) < clipright && (y + iy) < clipbottom && passStencilTest); + mask1 <<= 1; + mask1 |= (uint32_t)covered; + + CX1 -= FDY12; + CX2 -= FDY23; + CX3 -= FDY31; + } + + CY1 += FDX12; + CY2 += FDX23; + CY3 += FDX31; + } + + if (mask0 != 0xffffffff || mask1 != 0xffffffff) + { + if (span->Length > 0) { span++; span->Length = 0; } - + partial->X = x; partial->Y = y; - partial->Mask0 = (uint32_t)(mask >> 32); - partial->Mask1 = (uint32_t)mask; + partial->Mask0 = mask0; + partial->Mask1 = mask1; partial++; } else if (span->Length != 0) From e016a96efe96849b873b68a285ae43e689fadd1b Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 12 Dec 2016 12:38:46 +0100 Subject: [PATCH 481/912] Use less triangles for the sky --- src/r_poly_sky.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/r_poly_sky.cpp b/src/r_poly_sky.cpp index dafe1f7ec8..e4e0a01444 100644 --- 
a/src/r_poly_sky.cpp +++ b/src/r_poly_sky.cpp @@ -102,7 +102,7 @@ void PolySkyDome::RenderCapColorRow(PolyDrawArgs &args, FTexture *skytex, int ro void PolySkyDome::CreateDome() { - mColumns = 128; + mColumns = 16;// 128; mRows = 4; CreateSkyHemisphere(false); CreateSkyHemisphere(true); From 79ad5d2a2a189a2bce65b29dd15dc675abd5111c Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 12 Dec 2016 12:39:25 +0100 Subject: [PATCH 482/912] Write out subsector info --- src/r_drawers.h | 2 + src/r_poly_triangle.cpp | 285 +++++----------------------------------- 2 files changed, 34 insertions(+), 253 deletions(-) diff --git a/src/r_drawers.h b/src/r_drawers.h index ebe5d90029..2e91d502b2 100644 --- a/src/r_drawers.h +++ b/src/r_drawers.h @@ -26,6 +26,7 @@ #include class FString; +class DrawerThread; struct WorkerThreadData { @@ -34,6 +35,7 @@ struct WorkerThreadData int32_t pass_start_y; int32_t pass_end_y; uint32_t *temp; + DrawerThread *drawer_thread; }; struct DrawWallArgs diff --git a/src/r_poly_triangle.cpp b/src/r_poly_triangle.cpp index be1d5ea75f..37c61655f5 100644 --- a/src/r_poly_triangle.cpp +++ b/src/r_poly_triangle.cpp @@ -35,7 +35,6 @@ #include "v_palette.h" #include "r_data/colormaps.h" #include "r_poly_triangle.h" -#include int PolyTriangleDrawer::viewport_x; int PolyTriangleDrawer::viewport_y; @@ -93,7 +92,7 @@ void PolyTriangleDrawer::draw_arrays(const PolyDrawArgs &drawargs, TriDrawVarian switch (variant) { default: - //case TriDrawVariant::DrawNormal: drawfunc = &ScreenTriangle::DrawFunc; break; + //case TriDrawVariant::DrawNormal: drawfunc = dest_bgra ? &ScreenTriangle::DrawFunc : llvm->TriDrawNormal8[bmode]; break; case TriDrawVariant::DrawNormal: drawfunc = dest_bgra ? llvm->TriDrawNormal32[bmode] : llvm->TriDrawNormal8[bmode]; break; case TriDrawVariant::FillNormal: drawfunc = dest_bgra ? llvm->TriFillNormal32[bmode] : llvm->TriFillNormal8[bmode]; break; case TriDrawVariant::DrawSubsector: drawfunc = dest_bgra ? 
llvm->TriDrawSubsector32[bmode] : llvm->TriDrawSubsector8[bmode]; break; @@ -573,7 +572,7 @@ void PolyVertexBuffer::Clear() } ///////////////////////////////////////////////////////////////////////////// -#if 0 + void ScreenTriangle::Setup(const TriDrawTriangleArgs *args, WorkerThreadData *thread) { const TriVertex &v1 = *args->v1; @@ -586,283 +585,48 @@ void ScreenTriangle::Setup(const TriDrawTriangleArgs *args, WorkerThreadData *th uint8_t *stencilValues = args->stencilValues; uint32_t *stencilMasks = args->stencilMasks; uint8_t stencilTestValue = args->stencilTestValue; - + ScreenTriangleFullSpan *span = FullSpans; ScreenTrianglePartialBlock *partial = PartialBlocks; - span->Length = 0; - + // 28.4 fixed-point coordinates const int Y1 = (int)round(16.0f * v1.y); const int Y2 = (int)round(16.0f * v2.y); const int Y3 = (int)round(16.0f * v3.y); - + const int X1 = (int)round(16.0f * v1.x); const int X2 = (int)round(16.0f * v2.x); const int X3 = (int)round(16.0f * v3.x); - + // Deltas const int DX12 = X1 - X2; const int DX23 = X2 - X3; const int DX31 = X3 - X1; - + const int DY12 = Y1 - Y2; const int DY23 = Y2 - Y3; const int DY31 = Y3 - Y1; - + // Fixed-point deltas const int FDX12 = DX12 << 4; const int FDX23 = DX23 << 4; const int FDX31 = DX31 << 4; - + const int FDY12 = DY12 << 4; const int FDY23 = DY23 << 4; const int FDY31 = DY31 << 4; - + // Bounding rectangle int minx = MAX((MIN(MIN(X1, X2), X3) + 0xF) >> 4, 0); int maxx = MIN((MAX(MAX(X1, X2), X3) + 0xF) >> 4, clipright - 1); int miny = MAX((MIN(MIN(Y1, Y2), Y3) + 0xF) >> 4, 0); int maxy = MIN((MAX(MAX(Y1, Y2), Y3) + 0xF) >> 4, clipbottom - 1); if (minx >= maxx || miny >= maxy) - return; - - // Block size, standard 8x8 (must be power of two) - const int q = 8; - - // Start in corner of 8x8 block - minx &= ~(q - 1); - miny &= ~(q - 1); - - // Half-edge constants - int C1 = DY12 * X1 - DX12 * Y1; - int C2 = DY23 * X2 - DX23 * Y2; - int C3 = DY31 * X3 - DX31 * Y3; - - // Correct for fill convention - if (DY12 
< 0 || (DY12 == 0 && DX12 > 0)) C1++; - if (DY23 < 0 || (DY23 == 0 && DX23 > 0)) C2++; - if (DY31 < 0 || (DY31 == 0 && DX31 > 0)) C3++; - - // First block line for this thread - int core = thread->core; - int num_cores = thread->num_cores; - int core_skip = (num_cores - ((miny / q) - core) % num_cores) % num_cores; - miny += core_skip * q; - - __m128i mC1 = _mm_set1_epi32(C1); - __m128i mC2 = _mm_set1_epi32(C2); - __m128i mC3 = _mm_set1_epi32(C3); - __m128i mDX12 = _mm_set1_epi32(DX12); - __m128i mDX23 = _mm_set1_epi32(DX23); - __m128i mDX31 = _mm_set1_epi32(DX31); - __m128i mDY12 = _mm_set1_epi32(DY12); - __m128i mDY23 = _mm_set1_epi32(DY23); - __m128i mDY31 = _mm_set1_epi32(DY31); - - // Loop through blocks - for (int y = miny; y < maxy; y += q * num_cores) { - // Corners of block - int x0 = minx << 4; - int x1 = (minx + q - 1) << 4; - int y0 = y << 4; - int y1 = (y + q - 1) << 4; - - __m128i my0y1 = _mm_set_epi32(y0, y0, y1, y1); - __m128i mx0x1 = _mm_set_epi32(x0, x1, x0, x1); - __m128i mAxx = _mm_add_epi32(mC1, _mm_sub_epi32(_mm_mullo_epi32(mDX12, my0y1), _mm_mullo_epi32(mDY12, mx0x1))); - __m128i mBxx = _mm_add_epi32(mC2, _mm_sub_epi32(_mm_mullo_epi32(mDX23, my0y1), _mm_mullo_epi32(mDY23, mx0x1))); - __m128i mCxx = _mm_add_epi32(mC3, _mm_sub_epi32(_mm_mullo_epi32(mDX31, my0y1), _mm_mullo_epi32(mDY31, mx0x1))); - - for (int x = minx; x < maxx; x += q) - { - // Evaluate half-space functions - int a = _mm_movemask_epi8(_mm_cmpgt_epi32(mAxx, _mm_setzero_si128())); - int b = _mm_movemask_epi8(_mm_cmpgt_epi32(mBxx, _mm_setzero_si128())); - int c = _mm_movemask_epi8(_mm_cmpgt_epi32(mCxx, _mm_setzero_si128())); - - mAxx = _mm_sub_epi32(mAxx, _mm_slli_epi32(mDY12, 7)); - mBxx = _mm_sub_epi32(mBxx, _mm_slli_epi32(mDY23, 7)); - mCxx = _mm_sub_epi32(mCxx, _mm_slli_epi32(mDY31, 7)); - - // Stencil test the whole block, if possible - int block = x / 8 + y / 8 * stencilPitch; - uint8_t *stencilBlock = &stencilValues[block * 64]; - uint32_t *stencilBlockMask = 
&stencilMasks[block]; - bool blockIsSingleStencil = ((*stencilBlockMask) & 0xffffff00) == 0xffffff00; - bool skipBlock = blockIsSingleStencil && ((*stencilBlockMask) & 0xff) != stencilTestValue; - - // Skip block when outside an edge - if (a == 0 || b == 0 || c == 0 || skipBlock) - { - if (span->Length != 0) - { - span++; - span->Length = 0; - } - continue; - } - - // Accept whole block when totally covered - if (a == 0xffff && b == 0xffff && c == 0xffff && x + q <= clipright && y + q <= clipbottom && blockIsSingleStencil) - { - if (span->Length != 0) - { - span->Length++; - } - else - { - span->X = x; - span->Y = y; - span->Length = 1; - } - } - else // Partially covered block - { - x0 = x << 4; - x1 = (x + q - 1) << 4; - int CY1 = C1 + DX12 * y0 - DY12 * x0; - int CY2 = C2 + DX23 * y0 - DY23 * x0; - int CY3 = C3 + DX31 * y0 - DY31 * x0; - - uint32_t mask0 = 0; - uint32_t mask1 = 0; - - for (int iy = 0; iy < 4; iy++) - { - int CX1 = CY1; - int CX2 = CY2; - int CX3 = CY3; - - for (int ix = 0; ix < q; ix++) - { - bool passStencilTest = blockIsSingleStencil || stencilBlock[ix + iy * q] == stencilTestValue; - bool covered = (CX1 > 0 && CX2 > 0 && CX3 > 0 && (x + ix) < clipright && (y + iy) < clipbottom && passStencilTest); - mask0 <<= 1; - mask0 |= (uint32_t)covered; - - CX1 -= FDY12; - CX2 -= FDY23; - CX3 -= FDY31; - } - - CY1 += FDX12; - CY2 += FDX23; - CY3 += FDX31; - } - - for (int iy = 4; iy < q; iy++) - { - int CX1 = CY1; - int CX2 = CY2; - int CX3 = CY3; - - for (int ix = 0; ix < q; ix++) - { - bool passStencilTest = blockIsSingleStencil || stencilBlock[ix + iy * q] == stencilTestValue; - bool covered = (CX1 > 0 && CX2 > 0 && CX3 > 0 && (x + ix) < clipright && (y + iy) < clipbottom && passStencilTest); - mask1 <<= 1; - mask1 |= (uint32_t)covered; - - CX1 -= FDY12; - CX2 -= FDY23; - CX3 -= FDY31; - } - - CY1 += FDX12; - CY2 += FDX23; - CY3 += FDX31; - } - - if (mask0 != 0xffffffff || mask1 != 0xffffffff) - { - if (span->Length > 0) - { - span++; - span->Length = 
0; - } - - partial->X = x; - partial->Y = y; - partial->Mask0 = mask0; - partial->Mask1 = mask1; - partial++; - } - else if (span->Length != 0) - { - span->Length++; - } - else - { - span->X = x; - span->Y = y; - span->Length = 1; - } - } - } - - if (span->Length > 0) - { - span++; - span->Length = 0; - } - } - - NumFullSpans = (int)(span - FullSpans); - NumPartialBlocks = (int)(partial - PartialBlocks); -} -#else -void ScreenTriangle::Setup(const TriDrawTriangleArgs *args, WorkerThreadData *thread) -{ - const TriVertex &v1 = *args->v1; - const TriVertex &v2 = *args->v2; - const TriVertex &v3 = *args->v3; - int clipright = args->clipright; - int clipbottom = args->clipbottom; - - int stencilPitch = args->stencilPitch; - uint8_t *stencilValues = args->stencilValues; - uint32_t *stencilMasks = args->stencilMasks; - uint8_t stencilTestValue = args->stencilTestValue; - - ScreenTriangleFullSpan *span = FullSpans; - ScreenTrianglePartialBlock *partial = PartialBlocks; - span->Length = 0; - - // 28.4 fixed-point coordinates - const int Y1 = (int)round(16.0f * v1.y); - const int Y2 = (int)round(16.0f * v2.y); - const int Y3 = (int)round(16.0f * v3.y); - - const int X1 = (int)round(16.0f * v1.x); - const int X2 = (int)round(16.0f * v2.x); - const int X3 = (int)round(16.0f * v3.x); - - // Deltas - const int DX12 = X1 - X2; - const int DX23 = X2 - X3; - const int DX31 = X3 - X1; - - const int DY12 = Y1 - Y2; - const int DY23 = Y2 - Y3; - const int DY31 = Y3 - Y1; - - // Fixed-point deltas - const int FDX12 = DX12 << 4; - const int FDX23 = DX23 << 4; - const int FDX31 = DX31 << 4; - - const int FDY12 = DY12 << 4; - const int FDY23 = DY23 << 4; - const int FDY31 = DY31 << 4; - - // Bounding rectangle - int minx = MAX((MIN(MIN(X1, X2), X3) + 0xF) >> 4, 0); - int maxx = MIN((MAX(MAX(X1, X2), X3) + 0xF) >> 4, clipright - 1); - int miny = MAX((MIN(MIN(Y1, Y2), Y3) + 0xF) >> 4, 0); - int maxy = MIN((MAX(MAX(Y1, Y2), Y3) + 0xF) >> 4, clipbottom - 1); - if (minx >= maxx || miny >= 
maxy) + NumFullSpans = 0; + NumPartialBlocks = 0; return; + } // Block size, standard 8x8 (must be power of two) const int q = 8; @@ -886,7 +650,9 @@ void ScreenTriangle::Setup(const TriDrawTriangleArgs *args, WorkerThreadData *th int num_cores = thread->num_cores; int core_skip = (num_cores - ((miny / q) - core) % num_cores) % num_cores; miny += core_skip * q; - + + span->Length = 0; + // Loop through blocks for (int y = miny; y < maxy; y += q * num_cores) { @@ -1043,7 +809,6 @@ void ScreenTriangle::Setup(const TriDrawTriangleArgs *args, WorkerThreadData *th NumFullSpans = (int)(span - FullSpans); NumPartialBlocks = (int)(partial - PartialBlocks); } -#endif void ScreenTriangle::Draw(const TriDrawTriangleArgs *args) { @@ -1059,11 +824,14 @@ void ScreenTriangle::Draw(const TriDrawTriangleArgs *args) uint32_t blue = (uint32_t)b; uint32_t solidcolor = 0xff000000 | (red << 16) | (green << 8) | blue; + uint32_t subsectorDepth = args->uniforms->subsectorDepth; + for (int i = 0; i < NumFullSpans; i++) { const auto &span = FullSpans[i]; uint32_t *dest = (uint32_t*)args->dest + span.X + span.Y * args->pitch; + uint32_t *subsector = args->subsectorGBuffer + span.X + span.Y * args->pitch; int pitch = args->pitch; int width = span.Length * 8; int height = 8; @@ -1072,9 +840,11 @@ void ScreenTriangle::Draw(const TriDrawTriangleArgs *args) for (int x = 0; x < width; x++) { dest[x] = solidcolor; + subsector[x] = subsectorDepth; } dest += pitch; + subsector += pitch; } } @@ -1083,6 +853,7 @@ void ScreenTriangle::Draw(const TriDrawTriangleArgs *args) const auto &block = PartialBlocks[i]; uint32_t *dest = (uint32_t*)args->dest + block.X + block.Y * args->pitch; + uint32_t *subsector = args->subsectorGBuffer + block.X + block.Y * args->pitch; int pitch = args->pitch; uint32_t mask0 = block.Mask0; uint32_t mask1 = block.Mask1; @@ -1090,21 +861,29 @@ void ScreenTriangle::Draw(const TriDrawTriangleArgs *args) { for (int x = 0; x < 8; x++) { - if (mask0 & (1<<31)) + if (mask0 & (1 << 
31)) + { dest[x] = solidcolor; + subsector[x] = subsectorDepth; + } mask0 <<= 1; } dest += pitch; + subsector += pitch; } for (int y = 4; y < 8; y++) { for (int x = 0; x < 8; x++) { - if (mask1 & (1<<31)) + if (mask1 & (1 << 31)) + { dest[x] = solidcolor; + subsector[x] = subsectorDepth; + } mask1 <<= 1; } dest += pitch; + subsector += pitch; } } } From afb946d58635d33a643db84f7bdea869403d093d Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 12 Dec 2016 21:34:22 +0100 Subject: [PATCH 483/912] Added texturing --- src/r_poly_triangle.cpp | 178 ++++++++++++++++++++++++++++++++++++---- src/r_poly_triangle.h | 8 +- 2 files changed, 165 insertions(+), 21 deletions(-) diff --git a/src/r_poly_triangle.cpp b/src/r_poly_triangle.cpp index 37c61655f5..254fc2ef93 100644 --- a/src/r_poly_triangle.cpp +++ b/src/r_poly_triangle.cpp @@ -198,6 +198,19 @@ void PolyTriangleDrawer::draw_shaded_triangle(const ShadedTriVertex *vert, bool v.y = viewport_y + viewport_height * (1.0f - v.y) * 0.5f; } + // Keep varyings in -128 to 128 range if possible + if (numclipvert > 0) + { + for (int j = 0; j < TriVertex::NumVarying; j++) + { + float newOrigin = floorf(clippedvert[0].varying[j] * 0.1f) * 10.0f; + for (int i = 0; i < numclipvert; i++) + { + clippedvert[i].varying[j] -= newOrigin; + } + } + } + // Draw screen triangles if (ccw) { @@ -651,6 +664,8 @@ void ScreenTriangle::Setup(const TriDrawTriangleArgs *args, WorkerThreadData *th int core_skip = (num_cores - ((miny / q) - core) % num_cores) % num_cores; miny += core_skip * q; + StartX = minx; + StartY = miny; span->Length = 0; // Loop through blocks @@ -810,39 +825,101 @@ void ScreenTriangle::Setup(const TriDrawTriangleArgs *args, WorkerThreadData *th NumPartialBlocks = (int)(partial - PartialBlocks); } +float ScreenTriangle::FindGradientX(float x0, float y0, float x1, float y1, float x2, float y2, float c0, float c1, float c2) +{ + float top = (c1 - c2) * (y0 - y2) - (c0 - c2) * (y1 - y2); + float bottom = (x1 - x2) * (y0 - y2) 
- (x0 - x2) * (y1 - y2); + return top / bottom; +} + +float ScreenTriangle::FindGradientY(float x0, float y0, float x1, float y1, float x2, float y2, float c0, float c1, float c2) +{ + float top = (c1 - c2) * (x0 - x2) - (c0 - c2) * (x1 - x2); + float bottom = (x0 - x2) * (y1 - y2) - (x1 - x2) * (y0 - y2); + return top / bottom; +} + void ScreenTriangle::Draw(const TriDrawTriangleArgs *args) { - float r = args->v1->x / 255.0f; - float g = args->v1->y / 255.0f; - float b = args->v1->z / 255.0f; - r = (r - floor(r)) * 255; - g = (g - floor(g)) * 255; - b = (b - floor(b)) * 255; + // Calculate gradients + const TriVertex &v1 = *args->v1; + const TriVertex &v2 = *args->v2; + const TriVertex &v3 = *args->v3; + ScreenTriangleStepVariables gradientX; + ScreenTriangleStepVariables gradientY; + ScreenTriangleStepVariables start; + gradientX.W = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + gradientY.W = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + start.W = v1.w + gradientX.W * (StartX - v1.x) + gradientY.W * (StartY - v1.y); + for (int i = 0; i < TriVertex::NumVarying; i++) + { + gradientX.Varying[i] = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + gradientY.Varying[i] = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + start.Varying[i] = v1.varying[i] * v1.w + gradientX.Varying[i] * (StartX - v1.x) + gradientY.Varying[i] * (StartY - v1.y); + } - uint32_t red = (uint32_t)r; - uint32_t green = (uint32_t)g; - uint32_t blue = (uint32_t)b; - uint32_t solidcolor = 0xff000000 | (red << 16) | (green << 8) | blue; + const uint32_t *texPixels = (const uint32_t *)args->texturePixels; + uint32_t texWidth = args->textureWidth; + uint32_t texHeight = args->textureHeight; uint32_t subsectorDepth = args->uniforms->subsectorDepth; for (int i = 0; i < NumFullSpans; i++) { const auto &span = 
FullSpans[i]; - + uint32_t *dest = (uint32_t*)args->dest + span.X + span.Y * args->pitch; uint32_t *subsector = args->subsectorGBuffer + span.X + span.Y * args->pitch; int pitch = args->pitch; - int width = span.Length * 8; + int width = span.Length; int height = 8; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (span.X - StartX) + gradientY.W * (span.Y - StartY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (span.X - StartX) + gradientY.Varying[j] * (span.Y - StartY); + for (int y = 0; y < height; y++) { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + for (int x = 0; x < width; x++) { - dest[x] = solidcolor; - subsector[x] = subsectorDepth; + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + for (int ix = 0; ix < 8; ix++) + { + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + uint32_t fg = texPixels[texelX * texHeight + texelY]; + + dest[x * 8 + ix] = fg; + subsector[x * 8 + ix] = subsectorDepth; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + } } + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + dest += pitch; subsector += pitch; } @@ -851,7 +928,12 @@ void ScreenTriangle::Draw(const TriDrawTriangleArgs 
*args) for (int i = 0; i < NumPartialBlocks; i++) { const auto &block = PartialBlocks[i]; - + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (block.X - StartX) + gradientY.W * (block.Y - StartY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (block.X - StartX) + gradientY.Varying[j] * (block.Y - StartY); + uint32_t *dest = (uint32_t*)args->dest + block.X + block.Y * args->pitch; uint32_t *subsector = args->subsectorGBuffer + block.X + block.Y * args->pitch; int pitch = args->pitch; @@ -859,29 +941,91 @@ void ScreenTriangle::Draw(const TriDrawTriangleArgs *args) uint32_t mask1 = block.Mask1; for (int y = 0; y < 4; y++) { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + for (int x = 0; x < 8; x++) { if (mask0 & (1 << 31)) { - dest[x] = solidcolor; + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + uint32_t fg = texPixels[texelX * texHeight + texelY]; + + dest[x] = fg; subsector[x] = subsectorDepth; } mask0 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + dest += pitch; subsector += pitch; } for (int y = 4; y < 
8; y++) { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + for (int x = 0; x < 8; x++) { if (mask1 & (1 << 31)) { - dest[x] = solidcolor; + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + uint32_t fg = texPixels[texelX * texHeight + texelY]; + + dest[x] = fg; subsector[x] = subsectorDepth; } mask1 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + dest += pitch; subsector += pitch; } diff --git a/src/r_poly_triangle.h b/src/r_poly_triangle.h index 28c903c0ce..e014752ce7 100644 --- a/src/r_poly_triangle.h +++ b/src/r_poly_triangle.h @@ -265,7 +265,7 @@ public: static void Clear(); }; -struct ScreenTriangleBlock +struct ScreenTriangleStepVariables { float W; float Varying[TriVertex::NumVarying]; @@ -302,11 +302,11 @@ public: int NumPartialBlocks; int StartX; int StartY; - ScreenTriangleBlock Start; - ScreenTriangleBlock GradientX; - ScreenTriangleBlock GradientY; private: + float FindGradientX(float x0, float y0, float x1, float y1, float x2, float y2, float c0, float c1, float c2); + float FindGradientY(float x0, float y0, float x1, float y1, float x2, float y2, float c0, float c1, float c2); + std::vector 
FullSpansBuffer; std::vector PartialBlocksBuffer; }; From 5277d4ae28b2d20b8ab133efd2abb92a3f1e12aa Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 12 Dec 2016 22:42:42 +0100 Subject: [PATCH 484/912] Diminishing light test --- src/r_poly_triangle.cpp | 46 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/src/r_poly_triangle.cpp b/src/r_poly_triangle.cpp index 254fc2ef93..69b37a5b7c 100644 --- a/src/r_poly_triangle.cpp +++ b/src/r_poly_triangle.cpp @@ -864,6 +864,10 @@ void ScreenTriangle::Draw(const TriDrawTriangleArgs *args) uint32_t subsectorDepth = args->uniforms->subsectorDepth; + uint32_t light = args->uniforms->light; + float shade = (64.0f - (light * 255 / 256 + 12.0f) * 32.0f / 128.0f) / 32.0f; + float globVis = 1706.0f; + for (int i = 0; i < NumFullSpans; i++) { const auto &span = FullSpans[i]; @@ -890,6 +894,8 @@ void ScreenTriangle::Draw(const TriDrawTriangleArgs *args) for (int x = 0; x < width; x++) { + int lightpos = 256 - (int)(clamp(shade - MIN(24.0f, globVis * blockPosX.W) / 32.0f, 0.0f, 31.0f / 32.0f) * 256.0f); + blockPosX.W += gradientX.W * 8; for (int j = 0; j < TriVertex::NumVarying; j++) blockPosX.Varying[j] += gradientX.Varying[j] * 8; @@ -902,17 +908,29 @@ void ScreenTriangle::Draw(const TriDrawTriangleArgs *args) varyingStep[j] = (nextPos - varyingPos[j]) / 8; } + int lightnext = 256 - (int)(clamp(shade - MIN(24.0f, globVis * blockPosX.W) / 32.0f, 0.0f, 31.0f / 32.0f) * 256.0f); + int lightstep = (lightnext - lightpos) / 8; + for (int ix = 0; ix < 8; ix++) { int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; uint32_t fg = texPixels[texelX * texHeight + texelY]; + uint32_t r = RPART(fg); + uint32_t g = GPART(fg); + uint32_t b = BPART(fg); + r = r * lightpos / 256; + g = g * lightpos / 256; + b = b * lightpos / 256; + fg = 0xff000000 | (r << 16) | (g << 8) | b; + dest[x * 8 + ix] = fg; subsector[x * 8 + 
ix] = subsectorDepth; for (int j = 0; j < TriVertex::NumVarying; j++) varyingPos[j] += varyingStep[j]; + lightpos += lightstep; } } @@ -948,6 +966,8 @@ void ScreenTriangle::Draw(const TriDrawTriangleArgs *args) for (int j = 0; j < TriVertex::NumVarying; j++) varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + int lightpos = 256 - (int)(clamp(shade - MIN(24.0f, globVis * blockPosX.W) / 32.0f, 0.0f, 31.0f / 32.0f) * 256.0f); + blockPosX.W += gradientX.W * 8; for (int j = 0; j < TriVertex::NumVarying; j++) blockPosX.Varying[j] += gradientX.Varying[j] * 8; @@ -960,6 +980,9 @@ void ScreenTriangle::Draw(const TriDrawTriangleArgs *args) varyingStep[j] = (nextPos - varyingPos[j]) / 8; } + int lightnext = 256 - (int)(clamp(shade - MIN(24.0f, globVis * blockPosX.W) / 32.0f, 0.0f, 31.0f / 32.0f) * 256.0f); + int lightstep = (lightnext - lightpos) / 8; + for (int x = 0; x < 8; x++) { if (mask0 & (1 << 31)) @@ -968,6 +991,14 @@ void ScreenTriangle::Draw(const TriDrawTriangleArgs *args) int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; uint32_t fg = texPixels[texelX * texHeight + texelY]; + uint32_t r = RPART(fg); + uint32_t g = GPART(fg); + uint32_t b = BPART(fg); + r = r * lightpos / 256; + g = g * lightpos / 256; + b = b * lightpos / 256; + fg = 0xff000000 | (r << 16) | (g << 8) | b; + dest[x] = fg; subsector[x] = subsectorDepth; } @@ -975,6 +1006,7 @@ void ScreenTriangle::Draw(const TriDrawTriangleArgs *args) for (int j = 0; j < TriVertex::NumVarying; j++) varyingPos[j] += varyingStep[j]; + lightpos += lightstep; } blockPosY.W += gradientY.W; @@ -993,6 +1025,8 @@ void ScreenTriangle::Draw(const TriDrawTriangleArgs *args) for (int j = 0; j < TriVertex::NumVarying; j++) varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + int lightpos = 256 - (int)(clamp(shade - MIN(24.0f, globVis * blockPosX.W) / 32.0f, 0.0f, 31.0f / 32.0f) * 256.0f); + blockPosX.W += gradientX.W * 8; for (int j = 0; j < TriVertex::NumVarying; j++) blockPosX.Varying[j] += 
gradientX.Varying[j] * 8; @@ -1005,6 +1039,9 @@ void ScreenTriangle::Draw(const TriDrawTriangleArgs *args) varyingStep[j] = (nextPos - varyingPos[j]) / 8; } + int lightnext = 256 - (int)(clamp(shade - MIN(24.0f, globVis * blockPosX.W) / 32.0f, 0.0f, 31.0f / 32.0f) * 256.0f); + int lightstep = (lightnext - lightpos) / 8; + for (int x = 0; x < 8; x++) { if (mask1 & (1 << 31)) @@ -1013,6 +1050,14 @@ void ScreenTriangle::Draw(const TriDrawTriangleArgs *args) int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; uint32_t fg = texPixels[texelX * texHeight + texelY]; + uint32_t r = RPART(fg); + uint32_t g = GPART(fg); + uint32_t b = BPART(fg); + r = r * lightpos / 256; + g = g * lightpos / 256; + b = b * lightpos / 256; + fg = 0xff000000 | (r << 16) | (g << 8) | b; + dest[x] = fg; subsector[x] = subsectorDepth; } @@ -1020,6 +1065,7 @@ void ScreenTriangle::Draw(const TriDrawTriangleArgs *args) for (int j = 0; j < TriVertex::NumVarying; j++) varyingPos[j] += varyingStep[j]; + lightpos += lightstep; } blockPosY.W += gradientY.W; From 022368d349c7249d76eea17f5226b0e4c614ea75 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 13 Dec 2016 00:55:29 +0100 Subject: [PATCH 485/912] Stencil write --- src/r_poly_triangle.cpp | 76 +++++++++++++++++++++++++++++++++++++++-- src/r_poly_triangle.h | 2 ++ 2 files changed, 76 insertions(+), 2 deletions(-) diff --git a/src/r_poly_triangle.cpp b/src/r_poly_triangle.cpp index 69b37a5b7c..a3a48445ae 100644 --- a/src/r_poly_triangle.cpp +++ b/src/r_poly_triangle.cpp @@ -93,6 +93,7 @@ void PolyTriangleDrawer::draw_arrays(const PolyDrawArgs &drawargs, TriDrawVarian { default: //case TriDrawVariant::DrawNormal: drawfunc = dest_bgra ? &ScreenTriangle::DrawFunc : llvm->TriDrawNormal8[bmode]; break; + //case TriDrawVariant::Stencil: drawfunc = &ScreenTriangle::StencilFunc; break; case TriDrawVariant::DrawNormal: drawfunc = dest_bgra ? 
llvm->TriDrawNormal32[bmode] : llvm->TriDrawNormal8[bmode]; break; case TriDrawVariant::FillNormal: drawfunc = dest_bgra ? llvm->TriFillNormal32[bmode] : llvm->TriFillNormal8[bmode]; break; case TriDrawVariant::DrawSubsector: drawfunc = dest_bgra ? llvm->TriDrawSubsector32[bmode] : llvm->TriDrawSubsector8[bmode]; break; @@ -825,6 +826,71 @@ void ScreenTriangle::Setup(const TriDrawTriangleArgs *args, WorkerThreadData *th NumPartialBlocks = (int)(partial - PartialBlocks); } +void ScreenTriangle::StencilWrite(const TriDrawTriangleArgs *args) +{ + uint8_t *stencilValues = args->stencilValues; + uint32_t *stencilMasks = args->stencilMasks; + uint32_t stencilWriteValue = args->stencilWriteValue; + uint32_t stencilPitch = args->stencilPitch; + + for (int i = 0; i < NumFullSpans; i++) + { + const auto &span = FullSpans[i]; + + int block = span.X / 8 + span.Y / 8 * stencilPitch; + uint8_t *stencilBlock = &stencilValues[block * 64]; + uint32_t *stencilBlockMask = &stencilMasks[block]; + + int width = span.Length; + for (int x = 0; x < width; x++) + stencilBlockMask[x] = 0xffffff00 | stencilWriteValue; + } + + for (int i = 0; i < NumPartialBlocks; i++) + { + const auto &block = PartialBlocks[i]; + + uint32_t mask0 = block.Mask0; + uint32_t mask1 = block.Mask1; + + int sblock = block.X / 8 + block.Y / 8 * stencilPitch; + uint8_t *stencilBlock = &stencilValues[sblock * 64]; + uint32_t *stencilBlockMask = &stencilMasks[sblock]; + + bool isSingleValue = ((*stencilBlockMask) & 0xffffff00) == 0xffffff00; + if (isSingleValue) + { + uint8_t value = (*stencilBlockMask) & 0xff; + for (int v = 0; v < 64; v++) + stencilBlock[v] = value; + *stencilBlockMask = 0; + } + + int count = 0; + for (int v = 0; v < 32; v++) + { + if ((mask0 & (1 << 31)) || stencilBlock[v] == stencilWriteValue) + { + stencilBlock[v] = stencilWriteValue; + count++; + } + mask0 <<= 1; + } + for (int v = 32; v < 64; v++) + { + if ((mask1 & (1 << 31)) || stencilBlock[v] == stencilWriteValue) + { + stencilBlock[v] = 
stencilWriteValue; + count++; + } + mask1 <<= 1; + } + + if (count == 64) + *stencilBlockMask = 0xffffff00 | stencilWriteValue; + } +} + float ScreenTriangle::FindGradientX(float x0, float y0, float x1, float y1, float x2, float y2, float c0, float c1, float c2) { float top = (c1 - c2) * (y0 - y2) - (c0 - c2) * (y1 - y2); @@ -1078,14 +1144,20 @@ void ScreenTriangle::Draw(const TriDrawTriangleArgs *args) } } +static ScreenTriangle triangle[8]; + void ScreenTriangle::DrawFunc(const TriDrawTriangleArgs *args, WorkerThreadData *thread) { - static ScreenTriangle triangle[8]; - triangle[thread->core].Setup(args, thread); triangle[thread->core].Draw(args); } +void ScreenTriangle::StencilFunc(const TriDrawTriangleArgs *args, WorkerThreadData *thread) +{ + triangle[thread->core].Setup(args, thread); + triangle[thread->core].StencilWrite(args); +} + ScreenTriangle::ScreenTriangle() { FullSpansBuffer.resize(MAXWIDTH / 8 * (MAXHEIGHT / 8)); diff --git a/src/r_poly_triangle.h b/src/r_poly_triangle.h index e014752ce7..6e0508fbb1 100644 --- a/src/r_poly_triangle.h +++ b/src/r_poly_triangle.h @@ -290,11 +290,13 @@ class ScreenTriangle { public: static void DrawFunc(const TriDrawTriangleArgs *args, WorkerThreadData *thread); + static void StencilFunc(const TriDrawTriangleArgs *args, WorkerThreadData *thread); ScreenTriangle(); void Setup(const TriDrawTriangleArgs *args, WorkerThreadData *thread); void Draw(const TriDrawTriangleArgs *args); + void StencilWrite(const TriDrawTriangleArgs *args); ScreenTriangleFullSpan *FullSpans; ScreenTrianglePartialBlock *PartialBlocks; From c1e2c25907bb9b36de55b6d940a6728f221ef616 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 13 Dec 2016 02:13:48 +0100 Subject: [PATCH 486/912] Stencil close --- src/r_poly_triangle.cpp | 59 +++++++++++++++++++++++++++++++++++++++++ src/r_poly_triangle.h | 2 ++ 2 files changed, 61 insertions(+) diff --git a/src/r_poly_triangle.cpp b/src/r_poly_triangle.cpp index a3a48445ae..3e334b6179 100644 --- 
a/src/r_poly_triangle.cpp +++ b/src/r_poly_triangle.cpp @@ -94,6 +94,7 @@ void PolyTriangleDrawer::draw_arrays(const PolyDrawArgs &drawargs, TriDrawVarian default: //case TriDrawVariant::DrawNormal: drawfunc = dest_bgra ? &ScreenTriangle::DrawFunc : llvm->TriDrawNormal8[bmode]; break; //case TriDrawVariant::Stencil: drawfunc = &ScreenTriangle::StencilFunc; break; + //case TriDrawVariant::StencilClose: drawfunc = &ScreenTriangle::StencilCloseFunc; break; case TriDrawVariant::DrawNormal: drawfunc = dest_bgra ? llvm->TriDrawNormal32[bmode] : llvm->TriDrawNormal8[bmode]; break; case TriDrawVariant::FillNormal: drawfunc = dest_bgra ? llvm->TriFillNormal32[bmode] : llvm->TriFillNormal8[bmode]; break; case TriDrawVariant::DrawSubsector: drawfunc = dest_bgra ? llvm->TriDrawSubsector32[bmode] : llvm->TriDrawSubsector8[bmode]; break; @@ -891,6 +892,57 @@ void ScreenTriangle::StencilWrite(const TriDrawTriangleArgs *args) } } +void ScreenTriangle::SubsectorWrite(const TriDrawTriangleArgs *args) +{ + uint32_t subsectorDepth = args->uniforms->subsectorDepth; + + for (int i = 0; i < NumFullSpans; i++) + { + const auto &span = FullSpans[i]; + + uint32_t *subsector = args->subsectorGBuffer + span.X + span.Y * args->pitch; + int pitch = args->pitch; + int width = span.Length * 8; + int height = 8; + for (int y = 0; y < height; y++) + { + for (int x = 0; x < width; x++) + subsector[x] = subsectorDepth; + subsector += pitch; + } + } + + for (int i = 0; i < NumPartialBlocks; i++) + { + const auto &block = PartialBlocks[i]; + + uint32_t *subsector = args->subsectorGBuffer + block.X + block.Y * args->pitch; + int pitch = args->pitch; + uint32_t mask0 = block.Mask0; + uint32_t mask1 = block.Mask1; + for (int y = 0; y < 4; y++) + { + for (int x = 0; x < 8; x++) + { + if (mask0 & (1 << 31)) + subsector[x] = subsectorDepth; + mask0 <<= 1; + } + subsector += pitch; + } + for (int y = 4; y < 8; y++) + { + for (int x = 0; x < 8; x++) + { + if (mask1 & (1 << 31)) + subsector[x] = subsectorDepth; 
+ mask1 <<= 1; + } + subsector += pitch; + } + } +} + float ScreenTriangle::FindGradientX(float x0, float y0, float x1, float y1, float x2, float y2, float c0, float c1, float c2) { float top = (c1 - c2) * (y0 - y2) - (c0 - c2) * (y1 - y2); @@ -1158,6 +1210,13 @@ void ScreenTriangle::StencilFunc(const TriDrawTriangleArgs *args, WorkerThreadDa triangle[thread->core].StencilWrite(args); } +void ScreenTriangle::StencilCloseFunc(const TriDrawTriangleArgs *args, WorkerThreadData *thread) +{ + triangle[thread->core].Setup(args, thread); + triangle[thread->core].StencilWrite(args); + triangle[thread->core].SubsectorWrite(args); +} + ScreenTriangle::ScreenTriangle() { FullSpansBuffer.resize(MAXWIDTH / 8 * (MAXHEIGHT / 8)); diff --git a/src/r_poly_triangle.h b/src/r_poly_triangle.h index 6e0508fbb1..12591425a9 100644 --- a/src/r_poly_triangle.h +++ b/src/r_poly_triangle.h @@ -291,12 +291,14 @@ class ScreenTriangle public: static void DrawFunc(const TriDrawTriangleArgs *args, WorkerThreadData *thread); static void StencilFunc(const TriDrawTriangleArgs *args, WorkerThreadData *thread); + static void StencilCloseFunc(const TriDrawTriangleArgs *args, WorkerThreadData *thread); ScreenTriangle(); void Setup(const TriDrawTriangleArgs *args, WorkerThreadData *thread); void Draw(const TriDrawTriangleArgs *args); void StencilWrite(const TriDrawTriangleArgs *args); + void SubsectorWrite(const TriDrawTriangleArgs *args); ScreenTriangleFullSpan *FullSpans; ScreenTrianglePartialBlock *PartialBlocks; From be357e1c98bed3efb85a6b20569cd2fea74cab68 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 13 Dec 2016 12:57:04 +0100 Subject: [PATCH 487/912] Triangle setup function for subsector based drawing --- src/r_poly_triangle.cpp | 306 +++++++++++++++++++++++++++++++++++++++- src/r_poly_triangle.h | 4 +- 2 files changed, 304 insertions(+), 6 deletions(-) diff --git a/src/r_poly_triangle.cpp b/src/r_poly_triangle.cpp index 3e334b6179..30876768ba 100644 --- a/src/r_poly_triangle.cpp +++ 
b/src/r_poly_triangle.cpp @@ -92,7 +92,8 @@ void PolyTriangleDrawer::draw_arrays(const PolyDrawArgs &drawargs, TriDrawVarian switch (variant) { default: - //case TriDrawVariant::DrawNormal: drawfunc = dest_bgra ? &ScreenTriangle::DrawFunc : llvm->TriDrawNormal8[bmode]; break; + //case TriDrawVariant::DrawNormal: drawfunc = dest_bgra ? &ScreenTriangle::DrawFunc : llvm->TriDrawNormal8[bmode]; break; + //case TriDrawVariant::DrawSubsector: drawfunc = dest_bgra ? &ScreenTriangle::DrawSubsectorFunc : llvm->TriDrawSubsector8[bmode]; break; //case TriDrawVariant::Stencil: drawfunc = &ScreenTriangle::StencilFunc; break; //case TriDrawVariant::StencilClose: drawfunc = &ScreenTriangle::StencilCloseFunc; break; case TriDrawVariant::DrawNormal: drawfunc = dest_bgra ? llvm->TriDrawNormal32[bmode] : llvm->TriDrawNormal8[bmode]; break; @@ -588,7 +589,7 @@ void PolyVertexBuffer::Clear() ///////////////////////////////////////////////////////////////////////////// -void ScreenTriangle::Setup(const TriDrawTriangleArgs *args, WorkerThreadData *thread) +void ScreenTriangle::SetupNormal(const TriDrawTriangleArgs *args, WorkerThreadData *thread) { const TriVertex &v1 = *args->v1; const TriVertex &v2 = *args->v2; @@ -827,6 +828,295 @@ void ScreenTriangle::Setup(const TriDrawTriangleArgs *args, WorkerThreadData *th NumPartialBlocks = (int)(partial - PartialBlocks); } +void ScreenTriangle::SetupSubsector(const TriDrawTriangleArgs *args, WorkerThreadData *thread) +{ + const TriVertex &v1 = *args->v1; + const TriVertex &v2 = *args->v2; + const TriVertex &v3 = *args->v3; + int clipright = args->clipright; + int clipbottom = args->clipbottom; + + int stencilPitch = args->stencilPitch; + uint8_t *stencilValues = args->stencilValues; + uint32_t *stencilMasks = args->stencilMasks; + uint8_t stencilTestValue = args->stencilTestValue; + + uint32_t subsectorDepth = args->uniforms->subsectorDepth; + int32_t pitch = args->pitch; + + ScreenTriangleFullSpan *span = FullSpans; + 
ScreenTrianglePartialBlock *partial = PartialBlocks; + + // 28.4 fixed-point coordinates + const int Y1 = (int)round(16.0f * v1.y); + const int Y2 = (int)round(16.0f * v2.y); + const int Y3 = (int)round(16.0f * v3.y); + + const int X1 = (int)round(16.0f * v1.x); + const int X2 = (int)round(16.0f * v2.x); + const int X3 = (int)round(16.0f * v3.x); + + // Deltas + const int DX12 = X1 - X2; + const int DX23 = X2 - X3; + const int DX31 = X3 - X1; + + const int DY12 = Y1 - Y2; + const int DY23 = Y2 - Y3; + const int DY31 = Y3 - Y1; + + // Fixed-point deltas + const int FDX12 = DX12 << 4; + const int FDX23 = DX23 << 4; + const int FDX31 = DX31 << 4; + + const int FDY12 = DY12 << 4; + const int FDY23 = DY23 << 4; + const int FDY31 = DY31 << 4; + + // Bounding rectangle + int minx = MAX((MIN(MIN(X1, X2), X3) + 0xF) >> 4, 0); + int maxx = MIN((MAX(MAX(X1, X2), X3) + 0xF) >> 4, clipright - 1); + int miny = MAX((MIN(MIN(Y1, Y2), Y3) + 0xF) >> 4, 0); + int maxy = MIN((MAX(MAX(Y1, Y2), Y3) + 0xF) >> 4, clipbottom - 1); + if (minx >= maxx || miny >= maxy) + { + NumFullSpans = 0; + NumPartialBlocks = 0; + return; + } + + // Block size, standard 8x8 (must be power of two) + const int q = 8; + + // Start in corner of 8x8 block + minx &= ~(q - 1); + miny &= ~(q - 1); + + // Half-edge constants + int C1 = DY12 * X1 - DX12 * Y1; + int C2 = DY23 * X2 - DX23 * Y2; + int C3 = DY31 * X3 - DX31 * Y3; + + // Correct for fill convention + if (DY12 < 0 || (DY12 == 0 && DX12 > 0)) C1++; + if (DY23 < 0 || (DY23 == 0 && DX23 > 0)) C2++; + if (DY31 < 0 || (DY31 == 0 && DX31 > 0)) C3++; + + // First block line for this thread + int core = thread->core; + int num_cores = thread->num_cores; + int core_skip = (num_cores - ((miny / q) - core) % num_cores) % num_cores; + miny += core_skip * q; + + StartX = minx; + StartY = miny; + span->Length = 0; + + // Loop through blocks + for (int y = miny; y < maxy; y += q * num_cores) + { + for (int x = minx; x < maxx; x += q) + { + // Corners of block + int x0 
= x << 4; + int x1 = (x + q - 1) << 4; + int y0 = y << 4; + int y1 = (y + q - 1) << 4; + + // Evaluate half-space functions + bool a00 = C1 + DX12 * y0 - DY12 * x0 > 0; + bool a10 = C1 + DX12 * y0 - DY12 * x1 > 0; + bool a01 = C1 + DX12 * y1 - DY12 * x0 > 0; + bool a11 = C1 + DX12 * y1 - DY12 * x1 > 0; + int a = (a00 << 0) | (a10 << 1) | (a01 << 2) | (a11 << 3); + + bool b00 = C2 + DX23 * y0 - DY23 * x0 > 0; + bool b10 = C2 + DX23 * y0 - DY23 * x1 > 0; + bool b01 = C2 + DX23 * y1 - DY23 * x0 > 0; + bool b11 = C2 + DX23 * y1 - DY23 * x1 > 0; + int b = (b00 << 0) | (b10 << 1) | (b01 << 2) | (b11 << 3); + + bool c00 = C3 + DX31 * y0 - DY31 * x0 > 0; + bool c10 = C3 + DX31 * y0 - DY31 * x1 > 0; + bool c01 = C3 + DX31 * y1 - DY31 * x0 > 0; + bool c11 = C3 + DX31 * y1 - DY31 * x1 > 0; + int c = (c00 << 0) | (c10 << 1) | (c01 << 2) | (c11 << 3); + + // Stencil test the whole block, if possible + int block = x / 8 + y / 8 * stencilPitch; + uint8_t *stencilBlock = &stencilValues[block * 64]; + uint32_t *stencilBlockMask = &stencilMasks[block]; + bool blockIsSingleStencil = ((*stencilBlockMask) & 0xffffff00) == 0xffffff00; + bool skipBlock = blockIsSingleStencil && ((*stencilBlockMask) & 0xff) != stencilTestValue; + + // Skip block when outside an edge + if (a == 0 || b == 0 || c == 0 || skipBlock) + { + if (span->Length != 0) + { + span++; + span->Length = 0; + } + continue; + } + + // Accept whole block when totally covered + if (a == 0xf && b == 0xf && c == 0xf && x + q <= clipright && y + q <= clipbottom && blockIsSingleStencil) + { + // Totally covered block still needs a subsector coverage test: + + uint32_t *subsector = args->subsectorGBuffer + x + y * pitch; + + uint32_t mask0 = 0; + uint32_t mask1 = 0; + + for (int iy = 0; iy < 4; iy++) + { + for (int ix = 0; ix < q; ix++) + { + bool covered = subsector[ix] >= subsectorDepth; + mask0 <<= 1; + mask0 |= (uint32_t)covered; + } + subsector += pitch; + } + + for (int iy = 4; iy < q; iy++) + { + for (int ix = 0; ix < q; 
ix++) + { + bool covered = subsector[ix] >= subsectorDepth; + mask1 <<= 1; + mask1 |= (uint32_t)covered; + } + subsector += pitch; + } + + if (mask0 != 0xffffffff || mask1 != 0xffffffff) + { + if (span->Length > 0) + { + span++; + span->Length = 0; + } + + partial->X = x; + partial->Y = y; + partial->Mask0 = mask0; + partial->Mask1 = mask1; + partial++; + } + else if (span->Length != 0) + { + span->Length++; + } + else + { + span->X = x; + span->Y = y; + span->Length = 1; + } + } + else // Partially covered block + { + x0 = x << 4; + x1 = (x + q - 1) << 4; + int CY1 = C1 + DX12 * y0 - DY12 * x0; + int CY2 = C2 + DX23 * y0 - DY23 * x0; + int CY3 = C3 + DX31 * y0 - DY31 * x0; + + uint32_t *subsector = args->subsectorGBuffer + x + y * pitch; + + uint32_t mask0 = 0; + uint32_t mask1 = 0; + + for (int iy = 0; iy < 4; iy++) + { + int CX1 = CY1; + int CX2 = CY2; + int CX3 = CY3; + + for (int ix = 0; ix < q; ix++) + { + bool passStencilTest = blockIsSingleStencil || stencilBlock[ix + iy * q] == stencilTestValue; + bool covered = (CX1 > 0 && CX2 > 0 && CX3 > 0 && (x + ix) < clipright && (y + iy) < clipbottom && passStencilTest && subsector[ix] >= subsectorDepth); + mask0 <<= 1; + mask0 |= (uint32_t)covered; + + CX1 -= FDY12; + CX2 -= FDY23; + CX3 -= FDY31; + } + + CY1 += FDX12; + CY2 += FDX23; + CY3 += FDX31; + subsector += pitch; + } + + for (int iy = 4; iy < q; iy++) + { + int CX1 = CY1; + int CX2 = CY2; + int CX3 = CY3; + + for (int ix = 0; ix < q; ix++) + { + bool passStencilTest = blockIsSingleStencil || stencilBlock[ix + iy * q] == stencilTestValue; + bool covered = (CX1 > 0 && CX2 > 0 && CX3 > 0 && (x + ix) < clipright && (y + iy) < clipbottom && passStencilTest && subsector[ix] >= subsectorDepth); + mask1 <<= 1; + mask1 |= (uint32_t)covered; + + CX1 -= FDY12; + CX2 -= FDY23; + CX3 -= FDY31; + } + + CY1 += FDX12; + CY2 += FDX23; + CY3 += FDX31; + subsector += pitch; + } + + if (mask0 != 0xffffffff || mask1 != 0xffffffff) + { + if (span->Length > 0) + { + span++; + 
span->Length = 0; + } + + partial->X = x; + partial->Y = y; + partial->Mask0 = mask0; + partial->Mask1 = mask1; + partial++; + } + else if (span->Length != 0) + { + span->Length++; + } + else + { + span->X = x; + span->Y = y; + span->Length = 1; + } + } + } + + if (span->Length != 0) + { + span++; + span->Length = 0; + } + } + + NumFullSpans = (int)(span - FullSpans); + NumPartialBlocks = (int)(partial - PartialBlocks); +} + void ScreenTriangle::StencilWrite(const TriDrawTriangleArgs *args) { uint8_t *stencilValues = args->stencilValues; @@ -1200,19 +1490,25 @@ static ScreenTriangle triangle[8]; void ScreenTriangle::DrawFunc(const TriDrawTriangleArgs *args, WorkerThreadData *thread) { - triangle[thread->core].Setup(args, thread); + triangle[thread->core].SetupNormal(args, thread); + triangle[thread->core].Draw(args); +} + +void ScreenTriangle::DrawSubsectorFunc(const TriDrawTriangleArgs *args, WorkerThreadData *thread) +{ + triangle[thread->core].SetupSubsector(args, thread); triangle[thread->core].Draw(args); } void ScreenTriangle::StencilFunc(const TriDrawTriangleArgs *args, WorkerThreadData *thread) { - triangle[thread->core].Setup(args, thread); + triangle[thread->core].SetupNormal(args, thread); triangle[thread->core].StencilWrite(args); } void ScreenTriangle::StencilCloseFunc(const TriDrawTriangleArgs *args, WorkerThreadData *thread) { - triangle[thread->core].Setup(args, thread); + triangle[thread->core].SetupNormal(args, thread); triangle[thread->core].StencilWrite(args); triangle[thread->core].SubsectorWrite(args); } diff --git a/src/r_poly_triangle.h b/src/r_poly_triangle.h index 12591425a9..3fb963fe9d 100644 --- a/src/r_poly_triangle.h +++ b/src/r_poly_triangle.h @@ -290,12 +290,14 @@ class ScreenTriangle { public: static void DrawFunc(const TriDrawTriangleArgs *args, WorkerThreadData *thread); + static void DrawSubsectorFunc(const TriDrawTriangleArgs *args, WorkerThreadData *thread); static void StencilFunc(const TriDrawTriangleArgs *args, 
WorkerThreadData *thread); static void StencilCloseFunc(const TriDrawTriangleArgs *args, WorkerThreadData *thread); ScreenTriangle(); - void Setup(const TriDrawTriangleArgs *args, WorkerThreadData *thread); + void SetupNormal(const TriDrawTriangleArgs *args, WorkerThreadData *thread); + void SetupSubsector(const TriDrawTriangleArgs *args, WorkerThreadData *thread); void Draw(const TriDrawTriangleArgs *args); void StencilWrite(const TriDrawTriangleArgs *args); void SubsectorWrite(const TriDrawTriangleArgs *args); From 145c0a6d9eab69c7a3b678efb8f7c14564e18ec0 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 13 Dec 2016 19:26:13 +0100 Subject: [PATCH 488/912] Prepare triangle setup to be used by LLVM drawers --- src/r_drawers.h | 25 +++++- src/r_poly_triangle.cpp | 163 ++++++++++++++++++++++------------------ src/r_poly_triangle.h | 41 ++-------- src/r_thread.h | 8 ++ 4 files changed, 126 insertions(+), 111 deletions(-) diff --git a/src/r_drawers.h b/src/r_drawers.h index 2e91d502b2..bc776be5bc 100644 --- a/src/r_drawers.h +++ b/src/r_drawers.h @@ -26,7 +26,21 @@ #include class FString; -class DrawerThread; + +struct TriFullSpan +{ + uint16_t X; + uint16_t Y; + uint32_t Length; +}; + +struct TriPartialBlock +{ + uint16_t X; + uint16_t Y; + uint32_t Mask0; + uint32_t Mask1; +}; struct WorkerThreadData { @@ -35,7 +49,14 @@ struct WorkerThreadData int32_t pass_start_y; int32_t pass_end_y; uint32_t *temp; - DrawerThread *drawer_thread; + + // Triangle working data: + TriFullSpan *FullSpans; + TriPartialBlock *PartialBlocks; + uint32_t NumFullSpans; + uint32_t NumPartialBlocks; + int32_t StartX; + int32_t StartY; }; struct DrawWallArgs diff --git a/src/r_poly_triangle.cpp b/src/r_poly_triangle.cpp index 30876768ba..a97afaead9 100644 --- a/src/r_poly_triangle.cpp +++ b/src/r_poly_triangle.cpp @@ -35,6 +35,7 @@ #include "v_palette.h" #include "r_data/colormaps.h" #include "r_poly_triangle.h" +#include "r_draw_rgba.h" int PolyTriangleDrawer::viewport_x; int 
PolyTriangleDrawer::viewport_y; @@ -368,6 +369,8 @@ void DrawPolyTrianglesCommand::Execute(DrawerThread *thread) thread_data.pass_start_y = thread->pass_start_y; thread_data.pass_end_y = thread->pass_end_y; thread_data.temp = thread->dc_temp_rgba; + thread_data.FullSpans = thread->FullSpansBuffer.data(); + thread_data.PartialBlocks = thread->PartialBlocksBuffer.data(); PolyTriangleDrawer::draw_arrays(args, variant, blendmode, &thread_data); } @@ -598,12 +601,12 @@ void ScreenTriangle::SetupNormal(const TriDrawTriangleArgs *args, WorkerThreadDa int clipbottom = args->clipbottom; int stencilPitch = args->stencilPitch; - uint8_t *stencilValues = args->stencilValues; - uint32_t *stencilMasks = args->stencilMasks; + uint8_t * RESTRICT stencilValues = args->stencilValues; + uint32_t * RESTRICT stencilMasks = args->stencilMasks; uint8_t stencilTestValue = args->stencilTestValue; - ScreenTriangleFullSpan *span = FullSpans; - ScreenTrianglePartialBlock *partial = PartialBlocks; + TriFullSpan * RESTRICT span = thread->FullSpans; + TriPartialBlock * RESTRICT partial = thread->PartialBlocks; // 28.4 fixed-point coordinates const int Y1 = (int)round(16.0f * v1.y); @@ -639,8 +642,8 @@ void ScreenTriangle::SetupNormal(const TriDrawTriangleArgs *args, WorkerThreadDa int maxy = MIN((MAX(MAX(Y1, Y2), Y3) + 0xF) >> 4, clipbottom - 1); if (minx >= maxx || miny >= maxy) { - NumFullSpans = 0; - NumPartialBlocks = 0; + thread->NumFullSpans = 0; + thread->NumPartialBlocks = 0; return; } @@ -667,8 +670,8 @@ void ScreenTriangle::SetupNormal(const TriDrawTriangleArgs *args, WorkerThreadDa int core_skip = (num_cores - ((miny / q) - core) % num_cores) % num_cores; miny += core_skip * q; - StartX = minx; - StartY = miny; + thread->StartX = minx; + thread->StartY = miny; span->Length = 0; // Loop through blocks @@ -824,8 +827,8 @@ void ScreenTriangle::SetupNormal(const TriDrawTriangleArgs *args, WorkerThreadDa } } - NumFullSpans = (int)(span - FullSpans); - NumPartialBlocks = (int)(partial - 
PartialBlocks); + thread->NumFullSpans = (int)(span - thread->FullSpans); + thread->NumPartialBlocks = (int)(partial - thread->PartialBlocks); } void ScreenTriangle::SetupSubsector(const TriDrawTriangleArgs *args, WorkerThreadData *thread) @@ -837,15 +840,16 @@ void ScreenTriangle::SetupSubsector(const TriDrawTriangleArgs *args, WorkerThrea int clipbottom = args->clipbottom; int stencilPitch = args->stencilPitch; - uint8_t *stencilValues = args->stencilValues; - uint32_t *stencilMasks = args->stencilMasks; + uint8_t * RESTRICT stencilValues = args->stencilValues; + uint32_t * RESTRICT stencilMasks = args->stencilMasks; uint8_t stencilTestValue = args->stencilTestValue; + uint32_t * RESTRICT subsectorGBuffer = args->subsectorGBuffer; uint32_t subsectorDepth = args->uniforms->subsectorDepth; int32_t pitch = args->pitch; - ScreenTriangleFullSpan *span = FullSpans; - ScreenTrianglePartialBlock *partial = PartialBlocks; + TriFullSpan * RESTRICT span = thread->FullSpans; + TriPartialBlock * RESTRICT partial = thread->PartialBlocks; // 28.4 fixed-point coordinates const int Y1 = (int)round(16.0f * v1.y); @@ -881,8 +885,8 @@ void ScreenTriangle::SetupSubsector(const TriDrawTriangleArgs *args, WorkerThrea int maxy = MIN((MAX(MAX(Y1, Y2), Y3) + 0xF) >> 4, clipbottom - 1); if (minx >= maxx || miny >= maxy) { - NumFullSpans = 0; - NumPartialBlocks = 0; + thread->NumFullSpans = 0; + thread->NumPartialBlocks = 0; return; } @@ -909,8 +913,8 @@ void ScreenTriangle::SetupSubsector(const TriDrawTriangleArgs *args, WorkerThrea int core_skip = (num_cores - ((miny / q) - core) % num_cores) % num_cores; miny += core_skip * q; - StartX = minx; - StartY = miny; + thread->StartX = minx; + thread->StartY = miny; span->Length = 0; // Loop through blocks @@ -966,7 +970,7 @@ void ScreenTriangle::SetupSubsector(const TriDrawTriangleArgs *args, WorkerThrea { // Totally covered block still needs a subsector coverage test: - uint32_t *subsector = args->subsectorGBuffer + x + y * pitch; + uint32_t 
*subsector = subsectorGBuffer + x + y * pitch; uint32_t mask0 = 0; uint32_t mask1 = 0; @@ -1026,7 +1030,7 @@ void ScreenTriangle::SetupSubsector(const TriDrawTriangleArgs *args, WorkerThrea int CY2 = C2 + DX23 * y0 - DY23 * x0; int CY3 = C3 + DX31 * y0 - DY31 * x0; - uint32_t *subsector = args->subsectorGBuffer + x + y * pitch; + uint32_t *subsector = subsectorGBuffer + x + y * pitch; uint32_t mask0 = 0; uint32_t mask1 = 0; @@ -1113,20 +1117,25 @@ void ScreenTriangle::SetupSubsector(const TriDrawTriangleArgs *args, WorkerThrea } } - NumFullSpans = (int)(span - FullSpans); - NumPartialBlocks = (int)(partial - PartialBlocks); + thread->NumFullSpans = (int)(span - thread->FullSpans); + thread->NumPartialBlocks = (int)(partial - thread->PartialBlocks); } -void ScreenTriangle::StencilWrite(const TriDrawTriangleArgs *args) +void ScreenTriangle::StencilWrite(const TriDrawTriangleArgs *args, WorkerThreadData *thread) { - uint8_t *stencilValues = args->stencilValues; - uint32_t *stencilMasks = args->stencilMasks; + uint8_t * RESTRICT stencilValues = args->stencilValues; + uint32_t * RESTRICT stencilMasks = args->stencilMasks; uint32_t stencilWriteValue = args->stencilWriteValue; uint32_t stencilPitch = args->stencilPitch; - for (int i = 0; i < NumFullSpans; i++) + int numSpans = thread->NumFullSpans; + auto fullSpans = thread->FullSpans; + int numBlocks = thread->NumPartialBlocks; + auto partialBlocks = thread->PartialBlocks; + + for (int i = 0; i < numSpans; i++) { - const auto &span = FullSpans[i]; + const auto &span = fullSpans[i]; int block = span.X / 8 + span.Y / 8 * stencilPitch; uint8_t *stencilBlock = &stencilValues[block * 64]; @@ -1137,9 +1146,9 @@ void ScreenTriangle::StencilWrite(const TriDrawTriangleArgs *args) stencilBlockMask[x] = 0xffffff00 | stencilWriteValue; } - for (int i = 0; i < NumPartialBlocks; i++) + for (int i = 0; i < numBlocks; i++) { - const auto &block = PartialBlocks[i]; + const auto &block = partialBlocks[i]; uint32_t mask0 = block.Mask0; 
uint32_t mask1 = block.Mask1; @@ -1182,16 +1191,22 @@ void ScreenTriangle::StencilWrite(const TriDrawTriangleArgs *args) } } -void ScreenTriangle::SubsectorWrite(const TriDrawTriangleArgs *args) +void ScreenTriangle::SubsectorWrite(const TriDrawTriangleArgs *args, WorkerThreadData *thread) { + uint32_t * RESTRICT subsectorGBuffer = args->subsectorGBuffer; uint32_t subsectorDepth = args->uniforms->subsectorDepth; + int pitch = args->pitch; - for (int i = 0; i < NumFullSpans; i++) + int numSpans = thread->NumFullSpans; + auto fullSpans = thread->FullSpans; + int numBlocks = thread->NumPartialBlocks; + auto partialBlocks = thread->PartialBlocks; + + for (int i = 0; i < numSpans; i++) { - const auto &span = FullSpans[i]; + const auto &span = fullSpans[i]; - uint32_t *subsector = args->subsectorGBuffer + span.X + span.Y * args->pitch; - int pitch = args->pitch; + uint32_t *subsector = subsectorGBuffer + span.X + span.Y * pitch; int width = span.Length * 8; int height = 8; for (int y = 0; y < height; y++) @@ -1202,12 +1217,11 @@ void ScreenTriangle::SubsectorWrite(const TriDrawTriangleArgs *args) } } - for (int i = 0; i < NumPartialBlocks; i++) + for (int i = 0; i < numBlocks; i++) { - const auto &block = PartialBlocks[i]; + const auto &block = partialBlocks[i]; - uint32_t *subsector = args->subsectorGBuffer + block.X + block.Y * args->pitch; - int pitch = args->pitch; + uint32_t *subsector = subsectorGBuffer + block.X + block.Y * pitch; uint32_t mask0 = block.Mask0; uint32_t mask1 = block.Mask1; for (int y = 0; y < 4; y++) @@ -1247,8 +1261,15 @@ float ScreenTriangle::FindGradientY(float x0, float y0, float x1, float y1, floa return top / bottom; } -void ScreenTriangle::Draw(const TriDrawTriangleArgs *args) +void ScreenTriangle::Draw(const TriDrawTriangleArgs *args, WorkerThreadData *thread) { + int numSpans = thread->NumFullSpans; + auto fullSpans = thread->FullSpans; + int numBlocks = thread->NumPartialBlocks; + auto partialBlocks = thread->PartialBlocks; + int startX 
= thread->StartX; + int startY = thread->StartY; + // Calculate gradients const TriVertex &v1 = *args->v1; const TriVertex &v2 = *args->v2; @@ -1258,38 +1279,41 @@ void ScreenTriangle::Draw(const TriDrawTriangleArgs *args) ScreenTriangleStepVariables start; gradientX.W = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); gradientY.W = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); - start.W = v1.w + gradientX.W * (StartX - v1.x) + gradientY.W * (StartY - v1.y); + start.W = v1.w + gradientX.W * (startX - v1.x) + gradientY.W * (startY - v1.y); for (int i = 0; i < TriVertex::NumVarying; i++) { gradientX.Varying[i] = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); gradientY.Varying[i] = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); - start.Varying[i] = v1.varying[i] * v1.w + gradientX.Varying[i] * (StartX - v1.x) + gradientY.Varying[i] * (StartY - v1.y); + start.Varying[i] = v1.varying[i] * v1.w + gradientX.Varying[i] * (startX - v1.x) + gradientY.Varying[i] * (startY - v1.y); } - const uint32_t *texPixels = (const uint32_t *)args->texturePixels; + const uint32_t * RESTRICT texPixels = (const uint32_t *)args->texturePixels; uint32_t texWidth = args->textureWidth; uint32_t texHeight = args->textureHeight; + uint32_t * RESTRICT destOrg = (uint32_t*)args->dest; + uint32_t * RESTRICT subsectorGBuffer = (uint32_t*)args->subsectorGBuffer; + int pitch = args->pitch; + uint32_t subsectorDepth = args->uniforms->subsectorDepth; uint32_t light = args->uniforms->light; float shade = (64.0f - (light * 255 / 256 + 12.0f) * 32.0f / 128.0f) / 32.0f; float globVis = 1706.0f; - for (int i = 0; i < NumFullSpans; i++) + for (int i = 0; i < numSpans; i++) { - const auto &span = FullSpans[i]; + const auto &span = fullSpans[i]; - uint32_t *dest = (uint32_t*)args->dest + span.X + span.Y * args->pitch; - uint32_t 
*subsector = args->subsectorGBuffer + span.X + span.Y * args->pitch; - int pitch = args->pitch; + uint32_t *dest = destOrg + span.X + span.Y * pitch; + uint32_t *subsector = subsectorGBuffer + span.X + span.Y * pitch; int width = span.Length; int height = 8; ScreenTriangleStepVariables blockPosY; - blockPosY.W = start.W + gradientX.W * (span.X - StartX) + gradientY.W * (span.Y - StartY); + blockPosY.W = start.W + gradientX.W * (span.X - startX) + gradientY.W * (span.Y - startY); for (int j = 0; j < TriVertex::NumVarying; j++) - blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (span.X - StartX) + gradientY.Varying[j] * (span.Y - StartY); + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (span.X - startX) + gradientY.Varying[j] * (span.Y - startY); for (int y = 0; y < height; y++) { @@ -1351,18 +1375,17 @@ void ScreenTriangle::Draw(const TriDrawTriangleArgs *args) } } - for (int i = 0; i < NumPartialBlocks; i++) + for (int i = 0; i < numBlocks; i++) { - const auto &block = PartialBlocks[i]; + const auto &block = partialBlocks[i]; ScreenTriangleStepVariables blockPosY; - blockPosY.W = start.W + gradientX.W * (block.X - StartX) + gradientY.W * (block.Y - StartY); + blockPosY.W = start.W + gradientX.W * (block.X - startX) + gradientY.W * (block.Y - startY); for (int j = 0; j < TriVertex::NumVarying; j++) - blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (block.X - StartX) + gradientY.Varying[j] * (block.Y - StartY); + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (block.X - startX) + gradientY.Varying[j] * (block.Y - startY); - uint32_t *dest = (uint32_t*)args->dest + block.X + block.Y * args->pitch; - uint32_t *subsector = args->subsectorGBuffer + block.X + block.Y * args->pitch; - int pitch = args->pitch; + uint32_t *dest = destOrg + block.X + block.Y * pitch; + uint32_t *subsector = subsectorGBuffer + block.X + block.Y * pitch; uint32_t mask0 = block.Mask0; uint32_t mask1 = block.Mask1; for (int y = 
0; y < 4; y++) @@ -1486,37 +1509,27 @@ void ScreenTriangle::Draw(const TriDrawTriangleArgs *args) } } -static ScreenTriangle triangle[8]; - void ScreenTriangle::DrawFunc(const TriDrawTriangleArgs *args, WorkerThreadData *thread) { - triangle[thread->core].SetupNormal(args, thread); - triangle[thread->core].Draw(args); + SetupNormal(args, thread); + Draw(args, thread); } void ScreenTriangle::DrawSubsectorFunc(const TriDrawTriangleArgs *args, WorkerThreadData *thread) { - triangle[thread->core].SetupSubsector(args, thread); - triangle[thread->core].Draw(args); + SetupSubsector(args, thread); + Draw(args, thread); } void ScreenTriangle::StencilFunc(const TriDrawTriangleArgs *args, WorkerThreadData *thread) { - triangle[thread->core].SetupNormal(args, thread); - triangle[thread->core].StencilWrite(args); + SetupNormal(args, thread); + StencilWrite(args, thread); } void ScreenTriangle::StencilCloseFunc(const TriDrawTriangleArgs *args, WorkerThreadData *thread) { - triangle[thread->core].SetupNormal(args, thread); - triangle[thread->core].StencilWrite(args); - triangle[thread->core].SubsectorWrite(args); -} - -ScreenTriangle::ScreenTriangle() -{ - FullSpansBuffer.resize(MAXWIDTH / 8 * (MAXHEIGHT / 8)); - PartialBlocksBuffer.resize(MAXWIDTH / 8 * (MAXHEIGHT / 8)); - FullSpans = FullSpansBuffer.data(); - PartialBlocks = PartialBlocksBuffer.data(); + SetupNormal(args, thread); + StencilWrite(args, thread); + SubsectorWrite(args, thread); } diff --git a/src/r_poly_triangle.h b/src/r_poly_triangle.h index 3fb963fe9d..14cfb13350 100644 --- a/src/r_poly_triangle.h +++ b/src/r_poly_triangle.h @@ -271,21 +271,6 @@ struct ScreenTriangleStepVariables float Varying[TriVertex::NumVarying]; }; -struct ScreenTriangleFullSpan -{ - uint16_t X; - uint16_t Y; - uint32_t Length; -}; - -struct ScreenTrianglePartialBlock -{ - uint16_t X; - uint16_t Y; - uint32_t Mask0; - uint32_t Mask1; -}; - class ScreenTriangle { public: @@ -294,25 +279,13 @@ public: static void StencilFunc(const 
TriDrawTriangleArgs *args, WorkerThreadData *thread); static void StencilCloseFunc(const TriDrawTriangleArgs *args, WorkerThreadData *thread); - ScreenTriangle(); - - void SetupNormal(const TriDrawTriangleArgs *args, WorkerThreadData *thread); - void SetupSubsector(const TriDrawTriangleArgs *args, WorkerThreadData *thread); - void Draw(const TriDrawTriangleArgs *args); - void StencilWrite(const TriDrawTriangleArgs *args); - void SubsectorWrite(const TriDrawTriangleArgs *args); - - ScreenTriangleFullSpan *FullSpans; - ScreenTrianglePartialBlock *PartialBlocks; - int NumFullSpans; - int NumPartialBlocks; - int StartX; - int StartY; - private: - float FindGradientX(float x0, float y0, float x1, float y1, float x2, float y2, float c0, float c1, float c2); - float FindGradientY(float x0, float y0, float x1, float y1, float x2, float y2, float c0, float c1, float c2); + static void SetupNormal(const TriDrawTriangleArgs *args, WorkerThreadData *thread); + static void SetupSubsector(const TriDrawTriangleArgs *args, WorkerThreadData *thread); + static void Draw(const TriDrawTriangleArgs *args, WorkerThreadData *thread); + static void StencilWrite(const TriDrawTriangleArgs *args, WorkerThreadData *thread); + static void SubsectorWrite(const TriDrawTriangleArgs *args, WorkerThreadData *thread); - std::vector FullSpansBuffer; - std::vector PartialBlocksBuffer; + static float FindGradientX(float x0, float y0, float x1, float y1, float x2, float y2, float c0, float c1, float c2); + static float FindGradientY(float x0, float y0, float x1, float y1, float x2, float y2, float c0, float c1, float c2); }; diff --git a/src/r_thread.h b/src/r_thread.h index 66c9332c34..ea1ceaa29b 100644 --- a/src/r_thread.h +++ b/src/r_thread.h @@ -23,6 +23,7 @@ #pragma once #include "r_draw.h" +#include "r_drawers.h" #include #include #include @@ -46,6 +47,9 @@ public: { dc_temp = dc_temp_buff; dc_temp_rgba = dc_temp_rgbabuff_rgba; + + FullSpansBuffer.resize(MAXWIDTH / 8 * (MAXHEIGHT / 8)); + 
PartialBlocksBuffer.resize(MAXWIDTH / 8 * (MAXHEIGHT / 8)); } std::thread thread; @@ -71,6 +75,10 @@ public: // Working buffer used by the tilted (sloped) span drawer const uint8_t *tiltlighting[MAXWIDTH]; + // Working buffer used by the triangler drawer + std::vector FullSpansBuffer; + std::vector PartialBlocksBuffer; + // Checks if a line is rendered by this thread bool line_skipped_by_thread(int line) { From c643238b86f448911427feb5831e182874483d46 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 15 Dec 2016 01:33:26 +0100 Subject: [PATCH 489/912] Rewrote the LLVM triangle drawer to use the data from the triangle setup function --- src/r_poly_sky.cpp | 4 +- src/r_poly_triangle.cpp | 52 +- src/r_poly_triangle.h | 11 +- .../fixedfunction/drawtrianglecodegen.cpp | 960 ++++++++++++------ .../fixedfunction/drawtrianglecodegen.h | 87 ++ tools/drawergen/llvmdrawers.cpp | 85 +- tools/drawergen/llvmdrawers.h | 32 +- 7 files changed, 843 insertions(+), 388 deletions(-) diff --git a/src/r_poly_sky.cpp b/src/r_poly_sky.cpp index e4e0a01444..823a510f21 100644 --- a/src/r_poly_sky.cpp +++ b/src/r_poly_sky.cpp @@ -58,7 +58,7 @@ void PolySkyDome::Render(const TriMatrix &worldToClip) args.uniforms.subsectorDepth = RenderPolyScene::SkySubsectorDepth; args.objectToClip = &objectToClip; args.stenciltestvalue = 255; - args.stencilwritevalue = 255; + args.stencilwritevalue = 1; args.SetTexture(frontskytex); args.SetColormap(&NormalLight); args.SetClipPlane(0.0f, 0.0f, 0.0f, 0.0f); @@ -84,6 +84,7 @@ void PolySkyDome::RenderRow(PolyDrawArgs &args, int row, uint32_t capcolor) args.ccw = false; args.uniforms.color = capcolor; PolyTriangleDrawer::draw(args, TriDrawVariant::DrawNormal, TriBlendMode::Skycap); + PolyTriangleDrawer::draw(args, TriDrawVariant::Stencil, TriBlendMode::Skycap); } void PolySkyDome::RenderCapColorRow(PolyDrawArgs &args, FTexture *skytex, int row, bool bottomCap) @@ -98,6 +99,7 @@ void PolySkyDome::RenderCapColorRow(PolyDrawArgs &args, FTexture *skytex, int 
ro args.ccw = bottomCap; args.uniforms.color = solid; PolyTriangleDrawer::draw(args, TriDrawVariant::FillNormal, TriBlendMode::Copy); + PolyTriangleDrawer::draw(args, TriDrawVariant::Stencil, TriBlendMode::Copy); } void PolySkyDome::CreateDome() diff --git a/src/r_poly_triangle.cpp b/src/r_poly_triangle.cpp index a97afaead9..164fc98e2c 100644 --- a/src/r_poly_triangle.cpp +++ b/src/r_poly_triangle.cpp @@ -88,22 +88,19 @@ void PolyTriangleDrawer::draw_arrays(const PolyDrawArgs &drawargs, TriDrawVarian return; auto llvm = Drawers::Instance(); - void(*drawfunc)(const TriDrawTriangleArgs *, WorkerThreadData *); + PolyDrawFuncPtr setupfunc = nullptr; + PolyDrawFuncPtr drawfunc = nullptr; int bmode = (int)blendmode; switch (variant) { default: - //case TriDrawVariant::DrawNormal: drawfunc = dest_bgra ? &ScreenTriangle::DrawFunc : llvm->TriDrawNormal8[bmode]; break; - //case TriDrawVariant::DrawSubsector: drawfunc = dest_bgra ? &ScreenTriangle::DrawSubsectorFunc : llvm->TriDrawSubsector8[bmode]; break; - //case TriDrawVariant::Stencil: drawfunc = &ScreenTriangle::StencilFunc; break; - //case TriDrawVariant::StencilClose: drawfunc = &ScreenTriangle::StencilCloseFunc; break; - case TriDrawVariant::DrawNormal: drawfunc = dest_bgra ? llvm->TriDrawNormal32[bmode] : llvm->TriDrawNormal8[bmode]; break; - case TriDrawVariant::FillNormal: drawfunc = dest_bgra ? llvm->TriFillNormal32[bmode] : llvm->TriFillNormal8[bmode]; break; - case TriDrawVariant::DrawSubsector: drawfunc = dest_bgra ? llvm->TriDrawSubsector32[bmode] : llvm->TriDrawSubsector8[bmode]; break; + case TriDrawVariant::DrawNormal: setupfunc = &ScreenTriangle::SetupNormal; drawfunc = dest_bgra ? llvm->TriDrawNormal32[bmode] : llvm->TriDrawNormal8[bmode]; break; + case TriDrawVariant::FillNormal: setupfunc = &ScreenTriangle::SetupNormal; drawfunc = dest_bgra ? 
llvm->TriFillNormal32[bmode] : llvm->TriFillNormal8[bmode]; break; + case TriDrawVariant::DrawSubsector: setupfunc = &ScreenTriangle::SetupSubsector; drawfunc = dest_bgra ? llvm->TriDrawSubsector32[bmode] : llvm->TriDrawSubsector8[bmode]; break; case TriDrawVariant::FuzzSubsector: - case TriDrawVariant::FillSubsector: drawfunc = dest_bgra ? llvm->TriFillSubsector32[bmode] : llvm->TriFillSubsector8[bmode]; break; - case TriDrawVariant::Stencil: drawfunc = llvm->TriStencil; break; - case TriDrawVariant::StencilClose: drawfunc = llvm->TriStencilClose; break; + case TriDrawVariant::FillSubsector: setupfunc = &ScreenTriangle::SetupSubsector; drawfunc = dest_bgra ? llvm->TriFillSubsector32[bmode] : llvm->TriFillSubsector8[bmode]; break; + case TriDrawVariant::Stencil: drawfunc = &ScreenTriangle::StencilFunc; break; + case TriDrawVariant::StencilClose: drawfunc = &ScreenTriangle::StencilCloseFunc; break; } TriDrawTriangleArgs args; @@ -139,7 +136,7 @@ void PolyTriangleDrawer::draw_arrays(const PolyDrawArgs &drawargs, TriDrawVarian { for (int j = 0; j < 3; j++) vert[j] = shade_vertex(*drawargs.objectToClip, drawargs.clipPlane, *(vinput++)); - draw_shaded_triangle(vert, ccw, &args, thread, drawfunc); + draw_shaded_triangle(vert, ccw, &args, thread, setupfunc, drawfunc); } } else if (drawargs.mode == TriangleDrawMode::Fan) @@ -149,7 +146,7 @@ void PolyTriangleDrawer::draw_arrays(const PolyDrawArgs &drawargs, TriDrawVarian for (int i = 2; i < vcount; i++) { vert[2] = shade_vertex(*drawargs.objectToClip, drawargs.clipPlane, *(vinput++)); - draw_shaded_triangle(vert, ccw, &args, thread, drawfunc); + draw_shaded_triangle(vert, ccw, &args, thread, setupfunc, drawfunc); vert[1] = vert[2]; } } @@ -160,7 +157,7 @@ void PolyTriangleDrawer::draw_arrays(const PolyDrawArgs &drawargs, TriDrawVarian for (int i = 2; i < vcount; i++) { vert[2] = shade_vertex(*drawargs.objectToClip, drawargs.clipPlane, *(vinput++)); - draw_shaded_triangle(vert, ccw, &args, thread, drawfunc); + 
draw_shaded_triangle(vert, ccw, &args, thread, setupfunc, drawfunc); vert[0] = vert[1]; vert[1] = vert[2]; ccw = !ccw; @@ -179,7 +176,7 @@ ShadedTriVertex PolyTriangleDrawer::shade_vertex(const TriMatrix &objectToClip, return sv; } -void PolyTriangleDrawer::draw_shaded_triangle(const ShadedTriVertex *vert, bool ccw, TriDrawTriangleArgs *args, WorkerThreadData *thread, void(*drawfunc)(const TriDrawTriangleArgs *, WorkerThreadData *)) +void PolyTriangleDrawer::draw_shaded_triangle(const ShadedTriVertex *vert, bool ccw, TriDrawTriangleArgs *args, WorkerThreadData *thread, PolyDrawFuncPtr setupfunc, PolyDrawFuncPtr drawfunc) { // Cull, clip and generate additional vertices as needed TriVertex clippedvert[max_additional_vertices]; @@ -223,6 +220,7 @@ void PolyTriangleDrawer::draw_shaded_triangle(const ShadedTriVertex *vert, bool args->v1 = &clippedvert[numclipvert - 1]; args->v2 = &clippedvert[i - 1]; args->v3 = &clippedvert[i - 2]; + if (setupfunc) setupfunc(args, thread); drawfunc(args, thread); } } @@ -233,6 +231,7 @@ void PolyTriangleDrawer::draw_shaded_triangle(const ShadedTriVertex *vert, bool args->v1 = &clippedvert[0]; args->v2 = &clippedvert[i - 1]; args->v3 = &clippedvert[i]; + if (setupfunc) setupfunc(args, thread); drawfunc(args, thread); } } @@ -952,7 +951,7 @@ void ScreenTriangle::SetupSubsector(const TriDrawTriangleArgs *args, WorkerThrea uint8_t *stencilBlock = &stencilValues[block * 64]; uint32_t *stencilBlockMask = &stencilMasks[block]; bool blockIsSingleStencil = ((*stencilBlockMask) & 0xffffff00) == 0xffffff00; - bool skipBlock = blockIsSingleStencil && ((*stencilBlockMask) & 0xff) != stencilTestValue; + bool skipBlock = blockIsSingleStencil && ((*stencilBlockMask) & 0xff) < stencilTestValue; // Skip block when outside an edge if (a == 0 || b == 0 || c == 0 || skipBlock) @@ -1043,7 +1042,7 @@ void ScreenTriangle::SetupSubsector(const TriDrawTriangleArgs *args, WorkerThrea for (int ix = 0; ix < q; ix++) { - bool passStencilTest = blockIsSingleStencil 
|| stencilBlock[ix + iy * q] == stencilTestValue; + bool passStencilTest = blockIsSingleStencil || stencilBlock[ix + iy * q] >= stencilTestValue; bool covered = (CX1 > 0 && CX2 > 0 && CX3 > 0 && (x + ix) < clipright && (y + iy) < clipbottom && passStencilTest && subsector[ix] >= subsectorDepth); mask0 <<= 1; mask0 |= (uint32_t)covered; @@ -1067,7 +1066,7 @@ void ScreenTriangle::SetupSubsector(const TriDrawTriangleArgs *args, WorkerThrea for (int ix = 0; ix < q; ix++) { - bool passStencilTest = blockIsSingleStencil || stencilBlock[ix + iy * q] == stencilTestValue; + bool passStencilTest = blockIsSingleStencil || stencilBlock[ix + iy * q] >= stencilTestValue; bool covered = (CX1 > 0 && CX2 > 0 && CX3 > 0 && (x + ix) < clipright && (y + iy) < clipbottom && passStencilTest && subsector[ix] >= subsectorDepth); mask1 <<= 1; mask1 |= (uint32_t)covered; @@ -1247,6 +1246,7 @@ void ScreenTriangle::SubsectorWrite(const TriDrawTriangleArgs *args, WorkerThrea } } +#if 0 float ScreenTriangle::FindGradientX(float x0, float y0, float x1, float y1, float x2, float y2, float c0, float c1, float c2) { float top = (c1 - c2) * (y0 - y2) - (c0 - c2) * (y1 - y2); @@ -1323,11 +1323,10 @@ void ScreenTriangle::Draw(const TriDrawTriangleArgs *args, WorkerThreadData *thr int32_t varyingPos[TriVertex::NumVarying]; for (int j = 0; j < TriVertex::NumVarying; j++) varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + int lightpos = 256 - (int)(clamp(shade - MIN(24.0f, globVis * blockPosX.W) / 32.0f, 0.0f, 31.0f / 32.0f) * 256.0f); for (int x = 0; x < width; x++) { - int lightpos = 256 - (int)(clamp(shade - MIN(24.0f, globVis * blockPosX.W) / 32.0f, 0.0f, 31.0f / 32.0f) * 256.0f); - blockPosX.W += gradientX.W * 8; for (int j = 0; j < TriVertex::NumVarying; j++) blockPosX.Varying[j] += gradientX.Varying[j] * 8; @@ -1508,18 +1507,7 @@ void ScreenTriangle::Draw(const TriDrawTriangleArgs *args, WorkerThreadData *thr } } } - -void ScreenTriangle::DrawFunc(const TriDrawTriangleArgs *args, 
WorkerThreadData *thread) -{ - SetupNormal(args, thread); - Draw(args, thread); -} - -void ScreenTriangle::DrawSubsectorFunc(const TriDrawTriangleArgs *args, WorkerThreadData *thread) -{ - SetupSubsector(args, thread); - Draw(args, thread); -} +#endif void ScreenTriangle::StencilFunc(const TriDrawTriangleArgs *args, WorkerThreadData *thread) { diff --git a/src/r_poly_triangle.h b/src/r_poly_triangle.h index 14cfb13350..59e52ef66d 100644 --- a/src/r_poly_triangle.h +++ b/src/r_poly_triangle.h @@ -153,6 +153,8 @@ struct TriMatrix float matrix[16]; }; +typedef void(*PolyDrawFuncPtr)(const TriDrawTriangleArgs *, WorkerThreadData *); + class PolyTriangleDrawer { public: @@ -163,7 +165,7 @@ public: private: static ShadedTriVertex shade_vertex(const TriMatrix &objectToClip, const float *clipPlane, const TriVertex &v); static void draw_arrays(const PolyDrawArgs &args, TriDrawVariant variant, TriBlendMode blendmode, WorkerThreadData *thread); - static void draw_shaded_triangle(const ShadedTriVertex *vertices, bool ccw, TriDrawTriangleArgs *args, WorkerThreadData *thread, void(*drawfunc)(const TriDrawTriangleArgs *, WorkerThreadData *)); + static void draw_shaded_triangle(const ShadedTriVertex *vertices, bool ccw, TriDrawTriangleArgs *args, WorkerThreadData *thread, PolyDrawFuncPtr setupfunc, PolyDrawFuncPtr drawfunc); static bool cullhalfspace(float clipdistance1, float clipdistance2, float &t1, float &t2); static void clipedge(const ShadedTriVertex *verts, TriVertex *clippedvert, int &numclipvert); @@ -274,18 +276,11 @@ struct ScreenTriangleStepVariables class ScreenTriangle { public: - static void DrawFunc(const TriDrawTriangleArgs *args, WorkerThreadData *thread); - static void DrawSubsectorFunc(const TriDrawTriangleArgs *args, WorkerThreadData *thread); static void StencilFunc(const TriDrawTriangleArgs *args, WorkerThreadData *thread); static void StencilCloseFunc(const TriDrawTriangleArgs *args, WorkerThreadData *thread); -private: static void SetupNormal(const 
TriDrawTriangleArgs *args, WorkerThreadData *thread); static void SetupSubsector(const TriDrawTriangleArgs *args, WorkerThreadData *thread); - static void Draw(const TriDrawTriangleArgs *args, WorkerThreadData *thread); static void StencilWrite(const TriDrawTriangleArgs *args, WorkerThreadData *thread); static void SubsectorWrite(const TriDrawTriangleArgs *args, WorkerThreadData *thread); - - static float FindGradientX(float x0, float y0, float x1, float y1, float x2, float y2, float c0, float c1, float c2); - static float FindGradientY(float x0, float y0, float x1, float y1, float x2, float y2, float c0, float c1, float c2); }; diff --git a/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp b/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp index 3806d5253d..f38b423504 100644 --- a/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp +++ b/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp @@ -32,6 +32,636 @@ #include "ssa/ssa_struct_type.h" #include "ssa/ssa_value.h" +void DrawTriangleCodegen::Generate(TriDrawVariant variant, TriBlendMode blendmode, bool truecolor, SSAValue args, SSAValue thread_data) +{ + this->variant = variant; + this->blendmode = blendmode; + this->truecolor = truecolor; + pixelsize = truecolor ? 
4 : 1; + + LoadArgs(args, thread_data); + CalculateGradients(); + DrawFullSpans(); + DrawPartialBlocks(); +} + +void DrawTriangleCodegen::DrawFullSpans() +{ + stack_i.store(SSAInt(0)); + SSAForBlock loop; + SSAInt i = stack_i.load(); + loop.loop_block(i < numSpans, 0); + { + SSAInt spanX = SSAShort(fullSpans[i][0].load(true).v).zext_int(); + SSAInt spanY = SSAShort(fullSpans[i][1].load(true).v).zext_int(); + SSAInt spanLength = fullSpans[i][2].load(true); + + SSAInt width = spanLength; + SSAInt height = SSAInt(8); + + stack_dest.store(destOrg[(spanX + spanY * pitch) * pixelsize]); + stack_subsector.store(subsectorGBuffer[spanX + spanY * pitch]); + stack_posYW.store(start.W + gradientX.W * (spanX - startX) + gradientY.W * (spanY - startY)); + for (int j = 0; j < TriVertex::NumVarying; j++) + stack_posYVarying[j].store(start.Varying[j] + gradientX.Varying[j] * (spanX - startX) + gradientY.Varying[j] * (spanY - startY)); + stack_y.store(SSAInt(0)); + + SSAForBlock loop_y; + SSAInt y = stack_y.load(); + SSAUBytePtr dest = stack_dest.load(); + SSAIntPtr subsector = stack_subsector.load(); + SSAStepVariables blockPosY; + blockPosY.W = stack_posYW.load(); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = stack_posYVarying[j].load(); + loop_y.loop_block(y < height, 0); + { + stack_posXW.store(blockPosY.W); + for (int j = 0; j < TriVertex::NumVarying; j++) + stack_posXVarying[j].store(blockPosY.Varying[j]); + + SSAFloat rcpW = SSAFloat((float)0x01000000) / blockPosY.W; + stack_lightpos.store(FRACUNIT - SSAInt(SSAFloat::clamp(shade - SSAFloat::MIN(SSAFloat(24.0f), globVis * blockPosY.W) / 32.0f, SSAFloat(0.0f), SSAFloat(31.0f / 32.0f)) * (float)FRACUNIT, true)); + for (int j = 0; j < TriVertex::NumVarying; j++) + stack_varyingPos[j].store(SSAInt(blockPosY.Varying[j] * rcpW, false)); + stack_x.store(SSAInt(0)); + + SSAForBlock loop_x; + SSAInt x = stack_x.load(); + SSAStepVariables blockPosX; + blockPosX.W = stack_posXW.load(); + for (int j = 0; j < 
TriVertex::NumVarying; j++) + blockPosX.Varying[j] = stack_posXVarying[j].load(); + SSAInt lightpos = stack_lightpos.load(); + SSAInt varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = stack_varyingPos[j].load(); + loop_x.loop_block(x < width, 0); + { + blockPosX.W = blockPosX.W + gradientX.W * 8.0f; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] = blockPosX.Varying[j] + gradientX.Varying[j] * 8.0f; + + rcpW = SSAFloat((float)0x01000000) / blockPosX.W; + SSAInt varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + SSAInt nextPos = SSAInt(blockPosX.Varying[j] * rcpW, false); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + SSAInt lightnext = FRACUNIT - SSAInt(SSAFloat::clamp(shade - SSAFloat::MIN(SSAFloat(24.0f), globVis * blockPosX.W) / 32.0f, SSAFloat(0.0f), SSAFloat(31.0f / 32.0f)) * (float)FRACUNIT, true); + SSAInt lightstep = (lightnext - lightpos) / 8; + + for (int ix = 0; ix < 8; ix++) + { + if (truecolor) + { + currentlight = is_fixed_light.select(light, lightpos >> 8); + + SSAUBytePtr destptr = dest[(x * 8 + ix) * 4]; + destptr.store_vec4ub(ProcessPixel32(destptr.load_vec4ub(false), varyingPos)); + + if (variant != TriDrawVariant::DrawSubsector && variant != TriDrawVariant::FillSubsector && variant != TriDrawVariant::FuzzSubsector) + subsector[x * 8 + ix].store(subsectorDepth); + } + else + { + currentlight = is_fixed_light.select(light, lightpos >> 8); + SSAInt colormapindex = SSAInt::MIN((256 - currentlight) * 32 / 256, SSAInt(31)); + currentcolormap = Colormaps[colormapindex << 8]; + + SSAUBytePtr destptr = dest[(x * 8 + ix)]; + destptr.store(ProcessPixel8(destptr.load(false).zext_int(), varyingPos).trunc_ubyte()); + + if (variant != TriDrawVariant::DrawSubsector && variant != TriDrawVariant::FillSubsector && variant != TriDrawVariant::FuzzSubsector) + subsector[x * 8 + ix].store(subsectorDepth); + } + + for (int j = 0; j < 
TriVertex::NumVarying; j++) + varyingPos[j] = varyingPos[j] + varyingStep[j]; + lightpos = lightpos + lightstep; + } + + for (int j = 0; j < TriVertex::NumVarying; j++) + stack_varyingPos[j].store(varyingPos[j]); + stack_lightpos.store(lightpos); + stack_posXW.store(blockPosX.W); + for (int j = 0; j < TriVertex::NumVarying; j++) + stack_posXVarying[j].store(blockPosX.Varying[j]); + stack_x.store(x + 1); + } + loop_x.end_block(); + + stack_posYW.store(blockPosY.W + gradientY.W); + for (int j = 0; j < TriVertex::NumVarying; j++) + stack_posYVarying[j].store(blockPosY.Varying[j] + gradientY.Varying[j]); + stack_dest.store(dest[pitch * pixelsize]); + stack_subsector.store(subsector[pitch]); + stack_y.store(y + 1); + } + loop_y.end_block(); + + stack_i.store(i + 1); + } + loop.end_block(); +} + +void DrawTriangleCodegen::DrawPartialBlocks() +{ + stack_i.store(SSAInt(0)); + SSAForBlock loop; + SSAInt i = stack_i.load(); + loop.loop_block(i < numBlocks, 0); + { + SSAInt blockX = SSAShort(partialBlocks[i][0].load(true).v).zext_int(); + SSAInt blockY = SSAShort(partialBlocks[i][1].load(true).v).zext_int(); + SSAInt mask0 = partialBlocks[i][2].load(true); + SSAInt mask1 = partialBlocks[i][3].load(true); + + SSAUBytePtr dest = destOrg[(blockX + blockY * pitch) * pixelsize]; + SSAIntPtr subsector = subsectorGBuffer[blockX + blockY * pitch]; + + SSAStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (blockX - startX) + gradientY.W * (blockY - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (blockX - startX) + gradientY.Varying[j] * (blockY - startY); + + for (int maskNum = 0; maskNum < 2; maskNum++) + { + SSAInt mask = (maskNum == 0) ? 
mask0 : mask1; + + for (int y = 0; y < 4; y++) + { + SSAStepVariables blockPosX = blockPosY; + + SSAFloat rcpW = SSAFloat((float)0x01000000) / blockPosX.W; + SSAInt varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = SSAInt(blockPosX.Varying[j] * rcpW, false); + + SSAInt lightpos = FRACUNIT - SSAInt(SSAFloat::clamp(shade - SSAFloat::MIN(SSAFloat(24.0f), globVis * blockPosX.W) / 32.0f, SSAFloat(0.0f), SSAFloat(31.0f / 32.0f)) * (float)FRACUNIT, true); + + blockPosX.W = blockPosX.W + gradientX.W * 8.0f; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] = blockPosX.Varying[j] + gradientX.Varying[j] * 8.0f; + + rcpW = SSAFloat((float)0x01000000) / blockPosX.W; + SSAInt varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + SSAInt nextPos = SSAInt(blockPosX.Varying[j] * rcpW, false); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + SSAInt lightnext = FRACUNIT - SSAInt(SSAFloat::clamp(shade - SSAFloat::MIN(SSAFloat(24.0f), globVis * blockPosX.W) / 32.0f, SSAFloat(0.0f), SSAFloat(31.0f / 32.0f)) * (float)FRACUNIT, true); + SSAInt lightstep = (lightnext - lightpos) / 8; + + for (int x = 0; x < 8; x++) + { + SSABool covered = !((mask & (1 << (31 - y * 8 - x))) == SSAInt(0)); + SSAIfBlock branch; + branch.if_block(covered); + { + if (truecolor) + { + currentlight = is_fixed_light.select(light, lightpos >> 8); + + SSAUBytePtr destptr = dest[x * 4]; + destptr.store_vec4ub(ProcessPixel32(destptr.load_vec4ub(false), varyingPos)); + + if (variant != TriDrawVariant::DrawSubsector && variant != TriDrawVariant::FillSubsector && variant != TriDrawVariant::FuzzSubsector) + subsector[x].store(subsectorDepth); + } + else + { + currentlight = is_fixed_light.select(light, lightpos >> 8); + SSAInt colormapindex = SSAInt::MIN((256 - currentlight) * 32 / 256, SSAInt(31)); + currentcolormap = Colormaps[colormapindex << 8]; + + SSAUBytePtr destptr = dest[x]; + 
destptr.store(ProcessPixel8(destptr.load(false).zext_int(), varyingPos).trunc_ubyte()); + + if (variant != TriDrawVariant::DrawSubsector && variant != TriDrawVariant::FillSubsector && variant != TriDrawVariant::FuzzSubsector) + subsector[x].store(subsectorDepth); + } + } + branch.end_block(); + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = varyingPos[j] + varyingStep[j]; + lightpos = lightpos + lightstep; + } + + blockPosY.W = blockPosY.W + gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = blockPosY.Varying[j] + gradientY.Varying[j]; + + dest = dest[pitch * pixelsize]; + subsector = subsector[pitch]; + } + } + + stack_i.store(i + 1); + } + loop.end_block(); +} + +SSAVec4i DrawTriangleCodegen::TranslateSample32(SSAInt *varying) +{ + SSAInt ufrac = varying[0] << 8; + SSAInt vfrac = varying[1] << 8; + + SSAInt upos = ((ufrac >> 16) * textureWidth) >> 16; + SSAInt vpos = ((vfrac >> 16) * textureHeight) >> 16; + SSAInt uvoffset = upos * textureHeight + vpos; + + if (variant == TriDrawVariant::FillNormal || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector) + return translation[color * 4].load_vec4ub(true); + else + return translation[texturePixels[uvoffset].load(true).zext_int() * 4].load_vec4ub(true); +} + +SSAInt DrawTriangleCodegen::TranslateSample8(SSAInt *varying) +{ + SSAInt ufrac = varying[0] << 8; + SSAInt vfrac = varying[1] << 8; + + SSAInt upos = ((ufrac >> 16) * textureWidth) >> 16; + SSAInt vpos = ((vfrac >> 16) * textureHeight) >> 16; + SSAInt uvoffset = upos * textureHeight + vpos; + + if (variant == TriDrawVariant::FillNormal || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector) + return translation[color].load(true).zext_int(); + else + return translation[texturePixels[uvoffset].load(true).zext_int()].load(true).zext_int(); +} + +SSAVec4i DrawTriangleCodegen::Sample32(SSAInt *varying) +{ + if (variant == 
TriDrawVariant::FillNormal || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector) + return SSAVec4i::unpack(color); + + SSAInt ufrac = varying[0] << 8; + SSAInt vfrac = varying[1] << 8; + + SSAVec4i nearest; + SSAVec4i linear; + + { + SSAInt upos = ((ufrac >> 16) * textureWidth) >> 16; + SSAInt vpos = ((vfrac >> 16) * textureHeight) >> 16; + SSAInt uvoffset = upos * textureHeight + vpos; + + nearest = texturePixels[uvoffset * 4].load_vec4ub(true); + } + + return nearest; + + /* + { + SSAInt uone = (SSAInt(0x01000000) / textureWidth) << 8; + SSAInt vone = (SSAInt(0x01000000) / textureHeight) << 8; + + ufrac = ufrac - (uone >> 1); + vfrac = vfrac - (vone >> 1); + + SSAInt frac_x0 = (ufrac >> FRACBITS) * textureWidth; + SSAInt frac_x1 = ((ufrac + uone) >> FRACBITS) * textureWidth; + SSAInt frac_y0 = (vfrac >> FRACBITS) * textureHeight; + SSAInt frac_y1 = ((vfrac + vone) >> FRACBITS) * textureHeight; + + SSAInt x0 = frac_x0 >> FRACBITS; + SSAInt x1 = frac_x1 >> FRACBITS; + SSAInt y0 = frac_y0 >> FRACBITS; + SSAInt y1 = frac_y1 >> FRACBITS; + + SSAVec4i p00 = texturePixels[(x0 * textureHeight + y0) * 4].load_vec4ub(true); + SSAVec4i p01 = texturePixels[(x0 * textureHeight + y1) * 4].load_vec4ub(true); + SSAVec4i p10 = texturePixels[(x1 * textureHeight + y0) * 4].load_vec4ub(true); + SSAVec4i p11 = texturePixels[(x1 * textureHeight + y1) * 4].load_vec4ub(true); + + SSAInt inv_b = (frac_x1 >> (FRACBITS - 4)) & 15; + SSAInt inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; + SSAInt a = 16 - inv_a; + SSAInt b = 16 - inv_b; + + linear = (p00 * (a * b) + p01 * (inv_a * b) + p10 * (a * inv_b) + p11 * (inv_a * inv_b) + 127) >> 8; + } + + return AffineLinear.select(linear, nearest); + */ +} + +SSAInt DrawTriangleCodegen::Sample8(SSAInt *varying) +{ + SSAInt ufrac = varying[0] << 8; + SSAInt vfrac = varying[1] << 8; + + SSAInt upos = ((ufrac >> 16) * textureWidth) >> 16; + SSAInt vpos = ((vfrac >> 16) * textureHeight) >> 16; + SSAInt uvoffset = upos * 
textureHeight + vpos; + + if (variant == TriDrawVariant::FillNormal || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector) + return color; + else + return texturePixels[uvoffset].load(true).zext_int(); +} + +SSAInt DrawTriangleCodegen::Shade8(SSAInt c) +{ + return currentcolormap[c].load(true).zext_int(); +} + +SSAVec4i DrawTriangleCodegen::ProcessPixel32(SSAVec4i bg, SSAInt *varying) +{ + SSAVec4i fg; + SSAVec4i output; + + switch (blendmode) + { + default: + case TriBlendMode::Copy: + fg = Sample32(varying); + output = blend_copy(shade_bgra_simple(fg, currentlight)); + break; + case TriBlendMode::AlphaBlend: + fg = Sample32(varying); + output = blend_alpha_blend(shade_bgra_simple(fg, currentlight), bg); + break; + case TriBlendMode::AddSolid: + fg = Sample32(varying); + output = blend_add(shade_bgra_simple(fg, currentlight), bg, srcalpha, destalpha); + break; + case TriBlendMode::Add: + fg = Sample32(varying); + output = blend_add(shade_bgra_simple(fg, currentlight), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); + break; + case TriBlendMode::Sub: + fg = Sample32(varying); + output = blend_sub(shade_bgra_simple(fg, currentlight), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); + break; + case TriBlendMode::RevSub: + fg = Sample32(varying); + output = blend_revsub(shade_bgra_simple(fg, currentlight), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); + break; + case TriBlendMode::Stencil: + fg = Sample32(varying); + output = blend_stencil(shade_bgra_simple(SSAVec4i::unpack(color), currentlight), fg[3], bg, srcalpha, destalpha); + break; + case TriBlendMode::Shaded: + output = blend_stencil(shade_bgra_simple(SSAVec4i::unpack(color), currentlight), Sample8(varying), bg, srcalpha, destalpha); + break; + case TriBlendMode::TranslateCopy: + fg = TranslateSample32(varying); + output = blend_copy(shade_bgra_simple(fg, currentlight)); + break; + case TriBlendMode::TranslateAlphaBlend: + fg = TranslateSample32(varying); + output = 
blend_alpha_blend(shade_bgra_simple(fg, currentlight), bg); + break; + case TriBlendMode::TranslateAdd: + fg = TranslateSample32(varying); + output = blend_add(shade_bgra_simple(fg, currentlight), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); + break; + case TriBlendMode::TranslateSub: + fg = TranslateSample32(varying); + output = blend_sub(shade_bgra_simple(fg, currentlight), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); + break; + case TriBlendMode::TranslateRevSub: + fg = TranslateSample32(varying); + output = blend_revsub(shade_bgra_simple(fg, currentlight), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); + break; + case TriBlendMode::AddSrcColorOneMinusSrcColor: + fg = Sample32(varying); + output = blend_add_srccolor_oneminussrccolor(shade_bgra_simple(fg, currentlight), bg); + break; + case TriBlendMode::Skycap: + fg = Sample32(varying); + output = FadeOut(varying[1], fg); + break; + } + + return output; +} + +SSAVec4i DrawTriangleCodegen::ToBgra(SSAInt index) +{ + SSAVec4i c = BaseColors[index * 4].load_vec4ub(true); + c = c.insert(3, 255); + return c; +} + +SSAInt DrawTriangleCodegen::ToPal8(SSAVec4i c) +{ + return RGB32k[((c[2] >> 3) * 32 + (c[1] >> 3)) * 32 + (c[0] >> 3)].load(true).zext_int(); +} + +SSAInt DrawTriangleCodegen::ProcessPixel8(SSAInt bg, SSAInt *varying) +{ + SSAVec4i fg; + SSAInt alpha, inv_alpha; + SSAInt output; + SSAInt palindex; + + switch (blendmode) + { + default: + case TriBlendMode::Copy: + output = Shade8(Sample8(varying)); + break; + case TriBlendMode::AlphaBlend: + palindex = Sample8(varying); + output = Shade8(palindex); + output = (palindex == SSAInt(0)).select(bg, output); + break; + case TriBlendMode::AddSolid: + palindex = Sample8(varying); + fg = ToBgra(Shade8(palindex)); + output = ToPal8(blend_add(fg, ToBgra(bg), srcalpha, destalpha)); + output = (palindex == SSAInt(0)).select(bg, output); + break; + case TriBlendMode::Add: + palindex = Sample8(varying); + fg = ToBgra(Shade8(palindex)); + output = 
ToPal8(blend_add(fg, ToBgra(bg), srcalpha, calc_blend_bgalpha(fg, destalpha))); + output = (palindex == SSAInt(0)).select(bg, output); + break; + case TriBlendMode::Sub: + palindex = Sample8(varying); + fg = ToBgra(Shade8(palindex)); + output = ToPal8(blend_sub(fg, ToBgra(bg), srcalpha, calc_blend_bgalpha(fg, destalpha))); + output = (palindex == SSAInt(0)).select(bg, output); + break; + case TriBlendMode::RevSub: + palindex = Sample8(varying); + fg = ToBgra(Shade8(palindex)); + output = ToPal8(blend_revsub(fg, ToBgra(bg), srcalpha, calc_blend_bgalpha(fg, destalpha))); + output = (palindex == SSAInt(0)).select(bg, output); + break; + case TriBlendMode::Stencil: + output = ToPal8(blend_stencil(ToBgra(Shade8(color)), (Sample8(varying) == SSAInt(0)).select(SSAInt(0), SSAInt(256)), ToBgra(bg), srcalpha, destalpha)); + break; + case TriBlendMode::Shaded: + palindex = Sample8(varying); + output = ToPal8(blend_stencil(ToBgra(Shade8(color)), palindex, ToBgra(bg), srcalpha, destalpha)); + break; + case TriBlendMode::TranslateCopy: + palindex = TranslateSample8(varying); + output = Shade8(palindex); + output = (palindex == SSAInt(0)).select(bg, output); + break; + case TriBlendMode::TranslateAlphaBlend: + palindex = TranslateSample8(varying); + output = Shade8(palindex); + output = (palindex == SSAInt(0)).select(bg, output); + break; + case TriBlendMode::TranslateAdd: + palindex = TranslateSample8(varying); + fg = ToBgra(Shade8(palindex)); + output = ToPal8(blend_add(fg, ToBgra(bg), srcalpha, calc_blend_bgalpha(fg, destalpha))); + output = (palindex == SSAInt(0)).select(bg, output); + break; + case TriBlendMode::TranslateSub: + palindex = TranslateSample8(varying); + fg = ToBgra(Shade8(palindex)); + output = ToPal8(blend_sub(fg, ToBgra(bg), srcalpha, calc_blend_bgalpha(fg, destalpha))); + output = (palindex == SSAInt(0)).select(bg, output); + break; + case TriBlendMode::TranslateRevSub: + palindex = TranslateSample8(varying); + fg = ToBgra(Shade8(palindex)); + output = 
ToPal8(blend_revsub(fg, ToBgra(bg), srcalpha, calc_blend_bgalpha(fg, destalpha))); + output = (palindex == SSAInt(0)).select(bg, output); + break; + case TriBlendMode::AddSrcColorOneMinusSrcColor: + palindex = Sample8(varying); + fg = ToBgra(Shade8(palindex)); + output = ToPal8(blend_add_srccolor_oneminussrccolor(fg, ToBgra(bg))); + output = (palindex == SSAInt(0)).select(bg, output); + break; + case TriBlendMode::Skycap: + fg = ToBgra(Sample8(varying)); + output = ToPal8(FadeOut(varying[1], fg)); + break; + } + + return output; +} + +SSAVec4i DrawTriangleCodegen::FadeOut(SSAInt frac, SSAVec4i fg) +{ + int start_fade = 2; // How fast it should fade out + + SSAInt alpha_top = SSAInt::MAX(SSAInt::MIN(frac.ashr(16 - start_fade), SSAInt(256)), SSAInt(0)); + SSAInt alpha_bottom = SSAInt::MAX(SSAInt::MIN(((2 << 24) - frac).ashr(16 - start_fade), SSAInt(256)), SSAInt(0)); + SSAInt alpha = SSAInt::MIN(alpha_top, alpha_bottom); + SSAInt inv_alpha = 256 - alpha; + + fg = (fg * alpha + SSAVec4i::unpack(color) * inv_alpha) / 256; + return fg.insert(3, 255); +} + +void DrawTriangleCodegen::CalculateGradients() +{ + gradientX.W = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + gradientY.W = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + start.W = v1.w + gradientX.W * (SSAFloat(startX) - v1.x) + gradientY.W * (SSAFloat(startY) - v1.y); + for (int i = 0; i < TriVertex::NumVarying; i++) + { + gradientX.Varying[i] = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + gradientY.Varying[i] = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + start.Varying[i] = v1.varying[i] * v1.w + gradientX.Varying[i] * (SSAFloat(startX) - v1.x) + gradientY.Varying[i] * (SSAFloat(startY) - v1.y); + } + + shade = (64.0f - (SSAFloat(light * 255 / 256) + 12.0f) * 32.0f / 128.0f) / 32.0f; + globVis = SSAFloat(1706.0f); +} + 
+void DrawTriangleCodegen::LoadArgs(SSAValue args, SSAValue thread_data) +{ + destOrg = args[0][0].load(true); + pitch = args[0][1].load(true); + v1 = LoadTriVertex(args[0][2].load(true)); + v2 = LoadTriVertex(args[0][3].load(true)); + v3 = LoadTriVertex(args[0][4].load(true)); + texturePixels = args[0][9].load(true); + textureWidth = args[0][10].load(true); + textureHeight = args[0][11].load(true); + translation = args[0][12].load(true); + LoadUniforms(args[0][13].load(true)); + subsectorGBuffer = args[0][19].load(true); + if (!truecolor) + { + Colormaps = args[0][20].load(true); + RGB32k = args[0][21].load(true); + BaseColors = args[0][22].load(true); + } + + fullSpans = thread_data[0][5].load(true); + partialBlocks = thread_data[0][6].load(true); + numSpans = thread_data[0][7].load(true); + numBlocks = thread_data[0][8].load(true); + startX = thread_data[0][9].load(true); + startY = thread_data[0][10].load(true); +} + +SSATriVertex DrawTriangleCodegen::LoadTriVertex(SSAValue ptr) +{ + SSATriVertex v; + v.x = ptr[0][0].load(true); + v.y = ptr[0][1].load(true); + v.z = ptr[0][2].load(true); + v.w = ptr[0][3].load(true); + for (int i = 0; i < TriVertex::NumVarying; i++) + v.varying[i] = ptr[0][4 + i].load(true); + return v; +} + +void DrawTriangleCodegen::LoadUniforms(SSAValue uniforms) +{ + light = uniforms[0][0].load(true); + subsectorDepth = uniforms[0][1].load(true); + color = uniforms[0][2].load(true); + srcalpha = uniforms[0][3].load(true); + destalpha = uniforms[0][4].load(true); + + SSAShort light_alpha = uniforms[0][5].load(true); + SSAShort light_red = uniforms[0][6].load(true); + SSAShort light_green = uniforms[0][7].load(true); + SSAShort light_blue = uniforms[0][8].load(true); + SSAShort fade_alpha = uniforms[0][9].load(true); + SSAShort fade_red = uniforms[0][10].load(true); + SSAShort fade_green = uniforms[0][11].load(true); + SSAShort fade_blue = uniforms[0][12].load(true); + SSAShort desaturate = uniforms[0][13].load(true); + SSAInt flags = 
uniforms[0][14].load(true); + shade_constants.light = SSAVec4i(light_blue.zext_int(), light_green.zext_int(), light_red.zext_int(), light_alpha.zext_int()); + shade_constants.fade = SSAVec4i(fade_blue.zext_int(), fade_green.zext_int(), fade_red.zext_int(), fade_alpha.zext_int()); + shade_constants.desaturate = desaturate.zext_int(); + + is_simple_shade = (flags & TriUniforms::simple_shade) == SSAInt(TriUniforms::simple_shade); + is_nearest_filter = (flags & TriUniforms::nearest_filter) == SSAInt(TriUniforms::nearest_filter); + is_fixed_light = (flags & TriUniforms::fixed_light) == SSAInt(TriUniforms::fixed_light); +} + +SSAFloat DrawTriangleCodegen::FindGradientX(SSAFloat x0, SSAFloat y0, SSAFloat x1, SSAFloat y1, SSAFloat x2, SSAFloat y2, SSAFloat c0, SSAFloat c1, SSAFloat c2) +{ + SSAFloat top = (c1 - c2) * (y0 - y2) - (c0 - c2) * (y1 - y2); + SSAFloat bottom = (x1 - x2) * (y0 - y2) - (x0 - x2) * (y1 - y2); + return top / bottom; +} + +SSAFloat DrawTriangleCodegen::FindGradientY(SSAFloat x0, SSAFloat y0, SSAFloat x1, SSAFloat y1, SSAFloat x2, SSAFloat y2, SSAFloat c0, SSAFloat c1, SSAFloat c2) +{ + SSAFloat top = (c1 - c2) * (x0 - x2) - (c0 - c2) * (x1 - x2); + SSAFloat bottom = (x0 - x2) * (y1 - y2) - (x1 - x2) * (y0 - y2); + return top / bottom; +} + + +#if 0 + void DrawTriangleCodegen::Generate(TriDrawVariant variant, TriBlendMode blendmode, bool truecolor, SSAValue args, SSAValue thread_data) { this->variant = variant; @@ -742,297 +1372,6 @@ void DrawTriangleCodegen::LoopMaskedStoreBlock() } #endif -SSAVec4i DrawTriangleCodegen::TranslateSample32(SSAInt *varying) -{ - SSAInt ufrac = varying[0] << 8; - SSAInt vfrac = varying[1] << 8; - - SSAInt upos = ((ufrac >> 16) * textureWidth) >> 16; - SSAInt vpos = ((vfrac >> 16) * textureHeight) >> 16; - SSAInt uvoffset = upos * textureHeight + vpos; - - if (variant == TriDrawVariant::FillNormal || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector) - return translation[color * 
4].load_vec4ub(true); - else - return translation[texturePixels[uvoffset].load(true).zext_int() * 4].load_vec4ub(true); -} - -SSAInt DrawTriangleCodegen::TranslateSample8(SSAInt *varying) -{ - SSAInt ufrac = varying[0] << 8; - SSAInt vfrac = varying[1] << 8; - - SSAInt upos = ((ufrac >> 16) * textureWidth) >> 16; - SSAInt vpos = ((vfrac >> 16) * textureHeight) >> 16; - SSAInt uvoffset = upos * textureHeight + vpos; - - if (variant == TriDrawVariant::FillNormal || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector) - return translation[color].load(true).zext_int(); - else - return translation[texturePixels[uvoffset].load(true).zext_int()].load(true).zext_int(); -} - -SSAVec4i DrawTriangleCodegen::Sample32(SSAInt *varying) -{ - if (variant == TriDrawVariant::FillNormal || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector) - return SSAVec4i::unpack(color); - - SSAInt ufrac = varying[0] << 8; - SSAInt vfrac = varying[1] << 8; - - SSAVec4i nearest; - SSAVec4i linear; - - { - SSAInt upos = ((ufrac >> 16) * textureWidth) >> 16; - SSAInt vpos = ((vfrac >> 16) * textureHeight) >> 16; - SSAInt uvoffset = upos * textureHeight + vpos; - - nearest = texturePixels[uvoffset * 4].load_vec4ub(true); - } - - { - SSAInt uone = (SSAInt(0x01000000) / textureWidth) << 8; - SSAInt vone = (SSAInt(0x01000000) / textureHeight) << 8; - - ufrac = ufrac - (uone >> 1); - vfrac = vfrac - (vone >> 1); - - SSAInt frac_x0 = (ufrac >> FRACBITS) * textureWidth; - SSAInt frac_x1 = ((ufrac + uone) >> FRACBITS) * textureWidth; - SSAInt frac_y0 = (vfrac >> FRACBITS) * textureHeight; - SSAInt frac_y1 = ((vfrac + vone) >> FRACBITS) * textureHeight; - - SSAInt x0 = frac_x0 >> FRACBITS; - SSAInt x1 = frac_x1 >> FRACBITS; - SSAInt y0 = frac_y0 >> FRACBITS; - SSAInt y1 = frac_y1 >> FRACBITS; - - SSAVec4i p00 = texturePixels[(x0 * textureHeight + y0) * 4].load_vec4ub(true); - SSAVec4i p01 = texturePixels[(x0 * textureHeight + y1) * 
4].load_vec4ub(true); - SSAVec4i p10 = texturePixels[(x1 * textureHeight + y0) * 4].load_vec4ub(true); - SSAVec4i p11 = texturePixels[(x1 * textureHeight + y1) * 4].load_vec4ub(true); - - SSAInt inv_b = (frac_x1 >> (FRACBITS - 4)) & 15; - SSAInt inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; - SSAInt a = 16 - inv_a; - SSAInt b = 16 - inv_b; - - linear = (p00 * (a * b) + p01 * (inv_a * b) + p10 * (a * inv_b) + p11 * (inv_a * inv_b) + 127) >> 8; - } - - return AffineLinear.select(linear, nearest); -} - -SSAInt DrawTriangleCodegen::Sample8(SSAInt *varying) -{ - SSAInt ufrac = varying[0] << 8; - SSAInt vfrac = varying[1] << 8; - - SSAInt upos = ((ufrac >> 16) * textureWidth) >> 16; - SSAInt vpos = ((vfrac >> 16) * textureHeight) >> 16; - SSAInt uvoffset = upos * textureHeight + vpos; - - if (variant == TriDrawVariant::FillNormal || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector) - return color; - else - return texturePixels[uvoffset].load(true).zext_int(); -} - -SSAInt DrawTriangleCodegen::Shade8(SSAInt c) -{ - return currentcolormap[c].load(true).zext_int(); -} - -SSAVec4i DrawTriangleCodegen::ProcessPixel32(SSAVec4i bg, SSAInt *varying) -{ - SSAVec4i fg; - SSAVec4i output; - - switch (blendmode) - { - default: - case TriBlendMode::Copy: - fg = Sample32(varying); - output = blend_copy(shade_bgra_simple(fg, currentlight)); - break; - case TriBlendMode::AlphaBlend: - fg = Sample32(varying); - output = blend_alpha_blend(shade_bgra_simple(fg, currentlight), bg); - break; - case TriBlendMode::AddSolid: - fg = Sample32(varying); - output = blend_add(shade_bgra_simple(fg, currentlight), bg, srcalpha, destalpha); - break; - case TriBlendMode::Add: - fg = Sample32(varying); - output = blend_add(shade_bgra_simple(fg, currentlight), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); - break; - case TriBlendMode::Sub: - fg = Sample32(varying); - output = blend_sub(shade_bgra_simple(fg, currentlight), bg, srcalpha, calc_blend_bgalpha(fg, 
destalpha)); - break; - case TriBlendMode::RevSub: - fg = Sample32(varying); - output = blend_revsub(shade_bgra_simple(fg, currentlight), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); - break; - case TriBlendMode::Stencil: - fg = Sample32(varying); - output = blend_stencil(shade_bgra_simple(SSAVec4i::unpack(color), currentlight), fg[3], bg, srcalpha, destalpha); - break; - case TriBlendMode::Shaded: - output = blend_stencil(shade_bgra_simple(SSAVec4i::unpack(color), currentlight), Sample8(varying), bg, srcalpha, destalpha); - break; - case TriBlendMode::TranslateCopy: - fg = TranslateSample32(varying); - output = blend_copy(shade_bgra_simple(fg, currentlight)); - break; - case TriBlendMode::TranslateAlphaBlend: - fg = TranslateSample32(varying); - output = blend_alpha_blend(shade_bgra_simple(fg, currentlight), bg); - break; - case TriBlendMode::TranslateAdd: - fg = TranslateSample32(varying); - output = blend_add(shade_bgra_simple(fg, currentlight), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); - break; - case TriBlendMode::TranslateSub: - fg = TranslateSample32(varying); - output = blend_sub(shade_bgra_simple(fg, currentlight), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); - break; - case TriBlendMode::TranslateRevSub: - fg = TranslateSample32(varying); - output = blend_revsub(shade_bgra_simple(fg, currentlight), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); - break; - case TriBlendMode::AddSrcColorOneMinusSrcColor: - fg = Sample32(varying); - output = blend_add_srccolor_oneminussrccolor(shade_bgra_simple(fg, currentlight), bg); - break; - case TriBlendMode::Skycap: - fg = Sample32(varying); - output = FadeOut(varying[1], fg); - break; - } - - return output; -} - -SSAVec4i DrawTriangleCodegen::ToBgra(SSAInt index) -{ - SSAVec4i c = BaseColors[index * 4].load_vec4ub(true); - c = c.insert(3, 255); - return c; -} - -SSAInt DrawTriangleCodegen::ToPal8(SSAVec4i c) -{ - return RGB32k[((c[2] >> 3) * 32 + (c[1] >> 3)) * 32 + (c[0] >> 
3)].load(true).zext_int(); -} - -SSAInt DrawTriangleCodegen::ProcessPixel8(SSAInt bg, SSAInt *varying) -{ - SSAVec4i fg; - SSAInt alpha, inv_alpha; - SSAInt output; - SSAInt palindex; - - switch (blendmode) - { - default: - case TriBlendMode::Copy: - output = Shade8(Sample8(varying)); - break; - case TriBlendMode::AlphaBlend: - palindex = Sample8(varying); - output = Shade8(palindex); - output = (palindex == SSAInt(0)).select(bg, output); - break; - case TriBlendMode::AddSolid: - palindex = Sample8(varying); - fg = ToBgra(Shade8(palindex)); - output = ToPal8(blend_add(fg, ToBgra(bg), srcalpha, destalpha)); - output = (palindex == SSAInt(0)).select(bg, output); - break; - case TriBlendMode::Add: - palindex = Sample8(varying); - fg = ToBgra(Shade8(palindex)); - output = ToPal8(blend_add(fg, ToBgra(bg), srcalpha, calc_blend_bgalpha(fg, destalpha))); - output = (palindex == SSAInt(0)).select(bg, output); - break; - case TriBlendMode::Sub: - palindex = Sample8(varying); - fg = ToBgra(Shade8(palindex)); - output = ToPal8(blend_sub(fg, ToBgra(bg), srcalpha, calc_blend_bgalpha(fg, destalpha))); - output = (palindex == SSAInt(0)).select(bg, output); - break; - case TriBlendMode::RevSub: - palindex = Sample8(varying); - fg = ToBgra(Shade8(palindex)); - output = ToPal8(blend_revsub(fg, ToBgra(bg), srcalpha, calc_blend_bgalpha(fg, destalpha))); - output = (palindex == SSAInt(0)).select(bg, output); - break; - case TriBlendMode::Stencil: - output = ToPal8(blend_stencil(ToBgra(Shade8(color)), (Sample8(varying) == SSAInt(0)).select(SSAInt(0), SSAInt(256)), ToBgra(bg), srcalpha, destalpha)); - break; - case TriBlendMode::Shaded: - palindex = Sample8(varying); - output = ToPal8(blend_stencil(ToBgra(Shade8(color)), palindex, ToBgra(bg), srcalpha, destalpha)); - break; - case TriBlendMode::TranslateCopy: - palindex = TranslateSample8(varying); - output = Shade8(palindex); - output = (palindex == SSAInt(0)).select(bg, output); - break; - case TriBlendMode::TranslateAlphaBlend: - 
palindex = TranslateSample8(varying); - output = Shade8(palindex); - output = (palindex == SSAInt(0)).select(bg, output); - break; - case TriBlendMode::TranslateAdd: - palindex = TranslateSample8(varying); - fg = ToBgra(Shade8(palindex)); - output = ToPal8(blend_add(fg, ToBgra(bg), srcalpha, calc_blend_bgalpha(fg, destalpha))); - output = (palindex == SSAInt(0)).select(bg, output); - break; - case TriBlendMode::TranslateSub: - palindex = TranslateSample8(varying); - fg = ToBgra(Shade8(palindex)); - output = ToPal8(blend_sub(fg, ToBgra(bg), srcalpha, calc_blend_bgalpha(fg, destalpha))); - output = (palindex == SSAInt(0)).select(bg, output); - break; - case TriBlendMode::TranslateRevSub: - palindex = TranslateSample8(varying); - fg = ToBgra(Shade8(palindex)); - output = ToPal8(blend_revsub(fg, ToBgra(bg), srcalpha, calc_blend_bgalpha(fg, destalpha))); - output = (palindex == SSAInt(0)).select(bg, output); - break; - case TriBlendMode::AddSrcColorOneMinusSrcColor: - palindex = Sample8(varying); - fg = ToBgra(Shade8(palindex)); - output = ToPal8(blend_add_srccolor_oneminussrccolor(fg, ToBgra(bg))); - output = (palindex == SSAInt(0)).select(bg, output); - break; - case TriBlendMode::Skycap: - fg = ToBgra(Sample8(varying)); - output = ToPal8(FadeOut(varying[1], fg)); - break; - } - - return output; -} - -SSAVec4i DrawTriangleCodegen::FadeOut(SSAInt frac, SSAVec4i fg) -{ - int start_fade = 2; // How fast it should fade out - - SSAInt alpha_top = SSAInt::MAX(SSAInt::MIN(frac.ashr(16 - start_fade), SSAInt(256)), SSAInt(0)); - SSAInt alpha_bottom = SSAInt::MAX(SSAInt::MIN(((2 << 24) - frac).ashr(16 - start_fade), SSAInt(256)), SSAInt(0)); - SSAInt alpha = SSAInt::MIN(alpha_top, alpha_bottom); - SSAInt inv_alpha = 256 - alpha; - - fg = (fg * alpha + SSAVec4i::unpack(color) * inv_alpha) / 256; - return fg.insert(3, 255); -} - void DrawTriangleCodegen::SetStencilBlock(SSAInt block) { StencilBlock = stencilValues[block * 64]; @@ -1087,41 +1426,4 @@ void 
DrawTriangleCodegen::LoadArgs(SSAValue args, SSAValue thread_data) thread.pass_end_y = SSAInt(32000); } -SSATriVertex DrawTriangleCodegen::LoadTriVertex(SSAValue ptr) -{ - SSATriVertex v; - v.x = ptr[0][0].load(true); - v.y = ptr[0][1].load(true); - v.z = ptr[0][2].load(true); - v.w = ptr[0][3].load(true); - for (int i = 0; i < TriVertex::NumVarying; i++) - v.varying[i] = ptr[0][4 + i].load(true); - return v; -} - -void DrawTriangleCodegen::LoadUniforms(SSAValue uniforms) -{ - light = uniforms[0][0].load(true); - subsectorDepth = uniforms[0][1].load(true); - color = uniforms[0][2].load(true); - srcalpha = uniforms[0][3].load(true); - destalpha = uniforms[0][4].load(true); - - SSAShort light_alpha = uniforms[0][5].load(true); - SSAShort light_red = uniforms[0][6].load(true); - SSAShort light_green = uniforms[0][7].load(true); - SSAShort light_blue = uniforms[0][8].load(true); - SSAShort fade_alpha = uniforms[0][9].load(true); - SSAShort fade_red = uniforms[0][10].load(true); - SSAShort fade_green = uniforms[0][11].load(true); - SSAShort fade_blue = uniforms[0][12].load(true); - SSAShort desaturate = uniforms[0][13].load(true); - SSAInt flags = uniforms[0][14].load(true); - shade_constants.light = SSAVec4i(light_blue.zext_int(), light_green.zext_int(), light_red.zext_int(), light_alpha.zext_int()); - shade_constants.fade = SSAVec4i(fade_blue.zext_int(), fade_green.zext_int(), fade_red.zext_int(), fade_alpha.zext_int()); - shade_constants.desaturate = desaturate.zext_int(); - - is_simple_shade = (flags & TriUniforms::simple_shade) == SSAInt(TriUniforms::simple_shade); - is_nearest_filter = (flags & TriUniforms::nearest_filter) == SSAInt(TriUniforms::nearest_filter); - is_fixed_light = (flags & TriUniforms::fixed_light) == SSAInt(TriUniforms::fixed_light); -} +#endif diff --git a/tools/drawergen/fixedfunction/drawtrianglecodegen.h b/tools/drawergen/fixedfunction/drawtrianglecodegen.h index 81a5e57d27..02db1451c1 100644 --- 
a/tools/drawergen/fixedfunction/drawtrianglecodegen.h +++ b/tools/drawergen/fixedfunction/drawtrianglecodegen.h @@ -30,6 +30,91 @@ struct SSATriVertex SSAFloat varying[TriVertex::NumVarying]; }; +struct SSAStepVariables +{ + SSAFloat W; + SSAFloat Varying[TriVertex::NumVarying]; +}; + +class DrawTriangleCodegen : public DrawerCodegen +{ +public: + void Generate(TriDrawVariant variant, TriBlendMode blendmode, bool truecolor, SSAValue args, SSAValue thread_data); + +private: + void LoadArgs(SSAValue args, SSAValue thread_data); + SSATriVertex LoadTriVertex(SSAValue v); + void LoadUniforms(SSAValue uniforms); + void CalculateGradients(); + SSAFloat FindGradientX(SSAFloat x0, SSAFloat y0, SSAFloat x1, SSAFloat y1, SSAFloat x2, SSAFloat y2, SSAFloat c0, SSAFloat c1, SSAFloat c2); + SSAFloat FindGradientY(SSAFloat x0, SSAFloat y0, SSAFloat x1, SSAFloat y1, SSAFloat x2, SSAFloat y2, SSAFloat c0, SSAFloat c1, SSAFloat c2); + void DrawFullSpans(); + void DrawPartialBlocks(); + + SSAVec4i ProcessPixel32(SSAVec4i bg, SSAInt *varying); + SSAInt ProcessPixel8(SSAInt bg, SSAInt *varying); + SSAVec4i TranslateSample32(SSAInt *varying); + SSAInt TranslateSample8(SSAInt *varying); + SSAVec4i Sample32(SSAInt *varying); + SSAInt Sample8(SSAInt *varying); + SSAInt Shade8(SSAInt c); + SSAVec4i ToBgra(SSAInt index); + SSAInt ToPal8(SSAVec4i c); + SSAVec4i FadeOut(SSAInt frac, SSAVec4i color); + + SSAStack stack_i, stack_y, stack_x; + SSAStack stack_posYW, stack_posXW; + SSAStack stack_posYVarying[TriVertex::NumVarying]; + SSAStack stack_posXVarying[TriVertex::NumVarying]; + SSAStack stack_varyingPos[TriVertex::NumVarying]; + SSAStack stack_lightpos; + SSAStack stack_dest; + SSAStack stack_subsector; + + SSAStepVariables gradientX, gradientY, start; + SSAFloat shade, globVis; + + SSAInt currentlight; + SSAUBytePtr currentcolormap; + + SSAUBytePtr destOrg; + SSAIntPtr subsectorGBuffer; + SSAInt pitch; + SSATriVertex v1; + SSATriVertex v2; + SSATriVertex v3; + SSAUBytePtr texturePixels; + 
SSAInt textureWidth; + SSAInt textureHeight; + SSAUBytePtr translation; + SSAInt color, srcalpha, destalpha; + + SSAInt light; + SSAInt subsectorDepth; + SSAShadeConstants shade_constants; + SSABool is_simple_shade; + SSABool is_nearest_filter; + SSABool is_fixed_light; + + SSAUBytePtr Colormaps; + SSAUBytePtr RGB32k; + SSAUBytePtr BaseColors; + + SSAInt numSpans; + SSAInt numBlocks; + SSAInt startX; + SSAInt startY; + SSAValue fullSpans; // TriFullSpan[] + SSAValue partialBlocks; // TriPartialBlock[] + + TriDrawVariant variant; + TriBlendMode blendmode; + bool truecolor; + int pixelsize; +}; + +#if 0 + class DrawTriangleCodegen : public DrawerCodegen { public: @@ -154,3 +239,5 @@ private: SSAUBytePtr StencilBlock; SSAIntPtr StencilBlockMask; }; + +#endif diff --git a/tools/drawergen/llvmdrawers.cpp b/tools/drawergen/llvmdrawers.cpp index 49eeb5a46a..83f1b14163 100644 --- a/tools/drawergen/llvmdrawers.cpp +++ b/tools/drawergen/llvmdrawers.cpp @@ -204,6 +204,9 @@ void LLVMDrawers::CodegenDrawTriangle(const std::string &name, TriDrawVariant va llvm::Type *LLVMDrawers::GetDrawColumnArgsStruct(llvm::LLVMContext &context) { + if (DrawColumnArgsStruct) + return DrawColumnArgsStruct; + std::vector elements; elements.push_back(llvm::Type::getInt8PtrTy(context)); // uint32_t *dest; elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint32_t *source; @@ -233,11 +236,15 @@ llvm::Type *LLVMDrawers::GetDrawColumnArgsStruct(llvm::LLVMContext &context) elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_blue; elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t desaturate; elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t flags; - return llvm::StructType::create(context, elements, "DrawColumnArgs", false)->getPointerTo(); + DrawColumnArgsStruct = llvm::StructType::create(context, elements, "DrawColumnArgs", false)->getPointerTo(); + return DrawColumnArgsStruct; } llvm::Type 
*LLVMDrawers::GetDrawSpanArgsStruct(llvm::LLVMContext &context) { + if (DrawSpanArgsStruct) + return DrawSpanArgsStruct; + std::vector elements; elements.push_back(llvm::Type::getInt8PtrTy(context)); // uint8_t *destorg; elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint32_t *source; @@ -264,11 +271,15 @@ llvm::Type *LLVMDrawers::GetDrawSpanArgsStruct(llvm::LLVMContext &context) elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_blue; elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t desaturate; elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t flags; - return llvm::StructType::create(context, elements, "DrawSpanArgs", false)->getPointerTo(); + DrawSpanArgsStruct = llvm::StructType::create(context, elements, "DrawSpanArgs", false)->getPointerTo(); + return DrawSpanArgsStruct; } llvm::Type *LLVMDrawers::GetDrawWallArgsStruct(llvm::LLVMContext &context) { + if (DrawWallArgsStruct) + return DrawWallArgsStruct; + std::vector elements; elements.push_back(llvm::Type::getInt8PtrTy(context)); for (int i = 0; i < 8; i++) @@ -285,47 +296,71 @@ llvm::Type *LLVMDrawers::GetDrawWallArgsStruct(llvm::LLVMContext &context) elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_blue; elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t desaturate; elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t flags; - return llvm::StructType::create(context, elements, "DrawWallArgs", false)->getPointerTo(); + DrawWallArgsStruct = llvm::StructType::create(context, elements, "DrawWallArgs", false)->getPointerTo(); + return DrawWallArgsStruct; } llvm::Type *LLVMDrawers::GetDrawSkyArgsStruct(llvm::LLVMContext &context) { + if (DrawSkyArgsStruct) + return DrawSkyArgsStruct; + std::vector elements; elements.push_back(llvm::Type::getInt8PtrTy(context)); for (int i = 0; i < 8; i++) elements.push_back(llvm::Type::getInt8PtrTy(context)); for (int i = 0; i < 15; i++) 
elements.push_back(llvm::Type::getInt32Ty(context)); - return llvm::StructType::create(context, elements, "DrawSkyArgs", false)->getPointerTo(); + DrawSkyArgsStruct = llvm::StructType::create(context, elements, "DrawSkyArgs", false)->getPointerTo(); + return DrawSkyArgsStruct; } llvm::Type *LLVMDrawers::GetWorkerThreadDataStruct(llvm::LLVMContext &context) { + if (WorkerThreadDataStruct) + return WorkerThreadDataStruct; + std::vector elements; for (int i = 0; i < 4; i++) elements.push_back(llvm::Type::getInt32Ty(context)); elements.push_back(llvm::Type::getInt8PtrTy(context)); - return llvm::StructType::create(context, elements, "ThreadData", false)->getPointerTo(); + elements.push_back(GetTriFullSpanStruct(context)); + elements.push_back(GetTriPartialBlockStruct(context)); + for (int i = 0; i < 4; i++) + elements.push_back(llvm::Type::getInt32Ty(context)); + WorkerThreadDataStruct = llvm::StructType::create(context, elements, "ThreadData", false)->getPointerTo(); + return WorkerThreadDataStruct; } llvm::Type *LLVMDrawers::GetTriVertexStruct(llvm::LLVMContext &context) { + if (TriVertexStruct) + return TriVertexStruct; + std::vector elements; for (int i = 0; i < 4 + TriVertex::NumVarying; i++) elements.push_back(llvm::Type::getFloatTy(context)); - return llvm::StructType::create(context, elements, "TriVertex", false)->getPointerTo(); + TriVertexStruct = llvm::StructType::create(context, elements, "TriVertex", false)->getPointerTo(); + return TriVertexStruct; } llvm::Type *LLVMDrawers::GetTriMatrixStruct(llvm::LLVMContext &context) { + if (TriMatrixStruct) + return TriMatrixStruct; + std::vector elements; for (int i = 0; i < 4 * 4; i++) elements.push_back(llvm::Type::getFloatTy(context)); - return llvm::StructType::create(context, elements, "TriMatrix", false)->getPointerTo(); + TriMatrixStruct = llvm::StructType::create(context, elements, "TriMatrix", false)->getPointerTo(); + return TriMatrixStruct; } llvm::Type *LLVMDrawers::GetTriUniformsStruct(llvm::LLVMContext 
&context) { + if (TriUniformsStruct) + return TriUniformsStruct; + std::vector elements; elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t light; elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t subsectorDepth; @@ -343,11 +378,42 @@ llvm::Type *LLVMDrawers::GetTriUniformsStruct(llvm::LLVMContext &context) elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t desaturate; elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t flags; elements.push_back(GetTriMatrixStruct(context)); // TriMatrix objectToClip - return llvm::StructType::create(context, elements, "TriUniforms", false)->getPointerTo(); + TriUniformsStruct = llvm::StructType::create(context, elements, "TriUniforms", false)->getPointerTo(); + return TriUniformsStruct; +} + +llvm::Type *LLVMDrawers::GetTriFullSpanStruct(llvm::LLVMContext &context) +{ + if (TriFullSpanStruct) + return TriFullSpanStruct; + + std::vector elements; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint32_t X; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint32_t Y; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t Length; + TriFullSpanStruct = llvm::StructType::create(context, elements, "TriFullSpan", false)->getPointerTo(); + return TriFullSpanStruct; +} + +llvm::Type *LLVMDrawers::GetTriPartialBlockStruct(llvm::LLVMContext &context) +{ + if (TriPartialBlockStruct) + return TriPartialBlockStruct; + + std::vector elements; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint32_t X; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint32_t Y; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t Mask0; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t Mask1; + TriPartialBlockStruct = llvm::StructType::create(context, elements, "TriPartialBlock", false)->getPointerTo(); + return TriPartialBlockStruct; } llvm::Type *LLVMDrawers::GetTriDrawTriangleArgs(llvm::LLVMContext &context) { + if 
(TriDrawTriangleArgs) + return TriDrawTriangleArgs; + std::vector elements; elements.push_back(llvm::Type::getInt8PtrTy(context)); // uint8_t *dest; elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t pitch; @@ -372,5 +438,6 @@ llvm::Type *LLVMDrawers::GetTriDrawTriangleArgs(llvm::LLVMContext &context) elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint8_t *colormaps; elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint8_t *RGB32k; elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint8_t *BaseColors; - return llvm::StructType::create(context, elements, "TriDrawTriangle", false)->getPointerTo(); + TriDrawTriangleArgs = llvm::StructType::create(context, elements, "TriDrawTriangle", false)->getPointerTo(); + return TriDrawTriangleArgs; } diff --git a/tools/drawergen/llvmdrawers.h b/tools/drawergen/llvmdrawers.h index 3eef605df7..df6078f4d8 100644 --- a/tools/drawergen/llvmdrawers.h +++ b/tools/drawergen/llvmdrawers.h @@ -53,15 +53,29 @@ private: void CodegenDrawSky(const char *name, DrawSkyVariant variant, int columns); void CodegenDrawTriangle(const std::string &name, TriDrawVariant variant, TriBlendMode blendmode, bool truecolor); - static llvm::Type *GetDrawColumnArgsStruct(llvm::LLVMContext &context); - static llvm::Type *GetDrawSpanArgsStruct(llvm::LLVMContext &context); - static llvm::Type *GetDrawWallArgsStruct(llvm::LLVMContext &context); - static llvm::Type *GetDrawSkyArgsStruct(llvm::LLVMContext &context); - static llvm::Type *GetWorkerThreadDataStruct(llvm::LLVMContext &context); - static llvm::Type *GetTriVertexStruct(llvm::LLVMContext &context); - static llvm::Type *GetTriMatrixStruct(llvm::LLVMContext &context); - static llvm::Type *GetTriUniformsStruct(llvm::LLVMContext &context); - static llvm::Type *GetTriDrawTriangleArgs(llvm::LLVMContext &context); + llvm::Type *GetDrawColumnArgsStruct(llvm::LLVMContext &context); + llvm::Type *GetDrawSpanArgsStruct(llvm::LLVMContext &context); + llvm::Type 
*GetDrawWallArgsStruct(llvm::LLVMContext &context); + llvm::Type *GetDrawSkyArgsStruct(llvm::LLVMContext &context); + llvm::Type *GetWorkerThreadDataStruct(llvm::LLVMContext &context); + llvm::Type *GetTriVertexStruct(llvm::LLVMContext &context); + llvm::Type *GetTriMatrixStruct(llvm::LLVMContext &context); + llvm::Type *GetTriUniformsStruct(llvm::LLVMContext &context); + llvm::Type *GetTriFullSpanStruct(llvm::LLVMContext &context); + llvm::Type *GetTriPartialBlockStruct(llvm::LLVMContext &context); + llvm::Type *GetTriDrawTriangleArgs(llvm::LLVMContext &context); + + llvm::Type *DrawColumnArgsStruct = nullptr; + llvm::Type *DrawSpanArgsStruct = nullptr; + llvm::Type *DrawWallArgsStruct = nullptr; + llvm::Type *DrawSkyArgsStruct = nullptr; + llvm::Type *WorkerThreadDataStruct = nullptr; + llvm::Type *TriVertexStruct = nullptr; + llvm::Type *TriMatrixStruct = nullptr; + llvm::Type *TriUniformsStruct = nullptr; + llvm::Type *TriFullSpanStruct = nullptr; + llvm::Type *TriPartialBlockStruct = nullptr; + llvm::Type *TriDrawTriangleArgs = nullptr; LLVMProgram mProgram; std::string mNamePostfix; From 823dc6ea00205e339629339d80a040b5c6d1656e Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 15 Dec 2016 02:39:55 +0100 Subject: [PATCH 490/912] Clamp RGB values in pal mode --- tools/drawergen/fixedfunction/drawtrianglecodegen.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp b/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp index f38b423504..f1cd25c359 100644 --- a/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp +++ b/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp @@ -454,7 +454,10 @@ SSAVec4i DrawTriangleCodegen::ToBgra(SSAInt index) SSAInt DrawTriangleCodegen::ToPal8(SSAVec4i c) { - return RGB32k[((c[2] >> 3) * 32 + (c[1] >> 3)) * 32 + (c[0] >> 3)].load(true).zext_int(); + SSAInt red = SSAInt::clamp(c[0], SSAInt(0), SSAInt(255)); + SSAInt green = SSAInt::clamp(c[1], 
SSAInt(0), SSAInt(255)); + SSAInt blue = SSAInt::clamp(c[2], SSAInt(0), SSAInt(255)); + return RGB32k[((blue >> 3) * 32 + (green >> 3)) * 32 + (red >> 3)].load(true).zext_int(); } SSAInt DrawTriangleCodegen::ProcessPixel8(SSAInt bg, SSAInt *varying) From 99435f5018c95e3972a566acb161c5229a508b05 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 15 Dec 2016 23:29:31 +0100 Subject: [PATCH 491/912] Remove TriDrawVariant from LLVM codegen --- src/r_drawers.cpp | 364 ++++++------------ src/r_drawers.h | 14 +- src/r_poly_triangle.cpp | 78 ++-- src/r_poly_triangle.h | 5 +- .../fixedfunction/drawtrianglecodegen.cpp | 31 +- .../fixedfunction/drawtrianglecodegen.h | 11 +- tools/drawergen/llvmdrawers.cpp | 20 +- tools/drawergen/llvmdrawers.h | 2 +- 8 files changed, 189 insertions(+), 336 deletions(-) diff --git a/src/r_drawers.cpp b/src/r_drawers.cpp index acbe71de45..dd81af1096 100644 --- a/src/r_drawers.cpp +++ b/src/r_drawers.cpp @@ -93,128 +93,66 @@ extern "C" void DrawSky4_SSE2(const DrawSkyArgs *, const WorkerThreadData *); void DrawDoubleSky1_SSE2(const DrawSkyArgs *, const WorkerThreadData *); void DrawDoubleSky4_SSE2(const DrawSkyArgs *, const WorkerThreadData *); - void TriDrawNormal8_0_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriDrawNormal8_1_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriDrawNormal8_2_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriDrawNormal8_3_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriDrawNormal8_4_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriDrawNormal8_5_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriDrawNormal8_6_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriDrawNormal8_7_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriDrawNormal8_8_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriDrawNormal8_9_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void 
TriDrawNormal8_10_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriDrawNormal8_11_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriDrawNormal8_12_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriDrawNormal8_13_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriDrawNormal8_14_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriDrawNormal32_0_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriDrawNormal32_1_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriDrawNormal32_2_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriDrawNormal32_3_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriDrawNormal32_4_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriDrawNormal32_5_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriDrawNormal32_6_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriDrawNormal32_7_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriDrawNormal32_8_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriDrawNormal32_9_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriDrawNormal32_10_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriDrawNormal32_11_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriDrawNormal32_12_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriDrawNormal32_13_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriDrawNormal32_14_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriFillNormal8_0_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriFillNormal8_1_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriFillNormal8_2_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriFillNormal8_3_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriFillNormal8_4_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void 
TriFillNormal8_5_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriFillNormal8_6_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriFillNormal8_7_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriFillNormal8_8_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriFillNormal8_9_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriFillNormal8_10_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriFillNormal8_11_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriFillNormal8_12_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriFillNormal8_13_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriFillNormal8_14_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriFillNormal32_0_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriFillNormal32_1_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriFillNormal32_2_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriFillNormal32_3_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriFillNormal32_4_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriFillNormal32_5_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriFillNormal32_6_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriFillNormal32_7_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriFillNormal32_8_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriFillNormal32_9_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriFillNormal32_10_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriFillNormal32_11_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriFillNormal32_12_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriFillNormal32_13_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriFillNormal32_14_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void 
TriDrawSubsector8_0_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriDrawSubsector8_1_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriDrawSubsector8_2_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriDrawSubsector8_3_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriDrawSubsector8_4_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriDrawSubsector8_5_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriDrawSubsector8_6_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriDrawSubsector8_7_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriDrawSubsector8_8_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriDrawSubsector8_9_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriDrawSubsector8_10_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriDrawSubsector8_11_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriDrawSubsector8_12_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriDrawSubsector8_13_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriDrawSubsector8_14_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriDrawSubsector32_0_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriDrawSubsector32_1_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriDrawSubsector32_2_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriDrawSubsector32_3_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriDrawSubsector32_4_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriDrawSubsector32_5_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriDrawSubsector32_6_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriDrawSubsector32_7_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriDrawSubsector32_8_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void 
TriDrawSubsector32_9_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriDrawSubsector32_10_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriDrawSubsector32_11_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriDrawSubsector32_12_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriDrawSubsector32_13_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriDrawSubsector32_14_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriFillSubsector8_0_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriFillSubsector8_1_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriFillSubsector8_2_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriFillSubsector8_3_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriFillSubsector8_4_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriFillSubsector8_5_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriFillSubsector8_6_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriFillSubsector8_7_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriFillSubsector8_8_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriFillSubsector8_9_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriFillSubsector8_10_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriFillSubsector8_11_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriFillSubsector8_12_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriFillSubsector8_13_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriFillSubsector8_14_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriFillSubsector32_0_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriFillSubsector32_1_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriFillSubsector32_2_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void 
TriFillSubsector32_3_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriFillSubsector32_4_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriFillSubsector32_5_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriFillSubsector32_6_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriFillSubsector32_7_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriFillSubsector32_8_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriFillSubsector32_9_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriFillSubsector32_10_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriFillSubsector32_11_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriFillSubsector32_12_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriFillSubsector32_13_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriFillSubsector32_14_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriStencil_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriStencilClose_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriDraw8_0_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriDraw8_1_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriDraw8_2_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriDraw8_3_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriDraw8_4_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriDraw8_5_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriDraw8_6_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriDraw8_7_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriDraw8_8_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriDraw8_9_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriDraw8_10_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriDraw8_11_SSE2(const TriDrawTriangleArgs 
*, WorkerThreadData *); + void TriDraw8_12_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriDraw8_13_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriDraw8_14_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriDraw32_0_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriDraw32_1_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriDraw32_2_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriDraw32_3_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriDraw32_4_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriDraw32_5_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriDraw32_6_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriDraw32_7_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriDraw32_8_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriDraw32_9_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriDraw32_10_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriDraw32_11_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriDraw32_12_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriDraw32_13_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriDraw32_14_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriFill8_0_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriFill8_1_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriFill8_2_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriFill8_3_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriFill8_4_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriFill8_5_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriFill8_6_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriFill8_7_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriFill8_8_SSE2(const TriDrawTriangleArgs *, 
WorkerThreadData *); + void TriFill8_9_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriFill8_10_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriFill8_11_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriFill8_12_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriFill8_13_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriFill8_14_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriFill32_0_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriFill32_1_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriFill32_2_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriFill32_3_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriFill32_4_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriFill32_5_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriFill32_6_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriFill32_7_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriFill32_8_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriFill32_9_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriFill32_10_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriFill32_11_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriFill32_12_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriFill32_13_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); + void TriFill32_14_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); } ///////////////////////////////////////////////////////////////////////////// @@ -283,128 +221,66 @@ Drawers::Drawers() DrawSky4 = DrawSky4_SSE2; DrawDoubleSky1 = DrawDoubleSky1_SSE2; DrawDoubleSky4 = DrawDoubleSky4_SSE2; - TriDrawNormal8.push_back(TriDrawNormal8_0_SSE2); - TriDrawNormal8.push_back(TriDrawNormal8_1_SSE2); - TriDrawNormal8.push_back(TriDrawNormal8_2_SSE2); - 
TriDrawNormal8.push_back(TriDrawNormal8_3_SSE2); - TriDrawNormal8.push_back(TriDrawNormal8_4_SSE2); - TriDrawNormal8.push_back(TriDrawNormal8_5_SSE2); - TriDrawNormal8.push_back(TriDrawNormal8_6_SSE2); - TriDrawNormal8.push_back(TriDrawNormal8_7_SSE2); - TriDrawNormal8.push_back(TriDrawNormal8_8_SSE2); - TriDrawNormal8.push_back(TriDrawNormal8_9_SSE2); - TriDrawNormal8.push_back(TriDrawNormal8_10_SSE2); - TriDrawNormal8.push_back(TriDrawNormal8_11_SSE2); - TriDrawNormal8.push_back(TriDrawNormal8_12_SSE2); - TriDrawNormal8.push_back(TriDrawNormal8_13_SSE2); - TriDrawNormal8.push_back(TriDrawNormal8_14_SSE2); - TriDrawNormal32.push_back(TriDrawNormal32_0_SSE2); - TriDrawNormal32.push_back(TriDrawNormal32_1_SSE2); - TriDrawNormal32.push_back(TriDrawNormal32_2_SSE2); - TriDrawNormal32.push_back(TriDrawNormal32_3_SSE2); - TriDrawNormal32.push_back(TriDrawNormal32_4_SSE2); - TriDrawNormal32.push_back(TriDrawNormal32_5_SSE2); - TriDrawNormal32.push_back(TriDrawNormal32_6_SSE2); - TriDrawNormal32.push_back(TriDrawNormal32_7_SSE2); - TriDrawNormal32.push_back(TriDrawNormal32_8_SSE2); - TriDrawNormal32.push_back(TriDrawNormal32_9_SSE2); - TriDrawNormal32.push_back(TriDrawNormal32_10_SSE2); - TriDrawNormal32.push_back(TriDrawNormal32_11_SSE2); - TriDrawNormal32.push_back(TriDrawNormal32_12_SSE2); - TriDrawNormal32.push_back(TriDrawNormal32_13_SSE2); - TriDrawNormal32.push_back(TriDrawNormal32_14_SSE2); - TriFillNormal8.push_back(TriFillNormal8_0_SSE2); - TriFillNormal8.push_back(TriFillNormal8_1_SSE2); - TriFillNormal8.push_back(TriFillNormal8_2_SSE2); - TriFillNormal8.push_back(TriFillNormal8_3_SSE2); - TriFillNormal8.push_back(TriFillNormal8_4_SSE2); - TriFillNormal8.push_back(TriFillNormal8_5_SSE2); - TriFillNormal8.push_back(TriFillNormal8_6_SSE2); - TriFillNormal8.push_back(TriFillNormal8_7_SSE2); - TriFillNormal8.push_back(TriFillNormal8_8_SSE2); - TriFillNormal8.push_back(TriFillNormal8_9_SSE2); - TriFillNormal8.push_back(TriFillNormal8_10_SSE2); - 
TriFillNormal8.push_back(TriFillNormal8_11_SSE2); - TriFillNormal8.push_back(TriFillNormal8_12_SSE2); - TriFillNormal8.push_back(TriFillNormal8_13_SSE2); - TriFillNormal8.push_back(TriFillNormal8_14_SSE2); - TriFillNormal32.push_back(TriFillNormal32_0_SSE2); - TriFillNormal32.push_back(TriFillNormal32_1_SSE2); - TriFillNormal32.push_back(TriFillNormal32_2_SSE2); - TriFillNormal32.push_back(TriFillNormal32_3_SSE2); - TriFillNormal32.push_back(TriFillNormal32_4_SSE2); - TriFillNormal32.push_back(TriFillNormal32_5_SSE2); - TriFillNormal32.push_back(TriFillNormal32_6_SSE2); - TriFillNormal32.push_back(TriFillNormal32_7_SSE2); - TriFillNormal32.push_back(TriFillNormal32_8_SSE2); - TriFillNormal32.push_back(TriFillNormal32_9_SSE2); - TriFillNormal32.push_back(TriFillNormal32_10_SSE2); - TriFillNormal32.push_back(TriFillNormal32_11_SSE2); - TriFillNormal32.push_back(TriFillNormal32_12_SSE2); - TriFillNormal32.push_back(TriFillNormal32_13_SSE2); - TriFillNormal32.push_back(TriFillNormal32_14_SSE2); - TriDrawSubsector8.push_back(TriDrawSubsector8_0_SSE2); - TriDrawSubsector8.push_back(TriDrawSubsector8_1_SSE2); - TriDrawSubsector8.push_back(TriDrawSubsector8_2_SSE2); - TriDrawSubsector8.push_back(TriDrawSubsector8_3_SSE2); - TriDrawSubsector8.push_back(TriDrawSubsector8_4_SSE2); - TriDrawSubsector8.push_back(TriDrawSubsector8_5_SSE2); - TriDrawSubsector8.push_back(TriDrawSubsector8_6_SSE2); - TriDrawSubsector8.push_back(TriDrawSubsector8_7_SSE2); - TriDrawSubsector8.push_back(TriDrawSubsector8_8_SSE2); - TriDrawSubsector8.push_back(TriDrawSubsector8_9_SSE2); - TriDrawSubsector8.push_back(TriDrawSubsector8_10_SSE2); - TriDrawSubsector8.push_back(TriDrawSubsector8_11_SSE2); - TriDrawSubsector8.push_back(TriDrawSubsector8_12_SSE2); - TriDrawSubsector8.push_back(TriDrawSubsector8_13_SSE2); - TriDrawSubsector8.push_back(TriDrawSubsector8_14_SSE2); - TriDrawSubsector32.push_back(TriDrawSubsector32_0_SSE2); - TriDrawSubsector32.push_back(TriDrawSubsector32_1_SSE2); - 
TriDrawSubsector32.push_back(TriDrawSubsector32_2_SSE2); - TriDrawSubsector32.push_back(TriDrawSubsector32_3_SSE2); - TriDrawSubsector32.push_back(TriDrawSubsector32_4_SSE2); - TriDrawSubsector32.push_back(TriDrawSubsector32_5_SSE2); - TriDrawSubsector32.push_back(TriDrawSubsector32_6_SSE2); - TriDrawSubsector32.push_back(TriDrawSubsector32_7_SSE2); - TriDrawSubsector32.push_back(TriDrawSubsector32_8_SSE2); - TriDrawSubsector32.push_back(TriDrawSubsector32_9_SSE2); - TriDrawSubsector32.push_back(TriDrawSubsector32_10_SSE2); - TriDrawSubsector32.push_back(TriDrawSubsector32_11_SSE2); - TriDrawSubsector32.push_back(TriDrawSubsector32_12_SSE2); - TriDrawSubsector32.push_back(TriDrawSubsector32_13_SSE2); - TriDrawSubsector32.push_back(TriDrawSubsector32_14_SSE2); - TriFillSubsector8.push_back(TriFillSubsector8_0_SSE2); - TriFillSubsector8.push_back(TriFillSubsector8_1_SSE2); - TriFillSubsector8.push_back(TriFillSubsector8_2_SSE2); - TriFillSubsector8.push_back(TriFillSubsector8_3_SSE2); - TriFillSubsector8.push_back(TriFillSubsector8_4_SSE2); - TriFillSubsector8.push_back(TriFillSubsector8_5_SSE2); - TriFillSubsector8.push_back(TriFillSubsector8_6_SSE2); - TriFillSubsector8.push_back(TriFillSubsector8_7_SSE2); - TriFillSubsector8.push_back(TriFillSubsector8_8_SSE2); - TriFillSubsector8.push_back(TriFillSubsector8_9_SSE2); - TriFillSubsector8.push_back(TriFillSubsector8_10_SSE2); - TriFillSubsector8.push_back(TriFillSubsector8_11_SSE2); - TriFillSubsector8.push_back(TriFillSubsector8_12_SSE2); - TriFillSubsector8.push_back(TriFillSubsector8_13_SSE2); - TriFillSubsector8.push_back(TriFillSubsector8_14_SSE2); - TriFillSubsector32.push_back(TriFillSubsector32_0_SSE2); - TriFillSubsector32.push_back(TriFillSubsector32_1_SSE2); - TriFillSubsector32.push_back(TriFillSubsector32_2_SSE2); - TriFillSubsector32.push_back(TriFillSubsector32_3_SSE2); - TriFillSubsector32.push_back(TriFillSubsector32_4_SSE2); - TriFillSubsector32.push_back(TriFillSubsector32_5_SSE2); - 
TriFillSubsector32.push_back(TriFillSubsector32_6_SSE2); - TriFillSubsector32.push_back(TriFillSubsector32_7_SSE2); - TriFillSubsector32.push_back(TriFillSubsector32_8_SSE2); - TriFillSubsector32.push_back(TriFillSubsector32_9_SSE2); - TriFillSubsector32.push_back(TriFillSubsector32_10_SSE2); - TriFillSubsector32.push_back(TriFillSubsector32_11_SSE2); - TriFillSubsector32.push_back(TriFillSubsector32_12_SSE2); - TriFillSubsector32.push_back(TriFillSubsector32_13_SSE2); - TriFillSubsector32.push_back(TriFillSubsector32_14_SSE2); - TriStencil = TriStencil_SSE2; - TriStencilClose = TriStencilClose_SSE2; + TriDraw8.push_back(TriDraw8_0_SSE2); + TriDraw8.push_back(TriDraw8_1_SSE2); + TriDraw8.push_back(TriDraw8_2_SSE2); + TriDraw8.push_back(TriDraw8_3_SSE2); + TriDraw8.push_back(TriDraw8_4_SSE2); + TriDraw8.push_back(TriDraw8_5_SSE2); + TriDraw8.push_back(TriDraw8_6_SSE2); + TriDraw8.push_back(TriDraw8_7_SSE2); + TriDraw8.push_back(TriDraw8_8_SSE2); + TriDraw8.push_back(TriDraw8_9_SSE2); + TriDraw8.push_back(TriDraw8_10_SSE2); + TriDraw8.push_back(TriDraw8_11_SSE2); + TriDraw8.push_back(TriDraw8_12_SSE2); + TriDraw8.push_back(TriDraw8_13_SSE2); + TriDraw8.push_back(TriDraw8_14_SSE2); + TriDraw32.push_back(TriDraw32_0_SSE2); + TriDraw32.push_back(TriDraw32_1_SSE2); + TriDraw32.push_back(TriDraw32_2_SSE2); + TriDraw32.push_back(TriDraw32_3_SSE2); + TriDraw32.push_back(TriDraw32_4_SSE2); + TriDraw32.push_back(TriDraw32_5_SSE2); + TriDraw32.push_back(TriDraw32_6_SSE2); + TriDraw32.push_back(TriDraw32_7_SSE2); + TriDraw32.push_back(TriDraw32_8_SSE2); + TriDraw32.push_back(TriDraw32_9_SSE2); + TriDraw32.push_back(TriDraw32_10_SSE2); + TriDraw32.push_back(TriDraw32_11_SSE2); + TriDraw32.push_back(TriDraw32_12_SSE2); + TriDraw32.push_back(TriDraw32_13_SSE2); + TriDraw32.push_back(TriDraw32_14_SSE2); + TriFill8.push_back(TriFill8_0_SSE2); + TriFill8.push_back(TriFill8_1_SSE2); + TriFill8.push_back(TriFill8_2_SSE2); + TriFill8.push_back(TriFill8_3_SSE2); + 
TriFill8.push_back(TriFill8_4_SSE2); + TriFill8.push_back(TriFill8_5_SSE2); + TriFill8.push_back(TriFill8_6_SSE2); + TriFill8.push_back(TriFill8_7_SSE2); + TriFill8.push_back(TriFill8_8_SSE2); + TriFill8.push_back(TriFill8_9_SSE2); + TriFill8.push_back(TriFill8_10_SSE2); + TriFill8.push_back(TriFill8_11_SSE2); + TriFill8.push_back(TriFill8_12_SSE2); + TriFill8.push_back(TriFill8_13_SSE2); + TriFill8.push_back(TriFill8_14_SSE2); + TriFill32.push_back(TriFill32_0_SSE2); + TriFill32.push_back(TriFill32_1_SSE2); + TriFill32.push_back(TriFill32_2_SSE2); + TriFill32.push_back(TriFill32_3_SSE2); + TriFill32.push_back(TriFill32_4_SSE2); + TriFill32.push_back(TriFill32_5_SSE2); + TriFill32.push_back(TriFill32_6_SSE2); + TriFill32.push_back(TriFill32_7_SSE2); + TriFill32.push_back(TriFill32_8_SSE2); + TriFill32.push_back(TriFill32_9_SSE2); + TriFill32.push_back(TriFill32_10_SSE2); + TriFill32.push_back(TriFill32_11_SSE2); + TriFill32.push_back(TriFill32_12_SSE2); + TriFill32.push_back(TriFill32_13_SSE2); + TriFill32.push_back(TriFill32_14_SSE2); } Drawers *Drawers::Instance() diff --git a/src/r_drawers.h b/src/r_drawers.h index bc776be5bc..5437966093 100644 --- a/src/r_drawers.h +++ b/src/r_drawers.h @@ -354,16 +354,10 @@ public: void(*DrawDoubleSky1)(const DrawSkyArgs *, const WorkerThreadData *) = nullptr; void(*DrawDoubleSky4)(const DrawSkyArgs *, const WorkerThreadData *) = nullptr; - std::vector TriDrawNormal8; - std::vector TriDrawNormal32; - std::vector TriFillNormal8; - std::vector TriFillNormal32; - std::vector TriDrawSubsector8; - std::vector TriDrawSubsector32; - std::vector TriFillSubsector8; - std::vector TriFillSubsector32; - void(*TriStencil)(const TriDrawTriangleArgs *, WorkerThreadData *) = nullptr; - void(*TriStencilClose)(const TriDrawTriangleArgs *, WorkerThreadData *) = nullptr; + std::vector TriDraw8; + std::vector TriDraw32; + std::vector TriFill8; + std::vector TriFill32; private: Drawers(); diff --git a/src/r_poly_triangle.cpp 
b/src/r_poly_triangle.cpp index 164fc98e2c..90f78b639e 100644 --- a/src/r_poly_triangle.cpp +++ b/src/r_poly_triangle.cpp @@ -88,19 +88,48 @@ void PolyTriangleDrawer::draw_arrays(const PolyDrawArgs &drawargs, TriDrawVarian return; auto llvm = Drawers::Instance(); - PolyDrawFuncPtr setupfunc = nullptr; - PolyDrawFuncPtr drawfunc = nullptr; + + PolyDrawFuncPtr drawfuncs[3]; + int num_drawfuncs = 0; + int bmode = (int)blendmode; switch (variant) { + case TriDrawVariant::DrawNormal: + drawfuncs[num_drawfuncs++] = &ScreenTriangle::SetupNormal; + drawfuncs[num_drawfuncs++] = dest_bgra ? llvm->TriDraw32[bmode] : llvm->TriDraw8[bmode]; + drawfuncs[num_drawfuncs++] = &ScreenTriangle::SubsectorWrite; + break; + + case TriDrawVariant::FillNormal: + drawfuncs[num_drawfuncs++] = &ScreenTriangle::SetupNormal; + drawfuncs[num_drawfuncs++] = dest_bgra ? llvm->TriFill32[bmode] : llvm->TriFill8[bmode]; + drawfuncs[num_drawfuncs++] = &ScreenTriangle::SubsectorWrite; + break; + + case TriDrawVariant::DrawSubsector: + drawfuncs[num_drawfuncs++] = &ScreenTriangle::SetupSubsector; + drawfuncs[num_drawfuncs++] = dest_bgra ? llvm->TriDraw32[bmode] : llvm->TriDraw8[bmode]; + break; + + case TriDrawVariant::FillSubsector: + drawfuncs[num_drawfuncs++] = &ScreenTriangle::SetupSubsector; + drawfuncs[num_drawfuncs++] = dest_bgra ? llvm->TriFill32[bmode] : llvm->TriFill8[bmode]; + break; + + case TriDrawVariant::Stencil: + drawfuncs[num_drawfuncs++] = &ScreenTriangle::SetupNormal; + drawfuncs[num_drawfuncs++] = &ScreenTriangle::StencilWrite; + break; + + case TriDrawVariant::StencilClose: + drawfuncs[num_drawfuncs++] = &ScreenTriangle::SetupNormal; + drawfuncs[num_drawfuncs++] = &ScreenTriangle::StencilWrite; + drawfuncs[num_drawfuncs++] = &ScreenTriangle::SubsectorWrite; + break; + default: - case TriDrawVariant::DrawNormal: setupfunc = &ScreenTriangle::SetupNormal; drawfunc = dest_bgra ? 
llvm->TriDrawNormal32[bmode] : llvm->TriDrawNormal8[bmode]; break; - case TriDrawVariant::FillNormal: setupfunc = &ScreenTriangle::SetupNormal; drawfunc = dest_bgra ? llvm->TriFillNormal32[bmode] : llvm->TriFillNormal8[bmode]; break; - case TriDrawVariant::DrawSubsector: setupfunc = &ScreenTriangle::SetupSubsector; drawfunc = dest_bgra ? llvm->TriDrawSubsector32[bmode] : llvm->TriDrawSubsector8[bmode]; break; - case TriDrawVariant::FuzzSubsector: - case TriDrawVariant::FillSubsector: setupfunc = &ScreenTriangle::SetupSubsector; drawfunc = dest_bgra ? llvm->TriFillSubsector32[bmode] : llvm->TriFillSubsector8[bmode]; break; - case TriDrawVariant::Stencil: drawfunc = &ScreenTriangle::StencilFunc; break; - case TriDrawVariant::StencilClose: drawfunc = &ScreenTriangle::StencilCloseFunc; break; + break; } TriDrawTriangleArgs args; @@ -136,7 +165,7 @@ void PolyTriangleDrawer::draw_arrays(const PolyDrawArgs &drawargs, TriDrawVarian { for (int j = 0; j < 3; j++) vert[j] = shade_vertex(*drawargs.objectToClip, drawargs.clipPlane, *(vinput++)); - draw_shaded_triangle(vert, ccw, &args, thread, setupfunc, drawfunc); + draw_shaded_triangle(vert, ccw, &args, thread, drawfuncs, num_drawfuncs); } } else if (drawargs.mode == TriangleDrawMode::Fan) @@ -146,7 +175,7 @@ void PolyTriangleDrawer::draw_arrays(const PolyDrawArgs &drawargs, TriDrawVarian for (int i = 2; i < vcount; i++) { vert[2] = shade_vertex(*drawargs.objectToClip, drawargs.clipPlane, *(vinput++)); - draw_shaded_triangle(vert, ccw, &args, thread, setupfunc, drawfunc); + draw_shaded_triangle(vert, ccw, &args, thread, drawfuncs, num_drawfuncs); vert[1] = vert[2]; } } @@ -157,7 +186,7 @@ void PolyTriangleDrawer::draw_arrays(const PolyDrawArgs &drawargs, TriDrawVarian for (int i = 2; i < vcount; i++) { vert[2] = shade_vertex(*drawargs.objectToClip, drawargs.clipPlane, *(vinput++)); - draw_shaded_triangle(vert, ccw, &args, thread, setupfunc, drawfunc); + draw_shaded_triangle(vert, ccw, &args, thread, drawfuncs, num_drawfuncs); 
vert[0] = vert[1]; vert[1] = vert[2]; ccw = !ccw; @@ -176,7 +205,7 @@ ShadedTriVertex PolyTriangleDrawer::shade_vertex(const TriMatrix &objectToClip, return sv; } -void PolyTriangleDrawer::draw_shaded_triangle(const ShadedTriVertex *vert, bool ccw, TriDrawTriangleArgs *args, WorkerThreadData *thread, PolyDrawFuncPtr setupfunc, PolyDrawFuncPtr drawfunc) +void PolyTriangleDrawer::draw_shaded_triangle(const ShadedTriVertex *vert, bool ccw, TriDrawTriangleArgs *args, WorkerThreadData *thread, PolyDrawFuncPtr *drawfuncs, int num_drawfuncs) { // Cull, clip and generate additional vertices as needed TriVertex clippedvert[max_additional_vertices]; @@ -220,8 +249,9 @@ void PolyTriangleDrawer::draw_shaded_triangle(const ShadedTriVertex *vert, bool args->v1 = &clippedvert[numclipvert - 1]; args->v2 = &clippedvert[i - 1]; args->v3 = &clippedvert[i - 2]; - if (setupfunc) setupfunc(args, thread); - drawfunc(args, thread); + + for (int j = 0; j < num_drawfuncs; j++) + drawfuncs[j](args, thread); } } else @@ -231,8 +261,9 @@ void PolyTriangleDrawer::draw_shaded_triangle(const ShadedTriVertex *vert, bool args->v1 = &clippedvert[0]; args->v2 = &clippedvert[i - 1]; args->v3 = &clippedvert[i]; - if (setupfunc) setupfunc(args, thread); - drawfunc(args, thread); + + for (int j = 0; j < num_drawfuncs; j++) + drawfuncs[j](args, thread); } } } @@ -1508,16 +1539,3 @@ void ScreenTriangle::Draw(const TriDrawTriangleArgs *args, WorkerThreadData *thr } } #endif - -void ScreenTriangle::StencilFunc(const TriDrawTriangleArgs *args, WorkerThreadData *thread) -{ - SetupNormal(args, thread); - StencilWrite(args, thread); -} - -void ScreenTriangle::StencilCloseFunc(const TriDrawTriangleArgs *args, WorkerThreadData *thread) -{ - SetupNormal(args, thread); - StencilWrite(args, thread); - SubsectorWrite(args, thread); -} diff --git a/src/r_poly_triangle.h b/src/r_poly_triangle.h index 59e52ef66d..4fc302eaba 100644 --- a/src/r_poly_triangle.h +++ b/src/r_poly_triangle.h @@ -165,7 +165,7 @@ public: 
private: static ShadedTriVertex shade_vertex(const TriMatrix &objectToClip, const float *clipPlane, const TriVertex &v); static void draw_arrays(const PolyDrawArgs &args, TriDrawVariant variant, TriBlendMode blendmode, WorkerThreadData *thread); - static void draw_shaded_triangle(const ShadedTriVertex *vertices, bool ccw, TriDrawTriangleArgs *args, WorkerThreadData *thread, PolyDrawFuncPtr setupfunc, PolyDrawFuncPtr drawfunc); + static void draw_shaded_triangle(const ShadedTriVertex *vertices, bool ccw, TriDrawTriangleArgs *args, WorkerThreadData *thread, PolyDrawFuncPtr *drawfuncs, int num_drawfuncs); static bool cullhalfspace(float clipdistance1, float clipdistance2, float &t1, float &t2); static void clipedge(const ShadedTriVertex *verts, TriVertex *clippedvert, int &numclipvert); @@ -276,9 +276,6 @@ struct ScreenTriangleStepVariables class ScreenTriangle { public: - static void StencilFunc(const TriDrawTriangleArgs *args, WorkerThreadData *thread); - static void StencilCloseFunc(const TriDrawTriangleArgs *args, WorkerThreadData *thread); - static void SetupNormal(const TriDrawTriangleArgs *args, WorkerThreadData *thread); static void SetupSubsector(const TriDrawTriangleArgs *args, WorkerThreadData *thread); static void StencilWrite(const TriDrawTriangleArgs *args, WorkerThreadData *thread); diff --git a/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp b/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp index f1cd25c359..38e9922c0a 100644 --- a/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp +++ b/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp @@ -32,11 +32,11 @@ #include "ssa/ssa_struct_type.h" #include "ssa/ssa_value.h" -void DrawTriangleCodegen::Generate(TriDrawVariant variant, TriBlendMode blendmode, bool truecolor, SSAValue args, SSAValue thread_data) +void DrawTriangleCodegen::Generate(TriBlendMode blendmode, bool truecolor, bool colorfill, SSAValue args, SSAValue thread_data) { - this->variant = variant; this->blendmode = blendmode; 
this->truecolor = truecolor; + this->colorfill = colorfill; pixelsize = truecolor ? 4 : 1; LoadArgs(args, thread_data); @@ -60,7 +60,6 @@ void DrawTriangleCodegen::DrawFullSpans() SSAInt height = SSAInt(8); stack_dest.store(destOrg[(spanX + spanY * pitch) * pixelsize]); - stack_subsector.store(subsectorGBuffer[spanX + spanY * pitch]); stack_posYW.store(start.W + gradientX.W * (spanX - startX) + gradientY.W * (spanY - startY)); for (int j = 0; j < TriVertex::NumVarying; j++) stack_posYVarying[j].store(start.Varying[j] + gradientX.Varying[j] * (spanX - startX) + gradientY.Varying[j] * (spanY - startY)); @@ -69,7 +68,6 @@ void DrawTriangleCodegen::DrawFullSpans() SSAForBlock loop_y; SSAInt y = stack_y.load(); SSAUBytePtr dest = stack_dest.load(); - SSAIntPtr subsector = stack_subsector.load(); SSAStepVariables blockPosY; blockPosY.W = stack_posYW.load(); for (int j = 0; j < TriVertex::NumVarying; j++) @@ -121,9 +119,6 @@ void DrawTriangleCodegen::DrawFullSpans() SSAUBytePtr destptr = dest[(x * 8 + ix) * 4]; destptr.store_vec4ub(ProcessPixel32(destptr.load_vec4ub(false), varyingPos)); - - if (variant != TriDrawVariant::DrawSubsector && variant != TriDrawVariant::FillSubsector && variant != TriDrawVariant::FuzzSubsector) - subsector[x * 8 + ix].store(subsectorDepth); } else { @@ -133,9 +128,6 @@ void DrawTriangleCodegen::DrawFullSpans() SSAUBytePtr destptr = dest[(x * 8 + ix)]; destptr.store(ProcessPixel8(destptr.load(false).zext_int(), varyingPos).trunc_ubyte()); - - if (variant != TriDrawVariant::DrawSubsector && variant != TriDrawVariant::FillSubsector && variant != TriDrawVariant::FuzzSubsector) - subsector[x * 8 + ix].store(subsectorDepth); } for (int j = 0; j < TriVertex::NumVarying; j++) @@ -157,7 +149,6 @@ void DrawTriangleCodegen::DrawFullSpans() for (int j = 0; j < TriVertex::NumVarying; j++) stack_posYVarying[j].store(blockPosY.Varying[j] + gradientY.Varying[j]); stack_dest.store(dest[pitch * pixelsize]); - stack_subsector.store(subsector[pitch]); 
stack_y.store(y + 1); } loop_y.end_block(); @@ -180,7 +171,6 @@ void DrawTriangleCodegen::DrawPartialBlocks() SSAInt mask1 = partialBlocks[i][3].load(true); SSAUBytePtr dest = destOrg[(blockX + blockY * pitch) * pixelsize]; - SSAIntPtr subsector = subsectorGBuffer[blockX + blockY * pitch]; SSAStepVariables blockPosY; blockPosY.W = start.W + gradientX.W * (blockX - startX) + gradientY.W * (blockY - startY); @@ -229,9 +219,6 @@ void DrawTriangleCodegen::DrawPartialBlocks() SSAUBytePtr destptr = dest[x * 4]; destptr.store_vec4ub(ProcessPixel32(destptr.load_vec4ub(false), varyingPos)); - - if (variant != TriDrawVariant::DrawSubsector && variant != TriDrawVariant::FillSubsector && variant != TriDrawVariant::FuzzSubsector) - subsector[x].store(subsectorDepth); } else { @@ -241,9 +228,6 @@ void DrawTriangleCodegen::DrawPartialBlocks() SSAUBytePtr destptr = dest[x]; destptr.store(ProcessPixel8(destptr.load(false).zext_int(), varyingPos).trunc_ubyte()); - - if (variant != TriDrawVariant::DrawSubsector && variant != TriDrawVariant::FillSubsector && variant != TriDrawVariant::FuzzSubsector) - subsector[x].store(subsectorDepth); } } branch.end_block(); @@ -258,7 +242,6 @@ void DrawTriangleCodegen::DrawPartialBlocks() blockPosY.Varying[j] = blockPosY.Varying[j] + gradientY.Varying[j]; dest = dest[pitch * pixelsize]; - subsector = subsector[pitch]; } } @@ -276,7 +259,7 @@ SSAVec4i DrawTriangleCodegen::TranslateSample32(SSAInt *varying) SSAInt vpos = ((vfrac >> 16) * textureHeight) >> 16; SSAInt uvoffset = upos * textureHeight + vpos; - if (variant == TriDrawVariant::FillNormal || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector) + if (colorfill) return translation[color * 4].load_vec4ub(true); else return translation[texturePixels[uvoffset].load(true).zext_int() * 4].load_vec4ub(true); @@ -291,7 +274,7 @@ SSAInt DrawTriangleCodegen::TranslateSample8(SSAInt *varying) SSAInt vpos = ((vfrac >> 16) * textureHeight) >> 16; SSAInt uvoffset = upos * 
textureHeight + vpos; - if (variant == TriDrawVariant::FillNormal || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector) + if (colorfill) return translation[color].load(true).zext_int(); else return translation[texturePixels[uvoffset].load(true).zext_int()].load(true).zext_int(); @@ -299,7 +282,7 @@ SSAInt DrawTriangleCodegen::TranslateSample8(SSAInt *varying) SSAVec4i DrawTriangleCodegen::Sample32(SSAInt *varying) { - if (variant == TriDrawVariant::FillNormal || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector) + if (colorfill) return SSAVec4i::unpack(color); SSAInt ufrac = varying[0] << 8; @@ -362,7 +345,7 @@ SSAInt DrawTriangleCodegen::Sample8(SSAInt *varying) SSAInt vpos = ((vfrac >> 16) * textureHeight) >> 16; SSAInt uvoffset = upos * textureHeight + vpos; - if (variant == TriDrawVariant::FillNormal || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector) + if (colorfill) return color; else return texturePixels[uvoffset].load(true).zext_int(); @@ -593,7 +576,6 @@ void DrawTriangleCodegen::LoadArgs(SSAValue args, SSAValue thread_data) textureHeight = args[0][11].load(true); translation = args[0][12].load(true); LoadUniforms(args[0][13].load(true)); - subsectorGBuffer = args[0][19].load(true); if (!truecolor) { Colormaps = args[0][20].load(true); @@ -624,7 +606,6 @@ SSATriVertex DrawTriangleCodegen::LoadTriVertex(SSAValue ptr) void DrawTriangleCodegen::LoadUniforms(SSAValue uniforms) { light = uniforms[0][0].load(true); - subsectorDepth = uniforms[0][1].load(true); color = uniforms[0][2].load(true); srcalpha = uniforms[0][3].load(true); destalpha = uniforms[0][4].load(true); diff --git a/tools/drawergen/fixedfunction/drawtrianglecodegen.h b/tools/drawergen/fixedfunction/drawtrianglecodegen.h index 02db1451c1..8197656fa2 100644 --- a/tools/drawergen/fixedfunction/drawtrianglecodegen.h +++ b/tools/drawergen/fixedfunction/drawtrianglecodegen.h @@ -39,7 +39,7 @@ 
struct SSAStepVariables class DrawTriangleCodegen : public DrawerCodegen { public: - void Generate(TriDrawVariant variant, TriBlendMode blendmode, bool truecolor, SSAValue args, SSAValue thread_data); + void Generate(TriBlendMode blendmode, bool truecolor, bool colorfill, SSAValue args, SSAValue thread_data); private: void LoadArgs(SSAValue args, SSAValue thread_data); @@ -69,7 +69,6 @@ private: SSAStack stack_varyingPos[TriVertex::NumVarying]; SSAStack stack_lightpos; SSAStack stack_dest; - SSAStack stack_subsector; SSAStepVariables gradientX, gradientY, start; SSAFloat shade, globVis; @@ -78,7 +77,6 @@ private: SSAUBytePtr currentcolormap; SSAUBytePtr destOrg; - SSAIntPtr subsectorGBuffer; SSAInt pitch; SSATriVertex v1; SSATriVertex v2; @@ -90,7 +88,6 @@ private: SSAInt color, srcalpha, destalpha; SSAInt light; - SSAInt subsectorDepth; SSAShadeConstants shade_constants; SSABool is_simple_shade; SSABool is_nearest_filter; @@ -107,9 +104,9 @@ private: SSAValue fullSpans; // TriFullSpan[] SSAValue partialBlocks; // TriPartialBlock[] - TriDrawVariant variant; TriBlendMode blendmode; bool truecolor; + bool colorfill; int pixelsize; }; @@ -160,10 +157,8 @@ private: SSAStack stack_C1, stack_C2, stack_C3; SSAStack stack_y; SSAStack stack_dest; - SSAStack stack_subsectorGBuffer; SSAStack stack_x; SSAStack stack_buffer; - SSAStack stack_subsectorbuffer; SSAStack stack_iy, stack_ix; SSAStack stack_CY1, stack_CY2, stack_CY3; SSAStack stack_CX1, stack_CX2, stack_CX3; @@ -189,7 +184,6 @@ private: SSAInt color, srcalpha, destalpha; SSAInt light; - SSAInt subsectorDepth; SSAShadeConstants shade_constants; SSABool is_simple_shade; SSABool is_nearest_filter; @@ -200,7 +194,6 @@ private: SSAInt stencilPitch; SSAUByte stencilTestValue; SSAUByte stencilWriteValue; - SSAIntPtr subsectorGBuffer; SSAUBytePtr Colormaps; SSAUBytePtr RGB32k; diff --git a/tools/drawergen/llvmdrawers.cpp b/tools/drawergen/llvmdrawers.cpp index 83f1b14163..e0300946cb 100644 --- 
a/tools/drawergen/llvmdrawers.cpp +++ b/tools/drawergen/llvmdrawers.cpp @@ -93,17 +93,11 @@ LLVMDrawers::LLVMDrawers(const std::string &triple, const std::string &cpuName, CodegenDrawSky("DrawDoubleSky4", DrawSkyVariant::Double, 4); for (int i = 0; i < NumTriBlendModes(); i++) { - CodegenDrawTriangle("TriDrawNormal8_" + std::to_string(i), TriDrawVariant::DrawNormal, (TriBlendMode)i, false); - CodegenDrawTriangle("TriDrawNormal32_" + std::to_string(i), TriDrawVariant::DrawNormal, (TriBlendMode)i, true); - CodegenDrawTriangle("TriFillNormal8_" + std::to_string(i), TriDrawVariant::FillNormal, (TriBlendMode)i, false); - CodegenDrawTriangle("TriFillNormal32_" + std::to_string(i), TriDrawVariant::FillNormal, (TriBlendMode)i, true); - CodegenDrawTriangle("TriDrawSubsector8_" + std::to_string(i), TriDrawVariant::DrawSubsector, (TriBlendMode)i, false); - CodegenDrawTriangle("TriDrawSubsector32_" + std::to_string(i), TriDrawVariant::DrawSubsector, (TriBlendMode)i, true); - CodegenDrawTriangle("TriFillSubsector8_" + std::to_string(i), TriDrawVariant::FillSubsector, (TriBlendMode)i, false); - CodegenDrawTriangle("TriFillSubsector32_" + std::to_string(i), TriDrawVariant::FillSubsector, (TriBlendMode)i, true); + CodegenDrawTriangle("TriDraw8_" + std::to_string(i), (TriBlendMode)i, false, false); + CodegenDrawTriangle("TriDraw32_" + std::to_string(i), (TriBlendMode)i, true, false); + CodegenDrawTriangle("TriFill8_" + std::to_string(i), (TriBlendMode)i, false, true); + CodegenDrawTriangle("TriFill32_" + std::to_string(i), (TriBlendMode)i, true, true); } - CodegenDrawTriangle("TriStencil", TriDrawVariant::Stencil, TriBlendMode::Copy, false); - CodegenDrawTriangle("TriStencilClose", TriDrawVariant::StencilClose, TriBlendMode::Copy, false); ObjectFile = mProgram.GenerateObjectFile(triple, cpuName, features); } @@ -183,7 +177,7 @@ void LLVMDrawers::CodegenDrawSky(const char *name, DrawSkyVariant variant, int c throw Exception("verifyFunction failed for CodegenDrawSky()"); } -void 
LLVMDrawers::CodegenDrawTriangle(const std::string &name, TriDrawVariant variant, TriBlendMode blendmode, bool truecolor) +void LLVMDrawers::CodegenDrawTriangle(const std::string &name, TriBlendMode blendmode, bool truecolor, bool colorfill) { llvm::IRBuilder<> builder(mProgram.context()); SSAScope ssa_scope(&mProgram.context(), mProgram.module(), &builder); @@ -194,12 +188,12 @@ void LLVMDrawers::CodegenDrawTriangle(const std::string &name, TriDrawVariant va function.create_public(); DrawTriangleCodegen codegen; - codegen.Generate(variant, blendmode, truecolor, function.parameter(0), function.parameter(1)); + codegen.Generate(blendmode, truecolor, colorfill, function.parameter(0), function.parameter(1)); builder.CreateRetVoid(); if (llvm::verifyFunction(*function.func)) - throw Exception(std::string("verifyFunction failed for CodegenDrawTriangle(") + std::to_string((int)variant) + ", " + std::to_string((int)blendmode) + ", " + std::to_string((int)truecolor) + ")"); + throw Exception("verifyFunction failed for CodegenDrawTriangle()"); } llvm::Type *LLVMDrawers::GetDrawColumnArgsStruct(llvm::LLVMContext &context) diff --git a/tools/drawergen/llvmdrawers.h b/tools/drawergen/llvmdrawers.h index df6078f4d8..a1b7b53616 100644 --- a/tools/drawergen/llvmdrawers.h +++ b/tools/drawergen/llvmdrawers.h @@ -51,7 +51,7 @@ private: void CodegenDrawSpan(const char *name, DrawSpanVariant variant); void CodegenDrawWall(const char *name, DrawWallVariant variant, int columns); void CodegenDrawSky(const char *name, DrawSkyVariant variant, int columns); - void CodegenDrawTriangle(const std::string &name, TriDrawVariant variant, TriBlendMode blendmode, bool truecolor); + void CodegenDrawTriangle(const std::string &name, TriBlendMode blendmode, bool truecolor, bool colorfill); llvm::Type *GetDrawColumnArgsStruct(llvm::LLVMContext &context); llvm::Type *GetDrawSpanArgsStruct(llvm::LLVMContext &context); From adb65ce72a735997bf98f298332717a14e7619cc Mon Sep 17 00:00:00 2001 From: Magnus 
Norddahl Date: Fri, 16 Dec 2016 00:35:45 +0100 Subject: [PATCH 492/912] Remove TriDrawVariant --- src/r_drawers.h | 11 ------ src/r_poly_decal.cpp | 6 +++- src/r_poly_particle.cpp | 6 +++- src/r_poly_plane.cpp | 18 ++++++---- src/r_poly_scene.cpp | 11 +++--- src/r_poly_sky.cpp | 11 +++--- src/r_poly_sprite.cpp | 6 +++- src/r_poly_triangle.cpp | 76 ++++++++++----------------------------- src/r_poly_triangle.h | 14 +++++--- src/r_poly_wall.cpp | 16 ++++++--- src/r_poly_wallsprite.cpp | 6 +++- 11 files changed, 83 insertions(+), 98 deletions(-) diff --git a/src/r_drawers.h b/src/r_drawers.h index 5437966093..102d3159fa 100644 --- a/src/r_drawers.h +++ b/src/r_drawers.h @@ -251,17 +251,6 @@ struct TriDrawTriangleArgs const uint8_t *BaseColors; }; -enum class TriDrawVariant -{ - DrawNormal, - FillNormal, - DrawSubsector, - FillSubsector, - FuzzSubsector, - Stencil, - StencilClose -}; - enum class TriBlendMode { Copy, // blend_copy(shade(fg)) diff --git a/src/r_poly_decal.cpp b/src/r_poly_decal.cpp index e91e25b582..7718d422e1 100644 --- a/src/r_poly_decal.cpp +++ b/src/r_poly_decal.cpp @@ -167,5 +167,9 @@ void RenderPolyDecal::Render(const TriMatrix &worldToClip, const Vec4f &clipPlan args.stencilwritevalue = stencilValue; //mode = R_SetPatchStyle (decal->RenderStyle, (float)decal->Alpha, decal->Translation, decal->AlphaColor); args.SetClipPlane(clipPlane.x, clipPlane.y, clipPlane.z, clipPlane.w); - PolyTriangleDrawer::draw(args, TriDrawVariant::DrawSubsector, TriBlendMode::Shaded); + args.blendmode = TriBlendMode::Shaded; + args.subsectorTest = true; + args.writeStencil = false; + args.writeSubsector = false; + PolyTriangleDrawer::draw(args); } diff --git a/src/r_poly_particle.cpp b/src/r_poly_particle.cpp index 0d31614d22..27888e77b0 100644 --- a/src/r_poly_particle.cpp +++ b/src/r_poly_particle.cpp @@ -105,5 +105,9 @@ void RenderPolyParticle::Render(const TriMatrix &worldToClip, const Vec4f &clipP args.stencilwritevalue = stencilValue; 
args.SetColormap(sub->sector->ColorMap); args.SetClipPlane(clipPlane.x, clipPlane.y, clipPlane.z, clipPlane.w); - PolyTriangleDrawer::draw(args, TriDrawVariant::FillSubsector, TriBlendMode::AlphaBlend); + args.subsectorTest = true; + args.writeStencil = false; + args.writeSubsector = false; + args.blendmode = TriBlendMode::AlphaBlend; + PolyTriangleDrawer::draw(args); } diff --git a/src/r_poly_plane.cpp b/src/r_poly_plane.cpp index c7f011a760..e21868dc2f 100644 --- a/src/r_poly_plane.cpp +++ b/src/r_poly_plane.cpp @@ -143,8 +143,8 @@ void RenderPolyPlane::Render3DFloor(const TriMatrix &worldToClip, const Vec4f &c args.SetTexture(tex); args.SetColormap(sub->sector->ColorMap); args.SetClipPlane(clipPlane.x, clipPlane.y, clipPlane.z, clipPlane.w); - PolyTriangleDrawer::draw(args, TriDrawVariant::DrawNormal, TriBlendMode::Copy); - PolyTriangleDrawer::draw(args, TriDrawVariant::Stencil, TriBlendMode::Copy); + args.blendmode = TriBlendMode::Copy; + PolyTriangleDrawer::draw(args); } void RenderPolyPlane::Render(const TriMatrix &worldToClip, const Vec4f &clipPlane, PolyCull &cull, subsector_t *sub, uint32_t subsectorDepth, uint32_t stencilValue, bool ceiling, double skyHeight, std::vector> &sectorPortals) @@ -341,13 +341,15 @@ void RenderPolyPlane::Render(const TriMatrix &worldToClip, const Vec4f &clipPlan if (!portal) { args.SetTexture(tex); - PolyTriangleDrawer::draw(args, TriDrawVariant::DrawNormal, TriBlendMode::Copy); - PolyTriangleDrawer::draw(args, TriDrawVariant::Stencil, TriBlendMode::Copy); + args.blendmode = TriBlendMode::Copy; + PolyTriangleDrawer::draw(args); } else { args.stencilwritevalue = polyportal->StencilValue; - PolyTriangleDrawer::draw(args, TriDrawVariant::Stencil, TriBlendMode::Copy); + args.writeColor = false; + args.writeSubsector = false; + PolyTriangleDrawer::draw(args); polyportal->Shape.push_back({ args.vinput, args.vcount, args.ccw, subsectorDepth }); polyportal->Segments.insert(polyportal->Segments.end(), portalSegments.begin(), 
portalSegments.end()); } @@ -365,7 +367,9 @@ void RenderPolyPlane::Render(const TriMatrix &worldToClip, const Vec4f &clipPlan args.stencilwritevalue = 255; } - PolyTriangleDrawer::draw(args, TriDrawVariant::Stencil, TriBlendMode::Copy); + args.writeColor = false; + args.writeSubsector = false; + PolyTriangleDrawer::draw(args); for (uint32_t i = 0; i < sub->numlines; i++) { @@ -433,7 +437,7 @@ void RenderPolyPlane::Render(const TriMatrix &worldToClip, const Vec4f &clipPlan args.vinput = wallvert; args.vcount = 4; - PolyTriangleDrawer::draw(args, TriDrawVariant::Stencil, TriBlendMode::Copy); + PolyTriangleDrawer::draw(args); if (portal) { diff --git a/src/r_poly_scene.cpp b/src/r_poly_scene.cpp index c0697f55ed..cfb858e0b9 100644 --- a/src/r_poly_scene.cpp +++ b/src/r_poly_scene.cpp @@ -255,7 +255,8 @@ void RenderPolyScene::RenderPortals(int portalDepth) args.vcount = verts.Count; args.ccw = verts.Ccw; args.uniforms.subsectorDepth = verts.SubsectorDepth; - PolyTriangleDrawer::draw(args, TriDrawVariant::FillNormal, TriBlendMode::Copy); + args.blendmode = TriBlendMode::Copy; + PolyTriangleDrawer::draw(args); } } @@ -269,7 +270,7 @@ void RenderPolyScene::RenderPortals(int portalDepth) args.vcount = verts.Count; args.ccw = verts.Ccw; args.uniforms.subsectorDepth = verts.SubsectorDepth; - PolyTriangleDrawer::draw(args, TriDrawVariant::FillNormal, TriBlendMode::Copy); + PolyTriangleDrawer::draw(args); } } } @@ -296,7 +297,8 @@ void RenderPolyScene::RenderTranslucent(int portalDepth) args.vcount = verts.Count; args.ccw = verts.Ccw; args.uniforms.subsectorDepth = verts.SubsectorDepth; - PolyTriangleDrawer::draw(args, TriDrawVariant::StencilClose, TriBlendMode::Copy); + args.writeColor = false; + PolyTriangleDrawer::draw(args); } } @@ -317,7 +319,8 @@ void RenderPolyScene::RenderTranslucent(int portalDepth) args.vcount = verts.Count; args.ccw = verts.Ccw; args.uniforms.subsectorDepth = verts.SubsectorDepth; - PolyTriangleDrawer::draw(args, TriDrawVariant::StencilClose, 
TriBlendMode::Copy); + args.writeColor = false; + PolyTriangleDrawer::draw(args); } } } diff --git a/src/r_poly_sky.cpp b/src/r_poly_sky.cpp index 823a510f21..d2ea632b85 100644 --- a/src/r_poly_sky.cpp +++ b/src/r_poly_sky.cpp @@ -59,13 +59,14 @@ void PolySkyDome::Render(const TriMatrix &worldToClip) args.objectToClip = &objectToClip; args.stenciltestvalue = 255; args.stencilwritevalue = 1; - args.SetTexture(frontskytex); args.SetColormap(&NormalLight); args.SetClipPlane(0.0f, 0.0f, 0.0f, 0.0f); RenderCapColorRow(args, frontskytex, 0, false); RenderCapColorRow(args, frontskytex, rc, true); + args.SetTexture(frontskytex); + uint32_t topcapcolor = frontskytex->GetSkyCapColor(false); uint32_t bottomcapcolor = frontskytex->GetSkyCapColor(true); @@ -83,8 +84,8 @@ void PolySkyDome::RenderRow(PolyDrawArgs &args, int row, uint32_t capcolor) args.mode = TriangleDrawMode::Strip; args.ccw = false; args.uniforms.color = capcolor; - PolyTriangleDrawer::draw(args, TriDrawVariant::DrawNormal, TriBlendMode::Skycap); - PolyTriangleDrawer::draw(args, TriDrawVariant::Stencil, TriBlendMode::Skycap); + args.blendmode = TriBlendMode::Skycap; + PolyTriangleDrawer::draw(args); } void PolySkyDome::RenderCapColorRow(PolyDrawArgs &args, FTexture *skytex, int row, bool bottomCap) @@ -98,8 +99,8 @@ void PolySkyDome::RenderCapColorRow(PolyDrawArgs &args, FTexture *skytex, int ro args.mode = TriangleDrawMode::Fan; args.ccw = bottomCap; args.uniforms.color = solid; - PolyTriangleDrawer::draw(args, TriDrawVariant::FillNormal, TriBlendMode::Copy); - PolyTriangleDrawer::draw(args, TriDrawVariant::Stencil, TriBlendMode::Copy); + args.blendmode = TriBlendMode::Copy; + PolyTriangleDrawer::draw(args); } void PolySkyDome::CreateDome() diff --git a/src/r_poly_sprite.cpp b/src/r_poly_sprite.cpp index e5d8e0ce56..2cb285298c 100644 --- a/src/r_poly_sprite.cpp +++ b/src/r_poly_sprite.cpp @@ -258,7 +258,11 @@ void RenderPolySprite::Render(const TriMatrix &worldToClip, const Vec4f &clipPla blendmode = 
TriBlendMode::Add; } - PolyTriangleDrawer::draw(args, TriDrawVariant::DrawSubsector, blendmode); + args.subsectorTest = true; + args.writeSubsector = false; + args.writeStencil = false; + args.blendmode = blendmode; + PolyTriangleDrawer::draw(args); } bool RenderPolySprite::IsThingCulled(AActor *thing) diff --git a/src/r_poly_triangle.cpp b/src/r_poly_triangle.cpp index 90f78b639e..1edd73c719 100644 --- a/src/r_poly_triangle.cpp +++ b/src/r_poly_triangle.cpp @@ -77,12 +77,12 @@ void PolyTriangleDrawer::toggle_mirror() mirror = !mirror; } -void PolyTriangleDrawer::draw(const PolyDrawArgs &args, TriDrawVariant variant, TriBlendMode blendmode) +void PolyTriangleDrawer::draw(const PolyDrawArgs &args) { - DrawerCommandQueue::QueueCommand(args, variant, blendmode, mirror); + DrawerCommandQueue::QueueCommand(args, mirror); } -void PolyTriangleDrawer::draw_arrays(const PolyDrawArgs &drawargs, TriDrawVariant variant, TriBlendMode blendmode, WorkerThreadData *thread) +void PolyTriangleDrawer::draw_arrays(const PolyDrawArgs &drawargs, WorkerThreadData *thread) { if (drawargs.vcount < 3) return; @@ -92,46 +92,20 @@ void PolyTriangleDrawer::draw_arrays(const PolyDrawArgs &drawargs, TriDrawVarian PolyDrawFuncPtr drawfuncs[3]; int num_drawfuncs = 0; - int bmode = (int)blendmode; - switch (variant) - { - case TriDrawVariant::DrawNormal: - drawfuncs[num_drawfuncs++] = &ScreenTriangle::SetupNormal; + drawfuncs[num_drawfuncs++] = drawargs.subsectorTest ? &ScreenTriangle::SetupSubsector : &ScreenTriangle::SetupNormal; + + int bmode = (int)drawargs.blendmode; + if (drawargs.writeColor && drawargs.texturePixels) drawfuncs[num_drawfuncs++] = dest_bgra ? llvm->TriDraw32[bmode] : llvm->TriDraw8[bmode]; - drawfuncs[num_drawfuncs++] = &ScreenTriangle::SubsectorWrite; - break; - - case TriDrawVariant::FillNormal: - drawfuncs[num_drawfuncs++] = &ScreenTriangle::SetupNormal; + else if (drawargs.writeColor) drawfuncs[num_drawfuncs++] = dest_bgra ? 
llvm->TriFill32[bmode] : llvm->TriFill8[bmode]; - drawfuncs[num_drawfuncs++] = &ScreenTriangle::SubsectorWrite; - break; - - case TriDrawVariant::DrawSubsector: - drawfuncs[num_drawfuncs++] = &ScreenTriangle::SetupSubsector; - drawfuncs[num_drawfuncs++] = dest_bgra ? llvm->TriDraw32[bmode] : llvm->TriDraw8[bmode]; - break; - - case TriDrawVariant::FillSubsector: - drawfuncs[num_drawfuncs++] = &ScreenTriangle::SetupSubsector; - drawfuncs[num_drawfuncs++] = dest_bgra ? llvm->TriFill32[bmode] : llvm->TriFill8[bmode]; - break; - - case TriDrawVariant::Stencil: - drawfuncs[num_drawfuncs++] = &ScreenTriangle::SetupNormal; - drawfuncs[num_drawfuncs++] = &ScreenTriangle::StencilWrite; - break; - - case TriDrawVariant::StencilClose: - drawfuncs[num_drawfuncs++] = &ScreenTriangle::SetupNormal; + + if (drawargs.writeStencil) drawfuncs[num_drawfuncs++] = &ScreenTriangle::StencilWrite; + + if (drawargs.writeSubsector) drawfuncs[num_drawfuncs++] = &ScreenTriangle::SubsectorWrite; - break; - - default: - break; - } - + TriDrawTriangleArgs args; args.dest = dest; args.pitch = dest_pitch; @@ -384,8 +358,8 @@ void PolyTriangleDrawer::clipedge(const ShadedTriVertex *verts, TriVertex *clipp ///////////////////////////////////////////////////////////////////////////// -DrawPolyTrianglesCommand::DrawPolyTrianglesCommand(const PolyDrawArgs &args, TriDrawVariant variant, TriBlendMode blendmode, bool mirror) - : args(args), variant(variant), blendmode(blendmode) +DrawPolyTrianglesCommand::DrawPolyTrianglesCommand(const PolyDrawArgs &args, bool mirror) + : args(args) { if (mirror) this->args.ccw = !this->args.ccw; @@ -402,25 +376,13 @@ void DrawPolyTrianglesCommand::Execute(DrawerThread *thread) thread_data.FullSpans = thread->FullSpansBuffer.data(); thread_data.PartialBlocks = thread->PartialBlocksBuffer.data(); - PolyTriangleDrawer::draw_arrays(args, variant, blendmode, &thread_data); + PolyTriangleDrawer::draw_arrays(args, &thread_data); } FString DrawPolyTrianglesCommand::DebugInfo() { 
- FString variantstr; - switch (variant) - { - default: variantstr = "Unknown"; break; - case TriDrawVariant::DrawNormal: variantstr = "DrawNormal"; break; - case TriDrawVariant::FillNormal: variantstr = "FillNormal"; break; - case TriDrawVariant::DrawSubsector: variantstr = "DrawSubsector"; break; - case TriDrawVariant::FillSubsector: variantstr = "FillSubsector"; break; - case TriDrawVariant::FuzzSubsector: variantstr = "FuzzSubsector"; break; - case TriDrawVariant::Stencil: variantstr = "Stencil"; break; - } - FString blendmodestr; - switch (blendmode) + switch (args.blendmode) { default: blendmodestr = "Unknown"; break; case TriBlendMode::Copy: blendmodestr = "Copy"; break; @@ -440,8 +402,8 @@ FString DrawPolyTrianglesCommand::DebugInfo() } FString info; - info.Format("DrawPolyTriangles: variant = %s, blend mode = %s, color = %d, light = %d, textureWidth = %d, textureHeight = %d, texture = %s, translation = %s, colormaps = %s", - variantstr.GetChars(), blendmodestr.GetChars(), args.uniforms.color, args.uniforms.light, args.textureWidth, args.textureHeight, + info.Format("DrawPolyTriangles: blend mode = %s, color = %d, light = %d, textureWidth = %d, textureHeight = %d, texture = %s, translation = %s, colormaps = %s", + blendmodestr.GetChars(), args.uniforms.color, args.uniforms.light, args.textureWidth, args.textureHeight, args.texturePixels ? "ptr" : "null", args.translation ? "ptr" : "null", args.colormaps ? 
"ptr" : "null"); return info; } diff --git a/src/r_poly_triangle.h b/src/r_poly_triangle.h index 4fc302eaba..5b66e80e2b 100644 --- a/src/r_poly_triangle.h +++ b/src/r_poly_triangle.h @@ -49,6 +49,11 @@ public: int vcount = 0; TriangleDrawMode mode = TriangleDrawMode::Normal; bool ccw = false; + // bool stencilTest = true; // Always true for now + bool subsectorTest = false; + bool writeStencil = true; + bool writeColor = true; + bool writeSubsector = true; const uint8_t *texturePixels = nullptr; int textureWidth = 0; int textureHeight = 0; @@ -57,6 +62,7 @@ public: uint8_t stencilwritevalue = 0; const uint8_t *colormaps = nullptr; float clipPlane[4]; + TriBlendMode blendmode = TriBlendMode::Copy; void SetClipPlane(float a, float b, float c, float d) { @@ -159,12 +165,12 @@ class PolyTriangleDrawer { public: static void set_viewport(int x, int y, int width, int height, DCanvas *canvas); - static void draw(const PolyDrawArgs &args, TriDrawVariant variant, TriBlendMode blendmode); + static void draw(const PolyDrawArgs &args); static void toggle_mirror(); private: static ShadedTriVertex shade_vertex(const TriMatrix &objectToClip, const float *clipPlane, const TriVertex &v); - static void draw_arrays(const PolyDrawArgs &args, TriDrawVariant variant, TriBlendMode blendmode, WorkerThreadData *thread); + static void draw_arrays(const PolyDrawArgs &args, WorkerThreadData *thread); static void draw_shaded_triangle(const ShadedTriVertex *vertices, bool ccw, TriDrawTriangleArgs *args, WorkerThreadData *thread, PolyDrawFuncPtr *drawfuncs, int num_drawfuncs); static bool cullhalfspace(float clipdistance1, float clipdistance2, float &t1, float &t2); static void clipedge(const ShadedTriVertex *verts, TriVertex *clippedvert, int &numclipvert); @@ -249,15 +255,13 @@ private: class DrawPolyTrianglesCommand : public DrawerCommand { public: - DrawPolyTrianglesCommand(const PolyDrawArgs &args, TriDrawVariant variant, TriBlendMode blendmode, bool mirror); + DrawPolyTrianglesCommand(const 
PolyDrawArgs &args, bool mirror); void Execute(DrawerThread *thread) override; FString DebugInfo() override; private: PolyDrawArgs args; - TriDrawVariant variant; - TriBlendMode blendmode; }; class PolyVertexBuffer diff --git a/src/r_poly_wall.cpp b/src/r_poly_wall.cpp index bcb5bb16b2..ebb355431b 100644 --- a/src/r_poly_wall.cpp +++ b/src/r_poly_wall.cpp @@ -264,7 +264,9 @@ void RenderPolyWall::Render(const TriMatrix &worldToClip, const Vec4f &clipPlane if (Polyportal) { args.stencilwritevalue = Polyportal->StencilValue; - PolyTriangleDrawer::draw(args, TriDrawVariant::Stencil, TriBlendMode::Copy); + args.writeColor = false; + args.writeSubsector = false; + PolyTriangleDrawer::draw(args); Polyportal->Shape.push_back({ args.vinput, args.vcount, args.ccw, args.uniforms.subsectorDepth }); int sx1, sx2; @@ -274,17 +276,21 @@ void RenderPolyWall::Render(const TriMatrix &worldToClip, const Vec4f &clipPlane } else if (!Masked) { - PolyTriangleDrawer::draw(args, TriDrawVariant::DrawNormal, TriBlendMode::Copy); - PolyTriangleDrawer::draw(args, TriDrawVariant::Stencil, TriBlendMode::Copy); + args.blendmode = TriBlendMode::Copy; + PolyTriangleDrawer::draw(args); } else { args.uniforms.destalpha = (Line->flags & ML_ADDTRANS) ? 
256 : (uint32_t)(256 - Line->alpha * 256); args.uniforms.srcalpha = (uint32_t)(Line->alpha * 256); + args.subsectorTest = true; + args.writeSubsector = false; + args.writeStencil = false; if (args.uniforms.destalpha == 0 && args.uniforms.srcalpha == 256) - PolyTriangleDrawer::draw(args, TriDrawVariant::DrawSubsector, TriBlendMode::AlphaBlend); + args.blendmode = TriBlendMode::AlphaBlend; else - PolyTriangleDrawer::draw(args, TriDrawVariant::DrawSubsector, TriBlendMode::Add); + args.blendmode = TriBlendMode::Add; + PolyTriangleDrawer::draw(args); } RenderPolyDecal::RenderWallDecals(worldToClip, clipPlane, LineSeg, SubsectorDepth, StencilValue); diff --git a/src/r_poly_wallsprite.cpp b/src/r_poly_wallsprite.cpp index e7fe267b8b..fe12b271db 100644 --- a/src/r_poly_wallsprite.cpp +++ b/src/r_poly_wallsprite.cpp @@ -121,5 +121,9 @@ void RenderPolyWallSprite::Render(const TriMatrix &worldToClip, const Vec4f &cli args.SetTexture(tex); args.SetColormap(sub->sector->ColorMap); args.SetClipPlane(clipPlane.x, clipPlane.y, clipPlane.z, clipPlane.w); - PolyTriangleDrawer::draw(args, TriDrawVariant::DrawSubsector, TriBlendMode::AlphaBlend); + args.subsectorTest = true; + args.writeSubsector = false; + args.writeStencil = false; + args.blendmode = TriBlendMode::AlphaBlend; + PolyTriangleDrawer::draw(args); } From 9b98c4d51229c42c99317872c07d1098c2357651 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 16 Dec 2016 05:01:49 +0100 Subject: [PATCH 493/912] Fix out of bounds bug --- src/r_poly_triangle.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/r_poly_triangle.cpp b/src/r_poly_triangle.cpp index 1edd73c719..46b82f517e 100644 --- a/src/r_poly_triangle.cpp +++ b/src/r_poly_triangle.cpp @@ -89,7 +89,7 @@ void PolyTriangleDrawer::draw_arrays(const PolyDrawArgs &drawargs, WorkerThreadD auto llvm = Drawers::Instance(); - PolyDrawFuncPtr drawfuncs[3]; + PolyDrawFuncPtr drawfuncs[4]; int num_drawfuncs = 0; drawfuncs[num_drawfuncs++] = 
drawargs.subsectorTest ? &ScreenTriangle::SetupSubsector : &ScreenTriangle::SetupNormal; From 6d295a25c2d4be25ef743f0fea4fabf1d1632d36 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 16 Dec 2016 06:21:17 +0100 Subject: [PATCH 494/912] Minor tweaking --- src/r_poly_triangle.cpp | 32 ++++++++++---- .../fixedfunction/drawtrianglecodegen.cpp | 42 ++++++++++++++----- 2 files changed, 55 insertions(+), 19 deletions(-) diff --git a/src/r_poly_triangle.cpp b/src/r_poly_triangle.cpp index 46b82f517e..e06cf2b7fa 100644 --- a/src/r_poly_triangle.cpp +++ b/src/r_poly_triangle.cpp @@ -37,6 +37,8 @@ #include "r_poly_triangle.h" #include "r_draw_rgba.h" +CVAR(Bool, r_debug_trisetup, false, 0); + int PolyTriangleDrawer::viewport_x; int PolyTriangleDrawer::viewport_y; int PolyTriangleDrawer::viewport_width; @@ -93,19 +95,22 @@ void PolyTriangleDrawer::draw_arrays(const PolyDrawArgs &drawargs, WorkerThreadD int num_drawfuncs = 0; drawfuncs[num_drawfuncs++] = drawargs.subsectorTest ? &ScreenTriangle::SetupSubsector : &ScreenTriangle::SetupNormal; - - int bmode = (int)drawargs.blendmode; - if (drawargs.writeColor && drawargs.texturePixels) - drawfuncs[num_drawfuncs++] = dest_bgra ? llvm->TriDraw32[bmode] : llvm->TriDraw8[bmode]; - else if (drawargs.writeColor) - drawfuncs[num_drawfuncs++] = dest_bgra ? llvm->TriFill32[bmode] : llvm->TriFill8[bmode]; - + + if (!r_debug_trisetup) // For profiling how much time is spent in setup vs drawal + { + int bmode = (int)drawargs.blendmode; + if (drawargs.writeColor && drawargs.texturePixels) + drawfuncs[num_drawfuncs++] = dest_bgra ? llvm->TriDraw32[bmode] : llvm->TriDraw8[bmode]; + else if (drawargs.writeColor) + drawfuncs[num_drawfuncs++] = dest_bgra ? 
llvm->TriFill32[bmode] : llvm->TriFill8[bmode]; + } + if (drawargs.writeStencil) drawfuncs[num_drawfuncs++] = &ScreenTriangle::StencilWrite; - + if (drawargs.writeSubsector) drawfuncs[num_drawfuncs++] = &ScreenTriangle::SubsectorWrite; - + TriDrawTriangleArgs args; args.dest = dest; args.pitch = dest_pitch; @@ -793,6 +798,9 @@ void ScreenTriangle::SetupNormal(const TriDrawTriangleArgs *args, WorkerThreadDa span->Length = 0; } + if (mask0 == 0 && mask1 == 0) + continue; + partial->X = x; partial->Y = y; partial->Mask0 = mask0; @@ -997,6 +1005,9 @@ void ScreenTriangle::SetupSubsector(const TriDrawTriangleArgs *args, WorkerThrea span->Length = 0; } + if (mask0 == 0 && mask1 == 0) + continue; + partial->X = x; partial->Y = y; partial->Mask0 = mask0; @@ -1083,6 +1094,9 @@ void ScreenTriangle::SetupSubsector(const TriDrawTriangleArgs *args, WorkerThrea span->Length = 0; } + if (mask0 == 0 && mask1 == 0) + continue; + partial->X = x; partial->Y = y; partial->Mask0 = mask0; diff --git a/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp b/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp index 38e9922c0a..8e4ed803f7 100644 --- a/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp +++ b/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp @@ -111,16 +111,38 @@ void DrawTriangleCodegen::DrawFullSpans() SSAInt lightnext = FRACUNIT - SSAInt(SSAFloat::clamp(shade - SSAFloat::MIN(SSAFloat(24.0f), globVis * blockPosX.W) / 32.0f, SSAFloat(0.0f), SSAFloat(31.0f / 32.0f)) * (float)FRACUNIT, true); SSAInt lightstep = (lightnext - lightpos) / 8; - for (int ix = 0; ix < 8; ix++) + if (truecolor) { - if (truecolor) + for (int ix = 0; ix < 8; ix += 4) { - currentlight = is_fixed_light.select(light, lightpos >> 8); - SSAUBytePtr destptr = dest[(x * 8 + ix) * 4]; - destptr.store_vec4ub(ProcessPixel32(destptr.load_vec4ub(false), varyingPos)); + SSAVec16ub pixels16 = destptr.load_unaligned_vec16ub(false); + SSAVec8s pixels8hi = SSAVec8s::extendhi(pixels16); + SSAVec8s pixels8lo = 
SSAVec8s::extendlo(pixels16); + SSAVec4i pixels[4] = + { + SSAVec4i::extendlo(pixels8lo), + SSAVec4i::extendhi(pixels8lo), + SSAVec4i::extendlo(pixels8hi), + SSAVec4i::extendhi(pixels8hi) + }; + + for (int sse = 0; sse < 4; sse++) + { + currentlight = is_fixed_light.select(light, lightpos >> 8); + pixels[sse] = ProcessPixel32(pixels[sse], varyingPos); + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = varyingPos[j] + varyingStep[j]; + lightpos = lightpos + lightstep; + } + + destptr.store_unaligned_vec16ub(SSAVec16ub(SSAVec8s(pixels[0], pixels[1]), SSAVec8s(pixels[2], pixels[3]))); } - else + } + else + { + for (int ix = 0; ix < 8; ix++) { currentlight = is_fixed_light.select(light, lightpos >> 8); SSAInt colormapindex = SSAInt::MIN((256 - currentlight) * 32 / 256, SSAInt(31)); @@ -128,11 +150,11 @@ void DrawTriangleCodegen::DrawFullSpans() SSAUBytePtr destptr = dest[(x * 8 + ix)]; destptr.store(ProcessPixel8(destptr.load(false).zext_int(), varyingPos).trunc_ubyte()); - } - for (int j = 0; j < TriVertex::NumVarying; j++) - varyingPos[j] = varyingPos[j] + varyingStep[j]; - lightpos = lightpos + lightstep; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = varyingPos[j] + varyingStep[j]; + lightpos = lightpos + lightstep; + } } for (int j = 0; j < TriVertex::NumVarying; j++) From a360a1963f97ba8e4492df85e5b282bdcfd060d5 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 16 Dec 2016 15:25:03 +0100 Subject: [PATCH 495/912] Move setup triangle stuff to its own file --- tools/drawergen/CMakeLists.txt | 1 + .../fixedfunction/drawtrianglecodegen.cpp | 772 +----------------- .../fixedfunction/drawtrianglecodegen.h | 125 --- .../fixedfunction/setuptrianglecodegen.cpp | 573 +++++++++++++ .../fixedfunction/setuptrianglecodegen.h | 98 +++ 5 files changed, 675 insertions(+), 894 deletions(-) create mode 100644 tools/drawergen/fixedfunction/setuptrianglecodegen.cpp create mode 100644 tools/drawergen/fixedfunction/setuptrianglecodegen.h 
diff --git a/tools/drawergen/CMakeLists.txt b/tools/drawergen/CMakeLists.txt index d330e799a1..12e004e28a 100644 --- a/tools/drawergen/CMakeLists.txt +++ b/tools/drawergen/CMakeLists.txt @@ -141,6 +141,7 @@ set (SOURCES fixedfunction/drawcolumncodegen.cpp fixedfunction/drawskycodegen.cpp fixedfunction/drawtrianglecodegen.cpp + fixedfunction/setuptrianglecodegen.cpp ) enable_precompiled_headers( precomp.h SOURCES ) diff --git a/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp b/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp index 8e4ed803f7..01deb1ce22 100644 --- a/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp +++ b/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp @@ -354,6 +354,9 @@ SSAVec4i DrawTriangleCodegen::Sample32(SSAInt *varying) linear = (p00 * (a * b) + p01 * (inv_a * b) + p10 * (a * inv_b) + p11 * (inv_a * inv_b) + 127) >> 8; } + // // Min filter = linear, Mag filter = nearest: + // AffineLinear = (gradVaryingX[0] / AffineW) > SSAFloat(1.0f) || (gradVaryingX[0] / AffineW) < SSAFloat(-1.0f); + return AffineLinear.select(linear, nearest); */ } @@ -664,772 +667,3 @@ SSAFloat DrawTriangleCodegen::FindGradientY(SSAFloat x0, SSAFloat y0, SSAFloat x SSAFloat bottom = (x0 - x2) * (y1 - y2) - (x1 - x2) * (y0 - y2); return top / bottom; } - - -#if 0 - -void DrawTriangleCodegen::Generate(TriDrawVariant variant, TriBlendMode blendmode, bool truecolor, SSAValue args, SSAValue thread_data) -{ - this->variant = variant; - this->blendmode = blendmode; - this->truecolor = truecolor; - LoadArgs(args, thread_data); - Setup(); - LoopBlockY(); -} - -SSAInt DrawTriangleCodegen::FloatTo28_4(SSAFloat v) -{ - // SSAInt(SSAFloat::round(16.0f * v), false); - SSAInt a = SSAInt(v * 32.0f, false); - return (a + (a.ashr(31) | SSAInt(1))).ashr(1); -} - -void DrawTriangleCodegen::Setup() -{ - int pixelsize = truecolor ? 
4 : 1; - - // 28.4 fixed-point coordinates - Y1 = FloatTo28_4(v1.y); - Y2 = FloatTo28_4(v2.y); - Y3 = FloatTo28_4(v3.y); - - X1 = FloatTo28_4(v1.x); - X2 = FloatTo28_4(v2.x); - X3 = FloatTo28_4(v3.x); - - // Deltas - DX12 = X1 - X2; - DX23 = X2 - X3; - DX31 = X3 - X1; - - DY12 = Y1 - Y2; - DY23 = Y2 - Y3; - DY31 = Y3 - Y1; - - // Fixed-point deltas - FDX12 = DX12 << 4; - FDX23 = DX23 << 4; - FDX31 = DX31 << 4; - - FDY12 = DY12 << 4; - FDY23 = DY23 << 4; - FDY31 = DY31 << 4; - - // Bounding rectangle - minx = SSAInt::MAX((SSAInt::MIN(SSAInt::MIN(X1, X2), X3) + 0xF).ashr(4), SSAInt(0)); - maxx = SSAInt::MIN((SSAInt::MAX(SSAInt::MAX(X1, X2), X3) + 0xF).ashr(4), clipright - 1); - miny = SSAInt::MAX((SSAInt::MIN(SSAInt::MIN(Y1, Y2), Y3) + 0xF).ashr(4), SSAInt(0)); - maxy = SSAInt::MIN((SSAInt::MAX(SSAInt::MAX(Y1, Y2), Y3) + 0xF).ashr(4), clipbottom - 1); - - SSAIfBlock if0; - if0.if_block(minx >= maxx || miny >= maxy); - if0.end_retvoid(); - - // Start in corner of 8x8 block - minx = minx & ~(q - 1); - miny = miny & ~(q - 1); - - dest = dest[miny * pitch * pixelsize]; - subsectorGBuffer = subsectorGBuffer[miny * pitch]; - - // Half-edge constants - C1 = DY12 * X1 - DX12 * Y1; - C2 = DY23 * X2 - DX23 * Y2; - C3 = DY31 * X3 - DX31 * Y3; - - // Correct for fill convention - SSAIfBlock if1; - if1.if_block(DY12 < SSAInt(0) || (DY12 == SSAInt(0) && DX12 > SSAInt(0))); - stack_C1.store(C1 + 1); - if1.else_block(); - stack_C1.store(C1); - if1.end_block(); - C1 = stack_C1.load(); - SSAIfBlock if2; - if2.if_block(DY23 < SSAInt(0) || (DY23 == SSAInt(0) && DX23 > SSAInt(0))); - stack_C2.store(C2 + 1); - if2.else_block(); - stack_C2.store(C2); - if2.end_block(); - C2 = stack_C2.load(); - SSAIfBlock if3; - if3.if_block(DY31 < SSAInt(0) || (DY31 == SSAInt(0) && DX31 > SSAInt(0))); - stack_C3.store(C3 + 1); - if3.else_block(); - stack_C3.store(C3); - if3.end_block(); - C3 = stack_C3.load(); - - // Gradients - v1.x = SSAFloat(X1) * 0.0625f; - v2.x = SSAFloat(X2) * 0.0625f; - v3.x = 
SSAFloat(X3) * 0.0625f; - v1.y = SSAFloat(Y1) * 0.0625f; - v2.y = SSAFloat(Y2) * 0.0625f; - v3.y = SSAFloat(Y3) * 0.0625f; - gradWX = gradx(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); - gradWY = grady(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); - stack_posy_w.store(v1.w + gradWX * (SSAFloat(minx) - v1.x) + gradWY * (SSAFloat(miny) - v1.y)); - for (int i = 0; i < TriVertex::NumVarying; i++) - { - gradVaryingX[i] = gradx(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); - gradVaryingY[i] = grady(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); - stack_posy_varying[i].store(v1.varying[i] * v1.w + gradVaryingX[i] * (SSAFloat(minx) - v1.x) + gradVaryingY[i] * (SSAFloat(miny) - v1.y)); - } - - gradWX = gradWX * (float)q; - for (int i = 0; i < TriVertex::NumVarying; i++) - gradVaryingX[i] = gradVaryingX[i] * (float)q; - - shade = 64.0f - (SSAFloat(light * 255 / 256) + 12.0f) * 32.0f / 128.0f; -} - -SSAFloat DrawTriangleCodegen::gradx(SSAFloat x0, SSAFloat y0, SSAFloat x1, SSAFloat y1, SSAFloat x2, SSAFloat y2, SSAFloat c0, SSAFloat c1, SSAFloat c2) -{ - SSAFloat top = (c1 - c2) * (y0 - y2) - (c0 - c2) * (y1 - y2); - SSAFloat bottom = (x1 - x2) * (y0 - y2) - (x0 - x2) * (y1 - y2); - return top / bottom; -} - -SSAFloat DrawTriangleCodegen::grady(SSAFloat x0, SSAFloat y0, SSAFloat x1, SSAFloat y1, SSAFloat x2, SSAFloat y2, SSAFloat c0, SSAFloat c1, SSAFloat c2) -{ - SSAFloat top = (c1 - c2) * (x0 - x2) - (c0 - c2) * (x1 - x2); - SSAFloat bottom = (x0 - x2) * (y1 - y2) - (x1 - x2) * (y0 - y2); - return top / bottom; -} - -void DrawTriangleCodegen::LoopBlockY() -{ - int pixelsize = truecolor ? 
4 : 1; - - SSAInt blocks_skipped = skipped_by_thread(miny / q, thread); - stack_y.store(miny + blocks_skipped * q); - stack_dest.store(dest[blocks_skipped * q * pitch * pixelsize]); - stack_subsectorGBuffer.store(subsectorGBuffer[blocks_skipped * q * pitch]); - stack_posy_w.store(stack_posy_w.load() + gradWY * (q * blocks_skipped)); - for (int i = 0; i < TriVertex::NumVarying; i++) - stack_posy_varying[i].store(stack_posy_varying[i].load() + gradVaryingY[i] * (blocks_skipped * q)); - - SSAForBlock loop; - y = stack_y.load(); - dest = stack_dest.load(); - subsectorGBuffer = stack_subsectorGBuffer.load(); - posy_w = stack_posy_w.load(); - for (int i = 0; i < TriVertex::NumVarying; i++) - posy_varying[i] = stack_posy_varying[i].load(); - loop.loop_block(y < maxy, 0); - { - LoopBlockX(); - - stack_posy_w.store(posy_w + gradWY * (q * thread.num_cores)); - for (int i = 0; i < TriVertex::NumVarying; i++) - stack_posy_varying[i].store(posy_varying[i] + gradVaryingY[i] * (q * thread.num_cores)); - - stack_dest.store(dest[q * pitch * pixelsize * thread.num_cores]); - stack_subsectorGBuffer.store(subsectorGBuffer[q * pitch * thread.num_cores]); - stack_y.store(y + thread.num_cores * q); - } - loop.end_block(); -} - -void DrawTriangleCodegen::LoopBlockX() -{ - stack_x.store(minx); - stack_posx_w.store(posy_w); - for (int i = 0; i < TriVertex::NumVarying; i++) - stack_posx_varying[i].store(posy_varying[i]); - - SSAForBlock loop; - x = stack_x.load(); - posx_w = stack_posx_w.load(); - for (int i = 0; i < TriVertex::NumVarying; i++) - posx_varying[i] = stack_posx_varying[i].load(); - loop.loop_block(x < maxx, 0); - { - // Corners of block - x0 = x << 4; - x1 = (x + q - 1) << 4; - y0 = y << 4; - y1 = (y + q - 1) << 4; - - // Evaluate half-space functions - SSABool a00 = C1 + DX12 * y0 - DY12 * x0 > SSAInt(0); - SSABool a10 = C1 + DX12 * y0 - DY12 * x1 > SSAInt(0); - SSABool a01 = C1 + DX12 * y1 - DY12 * x0 > SSAInt(0); - SSABool a11 = C1 + DX12 * y1 - DY12 * x1 > SSAInt(0); - - 
SSAInt a = (a00.zext_int() << 0) | (a10.zext_int() << 1) | (a01.zext_int() << 2) | (a11.zext_int() << 3); - - SSABool b00 = C2 + DX23 * y0 - DY23 * x0 > SSAInt(0); - SSABool b10 = C2 + DX23 * y0 - DY23 * x1 > SSAInt(0); - SSABool b01 = C2 + DX23 * y1 - DY23 * x0 > SSAInt(0); - SSABool b11 = C2 + DX23 * y1 - DY23 * x1 > SSAInt(0); - SSAInt b = (b00.zext_int() << 0) | (b10.zext_int() << 1) | (b01.zext_int() << 2) | (b11.zext_int() << 3); - - SSABool c00 = C3 + DX31 * y0 - DY31 * x0 > SSAInt(0); - SSABool c10 = C3 + DX31 * y0 - DY31 * x1 > SSAInt(0); - SSABool c01 = C3 + DX31 * y1 - DY31 * x0 > SSAInt(0); - SSABool c11 = C3 + DX31 * y1 - DY31 * x1 > SSAInt(0); - SSAInt c = (c00.zext_int() << 0) | (c10.zext_int() << 1) | (c01.zext_int() << 2) | (c11.zext_int() << 3); - - // Skip block when outside an edge - SSABool process_block = !(a == SSAInt(0) || b == SSAInt(0) || c == SSAInt(0)); - - SetStencilBlock(x / 8 + y / 8 * stencilPitch); - - // Stencil test the whole block, if possible - if (variant == TriDrawVariant::DrawSubsector || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector || variant == TriDrawVariant::StencilClose) - { - process_block = process_block && (!StencilIsSingleValue() || SSABool::compare_uge(StencilGetSingle(), stencilTestValue)); - } - else - { - process_block = process_block && (!StencilIsSingleValue() || StencilGetSingle() == stencilTestValue); - } - - SSAIfBlock branch; - branch.if_block(process_block); - - // Check if block needs clipping - SSABool clipneeded = (x + q) > clipright || (y + q) > clipbottom; - - SSAFloat globVis = SSAFloat(1706.0f); - SSAFloat vis = globVis * posx_w; - SSAInt lightscale = SSAInt(SSAFloat::clamp((shade - SSAFloat::MIN(SSAFloat(24.0f), vis)) / 32.0f, SSAFloat(0.0f), SSAFloat(31.0f / 32.0f)) * 256.0f, true); - SSAInt diminishedlight = 256 - lightscale; - - if (!truecolor) - { - SSAInt diminishedindex = lightscale / 8; - SSAInt lightindex = SSAInt::MIN((256 - light) * 32 / 256, 
SSAInt(31)); - SSAInt colormapindex = (!is_fixed_light).select(diminishedindex, lightindex); - currentcolormap = Colormaps[colormapindex << 8]; - } - else - { - currentlight = (!is_fixed_light).select(diminishedlight, light); - } - - SSABool covered = a == SSAInt(0xF) && b == SSAInt(0xF) && c == SSAInt(0xF) && !clipneeded && StencilIsSingleValue(); - - // Accept whole block when totally covered - SSAIfBlock branch_covered; - branch_covered.if_block(covered); - { - LoopFullBlock(); - } - branch_covered.else_block(); - { - SSAIfBlock branch_covered_stencil; - branch_covered_stencil.if_block(StencilIsSingleValue()); - { - SSABool stenciltestpass; - if (variant == TriDrawVariant::DrawSubsector || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector || variant == TriDrawVariant::StencilClose) - { - stenciltestpass = SSABool::compare_uge(StencilGetSingle(), stencilTestValue); - } - else - { - stenciltestpass = StencilGetSingle() == stencilTestValue; - } - - SSAIfBlock branch_stenciltestpass; - branch_stenciltestpass.if_block(stenciltestpass); - { - LoopPartialBlock(true); - } - branch_stenciltestpass.end_block(); - } - branch_covered_stencil.else_block(); - { - LoopPartialBlock(false); - } - branch_covered_stencil.end_block(); - } - branch_covered.end_block(); - - branch.end_block(); - - stack_posx_w.store(posx_w + gradWX); - for (int i = 0; i < TriVertex::NumVarying; i++) - stack_posx_varying[i].store(posx_varying[i] + gradVaryingX[i]); - - stack_x.store(x + q); - } - loop.end_block(); -} - -void DrawTriangleCodegen::SetupAffineBlock() -{ - SSAFloat rcpW0 = (float)0x01000000 / AffineW; - SSAFloat rcpW1 = (float)0x01000000 / (AffineW + gradWX); - - for (int i = 0; i < TriVertex::NumVarying; i++) - { - AffineVaryingPosX[i] = SSAInt(AffineVaryingPosY[i] * rcpW0, false); - AffineVaryingStepX[i] = (SSAInt((AffineVaryingPosY[i] + gradVaryingX[i]) * rcpW1, false) - AffineVaryingPosX[i]) / q; - } - - // Min filter = linear, Mag filter = nearest: 
- AffineLinear = (gradVaryingX[0] / AffineW) > SSAFloat(1.0f) || (gradVaryingX[0] / AffineW) < SSAFloat(-1.0f); -} - -void DrawTriangleCodegen::LoopFullBlock() -{ - if (variant == TriDrawVariant::Stencil) - { - StencilClear(stencilWriteValue); - } - else if (variant == TriDrawVariant::StencilClose) - { - StencilClear(stencilWriteValue); - for (int iy = 0; iy < q; iy++) - { - SSAIntPtr subsectorbuffer = subsectorGBuffer[x + iy * pitch]; - for (int ix = 0; ix < q; ix += 4) - { - subsectorbuffer[ix].store_unaligned_vec4i(SSAVec4i(subsectorDepth)); - } - } - } - else - { - int pixelsize = truecolor ? 4 : 1; - - AffineW = posx_w; - for (int i = 0; i < TriVertex::NumVarying; i++) - AffineVaryingPosY[i] = posx_varying[i]; - - for (int iy = 0; iy < q; iy++) - { - SSAUBytePtr buffer = dest[(x + iy * pitch) * pixelsize]; - SSAIntPtr subsectorbuffer = subsectorGBuffer[x + iy * pitch]; - - SetupAffineBlock(); - - for (int ix = 0; ix < q; ix += 4) - { - SSAUBytePtr buf = buffer[ix * pixelsize]; - if (truecolor) - { - SSAVec16ub pixels16 = buf.load_unaligned_vec16ub(false); - SSAVec8s pixels8hi = SSAVec8s::extendhi(pixels16); - SSAVec8s pixels8lo = SSAVec8s::extendlo(pixels16); - SSAVec4i pixels[4] = - { - SSAVec4i::extendlo(pixels8lo), - SSAVec4i::extendhi(pixels8lo), - SSAVec4i::extendlo(pixels8hi), - SSAVec4i::extendhi(pixels8hi) - }; - - for (int sse = 0; sse < 4; sse++) - { - if (variant == TriDrawVariant::DrawSubsector || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector) - { - SSABool subsectorTest = subsectorbuffer[ix].load(true) >= subsectorDepth; - pixels[sse] = subsectorTest.select(ProcessPixel32(pixels[sse], AffineVaryingPosX), pixels[sse]); - } - else - { - pixels[sse] = ProcessPixel32(pixels[sse], AffineVaryingPosX); - } - - for (int i = 0; i < TriVertex::NumVarying; i++) - AffineVaryingPosX[i] = AffineVaryingPosX[i] + AffineVaryingStepX[i]; - } - - buf.store_unaligned_vec16ub(SSAVec16ub(SSAVec8s(pixels[0], pixels[1]), 
SSAVec8s(pixels[2], pixels[3]))); - } - else - { - SSAVec4i pixelsvec = buf.load_vec4ub(false); - SSAInt pixels[4] = - { - pixelsvec[0], - pixelsvec[1], - pixelsvec[2], - pixelsvec[3] - }; - - for (int sse = 0; sse < 4; sse++) - { - if (variant == TriDrawVariant::DrawSubsector || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector) - { - SSABool subsectorTest = subsectorbuffer[ix].load(true) >= subsectorDepth; - pixels[sse] = subsectorTest.select(ProcessPixel8(pixels[sse], AffineVaryingPosX), pixels[sse]); - } - else - { - pixels[sse] = ProcessPixel8(pixels[sse], AffineVaryingPosX); - } - - for (int i = 0; i < TriVertex::NumVarying; i++) - AffineVaryingPosX[i] = AffineVaryingPosX[i] + AffineVaryingStepX[i]; - } - - buf.store_vec4ub(SSAVec4i(pixels[0], pixels[1], pixels[2], pixels[3])); - } - - if (variant != TriDrawVariant::DrawSubsector && variant != TriDrawVariant::FillSubsector && variant != TriDrawVariant::FuzzSubsector) - subsectorbuffer[ix].store_unaligned_vec4i(SSAVec4i(subsectorDepth)); - } - - AffineW = AffineW + gradWY; - for (int i = 0; i < TriVertex::NumVarying; i++) - AffineVaryingPosY[i] = AffineVaryingPosY[i] + gradVaryingY[i]; - } - } -} - -void DrawTriangleCodegen::LoopPartialBlock(bool isSingleStencilValue) -{ - int pixelsize = truecolor ? 
4 : 1; - - if (variant == TriDrawVariant::Stencil || variant == TriDrawVariant::StencilClose) - { - if (isSingleStencilValue) - { - SSAInt stencilMask = StencilBlockMask.load(false); - SSAUByte val0 = stencilMask.trunc_ubyte(); - for (int i = 0; i < 8 * 8; i++) - StencilBlock[i].store(val0); - StencilBlockMask.store(SSAInt(0)); - } - - SSAUByte lastStencilValue = StencilBlock[0].load(false); - stack_stencilblock_restored.store(SSABool(true)); - stack_stencilblock_lastval.store(lastStencilValue); - } - - stack_CY1.store(C1 + DX12 * y0 - DY12 * x0); - stack_CY2.store(C2 + DX23 * y0 - DY23 * x0); - stack_CY3.store(C3 + DX31 * y0 - DY31 * x0); - stack_iy.store(SSAInt(0)); - stack_buffer.store(dest[x * pixelsize]); - stack_subsectorbuffer.store(subsectorGBuffer[x]); - stack_AffineW.store(posx_w); - for (int i = 0; i < TriVertex::NumVarying; i++) - { - stack_AffineVaryingPosY[i].store(posx_varying[i]); - } - - SSAForBlock loopy; - SSAInt iy = stack_iy.load(); - SSAUBytePtr buffer = stack_buffer.load(); - SSAIntPtr subsectorbuffer = stack_subsectorbuffer.load(); - SSAInt CY1 = stack_CY1.load(); - SSAInt CY2 = stack_CY2.load(); - SSAInt CY3 = stack_CY3.load(); - AffineW = stack_AffineW.load(); - for (int i = 0; i < TriVertex::NumVarying; i++) - AffineVaryingPosY[i] = stack_AffineVaryingPosY[i].load(); - loopy.loop_block(iy < SSAInt(q), q); - { - SetupAffineBlock(); - - for (int i = 0; i < TriVertex::NumVarying; i++) - stack_AffineVaryingPosX[i].store(AffineVaryingPosX[i]); - - stack_CX1.store(CY1); - stack_CX2.store(CY2); - stack_CX3.store(CY3); - stack_ix.store(SSAInt(0)); - - SSAForBlock loopx; - SSABool stencilblock_restored; - SSAUByte lastStencilValue; - if (variant == TriDrawVariant::Stencil || variant == TriDrawVariant::StencilClose) - { - stencilblock_restored = stack_stencilblock_restored.load(); - lastStencilValue = stack_stencilblock_lastval.load(); - } - SSAInt ix = stack_ix.load(); - SSAInt CX1 = stack_CX1.load(); - SSAInt CX2 = stack_CX2.load(); - SSAInt CX3 
= stack_CX3.load(); - for (int i = 0; i < TriVertex::NumVarying; i++) - AffineVaryingPosX[i] = stack_AffineVaryingPosX[i].load(); - loopx.loop_block(ix < SSAInt(q), q); - { - SSABool visible = (ix + x < clipright) && (iy + y < clipbottom); - SSABool covered = CX1 > SSAInt(0) && CX2 > SSAInt(0) && CX3 > SSAInt(0) && visible; - - if (!isSingleStencilValue) - { - SSAUByte stencilValue = StencilBlock[ix + iy * 8].load(false); - - if (variant == TriDrawVariant::DrawSubsector || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector) - { - covered = covered && SSABool::compare_uge(stencilValue, stencilTestValue) && subsectorbuffer[ix].load(true) >= subsectorDepth; - } - else if (variant == TriDrawVariant::StencilClose) - { - covered = covered && SSABool::compare_uge(stencilValue, stencilTestValue); - } - else - { - covered = covered && stencilValue == stencilTestValue; - } - } - else if (variant == TriDrawVariant::DrawSubsector || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector) - { - covered = covered && subsectorbuffer[ix].load(true) >= subsectorDepth; - } - - SSAIfBlock branch; - branch.if_block(covered); - { - if (variant == TriDrawVariant::Stencil) - { - StencilBlock[ix + iy * 8].store(stencilWriteValue); - } - else if (variant == TriDrawVariant::StencilClose) - { - StencilBlock[ix + iy * 8].store(stencilWriteValue); - subsectorbuffer[ix].store(subsectorDepth); - } - else - { - SSAUBytePtr buf = buffer[ix * pixelsize]; - - if (truecolor) - { - SSAVec4i bg = buf.load_vec4ub(false); - buf.store_vec4ub(ProcessPixel32(bg, AffineVaryingPosX)); - } - else - { - SSAUByte bg = buf.load(false); - buf.store(ProcessPixel8(bg.zext_int(), AffineVaryingPosX).trunc_ubyte()); - } - - if (variant != TriDrawVariant::DrawSubsector && variant != TriDrawVariant::FillSubsector && variant != TriDrawVariant::FuzzSubsector) - subsectorbuffer[ix].store(subsectorDepth); - } - } - branch.end_block(); - - if (variant == 
TriDrawVariant::Stencil || variant == TriDrawVariant::StencilClose) - { - SSAUByte newStencilValue = StencilBlock[ix + iy * 8].load(false); - stack_stencilblock_restored.store(stencilblock_restored && newStencilValue == lastStencilValue); - stack_stencilblock_lastval.store(newStencilValue); - } - - for (int i = 0; i < TriVertex::NumVarying; i++) - stack_AffineVaryingPosX[i].store(AffineVaryingPosX[i] + AffineVaryingStepX[i]); - - stack_CX1.store(CX1 - FDY12); - stack_CX2.store(CX2 - FDY23); - stack_CX3.store(CX3 - FDY31); - stack_ix.store(ix + 1); - } - loopx.end_block(); - - stack_AffineW.store(AffineW + gradWY); - for (int i = 0; i < TriVertex::NumVarying; i++) - stack_AffineVaryingPosY[i].store(AffineVaryingPosY[i] + gradVaryingY[i]); - stack_CY1.store(CY1 + FDX12); - stack_CY2.store(CY2 + FDX23); - stack_CY3.store(CY3 + FDX31); - stack_buffer.store(buffer[pitch * pixelsize]); - stack_subsectorbuffer.store(subsectorbuffer[pitch]); - stack_iy.store(iy + 1); - } - loopy.end_block(); - - if (variant == TriDrawVariant::Stencil || variant == TriDrawVariant::StencilClose) - { - SSAIfBlock branch; - SSABool restored = stack_stencilblock_restored.load(); - branch.if_block(restored); - { - SSAUByte lastStencilValue = stack_stencilblock_lastval.load(); - StencilClear(lastStencilValue); - } - branch.end_block(); - } -} - -#if 0 -void DrawTriangleCodegen::LoopMaskedStoreBlock() -{ - if (variant == TriDrawVariant::Stencil) - { - } - else if (variant == TriDrawVariant::StencilClose) - { - } - else - { - int pixelsize = truecolor ? 
4 : 1; - - AffineW = posx_w; - for (int i = 0; i < TriVertex::NumVarying; i++) - AffineVaryingPosY[i] = posx_varying[i]; - - SSAInt CY1 = C1 + DX12 * y0 - DY12 * x0; - SSAInt CY2 = C2 + DX23 * y0 - DY23 * x0; - SSAInt CY3 = C3 + DX31 * y0 - DY31 * x0; - - for (int iy = 0; iy < q; iy++) - { - SSAUBytePtr buffer = dest[(x + iy * pitch) * pixelsize]; - SSAIntPtr subsectorbuffer = subsectorGBuffer[x + iy * pitch]; - - SetupAffineBlock(); - - SSAInt CX1 = CY1; - SSAInt CX2 = CY2; - SSAInt CX3 = CY3; - - for (int ix = 0; ix < q; ix += 4) - { - SSABool covered[4]; - for (int maskindex = 0; maskindex < 4; maskindex++) - { - covered[maskindex] = CX1 > SSAInt(0) && CX2 > SSAInt(0) && CX3 > SSAInt(0); - - if (variant == TriDrawVariant::DrawSubsector || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector) - { - auto xx = SSAInt(ix + maskindex); - auto yy = SSAInt(iy); - covered[maskindex] = covered[maskindex] && SSABool::compare_uge(StencilGet(xx, yy), stencilTestValue) && subsectorbuffer[ix + maskindex].load(true) >= subsectorDepth; - } - else if (variant == TriDrawVariant::StencilClose) - { - auto xx = SSAInt(ix + maskindex); - auto yy = SSAInt(iy); - covered[maskindex] = covered[maskindex] && SSABool::compare_uge(StencilGet(xx, yy), stencilTestValue); - } - else - { - auto xx = SSAInt(ix + maskindex); - auto yy = SSAInt(iy); - covered[maskindex] = covered[maskindex] && StencilGet(xx, yy) == stencilTestValue; - } - - CX1 = CX1 - FDY12; - CX2 = CX2 - FDY23; - CX3 = CX3 - FDY31; - } - - SSAUBytePtr buf = buffer[ix * pixelsize]; - if (truecolor) - { - SSAVec16ub pixels16 = buf.load_unaligned_vec16ub(false); - SSAVec8s pixels8hi = SSAVec8s::extendhi(pixels16); - SSAVec8s pixels8lo = SSAVec8s::extendlo(pixels16); - SSAVec4i pixels[4] = - { - SSAVec4i::extendlo(pixels8lo), - SSAVec4i::extendhi(pixels8lo), - SSAVec4i::extendlo(pixels8hi), - SSAVec4i::extendhi(pixels8hi) - }; - - for (int sse = 0; sse < 4; sse++) - { - pixels[sse] = 
ProcessPixel32(pixels[sse], AffineVaryingPosX); - - for (int i = 0; i < TriVertex::NumVarying; i++) - AffineVaryingPosX[i] = AffineVaryingPosX[i] + AffineVaryingStepX[i]; - } - - buf.store_masked_vec16ub(SSAVec16ub(SSAVec8s(pixels[0], pixels[1]), SSAVec8s(pixels[2], pixels[3])), covered); - } - else - { - SSAVec4i pixelsvec = buf.load_vec4ub(false); - SSAInt pixels[4] = - { - pixelsvec[0], - pixelsvec[1], - pixelsvec[2], - pixelsvec[3] - }; - - for (int sse = 0; sse < 4; sse++) - { - pixels[sse] = ProcessPixel8(pixels[sse], AffineVaryingPosX); - - for (int i = 0; i < TriVertex::NumVarying; i++) - AffineVaryingPosX[i] = AffineVaryingPosX[i] + AffineVaryingStepX[i]; - } - - buf.store_masked_vec4ub(SSAVec4i(pixels[0], pixels[1], pixels[2], pixels[3]), covered); - } - - if (variant != TriDrawVariant::DrawSubsector && variant != TriDrawVariant::FillSubsector && variant != TriDrawVariant::FuzzSubsector) - subsectorbuffer[ix].store_masked_vec4i(SSAVec4i(subsectorDepth), covered); - } - - AffineW = AffineW + gradWY; - for (int i = 0; i < TriVertex::NumVarying; i++) - AffineVaryingPosY[i] = AffineVaryingPosY[i] + gradVaryingY[i]; - - CY1 = CY1 + FDX12; - CY2 = CY2 + FDX23; - CY3 = CY3 + FDX31; - } - } -} -#endif - -void DrawTriangleCodegen::SetStencilBlock(SSAInt block) -{ - StencilBlock = stencilValues[block * 64]; - StencilBlockMask = stencilMasks[block]; -} - -SSAUByte DrawTriangleCodegen::StencilGetSingle() -{ - return StencilBlockMask.load(false).trunc_ubyte(); -} - -void DrawTriangleCodegen::StencilClear(SSAUByte value) -{ - StencilBlockMask.store(SSAInt(0xffffff00) | value.zext_int()); -} - -SSABool DrawTriangleCodegen::StencilIsSingleValue() -{ - return (StencilBlockMask.load(false) & SSAInt(0xffffff00)) == SSAInt(0xffffff00); -} - -void DrawTriangleCodegen::LoadArgs(SSAValue args, SSAValue thread_data) -{ - dest = args[0][0].load(true); - pitch = args[0][1].load(true); - v1 = LoadTriVertex(args[0][2].load(true)); - v2 = LoadTriVertex(args[0][3].load(true)); - v3 = 
LoadTriVertex(args[0][4].load(true)); - clipright = args[0][6].load(true); - clipbottom = args[0][8].load(true); - texturePixels = args[0][9].load(true); - textureWidth = args[0][10].load(true); - textureHeight = args[0][11].load(true); - translation = args[0][12].load(true); - LoadUniforms(args[0][13].load(true)); - stencilValues = args[0][14].load(true); - stencilMasks = args[0][15].load(true); - stencilPitch = args[0][16].load(true); - stencilTestValue = args[0][17].load(true); - stencilWriteValue = args[0][18].load(true); - subsectorGBuffer = args[0][19].load(true); - if (!truecolor) - { - Colormaps = args[0][20].load(true); - RGB32k = args[0][21].load(true); - BaseColors = args[0][22].load(true); - } - - thread.core = thread_data[0][0].load(true); - thread.num_cores = thread_data[0][1].load(true); - thread.pass_start_y = SSAInt(0); - thread.pass_end_y = SSAInt(32000); -} - -#endif diff --git a/tools/drawergen/fixedfunction/drawtrianglecodegen.h b/tools/drawergen/fixedfunction/drawtrianglecodegen.h index 8197656fa2..69783e8454 100644 --- a/tools/drawergen/fixedfunction/drawtrianglecodegen.h +++ b/tools/drawergen/fixedfunction/drawtrianglecodegen.h @@ -109,128 +109,3 @@ private: bool colorfill; int pixelsize; }; - -#if 0 - -class DrawTriangleCodegen : public DrawerCodegen -{ -public: - void Generate(TriDrawVariant variant, TriBlendMode blendmode, bool truecolor, SSAValue args, SSAValue thread_data); - -private: - void LoadArgs(SSAValue args, SSAValue thread_data); - SSATriVertex LoadTriVertex(SSAValue v); - void LoadUniforms(SSAValue uniforms); - void Setup(); - SSAInt FloatTo28_4(SSAFloat v); - void LoopBlockY(); - void LoopBlockX(); - void LoopFullBlock(); - void LoopPartialBlock(bool isSingleStencilValue); - void SetupAffineBlock(); - - SSAVec4i ProcessPixel32(SSAVec4i bg, SSAInt *varying); - SSAInt ProcessPixel8(SSAInt bg, SSAInt *varying); - - SSAVec4i TranslateSample32(SSAInt *varying); - SSAInt TranslateSample8(SSAInt *varying); - SSAVec4i Sample32(SSAInt 
*varying); - SSAInt Sample8(SSAInt *varying); - SSAInt Shade8(SSAInt c); - SSAVec4i ToBgra(SSAInt index); - SSAInt ToPal8(SSAVec4i c); - - SSAVec4i FadeOut(SSAInt frac, SSAVec4i color); - - void SetStencilBlock(SSAInt block); - void StencilClear(SSAUByte value); - SSAUByte StencilGetSingle(); - SSABool StencilIsSingleValue(); - - SSAFloat gradx(SSAFloat x0, SSAFloat y0, SSAFloat x1, SSAFloat y1, SSAFloat x2, SSAFloat y2, SSAFloat c0, SSAFloat c1, SSAFloat c2); - SSAFloat grady(SSAFloat x0, SSAFloat y0, SSAFloat x1, SSAFloat y1, SSAFloat x2, SSAFloat y2, SSAFloat c0, SSAFloat c1, SSAFloat c2); - - TriDrawVariant variant; - TriBlendMode blendmode; - bool truecolor; - - SSAStack stack_C1, stack_C2, stack_C3; - SSAStack stack_y; - SSAStack stack_dest; - SSAStack stack_x; - SSAStack stack_buffer; - SSAStack stack_iy, stack_ix; - SSAStack stack_CY1, stack_CY2, stack_CY3; - SSAStack stack_CX1, stack_CX2, stack_CX3; - SSAStack stack_posy_w, stack_posy_varying[TriVertex::NumVarying]; - SSAStack stack_posx_w, stack_posx_varying[TriVertex::NumVarying]; - SSAStack stack_AffineW; - SSAStack stack_AffineVaryingPosY[TriVertex::NumVarying]; - SSAStack stack_AffineVaryingPosX[TriVertex::NumVarying]; - SSAStack stack_stencilblock_restored; - SSAStack stack_stencilblock_lastval; - - SSAUBytePtr dest; - SSAInt pitch; - SSATriVertex v1; - SSATriVertex v2; - SSATriVertex v3; - SSAInt clipright; - SSAInt clipbottom; - SSAUBytePtr texturePixels; - SSAInt textureWidth; - SSAInt textureHeight; - SSAUBytePtr translation; - SSAInt color, srcalpha, destalpha; - - SSAInt light; - SSAShadeConstants shade_constants; - SSABool is_simple_shade; - SSABool is_nearest_filter; - SSABool is_fixed_light; - - SSAUBytePtr stencilValues; - SSAIntPtr stencilMasks; - SSAInt stencilPitch; - SSAUByte stencilTestValue; - SSAUByte stencilWriteValue; - - SSAUBytePtr Colormaps; - SSAUBytePtr RGB32k; - SSAUBytePtr BaseColors; - - SSAWorkerThread thread; - - // Block size, standard 8x8 (must be power of two) - const 
int q = 8; - - SSAInt Y1, Y2, Y3; - SSAInt X1, X2, X3; - SSAInt DX12, DX23, DX31; - SSAInt DY12, DY23, DY31; - SSAInt FDX12, FDX23, FDX31; - SSAInt FDY12, FDY23, FDY31; - SSAInt minx, maxx, miny, maxy; - SSAInt C1, C2, C3; - SSAFloat gradWX, gradWY; - SSAFloat gradVaryingX[TriVertex::NumVarying], gradVaryingY[TriVertex::NumVarying]; - SSAFloat shade; - - SSAInt x, y; - SSAInt x0, x1, y0, y1; - SSAInt currentlight; - SSAUBytePtr currentcolormap; - SSAFloat AffineW; - SSABool AffineLinear; - SSAFloat AffineVaryingPosY[TriVertex::NumVarying]; - SSAInt AffineVaryingPosX[TriVertex::NumVarying]; - SSAInt AffineVaryingStepX[TriVertex::NumVarying]; - - SSAFloat posy_w, posy_varying[TriVertex::NumVarying]; - SSAFloat posx_w, posx_varying[TriVertex::NumVarying]; - - SSAUBytePtr StencilBlock; - SSAIntPtr StencilBlockMask; -}; - -#endif diff --git a/tools/drawergen/fixedfunction/setuptrianglecodegen.cpp b/tools/drawergen/fixedfunction/setuptrianglecodegen.cpp new file mode 100644 index 0000000000..f29764d9eb --- /dev/null +++ b/tools/drawergen/fixedfunction/setuptrianglecodegen.cpp @@ -0,0 +1,573 @@ +/* +** DrawTriangle code generation +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. 
This notice may not be removed or altered from any source distribution. +** +*/ + +#include "precomp.h" +#include "timestamp.h" +#include "fixedfunction/setuptrianglecodegen.h" +#include "ssa/ssa_function.h" +#include "ssa/ssa_scope.h" +#include "ssa/ssa_for_block.h" +#include "ssa/ssa_if_block.h" +#include "ssa/ssa_stack.h" +#include "ssa/ssa_function.h" +#include "ssa/ssa_struct_type.h" +#include "ssa/ssa_value.h" + +void SetupTriangleCodegen::Generate(bool subsectorTest, SSAValue args, SSAValue thread_data) +{ + this->subsectorTest = subsectorTest; + LoadArgs(args, thread_data); + Setup(); + LoopBlockY(); +} + +SSAInt SetupTriangleCodegen::FloatTo28_4(SSAFloat v) +{ + // SSAInt(SSAFloat::round(16.0f * v), false); + SSAInt a = SSAInt(v * 32.0f, false); + return (a + (a.ashr(31) | SSAInt(1))).ashr(1); +} + +void SetupTriangleCodegen::Setup() +{ + // 28.4 fixed-point coordinates + Y1 = FloatTo28_4(v1.y); + Y2 = FloatTo28_4(v2.y); + Y3 = FloatTo28_4(v3.y); + + X1 = FloatTo28_4(v1.x); + X2 = FloatTo28_4(v2.x); + X3 = FloatTo28_4(v3.x); + + // Deltas + DX12 = X1 - X2; + DX23 = X2 - X3; + DX31 = X3 - X1; + + DY12 = Y1 - Y2; + DY23 = Y2 - Y3; + DY31 = Y3 - Y1; + + // Fixed-point deltas + FDX12 = DX12 << 4; + FDX23 = DX23 << 4; + FDX31 = DX31 << 4; + + FDY12 = DY12 << 4; + FDY23 = DY23 << 4; + FDY31 = DY31 << 4; + + // Bounding rectangle + minx = SSAInt::MAX((SSAInt::MIN(SSAInt::MIN(X1, X2), X3) + 0xF).ashr(4), SSAInt(0)); + maxx = SSAInt::MIN((SSAInt::MAX(SSAInt::MAX(X1, X2), X3) + 0xF).ashr(4), clipright - 1); + miny = SSAInt::MAX((SSAInt::MIN(SSAInt::MIN(Y1, Y2), Y3) + 0xF).ashr(4), SSAInt(0)); + maxy = SSAInt::MIN((SSAInt::MAX(SSAInt::MAX(Y1, Y2), Y3) + 0xF).ashr(4), clipbottom - 1); + + SSAIfBlock if0; + if0.if_block(minx >= maxx || miny >= maxy); + if0.end_retvoid(); + + // Start in corner of 8x8 block + minx = minx & ~(q - 1); + miny = miny & ~(q - 1); + + // Half-edge constants + C1 = DY12 * X1 - DX12 * Y1; + C2 = DY23 * X2 - DX23 * Y2; + C3 = DY31 * X3 - DX31 * 
Y3; + + // Correct for fill convention + SSAIfBlock if1; + if1.if_block(DY12 < SSAInt(0) || (DY12 == SSAInt(0) && DX12 > SSAInt(0))); + stack_C1.store(C1 + 1); + if1.else_block(); + stack_C1.store(C1); + if1.end_block(); + C1 = stack_C1.load(); + SSAIfBlock if2; + if2.if_block(DY23 < SSAInt(0) || (DY23 == SSAInt(0) && DX23 > SSAInt(0))); + stack_C2.store(C2 + 1); + if2.else_block(); + stack_C2.store(C2); + if2.end_block(); + C2 = stack_C2.load(); + SSAIfBlock if3; + if3.if_block(DY31 < SSAInt(0) || (DY31 == SSAInt(0) && DX31 > SSAInt(0))); + stack_C3.store(C3 + 1); + if3.else_block(); + stack_C3.store(C3); + if3.end_block(); + C3 = stack_C3.load(); +} + +void SetupTriangleCodegen::LoopBlockY() +{ + SSAInt blocks_skipped = skipped_by_thread(miny / q, thread); + stack_y.store(miny + blocks_skipped * q); + stack_subsectorGBuffer.store(subsectorGBuffer[blocks_skipped * q * pitch]); + + SSAForBlock loop; + y = stack_y.load(); + subsectorGBuffer = stack_subsectorGBuffer.load(); + loop.loop_block(y < maxy, 0); + { + LoopBlockX(); + + stack_subsectorGBuffer.store(subsectorGBuffer[q * pitch * thread.num_cores]); + stack_y.store(y + thread.num_cores * q); + } + loop.end_block(); +} + +void SetupTriangleCodegen::LoopBlockX() +{ + stack_x.store(minx); + + SSAForBlock loop; + x = stack_x.load(); + loop.loop_block(x < maxx, 0); + { + // Corners of block + x0 = x << 4; + x1 = (x + q - 1) << 4; + y0 = y << 4; + y1 = (y + q - 1) << 4; + + // Evaluate half-space functions + SSABool a00 = C1 + DX12 * y0 - DY12 * x0 > SSAInt(0); + SSABool a10 = C1 + DX12 * y0 - DY12 * x1 > SSAInt(0); + SSABool a01 = C1 + DX12 * y1 - DY12 * x0 > SSAInt(0); + SSABool a11 = C1 + DX12 * y1 - DY12 * x1 > SSAInt(0); + + SSAInt a = (a00.zext_int() << 0) | (a10.zext_int() << 1) | (a01.zext_int() << 2) | (a11.zext_int() << 3); + + SSABool b00 = C2 + DX23 * y0 - DY23 * x0 > SSAInt(0); + SSABool b10 = C2 + DX23 * y0 - DY23 * x1 > SSAInt(0); + SSABool b01 = C2 + DX23 * y1 - DY23 * x0 > SSAInt(0); + SSABool b11 = 
C2 + DX23 * y1 - DY23 * x1 > SSAInt(0); + SSAInt b = (b00.zext_int() << 0) | (b10.zext_int() << 1) | (b01.zext_int() << 2) | (b11.zext_int() << 3); + + SSABool c00 = C3 + DX31 * y0 - DY31 * x0 > SSAInt(0); + SSABool c10 = C3 + DX31 * y0 - DY31 * x1 > SSAInt(0); + SSABool c01 = C3 + DX31 * y1 - DY31 * x0 > SSAInt(0); + SSABool c11 = C3 + DX31 * y1 - DY31 * x1 > SSAInt(0); + SSAInt c = (c00.zext_int() << 0) | (c10.zext_int() << 1) | (c01.zext_int() << 2) | (c11.zext_int() << 3); + + // Skip block when outside an edge + SSABool process_block = !(a == SSAInt(0) || b == SSAInt(0) || c == SSAInt(0)); + + SetStencilBlock(x / 8 + y / 8 * stencilPitch); + + // Stencil test the whole block, if possible + if (subsectorTest) + { + process_block = process_block && (!StencilIsSingleValue() || SSABool::compare_uge(StencilGetSingle(), stencilTestValue)); + } + else + { + process_block = process_block && (!StencilIsSingleValue() || StencilGetSingle() == stencilTestValue); + } + + SSAIfBlock branch; + branch.if_block(process_block); + + // Check if block needs clipping + SSABool clipneeded = (x + q) > clipright || (y + q) > clipbottom; + + SSABool covered = a == SSAInt(0xF) && b == SSAInt(0xF) && c == SSAInt(0xF) && !clipneeded && StencilIsSingleValue(); + + // Accept whole block when totally covered + SSAIfBlock branch_covered; + branch_covered.if_block(covered); + { + LoopFullBlock(); + } + branch_covered.else_block(); + { + SSAIfBlock branch_covered_stencil; + branch_covered_stencil.if_block(StencilIsSingleValue()); + { + SSABool stenciltestpass; + if (subsectorTest) + { + stenciltestpass = SSABool::compare_uge(StencilGetSingle(), stencilTestValue); + } + else + { + stenciltestpass = StencilGetSingle() == stencilTestValue; + } + + SSAIfBlock branch_stenciltestpass; + branch_stenciltestpass.if_block(stenciltestpass); + { + LoopPartialBlock(true); + } + branch_stenciltestpass.end_block(); + } + branch_covered_stencil.else_block(); + { + LoopPartialBlock(false); + } + 
branch_covered_stencil.end_block(); + } + branch_covered.end_block(); + + branch.end_block(); + + stack_x.store(x + q); + } + loop.end_block(); +} + +void SetupTriangleCodegen::LoopFullBlock() +{ + /* + if (variant == TriDrawVariant::Stencil) + { + StencilClear(stencilWriteValue); + } + else if (variant == TriDrawVariant::StencilClose) + { + StencilClear(stencilWriteValue); + for (int iy = 0; iy < q; iy++) + { + SSAIntPtr subsectorbuffer = subsectorGBuffer[x + iy * pitch]; + for (int ix = 0; ix < q; ix += 4) + { + subsectorbuffer[ix].store_unaligned_vec4i(SSAVec4i(subsectorDepth)); + } + } + } + else + { + int pixelsize = truecolor ? 4 : 1; + + AffineW = posx_w; + for (int i = 0; i < TriVertex::NumVarying; i++) + AffineVaryingPosY[i] = posx_varying[i]; + + for (int iy = 0; iy < q; iy++) + { + SSAUBytePtr buffer = dest[(x + iy * pitch) * pixelsize]; + SSAIntPtr subsectorbuffer = subsectorGBuffer[x + iy * pitch]; + + SetupAffineBlock(); + + for (int ix = 0; ix < q; ix += 4) + { + SSAUBytePtr buf = buffer[ix * pixelsize]; + if (truecolor) + { + SSAVec16ub pixels16 = buf.load_unaligned_vec16ub(false); + SSAVec8s pixels8hi = SSAVec8s::extendhi(pixels16); + SSAVec8s pixels8lo = SSAVec8s::extendlo(pixels16); + SSAVec4i pixels[4] = + { + SSAVec4i::extendlo(pixels8lo), + SSAVec4i::extendhi(pixels8lo), + SSAVec4i::extendlo(pixels8hi), + SSAVec4i::extendhi(pixels8hi) + }; + + for (int sse = 0; sse < 4; sse++) + { + if (variant == TriDrawVariant::DrawSubsector || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector) + { + SSABool subsectorTest = subsectorbuffer[ix].load(true) >= subsectorDepth; + pixels[sse] = subsectorTest.select(ProcessPixel32(pixels[sse], AffineVaryingPosX), pixels[sse]); + } + else + { + pixels[sse] = ProcessPixel32(pixels[sse], AffineVaryingPosX); + } + + for (int i = 0; i < TriVertex::NumVarying; i++) + AffineVaryingPosX[i] = AffineVaryingPosX[i] + AffineVaryingStepX[i]; + } + + 
buf.store_unaligned_vec16ub(SSAVec16ub(SSAVec8s(pixels[0], pixels[1]), SSAVec8s(pixels[2], pixels[3]))); + } + else + { + SSAVec4i pixelsvec = buf.load_vec4ub(false); + SSAInt pixels[4] = + { + pixelsvec[0], + pixelsvec[1], + pixelsvec[2], + pixelsvec[3] + }; + + for (int sse = 0; sse < 4; sse++) + { + if (variant == TriDrawVariant::DrawSubsector || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector) + { + SSABool subsectorTest = subsectorbuffer[ix].load(true) >= subsectorDepth; + pixels[sse] = subsectorTest.select(ProcessPixel8(pixels[sse], AffineVaryingPosX), pixels[sse]); + } + else + { + pixels[sse] = ProcessPixel8(pixels[sse], AffineVaryingPosX); + } + + for (int i = 0; i < TriVertex::NumVarying; i++) + AffineVaryingPosX[i] = AffineVaryingPosX[i] + AffineVaryingStepX[i]; + } + + buf.store_vec4ub(SSAVec4i(pixels[0], pixels[1], pixels[2], pixels[3])); + } + + if (variant != TriDrawVariant::DrawSubsector && variant != TriDrawVariant::FillSubsector && variant != TriDrawVariant::FuzzSubsector) + subsectorbuffer[ix].store_unaligned_vec4i(SSAVec4i(subsectorDepth)); + } + + AffineW = AffineW + gradWY; + for (int i = 0; i < TriVertex::NumVarying; i++) + AffineVaryingPosY[i] = AffineVaryingPosY[i] + gradVaryingY[i]; + } + } + */ +} + +void SetupTriangleCodegen::LoopPartialBlock(bool isSingleStencilValue) +{ + /* + int pixelsize = truecolor ? 
4 : 1; + + if (variant == TriDrawVariant::Stencil || variant == TriDrawVariant::StencilClose) + { + if (isSingleStencilValue) + { + SSAInt stencilMask = StencilBlockMask.load(false); + SSAUByte val0 = stencilMask.trunc_ubyte(); + for (int i = 0; i < 8 * 8; i++) + StencilBlock[i].store(val0); + StencilBlockMask.store(SSAInt(0)); + } + + SSAUByte lastStencilValue = StencilBlock[0].load(false); + stack_stencilblock_restored.store(SSABool(true)); + stack_stencilblock_lastval.store(lastStencilValue); + } + + stack_CY1.store(C1 + DX12 * y0 - DY12 * x0); + stack_CY2.store(C2 + DX23 * y0 - DY23 * x0); + stack_CY3.store(C3 + DX31 * y0 - DY31 * x0); + stack_iy.store(SSAInt(0)); + stack_buffer.store(dest[x * pixelsize]); + stack_subsectorbuffer.store(subsectorGBuffer[x]); + stack_AffineW.store(posx_w); + for (int i = 0; i < TriVertex::NumVarying; i++) + { + stack_AffineVaryingPosY[i].store(posx_varying[i]); + } + + SSAForBlock loopy; + SSAInt iy = stack_iy.load(); + SSAUBytePtr buffer = stack_buffer.load(); + SSAIntPtr subsectorbuffer = stack_subsectorbuffer.load(); + SSAInt CY1 = stack_CY1.load(); + SSAInt CY2 = stack_CY2.load(); + SSAInt CY3 = stack_CY3.load(); + AffineW = stack_AffineW.load(); + for (int i = 0; i < TriVertex::NumVarying; i++) + AffineVaryingPosY[i] = stack_AffineVaryingPosY[i].load(); + loopy.loop_block(iy < SSAInt(q), q); + { + SetupAffineBlock(); + + for (int i = 0; i < TriVertex::NumVarying; i++) + stack_AffineVaryingPosX[i].store(AffineVaryingPosX[i]); + + stack_CX1.store(CY1); + stack_CX2.store(CY2); + stack_CX3.store(CY3); + stack_ix.store(SSAInt(0)); + + SSAForBlock loopx; + SSABool stencilblock_restored; + SSAUByte lastStencilValue; + if (variant == TriDrawVariant::Stencil || variant == TriDrawVariant::StencilClose) + { + stencilblock_restored = stack_stencilblock_restored.load(); + lastStencilValue = stack_stencilblock_lastval.load(); + } + SSAInt ix = stack_ix.load(); + SSAInt CX1 = stack_CX1.load(); + SSAInt CX2 = stack_CX2.load(); + SSAInt CX3 
= stack_CX3.load(); + for (int i = 0; i < TriVertex::NumVarying; i++) + AffineVaryingPosX[i] = stack_AffineVaryingPosX[i].load(); + loopx.loop_block(ix < SSAInt(q), q); + { + SSABool visible = (ix + x < clipright) && (iy + y < clipbottom); + SSABool covered = CX1 > SSAInt(0) && CX2 > SSAInt(0) && CX3 > SSAInt(0) && visible; + + if (!isSingleStencilValue) + { + SSAUByte stencilValue = StencilBlock[ix + iy * 8].load(false); + + if (variant == TriDrawVariant::DrawSubsector || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector) + { + covered = covered && SSABool::compare_uge(stencilValue, stencilTestValue) && subsectorbuffer[ix].load(true) >= subsectorDepth; + } + else if (variant == TriDrawVariant::StencilClose) + { + covered = covered && SSABool::compare_uge(stencilValue, stencilTestValue); + } + else + { + covered = covered && stencilValue == stencilTestValue; + } + } + else if (variant == TriDrawVariant::DrawSubsector || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector) + { + covered = covered && subsectorbuffer[ix].load(true) >= subsectorDepth; + } + + SSAIfBlock branch; + branch.if_block(covered); + { + if (variant == TriDrawVariant::Stencil) + { + StencilBlock[ix + iy * 8].store(stencilWriteValue); + } + else if (variant == TriDrawVariant::StencilClose) + { + StencilBlock[ix + iy * 8].store(stencilWriteValue); + subsectorbuffer[ix].store(subsectorDepth); + } + else + { + SSAUBytePtr buf = buffer[ix * pixelsize]; + + if (truecolor) + { + SSAVec4i bg = buf.load_vec4ub(false); + buf.store_vec4ub(ProcessPixel32(bg, AffineVaryingPosX)); + } + else + { + SSAUByte bg = buf.load(false); + buf.store(ProcessPixel8(bg.zext_int(), AffineVaryingPosX).trunc_ubyte()); + } + + if (variant != TriDrawVariant::DrawSubsector && variant != TriDrawVariant::FillSubsector && variant != TriDrawVariant::FuzzSubsector) + subsectorbuffer[ix].store(subsectorDepth); + } + } + branch.end_block(); + + if (variant == 
TriDrawVariant::Stencil || variant == TriDrawVariant::StencilClose) + { + SSAUByte newStencilValue = StencilBlock[ix + iy * 8].load(false); + stack_stencilblock_restored.store(stencilblock_restored && newStencilValue == lastStencilValue); + stack_stencilblock_lastval.store(newStencilValue); + } + + for (int i = 0; i < TriVertex::NumVarying; i++) + stack_AffineVaryingPosX[i].store(AffineVaryingPosX[i] + AffineVaryingStepX[i]); + + stack_CX1.store(CX1 - FDY12); + stack_CX2.store(CX2 - FDY23); + stack_CX3.store(CX3 - FDY31); + stack_ix.store(ix + 1); + } + loopx.end_block(); + + stack_AffineW.store(AffineW + gradWY); + for (int i = 0; i < TriVertex::NumVarying; i++) + stack_AffineVaryingPosY[i].store(AffineVaryingPosY[i] + gradVaryingY[i]); + stack_CY1.store(CY1 + FDX12); + stack_CY2.store(CY2 + FDX23); + stack_CY3.store(CY3 + FDX31); + stack_buffer.store(buffer[pitch * pixelsize]); + stack_subsectorbuffer.store(subsectorbuffer[pitch]); + stack_iy.store(iy + 1); + } + loopy.end_block(); + + if (variant == TriDrawVariant::Stencil || variant == TriDrawVariant::StencilClose) + { + SSAIfBlock branch; + SSABool restored = stack_stencilblock_restored.load(); + branch.if_block(restored); + { + SSAUByte lastStencilValue = stack_stencilblock_lastval.load(); + StencilClear(lastStencilValue); + } + branch.end_block(); + } + */ +} + +void SetupTriangleCodegen::SetStencilBlock(SSAInt block) +{ + StencilBlock = stencilValues[block * 64]; + StencilBlockMask = stencilMasks[block]; +} + +SSAUByte SetupTriangleCodegen::StencilGetSingle() +{ + return StencilBlockMask.load(false).trunc_ubyte(); +} + +void SetupTriangleCodegen::StencilClear(SSAUByte value) +{ + StencilBlockMask.store(SSAInt(0xffffff00) | value.zext_int()); +} + +SSABool SetupTriangleCodegen::StencilIsSingleValue() +{ + return (StencilBlockMask.load(false) & SSAInt(0xffffff00)) == SSAInt(0xffffff00); +} + +void SetupTriangleCodegen::LoadArgs(SSAValue args, SSAValue thread_data) +{ + pitch = args[0][1].load(true); + v1 = 
LoadTriVertex(args[0][2].load(true)); + v2 = LoadTriVertex(args[0][3].load(true)); + v3 = LoadTriVertex(args[0][4].load(true)); + clipright = args[0][6].load(true); + clipbottom = args[0][8].load(true); + stencilValues = args[0][14].load(true); + stencilMasks = args[0][15].load(true); + stencilPitch = args[0][16].load(true); + stencilTestValue = args[0][17].load(true); + stencilWriteValue = args[0][18].load(true); + subsectorGBuffer = args[0][19].load(true); + + thread.core = thread_data[0][0].load(true); + thread.num_cores = thread_data[0][1].load(true); + thread.pass_start_y = SSAInt(0); + thread.pass_end_y = SSAInt(32000); +} + +SSASetupVertex SetupTriangleCodegen::LoadTriVertex(SSAValue ptr) +{ + SSASetupVertex v; + v.x = ptr[0][0].load(true); + v.y = ptr[0][1].load(true); + v.z = ptr[0][2].load(true); + v.w = ptr[0][3].load(true); + return v; +} diff --git a/tools/drawergen/fixedfunction/setuptrianglecodegen.h b/tools/drawergen/fixedfunction/setuptrianglecodegen.h new file mode 100644 index 0000000000..f8db559a29 --- /dev/null +++ b/tools/drawergen/fixedfunction/setuptrianglecodegen.h @@ -0,0 +1,98 @@ +/* +** SetupTriangle code generation +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. 
This notice may not be removed or altered from any source distribution. +** +*/ + +#pragma once + +#include "drawercodegen.h" + +struct SSASetupVertex +{ + SSAFloat x, y, z, w; +}; + +class SetupTriangleCodegen : public DrawerCodegen +{ +public: + void Generate(bool subsectorTest, SSAValue args, SSAValue thread_data); + +private: + void LoadArgs(SSAValue args, SSAValue thread_data); + SSASetupVertex LoadTriVertex(SSAValue v); + void Setup(); + SSAInt FloatTo28_4(SSAFloat v); + void LoopBlockY(); + void LoopBlockX(); + void LoopFullBlock(); + void LoopPartialBlock(bool isSingleStencilValue); + + void SetStencilBlock(SSAInt block); + void StencilClear(SSAUByte value); + SSAUByte StencilGetSingle(); + SSABool StencilIsSingleValue(); + + bool subsectorTest; + + SSAStack stack_C1, stack_C2, stack_C3; + SSAStack stack_y; + SSAStack stack_subsectorGBuffer; + SSAStack stack_x; + SSAStack stack_buffer; + SSAStack stack_iy, stack_ix; + SSAStack stack_CY1, stack_CY2, stack_CY3; + SSAStack stack_CX1, stack_CX2, stack_CX3; + //SSAStack stack_stencilblock_restored; + //SSAStack stack_stencilblock_lastval; + + SSAUBytePtr subsectorGBuffer; + SSAInt pitch; + SSASetupVertex v1; + SSASetupVertex v2; + SSASetupVertex v3; + SSAInt clipright; + SSAInt clipbottom; + + SSAUBytePtr stencilValues; + SSAIntPtr stencilMasks; + SSAInt stencilPitch; + SSAUByte stencilTestValue; + SSAUByte stencilWriteValue; + + SSAWorkerThread thread; + + // Block size, standard 8x8 (must be power of two) + const int q = 8; + + SSAInt Y1, Y2, Y3; + SSAInt X1, X2, X3; + SSAInt DX12, DX23, DX31; + SSAInt DY12, DY23, DY31; + SSAInt FDX12, FDX23, FDX31; + SSAInt FDY12, FDY23, FDY31; + SSAInt minx, maxx, miny, maxy; + SSAInt C1, C2, C3; + + SSAInt x, y; + SSAInt x0, x1, y0, y1; + + SSAUBytePtr StencilBlock; + SSAIntPtr StencilBlockMask; +}; From 9302b3b8bf03e9cb399676f86542e261c103be98 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 16 Dec 2016 19:09:17 +0100 Subject: [PATCH 496/912] Fix 64 bit building 
--- tools/drawergen/CMakeLists.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/drawergen/CMakeLists.txt b/tools/drawergen/CMakeLists.txt index 12e004e28a..6a5804e8a1 100644 --- a/tools/drawergen/CMakeLists.txt +++ b/tools/drawergen/CMakeLists.txt @@ -7,6 +7,10 @@ include(../../precompiled_headers.cmake) # Path where it looks for the LLVM compiled files on Windows set( LLVM_PRECOMPILED_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../../llvm" ) +if( CMAKE_SIZEOF_VOID_P MATCHES "8" ) + set( X64 64 ) +endif() + if( NOT DRAWERGEN_LIBS ) set( DRAWERGEN_LIBS "" ) endif() From e1a874958687a0126b406c606a44fab567805658 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 16 Dec 2016 19:28:25 +0100 Subject: [PATCH 497/912] Wrong type --- tools/drawergen/fixedfunction/setuptrianglecodegen.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/drawergen/fixedfunction/setuptrianglecodegen.h b/tools/drawergen/fixedfunction/setuptrianglecodegen.h index f8db559a29..dda7b7667d 100644 --- a/tools/drawergen/fixedfunction/setuptrianglecodegen.h +++ b/tools/drawergen/fixedfunction/setuptrianglecodegen.h @@ -53,7 +53,7 @@ private: SSAStack stack_C1, stack_C2, stack_C3; SSAStack stack_y; - SSAStack stack_subsectorGBuffer; + SSAStack stack_subsectorGBuffer; SSAStack stack_x; SSAStack stack_buffer; SSAStack stack_iy, stack_ix; @@ -62,7 +62,7 @@ private: //SSAStack stack_stencilblock_restored; //SSAStack stack_stencilblock_lastval; - SSAUBytePtr subsectorGBuffer; + SSAIntPtr subsectorGBuffer; SSAInt pitch; SSASetupVertex v1; SSASetupVertex v2; From b5f3f63b9327706fd92fe78a02da2903a354105b Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 16 Dec 2016 20:52:56 +0100 Subject: [PATCH 498/912] Minor bug fixes --- src/r_poly.cpp | 2 +- src/r_poly.h | 2 ++ src/r_poly_cull.cpp | 30 ++++++++++++++++++++++++++---- src/r_poly_scene.cpp | 2 +- 4 files changed, 30 insertions(+), 6 deletions(-) diff --git a/src/r_poly.cpp b/src/r_poly.cpp index 
2e100b3955..f7fa339795 100644 --- a/src/r_poly.cpp +++ b/src/r_poly.cpp @@ -74,7 +74,7 @@ void PolyRenderer::RenderActorView(AActor *actor, bool dontmaplines) { NetUpdate(); - //swrenderer::r_dontmaplines = dontmaplines; + DontMapLines = dontmaplines; P_FindParticleSubsectors(); PO_LinkToSubsectors(); diff --git a/src/r_poly.h b/src/r_poly.h index f34106a4b2..10d2e92aa9 100644 --- a/src/r_poly.h +++ b/src/r_poly.h @@ -50,6 +50,8 @@ public: bool InsertSeenLinePortal(FLinePortal *portal); bool InsertSeenMirror(line_t *mirrorLine); + bool DontMapLines = false; + private: void ClearBuffers(); void SetSceneViewport(); diff --git a/src/r_poly_cull.cpp b/src/r_poly_cull.cpp index 59b7c0c5a3..02a0f2fc51 100644 --- a/src/r_poly_cull.cpp +++ b/src/r_poly_cull.cpp @@ -239,10 +239,31 @@ LineSegmentRange PolyCull::GetSegmentRangeForLine(double x1, double y1, double x { double znear = 5.0; double updownnear = -400.0; + double sidenear = 400.0; - // Cull if entirely behind the portal clip plane (tbd: should we clip the segment?) 
- if (Vec4f::dot(PortalClipPlane, Vec4f((float)x1, (float)y1, 0.0f, 1.0f)) < 0.0f && Vec4f::dot(PortalClipPlane, Vec4f((float)x2, (float)y2, 0.0f, 1.0f)) < 0.0f) + // Clip line to the portal clip plane + float distance1 = Vec4f::dot(PortalClipPlane, Vec4f((float)x1, (float)y1, 0.0f, 1.0f)); + float distance2 = Vec4f::dot(PortalClipPlane, Vec4f((float)x2, (float)y2, 0.0f, 1.0f)); + if (distance1 < 0.0f && distance2 < 0.0f) + { return LineSegmentRange::NotVisible; + } + else if (distance1 < 0.0f || distance2 < 0.0f) + { + double t1 = 0.0f, t2 = 1.0f; + if (distance1 < 0.0f) + t1 = clamp(distance1 / (distance1 - distance2), 0.0f, 1.0f); + else + t2 = clamp(distance2 / (distance1 - distance2), 0.0f, 1.0f); + double nx1 = x1 * (1.0 - t1) + x2 * t1; + double ny1 = y1 * (1.0 - t1) + y2 * t1; + double nx2 = x1 * (1.0 - t2) + x2 * t2; + double ny2 = y1 * (1.0 - t2) + y2 * t2; + x1 = nx1; + x2 = nx2; + y1 = ny1; + y2 = ny2; + } // Transform to 2D view space: x1 = x1 - ViewPos.X; @@ -255,7 +276,8 @@ LineSegmentRange PolyCull::GetSegmentRangeForLine(double x1, double y1, double x double ry2 = x2 * ViewCos + y2 * ViewSin; // Is it potentially visible when looking straight up or down? 
- if (!(ry1 < updownnear && ry2 < updownnear) && !(ry1 > znear && ry2 > znear)) + if (!(ry1 < updownnear && ry2 < updownnear) && !(ry1 > znear && ry2 > znear) && + !(rx1 < -sidenear && rx2 < -sidenear) && !(rx1 > sidenear && rx2 > sidenear)) return LineSegmentRange::AlwaysVisible; // Cull if line is entirely behind view @@ -267,7 +289,7 @@ LineSegmentRange PolyCull::GetSegmentRangeForLine(double x1, double y1, double x if (ry1 < znear) t1 = clamp((znear - ry1) / (ry2 - ry1), 0.0, 1.0); if (ry2 < znear) - t2 = clamp((znear - ry1) / (ry2 - ry1), 0.0, 1.0); + t2 = clamp((znear - ry2) / (ry2 - ry1), 0.0, 1.0); if (t1 != 0.0 || t2 != 1.0) { double nx1 = rx1 * (1.0 - t1) + rx2 * t1; diff --git a/src/r_poly_scene.cpp b/src/r_poly_scene.cpp index cfb858e0b9..8bf6d1ce99 100644 --- a/src/r_poly_scene.cpp +++ b/src/r_poly_scene.cpp @@ -198,7 +198,7 @@ void RenderPolyScene::RenderLine(subsector_t *sub, seg_t *line, sector_t *fronts return; // Tell automap we saw this - if (!swrenderer::r_dontmaplines && line->linedef && segmentRange != LineSegmentRange::AlwaysVisible) + if (!PolyRenderer::Instance()->DontMapLines && line->linedef && segmentRange != LineSegmentRange::AlwaysVisible) { line->linedef->flags |= ML_MAPPED; sub->flags |= SSECF_DRAWN; From ecebeed296a4a2b22f3a3c6d32b69a2f6c1d1f4b Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 16 Dec 2016 22:16:24 +0100 Subject: [PATCH 499/912] Fix player sprites disappearing when a texture camera is visible --- src/r_poly_playersprite.cpp | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/r_poly_playersprite.cpp b/src/r_poly_playersprite.cpp index 381f2e2033..bf5c2767c7 100644 --- a/src/r_poly_playersprite.cpp +++ b/src/r_poly_playersprite.cpp @@ -35,11 +35,8 @@ EXTERN_CVAR(Bool, st_scale) void RenderPolyPlayerSprites::Render() { - // In theory, everything in this function could be moved to RenderRemainingSprites. 
- // Just in case there's some hack elsewhere that relies on this happening as part - // of the main rendering we do it exactly as the old software renderer did. - - ScreenSprites.clear(); + // This code cannot be moved directly to RenderRemainingSprites because the engine + // draws the canvas textures between this call and the final call to RenderRemainingSprites.. if (!r_drawplayersprites || !camera || @@ -90,6 +87,7 @@ void RenderPolyPlayerSprites::RenderRemainingSprites() { for (auto &sprite : ScreenSprites) sprite.Render(); + ScreenSprites.clear(); } void RenderPolyPlayerSprites::RenderSprite(DPSprite *sprite, AActor *owner, float bobx, float boby, double wx, double wy, double ticfrac) From 6e8174d9b4cecedbadd2aa56274014bad6bf91c4 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 16 Dec 2016 23:02:34 +0100 Subject: [PATCH 500/912] More psprite stuff --- src/r_poly_playersprite.cpp | 132 +++++++++++++++++++++++++++++++++++- 1 file changed, 130 insertions(+), 2 deletions(-) diff --git a/src/r_poly_playersprite.cpp b/src/r_poly_playersprite.cpp index bf5c2767c7..cd9584b681 100644 --- a/src/r_poly_playersprite.cpp +++ b/src/r_poly_playersprite.cpp @@ -32,6 +32,8 @@ EXTERN_CVAR(Bool, r_drawplayersprites) EXTERN_CVAR(Bool, r_deathcamera) EXTERN_CVAR(Bool, st_scale) +EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor) +EXTERN_CVAR(Bool, r_shadercolormaps) void RenderPolyPlayerSprites::Render() { @@ -219,7 +221,113 @@ void RenderPolyPlayerSprites::RenderSprite(DPSprite *sprite, AActor *owner, floa if (sprite->GetID() < PSP_TARGETCENTER) { - // Lots of complicated style and noaccel stuff + visstyle.Alpha = float(owner->Alpha); + visstyle.RenderStyle = owner->RenderStyle; + + // The software renderer cannot invert the source without inverting the overlay + // too. That means if the source is inverted, we need to do the reverse of what + // the invert overlay flag says to do. 
+ INTBOOL invertcolormap = (visstyle.RenderStyle.Flags & STYLEF_InvertOverlay); + + if (visstyle.RenderStyle.Flags & STYLEF_InvertSource) + { + invertcolormap = !invertcolormap; + } + + FDynamicColormap *mybasecolormap = basecolormap; + + if (visstyle.RenderStyle.Flags & STYLEF_FadeToBlack) + { + if (invertcolormap) + { // Fade to white + mybasecolormap = GetSpecialLights(mybasecolormap->Color, MAKERGB(255, 255, 255), mybasecolormap->Desaturate); + invertcolormap = false; + } + else + { // Fade to black + mybasecolormap = GetSpecialLights(mybasecolormap->Color, MAKERGB(0, 0, 0), mybasecolormap->Desaturate); + } + } + + /* + if (swrenderer::realfixedcolormap != nullptr && (!swrenderer::r_swtruecolor || (r_shadercolormaps && screen->Accel2D))) + { // fixed color + visstyle.BaseColormap = swrenderer::realfixedcolormap; + visstyle.ColormapNum = 0; + } + else + { + if (invertcolormap) + { + mybasecolormap = GetSpecialLights(mybasecolormap->Color, mybasecolormap->Fade.InverseColor(), mybasecolormap->Desaturate); + } + if (swrenderer::fixedlightlev >= 0) + { + visstyle.BaseColormap = (r_fullbrightignoresectorcolor) ? &FullNormalLight : mybasecolormap; + visstyle.ColormapNum = swrenderer::fixedlightlev >> COLORMAPSHIFT; + } + else if (!foggy && sprite->GetState()->GetFullbright()) + { // full bright + visstyle.BaseColormap = (r_fullbrightignoresectorcolor) ? &FullNormalLight : mybasecolormap; // [RH] use basecolormap + visstyle.ColormapNum = 0; + } + else + { // local light + visstyle.BaseColormap = mybasecolormap; + visstyle.ColormapNum = GETPALOOKUP(0, spriteshade); + } + } + */ + + if (camera->Inventory != nullptr) + { + BYTE oldcolormapnum = visstyle.ColormapNum; + FSWColormap *oldcolormap = visstyle.BaseColormap; + camera->Inventory->AlterWeaponSprite(&visstyle); + if (visstyle.BaseColormap != oldcolormap || visstyle.ColormapNum != oldcolormapnum) + { + // The colormap has changed. Is it one we can easily identify? 
+ // If not, then don't bother trying to identify it for + // hardware accelerated drawing. + if (visstyle.BaseColormap < &SpecialColormaps[0] || + visstyle.BaseColormap > &SpecialColormaps.Last()) + { + noaccel = true; + } + // Has the basecolormap changed? If so, we can't hardware accelerate it, + // since we don't know what it is anymore. + else if (visstyle.BaseColormap != mybasecolormap) + { + noaccel = true; + } + } + } + // If we're drawing with a special colormap, but shaders for them are disabled, do + // not accelerate. + if (!r_shadercolormaps && (visstyle.BaseColormap >= &SpecialColormaps[0] && + visstyle.BaseColormap <= &SpecialColormaps.Last())) + { + noaccel = true; + } + // If drawing with a BOOM colormap, disable acceleration. + if (mybasecolormap == &NormalLight && NormalLight.Maps != realcolormaps.Maps) + { + noaccel = true; + } + // If the main colormap has fixed lights, and this sprite is being drawn with that + // colormap, disable acceleration so that the lights can remain fixed. + if (!noaccel && swrenderer::realfixedcolormap == nullptr && + NormalLightHasFixedLights && mybasecolormap == &NormalLight && + tex->UseBasePalette()) + { + noaccel = true; + } + // [SP] If emulating GZDoom fullbright, disable acceleration + if (r_fullbrightignoresectorcolor && swrenderer::fixedlightlev >= 0) + mybasecolormap = &FullNormalLight; + if (r_fullbrightignoresectorcolor && !foggy && sprite->GetState()->GetFullbright()) + mybasecolormap = &FullNormalLight; + colormap_to_use = mybasecolormap; } // Check for hardware-assisted 2D. 
If it's available, and this sprite is not @@ -245,7 +353,27 @@ void RenderPolyPlayerSprites::RenderSprite(DPSprite *sprite, AActor *owner, floa } } - //R_DrawVisSprite(vis); + // To do: draw sprite same way as R_DrawVisSprite(vis) here + + // Draw the fuzzy weapon: + FRenderStyle style = visstyle.RenderStyle; + style.CheckFuzz(); + if (style.BlendOp == STYLEOP_Fuzz) + { + visstyle.RenderStyle = LegacyRenderStyles[STYLE_Shadow]; + + PolyScreenSprite screenSprite; + screenSprite.Pic = tex; + screenSprite.X1 = viewwindowx + x1; + screenSprite.Y1 = viewwindowy + viewheight / 2 - texturemid * yscale - 0.5; + screenSprite.Width = tex->GetWidth() * xscale; + screenSprite.Height = tex->GetHeight() * yscale; + screenSprite.Translation = TranslationToTable(translation); + screenSprite.Flip = xiscale < 0; + screenSprite.visstyle = visstyle; + screenSprite.Colormap = colormap_to_use; + ScreenSprites.push_back(screenSprite); + } } void PolyScreenSprite::Render() From ddb18d8e3252e41fe5ff466d2400a47da8763291 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 17 Dec 2016 02:10:52 +0100 Subject: [PATCH 501/912] Add dynamic colormaps support --- .../fixedfunction/drawtrianglecodegen.cpp | 69 +++++++++++++------ .../fixedfunction/drawtrianglecodegen.h | 7 +- 2 files changed, 52 insertions(+), 24 deletions(-) diff --git a/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp b/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp index 01deb1ce22..8b5df27c37 100644 --- a/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp +++ b/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp @@ -41,11 +41,30 @@ void DrawTriangleCodegen::Generate(TriBlendMode blendmode, bool truecolor, bool LoadArgs(args, thread_data); CalculateGradients(); - DrawFullSpans(); - DrawPartialBlocks(); + + if (truecolor) + { + SSAIfBlock branch; + branch.if_block(is_simple_shade); + { + DrawFullSpans(true); + DrawPartialBlocks(true); + } + branch.else_block(); + { + DrawFullSpans(false); + 
DrawPartialBlocks(false); + } + branch.end_block(); + } + else + { + DrawFullSpans(true); + DrawPartialBlocks(true); + } } -void DrawTriangleCodegen::DrawFullSpans() +void DrawTriangleCodegen::DrawFullSpans(bool isSimpleShade) { stack_i.store(SSAInt(0)); SSAForBlock loop; @@ -130,7 +149,7 @@ void DrawTriangleCodegen::DrawFullSpans() for (int sse = 0; sse < 4; sse++) { currentlight = is_fixed_light.select(light, lightpos >> 8); - pixels[sse] = ProcessPixel32(pixels[sse], varyingPos); + pixels[sse] = ProcessPixel32(pixels[sse], varyingPos, isSimpleShade); for (int j = 0; j < TriVertex::NumVarying; j++) varyingPos[j] = varyingPos[j] + varyingStep[j]; @@ -180,7 +199,7 @@ void DrawTriangleCodegen::DrawFullSpans() loop.end_block(); } -void DrawTriangleCodegen::DrawPartialBlocks() +void DrawTriangleCodegen::DrawPartialBlocks(bool isSimpleShade) { stack_i.store(SSAInt(0)); SSAForBlock loop; @@ -240,7 +259,7 @@ void DrawTriangleCodegen::DrawPartialBlocks() currentlight = is_fixed_light.select(light, lightpos >> 8); SSAUBytePtr destptr = dest[x * 4]; - destptr.store_vec4ub(ProcessPixel32(destptr.load_vec4ub(false), varyingPos)); + destptr.store_vec4ub(ProcessPixel32(destptr.load_vec4ub(false), varyingPos, isSimpleShade)); } else { @@ -381,7 +400,15 @@ SSAInt DrawTriangleCodegen::Shade8(SSAInt c) return currentcolormap[c].load(true).zext_int(); } -SSAVec4i DrawTriangleCodegen::ProcessPixel32(SSAVec4i bg, SSAInt *varying) +SSAVec4i DrawTriangleCodegen::Shade32(SSAVec4i fg, SSAInt light, bool isSimpleShade) +{ + if (isSimpleShade) + return shade_bgra_simple(fg, currentlight); + else + return shade_bgra_advanced(fg, currentlight, shade_constants); +} + +SSAVec4i DrawTriangleCodegen::ProcessPixel32(SSAVec4i bg, SSAInt *varying, bool isSimpleShade) { SSAVec4i fg; SSAVec4i output; @@ -391,58 +418,58 @@ SSAVec4i DrawTriangleCodegen::ProcessPixel32(SSAVec4i bg, SSAInt *varying) default: case TriBlendMode::Copy: fg = Sample32(varying); - output = blend_copy(shade_bgra_simple(fg, 
currentlight)); + output = blend_copy(Shade32(fg, currentlight, isSimpleShade)); break; case TriBlendMode::AlphaBlend: fg = Sample32(varying); - output = blend_alpha_blend(shade_bgra_simple(fg, currentlight), bg); + output = blend_alpha_blend(Shade32(fg, currentlight, isSimpleShade), bg); break; case TriBlendMode::AddSolid: fg = Sample32(varying); - output = blend_add(shade_bgra_simple(fg, currentlight), bg, srcalpha, destalpha); + output = blend_add(Shade32(fg, currentlight, isSimpleShade), bg, srcalpha, destalpha); break; case TriBlendMode::Add: fg = Sample32(varying); - output = blend_add(shade_bgra_simple(fg, currentlight), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); + output = blend_add(Shade32(fg, currentlight, isSimpleShade), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); break; case TriBlendMode::Sub: fg = Sample32(varying); - output = blend_sub(shade_bgra_simple(fg, currentlight), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); + output = blend_sub(Shade32(fg, currentlight, isSimpleShade), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); break; case TriBlendMode::RevSub: fg = Sample32(varying); - output = blend_revsub(shade_bgra_simple(fg, currentlight), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); + output = blend_revsub(Shade32(fg, currentlight, isSimpleShade), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); break; case TriBlendMode::Stencil: fg = Sample32(varying); - output = blend_stencil(shade_bgra_simple(SSAVec4i::unpack(color), currentlight), fg[3], bg, srcalpha, destalpha); + output = blend_stencil(Shade32(SSAVec4i::unpack(color), currentlight, isSimpleShade), fg[3], bg, srcalpha, destalpha); break; case TriBlendMode::Shaded: - output = blend_stencil(shade_bgra_simple(SSAVec4i::unpack(color), currentlight), Sample8(varying), bg, srcalpha, destalpha); + output = blend_stencil(Shade32(SSAVec4i::unpack(color), currentlight, isSimpleShade), Sample8(varying), bg, srcalpha, destalpha); break; case TriBlendMode::TranslateCopy: fg = 
TranslateSample32(varying); - output = blend_copy(shade_bgra_simple(fg, currentlight)); + output = blend_copy(Shade32(fg, currentlight, isSimpleShade)); break; case TriBlendMode::TranslateAlphaBlend: fg = TranslateSample32(varying); - output = blend_alpha_blend(shade_bgra_simple(fg, currentlight), bg); + output = blend_alpha_blend(Shade32(fg, currentlight, isSimpleShade), bg); break; case TriBlendMode::TranslateAdd: fg = TranslateSample32(varying); - output = blend_add(shade_bgra_simple(fg, currentlight), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); + output = blend_add(Shade32(fg, currentlight, isSimpleShade), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); break; case TriBlendMode::TranslateSub: fg = TranslateSample32(varying); - output = blend_sub(shade_bgra_simple(fg, currentlight), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); + output = blend_sub(Shade32(fg, currentlight, isSimpleShade), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); break; case TriBlendMode::TranslateRevSub: fg = TranslateSample32(varying); - output = blend_revsub(shade_bgra_simple(fg, currentlight), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); + output = blend_revsub(Shade32(fg, currentlight, isSimpleShade), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); break; case TriBlendMode::AddSrcColorOneMinusSrcColor: fg = Sample32(varying); - output = blend_add_srccolor_oneminussrccolor(shade_bgra_simple(fg, currentlight), bg); + output = blend_add_srccolor_oneminussrccolor(Shade32(fg, currentlight, isSimpleShade), bg); break; case TriBlendMode::Skycap: fg = Sample32(varying); diff --git a/tools/drawergen/fixedfunction/drawtrianglecodegen.h b/tools/drawergen/fixedfunction/drawtrianglecodegen.h index 69783e8454..640a22aea4 100644 --- a/tools/drawergen/fixedfunction/drawtrianglecodegen.h +++ b/tools/drawergen/fixedfunction/drawtrianglecodegen.h @@ -48,15 +48,16 @@ private: void CalculateGradients(); SSAFloat FindGradientX(SSAFloat x0, SSAFloat y0, SSAFloat x1, SSAFloat y1, 
SSAFloat x2, SSAFloat y2, SSAFloat c0, SSAFloat c1, SSAFloat c2); SSAFloat FindGradientY(SSAFloat x0, SSAFloat y0, SSAFloat x1, SSAFloat y1, SSAFloat x2, SSAFloat y2, SSAFloat c0, SSAFloat c1, SSAFloat c2); - void DrawFullSpans(); - void DrawPartialBlocks(); + void DrawFullSpans(bool isSimpleShade); + void DrawPartialBlocks(bool isSimpleShade); - SSAVec4i ProcessPixel32(SSAVec4i bg, SSAInt *varying); + SSAVec4i ProcessPixel32(SSAVec4i bg, SSAInt *varying, bool isSimpleShade); SSAInt ProcessPixel8(SSAInt bg, SSAInt *varying); SSAVec4i TranslateSample32(SSAInt *varying); SSAInt TranslateSample8(SSAInt *varying); SSAVec4i Sample32(SSAInt *varying); SSAInt Sample8(SSAInt *varying); + SSAVec4i Shade32(SSAVec4i fg, SSAInt light, bool isSimpleShade); SSAInt Shade8(SSAInt c); SSAVec4i ToBgra(SSAInt index); SSAInt ToPal8(SSAVec4i c); From afe5d0fcdc482c51b8fd88d4daf2fc2063387308 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 17 Dec 2016 04:24:28 +0100 Subject: [PATCH 502/912] Simplify light calculation slightly --- tools/drawergen/fixedfunction/drawtrianglecodegen.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp b/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp index 8b5df27c37..674543ff2d 100644 --- a/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp +++ b/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp @@ -98,7 +98,7 @@ void DrawTriangleCodegen::DrawFullSpans(bool isSimpleShade) stack_posXVarying[j].store(blockPosY.Varying[j]); SSAFloat rcpW = SSAFloat((float)0x01000000) / blockPosY.W; - stack_lightpos.store(FRACUNIT - SSAInt(SSAFloat::clamp(shade - SSAFloat::MIN(SSAFloat(24.0f), globVis * blockPosY.W) / 32.0f, SSAFloat(0.0f), SSAFloat(31.0f / 32.0f)) * (float)FRACUNIT, true)); + stack_lightpos.store(FRACUNIT - SSAInt(SSAFloat::clamp(shade - SSAFloat::MIN(SSAFloat(24.0f / 32.0f), globVis * blockPosY.W), SSAFloat(0.0f), SSAFloat(31.0f / 32.0f)) * 
(float)FRACUNIT, true)); for (int j = 0; j < TriVertex::NumVarying; j++) stack_varyingPos[j].store(SSAInt(blockPosY.Varying[j] * rcpW, false)); stack_x.store(SSAInt(0)); @@ -127,7 +127,7 @@ void DrawTriangleCodegen::DrawFullSpans(bool isSimpleShade) varyingStep[j] = (nextPos - varyingPos[j]) / 8; } - SSAInt lightnext = FRACUNIT - SSAInt(SSAFloat::clamp(shade - SSAFloat::MIN(SSAFloat(24.0f), globVis * blockPosX.W) / 32.0f, SSAFloat(0.0f), SSAFloat(31.0f / 32.0f)) * (float)FRACUNIT, true); + SSAInt lightnext = FRACUNIT - SSAInt(SSAFloat::clamp(shade - SSAFloat::MIN(SSAFloat(24.0f / 32.0f), globVis * blockPosX.W), SSAFloat(0.0f), SSAFloat(31.0f / 32.0f)) * (float)FRACUNIT, true); SSAInt lightstep = (lightnext - lightpos) / 8; if (truecolor) @@ -231,7 +231,7 @@ void DrawTriangleCodegen::DrawPartialBlocks(bool isSimpleShade) for (int j = 0; j < TriVertex::NumVarying; j++) varyingPos[j] = SSAInt(blockPosX.Varying[j] * rcpW, false); - SSAInt lightpos = FRACUNIT - SSAInt(SSAFloat::clamp(shade - SSAFloat::MIN(SSAFloat(24.0f), globVis * blockPosX.W) / 32.0f, SSAFloat(0.0f), SSAFloat(31.0f / 32.0f)) * (float)FRACUNIT, true); + SSAInt lightpos = FRACUNIT - SSAInt(SSAFloat::clamp(shade - SSAFloat::MIN(SSAFloat(24.0f / 32.0f), globVis * blockPosX.W), SSAFloat(0.0f), SSAFloat(31.0f / 32.0f)) * (float)FRACUNIT, true); blockPosX.W = blockPosX.W + gradientX.W * 8.0f; for (int j = 0; j < TriVertex::NumVarying; j++) @@ -245,7 +245,7 @@ void DrawTriangleCodegen::DrawPartialBlocks(bool isSimpleShade) varyingStep[j] = (nextPos - varyingPos[j]) / 8; } - SSAInt lightnext = FRACUNIT - SSAInt(SSAFloat::clamp(shade - SSAFloat::MIN(SSAFloat(24.0f), globVis * blockPosX.W) / 32.0f, SSAFloat(0.0f), SSAFloat(31.0f / 32.0f)) * (float)FRACUNIT, true); + SSAInt lightnext = FRACUNIT - SSAInt(SSAFloat::clamp(shade - SSAFloat::MIN(SSAFloat(24.0f / 32.0f), globVis * blockPosX.W), SSAFloat(0.0f), SSAFloat(31.0f / 32.0f)) * (float)FRACUNIT, true); SSAInt lightstep = (lightnext - lightpos) / 8; for (int x = 
0; x < 8; x++) @@ -613,7 +613,7 @@ void DrawTriangleCodegen::CalculateGradients() } shade = (64.0f - (SSAFloat(light * 255 / 256) + 12.0f) * 32.0f / 128.0f) / 32.0f; - globVis = SSAFloat(1706.0f); + globVis = SSAFloat(1706.0f / 32.0f); } void DrawTriangleCodegen::LoadArgs(SSAValue args, SSAValue thread_data) From 2ba402dc7404116d38b54b2878046fb161ede18a Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Sat, 17 Dec 2016 04:11:52 -0500 Subject: [PATCH 503/912] - Implementing RGB666 colormatching to replace less precise RGB555 in some parts of the code. --- src/r_draw_pal.cpp | 91 ++++++++++++++++++++++++++++++++++++- src/textures/pngtexture.cpp | 13 ++++++ src/textures/tgatexture.cpp | 12 +++++ src/v_draw.cpp | 2 +- src/v_video.cpp | 11 +++++ src/v_video.h | 10 ++++ 6 files changed, 137 insertions(+), 2 deletions(-) diff --git a/src/r_draw_pal.cpp b/src/r_draw_pal.cpp index cfb55a6706..439321e53f 100644 --- a/src/r_draw_pal.cpp +++ b/src/r_draw_pal.cpp @@ -314,10 +314,17 @@ namespace swrenderer uint8_t pix = source[frac >> bits]; if (pix != 0) { +#ifdef NO_RGB666 uint32_t fg = fg2rgb[colormap[pix]]; uint32_t bg = bg2rgb[*dest]; fg = (fg + bg) | 0x1f07c1f; *dest = RGB32k.All[fg & (fg >> 15)]; +#else + uint32_t r = clamp(GPalette.BaseColors[colormap[pix]].r + GPalette.BaseColors[*dest].r, 0, 255); + uint32_t g = clamp(GPalette.BaseColors[colormap[pix]].g + GPalette.BaseColors[*dest].g, 0, 255); + uint32_t b = clamp(GPalette.BaseColors[colormap[pix]].b + GPalette.BaseColors[*dest].b, 0, 255); + *dest = RGB256k.RGB[r>>2][g>>2][b>>2]; +#endif } frac += fracstep; dest += pitch; @@ -357,10 +364,17 @@ namespace swrenderer uint8_t pix = _bufplce[i][vplce[i] >> bits]; if (pix != 0) { +#ifdef NO_RGB666 uint32_t fg = fg2rgb[_palookupoffse[i][pix]]; uint32_t bg = bg2rgb[dest[i]]; fg = (fg + bg) | 0x1f07c1f; dest[i] = RGB32k.All[fg & (fg >> 15)]; +#else + uint32_t r = clamp(GPalette.BaseColors[_palookupoffse[i][pix]].r + GPalette.BaseColors[dest[i]].r, 0, 255); + uint32_t 
g = clamp(GPalette.BaseColors[_palookupoffse[i][pix]].g + GPalette.BaseColors[dest[i]].g, 0, 255); + uint32_t b = clamp(GPalette.BaseColors[_palookupoffse[i][pix]].b + GPalette.BaseColors[dest[i]].b, 0, 255); + dest[i] = RGB256k.RGB[r>>2][g>>2][b>>2]; +#endif } vplce[i] += vince[i]; } @@ -396,6 +410,7 @@ namespace swrenderer uint8_t pix = source[frac >> bits]; if (pix != 0) { +#ifdef NO_RGB666 uint32_t a = fg2rgb[colormap[pix]] + bg2rgb[*dest]; uint32_t b = a; @@ -405,6 +420,12 @@ namespace swrenderer b = b - (b >> 5); a |= b; *dest = RGB32k.All[a & (a >> 15)]; +#else + uint32_t r = clamp(GPalette.BaseColors[colormap[pix]].r + GPalette.BaseColors[*dest].r, 0, 255); + uint32_t g = clamp(GPalette.BaseColors[colormap[pix]].g + GPalette.BaseColors[*dest].g, 0, 255); + uint32_t b = clamp(GPalette.BaseColors[colormap[pix]].b + GPalette.BaseColors[*dest].b, 0, 255); + *dest = RGB256k.RGB[r>>2][g>>2][b>>2]; +#endif } frac += fracstep; dest += pitch; @@ -444,6 +465,7 @@ namespace swrenderer uint8_t pix = _bufplce[i][vplce[i] >> bits]; if (pix != 0) { +#ifdef NO_RGB666 uint32_t a = fg2rgb[_palookupoffse[i][pix]] + bg2rgb[dest[i]]; uint32_t b = a; @@ -453,6 +475,12 @@ namespace swrenderer b = b - (b >> 5); a |= b; dest[i] = RGB32k.All[a & (a >> 15)]; +#else + uint32_t r = clamp(GPalette.BaseColors[_palookupoffse[i][pix]].r + GPalette.BaseColors[dest[i]].r, 0, 255); + uint32_t g = clamp(GPalette.BaseColors[_palookupoffse[i][pix]].g + GPalette.BaseColors[dest[i]].g, 0, 255); + uint32_t b = clamp(GPalette.BaseColors[_palookupoffse[i][pix]].b + GPalette.BaseColors[dest[i]].b, 0, 255); + dest[i] = RGB256k.RGB[r>>2][g>>2][b>>2]; +#endif } vplce[i] += vince[i]; } @@ -488,6 +516,7 @@ namespace swrenderer uint8_t pix = source[frac >> bits]; if (pix != 0) { +#ifdef NO_RGB666 uint32_t a = (fg2rgb[colormap[pix]] | 0x40100400) - bg2rgb[*dest]; uint32_t b = a; @@ -496,6 +525,12 @@ namespace swrenderer a &= b; a |= 0x01f07c1f; *dest = RGB32k.All[a & (a >> 15)]; +#else + uint32_t r = 
clamp(-GPalette.BaseColors[colormap[pix]].r + GPalette.BaseColors[*dest].r, 0, 255); + uint32_t g = clamp(-GPalette.BaseColors[colormap[pix]].g + GPalette.BaseColors[*dest].g, 0, 255); + uint32_t b = clamp(-GPalette.BaseColors[colormap[pix]].b + GPalette.BaseColors[*dest].b, 0, 255); + *dest = RGB256k.RGB[r>>2][g>>2][b>>2]; +#endif } frac += fracstep; dest += pitch; @@ -535,6 +570,7 @@ namespace swrenderer uint8_t pix = _bufplce[i][vplce[i] >> bits]; if (pix != 0) { +#ifdef NO_RGB666 uint32_t a = (fg2rgb[_palookupoffse[i][pix]] | 0x40100400) - bg2rgb[dest[i]]; uint32_t b = a; @@ -543,6 +579,12 @@ namespace swrenderer a &= b; a |= 0x01f07c1f; dest[i] = RGB32k.All[a & (a >> 15)]; +#else + uint32_t r = clamp(-GPalette.BaseColors[_palookupoffse[i][pix]].r + GPalette.BaseColors[dest[i]].r, 0, 255); + uint32_t g = clamp(-GPalette.BaseColors[_palookupoffse[i][pix]].g + GPalette.BaseColors[dest[i]].g, 0, 255); + uint32_t b = clamp(-GPalette.BaseColors[_palookupoffse[i][pix]].b + GPalette.BaseColors[dest[i]].b, 0, 255); + dest[i] = RGB256k.RGB[r>>2][g>>2][b>>2]; +#endif } vplce[i] += vince[i]; } @@ -578,6 +620,7 @@ namespace swrenderer uint8_t pix = source[frac >> bits]; if (pix != 0) { +#ifdef NO_RGB666 uint32_t a = (bg2rgb[*dest] | 0x40100400) - fg2rgb[colormap[pix]]; uint32_t b = a; @@ -586,6 +629,12 @@ namespace swrenderer a &= b; a |= 0x01f07c1f; *dest = RGB32k.All[a & (a >> 15)]; +#else + uint32_t r = clamp(GPalette.BaseColors[colormap[pix]].r - GPalette.BaseColors[*dest].r, 0, 255); + uint32_t g = clamp(GPalette.BaseColors[colormap[pix]].g - GPalette.BaseColors[*dest].g, 0, 255); + uint32_t b = clamp(GPalette.BaseColors[colormap[pix]].b - GPalette.BaseColors[*dest].b, 0, 255); + *dest = RGB256k.RGB[r>>2][g>>2][b>>2]; +#endif } frac += fracstep; dest += pitch; @@ -625,6 +674,7 @@ namespace swrenderer uint8_t pix = _bufplce[i][vplce[i] >> bits]; if (pix != 0) { +#ifdef NO_RGB666 uint32_t a = (bg2rgb[dest[i]] | 0x40100400) - fg2rgb[_palookupoffse[i][pix]]; uint32_t b = 
a; @@ -633,6 +683,12 @@ namespace swrenderer a &= b; a |= 0x01f07c1f; dest[i] = RGB32k.All[a & (a >> 15)]; +#else + uint32_t r = clamp(GPalette.BaseColors[_palookupoffse[i][pix]].r - GPalette.BaseColors[dest[i]].r, 0, 255); + uint32_t g = clamp(GPalette.BaseColors[_palookupoffse[i][pix]].g - GPalette.BaseColors[dest[i]].g, 0, 255); + uint32_t b = clamp(GPalette.BaseColors[_palookupoffse[i][pix]].b - GPalette.BaseColors[dest[i]].b, 0, 255); + dest[i] = RGB256k.RGB[r>>2][g>>2][b>>2]; +#endif } vplce[i] += vince[i]; } @@ -716,7 +772,11 @@ namespace swrenderer c_red = (c_red * alpha_bottom + solid_bottom_r * inv_alpha_bottom) >> 8; c_green = (c_green * alpha_bottom + solid_bottom_g * inv_alpha_bottom) >> 8; c_blue = (c_blue * alpha_bottom + solid_bottom_b * inv_alpha_bottom) >> 8; +#ifdef NO_RGB666 *dest = RGB32k.RGB[(c_red >> 3)][(c_green >> 3)][(c_blue >> 3)]; +#else + *dest = RGB256k.RGB[(c_red >> 2)][(c_green >> 2)][(c_blue >> 2)]; +#endif } frac += fracstep; @@ -744,8 +804,13 @@ namespace swrenderer int solid_bottom_r = RPART(solid_bottom); int solid_bottom_g = GPART(solid_bottom); int solid_bottom_b = BPART(solid_bottom); +#ifdef NO_RGB666 uint32_t solid_top_fill = RGB32k.RGB[(solid_top_r >> 3)][(solid_top_g >> 3)][(solid_top_b >> 3)]; uint32_t solid_bottom_fill = RGB32k.RGB[(solid_bottom_r >> 3)][(solid_bottom_g >> 3)][(solid_bottom_b >> 3)]; +#else + uint32_t solid_top_fill = RGB256k.RGB[(solid_top_r >> 2)][(solid_top_g >> 2)][(solid_top_b >> 2)]; + uint32_t solid_bottom_fill = RGB256k.RGB[(solid_bottom_r >> 2)][(solid_bottom_g >> 2)][(solid_bottom_b >> 2)]; +#endif solid_top_fill = (solid_top_fill << 24) | (solid_top_fill << 16) | (solid_top_fill << 8) | solid_top_fill; solid_bottom_fill = (solid_bottom_fill << 24) | (solid_bottom_fill << 16) | (solid_bottom_fill << 8) | solid_bottom_fill; @@ -805,8 +870,11 @@ namespace swrenderer c_red = (c_red * alpha_top + solid_top_r * inv_alpha_top) >> 8; c_green = (c_green * alpha_top + solid_top_g * inv_alpha_top) >> 8; 
c_blue = (c_blue * alpha_top + solid_top_b * inv_alpha_top) >> 8; +#ifdef NO_RGB666 output[col] = RGB32k.RGB[(c_red >> 3)][(c_green >> 3)][(c_blue >> 3)]; - +#else + output[col] = RGB256k.RGB[(c_red >> 2)][(c_green >> 2)][(c_blue >> 2)]; +#endif frac[col] += fracstep[col]; } *((uint32_t*)dest) = *((uint32_t*)output); @@ -847,7 +915,11 @@ namespace swrenderer c_red = (c_red * alpha_bottom + solid_bottom_r * inv_alpha_bottom) >> 8; c_green = (c_green * alpha_bottom + solid_bottom_g * inv_alpha_bottom) >> 8; c_blue = (c_blue * alpha_bottom + solid_bottom_b * inv_alpha_bottom) >> 8; +#ifdef NO_RGB666 output[col] = RGB32k.RGB[(c_red >> 3)][(c_green >> 3)][(c_blue >> 3)]; +#else + output[col] = RGB256k.RGB[(c_red >> 2)][(c_green >> 2)][(c_blue >> 2)]; +#endif frac[col] += fracstep[col]; } @@ -929,7 +1001,11 @@ namespace swrenderer c_red = (c_red * alpha_bottom + solid_bottom_r * inv_alpha_bottom) >> 8; c_green = (c_green * alpha_bottom + solid_bottom_g * inv_alpha_bottom) >> 8; c_blue = (c_blue * alpha_bottom + solid_bottom_b * inv_alpha_bottom) >> 8; +#ifdef NO_RGB666 *dest = RGB32k.RGB[(c_red >> 3)][(c_green >> 3)][(c_blue >> 3)]; +#else + *dest = RGB256k.RGB[(c_red >> 2)][(c_green >> 2)][(c_blue >> 2)]; +#endif } frac += fracstep; @@ -959,8 +1035,13 @@ namespace swrenderer int solid_bottom_r = RPART(solid_bottom); int solid_bottom_g = GPART(solid_bottom); int solid_bottom_b = BPART(solid_bottom); +#ifdef NO_RGB666 uint32_t solid_top_fill = RGB32k.RGB[(solid_top_r >> 3)][(solid_top_g >> 3)][(solid_top_b >> 3)]; uint32_t solid_bottom_fill = RGB32k.RGB[(solid_bottom_r >> 3)][(solid_bottom_g >> 3)][(solid_bottom_b >> 3)]; +#else + uint32_t solid_top_fill = RGB256k.RGB[(solid_top_r >> 2)][(solid_top_g >> 2)][(solid_top_b >> 2)]; + uint32_t solid_bottom_fill = RGB256k.RGB[(solid_bottom_r >> 2)][(solid_bottom_g >> 2)][(solid_bottom_b >> 2)]; +#endif solid_top_fill = (solid_top_fill << 24) | (solid_top_fill << 16) | (solid_top_fill << 8) | solid_top_fill; solid_bottom_fill = 
(solid_bottom_fill << 24) | (solid_bottom_fill << 16) | (solid_bottom_fill << 8) | solid_bottom_fill; @@ -1026,7 +1107,11 @@ namespace swrenderer c_red = (c_red * alpha_top + solid_top_r * inv_alpha_top) >> 8; c_green = (c_green * alpha_top + solid_top_g * inv_alpha_top) >> 8; c_blue = (c_blue * alpha_top + solid_top_b * inv_alpha_top) >> 8; +#ifdef NO_RGB666 output[col] = RGB32k.RGB[(c_red >> 3)][(c_green >> 3)][(c_blue >> 3)]; +#else + output[col] = RGB256k.RGB[(c_red >> 2)][(c_green >> 2)][(c_blue >> 2)]; +#endif frac[col] += fracstep[col]; } @@ -1080,7 +1165,11 @@ namespace swrenderer c_red = (c_red * alpha_bottom + solid_bottom_r * inv_alpha_bottom) >> 8; c_green = (c_green * alpha_bottom + solid_bottom_g * inv_alpha_bottom) >> 8; c_blue = (c_blue * alpha_bottom + solid_bottom_b * inv_alpha_bottom) >> 8; +#ifdef NO_RGB666 output[col] = RGB32k.RGB[(c_red >> 3)][(c_green >> 3)][(c_blue >> 3)]; +#else + output[col] = RGB256k.RGB[(c_red >> 2)][(c_green >> 2)][(c_blue >> 2)]; +#endif frac[col] += fracstep[col]; } diff --git a/src/textures/pngtexture.cpp b/src/textures/pngtexture.cpp index d24cd92d11..e13944173d 100644 --- a/src/textures/pngtexture.cpp +++ b/src/textures/pngtexture.cpp @@ -536,7 +536,12 @@ void FPNGTexture::MakeTexture () { if (!HaveTrans) { +#ifdef NO_RGB666 *out++ = RGB32k.RGB[in[0]>>3][in[1]>>3][in[2]>>3]; +#else + *out++ = RGB256k.RGB[in[0]>>2][in[1]>>2][in[2]>>2]; +#endif + } else { @@ -548,7 +553,11 @@ void FPNGTexture::MakeTexture () } else { +#ifdef NO_RGB666 *out++ = RGB32k.RGB[in[0]>>3][in[1]>>3][in[2]>>3]; +#else + *out++ = RGB256k.RGB[in[0]>>2][in[1]>>2][in[2]>>2]; +#endif } } in += pitch; @@ -593,7 +602,11 @@ void FPNGTexture::MakeTexture () { for (y = Height; y > 0; --y) { +#ifdef NO_RGB666 *out++ = in[3] < 128 ? 0 : RGB32k.RGB[in[0]>>3][in[1]>>3][in[2]>>3]; +#else + *out++ = in[3] < 128 ? 
0 : RGB256k.RGB[in[0]>>2][in[1]>>2][in[2]>>2]; +#endif in += pitch; } in -= backstep; diff --git a/src/textures/tgatexture.cpp b/src/textures/tgatexture.cpp index b208a51a37..e24b926caa 100644 --- a/src/textures/tgatexture.cpp +++ b/src/textures/tgatexture.cpp @@ -405,7 +405,11 @@ void FTGATexture::MakeTexture () BYTE * p = ptr + y * Pitch; for(int x=0;x>3][p[1]>>3][p[0]>>3]; +#else + Pixels[x*Height+y] = RGB256k.RGB[p[2]>>2][p[1]>>2][p[0]>>2]; +#endif p+=step_x; } } @@ -419,7 +423,11 @@ void FTGATexture::MakeTexture () BYTE * p = ptr + y * Pitch; for(int x=0;x>3][p[1]>>3][p[0]>>3]; +#else + Pixels[x*Height+y] = RGB256k.RGB[p[2]>>2][p[1]>>2][p[0]>>2]; +#endif p+=step_x; } } @@ -431,7 +439,11 @@ void FTGATexture::MakeTexture () BYTE * p = ptr + y * Pitch; for(int x=0;x= 128? RGB32k.RGB[p[2]>>3][p[1]>>3][p[0]>>3] : 0; +#else + Pixels[x*Height+y] = p[3] >= 128? RGB256k.RGB[p[2]>>2][p[1]>>2][p[0]>>2] : 0; +#endif p+=step_x; } } diff --git a/src/v_draw.cpp b/src/v_draw.cpp index 1524c7ba4a..bf7ccaab4e 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -1031,7 +1031,7 @@ void DCanvas::PUTTRANSDOT (int xx, int yy, int basecolor, int level) DWORD *fg2rgb = Col2RGB8[63-level]; DWORD fg = fg2rgb[basecolor]; DWORD bg = bg2rgb[*spot]; - bg = (fg+bg) | 0x1f07c1f; + bg = (fg+bg) | 0x01f07c1f; *spot = RGB32k.All[bg&(bg>>15)]; } diff --git a/src/v_video.cpp b/src/v_video.cpp index efe93aa04a..92992220b2 100644 --- a/src/v_video.cpp +++ b/src/v_video.cpp @@ -144,8 +144,12 @@ DWORD Col2RGB8[65][256]; DWORD *Col2RGB8_LessPrecision[65]; DWORD Col2RGB8_Inverse[65][256]; ColorTable32k RGB32k; +#ifndef NO_RGB666 +ColorTable256k RGB256k; +#endif } + static DWORD Col2RGB8_2[63][256]; // [RH] The framebuffer is no longer a mere byte array. 
@@ -669,6 +673,13 @@ static void BuildTransTable (const PalEntry *palette) for (g = 0; g < 32; g++) for (b = 0; b < 32; b++) RGB32k.RGB[r][g][b] = ColorMatcher.Pick ((r<<3)|(r>>2), (g<<3)|(g>>2), (b<<3)|(b>>2)); +#ifndef NO_RGB666 + // create the RGB666 lookup table + for (r = 0; r < 64; r++) + for (g = 0; g < 64; g++) + for (b = 0; b < 64; b++) + RGB256k.RGB[r][g][b] = ColorMatcher.Pick ((r<<2)|(r>>4), (g<<2)|(g>>4), (b<<2)|(b>>4)); +#endif int x, y; diff --git a/src/v_video.h b/src/v_video.h index b72f670947..2079ff64b5 100644 --- a/src/v_video.h +++ b/src/v_video.h @@ -462,6 +462,16 @@ union ColorTable32k }; extern "C" ColorTable32k RGB32k; +// [SP] RGB666 support +#ifndef NO_RGB666 +union ColorTable256k +{ + BYTE RGB[64][64][64]; + BYTE All[64 *64 *64]; +}; +extern "C" ColorTable256k RGB256k; +#endif + // Col2RGB8 is a pre-multiplied palette for color lookup. It is stored in a // special R10B10G10 format for efficient blending computation. // --RRRRRrrr--BBBBBbbb--GGGGGggg-- at level 64 From 179c7dba8315723495b1f08f3dff056f3c1ef446 Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Sat, 17 Dec 2016 10:25:35 -0500 Subject: [PATCH 504/912] - Yeah, it's time to take a break. Have fun with this! 
:D --- src/r_draw_pal.cpp | 99 ++++++++++------------------------------------ src/r_things.cpp | 39 ++++-------------- src/v_draw.cpp | 33 +++------------- src/v_video.cpp | 30 ++++++-------- 4 files changed, 47 insertions(+), 154 deletions(-) diff --git a/src/r_draw_pal.cpp b/src/r_draw_pal.cpp index 439321e53f..5a6c943275 100644 --- a/src/r_draw_pal.cpp +++ b/src/r_draw_pal.cpp @@ -320,9 +320,9 @@ namespace swrenderer fg = (fg + bg) | 0x1f07c1f; *dest = RGB32k.All[fg & (fg >> 15)]; #else - uint32_t r = clamp(GPalette.BaseColors[colormap[pix]].r + GPalette.BaseColors[*dest].r, 0, 255); - uint32_t g = clamp(GPalette.BaseColors[colormap[pix]].g + GPalette.BaseColors[*dest].g, 0, 255); - uint32_t b = clamp(GPalette.BaseColors[colormap[pix]].b + GPalette.BaseColors[*dest].b, 0, 255); + uint32_t r = MIN(GPalette.BaseColors[colormap[pix]].r + GPalette.BaseColors[*dest].r, 255); + uint32_t g = MIN(GPalette.BaseColors[colormap[pix]].g + GPalette.BaseColors[*dest].g, 255); + uint32_t b = MIN(GPalette.BaseColors[colormap[pix]].b + GPalette.BaseColors[*dest].b, 255); *dest = RGB256k.RGB[r>>2][g>>2][b>>2]; #endif } @@ -370,9 +370,9 @@ namespace swrenderer fg = (fg + bg) | 0x1f07c1f; dest[i] = RGB32k.All[fg & (fg >> 15)]; #else - uint32_t r = clamp(GPalette.BaseColors[_palookupoffse[i][pix]].r + GPalette.BaseColors[dest[i]].r, 0, 255); - uint32_t g = clamp(GPalette.BaseColors[_palookupoffse[i][pix]].g + GPalette.BaseColors[dest[i]].g, 0, 255); - uint32_t b = clamp(GPalette.BaseColors[_palookupoffse[i][pix]].b + GPalette.BaseColors[dest[i]].b, 0, 255); + uint32_t r = MIN(GPalette.BaseColors[_palookupoffse[i][pix]].r + GPalette.BaseColors[dest[i]].r, 255); + uint32_t g = MIN(GPalette.BaseColors[_palookupoffse[i][pix]].g + GPalette.BaseColors[dest[i]].g, 255); + uint32_t b = MIN(GPalette.BaseColors[_palookupoffse[i][pix]].b + GPalette.BaseColors[dest[i]].b, 255); dest[i] = RGB256k.RGB[r>>2][g>>2][b>>2]; #endif } @@ -410,22 +410,10 @@ namespace swrenderer uint8_t pix = source[frac 
>> bits]; if (pix != 0) { -#ifdef NO_RGB666 - uint32_t a = fg2rgb[colormap[pix]] + bg2rgb[*dest]; - uint32_t b = a; - - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - *dest = RGB32k.All[a & (a >> 15)]; -#else - uint32_t r = clamp(GPalette.BaseColors[colormap[pix]].r + GPalette.BaseColors[*dest].r, 0, 255); - uint32_t g = clamp(GPalette.BaseColors[colormap[pix]].g + GPalette.BaseColors[*dest].g, 0, 255); - uint32_t b = clamp(GPalette.BaseColors[colormap[pix]].b + GPalette.BaseColors[*dest].b, 0, 255); + uint32_t r = MIN(GPalette.BaseColors[colormap[pix]].r + GPalette.BaseColors[*dest].r, 255); + uint32_t g = MIN(GPalette.BaseColors[colormap[pix]].g + GPalette.BaseColors[*dest].g, 255); + uint32_t b = MIN(GPalette.BaseColors[colormap[pix]].b + GPalette.BaseColors[*dest].b, 255); *dest = RGB256k.RGB[r>>2][g>>2][b>>2]; -#endif } frac += fracstep; dest += pitch; @@ -465,22 +453,10 @@ namespace swrenderer uint8_t pix = _bufplce[i][vplce[i] >> bits]; if (pix != 0) { -#ifdef NO_RGB666 - uint32_t a = fg2rgb[_palookupoffse[i][pix]] + bg2rgb[dest[i]]; - uint32_t b = a; - - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - dest[i] = RGB32k.All[a & (a >> 15)]; -#else - uint32_t r = clamp(GPalette.BaseColors[_palookupoffse[i][pix]].r + GPalette.BaseColors[dest[i]].r, 0, 255); - uint32_t g = clamp(GPalette.BaseColors[_palookupoffse[i][pix]].g + GPalette.BaseColors[dest[i]].g, 0, 255); - uint32_t b = clamp(GPalette.BaseColors[_palookupoffse[i][pix]].b + GPalette.BaseColors[dest[i]].b, 0, 255); + uint32_t r = MIN(GPalette.BaseColors[_palookupoffse[i][pix]].r + GPalette.BaseColors[dest[i]].r, 255); + uint32_t g = MIN(GPalette.BaseColors[_palookupoffse[i][pix]].g + GPalette.BaseColors[dest[i]].g, 255); + uint32_t b = MIN(GPalette.BaseColors[_palookupoffse[i][pix]].b + GPalette.BaseColors[dest[i]].b, 255); dest[i] = RGB256k.RGB[r>>2][g>>2][b>>2]; -#endif } vplce[i] += vince[i]; } @@ -516,21 +492,10 @@ 
namespace swrenderer uint8_t pix = source[frac >> bits]; if (pix != 0) { -#ifdef NO_RGB666 - uint32_t a = (fg2rgb[colormap[pix]] | 0x40100400) - bg2rgb[*dest]; - uint32_t b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - *dest = RGB32k.All[a & (a >> 15)]; -#else - uint32_t r = clamp(-GPalette.BaseColors[colormap[pix]].r + GPalette.BaseColors[*dest].r, 0, 255); - uint32_t g = clamp(-GPalette.BaseColors[colormap[pix]].g + GPalette.BaseColors[*dest].g, 0, 255); - uint32_t b = clamp(-GPalette.BaseColors[colormap[pix]].b + GPalette.BaseColors[*dest].b, 0, 255); + int r = clamp(-GPalette.BaseColors[colormap[pix]].r + GPalette.BaseColors[*dest].r, 0, 255); + int g = clamp(-GPalette.BaseColors[colormap[pix]].g + GPalette.BaseColors[*dest].g, 0, 255); + int b = clamp(-GPalette.BaseColors[colormap[pix]].b + GPalette.BaseColors[*dest].b, 0, 255); *dest = RGB256k.RGB[r>>2][g>>2][b>>2]; -#endif } frac += fracstep; dest += pitch; @@ -570,21 +535,10 @@ namespace swrenderer uint8_t pix = _bufplce[i][vplce[i] >> bits]; if (pix != 0) { -#ifdef NO_RGB666 - uint32_t a = (fg2rgb[_palookupoffse[i][pix]] | 0x40100400) - bg2rgb[dest[i]]; - uint32_t b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - dest[i] = RGB32k.All[a & (a >> 15)]; -#else - uint32_t r = clamp(-GPalette.BaseColors[_palookupoffse[i][pix]].r + GPalette.BaseColors[dest[i]].r, 0, 255); - uint32_t g = clamp(-GPalette.BaseColors[_palookupoffse[i][pix]].g + GPalette.BaseColors[dest[i]].g, 0, 255); - uint32_t b = clamp(-GPalette.BaseColors[_palookupoffse[i][pix]].b + GPalette.BaseColors[dest[i]].b, 0, 255); + int r = clamp(-GPalette.BaseColors[_palookupoffse[i][pix]].r + GPalette.BaseColors[dest[i]].r, 0, 255); + int g = clamp(-GPalette.BaseColors[_palookupoffse[i][pix]].g + GPalette.BaseColors[dest[i]].g, 0, 255); + int b = clamp(-GPalette.BaseColors[_palookupoffse[i][pix]].b + GPalette.BaseColors[dest[i]].b, 0, 255); dest[i] = RGB256k.RGB[r>>2][g>>2][b>>2]; -#endif } 
vplce[i] += vince[i]; } @@ -620,21 +574,10 @@ namespace swrenderer uint8_t pix = source[frac >> bits]; if (pix != 0) { -#ifdef NO_RGB666 - uint32_t a = (bg2rgb[*dest] | 0x40100400) - fg2rgb[colormap[pix]]; - uint32_t b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - *dest = RGB32k.All[a & (a >> 15)]; -#else - uint32_t r = clamp(GPalette.BaseColors[colormap[pix]].r - GPalette.BaseColors[*dest].r, 0, 255); - uint32_t g = clamp(GPalette.BaseColors[colormap[pix]].g - GPalette.BaseColors[*dest].g, 0, 255); - uint32_t b = clamp(GPalette.BaseColors[colormap[pix]].b - GPalette.BaseColors[*dest].b, 0, 255); + int r = clamp(GPalette.BaseColors[colormap[pix]].r - GPalette.BaseColors[*dest].r, 0, 255); + int g = clamp(GPalette.BaseColors[colormap[pix]].g - GPalette.BaseColors[*dest].g, 0, 255); + int b = clamp(GPalette.BaseColors[colormap[pix]].b - GPalette.BaseColors[*dest].b, 0, 255); *dest = RGB256k.RGB[r>>2][g>>2][b>>2]; -#endif } frac += fracstep; dest += pitch; diff --git a/src/r_things.cpp b/src/r_things.cpp index 9e9b161cdf..7c178ce8b8 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -2634,10 +2634,8 @@ static void R_DrawMaskedSegsBehindParticle (const vissprite_t *vis) void R_DrawParticle_C (vissprite_t *vis) { - DWORD *bg2rgb; int spacing; BYTE *dest; - DWORD fg; BYTE color = vis->Style.colormap[vis->startfrac]; int yl = vis->y1; int ycount = vis->y2 - yl + 1; @@ -2649,33 +2647,10 @@ void R_DrawParticle_C (vissprite_t *vis) DrawerCommandQueue::WaitForWorkers(); // vis->renderflags holds translucency level (0-255) - { - fixed_t fglevel, bglevel; - DWORD *fg2rgb; + fixed_t fglevel, bglevel; - fglevel = ((vis->renderflags + 1) << 8) & ~0x3ff; - bglevel = FRACUNIT-fglevel; - fg2rgb = Col2RGB8[fglevel>>10]; - bg2rgb = Col2RGB8[bglevel>>10]; - fg = fg2rgb[color]; - } - - /* - - spacing = RenderTarget->GetPitch() - countbase; - dest = ylookup[yl] + x1 + dc_destorg; - - do - { - int count = countbase; - do - { - DWORD bg = bg2rgb[*dest]; - 
bg = (fg+bg) | 0x1f07c1f; - *dest++ = RGB32k.All[bg & (bg>>15)]; - } while (--count); - dest += spacing; - } while (--ycount);*/ + fglevel = ((vis->renderflags + 1) << 8) & ~0x3ff; + bglevel = FRACUNIT-fglevel; // original was row-wise // width = countbase @@ -2691,9 +2666,11 @@ void R_DrawParticle_C (vissprite_t *vis) dest = ylookup[yl] + x + dc_destorg; for (int y = 0; y < ycount; y++) { - DWORD bg = bg2rgb[*dest]; - bg = (fg+bg) | 0x1f07c1f; - *dest = RGB32k.All[bg & (bg>>15)]; + int dest_r = (GPalette.BaseColors[*dest].r * bglevel + GPalette.BaseColors[color].r * fglevel) >> 10; + int dest_g = (GPalette.BaseColors[*dest].g * bglevel + GPalette.BaseColors[color].g * fglevel) >> 10; + int dest_b = (GPalette.BaseColors[*dest].b * bglevel + GPalette.BaseColors[color].b * fglevel) >> 10; + + *dest = RGB256k.RGB[dest_r][dest_g][dest_b]; dest += spacing; } } diff --git a/src/v_draw.cpp b/src/v_draw.cpp index bf7ccaab4e..34db093565 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -989,27 +989,6 @@ void DCanvas::PUTTRANSDOT (int xx, int yy, int basecolor, int level) static int oldyy; static int oldyyshifted; -#if 0 - if(xx < 32) - cc += 7-(xx>>2); - else if(xx > (finit_width - 32)) - cc += 7-((finit_width-xx) >> 2); -// if(cc==oldcc) //make sure that we don't double fade the corners. -// { - if(yy < 32) - cc += 7-(yy>>2); - else if(yy > (finit_height - 32)) - cc += 7-((finit_height-yy) >> 2); -// } - if(cc > cm && cm != NULL) - { - cc = cm; - } - else if(cc > oldcc+6) // don't let the color escape from the fade table... 
- { - cc=oldcc+6; - } -#endif if (yy == oldyy+1) { oldyy++; @@ -1027,12 +1006,12 @@ void DCanvas::PUTTRANSDOT (int xx, int yy, int basecolor, int level) } BYTE *spot = GetBuffer() + oldyyshifted + xx; - DWORD *bg2rgb = Col2RGB8[1+level]; - DWORD *fg2rgb = Col2RGB8[63-level]; - DWORD fg = fg2rgb[basecolor]; - DWORD bg = bg2rgb[*spot]; - bg = (fg+bg) | 0x01f07c1f; - *spot = RGB32k.All[bg&(bg>>15)]; + + uint32_t r = (GPalette.BaseColors[*spot].r * (64 - level) + GPalette.BaseColors[basecolor].r * level) / 64; + uint32_t g = (GPalette.BaseColors[*spot].g * (64 - level) + GPalette.BaseColors[basecolor].g * level) / 64; + uint32_t b = (GPalette.BaseColors[*spot].b * (64 - level) + GPalette.BaseColors[basecolor].b * level) / 64; + + *spot = (BYTE)RGB256k.RGB[r][g][b]; } void DCanvas::DrawLine(int x0, int y0, int x1, int y1, int palColor, uint32 realcolor) diff --git a/src/v_video.cpp b/src/v_video.cpp index 92992220b2..6e333f8406 100644 --- a/src/v_video.cpp +++ b/src/v_video.cpp @@ -349,8 +349,6 @@ void DCanvas::Dim (PalEntry color, float damount, int x1, int y1, int w, int h) if (damount == 0.f) return; - DWORD *bg2rgb; - DWORD fg; int gap; BYTE *spot; int x, y; @@ -372,28 +370,23 @@ void DCanvas::Dim (PalEntry color, float damount, int x1, int y1, int w, int h) return; } - { - int amount; - - amount = (int)(damount * 64); - bg2rgb = Col2RGB8[64-amount]; - - fg = (((color.r * amount) >> 4) << 20) | - ((color.g * amount) >> 4) | - (((color.b * amount) >> 4) << 10); - } spot = Buffer + x1 + y1*Pitch; gap = Pitch - w; + + int alpha = (int)((float)64 * damount); + int ialpha = 64 - alpha; + int dimmedcolor_r = color.r * alpha; + int dimmedcolor_g = color.g * alpha; + int dimmedcolor_b = color.b * alpha; for (y = h; y != 0; y--) { for (x = w; x != 0; x--) { - DWORD bg; - - bg = bg2rgb[(*spot)&0xff]; - bg = (fg+bg) | 0x1f07c1f; - *spot = RGB32k.All[bg&(bg>>15)]; + uint32_t r = (dimmedcolor_r + GPalette.BaseColors[*spot].r * ialpha) >> 8; + uint32_t g = (dimmedcolor_g + 
GPalette.BaseColors[*spot].g * ialpha) >> 8; + uint32_t b = (dimmedcolor_b + GPalette.BaseColors[*spot].b * ialpha) >> 8; + *spot = (BYTE)RGB256k.RGB[r][g][b]; spot++; } spot += gap; @@ -672,7 +665,8 @@ static void BuildTransTable (const PalEntry *palette) for (r = 0; r < 32; r++) for (g = 0; g < 32; g++) for (b = 0; b < 32; b++) - RGB32k.RGB[r][g][b] = ColorMatcher.Pick ((r<<3)|(r>>2), (g<<3)|(g>>2), (b<<3)|(b>>2)); + //RGB32k.RGB[r][g][b] = ColorMatcher.Pick ((r<<3)|(r>>2), (g<<3)|(g>>2), (b<<3)|(b>>2)); + RGB32k.RGB[r][g][b] = 2; #ifndef NO_RGB666 // create the RGB666 lookup table for (r = 0; r < 64; r++) From 1ec06463d982b22ca3e5617c9fba74fe6e0ef054 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 17 Dec 2016 22:34:36 +0100 Subject: [PATCH 505/912] - Set colormap light info using R_SetColorMapLight and R_SetDSColorMapLight rather than manually calculating it - Move texture and span management into R_DrawMaskedColumn --- src/r_draw.cpp | 10 +++++++ src/r_draw.h | 2 ++ src/r_main.h | 3 +++ src/r_plane.cpp | 40 ++++++++++++++++++++-------- src/r_segs.cpp | 40 ++++++++++++++-------------- src/r_things.cpp | 68 +++++++++++++++++++++++++----------------------- src/r_things.h | 2 +- src/v_draw.cpp | 30 +++------------------ 8 files changed, 104 insertions(+), 91 deletions(-) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index a461e1877d..25297f6c84 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -548,6 +548,11 @@ namespace swrenderer dc_colormap = base_colormap + (GETPALOOKUP(light, shade) << COLORMAPSHIFT); } + void R_SetColorMapLight(FDynamicColormap *base_colormap, float light, int shade) + { + R_SetColorMapLight(base_colormap->Maps, light, shade); + } + void R_SetDSColorMapLight(lighttable_t *base_colormap, float light, int shade) { using namespace drawerargs; @@ -555,6 +560,11 @@ namespace swrenderer ds_colormap = base_colormap + (GETPALOOKUP(light, shade) << COLORMAPSHIFT); } + void R_SetDSColorMapLight(FDynamicColormap *base_colormap, float light, 
int shade) + { + R_SetDSColorMapLight(base_colormap->Maps, light, shade); + } + void R_SetTranslationMap(lighttable_t *translation) { using namespace drawerargs; diff --git a/src/r_draw.h b/src/r_draw.h index 58934981b0..ab430d2af5 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -192,7 +192,9 @@ namespace swrenderer void R_DrawDoubleSkyCol4(uint32_t solid_top, uint32_t solid_bottom); void R_SetColorMapLight(lighttable_t *base_colormap, float light, int shade); + void R_SetColorMapLight(FDynamicColormap *base_colormap, float light, int shade); void R_SetDSColorMapLight(lighttable_t *base_colormap, float light, int shade); + void R_SetDSColorMapLight(FDynamicColormap *base_colormap, float light, int shade); void R_SetTranslationMap(lighttable_t *translation); void R_SetupSpanBits(FTexture *tex); diff --git a/src/r_main.h b/src/r_main.h index 87b56163b0..be1c36306d 100644 --- a/src/r_main.h +++ b/src/r_main.h @@ -85,6 +85,9 @@ extern bool r_dontmaplines; // Change R_CalcTiltedLighting() when this changes. #define GETPALOOKUP(vis,shade) (clamp (((shade)-FLOAT2FIXED(MIN(MAXLIGHTVIS,double(vis))))>>FRACBITS, 0, NUMCOLORMAPS-1)) +// Converts fixedlightlev into a shade value +#define FIXEDLIGHT2SHADE(lightlev) (((lightlev) >> COLORMAPSHIFT) << FRACBITS) + extern double GlobVis; void R_SetVisibility(double visibility); diff --git a/src/r_plane.cpp b/src/r_plane.cpp index eafc9fa26e..cd378aec14 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -240,8 +240,7 @@ void R_MapPlane (int y, int x1) if (plane_shade) { // Determine lighting based on the span's distance from the viewer. 
- ds_colormap = basecolormap->Maps + (GETPALOOKUP ( - GlobVis * fabs(CenterY - y), planeshade) << COLORMAPSHIFT); + R_SetDSColorMapLight(basecolormap, GlobVis * fabs(CenterY - y), planeshade); } ds_y = y; @@ -1043,7 +1042,7 @@ void R_DrawSinglePlane (visplane_t *pl, fixed_t alpha, bool additive, bool maske R_SetupSpanBits(tex); double xscale = pl->xform.xScale * tex->Scale.X; double yscale = pl->xform.yScale * tex->Scale.Y; - ds_source = tex->GetPixels (); + R_SetSpanSource(tex); basecolormap = pl->colormap; planeshade = LIGHT2SHADE(pl->lightlevel); @@ -1405,12 +1404,13 @@ void R_DrawSkyPlane (visplane_t *pl) bool fakefixed = false; if (fixedcolormap) { - dc_colormap = fixedcolormap; + R_SetColorMapLight(fixedcolormap, 0, 0); } else { fakefixed = true; - fixedcolormap = dc_colormap = NormalLight.Maps; + fixedcolormap = NormalLight.Maps; + R_SetColorMapLight(fixedcolormap, 0, 0); } R_DrawSky (pl); @@ -1484,12 +1484,21 @@ void R_DrawNormalPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t planeheight = fabs(pl->height.Zat0() - ViewPos.Z); GlobVis = r_FloorVisibility / planeheight; + ds_light = 0; if (fixedlightlev >= 0) - ds_colormap = basecolormap->Maps + fixedlightlev, plane_shade = false; + { + R_SetDSColorMapLight(basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); + plane_shade = false; + } else if (fixedcolormap) - ds_colormap = fixedcolormap, plane_shade = false; + { + R_SetDSColorMapLight(fixedcolormap, 0, 0); + plane_shade = false; + } else + { plane_shade = true; + } if (spanfunc != R_FillSpan) { @@ -1645,11 +1654,20 @@ void R_DrawTiltedPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t planelightfloat = -planelightfloat; if (fixedlightlev >= 0) - ds_colormap = basecolormap->Maps + fixedlightlev, plane_shade = false; + { + R_SetDSColorMapLight(basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); + plane_shade = false; + } else if (fixedcolormap) - ds_colormap = fixedcolormap, plane_shade = false; + { + 
R_SetDSColorMapLight(fixedcolormap, 0, 0); + plane_shade = false; + } else - ds_colormap = basecolormap->Maps, plane_shade = true; + { + R_SetDSColorMapLight(basecolormap, 0, 0); + plane_shade = true; + } // Hack in support for 1 x Z and Z x 1 texture sizes if (ds_ybits == 0) @@ -1766,4 +1784,4 @@ bool R_PlaneInitData () return true; } -} \ No newline at end of file +} diff --git a/src/r_segs.cpp b/src/r_segs.cpp index d6ab86aa07..38db1a6b28 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -44,6 +44,7 @@ #include "r_plane.h" #include "r_segs.h" #include "r_3dfloors.h" +#include "r_draw.h" #include "v_palette.h" #include "r_data/colormaps.h" @@ -53,8 +54,9 @@ CVAR(Bool, r_fogboundary, true, 0) CVAR(Bool, r_drawmirrors, true, 0) EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor); +EXTERN_CVAR(Bool, r_mipmap) -namespace swrenderer +namespace swrenderer { using namespace drawerargs; @@ -156,7 +158,7 @@ static void BlastMaskedColumn (FTexture *tex, bool useRt) // calculate lighting if (fixedcolormap == NULL && fixedlightlev < 0) { - dc_colormap = basecolormap->Maps + (GETPALOOKUP (rw_light, wallshade) << COLORMAPSHIFT); + R_SetColorMapLight(basecolormap, rw_light, wallshade); } dc_iscale = xs_Fix<16>::ToFix(MaskedSWall[dc_x] * MaskedScaleY); @@ -174,9 +176,7 @@ static void BlastMaskedColumn (FTexture *tex, bool useRt) // when forming multipatched textures (see r_data.c). // draw the texture - const FTexture::Span *spans; - const BYTE *pixels = tex->GetColumn (maskedtexturecol[dc_x] >> FRACBITS, &spans); - R_DrawMaskedColumn(pixels, spans, useRt); + R_DrawMaskedColumn(tex, maskedtexturecol[dc_x], useRt); rw_light += rw_lightstep; spryscale += rw_scalestep; } @@ -292,9 +292,9 @@ void R_RenderMaskedSegRange (drawseg_t *ds, int x1, int x2) rw_scalestep = ds->iscalestep; if (fixedlightlev >= 0) - dc_colormap = (r_fullbrightignoresectorcolor) ? 
(FullNormalLight.Maps + fixedlightlev) : (basecolormap->Maps + fixedlightlev); + R_SetColorMapLight((r_fullbrightignoresectorcolor) ? &FullNormalLight : basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); else if (fixedcolormap != NULL) - dc_colormap = fixedcolormap; + R_SetColorMapLight(fixedcolormap, 0, 0); // find positioning texheight = tex->GetScaledHeightDouble(); @@ -440,7 +440,7 @@ void R_RenderMaskedSegRange (drawseg_t *ds, int x1, int x2) while (dc_x < stop) { - rt_initcols(); + rt_initcols(nullptr); BlastMaskedColumn (tex, true); dc_x++; BlastMaskedColumn (tex, true); dc_x++; BlastMaskedColumn (tex, true); dc_x++; @@ -609,9 +609,9 @@ void R_RenderFakeWall(drawseg_t *ds, int x1, int x2, F3DFloor *rover) } if (fixedlightlev >= 0) - dc_colormap = (r_fullbrightignoresectorcolor) ? (FullNormalLight.Maps + fixedlightlev) : (basecolormap->Maps + fixedlightlev); + R_SetColorMapLight((r_fullbrightignoresectorcolor) ? &FullNormalLight : basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); else if (fixedcolormap != NULL) - dc_colormap = fixedcolormap; + R_SetColorMapLight(fixedcolormap, 0, 0); WallC.sz1 = ds->sz1; WallC.sz2 = ds->sz2; @@ -1061,9 +1061,9 @@ void R_RenderSegLoop () fixed_t xoffset = rw_offset; if (fixedlightlev >= 0) - dc_colormap = (r_fullbrightignoresectorcolor) ? (FullNormalLight.Maps + fixedlightlev) : (basecolormap->Maps + fixedlightlev); + R_SetColorMapLight((r_fullbrightignoresectorcolor) ? &FullNormalLight : basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); else if (fixedcolormap != NULL) - dc_colormap = fixedcolormap; + R_SetColorMapLight(fixedcolormap, 0, 0); // clip wall to the floor and ceiling for (x = x1; x < x2; ++x) @@ -2304,11 +2304,11 @@ static void R_RenderDecal (side_t *wall, DBaseDecal *decal, drawseg_t *clipper, rw_light = rw_lightleft + (x1 - savecoord.sx1) * rw_lightstep; if (fixedlightlev >= 0) - dc_colormap = (r_fullbrightignoresectorcolor) ? 
(FullNormalLight.Maps + fixedlightlev) : (usecolormap->Maps + fixedlightlev); + R_SetColorMapLight((r_fullbrightignoresectorcolor) ? &FullNormalLight : usecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); else if (fixedcolormap != NULL) - dc_colormap = fixedcolormap; + R_SetColorMapLight(fixedcolormap, 0, 0); else if (!foggy && (decal->RenderFlags & RF_FULLBRIGHT)) - dc_colormap = (r_fullbrightignoresectorcolor) ? FullNormalLight.Maps : usecolormap->Maps; + R_SetColorMapLight((r_fullbrightignoresectorcolor) ? &FullNormalLight : usecolormap, 0, 0); else calclighting = true; @@ -2359,7 +2359,7 @@ static void R_RenderDecal (side_t *wall, DBaseDecal *decal, drawseg_t *clipper, { if (calclighting) { // calculate lighting - dc_colormap = usecolormap->Maps + (GETPALOOKUP (rw_light, wallshade) << COLORMAPSHIFT); + R_SetColorMapLight(usecolormap, rw_light, wallshade); } R_WallSpriteColumn (false); dc_x++; @@ -2369,9 +2369,9 @@ static void R_RenderDecal (side_t *wall, DBaseDecal *decal, drawseg_t *clipper, { if (calclighting) { // calculate lighting - dc_colormap = usecolormap->Maps + (GETPALOOKUP (rw_light, wallshade) << COLORMAPSHIFT); + R_SetColorMapLight(usecolormap, rw_light, wallshade); } - rt_initcols(); + rt_initcols(nullptr); for (int zz = 4; zz; --zz) { R_WallSpriteColumn (true); @@ -2384,7 +2384,7 @@ static void R_RenderDecal (side_t *wall, DBaseDecal *decal, drawseg_t *clipper, { if (calclighting) { // calculate lighting - dc_colormap = usecolormap->Maps + (GETPALOOKUP (rw_light, wallshade) << COLORMAPSHIFT); + R_SetColorMapLight(usecolormap, rw_light, wallshade); } R_WallSpriteColumn (false); dc_x++; @@ -2408,4 +2408,4 @@ done: WallC = savecoord; } -} \ No newline at end of file +} diff --git a/src/r_things.cpp b/src/r_things.cpp index 9e9b161cdf..f03bfbae0a 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -256,8 +256,23 @@ double sprtopscreen; bool sprflipvert; -void R_DrawMaskedColumn (const BYTE *column, const FTexture::Span *span, bool useRt) +void 
R_DrawMaskedColumn (FTexture *tex, fixed_t col, bool useRt, bool unmasked) { + const FTexture::Span *span; + const BYTE *column; + + column = tex->GetColumn(col >> FRACBITS, &span); + + FTexture::Span unmaskedSpan[2]; + if (unmasked) + { + span = unmaskedSpan; + unmaskedSpan[0].TopOffset = 0; + unmaskedSpan[0].Length = tex->GetHeight(); + unmaskedSpan[1].TopOffset = 0; + unmaskedSpan[1].Length = 0; + } + while (span->Length != 0) { const int length = span->Length; @@ -377,8 +392,6 @@ static inline bool R_ClipSpriteColumnWithPortals(vissprite_t* spr) // void R_DrawVisSprite (vissprite_t *vis) { - const BYTE *pixels; - const FTexture::Span *spans; fixed_t frac; FTexture *tex; int x2, stop4; @@ -392,7 +405,7 @@ void R_DrawVisSprite (vissprite_t *vis) } fixed_t centeryfrac = FLOAT2FIXED(CenterY); - dc_colormap = vis->Style.colormap; + R_SetColorMapLight(vis->Style.colormap, 0.0f, 0); mode = R_SetPatchStyle (vis->Style.RenderStyle, vis->Style.Alpha, vis->Translation, vis->FillColor); @@ -400,7 +413,7 @@ void R_DrawVisSprite (vissprite_t *vis) { // For shaded sprites, R_SetPatchStyle sets a dc_colormap to an alpha table, but // it is the brightest one. We need to get back to the proper light level for // this sprite. 
- dc_colormap += vis->ColormapNum << COLORMAPSHIFT; + R_SetColorMapLight(dc_colormap, 0, vis->ColormapNum << FRACBITS); } if (mode != DontDraw) @@ -445,21 +458,19 @@ void R_DrawVisSprite (vissprite_t *vis) { while ((dc_x < stop4) && (dc_x & 3)) { - pixels = tex->GetColumn (frac >> FRACBITS, &spans); if (ispsprite || !R_ClipSpriteColumnWithPortals(vis)) - R_DrawMaskedColumn (pixels, spans, false); + R_DrawMaskedColumn (tex, frac, false); dc_x++; frac += xiscale; } while (dc_x < stop4) { - rt_initcols(); + rt_initcols(nullptr); for (int zz = 4; zz; --zz) { - pixels = tex->GetColumn (frac >> FRACBITS, &spans); if (ispsprite || !R_ClipSpriteColumnWithPortals(vis)) - R_DrawMaskedColumn (pixels, spans, true); + R_DrawMaskedColumn (tex, frac, true); dc_x++; frac += xiscale; } @@ -468,9 +479,8 @@ void R_DrawVisSprite (vissprite_t *vis) while (dc_x < x2) { - pixels = tex->GetColumn (frac >> FRACBITS, &spans); if (ispsprite || !R_ClipSpriteColumnWithPortals(vis)) - R_DrawMaskedColumn (pixels, spans, false); + R_DrawMaskedColumn (tex, frac, false); dc_x++; frac += xiscale; } @@ -522,11 +532,11 @@ void R_DrawWallSprite(vissprite_t *spr) rw_lightstep = float((GlobVis / spr->wallc.sz2 - rw_lightleft) / (spr->wallc.sx2 - spr->wallc.sx1)); rw_light = rw_lightleft + (x1 - spr->wallc.sx1) * rw_lightstep; if (fixedlightlev >= 0) - dc_colormap = usecolormap->Maps + fixedlightlev; + R_SetColorMapLight(usecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); else if (fixedcolormap != NULL) - dc_colormap = fixedcolormap; + R_SetColorMapLight(fixedcolormap, 0, 0); else if (!foggy && (spr->renderflags & RF_FULLBRIGHT)) - dc_colormap = (r_fullbrightignoresectorcolor) ? FullNormalLight.Maps : usecolormap->Maps; + R_SetColorMapLight((r_fullbrightignoresectorcolor) ? 
&FullNormalLight : usecolormap, 0, 0); else calclighting = true; @@ -577,7 +587,7 @@ void R_DrawWallSprite(vissprite_t *spr) { if (calclighting) { // calculate lighting - dc_colormap = usecolormap->Maps + (GETPALOOKUP (rw_light, shade) << COLORMAPSHIFT); + R_SetColorMapLight(usecolormap, rw_light, shade); } if (!R_ClipSpriteColumnWithPortals(spr)) R_WallSpriteColumn(false); @@ -588,9 +598,9 @@ void R_DrawWallSprite(vissprite_t *spr) { if (calclighting) { // calculate lighting - dc_colormap = usecolormap->Maps + (GETPALOOKUP (rw_light, shade) << COLORMAPSHIFT); + R_SetColorMapLight(usecolormap, rw_light, shade); } - rt_initcols(); + rt_initcols(nullptr); for (int zz = 4; zz; --zz) { if (!R_ClipSpriteColumnWithPortals(spr)) @@ -604,7 +614,7 @@ void R_DrawWallSprite(vissprite_t *spr) { if (calclighting) { // calculate lighting - dc_colormap = usecolormap->Maps + (GETPALOOKUP (rw_light, shade) << COLORMAPSHIFT); + R_SetColorMapLight(usecolormap, rw_light, shade); } if (!R_ClipSpriteColumnWithPortals(spr)) R_WallSpriteColumn(false); @@ -624,11 +634,8 @@ void R_WallSpriteColumn (bool useRt) else sprtopscreen = CenterY - dc_texturemid * spryscale; - const BYTE *column; - const FTexture::Span *spans; - column = WallSpriteTile->GetColumn (lwall[dc_x] >> FRACBITS, &spans); dc_texturefrac = 0; - R_DrawMaskedColumn(column, spans, useRt); + R_DrawMaskedColumn(WallSpriteTile, lwall[dc_x], useRt); rw_light += rw_lightstep; } @@ -638,7 +645,7 @@ void R_DrawVisVoxel(vissprite_t *spr, int minslabz, int maxslabz, short *cliptop int flags = 0; // Do setup for blending. 
- dc_colormap = spr->Style.colormap; + R_SetColorMapLight(spr->Style.colormap, 0.0f, 0); mode = R_SetPatchStyle(spr->Style.RenderStyle, spr->Style.Alpha, spr->Translation, spr->FillColor); if (mode == DontDraw) @@ -689,10 +696,7 @@ void R_DrawVisVoxel(vissprite_t *spr, int minslabz, int maxslabz, short *cliptop } else { - unsigned int **tspan = &dc_ctspan[x & 3]; - (*tspan)[0] = span->Start; - (*tspan)[1] = span->Stop - 1; - *tspan += 2; + rt_span_coverage(x, span->Start, span->Stop - 1); } } if (!(flags & DVF_SPANSONLY) && (x & 3) == 3) @@ -2044,7 +2048,7 @@ void R_DrawSprite (vissprite_t *spr) else { // diminished light spriteshade = LIGHT2SHADE(sec->lightlevel + r_actualextralight); - spr->Style.colormap = mybasecolormap->Maps + (GETPALOOKUP ( + spr->Style.colormap = mybasecolormap->Maps + (GETPALOOKUP( r_SpriteVisibility / MAX(MINZ, (double)spr->depth), spriteshade) << COLORMAPSHIFT); } } @@ -3237,16 +3241,16 @@ void R_CheckOffscreenBuffer(int width, int height, bool spansonly) { if (OffscreenColorBuffer == NULL) { - OffscreenColorBuffer = new BYTE[width * height]; + OffscreenColorBuffer = new BYTE[width * height * 4]; } else if (OffscreenBufferWidth != width || OffscreenBufferHeight != height) { delete[] OffscreenColorBuffer; - OffscreenColorBuffer = new BYTE[width * height]; + OffscreenColorBuffer = new BYTE[width * height * 4]; } } OffscreenBufferWidth = width; OffscreenBufferHeight = height; } -} \ No newline at end of file +} diff --git a/src/r_things.h b/src/r_things.h index bf32b655f2..6d694b8fd7 100644 --- a/src/r_things.h +++ b/src/r_things.h @@ -128,7 +128,7 @@ extern double pspriteyscale; extern FTexture *WallSpriteTile; -void R_DrawMaskedColumn (const BYTE *column, const FTexture::Span *spans, bool useRt); +void R_DrawMaskedColumn (FTexture *texture, fixed_t column, bool useRt, bool unmasked = false); void R_WallSpriteColumn (bool useRt); void R_CacheSprite (spritedef_t *sprite); diff --git a/src/v_draw.cpp b/src/v_draw.cpp index 
1524c7ba4a..6f8bc51988 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -135,20 +135,9 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) using namespace swrenderer; using namespace drawerargs; - FTexture::Span unmaskedSpan[2]; - const FTexture::Span **spanptr, *spans; static short bottomclipper[MAXWIDTH], topclipper[MAXWIDTH]; const BYTE *translation = NULL; - if (parms.masked) - { - spanptr = &spans; - } - else - { - spanptr = NULL; - } - if (APART(parms.colorOverlay) != 0) { // The software renderer cannot invert the source without inverting the overlay @@ -198,18 +187,8 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) if (mode != DontDraw) { - const BYTE *pixels; int stop4; - if (spanptr == NULL) - { // Create a single span for forced unmasked images - spans = unmaskedSpan; - unmaskedSpan[0].TopOffset = 0; - unmaskedSpan[0].Length = img->GetHeight(); - unmaskedSpan[1].TopOffset = 0; - unmaskedSpan[1].Length = 0; - } - double centeryback = CenterY; CenterY = 0; @@ -301,8 +280,7 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) { while ((dc_x < stop4) && (dc_x & 3)) { - pixels = img->GetColumn(frac >> FRACBITS, spanptr); - R_DrawMaskedColumn(pixels, spans, false); + R_DrawMaskedColumn(img, frac, false, !parms.masked); dc_x++; frac += xiscale_i; } @@ -312,8 +290,7 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) rt_initcols(); for (int zz = 4; zz; --zz) { - pixels = img->GetColumn(frac >> FRACBITS, spanptr); - R_DrawMaskedColumn(pixels, spans, true); + R_DrawMaskedColumn(img, frac, true, !parms.masked); dc_x++; frac += xiscale_i; } @@ -322,8 +299,7 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) while (dc_x < x2_i) { - pixels = img->GetColumn(frac >> FRACBITS, spanptr); - R_DrawMaskedColumn(pixels, spans, false); + R_DrawMaskedColumn(img, frac, false, !parms.masked); dc_x++; frac += xiscale_i; } From bb3fa15bed3f4cf5926b1f1e087a4f2ee21596f6 Mon Sep 17 00:00:00 2001 From: 
Magnus Norddahl Date: Sat, 17 Dec 2016 22:57:57 +0100 Subject: [PATCH 506/912] Removed leftovers from QZDoom --- src/r_segs.cpp | 3 +-- src/r_things.cpp | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/r_segs.cpp b/src/r_segs.cpp index 38db1a6b28..ccf6ccf20c 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -54,9 +54,8 @@ CVAR(Bool, r_fogboundary, true, 0) CVAR(Bool, r_drawmirrors, true, 0) EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor); -EXTERN_CVAR(Bool, r_mipmap) -namespace swrenderer +namespace swrenderer { using namespace drawerargs; diff --git a/src/r_things.cpp b/src/r_things.cpp index f03bfbae0a..a1ace0d49c 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -3241,12 +3241,12 @@ void R_CheckOffscreenBuffer(int width, int height, bool spansonly) { if (OffscreenColorBuffer == NULL) { - OffscreenColorBuffer = new BYTE[width * height * 4]; + OffscreenColorBuffer = new BYTE[width * height]; } else if (OffscreenBufferWidth != width || OffscreenBufferHeight != height) { delete[] OffscreenColorBuffer; - OffscreenColorBuffer = new BYTE[width * height * 4]; + OffscreenColorBuffer = new BYTE[width * height]; } } OffscreenBufferWidth = width; From 6235b12cfcd28e699285af405ca25b1443d4cbd5 Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Sat, 17 Dec 2016 21:26:08 -0500 Subject: [PATCH 507/912] - No matter how many drawers you have, you have more drawers to do. 
--- src/r_draw_pal.h | 2 + src/r_drawt_pal.cpp | 148 +++++++++-------------------- src/textures/ddstexture.cpp | 8 +- src/textures/jpegtexture.cpp | 4 +- src/textures/multipatchtexture.cpp | 2 +- src/textures/pngtexture.cpp | 13 --- src/textures/tgatexture.cpp | 14 +-- 7 files changed, 53 insertions(+), 138 deletions(-) diff --git a/src/r_draw_pal.h b/src/r_draw_pal.h index f2b1f05712..262b703875 100644 --- a/src/r_draw_pal.h +++ b/src/r_draw_pal.h @@ -303,6 +303,8 @@ namespace swrenderer const uint32_t *_srcblend; const uint32_t *_destblend; const uint8_t *_translation; + fixed_t _srcalpha; + fixed_t _destalpha; int _color; }; diff --git a/src/r_drawt_pal.cpp b/src/r_drawt_pal.cpp index 3356592d25..16d24ecaf8 100644 --- a/src/r_drawt_pal.cpp +++ b/src/r_drawt_pal.cpp @@ -173,6 +173,8 @@ namespace swrenderer _colormap = dc_colormap; _srcblend = dc_srcblend; _destblend = dc_destblend; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; _translation = dc_translation; _color = dc_color; } @@ -440,8 +442,6 @@ namespace swrenderer if (count <= 0) return; - const uint32_t *fg2rgb = _srcblend; - const uint32_t *bg2rgb = _destblend; dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4 + hx]; pitch = _pitch * thread->num_cores; @@ -451,10 +451,10 @@ namespace swrenderer uint32_t fg = colormap[*source]; uint32_t bg = *dest; - fg = fg2rgb[fg]; - bg = bg2rgb[bg]; - fg = (fg+bg) | 0x1f07c1f; - *dest = RGB32k.All[fg & (fg>>15)]; + int r = MIN((GPalette.BaseColors[fg].r * _srcalpha + GPalette.BaseColors[bg].r * _destalpha)>>18, 63); + int g = MIN((GPalette.BaseColors[fg].g * _srcalpha + GPalette.BaseColors[bg].g * _destalpha)>>18, 63); + int b = MIN((GPalette.BaseColors[fg].b * _srcalpha + GPalette.BaseColors[bg].b * _destalpha)>>18, 63); + *dest = RGB256k.RGB[r][g][b]; source += 4; dest += pitch; } while (--count); @@ -472,42 +472,21 @@ namespace swrenderer if (count <= 0) return; - const uint32_t 
*fg2rgb = _srcblend; - const uint32_t *bg2rgb = _destblend; dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4]; pitch = _pitch * thread->num_cores; colormap = _colormap; do { - uint32_t fg = colormap[source[0]]; - uint32_t bg = dest[0]; - fg = fg2rgb[fg]; - bg = bg2rgb[bg]; - fg = (fg+bg) | 0x1f07c1f; - dest[0] = RGB32k.All[fg & (fg>>15)]; - - fg = colormap[source[1]]; - bg = dest[1]; - fg = fg2rgb[fg]; - bg = bg2rgb[bg]; - fg = (fg+bg) | 0x1f07c1f; - dest[1] = RGB32k.All[fg & (fg>>15)]; - - - fg = colormap[source[2]]; - bg = dest[2]; - fg = fg2rgb[fg]; - bg = bg2rgb[bg]; - fg = (fg+bg) | 0x1f07c1f; - dest[2] = RGB32k.All[fg & (fg>>15)]; - - fg = colormap[source[3]]; - bg = dest[3]; - fg = fg2rgb[fg]; - bg = bg2rgb[bg]; - fg = (fg+bg) | 0x1f07c1f; - dest[3] = RGB32k.All[fg & (fg>>15)]; + for (int ks = 0; ks < 4; ks++) + { // [SP] this 4col function was a block of copy-pasted code. 4 times. I regret nothing. + uint32_t fg = colormap[source[ks]]; + uint32_t bg = dest[ks]; + int r = MIN((GPalette.BaseColors[fg].r * _srcalpha + GPalette.BaseColors[bg].r * _destalpha)>>18, 63); + int g = MIN((GPalette.BaseColors[fg].g * _srcalpha + GPalette.BaseColors[bg].g * _destalpha)>>18, 63); + int b = MIN((GPalette.BaseColors[fg].b * _srcalpha + GPalette.BaseColors[bg].b * _destalpha)>>18, 63); + dest[ks] = RGB256k.RGB[r][g][b]; + } source += 4; dest += pitch; @@ -534,10 +513,11 @@ namespace swrenderer pitch = _pitch * thread->num_cores; do { - uint32_t val = colormap[*source]; - uint32_t fg = fgstart[val<<8]; - val = (Col2RGB8[64-val][*dest] + fg) | 0x1f07c1f; - *dest = RGB32k.All[val & (val>>15)]; + uint32_t val = *source; + int r = (GPalette.BaseColors[*dest].r * (63-val) + GPalette.BaseColors[_color].r * val) >> 8; + int g = (GPalette.BaseColors[*dest].g * (63-val) + GPalette.BaseColors[_color].g * val) >> 8; + int b = (GPalette.BaseColors[*dest].b * (63-val) + GPalette.BaseColors[_color].b * val) 
>> 8; + *dest = RGB256k.RGB[MIN(r,63)][MIN(g,63)][MIN(b,63)]; source += 4; dest += pitch; } while (--count); @@ -564,22 +544,15 @@ namespace swrenderer do { uint32_t val; - - val = colormap[source[0]]; - val = (Col2RGB8[64-val][dest[0]] + fgstart[val<<8]) | 0x1f07c1f; - dest[0] = RGB32k.All[val & (val>>15)]; - val = colormap[source[1]]; - val = (Col2RGB8[64-val][dest[1]] + fgstart[val<<8]) | 0x1f07c1f; - dest[1] = RGB32k.All[val & (val>>15)]; - - val = colormap[source[2]]; - val = (Col2RGB8[64-val][dest[2]] + fgstart[val<<8]) | 0x1f07c1f; - dest[2] = RGB32k.All[val & (val>>15)]; - - val = colormap[source[3]]; - val = (Col2RGB8[64-val][dest[3]] + fgstart[val<<8]) | 0x1f07c1f; - dest[3] = RGB32k.All[val & (val>>15)]; + for (int ks = 0; ks < 4; ks++) + { + val = source[ks]; + int r = (GPalette.BaseColors[dest[ks]].r * (63-val) + GPalette.BaseColors[_color].r * val) >> 8; + int g = (GPalette.BaseColors[dest[ks]].g * (63-val) + GPalette.BaseColors[_color].g * val) >> 8; + int b = (GPalette.BaseColors[dest[ks]].b * (63-val) + GPalette.BaseColors[_color].b * val) >> 8; + dest[ks] = RGB256k.RGB[MIN(r,63)][MIN(g,63)][MIN(b,63)]; + } source += 4; dest += pitch; @@ -598,23 +571,18 @@ namespace swrenderer if (count <= 0) return; - const uint32_t *fg2rgb = _srcblend; - const uint32_t *bg2rgb = _destblend; dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4 + hx]; pitch = _pitch * thread->num_cores; colormap = _colormap; do { - uint32_t a = fg2rgb[colormap[*source]] + bg2rgb[*dest]; - uint32_t b = a; - - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - *dest = RGB32k.All[(a>>15) & a]; + int fg = *source; + int bg = *dest; + int r = MIN((GPalette.BaseColors[fg].r * _srcalpha + GPalette.BaseColors[bg].r * _destalpha)>>18, 63); + int g = MIN((GPalette.BaseColors[fg].g * _srcalpha + GPalette.BaseColors[bg].g * _destalpha)>>18, 63); + int b = 
MIN((GPalette.BaseColors[fg].b * _srcalpha + GPalette.BaseColors[bg].b * _destalpha)>>18, 63); + *dest = RGB256k.RGB[r][g][b]; source += 4; dest += pitch; } while (--count); @@ -637,46 +605,16 @@ namespace swrenderer pitch = _pitch * thread->num_cores; colormap = _colormap; - const uint32_t *fg2rgb = _srcblend; - const uint32_t *bg2rgb = _destblend; - do { - uint32_t a = fg2rgb[colormap[source[0]]] + bg2rgb[dest[0]]; - uint32_t b = a; - - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - dest[0] = RGB32k.All[(a>>15) & a]; - - a = fg2rgb[colormap[source[1]]] + bg2rgb[dest[1]]; - b = a; - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - dest[1] = RGB32k.All[(a>>15) & a]; - - a = fg2rgb[colormap[source[2]]] + bg2rgb[dest[2]]; - b = a; - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - dest[2] = RGB32k.All[(a>>15) & a]; - - a = fg2rgb[colormap[source[3]]] + bg2rgb[dest[3]]; - b = a; - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - dest[3] = RGB32k.All[(a>>15) & a]; + for (int ks = 0; ks < 4; ks++) + { + int fg = source[ks]; + int bg = dest[ks]; + int r = MIN((GPalette.BaseColors[fg].r * _srcalpha + GPalette.BaseColors[bg].r * _destalpha)>>18, 63); + int g = MIN((GPalette.BaseColors[fg].g * _srcalpha + GPalette.BaseColors[bg].g * _destalpha)>>18, 63); + int b = MIN((GPalette.BaseColors[fg].b * _srcalpha + GPalette.BaseColors[bg].b * _destalpha)>>18, 63); + dest[ks] = RGB256k.RGB[r][g][b]; + } source += 4; dest += pitch; diff --git a/src/textures/ddstexture.cpp b/src/textures/ddstexture.cpp index 31e7480221..a2c69b38b2 100644 --- a/src/textures/ddstexture.cpp +++ b/src/textures/ddstexture.cpp @@ -551,7 +551,7 @@ void FDDSTexture::ReadRGB (FWadLump &lump, BYTE *tcbuf) DWORD r = (c & RMask) << RShiftL; r |= r >> RShiftR; DWORD g = (c & GMask) << GShiftL; g |= g >> GShiftR; DWORD b = (c & BMask) << BShiftL; b |= b >> 
BShiftR; - *pixelp = RGB32k.RGB[r >> 27][g >> 27][b >> 27]; + *pixelp = RGB256k.RGB[r >> 26][g >> 26][b >> 26]; } else { @@ -637,7 +637,7 @@ void FDDSTexture::DecompressDXT1 (FWadLump &lump, BYTE *tcbuf) // Pick colors from the palette for each of the four colors. /*if (!tcbuf)*/ for (i = 3; i >= 0; --i) { - palcol[i] = color[i].a ? RGB32k.RGB[color[i].r >> 3][color[i].g >> 3][color[i].b >> 3] : 0; + palcol[i] = color[i].a ? RGB256k.RGB[color[i].r >> 2][color[i].g >> 2][color[i].b >> 2] : 0; } // Now decode this 4x4 block to the pixel buffer. for (y = 0; y < 4; ++y) @@ -717,7 +717,7 @@ void FDDSTexture::DecompressDXT3 (FWadLump &lump, bool premultiplied, BYTE *tcbu // Pick colors from the palette for each of the four colors. if (!tcbuf) for (i = 3; i >= 0; --i) { - palcol[i] = RGB32k.RGB[color[i].r >> 3][color[i].g >> 3][color[i].b >> 3]; + palcol[i] = RGB256k.RGB[color[i].r >> 2][color[i].g >> 2][color[i].b >> 2]; } // Now decode this 4x4 block to the pixel buffer. for (y = 0; y < 4; ++y) @@ -822,7 +822,7 @@ void FDDSTexture::DecompressDXT5 (FWadLump &lump, bool premultiplied, BYTE *tcbu // Pick colors from the palette for each of the four colors. if (!tcbuf) for (i = 3; i >= 0; --i) { - palcol[i] = RGB32k.RGB[color[i].r >> 3][color[i].g >> 3][color[i].b >> 3]; + palcol[i] = RGB256k.RGB[color[i].r >> 2][color[i].g >> 2][color[i].b >> 2]; } // Now decode this 4x4 block to the pixel buffer. 
for (y = 0; y < 4; ++y) diff --git a/src/textures/jpegtexture.cpp b/src/textures/jpegtexture.cpp index 2253965987..c138edbfa3 100644 --- a/src/textures/jpegtexture.cpp +++ b/src/textures/jpegtexture.cpp @@ -406,7 +406,7 @@ void FJPEGTexture::MakeTexture () case JCS_RGB: for (int x = Width; x > 0; --x) { - *out = RGB32k.RGB[in[0]>>3][in[1]>>3][in[2]>>3]; + *out = RGB256k.RGB[in[0]>>2][in[1]>>2][in[2]>>2]; out += Height; in += 3; } @@ -430,7 +430,7 @@ void FJPEGTexture::MakeTexture () int r = in[3] - (((256-in[0])*in[3]) >> 8); int g = in[3] - (((256-in[1])*in[3]) >> 8); int b = in[3] - (((256-in[2])*in[3]) >> 8); - *out = RGB32k.RGB[r >> 3][g >> 3][b >> 3]; + *out = RGB256k.RGB[r >> 2][g >> 2][b >> 2]; out += Height; in += 4; } diff --git a/src/textures/multipatchtexture.cpp b/src/textures/multipatchtexture.cpp index e68c4e20f2..991893845b 100644 --- a/src/textures/multipatchtexture.cpp +++ b/src/textures/multipatchtexture.cpp @@ -531,7 +531,7 @@ void FMultiPatchTexture::MakeTexture () { if (*out == 0 && in[3] != 0) { - *out = RGB32k.RGB[in[2]>>3][in[1]>>3][in[0]>>3]; + *out = RGB256k.RGB[in[2]>>2][in[1]>>2][in[0]>>2]; } out += Height; in += 4; diff --git a/src/textures/pngtexture.cpp b/src/textures/pngtexture.cpp index e13944173d..414c424b8f 100644 --- a/src/textures/pngtexture.cpp +++ b/src/textures/pngtexture.cpp @@ -536,12 +536,7 @@ void FPNGTexture::MakeTexture () { if (!HaveTrans) { -#ifdef NO_RGB666 - *out++ = RGB32k.RGB[in[0]>>3][in[1]>>3][in[2]>>3]; -#else *out++ = RGB256k.RGB[in[0]>>2][in[1]>>2][in[2]>>2]; -#endif - } else { @@ -553,11 +548,7 @@ void FPNGTexture::MakeTexture () } else { -#ifdef NO_RGB666 - *out++ = RGB32k.RGB[in[0]>>3][in[1]>>3][in[2]>>3]; -#else *out++ = RGB256k.RGB[in[0]>>2][in[1]>>2][in[2]>>2]; -#endif } } in += pitch; @@ -602,11 +593,7 @@ void FPNGTexture::MakeTexture () { for (y = Height; y > 0; --y) { -#ifdef NO_RGB666 - *out++ = in[3] < 128 ? 0 : RGB32k.RGB[in[0]>>3][in[1]>>3][in[2]>>3]; -#else *out++ = in[3] < 128 ? 
0 : RGB256k.RGB[in[0]>>2][in[1]>>2][in[2]>>2]; -#endif in += pitch; } in -= backstep; diff --git a/src/textures/tgatexture.cpp b/src/textures/tgatexture.cpp index e24b926caa..331747cfe0 100644 --- a/src/textures/tgatexture.cpp +++ b/src/textures/tgatexture.cpp @@ -393,7 +393,7 @@ void FTGATexture::MakeTexture () for(int x=0;x>10) & 0x1f][(v>>5) & 0x1f][v & 0x1f]; + Pixels[x*Height+y] = RGB256k.RGB[((v>>10) & 0x1f)*2][((v>>5) & 0x1f)*2][(v & 0x1f)*2]; p+=step_x; } } @@ -405,11 +405,7 @@ void FTGATexture::MakeTexture () BYTE * p = ptr + y * Pitch; for(int x=0;x>3][p[1]>>3][p[0]>>3]; -#else Pixels[x*Height+y] = RGB256k.RGB[p[2]>>2][p[1]>>2][p[0]>>2]; -#endif p+=step_x; } } @@ -423,11 +419,7 @@ void FTGATexture::MakeTexture () BYTE * p = ptr + y * Pitch; for(int x=0;x>3][p[1]>>3][p[0]>>3]; -#else Pixels[x*Height+y] = RGB256k.RGB[p[2]>>2][p[1]>>2][p[0]>>2]; -#endif p+=step_x; } } @@ -439,11 +431,7 @@ void FTGATexture::MakeTexture () BYTE * p = ptr + y * Pitch; for(int x=0;x= 128? RGB32k.RGB[p[2]>>3][p[1]>>3][p[0]>>3] : 0; -#else Pixels[x*Height+y] = p[3] >= 128? 
RGB256k.RGB[p[2]>>2][p[1]>>2][p[0]>>2] : 0; -#endif p+=step_x; } } From 9fbd9985c8b77ba4ba1a0613d1d1708e09046763 Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Sun, 18 Dec 2016 05:33:39 -0500 Subject: [PATCH 508/912] - more work on rgb666 --- src/r_draw_pal.cpp | 82 +++--------------- src/r_drawt_pal.cpp | 160 ++++++++++++++---------------------- src/textures/pcxtexture.cpp | 2 +- 3 files changed, 72 insertions(+), 172 deletions(-) diff --git a/src/r_draw_pal.cpp b/src/r_draw_pal.cpp index 5a6c943275..33c4829346 100644 --- a/src/r_draw_pal.cpp +++ b/src/r_draw_pal.cpp @@ -314,17 +314,10 @@ namespace swrenderer uint8_t pix = source[frac >> bits]; if (pix != 0) { -#ifdef NO_RGB666 - uint32_t fg = fg2rgb[colormap[pix]]; - uint32_t bg = bg2rgb[*dest]; - fg = (fg + bg) | 0x1f07c1f; - *dest = RGB32k.All[fg & (fg >> 15)]; -#else uint32_t r = MIN(GPalette.BaseColors[colormap[pix]].r + GPalette.BaseColors[*dest].r, 255); uint32_t g = MIN(GPalette.BaseColors[colormap[pix]].g + GPalette.BaseColors[*dest].g, 255); uint32_t b = MIN(GPalette.BaseColors[colormap[pix]].b + GPalette.BaseColors[*dest].b, 255); *dest = RGB256k.RGB[r>>2][g>>2][b>>2]; -#endif } frac += fracstep; dest += pitch; @@ -364,17 +357,10 @@ namespace swrenderer uint8_t pix = _bufplce[i][vplce[i] >> bits]; if (pix != 0) { -#ifdef NO_RGB666 - uint32_t fg = fg2rgb[_palookupoffse[i][pix]]; - uint32_t bg = bg2rgb[dest[i]]; - fg = (fg + bg) | 0x1f07c1f; - dest[i] = RGB32k.All[fg & (fg >> 15)]; -#else uint32_t r = MIN(GPalette.BaseColors[_palookupoffse[i][pix]].r + GPalette.BaseColors[dest[i]].r, 255); uint32_t g = MIN(GPalette.BaseColors[_palookupoffse[i][pix]].g + GPalette.BaseColors[dest[i]].g, 255); uint32_t b = MIN(GPalette.BaseColors[_palookupoffse[i][pix]].b + GPalette.BaseColors[dest[i]].b, 255); dest[i] = RGB256k.RGB[r>>2][g>>2][b>>2]; -#endif } vplce[i] += vince[i]; } @@ -617,21 +603,10 @@ namespace swrenderer uint8_t pix = _bufplce[i][vplce[i] >> bits]; if (pix != 0) { -#ifdef NO_RGB666 - 
uint32_t a = (bg2rgb[dest[i]] | 0x40100400) - fg2rgb[_palookupoffse[i][pix]]; - uint32_t b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - dest[i] = RGB32k.All[a & (a >> 15)]; -#else uint32_t r = clamp(GPalette.BaseColors[_palookupoffse[i][pix]].r - GPalette.BaseColors[dest[i]].r, 0, 255); uint32_t g = clamp(GPalette.BaseColors[_palookupoffse[i][pix]].g - GPalette.BaseColors[dest[i]].g, 0, 255); uint32_t b = clamp(GPalette.BaseColors[_palookupoffse[i][pix]].b - GPalette.BaseColors[dest[i]].b, 0, 255); dest[i] = RGB256k.RGB[r>>2][g>>2][b>>2]; -#endif } vplce[i] += vince[i]; } @@ -715,11 +690,7 @@ namespace swrenderer c_red = (c_red * alpha_bottom + solid_bottom_r * inv_alpha_bottom) >> 8; c_green = (c_green * alpha_bottom + solid_bottom_g * inv_alpha_bottom) >> 8; c_blue = (c_blue * alpha_bottom + solid_bottom_b * inv_alpha_bottom) >> 8; -#ifdef NO_RGB666 - *dest = RGB32k.RGB[(c_red >> 3)][(c_green >> 3)][(c_blue >> 3)]; -#else *dest = RGB256k.RGB[(c_red >> 2)][(c_green >> 2)][(c_blue >> 2)]; -#endif } frac += fracstep; @@ -747,13 +718,8 @@ namespace swrenderer int solid_bottom_r = RPART(solid_bottom); int solid_bottom_g = GPART(solid_bottom); int solid_bottom_b = BPART(solid_bottom); -#ifdef NO_RGB666 - uint32_t solid_top_fill = RGB32k.RGB[(solid_top_r >> 3)][(solid_top_g >> 3)][(solid_top_b >> 3)]; - uint32_t solid_bottom_fill = RGB32k.RGB[(solid_bottom_r >> 3)][(solid_bottom_g >> 3)][(solid_bottom_b >> 3)]; -#else uint32_t solid_top_fill = RGB256k.RGB[(solid_top_r >> 2)][(solid_top_g >> 2)][(solid_top_b >> 2)]; uint32_t solid_bottom_fill = RGB256k.RGB[(solid_bottom_r >> 2)][(solid_bottom_g >> 2)][(solid_bottom_b >> 2)]; -#endif solid_top_fill = (solid_top_fill << 24) | (solid_top_fill << 16) | (solid_top_fill << 8) | solid_top_fill; solid_bottom_fill = (solid_bottom_fill << 24) | (solid_bottom_fill << 16) | (solid_bottom_fill << 8) | solid_bottom_fill; @@ -813,11 +779,7 @@ namespace swrenderer c_red = (c_red * alpha_top + solid_top_r 
* inv_alpha_top) >> 8; c_green = (c_green * alpha_top + solid_top_g * inv_alpha_top) >> 8; c_blue = (c_blue * alpha_top + solid_top_b * inv_alpha_top) >> 8; -#ifdef NO_RGB666 - output[col] = RGB32k.RGB[(c_red >> 3)][(c_green >> 3)][(c_blue >> 3)]; -#else output[col] = RGB256k.RGB[(c_red >> 2)][(c_green >> 2)][(c_blue >> 2)]; -#endif frac[col] += fracstep[col]; } *((uint32_t*)dest) = *((uint32_t*)output); @@ -858,11 +820,7 @@ namespace swrenderer c_red = (c_red * alpha_bottom + solid_bottom_r * inv_alpha_bottom) >> 8; c_green = (c_green * alpha_bottom + solid_bottom_g * inv_alpha_bottom) >> 8; c_blue = (c_blue * alpha_bottom + solid_bottom_b * inv_alpha_bottom) >> 8; -#ifdef NO_RGB666 - output[col] = RGB32k.RGB[(c_red >> 3)][(c_green >> 3)][(c_blue >> 3)]; -#else output[col] = RGB256k.RGB[(c_red >> 2)][(c_green >> 2)][(c_blue >> 2)]; -#endif frac[col] += fracstep[col]; } @@ -944,11 +902,7 @@ namespace swrenderer c_red = (c_red * alpha_bottom + solid_bottom_r * inv_alpha_bottom) >> 8; c_green = (c_green * alpha_bottom + solid_bottom_g * inv_alpha_bottom) >> 8; c_blue = (c_blue * alpha_bottom + solid_bottom_b * inv_alpha_bottom) >> 8; -#ifdef NO_RGB666 - *dest = RGB32k.RGB[(c_red >> 3)][(c_green >> 3)][(c_blue >> 3)]; -#else *dest = RGB256k.RGB[(c_red >> 2)][(c_green >> 2)][(c_blue >> 2)]; -#endif } frac += fracstep; @@ -978,13 +932,8 @@ namespace swrenderer int solid_bottom_r = RPART(solid_bottom); int solid_bottom_g = GPART(solid_bottom); int solid_bottom_b = BPART(solid_bottom); -#ifdef NO_RGB666 - uint32_t solid_top_fill = RGB32k.RGB[(solid_top_r >> 3)][(solid_top_g >> 3)][(solid_top_b >> 3)]; - uint32_t solid_bottom_fill = RGB32k.RGB[(solid_bottom_r >> 3)][(solid_bottom_g >> 3)][(solid_bottom_b >> 3)]; -#else uint32_t solid_top_fill = RGB256k.RGB[(solid_top_r >> 2)][(solid_top_g >> 2)][(solid_top_b >> 2)]; uint32_t solid_bottom_fill = RGB256k.RGB[(solid_bottom_r >> 2)][(solid_bottom_g >> 2)][(solid_bottom_b >> 2)]; -#endif solid_top_fill = (solid_top_fill << 24) 
| (solid_top_fill << 16) | (solid_top_fill << 8) | solid_top_fill; solid_bottom_fill = (solid_bottom_fill << 24) | (solid_bottom_fill << 16) | (solid_bottom_fill << 8) | solid_bottom_fill; @@ -1050,11 +999,7 @@ namespace swrenderer c_red = (c_red * alpha_top + solid_top_r * inv_alpha_top) >> 8; c_green = (c_green * alpha_top + solid_top_g * inv_alpha_top) >> 8; c_blue = (c_blue * alpha_top + solid_top_b * inv_alpha_top) >> 8; -#ifdef NO_RGB666 - output[col] = RGB32k.RGB[(c_red >> 3)][(c_green >> 3)][(c_blue >> 3)]; -#else output[col] = RGB256k.RGB[(c_red >> 2)][(c_green >> 2)][(c_blue >> 2)]; -#endif frac[col] += fracstep[col]; } @@ -1108,11 +1053,7 @@ namespace swrenderer c_red = (c_red * alpha_bottom + solid_bottom_r * inv_alpha_bottom) >> 8; c_green = (c_green * alpha_bottom + solid_bottom_g * inv_alpha_bottom) >> 8; c_blue = (c_blue * alpha_bottom + solid_bottom_b * inv_alpha_bottom) >> 8; -#ifdef NO_RGB666 - output[col] = RGB32k.RGB[(c_red >> 3)][(c_green >> 3)][(c_blue >> 3)]; -#else output[col] = RGB256k.RGB[(c_red >> 2)][(c_green >> 2)][(c_blue >> 2)]; -#endif frac[col] += fracstep[col]; } @@ -1244,9 +1185,12 @@ namespace swrenderer do { - uint32_t bg; - bg = (fg + bg2rgb[*dest]) | 0x1f07c1f; - *dest = RGB32k.All[bg & (bg >> 15)]; + const PalEntry* pal = GPalette.BaseColors; + // *** [SP] this is incomplete, not sure what to do here. 
+ /*int r = clamp((int)pal[_srccolor].r, 0, 255) >> 2; + int g = clamp((int)pal[_srccolor].g, 0, 255) >> 2; + int b = clamp((int)pal[_srccolor].b, 0, 255) >> 2; + *dest = RGB256k.RGB[r][g][b];*/ dest += pitch; } while (--count); @@ -1276,15 +1220,11 @@ namespace swrenderer do { - uint32_t a = fg + bg2rgb[*dest]; - uint32_t b = a; - - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - *dest = RGB32k.All[a & (a >> 15)]; + const PalEntry* pal = GPalette.BaseColors; + int r = clamp(pal[*dest].r + pal[fg].r, 0, 255) >> 2; + int g = clamp(pal[*dest].g + pal[fg].g, 0, 255) >> 2; + int b = clamp(pal[*dest].b + pal[fg].b, 0, 255) >> 2; + *dest = RGB256k.RGB[r][g][b]; dest += pitch; } while (--count); } diff --git a/src/r_drawt_pal.cpp b/src/r_drawt_pal.cpp index 16d24ecaf8..156e97ba66 100644 --- a/src/r_drawt_pal.cpp +++ b/src/r_drawt_pal.cpp @@ -446,14 +446,15 @@ namespace swrenderer source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4 + hx]; pitch = _pitch * thread->num_cores; colormap = _colormap; + const PalEntry *palette = GPalette.BaseColors; do { uint32_t fg = colormap[*source]; uint32_t bg = *dest; - int r = MIN((GPalette.BaseColors[fg].r * _srcalpha + GPalette.BaseColors[bg].r * _destalpha)>>18, 63); - int g = MIN((GPalette.BaseColors[fg].g * _srcalpha + GPalette.BaseColors[bg].g * _destalpha)>>18, 63); - int b = MIN((GPalette.BaseColors[fg].b * _srcalpha + GPalette.BaseColors[bg].b * _destalpha)>>18, 63); + int r = MIN((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 63); + int g = MIN((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 63); + int b = MIN((palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 63); *dest = RGB256k.RGB[r][g][b]; source += 4; dest += pitch; @@ -476,15 +477,16 @@ namespace swrenderer source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4]; pitch = _pitch * thread->num_cores; colormap = _colormap; + const PalEntry *palette = GPalette.BaseColors; do { for 
(int ks = 0; ks < 4; ks++) { // [SP] this 4col function was a block of copy-pasted code. 4 times. I regret nothing. uint32_t fg = colormap[source[ks]]; uint32_t bg = dest[ks]; - int r = MIN((GPalette.BaseColors[fg].r * _srcalpha + GPalette.BaseColors[bg].r * _destalpha)>>18, 63); - int g = MIN((GPalette.BaseColors[fg].g * _srcalpha + GPalette.BaseColors[bg].g * _destalpha)>>18, 63); - int b = MIN((GPalette.BaseColors[fg].b * _srcalpha + GPalette.BaseColors[bg].b * _destalpha)>>18, 63); + int r = MIN((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 63); + int g = MIN((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 63); + int b = MIN((palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 63); dest[ks] = RGB256k.RGB[r][g][b]; } @@ -511,13 +513,14 @@ namespace swrenderer dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4 + hx]; pitch = _pitch * thread->num_cores; + const PalEntry *palette = GPalette.BaseColors; do { uint32_t val = *source; - int r = (GPalette.BaseColors[*dest].r * (63-val) + GPalette.BaseColors[_color].r * val) >> 8; - int g = (GPalette.BaseColors[*dest].g * (63-val) + GPalette.BaseColors[_color].g * val) >> 8; - int b = (GPalette.BaseColors[*dest].b * (63-val) + GPalette.BaseColors[_color].b * val) >> 8; - *dest = RGB256k.RGB[MIN(r,63)][MIN(g,63)][MIN(b,63)]; + int r = (palette[*dest].r * (255-val)) >> 10; + int g = (palette[*dest].g * (255-val)) >> 10; + int b = (palette[*dest].b * (255-val)) >> 10; + *dest = RGB256k.RGB[clamp(r,0,63)][clamp(g,0,63)][clamp(b,0,63)]; source += 4; dest += pitch; } while (--count); @@ -541,6 +544,7 @@ namespace swrenderer dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4]; pitch = _pitch * thread->num_cores; + const PalEntry *palette = GPalette.BaseColors; do { uint32_t val; @@ -548,10 +552,10 @@ namespace swrenderer for (int ks = 
0; ks < 4; ks++) { val = source[ks]; - int r = (GPalette.BaseColors[dest[ks]].r * (63-val) + GPalette.BaseColors[_color].r * val) >> 8; - int g = (GPalette.BaseColors[dest[ks]].g * (63-val) + GPalette.BaseColors[_color].g * val) >> 8; - int b = (GPalette.BaseColors[dest[ks]].b * (63-val) + GPalette.BaseColors[_color].b * val) >> 8; - dest[ks] = RGB256k.RGB[MIN(r,63)][MIN(g,63)][MIN(b,63)]; + int r = (palette[dest[ks]].r * (255-val)) >> 10; + int g = (palette[dest[ks]].g * (255-val)) >> 10; + int b = (palette[dest[ks]].b * (255-val)) >> 10; + dest[ks] = RGB256k.RGB[clamp(r,0,63)][clamp(g,0,63)][clamp(b,0,63)]; } source += 4; @@ -575,13 +579,14 @@ namespace swrenderer source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4 + hx]; pitch = _pitch * thread->num_cores; colormap = _colormap; + const PalEntry *palette = GPalette.BaseColors; do { int fg = *source; int bg = *dest; - int r = MIN((GPalette.BaseColors[fg].r * _srcalpha + GPalette.BaseColors[bg].r * _destalpha)>>18, 63); - int g = MIN((GPalette.BaseColors[fg].g * _srcalpha + GPalette.BaseColors[bg].g * _destalpha)>>18, 63); - int b = MIN((GPalette.BaseColors[fg].b * _srcalpha + GPalette.BaseColors[bg].b * _destalpha)>>18, 63); + int r = MIN((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 63); + int g = MIN((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 63); + int b = MIN((palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 63); *dest = RGB256k.RGB[r][g][b]; source += 4; dest += pitch; @@ -604,15 +609,16 @@ namespace swrenderer source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4]; pitch = _pitch * thread->num_cores; colormap = _colormap; + const PalEntry *palette = GPalette.BaseColors; do { for (int ks = 0; ks < 4; ks++) { int fg = source[ks]; int bg = dest[ks]; - int r = MIN((GPalette.BaseColors[fg].r * _srcalpha + GPalette.BaseColors[bg].r * _destalpha)>>18, 63); - int g = MIN((GPalette.BaseColors[fg].g * _srcalpha + GPalette.BaseColors[bg].g * 
_destalpha)>>18, 63); - int b = MIN((GPalette.BaseColors[fg].b * _srcalpha + GPalette.BaseColors[bg].b * _destalpha)>>18, 63); + int r = MIN((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 63); + int g = MIN((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 63); + int b = MIN((palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 63); dest[ks] = RGB256k.RGB[r][g][b]; } @@ -639,16 +645,15 @@ namespace swrenderer source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4 + hx]; pitch = _pitch * thread->num_cores; colormap = _colormap; + const PalEntry *palette = GPalette.BaseColors; do { - uint32_t a = (fg2rgb[colormap[*source]] | 0x40100400) - bg2rgb[*dest]; - uint32_t b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - *dest = RGB32k.All[(a>>15) & a]; + int fg = *source; + int bg = *dest; + int r = MAX((palette[fg].r * _srcalpha - palette[bg].r * _destalpha)>>18, 0); + int g = MAX((palette[fg].g * _srcalpha - palette[bg].g * _destalpha)>>18, 0); + int b = MAX((palette[fg].b * _srcalpha - palette[bg].b * _destalpha)>>18, 0); + *dest = RGB256k.RGB[r][g][b]; source += 4; dest += pitch; } while (--count); @@ -672,40 +677,18 @@ namespace swrenderer source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4]; pitch = _pitch * thread->num_cores; colormap = _colormap; + const PalEntry *palette = GPalette.BaseColors; do { - uint32_t a = (fg2rgb[colormap[source[0]]] | 0x40100400) - bg2rgb[dest[0]]; - uint32_t b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - dest[0] = RGB32k.All[(a>>15) & a]; - - a = (fg2rgb[colormap[source[1]]] | 0x40100400) - bg2rgb[dest[1]]; - b = a; - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - dest[1] = RGB32k.All[(a>>15) & a]; - - a = (fg2rgb[colormap[source[2]]] | 0x40100400) - bg2rgb[dest[2]]; - b = a; - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - dest[2] = RGB32k.All[(a>>15) & a]; - - a = 
(fg2rgb[colormap[source[3]]] | 0x40100400) - bg2rgb[dest[3]]; - b = a; - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - dest[3] = RGB32k.All[(a>>15) & a]; + for (int ks = 0; ks < 4; ks++) + { + int fg = source[ks]; + int bg = dest[ks]; + int r = MAX((palette[fg].r * _srcalpha - palette[bg].r * _destalpha)>>18, 0); + int g = MAX((palette[fg].g * _srcalpha - palette[bg].g * _destalpha)>>18, 0); + int b = MAX((palette[fg].b * _srcalpha - palette[bg].b * _destalpha)>>18, 0); + dest[ks] = RGB256k.RGB[r][g][b]; + } source += 4; dest += pitch; @@ -730,16 +713,15 @@ namespace swrenderer source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4 + hx]; pitch = _pitch * thread->num_cores; colormap = _colormap; + const PalEntry *palette = GPalette.BaseColors; do { - uint32_t a = (bg2rgb[*dest] | 0x40100400) - fg2rgb[colormap[*source]]; - uint32_t b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - *dest = RGB32k.All[(a>>15) & a]; + int fg = *source; + int bg = *dest; + int r = MAX((-palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 0); + int g = MAX((-palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 0); + int b = MAX((-palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 0); + *dest = RGB256k.RGB[r][g][b]; source += 4; dest += pitch; } while (--count); @@ -763,40 +745,18 @@ namespace swrenderer source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4]; pitch = _pitch * thread->num_cores; colormap = _colormap; + const PalEntry *palette = GPalette.BaseColors; do { - uint32_t a = (bg2rgb[dest[0]] | 0x40100400) - fg2rgb[colormap[source[0]]]; - uint32_t b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - dest[0] = RGB32k.All[(a>>15) & a]; - - a = (bg2rgb[dest[1]] | 0x40100400) - fg2rgb[colormap[source[1]]]; - b = a; - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - dest[1] = RGB32k.All[(a>>15) & a]; - - a = (bg2rgb[dest[2]] | 0x40100400) - 
fg2rgb[colormap[source[2]]]; - b = a; - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - dest[2] = RGB32k.All[(a>>15) & a]; - - a = (bg2rgb[dest[3]] | 0x40100400) - fg2rgb[colormap[source[3]]]; - b = a; - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - dest[3] = RGB32k.All[(a>>15) & a]; + for (int ks = 0; ks < 4; ks++) + { + int fg = source[ks]; + int bg = dest[ks]; + int r = MAX((-palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 0); + int g = MAX((-palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 0); + int b = MAX((-palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 0); + dest[ks] = RGB256k.RGB[r][g][b]; + } source += 4; dest += pitch; diff --git a/src/textures/pcxtexture.cpp b/src/textures/pcxtexture.cpp index 0ec5d2933c..dda431993c 100644 --- a/src/textures/pcxtexture.cpp +++ b/src/textures/pcxtexture.cpp @@ -528,7 +528,7 @@ void FPCXTexture::MakeTexture() { for(int x=0; x < Width; x++) { - Pixels[y+Height*x] = RGB32k.RGB[row[0]>>3][row[1]>>3][row[2]>>3]; + Pixels[y+Height*x] = RGB256k.RGB[row[0]>>2][row[1]>>2][row[2]>>2]; row+=3; } } From e4e0f0bcd912f11b9867ce7e72dd9fa8b43ee032 Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Sun, 18 Dec 2016 08:34:33 -0500 Subject: [PATCH 509/912] - playing a bit with the fill drawers --- src/r_draw_pal.cpp | 15 +++++++++------ src/v_video.cpp | 6 +----- src/v_video.h | 2 -- 3 files changed, 10 insertions(+), 13 deletions(-) diff --git a/src/r_draw_pal.cpp b/src/r_draw_pal.cpp index 33c4829346..9d15460af9 100644 --- a/src/r_draw_pal.cpp +++ b/src/r_draw_pal.cpp @@ -1183,14 +1183,17 @@ namespace swrenderer dest = thread->dest_for_thread(_dest_y, pitch, dest); pitch *= thread->num_cores; + const PalEntry* pal = GPalette.BaseColors; + int _srcalpha = 32768, _destalpha = 32768; do { - const PalEntry* pal = GPalette.BaseColors; - // *** [SP] this is incomplete, not sure what to do here. 
- /*int r = clamp((int)pal[_srccolor].r, 0, 255) >> 2; - int g = clamp((int)pal[_srccolor].g, 0, 255) >> 2; - int b = clamp((int)pal[_srccolor].b, 0, 255) >> 2; - *dest = RGB256k.RGB[r][g][b];*/ + int src_r = ((_srccolor << 3) & 0x78) * _srcalpha; + int src_g = ((_srccolor >> 17) & 0x78) * _srcalpha; + int src_b = ((_srccolor >> 7) & 0x78) * _srcalpha; + int r = clamp((src_r + pal[*dest].r * _destalpha)>>18, 0, 255); + int g = clamp((src_g + pal[*dest].g * _destalpha)>>18, 0, 255); + int b = clamp((src_b + pal[*dest].b * _destalpha)>>18, 0, 255); + *dest = RGB256k.RGB[r][g][b]; dest += pitch; } while (--count); diff --git a/src/v_video.cpp b/src/v_video.cpp index 6e333f8406..62a587281d 100644 --- a/src/v_video.cpp +++ b/src/v_video.cpp @@ -144,9 +144,7 @@ DWORD Col2RGB8[65][256]; DWORD *Col2RGB8_LessPrecision[65]; DWORD Col2RGB8_Inverse[65][256]; ColorTable32k RGB32k; -#ifndef NO_RGB666 ColorTable256k RGB256k; -#endif } @@ -666,14 +664,12 @@ static void BuildTransTable (const PalEntry *palette) for (g = 0; g < 32; g++) for (b = 0; b < 32; b++) //RGB32k.RGB[r][g][b] = ColorMatcher.Pick ((r<<3)|(r>>2), (g<<3)|(g>>2), (b<<3)|(b>>2)); - RGB32k.RGB[r][g][b] = 2; -#ifndef NO_RGB666 + RGB32k.RGB[r][g][b] = 4; // create the RGB666 lookup table for (r = 0; r < 64; r++) for (g = 0; g < 64; g++) for (b = 0; b < 64; b++) RGB256k.RGB[r][g][b] = ColorMatcher.Pick ((r<<2)|(r>>4), (g<<2)|(g>>4), (b<<2)|(b>>4)); -#endif int x, y; diff --git a/src/v_video.h b/src/v_video.h index 2079ff64b5..57d15869cd 100644 --- a/src/v_video.h +++ b/src/v_video.h @@ -463,14 +463,12 @@ union ColorTable32k extern "C" ColorTable32k RGB32k; // [SP] RGB666 support -#ifndef NO_RGB666 union ColorTable256k { BYTE RGB[64][64][64]; BYTE All[64 *64 *64]; }; extern "C" ColorTable256k RGB256k; -#endif // Col2RGB8 is a pre-multiplied palette for color lookup. It is stored in a // special R10B10G10 format for efficient blending computation. 
From 1b50620a87d4dc477daf0aef24d0fb0be9a6840b Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Sun, 18 Dec 2016 09:05:14 -0500 Subject: [PATCH 510/912] - finally decoded _srccolor in the fill drawers. still need to figure out how to get _srcalpha and _destalpha in there. --- src/r_draw_pal.cpp | 19 ++++++++++++------- src/r_draw_pal.h | 2 ++ 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/src/r_draw_pal.cpp b/src/r_draw_pal.cpp index 9d15460af9..81975e23c3 100644 --- a/src/r_draw_pal.cpp +++ b/src/r_draw_pal.cpp @@ -1187,9 +1187,9 @@ namespace swrenderer int _srcalpha = 32768, _destalpha = 32768; do { - int src_r = ((_srccolor << 3) & 0x78) * _srcalpha; - int src_g = ((_srccolor >> 17) & 0x78) * _srcalpha; - int src_b = ((_srccolor >> 7) & 0x78) * _srcalpha; + int src_r = ((_srccolor >> 16) & 0xff) * _srcalpha; + int src_g = ((_srccolor >> 0) & 0xff) * _srcalpha; + int src_b = ((_srccolor >> 8) & 0xff) * _srcalpha; int r = clamp((src_r + pal[*dest].r * _destalpha)>>18, 0, 255); int g = clamp((src_g + pal[*dest].g * _destalpha)>>18, 0, 255); int b = clamp((src_b + pal[*dest].b * _destalpha)>>18, 0, 255); @@ -1221,12 +1221,17 @@ namespace swrenderer dest = thread->dest_for_thread(_dest_y, pitch, dest); pitch *= thread->num_cores; + const PalEntry* pal = GPalette.BaseColors; + int _srcalpha = 32768, _destalpha = 32768; + do { - const PalEntry* pal = GPalette.BaseColors; - int r = clamp(pal[*dest].r + pal[fg].r, 0, 255) >> 2; - int g = clamp(pal[*dest].g + pal[fg].g, 0, 255) >> 2; - int b = clamp(pal[*dest].b + pal[fg].b, 0, 255) >> 2; + int src_r = ((_srccolor >> 16) & 0xff) * _srcalpha; + int src_g = ((_srccolor >> 0) & 0xff) * _srcalpha; + int src_b = ((_srccolor >> 8) & 0xff) * _srcalpha; + int r = clamp((src_r + pal[*dest].r * _destalpha)>>18, 0, 255); + int g = clamp((src_g + pal[*dest].g * _destalpha)>>18, 0, 255); + int b = clamp((src_b + pal[*dest].b * _destalpha)>>18, 0, 255); *dest = RGB256k.RGB[r][g][b]; dest += pitch; } while 
(--count); diff --git a/src/r_draw_pal.h b/src/r_draw_pal.h index 262b703875..54c74dc15c 100644 --- a/src/r_draw_pal.h +++ b/src/r_draw_pal.h @@ -106,6 +106,8 @@ namespace swrenderer uint32_t *_srcblend; uint32_t *_destblend; uint32_t _srccolor; + fixed_t _srcalpha; + fixed_t _destalpha; }; class DrawColumnPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; }; From d687e52009384105fcf4ef6eb43919e5b76625e0 Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Sun, 18 Dec 2016 17:08:56 -0500 Subject: [PATCH 511/912] - renamed R_DrawParticle_C to R_DrawParticle to remove vestigial ASM hook. - fixed particle crashes, fixed particle color math. --- src/r_draw.cpp | 4 ---- src/r_draw.h | 1 - src/r_things.cpp | 10 ++++++---- src/r_things.h | 2 +- 4 files changed, 7 insertions(+), 10 deletions(-) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index a461e1877d..2c5d6fd917 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -1352,8 +1352,4 @@ namespace swrenderer } } - void R_DrawParticle(vissprite_t *sprite) - { - R_DrawParticle_C(sprite); - } } diff --git a/src/r_draw.h b/src/r_draw.h index 58934981b0..f47179c510 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -201,5 +201,4 @@ namespace swrenderer void R_MapTiltedPlane(int y, int x1); void R_MapColoredPlane(int y, int x1); - void R_DrawParticle(vissprite_t *); } diff --git a/src/r_things.cpp b/src/r_things.cpp index 7c178ce8b8..03900b3211 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -2632,7 +2632,9 @@ static void R_DrawMaskedSegsBehindParticle (const vissprite_t *vis) } } -void R_DrawParticle_C (vissprite_t *vis) +//inline int clamp(int x, int y, int z) { return ((x < y) ? x : (z < y) ? 
z : y); } + +void R_DrawParticle (vissprite_t *vis) { int spacing; BYTE *dest; @@ -2666,9 +2668,9 @@ void R_DrawParticle_C (vissprite_t *vis) dest = ylookup[yl] + x + dc_destorg; for (int y = 0; y < ycount; y++) { - int dest_r = (GPalette.BaseColors[*dest].r * bglevel + GPalette.BaseColors[color].r * fglevel) >> 10; - int dest_g = (GPalette.BaseColors[*dest].g * bglevel + GPalette.BaseColors[color].g * fglevel) >> 10; - int dest_b = (GPalette.BaseColors[*dest].b * bglevel + GPalette.BaseColors[color].b * fglevel) >> 10; + uint32_t dest_r = MIN((GPalette.BaseColors[*dest].r * bglevel + GPalette.BaseColors[color].r * fglevel) >> 18, 63); + uint32_t dest_g = MIN((GPalette.BaseColors[*dest].g * bglevel + GPalette.BaseColors[color].g * fglevel) >> 18, 63); + uint32_t dest_b = MIN((GPalette.BaseColors[*dest].b * bglevel + GPalette.BaseColors[color].b * fglevel) >> 18, 63); *dest = RGB256k.RGB[dest_r][dest_g][dest_b]; dest += spacing; diff --git a/src/r_things.h b/src/r_things.h index bf32b655f2..6b789cdc5c 100644 --- a/src/r_things.h +++ b/src/r_things.h @@ -101,7 +101,7 @@ struct vissprite_t vissprite_t() {} }; -void R_DrawParticle_C (vissprite_t *); +void R_DrawParticle (vissprite_t *); void R_ProjectParticle (particle_t *, const sector_t *sector, int shade, int fakeside); extern int MaxVisSprites; From a4944067e824a1f82b42c5c0bf86e5b35b4b4dcb Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Sun, 18 Dec 2016 20:57:27 -0500 Subject: [PATCH 512/912] - fixed: whoops, fixed quite a merge faux pas there. 
--- src/r_segs.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/r_segs.cpp b/src/r_segs.cpp index ccf6ccf20c..41a8a91669 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -54,6 +54,7 @@ CVAR(Bool, r_fogboundary, true, 0) CVAR(Bool, r_drawmirrors, true, 0) EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor); +EXTERN_CVAR(Bool, r_mipmap) namespace swrenderer { From b7629fcf0e0f2980469b5608567d20783db0bc78 Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Mon, 19 Dec 2016 03:08:10 -0500 Subject: [PATCH 513/912] - This space intentionally left blank. - And white rabbits. --- src/r_draw_pal.cpp | 297 +++++++++++++++++++++------------------------ src/r_draw_pal.h | 2 + 2 files changed, 137 insertions(+), 162 deletions(-) diff --git a/src/r_draw_pal.cpp b/src/r_draw_pal.cpp index 81975e23c3..9f474911e6 100644 --- a/src/r_draw_pal.cpp +++ b/src/r_draw_pal.cpp @@ -4,6 +4,7 @@ **--------------------------------------------------------------------------- ** Copyright 1998-2016 Randy Heit ** Copyright 2016 Magnus Norddahl +** Copyright 2016 Rachael Alexanderson ** All rights reserved. 
** ** Redistribution and use in source and binary forms, with or without @@ -1089,6 +1090,8 @@ namespace swrenderer _srcblend = dc_srcblend; _destblend = dc_destblend; _srccolor = dc_srccolor; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void DrawColumnPalCommand::Execute(DrawerThread *thread) @@ -1184,7 +1187,7 @@ namespace swrenderer pitch *= thread->num_cores; const PalEntry* pal = GPalette.BaseColors; - int _srcalpha = 32768, _destalpha = 32768; + do { int src_r = ((_srccolor >> 16) & 0xff) * _srcalpha; @@ -1222,7 +1225,6 @@ namespace swrenderer pitch *= thread->num_cores; const PalEntry* pal = GPalette.BaseColors; - int _srcalpha = 32768, _destalpha = 32768; do { @@ -1245,11 +1247,7 @@ namespace swrenderer count = _count; dest = _dest; - uint32_t *bg2rgb; - uint32_t fg; - bg2rgb = _destblend; - fg = _srccolor | 0x40100400; int pitch = _pitch; count = thread->count_for_thread(_dest_y, count); @@ -1259,16 +1257,19 @@ namespace swrenderer dest = thread->dest_for_thread(_dest_y, pitch, dest); pitch *= thread->num_cores; + const PalEntry* palette = GPalette.BaseColors; + do { - uint32_t a = fg - bg2rgb[*dest]; - uint32_t b = a; + int src_r = ((_srccolor >> 16) & 0xff) * _srcalpha; + int src_g = ((_srccolor >> 0) & 0xff) * _srcalpha; + int src_b = ((_srccolor >> 8) & 0xff) * _srcalpha; + int bg = *dest; + int r = MAX((src_r * _srcalpha - palette[bg].r * _destalpha)>>18, 0); + int g = MAX((src_g * _srcalpha - palette[bg].g * _destalpha)>>18, 0); + int b = MAX((src_b * _srcalpha - palette[bg].b * _destalpha)>>18, 0); - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - *dest = RGB32k.All[a & (a >> 15)]; + *dest = RGB256k.RGB[r][g][b]; dest += pitch; } while (--count); } @@ -1283,11 +1284,7 @@ namespace swrenderer return; dest = _dest; - uint32_t *bg2rgb; - uint32_t fg; - bg2rgb = _destblend; - fg = _srccolor; int pitch = _pitch; count = thread->count_for_thread(_dest_y, count); @@ -1297,16 +1294,19 @@ namespace swrenderer dest = 
thread->dest_for_thread(_dest_y, pitch, dest); pitch *= thread->num_cores; + const PalEntry *palette = GPalette.BaseColors; + do { - uint32_t a = (bg2rgb[*dest] | 0x40100400) - fg; - uint32_t b = a; + int src_r = ((_srccolor >> 16) & 0xff) * _srcalpha; + int src_g = ((_srccolor >> 0) & 0xff) * _srcalpha; + int src_b = ((_srccolor >> 8) & 0xff) * _srcalpha; + int bg = *dest; + int r = MAX((src_r * _srcalpha - palette[bg].r * _destalpha)>>18, 0); + int g = MAX((src_g * _srcalpha - palette[bg].g * _destalpha)>>18, 0); + int b = MAX((src_b * _srcalpha - palette[bg].b * _destalpha)>>18, 0); - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - *dest = RGB32k.All[a & (a >> 15)]; + *dest = RGB256k.RGB[r][g][b]; dest += pitch; } while (--count); } @@ -1334,20 +1334,18 @@ namespace swrenderer fracstep *= thread->num_cores; pitch *= thread->num_cores; - uint32_t *fg2rgb = _srcblend; - uint32_t *bg2rgb = _destblend; const uint8_t *colormap = _colormap; const uint8_t *source = _source; + const PalEntry *palette = GPalette.BaseColors; do { uint32_t fg = colormap[source[frac >> FRACBITS]]; uint32_t bg = *dest; - - fg = fg2rgb[fg]; - bg = bg2rgb[bg]; - fg = (fg + bg) | 0x1f07c1f; - *dest = RGB32k.All[fg & (fg >> 15)]; + uint32_t r = MIN((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 63); + uint32_t g = MIN((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 63); + uint32_t b = MIN((palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 63); + *dest = RGB256k.RGB[r][g][b]; dest += pitch; frac += fracstep; } while (--count); @@ -1414,21 +1412,20 @@ namespace swrenderer fracstep *= thread->num_cores; pitch *= thread->num_cores; - uint32_t *fg2rgb = _srcblend; - uint32_t *bg2rgb = _destblend; const uint8_t *translation = _translation; const uint8_t *colormap = _colormap; const uint8_t *source = _source; + const PalEntry *palette = GPalette.BaseColors; + do { uint32_t fg = colormap[translation[source[frac >> FRACBITS]]]; uint32_t bg = 
*dest; - - fg = fg2rgb[fg]; - bg = bg2rgb[bg]; - fg = (fg + bg) | 0x1f07c1f; - *dest = RGB32k.All[fg & (fg >> 15)]; + uint32_t r = MIN((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 63); + uint32_t g = MIN((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 63); + uint32_t b = MIN((palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 63); + *dest = RGB256k.RGB[r][g][b]; dest += pitch; frac += fracstep; } while (--count); @@ -1460,12 +1457,16 @@ namespace swrenderer const uint8_t *colormap = _colormap; uint32_t *fgstart = &Col2RGB8[0][_color]; + const PalEntry *palette = GPalette.BaseColors; + do { - uint32_t val = colormap[source[frac >> FRACBITS]]; - uint32_t fg = fgstart[val << 8]; - val = (Col2RGB8[64 - val][*dest] + fg) | 0x1f07c1f; - *dest = RGB32k.All[val & (val >> 15)]; + uint32_t val = source[frac >> FRACBITS]; + + int r = (palette[*dest].r * (255-val)) >> 10; + int g = (palette[*dest].g * (255-val)) >> 10; + int b = (palette[*dest].b * (255-val)) >> 10; + *dest = RGB256k.RGB[clamp(r,0,63)][clamp(g,0,63)][clamp(b,0,63)]; dest += pitch; frac += fracstep; @@ -1497,20 +1498,16 @@ namespace swrenderer const uint8_t *colormap = _colormap; const uint8_t *source = _source; - uint32_t *fg2rgb = _srcblend; - uint32_t *bg2rgb = _destblend; + const PalEntry *palette = GPalette.BaseColors; do { - uint32_t a = fg2rgb[colormap[source[frac >> FRACBITS]]] + bg2rgb[*dest]; - uint32_t b = a; - - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - *dest = RGB32k.All[a & (a >> 15)]; + int fg = colormap[source[frac >> FRACBITS]]; + int bg = *dest; + int r = MIN((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 63); + int g = MIN((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 63); + int b = MIN((palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 63); + *dest = RGB256k.RGB[r][g][b]; dest += pitch; frac += fracstep; } while (--count); @@ -1542,20 +1539,16 @@ namespace 
swrenderer const uint8_t *translation = _translation; const uint8_t *colormap = _colormap; const uint8_t *source = _source; - uint32_t *fg2rgb = _srcblend; - uint32_t *bg2rgb = _destblend; + const PalEntry *palette = GPalette.BaseColors; do { - uint32_t a = fg2rgb[colormap[translation[source[frac >> FRACBITS]]]] + bg2rgb[*dest]; - uint32_t b = a; - - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - *dest = RGB32k.All[(a >> 15) & a]; + int fg = colormap[translation[source[frac >> FRACBITS]]]; + int bg = *dest; + int r = MIN((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 63); + int g = MIN((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 63); + int b = MIN((palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 63); + *dest = RGB256k.RGB[r][g][b]; dest += pitch; frac += fracstep; } while (--count); @@ -1586,19 +1579,16 @@ namespace swrenderer const uint8_t *colormap = _colormap; const uint8_t *source = _source; - uint32_t *fg2rgb = _srcblend; - uint32_t *bg2rgb = _destblend; + const PalEntry *palette = GPalette.BaseColors; do { - uint32_t a = (fg2rgb[colormap[source[frac >> FRACBITS]]] | 0x40100400) - bg2rgb[*dest]; - uint32_t b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - *dest = RGB32k.All[a & (a >> 15)]; + int fg = colormap[source[frac >> FRACBITS]]; + int bg = *dest; + int r = MAX((palette[fg].r * _srcalpha - palette[bg].r * _destalpha)>>18, 0); + int g = MAX((palette[fg].g * _srcalpha - palette[bg].g * _destalpha)>>18, 0); + int b = MAX((palette[fg].b * _srcalpha - palette[bg].b * _destalpha)>>18, 0); + *dest = RGB256k.RGB[r][g][b]; dest += pitch; frac += fracstep; } while (--count); @@ -1630,19 +1620,16 @@ namespace swrenderer const uint8_t *translation = _translation; const uint8_t *colormap = _colormap; const uint8_t *source = _source; - uint32_t *fg2rgb = _srcblend; - uint32_t *bg2rgb = _destblend; + const PalEntry *palette = GPalette.BaseColors; do { - 
uint32_t a = (fg2rgb[colormap[translation[source[frac >> FRACBITS]]]] | 0x40100400) - bg2rgb[*dest]; - uint32_t b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - *dest = RGB32k.All[(a >> 15) & a]; + int fg = colormap[translation[source[frac >> FRACBITS]]]; + int bg = *dest; + int r = MAX((palette[fg].r * _srcalpha - palette[bg].r * _destalpha)>>18, 0); + int g = MAX((palette[fg].g * _srcalpha - palette[bg].g * _destalpha)>>18, 0); + int b = MAX((palette[fg].b * _srcalpha - palette[bg].b * _destalpha)>>18, 0); + *dest = RGB256k.RGB[r][g][b]; dest += pitch; frac += fracstep; } while (--count); @@ -1673,19 +1660,16 @@ namespace swrenderer const uint8_t *colormap = _colormap; const uint8_t *source = _source; - uint32_t *fg2rgb = _srcblend; - uint32_t *bg2rgb = _destblend; + const PalEntry *palette = GPalette.BaseColors; do { - uint32_t a = (bg2rgb[*dest] | 0x40100400) - fg2rgb[colormap[source[frac >> FRACBITS]]]; - uint32_t b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - *dest = RGB32k.All[a & (a >> 15)]; + int fg = colormap[source[frac >> FRACBITS]]; + int bg = *dest; + int r = MAX((-palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 0); + int g = MAX((-palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 0); + int b = MAX((-palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 0); + *dest = RGB256k.RGB[r][g][b]; dest += pitch; frac += fracstep; } while (--count); @@ -1717,19 +1701,16 @@ namespace swrenderer const uint8_t *translation = _translation; const uint8_t *colormap = _colormap; const uint8_t *source = _source; - uint32_t *fg2rgb = _srcblend; - uint32_t *bg2rgb = _destblend; + const PalEntry *palette = GPalette.BaseColors; do { - uint32_t a = (bg2rgb[*dest] | 0x40100400) - fg2rgb[colormap[translation[source[frac >> FRACBITS]]]]; - uint32_t b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - *dest = RGB32k.All[(a >> 15) & a]; + int fg = 
colormap[translation[source[frac >> FRACBITS]]]; + int bg = *dest; + int r = MAX((-palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 0); + int g = MAX((-palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 0); + int b = MAX((-palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 0); + *dest = RGB256k.RGB[r][g][b]; dest += pitch; frac += fracstep; } while (--count); @@ -1845,6 +1826,8 @@ namespace swrenderer _srcblend = dc_srcblend; _destblend = dc_destblend; _color = ds_color; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void DrawSpanPalCommand::Execute(DrawerThread *thread) @@ -1990,8 +1973,6 @@ namespace swrenderer const uint8_t *colormap = _colormap; int count; int spot; - uint32_t *fg2rgb = _srcblend; - uint32_t *bg2rgb = _destblend; xfrac = _xfrac; yfrac = _yfrac; @@ -2003,6 +1984,8 @@ namespace swrenderer xstep = _xstep; ystep = _ystep; + const PalEntry *palette = GPalette.BaseColors; + if (_xbits == 6 && _ybits == 6) { // 64x64 is the most common case by far, so special case it. 
@@ -2011,10 +1994,11 @@ namespace swrenderer spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); uint32_t fg = colormap[source[spot]]; uint32_t bg = *dest; - fg = fg2rgb[fg]; - bg = bg2rgb[bg]; - fg = (fg + bg) | 0x1f07c1f; - *dest++ = RGB32k.All[fg & (fg >> 15)]; + int r = MAX((palette[fg].r * _srcalpha - palette[bg].r * _destalpha)>>18, 0); + int g = MAX((palette[fg].g * _srcalpha - palette[bg].g * _destalpha)>>18, 0); + int b = MAX((palette[fg].b * _srcalpha - palette[bg].b * _destalpha)>>18, 0); + *dest++ = RGB256k.RGB[r][g][b]; + xfrac += xstep; yfrac += ystep; } while (--count); @@ -2029,10 +2013,11 @@ namespace swrenderer spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); uint32_t fg = colormap[source[spot]]; uint32_t bg = *dest; - fg = fg2rgb[fg]; - bg = bg2rgb[bg]; - fg = (fg + bg) | 0x1f07c1f; - *dest++ = RGB32k.All[fg & (fg >> 15)]; + int r = MAX((palette[fg].r * _srcalpha - palette[bg].r * _destalpha)>>18, 0); + int g = MAX((palette[fg].g * _srcalpha - palette[bg].g * _destalpha)>>18, 0); + int b = MAX((palette[fg].b * _srcalpha - palette[bg].b * _destalpha)>>18, 0); + *dest++ = RGB256k.RGB[r][g][b]; + xfrac += xstep; yfrac += ystep; } while (--count); @@ -2053,8 +2038,8 @@ namespace swrenderer const uint8_t *colormap = _colormap; int count; int spot; - uint32_t *fg2rgb = _srcblend; - uint32_t *bg2rgb = _destblend; + + const PalEntry *palette = GPalette.BaseColors; xfrac = _xfrac; yfrac = _yfrac; @@ -2079,10 +2064,10 @@ namespace swrenderer { uint32_t fg = colormap[texdata]; uint32_t bg = *dest; - fg = fg2rgb[fg]; - bg = bg2rgb[bg]; - fg = (fg + bg) | 0x1f07c1f; - *dest = RGB32k.All[fg & (fg >> 15)]; + int r = MAX((palette[fg].r * _srcalpha - palette[bg].r * _destalpha)>>18, 0); + int g = MAX((palette[fg].g * _srcalpha - palette[bg].g * _destalpha)>>18, 0); + int b = MAX((palette[fg].b * _srcalpha - palette[bg].b * _destalpha)>>18, 0); + *dest = RGB256k.RGB[r][g][b]; } dest++; xfrac += xstep; @@ -2104,10 +2089,10 @@ namespace 
swrenderer { uint32_t fg = colormap[texdata]; uint32_t bg = *dest; - fg = fg2rgb[fg]; - bg = bg2rgb[bg]; - fg = (fg + bg) | 0x1f07c1f; - *dest = RGB32k.All[fg & (fg >> 15)]; + int r = MAX((palette[fg].r * _srcalpha - palette[bg].r * _destalpha)>>18, 0); + int g = MAX((palette[fg].g * _srcalpha - palette[bg].g * _destalpha)>>18, 0); + int b = MAX((palette[fg].b * _srcalpha - palette[bg].b * _destalpha)>>18, 0); + *dest = RGB256k.RGB[r][g][b]; } dest++; xfrac += xstep; @@ -2130,8 +2115,7 @@ namespace swrenderer const uint8_t *colormap = _colormap; int count; int spot; - uint32_t *fg2rgb = _srcblend; - uint32_t *bg2rgb = _destblend; + const PalEntry *palette = GPalette.BaseColors; xfrac = _xfrac; yfrac = _yfrac; @@ -2149,15 +2133,13 @@ namespace swrenderer do { spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t a = fg2rgb[colormap[source[spot]]] + bg2rgb[*dest]; - uint32_t b = a; + uint32_t fg = colormap[source[spot]]; + uint32_t bg = *dest; + int r = MAX((palette[fg].r * _srcalpha - palette[bg].r * _destalpha)>>18, 0); + int g = MAX((palette[fg].g * _srcalpha - palette[bg].g * _destalpha)>>18, 0); + int b = MAX((palette[fg].b * _srcalpha - palette[bg].b * _destalpha)>>18, 0); + *dest++ = RGB256k.RGB[r][g][b]; - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - *dest++ = RGB32k.All[a & (a >> 15)]; xfrac += xstep; yfrac += ystep; } while (--count); @@ -2170,15 +2152,13 @@ namespace swrenderer do { spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - uint32_t a = fg2rgb[colormap[source[spot]]] + bg2rgb[*dest]; - uint32_t b = a; + uint32_t fg = colormap[source[spot]]; + uint32_t bg = *dest; + int r = MAX((palette[fg].r * _srcalpha - palette[bg].r * _destalpha)>>18, 0); + int g = MAX((palette[fg].g * _srcalpha - palette[bg].g * _destalpha)>>18, 0); + int b = MAX((palette[fg].b * _srcalpha - palette[bg].b * _destalpha)>>18, 0); + *dest++ = RGB256k.RGB[r][g][b]; - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 
0x3fffffff; - b = b - (b >> 5); - a |= b; - *dest++ = RGB32k.All[a & (a >> 15)]; xfrac += xstep; yfrac += ystep; } while (--count); @@ -2199,8 +2179,7 @@ namespace swrenderer const uint8_t *colormap = _colormap; int count; int spot; - uint32_t *fg2rgb = _srcblend; - uint32_t *bg2rgb = _destblend; + const PalEntry *palette = GPalette.BaseColors; xfrac = _xfrac; yfrac = _yfrac; @@ -2223,15 +2202,12 @@ namespace swrenderer texdata = source[spot]; if (texdata != 0) { - uint32_t a = fg2rgb[colormap[texdata]] + bg2rgb[*dest]; - uint32_t b = a; - - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - *dest = RGB32k.All[a & (a >> 15)]; + uint32_t fg = colormap[texdata]; + uint32_t bg = *dest; + int r = MAX((palette[fg].r * _srcalpha - palette[bg].r * _destalpha)>>18, 0); + int g = MAX((palette[fg].g * _srcalpha - palette[bg].g * _destalpha)>>18, 0); + int b = MAX((palette[fg].b * _srcalpha - palette[bg].b * _destalpha)>>18, 0); + *dest = RGB256k.RGB[r][g][b]; } dest++; xfrac += xstep; @@ -2251,15 +2227,12 @@ namespace swrenderer texdata = source[spot]; if (texdata != 0) { - uint32_t a = fg2rgb[colormap[texdata]] + bg2rgb[*dest]; - uint32_t b = a; - - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - *dest = RGB32k.All[a & (a >> 15)]; + uint32_t fg = colormap[texdata]; + uint32_t bg = *dest; + int r = MAX((palette[fg].r * _srcalpha - palette[bg].r * _destalpha)>>18, 0); + int g = MAX((palette[fg].g * _srcalpha - palette[bg].g * _destalpha)>>18, 0); + int b = MAX((palette[fg].b * _srcalpha - palette[bg].b * _destalpha)>>18, 0); + *dest = RGB256k.RGB[r][g][b]; } dest++; xfrac += xstep; diff --git a/src/r_draw_pal.h b/src/r_draw_pal.h index 54c74dc15c..0b1a5eebf8 100644 --- a/src/r_draw_pal.h +++ b/src/r_draw_pal.h @@ -166,6 +166,8 @@ namespace swrenderer uint32_t *_srcblend; uint32_t *_destblend; int _color; + fixed_t _srcalpha; + fixed_t _destalpha; }; class DrawSpanPalCommand : public PalSpanCommand { 
public: void Execute(DrawerThread *thread) override; }; From c7d4d7cd1db1b5418150b5a74a6bc478c2aa2850 Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Mon, 19 Dec 2016 03:51:54 -0500 Subject: [PATCH 514/912] - fixed: Sometimes it helps to stay positive. Especially when deciding whether to add or subtract the background in the span drawers. --- src/r_draw_pal.cpp | 48 +++++++++++++++++++++++----------------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/src/r_draw_pal.cpp b/src/r_draw_pal.cpp index 9f474911e6..a1bddb16b9 100644 --- a/src/r_draw_pal.cpp +++ b/src/r_draw_pal.cpp @@ -1994,9 +1994,9 @@ namespace swrenderer spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); uint32_t fg = colormap[source[spot]]; uint32_t bg = *dest; - int r = MAX((palette[fg].r * _srcalpha - palette[bg].r * _destalpha)>>18, 0); - int g = MAX((palette[fg].g * _srcalpha - palette[bg].g * _destalpha)>>18, 0); - int b = MAX((palette[fg].b * _srcalpha - palette[bg].b * _destalpha)>>18, 0); + int r = MAX((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 0); + int g = MAX((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 0); + int b = MAX((palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 0); *dest++ = RGB256k.RGB[r][g][b]; xfrac += xstep; @@ -2013,9 +2013,9 @@ namespace swrenderer spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); uint32_t fg = colormap[source[spot]]; uint32_t bg = *dest; - int r = MAX((palette[fg].r * _srcalpha - palette[bg].r * _destalpha)>>18, 0); - int g = MAX((palette[fg].g * _srcalpha - palette[bg].g * _destalpha)>>18, 0); - int b = MAX((palette[fg].b * _srcalpha - palette[bg].b * _destalpha)>>18, 0); + int r = MAX((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 0); + int g = MAX((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 0); + int b = MAX((palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 0); *dest++ = RGB256k.RGB[r][g][b]; xfrac += xstep; @@ 
-2064,9 +2064,9 @@ namespace swrenderer { uint32_t fg = colormap[texdata]; uint32_t bg = *dest; - int r = MAX((palette[fg].r * _srcalpha - palette[bg].r * _destalpha)>>18, 0); - int g = MAX((palette[fg].g * _srcalpha - palette[bg].g * _destalpha)>>18, 0); - int b = MAX((palette[fg].b * _srcalpha - palette[bg].b * _destalpha)>>18, 0); + int r = MAX((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 0); + int g = MAX((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 0); + int b = MAX((palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 0); *dest = RGB256k.RGB[r][g][b]; } dest++; @@ -2089,9 +2089,9 @@ namespace swrenderer { uint32_t fg = colormap[texdata]; uint32_t bg = *dest; - int r = MAX((palette[fg].r * _srcalpha - palette[bg].r * _destalpha)>>18, 0); - int g = MAX((palette[fg].g * _srcalpha - palette[bg].g * _destalpha)>>18, 0); - int b = MAX((palette[fg].b * _srcalpha - palette[bg].b * _destalpha)>>18, 0); + int r = MAX((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 0); + int g = MAX((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 0); + int b = MAX((palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 0); *dest = RGB256k.RGB[r][g][b]; } dest++; @@ -2135,9 +2135,9 @@ namespace swrenderer spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); uint32_t fg = colormap[source[spot]]; uint32_t bg = *dest; - int r = MAX((palette[fg].r * _srcalpha - palette[bg].r * _destalpha)>>18, 0); - int g = MAX((palette[fg].g * _srcalpha - palette[bg].g * _destalpha)>>18, 0); - int b = MAX((palette[fg].b * _srcalpha - palette[bg].b * _destalpha)>>18, 0); + int r = MAX((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 0); + int g = MAX((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 0); + int b = MAX((palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 0); *dest++ = RGB256k.RGB[r][g][b]; xfrac += xstep; @@ -2154,9 +2154,9 @@ namespace swrenderer spot = ((xfrac >> xshift) 
& xmask) + (yfrac >> yshift); uint32_t fg = colormap[source[spot]]; uint32_t bg = *dest; - int r = MAX((palette[fg].r * _srcalpha - palette[bg].r * _destalpha)>>18, 0); - int g = MAX((palette[fg].g * _srcalpha - palette[bg].g * _destalpha)>>18, 0); - int b = MAX((palette[fg].b * _srcalpha - palette[bg].b * _destalpha)>>18, 0); + int r = MAX((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 0); + int g = MAX((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 0); + int b = MAX((palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 0); *dest++ = RGB256k.RGB[r][g][b]; xfrac += xstep; @@ -2204,9 +2204,9 @@ namespace swrenderer { uint32_t fg = colormap[texdata]; uint32_t bg = *dest; - int r = MAX((palette[fg].r * _srcalpha - palette[bg].r * _destalpha)>>18, 0); - int g = MAX((palette[fg].g * _srcalpha - palette[bg].g * _destalpha)>>18, 0); - int b = MAX((palette[fg].b * _srcalpha - palette[bg].b * _destalpha)>>18, 0); + int r = MAX((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 0); + int g = MAX((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 0); + int b = MAX((palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 0); *dest = RGB256k.RGB[r][g][b]; } dest++; @@ -2229,9 +2229,9 @@ namespace swrenderer { uint32_t fg = colormap[texdata]; uint32_t bg = *dest; - int r = MAX((palette[fg].r * _srcalpha - palette[bg].r * _destalpha)>>18, 0); - int g = MAX((palette[fg].g * _srcalpha - palette[bg].g * _destalpha)>>18, 0); - int b = MAX((palette[fg].b * _srcalpha - palette[bg].b * _destalpha)>>18, 0); + int r = MAX((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 0); + int g = MAX((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 0); + int b = MAX((palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 0); *dest = RGB256k.RGB[r][g][b]; } dest++; From 2d0960044c12439f81800628129c4b2b2ec540d1 Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Mon, 19 Dec 2016 04:01:25 -0500 Subject: 
[PATCH 515/912] - Added colormap checking to some blending drawers that were missing it. --- src/r_drawt_pal.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/r_drawt_pal.cpp b/src/r_drawt_pal.cpp index 156e97ba66..ebf0ea00ed 100644 --- a/src/r_drawt_pal.cpp +++ b/src/r_drawt_pal.cpp @@ -582,7 +582,7 @@ namespace swrenderer const PalEntry *palette = GPalette.BaseColors; do { - int fg = *source; + int fg = colormap[*source]; int bg = *dest; int r = MIN((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 63); int g = MIN((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 63); @@ -614,7 +614,7 @@ namespace swrenderer do { for (int ks = 0; ks < 4; ks++) { - int fg = source[ks]; + int fg = colormap[source[ks]]; int bg = dest[ks]; int r = MIN((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 63); int g = MIN((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 63); @@ -648,7 +648,7 @@ namespace swrenderer const PalEntry *palette = GPalette.BaseColors; do { - int fg = *source; + int fg = colormap[*source]; int bg = *dest; int r = MAX((palette[fg].r * _srcalpha - palette[bg].r * _destalpha)>>18, 0); int g = MAX((palette[fg].g * _srcalpha - palette[bg].g * _destalpha)>>18, 0); @@ -682,7 +682,7 @@ namespace swrenderer do { for (int ks = 0; ks < 4; ks++) { - int fg = source[ks]; + int fg = colormap[source[ks]]; int bg = dest[ks]; int r = MAX((palette[fg].r * _srcalpha - palette[bg].r * _destalpha)>>18, 0); int g = MAX((palette[fg].g * _srcalpha - palette[bg].g * _destalpha)>>18, 0); @@ -716,7 +716,7 @@ namespace swrenderer const PalEntry *palette = GPalette.BaseColors; do { - int fg = *source; + int fg = colormap[*source]; int bg = *dest; int r = MAX((-palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 0); int g = MAX((-palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 0); @@ -750,7 +750,7 @@ namespace swrenderer do { for (int ks = 0; ks < 4; ks++) { - int fg = source[ks]; + 
int fg = colormap[source[ks]]; int bg = dest[ks]; int r = MAX((-palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 0); int g = MAX((-palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 0); From e2be28f92556249cae3f610042f5593c88af58bc Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Mon, 19 Dec 2016 04:27:56 -0500 Subject: [PATCH 516/912] - implemented RGB256k for screen crossfade and burn --- src/f_wipe.cpp | 27 +++++++++++++++++---------- src/r_draw.cpp | 2 +- 2 files changed, 18 insertions(+), 11 deletions(-) diff --git a/src/f_wipe.cpp b/src/f_wipe.cpp index a3ceb8d508..352122007a 100644 --- a/src/f_wipe.cpp +++ b/src/f_wipe.cpp @@ -28,6 +28,7 @@ #include "f_wipe.h" #include "c_cvars.h" #include "templates.h" +#include "v_palette.h" // // SCREEN WIPE PACKAGE @@ -299,12 +300,15 @@ bool wipe_doBurn (int ticks) else { int bglevel = 64-fglevel; - DWORD *fg2rgb = Col2RGB8[fglevel]; - DWORD *bg2rgb = Col2RGB8[bglevel]; - DWORD fg = fg2rgb[fromnew[x]]; - DWORD bg = bg2rgb[fromold[x]]; - fg = (fg+bg) | 0x1f07c1f; - to[x] = RGB32k.All[fg & (fg>>15)]; + + const PalEntry* pal = GPalette.BaseColors; + + DWORD fg = fromnew[x]; + DWORD bg = fromold[x]; + int r = MIN((pal[fg].r * fglevel + pal[bg].r * bglevel) >> 8, 63); + int g = MIN((pal[fg].g * fglevel + pal[bg].g * bglevel) >> 8, 63); + int b = MIN((pal[fg].b * fglevel + pal[bg].b * bglevel) >> 8, 63); + to[x] = RGB256k.RGB[r][g][b]; done = false; } } @@ -347,15 +351,18 @@ bool wipe_doFade (int ticks) BYTE *fromnew = (BYTE *)wipe_scr_end; BYTE *fromold = (BYTE *)wipe_scr_start; BYTE *to = screen->GetBuffer(); + const PalEntry *pal = GPalette.BaseColors; for (y = 0; y < SCREENHEIGHT; y++) { for (x = 0; x < SCREENWIDTH; x++) { - DWORD fg = fg2rgb[fromnew[x]]; - DWORD bg = bg2rgb[fromold[x]]; - fg = (fg+bg) | 0x1f07c1f; - to[x] = RGB32k.All[fg & (fg>>15)]; + DWORD fg = fromnew[x]; + DWORD bg = fromold[x]; + int r = MIN((pal[fg].r * (64-bglevel) + pal[bg].r * bglevel) >> 8, 63); + int g = 
MIN((pal[fg].g * (64-bglevel) + pal[bg].g * bglevel) >> 8, 63); + int b = MIN((pal[fg].b * (64-bglevel) + pal[bg].b * bglevel) >> 8, 63); + to[x] = RGB256k.RGB[r][g][b]; } fromnew += SCREENWIDTH; fromold += SCREENWIDTH; diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 2c5d6fd917..2d86ee9859 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -450,7 +450,7 @@ namespace swrenderer uint32_t g = GPART(color); uint32_t b = BPART(color); // dc_color is used by the rt_* routines. It is indexed into dc_srcblend. - dc_color = RGB32k.RGB[r >> 3][g >> 3][b >> 3]; + dc_color = RGB256k.RGB[r >> 2][g >> 2][b >> 2]; if (style.Flags & STYLEF_InvertSource) { r = 255 - r; From 1e9d48216c6578bbce183252dab32757ec820bb5 Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Mon, 19 Dec 2016 04:45:08 -0500 Subject: [PATCH 517/912] - removed all references to COL2RGB8 and RGB32k. Everything has now been transformed to RGB256k. --- src/f_wipe.cpp | 2 -- src/r_draw.cpp | 6 ------ src/r_draw_pal.cpp | 1 - src/r_drawt_pal.cpp | 2 -- src/r_plane.cpp | 16 ++++++++-------- src/v_video.cpp | 37 ------------------------------------- src/v_video.h | 39 --------------------------------------- 7 files changed, 8 insertions(+), 95 deletions(-) diff --git a/src/f_wipe.cpp b/src/f_wipe.cpp index 352122007a..8d5f072f62 100644 --- a/src/f_wipe.cpp +++ b/src/f_wipe.cpp @@ -346,8 +346,6 @@ bool wipe_doFade (int ticks) { int x, y; int bglevel = 64 - fade; - DWORD *fg2rgb = Col2RGB8[fade]; - DWORD *bg2rgb = Col2RGB8[bglevel]; BYTE *fromnew = (BYTE *)wipe_scr_end; BYTE *fromold = (BYTE *)wipe_scr_start; BYTE *to = screen->GetBuffer(); diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 2d86ee9859..62e92273ec 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -238,22 +238,16 @@ namespace swrenderer } if (flags & STYLEF_InvertSource) { - dc_srcblend = Col2RGB8_Inverse[fglevel >> 10]; - dc_destblend = Col2RGB8_LessPrecision[bglevel >> 10]; dc_srcalpha = fglevel; dc_destalpha = bglevel; } else if (op == 
STYLEOP_Add && fglevel + bglevel <= FRACUNIT) { - dc_srcblend = Col2RGB8[fglevel >> 10]; - dc_destblend = Col2RGB8[bglevel >> 10]; dc_srcalpha = fglevel; dc_destalpha = bglevel; } else { - dc_srcblend = Col2RGB8_LessPrecision[fglevel >> 10]; - dc_destblend = Col2RGB8_LessPrecision[bglevel >> 10]; dc_srcalpha = fglevel; dc_destalpha = bglevel; } diff --git a/src/r_draw_pal.cpp b/src/r_draw_pal.cpp index a1bddb16b9..b99f58f887 100644 --- a/src/r_draw_pal.cpp +++ b/src/r_draw_pal.cpp @@ -1455,7 +1455,6 @@ namespace swrenderer const uint8_t *source = _source; const uint8_t *colormap = _colormap; - uint32_t *fgstart = &Col2RGB8[0][_color]; const PalEntry *palette = GPalette.BaseColors; diff --git a/src/r_drawt_pal.cpp b/src/r_drawt_pal.cpp index ebf0ea00ed..27c2a42943 100644 --- a/src/r_drawt_pal.cpp +++ b/src/r_drawt_pal.cpp @@ -508,7 +508,6 @@ namespace swrenderer if (count <= 0) return; - fgstart = &Col2RGB8[0][_color]; colormap = _colormap; dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4 + hx]; @@ -539,7 +538,6 @@ namespace swrenderer if (count <= 0) return; - fgstart = &Col2RGB8[0][_color]; colormap = _colormap; dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4]; diff --git a/src/r_plane.cpp b/src/r_plane.cpp index eafc9fa26e..1a7758923f 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -1500,14 +1500,14 @@ void R_DrawNormalPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t if (!additive) { spanfunc = R_DrawSpanMaskedTranslucent; - dc_srcblend = Col2RGB8[alpha>>10]; - dc_destblend = Col2RGB8[(OPAQUE-alpha)>>10]; + dc_srcalpha = alpha; + dc_destalpha = OPAQUE-alpha; } else { spanfunc = R_DrawSpanMaskedAddClamp; - dc_srcblend = Col2RGB8_LessPrecision[alpha>>10]; - dc_destblend = Col2RGB8_LessPrecision[FRACUNIT>>10]; + dc_srcalpha = alpha; + dc_destalpha = FRACUNIT; } } else @@ -1522,14 +1522,14 
@@ void R_DrawNormalPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t if (!additive) { spanfunc = R_DrawSpanTranslucent; - dc_srcblend = Col2RGB8[alpha>>10]; - dc_destblend = Col2RGB8[(OPAQUE-alpha)>>10]; + dc_srcalpha = alpha; + dc_destalpha = OPAQUE-alpha; } else { spanfunc = R_DrawSpanAddClamp; - dc_srcblend = Col2RGB8_LessPrecision[alpha>>10]; - dc_destblend = Col2RGB8_LessPrecision[FRACUNIT>>10]; + dc_srcalpha = alpha; + dc_destalpha = OPAQUE-alpha; } } else diff --git a/src/v_video.cpp b/src/v_video.cpp index 62a587281d..b639939eed 100644 --- a/src/v_video.cpp +++ b/src/v_video.cpp @@ -143,7 +143,6 @@ extern "C" { DWORD Col2RGB8[65][256]; DWORD *Col2RGB8_LessPrecision[65]; DWORD Col2RGB8_Inverse[65][256]; -ColorTable32k RGB32k; ColorTable256k RGB256k; } @@ -659,48 +658,12 @@ static void BuildTransTable (const PalEntry *palette) { int r, g, b; - // create the RGB555 lookup table - for (r = 0; r < 32; r++) - for (g = 0; g < 32; g++) - for (b = 0; b < 32; b++) - //RGB32k.RGB[r][g][b] = ColorMatcher.Pick ((r<<3)|(r>>2), (g<<3)|(g>>2), (b<<3)|(b>>2)); - RGB32k.RGB[r][g][b] = 4; // create the RGB666 lookup table for (r = 0; r < 64; r++) for (g = 0; g < 64; g++) for (b = 0; b < 64; b++) RGB256k.RGB[r][g][b] = ColorMatcher.Pick ((r<<2)|(r>>4), (g<<2)|(g>>4), (b<<2)|(b>>4)); - int x, y; - - // create the swizzled palette - for (x = 0; x < 65; x++) - for (y = 0; y < 256; y++) - Col2RGB8[x][y] = (((palette[y].r*x)>>4)<<20) | - ((palette[y].g*x)>>4) | - (((palette[y].b*x)>>4)<<10); - - // create the swizzled palette with the lsb of red and blue forced to 0 - // (for green, a 1 is okay since it never gets added into) - for (x = 1; x < 64; x++) - { - Col2RGB8_LessPrecision[x] = Col2RGB8_2[x-1]; - for (y = 0; y < 256; y++) - { - Col2RGB8_2[x-1][y] = Col2RGB8[x][y] & 0x3feffbff; - } - } - Col2RGB8_LessPrecision[0] = Col2RGB8[0]; - Col2RGB8_LessPrecision[64] = Col2RGB8[64]; - - // create the inverse swizzled palette - for (x = 0; x < 65; x++) - for (y = 0; y < 
256; y++) - { - Col2RGB8_Inverse[x][y] = (((((255-palette[y].r)*x)>>4)<<20) | - (((255-palette[y].g)*x)>>4) | - ((((255-palette[y].b)*x)>>4)<<10)) & 0x3feffbff; - } } //========================================================================== diff --git a/src/v_video.h b/src/v_video.h index 57d15869cd..0da6b9b500 100644 --- a/src/v_video.h +++ b/src/v_video.h @@ -448,20 +448,6 @@ EXTERN_CVAR (Float, Gamma) // Translucency tables -// RGB32k is a normal R5G5B5 -> palette lookup table. - -// Use a union so we can "overflow" without warnings. -// Otherwise, we get stuff like this from Clang (when compiled -// with -fsanitize=bounds) while running: -// src/v_video.cpp:390:12: runtime error: index 1068 out of bounds for type 'BYTE [32]' -// src/r_draw.cpp:273:11: runtime error: index 1057 out of bounds for type 'BYTE [32]' -union ColorTable32k -{ - BYTE RGB[32][32][32]; - BYTE All[32 *32 *32]; -}; -extern "C" ColorTable32k RGB32k; - // [SP] RGB666 support union ColorTable256k { @@ -470,31 +456,6 @@ union ColorTable256k }; extern "C" ColorTable256k RGB256k; -// Col2RGB8 is a pre-multiplied palette for color lookup. It is stored in a -// special R10B10G10 format for efficient blending computation. -// --RRRRRrrr--BBBBBbbb--GGGGGggg-- at level 64 -// --------rrrr------bbbb------gggg at level 1 -extern "C" DWORD Col2RGB8[65][256]; - -// Col2RGB8_LessPrecision is the same as Col2RGB8, but the LSB for red -// and blue are forced to zero, so if the blend overflows, it won't spill -// over into the next component's value. -// --RRRRRrrr-#BBBBBbbb-#GGGGGggg-- at level 64 -// --------rrr#------bbb#------gggg at level 1 -extern "C" DWORD *Col2RGB8_LessPrecision[65]; - -// Col2RGB8_Inverse is the same as Col2RGB8_LessPrecision, except the source -// palette has been inverted. 
-extern "C" DWORD Col2RGB8_Inverse[65][256]; - -// "Magic" numbers used during the blending: -// --000001111100000111110000011111 = 0x01f07c1f -// -0111111111011111111101111111111 = 0x3FEFFBFF -// -1000000000100000000010000000000 = 0x40100400 -// ------10000000001000000000100000 = 0x40100400 >> 5 -// --11111-----11111-----11111----- = 0x40100400 - (0x40100400 >> 5) aka "white" -// --111111111111111111111111111111 = 0x3FFFFFFF - // Allocates buffer screens, call before R_Init. void V_Init (bool restart); From 42fbe6358493f0cb7587745aa39b59921c565522 Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Mon, 19 Dec 2016 05:02:44 -0500 Subject: [PATCH 518/912] - fixed: blood decals are now red. --- src/r_draw_pal.cpp | 6 +++--- src/r_drawt_pal.cpp | 12 ++++++------ 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/r_draw_pal.cpp b/src/r_draw_pal.cpp index b99f58f887..fe6c63b6de 100644 --- a/src/r_draw_pal.cpp +++ b/src/r_draw_pal.cpp @@ -1462,9 +1462,9 @@ namespace swrenderer { uint32_t val = source[frac >> FRACBITS]; - int r = (palette[*dest].r * (255-val)) >> 10; - int g = (palette[*dest].g * (255-val)) >> 10; - int b = (palette[*dest].b * (255-val)) >> 10; + int r = (palette[*dest].r * (255-val) + palette[_color].r * val) >> 10; + int g = (palette[*dest].g * (255-val) + palette[_color].g * val) >> 10; + int b = (palette[*dest].b * (255-val) + palette[_color].b * val) >> 10; *dest = RGB256k.RGB[clamp(r,0,63)][clamp(g,0,63)][clamp(b,0,63)]; dest += pitch; diff --git a/src/r_drawt_pal.cpp b/src/r_drawt_pal.cpp index 27c2a42943..26cb562218 100644 --- a/src/r_drawt_pal.cpp +++ b/src/r_drawt_pal.cpp @@ -516,9 +516,9 @@ namespace swrenderer do { uint32_t val = *source; - int r = (palette[*dest].r * (255-val)) >> 10; - int g = (palette[*dest].g * (255-val)) >> 10; - int b = (palette[*dest].b * (255-val)) >> 10; + int r = (palette[*dest].r * (255-val) + palette[_color].r * val) >> 10; + int g = (palette[*dest].g * (255-val) + palette[_color].g * val) >> 
10; + int b = (palette[*dest].b * (255-val) + palette[_color].b * val) >> 10; *dest = RGB256k.RGB[clamp(r,0,63)][clamp(g,0,63)][clamp(b,0,63)]; source += 4; dest += pitch; @@ -550,9 +550,9 @@ namespace swrenderer for (int ks = 0; ks < 4; ks++) { val = source[ks]; - int r = (palette[dest[ks]].r * (255-val)) >> 10; - int g = (palette[dest[ks]].g * (255-val)) >> 10; - int b = (palette[dest[ks]].b * (255-val)) >> 10; + int r = (palette[dest[ks]].r * (255-val) + palette[_color].r * val) >> 10; + int g = (palette[dest[ks]].g * (255-val) + palette[_color].g * val) >> 10; + int b = (palette[dest[ks]].b * (255-val) + palette[_color].b * val) >> 10; dest[ks] = RGB256k.RGB[clamp(r,0,63)][clamp(g,0,63)][clamp(b,0,63)]; } From 4a95ef93ed07ef6252bd0897ac11dcbab356c902 Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Mon, 19 Dec 2016 05:19:10 -0500 Subject: [PATCH 519/912] - Added names to r_drawt_pal.cpp copyright notice. --- src/r_drawt_pal.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/r_drawt_pal.cpp b/src/r_drawt_pal.cpp index 26cb562218..e2af7bbf19 100644 --- a/src/r_drawt_pal.cpp +++ b/src/r_drawt_pal.cpp @@ -4,6 +4,8 @@ ** **--------------------------------------------------------------------------- ** Copyright 1998-2006 Randy Heit +** Copyright 2016 Magnus Norddahl +** Copyright 2016 Rachael Alexanderson ** All rights reserved. ** ** Redistribution and use in source and binary forms, with or without From c1352d6ecb1782cff6ca4899fe4ad5a2444071dd Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Mon, 19 Dec 2016 06:06:19 -0500 Subject: [PATCH 520/912] - fixed: One of these floors was supposed to be additive, not translucent... 
--- src/r_plane.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/r_plane.cpp b/src/r_plane.cpp index 1a7758923f..581e8b7327 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -1529,7 +1529,7 @@ void R_DrawNormalPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t { spanfunc = R_DrawSpanAddClamp; dc_srcalpha = alpha; - dc_destalpha = OPAQUE-alpha; + dc_destalpha = FRACUNIT; } } else From 5ffee5231fc1cd4fdf928d3c5c6e2e6d0d8db9b0 Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Mon, 19 Dec 2016 11:27:46 -0500 Subject: [PATCH 521/912] - Hooking the new RGB256k drawer to triangle drawer in place of RGB32k. --- src/r_drawers.h | 2 +- src/r_poly_triangle.cpp | 2 +- tools/drawergen/fixedfunction/drawtrianglecodegen.cpp | 4 ++-- tools/drawergen/fixedfunction/drawtrianglecodegen.h | 2 +- tools/drawergen/llvmdrawers.cpp | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/r_drawers.h b/src/r_drawers.h index 102d3159fa..8ac259e11d 100644 --- a/src/r_drawers.h +++ b/src/r_drawers.h @@ -247,7 +247,7 @@ struct TriDrawTriangleArgs uint8_t stencilWriteValue; uint32_t *subsectorGBuffer; const uint8_t *colormaps; - const uint8_t *RGB32k; + const uint8_t *RGB256k; const uint8_t *BaseColors; }; diff --git a/src/r_poly_triangle.cpp b/src/r_poly_triangle.cpp index e06cf2b7fa..72851b6b34 100644 --- a/src/r_poly_triangle.cpp +++ b/src/r_poly_triangle.cpp @@ -130,7 +130,7 @@ void PolyTriangleDrawer::draw_arrays(const PolyDrawArgs &drawargs, WorkerThreadD args.stencilMasks = PolyStencilBuffer::Instance()->Masks(); args.subsectorGBuffer = PolySubsectorGBuffer::Instance()->Values(); args.colormaps = drawargs.colormaps; - args.RGB32k = RGB32k.All; + args.RGB256k = RGB256k.All; args.BaseColors = (const uint8_t *)GPalette.BaseColors; bool ccw = drawargs.ccw; diff --git a/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp b/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp index 674543ff2d..c5f7405f5c 100644 --- 
a/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp +++ b/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp @@ -492,7 +492,7 @@ SSAInt DrawTriangleCodegen::ToPal8(SSAVec4i c) SSAInt red = SSAInt::clamp(c[0], SSAInt(0), SSAInt(255)); SSAInt green = SSAInt::clamp(c[1], SSAInt(0), SSAInt(255)); SSAInt blue = SSAInt::clamp(c[2], SSAInt(0), SSAInt(255)); - return RGB32k[((blue >> 3) * 32 + (green >> 3)) * 32 + (red >> 3)].load(true).zext_int(); + return RGB256k[((blue >> 2) * 64 + (green >> 2)) * 64 + (red >> 2)].load(true).zext_int(); } SSAInt DrawTriangleCodegen::ProcessPixel8(SSAInt bg, SSAInt *varying) @@ -631,7 +631,7 @@ void DrawTriangleCodegen::LoadArgs(SSAValue args, SSAValue thread_data) if (!truecolor) { Colormaps = args[0][20].load(true); - RGB32k = args[0][21].load(true); + RGB256k = args[0][21].load(true); BaseColors = args[0][22].load(true); } diff --git a/tools/drawergen/fixedfunction/drawtrianglecodegen.h b/tools/drawergen/fixedfunction/drawtrianglecodegen.h index 640a22aea4..71d6ebd58f 100644 --- a/tools/drawergen/fixedfunction/drawtrianglecodegen.h +++ b/tools/drawergen/fixedfunction/drawtrianglecodegen.h @@ -95,7 +95,7 @@ private: SSABool is_fixed_light; SSAUBytePtr Colormaps; - SSAUBytePtr RGB32k; + SSAUBytePtr RGB256k; SSAUBytePtr BaseColors; SSAInt numSpans; diff --git a/tools/drawergen/llvmdrawers.cpp b/tools/drawergen/llvmdrawers.cpp index e0300946cb..e68470188f 100644 --- a/tools/drawergen/llvmdrawers.cpp +++ b/tools/drawergen/llvmdrawers.cpp @@ -430,7 +430,7 @@ llvm::Type *LLVMDrawers::GetTriDrawTriangleArgs(llvm::LLVMContext &context) elements.push_back(llvm::Type::getInt8Ty(context)); // uint8_t stencilWriteValue; elements.push_back(llvm::Type::getInt32PtrTy(context)); // uint32_t *subsectorGBuffer; elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint8_t *colormaps; - elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint8_t *RGB32k; + elements.push_back(llvm::Type::getInt8PtrTy(context)); // const 
uint8_t *RGB256k; elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint8_t *BaseColors; TriDrawTriangleArgs = llvm::StructType::create(context, elements, "TriDrawTriangle", false)->getPointerTo(); return TriDrawTriangleArgs; From b3bed807de48069c0454c211ab7f2729d6d41421 Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Tue, 20 Dec 2016 13:31:38 -0500 Subject: [PATCH 522/912] - Mostly implemented Zandronum's IWAD selection box. Currently, this change only affects Windows. --- src/win32/i_system.cpp | 18 ++++++++++++++++++ src/win32/resource.h | 7 +++++++ src/win32/zdoom.rc | 31 +++++++++++++++++++++++++++++++ 3 files changed, 56 insertions(+) diff --git a/src/win32/i_system.cpp b/src/win32/i_system.cpp index acea7a7881..ebb3d8c676 100644 --- a/src/win32/i_system.cpp +++ b/src/win32/i_system.cpp @@ -127,6 +127,9 @@ static void DestroyCustomCursor(); EXTERN_CVAR(String, language); EXTERN_CVAR (Bool, queryiwad); +// Used on welcome/IWAD screen. +EXTERN_CVAR (Int, vid_renderer) +EXTERN_CVAR (Bool, fullscreen) extern HWND Window, ConWindow, GameTitleWindow; extern HANDLE StdOut; @@ -1159,6 +1162,18 @@ BOOL CALLBACK IWADBoxCallback(HWND hDlg, UINT message, WPARAM wParam, LPARAM lPa newlabel.Format(GAMESIG " %s: %s", GetVersionString(), label); SetWindowText(hDlg, newlabel.GetChars()); } + + // [SP] Upstreamed from Zandronum + char szString[256]; + + // Check the current video settings. + SendDlgItemMessage( hDlg, vid_renderer ? IDC_WELCOME_OPENGL : IDC_WELCOME_SOFTWARE, BM_SETCHECK, BST_CHECKED, 0 ); + SendDlgItemMessage( hDlg, IDC_WELCOME_FULLSCREEN, BM_SETCHECK, fullscreen ? BST_CHECKED : BST_UNCHECKED, 0 ); + + // Set up our version string. 
+ sprintf(szString, "Version %s.", GetVersionString()); + SetDlgItemText (hDlg, IDC_WELCOME_VERSION, szString); + // Populate the list with all the IWADs found ctrl = GetDlgItem(hDlg, IDC_IWADLIST); for (i = 0; i < NumWads; i++) @@ -1192,6 +1207,9 @@ BOOL CALLBACK IWADBoxCallback(HWND hDlg, UINT message, WPARAM wParam, LPARAM lPa (LOWORD(wParam) == IDC_IWADLIST && HIWORD(wParam) == LBN_DBLCLK)) { SetQueryIWad(hDlg); + // [SP] Upstreamed from Zandronum + vid_renderer = SendDlgItemMessage( hDlg, IDC_WELCOME_OPENGL, BM_GETCHECK, 0, 0 ) == BST_CHECKED; + fullscreen = SendDlgItemMessage( hDlg, IDC_WELCOME_FULLSCREEN, BM_GETCHECK, 0, 0 ) == BST_CHECKED; ctrl = GetDlgItem (hDlg, IDC_IWADLIST); EndDialog(hDlg, SendMessage (ctrl, LB_GETCURSEL, 0, 0)); } diff --git a/src/win32/resource.h b/src/win32/resource.h index 6886e83fdd..52db9de2e6 100644 --- a/src/win32/resource.h +++ b/src/win32/resource.h @@ -164,3 +164,10 @@ #define _APS_NEXT_SYMED_VALUE 101 #endif #endif + +// [SP] Upstreamed from Zandronum +#define IDC_WELCOME_VERSION 4019 // [RC] "Welcome" screen. 
+#define IDC_WELCOME_OPENGL 4020 +#define IDC_WELCOME_SOFTWARE 4021 +#define IDC_WELCOME_FULLSCREEN 4022 +#define IDI_ICONST 151 diff --git a/src/win32/zdoom.rc b/src/win32/zdoom.rc index 1f760d8ea2..4d38762e0b 100644 --- a/src/win32/zdoom.rc +++ b/src/win32/zdoom.rc @@ -220,6 +220,7 @@ END // Dialog // +/* IDD_IWADDIALOG DIALOGEX 0, 0, 212, 186 STYLE DS_SETFONT | DS_MODALFRAME | DS_FIXEDSYS | DS_CENTER | WS_POPUP | WS_CAPTION | WS_SYSMENU EXSTYLE WS_EX_APPWINDOW @@ -233,6 +234,36 @@ BEGIN LTEXT "ZDoom found more than one IWAD present.",IDC_STATIC,5,5,140,8 LTEXT "Select from the list below to determine which one to use:",IDC_STATIC,5,15,200,8 END +*/ +// [SP] Upstreamed from Zandronum +IDD_IWADDIALOG DIALOGEX 0, 0, 224, 236 +STYLE DS_MODALFRAME | DS_FIXEDSYS | DS_CENTER | WS_POPUP | WS_CAPTION | + WS_SYSMENU +EXSTYLE WS_EX_APPWINDOW +CAPTION "Welcome" +FONT 8, "MS Shell Dlg", 0, 0, 0x1 +BEGIN + ICON IDI_ICON1,IDC_STATIC,7,7,32,32 + LTEXT "Welcome to QZDoom!",IDC_STATIC,42,8,180,8 + LTEXT "",IDC_WELCOME_VERSION,42,18,180,8 + GROUPBOX "IWAD selection",IDC_STATIC,8,32,224-16,102 + LTEXT "Select which game file (IWAD) to run.", IDC_STATIC,12,32+12,190,8 + LISTBOX IDC_IWADLIST,12,32+24,224-24,72,LBS_NOINTEGRALHEIGHT | + WS_VSCROLL | WS_TABSTOP + GROUPBOX "Video settings",IDC_STATIC,8,138,224-16,48 + LTEXT "Choose how QZDoom will render the game.", IDC_STATIC,12,148,190,8 + CONTROL "Hardware (OpenGL)",IDC_WELCOME_OPENGL,"Button", + BS_AUTORADIOBUTTON,12,170,93,10 + CONTROL "Software (Doom)",IDC_WELCOME_SOFTWARE,"Button", + BS_AUTORADIOBUTTON,12,160,93,10 + CONTROL "Fullscreen",IDC_WELCOME_FULLSCREEN,"Button", + BS_AUTOCHECKBOX | WS_TABSTOP, 124,160,48,10 + CONTROL "Don't ask me this again",IDC_DONTASKIWAD,"Button", + BS_AUTOCHECKBOX | WS_TABSTOP,72,192,87,10 + DEFPUSHBUTTON "Play QZDoom",IDOK,8,236-18,90,14 + PUSHBUTTON "Exit",IDCANCEL,224-58,236-18,50,14 + +END IDD_EAXPROPERTYLIST DIALOGEX 0, 0, 265, 404 STYLE DS_SETFONT | DS_3DLOOK | DS_FIXEDSYS | DS_CONTROL | 
WS_CHILD | WS_VSCROLL From 8a3db6c0036dc0f387afcd4c33ea1db2c87d4424 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 20 Dec 2016 23:21:34 +0100 Subject: [PATCH 523/912] Dynamic lights? Did anyone say DYNAMIC LIGHTS? --- src/gl/dynlights/a_dynlight.cpp | 5 --- src/r_draw_rgba.cpp | 25 +++++++++++ src/r_drawers.h | 11 +++++ src/r_poly_wall.cpp | 3 ++ src/r_swrenderer.cpp | 7 +++- src/r_swrenderer.h | 19 +++++++++ .../fixedfunction/drawwallcodegen.cpp | 42 ++++++++++++++++++- .../drawergen/fixedfunction/drawwallcodegen.h | 10 ++++- tools/drawergen/llvmdrawers.cpp | 18 ++++++++ tools/drawergen/llvmdrawers.h | 2 + 10 files changed, 133 insertions(+), 9 deletions(-) diff --git a/src/gl/dynlights/a_dynlight.cpp b/src/gl/dynlights/a_dynlight.cpp index 0ab8345053..d2efbe991a 100644 --- a/src/gl/dynlights/a_dynlight.cpp +++ b/src/gl/dynlights/a_dynlight.cpp @@ -79,7 +79,6 @@ #include "gl/utility/gl_convert.h" #include "gl/utility/gl_templates.h" -EXTERN_CVAR(Int, vid_renderer) //========================================================================== @@ -260,10 +259,6 @@ void ADynamicLight::Deactivate(AActor *activator) //========================================================================== void ADynamicLight::Tick() { - if (vid_renderer == 0) - { - return; - } if (IsOwned()) { if (!target || !target->state) diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 09293d6de4..7b47638553 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -221,6 +221,26 @@ namespace swrenderer if (args.source2[0] == nullptr) args.flags |= DrawWallArgs::nearest_filter; + args.dynlights = nullptr; + args.num_dynlights = 0; + /* + static TriLight fakelight; + static bool first = true; + if (first) + { + fakelight.x = 100.0f; + fakelight.y = 0.0f; + fakelight.z = 100.0f; + fakelight.color = 0xffffff00; + fakelight.radius = 256.0f / 1000.0f; + first = false; + } + + args.z = 0.0f; + args.step_z = 1.0f; + args.dynlights = &fakelight; + args.num_dynlights = 1; + */ 
DetectRangeError(args.dest, args.dest_y, args.count); } @@ -279,6 +299,11 @@ namespace swrenderer if (args.source2[0] == nullptr) args.flags |= DrawWallArgs::nearest_filter; + args.z = 0.0f; + args.step_z = 0.0f; + args.dynlights = nullptr; + args.num_dynlights = 0; + DetectRangeError(args.dest, args.dest_y, args.count); } diff --git a/src/r_drawers.h b/src/r_drawers.h index 8ac259e11d..eff6ea3707 100644 --- a/src/r_drawers.h +++ b/src/r_drawers.h @@ -59,6 +59,13 @@ struct WorkerThreadData int32_t StartY; }; +struct TriLight +{ + uint32_t color; + float x, y, z; + float radius; +}; + struct DrawWallArgs { uint32_t *dest; @@ -91,6 +98,10 @@ struct DrawWallArgs nearest_filter = 2 }; + float z, step_z; + TriLight *dynlights; + uint32_t num_dynlights; + FString ToString(); }; diff --git a/src/r_poly_wall.cpp b/src/r_poly_wall.cpp index ebb355431b..f9d409b604 100644 --- a/src/r_poly_wall.cpp +++ b/src/r_poly_wall.cpp @@ -261,6 +261,9 @@ void RenderPolyWall::Render(const TriMatrix &worldToClip, const Vec4f &clipPlane args.SetColormap(Line->frontsector->ColorMap); args.SetClipPlane(clipPlane.x, clipPlane.y, clipPlane.z, clipPlane.w); + //if (Side && Side->lighthead) + // args.uniforms.light = 255; // Make walls touched by a light fullbright! 
+ if (Polyportal) { args.stencilwritevalue = Polyportal->StencilValue; diff --git a/src/r_swrenderer.cpp b/src/r_swrenderer.cpp index a0f90f5ccb..5d94e04421 100644 --- a/src/r_swrenderer.cpp +++ b/src/r_swrenderer.cpp @@ -47,6 +47,9 @@ #include "r_poly.h" #include "p_setup.h" +void gl_ParseDefs(); +void gl_InitData(); + EXTERN_CVAR(Bool, r_shadercolormaps) EXTERN_CVAR(Float, maxviewpitch) // [SP] CVAR from GZDoom @@ -92,6 +95,8 @@ FSoftwareRenderer::~FSoftwareRenderer() void FSoftwareRenderer::Init() { + gl_ParseDefs(); + r_swtruecolor = screen->IsBgra(); R_InitRenderer(); } @@ -310,7 +315,7 @@ int FSoftwareRenderer::GetMaxViewPitch(bool down) bool FSoftwareRenderer::RequireGLNodes() { - return r_polyrenderer; + return true; } //========================================================================== diff --git a/src/r_swrenderer.h b/src/r_swrenderer.h index 5b205b8522..812ec32b50 100644 --- a/src/r_swrenderer.h +++ b/src/r_swrenderer.h @@ -3,6 +3,10 @@ #include "r_renderer.h" +void gl_SetActorLights(AActor *); +void gl_PreprocessLevel(); +void gl_CleanLevelData(); + struct FSoftwareRenderer : public FRenderer { FSoftwareRenderer(); @@ -40,6 +44,21 @@ struct FSoftwareRenderer : public FRenderer void RenderTextureView (FCanvasTexture *tex, AActor *viewpoint, int fov) override; sector_t *FakeFlat(sector_t *sec, sector_t *tempsec, int *floorlightlevel, int *ceilinglightlevel, bool back) override; + void StateChanged(AActor *actor) override + { + gl_SetActorLights(actor); + } + + void PreprocessLevel() override + { + gl_PreprocessLevel(); + } + + void CleanLevelData() override + { + gl_CleanLevelData(); + } + }; diff --git a/tools/drawergen/fixedfunction/drawwallcodegen.cpp b/tools/drawergen/fixedfunction/drawwallcodegen.cpp index 94b807f40f..1ec00df76c 100644 --- a/tools/drawergen/fixedfunction/drawwallcodegen.cpp +++ b/tools/drawergen/fixedfunction/drawwallcodegen.cpp @@ -78,6 +78,10 @@ void DrawWallCodegen::Generate(DrawWallVariant variant, bool fourColumns, 
SSAVal SSAShort fade_blue = args[0][41].load(true); SSAShort desaturate = args[0][42].load(true); SSAInt flags = args[0][43].load(true); + start_z = args[0][44].load(true); + step_z = args[0][45].load(true); + dynlights = args[0][46].load(true); + num_dynlights = args[0][47].load(true); shade_constants.light = SSAVec4i(light_blue.zext_int(), light_green.zext_int(), light_red.zext_int(), light_alpha.zext_int()); shade_constants.fade = SSAVec4i(fade_blue.zext_int(), fade_green.zext_int(), fade_red.zext_int(), fade_alpha.zext_int()); shade_constants.desaturate = desaturate.zext_int(); @@ -129,9 +133,11 @@ void DrawWallCodegen::Loop(DrawWallVariant variant, bool fourColumns, bool isSim int numColumns = fourColumns ? 4 : 1; stack_index.store(SSAInt(0)); + stack_z.store(start_z); { SSAForBlock loop; SSAInt index = stack_index.load(); + z = stack_z.load(); loop.loop_block(index < count); SSAInt frac[4]; @@ -167,6 +173,7 @@ void DrawWallCodegen::Loop(DrawWallVariant variant, bool fourColumns, bool isSim dest[offset].store_vec4ub(color); } + stack_z.store(z + step_z); stack_index.store(index.add(SSAInt(1), true, true)); for (int i = 0; i < numColumns; i++) stack_frac[i].store(frac[i] + fracstep[i]); @@ -209,10 +216,41 @@ SSAVec4i DrawWallCodegen::SampleLinear(SSAUBytePtr col0, SSAUBytePtr col1, SSAIn SSAVec4i DrawWallCodegen::Shade(SSAVec4i fg, int index, bool isSimpleShade) { + SSAVec4i c; if (isSimpleShade) - return shade_bgra_simple(fg, light[index]); + c = shade_bgra_simple(fg, light[index]); else - return shade_bgra_advanced(fg, light[index], shade_constants); + c = shade_bgra_advanced(fg, light[index], shade_constants); + + stack_lit_color.store(c); + stack_light_index.store(SSAInt(0)); + + SSAForBlock block; + SSAInt light_index = stack_light_index.load(); + SSAVec4i lit_color = stack_lit_color.load(); + block.loop_block(light_index < num_dynlights); + { + SSAVec4i light_color = SSAUBytePtr(dynlights[light_index][0].v).load_vec4ub(true); + SSAFloat light_x = 
dynlights[light_index][1].load(true); + //SSAFloat light_y = dynlights[light_index][2].load(true); + SSAFloat light_z = dynlights[light_index][3].load(true); + SSAFloat light_rcp_radius = dynlights[light_index][4].load(true); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // attenuation = 1 - MIN(dist * (1/radius), 1) + SSAFloat Lxy2 = light_x; // L.x*L.x + L.y*L.y + SSAFloat Lz = light_z - z; + SSAFloat dist = SSAFloat::sqrt(Lxy2 + Lz * Lz); + SSAInt attenuation = SSAInt(SSAFloat(256.0f) - SSAFloat::MIN(dist * light_rcp_radius, SSAFloat(256.0f)), true); + SSAVec4i contribution = (light_color * fg * attenuation) >> 16; + + stack_lit_color.store(lit_color + contribution); + stack_light_index.store(light_index + 1); + } + block.end_block(); + + return stack_lit_color.load(); } SSAVec4i DrawWallCodegen::Blend(SSAVec4i fg, SSAVec4i bg, DrawWallVariant variant) diff --git a/tools/drawergen/fixedfunction/drawwallcodegen.h b/tools/drawergen/fixedfunction/drawwallcodegen.h index 4a1599063c..cb46dcd5f4 100644 --- a/tools/drawergen/fixedfunction/drawwallcodegen.h +++ b/tools/drawergen/fixedfunction/drawwallcodegen.h @@ -47,7 +47,9 @@ private: SSAVec4i Shade(SSAVec4i fg, int index, bool isSimpleShade); SSAVec4i Blend(SSAVec4i fg, SSAVec4i bg, DrawWallVariant variant); - SSAStack stack_index, stack_frac[4]; + SSAStack stack_index, stack_frac[4], stack_light_index; + SSAStack stack_lit_color; + SSAStack stack_z; SSAUBytePtr dest; SSAUBytePtr source[4]; @@ -69,4 +71,10 @@ private: SSAInt fracstep[4]; SSAInt one[4]; + + SSAFloat start_z, step_z; + + SSAValue dynlights; // TriLight* + SSAInt num_dynlights; + SSAFloat z; }; diff --git a/tools/drawergen/llvmdrawers.cpp b/tools/drawergen/llvmdrawers.cpp index e68470188f..706cfb79f2 100644 --- a/tools/drawergen/llvmdrawers.cpp +++ b/tools/drawergen/llvmdrawers.cpp @@ -290,6 +290,11 @@ llvm::Type *LLVMDrawers::GetDrawWallArgsStruct(llvm::LLVMContext &context) elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t 
fade_blue; elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t desaturate; elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t flags; + elements.push_back(llvm::Type::getFloatTy(context)); // float z; + elements.push_back(llvm::Type::getFloatTy(context)); // float step_z; + elements.push_back(GetTriLightStruct(context)); // TriLight *dynlights; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t num_dynlights; + DrawWallArgsStruct = llvm::StructType::create(context, elements, "DrawWallArgs", false)->getPointerTo(); return DrawWallArgsStruct; } @@ -326,6 +331,19 @@ llvm::Type *LLVMDrawers::GetWorkerThreadDataStruct(llvm::LLVMContext &context) return WorkerThreadDataStruct; } +llvm::Type *LLVMDrawers::GetTriLightStruct(llvm::LLVMContext &context) +{ + if (TriLightStruct) + return TriLightStruct; + + std::vector elements; + elements.push_back(llvm::Type::getInt32Ty(context)); + for (int i = 0; i < 4 + TriVertex::NumVarying; i++) + elements.push_back(llvm::Type::getFloatTy(context)); + TriLightStruct = llvm::StructType::create(context, elements, "TriLight", false)->getPointerTo(); + return TriLightStruct; +} + llvm::Type *LLVMDrawers::GetTriVertexStruct(llvm::LLVMContext &context) { if (TriVertexStruct) diff --git a/tools/drawergen/llvmdrawers.h b/tools/drawergen/llvmdrawers.h index a1b7b53616..f546d1dff2 100644 --- a/tools/drawergen/llvmdrawers.h +++ b/tools/drawergen/llvmdrawers.h @@ -58,6 +58,7 @@ private: llvm::Type *GetDrawWallArgsStruct(llvm::LLVMContext &context); llvm::Type *GetDrawSkyArgsStruct(llvm::LLVMContext &context); llvm::Type *GetWorkerThreadDataStruct(llvm::LLVMContext &context); + llvm::Type *GetTriLightStruct(llvm::LLVMContext &context); llvm::Type *GetTriVertexStruct(llvm::LLVMContext &context); llvm::Type *GetTriMatrixStruct(llvm::LLVMContext &context); llvm::Type *GetTriUniformsStruct(llvm::LLVMContext &context); @@ -70,6 +71,7 @@ private: llvm::Type *DrawWallArgsStruct = nullptr; llvm::Type 
*DrawSkyArgsStruct = nullptr; llvm::Type *WorkerThreadDataStruct = nullptr; + llvm::Type *TriLightStruct = nullptr; llvm::Type *TriVertexStruct = nullptr; llvm::Type *TriMatrixStruct = nullptr; llvm::Type *TriUniformsStruct = nullptr; From 578e3270e20f6b0dc6dba7e1001c95a963acc283 Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Tue, 20 Dec 2016 23:03:06 -0500 Subject: [PATCH 524/912] - Implemented Gez's suggestion for disable autoload checkbox on the IWAD selection box. --- src/d_main.cpp | 4 +++- src/win32/i_system.cpp | 6 ++++++ src/win32/resource.h | 3 +++ src/win32/zdoom.rc | 11 ++++++----- 4 files changed, 18 insertions(+), 6 deletions(-) diff --git a/src/d_main.cpp b/src/d_main.cpp index 04ba34cbb0..2b8ff74372 100644 --- a/src/d_main.cpp +++ b/src/d_main.cpp @@ -205,6 +205,8 @@ CUSTOM_CVAR (String, vid_cursor, "None", CVAR_ARCHIVE | CVAR_NOINITCALL) } } +bool disableautoload = false; // [SP] No auto load + bool wantToRestart; bool DrawFSHUD; // [RH] Draw fullscreen HUD? TArray allwads; @@ -2038,7 +2040,7 @@ static void AddAutoloadFiles(const char *autoname) { LumpFilterIWAD.Format("%s.", autoname); // The '.' is appened to simplify parsing the string - if (!(gameinfo.flags & GI_SHAREWARE) && !Args->CheckParm("-noautoload")) + if (!(gameinfo.flags & GI_SHAREWARE) && !Args->CheckParm("-noautoload") && !disableautoload) { FString file; diff --git a/src/win32/i_system.cpp b/src/win32/i_system.cpp index ebb3d8c676..52f054dc2f 100644 --- a/src/win32/i_system.cpp +++ b/src/win32/i_system.cpp @@ -130,6 +130,7 @@ EXTERN_CVAR (Bool, queryiwad); // Used on welcome/IWAD screen. EXTERN_CVAR (Int, vid_renderer) EXTERN_CVAR (Bool, fullscreen) +extern bool disableautoload; extern HWND Window, ConWindow, GameTitleWindow; extern HANDLE StdOut; @@ -1169,6 +1170,7 @@ BOOL CALLBACK IWADBoxCallback(HWND hDlg, UINT message, WPARAM wParam, LPARAM lPa // Check the current video settings. SendDlgItemMessage( hDlg, vid_renderer ? 
IDC_WELCOME_OPENGL : IDC_WELCOME_SOFTWARE, BM_SETCHECK, BST_CHECKED, 0 ); SendDlgItemMessage( hDlg, IDC_WELCOME_FULLSCREEN, BM_SETCHECK, fullscreen ? BST_CHECKED : BST_UNCHECKED, 0 ); + SendDlgItemMessage( hDlg, IDC_WELCOME_NOAUTOLOAD, BM_SETCHECK, disableautoload ? BST_CHECKED : BST_UNCHECKED, 0 ); // Set up our version string. sprintf(szString, "Version %s.", GetVersionString()); @@ -1210,6 +1212,10 @@ BOOL CALLBACK IWADBoxCallback(HWND hDlg, UINT message, WPARAM wParam, LPARAM lPa // [SP] Upstreamed from Zandronum vid_renderer = SendDlgItemMessage( hDlg, IDC_WELCOME_OPENGL, BM_GETCHECK, 0, 0 ) == BST_CHECKED; fullscreen = SendDlgItemMessage( hDlg, IDC_WELCOME_FULLSCREEN, BM_GETCHECK, 0, 0 ) == BST_CHECKED; + + // [SP] This is our's. + disableautoload = SendDlgItemMessage( hDlg, IDC_WELCOME_NOAUTOLOAD, BM_GETCHECK, 0, 0 ) == BST_CHECKED; + ctrl = GetDlgItem (hDlg, IDC_IWADLIST); EndDialog(hDlg, SendMessage (ctrl, LB_GETCURSEL, 0, 0)); } diff --git a/src/win32/resource.h b/src/win32/resource.h index 52db9de2e6..937a736586 100644 --- a/src/win32/resource.h +++ b/src/win32/resource.h @@ -171,3 +171,6 @@ #define IDC_WELCOME_SOFTWARE 4021 #define IDC_WELCOME_FULLSCREEN 4022 #define IDI_ICONST 151 + +// [SP] This is our's. 
+#define IDC_WELCOME_NOAUTOLOAD 4023 diff --git a/src/win32/zdoom.rc b/src/win32/zdoom.rc index 4d38762e0b..c5c7bd2599 100644 --- a/src/win32/zdoom.rc +++ b/src/win32/zdoom.rc @@ -236,7 +236,7 @@ BEGIN END */ // [SP] Upstreamed from Zandronum -IDD_IWADDIALOG DIALOGEX 0, 0, 224, 236 +IDD_IWADDIALOG DIALOGEX 0, 0, 224, 246 STYLE DS_MODALFRAME | DS_FIXEDSYS | DS_CENTER | WS_POPUP | WS_CAPTION | WS_SYSMENU EXSTYLE WS_EX_APPWINDOW @@ -258,11 +258,12 @@ BEGIN BS_AUTORADIOBUTTON,12,160,93,10 CONTROL "Fullscreen",IDC_WELCOME_FULLSCREEN,"Button", BS_AUTOCHECKBOX | WS_TABSTOP, 124,160,48,10 - CONTROL "Don't ask me this again",IDC_DONTASKIWAD,"Button", + CONTROL "Disable autoload",IDC_WELCOME_NOAUTOLOAD,"Button", BS_AUTOCHECKBOX | WS_TABSTOP,72,192,87,10 - DEFPUSHBUTTON "Play QZDoom",IDOK,8,236-18,90,14 - PUSHBUTTON "Exit",IDCANCEL,224-58,236-18,50,14 - + CONTROL "Don't ask me this again",IDC_DONTASKIWAD,"Button", + BS_AUTOCHECKBOX | WS_TABSTOP,72,202,87,10 + DEFPUSHBUTTON "Play QZDoom",IDOK,8,246-18,90,14 + PUSHBUTTON "Exit",IDCANCEL,224-58,246-18,50,14 END IDD_EAXPROPERTYLIST DIALOGEX 0, 0, 265, 404 From 4e56c9a41acd255e118bb61382f8d3665642a754 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 21 Dec 2016 07:33:28 +0100 Subject: [PATCH 525/912] Hooked up dynamic light on walls --- src/r_draw.cpp | 5 ++ src/r_draw.h | 7 ++ src/r_draw_rgba.cpp | 27 ++----- src/r_segs.cpp | 10 ++- src/r_walldraw.cpp | 73 ++++++++++++++++++- .../fixedfunction/drawwallcodegen.cpp | 3 + 6 files changed, 99 insertions(+), 26 deletions(-) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 6e159243a2..f476e95387 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -62,6 +62,7 @@ namespace swrenderer extern int wallshade; double dc_texturemid; + FLightNode *dc_light_list; int ylookup[MAXHEIGHT]; uint8_t shadetables[NUMCOLORMAPS * 16 * 256]; @@ -100,6 +101,10 @@ namespace swrenderer uint8_t *dc_destorg; int dc_destheight; int dc_count; + FVector3 dc_viewpos; + FVector3 dc_viewpos_step; + 
TriLight *dc_lights; + int dc_num_lights; uint32_t dc_wall_texturefrac[4]; uint32_t dc_wall_iscale[4]; uint8_t *dc_wall_colormap[4]; diff --git a/src/r_draw.h b/src/r_draw.h index 88ac2338d5..a337b49437 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -4,6 +4,8 @@ #include "r_defs.h" struct FSWColormap; +struct FLightNode; +struct TriLight; EXTERN_CVAR(Bool, r_multithreaded); EXTERN_CVAR(Bool, r_magfilter); @@ -34,6 +36,7 @@ namespace swrenderer }; extern double dc_texturemid; + extern FLightNode *dc_light_list; namespace drawerargs { @@ -63,6 +66,10 @@ namespace swrenderer extern uint8_t *dc_destorg; extern int dc_destheight; extern int dc_count; + extern FVector3 dc_viewpos; + extern FVector3 dc_viewpos_step; + extern TriLight *dc_lights; + extern int dc_num_lights; extern bool drawer_needs_pal_input; diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 7b47638553..2928ac73e0 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -221,26 +221,11 @@ namespace swrenderer if (args.source2[0] == nullptr) args.flags |= DrawWallArgs::nearest_filter; + args.z = 0.0f; + args.step_z = 0.0f; args.dynlights = nullptr; args.num_dynlights = 0; - /* - static TriLight fakelight; - static bool first = true; - if (first) - { - fakelight.x = 100.0f; - fakelight.y = 0.0f; - fakelight.z = 100.0f; - fakelight.color = 0xffffff00; - fakelight.radius = 256.0f / 1000.0f; - first = false; - } - args.z = 0.0f; - args.step_z = 1.0f; - args.dynlights = &fakelight; - args.num_dynlights = 1; - */ DetectRangeError(args.dest, args.dest_y, args.count); } @@ -299,10 +284,10 @@ namespace swrenderer if (args.source2[0] == nullptr) args.flags |= DrawWallArgs::nearest_filter; - args.z = 0.0f; - args.step_z = 0.0f; - args.dynlights = nullptr; - args.num_dynlights = 0; + args.z = dc_viewpos.Z; + args.step_z = dc_viewpos_step.Z; + args.dynlights = dc_lights; + args.num_dynlights = dc_num_lights; DetectRangeError(args.dest, args.dest_y, args.count); } diff --git a/src/r_segs.cpp 
b/src/r_segs.cpp index 41a8a91669..14abcfc249 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -60,7 +60,7 @@ namespace swrenderer { using namespace drawerargs; - void R_DrawWallSegment(FTexture *rw_pic, int x1, int x2, short *walltop, short *wallbottom, float *swall, fixed_t *lwall, double yscale, double top, double bottom, bool mask); + void R_DrawWallSegment(FTexture *rw_pic, int x1, int x2, short *walltop, short *wallbottom, float *swall, fixed_t *lwall, double yscale, double top, double bottom, bool mask, FLightNode *light_list = nullptr); void R_DrawDrawSeg(drawseg_t *ds, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat); #define HEIGHTBITS 12 @@ -1139,6 +1139,8 @@ void R_RenderSegLoop () } if(fake3D & 7) return; + FLightNode *light_list = (curline && curline->sidedef) ? curline->sidedef->lighthead : nullptr; + // draw the wall tiers if (midtexture) { // one sided line @@ -1165,7 +1167,7 @@ void R_RenderSegLoop () { rw_offset = -rw_offset; } - R_DrawWallSegment(rw_pic, x1, x2, walltop, wallbottom, swall, lwall, yscale, MAX(rw_frontcz1, rw_frontcz2), MIN(rw_frontfz1, rw_frontfz2), false); + R_DrawWallSegment(rw_pic, x1, x2, walltop, wallbottom, swall, lwall, yscale, MAX(rw_frontcz1, rw_frontcz2), MIN(rw_frontfz1, rw_frontfz2), false, light_list); } fillshort (ceilingclip+x1, x2-x1, viewheight); fillshort (floorclip+x1, x2-x1, 0xffff); @@ -1201,7 +1203,7 @@ void R_RenderSegLoop () { rw_offset = -rw_offset; } - R_DrawWallSegment(rw_pic, x1, x2, walltop, wallupper, swall, lwall, yscale, MAX(rw_frontcz1, rw_frontcz2), MIN(rw_backcz1, rw_backcz2), false); + R_DrawWallSegment(rw_pic, x1, x2, walltop, wallupper, swall, lwall, yscale, MAX(rw_frontcz1, rw_frontcz2), MIN(rw_backcz1, rw_backcz2), false, light_list); } memcpy (ceilingclip+x1, wallupper+x1, (x2-x1)*sizeof(short)); } @@ -1240,7 +1242,7 @@ void R_RenderSegLoop () { rw_offset = -rw_offset; } - R_DrawWallSegment(rw_pic, x1, x2, walllower, wallbottom, swall, lwall, yscale, 
MAX(rw_backfz1, rw_backfz2), MIN(rw_frontfz1, rw_frontfz2), false); + R_DrawWallSegment(rw_pic, x1, x2, walllower, wallbottom, swall, lwall, yscale, MAX(rw_backfz1, rw_backfz2), MIN(rw_frontfz1, rw_frontfz2), false, light_list); } memcpy (floorclip+x1, walllower+x1, (x2-x1)*sizeof(short)); } diff --git a/src/r_walldraw.cpp b/src/r_walldraw.cpp index dcdfa4b897..ad82f54dcb 100644 --- a/src/r_walldraw.cpp +++ b/src/r_walldraw.cpp @@ -42,6 +42,8 @@ #include "r_3dfloors.h" #include "v_palette.h" #include "r_data/colormaps.h" +#include "gl/dynlights/gl_dynlight.h" +#include "r_drawers.h" namespace swrenderer { @@ -537,6 +539,53 @@ static void Draw1Column(int x, int y1, int y2, WallSampler &sampler, void(*draw1 { if (r_swtruecolor) { + // Find column position in view space + float w1 = 1.0f / WallC.sz1; + float w2 = 1.0f / WallC.sz2; + float t = (x - WallC.sx1 + 0.5f) / (WallC.sx2 - WallC.sx1); + float wcol = w1 * (1.0f - t) + w2 * t; + float zcol = 1.0f / wcol; + float xcol = (WallC.tleft.X * w1 * (1.0f - t) + WallC.tright.X * w2 * t) * zcol; + float ycol = (WallC.tleft.Y * w1 * (1.0f - t) + WallC.tright.Y * w2 * t) * zcol; + dc_viewpos.X = xcol; + dc_viewpos.Y = ycol; + dc_viewpos.Z = (float)((CenterY - y1 - 0.5) / InvZtoScale * zcol); + dc_viewpos_step.Z = (float)(-zcol / InvZtoScale); + + static TriLight lightbuffer[64 * 1024]; + static int nextlightindex = 0; + + // Setup lights for column + dc_num_lights = 0; + dc_lights = lightbuffer + nextlightindex; + FLightNode *cur_node = dc_light_list; + while (cur_node && nextlightindex < 64 * 1024) + { + uint32_t red = cur_node->lightsource->GetRed(); + uint32_t green = cur_node->lightsource->GetGreen(); + uint32_t blue = cur_node->lightsource->GetBlue(); + + double lightX = cur_node->lightsource->X() - ViewPos.X; + double lightY = cur_node->lightsource->Y() - ViewPos.Y; + double lightZ = cur_node->lightsource->Z() - ViewPos.Z; + + nextlightindex++; + auto &light = dc_lights[dc_num_lights++]; + light.x = (float)(lightX * 
ViewSin - lightY * ViewCos) - dc_viewpos.X; + light.y = (float)(lightX * ViewTanCos + lightY * ViewTanSin) - dc_viewpos.Y; + light.z = (float)lightZ; + light.radius = 256.0f / cur_node->lightsource->GetRadius(); + light.color = 0xff000000 | (red << 16) | (green << 8) | blue; + + // Precalculate the constant part of the dot here so the drawer doesn't have to. + light.x = light.x * light.x + light.y * light.y; + + cur_node = cur_node->nextLight; + } + + if (nextlightindex == 64 * 1024) + nextlightindex = 0; + int count = y2 - y1; dc_source = sampler.source; @@ -738,6 +787,25 @@ static void ProcessWallWorker( float light = rw_light; + double xmagnitude = 1.0; + +#if !defined(NO_DYNAMIC_SWLIGHTS) + for (int x = x1; x < x2; x++, light += rw_lightstep) + { + int y1 = uwal[x]; + int y2 = dwal[x]; + if (y2 <= y1) + continue; + + if (!fixed) + R_SetColorMapLight(basecolormap, light, wallshade); + + if (x + 1 < x2) xmagnitude = fabs(FIXED2DBL(lwal[x + 1]) - FIXED2DBL(lwal[x])); + + WallSampler sampler(y1, swal[x], yrepeat, lwal[x] + xoffset, xmagnitude, rw_pic, getcol); + Draw1Column(x, y1, y2, sampler, draw1column); + } +#else // Calculate where 4 column alignment begins and ends: int aligned_x1 = clamp((x1 + 3) / 4 * 4, x1, x2); int aligned_x2 = clamp(x2 / 4 * 4, x1, x2); @@ -872,6 +940,7 @@ static void ProcessWallWorker( WallSampler sampler(y1, swal[x], yrepeat, lwal[x] + xoffset, xmagnitude, rw_pic, getcol); Draw1Column(x, y1, y2, sampler, draw1column); } +#endif NetUpdate(); } @@ -1077,8 +1146,9 @@ void R_DrawDrawSeg(drawseg_t *ds, int x1, int x2, short *uwal, short *dwal, floa } -void R_DrawWallSegment(FTexture *rw_pic, int x1, int x2, short *walltop, short *wallbottom, float *swall, fixed_t *lwall, double yscale, double top, double bottom, bool mask) +void R_DrawWallSegment(FTexture *rw_pic, int x1, int x2, short *walltop, short *wallbottom, float *swall, fixed_t *lwall, double yscale, double top, double bottom, bool mask, FLightNode *light_list) { + dc_light_list = 
light_list; if (rw_pic->GetHeight() != 1 << rw_pic->HeightBits) { ProcessWallNP2(x1, x2, walltop, wallbottom, swall, lwall, yscale, top, bottom, false); @@ -1087,6 +1157,7 @@ void R_DrawWallSegment(FTexture *rw_pic, int x1, int x2, short *walltop, short * { ProcessWall(x1, x2, walltop, wallbottom, swall, lwall, yscale, false); } + dc_light_list = nullptr; } void R_DrawSkySegment(visplane_t *pl, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x)) diff --git a/tools/drawergen/fixedfunction/drawwallcodegen.cpp b/tools/drawergen/fixedfunction/drawwallcodegen.cpp index 1ec00df76c..6f546b9b3f 100644 --- a/tools/drawergen/fixedfunction/drawwallcodegen.cpp +++ b/tools/drawergen/fixedfunction/drawwallcodegen.cpp @@ -107,6 +107,9 @@ void DrawWallCodegen::Generate(DrawWallVariant variant, bool fourColumns, SSAVal one[i] = ((0x80000000 + textureheight[i] - 1) / textureheight[i]) * 2 + 1; } + start_z = start_z + step_z * SSAFloat(skipped_by_thread(dest_y, thread)); + step_z = step_z * SSAFloat(thread.num_cores); + SSAIfBlock branch; branch.if_block(is_simple_shade); LoopShade(variant, fourColumns, true); From dcd0ab98cc292d5485578f98589cc17c7c0e487c Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Wed, 21 Dec 2016 02:16:44 -0500 Subject: [PATCH 526/912] - Added Zandronum-specific copyright notices for IWAD picker. --- src/win32/i_system.cpp | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/win32/i_system.cpp b/src/win32/i_system.cpp index 52f054dc2f..2dc6c4d3dd 100644 --- a/src/win32/i_system.cpp +++ b/src/win32/i_system.cpp @@ -4,6 +4,8 @@ ** **--------------------------------------------------------------------------- ** Copyright 1998-2009 Randy Heit +** Copyright (C) 2007-2012 Skulltag Development Team +** Copyright (C) 2007-2016 Zandronum Development Team ** All rights reserved. 
** ** Redistribution and use in source and binary forms, with or without @@ -17,6 +19,15 @@ ** documentation and/or other materials provided with the distribution. ** 3. The name of the author may not be used to endorse or promote products ** derived from this software without specific prior written permission. +** 4. Redistributions in any form must be accompanied by information on how to +** obtain complete source code for the software and any accompanying software +** that uses the software. The source code must either be included in the +** distribution or be available for no more than the cost of distribution plus +** a nominal fee, and must be freely redistributable under reasonable +** conditions. For an executable file, complete source code means the source +** code for all modules it contains. It does not include source code for +** modules or files that typically accompany the major components of the +** operating system on which the executable file runs. ** ** THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR ** IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES From cd3fc2d6b8033f649761f304fc83906caf81b550 Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Wed, 21 Dec 2016 05:11:29 -0500 Subject: [PATCH 527/912] - More options for the IWAD picker. - Rearranged some options around a bit. - Disable auto load is now an archived cvar. 
--- src/d_main.cpp | 22 ++++++++++++++++++++- src/win32/i_system.cpp | 11 +++++++++-- src/win32/resource.h | 5 +++-- src/win32/zdoom.rc | 45 +++++++++++++++++++----------------------- 4 files changed, 53 insertions(+), 30 deletions(-) diff --git a/src/d_main.cpp b/src/d_main.cpp index 2b8ff74372..1ea2d2707c 100644 --- a/src/d_main.cpp +++ b/src/d_main.cpp @@ -205,7 +205,10 @@ CUSTOM_CVAR (String, vid_cursor, "None", CVAR_ARCHIVE | CVAR_NOINITCALL) } } -bool disableautoload = false; // [SP] No auto load +// Controlled by startup dialog +CVAR (Bool, disableautoload, false, CVAR_ARCHIVE | CVAR_NOINITCALL | CVAR_GLOBALCONFIG) +CVAR (Bool, autoloadbrightmaps, false, CVAR_ARCHIVE | CVAR_NOINITCALL | CVAR_GLOBALCONFIG) +CVAR (Bool, autoloadlights, false, CVAR_ARCHIVE | CVAR_NOINITCALL | CVAR_GLOBALCONFIG) bool wantToRestart; bool DrawFSHUD; // [RH] Draw fullscreen HUD? @@ -2040,6 +2043,23 @@ static void AddAutoloadFiles(const char *autoname) { LumpFilterIWAD.Format("%s.", autoname); // The '.' is appened to simplify parsing the string + // [SP] Dialog reaction - load lights.pk3 and brightmaps.pk3 based on user choices + if (!(gameinfo.flags & GI_SHAREWARE)) + { + if (autoloadlights) + { + const char *lightswad = BaseFileSearch ("lights.pk3", NULL); + if (lightswad) + D_AddFile (allwads, lightswad); + } + if (autoloadbrightmaps) + { + const char *bmwad = BaseFileSearch ("brightmaps.pk3", NULL); + if (bmwad) + D_AddFile (allwads, bmwad); + } + } + if (!(gameinfo.flags & GI_SHAREWARE) && !Args->CheckParm("-noautoload") && !disableautoload) { FString file; diff --git a/src/win32/i_system.cpp b/src/win32/i_system.cpp index 2dc6c4d3dd..f07fb8f1d3 100644 --- a/src/win32/i_system.cpp +++ b/src/win32/i_system.cpp @@ -141,7 +141,9 @@ EXTERN_CVAR (Bool, queryiwad); // Used on welcome/IWAD screen. 
EXTERN_CVAR (Int, vid_renderer) EXTERN_CVAR (Bool, fullscreen) -extern bool disableautoload; +EXTERN_CVAR (Bool, disableautoload) +EXTERN_CVAR (Bool, autoloadlights) +EXTERN_CVAR (Bool, autoloadbrightmaps) extern HWND Window, ConWindow, GameTitleWindow; extern HANDLE StdOut; @@ -1181,7 +1183,11 @@ BOOL CALLBACK IWADBoxCallback(HWND hDlg, UINT message, WPARAM wParam, LPARAM lPa // Check the current video settings. SendDlgItemMessage( hDlg, vid_renderer ? IDC_WELCOME_OPENGL : IDC_WELCOME_SOFTWARE, BM_SETCHECK, BST_CHECKED, 0 ); SendDlgItemMessage( hDlg, IDC_WELCOME_FULLSCREEN, BM_SETCHECK, fullscreen ? BST_CHECKED : BST_UNCHECKED, 0 ); + + // [SP] This is our's SendDlgItemMessage( hDlg, IDC_WELCOME_NOAUTOLOAD, BM_SETCHECK, disableautoload ? BST_CHECKED : BST_UNCHECKED, 0 ); + SendDlgItemMessage( hDlg, IDC_WELCOME_LIGHTS, BM_SETCHECK, autoloadlights ? BST_CHECKED : BST_UNCHECKED, 0 ); + SendDlgItemMessage( hDlg, IDC_WELCOME_BRIGHTMAPS, BM_SETCHECK, autoloadbrightmaps ? BST_CHECKED : BST_UNCHECKED, 0 ); // Set up our version string. sprintf(szString, "Version %s.", GetVersionString()); @@ -1226,7 +1232,8 @@ BOOL CALLBACK IWADBoxCallback(HWND hDlg, UINT message, WPARAM wParam, LPARAM lPa // [SP] This is our's. 
disableautoload = SendDlgItemMessage( hDlg, IDC_WELCOME_NOAUTOLOAD, BM_GETCHECK, 0, 0 ) == BST_CHECKED; - + autoloadlights = SendDlgItemMessage( hDlg, IDC_WELCOME_LIGHTS, BM_GETCHECK, 0, 0 ) == BST_CHECKED; + autoloadbrightmaps = SendDlgItemMessage( hDlg, IDC_WELCOME_BRIGHTMAPS, BM_GETCHECK, 0, 0 ) == BST_CHECKED; ctrl = GetDlgItem (hDlg, IDC_IWADLIST); EndDialog(hDlg, SendMessage (ctrl, LB_GETCURSEL, 0, 0)); } diff --git a/src/win32/resource.h b/src/win32/resource.h index 937a736586..cf41b6f1f5 100644 --- a/src/win32/resource.h +++ b/src/win32/resource.h @@ -158,7 +158,7 @@ // #ifdef APSTUDIO_INVOKED #ifndef APSTUDIO_READONLY_SYMBOLS -#define _APS_NEXT_RESOURCE_VALUE 150 +#define _APS_NEXT_RESOURCE_VALUE 151 #define _APS_NEXT_COMMAND_VALUE 40001 #define _APS_NEXT_CONTROL_VALUE 1084 #define _APS_NEXT_SYMED_VALUE 101 @@ -170,7 +170,8 @@ #define IDC_WELCOME_OPENGL 4020 #define IDC_WELCOME_SOFTWARE 4021 #define IDC_WELCOME_FULLSCREEN 4022 -#define IDI_ICONST 151 // [SP] This is our's. #define IDC_WELCOME_NOAUTOLOAD 4023 +#define IDC_WELCOME_LIGHTS 4024 +#define IDC_WELCOME_BRIGHTMAPS 4025 diff --git a/src/win32/zdoom.rc b/src/win32/zdoom.rc index c5c7bd2599..99422e4ad9 100644 --- a/src/win32/zdoom.rc +++ b/src/win32/zdoom.rc @@ -120,9 +120,9 @@ BEGIN IDD_IWADDIALOG, DIALOG BEGIN LEFTMARGIN, 5 - RIGHTMARGIN, 205 + RIGHTMARGIN, 222 TOPMARGIN, 7 - BOTTOMMARGIN, 179 + BOTTOMMARGIN, 242 END IDD_EAXPROPERTYLIST, DIALOG @@ -237,33 +237,28 @@ END */ // [SP] Upstreamed from Zandronum IDD_IWADDIALOG DIALOGEX 0, 0, 224, 246 -STYLE DS_MODALFRAME | DS_FIXEDSYS | DS_CENTER | WS_POPUP | WS_CAPTION | - WS_SYSMENU +STYLE DS_SETFONT | DS_MODALFRAME | DS_FIXEDSYS | DS_CENTER | WS_POPUP | WS_CAPTION | WS_SYSMENU EXSTYLE WS_EX_APPWINDOW CAPTION "Welcome" FONT 8, "MS Shell Dlg", 0, 0, 0x1 BEGIN - ICON IDI_ICON1,IDC_STATIC,7,7,32,32 - LTEXT "Welcome to QZDoom!",IDC_STATIC,42,8,180,8 - LTEXT "",IDC_WELCOME_VERSION,42,18,180,8 - GROUPBOX "IWAD selection",IDC_STATIC,8,32,224-16,102 - LTEXT 
"Select which game file (IWAD) to run.", IDC_STATIC,12,32+12,190,8 - LISTBOX IDC_IWADLIST,12,32+24,224-24,72,LBS_NOINTEGRALHEIGHT | - WS_VSCROLL | WS_TABSTOP - GROUPBOX "Video settings",IDC_STATIC,8,138,224-16,48 - LTEXT "Choose how QZDoom will render the game.", IDC_STATIC,12,148,190,8 - CONTROL "Hardware (OpenGL)",IDC_WELCOME_OPENGL,"Button", - BS_AUTORADIOBUTTON,12,170,93,10 - CONTROL "Software (Doom)",IDC_WELCOME_SOFTWARE,"Button", - BS_AUTORADIOBUTTON,12,160,93,10 - CONTROL "Fullscreen",IDC_WELCOME_FULLSCREEN,"Button", - BS_AUTOCHECKBOX | WS_TABSTOP, 124,160,48,10 - CONTROL "Disable autoload",IDC_WELCOME_NOAUTOLOAD,"Button", - BS_AUTOCHECKBOX | WS_TABSTOP,72,192,87,10 - CONTROL "Don't ask me this again",IDC_DONTASKIWAD,"Button", - BS_AUTOCHECKBOX | WS_TABSTOP,72,202,87,10 - DEFPUSHBUTTON "Play QZDoom",IDOK,8,246-18,90,14 - PUSHBUTTON "Exit",IDCANCEL,224-58,246-18,50,14 + ICON IDI_ICON1,IDC_STATIC,7,7,21,20 + LTEXT "Welcome to QZDoom!",IDC_STATIC,42,8,180,8 + LTEXT "",IDC_WELCOME_VERSION,42,18,180,8 + GROUPBOX "IWAD selection",IDC_STATIC,8,32,208,102 + LTEXT "Select which game file (IWAD) to run.",IDC_STATIC,12,44,190,8 + LISTBOX IDC_IWADLIST,12,56,200,72,LBS_NOINTEGRALHEIGHT | WS_VSCROLL | WS_TABSTOP + GROUPBOX "Video settings",IDC_STATIC,8,138,208,48 + LTEXT "Choose how QZDoom will render the game.",IDC_STATIC,12,148,190,8 + CONTROL "Hardware (OpenGL)",IDC_WELCOME_OPENGL,"Button",BS_AUTORADIOBUTTON,12,170,93,10 + CONTROL "Software (Doom)",IDC_WELCOME_SOFTWARE,"Button",BS_AUTORADIOBUTTON,12,160,93,10 + CONTROL "Fullscreen",IDC_WELCOME_FULLSCREEN,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,124,160,48,10 + CONTROL "Disable autoload",IDC_WELCOME_NOAUTOLOAD,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,148,39,65,10 + CONTROL "Lights.pk3",IDC_WELCOME_LIGHTS,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,46,192,51,10 + CONTROL "Brightmaps.pk3",IDC_WELCOME_BRIGHTMAPS,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,111,192,65,10 + CONTROL "Don't ask me this 
again",IDC_DONTASKIWAD,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,73,211,87,10 + DEFPUSHBUTTON "Play QZDoom",IDOK,8,228,90,14 + PUSHBUTTON "Exit",IDCANCEL,166,228,50,14 END IDD_EAXPROPERTYLIST DIALOGEX 0, 0, 265, 404 From 62716c3d917dafd587568da4c507f04864b7b7c5 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 21 Dec 2016 18:38:01 +0100 Subject: [PATCH 528/912] Fix drawing dormant lights --- src/r_walldraw.cpp | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/src/r_walldraw.cpp b/src/r_walldraw.cpp index ad82f54dcb..8645477a37 100644 --- a/src/r_walldraw.cpp +++ b/src/r_walldraw.cpp @@ -561,24 +561,27 @@ static void Draw1Column(int x, int y1, int y2, WallSampler &sampler, void(*draw1 FLightNode *cur_node = dc_light_list; while (cur_node && nextlightindex < 64 * 1024) { - uint32_t red = cur_node->lightsource->GetRed(); - uint32_t green = cur_node->lightsource->GetGreen(); - uint32_t blue = cur_node->lightsource->GetBlue(); + if (!(cur_node->lightsource->flags2&MF2_DORMANT)) + { + uint32_t red = cur_node->lightsource->GetRed(); + uint32_t green = cur_node->lightsource->GetGreen(); + uint32_t blue = cur_node->lightsource->GetBlue(); - double lightX = cur_node->lightsource->X() - ViewPos.X; - double lightY = cur_node->lightsource->Y() - ViewPos.Y; - double lightZ = cur_node->lightsource->Z() - ViewPos.Z; + double lightX = cur_node->lightsource->X() - ViewPos.X; + double lightY = cur_node->lightsource->Y() - ViewPos.Y; + double lightZ = cur_node->lightsource->Z() - ViewPos.Z; - nextlightindex++; - auto &light = dc_lights[dc_num_lights++]; - light.x = (float)(lightX * ViewSin - lightY * ViewCos) - dc_viewpos.X; - light.y = (float)(lightX * ViewTanCos + lightY * ViewTanSin) - dc_viewpos.Y; - light.z = (float)lightZ; - light.radius = 256.0f / cur_node->lightsource->GetRadius(); - light.color = 0xff000000 | (red << 16) | (green << 8) | blue; + nextlightindex++; + auto &light = dc_lights[dc_num_lights++]; + light.x = 
(float)(lightX * ViewSin - lightY * ViewCos) - dc_viewpos.X; + light.y = (float)(lightX * ViewTanCos + lightY * ViewTanSin) - dc_viewpos.Y; + light.z = (float)lightZ; + light.radius = 256.0f / cur_node->lightsource->GetRadius(); + light.color = 0xff000000 | (red << 16) | (green << 8) | blue; - // Precalculate the constant part of the dot here so the drawer doesn't have to. - light.x = light.x * light.x + light.y * light.y; + // Precalculate the constant part of the dot here so the drawer doesn't have to. + light.x = light.x * light.x + light.y * light.y; + } cur_node = cur_node->nextLight; } From 6fe124d9372844871300c9dbddac82c4bfe56e3d Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 21 Dec 2016 18:48:26 +0100 Subject: [PATCH 529/912] Fixed TriLight struct declaration bug --- tools/drawergen/llvmdrawers.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/drawergen/llvmdrawers.cpp b/tools/drawergen/llvmdrawers.cpp index 706cfb79f2..f8c5da8751 100644 --- a/tools/drawergen/llvmdrawers.cpp +++ b/tools/drawergen/llvmdrawers.cpp @@ -338,7 +338,7 @@ llvm::Type *LLVMDrawers::GetTriLightStruct(llvm::LLVMContext &context) std::vector elements; elements.push_back(llvm::Type::getInt32Ty(context)); - for (int i = 0; i < 4 + TriVertex::NumVarying; i++) + for (int i = 0; i < 4; i++) elements.push_back(llvm::Type::getFloatTy(context)); TriLightStruct = llvm::StructType::create(context, elements, "TriLight", false)->getPointerTo(); return TriLightStruct; From 4551052174bbfcf9aca5690eee7ed93547d31057 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 22 Dec 2016 03:02:58 +0100 Subject: [PATCH 530/912] Fix dynamic light calculations to be unaffected by wall clipping --- src/r_walldraw.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/r_walldraw.cpp b/src/r_walldraw.cpp index 8645477a37..ad73c1fa58 100644 --- a/src/r_walldraw.cpp +++ b/src/r_walldraw.cpp @@ -545,10 +545,8 @@ static void Draw1Column(int x, int y1, 
int y2, WallSampler &sampler, void(*draw1 float t = (x - WallC.sx1 + 0.5f) / (WallC.sx2 - WallC.sx1); float wcol = w1 * (1.0f - t) + w2 * t; float zcol = 1.0f / wcol; - float xcol = (WallC.tleft.X * w1 * (1.0f - t) + WallC.tright.X * w2 * t) * zcol; - float ycol = (WallC.tleft.Y * w1 * (1.0f - t) + WallC.tright.Y * w2 * t) * zcol; - dc_viewpos.X = xcol; - dc_viewpos.Y = ycol; + dc_viewpos.X = (float)((x + 0.5 - CenterX) / CenterX * zcol); + dc_viewpos.Y = zcol; dc_viewpos.Z = (float)((CenterY - y1 - 0.5) / InvZtoScale * zcol); dc_viewpos_step.Z = (float)(-zcol / InvZtoScale); From 5cfe0cc955f0d251b1c0abbdb436761f00180daf Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 22 Dec 2016 05:20:53 +0100 Subject: [PATCH 531/912] Remove some whitespace --- src/r_3dfloors.cpp | 2 +- src/r_bsp.cpp | 2 +- src/r_bsp.h | 2 +- src/r_draw_rgba.cpp | 2 +- src/r_draw_rgba.h | 2 +- src/r_drawt_rgba.cpp | 2 +- src/r_main.cpp | 2 +- src/r_main.h | 2 +- src/r_plane.cpp | 2 +- src/r_plane.h | 2 +- src/r_segs.h | 2 +- src/r_swrenderer.cpp | 2 +- src/r_things.h | 2 +- 13 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/r_3dfloors.cpp b/src/r_3dfloors.cpp index 4289b78433..87c8af618e 100644 --- a/src/r_3dfloors.cpp +++ b/src/r_3dfloors.cpp @@ -17,7 +17,7 @@ CVAR(Int, r_3dfloors, true, 0); -namespace swrenderer +namespace swrenderer { // external variables diff --git a/src/r_bsp.cpp b/src/r_bsp.cpp index 9918e62a96..fef7271e10 100644 --- a/src/r_bsp.cpp +++ b/src/r_bsp.cpp @@ -55,7 +55,7 @@ CVAR (Bool, r_drawflat, false, 0) // [RH] Don't texture segs? EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor); -namespace swrenderer +namespace swrenderer { using namespace drawerargs; diff --git a/src/r_bsp.h b/src/r_bsp.h index e4b518e75d..e4d70c4cf1 100644 --- a/src/r_bsp.h +++ b/src/r_bsp.h @@ -29,7 +29,7 @@ EXTERN_CVAR (Bool, r_drawflat) // [RH] Don't texture segs? 
-namespace swrenderer +namespace swrenderer { // The 3072 below is just an arbitrary value picked to avoid diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 2928ac73e0..9f8a0c312b 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -58,7 +58,7 @@ CVAR(Bool, r_mipmap, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); // Level of detail texture bias CVAR(Float, r_lod_bias, -1.5, 0); // To do: add CVAR_ARCHIVE | CVAR_GLOBALCONFIG when a good default has been decided -namespace swrenderer +namespace swrenderer { extern "C" short spanend[MAXHEIGHT]; extern float rw_light; diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index 0790740689..afb3bc22d6 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -36,7 +36,7 @@ struct FSpecialColormap; EXTERN_CVAR(Bool, r_mipmap) EXTERN_CVAR(Float, r_lod_bias) -namespace swrenderer +namespace swrenderer { // Give the compiler a strong hint we want these functions inlined: #ifndef FORCEINLINE diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp index d5aeed8a91..5d3064e5d7 100644 --- a/src/r_drawt_rgba.cpp +++ b/src/r_drawt_rgba.cpp @@ -31,7 +31,7 @@ #include "r_draw_rgba.h" #include "r_drawers.h" -namespace swrenderer +namespace swrenderer { WorkerThreadData DrawColumnRt1LLVMCommand::ThreadData(DrawerThread *thread) { diff --git a/src/r_main.cpp b/src/r_main.cpp index 5242f369fe..17b8e682f5 100644 --- a/src/r_main.cpp +++ b/src/r_main.cpp @@ -87,7 +87,7 @@ extern cycle_t FrameCycles; extern bool r_showviewer; -namespace swrenderer +namespace swrenderer { // MACROS ------------------------------------------------------------------ diff --git a/src/r_main.h b/src/r_main.h index c73b59d916..37ead76ce3 100644 --- a/src/r_main.h +++ b/src/r_main.h @@ -35,7 +35,7 @@ extern int viewwindowy; typedef BYTE lighttable_t; // This could be wider for >8 bit display. 
-namespace swrenderer +namespace swrenderer { // diff --git a/src/r_plane.cpp b/src/r_plane.cpp index d897ff6764..ef9a73540f 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -70,7 +70,7 @@ CVAR(Bool, r_skyboxes, true, 0) EXTERN_CVAR(Int, r_skymode) -namespace swrenderer +namespace swrenderer { using namespace drawerargs; diff --git a/src/r_plane.h b/src/r_plane.h index 3a067b527e..0e133a7cd2 100644 --- a/src/r_plane.h +++ b/src/r_plane.h @@ -27,7 +27,7 @@ class ASkyViewpoint; -namespace swrenderer +namespace swrenderer { // diff --git a/src/r_segs.h b/src/r_segs.h index 6dc08d52c7..8f552daeee 100644 --- a/src/r_segs.h +++ b/src/r_segs.h @@ -23,7 +23,7 @@ #ifndef __R_SEGS_H__ #define __R_SEGS_H__ -namespace swrenderer +namespace swrenderer { struct drawseg_t; diff --git a/src/r_swrenderer.cpp b/src/r_swrenderer.cpp index 5d94e04421..77cbb28eb7 100644 --- a/src/r_swrenderer.cpp +++ b/src/r_swrenderer.cpp @@ -67,7 +67,7 @@ CUSTOM_CVAR(Bool, r_polyrenderer, 0, CVAR_ARCHIVE | CVAR_GLOBALCONFIG | CVAR_NOI } } -namespace swrenderer +namespace swrenderer { void R_SWRSetWindow(int windowSize, int fullWidth, int fullHeight, int stHeight, float trueratio); diff --git a/src/r_things.h b/src/r_things.h index f777488a5f..c1ba2bc40a 100644 --- a/src/r_things.h +++ b/src/r_things.h @@ -28,7 +28,7 @@ struct particle_t; struct FVoxel; -namespace swrenderer +namespace swrenderer { // A vissprite_t is a thing From fcbacf8cacefa5f4f5464bb295abbeda521abdd6 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 22 Dec 2016 07:06:18 +0100 Subject: [PATCH 532/912] Plane dynlight setup --- src/r_bsp.cpp | 12 +++++++ src/r_draw.cpp | 1 + src/r_draw.h | 2 ++ src/r_plane.cpp | 94 +++++++++++++++++++++++++++++++++++++++++++++++++ src/r_plane.h | 11 ++++++ 5 files changed, 120 insertions(+) diff --git a/src/r_bsp.cpp b/src/r_bsp.cpp index fef7271e10..d8a6c2c01a 100644 --- a/src/r_bsp.cpp +++ b/src/r_bsp.cpp @@ -1129,6 +1129,9 @@ void R_Subsector (subsector_t *sub) portal ) : NULL; + if 
(ceilingplane) + R_AddPlaneLights(ceilingplane, frontsector->lighthead); + if (fixedlightlev < 0 && frontsector->e && frontsector->e->XFloor.lightlist.Size()) { light = P_GetPlaneLight(frontsector, &frontsector->floorplane, false); @@ -1166,6 +1169,9 @@ void R_Subsector (subsector_t *sub) portal ) : NULL; + if (floorplane) + R_AddPlaneLights(floorplane, frontsector->lighthead); + // kg3D - fake planes rendering if (r_3dfloors && frontsector->e && frontsector->e->XFloor.ffloors.Size()) { @@ -1223,6 +1229,9 @@ void R_Subsector (subsector_t *sub) frontsector->sky, NULL); + if (floorplane) + R_AddPlaneLights(floorplane, frontsector->lighthead); + R_FakeDrawLoop(sub); fake3D = 0; frontsector = sub->sector; @@ -1284,6 +1293,9 @@ void R_Subsector (subsector_t *sub) frontsector->sky, NULL); + if (ceilingplane) + R_AddPlaneLights(ceilingplane, frontsector->lighthead); + R_FakeDrawLoop(sub); fake3D = 0; frontsector = sub->sector; diff --git a/src/r_draw.cpp b/src/r_draw.cpp index f476e95387..2968ed3745 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -63,6 +63,7 @@ namespace swrenderer double dc_texturemid; FLightNode *dc_light_list; + visplane_light *ds_light_list; int ylookup[MAXHEIGHT]; uint8_t shadetables[NUMCOLORMAPS * 16 * 256]; diff --git a/src/r_draw.h b/src/r_draw.h index a337b49437..8b5789b555 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -20,6 +20,7 @@ EXTERN_CVAR(Int, r_columnmethod); namespace swrenderer { struct vissprite_t; + struct visplane_light; struct ShadeConstants { @@ -37,6 +38,7 @@ namespace swrenderer extern double dc_texturemid; extern FLightNode *dc_light_list; + extern visplane_light *ds_light_list; namespace drawerargs { diff --git a/src/r_plane.cpp b/src/r_plane.cpp index ef9a73540f..bb42cadef0 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -59,6 +59,7 @@ #include "v_palette.h" #include "r_data/colormaps.h" #include "r_draw_rgba.h" +#include "gl/dynlights/gl_dynlight.h" #ifdef _MSC_VER #pragma warning(disable:4244) @@ -255,6 +256,50 
@@ void R_MapPlane (int y, int x1) R_SetDSColorMapLight(basecolormap, GlobVis * fabs(CenterY - y), planeshade); } + if (r_swtruecolor) + { + // Find row position in view space + float zspan = (float)((CenterY - y - 0.5) * InvZtoScale / planeheight); + dc_viewpos.X = (float)((x1 + 0.5 - CenterX) / CenterX * zspan); + dc_viewpos.Y = zspan; + dc_viewpos.Z = (float)planeheight; + dc_viewpos_step.X = (float)(-zspan / InvZtoScale); + + static TriLight lightbuffer[64 * 1024]; + static int nextlightindex = 0; + + // Setup lights for column + dc_num_lights = 0; + dc_lights = lightbuffer + nextlightindex; + visplane_light *cur_node = ds_light_list; + while (cur_node && nextlightindex < 64 * 1024) + { + uint32_t red = cur_node->lightsource->GetRed(); + uint32_t green = cur_node->lightsource->GetGreen(); + uint32_t blue = cur_node->lightsource->GetBlue(); + + double lightX = cur_node->lightsource->X() - ViewPos.X; + double lightY = cur_node->lightsource->Y() - ViewPos.Y; + double lightZ = cur_node->lightsource->Z() - ViewPos.Z; + + nextlightindex++; + auto &light = dc_lights[dc_num_lights++]; + light.x = (float)(lightX * ViewSin - lightY * ViewCos); + light.y = (float)(lightX * ViewTanCos + lightY * ViewTanSin) - dc_viewpos.Y; + light.z = (float)(lightZ - dc_viewpos.Z); + light.radius = 256.0f / cur_node->lightsource->GetRadius(); + light.color = 0xff000000 | (red << 16) | (green << 8) | blue; + + // Precalculate the constant part of the dot here so the drawer doesn't have to. 
+ light.y = light.y * light.y + light.z * light.z; + + cur_node = cur_node->next; + } + + if (nextlightindex == 64 * 1024) + nextlightindex = 0; + } + ds_y = y; ds_x1 = x1; ds_x2 = x2; @@ -284,6 +329,47 @@ void R_MapColoredPlane(int y, int x1) R_DrawColoredSpan(y, x1, spanend[y]); } +//========================================================================== + +namespace +{ + enum { max_plane_lights = 32 * 1024 }; + visplane_light plane_lights[max_plane_lights]; + int next_plane_light = 0; +} + +void R_AddPlaneLights(visplane_t *plane, FLightNode *node) +{ + while (node) + { + if (!(node->lightsource->flags2&MF2_DORMANT)) + { + bool found = false; + visplane_light *light_node = plane->lights; + while (light_node) + { + if (light_node->lightsource == node->lightsource) + { + found = true; + break; + } + light_node = light_node->next; + } + if (!found) + { + if (next_plane_light == max_plane_lights) + return; + + visplane_light *newlight = &plane_lights[next_plane_light++]; + newlight->next = plane->lights; + newlight->lightsource = node->lightsource; + plane->lights = newlight; + } + } + node = node->nextLight; + } +} + //========================================================================== // // R_ClearPlanes @@ -336,6 +422,8 @@ void R_ClearPlanes (bool fullclear) ? 
(ConBottom - viewwindowy) : 0); lastopening = 0; + + next_plane_light = 0; } } @@ -363,6 +451,8 @@ static visplane_t *new_visplane (unsigned hash) freehead = &freetail; } + check->lights = nullptr; + check->next = visplanes[hash]; visplanes[hash] = check; return check; @@ -1815,6 +1905,8 @@ void R_MapVisPlane (visplane_t *pl, void (*mapfunc)(int y, int x1)) int t2 = pl->top[x]; int b2 = pl->bottom[x]; + ds_light_list = pl->lights; + if (b2 > t2) { fillshort (spanend+t2, b2-t2, x); @@ -1861,6 +1953,8 @@ void R_MapVisPlane (visplane_t *pl, void (*mapfunc)(int y, int x1)) { mapfunc (--b2, pl->left); } + + ds_light_list = nullptr; } //========================================================================== diff --git a/src/r_plane.h b/src/r_plane.h index 0e133a7cd2..740020209c 100644 --- a/src/r_plane.h +++ b/src/r_plane.h @@ -26,10 +26,18 @@ #include class ASkyViewpoint; +class ADynamicLight; +struct FLightNode; namespace swrenderer { +struct visplane_light +{ + ADynamicLight *lightsource; + visplane_light *next; +}; + // // The infamous visplane // @@ -39,6 +47,7 @@ struct visplane_s FDynamicColormap *colormap; // [RH] Support multiple colormaps FSectorPortal *portal; // [RH] Support sky boxes + visplane_light *lights; FTransform xform; secplane_t height; @@ -89,6 +98,8 @@ void R_InitPlanes (); void R_DeinitPlanes (); void R_ClearPlanes (bool fullclear); +void R_AddPlaneLights(visplane_t *plane, FLightNode *light_head); + int R_DrawPlanes (); void R_DrawPortals (); void R_DrawSkyPlane (visplane_t *pl); From 4f7c1dfdf51e8ca0af6bb8590f688f9b759042e8 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 22 Dec 2016 08:42:21 +0100 Subject: [PATCH 533/912] Dynamic lights on flats --- src/r_draw_rgba.cpp | 5 +++ src/r_drawers.h | 4 ++ src/r_plane.cpp | 10 ++--- src/r_walldraw.cpp | 2 +- .../fixedfunction/drawspancodegen.cpp | 44 ++++++++++++++++++- .../drawergen/fixedfunction/drawspancodegen.h | 9 +++- tools/drawergen/llvmdrawers.cpp | 4 ++ 7 files changed, 69 
insertions(+), 9 deletions(-) diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 9f8a0c312b..a9fc5c80ea 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -100,6 +100,11 @@ namespace swrenderer args.flags |= DrawSpanArgs::simple_shade; if (!sampler_setup(args.source, args.xbits, args.ybits, ds_source_mipmapped)) args.flags |= DrawSpanArgs::nearest_filter; + + args.viewpos_x = dc_viewpos.X; + args.step_viewpos_x = dc_viewpos_step.X; + args.dynlights = dc_lights; + args.num_dynlights = dc_num_lights; } void DrawSpanLLVMCommand::Execute(DrawerThread *thread) diff --git a/src/r_drawers.h b/src/r_drawers.h index eff6ea3707..2ff2fd087a 100644 --- a/src/r_drawers.h +++ b/src/r_drawers.h @@ -139,6 +139,10 @@ struct DrawSpanArgs nearest_filter = 2 }; + float viewpos_x, step_viewpos_x; + TriLight *dynlights; + uint32_t num_dynlights; + FString ToString(); }; diff --git a/src/r_plane.cpp b/src/r_plane.cpp index bb42cadef0..48a5bcb0f3 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -259,11 +259,11 @@ void R_MapPlane (int y, int x1) if (r_swtruecolor) { // Find row position in view space - float zspan = (float)((CenterY - y - 0.5) * InvZtoScale / planeheight); + float zspan = planeheight / (fabs(y + 0.5 - CenterY) / InvZtoScale); dc_viewpos.X = (float)((x1 + 0.5 - CenterX) / CenterX * zspan); dc_viewpos.Y = zspan; - dc_viewpos.Z = (float)planeheight; - dc_viewpos_step.X = (float)(-zspan / InvZtoScale); + dc_viewpos.Z = (float)((CenterY - y - 0.5) / InvZtoScale * zspan); + dc_viewpos_step.X = (float)(zspan / CenterX); static TriLight lightbuffer[64 * 1024]; static int nextlightindex = 0; @@ -286,9 +286,9 @@ void R_MapPlane (int y, int x1) auto &light = dc_lights[dc_num_lights++]; light.x = (float)(lightX * ViewSin - lightY * ViewCos); light.y = (float)(lightX * ViewTanCos + lightY * ViewTanSin) - dc_viewpos.Y; - light.z = (float)(lightZ - dc_viewpos.Z); + light.z = (float)lightZ - dc_viewpos.Z; light.radius = 256.0f / 
cur_node->lightsource->GetRadius(); - light.color = 0xff000000 | (red << 16) | (green << 8) | blue; + light.color = (red << 16) | (green << 8) | blue; // Precalculate the constant part of the dot here so the drawer doesn't have to. light.y = light.y * light.y + light.z * light.z; diff --git a/src/r_walldraw.cpp b/src/r_walldraw.cpp index ad73c1fa58..f43b8f9d48 100644 --- a/src/r_walldraw.cpp +++ b/src/r_walldraw.cpp @@ -575,7 +575,7 @@ static void Draw1Column(int x, int y1, int y2, WallSampler &sampler, void(*draw1 light.y = (float)(lightX * ViewTanCos + lightY * ViewTanSin) - dc_viewpos.Y; light.z = (float)lightZ; light.radius = 256.0f / cur_node->lightsource->GetRadius(); - light.color = 0xff000000 | (red << 16) | (green << 8) | blue; + light.color = (red << 16) | (green << 8) | blue; // Precalculate the constant part of the dot here so the drawer doesn't have to. light.x = light.x * light.x + light.y * light.y; diff --git a/tools/drawergen/fixedfunction/drawspancodegen.cpp b/tools/drawergen/fixedfunction/drawspancodegen.cpp index c6aacc75a2..ce6ce9bad2 100644 --- a/tools/drawergen/fixedfunction/drawspancodegen.cpp +++ b/tools/drawergen/fixedfunction/drawspancodegen.cpp @@ -59,6 +59,10 @@ void DrawSpanCodegen::Generate(DrawSpanVariant variant, SSAValue args) SSAShort fade_blue = args[0][22].load(true); SSAShort desaturate = args[0][23].load(true); SSAInt flags = args[0][24].load(true); + start_viewpos_x = args[0][25].load(true); + step_viewpos_x = args[0][26].load(true); + dynlights = args[0][27].load(true); + num_dynlights = args[0][28].load(true); shade_constants.light = SSAVec4i(light_blue.zext_int(), light_green.zext_int(), light_red.zext_int(), light_alpha.zext_int()); shade_constants.fade = SSAVec4i(fade_blue.zext_int(), fade_green.zext_int(), fade_red.zext_int(), fade_alpha.zext_int()); shade_constants.desaturate = desaturate.zext_int(); @@ -115,6 +119,7 @@ SSAInt DrawSpanCodegen::Loop4x(DrawSpanVariant variant, bool isSimpleShade, bool { SSAInt sseLength 
= count / 4; stack_index.store(SSAInt(0)); + stack_viewpos_x.store(start_viewpos_x); { SSAForBlock loop; SSAInt index = stack_index.load(); @@ -136,9 +141,11 @@ SSAInt DrawSpanCodegen::Loop4x(DrawSpanVariant variant, bool isSimpleShade, bool { SSAInt xfrac = stack_xfrac.load(); SSAInt yfrac = stack_yfrac.load(); + viewpos_x = stack_viewpos_x.load(); colors[i] = Blend(Shade(Sample(xfrac, yfrac, isNearestFilter, is64x64), isSimpleShade), bgcolors[i], variant); + stack_viewpos_x.store(viewpos_x + step_viewpos_x); stack_xfrac.store(xfrac + xstep); stack_yfrac.store(yfrac + ystep); } @@ -158,6 +165,7 @@ void DrawSpanCodegen::Loop(SSAInt start, DrawSpanVariant variant, bool isSimpleS { SSAForBlock loop; SSAInt index = stack_index.load(); + viewpos_x = stack_viewpos_x.load(); loop.loop_block(index < count); SSAInt xfrac = stack_xfrac.load(); @@ -167,6 +175,7 @@ void DrawSpanCodegen::Loop(SSAInt start, DrawSpanVariant variant, bool isSimpleS SSAVec4i color = Blend(Shade(Sample(xfrac, yfrac, isNearestFilter, is64x64), isSimpleShade), bgcolor, variant); data[index * 4].store_vec4ub(color); + stack_viewpos_x.store(viewpos_x + step_viewpos_x); stack_index.store(index.add(SSAInt(1), true, true)); stack_xfrac.store(xfrac + xstep); stack_yfrac.store(yfrac + ystep); @@ -222,10 +231,41 @@ SSAVec4i DrawSpanCodegen::SampleLinear(SSAUBytePtr texture, SSAInt xfrac, SSAInt SSAVec4i DrawSpanCodegen::Shade(SSAVec4i fg, bool isSimpleShade) { + SSAVec4i c; if (isSimpleShade) - return shade_bgra_simple(fg, light); + c = shade_bgra_simple(fg, light); else - return shade_bgra_advanced(fg, light, shade_constants); + c = shade_bgra_advanced(fg, light, shade_constants); + + stack_lit_color.store(c); + stack_light_index.store(SSAInt(0)); + + SSAForBlock block; + SSAInt light_index = stack_light_index.load(); + SSAVec4i lit_color = stack_lit_color.load(); + block.loop_block(light_index < num_dynlights); + { + SSAVec4i light_color = SSAUBytePtr(dynlights[light_index][0].v).load_vec4ub(true); + 
SSAFloat light_x = dynlights[light_index][1].load(true); + SSAFloat light_y = dynlights[light_index][2].load(true); + //SSAFloat light_z = dynlights[light_index][3].load(true); + SSAFloat light_rcp_radius = dynlights[light_index][4].load(true); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // attenuation = 1 - MIN(dist * (1/radius), 1) + SSAFloat Lyz2 = light_y; // L.y*L.y + L.z*L.z + SSAFloat Lx = light_x - viewpos_x; + SSAFloat dist = SSAFloat::sqrt(Lyz2 + Lx * Lx); + SSAInt attenuation = SSAInt(SSAFloat(256.0f) - SSAFloat::MIN(dist * light_rcp_radius, SSAFloat(256.0f)), true); + SSAVec4i contribution = (light_color * fg * attenuation) >> 16; + + stack_lit_color.store(lit_color + contribution); + stack_light_index.store(light_index + 1); + } + block.end_block(); + + return stack_lit_color.load(); } SSAVec4i DrawSpanCodegen::Blend(SSAVec4i fg, SSAVec4i bg, DrawSpanVariant variant) diff --git a/tools/drawergen/fixedfunction/drawspancodegen.h b/tools/drawergen/fixedfunction/drawspancodegen.h index 48c86040bd..9e0c67c412 100644 --- a/tools/drawergen/fixedfunction/drawspancodegen.h +++ b/tools/drawergen/fixedfunction/drawspancodegen.h @@ -49,7 +49,9 @@ private: SSAVec4i Shade(SSAVec4i fg, bool isSimpleShade); SSAVec4i Blend(SSAVec4i fg, SSAVec4i bg, DrawSpanVariant variant); - SSAStack stack_index, stack_xfrac, stack_yfrac; + SSAStack stack_index, stack_xfrac, stack_yfrac, stack_light_index; + SSAStack stack_lit_color; + SSAStack stack_viewpos_x; SSAUBytePtr destorg; SSAUBytePtr source; @@ -73,4 +75,9 @@ private: SSABool is_simple_shade; SSABool is_nearest_filter; SSAShadeConstants shade_constants; + + SSAFloat start_viewpos_x, step_viewpos_x; + SSAValue dynlights; // TriLight* + SSAInt num_dynlights; + SSAFloat viewpos_x; }; diff --git a/tools/drawergen/llvmdrawers.cpp b/tools/drawergen/llvmdrawers.cpp index f8c5da8751..5c3bf05a43 100644 --- a/tools/drawergen/llvmdrawers.cpp +++ b/tools/drawergen/llvmdrawers.cpp @@ -265,6 +265,10 @@ llvm::Type 
*LLVMDrawers::GetDrawSpanArgsStruct(llvm::LLVMContext &context) elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_blue; elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t desaturate; elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t flags; + elements.push_back(llvm::Type::getFloatTy(context)); // float viewpos_x; + elements.push_back(llvm::Type::getFloatTy(context)); // float step_viewpos_x; + elements.push_back(GetTriLightStruct(context)); // TriLight *dynlights; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t num_dynlights; DrawSpanArgsStruct = llvm::StructType::create(context, elements, "DrawSpanArgs", false)->getPointerTo(); return DrawSpanArgsStruct; } From 62724c21612f3e4cff271d6cbf2ec0eec81277d0 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 22 Dec 2016 08:50:52 +0100 Subject: [PATCH 534/912] Fix light color indexing bug --- tools/drawergen/fixedfunction/drawspancodegen.cpp | 2 +- tools/drawergen/fixedfunction/drawwallcodegen.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/drawergen/fixedfunction/drawspancodegen.cpp b/tools/drawergen/fixedfunction/drawspancodegen.cpp index ce6ce9bad2..c35198ba81 100644 --- a/tools/drawergen/fixedfunction/drawspancodegen.cpp +++ b/tools/drawergen/fixedfunction/drawspancodegen.cpp @@ -245,7 +245,7 @@ SSAVec4i DrawSpanCodegen::Shade(SSAVec4i fg, bool isSimpleShade) SSAVec4i lit_color = stack_lit_color.load(); block.loop_block(light_index < num_dynlights); { - SSAVec4i light_color = SSAUBytePtr(dynlights[light_index][0].v).load_vec4ub(true); + SSAVec4i light_color = SSAUBytePtr(SSAValue(dynlights[light_index][0]).v).load_vec4ub(true); SSAFloat light_x = dynlights[light_index][1].load(true); SSAFloat light_y = dynlights[light_index][2].load(true); //SSAFloat light_z = dynlights[light_index][3].load(true); diff --git a/tools/drawergen/fixedfunction/drawwallcodegen.cpp b/tools/drawergen/fixedfunction/drawwallcodegen.cpp 
index 6f546b9b3f..5e7f9000d6 100644 --- a/tools/drawergen/fixedfunction/drawwallcodegen.cpp +++ b/tools/drawergen/fixedfunction/drawwallcodegen.cpp @@ -233,7 +233,7 @@ SSAVec4i DrawWallCodegen::Shade(SSAVec4i fg, int index, bool isSimpleShade) SSAVec4i lit_color = stack_lit_color.load(); block.loop_block(light_index < num_dynlights); { - SSAVec4i light_color = SSAUBytePtr(dynlights[light_index][0].v).load_vec4ub(true); + SSAVec4i light_color = SSAUBytePtr(SSAValue(dynlights[light_index][0]).v).load_vec4ub(true); SSAFloat light_x = dynlights[light_index][1].load(true); //SSAFloat light_y = dynlights[light_index][2].load(true); SSAFloat light_z = dynlights[light_index][3].load(true); From ea1aeb3cdc37df0666e727353f21ec5a5e49672d Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Thu, 22 Dec 2016 03:21:56 -0500 Subject: [PATCH 535/912] - reversed sub and revsub in LLVM drawers --- src/r_poly_sprite.cpp | 2 +- tools/drawergen/fixedfunction/drawercodegen.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/r_poly_sprite.cpp b/src/r_poly_sprite.cpp index 2cb285298c..1821357535 100644 --- a/src/r_poly_sprite.cpp +++ b/src/r_poly_sprite.cpp @@ -185,7 +185,7 @@ void RenderPolySprite::Render(const TriMatrix &worldToClip, const Vec4f &clipPla { args.uniforms.destalpha = (uint32_t)(1.0 * 256); args.uniforms.srcalpha = (uint32_t)(thing->Alpha * 256); - blendmode = args.translation ? TriBlendMode::TranslateSub : TriBlendMode::Sub; + blendmode = args.translation ? 
TriBlendMode::TranslateRevSub : TriBlendMode::RevSub; } else if (thing->RenderStyle == LegacyRenderStyles[STYLE_SoulTrans]) { diff --git a/tools/drawergen/fixedfunction/drawercodegen.cpp b/tools/drawergen/fixedfunction/drawercodegen.cpp index ccff40ac04..f7a67e5a62 100644 --- a/tools/drawergen/fixedfunction/drawercodegen.cpp +++ b/tools/drawergen/fixedfunction/drawercodegen.cpp @@ -119,7 +119,7 @@ SSAVec4i DrawerCodegen::blend_sub(SSAVec4i fg, SSAVec4i bg, SSAInt srcalpha, SSA SSAInt alpha = fg[3]; alpha = alpha + (alpha >> 7); // 255 -> 256 srcalpha = (alpha * srcalpha + 128) >> 8; - SSAVec4i color = (bg * destalpha - fg * srcalpha) / 256; + SSAVec4i color = (fg * srcalpha - bg * destalpha) / 256; return color.insert(3, 255); } @@ -128,7 +128,7 @@ SSAVec4i DrawerCodegen::blend_revsub(SSAVec4i fg, SSAVec4i bg, SSAInt srcalpha, SSAInt alpha = fg[3]; alpha = alpha + (alpha >> 7); // 255 -> 256 srcalpha = (alpha * srcalpha + 128) >> 8; - SSAVec4i color = (fg * srcalpha - bg * destalpha) / 256; + SSAVec4i color = (bg * destalpha - fg * srcalpha) / 256; return color.insert(3, 255); } From 9a529192b0a8ea98c88a226110c4ed00ac17bd15 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 22 Dec 2016 21:21:57 +0100 Subject: [PATCH 536/912] Switch to the lower precision rsqrtss instruction as it is faster, especially on older Intel architectures and on AMD --- tools/drawergen/fixedfunction/drawspancodegen.cpp | 2 +- tools/drawergen/fixedfunction/drawwallcodegen.cpp | 2 +- tools/drawergen/ssa/ssa_float.cpp | 13 +++++++++++++ tools/drawergen/ssa/ssa_float.h | 2 ++ 4 files changed, 17 insertions(+), 2 deletions(-) diff --git a/tools/drawergen/fixedfunction/drawspancodegen.cpp b/tools/drawergen/fixedfunction/drawspancodegen.cpp index c35198ba81..2272acf8db 100644 --- a/tools/drawergen/fixedfunction/drawspancodegen.cpp +++ b/tools/drawergen/fixedfunction/drawspancodegen.cpp @@ -256,7 +256,7 @@ SSAVec4i DrawSpanCodegen::Shade(SSAVec4i fg, bool isSimpleShade) // attenuation = 1 - 
MIN(dist * (1/radius), 1) SSAFloat Lyz2 = light_y; // L.y*L.y + L.z*L.z SSAFloat Lx = light_x - viewpos_x; - SSAFloat dist = SSAFloat::sqrt(Lyz2 + Lx * Lx); + SSAFloat dist = SSAFloat::fastsqrt(Lyz2 + Lx * Lx); SSAInt attenuation = SSAInt(SSAFloat(256.0f) - SSAFloat::MIN(dist * light_rcp_radius, SSAFloat(256.0f)), true); SSAVec4i contribution = (light_color * fg * attenuation) >> 16; diff --git a/tools/drawergen/fixedfunction/drawwallcodegen.cpp b/tools/drawergen/fixedfunction/drawwallcodegen.cpp index 5e7f9000d6..898aebdbb3 100644 --- a/tools/drawergen/fixedfunction/drawwallcodegen.cpp +++ b/tools/drawergen/fixedfunction/drawwallcodegen.cpp @@ -244,7 +244,7 @@ SSAVec4i DrawWallCodegen::Shade(SSAVec4i fg, int index, bool isSimpleShade) // attenuation = 1 - MIN(dist * (1/radius), 1) SSAFloat Lxy2 = light_x; // L.x*L.x + L.y*L.y SSAFloat Lz = light_z - z; - SSAFloat dist = SSAFloat::sqrt(Lxy2 + Lz * Lz); + SSAFloat dist = SSAFloat::fastsqrt(Lxy2 + Lz * Lz); SSAInt attenuation = SSAInt(SSAFloat(256.0f) - SSAFloat::MIN(dist * light_rcp_radius, SSAFloat(256.0f)), true); SSAVec4i contribution = (light_color * fg * attenuation) >> 16; diff --git a/tools/drawergen/ssa/ssa_float.cpp b/tools/drawergen/ssa/ssa_float.cpp index f587d7b5a8..f537792fc9 100644 --- a/tools/drawergen/ssa/ssa_float.cpp +++ b/tools/drawergen/ssa/ssa_float.cpp @@ -25,6 +25,7 @@ #include "ssa_int.h" #include "ssa_scope.h" #include "ssa_bool.h" +#include "ssa_vec4f.h" SSAFloat::SSAFloat() : v(0) @@ -60,6 +61,18 @@ SSAFloat SSAFloat::sqrt(SSAFloat f) return SSAFloat::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::sqrt, params), f.v, SSAScope::hint())); } +SSAFloat SSAFloat::fastsqrt(SSAFloat f) +{ + return f * rsqrt(f); +} + +SSAFloat SSAFloat::rsqrt(SSAFloat f) +{ + llvm::Value *f_ss = SSAScope::builder().CreateInsertElement(llvm::UndefValue::get(SSAVec4f::llvm_type()), f.v, llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)0))); + f_ss = 
SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse_rsqrt_ss), f_ss, SSAScope::hint()); + return SSAFloat::from_llvm(SSAScope::builder().CreateExtractElement(f_ss, SSAInt(0).v, SSAScope::hint())); +} + SSAFloat SSAFloat::sin(SSAFloat val) { std::vector params; diff --git a/tools/drawergen/ssa/ssa_float.h b/tools/drawergen/ssa/ssa_float.h index 69fb81a759..f7e2b93ef9 100644 --- a/tools/drawergen/ssa/ssa_float.h +++ b/tools/drawergen/ssa/ssa_float.h @@ -37,6 +37,8 @@ public: static SSAFloat from_llvm(llvm::Value *v) { return SSAFloat(v); } static llvm::Type *llvm_type(); static SSAFloat sqrt(SSAFloat f); + static SSAFloat fastsqrt(SSAFloat f); + static SSAFloat rsqrt(SSAFloat f); static SSAFloat sin(SSAFloat val); static SSAFloat cos(SSAFloat val); static SSAFloat pow(SSAFloat val, SSAFloat power); From b0a96af220d3062448a50f1bc6d95ad73e5b0e32 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 22 Dec 2016 21:34:03 +0100 Subject: [PATCH 537/912] Improve dynamic light performance by only including lights if they touch a column or span --- src/r_plane.cpp | 32 ++++++++++++++++++++------------ src/r_walldraw.cpp | 32 ++++++++++++++++++++------------ 2 files changed, 40 insertions(+), 24 deletions(-) diff --git a/src/r_plane.cpp b/src/r_plane.cpp index 48a5bcb0f3..f93e0579fb 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -274,24 +274,32 @@ void R_MapPlane (int y, int x1) visplane_light *cur_node = ds_light_list; while (cur_node && nextlightindex < 64 * 1024) { - uint32_t red = cur_node->lightsource->GetRed(); - uint32_t green = cur_node->lightsource->GetGreen(); - uint32_t blue = cur_node->lightsource->GetBlue(); - double lightX = cur_node->lightsource->X() - ViewPos.X; double lightY = cur_node->lightsource->Y() - ViewPos.Y; double lightZ = cur_node->lightsource->Z() - ViewPos.Z; - nextlightindex++; - auto &light = dc_lights[dc_num_lights++]; - light.x = (float)(lightX * ViewSin - lightY * ViewCos); - light.y = (float)(lightX * 
ViewTanCos + lightY * ViewTanSin) - dc_viewpos.Y; - light.z = (float)lightZ - dc_viewpos.Z; - light.radius = 256.0f / cur_node->lightsource->GetRadius(); - light.color = (red << 16) | (green << 8) | blue; + float lx = (float)(lightX * ViewSin - lightY * ViewCos); + float ly = (float)(lightX * ViewTanCos + lightY * ViewTanSin) - dc_viewpos.Y; + float lz = (float)lightZ - dc_viewpos.Z; // Precalculate the constant part of the dot here so the drawer doesn't have to. - light.y = light.y * light.y + light.z * light.z; + float lconstant = ly * ly + lz * lz; + + // Include light only if it touches this row + float radius = cur_node->lightsource->GetRadius(); + if (radius * radius >= lconstant) + { + uint32_t red = cur_node->lightsource->GetRed(); + uint32_t green = cur_node->lightsource->GetGreen(); + uint32_t blue = cur_node->lightsource->GetBlue(); + + nextlightindex++; + auto &light = dc_lights[dc_num_lights++]; + light.x = lx; + light.y = lconstant; + light.radius = 256.0f / radius; + light.color = (red << 16) | (green << 8) | blue; + } cur_node = cur_node->next; } diff --git a/src/r_walldraw.cpp b/src/r_walldraw.cpp index f43b8f9d48..0523d8960f 100644 --- a/src/r_walldraw.cpp +++ b/src/r_walldraw.cpp @@ -561,24 +561,32 @@ static void Draw1Column(int x, int y1, int y2, WallSampler &sampler, void(*draw1 { if (!(cur_node->lightsource->flags2&MF2_DORMANT)) { - uint32_t red = cur_node->lightsource->GetRed(); - uint32_t green = cur_node->lightsource->GetGreen(); - uint32_t blue = cur_node->lightsource->GetBlue(); - double lightX = cur_node->lightsource->X() - ViewPos.X; double lightY = cur_node->lightsource->Y() - ViewPos.Y; double lightZ = cur_node->lightsource->Z() - ViewPos.Z; - nextlightindex++; - auto &light = dc_lights[dc_num_lights++]; - light.x = (float)(lightX * ViewSin - lightY * ViewCos) - dc_viewpos.X; - light.y = (float)(lightX * ViewTanCos + lightY * ViewTanSin) - dc_viewpos.Y; - light.z = (float)lightZ; - light.radius = 256.0f / 
cur_node->lightsource->GetRadius(); - light.color = (red << 16) | (green << 8) | blue; + float lx = (float)(lightX * ViewSin - lightY * ViewCos) - dc_viewpos.X; + float ly = (float)(lightX * ViewTanCos + lightY * ViewTanSin) - dc_viewpos.Y; + float lz = (float)lightZ; // Precalculate the constant part of the dot here so the drawer doesn't have to. - light.x = light.x * light.x + light.y * light.y; + float lconstant = lx * lx + ly * ly; + + // Include light only if it touches this column + float radius = cur_node->lightsource->GetRadius(); + if (radius * radius >= lconstant) + { + uint32_t red = cur_node->lightsource->GetRed(); + uint32_t green = cur_node->lightsource->GetGreen(); + uint32_t blue = cur_node->lightsource->GetBlue(); + + nextlightindex++; + auto &light = dc_lights[dc_num_lights++]; + light.x = lconstant; + light.z = lz; + light.radius = 256.0f / cur_node->lightsource->GetRadius(); + light.color = (red << 16) | (green << 8) | blue; + } } cur_node = cur_node->nextLight; From 175701ac5d0df81a9b5560afd30ea5ba4fc96610 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 23 Dec 2016 03:54:57 +0100 Subject: [PATCH 538/912] Fix lights not being copied when a visplane is duplicated --- src/r_plane.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/r_plane.cpp b/src/r_plane.cpp index f93e0579fb..502d8490db 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -684,6 +684,7 @@ visplane_t *R_CheckPlane (visplane_t *pl, int start, int stop) new_pl->CurrentPortalUniq = pl->CurrentPortalUniq; new_pl->MirrorFlags = pl->MirrorFlags; new_pl->CurrentSkybox = pl->CurrentSkybox; + new_pl->lights = pl->lights; pl = new_pl; pl->left = start; pl->right = stop; From 821b10a254867350af85e4d273b50fc920ac4577 Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Sat, 17 Dec 2016 04:11:52 -0500 Subject: [PATCH 539/912] - Implementing RGB666 colormatching to replace less precise RGB555 in some parts of the code. 
--- src/f_wipe.cpp | 29 +- src/r_draw.cpp | 12 +- src/r_draw.h | 1 - src/r_draw_pal.cpp | 436 +++++++++++++---------------- src/r_draw_pal.h | 6 + src/r_drawt_pal.cpp | 272 ++++++------------ src/r_plane.cpp | 16 +- src/r_things.cpp | 43 +-- src/r_things.h | 2 +- src/textures/ddstexture.cpp | 8 +- src/textures/jpegtexture.cpp | 4 +- src/textures/multipatchtexture.cpp | 2 +- src/textures/pcxtexture.cpp | 2 +- src/textures/pngtexture.cpp | 6 +- src/textures/tgatexture.cpp | 8 +- src/v_draw.cpp | 33 +-- src/v_video.cpp | 70 ++--- src/v_video.h | 41 +-- 18 files changed, 366 insertions(+), 625 deletions(-) diff --git a/src/f_wipe.cpp b/src/f_wipe.cpp index a3ceb8d508..8d5f072f62 100644 --- a/src/f_wipe.cpp +++ b/src/f_wipe.cpp @@ -28,6 +28,7 @@ #include "f_wipe.h" #include "c_cvars.h" #include "templates.h" +#include "v_palette.h" // // SCREEN WIPE PACKAGE @@ -299,12 +300,15 @@ bool wipe_doBurn (int ticks) else { int bglevel = 64-fglevel; - DWORD *fg2rgb = Col2RGB8[fglevel]; - DWORD *bg2rgb = Col2RGB8[bglevel]; - DWORD fg = fg2rgb[fromnew[x]]; - DWORD bg = bg2rgb[fromold[x]]; - fg = (fg+bg) | 0x1f07c1f; - to[x] = RGB32k.All[fg & (fg>>15)]; + + const PalEntry* pal = GPalette.BaseColors; + + DWORD fg = fromnew[x]; + DWORD bg = fromold[x]; + int r = MIN((pal[fg].r * fglevel + pal[bg].r * bglevel) >> 8, 63); + int g = MIN((pal[fg].g * fglevel + pal[bg].g * bglevel) >> 8, 63); + int b = MIN((pal[fg].b * fglevel + pal[bg].b * bglevel) >> 8, 63); + to[x] = RGB256k.RGB[r][g][b]; done = false; } } @@ -342,20 +346,21 @@ bool wipe_doFade (int ticks) { int x, y; int bglevel = 64 - fade; - DWORD *fg2rgb = Col2RGB8[fade]; - DWORD *bg2rgb = Col2RGB8[bglevel]; BYTE *fromnew = (BYTE *)wipe_scr_end; BYTE *fromold = (BYTE *)wipe_scr_start; BYTE *to = screen->GetBuffer(); + const PalEntry *pal = GPalette.BaseColors; for (y = 0; y < SCREENHEIGHT; y++) { for (x = 0; x < SCREENWIDTH; x++) { - DWORD fg = fg2rgb[fromnew[x]]; - DWORD bg = bg2rgb[fromold[x]]; - fg = (fg+bg) | 0x1f07c1f; - to[x] 
= RGB32k.All[fg & (fg>>15)]; + DWORD fg = fromnew[x]; + DWORD bg = fromold[x]; + int r = MIN((pal[fg].r * (64-bglevel) + pal[bg].r * bglevel) >> 8, 63); + int g = MIN((pal[fg].g * (64-bglevel) + pal[bg].g * bglevel) >> 8, 63); + int b = MIN((pal[fg].b * (64-bglevel) + pal[bg].b * bglevel) >> 8, 63); + to[x] = RGB256k.RGB[r][g][b]; } fromnew += SCREENWIDTH; fromold += SCREENWIDTH; diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 25297f6c84..20127f50d3 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -238,22 +238,16 @@ namespace swrenderer } if (flags & STYLEF_InvertSource) { - dc_srcblend = Col2RGB8_Inverse[fglevel >> 10]; - dc_destblend = Col2RGB8_LessPrecision[bglevel >> 10]; dc_srcalpha = fglevel; dc_destalpha = bglevel; } else if (op == STYLEOP_Add && fglevel + bglevel <= FRACUNIT) { - dc_srcblend = Col2RGB8[fglevel >> 10]; - dc_destblend = Col2RGB8[bglevel >> 10]; dc_srcalpha = fglevel; dc_destalpha = bglevel; } else { - dc_srcblend = Col2RGB8_LessPrecision[fglevel >> 10]; - dc_destblend = Col2RGB8_LessPrecision[bglevel >> 10]; dc_srcalpha = fglevel; dc_destalpha = bglevel; } @@ -450,7 +444,7 @@ namespace swrenderer uint32_t g = GPART(color); uint32_t b = BPART(color); // dc_color is used by the rt_* routines. It is indexed into dc_srcblend. 
- dc_color = RGB32k.RGB[r >> 3][g >> 3][b >> 3]; + dc_color = RGB256k.RGB[r >> 2][g >> 2][b >> 2]; if (style.Flags & STYLEF_InvertSource) { r = 255 - r; @@ -1362,8 +1356,4 @@ namespace swrenderer } } - void R_DrawParticle(vissprite_t *sprite) - { - R_DrawParticle_C(sprite); - } } diff --git a/src/r_draw.h b/src/r_draw.h index ab430d2af5..d9ad7a2da1 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -203,5 +203,4 @@ namespace swrenderer void R_MapTiltedPlane(int y, int x1); void R_MapColoredPlane(int y, int x1); - void R_DrawParticle(vissprite_t *); } diff --git a/src/r_draw_pal.cpp b/src/r_draw_pal.cpp index cfb55a6706..fe6c63b6de 100644 --- a/src/r_draw_pal.cpp +++ b/src/r_draw_pal.cpp @@ -4,6 +4,7 @@ **--------------------------------------------------------------------------- ** Copyright 1998-2016 Randy Heit ** Copyright 2016 Magnus Norddahl +** Copyright 2016 Rachael Alexanderson ** All rights reserved. ** ** Redistribution and use in source and binary forms, with or without @@ -314,10 +315,10 @@ namespace swrenderer uint8_t pix = source[frac >> bits]; if (pix != 0) { - uint32_t fg = fg2rgb[colormap[pix]]; - uint32_t bg = bg2rgb[*dest]; - fg = (fg + bg) | 0x1f07c1f; - *dest = RGB32k.All[fg & (fg >> 15)]; + uint32_t r = MIN(GPalette.BaseColors[colormap[pix]].r + GPalette.BaseColors[*dest].r, 255); + uint32_t g = MIN(GPalette.BaseColors[colormap[pix]].g + GPalette.BaseColors[*dest].g, 255); + uint32_t b = MIN(GPalette.BaseColors[colormap[pix]].b + GPalette.BaseColors[*dest].b, 255); + *dest = RGB256k.RGB[r>>2][g>>2][b>>2]; } frac += fracstep; dest += pitch; @@ -357,10 +358,10 @@ namespace swrenderer uint8_t pix = _bufplce[i][vplce[i] >> bits]; if (pix != 0) { - uint32_t fg = fg2rgb[_palookupoffse[i][pix]]; - uint32_t bg = bg2rgb[dest[i]]; - fg = (fg + bg) | 0x1f07c1f; - dest[i] = RGB32k.All[fg & (fg >> 15)]; + uint32_t r = MIN(GPalette.BaseColors[_palookupoffse[i][pix]].r + GPalette.BaseColors[dest[i]].r, 255); + uint32_t g = 
MIN(GPalette.BaseColors[_palookupoffse[i][pix]].g + GPalette.BaseColors[dest[i]].g, 255); + uint32_t b = MIN(GPalette.BaseColors[_palookupoffse[i][pix]].b + GPalette.BaseColors[dest[i]].b, 255); + dest[i] = RGB256k.RGB[r>>2][g>>2][b>>2]; } vplce[i] += vince[i]; } @@ -396,15 +397,10 @@ namespace swrenderer uint8_t pix = source[frac >> bits]; if (pix != 0) { - uint32_t a = fg2rgb[colormap[pix]] + bg2rgb[*dest]; - uint32_t b = a; - - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - *dest = RGB32k.All[a & (a >> 15)]; + uint32_t r = MIN(GPalette.BaseColors[colormap[pix]].r + GPalette.BaseColors[*dest].r, 255); + uint32_t g = MIN(GPalette.BaseColors[colormap[pix]].g + GPalette.BaseColors[*dest].g, 255); + uint32_t b = MIN(GPalette.BaseColors[colormap[pix]].b + GPalette.BaseColors[*dest].b, 255); + *dest = RGB256k.RGB[r>>2][g>>2][b>>2]; } frac += fracstep; dest += pitch; @@ -444,15 +440,10 @@ namespace swrenderer uint8_t pix = _bufplce[i][vplce[i] >> bits]; if (pix != 0) { - uint32_t a = fg2rgb[_palookupoffse[i][pix]] + bg2rgb[dest[i]]; - uint32_t b = a; - - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - dest[i] = RGB32k.All[a & (a >> 15)]; + uint32_t r = MIN(GPalette.BaseColors[_palookupoffse[i][pix]].r + GPalette.BaseColors[dest[i]].r, 255); + uint32_t g = MIN(GPalette.BaseColors[_palookupoffse[i][pix]].g + GPalette.BaseColors[dest[i]].g, 255); + uint32_t b = MIN(GPalette.BaseColors[_palookupoffse[i][pix]].b + GPalette.BaseColors[dest[i]].b, 255); + dest[i] = RGB256k.RGB[r>>2][g>>2][b>>2]; } vplce[i] += vince[i]; } @@ -488,14 +479,10 @@ namespace swrenderer uint8_t pix = source[frac >> bits]; if (pix != 0) { - uint32_t a = (fg2rgb[colormap[pix]] | 0x40100400) - bg2rgb[*dest]; - uint32_t b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - *dest = RGB32k.All[a & (a >> 15)]; + int r = clamp(-GPalette.BaseColors[colormap[pix]].r + GPalette.BaseColors[*dest].r, 0, 255); + 
int g = clamp(-GPalette.BaseColors[colormap[pix]].g + GPalette.BaseColors[*dest].g, 0, 255); + int b = clamp(-GPalette.BaseColors[colormap[pix]].b + GPalette.BaseColors[*dest].b, 0, 255); + *dest = RGB256k.RGB[r>>2][g>>2][b>>2]; } frac += fracstep; dest += pitch; @@ -535,14 +522,10 @@ namespace swrenderer uint8_t pix = _bufplce[i][vplce[i] >> bits]; if (pix != 0) { - uint32_t a = (fg2rgb[_palookupoffse[i][pix]] | 0x40100400) - bg2rgb[dest[i]]; - uint32_t b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - dest[i] = RGB32k.All[a & (a >> 15)]; + int r = clamp(-GPalette.BaseColors[_palookupoffse[i][pix]].r + GPalette.BaseColors[dest[i]].r, 0, 255); + int g = clamp(-GPalette.BaseColors[_palookupoffse[i][pix]].g + GPalette.BaseColors[dest[i]].g, 0, 255); + int b = clamp(-GPalette.BaseColors[_palookupoffse[i][pix]].b + GPalette.BaseColors[dest[i]].b, 0, 255); + dest[i] = RGB256k.RGB[r>>2][g>>2][b>>2]; } vplce[i] += vince[i]; } @@ -578,14 +561,10 @@ namespace swrenderer uint8_t pix = source[frac >> bits]; if (pix != 0) { - uint32_t a = (bg2rgb[*dest] | 0x40100400) - fg2rgb[colormap[pix]]; - uint32_t b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - *dest = RGB32k.All[a & (a >> 15)]; + int r = clamp(GPalette.BaseColors[colormap[pix]].r - GPalette.BaseColors[*dest].r, 0, 255); + int g = clamp(GPalette.BaseColors[colormap[pix]].g - GPalette.BaseColors[*dest].g, 0, 255); + int b = clamp(GPalette.BaseColors[colormap[pix]].b - GPalette.BaseColors[*dest].b, 0, 255); + *dest = RGB256k.RGB[r>>2][g>>2][b>>2]; } frac += fracstep; dest += pitch; @@ -625,14 +604,10 @@ namespace swrenderer uint8_t pix = _bufplce[i][vplce[i] >> bits]; if (pix != 0) { - uint32_t a = (bg2rgb[dest[i]] | 0x40100400) - fg2rgb[_palookupoffse[i][pix]]; - uint32_t b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - dest[i] = RGB32k.All[a & (a >> 15)]; + uint32_t r = clamp(GPalette.BaseColors[_palookupoffse[i][pix]].r - 
GPalette.BaseColors[dest[i]].r, 0, 255); + uint32_t g = clamp(GPalette.BaseColors[_palookupoffse[i][pix]].g - GPalette.BaseColors[dest[i]].g, 0, 255); + uint32_t b = clamp(GPalette.BaseColors[_palookupoffse[i][pix]].b - GPalette.BaseColors[dest[i]].b, 0, 255); + dest[i] = RGB256k.RGB[r>>2][g>>2][b>>2]; } vplce[i] += vince[i]; } @@ -716,7 +691,7 @@ namespace swrenderer c_red = (c_red * alpha_bottom + solid_bottom_r * inv_alpha_bottom) >> 8; c_green = (c_green * alpha_bottom + solid_bottom_g * inv_alpha_bottom) >> 8; c_blue = (c_blue * alpha_bottom + solid_bottom_b * inv_alpha_bottom) >> 8; - *dest = RGB32k.RGB[(c_red >> 3)][(c_green >> 3)][(c_blue >> 3)]; + *dest = RGB256k.RGB[(c_red >> 2)][(c_green >> 2)][(c_blue >> 2)]; } frac += fracstep; @@ -744,8 +719,8 @@ namespace swrenderer int solid_bottom_r = RPART(solid_bottom); int solid_bottom_g = GPART(solid_bottom); int solid_bottom_b = BPART(solid_bottom); - uint32_t solid_top_fill = RGB32k.RGB[(solid_top_r >> 3)][(solid_top_g >> 3)][(solid_top_b >> 3)]; - uint32_t solid_bottom_fill = RGB32k.RGB[(solid_bottom_r >> 3)][(solid_bottom_g >> 3)][(solid_bottom_b >> 3)]; + uint32_t solid_top_fill = RGB256k.RGB[(solid_top_r >> 2)][(solid_top_g >> 2)][(solid_top_b >> 2)]; + uint32_t solid_bottom_fill = RGB256k.RGB[(solid_bottom_r >> 2)][(solid_bottom_g >> 2)][(solid_bottom_b >> 2)]; solid_top_fill = (solid_top_fill << 24) | (solid_top_fill << 16) | (solid_top_fill << 8) | solid_top_fill; solid_bottom_fill = (solid_bottom_fill << 24) | (solid_bottom_fill << 16) | (solid_bottom_fill << 8) | solid_bottom_fill; @@ -805,8 +780,7 @@ namespace swrenderer c_red = (c_red * alpha_top + solid_top_r * inv_alpha_top) >> 8; c_green = (c_green * alpha_top + solid_top_g * inv_alpha_top) >> 8; c_blue = (c_blue * alpha_top + solid_top_b * inv_alpha_top) >> 8; - output[col] = RGB32k.RGB[(c_red >> 3)][(c_green >> 3)][(c_blue >> 3)]; - + output[col] = RGB256k.RGB[(c_red >> 2)][(c_green >> 2)][(c_blue >> 2)]; frac[col] += fracstep[col]; } 
*((uint32_t*)dest) = *((uint32_t*)output); @@ -847,7 +821,7 @@ namespace swrenderer c_red = (c_red * alpha_bottom + solid_bottom_r * inv_alpha_bottom) >> 8; c_green = (c_green * alpha_bottom + solid_bottom_g * inv_alpha_bottom) >> 8; c_blue = (c_blue * alpha_bottom + solid_bottom_b * inv_alpha_bottom) >> 8; - output[col] = RGB32k.RGB[(c_red >> 3)][(c_green >> 3)][(c_blue >> 3)]; + output[col] = RGB256k.RGB[(c_red >> 2)][(c_green >> 2)][(c_blue >> 2)]; frac[col] += fracstep[col]; } @@ -929,7 +903,7 @@ namespace swrenderer c_red = (c_red * alpha_bottom + solid_bottom_r * inv_alpha_bottom) >> 8; c_green = (c_green * alpha_bottom + solid_bottom_g * inv_alpha_bottom) >> 8; c_blue = (c_blue * alpha_bottom + solid_bottom_b * inv_alpha_bottom) >> 8; - *dest = RGB32k.RGB[(c_red >> 3)][(c_green >> 3)][(c_blue >> 3)]; + *dest = RGB256k.RGB[(c_red >> 2)][(c_green >> 2)][(c_blue >> 2)]; } frac += fracstep; @@ -959,8 +933,8 @@ namespace swrenderer int solid_bottom_r = RPART(solid_bottom); int solid_bottom_g = GPART(solid_bottom); int solid_bottom_b = BPART(solid_bottom); - uint32_t solid_top_fill = RGB32k.RGB[(solid_top_r >> 3)][(solid_top_g >> 3)][(solid_top_b >> 3)]; - uint32_t solid_bottom_fill = RGB32k.RGB[(solid_bottom_r >> 3)][(solid_bottom_g >> 3)][(solid_bottom_b >> 3)]; + uint32_t solid_top_fill = RGB256k.RGB[(solid_top_r >> 2)][(solid_top_g >> 2)][(solid_top_b >> 2)]; + uint32_t solid_bottom_fill = RGB256k.RGB[(solid_bottom_r >> 2)][(solid_bottom_g >> 2)][(solid_bottom_b >> 2)]; solid_top_fill = (solid_top_fill << 24) | (solid_top_fill << 16) | (solid_top_fill << 8) | solid_top_fill; solid_bottom_fill = (solid_bottom_fill << 24) | (solid_bottom_fill << 16) | (solid_bottom_fill << 8) | solid_bottom_fill; @@ -1026,7 +1000,7 @@ namespace swrenderer c_red = (c_red * alpha_top + solid_top_r * inv_alpha_top) >> 8; c_green = (c_green * alpha_top + solid_top_g * inv_alpha_top) >> 8; c_blue = (c_blue * alpha_top + solid_top_b * inv_alpha_top) >> 8; - output[col] = 
RGB32k.RGB[(c_red >> 3)][(c_green >> 3)][(c_blue >> 3)]; + output[col] = RGB256k.RGB[(c_red >> 2)][(c_green >> 2)][(c_blue >> 2)]; frac[col] += fracstep[col]; } @@ -1080,7 +1054,7 @@ namespace swrenderer c_red = (c_red * alpha_bottom + solid_bottom_r * inv_alpha_bottom) >> 8; c_green = (c_green * alpha_bottom + solid_bottom_g * inv_alpha_bottom) >> 8; c_blue = (c_blue * alpha_bottom + solid_bottom_b * inv_alpha_bottom) >> 8; - output[col] = RGB32k.RGB[(c_red >> 3)][(c_green >> 3)][(c_blue >> 3)]; + output[col] = RGB256k.RGB[(c_red >> 2)][(c_green >> 2)][(c_blue >> 2)]; frac[col] += fracstep[col]; } @@ -1116,6 +1090,8 @@ namespace swrenderer _srcblend = dc_srcblend; _destblend = dc_destblend; _srccolor = dc_srccolor; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void DrawColumnPalCommand::Execute(DrawerThread *thread) @@ -1210,11 +1186,17 @@ namespace swrenderer dest = thread->dest_for_thread(_dest_y, pitch, dest); pitch *= thread->num_cores; + const PalEntry* pal = GPalette.BaseColors; + do { - uint32_t bg; - bg = (fg + bg2rgb[*dest]) | 0x1f07c1f; - *dest = RGB32k.All[bg & (bg >> 15)]; + int src_r = ((_srccolor >> 16) & 0xff) * _srcalpha; + int src_g = ((_srccolor >> 0) & 0xff) * _srcalpha; + int src_b = ((_srccolor >> 8) & 0xff) * _srcalpha; + int r = clamp((src_r + pal[*dest].r * _destalpha)>>18, 0, 255); + int g = clamp((src_g + pal[*dest].g * _destalpha)>>18, 0, 255); + int b = clamp((src_b + pal[*dest].b * _destalpha)>>18, 0, 255); + *dest = RGB256k.RGB[r][g][b]; dest += pitch; } while (--count); @@ -1242,17 +1224,17 @@ namespace swrenderer dest = thread->dest_for_thread(_dest_y, pitch, dest); pitch *= thread->num_cores; + const PalEntry* pal = GPalette.BaseColors; + do { - uint32_t a = fg + bg2rgb[*dest]; - uint32_t b = a; - - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - *dest = RGB32k.All[a & (a >> 15)]; + int src_r = ((_srccolor >> 16) & 0xff) * _srcalpha; + int src_g = ((_srccolor >> 0) & 0xff) * 
_srcalpha; + int src_b = ((_srccolor >> 8) & 0xff) * _srcalpha; + int r = clamp((src_r + pal[*dest].r * _destalpha)>>18, 0, 255); + int g = clamp((src_g + pal[*dest].g * _destalpha)>>18, 0, 255); + int b = clamp((src_b + pal[*dest].b * _destalpha)>>18, 0, 255); + *dest = RGB256k.RGB[r][g][b]; dest += pitch; } while (--count); } @@ -1265,11 +1247,7 @@ namespace swrenderer count = _count; dest = _dest; - uint32_t *bg2rgb; - uint32_t fg; - bg2rgb = _destblend; - fg = _srccolor | 0x40100400; int pitch = _pitch; count = thread->count_for_thread(_dest_y, count); @@ -1279,16 +1257,19 @@ namespace swrenderer dest = thread->dest_for_thread(_dest_y, pitch, dest); pitch *= thread->num_cores; + const PalEntry* palette = GPalette.BaseColors; + do { - uint32_t a = fg - bg2rgb[*dest]; - uint32_t b = a; + int src_r = ((_srccolor >> 16) & 0xff) * _srcalpha; + int src_g = ((_srccolor >> 0) & 0xff) * _srcalpha; + int src_b = ((_srccolor >> 8) & 0xff) * _srcalpha; + int bg = *dest; + int r = MAX((src_r * _srcalpha - palette[bg].r * _destalpha)>>18, 0); + int g = MAX((src_g * _srcalpha - palette[bg].g * _destalpha)>>18, 0); + int b = MAX((src_b * _srcalpha - palette[bg].b * _destalpha)>>18, 0); - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - *dest = RGB32k.All[a & (a >> 15)]; + *dest = RGB256k.RGB[r][g][b]; dest += pitch; } while (--count); } @@ -1303,11 +1284,7 @@ namespace swrenderer return; dest = _dest; - uint32_t *bg2rgb; - uint32_t fg; - bg2rgb = _destblend; - fg = _srccolor; int pitch = _pitch; count = thread->count_for_thread(_dest_y, count); @@ -1317,16 +1294,19 @@ namespace swrenderer dest = thread->dest_for_thread(_dest_y, pitch, dest); pitch *= thread->num_cores; + const PalEntry *palette = GPalette.BaseColors; + do { - uint32_t a = (bg2rgb[*dest] | 0x40100400) - fg; - uint32_t b = a; + int src_r = ((_srccolor >> 16) & 0xff) * _srcalpha; + int src_g = ((_srccolor >> 0) & 0xff) * _srcalpha; + int src_b = ((_srccolor >> 8) & 0xff) * _srcalpha; + int bg = 
*dest; + int r = MAX((src_r * _srcalpha - palette[bg].r * _destalpha)>>18, 0); + int g = MAX((src_g * _srcalpha - palette[bg].g * _destalpha)>>18, 0); + int b = MAX((src_b * _srcalpha - palette[bg].b * _destalpha)>>18, 0); - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - *dest = RGB32k.All[a & (a >> 15)]; + *dest = RGB256k.RGB[r][g][b]; dest += pitch; } while (--count); } @@ -1354,20 +1334,18 @@ namespace swrenderer fracstep *= thread->num_cores; pitch *= thread->num_cores; - uint32_t *fg2rgb = _srcblend; - uint32_t *bg2rgb = _destblend; const uint8_t *colormap = _colormap; const uint8_t *source = _source; + const PalEntry *palette = GPalette.BaseColors; do { uint32_t fg = colormap[source[frac >> FRACBITS]]; uint32_t bg = *dest; - - fg = fg2rgb[fg]; - bg = bg2rgb[bg]; - fg = (fg + bg) | 0x1f07c1f; - *dest = RGB32k.All[fg & (fg >> 15)]; + uint32_t r = MIN((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 63); + uint32_t g = MIN((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 63); + uint32_t b = MIN((palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 63); + *dest = RGB256k.RGB[r][g][b]; dest += pitch; frac += fracstep; } while (--count); @@ -1434,21 +1412,20 @@ namespace swrenderer fracstep *= thread->num_cores; pitch *= thread->num_cores; - uint32_t *fg2rgb = _srcblend; - uint32_t *bg2rgb = _destblend; const uint8_t *translation = _translation; const uint8_t *colormap = _colormap; const uint8_t *source = _source; + const PalEntry *palette = GPalette.BaseColors; + do { uint32_t fg = colormap[translation[source[frac >> FRACBITS]]]; uint32_t bg = *dest; - - fg = fg2rgb[fg]; - bg = bg2rgb[bg]; - fg = (fg + bg) | 0x1f07c1f; - *dest = RGB32k.All[fg & (fg >> 15)]; + uint32_t r = MIN((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 63); + uint32_t g = MIN((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 63); + uint32_t b = MIN((palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 63); 
+ *dest = RGB256k.RGB[r][g][b]; dest += pitch; frac += fracstep; } while (--count); @@ -1478,14 +1455,17 @@ namespace swrenderer const uint8_t *source = _source; const uint8_t *colormap = _colormap; - uint32_t *fgstart = &Col2RGB8[0][_color]; + + const PalEntry *palette = GPalette.BaseColors; do { - uint32_t val = colormap[source[frac >> FRACBITS]]; - uint32_t fg = fgstart[val << 8]; - val = (Col2RGB8[64 - val][*dest] + fg) | 0x1f07c1f; - *dest = RGB32k.All[val & (val >> 15)]; + uint32_t val = source[frac >> FRACBITS]; + + int r = (palette[*dest].r * (255-val) + palette[_color].r * val) >> 10; + int g = (palette[*dest].g * (255-val) + palette[_color].g * val) >> 10; + int b = (palette[*dest].b * (255-val) + palette[_color].b * val) >> 10; + *dest = RGB256k.RGB[clamp(r,0,63)][clamp(g,0,63)][clamp(b,0,63)]; dest += pitch; frac += fracstep; @@ -1517,20 +1497,16 @@ namespace swrenderer const uint8_t *colormap = _colormap; const uint8_t *source = _source; - uint32_t *fg2rgb = _srcblend; - uint32_t *bg2rgb = _destblend; + const PalEntry *palette = GPalette.BaseColors; do { - uint32_t a = fg2rgb[colormap[source[frac >> FRACBITS]]] + bg2rgb[*dest]; - uint32_t b = a; - - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - *dest = RGB32k.All[a & (a >> 15)]; + int fg = colormap[source[frac >> FRACBITS]]; + int bg = *dest; + int r = MIN((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 63); + int g = MIN((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 63); + int b = MIN((palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 63); + *dest = RGB256k.RGB[r][g][b]; dest += pitch; frac += fracstep; } while (--count); @@ -1562,20 +1538,16 @@ namespace swrenderer const uint8_t *translation = _translation; const uint8_t *colormap = _colormap; const uint8_t *source = _source; - uint32_t *fg2rgb = _srcblend; - uint32_t *bg2rgb = _destblend; + const PalEntry *palette = GPalette.BaseColors; do { - uint32_t a = 
fg2rgb[colormap[translation[source[frac >> FRACBITS]]]] + bg2rgb[*dest]; - uint32_t b = a; - - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - *dest = RGB32k.All[(a >> 15) & a]; + int fg = colormap[translation[source[frac >> FRACBITS]]]; + int bg = *dest; + int r = MIN((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 63); + int g = MIN((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 63); + int b = MIN((palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 63); + *dest = RGB256k.RGB[r][g][b]; dest += pitch; frac += fracstep; } while (--count); @@ -1606,19 +1578,16 @@ namespace swrenderer const uint8_t *colormap = _colormap; const uint8_t *source = _source; - uint32_t *fg2rgb = _srcblend; - uint32_t *bg2rgb = _destblend; + const PalEntry *palette = GPalette.BaseColors; do { - uint32_t a = (fg2rgb[colormap[source[frac >> FRACBITS]]] | 0x40100400) - bg2rgb[*dest]; - uint32_t b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - *dest = RGB32k.All[a & (a >> 15)]; + int fg = colormap[source[frac >> FRACBITS]]; + int bg = *dest; + int r = MAX((palette[fg].r * _srcalpha - palette[bg].r * _destalpha)>>18, 0); + int g = MAX((palette[fg].g * _srcalpha - palette[bg].g * _destalpha)>>18, 0); + int b = MAX((palette[fg].b * _srcalpha - palette[bg].b * _destalpha)>>18, 0); + *dest = RGB256k.RGB[r][g][b]; dest += pitch; frac += fracstep; } while (--count); @@ -1650,19 +1619,16 @@ namespace swrenderer const uint8_t *translation = _translation; const uint8_t *colormap = _colormap; const uint8_t *source = _source; - uint32_t *fg2rgb = _srcblend; - uint32_t *bg2rgb = _destblend; + const PalEntry *palette = GPalette.BaseColors; do { - uint32_t a = (fg2rgb[colormap[translation[source[frac >> FRACBITS]]]] | 0x40100400) - bg2rgb[*dest]; - uint32_t b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - *dest = RGB32k.All[(a >> 15) & a]; + int fg = 
colormap[translation[source[frac >> FRACBITS]]]; + int bg = *dest; + int r = MAX((palette[fg].r * _srcalpha - palette[bg].r * _destalpha)>>18, 0); + int g = MAX((palette[fg].g * _srcalpha - palette[bg].g * _destalpha)>>18, 0); + int b = MAX((palette[fg].b * _srcalpha - palette[bg].b * _destalpha)>>18, 0); + *dest = RGB256k.RGB[r][g][b]; dest += pitch; frac += fracstep; } while (--count); @@ -1693,19 +1659,16 @@ namespace swrenderer const uint8_t *colormap = _colormap; const uint8_t *source = _source; - uint32_t *fg2rgb = _srcblend; - uint32_t *bg2rgb = _destblend; + const PalEntry *palette = GPalette.BaseColors; do { - uint32_t a = (bg2rgb[*dest] | 0x40100400) - fg2rgb[colormap[source[frac >> FRACBITS]]]; - uint32_t b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - *dest = RGB32k.All[a & (a >> 15)]; + int fg = colormap[source[frac >> FRACBITS]]; + int bg = *dest; + int r = MAX((-palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 0); + int g = MAX((-palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 0); + int b = MAX((-palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 0); + *dest = RGB256k.RGB[r][g][b]; dest += pitch; frac += fracstep; } while (--count); @@ -1737,19 +1700,16 @@ namespace swrenderer const uint8_t *translation = _translation; const uint8_t *colormap = _colormap; const uint8_t *source = _source; - uint32_t *fg2rgb = _srcblend; - uint32_t *bg2rgb = _destblend; + const PalEntry *palette = GPalette.BaseColors; do { - uint32_t a = (bg2rgb[*dest] | 0x40100400) - fg2rgb[colormap[translation[source[frac >> FRACBITS]]]]; - uint32_t b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - *dest = RGB32k.All[(a >> 15) & a]; + int fg = colormap[translation[source[frac >> FRACBITS]]]; + int bg = *dest; + int r = MAX((-palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 0); + int g = MAX((-palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 0); + int b = 
MAX((-palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 0); + *dest = RGB256k.RGB[r][g][b]; dest += pitch; frac += fracstep; } while (--count); @@ -1865,6 +1825,8 @@ namespace swrenderer _srcblend = dc_srcblend; _destblend = dc_destblend; _color = ds_color; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void DrawSpanPalCommand::Execute(DrawerThread *thread) @@ -2010,8 +1972,6 @@ namespace swrenderer const uint8_t *colormap = _colormap; int count; int spot; - uint32_t *fg2rgb = _srcblend; - uint32_t *bg2rgb = _destblend; xfrac = _xfrac; yfrac = _yfrac; @@ -2023,6 +1983,8 @@ namespace swrenderer xstep = _xstep; ystep = _ystep; + const PalEntry *palette = GPalette.BaseColors; + if (_xbits == 6 && _ybits == 6) { // 64x64 is the most common case by far, so special case it. @@ -2031,10 +1993,11 @@ namespace swrenderer spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); uint32_t fg = colormap[source[spot]]; uint32_t bg = *dest; - fg = fg2rgb[fg]; - bg = bg2rgb[bg]; - fg = (fg + bg) | 0x1f07c1f; - *dest++ = RGB32k.All[fg & (fg >> 15)]; + int r = MAX((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 0); + int g = MAX((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 0); + int b = MAX((palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 0); + *dest++ = RGB256k.RGB[r][g][b]; + xfrac += xstep; yfrac += ystep; } while (--count); @@ -2049,10 +2012,11 @@ namespace swrenderer spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); uint32_t fg = colormap[source[spot]]; uint32_t bg = *dest; - fg = fg2rgb[fg]; - bg = bg2rgb[bg]; - fg = (fg + bg) | 0x1f07c1f; - *dest++ = RGB32k.All[fg & (fg >> 15)]; + int r = MAX((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 0); + int g = MAX((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 0); + int b = MAX((palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 0); + *dest++ = RGB256k.RGB[r][g][b]; + xfrac += xstep; yfrac += ystep; } while 
(--count); @@ -2073,8 +2037,8 @@ namespace swrenderer const uint8_t *colormap = _colormap; int count; int spot; - uint32_t *fg2rgb = _srcblend; - uint32_t *bg2rgb = _destblend; + + const PalEntry *palette = GPalette.BaseColors; xfrac = _xfrac; yfrac = _yfrac; @@ -2099,10 +2063,10 @@ namespace swrenderer { uint32_t fg = colormap[texdata]; uint32_t bg = *dest; - fg = fg2rgb[fg]; - bg = bg2rgb[bg]; - fg = (fg + bg) | 0x1f07c1f; - *dest = RGB32k.All[fg & (fg >> 15)]; + int r = MAX((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 0); + int g = MAX((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 0); + int b = MAX((palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 0); + *dest = RGB256k.RGB[r][g][b]; } dest++; xfrac += xstep; @@ -2124,10 +2088,10 @@ namespace swrenderer { uint32_t fg = colormap[texdata]; uint32_t bg = *dest; - fg = fg2rgb[fg]; - bg = bg2rgb[bg]; - fg = (fg + bg) | 0x1f07c1f; - *dest = RGB32k.All[fg & (fg >> 15)]; + int r = MAX((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 0); + int g = MAX((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 0); + int b = MAX((palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 0); + *dest = RGB256k.RGB[r][g][b]; } dest++; xfrac += xstep; @@ -2150,8 +2114,7 @@ namespace swrenderer const uint8_t *colormap = _colormap; int count; int spot; - uint32_t *fg2rgb = _srcblend; - uint32_t *bg2rgb = _destblend; + const PalEntry *palette = GPalette.BaseColors; xfrac = _xfrac; yfrac = _yfrac; @@ -2169,15 +2132,13 @@ namespace swrenderer do { spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t a = fg2rgb[colormap[source[spot]]] + bg2rgb[*dest]; - uint32_t b = a; + uint32_t fg = colormap[source[spot]]; + uint32_t bg = *dest; + int r = MAX((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 0); + int g = MAX((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 0); + int b = MAX((palette[fg].b * _srcalpha + palette[bg].b * 
_destalpha)>>18, 0); + *dest++ = RGB256k.RGB[r][g][b]; - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - *dest++ = RGB32k.All[a & (a >> 15)]; xfrac += xstep; yfrac += ystep; } while (--count); @@ -2190,15 +2151,13 @@ namespace swrenderer do { spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - uint32_t a = fg2rgb[colormap[source[spot]]] + bg2rgb[*dest]; - uint32_t b = a; + uint32_t fg = colormap[source[spot]]; + uint32_t bg = *dest; + int r = MAX((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 0); + int g = MAX((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 0); + int b = MAX((palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 0); + *dest++ = RGB256k.RGB[r][g][b]; - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - *dest++ = RGB32k.All[a & (a >> 15)]; xfrac += xstep; yfrac += ystep; } while (--count); @@ -2219,8 +2178,7 @@ namespace swrenderer const uint8_t *colormap = _colormap; int count; int spot; - uint32_t *fg2rgb = _srcblend; - uint32_t *bg2rgb = _destblend; + const PalEntry *palette = GPalette.BaseColors; xfrac = _xfrac; yfrac = _yfrac; @@ -2243,15 +2201,12 @@ namespace swrenderer texdata = source[spot]; if (texdata != 0) { - uint32_t a = fg2rgb[colormap[texdata]] + bg2rgb[*dest]; - uint32_t b = a; - - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - *dest = RGB32k.All[a & (a >> 15)]; + uint32_t fg = colormap[texdata]; + uint32_t bg = *dest; + int r = MAX((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 0); + int g = MAX((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 0); + int b = MAX((palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 0); + *dest = RGB256k.RGB[r][g][b]; } dest++; xfrac += xstep; @@ -2271,15 +2226,12 @@ namespace swrenderer texdata = source[spot]; if (texdata != 0) { - uint32_t a = fg2rgb[colormap[texdata]] + bg2rgb[*dest]; - uint32_t b = a; - - a 
|= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - *dest = RGB32k.All[a & (a >> 15)]; + uint32_t fg = colormap[texdata]; + uint32_t bg = *dest; + int r = MAX((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 0); + int g = MAX((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 0); + int b = MAX((palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 0); + *dest = RGB256k.RGB[r][g][b]; } dest++; xfrac += xstep; diff --git a/src/r_draw_pal.h b/src/r_draw_pal.h index f2b1f05712..0b1a5eebf8 100644 --- a/src/r_draw_pal.h +++ b/src/r_draw_pal.h @@ -106,6 +106,8 @@ namespace swrenderer uint32_t *_srcblend; uint32_t *_destblend; uint32_t _srccolor; + fixed_t _srcalpha; + fixed_t _destalpha; }; class DrawColumnPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; }; @@ -164,6 +166,8 @@ namespace swrenderer uint32_t *_srcblend; uint32_t *_destblend; int _color; + fixed_t _srcalpha; + fixed_t _destalpha; }; class DrawSpanPalCommand : public PalSpanCommand { public: void Execute(DrawerThread *thread) override; }; @@ -303,6 +307,8 @@ namespace swrenderer const uint32_t *_srcblend; const uint32_t *_destblend; const uint8_t *_translation; + fixed_t _srcalpha; + fixed_t _destalpha; int _color; }; diff --git a/src/r_drawt_pal.cpp b/src/r_drawt_pal.cpp index 3356592d25..e2af7bbf19 100644 --- a/src/r_drawt_pal.cpp +++ b/src/r_drawt_pal.cpp @@ -4,6 +4,8 @@ ** **--------------------------------------------------------------------------- ** Copyright 1998-2006 Randy Heit +** Copyright 2016 Magnus Norddahl +** Copyright 2016 Rachael Alexanderson ** All rights reserved. 
** ** Redistribution and use in source and binary forms, with or without @@ -173,6 +175,8 @@ namespace swrenderer _colormap = dc_colormap; _srcblend = dc_srcblend; _destblend = dc_destblend; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; _translation = dc_translation; _color = dc_color; } @@ -440,21 +444,20 @@ namespace swrenderer if (count <= 0) return; - const uint32_t *fg2rgb = _srcblend; - const uint32_t *bg2rgb = _destblend; dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4 + hx]; pitch = _pitch * thread->num_cores; colormap = _colormap; + const PalEntry *palette = GPalette.BaseColors; do { uint32_t fg = colormap[*source]; uint32_t bg = *dest; - fg = fg2rgb[fg]; - bg = bg2rgb[bg]; - fg = (fg+bg) | 0x1f07c1f; - *dest = RGB32k.All[fg & (fg>>15)]; + int r = MIN((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 63); + int g = MIN((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 63); + int b = MIN((palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 63); + *dest = RGB256k.RGB[r][g][b]; source += 4; dest += pitch; } while (--count); @@ -472,42 +475,22 @@ namespace swrenderer if (count <= 0) return; - const uint32_t *fg2rgb = _srcblend; - const uint32_t *bg2rgb = _destblend; dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4]; pitch = _pitch * thread->num_cores; colormap = _colormap; + const PalEntry *palette = GPalette.BaseColors; do { - uint32_t fg = colormap[source[0]]; - uint32_t bg = dest[0]; - fg = fg2rgb[fg]; - bg = bg2rgb[bg]; - fg = (fg+bg) | 0x1f07c1f; - dest[0] = RGB32k.All[fg & (fg>>15)]; - - fg = colormap[source[1]]; - bg = dest[1]; - fg = fg2rgb[fg]; - bg = bg2rgb[bg]; - fg = (fg+bg) | 0x1f07c1f; - dest[1] = RGB32k.All[fg & (fg>>15)]; - - - fg = colormap[source[2]]; - bg = dest[2]; - fg = fg2rgb[fg]; - bg = bg2rgb[bg]; - fg = (fg+bg) | 0x1f07c1f; - dest[2] 
= RGB32k.All[fg & (fg>>15)]; - - fg = colormap[source[3]]; - bg = dest[3]; - fg = fg2rgb[fg]; - bg = bg2rgb[bg]; - fg = (fg+bg) | 0x1f07c1f; - dest[3] = RGB32k.All[fg & (fg>>15)]; + for (int ks = 0; ks < 4; ks++) + { // [SP] this 4col function was a block of copy-pasted code. 4 times. I regret nothing. + uint32_t fg = colormap[source[ks]]; + uint32_t bg = dest[ks]; + int r = MIN((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 63); + int g = MIN((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 63); + int b = MIN((palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 63); + dest[ks] = RGB256k.RGB[r][g][b]; + } source += 4; dest += pitch; @@ -527,17 +510,18 @@ namespace swrenderer if (count <= 0) return; - fgstart = &Col2RGB8[0][_color]; colormap = _colormap; dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4 + hx]; pitch = _pitch * thread->num_cores; + const PalEntry *palette = GPalette.BaseColors; do { - uint32_t val = colormap[*source]; - uint32_t fg = fgstart[val<<8]; - val = (Col2RGB8[64-val][*dest] + fg) | 0x1f07c1f; - *dest = RGB32k.All[val & (val>>15)]; + uint32_t val = *source; + int r = (palette[*dest].r * (255-val) + palette[_color].r * val) >> 10; + int g = (palette[*dest].g * (255-val) + palette[_color].g * val) >> 10; + int b = (palette[*dest].b * (255-val) + palette[_color].b * val) >> 10; + *dest = RGB256k.RGB[clamp(r,0,63)][clamp(g,0,63)][clamp(b,0,63)]; source += 4; dest += pitch; } while (--count); @@ -556,30 +540,23 @@ namespace swrenderer if (count <= 0) return; - fgstart = &Col2RGB8[0][_color]; colormap = _colormap; dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4]; pitch = _pitch * thread->num_cores; + const PalEntry *palette = GPalette.BaseColors; do { uint32_t val; - - val = colormap[source[0]]; - val = (Col2RGB8[64-val][dest[0]] + fgstart[val<<8]) | 
0x1f07c1f; - dest[0] = RGB32k.All[val & (val>>15)]; - val = colormap[source[1]]; - val = (Col2RGB8[64-val][dest[1]] + fgstart[val<<8]) | 0x1f07c1f; - dest[1] = RGB32k.All[val & (val>>15)]; - - val = colormap[source[2]]; - val = (Col2RGB8[64-val][dest[2]] + fgstart[val<<8]) | 0x1f07c1f; - dest[2] = RGB32k.All[val & (val>>15)]; - - val = colormap[source[3]]; - val = (Col2RGB8[64-val][dest[3]] + fgstart[val<<8]) | 0x1f07c1f; - dest[3] = RGB32k.All[val & (val>>15)]; + for (int ks = 0; ks < 4; ks++) + { + val = source[ks]; + int r = (palette[dest[ks]].r * (255-val) + palette[_color].r * val) >> 10; + int g = (palette[dest[ks]].g * (255-val) + palette[_color].g * val) >> 10; + int b = (palette[dest[ks]].b * (255-val) + palette[_color].b * val) >> 10; + dest[ks] = RGB256k.RGB[clamp(r,0,63)][clamp(g,0,63)][clamp(b,0,63)]; + } source += 4; dest += pitch; @@ -598,23 +575,19 @@ namespace swrenderer if (count <= 0) return; - const uint32_t *fg2rgb = _srcblend; - const uint32_t *bg2rgb = _destblend; dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4 + hx]; pitch = _pitch * thread->num_cores; colormap = _colormap; + const PalEntry *palette = GPalette.BaseColors; do { - uint32_t a = fg2rgb[colormap[*source]] + bg2rgb[*dest]; - uint32_t b = a; - - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - *dest = RGB32k.All[(a>>15) & a]; + int fg = colormap[*source]; + int bg = *dest; + int r = MIN((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 63); + int g = MIN((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 63); + int b = MIN((palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 63); + *dest = RGB256k.RGB[r][g][b]; source += 4; dest += pitch; } while (--count); @@ -636,47 +609,18 @@ namespace swrenderer source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4]; pitch = _pitch * thread->num_cores; colormap = _colormap; - - const 
uint32_t *fg2rgb = _srcblend; - const uint32_t *bg2rgb = _destblend; + const PalEntry *palette = GPalette.BaseColors; do { - uint32_t a = fg2rgb[colormap[source[0]]] + bg2rgb[dest[0]]; - uint32_t b = a; - - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - dest[0] = RGB32k.All[(a>>15) & a]; - - a = fg2rgb[colormap[source[1]]] + bg2rgb[dest[1]]; - b = a; - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - dest[1] = RGB32k.All[(a>>15) & a]; - - a = fg2rgb[colormap[source[2]]] + bg2rgb[dest[2]]; - b = a; - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - dest[2] = RGB32k.All[(a>>15) & a]; - - a = fg2rgb[colormap[source[3]]] + bg2rgb[dest[3]]; - b = a; - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - dest[3] = RGB32k.All[(a>>15) & a]; + for (int ks = 0; ks < 4; ks++) + { + int fg = colormap[source[ks]]; + int bg = dest[ks]; + int r = MIN((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 63); + int g = MIN((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 63); + int b = MIN((palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 63); + dest[ks] = RGB256k.RGB[r][g][b]; + } source += 4; dest += pitch; @@ -701,16 +645,15 @@ namespace swrenderer source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4 + hx]; pitch = _pitch * thread->num_cores; colormap = _colormap; + const PalEntry *palette = GPalette.BaseColors; do { - uint32_t a = (fg2rgb[colormap[*source]] | 0x40100400) - bg2rgb[*dest]; - uint32_t b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - *dest = RGB32k.All[(a>>15) & a]; + int fg = colormap[*source]; + int bg = *dest; + int r = MAX((palette[fg].r * _srcalpha - palette[bg].r * _destalpha)>>18, 0); + int g = MAX((palette[fg].g * _srcalpha - palette[bg].g * _destalpha)>>18, 0); + int b = MAX((palette[fg].b * _srcalpha - palette[bg].b * _destalpha)>>18, 
0); + *dest = RGB256k.RGB[r][g][b]; source += 4; dest += pitch; } while (--count); @@ -734,40 +677,18 @@ namespace swrenderer source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4]; pitch = _pitch * thread->num_cores; colormap = _colormap; + const PalEntry *palette = GPalette.BaseColors; do { - uint32_t a = (fg2rgb[colormap[source[0]]] | 0x40100400) - bg2rgb[dest[0]]; - uint32_t b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - dest[0] = RGB32k.All[(a>>15) & a]; - - a = (fg2rgb[colormap[source[1]]] | 0x40100400) - bg2rgb[dest[1]]; - b = a; - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - dest[1] = RGB32k.All[(a>>15) & a]; - - a = (fg2rgb[colormap[source[2]]] | 0x40100400) - bg2rgb[dest[2]]; - b = a; - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - dest[2] = RGB32k.All[(a>>15) & a]; - - a = (fg2rgb[colormap[source[3]]] | 0x40100400) - bg2rgb[dest[3]]; - b = a; - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - dest[3] = RGB32k.All[(a>>15) & a]; + for (int ks = 0; ks < 4; ks++) + { + int fg = colormap[source[ks]]; + int bg = dest[ks]; + int r = MAX((palette[fg].r * _srcalpha - palette[bg].r * _destalpha)>>18, 0); + int g = MAX((palette[fg].g * _srcalpha - palette[bg].g * _destalpha)>>18, 0); + int b = MAX((palette[fg].b * _srcalpha - palette[bg].b * _destalpha)>>18, 0); + dest[ks] = RGB256k.RGB[r][g][b]; + } source += 4; dest += pitch; @@ -792,16 +713,15 @@ namespace swrenderer source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4 + hx]; pitch = _pitch * thread->num_cores; colormap = _colormap; + const PalEntry *palette = GPalette.BaseColors; do { - uint32_t a = (bg2rgb[*dest] | 0x40100400) - fg2rgb[colormap[*source]]; - uint32_t b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - *dest = RGB32k.All[(a>>15) & a]; + int fg = colormap[*source]; + int bg = *dest; + int r = MAX((-palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 
0); + int g = MAX((-palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 0); + int b = MAX((-palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 0); + *dest = RGB256k.RGB[r][g][b]; source += 4; dest += pitch; } while (--count); @@ -825,40 +745,18 @@ namespace swrenderer source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4]; pitch = _pitch * thread->num_cores; colormap = _colormap; + const PalEntry *palette = GPalette.BaseColors; do { - uint32_t a = (bg2rgb[dest[0]] | 0x40100400) - fg2rgb[colormap[source[0]]]; - uint32_t b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - dest[0] = RGB32k.All[(a>>15) & a]; - - a = (bg2rgb[dest[1]] | 0x40100400) - fg2rgb[colormap[source[1]]]; - b = a; - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - dest[1] = RGB32k.All[(a>>15) & a]; - - a = (bg2rgb[dest[2]] | 0x40100400) - fg2rgb[colormap[source[2]]]; - b = a; - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - dest[2] = RGB32k.All[(a>>15) & a]; - - a = (bg2rgb[dest[3]] | 0x40100400) - fg2rgb[colormap[source[3]]]; - b = a; - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - dest[3] = RGB32k.All[(a>>15) & a]; + for (int ks = 0; ks < 4; ks++) + { + int fg = colormap[source[ks]]; + int bg = dest[ks]; + int r = MAX((-palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 0); + int g = MAX((-palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 0); + int b = MAX((-palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 0); + dest[ks] = RGB256k.RGB[r][g][b]; + } source += 4; dest += pitch; diff --git a/src/r_plane.cpp b/src/r_plane.cpp index cd378aec14..865517dd11 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -1509,14 +1509,14 @@ void R_DrawNormalPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t if (!additive) { spanfunc = R_DrawSpanMaskedTranslucent; - dc_srcblend = Col2RGB8[alpha>>10]; - dc_destblend = Col2RGB8[(OPAQUE-alpha)>>10]; + 
dc_srcalpha = alpha; + dc_destalpha = OPAQUE-alpha; } else { spanfunc = R_DrawSpanMaskedAddClamp; - dc_srcblend = Col2RGB8_LessPrecision[alpha>>10]; - dc_destblend = Col2RGB8_LessPrecision[FRACUNIT>>10]; + dc_srcalpha = alpha; + dc_destalpha = FRACUNIT; } } else @@ -1531,14 +1531,14 @@ void R_DrawNormalPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t if (!additive) { spanfunc = R_DrawSpanTranslucent; - dc_srcblend = Col2RGB8[alpha>>10]; - dc_destblend = Col2RGB8[(OPAQUE-alpha)>>10]; + dc_srcalpha = alpha; + dc_destalpha = OPAQUE-alpha; } else { spanfunc = R_DrawSpanAddClamp; - dc_srcblend = Col2RGB8_LessPrecision[alpha>>10]; - dc_destblend = Col2RGB8_LessPrecision[FRACUNIT>>10]; + dc_srcalpha = alpha; + dc_destalpha = FRACUNIT; } } else diff --git a/src/r_things.cpp b/src/r_things.cpp index a1ace0d49c..61da979da8 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -2636,12 +2636,12 @@ static void R_DrawMaskedSegsBehindParticle (const vissprite_t *vis) } } -void R_DrawParticle_C (vissprite_t *vis) +//inline int clamp(int x, int y, int z) { return ((x < y) ? x : (z < y) ? 
z : y); } + +void R_DrawParticle (vissprite_t *vis) { - DWORD *bg2rgb; int spacing; BYTE *dest; - DWORD fg; BYTE color = vis->Style.colormap[vis->startfrac]; int yl = vis->y1; int ycount = vis->y2 - yl + 1; @@ -2653,33 +2653,10 @@ void R_DrawParticle_C (vissprite_t *vis) DrawerCommandQueue::WaitForWorkers(); // vis->renderflags holds translucency level (0-255) - { - fixed_t fglevel, bglevel; - DWORD *fg2rgb; + fixed_t fglevel, bglevel; - fglevel = ((vis->renderflags + 1) << 8) & ~0x3ff; - bglevel = FRACUNIT-fglevel; - fg2rgb = Col2RGB8[fglevel>>10]; - bg2rgb = Col2RGB8[bglevel>>10]; - fg = fg2rgb[color]; - } - - /* - - spacing = RenderTarget->GetPitch() - countbase; - dest = ylookup[yl] + x1 + dc_destorg; - - do - { - int count = countbase; - do - { - DWORD bg = bg2rgb[*dest]; - bg = (fg+bg) | 0x1f07c1f; - *dest++ = RGB32k.All[bg & (bg>>15)]; - } while (--count); - dest += spacing; - } while (--ycount);*/ + fglevel = ((vis->renderflags + 1) << 8) & ~0x3ff; + bglevel = FRACUNIT-fglevel; // original was row-wise // width = countbase @@ -2695,9 +2672,11 @@ void R_DrawParticle_C (vissprite_t *vis) dest = ylookup[yl] + x + dc_destorg; for (int y = 0; y < ycount; y++) { - DWORD bg = bg2rgb[*dest]; - bg = (fg+bg) | 0x1f07c1f; - *dest = RGB32k.All[bg & (bg>>15)]; + uint32_t dest_r = MIN((GPalette.BaseColors[*dest].r * bglevel + GPalette.BaseColors[color].r * fglevel) >> 18, 63); + uint32_t dest_g = MIN((GPalette.BaseColors[*dest].g * bglevel + GPalette.BaseColors[color].g * fglevel) >> 18, 63); + uint32_t dest_b = MIN((GPalette.BaseColors[*dest].b * bglevel + GPalette.BaseColors[color].b * fglevel) >> 18, 63); + + *dest = RGB256k.RGB[dest_r][dest_g][dest_b]; dest += spacing; } } diff --git a/src/r_things.h b/src/r_things.h index 6d694b8fd7..740af279a5 100644 --- a/src/r_things.h +++ b/src/r_things.h @@ -101,7 +101,7 @@ struct vissprite_t vissprite_t() {} }; -void R_DrawParticle_C (vissprite_t *); +void R_DrawParticle (vissprite_t *); void R_ProjectParticle (particle_t *, 
const sector_t *sector, int shade, int fakeside); extern int MaxVisSprites; diff --git a/src/textures/ddstexture.cpp b/src/textures/ddstexture.cpp index 31e7480221..a2c69b38b2 100644 --- a/src/textures/ddstexture.cpp +++ b/src/textures/ddstexture.cpp @@ -551,7 +551,7 @@ void FDDSTexture::ReadRGB (FWadLump &lump, BYTE *tcbuf) DWORD r = (c & RMask) << RShiftL; r |= r >> RShiftR; DWORD g = (c & GMask) << GShiftL; g |= g >> GShiftR; DWORD b = (c & BMask) << BShiftL; b |= b >> BShiftR; - *pixelp = RGB32k.RGB[r >> 27][g >> 27][b >> 27]; + *pixelp = RGB256k.RGB[r >> 26][g >> 26][b >> 26]; } else { @@ -637,7 +637,7 @@ void FDDSTexture::DecompressDXT1 (FWadLump &lump, BYTE *tcbuf) // Pick colors from the palette for each of the four colors. /*if (!tcbuf)*/ for (i = 3; i >= 0; --i) { - palcol[i] = color[i].a ? RGB32k.RGB[color[i].r >> 3][color[i].g >> 3][color[i].b >> 3] : 0; + palcol[i] = color[i].a ? RGB256k.RGB[color[i].r >> 2][color[i].g >> 2][color[i].b >> 2] : 0; } // Now decode this 4x4 block to the pixel buffer. for (y = 0; y < 4; ++y) @@ -717,7 +717,7 @@ void FDDSTexture::DecompressDXT3 (FWadLump &lump, bool premultiplied, BYTE *tcbu // Pick colors from the palette for each of the four colors. if (!tcbuf) for (i = 3; i >= 0; --i) { - palcol[i] = RGB32k.RGB[color[i].r >> 3][color[i].g >> 3][color[i].b >> 3]; + palcol[i] = RGB256k.RGB[color[i].r >> 2][color[i].g >> 2][color[i].b >> 2]; } // Now decode this 4x4 block to the pixel buffer. for (y = 0; y < 4; ++y) @@ -822,7 +822,7 @@ void FDDSTexture::DecompressDXT5 (FWadLump &lump, bool premultiplied, BYTE *tcbu // Pick colors from the palette for each of the four colors. if (!tcbuf) for (i = 3; i >= 0; --i) { - palcol[i] = RGB32k.RGB[color[i].r >> 3][color[i].g >> 3][color[i].b >> 3]; + palcol[i] = RGB256k.RGB[color[i].r >> 2][color[i].g >> 2][color[i].b >> 2]; } // Now decode this 4x4 block to the pixel buffer. 
for (y = 0; y < 4; ++y) diff --git a/src/textures/jpegtexture.cpp b/src/textures/jpegtexture.cpp index 2253965987..c138edbfa3 100644 --- a/src/textures/jpegtexture.cpp +++ b/src/textures/jpegtexture.cpp @@ -406,7 +406,7 @@ void FJPEGTexture::MakeTexture () case JCS_RGB: for (int x = Width; x > 0; --x) { - *out = RGB32k.RGB[in[0]>>3][in[1]>>3][in[2]>>3]; + *out = RGB256k.RGB[in[0]>>2][in[1]>>2][in[2]>>2]; out += Height; in += 3; } @@ -430,7 +430,7 @@ void FJPEGTexture::MakeTexture () int r = in[3] - (((256-in[0])*in[3]) >> 8); int g = in[3] - (((256-in[1])*in[3]) >> 8); int b = in[3] - (((256-in[2])*in[3]) >> 8); - *out = RGB32k.RGB[r >> 3][g >> 3][b >> 3]; + *out = RGB256k.RGB[r >> 2][g >> 2][b >> 2]; out += Height; in += 4; } diff --git a/src/textures/multipatchtexture.cpp b/src/textures/multipatchtexture.cpp index e68c4e20f2..991893845b 100644 --- a/src/textures/multipatchtexture.cpp +++ b/src/textures/multipatchtexture.cpp @@ -531,7 +531,7 @@ void FMultiPatchTexture::MakeTexture () { if (*out == 0 && in[3] != 0) { - *out = RGB32k.RGB[in[2]>>3][in[1]>>3][in[0]>>3]; + *out = RGB256k.RGB[in[2]>>2][in[1]>>2][in[0]>>2]; } out += Height; in += 4; diff --git a/src/textures/pcxtexture.cpp b/src/textures/pcxtexture.cpp index 0ec5d2933c..dda431993c 100644 --- a/src/textures/pcxtexture.cpp +++ b/src/textures/pcxtexture.cpp @@ -528,7 +528,7 @@ void FPCXTexture::MakeTexture() { for(int x=0; x < Width; x++) { - Pixels[y+Height*x] = RGB32k.RGB[row[0]>>3][row[1]>>3][row[2]>>3]; + Pixels[y+Height*x] = RGB256k.RGB[row[0]>>2][row[1]>>2][row[2]>>2]; row+=3; } } diff --git a/src/textures/pngtexture.cpp b/src/textures/pngtexture.cpp index d24cd92d11..414c424b8f 100644 --- a/src/textures/pngtexture.cpp +++ b/src/textures/pngtexture.cpp @@ -536,7 +536,7 @@ void FPNGTexture::MakeTexture () { if (!HaveTrans) { - *out++ = RGB32k.RGB[in[0]>>3][in[1]>>3][in[2]>>3]; + *out++ = RGB256k.RGB[in[0]>>2][in[1]>>2][in[2]>>2]; } else { @@ -548,7 +548,7 @@ void FPNGTexture::MakeTexture () } else { - 
*out++ = RGB32k.RGB[in[0]>>3][in[1]>>3][in[2]>>3]; + *out++ = RGB256k.RGB[in[0]>>2][in[1]>>2][in[2]>>2]; } } in += pitch; @@ -593,7 +593,7 @@ void FPNGTexture::MakeTexture () { for (y = Height; y > 0; --y) { - *out++ = in[3] < 128 ? 0 : RGB32k.RGB[in[0]>>3][in[1]>>3][in[2]>>3]; + *out++ = in[3] < 128 ? 0 : RGB256k.RGB[in[0]>>2][in[1]>>2][in[2]>>2]; in += pitch; } in -= backstep; diff --git a/src/textures/tgatexture.cpp b/src/textures/tgatexture.cpp index b208a51a37..331747cfe0 100644 --- a/src/textures/tgatexture.cpp +++ b/src/textures/tgatexture.cpp @@ -393,7 +393,7 @@ void FTGATexture::MakeTexture () for(int x=0;x>10) & 0x1f][(v>>5) & 0x1f][v & 0x1f]; + Pixels[x*Height+y] = RGB256k.RGB[((v>>10) & 0x1f)*2][((v>>5) & 0x1f)*2][(v & 0x1f)*2]; p+=step_x; } } @@ -405,7 +405,7 @@ void FTGATexture::MakeTexture () BYTE * p = ptr + y * Pitch; for(int x=0;x>3][p[1]>>3][p[0]>>3]; + Pixels[x*Height+y] = RGB256k.RGB[p[2]>>2][p[1]>>2][p[0]>>2]; p+=step_x; } } @@ -419,7 +419,7 @@ void FTGATexture::MakeTexture () BYTE * p = ptr + y * Pitch; for(int x=0;x>3][p[1]>>3][p[0]>>3]; + Pixels[x*Height+y] = RGB256k.RGB[p[2]>>2][p[1]>>2][p[0]>>2]; p+=step_x; } } @@ -431,7 +431,7 @@ void FTGATexture::MakeTexture () BYTE * p = ptr + y * Pitch; for(int x=0;x= 128? RGB32k.RGB[p[2]>>3][p[1]>>3][p[0]>>3] : 0; + Pixels[x*Height+y] = p[3] >= 128? RGB256k.RGB[p[2]>>2][p[1]>>2][p[0]>>2] : 0; p+=step_x; } } diff --git a/src/v_draw.cpp b/src/v_draw.cpp index 6f8bc51988..5f0d39d4f2 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -965,27 +965,6 @@ void DCanvas::PUTTRANSDOT (int xx, int yy, int basecolor, int level) static int oldyy; static int oldyyshifted; -#if 0 - if(xx < 32) - cc += 7-(xx>>2); - else if(xx > (finit_width - 32)) - cc += 7-((finit_width-xx) >> 2); -// if(cc==oldcc) //make sure that we don't double fade the corners. 
-// { - if(yy < 32) - cc += 7-(yy>>2); - else if(yy > (finit_height - 32)) - cc += 7-((finit_height-yy) >> 2); -// } - if(cc > cm && cm != NULL) - { - cc = cm; - } - else if(cc > oldcc+6) // don't let the color escape from the fade table... - { - cc=oldcc+6; - } -#endif if (yy == oldyy+1) { oldyy++; @@ -1003,12 +982,12 @@ void DCanvas::PUTTRANSDOT (int xx, int yy, int basecolor, int level) } BYTE *spot = GetBuffer() + oldyyshifted + xx; - DWORD *bg2rgb = Col2RGB8[1+level]; - DWORD *fg2rgb = Col2RGB8[63-level]; - DWORD fg = fg2rgb[basecolor]; - DWORD bg = bg2rgb[*spot]; - bg = (fg+bg) | 0x1f07c1f; - *spot = RGB32k.All[bg&(bg>>15)]; + + uint32_t r = (GPalette.BaseColors[*spot].r * (64 - level) + GPalette.BaseColors[basecolor].r * level) / 64; + uint32_t g = (GPalette.BaseColors[*spot].g * (64 - level) + GPalette.BaseColors[basecolor].g * level) / 64; + uint32_t b = (GPalette.BaseColors[*spot].b * (64 - level) + GPalette.BaseColors[basecolor].b * level) / 64; + + *spot = (BYTE)RGB256k.RGB[r][g][b]; } void DCanvas::DrawLine(int x0, int y0, int x1, int y1, int palColor, uint32 realcolor) diff --git a/src/v_video.cpp b/src/v_video.cpp index efe93aa04a..b639939eed 100644 --- a/src/v_video.cpp +++ b/src/v_video.cpp @@ -143,9 +143,10 @@ extern "C" { DWORD Col2RGB8[65][256]; DWORD *Col2RGB8_LessPrecision[65]; DWORD Col2RGB8_Inverse[65][256]; -ColorTable32k RGB32k; +ColorTable256k RGB256k; } + static DWORD Col2RGB8_2[63][256]; // [RH] The framebuffer is no longer a mere byte array. 
@@ -345,8 +346,6 @@ void DCanvas::Dim (PalEntry color, float damount, int x1, int y1, int w, int h) if (damount == 0.f) return; - DWORD *bg2rgb; - DWORD fg; int gap; BYTE *spot; int x, y; @@ -368,28 +367,23 @@ void DCanvas::Dim (PalEntry color, float damount, int x1, int y1, int w, int h) return; } - { - int amount; - - amount = (int)(damount * 64); - bg2rgb = Col2RGB8[64-amount]; - - fg = (((color.r * amount) >> 4) << 20) | - ((color.g * amount) >> 4) | - (((color.b * amount) >> 4) << 10); - } spot = Buffer + x1 + y1*Pitch; gap = Pitch - w; + + int alpha = (int)((float)64 * damount); + int ialpha = 64 - alpha; + int dimmedcolor_r = color.r * alpha; + int dimmedcolor_g = color.g * alpha; + int dimmedcolor_b = color.b * alpha; for (y = h; y != 0; y--) { for (x = w; x != 0; x--) { - DWORD bg; - - bg = bg2rgb[(*spot)&0xff]; - bg = (fg+bg) | 0x1f07c1f; - *spot = RGB32k.All[bg&(bg>>15)]; + uint32_t r = (dimmedcolor_r + GPalette.BaseColors[*spot].r * ialpha) >> 8; + uint32_t g = (dimmedcolor_g + GPalette.BaseColors[*spot].g * ialpha) >> 8; + uint32_t b = (dimmedcolor_b + GPalette.BaseColors[*spot].b * ialpha) >> 8; + *spot = (BYTE)RGB256k.RGB[r][g][b]; spot++; } spot += gap; @@ -664,42 +658,12 @@ static void BuildTransTable (const PalEntry *palette) { int r, g, b; - // create the RGB555 lookup table - for (r = 0; r < 32; r++) - for (g = 0; g < 32; g++) - for (b = 0; b < 32; b++) - RGB32k.RGB[r][g][b] = ColorMatcher.Pick ((r<<3)|(r>>2), (g<<3)|(g>>2), (b<<3)|(b>>2)); + // create the RGB666 lookup table + for (r = 0; r < 64; r++) + for (g = 0; g < 64; g++) + for (b = 0; b < 64; b++) + RGB256k.RGB[r][g][b] = ColorMatcher.Pick ((r<<2)|(r>>4), (g<<2)|(g>>4), (b<<2)|(b>>4)); - int x, y; - - // create the swizzled palette - for (x = 0; x < 65; x++) - for (y = 0; y < 256; y++) - Col2RGB8[x][y] = (((palette[y].r*x)>>4)<<20) | - ((palette[y].g*x)>>4) | - (((palette[y].b*x)>>4)<<10); - - // create the swizzled palette with the lsb of red and blue forced to 0 - // (for green, a 1 is 
okay since it never gets added into) - for (x = 1; x < 64; x++) - { - Col2RGB8_LessPrecision[x] = Col2RGB8_2[x-1]; - for (y = 0; y < 256; y++) - { - Col2RGB8_2[x-1][y] = Col2RGB8[x][y] & 0x3feffbff; - } - } - Col2RGB8_LessPrecision[0] = Col2RGB8[0]; - Col2RGB8_LessPrecision[64] = Col2RGB8[64]; - - // create the inverse swizzled palette - for (x = 0; x < 65; x++) - for (y = 0; y < 256; y++) - { - Col2RGB8_Inverse[x][y] = (((((255-palette[y].r)*x)>>4)<<20) | - (((255-palette[y].g)*x)>>4) | - ((((255-palette[y].b)*x)>>4)<<10)) & 0x3feffbff; - } } //========================================================================== diff --git a/src/v_video.h b/src/v_video.h index b72f670947..0da6b9b500 100644 --- a/src/v_video.h +++ b/src/v_video.h @@ -448,44 +448,13 @@ EXTERN_CVAR (Float, Gamma) // Translucency tables -// RGB32k is a normal R5G5B5 -> palette lookup table. - -// Use a union so we can "overflow" without warnings. -// Otherwise, we get stuff like this from Clang (when compiled -// with -fsanitize=bounds) while running: -// src/v_video.cpp:390:12: runtime error: index 1068 out of bounds for type 'BYTE [32]' -// src/r_draw.cpp:273:11: runtime error: index 1057 out of bounds for type 'BYTE [32]' -union ColorTable32k +// [SP] RGB666 support +union ColorTable256k { - BYTE RGB[32][32][32]; - BYTE All[32 *32 *32]; + BYTE RGB[64][64][64]; + BYTE All[64 *64 *64]; }; -extern "C" ColorTable32k RGB32k; - -// Col2RGB8 is a pre-multiplied palette for color lookup. It is stored in a -// special R10B10G10 format for efficient blending computation. -// --RRRRRrrr--BBBBBbbb--GGGGGggg-- at level 64 -// --------rrrr------bbbb------gggg at level 1 -extern "C" DWORD Col2RGB8[65][256]; - -// Col2RGB8_LessPrecision is the same as Col2RGB8, but the LSB for red -// and blue are forced to zero, so if the blend overflows, it won't spill -// over into the next component's value. 
-// --RRRRRrrr-#BBBBBbbb-#GGGGGggg-- at level 64 -// --------rrr#------bbb#------gggg at level 1 -extern "C" DWORD *Col2RGB8_LessPrecision[65]; - -// Col2RGB8_Inverse is the same as Col2RGB8_LessPrecision, except the source -// palette has been inverted. -extern "C" DWORD Col2RGB8_Inverse[65][256]; - -// "Magic" numbers used during the blending: -// --000001111100000111110000011111 = 0x01f07c1f -// -0111111111011111111101111111111 = 0x3FEFFBFF -// -1000000000100000000010000000000 = 0x40100400 -// ------10000000001000000000100000 = 0x40100400 >> 5 -// --11111-----11111-----11111----- = 0x40100400 - (0x40100400 >> 5) aka "white" -// --111111111111111111111111111111 = 0x3FFFFFFF +extern "C" ColorTable256k RGB256k; // Allocates buffer screens, call before R_Init. void V_Init (bool restart); From 2bb2395569701dafade073d41da83fec4cf70300 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 23 Dec 2016 23:44:52 +0100 Subject: [PATCH 540/912] Add menu option to enable dynamic lights (independent of the OpenGL setting so that you can have it on in OpenGL and off in Software) --- src/r_draw.cpp | 2 + src/r_draw.h | 1 + src/r_plane.cpp | 90 ++++++++++++++++++--------------- src/r_walldraw.cpp | 101 ++++++++++++++++++++----------------- wadsrc/static/language.enu | 1 + wadsrc/static/menudef.txt | 1 + 6 files changed, 109 insertions(+), 87 deletions(-) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 2968ed3745..dabc97ff25 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -53,6 +53,8 @@ #include "r_draw_pal.h" #include "r_thread.h" +CVAR(Bool, r_dynlights, 1, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); + namespace swrenderer { // Needed by R_DrawFogBoundary (which probably shouldn't be part of this file) diff --git a/src/r_draw.h b/src/r_draw.h index 8b5789b555..31d7cdbc03 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -16,6 +16,7 @@ EXTERN_CVAR(Int, r_drawfuzz); EXTERN_CVAR(Bool, r_drawtrans); EXTERN_CVAR(Float, transsouls); EXTERN_CVAR(Int, r_columnmethod); 
+EXTERN_CVAR(Bool, r_dynlights); namespace swrenderer { diff --git a/src/r_plane.cpp b/src/r_plane.cpp index 502d8490db..074bc39f46 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -258,54 +258,61 @@ void R_MapPlane (int y, int x1) if (r_swtruecolor) { - // Find row position in view space - float zspan = planeheight / (fabs(y + 0.5 - CenterY) / InvZtoScale); - dc_viewpos.X = (float)((x1 + 0.5 - CenterX) / CenterX * zspan); - dc_viewpos.Y = zspan; - dc_viewpos.Z = (float)((CenterY - y - 0.5) / InvZtoScale * zspan); - dc_viewpos_step.X = (float)(zspan / CenterX); - - static TriLight lightbuffer[64 * 1024]; - static int nextlightindex = 0; - - // Setup lights for column - dc_num_lights = 0; - dc_lights = lightbuffer + nextlightindex; - visplane_light *cur_node = ds_light_list; - while (cur_node && nextlightindex < 64 * 1024) + if (r_dynlights) { - double lightX = cur_node->lightsource->X() - ViewPos.X; - double lightY = cur_node->lightsource->Y() - ViewPos.Y; - double lightZ = cur_node->lightsource->Z() - ViewPos.Z; + // Find row position in view space + float zspan = planeheight / (fabs(y + 0.5 - CenterY) / InvZtoScale); + dc_viewpos.X = (float)((x1 + 0.5 - CenterX) / CenterX * zspan); + dc_viewpos.Y = zspan; + dc_viewpos.Z = (float)((CenterY - y - 0.5) / InvZtoScale * zspan); + dc_viewpos_step.X = (float)(zspan / CenterX); - float lx = (float)(lightX * ViewSin - lightY * ViewCos); - float ly = (float)(lightX * ViewTanCos + lightY * ViewTanSin) - dc_viewpos.Y; - float lz = (float)lightZ - dc_viewpos.Z; + static TriLight lightbuffer[64 * 1024]; + static int nextlightindex = 0; - // Precalculate the constant part of the dot here so the drawer doesn't have to. 
- float lconstant = ly * ly + lz * lz; - - // Include light only if it touches this row - float radius = cur_node->lightsource->GetRadius(); - if (radius * radius >= lconstant) + // Setup lights for column + dc_num_lights = 0; + dc_lights = lightbuffer + nextlightindex; + visplane_light *cur_node = ds_light_list; + while (cur_node && nextlightindex < 64 * 1024) { - uint32_t red = cur_node->lightsource->GetRed(); - uint32_t green = cur_node->lightsource->GetGreen(); - uint32_t blue = cur_node->lightsource->GetBlue(); + double lightX = cur_node->lightsource->X() - ViewPos.X; + double lightY = cur_node->lightsource->Y() - ViewPos.Y; + double lightZ = cur_node->lightsource->Z() - ViewPos.Z; - nextlightindex++; - auto &light = dc_lights[dc_num_lights++]; - light.x = lx; - light.y = lconstant; - light.radius = 256.0f / radius; - light.color = (red << 16) | (green << 8) | blue; + float lx = (float)(lightX * ViewSin - lightY * ViewCos); + float ly = (float)(lightX * ViewTanCos + lightY * ViewTanSin) - dc_viewpos.Y; + float lz = (float)lightZ - dc_viewpos.Z; + + // Precalculate the constant part of the dot here so the drawer doesn't have to. 
+ float lconstant = ly * ly + lz * lz; + + // Include light only if it touches this row + float radius = cur_node->lightsource->GetRadius(); + if (radius * radius >= lconstant) + { + uint32_t red = cur_node->lightsource->GetRed(); + uint32_t green = cur_node->lightsource->GetGreen(); + uint32_t blue = cur_node->lightsource->GetBlue(); + + nextlightindex++; + auto &light = dc_lights[dc_num_lights++]; + light.x = lx; + light.y = lconstant; + light.radius = 256.0f / radius; + light.color = (red << 16) | (green << 8) | blue; + } + + cur_node = cur_node->next; } - cur_node = cur_node->next; + if (nextlightindex == 64 * 1024) + nextlightindex = 0; + } + else + { + dc_num_lights = 0; } - - if (nextlightindex == 64 * 1024) - nextlightindex = 0; } ds_y = y; @@ -348,6 +355,9 @@ namespace void R_AddPlaneLights(visplane_t *plane, FLightNode *node) { + if (!r_dynlights) + return; + while (node) { if (!(node->lightsource->flags2&MF2_DORMANT)) diff --git a/src/r_walldraw.cpp b/src/r_walldraw.cpp index 0523d8960f..4f532a78e8 100644 --- a/src/r_walldraw.cpp +++ b/src/r_walldraw.cpp @@ -539,61 +539,68 @@ static void Draw1Column(int x, int y1, int y2, WallSampler &sampler, void(*draw1 { if (r_swtruecolor) { - // Find column position in view space - float w1 = 1.0f / WallC.sz1; - float w2 = 1.0f / WallC.sz2; - float t = (x - WallC.sx1 + 0.5f) / (WallC.sx2 - WallC.sx1); - float wcol = w1 * (1.0f - t) + w2 * t; - float zcol = 1.0f / wcol; - dc_viewpos.X = (float)((x + 0.5 - CenterX) / CenterX * zcol); - dc_viewpos.Y = zcol; - dc_viewpos.Z = (float)((CenterY - y1 - 0.5) / InvZtoScale * zcol); - dc_viewpos_step.Z = (float)(-zcol / InvZtoScale); - - static TriLight lightbuffer[64 * 1024]; - static int nextlightindex = 0; - - // Setup lights for column - dc_num_lights = 0; - dc_lights = lightbuffer + nextlightindex; - FLightNode *cur_node = dc_light_list; - while (cur_node && nextlightindex < 64 * 1024) + if (r_dynlights) { - if (!(cur_node->lightsource->flags2&MF2_DORMANT)) + // Find 
column position in view space + float w1 = 1.0f / WallC.sz1; + float w2 = 1.0f / WallC.sz2; + float t = (x - WallC.sx1 + 0.5f) / (WallC.sx2 - WallC.sx1); + float wcol = w1 * (1.0f - t) + w2 * t; + float zcol = 1.0f / wcol; + dc_viewpos.X = (float)((x + 0.5 - CenterX) / CenterX * zcol); + dc_viewpos.Y = zcol; + dc_viewpos.Z = (float)((CenterY - y1 - 0.5) / InvZtoScale * zcol); + dc_viewpos_step.Z = (float)(-zcol / InvZtoScale); + + static TriLight lightbuffer[64 * 1024]; + static int nextlightindex = 0; + + // Setup lights for column + dc_num_lights = 0; + dc_lights = lightbuffer + nextlightindex; + FLightNode *cur_node = dc_light_list; + while (cur_node && nextlightindex < 64 * 1024) { - double lightX = cur_node->lightsource->X() - ViewPos.X; - double lightY = cur_node->lightsource->Y() - ViewPos.Y; - double lightZ = cur_node->lightsource->Z() - ViewPos.Z; - - float lx = (float)(lightX * ViewSin - lightY * ViewCos) - dc_viewpos.X; - float ly = (float)(lightX * ViewTanCos + lightY * ViewTanSin) - dc_viewpos.Y; - float lz = (float)lightZ; - - // Precalculate the constant part of the dot here so the drawer doesn't have to. 
- float lconstant = lx * lx + ly * ly; - - // Include light only if it touches this column - float radius = cur_node->lightsource->GetRadius(); - if (radius * radius >= lconstant) + if (!(cur_node->lightsource->flags2&MF2_DORMANT)) { - uint32_t red = cur_node->lightsource->GetRed(); - uint32_t green = cur_node->lightsource->GetGreen(); - uint32_t blue = cur_node->lightsource->GetBlue(); + double lightX = cur_node->lightsource->X() - ViewPos.X; + double lightY = cur_node->lightsource->Y() - ViewPos.Y; + double lightZ = cur_node->lightsource->Z() - ViewPos.Z; - nextlightindex++; - auto &light = dc_lights[dc_num_lights++]; - light.x = lconstant; - light.z = lz; - light.radius = 256.0f / cur_node->lightsource->GetRadius(); - light.color = (red << 16) | (green << 8) | blue; + float lx = (float)(lightX * ViewSin - lightY * ViewCos) - dc_viewpos.X; + float ly = (float)(lightX * ViewTanCos + lightY * ViewTanSin) - dc_viewpos.Y; + float lz = (float)lightZ; + + // Precalculate the constant part of the dot here so the drawer doesn't have to. 
+ float lconstant = lx * lx + ly * ly; + + // Include light only if it touches this column + float radius = cur_node->lightsource->GetRadius(); + if (radius * radius >= lconstant) + { + uint32_t red = cur_node->lightsource->GetRed(); + uint32_t green = cur_node->lightsource->GetGreen(); + uint32_t blue = cur_node->lightsource->GetBlue(); + + nextlightindex++; + auto &light = dc_lights[dc_num_lights++]; + light.x = lconstant; + light.z = lz; + light.radius = 256.0f / cur_node->lightsource->GetRadius(); + light.color = (red << 16) | (green << 8) | blue; + } } + + cur_node = cur_node->nextLight; } - cur_node = cur_node->nextLight; + if (nextlightindex == 64 * 1024) + nextlightindex = 0; + } + else + { + dc_num_lights = 0; } - - if (nextlightindex == 64 * 1024) - nextlightindex = 0; int count = y2 - y1; diff --git a/wadsrc/static/language.enu b/wadsrc/static/language.enu index 177d635214..48e2484fd4 100644 --- a/wadsrc/static/language.enu +++ b/wadsrc/static/language.enu @@ -2754,4 +2754,5 @@ TCMNU_TRUECOLOR = "True color output"; TCMNU_MINFILTER = "Linear filter when downscaling"; TCMNU_MAGFILTER = "Linear filter when upscaling"; TCMNU_MIPMAP = "Use mipmapped textures"; +TCMNU_DYNLIGHTS = "Dynamic lights"; diff --git a/wadsrc/static/menudef.txt b/wadsrc/static/menudef.txt index 1c9d42de15..3086312a9b 100644 --- a/wadsrc/static/menudef.txt +++ b/wadsrc/static/menudef.txt @@ -675,6 +675,7 @@ OptionMenu "TrueColorOptions" Option "$TCMNU_MINFILTER", "r_minfilter", "OnOff" Option "$TCMNU_MAGFILTER", "r_magfilter", "OnOff" Option "$TCMNU_MIPMAP", "r_mipmap", "OnOff" + Option "$TCMNU_DYNLIGHTS", "r_dynlights", "OnOff" } OptionMenu "VideoOptions" From 52892cb7ef21e9bd745b5c9ecf63992245966a08 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 24 Dec 2016 01:50:54 +0100 Subject: [PATCH 541/912] Move particle drawing to a command to prevent pipeline stalls --- src/r_draw_rgba.h | 17 +++++++++++++++ src/r_drawt_rgba.cpp | 52 ++++++++++++++++++++++++++++++++++++++++++++ 
src/r_things.cpp | 24 +------------------- 3 files changed, 70 insertions(+), 23 deletions(-) diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index afb3bc22d6..6d16558c06 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -440,6 +440,23 @@ namespace swrenderer FString DebugInfo() override; }; + ///////////////////////////////////////////////////////////////////////////// + + class DrawParticleColumnRGBACommand : public DrawerCommand + { + public: + DrawParticleColumnRGBACommand(uint32_t *dest, int dest_y, int pitch, int count, uint32_t fg, uint32_t alpha); + void Execute(DrawerThread *thread) override; + FString DebugInfo() override; + + private: + uint32_t *_dest; + int _pitch; + int _count; + uint32_t _fg; + uint32_t _alpha; + }; + ///////////////////////////////////////////////////////////////////////////// // Pixel shading inline functions: diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp index 5d3064e5d7..3609956fe4 100644 --- a/src/r_drawt_rgba.cpp +++ b/src/r_drawt_rgba.cpp @@ -231,4 +231,56 @@ namespace swrenderer { return "FillColumnHoriz"; } + + ///////////////////////////////////////////////////////////////////////////// + + DrawParticleColumnRGBACommand::DrawParticleColumnRGBACommand(uint32_t *dest, int dest_y, int pitch, int count, uint32_t fg, uint32_t alpha) + { + _dest = dest; + _pitch = pitch; + _count = count; + _fg = fg; + _alpha = alpha; + _dest_y = dest_y; + } + + void DrawParticleColumnRGBACommand::Execute(DrawerThread *thread) + { + int count = thread->count_for_thread(_dest_y, _count); + if (count <= 0) + return; + + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, _dest); + int pitch = _pitch * thread->num_cores; + + uint32_t alpha = _alpha; + uint32_t inv_alpha = 256 - alpha; + + uint32_t fg_red = (_fg >> 16) & 0xff; + uint32_t fg_green = (_fg >> 8) & 0xff; + uint32_t fg_blue = _fg & 0xff; + + fg_red *= alpha; + fg_green *= alpha; + fg_blue *= alpha; + + for (int y = 0; y < count; y++) + { + uint32_t bg_red = 
(*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = (fg_red + bg_red * inv_alpha) / 256; + uint32_t green = (fg_green + bg_green * inv_alpha) / 256; + uint32_t blue = (fg_blue + bg_blue * inv_alpha) / 256; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += pitch; + } + } + + FString DrawParticleColumnRGBACommand::DebugInfo() + { + return "DrawParticle"; + } } diff --git a/src/r_things.cpp b/src/r_things.cpp index 6e1261c1bb..bbd9eb9cf8 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -2865,21 +2865,11 @@ void R_DrawParticle_rgba(vissprite_t *vis) R_DrawMaskedSegsBehindParticle(vis); - DrawerCommandQueue::WaitForWorkers(); - uint32_t fg = LightBgra::shade_pal_index_simple(color, LightBgra::calc_light_multiplier(LIGHTSCALE(0, vis->Style.ColormapNum << FRACBITS))); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; // vis->renderflags holds translucency level (0-255) fixed_t fglevel = ((vis->renderflags + 1) << 8) & ~0x3ff; uint32_t alpha = fglevel * 256 / FRACUNIT; - uint32_t inv_alpha = 256 - alpha; - - fg_red *= alpha; - fg_green *= alpha; - fg_blue *= alpha; spacing = RenderTarget->GetPitch(); @@ -2889,19 +2879,7 @@ void R_DrawParticle_rgba(vissprite_t *vis) if (R_ClipSpriteColumnWithPortals(vis)) continue; dest = ylookup[yl] + x + (uint32_t*)dc_destorg; - for (int y = 0; y < ycount; y++) - { - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = (fg_red + bg_red * inv_alpha) / 256; - uint32_t green = (fg_green + bg_green * inv_alpha) / 256; - uint32_t blue = (fg_blue + bg_blue * inv_alpha) / 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += spacing; - } + DrawerCommandQueue::QueueCommand(dest, yl, spacing, ycount, fg, alpha); } } From 8de11ee81a1c3ff817781afb0e9bf3841e84af1a Mon Sep 17 00:00:00 2001 
From: Rachael Alexanderson Date: Sat, 24 Dec 2016 03:49:56 -0500 Subject: [PATCH 542/912] - Begin reimplementing rgb555 again. --- src/r_draw_pal.cpp | 137 ++++++++++++++++++++++++++++++++++----------- src/v_video.cpp | 84 ++++++++++++++++++++++++--- src/v_video.h | 39 +++++++++++++ 3 files changed, 218 insertions(+), 42 deletions(-) diff --git a/src/r_draw_pal.cpp b/src/r_draw_pal.cpp index 17e77d39ae..c7adc9d87a 100644 --- a/src/r_draw_pal.cpp +++ b/src/r_draw_pal.cpp @@ -43,6 +43,9 @@ #include "v_video.h" #include "r_draw_pal.h" +// [SP] r_blendmode - false = rgb555 matching (ZDoom classic), true = rgb666 (refactored) +CVAR(Bool, r_blendmode, false, CVAR_GLOBALCONFIG | CVAR_ARCHIVE) + /* [RH] This translucency algorithm is based on DOSDoom 0.65's, but uses a 32k RGB table instead of an 8k one. At least on my machine, it's @@ -303,19 +306,39 @@ namespace swrenderer fracstep *= thread->num_cores; pitch *= thread->num_cores; - do + if (!r_blendmode) { - uint8_t pix = source[frac >> bits]; - if (pix != 0) + do { - uint32_t r = MIN(GPalette.BaseColors[colormap[pix]].r + GPalette.BaseColors[*dest].r, 255); - uint32_t g = MIN(GPalette.BaseColors[colormap[pix]].g + GPalette.BaseColors[*dest].g, 255); - uint32_t b = MIN(GPalette.BaseColors[colormap[pix]].b + GPalette.BaseColors[*dest].b, 255); - *dest = RGB256k.RGB[r>>2][g>>2][b>>2]; - } - frac += fracstep; - dest += pitch; - } while (--count); + uint8_t pix = source[frac >> bits]; + if (pix != 0) + { + uint32_t fg = fg2rgb[colormap[pix]]; + uint32_t bg = bg2rgb[*dest]; + fg = (fg + bg) | 0x1f07c1f; + *dest = RGB32k.All[fg & (fg >> 15)]; + } + frac += fracstep; + dest += pitch; + } while (--count); + + } + else + { + do + { + uint8_t pix = source[frac >> bits]; + if (pix != 0) + { + uint32_t r = MIN(GPalette.BaseColors[colormap[pix]].r + GPalette.BaseColors[*dest].r, 255); + uint32_t g = MIN(GPalette.BaseColors[colormap[pix]].g + GPalette.BaseColors[*dest].g, 255); + uint32_t b = MIN(GPalette.BaseColors[colormap[pix]].b 
+ GPalette.BaseColors[*dest].b, 255); + *dest = RGB256k.RGB[r>>2][g>>2][b>>2]; + } + frac += fracstep; + dest += pitch; + } while (--count); + } } void DrawWallAdd4PalCommand::Execute(DrawerThread *thread) @@ -341,22 +364,44 @@ namespace swrenderer } pitch *= thread->num_cores; - do + if (!r_blendmode) { - for (int i = 0; i < 4; ++i) + do { - uint8_t pix = _source[i][dc_wall_texturefrac[i] >> bits]; - if (pix != 0) + for (int i = 0; i < 4; ++i) { - uint32_t r = MIN(GPalette.BaseColors[_colormap[i][pix]].r + GPalette.BaseColors[dest[i]].r, 255); - uint32_t g = MIN(GPalette.BaseColors[_colormap[i][pix]].g + GPalette.BaseColors[dest[i]].g, 255); - uint32_t b = MIN(GPalette.BaseColors[_colormap[i][pix]].b + GPalette.BaseColors[dest[i]].b, 255); - dest[i] = RGB256k.RGB[r>>2][g>>2][b>>2]; + uint8_t pix = _source[i][dc_wall_texturefrac[i] >> bits]; + if (pix != 0) + { + uint32_t fg = fg2rgb[_colormap[i][pix]]; + uint32_t bg = bg2rgb[dest[i]]; + fg = (fg + bg) | 0x1f07c1f; + dest[i] = RGB32k.All[fg & (fg >> 15)]; + } + dc_wall_texturefrac[i] += dc_wall_iscale[i]; } - dc_wall_texturefrac[i] += dc_wall_iscale[i]; - } - dest += pitch; - } while (--count); + dest += pitch; + } while (--count); + } + else + { + do + { + for (int i = 0; i < 4; ++i) + { + uint8_t pix = _source[i][dc_wall_texturefrac[i] >> bits]; + if (pix != 0) + { + uint32_t r = MIN(GPalette.BaseColors[_colormap[i][pix]].r + GPalette.BaseColors[dest[i]].r, 255); + uint32_t g = MIN(GPalette.BaseColors[_colormap[i][pix]].g + GPalette.BaseColors[dest[i]].g, 255); + uint32_t b = MIN(GPalette.BaseColors[_colormap[i][pix]].b + GPalette.BaseColors[dest[i]].b, 255); + dest[i] = RGB256k.RGB[r>>2][g>>2][b>>2]; + } + dc_wall_texturefrac[i] += dc_wall_iscale[i]; + } + dest += pitch; + } while (--count); + } } void DrawWallAddClamp1PalCommand::Execute(DrawerThread *thread) @@ -379,19 +424,43 @@ namespace swrenderer fracstep *= thread->num_cores; pitch *= thread->num_cores; - do + if (!r_blendmode) { - uint8_t pix = 
source[frac >> bits]; - if (pix != 0) + do { - uint32_t r = MIN(GPalette.BaseColors[colormap[pix]].r + GPalette.BaseColors[*dest].r, 255); - uint32_t g = MIN(GPalette.BaseColors[colormap[pix]].g + GPalette.BaseColors[*dest].g, 255); - uint32_t b = MIN(GPalette.BaseColors[colormap[pix]].b + GPalette.BaseColors[*dest].b, 255); - *dest = RGB256k.RGB[r>>2][g>>2][b>>2]; - } - frac += fracstep; - dest += pitch; - } while (--count); + uint8_t pix = source[frac >> bits]; + if (pix != 0) + { + uint32_t a = fg2rgb[colormap[pix]] + bg2rgb[*dest]; + uint32_t b = a; + + a |= 0x01f07c1f; + b &= 0x40100400; + a &= 0x3fffffff; + b = b - (b >> 5); + a |= b; + *dest = RGB32k.All[a & (a >> 15)]; + } + frac += fracstep; + dest += pitch; + } while (--count); + } + else + { + do + { + uint8_t pix = source[frac >> bits]; + if (pix != 0) + { + uint32_t r = MIN(GPalette.BaseColors[colormap[pix]].r + GPalette.BaseColors[*dest].r, 255); + uint32_t g = MIN(GPalette.BaseColors[colormap[pix]].g + GPalette.BaseColors[*dest].g, 255); + uint32_t b = MIN(GPalette.BaseColors[colormap[pix]].b + GPalette.BaseColors[*dest].b, 255); + *dest = RGB256k.RGB[r>>2][g>>2][b>>2]; + } + frac += fracstep; + dest += pitch; + } while (--count); + } } void DrawWallAddClamp4PalCommand::Execute(DrawerThread *thread) diff --git a/src/v_video.cpp b/src/v_video.cpp index b639939eed..5a8ef10d59 100644 --- a/src/v_video.cpp +++ b/src/v_video.cpp @@ -143,6 +143,7 @@ extern "C" { DWORD Col2RGB8[65][256]; DWORD *Col2RGB8_LessPrecision[65]; DWORD Col2RGB8_Inverse[65][256]; +ColorTable32k RGB32k; ColorTable256k RGB256k; } @@ -346,6 +347,8 @@ void DCanvas::Dim (PalEntry color, float damount, int x1, int y1, int w, int h) if (damount == 0.f) return; + DWORD *bg2rgb; + DWORD fg; int gap; BYTE *spot; int x, y; @@ -367,6 +370,16 @@ void DCanvas::Dim (PalEntry color, float damount, int x1, int y1, int w, int h) return; } + { + int amount; + + amount = (int)(damount * 64); + bg2rgb = Col2RGB8[64-amount]; + + fg = (((color.r * amount) 
>> 4) << 20) | + ((color.g * amount) >> 4) | + (((color.b * amount) >> 4) << 10); + } spot = Buffer + x1 + y1*Pitch; gap = Pitch - w; @@ -376,17 +389,37 @@ void DCanvas::Dim (PalEntry color, float damount, int x1, int y1, int w, int h) int dimmedcolor_r = color.r * alpha; int dimmedcolor_g = color.g * alpha; int dimmedcolor_b = color.b * alpha; - for (y = h; y != 0; y--) + + if (!r_blendmode) { - for (x = w; x != 0; x--) + for (y = h; y != 0; y--) { - uint32_t r = (dimmedcolor_r + GPalette.BaseColors[*spot].r * ialpha) >> 8; - uint32_t g = (dimmedcolor_g + GPalette.BaseColors[*spot].g * ialpha) >> 8; - uint32_t b = (dimmedcolor_b + GPalette.BaseColors[*spot].b * ialpha) >> 8; - *spot = (BYTE)RGB256k.RGB[r][g][b]; - spot++; + for (x = w; x != 0; x--) + { + DWORD bg; + + bg = bg2rgb[(*spot)&0xff]; + bg = (fg+bg) | 0x1f07c1f; + *spot = RGB32k.All[bg&(bg>>15)]; + spot++; + } + spot += gap; + } + } + else + { + for (y = h; y != 0; y--) + { + for (x = w; x != 0; x--) + { + uint32_t r = (dimmedcolor_r + GPalette.BaseColors[*spot].r * ialpha) >> 8; + uint32_t g = (dimmedcolor_g + GPalette.BaseColors[*spot].g * ialpha) >> 8; + uint32_t b = (dimmedcolor_b + GPalette.BaseColors[*spot].b * ialpha) >> 8; + *spot = (BYTE)RGB256k.RGB[r][g][b]; + spot++; + } + spot += gap; } - spot += gap; } } @@ -658,12 +691,47 @@ static void BuildTransTable (const PalEntry *palette) { int r, g, b; + // create the RGB555 lookup table + for (r = 0; r < 32; r++) + for (g = 0; g < 32; g++) + for (b = 0; b < 32; b++) + RGB32k.RGB[r][g][b] = ColorMatcher.Pick ((r<<3)|(r>>2), (g<<3)|(g>>2), (b<<3)|(b>>2)); // create the RGB666 lookup table for (r = 0; r < 64; r++) for (g = 0; g < 64; g++) for (b = 0; b < 64; b++) RGB256k.RGB[r][g][b] = ColorMatcher.Pick ((r<<2)|(r>>4), (g<<2)|(g>>4), (b<<2)|(b>>4)); + int x, y; + + // create the swizzled palette + for (x = 0; x < 65; x++) + for (y = 0; y < 256; y++) + Col2RGB8[x][y] = (((palette[y].r*x)>>4)<<20) | + ((palette[y].g*x)>>4) | + 
(((palette[y].b*x)>>4)<<10); + + // create the swizzled palette with the lsb of red and blue forced to 0 + // (for green, a 1 is okay since it never gets added into) + for (x = 1; x < 64; x++) + { + Col2RGB8_LessPrecision[x] = Col2RGB8_2[x-1]; + for (y = 0; y < 256; y++) + { + Col2RGB8_2[x-1][y] = Col2RGB8[x][y] & 0x3feffbff; + } + } + Col2RGB8_LessPrecision[0] = Col2RGB8[0]; + Col2RGB8_LessPrecision[64] = Col2RGB8[64]; + + // create the inverse swizzled palette + for (x = 0; x < 65; x++) + for (y = 0; y < 256; y++) + { + Col2RGB8_Inverse[x][y] = (((((255-palette[y].r)*x)>>4)<<20) | + (((255-palette[y].g)*x)>>4) | + ((((255-palette[y].b)*x)>>4)<<10)) & 0x3feffbff; + } } //========================================================================== diff --git a/src/v_video.h b/src/v_video.h index 0da6b9b500..57d15869cd 100644 --- a/src/v_video.h +++ b/src/v_video.h @@ -448,6 +448,20 @@ EXTERN_CVAR (Float, Gamma) // Translucency tables +// RGB32k is a normal R5G5B5 -> palette lookup table. + +// Use a union so we can "overflow" without warnings. +// Otherwise, we get stuff like this from Clang (when compiled +// with -fsanitize=bounds) while running: +// src/v_video.cpp:390:12: runtime error: index 1068 out of bounds for type 'BYTE [32]' +// src/r_draw.cpp:273:11: runtime error: index 1057 out of bounds for type 'BYTE [32]' +union ColorTable32k +{ + BYTE RGB[32][32][32]; + BYTE All[32 *32 *32]; +}; +extern "C" ColorTable32k RGB32k; + // [SP] RGB666 support union ColorTable256k { @@ -456,6 +470,31 @@ union ColorTable256k }; extern "C" ColorTable256k RGB256k; +// Col2RGB8 is a pre-multiplied palette for color lookup. It is stored in a +// special R10B10G10 format for efficient blending computation. 
+// --RRRRRrrr--BBBBBbbb--GGGGGggg-- at level 64 +// --------rrrr------bbbb------gggg at level 1 +extern "C" DWORD Col2RGB8[65][256]; + +// Col2RGB8_LessPrecision is the same as Col2RGB8, but the LSB for red +// and blue are forced to zero, so if the blend overflows, it won't spill +// over into the next component's value. +// --RRRRRrrr-#BBBBBbbb-#GGGGGggg-- at level 64 +// --------rrr#------bbb#------gggg at level 1 +extern "C" DWORD *Col2RGB8_LessPrecision[65]; + +// Col2RGB8_Inverse is the same as Col2RGB8_LessPrecision, except the source +// palette has been inverted. +extern "C" DWORD Col2RGB8_Inverse[65][256]; + +// "Magic" numbers used during the blending: +// --000001111100000111110000011111 = 0x01f07c1f +// -0111111111011111111101111111111 = 0x3FEFFBFF +// -1000000000100000000010000000000 = 0x40100400 +// ------10000000001000000000100000 = 0x40100400 >> 5 +// --11111-----11111-----11111----- = 0x40100400 - (0x40100400 >> 5) aka "white" +// --111111111111111111111111111111 = 0x3FFFFFFF + // Allocates buffer screens, call before R_Init. void V_Init (bool restart); From 9d2128a4f4af0ea16048addd40bc48917177f837 Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Sat, 24 Dec 2016 04:01:50 -0500 Subject: [PATCH 543/912] - Fixed compile errors. 
--- src/r_draw_pal.cpp | 12 ++++++++++++ src/v_video.cpp | 2 ++ 2 files changed, 14 insertions(+) diff --git a/src/r_draw_pal.cpp b/src/r_draw_pal.cpp index c7adc9d87a..6b40621b6a 100644 --- a/src/r_draw_pal.cpp +++ b/src/r_draw_pal.cpp @@ -297,6 +297,9 @@ namespace swrenderer int bits = _fracbits; int pitch = _pitch; + uint32_t *fg2rgb = _srcblend; + uint32_t *bg2rgb = _destblend; + count = thread->count_for_thread(_dest_y, count); if (count <= 0) return; @@ -347,6 +350,9 @@ namespace swrenderer int count = _count; int bits = _fracbits; + uint32_t *fg2rgb = _srcblend; + uint32_t *bg2rgb = _destblend; + uint32_t dc_wall_texturefrac[4] = { _texturefrac[0], _texturefrac[1], _texturefrac[2], _texturefrac[3] }; uint32_t dc_wall_iscale[4] = { _iscale[0], _iscale[1], _iscale[2], _iscale[3] }; @@ -415,6 +421,9 @@ namespace swrenderer int bits = _fracbits; int pitch = _pitch; + uint32_t *fg2rgb = _srcblend; + uint32_t *bg2rgb = _destblend; + count = thread->count_for_thread(_dest_y, count); if (count <= 0) return; @@ -469,6 +478,9 @@ namespace swrenderer int count = _count; int bits = _fracbits; + uint32_t *fg2rgb = _srcblend; + uint32_t *bg2rgb = _destblend; + uint32_t dc_wall_texturefrac[4] = { _texturefrac[0], _texturefrac[1], _texturefrac[2], _texturefrac[3] }; uint32_t dc_wall_iscale[4] = { _iscale[0], _iscale[1], _iscale[2], _iscale[3] }; diff --git a/src/v_video.cpp b/src/v_video.cpp index 5a8ef10d59..9abe13f1b4 100644 --- a/src/v_video.cpp +++ b/src/v_video.cpp @@ -66,6 +66,8 @@ #include "menu/menu.h" #include "r_data/voxels.h" +EXTERN_CVAR(Bool, r_blendmode) + int active_con_scale(); FRenderer *Renderer; From 88b60389992bd8919c382accae10ff27b7805372 Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Sat, 24 Dec 2016 04:35:05 -0500 Subject: [PATCH 544/912] - More rgb555 reimplements. 
--- src/r_draw_pal.cpp | 101 ++++++++++++++++++++++++++++++++++----------- 1 file changed, 78 insertions(+), 23 deletions(-) diff --git a/src/r_draw_pal.cpp b/src/r_draw_pal.cpp index 6b40621b6a..19015d8e11 100644 --- a/src/r_draw_pal.cpp +++ b/src/r_draw_pal.cpp @@ -527,6 +527,9 @@ namespace swrenderer int bits = _fracbits; int pitch = _pitch; + uint32_t *fg2rgb = _srcblend; + uint32_t *bg2rgb = _destblend; + count = thread->count_for_thread(_dest_y, count); if (count <= 0) return; @@ -536,19 +539,42 @@ namespace swrenderer fracstep *= thread->num_cores; pitch *= thread->num_cores; - do + if (!r_blendmode) { - uint8_t pix = source[frac >> bits]; - if (pix != 0) + do { - int r = clamp(-GPalette.BaseColors[colormap[pix]].r + GPalette.BaseColors[*dest].r, 0, 255); - int g = clamp(-GPalette.BaseColors[colormap[pix]].g + GPalette.BaseColors[*dest].g, 0, 255); - int b = clamp(-GPalette.BaseColors[colormap[pix]].b + GPalette.BaseColors[*dest].b, 0, 255); - *dest = RGB256k.RGB[r>>2][g>>2][b>>2]; - } - frac += fracstep; - dest += pitch; - } while (--count); + uint8_t pix = source[frac >> bits]; + if (pix != 0) + { + uint32_t a = (fg2rgb[colormap[pix]] | 0x40100400) - bg2rgb[*dest]; + uint32_t b = a; + + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + *dest = RGB32k.All[a & (a >> 15)]; + } + frac += fracstep; + dest += pitch; + } while (--count); + } + else + { + do + { + uint8_t pix = source[frac >> bits]; + if (pix != 0) + { + int r = clamp(-GPalette.BaseColors[colormap[pix]].r + GPalette.BaseColors[*dest].r, 0, 255); + int g = clamp(-GPalette.BaseColors[colormap[pix]].g + GPalette.BaseColors[*dest].g, 0, 255); + int b = clamp(-GPalette.BaseColors[colormap[pix]].b + GPalette.BaseColors[*dest].b, 0, 255); + *dest = RGB256k.RGB[r>>2][g>>2][b>>2]; + } + frac += fracstep; + dest += pitch; + } while (--count); + } } void DrawWallSubClamp4PalCommand::Execute(DrawerThread *thread) @@ -557,6 +583,9 @@ namespace swrenderer int count = _count; int bits = 
_fracbits; + uint32_t *fg2rgb = _srcblend; + uint32_t *bg2rgb = _destblend; + uint32_t dc_wall_texturefrac[4] = { _texturefrac[0], _texturefrac[1], _texturefrac[2], _texturefrac[3] }; uint32_t dc_wall_iscale[4] = { _iscale[0], _iscale[1], _iscale[2], _iscale[3] }; @@ -574,22 +603,48 @@ namespace swrenderer } pitch *= thread->num_cores; - do + if (!r_blendmode) { - for (int i = 0; i < 4; ++i) + do { - uint8_t pix = _source[i][dc_wall_texturefrac[i] >> bits]; - if (pix != 0) + for (int i = 0; i < 4; ++i) { - int r = clamp(-GPalette.BaseColors[_colormap[i][pix]].r + GPalette.BaseColors[dest[i]].r, 0, 255); - int g = clamp(-GPalette.BaseColors[_colormap[i][pix]].g + GPalette.BaseColors[dest[i]].g, 0, 255); - int b = clamp(-GPalette.BaseColors[_colormap[i][pix]].b + GPalette.BaseColors[dest[i]].b, 0, 255); - dest[i] = RGB256k.RGB[r>>2][g>>2][b>>2]; + uint8_t pix = _source[i][dc_wall_texturefrac[i] >> bits]; + if (pix != 0) + { + uint32_t a = (fg2rgb[_colormap[i][pix]] | 0x40100400) - bg2rgb[dest[i]]; + uint32_t b = a; + + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + dest[i] = RGB32k.All[a & (a >> 15)]; + } + dc_wall_texturefrac[i] += dc_wall_iscale[i]; } - dc_wall_texturefrac[i] += dc_wall_iscale[i]; - } - dest += pitch; - } while (--count); + dest += pitch; + } while (--count); + } + else + { + do + { + for (int i = 0; i < 4; ++i) + { + uint8_t pix = _source[i][dc_wall_texturefrac[i] >> bits]; + if (pix != 0) + { + int r = clamp(-GPalette.BaseColors[_colormap[i][pix]].r + GPalette.BaseColors[dest[i]].r, 0, 255); + int g = clamp(-GPalette.BaseColors[_colormap[i][pix]].g + GPalette.BaseColors[dest[i]].g, 0, 255); + int b = clamp(-GPalette.BaseColors[_colormap[i][pix]].b + GPalette.BaseColors[dest[i]].b, 0, 255); + dest[i] = RGB256k.RGB[r>>2][g>>2][b>>2]; + } + dc_wall_texturefrac[i] += dc_wall_iscale[i]; + } + dest += pitch; + } while (--count); + } } void DrawWallRevSubClamp1PalCommand::Execute(DrawerThread *thread) From 
b04ac8f34649caceba547b47546ebeddcbd0df32 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 24 Dec 2016 14:45:56 +0100 Subject: [PATCH 545/912] Rounded particles --- src/r_draw_rgba.h | 3 ++- src/r_drawt_rgba.cpp | 48 +++++++++++++++++++++++++++++++++++--------- src/r_things.cpp | 10 +++++++-- 3 files changed, 48 insertions(+), 13 deletions(-) diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index 6d16558c06..d5b2691064 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -445,7 +445,7 @@ namespace swrenderer class DrawParticleColumnRGBACommand : public DrawerCommand { public: - DrawParticleColumnRGBACommand(uint32_t *dest, int dest_y, int pitch, int count, uint32_t fg, uint32_t alpha); + DrawParticleColumnRGBACommand(uint32_t *dest, int dest_y, int pitch, int count, uint32_t fg, uint32_t alpha, uint32_t fracposx); void Execute(DrawerThread *thread) override; FString DebugInfo() override; @@ -455,6 +455,7 @@ namespace swrenderer int _count; uint32_t _fg; uint32_t _alpha; + uint32_t _fracposx; }; ///////////////////////////////////////////////////////////////////////////// diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp index 3609956fe4..b5be7a9c36 100644 --- a/src/r_drawt_rgba.cpp +++ b/src/r_drawt_rgba.cpp @@ -234,13 +234,37 @@ namespace swrenderer ///////////////////////////////////////////////////////////////////////////// - DrawParticleColumnRGBACommand::DrawParticleColumnRGBACommand(uint32_t *dest, int dest_y, int pitch, int count, uint32_t fg, uint32_t alpha) + namespace + { + static uint32_t particle_texture[16 * 16] = + { + 1*1, 2*1, 3*1, 4*1, 5*1, 6*1, 7*1, 8*1, 8*1, 7*1, 6*1, 5*1, 4*1, 3*1, 2*1, 1*1, + 1*2, 2*2, 3*2, 4*2, 5*2, 6*2, 7*2, 8*2, 8*2, 7*2, 6*2, 5*2, 4*2, 3*2, 2*2, 1*2, + 1*3, 2*3, 3*3, 4*3, 5*3, 6*3, 7*3, 8*3, 8*3, 7*3, 6*3, 5*3, 4*3, 3*3, 2*3, 1*3, + 1*4, 2*4, 3*4, 4*4, 5*4, 6*4, 7*4, 8*4, 8*4, 7*4, 6*4, 5*4, 4*4, 3*4, 2*4, 1*4, + 1*5, 2*5, 3*5, 4*5, 5*5, 6*5, 7*5, 8*5, 8*5, 7*5, 6*5, 5*5, 4*5, 3*5, 2*5, 1*5, + 1*6, 
2*6, 3*6, 4*6, 5*6, 6*6, 7*6, 8*6, 8*6, 7*6, 6*6, 5*6, 4*6, 3*6, 2*6, 1*6, + 1*7, 2*7, 3*7, 4*7, 5*7, 6*7, 7*7, 8*7, 8*7, 7*7, 6*7, 5*7, 4*7, 3*7, 2*7, 1*7, + 1*8, 2*8, 3*8, 4*8, 5*8, 6*8, 7*8, 8*8, 8*8, 7*8, 6*8, 5*8, 4*8, 3*8, 2*8, 1*8, + 1*8, 2*8, 3*8, 4*8, 5*8, 6*8, 7*8, 8*8, 8*8, 7*8, 6*8, 5*8, 4*8, 3*8, 2*8, 1*8, + 1*7, 2*7, 3*7, 4*7, 5*7, 6*7, 7*7, 8*7, 8*7, 7*7, 6*7, 5*7, 4*7, 3*7, 2*7, 1*7, + 1*6, 2*6, 3*6, 4*6, 5*6, 6*6, 7*6, 8*6, 8*6, 7*6, 6*6, 5*6, 4*6, 3*6, 2*6, 1*6, + 1*5, 2*5, 3*5, 4*5, 5*5, 6*5, 7*5, 8*5, 8*5, 7*5, 6*5, 5*5, 4*5, 3*5, 2*5, 1*5, + 1*4, 2*4, 3*4, 4*4, 5*4, 6*4, 7*4, 8*4, 8*4, 7*4, 6*4, 5*4, 4*4, 3*4, 2*4, 1*4, + 1*3, 2*3, 3*3, 4*3, 5*3, 6*3, 7*3, 8*3, 8*3, 7*3, 6*3, 5*3, 4*3, 3*3, 2*3, 1*3, + 1*2, 2*2, 3*2, 4*2, 5*2, 6*2, 7*2, 8*2, 8*2, 7*2, 6*2, 5*2, 4*2, 3*2, 2*2, 1*2, + 1*1, 2*1, 3*1, 4*1, 5*1, 6*1, 7*1, 8*1, 8*1, 7*1, 6*1, 5*1, 4*1, 3*1, 2*1, 1*1 + }; + } + + DrawParticleColumnRGBACommand::DrawParticleColumnRGBACommand(uint32_t *dest, int dest_y, int pitch, int count, uint32_t fg, uint32_t alpha, uint32_t fracposx) { _dest = dest; _pitch = pitch; _count = count; _fg = fg; _alpha = alpha; + _fracposx = fracposx; _dest_y = dest_y; } @@ -253,29 +277,33 @@ namespace swrenderer uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, _dest); int pitch = _pitch * thread->num_cores; - uint32_t alpha = _alpha; - uint32_t inv_alpha = 256 - alpha; + const uint32_t *source = &particle_texture[(_fracposx >> FRACBITS) * 16]; + uint32_t particle_alpha = _alpha; + + uint32_t fracstep = 16 * FRACUNIT / _count; + uint32_t fracpos = fracstep * thread->skipped_by_thread(_dest_y) + fracstep / 2; + fracstep *= thread->num_cores; uint32_t fg_red = (_fg >> 16) & 0xff; uint32_t fg_green = (_fg >> 8) & 0xff; uint32_t fg_blue = _fg & 0xff; - fg_red *= alpha; - fg_green *= alpha; - fg_blue *= alpha; - for (int y = 0; y < count; y++) { + uint32_t alpha = (source[fracpos >> FRACBITS] * particle_alpha) >> 6; + uint32_t inv_alpha = 256 - alpha; + uint32_t 
bg_red = (*dest >> 16) & 0xff; uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - uint32_t red = (fg_red + bg_red * inv_alpha) / 256; - uint32_t green = (fg_green + bg_green * inv_alpha) / 256; - uint32_t blue = (fg_blue + bg_blue * inv_alpha) / 256; + uint32_t red = (fg_red * alpha + bg_red * inv_alpha) / 256; + uint32_t green = (fg_green * alpha + bg_green * inv_alpha) / 256; + uint32_t blue = (fg_blue * alpha + bg_blue * inv_alpha) / 256; *dest = 0xff000000 | (red << 16) | (green << 8) | blue; dest += pitch; + fracpos += fracstep; } } diff --git a/src/r_things.cpp b/src/r_things.cpp index bbd9eb9cf8..6e6b95402a 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -2863,6 +2863,9 @@ void R_DrawParticle_rgba(vissprite_t *vis) int x1 = vis->x1; int countbase = vis->x2 - x1; + if (ycount <= 0 || countbase <= 0) + return; + R_DrawMaskedSegsBehindParticle(vis); uint32_t fg = LightBgra::shade_pal_index_simple(color, LightBgra::calc_light_multiplier(LIGHTSCALE(0, vis->Style.ColormapNum << FRACBITS))); @@ -2873,13 +2876,16 @@ void R_DrawParticle_rgba(vissprite_t *vis) spacing = RenderTarget->GetPitch(); - for (int x = x1; x < (x1 + countbase); x++) + uint32_t fracstepx = 16 * FRACUNIT / countbase; + uint32_t fracposx = fracstepx / 2; + + for (int x = x1; x < (x1 + countbase); x++, fracposx += fracstepx) { dc_x = x; if (R_ClipSpriteColumnWithPortals(vis)) continue; dest = ylookup[yl] + x + (uint32_t*)dc_destorg; - DrawerCommandQueue::QueueCommand(dest, yl, spacing, ycount, fg, alpha); + DrawerCommandQueue::QueueCommand(dest, yl, spacing, ycount, fg, alpha, fracposx); } } From 80482e98a389c84b12770ffdfcd990065013f35c Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Sat, 24 Dec 2016 10:15:02 -0500 Subject: [PATCH 546/912] - renamed r_blendmode to r_blendmethod - did another drawer --- src/r_draw_pal.cpp | 59 ++++++++++++++++++++++++++++++++-------------- src/v_video.cpp | 4 ++-- 2 files changed, 43 insertions(+), 20 deletions(-) diff 
--git a/src/r_draw_pal.cpp b/src/r_draw_pal.cpp index 19015d8e11..0da9e38bee 100644 --- a/src/r_draw_pal.cpp +++ b/src/r_draw_pal.cpp @@ -43,8 +43,8 @@ #include "v_video.h" #include "r_draw_pal.h" -// [SP] r_blendmode - false = rgb555 matching (ZDoom classic), true = rgb666 (refactored) -CVAR(Bool, r_blendmode, false, CVAR_GLOBALCONFIG | CVAR_ARCHIVE) +// [SP] r_blendmethod - false = rgb555 matching (ZDoom classic), true = rgb666 (refactored) +CVAR(Bool, r_blendmethod, false, CVAR_GLOBALCONFIG | CVAR_ARCHIVE) /* [RH] This translucency algorithm is based on DOSDoom 0.65's, but uses @@ -309,7 +309,7 @@ namespace swrenderer fracstep *= thread->num_cores; pitch *= thread->num_cores; - if (!r_blendmode) + if (!r_blendmethod) { do { @@ -370,7 +370,7 @@ namespace swrenderer } pitch *= thread->num_cores; - if (!r_blendmode) + if (!r_blendmethod) { do { @@ -433,7 +433,7 @@ namespace swrenderer fracstep *= thread->num_cores; pitch *= thread->num_cores; - if (!r_blendmode) + if (!r_blendmethod) { do { @@ -539,7 +539,7 @@ namespace swrenderer fracstep *= thread->num_cores; pitch *= thread->num_cores; - if (!r_blendmode) + if (!r_blendmethod) { do { @@ -603,7 +603,7 @@ namespace swrenderer } pitch *= thread->num_cores; - if (!r_blendmode) + if (!r_blendmethod) { do { @@ -667,19 +667,42 @@ namespace swrenderer fracstep *= thread->num_cores; pitch *= thread->num_cores; - do + if (!r_blendmethod) { - uint8_t pix = source[frac >> bits]; - if (pix != 0) + do { - int r = clamp(GPalette.BaseColors[colormap[pix]].r - GPalette.BaseColors[*dest].r, 0, 255); - int g = clamp(GPalette.BaseColors[colormap[pix]].g - GPalette.BaseColors[*dest].g, 0, 255); - int b = clamp(GPalette.BaseColors[colormap[pix]].b - GPalette.BaseColors[*dest].b, 0, 255); - *dest = RGB256k.RGB[r>>2][g>>2][b>>2]; - } - frac += fracstep; - dest += pitch; - } while (--count); + uint8_t pix = source[frac >> bits]; + if (pix != 0) + { + uint32_t a = (bg2rgb[*dest] | 0x40100400) - fg2rgb[colormap[pix]]; + uint32_t b = a; + 
+ b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + *dest = RGB32k.All[a & (a >> 15)]; + } + frac += fracstep; + dest += pitch; + } while (--count); + } + else + { + do + { + uint8_t pix = source[frac >> bits]; + if (pix != 0) + { + int r = clamp(GPalette.BaseColors[colormap[pix]].r - GPalette.BaseColors[*dest].r, 0, 255); + int g = clamp(GPalette.BaseColors[colormap[pix]].g - GPalette.BaseColors[*dest].g, 0, 255); + int b = clamp(GPalette.BaseColors[colormap[pix]].b - GPalette.BaseColors[*dest].b, 0, 255); + *dest = RGB256k.RGB[r>>2][g>>2][b>>2]; + } + frac += fracstep; + dest += pitch; + } while (--count); + } } void DrawWallRevSubClamp4PalCommand::Execute(DrawerThread *thread) diff --git a/src/v_video.cpp b/src/v_video.cpp index 9abe13f1b4..2b31415a2b 100644 --- a/src/v_video.cpp +++ b/src/v_video.cpp @@ -66,7 +66,7 @@ #include "menu/menu.h" #include "r_data/voxels.h" -EXTERN_CVAR(Bool, r_blendmode) +EXTERN_CVAR(Bool, r_blendmethod) int active_con_scale(); @@ -392,7 +392,7 @@ void DCanvas::Dim (PalEntry color, float damount, int x1, int y1, int w, int h) int dimmedcolor_g = color.g * alpha; int dimmedcolor_b = color.b * alpha; - if (!r_blendmode) + if (!r_blendmethod) { for (y = h; y != 0; y--) { From 101108877acc4bba39668c066fba40793863b326 Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Sat, 24 Dec 2016 11:40:15 -0500 Subject: [PATCH 547/912] - Reimplemented rgb555 into all drawers in r_draw_pal.cpp including span drawers. All that remains now are the 4col drawers in r_drawt_pal.cpp. 
--- src/r_draw.cpp | 6 + src/r_draw_pal.cpp | 1051 ++++++++++++++++++++++++++++++++------------ src/r_plane.cpp | 8 + src/r_things.cpp | 73 ++- 4 files changed, 847 insertions(+), 291 deletions(-) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index a08a204ca3..b44bc5f951 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -236,16 +236,22 @@ namespace swrenderer } if (flags & STYLEF_InvertSource) { + dc_srcblend = Col2RGB8_Inverse[fglevel >> 10]; + dc_destblend = Col2RGB8_LessPrecision[bglevel >> 10]; dc_srcalpha = fglevel; dc_destalpha = bglevel; } else if (op == STYLEOP_Add && fglevel + bglevel <= FRACUNIT) { + dc_srcblend = Col2RGB8[fglevel >> 10]; + dc_destblend = Col2RGB8[bglevel >> 10]; dc_srcalpha = fglevel; dc_destalpha = bglevel; } else { + dc_srcblend = Col2RGB8_LessPrecision[fglevel >> 10]; + dc_destblend = Col2RGB8_LessPrecision[bglevel >> 10]; dc_srcalpha = fglevel; dc_destalpha = bglevel; } diff --git a/src/r_draw_pal.cpp b/src/r_draw_pal.cpp index 0da9e38bee..88fe93f85a 100644 --- a/src/r_draw_pal.cpp +++ b/src/r_draw_pal.cpp @@ -658,6 +658,9 @@ namespace swrenderer int bits = _fracbits; int pitch = _pitch; + uint32_t *fg2rgb = _srcblend; + uint32_t *bg2rgb = _destblend; + count = thread->count_for_thread(_dest_y, count); if (count <= 0) return; @@ -711,6 +714,9 @@ namespace swrenderer int count = _count; int bits = _fracbits; + uint32_t *fg2rgb = _srcblend; + uint32_t *bg2rgb = _destblend; + uint32_t dc_wall_texturefrac[4] = { _texturefrac[0], _texturefrac[1], _texturefrac[2], _texturefrac[3] }; uint32_t dc_wall_iscale[4] = { _iscale[0], _iscale[1], _iscale[2], _iscale[3] }; @@ -728,22 +734,48 @@ namespace swrenderer } pitch *= thread->num_cores; - do + if (!r_blendmethod) { - for (int i = 0; i < 4; ++i) + do { - uint8_t pix = _source[i][dc_wall_texturefrac[i] >> bits]; - if (pix != 0) + for (int i = 0; i < 4; ++i) { - uint32_t r = clamp(GPalette.BaseColors[_colormap[i][pix]].r - GPalette.BaseColors[dest[i]].r, 0, 255); - uint32_t g = 
clamp(GPalette.BaseColors[_colormap[i][pix]].g - GPalette.BaseColors[dest[i]].g, 0, 255); - uint32_t b = clamp(GPalette.BaseColors[_colormap[i][pix]].b - GPalette.BaseColors[dest[i]].b, 0, 255); - dest[i] = RGB256k.RGB[r>>2][g>>2][b>>2]; + uint8_t pix = _source[i][dc_wall_texturefrac[i] >> bits]; + if (pix != 0) + { + uint32_t a = (bg2rgb[dest[i]] | 0x40100400) - fg2rgb[_colormap[i][pix]]; + uint32_t b = a; + + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + dest[i] = RGB32k.All[a & (a >> 15)]; + } + dc_wall_texturefrac[i] += dc_wall_iscale[i]; } - dc_wall_texturefrac[i] += dc_wall_iscale[i]; - } - dest += _pitch; - } while (--count); + dest += _pitch; + } while (--count); + } + else + { + do + { + for (int i = 0; i < 4; ++i) + { + uint8_t pix = _source[i][dc_wall_texturefrac[i] >> bits]; + if (pix != 0) + { + uint32_t r = clamp(GPalette.BaseColors[_colormap[i][pix]].r - GPalette.BaseColors[dest[i]].r, 0, 255); + uint32_t g = clamp(GPalette.BaseColors[_colormap[i][pix]].g - GPalette.BaseColors[dest[i]].g, 0, 255); + uint32_t b = clamp(GPalette.BaseColors[_colormap[i][pix]].b - GPalette.BaseColors[dest[i]].b, 0, 255); + dest[i] = RGB256k.RGB[r>>2][g>>2][b>>2]; + } + dc_wall_texturefrac[i] += dc_wall_iscale[i]; + } + dest += _pitch; + } while (--count); + } } ///////////////////////////////////////////////////////////////////////// @@ -1319,18 +1351,30 @@ namespace swrenderer const PalEntry* pal = GPalette.BaseColors; - do + if (!r_blendmethod) { - int src_r = ((_srccolor >> 16) & 0xff) * _srcalpha; - int src_g = ((_srccolor >> 0) & 0xff) * _srcalpha; - int src_b = ((_srccolor >> 8) & 0xff) * _srcalpha; - int r = clamp((src_r + pal[*dest].r * _destalpha)>>18, 0, 255); - int g = clamp((src_g + pal[*dest].g * _destalpha)>>18, 0, 255); - int b = clamp((src_b + pal[*dest].b * _destalpha)>>18, 0, 255); - *dest = RGB256k.RGB[r][g][b]; - dest += pitch; - } while (--count); - + do + { + uint32_t bg; + bg = (fg + bg2rgb[*dest]) | 0x1f07c1f; + *dest = 
RGB32k.All[bg & (bg >> 15)]; + dest += pitch; + } while (--count); + } + else + { + do + { + int src_r = ((_srccolor >> 16) & 0xff) * _srcalpha; + int src_g = ((_srccolor >> 0) & 0xff) * _srcalpha; + int src_b = ((_srccolor >> 8) & 0xff) * _srcalpha; + int r = clamp((src_r + pal[*dest].r * _destalpha)>>18, 0, 255); + int g = clamp((src_g + pal[*dest].g * _destalpha)>>18, 0, 255); + int b = clamp((src_b + pal[*dest].b * _destalpha)>>18, 0, 255); + *dest = RGB256k.RGB[r][g][b]; + dest += pitch; + } while (--count); + } } void FillColumnAddClampPalCommand::Execute(DrawerThread *thread) @@ -1357,17 +1401,36 @@ namespace swrenderer const PalEntry* pal = GPalette.BaseColors; - do + if (!r_blendmethod) { - int src_r = ((_srccolor >> 16) & 0xff) * _srcalpha; - int src_g = ((_srccolor >> 0) & 0xff) * _srcalpha; - int src_b = ((_srccolor >> 8) & 0xff) * _srcalpha; - int r = clamp((src_r + pal[*dest].r * _destalpha)>>18, 0, 255); - int g = clamp((src_g + pal[*dest].g * _destalpha)>>18, 0, 255); - int b = clamp((src_b + pal[*dest].b * _destalpha)>>18, 0, 255); - *dest = RGB256k.RGB[r][g][b]; - dest += pitch; - } while (--count); + do + { + uint32_t a = fg + bg2rgb[*dest]; + uint32_t b = a; + + a |= 0x01f07c1f; + b &= 0x40100400; + a &= 0x3fffffff; + b = b - (b >> 5); + a |= b; + *dest = RGB32k.All[a & (a >> 15)]; + dest += pitch; + } while (--count); + } + else + { + do + { + int src_r = ((_srccolor >> 16) & 0xff) * _srcalpha; + int src_g = ((_srccolor >> 0) & 0xff) * _srcalpha; + int src_b = ((_srccolor >> 8) & 0xff) * _srcalpha; + int r = clamp((src_r + pal[*dest].r * _destalpha)>>18, 0, 255); + int g = clamp((src_g + pal[*dest].g * _destalpha)>>18, 0, 255); + int b = clamp((src_b + pal[*dest].b * _destalpha)>>18, 0, 255); + *dest = RGB256k.RGB[r][g][b]; + dest += pitch; + } while (--count); + } } void FillColumnSubClampPalCommand::Execute(DrawerThread *thread) @@ -1378,6 +1441,8 @@ namespace swrenderer count = _count; dest = _dest; + uint32_t *bg2rgb = _destblend; + 
uint32_t fg = _srccolor; int pitch = _pitch; @@ -1390,19 +1455,37 @@ namespace swrenderer const PalEntry* palette = GPalette.BaseColors; - do + if (!r_blendmethod) { - int src_r = ((_srccolor >> 16) & 0xff) * _srcalpha; - int src_g = ((_srccolor >> 0) & 0xff) * _srcalpha; - int src_b = ((_srccolor >> 8) & 0xff) * _srcalpha; - int bg = *dest; - int r = MAX((src_r * _srcalpha - palette[bg].r * _destalpha)>>18, 0); - int g = MAX((src_g * _srcalpha - palette[bg].g * _destalpha)>>18, 0); - int b = MAX((src_b * _srcalpha - palette[bg].b * _destalpha)>>18, 0); + do + { + uint32_t a = fg - bg2rgb[*dest]; + uint32_t b = a; - *dest = RGB256k.RGB[r][g][b]; - dest += pitch; - } while (--count); + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + *dest = RGB32k.All[a & (a >> 15)]; + dest += pitch; + } while (--count); + } + else + { + do + { + int src_r = ((_srccolor >> 16) & 0xff) * _srcalpha; + int src_g = ((_srccolor >> 0) & 0xff) * _srcalpha; + int src_b = ((_srccolor >> 8) & 0xff) * _srcalpha; + int bg = *dest; + int r = MAX((src_r * _srcalpha - palette[bg].r * _destalpha)>>18, 0); + int g = MAX((src_g * _srcalpha - palette[bg].g * _destalpha)>>18, 0); + int b = MAX((src_b * _srcalpha - palette[bg].b * _destalpha)>>18, 0); + + *dest = RGB256k.RGB[r][g][b]; + dest += pitch; + } while (--count); + } } void FillColumnRevSubClampPalCommand::Execute(DrawerThread *thread) @@ -1415,6 +1498,8 @@ namespace swrenderer return; dest = _dest; + uint32_t *bg2rgb = _destblend; + uint32_t fg = _srccolor; int pitch = _pitch; @@ -1427,19 +1512,37 @@ namespace swrenderer const PalEntry *palette = GPalette.BaseColors; - do + if (!r_blendmethod) { - int src_r = ((_srccolor >> 16) & 0xff) * _srcalpha; - int src_g = ((_srccolor >> 0) & 0xff) * _srcalpha; - int src_b = ((_srccolor >> 8) & 0xff) * _srcalpha; - int bg = *dest; - int r = MAX((src_r * _srcalpha - palette[bg].r * _destalpha)>>18, 0); - int g = MAX((src_g * _srcalpha - palette[bg].g * _destalpha)>>18, 0); - int b = 
MAX((src_b * _srcalpha - palette[bg].b * _destalpha)>>18, 0); + do + { + uint32_t a = (bg2rgb[*dest] | 0x40100400) - fg; + uint32_t b = a; - *dest = RGB256k.RGB[r][g][b]; - dest += pitch; - } while (--count); + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + *dest = RGB32k.All[a & (a >> 15)]; + dest += pitch; + } while (--count); + } + else + { + do + { + int src_r = ((_srccolor >> 16) & 0xff) * _srcalpha; + int src_g = ((_srccolor >> 0) & 0xff) * _srcalpha; + int src_b = ((_srccolor >> 8) & 0xff) * _srcalpha; + int bg = *dest; + int r = MAX((src_r * _srcalpha - palette[bg].r * _destalpha)>>18, 0); + int g = MAX((src_g * _srcalpha - palette[bg].g * _destalpha)>>18, 0); + int b = MAX((src_b * _srcalpha - palette[bg].b * _destalpha)>>18, 0); + + *dest = RGB256k.RGB[r][g][b]; + dest += pitch; + } while (--count); + } } void DrawColumnAddPalCommand::Execute(DrawerThread *thread) @@ -1465,21 +1568,41 @@ namespace swrenderer fracstep *= thread->num_cores; pitch *= thread->num_cores; + uint32_t *fg2rgb = _srcblend; + uint32_t *bg2rgb = _destblend; const uint8_t *colormap = _colormap; const uint8_t *source = _source; const PalEntry *palette = GPalette.BaseColors; - do + if (!r_blendmethod) { - uint32_t fg = colormap[source[frac >> FRACBITS]]; - uint32_t bg = *dest; - uint32_t r = MIN((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 63); - uint32_t g = MIN((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 63); - uint32_t b = MIN((palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 63); - *dest = RGB256k.RGB[r][g][b]; - dest += pitch; - frac += fracstep; - } while (--count); + do + { + uint32_t fg = colormap[source[frac >> FRACBITS]]; + uint32_t bg = *dest; + + fg = fg2rgb[fg]; + bg = bg2rgb[bg]; + fg = (fg + bg) | 0x1f07c1f; + *dest = RGB32k.All[fg & (fg >> 15)]; + dest += pitch; + frac += fracstep; + } while (--count); + } + else + { + do + { + uint32_t fg = colormap[source[frac >> FRACBITS]]; + uint32_t bg = *dest; + 
uint32_t r = MIN((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 63); + uint32_t g = MIN((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 63); + uint32_t b = MIN((palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 63); + *dest = RGB256k.RGB[r][g][b]; + dest += pitch; + frac += fracstep; + } while (--count); + } } void DrawColumnTranslatedPalCommand::Execute(DrawerThread *thread) @@ -1543,23 +1666,43 @@ namespace swrenderer fracstep *= thread->num_cores; pitch *= thread->num_cores; + uint32_t *fg2rgb = _srcblend; + uint32_t *bg2rgb = _destblend; const uint8_t *translation = _translation; const uint8_t *colormap = _colormap; const uint8_t *source = _source; const PalEntry *palette = GPalette.BaseColors; - do + if (!r_blendmethod) { - uint32_t fg = colormap[translation[source[frac >> FRACBITS]]]; - uint32_t bg = *dest; - uint32_t r = MIN((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 63); - uint32_t g = MIN((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 63); - uint32_t b = MIN((palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 63); - *dest = RGB256k.RGB[r][g][b]; - dest += pitch; - frac += fracstep; - } while (--count); + do + { + uint32_t fg = colormap[translation[source[frac >> FRACBITS]]]; + uint32_t bg = *dest; + + fg = fg2rgb[fg]; + bg = bg2rgb[bg]; + fg = (fg + bg) | 0x1f07c1f; + *dest = RGB32k.All[fg & (fg >> 15)]; + dest += pitch; + frac += fracstep; + } while (--count); + } + else + { + do + { + uint32_t fg = colormap[translation[source[frac >> FRACBITS]]]; + uint32_t bg = *dest; + uint32_t r = MIN((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 63); + uint32_t g = MIN((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 63); + uint32_t b = MIN((palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 63); + *dest = RGB256k.RGB[r][g][b]; + dest += pitch; + frac += fracstep; + } while (--count); + } } void DrawColumnShadedPalCommand::Execute(DrawerThread 
*thread) @@ -1586,21 +1729,37 @@ namespace swrenderer const uint8_t *source = _source; const uint8_t *colormap = _colormap; - + uint32_t *fgstart = &Col2RGB8[0][_color]; const PalEntry *palette = GPalette.BaseColors; - do + if (!r_blendmethod) { - uint32_t val = source[frac >> FRACBITS]; + do + { + uint32_t val = colormap[source[frac >> FRACBITS]]; + uint32_t fg = fgstart[val << 8]; + val = (Col2RGB8[64 - val][*dest] + fg) | 0x1f07c1f; + *dest = RGB32k.All[val & (val >> 15)]; - int r = (palette[*dest].r * (255-val) + palette[_color].r * val) >> 10; - int g = (palette[*dest].g * (255-val) + palette[_color].g * val) >> 10; - int b = (palette[*dest].b * (255-val) + palette[_color].b * val) >> 10; - *dest = RGB256k.RGB[clamp(r,0,63)][clamp(g,0,63)][clamp(b,0,63)]; + dest += pitch; + frac += fracstep; + } while (--count); + } + else + { + do + { + uint32_t val = source[frac >> FRACBITS]; - dest += pitch; - frac += fracstep; - } while (--count); + int r = (palette[*dest].r * (255-val) + palette[_color].r * val) >> 10; + int g = (palette[*dest].g * (255-val) + palette[_color].g * val) >> 10; + int b = (palette[*dest].b * (255-val) + palette[_color].b * val) >> 10; + *dest = RGB256k.RGB[clamp(r,0,63)][clamp(g,0,63)][clamp(b,0,63)]; + + dest += pitch; + frac += fracstep; + } while (--count); + } } void DrawColumnAddClampPalCommand::Execute(DrawerThread *thread) @@ -1628,19 +1787,41 @@ namespace swrenderer const uint8_t *colormap = _colormap; const uint8_t *source = _source; + uint32_t *fg2rgb = _srcblend; + uint32_t *bg2rgb = _destblend; const PalEntry *palette = GPalette.BaseColors; - do + if (!r_blendmethod) { - int fg = colormap[source[frac >> FRACBITS]]; - int bg = *dest; - int r = MIN((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 63); - int g = MIN((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 63); - int b = MIN((palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 63); - *dest = RGB256k.RGB[r][g][b]; - dest += pitch; - frac += 
fracstep; - } while (--count); + do + { + uint32_t a = fg2rgb[colormap[source[frac >> FRACBITS]]] + bg2rgb[*dest]; + uint32_t b = a; + + a |= 0x01f07c1f; + b &= 0x40100400; + a &= 0x3fffffff; + b = b - (b >> 5); + a |= b; + *dest = RGB32k.All[a & (a >> 15)]; + dest += pitch; + frac += fracstep; + } while (--count); + } + else + { + do + { + int fg = colormap[source[frac >> FRACBITS]]; + int bg = *dest; + int r = MIN((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 63); + int g = MIN((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 63); + int b = MIN((palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 63); + *dest = RGB256k.RGB[r][g][b]; + dest += pitch; + frac += fracstep; + } while (--count); + } } void DrawColumnAddClampTranslatedPalCommand::Execute(DrawerThread *thread) @@ -1669,19 +1850,41 @@ namespace swrenderer const uint8_t *translation = _translation; const uint8_t *colormap = _colormap; const uint8_t *source = _source; + uint32_t *fg2rgb = _srcblend; + uint32_t *bg2rgb = _destblend; const PalEntry *palette = GPalette.BaseColors; - do + if (!r_blendmethod) { - int fg = colormap[translation[source[frac >> FRACBITS]]]; - int bg = *dest; - int r = MIN((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 63); - int g = MIN((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 63); - int b = MIN((palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 63); - *dest = RGB256k.RGB[r][g][b]; - dest += pitch; - frac += fracstep; - } while (--count); + do + { + uint32_t a = fg2rgb[colormap[translation[source[frac >> FRACBITS]]]] + bg2rgb[*dest]; + uint32_t b = a; + + a |= 0x01f07c1f; + b &= 0x40100400; + a &= 0x3fffffff; + b = b - (b >> 5); + a |= b; + *dest = RGB32k.All[(a >> 15) & a]; + dest += pitch; + frac += fracstep; + } while (--count); + } + else + { + do + { + int fg = colormap[translation[source[frac >> FRACBITS]]]; + int bg = *dest; + int r = MIN((palette[fg].r * _srcalpha + palette[bg].r * 
_destalpha)>>18, 63); + int g = MIN((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 63); + int b = MIN((palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 63); + *dest = RGB256k.RGB[r][g][b]; + dest += pitch; + frac += fracstep; + } while (--count); + } } void DrawColumnSubClampPalCommand::Execute(DrawerThread *thread) @@ -1709,19 +1912,40 @@ namespace swrenderer const uint8_t *colormap = _colormap; const uint8_t *source = _source; + uint32_t *fg2rgb = _srcblend; + uint32_t *bg2rgb = _destblend; const PalEntry *palette = GPalette.BaseColors; - do + if (!r_blendmethod) { - int fg = colormap[source[frac >> FRACBITS]]; - int bg = *dest; - int r = MAX((palette[fg].r * _srcalpha - palette[bg].r * _destalpha)>>18, 0); - int g = MAX((palette[fg].g * _srcalpha - palette[bg].g * _destalpha)>>18, 0); - int b = MAX((palette[fg].b * _srcalpha - palette[bg].b * _destalpha)>>18, 0); - *dest = RGB256k.RGB[r][g][b]; - dest += pitch; - frac += fracstep; - } while (--count); + do + { + uint32_t a = (fg2rgb[colormap[source[frac >> FRACBITS]]] | 0x40100400) - bg2rgb[*dest]; + uint32_t b = a; + + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + *dest = RGB32k.All[a & (a >> 15)]; + dest += pitch; + frac += fracstep; + } while (--count); + } + else + { + do + { + int fg = colormap[source[frac >> FRACBITS]]; + int bg = *dest; + int r = MAX((palette[fg].r * _srcalpha - palette[bg].r * _destalpha)>>18, 0); + int g = MAX((palette[fg].g * _srcalpha - palette[bg].g * _destalpha)>>18, 0); + int b = MAX((palette[fg].b * _srcalpha - palette[bg].b * _destalpha)>>18, 0); + *dest = RGB256k.RGB[r][g][b]; + dest += pitch; + frac += fracstep; + } while (--count); + } } void DrawColumnSubClampTranslatedPalCommand::Execute(DrawerThread *thread) @@ -1750,19 +1974,40 @@ namespace swrenderer const uint8_t *translation = _translation; const uint8_t *colormap = _colormap; const uint8_t *source = _source; + uint32_t *fg2rgb = _srcblend; + uint32_t *bg2rgb = 
_destblend; const PalEntry *palette = GPalette.BaseColors; - do + if (!r_blendmethod) { - int fg = colormap[translation[source[frac >> FRACBITS]]]; - int bg = *dest; - int r = MAX((palette[fg].r * _srcalpha - palette[bg].r * _destalpha)>>18, 0); - int g = MAX((palette[fg].g * _srcalpha - palette[bg].g * _destalpha)>>18, 0); - int b = MAX((palette[fg].b * _srcalpha - palette[bg].b * _destalpha)>>18, 0); - *dest = RGB256k.RGB[r][g][b]; - dest += pitch; - frac += fracstep; - } while (--count); + do + { + uint32_t a = (fg2rgb[colormap[translation[source[frac >> FRACBITS]]]] | 0x40100400) - bg2rgb[*dest]; + uint32_t b = a; + + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + *dest = RGB32k.All[(a >> 15) & a]; + dest += pitch; + frac += fracstep; + } while (--count); + } + else + { + do + { + int fg = colormap[translation[source[frac >> FRACBITS]]]; + int bg = *dest; + int r = MAX((palette[fg].r * _srcalpha - palette[bg].r * _destalpha)>>18, 0); + int g = MAX((palette[fg].g * _srcalpha - palette[bg].g * _destalpha)>>18, 0); + int b = MAX((palette[fg].b * _srcalpha - palette[bg].b * _destalpha)>>18, 0); + *dest = RGB256k.RGB[r][g][b]; + dest += pitch; + frac += fracstep; + } while (--count); + } } void DrawColumnRevSubClampPalCommand::Execute(DrawerThread *thread) @@ -1790,19 +2035,40 @@ namespace swrenderer const uint8_t *colormap = _colormap; const uint8_t *source = _source; + uint32_t *fg2rgb = _srcblend; + uint32_t *bg2rgb = _destblend; const PalEntry *palette = GPalette.BaseColors; - do + if (!r_blendmethod) { - int fg = colormap[source[frac >> FRACBITS]]; - int bg = *dest; - int r = MAX((-palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 0); - int g = MAX((-palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 0); - int b = MAX((-palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 0); - *dest = RGB256k.RGB[r][g][b]; - dest += pitch; - frac += fracstep; - } while (--count); + do + { + uint32_t a = (bg2rgb[*dest] | 
0x40100400) - fg2rgb[colormap[source[frac >> FRACBITS]]]; + uint32_t b = a; + + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + *dest = RGB32k.All[a & (a >> 15)]; + dest += pitch; + frac += fracstep; + } while (--count); + } + else + { + do + { + int fg = colormap[source[frac >> FRACBITS]]; + int bg = *dest; + int r = MAX((-palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 0); + int g = MAX((-palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 0); + int b = MAX((-palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 0); + *dest = RGB256k.RGB[r][g][b]; + dest += pitch; + frac += fracstep; + } while (--count); + } } void DrawColumnRevSubClampTranslatedPalCommand::Execute(DrawerThread *thread) @@ -1831,19 +2097,40 @@ namespace swrenderer const uint8_t *translation = _translation; const uint8_t *colormap = _colormap; const uint8_t *source = _source; + uint32_t *fg2rgb = _srcblend; + uint32_t *bg2rgb = _destblend; const PalEntry *palette = GPalette.BaseColors; - do + if (!r_blendmethod) { - int fg = colormap[translation[source[frac >> FRACBITS]]]; - int bg = *dest; - int r = MAX((-palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 0); - int g = MAX((-palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 0); - int b = MAX((-palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 0); - *dest = RGB256k.RGB[r][g][b]; - dest += pitch; - frac += fracstep; - } while (--count); + do + { + uint32_t a = (bg2rgb[*dest] | 0x40100400) - fg2rgb[colormap[translation[source[frac >> FRACBITS]]]]; + uint32_t b = a; + + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + *dest = RGB32k.All[(a >> 15) & a]; + dest += pitch; + frac += fracstep; + } while (--count); + } + else + { + do + { + int fg = colormap[translation[source[frac >> FRACBITS]]]; + int bg = *dest; + int r = MAX((-palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 0); + int g = MAX((-palette[fg].g * _srcalpha + palette[bg].g * 
_destalpha)>>18, 0); + int b = MAX((-palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 0); + *dest = RGB256k.RGB[r][g][b]; + dest += pitch; + frac += fracstep; + } while (--count); + } } ///////////////////////////////////////////////////////////////////////// @@ -2103,6 +2390,8 @@ namespace swrenderer const uint8_t *colormap = _colormap; int count; int spot; + uint32_t *fg2rgb = _srcblend; + uint32_t *bg2rgb = _destblend; xfrac = _xfrac; yfrac = _yfrac; @@ -2116,41 +2405,81 @@ namespace swrenderer const PalEntry *palette = GPalette.BaseColors; - if (_xbits == 6 && _ybits == 6) + if (!r_blendmethod) { - // 64x64 is the most common case by far, so special case it. - do + if (_xbits == 6 && _ybits == 6) { - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t fg = colormap[source[spot]]; - uint32_t bg = *dest; - int r = MAX((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 0); - int g = MAX((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 0); - int b = MAX((palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 0); - *dest++ = RGB256k.RGB[r][g][b]; - - xfrac += xstep; - yfrac += ystep; - } while (--count); + // 64x64 is the most common case by far, so special case it. 
+ do + { + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t fg = colormap[source[spot]]; + uint32_t bg = *dest; + fg = fg2rgb[fg]; + bg = bg2rgb[bg]; + fg = (fg + bg) | 0x1f07c1f; + *dest++ = RGB32k.All[fg & (fg >> 15)]; + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + else + { + uint8_t yshift = 32 - _ybits; + uint8_t xshift = yshift - _xbits; + int xmask = ((1 << _xbits) - 1) << _ybits; + do + { + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + uint32_t fg = colormap[source[spot]]; + uint32_t bg = *dest; + fg = fg2rgb[fg]; + bg = bg2rgb[bg]; + fg = (fg + bg) | 0x1f07c1f; + *dest++ = RGB32k.All[fg & (fg >> 15)]; + xfrac += xstep; + yfrac += ystep; + } while (--count); + } } else { - uint8_t yshift = 32 - _ybits; - uint8_t xshift = yshift - _xbits; - int xmask = ((1 << _xbits) - 1) << _ybits; - do + if (_xbits == 6 && _ybits == 6) { - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - uint32_t fg = colormap[source[spot]]; - uint32_t bg = *dest; - int r = MAX((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 0); - int g = MAX((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 0); - int b = MAX((palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 0); - *dest++ = RGB256k.RGB[r][g][b]; + // 64x64 is the most common case by far, so special case it. 
+ do + { + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t fg = colormap[source[spot]]; + uint32_t bg = *dest; + int r = MAX((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 0); + int g = MAX((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 0); + int b = MAX((palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 0); + *dest++ = RGB256k.RGB[r][g][b]; - xfrac += xstep; - yfrac += ystep; - } while (--count); + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + else + { + uint8_t yshift = 32 - _ybits; + uint8_t xshift = yshift - _xbits; + int xmask = ((1 << _xbits) - 1) << _ybits; + do + { + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + uint32_t fg = colormap[source[spot]]; + uint32_t bg = *dest; + int r = MAX((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 0); + int g = MAX((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 0); + int b = MAX((palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 0); + *dest++ = RGB256k.RGB[r][g][b]; + + xfrac += xstep; + yfrac += ystep; + } while (--count); + } } } @@ -2168,6 +2497,8 @@ namespace swrenderer const uint8_t *colormap = _colormap; int count; int spot; + uint32_t *fg2rgb = _srcblend; + uint32_t *bg2rgb = _destblend; const PalEntry *palette = GPalette.BaseColors; @@ -2181,53 +2512,107 @@ namespace swrenderer xstep = _xstep; ystep = _ystep; - if (_xbits == 6 && _ybits == 6) + if (!r_blendmethod) { - // 64x64 is the most common case by far, so special case it. - do + if (_xbits == 6 && _ybits == 6) { - uint8_t texdata; - - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - texdata = source[spot]; - if (texdata != 0) + // 64x64 is the most common case by far, so special case it. 
+ do { - uint32_t fg = colormap[texdata]; - uint32_t bg = *dest; - int r = MAX((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 0); - int g = MAX((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 0); - int b = MAX((palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 0); - *dest = RGB256k.RGB[r][g][b]; - } - dest++; - xfrac += xstep; - yfrac += ystep; - } while (--count); + uint8_t texdata; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + texdata = source[spot]; + if (texdata != 0) + { + uint32_t fg = colormap[texdata]; + uint32_t bg = *dest; + fg = fg2rgb[fg]; + bg = bg2rgb[bg]; + fg = (fg + bg) | 0x1f07c1f; + *dest = RGB32k.All[fg & (fg >> 15)]; + } + dest++; + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + else + { + uint8_t yshift = 32 - _ybits; + uint8_t xshift = yshift - _xbits; + int xmask = ((1 << _xbits) - 1) << _ybits; + do + { + uint8_t texdata; + + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + texdata = source[spot]; + if (texdata != 0) + { + uint32_t fg = colormap[texdata]; + uint32_t bg = *dest; + fg = fg2rgb[fg]; + bg = bg2rgb[bg]; + fg = (fg + bg) | 0x1f07c1f; + *dest = RGB32k.All[fg & (fg >> 15)]; + } + dest++; + xfrac += xstep; + yfrac += ystep; + } while (--count); + } } else { - uint8_t yshift = 32 - _ybits; - uint8_t xshift = yshift - _xbits; - int xmask = ((1 << _xbits) - 1) << _ybits; - do + if (_xbits == 6 && _ybits == 6) { - uint8_t texdata; - - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - texdata = source[spot]; - if (texdata != 0) + // 64x64 is the most common case by far, so special case it. 
+ do { - uint32_t fg = colormap[texdata]; - uint32_t bg = *dest; - int r = MAX((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 0); - int g = MAX((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 0); - int b = MAX((palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 0); - *dest = RGB256k.RGB[r][g][b]; - } - dest++; - xfrac += xstep; - yfrac += ystep; - } while (--count); + uint8_t texdata; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + texdata = source[spot]; + if (texdata != 0) + { + uint32_t fg = colormap[texdata]; + uint32_t bg = *dest; + int r = MAX((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 0); + int g = MAX((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 0); + int b = MAX((palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 0); + *dest = RGB256k.RGB[r][g][b]; + } + dest++; + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + else + { + uint8_t yshift = 32 - _ybits; + uint8_t xshift = yshift - _xbits; + int xmask = ((1 << _xbits) - 1) << _ybits; + do + { + uint8_t texdata; + + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + texdata = source[spot]; + if (texdata != 0) + { + uint32_t fg = colormap[texdata]; + uint32_t bg = *dest; + int r = MAX((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 0); + int g = MAX((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 0); + int b = MAX((palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 0); + *dest = RGB256k.RGB[r][g][b]; + } + dest++; + xfrac += xstep; + yfrac += ystep; + } while (--count); + } } } @@ -2245,6 +2630,8 @@ namespace swrenderer const uint8_t *colormap = _colormap; int count; int spot; + uint32_t *fg2rgb = _srcblend; + uint32_t *bg2rgb = _destblend; const PalEntry *palette = GPalette.BaseColors; xfrac = _xfrac; @@ -2257,41 +2644,87 @@ namespace swrenderer xstep = _xstep; ystep = _ystep; - if (_xbits == 6 && _ybits == 6) + if (!r_blendmethod) { - // 64x64 
is the most common case by far, so special case it. - do + if (_xbits == 6 && _ybits == 6) { - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t fg = colormap[source[spot]]; - uint32_t bg = *dest; - int r = MAX((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 0); - int g = MAX((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 0); - int b = MAX((palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 0); - *dest++ = RGB256k.RGB[r][g][b]; + // 64x64 is the most common case by far, so special case it. + do + { + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t a = fg2rgb[colormap[source[spot]]] + bg2rgb[*dest]; + uint32_t b = a; - xfrac += xstep; - yfrac += ystep; - } while (--count); + a |= 0x01f07c1f; + b &= 0x40100400; + a &= 0x3fffffff; + b = b - (b >> 5); + a |= b; + *dest++ = RGB32k.All[a & (a >> 15)]; + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + else + { + uint8_t yshift = 32 - _ybits; + uint8_t xshift = yshift - _xbits; + int xmask = ((1 << _xbits) - 1) << _ybits; + do + { + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + uint32_t a = fg2rgb[colormap[source[spot]]] + bg2rgb[*dest]; + uint32_t b = a; + + a |= 0x01f07c1f; + b &= 0x40100400; + a &= 0x3fffffff; + b = b - (b >> 5); + a |= b; + *dest++ = RGB32k.All[a & (a >> 15)]; + xfrac += xstep; + yfrac += ystep; + } while (--count); + } } else { - uint8_t yshift = 32 - _ybits; - uint8_t xshift = yshift - _xbits; - int xmask = ((1 << _xbits) - 1) << _ybits; - do + if (_xbits == 6 && _ybits == 6) { - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - uint32_t fg = colormap[source[spot]]; - uint32_t bg = *dest; - int r = MAX((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 0); - int g = MAX((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 0); - int b = MAX((palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 0); - *dest++ = RGB256k.RGB[r][g][b]; + // 64x64 is the 
most common case by far, so special case it. + do + { + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t fg = colormap[source[spot]]; + uint32_t bg = *dest; + int r = MAX((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 0); + int g = MAX((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 0); + int b = MAX((palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 0); + *dest++ = RGB256k.RGB[r][g][b]; - xfrac += xstep; - yfrac += ystep; - } while (--count); + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + else + { + uint8_t yshift = 32 - _ybits; + uint8_t xshift = yshift - _xbits; + int xmask = ((1 << _xbits) - 1) << _ybits; + do + { + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + uint32_t fg = colormap[source[spot]]; + uint32_t bg = *dest; + int r = MAX((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 0); + int g = MAX((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 0); + int b = MAX((palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 0); + *dest++ = RGB256k.RGB[r][g][b]; + + xfrac += xstep; + yfrac += ystep; + } while (--count); + } } } @@ -2309,6 +2742,8 @@ namespace swrenderer const uint8_t *colormap = _colormap; int count; int spot; + uint32_t *fg2rgb = _srcblend; + uint32_t *bg2rgb = _destblend; const PalEntry *palette = GPalette.BaseColors; xfrac = _xfrac; @@ -2321,53 +2756,113 @@ namespace swrenderer xstep = _xstep; ystep = _ystep; - if (_xbits == 6 && _ybits == 6) + if (!r_columnmethod) { - // 64x64 is the most common case by far, so special case it. - do + if (_xbits == 6 && _ybits == 6) { - uint8_t texdata; - - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - texdata = source[spot]; - if (texdata != 0) + // 64x64 is the most common case by far, so special case it. 
+ do { - uint32_t fg = colormap[texdata]; - uint32_t bg = *dest; - int r = MAX((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 0); - int g = MAX((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 0); - int b = MAX((palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 0); - *dest = RGB256k.RGB[r][g][b]; - } - dest++; - xfrac += xstep; - yfrac += ystep; - } while (--count); + uint8_t texdata; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + texdata = source[spot]; + if (texdata != 0) + { + uint32_t a = fg2rgb[colormap[texdata]] + bg2rgb[*dest]; + uint32_t b = a; + + a |= 0x01f07c1f; + b &= 0x40100400; + a &= 0x3fffffff; + b = b - (b >> 5); + a |= b; + *dest = RGB32k.All[a & (a >> 15)]; + } + dest++; + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + else + { + uint8_t yshift = 32 - _ybits; + uint8_t xshift = yshift - _xbits; + int xmask = ((1 << _xbits) - 1) << _ybits; + do + { + uint8_t texdata; + + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + texdata = source[spot]; + if (texdata != 0) + { + uint32_t a = fg2rgb[colormap[texdata]] + bg2rgb[*dest]; + uint32_t b = a; + + a |= 0x01f07c1f; + b &= 0x40100400; + a &= 0x3fffffff; + b = b - (b >> 5); + a |= b; + *dest = RGB32k.All[a & (a >> 15)]; + } + dest++; + xfrac += xstep; + yfrac += ystep; + } while (--count); + } } else { - uint8_t yshift = 32 - _ybits; - uint8_t xshift = yshift - _xbits; - int xmask = ((1 << _xbits) - 1) << _ybits; - do + if (_xbits == 6 && _ybits == 6) { - uint8_t texdata; - - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - texdata = source[spot]; - if (texdata != 0) + // 64x64 is the most common case by far, so special case it. 
+ do { - uint32_t fg = colormap[texdata]; - uint32_t bg = *dest; - int r = MAX((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 0); - int g = MAX((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 0); - int b = MAX((palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 0); - *dest = RGB256k.RGB[r][g][b]; - } - dest++; - xfrac += xstep; - yfrac += ystep; - } while (--count); + uint8_t texdata; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + texdata = source[spot]; + if (texdata != 0) + { + uint32_t fg = colormap[texdata]; + uint32_t bg = *dest; + int r = MAX((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 0); + int g = MAX((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 0); + int b = MAX((palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 0); + *dest = RGB256k.RGB[r][g][b]; + } + dest++; + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + else + { + uint8_t yshift = 32 - _ybits; + uint8_t xshift = yshift - _xbits; + int xmask = ((1 << _xbits) - 1) << _ybits; + do + { + uint8_t texdata; + + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + texdata = source[spot]; + if (texdata != 0) + { + uint32_t fg = colormap[texdata]; + uint32_t bg = *dest; + int r = MAX((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 0); + int g = MAX((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 0); + int b = MAX((palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 0); + *dest = RGB256k.RGB[r][g][b]; + } + dest++; + xfrac += xstep; + yfrac += ystep; + } while (--count); + } } } diff --git a/src/r_plane.cpp b/src/r_plane.cpp index dbecb5247d..b8368e73a7 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -1509,12 +1509,16 @@ void R_DrawNormalPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t if (!additive) { spanfunc = R_DrawSpanMaskedTranslucent; + dc_srcblend = Col2RGB8[alpha>>10]; + dc_destblend = Col2RGB8[(OPAQUE-alpha)>>10]; 
dc_srcalpha = alpha; dc_destalpha = OPAQUE-alpha; } else { spanfunc = R_DrawSpanMaskedAddClamp; + dc_srcblend = Col2RGB8_LessPrecision[alpha>>10]; + dc_destblend = Col2RGB8_LessPrecision[FRACUNIT>>10]; dc_srcalpha = alpha; dc_destalpha = FRACUNIT; } @@ -1531,12 +1535,16 @@ void R_DrawNormalPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t if (!additive) { spanfunc = R_DrawSpanTranslucent; + dc_srcblend = Col2RGB8[alpha>>10]; + dc_destblend = Col2RGB8[(OPAQUE-alpha)>>10]; dc_srcalpha = alpha; dc_destalpha = OPAQUE-alpha; } else { spanfunc = R_DrawSpanAddClamp; + dc_srcblend = Col2RGB8_LessPrecision[alpha>>10]; + dc_destblend = Col2RGB8_LessPrecision[FRACUNIT>>10]; dc_srcalpha = alpha; dc_destalpha = FRACUNIT; } diff --git a/src/r_things.cpp b/src/r_things.cpp index 61da979da8..95b563c0d6 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -72,6 +72,7 @@ EXTERN_CVAR(Int, r_drawfuzz) EXTERN_CVAR(Bool, r_deathcamera); EXTERN_CVAR(Bool, r_drawplayersprites) EXTERN_CVAR(Bool, r_drawvoxels) +EXTERN_CVAR(Bool, r_blendmethod) CVAR(Bool, r_fullbrightignoresectorcolor, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); //CVAR(Bool, r_splitsprites, true, CVAR_ARCHIVE) @@ -2640,8 +2641,10 @@ static void R_DrawMaskedSegsBehindParticle (const vissprite_t *vis) void R_DrawParticle (vissprite_t *vis) { + DWORD *bg2rgb; int spacing; BYTE *dest; + DWORD fg; BYTE color = vis->Style.colormap[vis->startfrac]; int yl = vis->y1; int ycount = vis->y2 - yl + 1; @@ -2655,8 +2658,32 @@ void R_DrawParticle (vissprite_t *vis) // vis->renderflags holds translucency level (0-255) fixed_t fglevel, bglevel; - fglevel = ((vis->renderflags + 1) << 8) & ~0x3ff; - bglevel = FRACUNIT-fglevel; + { + DWORD *fg2rgb; + + fglevel = ((vis->renderflags + 1) << 8) & ~0x3ff; + bglevel = FRACUNIT-fglevel; + fg2rgb = Col2RGB8[fglevel>>10]; + bg2rgb = Col2RGB8[bglevel>>10]; + fg = fg2rgb[color]; + } + + /* + + spacing = RenderTarget->GetPitch() - countbase; + dest = ylookup[yl] + x1 + dc_destorg; + + do + { 
+ int count = countbase; + do + { + DWORD bg = bg2rgb[*dest]; + bg = (fg+bg) | 0x1f07c1f; + *dest++ = RGB32k.All[bg & (bg>>15)]; + } while (--count); + dest += spacing; + } while (--ycount);*/ // original was row-wise // width = countbase @@ -2664,20 +2691,40 @@ void R_DrawParticle (vissprite_t *vis) spacing = RenderTarget->GetPitch(); - for (int x = x1; x < (x1+countbase); x++) + if (!r_blendmethod) { - dc_x = x; - if (R_ClipSpriteColumnWithPortals(vis)) - continue; - dest = ylookup[yl] + x + dc_destorg; - for (int y = 0; y < ycount; y++) + for (int x = x1; x < (x1+countbase); x++) { - uint32_t dest_r = MIN((GPalette.BaseColors[*dest].r * bglevel + GPalette.BaseColors[color].r * fglevel) >> 18, 63); - uint32_t dest_g = MIN((GPalette.BaseColors[*dest].g * bglevel + GPalette.BaseColors[color].g * fglevel) >> 18, 63); - uint32_t dest_b = MIN((GPalette.BaseColors[*dest].b * bglevel + GPalette.BaseColors[color].b * fglevel) >> 18, 63); + dc_x = x; + if (R_ClipSpriteColumnWithPortals(vis)) + continue; + dest = ylookup[yl] + x + dc_destorg; + for (int y = 0; y < ycount; y++) + { + DWORD bg = bg2rgb[*dest]; + bg = (fg+bg) | 0x1f07c1f; + *dest = RGB32k.All[bg & (bg>>15)]; + dest += spacing; + } + } + } + else + { + for (int x = x1; x < (x1+countbase); x++) + { + dc_x = x; + if (R_ClipSpriteColumnWithPortals(vis)) + continue; + dest = ylookup[yl] + x + dc_destorg; + for (int y = 0; y < ycount; y++) + { + uint32_t dest_r = MIN((GPalette.BaseColors[*dest].r * bglevel + GPalette.BaseColors[color].r * fglevel) >> 18, 63); + uint32_t dest_g = MIN((GPalette.BaseColors[*dest].g * bglevel + GPalette.BaseColors[color].g * fglevel) >> 18, 63); + uint32_t dest_b = MIN((GPalette.BaseColors[*dest].b * bglevel + GPalette.BaseColors[color].b * fglevel) >> 18, 63); - *dest = RGB256k.RGB[dest_r][dest_g][dest_b]; - dest += spacing; + *dest = RGB256k.RGB[dest_r][dest_g][dest_b]; + dest += spacing; + } } } } From 785b58f57a9c8b93ad407a021ab536d5ed3545b7 Mon Sep 17 00:00:00 2001 From: Rachael 
Alexanderson Date: Sat, 24 Dec 2016 12:30:45 -0500 Subject: [PATCH 548/912] - Reimplemented rgb555 for drawers in r_drawt_pal.cpp --- src/r_drawt_pal.cpp | 529 ++++++++++++++++++++++++++++++++++---------- 1 file changed, 415 insertions(+), 114 deletions(-) diff --git a/src/r_drawt_pal.cpp b/src/r_drawt_pal.cpp index d07ecd67a5..dcc0a09389 100644 --- a/src/r_drawt_pal.cpp +++ b/src/r_drawt_pal.cpp @@ -50,6 +50,8 @@ #include "v_video.h" #include "r_draw_pal.h" +EXTERN_CVAR(Bool, r_blendmethod) + // I should have commented this stuff better. // // dc_temp is the buffer R_DrawColumnHoriz writes into. @@ -444,23 +446,42 @@ namespace swrenderer if (count <= 0) return; + const uint32_t *fg2rgb = _srcblend; + const uint32_t *bg2rgb = _destblend; dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4 + hx]; pitch = _pitch * thread->num_cores; colormap = _colormap; const PalEntry *palette = GPalette.BaseColors; - do { - uint32_t fg = colormap[*source]; - uint32_t bg = *dest; + if (!r_blendmethod) + { + do { + uint32_t fg = colormap[*source]; + uint32_t bg = *dest; - int r = MIN((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 63); - int g = MIN((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 63); - int b = MIN((palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 63); - *dest = RGB256k.RGB[r][g][b]; - source += 4; - dest += pitch; - } while (--count); + fg = fg2rgb[fg]; + bg = bg2rgb[bg]; + fg = (fg+bg) | 0x1f07c1f; + *dest = RGB32k.All[fg & (fg>>15)]; + source += 4; + dest += pitch; + } while (--count); + } + else + { + do { + uint32_t fg = colormap[*source]; + uint32_t bg = *dest; + + int r = MIN((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 63); + int g = MIN((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 63); + int b = MIN((palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 63); + *dest = RGB256k.RGB[r][g][b]; + source += 
4; + dest += pitch; + } while (--count); + } } void DrawColumnRt4AddPalCommand::Execute(DrawerThread *thread) @@ -475,30 +496,72 @@ namespace swrenderer if (count <= 0) return; + const uint32_t *fg2rgb = _srcblend; + const uint32_t *bg2rgb = _destblend; dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4]; pitch = _pitch * thread->num_cores; colormap = _colormap; const PalEntry *palette = GPalette.BaseColors; - do { - for (int ks = 0; ks < 4; ks++) - { // [SP] this 4col function was a block of copy-pasted code. 4 times. I regret nothing. - uint32_t fg = colormap[source[ks]]; - uint32_t bg = dest[ks]; - int r = MIN((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 63); - int g = MIN((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 63); - int b = MIN((palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 63); - dest[ks] = RGB256k.RGB[r][g][b]; - } + if (!r_blendmethod) + { + do { + uint32_t fg = colormap[source[0]]; + uint32_t bg = dest[0]; + fg = fg2rgb[fg]; + bg = bg2rgb[bg]; + fg = (fg+bg) | 0x1f07c1f; + dest[0] = RGB32k.All[fg & (fg>>15)]; - source += 4; - dest += pitch; - } while (--count); + fg = colormap[source[1]]; + bg = dest[1]; + fg = fg2rgb[fg]; + bg = bg2rgb[bg]; + fg = (fg+bg) | 0x1f07c1f; + dest[1] = RGB32k.All[fg & (fg>>15)]; + + + fg = colormap[source[2]]; + bg = dest[2]; + fg = fg2rgb[fg]; + bg = bg2rgb[bg]; + fg = (fg+bg) | 0x1f07c1f; + dest[2] = RGB32k.All[fg & (fg>>15)]; + + fg = colormap[source[3]]; + bg = dest[3]; + fg = fg2rgb[fg]; + bg = bg2rgb[bg]; + fg = (fg+bg) | 0x1f07c1f; + dest[3] = RGB32k.All[fg & (fg>>15)]; + + source += 4; + dest += pitch; + } while (--count); + } + else + { + do { + for (int ks = 0; ks < 4; ks++) + { // [SP] this 4col function was a block of copy-pasted code. 4 times. I regret nothing. 
+ uint32_t fg = colormap[source[ks]]; + uint32_t bg = dest[ks]; + int r = MIN((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 63); + int g = MIN((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 63); + int b = MIN((palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 63); + dest[ks] = RGB256k.RGB[r][g][b]; + } + + source += 4; + dest += pitch; + } while (--count); + } } void DrawColumnRt1ShadedPalCommand::Execute(DrawerThread *thread) { + uint32_t *fgstart; const uint8_t *colormap; uint8_t *source; uint8_t *dest; @@ -509,25 +572,41 @@ namespace swrenderer if (count <= 0) return; + fgstart = &Col2RGB8[0][_color]; colormap = _colormap; dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4 + hx]; pitch = _pitch * thread->num_cores; const PalEntry *palette = GPalette.BaseColors; - do { - uint32_t val = *source; - int r = (palette[*dest].r * (255-val) + palette[_color].r * val) >> 10; - int g = (palette[*dest].g * (255-val) + palette[_color].g * val) >> 10; - int b = (palette[*dest].b * (255-val) + palette[_color].b * val) >> 10; - *dest = RGB256k.RGB[clamp(r,0,63)][clamp(g,0,63)][clamp(b,0,63)]; - source += 4; - dest += pitch; - } while (--count); + if (!r_blendmethod) + { + do { + uint32_t val = colormap[*source]; + uint32_t fg = fgstart[val<<8]; + val = (Col2RGB8[64-val][*dest] + fg) | 0x1f07c1f; + *dest = RGB32k.All[val & (val>>15)]; + source += 4; + dest += pitch; + } while (--count); + } + else + { + do { + uint32_t val = *source; + int r = (palette[*dest].r * (255-val) + palette[_color].r * val) >> 10; + int g = (palette[*dest].g * (255-val) + palette[_color].g * val) >> 10; + int b = (palette[*dest].b * (255-val) + palette[_color].b * val) >> 10; + *dest = RGB256k.RGB[clamp(r,0,63)][clamp(g,0,63)][clamp(b,0,63)]; + source += 4; + dest += pitch; + } while (--count); + } } void DrawColumnRt4ShadedPalCommand::Execute(DrawerThread *thread) { + uint32_t 
*fgstart; const uint8_t *colormap; uint8_t *source; uint8_t *dest; @@ -538,27 +617,56 @@ namespace swrenderer if (count <= 0) return; + fgstart = &Col2RGB8[0][_color]; colormap = _colormap; dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4]; pitch = _pitch * thread->num_cores; const PalEntry *palette = GPalette.BaseColors; - do { - uint32_t val; + if (!r_blendmethod) + { + do { + uint32_t val; + + val = colormap[source[0]]; + val = (Col2RGB8[64-val][dest[0]] + fgstart[val<<8]) | 0x1f07c1f; + dest[0] = RGB32k.All[val & (val>>15)]; - for (int ks = 0; ks < 4; ks++) - { - val = source[ks]; - int r = (palette[dest[ks]].r * (255-val) + palette[_color].r * val) >> 10; - int g = (palette[dest[ks]].g * (255-val) + palette[_color].g * val) >> 10; - int b = (palette[dest[ks]].b * (255-val) + palette[_color].b * val) >> 10; - dest[ks] = RGB256k.RGB[clamp(r,0,63)][clamp(g,0,63)][clamp(b,0,63)]; - } + val = colormap[source[1]]; + val = (Col2RGB8[64-val][dest[1]] + fgstart[val<<8]) | 0x1f07c1f; + dest[1] = RGB32k.All[val & (val>>15)]; - source += 4; - dest += pitch; - } while (--count); + val = colormap[source[2]]; + val = (Col2RGB8[64-val][dest[2]] + fgstart[val<<8]) | 0x1f07c1f; + dest[2] = RGB32k.All[val & (val>>15)]; + + val = colormap[source[3]]; + val = (Col2RGB8[64-val][dest[3]] + fgstart[val<<8]) | 0x1f07c1f; + dest[3] = RGB32k.All[val & (val>>15)]; + + source += 4; + dest += pitch; + } while (--count); + } + else + { + do { + uint32_t val; + + for (int ks = 0; ks < 4; ks++) + { + val = source[ks]; + int r = (palette[dest[ks]].r * (255-val) + palette[_color].r * val) >> 10; + int g = (palette[dest[ks]].g * (255-val) + palette[_color].g * val) >> 10; + int b = (palette[dest[ks]].b * (255-val) + palette[_color].b * val) >> 10; + dest[ks] = RGB256k.RGB[clamp(r,0,63)][clamp(g,0,63)][clamp(b,0,63)]; + } + + source += 4; + dest += pitch; + } while (--count); + } } void 
DrawColumnRt1AddClampPalCommand::Execute(DrawerThread *thread) @@ -573,22 +681,43 @@ namespace swrenderer if (count <= 0) return; + const uint32_t *fg2rgb = _srcblend; + const uint32_t *bg2rgb = _destblend; dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4 + hx]; pitch = _pitch * thread->num_cores; colormap = _colormap; const PalEntry *palette = GPalette.BaseColors; - do { - int fg = colormap[*source]; - int bg = *dest; - int r = MIN((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 63); - int g = MIN((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 63); - int b = MIN((palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 63); - *dest = RGB256k.RGB[r][g][b]; - source += 4; - dest += pitch; - } while (--count); + if (!r_blendmethod) + { + do { + uint32_t a = fg2rgb[colormap[*source]] + bg2rgb[*dest]; + uint32_t b = a; + + a |= 0x01f07c1f; + b &= 0x40100400; + a &= 0x3fffffff; + b = b - (b >> 5); + a |= b; + *dest = RGB32k.All[(a>>15) & a]; + source += 4; + dest += pitch; + } while (--count); + } + else + { + do { + int fg = colormap[*source]; + int bg = *dest; + int r = MIN((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 63); + int g = MIN((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 63); + int b = MIN((palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 63); + *dest = RGB256k.RGB[r][g][b]; + source += 4; + dest += pitch; + } while (--count); + } } void DrawColumnRt4AddClampPalCommand::Execute(DrawerThread *thread) @@ -609,20 +738,70 @@ namespace swrenderer colormap = _colormap; const PalEntry *palette = GPalette.BaseColors; - do { - for (int ks = 0; ks < 4; ks++) - { - int fg = colormap[source[ks]]; - int bg = dest[ks]; - int r = MIN((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 63); - int g = MIN((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 63); - int b = MIN((palette[fg].b * 
_srcalpha + palette[bg].b * _destalpha)>>18, 63); - dest[ks] = RGB256k.RGB[r][g][b]; - } + const uint32_t *fg2rgb = _srcblend; + const uint32_t *bg2rgb = _destblend; - source += 4; - dest += pitch; - } while (--count); + if (!r_blendmethod) + { + do { + uint32_t a = fg2rgb[colormap[source[0]]] + bg2rgb[dest[0]]; + uint32_t b = a; + + a |= 0x01f07c1f; + b &= 0x40100400; + a &= 0x3fffffff; + b = b - (b >> 5); + a |= b; + dest[0] = RGB32k.All[(a>>15) & a]; + + a = fg2rgb[colormap[source[1]]] + bg2rgb[dest[1]]; + b = a; + a |= 0x01f07c1f; + b &= 0x40100400; + a &= 0x3fffffff; + b = b - (b >> 5); + a |= b; + dest[1] = RGB32k.All[(a>>15) & a]; + + a = fg2rgb[colormap[source[2]]] + bg2rgb[dest[2]]; + b = a; + a |= 0x01f07c1f; + b &= 0x40100400; + a &= 0x3fffffff; + b = b - (b >> 5); + a |= b; + dest[2] = RGB32k.All[(a>>15) & a]; + + a = fg2rgb[colormap[source[3]]] + bg2rgb[dest[3]]; + b = a; + a |= 0x01f07c1f; + b &= 0x40100400; + a &= 0x3fffffff; + b = b - (b >> 5); + a |= b; + dest[3] = RGB32k.All[(a>>15) & a]; + + source += 4; + dest += pitch; + } while (--count); + } + else + { + do { + for (int ks = 0; ks < 4; ks++) + { + int fg = colormap[source[ks]]; + int bg = dest[ks]; + int r = MIN((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 63); + int g = MIN((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 63); + int b = MIN((palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 63); + dest[ks] = RGB256k.RGB[r][g][b]; + } + + source += 4; + dest += pitch; + } while (--count); + } } void DrawColumnRt1SubClampPalCommand::Execute(DrawerThread *thread) @@ -645,16 +824,34 @@ namespace swrenderer colormap = _colormap; const PalEntry *palette = GPalette.BaseColors; - do { - int fg = colormap[*source]; - int bg = *dest; - int r = MAX((palette[fg].r * _srcalpha - palette[bg].r * _destalpha)>>18, 0); - int g = MAX((palette[fg].g * _srcalpha - palette[bg].g * _destalpha)>>18, 0); - int b = MAX((palette[fg].b * _srcalpha - palette[bg].b * 
_destalpha)>>18, 0); - *dest = RGB256k.RGB[r][g][b]; - source += 4; - dest += pitch; - } while (--count); + if (!r_blendmethod) + { + do { + uint32_t a = (fg2rgb[colormap[*source]] | 0x40100400) - bg2rgb[*dest]; + uint32_t b = a; + + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + *dest = RGB32k.All[(a>>15) & a]; + source += 4; + dest += pitch; + } while (--count); + } + else + { + do { + int fg = colormap[*source]; + int bg = *dest; + int r = MAX((palette[fg].r * _srcalpha - palette[bg].r * _destalpha)>>18, 0); + int g = MAX((palette[fg].g * _srcalpha - palette[bg].g * _destalpha)>>18, 0); + int b = MAX((palette[fg].b * _srcalpha - palette[bg].b * _destalpha)>>18, 0); + *dest = RGB256k.RGB[r][g][b]; + source += 4; + dest += pitch; + } while (--count); + } } void DrawColumnRt4SubClampPalCommand::Execute(DrawerThread *thread) @@ -677,20 +874,63 @@ namespace swrenderer colormap = _colormap; const PalEntry *palette = GPalette.BaseColors; - do { - for (int ks = 0; ks < 4; ks++) - { - int fg = colormap[source[ks]]; - int bg = dest[ks]; - int r = MAX((palette[fg].r * _srcalpha - palette[bg].r * _destalpha)>>18, 0); - int g = MAX((palette[fg].g * _srcalpha - palette[bg].g * _destalpha)>>18, 0); - int b = MAX((palette[fg].b * _srcalpha - palette[bg].b * _destalpha)>>18, 0); - dest[ks] = RGB256k.RGB[r][g][b]; - } + if (!r_blendmethod) + { + do { + uint32_t a = (fg2rgb[colormap[source[0]]] | 0x40100400) - bg2rgb[dest[0]]; + uint32_t b = a; - source += 4; - dest += pitch; - } while (--count); + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + dest[0] = RGB32k.All[(a>>15) & a]; + + a = (fg2rgb[colormap[source[1]]] | 0x40100400) - bg2rgb[dest[1]]; + b = a; + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + dest[1] = RGB32k.All[(a>>15) & a]; + + a = (fg2rgb[colormap[source[2]]] | 0x40100400) - bg2rgb[dest[2]]; + b = a; + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + dest[2] = RGB32k.All[(a>>15) & a]; 
+ + a = (fg2rgb[colormap[source[3]]] | 0x40100400) - bg2rgb[dest[3]]; + b = a; + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + dest[3] = RGB32k.All[(a>>15) & a]; + + source += 4; + dest += pitch; + } while (--count); + } + else + { + do { + for (int ks = 0; ks < 4; ks++) + { + int fg = colormap[source[ks]]; + int bg = dest[ks]; + int r = MAX((palette[fg].r * _srcalpha - palette[bg].r * _destalpha)>>18, 0); + int g = MAX((palette[fg].g * _srcalpha - palette[bg].g * _destalpha)>>18, 0); + int b = MAX((palette[fg].b * _srcalpha - palette[bg].b * _destalpha)>>18, 0); + dest[ks] = RGB256k.RGB[r][g][b]; + } + + source += 4; + dest += pitch; + } while (--count); + } } void DrawColumnRt1RevSubClampPalCommand::Execute(DrawerThread *thread) @@ -713,16 +953,34 @@ namespace swrenderer colormap = _colormap; const PalEntry *palette = GPalette.BaseColors; - do { - int fg = colormap[*source]; - int bg = *dest; - int r = MAX((-palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 0); - int g = MAX((-palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 0); - int b = MAX((-palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 0); - *dest = RGB256k.RGB[r][g][b]; - source += 4; - dest += pitch; - } while (--count); + if (!r_blendmethod) + { + do { + uint32_t a = (bg2rgb[*dest] | 0x40100400) - fg2rgb[colormap[*source]]; + uint32_t b = a; + + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + *dest = RGB32k.All[(a>>15) & a]; + source += 4; + dest += pitch; + } while (--count); + } + else + { + do { + int fg = colormap[*source]; + int bg = *dest; + int r = MAX((-palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 0); + int g = MAX((-palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 0); + int b = MAX((-palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 0); + *dest = RGB256k.RGB[r][g][b]; + source += 4; + dest += pitch; + } while (--count); + } } void 
DrawColumnRt4RevSubClampPalCommand::Execute(DrawerThread *thread) @@ -745,19 +1003,62 @@ namespace swrenderer colormap = _colormap; const PalEntry *palette = GPalette.BaseColors; - do { - for (int ks = 0; ks < 4; ks++) - { - int fg = colormap[source[ks]]; - int bg = dest[ks]; - int r = MAX((-palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 0); - int g = MAX((-palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 0); - int b = MAX((-palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 0); - dest[ks] = RGB256k.RGB[r][g][b]; - } + if (!r_blendmethod) + { + do { + uint32_t a = (bg2rgb[dest[0]] | 0x40100400) - fg2rgb[colormap[source[0]]]; + uint32_t b = a; - source += 4; - dest += pitch; - } while (--count); + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + dest[0] = RGB32k.All[(a>>15) & a]; + + a = (bg2rgb[dest[1]] | 0x40100400) - fg2rgb[colormap[source[1]]]; + b = a; + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + dest[1] = RGB32k.All[(a>>15) & a]; + + a = (bg2rgb[dest[2]] | 0x40100400) - fg2rgb[colormap[source[2]]]; + b = a; + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + dest[2] = RGB32k.All[(a>>15) & a]; + + a = (bg2rgb[dest[3]] | 0x40100400) - fg2rgb[colormap[source[3]]]; + b = a; + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + dest[3] = RGB32k.All[(a>>15) & a]; + + source += 4; + dest += pitch; + } while (--count); + } + else + { + do { + for (int ks = 0; ks < 4; ks++) + { + int fg = colormap[source[ks]]; + int bg = dest[ks]; + int r = MAX((-palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 0); + int g = MAX((-palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 0); + int b = MAX((-palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 0); + dest[ks] = RGB256k.RGB[r][g][b]; + } + + source += 4; + dest += pitch; + } while (--count); + } } } From 9ece249dbb02e5897e5cf972fd3cf8b96ea014c7 Mon Sep 17 00:00:00 2001 From: Rachael 
Alexanderson Date: Sat, 24 Dec 2016 12:42:27 -0500 Subject: [PATCH 549/912] - Reimplemented rgb555 into burn/crossfade in f_wipe.cpp. --- src/f_wipe.cpp | 141 +++++++++++++++++++++++++++++++++++-------------- 1 file changed, 101 insertions(+), 40 deletions(-) diff --git a/src/f_wipe.cpp b/src/f_wipe.cpp index 8d5f072f62..9d487bc9dc 100644 --- a/src/f_wipe.cpp +++ b/src/f_wipe.cpp @@ -30,6 +30,8 @@ #include "templates.h" #include "v_palette.h" +EXTERN_CVAR(Bool, r_blendmethod) + // // SCREEN WIPE PACKAGE // @@ -281,42 +283,80 @@ bool wipe_doBurn (int ticks) fromold = (BYTE *)wipe_scr_start; fromnew = (BYTE *)wipe_scr_end; - for (y = 0, firey = 0; y < SCREENHEIGHT; y++, firey += ystep) + if (!r_blendmethod) { - for (x = 0, firex = 0; x < SCREENWIDTH; x++, firex += xstep) + for (y = 0, firey = 0; y < SCREENHEIGHT; y++, firey += ystep) { - int fglevel; - - fglevel = burnarray[(firex>>SHIFT)+(firey>>SHIFT)*FIREWIDTH] / 2; - if (fglevel >= 63) + for (x = 0, firex = 0; x < SCREENWIDTH; x++, firex += xstep) { - to[x] = fromnew[x]; - } - else if (fglevel == 0) - { - to[x] = fromold[x]; - done = false; - } - else - { - int bglevel = 64-fglevel; + int fglevel; - const PalEntry* pal = GPalette.BaseColors; - - DWORD fg = fromnew[x]; - DWORD bg = fromold[x]; - int r = MIN((pal[fg].r * fglevel + pal[bg].r * bglevel) >> 8, 63); - int g = MIN((pal[fg].g * fglevel + pal[bg].g * bglevel) >> 8, 63); - int b = MIN((pal[fg].b * fglevel + pal[bg].b * bglevel) >> 8, 63); - to[x] = RGB256k.RGB[r][g][b]; - done = false; + fglevel = burnarray[(firex>>SHIFT)+(firey>>SHIFT)*FIREWIDTH] / 2; + if (fglevel >= 63) + { + to[x] = fromnew[x]; + } + else if (fglevel == 0) + { + to[x] = fromold[x]; + done = false; + } + else + { + int bglevel = 64-fglevel; + DWORD *fg2rgb = Col2RGB8[fglevel]; + DWORD *bg2rgb = Col2RGB8[bglevel]; + DWORD fg = fg2rgb[fromnew[x]]; + DWORD bg = bg2rgb[fromold[x]]; + fg = (fg+bg) | 0x1f07c1f; + to[x] = RGB32k.All[fg & (fg>>15)]; + done = false; + } } + fromold += 
SCREENWIDTH; + fromnew += SCREENWIDTH; + to += SCREENPITCH; } - fromold += SCREENWIDTH; - fromnew += SCREENWIDTH; - to += SCREENPITCH; - } + } + else + { + for (y = 0, firey = 0; y < SCREENHEIGHT; y++, firey += ystep) + { + for (x = 0, firex = 0; x < SCREENWIDTH; x++, firex += xstep) + { + int fglevel; + + fglevel = burnarray[(firex>>SHIFT)+(firey>>SHIFT)*FIREWIDTH] / 2; + if (fglevel >= 63) + { + to[x] = fromnew[x]; + } + else if (fglevel == 0) + { + to[x] = fromold[x]; + done = false; + } + else + { + int bglevel = 64-fglevel; + + const PalEntry* pal = GPalette.BaseColors; + + DWORD fg = fromnew[x]; + DWORD bg = fromold[x]; + int r = MIN((pal[fg].r * fglevel + pal[bg].r * bglevel) >> 8, 63); + int g = MIN((pal[fg].g * fglevel + pal[bg].g * bglevel) >> 8, 63); + int b = MIN((pal[fg].b * fglevel + pal[bg].b * bglevel) >> 8, 63); + to[x] = RGB256k.RGB[r][g][b]; + done = false; + } + } + fromold += SCREENWIDTH; + fromnew += SCREENWIDTH; + to += SCREENPITCH; + } + } return done || (burntime > 40); } @@ -346,25 +386,46 @@ bool wipe_doFade (int ticks) { int x, y; int bglevel = 64 - fade; + DWORD *fg2rgb = Col2RGB8[fade]; + DWORD *bg2rgb = Col2RGB8[bglevel]; BYTE *fromnew = (BYTE *)wipe_scr_end; BYTE *fromold = (BYTE *)wipe_scr_start; BYTE *to = screen->GetBuffer(); const PalEntry *pal = GPalette.BaseColors; - for (y = 0; y < SCREENHEIGHT; y++) + if (!r_blendmethod) { - for (x = 0; x < SCREENWIDTH; x++) + for (y = 0; y < SCREENHEIGHT; y++) { - DWORD fg = fromnew[x]; - DWORD bg = fromold[x]; - int r = MIN((pal[fg].r * (64-bglevel) + pal[bg].r * bglevel) >> 8, 63); - int g = MIN((pal[fg].g * (64-bglevel) + pal[bg].g * bglevel) >> 8, 63); - int b = MIN((pal[fg].b * (64-bglevel) + pal[bg].b * bglevel) >> 8, 63); - to[x] = RGB256k.RGB[r][g][b]; + for (x = 0; x < SCREENWIDTH; x++) + { + DWORD fg = fg2rgb[fromnew[x]]; + DWORD bg = bg2rgb[fromold[x]]; + fg = (fg+bg) | 0x1f07c1f; + to[x] = RGB32k.All[fg & (fg>>15)]; + } + fromnew += SCREENWIDTH; + fromold += SCREENWIDTH; + to += 
SCREENPITCH; + } + } + else + { + for (y = 0; y < SCREENHEIGHT; y++) + { + for (x = 0; x < SCREENWIDTH; x++) + { + DWORD fg = fromnew[x]; + DWORD bg = fromold[x]; + int r = MIN((pal[fg].r * (64-bglevel) + pal[bg].r * bglevel) >> 8, 63); + int g = MIN((pal[fg].g * (64-bglevel) + pal[bg].g * bglevel) >> 8, 63); + int b = MIN((pal[fg].b * (64-bglevel) + pal[bg].b * bglevel) >> 8, 63); + to[x] = RGB256k.RGB[r][g][b]; + } + fromnew += SCREENWIDTH; + fromold += SCREENWIDTH; + to += SCREENPITCH; } - fromnew += SCREENWIDTH; - fromold += SCREENWIDTH; - to += SCREENPITCH; } } return false; From 2fa13396f20f110af422a6d86d7d1c12a5a80f5f Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Sat, 24 Dec 2016 12:50:17 -0500 Subject: [PATCH 550/912] - Added r_blendmethod to the menu. --- wadsrc/static/language.enu | 4 ++++ wadsrc/static/menudef.txt | 7 +++++++ 2 files changed, 11 insertions(+) diff --git a/wadsrc/static/language.enu b/wadsrc/static/language.enu index 28db40e649..8c2b733fda 100644 --- a/wadsrc/static/language.enu +++ b/wadsrc/static/language.enu @@ -1780,6 +1780,8 @@ DSPLYMNU_BRIGHTNESS = "Brightness"; DSPLYMNU_VSYNC = "Vertical Sync"; DSPLYMNU_CAPFPS = "Rendering Interpolation"; DSPLYMNU_COLUMNMETHOD = "Column render mode"; +DSPLYMNU_BLENDMETHOD = "Transparency render mode"; + DSPLYMNU_WIPETYPE = "Screen wipe style"; DSPLYMNU_SHOWENDOOM = "Show ENDOOM screen"; DSPLYMNU_BLOODFADE = "Blood Flash Intensity"; @@ -2189,6 +2191,8 @@ OPTVAL_INVERTED = "Inverted"; OPTVAL_NOTINVERTED = "Not Inverted"; OPTVAL_ORIGINAL = "Original"; OPTVAL_OPTIMIZED = "Optimized"; +OPTVAL_CLASSIC = "Classic (Faster)"; +OPTVAL_PRECISE = "Precise"; OPTVAL_NORMAL = "Normal"; OPTVAL_STRETCH = "Stretch"; OPTVAL_CAPPED = "Capped"; diff --git a/wadsrc/static/menudef.txt b/wadsrc/static/menudef.txt index 30cad4dd24..73873c7764 100644 --- a/wadsrc/static/menudef.txt +++ b/wadsrc/static/menudef.txt @@ -600,6 +600,12 @@ OptionValue ColumnMethods 1.0, "$OPTVAL_OPTIMIZED" } +OptionValue BlendMethods 
+{ + 0.0, "$OPTVAL_CLASSIC" + 1.0, "$OPTVAL_PRECISE" +} + OptionValue SkyModes { 0.0, "$OPTVAL_NORMAL" @@ -671,6 +677,7 @@ OptionMenu "VideoOptions" Slider "$DSPLYMNU_PICKUPFADE", "pickup_fade_scalar", 0.0, 1.0, 0.05, 2 Slider "$DSPLYMNU_WATERFADE", "underwater_fade_scalar", 0.0, 1.0, 0.05, 2 Option "$DSPLYMNU_COLUMNMETHOD", "r_columnmethod", "ColumnMethods" + Option "$DSPLYMNU_BLENDMETHOD", "r_blendmethod", "BlendMethods" StaticText " " Option "$DSPLYMNU_WIPETYPE", "wipetype", "Wipes" From 62e093a7da977bbaeefd730f8d1788eb301a1ca3 Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Sat, 24 Dec 2016 14:10:51 -0500 Subject: [PATCH 551/912] - fixed: mistyped r_columnmethod instead of r_blendmethod in a span drawer. --- src/r_draw_pal.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/r_draw_pal.cpp b/src/r_draw_pal.cpp index 88fe93f85a..37d38891ef 100644 --- a/src/r_draw_pal.cpp +++ b/src/r_draw_pal.cpp @@ -2756,7 +2756,7 @@ namespace swrenderer xstep = _xstep; ystep = _ystep; - if (!r_columnmethod) + if (!r_blendmethod) { if (_xbits == 6 && _ybits == 6) { From 8231032f899365a4c87405812452b1944a6e2c54 Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Sat, 24 Dec 2016 19:22:31 -0500 Subject: [PATCH 552/912] - fixed a crash - tried to make MAKETRANSDOT algorithm more consistent with original --- src/v_draw.cpp | 19 ++++++++++--------- src/v_video.cpp | 11 +++++++++++ 2 files changed, 21 insertions(+), 9 deletions(-) diff --git a/src/v_draw.cpp b/src/v_draw.cpp index 703a059ef7..b529770d6b 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -70,6 +70,7 @@ CUSTOM_CVAR(Int, uiscale, 2, CVAR_ARCHIVE | CVAR_NOINITCALL) StatusBar->ScreenSizeChanged(); } } +EXTERN_CVAR(Bool, r_blendmethod) // [RH] Stretch values to make a 320x200 image best fit the screen // without using fractional steppings @@ -1009,17 +1010,17 @@ void DCanvas::PUTTRANSDOT (int xx, int yy, int basecolor, int level) uint32_t *spot = (uint32_t*)GetBuffer() + oldyyshifted + 
xx; uint32_t fg = swrenderer::LightBgra::shade_pal_index_simple(basecolor, swrenderer::LightBgra::calc_light_multiplier(0)); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; + uint32_t fg_red = (((fg >> 16) & 0xff) * (63 - level)) >> 6; + uint32_t fg_green = (((fg >> 8) & 0xff) * (63 - level)) >> 6; + uint32_t fg_blue = ((fg & 0xff) * (63 - level)) >> 6; - uint32_t bg_red = (*spot >> 16) & 0xff; - uint32_t bg_green = (*spot >> 8) & 0xff; - uint32_t bg_blue = (*spot) & 0xff; + uint32_t bg_red = (((*spot >> 16) & 0xff) * level) >> 6; + uint32_t bg_green = (((*spot >> 8) & 0xff) * level) >> 6; + uint32_t bg_blue = (((*spot) & 0xff) * level) >> 6; - uint32_t red = (fg_red + bg_red + 1) / 2; - uint32_t green = (fg_green + bg_green + 1) / 2; - uint32_t blue = (fg_blue + bg_blue + 1) / 2; + uint32_t red = fg_red + bg_red; + uint32_t green = fg_green + bg_green; + uint32_t blue = fg_blue + bg_blue; *spot = 0xff000000 | (red << 16) | (green << 8) | blue; } diff --git a/src/v_video.cpp b/src/v_video.cpp index bd322bf52f..146e2eb025 100644 --- a/src/v_video.cpp +++ b/src/v_video.cpp @@ -424,6 +424,17 @@ void DCanvas::Dim (PalEntry color, float damount, int x1, int y1, int w, int h) if (!r_blendmethod) { + { + int amount; + + amount = (int)(damount * 64); + bg2rgb = Col2RGB8[64 - amount]; + + fg = (((color.r * amount) >> 4) << 20) | + ((color.g * amount) >> 4) | + (((color.b * amount) >> 4) << 10); + } + for (y = h; y != 0; y--) { for (x = w; x != 0; x--) From b0febec986f6f2b9e4e80ff763f23d242ecd0512 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 25 Dec 2016 05:05:53 +0100 Subject: [PATCH 553/912] Removed r_columnmethod as its performance gains are too insignificant to justify its complexity on the codebase --- src/r_draw.cpp | 8 ++--- src/r_draw.h | 4 +-- src/r_main.cpp | 12 ------- src/r_segs.cpp | 71 +------------------------------------- src/r_things.cpp | 72 +-------------------------------------- 
src/v_draw.cpp | 49 +++----------------------- wadsrc/static/menudef.txt | 1 - 7 files changed, 10 insertions(+), 207 deletions(-) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 14b6ec55aa..4ec2745931 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -445,7 +445,7 @@ namespace swrenderer if (style.BlendOp == STYLEOP_Fuzz) { colfunc = fuzzcolfunc; - return DoDraw0; + return DoDraw; } else if (style == LegacyRenderStyles[STYLE_Shaded]) { @@ -466,8 +466,7 @@ namespace swrenderer { R_SetColorMapLight(basecolormap, 0, 0); } - bool active_columnmethod = r_columnmethod && !r_swtruecolor; - return active_columnmethod ? DoDraw1 : DoDraw0; + return DoDraw; } fglevel = GetAlpha(style.SrcAlpha, alpha); @@ -500,8 +499,7 @@ namespace swrenderer { return DontDraw; } - bool active_columnmethod = r_columnmethod && !r_swtruecolor; - return active_columnmethod ? DoDraw1 : DoDraw0; + return DoDraw; } ESPSResult R_SetPatchStyle(FRenderStyle style, float alpha, int translation, uint32_t color) diff --git a/src/r_draw.h b/src/r_draw.h index f0aee6f1a9..367cba36b8 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -15,7 +15,6 @@ EXTERN_CVAR(Float, r_lod_bias); EXTERN_CVAR(Int, r_drawfuzz); EXTERN_CVAR(Bool, r_drawtrans); EXTERN_CVAR(Float, transsouls); -EXTERN_CVAR(Int, r_columnmethod); EXTERN_CVAR(Bool, r_dynlights); namespace swrenderer @@ -131,8 +130,7 @@ namespace swrenderer enum ESPSResult { DontDraw, // not useful to draw this - DoDraw0, // draw this as if r_columnmethod is 0 - DoDraw1, // draw this as if r_columnmethod is 1 + DoDraw // draw this }; ESPSResult R_SetPatchStyle(FRenderStyle style, fixed_t alpha, int translation, uint32_t color); diff --git a/src/r_main.cpp b/src/r_main.cpp index 17b8e682f5..c67b48a1ba 100644 --- a/src/r_main.cpp +++ b/src/r_main.cpp @@ -65,18 +65,6 @@ CVAR (String, r_viewsize, "", CVAR_NOSET) CVAR (Bool, r_shadercolormaps, true, CVAR_ARCHIVE) -CUSTOM_CVAR (Int, r_columnmethod, 1, CVAR_ARCHIVE|CVAR_GLOBALCONFIG) -{ - if (self != 0 && self != 1) - { 
- self = 1; - } - else - { // Trigger the change - setsizeneeded = true; - } -} - CVAR(Int, r_portal_recursions, 4, CVAR_ARCHIVE) CVAR(Bool, r_highlight_portals, false, CVAR_ARCHIVE) diff --git a/src/r_segs.cpp b/src/r_segs.cpp index 14abcfc249..f171d79336 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -415,46 +415,13 @@ void R_RenderMaskedSegRange (drawseg_t *ds, int x1, int x2) mceilingclip = wallupper; // draw the columns one at a time - if (drawmode == DoDraw0) + if (drawmode == DoDraw) { for (dc_x = x1; dc_x < x2; ++dc_x) { BlastMaskedColumn (tex, false); } } - else - { - // [RH] Draw up to four columns at once - int stop = x2 & ~3; - - if (x1 >= x2) - goto clearfog; - - dc_x = x1; - - while ((dc_x < stop) && (dc_x & 3)) - { - BlastMaskedColumn (tex, false); - dc_x++; - } - - while (dc_x < stop) - { - rt_initcols(nullptr); - BlastMaskedColumn (tex, true); dc_x++; - BlastMaskedColumn (tex, true); dc_x++; - BlastMaskedColumn (tex, true); dc_x++; - BlastMaskedColumn (tex, true); - rt_draw4cols (dc_x - 3); - dc_x++; - } - - while (dc_x < x2) - { - BlastMaskedColumn (tex, false); - dc_x++; - } - } } else { // Texture does wrap vertically. 
@@ -2346,42 +2313,6 @@ static void R_RenderDecal (side_t *wall, DBaseDecal *decal, drawseg_t *clipper, } else { - int stop4; - - if (mode == DoDraw0) - { // 1 column at a time - stop4 = dc_x; - } - else // DoDraw1 - { // up to 4 columns at a time - stop4 = x2 & ~3; - } - - while ((dc_x < stop4) && (dc_x & 3)) - { - if (calclighting) - { // calculate lighting - R_SetColorMapLight(usecolormap, rw_light, wallshade); - } - R_WallSpriteColumn (false); - dc_x++; - } - - while (dc_x < stop4) - { - if (calclighting) - { // calculate lighting - R_SetColorMapLight(usecolormap, rw_light, wallshade); - } - rt_initcols(nullptr); - for (int zz = 4; zz; --zz) - { - R_WallSpriteColumn (true); - dc_x++; - } - rt_draw4cols (dc_x - 4); - } - while (dc_x < x2) { if (calclighting) diff --git a/src/r_things.cpp b/src/r_things.cpp index 53e3c05895..7b6114d008 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -530,7 +530,7 @@ void R_DrawVisSprite (vissprite_t *vis) { fixed_t frac; FTexture *tex; - int x2, stop4; + int x2; fixed_t xiscale; ESPSResult mode; bool ispsprite = (!vis->sector && vis->gpos != FVector3(0, 0, 0)); @@ -554,17 +554,6 @@ void R_DrawVisSprite (vissprite_t *vis) if (mode != DontDraw) { - if (mode == DoDraw0) - { - // One column at a time - stop4 = vis->x1; - } - else // DoDraw1 - { - // Up to four columns at a time - stop4 = vis->x2 & ~3; - } - tex = vis->pic; spryscale = vis->yscale; sprflipvert = false; @@ -592,27 +581,6 @@ void R_DrawVisSprite (vissprite_t *vis) if (dc_x < x2) { - while ((dc_x < stop4) && (dc_x & 3)) - { - if (ispsprite || !R_ClipSpriteColumnWithPortals(vis)) - R_DrawMaskedColumn (tex, frac, false); - dc_x++; - frac += xiscale; - } - - while (dc_x < stop4) - { - rt_initcols(nullptr); - for (int zz = 4; zz; --zz) - { - if (ispsprite || !R_ClipSpriteColumnWithPortals(vis)) - R_DrawMaskedColumn (tex, frac, true); - dc_x++; - frac += xiscale; - } - rt_draw4cols (dc_x - 4); - } - while (dc_x < x2) { if (ispsprite || 
!R_ClipSpriteColumnWithPortals(vis)) @@ -708,44 +676,6 @@ void R_DrawWallSprite(vissprite_t *spr) } else { - int stop4; - - if (mode == DoDraw0) - { // 1 column at a time - stop4 = dc_x; - } - else // DoDraw1 - { // up to 4 columns at a time - stop4 = x2 & ~3; - } - - while ((dc_x < stop4) && (dc_x & 3)) - { - if (calclighting) - { // calculate lighting - R_SetColorMapLight(usecolormap, rw_light, shade); - } - if (!R_ClipSpriteColumnWithPortals(spr)) - R_WallSpriteColumn(false); - dc_x++; - } - - while (dc_x < stop4) - { - if (calclighting) - { // calculate lighting - R_SetColorMapLight(usecolormap, rw_light, shade); - } - rt_initcols(nullptr); - for (int zz = 4; zz; --zz) - { - if (!R_ClipSpriteColumnWithPortals(spr)) - R_WallSpriteColumn(true); - dc_x++; - } - rt_draw4cols(dc_x - 4); - } - while (dc_x < x2) { if (calclighting) diff --git a/src/v_draw.cpp b/src/v_draw.cpp index b529770d6b..0e75bc2828 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -210,8 +210,6 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) if (mode != DontDraw) { - int stop4; - double centeryback = CenterY; CenterY = 0; @@ -277,56 +275,17 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) x2 = parms.rclip; } - // Drawing short output ought to fit in the data cache well enough - // if we draw one column at a time, so do that, since it's simpler. 
- if (parms.destheight < 32 || (parms.dclip - parms.uclip) < 32) - { - mode = DoDraw0; - } - dc_x = int(x0); int x2_i = int(x2); fixed_t xiscale_i = FLOAT2FIXED(xiscale); - if (mode == DoDraw0) + while (dc_x < x2_i) { - // One column at a time - stop4 = dc_x; - } - else // DoDraw1` - { - // Up to four columns at a time - stop4 = x2_i & ~3; + R_DrawMaskedColumn(img, frac, false, !parms.masked); + dc_x++; + frac += xiscale_i; } - if (dc_x < x2_i) - { - while ((dc_x < stop4) && (dc_x & 3)) - { - R_DrawMaskedColumn(img, frac, false, !parms.masked); - dc_x++; - frac += xiscale_i; - } - - while (dc_x < stop4) - { - rt_initcols(nullptr); - for (int zz = 4; zz; --zz) - { - R_DrawMaskedColumn(img, frac, true, !parms.masked); - dc_x++; - frac += xiscale_i; - } - rt_draw4cols(dc_x - 4); - } - - while (dc_x < x2_i) - { - R_DrawMaskedColumn(img, frac, false, !parms.masked); - dc_x++; - frac += xiscale_i; - } - } CenterY = centeryback; } R_FinishSetPatchStyle (); diff --git a/wadsrc/static/menudef.txt b/wadsrc/static/menudef.txt index 8882c89ae1..314534bb7d 100644 --- a/wadsrc/static/menudef.txt +++ b/wadsrc/static/menudef.txt @@ -704,7 +704,6 @@ OptionMenu "VideoOptions" Slider "$DSPLYMNU_BLOODFADE", "blood_fade_scalar", 0.0, 1.0, 0.05, 2 Slider "$DSPLYMNU_PICKUPFADE", "pickup_fade_scalar", 0.0, 1.0, 0.05, 2 Slider "$DSPLYMNU_WATERFADE", "underwater_fade_scalar", 0.0, 1.0, 0.05, 2 - Option "$DSPLYMNU_COLUMNMETHOD", "r_columnmethod", "ColumnMethods" Option "$DSPLYMNU_BLENDMETHOD", "r_blendmethod", "BlendMethods" StaticText " " From 57593adeb00df85011dc7a2ac6ab634884deb8e8 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 25 Dec 2016 05:15:23 +0100 Subject: [PATCH 554/912] Simplify ESPSResult to a boolean --- src/r_draw.cpp | 14 +++++++------- src/r_draw.h | 10 ++-------- src/r_segs.cpp | 26 ++++++++------------------ src/r_things.cpp | 15 ++++++--------- src/v_draw.cpp | 8 ++++---- 5 files changed, 27 insertions(+), 46 deletions(-) diff --git a/src/r_draw.cpp 
b/src/r_draw.cpp index 4ec2745931..c7bf95eed0 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -393,7 +393,7 @@ namespace swrenderer FDynamicColormap *basecolormapsave; } - ESPSResult R_SetPatchStyle(FRenderStyle style, fixed_t alpha, int translation, uint32_t color) + bool R_SetPatchStyle(FRenderStyle style, fixed_t alpha, int translation, uint32_t color) { using namespace drawerargs; @@ -445,13 +445,13 @@ namespace swrenderer if (style.BlendOp == STYLEOP_Fuzz) { colfunc = fuzzcolfunc; - return DoDraw; + return true; } else if (style == LegacyRenderStyles[STYLE_Shaded]) { // Shaded drawer only gets 16 levels of alpha because it saves memory. if ((alpha >>= 12) == 0) - return DontDraw; + return false; colfunc = R_DrawShadedColumn; hcolfunc_post1 = rt_shaded1col; hcolfunc_post4 = rt_shaded4cols; @@ -466,7 +466,7 @@ namespace swrenderer { R_SetColorMapLight(basecolormap, 0, 0); } - return DoDraw; + return true; } fglevel = GetAlpha(style.SrcAlpha, alpha); @@ -497,12 +497,12 @@ namespace swrenderer if (!R_SetBlendFunc(style.BlendOp, fglevel, bglevel, style.Flags)) { - return DontDraw; + return false; } - return DoDraw; + return true; } - ESPSResult R_SetPatchStyle(FRenderStyle style, float alpha, int translation, uint32_t color) + bool R_SetPatchStyle(FRenderStyle style, float alpha, int translation, uint32_t color) { return R_SetPatchStyle(style, FLOAT2FIXED(alpha), translation, color); } diff --git a/src/r_draw.h b/src/r_draw.h index 367cba36b8..752647943a 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -127,14 +127,8 @@ namespace swrenderer void R_InitShadeMaps(); void R_InitFuzzTable(int fuzzoff); - enum ESPSResult - { - DontDraw, // not useful to draw this - DoDraw // draw this - }; - - ESPSResult R_SetPatchStyle(FRenderStyle style, fixed_t alpha, int translation, uint32_t color); - ESPSResult R_SetPatchStyle(FRenderStyle style, float alpha, int translation, uint32_t color); + bool R_SetPatchStyle(FRenderStyle style, fixed_t alpha, int translation, uint32_t 
color); + bool R_SetPatchStyle(FRenderStyle style, float alpha, int translation, uint32_t color); void R_FinishSetPatchStyle(); // Call this after finished drawing the current thing, in case its style was STYLE_Shade bool R_GetTransMaskDrawers(void(**drawCol1)(), void(**drawCol4)()); diff --git a/src/r_segs.cpp b/src/r_segs.cpp index f171d79336..edad2aeccc 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -218,14 +218,10 @@ void R_RenderMaskedSegRange (drawseg_t *ds, int x1, int x2) curline = ds->curline; - // killough 4/11/98: draw translucent 2s normal textures - // [RH] modified because we don't use user-definable translucency maps - ESPSResult drawmode; - - drawmode = R_SetPatchStyle (LegacyRenderStyles[curline->linedef->flags & ML_ADDTRANS ? STYLE_Add : STYLE_Translucent], + bool visible = R_SetPatchStyle (LegacyRenderStyles[curline->linedef->flags & ML_ADDTRANS ? STYLE_Add : STYLE_Translucent], (float)MIN(curline->linedef->alpha, 1.), 0, 0); - if ((drawmode == DontDraw && !ds->bFogBoundary && !ds->bFakeBoundary)) + if (!visible && !ds->bFogBoundary && !ds->bFakeBoundary) { return; } @@ -280,7 +276,7 @@ void R_RenderMaskedSegRange (drawseg_t *ds, int x1, int x2) goto clearfog; } } - if ((ds->bFakeBoundary && !(ds->bFakeBoundary & 4)) || drawmode == DontDraw) + if ((ds->bFakeBoundary && !(ds->bFakeBoundary & 4)) || !visible) { goto clearfog; } @@ -415,7 +411,7 @@ void R_RenderMaskedSegRange (drawseg_t *ds, int x1, int x2) mceilingclip = wallupper; // draw the columns one at a time - if (drawmode == DoDraw) + if (visible) { for (dc_x = x1; dc_x < x2; ++dc_x) { @@ -512,11 +508,10 @@ void R_RenderFakeWall(drawseg_t *ds, int x1, int x2, F3DFloor *rover) double yscale; fixed_t Alpha = Scale(rover->alpha, OPAQUE, 255); - ESPSResult drawmode; - drawmode = R_SetPatchStyle (LegacyRenderStyles[rover->flags & FF_ADDITIVETRANS ? STYLE_Add : STYLE_Translucent], + bool visible = R_SetPatchStyle (LegacyRenderStyles[rover->flags & FF_ADDITIVETRANS ? 
STYLE_Add : STYLE_Translucent], Alpha, 0, 0); - if(drawmode == DontDraw) { + if(!visible) { R_FinishSetPatchStyle(); return; } @@ -2297,9 +2292,8 @@ static void R_RenderDecal (side_t *wall, DBaseDecal *decal, drawseg_t *clipper, do { dc_x = x1; - ESPSResult mode; - mode = R_SetPatchStyle (decal->RenderStyle, (float)decal->Alpha, decal->Translation, decal->AlphaColor); + bool visible = R_SetPatchStyle (decal->RenderStyle, (float)decal->Alpha, decal->Translation, decal->AlphaColor); // R_SetPatchStyle can modify basecolormap. if (rereadcolormap) @@ -2307,11 +2301,7 @@ static void R_RenderDecal (side_t *wall, DBaseDecal *decal, drawseg_t *clipper, usecolormap = basecolormap; } - if (mode == DontDraw) - { - needrepeat = 0; - } - else + if (visible) { while (dc_x < x2) { diff --git a/src/r_things.cpp b/src/r_things.cpp index 7b6114d008..22d6b7ac65 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -532,7 +532,6 @@ void R_DrawVisSprite (vissprite_t *vis) FTexture *tex; int x2; fixed_t xiscale; - ESPSResult mode; bool ispsprite = (!vis->sector && vis->gpos != FVector3(0, 0, 0)); if (vis->xscale == 0 || fabs(vis->yscale) < (1.0f / 32000.0f)) @@ -543,7 +542,7 @@ void R_DrawVisSprite (vissprite_t *vis) fixed_t centeryfrac = FLOAT2FIXED(CenterY); R_SetColorMapLight(vis->Style.BaseColormap, 0, vis->Style.ColormapNum << FRACBITS); - mode = R_SetPatchStyle (vis->Style.RenderStyle, vis->Style.Alpha, vis->Translation, vis->FillColor); + bool visible = R_SetPatchStyle (vis->Style.RenderStyle, vis->Style.Alpha, vis->Translation, vis->FillColor); if (vis->Style.RenderStyle == LegacyRenderStyles[STYLE_Shaded]) { // For shaded sprites, R_SetPatchStyle sets a dc_colormap to an alpha table, but @@ -552,7 +551,7 @@ void R_DrawVisSprite (vissprite_t *vis) R_SetColorMapLight(dc_fcolormap, 0, vis->Style.ColormapNum << FRACBITS); } - if (mode != DontDraw) + if (visible) { tex = vis->pic; spryscale = vis->yscale; @@ -660,9 +659,8 @@ void R_DrawWallSprite(vissprite_t *spr) MaskedScaleY = 
(float)iyscale; dc_x = x1; - ESPSResult mode; - mode = R_SetPatchStyle (spr->Style.RenderStyle, spr->Style.Alpha, spr->Translation, spr->FillColor); + bool visible = R_SetPatchStyle (spr->Style.RenderStyle, spr->Style.Alpha, spr->Translation, spr->FillColor); // R_SetPatchStyle can modify basecolormap. if (rereadcolormap) @@ -670,7 +668,7 @@ void R_DrawWallSprite(vissprite_t *spr) usecolormap = basecolormap; } - if (mode == DontDraw) + if (!visible) { return; } @@ -707,14 +705,13 @@ void R_WallSpriteColumn (bool useRt) void R_DrawVisVoxel(vissprite_t *spr, int minslabz, int maxslabz, short *cliptop, short *clipbot) { - ESPSResult mode; int flags = 0; // Do setup for blending. R_SetColorMapLight(spr->Style.BaseColormap, 0, spr->Style.ColormapNum << FRACBITS); - mode = R_SetPatchStyle(spr->Style.RenderStyle, spr->Style.Alpha, spr->Translation, spr->FillColor); + bool visible = R_SetPatchStyle(spr->Style.RenderStyle, spr->Style.Alpha, spr->Translation, spr->FillColor); - if (mode == DontDraw) + if (!visible) { return; } diff --git a/src/v_draw.cpp b/src/v_draw.cpp index 0e75bc2828..df5dd5cf0e 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -190,11 +190,11 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) } fixedcolormap = dc_fcolormap; - ESPSResult mode; + bool visible; if (r_swtruecolor) - mode = R_SetPatchStyle(parms.style, parms.Alpha, -1, parms.fillcolor); + visible = R_SetPatchStyle(parms.style, parms.Alpha, -1, parms.fillcolor); else - mode = R_SetPatchStyle(parms.style, parms.Alpha, 0, parms.fillcolor); + visible = R_SetPatchStyle(parms.style, parms.Alpha, 0, parms.fillcolor); BYTE *destorgsave = dc_destorg; int destheightsave = dc_destheight; @@ -208,7 +208,7 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) double x0 = parms.x - parms.left * parms.destwidth / parms.texwidth; double y0 = parms.y - parms.top * parms.destheight / parms.texheight; - if (mode != DontDraw) + if (visible) { double centeryback = CenterY; CenterY 
= 0; From ced7bdf94a9ff8ce13501f49066c9330e432c1d7 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 25 Dec 2016 05:46:16 +0100 Subject: [PATCH 555/912] Remove rt drawer family and the infrastructure supporting it --- src/CMakeLists.txt | 2 - src/r_draw.cpp | 541 --------- src/r_draw.h | 30 - src/r_draw_pal.h | 84 -- src/r_draw_rgba.cpp | 80 ++ src/r_draw_rgba.h | 79 -- src/r_drawers.cpp | 24 - src/r_drawers.h | 24 - src/r_drawt_pal.cpp | 1064 ----------------- src/r_drawt_rgba.cpp | 314 ----- src/r_main.cpp | 11 - src/r_main.h | 6 - src/r_poly_triangle.cpp | 1 - src/r_segs.cpp | 2 - src/r_things.cpp | 20 +- src/r_thread.h | 11 - .../fixedfunction/drawcolumncodegen.cpp | 142 +-- .../fixedfunction/drawcolumncodegen.h | 17 +- tools/drawergen/llvmdrawers.cpp | 60 +- tools/drawergen/llvmdrawers.h | 2 +- 20 files changed, 150 insertions(+), 2364 deletions(-) delete mode 100644 src/r_drawt_pal.cpp delete mode 100644 src/r_drawt_rgba.cpp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index aa67b13d1a..ede6c9a521 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -818,9 +818,7 @@ set( FASTMATH_PCH_SOURCES r_bsp.cpp r_draw.cpp r_draw_pal.cpp - r_drawt_pal.cpp r_draw_rgba.cpp - r_drawt_rgba.cpp r_drawers.cpp r_thread.cpp r_main.cpp diff --git a/src/r_draw.cpp b/src/r_draw.cpp index c7bf95eed0..f629c027da 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -147,9 +147,6 @@ namespace swrenderer fuzzcolfunc = R_DrawFuzzColumn; transcolfunc = R_DrawTranslatedColumn; spanfunc = R_DrawSpan; - hcolfunc_pre = R_DrawColumnHoriz; - hcolfunc_post1 = rt_map1col; - hcolfunc_post4 = rt_map4cols; } void R_InitShadeMaps() @@ -233,20 +230,14 @@ namespace swrenderer if (flags & STYLEF_ColorIsFixed) { colfunc = R_FillColumn; - hcolfunc_post1 = rt_copy1col; - hcolfunc_post4 = rt_copy4cols; } else if (dc_translation == NULL) { colfunc = basecolfunc; - hcolfunc_post1 = rt_map1col; - hcolfunc_post4 = rt_map4cols; } else { colfunc = transcolfunc; - hcolfunc_post1 = 
rt_tlate1col; - hcolfunc_post4 = rt_tlate4cols; drawer_needs_pal_input = true; } return true; @@ -284,20 +275,14 @@ namespace swrenderer if (flags & STYLEF_ColorIsFixed) { colfunc = R_FillAddColumn; - hcolfunc_post1 = rt_add1col; - hcolfunc_post4 = rt_add4cols; } else if (dc_translation == NULL) { colfunc = R_DrawAddColumn; - hcolfunc_post1 = rt_add1col; - hcolfunc_post4 = rt_add4cols; } else { colfunc = R_DrawTlatedAddColumn; - hcolfunc_post1 = rt_tlateadd1col; - hcolfunc_post4 = rt_tlateadd4cols; drawer_needs_pal_input = true; } } @@ -306,20 +291,14 @@ namespace swrenderer if (flags & STYLEF_ColorIsFixed) { colfunc = R_FillAddClampColumn; - hcolfunc_post1 = rt_addclamp1col; - hcolfunc_post4 = rt_addclamp4cols; } else if (dc_translation == NULL) { colfunc = R_DrawAddClampColumn; - hcolfunc_post1 = rt_addclamp1col; - hcolfunc_post4 = rt_addclamp4cols; } else { colfunc = R_DrawAddClampTranslatedColumn; - hcolfunc_post1 = rt_tlateaddclamp1col; - hcolfunc_post4 = rt_tlateaddclamp4cols; drawer_needs_pal_input = true; } } @@ -329,20 +308,14 @@ namespace swrenderer if (flags & STYLEF_ColorIsFixed) { colfunc = R_FillSubClampColumn; - hcolfunc_post1 = rt_subclamp1col; - hcolfunc_post4 = rt_subclamp4cols; } else if (dc_translation == NULL) { colfunc = R_DrawSubClampColumn; - hcolfunc_post1 = rt_subclamp1col; - hcolfunc_post4 = rt_subclamp4cols; } else { colfunc = R_DrawSubClampTranslatedColumn; - hcolfunc_post1 = rt_tlatesubclamp1col; - hcolfunc_post4 = rt_tlatesubclamp4cols; drawer_needs_pal_input = true; } return true; @@ -355,20 +328,14 @@ namespace swrenderer if (flags & STYLEF_ColorIsFixed) { colfunc = R_FillRevSubClampColumn; - hcolfunc_post1 = rt_subclamp1col; - hcolfunc_post4 = rt_subclamp4cols; } else if (dc_translation == NULL) { colfunc = R_DrawRevSubClampColumn; - hcolfunc_post1 = rt_revsubclamp1col; - hcolfunc_post4 = rt_revsubclamp4cols; } else { colfunc = R_DrawRevSubClampTranslatedColumn; - hcolfunc_post1 = rt_tlaterevsubclamp1col; - hcolfunc_post4 = 
rt_tlaterevsubclamp4cols; drawer_needs_pal_input = true; } return true; @@ -439,7 +406,6 @@ namespace swrenderer } } basecolormapsave = basecolormap; - hcolfunc_pre = R_DrawColumnHoriz; // Check for special modes if (style.BlendOp == STYLEOP_Fuzz) @@ -453,8 +419,6 @@ namespace swrenderer if ((alpha >>= 12) == 0) return false; colfunc = R_DrawShadedColumn; - hcolfunc_post1 = rt_shaded1col; - hcolfunc_post4 = rt_shaded4cols; drawer_needs_pal_input = true; dc_color = fixedcolormap ? fixedcolormap->Maps[APART(color)] : basecolormap->Maps[APART(color)]; basecolormap = &ShadeFakeColormap[16 - alpha]; @@ -491,7 +455,6 @@ namespace swrenderer // dc_srccolor is used by the R_Fill* routines. It is premultiplied // with the alpha. dc_srccolor = ((((r*x) >> 4) << 20) | ((g*x) >> 4) | ((((b)*x) >> 4) << 10)) & 0x3feffbff; - hcolfunc_pre = R_FillColumnHoriz; R_SetColorMapLight(&identitycolormap, 0, 0); } @@ -638,191 +601,6 @@ namespace swrenderer } } - void rt_initcols(uint8_t *buffer) - { - using namespace drawerargs; - - for (int y = 3; y >= 0; y--) - horizspan[y] = dc_ctspan[y] = &dc_tspans[y][0]; - - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(buffer); - else - DrawerCommandQueue::QueueCommand(buffer); - } - - void rt_span_coverage(int x, int start, int stop) - { - using namespace drawerargs; - - unsigned int **tspan = &dc_ctspan[x & 3]; - (*tspan)[0] = start; - (*tspan)[1] = stop; - *tspan += 2; - } - - void rt_flip_posts() - { - using namespace drawerargs; - - unsigned int *front = horizspan[dc_x & 3]; - unsigned int *back = dc_ctspan[dc_x & 3] - 2; - - while (front < back) - { - swapvalues(front[0], back[0]); - swapvalues(front[1], back[1]); - front += 2; - back -= 2; - } - } - - void rt_draw4cols(int sx) - { - using namespace drawerargs; - - int x, bad; - unsigned int maxtop, minbot, minnexttop; - - // Place a dummy "span" in each column. These don't get - // drawn. They're just here to avoid special cases in the - // max/min calculations below. 
- for (x = 0; x < 4; ++x) - { - dc_ctspan[x][0] = screen->GetHeight()+1; - dc_ctspan[x][1] = screen->GetHeight(); - } - - for (;;) - { - // If a column is out of spans, mark it as such - bad = 0; - minnexttop = 0xffffffff; - for (x = 0; x < 4; ++x) - { - if (horizspan[x] >= dc_ctspan[x]) - { - bad |= 1 << x; - } - else if ((horizspan[x]+2)[0] < minnexttop) - { - minnexttop = (horizspan[x]+2)[0]; - } - } - // Once all columns are out of spans, we're done - if (bad == 15) - { - return; - } - - // Find the largest shared area for the spans in each column - maxtop = MAX (MAX (horizspan[0][0], horizspan[1][0]), - MAX (horizspan[2][0], horizspan[3][0])); - minbot = MIN (MIN (horizspan[0][1], horizspan[1][1]), - MIN (horizspan[2][1], horizspan[3][1])); - - // If there is no shared area with these spans, draw each span - // individually and advance to the next spans until we reach a shared area. - // However, only draw spans down to the highest span in the next set of - // spans. If we allow the entire height of a span to be drawn, it could - // prevent any more shared areas from being drawn in these four columns. - // - // Example: Suppose we have the following arrangement: - // A CD - // A CD - // B D - // B D - // aB D - // aBcD - // aBcD - // aBc - // - // If we draw the entire height of the spans, we end up drawing this first: - // A CD - // A CD - // B D - // B D - // B D - // B D - // B D - // B D - // B - // - // This leaves only the "a" and "c" columns to be drawn, and they are not - // part of a shared area, but if we can include B and D with them, we can - // get a shared area. 
So we cut off everything in the first set just - // above the "a" column and end up drawing this first: - // A CD - // A CD - // B D - // B D - // - // Then the next time through, we have the following arrangement with an - // easily shared area to draw: - // aB D - // aBcD - // aBcD - // aBc - if (bad != 0 || maxtop > minbot) - { - int drawcount = 0; - for (x = 0; x < 4; ++x) - { - if (!(bad & 1)) - { - if (horizspan[x][1] < minnexttop) - { - hcolfunc_post1 (x, sx+x, horizspan[x][0], horizspan[x][1]); - horizspan[x] += 2; - drawcount++; - } - else if (minnexttop > horizspan[x][0]) - { - hcolfunc_post1 (x, sx+x, horizspan[x][0], minnexttop-1); - horizspan[x][0] = minnexttop; - drawcount++; - } - } - bad >>= 1; - } - // Drawcount *should* always be non-zero. The reality is that some situations - // can make this not true. Unfortunately, I'm not sure what those situations are. - if (drawcount == 0) - { - return; - } - continue; - } - - // Draw any span fragments above the shared area. - for (x = 0; x < 4; ++x) - { - if (maxtop > horizspan[x][0]) - { - hcolfunc_post1 (x, sx+x, horizspan[x][0], maxtop-1); - } - } - - // Draw the shared area. - hcolfunc_post4 (sx, maxtop, minbot); - - // For each column, if part of the span is past the shared area, - // set its top to just below the shared area. Otherwise, advance - // to the next span in that column. 
- for (x = 0; x < 4; ++x) - { - if (minbot < horizspan[x][1]) - { - horizspan[x][0] = minbot+1; - } - else - { - horizspan[x] += 2; - } - } - } - } - void R_SetupSpanBits(FTexture *tex) { using namespace drawerargs; @@ -855,325 +633,6 @@ namespace swrenderer ///////////////////////////////////////////////////////////////////////// - void R_FillColumnHoriz() - { - using namespace drawerargs; - - if (dc_count <= 0) - return; - - int x = dc_x & 3; - unsigned int **span = &dc_ctspan[x]; - (*span)[0] = dc_yl; - (*span)[1] = dc_yh; - *span += 2; - - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); - } - - void R_DrawColumnHoriz() - { - using namespace drawerargs; - - if (dc_count <= 0) - return; - - int x = dc_x & 3; - unsigned int **span = &dc_ctspan[x]; - (*span)[0] = dc_yl; - (*span)[1] = dc_yh; - *span += 2; - - if (r_swtruecolor) - { - if (drawer_needs_pal_input) - DrawerCommandQueue::QueueCommand>(); - else - DrawerCommandQueue::QueueCommand>(); - } - else - { - DrawerCommandQueue::QueueCommand(); - } - } - - // Copies one span at hx to the screen at sx. - void rt_copy1col(int hx, int sx, int yl, int yh) - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); - else - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); - } - - // Copies all four spans to the screen starting at sx. - void rt_copy4cols(int sx, int yl, int yh) - { - if (r_swtruecolor) - { - // To do: we could do this with SSE using __m128i - rt_copy1col(0, sx, yl, yh); - rt_copy1col(1, sx + 1, yl, yh); - rt_copy1col(2, sx + 2, yl, yh); - rt_copy1col(3, sx + 3, yl, yh); - } - else - { - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); - } - } - - // Maps one span at hx to the screen at sx. - void rt_map1col(int hx, int sx, int yl, int yh) - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); - else - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); - } - - // Maps all four spans to the screen starting at sx. 
- void rt_map4cols(int sx, int yl, int yh) - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); - else - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); - } - - // Translates one span at hx to the screen at sx. - void rt_tlate1col(int hx, int sx, int yl, int yh) - { - if (r_swtruecolor) - { - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); - } - else - { - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); - rt_map1col(hx, sx, yl, yh); - } - } - - // Translates all four spans to the screen starting at sx. - void rt_tlate4cols(int sx, int yl, int yh) - { - if (r_swtruecolor) - { - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); - } - else - { - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); - rt_map4cols(sx, yl, yh); - } - } - - // Adds one span at hx to the screen at sx without clamping. - void rt_add1col(int hx, int sx, int yl, int yh) - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); - else - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); - } - - // Adds all four spans to the screen starting at sx without clamping. - void rt_add4cols(int sx, int yl, int yh) - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); - else - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); - } - - // Translates and adds one span at hx to the screen at sx without clamping. - void rt_tlateadd1col(int hx, int sx, int yl, int yh) - { - if (r_swtruecolor) - { - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); - } - else - { - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); - rt_add1col(hx, sx, yl, yh); - } - } - - // Translates and adds all four spans to the screen starting at sx without clamping. - void rt_tlateadd4cols(int sx, int yl, int yh) - { - if (r_swtruecolor) - { - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); - } - else - { - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); - rt_add4cols(sx, yl, yh); - } - } - - // Shades one span at hx to the screen at sx. 
- void rt_shaded1col(int hx, int sx, int yl, int yh) - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); - else - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); - } - - // Shades all four spans to the screen starting at sx. - void rt_shaded4cols(int sx, int yl, int yh) - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); - else - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); - } - - // Adds one span at hx to the screen at sx with clamping. - void rt_addclamp1col(int hx, int sx, int yl, int yh) - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); - else - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); - } - - // Adds all four spans to the screen starting at sx with clamping. - void rt_addclamp4cols(int sx, int yl, int yh) - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); - else - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); - } - - // Translates and adds one span at hx to the screen at sx with clamping. - void rt_tlateaddclamp1col(int hx, int sx, int yl, int yh) - { - if (r_swtruecolor) - { - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); - } - else - { - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); - rt_addclamp1col(hx, sx, yl, yh); - } - } - - // Translates and adds all four spans to the screen starting at sx with clamping. - void rt_tlateaddclamp4cols(int sx, int yl, int yh) - { - if (r_swtruecolor) - { - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); - } - else - { - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); - rt_addclamp4cols(sx, yl, yh); - } - } - - // Subtracts one span at hx to the screen at sx with clamping. - void rt_subclamp1col(int hx, int sx, int yl, int yh) - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); - else - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); - } - - // Subtracts all four spans to the screen starting at sx with clamping. 
- void rt_subclamp4cols(int sx, int yl, int yh) - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); - else - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); - } - - // Translates and subtracts one span at hx to the screen at sx with clamping. - void rt_tlatesubclamp1col(int hx, int sx, int yl, int yh) - { - if (r_swtruecolor) - { - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); - } - else - { - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); - rt_subclamp1col(hx, sx, yl, yh); - } - } - - // Translates and subtracts all four spans to the screen starting at sx with clamping. - void rt_tlatesubclamp4cols(int sx, int yl, int yh) - { - if (r_swtruecolor) - { - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); - } - else - { - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); - rt_subclamp4cols(sx, yl, yh); - } - } - - // Subtracts one span at hx from the screen at sx with clamping. - void rt_revsubclamp1col(int hx, int sx, int yl, int yh) - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); - else - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); - } - - // Subtracts all four spans from the screen starting at sx with clamping. - void rt_revsubclamp4cols(int sx, int yl, int yh) - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); - else - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); - } - - // Translates and subtracts one span at hx from the screen at sx with clamping. - void rt_tlaterevsubclamp1col(int hx, int sx, int yl, int yh) - { - if (r_swtruecolor) - { - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); - } - else - { - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); - rt_revsubclamp1col(hx, sx, yl, yh); - } - } - - // Translates and subtracts all four spans from the screen starting at sx with clamping. 
- void rt_tlaterevsubclamp4cols(int sx, int yl, int yh) - { - if (r_swtruecolor) - { - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); - } - else - { - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); - rt_revsubclamp4cols(sx, yl, yh); - } - } - void R_DrawWallCol1() { if (r_swtruecolor) diff --git a/src/r_draw.h b/src/r_draw.h index 752647943a..c508aa2680 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -134,35 +134,6 @@ namespace swrenderer const uint8_t *R_GetColumn(FTexture *tex, int col); - void rt_initcols(uint8_t *buffer = nullptr); - void rt_span_coverage(int x, int start, int stop); - void rt_draw4cols(int sx); - void rt_flip_posts(); - void rt_copy1col(int hx, int sx, int yl, int yh); - void rt_copy4cols(int sx, int yl, int yh); - void rt_shaded1col(int hx, int sx, int yl, int yh); - void rt_shaded4cols(int sx, int yl, int yh); - void rt_map1col(int hx, int sx, int yl, int yh); - void rt_add1col(int hx, int sx, int yl, int yh); - void rt_addclamp1col(int hx, int sx, int yl, int yh); - void rt_subclamp1col(int hx, int sx, int yl, int yh); - void rt_revsubclamp1col(int hx, int sx, int yl, int yh); - void rt_tlate1col(int hx, int sx, int yl, int yh); - void rt_tlateadd1col(int hx, int sx, int yl, int yh); - void rt_tlateaddclamp1col(int hx, int sx, int yl, int yh); - void rt_tlatesubclamp1col(int hx, int sx, int yl, int yh); - void rt_tlaterevsubclamp1col(int hx, int sx, int yl, int yh); - void rt_map4cols(int sx, int yl, int yh); - void rt_add4cols(int sx, int yl, int yh); - void rt_addclamp4cols(int sx, int yl, int yh); - void rt_subclamp4cols(int sx, int yl, int yh); - void rt_revsubclamp4cols(int sx, int yl, int yh); - void rt_tlate4cols(int sx, int yl, int yh); - void rt_tlateadd4cols(int sx, int yl, int yh); - void rt_tlateaddclamp4cols(int sx, int yl, int yh); - void rt_tlatesubclamp4cols(int sx, int yl, int yh); - void rt_tlaterevsubclamp4cols(int sx, int yl, int yh); - void R_DrawColumnHoriz(); void R_DrawColumn(); void R_DrawFuzzColumn(); void 
R_DrawTranslatedColumn(); @@ -192,7 +163,6 @@ namespace swrenderer void R_SetupDrawSlab(FSWColormap *base_colormap, float light, int shade); void R_DrawSlab(int dx, fixed_t v, int dy, fixed_t vi, const uint8_t *vptr, uint8_t *p); void R_DrawFogBoundary(int x1, int x2, short *uclip, short *dclip); - void R_FillColumnHoriz(); void R_FillSpan(); void R_DrawWallCol1(); diff --git a/src/r_draw_pal.h b/src/r_draw_pal.h index 205e0cc284..984849b83e 100644 --- a/src/r_draw_pal.h +++ b/src/r_draw_pal.h @@ -248,88 +248,4 @@ namespace swrenderer const uint8_t *_colormap; uint8_t *_destorg; }; - - class RtInitColsPalCommand : public DrawerCommand - { - public: - RtInitColsPalCommand(uint8_t *buff); - void Execute(DrawerThread *thread) override; - FString DebugInfo() override { return "RtInitColsPalCommand"; } - - private: - uint8_t *buff; - }; - - class PalColumnHorizCommand : public DrawerCommand - { - public: - PalColumnHorizCommand(); - - protected: - const uint8_t *_source; - fixed_t _iscale; - fixed_t _texturefrac; - int _count; - int _color; - int _x; - int _yl; - }; - - class DrawColumnHorizPalCommand : public PalColumnHorizCommand - { - public: - void Execute(DrawerThread *thread) override; - FString DebugInfo() override { return "DrawColumnHorizPalCommand"; } - }; - - class FillColumnHorizPalCommand : public PalColumnHorizCommand - { - public: - void Execute(DrawerThread *thread) override; - FString DebugInfo() override { return "FillColumnHorizPalCommand"; } - }; - - class PalRtCommand : public DrawerCommand - { - public: - PalRtCommand(int hx, int sx, int yl, int yh); - FString DebugInfo() override { return "PalRtCommand"; } - - protected: - int hx, sx, yl, yh; - uint8_t *_destorg; - int _pitch; - const uint8_t *_colormap; - const uint32_t *_srcblend; - const uint32_t *_destblend; - const uint8_t *_translation; - fixed_t _srcalpha; - fixed_t _destalpha; - int _color; - }; - - class DrawColumnRt1CopyPalCommand : public PalRtCommand { public: using 
PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; - class DrawColumnRt4CopyPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; - class DrawColumnRt1PalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; - class DrawColumnRt4PalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; - class DrawColumnRt1TranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; - class DrawColumnRt4TranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; - class DrawColumnRt1AddPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; - class DrawColumnRt4AddPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; - //class DrawColumnRt1AddTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; - //class DrawColumnRt4AddTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; - class DrawColumnRt1ShadedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; - class DrawColumnRt4ShadedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; - class DrawColumnRt1AddClampPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; - class DrawColumnRt4AddClampPalCommand : public PalRtCommand { public: using 
PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; - //class DrawColumnRt1AddClampTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; - //class DrawColumnRt4AddClampTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; - class DrawColumnRt1SubClampPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; - class DrawColumnRt4SubClampPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; - //class DrawColumnRt1SubClampTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; - //class DrawColumnRt4SubClampTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; - class DrawColumnRt1RevSubClampPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; - class DrawColumnRt4RevSubClampPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; - //class DrawColumnRt1RevSubClampTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; - //class DrawColumnRt4RevSubClampTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; } diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index a9fc5c80ea..4b56856081 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -1123,4 +1123,84 @@ namespace swrenderer } #endif + ///////////////////////////////////////////////////////////////////////////// + + namespace + { + 
static uint32_t particle_texture[16 * 16] = + { + 1 * 1, 2 * 1, 3 * 1, 4 * 1, 5 * 1, 6 * 1, 7 * 1, 8 * 1, 8 * 1, 7 * 1, 6 * 1, 5 * 1, 4 * 1, 3 * 1, 2 * 1, 1 * 1, + 1 * 2, 2 * 2, 3 * 2, 4 * 2, 5 * 2, 6 * 2, 7 * 2, 8 * 2, 8 * 2, 7 * 2, 6 * 2, 5 * 2, 4 * 2, 3 * 2, 2 * 2, 1 * 2, + 1 * 3, 2 * 3, 3 * 3, 4 * 3, 5 * 3, 6 * 3, 7 * 3, 8 * 3, 8 * 3, 7 * 3, 6 * 3, 5 * 3, 4 * 3, 3 * 3, 2 * 3, 1 * 3, + 1 * 4, 2 * 4, 3 * 4, 4 * 4, 5 * 4, 6 * 4, 7 * 4, 8 * 4, 8 * 4, 7 * 4, 6 * 4, 5 * 4, 4 * 4, 3 * 4, 2 * 4, 1 * 4, + 1 * 5, 2 * 5, 3 * 5, 4 * 5, 5 * 5, 6 * 5, 7 * 5, 8 * 5, 8 * 5, 7 * 5, 6 * 5, 5 * 5, 4 * 5, 3 * 5, 2 * 5, 1 * 5, + 1 * 6, 2 * 6, 3 * 6, 4 * 6, 5 * 6, 6 * 6, 7 * 6, 8 * 6, 8 * 6, 7 * 6, 6 * 6, 5 * 6, 4 * 6, 3 * 6, 2 * 6, 1 * 6, + 1 * 7, 2 * 7, 3 * 7, 4 * 7, 5 * 7, 6 * 7, 7 * 7, 8 * 7, 8 * 7, 7 * 7, 6 * 7, 5 * 7, 4 * 7, 3 * 7, 2 * 7, 1 * 7, + 1 * 8, 2 * 8, 3 * 8, 4 * 8, 5 * 8, 6 * 8, 7 * 8, 8 * 8, 8 * 8, 7 * 8, 6 * 8, 5 * 8, 4 * 8, 3 * 8, 2 * 8, 1 * 8, + 1 * 8, 2 * 8, 3 * 8, 4 * 8, 5 * 8, 6 * 8, 7 * 8, 8 * 8, 8 * 8, 7 * 8, 6 * 8, 5 * 8, 4 * 8, 3 * 8, 2 * 8, 1 * 8, + 1 * 7, 2 * 7, 3 * 7, 4 * 7, 5 * 7, 6 * 7, 7 * 7, 8 * 7, 8 * 7, 7 * 7, 6 * 7, 5 * 7, 4 * 7, 3 * 7, 2 * 7, 1 * 7, + 1 * 6, 2 * 6, 3 * 6, 4 * 6, 5 * 6, 6 * 6, 7 * 6, 8 * 6, 8 * 6, 7 * 6, 6 * 6, 5 * 6, 4 * 6, 3 * 6, 2 * 6, 1 * 6, + 1 * 5, 2 * 5, 3 * 5, 4 * 5, 5 * 5, 6 * 5, 7 * 5, 8 * 5, 8 * 5, 7 * 5, 6 * 5, 5 * 5, 4 * 5, 3 * 5, 2 * 5, 1 * 5, + 1 * 4, 2 * 4, 3 * 4, 4 * 4, 5 * 4, 6 * 4, 7 * 4, 8 * 4, 8 * 4, 7 * 4, 6 * 4, 5 * 4, 4 * 4, 3 * 4, 2 * 4, 1 * 4, + 1 * 3, 2 * 3, 3 * 3, 4 * 3, 5 * 3, 6 * 3, 7 * 3, 8 * 3, 8 * 3, 7 * 3, 6 * 3, 5 * 3, 4 * 3, 3 * 3, 2 * 3, 1 * 3, + 1 * 2, 2 * 2, 3 * 2, 4 * 2, 5 * 2, 6 * 2, 7 * 2, 8 * 2, 8 * 2, 7 * 2, 6 * 2, 5 * 2, 4 * 2, 3 * 2, 2 * 2, 1 * 2, + 1 * 1, 2 * 1, 3 * 1, 4 * 1, 5 * 1, 6 * 1, 7 * 1, 8 * 1, 8 * 1, 7 * 1, 6 * 1, 5 * 1, 4 * 1, 3 * 1, 2 * 1, 1 * 1 + }; + } + + DrawParticleColumnRGBACommand::DrawParticleColumnRGBACommand(uint32_t *dest, int dest_y, int pitch, int count, 
uint32_t fg, uint32_t alpha, uint32_t fracposx) + { + _dest = dest; + _pitch = pitch; + _count = count; + _fg = fg; + _alpha = alpha; + _fracposx = fracposx; + _dest_y = dest_y; + } + + void DrawParticleColumnRGBACommand::Execute(DrawerThread *thread) + { + int count = thread->count_for_thread(_dest_y, _count); + if (count <= 0) + return; + + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, _dest); + int pitch = _pitch * thread->num_cores; + + const uint32_t *source = &particle_texture[(_fracposx >> FRACBITS) * 16]; + uint32_t particle_alpha = _alpha; + + uint32_t fracstep = 16 * FRACUNIT / _count; + uint32_t fracpos = fracstep * thread->skipped_by_thread(_dest_y) + fracstep / 2; + fracstep *= thread->num_cores; + + uint32_t fg_red = (_fg >> 16) & 0xff; + uint32_t fg_green = (_fg >> 8) & 0xff; + uint32_t fg_blue = _fg & 0xff; + + for (int y = 0; y < count; y++) + { + uint32_t alpha = (source[fracpos >> FRACBITS] * particle_alpha) >> 6; + uint32_t inv_alpha = 256 - alpha; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = (fg_red * alpha + bg_red * inv_alpha) / 256; + uint32_t green = (fg_green * alpha + bg_green * inv_alpha) / 256; + uint32_t blue = (fg_blue * alpha + bg_blue * inv_alpha) / 256; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += pitch; + fracpos += fracstep; + } + } + + FString DrawParticleColumnRGBACommand::DebugInfo() + { + return "DrawParticle"; + } + } diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index d5b2691064..1364d537bd 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -361,85 +361,6 @@ namespace swrenderer } }; - class DrawColumnRt1LLVMCommand : public DrawerCommand - { - protected: - DrawColumnArgs args; - WorkerThreadData ThreadData(DrawerThread *thread); - - public: - DrawColumnRt1LLVMCommand(int hx, int sx, int yl, int yh); - void Execute(DrawerThread *thread) override; - FString DebugInfo() override; - 
}; - - DECLARE_DRAW_COMMAND(DrawColumnRt1Copy, DrawColumnRt1Copy, DrawColumnRt1LLVMCommand); - DECLARE_DRAW_COMMAND(DrawColumnRt1Add, DrawColumnRt1Add, DrawColumnRt1LLVMCommand); - DECLARE_DRAW_COMMAND(DrawColumnRt1Shaded, DrawColumnRt1Shaded, DrawColumnRt1LLVMCommand); - DECLARE_DRAW_COMMAND(DrawColumnRt1AddClamp, DrawColumnRt1AddClamp, DrawColumnRt1LLVMCommand); - DECLARE_DRAW_COMMAND(DrawColumnRt1SubClamp, DrawColumnRt1SubClamp, DrawColumnRt1LLVMCommand); - DECLARE_DRAW_COMMAND(DrawColumnRt1RevSubClamp, DrawColumnRt1RevSubClamp, DrawColumnRt1LLVMCommand); - DECLARE_DRAW_COMMAND(DrawColumnRt1Translated, DrawColumnRt1Translated, DrawColumnRt1LLVMCommand); - DECLARE_DRAW_COMMAND(DrawColumnRt1TlatedAdd, DrawColumnRt1TlatedAdd, DrawColumnRt1LLVMCommand); - DECLARE_DRAW_COMMAND(DrawColumnRt1AddClampTranslated, DrawColumnRt1AddClampTranslated, DrawColumnRt1LLVMCommand); - DECLARE_DRAW_COMMAND(DrawColumnRt1SubClampTranslated, DrawColumnRt1SubClampTranslated, DrawColumnRt1LLVMCommand); - DECLARE_DRAW_COMMAND(DrawColumnRt1RevSubClampTranslated, DrawColumnRt1RevSubClampTranslated, DrawColumnRt1LLVMCommand); - DECLARE_DRAW_COMMAND(DrawColumnRt4, DrawColumnRt4, DrawColumnRt1LLVMCommand); - DECLARE_DRAW_COMMAND(DrawColumnRt4Copy, DrawColumnRt4Copy, DrawColumnRt1LLVMCommand); - DECLARE_DRAW_COMMAND(DrawColumnRt4Add, DrawColumnRt4Add, DrawColumnRt1LLVMCommand); - DECLARE_DRAW_COMMAND(DrawColumnRt4Shaded, DrawColumnRt4Shaded, DrawColumnRt1LLVMCommand); - DECLARE_DRAW_COMMAND(DrawColumnRt4AddClamp, DrawColumnRt4AddClamp, DrawColumnRt1LLVMCommand); - DECLARE_DRAW_COMMAND(DrawColumnRt4SubClamp, DrawColumnRt4SubClamp, DrawColumnRt1LLVMCommand); - DECLARE_DRAW_COMMAND(DrawColumnRt4RevSubClamp, DrawColumnRt4RevSubClamp, DrawColumnRt1LLVMCommand); - DECLARE_DRAW_COMMAND(DrawColumnRt4Translated, DrawColumnRt4Translated, DrawColumnRt1LLVMCommand); - DECLARE_DRAW_COMMAND(DrawColumnRt4TlatedAdd, DrawColumnRt4TlatedAdd, DrawColumnRt1LLVMCommand); - 
DECLARE_DRAW_COMMAND(DrawColumnRt4AddClampTranslated, DrawColumnRt4AddClampTranslated, DrawColumnRt1LLVMCommand); - DECLARE_DRAW_COMMAND(DrawColumnRt4SubClampTranslated, DrawColumnRt4SubClampTranslated, DrawColumnRt1LLVMCommand); - DECLARE_DRAW_COMMAND(DrawColumnRt4RevSubClampTranslated, DrawColumnRt4RevSubClampTranslated, DrawColumnRt1LLVMCommand); - - ///////////////////////////////////////////////////////////////////////////// - - class RtInitColsRGBACommand : public DrawerCommand - { - BYTE * RESTRICT buff; - - public: - RtInitColsRGBACommand(BYTE *buff); - void Execute(DrawerThread *thread) override; - FString DebugInfo() override; - }; - - template - class DrawColumnHorizRGBACommand : public DrawerCommand - { - int _count; - fixed_t _iscale; - fixed_t _texturefrac; - const InputPixelType * RESTRICT _source; - int _x; - int _yl; - int _yh; - - public: - DrawColumnHorizRGBACommand(); - void Execute(DrawerThread *thread) override; - FString DebugInfo() override; - }; - - class FillColumnHorizRGBACommand : public DrawerCommand - { - int _x; - int _yl; - int _yh; - int _count; - uint32_t _color; - - public: - FillColumnHorizRGBACommand(); - void Execute(DrawerThread *thread) override; - FString DebugInfo() override; - }; - ///////////////////////////////////////////////////////////////////////////// class DrawParticleColumnRGBACommand : public DrawerCommand diff --git a/src/r_drawers.cpp b/src/r_drawers.cpp index dd81af1096..e61e951837 100644 --- a/src/r_drawers.cpp +++ b/src/r_drawers.cpp @@ -175,30 +175,6 @@ Drawers::Drawers() FillColumnAddClamp = FillColumnAddClamp_SSE2; FillColumnSubClamp = FillColumnSubClamp_SSE2; FillColumnRevSubClamp = FillColumnRevSubClamp_SSE2; - DrawColumnRt1 = DrawColumnRt1_SSE2; - DrawColumnRt1Copy = DrawColumnRt1Copy_SSE2; - DrawColumnRt1Add = DrawColumnRt1Add_SSE2; - DrawColumnRt1Shaded = DrawColumnRt1Shaded_SSE2; - DrawColumnRt1AddClamp = DrawColumnRt1AddClamp_SSE2; - DrawColumnRt1SubClamp = DrawColumnRt1SubClamp_SSE2; - 
DrawColumnRt1RevSubClamp = DrawColumnRt1RevSubClamp_SSE2; - DrawColumnRt1Translated = DrawColumnRt1Translated_SSE2; - DrawColumnRt1TlatedAdd = DrawColumnRt1TlatedAdd_SSE2; - DrawColumnRt1AddClampTranslated = DrawColumnRt1AddClampTranslated_SSE2; - DrawColumnRt1SubClampTranslated = DrawColumnRt1SubClampTranslated_SSE2; - DrawColumnRt1RevSubClampTranslated = DrawColumnRt1RevSubClampTranslated_SSE2; - DrawColumnRt4 = DrawColumnRt4_SSE2; - DrawColumnRt4Copy = DrawColumnRt4Copy_SSE2; - DrawColumnRt4Add = DrawColumnRt4Add_SSE2; - DrawColumnRt4Shaded = DrawColumnRt4Shaded_SSE2; - DrawColumnRt4AddClamp = DrawColumnRt4AddClamp_SSE2; - DrawColumnRt4SubClamp = DrawColumnRt4SubClamp_SSE2; - DrawColumnRt4RevSubClamp = DrawColumnRt4RevSubClamp_SSE2; - DrawColumnRt4Translated = DrawColumnRt4Translated_SSE2; - DrawColumnRt4TlatedAdd = DrawColumnRt4TlatedAdd_SSE2; - DrawColumnRt4AddClampTranslated = DrawColumnRt4AddClampTranslated_SSE2; - DrawColumnRt4SubClampTranslated = DrawColumnRt4SubClampTranslated_SSE2; - DrawColumnRt4RevSubClampTranslated = DrawColumnRt4RevSubClampTranslated_SSE2; DrawSpan = DrawSpan_SSE2; DrawSpanMasked = DrawSpanMasked_SSE2; DrawSpanTranslucent = DrawSpanTranslucent_SSE2; diff --git a/src/r_drawers.h b/src/r_drawers.h index 2ff2fd087a..7a94c82ddd 100644 --- a/src/r_drawers.h +++ b/src/r_drawers.h @@ -308,30 +308,6 @@ public: void(*FillColumnAddClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; void(*FillColumnSubClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; void(*FillColumnRevSubClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; - void(*DrawColumnRt1)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; - void(*DrawColumnRt1Copy)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; - void(*DrawColumnRt1Add)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; - void(*DrawColumnRt1Shaded)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; - 
void(*DrawColumnRt1AddClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; - void(*DrawColumnRt1SubClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; - void(*DrawColumnRt1RevSubClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; - void(*DrawColumnRt1Translated)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; - void(*DrawColumnRt1TlatedAdd)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; - void(*DrawColumnRt1AddClampTranslated)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; - void(*DrawColumnRt1SubClampTranslated)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; - void(*DrawColumnRt1RevSubClampTranslated)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; - void(*DrawColumnRt4)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; - void(*DrawColumnRt4Copy)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; - void(*DrawColumnRt4Add)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; - void(*DrawColumnRt4Shaded)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; - void(*DrawColumnRt4AddClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; - void(*DrawColumnRt4SubClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; - void(*DrawColumnRt4RevSubClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; - void(*DrawColumnRt4Translated)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; - void(*DrawColumnRt4TlatedAdd)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; - void(*DrawColumnRt4AddClampTranslated)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; - void(*DrawColumnRt4SubClampTranslated)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; - void(*DrawColumnRt4RevSubClampTranslated)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; void(*DrawSpan)(const DrawSpanArgs *) = nullptr; void(*DrawSpanMasked)(const DrawSpanArgs *) = 
nullptr; diff --git a/src/r_drawt_pal.cpp b/src/r_drawt_pal.cpp deleted file mode 100644 index dcc0a09389..0000000000 --- a/src/r_drawt_pal.cpp +++ /dev/null @@ -1,1064 +0,0 @@ -/* -** r_drawt.cpp -** Faster column drawers for modern processors -** -**--------------------------------------------------------------------------- -** Copyright 1998-2006 Randy Heit -** Copyright 2016 Magnus Norddahl -** Copyright 2016 Rachael Alexanderson -** All rights reserved. -** -** Redistribution and use in source and binary forms, with or without -** modification, are permitted provided that the following conditions -** are met: -** -** 1. Redistributions of source code must retain the above copyright -** notice, this list of conditions and the following disclaimer. -** 2. Redistributions in binary form must reproduce the above copyright -** notice, this list of conditions and the following disclaimer in the -** documentation and/or other materials provided with the distribution. -** 3. The name of the author may not be used to endorse or promote products -** derived from this software without specific prior written permission. -** -** THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR -** IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -** OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. -** IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, -** INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT -** NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -** DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -** THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -** (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF -** THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-**--------------------------------------------------------------------------- -** -** These functions stretch columns into a temporary buffer and then -** map them to the screen. On modern machines, this is faster than drawing -** them directly to the screen. -** -** Will I be able to even understand any of this if I come back to it later? -** Let's hope so. :-) -*/ - -#include "templates.h" -#include "doomtype.h" -#include "doomdef.h" -#include "r_defs.h" -#include "r_draw.h" -#include "r_main.h" -#include "r_things.h" -#include "v_video.h" -#include "r_draw_pal.h" - -EXTERN_CVAR(Bool, r_blendmethod) - -// I should have commented this stuff better. -// -// dc_temp is the buffer R_DrawColumnHoriz writes into. -// dc_tspans points into it. -// dc_ctspan points into dc_tspans. -// horizspan also points into dc_tspans. - -// dc_ctspan is advanced while drawing into dc_temp. -// horizspan is advanced up to dc_ctspan when drawing from dc_temp to the screen. - -namespace swrenderer -{ - RtInitColsPalCommand::RtInitColsPalCommand(uint8_t *buff) : buff(buff) - { - } - - void RtInitColsPalCommand::Execute(DrawerThread *thread) - { - thread->dc_temp = buff == nullptr ? 
thread->dc_temp_buff : buff; - } - - ///////////////////////////////////////////////////////////////////// - - PalColumnHorizCommand::PalColumnHorizCommand() - { - using namespace drawerargs; - - _source = dc_source; - _iscale = dc_iscale; - _texturefrac = dc_texturefrac; - _count = dc_count; - _color = dc_color; - _x = dc_x; - _yl = dc_yl; - } - - void DrawColumnHorizPalCommand::Execute(DrawerThread *thread) - { - int count = _count; - uint8_t *dest; - fixed_t fracstep; - fixed_t frac; - - count = thread->count_for_thread(_yl, count); - if (count <= 0) - return; - - fracstep = _iscale; - frac = _texturefrac; - - const uint8_t *source = _source; - - int x = _x & 3; - dest = &thread->dc_temp[x + thread->temp_line_for_thread(_yl) * 4]; - frac += fracstep * thread->skipped_by_thread(_yl); - fracstep *= thread->num_cores; - - if (count & 1) { - *dest = source[frac >> FRACBITS]; dest += 4; frac += fracstep; - } - if (count & 2) { - dest[0] = source[frac >> FRACBITS]; frac += fracstep; - dest[4] = source[frac >> FRACBITS]; frac += fracstep; - dest += 8; - } - if (count & 4) { - dest[0] = source[frac >> FRACBITS]; frac += fracstep; - dest[4] = source[frac >> FRACBITS]; frac += fracstep; - dest[8] = source[frac >> FRACBITS]; frac += fracstep; - dest[12] = source[frac >> FRACBITS]; frac += fracstep; - dest += 16; - } - count >>= 3; - if (!count) return; - - do - { - dest[0] = source[frac >> FRACBITS]; frac += fracstep; - dest[4] = source[frac >> FRACBITS]; frac += fracstep; - dest[8] = source[frac >> FRACBITS]; frac += fracstep; - dest[12] = source[frac >> FRACBITS]; frac += fracstep; - dest[16] = source[frac >> FRACBITS]; frac += fracstep; - dest[20] = source[frac >> FRACBITS]; frac += fracstep; - dest[24] = source[frac >> FRACBITS]; frac += fracstep; - dest[28] = source[frac >> FRACBITS]; frac += fracstep; - dest += 32; - } while (--count); - } - - void FillColumnHorizPalCommand::Execute(DrawerThread *thread) - { - int count = _count; - uint8_t color = _color; - uint8_t 
*dest; - - count = thread->count_for_thread(_yl, count); - if (count <= 0) - return; - - int x = _x & 3; - dest = &thread->dc_temp[x + thread->temp_line_for_thread(_yl) * 4]; - - if (count & 1) { - *dest = color; - dest += 4; - } - if (!(count >>= 1)) - return; - do { - dest[0] = color; dest[4] = color; - dest += 8; - } while (--count); - } - - ///////////////////////////////////////////////////////////////////// - - PalRtCommand::PalRtCommand(int hx, int sx, int yl, int yh) : hx(hx), sx(sx), yl(yl), yh(yh) - { - using namespace drawerargs; - - _destorg = dc_destorg; - _pitch = dc_pitch; - _colormap = dc_colormap; - _srcblend = dc_srcblend; - _destblend = dc_destblend; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; - _translation = dc_translation; - _color = dc_color; - } - - void DrawColumnRt1CopyPalCommand::Execute(DrawerThread *thread) - { - uint8_t *source; - uint8_t *dest; - int count; - int pitch; - - count = yh - yl + 1; - - count = thread->count_for_thread(yl, count); - if (count <= 0) - return; - - dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; - source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4 + hx]; - pitch = _pitch * thread->num_cores; - - if (count & 1) { - *dest = *source; - source += 4; - dest += pitch; - } - if (count & 2) { - dest[0] = source[0]; - dest[pitch] = source[4]; - source += 8; - dest += pitch*2; - } - if (!(count >>= 2)) - return; - - do { - dest[0] = source[0]; - dest[pitch] = source[4]; - dest[pitch*2] = source[8]; - dest[pitch*3] = source[12]; - source += 16; - dest += pitch*4; - } while (--count); - } - - void DrawColumnRt4CopyPalCommand::Execute(DrawerThread *thread) - { - int *source; - int *dest; - int count; - int pitch; - - count = yh - yl + 1; - - count = thread->count_for_thread(yl, count); - if (count <= 0) - return; - - dest = (int *)(ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg); - source = (int *)(&thread->dc_temp[thread->temp_line_for_thread(yl)*4]); - pitch = 
_pitch*thread->num_cores/sizeof(int); - - if (count & 1) { - *dest = *source; - source += 4/sizeof(int); - dest += pitch; - } - if (!(count >>= 1)) - return; - - do { - dest[0] = source[0]; - dest[pitch] = source[4/sizeof(int)]; - source += 8/sizeof(int); - dest += pitch*2; - } while (--count); - } - - void DrawColumnRt1PalCommand::Execute(DrawerThread *thread) - { - const uint8_t *colormap; - uint8_t *source; - uint8_t *dest; - int count; - int pitch; - - count = yh - yl + 1; - - count = thread->count_for_thread(yl, count); - if (count <= 0) - return; - - colormap = _colormap; - dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; - source = &thread->dc_temp[thread->temp_line_for_thread(yl) *4 + hx]; - pitch = _pitch*thread->num_cores; - - if (count & 1) { - *dest = colormap[*source]; - source += 4; - dest += pitch; - } - if (!(count >>= 1)) - return; - - do { - dest[0] = colormap[source[0]]; - dest[pitch] = colormap[source[4]]; - source += 8; - dest += pitch*2; - } while (--count); - } - - void DrawColumnRt4PalCommand::Execute(DrawerThread *thread) - { - const uint8_t *colormap; - uint8_t *source; - uint8_t *dest; - int count; - int pitch; - - count = yh - yl + 1; - - count = thread->count_for_thread(yl, count); - if (count <= 0) - return; - - colormap = _colormap; - dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; - source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4]; - pitch = _pitch*thread->num_cores; - - if (count & 1) { - dest[0] = colormap[source[0]]; - dest[1] = colormap[source[1]]; - dest[2] = colormap[source[2]]; - dest[3] = colormap[source[3]]; - source += 4; - dest += pitch; - } - if (!(count >>= 1)) - return; - - do { - dest[0] = colormap[source[0]]; - dest[1] = colormap[source[1]]; - dest[2] = colormap[source[2]]; - dest[3] = colormap[source[3]]; - dest[pitch] = colormap[source[4]]; - dest[pitch+1] = colormap[source[5]]; - dest[pitch+2] = colormap[source[6]]; - dest[pitch+3] = colormap[source[7]]; - source 
+= 8; - dest += pitch*2; - } while (--count); - } - - void DrawColumnRt1TranslatedPalCommand::Execute(DrawerThread *thread) - { - int count = yh - yl + 1; - count = thread->count_for_thread(yl, count); - if (count <= 0) - return; - - uint8_t *source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4 + hx]; - const uint8_t *translation = _translation; - - // Things we do to hit the compiler's optimizer with a clue bat: - // 1. Parallelism is explicitly spelled out by using a separate - // C instruction for each assembly instruction. GCC lets me - // have four temporaries, but VC++ spills to the stack with - // more than two. Two is probably optimal, anyway. - // 2. The results of the translation lookups are explicitly - // stored in byte-sized variables. This causes the VC++ code - // to use byte mov instructions in most cases; for apparently - // random reasons, it will use movzx for some places. GCC - // ignores this and uses movzx always. - - // Do 8 rows at a time. - for (int count8 = count >> 3; count8; --count8) - { - int c0, c1; - uint8_t b0, b1; - - c0 = source[0]; c1 = source[4]; - b0 = translation[c0]; b1 = translation[c1]; - source[0] = b0; source[4] = b1; - - c0 = source[8]; c1 = source[12]; - b0 = translation[c0]; b1 = translation[c1]; - source[8] = b0; source[12] = b1; - - c0 = source[16]; c1 = source[20]; - b0 = translation[c0]; b1 = translation[c1]; - source[16] = b0; source[20] = b1; - - c0 = source[24]; c1 = source[28]; - b0 = translation[c0]; b1 = translation[c1]; - source[24] = b0; source[28] = b1; - - source += 32; - } - // Finish by doing 1 row at a time. 
- for (count &= 7; count; --count, source += 4) - { - source[0] = translation[source[0]]; - } - } - - void DrawColumnRt4TranslatedPalCommand::Execute(DrawerThread *thread) - { - int count = yh - yl + 1; - count = thread->count_for_thread(yl, count); - if (count <= 0) - return; - - uint8_t *source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4]; - const uint8_t *translation = _translation; - int c0, c1; - uint8_t b0, b1; - - // Do 2 rows at a time. - for (int count8 = count >> 1; count8; --count8) - { - c0 = source[0]; c1 = source[1]; - b0 = translation[c0]; b1 = translation[c1]; - source[0] = b0; source[1] = b1; - - c0 = source[2]; c1 = source[3]; - b0 = translation[c0]; b1 = translation[c1]; - source[2] = b0; source[3] = b1; - - c0 = source[4]; c1 = source[5]; - b0 = translation[c0]; b1 = translation[c1]; - source[4] = b0; source[5] = b1; - - c0 = source[6]; c1 = source[7]; - b0 = translation[c0]; b1 = translation[c1]; - source[6] = b0; source[7] = b1; - - source += 8; - } - // Do the final row if count was odd. 
- if (count & 1) - { - c0 = source[0]; c1 = source[1]; - b0 = translation[c0]; b1 = translation[c1]; - source[0] = b0; source[1] = b1; - - c0 = source[2]; c1 = source[3]; - b0 = translation[c0]; b1 = translation[c1]; - source[2] = b0; source[3] = b1; - } - } - - void DrawColumnRt1AddPalCommand::Execute(DrawerThread *thread) - { - const uint8_t *colormap; - uint8_t *source; - uint8_t *dest; - int pitch; - - int count = yh - yl + 1; - count = thread->count_for_thread(yl, count); - if (count <= 0) - return; - - const uint32_t *fg2rgb = _srcblend; - const uint32_t *bg2rgb = _destblend; - dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; - source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4 + hx]; - pitch = _pitch * thread->num_cores; - colormap = _colormap; - const PalEntry *palette = GPalette.BaseColors; - - if (!r_blendmethod) - { - do { - uint32_t fg = colormap[*source]; - uint32_t bg = *dest; - - fg = fg2rgb[fg]; - bg = bg2rgb[bg]; - fg = (fg+bg) | 0x1f07c1f; - *dest = RGB32k.All[fg & (fg>>15)]; - source += 4; - dest += pitch; - } while (--count); - } - else - { - do { - uint32_t fg = colormap[*source]; - uint32_t bg = *dest; - - int r = MIN((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 63); - int g = MIN((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 63); - int b = MIN((palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 63); - *dest = RGB256k.RGB[r][g][b]; - source += 4; - dest += pitch; - } while (--count); - } - } - - void DrawColumnRt4AddPalCommand::Execute(DrawerThread *thread) - { - const uint8_t *colormap; - uint8_t *source; - uint8_t *dest; - int pitch; - - int count = yh - yl + 1; - count = thread->count_for_thread(yl, count); - if (count <= 0) - return; - - const uint32_t *fg2rgb = _srcblend; - const uint32_t *bg2rgb = _destblend; - dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; - source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4]; - pitch = _pitch * 
thread->num_cores; - colormap = _colormap; - const PalEntry *palette = GPalette.BaseColors; - - if (!r_blendmethod) - { - do { - uint32_t fg = colormap[source[0]]; - uint32_t bg = dest[0]; - fg = fg2rgb[fg]; - bg = bg2rgb[bg]; - fg = (fg+bg) | 0x1f07c1f; - dest[0] = RGB32k.All[fg & (fg>>15)]; - - fg = colormap[source[1]]; - bg = dest[1]; - fg = fg2rgb[fg]; - bg = bg2rgb[bg]; - fg = (fg+bg) | 0x1f07c1f; - dest[1] = RGB32k.All[fg & (fg>>15)]; - - - fg = colormap[source[2]]; - bg = dest[2]; - fg = fg2rgb[fg]; - bg = bg2rgb[bg]; - fg = (fg+bg) | 0x1f07c1f; - dest[2] = RGB32k.All[fg & (fg>>15)]; - - fg = colormap[source[3]]; - bg = dest[3]; - fg = fg2rgb[fg]; - bg = bg2rgb[bg]; - fg = (fg+bg) | 0x1f07c1f; - dest[3] = RGB32k.All[fg & (fg>>15)]; - - source += 4; - dest += pitch; - } while (--count); - } - else - { - do { - for (int ks = 0; ks < 4; ks++) - { // [SP] this 4col function was a block of copy-pasted code. 4 times. I regret nothing. - uint32_t fg = colormap[source[ks]]; - uint32_t bg = dest[ks]; - int r = MIN((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 63); - int g = MIN((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 63); - int b = MIN((palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 63); - dest[ks] = RGB256k.RGB[r][g][b]; - } - - source += 4; - dest += pitch; - } while (--count); - } - } - - void DrawColumnRt1ShadedPalCommand::Execute(DrawerThread *thread) - { - uint32_t *fgstart; - const uint8_t *colormap; - uint8_t *source; - uint8_t *dest; - int pitch; - - int count = yh - yl + 1; - count = thread->count_for_thread(yl, count); - if (count <= 0) - return; - - fgstart = &Col2RGB8[0][_color]; - colormap = _colormap; - dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; - source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4 + hx]; - pitch = _pitch * thread->num_cores; - const PalEntry *palette = GPalette.BaseColors; - - if (!r_blendmethod) - { - do { - uint32_t val = colormap[*source]; - 
uint32_t fg = fgstart[val<<8]; - val = (Col2RGB8[64-val][*dest] + fg) | 0x1f07c1f; - *dest = RGB32k.All[val & (val>>15)]; - source += 4; - dest += pitch; - } while (--count); - } - else - { - do { - uint32_t val = *source; - int r = (palette[*dest].r * (255-val) + palette[_color].r * val) >> 10; - int g = (palette[*dest].g * (255-val) + palette[_color].g * val) >> 10; - int b = (palette[*dest].b * (255-val) + palette[_color].b * val) >> 10; - *dest = RGB256k.RGB[clamp(r,0,63)][clamp(g,0,63)][clamp(b,0,63)]; - source += 4; - dest += pitch; - } while (--count); - } - } - - void DrawColumnRt4ShadedPalCommand::Execute(DrawerThread *thread) - { - uint32_t *fgstart; - const uint8_t *colormap; - uint8_t *source; - uint8_t *dest; - int pitch; - - int count = yh - yl + 1; - count = thread->count_for_thread(yl, count); - if (count <= 0) - return; - - fgstart = &Col2RGB8[0][_color]; - colormap = _colormap; - dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; - source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4]; - pitch = _pitch * thread->num_cores; - const PalEntry *palette = GPalette.BaseColors; - - if (!r_blendmethod) - { - do { - uint32_t val; - - val = colormap[source[0]]; - val = (Col2RGB8[64-val][dest[0]] + fgstart[val<<8]) | 0x1f07c1f; - dest[0] = RGB32k.All[val & (val>>15)]; - - val = colormap[source[1]]; - val = (Col2RGB8[64-val][dest[1]] + fgstart[val<<8]) | 0x1f07c1f; - dest[1] = RGB32k.All[val & (val>>15)]; - - val = colormap[source[2]]; - val = (Col2RGB8[64-val][dest[2]] + fgstart[val<<8]) | 0x1f07c1f; - dest[2] = RGB32k.All[val & (val>>15)]; - - val = colormap[source[3]]; - val = (Col2RGB8[64-val][dest[3]] + fgstart[val<<8]) | 0x1f07c1f; - dest[3] = RGB32k.All[val & (val>>15)]; - - source += 4; - dest += pitch; - } while (--count); - } - else - { - do { - uint32_t val; - - for (int ks = 0; ks < 4; ks++) - { - val = source[ks]; - int r = (palette[dest[ks]].r * (255-val) + palette[_color].r * val) >> 10; - int g = (palette[dest[ks]].g * 
(255-val) + palette[_color].g * val) >> 10; - int b = (palette[dest[ks]].b * (255-val) + palette[_color].b * val) >> 10; - dest[ks] = RGB256k.RGB[clamp(r,0,63)][clamp(g,0,63)][clamp(b,0,63)]; - } - - source += 4; - dest += pitch; - } while (--count); - } - } - - void DrawColumnRt1AddClampPalCommand::Execute(DrawerThread *thread) - { - const uint8_t *colormap; - uint8_t *source; - uint8_t *dest; - int pitch; - - int count = yh - yl + 1; - count = thread->count_for_thread(yl, count); - if (count <= 0) - return; - - const uint32_t *fg2rgb = _srcblend; - const uint32_t *bg2rgb = _destblend; - dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; - source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4 + hx]; - pitch = _pitch * thread->num_cores; - colormap = _colormap; - const PalEntry *palette = GPalette.BaseColors; - - if (!r_blendmethod) - { - do { - uint32_t a = fg2rgb[colormap[*source]] + bg2rgb[*dest]; - uint32_t b = a; - - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - *dest = RGB32k.All[(a>>15) & a]; - source += 4; - dest += pitch; - } while (--count); - } - else - { - do { - int fg = colormap[*source]; - int bg = *dest; - int r = MIN((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 63); - int g = MIN((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 63); - int b = MIN((palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 63); - *dest = RGB256k.RGB[r][g][b]; - source += 4; - dest += pitch; - } while (--count); - } - } - - void DrawColumnRt4AddClampPalCommand::Execute(DrawerThread *thread) - { - const uint8_t *colormap; - uint8_t *source; - uint8_t *dest; - int pitch; - - int count = yh - yl + 1; - count = thread->count_for_thread(yl, count); - if (count <= 0) - return; - - dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; - source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4]; - pitch = _pitch * thread->num_cores; - colormap = _colormap; - 
const PalEntry *palette = GPalette.BaseColors; - - const uint32_t *fg2rgb = _srcblend; - const uint32_t *bg2rgb = _destblend; - - if (!r_blendmethod) - { - do { - uint32_t a = fg2rgb[colormap[source[0]]] + bg2rgb[dest[0]]; - uint32_t b = a; - - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - dest[0] = RGB32k.All[(a>>15) & a]; - - a = fg2rgb[colormap[source[1]]] + bg2rgb[dest[1]]; - b = a; - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - dest[1] = RGB32k.All[(a>>15) & a]; - - a = fg2rgb[colormap[source[2]]] + bg2rgb[dest[2]]; - b = a; - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - dest[2] = RGB32k.All[(a>>15) & a]; - - a = fg2rgb[colormap[source[3]]] + bg2rgb[dest[3]]; - b = a; - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - dest[3] = RGB32k.All[(a>>15) & a]; - - source += 4; - dest += pitch; - } while (--count); - } - else - { - do { - for (int ks = 0; ks < 4; ks++) - { - int fg = colormap[source[ks]]; - int bg = dest[ks]; - int r = MIN((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 63); - int g = MIN((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 63); - int b = MIN((palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 63); - dest[ks] = RGB256k.RGB[r][g][b]; - } - - source += 4; - dest += pitch; - } while (--count); - } - } - - void DrawColumnRt1SubClampPalCommand::Execute(DrawerThread *thread) - { - const uint8_t *colormap; - uint8_t *source; - uint8_t *dest; - int pitch; - - int count = yh - yl + 1; - count = thread->count_for_thread(yl, count); - if (count <= 0) - return; - - const uint32_t *fg2rgb = _srcblend; - const uint32_t *bg2rgb = _destblend; - dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; - source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4 + hx]; - pitch = _pitch * thread->num_cores; - colormap = _colormap; - const PalEntry 
*palette = GPalette.BaseColors; - - if (!r_blendmethod) - { - do { - uint32_t a = (fg2rgb[colormap[*source]] | 0x40100400) - bg2rgb[*dest]; - uint32_t b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - *dest = RGB32k.All[(a>>15) & a]; - source += 4; - dest += pitch; - } while (--count); - } - else - { - do { - int fg = colormap[*source]; - int bg = *dest; - int r = MAX((palette[fg].r * _srcalpha - palette[bg].r * _destalpha)>>18, 0); - int g = MAX((palette[fg].g * _srcalpha - palette[bg].g * _destalpha)>>18, 0); - int b = MAX((palette[fg].b * _srcalpha - palette[bg].b * _destalpha)>>18, 0); - *dest = RGB256k.RGB[r][g][b]; - source += 4; - dest += pitch; - } while (--count); - } - } - - void DrawColumnRt4SubClampPalCommand::Execute(DrawerThread *thread) - { - const uint8_t *colormap; - uint8_t *source; - uint8_t *dest; - int pitch; - - int count = yh - yl + 1; - count = thread->count_for_thread(yl, count); - if (count <= 0) - return; - - const uint32_t *fg2rgb = _srcblend; - const uint32_t *bg2rgb = _destblend; - dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; - source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4]; - pitch = _pitch * thread->num_cores; - colormap = _colormap; - const PalEntry *palette = GPalette.BaseColors; - - if (!r_blendmethod) - { - do { - uint32_t a = (fg2rgb[colormap[source[0]]] | 0x40100400) - bg2rgb[dest[0]]; - uint32_t b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - dest[0] = RGB32k.All[(a>>15) & a]; - - a = (fg2rgb[colormap[source[1]]] | 0x40100400) - bg2rgb[dest[1]]; - b = a; - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - dest[1] = RGB32k.All[(a>>15) & a]; - - a = (fg2rgb[colormap[source[2]]] | 0x40100400) - bg2rgb[dest[2]]; - b = a; - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - dest[2] = RGB32k.All[(a>>15) & a]; - - a = (fg2rgb[colormap[source[3]]] | 0x40100400) - bg2rgb[dest[3]]; - b = a; - b &= 
0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - dest[3] = RGB32k.All[(a>>15) & a]; - - source += 4; - dest += pitch; - } while (--count); - } - else - { - do { - for (int ks = 0; ks < 4; ks++) - { - int fg = colormap[source[ks]]; - int bg = dest[ks]; - int r = MAX((palette[fg].r * _srcalpha - palette[bg].r * _destalpha)>>18, 0); - int g = MAX((palette[fg].g * _srcalpha - palette[bg].g * _destalpha)>>18, 0); - int b = MAX((palette[fg].b * _srcalpha - palette[bg].b * _destalpha)>>18, 0); - dest[ks] = RGB256k.RGB[r][g][b]; - } - - source += 4; - dest += pitch; - } while (--count); - } - } - - void DrawColumnRt1RevSubClampPalCommand::Execute(DrawerThread *thread) - { - const uint8_t *colormap; - uint8_t *source; - uint8_t *dest; - int pitch; - - int count = yh - yl + 1; - count = thread->count_for_thread(yl, count); - if (count <= 0) - return; - - const uint32_t *fg2rgb = _srcblend; - const uint32_t *bg2rgb = _destblend; - dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; - source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4 + hx]; - pitch = _pitch * thread->num_cores; - colormap = _colormap; - const PalEntry *palette = GPalette.BaseColors; - - if (!r_blendmethod) - { - do { - uint32_t a = (bg2rgb[*dest] | 0x40100400) - fg2rgb[colormap[*source]]; - uint32_t b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - *dest = RGB32k.All[(a>>15) & a]; - source += 4; - dest += pitch; - } while (--count); - } - else - { - do { - int fg = colormap[*source]; - int bg = *dest; - int r = MAX((-palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 0); - int g = MAX((-palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 0); - int b = MAX((-palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 0); - *dest = RGB256k.RGB[r][g][b]; - source += 4; - dest += pitch; - } while (--count); - } - } - - void DrawColumnRt4RevSubClampPalCommand::Execute(DrawerThread *thread) - { - const uint8_t *colormap; - 
uint8_t *source; - uint8_t *dest; - int pitch; - - int count = yh - yl + 1; - count = thread->count_for_thread(yl, count); - if (count <= 0) - return; - - const uint32_t *fg2rgb = _srcblend; - const uint32_t *bg2rgb = _destblend; - dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; - source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4]; - pitch = _pitch * thread->num_cores; - colormap = _colormap; - const PalEntry *palette = GPalette.BaseColors; - - if (!r_blendmethod) - { - do { - uint32_t a = (bg2rgb[dest[0]] | 0x40100400) - fg2rgb[colormap[source[0]]]; - uint32_t b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - dest[0] = RGB32k.All[(a>>15) & a]; - - a = (bg2rgb[dest[1]] | 0x40100400) - fg2rgb[colormap[source[1]]]; - b = a; - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - dest[1] = RGB32k.All[(a>>15) & a]; - - a = (bg2rgb[dest[2]] | 0x40100400) - fg2rgb[colormap[source[2]]]; - b = a; - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - dest[2] = RGB32k.All[(a>>15) & a]; - - a = (bg2rgb[dest[3]] | 0x40100400) - fg2rgb[colormap[source[3]]]; - b = a; - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - dest[3] = RGB32k.All[(a>>15) & a]; - - source += 4; - dest += pitch; - } while (--count); - } - else - { - do { - for (int ks = 0; ks < 4; ks++) - { - int fg = colormap[source[ks]]; - int bg = dest[ks]; - int r = MAX((-palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 0); - int g = MAX((-palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 0); - int b = MAX((-palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 0); - dest[ks] = RGB256k.RGB[r][g][b]; - } - - source += 4; - dest += pitch; - } while (--count); - } - } -} diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp deleted file mode 100644 index b5be7a9c36..0000000000 --- a/src/r_drawt_rgba.cpp +++ /dev/null @@ -1,314 +0,0 @@ -/* -** Drawer commands for the RT family of 
drawers -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 'as-is', without any express or implied -** warranty. In no event will the authors be held liable for any damages -** arising from the use of this software. -** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. -** 3. This notice may not be removed or altered from any source distribution. -** -*/ - -#include "templates.h" -#include "doomtype.h" -#include "doomdef.h" -#include "r_defs.h" -#include "r_draw.h" -#include "r_main.h" -#include "r_things.h" -#include "v_video.h" -#include "r_draw_rgba.h" -#include "r_drawers.h" - -namespace swrenderer -{ - WorkerThreadData DrawColumnRt1LLVMCommand::ThreadData(DrawerThread *thread) - { - WorkerThreadData d; - d.core = thread->core; - d.num_cores = thread->num_cores; - d.pass_start_y = thread->pass_start_y; - d.pass_end_y = thread->pass_end_y; - d.temp = thread->dc_temp_rgba; - return d; - } - - DrawColumnRt1LLVMCommand::DrawColumnRt1LLVMCommand(int hx, int sx, int yl, int yh) - { - using namespace drawerargs; - - args.dest = (uint32_t*)dc_destorg + ylookup[yl] + sx; - args.source = nullptr; - args.source2 = nullptr; - args.colormap = dc_colormap; - args.translation = dc_translation; - args.basecolors = (const uint32_t *)GPalette.BaseColors; - args.pitch = dc_pitch; - args.count = yh - yl + 1; - args.dest_y = yl; - args.iscale = dc_iscale; - args.texturefrac = hx; - args.light = 
LightBgra::calc_light_multiplier(dc_light); - args.color = LightBgra::shade_pal_index_simple(dc_color, args.light); - args.srccolor = dc_srccolor_bgra; - args.srcalpha = dc_srcalpha >> (FRACBITS - 8); - args.destalpha = dc_destalpha >> (FRACBITS - 8); - args.light_red = dc_shade_constants.light_red; - args.light_green = dc_shade_constants.light_green; - args.light_blue = dc_shade_constants.light_blue; - args.light_alpha = dc_shade_constants.light_alpha; - args.fade_red = dc_shade_constants.fade_red; - args.fade_green = dc_shade_constants.fade_green; - args.fade_blue = dc_shade_constants.fade_blue; - args.fade_alpha = dc_shade_constants.fade_alpha; - args.desaturate = dc_shade_constants.desaturate; - args.flags = 0; - if (dc_shade_constants.simple_shade) - args.flags |= DrawColumnArgs::simple_shade; - if (args.source2 == nullptr) - args.flags |= DrawColumnArgs::nearest_filter; - - DetectRangeError(args.dest, args.dest_y, args.count); - } - - void DrawColumnRt1LLVMCommand::Execute(DrawerThread *thread) - { - WorkerThreadData d = ThreadData(thread); - Drawers::Instance()->DrawColumnRt1(&args, &d); - } - - FString DrawColumnRt1LLVMCommand::DebugInfo() - { - return "DrawColumnRt\n" + args.ToString(); - } - - ///////////////////////////////////////////////////////////////////////////// - - RtInitColsRGBACommand::RtInitColsRGBACommand(BYTE *buff) - { - this->buff = buff; - } - - void RtInitColsRGBACommand::Execute(DrawerThread *thread) - { - thread->dc_temp_rgba = buff == NULL ? 
thread->dc_temp_rgbabuff_rgba : (uint32_t*)buff; - } - - FString RtInitColsRGBACommand::DebugInfo() - { - return "RtInitCols"; - } - - ///////////////////////////////////////////////////////////////////////////// - - template - DrawColumnHorizRGBACommand::DrawColumnHorizRGBACommand() - { - using namespace drawerargs; - - _count = dc_count; - _iscale = dc_iscale; - _texturefrac = dc_texturefrac; - _source = (const InputPixelType *)dc_source; - _x = dc_x; - _yl = dc_yl; - _yh = dc_yh; - } - - template - void DrawColumnHorizRGBACommand::Execute(DrawerThread *thread) - { - int count = _count; - uint32_t *dest; - fixed_t fracstep; - fixed_t frac; - - if (count <= 0) - return; - - { - int x = _x & 3; - dest = &thread->dc_temp_rgba[x + 4 * _yl]; - } - fracstep = _iscale; - frac = _texturefrac; - - const InputPixelType *source = _source; - - if (count & 1) { - *dest = source[frac >> FRACBITS]; dest += 4; frac += fracstep; - } - if (count & 2) { - dest[0] = source[frac >> FRACBITS]; frac += fracstep; - dest[4] = source[frac >> FRACBITS]; frac += fracstep; - dest += 8; - } - if (count & 4) { - dest[0] = source[frac >> FRACBITS]; frac += fracstep; - dest[4] = source[frac >> FRACBITS]; frac += fracstep; - dest[8] = source[frac >> FRACBITS]; frac += fracstep; - dest[12] = source[frac >> FRACBITS]; frac += fracstep; - dest += 16; - } - count >>= 3; - if (!count) return; - - do - { - dest[0] = source[frac >> FRACBITS]; frac += fracstep; - dest[4] = source[frac >> FRACBITS]; frac += fracstep; - dest[8] = source[frac >> FRACBITS]; frac += fracstep; - dest[12] = source[frac >> FRACBITS]; frac += fracstep; - dest[16] = source[frac >> FRACBITS]; frac += fracstep; - dest[20] = source[frac >> FRACBITS]; frac += fracstep; - dest[24] = source[frac >> FRACBITS]; frac += fracstep; - dest[28] = source[frac >> FRACBITS]; frac += fracstep; - dest += 32; - } while (--count); - } - - template - FString DrawColumnHorizRGBACommand::DebugInfo() - { - return "DrawColumnHoriz"; - } - - // Generate 
code for the versions we use: - template class DrawColumnHorizRGBACommand; - template class DrawColumnHorizRGBACommand; - - ///////////////////////////////////////////////////////////////////////////// - - FillColumnHorizRGBACommand::FillColumnHorizRGBACommand() - { - using namespace drawerargs; - - _x = dc_x; - _count = dc_count; - _color = GPalette.BaseColors[dc_color].d | (uint32_t)0xff000000; - _yl = dc_yl; - _yh = dc_yh; - } - - void FillColumnHorizRGBACommand::Execute(DrawerThread *thread) - { - int count = _count; - uint32_t color = _color; - uint32_t *dest; - - if (count <= 0) - return; - - { - int x = _x & 3; - dest = &thread->dc_temp_rgba[x + 4 * _yl]; - } - - if (count & 1) { - *dest = color; - dest += 4; - } - if (!(count >>= 1)) - return; - do { - dest[0] = color; dest[4] = color; - dest += 8; - } while (--count); - } - - FString FillColumnHorizRGBACommand::DebugInfo() - { - return "FillColumnHoriz"; - } - - ///////////////////////////////////////////////////////////////////////////// - - namespace - { - static uint32_t particle_texture[16 * 16] = - { - 1*1, 2*1, 3*1, 4*1, 5*1, 6*1, 7*1, 8*1, 8*1, 7*1, 6*1, 5*1, 4*1, 3*1, 2*1, 1*1, - 1*2, 2*2, 3*2, 4*2, 5*2, 6*2, 7*2, 8*2, 8*2, 7*2, 6*2, 5*2, 4*2, 3*2, 2*2, 1*2, - 1*3, 2*3, 3*3, 4*3, 5*3, 6*3, 7*3, 8*3, 8*3, 7*3, 6*3, 5*3, 4*3, 3*3, 2*3, 1*3, - 1*4, 2*4, 3*4, 4*4, 5*4, 6*4, 7*4, 8*4, 8*4, 7*4, 6*4, 5*4, 4*4, 3*4, 2*4, 1*4, - 1*5, 2*5, 3*5, 4*5, 5*5, 6*5, 7*5, 8*5, 8*5, 7*5, 6*5, 5*5, 4*5, 3*5, 2*5, 1*5, - 1*6, 2*6, 3*6, 4*6, 5*6, 6*6, 7*6, 8*6, 8*6, 7*6, 6*6, 5*6, 4*6, 3*6, 2*6, 1*6, - 1*7, 2*7, 3*7, 4*7, 5*7, 6*7, 7*7, 8*7, 8*7, 7*7, 6*7, 5*7, 4*7, 3*7, 2*7, 1*7, - 1*8, 2*8, 3*8, 4*8, 5*8, 6*8, 7*8, 8*8, 8*8, 7*8, 6*8, 5*8, 4*8, 3*8, 2*8, 1*8, - 1*8, 2*8, 3*8, 4*8, 5*8, 6*8, 7*8, 8*8, 8*8, 7*8, 6*8, 5*8, 4*8, 3*8, 2*8, 1*8, - 1*7, 2*7, 3*7, 4*7, 5*7, 6*7, 7*7, 8*7, 8*7, 7*7, 6*7, 5*7, 4*7, 3*7, 2*7, 1*7, - 1*6, 2*6, 3*6, 4*6, 5*6, 6*6, 7*6, 8*6, 8*6, 7*6, 6*6, 5*6, 4*6, 3*6, 2*6, 1*6, - 1*5, 2*5, 3*5, 
4*5, 5*5, 6*5, 7*5, 8*5, 8*5, 7*5, 6*5, 5*5, 4*5, 3*5, 2*5, 1*5, - 1*4, 2*4, 3*4, 4*4, 5*4, 6*4, 7*4, 8*4, 8*4, 7*4, 6*4, 5*4, 4*4, 3*4, 2*4, 1*4, - 1*3, 2*3, 3*3, 4*3, 5*3, 6*3, 7*3, 8*3, 8*3, 7*3, 6*3, 5*3, 4*3, 3*3, 2*3, 1*3, - 1*2, 2*2, 3*2, 4*2, 5*2, 6*2, 7*2, 8*2, 8*2, 7*2, 6*2, 5*2, 4*2, 3*2, 2*2, 1*2, - 1*1, 2*1, 3*1, 4*1, 5*1, 6*1, 7*1, 8*1, 8*1, 7*1, 6*1, 5*1, 4*1, 3*1, 2*1, 1*1 - }; - } - - DrawParticleColumnRGBACommand::DrawParticleColumnRGBACommand(uint32_t *dest, int dest_y, int pitch, int count, uint32_t fg, uint32_t alpha, uint32_t fracposx) - { - _dest = dest; - _pitch = pitch; - _count = count; - _fg = fg; - _alpha = alpha; - _fracposx = fracposx; - _dest_y = dest_y; - } - - void DrawParticleColumnRGBACommand::Execute(DrawerThread *thread) - { - int count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, _dest); - int pitch = _pitch * thread->num_cores; - - const uint32_t *source = &particle_texture[(_fracposx >> FRACBITS) * 16]; - uint32_t particle_alpha = _alpha; - - uint32_t fracstep = 16 * FRACUNIT / _count; - uint32_t fracpos = fracstep * thread->skipped_by_thread(_dest_y) + fracstep / 2; - fracstep *= thread->num_cores; - - uint32_t fg_red = (_fg >> 16) & 0xff; - uint32_t fg_green = (_fg >> 8) & 0xff; - uint32_t fg_blue = _fg & 0xff; - - for (int y = 0; y < count; y++) - { - uint32_t alpha = (source[fracpos >> FRACBITS] * particle_alpha) >> 6; - uint32_t inv_alpha = 256 - alpha; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = (fg_red * alpha + bg_red * inv_alpha) / 256; - uint32_t green = (fg_green * alpha + bg_green * inv_alpha) / 256; - uint32_t blue = (fg_blue * alpha + bg_blue * inv_alpha) / 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - fracpos += fracstep; - } - } - - FString DrawParticleColumnRGBACommand::DebugInfo() - { 
- return "DrawParticle"; - } -} diff --git a/src/r_main.cpp b/src/r_main.cpp index c67b48a1ba..a83b2f190a 100644 --- a/src/r_main.cpp +++ b/src/r_main.cpp @@ -169,11 +169,6 @@ void (*fuzzcolfunc) (void); void (*transcolfunc) (void); void (*spanfunc) (void); -void (*hcolfunc_pre) (void); -void (*hcolfunc_post1) (int hx, int sx, int yl, int yh); -void (*hcolfunc_post2) (int hx, int sx, int yl, int yh); -void (*hcolfunc_post4) (int sx, int yl, int yh); - cycle_t WallCycles, PlaneCycles, MaskedCycles, WallScanCycles; // PRIVATE DATA DEFINITIONS ------------------------------------------------ @@ -853,17 +848,11 @@ void R_RenderActorView (AActor *actor, bool dontmaplines) // [RH] Show off segs if r_drawflat is 1 if (r_drawflat) { - hcolfunc_pre = R_FillColumnHoriz; - hcolfunc_post1 = rt_copy1col; - hcolfunc_post4 = rt_copy4cols; colfunc = R_FillColumn; spanfunc = R_FillSpan; } else { - hcolfunc_pre = R_DrawColumnHoriz; - hcolfunc_post1 = rt_map1col; - hcolfunc_post4 = rt_map4cols; colfunc = basecolfunc; spanfunc = R_DrawSpan; } diff --git a/src/r_main.h b/src/r_main.h index 37ead76ce3..daf60c16d0 100644 --- a/src/r_main.h +++ b/src/r_main.h @@ -124,12 +124,6 @@ extern void (*transcolfunc) (void); // No shadow effects on floors. extern void (*spanfunc) (void); -// [RH] Function pointers for the horizontal column drawers. 
-extern void (*hcolfunc_pre) (void); -extern void (*hcolfunc_post1) (int hx, int sx, int yl, int yh); -extern void (*hcolfunc_post2) (int hx, int sx, int yl, int yh); -extern void (*hcolfunc_post4) (int sx, int yl, int yh); - void R_InitTextureMapping (); diff --git a/src/r_poly_triangle.cpp b/src/r_poly_triangle.cpp index 72851b6b34..5da23297b6 100644 --- a/src/r_poly_triangle.cpp +++ b/src/r_poly_triangle.cpp @@ -377,7 +377,6 @@ void DrawPolyTrianglesCommand::Execute(DrawerThread *thread) thread_data.num_cores = thread->num_cores; thread_data.pass_start_y = thread->pass_start_y; thread_data.pass_end_y = thread->pass_end_y; - thread_data.temp = thread->dc_temp_rgba; thread_data.FullSpans = thread->FullSpansBuffer.data(); thread_data.PartialBlocks = thread->PartialBlocksBuffer.data(); diff --git a/src/r_segs.cpp b/src/r_segs.cpp index edad2aeccc..a2d02c3f9e 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -2323,8 +2323,6 @@ static void R_RenderDecal (side_t *wall, DBaseDecal *decal, drawseg_t *clipper, } while (needrepeat--); colfunc = basecolfunc; - hcolfunc_post1 = rt_map1col; - hcolfunc_post4 = rt_map4cols; R_FinishSetPatchStyle (); done: diff --git a/src/r_things.cpp b/src/r_things.cpp index 22d6b7ac65..7b945a44cb 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -366,18 +366,12 @@ void R_DrawMaskedColumnBgra(FTexture *tex, fixed_t col, bool useRt, bool unmaske double v = ((dc_yl + 0.5 - sprtopscreen) / spryscale) / tex->GetHeight(); dc_texturefrac = (uint32_t)(v * (1 << 30)); - if (useRt) - hcolfunc_pre(); - else - colfunc(); + colfunc(); } span++; } dc_iscale = saved_iscale; - - if (sprflipvert && useRt) - rt_flip_posts(); } void R_DrawMaskedColumn (FTexture *tex, fixed_t col, bool useRt, bool unmasked) @@ -447,16 +441,10 @@ void R_DrawMaskedColumn (FTexture *tex, fixed_t col, bool useRt, bool unmasked) else if (dc_iscale < 0) dc_count = MIN(dc_count, (dc_texturefrac - dc_iscale) / (-dc_iscale)); - if (useRt) - hcolfunc_pre(); - else - colfunc (); + 
colfunc (); } span++; } - - if (sprflipvert && useRt) - rt_flip_posts(); } // [ZZ] @@ -705,6 +693,7 @@ void R_WallSpriteColumn (bool useRt) void R_DrawVisVoxel(vissprite_t *spr, int minslabz, int maxslabz, short *cliptop, short *clipbot) { +#if 0 int flags = 0; // Do setup for blending. @@ -772,6 +761,7 @@ void R_DrawVisVoxel(vissprite_t *spr, int minslabz, int maxslabz, short *cliptop R_FinishSetPatchStyle(); NetUpdate(); +#endif } // @@ -2857,6 +2847,7 @@ void R_DrawVoxel(const FVector3 &globalpos, FAngle viewangle, fixed_t daxscale, fixed_t dayscale, FVoxel *voxobj, FSWColormap *colormap, int colormapnum, short *daumost, short *dadmost, int minslabz, int maxslabz, int flags) { +#if 0 int i, j, k, x, y, syoff, ggxstart, ggystart, nxoff; fixed_t cosang, sinang, sprcosang, sprsinang; int backx, backy, gxinc, gyinc; @@ -3184,6 +3175,7 @@ void R_DrawVoxel(const FVector3 &globalpos, FAngle viewangle, } } } +#endif } //========================================================================== diff --git a/src/r_thread.h b/src/r_thread.h index ea1ceaa29b..c3e818abb9 100644 --- a/src/r_thread.h +++ b/src/r_thread.h @@ -45,9 +45,6 @@ class DrawerThread public: DrawerThread() { - dc_temp = dc_temp_buff; - dc_temp_rgba = dc_temp_rgbabuff_rgba; - FullSpansBuffer.resize(MAXWIDTH / 8 * (MAXHEIGHT / 8)); PartialBlocksBuffer.resize(MAXWIDTH / 8 * (MAXHEIGHT / 8)); } @@ -64,14 +61,6 @@ public: int pass_start_y = 0; int pass_end_y = MAXHEIGHT; - // Working buffer used by Rt drawers - uint8_t dc_temp_buff[MAXHEIGHT * 4]; - uint8_t *dc_temp = nullptr; - - // Working buffer used by Rt drawers, true color edition - uint32_t dc_temp_rgbabuff_rgba[MAXHEIGHT * 4]; - uint32_t *dc_temp_rgba = nullptr; - // Working buffer used by the tilted (sloped) span drawer const uint8_t *tiltlighting[MAXWIDTH]; diff --git a/tools/drawergen/fixedfunction/drawcolumncodegen.cpp b/tools/drawergen/fixedfunction/drawcolumncodegen.cpp index 177074dad3..6e00528ae4 100644 --- 
a/tools/drawergen/fixedfunction/drawcolumncodegen.cpp +++ b/tools/drawergen/fixedfunction/drawcolumncodegen.cpp @@ -32,7 +32,7 @@ #include "ssa/ssa_struct_type.h" #include "ssa/ssa_value.h" -void DrawColumnCodegen::Generate(DrawColumnVariant variant, DrawColumnMethod method, SSAValue args, SSAValue thread_data) +void DrawColumnCodegen::Generate(DrawColumnVariant variant, SSAValue args, SSAValue thread_data) { dest = args[0][0].load(true); source = args[0][1].load(true); @@ -43,12 +43,9 @@ void DrawColumnCodegen::Generate(DrawColumnVariant variant, DrawColumnMethod met pitch = args[0][6].load(true); count = args[0][7].load(true); dest_y = args[0][8].load(true); - if (method == DrawColumnMethod::Normal) - { - iscale = args[0][9].load(true); - texturefracx = args[0][10].load(true); - textureheight = args[0][11].load(true); - } + iscale = args[0][9].load(true); + texturefracx = args[0][10].load(true); + textureheight = args[0][11].load(true); texturefrac = args[0][12].load(true); light = args[0][13].load(true); color = SSAVec4i::unpack(args[0][14].load(true)); @@ -81,49 +78,32 @@ void DrawColumnCodegen::Generate(DrawColumnVariant variant, DrawColumnMethod met count = count_for_thread(dest_y, count, thread); dest = dest_for_thread(dest_y, pitch, dest, thread); pitch = pitch * thread.num_cores; - if (method == DrawColumnMethod::Normal) - { - stack_frac.store(texturefrac + iscale * skipped_by_thread(dest_y, thread)); - iscale = iscale * thread.num_cores; - one = (1 << 30) / textureheight; - SSAIfBlock branch; - branch.if_block(is_simple_shade); - LoopShade(variant, method, true); - branch.else_block(); - LoopShade(variant, method, false); - branch.end_block(); - } - else - { - source = thread.temp[((dest_y + skipped_by_thread(dest_y, thread)) * 4 + texturefrac) * 4]; + stack_frac.store(texturefrac + iscale * skipped_by_thread(dest_y, thread)); + iscale = iscale * thread.num_cores; + one = (1 << 30) / textureheight; - SSAIfBlock branch; - branch.if_block(is_simple_shade); 
- Loop(variant, method, true, true); - branch.else_block(); - Loop(variant, method, false, true); - branch.end_block(); - } -} - -void DrawColumnCodegen::LoopShade(DrawColumnVariant variant, DrawColumnMethod method, bool isSimpleShade) -{ SSAIfBlock branch; - branch.if_block(is_nearest_filter); - Loop(variant, method, isSimpleShade, true); + branch.if_block(is_simple_shade); + LoopShade(variant, true); branch.else_block(); - stack_frac.store(stack_frac.load() - (one >> 1)); - Loop(variant, method, isSimpleShade, false); + LoopShade(variant, false); branch.end_block(); } -void DrawColumnCodegen::Loop(DrawColumnVariant variant, DrawColumnMethod method, bool isSimpleShade, bool isNearestFilter) +void DrawColumnCodegen::LoopShade(DrawColumnVariant variant, bool isSimpleShade) { - SSAInt sincr; - if (method != DrawColumnMethod::Normal) - sincr = thread.num_cores * 4; + SSAIfBlock branch; + branch.if_block(is_nearest_filter); + Loop(variant, isSimpleShade, true); + branch.else_block(); + stack_frac.store(stack_frac.load() - (one >> 1)); + Loop(variant, isSimpleShade, false); + branch.end_block(); +} +void DrawColumnCodegen::Loop(DrawColumnVariant variant, bool isSimpleShade, bool isNearestFilter) +{ stack_index.store(SSAInt(0)); { SSAForBlock loop; @@ -131,56 +111,21 @@ void DrawColumnCodegen::Loop(DrawColumnVariant variant, DrawColumnMethod method, loop.loop_block(index < count); SSAInt sample_index, frac; - if (method == DrawColumnMethod::Normal) - { - frac = stack_frac.load(); - if (IsPaletteInput(variant)) - sample_index = frac >> FRACBITS; - else - sample_index = frac; - } + frac = stack_frac.load(); + if (IsPaletteInput(variant)) + sample_index = frac >> FRACBITS; else - { - sample_index = index * sincr * 4; - } + sample_index = frac; SSAInt offset = index * pitch * 4; - SSAVec4i bgcolor[4]; + SSAVec4i bgcolor = dest[offset].load_vec4ub(false); - int numColumns = (method == DrawColumnMethod::Rt4) ? 
4 : 1; + SSAVec4i outcolor = ProcessPixel(sample_index, bgcolor, variant, isSimpleShade, isNearestFilter); - if (numColumns == 4) - { - SSAVec16ub bg = dest[offset].load_unaligned_vec16ub(false); - SSAVec8s bg0 = SSAVec8s::extendlo(bg); - SSAVec8s bg1 = SSAVec8s::extendhi(bg); - bgcolor[0] = SSAVec4i::extendlo(bg0); - bgcolor[1] = SSAVec4i::extendhi(bg0); - bgcolor[2] = SSAVec4i::extendlo(bg1); - bgcolor[3] = SSAVec4i::extendhi(bg1); - } - else - { - bgcolor[0] = dest[offset].load_vec4ub(false); - } - - SSAVec4i outcolor[4]; - for (int i = 0; i < numColumns; i++) - outcolor[i] = ProcessPixel(sample_index + i * 4, bgcolor[i], variant, method, isSimpleShade, isNearestFilter); - - if (numColumns == 4) - { - SSAVec16ub packedcolor(SSAVec8s(outcolor[0], outcolor[1]), SSAVec8s(outcolor[2], outcolor[3])); - dest[offset].store_unaligned_vec16ub(packedcolor); - } - else - { - dest[offset].store_vec4ub(outcolor[0]); - } + dest[offset].store_vec4ub(outcolor); stack_index.store(index.add(SSAInt(1), true, true)); - if (method == DrawColumnMethod::Normal) - stack_frac.store(frac + iscale); + stack_frac.store(frac + iscale); loop.end_block(); } } @@ -212,7 +157,7 @@ bool DrawColumnCodegen::IsPaletteInput(DrawColumnVariant variant) } } -SSAVec4i DrawColumnCodegen::ProcessPixel(SSAInt sample_index, SSAVec4i bgcolor, DrawColumnVariant variant, DrawColumnMethod method, bool isSimpleShade, bool isNearestFilter) +SSAVec4i DrawColumnCodegen::ProcessPixel(SSAInt sample_index, SSAVec4i bgcolor, DrawColumnVariant variant, bool isSimpleShade, bool isNearestFilter) { SSAInt alpha, inv_alpha; SSAVec4i fg; @@ -220,22 +165,22 @@ SSAVec4i DrawColumnCodegen::ProcessPixel(SSAInt sample_index, SSAVec4i bgcolor, { default: case DrawColumnVariant::DrawCopy: - return blend_copy(Sample(sample_index, method, isNearestFilter)); + return blend_copy(Sample(sample_index, isNearestFilter)); case DrawColumnVariant::Draw: - return blend_copy(Shade(Sample(sample_index, method, isNearestFilter), isSimpleShade)); 
+ return blend_copy(Shade(Sample(sample_index, isNearestFilter), isSimpleShade)); case DrawColumnVariant::DrawAdd: case DrawColumnVariant::DrawAddClamp: - fg = Shade(Sample(sample_index, method, isNearestFilter), isSimpleShade); + fg = Shade(Sample(sample_index, isNearestFilter), isSimpleShade); return blend_add(fg, bgcolor, srcalpha, calc_blend_bgalpha(fg, destalpha)); case DrawColumnVariant::DrawShaded: alpha = SSAInt::MAX(SSAInt::MIN(ColormapSample(sample_index), SSAInt(64)), SSAInt(0)) * 4; inv_alpha = 256 - alpha; return blend_add(color, bgcolor, alpha, inv_alpha); case DrawColumnVariant::DrawSubClamp: - fg = Shade(Sample(sample_index, method, isNearestFilter), isSimpleShade); + fg = Shade(Sample(sample_index, isNearestFilter), isSimpleShade); return blend_sub(fg, bgcolor, srcalpha, calc_blend_bgalpha(fg, destalpha)); case DrawColumnVariant::DrawRevSubClamp: - fg = Shade(Sample(sample_index, method, isNearestFilter), isSimpleShade); + fg = Shade(Sample(sample_index, isNearestFilter), isSimpleShade); return blend_revsub(fg, bgcolor, srcalpha, calc_blend_bgalpha(fg, destalpha)); case DrawColumnVariant::DrawTranslated: return blend_copy(Shade(TranslateSample(sample_index), isSimpleShade)); @@ -311,23 +256,16 @@ SSAVec4i DrawColumnCodegen::ProcessPixelPal(SSAInt sample_index, SSAVec4i bgcolo } } -SSAVec4i DrawColumnCodegen::Sample(SSAInt frac, DrawColumnMethod method, bool isNearestFilter) +SSAVec4i DrawColumnCodegen::Sample(SSAInt frac, bool isNearestFilter) { - if (method == DrawColumnMethod::Normal) + if (isNearestFilter) { - if (isNearestFilter) - { - SSAInt sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; - return source[sample_index * 4].load_vec4ub(false); - } - else - { - return SampleLinear(source, source2, texturefracx, frac, one, textureheight); - } + SSAInt sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; + return source[sample_index * 4].load_vec4ub(false); } else { - return source[frac].load_vec4ub(true); 
+ return SampleLinear(source, source2, texturefracx, frac, one, textureheight); } } diff --git a/tools/drawergen/fixedfunction/drawcolumncodegen.h b/tools/drawergen/fixedfunction/drawcolumncodegen.h index 2c44edc5ce..9056549111 100644 --- a/tools/drawergen/fixedfunction/drawcolumncodegen.h +++ b/tools/drawergen/fixedfunction/drawcolumncodegen.h @@ -45,24 +45,17 @@ enum class DrawColumnVariant DrawRevSubClampTranslated }; -enum class DrawColumnMethod -{ - Normal, - Rt1, - Rt4 -}; - class DrawColumnCodegen : public DrawerCodegen { public: - void Generate(DrawColumnVariant variant, DrawColumnMethod method, SSAValue args, SSAValue thread_data); + void Generate(DrawColumnVariant variant, SSAValue args, SSAValue thread_data); private: - void LoopShade(DrawColumnVariant variant, DrawColumnMethod method, bool isSimpleShade); - void Loop(DrawColumnVariant variant, DrawColumnMethod method, bool isSimpleShade, bool isNearestFilter); - SSAVec4i ProcessPixel(SSAInt sample_index, SSAVec4i bgcolor, DrawColumnVariant variant, DrawColumnMethod method, bool isSimpleShade, bool isNearestFilter); + void LoopShade(DrawColumnVariant variant, bool isSimpleShade); + void Loop(DrawColumnVariant variant, bool isSimpleShade, bool isNearestFilter); + SSAVec4i ProcessPixel(SSAInt sample_index, SSAVec4i bgcolor, DrawColumnVariant variant, bool isSimpleShade, bool isNearestFilter); SSAVec4i ProcessPixelPal(SSAInt sample_index, SSAVec4i bgcolor, DrawColumnVariant variant, bool isSimpleShade); - SSAVec4i Sample(SSAInt frac, DrawColumnMethod method, bool isNearestFilter); + SSAVec4i Sample(SSAInt frac, bool isNearestFilter); SSAVec4i SampleLinear(SSAUBytePtr col0, SSAUBytePtr col1, SSAInt texturefracx, SSAInt texturefracy, SSAInt one, SSAInt height); SSAInt ColormapSample(SSAInt frac); SSAVec4i TranslateSample(SSAInt frac); diff --git a/tools/drawergen/llvmdrawers.cpp b/tools/drawergen/llvmdrawers.cpp index 5c3bf05a43..0999199974 100644 --- a/tools/drawergen/llvmdrawers.cpp +++ 
b/tools/drawergen/llvmdrawers.cpp @@ -29,46 +29,22 @@ LLVMDrawers::LLVMDrawers(const std::string &triple, const std::string &cpuName, { mProgram.CreateModule(); - CodegenDrawColumn("FillColumn", DrawColumnVariant::Fill, DrawColumnMethod::Normal); - CodegenDrawColumn("FillColumnAdd", DrawColumnVariant::FillAdd, DrawColumnMethod::Normal); - CodegenDrawColumn("FillColumnAddClamp", DrawColumnVariant::FillAddClamp, DrawColumnMethod::Normal); - CodegenDrawColumn("FillColumnSubClamp", DrawColumnVariant::FillSubClamp, DrawColumnMethod::Normal); - CodegenDrawColumn("FillColumnRevSubClamp", DrawColumnVariant::FillRevSubClamp, DrawColumnMethod::Normal); - CodegenDrawColumn("DrawColumn", DrawColumnVariant::Draw, DrawColumnMethod::Normal); - CodegenDrawColumn("DrawColumnAdd", DrawColumnVariant::DrawAdd, DrawColumnMethod::Normal); - CodegenDrawColumn("DrawColumnShaded", DrawColumnVariant::DrawShaded, DrawColumnMethod::Normal); - CodegenDrawColumn("DrawColumnAddClamp", DrawColumnVariant::DrawAddClamp, DrawColumnMethod::Normal); - CodegenDrawColumn("DrawColumnSubClamp", DrawColumnVariant::DrawSubClamp, DrawColumnMethod::Normal); - CodegenDrawColumn("DrawColumnRevSubClamp", DrawColumnVariant::DrawRevSubClamp, DrawColumnMethod::Normal); - CodegenDrawColumn("DrawColumnTranslated", DrawColumnVariant::DrawTranslated, DrawColumnMethod::Normal); - CodegenDrawColumn("DrawColumnTlatedAdd", DrawColumnVariant::DrawTlatedAdd, DrawColumnMethod::Normal); - CodegenDrawColumn("DrawColumnAddClampTranslated", DrawColumnVariant::DrawAddClampTranslated, DrawColumnMethod::Normal); - CodegenDrawColumn("DrawColumnSubClampTranslated", DrawColumnVariant::DrawSubClampTranslated, DrawColumnMethod::Normal); - CodegenDrawColumn("DrawColumnRevSubClampTranslated", DrawColumnVariant::DrawRevSubClampTranslated, DrawColumnMethod::Normal); - CodegenDrawColumn("DrawColumnRt1", DrawColumnVariant::Draw, DrawColumnMethod::Rt1); - CodegenDrawColumn("DrawColumnRt1Copy", DrawColumnVariant::DrawCopy, 
DrawColumnMethod::Rt1); - CodegenDrawColumn("DrawColumnRt1Add", DrawColumnVariant::DrawAdd, DrawColumnMethod::Rt1); - CodegenDrawColumn("DrawColumnRt1Shaded", DrawColumnVariant::DrawShaded, DrawColumnMethod::Rt1); - CodegenDrawColumn("DrawColumnRt1AddClamp", DrawColumnVariant::DrawAddClamp, DrawColumnMethod::Rt1); - CodegenDrawColumn("DrawColumnRt1SubClamp", DrawColumnVariant::DrawSubClamp, DrawColumnMethod::Rt1); - CodegenDrawColumn("DrawColumnRt1RevSubClamp", DrawColumnVariant::DrawRevSubClamp, DrawColumnMethod::Rt1); - CodegenDrawColumn("DrawColumnRt1Translated", DrawColumnVariant::DrawTranslated, DrawColumnMethod::Rt1); - CodegenDrawColumn("DrawColumnRt1TlatedAdd", DrawColumnVariant::DrawTlatedAdd, DrawColumnMethod::Rt1); - CodegenDrawColumn("DrawColumnRt1AddClampTranslated", DrawColumnVariant::DrawAddClampTranslated, DrawColumnMethod::Rt1); - CodegenDrawColumn("DrawColumnRt1SubClampTranslated", DrawColumnVariant::DrawSubClampTranslated, DrawColumnMethod::Rt1); - CodegenDrawColumn("DrawColumnRt1RevSubClampTranslated", DrawColumnVariant::DrawRevSubClampTranslated, DrawColumnMethod::Rt1); - CodegenDrawColumn("DrawColumnRt4", DrawColumnVariant::Draw, DrawColumnMethod::Rt4); - CodegenDrawColumn("DrawColumnRt4Copy", DrawColumnVariant::DrawCopy, DrawColumnMethod::Rt4); - CodegenDrawColumn("DrawColumnRt4Add", DrawColumnVariant::DrawAdd, DrawColumnMethod::Rt4); - CodegenDrawColumn("DrawColumnRt4Shaded", DrawColumnVariant::DrawShaded, DrawColumnMethod::Rt4); - CodegenDrawColumn("DrawColumnRt4AddClamp", DrawColumnVariant::DrawAddClamp, DrawColumnMethod::Rt4); - CodegenDrawColumn("DrawColumnRt4SubClamp", DrawColumnVariant::DrawSubClamp, DrawColumnMethod::Rt4); - CodegenDrawColumn("DrawColumnRt4RevSubClamp", DrawColumnVariant::DrawRevSubClamp, DrawColumnMethod::Rt4); - CodegenDrawColumn("DrawColumnRt4Translated", DrawColumnVariant::DrawTranslated, DrawColumnMethod::Rt4); - CodegenDrawColumn("DrawColumnRt4TlatedAdd", DrawColumnVariant::DrawTlatedAdd, DrawColumnMethod::Rt4); 
- CodegenDrawColumn("DrawColumnRt4AddClampTranslated", DrawColumnVariant::DrawAddClampTranslated, DrawColumnMethod::Rt4); - CodegenDrawColumn("DrawColumnRt4SubClampTranslated", DrawColumnVariant::DrawSubClampTranslated, DrawColumnMethod::Rt4); - CodegenDrawColumn("DrawColumnRt4RevSubClampTranslated", DrawColumnVariant::DrawRevSubClampTranslated, DrawColumnMethod::Rt4); + CodegenDrawColumn("FillColumn", DrawColumnVariant::Fill); + CodegenDrawColumn("FillColumnAdd", DrawColumnVariant::FillAdd); + CodegenDrawColumn("FillColumnAddClamp", DrawColumnVariant::FillAddClamp); + CodegenDrawColumn("FillColumnSubClamp", DrawColumnVariant::FillSubClamp); + CodegenDrawColumn("FillColumnRevSubClamp", DrawColumnVariant::FillRevSubClamp); + CodegenDrawColumn("DrawColumn", DrawColumnVariant::Draw); + CodegenDrawColumn("DrawColumnAdd", DrawColumnVariant::DrawAdd); + CodegenDrawColumn("DrawColumnShaded", DrawColumnVariant::DrawShaded); + CodegenDrawColumn("DrawColumnAddClamp", DrawColumnVariant::DrawAddClamp); + CodegenDrawColumn("DrawColumnSubClamp", DrawColumnVariant::DrawSubClamp); + CodegenDrawColumn("DrawColumnRevSubClamp", DrawColumnVariant::DrawRevSubClamp); + CodegenDrawColumn("DrawColumnTranslated", DrawColumnVariant::DrawTranslated); + CodegenDrawColumn("DrawColumnTlatedAdd", DrawColumnVariant::DrawTlatedAdd); + CodegenDrawColumn("DrawColumnAddClampTranslated", DrawColumnVariant::DrawAddClampTranslated); + CodegenDrawColumn("DrawColumnSubClampTranslated", DrawColumnVariant::DrawSubClampTranslated); + CodegenDrawColumn("DrawColumnRevSubClampTranslated", DrawColumnVariant::DrawRevSubClampTranslated); CodegenDrawSpan("DrawSpan", DrawSpanVariant::Opaque); CodegenDrawSpan("DrawSpanMasked", DrawSpanVariant::Masked); CodegenDrawSpan("DrawSpanTranslucent", DrawSpanVariant::Translucent); @@ -102,7 +78,7 @@ LLVMDrawers::LLVMDrawers(const std::string &triple, const std::string &cpuName, ObjectFile = mProgram.GenerateObjectFile(triple, cpuName, features); } -void 
LLVMDrawers::CodegenDrawColumn(const char *name, DrawColumnVariant variant, DrawColumnMethod method) +void LLVMDrawers::CodegenDrawColumn(const char *name, DrawColumnVariant variant) { llvm::IRBuilder<> builder(mProgram.context()); SSAScope ssa_scope(&mProgram.context(), mProgram.module(), &builder); @@ -113,7 +89,7 @@ void LLVMDrawers::CodegenDrawColumn(const char *name, DrawColumnVariant variant, function.create_public(); DrawColumnCodegen codegen; - codegen.Generate(variant, method, function.parameter(0), function.parameter(1)); + codegen.Generate(variant, function.parameter(0), function.parameter(1)); builder.CreateRetVoid(); diff --git a/tools/drawergen/llvmdrawers.h b/tools/drawergen/llvmdrawers.h index f546d1dff2..a7f1c86254 100644 --- a/tools/drawergen/llvmdrawers.h +++ b/tools/drawergen/llvmdrawers.h @@ -47,7 +47,7 @@ public: std::vector ObjectFile; private: - void CodegenDrawColumn(const char *name, DrawColumnVariant variant, DrawColumnMethod method); + void CodegenDrawColumn(const char *name, DrawColumnVariant variant); void CodegenDrawSpan(const char *name, DrawSpanVariant variant); void CodegenDrawWall(const char *name, DrawWallVariant variant, int columns); void CodegenDrawSky(const char *name, DrawSkyVariant variant, int columns); From ea92b9548378fb99848a75cdddf24de5a0af3886 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 25 Dec 2016 06:00:18 +0100 Subject: [PATCH 556/912] Change define to if statement --- src/r_walldraw.cpp | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/src/r_walldraw.cpp b/src/r_walldraw.cpp index 4f532a78e8..8941443001 100644 --- a/src/r_walldraw.cpp +++ b/src/r_walldraw.cpp @@ -805,29 +805,31 @@ static void ProcessWallWorker( double xmagnitude = 1.0; -#if !defined(NO_DYNAMIC_SWLIGHTS) - for (int x = x1; x < x2; x++, light += rw_lightstep) + if (r_dynlights) { - int y1 = uwal[x]; - int y2 = dwal[x]; - if (y2 <= y1) - continue; + for (int x = x1; x < x2; x++, light += 
rw_lightstep) + { + int y1 = uwal[x]; + int y2 = dwal[x]; + if (y2 <= y1) + continue; - if (!fixed) - R_SetColorMapLight(basecolormap, light, wallshade); + if (!fixed) + R_SetColorMapLight(basecolormap, light, wallshade); - if (x + 1 < x2) xmagnitude = fabs(FIXED2DBL(lwal[x + 1]) - FIXED2DBL(lwal[x])); + if (x + 1 < x2) xmagnitude = fabs(FIXED2DBL(lwal[x + 1]) - FIXED2DBL(lwal[x])); - WallSampler sampler(y1, swal[x], yrepeat, lwal[x] + xoffset, xmagnitude, rw_pic, getcol); - Draw1Column(x, y1, y2, sampler, draw1column); + WallSampler sampler(y1, swal[x], yrepeat, lwal[x] + xoffset, xmagnitude, rw_pic, getcol); + Draw1Column(x, y1, y2, sampler, draw1column); + } + NetUpdate(); + return; } -#else + // Calculate where 4 column alignment begins and ends: int aligned_x1 = clamp((x1 + 3) / 4 * 4, x1, x2); int aligned_x2 = clamp(x2 / 4 * 4, x1, x2); - double xmagnitude = 1.0; - // First unaligned columns: for (int x = x1; x < aligned_x1; x++, light += rw_lightstep) { @@ -956,7 +958,6 @@ static void ProcessWallWorker( WallSampler sampler(y1, swal[x], yrepeat, lwal[x] + xoffset, xmagnitude, rw_pic, getcol); Draw1Column(x, y1, y2, sampler, draw1column); } -#endif NetUpdate(); } From d428634c58e2966e55088594ba578d40f40fcb6f Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 25 Dec 2016 07:15:17 +0100 Subject: [PATCH 557/912] Add dynlights to pal wall drawers --- src/r_draw_pal.cpp | 197 ++++++++++++++++++++++++++++++++++++++------- src/r_draw_pal.h | 6 ++ src/r_walldraw.cpp | 110 ++++++++++++------------- 3 files changed, 229 insertions(+), 84 deletions(-) diff --git a/src/r_draw_pal.cpp b/src/r_draw_pal.cpp index 37d38891ef..66129fea7b 100644 --- a/src/r_draw_pal.cpp +++ b/src/r_draw_pal.cpp @@ -103,6 +103,10 @@ namespace swrenderer _pitch = dc_pitch; _srcblend = dc_srcblend; _destblend = dc_destblend; + _dynlights = dc_lights; + _num_dynlights = dc_num_lights; + _viewpos_z = dc_viewpos.Z; + _step_viewpos_z = dc_viewpos_step.Z; } PalWall4Command::PalWall4Command() @@ 
-124,6 +128,43 @@ namespace swrenderer _destblend = dc_destblend; } + uint8_t PalWall1Command::AddLights(const TriLight *lights, int num_lights, float viewpos_z, uint8_t fg, uint8_t material) + { + uint32_t lit_r = GPalette.BaseColors[fg].r; + uint32_t lit_g = GPalette.BaseColors[fg].g; + uint32_t lit_b = GPalette.BaseColors[fg].b; + + uint32_t material_r = GPalette.BaseColors[material].r; + uint32_t material_g = GPalette.BaseColors[material].g; + uint32_t material_b = GPalette.BaseColors[material].b; + + for (int i = 0; i < num_lights; i++) + { + uint32_t light_color_r = RPART(lights[i].color); + uint32_t light_color_g = GPART(lights[i].color); + uint32_t light_color_b = BPART(lights[i].color); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // attenuation = 1 - MIN(dist * (1/radius), 1) + float Lxy2 = lights[i].x; // L.x*L.x + L.y*L.y + float Lz = lights[i].z - viewpos_z; + float dist2 = Lxy2 + Lz * Lz; + float dist = dist2 * _mm_cvtss_f32(_mm_rsqrt_ss(_mm_load_ss(&dist2))); + uint32_t attenuation = (uint32_t)(256.0f - MIN(dist * lights[i].radius, 256.0f)); + + lit_r += (light_color_r * material_r * attenuation) >> 16; + lit_g += (light_color_g * material_g * attenuation) >> 16; + lit_b += (light_color_b * material_b * attenuation) >> 16; + } + + lit_r = MIN(lit_r, 255); + lit_g = MIN(lit_g, 255); + lit_b = MIN(lit_b, 255); + + return RGB256k.All[((lit_r >> 2) << 12) | ((lit_g >> 2) << 6) | (lit_b >> 2)]; + } + void DrawWall1PalCommand::Execute(DrawerThread *thread) { uint32_t fracstep = _iscale; @@ -134,6 +175,10 @@ namespace swrenderer uint8_t *dest = _dest; int bits = _fracbits; int pitch = _pitch; + TriLight *dynlights = _dynlights; + int num_dynlights = _num_dynlights; + float viewpos_z = _viewpos_z; + float step_viewpos_z = _step_viewpos_z; count = thread->count_for_thread(_dest_y, count); if (count <= 0) @@ -144,12 +189,31 @@ namespace swrenderer fracstep *= thread->num_cores; pitch *= thread->num_cores; - do + if (num_dynlights == 0) { - *dest = 
colormap[source[frac >> bits]]; - frac += fracstep; - dest += pitch; - } while (--count); + do + { + *dest = colormap[source[frac >> bits]]; + frac += fracstep; + dest += pitch; + } while (--count); + } + else + { + float viewpos_z = _viewpos_z; + float step_viewpos_z = _step_viewpos_z; + + viewpos_z += step_viewpos_z * thread->skipped_by_thread(_dest_y); + step_viewpos_z *= thread->num_cores; + + do + { + *dest = AddLights(dynlights, num_dynlights, viewpos_z, colormap[source[frac >> bits]], source[frac >> bits]); + viewpos_z += step_viewpos_z; + frac += fracstep; + dest += pitch; + } while (--count); + } } void DrawWall4PalCommand::Execute(DrawerThread *thread) @@ -212,6 +276,10 @@ namespace swrenderer uint8_t *dest = _dest; int bits = _fracbits; int pitch = _pitch; + TriLight *dynlights = _dynlights; + int num_dynlights = _num_dynlights; + float viewpos_z = _viewpos_z; + float step_viewpos_z = _step_viewpos_z; count = thread->count_for_thread(_dest_y, count); if (count <= 0) @@ -222,16 +290,39 @@ namespace swrenderer fracstep *= thread->num_cores; pitch *= thread->num_cores; - do + if (num_dynlights == 0) { - uint8_t pix = source[frac >> bits]; - if (pix != 0) + do { - *dest = colormap[pix]; - } - frac += fracstep; - dest += pitch; - } while (--count); + uint8_t pix = source[frac >> bits]; + if (pix != 0) + { + *dest = colormap[pix]; + } + frac += fracstep; + dest += pitch; + } while (--count); + } + else + { + float viewpos_z = _viewpos_z; + float step_viewpos_z = _step_viewpos_z; + + viewpos_z += step_viewpos_z * thread->skipped_by_thread(_dest_y); + step_viewpos_z *= thread->num_cores; + + do + { + uint8_t pix = source[frac >> bits]; + if (pix != 0) + { + *dest = AddLights(dynlights, num_dynlights, viewpos_z, colormap[pix], pix); + } + viewpos_z += step_viewpos_z; + frac += fracstep; + dest += pitch; + } while (--count); + } } void DrawWallMasked4PalCommand::Execute(DrawerThread *thread) @@ -296,6 +387,10 @@ namespace swrenderer uint8_t *dest = _dest; int bits 
= _fracbits; int pitch = _pitch; + TriLight *dynlights = _dynlights; + int num_dynlights = _num_dynlights; + float viewpos_z = _viewpos_z; + float step_viewpos_z = _step_viewpos_z; uint32_t *fg2rgb = _srcblend; uint32_t *bg2rgb = _destblend; @@ -308,6 +403,8 @@ namespace swrenderer frac += fracstep * thread->skipped_by_thread(_dest_y); fracstep *= thread->num_cores; pitch *= thread->num_cores; + viewpos_z += step_viewpos_z * thread->skipped_by_thread(_dest_y); + step_viewpos_z *= thread->num_cores; if (!r_blendmethod) { @@ -316,11 +413,14 @@ namespace swrenderer uint8_t pix = source[frac >> bits]; if (pix != 0) { - uint32_t fg = fg2rgb[colormap[pix]]; + uint8_t lit = num_dynlights != 0 ? AddLights(dynlights, num_dynlights, viewpos_z, colormap[pix], pix) : colormap[pix]; + + uint32_t fg = fg2rgb[lit]; uint32_t bg = bg2rgb[*dest]; fg = (fg + bg) | 0x1f07c1f; *dest = RGB32k.All[fg & (fg >> 15)]; } + viewpos_z += step_viewpos_z; frac += fracstep; dest += pitch; } while (--count); @@ -333,11 +433,14 @@ namespace swrenderer uint8_t pix = source[frac >> bits]; if (pix != 0) { - uint32_t r = MIN(GPalette.BaseColors[colormap[pix]].r + GPalette.BaseColors[*dest].r, 255); - uint32_t g = MIN(GPalette.BaseColors[colormap[pix]].g + GPalette.BaseColors[*dest].g, 255); - uint32_t b = MIN(GPalette.BaseColors[colormap[pix]].b + GPalette.BaseColors[*dest].b, 255); + uint8_t lit = num_dynlights != 0 ? 
AddLights(dynlights, num_dynlights, viewpos_z, colormap[pix], pix) : colormap[pix]; + + uint32_t r = MIN(GPalette.BaseColors[lit].r + GPalette.BaseColors[*dest].r, 255); + uint32_t g = MIN(GPalette.BaseColors[lit].g + GPalette.BaseColors[*dest].g, 255); + uint32_t b = MIN(GPalette.BaseColors[lit].b + GPalette.BaseColors[*dest].b, 255); *dest = RGB256k.RGB[r>>2][g>>2][b>>2]; } + viewpos_z += step_viewpos_z; frac += fracstep; dest += pitch; } while (--count); @@ -420,6 +523,10 @@ namespace swrenderer uint8_t *dest = _dest; int bits = _fracbits; int pitch = _pitch; + TriLight *dynlights = _dynlights; + int num_dynlights = _num_dynlights; + float viewpos_z = _viewpos_z; + float step_viewpos_z = _step_viewpos_z; uint32_t *fg2rgb = _srcblend; uint32_t *bg2rgb = _destblend; @@ -432,6 +539,8 @@ namespace swrenderer frac += fracstep * thread->skipped_by_thread(_dest_y); fracstep *= thread->num_cores; pitch *= thread->num_cores; + viewpos_z += step_viewpos_z * thread->skipped_by_thread(_dest_y); + step_viewpos_z *= thread->num_cores; if (!r_blendmethod) { @@ -440,7 +549,9 @@ namespace swrenderer uint8_t pix = source[frac >> bits]; if (pix != 0) { - uint32_t a = fg2rgb[colormap[pix]] + bg2rgb[*dest]; + uint8_t lit = num_dynlights != 0 ? AddLights(dynlights, num_dynlights, viewpos_z, colormap[pix], pix) : colormap[pix]; + + uint32_t a = fg2rgb[lit] + bg2rgb[*dest]; uint32_t b = a; a |= 0x01f07c1f; @@ -450,6 +561,7 @@ namespace swrenderer a |= b; *dest = RGB32k.All[a & (a >> 15)]; } + viewpos_z += step_viewpos_z; frac += fracstep; dest += pitch; } while (--count); @@ -461,11 +573,14 @@ namespace swrenderer uint8_t pix = source[frac >> bits]; if (pix != 0) { - uint32_t r = MIN(GPalette.BaseColors[colormap[pix]].r + GPalette.BaseColors[*dest].r, 255); - uint32_t g = MIN(GPalette.BaseColors[colormap[pix]].g + GPalette.BaseColors[*dest].g, 255); - uint32_t b = MIN(GPalette.BaseColors[colormap[pix]].b + GPalette.BaseColors[*dest].b, 255); + uint8_t lit = num_dynlights != 0 ? 
AddLights(dynlights, num_dynlights, viewpos_z, colormap[pix], pix) : colormap[pix]; + + uint32_t r = MIN(GPalette.BaseColors[lit].r + GPalette.BaseColors[*dest].r, 255); + uint32_t g = MIN(GPalette.BaseColors[lit].g + GPalette.BaseColors[*dest].g, 255); + uint32_t b = MIN(GPalette.BaseColors[lit].b + GPalette.BaseColors[*dest].b, 255); *dest = RGB256k.RGB[r>>2][g>>2][b>>2]; } + viewpos_z += step_viewpos_z; frac += fracstep; dest += pitch; } while (--count); @@ -526,6 +641,10 @@ namespace swrenderer uint8_t *dest = _dest; int bits = _fracbits; int pitch = _pitch; + TriLight *dynlights = _dynlights; + int num_dynlights = _num_dynlights; + float viewpos_z = _viewpos_z; + float step_viewpos_z = _step_viewpos_z; uint32_t *fg2rgb = _srcblend; uint32_t *bg2rgb = _destblend; @@ -538,6 +657,8 @@ namespace swrenderer frac += fracstep * thread->skipped_by_thread(_dest_y); fracstep *= thread->num_cores; pitch *= thread->num_cores; + viewpos_z += step_viewpos_z * thread->skipped_by_thread(_dest_y); + step_viewpos_z *= thread->num_cores; if (!r_blendmethod) { @@ -546,7 +667,9 @@ namespace swrenderer uint8_t pix = source[frac >> bits]; if (pix != 0) { - uint32_t a = (fg2rgb[colormap[pix]] | 0x40100400) - bg2rgb[*dest]; + uint8_t lit = num_dynlights != 0 ? 
AddLights(dynlights, num_dynlights, viewpos_z, colormap[pix], pix) : colormap[pix]; + + uint32_t a = (fg2rgb[lit] | 0x40100400) - bg2rgb[*dest]; uint32_t b = a; b &= 0x40100400; @@ -555,6 +678,7 @@ namespace swrenderer a |= 0x01f07c1f; *dest = RGB32k.All[a & (a >> 15)]; } + viewpos_z += step_viewpos_z; frac += fracstep; dest += pitch; } while (--count); @@ -566,11 +690,14 @@ namespace swrenderer uint8_t pix = source[frac >> bits]; if (pix != 0) { - int r = clamp(-GPalette.BaseColors[colormap[pix]].r + GPalette.BaseColors[*dest].r, 0, 255); - int g = clamp(-GPalette.BaseColors[colormap[pix]].g + GPalette.BaseColors[*dest].g, 0, 255); - int b = clamp(-GPalette.BaseColors[colormap[pix]].b + GPalette.BaseColors[*dest].b, 0, 255); + uint8_t lit = num_dynlights != 0 ? AddLights(dynlights, num_dynlights, viewpos_z, colormap[pix], pix) : colormap[pix]; + + int r = clamp(-GPalette.BaseColors[lit].r + GPalette.BaseColors[*dest].r, 0, 255); + int g = clamp(-GPalette.BaseColors[lit].g + GPalette.BaseColors[*dest].g, 0, 255); + int b = clamp(-GPalette.BaseColors[lit].b + GPalette.BaseColors[*dest].b, 0, 255); *dest = RGB256k.RGB[r>>2][g>>2][b>>2]; } + viewpos_z += step_viewpos_z; frac += fracstep; dest += pitch; } while (--count); @@ -657,6 +784,10 @@ namespace swrenderer uint8_t *dest = _dest; int bits = _fracbits; int pitch = _pitch; + TriLight *dynlights = _dynlights; + int num_dynlights = _num_dynlights; + float viewpos_z = _viewpos_z; + float step_viewpos_z = _step_viewpos_z; uint32_t *fg2rgb = _srcblend; uint32_t *bg2rgb = _destblend; @@ -669,6 +800,8 @@ namespace swrenderer frac += fracstep * thread->skipped_by_thread(_dest_y); fracstep *= thread->num_cores; pitch *= thread->num_cores; + viewpos_z += step_viewpos_z * thread->skipped_by_thread(_dest_y); + step_viewpos_z *= thread->num_cores; if (!r_blendmethod) { @@ -677,7 +810,9 @@ namespace swrenderer uint8_t pix = source[frac >> bits]; if (pix != 0) { - uint32_t a = (bg2rgb[*dest] | 0x40100400) - fg2rgb[colormap[pix]]; 
+ uint8_t lit = num_dynlights != 0 ? AddLights(dynlights, num_dynlights, viewpos_z, colormap[pix], pix) : colormap[pix]; + + uint32_t a = (bg2rgb[*dest] | 0x40100400) - fg2rgb[lit]; uint32_t b = a; b &= 0x40100400; @@ -686,6 +821,7 @@ namespace swrenderer a |= 0x01f07c1f; *dest = RGB32k.All[a & (a >> 15)]; } + viewpos_z += step_viewpos_z; frac += fracstep; dest += pitch; } while (--count); @@ -697,11 +833,14 @@ namespace swrenderer uint8_t pix = source[frac >> bits]; if (pix != 0) { - int r = clamp(GPalette.BaseColors[colormap[pix]].r - GPalette.BaseColors[*dest].r, 0, 255); - int g = clamp(GPalette.BaseColors[colormap[pix]].g - GPalette.BaseColors[*dest].g, 0, 255); - int b = clamp(GPalette.BaseColors[colormap[pix]].b - GPalette.BaseColors[*dest].b, 0, 255); + uint8_t lit = num_dynlights != 0 ? AddLights(dynlights, num_dynlights, viewpos_z, colormap[pix], pix) : colormap[pix]; + + int r = clamp(GPalette.BaseColors[lit].r - GPalette.BaseColors[*dest].r, 0, 255); + int g = clamp(GPalette.BaseColors[lit].g - GPalette.BaseColors[*dest].g, 0, 255); + int b = clamp(GPalette.BaseColors[lit].b - GPalette.BaseColors[*dest].b, 0, 255); *dest = RGB256k.RGB[r>>2][g>>2][b>>2]; } + viewpos_z += step_viewpos_z; frac += fracstep; dest += pitch; } while (--count); diff --git a/src/r_draw_pal.h b/src/r_draw_pal.h index 984849b83e..5766d38f4e 100644 --- a/src/r_draw_pal.h +++ b/src/r_draw_pal.h @@ -14,6 +14,8 @@ namespace swrenderer FString DebugInfo() override { return "PalWallCommand"; } protected: + inline static uint8_t AddLights(const TriLight *lights, int num_lights, float viewpos_z, uint8_t fg, uint8_t material); + uint32_t _iscale; uint32_t _texturefrac; uint8_t *_colormap; @@ -24,6 +26,10 @@ namespace swrenderer int _pitch; uint32_t *_srcblend; uint32_t *_destblend; + TriLight *_dynlights; + int _num_dynlights; + float _viewpos_z; + float _step_viewpos_z; }; class PalWall4Command : public DrawerCommand diff --git a/src/r_walldraw.cpp b/src/r_walldraw.cpp index 
8941443001..7e4f3f5151 100644 --- a/src/r_walldraw.cpp +++ b/src/r_walldraw.cpp @@ -537,71 +537,71 @@ WallSampler::WallSampler(int y1, float swal, double yrepeat, fixed_t xoffset, do // Draw a column with support for non-power-of-two ranges static void Draw1Column(int x, int y1, int y2, WallSampler &sampler, void(*draw1column)()) { - if (r_swtruecolor) + if (r_dynlights) { - if (r_dynlights) + // Find column position in view space + float w1 = 1.0f / WallC.sz1; + float w2 = 1.0f / WallC.sz2; + float t = (x - WallC.sx1 + 0.5f) / (WallC.sx2 - WallC.sx1); + float wcol = w1 * (1.0f - t) + w2 * t; + float zcol = 1.0f / wcol; + dc_viewpos.X = (float)((x + 0.5 - CenterX) / CenterX * zcol); + dc_viewpos.Y = zcol; + dc_viewpos.Z = (float)((CenterY - y1 - 0.5) / InvZtoScale * zcol); + dc_viewpos_step.Z = (float)(-zcol / InvZtoScale); + + static TriLight lightbuffer[64 * 1024]; + static int nextlightindex = 0; + + // Setup lights for column + dc_num_lights = 0; + dc_lights = lightbuffer + nextlightindex; + FLightNode *cur_node = dc_light_list; + while (cur_node && nextlightindex < 64 * 1024) { - // Find column position in view space - float w1 = 1.0f / WallC.sz1; - float w2 = 1.0f / WallC.sz2; - float t = (x - WallC.sx1 + 0.5f) / (WallC.sx2 - WallC.sx1); - float wcol = w1 * (1.0f - t) + w2 * t; - float zcol = 1.0f / wcol; - dc_viewpos.X = (float)((x + 0.5 - CenterX) / CenterX * zcol); - dc_viewpos.Y = zcol; - dc_viewpos.Z = (float)((CenterY - y1 - 0.5) / InvZtoScale * zcol); - dc_viewpos_step.Z = (float)(-zcol / InvZtoScale); - - static TriLight lightbuffer[64 * 1024]; - static int nextlightindex = 0; - - // Setup lights for column - dc_num_lights = 0; - dc_lights = lightbuffer + nextlightindex; - FLightNode *cur_node = dc_light_list; - while (cur_node && nextlightindex < 64 * 1024) + if (!(cur_node->lightsource->flags2&MF2_DORMANT)) { - if (!(cur_node->lightsource->flags2&MF2_DORMANT)) + double lightX = cur_node->lightsource->X() - ViewPos.X; + double lightY = 
cur_node->lightsource->Y() - ViewPos.Y; + double lightZ = cur_node->lightsource->Z() - ViewPos.Z; + + float lx = (float)(lightX * ViewSin - lightY * ViewCos) - dc_viewpos.X; + float ly = (float)(lightX * ViewTanCos + lightY * ViewTanSin) - dc_viewpos.Y; + float lz = (float)lightZ; + + // Precalculate the constant part of the dot here so the drawer doesn't have to. + float lconstant = lx * lx + ly * ly; + + // Include light only if it touches this column + float radius = cur_node->lightsource->GetRadius(); + if (radius * radius >= lconstant) { - double lightX = cur_node->lightsource->X() - ViewPos.X; - double lightY = cur_node->lightsource->Y() - ViewPos.Y; - double lightZ = cur_node->lightsource->Z() - ViewPos.Z; + uint32_t red = cur_node->lightsource->GetRed(); + uint32_t green = cur_node->lightsource->GetGreen(); + uint32_t blue = cur_node->lightsource->GetBlue(); - float lx = (float)(lightX * ViewSin - lightY * ViewCos) - dc_viewpos.X; - float ly = (float)(lightX * ViewTanCos + lightY * ViewTanSin) - dc_viewpos.Y; - float lz = (float)lightZ; - - // Precalculate the constant part of the dot here so the drawer doesn't have to. 
- float lconstant = lx * lx + ly * ly; - - // Include light only if it touches this column - float radius = cur_node->lightsource->GetRadius(); - if (radius * radius >= lconstant) - { - uint32_t red = cur_node->lightsource->GetRed(); - uint32_t green = cur_node->lightsource->GetGreen(); - uint32_t blue = cur_node->lightsource->GetBlue(); - - nextlightindex++; - auto &light = dc_lights[dc_num_lights++]; - light.x = lconstant; - light.z = lz; - light.radius = 256.0f / cur_node->lightsource->GetRadius(); - light.color = (red << 16) | (green << 8) | blue; - } + nextlightindex++; + auto &light = dc_lights[dc_num_lights++]; + light.x = lconstant; + light.z = lz; + light.radius = 256.0f / cur_node->lightsource->GetRadius(); + light.color = (red << 16) | (green << 8) | blue; } - - cur_node = cur_node->nextLight; } - if (nextlightindex == 64 * 1024) - nextlightindex = 0; - } - else - { - dc_num_lights = 0; + cur_node = cur_node->nextLight; } + if (nextlightindex == 64 * 1024) + nextlightindex = 0; + } + else + { + dc_num_lights = 0; + } + + if (r_swtruecolor) + { int count = y2 - y1; dc_source = sampler.source; From a76cd3533357962f3211a7d06004dffe24ad5ac4 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 25 Dec 2016 07:49:44 +0100 Subject: [PATCH 558/912] Dynamic lights to the flats in pal mode --- src/r_draw_pal.cpp | 152 +++++++++++++++++++++++++++++++++++++++------ src/r_draw_pal.h | 6 ++ src/r_plane.cpp | 93 ++++++++++++++------------- 3 files changed, 183 insertions(+), 68 deletions(-) diff --git a/src/r_draw_pal.cpp b/src/r_draw_pal.cpp index 66129fea7b..6302553402 100644 --- a/src/r_draw_pal.cpp +++ b/src/r_draw_pal.cpp @@ -2384,6 +2384,47 @@ namespace swrenderer _color = ds_color; _srcalpha = dc_srcalpha; _destalpha = dc_destalpha; + _dynlights = dc_lights; + _num_dynlights = dc_num_lights; + _viewpos_x = dc_viewpos.X; + _step_viewpos_x = dc_viewpos_step.X; + } + + uint8_t PalSpanCommand::AddLights(const TriLight *lights, int num_lights, float viewpos_x, 
uint8_t fg, uint8_t material) + { + uint32_t lit_r = GPalette.BaseColors[fg].r; + uint32_t lit_g = GPalette.BaseColors[fg].g; + uint32_t lit_b = GPalette.BaseColors[fg].b; + + uint32_t material_r = GPalette.BaseColors[material].r; + uint32_t material_g = GPalette.BaseColors[material].g; + uint32_t material_b = GPalette.BaseColors[material].b; + + for (int i = 0; i < num_lights; i++) + { + uint32_t light_color_r = RPART(lights[i].color); + uint32_t light_color_g = GPART(lights[i].color); + uint32_t light_color_b = BPART(lights[i].color); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // attenuation = 1 - MIN(dist * (1/radius), 1) + float Lyz2 = lights[i].y; // L.y*L.y + L.z*L.z + float Lx = lights[i].x - viewpos_x; + float dist2 = Lyz2 + Lx * Lx; + float dist = dist2 * _mm_cvtss_f32(_mm_rsqrt_ss(_mm_load_ss(&dist2))); + uint32_t attenuation = (uint32_t)(256.0f - MIN(dist * lights[i].radius, 256.0f)); + + lit_r += (light_color_r * material_r * attenuation) >> 16; + lit_g += (light_color_g * material_g * attenuation) >> 16; + lit_b += (light_color_b * material_b * attenuation) >> 16; + } + + lit_r = MIN(lit_r, 255); + lit_g = MIN(lit_g, 255); + lit_b = MIN(lit_b, 255); + + return RGB256k.All[((lit_r >> 2) << 12) | ((lit_g >> 2) << 6) | (lit_b >> 2)]; } void DrawSpanPalCommand::Execute(DrawerThread *thread) @@ -2411,7 +2452,12 @@ namespace swrenderer xstep = _xstep; ystep = _ystep; - if (_xbits == 6 && _ybits == 6) + const TriLight *dynlights = _dynlights; + int num_dynlights = _num_dynlights; + float viewpos_x = _viewpos_x; + float step_viewpos_x = _step_viewpos_x; + + if (_xbits == 6 && _ybits == 6 && num_dynlights == 0) { // 64x64 is the most common case by far, so special case it. do @@ -2428,6 +2474,24 @@ namespace swrenderer yfrac += ystep; } while (--count); } + else if (_xbits == 6 && _ybits == 6) + { + // 64x64 is the most common case by far, so special case it. + do + { + // Current texture index in u,v. 
+ spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + + // Lookup pixel from flat texture tile, + // re-index using light/colormap. + *dest++ = AddLights(dynlights, num_dynlights, viewpos_x, colormap[source[spot]], source[spot]); + + // Next step in u,v. + xfrac += xstep; + yfrac += ystep; + viewpos_x += step_viewpos_x; + } while (--count); + } else { uint8_t yshift = 32 - _ybits; @@ -2441,11 +2505,12 @@ namespace swrenderer // Lookup pixel from flat texture tile, // re-index using light/colormap. - *dest++ = colormap[source[spot]]; + *dest++ = AddLights(dynlights, num_dynlights, viewpos_x, colormap[source[spot]], source[spot]); // Next step in u,v. xfrac += xstep; yfrac += ystep; + viewpos_x += step_viewpos_x; } while (--count); } } @@ -2475,6 +2540,11 @@ namespace swrenderer xstep = _xstep; ystep = _ystep; + const TriLight *dynlights = _dynlights; + int num_dynlights = _num_dynlights; + float viewpos_x = _viewpos_x; + float step_viewpos_x = _step_viewpos_x; + if (_xbits == 6 && _ybits == 6) { // 64x64 is the most common case by far, so special case it. @@ -2486,11 +2556,12 @@ namespace swrenderer texdata = source[spot]; if (texdata != 0) { - *dest = colormap[texdata]; + *dest = num_dynlights != 0 ? AddLights(dynlights, num_dynlights, viewpos_x, colormap[texdata], texdata) : colormap[texdata]; } dest++; xfrac += xstep; yfrac += ystep; + viewpos_x += step_viewpos_x; } while (--count); } else @@ -2506,11 +2577,12 @@ namespace swrenderer texdata = source[spot]; if (texdata != 0) { - *dest = colormap[texdata]; + *dest = num_dynlights != 0 ? 
AddLights(dynlights, num_dynlights, viewpos_x, colormap[texdata], texdata) : colormap[texdata]; } dest++; xfrac += xstep; yfrac += ystep; + viewpos_x += step_viewpos_x; } while (--count); } } @@ -2544,6 +2616,11 @@ namespace swrenderer const PalEntry *palette = GPalette.BaseColors; + const TriLight *dynlights = _dynlights; + int num_dynlights = _num_dynlights; + float viewpos_x = _viewpos_x; + float step_viewpos_x = _step_viewpos_x; + if (!r_blendmethod) { if (_xbits == 6 && _ybits == 6) @@ -2552,7 +2629,7 @@ namespace swrenderer do { spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t fg = colormap[source[spot]]; + uint32_t fg = num_dynlights != 0 ? AddLights(dynlights, num_dynlights, viewpos_x, colormap[source[spot]], source[spot]) : colormap[source[spot]]; uint32_t bg = *dest; fg = fg2rgb[fg]; bg = bg2rgb[bg]; @@ -2560,6 +2637,7 @@ namespace swrenderer *dest++ = RGB32k.All[fg & (fg >> 15)]; xfrac += xstep; yfrac += ystep; + viewpos_x += step_viewpos_x; } while (--count); } else @@ -2570,7 +2648,7 @@ namespace swrenderer do { spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - uint32_t fg = colormap[source[spot]]; + uint32_t fg = num_dynlights != 0 ? AddLights(dynlights, num_dynlights, viewpos_x, colormap[source[spot]], source[spot]) : colormap[source[spot]]; uint32_t bg = *dest; fg = fg2rgb[fg]; bg = bg2rgb[bg]; @@ -2578,6 +2656,7 @@ namespace swrenderer *dest++ = RGB32k.All[fg & (fg >> 15)]; xfrac += xstep; yfrac += ystep; + viewpos_x += step_viewpos_x; } while (--count); } } @@ -2589,7 +2668,7 @@ namespace swrenderer do { spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t fg = colormap[source[spot]]; + uint32_t fg = num_dynlights != 0 ? 
AddLights(dynlights, num_dynlights, viewpos_x, colormap[source[spot]], source[spot]) : colormap[source[spot]]; uint32_t bg = *dest; int r = MAX((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 0); int g = MAX((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 0); @@ -2598,6 +2677,7 @@ namespace swrenderer xfrac += xstep; yfrac += ystep; + viewpos_x += step_viewpos_x; } while (--count); } else @@ -2608,7 +2688,7 @@ namespace swrenderer do { spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - uint32_t fg = colormap[source[spot]]; + uint32_t fg = num_dynlights != 0 ? AddLights(dynlights, num_dynlights, viewpos_x, colormap[source[spot]], source[spot]) : colormap[source[spot]]; uint32_t bg = *dest; int r = MAX((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 0); int g = MAX((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 0); @@ -2617,6 +2697,7 @@ namespace swrenderer xfrac += xstep; yfrac += ystep; + viewpos_x += step_viewpos_x; } while (--count); } } @@ -2641,6 +2722,11 @@ namespace swrenderer const PalEntry *palette = GPalette.BaseColors; + const TriLight *dynlights = _dynlights; + int num_dynlights = _num_dynlights; + float viewpos_x = _viewpos_x; + float step_viewpos_x = _step_viewpos_x; + xfrac = _xfrac; yfrac = _yfrac; @@ -2664,7 +2750,7 @@ namespace swrenderer texdata = source[spot]; if (texdata != 0) { - uint32_t fg = colormap[texdata]; + uint32_t fg = num_dynlights != 0 ? AddLights(dynlights, num_dynlights, viewpos_x, colormap[texdata], texdata) : colormap[texdata]; uint32_t bg = *dest; fg = fg2rgb[fg]; bg = bg2rgb[bg]; @@ -2674,6 +2760,7 @@ namespace swrenderer dest++; xfrac += xstep; yfrac += ystep; + viewpos_x += step_viewpos_x; } while (--count); } else @@ -2689,7 +2776,7 @@ namespace swrenderer texdata = source[spot]; if (texdata != 0) { - uint32_t fg = colormap[texdata]; + uint32_t fg = num_dynlights != 0 ? 
AddLights(dynlights, num_dynlights, viewpos_x, colormap[texdata], texdata) : colormap[texdata]; uint32_t bg = *dest; fg = fg2rgb[fg]; bg = bg2rgb[bg]; @@ -2699,6 +2786,7 @@ namespace swrenderer dest++; xfrac += xstep; yfrac += ystep; + viewpos_x += step_viewpos_x; } while (--count); } } @@ -2715,7 +2803,7 @@ namespace swrenderer texdata = source[spot]; if (texdata != 0) { - uint32_t fg = colormap[texdata]; + uint32_t fg = num_dynlights != 0 ? AddLights(dynlights, num_dynlights, viewpos_x, colormap[texdata], texdata) : colormap[texdata]; uint32_t bg = *dest; int r = MAX((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 0); int g = MAX((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 0); @@ -2725,6 +2813,7 @@ namespace swrenderer dest++; xfrac += xstep; yfrac += ystep; + viewpos_x += step_viewpos_x; } while (--count); } else @@ -2740,7 +2829,7 @@ namespace swrenderer texdata = source[spot]; if (texdata != 0) { - uint32_t fg = colormap[texdata]; + uint32_t fg = num_dynlights != 0 ? AddLights(dynlights, num_dynlights, viewpos_x, colormap[texdata], texdata) : colormap[texdata]; uint32_t bg = *dest; int r = MAX((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 0); int g = MAX((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 0); @@ -2750,6 +2839,7 @@ namespace swrenderer dest++; xfrac += xstep; yfrac += ystep; + viewpos_x += step_viewpos_x; } while (--count); } } @@ -2773,6 +2863,11 @@ namespace swrenderer uint32_t *bg2rgb = _destblend; const PalEntry *palette = GPalette.BaseColors; + const TriLight *dynlights = _dynlights; + int num_dynlights = _num_dynlights; + float viewpos_x = _viewpos_x; + float step_viewpos_x = _step_viewpos_x; + xfrac = _xfrac; yfrac = _yfrac; @@ -2791,7 +2886,8 @@ namespace swrenderer do { spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t a = fg2rgb[colormap[source[spot]]] + bg2rgb[*dest]; + uint32_t fg = num_dynlights != 0 ? 
AddLights(dynlights, num_dynlights, viewpos_x, colormap[source[spot]], source[spot]) : colormap[source[spot]]; + uint32_t a = fg2rgb[fg] + bg2rgb[*dest]; uint32_t b = a; a |= 0x01f07c1f; @@ -2802,6 +2898,7 @@ namespace swrenderer *dest++ = RGB32k.All[a & (a >> 15)]; xfrac += xstep; yfrac += ystep; + viewpos_x += step_viewpos_x; } while (--count); } else @@ -2812,7 +2909,8 @@ namespace swrenderer do { spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - uint32_t a = fg2rgb[colormap[source[spot]]] + bg2rgb[*dest]; + uint32_t fg = num_dynlights != 0 ? AddLights(dynlights, num_dynlights, viewpos_x, colormap[source[spot]], source[spot]) : colormap[source[spot]]; + uint32_t a = fg2rgb[fg] + bg2rgb[*dest]; uint32_t b = a; a |= 0x01f07c1f; @@ -2823,6 +2921,7 @@ namespace swrenderer *dest++ = RGB32k.All[a & (a >> 15)]; xfrac += xstep; yfrac += ystep; + viewpos_x += step_viewpos_x; } while (--count); } } @@ -2834,7 +2933,7 @@ namespace swrenderer do { spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t fg = colormap[source[spot]]; + uint32_t fg = num_dynlights != 0 ? AddLights(dynlights, num_dynlights, viewpos_x, colormap[source[spot]], source[spot]) : colormap[source[spot]]; uint32_t bg = *dest; int r = MAX((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 0); int g = MAX((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 0); @@ -2843,6 +2942,7 @@ namespace swrenderer xfrac += xstep; yfrac += ystep; + viewpos_x += step_viewpos_x; } while (--count); } else @@ -2853,7 +2953,7 @@ namespace swrenderer do { spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - uint32_t fg = colormap[source[spot]]; + uint32_t fg = num_dynlights != 0 ? 
AddLights(dynlights, num_dynlights, viewpos_x, colormap[source[spot]], source[spot]) : colormap[source[spot]]; uint32_t bg = *dest; int r = MAX((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 0); int g = MAX((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 0); @@ -2862,6 +2962,7 @@ namespace swrenderer xfrac += xstep; yfrac += ystep; + viewpos_x += step_viewpos_x; } while (--count); } } @@ -2885,6 +2986,11 @@ namespace swrenderer uint32_t *bg2rgb = _destblend; const PalEntry *palette = GPalette.BaseColors; + const TriLight *dynlights = _dynlights; + int num_dynlights = _num_dynlights; + float viewpos_x = _viewpos_x; + float step_viewpos_x = _step_viewpos_x; + xfrac = _xfrac; yfrac = _yfrac; @@ -2908,7 +3014,8 @@ namespace swrenderer texdata = source[spot]; if (texdata != 0) { - uint32_t a = fg2rgb[colormap[texdata]] + bg2rgb[*dest]; + uint32_t fg = num_dynlights != 0 ? AddLights(dynlights, num_dynlights, viewpos_x, colormap[texdata], texdata) : colormap[texdata]; + uint32_t a = fg2rgb[fg] + bg2rgb[*dest]; uint32_t b = a; a |= 0x01f07c1f; @@ -2921,6 +3028,7 @@ namespace swrenderer dest++; xfrac += xstep; yfrac += ystep; + viewpos_x += step_viewpos_x; } while (--count); } else @@ -2936,7 +3044,8 @@ namespace swrenderer texdata = source[spot]; if (texdata != 0) { - uint32_t a = fg2rgb[colormap[texdata]] + bg2rgb[*dest]; + uint32_t fg = num_dynlights != 0 ? AddLights(dynlights, num_dynlights, viewpos_x, colormap[texdata], texdata) : colormap[texdata]; + uint32_t a = fg2rgb[fg] + bg2rgb[*dest]; uint32_t b = a; a |= 0x01f07c1f; @@ -2949,6 +3058,7 @@ namespace swrenderer dest++; xfrac += xstep; yfrac += ystep; + viewpos_x += step_viewpos_x; } while (--count); } } @@ -2965,7 +3075,7 @@ namespace swrenderer texdata = source[spot]; if (texdata != 0) { - uint32_t fg = colormap[texdata]; + uint32_t fg = num_dynlights != 0 ? 
AddLights(dynlights, num_dynlights, viewpos_x, colormap[texdata], texdata) : colormap[texdata]; uint32_t bg = *dest; int r = MAX((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 0); int g = MAX((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 0); @@ -2975,6 +3085,7 @@ namespace swrenderer dest++; xfrac += xstep; yfrac += ystep; + viewpos_x += step_viewpos_x; } while (--count); } else @@ -2990,7 +3101,7 @@ namespace swrenderer texdata = source[spot]; if (texdata != 0) { - uint32_t fg = colormap[texdata]; + uint32_t fg = num_dynlights != 0 ? AddLights(dynlights, num_dynlights, viewpos_x, colormap[texdata], texdata) : colormap[texdata]; uint32_t bg = *dest; int r = MAX((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 0); int g = MAX((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 0); @@ -3000,6 +3111,7 @@ namespace swrenderer dest++; xfrac += xstep; yfrac += ystep; + viewpos_x += step_viewpos_x; } while (--count); } } diff --git a/src/r_draw_pal.h b/src/r_draw_pal.h index 5766d38f4e..5b901152c1 100644 --- a/src/r_draw_pal.h +++ b/src/r_draw_pal.h @@ -153,6 +153,8 @@ namespace swrenderer FString DebugInfo() override { return "PalSpanCommand"; } protected: + inline static uint8_t AddLights(const TriLight *lights, int num_lights, float viewpos_x, uint8_t fg, uint8_t material); + const uint8_t *_source; const uint8_t *_colormap; dsfixed_t _xfrac; @@ -170,6 +172,10 @@ namespace swrenderer int _color; fixed_t _srcalpha; fixed_t _destalpha; + TriLight *_dynlights; + int _num_dynlights; + float _viewpos_x; + float _step_viewpos_x; }; class DrawSpanPalCommand : public PalSpanCommand { public: void Execute(DrawerThread *thread) override; }; diff --git a/src/r_plane.cpp b/src/r_plane.cpp index ddde435b2f..fce565922b 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -256,63 +256,60 @@ void R_MapPlane (int y, int x1) R_SetDSColorMapLight(basecolormap, GlobVis * fabs(CenterY - y), planeshade); } - if (r_swtruecolor) + if 
(r_dynlights) { - if (r_dynlights) + // Find row position in view space + float zspan = planeheight / (fabs(y + 0.5 - CenterY) / InvZtoScale); + dc_viewpos.X = (float)((x1 + 0.5 - CenterX) / CenterX * zspan); + dc_viewpos.Y = zspan; + dc_viewpos.Z = (float)((CenterY - y - 0.5) / InvZtoScale * zspan); + dc_viewpos_step.X = (float)(zspan / CenterX); + + static TriLight lightbuffer[64 * 1024]; + static int nextlightindex = 0; + + // Setup lights for column + dc_num_lights = 0; + dc_lights = lightbuffer + nextlightindex; + visplane_light *cur_node = ds_light_list; + while (cur_node && nextlightindex < 64 * 1024) { - // Find row position in view space - float zspan = planeheight / (fabs(y + 0.5 - CenterY) / InvZtoScale); - dc_viewpos.X = (float)((x1 + 0.5 - CenterX) / CenterX * zspan); - dc_viewpos.Y = zspan; - dc_viewpos.Z = (float)((CenterY - y - 0.5) / InvZtoScale * zspan); - dc_viewpos_step.X = (float)(zspan / CenterX); + double lightX = cur_node->lightsource->X() - ViewPos.X; + double lightY = cur_node->lightsource->Y() - ViewPos.Y; + double lightZ = cur_node->lightsource->Z() - ViewPos.Z; - static TriLight lightbuffer[64 * 1024]; - static int nextlightindex = 0; + float lx = (float)(lightX * ViewSin - lightY * ViewCos); + float ly = (float)(lightX * ViewTanCos + lightY * ViewTanSin) - dc_viewpos.Y; + float lz = (float)lightZ - dc_viewpos.Z; - // Setup lights for column - dc_num_lights = 0; - dc_lights = lightbuffer + nextlightindex; - visplane_light *cur_node = ds_light_list; - while (cur_node && nextlightindex < 64 * 1024) + // Precalculate the constant part of the dot here so the drawer doesn't have to. 
+ float lconstant = ly * ly + lz * lz; + + // Include light only if it touches this row + float radius = cur_node->lightsource->GetRadius(); + if (radius * radius >= lconstant) { - double lightX = cur_node->lightsource->X() - ViewPos.X; - double lightY = cur_node->lightsource->Y() - ViewPos.Y; - double lightZ = cur_node->lightsource->Z() - ViewPos.Z; + uint32_t red = cur_node->lightsource->GetRed(); + uint32_t green = cur_node->lightsource->GetGreen(); + uint32_t blue = cur_node->lightsource->GetBlue(); - float lx = (float)(lightX * ViewSin - lightY * ViewCos); - float ly = (float)(lightX * ViewTanCos + lightY * ViewTanSin) - dc_viewpos.Y; - float lz = (float)lightZ - dc_viewpos.Z; - - // Precalculate the constant part of the dot here so the drawer doesn't have to. - float lconstant = ly * ly + lz * lz; - - // Include light only if it touches this row - float radius = cur_node->lightsource->GetRadius(); - if (radius * radius >= lconstant) - { - uint32_t red = cur_node->lightsource->GetRed(); - uint32_t green = cur_node->lightsource->GetGreen(); - uint32_t blue = cur_node->lightsource->GetBlue(); - - nextlightindex++; - auto &light = dc_lights[dc_num_lights++]; - light.x = lx; - light.y = lconstant; - light.radius = 256.0f / radius; - light.color = (red << 16) | (green << 8) | blue; - } - - cur_node = cur_node->next; + nextlightindex++; + auto &light = dc_lights[dc_num_lights++]; + light.x = lx; + light.y = lconstant; + light.radius = 256.0f / radius; + light.color = (red << 16) | (green << 8) | blue; } - if (nextlightindex == 64 * 1024) - nextlightindex = 0; - } - else - { - dc_num_lights = 0; + cur_node = cur_node->next; } + + if (nextlightindex == 64 * 1024) + nextlightindex = 0; + } + else + { + dc_num_lights = 0; } ds_y = y; From 6f86c11058ba2980aa85a4756d1bd6425ad8c81b Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 25 Dec 2016 07:56:09 +0100 Subject: [PATCH 559/912] DrawWallAdd1Pal cannot do dynamic lights --- src/r_draw.cpp | 4 +++- 
src/r_draw_pal.cpp | 12 ++---------- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index f629c027da..ae039d6428 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -669,8 +669,10 @@ namespace swrenderer { if (r_swtruecolor) DrawerCommandQueue::QueueCommand(); - else + else if (drawerargs::dc_num_lights == 0) DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); } void R_DrawWallAddCol4() diff --git a/src/r_draw_pal.cpp b/src/r_draw_pal.cpp index 6302553402..a6b6a79463 100644 --- a/src/r_draw_pal.cpp +++ b/src/r_draw_pal.cpp @@ -387,10 +387,6 @@ namespace swrenderer uint8_t *dest = _dest; int bits = _fracbits; int pitch = _pitch; - TriLight *dynlights = _dynlights; - int num_dynlights = _num_dynlights; - float viewpos_z = _viewpos_z; - float step_viewpos_z = _step_viewpos_z; uint32_t *fg2rgb = _srcblend; uint32_t *bg2rgb = _destblend; @@ -403,8 +399,6 @@ namespace swrenderer frac += fracstep * thread->skipped_by_thread(_dest_y); fracstep *= thread->num_cores; pitch *= thread->num_cores; - viewpos_z += step_viewpos_z * thread->skipped_by_thread(_dest_y); - step_viewpos_z *= thread->num_cores; if (!r_blendmethod) { @@ -413,14 +407,13 @@ namespace swrenderer uint8_t pix = source[frac >> bits]; if (pix != 0) { - uint8_t lit = num_dynlights != 0 ? AddLights(dynlights, num_dynlights, viewpos_z, colormap[pix], pix) : colormap[pix]; + uint8_t lit = colormap[pix]; uint32_t fg = fg2rgb[lit]; uint32_t bg = bg2rgb[*dest]; fg = (fg + bg) | 0x1f07c1f; *dest = RGB32k.All[fg & (fg >> 15)]; } - viewpos_z += step_viewpos_z; frac += fracstep; dest += pitch; } while (--count); @@ -433,14 +426,13 @@ namespace swrenderer uint8_t pix = source[frac >> bits]; if (pix != 0) { - uint8_t lit = num_dynlights != 0 ? 
AddLights(dynlights, num_dynlights, viewpos_z, colormap[pix], pix) : colormap[pix]; + uint8_t lit = colormap[pix]; uint32_t r = MIN(GPalette.BaseColors[lit].r + GPalette.BaseColors[*dest].r, 255); uint32_t g = MIN(GPalette.BaseColors[lit].g + GPalette.BaseColors[*dest].g, 255); uint32_t b = MIN(GPalette.BaseColors[lit].b + GPalette.BaseColors[*dest].b, 255); *dest = RGB256k.RGB[r>>2][g>>2][b>>2]; } - viewpos_z += step_viewpos_z; frac += fracstep; dest += pitch; } while (--count); From 08fd81802d518e0fbb1eb537ceefcea5d0661465 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 25 Dec 2016 08:01:21 +0100 Subject: [PATCH 560/912] Remove useRt --- src/r_segs.cpp | 8 ++++---- src/r_things.cpp | 14 +++++++------- src/r_things.h | 4 ++-- src/v_draw.cpp | 2 +- 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/r_segs.cpp b/src/r_segs.cpp index a2d02c3f9e..eff303acb6 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -153,7 +153,7 @@ inline bool IsFogBoundary (sector_t *front, sector_t *back) float *MaskedSWall; float MaskedScaleY; -static void BlastMaskedColumn (FTexture *tex, bool useRt) +static void BlastMaskedColumn (FTexture *tex) { // calculate lighting if (fixedcolormap == NULL && fixedlightlev < 0) @@ -176,7 +176,7 @@ static void BlastMaskedColumn (FTexture *tex, bool useRt) // when forming multipatched textures (see r_data.c). 
// draw the texture - R_DrawMaskedColumn(tex, maskedtexturecol[dc_x], useRt); + R_DrawMaskedColumn(tex, maskedtexturecol[dc_x]); rw_light += rw_lightstep; spryscale += rw_scalestep; } @@ -415,7 +415,7 @@ void R_RenderMaskedSegRange (drawseg_t *ds, int x1, int x2) { for (dc_x = x1; dc_x < x2; ++dc_x) { - BlastMaskedColumn (tex, false); + BlastMaskedColumn (tex); } } } @@ -2309,7 +2309,7 @@ static void R_RenderDecal (side_t *wall, DBaseDecal *decal, drawseg_t *clipper, { // calculate lighting R_SetColorMapLight(usecolormap, rw_light, wallshade); } - R_WallSpriteColumn (false); + R_WallSpriteColumn (); dc_x++; } } diff --git a/src/r_things.cpp b/src/r_things.cpp index 7b945a44cb..0e1fbef56d 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -258,7 +258,7 @@ double sprtopscreen; bool sprflipvert; -void R_DrawMaskedColumnBgra(FTexture *tex, fixed_t col, bool useRt, bool unmasked) +void R_DrawMaskedColumnBgra(FTexture *tex, fixed_t col, bool unmasked) { fixed_t saved_iscale = dc_iscale; // Save this because we need to modify it for mipmaps @@ -374,12 +374,12 @@ void R_DrawMaskedColumnBgra(FTexture *tex, fixed_t col, bool useRt, bool unmaske dc_iscale = saved_iscale; } -void R_DrawMaskedColumn (FTexture *tex, fixed_t col, bool useRt, bool unmasked) +void R_DrawMaskedColumn (FTexture *tex, fixed_t col, bool unmasked) { // Handle the linear filtered version in a different function to reduce chances of merge conflicts from zdoom. 
- if (r_swtruecolor && !drawer_needs_pal_input && !useRt) // To do: add support to R_DrawColumnHoriz_rgba + if (r_swtruecolor && !drawer_needs_pal_input) // To do: add support to R_DrawColumnHoriz_rgba { - R_DrawMaskedColumnBgra(tex, col, useRt, unmasked); + R_DrawMaskedColumnBgra(tex, col, unmasked); return; } @@ -669,14 +669,14 @@ void R_DrawWallSprite(vissprite_t *spr) R_SetColorMapLight(usecolormap, rw_light, shade); } if (!R_ClipSpriteColumnWithPortals(spr)) - R_WallSpriteColumn(false); + R_WallSpriteColumn(); dc_x++; } } R_FinishSetPatchStyle(); } -void R_WallSpriteColumn (bool useRt) +void R_WallSpriteColumn () { float iscale = swall[dc_x] * MaskedScaleY; dc_iscale = FLOAT2FIXED(iscale); @@ -687,7 +687,7 @@ void R_WallSpriteColumn (bool useRt) sprtopscreen = CenterY - dc_texturemid * spryscale; dc_texturefrac = 0; - R_DrawMaskedColumn(WallSpriteTile, lwall[dc_x], useRt); + R_DrawMaskedColumn(WallSpriteTile, lwall[dc_x]); rw_light += rw_lightstep; } diff --git a/src/r_things.h b/src/r_things.h index 657be32c53..ac8345c708 100644 --- a/src/r_things.h +++ b/src/r_things.h @@ -129,8 +129,8 @@ extern double pspriteyscale; extern FTexture *WallSpriteTile; -void R_DrawMaskedColumn (FTexture *texture, fixed_t column, bool useRt, bool unmasked = false); -void R_WallSpriteColumn (bool useRt); +void R_DrawMaskedColumn (FTexture *texture, fixed_t column, bool unmasked = false); +void R_WallSpriteColumn (); void R_CacheSprite (spritedef_t *sprite); void R_SortVisSprites (int (*compare)(const void *, const void *), size_t first); diff --git a/src/v_draw.cpp b/src/v_draw.cpp index df5dd5cf0e..1b23d55668 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -281,7 +281,7 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) while (dc_x < x2_i) { - R_DrawMaskedColumn(img, frac, false, !parms.masked); + R_DrawMaskedColumn(img, frac, !parms.masked); dc_x++; frac += xiscale_i; } From 5ec989d563b3baed133d1a84580d82be8dcab965 Mon Sep 17 00:00:00 2001 From: Magnus 
Norddahl Date: Sun, 25 Dec 2016 08:08:01 +0100 Subject: [PATCH 561/912] Make particles always square --- src/r_things.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/r_things.cpp b/src/r_things.cpp index 0e1fbef56d..4e63372e26 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -2567,7 +2567,7 @@ void R_ProjectParticle (particle_t *particle, const sector_t *sector, int shade, if (x1 >= x2) return; - yscale = YaspectMul * xscale; + yscale = xscale; // YaspectMul is not needed for particles as they should always be square ty = particle->Pos.Z - ViewPos.Z; y1 = xs_RoundToInt(CenterY - (ty + psize) * yscale); y2 = xs_RoundToInt(CenterY - (ty - psize) * yscale); From 66b9e2208d827908fced4cc4810464f48d2a6a34 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 25 Dec 2016 11:07:50 +0100 Subject: [PATCH 562/912] Draw a rect where the voxels would be --- src/CMakeLists.txt | 1 + src/r_things.cpp | 9 ++-- src/r_things.h | 5 -- src/r_voxel.cpp | 124 +++++++++++++++++++++++++++++++++++++++++++++ src/r_voxel.h | 31 ++++++++++++ 5 files changed, 161 insertions(+), 9 deletions(-) create mode 100644 src/r_voxel.cpp create mode 100644 src/r_voxel.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index ede6c9a521..2fb046924b 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -826,6 +826,7 @@ set( FASTMATH_PCH_SOURCES r_segs.cpp r_sky.cpp r_things.cpp + r_voxel.cpp r_walldraw.cpp s_advsound.cpp s_environment.cpp diff --git a/src/r_things.cpp b/src/r_things.cpp index 4e63372e26..472ecc8ea7 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -65,6 +65,7 @@ #include "r_data/voxels.h" #include "p_local.h" #include "p_maputl.h" +#include "r_voxel.h" EXTERN_CVAR(Bool, st_scale) EXTERN_CVAR(Bool, r_shadercolormaps) @@ -691,9 +692,9 @@ void R_WallSpriteColumn () rw_light += rw_lightstep; } +#if 0 void R_DrawVisVoxel(vissprite_t *spr, int minslabz, int maxslabz, short *cliptop, short *clipbot) { -#if 0 int flags = 0; // Do setup for 
blending. @@ -761,8 +762,8 @@ void R_DrawVisVoxel(vissprite_t *spr, int minslabz, int maxslabz, short *cliptop R_FinishSetPatchStyle(); NetUpdate(); -#endif } +#endif // // R_ProjectSprite @@ -2842,12 +2843,12 @@ inline int sgn(int v) return v < 0 ? -1 : v > 0 ? 1 : 0; } +#if 0 void R_DrawVoxel(const FVector3 &globalpos, FAngle viewangle, const FVector3 &dasprpos, DAngle dasprang, fixed_t daxscale, fixed_t dayscale, FVoxel *voxobj, FSWColormap *colormap, int colormapnum, short *daumost, short *dadmost, int minslabz, int maxslabz, int flags) { -#if 0 int i, j, k, x, y, syoff, ggxstart, ggystart, nxoff; fixed_t cosang, sinang, sprcosang, sprsinang; int backx, backy, gxinc, gyinc; @@ -3175,8 +3176,8 @@ void R_DrawVoxel(const FVector3 &globalpos, FAngle viewangle, } } } -#endif } +#endif //========================================================================== // diff --git a/src/r_things.h b/src/r_things.h index ac8345c708..2fa8786a90 100644 --- a/src/r_things.h +++ b/src/r_things.h @@ -144,11 +144,6 @@ void R_CheckOffscreenBuffer(int width, int height, bool spansonly); enum { DVF_OFFSCREEN = 1, DVF_SPANSONLY = 2, DVF_MIRRORED = 4 }; -void R_DrawVoxel(const FVector3 &viewpos, FAngle viewangle, - const FVector3 &sprpos, DAngle dasprang, - fixed_t daxscale, fixed_t dayscale, struct FVoxel *voxobj, - FSWColormap *colormap, int colormapnum, short *daumost, short *dadmost, int minslabz, int maxslabz, int flags); - void R_ClipVisSprite (vissprite_t *vis, int xl, int xh); } diff --git a/src/r_voxel.cpp b/src/r_voxel.cpp new file mode 100644 index 0000000000..115ff610c2 --- /dev/null +++ b/src/r_voxel.cpp @@ -0,0 +1,124 @@ +/* +** Voxel rendering +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. 
+** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. +** +*/ + +#include +#include "templates.h" +#include "doomdef.h" +#include "sbar.h" +#include "r_data/r_translate.h" +#include "r_data/colormaps.h" +#include "r_data/voxels.h" +#include "r_data/sprites.h" +#include "d_net.h" +#include "po_man.h" +#include "r_things.h" +#include "r_draw.h" +#include "r_thread.h" +#include "r_utility.h" +#include "r_main.h" +#include "r_voxel.h" + +namespace swrenderer +{ + void R_DrawVisVoxel(vissprite_t *sprite, int minZ, int maxZ, short *cliptop, short *clipbottom) + { + R_SetColorMapLight(sprite->Style.BaseColormap, 0, sprite->Style.ColormapNum << FRACBITS); + bool visible = R_SetPatchStyle(sprite->Style.RenderStyle, sprite->Style.Alpha, sprite->Translation, sprite->FillColor); + if (!visible) + return; + + FVector3 view_origin = sprite->pa.vpos; + FAngle view_angle = sprite->pa.vang; + DVector3 sprite_origin = { sprite->gpos.X, sprite->gpos.Y, sprite->gpos.Z }; + DAngle sprite_angle = sprite->Angle; + double sprite_xscale = FIXED2DBL(sprite->xscale); + double sprite_yscale = sprite->yscale; + FVoxel *voxel = sprite->voxel; + + int miplevel = 0;//voxel->NumMips; + const FVoxelMipLevel &mip = voxel->Mips[miplevel]; + if (mip.SlabData == nullptr) + return; + + /* + double spriteSin = sprite_angle.Sin(); + double spriteCos = sprite_angle.Cos(); + double pivotX = 
(mip.Pivot.X * spriteSin - mip.Pivot.Y * spriteCos); + double pivotY = (mip.Pivot.X * spriteCos + mip.Pivot.Y * spriteSin); + double pivotZ = -mip.Pivot.Z; + */ + + R_FillBox(sprite_origin, 5.0 * sprite_xscale, 5.0 * sprite_yscale, 16, cliptop, clipbottom, false, false); + } + + void R_FillBox(DVector3 origin, double extentX, double extentY, int color, short *cliptop, short *clipbottom, bool viewspace, bool pixelstretch) + { + double viewX, viewY, viewZ; + if (viewspace) + { + viewX = origin.X; + viewY = origin.Y; + viewZ = origin.Z; + } + else // world space + { + double translatedX = origin.X - ViewPos.X; + double translatedY = origin.Y - ViewPos.Y; + double translatedZ = origin.Z - ViewPos.Z; + viewX = translatedX * ViewSin - translatedY * ViewCos; + viewY = translatedZ; + viewZ = translatedX * ViewTanCos + translatedY * ViewTanSin; + } + + if (viewZ < 0.01f) + return; + + double screenX = CenterX + viewX / viewZ * CenterX; + double screenY = CenterY - viewY / viewZ * InvZtoScale; + double screenExtentX = extentX / viewZ * CenterX; + double screenExtentY = pixelstretch ? screenExtentX * YaspectMul : screenExtentX; + + int x1 = MAX((int)(screenX - screenExtentX), 0); + int x2 = MIN((int)(screenX + screenExtentX + 0.5f), viewwidth - 1); + int y1 = MAX((int)(screenY - screenExtentY), 0); + int y2 = MIN((int)(screenY + screenExtentY + 0.5f), viewheight - 1); + + int pixelsize = r_swtruecolor ? 
4 : 1; + + if (y1 < y2) + { + for (int x = x1; x < x2; x++) + { + int columnY1 = MAX(y1, (int)cliptop[x]); + int columnY2 = MIN(y2, (int)clipbottom[x]); + if (columnY1 < columnY2) + { + using namespace drawerargs; + dc_dest = dc_destorg + (dc_pitch * columnY1 + x) * pixelsize; + dc_color = color; + dc_count = columnY2 - columnY1; + R_FillColumn(); + } + } + } + } +} diff --git a/src/r_voxel.h b/src/r_voxel.h new file mode 100644 index 0000000000..159e26f6eb --- /dev/null +++ b/src/r_voxel.h @@ -0,0 +1,31 @@ +/* +** Voxel rendering +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. 
+** +*/ + +#pragma once + +namespace swrenderer +{ + struct vissprite_t; + + void R_DrawVisVoxel(vissprite_t *sprite, int minZ, int maxZ, short *cliptop, short *clipbottom); + void R_FillBox(DVector3 origin, double extentX, double extentY, int color, short *cliptop, short *clipbottom, bool viewspace, bool pixelstretch); +} From 2901e2d8342d8e76a6af641298636536735409a1 Mon Sep 17 00:00:00 2001 From: "alexey.lysiuk" Date: Sun, 25 Dec 2016 17:43:13 +0200 Subject: [PATCH 563/912] Added missing Intel intrinsics #include --- src/r_draw_pal.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/r_draw_pal.cpp b/src/r_draw_pal.cpp index a6b6a79463..f700be7e27 100644 --- a/src/r_draw_pal.cpp +++ b/src/r_draw_pal.cpp @@ -33,6 +33,7 @@ ** */ +#include #include "templates.h" #include "doomtype.h" #include "doomdef.h" From 100b80143a109f0422bab7912d79d9fa15f74c2b Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 26 Dec 2016 05:09:01 +0100 Subject: [PATCH 564/912] Add basic voxel drawing showing how to use the input data structures --- src/r_voxel.cpp | 60 ++++++++++++++++++++++++++++++++++++++++++++----- src/r_voxel.h | 5 +++++ 2 files changed, 59 insertions(+), 6 deletions(-) diff --git a/src/r_voxel.cpp b/src/r_voxel.cpp index 115ff610c2..dad4fed160 100644 --- a/src/r_voxel.cpp +++ b/src/r_voxel.cpp @@ -54,20 +54,68 @@ namespace swrenderer double sprite_yscale = sprite->yscale; FVoxel *voxel = sprite->voxel; + // To do: calculate the mipmap level based on distance, sprite scale and voxel extents int miplevel = 0;//voxel->NumMips; + const FVoxelMipLevel &mip = voxel->Mips[miplevel]; if (mip.SlabData == nullptr) return; - /* double spriteSin = sprite_angle.Sin(); double spriteCos = sprite_angle.Cos(); - double pivotX = (mip.Pivot.X * spriteSin - mip.Pivot.Y * spriteCos); - double pivotY = (mip.Pivot.X * spriteCos + mip.Pivot.Y * spriteSin); - double pivotZ = -mip.Pivot.Z; - */ + + DVector2 dirX(spriteSin * sprite_xscale, -spriteCos * sprite_xscale); + DVector2 
dirY(spriteCos * sprite_xscale, spriteSin * sprite_xscale); + float dirZ = -sprite_yscale; + + DVector3 voxel_origin = sprite_origin; + voxel_origin.X -= dirX.X * mip.Pivot.X + dirX.Y * mip.Pivot.Y; + voxel_origin.Y -= dirY.X * mip.Pivot.X + dirY.Y * mip.Pivot.Y; + voxel_origin.Z -= dirZ * mip.Pivot.Z; + + // To do: do this loop sorted back to front: + + for (int x = 0; x < mip.SizeX; x++) + { + for (int y = 0; y < mip.SizeY; y++) + { + kvxslab_t *slab_start = R_GetSlabStart(mip, x, y); + kvxslab_t *slab_end = R_GetSlabEnd(mip, x, y); + + for (kvxslab_t *slab = slab_start; slab != slab_end; slab = R_NextSlab(slab)) + { + // To do: check slab->backfacecull + + for (int i = 0; i < slab->zleng; i++) + { + int z = slab->ztop + i; + uint8_t color = slab->col[i]; + + DVector3 voxel_pos = voxel_origin; + voxel_pos.X += dirX.X * x + dirX.Y * y; + voxel_pos.Y += dirY.X * x + dirY.Y * y; + voxel_pos.Z += dirZ * z; + + R_FillBox(voxel_pos, sprite_xscale, sprite_yscale, color, cliptop, clipbottom, false, false); + } + } + } + } + } + + kvxslab_t *R_GetSlabStart(const FVoxelMipLevel &mip, int x, int y) + { + return (kvxslab_t *)&mip.SlabData[mip.OffsetX[x] + (int)mip.OffsetXY[x * (mip.SizeY + 1) + y]]; + } - R_FillBox(sprite_origin, 5.0 * sprite_xscale, 5.0 * sprite_yscale, 16, cliptop, clipbottom, false, false); + kvxslab_t *R_GetSlabEnd(const FVoxelMipLevel &mip, int x, int y) + { + return R_GetSlabStart(mip, x, y + 1); + } + + kvxslab_t *R_NextSlab(kvxslab_t *slab) + { + return (kvxslab_t*)(((uint8_t*)slab) + 3 + slab->zleng); } void R_FillBox(DVector3 origin, double extentX, double extentY, int color, short *cliptop, short *clipbottom, bool viewspace, bool pixelstretch) diff --git a/src/r_voxel.h b/src/r_voxel.h index 159e26f6eb..4c1e3f0af7 100644 --- a/src/r_voxel.h +++ b/src/r_voxel.h @@ -22,10 +22,15 @@ #pragma once +struct kvxslab_t; + namespace swrenderer { struct vissprite_t; void R_DrawVisVoxel(vissprite_t *sprite, int minZ, int maxZ, short *cliptop, short 
*clipbottom); void R_FillBox(DVector3 origin, double extentX, double extentY, int color, short *cliptop, short *clipbottom, bool viewspace, bool pixelstretch); + kvxslab_t *R_GetSlabStart(const FVoxelMipLevel &mip, int x, int y); + kvxslab_t *R_GetSlabEnd(const FVoxelMipLevel &mip, int x, int y); + kvxslab_t *R_NextSlab(kvxslab_t *slab); } From 5d6d75385e7170f4f1a3216be913a04d2328e82d Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Mon, 26 Dec 2016 07:41:52 -0500 Subject: [PATCH 565/912] - pulled some things out of the loop for the fill drawer --- src/r_draw_pal.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/r_draw_pal.cpp b/src/r_draw_pal.cpp index f700be7e27..15846977de 100644 --- a/src/r_draw_pal.cpp +++ b/src/r_draw_pal.cpp @@ -1495,11 +1495,11 @@ namespace swrenderer } else { + int src_r = ((_srccolor >> 16) & 0xff) * _srcalpha; + int src_g = ((_srccolor >> 0) & 0xff) * _srcalpha; + int src_b = ((_srccolor >> 8) & 0xff) * _srcalpha; do { - int src_r = ((_srccolor >> 16) & 0xff) * _srcalpha; - int src_g = ((_srccolor >> 0) & 0xff) * _srcalpha; - int src_b = ((_srccolor >> 8) & 0xff) * _srcalpha; int r = clamp((src_r + pal[*dest].r * _destalpha)>>18, 0, 255); int g = clamp((src_g + pal[*dest].g * _destalpha)>>18, 0, 255); int b = clamp((src_b + pal[*dest].b * _destalpha)>>18, 0, 255); @@ -1551,11 +1551,11 @@ namespace swrenderer } else { + int src_r = ((_srccolor >> 16) & 0xff) * _srcalpha; + int src_g = ((_srccolor >> 0) & 0xff) * _srcalpha; + int src_b = ((_srccolor >> 8) & 0xff) * _srcalpha; do { - int src_r = ((_srccolor >> 16) & 0xff) * _srcalpha; - int src_g = ((_srccolor >> 0) & 0xff) * _srcalpha; - int src_b = ((_srccolor >> 8) & 0xff) * _srcalpha; int r = clamp((src_r + pal[*dest].r * _destalpha)>>18, 0, 255); int g = clamp((src_g + pal[*dest].g * _destalpha)>>18, 0, 255); int b = clamp((src_b + pal[*dest].b * _destalpha)>>18, 0, 255); From 80228562b346934b201e54e916637182be361bbc Mon Sep 17 00:00:00 
2001 From: Rachael Alexanderson Date: Mon, 26 Dec 2016 08:05:04 -0500 Subject: [PATCH 566/912] - Fixed problems with FillSub and FillRevSub drawers. --- src/r_draw_pal.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/r_draw_pal.cpp b/src/r_draw_pal.cpp index 15846977de..90dcc9c3b3 100644 --- a/src/r_draw_pal.cpp +++ b/src/r_draw_pal.cpp @@ -1610,9 +1610,9 @@ namespace swrenderer int src_g = ((_srccolor >> 0) & 0xff) * _srcalpha; int src_b = ((_srccolor >> 8) & 0xff) * _srcalpha; int bg = *dest; - int r = MAX((src_r * _srcalpha - palette[bg].r * _destalpha)>>18, 0); - int g = MAX((src_g * _srcalpha - palette[bg].g * _destalpha)>>18, 0); - int b = MAX((src_b * _srcalpha - palette[bg].b * _destalpha)>>18, 0); + int r = MAX((-src_r + palette[bg].r * _destalpha)>>18, 0); + int g = MAX((-src_g + palette[bg].g * _destalpha)>>18, 0); + int b = MAX((-src_b + palette[bg].b * _destalpha)>>18, 0); *dest = RGB256k.RGB[r][g][b]; dest += pitch; @@ -1667,9 +1667,9 @@ namespace swrenderer int src_g = ((_srccolor >> 0) & 0xff) * _srcalpha; int src_b = ((_srccolor >> 8) & 0xff) * _srcalpha; int bg = *dest; - int r = MAX((src_r * _srcalpha - palette[bg].r * _destalpha)>>18, 0); - int g = MAX((src_g * _srcalpha - palette[bg].g * _destalpha)>>18, 0); - int b = MAX((src_b * _srcalpha - palette[bg].b * _destalpha)>>18, 0); + int r = MAX((src_r - palette[bg].r * _destalpha)>>18, 0); + int g = MAX((src_g - palette[bg].g * _destalpha)>>18, 0); + int b = MAX((src_b - palette[bg].b * _destalpha)>>18, 0); *dest = RGB256k.RGB[r][g][b]; dest += pitch; From 470a96d3b21c488abba44b94df1fa14e845a86fd Mon Sep 17 00:00:00 2001 From: Christoph Oelckers Date: Sun, 25 Dec 2016 19:19:49 +0100 Subject: [PATCH 567/912] - fixed: non-damaging attacks should not cause infighting, unless some relevant pain flags are being set. 
--- src/c_console.cpp | 2 +- src/p_interaction.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/c_console.cpp b/src/c_console.cpp index f3b91efde4..0e9b949966 100644 --- a/src/c_console.cpp +++ b/src/c_console.cpp @@ -1577,7 +1577,7 @@ static bool C_HandleKey (event_t *ev, FCommandBuffer &buffer) static TArray command; const size_t length = buffer.Text.Len(); - command.Resize(length + 1); + command.Resize(unsigned(length + 1)); memcpy(&command[0], buffer.Text.GetChars(), length); command[length] = '\0'; diff --git a/src/p_interaction.cpp b/src/p_interaction.cpp index 9aa178e64d..102142d38f 100644 --- a/src/p_interaction.cpp +++ b/src/p_interaction.cpp @@ -1545,7 +1545,7 @@ dopain: target->SetState (target->SeeState); } } - else if (source != target->target && target->OkayToSwitchTarget (source)) + else if ((damage > 0 || fakedPain) && source != target->target && target->OkayToSwitchTarget (source)) { // Target actor is not intent on another actor, // so make him chase after source From 3fe3abc51e100e804410280be64bf5fe9c132d94 Mon Sep 17 00:00:00 2001 From: "alexey.lysiuk" Date: Mon, 26 Dec 2016 15:59:44 +0200 Subject: [PATCH 568/912] Fixed endianness issue in script VM See https://forum.zdoom.org/viewtopic.php?t=54549 --- src/scripting/vm/vm.h | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/src/scripting/vm/vm.h b/src/scripting/vm/vm.h index 1869c410b3..fdf5f3c9fe 100644 --- a/src/scripting/vm/vm.h +++ b/src/scripting/vm/vm.h @@ -21,27 +21,35 @@ typedef VM_UBYTE VM_ATAG; #define VM_EPSILON (1/65536.0) +#ifdef __BIG_ENDIAN__ +#define VM_DEFINE_OP2(TYPE, ARG1, ARG2) TYPE ARG2, ARG1 +#define VM_DEFINE_OP4(TYPE, ARG1, ARG2, ARG3, ARG4) TYPE ARG4, ARG3, ARG2, ARG1 +#else // little endian +#define VM_DEFINE_OP2(TYPE, ARG1, ARG2) TYPE ARG1, ARG2 +#define VM_DEFINE_OP4(TYPE, ARG1, ARG2, ARG3, ARG4) TYPE ARG1, ARG2, ARG3, ARG4 +#endif // __BIG_ENDIAN__ + union VMOP { struct { - VM_UBYTE op, a, b, c; + 
VM_DEFINE_OP4(VM_UBYTE, op, a, b, c); }; struct { - VM_SBYTE pad0, as, bs, cs; + VM_DEFINE_OP4(VM_SBYTE, pad0, as, bs, cs); }; struct { - VM_SWORD pad1:8, i24:24; + VM_DEFINE_OP2(VM_SWORD, pad1:8, i24:24); }; struct { - VM_SWORD pad2:16, i16:16; + VM_DEFINE_OP2(VM_SWORD, pad2:16, i16:16); }; struct { - VM_UHALF pad3, i16u; + VM_DEFINE_OP2(VM_UHALF, pad3, i16u); }; VM_UWORD word; @@ -56,6 +64,9 @@ union VMOP // sar eax,10h }; +#undef VM_DEFINE_OP4 +#undef VM_DEFINE_OP2 + enum { #include "vmops.h" From 66b154a475bd9bf54d62859eed2ca894d712bdc8 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 27 Dec 2016 03:07:50 +0100 Subject: [PATCH 569/912] Voxel mipmap selection --- src/r_voxel.cpp | 85 +++++++++++++++++++++++++++++++++++++------------ 1 file changed, 65 insertions(+), 20 deletions(-) diff --git a/src/r_voxel.cpp b/src/r_voxel.cpp index dad4fed160..0f81124c75 100644 --- a/src/r_voxel.cpp +++ b/src/r_voxel.cpp @@ -46,21 +46,37 @@ namespace swrenderer if (!visible) return; - FVector3 view_origin = sprite->pa.vpos; + DVector3 view_origin = { sprite->pa.vpos.X, sprite->pa.vpos.Y, sprite->pa.vpos.Z }; FAngle view_angle = sprite->pa.vang; DVector3 sprite_origin = { sprite->gpos.X, sprite->gpos.Y, sprite->gpos.Z }; DAngle sprite_angle = sprite->Angle; double sprite_xscale = FIXED2DBL(sprite->xscale); double sprite_yscale = sprite->yscale; FVoxel *voxel = sprite->voxel; - - // To do: calculate the mipmap level based on distance, sprite scale and voxel extents - int miplevel = 0;//voxel->NumMips; + + // Select mipmap level: + + double viewSin = view_angle.Cos(); + double viewCos = view_angle.Sin(); + double logmip = fabs((view_origin.X - sprite_origin.X) * viewCos - (view_origin.Y - sprite_origin.Y) * viewSin); + int miplevel = 0; + while (miplevel < voxel->NumMips - 1 && logmip >= FocalLengthX) + { + logmip *= 0.5; + miplevel++; + } const FVoxelMipLevel &mip = voxel->Mips[miplevel]; if (mip.SlabData == nullptr) return; + minZ >>= miplevel; + maxZ >>= miplevel; + 
sprite_xscale *= (1 << miplevel); + sprite_yscale *= (1 << miplevel); + + // Find voxel cube eigenvectors and origin in world space: + double spriteSin = sprite_angle.Sin(); double spriteCos = sprite_angle.Cos(); @@ -73,30 +89,59 @@ namespace swrenderer voxel_origin.Y -= dirY.X * mip.Pivot.X + dirY.Y * mip.Pivot.Y; voxel_origin.Z -= dirZ * mip.Pivot.Z; - // To do: do this loop sorted back to front: + // Voxel cube walking directions: - for (int x = 0; x < mip.SizeX; x++) + int startX[4] = { 0, mip.SizeX - 1, 0, mip.SizeX - 1 }; + int startY[4] = { 0, 0, mip.SizeY - 1, mip.SizeY - 1 }; + int stepX[4] = { 1, -1, 1, -1 }; + int stepY[4] = { 1, 1, -1, -1 }; + + // The point in cube mipmap local space where voxel sides change from front to backfacing: + + double dx = (view_origin.X - sprite_origin.X) / sprite_xscale; + double dy = (view_origin.Y - sprite_origin.Y) / sprite_xscale; + int backX = (int)(dx * spriteCos - dy * spriteSin + mip.Pivot.X); + int backY = (int)(dy * spriteCos + dx * spriteSin + mip.Pivot.Y); + int endX = clamp(backX, 0, mip.SizeX - 1); + int endY = clamp(backY, 0, mip.SizeY - 1); + + // Draw the voxel cube: + + for (int index = 0; index < 4; index++) { - for (int y = 0; y < mip.SizeY; y++) + if ((stepX[index] < 0 && endX >= startX[index]) || + (stepX[index] > 0 && endX <= startX[index]) || + (stepY[index] < 0 && endY >= startY[index]) || + (stepY[index] > 0 && endY <= startY[index])) continue; + + for (int x = startX[index]; x != endX; x += stepX[index]) { - kvxslab_t *slab_start = R_GetSlabStart(mip, x, y); - kvxslab_t *slab_end = R_GetSlabEnd(mip, x, y); - - for (kvxslab_t *slab = slab_start; slab != slab_end; slab = R_NextSlab(slab)) + for (int y = startY[index]; y != endY; y += stepY[index]) { - // To do: check slab->backfacecull + kvxslab_t *slab_start = R_GetSlabStart(mip, x, y); + kvxslab_t *slab_end = R_GetSlabEnd(mip, x, y); - for (int i = 0; i < slab->zleng; i++) + for (kvxslab_t *slab = slab_start; slab != slab_end; slab = 
R_NextSlab(slab)) { - int z = slab->ztop + i; - uint8_t color = slab->col[i]; + // To do: check slab->backfacecull - DVector3 voxel_pos = voxel_origin; - voxel_pos.X += dirX.X * x + dirX.Y * y; - voxel_pos.Y += dirY.X * x + dirY.Y * y; - voxel_pos.Z += dirZ * z; + int ztop = slab->ztop; + int zbottom = ztop + slab->zleng; - R_FillBox(voxel_pos, sprite_xscale, sprite_yscale, color, cliptop, clipbottom, false, false); + //ztop = MAX(ztop, minZ); + //zbottom = MIN(zbottom, maxZ); + + for (int z = ztop; z < zbottom; z++) + { + uint8_t color = slab->col[z - slab->ztop]; + + DVector3 voxel_pos = voxel_origin; + voxel_pos.X += dirX.X * x + dirX.Y * y; + voxel_pos.Y += dirY.X * x + dirY.Y * y; + voxel_pos.Z += dirZ * z; + + R_FillBox(voxel_pos, sprite_xscale, sprite_yscale, color, cliptop, clipbottom, false, false); + } } } } From bafc985282a4d074034c45c3827884f3719fc798 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 27 Dec 2016 03:31:34 +0100 Subject: [PATCH 570/912] Fix pal particle performance issue --- src/r_draw_pal.cpp | 81 +++++++++++++++++++++++++++++ src/r_draw_pal.h | 16 ++++++ src/r_things.cpp | 125 ++++++++------------------------------------- src/r_things.h | 1 - 4 files changed, 119 insertions(+), 104 deletions(-) diff --git a/src/r_draw_pal.cpp b/src/r_draw_pal.cpp index 90dcc9c3b3..fbe372fb8b 100644 --- a/src/r_draw_pal.cpp +++ b/src/r_draw_pal.cpp @@ -3445,4 +3445,85 @@ namespace swrenderer dest[x] = colormap[dest[x]]; } while (++x <= x2); } + + ///////////////////////////////////////////////////////////////////////////// + + namespace + { + static uint32_t particle_texture[16 * 16] = + { + 1 * 1, 2 * 1, 3 * 1, 4 * 1, 5 * 1, 6 * 1, 7 * 1, 8 * 1, 8 * 1, 7 * 1, 6 * 1, 5 * 1, 4 * 1, 3 * 1, 2 * 1, 1 * 1, + 1 * 2, 2 * 2, 3 * 2, 4 * 2, 5 * 2, 6 * 2, 7 * 2, 8 * 2, 8 * 2, 7 * 2, 6 * 2, 5 * 2, 4 * 2, 3 * 2, 2 * 2, 1 * 2, + 1 * 3, 2 * 3, 3 * 3, 4 * 3, 5 * 3, 6 * 3, 7 * 3, 8 * 3, 8 * 3, 7 * 3, 6 * 3, 5 * 3, 4 * 3, 3 * 3, 2 * 3, 1 * 3, + 1 * 4, 2 * 4, 3 * 
4, 4 * 4, 5 * 4, 6 * 4, 7 * 4, 8 * 4, 8 * 4, 7 * 4, 6 * 4, 5 * 4, 4 * 4, 3 * 4, 2 * 4, 1 * 4, + 1 * 5, 2 * 5, 3 * 5, 4 * 5, 5 * 5, 6 * 5, 7 * 5, 8 * 5, 8 * 5, 7 * 5, 6 * 5, 5 * 5, 4 * 5, 3 * 5, 2 * 5, 1 * 5, + 1 * 6, 2 * 6, 3 * 6, 4 * 6, 5 * 6, 6 * 6, 7 * 6, 8 * 6, 8 * 6, 7 * 6, 6 * 6, 5 * 6, 4 * 6, 3 * 6, 2 * 6, 1 * 6, + 1 * 7, 2 * 7, 3 * 7, 4 * 7, 5 * 7, 6 * 7, 7 * 7, 8 * 7, 8 * 7, 7 * 7, 6 * 7, 5 * 7, 4 * 7, 3 * 7, 2 * 7, 1 * 7, + 1 * 8, 2 * 8, 3 * 8, 4 * 8, 5 * 8, 6 * 8, 7 * 8, 8 * 8, 8 * 8, 7 * 8, 6 * 8, 5 * 8, 4 * 8, 3 * 8, 2 * 8, 1 * 8, + 1 * 8, 2 * 8, 3 * 8, 4 * 8, 5 * 8, 6 * 8, 7 * 8, 8 * 8, 8 * 8, 7 * 8, 6 * 8, 5 * 8, 4 * 8, 3 * 8, 2 * 8, 1 * 8, + 1 * 7, 2 * 7, 3 * 7, 4 * 7, 5 * 7, 6 * 7, 7 * 7, 8 * 7, 8 * 7, 7 * 7, 6 * 7, 5 * 7, 4 * 7, 3 * 7, 2 * 7, 1 * 7, + 1 * 6, 2 * 6, 3 * 6, 4 * 6, 5 * 6, 6 * 6, 7 * 6, 8 * 6, 8 * 6, 7 * 6, 6 * 6, 5 * 6, 4 * 6, 3 * 6, 2 * 6, 1 * 6, + 1 * 5, 2 * 5, 3 * 5, 4 * 5, 5 * 5, 6 * 5, 7 * 5, 8 * 5, 8 * 5, 7 * 5, 6 * 5, 5 * 5, 4 * 5, 3 * 5, 2 * 5, 1 * 5, + 1 * 4, 2 * 4, 3 * 4, 4 * 4, 5 * 4, 6 * 4, 7 * 4, 8 * 4, 8 * 4, 7 * 4, 6 * 4, 5 * 4, 4 * 4, 3 * 4, 2 * 4, 1 * 4, + 1 * 3, 2 * 3, 3 * 3, 4 * 3, 5 * 3, 6 * 3, 7 * 3, 8 * 3, 8 * 3, 7 * 3, 6 * 3, 5 * 3, 4 * 3, 3 * 3, 2 * 3, 1 * 3, + 1 * 2, 2 * 2, 3 * 2, 4 * 2, 5 * 2, 6 * 2, 7 * 2, 8 * 2, 8 * 2, 7 * 2, 6 * 2, 5 * 2, 4 * 2, 3 * 2, 2 * 2, 1 * 2, + 1 * 1, 2 * 1, 3 * 1, 4 * 1, 5 * 1, 6 * 1, 7 * 1, 8 * 1, 8 * 1, 7 * 1, 6 * 1, 5 * 1, 4 * 1, 3 * 1, 2 * 1, 1 * 1 + }; + } + + DrawParticleColumnPalCommand::DrawParticleColumnPalCommand(uint8_t *dest, int dest_y, int pitch, int count, uint32_t fg, uint32_t alpha, uint32_t fracposx) + { + _dest = dest; + _pitch = pitch; + _count = count; + _fg = fg; + _alpha = alpha; + _fracposx = fracposx; + _dest_y = dest_y; + } + + void DrawParticleColumnPalCommand::Execute(DrawerThread *thread) + { + int count = thread->count_for_thread(_dest_y, _count); + if (count <= 0) + return; + + uint8_t *dest = thread->dest_for_thread(_dest_y, _pitch, _dest); + int 
pitch = _pitch * thread->num_cores; + + const uint32_t *source = &particle_texture[(_fracposx >> FRACBITS) * 16]; + uint32_t particle_alpha = _alpha; + + uint32_t fracstep = 16 * FRACUNIT / _count; + uint32_t fracpos = fracstep * thread->skipped_by_thread(_dest_y) + fracstep / 2; + fracstep *= thread->num_cores; + + uint32_t fg_red = (_fg >> 16) & 0xff; + uint32_t fg_green = (_fg >> 8) & 0xff; + uint32_t fg_blue = _fg & 0xff; + + for (int y = 0; y < count; y++) + { + uint32_t alpha = (source[fracpos >> FRACBITS] * particle_alpha) >> 6; + uint32_t inv_alpha = 256 - alpha; + + int bg = *dest; + uint32_t bg_red = GPalette.BaseColors[bg].r; + uint32_t bg_green = GPalette.BaseColors[bg].g; + uint32_t bg_blue = GPalette.BaseColors[bg].b; + + uint32_t red = (fg_red * alpha + bg_red * inv_alpha) / 256; + uint32_t green = (fg_green * alpha + bg_green * inv_alpha) / 256; + uint32_t blue = (fg_blue * alpha + bg_blue * inv_alpha) / 256; + + *dest = RGB256k.All[((red >> 2) << 12) | ((green >> 2) << 6) | (blue >> 2)]; + dest += pitch; + fracpos += fracstep; + } + } + + FString DrawParticleColumnPalCommand::DebugInfo() + { + return "DrawParticle"; + } } diff --git a/src/r_draw_pal.h b/src/r_draw_pal.h index 5b901152c1..79152665cc 100644 --- a/src/r_draw_pal.h +++ b/src/r_draw_pal.h @@ -260,4 +260,20 @@ namespace swrenderer const uint8_t *_colormap; uint8_t *_destorg; }; + + class DrawParticleColumnPalCommand : public DrawerCommand + { + public: + DrawParticleColumnPalCommand(uint8_t *dest, int dest_y, int pitch, int count, uint32_t fg, uint32_t alpha, uint32_t fracposx); + void Execute(DrawerThread *thread) override; + FString DebugInfo() override; + + private: + uint8_t *_dest; + int _pitch; + int _count; + uint32_t _fg; + uint32_t _alpha; + uint32_t _fracposx; + }; } diff --git a/src/r_things.cpp b/src/r_things.cpp index 40fbf782aa..b7883a2424 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -59,6 +59,7 @@ #include "r_segs.h" #include "r_3dfloors.h" #include 
"r_draw_rgba.h" +#include "r_draw_pal.h" #include "v_palette.h" #include "r_data/r_translate.h" #include "r_data/colormaps.h" @@ -2712,105 +2713,9 @@ static void R_DrawMaskedSegsBehindParticle (const vissprite_t *vis) } } -//inline int clamp(int x, int y, int z) { return ((x < y) ? x : (z < y) ? z : y); } - -void R_DrawParticle (vissprite_t *vis) -{ - if (r_swtruecolor) - return R_DrawParticle_rgba(vis); - - DWORD *bg2rgb; - int spacing; - BYTE *dest; - DWORD fg; - BYTE color = vis->Style.BaseColormap->Maps[(vis->Style.ColormapNum << COLORMAPSHIFT) + vis->startfrac]; - int yl = vis->y1; - int ycount = vis->y2 - yl + 1; - int x1 = vis->x1; - int countbase = vis->x2 - x1; - - R_DrawMaskedSegsBehindParticle (vis); - - DrawerCommandQueue::WaitForWorkers(); - - // vis->renderflags holds translucency level (0-255) - fixed_t fglevel, bglevel; - - { - DWORD *fg2rgb; - - fglevel = ((vis->renderflags + 1) << 8) & ~0x3ff; - bglevel = FRACUNIT-fglevel; - fg2rgb = Col2RGB8[fglevel>>10]; - bg2rgb = Col2RGB8[bglevel>>10]; - fg = fg2rgb[color]; - } - - /* - - spacing = RenderTarget->GetPitch() - countbase; - dest = ylookup[yl] + x1 + dc_destorg; - - do - { - int count = countbase; - do - { - DWORD bg = bg2rgb[*dest]; - bg = (fg+bg) | 0x1f07c1f; - *dest++ = RGB32k.All[bg & (bg>>15)]; - } while (--count); - dest += spacing; - } while (--ycount);*/ - - // original was row-wise - // width = countbase - // height = ycount - - spacing = RenderTarget->GetPitch(); - - if (!r_blendmethod) - { - for (int x = x1; x < (x1+countbase); x++) - { - dc_x = x; - if (R_ClipSpriteColumnWithPortals(vis)) - continue; - dest = ylookup[yl] + x + dc_destorg; - for (int y = 0; y < ycount; y++) - { - DWORD bg = bg2rgb[*dest]; - bg = (fg+bg) | 0x1f07c1f; - *dest = RGB32k.All[bg & (bg>>15)]; - dest += spacing; - } - } - } - else - { - for (int x = x1; x < (x1+countbase); x++) - { - dc_x = x; - if (R_ClipSpriteColumnWithPortals(vis)) - continue; - dest = ylookup[yl] + x + dc_destorg; - for (int y = 0; y < 
ycount; y++) - { - uint32_t dest_r = MIN((GPalette.BaseColors[*dest].r * bglevel + GPalette.BaseColors[color].r * fglevel) >> 18, 63); - uint32_t dest_g = MIN((GPalette.BaseColors[*dest].g * bglevel + GPalette.BaseColors[color].g * fglevel) >> 18, 63); - uint32_t dest_b = MIN((GPalette.BaseColors[*dest].b * bglevel + GPalette.BaseColors[color].b * fglevel) >> 18, 63); - - *dest = RGB256k.RGB[dest_r][dest_g][dest_b]; - dest += spacing; - } - } - } -} - -void R_DrawParticle_rgba(vissprite_t *vis) +void R_DrawParticle(vissprite_t *vis) { int spacing; - uint32_t *dest; BYTE color = vis->Style.BaseColormap->Maps[vis->startfrac]; int yl = vis->y1; int ycount = vis->y2 - yl + 1; @@ -2833,13 +2738,27 @@ void R_DrawParticle_rgba(vissprite_t *vis) uint32_t fracstepx = 16 * FRACUNIT / countbase; uint32_t fracposx = fracstepx / 2; - for (int x = x1; x < (x1 + countbase); x++, fracposx += fracstepx) + if (r_swtruecolor) { - dc_x = x; - if (R_ClipSpriteColumnWithPortals(vis)) - continue; - dest = ylookup[yl] + x + (uint32_t*)dc_destorg; - DrawerCommandQueue::QueueCommand(dest, yl, spacing, ycount, fg, alpha, fracposx); + for (int x = x1; x < (x1 + countbase); x++, fracposx += fracstepx) + { + dc_x = x; + if (R_ClipSpriteColumnWithPortals(vis)) + continue; + uint32_t *dest = ylookup[yl] + x + (uint32_t*)dc_destorg; + DrawerCommandQueue::QueueCommand(dest, yl, spacing, ycount, fg, alpha, fracposx); + } + } + else + { + for (int x = x1; x < (x1 + countbase); x++, fracposx += fracstepx) + { + dc_x = x; + if (R_ClipSpriteColumnWithPortals(vis)) + continue; + uint8_t *dest = ylookup[yl] + x + dc_destorg; + DrawerCommandQueue::QueueCommand(dest, yl, spacing, ycount, fg, alpha, fracposx); + } } } diff --git a/src/r_things.h b/src/r_things.h index 2fa8786a90..7f00ce1775 100644 --- a/src/r_things.h +++ b/src/r_things.h @@ -101,7 +101,6 @@ struct vissprite_t }; void R_DrawParticle (vissprite_t *); -void R_DrawParticle_rgba (vissprite_t *); void R_ProjectParticle (particle_t *, const 
sector_t *sector, int shade, int fakeside); From e27702481dec8512ecf5ac8c1ad759b40fa1e359 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 27 Dec 2016 05:12:10 +0100 Subject: [PATCH 571/912] Fix warning --- src/r_voxel.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/r_voxel.cpp b/src/r_voxel.cpp index 0f81124c75..42973ce9ea 100644 --- a/src/r_voxel.cpp +++ b/src/r_voxel.cpp @@ -82,7 +82,7 @@ namespace swrenderer DVector2 dirX(spriteSin * sprite_xscale, -spriteCos * sprite_xscale); DVector2 dirY(spriteCos * sprite_xscale, spriteSin * sprite_xscale); - float dirZ = -sprite_yscale; + double dirZ = -sprite_yscale; DVector3 voxel_origin = sprite_origin; voxel_origin.X -= dirX.X * mip.Pivot.X + dirX.Y * mip.Pivot.Y; From 2659090e1c10e0626b99ec4c1aafdf58bcbb7f75 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 27 Dec 2016 06:31:55 +0100 Subject: [PATCH 572/912] Move renders into folders --- src/CMakeLists.txt | 72 +++++++++++-------- src/gl/system/gl_swframebuffer.cpp | 2 +- src/gl/system/gl_swwipe.cpp | 2 +- .../drawers/poly_triangle.cpp} | 6 +- .../drawers/poly_triangle.h} | 6 +- .../math/poly_intersection.cpp} | 2 +- .../math/poly_intersection.h} | 2 +- .../poly_renderer.cpp} | 2 +- .../poly_renderer.h} | 8 +-- .../scene/poly_cull.cpp} | 4 +- .../scene/poly_cull.h} | 4 +- .../scene/poly_decal.cpp} | 4 +- .../scene/poly_decal.h} | 2 +- .../scene/poly_particle.cpp} | 4 +- .../scene/poly_particle.h} | 2 +- .../scene/poly_plane.cpp} | 6 +- .../scene/poly_plane.h} | 2 +- .../scene/poly_playersprite.cpp} | 6 +- .../scene/poly_playersprite.h} | 0 .../scene/poly_portal.cpp} | 4 +- .../scene/poly_portal.h} | 2 +- .../scene/poly_scene.cpp} | 4 +- .../scene/poly_scene.h} | 20 +++--- .../scene/poly_sky.cpp} | 4 +- .../scene/poly_sky.h} | 2 +- .../scene/poly_sprite.cpp} | 6 +- .../scene/poly_sprite.h} | 2 +- .../scene/poly_wall.cpp} | 6 +- .../scene/poly_wall.h} | 2 +- .../scene/poly_wallsprite.cpp} | 4 +- .../scene/poly_wallsprite.h} | 2 
+- src/portal.cpp | 2 - src/{ => swrenderer/drawers}/r_draw.cpp | 4 +- src/{ => swrenderer/drawers}/r_draw.h | 0 src/{ => swrenderer/drawers}/r_draw_pal.cpp | 4 +- src/{ => swrenderer/drawers}/r_draw_pal.h | 0 src/{ => swrenderer/drawers}/r_draw_rgba.cpp | 4 +- src/{ => swrenderer/drawers}/r_draw_rgba.h | 0 src/{ => swrenderer/drawers}/r_drawers.cpp | 0 src/{ => swrenderer/drawers}/r_drawers.h | 0 src/{ => swrenderer/drawers}/r_thread.cpp | 2 +- src/{ => swrenderer/drawers}/r_thread.h | 0 src/{ => swrenderer}/r_local.h | 4 +- src/{ => swrenderer}/r_main.cpp | 10 +-- src/{ => swrenderer}/r_main.h | 0 src/{ => swrenderer}/r_swrenderer.cpp | 10 +-- src/{ => swrenderer}/r_swrenderer.h | 0 src/{ => swrenderer/scene}/r_3dfloors.cpp | 2 +- src/{ => swrenderer/scene}/r_3dfloors.h | 0 src/{ => swrenderer/scene}/r_bsp.cpp | 6 +- src/{ => swrenderer/scene}/r_bsp.h | 0 src/{ => swrenderer/scene}/r_plane.cpp | 4 +- src/{ => swrenderer/scene}/r_plane.h | 0 src/{ => swrenderer/scene}/r_segs.cpp | 4 +- src/{ => swrenderer/scene}/r_segs.h | 0 src/{ => swrenderer/scene}/r_things.cpp | 6 +- src/{ => swrenderer/scene}/r_things.h | 0 src/{ => swrenderer/scene}/r_voxel.cpp | 6 +- src/{ => swrenderer/scene}/r_voxel.h | 0 src/{ => swrenderer/scene}/r_walldraw.cpp | 6 +- src/v_draw.cpp | 8 +-- src/win32/fb_d3d9.cpp | 2 +- src/win32/hardware.cpp | 2 +- src/win32/win32video.cpp | 2 +- tools/drawergen/precomp.h | 2 +- 65 files changed, 147 insertions(+), 137 deletions(-) rename src/{r_poly_triangle.cpp => polyrenderer/drawers/poly_triangle.cpp} (99%) rename src/{r_poly_triangle.h => polyrenderer/drawers/poly_triangle.h} (98%) rename src/{r_poly_intersection.cpp => polyrenderer/math/poly_intersection.cpp} (99%) rename src/{r_poly_intersection.h => polyrenderer/math/poly_intersection.h} (99%) rename src/{r_poly.cpp => polyrenderer/poly_renderer.cpp} (99%) rename src/{r_poly.h => polyrenderer/poly_renderer.h} (93%) rename src/{r_poly_cull.cpp => polyrenderer/scene/poly_cull.cpp} (99%) rename 
src/{r_poly_cull.h => polyrenderer/scene/poly_cull.h} (95%) rename src/{r_poly_decal.cpp => polyrenderer/scene/poly_decal.cpp} (98%) rename src/{r_poly_decal.h => polyrenderer/scene/poly_decal.h} (96%) rename src/{r_poly_particle.cpp => polyrenderer/scene/poly_particle.cpp} (98%) rename src/{r_poly_particle.h => polyrenderer/scene/poly_particle.h} (96%) rename src/{r_poly_plane.cpp => polyrenderer/scene/poly_plane.cpp} (99%) rename src/{r_poly_plane.h => polyrenderer/scene/poly_plane.h} (97%) rename src/{r_poly_playersprite.cpp => polyrenderer/scene/poly_playersprite.cpp} (99%) rename src/{r_poly_playersprite.h => polyrenderer/scene/poly_playersprite.h} (100%) rename src/{r_poly_portal.cpp => polyrenderer/scene/poly_portal.cpp} (99%) rename src/{r_poly_portal.h => polyrenderer/scene/poly_portal.h} (99%) rename src/{r_poly_scene.cpp => polyrenderer/scene/poly_scene.cpp} (99%) rename src/{r_poly_scene.h => polyrenderer/scene/poly_scene.h} (91%) rename src/{r_poly_sky.cpp => polyrenderer/scene/poly_sky.cpp} (98%) rename src/{r_poly_sky.h => polyrenderer/scene/poly_sky.h} (96%) rename src/{r_poly_sprite.cpp => polyrenderer/scene/poly_sprite.cpp} (99%) rename src/{r_poly_sprite.h => polyrenderer/scene/poly_sprite.h} (96%) rename src/{r_poly_wall.cpp => polyrenderer/scene/poly_wall.cpp} (99%) rename src/{r_poly_wall.h => polyrenderer/scene/poly_wall.h} (98%) rename src/{r_poly_wallsprite.cpp => polyrenderer/scene/poly_wallsprite.cpp} (98%) rename src/{r_poly_wallsprite.h => polyrenderer/scene/poly_wallsprite.h} (96%) rename src/{ => swrenderer/drawers}/r_draw.cpp (99%) rename src/{ => swrenderer/drawers}/r_draw.h (100%) rename src/{ => swrenderer/drawers}/r_draw_pal.cpp (99%) rename src/{ => swrenderer/drawers}/r_draw_pal.h (100%) rename src/{ => swrenderer/drawers}/r_draw_rgba.cpp (99%) rename src/{ => swrenderer/drawers}/r_draw_rgba.h (100%) rename src/{ => swrenderer/drawers}/r_drawers.cpp (100%) rename src/{ => swrenderer/drawers}/r_drawers.h (100%) rename src/{ => 
swrenderer/drawers}/r_thread.cpp (99%) rename src/{ => swrenderer/drawers}/r_thread.h (100%) rename src/{ => swrenderer}/r_local.h (94%) rename src/{ => swrenderer}/r_main.cpp (99%) rename src/{ => swrenderer}/r_main.h (100%) rename src/{ => swrenderer}/r_swrenderer.cpp (98%) rename src/{ => swrenderer}/r_swrenderer.h (100%) rename src/{ => swrenderer/scene}/r_3dfloors.cpp (99%) rename src/{ => swrenderer/scene}/r_3dfloors.h (100%) rename src/{ => swrenderer/scene}/r_bsp.cpp (99%) rename src/{ => swrenderer/scene}/r_bsp.h (100%) rename src/{ => swrenderer/scene}/r_plane.cpp (99%) rename src/{ => swrenderer/scene}/r_plane.h (100%) rename src/{ => swrenderer/scene}/r_segs.cpp (99%) rename src/{ => swrenderer/scene}/r_segs.h (100%) rename src/{ => swrenderer/scene}/r_things.cpp (99%) rename src/{ => swrenderer/scene}/r_things.h (100%) rename src/{ => swrenderer/scene}/r_voxel.cpp (98%) rename src/{ => swrenderer/scene}/r_voxel.h (100%) rename src/{ => swrenderer/scene}/r_walldraw.cpp (99%) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 2fb046924b..2eb5ba784b 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -765,6 +765,13 @@ file( GLOB HEADER_FILES scripting/zscript/*.h scripting/vm/*.h xlat/*.h + swrenderer/*.h + swrenderer/drawers/*.h + swrenderer/scene/*.h + polyrenderer/*.h + polyrenderer/math/*.h + polyrenderer/drawers/*.h + polyrenderer/scene/*.h gl/*.h gl/api/*.h gl/data/*.h @@ -799,35 +806,35 @@ set( NOT_COMPILED_SOURCE_FILES ) set( FASTMATH_PCH_SOURCES - r_swrenderer.cpp - r_poly.cpp - r_poly_scene.cpp - r_poly_portal.cpp - r_poly_cull.cpp - r_poly_decal.cpp - r_poly_particle.cpp - r_poly_plane.cpp - r_poly_playersprite.cpp - r_poly_wall.cpp - r_poly_wallsprite.cpp - r_poly_sprite.cpp - r_poly_sky.cpp - r_poly_triangle.cpp - r_poly_intersection.cpp - r_3dfloors.cpp - r_bsp.cpp - r_draw.cpp - r_draw_pal.cpp - r_draw_rgba.cpp - r_drawers.cpp - r_thread.cpp - r_main.cpp - r_plane.cpp - r_segs.cpp + swrenderer/r_swrenderer.cpp + 
swrenderer/r_main.cpp + swrenderer/drawers/r_draw.cpp + swrenderer/drawers/r_draw_pal.cpp + swrenderer/drawers/r_draw_rgba.cpp + swrenderer/drawers/r_drawers.cpp + swrenderer/drawers/r_thread.cpp + swrenderer/scene/r_3dfloors.cpp + swrenderer/scene/r_bsp.cpp + swrenderer/scene/r_plane.cpp + swrenderer/scene/r_segs.cpp + swrenderer/scene/r_things.cpp + swrenderer/scene/r_voxel.cpp + swrenderer/scene/r_walldraw.cpp + polyrenderer/poly_renderer.cpp + polyrenderer/scene/poly_scene.cpp + polyrenderer/scene/poly_portal.cpp + polyrenderer/scene/poly_cull.cpp + polyrenderer/scene/poly_decal.cpp + polyrenderer/scene/poly_particle.cpp + polyrenderer/scene/poly_plane.cpp + polyrenderer/scene/poly_playersprite.cpp + polyrenderer/scene/poly_wall.cpp + polyrenderer/scene/poly_wallsprite.cpp + polyrenderer/scene/poly_sprite.cpp + polyrenderer/scene/poly_sky.cpp + polyrenderer/drawers/poly_triangle.cpp + polyrenderer/math/poly_intersection.cpp r_sky.cpp - r_things.cpp - r_voxel.cpp - r_walldraw.cpp s_advsound.cpp s_environment.cpp s_playlist.cpp @@ -1441,8 +1448,13 @@ source_group("OpenGL Renderer\\Shaders" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOU source_group("OpenGL Renderer\\System" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/gl/system/.+") source_group("OpenGL Renderer\\Textures" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/gl/textures/.+") source_group("OpenGL Renderer\\Utilities" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/gl/utility/.+") -source_group("Render Core\\Render Headers" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/r_.+\\.h$") -source_group("Render Core\\Render Sources" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/r_.+\\.cpp$") +source_group("Software Renderer" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/swrenderer/.+") +source_group("Software Renderer\\Drawers" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/swrenderer/drawers/.+") +source_group("Software Renderer\\Scene" REGULAR_EXPRESSION 
"^${CMAKE_CURRENT_SOURCE_DIR}/swrenderer/scene/.+") +source_group("Poly Renderer" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/polyrenderer/.+") +source_group("Poly Renderer\\Math" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/polyrenderer/math/.+") +source_group("Poly Renderer\\Drawers" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/polyrenderer/drawers/.+") +source_group("Poly Renderer\\Scene" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/polyrenderer/scene/.+") source_group("Render Data\\Resource Headers" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/r_data/.+\\.h$") source_group("Render Data\\Resource Sources" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/r_data/.+\\.cpp$") source_group("Render Data\\Textures" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/textures/.+") diff --git a/src/gl/system/gl_swframebuffer.cpp b/src/gl/system/gl_swframebuffer.cpp index 5f142d30c7..0d9cdfce9d 100644 --- a/src/gl/system/gl_swframebuffer.cpp +++ b/src/gl/system/gl_swframebuffer.cpp @@ -57,7 +57,7 @@ #include "v_pfx.h" #include "stats.h" #include "doomerrors.h" -#include "r_main.h" +#include "swrenderer/r_main.h" #include "r_data/r_translate.h" #include "f_wipe.h" #include "sbar.h" diff --git a/src/gl/system/gl_swwipe.cpp b/src/gl/system/gl_swwipe.cpp index 44c2203b49..2f51ae5a4f 100644 --- a/src/gl/system/gl_swwipe.cpp +++ b/src/gl/system/gl_swwipe.cpp @@ -53,7 +53,7 @@ #include "v_pfx.h" #include "stats.h" #include "doomerrors.h" -#include "r_main.h" +#include "swrenderer/r_main.h" #include "r_data/r_translate.h" #include "f_wipe.h" #include "sbar.h" diff --git a/src/r_poly_triangle.cpp b/src/polyrenderer/drawers/poly_triangle.cpp similarity index 99% rename from src/r_poly_triangle.cpp rename to src/polyrenderer/drawers/poly_triangle.cpp index 5da23297b6..76e349af74 100644 --- a/src/r_poly_triangle.cpp +++ b/src/polyrenderer/drawers/poly_triangle.cpp @@ -25,7 +25,6 @@ #include "doomdef.h" #include "i_system.h" #include "w_wad.h" -#include "r_local.h" 
#include "v_video.h" #include "doomstat.h" #include "st_stuff.h" @@ -34,8 +33,9 @@ #include "r_data/r_translate.h" #include "v_palette.h" #include "r_data/colormaps.h" -#include "r_poly_triangle.h" -#include "r_draw_rgba.h" +#include "poly_triangle.h" +#include "swrenderer/drawers/r_draw_rgba.h" +#include "swrenderer/r_main.h" CVAR(Bool, r_debug_trisetup, false, 0); diff --git a/src/r_poly_triangle.h b/src/polyrenderer/drawers/poly_triangle.h similarity index 98% rename from src/r_poly_triangle.h rename to src/polyrenderer/drawers/poly_triangle.h index 5b66e80e2b..9d8e0e44b0 100644 --- a/src/r_poly_triangle.h +++ b/src/polyrenderer/drawers/poly_triangle.h @@ -22,9 +22,9 @@ #pragma once -#include "r_draw.h" -#include "r_thread.h" -#include "r_drawers.h" +#include "swrenderer/drawers/r_draw.h" +#include "swrenderer/drawers/r_thread.h" +#include "swrenderer/drawers/r_drawers.h" #include "r_data/r_translate.h" #include "r_data/colormaps.h" diff --git a/src/r_poly_intersection.cpp b/src/polyrenderer/math/poly_intersection.cpp similarity index 99% rename from src/r_poly_intersection.cpp rename to src/polyrenderer/math/poly_intersection.cpp index 5e7ad374b7..ed5e8ef438 100644 --- a/src/r_poly_intersection.cpp +++ b/src/polyrenderer/math/poly_intersection.cpp @@ -23,7 +23,7 @@ #include #include "templates.h" #include "doomdef.h" -#include "r_poly_intersection.h" +#include "poly_intersection.h" IntersectionTest::Result IntersectionTest::plane_aabb(const Vec4f &plane, const AxisAlignedBoundingBox &aabb) { diff --git a/src/r_poly_intersection.h b/src/polyrenderer/math/poly_intersection.h similarity index 99% rename from src/r_poly_intersection.h rename to src/polyrenderer/math/poly_intersection.h index 2ce164e5e7..438146fce3 100644 --- a/src/r_poly_intersection.h +++ b/src/polyrenderer/math/poly_intersection.h @@ -22,7 +22,7 @@ #pragma once -#include "r_poly_triangle.h" +#include "polyrenderer/drawers/poly_triangle.h" #include #include diff --git a/src/r_poly.cpp 
b/src/polyrenderer/poly_renderer.cpp similarity index 99% rename from src/r_poly.cpp rename to src/polyrenderer/poly_renderer.cpp index f7fa339795..031dff683d 100644 --- a/src/r_poly.cpp +++ b/src/polyrenderer/poly_renderer.cpp @@ -26,7 +26,7 @@ #include "sbar.h" #include "r_data/r_translate.h" #include "r_data/r_interpolate.h" -#include "r_poly.h" +#include "poly_renderer.h" #include "gl/data/gl_data.h" #include "d_net.h" #include "po_man.h" diff --git a/src/r_poly.h b/src/polyrenderer/poly_renderer.h similarity index 93% rename from src/r_poly.h rename to src/polyrenderer/poly_renderer.h index 10d2e92aa9..c7de1b1cc8 100644 --- a/src/r_poly.h +++ b/src/polyrenderer/poly_renderer.h @@ -28,10 +28,10 @@ #include #include "doomdata.h" #include "r_utility.h" -#include "r_main.h" -#include "r_poly_portal.h" -#include "r_poly_playersprite.h" -#include "r_poly_sky.h" +#include "swrenderer/r_main.h" +#include "scene/poly_portal.h" +#include "scene/poly_playersprite.h" +#include "scene/poly_sky.h" class AActor; class DCanvas; diff --git a/src/r_poly_cull.cpp b/src/polyrenderer/scene/poly_cull.cpp similarity index 99% rename from src/r_poly_cull.cpp rename to src/polyrenderer/scene/poly_cull.cpp index 02a0f2fc51..38f1502225 100644 --- a/src/r_poly_cull.cpp +++ b/src/polyrenderer/scene/poly_cull.cpp @@ -25,8 +25,8 @@ #include "doomdef.h" #include "sbar.h" #include "r_data/r_translate.h" -#include "r_poly_cull.h" -#include "r_poly.h" +#include "poly_cull.h" +#include "polyrenderer/poly_renderer.h" void PolyCull::CullScene(const TriMatrix &worldToClip, const Vec4f &portalClipPlane) { diff --git a/src/r_poly_cull.h b/src/polyrenderer/scene/poly_cull.h similarity index 95% rename from src/r_poly_cull.h rename to src/polyrenderer/scene/poly_cull.h index 4c0cfe314b..faa8a0740c 100644 --- a/src/r_poly_cull.h +++ b/src/polyrenderer/scene/poly_cull.h @@ -22,8 +22,8 @@ #pragma once -#include "r_poly_triangle.h" -#include "r_poly_intersection.h" +#include 
"polyrenderer/drawers/poly_triangle.h" +#include "polyrenderer/math/poly_intersection.h" enum class LineSegmentRange { diff --git a/src/r_poly_decal.cpp b/src/polyrenderer/scene/poly_decal.cpp similarity index 98% rename from src/r_poly_decal.cpp rename to src/polyrenderer/scene/poly_decal.cpp index 7718d422e1..fbc95a15c3 100644 --- a/src/r_poly_decal.cpp +++ b/src/polyrenderer/scene/poly_decal.cpp @@ -25,8 +25,8 @@ #include "doomdef.h" #include "sbar.h" #include "r_data/r_translate.h" -#include "r_poly_decal.h" -#include "r_poly.h" +#include "poly_decal.h" +#include "polyrenderer/poly_renderer.h" #include "a_sharedglobal.h" void RenderPolyDecal::RenderWallDecals(const TriMatrix &worldToClip, const Vec4f &clipPlane, const seg_t *line, uint32_t subsectorDepth, uint32_t stencilValue) diff --git a/src/r_poly_decal.h b/src/polyrenderer/scene/poly_decal.h similarity index 96% rename from src/r_poly_decal.h rename to src/polyrenderer/scene/poly_decal.h index 068887d90c..48907780a8 100644 --- a/src/r_poly_decal.h +++ b/src/polyrenderer/scene/poly_decal.h @@ -22,7 +22,7 @@ #pragma once -#include "r_poly_triangle.h" +#include "polyrenderer/drawers/poly_triangle.h" class Vec4f; diff --git a/src/r_poly_particle.cpp b/src/polyrenderer/scene/poly_particle.cpp similarity index 98% rename from src/r_poly_particle.cpp rename to src/polyrenderer/scene/poly_particle.cpp index 27888e77b0..3d1142b85b 100644 --- a/src/r_poly_particle.cpp +++ b/src/polyrenderer/scene/poly_particle.cpp @@ -25,8 +25,8 @@ #include "doomdef.h" #include "sbar.h" #include "r_data/r_translate.h" -#include "r_poly_particle.h" -#include "r_poly.h" +#include "poly_particle.h" +#include "polyrenderer/poly_renderer.h" void RenderPolyParticle::Render(const TriMatrix &worldToClip, const Vec4f &clipPlane, particle_t *particle, subsector_t *sub, uint32_t subsectorDepth, uint32_t stencilValue) { diff --git a/src/r_poly_particle.h b/src/polyrenderer/scene/poly_particle.h similarity index 96% rename from 
src/r_poly_particle.h rename to src/polyrenderer/scene/poly_particle.h index fd581fa83c..b3b25b996d 100644 --- a/src/r_poly_particle.h +++ b/src/polyrenderer/scene/poly_particle.h @@ -22,7 +22,7 @@ #pragma once -#include "r_poly_triangle.h" +#include "polyrenderer/drawers/poly_triangle.h" #include "p_effect.h" class Vec4f; diff --git a/src/r_poly_plane.cpp b/src/polyrenderer/scene/poly_plane.cpp similarity index 99% rename from src/r_poly_plane.cpp rename to src/polyrenderer/scene/poly_plane.cpp index e21868dc2f..5a8396c616 100644 --- a/src/r_poly_plane.cpp +++ b/src/polyrenderer/scene/poly_plane.cpp @@ -25,9 +25,9 @@ #include "doomdef.h" #include "sbar.h" #include "r_data/r_translate.h" -#include "r_poly_plane.h" -#include "r_poly_portal.h" -#include "r_poly.h" +#include "poly_plane.h" +#include "poly_portal.h" +#include "polyrenderer/poly_renderer.h" #include "r_sky.h" EXTERN_CVAR(Int, r_3dfloors) diff --git a/src/r_poly_plane.h b/src/polyrenderer/scene/poly_plane.h similarity index 97% rename from src/r_poly_plane.h rename to src/polyrenderer/scene/poly_plane.h index be307b4e23..bf844dcc27 100644 --- a/src/r_poly_plane.h +++ b/src/polyrenderer/scene/poly_plane.h @@ -22,7 +22,7 @@ #pragma once -#include "r_poly_triangle.h" +#include "polyrenderer/drawers/poly_triangle.h" class PolyDrawSectorPortal; class PolyCull; diff --git a/src/r_poly_playersprite.cpp b/src/polyrenderer/scene/poly_playersprite.cpp similarity index 99% rename from src/r_poly_playersprite.cpp rename to src/polyrenderer/scene/poly_playersprite.cpp index cd9584b681..ded5a55b30 100644 --- a/src/r_poly_playersprite.cpp +++ b/src/polyrenderer/scene/poly_playersprite.cpp @@ -25,9 +25,9 @@ #include "doomdef.h" #include "sbar.h" #include "r_data/r_translate.h" -#include "r_poly_playersprite.h" -#include "r_poly.h" -#include "r_things.h" // for pspritexscale +#include "poly_playersprite.h" +#include "polyrenderer/poly_renderer.h" +#include "swrenderer/scene/r_things.h" // for pspritexscale 
EXTERN_CVAR(Bool, r_drawplayersprites) EXTERN_CVAR(Bool, r_deathcamera) diff --git a/src/r_poly_playersprite.h b/src/polyrenderer/scene/poly_playersprite.h similarity index 100% rename from src/r_poly_playersprite.h rename to src/polyrenderer/scene/poly_playersprite.h diff --git a/src/r_poly_portal.cpp b/src/polyrenderer/scene/poly_portal.cpp similarity index 99% rename from src/r_poly_portal.cpp rename to src/polyrenderer/scene/poly_portal.cpp index 580622c1fb..58bb5b0496 100644 --- a/src/r_poly_portal.cpp +++ b/src/polyrenderer/scene/poly_portal.cpp @@ -26,8 +26,8 @@ #include "p_maputl.h" #include "sbar.h" #include "r_data/r_translate.h" -#include "r_poly_portal.h" -#include "r_poly.h" +#include "poly_portal.h" +#include "polyrenderer/poly_renderer.h" #include "gl/data/gl_data.h" extern bool r_showviewer; diff --git a/src/r_poly_portal.h b/src/polyrenderer/scene/poly_portal.h similarity index 99% rename from src/r_poly_portal.h rename to src/polyrenderer/scene/poly_portal.h index 4f23567610..24a812a4f5 100644 --- a/src/r_poly_portal.h +++ b/src/polyrenderer/scene/poly_portal.h @@ -22,7 +22,7 @@ #pragma once -#include "r_poly_scene.h" +#include "poly_scene.h" struct PolyPortalVertexRange { diff --git a/src/r_poly_scene.cpp b/src/polyrenderer/scene/poly_scene.cpp similarity index 99% rename from src/r_poly_scene.cpp rename to src/polyrenderer/scene/poly_scene.cpp index 8bf6d1ce99..51ce53a26f 100644 --- a/src/r_poly_scene.cpp +++ b/src/polyrenderer/scene/poly_scene.cpp @@ -26,8 +26,8 @@ #include "p_maputl.h" #include "sbar.h" #include "r_data/r_translate.h" -#include "r_poly_scene.h" -#include "r_poly.h" +#include "polyrenderer/scene/poly_scene.h" +#include "polyrenderer/poly_renderer.h" #include "gl/data/gl_data.h" CVAR(Bool, r_debug_cull, 0, 0) diff --git a/src/r_poly_scene.h b/src/polyrenderer/scene/poly_scene.h similarity index 91% rename from src/r_poly_scene.h rename to src/polyrenderer/scene/poly_scene.h index 00502e53ee..fbc878fc7c 100644 --- 
a/src/r_poly_scene.h +++ b/src/polyrenderer/scene/poly_scene.h @@ -28,16 +28,16 @@ #include #include "doomdata.h" #include "r_utility.h" -#include "r_main.h" -#include "r_poly_triangle.h" -#include "r_poly_intersection.h" -#include "r_poly_wall.h" -#include "r_poly_sprite.h" -#include "r_poly_wallsprite.h" -#include "r_poly_playersprite.h" -#include "r_poly_particle.h" -#include "r_poly_plane.h" -#include "r_poly_cull.h" +#include "swrenderer/r_main.h" +#include "polyrenderer/drawers/poly_triangle.h" +#include "polyrenderer/math/poly_intersection.h" +#include "poly_wall.h" +#include "poly_sprite.h" +#include "poly_wallsprite.h" +#include "poly_playersprite.h" +#include "poly_particle.h" +#include "poly_plane.h" +#include "poly_cull.h" #include #include diff --git a/src/r_poly_sky.cpp b/src/polyrenderer/scene/poly_sky.cpp similarity index 98% rename from src/r_poly_sky.cpp rename to src/polyrenderer/scene/poly_sky.cpp index d2ea632b85..8c84901b86 100644 --- a/src/r_poly_sky.cpp +++ b/src/polyrenderer/scene/poly_sky.cpp @@ -24,8 +24,8 @@ #include "doomdef.h" #include "sbar.h" #include "r_data/r_translate.h" -#include "r_poly_sky.h" -#include "r_poly_portal.h" +#include "poly_sky.h" +#include "poly_portal.h" #include "r_sky.h" // for skyflatnum PolySkyDome::PolySkyDome() diff --git a/src/r_poly_sky.h b/src/polyrenderer/scene/poly_sky.h similarity index 96% rename from src/r_poly_sky.h rename to src/polyrenderer/scene/poly_sky.h index 1f5a655b9b..1a8cd8ef21 100644 --- a/src/r_poly_sky.h +++ b/src/polyrenderer/scene/poly_sky.h @@ -21,7 +21,7 @@ #pragma once -#include "r_poly_triangle.h" +#include "polyrenderer/drawers/poly_triangle.h" class PolySkyDome { diff --git a/src/r_poly_sprite.cpp b/src/polyrenderer/scene/poly_sprite.cpp similarity index 99% rename from src/r_poly_sprite.cpp rename to src/polyrenderer/scene/poly_sprite.cpp index 1821357535..d344ae78b9 100644 --- a/src/r_poly_sprite.cpp +++ b/src/polyrenderer/scene/poly_sprite.cpp @@ -25,9 +25,9 @@ #include 
"doomdef.h" #include "sbar.h" #include "r_data/r_translate.h" -#include "r_poly_sprite.h" -#include "r_poly.h" -#include "r_poly_intersection.h" +#include "poly_sprite.h" +#include "polyrenderer/poly_renderer.h" +#include "polyrenderer/math/poly_intersection.h" EXTERN_CVAR(Float, transsouls) EXTERN_CVAR(Int, r_drawfuzz) diff --git a/src/r_poly_sprite.h b/src/polyrenderer/scene/poly_sprite.h similarity index 96% rename from src/r_poly_sprite.h rename to src/polyrenderer/scene/poly_sprite.h index 085845e506..fe38dad22e 100644 --- a/src/r_poly_sprite.h +++ b/src/polyrenderer/scene/poly_sprite.h @@ -22,7 +22,7 @@ #pragma once -#include "r_poly_triangle.h" +#include "polyrenderer/drawers/poly_triangle.h" class Vec4f; diff --git a/src/r_poly_wall.cpp b/src/polyrenderer/scene/poly_wall.cpp similarity index 99% rename from src/r_poly_wall.cpp rename to src/polyrenderer/scene/poly_wall.cpp index f9d409b604..5c912dec08 100644 --- a/src/r_poly_wall.cpp +++ b/src/polyrenderer/scene/poly_wall.cpp @@ -28,9 +28,9 @@ #include "p_lnspec.h" #include "sbar.h" #include "r_data/r_translate.h" -#include "r_poly_wall.h" -#include "r_poly_decal.h" -#include "r_poly.h" +#include "poly_wall.h" +#include "poly_decal.h" +#include "polyrenderer/poly_renderer.h" #include "r_sky.h" EXTERN_CVAR(Bool, r_drawmirrors) diff --git a/src/r_poly_wall.h b/src/polyrenderer/scene/poly_wall.h similarity index 98% rename from src/r_poly_wall.h rename to src/polyrenderer/scene/poly_wall.h index 8443a174ad..014110a400 100644 --- a/src/r_poly_wall.h +++ b/src/polyrenderer/scene/poly_wall.h @@ -22,7 +22,7 @@ #pragma once -#include "r_poly_triangle.h" +#include "polyrenderer/drawers/poly_triangle.h" class PolyTranslucentObject; class PolyDrawLinePortal; diff --git a/src/r_poly_wallsprite.cpp b/src/polyrenderer/scene/poly_wallsprite.cpp similarity index 98% rename from src/r_poly_wallsprite.cpp rename to src/polyrenderer/scene/poly_wallsprite.cpp index fe12b271db..197f528ed1 100644 --- a/src/r_poly_wallsprite.cpp 
+++ b/src/polyrenderer/scene/poly_wallsprite.cpp @@ -25,8 +25,8 @@ #include "doomdef.h" #include "sbar.h" #include "r_data/r_translate.h" -#include "r_poly_wallsprite.h" -#include "r_poly.h" +#include "poly_wallsprite.h" +#include "polyrenderer/poly_renderer.h" void RenderPolyWallSprite::Render(const TriMatrix &worldToClip, const Vec4f &clipPlane, AActor *thing, subsector_t *sub, uint32_t subsectorDepth, uint32_t stencilValue) { diff --git a/src/r_poly_wallsprite.h b/src/polyrenderer/scene/poly_wallsprite.h similarity index 96% rename from src/r_poly_wallsprite.h rename to src/polyrenderer/scene/poly_wallsprite.h index 51cf28de6d..75a550748e 100644 --- a/src/r_poly_wallsprite.h +++ b/src/polyrenderer/scene/poly_wallsprite.h @@ -22,7 +22,7 @@ #pragma once -#include "r_poly_triangle.h" +#include "polyrenderer/drawers/poly_triangle.h" class Vec4f; diff --git a/src/portal.cpp b/src/portal.cpp index 121b05e19f..0a21f19149 100644 --- a/src/portal.cpp +++ b/src/portal.cpp @@ -40,8 +40,6 @@ #include "p_local.h" #include "p_blockmap.h" #include "p_lnspec.h" -#include "r_bsp.h" -#include "r_segs.h" #include "c_cvars.h" #include "m_bbox.h" #include "p_tags.h" diff --git a/src/r_draw.cpp b/src/swrenderer/drawers/r_draw.cpp similarity index 99% rename from src/r_draw.cpp rename to src/swrenderer/drawers/r_draw.cpp index 3e31c7e048..6260982459 100644 --- a/src/r_draw.cpp +++ b/src/swrenderer/drawers/r_draw.cpp @@ -38,7 +38,7 @@ #include "doomdef.h" #include "i_system.h" #include "w_wad.h" -#include "r_local.h" +#include "swrenderer/r_local.h" #include "v_video.h" #include "doomstat.h" #include "st_stuff.h" @@ -47,7 +47,7 @@ #include "r_data/r_translate.h" #include "v_palette.h" #include "r_data/colormaps.h" -#include "r_plane.h" +#include "swrenderer/scene/r_plane.h" #include "r_draw.h" #include "r_draw_rgba.h" #include "r_draw_pal.h" diff --git a/src/r_draw.h b/src/swrenderer/drawers/r_draw.h similarity index 100% rename from src/r_draw.h rename to 
src/swrenderer/drawers/r_draw.h diff --git a/src/r_draw_pal.cpp b/src/swrenderer/drawers/r_draw_pal.cpp similarity index 99% rename from src/r_draw_pal.cpp rename to src/swrenderer/drawers/r_draw_pal.cpp index fbe372fb8b..c8adf39656 100644 --- a/src/r_draw_pal.cpp +++ b/src/swrenderer/drawers/r_draw_pal.cpp @@ -39,8 +39,8 @@ #include "doomdef.h" #include "r_defs.h" #include "r_draw.h" -#include "r_main.h" -#include "r_things.h" +#include "swrenderer/r_main.h" +#include "swrenderer/scene/r_things.h" #include "v_video.h" #include "r_draw_pal.h" diff --git a/src/r_draw_pal.h b/src/swrenderer/drawers/r_draw_pal.h similarity index 100% rename from src/r_draw_pal.h rename to src/swrenderer/drawers/r_draw_pal.h diff --git a/src/r_draw_rgba.cpp b/src/swrenderer/drawers/r_draw_rgba.cpp similarity index 99% rename from src/r_draw_rgba.cpp rename to src/swrenderer/drawers/r_draw_rgba.cpp index 4b56856081..b3e970a6a1 100644 --- a/src/r_draw_rgba.cpp +++ b/src/swrenderer/drawers/r_draw_rgba.cpp @@ -27,7 +27,7 @@ #include "doomdef.h" #include "i_system.h" #include "w_wad.h" -#include "r_local.h" +#include "swrenderer/r_local.h" #include "v_video.h" #include "doomstat.h" #include "st_stuff.h" @@ -36,7 +36,7 @@ #include "r_data/r_translate.h" #include "v_palette.h" #include "r_data/colormaps.h" -#include "r_plane.h" +#include "swrenderer/scene/r_plane.h" #include "r_draw_rgba.h" #include "r_drawers.h" #include "gl/data/gl_matrix.h" diff --git a/src/r_draw_rgba.h b/src/swrenderer/drawers/r_draw_rgba.h similarity index 100% rename from src/r_draw_rgba.h rename to src/swrenderer/drawers/r_draw_rgba.h diff --git a/src/r_drawers.cpp b/src/swrenderer/drawers/r_drawers.cpp similarity index 100% rename from src/r_drawers.cpp rename to src/swrenderer/drawers/r_drawers.cpp diff --git a/src/r_drawers.h b/src/swrenderer/drawers/r_drawers.h similarity index 100% rename from src/r_drawers.h rename to src/swrenderer/drawers/r_drawers.h diff --git a/src/r_thread.cpp 
b/src/swrenderer/drawers/r_thread.cpp similarity index 99% rename from src/r_thread.cpp rename to src/swrenderer/drawers/r_thread.cpp index bbb3faf3f9..15eeee4250 100644 --- a/src/r_thread.cpp +++ b/src/swrenderer/drawers/r_thread.cpp @@ -25,7 +25,7 @@ #include "doomdef.h" #include "i_system.h" #include "w_wad.h" -#include "r_local.h" +#include "swrenderer/r_local.h" #include "v_video.h" #include "doomstat.h" #include "st_stuff.h" diff --git a/src/r_thread.h b/src/swrenderer/drawers/r_thread.h similarity index 100% rename from src/r_thread.h rename to src/swrenderer/drawers/r_thread.h diff --git a/src/r_local.h b/src/swrenderer/r_local.h similarity index 94% rename from src/r_local.h rename to src/swrenderer/r_local.h index b0ba8841ee..92fb717abd 100644 --- a/src/r_local.h +++ b/src/swrenderer/r_local.h @@ -34,7 +34,7 @@ // Separate header file for each module. // #include "r_main.h" -#include "r_things.h" -#include "r_draw.h" +#include "scene/r_things.h" +#include "drawers/r_draw.h" #endif // __R_LOCAL_H__ diff --git a/src/r_main.cpp b/src/swrenderer/r_main.cpp similarity index 99% rename from src/r_main.cpp rename to src/swrenderer/r_main.cpp index a83b2f190a..ea5bb64e5b 100644 --- a/src/r_main.cpp +++ b/src/swrenderer/r_main.cpp @@ -35,12 +35,12 @@ #include "m_random.h" #include "m_bbox.h" #include "r_local.h" -#include "r_plane.h" -#include "r_bsp.h" -#include "r_segs.h" -#include "r_3dfloors.h" +#include "scene/r_plane.h" +#include "scene/r_bsp.h" +#include "scene/r_segs.h" +#include "scene/r_3dfloors.h" #include "r_sky.h" -#include "r_draw_rgba.h" +#include "drawers/r_draw_rgba.h" #include "st_stuff.h" #include "c_cvars.h" #include "c_dispatch.h" diff --git a/src/r_main.h b/src/swrenderer/r_main.h similarity index 100% rename from src/r_main.h rename to src/swrenderer/r_main.h diff --git a/src/r_swrenderer.cpp b/src/swrenderer/r_swrenderer.cpp similarity index 98% rename from src/r_swrenderer.cpp rename to src/swrenderer/r_swrenderer.cpp index 
77cbb28eb7..5b0d3b4dd3 100644 --- a/src/r_swrenderer.cpp +++ b/src/swrenderer/r_swrenderer.cpp @@ -37,14 +37,14 @@ #include "v_palette.h" #include "v_video.h" #include "m_png.h" -#include "r_bsp.h" +#include "scene/r_bsp.h" #include "r_swrenderer.h" -#include "r_3dfloors.h" +#include "scene/r_3dfloors.h" #include "textures/textures.h" #include "r_data/voxels.h" -#include "r_draw_rgba.h" -#include "r_drawers.h" -#include "r_poly.h" +#include "drawers/r_draw_rgba.h" +#include "drawers/r_drawers.h" +#include "polyrenderer/poly_renderer.h" #include "p_setup.h" void gl_ParseDefs(); diff --git a/src/r_swrenderer.h b/src/swrenderer/r_swrenderer.h similarity index 100% rename from src/r_swrenderer.h rename to src/swrenderer/r_swrenderer.h diff --git a/src/r_3dfloors.cpp b/src/swrenderer/scene/r_3dfloors.cpp similarity index 99% rename from src/r_3dfloors.cpp rename to src/swrenderer/scene/r_3dfloors.cpp index 87c8af618e..bbd50331c6 100644 --- a/src/r_3dfloors.cpp +++ b/src/swrenderer/scene/r_3dfloors.cpp @@ -9,7 +9,7 @@ #include "doomdef.h" #include "p_local.h" #include "c_dispatch.h" -#include "r_local.h" +#include "swrenderer/r_local.h" #include "r_bsp.h" #include "r_plane.h" #include "c_cvars.h" diff --git a/src/r_3dfloors.h b/src/swrenderer/scene/r_3dfloors.h similarity index 100% rename from src/r_3dfloors.h rename to src/swrenderer/scene/r_3dfloors.h diff --git a/src/r_bsp.cpp b/src/swrenderer/scene/r_bsp.cpp similarity index 99% rename from src/r_bsp.cpp rename to src/swrenderer/scene/r_bsp.cpp index d8a6c2c01a..36984743b1 100644 --- a/src/r_bsp.cpp +++ b/src/swrenderer/scene/r_bsp.cpp @@ -32,10 +32,10 @@ #include "p_lnspec.h" #include "p_setup.h" -#include "r_local.h" -#include "r_main.h" +#include "swrenderer/r_local.h" +#include "swrenderer/r_main.h" #include "r_plane.h" -#include "r_draw.h" +#include "swrenderer/drawers/r_draw.h" #include "r_things.h" #include "r_3dfloors.h" #include "a_sharedglobal.h" diff --git a/src/r_bsp.h b/src/swrenderer/scene/r_bsp.h 
similarity index 100% rename from src/r_bsp.h rename to src/swrenderer/scene/r_bsp.h diff --git a/src/r_plane.cpp b/src/swrenderer/scene/r_plane.cpp similarity index 99% rename from src/r_plane.cpp rename to src/swrenderer/scene/r_plane.cpp index fce565922b..bcd0d5e5f3 100644 --- a/src/r_plane.cpp +++ b/src/swrenderer/scene/r_plane.cpp @@ -42,7 +42,7 @@ #include "doomdef.h" #include "doomstat.h" -#include "r_local.h" +#include "swrenderer/r_local.h" #include "r_sky.h" #include "stats.h" @@ -58,7 +58,7 @@ #include "r_3dfloors.h" #include "v_palette.h" #include "r_data/colormaps.h" -#include "r_draw_rgba.h" +#include "swrenderer/drawers/r_draw_rgba.h" #include "gl/dynlights/gl_dynlight.h" #ifdef _MSC_VER diff --git a/src/r_plane.h b/src/swrenderer/scene/r_plane.h similarity index 100% rename from src/r_plane.h rename to src/swrenderer/scene/r_plane.h diff --git a/src/r_segs.cpp b/src/swrenderer/scene/r_segs.cpp similarity index 99% rename from src/r_segs.cpp rename to src/swrenderer/scene/r_segs.cpp index eff303acb6..0348ba26c9 100644 --- a/src/r_segs.cpp +++ b/src/swrenderer/scene/r_segs.cpp @@ -30,7 +30,7 @@ #include "doomdata.h" #include "p_lnspec.h" -#include "r_local.h" +#include "swrenderer/r_local.h" #include "r_sky.h" #include "v_video.h" @@ -44,7 +44,7 @@ #include "r_plane.h" #include "r_segs.h" #include "r_3dfloors.h" -#include "r_draw.h" +#include "swrenderer/drawers/r_draw.h" #include "v_palette.h" #include "r_data/colormaps.h" diff --git a/src/r_segs.h b/src/swrenderer/scene/r_segs.h similarity index 100% rename from src/r_segs.h rename to src/swrenderer/scene/r_segs.h diff --git a/src/r_things.cpp b/src/swrenderer/scene/r_things.cpp similarity index 99% rename from src/r_things.cpp rename to src/swrenderer/scene/r_things.cpp index 8d8ae3eefc..f0365819e6 100644 --- a/src/r_things.cpp +++ b/src/swrenderer/scene/r_things.cpp @@ -37,7 +37,7 @@ #include "m_swap.h" #include "i_system.h" #include "w_wad.h" -#include "r_local.h" +#include "swrenderer/r_local.h" 
#include "c_console.h" #include "c_cvars.h" #include "c_dispatch.h" @@ -58,8 +58,8 @@ #include "r_plane.h" #include "r_segs.h" #include "r_3dfloors.h" -#include "r_draw_rgba.h" -#include "r_draw_pal.h" +#include "swrenderer/drawers/r_draw_rgba.h" +#include "swrenderer/drawers/r_draw_pal.h" #include "v_palette.h" #include "r_data/r_translate.h" #include "r_data/colormaps.h" diff --git a/src/r_things.h b/src/swrenderer/scene/r_things.h similarity index 100% rename from src/r_things.h rename to src/swrenderer/scene/r_things.h diff --git a/src/r_voxel.cpp b/src/swrenderer/scene/r_voxel.cpp similarity index 98% rename from src/r_voxel.cpp rename to src/swrenderer/scene/r_voxel.cpp index 42973ce9ea..4645982420 100644 --- a/src/r_voxel.cpp +++ b/src/swrenderer/scene/r_voxel.cpp @@ -31,10 +31,10 @@ #include "d_net.h" #include "po_man.h" #include "r_things.h" -#include "r_draw.h" -#include "r_thread.h" +#include "swrenderer/drawers/r_draw.h" +#include "swrenderer/drawers/r_thread.h" #include "r_utility.h" -#include "r_main.h" +#include "swrenderer/r_main.h" #include "r_voxel.h" namespace swrenderer diff --git a/src/r_voxel.h b/src/swrenderer/scene/r_voxel.h similarity index 100% rename from src/r_voxel.h rename to src/swrenderer/scene/r_voxel.h diff --git a/src/r_walldraw.cpp b/src/swrenderer/scene/r_walldraw.cpp similarity index 99% rename from src/r_walldraw.cpp rename to src/swrenderer/scene/r_walldraw.cpp index 7e4f3f5151..41e27d40e9 100644 --- a/src/r_walldraw.cpp +++ b/src/swrenderer/scene/r_walldraw.cpp @@ -27,7 +27,7 @@ #include "doomstat.h" #include "doomdata.h" -#include "r_local.h" +#include "swrenderer/r_local.h" #include "r_sky.h" #include "v_video.h" @@ -35,7 +35,7 @@ #include "a_sharedglobal.h" #include "d_net.h" #include "g_level.h" -#include "r_draw.h" +#include "swrenderer/drawers/r_draw.h" #include "r_bsp.h" #include "r_plane.h" #include "r_segs.h" @@ -43,7 +43,7 @@ #include "v_palette.h" #include "r_data/colormaps.h" #include "gl/dynlights/gl_dynlight.h" 
-#include "r_drawers.h" +#include "swrenderer/drawers/r_drawers.h" namespace swrenderer { diff --git a/src/v_draw.cpp b/src/v_draw.cpp index 1b23d55668..1af386d603 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -43,10 +43,10 @@ #include "r_defs.h" #include "r_utility.h" #ifndef NO_SWRENDER -#include "r_draw.h" -#include "r_draw_rgba.h" -#include "r_main.h" -#include "r_things.h" +#include "swrenderer/drawers/r_draw.h" +#include "swrenderer/drawers/r_draw_rgba.h" +#include "swrenderer/r_main.h" +#include "swrenderer/scene/r_things.h" #endif #include "r_data/r_translate.h" #include "doomstat.h" diff --git a/src/win32/fb_d3d9.cpp b/src/win32/fb_d3d9.cpp index f3a4e426e2..cf627f4d7f 100644 --- a/src/win32/fb_d3d9.cpp +++ b/src/win32/fb_d3d9.cpp @@ -63,7 +63,7 @@ #include "v_pfx.h" #include "stats.h" #include "doomerrors.h" -#include "r_main.h" +#include "swrenderer/r_main.h" #include "r_data/r_translate.h" #include "f_wipe.h" #include "sbar.h" diff --git a/src/win32/hardware.cpp b/src/win32/hardware.cpp index 609f56647d..70898ebe5a 100644 --- a/src/win32/hardware.cpp +++ b/src/win32/hardware.cpp @@ -47,7 +47,7 @@ #include "doomstat.h" #include "m_argv.h" #include "version.h" -#include "r_swrenderer.h" +#include "swrenderer/r_swrenderer.h" EXTERN_CVAR (Bool, ticker) EXTERN_CVAR (Bool, fullscreen) diff --git a/src/win32/win32video.cpp b/src/win32/win32video.cpp index e07ca36609..e6c8075365 100644 --- a/src/win32/win32video.cpp +++ b/src/win32/win32video.cpp @@ -69,7 +69,7 @@ #include "m_argv.h" #include "r_defs.h" #include "v_text.h" -#include "r_swrenderer.h" +#include "swrenderer/r_swrenderer.h" #include "version.h" #include "win32iface.h" diff --git a/tools/drawergen/precomp.h b/tools/drawergen/precomp.h index ea5a5e6df5..f2bf67b70a 100644 --- a/tools/drawergen/precomp.h +++ b/tools/drawergen/precomp.h @@ -2,4 +2,4 @@ #pragma once #include "llvm_include.h" -#include "../../src/r_drawers.h" +#include "../../src/swrenderer/drawers/r_drawers.h" From 
49903af394a3b0215b06e56e855c8db29cb510f4 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 27 Dec 2016 07:18:04 +0100 Subject: [PATCH 573/912] Remove 4 column wall drawers --- src/swrenderer/drawers/r_draw.cpp | 94 +-- src/swrenderer/drawers/r_draw.h | 26 +- src/swrenderer/drawers/r_draw_pal.cpp | 672 ------------------ src/swrenderer/drawers/r_draw_pal.h | 27 - src/swrenderer/drawers/r_draw_rgba.cpp | 66 -- src/swrenderer/drawers/r_draw_rgba.h | 21 - src/swrenderer/drawers/r_drawers.cpp | 12 - src/swrenderer/drawers/r_drawers.h | 8 - src/swrenderer/scene/r_plane.cpp | 83 +-- src/swrenderer/scene/r_walldraw.cpp | 246 +------ .../fixedfunction/drawskycodegen.cpp | 67 +- .../drawergen/fixedfunction/drawskycodegen.h | 18 +- .../fixedfunction/drawwallcodegen.cpp | 116 +-- .../drawergen/fixedfunction/drawwallcodegen.h | 30 +- tools/drawergen/llvmdrawers.cpp | 32 +- tools/drawergen/llvmdrawers.h | 4 +- 16 files changed, 119 insertions(+), 1403 deletions(-) diff --git a/src/swrenderer/drawers/r_draw.cpp b/src/swrenderer/drawers/r_draw.cpp index 6260982459..b49427d982 100644 --- a/src/swrenderer/drawers/r_draw.cpp +++ b/src/swrenderer/drawers/r_draw.cpp @@ -496,30 +496,26 @@ namespace swrenderer return tex->GetColumn(col, nullptr); } - bool R_GetTransMaskDrawers(void(**drawCol1)(), void(**drawCol4)()) + bool R_GetTransMaskDrawers(void(**drawColumn)()) { if (colfunc == R_DrawAddColumn) { - *drawCol1 = R_DrawWallAddCol1; - *drawCol4 = R_DrawWallAddCol4; + *drawColumn = R_DrawWallAddColumn; return true; } if (colfunc == R_DrawAddClampColumn) { - *drawCol1 = R_DrawWallAddClampCol1; - *drawCol4 = R_DrawWallAddClampCol4; + *drawColumn = R_DrawWallAddClampColumn; return true; } if (colfunc == R_DrawSubClampColumn) { - *drawCol1 = R_DrawWallSubClampCol1; - *drawCol4 = R_DrawWallSubClampCol4; + *drawColumn = R_DrawWallSubClampColumn; return true; } if (colfunc == R_DrawRevSubClampColumn) { - *drawCol1 = R_DrawWallRevSubClampCol1; - *drawCol4 = R_DrawWallRevSubClampCol4; + 
*drawColumn = R_DrawWallRevSubClampColumn; return true; } return false; @@ -637,7 +633,7 @@ namespace swrenderer ///////////////////////////////////////////////////////////////////////// - void R_DrawWallCol1() + void R_DrawWallColumn() { if (r_swtruecolor) DrawerCommandQueue::QueueCommand(); @@ -645,15 +641,7 @@ namespace swrenderer DrawerCommandQueue::QueueCommand(); } - void R_DrawWallCol4() - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); - } - - void R_DrawWallMaskedCol1() + void R_DrawWallMaskedColumn() { if (r_swtruecolor) DrawerCommandQueue::QueueCommand(); @@ -661,15 +649,7 @@ namespace swrenderer DrawerCommandQueue::QueueCommand(); } - void R_DrawWallMaskedCol4() - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); - } - - void R_DrawWallAddCol1() + void R_DrawWallAddColumn() { if (r_swtruecolor) DrawerCommandQueue::QueueCommand(); @@ -679,15 +659,7 @@ namespace swrenderer DrawerCommandQueue::QueueCommand(); } - void R_DrawWallAddCol4() - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); - } - - void R_DrawWallAddClampCol1() + void R_DrawWallAddClampColumn() { if (r_swtruecolor) DrawerCommandQueue::QueueCommand(); @@ -695,15 +667,7 @@ namespace swrenderer DrawerCommandQueue::QueueCommand(); } - void R_DrawWallAddClampCol4() - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); - } - - void R_DrawWallSubClampCol1() + void R_DrawWallSubClampColumn() { if (r_swtruecolor) DrawerCommandQueue::QueueCommand(); @@ -711,15 +675,7 @@ namespace swrenderer DrawerCommandQueue::QueueCommand(); } - void R_DrawWallSubClampCol4() - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); - } - - void R_DrawWallRevSubClampCol1() + void R_DrawWallRevSubClampColumn() { if (r_swtruecolor) 
DrawerCommandQueue::QueueCommand(); @@ -727,15 +683,7 @@ namespace swrenderer DrawerCommandQueue::QueueCommand(); } - void R_DrawWallRevSubClampCol4() - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); - } - - void R_DrawSingleSkyCol1(uint32_t solid_top, uint32_t solid_bottom) + void R_DrawSingleSkyColumn(uint32_t solid_top, uint32_t solid_bottom) { if (r_swtruecolor) DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); @@ -743,15 +691,7 @@ namespace swrenderer DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); } - void R_DrawSingleSkyCol4(uint32_t solid_top, uint32_t solid_bottom) - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); - else - DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); - } - - void R_DrawDoubleSkyCol1(uint32_t solid_top, uint32_t solid_bottom) + void R_DrawDoubleSkyColumn(uint32_t solid_top, uint32_t solid_bottom) { if (r_swtruecolor) DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); @@ -759,14 +699,6 @@ namespace swrenderer DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); } - void R_DrawDoubleSkyCol4(uint32_t solid_top, uint32_t solid_bottom) - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); - else - DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); - } - void R_DrawColumn() { if (r_swtruecolor) diff --git a/src/swrenderer/drawers/r_draw.h b/src/swrenderer/drawers/r_draw.h index c508aa2680..580bcd2f9a 100644 --- a/src/swrenderer/drawers/r_draw.h +++ b/src/swrenderer/drawers/r_draw.h @@ -130,7 +130,7 @@ namespace swrenderer bool R_SetPatchStyle(FRenderStyle style, fixed_t alpha, int translation, uint32_t color); bool R_SetPatchStyle(FRenderStyle style, float alpha, int translation, uint32_t color); void R_FinishSetPatchStyle(); // Call this after finished drawing the current thing, in case its style was STYLE_Shade - bool R_GetTransMaskDrawers(void(**drawCol1)(), 
void(**drawCol4)()); + bool R_GetTransMaskDrawers(void(**drawColumn)()); const uint8_t *R_GetColumn(FTexture *tex, int col); @@ -165,23 +165,15 @@ namespace swrenderer void R_DrawFogBoundary(int x1, int x2, short *uclip, short *dclip); void R_FillSpan(); - void R_DrawWallCol1(); - void R_DrawWallCol4(); - void R_DrawWallMaskedCol1(); - void R_DrawWallMaskedCol4(); - void R_DrawWallAddCol1(); - void R_DrawWallAddCol4(); - void R_DrawWallAddClampCol1(); - void R_DrawWallAddClampCol4(); - void R_DrawWallSubClampCol1(); - void R_DrawWallSubClampCol4(); - void R_DrawWallRevSubClampCol1(); - void R_DrawWallRevSubClampCol4(); + void R_DrawWallColumn(); + void R_DrawWallMaskedColumn(); + void R_DrawWallAddColumn(); + void R_DrawWallAddClampColumn(); + void R_DrawWallSubClampColumn(); + void R_DrawWallRevSubClampColumn(); - void R_DrawSingleSkyCol1(uint32_t solid_top, uint32_t solid_bottom); - void R_DrawSingleSkyCol4(uint32_t solid_top, uint32_t solid_bottom); - void R_DrawDoubleSkyCol1(uint32_t solid_top, uint32_t solid_bottom); - void R_DrawDoubleSkyCol4(uint32_t solid_top, uint32_t solid_bottom); + void R_DrawSingleSkyColumn(uint32_t solid_top, uint32_t solid_bottom); + void R_DrawDoubleSkyColumn(uint32_t solid_top, uint32_t solid_bottom); // Sets dc_colormap and dc_light to their appropriate values depending on the output format (pal vs true color) void R_SetColorMapLight(FSWColormap *base_colormap, float light, int shade); diff --git a/src/swrenderer/drawers/r_draw_pal.cpp b/src/swrenderer/drawers/r_draw_pal.cpp index c8adf39656..dd1fdcc528 100644 --- a/src/swrenderer/drawers/r_draw_pal.cpp +++ b/src/swrenderer/drawers/r_draw_pal.cpp @@ -110,25 +110,6 @@ namespace swrenderer _step_viewpos_z = dc_viewpos_step.Z; } - PalWall4Command::PalWall4Command() - { - using namespace drawerargs; - - _dest = dc_dest; - _count = dc_count; - _pitch = dc_pitch; - _fracbits = dc_wall_fracbits; - for (int col = 0; col < 4; col++) - { - _colormap[col] = dc_wall_colormap[col]; - 
_source[col] = dc_wall_source[col]; - _iscale[col] = dc_wall_iscale[col]; - _texturefrac[col] = dc_wall_texturefrac[col]; - } - _srcblend = dc_srcblend; - _destblend = dc_destblend; - } - uint8_t PalWall1Command::AddLights(const TriLight *lights, int num_lights, float viewpos_z, uint8_t fg, uint8_t material) { uint32_t lit_r = GPalette.BaseColors[fg].r; @@ -217,56 +198,6 @@ namespace swrenderer } } - void DrawWall4PalCommand::Execute(DrawerThread *thread) - { - uint8_t *dest = _dest; - int count = _count; - int bits = _fracbits; - uint32_t place; - auto pal0 = _colormap[0]; - auto pal1 = _colormap[1]; - auto pal2 = _colormap[2]; - auto pal3 = _colormap[3]; - auto buf0 = _source[0]; - auto buf1 = _source[1]; - auto buf2 = _source[2]; - auto buf3 = _source[3]; - auto dc_wall_iscale0 = _iscale[0]; - auto dc_wall_iscale1 = _iscale[1]; - auto dc_wall_iscale2 = _iscale[2]; - auto dc_wall_iscale3 = _iscale[3]; - auto dc_wall_texturefrac0 = _texturefrac[0]; - auto dc_wall_texturefrac1 = _texturefrac[1]; - auto dc_wall_texturefrac2 = _texturefrac[2]; - auto dc_wall_texturefrac3 = _texturefrac[3]; - auto pitch = _pitch; - - count = thread->count_for_thread(_dest_y, count); - if (count <= 0) - return; - - int skipped = thread->skipped_by_thread(_dest_y); - dest = thread->dest_for_thread(_dest_y, pitch, dest); - dc_wall_texturefrac0 += dc_wall_iscale0 * skipped; - dc_wall_texturefrac1 += dc_wall_iscale1 * skipped; - dc_wall_texturefrac2 += dc_wall_iscale2 * skipped; - dc_wall_texturefrac3 += dc_wall_iscale3 * skipped; - dc_wall_iscale0 *= thread->num_cores; - dc_wall_iscale1 *= thread->num_cores; - dc_wall_iscale2 *= thread->num_cores; - dc_wall_iscale3 *= thread->num_cores; - pitch *= thread->num_cores; - - do - { - dest[0] = pal0[buf0[(place = dc_wall_texturefrac0) >> bits]]; dc_wall_texturefrac0 = place + dc_wall_iscale0; - dest[1] = pal1[buf1[(place = dc_wall_texturefrac1) >> bits]]; dc_wall_texturefrac1 = place + dc_wall_iscale1; - dest[2] = pal2[buf2[(place = 
dc_wall_texturefrac2) >> bits]]; dc_wall_texturefrac2 = place + dc_wall_iscale2; - dest[3] = pal3[buf3[(place = dc_wall_texturefrac3) >> bits]]; dc_wall_texturefrac3 = place + dc_wall_iscale3; - dest += pitch; - } while (--count); - } - void DrawWallMasked1PalCommand::Execute(DrawerThread *thread) { uint32_t fracstep = _iscale; @@ -326,58 +257,6 @@ namespace swrenderer } } - void DrawWallMasked4PalCommand::Execute(DrawerThread *thread) - { - uint8_t *dest = _dest; - int count = _count; - int bits = _fracbits; - uint32_t place; - auto pal0 = _colormap[0]; - auto pal1 = _colormap[1]; - auto pal2 = _colormap[2]; - auto pal3 = _colormap[3]; - auto buf0 = _source[0]; - auto buf1 = _source[1]; - auto buf2 = _source[2]; - auto buf3 = _source[3]; - auto dc_wall_iscale0 = _iscale[0]; - auto dc_wall_iscale1 = _iscale[1]; - auto dc_wall_iscale2 = _iscale[2]; - auto dc_wall_iscale3 = _iscale[3]; - auto dc_wall_texturefrac0 = _texturefrac[0]; - auto dc_wall_texturefrac1 = _texturefrac[1]; - auto dc_wall_texturefrac2 = _texturefrac[2]; - auto dc_wall_texturefrac3 = _texturefrac[3]; - auto pitch = _pitch; - - count = thread->count_for_thread(_dest_y, count); - if (count <= 0) - return; - - int skipped = thread->skipped_by_thread(_dest_y); - dest = thread->dest_for_thread(_dest_y, pitch, dest); - dc_wall_texturefrac0 += dc_wall_iscale0 * skipped; - dc_wall_texturefrac1 += dc_wall_iscale1 * skipped; - dc_wall_texturefrac2 += dc_wall_iscale2 * skipped; - dc_wall_texturefrac3 += dc_wall_iscale3 * skipped; - dc_wall_iscale0 *= thread->num_cores; - dc_wall_iscale1 *= thread->num_cores; - dc_wall_iscale2 *= thread->num_cores; - dc_wall_iscale3 *= thread->num_cores; - pitch *= thread->num_cores; - - do - { - uint8_t pix; - - pix = buf0[(place = dc_wall_texturefrac0) >> bits]; if (pix) dest[0] = pal0[pix]; dc_wall_texturefrac0 = place + dc_wall_iscale0; - pix = buf1[(place = dc_wall_texturefrac1) >> bits]; if (pix) dest[1] = pal1[pix]; dc_wall_texturefrac1 = place + dc_wall_iscale1; - pix 
= buf2[(place = dc_wall_texturefrac2) >> bits]; if (pix) dest[2] = pal2[pix]; dc_wall_texturefrac2 = place + dc_wall_iscale2; - pix = buf3[(place = dc_wall_texturefrac3) >> bits]; if (pix) dest[3] = pal3[pix]; dc_wall_texturefrac3 = place + dc_wall_iscale3; - dest += pitch; - } while (--count); - } - void DrawWallAdd1PalCommand::Execute(DrawerThread *thread) { uint32_t fracstep = _iscale; @@ -440,72 +319,6 @@ namespace swrenderer } } - void DrawWallAdd4PalCommand::Execute(DrawerThread *thread) - { - uint8_t *dest = _dest; - int count = _count; - int bits = _fracbits; - - uint32_t *fg2rgb = _srcblend; - uint32_t *bg2rgb = _destblend; - - uint32_t dc_wall_texturefrac[4] = { _texturefrac[0], _texturefrac[1], _texturefrac[2], _texturefrac[3] }; - uint32_t dc_wall_iscale[4] = { _iscale[0], _iscale[1], _iscale[2], _iscale[3] }; - - count = thread->count_for_thread(_dest_y, count); - if (count <= 0) - return; - - int pitch = _pitch; - int skipped = thread->skipped_by_thread(_dest_y); - dest = thread->dest_for_thread(_dest_y, pitch, dest); - for (int i = 0; i < 4; i++) - { - dc_wall_texturefrac[i] += dc_wall_iscale[i] * skipped; - dc_wall_iscale[i] *= thread->num_cores; - } - pitch *= thread->num_cores; - - if (!r_blendmethod) - { - do - { - for (int i = 0; i < 4; ++i) - { - uint8_t pix = _source[i][dc_wall_texturefrac[i] >> bits]; - if (pix != 0) - { - uint32_t fg = fg2rgb[_colormap[i][pix]]; - uint32_t bg = bg2rgb[dest[i]]; - fg = (fg + bg) | 0x1f07c1f; - dest[i] = RGB32k.All[fg & (fg >> 15)]; - } - dc_wall_texturefrac[i] += dc_wall_iscale[i]; - } - dest += pitch; - } while (--count); - } - else - { - do - { - for (int i = 0; i < 4; ++i) - { - uint8_t pix = _source[i][dc_wall_texturefrac[i] >> bits]; - if (pix != 0) - { - uint32_t r = MIN(GPalette.BaseColors[_colormap[i][pix]].r + GPalette.BaseColors[dest[i]].r, 255); - uint32_t g = MIN(GPalette.BaseColors[_colormap[i][pix]].g + GPalette.BaseColors[dest[i]].g, 255); - uint32_t b = 
MIN(GPalette.BaseColors[_colormap[i][pix]].b + GPalette.BaseColors[dest[i]].b, 255); - dest[i] = RGB256k.RGB[r>>2][g>>2][b>>2]; - } - dc_wall_texturefrac[i] += dc_wall_iscale[i]; - } - dest += pitch; - } while (--count); - } - } - void DrawWallAddClamp1PalCommand::Execute(DrawerThread *thread) { uint32_t fracstep = _iscale; @@ -580,50 +393,6 @@ namespace swrenderer } } - void DrawWallAddClamp4PalCommand::Execute(DrawerThread *thread) - { - uint8_t *dest = _dest; - int count = _count; - int bits = _fracbits; - - uint32_t *fg2rgb = _srcblend; - uint32_t *bg2rgb = _destblend; - - uint32_t dc_wall_texturefrac[4] = { _texturefrac[0], _texturefrac[1], _texturefrac[2], _texturefrac[3] }; - uint32_t dc_wall_iscale[4] = { _iscale[0], _iscale[1], _iscale[2], _iscale[3] }; - - count = thread->count_for_thread(_dest_y, count); - if (count <= 0) - return; - - int pitch = _pitch; - int skipped = thread->skipped_by_thread(_dest_y); - dest = thread->dest_for_thread(_dest_y, pitch, dest); - for (int i = 0; i < 4; i++) - { - dc_wall_texturefrac[i] += dc_wall_iscale[i] * skipped; - dc_wall_iscale[i] *= thread->num_cores; - } - pitch *= thread->num_cores; - - do - { - for (int i = 0; i < 4; ++i) - { - uint8_t pix = _source[i][dc_wall_texturefrac[i] >> bits]; - if (pix != 0) - { - uint32_t r = MIN(GPalette.BaseColors[_colormap[i][pix]].r + GPalette.BaseColors[dest[i]].r, 255); - uint32_t g = MIN(GPalette.BaseColors[_colormap[i][pix]].g + GPalette.BaseColors[dest[i]].g, 255); - uint32_t b = MIN(GPalette.BaseColors[_colormap[i][pix]].b + GPalette.BaseColors[dest[i]].b, 255); - dest[i] = RGB256k.RGB[r>>2][g>>2][b>>2]; - } - dc_wall_texturefrac[i] += dc_wall_iscale[i]; - } - dest += pitch; - } while (--count); - } - void DrawWallSubClamp1PalCommand::Execute(DrawerThread *thread) { uint32_t fracstep = _iscale; @@ -697,76 +466,6 @@ namespace swrenderer } } - void DrawWallSubClamp4PalCommand::Execute(DrawerThread *thread) - { - uint8_t *dest = _dest; - int count = _count; - int bits = 
_fracbits; - - uint32_t *fg2rgb = _srcblend; - uint32_t *bg2rgb = _destblend; - - uint32_t dc_wall_texturefrac[4] = { _texturefrac[0], _texturefrac[1], _texturefrac[2], _texturefrac[3] }; - uint32_t dc_wall_iscale[4] = { _iscale[0], _iscale[1], _iscale[2], _iscale[3] }; - - count = thread->count_for_thread(_dest_y, count); - if (count <= 0) - return; - - int pitch = _pitch; - int skipped = thread->skipped_by_thread(_dest_y); - dest = thread->dest_for_thread(_dest_y, pitch, dest); - for (int i = 0; i < 4; i++) - { - dc_wall_texturefrac[i] += dc_wall_iscale[i] * skipped; - dc_wall_iscale[i] *= thread->num_cores; - } - pitch *= thread->num_cores; - - if (!r_blendmethod) - { - do - { - for (int i = 0; i < 4; ++i) - { - uint8_t pix = _source[i][dc_wall_texturefrac[i] >> bits]; - if (pix != 0) - { - uint32_t a = (fg2rgb[_colormap[i][pix]] | 0x40100400) - bg2rgb[dest[i]]; - uint32_t b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - dest[i] = RGB32k.All[a & (a >> 15)]; - } - dc_wall_texturefrac[i] += dc_wall_iscale[i]; - } - dest += pitch; - } while (--count); - } - else - { - do - { - for (int i = 0; i < 4; ++i) - { - uint8_t pix = _source[i][dc_wall_texturefrac[i] >> bits]; - if (pix != 0) - { - int r = clamp(-GPalette.BaseColors[_colormap[i][pix]].r + GPalette.BaseColors[dest[i]].r, 0, 255); - int g = clamp(-GPalette.BaseColors[_colormap[i][pix]].g + GPalette.BaseColors[dest[i]].g, 0, 255); - int b = clamp(-GPalette.BaseColors[_colormap[i][pix]].b + GPalette.BaseColors[dest[i]].b, 0, 255); - dest[i] = RGB256k.RGB[r>>2][g>>2][b>>2]; - } - dc_wall_texturefrac[i] += dc_wall_iscale[i]; - } - dest += pitch; - } while (--count); - } - } - void DrawWallRevSubClamp1PalCommand::Execute(DrawerThread *thread) { uint32_t fracstep = _iscale; @@ -840,76 +539,6 @@ namespace swrenderer } } - void DrawWallRevSubClamp4PalCommand::Execute(DrawerThread *thread) - { - uint8_t *dest = _dest; - int count = _count; - int bits = _fracbits; - - uint32_t *fg2rgb = 
_srcblend; - uint32_t *bg2rgb = _destblend; - - uint32_t dc_wall_texturefrac[4] = { _texturefrac[0], _texturefrac[1], _texturefrac[2], _texturefrac[3] }; - uint32_t dc_wall_iscale[4] = { _iscale[0], _iscale[1], _iscale[2], _iscale[3] }; - - count = thread->count_for_thread(_dest_y, count); - if (count <= 0) - return; - - int pitch = _pitch; - int skipped = thread->skipped_by_thread(_dest_y); - dest = thread->dest_for_thread(_dest_y, pitch, dest); - for (int i = 0; i < 4; i++) - { - dc_wall_texturefrac[i] += dc_wall_iscale[i] * skipped; - dc_wall_iscale[i] *= thread->num_cores; - } - pitch *= thread->num_cores; - - if (!r_blendmethod) - { - do - { - for (int i = 0; i < 4; ++i) - { - uint8_t pix = _source[i][dc_wall_texturefrac[i] >> bits]; - if (pix != 0) - { - uint32_t a = (bg2rgb[dest[i]] | 0x40100400) - fg2rgb[_colormap[i][pix]]; - uint32_t b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - dest[i] = RGB32k.All[a & (a >> 15)]; - } - dc_wall_texturefrac[i] += dc_wall_iscale[i]; - } - dest += _pitch; - } while (--count); - } - else - { - do - { - for (int i = 0; i < 4; ++i) - { - uint8_t pix = _source[i][dc_wall_texturefrac[i] >> bits]; - if (pix != 0) - { - uint32_t r = clamp(GPalette.BaseColors[_colormap[i][pix]].r - GPalette.BaseColors[dest[i]].r, 0, 255); - uint32_t g = clamp(GPalette.BaseColors[_colormap[i][pix]].g - GPalette.BaseColors[dest[i]].g, 0, 255); - uint32_t b = clamp(GPalette.BaseColors[_colormap[i][pix]].b - GPalette.BaseColors[dest[i]].b, 0, 255); - dest[i] = RGB256k.RGB[r>>2][g>>2][b>>2]; - } - dc_wall_texturefrac[i] += dc_wall_iscale[i]; - } - dest += _pitch; - } while (--count); - } - } - ///////////////////////////////////////////////////////////////////////// PalSkyCommand::PalSkyCommand(uint32_t solid_top, uint32_t solid_bottom) : solid_top(solid_top), solid_bottom(solid_bottom) @@ -994,146 +623,6 @@ namespace swrenderer } } - void DrawSingleSky4PalCommand::Execute(DrawerThread *thread) - { - uint8_t *dest = 
_dest; - int count = _count; - int pitch = _pitch; - const uint8_t *source0[4] = { _source[0], _source[1], _source[2], _source[3] }; - int textureheight0 = _sourceheight[0]; - const uint32_t *palette = (const uint32_t *)GPalette.BaseColors; - int32_t frac[4] = { (int32_t)_texturefrac[0], (int32_t)_texturefrac[1], (int32_t)_texturefrac[2], (int32_t)_texturefrac[3] }; - int32_t fracstep[4] = { (int32_t)_iscale[0], (int32_t)_iscale[1], (int32_t)_iscale[2], (int32_t)_iscale[3] }; - uint8_t output[4]; - - int start_fade = 2; // How fast it should fade out - - int solid_top_r = RPART(solid_top); - int solid_top_g = GPART(solid_top); - int solid_top_b = BPART(solid_top); - int solid_bottom_r = RPART(solid_bottom); - int solid_bottom_g = GPART(solid_bottom); - int solid_bottom_b = BPART(solid_bottom); - uint32_t solid_top_fill = RGB256k.RGB[(solid_top_r >> 2)][(solid_top_g >> 2)][(solid_top_b >> 2)]; - uint32_t solid_bottom_fill = RGB256k.RGB[(solid_bottom_r >> 2)][(solid_bottom_g >> 2)][(solid_bottom_b >> 2)]; - solid_top_fill = (solid_top_fill << 24) | (solid_top_fill << 16) | (solid_top_fill << 8) | solid_top_fill; - solid_bottom_fill = (solid_bottom_fill << 24) | (solid_bottom_fill << 16) | (solid_bottom_fill << 8) | solid_bottom_fill; - - // Find bands for top solid color, top fade, center textured, bottom fade, bottom solid color: - int fade_length = (1 << (24 - start_fade)); - int start_fadetop_y = (-frac[0]) / fracstep[0]; - int end_fadetop_y = (fade_length - frac[0]) / fracstep[0]; - int start_fadebottom_y = ((2 << 24) - fade_length - frac[0]) / fracstep[0]; - int end_fadebottom_y = ((2 << 24) - frac[0]) / fracstep[0]; - for (int col = 1; col < 4; col++) - { - start_fadetop_y = MIN(start_fadetop_y, (-frac[0]) / fracstep[0]); - end_fadetop_y = MAX(end_fadetop_y, (fade_length - frac[0]) / fracstep[0]); - start_fadebottom_y = MIN(start_fadebottom_y, ((2 << 24) - fade_length - frac[0]) / fracstep[0]); - end_fadebottom_y = MAX(end_fadebottom_y, ((2 << 24) - frac[0]) / 
fracstep[0]); - } - start_fadetop_y = clamp(start_fadetop_y, 0, count); - end_fadetop_y = clamp(end_fadetop_y, 0, count); - start_fadebottom_y = clamp(start_fadebottom_y, 0, count); - end_fadebottom_y = clamp(end_fadebottom_y, 0, count); - - int skipped = thread->skipped_by_thread(_dest_y); - dest = thread->dest_for_thread(_dest_y, pitch, dest); - for (int col = 0; col < 4; col++) - { - frac[col] += fracstep[col] * skipped; - fracstep[col] *= thread->num_cores; - } - pitch *= thread->num_cores; - int num_cores = thread->num_cores; - int index = skipped; - - // Top solid color: - while (index < start_fadetop_y) - { - *((uint32_t*)dest) = solid_top_fill; - dest += pitch; - for (int col = 0; col < 4; col++) - frac[col] += fracstep[col]; - index += num_cores; - } - - // Top fade: - while (index < end_fadetop_y) - { - for (int col = 0; col < 4; col++) - { - uint32_t sample_index = (((((uint32_t)frac[col]) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; - uint8_t fg = source0[col][sample_index]; - - uint32_t c = palette[fg]; - int alpha_top = MAX(MIN(frac[col] >> (16 - start_fade), 256), 0); - int inv_alpha_top = 256 - alpha_top; - int c_red = RPART(c); - int c_green = GPART(c); - int c_blue = BPART(c); - c_red = (c_red * alpha_top + solid_top_r * inv_alpha_top) >> 8; - c_green = (c_green * alpha_top + solid_top_g * inv_alpha_top) >> 8; - c_blue = (c_blue * alpha_top + solid_top_b * inv_alpha_top) >> 8; - output[col] = RGB256k.RGB[(c_red >> 2)][(c_green >> 2)][(c_blue >> 2)]; - frac[col] += fracstep[col]; - } - *((uint32_t*)dest) = *((uint32_t*)output); - dest += pitch; - index += num_cores; - } - - // Textured center: - while (index < start_fadebottom_y) - { - for (int col = 0; col < 4; col++) - { - uint32_t sample_index = (((((uint32_t)frac[col]) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; - output[col] = source0[col][sample_index]; - - frac[col] += fracstep[col]; - } - - *((uint32_t*)dest) = *((uint32_t*)output); - dest += pitch; - index += num_cores; - } - - 
// Fade bottom: - while (index < end_fadebottom_y) - { - for (int col = 0; col < 4; col++) - { - uint32_t sample_index = (((((uint32_t)frac[col]) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; - uint8_t fg = source0[col][sample_index]; - - uint32_t c = palette[fg]; - int alpha_bottom = MAX(MIN(((2 << 24) - frac[col]) >> (16 - start_fade), 256), 0); - int inv_alpha_bottom = 256 - alpha_bottom; - int c_red = RPART(c); - int c_green = GPART(c); - int c_blue = BPART(c); - c_red = (c_red * alpha_bottom + solid_bottom_r * inv_alpha_bottom) >> 8; - c_green = (c_green * alpha_bottom + solid_bottom_g * inv_alpha_bottom) >> 8; - c_blue = (c_blue * alpha_bottom + solid_bottom_b * inv_alpha_bottom) >> 8; - output[col] = RGB256k.RGB[(c_red >> 2)][(c_green >> 2)][(c_blue >> 2)]; - - frac[col] += fracstep[col]; - } - *((uint32_t*)dest) = *((uint32_t*)output); - dest += pitch; - index += num_cores; - } - - // Bottom solid color: - while (index < count) - { - *((uint32_t*)dest) = solid_bottom_fill; - dest += pitch; - index += num_cores; - } - } - void DrawDoubleSky1PalCommand::Execute(DrawerThread *thread) { uint8_t *dest = _dest; @@ -1206,167 +695,6 @@ namespace swrenderer } } - void DrawDoubleSky4PalCommand::Execute(DrawerThread *thread) - { - uint8_t *dest = _dest; - int count = _count; - int pitch = _pitch; - const uint8_t *source0[4] = { _source[0], _source[1], _source[2], _source[3] }; - const uint8_t *source1[4] = { _source2[0], _source2[1], _source2[2], _source2[3] }; - int textureheight0 = _sourceheight[0]; - uint32_t maxtextureheight1 = _sourceheight[1] - 1; - const uint32_t *palette = (const uint32_t *)GPalette.BaseColors; - int32_t frac[4] = { (int32_t)_texturefrac[0], (int32_t)_texturefrac[1], (int32_t)_texturefrac[2], (int32_t)_texturefrac[3] }; - int32_t fracstep[4] = { (int32_t)_iscale[0], (int32_t)_iscale[1], (int32_t)_iscale[2], (int32_t)_iscale[3] }; - uint8_t output[4]; - - int start_fade = 2; // How fast it should fade out - - int solid_top_r = 
RPART(solid_top); - int solid_top_g = GPART(solid_top); - int solid_top_b = BPART(solid_top); - int solid_bottom_r = RPART(solid_bottom); - int solid_bottom_g = GPART(solid_bottom); - int solid_bottom_b = BPART(solid_bottom); - uint32_t solid_top_fill = RGB256k.RGB[(solid_top_r >> 2)][(solid_top_g >> 2)][(solid_top_b >> 2)]; - uint32_t solid_bottom_fill = RGB256k.RGB[(solid_bottom_r >> 2)][(solid_bottom_g >> 2)][(solid_bottom_b >> 2)]; - solid_top_fill = (solid_top_fill << 24) | (solid_top_fill << 16) | (solid_top_fill << 8) | solid_top_fill; - solid_bottom_fill = (solid_bottom_fill << 24) | (solid_bottom_fill << 16) | (solid_bottom_fill << 8) | solid_bottom_fill; - - // Find bands for top solid color, top fade, center textured, bottom fade, bottom solid color: - int fade_length = (1 << (24 - start_fade)); - int start_fadetop_y = (-frac[0]) / fracstep[0]; - int end_fadetop_y = (fade_length - frac[0]) / fracstep[0]; - int start_fadebottom_y = ((2 << 24) - fade_length - frac[0]) / fracstep[0]; - int end_fadebottom_y = ((2 << 24) - frac[0]) / fracstep[0]; - for (int col = 1; col < 4; col++) - { - start_fadetop_y = MIN(start_fadetop_y, (-frac[0]) / fracstep[0]); - end_fadetop_y = MAX(end_fadetop_y, (fade_length - frac[0]) / fracstep[0]); - start_fadebottom_y = MIN(start_fadebottom_y, ((2 << 24) - fade_length - frac[0]) / fracstep[0]); - end_fadebottom_y = MAX(end_fadebottom_y, ((2 << 24) - frac[0]) / fracstep[0]); - } - start_fadetop_y = clamp(start_fadetop_y, 0, count); - end_fadetop_y = clamp(end_fadetop_y, 0, count); - start_fadebottom_y = clamp(start_fadebottom_y, 0, count); - end_fadebottom_y = clamp(end_fadebottom_y, 0, count); - - int skipped = thread->skipped_by_thread(_dest_y); - dest = thread->dest_for_thread(_dest_y, pitch, dest); - for (int col = 0; col < 4; col++) - { - frac[col] += fracstep[col] * skipped; - fracstep[col] *= thread->num_cores; - } - pitch *= thread->num_cores; - int num_cores = thread->num_cores; - int index = skipped; - - // Top solid 
color: - while (index < start_fadetop_y) - { - *((uint32_t*)dest) = solid_top_fill; - dest += pitch; - for (int col = 0; col < 4; col++) - frac[col] += fracstep[col]; - index += num_cores; - } - - // Top fade: - while (index < end_fadetop_y) - { - for (int col = 0; col < 4; col++) - { - uint32_t sample_index = (((((uint32_t)frac[col]) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; - uint8_t fg = source0[col][sample_index]; - if (fg == 0) - { - uint32_t sample_index2 = MIN(sample_index, maxtextureheight1); - fg = source1[col][sample_index2]; - } - output[col] = fg; - - uint32_t c = palette[fg]; - int alpha_top = MAX(MIN(frac[col] >> (16 - start_fade), 256), 0); - int inv_alpha_top = 256 - alpha_top; - int c_red = RPART(c); - int c_green = GPART(c); - int c_blue = BPART(c); - c_red = (c_red * alpha_top + solid_top_r * inv_alpha_top) >> 8; - c_green = (c_green * alpha_top + solid_top_g * inv_alpha_top) >> 8; - c_blue = (c_blue * alpha_top + solid_top_b * inv_alpha_top) >> 8; - output[col] = RGB256k.RGB[(c_red >> 2)][(c_green >> 2)][(c_blue >> 2)]; - - frac[col] += fracstep[col]; - } - *((uint32_t*)dest) = *((uint32_t*)output); - dest += pitch; - index += num_cores; - } - - // Textured center: - while (index < start_fadebottom_y) - { - for (int col = 0; col < 4; col++) - { - uint32_t sample_index = (((((uint32_t)frac[col]) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; - uint8_t fg = source0[col][sample_index]; - if (fg == 0) - { - uint32_t sample_index2 = MIN(sample_index, maxtextureheight1); - fg = source1[col][sample_index2]; - } - output[col] = fg; - - frac[col] += fracstep[col]; - } - - *((uint32_t*)dest) = *((uint32_t*)output); - dest += pitch; - index += num_cores; - } - - // Fade bottom: - while (index < end_fadebottom_y) - { - for (int col = 0; col < 4; col++) - { - uint32_t sample_index = (((((uint32_t)frac[col]) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; - uint8_t fg = source0[col][sample_index]; - if (fg == 0) - { - uint32_t sample_index2 = 
MIN(sample_index, maxtextureheight1); - fg = source1[col][sample_index2]; - } - output[col] = fg; - - uint32_t c = palette[fg]; - int alpha_bottom = MAX(MIN(((2 << 24) - frac[col]) >> (16 - start_fade), 256), 0); - int inv_alpha_bottom = 256 - alpha_bottom; - int c_red = RPART(c); - int c_green = GPART(c); - int c_blue = BPART(c); - c_red = (c_red * alpha_bottom + solid_bottom_r * inv_alpha_bottom) >> 8; - c_green = (c_green * alpha_bottom + solid_bottom_g * inv_alpha_bottom) >> 8; - c_blue = (c_blue * alpha_bottom + solid_bottom_b * inv_alpha_bottom) >> 8; - output[col] = RGB256k.RGB[(c_red >> 2)][(c_green >> 2)][(c_blue >> 2)]; - - frac[col] += fracstep[col]; - } - *((uint32_t*)dest) = *((uint32_t*)output); - dest += pitch; - index += num_cores; - } - - // Bottom solid color: - while (index < count) - { - *((uint32_t*)dest) = solid_bottom_fill; - dest += pitch; - index += num_cores; - } - } - ///////////////////////////////////////////////////////////////////////// PalColumnCommand::PalColumnCommand() diff --git a/src/swrenderer/drawers/r_draw_pal.h b/src/swrenderer/drawers/r_draw_pal.h index 79152665cc..539cf430d0 100644 --- a/src/swrenderer/drawers/r_draw_pal.h +++ b/src/swrenderer/drawers/r_draw_pal.h @@ -32,37 +32,12 @@ namespace swrenderer float _step_viewpos_z; }; - class PalWall4Command : public DrawerCommand - { - public: - PalWall4Command(); - FString DebugInfo() override { return "PalWallCommand"; } - - protected: - uint8_t *_dest; - int _count; - int _pitch; - int _fracbits; - uint8_t *_colormap[4]; - const uint8_t *_source[4]; - uint32_t _iscale[4]; - uint32_t _texturefrac[4]; - uint32_t *_srcblend; - uint32_t *_destblend; - }; - class DrawWall1PalCommand : public PalWall1Command { public: void Execute(DrawerThread *thread) override; }; - class DrawWall4PalCommand : public PalWall4Command { public: void Execute(DrawerThread *thread) override; }; class DrawWallMasked1PalCommand : public PalWall1Command { public: void Execute(DrawerThread *thread) 
override; }; - class DrawWallMasked4PalCommand : public PalWall4Command { public: void Execute(DrawerThread *thread) override; }; class DrawWallAdd1PalCommand : public PalWall1Command { public: void Execute(DrawerThread *thread) override; }; - class DrawWallAdd4PalCommand : public PalWall4Command { public: void Execute(DrawerThread *thread) override; }; class DrawWallAddClamp1PalCommand : public PalWall1Command { public: void Execute(DrawerThread *thread) override; }; - class DrawWallAddClamp4PalCommand : public PalWall4Command { public: void Execute(DrawerThread *thread) override; }; class DrawWallSubClamp1PalCommand : public PalWall1Command { public: void Execute(DrawerThread *thread) override; }; - class DrawWallSubClamp4PalCommand : public PalWall4Command { public: void Execute(DrawerThread *thread) override; }; class DrawWallRevSubClamp1PalCommand : public PalWall1Command { public: void Execute(DrawerThread *thread) override; }; - class DrawWallRevSubClamp4PalCommand : public PalWall4Command { public: void Execute(DrawerThread *thread) override; }; class PalSkyCommand : public DrawerCommand { @@ -85,9 +60,7 @@ namespace swrenderer }; class DrawSingleSky1PalCommand : public PalSkyCommand { public: using PalSkyCommand::PalSkyCommand; void Execute(DrawerThread *thread) override; }; - class DrawSingleSky4PalCommand : public PalSkyCommand { public: using PalSkyCommand::PalSkyCommand; void Execute(DrawerThread *thread) override; }; class DrawDoubleSky1PalCommand : public PalSkyCommand { public: using PalSkyCommand::PalSkyCommand; void Execute(DrawerThread *thread) override; }; - class DrawDoubleSky4PalCommand : public PalSkyCommand { public: using PalSkyCommand::PalSkyCommand; void Execute(DrawerThread *thread) override; }; class PalColumnCommand : public DrawerCommand { diff --git a/src/swrenderer/drawers/r_draw_rgba.cpp b/src/swrenderer/drawers/r_draw_rgba.cpp index b3e970a6a1..5cdaea108b 100644 --- a/src/swrenderer/drawers/r_draw_rgba.cpp +++ 
b/src/swrenderer/drawers/r_draw_rgba.cpp @@ -181,72 +181,6 @@ namespace swrenderer ///////////////////////////////////////////////////////////////////////////// - WorkerThreadData DrawWall4LLVMCommand::ThreadData(DrawerThread *thread) - { - WorkerThreadData d; - d.core = thread->core; - d.num_cores = thread->num_cores; - d.pass_start_y = thread->pass_start_y; - d.pass_end_y = thread->pass_end_y; - return d; - } - - DrawWall4LLVMCommand::DrawWall4LLVMCommand() - { - using namespace drawerargs; - - args.dest = (uint32_t*)dc_dest; - args.dest_y = _dest_y; - args.count = dc_count; - args.pitch = dc_pitch; - args.light_red = dc_shade_constants.light_red; - args.light_green = dc_shade_constants.light_green; - args.light_blue = dc_shade_constants.light_blue; - args.light_alpha = dc_shade_constants.light_alpha; - args.fade_red = dc_shade_constants.fade_red; - args.fade_green = dc_shade_constants.fade_green; - args.fade_blue = dc_shade_constants.fade_blue; - args.fade_alpha = dc_shade_constants.fade_alpha; - args.desaturate = dc_shade_constants.desaturate; - for (int i = 0; i < 4; i++) - { - args.texturefrac[i] = dc_wall_texturefrac[i]; - args.iscale[i] = dc_wall_iscale[i]; - args.texturefracx[i] = dc_wall_texturefracx[i]; - args.textureheight[i] = dc_wall_sourceheight[i]; - args.source[i] = (const uint32_t *)dc_wall_source[i]; - args.source2[i] = (const uint32_t *)dc_wall_source2[i]; - args.light[i] = LightBgra::calc_light_multiplier(dc_wall_light[i]); - } - args.srcalpha = dc_srcalpha >> (FRACBITS - 8); - args.destalpha = dc_destalpha >> (FRACBITS - 8); - args.flags = 0; - if (dc_shade_constants.simple_shade) - args.flags |= DrawWallArgs::simple_shade; - if (args.source2[0] == nullptr) - args.flags |= DrawWallArgs::nearest_filter; - - args.z = 0.0f; - args.step_z = 0.0f; - args.dynlights = nullptr; - args.num_dynlights = 0; - - DetectRangeError(args.dest, args.dest_y, args.count); - } - - void DrawWall4LLVMCommand::Execute(DrawerThread *thread) - { - WorkerThreadData d = 
ThreadData(thread); - Drawers::Instance()->vlinec4(&args, &d); - } - - FString DrawWall4LLVMCommand::DebugInfo() - { - return "DrawWall4\n" + args.ToString(); - } - - ///////////////////////////////////////////////////////////////////////////// - WorkerThreadData DrawWall1LLVMCommand::ThreadData(DrawerThread *thread) { WorkerThreadData d; diff --git a/src/swrenderer/drawers/r_draw_rgba.h b/src/swrenderer/drawers/r_draw_rgba.h index 1364d537bd..cd3bb7c1d2 100644 --- a/src/swrenderer/drawers/r_draw_rgba.h +++ b/src/swrenderer/drawers/r_draw_rgba.h @@ -117,20 +117,6 @@ namespace swrenderer void Execute(DrawerThread *thread) override; }; - class DrawWall4LLVMCommand : public DrawerCommand - { - protected: - DrawWallArgs args; - - WorkerThreadData ThreadData(DrawerThread *thread); - - public: - DrawWall4LLVMCommand(); - - void Execute(DrawerThread *thread) override; - FString DebugInfo() override; - }; - class DrawWall1LLVMCommand : public DrawerCommand { protected: @@ -171,11 +157,6 @@ namespace swrenderer FString DebugInfo() override; }; - DECLARE_DRAW_COMMAND(DrawWallMasked4, mvlinec4, DrawWall4LLVMCommand); - DECLARE_DRAW_COMMAND(DrawWallAdd4, tmvline4_add, DrawWall4LLVMCommand); - DECLARE_DRAW_COMMAND(DrawWallAddClamp4, tmvline4_addclamp, DrawWall4LLVMCommand); - DECLARE_DRAW_COMMAND(DrawWallSubClamp4, tmvline4_subclamp, DrawWall4LLVMCommand); - DECLARE_DRAW_COMMAND(DrawWallRevSubClamp4, tmvline4_revsubclamp, DrawWall4LLVMCommand); DECLARE_DRAW_COMMAND(DrawWallMasked1, mvlinec1, DrawWall1LLVMCommand); DECLARE_DRAW_COMMAND(DrawWallAdd1, tmvline1_add, DrawWall1LLVMCommand); DECLARE_DRAW_COMMAND(DrawWallAddClamp1, tmvline1_addclamp, DrawWall1LLVMCommand); @@ -197,9 +178,7 @@ namespace swrenderer DECLARE_DRAW_COMMAND(FillColumnSubClamp, FillColumnSubClamp, DrawColumnLLVMCommand); DECLARE_DRAW_COMMAND(FillColumnRevSubClamp, FillColumnRevSubClamp, DrawColumnLLVMCommand); DECLARE_DRAW_COMMAND(DrawSingleSky1, DrawSky1, DrawSkyLLVMCommand); - 
DECLARE_DRAW_COMMAND(DrawSingleSky4, DrawSky4, DrawSkyLLVMCommand); DECLARE_DRAW_COMMAND(DrawDoubleSky1, DrawDoubleSky1, DrawSkyLLVMCommand); - DECLARE_DRAW_COMMAND(DrawDoubleSky4, DrawDoubleSky4, DrawSkyLLVMCommand); class DrawFuzzColumnRGBACommand : public DrawerCommand { diff --git a/src/swrenderer/drawers/r_drawers.cpp b/src/swrenderer/drawers/r_drawers.cpp index e61e951837..57c33a5be5 100644 --- a/src/swrenderer/drawers/r_drawers.cpp +++ b/src/swrenderer/drawers/r_drawers.cpp @@ -78,9 +78,7 @@ extern "C" void DrawSpanAddClamp_SSE2(const DrawSpanArgs *); void DrawSpanMaskedAddClamp_SSE2(const DrawSpanArgs *); void vlinec1_SSE2(const DrawWallArgs *, const WorkerThreadData *); - void vlinec4_SSE2(const DrawWallArgs *, const WorkerThreadData *); void mvlinec1_SSE2(const DrawWallArgs *, const WorkerThreadData *); - void mvlinec4_SSE2(const DrawWallArgs *, const WorkerThreadData *); void tmvline1_add_SSE2(const DrawWallArgs *, const WorkerThreadData *); void tmvline4_add_SSE2(const DrawWallArgs *, const WorkerThreadData *); void tmvline1_addclamp_SSE2(const DrawWallArgs *, const WorkerThreadData *); @@ -90,9 +88,7 @@ extern "C" void tmvline1_revsubclamp_SSE2(const DrawWallArgs *, const WorkerThreadData *); void tmvline4_revsubclamp_SSE2(const DrawWallArgs *, const WorkerThreadData *); void DrawSky1_SSE2(const DrawSkyArgs *, const WorkerThreadData *); - void DrawSky4_SSE2(const DrawSkyArgs *, const WorkerThreadData *); void DrawDoubleSky1_SSE2(const DrawSkyArgs *, const WorkerThreadData *); - void DrawDoubleSky4_SSE2(const DrawSkyArgs *, const WorkerThreadData *); void TriDraw8_0_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); void TriDraw8_1_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); void TriDraw8_2_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); @@ -182,21 +178,13 @@ Drawers::Drawers() DrawSpanAddClamp = DrawSpanAddClamp_SSE2; DrawSpanMaskedAddClamp = DrawSpanMaskedAddClamp_SSE2; vlinec1 = vlinec1_SSE2; - vlinec4 = vlinec4_SSE2; mvlinec1 = 
mvlinec1_SSE2; - mvlinec4 = mvlinec4_SSE2; tmvline1_add = tmvline1_add_SSE2; - tmvline4_add = tmvline4_add_SSE2; tmvline1_addclamp = tmvline1_addclamp_SSE2; - tmvline4_addclamp = tmvline4_addclamp_SSE2; tmvline1_subclamp = tmvline1_subclamp_SSE2; - tmvline4_subclamp = tmvline4_subclamp_SSE2; tmvline1_revsubclamp = tmvline1_revsubclamp_SSE2; - tmvline4_revsubclamp = tmvline4_revsubclamp_SSE2; DrawSky1 = DrawSky1_SSE2; - DrawSky4 = DrawSky4_SSE2; DrawDoubleSky1 = DrawDoubleSky1_SSE2; - DrawDoubleSky4 = DrawDoubleSky4_SSE2; TriDraw8.push_back(TriDraw8_0_SSE2); TriDraw8.push_back(TriDraw8_1_SSE2); TriDraw8.push_back(TriDraw8_2_SSE2); diff --git a/src/swrenderer/drawers/r_drawers.h b/src/swrenderer/drawers/r_drawers.h index 7a94c82ddd..0a79f26d53 100644 --- a/src/swrenderer/drawers/r_drawers.h +++ b/src/swrenderer/drawers/r_drawers.h @@ -317,22 +317,14 @@ public: void(*DrawSpanMaskedAddClamp)(const DrawSpanArgs *) = nullptr; void(*vlinec1)(const DrawWallArgs *, const WorkerThreadData *) = nullptr; - void(*vlinec4)(const DrawWallArgs *, const WorkerThreadData *) = nullptr; void(*mvlinec1)(const DrawWallArgs *, const WorkerThreadData *) = nullptr; - void(*mvlinec4)(const DrawWallArgs *, const WorkerThreadData *) = nullptr; void(*tmvline1_add)(const DrawWallArgs *, const WorkerThreadData *) = nullptr; - void(*tmvline4_add)(const DrawWallArgs *, const WorkerThreadData *) = nullptr; void(*tmvline1_addclamp)(const DrawWallArgs *, const WorkerThreadData *) = nullptr; - void(*tmvline4_addclamp)(const DrawWallArgs *, const WorkerThreadData *) = nullptr; void(*tmvline1_subclamp)(const DrawWallArgs *, const WorkerThreadData *) = nullptr; - void(*tmvline4_subclamp)(const DrawWallArgs *, const WorkerThreadData *) = nullptr; void(*tmvline1_revsubclamp)(const DrawWallArgs *, const WorkerThreadData *) = nullptr; - void(*tmvline4_revsubclamp)(const DrawWallArgs *, const WorkerThreadData *) = nullptr; void(*DrawSky1)(const DrawSkyArgs *, const WorkerThreadData *) = nullptr; - 
void(*DrawSky4)(const DrawSkyArgs *, const WorkerThreadData *) = nullptr; void(*DrawDoubleSky1)(const DrawSkyArgs *, const WorkerThreadData *) = nullptr; - void(*DrawDoubleSky4)(const DrawSkyArgs *, const WorkerThreadData *) = nullptr; std::vector TriDraw8; std::vector TriDraw32; diff --git a/src/swrenderer/scene/r_plane.cpp b/src/swrenderer/scene/r_plane.cpp index bcd0d5e5f3..8f9661fbdb 100644 --- a/src/swrenderer/scene/r_plane.cpp +++ b/src/swrenderer/scene/r_plane.cpp @@ -931,16 +931,10 @@ static void R_DrawSkyColumnStripe(int start_x, int y1, int y2, int columns, doub uint32_t solid_top = frontskytex->GetSkyCapColor(false); uint32_t solid_bottom = frontskytex->GetSkyCapColor(true); - if (columns == 4) - if (!backskytex) - R_DrawSingleSkyCol4(solid_top, solid_bottom); - else - R_DrawDoubleSkyCol4(solid_top, solid_bottom); + if (!backskytex) + R_DrawSingleSkyColumn(solid_top, solid_bottom); else - if (!backskytex) - R_DrawSingleSkyCol1(solid_top, solid_bottom); - else - R_DrawDoubleSkyCol1(solid_top, solid_bottom); + R_DrawDoubleSkyColumn(solid_top, solid_bottom); } static void R_DrawSkyColumn(int start_x, int y1, int y2, int columns) @@ -970,76 +964,7 @@ static void R_DrawCapSky(visplane_t *pl) short *uwal = (short *)pl->top; short *dwal = (short *)pl->bottom; - // Calculate where 4 column alignment begins and ends: - int aligned_x1 = clamp((x1 + 3) / 4 * 4, x1, x2); - int aligned_x2 = clamp(x2 / 4 * 4, x1, x2); - - // First unaligned columns: - for (int x = x1; x < aligned_x1; x++) - { - int y1 = uwal[x]; - int y2 = dwal[x]; - if (y2 <= y1) - continue; - - R_DrawSkyColumn(x, y1, y2, 1); - } - - // The aligned columns - for (int x = aligned_x1; x < aligned_x2; x += 4) - { - // Find y1, y2, light and uv values for four columns: - int y1[4] = { uwal[x], uwal[x + 1], uwal[x + 2], uwal[x + 3] }; - int y2[4] = { dwal[x], dwal[x + 1], dwal[x + 2], dwal[x + 3] }; - - // Figure out where we vertically can start and stop drawing 4 columns in one go - int middle_y1 = 
y1[0]; - int middle_y2 = y2[0]; - for (int i = 1; i < 4; i++) - { - middle_y1 = MAX(y1[i], middle_y1); - middle_y2 = MIN(y2[i], middle_y2); - } - - // If we got an empty column in our set we cannot draw 4 columns in one go: - bool empty_column_in_set = false; - for (int i = 0; i < 4; i++) - { - if (y2[i] <= y1[i]) - empty_column_in_set = true; - } - if (empty_column_in_set || middle_y2 <= middle_y1) - { - for (int i = 0; i < 4; i++) - { - if (y2[i] <= y1[i]) - continue; - - R_DrawSkyColumn(x + i, y1[i], y2[i], 1); - } - continue; - } - - // Draw the first rows where not all 4 columns are active - for (int i = 0; i < 4; i++) - { - if (y1[i] < middle_y1) - R_DrawSkyColumn(x + i, y1[i], middle_y1, 1); - } - - // Draw the area where all 4 columns are active - R_DrawSkyColumn(x, middle_y1, middle_y2, 4); - - // Draw the last rows where not all 4 columns are active - for (int i = 0; i < 4; i++) - { - if (middle_y2 < y2[i]) - R_DrawSkyColumn(x + i, middle_y2, y2[i], 1); - } - } - - // The last unaligned columns: - for (int x = aligned_x2; x < x2; x++) + for (int x = x1; x < x2; x++) { int y1 = uwal[x]; int y2 = dwal[x]; diff --git a/src/swrenderer/scene/r_walldraw.cpp b/src/swrenderer/scene/r_walldraw.cpp index 41e27d40e9..24d22b2c53 100644 --- a/src/swrenderer/scene/r_walldraw.cpp +++ b/src/swrenderer/scene/r_walldraw.cpp @@ -670,102 +670,11 @@ static void Draw1Column(int x, int y1, int y2, WallSampler &sampler, void(*draw1 } } -// Draw four columns with support for non-power-of-two ranges -static void Draw4Columns(int x, int y1, int y2, WallSampler *sampler, void(*draw4columns)()) -{ - if (r_swtruecolor) - { - int count = y2 - y1; - for (int i = 0; i < 4; i++) - { - dc_wall_source[i] = sampler[i].source; - dc_wall_source2[i] = sampler[i].source2; - dc_wall_texturefracx[i] = sampler[i].texturefracx; - dc_wall_sourceheight[i] = sampler[i].height; - dc_wall_texturefrac[i] = sampler[i].uv_pos; - dc_wall_iscale[i] = sampler[i].uv_step; - - uint64_t step64 = 
sampler[i].uv_step; - uint64_t pos64 = sampler[i].uv_pos; - sampler[i].uv_pos = (uint32_t)(pos64 + step64 * count); - } - dc_dest = (ylookup[y1] + x) * 4 + dc_destorg; - dc_count = count; - draw4columns(); - } - else - { - if (sampler[0].uv_max == 0 || sampler[0].uv_step == 0) // power of two, no wrap handling needed - { - int count = y2 - y1; - for (int i = 0; i < 4; i++) - { - dc_wall_source[i] = sampler[i].source; - dc_wall_source2[i] = sampler[i].source2; - dc_wall_texturefracx[i] = sampler[i].texturefracx; - dc_wall_texturefrac[i] = sampler[i].uv_pos; - dc_wall_iscale[i] = sampler[i].uv_step; - - uint64_t step64 = sampler[i].uv_step; - uint64_t pos64 = sampler[i].uv_pos; - sampler[i].uv_pos = (uint32_t)(pos64 + step64 * count); - } - dc_dest = (ylookup[y1] + x) + dc_destorg; - dc_count = count; - draw4columns(); - } - else - { - dc_dest = (ylookup[y1] + x) + dc_destorg; - for (int i = 0; i < 4; i++) - { - dc_wall_source[i] = sampler[i].source; - dc_wall_source2[i] = sampler[i].source2; - dc_wall_texturefracx[i] = sampler[i].texturefracx; - } - - uint32_t left = y2 - y1; - while (left > 0) - { - // Find which column wraps first - uint32_t count = left; - for (int i = 0; i < 4; i++) - { - uint32_t available = sampler[i].uv_max - sampler[i].uv_pos; - uint32_t next_uv_wrap = available / sampler[i].uv_step; - if (available % sampler[i].uv_step != 0) - next_uv_wrap++; - count = MIN(next_uv_wrap, count); - } - - // Draw until that column wraps - for (int i = 0; i < 4; i++) - { - dc_wall_texturefrac[i] = sampler[i].uv_pos; - dc_wall_iscale[i] = sampler[i].uv_step; - } - dc_count = count; - draw4columns(); - - // Wrap the uv position - for (int i = 0; i < 4; i++) - { - sampler[i].uv_pos += sampler[i].uv_step * count; - if (sampler[i].uv_pos >= sampler[i].uv_max) - sampler[i].uv_pos -= sampler[i].uv_max; - } - - left -= count; - } - } - } -} - typedef void(*DrawColumnFuncPtr)(); static void ProcessWallWorker( int x1, int x2, short *uwal, short *dwal, float *swal, 
fixed_t *lwal, double yrepeat, - const BYTE *(*getcol)(FTexture *tex, int x), DrawColumnFuncPtr draw1column, DrawColumnFuncPtr draw4columns) + const BYTE *(*getcol)(FTexture *tex, int x), DrawColumnFuncPtr drawcolumn) { if (rw_pic->UseType == FTexture::TEX_Null) return; @@ -805,33 +714,7 @@ static void ProcessWallWorker( double xmagnitude = 1.0; - if (r_dynlights) - { - for (int x = x1; x < x2; x++, light += rw_lightstep) - { - int y1 = uwal[x]; - int y2 = dwal[x]; - if (y2 <= y1) - continue; - - if (!fixed) - R_SetColorMapLight(basecolormap, light, wallshade); - - if (x + 1 < x2) xmagnitude = fabs(FIXED2DBL(lwal[x + 1]) - FIXED2DBL(lwal[x])); - - WallSampler sampler(y1, swal[x], yrepeat, lwal[x] + xoffset, xmagnitude, rw_pic, getcol); - Draw1Column(x, y1, y2, sampler, draw1column); - } - NetUpdate(); - return; - } - - // Calculate where 4 column alignment begins and ends: - int aligned_x1 = clamp((x1 + 3) / 4 * 4, x1, x2); - int aligned_x2 = clamp(x2 / 4 * 4, x1, x2); - - // First unaligned columns: - for (int x = x1; x < aligned_x1; x++, light += rw_lightstep) + for (int x = x1; x < x2; x++, light += rw_lightstep) { int y1 = uwal[x]; int y2 = dwal[x]; @@ -844,119 +727,7 @@ static void ProcessWallWorker( if (x + 1 < x2) xmagnitude = fabs(FIXED2DBL(lwal[x + 1]) - FIXED2DBL(lwal[x])); WallSampler sampler(y1, swal[x], yrepeat, lwal[x] + xoffset, xmagnitude, rw_pic, getcol); - Draw1Column(x, y1, y2, sampler, draw1column); - } - - // The aligned columns - for (int x = aligned_x1; x < aligned_x2; x += 4) - { - // Find y1, y2, light and uv values for four columns: - int y1[4] = { uwal[x], uwal[x + 1], uwal[x + 2], uwal[x + 3] }; - int y2[4] = { dwal[x], dwal[x + 1], dwal[x + 2], dwal[x + 3] }; - - float lights[4]; - for (int i = 0; i < 4; i++) - { - lights[i] = light; - light += rw_lightstep; - } - - WallSampler sampler[4]; - for (int i = 0; i < 4; i++) - { - if (x + i + 1 < x2) xmagnitude = fabs(FIXED2DBL(lwal[x + i + 1]) - FIXED2DBL(lwal[x + i])); - sampler[i] = 
WallSampler(y1[i], swal[x + i], yrepeat, lwal[x + i] + xoffset, xmagnitude, rw_pic, getcol); - } - - // Figure out where we vertically can start and stop drawing 4 columns in one go - int middle_y1 = y1[0]; - int middle_y2 = y2[0]; - for (int i = 1; i < 4; i++) - { - middle_y1 = MAX(y1[i], middle_y1); - middle_y2 = MIN(y2[i], middle_y2); - } - - // If we got an empty column in our set we cannot draw 4 columns in one go: - bool empty_column_in_set = false; - int bilinear_count = 0; - for (int i = 0; i < 4; i++) - { - if (y2[i] <= y1[i]) - empty_column_in_set = true; - if (sampler[i].source2) - bilinear_count++; - } - - if (empty_column_in_set || middle_y2 <= middle_y1 || (bilinear_count > 0 && bilinear_count < 4)) - { - for (int i = 0; i < 4; i++) - { - if (y2[i] <= y1[i]) - continue; - - if (!fixed) - R_SetColorMapLight(basecolormap, lights[i], wallshade); - Draw1Column(x + i, y1[i], y2[i], sampler[i], draw1column); - } - continue; - } - - // Draw the first rows where not all 4 columns are active - for (int i = 0; i < 4; i++) - { - if (!fixed) - R_SetColorMapLight(basecolormap, lights[i], wallshade); - - if (y1[i] < middle_y1) - Draw1Column(x + i, y1[i], middle_y1, sampler[i], draw1column); - } - - // Draw the area where all 4 columns are active - if (!fixed) - { - for (int i = 0; i < 4; i++) - { - if (r_swtruecolor) - { - dc_wall_colormap[i] = basecolormap->Maps; - dc_wall_light[i] = LIGHTSCALE(lights[i], wallshade); - } - else - { - dc_wall_colormap[i] = basecolormap->Maps + (GETPALOOKUP(lights[i], wallshade) << COLORMAPSHIFT); - dc_wall_light[i] = 0; - } - } - } - Draw4Columns(x, middle_y1, middle_y2, sampler, draw4columns); - - // Draw the last rows where not all 4 columns are active - for (int i = 0; i < 4; i++) - { - if (!fixed) - R_SetColorMapLight(basecolormap, lights[i], wallshade); - - if (middle_y2 < y2[i]) - Draw1Column(x + i, middle_y2, y2[i], sampler[i], draw1column); - } - } - - // The last unaligned columns: - for (int x = aligned_x2; x < x2; x++, 
light += rw_lightstep) - { - int y1 = uwal[x]; - int y2 = dwal[x]; - if (y2 <= y1) - continue; - - if (!fixed) - R_SetColorMapLight(basecolormap, light, wallshade); - - if (x + 1 < x2) xmagnitude = fabs(FIXED2DBL(lwal[x + 1]) - FIXED2DBL(lwal[x])); - - WallSampler sampler(y1, swal[x], yrepeat, lwal[x] + xoffset, xmagnitude, rw_pic, getcol); - Draw1Column(x, y1, y2, sampler, draw1column); + Draw1Column(x, y1, y2, sampler, drawcolumn); } NetUpdate(); @@ -964,7 +735,7 @@ static void ProcessWallWorker( static void ProcessNormalWall(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x) = R_GetColumn) { - ProcessWallWorker(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, R_DrawWallCol1, R_DrawWallCol4); + ProcessWallWorker(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, R_DrawWallColumn); } static void ProcessMaskedWall(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x) = R_GetColumn) @@ -975,22 +746,21 @@ static void ProcessMaskedWall(int x1, int x2, short *uwal, short *dwal, float *s } else { - ProcessWallWorker(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, R_DrawWallMaskedCol1, R_DrawWallMaskedCol4); + ProcessWallWorker(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, R_DrawWallMaskedColumn); } } static void ProcessTranslucentWall(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x) = R_GetColumn) { - static void(*drawcol1)(); - static void(*drawcol4)(); - if (!R_GetTransMaskDrawers(&drawcol1, &drawcol4)) + void(*drawcol1)(); + if (!R_GetTransMaskDrawers(&drawcol1)) { // The current translucency is unsupported, so draw with regular ProcessMaskedWall instead. 
ProcessMaskedWall(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol); } else { - ProcessWallWorker(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, drawcol1, drawcol4); + ProcessWallWorker(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, drawcol1); } } diff --git a/tools/drawergen/fixedfunction/drawskycodegen.cpp b/tools/drawergen/fixedfunction/drawskycodegen.cpp index 3a05818703..3bd23e5290 100644 --- a/tools/drawergen/fixedfunction/drawskycodegen.cpp +++ b/tools/drawergen/fixedfunction/drawskycodegen.cpp @@ -32,28 +32,16 @@ #include "ssa/ssa_struct_type.h" #include "ssa/ssa_value.h" -void DrawSkyCodegen::Generate(DrawSkyVariant variant, bool fourColumns, SSAValue args, SSAValue thread_data) +void DrawSkyCodegen::Generate(DrawSkyVariant variant, SSAValue args, SSAValue thread_data) { dest = args[0][0].load(true); - source0[0] = args[0][1].load(true); - source0[1] = args[0][2].load(true); - source0[2] = args[0][3].load(true); - source0[3] = args[0][4].load(true); - source1[0] = args[0][5].load(true); - source1[1] = args[0][6].load(true); - source1[2] = args[0][7].load(true); - source1[3] = args[0][8].load(true); + source0 = args[0][1].load(true); + source1 = args[0][5].load(true); pitch = args[0][9].load(true); count = args[0][10].load(true); dest_y = args[0][11].load(true); - texturefrac[0] = args[0][12].load(true); - texturefrac[1] = args[0][13].load(true); - texturefrac[2] = args[0][14].load(true); - texturefrac[3] = args[0][15].load(true); - iscale[0] = args[0][16].load(true); - iscale[1] = args[0][17].load(true); - iscale[2] = args[0][18].load(true); - iscale[3] = args[0][19].load(true); + texturefrac = args[0][12].load(true); + iscale = args[0][16].load(true); textureheight0 = args[0][20].load(true); SSAInt textureheight1 = args[0][21].load(true); maxtextureheight1 = textureheight1 - 1; @@ -70,66 +58,45 @@ void DrawSkyCodegen::Generate(DrawSkyVariant variant, bool fourColumns, SSAValue pitch = pitch * thread.num_cores; - int numColumns = fourColumns ? 
4 : 1; - for (int i = 0; i < numColumns; i++) - { - stack_frac[i].store(texturefrac[i] + iscale[i] * skipped_by_thread(dest_y, thread)); - fracstep[i] = iscale[i] * thread.num_cores; - } + stack_frac.store(texturefrac + iscale * skipped_by_thread(dest_y, thread)); + fracstep = iscale * thread.num_cores; - Loop(variant, fourColumns); + Loop(variant); } -void DrawSkyCodegen::Loop(DrawSkyVariant variant, bool fourColumns) +void DrawSkyCodegen::Loop(DrawSkyVariant variant) { - int numColumns = fourColumns ? 4 : 1; - stack_index.store(SSAInt(0)); { SSAForBlock loop; SSAInt index = stack_index.load(); loop.loop_block(index < count); - SSAInt frac[4]; - for (int i = 0; i < numColumns; i++) - frac[i] = stack_frac[i].load(); + SSAInt frac = stack_frac.load(); SSAInt offset = index * pitch * 4; - if (fourColumns) - { - SSAVec4i colors[4]; - for (int i = 0; i < 4; i++) - colors[i] = FadeOut(frac[i], Sample(frac[i], i, variant)); - - SSAVec16ub color(SSAVec8s(colors[0], colors[1]), SSAVec8s(colors[2], colors[3])); - dest[offset].store_unaligned_vec16ub(color); - } - else - { - SSAVec4i color = FadeOut(frac[0], Sample(frac[0], 0, variant)); - dest[offset].store_vec4ub(color); - } + SSAVec4i color = FadeOut(frac, Sample(frac, variant)); + dest[offset].store_vec4ub(color); stack_index.store(index.add(SSAInt(1), true, true)); - for (int i = 0; i < numColumns; i++) - stack_frac[i].store(frac[i] + fracstep[i]); + stack_frac.store(frac + fracstep); loop.end_block(); } } -SSAVec4i DrawSkyCodegen::Sample(SSAInt frac, int index, DrawSkyVariant variant) +SSAVec4i DrawSkyCodegen::Sample(SSAInt frac, DrawSkyVariant variant) { SSAInt sample_index = (((frac << 8) >> FRACBITS) * textureheight0) >> FRACBITS; if (variant == DrawSkyVariant::Single) { - return source0[index][sample_index * 4].load_vec4ub(false); + return source0[sample_index * 4].load_vec4ub(false); } else { SSAInt sample_index2 = SSAInt::MIN(sample_index, maxtextureheight1); - SSAVec4i color0 = source0[index][sample_index * 
4].load_vec4ub(false); - SSAVec4i color1 = source1[index][sample_index2 * 4].load_vec4ub(false); + SSAVec4i color0 = source0[sample_index * 4].load_vec4ub(false); + SSAVec4i color1 = source1[sample_index2 * 4].load_vec4ub(false); return blend_alpha_blend(color0, color1); } } diff --git a/tools/drawergen/fixedfunction/drawskycodegen.h b/tools/drawergen/fixedfunction/drawskycodegen.h index aaf4bdfebd..6cd700203f 100644 --- a/tools/drawergen/fixedfunction/drawskycodegen.h +++ b/tools/drawergen/fixedfunction/drawskycodegen.h @@ -33,28 +33,28 @@ enum class DrawSkyVariant class DrawSkyCodegen : public DrawerCodegen { public: - void Generate(DrawSkyVariant variant, bool fourColumns, SSAValue args, SSAValue thread_data); + void Generate(DrawSkyVariant variant, SSAValue args, SSAValue thread_data); private: - void Loop(DrawSkyVariant variant, bool fourColumns); - SSAVec4i Sample(SSAInt frac, int index, DrawSkyVariant variant); + void Loop(DrawSkyVariant variant); + SSAVec4i Sample(SSAInt frac, DrawSkyVariant variant); SSAVec4i FadeOut(SSAInt frac, SSAVec4i color); - SSAStack stack_index, stack_frac[4]; + SSAStack stack_index, stack_frac; SSAUBytePtr dest; - SSAUBytePtr source0[4]; - SSAUBytePtr source1[4]; + SSAUBytePtr source0; + SSAUBytePtr source1; SSAInt pitch; SSAInt count; SSAInt dest_y; - SSAInt texturefrac[4]; - SSAInt iscale[4]; + SSAInt texturefrac; + SSAInt iscale; SSAInt textureheight0; SSAInt maxtextureheight1; SSAVec4i top_color; SSAVec4i bottom_color; SSAWorkerThread thread; - SSAInt fracstep[4]; + SSAInt fracstep; }; diff --git a/tools/drawergen/fixedfunction/drawwallcodegen.cpp b/tools/drawergen/fixedfunction/drawwallcodegen.cpp index 898aebdbb3..055b132d1a 100644 --- a/tools/drawergen/fixedfunction/drawwallcodegen.cpp +++ b/tools/drawergen/fixedfunction/drawwallcodegen.cpp @@ -32,40 +32,19 @@ #include "ssa/ssa_struct_type.h" #include "ssa/ssa_value.h" -void DrawWallCodegen::Generate(DrawWallVariant variant, bool fourColumns, SSAValue args, SSAValue 
thread_data) +void DrawWallCodegen::Generate(DrawWallVariant variant, SSAValue args, SSAValue thread_data) { dest = args[0][0].load(true); - source[0] = args[0][1].load(true); - source[1] = args[0][2].load(true); - source[2] = args[0][3].load(true); - source[3] = args[0][4].load(true); - source2[0] = args[0][5].load(true); - source2[1] = args[0][6].load(true); - source2[2] = args[0][7].load(true); - source2[3] = args[0][8].load(true); + source = args[0][1].load(true); + source2 = args[0][5].load(true); pitch = args[0][9].load(true); count = args[0][10].load(true); dest_y = args[0][11].load(true); - texturefrac[0] = args[0][12].load(true); - texturefrac[1] = args[0][13].load(true); - texturefrac[2] = args[0][14].load(true); - texturefrac[3] = args[0][15].load(true); - texturefracx[0] = args[0][16].load(true); - texturefracx[1] = args[0][17].load(true); - texturefracx[2] = args[0][18].load(true); - texturefracx[3] = args[0][19].load(true); - iscale[0] = args[0][20].load(true); - iscale[1] = args[0][21].load(true); - iscale[2] = args[0][22].load(true); - iscale[3] = args[0][23].load(true); - textureheight[0] = args[0][24].load(true); - textureheight[1] = args[0][25].load(true); - textureheight[2] = args[0][26].load(true); - textureheight[3] = args[0][27].load(true); - light[0] = args[0][28].load(true); - light[1] = args[0][29].load(true); - light[2] = args[0][30].load(true); - light[3] = args[0][31].load(true); + texturefrac = args[0][12].load(true); + texturefracx = args[0][16].load(true); + iscale = args[0][20].load(true); + textureheight = args[0][24].load(true); + light = args[0][28].load(true); srcalpha = args[0][32].load(true); destalpha = args[0][33].load(true); SSAShort light_alpha = args[0][34].load(true); @@ -99,42 +78,34 @@ void DrawWallCodegen::Generate(DrawWallVariant variant, bool fourColumns, SSAVal pitch = pitch * thread.num_cores; - int numColumns = fourColumns ? 
4 : 1; - for (int i = 0; i < numColumns; i++) - { - stack_frac[i].store(texturefrac[i] + iscale[i] * skipped_by_thread(dest_y, thread)); - fracstep[i] = iscale[i] * thread.num_cores; - one[i] = ((0x80000000 + textureheight[i] - 1) / textureheight[i]) * 2 + 1; - } + stack_frac.store(texturefrac + iscale * skipped_by_thread(dest_y, thread)); + fracstep = iscale * thread.num_cores; + one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; start_z = start_z + step_z * SSAFloat(skipped_by_thread(dest_y, thread)); step_z = step_z * SSAFloat(thread.num_cores); SSAIfBlock branch; branch.if_block(is_simple_shade); - LoopShade(variant, fourColumns, true); + LoopShade(variant, true); branch.else_block(); - LoopShade(variant, fourColumns, false); + LoopShade(variant, false); branch.end_block(); } -void DrawWallCodegen::LoopShade(DrawWallVariant variant, bool fourColumns, bool isSimpleShade) +void DrawWallCodegen::LoopShade(DrawWallVariant variant, bool isSimpleShade) { SSAIfBlock branch; branch.if_block(is_nearest_filter); - Loop(variant, fourColumns, isSimpleShade, true); + Loop(variant, isSimpleShade, true); branch.else_block(); - int numColumns = fourColumns ? 4 : 1; - for (int i = 0; i < numColumns; i++) - stack_frac[i].store(stack_frac[i].load() - (one[i] / 2)); - Loop(variant, fourColumns, isSimpleShade, false); + stack_frac.store(stack_frac.load() - (one / 2)); + Loop(variant, isSimpleShade, false); branch.end_block(); } -void DrawWallCodegen::Loop(DrawWallVariant variant, bool fourColumns, bool isSimpleShade, bool isNearestFilter) +void DrawWallCodegen::Loop(DrawWallVariant variant, bool isSimpleShade, bool isNearestFilter) { - int numColumns = fourColumns ? 
4 : 1; - stack_index.store(SSAInt(0)); stack_z.store(start_z); { @@ -143,57 +114,30 @@ void DrawWallCodegen::Loop(DrawWallVariant variant, bool fourColumns, bool isSim z = stack_z.load(); loop.loop_block(index < count); - SSAInt frac[4]; - for (int i = 0; i < numColumns; i++) - frac[i] = stack_frac[i].load(); - + SSAInt frac = stack_frac.load(); SSAInt offset = index * pitch * 4; - if (fourColumns) - { - SSAVec16ub bg = dest[offset].load_unaligned_vec16ub(false); - SSAVec8s bg0 = SSAVec8s::extendlo(bg); - SSAVec8s bg1 = SSAVec8s::extendhi(bg); - SSAVec4i bgcolors[4] = - { - SSAVec4i::extendlo(bg0), - SSAVec4i::extendhi(bg0), - SSAVec4i::extendlo(bg1), - SSAVec4i::extendhi(bg1) - }; - - SSAVec4i colors[4]; - for (int i = 0; i < 4; i++) - colors[i] = Blend(Shade(Sample(frac[i], i, isNearestFilter), i, isSimpleShade), bgcolors[i], variant); - - SSAVec16ub color(SSAVec8s(colors[0], colors[1]), SSAVec8s(colors[2], colors[3])); - dest[offset].store_unaligned_vec16ub(color); - } - else - { - SSAVec4i bgcolor = dest[offset].load_vec4ub(false); - SSAVec4i color = Blend(Shade(Sample(frac[0], 0, isNearestFilter), 0, isSimpleShade), bgcolor, variant); - dest[offset].store_vec4ub(color); - } + SSAVec4i bgcolor = dest[offset].load_vec4ub(false); + SSAVec4i color = Blend(Shade(Sample(frac, isNearestFilter), isSimpleShade), bgcolor, variant); + dest[offset].store_vec4ub(color); stack_z.store(z + step_z); stack_index.store(index.add(SSAInt(1), true, true)); - for (int i = 0; i < numColumns; i++) - stack_frac[i].store(frac[i] + fracstep[i]); + stack_frac.store(frac + fracstep); loop.end_block(); } } -SSAVec4i DrawWallCodegen::Sample(SSAInt frac, int index, bool isNearestFilter) +SSAVec4i DrawWallCodegen::Sample(SSAInt frac, bool isNearestFilter) { if (isNearestFilter) { - SSAInt sample_index = ((frac >> FRACBITS) * textureheight[index]) >> FRACBITS; - return source[index][sample_index * 4].load_vec4ub(false); + SSAInt sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; 
+ return source[sample_index * 4].load_vec4ub(false); } else { - return SampleLinear(source[index], source2[index], texturefracx[index], frac, one[index], textureheight[index]); + return SampleLinear(source, source2, texturefracx, frac, one, textureheight); } } @@ -217,13 +161,13 @@ SSAVec4i DrawWallCodegen::SampleLinear(SSAUBytePtr col0, SSAUBytePtr col1, SSAIn return (p00 * (a * b) + p01 * (inv_a * b) + p10 * (a * inv_b) + p11 * (inv_a * inv_b) + 127) >> 8; } -SSAVec4i DrawWallCodegen::Shade(SSAVec4i fg, int index, bool isSimpleShade) +SSAVec4i DrawWallCodegen::Shade(SSAVec4i fg, bool isSimpleShade) { SSAVec4i c; if (isSimpleShade) - c = shade_bgra_simple(fg, light[index]); + c = shade_bgra_simple(fg, light); else - c = shade_bgra_advanced(fg, light[index], shade_constants); + c = shade_bgra_advanced(fg, light, shade_constants); stack_lit_color.store(c); stack_light_index.store(SSAInt(0)); diff --git a/tools/drawergen/fixedfunction/drawwallcodegen.h b/tools/drawergen/fixedfunction/drawwallcodegen.h index cb46dcd5f4..1afb5396ae 100644 --- a/tools/drawergen/fixedfunction/drawwallcodegen.h +++ b/tools/drawergen/fixedfunction/drawwallcodegen.h @@ -37,31 +37,31 @@ enum class DrawWallVariant class DrawWallCodegen : public DrawerCodegen { public: - void Generate(DrawWallVariant variant, bool fourColumns, SSAValue args, SSAValue thread_data); + void Generate(DrawWallVariant variant, SSAValue args, SSAValue thread_data); private: - void LoopShade(DrawWallVariant variant, bool fourColumns, bool isSimpleShade); - void Loop(DrawWallVariant variant, bool fourColumns, bool isSimpleShade, bool isNearestFilter); - SSAVec4i Sample(SSAInt frac, int index, bool isNearestFilter); + void LoopShade(DrawWallVariant variant, bool isSimpleShade); + void Loop(DrawWallVariant variant, bool isSimpleShade, bool isNearestFilter); + SSAVec4i Sample(SSAInt frac, bool isNearestFilter); SSAVec4i SampleLinear(SSAUBytePtr col0, SSAUBytePtr col1, SSAInt texturefracx, SSAInt texturefracy, SSAInt one, 
SSAInt height); - SSAVec4i Shade(SSAVec4i fg, int index, bool isSimpleShade); + SSAVec4i Shade(SSAVec4i fg, bool isSimpleShade); SSAVec4i Blend(SSAVec4i fg, SSAVec4i bg, DrawWallVariant variant); - SSAStack stack_index, stack_frac[4], stack_light_index; + SSAStack stack_index, stack_frac, stack_light_index; SSAStack stack_lit_color; SSAStack stack_z; SSAUBytePtr dest; - SSAUBytePtr source[4]; - SSAUBytePtr source2[4]; + SSAUBytePtr source; + SSAUBytePtr source2; SSAInt pitch; SSAInt count; SSAInt dest_y; - SSAInt texturefrac[4]; - SSAInt texturefracx[4]; - SSAInt iscale[4]; - SSAInt textureheight[4]; - SSAInt light[4]; + SSAInt texturefrac; + SSAInt texturefracx; + SSAInt iscale; + SSAInt textureheight; + SSAInt light; SSAInt srcalpha; SSAInt destalpha; SSABool is_simple_shade; @@ -69,8 +69,8 @@ private: SSAShadeConstants shade_constants; SSAWorkerThread thread; - SSAInt fracstep[4]; - SSAInt one[4]; + SSAInt fracstep; + SSAInt one; SSAFloat start_z, step_z; diff --git a/tools/drawergen/llvmdrawers.cpp b/tools/drawergen/llvmdrawers.cpp index 0999199974..0ce8ee8305 100644 --- a/tools/drawergen/llvmdrawers.cpp +++ b/tools/drawergen/llvmdrawers.cpp @@ -51,22 +51,14 @@ LLVMDrawers::LLVMDrawers(const std::string &triple, const std::string &cpuName, CodegenDrawSpan("DrawSpanMaskedTranslucent", DrawSpanVariant::MaskedTranslucent); CodegenDrawSpan("DrawSpanAddClamp", DrawSpanVariant::AddClamp); CodegenDrawSpan("DrawSpanMaskedAddClamp", DrawSpanVariant::MaskedAddClamp); - CodegenDrawWall("vlinec1", DrawWallVariant::Opaque, 1); - CodegenDrawWall("vlinec4", DrawWallVariant::Opaque, 4); - CodegenDrawWall("mvlinec1", DrawWallVariant::Masked, 1); - CodegenDrawWall("mvlinec4", DrawWallVariant::Masked, 4); - CodegenDrawWall("tmvline1_add", DrawWallVariant::Add, 1); - CodegenDrawWall("tmvline4_add", DrawWallVariant::Add, 4); - CodegenDrawWall("tmvline1_addclamp", DrawWallVariant::AddClamp, 1); - CodegenDrawWall("tmvline4_addclamp", DrawWallVariant::AddClamp, 4); - 
CodegenDrawWall("tmvline1_subclamp", DrawWallVariant::SubClamp, 1); - CodegenDrawWall("tmvline4_subclamp", DrawWallVariant::SubClamp, 4); - CodegenDrawWall("tmvline1_revsubclamp", DrawWallVariant::RevSubClamp, 1); - CodegenDrawWall("tmvline4_revsubclamp", DrawWallVariant::RevSubClamp, 4); - CodegenDrawSky("DrawSky1", DrawSkyVariant::Single, 1); - CodegenDrawSky("DrawSky4", DrawSkyVariant::Single, 4); - CodegenDrawSky("DrawDoubleSky1", DrawSkyVariant::Double, 1); - CodegenDrawSky("DrawDoubleSky4", DrawSkyVariant::Double, 4); + CodegenDrawWall("vlinec1", DrawWallVariant::Opaque); + CodegenDrawWall("mvlinec1", DrawWallVariant::Masked); + CodegenDrawWall("tmvline1_add", DrawWallVariant::Add); + CodegenDrawWall("tmvline1_addclamp", DrawWallVariant::AddClamp); + CodegenDrawWall("tmvline1_subclamp", DrawWallVariant::SubClamp); + CodegenDrawWall("tmvline1_revsubclamp", DrawWallVariant::RevSubClamp); + CodegenDrawSky("DrawSky1", DrawSkyVariant::Single); + CodegenDrawSky("DrawDoubleSky1", DrawSkyVariant::Double); for (int i = 0; i < NumTriBlendModes(); i++) { CodegenDrawTriangle("TriDraw8_" + std::to_string(i), (TriBlendMode)i, false, false); @@ -115,7 +107,7 @@ void LLVMDrawers::CodegenDrawSpan(const char *name, DrawSpanVariant variant) throw Exception("verifyFunction failed for CodegenDrawSpan()"); } -void LLVMDrawers::CodegenDrawWall(const char *name, DrawWallVariant variant, int columns) +void LLVMDrawers::CodegenDrawWall(const char *name, DrawWallVariant variant) { llvm::IRBuilder<> builder(mProgram.context()); SSAScope ssa_scope(&mProgram.context(), mProgram.module(), &builder); @@ -126,7 +118,7 @@ void LLVMDrawers::CodegenDrawWall(const char *name, DrawWallVariant variant, int function.create_public(); DrawWallCodegen codegen; - codegen.Generate(variant, columns == 4, function.parameter(0), function.parameter(1)); + codegen.Generate(variant, function.parameter(0), function.parameter(1)); builder.CreateRetVoid(); @@ -134,7 +126,7 @@ void 
LLVMDrawers::CodegenDrawWall(const char *name, DrawWallVariant variant, int throw Exception("verifyFunction failed for CodegenDrawWall()"); } -void LLVMDrawers::CodegenDrawSky(const char *name, DrawSkyVariant variant, int columns) +void LLVMDrawers::CodegenDrawSky(const char *name, DrawSkyVariant variant) { llvm::IRBuilder<> builder(mProgram.context()); SSAScope ssa_scope(&mProgram.context(), mProgram.module(), &builder); @@ -145,7 +137,7 @@ void LLVMDrawers::CodegenDrawSky(const char *name, DrawSkyVariant variant, int c function.create_public(); DrawSkyCodegen codegen; - codegen.Generate(variant, columns == 4, function.parameter(0), function.parameter(1)); + codegen.Generate(variant, function.parameter(0), function.parameter(1)); builder.CreateRetVoid(); diff --git a/tools/drawergen/llvmdrawers.h b/tools/drawergen/llvmdrawers.h index a7f1c86254..dd66c2a86c 100644 --- a/tools/drawergen/llvmdrawers.h +++ b/tools/drawergen/llvmdrawers.h @@ -49,8 +49,8 @@ public: private: void CodegenDrawColumn(const char *name, DrawColumnVariant variant); void CodegenDrawSpan(const char *name, DrawSpanVariant variant); - void CodegenDrawWall(const char *name, DrawWallVariant variant, int columns); - void CodegenDrawSky(const char *name, DrawSkyVariant variant, int columns); + void CodegenDrawWall(const char *name, DrawWallVariant variant); + void CodegenDrawSky(const char *name, DrawSkyVariant variant); void CodegenDrawTriangle(const std::string &name, TriBlendMode blendmode, bool truecolor, bool colorfill); llvm::Type *GetDrawColumnArgsStruct(llvm::LLVMContext &context); From 46e9a0cdf99f7102773efa95ad7d9b2e2fb9cb82 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 27 Dec 2016 07:20:22 +0100 Subject: [PATCH 574/912] Remove zdoom ifdef version as we aren't doing merges from there anymore --- src/swrenderer/scene/r_walldraw.cpp | 349 ---------------------------- 1 file changed, 349 deletions(-) diff --git a/src/swrenderer/scene/r_walldraw.cpp 
b/src/swrenderer/scene/r_walldraw.cpp index 24d22b2c53..74aafe11e6 100644 --- a/src/swrenderer/scene/r_walldraw.cpp +++ b/src/swrenderer/scene/r_walldraw.cpp @@ -52,353 +52,6 @@ namespace swrenderer extern FTexture *rw_pic; extern int wallshade; -#if 0 // ZDoom version - -struct WallSampler -{ - WallSampler() { } - WallSampler(int y1, float swal, double yrepeat, fixed_t xoffset, FTexture *texture, const BYTE*(*getcol)(FTexture *texture, int x)); - - uint32_t uv_pos; - uint32_t uv_step; - uint32_t uv_max; - - const BYTE *source; - uint32_t height; -}; - -WallSampler::WallSampler(int y1, float swal, double yrepeat, fixed_t xoffset, FTexture *texture, const BYTE*(*getcol)(FTexture *texture, int x)) -{ - height = texture->GetHeight(); - - int uv_fracbits = 32 - texture->HeightBits; - if (uv_fracbits != 32) - { - uv_max = height << uv_fracbits; - - // Find start uv in [0-base_height[ range. - // Not using xs_ToFixed because it rounds the result and we need something that always rounds down to stay within the range. 
- double uv_stepd = swal * yrepeat; - double v = (dc_texturemid + uv_stepd * (y1 - CenterY + 0.5)) / height; - v = v - floor(v); - v *= height; - v *= (1 << uv_fracbits); - - uv_pos = (uint32_t)v; - uv_step = xs_ToFixed(uv_fracbits, uv_stepd); - if (uv_step == 0) // To prevent divide by zero elsewhere - uv_step = 1; - } - else - { // Hack for one pixel tall textures - uv_pos = 0; - uv_step = 0; - uv_max = 1; - } - - source = getcol(texture, xoffset >> FRACBITS); -} - -// Draw a column with support for non-power-of-two ranges -static void Draw1Column(int x, int y1, int y2, WallSampler &sampler, void(*draw1column)()) -{ - if (sampler.uv_max == 0 || sampler.uv_step == 0) // power of two - { - int count = y2 - y1; - - dc_source = sampler.source; - dc_dest = (ylookup[y1] + x) + dc_destorg; - dc_count = count; - dc_iscale = sampler.uv_step; - dc_texturefrac = sampler.uv_pos; - draw1column(); - - uint64_t step64 = sampler.uv_step; - uint64_t pos64 = sampler.uv_pos; - sampler.uv_pos = (uint32_t)(pos64 + step64 * count); - } - else - { - uint32_t uv_pos = sampler.uv_pos; - - uint32_t left = y2 - y1; - while (left > 0) - { - uint32_t available = sampler.uv_max - uv_pos; - uint32_t next_uv_wrap = available / sampler.uv_step; - if (available % sampler.uv_step != 0) - next_uv_wrap++; - uint32_t count = MIN(left, next_uv_wrap); - - dc_source = sampler.source; - dc_dest = (ylookup[y1] + x) + dc_destorg; - dc_count = count; - dc_iscale = sampler.uv_step; - dc_texturefrac = uv_pos; - draw1column(); - - left -= count; - uv_pos += sampler.uv_step * count; - if (uv_pos >= sampler.uv_max) - uv_pos -= sampler.uv_max; - } - - sampler.uv_pos = uv_pos; - } -} - -// Draw four columns with support for non-power-of-two ranges -static void Draw4Columns(int x, int y1, int y2, WallSampler *sampler, void(*draw4columns)()) -{ - if (sampler[0].uv_max == 0 || sampler[0].uv_step == 0) // power of two, no wrap handling needed - { - int count = y2 - y1; - for (int i = 0; i < 4; i++) - { - 
dc_wall_source[i] = sampler[i].source; - dc_wall_texturefrac[i] = sampler[i].uv_pos; - dc_wall_iscale[i] = sampler[i].uv_step; - - uint64_t step64 = sampler[i].uv_step; - uint64_t pos64 = sampler[i].uv_pos; - sampler[i].uv_pos = (uint32_t)(pos64 + step64 * count); - } - dc_dest = (ylookup[y1] + x) + dc_destorg; - dc_count = count; - draw4columns(); - } - else - { - dc_dest = (ylookup[y1] + x) + dc_destorg; - for (int i = 0; i < 4; i++) - { - dc_wall_source[i] = sampler[i].source; - } - - uint32_t left = y2 - y1; - while (left > 0) - { - // Find which column wraps first - uint32_t count = left; - for (int i = 0; i < 4; i++) - { - uint32_t available = sampler[i].uv_max - sampler[i].uv_pos; - uint32_t next_uv_wrap = available / sampler[i].uv_step; - if (available % sampler[i].uv_step != 0) - next_uv_wrap++; - count = MIN(next_uv_wrap, count); - } - - // Draw until that column wraps - for (int i = 0; i < 4; i++) - { - dc_wall_texturefrac[i] = sampler[i].uv_pos; - dc_wall_iscale[i] = sampler[i].uv_step; - } - dc_count = count; - draw4columns(); - - // Wrap the uv position - for (int i = 0; i < 4; i++) - { - sampler[i].uv_pos += sampler[i].uv_step * count; - if (sampler[i].uv_pos >= sampler[i].uv_max) - sampler[i].uv_pos -= sampler[i].uv_max; - } - - left -= count; - } - } -} - -typedef void(*DrawColumnFuncPtr)(); - -static void ProcessWallWorker( - int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, - const BYTE *(*getcol)(FTexture *tex, int x), DrawColumnFuncPtr draw1column, DrawColumnFuncPtr draw4columns) -{ - if (rw_pic->UseType == FTexture::TEX_Null) - return; - - fixed_t xoffset = rw_offset; - - int fracbits = 32 - rw_pic->HeightBits; - if (fracbits == 32) - { // Hack for one pixel tall textures - fracbits = 0; - yrepeat = 0; - dc_texturemid = 0; - } - - dc_wall_fracbits = fracbits; - - bool fixed = (fixedcolormap != NULL || fixedlightlev >= 0); - if (fixed) - { - dc_wall_colormap[0] = dc_colormap; - dc_wall_colormap[1] = 
dc_colormap; - dc_wall_colormap[2] = dc_colormap; - dc_wall_colormap[3] = dc_colormap; - } - - if (fixedcolormap) - dc_colormap = fixedcolormap; - else - dc_colormap = basecolormap->Maps; - - float light = rw_light; - - // Calculate where 4 column alignment begins and ends: - int aligned_x1 = clamp((x1 + 3) / 4 * 4, x1, x2); - int aligned_x2 = clamp(x2 / 4 * 4, x1, x2); - - // First unaligned columns: - for (int x = x1; x < aligned_x1; x++, light += rw_lightstep) - { - int y1 = uwal[x]; - int y2 = dwal[x]; - if (y2 <= y1) - continue; - - if (!fixed) - dc_colormap = basecolormap->Maps + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT); - - WallSampler sampler(y1, swal[x], yrepeat, lwal[x] + xoffset, rw_pic, getcol); - Draw1Column(x, y1, y2, sampler, draw1column); - } - - // The aligned columns - for (int x = aligned_x1; x < aligned_x2; x += 4) - { - // Find y1, y2, light and uv values for four columns: - int y1[4] = { uwal[x], uwal[x + 1], uwal[x + 2], uwal[x + 3] }; - int y2[4] = { dwal[x], dwal[x + 1], dwal[x + 2], dwal[x + 3] }; - - float lights[4]; - for (int i = 0; i < 4; i++) - { - lights[i] = light; - light += rw_lightstep; - } - - WallSampler sampler[4]; - for (int i = 0; i < 4; i++) - sampler[i] = WallSampler(y1[i], swal[x + i], yrepeat, lwal[x + i] + xoffset, rw_pic, getcol); - - // Figure out where we vertically can start and stop drawing 4 columns in one go - int middle_y1 = y1[0]; - int middle_y2 = y2[0]; - for (int i = 1; i < 4; i++) - { - middle_y1 = MAX(y1[i], middle_y1); - middle_y2 = MIN(y2[i], middle_y2); - } - - // If we got an empty column in our set we cannot draw 4 columns in one go: - bool empty_column_in_set = false; - for (int i = 0; i < 4; i++) - { - if (y2[i] <= y1[i]) - empty_column_in_set = true; - } - - if (empty_column_in_set || middle_y2 <= middle_y1) - { - for (int i = 0; i < 4; i++) - { - if (y2[i] <= y1[i]) - continue; - - if (!fixed) - dc_colormap = basecolormap->Maps + (GETPALOOKUP(lights[i], wallshade) << COLORMAPSHIFT); - 
Draw1Column(x + i, y1[i], y2[i], sampler[i], draw1column); - } - continue; - } - - // Draw the first rows where not all 4 columns are active - for (int i = 0; i < 4; i++) - { - if (!fixed) - dc_colormap = basecolormap->Maps + (GETPALOOKUP(lights[i], wallshade) << COLORMAPSHIFT); - - if (y1[i] < middle_y1) - Draw1Column(x + i, y1[i], middle_y1, sampler[i], draw1column); - } - - // Draw the area where all 4 columns are active - if (!fixed) - { - for (int i = 0; i < 4; i++) - { - dc_wall_colormap[i] = basecolormap->Maps + (GETPALOOKUP(lights[i], wallshade) << COLORMAPSHIFT); - } - } - Draw4Columns(x, middle_y1, middle_y2, sampler, draw4columns); - - // Draw the last rows where not all 4 columns are active - for (int i = 0; i < 4; i++) - { - if (!fixed) - dc_colormap = basecolormap->Maps + (GETPALOOKUP(lights[i], wallshade) << COLORMAPSHIFT); - - if (middle_y2 < y2[i]) - Draw1Column(x + i, middle_y2, y2[i], sampler[i], draw1column); - } - } - - // The last unaligned columns: - for (int x = aligned_x2; x < x2; x++, light += rw_lightstep) - { - int y1 = uwal[x]; - int y2 = dwal[x]; - if (y2 <= y1) - continue; - - if (!fixed) - dc_colormap = basecolormap->Maps + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT); - - WallSampler sampler(y1, swal[x], yrepeat, lwal[x] + xoffset, rw_pic, getcol); - Draw1Column(x, y1, y2, sampler, draw1column); - } - - NetUpdate(); -} - -static void ProcessNormalWall(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x) = R_GetColumn) -{ - ProcessWallWorker(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, R_DrawWallCol1, R_DrawWallCol4); -} - -static void ProcessMaskedWall(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x) = R_GetColumn) -{ - if (!rw_pic->bMasked) // Textures that aren't masked can use the faster ProcessNormalWall. 
- { - ProcessNormalWall(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol); - } - else - { - ProcessWallWorker(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, R_DrawWallMaskedCol1, R_DrawWallMaskedCol4); - } -} - -static void ProcessTranslucentWall(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x) = R_GetColumn) -{ - void (*drawcol1)(); - void (*drawcol4)(); - if (!R_GetTransMaskDrawers(&drawcol1, &drawcol4)) - { - // The current translucency is unsupported, so draw with regular ProcessMaskedWall instead. - ProcessMaskedWall(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol); - } - else - { - ProcessWallWorker(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, drawcol1, drawcol4); - } -} - -#else // QZDoom version - struct WallSampler { WallSampler() { } @@ -764,8 +417,6 @@ static void ProcessTranslucentWall(int x1, int x2, short *uwal, short *dwal, flo } } -#endif - static void ProcessStripedWall(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat) { FDynamicColormap *startcolormap = basecolormap; From 4d0cc9e7bb94681d0520cb0c9158a4f1cade03af Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 27 Dec 2016 07:25:11 +0100 Subject: [PATCH 575/912] Remove old voxel slab drawer --- src/swrenderer/drawers/r_draw.cpp | 31 ------- src/swrenderer/drawers/r_draw.h | 2 - src/swrenderer/drawers/r_draw_pal.cpp | 40 --------- src/swrenderer/drawers/r_draw_pal.h | 18 ---- src/swrenderer/drawers/r_draw_rgba.cpp | 110 ------------------------- src/swrenderer/drawers/r_draw_rgba.h | 20 ----- 6 files changed, 221 deletions(-) diff --git a/src/swrenderer/drawers/r_draw.cpp b/src/swrenderer/drawers/r_draw.cpp index b49427d982..181c4602f9 100644 --- a/src/swrenderer/drawers/r_draw.cpp +++ b/src/swrenderer/drawers/r_draw.cpp @@ -914,37 +914,6 @@ namespace swrenderer DrawerCommandQueue::QueueCommand(y, x1, x2); } - namespace - { - ShadeConstants slab_rgba_shade_constants; - 
const uint8_t *slab_rgba_colormap; - fixed_t slab_rgba_light; - } - - void R_SetupDrawSlab(FSWColormap *base_colormap, float light, int shade) - { - slab_rgba_shade_constants.light_red = base_colormap->Color.r * 256 / 255; - slab_rgba_shade_constants.light_green = base_colormap->Color.g * 256 / 255; - slab_rgba_shade_constants.light_blue = base_colormap->Color.b * 256 / 255; - slab_rgba_shade_constants.light_alpha = base_colormap->Color.a * 256 / 255; - slab_rgba_shade_constants.fade_red = base_colormap->Fade.r; - slab_rgba_shade_constants.fade_green = base_colormap->Fade.g; - slab_rgba_shade_constants.fade_blue = base_colormap->Fade.b; - slab_rgba_shade_constants.fade_alpha = base_colormap->Fade.a; - slab_rgba_shade_constants.desaturate = MIN(abs(base_colormap->Desaturate), 255) * 255 / 256; - slab_rgba_shade_constants.simple_shade = (base_colormap->Color.d == 0x00ffffff && base_colormap->Fade.d == 0x00000000 && base_colormap->Desaturate == 0); - slab_rgba_colormap = base_colormap->Maps; - slab_rgba_light = LIGHTSCALE(light, shade); - } - - void R_DrawSlab(int dx, fixed_t v, int dy, fixed_t vi, const uint8_t *vptr, uint8_t *p) - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(dx, v, dy, vi, vptr, p, slab_rgba_shade_constants, slab_rgba_colormap, slab_rgba_light); - else - DrawerCommandQueue::QueueCommand(dx, v, dy, vi, vptr, p, slab_rgba_colormap); - } - void R_DrawFogBoundarySection(int y, int y2, int x1) { for (; y < y2; ++y) diff --git a/src/swrenderer/drawers/r_draw.h b/src/swrenderer/drawers/r_draw.h index 580bcd2f9a..b168830223 100644 --- a/src/swrenderer/drawers/r_draw.h +++ b/src/swrenderer/drawers/r_draw.h @@ -160,8 +160,6 @@ namespace swrenderer void R_FillSpan(); void R_DrawTiltedSpan(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy); void R_DrawColoredSpan(int y, int x1, int x2); - void 
R_SetupDrawSlab(FSWColormap *base_colormap, float light, int shade); - void R_DrawSlab(int dx, fixed_t v, int dy, fixed_t vi, const uint8_t *vptr, uint8_t *p); void R_DrawFogBoundary(int x1, int x2, short *uclip, short *dclip); void R_FillSpan(); diff --git a/src/swrenderer/drawers/r_draw_pal.cpp b/src/swrenderer/drawers/r_draw_pal.cpp index dd1fdcc528..94250ccd55 100644 --- a/src/swrenderer/drawers/r_draw_pal.cpp +++ b/src/swrenderer/drawers/r_draw_pal.cpp @@ -2713,46 +2713,6 @@ namespace swrenderer ///////////////////////////////////////////////////////////////////////// - DrawSlabPalCommand::DrawSlabPalCommand(int dx, fixed_t v, int dy, fixed_t vi, const uint8_t *vptr, uint8_t *p, const uint8_t *colormap) - : _dx(dx), _v(v), _dy(dy), _vi(vi), _vvptr(vptr), _p(p), _colormap(colormap) - { - using namespace drawerargs; - _pitch = dc_pitch; - _start_y = static_cast((p - dc_destorg) / dc_pitch); - } - - void DrawSlabPalCommand::Execute(DrawerThread *thread) - { - int count = _dy; - uint8_t *dest = _p; - int pitch = _pitch; - int width = _dx; - const uint8_t *colormap = _colormap; - const uint8_t *source = _vvptr; - fixed_t fracpos = _v; - fixed_t iscale = _vi; - - count = thread->count_for_thread(_start_y, count); - dest = thread->dest_for_thread(_start_y, pitch, dest); - fracpos += iscale * thread->skipped_by_thread(_start_y); - iscale *= thread->num_cores; - pitch *= thread->num_cores; - - while (count > 0) - { - uint8_t color = colormap[source[fracpos >> FRACBITS]]; - - for (int x = 0; x < width; x++) - dest[x] = color; - - dest += pitch; - fracpos += iscale; - count--; - } - } - - ///////////////////////////////////////////////////////////////////////// - DrawFogBoundaryLinePalCommand::DrawFogBoundaryLinePalCommand(int y, int x1, int x2) : y(y), x1(x1), x2(x2) { using namespace drawerargs; diff --git a/src/swrenderer/drawers/r_draw_pal.h b/src/swrenderer/drawers/r_draw_pal.h index 539cf430d0..0d821b87c6 100644 --- a/src/swrenderer/drawers/r_draw_pal.h +++ 
b/src/swrenderer/drawers/r_draw_pal.h @@ -204,24 +204,6 @@ namespace swrenderer uint8_t *destorg; }; - class DrawSlabPalCommand : public PalSpanCommand - { - public: - DrawSlabPalCommand(int dx, fixed_t v, int dy, fixed_t vi, const uint8_t *vptr, uint8_t *p, const uint8_t *colormap); - void Execute(DrawerThread *thread) override; - - private: - int _dx; - fixed_t _v; - int _dy; - fixed_t _vi; - const uint8_t *_vvptr; - uint8_t *_p; - const uint8_t *_colormap; - int _pitch; - int _start_y; - }; - class DrawFogBoundaryLinePalCommand : public PalSpanCommand { public: diff --git a/src/swrenderer/drawers/r_draw_rgba.cpp b/src/swrenderer/drawers/r_draw_rgba.cpp index 5cdaea108b..4203bc8a98 100644 --- a/src/swrenderer/drawers/r_draw_rgba.cpp +++ b/src/swrenderer/drawers/r_draw_rgba.cpp @@ -489,116 +489,6 @@ namespace swrenderer ///////////////////////////////////////////////////////////////////////////// - DrawSlabRGBACommand::DrawSlabRGBACommand(int dx, fixed_t v, int dy, fixed_t vi, const uint8_t *vptr, uint8_t *p, ShadeConstants shade_constants, const uint8_t *colormap, fixed_t light) - { - using namespace drawerargs; - - _dx = dx; - _v = v; - _dy = dy; - _vi = vi; - _voxelptr = vptr; - _p = (uint32_t *)p; - _shade_constants = shade_constants; - _colormap = colormap; - _light = light; - _pitch = dc_pitch; - _start_y = static_cast((p - dc_destorg) / (dc_pitch * 4)); - assert(dx > 0); - } - - void DrawSlabRGBACommand::Execute(DrawerThread *thread) - { - int dx = _dx; - fixed_t v = _v; - int dy = _dy; - fixed_t vi = _vi; - const uint8_t *vptr = _voxelptr; - uint32_t *p = _p; - ShadeConstants shade_constants = _shade_constants; - const uint8_t *colormap = _colormap; - uint32_t light = LightBgra::calc_light_multiplier(_light); - int pitch = _pitch; - int x; - - dy = thread->count_for_thread(_start_y, dy); - p = thread->dest_for_thread(_start_y, pitch, p); - v += vi * thread->skipped_by_thread(_start_y); - vi *= thread->num_cores; - pitch *= thread->num_cores; - - if (dx == 
1) - { - while (dy > 0) - { - *p = LightBgra::shade_pal_index(colormap[vptr[v >> FRACBITS]], light, shade_constants); - p += pitch; - v += vi; - dy--; - } - } - else if (dx == 2) - { - while (dy > 0) - { - uint32_t color = LightBgra::shade_pal_index(colormap[vptr[v >> FRACBITS]], light, shade_constants); - p[0] = color; - p[1] = color; - p += pitch; - v += vi; - dy--; - } - } - else if (dx == 3) - { - while (dy > 0) - { - uint32_t color = LightBgra::shade_pal_index(colormap[vptr[v >> FRACBITS]], light, shade_constants); - p[0] = color; - p[1] = color; - p[2] = color; - p += pitch; - v += vi; - dy--; - } - } - else if (dx == 4) - { - while (dy > 0) - { - uint32_t color = LightBgra::shade_pal_index(colormap[vptr[v >> FRACBITS]], light, shade_constants); - p[0] = color; - p[1] = color; - p[2] = color; - p[3] = color; - p += pitch; - v += vi; - dy--; - } - } - else while (dy > 0) - { - uint32_t color = LightBgra::shade_pal_index(colormap[vptr[v >> FRACBITS]], light, shade_constants); - // The optimizer will probably turn this into a memset call. - // Since dx is not likely to be large, I'm not sure that's a good thing, - // hence the alternatives above. 
- for (x = 0; x < dx; x++) - { - p[x] = color; - } - p += pitch; - v += vi; - dy--; - } - } - - FString DrawSlabRGBACommand::DebugInfo() - { - return "DrawSlab"; - } - - ///////////////////////////////////////////////////////////////////////////// - DrawFogBoundaryLineRGBACommand::DrawFogBoundaryLineRGBACommand(int y, int x, int x2) { using namespace drawerargs; diff --git a/src/swrenderer/drawers/r_draw_rgba.h b/src/swrenderer/drawers/r_draw_rgba.h index cd3bb7c1d2..bee96fd2fa 100644 --- a/src/swrenderer/drawers/r_draw_rgba.h +++ b/src/swrenderer/drawers/r_draw_rgba.h @@ -211,26 +211,6 @@ namespace swrenderer FString DebugInfo() override; }; - class DrawSlabRGBACommand : public DrawerCommand - { - int _dx; - fixed_t _v; - int _dy; - fixed_t _vi; - const uint8_t *_voxelptr; - uint32_t *_p; - ShadeConstants _shade_constants; - const uint8_t *_colormap; - fixed_t _light; - int _pitch; - int _start_y; - - public: - DrawSlabRGBACommand(int dx, fixed_t v, int dy, fixed_t vi, const uint8_t *vptr, uint8_t *p, ShadeConstants shade_constants, const uint8_t *colormap, fixed_t light); - void Execute(DrawerThread *thread) override; - FString DebugInfo() override; - }; - class DrawFogBoundaryLineRGBACommand : public DrawerCommand { int _y; From 5a7765910f08e5446c10560ab18b435aadbc56a1 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 27 Dec 2016 07:30:29 +0100 Subject: [PATCH 576/912] Merge R_SetupSpanBits with R_SetSpanSource and rename it to R_SetSpanTexture --- src/swrenderer/drawers/r_draw.cpp | 13 ++++--------- src/swrenderer/drawers/r_draw.h | 3 +-- src/swrenderer/scene/r_plane.cpp | 3 +-- src/v_draw.cpp | 3 +-- 4 files changed, 7 insertions(+), 15 deletions(-) diff --git a/src/swrenderer/drawers/r_draw.cpp b/src/swrenderer/drawers/r_draw.cpp index 181c4602f9..c609e7c43c 100644 --- a/src/swrenderer/drawers/r_draw.cpp +++ b/src/swrenderer/drawers/r_draw.cpp @@ -601,7 +601,7 @@ namespace swrenderer } } - void R_SetupSpanBits(FTexture *tex) + void 
R_SetSpanTexture(FTexture *tex) { using namespace drawerargs; @@ -616,6 +616,9 @@ namespace swrenderer { ds_ybits--; } + + ds_source = r_swtruecolor ? (const uint8_t*)tex->GetPixelsBgra() : tex->GetPixels(); + ds_source_mipmapped = tex->Mipmapped() && tex->GetWidth() > 1 && tex->GetHeight() > 1; } void R_SetSpanColormap(FDynamicColormap *colormap, int shade) @@ -623,14 +626,6 @@ namespace swrenderer R_SetDSColorMapLight(colormap, 0, shade); } - void R_SetSpanSource(FTexture *tex) - { - using namespace drawerargs; - - ds_source = r_swtruecolor ? (const uint8_t*)tex->GetPixelsBgra() : tex->GetPixels(); - ds_source_mipmapped = tex->Mipmapped() && tex->GetWidth() > 1 && tex->GetHeight() > 1; - } - ///////////////////////////////////////////////////////////////////////// void R_DrawWallColumn() diff --git a/src/swrenderer/drawers/r_draw.h b/src/swrenderer/drawers/r_draw.h index b168830223..cf8fb2c357 100644 --- a/src/swrenderer/drawers/r_draw.h +++ b/src/swrenderer/drawers/r_draw.h @@ -178,9 +178,8 @@ namespace swrenderer void R_SetDSColorMapLight(FSWColormap *base_colormap, float light, int shade); void R_SetTranslationMap(lighttable_t *translation); - void R_SetupSpanBits(FTexture *tex); + void R_SetSpanTexture(FTexture *tex); void R_SetSpanColormap(FDynamicColormap *colormap, int shade); - void R_SetSpanSource(FTexture *tex); void R_MapTiltedPlane(int y, int x1); void R_MapColoredPlane(int y, int x1); diff --git a/src/swrenderer/scene/r_plane.cpp b/src/swrenderer/scene/r_plane.cpp index 8f9661fbdb..d7ecda8fed 100644 --- a/src/swrenderer/scene/r_plane.cpp +++ b/src/swrenderer/scene/r_plane.cpp @@ -1179,10 +1179,9 @@ void R_DrawSinglePlane (visplane_t *pl, fixed_t alpha, bool additive, bool maske { // Don't waste time on a masked texture if it isn't really masked. 
masked = false; } - R_SetupSpanBits(tex); + R_SetSpanTexture(tex); double xscale = pl->xform.xScale * tex->Scale.X; double yscale = pl->xform.yScale * tex->Scale.Y; - R_SetSpanSource(tex); basecolormap = pl->colormap; planeshade = LIGHT2SHADE(pl->lightlevel); diff --git a/src/v_draw.cpp b/src/v_draw.cpp index 1af386d603..31081a2106 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -1379,12 +1379,11 @@ void DCanvas::FillSimplePoly(FTexture *tex, FVector2 *points, int npoints, sinrot = sin(rotation.Radians()); // Setup constant texture mapping parameters. - R_SetupSpanBits(tex); + R_SetSpanTexture(tex); if (colormap) R_SetSpanColormap(colormap, clamp(shade >> FRACBITS, 0, NUMCOLORMAPS - 1)); else R_SetSpanColormap(&identitycolormap, 0); - R_SetSpanSource(tex); if (ds_xbits != 0) { scalex = double(1u << (32 - ds_xbits)) / scalex; From f1cd91922b3d37c60fda011e5999fade5da1a196 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 27 Dec 2016 07:32:21 +0100 Subject: [PATCH 577/912] Move to correct header --- src/swrenderer/drawers/r_draw.h | 3 --- src/swrenderer/scene/r_plane.h | 3 +++ 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/swrenderer/drawers/r_draw.h b/src/swrenderer/drawers/r_draw.h index cf8fb2c357..468ad6e2f2 100644 --- a/src/swrenderer/drawers/r_draw.h +++ b/src/swrenderer/drawers/r_draw.h @@ -180,7 +180,4 @@ namespace swrenderer void R_SetSpanTexture(FTexture *tex); void R_SetSpanColormap(FDynamicColormap *colormap, int shade); - - void R_MapTiltedPlane(int y, int x1); - void R_MapColoredPlane(int y, int x1); } diff --git a/src/swrenderer/scene/r_plane.h b/src/swrenderer/scene/r_plane.h index 740020209c..e6f2dc2eba 100644 --- a/src/swrenderer/scene/r_plane.h +++ b/src/swrenderer/scene/r_plane.h @@ -107,6 +107,9 @@ void R_DrawNormalPlane (visplane_t *pl, double xscale, double yscale, fixed_t al void R_DrawTiltedPlane (visplane_t *pl, double xscale, double yscale, fixed_t alpha, bool additive, bool masked); void R_MapVisPlane 
(visplane_t *pl, void (*mapfunc)(int y, int x1)); +void R_MapTiltedPlane(int y, int x1); +void R_MapColoredPlane(int y, int x1); + visplane_t *R_FindPlane ( const secplane_t &height, FTextureID picnum, From 325fa20a0248ede0c7179d9daa715f1012f14420 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 27 Dec 2016 07:40:35 +0100 Subject: [PATCH 578/912] Add R_DrawFogBoundaryLine and move R_DrawFogBoundary to a source file closer to its correct location --- src/swrenderer/drawers/r_draw.cpp | 121 ++---------------------------- src/swrenderer/drawers/r_draw.h | 2 +- src/swrenderer/scene/r_plane.cpp | 106 ++++++++++++++++++++++++++ src/swrenderer/scene/r_plane.h | 2 + 4 files changed, 114 insertions(+), 117 deletions(-) diff --git a/src/swrenderer/drawers/r_draw.cpp b/src/swrenderer/drawers/r_draw.cpp index c609e7c43c..56a99754c6 100644 --- a/src/swrenderer/drawers/r_draw.cpp +++ b/src/swrenderer/drawers/r_draw.cpp @@ -57,12 +57,6 @@ CVAR(Bool, r_dynlights, 1, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); namespace swrenderer { - // Needed by R_DrawFogBoundary (which probably shouldn't be part of this file) - extern "C" short spanend[MAXHEIGHT]; - extern float rw_light; - extern float rw_lightstep; - extern int wallshade; - double dc_texturemid; FLightNode *dc_light_list; visplane_light *ds_light_list; @@ -909,116 +903,11 @@ namespace swrenderer DrawerCommandQueue::QueueCommand(y, x1, x2); } - void R_DrawFogBoundarySection(int y, int y2, int x1) + void R_DrawFogBoundaryLine(int y, int x1, int x2) { - for (; y < y2; ++y) - { - int x2 = spanend[y]; - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(y, x1, x2); - else - DrawerCommandQueue::QueueCommand(y, x1, x2); - } + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(y, x1, x2); + else + DrawerCommandQueue::QueueCommand(y, x1, x2); } - - void R_DrawFogBoundary(int x1, int x2, short *uclip, short *dclip) - { - // This is essentially the same as R_MapVisPlane but with an extra step - // to create new horizontal spans 
whenever the light changes enough that - // we need to use a new colormap. - - double lightstep = rw_lightstep; - double light = rw_light + rw_lightstep*(x2 - x1 - 1); - int x = x2 - 1; - int t2 = uclip[x]; - int b2 = dclip[x]; - int rcolormap = GETPALOOKUP(light, wallshade); - int lcolormap; - uint8_t *basecolormapdata = basecolormap->Maps; - - if (b2 > t2) - { - fillshort(spanend + t2, b2 - t2, x); - } - - R_SetColorMapLight(basecolormap, (float)light, wallshade); - - uint8_t *fake_dc_colormap = basecolormap->Maps + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT); - - for (--x; x >= x1; --x) - { - int t1 = uclip[x]; - int b1 = dclip[x]; - const int xr = x + 1; - int stop; - - light -= rw_lightstep; - lcolormap = GETPALOOKUP(light, wallshade); - if (lcolormap != rcolormap) - { - if (t2 < b2 && rcolormap != 0) - { // Colormap 0 is always the identity map, so rendering it is - // just a waste of time. - R_DrawFogBoundarySection(t2, b2, xr); - } - if (t1 < t2) t2 = t1; - if (b1 > b2) b2 = b1; - if (t2 < b2) - { - fillshort(spanend + t2, b2 - t2, x); - } - rcolormap = lcolormap; - R_SetColorMapLight(basecolormap, (float)light, wallshade); - fake_dc_colormap = basecolormap->Maps + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT); - } - else - { - if (fake_dc_colormap != basecolormapdata) - { - stop = MIN(t1, b2); - while (t2 < stop) - { - int y = t2++; - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(y, xr, spanend[y]); - else - DrawerCommandQueue::QueueCommand(y, xr, spanend[y]); - } - stop = MAX(b1, t2); - while (b2 > stop) - { - int y = --b2; - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(y, xr, spanend[y]); - else - DrawerCommandQueue::QueueCommand(y, xr, spanend[y]); - } - } - else - { - t2 = MAX(t2, MIN(t1, b2)); - b2 = MIN(b2, MAX(b1, t2)); - } - - stop = MIN(t2, b1); - while (t1 < stop) - { - spanend[t1++] = x; - } - stop = MAX(b2, t2); - while (b1 > stop) - { - spanend[--b1] = x; - } - } - - t2 = uclip[x]; - b2 = dclip[x]; - } - if (t2 < b2 && 
rcolormap != 0) - { - R_DrawFogBoundarySection(t2, b2, x1); - } - } - } diff --git a/src/swrenderer/drawers/r_draw.h b/src/swrenderer/drawers/r_draw.h index 468ad6e2f2..94bf7b3281 100644 --- a/src/swrenderer/drawers/r_draw.h +++ b/src/swrenderer/drawers/r_draw.h @@ -160,7 +160,7 @@ namespace swrenderer void R_FillSpan(); void R_DrawTiltedSpan(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy); void R_DrawColoredSpan(int y, int x1, int x2); - void R_DrawFogBoundary(int x1, int x2, short *uclip, short *dclip); + void R_DrawFogBoundaryLine(int y, int x1, int x2); void R_FillSpan(); void R_DrawWallColumn(); diff --git a/src/swrenderer/scene/r_plane.cpp b/src/swrenderer/scene/r_plane.cpp index d7ecda8fed..fecef724e0 100644 --- a/src/swrenderer/scene/r_plane.cpp +++ b/src/swrenderer/scene/r_plane.cpp @@ -75,6 +75,8 @@ namespace swrenderer { using namespace drawerargs; +extern int wallshade; + extern subsector_t *InSubsector; static void R_DrawSkyStriped (visplane_t *pl); @@ -341,6 +343,110 @@ void R_MapColoredPlane(int y, int x1) R_DrawColoredSpan(y, x1, spanend[y]); } +void R_DrawFogBoundarySection(int y, int y2, int x1) +{ + for (; y < y2; ++y) + { + R_DrawFogBoundaryLine(y, x1, spanend[y]); + } +} + +//========================================================================== + +void R_DrawFogBoundary(int x1, int x2, short *uclip, short *dclip) +{ + // This is essentially the same as R_MapVisPlane but with an extra step + // to create new horizontal spans whenever the light changes enough that + // we need to use a new colormap. 
+ + double lightstep = rw_lightstep; + double light = rw_light + rw_lightstep*(x2 - x1 - 1); + int x = x2 - 1; + int t2 = uclip[x]; + int b2 = dclip[x]; + int rcolormap = GETPALOOKUP(light, wallshade); + int lcolormap; + uint8_t *basecolormapdata = basecolormap->Maps; + + if (b2 > t2) + { + fillshort(spanend + t2, b2 - t2, x); + } + + R_SetColorMapLight(basecolormap, (float)light, wallshade); + + uint8_t *fake_dc_colormap = basecolormap->Maps + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT); + + for (--x; x >= x1; --x) + { + int t1 = uclip[x]; + int b1 = dclip[x]; + const int xr = x + 1; + int stop; + + light -= rw_lightstep; + lcolormap = GETPALOOKUP(light, wallshade); + if (lcolormap != rcolormap) + { + if (t2 < b2 && rcolormap != 0) + { // Colormap 0 is always the identity map, so rendering it is + // just a waste of time. + R_DrawFogBoundarySection(t2, b2, xr); + } + if (t1 < t2) t2 = t1; + if (b1 > b2) b2 = b1; + if (t2 < b2) + { + fillshort(spanend + t2, b2 - t2, x); + } + rcolormap = lcolormap; + R_SetColorMapLight(basecolormap, (float)light, wallshade); + fake_dc_colormap = basecolormap->Maps + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT); + } + else + { + if (fake_dc_colormap != basecolormapdata) + { + stop = MIN(t1, b2); + while (t2 < stop) + { + int y = t2++; + R_DrawFogBoundaryLine(y, xr, spanend[y]); + } + stop = MAX(b1, t2); + while (b2 > stop) + { + int y = --b2; + R_DrawFogBoundaryLine(y, xr, spanend[y]); + } + } + else + { + t2 = MAX(t2, MIN(t1, b2)); + b2 = MIN(b2, MAX(b1, t2)); + } + + stop = MIN(t2, b1); + while (t1 < stop) + { + spanend[t1++] = x; + } + stop = MAX(b2, t2); + while (b1 > stop) + { + spanend[--b1] = x; + } + } + + t2 = uclip[x]; + b2 = dclip[x]; + } + if (t2 < b2 && rcolormap != 0) + { + R_DrawFogBoundarySection(t2, b2, x1); + } +} + //========================================================================== namespace diff --git a/src/swrenderer/scene/r_plane.h b/src/swrenderer/scene/r_plane.h index e6f2dc2eba..e8fe7882cf 
100644 --- a/src/swrenderer/scene/r_plane.h +++ b/src/swrenderer/scene/r_plane.h @@ -110,6 +110,8 @@ void R_MapVisPlane (visplane_t *pl, void (*mapfunc)(int y, int x1)); void R_MapTiltedPlane(int y, int x1); void R_MapColoredPlane(int y, int x1); +void R_DrawFogBoundary(int x1, int x2, short *uclip, short *dclip); + visplane_t *R_FindPlane ( const secplane_t &height, FTextureID picnum, From 4554cf45f1aa4477856492eaacc61b3d7caa4e23 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 27 Dec 2016 07:41:30 +0100 Subject: [PATCH 579/912] Remove duplicate prototype entry --- src/swrenderer/drawers/r_draw.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/swrenderer/drawers/r_draw.h b/src/swrenderer/drawers/r_draw.h index 94bf7b3281..c2d09cebd5 100644 --- a/src/swrenderer/drawers/r_draw.h +++ b/src/swrenderer/drawers/r_draw.h @@ -161,7 +161,6 @@ namespace swrenderer void R_DrawTiltedSpan(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy); void R_DrawColoredSpan(int y, int x1, int x2); void R_DrawFogBoundaryLine(int y, int x1, int x2); - void R_FillSpan(); void R_DrawWallColumn(); void R_DrawWallMaskedColumn(); From 398001b6cb350a9881bb9860366b858158ae5af2 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 27 Dec 2016 07:45:10 +0100 Subject: [PATCH 580/912] Move code into R_UpdateFuzzPos --- src/swrenderer/drawers/r_draw.cpp | 17 +++++++++++------ src/swrenderer/drawers/r_draw.h | 4 +++- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/src/swrenderer/drawers/r_draw.cpp b/src/swrenderer/drawers/r_draw.cpp index 56a99754c6..1519c465db 100644 --- a/src/swrenderer/drawers/r_draw.cpp +++ b/src/swrenderer/drawers/r_draw.cpp @@ -736,21 +736,26 @@ namespace swrenderer DrawerCommandQueue::QueueCommand(); } - void R_DrawFuzzColumn() + void R_UpdateFuzzPos() { using namespace drawerargs; - if (r_swtruecolor) - 
DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); - dc_yl = MAX(dc_yl, 1); dc_yh = MIN(dc_yh, fuzzviewheight); if (dc_yl <= dc_yh) fuzzpos = (fuzzpos + dc_yh - dc_yl + 1) % FUZZTABLE; } + void R_DrawFuzzColumn() + { + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); + + R_UpdateFuzzPos(); + } + void R_DrawAddColumn() { if (r_swtruecolor) diff --git a/src/swrenderer/drawers/r_draw.h b/src/swrenderer/drawers/r_draw.h index c2d09cebd5..d24551f7a8 100644 --- a/src/swrenderer/drawers/r_draw.h +++ b/src/swrenderer/drawers/r_draw.h @@ -133,7 +133,9 @@ namespace swrenderer bool R_GetTransMaskDrawers(void(**drawColumn)()); const uint8_t *R_GetColumn(FTexture *tex, int col); - + + void R_UpdateFuzzPos(); + void R_DrawColumn(); void R_DrawFuzzColumn(); void R_DrawTranslatedColumn(); From d5032663ad016d5c11ee4fa434bbd63097c44816 Mon Sep 17 00:00:00 2001 From: Christoph Oelckers Date: Tue, 27 Dec 2016 00:32:54 +0100 Subject: [PATCH 581/912] - fixed: All missiles which can bounce off actors need the CANPASS/PASSMOBJ flag so setting this in the 'bouncetype' property is insufficient, it needs to be done when the actor has been completely parsed. --- src/scripting/thingdef.cpp | 9 +++++++++ src/scripting/thingdef_properties.cpp | 5 ----- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/src/scripting/thingdef.cpp b/src/scripting/thingdef.cpp index 355cc87f31..ef26d8a503 100644 --- a/src/scripting/thingdef.cpp +++ b/src/scripting/thingdef.cpp @@ -425,6 +425,15 @@ void LoadActors() CheckForUnsafeStates(ti); } + // ensure that all actor bouncers have PASSMOBJ set. + auto defaults = GetDefaultByType(ti); + if (defaults->BounceFlags & (BOUNCE_Actors | BOUNCE_AllActors)) + { + // PASSMOBJ is irrelevant for normal missiles, but not for bouncers. 
+ defaults->flags2 |= MF2_PASSMOBJ; + } + + } if (FScriptPosition::ErrorCounter > 0) { diff --git a/src/scripting/thingdef_properties.cpp b/src/scripting/thingdef_properties.cpp index 27b7ab2d33..e54a00963f 100644 --- a/src/scripting/thingdef_properties.cpp +++ b/src/scripting/thingdef_properties.cpp @@ -1216,11 +1216,6 @@ DEFINE_PROPERTY(bouncetype, S, Actor) } defaults->BounceFlags &= ~(BOUNCE_TypeMask | BOUNCE_UseSeeSound); defaults->BounceFlags |= flags[match]; - if (defaults->BounceFlags & (BOUNCE_Actors | BOUNCE_AllActors)) - { - // PASSMOBJ is irrelevant for normal missiles, but not for bouncers. - defaults->flags2 |= MF2_PASSMOBJ; - } } //========================================================================== From 272b302294cb635b2f42d70401d67185bce1fc16 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 27 Dec 2016 08:47:51 +0100 Subject: [PATCH 582/912] Create SWPixelFormatDrawers for pal and bgra drawers --- src/swrenderer/drawers/r_draw.cpp | 303 ++------------------------- src/swrenderer/drawers/r_draw.h | 120 +++++++---- src/swrenderer/drawers/r_draw_pal.h | 53 +++++ src/swrenderer/drawers/r_draw_rgba.h | 47 +++++ 4 files changed, 199 insertions(+), 324 deletions(-) diff --git a/src/swrenderer/drawers/r_draw.cpp b/src/swrenderer/drawers/r_draw.cpp index 1519c465db..1e7b176c14 100644 --- a/src/swrenderer/drawers/r_draw.cpp +++ b/src/swrenderer/drawers/r_draw.cpp @@ -135,8 +135,25 @@ namespace swrenderer unsigned int *horizspan[4]; } + namespace + { + SWPixelFormatDrawers *active_drawers; + SWPalDrawers pal_drawers; + SWTruecolorDrawers tc_drawers; + } + + SWPixelFormatDrawers *R_ActiveDrawers() + { + return active_drawers; + } + void R_InitColumnDrawers() { + if (r_swtruecolor) + active_drawers = &tc_drawers; + else + active_drawers = &pal_drawers; + colfunc = basecolfunc = R_DrawColumn; fuzzcolfunc = R_DrawFuzzColumn; transcolfunc = R_DrawTranslatedColumn; @@ -620,122 +637,6 @@ namespace swrenderer R_SetDSColorMapLight(colormap, 0, shade); } 
- ///////////////////////////////////////////////////////////////////////// - - void R_DrawWallColumn() - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); - } - - void R_DrawWallMaskedColumn() - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); - } - - void R_DrawWallAddColumn() - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(); - else if (drawerargs::dc_num_lights == 0) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); - } - - void R_DrawWallAddClampColumn() - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); - } - - void R_DrawWallSubClampColumn() - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); - } - - void R_DrawWallRevSubClampColumn() - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); - } - - void R_DrawSingleSkyColumn(uint32_t solid_top, uint32_t solid_bottom) - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); - else - DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); - } - - void R_DrawDoubleSkyColumn(uint32_t solid_top, uint32_t solid_bottom) - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); - else - DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); - } - - void R_DrawColumn() - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); - } - - void R_FillColumn() - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); - } - - void R_FillAddColumn() - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); - } - - void R_FillAddClampColumn() - { - if (r_swtruecolor) - 
DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); - } - - void R_FillSubClampColumn() - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); - } - - void R_FillRevSubClampColumn() - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); - } - void R_UpdateFuzzPos() { using namespace drawerargs; @@ -745,174 +646,4 @@ namespace swrenderer if (dc_yl <= dc_yh) fuzzpos = (fuzzpos + dc_yh - dc_yl + 1) % FUZZTABLE; } - - void R_DrawFuzzColumn() - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); - - R_UpdateFuzzPos(); - } - - void R_DrawAddColumn() - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); - } - - void R_DrawTranslatedColumn() - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); - } - - void R_DrawTlatedAddColumn() - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); - } - - void R_DrawShadedColumn() - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); - } - - void R_DrawAddClampColumn() - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); - } - - void R_DrawAddClampTranslatedColumn() - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); - } - - void R_DrawSubClampColumn() - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); - } - - void R_DrawSubClampTranslatedColumn() - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); - } - - void R_DrawRevSubClampColumn() - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(); - else - 
DrawerCommandQueue::QueueCommand(); - } - - void R_DrawRevSubClampTranslatedColumn() - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); - } - - void R_DrawSpan() - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); - } - - void R_DrawSpanMasked() - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); - } - - void R_DrawSpanTranslucent() - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); - } - - void R_DrawSpanMaskedTranslucent() - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); - } - - void R_DrawSpanAddClamp() - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); - } - - void R_DrawSpanMaskedAddClamp() - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); - } - - void R_FillSpan() - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); - } - - void R_DrawTiltedSpan(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy) - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(y, x1, x2, plane_sz, plane_su, plane_sv, plane_shade, planeshade, planelightfloat, pviewx, pviewy); - else - DrawerCommandQueue::QueueCommand(y, x1, x2, plane_sz, plane_su, plane_sv, plane_shade, planeshade, planelightfloat, pviewx, pviewy); - } - - void R_DrawColoredSpan(int y, int x1, int x2) - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(y, x1, x2); - else - DrawerCommandQueue::QueueCommand(y, x1, x2); - } - - void R_DrawFogBoundaryLine(int y, int x1, int x2) - { - if (r_swtruecolor) - 
DrawerCommandQueue::QueueCommand(y, x1, x2); - else - DrawerCommandQueue::QueueCommand(y, x1, x2); - } } diff --git a/src/swrenderer/drawers/r_draw.h b/src/swrenderer/drawers/r_draw.h index d24551f7a8..01d670841a 100644 --- a/src/swrenderer/drawers/r_draw.h +++ b/src/swrenderer/drawers/r_draw.h @@ -123,6 +123,49 @@ namespace swrenderer extern bool r_swtruecolor; + class SWPixelFormatDrawers + { + public: + virtual ~SWPixelFormatDrawers() { } + virtual void DrawWallColumn() = 0; + virtual void DrawWallMaskedColumn() = 0; + virtual void DrawWallAddColumn() = 0; + virtual void DrawWallAddClampColumn() = 0; + virtual void DrawWallSubClampColumn() = 0; + virtual void DrawWallRevSubClampColumn() = 0; + virtual void DrawSingleSkyColumn(uint32_t solid_top, uint32_t solid_bottom) = 0; + virtual void DrawDoubleSkyColumn(uint32_t solid_top, uint32_t solid_bottom) = 0; + virtual void DrawColumn() = 0; + virtual void FillColumn() = 0; + virtual void FillAddColumn() = 0; + virtual void FillAddClampColumn() = 0; + virtual void FillSubClampColumn() = 0; + virtual void FillRevSubClampColumn() = 0; + virtual void DrawFuzzColumn() = 0; + virtual void DrawAddColumn() = 0; + virtual void DrawTranslatedColumn() = 0; + virtual void DrawTranslatedAddColumn() = 0; + virtual void DrawShadedColumn() = 0; + virtual void DrawAddClampColumn() = 0; + virtual void DrawAddClampTranslatedColumn() = 0; + virtual void DrawSubClampColumn() = 0; + virtual void DrawSubClampTranslatedColumn() = 0; + virtual void DrawRevSubClampColumn() = 0; + virtual void DrawRevSubClampTranslatedColumn() = 0; + virtual void DrawSpan() = 0; + virtual void DrawSpanMasked() = 0; + virtual void DrawSpanTranslucent() = 0; + virtual void DrawSpanMaskedTranslucent() = 0; + virtual void DrawSpanAddClamp() = 0; + virtual void DrawSpanMaskedAddClamp() = 0; + virtual void FillSpan() = 0; + virtual void DrawTiltedSpan(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool 
plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy) = 0; + virtual void DrawColoredSpan(int y, int x1, int x2) = 0; + virtual void DrawFogBoundaryLine(int y, int x1, int x2) = 0; + }; + + SWPixelFormatDrawers *R_ActiveDrawers(); + void R_InitColumnDrawers(); void R_InitShadeMaps(); void R_InitFuzzTable(int fuzzoff); @@ -136,44 +179,6 @@ namespace swrenderer void R_UpdateFuzzPos(); - void R_DrawColumn(); - void R_DrawFuzzColumn(); - void R_DrawTranslatedColumn(); - void R_DrawShadedColumn(); - void R_FillColumn(); - void R_FillAddColumn(); - void R_FillAddClampColumn(); - void R_FillSubClampColumn(); - void R_FillRevSubClampColumn(); - void R_DrawAddColumn(); - void R_DrawTlatedAddColumn(); - void R_DrawAddClampColumn(); - void R_DrawAddClampTranslatedColumn(); - void R_DrawSubClampColumn(); - void R_DrawSubClampTranslatedColumn(); - void R_DrawRevSubClampColumn(); - void R_DrawRevSubClampTranslatedColumn(); - void R_DrawSpan(); - void R_DrawSpanMasked(); - void R_DrawSpanTranslucent(); - void R_DrawSpanMaskedTranslucent(); - void R_DrawSpanAddClamp(); - void R_DrawSpanMaskedAddClamp(); - void R_FillSpan(); - void R_DrawTiltedSpan(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy); - void R_DrawColoredSpan(int y, int x1, int x2); - void R_DrawFogBoundaryLine(int y, int x1, int x2); - - void R_DrawWallColumn(); - void R_DrawWallMaskedColumn(); - void R_DrawWallAddColumn(); - void R_DrawWallAddClampColumn(); - void R_DrawWallSubClampColumn(); - void R_DrawWallRevSubClampColumn(); - - void R_DrawSingleSkyColumn(uint32_t solid_top, uint32_t solid_bottom); - void R_DrawDoubleSkyColumn(uint32_t solid_top, uint32_t solid_bottom); - // Sets dc_colormap and dc_light to their appropriate values depending on the output format (pal vs true color) void R_SetColorMapLight(FSWColormap *base_colormap, float light, 
int shade); void R_SetDSColorMapLight(FSWColormap *base_colormap, float light, int shade); @@ -181,4 +186,43 @@ namespace swrenderer void R_SetSpanTexture(FTexture *tex); void R_SetSpanColormap(FDynamicColormap *colormap, int shade); + + inline void R_DrawWallColumn() { R_ActiveDrawers()->DrawWallColumn(); } + inline void R_DrawWallMaskedColumn() { R_ActiveDrawers()->DrawWallMaskedColumn(); } + inline void R_DrawWallAddColumn() { R_ActiveDrawers()->DrawWallAddColumn(); } + inline void R_DrawWallAddClampColumn() { R_ActiveDrawers()->DrawWallAddClampColumn(); } + inline void R_DrawWallSubClampColumn() { R_ActiveDrawers()->DrawWallSubClampColumn(); } + inline void R_DrawWallRevSubClampColumn() { R_ActiveDrawers()->DrawWallRevSubClampColumn(); } + inline void R_DrawSingleSkyColumn(uint32_t solid_top, uint32_t solid_bottom) { R_ActiveDrawers()->DrawSingleSkyColumn(solid_top, solid_bottom); } + inline void R_DrawDoubleSkyColumn(uint32_t solid_top, uint32_t solid_bottom) { R_ActiveDrawers()->DrawDoubleSkyColumn(solid_top, solid_bottom); } + inline void R_DrawColumn() { R_ActiveDrawers()->DrawColumn(); } + inline void R_FillColumn() { R_ActiveDrawers()->FillColumn(); } + inline void R_FillAddColumn() { R_ActiveDrawers()->FillAddColumn(); } + inline void R_FillAddClampColumn() { R_ActiveDrawers()->FillAddClampColumn(); } + inline void R_FillSubClampColumn() { R_ActiveDrawers()->FillSubClampColumn(); } + inline void R_FillRevSubClampColumn() { R_ActiveDrawers()->FillRevSubClampColumn(); } + inline void R_DrawFuzzColumn() { R_ActiveDrawers()->DrawFuzzColumn(); } + inline void R_DrawAddColumn() { R_ActiveDrawers()->DrawAddColumn(); } + inline void R_DrawTranslatedColumn() { R_ActiveDrawers()->DrawTranslatedColumn(); } + inline void R_DrawTlatedAddColumn() { R_ActiveDrawers()->DrawTranslatedAddColumn(); } + inline void R_DrawShadedColumn() { R_ActiveDrawers()->DrawShadedColumn(); } + inline void R_DrawAddClampColumn() { R_ActiveDrawers()->DrawAddClampColumn(); } + inline void 
R_DrawAddClampTranslatedColumn() { R_ActiveDrawers()->DrawAddClampTranslatedColumn(); } + inline void R_DrawSubClampColumn() { R_ActiveDrawers()->DrawSubClampColumn(); } + inline void R_DrawSubClampTranslatedColumn() { R_ActiveDrawers()->DrawSubClampTranslatedColumn(); } + inline void R_DrawRevSubClampColumn() { R_ActiveDrawers()->DrawRevSubClampColumn(); } + inline void R_DrawRevSubClampTranslatedColumn() { R_ActiveDrawers()->DrawRevSubClampTranslatedColumn(); } + inline void R_DrawSpan() { R_ActiveDrawers()->DrawSpan(); } + inline void R_DrawSpanMasked() { R_ActiveDrawers()->DrawSpanMasked(); } + inline void R_DrawSpanTranslucent() { R_ActiveDrawers()->DrawSpanTranslucent(); } + inline void R_DrawSpanMaskedTranslucent() { R_ActiveDrawers()->DrawSpanMaskedTranslucent(); } + inline void R_DrawSpanAddClamp() { R_ActiveDrawers()->DrawSpanAddClamp(); } + inline void R_DrawSpanMaskedAddClamp() { R_ActiveDrawers()->DrawSpanMaskedAddClamp(); } + inline void R_FillSpan() { R_ActiveDrawers()->FillSpan(); } + inline void R_DrawTiltedSpan(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy) + { + R_ActiveDrawers()->DrawTiltedSpan(y, x1, x2, plane_sz, plane_su, plane_sv, plane_shade, planeshade, planelightfloat, pviewx, pviewy); + } + inline void R_DrawColoredSpan(int y, int x1, int x2) { R_ActiveDrawers()->DrawColoredSpan(y, x1, x2); } + inline void R_DrawFogBoundaryLine(int y, int x1, int x2) { R_ActiveDrawers()->DrawFogBoundaryLine(y, x1, x2); } } diff --git a/src/swrenderer/drawers/r_draw_pal.h b/src/swrenderer/drawers/r_draw_pal.h index 0d821b87c6..06cbe1ea64 100644 --- a/src/swrenderer/drawers/r_draw_pal.h +++ b/src/swrenderer/drawers/r_draw_pal.h @@ -231,4 +231,57 @@ namespace swrenderer uint32_t _alpha; uint32_t _fracposx; }; + + class SWPalDrawers : public SWPixelFormatDrawers + { + public: + void DrawWallColumn() override { 
DrawerCommandQueue::QueueCommand(); } + void DrawWallMaskedColumn() override { DrawerCommandQueue::QueueCommand(); } + + void DrawWallAddColumn() override + { + if (drawerargs::dc_num_lights == 0) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); + } + + void DrawWallAddClampColumn() override { DrawerCommandQueue::QueueCommand(); } + void DrawWallSubClampColumn() override { DrawerCommandQueue::QueueCommand(); } + void DrawWallRevSubClampColumn() override { DrawerCommandQueue::QueueCommand(); } + void DrawSingleSkyColumn(uint32_t solid_top, uint32_t solid_bottom) override { DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); } + void DrawDoubleSkyColumn(uint32_t solid_top, uint32_t solid_bottom) override { DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); } + void DrawColumn() override { DrawerCommandQueue::QueueCommand(); } + void FillColumn() override { DrawerCommandQueue::QueueCommand(); } + void FillAddColumn() override { DrawerCommandQueue::QueueCommand(); } + void FillAddClampColumn() override { DrawerCommandQueue::QueueCommand(); } + void FillSubClampColumn() override { DrawerCommandQueue::QueueCommand(); } + void FillRevSubClampColumn() override { DrawerCommandQueue::QueueCommand(); } + void DrawFuzzColumn() override { DrawerCommandQueue::QueueCommand(); R_UpdateFuzzPos(); } + void DrawAddColumn() override { DrawerCommandQueue::QueueCommand(); } + void DrawTranslatedColumn() override { DrawerCommandQueue::QueueCommand(); } + void DrawTranslatedAddColumn() override { DrawerCommandQueue::QueueCommand(); } + void DrawShadedColumn() override { DrawerCommandQueue::QueueCommand(); } + void DrawAddClampColumn() override { DrawerCommandQueue::QueueCommand(); } + void DrawAddClampTranslatedColumn() override { DrawerCommandQueue::QueueCommand(); } + void DrawSubClampColumn() override { DrawerCommandQueue::QueueCommand(); } + void DrawSubClampTranslatedColumn() override { DrawerCommandQueue::QueueCommand(); } + void 
DrawRevSubClampColumn() override { DrawerCommandQueue::QueueCommand(); } + void DrawRevSubClampTranslatedColumn() override { DrawerCommandQueue::QueueCommand(); } + void DrawSpan() override { DrawerCommandQueue::QueueCommand(); } + void DrawSpanMasked() override { DrawerCommandQueue::QueueCommand(); } + void DrawSpanTranslucent() override { DrawerCommandQueue::QueueCommand(); } + void DrawSpanMaskedTranslucent() override { DrawerCommandQueue::QueueCommand(); } + void DrawSpanAddClamp() override { DrawerCommandQueue::QueueCommand(); } + void DrawSpanMaskedAddClamp() override { DrawerCommandQueue::QueueCommand(); } + void FillSpan() override { DrawerCommandQueue::QueueCommand(); } + + void DrawTiltedSpan(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy) override + { + DrawerCommandQueue::QueueCommand(y, x1, x2, plane_sz, plane_su, plane_sv, plane_shade, planeshade, planelightfloat, pviewx, pviewy); + } + + void DrawColoredSpan(int y, int x1, int x2) override { DrawerCommandQueue::QueueCommand(y, x1, x2); } + void DrawFogBoundaryLine(int y, int x1, int x2) override { DrawerCommandQueue::QueueCommand(y, x1, x2); } + }; } diff --git a/src/swrenderer/drawers/r_draw_rgba.h b/src/swrenderer/drawers/r_draw_rgba.h index bee96fd2fa..0e43abbb51 100644 --- a/src/swrenderer/drawers/r_draw_rgba.h +++ b/src/swrenderer/drawers/r_draw_rgba.h @@ -338,6 +338,53 @@ namespace swrenderer uint32_t _fracposx; }; + ///////////////////////////////////////////////////////////////////////////// + + class SWTruecolorDrawers : public SWPixelFormatDrawers + { + public: + void DrawWallColumn() override { DrawerCommandQueue::QueueCommand(); } + void DrawWallMaskedColumn() override { DrawerCommandQueue::QueueCommand(); } + void DrawWallAddColumn() override { DrawerCommandQueue::QueueCommand(); } + void DrawWallAddClampColumn() override { 
DrawerCommandQueue::QueueCommand(); } + void DrawWallSubClampColumn() override { DrawerCommandQueue::QueueCommand(); } + void DrawWallRevSubClampColumn() override { DrawerCommandQueue::QueueCommand(); } + void DrawSingleSkyColumn(uint32_t solid_top, uint32_t solid_bottom) override { DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); } + void DrawDoubleSkyColumn(uint32_t solid_top, uint32_t solid_bottom) override { DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); } + void DrawColumn() override { DrawerCommandQueue::QueueCommand(); } + void FillColumn() override { DrawerCommandQueue::QueueCommand(); } + void FillAddColumn() override { DrawerCommandQueue::QueueCommand(); } + void FillAddClampColumn() override { DrawerCommandQueue::QueueCommand(); } + void FillSubClampColumn() override { DrawerCommandQueue::QueueCommand(); } + void FillRevSubClampColumn() override { DrawerCommandQueue::QueueCommand(); } + void DrawFuzzColumn() override { DrawerCommandQueue::QueueCommand(); R_UpdateFuzzPos(); } + void DrawAddColumn() override { DrawerCommandQueue::QueueCommand(); } + void DrawTranslatedColumn() override { DrawerCommandQueue::QueueCommand(); } + void DrawTranslatedAddColumn() override { DrawerCommandQueue::QueueCommand(); } + void DrawShadedColumn() override { DrawerCommandQueue::QueueCommand(); } + void DrawAddClampColumn() override { DrawerCommandQueue::QueueCommand(); } + void DrawAddClampTranslatedColumn() override { DrawerCommandQueue::QueueCommand(); } + void DrawSubClampColumn() override { DrawerCommandQueue::QueueCommand(); } + void DrawSubClampTranslatedColumn() override { DrawerCommandQueue::QueueCommand(); } + void DrawRevSubClampColumn() override { DrawerCommandQueue::QueueCommand(); } + void DrawRevSubClampTranslatedColumn() override { DrawerCommandQueue::QueueCommand(); } + void DrawSpan() override { DrawerCommandQueue::QueueCommand(); } + void DrawSpanMasked() override { DrawerCommandQueue::QueueCommand(); } + void DrawSpanTranslucent() 
override { DrawerCommandQueue::QueueCommand(); } + void DrawSpanMaskedTranslucent() override { DrawerCommandQueue::QueueCommand(); } + void DrawSpanAddClamp() override { DrawerCommandQueue::QueueCommand(); } + void DrawSpanMaskedAddClamp() override { DrawerCommandQueue::QueueCommand(); } + void FillSpan() override { DrawerCommandQueue::QueueCommand(); } + + void DrawTiltedSpan(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy) override + { + DrawerCommandQueue::QueueCommand(y, x1, x2, plane_sz, plane_su, plane_sv, plane_shade, planeshade, planelightfloat, pviewx, pviewy); + } + + void DrawColoredSpan(int y, int x1, int x2) override { DrawerCommandQueue::QueueCommand(y, x1, x2); } + void DrawFogBoundaryLine(int y, int x1, int x2) override { DrawerCommandQueue::QueueCommand(y, x1, x2); } + }; + ///////////////////////////////////////////////////////////////////////////// // Pixel shading inline functions: From a94fee0e5d8166047467bfaca701eabc1d64ff12 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 27 Dec 2016 08:54:37 +0100 Subject: [PATCH 583/912] Rename R_ActiveDrawers to R_Drawers --- src/swrenderer/drawers/r_draw.cpp | 2 +- src/swrenderer/drawers/r_draw.h | 72 +++++++++++++++---------------- 2 files changed, 37 insertions(+), 37 deletions(-) diff --git a/src/swrenderer/drawers/r_draw.cpp b/src/swrenderer/drawers/r_draw.cpp index 1e7b176c14..dbce7ec487 100644 --- a/src/swrenderer/drawers/r_draw.cpp +++ b/src/swrenderer/drawers/r_draw.cpp @@ -142,7 +142,7 @@ namespace swrenderer SWTruecolorDrawers tc_drawers; } - SWPixelFormatDrawers *R_ActiveDrawers() + SWPixelFormatDrawers *R_Drawers() { return active_drawers; } diff --git a/src/swrenderer/drawers/r_draw.h b/src/swrenderer/drawers/r_draw.h index 01d670841a..bf2ef1a0b0 100644 --- a/src/swrenderer/drawers/r_draw.h +++ b/src/swrenderer/drawers/r_draw.h @@ -164,7 +164,7 
@@ namespace swrenderer virtual void DrawFogBoundaryLine(int y, int x1, int x2) = 0; }; - SWPixelFormatDrawers *R_ActiveDrawers(); + SWPixelFormatDrawers *R_Drawers(); void R_InitColumnDrawers(); void R_InitShadeMaps(); @@ -187,42 +187,42 @@ namespace swrenderer void R_SetSpanTexture(FTexture *tex); void R_SetSpanColormap(FDynamicColormap *colormap, int shade); - inline void R_DrawWallColumn() { R_ActiveDrawers()->DrawWallColumn(); } - inline void R_DrawWallMaskedColumn() { R_ActiveDrawers()->DrawWallMaskedColumn(); } - inline void R_DrawWallAddColumn() { R_ActiveDrawers()->DrawWallAddColumn(); } - inline void R_DrawWallAddClampColumn() { R_ActiveDrawers()->DrawWallAddClampColumn(); } - inline void R_DrawWallSubClampColumn() { R_ActiveDrawers()->DrawWallSubClampColumn(); } - inline void R_DrawWallRevSubClampColumn() { R_ActiveDrawers()->DrawWallRevSubClampColumn(); } - inline void R_DrawSingleSkyColumn(uint32_t solid_top, uint32_t solid_bottom) { R_ActiveDrawers()->DrawSingleSkyColumn(solid_top, solid_bottom); } - inline void R_DrawDoubleSkyColumn(uint32_t solid_top, uint32_t solid_bottom) { R_ActiveDrawers()->DrawDoubleSkyColumn(solid_top, solid_bottom); } - inline void R_DrawColumn() { R_ActiveDrawers()->DrawColumn(); } - inline void R_FillColumn() { R_ActiveDrawers()->FillColumn(); } - inline void R_FillAddColumn() { R_ActiveDrawers()->FillAddColumn(); } - inline void R_FillAddClampColumn() { R_ActiveDrawers()->FillAddClampColumn(); } - inline void R_FillSubClampColumn() { R_ActiveDrawers()->FillSubClampColumn(); } - inline void R_FillRevSubClampColumn() { R_ActiveDrawers()->FillRevSubClampColumn(); } - inline void R_DrawFuzzColumn() { R_ActiveDrawers()->DrawFuzzColumn(); } - inline void R_DrawAddColumn() { R_ActiveDrawers()->DrawAddColumn(); } - inline void R_DrawTranslatedColumn() { R_ActiveDrawers()->DrawTranslatedColumn(); } - inline void R_DrawTlatedAddColumn() { R_ActiveDrawers()->DrawTranslatedAddColumn(); } - inline void R_DrawShadedColumn() { 
R_ActiveDrawers()->DrawShadedColumn(); } - inline void R_DrawAddClampColumn() { R_ActiveDrawers()->DrawAddClampColumn(); } - inline void R_DrawAddClampTranslatedColumn() { R_ActiveDrawers()->DrawAddClampTranslatedColumn(); } - inline void R_DrawSubClampColumn() { R_ActiveDrawers()->DrawSubClampColumn(); } - inline void R_DrawSubClampTranslatedColumn() { R_ActiveDrawers()->DrawSubClampTranslatedColumn(); } - inline void R_DrawRevSubClampColumn() { R_ActiveDrawers()->DrawRevSubClampColumn(); } - inline void R_DrawRevSubClampTranslatedColumn() { R_ActiveDrawers()->DrawRevSubClampTranslatedColumn(); } - inline void R_DrawSpan() { R_ActiveDrawers()->DrawSpan(); } - inline void R_DrawSpanMasked() { R_ActiveDrawers()->DrawSpanMasked(); } - inline void R_DrawSpanTranslucent() { R_ActiveDrawers()->DrawSpanTranslucent(); } - inline void R_DrawSpanMaskedTranslucent() { R_ActiveDrawers()->DrawSpanMaskedTranslucent(); } - inline void R_DrawSpanAddClamp() { R_ActiveDrawers()->DrawSpanAddClamp(); } - inline void R_DrawSpanMaskedAddClamp() { R_ActiveDrawers()->DrawSpanMaskedAddClamp(); } - inline void R_FillSpan() { R_ActiveDrawers()->FillSpan(); } + inline void R_DrawWallColumn() { R_Drawers()->DrawWallColumn(); } + inline void R_DrawWallMaskedColumn() { R_Drawers()->DrawWallMaskedColumn(); } + inline void R_DrawWallAddColumn() { R_Drawers()->DrawWallAddColumn(); } + inline void R_DrawWallAddClampColumn() { R_Drawers()->DrawWallAddClampColumn(); } + inline void R_DrawWallSubClampColumn() { R_Drawers()->DrawWallSubClampColumn(); } + inline void R_DrawWallRevSubClampColumn() { R_Drawers()->DrawWallRevSubClampColumn(); } + inline void R_DrawSingleSkyColumn(uint32_t solid_top, uint32_t solid_bottom) { R_Drawers()->DrawSingleSkyColumn(solid_top, solid_bottom); } + inline void R_DrawDoubleSkyColumn(uint32_t solid_top, uint32_t solid_bottom) { R_Drawers()->DrawDoubleSkyColumn(solid_top, solid_bottom); } + inline void R_DrawColumn() { R_Drawers()->DrawColumn(); } + inline void 
R_FillColumn() { R_Drawers()->FillColumn(); } + inline void R_FillAddColumn() { R_Drawers()->FillAddColumn(); } + inline void R_FillAddClampColumn() { R_Drawers()->FillAddClampColumn(); } + inline void R_FillSubClampColumn() { R_Drawers()->FillSubClampColumn(); } + inline void R_FillRevSubClampColumn() { R_Drawers()->FillRevSubClampColumn(); } + inline void R_DrawFuzzColumn() { R_Drawers()->DrawFuzzColumn(); } + inline void R_DrawAddColumn() { R_Drawers()->DrawAddColumn(); } + inline void R_DrawTranslatedColumn() { R_Drawers()->DrawTranslatedColumn(); } + inline void R_DrawTlatedAddColumn() { R_Drawers()->DrawTranslatedAddColumn(); } + inline void R_DrawShadedColumn() { R_Drawers()->DrawShadedColumn(); } + inline void R_DrawAddClampColumn() { R_Drawers()->DrawAddClampColumn(); } + inline void R_DrawAddClampTranslatedColumn() { R_Drawers()->DrawAddClampTranslatedColumn(); } + inline void R_DrawSubClampColumn() { R_Drawers()->DrawSubClampColumn(); } + inline void R_DrawSubClampTranslatedColumn() { R_Drawers()->DrawSubClampTranslatedColumn(); } + inline void R_DrawRevSubClampColumn() { R_Drawers()->DrawRevSubClampColumn(); } + inline void R_DrawRevSubClampTranslatedColumn() { R_Drawers()->DrawRevSubClampTranslatedColumn(); } + inline void R_DrawSpan() { R_Drawers()->DrawSpan(); } + inline void R_DrawSpanMasked() { R_Drawers()->DrawSpanMasked(); } + inline void R_DrawSpanTranslucent() { R_Drawers()->DrawSpanTranslucent(); } + inline void R_DrawSpanMaskedTranslucent() { R_Drawers()->DrawSpanMaskedTranslucent(); } + inline void R_DrawSpanAddClamp() { R_Drawers()->DrawSpanAddClamp(); } + inline void R_DrawSpanMaskedAddClamp() { R_Drawers()->DrawSpanMaskedAddClamp(); } + inline void R_FillSpan() { R_Drawers()->FillSpan(); } inline void R_DrawTiltedSpan(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy) { - 
R_ActiveDrawers()->DrawTiltedSpan(y, x1, x2, plane_sz, plane_su, plane_sv, plane_shade, planeshade, planelightfloat, pviewx, pviewy); + R_Drawers()->DrawTiltedSpan(y, x1, x2, plane_sz, plane_su, plane_sv, plane_shade, planeshade, planelightfloat, pviewx, pviewy); } - inline void R_DrawColoredSpan(int y, int x1, int x2) { R_ActiveDrawers()->DrawColoredSpan(y, x1, x2); } - inline void R_DrawFogBoundaryLine(int y, int x1, int x2) { R_ActiveDrawers()->DrawFogBoundaryLine(y, x1, x2); } + inline void R_DrawColoredSpan(int y, int x1, int x2) { R_Drawers()->DrawColoredSpan(y, x1, x2); } + inline void R_DrawFogBoundaryLine(int y, int x1, int x2) { R_Drawers()->DrawFogBoundaryLine(y, x1, x2); } } From 96228fd645e27edc1dfe106ac4fae550eaee5e7f Mon Sep 17 00:00:00 2001 From: "alexey.lysiuk" Date: Tue, 27 Dec 2016 12:55:26 +0200 Subject: [PATCH 584/912] Fixed compilation of non-Windows backends --- src/posix/cocoa/i_video.mm | 2 +- src/posix/sdl/hardware.cpp | 2 +- src/posix/sdl/sdlvideo.cpp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/posix/cocoa/i_video.mm b/src/posix/cocoa/i_video.mm index 5875e87b8b..f056b38768 100644 --- a/src/posix/cocoa/i_video.mm +++ b/src/posix/cocoa/i_video.mm @@ -48,7 +48,7 @@ #include "m_argv.h" #include "m_png.h" #include "r_renderer.h" -#include "r_swrenderer.h" +#include "swrenderer/r_swrenderer.h" #include "st_console.h" #include "stats.h" #include "textures.h" diff --git a/src/posix/sdl/hardware.cpp b/src/posix/sdl/hardware.cpp index 8fbb7c08ac..d8e714ccc4 100644 --- a/src/posix/sdl/hardware.cpp +++ b/src/posix/sdl/hardware.cpp @@ -49,7 +49,7 @@ #include "m_argv.h" #include "sdlglvideo.h" #include "r_renderer.h" -#include "r_swrenderer.h" +#include "swrenderer/r_swrenderer.h" EXTERN_CVAR (Bool, ticker) EXTERN_CVAR (Bool, fullscreen) diff --git a/src/posix/sdl/sdlvideo.cpp b/src/posix/sdl/sdlvideo.cpp index e127f56850..8c5c370753 100644 --- a/src/posix/sdl/sdlvideo.cpp +++ b/src/posix/sdl/sdlvideo.cpp @@ -11,7 +11,7 @@ 
#include "stats.h" #include "v_palette.h" #include "sdlvideo.h" -#include "r_swrenderer.h" +#include "swrenderer/r_swrenderer.h" #include "version.h" #include From 186c7b881465ee82d4fdace41e98e4d17881c256 Mon Sep 17 00:00:00 2001 From: "alexey.lysiuk" Date: Tue, 27 Dec 2016 12:56:21 +0200 Subject: [PATCH 585/912] Removed deprecated code from macOS SDL backend --- src/posix/sdl/i_main.cpp | 48 ---------------------------------------- 1 file changed, 48 deletions(-) diff --git a/src/posix/sdl/i_main.cpp b/src/posix/sdl/i_main.cpp index cb9ed58722..ca704340df 100644 --- a/src/posix/sdl/i_main.cpp +++ b/src/posix/sdl/i_main.cpp @@ -41,10 +41,6 @@ #include #include #include -#if defined(__MACH__) && !defined(NOASM) -#include -#include -#endif #include "doomerrors.h" #include "m_argv.h" @@ -185,46 +181,6 @@ static int DoomSpecificInfo (char *buffer, char *end) return p; } -#if defined(__MACH__) && !defined(NOASM) -// NASM won't let us create custom sections for Mach-O. Whether that's a limitation of NASM -// or of Mach-O, I don't know, but since we're using NASM for the assembly, it doesn't much -// matter. 
-extern "C" -{ - extern void *rtext_a_start, *rtext_a_end; - extern void *rtext_tmap_start, *rtext_tmap_end; - extern void *rtext_tmap2_start, *rtext_tmap2_end; - extern void *rtext_tmap3_start, *rtext_tmap3_end; -}; - -static void unprotect_pages(long pagesize, void *start, void *end) -{ - char *page = (char *)((intptr_t)start & ~(pagesize - 1)); - size_t len = (char *)end - (char *)start; - if (mprotect(page, len, PROT_READ|PROT_WRITE|PROT_EXEC) != 0) - { - fprintf(stderr, "mprotect failed\n"); - exit(1); - } -} - -static void unprotect_rtext() -{ - static void *const pages[] = - { - rtext_a_start, rtext_a_end, - rtext_tmap_start, rtext_tmap_end, - rtext_tmap2_start, rtext_tmap2_end, - rtext_tmap3_start, rtext_tmap3_end - }; - long pagesize = sysconf(_SC_PAGESIZE); - for (void *const *p = pages; p < &pages[countof(pages)]; p += 2) - { - unprotect_pages(pagesize, p[0], p[1]); - } -} -#endif - void I_StartupJoysticks(); void I_ShutdownJoysticks(); @@ -243,10 +199,6 @@ int main (int argc, char **argv) seteuid (getuid ()); std::set_new_handler (NewFailure); -#if defined(__MACH__) && !defined(NOASM) - unprotect_rtext(); -#endif - // Set LC_NUMERIC environment variable in case some library decides to // clear the setlocale call at least this will be correct. 
// Note that the LANG environment variable is overridden by LC_* From f8010854c3a1a8dc32cea01b9e511027f68af65a Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 27 Dec 2016 23:53:47 +0100 Subject: [PATCH 586/912] Move colfunc and friends to r_draw and change them to member function pointers --- src/swrenderer/drawers/r_draw.cpp | 66 +++++++++++++++-------------- src/swrenderer/drawers/r_draw.h | 47 ++++---------------- src/swrenderer/r_main.cpp | 12 ++---- src/swrenderer/r_main.h | 12 ------ src/swrenderer/scene/r_plane.cpp | 30 ++++++------- src/swrenderer/scene/r_things.cpp | 4 +- src/swrenderer/scene/r_voxel.cpp | 2 +- src/swrenderer/scene/r_walldraw.cpp | 20 ++++----- src/v_draw.cpp | 2 +- 9 files changed, 73 insertions(+), 122 deletions(-) diff --git a/src/swrenderer/drawers/r_draw.cpp b/src/swrenderer/drawers/r_draw.cpp index dbce7ec487..201f09abff 100644 --- a/src/swrenderer/drawers/r_draw.cpp +++ b/src/swrenderer/drawers/r_draw.cpp @@ -70,6 +70,12 @@ namespace swrenderer int fuzzpos; int fuzzviewheight; + DrawerFunc colfunc; + DrawerFunc basecolfunc; + DrawerFunc fuzzcolfunc; + DrawerFunc transcolfunc; + DrawerFunc spanfunc; + namespace drawerargs { int dc_pitch; @@ -154,10 +160,10 @@ namespace swrenderer else active_drawers = &pal_drawers; - colfunc = basecolfunc = R_DrawColumn; - fuzzcolfunc = R_DrawFuzzColumn; - transcolfunc = R_DrawTranslatedColumn; - spanfunc = R_DrawSpan; + colfunc = basecolfunc = &SWPixelFormatDrawers::DrawColumn; + fuzzcolfunc = &SWPixelFormatDrawers::DrawFuzzColumn; + transcolfunc = &SWPixelFormatDrawers::DrawTranslatedColumn; + spanfunc = &SWPixelFormatDrawers::DrawSpan; } void R_InitShadeMaps() @@ -240,7 +246,7 @@ namespace swrenderer { if (flags & STYLEF_ColorIsFixed) { - colfunc = R_FillColumn; + colfunc = &SWPixelFormatDrawers::FillColumn; } else if (dc_translation == NULL) { @@ -285,15 +291,15 @@ namespace swrenderer { // Colors won't overflow when added if (flags & STYLEF_ColorIsFixed) { - colfunc = R_FillAddColumn; 
+ colfunc = &SWPixelFormatDrawers::FillAddColumn; } else if (dc_translation == NULL) { - colfunc = R_DrawAddColumn; + colfunc = &SWPixelFormatDrawers::DrawAddColumn; } else { - colfunc = R_DrawTlatedAddColumn; + colfunc = &SWPixelFormatDrawers::DrawTranslatedAddColumn; drawer_needs_pal_input = true; } } @@ -301,15 +307,15 @@ namespace swrenderer { // Colors might overflow when added if (flags & STYLEF_ColorIsFixed) { - colfunc = R_FillAddClampColumn; + colfunc = &SWPixelFormatDrawers::FillAddClampColumn; } else if (dc_translation == NULL) { - colfunc = R_DrawAddClampColumn; + colfunc = &SWPixelFormatDrawers::DrawAddClampColumn; } else { - colfunc = R_DrawAddClampTranslatedColumn; + colfunc = &SWPixelFormatDrawers::DrawAddClampTranslatedColumn; drawer_needs_pal_input = true; } } @@ -318,15 +324,15 @@ namespace swrenderer case STYLEOP_Sub: if (flags & STYLEF_ColorIsFixed) { - colfunc = R_FillSubClampColumn; + colfunc = &SWPixelFormatDrawers::FillSubClampColumn; } else if (dc_translation == NULL) { - colfunc = R_DrawSubClampColumn; + colfunc = &SWPixelFormatDrawers::DrawSubClampColumn; } else { - colfunc = R_DrawSubClampTranslatedColumn; + colfunc = &SWPixelFormatDrawers::DrawSubClampTranslatedColumn; drawer_needs_pal_input = true; } return true; @@ -338,15 +344,15 @@ namespace swrenderer } if (flags & STYLEF_ColorIsFixed) { - colfunc = R_FillRevSubClampColumn; + colfunc = &SWPixelFormatDrawers::FillRevSubClampColumn; } else if (dc_translation == NULL) { - colfunc = R_DrawRevSubClampColumn; + colfunc = &SWPixelFormatDrawers::DrawRevSubClampColumn; } else { - colfunc = R_DrawRevSubClampTranslatedColumn; + colfunc = &SWPixelFormatDrawers::DrawRevSubClampTranslatedColumn; drawer_needs_pal_input = true; } return true; @@ -433,7 +439,7 @@ namespace swrenderer // Shaded drawer only gets 16 levels of alpha because it saves memory. 
if ((alpha >>= 12) == 0) return false; - colfunc = R_DrawShadedColumn; + colfunc = &SWPixelFormatDrawers::DrawShadedColumn; drawer_needs_pal_input = true; dc_color = fixedcolormap ? fixedcolormap->Maps[APART(color)] : basecolormap->Maps[APART(color)]; basecolormap = &ShadeFakeColormap[16 - alpha]; @@ -507,29 +513,25 @@ namespace swrenderer return tex->GetColumn(col, nullptr); } - bool R_GetTransMaskDrawers(void(**drawColumn)()) + DrawerFunc R_GetTransMaskDrawer() { - if (colfunc == R_DrawAddColumn) + if (colfunc == &SWPixelFormatDrawers::DrawAddColumn) { - *drawColumn = R_DrawWallAddColumn; - return true; + return &SWPixelFormatDrawers::DrawWallAddColumn; } - if (colfunc == R_DrawAddClampColumn) + if (colfunc == &SWPixelFormatDrawers::DrawAddClampColumn) { - *drawColumn = R_DrawWallAddClampColumn; - return true; + return &SWPixelFormatDrawers::DrawWallAddClampColumn; } - if (colfunc == R_DrawSubClampColumn) + if (colfunc == &SWPixelFormatDrawers::DrawSubClampColumn) { - *drawColumn = R_DrawWallSubClampColumn; - return true; + return &SWPixelFormatDrawers::DrawWallSubClampColumn; } - if (colfunc == R_DrawRevSubClampColumn) + if (colfunc == &SWPixelFormatDrawers::DrawRevSubClampColumn) { - *drawColumn = R_DrawWallRevSubClampColumn; - return true; + return &SWPixelFormatDrawers::DrawWallRevSubClampColumn; } - return false; + return nullptr; } void R_SetColorMapLight(FSWColormap *base_colormap, float light, int shade) diff --git a/src/swrenderer/drawers/r_draw.h b/src/swrenderer/drawers/r_draw.h index bf2ef1a0b0..496b2ee112 100644 --- a/src/swrenderer/drawers/r_draw.h +++ b/src/swrenderer/drawers/r_draw.h @@ -164,6 +164,8 @@ namespace swrenderer virtual void DrawFogBoundaryLine(int y, int x1, int x2) = 0; }; + typedef void(SWPixelFormatDrawers::*DrawerFunc)(); + SWPixelFormatDrawers *R_Drawers(); void R_InitColumnDrawers(); @@ -173,7 +175,7 @@ namespace swrenderer bool R_SetPatchStyle(FRenderStyle style, fixed_t alpha, int translation, uint32_t color); bool 
R_SetPatchStyle(FRenderStyle style, float alpha, int translation, uint32_t color); void R_FinishSetPatchStyle(); // Call this after finished drawing the current thing, in case its style was STYLE_Shade - bool R_GetTransMaskDrawers(void(**drawColumn)()); + DrawerFunc R_GetTransMaskDrawer(); const uint8_t *R_GetColumn(FTexture *tex, int col); @@ -187,42 +189,9 @@ namespace swrenderer void R_SetSpanTexture(FTexture *tex); void R_SetSpanColormap(FDynamicColormap *colormap, int shade); - inline void R_DrawWallColumn() { R_Drawers()->DrawWallColumn(); } - inline void R_DrawWallMaskedColumn() { R_Drawers()->DrawWallMaskedColumn(); } - inline void R_DrawWallAddColumn() { R_Drawers()->DrawWallAddColumn(); } - inline void R_DrawWallAddClampColumn() { R_Drawers()->DrawWallAddClampColumn(); } - inline void R_DrawWallSubClampColumn() { R_Drawers()->DrawWallSubClampColumn(); } - inline void R_DrawWallRevSubClampColumn() { R_Drawers()->DrawWallRevSubClampColumn(); } - inline void R_DrawSingleSkyColumn(uint32_t solid_top, uint32_t solid_bottom) { R_Drawers()->DrawSingleSkyColumn(solid_top, solid_bottom); } - inline void R_DrawDoubleSkyColumn(uint32_t solid_top, uint32_t solid_bottom) { R_Drawers()->DrawDoubleSkyColumn(solid_top, solid_bottom); } - inline void R_DrawColumn() { R_Drawers()->DrawColumn(); } - inline void R_FillColumn() { R_Drawers()->FillColumn(); } - inline void R_FillAddColumn() { R_Drawers()->FillAddColumn(); } - inline void R_FillAddClampColumn() { R_Drawers()->FillAddClampColumn(); } - inline void R_FillSubClampColumn() { R_Drawers()->FillSubClampColumn(); } - inline void R_FillRevSubClampColumn() { R_Drawers()->FillRevSubClampColumn(); } - inline void R_DrawFuzzColumn() { R_Drawers()->DrawFuzzColumn(); } - inline void R_DrawAddColumn() { R_Drawers()->DrawAddColumn(); } - inline void R_DrawTranslatedColumn() { R_Drawers()->DrawTranslatedColumn(); } - inline void R_DrawTlatedAddColumn() { R_Drawers()->DrawTranslatedAddColumn(); } - inline void 
R_DrawShadedColumn() { R_Drawers()->DrawShadedColumn(); } - inline void R_DrawAddClampColumn() { R_Drawers()->DrawAddClampColumn(); } - inline void R_DrawAddClampTranslatedColumn() { R_Drawers()->DrawAddClampTranslatedColumn(); } - inline void R_DrawSubClampColumn() { R_Drawers()->DrawSubClampColumn(); } - inline void R_DrawSubClampTranslatedColumn() { R_Drawers()->DrawSubClampTranslatedColumn(); } - inline void R_DrawRevSubClampColumn() { R_Drawers()->DrawRevSubClampColumn(); } - inline void R_DrawRevSubClampTranslatedColumn() { R_Drawers()->DrawRevSubClampTranslatedColumn(); } - inline void R_DrawSpan() { R_Drawers()->DrawSpan(); } - inline void R_DrawSpanMasked() { R_Drawers()->DrawSpanMasked(); } - inline void R_DrawSpanTranslucent() { R_Drawers()->DrawSpanTranslucent(); } - inline void R_DrawSpanMaskedTranslucent() { R_Drawers()->DrawSpanMaskedTranslucent(); } - inline void R_DrawSpanAddClamp() { R_Drawers()->DrawSpanAddClamp(); } - inline void R_DrawSpanMaskedAddClamp() { R_Drawers()->DrawSpanMaskedAddClamp(); } - inline void R_FillSpan() { R_Drawers()->FillSpan(); } - inline void R_DrawTiltedSpan(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy) - { - R_Drawers()->DrawTiltedSpan(y, x1, x2, plane_sz, plane_su, plane_sv, plane_shade, planeshade, planelightfloat, pviewx, pviewy); - } - inline void R_DrawColoredSpan(int y, int x1, int x2) { R_Drawers()->DrawColoredSpan(y, x1, x2); } - inline void R_DrawFogBoundaryLine(int y, int x1, int x2) { R_Drawers()->DrawFogBoundaryLine(y, x1, x2); } + extern DrawerFunc colfunc; + extern DrawerFunc basecolfunc; + extern DrawerFunc fuzzcolfunc; + extern DrawerFunc transcolfunc; + extern DrawerFunc spanfunc; } diff --git a/src/swrenderer/r_main.cpp b/src/swrenderer/r_main.cpp index ea5bb64e5b..dbee5ab240 100644 --- a/src/swrenderer/r_main.cpp +++ b/src/swrenderer/r_main.cpp @@ -163,12 
+163,6 @@ angle_t xtoviewangle[MAXWIDTH+1]; bool foggy; // [RH] ignore extralight and fullbright? int r_actualextralight; -void (*colfunc) (void); -void (*basecolfunc) (void); -void (*fuzzcolfunc) (void); -void (*transcolfunc) (void); -void (*spanfunc) (void); - cycle_t WallCycles, PlaneCycles, MaskedCycles, WallScanCycles; // PRIVATE DATA DEFINITIONS ------------------------------------------------ @@ -848,13 +842,13 @@ void R_RenderActorView (AActor *actor, bool dontmaplines) // [RH] Show off segs if r_drawflat is 1 if (r_drawflat) { - colfunc = R_FillColumn; - spanfunc = R_FillSpan; + colfunc = &SWPixelFormatDrawers::FillColumn; + spanfunc = &SWPixelFormatDrawers::FillSpan; } else { colfunc = basecolfunc; - spanfunc = R_DrawSpan; + spanfunc = &SWPixelFormatDrawers::DrawSpan; } WindowLeft = 0; diff --git a/src/swrenderer/r_main.h b/src/swrenderer/r_main.h index daf60c16d0..5fb10cec3e 100644 --- a/src/swrenderer/r_main.h +++ b/src/swrenderer/r_main.h @@ -112,18 +112,6 @@ extern int fixedlightlev; extern FSWColormap* fixedcolormap; extern FSpecialColormap*realfixedcolormap; - -// -// Function pointers to switch refresh/drawing functions. -// Used to select shadow mode etc. -// -extern void (*colfunc) (void); -extern void (*basecolfunc) (void); -extern void (*fuzzcolfunc) (void); -extern void (*transcolfunc) (void); -// No shadow effects on floors. 
-extern void (*spanfunc) (void); - void R_InitTextureMapping (); diff --git a/src/swrenderer/scene/r_plane.cpp b/src/swrenderer/scene/r_plane.cpp index fecef724e0..9988ab12f1 100644 --- a/src/swrenderer/scene/r_plane.cpp +++ b/src/swrenderer/scene/r_plane.cpp @@ -318,7 +318,7 @@ void R_MapPlane (int y, int x1) ds_x1 = x1; ds_x2 = x2; - spanfunc (); + (R_Drawers()->*spanfunc)(); } //========================================================================== @@ -329,7 +329,7 @@ void R_MapPlane (int y, int x1) void R_MapTiltedPlane (int y, int x1) { - R_DrawTiltedSpan(y, x1, spanend[y], plane_sz, plane_su, plane_sv, plane_shade, planeshade, planelightfloat, pviewx, pviewy); + R_Drawers()->DrawTiltedSpan(y, x1, spanend[y], plane_sz, plane_su, plane_sv, plane_shade, planeshade, planelightfloat, pviewx, pviewy); } //========================================================================== @@ -340,14 +340,14 @@ void R_MapTiltedPlane (int y, int x1) void R_MapColoredPlane(int y, int x1) { - R_DrawColoredSpan(y, x1, spanend[y]); + R_Drawers()->DrawColoredSpan(y, x1, spanend[y]); } void R_DrawFogBoundarySection(int y, int y2, int x1) { for (; y < y2; ++y) { - R_DrawFogBoundaryLine(y, x1, spanend[y]); + R_Drawers()->DrawFogBoundaryLine(y, x1, spanend[y]); } } @@ -411,13 +411,13 @@ void R_DrawFogBoundary(int x1, int x2, short *uclip, short *dclip) while (t2 < stop) { int y = t2++; - R_DrawFogBoundaryLine(y, xr, spanend[y]); + R_Drawers()->DrawFogBoundaryLine(y, xr, spanend[y]); } stop = MAX(b1, t2); while (b2 > stop) { int y = --b2; - R_DrawFogBoundaryLine(y, xr, spanend[y]); + R_Drawers()->DrawFogBoundaryLine(y, xr, spanend[y]); } } else @@ -1038,9 +1038,9 @@ static void R_DrawSkyColumnStripe(int start_x, int y1, int y2, int columns, doub uint32_t solid_bottom = frontskytex->GetSkyCapColor(true); if (!backskytex) - R_DrawSingleSkyColumn(solid_top, solid_bottom); + R_Drawers()->DrawSingleSkyColumn(solid_top, solid_bottom); else - R_DrawDoubleSkyColumn(solid_top, solid_bottom); 
+ R_Drawers()->DrawDoubleSkyColumn(solid_top, solid_bottom); } static void R_DrawSkyColumn(int start_x, int y1, int y2, int columns) @@ -1745,7 +1745,7 @@ void R_DrawNormalPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t plane_shade = true; } - if (spanfunc != R_FillSpan) + if (spanfunc != &SWPixelFormatDrawers::FillSpan) { if (masked) { @@ -1753,7 +1753,7 @@ void R_DrawNormalPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t { if (!additive) { - spanfunc = R_DrawSpanMaskedTranslucent; + spanfunc = &SWPixelFormatDrawers::DrawSpanMaskedTranslucent; dc_srcblend = Col2RGB8[alpha>>10]; dc_destblend = Col2RGB8[(OPAQUE-alpha)>>10]; dc_srcalpha = alpha; @@ -1761,7 +1761,7 @@ void R_DrawNormalPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t } else { - spanfunc = R_DrawSpanMaskedAddClamp; + spanfunc = &SWPixelFormatDrawers::DrawSpanMaskedAddClamp; dc_srcblend = Col2RGB8_LessPrecision[alpha>>10]; dc_destblend = Col2RGB8_LessPrecision[FRACUNIT>>10]; dc_srcalpha = alpha; @@ -1770,7 +1770,7 @@ void R_DrawNormalPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t } else { - spanfunc = R_DrawSpanMasked; + spanfunc = &SWPixelFormatDrawers::DrawSpanMasked; } } else @@ -1779,7 +1779,7 @@ void R_DrawNormalPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t { if (!additive) { - spanfunc = R_DrawSpanTranslucent; + spanfunc = &SWPixelFormatDrawers::DrawSpanTranslucent; dc_srcblend = Col2RGB8[alpha>>10]; dc_destblend = Col2RGB8[(OPAQUE-alpha)>>10]; dc_srcalpha = alpha; @@ -1787,7 +1787,7 @@ void R_DrawNormalPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t } else { - spanfunc = R_DrawSpanAddClamp; + spanfunc = &SWPixelFormatDrawers::DrawSpanAddClamp; dc_srcblend = Col2RGB8_LessPrecision[alpha>>10]; dc_destblend = Col2RGB8_LessPrecision[FRACUNIT>>10]; dc_srcalpha = alpha; @@ -1796,7 +1796,7 @@ void R_DrawNormalPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t } else { - spanfunc = R_DrawSpan; + spanfunc 
= &SWPixelFormatDrawers::DrawSpan; } } } diff --git a/src/swrenderer/scene/r_things.cpp b/src/swrenderer/scene/r_things.cpp index f0365819e6..419499d4c9 100644 --- a/src/swrenderer/scene/r_things.cpp +++ b/src/swrenderer/scene/r_things.cpp @@ -368,7 +368,7 @@ void R_DrawMaskedColumnBgra(FTexture *tex, fixed_t col, bool unmasked) double v = ((dc_yl + 0.5 - sprtopscreen) / spryscale) / tex->GetHeight(); dc_texturefrac = (uint32_t)(v * (1 << 30)); - colfunc(); + (R_Drawers()->*colfunc)(); } span++; } @@ -443,7 +443,7 @@ void R_DrawMaskedColumn (FTexture *tex, fixed_t col, bool unmasked) else if (dc_iscale < 0) dc_count = MIN(dc_count, (dc_texturefrac - dc_iscale) / (-dc_iscale)); - colfunc (); + (R_Drawers()->*colfunc)(); } span++; } diff --git a/src/swrenderer/scene/r_voxel.cpp b/src/swrenderer/scene/r_voxel.cpp index 4645982420..22773499d5 100644 --- a/src/swrenderer/scene/r_voxel.cpp +++ b/src/swrenderer/scene/r_voxel.cpp @@ -209,7 +209,7 @@ namespace swrenderer dc_dest = dc_destorg + (dc_pitch * columnY1 + x) * pixelsize; dc_color = color; dc_count = columnY2 - columnY1; - R_FillColumn(); + R_Drawers()->FillColumn(); } } } diff --git a/src/swrenderer/scene/r_walldraw.cpp b/src/swrenderer/scene/r_walldraw.cpp index 74aafe11e6..0ffb3632c6 100644 --- a/src/swrenderer/scene/r_walldraw.cpp +++ b/src/swrenderer/scene/r_walldraw.cpp @@ -188,7 +188,7 @@ WallSampler::WallSampler(int y1, float swal, double yrepeat, fixed_t xoffset, do } // Draw a column with support for non-power-of-two ranges -static void Draw1Column(int x, int y1, int y2, WallSampler &sampler, void(*draw1column)()) +static void Draw1Column(int x, int y1, int y2, WallSampler &sampler, DrawerFunc draw1column) { if (r_dynlights) { @@ -265,7 +265,7 @@ static void Draw1Column(int x, int y1, int y2, WallSampler &sampler, void(*draw1 dc_iscale = sampler.uv_step; dc_texturefrac = sampler.uv_pos; dc_textureheight = sampler.height; - draw1column(); + (R_Drawers()->*draw1column)(); uint64_t step64 = sampler.uv_step; 
uint64_t pos64 = sampler.uv_pos; @@ -284,7 +284,7 @@ static void Draw1Column(int x, int y1, int y2, WallSampler &sampler, void(*draw1 dc_count = count; dc_iscale = sampler.uv_step; dc_texturefrac = sampler.uv_pos; - draw1column(); + (R_Drawers()->*draw1column)(); uint64_t step64 = sampler.uv_step; uint64_t pos64 = sampler.uv_pos; @@ -310,7 +310,7 @@ static void Draw1Column(int x, int y1, int y2, WallSampler &sampler, void(*draw1 dc_count = count; dc_iscale = sampler.uv_step; dc_texturefrac = uv_pos; - draw1column(); + (R_Drawers()->*draw1column)(); left -= count; uv_pos += sampler.uv_step * count; @@ -323,11 +323,9 @@ static void Draw1Column(int x, int y1, int y2, WallSampler &sampler, void(*draw1 } } -typedef void(*DrawColumnFuncPtr)(); - static void ProcessWallWorker( int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, - const BYTE *(*getcol)(FTexture *tex, int x), DrawColumnFuncPtr drawcolumn) + const BYTE *(*getcol)(FTexture *tex, int x), DrawerFunc drawcolumn) { if (rw_pic->UseType == FTexture::TEX_Null) return; @@ -388,7 +386,7 @@ static void ProcessWallWorker( static void ProcessNormalWall(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x) = R_GetColumn) { - ProcessWallWorker(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, R_DrawWallColumn); + ProcessWallWorker(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, &SWPixelFormatDrawers::DrawWallColumn); } static void ProcessMaskedWall(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x) = R_GetColumn) @@ -399,14 +397,14 @@ static void ProcessMaskedWall(int x1, int x2, short *uwal, short *dwal, float *s } else { - ProcessWallWorker(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, R_DrawWallMaskedColumn); + ProcessWallWorker(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, &SWPixelFormatDrawers::DrawWallMaskedColumn); } } 
static void ProcessTranslucentWall(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x) = R_GetColumn) { - void(*drawcol1)(); - if (!R_GetTransMaskDrawers(&drawcol1)) + DrawerFunc drawcol1 = R_GetTransMaskDrawer(); + if (drawcol1 == nullptr) { // The current translucency is unsupported, so draw with regular ProcessMaskedWall instead. ProcessMaskedWall(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol); diff --git a/src/v_draw.cpp b/src/v_draw.cpp index 31081a2106..bd40c80ed7 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -1481,7 +1481,7 @@ void DCanvas::FillSimplePoly(FTexture *tex, FVector2 *points, int npoints, ds_xfrac = xs_RoundToInt(tex.X * scalex); ds_yfrac = xs_RoundToInt(tex.Y * scaley); - R_DrawSpan(); + R_Drawers()->DrawSpan(); #endif } x += xinc; From 259d7241067ade1a89aa7215a878bb3667a19c7d Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 28 Dec 2016 01:35:22 +0100 Subject: [PATCH 587/912] Move R_GetColumn to walldraw and create header file --- src/swrenderer/drawers/r_draw.cpp | 17 ---------------- src/swrenderer/scene/r_plane.cpp | 6 +++--- src/swrenderer/scene/r_segs.cpp | 2 +- src/swrenderer/scene/r_walldraw.cpp | 31 +++++++++++++++-------------- src/swrenderer/scene/r_walldraw.h | 26 ++++++++++++++++++++++++ 5 files changed, 46 insertions(+), 36 deletions(-) create mode 100644 src/swrenderer/scene/r_walldraw.h diff --git a/src/swrenderer/drawers/r_draw.cpp b/src/swrenderer/drawers/r_draw.cpp index 201f09abff..cbfb8e2536 100644 --- a/src/swrenderer/drawers/r_draw.cpp +++ b/src/swrenderer/drawers/r_draw.cpp @@ -496,23 +496,6 @@ namespace swrenderer basecolormap = basecolormapsave; } - const uint8_t *R_GetColumn(FTexture *tex, int col) - { - int width; - - // If the texture's width isn't a power of 2, then we need to make it a - // positive offset for proper clamping. 
- if (col < 0 && (width = tex->GetWidth()) != (1 << tex->WidthBits)) - { - col = width + (col % width); - } - - if (r_swtruecolor) - return (const uint8_t *)tex->GetColumnBgra(col, nullptr); - else - return tex->GetColumn(col, nullptr); - } - DrawerFunc R_GetTransMaskDrawer() { if (colfunc == &SWPixelFormatDrawers::DrawAddColumn) diff --git a/src/swrenderer/scene/r_plane.cpp b/src/swrenderer/scene/r_plane.cpp index 9988ab12f1..b6f1eca431 100644 --- a/src/swrenderer/scene/r_plane.cpp +++ b/src/swrenderer/scene/r_plane.cpp @@ -60,6 +60,7 @@ #include "r_data/colormaps.h" #include "swrenderer/drawers/r_draw_rgba.h" #include "gl/dynlights/gl_dynlight.h" +#include "r_walldraw.h" #ifdef _MSC_VER #pragma warning(disable:4244) @@ -155,7 +156,6 @@ static double xstepscale, ystepscale; static double basexfrac, baseyfrac; void R_DrawSinglePlane (visplane_t *, fixed_t alpha, bool additive, bool masked); -void R_DrawSkySegment(visplane_t *vis, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const uint8_t *(*getcol)(FTexture *tex, int col)); //========================================================================== // @@ -1135,7 +1135,7 @@ static void R_DrawSky (visplane_t *pl) lastskycol[x] = 0xffffffff; lastskycol_bgra[x] = 0xffffffff; } - R_DrawSkySegment (pl, (short *)pl->top, (short *)pl->bottom, swall, lwall, + R_DrawSkySegment (pl->left, pl->right, (short *)pl->top, (short *)pl->bottom, swall, lwall, frontyScale, backskytex == NULL ? R_GetOneSkyColumn : R_GetTwoSkyColumns); } else @@ -1173,7 +1173,7 @@ static void R_DrawSkyStriped (visplane_t *pl) lastskycol[x] = 0xffffffff; lastskycol_bgra[x] = 0xffffffff; } - R_DrawSkySegment (pl, top, bot, swall, lwall, rw_pic->Scale.Y, + R_DrawSkySegment (pl->left, pl->right, top, bot, swall, lwall, rw_pic->Scale.Y, backskytex == NULL ? 
R_GetOneSkyColumn : R_GetTwoSkyColumns); yl = yh; yh += drawheight; diff --git a/src/swrenderer/scene/r_segs.cpp b/src/swrenderer/scene/r_segs.cpp index 0348ba26c9..1c90ee2a39 100644 --- a/src/swrenderer/scene/r_segs.cpp +++ b/src/swrenderer/scene/r_segs.cpp @@ -47,6 +47,7 @@ #include "swrenderer/drawers/r_draw.h" #include "v_palette.h" #include "r_data/colormaps.h" +#include "r_walldraw.h" #define WALLYREPEAT 8 @@ -60,7 +61,6 @@ namespace swrenderer { using namespace drawerargs; - void R_DrawWallSegment(FTexture *rw_pic, int x1, int x2, short *walltop, short *wallbottom, float *swall, fixed_t *lwall, double yscale, double top, double bottom, bool mask, FLightNode *light_list = nullptr); void R_DrawDrawSeg(drawseg_t *ds, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat); #define HEIGHTBITS 12 diff --git a/src/swrenderer/scene/r_walldraw.cpp b/src/swrenderer/scene/r_walldraw.cpp index 0ffb3632c6..1b56e9524b 100644 --- a/src/swrenderer/scene/r_walldraw.cpp +++ b/src/swrenderer/scene/r_walldraw.cpp @@ -44,6 +44,7 @@ #include "r_data/colormaps.h" #include "gl/dynlights/gl_dynlight.h" #include "swrenderer/drawers/r_drawers.h" +#include "r_walldraw.h" namespace swrenderer { @@ -52,20 +53,22 @@ namespace swrenderer extern FTexture *rw_pic; extern int wallshade; -struct WallSampler +static const uint8_t *R_GetColumn(FTexture *tex, int col) { - WallSampler() { } - WallSampler(int y1, float swal, double yrepeat, fixed_t xoffset, double xmagnitude, FTexture *texture, const BYTE*(*getcol)(FTexture *texture, int x)); + int width; - uint32_t uv_pos; - uint32_t uv_step; - uint32_t uv_max; + // If the texture's width isn't a power of 2, then we need to make it a + // positive offset for proper clamping. 
+ if (col < 0 && (width = tex->GetWidth()) != (1 << tex->WidthBits)) + { + col = width + (col % width); + } - const BYTE *source; - const BYTE *source2; - uint32_t texturefracx; - uint32_t height; -}; + if (r_swtruecolor) + return (const uint8_t *)tex->GetColumnBgra(col, nullptr); + else + return tex->GetColumn(col, nullptr); +} WallSampler::WallSampler(int y1, float swal, double yrepeat, fixed_t xoffset, double xmagnitude, FTexture *texture, const BYTE*(*getcol)(FTexture *texture, int x)) { @@ -596,11 +599,9 @@ void R_DrawWallSegment(FTexture *rw_pic, int x1, int x2, short *walltop, short * dc_light_list = nullptr; } -void R_DrawSkySegment(visplane_t *pl, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x)) +void R_DrawSkySegment(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x)) { - ProcessNormalWall(pl->left, pl->right, uwal, dwal, swal, lwal, yrepeat, getcol); + ProcessNormalWall(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol); } - - } \ No newline at end of file diff --git a/src/swrenderer/scene/r_walldraw.h b/src/swrenderer/scene/r_walldraw.h new file mode 100644 index 0000000000..50b1545ca2 --- /dev/null +++ b/src/swrenderer/scene/r_walldraw.h @@ -0,0 +1,26 @@ + +#pragma once + +class FTexture; +struct FLightNode; + +namespace swrenderer +{ + struct WallSampler + { + WallSampler() { } + WallSampler(int y1, float swal, double yrepeat, fixed_t xoffset, double xmagnitude, FTexture *texture, const BYTE*(*getcol)(FTexture *texture, int x)); + + uint32_t uv_pos; + uint32_t uv_step; + uint32_t uv_max; + + const BYTE *source; + const BYTE *source2; + uint32_t texturefracx; + uint32_t height; + }; + + void R_DrawWallSegment(FTexture *rw_pic, int x1, int x2, short *walltop, short *wallbottom, float *swall, fixed_t *lwall, double yscale, double top, double bottom, bool mask, FLightNode *light_list = nullptr); + void 
R_DrawSkySegment(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const uint8_t *(*getcol)(FTexture *tex, int col)); +} From db53d9c7065e4a8e5e3276b79b29397044797dc0 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 28 Dec 2016 01:56:37 +0100 Subject: [PATCH 588/912] Remove r_local header --- src/swrenderer/drawers/r_draw.cpp | 3 +- src/swrenderer/drawers/r_draw_rgba.cpp | 2 +- src/swrenderer/drawers/r_thread.cpp | 2 +- src/swrenderer/r_local.h | 40 -------------------------- src/swrenderer/r_main.cpp | 4 ++- src/swrenderer/r_swrenderer.cpp | 3 +- src/swrenderer/scene/r_3dfloors.cpp | 2 +- src/swrenderer/scene/r_bsp.cpp | 1 - src/swrenderer/scene/r_plane.cpp | 3 +- src/swrenderer/scene/r_segs.cpp | 3 +- src/swrenderer/scene/r_things.cpp | 3 +- src/swrenderer/scene/r_walldraw.cpp | 2 +- 12 files changed, 16 insertions(+), 52 deletions(-) delete mode 100644 src/swrenderer/r_local.h diff --git a/src/swrenderer/drawers/r_draw.cpp b/src/swrenderer/drawers/r_draw.cpp index cbfb8e2536..9d991a039d 100644 --- a/src/swrenderer/drawers/r_draw.cpp +++ b/src/swrenderer/drawers/r_draw.cpp @@ -38,7 +38,6 @@ #include "doomdef.h" #include "i_system.h" #include "w_wad.h" -#include "swrenderer/r_local.h" #include "v_video.h" #include "doomstat.h" #include "st_stuff.h" @@ -47,11 +46,11 @@ #include "r_data/r_translate.h" #include "v_palette.h" #include "r_data/colormaps.h" -#include "swrenderer/scene/r_plane.h" #include "r_draw.h" #include "r_draw_rgba.h" #include "r_draw_pal.h" #include "r_thread.h" +#include "swrenderer/r_main.h" CVAR(Bool, r_dynlights, 1, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); diff --git a/src/swrenderer/drawers/r_draw_rgba.cpp b/src/swrenderer/drawers/r_draw_rgba.cpp index 4203bc8a98..90c1ec21af 100644 --- a/src/swrenderer/drawers/r_draw_rgba.cpp +++ b/src/swrenderer/drawers/r_draw_rgba.cpp @@ -27,7 +27,7 @@ #include "doomdef.h" #include "i_system.h" #include "w_wad.h" -#include "swrenderer/r_local.h" +#include 
"swrenderer/r_main.h" #include "v_video.h" #include "doomstat.h" #include "st_stuff.h" diff --git a/src/swrenderer/drawers/r_thread.cpp b/src/swrenderer/drawers/r_thread.cpp index 15eeee4250..1dea2b3566 100644 --- a/src/swrenderer/drawers/r_thread.cpp +++ b/src/swrenderer/drawers/r_thread.cpp @@ -25,7 +25,7 @@ #include "doomdef.h" #include "i_system.h" #include "w_wad.h" -#include "swrenderer/r_local.h" +#include "swrenderer/r_main.h" #include "v_video.h" #include "doomstat.h" #include "st_stuff.h" diff --git a/src/swrenderer/r_local.h b/src/swrenderer/r_local.h deleted file mode 100644 index 92fb717abd..0000000000 --- a/src/swrenderer/r_local.h +++ /dev/null @@ -1,40 +0,0 @@ -// Emacs style mode select -*- C++ -*- -//----------------------------------------------------------------------------- -// -// $Id:$ -// -// Copyright (C) 1993-1996 by id Software, Inc. -// -// This source is available for distribution and/or modification -// only under the terms of the DOOM Source Code License as -// published by id Software. All rights reserved. -// -// The source is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// FITNESS FOR A PARTICULAR PURPOSE. See the DOOM Source Code License -// for more details. -// -// DESCRIPTION: -// Refresh (R_*) module, global header. -// All the rendering/drawing stuff is here. -// -//----------------------------------------------------------------------------- - -#ifndef __R_LOCAL_H__ -#define __R_LOCAL_H__ - -// Binary Angles, sine/cosine/atan lookups. - -// Screen size related parameters. -#include "doomdef.h" - -// Include the refresh/render data structs. - -// -// Separate header file for each module. 
-// -#include "r_main.h" -#include "scene/r_things.h" -#include "drawers/r_draw.h" - -#endif // __R_LOCAL_H__ diff --git a/src/swrenderer/r_main.cpp b/src/swrenderer/r_main.cpp index dbee5ab240..9a874a5741 100644 --- a/src/swrenderer/r_main.cpp +++ b/src/swrenderer/r_main.cpp @@ -34,7 +34,9 @@ #include "doomstat.h" #include "m_random.h" #include "m_bbox.h" -#include "r_local.h" +#include "r_main.h" +#include "scene/r_things.h" +#include "drawers/r_draw.h" #include "scene/r_plane.h" #include "scene/r_bsp.h" #include "scene/r_segs.h" diff --git a/src/swrenderer/r_swrenderer.cpp b/src/swrenderer/r_swrenderer.cpp index 5b0d3b4dd3..ea7c549628 100644 --- a/src/swrenderer/r_swrenderer.cpp +++ b/src/swrenderer/r_swrenderer.cpp @@ -33,7 +33,8 @@ */ -#include "r_local.h" +#include "r_main.h" +#include "swrenderer/scene/r_things.h" #include "v_palette.h" #include "v_video.h" #include "m_png.h" diff --git a/src/swrenderer/scene/r_3dfloors.cpp b/src/swrenderer/scene/r_3dfloors.cpp index bbd50331c6..5cb8f7a8db 100644 --- a/src/swrenderer/scene/r_3dfloors.cpp +++ b/src/swrenderer/scene/r_3dfloors.cpp @@ -9,7 +9,7 @@ #include "doomdef.h" #include "p_local.h" #include "c_dispatch.h" -#include "swrenderer/r_local.h" +#include "swrenderer/r_main.h" #include "r_bsp.h" #include "r_plane.h" #include "c_cvars.h" diff --git a/src/swrenderer/scene/r_bsp.cpp b/src/swrenderer/scene/r_bsp.cpp index 36984743b1..aa7bc738bb 100644 --- a/src/swrenderer/scene/r_bsp.cpp +++ b/src/swrenderer/scene/r_bsp.cpp @@ -32,7 +32,6 @@ #include "p_lnspec.h" #include "p_setup.h" -#include "swrenderer/r_local.h" #include "swrenderer/r_main.h" #include "r_plane.h" #include "swrenderer/drawers/r_draw.h" diff --git a/src/swrenderer/scene/r_plane.cpp b/src/swrenderer/scene/r_plane.cpp index b6f1eca431..92875f0a37 100644 --- a/src/swrenderer/scene/r_plane.cpp +++ b/src/swrenderer/scene/r_plane.cpp @@ -42,7 +42,8 @@ #include "doomdef.h" #include "doomstat.h" -#include "swrenderer/r_local.h" +#include 
"swrenderer/r_main.h" +#include "swrenderer/scene/r_things.h" #include "r_sky.h" #include "stats.h" diff --git a/src/swrenderer/scene/r_segs.cpp b/src/swrenderer/scene/r_segs.cpp index 1c90ee2a39..acd69962b4 100644 --- a/src/swrenderer/scene/r_segs.cpp +++ b/src/swrenderer/scene/r_segs.cpp @@ -30,7 +30,8 @@ #include "doomdata.h" #include "p_lnspec.h" -#include "swrenderer/r_local.h" +#include "swrenderer/r_main.h" +#include "swrenderer/scene/r_things.h" #include "r_sky.h" #include "v_video.h" diff --git a/src/swrenderer/scene/r_things.cpp b/src/swrenderer/scene/r_things.cpp index 419499d4c9..f1c5edca12 100644 --- a/src/swrenderer/scene/r_things.cpp +++ b/src/swrenderer/scene/r_things.cpp @@ -37,7 +37,8 @@ #include "m_swap.h" #include "i_system.h" #include "w_wad.h" -#include "swrenderer/r_local.h" +#include "swrenderer/r_main.h" +#include "swrenderer/scene/r_things.h" #include "c_console.h" #include "c_cvars.h" #include "c_dispatch.h" diff --git a/src/swrenderer/scene/r_walldraw.cpp b/src/swrenderer/scene/r_walldraw.cpp index 1b56e9524b..097751ef8c 100644 --- a/src/swrenderer/scene/r_walldraw.cpp +++ b/src/swrenderer/scene/r_walldraw.cpp @@ -27,7 +27,7 @@ #include "doomstat.h" #include "doomdata.h" -#include "swrenderer/r_local.h" +#include "swrenderer/r_main.h" #include "r_sky.h" #include "v_video.h" From b2a0f02f6844cb295fe8a177f144b95ca35dd8b7 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 28 Dec 2016 02:21:32 +0100 Subject: [PATCH 589/912] Remove unused prototype --- src/swrenderer/drawers/r_draw.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/swrenderer/drawers/r_draw.h b/src/swrenderer/drawers/r_draw.h index 496b2ee112..47d232cffe 100644 --- a/src/swrenderer/drawers/r_draw.h +++ b/src/swrenderer/drawers/r_draw.h @@ -177,8 +177,6 @@ namespace swrenderer void R_FinishSetPatchStyle(); // Call this after finished drawing the current thing, in case its style was STYLE_Shade DrawerFunc R_GetTransMaskDrawer(); - const uint8_t 
*R_GetColumn(FTexture *tex, int col); - void R_UpdateFuzzPos(); // Sets dc_colormap and dc_light to their appropriate values depending on the output format (pal vs true color) From 1b284ecf3b3e7bf8e01598fe16c5ec8ac303c779 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 28 Dec 2016 07:04:13 +0100 Subject: [PATCH 590/912] Create files for clip, draw and portal segments --- src/CMakeLists.txt | 3 ++ src/swrenderer/r_main.cpp | 3 ++ src/swrenderer/scene/r_bsp.cpp | 66 +---------------------- src/swrenderer/scene/r_bsp.h | 39 -------------- src/swrenderer/scene/r_clip_segment.cpp | 40 ++++++++++++++ src/swrenderer/scene/r_clip_segment.h | 16 ++++++ src/swrenderer/scene/r_draw_segment.cpp | 49 +++++++++++++++++ src/swrenderer/scene/r_draw_segment.h | 43 +++++++++++++++ src/swrenderer/scene/r_plane.cpp | 3 ++ src/swrenderer/scene/r_portal_segment.cpp | 34 ++++++++++++ src/swrenderer/scene/r_portal_segment.h | 26 +++++++++ src/swrenderer/scene/r_segs.cpp | 5 +- src/swrenderer/scene/r_segs.h | 21 -------- src/swrenderer/scene/r_things.cpp | 2 + src/swrenderer/scene/r_walldraw.cpp | 1 + 15 files changed, 224 insertions(+), 127 deletions(-) create mode 100644 src/swrenderer/scene/r_clip_segment.cpp create mode 100644 src/swrenderer/scene/r_clip_segment.h create mode 100644 src/swrenderer/scene/r_draw_segment.cpp create mode 100644 src/swrenderer/scene/r_draw_segment.h create mode 100644 src/swrenderer/scene/r_portal_segment.cpp create mode 100644 src/swrenderer/scene/r_portal_segment.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 2eb5ba784b..fe90644ccb 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -820,6 +820,9 @@ set( FASTMATH_PCH_SOURCES swrenderer/scene/r_things.cpp swrenderer/scene/r_voxel.cpp swrenderer/scene/r_walldraw.cpp + swrenderer/scene/r_clip_segment.cpp + swrenderer/scene/r_draw_segment.cpp + swrenderer/scene/r_portal_segment.cpp polyrenderer/poly_renderer.cpp polyrenderer/scene/poly_scene.cpp 
polyrenderer/scene/poly_portal.cpp diff --git a/src/swrenderer/r_main.cpp b/src/swrenderer/r_main.cpp index 9a874a5741..72cefcefa4 100644 --- a/src/swrenderer/r_main.cpp +++ b/src/swrenderer/r_main.cpp @@ -39,6 +39,9 @@ #include "drawers/r_draw.h" #include "scene/r_plane.h" #include "scene/r_bsp.h" +#include "scene/r_draw_segment.h" +#include "scene/r_portal_segment.h" +#include "scene/r_clip_segment.h" #include "scene/r_segs.h" #include "scene/r_3dfloors.h" #include "r_sky.h" diff --git a/src/swrenderer/scene/r_bsp.cpp b/src/swrenderer/scene/r_bsp.cpp index aa7bc738bb..24937c7a99 100644 --- a/src/swrenderer/scene/r_bsp.cpp +++ b/src/swrenderer/scene/r_bsp.cpp @@ -37,6 +37,8 @@ #include "swrenderer/drawers/r_draw.h" #include "r_things.h" #include "r_3dfloors.h" +#include "r_clip_segment.h" +#include "r_portal_segment.h" #include "a_sharedglobal.h" #include "g_level.h" #include "p_effect.h" @@ -83,15 +85,6 @@ double rw_backfz1, rw_backfz2; double rw_frontcz1, rw_frontcz2; double rw_frontfz1, rw_frontfz2; - -size_t MaxDrawSegs; -drawseg_t *drawsegs; -drawseg_t* firstdrawseg; -drawseg_t* ds_p; - -size_t FirstInterestingDrawseg; -TArray InterestingDrawsegs; - FWallCoords WallC; FWallTmapVals WallT; @@ -99,7 +92,6 @@ static BYTE FakeSide; int WindowLeft, WindowRight; WORD MirrorFlags; -TArray WallPortals(1000); // note: this array needs to go away as reallocation can cause crashes. subsector_t *InSubsector; @@ -108,45 +100,6 @@ subsector_t *InSubsector; void R_StoreWallRange (int start, int stop); -// -// R_ClearDrawSegs -// -void R_ClearDrawSegs (void) -{ - if (drawsegs == NULL) - { - MaxDrawSegs = 256; // [RH] Default. Increased as needed. - firstdrawseg = drawsegs = (drawseg_t *)M_Malloc (MaxDrawSegs * sizeof(drawseg_t)); - } - FirstInterestingDrawseg = 0; - InterestingDrawsegs.Clear (); - ds_p = drawsegs; -} - - - -// -// ClipWallSegment -// Clips the given range of columns -// and includes it in the new clip list. 
-// -// -// 1/11/98 killough: Since a type "short" is sufficient, we -// should use it, since smaller arrays fit better in cache. -// - -struct cliprange_t -{ - short first, last; // killough -}; - - -// newend is one past the last valid seg -static cliprange_t *newend; -static cliprange_t solidsegs[MAXWIDTH/2+2]; - - - //========================================================================== // // R_ClipWallSegment @@ -285,21 +238,6 @@ bool R_CheckClipWallSegment (int first, int last) return false; } - - -// -// R_ClearClipSegs -// -void R_ClearClipSegs (short left, short right) -{ - solidsegs[0].first = -0x7fff; // new short limit -- killough - solidsegs[0].last = left; - solidsegs[1].first = right; - solidsegs[1].last = 0x7fff; // new short limit -- killough - newend = solidsegs+2; -} - - // // killough 3/7/98: Hack floor/ceiling heights for deep water etc. // diff --git a/src/swrenderer/scene/r_bsp.h b/src/swrenderer/scene/r_bsp.h index e4d70c4cf1..79f5b3d5ef 100644 --- a/src/swrenderer/scene/r_bsp.h +++ b/src/swrenderer/scene/r_bsp.h @@ -67,56 +67,17 @@ enum FAKED_AboveCeiling }; -struct drawseg_t -{ - seg_t* curline; - float light, lightstep; - float iscale, iscalestep; - short x1, x2; // Same as sx1 and sx2, but clipped to the drawseg - short sx1, sx2; // left, right of parent seg on screen - float sz1, sz2; // z for left, right of parent seg on screen - float siz1, siz2; // 1/z for left, right of parent seg on screen - float cx, cy, cdx, cdy; - float yscale; - BYTE silhouette; // 0=none, 1=bottom, 2=top, 3=both - BYTE bFogBoundary; - BYTE bFakeBoundary; // for fake walls - int shade; -// Pointers to lists for sprite clipping, -// all three adjusted so [x1] is first value. 
- ptrdiff_t sprtopclip; // type short - ptrdiff_t sprbottomclip; // type short - ptrdiff_t maskedtexturecol; // type short - ptrdiff_t swall; // type float - int fake; // ident fake drawseg, don't draw and clip sprites -// backups - ptrdiff_t bkup; // sprtopclip backup, for mid and fake textures - FWallTmapVals tmapvals; - int CurrentPortalUniq; // [ZZ] to identify the portal that this drawseg is in. used for sprite clipping. -}; - - extern seg_t* curline; extern side_t* sidedef; extern line_t* linedef; extern sector_t* frontsector; extern sector_t* backsector; -extern drawseg_t *drawsegs; -extern drawseg_t *firstdrawseg; -extern drawseg_t* ds_p; - -extern TArray InterestingDrawsegs; // drawsegs that have something drawn on them -extern size_t FirstInterestingDrawseg; - extern int WindowLeft, WindowRight; extern WORD MirrorFlags; typedef void (*drawfunc_t) (int start, int stop); -// BSP? -void R_ClearClipSegs (short left, short right); -void R_ClearDrawSegs (); void R_RenderBSPNode (void *node); // killough 4/13/98: fake floors/ceilings for deep water / fake ceilings: diff --git a/src/swrenderer/scene/r_clip_segment.cpp b/src/swrenderer/scene/r_clip_segment.cpp new file mode 100644 index 0000000000..1c4da92149 --- /dev/null +++ b/src/swrenderer/scene/r_clip_segment.cpp @@ -0,0 +1,40 @@ + +#include +#include "templates.h" +#include "doomdef.h" +#include "m_bbox.h" +#include "i_system.h" +#include "p_lnspec.h" +#include "p_setup.h" +#include "swrenderer/r_main.h" +#include "r_plane.h" +#include "swrenderer/drawers/r_draw.h" +#include "r_things.h" +#include "r_3dfloors.h" +#include "a_sharedglobal.h" +#include "g_level.h" +#include "p_effect.h" +#include "doomstat.h" +#include "r_state.h" +#include "r_bsp.h" +#include "r_segs.h" +#include "v_palette.h" +#include "r_sky.h" +#include "po_man.h" +#include "r_data/colormaps.h" +#include "r_clip_segment.h" + +namespace swrenderer +{ + cliprange_t *newend; + cliprange_t solidsegs[MAXWIDTH/2+2]; + + void 
R_ClearClipSegs(short left, short right) + { + solidsegs[0].first = -0x7fff; + solidsegs[0].last = left; + solidsegs[1].first = right; + solidsegs[1].last = 0x7fff; + newend = solidsegs+2; + } +} diff --git a/src/swrenderer/scene/r_clip_segment.h b/src/swrenderer/scene/r_clip_segment.h new file mode 100644 index 0000000000..ce6fffe07a --- /dev/null +++ b/src/swrenderer/scene/r_clip_segment.h @@ -0,0 +1,16 @@ + +#pragma once + +namespace swrenderer +{ + struct cliprange_t + { + short first, last; + }; + + // newend is one past the last valid seg + extern cliprange_t *newend; + extern cliprange_t solidsegs[MAXWIDTH/2+2]; + + void R_ClearClipSegs(short left, short right); +} diff --git a/src/swrenderer/scene/r_draw_segment.cpp b/src/swrenderer/scene/r_draw_segment.cpp new file mode 100644 index 0000000000..d4122f7287 --- /dev/null +++ b/src/swrenderer/scene/r_draw_segment.cpp @@ -0,0 +1,49 @@ + +#include +#include "templates.h" +#include "doomdef.h" +#include "m_bbox.h" +#include "i_system.h" +#include "p_lnspec.h" +#include "p_setup.h" +#include "swrenderer/r_main.h" +#include "r_plane.h" +#include "swrenderer/drawers/r_draw.h" +#include "r_things.h" +#include "r_3dfloors.h" +#include "a_sharedglobal.h" +#include "g_level.h" +#include "p_effect.h" +#include "doomstat.h" +#include "r_state.h" +#include "r_bsp.h" +#include "r_segs.h" +#include "v_palette.h" +#include "r_sky.h" +#include "po_man.h" +#include "r_data/colormaps.h" +#include "r_draw_segment.h" + +namespace swrenderer +{ + size_t MaxDrawSegs; + drawseg_t *drawsegs; + drawseg_t *firstdrawseg; + drawseg_t *ds_p; + + size_t FirstInterestingDrawseg; + TArray InterestingDrawsegs; + + void R_ClearDrawSegs() + { + if (drawsegs == NULL) + { + MaxDrawSegs = 256; // [RH] Default. Increased as needed. 
+ firstdrawseg = drawsegs = (drawseg_t *)M_Malloc (MaxDrawSegs * sizeof(drawseg_t)); + } + FirstInterestingDrawseg = 0; + InterestingDrawsegs.Clear (); + ds_p = drawsegs; + } + +} diff --git a/src/swrenderer/scene/r_draw_segment.h b/src/swrenderer/scene/r_draw_segment.h new file mode 100644 index 0000000000..07aa6198ff --- /dev/null +++ b/src/swrenderer/scene/r_draw_segment.h @@ -0,0 +1,43 @@ + +#pragma once + +namespace swrenderer +{ + struct drawseg_t + { + seg_t *curline; + float light, lightstep; + float iscale, iscalestep; + short x1, x2; // Same as sx1 and sx2, but clipped to the drawseg + short sx1, sx2; // left, right of parent seg on screen + float sz1, sz2; // z for left, right of parent seg on screen + float siz1, siz2; // 1/z for left, right of parent seg on screen + float cx, cy, cdx, cdy; + float yscale; + uint8_t silhouette; // 0=none, 1=bottom, 2=top, 3=both + uint8_t bFogBoundary; + uint8_t bFakeBoundary; // for fake walls + int shade; + + // Pointers to lists for sprite clipping, all three adjusted so [x1] is first value. + ptrdiff_t sprtopclip; // type short + ptrdiff_t sprbottomclip; // type short + ptrdiff_t maskedtexturecol; // type short + ptrdiff_t swall; // type float + ptrdiff_t bkup; // sprtopclip backup, for mid and fake textures + + FWallTmapVals tmapvals; + + int fake; // ident fake drawseg, don't draw and clip sprites backups + int CurrentPortalUniq; // [ZZ] to identify the portal that this drawseg is in. used for sprite clipping. 
+ }; + + extern drawseg_t *drawsegs; + extern drawseg_t *firstdrawseg; + extern drawseg_t *ds_p; + + extern TArray InterestingDrawsegs; // drawsegs that have something drawn on them + extern size_t FirstInterestingDrawseg; + + void R_ClearDrawSegs(); +} diff --git a/src/swrenderer/scene/r_plane.cpp b/src/swrenderer/scene/r_plane.cpp index 92875f0a37..746205e707 100644 --- a/src/swrenderer/scene/r_plane.cpp +++ b/src/swrenderer/scene/r_plane.cpp @@ -62,6 +62,9 @@ #include "swrenderer/drawers/r_draw_rgba.h" #include "gl/dynlights/gl_dynlight.h" #include "r_walldraw.h" +#include "r_clip_segment.h" +#include "r_draw_segment.h" +#include "r_portal_segment.h" #ifdef _MSC_VER #pragma warning(disable:4244) diff --git a/src/swrenderer/scene/r_portal_segment.cpp b/src/swrenderer/scene/r_portal_segment.cpp new file mode 100644 index 0000000000..5e698f0c7f --- /dev/null +++ b/src/swrenderer/scene/r_portal_segment.cpp @@ -0,0 +1,34 @@ + +#include +#include "templates.h" +#include "doomdef.h" +#include "m_bbox.h" +#include "i_system.h" +#include "p_lnspec.h" +#include "p_setup.h" +#include "swrenderer/r_main.h" +#include "r_plane.h" +#include "swrenderer/drawers/r_draw.h" +#include "r_things.h" +#include "r_3dfloors.h" +#include "a_sharedglobal.h" +#include "g_level.h" +#include "p_effect.h" +#include "doomstat.h" +#include "r_state.h" +#include "r_bsp.h" +#include "r_segs.h" +#include "v_palette.h" +#include "r_sky.h" +#include "po_man.h" +#include "r_data/colormaps.h" +#include "r_portal_segment.h" + +namespace swrenderer +{ + PortalDrawseg *CurrentPortal = nullptr; + int CurrentPortalUniq = 0; + bool CurrentPortalInSkybox = false; + + TArray WallPortals(1000); // note: this array needs to go away as reallocation can cause crashes. 
+} diff --git a/src/swrenderer/scene/r_portal_segment.h b/src/swrenderer/scene/r_portal_segment.h new file mode 100644 index 0000000000..a1e341df18 --- /dev/null +++ b/src/swrenderer/scene/r_portal_segment.h @@ -0,0 +1,26 @@ + +#pragma once + +namespace swrenderer +{ + /* portal structure, this is used in r_ code in order to store drawsegs with portals (and mirrors) */ + struct PortalDrawseg + { + line_t* src; // source line (the one drawn) this doesn't change over render loops + line_t* dst; // destination line (the one that the portal is linked with, equals 'src' for mirrors) + + int x1; // drawseg x1 + int x2; // drawseg x2 + + int len; + TArray ceilingclip; + TArray floorclip; + + bool mirror; // true if this is a mirror (src should equal dst) + }; + + extern PortalDrawseg* CurrentPortal; + extern int CurrentPortalUniq; + extern bool CurrentPortalInSkybox; + extern TArray WallPortals; +} diff --git a/src/swrenderer/scene/r_segs.cpp b/src/swrenderer/scene/r_segs.cpp index acd69962b4..5be89ecb09 100644 --- a/src/swrenderer/scene/r_segs.cpp +++ b/src/swrenderer/scene/r_segs.cpp @@ -49,6 +49,8 @@ #include "v_palette.h" #include "r_data/colormaps.h" #include "r_walldraw.h" +#include "r_draw_segment.h" +#include "r_portal_segment.h" #define WALLYREPEAT 8 @@ -69,9 +71,6 @@ namespace swrenderer extern double globaluclip, globaldclip; -PortalDrawseg* CurrentPortal = NULL; -int CurrentPortalUniq = 0; -bool CurrentPortalInSkybox = false; // OPTIMIZE: closed two sided lines as single sided diff --git a/src/swrenderer/scene/r_segs.h b/src/swrenderer/scene/r_segs.h index 8f552daeee..20cba123b7 100644 --- a/src/swrenderer/scene/r_segs.h +++ b/src/swrenderer/scene/r_segs.h @@ -57,27 +57,6 @@ extern float rw_lightstep; extern float rw_lightleft; extern fixed_t rw_offset; -/* portal structure, this is used in r_ code in order to store drawsegs with portals (and mirrors) */ -struct PortalDrawseg -{ - line_t* src; // source line (the one drawn) this doesn't change over render 
loops - line_t* dst; // destination line (the one that the portal is linked with, equals 'src' for mirrors) - - int x1; // drawseg x1 - int x2; // drawseg x2 - - int len; - TArray ceilingclip; - TArray floorclip; - - bool mirror; // true if this is a mirror (src should equal dst) -}; - -extern PortalDrawseg* CurrentPortal; -extern int CurrentPortalUniq; -extern bool CurrentPortalInSkybox; -extern TArray WallPortals; - } #endif diff --git a/src/swrenderer/scene/r_things.cpp b/src/swrenderer/scene/r_things.cpp index f1c5edca12..f53d0531a6 100644 --- a/src/swrenderer/scene/r_things.cpp +++ b/src/swrenderer/scene/r_things.cpp @@ -68,6 +68,8 @@ #include "p_local.h" #include "p_maputl.h" #include "r_voxel.h" +#include "r_draw_segment.h" +#include "r_portal_segment.h" EXTERN_CVAR(Bool, st_scale) EXTERN_CVAR(Bool, r_shadercolormaps) diff --git a/src/swrenderer/scene/r_walldraw.cpp b/src/swrenderer/scene/r_walldraw.cpp index 097751ef8c..83cbc96ca0 100644 --- a/src/swrenderer/scene/r_walldraw.cpp +++ b/src/swrenderer/scene/r_walldraw.cpp @@ -45,6 +45,7 @@ #include "gl/dynlights/gl_dynlight.h" #include "swrenderer/drawers/r_drawers.h" #include "r_walldraw.h" +#include "r_draw_segment.h" namespace swrenderer { From 18a551f936db703e4c5bac4d5a16ec3b100cf803 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 28 Dec 2016 12:07:55 +0100 Subject: [PATCH 591/912] Move all clip segment handling to r_clip_segment and make its working data private --- src/swrenderer/scene/r_bsp.cpp | 162 +----------------------- src/swrenderer/scene/r_clip_segment.cpp | 153 +++++++++++++++++++++- src/swrenderer/scene/r_clip_segment.h | 14 +- src/swrenderer/scene/r_segs.cpp | 7 +- src/swrenderer/scene/r_segs.h | 1 + 5 files changed, 164 insertions(+), 173 deletions(-) diff --git a/src/swrenderer/scene/r_bsp.cpp b/src/swrenderer/scene/r_bsp.cpp index 24937c7a99..2b3054e4fe 100644 --- a/src/swrenderer/scene/r_bsp.cpp +++ b/src/swrenderer/scene/r_bsp.cpp @@ -96,148 +96,6 @@ WORD MirrorFlags; 
subsector_t *InSubsector; - - -void R_StoreWallRange (int start, int stop); - -//========================================================================== -// -// R_ClipWallSegment -// -// Clips the given range of columns, possibly including it in the clip list. -// Handles both windows (e.g. LineDefs with upper and lower textures) and -// solid walls (e.g. single sided LineDefs [middle texture]) that entirely -// block the view. -// -//========================================================================== - -bool R_ClipWallSegment (int first, int last, bool solid) -{ - cliprange_t *next, *start; - int i, j; - bool res = false; - - // Find the first range that touches the range - // (adjacent pixels are touching). - start = solidsegs; - while (start->last < first) - start++; - - if (first < start->first) - { - res = true; - if (last <= start->first) - { - // Post is entirely visible (above start). - R_StoreWallRange (first, last); - if (fake3D & FAKE3D_FAKEMASK) - { - return true; - } - - // Insert a new clippost for solid walls. - if (solid) - { - if (last == start->first) - { - start->first = first; - } - else - { - next = newend; - newend++; - while (next != start) - { - *next = *(next-1); - next--; - } - next->first = first; - next->last = last; - } - } - return true; - } - - // There is a fragment above *start. - R_StoreWallRange (first, start->first); - - // Adjust the clip size for solid walls - if (solid && !(fake3D & FAKE3D_FAKEMASK)) - { - start->first = first; - } - } - - // Bottom contained in start? - if (last <= start->last) - return res; - - next = start; - while (last >= (next+1)->first) - { - // There is a fragment between two posts. - R_StoreWallRange (next->last, (next+1)->first); - next++; - - if (last <= next->last) - { - // Bottom is contained in next. - last = next->last; - goto crunch; - } - } - - // There is a fragment after *next. 
- R_StoreWallRange (next->last, last); - -crunch: - if (fake3D & FAKE3D_FAKEMASK) - { - return true; - } - if (solid) - { - // Adjust the clip size. - start->last = last; - - if (next != start) - { - // Remove start+1 to next from the clip list, - // because start now covers their area. - for (i = 1, j = (int)(newend - next); j > 0; i++, j--) - { - start[i] = next[i]; - } - newend = start+i; - } - } - return true; -} - -bool R_CheckClipWallSegment (int first, int last) -{ - cliprange_t *start; - - // Find the first range that touches the range - // (adjacent pixels are touching). - start = solidsegs; - while (start->last < first) - start++; - - if (first < start->first) - { - return true; - } - - // Bottom contained in start? - if (last > start->last) - { - return true; - } - - return false; -} - // // killough 3/7/98: Hack floor/ceiling heights for deep water etc. // @@ -703,7 +561,7 @@ void R_AddLine (seg_t *line) #endif } - if (R_ClipWallSegment (WallC.sx1, WallC.sx2, solid)) + if (R_ClipWallSegment(WallC.sx1, WallC.sx2, solid, R_StoreWallRange)) { InSubsector->flags |= SSECF_DRAWN; } @@ -836,8 +694,6 @@ static bool R_CheckBBox (float *bspcoord) // killough 1/28/98: static double rx1, ry1, rx2, ry2; int sx1, sx2; - cliprange_t* start; - // Find the corners of the box // that define the edges from current viewpoint. if (ViewPos.X <= bspcoord[BOXLEFT]) @@ -911,21 +767,7 @@ static bool R_CheckBBox (float *bspcoord) // killough 1/28/98: static // Find the first clippost that touches the source post // (adjacent pixels are touching). - // Does not cross a pixel. - if (sx2 <= sx1) - return false; - - start = solidsegs; - while (start->last < sx2) - start++; - - if (sx1 >= start->first && sx2 <= start->last) - { - // The clippost contains the new span. 
- return false; - } - - return true; + return R_IsWallSegmentVisible(sx1, sx2); } diff --git a/src/swrenderer/scene/r_clip_segment.cpp b/src/swrenderer/scene/r_clip_segment.cpp index 1c4da92149..4db069a495 100644 --- a/src/swrenderer/scene/r_clip_segment.cpp +++ b/src/swrenderer/scene/r_clip_segment.cpp @@ -26,8 +26,16 @@ namespace swrenderer { - cliprange_t *newend; - cliprange_t solidsegs[MAXWIDTH/2+2]; + namespace + { + struct cliprange_t + { + short first, last; + }; + + cliprange_t *newend; // newend is one past the last valid seg + cliprange_t solidsegs[MAXWIDTH / 2 + 2]; + } void R_ClearClipSegs(short left, short right) { @@ -37,4 +45,145 @@ namespace swrenderer solidsegs[1].last = 0x7fff; newend = solidsegs+2; } + + bool R_CheckClipWallSegment(int first, int last) + { + cliprange_t *start; + + // Find the first range that touches the range + // (adjacent pixels are touching). + start = solidsegs; + while (start->last < first) + start++; + + if (first < start->first) + { + return true; + } + + // Bottom contained in start? + if (last > start->last) + { + return true; + } + + return false; + } + + bool R_IsWallSegmentVisible(int sx1, int sx2) + { + // Does not cross a pixel. + if (sx2 <= sx1) + return false; + + cliprange_t *start = solidsegs; + while (start->last < sx2) + start++; + + if (sx1 >= start->first && sx2 <= start->last) + { + // The clippost contains the new span. + return false; + } + + return true; + } + + bool R_ClipWallSegment(int first, int last, bool solid, VisibleSegmentCallback callback) + { + cliprange_t *next, *start; + int i, j; + bool res = false; + + // Find the first range that touches the range + // (adjacent pixels are touching). + start = solidsegs; + while (start->last < first) + start++; + + if (first < start->first) + { + res = true; + if (last <= start->first) + { + // Post is entirely visible (above start). + if (!callback(first, last)) + return true; + + // Insert a new clippost for solid walls. 
+ if (solid) + { + if (last == start->first) + { + start->first = first; + } + else + { + next = newend; + newend++; + while (next != start) + { + *next = *(next - 1); + next--; + } + next->first = first; + next->last = last; + } + } + return true; + } + + // There is a fragment above *start. Grow the clip post only when the callback reports that the segment clips (returns true), matching the pre-refactor "solid && !(fake3D & FAKE3D_FAKEMASK)" test. + if (callback(first, start->first) && solid) + { + start->first = first; + } + } + + // Bottom contained in start? + if (last <= start->last) + return res; + + bool clipsegment; + next = start; + while (last >= (next + 1)->first) + { + // There is a fragment between two posts. + clipsegment = callback(next->last, (next + 1)->first); + next++; + + if (last <= next->last) + { + // Bottom is contained in next. + last = next->last; + goto crunch; + } + } + + // There is a fragment after *next. + clipsegment = callback(next->last, last); + + crunch: + if (!clipsegment) + { + return true; + } + if (solid) + { + // Adjust the clip size. + start->last = last; + + if (next != start) + { + // Remove start+1 to next from the clip list, + // because start now covers their area. 
+ for (i = 1, j = (int)(newend - next); j > 0; i++, j--) + { + start[i] = next[i]; + } + newend = start + i; + } + } + return true; + } } diff --git a/src/swrenderer/scene/r_clip_segment.h b/src/swrenderer/scene/r_clip_segment.h index ce6fffe07a..3ebe464d83 100644 --- a/src/swrenderer/scene/r_clip_segment.h +++ b/src/swrenderer/scene/r_clip_segment.h @@ -3,14 +3,10 @@ namespace swrenderer { - struct cliprange_t - { - short first, last; - }; - - // newend is one past the last valid seg - extern cliprange_t *newend; - extern cliprange_t solidsegs[MAXWIDTH/2+2]; - + typedef bool(*VisibleSegmentCallback)(int x1, int x2); + void R_ClearClipSegs(short left, short right); + bool R_ClipWallSegment(int x1, int x2, bool solid, VisibleSegmentCallback callback); + bool R_CheckClipWallSegment(int first, int last); + bool R_IsWallSegmentVisible(int x1, int x2); } diff --git a/src/swrenderer/scene/r_segs.cpp b/src/swrenderer/scene/r_segs.cpp index 5be89ecb09..f23a9326bf 100644 --- a/src/swrenderer/scene/r_segs.cpp +++ b/src/swrenderer/scene/r_segs.cpp @@ -1587,7 +1587,7 @@ ptrdiff_t R_NewOpening (ptrdiff_t len) // A wall segment will be drawn between start and stop pixels (inclusive). 
// -void R_StoreWallRange (int start, int stop) +bool R_StoreWallRange (int start, int stop) { int i; bool maskedtexture = false; @@ -1827,7 +1827,8 @@ void R_StoreWallRange (int start, int stop) if(fake3D & 7) { ds_p++; - return; + + return !(fake3D & FAKE3D_FAKEMASK); } // save sprite clipping info @@ -1888,6 +1889,8 @@ void R_StoreWallRange (int start, int stop) } ds_p++; + + return !(fake3D & FAKE3D_FAKEMASK); } int R_CreateWallSegmentY(short *outbuf, double z1, double z2, const FWallCoords *wallc) diff --git a/src/swrenderer/scene/r_segs.h b/src/swrenderer/scene/r_segs.h index 20cba123b7..f9b2358727 100644 --- a/src/swrenderer/scene/r_segs.h +++ b/src/swrenderer/scene/r_segs.h @@ -28,6 +28,7 @@ namespace swrenderer struct drawseg_t; +bool R_StoreWallRange(int start, int stop); void R_RenderMaskedSegRange (drawseg_t *ds, int x1, int x2); extern short *openings; From b615b1b497cbdec86e151f608ee9ebc11184c093 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 29 Dec 2016 01:06:24 +0100 Subject: [PATCH 592/912] Move some draw segment functions to r_draw_segment --- src/swrenderer/scene/r_draw_segment.cpp | 29 ++++++++++++++++++ src/swrenderer/scene/r_draw_segment.h | 2 ++ src/swrenderer/scene/r_segs.cpp | 40 ------------------------- src/swrenderer/scene/r_segs.h | 4 --- 4 files changed, 31 insertions(+), 44 deletions(-) diff --git a/src/swrenderer/scene/r_draw_segment.cpp b/src/swrenderer/scene/r_draw_segment.cpp index d4122f7287..b6e29b3967 100644 --- a/src/swrenderer/scene/r_draw_segment.cpp +++ b/src/swrenderer/scene/r_draw_segment.cpp @@ -46,4 +46,33 @@ namespace swrenderer ds_p = drawsegs; } + ptrdiff_t R_NewOpening(ptrdiff_t len) + { + ptrdiff_t res = lastopening; + len = (len + 1) & ~1; // only return DWORD aligned addresses because some code stores fixed_t's and floats in openings... + lastopening += len; + if ((size_t)lastopening > maxopenings) + { + do + maxopenings = maxopenings ? 
maxopenings * 2 : 16384; + while ((size_t)lastopening > maxopenings); + openings = (short *)M_Realloc(openings, maxopenings * sizeof(*openings)); + DPrintf(DMSG_NOTIFY, "MaxOpenings increased to %zu\n", maxopenings); + } + return res; + } + + void R_CheckDrawSegs() + { + if (ds_p == &drawsegs[MaxDrawSegs]) + { // [RH] Grab some more drawsegs + size_t newdrawsegs = MaxDrawSegs ? MaxDrawSegs * 2 : 32; + ptrdiff_t firstofs = firstdrawseg - drawsegs; + drawsegs = (drawseg_t *)M_Realloc(drawsegs, newdrawsegs * sizeof(drawseg_t)); + firstdrawseg = drawsegs + firstofs; + ds_p = drawsegs + MaxDrawSegs; + MaxDrawSegs = newdrawsegs; + DPrintf(DMSG_NOTIFY, "MaxDrawSegs increased to %zu\n", MaxDrawSegs); + } + } } diff --git a/src/swrenderer/scene/r_draw_segment.h b/src/swrenderer/scene/r_draw_segment.h index 07aa6198ff..d22e754e00 100644 --- a/src/swrenderer/scene/r_draw_segment.h +++ b/src/swrenderer/scene/r_draw_segment.h @@ -40,4 +40,6 @@ namespace swrenderer extern size_t FirstInterestingDrawseg; void R_ClearDrawSegs(); + void R_CheckDrawSegs(); + ptrdiff_t R_NewOpening(ptrdiff_t len); } diff --git a/src/swrenderer/scene/r_segs.cpp b/src/swrenderer/scene/r_segs.cpp index f23a9326bf..833755fe1a 100644 --- a/src/swrenderer/scene/r_segs.cpp +++ b/src/swrenderer/scene/r_segs.cpp @@ -1542,46 +1542,6 @@ void R_NewWall (bool needlights) } } - -// -// R_CheckDrawSegs -// - -void R_CheckDrawSegs () -{ - if (ds_p == &drawsegs[MaxDrawSegs]) - { // [RH] Grab some more drawsegs - size_t newdrawsegs = MaxDrawSegs ? 
MaxDrawSegs*2 : 32; - ptrdiff_t firstofs = firstdrawseg - drawsegs; - drawsegs = (drawseg_t *)M_Realloc (drawsegs, newdrawsegs * sizeof(drawseg_t)); - firstdrawseg = drawsegs + firstofs; - ds_p = drawsegs + MaxDrawSegs; - MaxDrawSegs = newdrawsegs; - DPrintf (DMSG_NOTIFY, "MaxDrawSegs increased to %zu\n", MaxDrawSegs); - } -} - -// -// R_CheckOpenings -// - -ptrdiff_t R_NewOpening (ptrdiff_t len) -{ - ptrdiff_t res = lastopening; - len = (len + 1) & ~1; // only return DWORD aligned addresses because some code stores fixed_t's and floats in openings... - lastopening += len; - if ((size_t)lastopening > maxopenings) - { - do - maxopenings = maxopenings ? maxopenings*2 : 16384; - while ((size_t)lastopening > maxopenings); - openings = (short *)M_Realloc (openings, maxopenings * sizeof(*openings)); - DPrintf (DMSG_NOTIFY, "MaxOpenings increased to %zu\n", maxopenings); - } - return res; -} - - // // R_StoreWallRange // A wall segment will be drawn between start and stop pixels (inclusive). diff --git a/src/swrenderer/scene/r_segs.h b/src/swrenderer/scene/r_segs.h index f9b2358727..a4aa36b652 100644 --- a/src/swrenderer/scene/r_segs.h +++ b/src/swrenderer/scene/r_segs.h @@ -45,10 +45,6 @@ inline int R_CreateWallSegmentY(short *outbuf, double z, const FWallCoords *wall void PrepWall (float *swall, fixed_t *lwall, double walxrepeat, int x1, int x2); void PrepLWall (fixed_t *lwall, double walxrepeat, int x1, int x2); -ptrdiff_t R_NewOpening (ptrdiff_t len); - -void R_CheckDrawSegs (); - void R_RenderSegLoop (); extern float swall[MAXWIDTH]; From d3056d267969f4a104c1c4fb240398bcce5587c1 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 30 Dec 2016 02:20:24 +0100 Subject: [PATCH 593/912] Split poly_triangle into multiple files --- src/CMakeLists.txt | 4 + src/polyrenderer/drawers/poly_buffer.cpp | 100 ++ src/polyrenderer/drawers/poly_buffer.h | 68 ++ src/polyrenderer/drawers/poly_draw_args.cpp | 106 ++ src/polyrenderer/drawers/poly_draw_args.h | 69 ++ 
src/polyrenderer/drawers/poly_triangle.cpp | 1104 +----------------- src/polyrenderer/drawers/poly_triangle.h | 220 +--- src/polyrenderer/drawers/screen_triangle.cpp | 966 +++++++++++++++ src/polyrenderer/drawers/screen_triangle.h | 40 + src/polyrenderer/math/tri_matrix.cpp | 187 +++ src/polyrenderer/math/tri_matrix.h | 46 + 11 files changed, 1590 insertions(+), 1320 deletions(-) create mode 100644 src/polyrenderer/drawers/poly_buffer.cpp create mode 100644 src/polyrenderer/drawers/poly_buffer.h create mode 100644 src/polyrenderer/drawers/poly_draw_args.cpp create mode 100644 src/polyrenderer/drawers/poly_draw_args.h create mode 100644 src/polyrenderer/drawers/screen_triangle.cpp create mode 100644 src/polyrenderer/drawers/screen_triangle.h create mode 100644 src/polyrenderer/math/tri_matrix.cpp create mode 100644 src/polyrenderer/math/tri_matrix.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 2df5fb949a..454ed25daa 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -836,6 +836,10 @@ set( FASTMATH_PCH_SOURCES polyrenderer/scene/poly_sprite.cpp polyrenderer/scene/poly_sky.cpp polyrenderer/drawers/poly_triangle.cpp + polyrenderer/drawers/poly_buffer.cpp + polyrenderer/drawers/poly_draw_args.cpp + polyrenderer/drawers/screen_triangle.cpp + polyrenderer/math/tri_matrix.cpp polyrenderer/math/poly_intersection.cpp r_sky.cpp s_advsound.cpp diff --git a/src/polyrenderer/drawers/poly_buffer.cpp b/src/polyrenderer/drawers/poly_buffer.cpp new file mode 100644 index 0000000000..5ad9f4474e --- /dev/null +++ b/src/polyrenderer/drawers/poly_buffer.cpp @@ -0,0 +1,100 @@ +/* +** Triangle drawers +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. 
+** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. +** +*/ + +#include +#include "templates.h" +#include "doomdef.h" +#include "i_system.h" +#include "w_wad.h" +#include "v_video.h" +#include "doomstat.h" +#include "st_stuff.h" +#include "g_game.h" +#include "g_level.h" +#include "r_data/r_translate.h" +#include "v_palette.h" +#include "r_data/colormaps.h" +#include "poly_buffer.h" +#include "swrenderer/drawers/r_drawers.h" + +///////////////////////////////////////////////////////////////////////////// + +PolySubsectorGBuffer *PolySubsectorGBuffer::Instance() +{ + static PolySubsectorGBuffer buffer; + return &buffer; +} + +void PolySubsectorGBuffer::Resize(int newwidth, int newheight) +{ + width = newwidth; + height = newheight; + values.resize(width * height); +} + +///////////////////////////////////////////////////////////////////////////// + +PolyStencilBuffer *PolyStencilBuffer::Instance() +{ + static PolyStencilBuffer buffer; + return &buffer; +} + +void PolyStencilBuffer::Clear(int newwidth, int newheight, uint8_t stencil_value) +{ + width = newwidth; + height = newheight; + int count = BlockWidth() * BlockHeight(); + values.resize(count * 64); + masks.resize(count); + + uint8_t *v = Values(); + uint32_t *m = Masks(); + for (int i = 0; i < count; i++) + { + m[i] = 0xffffff00 | stencil_value; + } +} + 
+///////////////////////////////////////////////////////////////////////////// + +namespace +{ + int NextBufferVertex = 0; +} + +TriVertex *PolyVertexBuffer::GetVertices(int count) +{ + enum { VertexBufferSize = 256 * 1024 }; + static TriVertex Vertex[VertexBufferSize]; + + if (NextBufferVertex + count > VertexBufferSize) + return nullptr; + TriVertex *v = Vertex + NextBufferVertex; + NextBufferVertex += count; + return v; +} + +void PolyVertexBuffer::Clear() +{ + NextBufferVertex = 0; +} diff --git a/src/polyrenderer/drawers/poly_buffer.h b/src/polyrenderer/drawers/poly_buffer.h new file mode 100644 index 0000000000..4454f72fda --- /dev/null +++ b/src/polyrenderer/drawers/poly_buffer.h @@ -0,0 +1,68 @@ +/* +** Frame buffers +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. 
+** +*/ + +#pragma once + +struct TriVertex; + +class PolySubsectorGBuffer +{ +public: + static PolySubsectorGBuffer *Instance(); + void Resize(int newwidth, int newheight); + int Width() const { return width; } + int Height() const { return height; } + uint32_t *Values() { return values.data(); } + +private: + int width; + int height; + std::vector values; +}; + +class PolyStencilBuffer +{ +public: + static PolyStencilBuffer *Instance(); + void Clear(int newwidth, int newheight, uint8_t stencil_value = 0); + int Width() const { return width; } + int Height() const { return height; } + int BlockWidth() const { return (width + 7) / 8; } + int BlockHeight() const { return (height + 7) / 8; } + uint8_t *Values() { return values.data(); } + uint32_t *Masks() { return masks.data(); } + +private: + int width; + int height; + + // 8x8 blocks of stencil values, plus a mask for each block indicating if values are the same for early out stencil testing + std::vector values; + std::vector masks; +}; + +class PolyVertexBuffer +{ +public: + static TriVertex *GetVertices(int count); + static void Clear(); +}; diff --git a/src/polyrenderer/drawers/poly_draw_args.cpp b/src/polyrenderer/drawers/poly_draw_args.cpp new file mode 100644 index 0000000000..60621553a6 --- /dev/null +++ b/src/polyrenderer/drawers/poly_draw_args.cpp @@ -0,0 +1,106 @@ +/* +** Triangle drawers +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. 
If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. +** +*/ + +#include +#include "templates.h" +#include "doomdef.h" +#include "i_system.h" +#include "w_wad.h" +#include "v_video.h" +#include "doomstat.h" +#include "st_stuff.h" +#include "g_game.h" +#include "g_level.h" +#include "r_data/r_translate.h" +#include "v_palette.h" +#include "r_data/colormaps.h" +#include "poly_draw_args.h" +#include "swrenderer/r_main.h" + +void PolyDrawArgs::SetClipPlane(float a, float b, float c, float d) +{ + clipPlane[0] = a; + clipPlane[1] = b; + clipPlane[2] = c; + clipPlane[3] = d; +} + +void PolyDrawArgs::SetTexture(FTexture *texture) +{ + textureWidth = texture->GetWidth(); + textureHeight = texture->GetHeight(); + if (swrenderer::r_swtruecolor) + texturePixels = (const uint8_t *)texture->GetPixelsBgra(); + else + texturePixels = texture->GetPixels(); + translation = nullptr; +} + +void PolyDrawArgs::SetTexture(FTexture *texture, uint32_t translationID, bool forcePal) +{ + if (translationID != 0xffffffff && translationID != 0) + { + FRemapTable *table = TranslationToTable(translationID); + if (table != nullptr && !table->Inactive) + { + if (swrenderer::r_swtruecolor) + translation = (uint8_t*)table->Palette; + else + translation = table->Remap; + + textureWidth = texture->GetWidth(); + textureHeight = texture->GetHeight(); + texturePixels = texture->GetPixels(); + return; + } + } + + if (forcePal) + { + textureWidth = texture->GetWidth(); + textureHeight = texture->GetHeight(); + texturePixels = texture->GetPixels(); + } + else + { + SetTexture(texture); + } +} + +void PolyDrawArgs::SetColormap(FSWColormap *base_colormap) +{ + uniforms.light_red = base_colormap->Color.r * 256 / 255; + 
uniforms.light_green = base_colormap->Color.g * 256 / 255; + uniforms.light_blue = base_colormap->Color.b * 256 / 255; + uniforms.light_alpha = base_colormap->Color.a * 256 / 255; + uniforms.fade_red = base_colormap->Fade.r; + uniforms.fade_green = base_colormap->Fade.g; + uniforms.fade_blue = base_colormap->Fade.b; + uniforms.fade_alpha = base_colormap->Fade.a; + uniforms.desaturate = MIN(abs(base_colormap->Desaturate), 255) * 255 / 256; + bool simple_shade = (base_colormap->Color.d == 0x00ffffff && base_colormap->Fade.d == 0x00000000 && base_colormap->Desaturate == 0); + if (simple_shade) + uniforms.flags |= TriUniforms::simple_shade; + else + uniforms.flags &= ~TriUniforms::simple_shade; + colormaps = base_colormap->Maps; +} diff --git a/src/polyrenderer/drawers/poly_draw_args.h b/src/polyrenderer/drawers/poly_draw_args.h new file mode 100644 index 0000000000..78e6d07a23 --- /dev/null +++ b/src/polyrenderer/drawers/poly_draw_args.h @@ -0,0 +1,69 @@ +/* +** Triangle drawers +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. 
+** +*/ + +#pragma once + +#include "r_data/r_translate.h" +#include "r_data/colormaps.h" +#include "swrenderer/drawers/r_drawers.h" + +class FTexture; + +enum class TriangleDrawMode +{ + Normal, + Fan, + Strip +}; + +struct TriDrawTriangleArgs; +struct TriMatrix; + +class PolyDrawArgs +{ +public: + TriUniforms uniforms; + const TriMatrix *objectToClip = nullptr; + const TriVertex *vinput = nullptr; + int vcount = 0; + TriangleDrawMode mode = TriangleDrawMode::Normal; + bool ccw = false; + // bool stencilTest = true; // Always true for now + bool subsectorTest = false; + bool writeStencil = true; + bool writeColor = true; + bool writeSubsector = true; + const uint8_t *texturePixels = nullptr; + int textureWidth = 0; + int textureHeight = 0; + const uint8_t *translation = nullptr; + uint8_t stenciltestvalue = 0; + uint8_t stencilwritevalue = 0; + const uint8_t *colormaps = nullptr; + float clipPlane[4]; + TriBlendMode blendmode = TriBlendMode::Copy; + + void SetClipPlane(float a, float b, float c, float d); + void SetTexture(FTexture *texture); + void SetTexture(FTexture *texture, uint32_t translationID, bool forcePal = false); + void SetColormap(FSWColormap *base_colormap); +}; diff --git a/src/polyrenderer/drawers/poly_triangle.cpp b/src/polyrenderer/drawers/poly_triangle.cpp index 76e349af74..5856b0267b 100644 --- a/src/polyrenderer/drawers/poly_triangle.cpp +++ b/src/polyrenderer/drawers/poly_triangle.cpp @@ -36,6 +36,7 @@ #include "poly_triangle.h" #include "swrenderer/drawers/r_draw_rgba.h" #include "swrenderer/r_main.h" +#include "screen_triangle.h" CVAR(Bool, r_debug_trisetup, false, 0); @@ -411,1106 +412,3 @@ FString DrawPolyTrianglesCommand::DebugInfo() args.texturePixels ? "ptr" : "null", args.translation ? "ptr" : "null", args.colormaps ? 
"ptr" : "null"); return info; } - -///////////////////////////////////////////////////////////////////////////// - -TriMatrix TriMatrix::null() -{ - TriMatrix m; - memset(m.matrix, 0, sizeof(m.matrix)); - return m; -} - -TriMatrix TriMatrix::identity() -{ - TriMatrix m = null(); - m.matrix[0] = 1.0f; - m.matrix[5] = 1.0f; - m.matrix[10] = 1.0f; - m.matrix[15] = 1.0f; - return m; -} - -TriMatrix TriMatrix::translate(float x, float y, float z) -{ - TriMatrix m = identity(); - m.matrix[0 + 3 * 4] = x; - m.matrix[1 + 3 * 4] = y; - m.matrix[2 + 3 * 4] = z; - return m; -} - -TriMatrix TriMatrix::scale(float x, float y, float z) -{ - TriMatrix m = null(); - m.matrix[0 + 0 * 4] = x; - m.matrix[1 + 1 * 4] = y; - m.matrix[2 + 2 * 4] = z; - m.matrix[3 + 3 * 4] = 1; - return m; -} - -TriMatrix TriMatrix::rotate(float angle, float x, float y, float z) -{ - float c = cosf(angle); - float s = sinf(angle); - TriMatrix m = null(); - m.matrix[0 + 0 * 4] = (x*x*(1.0f - c) + c); - m.matrix[0 + 1 * 4] = (x*y*(1.0f - c) - z*s); - m.matrix[0 + 2 * 4] = (x*z*(1.0f - c) + y*s); - m.matrix[1 + 0 * 4] = (y*x*(1.0f - c) + z*s); - m.matrix[1 + 1 * 4] = (y*y*(1.0f - c) + c); - m.matrix[1 + 2 * 4] = (y*z*(1.0f - c) - x*s); - m.matrix[2 + 0 * 4] = (x*z*(1.0f - c) - y*s); - m.matrix[2 + 1 * 4] = (y*z*(1.0f - c) + x*s); - m.matrix[2 + 2 * 4] = (z*z*(1.0f - c) + c); - m.matrix[3 + 3 * 4] = 1.0f; - return m; -} - -TriMatrix TriMatrix::swapYZ() -{ - TriMatrix m = null(); - m.matrix[0 + 0 * 4] = 1.0f; - m.matrix[1 + 2 * 4] = 1.0f; - m.matrix[2 + 1 * 4] = -1.0f; - m.matrix[3 + 3 * 4] = 1.0f; - return m; -} - -TriMatrix TriMatrix::perspective(float fovy, float aspect, float z_near, float z_far) -{ - float f = (float)(1.0 / tan(fovy * M_PI / 360.0)); - TriMatrix m = null(); - m.matrix[0 + 0 * 4] = f / aspect; - m.matrix[1 + 1 * 4] = f; - m.matrix[2 + 2 * 4] = (z_far + z_near) / (z_near - z_far); - m.matrix[2 + 3 * 4] = (2.0f * z_far * z_near) / (z_near - z_far); - m.matrix[3 + 2 * 4] = -1.0f; - return m; 
-} - -TriMatrix TriMatrix::frustum(float left, float right, float bottom, float top, float near, float far) -{ - float a = (right + left) / (right - left); - float b = (top + bottom) / (top - bottom); - float c = -(far + near) / (far - near); - float d = -(2.0f * far) / (far - near); - TriMatrix m = null(); - m.matrix[0 + 0 * 4] = 2.0f * near / (right - left); - m.matrix[1 + 1 * 4] = 2.0f * near / (top - bottom); - m.matrix[0 + 2 * 4] = a; - m.matrix[1 + 2 * 4] = b; - m.matrix[2 + 2 * 4] = c; - m.matrix[2 + 3 * 4] = d; - m.matrix[3 + 2 * 4] = -1; - return m; -} - -TriMatrix TriMatrix::worldToView() -{ - TriMatrix m = null(); - m.matrix[0 + 0 * 4] = (float)ViewSin; - m.matrix[0 + 1 * 4] = (float)-ViewCos; - m.matrix[1 + 2 * 4] = 1.0f; - m.matrix[2 + 0 * 4] = (float)-ViewCos; - m.matrix[2 + 1 * 4] = (float)-ViewSin; - m.matrix[3 + 3 * 4] = 1.0f; - return m * translate((float)-ViewPos.X, (float)-ViewPos.Y, (float)-ViewPos.Z); -} - -TriMatrix TriMatrix::viewToClip() -{ - float near = 5.0f; - float far = 65536.0f; - float width = (float)(FocalTangent * near); - float top = (float)(swrenderer::CenterY / swrenderer::InvZtoScale * near); - float bottom = (float)(top - viewheight / swrenderer::InvZtoScale * near); - return frustum(-width, width, bottom, top, near, far); -} - -TriMatrix TriMatrix::operator*(const TriMatrix &mult) const -{ - TriMatrix result; - for (int x = 0; x < 4; x++) - { - for (int y = 0; y < 4; y++) - { - result.matrix[x + y * 4] = - matrix[0 * 4 + x] * mult.matrix[y * 4 + 0] + - matrix[1 * 4 + x] * mult.matrix[y * 4 + 1] + - matrix[2 * 4 + x] * mult.matrix[y * 4 + 2] + - matrix[3 * 4 + x] * mult.matrix[y * 4 + 3]; - } - } - return result; -} - -ShadedTriVertex TriMatrix::operator*(TriVertex v) const -{ - float vx = matrix[0 * 4 + 0] * v.x + matrix[1 * 4 + 0] * v.y + matrix[2 * 4 + 0] * v.z + matrix[3 * 4 + 0] * v.w; - float vy = matrix[0 * 4 + 1] * v.x + matrix[1 * 4 + 1] * v.y + matrix[2 * 4 + 1] * v.z + matrix[3 * 4 + 1] * v.w; - float vz = matrix[0 
* 4 + 2] * v.x + matrix[1 * 4 + 2] * v.y + matrix[2 * 4 + 2] * v.z + matrix[3 * 4 + 2] * v.w; - float vw = matrix[0 * 4 + 3] * v.x + matrix[1 * 4 + 3] * v.y + matrix[2 * 4 + 3] * v.z + matrix[3 * 4 + 3] * v.w; - ShadedTriVertex sv; - sv.x = vx; - sv.y = vy; - sv.z = vz; - sv.w = vw; - for (int i = 0; i < TriVertex::NumVarying; i++) - sv.varying[i] = v.varying[i]; - return sv; -} - -///////////////////////////////////////////////////////////////////////////// - -namespace -{ - int NextBufferVertex = 0; -} - -TriVertex *PolyVertexBuffer::GetVertices(int count) -{ - enum { VertexBufferSize = 256 * 1024 }; - static TriVertex Vertex[VertexBufferSize]; - - if (NextBufferVertex + count > VertexBufferSize) - return nullptr; - TriVertex *v = Vertex + NextBufferVertex; - NextBufferVertex += count; - return v; -} - -void PolyVertexBuffer::Clear() -{ - NextBufferVertex = 0; -} - -///////////////////////////////////////////////////////////////////////////// - -void ScreenTriangle::SetupNormal(const TriDrawTriangleArgs *args, WorkerThreadData *thread) -{ - const TriVertex &v1 = *args->v1; - const TriVertex &v2 = *args->v2; - const TriVertex &v3 = *args->v3; - int clipright = args->clipright; - int clipbottom = args->clipbottom; - - int stencilPitch = args->stencilPitch; - uint8_t * RESTRICT stencilValues = args->stencilValues; - uint32_t * RESTRICT stencilMasks = args->stencilMasks; - uint8_t stencilTestValue = args->stencilTestValue; - - TriFullSpan * RESTRICT span = thread->FullSpans; - TriPartialBlock * RESTRICT partial = thread->PartialBlocks; - - // 28.4 fixed-point coordinates - const int Y1 = (int)round(16.0f * v1.y); - const int Y2 = (int)round(16.0f * v2.y); - const int Y3 = (int)round(16.0f * v3.y); - - const int X1 = (int)round(16.0f * v1.x); - const int X2 = (int)round(16.0f * v2.x); - const int X3 = (int)round(16.0f * v3.x); - - // Deltas - const int DX12 = X1 - X2; - const int DX23 = X2 - X3; - const int DX31 = X3 - X1; - - const int DY12 = Y1 - Y2; - const int 
DY23 = Y2 - Y3; - const int DY31 = Y3 - Y1; - - // Fixed-point deltas - const int FDX12 = DX12 << 4; - const int FDX23 = DX23 << 4; - const int FDX31 = DX31 << 4; - - const int FDY12 = DY12 << 4; - const int FDY23 = DY23 << 4; - const int FDY31 = DY31 << 4; - - // Bounding rectangle - int minx = MAX((MIN(MIN(X1, X2), X3) + 0xF) >> 4, 0); - int maxx = MIN((MAX(MAX(X1, X2), X3) + 0xF) >> 4, clipright - 1); - int miny = MAX((MIN(MIN(Y1, Y2), Y3) + 0xF) >> 4, 0); - int maxy = MIN((MAX(MAX(Y1, Y2), Y3) + 0xF) >> 4, clipbottom - 1); - if (minx >= maxx || miny >= maxy) - { - thread->NumFullSpans = 0; - thread->NumPartialBlocks = 0; - return; - } - - // Block size, standard 8x8 (must be power of two) - const int q = 8; - - // Start in corner of 8x8 block - minx &= ~(q - 1); - miny &= ~(q - 1); - - // Half-edge constants - int C1 = DY12 * X1 - DX12 * Y1; - int C2 = DY23 * X2 - DX23 * Y2; - int C3 = DY31 * X3 - DX31 * Y3; - - // Correct for fill convention - if (DY12 < 0 || (DY12 == 0 && DX12 > 0)) C1++; - if (DY23 < 0 || (DY23 == 0 && DX23 > 0)) C2++; - if (DY31 < 0 || (DY31 == 0 && DX31 > 0)) C3++; - - // First block line for this thread - int core = thread->core; - int num_cores = thread->num_cores; - int core_skip = (num_cores - ((miny / q) - core) % num_cores) % num_cores; - miny += core_skip * q; - - thread->StartX = minx; - thread->StartY = miny; - span->Length = 0; - - // Loop through blocks - for (int y = miny; y < maxy; y += q * num_cores) - { - for (int x = minx; x < maxx; x += q) - { - // Corners of block - int x0 = x << 4; - int x1 = (x + q - 1) << 4; - int y0 = y << 4; - int y1 = (y + q - 1) << 4; - - // Evaluate half-space functions - bool a00 = C1 + DX12 * y0 - DY12 * x0 > 0; - bool a10 = C1 + DX12 * y0 - DY12 * x1 > 0; - bool a01 = C1 + DX12 * y1 - DY12 * x0 > 0; - bool a11 = C1 + DX12 * y1 - DY12 * x1 > 0; - int a = (a00 << 0) | (a10 << 1) | (a01 << 2) | (a11 << 3); - - bool b00 = C2 + DX23 * y0 - DY23 * x0 > 0; - bool b10 = C2 + DX23 * y0 - DY23 * x1 > 0; 
- bool b01 = C2 + DX23 * y1 - DY23 * x0 > 0; - bool b11 = C2 + DX23 * y1 - DY23 * x1 > 0; - int b = (b00 << 0) | (b10 << 1) | (b01 << 2) | (b11 << 3); - - bool c00 = C3 + DX31 * y0 - DY31 * x0 > 0; - bool c10 = C3 + DX31 * y0 - DY31 * x1 > 0; - bool c01 = C3 + DX31 * y1 - DY31 * x0 > 0; - bool c11 = C3 + DX31 * y1 - DY31 * x1 > 0; - int c = (c00 << 0) | (c10 << 1) | (c01 << 2) | (c11 << 3); - - // Stencil test the whole block, if possible - int block = x / 8 + y / 8 * stencilPitch; - uint8_t *stencilBlock = &stencilValues[block * 64]; - uint32_t *stencilBlockMask = &stencilMasks[block]; - bool blockIsSingleStencil = ((*stencilBlockMask) & 0xffffff00) == 0xffffff00; - bool skipBlock = blockIsSingleStencil && ((*stencilBlockMask) & 0xff) != stencilTestValue; - - // Skip block when outside an edge - if (a == 0 || b == 0 || c == 0 || skipBlock) - { - if (span->Length != 0) - { - span++; - span->Length = 0; - } - continue; - } - - // Accept whole block when totally covered - if (a == 0xf && b == 0xf && c == 0xf && x + q <= clipright && y + q <= clipbottom && blockIsSingleStencil) - { - if (span->Length != 0) - { - span->Length++; - } - else - { - span->X = x; - span->Y = y; - span->Length = 1; - } - } - else // Partially covered block - { - x0 = x << 4; - x1 = (x + q - 1) << 4; - int CY1 = C1 + DX12 * y0 - DY12 * x0; - int CY2 = C2 + DX23 * y0 - DY23 * x0; - int CY3 = C3 + DX31 * y0 - DY31 * x0; - - uint32_t mask0 = 0; - uint32_t mask1 = 0; - - for (int iy = 0; iy < 4; iy++) - { - int CX1 = CY1; - int CX2 = CY2; - int CX3 = CY3; - - for (int ix = 0; ix < q; ix++) - { - bool passStencilTest = blockIsSingleStencil || stencilBlock[ix + iy * q] == stencilTestValue; - bool covered = (CX1 > 0 && CX2 > 0 && CX3 > 0 && (x + ix) < clipright && (y + iy) < clipbottom && passStencilTest); - mask0 <<= 1; - mask0 |= (uint32_t)covered; - - CX1 -= FDY12; - CX2 -= FDY23; - CX3 -= FDY31; - } - - CY1 += FDX12; - CY2 += FDX23; - CY3 += FDX31; - } - - for (int iy = 4; iy < q; iy++) - { - 
int CX1 = CY1; - int CX2 = CY2; - int CX3 = CY3; - - for (int ix = 0; ix < q; ix++) - { - bool passStencilTest = blockIsSingleStencil || stencilBlock[ix + iy * q] == stencilTestValue; - bool covered = (CX1 > 0 && CX2 > 0 && CX3 > 0 && (x + ix) < clipright && (y + iy) < clipbottom && passStencilTest); - mask1 <<= 1; - mask1 |= (uint32_t)covered; - - CX1 -= FDY12; - CX2 -= FDY23; - CX3 -= FDY31; - } - - CY1 += FDX12; - CY2 += FDX23; - CY3 += FDX31; - } - - if (mask0 != 0xffffffff || mask1 != 0xffffffff) - { - if (span->Length > 0) - { - span++; - span->Length = 0; - } - - if (mask0 == 0 && mask1 == 0) - continue; - - partial->X = x; - partial->Y = y; - partial->Mask0 = mask0; - partial->Mask1 = mask1; - partial++; - } - else if (span->Length != 0) - { - span->Length++; - } - else - { - span->X = x; - span->Y = y; - span->Length = 1; - } - } - } - - if (span->Length != 0) - { - span++; - span->Length = 0; - } - } - - thread->NumFullSpans = (int)(span - thread->FullSpans); - thread->NumPartialBlocks = (int)(partial - thread->PartialBlocks); -} - -void ScreenTriangle::SetupSubsector(const TriDrawTriangleArgs *args, WorkerThreadData *thread) -{ - const TriVertex &v1 = *args->v1; - const TriVertex &v2 = *args->v2; - const TriVertex &v3 = *args->v3; - int clipright = args->clipright; - int clipbottom = args->clipbottom; - - int stencilPitch = args->stencilPitch; - uint8_t * RESTRICT stencilValues = args->stencilValues; - uint32_t * RESTRICT stencilMasks = args->stencilMasks; - uint8_t stencilTestValue = args->stencilTestValue; - - uint32_t * RESTRICT subsectorGBuffer = args->subsectorGBuffer; - uint32_t subsectorDepth = args->uniforms->subsectorDepth; - int32_t pitch = args->pitch; - - TriFullSpan * RESTRICT span = thread->FullSpans; - TriPartialBlock * RESTRICT partial = thread->PartialBlocks; - - // 28.4 fixed-point coordinates - const int Y1 = (int)round(16.0f * v1.y); - const int Y2 = (int)round(16.0f * v2.y); - const int Y3 = (int)round(16.0f * v3.y); - - const int X1 
= (int)round(16.0f * v1.x); - const int X2 = (int)round(16.0f * v2.x); - const int X3 = (int)round(16.0f * v3.x); - - // Deltas - const int DX12 = X1 - X2; - const int DX23 = X2 - X3; - const int DX31 = X3 - X1; - - const int DY12 = Y1 - Y2; - const int DY23 = Y2 - Y3; - const int DY31 = Y3 - Y1; - - // Fixed-point deltas - const int FDX12 = DX12 << 4; - const int FDX23 = DX23 << 4; - const int FDX31 = DX31 << 4; - - const int FDY12 = DY12 << 4; - const int FDY23 = DY23 << 4; - const int FDY31 = DY31 << 4; - - // Bounding rectangle - int minx = MAX((MIN(MIN(X1, X2), X3) + 0xF) >> 4, 0); - int maxx = MIN((MAX(MAX(X1, X2), X3) + 0xF) >> 4, clipright - 1); - int miny = MAX((MIN(MIN(Y1, Y2), Y3) + 0xF) >> 4, 0); - int maxy = MIN((MAX(MAX(Y1, Y2), Y3) + 0xF) >> 4, clipbottom - 1); - if (minx >= maxx || miny >= maxy) - { - thread->NumFullSpans = 0; - thread->NumPartialBlocks = 0; - return; - } - - // Block size, standard 8x8 (must be power of two) - const int q = 8; - - // Start in corner of 8x8 block - minx &= ~(q - 1); - miny &= ~(q - 1); - - // Half-edge constants - int C1 = DY12 * X1 - DX12 * Y1; - int C2 = DY23 * X2 - DX23 * Y2; - int C3 = DY31 * X3 - DX31 * Y3; - - // Correct for fill convention - if (DY12 < 0 || (DY12 == 0 && DX12 > 0)) C1++; - if (DY23 < 0 || (DY23 == 0 && DX23 > 0)) C2++; - if (DY31 < 0 || (DY31 == 0 && DX31 > 0)) C3++; - - // First block line for this thread - int core = thread->core; - int num_cores = thread->num_cores; - int core_skip = (num_cores - ((miny / q) - core) % num_cores) % num_cores; - miny += core_skip * q; - - thread->StartX = minx; - thread->StartY = miny; - span->Length = 0; - - // Loop through blocks - for (int y = miny; y < maxy; y += q * num_cores) - { - for (int x = minx; x < maxx; x += q) - { - // Corners of block - int x0 = x << 4; - int x1 = (x + q - 1) << 4; - int y0 = y << 4; - int y1 = (y + q - 1) << 4; - - // Evaluate half-space functions - bool a00 = C1 + DX12 * y0 - DY12 * x0 > 0; - bool a10 = C1 + DX12 * y0 - DY12 
* x1 > 0; - bool a01 = C1 + DX12 * y1 - DY12 * x0 > 0; - bool a11 = C1 + DX12 * y1 - DY12 * x1 > 0; - int a = (a00 << 0) | (a10 << 1) | (a01 << 2) | (a11 << 3); - - bool b00 = C2 + DX23 * y0 - DY23 * x0 > 0; - bool b10 = C2 + DX23 * y0 - DY23 * x1 > 0; - bool b01 = C2 + DX23 * y1 - DY23 * x0 > 0; - bool b11 = C2 + DX23 * y1 - DY23 * x1 > 0; - int b = (b00 << 0) | (b10 << 1) | (b01 << 2) | (b11 << 3); - - bool c00 = C3 + DX31 * y0 - DY31 * x0 > 0; - bool c10 = C3 + DX31 * y0 - DY31 * x1 > 0; - bool c01 = C3 + DX31 * y1 - DY31 * x0 > 0; - bool c11 = C3 + DX31 * y1 - DY31 * x1 > 0; - int c = (c00 << 0) | (c10 << 1) | (c01 << 2) | (c11 << 3); - - // Stencil test the whole block, if possible - int block = x / 8 + y / 8 * stencilPitch; - uint8_t *stencilBlock = &stencilValues[block * 64]; - uint32_t *stencilBlockMask = &stencilMasks[block]; - bool blockIsSingleStencil = ((*stencilBlockMask) & 0xffffff00) == 0xffffff00; - bool skipBlock = blockIsSingleStencil && ((*stencilBlockMask) & 0xff) < stencilTestValue; - - // Skip block when outside an edge - if (a == 0 || b == 0 || c == 0 || skipBlock) - { - if (span->Length != 0) - { - span++; - span->Length = 0; - } - continue; - } - - // Accept whole block when totally covered - if (a == 0xf && b == 0xf && c == 0xf && x + q <= clipright && y + q <= clipbottom && blockIsSingleStencil) - { - // Totally covered block still needs a subsector coverage test: - - uint32_t *subsector = subsectorGBuffer + x + y * pitch; - - uint32_t mask0 = 0; - uint32_t mask1 = 0; - - for (int iy = 0; iy < 4; iy++) - { - for (int ix = 0; ix < q; ix++) - { - bool covered = subsector[ix] >= subsectorDepth; - mask0 <<= 1; - mask0 |= (uint32_t)covered; - } - subsector += pitch; - } - - for (int iy = 4; iy < q; iy++) - { - for (int ix = 0; ix < q; ix++) - { - bool covered = subsector[ix] >= subsectorDepth; - mask1 <<= 1; - mask1 |= (uint32_t)covered; - } - subsector += pitch; - } - - if (mask0 != 0xffffffff || mask1 != 0xffffffff) - { - if (span->Length > 
0) - { - span++; - span->Length = 0; - } - - if (mask0 == 0 && mask1 == 0) - continue; - - partial->X = x; - partial->Y = y; - partial->Mask0 = mask0; - partial->Mask1 = mask1; - partial++; - } - else if (span->Length != 0) - { - span->Length++; - } - else - { - span->X = x; - span->Y = y; - span->Length = 1; - } - } - else // Partially covered block - { - x0 = x << 4; - x1 = (x + q - 1) << 4; - int CY1 = C1 + DX12 * y0 - DY12 * x0; - int CY2 = C2 + DX23 * y0 - DY23 * x0; - int CY3 = C3 + DX31 * y0 - DY31 * x0; - - uint32_t *subsector = subsectorGBuffer + x + y * pitch; - - uint32_t mask0 = 0; - uint32_t mask1 = 0; - - for (int iy = 0; iy < 4; iy++) - { - int CX1 = CY1; - int CX2 = CY2; - int CX3 = CY3; - - for (int ix = 0; ix < q; ix++) - { - bool passStencilTest = blockIsSingleStencil || stencilBlock[ix + iy * q] >= stencilTestValue; - bool covered = (CX1 > 0 && CX2 > 0 && CX3 > 0 && (x + ix) < clipright && (y + iy) < clipbottom && passStencilTest && subsector[ix] >= subsectorDepth); - mask0 <<= 1; - mask0 |= (uint32_t)covered; - - CX1 -= FDY12; - CX2 -= FDY23; - CX3 -= FDY31; - } - - CY1 += FDX12; - CY2 += FDX23; - CY3 += FDX31; - subsector += pitch; - } - - for (int iy = 4; iy < q; iy++) - { - int CX1 = CY1; - int CX2 = CY2; - int CX3 = CY3; - - for (int ix = 0; ix < q; ix++) - { - bool passStencilTest = blockIsSingleStencil || stencilBlock[ix + iy * q] >= stencilTestValue; - bool covered = (CX1 > 0 && CX2 > 0 && CX3 > 0 && (x + ix) < clipright && (y + iy) < clipbottom && passStencilTest && subsector[ix] >= subsectorDepth); - mask1 <<= 1; - mask1 |= (uint32_t)covered; - - CX1 -= FDY12; - CX2 -= FDY23; - CX3 -= FDY31; - } - - CY1 += FDX12; - CY2 += FDX23; - CY3 += FDX31; - subsector += pitch; - } - - if (mask0 != 0xffffffff || mask1 != 0xffffffff) - { - if (span->Length > 0) - { - span++; - span->Length = 0; - } - - if (mask0 == 0 && mask1 == 0) - continue; - - partial->X = x; - partial->Y = y; - partial->Mask0 = mask0; - partial->Mask1 = mask1; - partial++; - } 
- else if (span->Length != 0) - { - span->Length++; - } - else - { - span->X = x; - span->Y = y; - span->Length = 1; - } - } - } - - if (span->Length != 0) - { - span++; - span->Length = 0; - } - } - - thread->NumFullSpans = (int)(span - thread->FullSpans); - thread->NumPartialBlocks = (int)(partial - thread->PartialBlocks); -} - -void ScreenTriangle::StencilWrite(const TriDrawTriangleArgs *args, WorkerThreadData *thread) -{ - uint8_t * RESTRICT stencilValues = args->stencilValues; - uint32_t * RESTRICT stencilMasks = args->stencilMasks; - uint32_t stencilWriteValue = args->stencilWriteValue; - uint32_t stencilPitch = args->stencilPitch; - - int numSpans = thread->NumFullSpans; - auto fullSpans = thread->FullSpans; - int numBlocks = thread->NumPartialBlocks; - auto partialBlocks = thread->PartialBlocks; - - for (int i = 0; i < numSpans; i++) - { - const auto &span = fullSpans[i]; - - int block = span.X / 8 + span.Y / 8 * stencilPitch; - uint8_t *stencilBlock = &stencilValues[block * 64]; - uint32_t *stencilBlockMask = &stencilMasks[block]; - - int width = span.Length; - for (int x = 0; x < width; x++) - stencilBlockMask[x] = 0xffffff00 | stencilWriteValue; - } - - for (int i = 0; i < numBlocks; i++) - { - const auto &block = partialBlocks[i]; - - uint32_t mask0 = block.Mask0; - uint32_t mask1 = block.Mask1; - - int sblock = block.X / 8 + block.Y / 8 * stencilPitch; - uint8_t *stencilBlock = &stencilValues[sblock * 64]; - uint32_t *stencilBlockMask = &stencilMasks[sblock]; - - bool isSingleValue = ((*stencilBlockMask) & 0xffffff00) == 0xffffff00; - if (isSingleValue) - { - uint8_t value = (*stencilBlockMask) & 0xff; - for (int v = 0; v < 64; v++) - stencilBlock[v] = value; - *stencilBlockMask = 0; - } - - int count = 0; - for (int v = 0; v < 32; v++) - { - if ((mask0 & (1 << 31)) || stencilBlock[v] == stencilWriteValue) - { - stencilBlock[v] = stencilWriteValue; - count++; - } - mask0 <<= 1; - } - for (int v = 32; v < 64; v++) - { - if ((mask1 & (1 << 31)) || 
stencilBlock[v] == stencilWriteValue) - { - stencilBlock[v] = stencilWriteValue; - count++; - } - mask1 <<= 1; - } - - if (count == 64) - *stencilBlockMask = 0xffffff00 | stencilWriteValue; - } -} - -void ScreenTriangle::SubsectorWrite(const TriDrawTriangleArgs *args, WorkerThreadData *thread) -{ - uint32_t * RESTRICT subsectorGBuffer = args->subsectorGBuffer; - uint32_t subsectorDepth = args->uniforms->subsectorDepth; - int pitch = args->pitch; - - int numSpans = thread->NumFullSpans; - auto fullSpans = thread->FullSpans; - int numBlocks = thread->NumPartialBlocks; - auto partialBlocks = thread->PartialBlocks; - - for (int i = 0; i < numSpans; i++) - { - const auto &span = fullSpans[i]; - - uint32_t *subsector = subsectorGBuffer + span.X + span.Y * pitch; - int width = span.Length * 8; - int height = 8; - for (int y = 0; y < height; y++) - { - for (int x = 0; x < width; x++) - subsector[x] = subsectorDepth; - subsector += pitch; - } - } - - for (int i = 0; i < numBlocks; i++) - { - const auto &block = partialBlocks[i]; - - uint32_t *subsector = subsectorGBuffer + block.X + block.Y * pitch; - uint32_t mask0 = block.Mask0; - uint32_t mask1 = block.Mask1; - for (int y = 0; y < 4; y++) - { - for (int x = 0; x < 8; x++) - { - if (mask0 & (1 << 31)) - subsector[x] = subsectorDepth; - mask0 <<= 1; - } - subsector += pitch; - } - for (int y = 4; y < 8; y++) - { - for (int x = 0; x < 8; x++) - { - if (mask1 & (1 << 31)) - subsector[x] = subsectorDepth; - mask1 <<= 1; - } - subsector += pitch; - } - } -} - -#if 0 -float ScreenTriangle::FindGradientX(float x0, float y0, float x1, float y1, float x2, float y2, float c0, float c1, float c2) -{ - float top = (c1 - c2) * (y0 - y2) - (c0 - c2) * (y1 - y2); - float bottom = (x1 - x2) * (y0 - y2) - (x0 - x2) * (y1 - y2); - return top / bottom; -} - -float ScreenTriangle::FindGradientY(float x0, float y0, float x1, float y1, float x2, float y2, float c0, float c1, float c2) -{ - float top = (c1 - c2) * (x0 - x2) - (c0 - c2) * (x1 - 
x2); - float bottom = (x0 - x2) * (y1 - y2) - (x1 - x2) * (y0 - y2); - return top / bottom; -} - -void ScreenTriangle::Draw(const TriDrawTriangleArgs *args, WorkerThreadData *thread) -{ - int numSpans = thread->NumFullSpans; - auto fullSpans = thread->FullSpans; - int numBlocks = thread->NumPartialBlocks; - auto partialBlocks = thread->PartialBlocks; - int startX = thread->StartX; - int startY = thread->StartY; - - // Calculate gradients - const TriVertex &v1 = *args->v1; - const TriVertex &v2 = *args->v2; - const TriVertex &v3 = *args->v3; - ScreenTriangleStepVariables gradientX; - ScreenTriangleStepVariables gradientY; - ScreenTriangleStepVariables start; - gradientX.W = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); - gradientY.W = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); - start.W = v1.w + gradientX.W * (startX - v1.x) + gradientY.W * (startY - v1.y); - for (int i = 0; i < TriVertex::NumVarying; i++) - { - gradientX.Varying[i] = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); - gradientY.Varying[i] = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); - start.Varying[i] = v1.varying[i] * v1.w + gradientX.Varying[i] * (startX - v1.x) + gradientY.Varying[i] * (startY - v1.y); - } - - const uint32_t * RESTRICT texPixels = (const uint32_t *)args->texturePixels; - uint32_t texWidth = args->textureWidth; - uint32_t texHeight = args->textureHeight; - - uint32_t * RESTRICT destOrg = (uint32_t*)args->dest; - uint32_t * RESTRICT subsectorGBuffer = (uint32_t*)args->subsectorGBuffer; - int pitch = args->pitch; - - uint32_t subsectorDepth = args->uniforms->subsectorDepth; - - uint32_t light = args->uniforms->light; - float shade = (64.0f - (light * 255 / 256 + 12.0f) * 32.0f / 128.0f) / 32.0f; - float globVis = 1706.0f; - - for (int i = 0; i < numSpans; i++) - { - const auto &span = 
fullSpans[i]; - - uint32_t *dest = destOrg + span.X + span.Y * pitch; - uint32_t *subsector = subsectorGBuffer + span.X + span.Y * pitch; - int width = span.Length; - int height = 8; - - ScreenTriangleStepVariables blockPosY; - blockPosY.W = start.W + gradientX.W * (span.X - startX) + gradientY.W * (span.Y - startY); - for (int j = 0; j < TriVertex::NumVarying; j++) - blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (span.X - startX) + gradientY.Varying[j] * (span.Y - startY); - - for (int y = 0; y < height; y++) - { - ScreenTriangleStepVariables blockPosX = blockPosY; - - float rcpW = 0x01000000 / blockPosX.W; - int32_t varyingPos[TriVertex::NumVarying]; - for (int j = 0; j < TriVertex::NumVarying; j++) - varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); - int lightpos = 256 - (int)(clamp(shade - MIN(24.0f, globVis * blockPosX.W) / 32.0f, 0.0f, 31.0f / 32.0f) * 256.0f); - - for (int x = 0; x < width; x++) - { - blockPosX.W += gradientX.W * 8; - for (int j = 0; j < TriVertex::NumVarying; j++) - blockPosX.Varying[j] += gradientX.Varying[j] * 8; - - rcpW = 0x01000000 / blockPosX.W; - int32_t varyingStep[TriVertex::NumVarying]; - for (int j = 0; j < TriVertex::NumVarying; j++) - { - int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); - varyingStep[j] = (nextPos - varyingPos[j]) / 8; - } - - int lightnext = 256 - (int)(clamp(shade - MIN(24.0f, globVis * blockPosX.W) / 32.0f, 0.0f, 31.0f / 32.0f) * 256.0f); - int lightstep = (lightnext - lightpos) / 8; - - for (int ix = 0; ix < 8; ix++) - { - int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; - int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; - uint32_t fg = texPixels[texelX * texHeight + texelY]; - - uint32_t r = RPART(fg); - uint32_t g = GPART(fg); - uint32_t b = BPART(fg); - r = r * lightpos / 256; - g = g * lightpos / 256; - b = b * lightpos / 256; - fg = 0xff000000 | (r << 16) | (g << 8) | b; - - dest[x * 8 + ix] = fg; - subsector[x * 8 + ix] 
= subsectorDepth; - - for (int j = 0; j < TriVertex::NumVarying; j++) - varyingPos[j] += varyingStep[j]; - lightpos += lightstep; - } - } - - blockPosY.W += gradientY.W; - for (int j = 0; j < TriVertex::NumVarying; j++) - blockPosY.Varying[j] += gradientY.Varying[j]; - - dest += pitch; - subsector += pitch; - } - } - - for (int i = 0; i < numBlocks; i++) - { - const auto &block = partialBlocks[i]; - - ScreenTriangleStepVariables blockPosY; - blockPosY.W = start.W + gradientX.W * (block.X - startX) + gradientY.W * (block.Y - startY); - for (int j = 0; j < TriVertex::NumVarying; j++) - blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (block.X - startX) + gradientY.Varying[j] * (block.Y - startY); - - uint32_t *dest = destOrg + block.X + block.Y * pitch; - uint32_t *subsector = subsectorGBuffer + block.X + block.Y * pitch; - uint32_t mask0 = block.Mask0; - uint32_t mask1 = block.Mask1; - for (int y = 0; y < 4; y++) - { - ScreenTriangleStepVariables blockPosX = blockPosY; - - float rcpW = 0x01000000 / blockPosX.W; - int32_t varyingPos[TriVertex::NumVarying]; - for (int j = 0; j < TriVertex::NumVarying; j++) - varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); - - int lightpos = 256 - (int)(clamp(shade - MIN(24.0f, globVis * blockPosX.W) / 32.0f, 0.0f, 31.0f / 32.0f) * 256.0f); - - blockPosX.W += gradientX.W * 8; - for (int j = 0; j < TriVertex::NumVarying; j++) - blockPosX.Varying[j] += gradientX.Varying[j] * 8; - - rcpW = 0x01000000 / blockPosX.W; - int32_t varyingStep[TriVertex::NumVarying]; - for (int j = 0; j < TriVertex::NumVarying; j++) - { - int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); - varyingStep[j] = (nextPos - varyingPos[j]) / 8; - } - - int lightnext = 256 - (int)(clamp(shade - MIN(24.0f, globVis * blockPosX.W) / 32.0f, 0.0f, 31.0f / 32.0f) * 256.0f); - int lightstep = (lightnext - lightpos) / 8; - - for (int x = 0; x < 8; x++) - { - if (mask0 & (1 << 31)) - { - int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * 
texWidth) >> 16; - int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; - uint32_t fg = texPixels[texelX * texHeight + texelY]; - - uint32_t r = RPART(fg); - uint32_t g = GPART(fg); - uint32_t b = BPART(fg); - r = r * lightpos / 256; - g = g * lightpos / 256; - b = b * lightpos / 256; - fg = 0xff000000 | (r << 16) | (g << 8) | b; - - dest[x] = fg; - subsector[x] = subsectorDepth; - } - mask0 <<= 1; - - for (int j = 0; j < TriVertex::NumVarying; j++) - varyingPos[j] += varyingStep[j]; - lightpos += lightstep; - } - - blockPosY.W += gradientY.W; - for (int j = 0; j < TriVertex::NumVarying; j++) - blockPosY.Varying[j] += gradientY.Varying[j]; - - dest += pitch; - subsector += pitch; - } - for (int y = 4; y < 8; y++) - { - ScreenTriangleStepVariables blockPosX = blockPosY; - - float rcpW = 0x01000000 / blockPosX.W; - int32_t varyingPos[TriVertex::NumVarying]; - for (int j = 0; j < TriVertex::NumVarying; j++) - varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); - - int lightpos = 256 - (int)(clamp(shade - MIN(24.0f, globVis * blockPosX.W) / 32.0f, 0.0f, 31.0f / 32.0f) * 256.0f); - - blockPosX.W += gradientX.W * 8; - for (int j = 0; j < TriVertex::NumVarying; j++) - blockPosX.Varying[j] += gradientX.Varying[j] * 8; - - rcpW = 0x01000000 / blockPosX.W; - int32_t varyingStep[TriVertex::NumVarying]; - for (int j = 0; j < TriVertex::NumVarying; j++) - { - int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); - varyingStep[j] = (nextPos - varyingPos[j]) / 8; - } - - int lightnext = 256 - (int)(clamp(shade - MIN(24.0f, globVis * blockPosX.W) / 32.0f, 0.0f, 31.0f / 32.0f) * 256.0f); - int lightstep = (lightnext - lightpos) / 8; - - for (int x = 0; x < 8; x++) - { - if (mask1 & (1 << 31)) - { - int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; - int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; - uint32_t fg = texPixels[texelX * texHeight + texelY]; - - uint32_t r = RPART(fg); - uint32_t g = GPART(fg); - 
uint32_t b = BPART(fg); - r = r * lightpos / 256; - g = g * lightpos / 256; - b = b * lightpos / 256; - fg = 0xff000000 | (r << 16) | (g << 8) | b; - - dest[x] = fg; - subsector[x] = subsectorDepth; - } - mask1 <<= 1; - - for (int j = 0; j < TriVertex::NumVarying; j++) - varyingPos[j] += varyingStep[j]; - lightpos += lightstep; - } - - blockPosY.W += gradientY.W; - for (int j = 0; j < TriVertex::NumVarying; j++) - blockPosY.Varying[j] += gradientY.Varying[j]; - - dest += pitch; - subsector += pitch; - } - } -} -#endif diff --git a/src/polyrenderer/drawers/poly_triangle.h b/src/polyrenderer/drawers/poly_triangle.h index 9d8e0e44b0..bc2357b484 100644 --- a/src/polyrenderer/drawers/poly_triangle.h +++ b/src/polyrenderer/drawers/poly_triangle.h @@ -25,140 +25,15 @@ #include "swrenderer/drawers/r_draw.h" #include "swrenderer/drawers/r_thread.h" #include "swrenderer/drawers/r_drawers.h" -#include "r_data/r_translate.h" -#include "r_data/colormaps.h" - -class FTexture; - -enum class TriangleDrawMode -{ - Normal, - Fan, - Strip -}; - -struct TriDrawTriangleArgs; -struct TriMatrix; - -class PolyDrawArgs -{ -public: - TriUniforms uniforms; - const TriMatrix *objectToClip = nullptr; - const TriVertex *vinput = nullptr; - int vcount = 0; - TriangleDrawMode mode = TriangleDrawMode::Normal; - bool ccw = false; - // bool stencilTest = true; // Always true for now - bool subsectorTest = false; - bool writeStencil = true; - bool writeColor = true; - bool writeSubsector = true; - const uint8_t *texturePixels = nullptr; - int textureWidth = 0; - int textureHeight = 0; - const uint8_t *translation = nullptr; - uint8_t stenciltestvalue = 0; - uint8_t stencilwritevalue = 0; - const uint8_t *colormaps = nullptr; - float clipPlane[4]; - TriBlendMode blendmode = TriBlendMode::Copy; - - void SetClipPlane(float a, float b, float c, float d) - { - clipPlane[0] = a; - clipPlane[1] = b; - clipPlane[2] = c; - clipPlane[3] = d; - } - - void SetTexture(FTexture *texture) - { - textureWidth = 
texture->GetWidth(); - textureHeight = texture->GetHeight(); - if (swrenderer::r_swtruecolor) - texturePixels = (const uint8_t *)texture->GetPixelsBgra(); - else - texturePixels = texture->GetPixels(); - translation = nullptr; - } - - void SetTexture(FTexture *texture, uint32_t translationID, bool forcePal = false) - { - if (translationID != 0xffffffff && translationID != 0) - { - FRemapTable *table = TranslationToTable(translationID); - if (table != nullptr && !table->Inactive) - { - if (swrenderer::r_swtruecolor) - translation = (uint8_t*)table->Palette; - else - translation = table->Remap; - - textureWidth = texture->GetWidth(); - textureHeight = texture->GetHeight(); - texturePixels = texture->GetPixels(); - return; - } - } - - if (forcePal) - { - textureWidth = texture->GetWidth(); - textureHeight = texture->GetHeight(); - texturePixels = texture->GetPixels(); - } - else - { - SetTexture(texture); - } - } - - void SetColormap(FSWColormap *base_colormap) - { - uniforms.light_red = base_colormap->Color.r * 256 / 255; - uniforms.light_green = base_colormap->Color.g * 256 / 255; - uniforms.light_blue = base_colormap->Color.b * 256 / 255; - uniforms.light_alpha = base_colormap->Color.a * 256 / 255; - uniforms.fade_red = base_colormap->Fade.r; - uniforms.fade_green = base_colormap->Fade.g; - uniforms.fade_blue = base_colormap->Fade.b; - uniforms.fade_alpha = base_colormap->Fade.a; - uniforms.desaturate = MIN(abs(base_colormap->Desaturate), 255) * 255 / 256; - bool simple_shade = (base_colormap->Color.d == 0x00ffffff && base_colormap->Fade.d == 0x00000000 && base_colormap->Desaturate == 0); - if (simple_shade) - uniforms.flags |= TriUniforms::simple_shade; - else - uniforms.flags &= ~TriUniforms::simple_shade; - colormaps = base_colormap->Maps; - } -}; +#include "polyrenderer/math/tri_matrix.h" +#include "polyrenderer/drawers/poly_buffer.h" +#include "polyrenderer/drawers/poly_draw_args.h" struct ShadedTriVertex : public TriVertex { float clipDistance0; }; -struct 
TriMatrix -{ - static TriMatrix null(); - static TriMatrix identity(); - static TriMatrix translate(float x, float y, float z); - static TriMatrix scale(float x, float y, float z); - static TriMatrix rotate(float angle, float x, float y, float z); - static TriMatrix swapYZ(); - static TriMatrix perspective(float fovy, float aspect, float near, float far); - static TriMatrix frustum(float left, float right, float bottom, float top, float near, float far); - - static TriMatrix worldToView(); // Software renderer world to view space transform - static TriMatrix viewToClip(); // Software renderer shearing projection - - ShadedTriVertex operator*(TriVertex v) const; - TriMatrix operator*(const TriMatrix &m) const; - - float matrix[16]; -}; - typedef void(*PolyDrawFuncPtr)(const TriDrawTriangleArgs *, WorkerThreadData *); class PolyTriangleDrawer @@ -185,73 +60,6 @@ private: friend class DrawPolyTrianglesCommand; }; -class PolySubsectorGBuffer -{ -public: - static PolySubsectorGBuffer *Instance() - { - static PolySubsectorGBuffer buffer; - return &buffer; - } - - void Resize(int newwidth, int newheight) - { - width = newwidth; - height = newheight; - values.resize(width * height); - } - - int Width() const { return width; } - int Height() const { return height; } - uint32_t *Values() { return values.data(); } - -private: - int width; - int height; - std::vector values; -}; - -class PolyStencilBuffer -{ -public: - static PolyStencilBuffer *Instance() - { - static PolyStencilBuffer buffer; - return &buffer; - } - - void Clear(int newwidth, int newheight, uint8_t stencil_value = 0) - { - width = newwidth; - height = newheight; - int count = BlockWidth() * BlockHeight(); - values.resize(count * 64); - masks.resize(count); - - uint8_t *v = Values(); - uint32_t *m = Masks(); - for (int i = 0; i < count; i++) - { - m[i] = 0xffffff00 | stencil_value; - } - } - - int Width() const { return width; } - int Height() const { return height; } - int BlockWidth() const { return (width + 
7) / 8; } - int BlockHeight() const { return (height + 7) / 8; } - uint8_t *Values() { return values.data(); } - uint32_t *Masks() { return masks.data(); } - -private: - int width; - int height; - - // 8x8 blocks of stencil values, plus a mask for each block indicating if values are the same for early out stencil testing - std::vector values; - std::vector masks; -}; - class DrawPolyTrianglesCommand : public DrawerCommand { public: @@ -263,25 +71,3 @@ public: private: PolyDrawArgs args; }; - -class PolyVertexBuffer -{ -public: - static TriVertex *GetVertices(int count); - static void Clear(); -}; - -struct ScreenTriangleStepVariables -{ - float W; - float Varying[TriVertex::NumVarying]; -}; - -class ScreenTriangle -{ -public: - static void SetupNormal(const TriDrawTriangleArgs *args, WorkerThreadData *thread); - static void SetupSubsector(const TriDrawTriangleArgs *args, WorkerThreadData *thread); - static void StencilWrite(const TriDrawTriangleArgs *args, WorkerThreadData *thread); - static void SubsectorWrite(const TriDrawTriangleArgs *args, WorkerThreadData *thread); -}; diff --git a/src/polyrenderer/drawers/screen_triangle.cpp b/src/polyrenderer/drawers/screen_triangle.cpp new file mode 100644 index 0000000000..e164b6fe0e --- /dev/null +++ b/src/polyrenderer/drawers/screen_triangle.cpp @@ -0,0 +1,966 @@ +/* +** Triangle drawers +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. 
If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. +** +*/ + +#include +#include "templates.h" +#include "doomdef.h" +#include "i_system.h" +#include "w_wad.h" +#include "v_video.h" +#include "doomstat.h" +#include "st_stuff.h" +#include "g_game.h" +#include "g_level.h" +#include "r_data/r_translate.h" +#include "v_palette.h" +#include "r_data/colormaps.h" +#include "poly_triangle.h" +#include "swrenderer/drawers/r_draw_rgba.h" +#include "swrenderer/r_main.h" +#include "screen_triangle.h" + +void ScreenTriangle::SetupNormal(const TriDrawTriangleArgs *args, WorkerThreadData *thread) +{ + const TriVertex &v1 = *args->v1; + const TriVertex &v2 = *args->v2; + const TriVertex &v3 = *args->v3; + int clipright = args->clipright; + int clipbottom = args->clipbottom; + + int stencilPitch = args->stencilPitch; + uint8_t * RESTRICT stencilValues = args->stencilValues; + uint32_t * RESTRICT stencilMasks = args->stencilMasks; + uint8_t stencilTestValue = args->stencilTestValue; + + TriFullSpan * RESTRICT span = thread->FullSpans; + TriPartialBlock * RESTRICT partial = thread->PartialBlocks; + + // 28.4 fixed-point coordinates + const int Y1 = (int)round(16.0f * v1.y); + const int Y2 = (int)round(16.0f * v2.y); + const int Y3 = (int)round(16.0f * v3.y); + + const int X1 = (int)round(16.0f * v1.x); + const int X2 = (int)round(16.0f * v2.x); + const int X3 = (int)round(16.0f * v3.x); + + // Deltas + const int DX12 = X1 - X2; + const int DX23 = X2 - X3; + const int DX31 = X3 - X1; + + const int DY12 = Y1 - Y2; + const int DY23 = Y2 - Y3; + const int DY31 = Y3 - Y1; + + // Fixed-point deltas + const int FDX12 = DX12 << 4; + const int FDX23 = DX23 << 4; + const int FDX31 = DX31 << 4; + + const 
int FDY12 = DY12 << 4; + const int FDY23 = DY23 << 4; + const int FDY31 = DY31 << 4; + + // Bounding rectangle + int minx = MAX((MIN(MIN(X1, X2), X3) + 0xF) >> 4, 0); + int maxx = MIN((MAX(MAX(X1, X2), X3) + 0xF) >> 4, clipright - 1); + int miny = MAX((MIN(MIN(Y1, Y2), Y3) + 0xF) >> 4, 0); + int maxy = MIN((MAX(MAX(Y1, Y2), Y3) + 0xF) >> 4, clipbottom - 1); + if (minx >= maxx || miny >= maxy) + { + thread->NumFullSpans = 0; + thread->NumPartialBlocks = 0; + return; + } + + // Block size, standard 8x8 (must be power of two) + const int q = 8; + + // Start in corner of 8x8 block + minx &= ~(q - 1); + miny &= ~(q - 1); + + // Half-edge constants + int C1 = DY12 * X1 - DX12 * Y1; + int C2 = DY23 * X2 - DX23 * Y2; + int C3 = DY31 * X3 - DX31 * Y3; + + // Correct for fill convention + if (DY12 < 0 || (DY12 == 0 && DX12 > 0)) C1++; + if (DY23 < 0 || (DY23 == 0 && DX23 > 0)) C2++; + if (DY31 < 0 || (DY31 == 0 && DX31 > 0)) C3++; + + // First block line for this thread + int core = thread->core; + int num_cores = thread->num_cores; + int core_skip = (num_cores - ((miny / q) - core) % num_cores) % num_cores; + miny += core_skip * q; + + thread->StartX = minx; + thread->StartY = miny; + span->Length = 0; + + // Loop through blocks + for (int y = miny; y < maxy; y += q * num_cores) + { + for (int x = minx; x < maxx; x += q) + { + // Corners of block + int x0 = x << 4; + int x1 = (x + q - 1) << 4; + int y0 = y << 4; + int y1 = (y + q - 1) << 4; + + // Evaluate half-space functions + bool a00 = C1 + DX12 * y0 - DY12 * x0 > 0; + bool a10 = C1 + DX12 * y0 - DY12 * x1 > 0; + bool a01 = C1 + DX12 * y1 - DY12 * x0 > 0; + bool a11 = C1 + DX12 * y1 - DY12 * x1 > 0; + int a = (a00 << 0) | (a10 << 1) | (a01 << 2) | (a11 << 3); + + bool b00 = C2 + DX23 * y0 - DY23 * x0 > 0; + bool b10 = C2 + DX23 * y0 - DY23 * x1 > 0; + bool b01 = C2 + DX23 * y1 - DY23 * x0 > 0; + bool b11 = C2 + DX23 * y1 - DY23 * x1 > 0; + int b = (b00 << 0) | (b10 << 1) | (b01 << 2) | (b11 << 3); + + bool c00 = C3 + 
DX31 * y0 - DY31 * x0 > 0; + bool c10 = C3 + DX31 * y0 - DY31 * x1 > 0; + bool c01 = C3 + DX31 * y1 - DY31 * x0 > 0; + bool c11 = C3 + DX31 * y1 - DY31 * x1 > 0; + int c = (c00 << 0) | (c10 << 1) | (c01 << 2) | (c11 << 3); + + // Stencil test the whole block, if possible + int block = x / 8 + y / 8 * stencilPitch; + uint8_t *stencilBlock = &stencilValues[block * 64]; + uint32_t *stencilBlockMask = &stencilMasks[block]; + bool blockIsSingleStencil = ((*stencilBlockMask) & 0xffffff00) == 0xffffff00; + bool skipBlock = blockIsSingleStencil && ((*stencilBlockMask) & 0xff) != stencilTestValue; + + // Skip block when outside an edge + if (a == 0 || b == 0 || c == 0 || skipBlock) + { + if (span->Length != 0) + { + span++; + span->Length = 0; + } + continue; + } + + // Accept whole block when totally covered + if (a == 0xf && b == 0xf && c == 0xf && x + q <= clipright && y + q <= clipbottom && blockIsSingleStencil) + { + if (span->Length != 0) + { + span->Length++; + } + else + { + span->X = x; + span->Y = y; + span->Length = 1; + } + } + else // Partially covered block + { + x0 = x << 4; + x1 = (x + q - 1) << 4; + int CY1 = C1 + DX12 * y0 - DY12 * x0; + int CY2 = C2 + DX23 * y0 - DY23 * x0; + int CY3 = C3 + DX31 * y0 - DY31 * x0; + + uint32_t mask0 = 0; + uint32_t mask1 = 0; + + for (int iy = 0; iy < 4; iy++) + { + int CX1 = CY1; + int CX2 = CY2; + int CX3 = CY3; + + for (int ix = 0; ix < q; ix++) + { + bool passStencilTest = blockIsSingleStencil || stencilBlock[ix + iy * q] == stencilTestValue; + bool covered = (CX1 > 0 && CX2 > 0 && CX3 > 0 && (x + ix) < clipright && (y + iy) < clipbottom && passStencilTest); + mask0 <<= 1; + mask0 |= (uint32_t)covered; + + CX1 -= FDY12; + CX2 -= FDY23; + CX3 -= FDY31; + } + + CY1 += FDX12; + CY2 += FDX23; + CY3 += FDX31; + } + + for (int iy = 4; iy < q; iy++) + { + int CX1 = CY1; + int CX2 = CY2; + int CX3 = CY3; + + for (int ix = 0; ix < q; ix++) + { + bool passStencilTest = blockIsSingleStencil || stencilBlock[ix + iy * q] == 
stencilTestValue; + bool covered = (CX1 > 0 && CX2 > 0 && CX3 > 0 && (x + ix) < clipright && (y + iy) < clipbottom && passStencilTest); + mask1 <<= 1; + mask1 |= (uint32_t)covered; + + CX1 -= FDY12; + CX2 -= FDY23; + CX3 -= FDY31; + } + + CY1 += FDX12; + CY2 += FDX23; + CY3 += FDX31; + } + + if (mask0 != 0xffffffff || mask1 != 0xffffffff) + { + if (span->Length > 0) + { + span++; + span->Length = 0; + } + + if (mask0 == 0 && mask1 == 0) + continue; + + partial->X = x; + partial->Y = y; + partial->Mask0 = mask0; + partial->Mask1 = mask1; + partial++; + } + else if (span->Length != 0) + { + span->Length++; + } + else + { + span->X = x; + span->Y = y; + span->Length = 1; + } + } + } + + if (span->Length != 0) + { + span++; + span->Length = 0; + } + } + + thread->NumFullSpans = (int)(span - thread->FullSpans); + thread->NumPartialBlocks = (int)(partial - thread->PartialBlocks); +} + +void ScreenTriangle::SetupSubsector(const TriDrawTriangleArgs *args, WorkerThreadData *thread) +{ + const TriVertex &v1 = *args->v1; + const TriVertex &v2 = *args->v2; + const TriVertex &v3 = *args->v3; + int clipright = args->clipright; + int clipbottom = args->clipbottom; + + int stencilPitch = args->stencilPitch; + uint8_t * RESTRICT stencilValues = args->stencilValues; + uint32_t * RESTRICT stencilMasks = args->stencilMasks; + uint8_t stencilTestValue = args->stencilTestValue; + + uint32_t * RESTRICT subsectorGBuffer = args->subsectorGBuffer; + uint32_t subsectorDepth = args->uniforms->subsectorDepth; + int32_t pitch = args->pitch; + + TriFullSpan * RESTRICT span = thread->FullSpans; + TriPartialBlock * RESTRICT partial = thread->PartialBlocks; + + // 28.4 fixed-point coordinates + const int Y1 = (int)round(16.0f * v1.y); + const int Y2 = (int)round(16.0f * v2.y); + const int Y3 = (int)round(16.0f * v3.y); + + const int X1 = (int)round(16.0f * v1.x); + const int X2 = (int)round(16.0f * v2.x); + const int X3 = (int)round(16.0f * v3.x); + + // Deltas + const int DX12 = X1 - X2; + const 
int DX23 = X2 - X3; + const int DX31 = X3 - X1; + + const int DY12 = Y1 - Y2; + const int DY23 = Y2 - Y3; + const int DY31 = Y3 - Y1; + + // Fixed-point deltas + const int FDX12 = DX12 << 4; + const int FDX23 = DX23 << 4; + const int FDX31 = DX31 << 4; + + const int FDY12 = DY12 << 4; + const int FDY23 = DY23 << 4; + const int FDY31 = DY31 << 4; + + // Bounding rectangle + int minx = MAX((MIN(MIN(X1, X2), X3) + 0xF) >> 4, 0); + int maxx = MIN((MAX(MAX(X1, X2), X3) + 0xF) >> 4, clipright - 1); + int miny = MAX((MIN(MIN(Y1, Y2), Y3) + 0xF) >> 4, 0); + int maxy = MIN((MAX(MAX(Y1, Y2), Y3) + 0xF) >> 4, clipbottom - 1); + if (minx >= maxx || miny >= maxy) + { + thread->NumFullSpans = 0; + thread->NumPartialBlocks = 0; + return; + } + + // Block size, standard 8x8 (must be power of two) + const int q = 8; + + // Start in corner of 8x8 block + minx &= ~(q - 1); + miny &= ~(q - 1); + + // Half-edge constants + int C1 = DY12 * X1 - DX12 * Y1; + int C2 = DY23 * X2 - DX23 * Y2; + int C3 = DY31 * X3 - DX31 * Y3; + + // Correct for fill convention + if (DY12 < 0 || (DY12 == 0 && DX12 > 0)) C1++; + if (DY23 < 0 || (DY23 == 0 && DX23 > 0)) C2++; + if (DY31 < 0 || (DY31 == 0 && DX31 > 0)) C3++; + + // First block line for this thread + int core = thread->core; + int num_cores = thread->num_cores; + int core_skip = (num_cores - ((miny / q) - core) % num_cores) % num_cores; + miny += core_skip * q; + + thread->StartX = minx; + thread->StartY = miny; + span->Length = 0; + + // Loop through blocks + for (int y = miny; y < maxy; y += q * num_cores) + { + for (int x = minx; x < maxx; x += q) + { + // Corners of block + int x0 = x << 4; + int x1 = (x + q - 1) << 4; + int y0 = y << 4; + int y1 = (y + q - 1) << 4; + + // Evaluate half-space functions + bool a00 = C1 + DX12 * y0 - DY12 * x0 > 0; + bool a10 = C1 + DX12 * y0 - DY12 * x1 > 0; + bool a01 = C1 + DX12 * y1 - DY12 * x0 > 0; + bool a11 = C1 + DX12 * y1 - DY12 * x1 > 0; + int a = (a00 << 0) | (a10 << 1) | (a01 << 2) | (a11 << 3); + 
+ bool b00 = C2 + DX23 * y0 - DY23 * x0 > 0; + bool b10 = C2 + DX23 * y0 - DY23 * x1 > 0; + bool b01 = C2 + DX23 * y1 - DY23 * x0 > 0; + bool b11 = C2 + DX23 * y1 - DY23 * x1 > 0; + int b = (b00 << 0) | (b10 << 1) | (b01 << 2) | (b11 << 3); + + bool c00 = C3 + DX31 * y0 - DY31 * x0 > 0; + bool c10 = C3 + DX31 * y0 - DY31 * x1 > 0; + bool c01 = C3 + DX31 * y1 - DY31 * x0 > 0; + bool c11 = C3 + DX31 * y1 - DY31 * x1 > 0; + int c = (c00 << 0) | (c10 << 1) | (c01 << 2) | (c11 << 3); + + // Stencil test the whole block, if possible + int block = x / 8 + y / 8 * stencilPitch; + uint8_t *stencilBlock = &stencilValues[block * 64]; + uint32_t *stencilBlockMask = &stencilMasks[block]; + bool blockIsSingleStencil = ((*stencilBlockMask) & 0xffffff00) == 0xffffff00; + bool skipBlock = blockIsSingleStencil && ((*stencilBlockMask) & 0xff) < stencilTestValue; + + // Skip block when outside an edge + if (a == 0 || b == 0 || c == 0 || skipBlock) + { + if (span->Length != 0) + { + span++; + span->Length = 0; + } + continue; + } + + // Accept whole block when totally covered + if (a == 0xf && b == 0xf && c == 0xf && x + q <= clipright && y + q <= clipbottom && blockIsSingleStencil) + { + // Totally covered block still needs a subsector coverage test: + + uint32_t *subsector = subsectorGBuffer + x + y * pitch; + + uint32_t mask0 = 0; + uint32_t mask1 = 0; + + for (int iy = 0; iy < 4; iy++) + { + for (int ix = 0; ix < q; ix++) + { + bool covered = subsector[ix] >= subsectorDepth; + mask0 <<= 1; + mask0 |= (uint32_t)covered; + } + subsector += pitch; + } + + for (int iy = 4; iy < q; iy++) + { + for (int ix = 0; ix < q; ix++) + { + bool covered = subsector[ix] >= subsectorDepth; + mask1 <<= 1; + mask1 |= (uint32_t)covered; + } + subsector += pitch; + } + + if (mask0 != 0xffffffff || mask1 != 0xffffffff) + { + if (span->Length > 0) + { + span++; + span->Length = 0; + } + + if (mask0 == 0 && mask1 == 0) + continue; + + partial->X = x; + partial->Y = y; + partial->Mask0 = mask0; + 
partial->Mask1 = mask1; + partial++; + } + else if (span->Length != 0) + { + span->Length++; + } + else + { + span->X = x; + span->Y = y; + span->Length = 1; + } + } + else // Partially covered block + { + x0 = x << 4; + x1 = (x + q - 1) << 4; + int CY1 = C1 + DX12 * y0 - DY12 * x0; + int CY2 = C2 + DX23 * y0 - DY23 * x0; + int CY3 = C3 + DX31 * y0 - DY31 * x0; + + uint32_t *subsector = subsectorGBuffer + x + y * pitch; + + uint32_t mask0 = 0; + uint32_t mask1 = 0; + + for (int iy = 0; iy < 4; iy++) + { + int CX1 = CY1; + int CX2 = CY2; + int CX3 = CY3; + + for (int ix = 0; ix < q; ix++) + { + bool passStencilTest = blockIsSingleStencil || stencilBlock[ix + iy * q] >= stencilTestValue; + bool covered = (CX1 > 0 && CX2 > 0 && CX3 > 0 && (x + ix) < clipright && (y + iy) < clipbottom && passStencilTest && subsector[ix] >= subsectorDepth); + mask0 <<= 1; + mask0 |= (uint32_t)covered; + + CX1 -= FDY12; + CX2 -= FDY23; + CX3 -= FDY31; + } + + CY1 += FDX12; + CY2 += FDX23; + CY3 += FDX31; + subsector += pitch; + } + + for (int iy = 4; iy < q; iy++) + { + int CX1 = CY1; + int CX2 = CY2; + int CX3 = CY3; + + for (int ix = 0; ix < q; ix++) + { + bool passStencilTest = blockIsSingleStencil || stencilBlock[ix + iy * q] >= stencilTestValue; + bool covered = (CX1 > 0 && CX2 > 0 && CX3 > 0 && (x + ix) < clipright && (y + iy) < clipbottom && passStencilTest && subsector[ix] >= subsectorDepth); + mask1 <<= 1; + mask1 |= (uint32_t)covered; + + CX1 -= FDY12; + CX2 -= FDY23; + CX3 -= FDY31; + } + + CY1 += FDX12; + CY2 += FDX23; + CY3 += FDX31; + subsector += pitch; + } + + if (mask0 != 0xffffffff || mask1 != 0xffffffff) + { + if (span->Length > 0) + { + span++; + span->Length = 0; + } + + if (mask0 == 0 && mask1 == 0) + continue; + + partial->X = x; + partial->Y = y; + partial->Mask0 = mask0; + partial->Mask1 = mask1; + partial++; + } + else if (span->Length != 0) + { + span->Length++; + } + else + { + span->X = x; + span->Y = y; + span->Length = 1; + } + } + } + + if (span->Length != 
0) + { + span++; + span->Length = 0; + } + } + + thread->NumFullSpans = (int)(span - thread->FullSpans); + thread->NumPartialBlocks = (int)(partial - thread->PartialBlocks); +} + +void ScreenTriangle::StencilWrite(const TriDrawTriangleArgs *args, WorkerThreadData *thread) +{ + uint8_t * RESTRICT stencilValues = args->stencilValues; + uint32_t * RESTRICT stencilMasks = args->stencilMasks; + uint32_t stencilWriteValue = args->stencilWriteValue; + uint32_t stencilPitch = args->stencilPitch; + + int numSpans = thread->NumFullSpans; + auto fullSpans = thread->FullSpans; + int numBlocks = thread->NumPartialBlocks; + auto partialBlocks = thread->PartialBlocks; + + for (int i = 0; i < numSpans; i++) + { + const auto &span = fullSpans[i]; + + int block = span.X / 8 + span.Y / 8 * stencilPitch; + uint8_t *stencilBlock = &stencilValues[block * 64]; + uint32_t *stencilBlockMask = &stencilMasks[block]; + + int width = span.Length; + for (int x = 0; x < width; x++) + stencilBlockMask[x] = 0xffffff00 | stencilWriteValue; + } + + for (int i = 0; i < numBlocks; i++) + { + const auto &block = partialBlocks[i]; + + uint32_t mask0 = block.Mask0; + uint32_t mask1 = block.Mask1; + + int sblock = block.X / 8 + block.Y / 8 * stencilPitch; + uint8_t *stencilBlock = &stencilValues[sblock * 64]; + uint32_t *stencilBlockMask = &stencilMasks[sblock]; + + bool isSingleValue = ((*stencilBlockMask) & 0xffffff00) == 0xffffff00; + if (isSingleValue) + { + uint8_t value = (*stencilBlockMask) & 0xff; + for (int v = 0; v < 64; v++) + stencilBlock[v] = value; + *stencilBlockMask = 0; + } + + int count = 0; + for (int v = 0; v < 32; v++) + { + if ((mask0 & (1 << 31)) || stencilBlock[v] == stencilWriteValue) + { + stencilBlock[v] = stencilWriteValue; + count++; + } + mask0 <<= 1; + } + for (int v = 32; v < 64; v++) + { + if ((mask1 & (1 << 31)) || stencilBlock[v] == stencilWriteValue) + { + stencilBlock[v] = stencilWriteValue; + count++; + } + mask1 <<= 1; + } + + if (count == 64) + *stencilBlockMask = 
0xffffff00 | stencilWriteValue; + } +} + +void ScreenTriangle::SubsectorWrite(const TriDrawTriangleArgs *args, WorkerThreadData *thread) +{ + uint32_t * RESTRICT subsectorGBuffer = args->subsectorGBuffer; + uint32_t subsectorDepth = args->uniforms->subsectorDepth; + int pitch = args->pitch; + + int numSpans = thread->NumFullSpans; + auto fullSpans = thread->FullSpans; + int numBlocks = thread->NumPartialBlocks; + auto partialBlocks = thread->PartialBlocks; + + for (int i = 0; i < numSpans; i++) + { + const auto &span = fullSpans[i]; + + uint32_t *subsector = subsectorGBuffer + span.X + span.Y * pitch; + int width = span.Length * 8; + int height = 8; + for (int y = 0; y < height; y++) + { + for (int x = 0; x < width; x++) + subsector[x] = subsectorDepth; + subsector += pitch; + } + } + + for (int i = 0; i < numBlocks; i++) + { + const auto &block = partialBlocks[i]; + + uint32_t *subsector = subsectorGBuffer + block.X + block.Y * pitch; + uint32_t mask0 = block.Mask0; + uint32_t mask1 = block.Mask1; + for (int y = 0; y < 4; y++) + { + for (int x = 0; x < 8; x++) + { + if (mask0 & (1 << 31)) + subsector[x] = subsectorDepth; + mask0 <<= 1; + } + subsector += pitch; + } + for (int y = 4; y < 8; y++) + { + for (int x = 0; x < 8; x++) + { + if (mask1 & (1 << 31)) + subsector[x] = subsectorDepth; + mask1 <<= 1; + } + subsector += pitch; + } + } +} + +#if 0 +float ScreenTriangle::FindGradientX(float x0, float y0, float x1, float y1, float x2, float y2, float c0, float c1, float c2) +{ + float top = (c1 - c2) * (y0 - y2) - (c0 - c2) * (y1 - y2); + float bottom = (x1 - x2) * (y0 - y2) - (x0 - x2) * (y1 - y2); + return top / bottom; +} + +float ScreenTriangle::FindGradientY(float x0, float y0, float x1, float y1, float x2, float y2, float c0, float c1, float c2) +{ + float top = (c1 - c2) * (x0 - x2) - (c0 - c2) * (x1 - x2); + float bottom = (x0 - x2) * (y1 - y2) - (x1 - x2) * (y0 - y2); + return top / bottom; +} + +void ScreenTriangle::Draw(const TriDrawTriangleArgs *args, 
WorkerThreadData *thread) +{ + int numSpans = thread->NumFullSpans; + auto fullSpans = thread->FullSpans; + int numBlocks = thread->NumPartialBlocks; + auto partialBlocks = thread->PartialBlocks; + int startX = thread->StartX; + int startY = thread->StartY; + + // Calculate gradients + const TriVertex &v1 = *args->v1; + const TriVertex &v2 = *args->v2; + const TriVertex &v3 = *args->v3; + ScreenTriangleStepVariables gradientX; + ScreenTriangleStepVariables gradientY; + ScreenTriangleStepVariables start; + gradientX.W = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + gradientY.W = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + start.W = v1.w + gradientX.W * (startX - v1.x) + gradientY.W * (startY - v1.y); + for (int i = 0; i < TriVertex::NumVarying; i++) + { + gradientX.Varying[i] = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + gradientY.Varying[i] = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + start.Varying[i] = v1.varying[i] * v1.w + gradientX.Varying[i] * (startX - v1.x) + gradientY.Varying[i] * (startY - v1.y); + } + + const uint32_t * RESTRICT texPixels = (const uint32_t *)args->texturePixels; + uint32_t texWidth = args->textureWidth; + uint32_t texHeight = args->textureHeight; + + uint32_t * RESTRICT destOrg = (uint32_t*)args->dest; + uint32_t * RESTRICT subsectorGBuffer = (uint32_t*)args->subsectorGBuffer; + int pitch = args->pitch; + + uint32_t subsectorDepth = args->uniforms->subsectorDepth; + + uint32_t light = args->uniforms->light; + float shade = (64.0f - (light * 255 / 256 + 12.0f) * 32.0f / 128.0f) / 32.0f; + float globVis = 1706.0f; + + for (int i = 0; i < numSpans; i++) + { + const auto &span = fullSpans[i]; + + uint32_t *dest = destOrg + span.X + span.Y * pitch; + uint32_t *subsector = subsectorGBuffer + span.X + span.Y * pitch; + int width = span.Length; + 
int height = 8; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (span.X - startX) + gradientY.W * (span.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (span.X - startX) + gradientY.Varying[j] * (span.Y - startY); + + for (int y = 0; y < height; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + int lightpos = 256 - (int)(clamp(shade - MIN(24.0f, globVis * blockPosX.W) / 32.0f, 0.0f, 31.0f / 32.0f) * 256.0f); + + for (int x = 0; x < width; x++) + { + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = 256 - (int)(clamp(shade - MIN(24.0f, globVis * blockPosX.W) / 32.0f, 0.0f, 31.0f / 32.0f) * 256.0f); + int lightstep = (lightnext - lightpos) / 8; + + for (int ix = 0; ix < 8; ix++) + { + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + uint32_t fg = texPixels[texelX * texHeight + texelY]; + + uint32_t r = RPART(fg); + uint32_t g = GPART(fg); + uint32_t b = BPART(fg); + r = r * lightpos / 256; + g = g * lightpos / 256; + b = b * lightpos / 256; + fg = 0xff000000 | (r << 16) | (g << 8) | b; + + dest[x * 8 + ix] = fg; + subsector[x * 8 + ix] = subsectorDepth; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + } + + blockPosY.W += 
gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + subsector += pitch; + } + } + + for (int i = 0; i < numBlocks; i++) + { + const auto &block = partialBlocks[i]; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (block.X - startX) + gradientY.W * (block.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (block.X - startX) + gradientY.Varying[j] * (block.Y - startY); + + uint32_t *dest = destOrg + block.X + block.Y * pitch; + uint32_t *subsector = subsectorGBuffer + block.X + block.Y * pitch; + uint32_t mask0 = block.Mask0; + uint32_t mask1 = block.Mask1; + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = 256 - (int)(clamp(shade - MIN(24.0f, globVis * blockPosX.W) / 32.0f, 0.0f, 31.0f / 32.0f) * 256.0f); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = 256 - (int)(clamp(shade - MIN(24.0f, globVis * blockPosX.W) / 32.0f, 0.0f, 31.0f / 32.0f) * 256.0f); + int lightstep = (lightnext - lightpos) / 8; + + for (int x = 0; x < 8; x++) + { + if (mask0 & (1 << 31)) + { + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + uint32_t fg = texPixels[texelX * texHeight + texelY]; + + uint32_t r 
= RPART(fg); + uint32_t g = GPART(fg); + uint32_t b = BPART(fg); + r = r * lightpos / 256; + g = g * lightpos / 256; + b = b * lightpos / 256; + fg = 0xff000000 | (r << 16) | (g << 8) | b; + + dest[x] = fg; + subsector[x] = subsectorDepth; + } + mask0 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + subsector += pitch; + } + for (int y = 4; y < 8; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = 256 - (int)(clamp(shade - MIN(24.0f, globVis * blockPosX.W) / 32.0f, 0.0f, 31.0f / 32.0f) * 256.0f); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = 256 - (int)(clamp(shade - MIN(24.0f, globVis * blockPosX.W) / 32.0f, 0.0f, 31.0f / 32.0f) * 256.0f); + int lightstep = (lightnext - lightpos) / 8; + + for (int x = 0; x < 8; x++) + { + if (mask1 & (1 << 31)) + { + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + uint32_t fg = texPixels[texelX * texHeight + texelY]; + + uint32_t r = RPART(fg); + uint32_t g = GPART(fg); + uint32_t b = BPART(fg); + r = r * lightpos / 256; + g = g * lightpos / 256; + b = b * lightpos / 256; + fg = 0xff000000 | (r << 16) | (g << 8) | b; + + dest[x] = 
fg; + subsector[x] = subsectorDepth; + } + mask1 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + subsector += pitch; + } + } +} +#endif diff --git a/src/polyrenderer/drawers/screen_triangle.h b/src/polyrenderer/drawers/screen_triangle.h new file mode 100644 index 0000000000..0238088bfc --- /dev/null +++ b/src/polyrenderer/drawers/screen_triangle.h @@ -0,0 +1,40 @@ +/* +** Projected triangle drawer +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. 
+** +*/ + +#pragma once + +#include "swrenderer/drawers/r_drawers.h" + +class ScreenTriangle /* Collection of static triangle routines; each one receives the triangle's draw arguments and the calling worker thread's data. */ +{ +public: + static void SetupNormal(const TriDrawTriangleArgs *args, WorkerThreadData *thread); + static void SetupSubsector(const TriDrawTriangleArgs *args, WorkerThreadData *thread); + static void StencilWrite(const TriDrawTriangleArgs *args, WorkerThreadData *thread); + static void SubsectorWrite(const TriDrawTriangleArgs *args, WorkerThreadData *thread); +}; + +struct ScreenTriangleStepVariables /* One set of interpolants: W plus one float per TriVertex varying. The drawers use this type both for the values at a position and for the per-pixel (X) and per-row (Y) gradients. */ +{ + float W; + float Varying[TriVertex::NumVarying]; +}; diff --git a/src/polyrenderer/math/tri_matrix.cpp b/src/polyrenderer/math/tri_matrix.cpp new file mode 100644 index 0000000000..72be0d0edf --- /dev/null +++ b/src/polyrenderer/math/tri_matrix.cpp @@ -0,0 +1,187 @@ +/* +** Triangle drawers +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. 
+** +*/ + +#include /* NOTE(review): the header name after this #include is missing in this copy of the patch (probably an angle-bracket include lost in extraction) - restore it from the upstream commit. */ +#include "templates.h" +#include "doomdef.h" +#include "i_system.h" +#include "w_wad.h" +#include "v_video.h" +#include "doomstat.h" +#include "st_stuff.h" +#include "g_game.h" +#include "g_level.h" +#include "r_data/r_translate.h" +#include "v_palette.h" +#include "r_data/colormaps.h" +#include "tri_matrix.h" +#include "polyrenderer/drawers/poly_triangle.h" +#include "swrenderer/drawers/r_draw_rgba.h" +#include "swrenderer/r_main.h" + +TriMatrix TriMatrix::null() /* Returns a matrix with all 16 elements set to zero. */ +{ + TriMatrix m; + memset(m.matrix, 0, sizeof(m.matrix)); + return m; +} + +TriMatrix TriMatrix::identity() /* Returns the identity: ones at indices 0, 5, 10 and 15 (the diagonal), zero elsewhere. */ +{ + TriMatrix m = null(); + m.matrix[0] = 1.0f; + m.matrix[5] = 1.0f; + m.matrix[10] = 1.0f; + m.matrix[15] = 1.0f; + return m; +} + +TriMatrix TriMatrix::translate(float x, float y, float z) /* Identity with (x, y, z) stored in the fourth column; elements are indexed as row + column * 4. */ +{ + TriMatrix m = identity(); + m.matrix[0 + 3 * 4] = x; + m.matrix[1 + 3 * 4] = y; + m.matrix[2 + 3 * 4] = z; + return m; +} + +TriMatrix TriMatrix::scale(float x, float y, float z) /* Diagonal matrix (x, y, z, 1). */ +{ + TriMatrix m = null(); + m.matrix[0 + 0 * 4] = x; + m.matrix[1 + 1 * 4] = y; + m.matrix[2 + 2 * 4] = z; + m.matrix[3 + 3 * 4] = 1; + return m; +} + +TriMatrix TriMatrix::rotate(float angle, float x, float y, float z) /* Axis-angle rotation about (x, y, z). angle is in radians (it is passed straight to cosf/sinf). NOTE(review): the axis is not normalized here, so callers presumably must pass a unit vector - confirm. */ +{ + float c = cosf(angle); + float s = sinf(angle); + TriMatrix m = null(); + m.matrix[0 + 0 * 4] = (x*x*(1.0f - c) + c); + m.matrix[0 + 1 * 4] = (x*y*(1.0f - c) - z*s); + m.matrix[0 + 2 * 4] = (x*z*(1.0f - c) + y*s); + m.matrix[1 + 0 * 4] = (y*x*(1.0f - c) + z*s); + m.matrix[1 + 1 * 4] = (y*y*(1.0f - c) + c); + m.matrix[1 + 2 * 4] = (y*z*(1.0f - c) - x*s); + m.matrix[2 + 0 * 4] = (x*z*(1.0f - c) - y*s); + m.matrix[2 + 1 * 4] = (y*z*(1.0f - c) + x*s); + m.matrix[2 + 2 * 4] = (z*z*(1.0f - c) + c); + m.matrix[3 + 3 * 4] = 1.0f; + return m; +} + +TriMatrix TriMatrix::swapYZ() /* Maps (x, y, z, w) to (x, z, -y, w). */ +{ + TriMatrix m = null(); + m.matrix[0 + 0 * 4] = 1.0f; + m.matrix[1 + 2 * 4] = 1.0f; + m.matrix[2 + 1 * 4] = -1.0f; + m.matrix[3 + 3 * 4] = 1.0f; + return m; +} + +TriMatrix TriMatrix::perspective(float fovy, float 
aspect, float z_near, float z_far) /* Symmetric perspective projection. fovy is the full vertical field of view in degrees (the code takes tan of fovy * pi / 360, i.e. half the angle in radians). */ +{ + float f = (float)(1.0 / tan(fovy * M_PI / 360.0)); + TriMatrix m = null(); + m.matrix[0 + 0 * 4] = f / aspect; + m.matrix[1 + 1 * 4] = f; + m.matrix[2 + 2 * 4] = (z_far + z_near) / (z_near - z_far); + m.matrix[2 + 3 * 4] = (2.0f * z_far * z_near) / (z_near - z_far); + m.matrix[3 + 2 * 4] = -1.0f; + return m; +} + +TriMatrix TriMatrix::frustum(float left, float right, float bottom, float top, float near, float far) /* Off-centre perspective projection from the near-plane rectangle (left, right, bottom, top) and the near/far distances. */ +{ + float a = (right + left) / (right - left); + float b = (top + bottom) / (top - bottom); + float c = -(far + near) / (far - near); + float d = -(2.0f * far) / (far - near); /* NOTE(review): the OpenGL glFrustum matrix uses -(2 * far * near) / (far - near) for this element, and perspective() above does multiply by z_near; here near is not multiplied in. Confirm whether this is intentional before changing it, since it moves the effective clip planes. */ + TriMatrix m = null(); + m.matrix[0 + 0 * 4] = 2.0f * near / (right - left); + m.matrix[1 + 1 * 4] = 2.0f * near / (top - bottom); + m.matrix[0 + 2 * 4] = a; + m.matrix[1 + 2 * 4] = b; + m.matrix[2 + 2 * 4] = c; + m.matrix[2 + 3 * 4] = d; + m.matrix[3 + 2 * 4] = -1; + return m; +} + +TriMatrix TriMatrix::worldToView() /* Builds the view transform from the globals ViewSin, ViewCos and ViewPos: a vertex is first translated by -ViewPos and then multiplied by the matrix m assembled below. */ +{ + TriMatrix m = null(); + m.matrix[0 + 0 * 4] = (float)ViewSin; + m.matrix[0 + 1 * 4] = (float)-ViewCos; + m.matrix[1 + 2 * 4] = 1.0f; + m.matrix[2 + 0 * 4] = (float)-ViewCos; + m.matrix[2 + 1 * 4] = (float)-ViewSin; + m.matrix[3 + 3 * 4] = 1.0f; + return m * translate((float)-ViewPos.X, (float)-ViewPos.Y, (float)-ViewPos.Z); +} + +TriMatrix TriMatrix::viewToClip() /* Calls frustum() with hard-coded near = 5 and far = 65536; the horizontal extent comes from FocalTangent and the vertical extent from CenterY, viewheight and InvZtoScale. */ +{ + float near = 5.0f; + float far = 65536.0f; + float width = (float)(FocalTangent * near); + float top = (float)(swrenderer::CenterY / swrenderer::InvZtoScale * near); + float bottom = (float)(top - viewheight / swrenderer::InvZtoScale * near); + return frustum(-width, width, bottom, top, near, far); +} + +TriMatrix TriMatrix::operator*(const TriMatrix &mult) const /* Matrix product (this * mult); in the loops x is the row index and y the column index of the result element. */ +{ + TriMatrix result; + for (int x = 0; x < 4; x++) + { + for (int y = 0; y < 4; y++) + { + result.matrix[x + y * 4] = + matrix[0 * 4 + x] * mult.matrix[y * 4 + 0] + + matrix[1 * 4 + x] * mult.matrix[y * 4 + 1] + + matrix[2 * 4 + x] * mult.matrix[y * 4 + 2] + + matrix[3 * 4 + x] * mult.matrix[y * 4 
+ 3]; + } + } + return result; +} + +ShadedTriVertex TriMatrix::operator*(TriVertex v) const /* Transforms (v.x, v.y, v.z, v.w) as a column vector (matrix * v) and copies every varying through unchanged. */ +{ + float vx = matrix[0 * 4 + 0] * v.x + matrix[1 * 4 + 0] * v.y + matrix[2 * 4 + 0] * v.z + matrix[3 * 4 + 0] * v.w; + float vy = matrix[0 * 4 + 1] * v.x + matrix[1 * 4 + 1] * v.y + matrix[2 * 4 + 1] * v.z + matrix[3 * 4 + 1] * v.w; + float vz = matrix[0 * 4 + 2] * v.x + matrix[1 * 4 + 2] * v.y + matrix[2 * 4 + 2] * v.z + matrix[3 * 4 + 2] * v.w; + float vw = matrix[0 * 4 + 3] * v.x + matrix[1 * 4 + 3] * v.y + matrix[2 * 4 + 3] * v.z + matrix[3 * 4 + 3] * v.w; + ShadedTriVertex sv; + sv.x = vx; + sv.y = vy; + sv.z = vz; + sv.w = vw; + for (int i = 0; i < TriVertex::NumVarying; i++) + sv.varying[i] = v.varying[i]; + return sv; +} diff --git a/src/polyrenderer/math/tri_matrix.h b/src/polyrenderer/math/tri_matrix.h new file mode 100644 index 0000000000..c1eabcce89 --- /dev/null +++ b/src/polyrenderer/math/tri_matrix.h @@ -0,0 +1,46 @@ +/* +** Triangle drawers +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. 
+** +*/ + +#pragma once + +struct TriVertex; +struct ShadedTriVertex; + +struct TriMatrix /* 4x4 float matrix. The element at row r, column c is stored at matrix[r + c * 4], and vertices are transformed as column vectors (matrix * v). All factory functions are static and return the matrix by value. */ +{ + static TriMatrix null(); + static TriMatrix identity(); + static TriMatrix translate(float x, float y, float z); + static TriMatrix scale(float x, float y, float z); + static TriMatrix rotate(float angle, float x, float y, float z); + static TriMatrix swapYZ(); + static TriMatrix perspective(float fovy, float aspect, float near, float far); + static TriMatrix frustum(float left, float right, float bottom, float top, float near, float far); + + static TriMatrix worldToView(); // Software renderer world to view space transform + static TriMatrix viewToClip(); // Software renderer shearing projection + + ShadedTriVertex operator*(TriVertex v) const; + TriMatrix operator*(const TriMatrix &m) const; + + float matrix[16]; +}; From 74e1955afa025f32b6c98740187b00b113341e58 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 30 Dec 2016 05:01:42 +0100 Subject: [PATCH 594/912] Move more to r_draw_segment --- src/r_defs.h | 1 - src/swrenderer/r_main.cpp | 7 +- src/swrenderer/scene/r_draw_segment.cpp | 49 +++++--- src/swrenderer/scene/r_draw_segment.h | 6 +- src/swrenderer/scene/r_plane.cpp | 36 +++--- src/swrenderer/scene/r_segs.cpp | 156 ++++++++++++------------ 6 files changed, 130 insertions(+), 125 deletions(-) diff --git a/src/r_defs.h b/src/r_defs.h index 036e980b7e..4015103966 100644 --- a/src/r_defs.h +++ b/src/r_defs.h @@ -60,7 +60,6 @@ enum SIL_BOTH }; -namespace swrenderer { extern size_t MaxDrawSegs; } struct FDisplacement; // diff --git a/src/swrenderer/r_main.cpp b/src/swrenderer/r_main.cpp index 72cefcefa4..eefb4af83f 100644 --- a/src/swrenderer/r_main.cpp +++ b/src/swrenderer/r_main.cpp @@ -406,12 +406,7 @@ static void R_ShutdownRenderer() openings = NULL; } - // Free drawsegs - if (drawsegs != NULL) - { - M_Free (drawsegs); - drawsegs = NULL; - } + R_FreeDrawSegs(); } //========================================================================== diff --git 
a/src/swrenderer/scene/r_draw_segment.cpp b/src/swrenderer/scene/r_draw_segment.cpp index b6e29b3967..6181911f2a 100644 --- a/src/swrenderer/scene/r_draw_segment.cpp +++ b/src/swrenderer/scene/r_draw_segment.cpp @@ -26,17 +26,30 @@ namespace swrenderer { - size_t MaxDrawSegs; - drawseg_t *drawsegs; drawseg_t *firstdrawseg; drawseg_t *ds_p; + drawseg_t *drawsegs; size_t FirstInterestingDrawseg; TArray InterestingDrawsegs; + namespace + { + size_t MaxDrawSegs; /* Current capacity of the drawsegs array, in elements; now private to this file. */ + } + + void R_FreeDrawSegs() /* Releases the drawsegs array with M_Free and nulls the pointer; does nothing when the array is not allocated. NOTE(review): ds_p, firstdrawseg and MaxDrawSegs are not reset here. */ + { + if (drawsegs != nullptr) + { + M_Free(drawsegs); + drawsegs = nullptr; + } + } + void R_ClearDrawSegs() { - if (drawsegs == NULL) + if (drawsegs == nullptr) { MaxDrawSegs = 256; // [RH] Default. Increased as needed. firstdrawseg = drawsegs = (drawseg_t *)M_Malloc (MaxDrawSegs * sizeof(drawseg_t)); @@ -46,6 +59,22 @@ namespace swrenderer ds_p = drawsegs; } + drawseg_t *R_AddDrawSegment() /* Returns the next unused drawseg and advances ds_p past it. When the array is full it is doubled with M_Realloc and firstdrawseg/ds_p are rebased; because the array may move, drawseg_t pointers returned by earlier calls can dangle after a later call. After this returns, ds_p points at the slot AFTER the returned one. */ + { + if (ds_p == &drawsegs[MaxDrawSegs]) + { // [RH] Grab some more drawsegs + size_t newdrawsegs = MaxDrawSegs ? MaxDrawSegs * 2 : 32; + ptrdiff_t firstofs = firstdrawseg - drawsegs; + drawsegs = (drawseg_t *)M_Realloc(drawsegs, newdrawsegs * sizeof(drawseg_t)); + firstdrawseg = drawsegs + firstofs; + ds_p = drawsegs + MaxDrawSegs; + MaxDrawSegs = newdrawsegs; + DPrintf(DMSG_NOTIFY, "MaxDrawSegs increased to %zu\n", MaxDrawSegs); + } + + return ds_p++; + } + ptrdiff_t R_NewOpening(ptrdiff_t len) { ptrdiff_t res = lastopening; @@ -61,18 +90,4 @@ namespace swrenderer } return res; } - - void R_CheckDrawSegs() - { - if (ds_p == &drawsegs[MaxDrawSegs]) - { // [RH] Grab some more drawsegs - size_t newdrawsegs = MaxDrawSegs ? 
MaxDrawSegs * 2 : 32; - ptrdiff_t firstofs = firstdrawseg - drawsegs; - drawsegs = (drawseg_t *)M_Realloc(drawsegs, newdrawsegs * sizeof(drawseg_t)); - firstdrawseg = drawsegs + firstofs; - ds_p = drawsegs + MaxDrawSegs; - MaxDrawSegs = newdrawsegs; - DPrintf(DMSG_NOTIFY, "MaxDrawSegs increased to %zu\n", MaxDrawSegs); - } - } } diff --git a/src/swrenderer/scene/r_draw_segment.h b/src/swrenderer/scene/r_draw_segment.h index d22e754e00..05d6c3846f 100644 --- a/src/swrenderer/scene/r_draw_segment.h +++ b/src/swrenderer/scene/r_draw_segment.h @@ -32,14 +32,16 @@ namespace swrenderer int CurrentPortalUniq; // [ZZ] to identify the portal that this drawseg is in. used for sprite clipping. }; - extern drawseg_t *drawsegs; extern drawseg_t *firstdrawseg; extern drawseg_t *ds_p; + extern drawseg_t *drawsegs; extern TArray InterestingDrawsegs; // drawsegs that have something drawn on them extern size_t FirstInterestingDrawseg; void R_ClearDrawSegs(); - void R_CheckDrawSegs(); + void R_FreeDrawSegs(); + + drawseg_t *R_AddDrawSegment(); ptrdiff_t R_NewOpening(ptrdiff_t len); } diff --git a/src/swrenderer/scene/r_plane.cpp b/src/swrenderer/scene/r_plane.cpp index 746205e707..2b698e2728 100644 --- a/src/swrenderer/scene/r_plane.cpp +++ b/src/swrenderer/scene/r_plane.cpp @@ -1446,26 +1446,26 @@ void R_DrawPortals () } // Create a drawseg to clip sprites to the sky plane - R_CheckDrawSegs (); - ds_p->CurrentPortalUniq = CurrentPortalUniq; - ds_p->siz1 = INT_MAX; - ds_p->siz2 = INT_MAX; - ds_p->sz1 = 0; - ds_p->sz2 = 0; - ds_p->x1 = pl->left; - ds_p->x2 = pl->right; - ds_p->silhouette = SIL_BOTH; - ds_p->sprbottomclip = R_NewOpening (pl->right - pl->left); - ds_p->sprtopclip = R_NewOpening (pl->right - pl->left); - ds_p->maskedtexturecol = ds_p->swall = -1; - ds_p->bFogBoundary = false; - ds_p->curline = NULL; - ds_p->fake = 0; - memcpy (openings + ds_p->sprbottomclip, floorclip + pl->left, (pl->right - pl->left)*sizeof(short)); - memcpy (openings + ds_p->sprtopclip, ceilingclip + 
pl->left, (pl->right - pl->left)*sizeof(short)); + drawseg_t *draw_segment = R_AddDrawSegment(); + draw_segment->CurrentPortalUniq = CurrentPortalUniq; + draw_segment->siz1 = INT_MAX; + draw_segment->siz2 = INT_MAX; + draw_segment->sz1 = 0; + draw_segment->sz2 = 0; + draw_segment->x1 = pl->left; + draw_segment->x2 = pl->right; + draw_segment->silhouette = SIL_BOTH; + draw_segment->sprbottomclip = R_NewOpening (pl->right - pl->left); + draw_segment->sprtopclip = R_NewOpening (pl->right - pl->left); + draw_segment->maskedtexturecol = draw_segment->swall = -1; /* Must go through draw_segment: R_AddDrawSegment() returns ds_p++ and so has already advanced ds_p to the next, unused (possibly one-past-the-end) slot. */ + draw_segment->bFogBoundary = false; + draw_segment->curline = NULL; + draw_segment->fake = 0; + memcpy (openings + draw_segment->sprbottomclip, floorclip + pl->left, (pl->right - pl->left)*sizeof(short)); + memcpy (openings + draw_segment->sprtopclip, ceilingclip + pl->left, (pl->right - pl->left)*sizeof(short)); firstvissprite = vissprite_p; - firstdrawseg = ds_p++; + firstdrawseg = draw_segment; FirstInterestingDrawseg = InterestingDrawsegs.Size(); interestingStack.Push (FirstInterestingDrawseg); diff --git a/src/swrenderer/scene/r_segs.cpp b/src/swrenderer/scene/r_segs.cpp index 833755fe1a..e8439ab140 100644 --- a/src/swrenderer/scene/r_segs.cpp +++ b/src/swrenderer/scene/r_segs.cpp @@ -1557,9 +1557,8 @@ bool R_StoreWallRange (int start, int stop) I_FatalError ("Bad R_StoreWallRange: %i to %i", start , stop); #endif - // don't overflow and crash - R_CheckDrawSegs (); - + drawseg_t *draw_segment = R_AddDrawSegment(); + if (!rw_prepped) { rw_prepped = true; @@ -1569,57 +1568,56 @@ bool R_StoreWallRange (int start, int stop) rw_offset = FLOAT2FIXED(sidedef->GetTextureXOffset(side_t::mid)); rw_light = rw_lightleft + rw_lightstep * (start - WallC.sx1); - ds_p->CurrentPortalUniq = CurrentPortalUniq; - ds_p->sx1 = WallC.sx1; - ds_p->sx2 = WallC.sx2; - ds_p->sz1 = WallC.sz1; - ds_p->sz2 = WallC.sz2; - ds_p->cx = WallC.tleft.X;; - ds_p->cy = WallC.tleft.Y; - ds_p->cdx = WallC.tright.X - WallC.tleft.X; - ds_p->cdy = 
WallC.tright.Y - WallC.tleft.Y; - ds_p->tmapvals = WallT; - ds_p->siz1 = 1 / WallC.sz1; - ds_p->siz2 = 1 / WallC.sz2; - ds_p->x1 = rw_x = start; - ds_p->x2 = stop; - ds_p->curline = curline; + draw_segment->CurrentPortalUniq = CurrentPortalUniq; + draw_segment->sx1 = WallC.sx1; + draw_segment->sx2 = WallC.sx2; + draw_segment->sz1 = WallC.sz1; + draw_segment->sz2 = WallC.sz2; + draw_segment->cx = WallC.tleft.X;; + draw_segment->cy = WallC.tleft.Y; + draw_segment->cdx = WallC.tright.X - WallC.tleft.X; + draw_segment->cdy = WallC.tright.Y - WallC.tleft.Y; + draw_segment->tmapvals = WallT; + draw_segment->siz1 = 1 / WallC.sz1; + draw_segment->siz2 = 1 / WallC.sz2; + draw_segment->x1 = rw_x = start; + draw_segment->x2 = stop; + draw_segment->curline = curline; rw_stopx = stop; - ds_p->bFogBoundary = false; - ds_p->bFakeBoundary = false; - if(fake3D & 7) ds_p->fake = 1; - else ds_p->fake = 0; + draw_segment->bFogBoundary = false; + draw_segment->bFakeBoundary = false; + if(fake3D & 7) draw_segment->fake = 1; + else draw_segment->fake = 0; - // killough 1/6/98, 2/1/98: remove limit on openings - ds_p->sprtopclip = ds_p->sprbottomclip = ds_p->maskedtexturecol = ds_p->bkup = ds_p->swall = -1; + draw_segment->sprtopclip = draw_segment->sprbottomclip = draw_segment->maskedtexturecol = draw_segment->bkup = draw_segment->swall = -1; if (rw_markportal) { - ds_p->silhouette = SIL_BOTH; + draw_segment->silhouette = SIL_BOTH; } else if (backsector == NULL) { - ds_p->sprtopclip = R_NewOpening (stop - start); - ds_p->sprbottomclip = R_NewOpening (stop - start); - fillshort (openings + ds_p->sprtopclip, stop-start, viewheight); - memset (openings + ds_p->sprbottomclip, -1, (stop-start)*sizeof(short)); - ds_p->silhouette = SIL_BOTH; + draw_segment->sprtopclip = R_NewOpening (stop - start); + draw_segment->sprbottomclip = R_NewOpening (stop - start); + fillshort (openings + draw_segment->sprtopclip, stop-start, viewheight); + memset (openings + draw_segment->sprbottomclip, -1, 
(stop-start)*sizeof(short)); + draw_segment->silhouette = SIL_BOTH; } else { // two sided line - ds_p->silhouette = 0; + draw_segment->silhouette = 0; if (rw_frontfz1 > rw_backfz1 || rw_frontfz2 > rw_backfz2 || backsector->floorplane.PointOnSide(ViewPos) < 0) { - ds_p->silhouette = SIL_BOTTOM; + draw_segment->silhouette = SIL_BOTTOM; } if (rw_frontcz1 < rw_backcz1 || rw_frontcz2 < rw_backcz2 || backsector->ceilingplane.PointOnSide(ViewPos) < 0) { - ds_p->silhouette |= SIL_TOP; + draw_segment->silhouette |= SIL_TOP; } // killough 1/17/98: this test is required if the fix @@ -1634,41 +1632,41 @@ bool R_StoreWallRange (int start, int stop) extern int doorclosed; // killough 1/17/98, 2/8/98, 4/7/98 if (doorclosed || (rw_backcz1 <= rw_frontfz1 && rw_backcz2 <= rw_frontfz2)) { - ds_p->sprbottomclip = R_NewOpening (stop - start); - memset (openings + ds_p->sprbottomclip, -1, (stop-start)*sizeof(short)); - ds_p->silhouette |= SIL_BOTTOM; + draw_segment->sprbottomclip = R_NewOpening (stop - start); + memset (openings + draw_segment->sprbottomclip, -1, (stop-start)*sizeof(short)); + draw_segment->silhouette |= SIL_BOTTOM; } if (doorclosed || (rw_backfz1 >= rw_frontcz1 && rw_backfz2 >= rw_frontcz2)) { // killough 1/17/98, 2/8/98 - ds_p->sprtopclip = R_NewOpening (stop - start); - fillshort (openings + ds_p->sprtopclip, stop - start, viewheight); - ds_p->silhouette |= SIL_TOP; + draw_segment->sprtopclip = R_NewOpening (stop - start); + fillshort (openings + draw_segment->sprtopclip, stop - start, viewheight); + draw_segment->silhouette |= SIL_TOP; } } - if(!ds_p->fake && r_3dfloors && backsector->e && backsector->e->XFloor.ffloors.Size()) { + if(!draw_segment->fake && r_3dfloors && backsector->e && backsector->e->XFloor.ffloors.Size()) { for(i = 0; i < (int)backsector->e->XFloor.ffloors.Size(); i++) { F3DFloor *rover = backsector->e->XFloor.ffloors[i]; if(rover->flags & FF_RENDERSIDES && (!(rover->flags & FF_INVERTSIDES) || rover->flags & FF_ALLSIDES)) { - ds_p->bFakeBoundary 
|= 1; + draw_segment->bFakeBoundary |= 1; break; } } } - if(!ds_p->fake && r_3dfloors && frontsector->e && frontsector->e->XFloor.ffloors.Size()) { + if(!draw_segment->fake && r_3dfloors && frontsector->e && frontsector->e->XFloor.ffloors.Size()) { for(i = 0; i < (int)frontsector->e->XFloor.ffloors.Size(); i++) { F3DFloor *rover = frontsector->e->XFloor.ffloors[i]; if(rover->flags & FF_RENDERSIDES && (rover->flags & FF_ALLSIDES || rover->flags & FF_INVERTSIDES)) { - ds_p->bFakeBoundary |= 2; + draw_segment->bFakeBoundary |= 2; break; } } } // kg3D - no for fakes - if(!ds_p->fake) + if(!draw_segment->fake) // allocate space for masked texture tables, if needed // [RH] Don't just allocate the space; fill it in too. - if ((TexMan(sidedef->GetTexture(side_t::mid), true)->UseType != FTexture::TEX_Null || ds_p->bFakeBoundary || IsFogBoundary (frontsector, backsector)) && + if ((TexMan(sidedef->GetTexture(side_t::mid), true)->UseType != FTexture::TEX_Null || draw_segment->bFakeBoundary || IsFogBoundary (frontsector, backsector)) && (rw_ceilstat != 12 || !sidedef->GetTexture(side_t::top).isValid()) && (rw_floorstat != 3 || !sidedef->GetTexture(side_t::bottom).isValid()) && (WallC.sz1 >= TOO_CLOSE_Z && WallC.sz2 >= TOO_CLOSE_Z)) @@ -1680,21 +1678,21 @@ bool R_StoreWallRange (int start, int stop) maskedtexture = true; // kg3D - backup for mid and fake walls - ds_p->bkup = R_NewOpening(stop - start); - memcpy(openings + ds_p->bkup, &ceilingclip[start], sizeof(short)*(stop - start)); + draw_segment->bkup = R_NewOpening(stop - start); + memcpy(openings + draw_segment->bkup, &ceilingclip[start], sizeof(short)*(stop - start)); - ds_p->bFogBoundary = IsFogBoundary (frontsector, backsector); - if (sidedef->GetTexture(side_t::mid).isValid() || ds_p->bFakeBoundary) + draw_segment->bFogBoundary = IsFogBoundary (frontsector, backsector); + if (sidedef->GetTexture(side_t::mid).isValid() || draw_segment->bFakeBoundary) { if(sidedef->GetTexture(side_t::mid).isValid()) - 
ds_p->bFakeBoundary |= 4; // it is also mid texture + draw_segment->bFakeBoundary |= 4; // it is also mid texture // note: This should never have used the openings array to store its data! - ds_p->maskedtexturecol = R_NewOpening ((stop - start) * 2); - ds_p->swall = R_NewOpening ((stop - start) * 2); + draw_segment->maskedtexturecol = R_NewOpening ((stop - start) * 2); + draw_segment->swall = R_NewOpening ((stop - start) * 2); - lwal = (fixed_t *)(openings + ds_p->maskedtexturecol); - swal = (float *)(openings + ds_p->swall); + lwal = (fixed_t *)(openings + draw_segment->maskedtexturecol); + swal = (float *)(openings + draw_segment->swall); FTexture *pic = TexMan(sidedef->GetTexture(side_t::mid), true); double yscale = pic->Scale.Y * sidedef->GetTextureYScale(side_t::mid); fixed_t xoffset = FLOAT2FIXED(sidedef->GetTextureXOffset(side_t::mid)); @@ -1710,7 +1708,7 @@ bool R_StoreWallRange (int start, int stop) *swal++ = swall[i]; } - double istart = *((float *)(openings + ds_p->swall)) * yscale; + double istart = *((float *)(openings + draw_segment->swall)) * yscale; double iend = *(swal - 1) * yscale; #if 0 ///This was for avoiding overflow when using fixed point. It might not be needed anymore. 
@@ -1722,19 +1720,19 @@ bool R_StoreWallRange (int start, int stop) #endif istart = 1 / istart; iend = 1 / iend; - ds_p->yscale = (float)yscale; - ds_p->iscale = (float)istart; + draw_segment->yscale = (float)yscale; + draw_segment->iscale = (float)istart; if (stop - start > 0) { - ds_p->iscalestep = float((iend - istart) / (stop - start)); + draw_segment->iscalestep = float((iend - istart) / (stop - start)); } else { - ds_p->iscalestep = 0; + draw_segment->iscalestep = 0; } } - ds_p->light = rw_light; - ds_p->lightstep = rw_lightstep; + draw_segment->light = rw_light; + draw_segment->lightstep = rw_lightstep; // Masked midtextures should get the light level from the sector they reference, // not from the current subsector, which is what the current wallshade value @@ -1742,17 +1740,17 @@ bool R_StoreWallRange (int start, int stop) // sector should be whichever one they move into. if (curline->sidedef->Flags & WALLF_POLYOBJ) { - ds_p->shade = wallshade; + draw_segment->shade = wallshade; } else { - ds_p->shade = LIGHT2SHADE(curline->sidedef->GetLightLevel(foggy, curline->frontsector->lightlevel) + draw_segment->shade = LIGHT2SHADE(curline->sidedef->GetLightLevel(foggy, curline->frontsector->lightlevel) + r_actualextralight); } - if (ds_p->bFogBoundary || ds_p->maskedtexturecol != -1) + if (draw_segment->bFogBoundary || draw_segment->maskedtexturecol != -1) { - size_t drawsegnum = ds_p - drawsegs; + size_t drawsegnum = draw_segment - drawsegs; InterestingDrawsegs.Push (drawsegnum); } } @@ -1786,27 +1784,25 @@ bool R_StoreWallRange (int start, int stop) R_RenderSegLoop (); if(fake3D & 7) { - ds_p++; - return !(fake3D & FAKE3D_FAKEMASK); } // save sprite clipping info - if ( ((ds_p->silhouette & SIL_TOP) || maskedtexture) && ds_p->sprtopclip == -1) + if ( ((draw_segment->silhouette & SIL_TOP) || maskedtexture) && draw_segment->sprtopclip == -1) { - ds_p->sprtopclip = R_NewOpening (stop - start); - memcpy (openings + ds_p->sprtopclip, &ceilingclip[start], 
sizeof(short)*(stop-start)); + draw_segment->sprtopclip = R_NewOpening (stop - start); + memcpy (openings + draw_segment->sprtopclip, &ceilingclip[start], sizeof(short)*(stop-start)); } - if ( ((ds_p->silhouette & SIL_BOTTOM) || maskedtexture) && ds_p->sprbottomclip == -1) + if ( ((draw_segment->silhouette & SIL_BOTTOM) || maskedtexture) && draw_segment->sprbottomclip == -1) { - ds_p->sprbottomclip = R_NewOpening (stop - start); - memcpy (openings + ds_p->sprbottomclip, &floorclip[start], sizeof(short)*(stop-start)); + draw_segment->sprbottomclip = R_NewOpening (stop - start); + memcpy (openings + draw_segment->sprbottomclip, &floorclip[start], sizeof(short)*(stop-start)); } if (maskedtexture && curline->sidedef->GetTexture(side_t::mid).isValid()) { - ds_p->silhouette |= SIL_TOP | SIL_BOTTOM; + draw_segment->silhouette |= SIL_TOP | SIL_BOTTOM; } // [RH] Draw any decals bound to the seg @@ -1815,7 +1811,7 @@ bool R_StoreWallRange (int start, int stop) { for (DBaseDecal *decal = curline->sidedef->AttachedDecals; decal != NULL; decal = decal->WallNext) { - R_RenderDecal (curline->sidedef, decal, ds_p, 0); + R_RenderDecal (curline->sidedef, decal, draw_segment, 0); } } @@ -1824,13 +1820,13 @@ bool R_StoreWallRange (int start, int stop) PortalDrawseg pds; pds.src = curline->linedef; pds.dst = curline->linedef->special == Line_Mirror? 
curline->linedef : curline->linedef->getPortalDestination(); - pds.x1 = ds_p->x1; - pds.x2 = ds_p->x2; + pds.x1 = draw_segment->x1; + pds.x2 = draw_segment->x2; pds.len = pds.x2 - pds.x1; pds.ceilingclip.Resize(pds.len); - memcpy(&pds.ceilingclip[0], openings + ds_p->sprtopclip, pds.len*sizeof(*openings)); + memcpy(&pds.ceilingclip[0], openings + draw_segment->sprtopclip, pds.len*sizeof(*openings)); pds.floorclip.Resize(pds.len); - memcpy(&pds.floorclip[0], openings + ds_p->sprbottomclip, pds.len*sizeof(*openings)); + memcpy(&pds.floorclip[0], openings + draw_segment->sprbottomclip, pds.len*sizeof(*openings)); for (int i = 0; i < pds.x2-pds.x1; i++) { @@ -1848,8 +1844,6 @@ bool R_StoreWallRange (int start, int stop) WallPortals.Push(pds); } - ds_p++; - return !(fake3D & FAKE3D_FAKEMASK); } From 60c0dcc3c73cabb8bc81748c067ed304039bd446 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 30 Dec 2016 05:35:25 +0100 Subject: [PATCH 595/912] Move openings to r_memory --- src/CMakeLists.txt | 1 + src/swrenderer/r_memory.cpp | 52 +++++++++++++++++++++++++ src/swrenderer/r_memory.h | 10 +++++ src/swrenderer/scene/r_draw_segment.cpp | 16 -------- src/swrenderer/scene/r_draw_segment.h | 1 - src/swrenderer/scene/r_plane.cpp | 15 +------ src/swrenderer/scene/r_plane.h | 4 -- src/swrenderer/scene/r_segs.cpp | 1 + src/swrenderer/scene/r_segs.h | 4 -- src/swrenderer/scene/r_things.cpp | 1 + 10 files changed, 67 insertions(+), 38 deletions(-) create mode 100644 src/swrenderer/r_memory.cpp create mode 100644 src/swrenderer/r_memory.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 454ed25daa..87841b3802 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -808,6 +808,7 @@ set( NOT_COMPILED_SOURCE_FILES set( FASTMATH_PCH_SOURCES swrenderer/r_swrenderer.cpp swrenderer/r_main.cpp + swrenderer/r_memory.cpp swrenderer/drawers/r_draw.cpp swrenderer/drawers/r_draw_pal.cpp swrenderer/drawers/r_draw_rgba.cpp diff --git a/src/swrenderer/r_memory.cpp 
b/src/swrenderer/r_memory.cpp new file mode 100644 index 0000000000..855d851be7 --- /dev/null +++ b/src/swrenderer/r_memory.cpp @@ -0,0 +1,52 @@ + +#include <stdlib.h> +#include "templates.h" +#include "doomdef.h" +#include "m_bbox.h" +#include "i_system.h" +#include "p_lnspec.h" +#include "p_setup.h" +#include "swrenderer/r_main.h" +#include "swrenderer/drawers/r_draw.h" +#include "a_sharedglobal.h" +#include "g_level.h" +#include "p_effect.h" +#include "doomstat.h" +#include "r_state.h" +#include "v_palette.h" +#include "r_sky.h" +#include "po_man.h" +#include "r_data/colormaps.h" +#include "r_memory.h" + +namespace swrenderer +{ + short *openings; + + namespace + { + size_t maxopenings; + ptrdiff_t lastopening; + } + + ptrdiff_t R_NewOpening(ptrdiff_t len) + { + ptrdiff_t res = lastopening; + len = (len + 1) & ~1; // only return DWORD aligned addresses because some code stores fixed_t's and floats in openings... + lastopening += len; + if ((size_t)lastopening > maxopenings) + { + do + maxopenings = maxopenings ? 
maxopenings * 2 : 16384; + while ((size_t)lastopening > maxopenings); + openings = (short *)M_Realloc(openings, maxopenings * sizeof(*openings)); + DPrintf(DMSG_NOTIFY, "MaxOpenings increased to %zu\n", maxopenings); + } + return res; + } + + void R_FreeOpenings() + { + lastopening = 0; + } +} diff --git a/src/swrenderer/r_memory.h b/src/swrenderer/r_memory.h new file mode 100644 index 0000000000..06a03c3ffc --- /dev/null +++ b/src/swrenderer/r_memory.h @@ -0,0 +1,10 @@ + +#pragma once + +namespace swrenderer +{ + extern short *openings; + + ptrdiff_t R_NewOpening(ptrdiff_t len); + void R_FreeOpenings(); +} diff --git a/src/swrenderer/scene/r_draw_segment.cpp b/src/swrenderer/scene/r_draw_segment.cpp index 6181911f2a..2200f0ea51 100644 --- a/src/swrenderer/scene/r_draw_segment.cpp +++ b/src/swrenderer/scene/r_draw_segment.cpp @@ -74,20 +74,4 @@ namespace swrenderer return ds_p++; } - - ptrdiff_t R_NewOpening(ptrdiff_t len) - { - ptrdiff_t res = lastopening; - len = (len + 1) & ~1; // only return DWORD aligned addresses because some code stores fixed_t's and floats in openings... - lastopening += len; - if ((size_t)lastopening > maxopenings) - { - do - maxopenings = maxopenings ? 
maxopenings * 2 : 16384; - while ((size_t)lastopening > maxopenings); - openings = (short *)M_Realloc(openings, maxopenings * sizeof(*openings)); - DPrintf(DMSG_NOTIFY, "MaxOpenings increased to %zu\n", maxopenings); - } - return res; - } } diff --git a/src/swrenderer/scene/r_draw_segment.h b/src/swrenderer/scene/r_draw_segment.h index 05d6c3846f..61f6d4d580 100644 --- a/src/swrenderer/scene/r_draw_segment.h +++ b/src/swrenderer/scene/r_draw_segment.h @@ -43,5 +43,4 @@ namespace swrenderer void R_FreeDrawSegs(); drawseg_t *R_AddDrawSegment(); - ptrdiff_t R_NewOpening(ptrdiff_t len); } diff --git a/src/swrenderer/scene/r_plane.cpp b/src/swrenderer/scene/r_plane.cpp index 2b698e2728..b6648da712 100644 --- a/src/swrenderer/scene/r_plane.cpp +++ b/src/swrenderer/scene/r_plane.cpp @@ -65,6 +65,7 @@ #include "r_clip_segment.h" #include "r_draw_segment.h" #include "r_portal_segment.h" +#include "swrenderer/r_memory.h" #ifdef _MSC_VER #pragma warning(disable:4244) @@ -118,15 +119,6 @@ double stacked_visibility; DVector3 stacked_viewpos; DAngle stacked_angle; - -// -// opening -// - -size_t maxopenings; -short *openings; -ptrdiff_t lastopening; - // // Clip values are the solid pixel bounding the range. // floorclip starts out SCREENHEIGHT and is just outside the range @@ -546,7 +538,7 @@ void R_ClearPlanes (bool fullclear) !screen->Accel2D && ConBottom > viewwindowy && !bRenderingToCanvas ? 
(ConBottom - viewwindowy) : 0); - lastopening = 0; + R_FreeOpenings(); next_plane_light = 0; } @@ -1351,7 +1343,6 @@ void R_DrawPortals () DAngle savedangle = ViewAngle; ptrdiff_t savedvissprite_p = vissprite_p - vissprites; ptrdiff_t savedds_p = ds_p - drawsegs; - ptrdiff_t savedlastopening = lastopening; size_t savedinteresting = FirstInterestingDrawseg; double savedvisibility = R_GetVisibility(); AActor *savedcamera = camera; @@ -1520,8 +1511,6 @@ void R_DrawPortals () InterestingDrawsegs.Resize ((unsigned int)FirstInterestingDrawseg); FirstInterestingDrawseg = savedinteresting; - lastopening = savedlastopening; - camera = savedcamera; viewsector = savedsector; ViewPos = savedpos; diff --git a/src/swrenderer/scene/r_plane.h b/src/swrenderer/scene/r_plane.h index e8fe7882cf..39bf56b5cc 100644 --- a/src/swrenderer/scene/r_plane.h +++ b/src/swrenderer/scene/r_plane.h @@ -80,10 +80,6 @@ typedef struct visplane_s visplane_t; -// Visplane related. -extern ptrdiff_t lastopening; // type short - - typedef void (*planefunction_t) (int top, int bottom); extern planefunction_t floorfunc; diff --git a/src/swrenderer/scene/r_segs.cpp b/src/swrenderer/scene/r_segs.cpp index e8439ab140..079700d880 100644 --- a/src/swrenderer/scene/r_segs.cpp +++ b/src/swrenderer/scene/r_segs.cpp @@ -51,6 +51,7 @@ #include "r_walldraw.h" #include "r_draw_segment.h" #include "r_portal_segment.h" +#include "swrenderer/r_memory.h" #define WALLYREPEAT 8 diff --git a/src/swrenderer/scene/r_segs.h b/src/swrenderer/scene/r_segs.h index a4aa36b652..8aafa6ccee 100644 --- a/src/swrenderer/scene/r_segs.h +++ b/src/swrenderer/scene/r_segs.h @@ -31,10 +31,6 @@ struct drawseg_t; bool R_StoreWallRange(int start, int stop); void R_RenderMaskedSegRange (drawseg_t *ds, int x1, int x2); -extern short *openings; -extern ptrdiff_t lastopening; -extern size_t maxopenings; - int R_CreateWallSegmentY (short *outbuf, double z1, double z2, const FWallCoords *wallc); int R_CreateWallSegmentYSloped (short *outbuf, const 
secplane_t &plane, const FWallCoords *wallc); inline int R_CreateWallSegmentY(short *outbuf, double z, const FWallCoords *wallc) diff --git a/src/swrenderer/scene/r_things.cpp b/src/swrenderer/scene/r_things.cpp index f53d0531a6..3a6e7dff16 100644 --- a/src/swrenderer/scene/r_things.cpp +++ b/src/swrenderer/scene/r_things.cpp @@ -70,6 +70,7 @@ #include "r_voxel.h" #include "r_draw_segment.h" #include "r_portal_segment.h" +#include "swrenderer/r_memory.h" EXTERN_CVAR(Bool, st_scale) EXTERN_CVAR(Bool, r_shadercolormaps) From 41d0e7c6633b59d7f76a5c86706d19e95135755d Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 30 Dec 2016 06:08:47 +0100 Subject: [PATCH 596/912] Move portal drawing to r_portal --- src/CMakeLists.txt | 1 + src/r_utility.h | 2 + src/swrenderer/r_main.cpp | 259 +------------ src/swrenderer/r_main.h | 4 + src/swrenderer/scene/r_bsp.h | 2 + src/swrenderer/scene/r_draw_segment.h | 2 + src/swrenderer/scene/r_plane.cpp | 248 +----------- src/swrenderer/scene/r_plane.h | 8 +- src/swrenderer/scene/r_portal.cpp | 534 ++++++++++++++++++++++++++ src/swrenderer/scene/r_portal.h | 12 + 10 files changed, 569 insertions(+), 503 deletions(-) create mode 100644 src/swrenderer/scene/r_portal.cpp create mode 100644 src/swrenderer/scene/r_portal.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 87841b3802..f30cc647bf 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -824,6 +824,7 @@ set( FASTMATH_PCH_SOURCES swrenderer/scene/r_clip_segment.cpp swrenderer/scene/r_draw_segment.cpp swrenderer/scene/r_portal_segment.cpp + swrenderer/scene/r_portal.cpp polyrenderer/poly_renderer.cpp polyrenderer/scene/poly_scene.cpp polyrenderer/scene/poly_portal.cpp diff --git a/src/r_utility.h b/src/r_utility.h index da9a32d117..688cb757f9 100644 --- a/src/r_utility.h +++ b/src/r_utility.h @@ -44,6 +44,8 @@ extern unsigned int R_OldBlend; const double r_Yaspect = 200.0; // Why did I make this a variable? It's never set anywhere. 
+extern bool r_showviewer; + //========================================================================== // // R_PointOnSide diff --git a/src/swrenderer/r_main.cpp b/src/swrenderer/r_main.cpp index eefb4af83f..2d14a2308e 100644 --- a/src/swrenderer/r_main.cpp +++ b/src/swrenderer/r_main.cpp @@ -44,6 +44,7 @@ #include "scene/r_clip_segment.h" #include "scene/r_segs.h" #include "scene/r_3dfloors.h" +#include "scene/r_portal.h" #include "r_sky.h" #include "drawers/r_draw_rgba.h" #include "st_stuff.h" @@ -70,16 +71,8 @@ CVAR (String, r_viewsize, "", CVAR_NOSET) CVAR (Bool, r_shadercolormaps, true, CVAR_ARCHIVE) -CVAR(Int, r_portal_recursions, 4, CVAR_ARCHIVE) -CVAR(Bool, r_highlight_portals, false, CVAR_ARCHIVE) - EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor) -extern cycle_t WallCycles, PlaneCycles, MaskedCycles, WallScanCycles; -extern cycle_t FrameCycles; - -extern bool r_showviewer; - namespace swrenderer { @@ -539,245 +532,6 @@ void R_SetupFreelook() } } -//========================================================================== -// -// R_EnterPortal -// -// [RH] Draw the reflection inside a mirror -// [ZZ] Merged with portal code, originally called R_EnterMirror -// -//========================================================================== - -void R_HighlightPortal (PortalDrawseg* pds) -{ - // [ZZ] NO OVERFLOW CHECKS HERE - // I believe it won't break. if it does, blame me. 
:( - - if (r_swtruecolor) // Assuming this is just a debug function - return; - - BYTE color = (BYTE)BestColor((DWORD *)GPalette.BaseColors, 255, 0, 0, 0, 255); - - BYTE* pixels = RenderTarget->GetBuffer(); - // top edge - for (int x = pds->x1; x < pds->x2; x++) - { - if (x < 0 || x >= RenderTarget->GetWidth()) - continue; - - int p = x - pds->x1; - int Ytop = pds->ceilingclip[p]; - int Ybottom = pds->floorclip[p]; - - if (x == pds->x1 || x == pds->x2-1) - { - RenderTarget->DrawLine(x, Ytop, x, Ybottom+1, color, 0); - continue; - } - - int YtopPrev = pds->ceilingclip[p-1]; - int YbottomPrev = pds->floorclip[p-1]; - - if (abs(Ytop-YtopPrev) > 1) - RenderTarget->DrawLine(x, YtopPrev, x, Ytop, color, 0); - else *(pixels + Ytop * RenderTarget->GetPitch() + x) = color; - - if (abs(Ybottom-YbottomPrev) > 1) - RenderTarget->DrawLine(x, YbottomPrev, x, Ybottom, color, 0); - else *(pixels + Ybottom * RenderTarget->GetPitch() + x) = color; - } -} - -void R_EnterPortal (PortalDrawseg* pds, int depth) -{ - // [ZZ] check depth. fill portal with black if it's exceeding the visual recursion limit, and continue like nothing happened. 
- if (depth >= r_portal_recursions) - { - BYTE color = (BYTE)BestColor((DWORD *)GPalette.BaseColors, 0, 0, 0, 0, 255); - int spacing = RenderTarget->GetPitch(); - for (int x = pds->x1; x < pds->x2; x++) - { - if (x < 0 || x >= RenderTarget->GetWidth()) - continue; - - int Ytop = pds->ceilingclip[x-pds->x1]; - int Ybottom = pds->floorclip[x-pds->x1]; - - if (r_swtruecolor) - { - uint32_t *dest = (uint32_t*)RenderTarget->GetBuffer() + x + Ytop * spacing; - - uint32_t c = GPalette.BaseColors[color].d; - for (int y = Ytop; y <= Ybottom; y++) - { - *dest = c; - dest += spacing; - } - } - else - { - BYTE *dest = RenderTarget->GetBuffer() + x + Ytop * spacing; - - for (int y = Ytop; y <= Ybottom; y++) - { - *dest = color; - dest += spacing; - } - } - } - - if (r_highlight_portals) - R_HighlightPortal(pds); - - return; - } - - DAngle startang = ViewAngle; - DVector3 startpos = ViewPos; - DVector3 savedpath[2] = { ViewPath[0], ViewPath[1] }; - ActorRenderFlags savedvisibility = camera? camera->renderflags & RF_INVISIBLE : ActorRenderFlags::FromInt(0); - - CurrentPortalUniq++; - - unsigned int portalsAtStart = WallPortals.Size (); - - if (pds->mirror) - { - //vertex_t *v1 = ds->curline->v1; - vertex_t *v1 = pds->src->v1; - - // Reflect the current view behind the mirror. 
- if (pds->src->Delta().X == 0) - { // vertical mirror - ViewPos.X = v1->fX() - startpos.X + v1->fX(); - } - else if (pds->src->Delta().Y == 0) - { // horizontal mirror - ViewPos.Y = v1->fY() - startpos.Y + v1->fY(); - } - else - { // any mirror - vertex_t *v2 = pds->src->v2; - - double dx = v2->fX() - v1->fX(); - double dy = v2->fY() - v1->fY(); - double x1 = v1->fX(); - double y1 = v1->fY(); - double x = startpos.X; - double y = startpos.Y; - - // the above two cases catch len == 0 - double r = ((x - x1)*dx + (y - y1)*dy) / (dx*dx + dy*dy); - - ViewPos.X = (x1 + r * dx)*2 - x; - ViewPos.Y = (y1 + r * dy)*2 - y; - } - ViewAngle = pds->src->Delta().Angle() * 2 - startang; - } - else - { - P_TranslatePortalXY(pds->src, ViewPos.X, ViewPos.Y); - P_TranslatePortalZ(pds->src, ViewPos.Z); - P_TranslatePortalAngle(pds->src, ViewAngle); - P_TranslatePortalXY(pds->src, ViewPath[0].X, ViewPath[0].Y); - P_TranslatePortalXY(pds->src, ViewPath[1].X, ViewPath[1].Y); - - if (!r_showviewer && camera && P_PointOnLineSidePrecise(ViewPath[0], pds->dst) != P_PointOnLineSidePrecise(ViewPath[1], pds->dst)) - { - double distp = (ViewPath[0] - ViewPath[1]).Length(); - if (distp > EQUAL_EPSILON) - { - double dist1 = (ViewPos - ViewPath[0]).Length(); - double dist2 = (ViewPos - ViewPath[1]).Length(); - - if (dist1 + dist2 < distp + 1) - { - camera->renderflags |= RF_INVISIBLE; - } - } - } - } - - ViewSin = ViewAngle.Sin(); - ViewCos = ViewAngle.Cos(); - - ViewTanSin = FocalTangent * ViewSin; - ViewTanCos = FocalTangent * ViewCos; - - R_CopyStackedViewParameters(); - - validcount++; - PortalDrawseg* prevpds = CurrentPortal; - CurrentPortal = pds; - - R_ClearPlanes (false); - R_ClearClipSegs (pds->x1, pds->x2); - - WindowLeft = pds->x1; - WindowRight = pds->x2; - - // RF_XFLIP should be removed before calling the root function - int prevmf = MirrorFlags; - if (pds->mirror) - { - if (MirrorFlags & RF_XFLIP) - MirrorFlags &= ~RF_XFLIP; - else MirrorFlags |= RF_XFLIP; - } - - // some portals 
have height differences, account for this here - R_3D_EnterSkybox(); // push 3D floor height map - CurrentPortalInSkybox = false; // first portal in a skybox should set this variable to false for proper clipping in skyboxes. - - // first pass, set clipping - memcpy (ceilingclip + pds->x1, &pds->ceilingclip[0], pds->len*sizeof(*ceilingclip)); - memcpy (floorclip + pds->x1, &pds->floorclip[0], pds->len*sizeof(*floorclip)); - - InSubsector = NULL; - R_RenderBSPNode (nodes + numnodes - 1); - R_3D_ResetClip(); // reset clips (floor/ceiling) - if (!savedvisibility && camera) camera->renderflags &= ~RF_INVISIBLE; - - PlaneCycles.Clock(); - R_DrawPlanes (); - R_DrawPortals (); - PlaneCycles.Unclock(); - - double vzp = ViewPos.Z; - - int prevuniq = CurrentPortalUniq; - // depth check is in another place right now - unsigned int portalsAtEnd = WallPortals.Size (); - for (; portalsAtStart < portalsAtEnd; portalsAtStart++) - { - R_EnterPortal (&WallPortals[portalsAtStart], depth + 1); - } - int prevuniq2 = CurrentPortalUniq; - CurrentPortalUniq = prevuniq; - - NetUpdate(); - - MaskedCycles.Clock(); // [ZZ] count sprites in portals/mirrors along with normal ones. - R_DrawMasked (); // this is required since with portals there often will be cases when more than 80% of the view is inside a portal. 
- MaskedCycles.Unclock(); - - NetUpdate(); - - R_3D_LeaveSkybox(); // pop 3D floor height map - CurrentPortalUniq = prevuniq2; - - // draw a red line around a portal if it's being highlighted - if (r_highlight_portals) - R_HighlightPortal(pds); - - CurrentPortal = prevpds; - MirrorFlags = prevmf; - ViewAngle = startang; - ViewPos = startpos; - ViewPath[0] = savedpath[0]; - ViewPath[1] = savedpath[1]; -} - //========================================================================== // // R_SetupBuffer @@ -889,16 +643,7 @@ void R_RenderActorView (AActor *actor, bool dontmaplines) R_DrawPortals(); PlaneCycles.Unclock(); - // [RH] Walk through mirrors - // [ZZ] Merged with portals - size_t lastportal = WallPortals.Size(); - for (unsigned int i = 0; i < lastportal; i++) - { - R_EnterPortal(&WallPortals[i], 0); - } - - CurrentPortal = NULL; - CurrentPortalUniq = 0; + R_DrawWallPortals(); NetUpdate (); diff --git a/src/swrenderer/r_main.h b/src/swrenderer/r_main.h index 5fb10cec3e..1a62eccf10 100644 --- a/src/swrenderer/r_main.h +++ b/src/swrenderer/r_main.h @@ -35,9 +35,13 @@ extern int viewwindowy; typedef BYTE lighttable_t; // This could be wider for >8 bit display. +extern cycle_t FrameCycles; + namespace swrenderer { +extern cycle_t WallCycles, PlaneCycles, MaskedCycles, WallScanCycles; + // // POV related. 
// diff --git a/src/swrenderer/scene/r_bsp.h b/src/swrenderer/scene/r_bsp.h index 79f5b3d5ef..6996a684b2 100644 --- a/src/swrenderer/scene/r_bsp.h +++ b/src/swrenderer/scene/r_bsp.h @@ -67,6 +67,8 @@ enum FAKED_AboveCeiling }; +extern subsector_t *InSubsector; + extern seg_t* curline; extern side_t* sidedef; extern line_t* linedef; diff --git a/src/swrenderer/scene/r_draw_segment.h b/src/swrenderer/scene/r_draw_segment.h index 61f6d4d580..df9ed80dd9 100644 --- a/src/swrenderer/scene/r_draw_segment.h +++ b/src/swrenderer/scene/r_draw_segment.h @@ -1,6 +1,8 @@ #pragma once +#include "r_bsp.h" + namespace swrenderer { struct drawseg_t diff --git a/src/swrenderer/scene/r_plane.cpp b/src/swrenderer/scene/r_plane.cpp index b6648da712..b13af8b76d 100644 --- a/src/swrenderer/scene/r_plane.cpp +++ b/src/swrenderer/scene/r_plane.cpp @@ -73,7 +73,6 @@ CVAR(Bool, r_linearsky, false, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); CVAR(Bool, tilt, false, 0); -CVAR(Bool, r_skyboxes, true, 0) EXTERN_CVAR(Int, r_skymode) @@ -90,16 +89,10 @@ static void R_DrawSkyStriped (visplane_t *pl); planefunction_t floorfunc; planefunction_t ceilingfunc; -// Here comes the obnoxious "visplane". -#define MAXVISPLANES 128 /* must be a power of 2 */ - -// Avoid infinite recursion with stacked sectors by limiting them. -#define MAX_SKYBOX_PLANES 1000 - // [RH] Allocate one extra for sky box planes. -static visplane_t *visplanes[MAXVISPLANES+1]; // killough -static visplane_t *freetail; // killough -static visplane_t **freehead = &freetail; // killough +visplane_t *visplanes[MAXVISPLANES+1]; +visplane_t *freetail; +visplane_t **freehead = &freetail; visplane_t *floorplane; visplane_t *ceilingplane; @@ -1300,241 +1293,6 @@ void R_DrawSinglePlane (visplane_t *pl, fixed_t alpha, bool additive, bool maske NetUpdate (); } -//========================================================================== -// -// R_DrawPortals -// -// Draws any recorded sky boxes and then frees them. -// -// The process: -// 1. 
Move the camera to coincide with the SkyViewpoint. -// 2. Clear out the old planes. (They have already been drawn.) -// 3. Clear a window out of the ClipSegs just large enough for the plane. -// 4. Pretend the existing vissprites and drawsegs aren't there. -// 5. Create a drawseg at 0 distance to clip sprites to the visplane. It -// doesn't need to be associated with a line in the map, since there -// will never be any sprites in front of it. -// 6. Render the BSP, then planes, then masked stuff. -// 7. Restore the previous vissprites and drawsegs. -// 8. Repeat for any other sky boxes. -// 9. Put the camera back where it was to begin with. -// -//========================================================================== -static int numskyboxes; - -void R_DrawPortals () -{ - static TArray interestingStack; - static TArray drawsegStack; - static TArray visspriteStack; - static TArray viewposStack; - static TArray visplaneStack; - - numskyboxes = 0; - - if (visplanes[MAXVISPLANES] == NULL) - return; - - R_3D_EnterSkybox(); - CurrentPortalInSkybox = true; - - int savedextralight = extralight; - DVector3 savedpos = ViewPos; - DAngle savedangle = ViewAngle; - ptrdiff_t savedvissprite_p = vissprite_p - vissprites; - ptrdiff_t savedds_p = ds_p - drawsegs; - size_t savedinteresting = FirstInterestingDrawseg; - double savedvisibility = R_GetVisibility(); - AActor *savedcamera = camera; - sector_t *savedsector = viewsector; - - int i; - visplane_t *pl; - - for (pl = visplanes[MAXVISPLANES]; pl != NULL; pl = visplanes[MAXVISPLANES]) - { - // Pop the visplane off the list now so that if this skybox adds more - // skyboxes to the list, they will be drawn instead of skipped (because - // new skyboxes go to the beginning of the list instead of the end). 
- visplanes[MAXVISPLANES] = pl->next; - pl->next = NULL; - - if (pl->right < pl->left || !r_skyboxes || numskyboxes == MAX_SKYBOX_PLANES || pl->portal == NULL) - { - R_DrawSinglePlane (pl, OPAQUE, false, false); - *freehead = pl; - freehead = &pl->next; - continue; - } - - numskyboxes++; - - FSectorPortal *port = pl->portal; - switch (port->mType) - { - case PORTS_SKYVIEWPOINT: - { - // Don't let gun flashes brighten the sky box - ASkyViewpoint *sky = barrier_cast(port->mSkybox); - extralight = 0; - R_SetVisibility(sky->args[0] * 0.25f); - - ViewPos = sky->InterpolatedPosition(r_TicFracF); - ViewAngle = savedangle + (sky->PrevAngles.Yaw + deltaangle(sky->PrevAngles.Yaw, sky->Angles.Yaw) * r_TicFracF); - - R_CopyStackedViewParameters(); - break; - } - - case PORTS_STACKEDSECTORTHING: - case PORTS_PORTAL: - case PORTS_LINKEDPORTAL: - extralight = pl->extralight; - R_SetVisibility (pl->visibility); - ViewPos.X = pl->viewpos.X + port->mDisplacement.X; - ViewPos.Y = pl->viewpos.Y + port->mDisplacement.Y; - ViewPos.Z = pl->viewpos.Z; - ViewAngle = pl->viewangle; - break; - - case PORTS_HORIZON: - case PORTS_PLANE: - // not implemented yet - - default: - R_DrawSinglePlane(pl, OPAQUE, false, false); - *freehead = pl; - freehead = &pl->next; - numskyboxes--; - continue; - } - - port->mFlags |= PORTSF_INSKYBOX; - if (port->mPartner > 0) sectorPortals[port->mPartner].mFlags |= PORTSF_INSKYBOX; - camera = NULL; - viewsector = port->mDestination; - assert(viewsector != NULL); - R_SetViewAngle (); - validcount++; // Make sure we see all sprites - - R_ClearPlanes (false); - R_ClearClipSegs (pl->left, pl->right); - WindowLeft = pl->left; - WindowRight = pl->right; - - for (i = pl->left; i < pl->right; i++) - { - if (pl->top[i] == 0x7fff) - { - ceilingclip[i] = viewheight; - floorclip[i] = -1; - } - else - { - ceilingclip[i] = pl->top[i]; - floorclip[i] = pl->bottom[i]; - } - } - - // Create a drawseg to clip sprites to the sky plane - drawseg_t *draw_segment = R_AddDrawSegment(); 
- draw_segment->CurrentPortalUniq = CurrentPortalUniq; - draw_segment->siz1 = INT_MAX; - draw_segment->siz2 = INT_MAX; - draw_segment->sz1 = 0; - draw_segment->sz2 = 0; - draw_segment->x1 = pl->left; - draw_segment->x2 = pl->right; - draw_segment->silhouette = SIL_BOTH; - draw_segment->sprbottomclip = R_NewOpening (pl->right - pl->left); - draw_segment->sprtopclip = R_NewOpening (pl->right - pl->left); - draw_segment->maskedtexturecol = ds_p->swall = -1; - draw_segment->bFogBoundary = false; - draw_segment->curline = NULL; - draw_segment->fake = 0; - memcpy (openings + draw_segment->sprbottomclip, floorclip + pl->left, (pl->right - pl->left)*sizeof(short)); - memcpy (openings + draw_segment->sprtopclip, ceilingclip + pl->left, (pl->right - pl->left)*sizeof(short)); - - firstvissprite = vissprite_p; - firstdrawseg = draw_segment; - FirstInterestingDrawseg = InterestingDrawsegs.Size(); - - interestingStack.Push (FirstInterestingDrawseg); - ptrdiff_t diffnum = firstdrawseg - drawsegs; - drawsegStack.Push (diffnum); - diffnum = firstvissprite - vissprites; - visspriteStack.Push (diffnum); - viewposStack.Push(ViewPos); - visplaneStack.Push (pl); - - InSubsector = NULL; - R_RenderBSPNode (nodes + numnodes - 1); - R_3D_ResetClip(); // reset clips (floor/ceiling) - R_DrawPlanes (); - - port->mFlags &= ~PORTSF_INSKYBOX; - if (port->mPartner > 0) sectorPortals[port->mPartner].mFlags &= ~PORTSF_INSKYBOX; - } - - // Draw all the masked textures in a second pass, in the reverse order they - // were added. This must be done separately from the previous step for the - // sake of nested skyboxes. - while (interestingStack.Pop (FirstInterestingDrawseg)) - { - ptrdiff_t pd = 0; - - drawsegStack.Pop (pd); - firstdrawseg = drawsegs + pd; - visspriteStack.Pop (pd); - firstvissprite = vissprites + pd; - - // Masked textures and planes need the view coordinates restored for proper positioning. 
- viewposStack.Pop(ViewPos); - - R_DrawMasked (); - - ds_p = firstdrawseg; - vissprite_p = firstvissprite; - - visplaneStack.Pop (pl); - if (pl->Alpha > 0 && pl->picnum != skyflatnum) - { - R_DrawSinglePlane (pl, pl->Alpha, pl->Additive, true); - } - *freehead = pl; - freehead = &pl->next; - } - firstvissprite = vissprites; - vissprite_p = vissprites + savedvissprite_p; - firstdrawseg = drawsegs; - ds_p = drawsegs + savedds_p; - InterestingDrawsegs.Resize ((unsigned int)FirstInterestingDrawseg); - FirstInterestingDrawseg = savedinteresting; - - camera = savedcamera; - viewsector = savedsector; - ViewPos = savedpos; - R_SetVisibility(savedvisibility); - extralight = savedextralight; - ViewAngle = savedangle; - R_SetViewAngle (); - - CurrentPortalInSkybox = false; - R_3D_LeaveSkybox(); - - if(fakeActive) return; - - for (*freehead = visplanes[MAXVISPLANES], visplanes[MAXVISPLANES] = NULL; *freehead; ) - freehead = &(*freehead)->next; -} - -ADD_STAT(skyboxes) -{ - FString out; - out.Format ("%d skybox planes", numskyboxes); - return out; -} - //========================================================================== // // R_DrawSkyPlane diff --git a/src/swrenderer/scene/r_plane.h b/src/swrenderer/scene/r_plane.h index 39bf56b5cc..176b9d2c88 100644 --- a/src/swrenderer/scene/r_plane.h +++ b/src/swrenderer/scene/r_plane.h @@ -97,7 +97,7 @@ void R_ClearPlanes (bool fullclear); void R_AddPlaneLights(visplane_t *plane, FLightNode *light_head); int R_DrawPlanes (); -void R_DrawPortals (); +void R_DrawSinglePlane(visplane_t *pl, fixed_t alpha, bool additive, bool masked); void R_DrawSkyPlane (visplane_t *pl); void R_DrawNormalPlane (visplane_t *pl, double xscale, double yscale, fixed_t alpha, bool additive, bool masked); void R_DrawTiltedPlane (visplane_t *pl, double xscale, double yscale, fixed_t alpha, bool additive, bool masked); @@ -128,6 +128,12 @@ bool R_PlaneInitData (void); extern visplane_t* floorplane; extern visplane_t* ceilingplane; +#define MAXVISPLANES 128 /* 
must be a power of 2 */ + +extern visplane_t *visplanes[MAXVISPLANES + 1]; +extern visplane_t *freetail; +extern visplane_t **freehead; + } #endif // __R_PLANE_H__ diff --git a/src/swrenderer/scene/r_portal.cpp b/src/swrenderer/scene/r_portal.cpp new file mode 100644 index 0000000000..ccd27236f6 --- /dev/null +++ b/src/swrenderer/scene/r_portal.cpp @@ -0,0 +1,534 @@ + +#include +#include + +#include "templates.h" +#include "doomdef.h" +#include "d_net.h" +#include "doomstat.h" +#include "m_random.h" +#include "m_bbox.h" +#include "r_portal.h" +#include "r_sky.h" +#include "st_stuff.h" +#include "c_cvars.h" +#include "c_dispatch.h" +#include "v_video.h" +#include "stats.h" +#include "i_video.h" +#include "i_system.h" +#include "a_sharedglobal.h" +#include "r_data/r_translate.h" +#include "p_3dmidtex.h" +#include "r_data/r_interpolate.h" +#include "v_palette.h" +#include "po_man.h" +#include "p_effect.h" +#include "st_start.h" +#include "v_font.h" +#include "r_data/colormaps.h" +#include "p_maputl.h" +#include "p_setup.h" +#include "version.h" +#include "swrenderer/drawers/r_draw_rgba.h" +#include "r_utility.h" +#include "r_plane.h" +#include "r_clip_segment.h" +#include "r_draw_segment.h" +#include "r_things.h" +#include "r_3dfloors.h" +#include "swrenderer/r_main.h" +#include "swrenderer/r_memory.h" + +CVAR(Int, r_portal_recursions, 4, CVAR_ARCHIVE) +CVAR(Bool, r_highlight_portals, false, CVAR_ARCHIVE) +CVAR(Bool, r_skyboxes, true, 0) + +// Avoid infinite recursion with stacked sectors by limiting them. +#define MAX_SKYBOX_PLANES 1000 + +namespace swrenderer +{ + namespace + { + int numskyboxes; // For ADD_STAT(skyboxes) + } + + //========================================================================== + // + // R_DrawPortals + // + // Draws any recorded sky boxes and then frees them. + // + // The process: + // 1. Move the camera to coincide with the SkyViewpoint. + // 2. Clear out the old planes. (They have already been drawn.) + // 3. 
Clear a window out of the ClipSegs just large enough for the plane. + // 4. Pretend the existing vissprites and drawsegs aren't there. + // 5. Create a drawseg at 0 distance to clip sprites to the visplane. It + // doesn't need to be associated with a line in the map, since there + // will never be any sprites in front of it. + // 6. Render the BSP, then planes, then masked stuff. + // 7. Restore the previous vissprites and drawsegs. + // 8. Repeat for any other sky boxes. + // 9. Put the camera back where it was to begin with. + // + //========================================================================== + + void R_DrawPortals() + { + static TArray interestingStack; + static TArray drawsegStack; + static TArray visspriteStack; + static TArray viewposStack; + static TArray visplaneStack; + + numskyboxes = 0; + + if (visplanes[MAXVISPLANES] == nullptr) + return; + + R_3D_EnterSkybox(); + CurrentPortalInSkybox = true; + + int savedextralight = extralight; + DVector3 savedpos = ViewPos; + DAngle savedangle = ViewAngle; + ptrdiff_t savedvissprite_p = vissprite_p - vissprites; + ptrdiff_t savedds_p = ds_p - drawsegs; + size_t savedinteresting = FirstInterestingDrawseg; + double savedvisibility = R_GetVisibility(); + AActor *savedcamera = camera; + sector_t *savedsector = viewsector; + + int i; + visplane_t *pl; + + for (pl = visplanes[MAXVISPLANES]; pl != nullptr; pl = visplanes[MAXVISPLANES]) + { + // Pop the visplane off the list now so that if this skybox adds more + // skyboxes to the list, they will be drawn instead of skipped (because + // new skyboxes go to the beginning of the list instead of the end). 
+ visplanes[MAXVISPLANES] = pl->next; + pl->next = nullptr; + + if (pl->right < pl->left || !r_skyboxes || numskyboxes == MAX_SKYBOX_PLANES || pl->portal == nullptr) + { + R_DrawSinglePlane(pl, OPAQUE, false, false); + *freehead = pl; + freehead = &pl->next; + continue; + } + + numskyboxes++; + + FSectorPortal *port = pl->portal; + switch (port->mType) + { + case PORTS_SKYVIEWPOINT: + { + // Don't let gun flashes brighten the sky box + ASkyViewpoint *sky = barrier_cast(port->mSkybox); + extralight = 0; + R_SetVisibility(sky->args[0] * 0.25f); + + ViewPos = sky->InterpolatedPosition(r_TicFracF); + ViewAngle = savedangle + (sky->PrevAngles.Yaw + deltaangle(sky->PrevAngles.Yaw, sky->Angles.Yaw) * r_TicFracF); + + R_CopyStackedViewParameters(); + break; + } + + case PORTS_STACKEDSECTORTHING: + case PORTS_PORTAL: + case PORTS_LINKEDPORTAL: + extralight = pl->extralight; + R_SetVisibility(pl->visibility); + ViewPos.X = pl->viewpos.X + port->mDisplacement.X; + ViewPos.Y = pl->viewpos.Y + port->mDisplacement.Y; + ViewPos.Z = pl->viewpos.Z; + ViewAngle = pl->viewangle; + break; + + case PORTS_HORIZON: + case PORTS_PLANE: + // not implemented yet + + default: + R_DrawSinglePlane(pl, OPAQUE, false, false); + *freehead = pl; + freehead = &pl->next; + numskyboxes--; + continue; + } + + port->mFlags |= PORTSF_INSKYBOX; + if (port->mPartner > 0) sectorPortals[port->mPartner].mFlags |= PORTSF_INSKYBOX; + camera = nullptr; + viewsector = port->mDestination; + assert(viewsector != nullptr); + R_SetViewAngle(); + validcount++; // Make sure we see all sprites + + R_ClearPlanes(false); + R_ClearClipSegs(pl->left, pl->right); + WindowLeft = pl->left; + WindowRight = pl->right; + + for (i = pl->left; i < pl->right; i++) + { + if (pl->top[i] == 0x7fff) + { + ceilingclip[i] = viewheight; + floorclip[i] = -1; + } + else + { + ceilingclip[i] = pl->top[i]; + floorclip[i] = pl->bottom[i]; + } + } + + // Create a drawseg to clip sprites to the sky plane + drawseg_t *draw_segment = 
R_AddDrawSegment(); + draw_segment->CurrentPortalUniq = CurrentPortalUniq; + draw_segment->siz1 = INT_MAX; + draw_segment->siz2 = INT_MAX; + draw_segment->sz1 = 0; + draw_segment->sz2 = 0; + draw_segment->x1 = pl->left; + draw_segment->x2 = pl->right; + draw_segment->silhouette = SIL_BOTH; + draw_segment->sprbottomclip = R_NewOpening(pl->right - pl->left); + draw_segment->sprtopclip = R_NewOpening(pl->right - pl->left); + draw_segment->maskedtexturecol = ds_p->swall = -1; + draw_segment->bFogBoundary = false; + draw_segment->curline = nullptr; + draw_segment->fake = 0; + memcpy(openings + draw_segment->sprbottomclip, floorclip + pl->left, (pl->right - pl->left) * sizeof(short)); + memcpy(openings + draw_segment->sprtopclip, ceilingclip + pl->left, (pl->right - pl->left) * sizeof(short)); + + firstvissprite = vissprite_p; + firstdrawseg = draw_segment; + FirstInterestingDrawseg = InterestingDrawsegs.Size(); + + interestingStack.Push(FirstInterestingDrawseg); + ptrdiff_t diffnum = firstdrawseg - drawsegs; + drawsegStack.Push(diffnum); + diffnum = firstvissprite - vissprites; + visspriteStack.Push(diffnum); + viewposStack.Push(ViewPos); + visplaneStack.Push(pl); + + InSubsector = nullptr; + R_RenderBSPNode(nodes + numnodes - 1); + R_3D_ResetClip(); // reset clips (floor/ceiling) + R_DrawPlanes(); + + port->mFlags &= ~PORTSF_INSKYBOX; + if (port->mPartner > 0) sectorPortals[port->mPartner].mFlags &= ~PORTSF_INSKYBOX; + } + + // Draw all the masked textures in a second pass, in the reverse order they + // were added. This must be done separately from the previous step for the + // sake of nested skyboxes. + while (interestingStack.Pop(FirstInterestingDrawseg)) + { + ptrdiff_t pd = 0; + + drawsegStack.Pop(pd); + firstdrawseg = drawsegs + pd; + visspriteStack.Pop(pd); + firstvissprite = vissprites + pd; + + // Masked textures and planes need the view coordinates restored for proper positioning. 
+ viewposStack.Pop(ViewPos); + + R_DrawMasked(); + + ds_p = firstdrawseg; + vissprite_p = firstvissprite; + + visplaneStack.Pop(pl); + if (pl->Alpha > 0 && pl->picnum != skyflatnum) + { + R_DrawSinglePlane(pl, pl->Alpha, pl->Additive, true); + } + *freehead = pl; + freehead = &pl->next; + } + firstvissprite = vissprites; + vissprite_p = vissprites + savedvissprite_p; + firstdrawseg = drawsegs; + ds_p = drawsegs + savedds_p; + InterestingDrawsegs.Resize((unsigned int)FirstInterestingDrawseg); + FirstInterestingDrawseg = savedinteresting; + + camera = savedcamera; + viewsector = savedsector; + ViewPos = savedpos; + R_SetVisibility(savedvisibility); + extralight = savedextralight; + ViewAngle = savedangle; + R_SetViewAngle(); + + CurrentPortalInSkybox = false; + R_3D_LeaveSkybox(); + + if (fakeActive) return; + + for (*freehead = visplanes[MAXVISPLANES], visplanes[MAXVISPLANES] = nullptr; *freehead; ) + freehead = &(*freehead)->next; + } + + void R_DrawWallPortals() + { + // [RH] Walk through mirrors + // [ZZ] Merged with portals + size_t lastportal = WallPortals.Size(); + for (unsigned int i = 0; i < lastportal; i++) + { + R_EnterPortal(&WallPortals[i], 0); + } + + CurrentPortal = nullptr; + CurrentPortalUniq = 0; + } + + void R_EnterPortal(PortalDrawseg* pds, int depth) + { + // [ZZ] check depth. fill portal with black if it's exceeding the visual recursion limit, and continue like nothing happened. 
+ if (depth >= r_portal_recursions) + { + BYTE color = (BYTE)BestColor((DWORD *)GPalette.BaseColors, 0, 0, 0, 0, 255); + int spacing = RenderTarget->GetPitch(); + for (int x = pds->x1; x < pds->x2; x++) + { + if (x < 0 || x >= RenderTarget->GetWidth()) + continue; + + int Ytop = pds->ceilingclip[x - pds->x1]; + int Ybottom = pds->floorclip[x - pds->x1]; + + if (r_swtruecolor) + { + uint32_t *dest = (uint32_t*)RenderTarget->GetBuffer() + x + Ytop * spacing; + + uint32_t c = GPalette.BaseColors[color].d; + for (int y = Ytop; y <= Ybottom; y++) + { + *dest = c; + dest += spacing; + } + } + else + { + BYTE *dest = RenderTarget->GetBuffer() + x + Ytop * spacing; + + for (int y = Ytop; y <= Ybottom; y++) + { + *dest = color; + dest += spacing; + } + } + } + + if (r_highlight_portals) + R_HighlightPortal(pds); + + return; + } + + DAngle startang = ViewAngle; + DVector3 startpos = ViewPos; + DVector3 savedpath[2] = { ViewPath[0], ViewPath[1] }; + ActorRenderFlags savedvisibility = camera ? camera->renderflags & RF_INVISIBLE : ActorRenderFlags::FromInt(0); + + CurrentPortalUniq++; + + unsigned int portalsAtStart = WallPortals.Size(); + + if (pds->mirror) + { + //vertex_t *v1 = ds->curline->v1; + vertex_t *v1 = pds->src->v1; + + // Reflect the current view behind the mirror. 
+ if (pds->src->Delta().X == 0) + { // vertical mirror + ViewPos.X = v1->fX() - startpos.X + v1->fX(); + } + else if (pds->src->Delta().Y == 0) + { // horizontal mirror + ViewPos.Y = v1->fY() - startpos.Y + v1->fY(); + } + else + { // any mirror + vertex_t *v2 = pds->src->v2; + + double dx = v2->fX() - v1->fX(); + double dy = v2->fY() - v1->fY(); + double x1 = v1->fX(); + double y1 = v1->fY(); + double x = startpos.X; + double y = startpos.Y; + + // the above two cases catch len == 0 + double r = ((x - x1)*dx + (y - y1)*dy) / (dx*dx + dy*dy); + + ViewPos.X = (x1 + r * dx) * 2 - x; + ViewPos.Y = (y1 + r * dy) * 2 - y; + } + ViewAngle = pds->src->Delta().Angle() * 2 - startang; + } + else + { + P_TranslatePortalXY(pds->src, ViewPos.X, ViewPos.Y); + P_TranslatePortalZ(pds->src, ViewPos.Z); + P_TranslatePortalAngle(pds->src, ViewAngle); + P_TranslatePortalXY(pds->src, ViewPath[0].X, ViewPath[0].Y); + P_TranslatePortalXY(pds->src, ViewPath[1].X, ViewPath[1].Y); + + if (!r_showviewer && camera && P_PointOnLineSidePrecise(ViewPath[0], pds->dst) != P_PointOnLineSidePrecise(ViewPath[1], pds->dst)) + { + double distp = (ViewPath[0] - ViewPath[1]).Length(); + if (distp > EQUAL_EPSILON) + { + double dist1 = (ViewPos - ViewPath[0]).Length(); + double dist2 = (ViewPos - ViewPath[1]).Length(); + + if (dist1 + dist2 < distp + 1) + { + camera->renderflags |= RF_INVISIBLE; + } + } + } + } + + ViewSin = ViewAngle.Sin(); + ViewCos = ViewAngle.Cos(); + + ViewTanSin = FocalTangent * ViewSin; + ViewTanCos = FocalTangent * ViewCos; + + R_CopyStackedViewParameters(); + + validcount++; + PortalDrawseg* prevpds = CurrentPortal; + CurrentPortal = pds; + + R_ClearPlanes(false); + R_ClearClipSegs(pds->x1, pds->x2); + + WindowLeft = pds->x1; + WindowRight = pds->x2; + + // RF_XFLIP should be removed before calling the root function + int prevmf = MirrorFlags; + if (pds->mirror) + { + if (MirrorFlags & RF_XFLIP) + MirrorFlags &= ~RF_XFLIP; + else MirrorFlags |= RF_XFLIP; + } + + // some portals 
have height differences, account for this here + R_3D_EnterSkybox(); // push 3D floor height map + CurrentPortalInSkybox = false; // first portal in a skybox should set this variable to false for proper clipping in skyboxes. + + // first pass, set clipping + memcpy(ceilingclip + pds->x1, &pds->ceilingclip[0], pds->len * sizeof(*ceilingclip)); + memcpy(floorclip + pds->x1, &pds->floorclip[0], pds->len * sizeof(*floorclip)); + + InSubsector = nullptr; + R_RenderBSPNode(nodes + numnodes - 1); + R_3D_ResetClip(); // reset clips (floor/ceiling) + if (!savedvisibility && camera) camera->renderflags &= ~RF_INVISIBLE; + + PlaneCycles.Clock(); + R_DrawPlanes(); + R_DrawPortals(); + PlaneCycles.Unclock(); + + double vzp = ViewPos.Z; + + int prevuniq = CurrentPortalUniq; + // depth check is in another place right now + unsigned int portalsAtEnd = WallPortals.Size(); + for (; portalsAtStart < portalsAtEnd; portalsAtStart++) + { + R_EnterPortal(&WallPortals[portalsAtStart], depth + 1); + } + int prevuniq2 = CurrentPortalUniq; + CurrentPortalUniq = prevuniq; + + NetUpdate(); + + MaskedCycles.Clock(); // [ZZ] count sprites in portals/mirrors along with normal ones. + R_DrawMasked(); // this is required since with portals there often will be cases when more than 80% of the view is inside a portal. + MaskedCycles.Unclock(); + + NetUpdate(); + + R_3D_LeaveSkybox(); // pop 3D floor height map + CurrentPortalUniq = prevuniq2; + + // draw a red line around a portal if it's being highlighted + if (r_highlight_portals) + R_HighlightPortal(pds); + + CurrentPortal = prevpds; + MirrorFlags = prevmf; + ViewAngle = startang; + ViewPos = startpos; + ViewPath[0] = savedpath[0]; + ViewPath[1] = savedpath[1]; + } + + void R_HighlightPortal(PortalDrawseg* pds) + { + // [ZZ] NO OVERFLOW CHECKS HERE + // I believe it won't break. if it does, blame me. 
:( + + if (r_swtruecolor) // Assuming this is just a debug function + return; + + BYTE color = (BYTE)BestColor((DWORD *)GPalette.BaseColors, 255, 0, 0, 0, 255); + + BYTE* pixels = RenderTarget->GetBuffer(); + // top edge + for (int x = pds->x1; x < pds->x2; x++) + { + if (x < 0 || x >= RenderTarget->GetWidth()) + continue; + + int p = x - pds->x1; + int Ytop = pds->ceilingclip[p]; + int Ybottom = pds->floorclip[p]; + + if (x == pds->x1 || x == pds->x2 - 1) + { + RenderTarget->DrawLine(x, Ytop, x, Ybottom + 1, color, 0); + continue; + } + + int YtopPrev = pds->ceilingclip[p - 1]; + int YbottomPrev = pds->floorclip[p - 1]; + + if (abs(Ytop - YtopPrev) > 1) + RenderTarget->DrawLine(x, YtopPrev, x, Ytop, color, 0); + else *(pixels + Ytop * RenderTarget->GetPitch() + x) = color; + + if (abs(Ybottom - YbottomPrev) > 1) + RenderTarget->DrawLine(x, YbottomPrev, x, Ybottom, color, 0); + else *(pixels + Ybottom * RenderTarget->GetPitch() + x) = color; + } + } +} + +ADD_STAT(skyboxes) +{ + FString out; + out.Format("%d skybox planes", swrenderer::numskyboxes); + return out; +} diff --git a/src/swrenderer/scene/r_portal.h b/src/swrenderer/scene/r_portal.h new file mode 100644 index 0000000000..d7c7f948a5 --- /dev/null +++ b/src/swrenderer/scene/r_portal.h @@ -0,0 +1,12 @@ + +#pragma once + +#include "r_portal_segment.h" + +namespace swrenderer +{ + void R_DrawPortals(); + void R_DrawWallPortals(); + void R_EnterPortal(PortalDrawseg* pds, int depth); + void R_HighlightPortal(PortalDrawseg* pds); +} From 775deeb1510549e62bc3afac267439a12f3b4bdf Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 30 Dec 2016 06:15:10 +0100 Subject: [PATCH 597/912] Move variables closer to their correct location --- src/swrenderer/scene/r_bsp.cpp | 2 +- src/swrenderer/scene/r_plane.cpp | 2 +- src/swrenderer/scene/r_portal.cpp | 4 ++++ src/swrenderer/scene/r_portal.h | 4 ++++ src/swrenderer/scene/r_portal_segment.cpp | 4 ---- src/swrenderer/scene/r_portal_segment.h | 3 --- 
src/swrenderer/scene/r_segs.cpp | 2 +- src/swrenderer/scene/r_things.cpp | 2 +- 8 files changed, 12 insertions(+), 11 deletions(-) diff --git a/src/swrenderer/scene/r_bsp.cpp b/src/swrenderer/scene/r_bsp.cpp index 2b3054e4fe..d154e39682 100644 --- a/src/swrenderer/scene/r_bsp.cpp +++ b/src/swrenderer/scene/r_bsp.cpp @@ -38,7 +38,7 @@ #include "r_things.h" #include "r_3dfloors.h" #include "r_clip_segment.h" -#include "r_portal_segment.h" +#include "r_portal.h" #include "a_sharedglobal.h" #include "g_level.h" #include "p_effect.h" diff --git a/src/swrenderer/scene/r_plane.cpp b/src/swrenderer/scene/r_plane.cpp index b13af8b76d..3eb6192285 100644 --- a/src/swrenderer/scene/r_plane.cpp +++ b/src/swrenderer/scene/r_plane.cpp @@ -64,7 +64,7 @@ #include "r_walldraw.h" #include "r_clip_segment.h" #include "r_draw_segment.h" -#include "r_portal_segment.h" +#include "r_portal.h" #include "swrenderer\r_memory.h" #ifdef _MSC_VER diff --git a/src/swrenderer/scene/r_portal.cpp b/src/swrenderer/scene/r_portal.cpp index ccd27236f6..638f99a846 100644 --- a/src/swrenderer/scene/r_portal.cpp +++ b/src/swrenderer/scene/r_portal.cpp @@ -49,6 +49,10 @@ CVAR(Bool, r_skyboxes, true, 0) namespace swrenderer { + PortalDrawseg *CurrentPortal = nullptr; + int CurrentPortalUniq = 0; + bool CurrentPortalInSkybox = false; + namespace { int numskyboxes; // For ADD_STAT(skyboxes) diff --git a/src/swrenderer/scene/r_portal.h b/src/swrenderer/scene/r_portal.h index d7c7f948a5..f13c760b33 100644 --- a/src/swrenderer/scene/r_portal.h +++ b/src/swrenderer/scene/r_portal.h @@ -5,6 +5,10 @@ namespace swrenderer { + extern PortalDrawseg* CurrentPortal; + extern int CurrentPortalUniq; + extern bool CurrentPortalInSkybox; + void R_DrawPortals(); void R_DrawWallPortals(); void R_EnterPortal(PortalDrawseg* pds, int depth); diff --git a/src/swrenderer/scene/r_portal_segment.cpp b/src/swrenderer/scene/r_portal_segment.cpp index 5e698f0c7f..e817576704 100644 --- a/src/swrenderer/scene/r_portal_segment.cpp +++ 
b/src/swrenderer/scene/r_portal_segment.cpp @@ -26,9 +26,5 @@ namespace swrenderer { - PortalDrawseg *CurrentPortal = nullptr; - int CurrentPortalUniq = 0; - bool CurrentPortalInSkybox = false; - TArray WallPortals(1000); // note: this array needs to go away as reallocation can cause crashes. } diff --git a/src/swrenderer/scene/r_portal_segment.h b/src/swrenderer/scene/r_portal_segment.h index a1e341df18..dc4f501526 100644 --- a/src/swrenderer/scene/r_portal_segment.h +++ b/src/swrenderer/scene/r_portal_segment.h @@ -19,8 +19,5 @@ namespace swrenderer bool mirror; // true if this is a mirror (src should equal dst) }; - extern PortalDrawseg* CurrentPortal; - extern int CurrentPortalUniq; - extern bool CurrentPortalInSkybox; extern TArray WallPortals; } diff --git a/src/swrenderer/scene/r_segs.cpp b/src/swrenderer/scene/r_segs.cpp index 079700d880..0ae558e0f1 100644 --- a/src/swrenderer/scene/r_segs.cpp +++ b/src/swrenderer/scene/r_segs.cpp @@ -50,7 +50,7 @@ #include "r_data/colormaps.h" #include "r_walldraw.h" #include "r_draw_segment.h" -#include "r_portal_segment.h" +#include "r_portal.h" #include "swrenderer\r_memory.h" #define WALLYREPEAT 8 diff --git a/src/swrenderer/scene/r_things.cpp b/src/swrenderer/scene/r_things.cpp index 3a6e7dff16..246fbb4154 100644 --- a/src/swrenderer/scene/r_things.cpp +++ b/src/swrenderer/scene/r_things.cpp @@ -69,7 +69,7 @@ #include "p_maputl.h" #include "r_voxel.h" #include "r_draw_segment.h" -#include "r_portal_segment.h" +#include "r_portal.h" #include "swrenderer\r_memory.h" EXTERN_CVAR(Bool, st_scale) From 80e369541a55d0c2dc7c2ef0eb28f543af6b120f Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 30 Dec 2016 06:42:20 +0100 Subject: [PATCH 598/912] Move visplane_t to its own file --- src/CMakeLists.txt | 1 + src/swrenderer/r_main.cpp | 2 +- src/swrenderer/scene/r_plane.cpp | 119 ----------------------- src/swrenderer/scene/r_plane.h | 80 +-------------- src/swrenderer/scene/r_visible_plane.cpp | 106 ++++++++++++++++++++ 
src/swrenderer/scene/r_visible_plane.h | 67 +++++++++++++ 6 files changed, 178 insertions(+), 197 deletions(-) create mode 100644 src/swrenderer/scene/r_visible_plane.cpp create mode 100644 src/swrenderer/scene/r_visible_plane.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index f30cc647bf..46dc1b118c 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -825,6 +825,7 @@ set( FASTMATH_PCH_SOURCES swrenderer/scene/r_draw_segment.cpp swrenderer/scene/r_portal_segment.cpp swrenderer/scene/r_portal.cpp + swrenderer/scene/r_visible_plane.cpp polyrenderer/poly_renderer.cpp polyrenderer/scene/poly_scene.cpp polyrenderer/scene/poly_portal.cpp diff --git a/src/swrenderer/r_main.cpp b/src/swrenderer/r_main.cpp index 2d14a2308e..7027a7767e 100644 --- a/src/swrenderer/r_main.cpp +++ b/src/swrenderer/r_main.cpp @@ -377,7 +377,6 @@ void R_InitRenderer() // viewwidth / viewheight are set by the defaults fillshort (zeroarray, MAXWIDTH, 0); - R_InitPlanes (); R_InitShadeMaps(); R_InitColumnDrawers (); } @@ -392,6 +391,7 @@ static void R_ShutdownRenderer() { R_DeinitSprites(); R_DeinitPlanes(); + fakeActive = 0; // Free openings if (openings != NULL) { diff --git a/src/swrenderer/scene/r_plane.cpp b/src/swrenderer/scene/r_plane.cpp index 3eb6192285..cfb6cda281 100644 --- a/src/swrenderer/scene/r_plane.cpp +++ b/src/swrenderer/scene/r_plane.cpp @@ -89,20 +89,9 @@ static void R_DrawSkyStriped (visplane_t *pl); planefunction_t floorfunc; planefunction_t ceilingfunc; -// [RH] Allocate one extra for sky box planes. -visplane_t *visplanes[MAXVISPLANES+1]; -visplane_t *freetail; -visplane_t **freehead = &freetail; - visplane_t *floorplane; visplane_t *ceilingplane; -// killough -- hash function for visplanes -// Empirically verified to be fairly uniform: - -#define visplane_hash(picnum,lightlevel,height) \ - ((unsigned)((picnum)*3+(lightlevel)+(FLOAT2FIXED((height).fD()))*7) & (MAXVISPLANES-1)) - // These are copies of the main parameters used when drawing stacked sectors. 
// When you change the main parameters, you should copy them here too *unless* // you are changing them to draw a stacked sector. Otherwise, stacked sectors @@ -146,44 +135,6 @@ static double basexfrac, baseyfrac; void R_DrawSinglePlane (visplane_t *, fixed_t alpha, bool additive, bool masked); -//========================================================================== -// -// R_InitPlanes -// -// Called at game startup. -// -//========================================================================== - -void R_InitPlanes () -{ -} - -//========================================================================== -// -// R_DeinitPlanes -// -//========================================================================== - -void R_DeinitPlanes () -{ - fakeActive = 0; - - // do not use R_ClearPlanes because at this point the screen pointer is no longer valid. - for (int i = 0; i <= MAXVISPLANES; i++) // new code -- killough - { - for (*freehead = visplanes[i], visplanes[i] = NULL; *freehead; ) - { - freehead = &(*freehead)->next; - } - } - for (visplane_t *pl = freetail; pl != NULL; ) - { - visplane_t *next = pl->next; - free (pl); - pl = next; - } -} - //========================================================================== // // R_MapPlane @@ -537,38 +488,6 @@ void R_ClearPlanes (bool fullclear) } } -//========================================================================== -// -// new_visplane -// -// New function, by Lee Killough -// [RH] top and bottom buffers get allocated immediately after the visplane. 
-// -//========================================================================== - -static visplane_t *new_visplane (unsigned hash) -{ - visplane_t *check = freetail; - - if (check == NULL) - { - check = (visplane_t *)M_Malloc (sizeof(*check) + 3 + sizeof(*check->top)*(MAXWIDTH*2)); - memset(check, 0, sizeof(*check) + 3 + sizeof(*check->top)*(MAXWIDTH*2)); - check->bottom = check->top + MAXWIDTH+2; - } - else if (NULL == (freetail = freetail->next)) - { - freehead = &freetail; - } - - check->lights = nullptr; - - check->next = visplanes[hash]; - visplanes[hash] = check; - return check; -} - - //========================================================================== // // R_FindPlane @@ -1754,42 +1673,4 @@ void R_MapVisPlane (visplane_t *pl, void (*mapfunc)(int y, int x1)) ds_light_list = nullptr; } -//========================================================================== -// -// R_PlaneInitData -// -//========================================================================== - -bool R_PlaneInitData () -{ - int i; - visplane_t *pl; - - // Free all visplanes and let them be re-allocated as needed. 
- pl = freetail; - - while (pl) - { - visplane_t *next = pl->next; - M_Free (pl); - pl = next; - } - freetail = NULL; - freehead = &freetail; - - for (i = 0; i < MAXVISPLANES; i++) - { - pl = visplanes[i]; - visplanes[i] = NULL; - while (pl) - { - visplane_t *next = pl->next; - M_Free (pl); - pl = next; - } - } - - return true; -} - } diff --git a/src/swrenderer/scene/r_plane.h b/src/swrenderer/scene/r_plane.h index 176b9d2c88..c2f885c85e 100644 --- a/src/swrenderer/scene/r_plane.h +++ b/src/swrenderer/scene/r_plane.h @@ -23,63 +23,11 @@ #ifndef __R_PLANE_H__ #define __R_PLANE_H__ -#include - -class ASkyViewpoint; -class ADynamicLight; -struct FLightNode; +#include "r_visible_plane.h" namespace swrenderer { -struct visplane_light -{ - ADynamicLight *lightsource; - visplane_light *next; -}; - -// -// The infamous visplane -// -struct visplane_s -{ - struct visplane_s *next; // Next visplane in hash chain -- killough - - FDynamicColormap *colormap; // [RH] Support multiple colormaps - FSectorPortal *portal; // [RH] Support sky boxes - visplane_light *lights; - - FTransform xform; - secplane_t height; - FTextureID picnum; - int lightlevel; - int left, right; - int sky; - - // [RH] This set of variables copies information from the time when the - // visplane is created. They are only used by stacks so that you can - // have stacked sectors inside a skybox. If the visplane is not for a - // stack, then they are unused. - int extralight; - double visibility; - DVector3 viewpos; - DAngle viewangle; - fixed_t Alpha; - bool Additive; - - // kg3D - keep track of mirror and skybox owner - int CurrentSkybox; - int CurrentPortalUniq; // mirror counter, counts all of them - int MirrorFlags; // this is not related to CurrentMirror - - unsigned short *bottom; // [RH] bottom and top arrays are dynamically - unsigned short pad; // allocated immediately after the - unsigned short top[]; // visplane. 
-}; -typedef struct visplane_s visplane_t; - - - typedef void (*planefunction_t) (int top, int bottom); extern planefunction_t floorfunc; @@ -90,8 +38,6 @@ extern short ceilingclip[MAXWIDTH]; extern float yslope[MAXHEIGHT]; -void R_InitPlanes (); -void R_DeinitPlanes (); void R_ClearPlanes (bool fullclear); void R_AddPlaneLights(visplane_t *plane, FLightNode *light_head); @@ -108,32 +54,12 @@ void R_MapColoredPlane(int y, int x1); void R_DrawFogBoundary(int x1, int x2, short *uclip, short *dclip); -visplane_t *R_FindPlane -( const secplane_t &height, - FTextureID picnum, - int lightlevel, - double alpha, - bool additive, - const FTransform &xform, - int sky, - FSectorPortal *portal); - -visplane_t *R_CheckPlane (visplane_t *pl, int start, int stop); - - -// [RH] Added for multires support -bool R_PlaneInitData (void); - +visplane_t *R_FindPlane(const secplane_t &height, FTextureID picnum, int lightlevel, double alpha, bool additive, const FTransform &xform, int sky, FSectorPortal *portal); +visplane_t *R_CheckPlane(visplane_t *pl, int start, int stop); extern visplane_t* floorplane; extern visplane_t* ceilingplane; -#define MAXVISPLANES 128 /* must be a power of 2 */ - -extern visplane_t *visplanes[MAXVISPLANES + 1]; -extern visplane_t *freetail; -extern visplane_t **freehead; - } #endif // __R_PLANE_H__ diff --git a/src/swrenderer/scene/r_visible_plane.cpp b/src/swrenderer/scene/r_visible_plane.cpp new file mode 100644 index 0000000000..4a9e11ce95 --- /dev/null +++ b/src/swrenderer/scene/r_visible_plane.cpp @@ -0,0 +1,106 @@ + +#include +#include + +#include "templates.h" +#include "i_system.h" +#include "w_wad.h" +#include "doomdef.h" +#include "doomstat.h" +#include "swrenderer/r_main.h" +#include "swrenderer/scene/r_things.h" +#include "r_sky.h" +#include "stats.h" +#include "v_video.h" +#include "a_sharedglobal.h" +#include "c_console.h" +#include "cmdlib.h" +#include "d_net.h" +#include "g_level.h" +#include "r_bsp.h" +#include "r_visible_plane.h" + 
+namespace swrenderer +{ + // [RH] Allocate one extra for sky box planes. + visplane_t *visplanes[MAXVISPLANES + 1]; + visplane_t *freetail; + visplane_t **freehead = &freetail; + + namespace + { + enum { max_plane_lights = 32 * 1024 }; + visplane_light plane_lights[max_plane_lights]; + int next_plane_light = 0; + } + + void R_DeinitPlanes() + { + // do not use R_ClearPlanes because at this point the screen pointer is no longer valid. + for (int i = 0; i <= MAXVISPLANES; i++) // new code -- killough + { + for (*freehead = visplanes[i], visplanes[i] = NULL; *freehead; ) + { + freehead = &(*freehead)->next; + } + } + for (visplane_t *pl = freetail; pl != NULL; ) + { + visplane_t *next = pl->next; + free(pl); + pl = next; + } + } + + visplane_t *new_visplane(unsigned hash) + { + visplane_t *check = freetail; + + if (check == NULL) + { + check = (visplane_t *)M_Malloc(sizeof(*check) + 3 + sizeof(*check->top)*(MAXWIDTH * 2)); + memset(check, 0, sizeof(*check) + 3 + sizeof(*check->top)*(MAXWIDTH * 2)); + check->bottom = check->top + MAXWIDTH + 2; + } + else if (NULL == (freetail = freetail->next)) + { + freehead = &freetail; + } + + check->lights = nullptr; + + check->next = visplanes[hash]; + visplanes[hash] = check; + return check; + } + + void R_PlaneInitData() + { + int i; + visplane_t *pl; + + // Free all visplanes and let them be re-allocated as needed. 
+ pl = freetail; + + while (pl) + { + visplane_t *next = pl->next; + M_Free(pl); + pl = next; + } + freetail = NULL; + freehead = &freetail; + + for (i = 0; i < MAXVISPLANES; i++) + { + pl = visplanes[i]; + visplanes[i] = NULL; + while (pl) + { + visplane_t *next = pl->next; + M_Free(pl); + pl = next; + } + } + } +} diff --git a/src/swrenderer/scene/r_visible_plane.h b/src/swrenderer/scene/r_visible_plane.h new file mode 100644 index 0000000000..28e7035058 --- /dev/null +++ b/src/swrenderer/scene/r_visible_plane.h @@ -0,0 +1,67 @@ + +#pragma once + +#include +#include "r_defs.h" + +class ASkyViewpoint; +class ADynamicLight; +struct FLightNode; +struct FDynamicColormap; +struct FSectorPortal; + +namespace swrenderer +{ + struct visplane_light + { + ADynamicLight *lightsource; + visplane_light *next; + }; + + struct visplane_t + { + visplane_t *next; // Next visplane in hash chain -- killough + + FDynamicColormap *colormap; // [RH] Support multiple colormaps + FSectorPortal *portal; // [RH] Support sky boxes + visplane_light *lights; + + FTransform xform; + secplane_t height; + FTextureID picnum; + int lightlevel; + int left, right; + int sky; + + // [RH] This set of variables copies information from the time when the + // visplane is created. They are only used by stacks so that you can + // have stacked sectors inside a skybox. If the visplane is not for a + // stack, then they are unused. + int extralight; + double visibility; + DVector3 viewpos; + DAngle viewangle; + fixed_t Alpha; + bool Additive; + + // kg3D - keep track of mirror and skybox owner + int CurrentSkybox; + int CurrentPortalUniq; // mirror counter, counts all of them + int MirrorFlags; // this is not related to CurrentMirror + + unsigned short *bottom; // [RH] bottom and top arrays are dynamically + unsigned short pad; // allocated immediately after the + unsigned short top[]; // visplane. 
+ }; + + #define MAXVISPLANES 128 /* must be a power of 2 */ + #define visplane_hash(picnum,lightlevel,height) ((unsigned)((picnum)*3+(lightlevel)+(FLOAT2FIXED((height).fD()))*7) & (MAXVISPLANES-1)) + + extern visplane_t *visplanes[MAXVISPLANES + 1]; + extern visplane_t *freetail; + extern visplane_t **freehead; + + void R_DeinitPlanes(); + visplane_t *new_visplane(unsigned hash); + void R_PlaneInitData(); +} From f133b4caa4532e4a35bcedf10fb45f391021a076 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 30 Dec 2016 07:15:10 +0100 Subject: [PATCH 599/912] Move vissprite to its own file --- src/CMakeLists.txt | 1 + src/swrenderer/scene/r_things.cpp | 47 +------------ src/swrenderer/scene/r_things.h | 76 +-------------------- src/swrenderer/scene/r_visible_sprite.cpp | 64 +++++++++++++++++ src/swrenderer/scene/r_visible_sprite.h | 83 +++++++++++++++++++++++ 5 files changed, 151 insertions(+), 120 deletions(-) create mode 100644 src/swrenderer/scene/r_visible_sprite.cpp create mode 100644 src/swrenderer/scene/r_visible_sprite.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 46dc1b118c..e1ec4293a2 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -826,6 +826,7 @@ set( FASTMATH_PCH_SOURCES swrenderer/scene/r_portal_segment.cpp swrenderer/scene/r_portal.cpp swrenderer/scene/r_visible_plane.cpp + swrenderer/scene/r_visible_sprite.cpp polyrenderer/poly_renderer.cpp polyrenderer/scene/poly_scene.cpp polyrenderer/scene/poly_portal.cpp diff --git a/src/swrenderer/scene/r_things.cpp b/src/swrenderer/scene/r_things.cpp index 246fbb4154..f2193e1398 100644 --- a/src/swrenderer/scene/r_things.cpp +++ b/src/swrenderer/scene/r_things.cpp @@ -161,12 +161,6 @@ FCoverageBuffer *OffscreenCoverageBuffer; // GAME FUNCTIONS // -int MaxVisSprites; -vissprite_t **vissprites; -vissprite_t **firstvissprite; -vissprite_t **vissprite_p; -vissprite_t **lastvissprite; -int newvissprite; bool DrewAVoxel; static vissprite_t **spritesorter; @@ -179,15 +173,7 @@ static 
void R_ProjectWallSprite(AActor *thing, const DVector3 &pos, FTextureID p void R_DeinitSprites() { - // Free vissprites - for (int i = 0; i < MaxVisSprites; ++i) - { - delete vissprites[i]; - } - free (vissprites); - vissprites = NULL; - vissprite_p = lastvissprite = NULL; - MaxVisSprites = 0; + R_DeinitVisSprites(); // Free vissprites sorter if (spritesorter != NULL) @@ -217,39 +203,10 @@ void R_DeinitSprites() // void R_ClearSprites (void) { - vissprite_p = firstvissprite; + R_ClearVisSprites(); DrewAVoxel = false; } - -// -// R_NewVisSprite -// -vissprite_t *R_NewVisSprite (void) -{ - if (vissprite_p == lastvissprite) - { - ptrdiff_t firstvisspritenum = firstvissprite - vissprites; - ptrdiff_t prevvisspritenum = vissprite_p - vissprites; - - MaxVisSprites = MaxVisSprites ? MaxVisSprites * 2 : 128; - vissprites = (vissprite_t **)M_Realloc (vissprites, MaxVisSprites * sizeof(vissprite_t)); - lastvissprite = &vissprites[MaxVisSprites]; - firstvissprite = &vissprites[firstvisspritenum]; - vissprite_p = &vissprites[prevvisspritenum]; - DPrintf (DMSG_NOTIFY, "MaxVisSprites increased to %d\n", MaxVisSprites); - - // Allocate sprites from the new pile - for (vissprite_t **p = vissprite_p; p < lastvissprite; ++p) - { - *p = new vissprite_t; - } - } - - vissprite_p++; - return *(vissprite_p-1); -} - // // R_DrawMaskedColumn // Used for sprites and masked mid textures. diff --git a/src/swrenderer/scene/r_things.h b/src/swrenderer/scene/r_things.h index 7f00ce1775..c5553899a4 100644 --- a/src/swrenderer/scene/r_things.h +++ b/src/swrenderer/scene/r_things.h @@ -23,7 +23,7 @@ #ifndef __R_THINGS__ #define __R_THINGS__ -#include "r_bsp.h" +#include "r_visible_sprite.h" struct particle_t; struct FVoxel; @@ -31,84 +31,10 @@ struct FVoxel; namespace swrenderer { -// A vissprite_t is a thing -// that will be drawn during a refresh. -// I.e. a sprite object that is partly visible. 
- -struct vissprite_t -{ - struct posang - { - FVector3 vpos; // view origin - FAngle vang; // view angle - }; - - short x1, x2; - FVector3 gpos; // origin in world coordinates - union - { - struct - { - float gzb, gzt; // global bottom / top for silhouette clipping - }; - struct - { - int y1, y2; // top / bottom of particle on screen - }; - }; - DAngle Angle; - fixed_t xscale; - float yscale; - float depth; - float idepth; // 1/z - float deltax, deltay; - DWORD FillColor; - double floorclip; - union - { - FTexture *pic; - struct FVoxel *voxel; - }; - union - { - // Used by face sprites - struct - { - double texturemid; - fixed_t startfrac; // horizontal position of x1 - fixed_t xiscale; // negative if flipped - }; - // Used by wall sprites - FWallCoords wallc; - // Used by voxels - posang pa; - }; - sector_t *heightsec; // killough 3/27/98: height sector for underwater/fake ceiling - sector_t *sector; // [RH] sector this sprite is in - F3DFloor *fakefloor; - F3DFloor *fakeceiling; - BYTE bIsVoxel:1; // [RH] Use voxel instead of pic - BYTE bWallSprite:1; // [RH] This is a wall sprite - BYTE bSplitSprite:1; // [RH] Sprite was split by a drawseg - BYTE bInMirror:1; // [RH] Sprite is "inside" a mirror - BYTE FakeFlatStat; // [RH] which side of fake/floor ceiling sprite is on - short renderflags; - DWORD Translation; // [RH] for color translation - visstyle_t Style; - int CurrentPortalUniq; // [ZZ] to identify the portal that this thing is in. used for clipping. - - vissprite_t() {} -}; - void R_DrawParticle (vissprite_t *); void R_ProjectParticle (particle_t *, const sector_t *sector, int shade, int fakeside); -extern int MaxVisSprites; - -extern vissprite_t **vissprites, **firstvissprite; -extern vissprite_t **vissprite_p; - // Constant arrays used for psprite clipping // and initializing clipping. 
extern short zeroarray[MAXWIDTH]; diff --git a/src/swrenderer/scene/r_visible_sprite.cpp b/src/swrenderer/scene/r_visible_sprite.cpp new file mode 100644 index 0000000000..f3cd357913 --- /dev/null +++ b/src/swrenderer/scene/r_visible_sprite.cpp @@ -0,0 +1,64 @@ + +#include +#include +#include +#include "p_lnspec.h" +#include "templates.h" +#include "doomdef.h" +#include "m_swap.h" +#include "i_system.h" +#include "w_wad.h" +#include "swrenderer/r_main.h" +#include "swrenderer/scene/r_visible_sprite.h" + +namespace swrenderer +{ + int MaxVisSprites; + vissprite_t **vissprites; + vissprite_t **firstvissprite; + vissprite_t **vissprite_p; + vissprite_t **lastvissprite; + + void R_DeinitVisSprites() + { + // Free vissprites + for (int i = 0; i < MaxVisSprites; ++i) + { + delete vissprites[i]; + } + free(vissprites); + vissprites = nullptr; + vissprite_p = lastvissprite = nullptr; + MaxVisSprites = 0; + } + + void R_ClearVisSprites() + { + vissprite_p = firstvissprite; + } + + vissprite_t *R_NewVisSprite() + { + if (vissprite_p == lastvissprite) + { + ptrdiff_t firstvisspritenum = firstvissprite - vissprites; + ptrdiff_t prevvisspritenum = vissprite_p - vissprites; + + MaxVisSprites = MaxVisSprites ? 
MaxVisSprites * 2 : 128; + vissprites = (vissprite_t **)M_Realloc(vissprites, MaxVisSprites * sizeof(vissprite_t)); + lastvissprite = &vissprites[MaxVisSprites]; + firstvissprite = &vissprites[firstvisspritenum]; + vissprite_p = &vissprites[prevvisspritenum]; + DPrintf(DMSG_NOTIFY, "MaxVisSprites increased to %d\n", MaxVisSprites); + + // Allocate sprites from the new pile + for (vissprite_t **p = vissprite_p; p < lastvissprite; ++p) + { + *p = new vissprite_t; + } + } + + vissprite_p++; + return *(vissprite_p - 1); + } +} diff --git a/src/swrenderer/scene/r_visible_sprite.h b/src/swrenderer/scene/r_visible_sprite.h new file mode 100644 index 0000000000..426840fc35 --- /dev/null +++ b/src/swrenderer/scene/r_visible_sprite.h @@ -0,0 +1,83 @@ + +#pragma once + +#include "r_bsp.h" + +struct particle_t; +struct FVoxel; + +namespace swrenderer +{ + struct vissprite_t + { + struct posang + { + FVector3 vpos; // view origin + FAngle vang; // view angle + }; + + short x1, x2; + FVector3 gpos; // origin in world coordinates + union + { + struct + { + float gzb, gzt; // global bottom / top for silhouette clipping + }; + struct + { + int y1, y2; // top / bottom of particle on screen + }; + }; + DAngle Angle; + fixed_t xscale; + float yscale; + float depth; + float idepth; // 1/z + float deltax, deltay; + uint32_t FillColor; + double floorclip; + union + { + FTexture *pic; + struct FVoxel *voxel; + }; + union + { + // Used by face sprites + struct + { + double texturemid; + fixed_t startfrac; // horizontal position of x1 + fixed_t xiscale; // negative if flipped + }; + // Used by wall sprites + FWallCoords wallc; + // Used by voxels + posang pa; + }; + sector_t *heightsec; // killough 3/27/98: height sector for underwater/fake ceiling + sector_t *sector; // [RH] sector this sprite is in + F3DFloor *fakefloor; + F3DFloor *fakeceiling; + uint8_t bIsVoxel : 1; // [RH] Use voxel instead of pic + uint8_t bWallSprite : 1; // [RH] This is a wall sprite + uint8_t bSplitSprite : 1; // 
[RH] Sprite was split by a drawseg + uint8_t bInMirror : 1; // [RH] Sprite is "inside" a mirror + uint8_t FakeFlatStat; // [RH] which side of fake/floor ceiling sprite is on + short renderflags; + uint32_t Translation; // [RH] for color translation + visstyle_t Style; + int CurrentPortalUniq; // [ZZ] to identify the portal that this thing is in. used for clipping. + + vissprite_t() {} + }; + + extern int MaxVisSprites; + extern vissprite_t **vissprites, **firstvissprite; + extern vissprite_t **vissprite_p; + + void R_DeinitVisSprites(); + void R_ClearVisSprites(); + vissprite_t *R_NewVisSprite(); +} From bf237799bf1eee2edd594d63ae71f64a02b9261c Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 30 Dec 2016 07:26:25 +0100 Subject: [PATCH 600/912] Move particle drawing to r_particle --- src/CMakeLists.txt | 1 + src/swrenderer/scene/r_bsp.cpp | 1 + src/swrenderer/scene/r_particle.cpp | 285 ++++++++++++++++++++++++++++ src/swrenderer/scene/r_particle.h | 11 ++ src/swrenderer/scene/r_plane.cpp | 2 +- src/swrenderer/scene/r_segs.cpp | 2 +- src/swrenderer/scene/r_things.cpp | 245 +----------------------- src/swrenderer/scene/r_things.h | 11 +- 8 files changed, 310 insertions(+), 248 deletions(-) create mode 100644 src/swrenderer/scene/r_particle.cpp create mode 100644 src/swrenderer/scene/r_particle.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index e1ec4293a2..a63cc8ae33 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -827,6 +827,7 @@ set( FASTMATH_PCH_SOURCES swrenderer/scene/r_portal.cpp swrenderer/scene/r_visible_plane.cpp swrenderer/scene/r_visible_sprite.cpp + swrenderer/scene/r_particle.cpp polyrenderer/poly_renderer.cpp polyrenderer/scene/poly_scene.cpp polyrenderer/scene/poly_portal.cpp diff --git a/src/swrenderer/scene/r_bsp.cpp b/src/swrenderer/scene/r_bsp.cpp index d154e39682..df99066060 100644 --- a/src/swrenderer/scene/r_bsp.cpp +++ b/src/swrenderer/scene/r_bsp.cpp @@ -36,6 +36,7 @@ #include "r_plane.h" #include 
"swrenderer/drawers/r_draw.h" #include "r_things.h" +#include "r_particle.h" #include "r_3dfloors.h" #include "r_clip_segment.h" #include "r_portal.h" diff --git a/src/swrenderer/scene/r_particle.cpp b/src/swrenderer/scene/r_particle.cpp new file mode 100644 index 0000000000..4197c6db29 --- /dev/null +++ b/src/swrenderer/scene/r_particle.cpp @@ -0,0 +1,285 @@ + +#include +#include +#include +#include "p_lnspec.h" +#include "templates.h" +#include "doomdef.h" +#include "m_swap.h" +#include "i_system.h" +#include "w_wad.h" +#include "swrenderer/r_main.h" +#include "swrenderer/scene/r_things.h" +#include "swrenderer/scene/r_particle.h" +#include "c_console.h" +#include "c_cvars.h" +#include "c_dispatch.h" +#include "doomstat.h" +#include "v_video.h" +#include "sc_man.h" +#include "s_sound.h" +#include "sbar.h" +#include "gi.h" +#include "r_sky.h" +#include "cmdlib.h" +#include "g_level.h" +#include "d_net.h" +#include "colormatcher.h" +#include "d_netinf.h" +#include "p_effect.h" +#include "r_bsp.h" +#include "r_plane.h" +#include "r_segs.h" +#include "r_3dfloors.h" +#include "swrenderer/drawers/r_draw_rgba.h" +#include "swrenderer/drawers/r_draw_pal.h" +#include "v_palette.h" +#include "r_data/r_translate.h" +#include "r_data/colormaps.h" +#include "r_data/voxels.h" +#include "p_local.h" +#include "p_maputl.h" +#include "r_voxel.h" +#include "r_draw_segment.h" +#include "r_portal.h" +#include "swrenderer/r_memory.h" + +namespace swrenderer +{ + void R_ProjectParticle(particle_t *particle, const sector_t *sector, int shade, int fakeside) + { + double tr_x, tr_y; + double tx, ty; + double tz, tiz; + double xscale, yscale; + int x1, x2, y1, y2; + vissprite_t* vis; + sector_t* heightsec = NULL; + FSWColormap* map; + + // [ZZ] Particle not visible through the portal plane + if (CurrentPortal && !!P_PointOnLineSide(particle->Pos, CurrentPortal->dst)) + return; + + // transform the origin point + tr_x = particle->Pos.X - ViewPos.X; + tr_y = particle->Pos.Y - ViewPos.Y; + + 
tz = tr_x * ViewTanCos + tr_y * ViewTanSin; + + // particle is behind view plane? + if (tz < MINZ) + return; + + tx = tr_x * ViewSin - tr_y * ViewCos; + + // Flip for mirrors + if (MirrorFlags & RF_XFLIP) + { + tx = viewwidth - tx - 1; + } + + // too far off the side? + if (tz <= fabs(tx)) + return; + + tiz = 1 / tz; + xscale = centerx * tiz; + + // calculate edges of the shape + double psize = particle->size / 8.0; + + x1 = MAX(WindowLeft, centerx + xs_RoundToInt((tx - psize) * xscale)); + x2 = MIN(WindowRight, centerx + xs_RoundToInt((tx + psize) * xscale)); + + if (x1 >= x2) + return; + + yscale = xscale; // YaspectMul is not needed for particles as they should always be square + ty = particle->Pos.Z - ViewPos.Z; + y1 = xs_RoundToInt(CenterY - (ty + psize) * yscale); + y2 = xs_RoundToInt(CenterY - (ty - psize) * yscale); + + // Clip the particle now. Because it's a point and projected as its subsector is + // entered, we don't need to clip it to drawsegs like a normal sprite. + + // Clip particles behind walls. + if (y1 < ceilingclip[x1]) y1 = ceilingclip[x1]; + if (y1 < ceilingclip[x2 - 1]) y1 = ceilingclip[x2 - 1]; + if (y2 >= floorclip[x1]) y2 = floorclip[x1] - 1; + if (y2 >= floorclip[x2 - 1]) y2 = floorclip[x2 - 1] - 1; + + if (y1 > y2) + return; + + // Clip particles above the ceiling or below the floor. 
+ heightsec = sector->GetHeightSec(); + + const secplane_t *topplane; + const secplane_t *botplane; + FTextureID toppic; + FTextureID botpic; + + if (heightsec) // only clip things which are in special sectors + { + if (fakeside == FAKED_AboveCeiling) + { + topplane = &sector->ceilingplane; + botplane = &heightsec->ceilingplane; + toppic = sector->GetTexture(sector_t::ceiling); + botpic = heightsec->GetTexture(sector_t::ceiling); + map = heightsec->ColorMap; + } + else if (fakeside == FAKED_BelowFloor) + { + topplane = &heightsec->floorplane; + botplane = &sector->floorplane; + toppic = heightsec->GetTexture(sector_t::floor); + botpic = sector->GetTexture(sector_t::floor); + map = heightsec->ColorMap; + } + else + { + topplane = &heightsec->ceilingplane; + botplane = &heightsec->floorplane; + toppic = heightsec->GetTexture(sector_t::ceiling); + botpic = heightsec->GetTexture(sector_t::floor); + map = sector->ColorMap; + } + } + else + { + topplane = &sector->ceilingplane; + botplane = &sector->floorplane; + toppic = sector->GetTexture(sector_t::ceiling); + botpic = sector->GetTexture(sector_t::floor); + map = sector->ColorMap; + } + + if (botpic != skyflatnum && particle->Pos.Z < botplane->ZatPoint(particle->Pos)) + return; + if (toppic != skyflatnum && particle->Pos.Z >= topplane->ZatPoint(particle->Pos)) + return; + + // store information in a vissprite + vis = R_NewVisSprite(); + vis->CurrentPortalUniq = CurrentPortalUniq; + vis->heightsec = heightsec; + vis->xscale = FLOAT2FIXED(xscale); + vis->yscale = (float)xscale; + // vis->yscale *= InvZtoScale; + vis->depth = (float)tz; + vis->idepth = float(1 / tz); + vis->gpos = { (float)particle->Pos.X, (float)particle->Pos.Y, (float)particle->Pos.Z }; + vis->y1 = y1; + vis->y2 = y2; + vis->x1 = x1; + vis->x2 = x2; + vis->Translation = 0; + vis->startfrac = 255 & (particle->color >> 24); + vis->pic = NULL; + vis->bIsVoxel = false; + vis->renderflags = particle->trans; + vis->FakeFlatStat = fakeside; + vis->floorclip = 0; + 
vis->Style.ColormapNum = 0; + + if (fixedlightlev >= 0) + { + vis->Style.BaseColormap = map; + vis->Style.ColormapNum = fixedlightlev >> COLORMAPSHIFT; + } + else if (fixedcolormap) + { + vis->Style.BaseColormap = fixedcolormap; + vis->Style.ColormapNum = 0; + } + else if (particle->bright) + { + vis->Style.BaseColormap = (r_fullbrightignoresectorcolor) ? &FullNormalLight : map; + vis->Style.ColormapNum = 0; + } + else + { + // Particles are slightly more visible than regular sprites. + vis->Style.ColormapNum = GETPALOOKUP(tiz * r_SpriteVisibility * 0.5, shade); + vis->Style.BaseColormap = map; + } + } + + void R_DrawParticle(vissprite_t *vis) + { + using namespace drawerargs; + + int spacing; + BYTE color = vis->Style.BaseColormap->Maps[vis->startfrac]; + int yl = vis->y1; + int ycount = vis->y2 - yl + 1; + int x1 = vis->x1; + int countbase = vis->x2 - x1; + + if (ycount <= 0 || countbase <= 0) + return; + + R_DrawMaskedSegsBehindParticle(vis); + + uint32_t fg = LightBgra::shade_pal_index_simple(color, LightBgra::calc_light_multiplier(LIGHTSCALE(0, vis->Style.ColormapNum << FRACBITS))); + + // vis->renderflags holds translucency level (0-255) + fixed_t fglevel = ((vis->renderflags + 1) << 8) & ~0x3ff; + uint32_t alpha = fglevel * 256 / FRACUNIT; + + spacing = RenderTarget->GetPitch(); + + uint32_t fracstepx = 16 * FRACUNIT / countbase; + uint32_t fracposx = fracstepx / 2; + + if (r_swtruecolor) + { + for (int x = x1; x < (x1 + countbase); x++, fracposx += fracstepx) + { + dc_x = x; + if (R_ClipSpriteColumnWithPortals(vis)) + continue; + uint32_t *dest = ylookup[yl] + x + (uint32_t*)dc_destorg; + DrawerCommandQueue::QueueCommand(dest, yl, spacing, ycount, fg, alpha, fracposx); + } + } + else + { + for (int x = x1; x < (x1 + countbase); x++, fracposx += fracstepx) + { + dc_x = x; + if (R_ClipSpriteColumnWithPortals(vis)) + continue; + uint8_t *dest = ylookup[yl] + x + dc_destorg; + DrawerCommandQueue::QueueCommand(dest, yl, spacing, ycount, fg, alpha, fracposx); + } 
+ } + } + + void R_DrawMaskedSegsBehindParticle(const vissprite_t *vis) + { + const int x1 = vis->x1; + const int x2 = vis->x2; + + // Draw any masked textures behind this particle so that when the + // particle is drawn, it will be in front of them. + for (unsigned int p = InterestingDrawsegs.Size(); p-- > FirstInterestingDrawseg; ) + { + drawseg_t *ds = &drawsegs[InterestingDrawsegs[p]]; + // kg3D - no fake segs + if (ds->fake) continue; + if (ds->x1 >= x2 || ds->x2 <= x1) + { + continue; + } + if ((ds->siz2 - ds->siz1) * ((x2 + x1) / 2 - ds->sx1) / (ds->sx2 - ds->sx1) + ds->siz1 < vis->idepth) + { + // [ZZ] only draw stuff that's inside the same portal as the particle, other portals will care for themselves + if (ds->CurrentPortalUniq == vis->CurrentPortalUniq) + R_RenderMaskedSegRange(ds, MAX(ds->x1, x1), MIN(ds->x2, x2)); + } + } + } +} diff --git a/src/swrenderer/scene/r_particle.h b/src/swrenderer/scene/r_particle.h new file mode 100644 index 0000000000..9184840d88 --- /dev/null +++ b/src/swrenderer/scene/r_particle.h @@ -0,0 +1,11 @@ + +#pragma once + +#include "r_visible_sprite.h" + +namespace swrenderer +{ + void R_ProjectParticle(particle_t *, const sector_t *sector, int shade, int fakeside); + void R_DrawParticle(vissprite_t *); + void R_DrawMaskedSegsBehindParticle(const vissprite_t *vis); +} diff --git a/src/swrenderer/scene/r_plane.cpp b/src/swrenderer/scene/r_plane.cpp index cfb6cda281..83810ad95f 100644 --- a/src/swrenderer/scene/r_plane.cpp +++ b/src/swrenderer/scene/r_plane.cpp @@ -65,7 +65,7 @@ #include "r_clip_segment.h" #include "r_draw_segment.h" #include "r_portal.h" -#include "swrenderer\r_memory.h" +#include "swrenderer/r_memory.h" #ifdef _MSC_VER #pragma warning(disable:4244) diff --git a/src/swrenderer/scene/r_segs.cpp b/src/swrenderer/scene/r_segs.cpp index 0ae558e0f1..40ad2bc52c 100644 --- a/src/swrenderer/scene/r_segs.cpp +++ b/src/swrenderer/scene/r_segs.cpp @@ -51,7 +51,7 @@ #include "r_walldraw.h" #include "r_draw_segment.h" 
#include "r_portal.h" -#include "swrenderer\r_memory.h" +#include "swrenderer/r_memory.h" #define WALLYREPEAT 8 diff --git a/src/swrenderer/scene/r_things.cpp b/src/swrenderer/scene/r_things.cpp index f2193e1398..4b8725ad14 100644 --- a/src/swrenderer/scene/r_things.cpp +++ b/src/swrenderer/scene/r_things.cpp @@ -70,7 +70,8 @@ #include "r_voxel.h" #include "r_draw_segment.h" #include "r_portal.h" -#include "swrenderer\r_memory.h" +#include "r_particle.h" +#include "swrenderer/r_memory.h" EXTERN_CVAR(Bool, st_scale) EXTERN_CVAR(Bool, r_shadercolormaps) @@ -113,10 +114,6 @@ struct FCoverageBuffer extern double globaluclip, globaldclip; extern float MaskedScaleY; -#define MINZ double((2048*4) / double(1 << 20)) -#define BASEXCENTER (160) -#define BASEYCENTER (100) - // // Sprite rotation 0 is facing the viewer, // rotation 1 is one angle turn CLOCKWISE around the axis. @@ -446,7 +443,7 @@ static inline void R_CollectPortals() } } -static inline bool R_ClipSpriteColumnWithPortals(vissprite_t* spr) +bool R_ClipSpriteColumnWithPortals(vissprite_t* spr) { // [ZZ] 10.01.2016: don't clip sprites from the root of a skybox. if (CurrentPortalInSkybox) @@ -2598,242 +2595,6 @@ void R_DrawMasked (void) R_DrawPlayerSprites (); } - -void R_ProjectParticle (particle_t *particle, const sector_t *sector, int shade, int fakeside) -{ - double tr_x, tr_y; - double tx, ty; - double tz, tiz; - double xscale, yscale; - int x1, x2, y1, y2; - vissprite_t* vis; - sector_t* heightsec = NULL; - FSWColormap* map; - - // [ZZ] Particle not visible through the portal plane - if (CurrentPortal && !!P_PointOnLineSide(particle->Pos, CurrentPortal->dst)) - return; - - // transform the origin point - tr_x = particle->Pos.X - ViewPos.X; - tr_y = particle->Pos.Y - ViewPos.Y; - - tz = tr_x * ViewTanCos + tr_y * ViewTanSin; - - // particle is behind view plane? 
- if (tz < MINZ) - return; - - tx = tr_x * ViewSin - tr_y * ViewCos; - - // Flip for mirrors - if (MirrorFlags & RF_XFLIP) - { - tx = viewwidth - tx - 1; - } - - // too far off the side? - if (tz <= fabs(tx)) - return; - - tiz = 1 / tz; - xscale = centerx * tiz; - - // calculate edges of the shape - double psize = particle->size / 8.0; - - x1 = MAX(WindowLeft, centerx + xs_RoundToInt((tx - psize) * xscale)); - x2 = MIN(WindowRight, centerx + xs_RoundToInt((tx + psize) * xscale)); - - if (x1 >= x2) - return; - - yscale = xscale; // YaspectMul is not needed for particles as they should always be square - ty = particle->Pos.Z - ViewPos.Z; - y1 = xs_RoundToInt(CenterY - (ty + psize) * yscale); - y2 = xs_RoundToInt(CenterY - (ty - psize) * yscale); - - // Clip the particle now. Because it's a point and projected as its subsector is - // entered, we don't need to clip it to drawsegs like a normal sprite. - - // Clip particles behind walls. - if (y1 < ceilingclip[x1]) y1 = ceilingclip[x1]; - if (y1 < ceilingclip[x2-1]) y1 = ceilingclip[x2-1]; - if (y2 >= floorclip[x1]) y2 = floorclip[x1] - 1; - if (y2 >= floorclip[x2-1]) y2 = floorclip[x2-1] - 1; - - if (y1 > y2) - return; - - // Clip particles above the ceiling or below the floor. 
- heightsec = sector->GetHeightSec(); - - const secplane_t *topplane; - const secplane_t *botplane; - FTextureID toppic; - FTextureID botpic; - - if (heightsec) // only clip things which are in special sectors - { - if (fakeside == FAKED_AboveCeiling) - { - topplane = &sector->ceilingplane; - botplane = &heightsec->ceilingplane; - toppic = sector->GetTexture(sector_t::ceiling); - botpic = heightsec->GetTexture(sector_t::ceiling); - map = heightsec->ColorMap; - } - else if (fakeside == FAKED_BelowFloor) - { - topplane = &heightsec->floorplane; - botplane = &sector->floorplane; - toppic = heightsec->GetTexture(sector_t::floor); - botpic = sector->GetTexture(sector_t::floor); - map = heightsec->ColorMap; - } - else - { - topplane = &heightsec->ceilingplane; - botplane = &heightsec->floorplane; - toppic = heightsec->GetTexture(sector_t::ceiling); - botpic = heightsec->GetTexture(sector_t::floor); - map = sector->ColorMap; - } - } - else - { - topplane = &sector->ceilingplane; - botplane = &sector->floorplane; - toppic = sector->GetTexture(sector_t::ceiling); - botpic = sector->GetTexture(sector_t::floor); - map = sector->ColorMap; - } - - if (botpic != skyflatnum && particle->Pos.Z < botplane->ZatPoint (particle->Pos)) - return; - if (toppic != skyflatnum && particle->Pos.Z >= topplane->ZatPoint (particle->Pos)) - return; - - // store information in a vissprite - vis = R_NewVisSprite (); - vis->CurrentPortalUniq = CurrentPortalUniq; - vis->heightsec = heightsec; - vis->xscale = FLOAT2FIXED(xscale); - vis->yscale = (float)xscale; -// vis->yscale *= InvZtoScale; - vis->depth = (float)tz; - vis->idepth = float(1 / tz); - vis->gpos = { (float)particle->Pos.X, (float)particle->Pos.Y, (float)particle->Pos.Z }; - vis->y1 = y1; - vis->y2 = y2; - vis->x1 = x1; - vis->x2 = x2; - vis->Translation = 0; - vis->startfrac = 255 & (particle->color >>24); - vis->pic = NULL; - vis->bIsVoxel = false; - vis->renderflags = particle->trans; - vis->FakeFlatStat = fakeside; - vis->floorclip = 0; - 
vis->Style.ColormapNum = 0; - - if (fixedlightlev >= 0) - { - vis->Style.BaseColormap = map; - vis->Style.ColormapNum = fixedlightlev >> COLORMAPSHIFT; - } - else if (fixedcolormap) - { - vis->Style.BaseColormap = fixedcolormap; - vis->Style.ColormapNum = 0; - } - else if (particle->bright) - { - vis->Style.BaseColormap = (r_fullbrightignoresectorcolor) ? &FullNormalLight : map; - vis->Style.ColormapNum = 0; - } - else - { - // Particles are slightly more visible than regular sprites. - vis->Style.ColormapNum = GETPALOOKUP(tiz * r_SpriteVisibility * 0.5, shade); - vis->Style.BaseColormap = map; - } -} - -static void R_DrawMaskedSegsBehindParticle (const vissprite_t *vis) -{ - const int x1 = vis->x1; - const int x2 = vis->x2; - - // Draw any masked textures behind this particle so that when the - // particle is drawn, it will be in front of them. - for (unsigned int p = InterestingDrawsegs.Size(); p-- > FirstInterestingDrawseg; ) - { - drawseg_t *ds = &drawsegs[InterestingDrawsegs[p]]; - // kg3D - no fake segs - if(ds->fake) continue; - if (ds->x1 >= x2 || ds->x2 <= x1) - { - continue; - } - if ((ds->siz2 - ds->siz1) * ((x2 + x1)/2 - ds->sx1) / (ds->sx2 - ds->sx1) + ds->siz1 < vis->idepth) - { - // [ZZ] only draw stuff that's inside the same portal as the particle, other portals will care for themselves - if (ds->CurrentPortalUniq == vis->CurrentPortalUniq) - R_RenderMaskedSegRange (ds, MAX(ds->x1, x1), MIN(ds->x2, x2)); - } - } -} - -void R_DrawParticle(vissprite_t *vis) -{ - int spacing; - BYTE color = vis->Style.BaseColormap->Maps[vis->startfrac]; - int yl = vis->y1; - int ycount = vis->y2 - yl + 1; - int x1 = vis->x1; - int countbase = vis->x2 - x1; - - if (ycount <= 0 || countbase <= 0) - return; - - R_DrawMaskedSegsBehindParticle(vis); - - uint32_t fg = LightBgra::shade_pal_index_simple(color, LightBgra::calc_light_multiplier(LIGHTSCALE(0, vis->Style.ColormapNum << FRACBITS))); - - // vis->renderflags holds translucency level (0-255) - fixed_t fglevel = 
((vis->renderflags + 1) << 8) & ~0x3ff; - uint32_t alpha = fglevel * 256 / FRACUNIT; - - spacing = RenderTarget->GetPitch(); - - uint32_t fracstepx = 16 * FRACUNIT / countbase; - uint32_t fracposx = fracstepx / 2; - - if (r_swtruecolor) - { - for (int x = x1; x < (x1 + countbase); x++, fracposx += fracstepx) - { - dc_x = x; - if (R_ClipSpriteColumnWithPortals(vis)) - continue; - uint32_t *dest = ylookup[yl] + x + (uint32_t*)dc_destorg; - DrawerCommandQueue::QueueCommand(dest, yl, spacing, ycount, fg, alpha, fracposx); - } - } - else - { - for (int x = x1; x < (x1 + countbase); x++, fracposx += fracstepx) - { - dc_x = x; - if (R_ClipSpriteColumnWithPortals(vis)) - continue; - uint8_t *dest = ylookup[yl] + x + dc_destorg; - DrawerCommandQueue::QueueCommand(dest, yl, spacing, ycount, fg, alpha, fracposx); - } - } -} - extern double BaseYaspectMul;; inline int sgn(int v) diff --git a/src/swrenderer/scene/r_things.h b/src/swrenderer/scene/r_things.h index c5553899a4..0e562a204e 100644 --- a/src/swrenderer/scene/r_things.h +++ b/src/swrenderer/scene/r_things.h @@ -28,13 +28,15 @@ struct particle_t; struct FVoxel; +#define MINZ double((2048*4) / double(1 << 20)) +#define BASEXCENTER (160) +#define BASEYCENTER (100) + +EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor); + namespace swrenderer { -void R_DrawParticle (vissprite_t *); - -void R_ProjectParticle (particle_t *, const sector_t *sector, int shade, int fakeside); - // Constant arrays used for psprite clipping // and initializing clipping. 
extern short zeroarray[MAXWIDTH]; @@ -53,6 +55,7 @@ extern double pspriteyscale; extern FTexture *WallSpriteTile; +bool R_ClipSpriteColumnWithPortals(vissprite_t* spr); void R_DrawMaskedColumn (FTexture *texture, fixed_t column, bool unmasked = false); void R_WallSpriteColumn (); From 98026c57113a36f0dc56705c66852c089f8308b9 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 30 Dec 2016 07:51:39 +0100 Subject: [PATCH 601/912] Move player sprite handling to r_playersprite --- src/CMakeLists.txt | 1 + src/swrenderer/r_swrenderer.cpp | 2 +- src/swrenderer/scene/r_playersprite.cpp | 620 ++++++++++++++++++++++++ src/swrenderer/scene/r_playersprite.h | 11 + src/swrenderer/scene/r_things.cpp | 588 +--------------------- src/swrenderer/scene/r_things.h | 4 +- 6 files changed, 639 insertions(+), 587 deletions(-) create mode 100644 src/swrenderer/scene/r_playersprite.cpp create mode 100644 src/swrenderer/scene/r_playersprite.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index a63cc8ae33..1fa9367d91 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -828,6 +828,7 @@ set( FASTMATH_PCH_SOURCES swrenderer/scene/r_visible_plane.cpp swrenderer/scene/r_visible_sprite.cpp swrenderer/scene/r_particle.cpp + swrenderer/scene/r_playersprite.cpp polyrenderer/poly_renderer.cpp polyrenderer/scene/poly_scene.cpp polyrenderer/scene/poly_portal.cpp diff --git a/src/swrenderer/r_swrenderer.cpp b/src/swrenderer/r_swrenderer.cpp index ea7c549628..76aa2cca4e 100644 --- a/src/swrenderer/r_swrenderer.cpp +++ b/src/swrenderer/r_swrenderer.cpp @@ -34,7 +34,7 @@ #include "r_main.h" -#include "swrenderer/scene/r_things.h" +#include "swrenderer/scene/r_playersprite.h" #include "v_palette.h" #include "v_video.h" #include "m_png.h" diff --git a/src/swrenderer/scene/r_playersprite.cpp b/src/swrenderer/scene/r_playersprite.cpp new file mode 100644 index 0000000000..ac9137c9eb --- /dev/null +++ b/src/swrenderer/scene/r_playersprite.cpp @@ -0,0 +1,620 @@ + +#include +#include 
+#include +#include "p_lnspec.h" +#include "templates.h" +#include "doomdef.h" +#include "m_swap.h" +#include "i_system.h" +#include "w_wad.h" +#include "swrenderer/r_main.h" +#include "swrenderer/scene/r_things.h" +#include "swrenderer/scene/r_playersprite.h" +#include "c_console.h" +#include "c_cvars.h" +#include "c_dispatch.h" +#include "doomstat.h" +#include "v_video.h" +#include "sc_man.h" +#include "s_sound.h" +#include "sbar.h" +#include "gi.h" +#include "r_sky.h" +#include "cmdlib.h" +#include "g_level.h" +#include "d_net.h" +#include "colormatcher.h" +#include "d_netinf.h" +#include "p_effect.h" +#include "r_bsp.h" +#include "r_plane.h" +#include "r_segs.h" +#include "r_3dfloors.h" +#include "swrenderer/drawers/r_draw_rgba.h" +#include "swrenderer/drawers/r_draw_pal.h" +#include "v_palette.h" +#include "r_data/r_translate.h" +#include "r_data/colormaps.h" +#include "r_data/voxels.h" +#include "p_local.h" +#include "p_maputl.h" +#include "r_voxel.h" +#include "r_draw_segment.h" +#include "r_portal.h" +#include "swrenderer/r_memory.h" + +EXTERN_CVAR(Bool, st_scale) +EXTERN_CVAR(Bool, r_drawplayersprites) +EXTERN_CVAR(Bool, r_deathcamera) +EXTERN_CVAR(Bool, r_shadercolormaps) + +namespace swrenderer +{ + namespace + { + // Used to store a psprite's drawing information if it needs to be drawn later. 
+ struct vispsp_t + { + vissprite_t *vis; + FDynamicColormap *basecolormap; + int x1; + }; + + TArray<vispsp_t> vispsprites; + unsigned int vispspindex; + } + + void R_DrawPlayerSprites() + { + int i; + int lightnum; + DPSprite* psp; + DPSprite* weapon; + sector_t* sec = NULL; + static sector_t tempsec; + int floorlight, ceilinglight; + F3DFloor *rover; + + if (!r_drawplayersprites || + !camera || + !camera->player || + (players[consoleplayer].cheats & CF_CHASECAM) || + (r_deathcamera && camera->health <= 0)) + return; + + if (fixedlightlev < 0 && viewsector->e && viewsector->e->XFloor.lightlist.Size()) + { + for (i = viewsector->e->XFloor.lightlist.Size() - 1; i >= 0; i--) + { + if (ViewPos.Z <= viewsector->e->XFloor.lightlist[i].plane.Zat0()) + { + rover = viewsector->e->XFloor.lightlist[i].caster; + if (rover) + { + if (rover->flags & FF_DOUBLESHADOW && ViewPos.Z <= rover->bottom.plane->Zat0()) + break; + sec = rover->model; + if (rover->flags & FF_FADEWALLS) + basecolormap = sec->ColorMap; + else + basecolormap = viewsector->e->XFloor.lightlist[i].extra_colormap; + } + break; + } + } + if (!sec) + { + sec = viewsector; + basecolormap = sec->ColorMap; + } + floorlight = ceilinglight = sec->lightlevel; + } + else + { // This used to use camera->Sector but due to interpolation that can be incorrect + // when the interpolated viewpoint is in a different sector than the camera. + sec = R_FakeFlat(viewsector, &tempsec, &floorlight, + &ceilinglight, false); + + // [RH] set basecolormap + basecolormap = sec->ColorMap; + } + + // [RH] set foggy flag + foggy = (level.fadeto || basecolormap->Fade || (level.flags & LEVEL_HASFADETABLE)); + r_actualextralight = foggy ? 
0 : extralight << 4; + + // get light level + lightnum = ((floorlight + ceilinglight) >> 1) + r_actualextralight; + spriteshade = LIGHT2SHADE(lightnum) - 24 * FRACUNIT; + + // clip to screen bounds + mfloorclip = screenheightarray; + mceilingclip = zeroarray; + + if (camera->player != NULL) + { + double centerhack = CenterY; + double wx, wy; + float bobx, boby; + + CenterY = viewheight / 2; + + P_BobWeapon(camera->player, &bobx, &boby, r_TicFracF); + + // Interpolate the main weapon layer once so as to be able to add it to other layers. + if ((weapon = camera->player->FindPSprite(PSP_WEAPON)) != nullptr) + { + if (weapon->firstTic) + { + wx = weapon->x; + wy = weapon->y; + } + else + { + wx = weapon->oldx + (weapon->x - weapon->oldx) * r_TicFracF; + wy = weapon->oldy + (weapon->y - weapon->oldy) * r_TicFracF; + } + } + else + { + wx = 0; + wy = 0; + } + + // add all active psprites + psp = camera->player->psprites; + while (psp) + { + // [RH] Don't draw the targeter's crosshair if the player already has a crosshair set. + // It's possible this psprite's caller is now null but the layer itself hasn't been destroyed + // because it didn't tick yet (if we typed 'take all' while in the console for example). + // In this case let's simply not draw it to avoid crashing. 
+ + if ((psp->GetID() != PSP_TARGETCENTER || CrosshairImage == nullptr) && psp->GetCaller() != nullptr) + { + R_DrawPSprite(psp, camera, bobx, boby, wx, wy, r_TicFracF); + } + + psp = psp->GetNext(); + } + + CenterY = centerhack; + } + } + + void R_DrawPSprite(DPSprite *pspr, AActor *owner, float bobx, float boby, double wx, double wy, double ticfrac) + { + double tx; + int x1; + int x2; + double sx, sy; + spritedef_t* sprdef; + spriteframe_t* sprframe; + FTextureID picnum; + WORD flip; + FTexture* tex; + vissprite_t* vis; + bool noaccel; + double alpha = owner->Alpha; + static TArray avis; + + if (avis.Size() < vispspindex + 1) + avis.Reserve(avis.Size() - vispspindex + 1); + + // decide which patch to use + if ((unsigned)pspr->GetSprite() >= (unsigned)sprites.Size()) + { + DPrintf(DMSG_ERROR, "R_DrawPSprite: invalid sprite number %i\n", pspr->GetSprite()); + return; + } + sprdef = &sprites[pspr->GetSprite()]; + if (pspr->GetFrame() >= sprdef->numframes) + { + DPrintf(DMSG_ERROR, "R_DrawPSprite: invalid sprite frame %i : %i\n", pspr->GetSprite(), pspr->GetFrame()); + return; + } + sprframe = &SpriteFrames[sprdef->spriteframes + pspr->GetFrame()]; + + picnum = sprframe->Texture[0]; + flip = sprframe->Flip & 1; + tex = TexMan(picnum); + + if (tex->UseType == FTexture::TEX_Null || pspr->RenderStyle == STYLE_None) + return; + + if (pspr->firstTic) + { // Can't interpolate the first tic. 
+ pspr->firstTic = false; + pspr->oldx = pspr->x; + pspr->oldy = pspr->y; + } + + sx = pspr->oldx + (pspr->x - pspr->oldx) * ticfrac; + sy = pspr->oldy + (pspr->y - pspr->oldy) * ticfrac + WEAPON_FUDGE_Y; + + if (pspr->Flags & PSPF_ADDBOB) + { + sx += bobx; + sy += boby; + } + + if (pspr->Flags & PSPF_ADDWEAPON && pspr->GetID() != PSP_WEAPON) + { + sx += wx; + sy += wy; + } + + // calculate edges of the shape + tx = sx - BASEXCENTER; + + tx -= tex->GetScaledLeftOffset(); + x1 = xs_RoundToInt(CenterX + tx * pspritexscale); + + // off the right side + if (x1 > viewwidth) + return; + + tx += tex->GetScaledWidth(); + x2 = xs_RoundToInt(CenterX + tx * pspritexscale); + + // off the left side + if (x2 <= 0) + return; + + // store information in a vissprite + vis = &avis[vispspindex]; + vis->renderflags = owner->renderflags; + vis->floorclip = 0; + + vis->texturemid = (BASEYCENTER - sy) * tex->Scale.Y + tex->TopOffset; + + if (camera->player && (RenderTarget != screen || + viewheight == RenderTarget->GetHeight() || + (RenderTarget->GetWidth() > (BASEXCENTER * 2) && !st_scale))) + { // Adjust PSprite for fullscreen views + AWeapon *weapon = dyn_cast(pspr->GetCaller()); + if (weapon != nullptr && weapon->YAdjust != 0) + { + if (RenderTarget != screen || viewheight == RenderTarget->GetHeight()) + { + vis->texturemid -= weapon->YAdjust; + } + else + { + vis->texturemid -= StatusBar->GetDisplacement() * weapon->YAdjust; + } + } + } + if (pspr->GetID() < PSP_TARGETCENTER) + { // Move the weapon down for 1280x1024. + vis->texturemid -= AspectPspriteOffset(WidescreenRatio); + } + vis->x1 = x1 < 0 ? 0 : x1; + vis->x2 = x2 >= viewwidth ? viewwidth : x2; + vis->xscale = FLOAT2FIXED(pspritexscale / tex->Scale.X); + vis->yscale = float(pspriteyscale / tex->Scale.Y); + vis->Translation = 0; // [RH] Use default colors + vis->pic = tex; + vis->Style.ColormapNum = 0; + + // If flip is used, provided that it's not already flipped (that would just invert itself) + // (It's an XOR...) 
+ if (!(flip) != !(pspr->Flags & PSPF_FLIP)) + { + vis->xiscale = -FLOAT2FIXED(pspritexiscale * tex->Scale.X); + vis->startfrac = (tex->GetWidth() << FRACBITS) - 1; + } + else + { + vis->xiscale = FLOAT2FIXED(pspritexiscale * tex->Scale.X); + vis->startfrac = 0; + } + + if (vis->x1 > x1) + vis->startfrac += vis->xiscale*(vis->x1 - x1); + + noaccel = false; + FDynamicColormap *colormap_to_use = nullptr; + if (pspr->GetID() < PSP_TARGETCENTER) + { + // [MC] Set the render style + + if (pspr->Flags & PSPF_RENDERSTYLE) + { + const int rs = clamp(pspr->RenderStyle, 0, STYLE_Count); + + if (pspr->Flags & PSPF_FORCESTYLE) + { + vis->Style.RenderStyle = LegacyRenderStyles[rs]; + } + else if (owner->RenderStyle == LegacyRenderStyles[STYLE_Fuzzy]) + { + vis->Style.RenderStyle = LegacyRenderStyles[STYLE_Fuzzy]; + } + else if (owner->RenderStyle == LegacyRenderStyles[STYLE_OptFuzzy]) + { + vis->Style.RenderStyle = LegacyRenderStyles[STYLE_OptFuzzy]; + vis->Style.RenderStyle.CheckFuzz(); + } + else if (owner->RenderStyle == LegacyRenderStyles[STYLE_Subtract]) + { + vis->Style.RenderStyle = LegacyRenderStyles[STYLE_Subtract]; + } + else + { + vis->Style.RenderStyle = LegacyRenderStyles[rs]; + } + } + else + { + vis->Style.RenderStyle = owner->RenderStyle; + } + + // Set the alpha based on if using the overlay's own or not. Also adjust + // and override the alpha if not forced. 
+ if (pspr->Flags & PSPF_ALPHA) + { + if (vis->Style.RenderStyle == LegacyRenderStyles[STYLE_Fuzzy]) + { + alpha = owner->Alpha; + } + else if (vis->Style.RenderStyle == LegacyRenderStyles[STYLE_OptFuzzy]) + { + FRenderStyle style = vis->Style.RenderStyle; + style.CheckFuzz(); + switch (style.BlendOp) + { + default: + alpha = pspr->alpha * owner->Alpha; + break; + case STYLEOP_Fuzz: + case STYLEOP_Sub: + alpha = owner->Alpha; + break; + } + + } + else if (vis->Style.RenderStyle == LegacyRenderStyles[STYLE_Subtract]) + { + alpha = owner->Alpha; + } + else if (vis->Style.RenderStyle == LegacyRenderStyles[STYLE_Add] || + vis->Style.RenderStyle == LegacyRenderStyles[STYLE_Translucent] || + vis->Style.RenderStyle == LegacyRenderStyles[STYLE_TranslucentStencil] || + vis->Style.RenderStyle == LegacyRenderStyles[STYLE_AddStencil] || + vis->Style.RenderStyle == LegacyRenderStyles[STYLE_AddShaded]) + { + alpha = owner->Alpha * pspr->alpha; + } + else + { + alpha = owner->Alpha; + } + } + + // Should normal renderstyle come out on top at the end and we desire alpha, + // switch it to translucent. Normal never applies any sort of alpha. + if ((pspr->Flags & PSPF_ALPHA) && + vis->Style.RenderStyle == LegacyRenderStyles[STYLE_Normal] && + vis->Style.Alpha < 1.0) + { + vis->Style.RenderStyle = LegacyRenderStyles[STYLE_Translucent]; + alpha = owner->Alpha * pspr->alpha; + } + + // ALWAYS take priority if asked for, except fuzz. Fuzz does absolutely nothing + // no matter what way it's changed. + if (pspr->Flags & PSPF_FORCEALPHA) + { + //Due to lack of != operators... 
+ if (vis->Style.RenderStyle == LegacyRenderStyles[STYLE_Fuzzy] || + vis->Style.RenderStyle == LegacyRenderStyles[STYLE_SoulTrans] || + vis->Style.RenderStyle == LegacyRenderStyles[STYLE_Stencil]) + { + } + else + { + alpha = pspr->alpha; + vis->Style.RenderStyle.Flags |= STYLEF_ForceAlpha; + } + } + vis->Style.Alpha = clamp(float(alpha), 0.f, 1.f); + + // Due to how some of the effects are handled, going to 0 or less causes some + // weirdness to display. There's no point rendering it anyway if it's 0. + if (vis->Style.Alpha <= 0.) + return; + + //----------------------------------------------------------------------------- + + // The software renderer cannot invert the source without inverting the overlay + // too. That means if the source is inverted, we need to do the reverse of what + // the invert overlay flag says to do. + INTBOOL invertcolormap = (vis->Style.RenderStyle.Flags & STYLEF_InvertOverlay); + + if (vis->Style.RenderStyle.Flags & STYLEF_InvertSource) + { + invertcolormap = !invertcolormap; + } + + FDynamicColormap *mybasecolormap = basecolormap; + + if (vis->Style.RenderStyle.Flags & STYLEF_FadeToBlack) + { + if (invertcolormap) + { // Fade to white + mybasecolormap = GetSpecialLights(mybasecolormap->Color, MAKERGB(255, 255, 255), mybasecolormap->Desaturate); + invertcolormap = false; + } + else + { // Fade to black + mybasecolormap = GetSpecialLights(mybasecolormap->Color, MAKERGB(0, 0, 0), mybasecolormap->Desaturate); + } + } + + if (realfixedcolormap != nullptr && (!r_swtruecolor || (r_shadercolormaps && screen->Accel2D))) + { // fixed color + vis->Style.BaseColormap = realfixedcolormap; + vis->Style.ColormapNum = 0; + } + else + { + if (invertcolormap) + { + mybasecolormap = GetSpecialLights(mybasecolormap->Color, mybasecolormap->Fade.InverseColor(), mybasecolormap->Desaturate); + } + if (fixedlightlev >= 0) + { + vis->Style.BaseColormap = (r_fullbrightignoresectorcolor) ? 
&FullNormalLight : mybasecolormap; + vis->Style.ColormapNum = fixedlightlev >> COLORMAPSHIFT; + } + else if (!foggy && pspr->GetState()->GetFullbright()) + { // full bright + vis->Style.BaseColormap = (r_fullbrightignoresectorcolor) ? &FullNormalLight : mybasecolormap; // [RH] use basecolormap + vis->Style.ColormapNum = 0; + } + else + { // local light + vis->Style.BaseColormap = mybasecolormap; + vis->Style.ColormapNum = GETPALOOKUP(0, spriteshade); + } + } + if (camera->Inventory != nullptr) + { + BYTE oldcolormapnum = vis->Style.ColormapNum; + FSWColormap *oldcolormap = vis->Style.BaseColormap; + camera->Inventory->AlterWeaponSprite(&vis->Style); + if (vis->Style.BaseColormap != oldcolormap || vis->Style.ColormapNum != oldcolormapnum) + { + // The colormap has changed. Is it one we can easily identify? + // If not, then don't bother trying to identify it for + // hardware accelerated drawing. + if (vis->Style.BaseColormap < &SpecialColormaps[0] || + vis->Style.BaseColormap > &SpecialColormaps.Last()) + { + noaccel = true; + } + // Has the basecolormap changed? If so, we can't hardware accelerate it, + // since we don't know what it is anymore. + else if (vis->Style.BaseColormap != mybasecolormap) + { + noaccel = true; + } + } + } + // If we're drawing with a special colormap, but shaders for them are disabled, do + // not accelerate. + if (!r_shadercolormaps && (vis->Style.BaseColormap >= &SpecialColormaps[0] && + vis->Style.BaseColormap <= &SpecialColormaps.Last())) + { + noaccel = true; + } + // If drawing with a BOOM colormap, disable acceleration. + if (mybasecolormap == &NormalLight && NormalLight.Maps != realcolormaps.Maps) + { + noaccel = true; + } + // If the main colormap has fixed lights, and this sprite is being drawn with that + // colormap, disable acceleration so that the lights can remain fixed. 
+ if (!noaccel && realfixedcolormap == nullptr && + NormalLightHasFixedLights && mybasecolormap == &NormalLight && + vis->pic->UseBasePalette()) + { + noaccel = true; + } + // [SP] If emulating GZDoom fullbright, disable acceleration + if (r_fullbrightignoresectorcolor && fixedlightlev >= 0) + mybasecolormap = &FullNormalLight; + if (r_fullbrightignoresectorcolor && !foggy && pspr->GetState()->GetFullbright()) + mybasecolormap = &FullNormalLight; + colormap_to_use = mybasecolormap; + } + else + { + colormap_to_use = basecolormap; + + vis->Style.BaseColormap = basecolormap; + vis->Style.ColormapNum = 0; + } + + // Check for hardware-assisted 2D. If it's available, and this sprite is not + // fuzzy, don't draw it until after the switch to 2D mode. + if (!noaccel && RenderTarget == screen && (DFrameBuffer *)screen->Accel2D) + { + FRenderStyle style = vis->Style.RenderStyle; + style.CheckFuzz(); + if (style.BlendOp != STYLEOP_Fuzz) + { + if (vispsprites.Size() < vispspindex + 1) + vispsprites.Reserve(vispsprites.Size() - vispspindex + 1); + + vispsprites[vispspindex].vis = vis; + vispsprites[vispspindex].basecolormap = colormap_to_use; + vispsprites[vispspindex].x1 = x1; + vispspindex++; + return; + } + } + + R_DrawVisSprite(vis); + } + + void R_DrawRemainingPlayerSprites() + { + for (unsigned int i = 0; i < vispspindex; i++) + { + vissprite_t *vis; + + vis = vispsprites[i].vis; + FDynamicColormap *colormap = vispsprites[i].basecolormap; + bool flip = vis->xiscale < 0; + FSpecialColormap *special = NULL; + PalEntry overlay = 0; + FColormapStyle colormapstyle; + bool usecolormapstyle = false; + + if (vis->Style.BaseColormap >= &SpecialColormaps[0] && + vis->Style.BaseColormap < &SpecialColormaps[SpecialColormaps.Size()]) + { + special = static_cast(vis->Style.BaseColormap); + } + else if (colormap->Color == PalEntry(255, 255, 255) && + colormap->Desaturate == 0) + { + overlay = colormap->Fade; + overlay.a = BYTE(vis->Style.ColormapNum * 255 / NUMCOLORMAPS); + } + else + 
{ + usecolormapstyle = true; + colormapstyle.Color = colormap->Color; + colormapstyle.Fade = colormap->Fade; + colormapstyle.Desaturate = colormap->Desaturate; + colormapstyle.FadeLevel = vis->Style.ColormapNum / float(NUMCOLORMAPS); + } + screen->DrawTexture(vis->pic, + viewwindowx + vispsprites[i].x1, + viewwindowy + viewheight / 2 - vis->texturemid * vis->yscale - 0.5, + DTA_DestWidthF, FIXED2DBL(vis->pic->GetWidth() * vis->xscale), + DTA_DestHeightF, vis->pic->GetHeight() * vis->yscale, + DTA_Translation, TranslationToTable(vis->Translation), + DTA_FlipX, flip, + DTA_TopOffset, 0, + DTA_LeftOffset, 0, + DTA_ClipLeft, viewwindowx, + DTA_ClipTop, viewwindowy, + DTA_ClipRight, viewwindowx + viewwidth, + DTA_ClipBottom, viewwindowy + viewheight, + DTA_AlphaF, vis->Style.Alpha, + DTA_RenderStyle, vis->Style.RenderStyle, + DTA_FillColor, vis->FillColor, + DTA_SpecialColormap, special, + DTA_ColorOverlay, overlay.d, + DTA_ColormapStyle, usecolormapstyle ? &colormapstyle : NULL, + TAG_DONE); + } + + vispspindex = 0; + } +} diff --git a/src/swrenderer/scene/r_playersprite.h b/src/swrenderer/scene/r_playersprite.h new file mode 100644 index 0000000000..0fff067965 --- /dev/null +++ b/src/swrenderer/scene/r_playersprite.h @@ -0,0 +1,11 @@ + +#pragma once + +#include "r_visible_sprite.h" + +namespace swrenderer +{ + void R_DrawPlayerSprites(); + void R_DrawPSprite(DPSprite *pspr, AActor *owner, float bobx, float boby, double wx, double wy, double ticfrac); + void R_DrawRemainingPlayerSprites(); +} diff --git a/src/swrenderer/scene/r_things.cpp b/src/swrenderer/scene/r_things.cpp index 4b8725ad14..bfc550ed80 100644 --- a/src/swrenderer/scene/r_things.cpp +++ b/src/swrenderer/scene/r_things.cpp @@ -71,13 +71,10 @@ #include "r_draw_segment.h" #include "r_portal.h" #include "r_particle.h" +#include "r_playersprite.h" #include "swrenderer/r_memory.h" -EXTERN_CVAR(Bool, st_scale) -EXTERN_CVAR(Bool, r_shadercolormaps) EXTERN_CVAR(Int, r_drawfuzz) -EXTERN_CVAR(Bool, r_deathcamera); 
-EXTERN_CVAR(Bool, r_drawplayersprites) EXTERN_CVAR(Bool, r_drawvoxels) EXTERN_CVAR(Bool, r_blendmethod) @@ -126,17 +123,7 @@ double pspriteyscale; fixed_t sky1scale; // [RH] Sky 1 scale factor fixed_t sky2scale; // [RH] Sky 2 scale factor -// Used to store a psprite's drawing information if it needs to be drawn later. -struct vispsp_t -{ - vissprite_t *vis; - FDynamicColormap *basecolormap; - int x1; -}; -TArray vispsprites; -unsigned int vispspindex; - -static int spriteshade; +int spriteshade; FTexture *WallSpriteTile; @@ -1293,575 +1280,6 @@ void R_AddSprites (sector_t *sec, int lightlevel, int fakeside) } } -// -// R_DrawPSprite -// -void R_DrawPSprite(DPSprite *pspr, AActor *owner, float bobx, float boby, double wx, double wy, double ticfrac) -{ - double tx; - int x1; - int x2; - double sx, sy; - spritedef_t* sprdef; - spriteframe_t* sprframe; - FTextureID picnum; - WORD flip; - FTexture* tex; - vissprite_t* vis; - bool noaccel; - double alpha = owner->Alpha; - static TArray avis; - - if (avis.Size() < vispspindex + 1) - avis.Reserve(avis.Size() - vispspindex + 1); - - // decide which patch to use - if ((unsigned)pspr->GetSprite() >= (unsigned)sprites.Size()) - { - DPrintf(DMSG_ERROR, "R_DrawPSprite: invalid sprite number %i\n", pspr->GetSprite()); - return; - } - sprdef = &sprites[pspr->GetSprite()]; - if (pspr->GetFrame() >= sprdef->numframes) - { - DPrintf(DMSG_ERROR, "R_DrawPSprite: invalid sprite frame %i : %i\n", pspr->GetSprite(), pspr->GetFrame()); - return; - } - sprframe = &SpriteFrames[sprdef->spriteframes + pspr->GetFrame()]; - - picnum = sprframe->Texture[0]; - flip = sprframe->Flip & 1; - tex = TexMan(picnum); - - if (tex->UseType == FTexture::TEX_Null || pspr->RenderStyle == STYLE_None) - return; - - if (pspr->firstTic) - { // Can't interpolate the first tic. 
- pspr->firstTic = false; - pspr->oldx = pspr->x; - pspr->oldy = pspr->y; - } - - sx = pspr->oldx + (pspr->x - pspr->oldx) * ticfrac; - sy = pspr->oldy + (pspr->y - pspr->oldy) * ticfrac + WEAPON_FUDGE_Y; - - if (pspr->Flags & PSPF_ADDBOB) - { - sx += bobx; - sy += boby; - } - - if (pspr->Flags & PSPF_ADDWEAPON && pspr->GetID() != PSP_WEAPON) - { - sx += wx; - sy += wy; - } - - // calculate edges of the shape - tx = sx - BASEXCENTER; - - tx -= tex->GetScaledLeftOffset(); - x1 = xs_RoundToInt(CenterX + tx * pspritexscale); - - // off the right side - if (x1 > viewwidth) - return; - - tx += tex->GetScaledWidth(); - x2 = xs_RoundToInt(CenterX + tx * pspritexscale); - - // off the left side - if (x2 <= 0) - return; - - // store information in a vissprite - vis = &avis[vispspindex]; - vis->renderflags = owner->renderflags; - vis->floorclip = 0; - - vis->texturemid = (BASEYCENTER - sy) * tex->Scale.Y + tex->TopOffset; - - if (camera->player && (RenderTarget != screen || - viewheight == RenderTarget->GetHeight() || - (RenderTarget->GetWidth() > (BASEXCENTER * 2) && !st_scale))) - { // Adjust PSprite for fullscreen views - AWeapon *weapon = dyn_cast(pspr->GetCaller()); - if (weapon != nullptr && weapon->YAdjust != 0) - { - if (RenderTarget != screen || viewheight == RenderTarget->GetHeight()) - { - vis->texturemid -= weapon->YAdjust; - } - else - { - vis->texturemid -= StatusBar->GetDisplacement() * weapon->YAdjust; - } - } - } - if (pspr->GetID() < PSP_TARGETCENTER) - { // Move the weapon down for 1280x1024. - vis->texturemid -= AspectPspriteOffset(WidescreenRatio); - } - vis->x1 = x1 < 0 ? 0 : x1; - vis->x2 = x2 >= viewwidth ? viewwidth : x2; - vis->xscale = FLOAT2FIXED(pspritexscale / tex->Scale.X); - vis->yscale = float(pspriteyscale / tex->Scale.Y); - vis->Translation = 0; // [RH] Use default colors - vis->pic = tex; - vis->Style.ColormapNum = 0; - - // If flip is used, provided that it's not already flipped (that would just invert itself) - // (It's an XOR...) 
- if (!(flip) != !(pspr->Flags & PSPF_FLIP)) - { - vis->xiscale = -FLOAT2FIXED(pspritexiscale * tex->Scale.X); - vis->startfrac = (tex->GetWidth() << FRACBITS) - 1; - } - else - { - vis->xiscale = FLOAT2FIXED(pspritexiscale * tex->Scale.X); - vis->startfrac = 0; - } - - if (vis->x1 > x1) - vis->startfrac += vis->xiscale*(vis->x1 - x1); - - noaccel = false; - FDynamicColormap *colormap_to_use = nullptr; - if (pspr->GetID() < PSP_TARGETCENTER) - { - // [MC] Set the render style - - if (pspr->Flags & PSPF_RENDERSTYLE) - { - const int rs = clamp(pspr->RenderStyle, 0, STYLE_Count); - - if (pspr->Flags & PSPF_FORCESTYLE) - { - vis->Style.RenderStyle = LegacyRenderStyles[rs]; - } - else if (owner->RenderStyle == LegacyRenderStyles[STYLE_Fuzzy]) - { - vis->Style.RenderStyle = LegacyRenderStyles[STYLE_Fuzzy]; - } - else if (owner->RenderStyle == LegacyRenderStyles[STYLE_OptFuzzy]) - { - vis->Style.RenderStyle = LegacyRenderStyles[STYLE_OptFuzzy]; - vis->Style.RenderStyle.CheckFuzz(); - } - else if (owner->RenderStyle == LegacyRenderStyles[STYLE_Subtract]) - { - vis->Style.RenderStyle = LegacyRenderStyles[STYLE_Subtract]; - } - else - { - vis->Style.RenderStyle = LegacyRenderStyles[rs]; - } - } - else - { - vis->Style.RenderStyle = owner->RenderStyle; - } - - // Set the alpha based on if using the overlay's own or not. Also adjust - // and override the alpha if not forced. 
- if (pspr->Flags & PSPF_ALPHA) - { - if (vis->Style.RenderStyle == LegacyRenderStyles[STYLE_Fuzzy]) - { - alpha = owner->Alpha; - } - else if (vis->Style.RenderStyle == LegacyRenderStyles[STYLE_OptFuzzy]) - { - FRenderStyle style = vis->Style.RenderStyle; - style.CheckFuzz(); - switch (style.BlendOp) - { - default: - alpha = pspr->alpha * owner->Alpha; - break; - case STYLEOP_Fuzz: - case STYLEOP_Sub: - alpha = owner->Alpha; - break; - } - - } - else if (vis->Style.RenderStyle == LegacyRenderStyles[STYLE_Subtract]) - { - alpha = owner->Alpha; - } - else if (vis->Style.RenderStyle == LegacyRenderStyles[STYLE_Add] || - vis->Style.RenderStyle == LegacyRenderStyles[STYLE_Translucent] || - vis->Style.RenderStyle == LegacyRenderStyles[STYLE_TranslucentStencil] || - vis->Style.RenderStyle == LegacyRenderStyles[STYLE_AddStencil] || - vis->Style.RenderStyle == LegacyRenderStyles[STYLE_AddShaded]) - { - alpha = owner->Alpha * pspr->alpha; - } - else - { - alpha = owner->Alpha; - } - } - - // Should normal renderstyle come out on top at the end and we desire alpha, - // switch it to translucent. Normal never applies any sort of alpha. - if ((pspr->Flags & PSPF_ALPHA) && - vis->Style.RenderStyle == LegacyRenderStyles[STYLE_Normal] && - vis->Style.Alpha < 1.0) - { - vis->Style.RenderStyle = LegacyRenderStyles[STYLE_Translucent]; - alpha = owner->Alpha * pspr->alpha; - } - - // ALWAYS take priority if asked for, except fuzz. Fuzz does absolutely nothing - // no matter what way it's changed. - if (pspr->Flags & PSPF_FORCEALPHA) - { - //Due to lack of != operators... 
- if (vis->Style.RenderStyle == LegacyRenderStyles[STYLE_Fuzzy] || - vis->Style.RenderStyle == LegacyRenderStyles[STYLE_SoulTrans] || - vis->Style.RenderStyle == LegacyRenderStyles[STYLE_Stencil]) - { } - else - { - alpha = pspr->alpha; - vis->Style.RenderStyle.Flags |= STYLEF_ForceAlpha; - } - } - vis->Style.Alpha = clamp(float(alpha), 0.f, 1.f); - - // Due to how some of the effects are handled, going to 0 or less causes some - // weirdness to display. There's no point rendering it anyway if it's 0. - if (vis->Style.Alpha <= 0.) - return; - - //----------------------------------------------------------------------------- - - // The software renderer cannot invert the source without inverting the overlay - // too. That means if the source is inverted, we need to do the reverse of what - // the invert overlay flag says to do. - INTBOOL invertcolormap = (vis->Style.RenderStyle.Flags & STYLEF_InvertOverlay); - - if (vis->Style.RenderStyle.Flags & STYLEF_InvertSource) - { - invertcolormap = !invertcolormap; - } - - FDynamicColormap *mybasecolormap = basecolormap; - - if (vis->Style.RenderStyle.Flags & STYLEF_FadeToBlack) - { - if (invertcolormap) - { // Fade to white - mybasecolormap = GetSpecialLights(mybasecolormap->Color, MAKERGB(255, 255, 255), mybasecolormap->Desaturate); - invertcolormap = false; - } - else - { // Fade to black - mybasecolormap = GetSpecialLights(mybasecolormap->Color, MAKERGB(0, 0, 0), mybasecolormap->Desaturate); - } - } - - if (realfixedcolormap != nullptr && (!r_swtruecolor || (r_shadercolormaps && screen->Accel2D))) - { // fixed color - vis->Style.BaseColormap = realfixedcolormap; - vis->Style.ColormapNum = 0; - } - else - { - if (invertcolormap) - { - mybasecolormap = GetSpecialLights(mybasecolormap->Color, mybasecolormap->Fade.InverseColor(), mybasecolormap->Desaturate); - } - if (fixedlightlev >= 0) - { - vis->Style.BaseColormap = (r_fullbrightignoresectorcolor) ? 
&FullNormalLight : mybasecolormap; - vis->Style.ColormapNum = fixedlightlev >> COLORMAPSHIFT; - } - else if (!foggy && pspr->GetState()->GetFullbright()) - { // full bright - vis->Style.BaseColormap = (r_fullbrightignoresectorcolor) ? &FullNormalLight : mybasecolormap; // [RH] use basecolormap - vis->Style.ColormapNum = 0; - } - else - { // local light - vis->Style.BaseColormap = mybasecolormap; - vis->Style.ColormapNum = GETPALOOKUP(0, spriteshade); - } - } - if (camera->Inventory != nullptr) - { - BYTE oldcolormapnum = vis->Style.ColormapNum; - FSWColormap *oldcolormap = vis->Style.BaseColormap; - camera->Inventory->AlterWeaponSprite (&vis->Style); - if (vis->Style.BaseColormap != oldcolormap || vis->Style.ColormapNum != oldcolormapnum) - { - // The colormap has changed. Is it one we can easily identify? - // If not, then don't bother trying to identify it for - // hardware accelerated drawing. - if (vis->Style.BaseColormap < &SpecialColormaps[0] || - vis->Style.BaseColormap > &SpecialColormaps.Last()) - { - noaccel = true; - } - // Has the basecolormap changed? If so, we can't hardware accelerate it, - // since we don't know what it is anymore. - else if (vis->Style.BaseColormap != mybasecolormap) - { - noaccel = true; - } - } - } - // If we're drawing with a special colormap, but shaders for them are disabled, do - // not accelerate. - if (!r_shadercolormaps && (vis->Style.BaseColormap >= &SpecialColormaps[0] && - vis->Style.BaseColormap <= &SpecialColormaps.Last())) - { - noaccel = true; - } - // If drawing with a BOOM colormap, disable acceleration. - if (mybasecolormap == &NormalLight && NormalLight.Maps != realcolormaps.Maps) - { - noaccel = true; - } - // If the main colormap has fixed lights, and this sprite is being drawn with that - // colormap, disable acceleration so that the lights can remain fixed. 
- if (!noaccel && realfixedcolormap == nullptr && - NormalLightHasFixedLights && mybasecolormap == &NormalLight && - vis->pic->UseBasePalette()) - { - noaccel = true; - } - // [SP] If emulating GZDoom fullbright, disable acceleration - if (r_fullbrightignoresectorcolor && fixedlightlev >= 0) - mybasecolormap = &FullNormalLight; - if (r_fullbrightignoresectorcolor && !foggy && pspr->GetState()->GetFullbright()) - mybasecolormap = &FullNormalLight; - colormap_to_use = mybasecolormap; - } - else - { - colormap_to_use = basecolormap; - - vis->Style.BaseColormap = basecolormap; - vis->Style.ColormapNum = 0; - } - - // Check for hardware-assisted 2D. If it's available, and this sprite is not - // fuzzy, don't draw it until after the switch to 2D mode. - if (!noaccel && RenderTarget == screen && (DFrameBuffer *)screen->Accel2D) - { - FRenderStyle style = vis->Style.RenderStyle; - style.CheckFuzz(); - if (style.BlendOp != STYLEOP_Fuzz) - { - if (vispsprites.Size() < vispspindex + 1) - vispsprites.Reserve(vispsprites.Size() - vispspindex + 1); - - vispsprites[vispspindex].vis = vis; - vispsprites[vispspindex].basecolormap = colormap_to_use; - vispsprites[vispspindex].x1 = x1; - vispspindex++; - return; - } - } - - R_DrawVisSprite(vis); -} - -//========================================================================== -// -// R_DrawPlayerSprites -// -//========================================================================== - -void R_DrawPlayerSprites () -{ - int i; - int lightnum; - DPSprite* psp; - DPSprite* weapon; - sector_t* sec = NULL; - static sector_t tempsec; - int floorlight, ceilinglight; - F3DFloor *rover; - - if (!r_drawplayersprites || - !camera || - !camera->player || - (players[consoleplayer].cheats & CF_CHASECAM) || - (r_deathcamera && camera->health <= 0)) - return; - - if (fixedlightlev < 0 && viewsector->e && viewsector->e->XFloor.lightlist.Size()) - { - for (i = viewsector->e->XFloor.lightlist.Size() - 1; i >= 0; i--) - { - if (ViewPos.Z <= 
viewsector->e->XFloor.lightlist[i].plane.Zat0()) - { - rover = viewsector->e->XFloor.lightlist[i].caster; - if (rover) - { - if (rover->flags & FF_DOUBLESHADOW && ViewPos.Z <= rover->bottom.plane->Zat0()) - break; - sec = rover->model; - if (rover->flags & FF_FADEWALLS) - basecolormap = sec->ColorMap; - else - basecolormap = viewsector->e->XFloor.lightlist[i].extra_colormap; - } - break; - } - } - if(!sec) - { - sec = viewsector; - basecolormap = sec->ColorMap; - } - floorlight = ceilinglight = sec->lightlevel; - } - else - { // This used to use camera->Sector but due to interpolation that can be incorrect - // when the interpolated viewpoint is in a different sector than the camera. - sec = R_FakeFlat (viewsector, &tempsec, &floorlight, - &ceilinglight, false); - - // [RH] set basecolormap - basecolormap = sec->ColorMap; - } - - // [RH] set foggy flag - foggy = (level.fadeto || basecolormap->Fade || (level.flags & LEVEL_HASFADETABLE)); - r_actualextralight = foggy ? 0 : extralight << 4; - - // get light level - lightnum = ((floorlight + ceilinglight) >> 1) + r_actualextralight; - spriteshade = LIGHT2SHADE(lightnum) - 24*FRACUNIT; - - // clip to screen bounds - mfloorclip = screenheightarray; - mceilingclip = zeroarray; - - if (camera->player != NULL) - { - double centerhack = CenterY; - double wx, wy; - float bobx, boby; - - CenterY = viewheight / 2; - - P_BobWeapon (camera->player, &bobx, &boby, r_TicFracF); - - // Interpolate the main weapon layer once so as to be able to add it to other layers. 
- if ((weapon = camera->player->FindPSprite(PSP_WEAPON)) != nullptr) - { - if (weapon->firstTic) - { - wx = weapon->x; - wy = weapon->y; - } - else - { - wx = weapon->oldx + (weapon->x - weapon->oldx) * r_TicFracF; - wy = weapon->oldy + (weapon->y - weapon->oldy) * r_TicFracF; - } - } - else - { - wx = 0; - wy = 0; - } - - // add all active psprites - psp = camera->player->psprites; - while (psp) - { - // [RH] Don't draw the targeter's crosshair if the player already has a crosshair set. - // It's possible this psprite's caller is now null but the layer itself hasn't been destroyed - // because it didn't tick yet (if we typed 'take all' while in the console for example). - // In this case let's simply not draw it to avoid crashing. - - if ((psp->GetID() != PSP_TARGETCENTER || CrosshairImage == nullptr) && psp->GetCaller() != nullptr) - { - R_DrawPSprite(psp, camera, bobx, boby, wx, wy, r_TicFracF); - } - - psp = psp->GetNext(); - } - - CenterY = centerhack; - } -} - -//========================================================================== -// -// R_DrawRemainingPlayerSprites -// -// Called from D_Display to draw sprites that were not drawn by -// R_DrawPlayerSprites(). 
-// -//========================================================================== - -void R_DrawRemainingPlayerSprites() -{ - for (unsigned int i = 0; i < vispspindex; i++) - { - vissprite_t *vis; - - vis = vispsprites[i].vis; - FDynamicColormap *colormap = vispsprites[i].basecolormap; - bool flip = vis->xiscale < 0; - FSpecialColormap *special = NULL; - PalEntry overlay = 0; - FColormapStyle colormapstyle; - bool usecolormapstyle = false; - - if (vis->Style.BaseColormap >= &SpecialColormaps[0] && - vis->Style.BaseColormap < &SpecialColormaps[SpecialColormaps.Size()]) - { - special = static_cast(vis->Style.BaseColormap); - } - else if (colormap->Color == PalEntry(255,255,255) && - colormap->Desaturate == 0) - { - overlay = colormap->Fade; - overlay.a = BYTE(vis->Style.ColormapNum * 255 / NUMCOLORMAPS); - } - else - { - usecolormapstyle = true; - colormapstyle.Color = colormap->Color; - colormapstyle.Fade = colormap->Fade; - colormapstyle.Desaturate = colormap->Desaturate; - colormapstyle.FadeLevel = vis->Style.ColormapNum / float(NUMCOLORMAPS); - } - screen->DrawTexture(vis->pic, - viewwindowx + vispsprites[i].x1, - viewwindowy + viewheight/2 - vis->texturemid * vis->yscale - 0.5, - DTA_DestWidthF, FIXED2DBL(vis->pic->GetWidth() * vis->xscale), - DTA_DestHeightF, vis->pic->GetHeight() * vis->yscale, - DTA_Translation, TranslationToTable(vis->Translation), - DTA_FlipX, flip, - DTA_TopOffset, 0, - DTA_LeftOffset, 0, - DTA_ClipLeft, viewwindowx, - DTA_ClipTop, viewwindowy, - DTA_ClipRight, viewwindowx + viewwidth, - DTA_ClipBottom, viewwindowy + viewheight, - DTA_AlphaF, vis->Style.Alpha, - DTA_RenderStyle, vis->Style.RenderStyle, - DTA_FillColor, vis->FillColor, - DTA_SpecialColormap, special, - DTA_ColorOverlay, overlay.d, - DTA_ColormapStyle, usecolormapstyle ? 
&colormapstyle : NULL, - TAG_DONE); - } - - vispspindex = 0; -} // // R_SortVisSprites @@ -2592,7 +2010,7 @@ void R_DrawMasked (void) R_3D_DeleteHeights(); fake3D = 0; } - R_DrawPlayerSprites (); + R_DrawPlayerSprites(); } extern double BaseYaspectMul;; diff --git a/src/swrenderer/scene/r_things.h b/src/swrenderer/scene/r_things.h index 0e562a204e..a110b355c7 100644 --- a/src/swrenderer/scene/r_things.h +++ b/src/swrenderer/scene/r_things.h @@ -55,6 +55,8 @@ extern double pspriteyscale; extern FTexture *WallSpriteTile; +extern int spriteshade; + bool R_ClipSpriteColumnWithPortals(vissprite_t* spr); void R_DrawMaskedColumn (FTexture *texture, fixed_t column, bool unmasked = false); @@ -66,13 +68,13 @@ void R_AddSprites (sector_t *sec, int lightlevel, int fakeside); void R_DrawSprites (); void R_ClearSprites (); void R_DrawMasked (); -void R_DrawRemainingPlayerSprites (); void R_CheckOffscreenBuffer(int width, int height, bool spansonly); enum { DVF_OFFSCREEN = 1, DVF_SPANSONLY = 2, DVF_MIRRORED = 4 }; void R_ClipVisSprite (vissprite_t *vis, int xl, int xh); +void R_DrawVisSprite(vissprite_t *vis); } From 50c525161b7ee9141ab30495f22499f32e34b2f6 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 30 Dec 2016 08:11:06 +0100 Subject: [PATCH 602/912] Move wallsprite handling to r_wallsprite --- src/CMakeLists.txt | 1 + src/swrenderer/scene/r_segs.cpp | 12 +- src/swrenderer/scene/r_things.cpp | 185 +------------------- src/swrenderer/scene/r_things.h | 1 - src/swrenderer/scene/r_wallsprite.cpp | 234 ++++++++++++++++++++++++++ src/swrenderer/scene/r_wallsprite.h | 11 ++ 6 files changed, 253 insertions(+), 191 deletions(-) create mode 100644 src/swrenderer/scene/r_wallsprite.cpp create mode 100644 src/swrenderer/scene/r_wallsprite.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 1fa9367d91..e59a0fce4e 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -829,6 +829,7 @@ set( FASTMATH_PCH_SOURCES swrenderer/scene/r_visible_sprite.cpp 
swrenderer/scene/r_particle.cpp swrenderer/scene/r_playersprite.cpp + swrenderer/scene/r_wallsprite.cpp polyrenderer/poly_renderer.cpp polyrenderer/scene/poly_scene.cpp polyrenderer/scene/poly_portal.cpp diff --git a/src/swrenderer/scene/r_segs.cpp b/src/swrenderer/scene/r_segs.cpp index 40ad2bc52c..0d6a9276cb 100644 --- a/src/swrenderer/scene/r_segs.cpp +++ b/src/swrenderer/scene/r_segs.cpp @@ -51,6 +51,7 @@ #include "r_walldraw.h" #include "r_draw_segment.h" #include "r_portal.h" +#include "r_wallsprite.h" #include "swrenderer/r_memory.h" #define WALLYREPEAT 8 @@ -138,7 +139,6 @@ FTexture *rw_pic; static fixed_t *maskedtexturecol; static void R_RenderDecal (side_t *wall, DBaseDecal *first, drawseg_t *clipper, int pass); -static void WallSpriteColumn (void (*drawfunc)(const BYTE *column, const FTexture::Span *spans)); inline bool IsFogBoundary (sector_t *front, sector_t *back) { @@ -2246,10 +2246,10 @@ static void R_RenderDecal (side_t *wall, DBaseDecal *decal, drawseg_t *clipper, sprflipvert = false; } - MaskedScaleY = float(1 / yscale); + float maskedScaleY = float(1 / yscale); do { - dc_x = x1; + int x = x1; bool visible = R_SetPatchStyle (decal->RenderStyle, (float)decal->Alpha, decal->Translation, decal->AlphaColor); @@ -2261,14 +2261,14 @@ static void R_RenderDecal (side_t *wall, DBaseDecal *decal, drawseg_t *clipper, if (visible) { - while (dc_x < x2) + while (x < x2) { if (calclighting) { // calculate lighting R_SetColorMapLight(usecolormap, rw_light, wallshade); } - R_WallSpriteColumn (); - dc_x++; + R_WallSpriteColumn(x, maskedScaleY); + x++; } } diff --git a/src/swrenderer/scene/r_things.cpp b/src/swrenderer/scene/r_things.cpp index bfc550ed80..b974722061 100644 --- a/src/swrenderer/scene/r_things.cpp +++ b/src/swrenderer/scene/r_things.cpp @@ -72,6 +72,7 @@ #include "r_portal.h" #include "r_particle.h" #include "r_playersprite.h" +#include "r_wallsprite.h" #include "swrenderer/r_memory.h" EXTERN_CVAR(Int, r_drawfuzz) @@ -109,7 +110,6 @@ struct 
FCoverageBuffer }; extern double globaluclip, globaldclip; -extern float MaskedScaleY; // // Sprite rotation 0 is facing the viewer, @@ -151,7 +151,6 @@ static vissprite_t **spritesorter; static int spritesortersize = 0; static int vsprcount; -static void R_ProjectWallSprite(AActor *thing, const DVector3 &pos, FTextureID picnum, const DVector2 &scale, INTBOOL flip); @@ -530,114 +529,6 @@ void R_DrawVisSprite (vissprite_t *vis) NetUpdate (); } -void R_DrawWallSprite(vissprite_t *spr) -{ - int x1, x2; - double iyscale; - - x1 = MAX(spr->x1, spr->wallc.sx1); - x2 = MIN(spr->x2, spr->wallc.sx2); - if (x1 >= x2) - return; - WallT.InitFromWallCoords(&spr->wallc); - PrepWall(swall, lwall, spr->pic->GetWidth() << FRACBITS, x1, x2); - iyscale = 1 / spr->yscale; - dc_texturemid = (spr->gzt - ViewPos.Z) * iyscale; - if (spr->renderflags & RF_XFLIP) - { - int right = (spr->pic->GetWidth() << FRACBITS) - 1; - - for (int i = x1; i < x2; i++) - { - lwall[i] = right - lwall[i]; - } - } - // Prepare lighting - bool calclighting = false; - FDynamicColormap *usecolormap = basecolormap; - bool rereadcolormap = true; - - // Decals that are added to the scene must fade to black. - if (spr->Style.RenderStyle == LegacyRenderStyles[STYLE_Add] && usecolormap->Fade != 0) - { - usecolormap = GetSpecialLights(usecolormap->Color, 0, usecolormap->Desaturate); - rereadcolormap = false; - } - - int shade = LIGHT2SHADE(spr->sector->lightlevel + r_actualextralight); - GlobVis = r_WallVisibility; - rw_lightleft = float (GlobVis / spr->wallc.sz1); - rw_lightstep = float((GlobVis / spr->wallc.sz2 - rw_lightleft) / (spr->wallc.sx2 - spr->wallc.sx1)); - rw_light = rw_lightleft + (x1 - spr->wallc.sx1) * rw_lightstep; - if (fixedlightlev >= 0) - R_SetColorMapLight(usecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); - else if (fixedcolormap != NULL) - R_SetColorMapLight(fixedcolormap, 0, 0); - else if (!foggy && (spr->renderflags & RF_FULLBRIGHT)) - R_SetColorMapLight((r_fullbrightignoresectorcolor) ? 
&FullNormalLight : usecolormap, 0, 0); - else - calclighting = true; - - // Draw it - WallSpriteTile = spr->pic; - if (spr->renderflags & RF_YFLIP) - { - sprflipvert = true; - iyscale = -iyscale; - dc_texturemid -= spr->pic->GetHeight(); - } - else - { - sprflipvert = false; - } - - MaskedScaleY = (float)iyscale; - - dc_x = x1; - - bool visible = R_SetPatchStyle (spr->Style.RenderStyle, spr->Style.Alpha, spr->Translation, spr->FillColor); - - // R_SetPatchStyle can modify basecolormap. - if (rereadcolormap) - { - usecolormap = basecolormap; - } - - if (!visible) - { - return; - } - else - { - while (dc_x < x2) - { - if (calclighting) - { // calculate lighting - R_SetColorMapLight(usecolormap, rw_light, shade); - } - if (!R_ClipSpriteColumnWithPortals(spr)) - R_WallSpriteColumn(); - dc_x++; - } - } - R_FinishSetPatchStyle(); -} - -void R_WallSpriteColumn () -{ - float iscale = swall[dc_x] * MaskedScaleY; - dc_iscale = FLOAT2FIXED(iscale); - spryscale = 1 / iscale; - if (sprflipvert) - sprtopscreen = CenterY + dc_texturemid * spryscale; - else - sprtopscreen = CenterY - dc_texturemid * spryscale; - - dc_texturefrac = 0; - R_DrawMaskedColumn(WallSpriteTile, lwall[dc_x]); - rw_light += rw_lightstep; -} - #if 0 void R_DrawVisVoxel(vissprite_t *spr, int minslabz, int maxslabz, short *cliptop, short *clipbot) { @@ -1142,80 +1033,6 @@ void R_ProjectSprite (AActor *thing, int fakeside, F3DFloor *fakefloor, F3DFloor } } -static void R_ProjectWallSprite(AActor *thing, const DVector3 &pos, FTextureID picnum, const DVector2 &scale, int renderflags) -{ - FWallCoords wallc; - double x1, x2; - DVector2 left, right; - double gzb, gzt, tz; - FTexture *pic = TexMan(picnum, true); - DAngle ang = thing->Angles.Yaw + 90; - double angcos = ang.Cos(); - double angsin = ang.Sin(); - vissprite_t *vis; - - // Determine left and right edges of sprite. The sprite's angle is its normal, - // so the edges are 90 degrees each side of it. 
- x2 = pic->GetScaledWidth(); - x1 = pic->GetScaledLeftOffset(); - - x1 *= scale.X; - x2 *= scale.X; - - left.X = pos.X - x1 * angcos - ViewPos.X; - left.Y = pos.Y - x1 * angsin - ViewPos.Y; - right.X = left.X + x2 * angcos; - right.Y = right.Y + x2 * angsin; - - // Is it off-screen? - if (wallc.Init(left, right, TOO_CLOSE_Z)) - return; - - if (wallc.sx1 >= WindowRight || wallc.sx2 <= WindowLeft) - return; - - // Sprite sorting should probably treat these as walls, not sprites, - // but right now, I just want to get them drawing. - tz = (pos.X - ViewPos.X) * ViewTanCos + (pos.Y - ViewPos.Y) * ViewTanSin; - - int scaled_to = pic->GetScaledTopOffset(); - int scaled_bo = scaled_to - pic->GetScaledHeight(); - gzt = pos.Z + scale.Y * scaled_to; - gzb = pos.Z + scale.Y * scaled_bo; - - vis = R_NewVisSprite(); - vis->CurrentPortalUniq = CurrentPortalUniq; - vis->x1 = wallc.sx1 < WindowLeft ? WindowLeft : wallc.sx1; - vis->x2 = wallc.sx2 >= WindowRight ? WindowRight : wallc.sx2; - vis->yscale = (float)scale.Y; - vis->idepth = float(1 / tz); - vis->depth = (float)tz; - vis->sector = thing->Sector; - vis->heightsec = NULL; - vis->gpos = { (float)pos.X, (float)pos.Y, (float)pos.Z }; - vis->gzb = (float)gzb; - vis->gzt = (float)gzt; - vis->deltax = float(pos.X - ViewPos.X); - vis->deltay = float(pos.Y - ViewPos.Y); - vis->renderflags = renderflags; - if(thing->flags5 & MF5_BRIGHT) vis->renderflags |= RF_FULLBRIGHT; // kg3D - vis->Style.RenderStyle = thing->RenderStyle; - vis->FillColor = thing->fillcolor; - vis->Translation = thing->Translation; - vis->FakeFlatStat = 0; - vis->Style.Alpha = float(thing->Alpha); - vis->fakefloor = NULL; - vis->fakeceiling = NULL; - vis->bInMirror = MirrorFlags & RF_XFLIP; - vis->pic = pic; - vis->bIsVoxel = false; - vis->bWallSprite = true; - vis->Style.ColormapNum = GETPALOOKUP( - r_SpriteVisibility / MAX(tz, MINZ), spriteshade); - vis->Style.BaseColormap = basecolormap; - vis->wallc = wallc; -} - // // R_AddSprites // During BSP traversal, 
this adds sprites by sector. diff --git a/src/swrenderer/scene/r_things.h b/src/swrenderer/scene/r_things.h index a110b355c7..d3b3d5ebde 100644 --- a/src/swrenderer/scene/r_things.h +++ b/src/swrenderer/scene/r_things.h @@ -60,7 +60,6 @@ extern int spriteshade; bool R_ClipSpriteColumnWithPortals(vissprite_t* spr); void R_DrawMaskedColumn (FTexture *texture, fixed_t column, bool unmasked = false); -void R_WallSpriteColumn (); void R_CacheSprite (spritedef_t *sprite); void R_SortVisSprites (int (*compare)(const void *, const void *), size_t first); diff --git a/src/swrenderer/scene/r_wallsprite.cpp b/src/swrenderer/scene/r_wallsprite.cpp new file mode 100644 index 0000000000..e4718bdbe6 --- /dev/null +++ b/src/swrenderer/scene/r_wallsprite.cpp @@ -0,0 +1,234 @@ + +#include +#include +#include +#include "p_lnspec.h" +#include "templates.h" +#include "doomdef.h" +#include "m_swap.h" +#include "i_system.h" +#include "w_wad.h" +#include "swrenderer/r_main.h" +#include "swrenderer/scene/r_things.h" +#include "swrenderer/scene/r_wallsprite.h" +#include "c_console.h" +#include "c_cvars.h" +#include "c_dispatch.h" +#include "doomstat.h" +#include "v_video.h" +#include "sc_man.h" +#include "s_sound.h" +#include "sbar.h" +#include "gi.h" +#include "r_sky.h" +#include "cmdlib.h" +#include "g_level.h" +#include "d_net.h" +#include "colormatcher.h" +#include "d_netinf.h" +#include "p_effect.h" +#include "r_bsp.h" +#include "r_plane.h" +#include "r_segs.h" +#include "r_3dfloors.h" +#include "swrenderer/drawers/r_draw_rgba.h" +#include "swrenderer/drawers/r_draw_pal.h" +#include "v_palette.h" +#include "r_data/r_translate.h" +#include "r_data/colormaps.h" +#include "r_data/voxels.h" +#include "p_local.h" +#include "p_maputl.h" +#include "r_voxel.h" +#include "r_draw_segment.h" +#include "r_portal.h" +#include "swrenderer/r_memory.h" + +namespace swrenderer +{ + void R_ProjectWallSprite(AActor *thing, const DVector3 &pos, FTextureID picnum, const DVector2 &scale, int renderflags) + 
{ + FWallCoords wallc; + double x1, x2; + DVector2 left, right; + double gzb, gzt, tz; + FTexture *pic = TexMan(picnum, true); + DAngle ang = thing->Angles.Yaw + 90; + double angcos = ang.Cos(); + double angsin = ang.Sin(); + vissprite_t *vis; + + // Determine left and right edges of sprite. The sprite's angle is its normal, + // so the edges are 90 degrees each side of it. + x2 = pic->GetScaledWidth(); + x1 = pic->GetScaledLeftOffset(); + + x1 *= scale.X; + x2 *= scale.X; + + left.X = pos.X - x1 * angcos - ViewPos.X; + left.Y = pos.Y - x1 * angsin - ViewPos.Y; + right.X = left.X + x2 * angcos; + right.Y = right.Y + x2 * angsin; + + // Is it off-screen? + if (wallc.Init(left, right, TOO_CLOSE_Z)) + return; + + if (wallc.sx1 >= WindowRight || wallc.sx2 <= WindowLeft) + return; + + // Sprite sorting should probably treat these as walls, not sprites, + // but right now, I just want to get them drawing. + tz = (pos.X - ViewPos.X) * ViewTanCos + (pos.Y - ViewPos.Y) * ViewTanSin; + + int scaled_to = pic->GetScaledTopOffset(); + int scaled_bo = scaled_to - pic->GetScaledHeight(); + gzt = pos.Z + scale.Y * scaled_to; + gzb = pos.Z + scale.Y * scaled_bo; + + vis = R_NewVisSprite(); + vis->CurrentPortalUniq = CurrentPortalUniq; + vis->x1 = wallc.sx1 < WindowLeft ? WindowLeft : wallc.sx1; + vis->x2 = wallc.sx2 >= WindowRight ? 
WindowRight : wallc.sx2; + vis->yscale = (float)scale.Y; + vis->idepth = float(1 / tz); + vis->depth = (float)tz; + vis->sector = thing->Sector; + vis->heightsec = NULL; + vis->gpos = { (float)pos.X, (float)pos.Y, (float)pos.Z }; + vis->gzb = (float)gzb; + vis->gzt = (float)gzt; + vis->deltax = float(pos.X - ViewPos.X); + vis->deltay = float(pos.Y - ViewPos.Y); + vis->renderflags = renderflags; + if (thing->flags5 & MF5_BRIGHT) vis->renderflags |= RF_FULLBRIGHT; // kg3D + vis->Style.RenderStyle = thing->RenderStyle; + vis->FillColor = thing->fillcolor; + vis->Translation = thing->Translation; + vis->FakeFlatStat = 0; + vis->Style.Alpha = float(thing->Alpha); + vis->fakefloor = NULL; + vis->fakeceiling = NULL; + vis->bInMirror = MirrorFlags & RF_XFLIP; + vis->pic = pic; + vis->bIsVoxel = false; + vis->bWallSprite = true; + vis->Style.ColormapNum = GETPALOOKUP( + r_SpriteVisibility / MAX(tz, MINZ), spriteshade); + vis->Style.BaseColormap = basecolormap; + vis->wallc = wallc; + } + + void R_DrawWallSprite(vissprite_t *spr) + { + int x1, x2; + double iyscale; + + x1 = MAX(spr->x1, spr->wallc.sx1); + x2 = MIN(spr->x2, spr->wallc.sx2); + if (x1 >= x2) + return; + WallT.InitFromWallCoords(&spr->wallc); + PrepWall(swall, lwall, spr->pic->GetWidth() << FRACBITS, x1, x2); + iyscale = 1 / spr->yscale; + dc_texturemid = (spr->gzt - ViewPos.Z) * iyscale; + if (spr->renderflags & RF_XFLIP) + { + int right = (spr->pic->GetWidth() << FRACBITS) - 1; + + for (int i = x1; i < x2; i++) + { + lwall[i] = right - lwall[i]; + } + } + // Prepare lighting + bool calclighting = false; + FDynamicColormap *usecolormap = basecolormap; + bool rereadcolormap = true; + + // Decals that are added to the scene must fade to black. 
+ if (spr->Style.RenderStyle == LegacyRenderStyles[STYLE_Add] && usecolormap->Fade != 0) + { + usecolormap = GetSpecialLights(usecolormap->Color, 0, usecolormap->Desaturate); + rereadcolormap = false; + } + + int shade = LIGHT2SHADE(spr->sector->lightlevel + r_actualextralight); + GlobVis = r_WallVisibility; + rw_lightleft = float(GlobVis / spr->wallc.sz1); + rw_lightstep = float((GlobVis / spr->wallc.sz2 - rw_lightleft) / (spr->wallc.sx2 - spr->wallc.sx1)); + rw_light = rw_lightleft + (x1 - spr->wallc.sx1) * rw_lightstep; + if (fixedlightlev >= 0) + R_SetColorMapLight(usecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); + else if (fixedcolormap != NULL) + R_SetColorMapLight(fixedcolormap, 0, 0); + else if (!foggy && (spr->renderflags & RF_FULLBRIGHT)) + R_SetColorMapLight((r_fullbrightignoresectorcolor) ? &FullNormalLight : usecolormap, 0, 0); + else + calclighting = true; + + // Draw it + WallSpriteTile = spr->pic; + if (spr->renderflags & RF_YFLIP) + { + sprflipvert = true; + iyscale = -iyscale; + dc_texturemid -= spr->pic->GetHeight(); + } + else + { + sprflipvert = false; + } + + float maskedScaleY = (float)iyscale; + + int x = x1; + + bool visible = R_SetPatchStyle(spr->Style.RenderStyle, spr->Style.Alpha, spr->Translation, spr->FillColor); + + // R_SetPatchStyle can modify basecolormap. 
+ if (rereadcolormap) + { + usecolormap = basecolormap; + } + + if (!visible) + { + return; + } + else + { + while (x < x2) + { + if (calclighting) + { // calculate lighting + R_SetColorMapLight(usecolormap, rw_light, shade); + } + if (!R_ClipSpriteColumnWithPortals(spr)) + R_WallSpriteColumn(x, maskedScaleY); + x++; + } + } + R_FinishSetPatchStyle(); + } + + void R_WallSpriteColumn(int x, float maskedScaleY) + { + using namespace drawerargs; + + dc_x = x; + + float iscale = swall[dc_x] * maskedScaleY; + dc_iscale = FLOAT2FIXED(iscale); + spryscale = 1 / iscale; + if (sprflipvert) + sprtopscreen = CenterY + dc_texturemid * spryscale; + else + sprtopscreen = CenterY - dc_texturemid * spryscale; + + dc_texturefrac = 0; + R_DrawMaskedColumn(WallSpriteTile, lwall[dc_x]); + rw_light += rw_lightstep; + } +} diff --git a/src/swrenderer/scene/r_wallsprite.h b/src/swrenderer/scene/r_wallsprite.h new file mode 100644 index 0000000000..934c7d8eb9 --- /dev/null +++ b/src/swrenderer/scene/r_wallsprite.h @@ -0,0 +1,11 @@ + +#pragma once + +#include "r_visible_sprite.h" + +namespace swrenderer +{ + void R_ProjectWallSprite(AActor *thing, const DVector3 &pos, FTextureID picnum, const DVector2 &scale, int renderflags); + void R_DrawWallSprite(vissprite_t *spr); + void R_WallSpriteColumn(int x, float maskedScaleY); +} From 99e263e1b35cadcff54e35c4e268964e70f01752 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 30 Dec 2016 08:31:02 +0100 Subject: [PATCH 603/912] Decal rendering to r_decal --- src/CMakeLists.txt | 1 + src/swrenderer/drawers/r_draw_rgba.cpp | 7 - src/swrenderer/scene/r_decal.cpp | 296 +++++++++++++++++++++++++ src/swrenderer/scene/r_decal.h | 13 ++ src/swrenderer/scene/r_segs.cpp | 259 +--------------------- src/swrenderer/scene/r_segs.h | 6 + 6 files changed, 318 insertions(+), 264 deletions(-) create mode 100644 src/swrenderer/scene/r_decal.cpp create mode 100644 src/swrenderer/scene/r_decal.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 
e59a0fce4e..0b5b9ff777 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -830,6 +830,7 @@ set( FASTMATH_PCH_SOURCES swrenderer/scene/r_particle.cpp swrenderer/scene/r_playersprite.cpp swrenderer/scene/r_wallsprite.cpp + swrenderer/scene/r_decal.cpp polyrenderer/poly_renderer.cpp polyrenderer/scene/poly_scene.cpp polyrenderer/scene/poly_portal.cpp diff --git a/src/swrenderer/drawers/r_draw_rgba.cpp b/src/swrenderer/drawers/r_draw_rgba.cpp index 90c1ec21af..21f7da8286 100644 --- a/src/swrenderer/drawers/r_draw_rgba.cpp +++ b/src/swrenderer/drawers/r_draw_rgba.cpp @@ -60,13 +60,6 @@ CVAR(Float, r_lod_bias, -1.5, 0); // To do: add CVAR_ARCHIVE | CVAR_GLOBALCONFIG namespace swrenderer { - extern "C" short spanend[MAXHEIGHT]; - extern float rw_light; - extern float rw_lightstep; - extern int wallshade; - - ///////////////////////////////////////////////////////////////////////////// - DrawSpanLLVMCommand::DrawSpanLLVMCommand() { using namespace drawerargs; diff --git a/src/swrenderer/scene/r_decal.cpp b/src/swrenderer/scene/r_decal.cpp new file mode 100644 index 0000000000..374552621e --- /dev/null +++ b/src/swrenderer/scene/r_decal.cpp @@ -0,0 +1,296 @@ + +#include +#include +#include "templates.h" +#include "i_system.h" +#include "doomdef.h" +#include "doomstat.h" +#include "doomdata.h" +#include "p_lnspec.h" +#include "swrenderer/r_main.h" +#include "swrenderer/scene/r_things.h" +#include "r_sky.h" +#include "v_video.h" +#include "m_swap.h" +#include "w_wad.h" +#include "stats.h" +#include "a_sharedglobal.h" +#include "d_net.h" +#include "g_level.h" +#include "r_bsp.h" +#include "r_plane.h" +#include "r_decal.h" +#include "r_3dfloors.h" +#include "swrenderer/drawers/r_draw.h" +#include "v_palette.h" +#include "r_data/colormaps.h" +#include "r_walldraw.h" +#include "r_draw_segment.h" +#include "r_portal.h" +#include "r_wallsprite.h" +#include "r_draw_segment.h" +#include "r_segs.h" +#include "swrenderer/r_memory.h" + +namespace swrenderer +{ + void 
R_RenderDecals(side_t *sidedef, drawseg_t *draw_segment) + { + for (DBaseDecal *decal = sidedef->AttachedDecals; decal != NULL; decal = decal->WallNext) + { + R_RenderDecal(sidedef, decal, draw_segment, 0); + } + } + + // pass = 0: when seg is first drawn + // = 1: drawing masked textures (including sprites) + // Currently, only pass = 0 is done or used + + void R_RenderDecal(side_t *wall, DBaseDecal *decal, drawseg_t *clipper, int pass) + { + DVector2 decal_left, decal_right, decal_pos; + int x1, x2; + double yscale; + BYTE flipx; + double zpos; + int needrepeat = 0; + sector_t *front, *back; + bool calclighting; + bool rereadcolormap; + FDynamicColormap *usecolormap; + + if (decal->RenderFlags & RF_INVISIBLE || !viewactive || !decal->PicNum.isValid()) + return; + + // Determine actor z + zpos = decal->Z; + front = curline->frontsector; + back = (curline->backsector != NULL) ? curline->backsector : curline->frontsector; + switch (decal->RenderFlags & RF_RELMASK) + { + default: + zpos = decal->Z; + break; + case RF_RELUPPER: + if (curline->linedef->flags & ML_DONTPEGTOP) + { + zpos = decal->Z + front->GetPlaneTexZ(sector_t::ceiling); + } + else + { + zpos = decal->Z + back->GetPlaneTexZ(sector_t::ceiling); + } + break; + case RF_RELLOWER: + if (curline->linedef->flags & ML_DONTPEGBOTTOM) + { + zpos = decal->Z + front->GetPlaneTexZ(sector_t::ceiling); + } + else + { + zpos = decal->Z + back->GetPlaneTexZ(sector_t::floor); + } + break; + case RF_RELMID: + if (curline->linedef->flags & ML_DONTPEGBOTTOM) + { + zpos = decal->Z + front->GetPlaneTexZ(sector_t::floor); + } + else + { + zpos = decal->Z + front->GetPlaneTexZ(sector_t::ceiling); + } + } + + WallSpriteTile = TexMan(decal->PicNum, true); + flipx = (BYTE)(decal->RenderFlags & RF_XFLIP); + + if (WallSpriteTile == NULL || WallSpriteTile->UseType == FTexture::TEX_Null) + { + return; + } + + // Determine left and right edges of sprite. 
Since this sprite is bound + // to a wall, we use the wall's angle instead of the decal's. This is + // pretty much the same as what R_AddLine() does. + + FWallCoords savecoord = WallC; + + double edge_right = WallSpriteTile->GetWidth(); + double edge_left = WallSpriteTile->LeftOffset; + edge_right = (edge_right - edge_left) * decal->ScaleX; + edge_left *= decal->ScaleX; + + double dcx, dcy; + decal->GetXY(wall, dcx, dcy); + decal_pos = { dcx, dcy }; + + DVector2 angvec = (curline->v2->fPos() - curline->v1->fPos()).Unit(); + + decal_left = decal_pos - edge_left * angvec - ViewPos; + decal_right = decal_pos + edge_right * angvec - ViewPos; + + if (WallC.Init(decal_left, decal_right, TOO_CLOSE_Z)) + goto done; + + x1 = WallC.sx1; + x2 = WallC.sx2; + + if (x1 >= clipper->x2 || x2 <= clipper->x1) + goto done; + + WallT.InitFromWallCoords(&WallC); + + // Get the top and bottom clipping arrays + switch (decal->RenderFlags & RF_CLIPMASK) + { + case RF_CLIPFULL: + if (curline->backsector == NULL) + { + if (pass != 0) + { + goto done; + } + mceilingclip = walltop; + mfloorclip = wallbottom; + } + else if (pass == 0) + { + mceilingclip = walltop; + mfloorclip = ceilingclip; + needrepeat = 1; + } + else + { + mceilingclip = openings + clipper->sprtopclip - clipper->x1; + mfloorclip = openings + clipper->sprbottomclip - clipper->x1; + } + break; + + case RF_CLIPUPPER: + if (pass != 0) + { + goto done; + } + mceilingclip = walltop; + mfloorclip = ceilingclip; + break; + + case RF_CLIPMID: + if (curline->backsector != NULL && pass != 2) + { + goto done; + } + mceilingclip = openings + clipper->sprtopclip - clipper->x1; + mfloorclip = openings + clipper->sprbottomclip - clipper->x1; + break; + + case RF_CLIPLOWER: + if (pass != 0) + { + goto done; + } + mceilingclip = floorclip; + mfloorclip = wallbottom; + break; + } + + yscale = decal->ScaleY; + dc_texturemid = WallSpriteTile->TopOffset + (zpos - ViewPos.Z) / yscale; + + // Clip sprite to drawseg + x1 = MAX(clipper->x1, x1); + 
x2 = MIN(clipper->x2, x2); + if (x1 >= x2) + { + goto done; + } + + PrepWall(swall, lwall, WallSpriteTile->GetWidth(), x1, x2); + + if (flipx) + { + int i; + int right = (WallSpriteTile->GetWidth() << FRACBITS) - 1; + + for (i = x1; i < x2; i++) + { + lwall[i] = right - lwall[i]; + } + } + + // Prepare lighting + calclighting = false; + usecolormap = basecolormap; + rereadcolormap = true; + + // Decals that are added to the scene must fade to black. + if (decal->RenderStyle == LegacyRenderStyles[STYLE_Add] && usecolormap->Fade != 0) + { + usecolormap = GetSpecialLights(usecolormap->Color, 0, usecolormap->Desaturate); + rereadcolormap = false; + } + + rw_light = rw_lightleft + (x1 - savecoord.sx1) * rw_lightstep; + if (fixedlightlev >= 0) + R_SetColorMapLight((r_fullbrightignoresectorcolor) ? &FullNormalLight : usecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); + else if (fixedcolormap != NULL) + R_SetColorMapLight(fixedcolormap, 0, 0); + else if (!foggy && (decal->RenderFlags & RF_FULLBRIGHT)) + R_SetColorMapLight((r_fullbrightignoresectorcolor) ? &FullNormalLight : usecolormap, 0, 0); + else + calclighting = true; + + // Draw it + if (decal->RenderFlags & RF_YFLIP) + { + sprflipvert = true; + yscale = -yscale; + dc_texturemid -= WallSpriteTile->GetHeight(); + } + else + { + sprflipvert = false; + } + + float maskedScaleY = float(1 / yscale); + do + { + int x = x1; + + bool visible = R_SetPatchStyle(decal->RenderStyle, (float)decal->Alpha, decal->Translation, decal->AlphaColor); + + // R_SetPatchStyle can modify basecolormap. + if (rereadcolormap) + { + usecolormap = basecolormap; + } + + if (visible) + { + while (x < x2) + { + if (calclighting) + { // calculate lighting + R_SetColorMapLight(usecolormap, rw_light, wallshade); + } + R_WallSpriteColumn(x, maskedScaleY); + x++; + } + } + + // If this sprite is RF_CLIPFULL on a two-sided line, needrepeat will + // be set 1 if we need to draw on the lower wall. 
In all other cases, + // needrepeat will be 0, and the while will fail. + mceilingclip = floorclip; + mfloorclip = wallbottom; + R_FinishSetPatchStyle(); + } while (needrepeat--); + + colfunc = basecolfunc; + + R_FinishSetPatchStyle(); + done: + WallC = savecoord; + } +} diff --git a/src/swrenderer/scene/r_decal.h b/src/swrenderer/scene/r_decal.h new file mode 100644 index 0000000000..8eeadb77ea --- /dev/null +++ b/src/swrenderer/scene/r_decal.h @@ -0,0 +1,13 @@ + +#pragma once + +struct side_t; +class DBaseDecal; + +namespace swrenderer +{ + struct drawseg_t; + + void R_RenderDecals(side_t *wall, drawseg_t *draw_segment); + void R_RenderDecal(side_t *wall, DBaseDecal *first, drawseg_t *clipper, int pass); +} diff --git a/src/swrenderer/scene/r_segs.cpp b/src/swrenderer/scene/r_segs.cpp index 0d6a9276cb..315411b936 100644 --- a/src/swrenderer/scene/r_segs.cpp +++ b/src/swrenderer/scene/r_segs.cpp @@ -52,6 +52,7 @@ #include "r_draw_segment.h" #include "r_portal.h" #include "r_wallsprite.h" +#include "r_decal.h" #include "swrenderer/r_memory.h" #define WALLYREPEAT 8 @@ -138,7 +139,6 @@ FTexture *rw_pic; static fixed_t *maskedtexturecol; -static void R_RenderDecal (side_t *wall, DBaseDecal *first, drawseg_t *clipper, int pass); inline bool IsFogBoundary (sector_t *front, sector_t *back) { @@ -1810,10 +1810,7 @@ bool R_StoreWallRange (int start, int stop) // [ZZ] Only if not an active mirror if (!rw_markportal) { - for (DBaseDecal *decal = curline->sidedef->AttachedDecals; decal != NULL; decal = decal->WallNext) - { - R_RenderDecal (curline->sidedef, decal, draw_segment, 0); - } + R_RenderDecals(curline->sidedef, draw_segment); } if (rw_markportal) @@ -2035,256 +2032,4 @@ void PrepLWall(fixed_t *upos, double walxrepeat, int x1, int x2) } } -// pass = 0: when seg is first drawn -// = 1: drawing masked textures (including sprites) -// Currently, only pass = 0 is done or used - -static void R_RenderDecal (side_t *wall, DBaseDecal *decal, drawseg_t *clipper, int pass) -{ - 
DVector2 decal_left, decal_right, decal_pos; - int x1, x2; - double yscale; - BYTE flipx; - double zpos; - int needrepeat = 0; - sector_t *front, *back; - bool calclighting; - bool rereadcolormap; - FDynamicColormap *usecolormap; - - if (decal->RenderFlags & RF_INVISIBLE || !viewactive || !decal->PicNum.isValid()) - return; - - // Determine actor z - zpos = decal->Z; - front = curline->frontsector; - back = (curline->backsector != NULL) ? curline->backsector : curline->frontsector; - switch (decal->RenderFlags & RF_RELMASK) - { - default: - zpos = decal->Z; - break; - case RF_RELUPPER: - if (curline->linedef->flags & ML_DONTPEGTOP) - { - zpos = decal->Z + front->GetPlaneTexZ(sector_t::ceiling); - } - else - { - zpos = decal->Z + back->GetPlaneTexZ(sector_t::ceiling); - } - break; - case RF_RELLOWER: - if (curline->linedef->flags & ML_DONTPEGBOTTOM) - { - zpos = decal->Z + front->GetPlaneTexZ(sector_t::ceiling); - } - else - { - zpos = decal->Z + back->GetPlaneTexZ(sector_t::floor); - } - break; - case RF_RELMID: - if (curline->linedef->flags & ML_DONTPEGBOTTOM) - { - zpos = decal->Z + front->GetPlaneTexZ(sector_t::floor); - } - else - { - zpos = decal->Z + front->GetPlaneTexZ(sector_t::ceiling); - } - } - - WallSpriteTile = TexMan(decal->PicNum, true); - flipx = (BYTE)(decal->RenderFlags & RF_XFLIP); - - if (WallSpriteTile == NULL || WallSpriteTile->UseType == FTexture::TEX_Null) - { - return; - } - - // Determine left and right edges of sprite. Since this sprite is bound - // to a wall, we use the wall's angle instead of the decal's. This is - // pretty much the same as what R_AddLine() does. 
- - FWallCoords savecoord = WallC; - - double edge_right = WallSpriteTile->GetWidth(); - double edge_left = WallSpriteTile->LeftOffset; - edge_right = (edge_right - edge_left) * decal->ScaleX; - edge_left *= decal->ScaleX; - - double dcx, dcy; - decal->GetXY(wall, dcx, dcy); - decal_pos = { dcx, dcy }; - - DVector2 angvec = (curline->v2->fPos() - curline->v1->fPos()).Unit(); - - decal_left = decal_pos - edge_left * angvec - ViewPos; - decal_right = decal_pos + edge_right * angvec - ViewPos; - - if (WallC.Init(decal_left, decal_right, TOO_CLOSE_Z)) - goto done; - - x1 = WallC.sx1; - x2 = WallC.sx2; - - if (x1 >= clipper->x2 || x2 <= clipper->x1) - goto done; - - WallT.InitFromWallCoords(&WallC); - - // Get the top and bottom clipping arrays - switch (decal->RenderFlags & RF_CLIPMASK) - { - case RF_CLIPFULL: - if (curline->backsector == NULL) - { - if (pass != 0) - { - goto done; - } - mceilingclip = walltop; - mfloorclip = wallbottom; - } - else if (pass == 0) - { - mceilingclip = walltop; - mfloorclip = ceilingclip; - needrepeat = 1; - } - else - { - mceilingclip = openings + clipper->sprtopclip - clipper->x1; - mfloorclip = openings + clipper->sprbottomclip - clipper->x1; - } - break; - - case RF_CLIPUPPER: - if (pass != 0) - { - goto done; - } - mceilingclip = walltop; - mfloorclip = ceilingclip; - break; - - case RF_CLIPMID: - if (curline->backsector != NULL && pass != 2) - { - goto done; - } - mceilingclip = openings + clipper->sprtopclip - clipper->x1; - mfloorclip = openings + clipper->sprbottomclip - clipper->x1; - break; - - case RF_CLIPLOWER: - if (pass != 0) - { - goto done; - } - mceilingclip = floorclip; - mfloorclip = wallbottom; - break; - } - - yscale = decal->ScaleY; - dc_texturemid = WallSpriteTile->TopOffset + (zpos - ViewPos.Z) / yscale; - - // Clip sprite to drawseg - x1 = MAX(clipper->x1, x1); - x2 = MIN(clipper->x2, x2); - if (x1 >= x2) - { - goto done; - } - - PrepWall (swall, lwall, WallSpriteTile->GetWidth(), x1, x2); - - if (flipx) - { - 
int i; - int right = (WallSpriteTile->GetWidth() << FRACBITS) - 1; - - for (i = x1; i < x2; i++) - { - lwall[i] = right - lwall[i]; - } - } - - // Prepare lighting - calclighting = false; - usecolormap = basecolormap; - rereadcolormap = true; - - // Decals that are added to the scene must fade to black. - if (decal->RenderStyle == LegacyRenderStyles[STYLE_Add] && usecolormap->Fade != 0) - { - usecolormap = GetSpecialLights(usecolormap->Color, 0, usecolormap->Desaturate); - rereadcolormap = false; - } - - rw_light = rw_lightleft + (x1 - savecoord.sx1) * rw_lightstep; - if (fixedlightlev >= 0) - R_SetColorMapLight((r_fullbrightignoresectorcolor) ? &FullNormalLight : usecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); - else if (fixedcolormap != NULL) - R_SetColorMapLight(fixedcolormap, 0, 0); - else if (!foggy && (decal->RenderFlags & RF_FULLBRIGHT)) - R_SetColorMapLight((r_fullbrightignoresectorcolor) ? &FullNormalLight : usecolormap, 0, 0); - else - calclighting = true; - - // Draw it - if (decal->RenderFlags & RF_YFLIP) - { - sprflipvert = true; - yscale = -yscale; - dc_texturemid -= WallSpriteTile->GetHeight(); - } - else - { - sprflipvert = false; - } - - float maskedScaleY = float(1 / yscale); - do - { - int x = x1; - - bool visible = R_SetPatchStyle (decal->RenderStyle, (float)decal->Alpha, decal->Translation, decal->AlphaColor); - - // R_SetPatchStyle can modify basecolormap. - if (rereadcolormap) - { - usecolormap = basecolormap; - } - - if (visible) - { - while (x < x2) - { - if (calclighting) - { // calculate lighting - R_SetColorMapLight(usecolormap, rw_light, wallshade); - } - R_WallSpriteColumn(x, maskedScaleY); - x++; - } - } - - // If this sprite is RF_CLIPFULL on a two-sided line, needrepeat will - // be set 1 if we need to draw on the lower wall. In all other cases, - // needrepeat will be 0, and the while will fail. 
- mceilingclip = floorclip; - mfloorclip = wallbottom; - R_FinishSetPatchStyle (); - } while (needrepeat--); - - colfunc = basecolfunc; - - R_FinishSetPatchStyle (); -done: - WallC = savecoord; -} - } diff --git a/src/swrenderer/scene/r_segs.h b/src/swrenderer/scene/r_segs.h index 8aafa6ccee..9c83b534d3 100644 --- a/src/swrenderer/scene/r_segs.h +++ b/src/swrenderer/scene/r_segs.h @@ -43,12 +43,18 @@ void PrepLWall (fixed_t *lwall, double walxrepeat, int x1, int x2); void R_RenderSegLoop (); +extern short walltop[MAXWIDTH]; // [RH] record max extents of wall +extern short wallbottom[MAXWIDTH]; +extern short wallupper[MAXWIDTH]; +extern short walllower[MAXWIDTH]; extern float swall[MAXWIDTH]; extern fixed_t lwall[MAXWIDTH]; +extern double lwallscale; extern float rw_light; // [RH] Scale lights with viewsize adjustments extern float rw_lightstep; extern float rw_lightleft; extern fixed_t rw_offset; +extern int wallshade; } From e3a909c29649d63f737162c094f146771c88766c Mon Sep 17 00:00:00 2001 From: "alexey.lysiuk" Date: Fri, 30 Dec 2016 16:05:57 +0200 Subject: [PATCH 604/912] Fixed compilation with GCC/Clang No more 'error: cannot jump from this goto statement to its label' --- src/swrenderer/scene/r_decal.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/swrenderer/scene/r_decal.cpp b/src/swrenderer/scene/r_decal.cpp index 374552621e..4159595fd6 100644 --- a/src/swrenderer/scene/r_decal.cpp +++ b/src/swrenderer/scene/r_decal.cpp @@ -126,6 +126,7 @@ namespace swrenderer decal_pos = { dcx, dcy }; DVector2 angvec = (curline->v2->fPos() - curline->v1->fPos()).Unit(); + float maskedScaleY; decal_left = decal_pos - edge_left * angvec - ViewPos; decal_right = decal_pos + edge_right * angvec - ViewPos; @@ -253,7 +254,7 @@ namespace swrenderer sprflipvert = false; } - float maskedScaleY = float(1 / yscale); + maskedScaleY = float(1 / yscale); do { int x = x1; From 447b1625349479b851ff0c8a4863380d3aecabb0 Mon Sep 17 00:00:00 2001 From: Magnus 
Norddahl Date: Sat, 31 Dec 2016 10:19:31 +0100 Subject: [PATCH 605/912] Move sky rendering to its own file --- src/CMakeLists.txt | 1 + src/swrenderer/scene/r_plane.cpp | 504 +--------------------------- src/swrenderer/scene/r_plane.h | 1 - src/swrenderer/scene/r_segs.h | 1 + src/swrenderer/scene/r_skyplane.cpp | 502 +++++++++++++++++++++++++++ src/swrenderer/scene/r_skyplane.h | 18 + 6 files changed, 523 insertions(+), 504 deletions(-) create mode 100644 src/swrenderer/scene/r_skyplane.cpp create mode 100644 src/swrenderer/scene/r_skyplane.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 0b5b9ff777..a31350cd87 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -831,6 +831,7 @@ set( FASTMATH_PCH_SOURCES swrenderer/scene/r_playersprite.cpp swrenderer/scene/r_wallsprite.cpp swrenderer/scene/r_decal.cpp + swrenderer/scene/r_skyplane.cpp polyrenderer/poly_renderer.cpp polyrenderer/scene/poly_scene.cpp polyrenderer/scene/poly_portal.cpp diff --git a/src/swrenderer/scene/r_plane.cpp b/src/swrenderer/scene/r_plane.cpp index 83810ad95f..2f74d66921 100644 --- a/src/swrenderer/scene/r_plane.cpp +++ b/src/swrenderer/scene/r_plane.cpp @@ -65,17 +65,15 @@ #include "r_clip_segment.h" #include "r_draw_segment.h" #include "r_portal.h" +#include "r_skyplane.h" #include "swrenderer/r_memory.h" #ifdef _MSC_VER #pragma warning(disable:4244) #endif -CVAR(Bool, r_linearsky, false, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); CVAR(Bool, tilt, false, 0); -EXTERN_CVAR(Int, r_skymode) - namespace swrenderer { using namespace drawerargs; @@ -84,8 +82,6 @@ extern int wallshade; extern subsector_t *InSubsector; -static void R_DrawSkyStriped (visplane_t *pl); - planefunction_t floorfunc; planefunction_t ceilingfunc; @@ -714,381 +710,6 @@ visplane_t *R_CheckPlane (visplane_t *pl, int start, int stop) return pl; } - -//========================================================================== -// -// R_MakeSpans -// -// 
-//========================================================================== - -inline void R_MakeSpans (int x, int t1, int b1, int t2, int b2, void (*mapfunc)(int y, int x1)) -{ -} - -//========================================================================== -// -// R_DrawSky -// -// Can handle overlapped skies. Note that the front sky is *not* masked in -// in the normal convention for patches, but uses color 0 as a transparent -// color instead. -// -// Note that since ZDoom now uses color 0 as transparent for other purposes, -// you can use normal texture transparency, so the distinction isn't so -// important anymore, but you should still be aware of it. -// -//========================================================================== - -static FTexture *frontskytex, *backskytex; -static angle_t skyflip; -static int frontpos, backpos; -static double frontyScale; -static fixed_t frontcyl, backcyl; -static double skymid; -static angle_t skyangle; -static double frontiScale; - -extern float swall[MAXWIDTH]; -extern fixed_t lwall[MAXWIDTH]; -extern fixed_t rw_offset; -extern FTexture *rw_pic; - -// Allow for layer skies up to 512 pixels tall. This is overkill, -// since the most anyone can ever see of the sky is 500 pixels. -// We need 4 skybufs because R_DrawSkySegment can draw up to 4 columns at a time. -// Need two versions - one for true color and one for palette -#define MAXSKYBUF 3072 -static BYTE skybuf[4][512]; -static uint32_t skybuf_bgra[MAXSKYBUF][512]; -static DWORD lastskycol[4]; -static DWORD lastskycol_bgra[MAXSKYBUF]; -static int skycolplace; -static int skycolplace_bgra; - - -// Get a column of sky when there is only one sky texture. 
-static const BYTE *R_GetOneSkyColumn (FTexture *fronttex, int x) -{ - int tx; - if (r_linearsky) - { - angle_t xangle = (angle_t)((0.5 - x / (double)viewwidth) * FocalTangent * ANGLE_90); - angle_t column = (skyangle + xangle) ^ skyflip; - tx = (UMulScale16(column, frontcyl) + frontpos) >> FRACBITS; - } - else - { - angle_t column = (skyangle + xtoviewangle[x]) ^ skyflip; - tx = (UMulScale16(column, frontcyl) + frontpos) >> FRACBITS; - } - - if (!r_swtruecolor) - return fronttex->GetColumn(tx, NULL); - else - { - return (const BYTE *)fronttex->GetColumnBgra(tx, NULL); - } -} - -// Get a column of sky when there are two overlapping sky textures -static const BYTE *R_GetTwoSkyColumns (FTexture *fronttex, int x) -{ - DWORD ang, angle1, angle2; - - if (r_linearsky) - { - angle_t xangle = (angle_t)((0.5 - x / (double)viewwidth) * FocalTangent * ANGLE_90); - ang = (skyangle + xangle) ^ skyflip; - } - else - { - ang = (skyangle + xtoviewangle[x]) ^ skyflip; - } - angle1 = (DWORD)((UMulScale16(ang, frontcyl) + frontpos) >> FRACBITS); - angle2 = (DWORD)((UMulScale16(ang, backcyl) + backpos) >> FRACBITS); - - // Check if this column has already been built. If so, there's - // no reason to waste time building it again. - DWORD skycol = (angle1 << 16) | angle2; - int i; - - if (!r_swtruecolor) - { - for (i = 0; i < 4; ++i) - { - if (lastskycol[i] == skycol) - { - return skybuf[i]; - } - } - - lastskycol[skycolplace] = skycol; - BYTE *composite = skybuf[skycolplace]; - skycolplace = (skycolplace + 1) & 3; - - // The ordering of the following code has been tuned to allow VC++ to optimize - // it well. In particular, this arrangement lets it keep count in a register - // instead of on the stack. 
- const BYTE *front = fronttex->GetColumn(angle1, NULL); - const BYTE *back = backskytex->GetColumn(angle2, NULL); - - int count = MIN(512, MIN(backskytex->GetHeight(), fronttex->GetHeight())); - i = 0; - do - { - if (front[i]) - { - composite[i] = front[i]; - } - else - { - composite[i] = back[i]; - } - } while (++i, --count); - return composite; - } - else - { - //return R_GetOneSkyColumn(fronttex, x); - for (i = skycolplace_bgra - 4; i < skycolplace_bgra; ++i) - { - int ic = (i % MAXSKYBUF); // i "checker" - can wrap around the ends of the array - if (lastskycol_bgra[ic] == skycol) - { - return (BYTE*)(skybuf_bgra[ic]); - } - } - - lastskycol_bgra[skycolplace_bgra] = skycol; - uint32_t *composite = skybuf_bgra[skycolplace_bgra]; - skycolplace_bgra = (skycolplace_bgra + 1) % MAXSKYBUF; - - // The ordering of the following code has been tuned to allow VC++ to optimize - // it well. In particular, this arrangement lets it keep count in a register - // instead of on the stack. - const uint32_t *front = (const uint32_t *)fronttex->GetColumnBgra(angle1, NULL); - const uint32_t *back = (const uint32_t *)backskytex->GetColumnBgra(angle2, NULL); - - //[SP] Paletted version is used for comparison only - const BYTE *frontcompare = fronttex->GetColumn(angle1, NULL); - - int count = MIN(512, MIN(backskytex->GetHeight(), fronttex->GetHeight())); - i = 0; - do - { - if (frontcompare[i]) - { - composite[i] = front[i]; - } - else - { - composite[i] = back[i]; - } - } while (++i, --count); - return (BYTE*)composite; - } -} - -static void R_DrawSkyColumnStripe(int start_x, int y1, int y2, int columns, double scale, double texturemid, double yrepeat) -{ - uint32_t height = frontskytex->GetHeight(); - - for (int i = 0; i < columns; i++) - { - double uv_stepd = skyiscale * yrepeat; - double v = (texturemid + uv_stepd * (y1 - CenterY + 0.5)) / height; - double v_step = uv_stepd / height; - - uint32_t uv_pos = (uint32_t)(v * 0x01000000); - uint32_t uv_step = (uint32_t)(v_step * 
0x01000000); - - int x = start_x + i; - if (MirrorFlags & RF_XFLIP) - x = (viewwidth - x); - - DWORD ang, angle1, angle2; - - if (r_linearsky) - { - angle_t xangle = (angle_t)((0.5 - x / (double)viewwidth) * FocalTangent * ANGLE_90); - ang = (skyangle + xangle) ^ skyflip; - } - else - { - ang = (skyangle + xtoviewangle[x]) ^ skyflip; - } - angle1 = (DWORD)((UMulScale16(ang, frontcyl) + frontpos) >> FRACBITS); - angle2 = (DWORD)((UMulScale16(ang, backcyl) + backpos) >> FRACBITS); - - if (r_swtruecolor) - { - dc_wall_source[i] = (const BYTE *)frontskytex->GetColumnBgra(angle1, nullptr); - dc_wall_source2[i] = backskytex ? (const BYTE *)backskytex->GetColumnBgra(angle2, nullptr) : nullptr; - } - else - { - dc_wall_source[i] = (const BYTE *)frontskytex->GetColumn(angle1, nullptr); - dc_wall_source2[i] = backskytex ? (const BYTE *)backskytex->GetColumn(angle2, nullptr) : nullptr; - } - - dc_wall_iscale[i] = uv_step; - dc_wall_texturefrac[i] = uv_pos; - } - - dc_wall_sourceheight[0] = height; - dc_wall_sourceheight[1] = backskytex ? backskytex->GetHeight() : height; - int pixelsize = r_swtruecolor ? 
4 : 1; - dc_dest = (ylookup[y1] + start_x) * pixelsize + dc_destorg; - dc_count = y2 - y1; - - uint32_t solid_top = frontskytex->GetSkyCapColor(false); - uint32_t solid_bottom = frontskytex->GetSkyCapColor(true); - - if (!backskytex) - R_Drawers()->DrawSingleSkyColumn(solid_top, solid_bottom); - else - R_Drawers()->DrawDoubleSkyColumn(solid_top, solid_bottom); -} - -static void R_DrawSkyColumn(int start_x, int y1, int y2, int columns) -{ - if (1 << frontskytex->HeightBits == frontskytex->GetHeight()) - { - double texturemid = skymid * frontskytex->Scale.Y + frontskytex->GetHeight(); - R_DrawSkyColumnStripe(start_x, y1, y2, columns, frontskytex->Scale.Y, texturemid, frontskytex->Scale.Y); - } - else - { - double yrepeat = frontskytex->Scale.Y; - double scale = frontskytex->Scale.Y * skyscale; - double iscale = 1 / scale; - short drawheight = short(frontskytex->GetHeight() * scale); - double topfrac = fmod(skymid + iscale * (1 - CenterY), frontskytex->GetHeight()); - if (topfrac < 0) topfrac += frontskytex->GetHeight(); - double texturemid = topfrac - iscale * (1 - CenterY); - R_DrawSkyColumnStripe(start_x, y1, y2, columns, scale, texturemid, yrepeat); - } -} - -static void R_DrawCapSky(visplane_t *pl) -{ - int x1 = pl->left; - int x2 = pl->right; - short *uwal = (short *)pl->top; - short *dwal = (short *)pl->bottom; - - for (int x = x1; x < x2; x++) - { - int y1 = uwal[x]; - int y2 = dwal[x]; - if (y2 <= y1) - continue; - - R_DrawSkyColumn(x, y1, y2, 1); - } -} - -static void R_DrawSky (visplane_t *pl) -{ - if (r_skymode == 2) - { - R_DrawCapSky(pl); - return; - } - - int x; - float swal; - - if (pl->left >= pl->right) - return; - - swal = skyiscale; - for (x = pl->left; x < pl->right; ++x) - { - swall[x] = swal; - } - - if (MirrorFlags & RF_XFLIP) - { - for (x = pl->left; x < pl->right; ++x) - { - lwall[x] = (viewwidth - x) << FRACBITS; - } - } - else - { - for (x = pl->left; x < pl->right; ++x) - { - lwall[x] = x << FRACBITS; - } - } - - for (x = 0; x < 4; ++x) - 
{ - lastskycol[x] = 0xffffffff; - lastskycol_bgra[x] = 0xffffffff; - } - - rw_pic = frontskytex; - rw_offset = 0; - - frontyScale = rw_pic->Scale.Y; - dc_texturemid = skymid * frontyScale; - - if (1 << frontskytex->HeightBits == frontskytex->GetHeight()) - { // The texture tiles nicely - for (x = 0; x < 4; ++x) - { - lastskycol[x] = 0xffffffff; - lastskycol_bgra[x] = 0xffffffff; - } - R_DrawSkySegment (pl->left, pl->right, (short *)pl->top, (short *)pl->bottom, swall, lwall, - frontyScale, backskytex == NULL ? R_GetOneSkyColumn : R_GetTwoSkyColumns); - } - else - { // The texture does not tile nicely - frontyScale *= skyscale; - frontiScale = 1 / frontyScale; - R_DrawSkyStriped (pl); - } -} - -static void R_DrawSkyStriped (visplane_t *pl) -{ - short drawheight = short(frontskytex->GetHeight() * frontyScale); - double topfrac; - double iscale = frontiScale; - short top[MAXWIDTH], bot[MAXWIDTH]; - short yl, yh; - int x; - - topfrac = fmod(skymid + iscale * (1 - CenterY), frontskytex->GetHeight()); - if (topfrac < 0) topfrac += frontskytex->GetHeight(); - yl = 0; - yh = short((frontskytex->GetHeight() - topfrac) * frontyScale); - dc_texturemid = topfrac - iscale * (1 - CenterY); - - while (yl < viewheight) - { - for (x = pl->left; x < pl->right; ++x) - { - top[x] = MAX (yl, (short)pl->top[x]); - bot[x] = MIN (yh, (short)pl->bottom[x]); - } - for (x = 0; x < 4; ++x) - { - lastskycol[x] = 0xffffffff; - lastskycol_bgra[x] = 0xffffffff; - } - R_DrawSkySegment (pl->left, pl->right, top, bot, swall, lwall, rw_pic->Scale.Y, - backskytex == NULL ? 
R_GetOneSkyColumn : R_GetTwoSkyColumns); - yl = yh; - yh += drawheight; - dc_texturemid = iscale * (centery-yl-1); - } -} - //========================================================================== // // R_DrawPlanes @@ -1097,7 +718,6 @@ static void R_DrawSkyStriped (visplane_t *pl) // //========================================================================== - int R_DrawPlanes () { visplane_t *pl; @@ -1212,128 +832,6 @@ void R_DrawSinglePlane (visplane_t *pl, fixed_t alpha, bool additive, bool maske NetUpdate (); } -//========================================================================== -// -// R_DrawSkyPlane -// -//========================================================================== - -void R_DrawSkyPlane (visplane_t *pl) -{ - FTextureID sky1tex, sky2tex; - double frontdpos = 0, backdpos = 0; - - if ((level.flags & LEVEL_SWAPSKIES) && !(level.flags & LEVEL_DOUBLESKY)) - { - sky1tex = sky2texture; - } - else - { - sky1tex = sky1texture; - } - sky2tex = sky2texture; - skymid = skytexturemid; - skyangle = ViewAngle.BAMs(); - - if (pl->picnum == skyflatnum) - { - if (!(pl->sky & PL_SKYFLAT)) - { // use sky1 - sky1: - frontskytex = TexMan(sky1tex, true); - if (level.flags & LEVEL_DOUBLESKY) - backskytex = TexMan(sky2tex, true); - else - backskytex = NULL; - skyflip = 0; - frontdpos = sky1pos; - backdpos = sky2pos; - frontcyl = sky1cyl; - backcyl = sky2cyl; - } - else if (pl->sky == PL_SKYFLAT) - { // use sky2 - frontskytex = TexMan(sky2tex, true); - backskytex = NULL; - frontcyl = sky2cyl; - skyflip = 0; - frontdpos = sky2pos; - } - else - { // MBF's linedef-controlled skies - // Sky Linedef - const line_t *l = &lines[(pl->sky & ~PL_SKYFLAT)-1]; - - // Sky transferred from first sidedef - const side_t *s = l->sidedef[0]; - int pos; - - // Texture comes from upper texture of reference sidedef - // [RH] If swapping skies, then use the lower sidedef - if (level.flags & LEVEL_SWAPSKIES && s->GetTexture(side_t::bottom).isValid()) - { - pos = side_t::bottom; 
- } - else - { - pos = side_t::top; - } - - frontskytex = TexMan(s->GetTexture(pos), true); - if (frontskytex == NULL || frontskytex->UseType == FTexture::TEX_Null) - { // [RH] The blank texture: Use normal sky instead. - goto sky1; - } - backskytex = NULL; - - // Horizontal offset is turned into an angle offset, - // to allow sky rotation as well as careful positioning. - // However, the offset is scaled very small, so that it - // allows a long-period of sky rotation. - skyangle += FLOAT2FIXED(s->GetTextureXOffset(pos)); - - // Vertical offset allows careful sky positioning. - skymid = s->GetTextureYOffset(pos) - 28; - - // We sometimes flip the picture horizontally. - // - // Doom always flipped the picture, so we make it optional, - // to make it easier to use the new feature, while to still - // allow old sky textures to be used. - skyflip = l->args[2] ? 0u : ~0u; - - int frontxscale = int(frontskytex->Scale.X * 1024); - frontcyl = MAX(frontskytex->GetWidth(), frontxscale); - if (skystretch) - { - skymid = skymid * frontskytex->GetScaledHeightDouble() / SKYSTRETCH_HEIGHT; - } - } - } - frontpos = int(fmod(frontdpos, sky1cyl * 65536.0)); - if (backskytex != NULL) - { - backpos = int(fmod(backdpos, sky2cyl * 65536.0)); - } - - bool fakefixed = false; - if (fixedcolormap) - { - R_SetColorMapLight(fixedcolormap, 0, 0); - } - else - { - fakefixed = true; - fixedcolormap = &NormalLight; - R_SetColorMapLight(fixedcolormap, 0, 0); - } - - R_DrawSky (pl); - - if (fakefixed) - fixedcolormap = NULL; -} - //========================================================================== // // R_DrawNormalPlane diff --git a/src/swrenderer/scene/r_plane.h b/src/swrenderer/scene/r_plane.h index c2f885c85e..6a1d386b89 100644 --- a/src/swrenderer/scene/r_plane.h +++ b/src/swrenderer/scene/r_plane.h @@ -44,7 +44,6 @@ void R_AddPlaneLights(visplane_t *plane, FLightNode *light_head); int R_DrawPlanes (); void R_DrawSinglePlane(visplane_t *pl, fixed_t alpha, bool additive, bool masked); 
-void R_DrawSkyPlane (visplane_t *pl); void R_DrawNormalPlane (visplane_t *pl, double xscale, double yscale, fixed_t alpha, bool additive, bool masked); void R_DrawTiltedPlane (visplane_t *pl, double xscale, double yscale, fixed_t alpha, bool additive, bool masked); void R_MapVisPlane (visplane_t *pl, void (*mapfunc)(int y, int x1)); diff --git a/src/swrenderer/scene/r_segs.h b/src/swrenderer/scene/r_segs.h index 9c83b534d3..51cab3eb55 100644 --- a/src/swrenderer/scene/r_segs.h +++ b/src/swrenderer/scene/r_segs.h @@ -54,6 +54,7 @@ extern float rw_light; // [RH] Scale lights with viewsize adjustments extern float rw_lightstep; extern float rw_lightleft; extern fixed_t rw_offset; +extern FTexture *rw_pic; extern int wallshade; } diff --git a/src/swrenderer/scene/r_skyplane.cpp b/src/swrenderer/scene/r_skyplane.cpp new file mode 100644 index 0000000000..759abe2d58 --- /dev/null +++ b/src/swrenderer/scene/r_skyplane.cpp @@ -0,0 +1,502 @@ + +#include +#include +#include "templates.h" +#include "i_system.h" +#include "w_wad.h" +#include "doomdef.h" +#include "doomstat.h" +#include "swrenderer/r_main.h" +#include "swrenderer/scene/r_things.h" +#include "r_sky.h" +#include "stats.h" +#include "v_video.h" +#include "a_sharedglobal.h" +#include "c_console.h" +#include "cmdlib.h" +#include "d_net.h" +#include "g_level.h" +#include "r_bsp.h" +#include "r_skyplane.h" +#include "r_segs.h" +#include "r_3dfloors.h" +#include "v_palette.h" +#include "r_data/colormaps.h" +#include "swrenderer/drawers/r_draw_rgba.h" +#include "gl/dynlights/gl_dynlight.h" +#include "r_walldraw.h" +#include "r_clip_segment.h" +#include "r_draw_segment.h" +#include "r_portal.h" +#include "swrenderer/r_memory.h" + +CVAR(Bool, r_linearsky, false, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); +EXTERN_CVAR(Int, r_skymode) + +namespace swrenderer +{ + namespace + { + FTexture *frontskytex, *backskytex; + angle_t skyflip; + int frontpos, backpos; + double frontyScale; + fixed_t frontcyl, backcyl; + double skymid; + 
angle_t skyangle; + double frontiScale; + + // Allow for layer skies up to 512 pixels tall. This is overkill, + // since the most anyone can ever see of the sky is 500 pixels. + // We need 4 skybufs because R_DrawSkySegment can draw up to 4 columns at a time. + // Need two versions - one for true color and one for palette + #define MAXSKYBUF 3072 + uint8_t skybuf[4][512]; + uint32_t skybuf_bgra[MAXSKYBUF][512]; + uint32_t lastskycol[4]; + uint32_t lastskycol_bgra[MAXSKYBUF]; + int skycolplace; + int skycolplace_bgra; + } + + void R_DrawSkyPlane(visplane_t *pl) + { + FTextureID sky1tex, sky2tex; + double frontdpos = 0, backdpos = 0; + + if ((level.flags & LEVEL_SWAPSKIES) && !(level.flags & LEVEL_DOUBLESKY)) + { + sky1tex = sky2texture; + } + else + { + sky1tex = sky1texture; + } + sky2tex = sky2texture; + skymid = skytexturemid; + skyangle = ViewAngle.BAMs(); + + if (pl->picnum == skyflatnum) + { + if (!(pl->sky & PL_SKYFLAT)) + { // use sky1 + sky1: + frontskytex = TexMan(sky1tex, true); + if (level.flags & LEVEL_DOUBLESKY) + backskytex = TexMan(sky2tex, true); + else + backskytex = NULL; + skyflip = 0; + frontdpos = sky1pos; + backdpos = sky2pos; + frontcyl = sky1cyl; + backcyl = sky2cyl; + } + else if (pl->sky == PL_SKYFLAT) + { // use sky2 + frontskytex = TexMan(sky2tex, true); + backskytex = NULL; + frontcyl = sky2cyl; + skyflip = 0; + frontdpos = sky2pos; + } + else + { // MBF's linedef-controlled skies + // Sky Linedef + const line_t *l = &lines[(pl->sky & ~PL_SKYFLAT) - 1]; + + // Sky transferred from first sidedef + const side_t *s = l->sidedef[0]; + int pos; + + // Texture comes from upper texture of reference sidedef + // [RH] If swapping skies, then use the lower sidedef + if (level.flags & LEVEL_SWAPSKIES && s->GetTexture(side_t::bottom).isValid()) + { + pos = side_t::bottom; + } + else + { + pos = side_t::top; + } + + frontskytex = TexMan(s->GetTexture(pos), true); + if (frontskytex == NULL || frontskytex->UseType == FTexture::TEX_Null) + { // [RH] 
The blank texture: Use normal sky instead. + goto sky1; + } + backskytex = NULL; + + // Horizontal offset is turned into an angle offset, + // to allow sky rotation as well as careful positioning. + // However, the offset is scaled very small, so that it + // allows a long-period of sky rotation. + skyangle += FLOAT2FIXED(s->GetTextureXOffset(pos)); + + // Vertical offset allows careful sky positioning. + skymid = s->GetTextureYOffset(pos) - 28; + + // We sometimes flip the picture horizontally. + // + // Doom always flipped the picture, so we make it optional, + // to make it easier to use the new feature, while to still + // allow old sky textures to be used. + skyflip = l->args[2] ? 0u : ~0u; + + int frontxscale = int(frontskytex->Scale.X * 1024); + frontcyl = MAX(frontskytex->GetWidth(), frontxscale); + if (skystretch) + { + skymid = skymid * frontskytex->GetScaledHeightDouble() / SKYSTRETCH_HEIGHT; + } + } + } + frontpos = int(fmod(frontdpos, sky1cyl * 65536.0)); + if (backskytex != NULL) + { + backpos = int(fmod(backdpos, sky2cyl * 65536.0)); + } + + bool fakefixed = false; + if (fixedcolormap) + { + R_SetColorMapLight(fixedcolormap, 0, 0); + } + else + { + fakefixed = true; + fixedcolormap = &NormalLight; + R_SetColorMapLight(fixedcolormap, 0, 0); + } + + R_DrawSky(pl); + + if (fakefixed) + fixedcolormap = NULL; + } + + + // Get a column of sky when there is only one sky texture. 
+ const uint8_t *R_GetOneSkyColumn(FTexture *fronttex, int x) + { + int tx; + if (r_linearsky) + { + angle_t xangle = (angle_t)((0.5 - x / (double)viewwidth) * FocalTangent * ANGLE_90); + angle_t column = (skyangle + xangle) ^ skyflip; + tx = (UMulScale16(column, frontcyl) + frontpos) >> FRACBITS; + } + else + { + angle_t column = (skyangle + xtoviewangle[x]) ^ skyflip; + tx = (UMulScale16(column, frontcyl) + frontpos) >> FRACBITS; + } + + if (!r_swtruecolor) + return fronttex->GetColumn(tx, NULL); + else + { + return (const uint8_t *)fronttex->GetColumnBgra(tx, NULL); + } + } + + // Get a column of sky when there are two overlapping sky textures + const uint8_t *R_GetTwoSkyColumns(FTexture *fronttex, int x) + { + uint32_t ang, angle1, angle2; + + if (r_linearsky) + { + angle_t xangle = (angle_t)((0.5 - x / (double)viewwidth) * FocalTangent * ANGLE_90); + ang = (skyangle + xangle) ^ skyflip; + } + else + { + ang = (skyangle + xtoviewangle[x]) ^ skyflip; + } + angle1 = (uint32_t)((UMulScale16(ang, frontcyl) + frontpos) >> FRACBITS); + angle2 = (uint32_t)((UMulScale16(ang, backcyl) + backpos) >> FRACBITS); + + // Check if this column has already been built. If so, there's + // no reason to waste time building it again. + uint32_t skycol = (angle1 << 16) | angle2; + int i; + + if (!r_swtruecolor) + { + for (i = 0; i < 4; ++i) + { + if (lastskycol[i] == skycol) + { + return skybuf[i]; + } + } + + lastskycol[skycolplace] = skycol; + uint8_t *composite = skybuf[skycolplace]; + skycolplace = (skycolplace + 1) & 3; + + // The ordering of the following code has been tuned to allow VC++ to optimize + // it well. In particular, this arrangement lets it keep count in a register + // instead of on the stack. 
+ const uint8_t *front = fronttex->GetColumn(angle1, NULL); + const uint8_t *back = backskytex->GetColumn(angle2, NULL); + + int count = MIN(512, MIN(backskytex->GetHeight(), fronttex->GetHeight())); + i = 0; + do + { + if (front[i]) + { + composite[i] = front[i]; + } + else + { + composite[i] = back[i]; + } + } while (++i, --count); + return composite; + } + else + { + //return R_GetOneSkyColumn(fronttex, x); + for (i = skycolplace_bgra - 4; i < skycolplace_bgra; ++i) + { + int ic = (i % MAXSKYBUF); // i "checker" - can wrap around the ends of the array + if (lastskycol_bgra[ic] == skycol) + { + return (uint8_t*)(skybuf_bgra[ic]); + } + } + + lastskycol_bgra[skycolplace_bgra] = skycol; + uint32_t *composite = skybuf_bgra[skycolplace_bgra]; + skycolplace_bgra = (skycolplace_bgra + 1) % MAXSKYBUF; + + // The ordering of the following code has been tuned to allow VC++ to optimize + // it well. In particular, this arrangement lets it keep count in a register + // instead of on the stack. + const uint32_t *front = (const uint32_t *)fronttex->GetColumnBgra(angle1, NULL); + const uint32_t *back = (const uint32_t *)backskytex->GetColumnBgra(angle2, NULL); + + //[SP] Paletted version is used for comparison only + const uint8_t *frontcompare = fronttex->GetColumn(angle1, NULL); + + int count = MIN(512, MIN(backskytex->GetHeight(), fronttex->GetHeight())); + i = 0; + do + { + if (frontcompare[i]) + { + composite[i] = front[i]; + } + else + { + composite[i] = back[i]; + } + } while (++i, --count); + return (uint8_t*)composite; + } + } + + void R_DrawSkyColumnStripe(int start_x, int y1, int y2, int columns, double scale, double texturemid, double yrepeat) + { + using namespace drawerargs; + + uint32_t height = frontskytex->GetHeight(); + + for (int i = 0; i < columns; i++) + { + double uv_stepd = skyiscale * yrepeat; + double v = (texturemid + uv_stepd * (y1 - CenterY + 0.5)) / height; + double v_step = uv_stepd / height; + + uint32_t uv_pos = (uint32_t)(v * 0x01000000); + 
uint32_t uv_step = (uint32_t)(v_step * 0x01000000); + + int x = start_x + i; + if (MirrorFlags & RF_XFLIP) + x = (viewwidth - x); + + uint32_t ang, angle1, angle2; + + if (r_linearsky) + { + angle_t xangle = (angle_t)((0.5 - x / (double)viewwidth) * FocalTangent * ANGLE_90); + ang = (skyangle + xangle) ^ skyflip; + } + else + { + ang = (skyangle + xtoviewangle[x]) ^ skyflip; + } + angle1 = (uint32_t)((UMulScale16(ang, frontcyl) + frontpos) >> FRACBITS); + angle2 = (uint32_t)((UMulScale16(ang, backcyl) + backpos) >> FRACBITS); + + if (r_swtruecolor) + { + dc_wall_source[i] = (const uint8_t *)frontskytex->GetColumnBgra(angle1, nullptr); + dc_wall_source2[i] = backskytex ? (const uint8_t *)backskytex->GetColumnBgra(angle2, nullptr) : nullptr; + } + else + { + dc_wall_source[i] = (const uint8_t *)frontskytex->GetColumn(angle1, nullptr); + dc_wall_source2[i] = backskytex ? (const uint8_t *)backskytex->GetColumn(angle2, nullptr) : nullptr; + } + + dc_wall_iscale[i] = uv_step; + dc_wall_texturefrac[i] = uv_pos; + } + + dc_wall_sourceheight[0] = height; + dc_wall_sourceheight[1] = backskytex ? backskytex->GetHeight() : height; + int pixelsize = r_swtruecolor ? 
4 : 1; + dc_dest = (ylookup[y1] + start_x) * pixelsize + dc_destorg; + dc_count = y2 - y1; + + uint32_t solid_top = frontskytex->GetSkyCapColor(false); + uint32_t solid_bottom = frontskytex->GetSkyCapColor(true); + + if (!backskytex) + R_Drawers()->DrawSingleSkyColumn(solid_top, solid_bottom); + else + R_Drawers()->DrawDoubleSkyColumn(solid_top, solid_bottom); + } + + void R_DrawSkyColumn(int start_x, int y1, int y2, int columns) + { + if (1 << frontskytex->HeightBits == frontskytex->GetHeight()) + { + double texturemid = skymid * frontskytex->Scale.Y + frontskytex->GetHeight(); + R_DrawSkyColumnStripe(start_x, y1, y2, columns, frontskytex->Scale.Y, texturemid, frontskytex->Scale.Y); + } + else + { + double yrepeat = frontskytex->Scale.Y; + double scale = frontskytex->Scale.Y * skyscale; + double iscale = 1 / scale; + short drawheight = short(frontskytex->GetHeight() * scale); + double topfrac = fmod(skymid + iscale * (1 - CenterY), frontskytex->GetHeight()); + if (topfrac < 0) topfrac += frontskytex->GetHeight(); + double texturemid = topfrac - iscale * (1 - CenterY); + R_DrawSkyColumnStripe(start_x, y1, y2, columns, scale, texturemid, yrepeat); + } + } + + void R_DrawCapSky(visplane_t *pl) + { + int x1 = pl->left; + int x2 = pl->right; + short *uwal = (short *)pl->top; + short *dwal = (short *)pl->bottom; + + for (int x = x1; x < x2; x++) + { + int y1 = uwal[x]; + int y2 = dwal[x]; + if (y2 <= y1) + continue; + + R_DrawSkyColumn(x, y1, y2, 1); + } + } + + void R_DrawSky(visplane_t *pl) + { + if (r_skymode == 2) + { + R_DrawCapSky(pl); + return; + } + + int x; + float swal; + + if (pl->left >= pl->right) + return; + + swal = skyiscale; + for (x = pl->left; x < pl->right; ++x) + { + swall[x] = swal; + } + + if (MirrorFlags & RF_XFLIP) + { + for (x = pl->left; x < pl->right; ++x) + { + lwall[x] = (viewwidth - x) << FRACBITS; + } + } + else + { + for (x = pl->left; x < pl->right; ++x) + { + lwall[x] = x << FRACBITS; + } + } + + for (x = 0; x < 4; ++x) + { + 
lastskycol[x] = 0xffffffff; + lastskycol_bgra[x] = 0xffffffff; + } + + rw_pic = frontskytex; + rw_offset = 0; + + frontyScale = rw_pic->Scale.Y; + dc_texturemid = skymid * frontyScale; + + if (1 << frontskytex->HeightBits == frontskytex->GetHeight()) + { // The texture tiles nicely + for (x = 0; x < 4; ++x) + { + lastskycol[x] = 0xffffffff; + lastskycol_bgra[x] = 0xffffffff; + } + R_DrawSkySegment(pl->left, pl->right, (short *)pl->top, (short *)pl->bottom, swall, lwall, + frontyScale, backskytex == NULL ? R_GetOneSkyColumn : R_GetTwoSkyColumns); + } + else + { // The texture does not tile nicely + frontyScale *= skyscale; + frontiScale = 1 / frontyScale; + R_DrawSkyStriped(pl); + } + } + + void R_DrawSkyStriped(visplane_t *pl) + { + short drawheight = short(frontskytex->GetHeight() * frontyScale); + double topfrac; + double iscale = frontiScale; + short top[MAXWIDTH], bot[MAXWIDTH]; + short yl, yh; + int x; + + topfrac = fmod(skymid + iscale * (1 - CenterY), frontskytex->GetHeight()); + if (topfrac < 0) topfrac += frontskytex->GetHeight(); + yl = 0; + yh = short((frontskytex->GetHeight() - topfrac) * frontyScale); + dc_texturemid = topfrac - iscale * (1 - CenterY); + + while (yl < viewheight) + { + for (x = pl->left; x < pl->right; ++x) + { + top[x] = MAX(yl, (short)pl->top[x]); + bot[x] = MIN(yh, (short)pl->bottom[x]); + } + for (x = 0; x < 4; ++x) + { + lastskycol[x] = 0xffffffff; + lastskycol_bgra[x] = 0xffffffff; + } + R_DrawSkySegment(pl->left, pl->right, top, bot, swall, lwall, rw_pic->Scale.Y, + backskytex == NULL ? 
R_GetOneSkyColumn : R_GetTwoSkyColumns); + yl = yh; + yh += drawheight; + dc_texturemid = iscale * (centery - yl - 1); + } + } +} diff --git a/src/swrenderer/scene/r_skyplane.h b/src/swrenderer/scene/r_skyplane.h new file mode 100644 index 0000000000..b8df900789 --- /dev/null +++ b/src/swrenderer/scene/r_skyplane.h @@ -0,0 +1,18 @@ + +#pragma once + +#include "r_visible_plane.h" + +namespace swrenderer +{ + void R_DrawSkyPlane(visplane_t *pl); + + void R_DrawSky(visplane_t *pl); + void R_DrawSkyStriped(visplane_t *pl); + void R_DrawCapSky(visplane_t *pl); + void R_DrawSkyColumnStripe(int start_x, int y1, int y2, int columns, double scale, double texturemid, double yrepeat); + void R_DrawSkyColumn(int start_x, int y1, int y2, int columns); + + const uint8_t *R_GetOneSkyColumn(FTexture *fronttex, int x); + const uint8_t *R_GetTwoSkyColumns(FTexture *fronttex, int x); +} From f354cc8c67c8ab0e5f4f097ad27742d761764c5a Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 31 Dec 2016 11:42:49 +0100 Subject: [PATCH 606/912] Move flat and slope plane drawing to their own files, isolate and privatize their working variables --- src/CMakeLists.txt | 2 + src/swrenderer/r_main.cpp | 41 +-- src/swrenderer/scene/r_flatplane.cpp | 359 ++++++++++++++++++++ src/swrenderer/scene/r_flatplane.h | 16 + src/swrenderer/scene/r_plane.cpp | 461 +------------------------- src/swrenderer/scene/r_plane.h | 9 +- src/swrenderer/scene/r_slopeplane.cpp | 183 ++++++++++ src/swrenderer/scene/r_slopeplane.h | 10 + src/v_draw.cpp | 4 +- 9 files changed, 590 insertions(+), 495 deletions(-) create mode 100644 src/swrenderer/scene/r_flatplane.cpp create mode 100644 src/swrenderer/scene/r_flatplane.h create mode 100644 src/swrenderer/scene/r_slopeplane.cpp create mode 100644 src/swrenderer/scene/r_slopeplane.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index a31350cd87..f59bc0fce3 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -832,6 +832,8 @@ set( FASTMATH_PCH_SOURCES 
swrenderer/scene/r_wallsprite.cpp swrenderer/scene/r_decal.cpp swrenderer/scene/r_skyplane.cpp + swrenderer/scene/r_flatplane.cpp + swrenderer/scene/r_slopeplane.cpp polyrenderer/poly_renderer.cpp polyrenderer/scene/poly_scene.cpp polyrenderer/scene/poly_portal.cpp diff --git a/src/swrenderer/r_main.cpp b/src/swrenderer/r_main.cpp index 7027a7767e..74bbbeabd9 100644 --- a/src/swrenderer/r_main.cpp +++ b/src/swrenderer/r_main.cpp @@ -38,6 +38,7 @@ #include "scene/r_things.h" #include "drawers/r_draw.h" #include "scene/r_plane.h" +#include "scene/r_flatplane.h" #include "scene/r_bsp.h" #include "scene/r_draw_segment.h" #include "scene/r_portal_segment.h" @@ -491,45 +492,7 @@ void R_SetupFreelook() globaluclip = -CenterY / InvZtoScale; globaldclip = (viewheight - CenterY) / InvZtoScale; - //centeryfrac &= 0xffff0000; - int e, i; - - i = 0; - e = viewheight; - float focus = float(FocalLengthY); - float den; - float cy = float(CenterY); - if (i < centery) - { - den = cy - i - 0.5f; - if (e <= centery) - { - do { - yslope[i] = focus / den; - den -= 1; - } while (++i < e); - } - else - { - do { - yslope[i] = focus / den; - den -= 1; - } while (++i < centery); - den = i - cy + 0.5f; - do { - yslope[i] = focus / den; - den += 1; - } while (++i < e); - } - } - else - { - den = i - cy + 0.5f; - do { - yslope[i] = focus / den; - den += 1; - } while (++i < e); - } + R_SetupPlaneSlope(); } //========================================================================== diff --git a/src/swrenderer/scene/r_flatplane.cpp b/src/swrenderer/scene/r_flatplane.cpp new file mode 100644 index 0000000000..fba9cd1623 --- /dev/null +++ b/src/swrenderer/scene/r_flatplane.cpp @@ -0,0 +1,359 @@ + +#include +#include +#include "templates.h" +#include "i_system.h" +#include "w_wad.h" +#include "doomdef.h" +#include "doomstat.h" +#include "swrenderer/r_main.h" +#include "swrenderer/scene/r_things.h" +#include "r_sky.h" +#include "stats.h" +#include "v_video.h" +#include "a_sharedglobal.h" +#include 
"c_console.h" +#include "cmdlib.h" +#include "d_net.h" +#include "g_level.h" +#include "r_bsp.h" +#include "r_flatplane.h" +#include "r_segs.h" +#include "r_3dfloors.h" +#include "v_palette.h" +#include "r_data/colormaps.h" +#include "swrenderer/drawers/r_draw_rgba.h" +#include "gl/dynlights/gl_dynlight.h" +#include "r_walldraw.h" +#include "r_clip_segment.h" +#include "r_draw_segment.h" +#include "r_portal.h" +#include "r_plane.h" +#include "swrenderer/r_memory.h" + +namespace swrenderer +{ + namespace + { + double planeheight; + bool plane_shade; + int planeshade; + fixed_t pviewx, pviewy; + float yslope[MAXHEIGHT]; + fixed_t xscale, yscale; + double xstepscale, ystepscale; + double basexfrac, baseyfrac; + } + + void R_DrawNormalPlane(visplane_t *pl, double _xscale, double _yscale, fixed_t alpha, bool additive, bool masked) + { + using namespace drawerargs; + + if (alpha <= 0) + { + return; + } + + double planeang = (pl->xform.Angle + pl->xform.baseAngle).Radians(); + double xstep, ystep, leftxfrac, leftyfrac, rightxfrac, rightyfrac; + double x; + + xscale = xs_ToFixed(32 - ds_xbits, _xscale); + yscale = xs_ToFixed(32 - ds_ybits, _yscale); + if (planeang != 0) + { + double cosine = cos(planeang), sine = sin(planeang); + pviewx = FLOAT2FIXED(pl->xform.xOffs + ViewPos.X * cosine - ViewPos.Y * sine); + pviewy = FLOAT2FIXED(pl->xform.yOffs - ViewPos.X * sine - ViewPos.Y * cosine); + } + else + { + pviewx = FLOAT2FIXED(pl->xform.xOffs + ViewPos.X); + pviewy = FLOAT2FIXED(pl->xform.yOffs - ViewPos.Y); + } + + pviewx = FixedMul(xscale, pviewx); + pviewy = FixedMul(yscale, pviewy); + + // left to right mapping + planeang += (ViewAngle - 90).Radians(); + + // Scale will be unit scale at FocalLengthX (normally SCREENWIDTH/2) distance + xstep = cos(planeang) / FocalLengthX; + ystep = -sin(planeang) / FocalLengthX; + + // [RH] flip for mirrors + if (MirrorFlags & RF_XFLIP) + { + xstep = -xstep; + ystep = -ystep; + } + + planeang += M_PI / 2; + double cosine = cos(planeang), 
sine = -sin(planeang); + x = pl->right - centerx - 0.5; + rightxfrac = _xscale * (cosine + x * xstep); + rightyfrac = _yscale * (sine + x * ystep); + x = pl->left - centerx - 0.5; + leftxfrac = _xscale * (cosine + x * xstep); + leftyfrac = _yscale * (sine + x * ystep); + + basexfrac = rightxfrac; + baseyfrac = rightyfrac; + xstepscale = (rightxfrac - leftxfrac) / (pl->right - pl->left); + ystepscale = (rightyfrac - leftyfrac) / (pl->right - pl->left); + + planeheight = fabs(pl->height.Zat0() - ViewPos.Z); + + GlobVis = r_FloorVisibility / planeheight; + ds_light = 0; + if (fixedlightlev >= 0) + { + R_SetDSColorMapLight(basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); + plane_shade = false; + } + else if (fixedcolormap) + { + R_SetDSColorMapLight(fixedcolormap, 0, 0); + plane_shade = false; + } + else + { + plane_shade = true; + planeshade = LIGHT2SHADE(pl->lightlevel); + } + + if (spanfunc != &SWPixelFormatDrawers::FillSpan) + { + if (masked) + { + if (alpha < OPAQUE || additive) + { + if (!additive) + { + spanfunc = &SWPixelFormatDrawers::DrawSpanMaskedTranslucent; + dc_srcblend = Col2RGB8[alpha >> 10]; + dc_destblend = Col2RGB8[(OPAQUE - alpha) >> 10]; + dc_srcalpha = alpha; + dc_destalpha = OPAQUE - alpha; + } + else + { + spanfunc = &SWPixelFormatDrawers::DrawSpanMaskedAddClamp; + dc_srcblend = Col2RGB8_LessPrecision[alpha >> 10]; + dc_destblend = Col2RGB8_LessPrecision[FRACUNIT >> 10]; + dc_srcalpha = alpha; + dc_destalpha = FRACUNIT; + } + } + else + { + spanfunc = &SWPixelFormatDrawers::DrawSpanMasked; + } + } + else + { + if (alpha < OPAQUE || additive) + { + if (!additive) + { + spanfunc = &SWPixelFormatDrawers::DrawSpanTranslucent; + dc_srcblend = Col2RGB8[alpha >> 10]; + dc_destblend = Col2RGB8[(OPAQUE - alpha) >> 10]; + dc_srcalpha = alpha; + dc_destalpha = OPAQUE - alpha; + } + else + { + spanfunc = &SWPixelFormatDrawers::DrawSpanAddClamp; + dc_srcblend = Col2RGB8_LessPrecision[alpha >> 10]; + dc_destblend = Col2RGB8_LessPrecision[FRACUNIT >> 10]; + 
dc_srcalpha = alpha; + dc_destalpha = FRACUNIT; + } + } + else + { + spanfunc = &SWPixelFormatDrawers::DrawSpan; + } + } + } + R_MapVisPlane(pl, R_MapPlane, R_StepPlane); + } + + void R_StepPlane() + { + basexfrac -= xstepscale; + baseyfrac -= ystepscale; + } + + void R_MapPlane(int y, int x1, int x2) + { + using namespace drawerargs; + + double distance; + +#ifdef RANGECHECK + if (x2 < x1 || x1<0 || x2 >= viewwidth || (unsigned)y >= (unsigned)viewheight) + { + I_FatalError("R_MapPlane: %i, %i at %i", x1, x2, y); + } +#endif + + // [RH] Notice that I dumped the caching scheme used by Doom. + // It did not offer any appreciable speedup. + + distance = planeheight * yslope[y]; + + if (ds_xbits != 0) + { + ds_xstep = xs_ToFixed(32 - ds_xbits, distance * xstepscale); + ds_xfrac = xs_ToFixed(32 - ds_xbits, distance * basexfrac) + pviewx; + } + else + { + ds_xstep = 0; + ds_xfrac = 0; + } + if (ds_ybits != 0) + { + ds_ystep = xs_ToFixed(32 - ds_ybits, distance * ystepscale); + ds_yfrac = xs_ToFixed(32 - ds_ybits, distance * baseyfrac) + pviewy; + } + else + { + ds_ystep = 0; + ds_yfrac = 0; + } + + if (r_swtruecolor) + { + double distance2 = planeheight * yslope[(y + 1 < viewheight) ? y + 1 : y - 1]; + double xmagnitude = fabs(ystepscale * (distance2 - distance) * FocalLengthX); + double ymagnitude = fabs(xstepscale * (distance2 - distance) * FocalLengthX); + double magnitude = MAX(ymagnitude, xmagnitude); + double min_lod = -1000.0; + ds_lod = MAX(log2(magnitude) + r_lod_bias, min_lod); + } + + if (plane_shade) + { + // Determine lighting based on the span's distance from the viewer. 
+ R_SetDSColorMapLight(basecolormap, (float)(GlobVis * fabs(CenterY - y)), planeshade); + } + + if (r_dynlights) + { + // Find row position in view space + float zspan = (float)(planeheight / (fabs(y + 0.5 - CenterY) / InvZtoScale)); + dc_viewpos.X = (float)((x1 + 0.5 - CenterX) / CenterX * zspan); + dc_viewpos.Y = zspan; + dc_viewpos.Z = (float)((CenterY - y - 0.5) / InvZtoScale * zspan); + dc_viewpos_step.X = (float)(zspan / CenterX); + + static TriLight lightbuffer[64 * 1024]; + static int nextlightindex = 0; + + // Setup lights for column + dc_num_lights = 0; + dc_lights = lightbuffer + nextlightindex; + visplane_light *cur_node = ds_light_list; + while (cur_node && nextlightindex < 64 * 1024) + { + double lightX = cur_node->lightsource->X() - ViewPos.X; + double lightY = cur_node->lightsource->Y() - ViewPos.Y; + double lightZ = cur_node->lightsource->Z() - ViewPos.Z; + + float lx = (float)(lightX * ViewSin - lightY * ViewCos); + float ly = (float)(lightX * ViewTanCos + lightY * ViewTanSin) - dc_viewpos.Y; + float lz = (float)lightZ - dc_viewpos.Z; + + // Precalculate the constant part of the dot here so the drawer doesn't have to. 
+ float lconstant = ly * ly + lz * lz; + + // Include light only if it touches this row + float radius = cur_node->lightsource->GetRadius(); + if (radius * radius >= lconstant) + { + uint32_t red = cur_node->lightsource->GetRed(); + uint32_t green = cur_node->lightsource->GetGreen(); + uint32_t blue = cur_node->lightsource->GetBlue(); + + nextlightindex++; + auto &light = dc_lights[dc_num_lights++]; + light.x = lx; + light.y = lconstant; + light.radius = 256.0f / radius; + light.color = (red << 16) | (green << 8) | blue; + } + + cur_node = cur_node->next; + } + + if (nextlightindex == 64 * 1024) + nextlightindex = 0; + } + else + { + dc_num_lights = 0; + } + + ds_y = y; + ds_x1 = x1; + ds_x2 = x2; + + (R_Drawers()->*spanfunc)(); + } + + void R_DrawColoredPlane(visplane_t *pl) + { + R_MapVisPlane(pl, R_MapColoredPlane, nullptr); + } + + void R_MapColoredPlane(int y, int x1, int x2) + { + R_Drawers()->DrawColoredSpan(y, x1, x2); + } + + void R_SetupPlaneSlope() + { + int e, i; + + i = 0; + e = viewheight; + float focus = float(FocalLengthY); + float den; + float cy = float(CenterY); + if (i < centery) + { + den = cy - i - 0.5f; + if (e <= centery) + { + do { + yslope[i] = focus / den; + den -= 1; + } while (++i < e); + } + else + { + do { + yslope[i] = focus / den; + den -= 1; + } while (++i < centery); + den = i - cy + 0.5f; + do { + yslope[i] = focus / den; + den += 1; + } while (++i < e); + } + } + else + { + den = i - cy + 0.5f; + do { + yslope[i] = focus / den; + den += 1; + } while (++i < e); + } + } +} diff --git a/src/swrenderer/scene/r_flatplane.h b/src/swrenderer/scene/r_flatplane.h new file mode 100644 index 0000000000..d719c7153e --- /dev/null +++ b/src/swrenderer/scene/r_flatplane.h @@ -0,0 +1,16 @@ + +#pragma once + +#include "r_visible_plane.h" + +namespace swrenderer +{ + void R_SetupPlaneSlope(); + + void R_DrawNormalPlane(visplane_t *pl, double _xscale, double _yscale, fixed_t alpha, bool additive, bool masked); + void R_MapPlane(int y, int x1, int 
x2); + void R_StepPlane(); + + void R_DrawColoredPlane(visplane_t *pl); + void R_MapColoredPlane(int y, int x1, int x2); +} diff --git a/src/swrenderer/scene/r_plane.cpp b/src/swrenderer/scene/r_plane.cpp index 2f74d66921..ae8c466951 100644 --- a/src/swrenderer/scene/r_plane.cpp +++ b/src/swrenderer/scene/r_plane.cpp @@ -66,6 +66,8 @@ #include "r_draw_segment.h" #include "r_portal.h" #include "r_skyplane.h" +#include "r_flatplane.h" +#include "r_slopeplane.h" #include "swrenderer/r_memory.h" #ifdef _MSC_VER @@ -109,176 +111,10 @@ short ceilingclip[MAXWIDTH]; // texture mapping // -static double planeheight; - -extern "C" { -// -// spanend holds the end of a plane span in each screen row -// short spanend[MAXHEIGHT]; -int planeshade; -FVector3 plane_sz, plane_su, plane_sv; -float planelightfloat; -bool plane_shade; -fixed_t pviewx, pviewy; -} - -float yslope[MAXHEIGHT]; -static fixed_t xscale, yscale; -static double xstepscale, ystepscale; -static double basexfrac, baseyfrac; - void R_DrawSinglePlane (visplane_t *, fixed_t alpha, bool additive, bool masked); -//========================================================================== -// -// R_MapPlane -// -// Globals used: planeheight, ds_source, basexscale, baseyscale, -// pviewx, pviewy, xoffs, yoffs, basecolormap, xscale, yscale. -// -//========================================================================== - -void R_MapPlane (int y, int x1) -{ - int x2 = spanend[y]; - double distance; - -#ifdef RANGECHECK - if (x2 < x1 || x1<0 || x2>=viewwidth || (unsigned)y>=(unsigned)viewheight) - { - I_FatalError ("R_MapPlane: %i, %i at %i", x1, x2, y); - } -#endif - - // [RH] Notice that I dumped the caching scheme used by Doom. - // It did not offer any appreciable speedup. 
- - distance = planeheight * yslope[y]; - - if (ds_xbits != 0) - { - ds_xstep = xs_ToFixed(32 - ds_xbits, distance * xstepscale); - ds_xfrac = xs_ToFixed(32 - ds_xbits, distance * basexfrac) + pviewx; - } - else - { - ds_xstep = 0; - ds_xfrac = 0; - } - if (ds_ybits != 0) - { - ds_ystep = xs_ToFixed(32 - ds_ybits, distance * ystepscale); - ds_yfrac = xs_ToFixed(32 - ds_ybits, distance * baseyfrac) + pviewy; - } - else - { - ds_ystep = 0; - ds_yfrac = 0; - } - - if (r_swtruecolor) - { - double distance2 = planeheight * yslope[(y + 1 < viewheight) ? y + 1 : y - 1]; - double xmagnitude = fabs(ystepscale * (distance2 - distance) * FocalLengthX); - double ymagnitude = fabs(xstepscale * (distance2 - distance) * FocalLengthX); - double magnitude = MAX(ymagnitude, xmagnitude); - double min_lod = -1000.0; - ds_lod = MAX(log2(magnitude) + r_lod_bias, min_lod); - } - - if (plane_shade) - { - // Determine lighting based on the span's distance from the viewer. - R_SetDSColorMapLight(basecolormap, GlobVis * fabs(CenterY - y), planeshade); - } - - if (r_dynlights) - { - // Find row position in view space - float zspan = planeheight / (fabs(y + 0.5 - CenterY) / InvZtoScale); - dc_viewpos.X = (float)((x1 + 0.5 - CenterX) / CenterX * zspan); - dc_viewpos.Y = zspan; - dc_viewpos.Z = (float)((CenterY - y - 0.5) / InvZtoScale * zspan); - dc_viewpos_step.X = (float)(zspan / CenterX); - - static TriLight lightbuffer[64 * 1024]; - static int nextlightindex = 0; - - // Setup lights for column - dc_num_lights = 0; - dc_lights = lightbuffer + nextlightindex; - visplane_light *cur_node = ds_light_list; - while (cur_node && nextlightindex < 64 * 1024) - { - double lightX = cur_node->lightsource->X() - ViewPos.X; - double lightY = cur_node->lightsource->Y() - ViewPos.Y; - double lightZ = cur_node->lightsource->Z() - ViewPos.Z; - - float lx = (float)(lightX * ViewSin - lightY * ViewCos); - float ly = (float)(lightX * ViewTanCos + lightY * ViewTanSin) - dc_viewpos.Y; - float lz = (float)lightZ - 
dc_viewpos.Z; - - // Precalculate the constant part of the dot here so the drawer doesn't have to. - float lconstant = ly * ly + lz * lz; - - // Include light only if it touches this row - float radius = cur_node->lightsource->GetRadius(); - if (radius * radius >= lconstant) - { - uint32_t red = cur_node->lightsource->GetRed(); - uint32_t green = cur_node->lightsource->GetGreen(); - uint32_t blue = cur_node->lightsource->GetBlue(); - - nextlightindex++; - auto &light = dc_lights[dc_num_lights++]; - light.x = lx; - light.y = lconstant; - light.radius = 256.0f / radius; - light.color = (red << 16) | (green << 8) | blue; - } - - cur_node = cur_node->next; - } - - if (nextlightindex == 64 * 1024) - nextlightindex = 0; - } - else - { - dc_num_lights = 0; - } - - ds_y = y; - ds_x1 = x1; - ds_x2 = x2; - - (R_Drawers()->*spanfunc)(); -} - -//========================================================================== -// -// R_MapTiltedPlane -// -//========================================================================== - -void R_MapTiltedPlane (int y, int x1) -{ - R_Drawers()->DrawTiltedSpan(y, x1, spanend[y], plane_sz, plane_su, plane_sv, plane_shade, planeshade, planelightfloat, pviewx, pviewy); -} - -//========================================================================== -// -// R_MapColoredPlane -// -//========================================================================== - -void R_MapColoredPlane(int y, int x1) -{ - R_Drawers()->DrawColoredSpan(y, x1, spanend[y]); -} - void R_DrawFogBoundarySection(int y, int y2, int x1) { for (; y < y2; ++y) @@ -790,7 +626,7 @@ void R_DrawSinglePlane (visplane_t *pl, fixed_t alpha, bool additive, bool maske if (r_drawflat) { // [RH] no texture mapping ds_color += 4; - R_MapVisPlane (pl, R_MapColoredPlane); + R_DrawColoredPlane(pl); } else if (pl->picnum == skyflatnum) { // sky flat @@ -818,7 +654,6 @@ void R_DrawSinglePlane (visplane_t *pl, fixed_t alpha, bool additive, bool maske double yscale = pl->xform.yScale * 
tex->Scale.Y; basecolormap = pl->colormap; - planeshade = LIGHT2SHADE(pl->lightlevel); if (r_drawflat || (!pl->height.isSlope() && !tilt)) { @@ -832,277 +667,6 @@ void R_DrawSinglePlane (visplane_t *pl, fixed_t alpha, bool additive, bool maske NetUpdate (); } -//========================================================================== -// -// R_DrawNormalPlane -// -//========================================================================== - -void R_DrawNormalPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t alpha, bool additive, bool masked) -{ - if (alpha <= 0) - { - return; - } - - double planeang = (pl->xform.Angle + pl->xform.baseAngle).Radians(); - double xstep, ystep, leftxfrac, leftyfrac, rightxfrac, rightyfrac; - double x; - - xscale = xs_ToFixed(32 - ds_xbits, _xscale); - yscale = xs_ToFixed(32 - ds_ybits, _yscale); - if (planeang != 0) - { - double cosine = cos(planeang), sine = sin(planeang); - pviewx = FLOAT2FIXED(pl->xform.xOffs + ViewPos.X * cosine - ViewPos.Y * sine); - pviewy = FLOAT2FIXED(pl->xform.yOffs - ViewPos.X * sine - ViewPos.Y * cosine); - } - else - { - pviewx = FLOAT2FIXED(pl->xform.xOffs + ViewPos.X); - pviewy = FLOAT2FIXED(pl->xform.yOffs - ViewPos.Y); - } - - pviewx = FixedMul (xscale, pviewx); - pviewy = FixedMul (yscale, pviewy); - - // left to right mapping - planeang += (ViewAngle - 90).Radians(); - - // Scale will be unit scale at FocalLengthX (normally SCREENWIDTH/2) distance - xstep = cos(planeang) / FocalLengthX; - ystep = -sin(planeang) / FocalLengthX; - - // [RH] flip for mirrors - if (MirrorFlags & RF_XFLIP) - { - xstep = -xstep; - ystep = -ystep; - } - - planeang += M_PI/2; - double cosine = cos(planeang), sine = -sin(planeang); - x = pl->right - centerx - 0.5; - rightxfrac = _xscale * (cosine + x * xstep); - rightyfrac = _yscale * (sine + x * ystep); - x = pl->left - centerx - 0.5; - leftxfrac = _xscale * (cosine + x * xstep); - leftyfrac = _yscale * (sine + x * ystep); - - basexfrac = rightxfrac; - 
baseyfrac = rightyfrac; - xstepscale = (rightxfrac - leftxfrac) / (pl->right - pl->left); - ystepscale = (rightyfrac - leftyfrac) / (pl->right - pl->left); - - planeheight = fabs(pl->height.Zat0() - ViewPos.Z); - - GlobVis = r_FloorVisibility / planeheight; - ds_light = 0; - if (fixedlightlev >= 0) - { - R_SetDSColorMapLight(basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); - plane_shade = false; - } - else if (fixedcolormap) - { - R_SetDSColorMapLight(fixedcolormap, 0, 0); - plane_shade = false; - } - else - { - plane_shade = true; - } - - if (spanfunc != &SWPixelFormatDrawers::FillSpan) - { - if (masked) - { - if (alpha < OPAQUE || additive) - { - if (!additive) - { - spanfunc = &SWPixelFormatDrawers::DrawSpanMaskedTranslucent; - dc_srcblend = Col2RGB8[alpha>>10]; - dc_destblend = Col2RGB8[(OPAQUE-alpha)>>10]; - dc_srcalpha = alpha; - dc_destalpha = OPAQUE - alpha; - } - else - { - spanfunc = &SWPixelFormatDrawers::DrawSpanMaskedAddClamp; - dc_srcblend = Col2RGB8_LessPrecision[alpha>>10]; - dc_destblend = Col2RGB8_LessPrecision[FRACUNIT>>10]; - dc_srcalpha = alpha; - dc_destalpha = FRACUNIT; - } - } - else - { - spanfunc = &SWPixelFormatDrawers::DrawSpanMasked; - } - } - else - { - if (alpha < OPAQUE || additive) - { - if (!additive) - { - spanfunc = &SWPixelFormatDrawers::DrawSpanTranslucent; - dc_srcblend = Col2RGB8[alpha>>10]; - dc_destblend = Col2RGB8[(OPAQUE-alpha)>>10]; - dc_srcalpha = alpha; - dc_destalpha = OPAQUE - alpha; - } - else - { - spanfunc = &SWPixelFormatDrawers::DrawSpanAddClamp; - dc_srcblend = Col2RGB8_LessPrecision[alpha>>10]; - dc_destblend = Col2RGB8_LessPrecision[FRACUNIT>>10]; - dc_srcalpha = alpha; - dc_destalpha = FRACUNIT; - } - } - else - { - spanfunc = &SWPixelFormatDrawers::DrawSpan; - } - } - } - R_MapVisPlane (pl, R_MapPlane); -} - -//========================================================================== -// -// R_DrawTiltedPlane -// -//========================================================================== - -void 
R_DrawTiltedPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t alpha, bool additive, bool masked) -{ - static const float ifloatpow2[16] = - { - // ifloatpow2[i] = 1 / (1 << i) - 64.f, 32.f, 16.f, 8.f, 4.f, 2.f, 1.f, 0.5f, - 0.25f, 0.125f, 0.0625f, 0.03125f, 0.015625f, 0.0078125f, - 0.00390625f, 0.001953125f - /*, 0.0009765625f, 0.00048828125f, 0.000244140625f, - 1.220703125e-4f, 6.103515625e-5, 3.0517578125e-5*/ - }; - double lxscale, lyscale; - double xscale, yscale; - FVector3 p, m, n; - double ang, planeang, cosine, sine; - double zeroheight; - - if (alpha <= 0) - { - return; - } - - lxscale = _xscale * ifloatpow2[ds_xbits]; - lyscale = _yscale * ifloatpow2[ds_ybits]; - xscale = 64.f / lxscale; - yscale = 64.f / lyscale; - zeroheight = pl->height.ZatPoint(ViewPos); - - pviewx = xs_ToFixed(32 - ds_xbits, pl->xform.xOffs * pl->xform.xScale); - pviewy = xs_ToFixed(32 - ds_ybits, pl->xform.yOffs * pl->xform.yScale); - planeang = (pl->xform.Angle + pl->xform.baseAngle).Radians(); - - // p is the texture origin in view space - // Don't add in the offsets at this stage, because doing so can result in - // errors if the flat is rotated. 
- ang = M_PI*3/2 - ViewAngle.Radians(); - cosine = cos(ang), sine = sin(ang); - p[0] = ViewPos.X * cosine - ViewPos.Y * sine; - p[2] = ViewPos.X * sine + ViewPos.Y * cosine; - p[1] = pl->height.ZatPoint(0.0, 0.0) - ViewPos.Z; - - // m is the v direction vector in view space - ang = ang - M_PI / 2 - planeang; - cosine = cos(ang), sine = sin(ang); - m[0] = yscale * cosine; - m[2] = yscale * sine; -// m[1] = pl->height.ZatPointF (0, iyscale) - pl->height.ZatPointF (0,0)); -// VectorScale2 (m, 64.f/VectorLength(m)); - - // n is the u direction vector in view space -#if 0 - //let's use the sin/cosine we already know instead of computing new ones - ang += M_PI/2 - n[0] = -xscale * cos(ang); - n[2] = -xscale * sin(ang); -#else - n[0] = xscale * sine; - n[2] = -xscale * cosine; -#endif -// n[1] = pl->height.ZatPointF (ixscale, 0) - pl->height.ZatPointF (0,0)); -// VectorScale2 (n, 64.f/VectorLength(n)); - - // This code keeps the texture coordinates constant across the x,y plane no matter - // how much you slope the surface. Use the commented-out code above instead to keep - // the textures a constant size across the surface's plane instead. 
- cosine = cos(planeang), sine = sin(planeang); - m[1] = pl->height.ZatPoint(ViewPos.X + yscale * sine, ViewPos.Y + yscale * cosine) - zeroheight; - n[1] = -(pl->height.ZatPoint(ViewPos.X - xscale * cosine, ViewPos.Y + xscale * sine) - zeroheight); - - plane_su = p ^ m; - plane_sv = p ^ n; - plane_sz = m ^ n; - - plane_su.Z *= FocalLengthX; - plane_sv.Z *= FocalLengthX; - plane_sz.Z *= FocalLengthX; - - plane_su.Y *= IYaspectMul; - plane_sv.Y *= IYaspectMul; - plane_sz.Y *= IYaspectMul; - - // Premultiply the texture vectors with the scale factors - plane_su *= 4294967296.f; - plane_sv *= 4294967296.f; - - if (MirrorFlags & RF_XFLIP) - { - plane_su[0] = -plane_su[0]; - plane_sv[0] = -plane_sv[0]; - plane_sz[0] = -plane_sz[0]; - } - - planelightfloat = (r_TiltVisibility * lxscale * lyscale) / (fabs(pl->height.ZatPoint(ViewPos) - ViewPos.Z)) / 65536.f; - - if (pl->height.fC() > 0) - planelightfloat = -planelightfloat; - - if (fixedlightlev >= 0) - { - R_SetDSColorMapLight(basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); - plane_shade = false; - } - else if (fixedcolormap) - { - R_SetDSColorMapLight(fixedcolormap, 0, 0); - plane_shade = false; - } - else - { - R_SetDSColorMapLight(basecolormap, 0, 0); - plane_shade = true; - } - - // Hack in support for 1 x Z and Z x 1 texture sizes - if (ds_ybits == 0) - { - plane_sv[2] = plane_sv[1] = plane_sv[0] = 0; - } - if (ds_xbits == 0) - { - plane_su[2] = plane_su[1] = plane_su[0] = 0; - } - - R_MapVisPlane (pl, R_MapTiltedPlane); -} - //========================================================================== // // R_MapVisPlane @@ -1113,7 +677,7 @@ void R_DrawTiltedPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t // //========================================================================== -void R_MapVisPlane (visplane_t *pl, void (*mapfunc)(int y, int x1)) +void R_MapVisPlane (visplane_t *pl, void (*mapfunc)(int y, int x1, int x2), void(*stepfunc)()) { int x = pl->right - 1; int t2 = pl->top[x]; @@ 
-1137,12 +701,16 @@ void R_MapVisPlane (visplane_t *pl, void (*mapfunc)(int y, int x1)) stop = MIN (t1, b2); while (t2 < stop) { - mapfunc (t2++, xr); + int y = t2++; + int x2 = spanend[y]; + mapfunc (y, xr, x2); } stop = MAX (b1, t2); while (b2 > stop) { - mapfunc (--b2, xr); + int y = --b2; + int x2 = spanend[y]; + mapfunc (y, xr, x2); } // Mark any spans that have just opened @@ -1159,13 +727,16 @@ void R_MapVisPlane (visplane_t *pl, void (*mapfunc)(int y, int x1)) t2 = pl->top[x]; b2 = pl->bottom[x]; - basexfrac -= xstepscale; - baseyfrac -= ystepscale; + + if (stepfunc) + stepfunc(); } // Draw any spans that are still open while (t2 < b2) { - mapfunc (--b2, pl->left); + int y = --b2; + int x2 = spanend[y]; + mapfunc (y, pl->left, x2); } ds_light_list = nullptr; diff --git a/src/swrenderer/scene/r_plane.h b/src/swrenderer/scene/r_plane.h index 6a1d386b89..ad9e59b9b5 100644 --- a/src/swrenderer/scene/r_plane.h +++ b/src/swrenderer/scene/r_plane.h @@ -36,20 +36,13 @@ extern planefunction_t ceilingfunc_t; extern short floorclip[MAXWIDTH]; extern short ceilingclip[MAXWIDTH]; -extern float yslope[MAXHEIGHT]; - void R_ClearPlanes (bool fullclear); void R_AddPlaneLights(visplane_t *plane, FLightNode *light_head); int R_DrawPlanes (); void R_DrawSinglePlane(visplane_t *pl, fixed_t alpha, bool additive, bool masked); -void R_DrawNormalPlane (visplane_t *pl, double xscale, double yscale, fixed_t alpha, bool additive, bool masked); -void R_DrawTiltedPlane (visplane_t *pl, double xscale, double yscale, fixed_t alpha, bool additive, bool masked); -void R_MapVisPlane (visplane_t *pl, void (*mapfunc)(int y, int x1)); - -void R_MapTiltedPlane(int y, int x1); -void R_MapColoredPlane(int y, int x1); +void R_MapVisPlane (visplane_t *pl, void (*mapfunc)(int y, int x1, int x2), void (*stepfunc)()); void R_DrawFogBoundary(int x1, int x2, short *uclip, short *dclip); diff --git a/src/swrenderer/scene/r_slopeplane.cpp b/src/swrenderer/scene/r_slopeplane.cpp new file mode 100644 index 
0000000000..4810fd3d41 --- /dev/null +++ b/src/swrenderer/scene/r_slopeplane.cpp @@ -0,0 +1,183 @@ + +#include +#include +#include "templates.h" +#include "i_system.h" +#include "w_wad.h" +#include "doomdef.h" +#include "doomstat.h" +#include "swrenderer/r_main.h" +#include "swrenderer/scene/r_things.h" +#include "r_sky.h" +#include "stats.h" +#include "v_video.h" +#include "a_sharedglobal.h" +#include "c_console.h" +#include "cmdlib.h" +#include "d_net.h" +#include "g_level.h" +#include "r_bsp.h" +#include "r_slopeplane.h" +#include "r_segs.h" +#include "r_3dfloors.h" +#include "v_palette.h" +#include "r_data/colormaps.h" +#include "swrenderer/drawers/r_draw_rgba.h" +#include "gl/dynlights/gl_dynlight.h" +#include "r_walldraw.h" +#include "r_clip_segment.h" +#include "r_draw_segment.h" +#include "r_portal.h" +#include "r_plane.h" +#include "swrenderer/r_memory.h" + +#ifdef _MSC_VER +#pragma warning(disable:4244) +#endif + +namespace swrenderer +{ + namespace + { + FVector3 plane_sz, plane_su, plane_sv; + float planelightfloat; + bool plane_shade; + int planeshade; + fixed_t pviewx, pviewy; + fixed_t xscale, yscale; + } + + void R_DrawTiltedPlane(visplane_t *pl, double _xscale, double _yscale, fixed_t alpha, bool additive, bool masked) + { + using namespace drawerargs; + + static const float ifloatpow2[16] = + { + // ifloatpow2[i] = 1 / (1 << i) + 64.f, 32.f, 16.f, 8.f, 4.f, 2.f, 1.f, 0.5f, + 0.25f, 0.125f, 0.0625f, 0.03125f, 0.015625f, 0.0078125f, + 0.00390625f, 0.001953125f + /*, 0.0009765625f, 0.00048828125f, 0.000244140625f, + 1.220703125e-4f, 6.103515625e-5, 3.0517578125e-5*/ + }; + double lxscale, lyscale; + double xscale, yscale; + FVector3 p, m, n; + double ang, planeang, cosine, sine; + double zeroheight; + + if (alpha <= 0) + { + return; + } + + lxscale = _xscale * ifloatpow2[ds_xbits]; + lyscale = _yscale * ifloatpow2[ds_ybits]; + xscale = 64.f / lxscale; + yscale = 64.f / lyscale; + zeroheight = pl->height.ZatPoint(ViewPos); + + pviewx = xs_ToFixed(32 - 
ds_xbits, pl->xform.xOffs * pl->xform.xScale); + pviewy = xs_ToFixed(32 - ds_ybits, pl->xform.yOffs * pl->xform.yScale); + planeang = (pl->xform.Angle + pl->xform.baseAngle).Radians(); + + // p is the texture origin in view space + // Don't add in the offsets at this stage, because doing so can result in + // errors if the flat is rotated. + ang = M_PI * 3 / 2 - ViewAngle.Radians(); + cosine = cos(ang), sine = sin(ang); + p[0] = ViewPos.X * cosine - ViewPos.Y * sine; + p[2] = ViewPos.X * sine + ViewPos.Y * cosine; + p[1] = pl->height.ZatPoint(0.0, 0.0) - ViewPos.Z; + + // m is the v direction vector in view space + ang = ang - M_PI / 2 - planeang; + cosine = cos(ang), sine = sin(ang); + m[0] = yscale * cosine; + m[2] = yscale * sine; + // m[1] = pl->height.ZatPointF (0, iyscale) - pl->height.ZatPointF (0,0)); + // VectorScale2 (m, 64.f/VectorLength(m)); + + // n is the u direction vector in view space +#if 0 + //let's use the sin/cosine we already know instead of computing new ones + ang += M_PI / 2 + n[0] = -xscale * cos(ang); + n[2] = -xscale * sin(ang); +#else + n[0] = xscale * sine; + n[2] = -xscale * cosine; +#endif + // n[1] = pl->height.ZatPointF (ixscale, 0) - pl->height.ZatPointF (0,0)); + // VectorScale2 (n, 64.f/VectorLength(n)); + + // This code keeps the texture coordinates constant across the x,y plane no matter + // how much you slope the surface. Use the commented-out code above instead to keep + // the textures a constant size across the surface's plane instead. 
+ cosine = cos(planeang), sine = sin(planeang); + m[1] = pl->height.ZatPoint(ViewPos.X + yscale * sine, ViewPos.Y + yscale * cosine) - zeroheight; + n[1] = -(pl->height.ZatPoint(ViewPos.X - xscale * cosine, ViewPos.Y + xscale * sine) - zeroheight); + + plane_su = p ^ m; + plane_sv = p ^ n; + plane_sz = m ^ n; + + plane_su.Z *= FocalLengthX; + plane_sv.Z *= FocalLengthX; + plane_sz.Z *= FocalLengthX; + + plane_su.Y *= IYaspectMul; + plane_sv.Y *= IYaspectMul; + plane_sz.Y *= IYaspectMul; + + // Premultiply the texture vectors with the scale factors + plane_su *= 4294967296.f; + plane_sv *= 4294967296.f; + + if (MirrorFlags & RF_XFLIP) + { + plane_su[0] = -plane_su[0]; + plane_sv[0] = -plane_sv[0]; + plane_sz[0] = -plane_sz[0]; + } + + planelightfloat = (r_TiltVisibility * lxscale * lyscale) / (fabs(pl->height.ZatPoint(ViewPos) - ViewPos.Z)) / 65536.f; + + if (pl->height.fC() > 0) + planelightfloat = -planelightfloat; + + if (fixedlightlev >= 0) + { + R_SetDSColorMapLight(basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); + plane_shade = false; + } + else if (fixedcolormap) + { + R_SetDSColorMapLight(fixedcolormap, 0, 0); + plane_shade = false; + } + else + { + R_SetDSColorMapLight(basecolormap, 0, 0); + plane_shade = true; + planeshade = LIGHT2SHADE(pl->lightlevel); + } + + // Hack in support for 1 x Z and Z x 1 texture sizes + if (ds_ybits == 0) + { + plane_sv[2] = plane_sv[1] = plane_sv[0] = 0; + } + if (ds_xbits == 0) + { + plane_su[2] = plane_su[1] = plane_su[0] = 0; + } + + R_MapVisPlane(pl, R_MapTiltedPlane, nullptr); + } + + void R_MapTiltedPlane(int y, int x1, int x2) + { + R_Drawers()->DrawTiltedSpan(y, x1, x2, plane_sz, plane_su, plane_sv, plane_shade, planeshade, planelightfloat, pviewx, pviewy); + } +} diff --git a/src/swrenderer/scene/r_slopeplane.h b/src/swrenderer/scene/r_slopeplane.h new file mode 100644 index 0000000000..11c4b9ac77 --- /dev/null +++ b/src/swrenderer/scene/r_slopeplane.h @@ -0,0 +1,10 @@ + +#pragma once + +#include 
"r_visible_plane.h" + +namespace swrenderer +{ + void R_DrawTiltedPlane(visplane_t *pl, double _xscale, double _yscale, fixed_t alpha, bool additive, bool masked); + void R_MapTiltedPlane(int y, int x1, int x2); +} diff --git a/src/v_draw.cpp b/src/v_draw.cpp index bd40c80ed7..5e5722030c 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -82,9 +82,6 @@ int CleanWidth, CleanHeight; // Above minus 1 (or 1, if they are already 1) int CleanXfac_1, CleanYfac_1, CleanWidth_1, CleanHeight_1; -// FillSimplePoly uses this -extern "C" short spanend[MAXHEIGHT]; - CVAR (Bool, hud_scale, true, CVAR_ARCHIVE); // For routines that take RGB colors, cache the previous lookup in case there @@ -1406,6 +1403,7 @@ void DCanvas::FillSimplePoly(FTexture *tex, FVector2 *points, int npoints, } // Travel down the right edge and create an outline of that edge. + static short spanend[MAXHEIGHT]; pt1 = toppt; pt2 = toppt + 1; if (pt2 > npoints) pt2 = 0; y1 = xs_RoundToInt(points[pt1].Y + 0.5f); From 2f96dcc1a80011024db1cdba7749bd949e7c1c05 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 31 Dec 2016 12:04:23 +0100 Subject: [PATCH 607/912] Move fog boundary drawing to r_fogboundary --- src/CMakeLists.txt | 1 + src/swrenderer/scene/r_fogboundary.cpp | 146 +++++++++++++++++++++++++ src/swrenderer/scene/r_fogboundary.h | 10 ++ src/swrenderer/scene/r_plane.cpp | 100 ----------------- src/swrenderer/scene/r_plane.h | 2 - src/swrenderer/scene/r_segs.cpp | 1 + 6 files changed, 158 insertions(+), 102 deletions(-) create mode 100644 src/swrenderer/scene/r_fogboundary.cpp create mode 100644 src/swrenderer/scene/r_fogboundary.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index f59bc0fce3..8c1d559ded 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -834,6 +834,7 @@ set( FASTMATH_PCH_SOURCES swrenderer/scene/r_skyplane.cpp swrenderer/scene/r_flatplane.cpp swrenderer/scene/r_slopeplane.cpp + swrenderer/scene/r_fogboundary.cpp polyrenderer/poly_renderer.cpp 
polyrenderer/scene/poly_scene.cpp polyrenderer/scene/poly_portal.cpp diff --git a/src/swrenderer/scene/r_fogboundary.cpp b/src/swrenderer/scene/r_fogboundary.cpp new file mode 100644 index 0000000000..b8c395aaff --- /dev/null +++ b/src/swrenderer/scene/r_fogboundary.cpp @@ -0,0 +1,146 @@ + +#include +#include +#include "templates.h" +#include "i_system.h" +#include "w_wad.h" +#include "doomdef.h" +#include "doomstat.h" +#include "swrenderer/r_main.h" +#include "swrenderer/scene/r_things.h" +#include "r_sky.h" +#include "stats.h" +#include "v_video.h" +#include "a_sharedglobal.h" +#include "c_console.h" +#include "cmdlib.h" +#include "d_net.h" +#include "g_level.h" +#include "r_bsp.h" +#include "r_fogboundary.h" +#include "r_segs.h" +#include "r_3dfloors.h" +#include "v_palette.h" +#include "r_data/colormaps.h" +#include "swrenderer/drawers/r_draw_rgba.h" +#include "gl/dynlights/gl_dynlight.h" +#include "r_walldraw.h" +#include "r_clip_segment.h" +#include "r_draw_segment.h" +#include "r_portal.h" +#include "r_plane.h" +#include "swrenderer/r_memory.h" + +#ifdef _MSC_VER +#pragma warning(disable:4244) +#endif + +namespace swrenderer +{ + namespace + { + short spanend[MAXHEIGHT]; + } + + void R_DrawFogBoundary(int x1, int x2, short *uclip, short *dclip) + { + // This is essentially the same as R_MapVisPlane but with an extra step + // to create new horizontal spans whenever the light changes enough that + // we need to use a new colormap. 
+ + double lightstep = rw_lightstep; + double light = rw_light + rw_lightstep*(x2 - x1 - 1); + int x = x2 - 1; + int t2 = uclip[x]; + int b2 = dclip[x]; + int rcolormap = GETPALOOKUP(light, wallshade); + int lcolormap; + uint8_t *basecolormapdata = basecolormap->Maps; + + if (b2 > t2) + { + fillshort(spanend + t2, b2 - t2, x); + } + + R_SetColorMapLight(basecolormap, (float)light, wallshade); + + uint8_t *fake_dc_colormap = basecolormap->Maps + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT); + + for (--x; x >= x1; --x) + { + int t1 = uclip[x]; + int b1 = dclip[x]; + const int xr = x + 1; + int stop; + + light -= rw_lightstep; + lcolormap = GETPALOOKUP(light, wallshade); + if (lcolormap != rcolormap) + { + if (t2 < b2 && rcolormap != 0) + { // Colormap 0 is always the identity map, so rendering it is + // just a waste of time. + R_DrawFogBoundarySection(t2, b2, xr); + } + if (t1 < t2) t2 = t1; + if (b1 > b2) b2 = b1; + if (t2 < b2) + { + fillshort(spanend + t2, b2 - t2, x); + } + rcolormap = lcolormap; + R_SetColorMapLight(basecolormap, (float)light, wallshade); + fake_dc_colormap = basecolormap->Maps + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT); + } + else + { + if (fake_dc_colormap != basecolormapdata) + { + stop = MIN(t1, b2); + while (t2 < stop) + { + int y = t2++; + R_Drawers()->DrawFogBoundaryLine(y, xr, spanend[y]); + } + stop = MAX(b1, t2); + while (b2 > stop) + { + int y = --b2; + R_Drawers()->DrawFogBoundaryLine(y, xr, spanend[y]); + } + } + else + { + t2 = MAX(t2, MIN(t1, b2)); + b2 = MIN(b2, MAX(b1, t2)); + } + + stop = MIN(t2, b1); + while (t1 < stop) + { + spanend[t1++] = x; + } + stop = MAX(b2, t2); + while (b1 > stop) + { + spanend[--b1] = x; + } + } + + t2 = uclip[x]; + b2 = dclip[x]; + } + if (t2 < b2 && rcolormap != 0) + { + R_DrawFogBoundarySection(t2, b2, x1); + } + } + + void R_DrawFogBoundarySection(int y, int y2, int x1) + { + for (; y < y2; ++y) + { + R_Drawers()->DrawFogBoundaryLine(y, x1, spanend[y]); + } + } +} diff --git 
a/src/swrenderer/scene/r_fogboundary.h b/src/swrenderer/scene/r_fogboundary.h new file mode 100644 index 0000000000..3885bb0da2 --- /dev/null +++ b/src/swrenderer/scene/r_fogboundary.h @@ -0,0 +1,10 @@ + +#pragma once + +#include "r_visible_plane.h" + +namespace swrenderer +{ + void R_DrawFogBoundary(int x1, int x2, short *uclip, short *dclip); + void R_DrawFogBoundarySection(int y, int y2, int x1); +} diff --git a/src/swrenderer/scene/r_plane.cpp b/src/swrenderer/scene/r_plane.cpp index ae8c466951..fff52a62be 100644 --- a/src/swrenderer/scene/r_plane.cpp +++ b/src/swrenderer/scene/r_plane.cpp @@ -115,109 +115,9 @@ short spanend[MAXHEIGHT]; void R_DrawSinglePlane (visplane_t *, fixed_t alpha, bool additive, bool masked); -void R_DrawFogBoundarySection(int y, int y2, int x1) -{ - for (; y < y2; ++y) - { - R_Drawers()->DrawFogBoundaryLine(y, x1, spanend[y]); - } -} //========================================================================== -void R_DrawFogBoundary(int x1, int x2, short *uclip, short *dclip) -{ - // This is essentially the same as R_MapVisPlane but with an extra step - // to create new horizontal spans whenever the light changes enough that - // we need to use a new colormap. 
- - double lightstep = rw_lightstep; - double light = rw_light + rw_lightstep*(x2 - x1 - 1); - int x = x2 - 1; - int t2 = uclip[x]; - int b2 = dclip[x]; - int rcolormap = GETPALOOKUP(light, wallshade); - int lcolormap; - uint8_t *basecolormapdata = basecolormap->Maps; - - if (b2 > t2) - { - fillshort(spanend + t2, b2 - t2, x); - } - - R_SetColorMapLight(basecolormap, (float)light, wallshade); - - uint8_t *fake_dc_colormap = basecolormap->Maps + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT); - - for (--x; x >= x1; --x) - { - int t1 = uclip[x]; - int b1 = dclip[x]; - const int xr = x + 1; - int stop; - - light -= rw_lightstep; - lcolormap = GETPALOOKUP(light, wallshade); - if (lcolormap != rcolormap) - { - if (t2 < b2 && rcolormap != 0) - { // Colormap 0 is always the identity map, so rendering it is - // just a waste of time. - R_DrawFogBoundarySection(t2, b2, xr); - } - if (t1 < t2) t2 = t1; - if (b1 > b2) b2 = b1; - if (t2 < b2) - { - fillshort(spanend + t2, b2 - t2, x); - } - rcolormap = lcolormap; - R_SetColorMapLight(basecolormap, (float)light, wallshade); - fake_dc_colormap = basecolormap->Maps + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT); - } - else - { - if (fake_dc_colormap != basecolormapdata) - { - stop = MIN(t1, b2); - while (t2 < stop) - { - int y = t2++; - R_Drawers()->DrawFogBoundaryLine(y, xr, spanend[y]); - } - stop = MAX(b1, t2); - while (b2 > stop) - { - int y = --b2; - R_Drawers()->DrawFogBoundaryLine(y, xr, spanend[y]); - } - } - else - { - t2 = MAX(t2, MIN(t1, b2)); - b2 = MIN(b2, MAX(b1, t2)); - } - - stop = MIN(t2, b1); - while (t1 < stop) - { - spanend[t1++] = x; - } - stop = MAX(b2, t2); - while (b1 > stop) - { - spanend[--b1] = x; - } - } - - t2 = uclip[x]; - b2 = dclip[x]; - } - if (t2 < b2 && rcolormap != 0) - { - R_DrawFogBoundarySection(t2, b2, x1); - } -} //========================================================================== diff --git a/src/swrenderer/scene/r_plane.h b/src/swrenderer/scene/r_plane.h index 
ad9e59b9b5..1bc18ec458 100644 --- a/src/swrenderer/scene/r_plane.h +++ b/src/swrenderer/scene/r_plane.h @@ -44,8 +44,6 @@ int R_DrawPlanes (); void R_DrawSinglePlane(visplane_t *pl, fixed_t alpha, bool additive, bool masked); void R_MapVisPlane (visplane_t *pl, void (*mapfunc)(int y, int x1, int x2), void (*stepfunc)()); -void R_DrawFogBoundary(int x1, int x2, short *uclip, short *dclip); - visplane_t *R_FindPlane(const secplane_t &height, FTextureID picnum, int lightlevel, double alpha, bool additive, const FTransform &xform, int sky, FSectorPortal *portal); visplane_t *R_CheckPlane(visplane_t *pl, int start, int stop); diff --git a/src/swrenderer/scene/r_segs.cpp b/src/swrenderer/scene/r_segs.cpp index 315411b936..e28c2803dc 100644 --- a/src/swrenderer/scene/r_segs.cpp +++ b/src/swrenderer/scene/r_segs.cpp @@ -43,6 +43,7 @@ #include "g_level.h" #include "r_bsp.h" #include "r_plane.h" +#include "r_fogboundary.h" #include "r_segs.h" #include "r_3dfloors.h" #include "swrenderer/drawers/r_draw.h" From ad15d56a814e636cadb117d5606576fc3af68d3f Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 31 Dec 2016 12:45:07 +0100 Subject: [PATCH 608/912] Move files into additional folders --- src/CMakeLists.txt | 34 +++++++++++-------- .../{scene => plane}/r_flatplane.cpp | 16 ++++----- src/swrenderer/{scene => plane}/r_flatplane.h | 2 +- .../{scene => plane}/r_fogboundary.cpp | 18 +++++----- .../{scene => plane}/r_fogboundary.h | 2 +- .../{scene => plane}/r_skyplane.cpp | 14 ++++---- src/swrenderer/{scene => plane}/r_skyplane.h | 2 +- .../{scene => plane}/r_slopeplane.cpp | 16 ++++----- .../{scene => plane}/r_slopeplane.h | 2 +- .../r_visibleplane.cpp} | 4 +-- .../r_visibleplane.h} | 0 src/swrenderer/r_main.cpp | 8 ++--- src/swrenderer/r_swrenderer.cpp | 2 +- src/swrenderer/scene/r_bsp.cpp | 4 +-- src/swrenderer/scene/r_plane.cpp | 10 +++--- src/swrenderer/scene/r_plane.h | 2 +- src/swrenderer/scene/r_portal.cpp | 4 +-- src/swrenderer/scene/r_portal.h | 2 +- 
src/swrenderer/scene/r_segs.cpp | 8 ++--- src/swrenderer/scene/r_things.cpp | 10 +++--- src/swrenderer/scene/r_things.h | 2 +- src/swrenderer/scene/r_walldraw.cpp | 2 +- .../r_clipsegment.cpp} | 12 +++---- .../r_clipsegment.h} | 0 .../r_drawsegment.cpp} | 12 +++---- .../r_drawsegment.h} | 2 +- .../r_portalsegment.cpp} | 12 +++---- .../r_portalsegment.h} | 0 src/swrenderer/{scene => things}/r_decal.cpp | 15 ++++---- src/swrenderer/{scene => things}/r_decal.h | 0 .../{scene => things}/r_particle.cpp | 14 ++++---- src/swrenderer/{scene => things}/r_particle.h | 2 +- .../{scene => things}/r_playersprite.cpp | 14 ++++---- .../{scene => things}/r_playersprite.h | 2 +- .../r_visiblesprite.cpp} | 2 +- .../r_visiblesprite.h} | 2 +- src/swrenderer/{scene => things}/r_voxel.cpp | 2 +- src/swrenderer/{scene => things}/r_voxel.h | 0 .../{scene => things}/r_wallsprite.cpp | 14 ++++---- .../{scene => things}/r_wallsprite.h | 2 +- 40 files changed, 138 insertions(+), 133 deletions(-) rename src/swrenderer/{scene => plane}/r_flatplane.cpp (96%) rename src/swrenderer/{scene => plane}/r_flatplane.h (92%) rename src/swrenderer/{scene => plane}/r_fogboundary.cpp (89%) rename src/swrenderer/{scene => plane}/r_fogboundary.h (85%) rename src/swrenderer/{scene => plane}/r_skyplane.cpp (97%) rename src/swrenderer/{scene => plane}/r_skyplane.h (94%) rename src/swrenderer/{scene => plane}/r_slopeplane.cpp (93%) rename src/swrenderer/{scene => plane}/r_slopeplane.h (87%) rename src/swrenderer/{scene/r_visible_plane.cpp => plane/r_visibleplane.cpp} (97%) rename src/swrenderer/{scene/r_visible_plane.h => plane/r_visibleplane.h} (100%) rename src/swrenderer/{scene/r_clip_segment.cpp => segments/r_clipsegment.cpp} (93%) rename src/swrenderer/{scene/r_clip_segment.h => segments/r_clipsegment.h} (100%) rename src/swrenderer/{scene/r_draw_segment.cpp => segments/r_drawsegment.cpp} (86%) rename src/swrenderer/{scene/r_draw_segment.h => segments/r_drawsegment.h} (97%) rename 
src/swrenderer/{scene/r_portal_segment.cpp => segments/r_portalsegment.cpp} (70%) rename src/swrenderer/{scene/r_portal_segment.h => segments/r_portalsegment.h} (100%) rename src/swrenderer/{scene => things}/r_decal.cpp (96%) rename src/swrenderer/{scene => things}/r_decal.h (100%) rename src/swrenderer/{scene => things}/r_particle.cpp (96%) rename src/swrenderer/{scene => things}/r_particle.h (88%) rename src/swrenderer/{scene => things}/r_playersprite.cpp (98%) rename src/swrenderer/{scene => things}/r_playersprite.h (88%) rename src/swrenderer/{scene/r_visible_sprite.cpp => things/r_visiblesprite.cpp} (96%) rename src/swrenderer/{scene/r_visible_sprite.h => things/r_visiblesprite.h} (98%) rename src/swrenderer/{scene => things}/r_voxel.cpp (99%) rename src/swrenderer/{scene => things}/r_voxel.h (100%) rename src/swrenderer/{scene => things}/r_wallsprite.cpp (95%) rename src/swrenderer/{scene => things}/r_wallsprite.h (89%) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 8c1d559ded..3451b000b5 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -768,6 +768,9 @@ file( GLOB HEADER_FILES swrenderer/*.h swrenderer/drawers/*.h swrenderer/scene/*.h + swrenderer/segments/*.h + swrenderer/plane/*.h + swrenderer/things/*.h polyrenderer/*.h polyrenderer/math/*.h polyrenderer/drawers/*.h @@ -819,22 +822,22 @@ set( FASTMATH_PCH_SOURCES swrenderer/scene/r_plane.cpp swrenderer/scene/r_segs.cpp swrenderer/scene/r_things.cpp - swrenderer/scene/r_voxel.cpp swrenderer/scene/r_walldraw.cpp - swrenderer/scene/r_clip_segment.cpp - swrenderer/scene/r_draw_segment.cpp - swrenderer/scene/r_portal_segment.cpp swrenderer/scene/r_portal.cpp - swrenderer/scene/r_visible_plane.cpp - swrenderer/scene/r_visible_sprite.cpp - swrenderer/scene/r_particle.cpp - swrenderer/scene/r_playersprite.cpp - swrenderer/scene/r_wallsprite.cpp - swrenderer/scene/r_decal.cpp - swrenderer/scene/r_skyplane.cpp - swrenderer/scene/r_flatplane.cpp - swrenderer/scene/r_slopeplane.cpp - 
swrenderer/scene/r_fogboundary.cpp + swrenderer/segments/r_clipsegment.cpp + swrenderer/segments/r_drawsegment.cpp + swrenderer/segments/r_portalsegment.cpp + swrenderer/things/r_visiblesprite.cpp + swrenderer/things/r_voxel.cpp + swrenderer/things/r_particle.cpp + swrenderer/things/r_playersprite.cpp + swrenderer/things/r_wallsprite.cpp + swrenderer/things/r_decal.cpp + swrenderer/plane/r_visibleplane.cpp + swrenderer/plane/r_skyplane.cpp + swrenderer/plane/r_flatplane.cpp + swrenderer/plane/r_slopeplane.cpp + swrenderer/plane/r_fogboundary.cpp polyrenderer/poly_renderer.cpp polyrenderer/scene/poly_scene.cpp polyrenderer/scene/poly_portal.cpp @@ -1471,6 +1474,9 @@ source_group("OpenGL Renderer\\Utilities" REGULAR_EXPRESSION "^${CMAKE_CURRENT_S source_group("Software Renderer" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/swrenderer/.+") source_group("Software Renderer\\Drawers" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/swrenderer/drawers/.+") source_group("Software Renderer\\Scene" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/swrenderer/scene/.+") +source_group("Software Renderer\\Segments" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/swrenderer/segments/.+") +source_group("Software Renderer\\Plane" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/swrenderer/plane/.+") +source_group("Software Renderer\\Things" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/swrenderer/things/.+") source_group("Poly Renderer" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/polyrenderer/.+") source_group("Poly Renderer\\Math" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/polyrenderer/math/.+") source_group("Poly Renderer\\Drawers" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/polyrenderer/drawers/.+") diff --git a/src/swrenderer/scene/r_flatplane.cpp b/src/swrenderer/plane/r_flatplane.cpp similarity index 96% rename from src/swrenderer/scene/r_flatplane.cpp rename to src/swrenderer/plane/r_flatplane.cpp index fba9cd1623..51d7b88ccb 100644 --- 
a/src/swrenderer/scene/r_flatplane.cpp +++ b/src/swrenderer/plane/r_flatplane.cpp @@ -16,19 +16,19 @@ #include "cmdlib.h" #include "d_net.h" #include "g_level.h" -#include "r_bsp.h" +#include "swrenderer/scene/r_bsp.h" #include "r_flatplane.h" -#include "r_segs.h" -#include "r_3dfloors.h" +#include "swrenderer/scene/r_segs.h" +#include "swrenderer/scene/r_3dfloors.h" #include "v_palette.h" #include "r_data/colormaps.h" #include "swrenderer/drawers/r_draw_rgba.h" #include "gl/dynlights/gl_dynlight.h" -#include "r_walldraw.h" -#include "r_clip_segment.h" -#include "r_draw_segment.h" -#include "r_portal.h" -#include "r_plane.h" +#include "swrenderer/scene/r_walldraw.h" +#include "swrenderer/segments/r_clipsegment.h" +#include "swrenderer/segments/r_drawsegment.h" +#include "swrenderer/scene/r_portal.h" +#include "swrenderer/scene/r_plane.h" #include "swrenderer/r_memory.h" namespace swrenderer diff --git a/src/swrenderer/scene/r_flatplane.h b/src/swrenderer/plane/r_flatplane.h similarity index 92% rename from src/swrenderer/scene/r_flatplane.h rename to src/swrenderer/plane/r_flatplane.h index d719c7153e..7164b140c3 100644 --- a/src/swrenderer/scene/r_flatplane.h +++ b/src/swrenderer/plane/r_flatplane.h @@ -1,7 +1,7 @@ #pragma once -#include "r_visible_plane.h" +#include "r_visibleplane.h" namespace swrenderer { diff --git a/src/swrenderer/scene/r_fogboundary.cpp b/src/swrenderer/plane/r_fogboundary.cpp similarity index 89% rename from src/swrenderer/scene/r_fogboundary.cpp rename to src/swrenderer/plane/r_fogboundary.cpp index b8c395aaff..5244b937f0 100644 --- a/src/swrenderer/scene/r_fogboundary.cpp +++ b/src/swrenderer/plane/r_fogboundary.cpp @@ -16,19 +16,19 @@ #include "cmdlib.h" #include "d_net.h" #include "g_level.h" -#include "r_bsp.h" -#include "r_fogboundary.h" -#include "r_segs.h" -#include "r_3dfloors.h" +#include "swrenderer/scene/r_bsp.h" +#include "swrenderer/plane/r_fogboundary.h" +#include "swrenderer/scene/r_segs.h" +#include 
"swrenderer/scene/r_3dfloors.h" #include "v_palette.h" #include "r_data/colormaps.h" #include "swrenderer/drawers/r_draw_rgba.h" #include "gl/dynlights/gl_dynlight.h" -#include "r_walldraw.h" -#include "r_clip_segment.h" -#include "r_draw_segment.h" -#include "r_portal.h" -#include "r_plane.h" +#include "swrenderer/scene/r_walldraw.h" +#include "swrenderer/segments/r_clipsegment.h" +#include "swrenderer/segments/r_drawsegment.h" +#include "swrenderer/scene/r_portal.h" +#include "swrenderer/scene/r_plane.h" #include "swrenderer/r_memory.h" #ifdef _MSC_VER diff --git a/src/swrenderer/scene/r_fogboundary.h b/src/swrenderer/plane/r_fogboundary.h similarity index 85% rename from src/swrenderer/scene/r_fogboundary.h rename to src/swrenderer/plane/r_fogboundary.h index 3885bb0da2..0e9af3322a 100644 --- a/src/swrenderer/scene/r_fogboundary.h +++ b/src/swrenderer/plane/r_fogboundary.h @@ -1,7 +1,7 @@ #pragma once -#include "r_visible_plane.h" +#include "r_visibleplane.h" namespace swrenderer { diff --git a/src/swrenderer/scene/r_skyplane.cpp b/src/swrenderer/plane/r_skyplane.cpp similarity index 97% rename from src/swrenderer/scene/r_skyplane.cpp rename to src/swrenderer/plane/r_skyplane.cpp index 759abe2d58..e85c3d6ef9 100644 --- a/src/swrenderer/scene/r_skyplane.cpp +++ b/src/swrenderer/plane/r_skyplane.cpp @@ -16,18 +16,18 @@ #include "cmdlib.h" #include "d_net.h" #include "g_level.h" -#include "r_bsp.h" +#include "swrenderer/scene/r_bsp.h" #include "r_skyplane.h" -#include "r_segs.h" -#include "r_3dfloors.h" +#include "swrenderer/scene/r_segs.h" +#include "swrenderer/scene/r_3dfloors.h" #include "v_palette.h" #include "r_data/colormaps.h" #include "swrenderer/drawers/r_draw_rgba.h" #include "gl/dynlights/gl_dynlight.h" -#include "r_walldraw.h" -#include "r_clip_segment.h" -#include "r_draw_segment.h" -#include "r_portal.h" +#include "swrenderer/scene/r_walldraw.h" +#include "swrenderer/segments/r_clipsegment.h" +#include "swrenderer/segments/r_drawsegment.h" +#include 
"swrenderer/scene/r_portal.h" #include "swrenderer/r_memory.h" CVAR(Bool, r_linearsky, false, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); diff --git a/src/swrenderer/scene/r_skyplane.h b/src/swrenderer/plane/r_skyplane.h similarity index 94% rename from src/swrenderer/scene/r_skyplane.h rename to src/swrenderer/plane/r_skyplane.h index b8df900789..0a7d55be46 100644 --- a/src/swrenderer/scene/r_skyplane.h +++ b/src/swrenderer/plane/r_skyplane.h @@ -1,7 +1,7 @@ #pragma once -#include "r_visible_plane.h" +#include "r_visibleplane.h" namespace swrenderer { diff --git a/src/swrenderer/scene/r_slopeplane.cpp b/src/swrenderer/plane/r_slopeplane.cpp similarity index 93% rename from src/swrenderer/scene/r_slopeplane.cpp rename to src/swrenderer/plane/r_slopeplane.cpp index 4810fd3d41..b2dccead8f 100644 --- a/src/swrenderer/scene/r_slopeplane.cpp +++ b/src/swrenderer/plane/r_slopeplane.cpp @@ -16,19 +16,19 @@ #include "cmdlib.h" #include "d_net.h" #include "g_level.h" -#include "r_bsp.h" +#include "swrenderer/scene/r_bsp.h" #include "r_slopeplane.h" -#include "r_segs.h" -#include "r_3dfloors.h" +#include "swrenderer/scene/r_segs.h" +#include "swrenderer/scene/r_3dfloors.h" #include "v_palette.h" #include "r_data/colormaps.h" #include "swrenderer/drawers/r_draw_rgba.h" #include "gl/dynlights/gl_dynlight.h" -#include "r_walldraw.h" -#include "r_clip_segment.h" -#include "r_draw_segment.h" -#include "r_portal.h" -#include "r_plane.h" +#include "swrenderer/scene/r_walldraw.h" +#include "swrenderer/segments/r_clipsegment.h" +#include "swrenderer/segments/r_drawsegment.h" +#include "swrenderer/scene/r_portal.h" +#include "swrenderer/scene/r_plane.h" #include "swrenderer/r_memory.h" #ifdef _MSC_VER diff --git a/src/swrenderer/scene/r_slopeplane.h b/src/swrenderer/plane/r_slopeplane.h similarity index 87% rename from src/swrenderer/scene/r_slopeplane.h rename to src/swrenderer/plane/r_slopeplane.h index 11c4b9ac77..fc071b110c 100644 --- a/src/swrenderer/scene/r_slopeplane.h +++ 
b/src/swrenderer/plane/r_slopeplane.h @@ -1,7 +1,7 @@ #pragma once -#include "r_visible_plane.h" +#include "r_visibleplane.h" namespace swrenderer { diff --git a/src/swrenderer/scene/r_visible_plane.cpp b/src/swrenderer/plane/r_visibleplane.cpp similarity index 97% rename from src/swrenderer/scene/r_visible_plane.cpp rename to src/swrenderer/plane/r_visibleplane.cpp index 4a9e11ce95..79f7ea2e71 100644 --- a/src/swrenderer/scene/r_visible_plane.cpp +++ b/src/swrenderer/plane/r_visibleplane.cpp @@ -17,8 +17,8 @@ #include "cmdlib.h" #include "d_net.h" #include "g_level.h" -#include "r_bsp.h" -#include "r_visible_plane.h" +#include "swrenderer/scene/r_bsp.h" +#include "r_visibleplane.h" namespace swrenderer { diff --git a/src/swrenderer/scene/r_visible_plane.h b/src/swrenderer/plane/r_visibleplane.h similarity index 100% rename from src/swrenderer/scene/r_visible_plane.h rename to src/swrenderer/plane/r_visibleplane.h diff --git a/src/swrenderer/r_main.cpp b/src/swrenderer/r_main.cpp index 74bbbeabd9..5d672c2804 100644 --- a/src/swrenderer/r_main.cpp +++ b/src/swrenderer/r_main.cpp @@ -38,11 +38,11 @@ #include "scene/r_things.h" #include "drawers/r_draw.h" #include "scene/r_plane.h" -#include "scene/r_flatplane.h" +#include "plane/r_flatplane.h" #include "scene/r_bsp.h" -#include "scene/r_draw_segment.h" -#include "scene/r_portal_segment.h" -#include "scene/r_clip_segment.h" +#include "segments/r_drawsegment.h" +#include "segments/r_portalsegment.h" +#include "segments/r_clipsegment.h" #include "scene/r_segs.h" #include "scene/r_3dfloors.h" #include "scene/r_portal.h" diff --git a/src/swrenderer/r_swrenderer.cpp b/src/swrenderer/r_swrenderer.cpp index 76aa2cca4e..c368bf3d43 100644 --- a/src/swrenderer/r_swrenderer.cpp +++ b/src/swrenderer/r_swrenderer.cpp @@ -34,7 +34,7 @@ #include "r_main.h" -#include "swrenderer/scene/r_playersprite.h" +#include "swrenderer/things/r_playersprite.h" #include "v_palette.h" #include "v_video.h" #include "m_png.h" diff --git 
a/src/swrenderer/scene/r_bsp.cpp b/src/swrenderer/scene/r_bsp.cpp index df99066060..ba939390f4 100644 --- a/src/swrenderer/scene/r_bsp.cpp +++ b/src/swrenderer/scene/r_bsp.cpp @@ -36,9 +36,9 @@ #include "r_plane.h" #include "swrenderer/drawers/r_draw.h" #include "r_things.h" -#include "r_particle.h" +#include "swrenderer/things/r_particle.h" #include "r_3dfloors.h" -#include "r_clip_segment.h" +#include "swrenderer/segments/r_clipsegment.h" #include "r_portal.h" #include "a_sharedglobal.h" #include "g_level.h" diff --git a/src/swrenderer/scene/r_plane.cpp b/src/swrenderer/scene/r_plane.cpp index fff52a62be..8442532ceb 100644 --- a/src/swrenderer/scene/r_plane.cpp +++ b/src/swrenderer/scene/r_plane.cpp @@ -62,12 +62,12 @@ #include "swrenderer/drawers/r_draw_rgba.h" #include "gl/dynlights/gl_dynlight.h" #include "r_walldraw.h" -#include "r_clip_segment.h" -#include "r_draw_segment.h" +#include "swrenderer/segments/r_clipsegment.h" +#include "swrenderer/segments/r_drawsegment.h" #include "r_portal.h" -#include "r_skyplane.h" -#include "r_flatplane.h" -#include "r_slopeplane.h" +#include "swrenderer/plane/r_skyplane.h" +#include "swrenderer/plane/r_flatplane.h" +#include "swrenderer/plane/r_slopeplane.h" #include "swrenderer/r_memory.h" #ifdef _MSC_VER diff --git a/src/swrenderer/scene/r_plane.h b/src/swrenderer/scene/r_plane.h index 1bc18ec458..e38d16af8a 100644 --- a/src/swrenderer/scene/r_plane.h +++ b/src/swrenderer/scene/r_plane.h @@ -23,7 +23,7 @@ #ifndef __R_PLANE_H__ #define __R_PLANE_H__ -#include "r_visible_plane.h" +#include "swrenderer/plane/r_visibleplane.h" namespace swrenderer { diff --git a/src/swrenderer/scene/r_portal.cpp b/src/swrenderer/scene/r_portal.cpp index 638f99a846..d2897d6563 100644 --- a/src/swrenderer/scene/r_portal.cpp +++ b/src/swrenderer/scene/r_portal.cpp @@ -33,8 +33,8 @@ #include "swrenderer/drawers/r_draw_rgba.h" #include "r_utility.h" #include "r_plane.h" -#include "r_clip_segment.h" -#include "r_draw_segment.h" +#include 
"swrenderer/segments/r_clipsegment.h" +#include "swrenderer/segments/r_drawsegment.h" #include "r_things.h" #include "r_3dfloors.h" #include "swrenderer/r_main.h" diff --git a/src/swrenderer/scene/r_portal.h b/src/swrenderer/scene/r_portal.h index f13c760b33..32b6463ca3 100644 --- a/src/swrenderer/scene/r_portal.h +++ b/src/swrenderer/scene/r_portal.h @@ -1,7 +1,7 @@ #pragma once -#include "r_portal_segment.h" +#include "swrenderer/segments/r_portalsegment.h" namespace swrenderer { diff --git a/src/swrenderer/scene/r_segs.cpp b/src/swrenderer/scene/r_segs.cpp index e28c2803dc..5a8fb29826 100644 --- a/src/swrenderer/scene/r_segs.cpp +++ b/src/swrenderer/scene/r_segs.cpp @@ -43,17 +43,17 @@ #include "g_level.h" #include "r_bsp.h" #include "r_plane.h" -#include "r_fogboundary.h" +#include "swrenderer/plane/r_fogboundary.h" #include "r_segs.h" #include "r_3dfloors.h" #include "swrenderer/drawers/r_draw.h" #include "v_palette.h" #include "r_data/colormaps.h" #include "r_walldraw.h" -#include "r_draw_segment.h" +#include "swrenderer/segments/r_drawsegment.h" #include "r_portal.h" -#include "r_wallsprite.h" -#include "r_decal.h" +#include "swrenderer/things/r_wallsprite.h" +#include "swrenderer/things/r_decal.h" #include "swrenderer/r_memory.h" #define WALLYREPEAT 8 diff --git a/src/swrenderer/scene/r_things.cpp b/src/swrenderer/scene/r_things.cpp index b974722061..e11510f321 100644 --- a/src/swrenderer/scene/r_things.cpp +++ b/src/swrenderer/scene/r_things.cpp @@ -67,12 +67,12 @@ #include "r_data/voxels.h" #include "p_local.h" #include "p_maputl.h" -#include "r_voxel.h" -#include "r_draw_segment.h" +#include "swrenderer/things/r_voxel.h" +#include "swrenderer/segments/r_drawsegment.h" #include "r_portal.h" -#include "r_particle.h" -#include "r_playersprite.h" -#include "r_wallsprite.h" +#include "swrenderer/things/r_particle.h" +#include "swrenderer/things/r_playersprite.h" +#include "swrenderer/things/r_wallsprite.h" #include "swrenderer/r_memory.h" EXTERN_CVAR(Int, 
r_drawfuzz) diff --git a/src/swrenderer/scene/r_things.h b/src/swrenderer/scene/r_things.h index d3b3d5ebde..2354f7c0f4 100644 --- a/src/swrenderer/scene/r_things.h +++ b/src/swrenderer/scene/r_things.h @@ -23,7 +23,7 @@ #ifndef __R_THINGS__ #define __R_THINGS__ -#include "r_visible_sprite.h" +#include "swrenderer/things/r_visiblesprite.h" struct particle_t; struct FVoxel; diff --git a/src/swrenderer/scene/r_walldraw.cpp b/src/swrenderer/scene/r_walldraw.cpp index 83cbc96ca0..bc98b50d39 100644 --- a/src/swrenderer/scene/r_walldraw.cpp +++ b/src/swrenderer/scene/r_walldraw.cpp @@ -45,7 +45,7 @@ #include "gl/dynlights/gl_dynlight.h" #include "swrenderer/drawers/r_drawers.h" #include "r_walldraw.h" -#include "r_draw_segment.h" +#include "swrenderer/segments/r_drawsegment.h" namespace swrenderer { diff --git a/src/swrenderer/scene/r_clip_segment.cpp b/src/swrenderer/segments/r_clipsegment.cpp similarity index 93% rename from src/swrenderer/scene/r_clip_segment.cpp rename to src/swrenderer/segments/r_clipsegment.cpp index 4db069a495..83920a1373 100644 --- a/src/swrenderer/scene/r_clip_segment.cpp +++ b/src/swrenderer/segments/r_clipsegment.cpp @@ -7,22 +7,22 @@ #include "p_lnspec.h" #include "p_setup.h" #include "swrenderer/r_main.h" -#include "r_plane.h" +#include "swrenderer/scene/r_plane.h" #include "swrenderer/drawers/r_draw.h" -#include "r_things.h" -#include "r_3dfloors.h" +#include "swrenderer/scene/r_things.h" +#include "swrenderer/scene/r_3dfloors.h" #include "a_sharedglobal.h" #include "g_level.h" #include "p_effect.h" #include "doomstat.h" #include "r_state.h" -#include "r_bsp.h" -#include "r_segs.h" +#include "swrenderer/scene/r_bsp.h" +#include "swrenderer/scene/r_segs.h" #include "v_palette.h" #include "r_sky.h" #include "po_man.h" #include "r_data/colormaps.h" -#include "r_clip_segment.h" +#include "swrenderer/segments/r_clipsegment.h" namespace swrenderer { diff --git a/src/swrenderer/scene/r_clip_segment.h b/src/swrenderer/segments/r_clipsegment.h 
similarity index 100% rename from src/swrenderer/scene/r_clip_segment.h rename to src/swrenderer/segments/r_clipsegment.h diff --git a/src/swrenderer/scene/r_draw_segment.cpp b/src/swrenderer/segments/r_drawsegment.cpp similarity index 86% rename from src/swrenderer/scene/r_draw_segment.cpp rename to src/swrenderer/segments/r_drawsegment.cpp index 2200f0ea51..42af7d0320 100644 --- a/src/swrenderer/scene/r_draw_segment.cpp +++ b/src/swrenderer/segments/r_drawsegment.cpp @@ -7,22 +7,22 @@ #include "p_lnspec.h" #include "p_setup.h" #include "swrenderer/r_main.h" -#include "r_plane.h" +#include "swrenderer/scene/r_plane.h" #include "swrenderer/drawers/r_draw.h" -#include "r_things.h" -#include "r_3dfloors.h" +#include "swrenderer/scene/r_things.h" +#include "swrenderer/scene/r_3dfloors.h" #include "a_sharedglobal.h" #include "g_level.h" #include "p_effect.h" #include "doomstat.h" #include "r_state.h" -#include "r_bsp.h" -#include "r_segs.h" +#include "swrenderer/scene/r_bsp.h" +#include "swrenderer/scene/r_segs.h" #include "v_palette.h" #include "r_sky.h" #include "po_man.h" #include "r_data/colormaps.h" -#include "r_draw_segment.h" +#include "swrenderer/segments/r_drawsegment.h" namespace swrenderer { diff --git a/src/swrenderer/scene/r_draw_segment.h b/src/swrenderer/segments/r_drawsegment.h similarity index 97% rename from src/swrenderer/scene/r_draw_segment.h rename to src/swrenderer/segments/r_drawsegment.h index df9ed80dd9..7174794a3a 100644 --- a/src/swrenderer/scene/r_draw_segment.h +++ b/src/swrenderer/segments/r_drawsegment.h @@ -1,7 +1,7 @@ #pragma once -#include "r_bsp.h" +#include "swrenderer/scene/r_bsp.h" namespace swrenderer { diff --git a/src/swrenderer/scene/r_portal_segment.cpp b/src/swrenderer/segments/r_portalsegment.cpp similarity index 70% rename from src/swrenderer/scene/r_portal_segment.cpp rename to src/swrenderer/segments/r_portalsegment.cpp index e817576704..db31f5edfa 100644 --- a/src/swrenderer/scene/r_portal_segment.cpp +++ 
b/src/swrenderer/segments/r_portalsegment.cpp @@ -7,22 +7,22 @@ #include "p_lnspec.h" #include "p_setup.h" #include "swrenderer/r_main.h" -#include "r_plane.h" +#include "swrenderer/scene/r_plane.h" #include "swrenderer/drawers/r_draw.h" -#include "r_things.h" -#include "r_3dfloors.h" +#include "swrenderer/scene/r_things.h" +#include "swrenderer/scene/r_3dfloors.h" #include "a_sharedglobal.h" #include "g_level.h" #include "p_effect.h" #include "doomstat.h" #include "r_state.h" -#include "r_bsp.h" -#include "r_segs.h" +#include "swrenderer/scene/r_bsp.h" +#include "swrenderer/scene/r_segs.h" #include "v_palette.h" #include "r_sky.h" #include "po_man.h" #include "r_data/colormaps.h" -#include "r_portal_segment.h" +#include "swrenderer/segments/r_portalsegment.h" namespace swrenderer { diff --git a/src/swrenderer/scene/r_portal_segment.h b/src/swrenderer/segments/r_portalsegment.h similarity index 100% rename from src/swrenderer/scene/r_portal_segment.h rename to src/swrenderer/segments/r_portalsegment.h diff --git a/src/swrenderer/scene/r_decal.cpp b/src/swrenderer/things/r_decal.cpp similarity index 96% rename from src/swrenderer/scene/r_decal.cpp rename to src/swrenderer/things/r_decal.cpp index 4159595fd6..a49e90cfba 100644 --- a/src/swrenderer/scene/r_decal.cpp +++ b/src/swrenderer/things/r_decal.cpp @@ -17,19 +17,18 @@ #include "a_sharedglobal.h" #include "d_net.h" #include "g_level.h" -#include "r_bsp.h" -#include "r_plane.h" +#include "swrenderer/scene/r_bsp.h" +#include "swrenderer/scene/r_plane.h" #include "r_decal.h" -#include "r_3dfloors.h" +#include "swrenderer/scene/r_3dfloors.h" #include "swrenderer/drawers/r_draw.h" #include "v_palette.h" #include "r_data/colormaps.h" -#include "r_walldraw.h" -#include "r_draw_segment.h" -#include "r_portal.h" +#include "swrenderer/scene/r_walldraw.h" +#include "swrenderer/segments/r_drawsegment.h" +#include "swrenderer/scene/r_portal.h" #include "r_wallsprite.h" -#include "r_draw_segment.h" -#include "r_segs.h" 
+#include "swrenderer/scene/r_segs.h" #include "swrenderer/r_memory.h" namespace swrenderer diff --git a/src/swrenderer/scene/r_decal.h b/src/swrenderer/things/r_decal.h similarity index 100% rename from src/swrenderer/scene/r_decal.h rename to src/swrenderer/things/r_decal.h diff --git a/src/swrenderer/scene/r_particle.cpp b/src/swrenderer/things/r_particle.cpp similarity index 96% rename from src/swrenderer/scene/r_particle.cpp rename to src/swrenderer/things/r_particle.cpp index 4197c6db29..d8a32d9ade 100644 --- a/src/swrenderer/scene/r_particle.cpp +++ b/src/swrenderer/things/r_particle.cpp @@ -10,7 +10,7 @@ #include "w_wad.h" #include "swrenderer/r_main.h" #include "swrenderer/scene/r_things.h" -#include "swrenderer/scene/r_particle.h" +#include "swrenderer/things/r_particle.h" #include "c_console.h" #include "c_cvars.h" #include "c_dispatch.h" @@ -27,10 +27,10 @@ #include "colormatcher.h" #include "d_netinf.h" #include "p_effect.h" -#include "r_bsp.h" -#include "r_plane.h" -#include "r_segs.h" -#include "r_3dfloors.h" +#include "swrenderer/scene/r_bsp.h" +#include "swrenderer/scene/r_plane.h" +#include "swrenderer/scene/r_segs.h" +#include "swrenderer/scene/r_3dfloors.h" #include "swrenderer/drawers/r_draw_rgba.h" #include "swrenderer/drawers/r_draw_pal.h" #include "v_palette.h" @@ -40,8 +40,8 @@ #include "p_local.h" #include "p_maputl.h" #include "r_voxel.h" -#include "r_draw_segment.h" -#include "r_portal.h" +#include "swrenderer/segments/r_drawsegment.h" +#include "swrenderer/scene/r_portal.h" #include "swrenderer/r_memory.h" namespace swrenderer diff --git a/src/swrenderer/scene/r_particle.h b/src/swrenderer/things/r_particle.h similarity index 88% rename from src/swrenderer/scene/r_particle.h rename to src/swrenderer/things/r_particle.h index 9184840d88..01db4dfa00 100644 --- a/src/swrenderer/scene/r_particle.h +++ b/src/swrenderer/things/r_particle.h @@ -1,7 +1,7 @@ #pragma once -#include "r_visible_sprite.h" +#include "r_visiblesprite.h" namespace 
swrenderer { diff --git a/src/swrenderer/scene/r_playersprite.cpp b/src/swrenderer/things/r_playersprite.cpp similarity index 98% rename from src/swrenderer/scene/r_playersprite.cpp rename to src/swrenderer/things/r_playersprite.cpp index ac9137c9eb..13d78495f1 100644 --- a/src/swrenderer/scene/r_playersprite.cpp +++ b/src/swrenderer/things/r_playersprite.cpp @@ -10,7 +10,7 @@ #include "w_wad.h" #include "swrenderer/r_main.h" #include "swrenderer/scene/r_things.h" -#include "swrenderer/scene/r_playersprite.h" +#include "swrenderer/things/r_playersprite.h" #include "c_console.h" #include "c_cvars.h" #include "c_dispatch.h" @@ -27,10 +27,10 @@ #include "colormatcher.h" #include "d_netinf.h" #include "p_effect.h" -#include "r_bsp.h" -#include "r_plane.h" -#include "r_segs.h" -#include "r_3dfloors.h" +#include "swrenderer/scene/r_bsp.h" +#include "swrenderer/scene/r_plane.h" +#include "swrenderer/scene/r_segs.h" +#include "swrenderer/scene/r_3dfloors.h" #include "swrenderer/drawers/r_draw_rgba.h" #include "swrenderer/drawers/r_draw_pal.h" #include "v_palette.h" @@ -40,8 +40,8 @@ #include "p_local.h" #include "p_maputl.h" #include "r_voxel.h" -#include "r_draw_segment.h" -#include "r_portal.h" +#include "swrenderer/segments/r_drawsegment.h" +#include "swrenderer/scene/r_portal.h" #include "swrenderer/r_memory.h" EXTERN_CVAR(Bool, st_scale) diff --git a/src/swrenderer/scene/r_playersprite.h b/src/swrenderer/things/r_playersprite.h similarity index 88% rename from src/swrenderer/scene/r_playersprite.h rename to src/swrenderer/things/r_playersprite.h index 0fff067965..21133532ba 100644 --- a/src/swrenderer/scene/r_playersprite.h +++ b/src/swrenderer/things/r_playersprite.h @@ -1,7 +1,7 @@ #pragma once -#include "r_visible_sprite.h" +#include "r_visiblesprite.h" namespace swrenderer { diff --git a/src/swrenderer/scene/r_visible_sprite.cpp b/src/swrenderer/things/r_visiblesprite.cpp similarity index 96% rename from src/swrenderer/scene/r_visible_sprite.cpp rename to 
src/swrenderer/things/r_visiblesprite.cpp index f3cd357913..3236e93374 100644 --- a/src/swrenderer/scene/r_visible_sprite.cpp +++ b/src/swrenderer/things/r_visiblesprite.cpp @@ -9,7 +9,7 @@ #include "i_system.h" #include "w_wad.h" #include "swrenderer/r_main.h" -#include "swrenderer/scene/r_visible_sprite.h" +#include "swrenderer/things/r_visiblesprite.h" namespace swrenderer { diff --git a/src/swrenderer/scene/r_visible_sprite.h b/src/swrenderer/things/r_visiblesprite.h similarity index 98% rename from src/swrenderer/scene/r_visible_sprite.h rename to src/swrenderer/things/r_visiblesprite.h index 426840fc35..01f7ab3af2 100644 --- a/src/swrenderer/scene/r_visible_sprite.h +++ b/src/swrenderer/things/r_visiblesprite.h @@ -1,7 +1,7 @@ #pragma once -#include "r_bsp.h" +#include "swrenderer/scene/r_bsp.h" struct particle_t; struct FVoxel; diff --git a/src/swrenderer/scene/r_voxel.cpp b/src/swrenderer/things/r_voxel.cpp similarity index 99% rename from src/swrenderer/scene/r_voxel.cpp rename to src/swrenderer/things/r_voxel.cpp index 22773499d5..f0d29f4888 100644 --- a/src/swrenderer/scene/r_voxel.cpp +++ b/src/swrenderer/things/r_voxel.cpp @@ -30,7 +30,7 @@ #include "r_data/sprites.h" #include "d_net.h" #include "po_man.h" -#include "r_things.h" +#include "swrenderer/scene/r_things.h" #include "swrenderer/drawers/r_draw.h" #include "swrenderer/drawers/r_thread.h" #include "r_utility.h" diff --git a/src/swrenderer/scene/r_voxel.h b/src/swrenderer/things/r_voxel.h similarity index 100% rename from src/swrenderer/scene/r_voxel.h rename to src/swrenderer/things/r_voxel.h diff --git a/src/swrenderer/scene/r_wallsprite.cpp b/src/swrenderer/things/r_wallsprite.cpp similarity index 95% rename from src/swrenderer/scene/r_wallsprite.cpp rename to src/swrenderer/things/r_wallsprite.cpp index e4718bdbe6..c5fb8125c1 100644 --- a/src/swrenderer/scene/r_wallsprite.cpp +++ b/src/swrenderer/things/r_wallsprite.cpp @@ -10,7 +10,7 @@ #include "w_wad.h" #include "swrenderer/r_main.h" 
#include "swrenderer/scene/r_things.h" -#include "swrenderer/scene/r_wallsprite.h" +#include "swrenderer/things/r_wallsprite.h" #include "c_console.h" #include "c_cvars.h" #include "c_dispatch.h" @@ -27,10 +27,10 @@ #include "colormatcher.h" #include "d_netinf.h" #include "p_effect.h" -#include "r_bsp.h" -#include "r_plane.h" -#include "r_segs.h" -#include "r_3dfloors.h" +#include "swrenderer/scene/r_bsp.h" +#include "swrenderer/scene/r_plane.h" +#include "swrenderer/scene/r_segs.h" +#include "swrenderer/scene/r_3dfloors.h" #include "swrenderer/drawers/r_draw_rgba.h" #include "swrenderer/drawers/r_draw_pal.h" #include "v_palette.h" @@ -40,8 +40,8 @@ #include "p_local.h" #include "p_maputl.h" #include "r_voxel.h" -#include "r_draw_segment.h" -#include "r_portal.h" +#include "swrenderer/segments/r_drawsegment.h" +#include "swrenderer/scene/r_portal.h" #include "swrenderer/r_memory.h" namespace swrenderer diff --git a/src/swrenderer/scene/r_wallsprite.h b/src/swrenderer/things/r_wallsprite.h similarity index 89% rename from src/swrenderer/scene/r_wallsprite.h rename to src/swrenderer/things/r_wallsprite.h index 934c7d8eb9..3a731e5d0b 100644 --- a/src/swrenderer/scene/r_wallsprite.h +++ b/src/swrenderer/things/r_wallsprite.h @@ -1,7 +1,7 @@ #pragma once -#include "r_visible_sprite.h" +#include "r_visiblesprite.h" namespace swrenderer { From 6fd3691da460c3f2e12c89d6c4eb1802a46e6d60 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 31 Dec 2016 12:50:57 +0100 Subject: [PATCH 609/912] Remove unused declarations --- src/swrenderer/scene/r_plane.cpp | 5 ----- src/swrenderer/scene/r_plane.h | 5 ----- 2 files changed, 10 deletions(-) diff --git a/src/swrenderer/scene/r_plane.cpp b/src/swrenderer/scene/r_plane.cpp index 8442532ceb..cbfd85a92e 100644 --- a/src/swrenderer/scene/r_plane.cpp +++ b/src/swrenderer/scene/r_plane.cpp @@ -80,13 +80,8 @@ namespace swrenderer { using namespace drawerargs; -extern int wallshade; - extern subsector_t *InSubsector; -planefunction_t 
floorfunc; -planefunction_t ceilingfunc; - visplane_t *floorplane; visplane_t *ceilingplane; diff --git a/src/swrenderer/scene/r_plane.h b/src/swrenderer/scene/r_plane.h index e38d16af8a..09f039e55d 100644 --- a/src/swrenderer/scene/r_plane.h +++ b/src/swrenderer/scene/r_plane.h @@ -28,11 +28,6 @@ namespace swrenderer { -typedef void (*planefunction_t) (int top, int bottom); - -extern planefunction_t floorfunc; -extern planefunction_t ceilingfunc_t; - extern short floorclip[MAXWIDTH]; extern short ceilingclip[MAXWIDTH]; From 918904074ae4ff90711b7c603f34bca201041caa Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 31 Dec 2016 12:57:48 +0100 Subject: [PATCH 610/912] Remove wallshade global from R_DrawFogBoundary --- src/swrenderer/plane/r_fogboundary.cpp | 2 +- src/swrenderer/plane/r_fogboundary.h | 2 +- src/swrenderer/scene/r_segs.cpp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/swrenderer/plane/r_fogboundary.cpp b/src/swrenderer/plane/r_fogboundary.cpp index 5244b937f0..3dff9fa600 100644 --- a/src/swrenderer/plane/r_fogboundary.cpp +++ b/src/swrenderer/plane/r_fogboundary.cpp @@ -42,7 +42,7 @@ namespace swrenderer short spanend[MAXHEIGHT]; } - void R_DrawFogBoundary(int x1, int x2, short *uclip, short *dclip) + void R_DrawFogBoundary(int x1, int x2, short *uclip, short *dclip, int wallshade) { // This is essentially the same as R_MapVisPlane but with an extra step // to create new horizontal spans whenever the light changes enough that diff --git a/src/swrenderer/plane/r_fogboundary.h b/src/swrenderer/plane/r_fogboundary.h index 0e9af3322a..f334e916b7 100644 --- a/src/swrenderer/plane/r_fogboundary.h +++ b/src/swrenderer/plane/r_fogboundary.h @@ -5,6 +5,6 @@ namespace swrenderer { - void R_DrawFogBoundary(int x1, int x2, short *uclip, short *dclip); + void R_DrawFogBoundary(int x1, int x2, short *uclip, short *dclip, int wallshade); void R_DrawFogBoundarySection(int y, int y2, int x1); } diff --git 
a/src/swrenderer/scene/r_segs.cpp b/src/swrenderer/scene/r_segs.cpp index 5a8fb29826..0ebfaa0843 100644 --- a/src/swrenderer/scene/r_segs.cpp +++ b/src/swrenderer/scene/r_segs.cpp @@ -272,7 +272,7 @@ void R_RenderMaskedSegRange (drawseg_t *ds, int x1, int x2) // [RH] Draw fog partition if (ds->bFogBoundary) { - R_DrawFogBoundary (x1, x2, mceilingclip, mfloorclip); + R_DrawFogBoundary (x1, x2, mceilingclip, mfloorclip, wallshade); if (ds->maskedtexturecol == -1) { goto clearfog; From ce864655e3f813f712b545c211863bfe7f9b9852 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 31 Dec 2016 12:59:43 +0100 Subject: [PATCH 611/912] Remove wallshade global from R_RenderDecals --- src/swrenderer/scene/r_segs.cpp | 2 +- src/swrenderer/things/r_decal.cpp | 6 +++--- src/swrenderer/things/r_decal.h | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/swrenderer/scene/r_segs.cpp b/src/swrenderer/scene/r_segs.cpp index 0ebfaa0843..30f4e90337 100644 --- a/src/swrenderer/scene/r_segs.cpp +++ b/src/swrenderer/scene/r_segs.cpp @@ -1811,7 +1811,7 @@ bool R_StoreWallRange (int start, int stop) // [ZZ] Only if not an active mirror if (!rw_markportal) { - R_RenderDecals(curline->sidedef, draw_segment); + R_RenderDecals(curline->sidedef, draw_segment, wallshade); } if (rw_markportal) diff --git a/src/swrenderer/things/r_decal.cpp b/src/swrenderer/things/r_decal.cpp index a49e90cfba..857e3bc36b 100644 --- a/src/swrenderer/things/r_decal.cpp +++ b/src/swrenderer/things/r_decal.cpp @@ -33,11 +33,11 @@ namespace swrenderer { - void R_RenderDecals(side_t *sidedef, drawseg_t *draw_segment) + void R_RenderDecals(side_t *sidedef, drawseg_t *draw_segment, int wallshade) { for (DBaseDecal *decal = sidedef->AttachedDecals; decal != NULL; decal = decal->WallNext) { - R_RenderDecal(sidedef, decal, draw_segment, 0); + R_RenderDecal(sidedef, decal, draw_segment, wallshade, 0); } } @@ -45,7 +45,7 @@ namespace swrenderer // = 1: drawing masked textures (including sprites) // 
Currently, only pass = 0 is done or used - void R_RenderDecal(side_t *wall, DBaseDecal *decal, drawseg_t *clipper, int pass) + void R_RenderDecal(side_t *wall, DBaseDecal *decal, drawseg_t *clipper, int wallshade, int pass) { DVector2 decal_left, decal_right, decal_pos; int x1, x2; diff --git a/src/swrenderer/things/r_decal.h b/src/swrenderer/things/r_decal.h index 8eeadb77ea..a006e2fee6 100644 --- a/src/swrenderer/things/r_decal.h +++ b/src/swrenderer/things/r_decal.h @@ -8,6 +8,6 @@ namespace swrenderer { struct drawseg_t; - void R_RenderDecals(side_t *wall, drawseg_t *draw_segment); - void R_RenderDecal(side_t *wall, DBaseDecal *first, drawseg_t *clipper, int pass); + void R_RenderDecals(side_t *wall, drawseg_t *draw_segment, int wallshade); + void R_RenderDecal(side_t *wall, DBaseDecal *first, drawseg_t *clipper, int wallshade, int pass); } From 165134f1a78a8b9556a437d0f21b9093b2d19b69 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 31 Dec 2016 13:12:09 +0100 Subject: [PATCH 612/912] Make wallshade private to r_segs --- src/swrenderer/plane/r_skyplane.cpp | 5 +-- src/swrenderer/scene/r_segs.cpp | 17 ++++---- src/swrenderer/scene/r_segs.h | 1 - src/swrenderer/scene/r_walldraw.cpp | 66 ++++++++++++++--------------- src/swrenderer/scene/r_walldraw.h | 5 ++- 5 files changed, 45 insertions(+), 49 deletions(-) diff --git a/src/swrenderer/plane/r_skyplane.cpp b/src/swrenderer/plane/r_skyplane.cpp index e85c3d6ef9..5ad928ffaf 100644 --- a/src/swrenderer/plane/r_skyplane.cpp +++ b/src/swrenderer/plane/r_skyplane.cpp @@ -455,7 +455,7 @@ namespace swrenderer lastskycol_bgra[x] = 0xffffffff; } R_DrawSkySegment(pl->left, pl->right, (short *)pl->top, (short *)pl->bottom, swall, lwall, - frontyScale, backskytex == NULL ? R_GetOneSkyColumn : R_GetTwoSkyColumns); + frontyScale, 0, backskytex == NULL ? 
R_GetOneSkyColumn : R_GetTwoSkyColumns); } else { // The texture does not tile nicely @@ -492,8 +492,7 @@ namespace swrenderer lastskycol[x] = 0xffffffff; lastskycol_bgra[x] = 0xffffffff; } - R_DrawSkySegment(pl->left, pl->right, top, bot, swall, lwall, rw_pic->Scale.Y, - backskytex == NULL ? R_GetOneSkyColumn : R_GetTwoSkyColumns); + R_DrawSkySegment(pl->left, pl->right, top, bot, swall, lwall, rw_pic->Scale.Y, 0, backskytex == NULL ? R_GetOneSkyColumn : R_GetTwoSkyColumns); yl = yh; yh += drawheight; dc_texturemid = iscale * (centery - yl - 1); diff --git a/src/swrenderer/scene/r_segs.cpp b/src/swrenderer/scene/r_segs.cpp index 30f4e90337..41c34f6823 100644 --- a/src/swrenderer/scene/r_segs.cpp +++ b/src/swrenderer/scene/r_segs.cpp @@ -68,7 +68,10 @@ namespace swrenderer { using namespace drawerargs; - void R_DrawDrawSeg(drawseg_t *ds, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat); + namespace + { + int wallshade; + } #define HEIGHTBITS 12 #define HEIGHTSHIFT (FRACBITS-HEIGHTBITS) @@ -91,8 +94,6 @@ fixed_t rw_offset_mid; fixed_t rw_offset_bottom; -int wallshade; - short walltop[MAXWIDTH]; // [RH] record max extents of wall short wallbottom[MAXWIDTH]; short wallupper[MAXWIDTH]; @@ -475,7 +476,7 @@ void R_RenderMaskedSegRange (drawseg_t *ds, int x1, int x2) rw_offset = 0; rw_pic = tex; - R_DrawDrawSeg(ds, x1, x2, mceilingclip, mfloorclip, MaskedSWall, maskedtexturecol, ds->yscale); + R_DrawDrawSeg(ds, x1, x2, mceilingclip, mfloorclip, MaskedSWall, maskedtexturecol, ds->yscale, wallshade); } clearfog: @@ -602,7 +603,7 @@ void R_RenderFakeWall(drawseg_t *ds, int x1, int x2, F3DFloor *rover) } PrepLWall (lwall, curline->sidedef->TexelLength*xscale, ds->sx1, ds->sx2); - R_DrawDrawSeg(ds, x1, x2, wallupper, walllower, MaskedSWall, lwall, yscale); + R_DrawDrawSeg(ds, x1, x2, wallupper, walllower, MaskedSWall, lwall, yscale, wallshade); R_FinishSetPatchStyle(); } @@ -1131,7 +1132,7 @@ void R_RenderSegLoop () { rw_offset = 
-rw_offset; } - R_DrawWallSegment(rw_pic, x1, x2, walltop, wallbottom, swall, lwall, yscale, MAX(rw_frontcz1, rw_frontcz2), MIN(rw_frontfz1, rw_frontfz2), false, light_list); + R_DrawWallSegment(rw_pic, x1, x2, walltop, wallbottom, swall, lwall, yscale, MAX(rw_frontcz1, rw_frontcz2), MIN(rw_frontfz1, rw_frontfz2), false, wallshade, light_list); } fillshort (ceilingclip+x1, x2-x1, viewheight); fillshort (floorclip+x1, x2-x1, 0xffff); @@ -1167,7 +1168,7 @@ void R_RenderSegLoop () { rw_offset = -rw_offset; } - R_DrawWallSegment(rw_pic, x1, x2, walltop, wallupper, swall, lwall, yscale, MAX(rw_frontcz1, rw_frontcz2), MIN(rw_backcz1, rw_backcz2), false, light_list); + R_DrawWallSegment(rw_pic, x1, x2, walltop, wallupper, swall, lwall, yscale, MAX(rw_frontcz1, rw_frontcz2), MIN(rw_backcz1, rw_backcz2), false, wallshade, light_list); } memcpy (ceilingclip+x1, wallupper+x1, (x2-x1)*sizeof(short)); } @@ -1206,7 +1207,7 @@ void R_RenderSegLoop () { rw_offset = -rw_offset; } - R_DrawWallSegment(rw_pic, x1, x2, walllower, wallbottom, swall, lwall, yscale, MAX(rw_backfz1, rw_backfz2), MIN(rw_frontfz1, rw_frontfz2), false, light_list); + R_DrawWallSegment(rw_pic, x1, x2, walllower, wallbottom, swall, lwall, yscale, MAX(rw_backfz1, rw_backfz2), MIN(rw_frontfz1, rw_frontfz2), false, wallshade, light_list); } memcpy (floorclip+x1, walllower+x1, (x2-x1)*sizeof(short)); } diff --git a/src/swrenderer/scene/r_segs.h b/src/swrenderer/scene/r_segs.h index 51cab3eb55..7dafafa9dd 100644 --- a/src/swrenderer/scene/r_segs.h +++ b/src/swrenderer/scene/r_segs.h @@ -55,7 +55,6 @@ extern float rw_lightstep; extern float rw_lightleft; extern fixed_t rw_offset; extern FTexture *rw_pic; -extern int wallshade; } diff --git a/src/swrenderer/scene/r_walldraw.cpp b/src/swrenderer/scene/r_walldraw.cpp index bc98b50d39..3b6e402534 100644 --- a/src/swrenderer/scene/r_walldraw.cpp +++ b/src/swrenderer/scene/r_walldraw.cpp @@ -52,7 +52,6 @@ namespace swrenderer using namespace drawerargs; extern FTexture 
*rw_pic; - extern int wallshade; static const uint8_t *R_GetColumn(FTexture *tex, int col) { @@ -328,7 +327,7 @@ static void Draw1Column(int x, int y1, int y2, WallSampler &sampler, DrawerFunc } static void ProcessWallWorker( - int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, + int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade, const BYTE *(*getcol)(FTexture *tex, int x), DrawerFunc drawcolumn) { if (rw_pic->UseType == FTexture::TEX_Null) @@ -388,41 +387,40 @@ static void ProcessWallWorker( NetUpdate(); } -static void ProcessNormalWall(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x) = R_GetColumn) +static void ProcessNormalWall(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade, const BYTE *(*getcol)(FTexture *tex, int x) = R_GetColumn) { - ProcessWallWorker(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, &SWPixelFormatDrawers::DrawWallColumn); + ProcessWallWorker(x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, getcol, &SWPixelFormatDrawers::DrawWallColumn); } -static void ProcessMaskedWall(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x) = R_GetColumn) +static void ProcessMaskedWall(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade, const BYTE *(*getcol)(FTexture *tex, int x) = R_GetColumn) { if (!rw_pic->bMasked) // Textures that aren't masked can use the faster ProcessNormalWall. 
{ - ProcessNormalWall(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol); + ProcessNormalWall(x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, getcol); } else { - ProcessWallWorker(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, &SWPixelFormatDrawers::DrawWallMaskedColumn); + ProcessWallWorker(x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, getcol, &SWPixelFormatDrawers::DrawWallMaskedColumn); } } -static void ProcessTranslucentWall(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x) = R_GetColumn) +static void ProcessTranslucentWall(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade, const BYTE *(*getcol)(FTexture *tex, int x) = R_GetColumn) { DrawerFunc drawcol1 = R_GetTransMaskDrawer(); if (drawcol1 == nullptr) { // The current translucency is unsupported, so draw with regular ProcessMaskedWall instead. - ProcessMaskedWall(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol); + ProcessMaskedWall(x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, getcol); } else { - ProcessWallWorker(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, drawcol1); + ProcessWallWorker(x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, getcol, drawcol1); } } -static void ProcessStripedWall(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat) +static void ProcessStripedWall(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade) { FDynamicColormap *startcolormap = basecolormap; - int startshade = wallshade; bool fogginess = foggy; short most1[MAXWIDTH], most2[MAXWIDTH], most3[MAXWIDTH]; @@ -444,44 +442,42 @@ static void ProcessStripedWall(int x1, int x2, short *uwal, short *dwal, float * { down[j] = clamp (most3[j], up[j], dwal[j]); } - ProcessNormalWall (x1, x2, up, down, swal, lwal, yrepeat); + ProcessNormalWall (x1, x2, up, down, swal, lwal, yrepeat, wallshade); up = down; down = 
(down == most1) ? most2 : most1; } lightlist_t *lit = &frontsector->e->XFloor.lightlist[i]; basecolormap = lit->extra_colormap; - wallshade = LIGHT2SHADE(curline->sidedef->GetLightLevel(fogginess, - *lit->p_lightlevel, lit->lightsource != NULL) + r_actualextralight); + wallshade = LIGHT2SHADE(curline->sidedef->GetLightLevel(fogginess, *lit->p_lightlevel, lit->lightsource != NULL) + r_actualextralight); } - ProcessNormalWall (x1, x2, up, dwal, swal, lwal, yrepeat); + ProcessNormalWall (x1, x2, up, dwal, swal, lwal, yrepeat, wallshade); basecolormap = startcolormap; - wallshade = startshade; } -static void ProcessWall(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, bool mask) +static void ProcessWall(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade, bool mask) { if (mask) { if (colfunc == basecolfunc) { - ProcessMaskedWall(x1, x2, uwal, dwal, swal, lwal, yrepeat); + ProcessMaskedWall(x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade); } else { - ProcessTranslucentWall(x1, x2, uwal, dwal, swal, lwal, yrepeat); + ProcessTranslucentWall(x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade); } } else { if (fixedcolormap != NULL || fixedlightlev >= 0 || !(frontsector->e && frontsector->e->XFloor.lightlist.Size())) { - ProcessNormalWall(x1, x2, uwal, dwal, swal, lwal, yrepeat); + ProcessNormalWall(x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade); } else { - ProcessStripedWall(x1, x2, uwal, dwal, swal, lwal, yrepeat); + ProcessStripedWall(x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade); } } } @@ -497,7 +493,7 @@ static void ProcessWall(int x1, int x2, short *uwal, short *dwal, float *swal, f // //============================================================================= -static void ProcessWallNP2(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, double top, double bot, bool mask) +static void ProcessWallNP2(int x1, int x2, short *uwal, short 
*dwal, float *swal, fixed_t *lwal, double yrepeat, double top, double bot, int wallshade, bool mask) { short most1[MAXWIDTH], most2[MAXWIDTH], most3[MAXWIDTH]; short *up, *down; @@ -524,14 +520,14 @@ static void ProcessWallNP2(int x1, int x2, short *uwal, short *dwal, float *swal { down[j] = clamp(most3[j], up[j], dwal[j]); } - ProcessWall(x1, x2, up, down, swal, lwal, yrepeat, mask); + ProcessWall(x1, x2, up, down, swal, lwal, yrepeat, wallshade, mask); up = down; down = (down == most1) ? most2 : most1; } partition -= scaledtexheight; dc_texturemid -= texheight; } - ProcessWall(x1, x2, up, dwal, swal, lwal, yrepeat, mask); + ProcessWall(x1, x2, up, dwal, swal, lwal, yrepeat, wallshade, mask); } else { // upside down: draw strips from bottom to top @@ -548,18 +544,18 @@ static void ProcessWallNP2(int x1, int x2, short *uwal, short *dwal, float *swal { up[j] = clamp(most3[j], uwal[j], down[j]); } - ProcessWall(x1, x2, up, down, swal, lwal, yrepeat, mask); + ProcessWall(x1, x2, up, down, swal, lwal, yrepeat, wallshade, mask); down = up; up = (up == most1) ? 
most2 : most1; } partition -= scaledtexheight; dc_texturemid -= texheight; } - ProcessWall(x1, x2, uwal, down, swal, lwal, yrepeat, mask); + ProcessWall(x1, x2, uwal, down, swal, lwal, yrepeat, wallshade, mask); } } -void R_DrawDrawSeg(drawseg_t *ds, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat) +void R_DrawDrawSeg(drawseg_t *ds, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade) { if (rw_pic->GetHeight() != 1 << rw_pic->HeightBits) { @@ -577,32 +573,32 @@ void R_DrawDrawSeg(drawseg_t *ds, int x1, int x2, short *uwal, short *dwal, floa { bot = MAX(bot, sclipBottom); } - ProcessWallNP2(x1, x2, uwal, dwal, swal, lwal, yrepeat, top, bot, true); + ProcessWallNP2(x1, x2, uwal, dwal, swal, lwal, yrepeat, top, bot, wallshade, true); } else { - ProcessWall(x1, x2, uwal, dwal, swal, lwal, yrepeat, true); + ProcessWall(x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, true); } } -void R_DrawWallSegment(FTexture *rw_pic, int x1, int x2, short *walltop, short *wallbottom, float *swall, fixed_t *lwall, double yscale, double top, double bottom, bool mask, FLightNode *light_list) +void R_DrawWallSegment(FTexture *rw_pic, int x1, int x2, short *walltop, short *wallbottom, float *swall, fixed_t *lwall, double yscale, double top, double bottom, bool mask, int wallshade, FLightNode *light_list) { dc_light_list = light_list; if (rw_pic->GetHeight() != 1 << rw_pic->HeightBits) { - ProcessWallNP2(x1, x2, walltop, wallbottom, swall, lwall, yscale, top, bottom, false); + ProcessWallNP2(x1, x2, walltop, wallbottom, swall, lwall, yscale, top, bottom, wallshade, false); } else { - ProcessWall(x1, x2, walltop, wallbottom, swall, lwall, yscale, false); + ProcessWall(x1, x2, walltop, wallbottom, swall, lwall, yscale, wallshade, false); } dc_light_list = nullptr; } -void R_DrawSkySegment(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture 
*tex, int x)) +void R_DrawSkySegment(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade, const BYTE *(*getcol)(FTexture *tex, int x)) { - ProcessNormalWall(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol); + ProcessNormalWall(x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, getcol); } } \ No newline at end of file diff --git a/src/swrenderer/scene/r_walldraw.h b/src/swrenderer/scene/r_walldraw.h index 50b1545ca2..1a1a50504b 100644 --- a/src/swrenderer/scene/r_walldraw.h +++ b/src/swrenderer/scene/r_walldraw.h @@ -21,6 +21,7 @@ namespace swrenderer uint32_t height; }; - void R_DrawWallSegment(FTexture *rw_pic, int x1, int x2, short *walltop, short *wallbottom, float *swall, fixed_t *lwall, double yscale, double top, double bottom, bool mask, FLightNode *light_list = nullptr); - void R_DrawSkySegment(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const uint8_t *(*getcol)(FTexture *tex, int col)); + void R_DrawWallSegment(FTexture *rw_pic, int x1, int x2, short *walltop, short *wallbottom, float *swall, fixed_t *lwall, double yscale, double top, double bottom, bool mask, int wallshade, FLightNode *light_list); + void R_DrawSkySegment(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade, const uint8_t *(*getcol)(FTexture *tex, int col)); + void R_DrawDrawSeg(drawseg_t *ds, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade); } From 3aee73eaa49490f2414c2ccf5563924f443cf2ba Mon Sep 17 00:00:00 2001 From: ZZYZX Date: Fri, 30 Dec 2016 23:26:56 +0200 Subject: [PATCH 613/912] Enabled user shader for a cameratexture --- src/gl/textures/gl_material.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/gl/textures/gl_material.cpp b/src/gl/textures/gl_material.cpp index 5aa1d75b98..4be2e96198 100644 --- a/src/gl/textures/gl_material.cpp +++ b/src/gl/textures/gl_material.cpp @@ -447,6 
+447,11 @@ FMaterial::FMaterial(FTexture * tx, bool expanded) } else if (tx->bHasCanvas) { + if (tx->gl_info.shaderindex >= FIRST_USER_SHADER) + { + mShaderIndex = tx->gl_info.shaderindex; + } + // no brightmap for cameratexture } else { From 0884a09b3870aee7c3237e1a1dac2fc380eac64c Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 31 Dec 2016 14:00:12 +0100 Subject: [PATCH 614/912] Move floorclip, ceilingclip, floorplane and ceilingplane closer to their location --- src/swrenderer/scene/r_3dfloors.cpp | 1 + src/swrenderer/scene/r_plane.cpp | 10 ---------- src/swrenderer/scene/r_plane.h | 6 ------ src/swrenderer/scene/r_portal.cpp | 1 + src/swrenderer/scene/r_segs.cpp | 10 ++++++++++ src/swrenderer/scene/r_segs.h | 5 +++++ 6 files changed, 17 insertions(+), 16 deletions(-) diff --git a/src/swrenderer/scene/r_3dfloors.cpp b/src/swrenderer/scene/r_3dfloors.cpp index 5cb8f7a8db..f2d7f88e8b 100644 --- a/src/swrenderer/scene/r_3dfloors.cpp +++ b/src/swrenderer/scene/r_3dfloors.cpp @@ -12,6 +12,7 @@ #include "swrenderer/r_main.h" #include "r_bsp.h" #include "r_plane.h" +#include "r_segs.h" #include "c_cvars.h" #include "r_3dfloors.h" diff --git a/src/swrenderer/scene/r_plane.cpp b/src/swrenderer/scene/r_plane.cpp index cbfd85a92e..397b5efde4 100644 --- a/src/swrenderer/scene/r_plane.cpp +++ b/src/swrenderer/scene/r_plane.cpp @@ -82,8 +82,6 @@ namespace swrenderer extern subsector_t *InSubsector; -visplane_t *floorplane; -visplane_t *ceilingplane; // These are copies of the main parameters used when drawing stacked sectors. // When you change the main parameters, you should copy them here too *unless* @@ -94,14 +92,6 @@ double stacked_visibility; DVector3 stacked_viewpos; DAngle stacked_angle; -// -// Clip values are the solid pixel bounding the range. 
-// floorclip starts out SCREENHEIGHT and is just outside the range -// ceilingclip starts out 0 and is just inside the range -// -short floorclip[MAXWIDTH]; -short ceilingclip[MAXWIDTH]; - // // texture mapping // diff --git a/src/swrenderer/scene/r_plane.h b/src/swrenderer/scene/r_plane.h index 09f039e55d..aad67beda9 100644 --- a/src/swrenderer/scene/r_plane.h +++ b/src/swrenderer/scene/r_plane.h @@ -28,9 +28,6 @@ namespace swrenderer { -extern short floorclip[MAXWIDTH]; -extern short ceilingclip[MAXWIDTH]; - void R_ClearPlanes (bool fullclear); void R_AddPlaneLights(visplane_t *plane, FLightNode *light_head); @@ -42,9 +39,6 @@ void R_MapVisPlane (visplane_t *pl, void (*mapfunc)(int y, int x1, int x2), void visplane_t *R_FindPlane(const secplane_t &height, FTextureID picnum, int lightlevel, double alpha, bool additive, const FTransform &xform, int sky, FSectorPortal *portal); visplane_t *R_CheckPlane(visplane_t *pl, int start, int stop); -extern visplane_t* floorplane; -extern visplane_t* ceilingplane; - } #endif // __R_PLANE_H__ diff --git a/src/swrenderer/scene/r_portal.cpp b/src/swrenderer/scene/r_portal.cpp index d2897d6563..98c3d8af85 100644 --- a/src/swrenderer/scene/r_portal.cpp +++ b/src/swrenderer/scene/r_portal.cpp @@ -37,6 +37,7 @@ #include "swrenderer/segments/r_drawsegment.h" #include "r_things.h" #include "r_3dfloors.h" +#include "r_segs.h" #include "swrenderer/r_main.h" #include "swrenderer/r_memory.h" diff --git a/src/swrenderer/scene/r_segs.cpp b/src/swrenderer/scene/r_segs.cpp index 41c34f6823..6a87ab7562 100644 --- a/src/swrenderer/scene/r_segs.cpp +++ b/src/swrenderer/scene/r_segs.cpp @@ -141,6 +141,16 @@ FTexture *rw_pic; static fixed_t *maskedtexturecol; +visplane_t *floorplane; +visplane_t *ceilingplane; + +// Clip values are the solid pixel bounding the range. 
+// floorclip starts out SCREENHEIGHT and is just outside the range +// ceilingclip starts out 0 and is just inside the range +// +short floorclip[MAXWIDTH]; +short ceilingclip[MAXWIDTH]; + inline bool IsFogBoundary (sector_t *front, sector_t *back) { diff --git a/src/swrenderer/scene/r_segs.h b/src/swrenderer/scene/r_segs.h index 7dafafa9dd..cf73ce82a7 100644 --- a/src/swrenderer/scene/r_segs.h +++ b/src/swrenderer/scene/r_segs.h @@ -56,6 +56,11 @@ extern float rw_lightleft; extern fixed_t rw_offset; extern FTexture *rw_pic; +extern short floorclip[MAXWIDTH]; +extern short ceilingclip[MAXWIDTH]; +extern visplane_t *floorplane; +extern visplane_t *ceilingplane; + } #endif From 07826ccd2f9c9e51f7361a81152185a483ac90ea Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 31 Dec 2016 14:15:06 +0100 Subject: [PATCH 615/912] Move variables closer to their correct location --- src/swrenderer/r_main.cpp | 8 ++++++++ src/swrenderer/r_main.h | 4 ---- src/swrenderer/scene/r_plane.cpp | 29 ++++------------------------- src/swrenderer/scene/r_portal.cpp | 9 +++++++++ src/swrenderer/scene/r_portal.h | 5 +++++ 5 files changed, 26 insertions(+), 29 deletions(-) diff --git a/src/swrenderer/r_main.cpp b/src/swrenderer/r_main.cpp index 5d672c2804..37295fe7bd 100644 --- a/src/swrenderer/r_main.cpp +++ b/src/swrenderer/r_main.cpp @@ -68,6 +68,8 @@ #include "p_maputl.h" #include "p_setup.h" #include "version.h" +#include "c_console.h" +#include "r_memory.h" CVAR (String, r_viewsize, "", CVAR_NOSET) CVAR (Bool, r_shadercolormaps, true, CVAR_ARCHIVE) @@ -554,6 +556,12 @@ void R_RenderActorView (AActor *actor, bool dontmaplines) R_ClearPlanes (true); R_ClearSprites (); + // opening / clipping determination + // [RH] clip ceiling to console bottom + fillshort(floorclip, viewwidth, viewheight); + fillshort(ceilingclip, viewwidth, !screen->Accel2D && ConBottom > viewwindowy && !bRenderingToCanvas ? 
(ConBottom - viewwindowy) : 0); + R_FreeOpenings(); + NetUpdate (); // [RH] Show off segs if r_drawflat is 1 diff --git a/src/swrenderer/r_main.h b/src/swrenderer/r_main.h index 1a62eccf10..4f6b00a1b5 100644 --- a/src/swrenderer/r_main.h +++ b/src/swrenderer/r_main.h @@ -133,10 +133,6 @@ void R_RenderViewToCanvas (AActor *actor, DCanvas *canvas, int x, int y, int wid void R_MultiresInit (void); -extern int stacked_extralight; -extern double stacked_visibility; -extern DVector3 stacked_viewpos; -extern DAngle stacked_angle; extern void R_CopyStackedViewParameters(); diff --git a/src/swrenderer/scene/r_plane.cpp b/src/swrenderer/scene/r_plane.cpp index 397b5efde4..f18d1e533f 100644 --- a/src/swrenderer/scene/r_plane.cpp +++ b/src/swrenderer/scene/r_plane.cpp @@ -80,23 +80,11 @@ namespace swrenderer { using namespace drawerargs; -extern subsector_t *InSubsector; - -// These are copies of the main parameters used when drawing stacked sectors. -// When you change the main parameters, you should copy them here too *unless* -// you are changing them to draw a stacked sector. Otherwise, stacked sectors -// won't draw in skyboxes properly. -int stacked_extralight; -double stacked_visibility; -DVector3 stacked_viewpos; -DAngle stacked_angle; - -// -// texture mapping -// - -short spanend[MAXHEIGHT]; +namespace +{ + short spanend[MAXHEIGHT]; +} void R_DrawSinglePlane (visplane_t *, fixed_t alpha, bool additive, bool masked); @@ -192,15 +180,6 @@ void R_ClearPlanes (bool fullclear) } } - // opening / clipping determination - fillshort (floorclip, viewwidth, viewheight); - // [RH] clip ceiling to console bottom - fillshort (ceilingclip, viewwidth, - !screen->Accel2D && ConBottom > viewwindowy && !bRenderingToCanvas - ? 
(ConBottom - viewwindowy) : 0); - - R_FreeOpenings(); - next_plane_light = 0; } } diff --git a/src/swrenderer/scene/r_portal.cpp b/src/swrenderer/scene/r_portal.cpp index 98c3d8af85..e48a47c869 100644 --- a/src/swrenderer/scene/r_portal.cpp +++ b/src/swrenderer/scene/r_portal.cpp @@ -54,6 +54,15 @@ namespace swrenderer int CurrentPortalUniq = 0; bool CurrentPortalInSkybox = false; + // These are copies of the main parameters used when drawing stacked sectors. + // When you change the main parameters, you should copy them here too *unless* + // you are changing them to draw a stacked sector. Otherwise, stacked sectors + // won't draw in skyboxes properly. + int stacked_extralight; + double stacked_visibility; + DVector3 stacked_viewpos; + DAngle stacked_angle; + namespace { int numskyboxes; // For ADD_STAT(skyboxes) diff --git a/src/swrenderer/scene/r_portal.h b/src/swrenderer/scene/r_portal.h index 32b6463ca3..42cec48913 100644 --- a/src/swrenderer/scene/r_portal.h +++ b/src/swrenderer/scene/r_portal.h @@ -9,6 +9,11 @@ namespace swrenderer extern int CurrentPortalUniq; extern bool CurrentPortalInSkybox; + extern int stacked_extralight; + extern double stacked_visibility; + extern DVector3 stacked_viewpos; + extern DAngle stacked_angle; + void R_DrawPortals(); void R_DrawWallPortals(); void R_EnterPortal(PortalDrawseg* pds, int depth); From 3967156d623f14cd13d1ad617fd6bfc94bf240e3 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 31 Dec 2016 14:45:41 +0100 Subject: [PATCH 616/912] Moved remaining parts of r_plane to r_visibleplane --- src/CMakeLists.txt | 1 - src/swrenderer/drawers/r_draw.cpp | 1 - src/swrenderer/drawers/r_draw.h | 2 - src/swrenderer/drawers/r_draw_rgba.cpp | 1 - src/swrenderer/plane/r_flatplane.cpp | 5 +- src/swrenderer/plane/r_fogboundary.cpp | 1 - src/swrenderer/plane/r_slopeplane.cpp | 1 - src/swrenderer/plane/r_visibleplane.cpp | 485 +++++++++++++++- src/swrenderer/plane/r_visibleplane.h | 12 + src/swrenderer/r_main.cpp | 1 - 
src/swrenderer/scene/r_3dfloors.cpp | 1 - src/swrenderer/scene/r_bsp.cpp | 6 +- src/swrenderer/scene/r_plane.cpp | 609 -------------------- src/swrenderer/scene/r_plane.h | 44 -- src/swrenderer/scene/r_portal.cpp | 8 +- src/swrenderer/scene/r_segs.cpp | 1 - src/swrenderer/scene/r_segs.h | 1 + src/swrenderer/scene/r_things.cpp | 1 - src/swrenderer/scene/r_walldraw.cpp | 1 - src/swrenderer/segments/r_clipsegment.cpp | 1 - src/swrenderer/segments/r_drawsegment.cpp | 1 - src/swrenderer/segments/r_portalsegment.cpp | 1 - src/swrenderer/things/r_decal.cpp | 1 - src/swrenderer/things/r_particle.cpp | 1 - src/swrenderer/things/r_playersprite.cpp | 1 - src/swrenderer/things/r_wallsprite.cpp | 1 - 26 files changed, 500 insertions(+), 689 deletions(-) delete mode 100644 src/swrenderer/scene/r_plane.cpp delete mode 100644 src/swrenderer/scene/r_plane.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 3451b000b5..e7e819626a 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -819,7 +819,6 @@ set( FASTMATH_PCH_SOURCES swrenderer/drawers/r_thread.cpp swrenderer/scene/r_3dfloors.cpp swrenderer/scene/r_bsp.cpp - swrenderer/scene/r_plane.cpp swrenderer/scene/r_segs.cpp swrenderer/scene/r_things.cpp swrenderer/scene/r_walldraw.cpp diff --git a/src/swrenderer/drawers/r_draw.cpp b/src/swrenderer/drawers/r_draw.cpp index 9d991a039d..99a37a18af 100644 --- a/src/swrenderer/drawers/r_draw.cpp +++ b/src/swrenderer/drawers/r_draw.cpp @@ -58,7 +58,6 @@ namespace swrenderer { double dc_texturemid; FLightNode *dc_light_list; - visplane_light *ds_light_list; int ylookup[MAXHEIGHT]; uint8_t shadetables[NUMCOLORMAPS * 16 * 256]; diff --git a/src/swrenderer/drawers/r_draw.h b/src/swrenderer/drawers/r_draw.h index 47d232cffe..6715589b3a 100644 --- a/src/swrenderer/drawers/r_draw.h +++ b/src/swrenderer/drawers/r_draw.h @@ -20,7 +20,6 @@ EXTERN_CVAR(Bool, r_dynlights); namespace swrenderer { struct vissprite_t; - struct visplane_light; struct ShadeConstants { @@ -38,7 +37,6 @@ 
namespace swrenderer extern double dc_texturemid; extern FLightNode *dc_light_list; - extern visplane_light *ds_light_list; namespace drawerargs { diff --git a/src/swrenderer/drawers/r_draw_rgba.cpp b/src/swrenderer/drawers/r_draw_rgba.cpp index 21f7da8286..309a3e2bd0 100644 --- a/src/swrenderer/drawers/r_draw_rgba.cpp +++ b/src/swrenderer/drawers/r_draw_rgba.cpp @@ -36,7 +36,6 @@ #include "r_data/r_translate.h" #include "v_palette.h" #include "r_data/colormaps.h" -#include "swrenderer/scene/r_plane.h" #include "r_draw_rgba.h" #include "r_drawers.h" #include "gl/data/gl_matrix.h" diff --git a/src/swrenderer/plane/r_flatplane.cpp b/src/swrenderer/plane/r_flatplane.cpp index 51d7b88ccb..354396cc7b 100644 --- a/src/swrenderer/plane/r_flatplane.cpp +++ b/src/swrenderer/plane/r_flatplane.cpp @@ -28,7 +28,6 @@ #include "swrenderer/segments/r_clipsegment.h" #include "swrenderer/segments/r_drawsegment.h" #include "swrenderer/scene/r_portal.h" -#include "swrenderer/scene/r_plane.h" #include "swrenderer/r_memory.h" namespace swrenderer @@ -43,6 +42,7 @@ namespace swrenderer fixed_t xscale, yscale; double xstepscale, ystepscale; double basexfrac, baseyfrac; + visplane_light *ds_light_list; } void R_DrawNormalPlane(visplane_t *pl, double _xscale, double _yscale, fixed_t alpha, bool additive, bool masked) @@ -178,6 +178,9 @@ namespace swrenderer } } } + + ds_light_list = pl->lights; + R_MapVisPlane(pl, R_MapPlane, R_StepPlane); } diff --git a/src/swrenderer/plane/r_fogboundary.cpp b/src/swrenderer/plane/r_fogboundary.cpp index 3dff9fa600..fb140db719 100644 --- a/src/swrenderer/plane/r_fogboundary.cpp +++ b/src/swrenderer/plane/r_fogboundary.cpp @@ -28,7 +28,6 @@ #include "swrenderer/segments/r_clipsegment.h" #include "swrenderer/segments/r_drawsegment.h" #include "swrenderer/scene/r_portal.h" -#include "swrenderer/scene/r_plane.h" #include "swrenderer/r_memory.h" #ifdef _MSC_VER diff --git a/src/swrenderer/plane/r_slopeplane.cpp b/src/swrenderer/plane/r_slopeplane.cpp index 
b2dccead8f..3742f2f40e 100644 --- a/src/swrenderer/plane/r_slopeplane.cpp +++ b/src/swrenderer/plane/r_slopeplane.cpp @@ -28,7 +28,6 @@ #include "swrenderer/segments/r_clipsegment.h" #include "swrenderer/segments/r_drawsegment.h" #include "swrenderer/scene/r_portal.h" -#include "swrenderer/scene/r_plane.h" #include "swrenderer/r_memory.h" #ifdef _MSC_VER diff --git a/src/swrenderer/plane/r_visibleplane.cpp b/src/swrenderer/plane/r_visibleplane.cpp index 79f7ea2e71..f175f00631 100644 --- a/src/swrenderer/plane/r_visibleplane.cpp +++ b/src/swrenderer/plane/r_visibleplane.cpp @@ -7,8 +7,6 @@ #include "w_wad.h" #include "doomdef.h" #include "doomstat.h" -#include "swrenderer/r_main.h" -#include "swrenderer/scene/r_things.h" #include "r_sky.h" #include "stats.h" #include "v_video.h" @@ -17,8 +15,19 @@ #include "cmdlib.h" #include "d_net.h" #include "g_level.h" +#include "gl/dynlights/gl_dynlight.h" +#include "swrenderer/r_main.h" #include "swrenderer/scene/r_bsp.h" -#include "r_visibleplane.h" +#include "swrenderer/scene/r_things.h" +#include "swrenderer/scene/r_3dfloors.h" +#include "swrenderer/scene/r_portal.h" +#include "swrenderer/plane/r_flatplane.h" +#include "swrenderer/plane/r_slopeplane.h" +#include "swrenderer/plane/r_skyplane.h" +#include "swrenderer/plane/r_visibleplane.h" +#include "swrenderer/drawers/r_draw.h" + +CVAR(Bool, tilt, false, 0); namespace swrenderer { @@ -32,6 +41,8 @@ namespace swrenderer enum { max_plane_lights = 32 * 1024 }; visplane_light plane_lights[max_plane_lights]; int next_plane_light = 0; + + short spanend[MAXHEIGHT]; } void R_DeinitPlanes() @@ -39,12 +50,12 @@ namespace swrenderer // do not use R_ClearPlanes because at this point the screen pointer is no longer valid. 
for (int i = 0; i <= MAXVISPLANES; i++) // new code -- killough { - for (*freehead = visplanes[i], visplanes[i] = NULL; *freehead; ) + for (*freehead = visplanes[i], visplanes[i] = nullptr; *freehead; ) { freehead = &(*freehead)->next; } } - for (visplane_t *pl = freetail; pl != NULL; ) + for (visplane_t *pl = freetail; pl != nullptr; ) { visplane_t *next = pl->next; free(pl); @@ -56,13 +67,13 @@ namespace swrenderer { visplane_t *check = freetail; - if (check == NULL) + if (check == nullptr) { check = (visplane_t *)M_Malloc(sizeof(*check) + 3 + sizeof(*check->top)*(MAXWIDTH * 2)); memset(check, 0, sizeof(*check) + 3 + sizeof(*check->top)*(MAXWIDTH * 2)); check->bottom = check->top + MAXWIDTH + 2; } - else if (NULL == (freetail = freetail->next)) + else if (nullptr == (freetail = freetail->next)) { freehead = &freetail; } @@ -88,13 +99,13 @@ namespace swrenderer M_Free(pl); pl = next; } - freetail = NULL; + freetail = nullptr; freehead = &freetail; for (i = 0; i < MAXVISPLANES; i++) { pl = visplanes[i]; - visplanes[i] = NULL; + visplanes[i] = nullptr; while (pl) { visplane_t *next = pl->next; @@ -103,4 +114,460 @@ namespace swrenderer } } } + + void R_ClearPlanes(bool fullclear) + { + int i; + + // Don't clear fake planes if not doing a full clear. 
+ if (!fullclear) + { + for (i = 0; i <= MAXVISPLANES - 1; i++) // new code -- killough + { + for (visplane_t **probe = &visplanes[i]; *probe != nullptr; ) + { + if ((*probe)->sky < 0) + { // fake: move past it + probe = &(*probe)->next; + } + else + { // not fake: move to freelist + visplane_t *vis = *probe; + *freehead = vis; + *probe = vis->next; + vis->next = nullptr; + freehead = &vis->next; + } + } + } + } + else + { + for (i = 0; i <= MAXVISPLANES; i++) // new code -- killough + { + for (*freehead = visplanes[i], visplanes[i] = nullptr; *freehead; ) + { + freehead = &(*freehead)->next; + } + } + + next_plane_light = 0; + } + } + + void R_AddPlaneLights(visplane_t *plane, FLightNode *node) + { + if (!r_dynlights) + return; + + while (node) + { + if (!(node->lightsource->flags2&MF2_DORMANT)) + { + bool found = false; + visplane_light *light_node = plane->lights; + while (light_node) + { + if (light_node->lightsource == node->lightsource) + { + found = true; + break; + } + light_node = light_node->next; + } + if (!found) + { + if (next_plane_light == max_plane_lights) + return; + + visplane_light *newlight = &plane_lights[next_plane_light++]; + newlight->next = plane->lights; + newlight->lightsource = node->lightsource; + plane->lights = newlight; + } + } + node = node->nextLight; + } + } + + visplane_t *R_FindPlane(const secplane_t &height, FTextureID picnum, int lightlevel, double Alpha, bool additive, const FTransform &xxform, int sky, FSectorPortal *portal) + { + secplane_t plane; + visplane_t *check; + unsigned hash; // killough + bool isskybox; + const FTransform *xform = &xxform; + fixed_t alpha = FLOAT2FIXED(Alpha); + //angle_t angle = (xform.Angle + xform.baseAngle).BAMs(); + + FTransform nulltransform; + + if (picnum == skyflatnum) // killough 10/98 + { // most skies map together + lightlevel = 0; + xform = &nulltransform; + nulltransform.xOffs = nulltransform.yOffs = nulltransform.baseyOffs = 0; + nulltransform.xScale = nulltransform.yScale = 1; + 
nulltransform.Angle = nulltransform.baseAngle = 0.0; + additive = false; + // [RH] Map floor skies and ceiling skies to separate visplanes. This isn't + // always necessary, but it is needed if a floor and ceiling sky are in the + // same column but separated by a wall. If they both try to reside in the + // same visplane, then only the floor sky will be drawn. + plane.set(0., 0., height.fC(), 0.); + isskybox = portal != nullptr && !(portal->mFlags & PORTSF_INSKYBOX); + } + else if (portal != nullptr && !(portal->mFlags & PORTSF_INSKYBOX)) + { + plane = height; + isskybox = true; + } + else + { + plane = height; + isskybox = false; + // kg3D - hack, store alpha in sky + // i know there is ->alpha, but this also allows to identify fake plane + // and ->alpha is for stacked sectors + if (fake3D & (FAKE3D_FAKEFLOOR | FAKE3D_FAKECEILING)) sky = 0x80000000 | fakeAlpha; + else sky = 0; // not skyflatnum so it can't be a sky + portal = nullptr; + alpha = OPAQUE; + } + + // New visplane algorithm uses hash table -- killough + hash = isskybox ? MAXVISPLANES : visplane_hash(picnum.GetIndex(), lightlevel, height); + + for (check = visplanes[hash]; check; check = check->next) // killough + { + if (isskybox) + { + if (portal == check->portal && plane == check->height) + { + if (portal->mType != PORTS_SKYVIEWPOINT) + { // This skybox is really a stacked sector, so we need to + // check even more. + if (check->extralight == stacked_extralight && + check->visibility == stacked_visibility && + check->viewpos == stacked_viewpos && + ( + // headache inducing logic... 
:( + (portal->mType != PORTS_STACKEDSECTORTHING) || + ( + check->Alpha == alpha && + check->Additive == additive && + (alpha == 0 || // if alpha is > 0 everything needs to be checked + (plane == check->height && + picnum == check->picnum && + lightlevel == check->lightlevel && + basecolormap == check->colormap && // [RH] Add more checks + *xform == check->xform + ) + ) && + check->viewangle == stacked_angle + ) + ) + ) + { + return check; + } + } + else + { + return check; + } + } + } + else + if (plane == check->height && + picnum == check->picnum && + lightlevel == check->lightlevel && + basecolormap == check->colormap && // [RH] Add more checks + *xform == check->xform && + sky == check->sky && + CurrentPortalUniq == check->CurrentPortalUniq && + MirrorFlags == check->MirrorFlags && + CurrentSkybox == check->CurrentSkybox && + ViewPos == check->viewpos + ) + { + return check; + } + } + + check = new_visplane(hash); // killough + + check->height = plane; + check->picnum = picnum; + check->lightlevel = lightlevel; + check->xform = *xform; + check->colormap = basecolormap; // [RH] Save colormap + check->sky = sky; + check->portal = portal; + check->left = viewwidth; // Was SCREENWIDTH -- killough 11/98 + check->right = 0; + check->extralight = stacked_extralight; + check->visibility = stacked_visibility; + check->viewpos = stacked_viewpos; + check->viewangle = stacked_angle; + check->Alpha = alpha; + check->Additive = additive; + check->CurrentPortalUniq = CurrentPortalUniq; + check->MirrorFlags = MirrorFlags; + check->CurrentSkybox = CurrentSkybox; + + fillshort(check->top, viewwidth, 0x7fff); + + return check; + } + + visplane_t *R_CheckPlane(visplane_t *pl, int start, int stop) + { + int intrl, intrh; + int unionl, unionh; + int x; + + assert(start >= 0 && start < viewwidth); + assert(stop > start && stop <= viewwidth); + + if (start < pl->left) + { + intrl = pl->left; + unionl = start; + } + else + { + unionl = pl->left; + intrl = start; + } + + if (stop > 
pl->right) + { + intrh = pl->right; + unionh = stop; + } + else + { + unionh = pl->right; + intrh = stop; + } + + for (x = intrl; x < intrh && pl->top[x] == 0x7fff; x++) + ; + + if (x >= intrh) + { + // use the same visplane + pl->left = unionl; + pl->right = unionh; + } + else + { + // make a new visplane + unsigned hash; + + if (pl->portal != nullptr && !(pl->portal->mFlags & PORTSF_INSKYBOX) && viewactive) + { + hash = MAXVISPLANES; + } + else + { + hash = visplane_hash(pl->picnum.GetIndex(), pl->lightlevel, pl->height); + } + visplane_t *new_pl = new_visplane(hash); + + new_pl->height = pl->height; + new_pl->picnum = pl->picnum; + new_pl->lightlevel = pl->lightlevel; + new_pl->xform = pl->xform; + new_pl->colormap = pl->colormap; + new_pl->portal = pl->portal; + new_pl->extralight = pl->extralight; + new_pl->visibility = pl->visibility; + new_pl->viewpos = pl->viewpos; + new_pl->viewangle = pl->viewangle; + new_pl->sky = pl->sky; + new_pl->Alpha = pl->Alpha; + new_pl->Additive = pl->Additive; + new_pl->CurrentPortalUniq = pl->CurrentPortalUniq; + new_pl->MirrorFlags = pl->MirrorFlags; + new_pl->CurrentSkybox = pl->CurrentSkybox; + new_pl->lights = pl->lights; + pl = new_pl; + pl->left = start; + pl->right = stop; + fillshort(pl->top, viewwidth, 0x7fff); + } + return pl; + } + + int R_DrawPlanes() + { + visplane_t *pl; + int i; + int vpcount = 0; + + drawerargs::ds_color = 3; + + for (i = 0; i < MAXVISPLANES; i++) + { + for (pl = visplanes[i]; pl; pl = pl->next) + { + // kg3D - draw only correct planes + if (pl->CurrentPortalUniq != CurrentPortalUniq || pl->CurrentSkybox != CurrentSkybox) + continue; + // kg3D - draw only real planes now + if (pl->sky >= 0) { + vpcount++; + R_DrawSinglePlane(pl, OPAQUE, false, false); + } + } + } + return vpcount; + } + + void R_DrawHeightPlanes(double height) + { + visplane_t *pl; + int i; + + drawerargs::ds_color = 3; + + DVector3 oViewPos = ViewPos; + DAngle oViewAngle = ViewAngle; + + for (i = 0; i < MAXVISPLANES; i++) + { + 
for (pl = visplanes[i]; pl; pl = pl->next) + { + if (pl->CurrentSkybox != CurrentSkybox || pl->CurrentPortalUniq != CurrentPortalUniq) + continue; + + if (pl->sky < 0 && pl->height.Zat0() == height) + { + ViewPos = pl->viewpos; + ViewAngle = pl->viewangle; + MirrorFlags = pl->MirrorFlags; + + R_DrawSinglePlane(pl, pl->sky & 0x7FFFFFFF, pl->Additive, true); + } + } + } + ViewPos = oViewPos; + ViewAngle = oViewAngle; + } + + void R_DrawSinglePlane(visplane_t *pl, fixed_t alpha, bool additive, bool masked) + { + if (pl->left >= pl->right) + return; + + if (r_drawflat) // no texture mapping + { + drawerargs::ds_color += 4; + R_DrawColoredPlane(pl); + } + else if (pl->picnum == skyflatnum) // sky flat + { + R_DrawSkyPlane(pl); + } + else // regular flat + { + FTexture *tex = TexMan(pl->picnum, true); + + if (tex->UseType == FTexture::TEX_Null) + { + return; + } + + if (!masked && !additive) + { // If we're not supposed to see through this plane, draw it opaque. + alpha = OPAQUE; + } + else if (!tex->bMasked) + { // Don't waste time on a masked texture if it isn't really masked. 
+ masked = false; + } + R_SetSpanTexture(tex); + double xscale = pl->xform.xScale * tex->Scale.X; + double yscale = pl->xform.yScale * tex->Scale.Y; + + basecolormap = pl->colormap; + + if (r_drawflat || (!pl->height.isSlope() && !tilt)) + { + R_DrawNormalPlane(pl, xscale, yscale, alpha, additive, masked); + } + else + { + R_DrawTiltedPlane(pl, xscale, yscale, alpha, additive, masked); + } + } + NetUpdate(); + } + + void R_MapVisPlane(visplane_t *pl, void(*mapfunc)(int y, int x1, int x2), void(*stepfunc)()) + { + // t1/b1 are at x + // t2/b2 are at x+1 + // spanend[y] is at the right edge + + int x = pl->right - 1; + int t2 = pl->top[x]; + int b2 = pl->bottom[x]; + + if (b2 > t2) + { + fillshort(spanend + t2, b2 - t2, x); + } + + for (--x; x >= pl->left; --x) + { + int t1 = pl->top[x]; + int b1 = pl->bottom[x]; + const int xr = x + 1; + int stop; + + // Draw any spans that have just closed + stop = MIN(t1, b2); + while (t2 < stop) + { + int y = t2++; + int x2 = spanend[y]; + mapfunc(y, xr, x2); + } + stop = MAX(b1, t2); + while (b2 > stop) + { + int y = --b2; + int x2 = spanend[y]; + mapfunc(y, xr, x2); + } + + // Mark any spans that have just opened + stop = MIN(t2, b1); + while (t1 < stop) + { + spanend[t1++] = x; + } + stop = MAX(b2, t2); + while (b1 > stop) + { + spanend[--b1] = x; + } + + t2 = pl->top[x]; + b2 = pl->bottom[x]; + + if (stepfunc) + stepfunc(); + } + // Draw any spans that are still open + while (t2 < b2) + { + int y = --b2; + int x2 = spanend[y]; + mapfunc(y, pl->left, x2); + } + } } diff --git a/src/swrenderer/plane/r_visibleplane.h b/src/swrenderer/plane/r_visibleplane.h index 28e7035058..1c2ee717f2 100644 --- a/src/swrenderer/plane/r_visibleplane.h +++ b/src/swrenderer/plane/r_visibleplane.h @@ -63,5 +63,17 @@ namespace swrenderer void R_DeinitPlanes(); visplane_t *new_visplane(unsigned hash); + void R_PlaneInitData(); + void R_ClearPlanes(bool fullclear); + + void R_AddPlaneLights(visplane_t *plane, FLightNode *node); + + visplane_t 
*R_FindPlane(const secplane_t &height, FTextureID picnum, int lightlevel, double Alpha, bool additive, const FTransform &xxform, int sky, FSectorPortal *portal); + visplane_t *R_CheckPlane(visplane_t *pl, int start, int stop); + + int R_DrawPlanes(); + void R_DrawHeightPlanes(double height); + void R_DrawSinglePlane(visplane_t *pl, fixed_t alpha, bool additive, bool masked); + void R_MapVisPlane(visplane_t *pl, void(*mapfunc)(int y, int x1, int x2), void(*stepfunc)()); } diff --git a/src/swrenderer/r_main.cpp b/src/swrenderer/r_main.cpp index 37295fe7bd..42e8e98dc8 100644 --- a/src/swrenderer/r_main.cpp +++ b/src/swrenderer/r_main.cpp @@ -37,7 +37,6 @@ #include "r_main.h" #include "scene/r_things.h" #include "drawers/r_draw.h" -#include "scene/r_plane.h" #include "plane/r_flatplane.h" #include "scene/r_bsp.h" #include "segments/r_drawsegment.h" diff --git a/src/swrenderer/scene/r_3dfloors.cpp b/src/swrenderer/scene/r_3dfloors.cpp index f2d7f88e8b..5b487d8d8f 100644 --- a/src/swrenderer/scene/r_3dfloors.cpp +++ b/src/swrenderer/scene/r_3dfloors.cpp @@ -11,7 +11,6 @@ #include "c_dispatch.h" #include "swrenderer/r_main.h" #include "r_bsp.h" -#include "r_plane.h" #include "r_segs.h" #include "c_cvars.h" #include "r_3dfloors.h" diff --git a/src/swrenderer/scene/r_bsp.cpp b/src/swrenderer/scene/r_bsp.cpp index ba939390f4..debee35c68 100644 --- a/src/swrenderer/scene/r_bsp.cpp +++ b/src/swrenderer/scene/r_bsp.cpp @@ -33,12 +33,12 @@ #include "p_setup.h" #include "swrenderer/r_main.h" -#include "r_plane.h" #include "swrenderer/drawers/r_draw.h" -#include "r_things.h" +#include "swrenderer/plane/r_visibleplane.h" #include "swrenderer/things/r_particle.h" -#include "r_3dfloors.h" #include "swrenderer/segments/r_clipsegment.h" +#include "r_things.h" +#include "r_3dfloors.h" #include "r_portal.h" #include "a_sharedglobal.h" #include "g_level.h" diff --git a/src/swrenderer/scene/r_plane.cpp b/src/swrenderer/scene/r_plane.cpp deleted file mode 100644 index f18d1e533f..0000000000 
--- a/src/swrenderer/scene/r_plane.cpp +++ /dev/null @@ -1,609 +0,0 @@ -// Emacs style mode select -*- C++ -*- -//----------------------------------------------------------------------------- -// -// $Id:$ -// -// Copyright (C) 1993-1996 by id Software, Inc. -// -// This source is available for distribution and/or modification -// only under the terms of the DOOM Source Code License as -// published by id Software. All rights reserved. -// -// The source is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// FITNESS FOR A PARTICULAR PURPOSE. See the DOOM Source Code License -// for more details. -// -// $Log:$ -// -// DESCRIPTION: -// Here is a core component: drawing the floors and ceilings, -// while maintaining a per column clipping list only. -// Moreover, the sky areas have to be determined. -// -// MAXVISPLANES is no longer a limit on the number of visplanes, -// but a limit on the number of hash slots; larger numbers mean -// better performance usually but after a point they are wasted, -// and memory and time overheads creep in. -// -// Lee Killough -// -// [RH] Further modified to significantly increase accuracy and add slopes. 
-// -//----------------------------------------------------------------------------- - -#include -#include - -#include "templates.h" -#include "i_system.h" -#include "w_wad.h" - -#include "doomdef.h" -#include "doomstat.h" - -#include "swrenderer/r_main.h" -#include "swrenderer/scene/r_things.h" -#include "r_sky.h" -#include "stats.h" - -#include "v_video.h" -#include "a_sharedglobal.h" -#include "c_console.h" -#include "cmdlib.h" -#include "d_net.h" -#include "g_level.h" -#include "r_bsp.h" -#include "r_plane.h" -#include "r_segs.h" -#include "r_3dfloors.h" -#include "v_palette.h" -#include "r_data/colormaps.h" -#include "swrenderer/drawers/r_draw_rgba.h" -#include "gl/dynlights/gl_dynlight.h" -#include "r_walldraw.h" -#include "swrenderer/segments/r_clipsegment.h" -#include "swrenderer/segments/r_drawsegment.h" -#include "r_portal.h" -#include "swrenderer/plane/r_skyplane.h" -#include "swrenderer/plane/r_flatplane.h" -#include "swrenderer/plane/r_slopeplane.h" -#include "swrenderer/r_memory.h" - -#ifdef _MSC_VER -#pragma warning(disable:4244) -#endif - -CVAR(Bool, tilt, false, 0); - -namespace swrenderer -{ - using namespace drawerargs; - - -namespace -{ - short spanend[MAXHEIGHT]; -} - -void R_DrawSinglePlane (visplane_t *, fixed_t alpha, bool additive, bool masked); - - -//========================================================================== - - -//========================================================================== - -namespace -{ - enum { max_plane_lights = 32 * 1024 }; - visplane_light plane_lights[max_plane_lights]; - int next_plane_light = 0; -} - -void R_AddPlaneLights(visplane_t *plane, FLightNode *node) -{ - if (!r_dynlights) - return; - - while (node) - { - if (!(node->lightsource->flags2&MF2_DORMANT)) - { - bool found = false; - visplane_light *light_node = plane->lights; - while (light_node) - { - if (light_node->lightsource == node->lightsource) - { - found = true; - break; - } - light_node = light_node->next; - } - if (!found) - { - if 
(next_plane_light == max_plane_lights) - return; - - visplane_light *newlight = &plane_lights[next_plane_light++]; - newlight->next = plane->lights; - newlight->lightsource = node->lightsource; - plane->lights = newlight; - } - } - node = node->nextLight; - } -} - -//========================================================================== -// -// R_ClearPlanes -// -// Called at the beginning of each frame. -// -//========================================================================== - -void R_ClearPlanes (bool fullclear) -{ - int i; - - // Don't clear fake planes if not doing a full clear. - if (!fullclear) - { - for (i = 0; i <= MAXVISPLANES-1; i++) // new code -- killough - { - for (visplane_t **probe = &visplanes[i]; *probe != NULL; ) - { - if ((*probe)->sky < 0) - { // fake: move past it - probe = &(*probe)->next; - } - else - { // not fake: move to freelist - visplane_t *vis = *probe; - *freehead = vis; - *probe = vis->next; - vis->next = NULL; - freehead = &vis->next; - } - } - } - } - else - { - for (i = 0; i <= MAXVISPLANES; i++) // new code -- killough - { - for (*freehead = visplanes[i], visplanes[i] = NULL; *freehead; ) - { - freehead = &(*freehead)->next; - } - } - - next_plane_light = 0; - } -} - -//========================================================================== -// -// R_FindPlane -// -// killough 2/28/98: Add offsets -//========================================================================== - -visplane_t *R_FindPlane (const secplane_t &height, FTextureID picnum, int lightlevel, double Alpha, bool additive, - const FTransform &xxform, - int sky, FSectorPortal *portal) -{ - secplane_t plane; - visplane_t *check; - unsigned hash; // killough - bool isskybox; - const FTransform *xform = &xxform; - fixed_t alpha = FLOAT2FIXED(Alpha); - //angle_t angle = (xform.Angle + xform.baseAngle).BAMs(); - - FTransform nulltransform; - - if (picnum == skyflatnum) // killough 10/98 - { // most skies map together - lightlevel = 0; - xform = 
&nulltransform; - nulltransform.xOffs = nulltransform.yOffs = nulltransform.baseyOffs = 0; - nulltransform.xScale = nulltransform.yScale = 1; - nulltransform.Angle = nulltransform.baseAngle = 0.0; - additive = false; - // [RH] Map floor skies and ceiling skies to separate visplanes. This isn't - // always necessary, but it is needed if a floor and ceiling sky are in the - // same column but separated by a wall. If they both try to reside in the - // same visplane, then only the floor sky will be drawn. - plane.set(0., 0., height.fC(), 0.); - isskybox = portal != NULL && !(portal->mFlags & PORTSF_INSKYBOX); - } - else if (portal != NULL && !(portal->mFlags & PORTSF_INSKYBOX)) - { - plane = height; - isskybox = true; - } - else - { - plane = height; - isskybox = false; - // kg3D - hack, store alpha in sky - // i know there is ->alpha, but this also allows to identify fake plane - // and ->alpha is for stacked sectors - if (fake3D & (FAKE3D_FAKEFLOOR|FAKE3D_FAKECEILING)) sky = 0x80000000 | fakeAlpha; - else sky = 0; // not skyflatnum so it can't be a sky - portal = NULL; - alpha = OPAQUE; - } - - // New visplane algorithm uses hash table -- killough - hash = isskybox ? MAXVISPLANES : visplane_hash (picnum.GetIndex(), lightlevel, height); - - for (check = visplanes[hash]; check; check = check->next) // killough - { - if (isskybox) - { - if (portal == check->portal && plane == check->height) - { - if (portal->mType != PORTS_SKYVIEWPOINT) - { // This skybox is really a stacked sector, so we need to - // check even more. - if (check->extralight == stacked_extralight && - check->visibility == stacked_visibility && - check->viewpos == stacked_viewpos && - ( - // headache inducing logic... 
:( - (portal->mType != PORTS_STACKEDSECTORTHING) || - ( - check->Alpha == alpha && - check->Additive == additive && - (alpha == 0 || // if alpha is > 0 everything needs to be checked - (plane == check->height && - picnum == check->picnum && - lightlevel == check->lightlevel && - basecolormap == check->colormap && // [RH] Add more checks - *xform == check->xform - ) - ) && - check->viewangle == stacked_angle - ) - ) - ) - { - return check; - } - } - else - { - return check; - } - } - } - else - if (plane == check->height && - picnum == check->picnum && - lightlevel == check->lightlevel && - basecolormap == check->colormap && // [RH] Add more checks - *xform == check->xform && - sky == check->sky && - CurrentPortalUniq == check->CurrentPortalUniq && - MirrorFlags == check->MirrorFlags && - CurrentSkybox == check->CurrentSkybox && - ViewPos == check->viewpos - ) - { - return check; - } - } - - check = new_visplane (hash); // killough - - check->height = plane; - check->picnum = picnum; - check->lightlevel = lightlevel; - check->xform = *xform; - check->colormap = basecolormap; // [RH] Save colormap - check->sky = sky; - check->portal = portal; - check->left = viewwidth; // Was SCREENWIDTH -- killough 11/98 - check->right = 0; - check->extralight = stacked_extralight; - check->visibility = stacked_visibility; - check->viewpos = stacked_viewpos; - check->viewangle = stacked_angle; - check->Alpha = alpha; - check->Additive = additive; - check->CurrentPortalUniq = CurrentPortalUniq; - check->MirrorFlags = MirrorFlags; - check->CurrentSkybox = CurrentSkybox; - - fillshort (check->top, viewwidth, 0x7fff); - - return check; -} - -//========================================================================== -// -// R_CheckPlane -// -//========================================================================== - -visplane_t *R_CheckPlane (visplane_t *pl, int start, int stop) -{ - int intrl, intrh; - int unionl, unionh; - int x; - - assert (start >= 0 && start < viewwidth); - 
assert (stop > start && stop <= viewwidth); - - if (start < pl->left) - { - intrl = pl->left; - unionl = start; - } - else - { - unionl = pl->left; - intrl = start; - } - - if (stop > pl->right) - { - intrh = pl->right; - unionh = stop; - } - else - { - unionh = pl->right; - intrh = stop; - } - - for (x = intrl; x < intrh && pl->top[x] == 0x7fff; x++) - ; - - if (x >= intrh) - { - // use the same visplane - pl->left = unionl; - pl->right = unionh; - } - else - { - // make a new visplane - unsigned hash; - - if (pl->portal != NULL && !(pl->portal->mFlags & PORTSF_INSKYBOX) && viewactive) - { - hash = MAXVISPLANES; - } - else - { - hash = visplane_hash (pl->picnum.GetIndex(), pl->lightlevel, pl->height); - } - visplane_t *new_pl = new_visplane (hash); - - new_pl->height = pl->height; - new_pl->picnum = pl->picnum; - new_pl->lightlevel = pl->lightlevel; - new_pl->xform = pl->xform; - new_pl->colormap = pl->colormap; - new_pl->portal = pl->portal; - new_pl->extralight = pl->extralight; - new_pl->visibility = pl->visibility; - new_pl->viewpos = pl->viewpos; - new_pl->viewangle = pl->viewangle; - new_pl->sky = pl->sky; - new_pl->Alpha = pl->Alpha; - new_pl->Additive = pl->Additive; - new_pl->CurrentPortalUniq = pl->CurrentPortalUniq; - new_pl->MirrorFlags = pl->MirrorFlags; - new_pl->CurrentSkybox = pl->CurrentSkybox; - new_pl->lights = pl->lights; - pl = new_pl; - pl->left = start; - pl->right = stop; - fillshort (pl->top, viewwidth, 0x7fff); - } - return pl; -} - -//========================================================================== -// -// R_DrawPlanes -// -// At the end of each frame. 
-// -//========================================================================== - -int R_DrawPlanes () -{ - visplane_t *pl; - int i; - int vpcount = 0; - - ds_color = 3; - - for (i = 0; i < MAXVISPLANES; i++) - { - for (pl = visplanes[i]; pl; pl = pl->next) - { - // kg3D - draw only correct planes - if(pl->CurrentPortalUniq != CurrentPortalUniq || pl->CurrentSkybox != CurrentSkybox) - continue; - // kg3D - draw only real planes now - if(pl->sky >= 0) { - vpcount++; - R_DrawSinglePlane (pl, OPAQUE, false, false); - } - } - } - return vpcount; -} - -// kg3D - draw all visplanes with "height" -void R_DrawHeightPlanes(double height) -{ - visplane_t *pl; - int i; - - ds_color = 3; - - DVector3 oViewPos = ViewPos; - DAngle oViewAngle = ViewAngle; - - for (i = 0; i < MAXVISPLANES; i++) - { - for (pl = visplanes[i]; pl; pl = pl->next) - { - // kg3D - draw only correct planes - if(pl->CurrentSkybox != CurrentSkybox || pl->CurrentPortalUniq != CurrentPortalUniq) - continue; - if(pl->sky < 0 && pl->height.Zat0() == height) { - ViewPos = pl->viewpos; - ViewAngle = pl->viewangle; - MirrorFlags = pl->MirrorFlags; - R_DrawSinglePlane (pl, pl->sky & 0x7FFFFFFF, pl->Additive, true); - } - } - } - ViewPos = oViewPos; - ViewAngle = oViewAngle; -} - - -//========================================================================== -// -// R_DrawSinglePlane -// -// Draws a single visplane. 
-// -//========================================================================== - -void R_DrawSinglePlane (visplane_t *pl, fixed_t alpha, bool additive, bool masked) -{ - if (pl->left >= pl->right) - return; - - if (r_drawflat) - { // [RH] no texture mapping - ds_color += 4; - R_DrawColoredPlane(pl); - } - else if (pl->picnum == skyflatnum) - { // sky flat - R_DrawSkyPlane (pl); - } - else - { // regular flat - FTexture *tex = TexMan(pl->picnum, true); - - if (tex->UseType == FTexture::TEX_Null) - { - return; - } - - if (!masked && !additive) - { // If we're not supposed to see through this plane, draw it opaque. - alpha = OPAQUE; - } - else if (!tex->bMasked) - { // Don't waste time on a masked texture if it isn't really masked. - masked = false; - } - R_SetSpanTexture(tex); - double xscale = pl->xform.xScale * tex->Scale.X; - double yscale = pl->xform.yScale * tex->Scale.Y; - - basecolormap = pl->colormap; - - if (r_drawflat || (!pl->height.isSlope() && !tilt)) - { - R_DrawNormalPlane(pl, xscale, yscale, alpha, additive, masked); - } - else - { - R_DrawTiltedPlane(pl, xscale, yscale, alpha, additive, masked); - } - } - NetUpdate (); -} - -//========================================================================== -// -// R_MapVisPlane -// -// t1/b1 are at x -// t2/b2 are at x+1 -// spanend[y] is at the right edge -// -//========================================================================== - -void R_MapVisPlane (visplane_t *pl, void (*mapfunc)(int y, int x1, int x2), void(*stepfunc)()) -{ - int x = pl->right - 1; - int t2 = pl->top[x]; - int b2 = pl->bottom[x]; - - ds_light_list = pl->lights; - - if (b2 > t2) - { - fillshort (spanend+t2, b2-t2, x); - } - - for (--x; x >= pl->left; --x) - { - int t1 = pl->top[x]; - int b1 = pl->bottom[x]; - const int xr = x+1; - int stop; - - // Draw any spans that have just closed - stop = MIN (t1, b2); - while (t2 < stop) - { - int y = t2++; - int x2 = spanend[y]; - mapfunc (y, xr, x2); - } - stop = MAX (b1, t2); - while 
(b2 > stop) - { - int y = --b2; - int x2 = spanend[y]; - mapfunc (y, xr, x2); - } - - // Mark any spans that have just opened - stop = MIN (t2, b1); - while (t1 < stop) - { - spanend[t1++] = x; - } - stop = MAX (b2, t2); - while (b1 > stop) - { - spanend[--b1] = x; - } - - t2 = pl->top[x]; - b2 = pl->bottom[x]; - - if (stepfunc) - stepfunc(); - } - // Draw any spans that are still open - while (t2 < b2) - { - int y = --b2; - int x2 = spanend[y]; - mapfunc (y, pl->left, x2); - } - - ds_light_list = nullptr; -} - -} diff --git a/src/swrenderer/scene/r_plane.h b/src/swrenderer/scene/r_plane.h deleted file mode 100644 index aad67beda9..0000000000 --- a/src/swrenderer/scene/r_plane.h +++ /dev/null @@ -1,44 +0,0 @@ -// Emacs style mode select -*- C++ -*- -//----------------------------------------------------------------------------- -// -// $Id:$ -// -// Copyright (C) 1993-1996 by id Software, Inc. -// -// This source is available for distribution and/or modification -// only under the terms of the DOOM Source Code License as -// published by id Software. All rights reserved. -// -// The source is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// FITNESS FOR A PARTICULAR PURPOSE. See the DOOM Source Code License -// for more details. -// -// DESCRIPTION: -// Refresh, visplane stuff (floor, ceilings). 
-// -//----------------------------------------------------------------------------- - - -#ifndef __R_PLANE_H__ -#define __R_PLANE_H__ - -#include "swrenderer/plane/r_visibleplane.h" - -namespace swrenderer -{ - -void R_ClearPlanes (bool fullclear); - -void R_AddPlaneLights(visplane_t *plane, FLightNode *light_head); - -int R_DrawPlanes (); -void R_DrawSinglePlane(visplane_t *pl, fixed_t alpha, bool additive, bool masked); -void R_MapVisPlane (visplane_t *pl, void (*mapfunc)(int y, int x1, int x2), void (*stepfunc)()); - -visplane_t *R_FindPlane(const secplane_t &height, FTextureID picnum, int lightlevel, double alpha, bool additive, const FTransform &xform, int sky, FSectorPortal *portal); -visplane_t *R_CheckPlane(visplane_t *pl, int start, int stop); - -} - -#endif // __R_PLANE_H__ diff --git a/src/swrenderer/scene/r_portal.cpp b/src/swrenderer/scene/r_portal.cpp index e48a47c869..acb87dbb23 100644 --- a/src/swrenderer/scene/r_portal.cpp +++ b/src/swrenderer/scene/r_portal.cpp @@ -30,14 +30,14 @@ #include "p_maputl.h" #include "p_setup.h" #include "version.h" -#include "swrenderer/drawers/r_draw_rgba.h" #include "r_utility.h" -#include "r_plane.h" -#include "swrenderer/segments/r_clipsegment.h" -#include "swrenderer/segments/r_drawsegment.h" #include "r_things.h" #include "r_3dfloors.h" #include "r_segs.h" +#include "swrenderer/drawers/r_draw_rgba.h" +#include "swrenderer/segments/r_clipsegment.h" +#include "swrenderer/segments/r_drawsegment.h" +#include "swrenderer/plane/r_visibleplane.h" #include "swrenderer/r_main.h" #include "swrenderer/r_memory.h" diff --git a/src/swrenderer/scene/r_segs.cpp b/src/swrenderer/scene/r_segs.cpp index 6a87ab7562..0510129739 100644 --- a/src/swrenderer/scene/r_segs.cpp +++ b/src/swrenderer/scene/r_segs.cpp @@ -42,7 +42,6 @@ #include "d_net.h" #include "g_level.h" #include "r_bsp.h" -#include "r_plane.h" #include "swrenderer/plane/r_fogboundary.h" #include "r_segs.h" #include "r_3dfloors.h" diff --git 
a/src/swrenderer/scene/r_segs.h b/src/swrenderer/scene/r_segs.h index cf73ce82a7..f4ca36e4f2 100644 --- a/src/swrenderer/scene/r_segs.h +++ b/src/swrenderer/scene/r_segs.h @@ -27,6 +27,7 @@ namespace swrenderer { struct drawseg_t; +struct visplane_t; bool R_StoreWallRange(int start, int stop); void R_RenderMaskedSegRange (drawseg_t *ds, int x1, int x2); diff --git a/src/swrenderer/scene/r_things.cpp b/src/swrenderer/scene/r_things.cpp index e11510f321..0dbf47fa4c 100644 --- a/src/swrenderer/scene/r_things.cpp +++ b/src/swrenderer/scene/r_things.cpp @@ -56,7 +56,6 @@ #include "d_netinf.h" #include "p_effect.h" #include "r_bsp.h" -#include "r_plane.h" #include "r_segs.h" #include "r_3dfloors.h" #include "swrenderer/drawers/r_draw_rgba.h" diff --git a/src/swrenderer/scene/r_walldraw.cpp b/src/swrenderer/scene/r_walldraw.cpp index 3b6e402534..21b7348729 100644 --- a/src/swrenderer/scene/r_walldraw.cpp +++ b/src/swrenderer/scene/r_walldraw.cpp @@ -37,7 +37,6 @@ #include "g_level.h" #include "swrenderer/drawers/r_draw.h" #include "r_bsp.h" -#include "r_plane.h" #include "r_segs.h" #include "r_3dfloors.h" #include "v_palette.h" diff --git a/src/swrenderer/segments/r_clipsegment.cpp b/src/swrenderer/segments/r_clipsegment.cpp index 83920a1373..d92633ea9d 100644 --- a/src/swrenderer/segments/r_clipsegment.cpp +++ b/src/swrenderer/segments/r_clipsegment.cpp @@ -7,7 +7,6 @@ #include "p_lnspec.h" #include "p_setup.h" #include "swrenderer/r_main.h" -#include "swrenderer/scene/r_plane.h" #include "swrenderer/drawers/r_draw.h" #include "swrenderer/scene/r_things.h" #include "swrenderer/scene/r_3dfloors.h" diff --git a/src/swrenderer/segments/r_drawsegment.cpp b/src/swrenderer/segments/r_drawsegment.cpp index 42af7d0320..9c7dde091c 100644 --- a/src/swrenderer/segments/r_drawsegment.cpp +++ b/src/swrenderer/segments/r_drawsegment.cpp @@ -7,7 +7,6 @@ #include "p_lnspec.h" #include "p_setup.h" #include "swrenderer/r_main.h" -#include "swrenderer/scene/r_plane.h" #include 
"swrenderer/drawers/r_draw.h" #include "swrenderer/scene/r_things.h" #include "swrenderer/scene/r_3dfloors.h" diff --git a/src/swrenderer/segments/r_portalsegment.cpp b/src/swrenderer/segments/r_portalsegment.cpp index db31f5edfa..22e137fff5 100644 --- a/src/swrenderer/segments/r_portalsegment.cpp +++ b/src/swrenderer/segments/r_portalsegment.cpp @@ -7,7 +7,6 @@ #include "p_lnspec.h" #include "p_setup.h" #include "swrenderer/r_main.h" -#include "swrenderer/scene/r_plane.h" #include "swrenderer/drawers/r_draw.h" #include "swrenderer/scene/r_things.h" #include "swrenderer/scene/r_3dfloors.h" diff --git a/src/swrenderer/things/r_decal.cpp b/src/swrenderer/things/r_decal.cpp index 857e3bc36b..c4a90b414d 100644 --- a/src/swrenderer/things/r_decal.cpp +++ b/src/swrenderer/things/r_decal.cpp @@ -18,7 +18,6 @@ #include "d_net.h" #include "g_level.h" #include "swrenderer/scene/r_bsp.h" -#include "swrenderer/scene/r_plane.h" #include "r_decal.h" #include "swrenderer/scene/r_3dfloors.h" #include "swrenderer/drawers/r_draw.h" diff --git a/src/swrenderer/things/r_particle.cpp b/src/swrenderer/things/r_particle.cpp index d8a32d9ade..83ebd6db34 100644 --- a/src/swrenderer/things/r_particle.cpp +++ b/src/swrenderer/things/r_particle.cpp @@ -28,7 +28,6 @@ #include "d_netinf.h" #include "p_effect.h" #include "swrenderer/scene/r_bsp.h" -#include "swrenderer/scene/r_plane.h" #include "swrenderer/scene/r_segs.h" #include "swrenderer/scene/r_3dfloors.h" #include "swrenderer/drawers/r_draw_rgba.h" diff --git a/src/swrenderer/things/r_playersprite.cpp b/src/swrenderer/things/r_playersprite.cpp index 13d78495f1..bc1df52461 100644 --- a/src/swrenderer/things/r_playersprite.cpp +++ b/src/swrenderer/things/r_playersprite.cpp @@ -28,7 +28,6 @@ #include "d_netinf.h" #include "p_effect.h" #include "swrenderer/scene/r_bsp.h" -#include "swrenderer/scene/r_plane.h" #include "swrenderer/scene/r_segs.h" #include "swrenderer/scene/r_3dfloors.h" #include "swrenderer/drawers/r_draw_rgba.h" diff --git 
a/src/swrenderer/things/r_wallsprite.cpp b/src/swrenderer/things/r_wallsprite.cpp index c5fb8125c1..9d7e03c2a4 100644 --- a/src/swrenderer/things/r_wallsprite.cpp +++ b/src/swrenderer/things/r_wallsprite.cpp @@ -28,7 +28,6 @@ #include "d_netinf.h" #include "p_effect.h" #include "swrenderer/scene/r_bsp.h" -#include "swrenderer/scene/r_plane.h" #include "swrenderer/scene/r_segs.h" #include "swrenderer/scene/r_3dfloors.h" #include "swrenderer/drawers/r_draw_rgba.h" From 06900ff8bec79a30e0109f002bc0b86318ea2664 Mon Sep 17 00:00:00 2001 From: Christoph Oelckers Date: Sat, 31 Dec 2016 15:40:16 +0100 Subject: [PATCH 617/912] - reviewed script code for spawn calls that did not check their results. Nothing should ever assume that spawning an actor is unconditionally successful. There can always be some edge cases where this is not the case. --- wadsrc/static/zscript/doom/archvile.txt | 11 +++-- wadsrc/static/zscript/doom/fatso.txt | 1 + wadsrc/static/zscript/doom/revenant.txt | 11 +++-- wadsrc/static/zscript/heretic/chicken.txt | 13 +++--- wadsrc/static/zscript/heretic/dsparil.txt | 22 +++++---- .../zscript/heretic/hereticartifacts.txt | 2 +- wadsrc/static/zscript/heretic/hereticimp.txt | 18 +++++--- wadsrc/static/zscript/heretic/hereticmisc.txt | 26 ++++++----- wadsrc/static/zscript/heretic/ironlich.txt | 28 +++++++----- wadsrc/static/zscript/heretic/knight.txt | 9 ++-- .../static/zscript/heretic/weaponblaster.txt | 11 +++-- wadsrc/static/zscript/heretic/weaponmace.txt | 45 +++++++++++-------- .../static/zscript/heretic/weaponphoenix.txt | 25 +++++++---- wadsrc/static/zscript/hexen/blastradius.txt | 3 +- wadsrc/static/zscript/hexen/clericholy.txt | 15 ++++--- wadsrc/static/zscript/hexen/flechette.txt | 11 +++-- wadsrc/static/zscript/hexen/korax.txt | 23 +++++----- wadsrc/static/zscript/raven/minotaur.txt | 11 +++-- wadsrc/static/zscript/strife/entityboss.txt | 9 ++-- wadsrc/static/zscript/strife/inquisitor.txt | 9 ++-- wadsrc/static/zscript/strife/rebels.txt | 4 ++
wadsrc/static/zscript/strife/sigil.txt | 2 +- wadsrc/static/zscript/strife/spectral.txt | 38 ++++++++++------ .../static/zscript/strife/strifefunctions.txt | 5 ++- 24 files changed, 222 insertions(+), 130 deletions(-) diff --git a/wadsrc/static/zscript/doom/archvile.txt b/wadsrc/static/zscript/doom/archvile.txt index a19e5c705c..2ea153be1c 100644 --- a/wadsrc/static/zscript/doom/archvile.txt +++ b/wadsrc/static/zscript/doom/archvile.txt @@ -116,10 +116,13 @@ extend class Actor A_FaceTarget (); Actor fog = Spawn (fire, target.Pos, ALLOW_REPLACE); - tracer = fog; - fog.target = self; - fog.tracer = self.target; - fog.A_Fire(0); + if (fog != null) + { + tracer = fog; + fog.target = self; + fog.tracer = self.target; + fog.A_Fire(0); + } } } diff --git a/wadsrc/static/zscript/doom/fatso.txt b/wadsrc/static/zscript/doom/fatso.txt index 3831110c0e..e70c979243 100644 --- a/wadsrc/static/zscript/doom/fatso.txt +++ b/wadsrc/static/zscript/doom/fatso.txt @@ -188,6 +188,7 @@ extend class Actor // Now launch mushroom cloud Actor aimtarget = Spawn("Mapspot", pos, NO_REPLACE); // We need something to aim at. + if (aimtarget == null) return; Actor owner = (flags & MSF_DontHurt) ? target : self; aimtarget.Height = Height; diff --git a/wadsrc/static/zscript/doom/revenant.txt b/wadsrc/static/zscript/doom/revenant.txt index 2fa335a2df..82d5639340 100644 --- a/wadsrc/static/zscript/doom/revenant.txt +++ b/wadsrc/static/zscript/doom/revenant.txt @@ -233,10 +233,13 @@ extend class Actor SpawnPuff ("BulletPuff", pos, angle, angle, 3); Actor smoke = Spawn ("RevenantTracerSmoke", Vec3Offset(-Vel.X, -Vel.Y, 0.), ALLOW_REPLACE); - smoke.Vel.Z = 1.; - smoke.tics -= random[Tracer](0, 3); - if (smoke.tics < 1) - smoke.tics = 1; + if (smoke != null) + { + smoke.Vel.Z = 1.; + smoke.tics -= random[Tracer](0, 3); + if (smoke.tics < 1) + smoke.tics = 1; + } // The rest of this function was identical with Strife's version, except for the angle being used. 
A_Tracer2(16.875); diff --git a/wadsrc/static/zscript/heretic/chicken.txt b/wadsrc/static/zscript/heretic/chicken.txt index 9f4429d50f..b60a891363 100644 --- a/wadsrc/static/zscript/heretic/chicken.txt +++ b/wadsrc/static/zscript/heretic/chicken.txt @@ -335,11 +335,14 @@ extend class Actor for (int i = 0; i < count; i++) { Actor mo = Spawn("Feather", pos + (0, 0, 20), NO_REPLACE); - mo.target = self; - mo.Vel.X = Random2[Feathers]() / 256.; - mo.Vel.Y = Random2[Feathers]() / 256.; - mo.Vel.Z = 1. + random[Feathers]() / 128.; - mo.SetState (mo.SpawnState + (random[Feathers]()&7)); + if (mo != null) + { + mo.target = self; + mo.Vel.X = Random2[Feathers]() / 256.; + mo.Vel.Y = Random2[Feathers]() / 256.; + mo.Vel.Z = 1. + random[Feathers]() / 128.; + mo.SetState (mo.SpawnState + (random[Feathers]()&7)); + } } } diff --git a/wadsrc/static/zscript/heretic/dsparil.txt b/wadsrc/static/zscript/heretic/dsparil.txt index 54931b65e8..579f474928 100644 --- a/wadsrc/static/zscript/heretic/dsparil.txt +++ b/wadsrc/static/zscript/heretic/dsparil.txt @@ -166,13 +166,14 @@ class Sorcerer1 : Actor { bSolid = false; Actor mo = Spawn("Sorcerer2", Pos, ALLOW_REPLACE); - mo.Translation = Translation; - mo.SetStateLabel("Rise"); - mo.angle = angle; - mo.CopyFriendliness (self, true); + if (mo != null) + { + mo.Translation = Translation; + mo.SetStateLabel("Rise"); + mo.angle = angle; + mo.CopyFriendliness (self, true); + } } - - } @@ -434,9 +435,12 @@ class Sorcerer2FX1 : Actor for (int i = 0; i < 2; i++) { Actor mo = Spawn("Sorcerer2FXSpark", pos, ALLOW_REPLACE); - mo.Vel.X = Random2[BlueSpark]() / 128.; - mo.Vel.Y = Random2[BlueSpark]() / 128.; - mo.Vel.Z = 1. + Random[BlueSpark]() / 256.; + if (mo != null) + { + mo.Vel.X = Random2[BlueSpark]() / 128.; + mo.Vel.Y = Random2[BlueSpark]() / 128.; + mo.Vel.Z = 1. 
+ Random[BlueSpark]() / 256.; + } } } } diff --git a/wadsrc/static/zscript/heretic/hereticartifacts.txt b/wadsrc/static/zscript/heretic/hereticartifacts.txt index 3ef265735a..20f52a2c93 100644 --- a/wadsrc/static/zscript/heretic/hereticartifacts.txt +++ b/wadsrc/static/zscript/heretic/hereticartifacts.txt @@ -149,7 +149,7 @@ Class ArtiTimeBomb : Inventory override bool Use (bool pickup) { Actor mo = Spawn("ActivatedTimeBomb", Owner.Vec3Angle(24., Owner.angle, - Owner.Floorclip), ALLOW_REPLACE); - mo.target = Owner; + if (mo != null) mo.target = Owner; return true; } diff --git a/wadsrc/static/zscript/heretic/hereticimp.txt b/wadsrc/static/zscript/heretic/hereticimp.txt index c1443de032..e6915c7f6b 100644 --- a/wadsrc/static/zscript/heretic/hereticimp.txt +++ b/wadsrc/static/zscript/heretic/hereticimp.txt @@ -102,14 +102,20 @@ class HereticImp : Actor bNoGravity = false; chunk = Spawn("HereticImpChunk1", pos, ALLOW_REPLACE); - chunk.vel.x = random2[ImpExplode]() / 64.; - chunk.vel.y = random2[ImpExplode]() / 64.; - chunk.vel.z = 9; + if (chunk != null) + { + chunk.vel.x = random2[ImpExplode]() / 64.; + chunk.vel.y = random2[ImpExplode]() / 64.; + chunk.vel.z = 9; + } chunk = Spawn("HereticImpChunk2", pos, ALLOW_REPLACE); - chunk.vel.x = random2[ImpExplode]() / 64.; - chunk.vel.y = random2[ImpExplode]() / 64.; - chunk.vel.z = 9; + if (chunk != null) + { + chunk.vel.x = random2[ImpExplode]() / 64.; + chunk.vel.y = random2[ImpExplode]() / 64.; + chunk.vel.z = 9; + } if (extremecrash) { diff --git a/wadsrc/static/zscript/heretic/hereticmisc.txt b/wadsrc/static/zscript/heretic/hereticmisc.txt index ea79f5f803..b662173527 100644 --- a/wadsrc/static/zscript/heretic/hereticmisc.txt +++ b/wadsrc/static/zscript/heretic/hereticmisc.txt @@ -60,10 +60,13 @@ class Pod : Actor for (int count = chance > 240 ? 
2 : 1; count; count--) { Actor goo = Spawn(gootype, pos + (0, 0, 48), ALLOW_REPLACE); - goo.target = self; - goo.Vel.X = Random2[PodPain]() / 128.; - goo.Vel.Y = Random2[PodPain]() / 128.; - goo.Vel.Z = 0.5 + random[PodPain]() / 128.; + if (goo != null) + { + goo.target = self; + goo.Vel.X = Random2[PodPain]() / 128.; + goo.Vel.Y = Random2[PodPain]() / 128.; + goo.Vel.Z = 0.5 + random[PodPain]() / 128.; + } } } @@ -295,12 +298,15 @@ class Volcano : Actor for (int i = 0; i < count; i++) { Actor blast = Spawn("VolcanoBlast", pos + (0, 0, 44), ALLOW_REPLACE); - blast.target = self; - blast.Angle = random[VolcanoBlast]() * (360 / 256.); - blast.VelFromAngle(1.); - blast.Vel.Z = 2.5 + random[VolcanoBlast]() / 64.; - blast.A_PlaySound ("world/volcano/shoot", CHAN_BODY); - blast.CheckMissileSpawn (radius); + if (blast != null) + { + blast.target = self; + blast.Angle = random[VolcanoBlast]() * (360 / 256.); + blast.VelFromAngle(1.); + blast.Vel.Z = 2.5 + random[VolcanoBlast]() / 64.; + blast.A_PlaySound ("world/volcano/shoot", CHAN_BODY); + blast.CheckMissileSpawn (radius); + } } } } diff --git a/wadsrc/static/zscript/heretic/ironlich.txt b/wadsrc/static/zscript/heretic/ironlich.txt index 8a4dd4c05c..65e510932e 100644 --- a/wadsrc/static/zscript/heretic/ironlich.txt +++ b/wadsrc/static/zscript/heretic/ironlich.txt @@ -102,12 +102,15 @@ class Ironlich : Actor { A_PlaySound ("ironlich/attack1", CHAN_BODY); } - fire.target = baseFire.target; - fire.angle = baseFire.angle; - fire.Vel = baseFire.Vel; - fire.SetDamage(0); - fire.health = (i+1) * 2; - fire.CheckMissileSpawn (radius); + if (fire != null) + { + fire.target = baseFire.target; + fire.angle = baseFire.angle; + fire.Vel = baseFire.Vel; + fire.SetDamage(0); + fire.health = (i+1) * 2; + fire.CheckMissileSpawn (radius); + } } } } @@ -167,11 +170,14 @@ class HeadFX1 : Actor for (int i = 0; i < 8; i++) { Actor shard = Spawn("HeadFX2", Pos, ALLOW_REPLACE); - shard.target = target; - shard.angle = i*45.; - 
shard.VelFromAngle(); - shard.Vel.Z = -.6; - shard.CheckMissileSpawn (radius); + if (shard != null) + { + shard.target = target; + shard.angle = i*45.; + shard.VelFromAngle(); + shard.Vel.Z = -.6; + shard.CheckMissileSpawn (radius); + } } } } diff --git a/wadsrc/static/zscript/heretic/knight.txt b/wadsrc/static/zscript/heretic/knight.txt index 01b7252c60..e66e191f04 100644 --- a/wadsrc/static/zscript/heretic/knight.txt +++ b/wadsrc/static/zscript/heretic/knight.txt @@ -163,8 +163,11 @@ class RedAxe : KnightAxe double xo = random2[DripBlood]() / 32.0; double yo = random2[DripBlood]() / 32.0; Actor mo = Spawn ("Blood", Vec3Offset(xo, yo, 0.), ALLOW_REPLACE); - mo.Vel.X = random2[DripBlood]() / 64.0; - mo.Vel.Y = random2[DripBlood]() / 64.0; - mo.Gravity = 1./8; + if (mo != null) + { + mo.Vel.X = random2[DripBlood]() / 64.0; + mo.Vel.Y = random2[DripBlood]() / 64.0; + mo.Gravity = 1./8; + } } } diff --git a/wadsrc/static/zscript/heretic/weaponblaster.txt b/wadsrc/static/zscript/heretic/weaponblaster.txt index 612e630c0b..913abffdb6 100644 --- a/wadsrc/static/zscript/heretic/weaponblaster.txt +++ b/wadsrc/static/zscript/heretic/weaponblaster.txt @@ -160,10 +160,13 @@ class BlasterFX1 : FastProjectile for(int i = 0; i < 8; i++) { Actor ripper = Spawn("Ripper", pos, ALLOW_REPLACE); - ripper.target = target; - ripper.angle = i*45; - ripper.VelFromAngle(); - ripper.CheckMissileSpawn (radius); + if (ripper != null) + { + ripper.target = target; + ripper.angle = i*45; + ripper.VelFromAngle(); + ripper.CheckMissileSpawn (radius); + } } } } diff --git a/wadsrc/static/zscript/heretic/weaponmace.txt b/wadsrc/static/zscript/heretic/weaponmace.txt index fd11e685fd..3ef747e0f9 100644 --- a/wadsrc/static/zscript/heretic/weaponmace.txt +++ b/wadsrc/static/zscript/heretic/weaponmace.txt @@ -60,14 +60,17 @@ class Mace : HereticWeapon if (random[MaceAtk]() < 28) { Actor ball = Spawn("MaceFX2", Pos + (0, 0, 28 - Floorclip), ALLOW_REPLACE); - ball.Vel.Z = 2 - clamp(tan(pitch), -5, 5); - 
ball.target = self; - ball.angle = self.angle; - ball.AddZ(ball.Vel.Z); - ball.VelFromAngle(); - ball.Vel += Vel.xy / 2; - ball.A_PlaySound ("weapons/maceshoot", CHAN_BODY); - ball.CheckMissileSpawn (radius); + if (ball != null) + { + ball.Vel.Z = 2 - clamp(tan(pitch), -5, 5); + ball.target = self; + ball.angle = self.angle; + ball.AddZ(ball.Vel.Z); + ball.VelFromAngle(); + ball.Vel += Vel.xy / 2; + ball.A_PlaySound ("weapons/maceshoot", CHAN_BODY); + ball.CheckMissileSpawn (radius); + } } else { @@ -260,18 +263,24 @@ class MaceFX2 : MaceFX1 SetState (SpawnState); Actor tiny = Spawn("MaceFX3", Pos, ALLOW_REPLACE); - tiny.target = target; - tiny.angle = angle + 90.; - tiny.VelFromAngle(Vel.Z - 1.); - tiny.Vel += (Vel.XY * .5, Vel.Z); - tiny.CheckMissileSpawn (radius); + if (tiny != null) + { + tiny.target = target; + tiny.angle = angle + 90.; + tiny.VelFromAngle(Vel.Z - 1.); + tiny.Vel += (Vel.XY * .5, Vel.Z); + tiny.CheckMissileSpawn (radius); + } tiny = Spawn("MaceFX3", Pos, ALLOW_REPLACE); - tiny.target = target; - tiny.angle = angle - 90.; - tiny.VelFromAngle(Vel.Z - 1.); - tiny.Vel += (Vel.XY * .5, Vel.Z); - tiny.CheckMissileSpawn (radius); + if (tiny != null) + { + tiny.target = target; + tiny.angle = angle - 90.; + tiny.VelFromAngle(Vel.Z - 1.); + tiny.Vel += (Vel.XY * .5, Vel.Z); + tiny.CheckMissileSpawn (radius); + } return; } } diff --git a/wadsrc/static/zscript/heretic/weaponphoenix.txt b/wadsrc/static/zscript/heretic/weaponphoenix.txt index 6b949b8f61..eb75acb1dd 100644 --- a/wadsrc/static/zscript/heretic/weaponphoenix.txt +++ b/wadsrc/static/zscript/heretic/weaponphoenix.txt @@ -151,16 +151,19 @@ class PhoenixRodPowered : PhoenixRod slope += 0.1; Actor mo = Spawn("PhoenixFX2", spawnpos, ALLOW_REPLACE); - mo.target = self; - mo.Angle = Angle; - mo.VelFromAngle(); - mo.Vel.XY += Vel.XY; - mo.Vel.Z = mo.Speed * slope; + if (mo != null) + { + mo.target = self; + mo.Angle = Angle; + mo.VelFromAngle(); + mo.Vel.XY += Vel.XY; + mo.Vel.Z = mo.Speed * slope; + 
mo.CheckMissileSpawn (radius); + } if (!player.refire) { A_PlaySound("weapons/phoenixpowshoot", CHAN_WEAPON, 1, true); } - mo.CheckMissileSpawn (radius); } //---------------------------------------------------------------------------- @@ -239,10 +242,16 @@ class PhoenixFX1 : Actor //[RH] Heretic never sets the target for seeking //P_SeekerMissile (self, 5, 10); Actor puff = Spawn("PhoenixPuff", Pos, ALLOW_REPLACE); - puff.Vel.XY = AngleToVector(Angle + 90, 1.3); + if (puff != null) + { + puff.Vel.XY = AngleToVector(Angle + 90, 1.3); + } puff = Spawn("PhoenixPuff", Pos, ALLOW_REPLACE); - puff.Vel.XY = AngleToVector(Angle - 90, 1.3); + if (puff != null) + { + puff.Vel.XY = AngleToVector(Angle - 90, 1.3); + } } diff --git a/wadsrc/static/zscript/hexen/blastradius.txt b/wadsrc/static/zscript/hexen/blastradius.txt index 7374a4523d..3dd355b7cf 100644 --- a/wadsrc/static/zscript/hexen/blastradius.txt +++ b/wadsrc/static/zscript/hexen/blastradius.txt @@ -70,7 +70,8 @@ extend class Actor // [RH] Floor and ceiling huggers should not be blasted vertically. 
if (!victim.bFloorHugger && !victim.bCeilingHugger) { - mo.Vel.Z = victim.Vel.Z = 8; + victim.Vel.Z = 8; + if (mo != null) mo.Vel.Z = 8; } } else diff --git a/wadsrc/static/zscript/hexen/clericholy.txt b/wadsrc/static/zscript/hexen/clericholy.txt index 33b69bf25a..3846c75d5e 100644 --- a/wadsrc/static/zscript/hexen/clericholy.txt +++ b/wadsrc/static/zscript/hexen/clericholy.txt @@ -567,14 +567,17 @@ class HolyTail : Actor static void SpawnSpiritTail (Actor spirit) { Actor tail = Spawn ("HolyTail", spirit.Pos, ALLOW_REPLACE); - tail.target = spirit; // parent - for (int i = 1; i < 3; i++) + if (tail != null) { - Actor next = Spawn ("HolyTailTrail", spirit.Pos, ALLOW_REPLACE); - tail.tracer = next; - tail = next; + tail.target = spirit; // parent + for (int i = 1; i < 3; i++) + { + Actor next = Spawn ("HolyTailTrail", spirit.Pos, ALLOW_REPLACE); + tail.tracer = next; + tail = next; + } + tail.tracer = null; // last tail bit } - tail.tracer = null; // last tail bit } //============================================================================ diff --git a/wadsrc/static/zscript/hexen/flechette.txt b/wadsrc/static/zscript/hexen/flechette.txt index fc9268c5b0..3440312c9e 100644 --- a/wadsrc/static/zscript/hexen/flechette.txt +++ b/wadsrc/static/zscript/hexen/flechette.txt @@ -255,10 +255,13 @@ class ArtiPoisonBag : Inventory } class spawntype = GetFlechetteType(other); - Inventory copy = Inventory(Spawn (spawntype)); - copy.Amount = Amount; - copy.MaxAmount = MaxAmount; - GoAwayAndDie (); + let copy = Inventory(Spawn (spawntype)); + if (copy != null) + { + copy.Amount = Amount; + copy.MaxAmount = MaxAmount; + GoAwayAndDie (); + } return copy; } } diff --git a/wadsrc/static/zscript/hexen/korax.txt b/wadsrc/static/zscript/hexen/korax.txt index 1c1f31712f..8f313bc19e 100644 --- a/wadsrc/static/zscript/hexen/korax.txt +++ b/wadsrc/static/zscript/hexen/korax.txt @@ -315,17 +315,20 @@ class Korax : Actor private void SpawnKoraxMissile (Vector3 pos, Actor dest, Class type) { 
Actor th = Spawn (type, pos, ALLOW_REPLACE); - th.target = self; // Originator - double an = th.AngleTo(dest); - if (dest.bShadow) - { // Invisible target - an += Random2[KoraxMissile]() * (45/256.); + if (th != null) + { + th.target = self; // Originator + double an = th.AngleTo(dest); + if (dest.bShadow) + { // Invisible target + an += Random2[KoraxMissile]() * (45/256.); + } + th.angle = an; + th.VelFromAngle(); + double dist = dest.DistanceBySpeed(th, th.Speed); + th.Vel.Z = (dest.pos.z - pos.Z + 30) / dist; + th.CheckMissileSpawn(radius); } - th.angle = an; - th.VelFromAngle(); - double dist = dest.DistanceBySpeed(th, th.Speed); - th.Vel.Z = (dest.pos.z - pos.Z + 30) / dist; - th.CheckMissileSpawn(radius); } //============================================================================ diff --git a/wadsrc/static/zscript/raven/minotaur.txt b/wadsrc/static/zscript/raven/minotaur.txt index 01cd635410..06bb6daae3 100644 --- a/wadsrc/static/zscript/raven/minotaur.txt +++ b/wadsrc/static/zscript/raven/minotaur.txt @@ -262,7 +262,7 @@ class Minotaur : Actor type = "PunchPuff"; } Actor puff = Spawn (type, Pos, ALLOW_REPLACE); - puff.Vel.Z = 2; + if (puff != null) puff.Vel.Z = 2; special1--; } else @@ -710,9 +710,12 @@ class MinotaurFX2 : MinotaurFX1 double y = Random2[MntrFloorFire]() / 64.; Actor mo = Spawn("MinotaurFX3", Vec2OffsetZ(x, y, floorz), ALLOW_REPLACE); - mo.target = target; - mo.Vel.X = MinVel; // Force block checking - mo.CheckMissileSpawn (radius); + if (mo != null) + { + mo.target = target; + mo.Vel.X = MinVel; // Force block checking + mo.CheckMissileSpawn (radius); + } } } diff --git a/wadsrc/static/zscript/strife/entityboss.txt b/wadsrc/static/zscript/strife/entityboss.txt index a9ec94bb23..ff26280c3f 100644 --- a/wadsrc/static/zscript/strife/entityboss.txt +++ b/wadsrc/static/zscript/strife/entityboss.txt @@ -202,9 +202,12 @@ class EntityBoss : SpectralMonster Vector3 pos = spot.Vec3Angle(secondRadius, an, tracer ? 70. 
: 0.); second = Spawn("EntitySecond", pos, ALLOW_REPLACE); - second.CopyFriendliness(self, true); - second.A_FaceTarget(); - second.VelFromAngle(i == 0? 4.8828125 : secondRadius * 4., an); + if (second != null) + { + second.CopyFriendliness(self, true); + second.A_FaceTarget(); + second.VelFromAngle(i == 0? 4.8828125 : secondRadius * 4., an); + } } } diff --git a/wadsrc/static/zscript/strife/inquisitor.txt b/wadsrc/static/zscript/strife/inquisitor.txt index 6d3c5bc7f7..5c15f4af7e 100644 --- a/wadsrc/static/zscript/strife/inquisitor.txt +++ b/wadsrc/static/zscript/strife/inquisitor.txt @@ -171,9 +171,12 @@ class Inquisitor : Actor void A_TossArm () { Actor foo = Spawn("InquisitorArm", Pos + (0,0,24), ALLOW_REPLACE); - foo.angle = angle - 90. + Random2[Inquisitor]() * (360./1024.); - foo.VelFromAngle(foo.Speed / 8); - foo.Vel.Z = random[Inquisitor]() / 64.; + if (foo != null) + { + foo.angle = angle - 90. + Random2[Inquisitor]() * (360./1024.); + foo.VelFromAngle(foo.Speed / 8); + foo.Vel.Z = random[Inquisitor]() / 64.; + } } diff --git a/wadsrc/static/zscript/strife/rebels.txt b/wadsrc/static/zscript/strife/rebels.txt index 24deea25b1..7302f1c020 100644 --- a/wadsrc/static/zscript/strife/rebels.txt +++ b/wadsrc/static/zscript/strife/rebels.txt @@ -163,6 +163,10 @@ class TeleporterBeacon : Inventory { Actor owner = target; Actor rebel = Spawn("Rebel1", (pos.xy, floorz), ALLOW_REPLACE); + if (rebel == null) + { + return; + } if (!rebel.TryMove (rebel.Pos.xy, true)) { rebel.Destroy (); diff --git a/wadsrc/static/zscript/strife/sigil.txt b/wadsrc/static/zscript/strife/sigil.txt index 096915ccf2..bfdcae77a5 100644 --- a/wadsrc/static/zscript/strife/sigil.txt +++ b/wadsrc/static/zscript/strife/sigil.txt @@ -268,7 +268,7 @@ class Sigil : Weapon action void A_FireSigil1 () { - Actor spot; + Actor spot = null; FTranslatedLineTarget t; if (player == null || player.ReadyWeapon == null) diff --git a/wadsrc/static/zscript/strife/spectral.txt 
b/wadsrc/static/zscript/strife/spectral.txt index 0878cff948..a56d9e7178 100644 --- a/wadsrc/static/zscript/strife/spectral.txt +++ b/wadsrc/static/zscript/strife/spectral.txt @@ -62,11 +62,13 @@ class SpectralMonster : Actor return; Actor foo = Spawn("SpectralLightningV2", Pos + (0, 0, 32), ALLOW_REPLACE); - - foo.Vel.Z = -12; - foo.target = self; - foo.FriendPlayer = 0; - foo.tracer = target; + if (foo != null) + { + foo.Vel.Z = -12; + foo.target = self; + foo.FriendPlayer = 0; + foo.tracer = target; + } Angle -= 90.; for (int i = 0; i < 20; ++i) @@ -217,9 +219,11 @@ class SpectralLightningH1 : SpectralLightningBase void A_SpectralLightningTail () { Actor foo = Spawn("SpectralLightningHTail", Vec3Offset(-Vel.X, -Vel.Y, 0.), ALLOW_REPLACE); - - foo.Angle = Angle; - foo.FriendPlayer = FriendPlayer; + if (foo != null) + { + foo.Angle = Angle; + foo.FriendPlayer = FriendPlayer; + } } } @@ -386,15 +390,21 @@ class SpectralLightningSpot : SpectralLightningDeath1 Actor flash = Spawn (cls, Vec2OffsetZ(xo, yo, ONCEILINGZ), ALLOW_REPLACE); - flash.target = target; - flash.Vel.Z = -18; - flash.FriendPlayer = FriendPlayer; + if (flash != null) + { + flash.target = target; + flash.Vel.Z = -18; + flash.FriendPlayer = FriendPlayer; + } flash = Spawn("SpectralLightningV2", (pos.xy, ONCEILINGZ), ALLOW_REPLACE); - flash.target = target; - flash.Vel.Z = -18; - flash.FriendPlayer = FriendPlayer; + if (flash != null) + { + flash.target = target; + flash.Vel.Z = -18; + flash.FriendPlayer = FriendPlayer; + } } } diff --git a/wadsrc/static/zscript/strife/strifefunctions.txt b/wadsrc/static/zscript/strife/strifefunctions.txt index ed6c3854ef..5e0312a954 100644 --- a/wadsrc/static/zscript/strife/strifefunctions.txt +++ b/wadsrc/static/zscript/strife/strifefunctions.txt @@ -133,7 +133,10 @@ extend class Actor void A_DropFire() { Actor drop = Spawn("FireDroplet", pos + (0,0,24), ALLOW_REPLACE); - drop.Vel.Z = -1.; + if (drop != null) + { + drop.Vel.Z = -1.; + } A_Explode(64, 64, 
XF_NOSPLASH, damagetype: 'Fire'); } From 5d849f0ac9e04076c96f8f5fdda2b481e0c6fcfe Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Sat, 31 Dec 2016 17:33:32 -0500 Subject: [PATCH 618/912] - Added a shell script to fix llvm-3.8 issues in Ubuntu. --- tools/fix-llvm-3.8-ubuntu.sh | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100755 tools/fix-llvm-3.8-ubuntu.sh diff --git a/tools/fix-llvm-3.8-ubuntu.sh b/tools/fix-llvm-3.8-ubuntu.sh new file mode 100755 index 0000000000..978359201d --- /dev/null +++ b/tools/fix-llvm-3.8-ubuntu.sh @@ -0,0 +1,12 @@ +read -p "This tool adds in missing CMAKE stuff to your llvm-3.8 package. This requires super-user access. This tool is only meant for convenience and is possibly very dangerous. Rude things may tend to occur when using this tool. Are you sure you want to use this tool?" -n 1 -r +echo +if [[ $REPLY =~ ^[Yy]$ ]] +then + sudo mkdir -p /usr/lib/llvm-3.8/share/llvm + sudo ln -s /usr/share/llvm-3.8/cmake /usr/lib/llvm-3.8/share/llvm/cmake + sudo sed -i -e '/get_filename_component(LLVM_INSTALL_PREFIX/ {s|^|#|}' -e '/^# Compute the installation prefix/i set(LLVM_INSTALL_PREFIX "/usr/lib/llvm-3.8")' /usr/lib/llvm-3.8/share/llvm/cmake/LLVMConfig.cmake + sudo sed -i '/_IMPORT_CHECK_TARGETS Polly/ {s|^|#|}' /usr/lib/llvm-3.8/share/llvm/cmake/LLVMExports-relwithdebinfo.cmake + sudo sed -i '/_IMPORT_CHECK_TARGETS sancov/ {s|^|#|}' /usr/lib/llvm-3.8/share/llvm/cmake/LLVMExports-relwithdebinfo.cmake + sudo ln -s /usr/lib/x86_64-linux-gnu/libLLVM-3.8.so.1 /usr/lib/llvm-3.8/lib/ +fi + From 4b41b9c9dc8454e8c130ab7d3978c94cfe112673 Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Sat, 31 Dec 2016 17:38:50 -0500 Subject: [PATCH 619/912] - Fixed formatting a bit on 'the dangerous tool.' 
--- tools/fix-llvm-3.8-ubuntu.sh | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tools/fix-llvm-3.8-ubuntu.sh b/tools/fix-llvm-3.8-ubuntu.sh index 978359201d..2dafc2c346 100755 --- a/tools/fix-llvm-3.8-ubuntu.sh +++ b/tools/fix-llvm-3.8-ubuntu.sh @@ -1,4 +1,9 @@ -read -p "This tool adds in missing CMAKE stuff to your llvm-3.8 package. This requires super-user access. This tool is only meant for convenience and is possibly very dangerous. Rude things may tend to occur when using this tool. Are you sure you want to use this tool?" -n 1 -r +echo This tool adds in missing CMAKE stuff to your llvm-3.8 package. +echo This requires super-user access. This tool is only meant for +echo convenience and is possibly very dangerous. Rude things may tend +echo to occur when using this tool. +echo +read -p "Are you sure you want to use this tool? " -n 1 -r echo if [[ $REPLY =~ ^[Yy]$ ]] then From 81658d7c88a8df3f7dbb9398690100eb67886073 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 1 Jan 2017 10:28:35 +0100 Subject: [PATCH 620/912] Move line handling to more files --- src/CMakeLists.txt | 6 +- src/swrenderer/line/r_line.cpp | 126 ++++ src/swrenderer/line/r_line.h | 28 + src/swrenderer/line/r_walldraw.cpp | 604 ++++++++++++++++++++ src/swrenderer/{scene => line}/r_walldraw.h | 2 + src/swrenderer/line/r_wallsetup.cpp | 227 ++++++++ src/swrenderer/line/r_wallsetup.h | 24 + src/swrenderer/plane/r_flatplane.cpp | 1 - src/swrenderer/plane/r_fogboundary.cpp | 1 - src/swrenderer/plane/r_skyplane.cpp | 3 +- src/swrenderer/plane/r_slopeplane.cpp | 1 - src/swrenderer/scene/r_bsp.cpp | 145 +---- src/swrenderer/scene/r_bsp.h | 25 - src/swrenderer/scene/r_portal.cpp | 1 + src/swrenderer/scene/r_segs.cpp | 203 +------ src/swrenderer/scene/r_segs.h | 15 - src/swrenderer/scene/r_walldraw.cpp | 603 ------------------- src/swrenderer/segments/r_drawsegment.h | 2 +- src/swrenderer/things/r_decal.cpp | 3 +- src/swrenderer/things/r_visiblesprite.h | 2 +- 
src/swrenderer/things/r_wallsprite.cpp | 2 + 21 files changed, 1051 insertions(+), 973 deletions(-) create mode 100644 src/swrenderer/line/r_line.cpp create mode 100644 src/swrenderer/line/r_line.h create mode 100644 src/swrenderer/line/r_walldraw.cpp rename src/swrenderer/{scene => line}/r_walldraw.h (97%) create mode 100644 src/swrenderer/line/r_wallsetup.cpp create mode 100644 src/swrenderer/line/r_wallsetup.h delete mode 100644 src/swrenderer/scene/r_walldraw.cpp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index e7e819626a..f914f79966 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -769,6 +769,7 @@ file( GLOB HEADER_FILES swrenderer/drawers/*.h swrenderer/scene/*.h swrenderer/segments/*.h + swrenderer/line/*.h swrenderer/plane/*.h swrenderer/things/*.h polyrenderer/*.h @@ -821,8 +822,10 @@ set( FASTMATH_PCH_SOURCES swrenderer/scene/r_bsp.cpp swrenderer/scene/r_segs.cpp swrenderer/scene/r_things.cpp - swrenderer/scene/r_walldraw.cpp swrenderer/scene/r_portal.cpp + swrenderer/line/r_line.cpp + swrenderer/line/r_walldraw.cpp + swrenderer/line/r_wallsetup.cpp swrenderer/segments/r_clipsegment.cpp swrenderer/segments/r_drawsegment.cpp swrenderer/segments/r_portalsegment.cpp @@ -1474,6 +1477,7 @@ source_group("Software Renderer" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR source_group("Software Renderer\\Drawers" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/swrenderer/drawers/.+") source_group("Software Renderer\\Scene" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/swrenderer/scene/.+") source_group("Software Renderer\\Segments" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/swrenderer/segments/.+") +source_group("Software Renderer\\Line" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/swrenderer/line/.+") source_group("Software Renderer\\Plane" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/swrenderer/plane/.+") source_group("Software Renderer\\Things" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/swrenderer/things/.+") 
source_group("Poly Renderer" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/polyrenderer/.+") diff --git a/src/swrenderer/line/r_line.cpp b/src/swrenderer/line/r_line.cpp new file mode 100644 index 0000000000..4edd23cb99 --- /dev/null +++ b/src/swrenderer/line/r_line.cpp @@ -0,0 +1,126 @@ + +#include +#include +#include "templates.h" +#include "i_system.h" +#include "doomdef.h" +#include "doomstat.h" +#include "doomdata.h" +#include "p_lnspec.h" +#include "r_sky.h" +#include "v_video.h" +#include "m_swap.h" +#include "w_wad.h" +#include "stats.h" +#include "a_sharedglobal.h" +#include "d_net.h" +#include "g_level.h" +#include "r_wallsetup.h" +#include "v_palette.h" +#include "r_data/colormaps.h" +#include "swrenderer/r_main.h" +#include "swrenderer/r_memory.h" +#include "swrenderer/scene/r_bsp.h" +#include "swrenderer/line/r_line.h" + +namespace swrenderer +{ + FWallCoords WallC; + FWallTmapVals WallT; + + // Transform and clip coordinates. Returns true if it was clipped away + bool FWallCoords::Init(const DVector2 &pt1, const DVector2 &pt2, double too_close) + { + tleft.X = float(pt1.X * ViewSin - pt1.Y * ViewCos); + tright.X = float(pt2.X * ViewSin - pt2.Y * ViewCos); + + tleft.Y = float(pt1.X * ViewTanCos + pt1.Y * ViewTanSin); + tright.Y = float(pt2.X * ViewTanCos + pt2.Y * ViewTanSin); + + if (MirrorFlags & RF_XFLIP) + { + float t = -tleft.X; + tleft.X = -tright.X; + tright.X = t; + swapvalues(tleft.Y, tright.Y); + } + + if (tleft.X >= -tleft.Y) + { + if (tleft.X > tleft.Y) return true; // left edge is off the right side + if (tleft.Y == 0) return true; + sx1 = xs_RoundToInt(CenterX + tleft.X * CenterX / tleft.Y); + sz1 = tleft.Y; + } + else + { + if (tright.X < -tright.Y) return true; // wall is off the left side + float den = tleft.X - tright.X - tright.Y + tleft.Y; + if (den == 0) return true; + sx1 = 0; + sz1 = tleft.Y + (tright.Y - tleft.Y) * (tleft.X + tleft.Y) / den; + } + + if (sz1 < too_close) + return true; + + if (tright.X <= tright.Y) + { + if 
(tright.X < -tright.Y) return true; // right edge is off the left side + if (tright.Y == 0) return true; + sx2 = xs_RoundToInt(CenterX + tright.X * CenterX / tright.Y); + sz2 = tright.Y; + } + else + { + if (tleft.X > tleft.Y) return true; // wall is off the right side + float den = tright.Y - tleft.Y - tright.X + tleft.X; + if (den == 0) return true; + sx2 = viewwidth; + sz2 = tleft.Y + (tright.Y - tleft.Y) * (tleft.X - tleft.Y) / den; + } + + if (sz2 < too_close || sx2 <= sx1) + return true; + + return false; + } + + ///////////////////////////////////////////////////////////////////////// + + void FWallTmapVals::InitFromWallCoords(const FWallCoords *wallc) + { + const FVector2 *left = &wallc->tleft; + const FVector2 *right = &wallc->tright; + + if (MirrorFlags & RF_XFLIP) + { + swapvalues(left, right); + } + UoverZorg = left->X * centerx; + UoverZstep = -left->Y; + InvZorg = (left->X - right->X) * centerx; + InvZstep = right->Y - left->Y; + } + + void FWallTmapVals::InitFromLine(const DVector2 &left, const DVector2 &right) + { + // Coordinates should have already had viewx,viewy subtracted + + double fullx1 = left.X * ViewSin - left.Y * ViewCos; + double fullx2 = right.X * ViewSin - right.Y * ViewCos; + double fully1 = left.X * ViewTanCos + left.Y * ViewTanSin; + double fully2 = right.X * ViewTanCos + right.Y * ViewTanSin; + + if (MirrorFlags & RF_XFLIP) + { + fullx1 = -fullx1; + fullx2 = -fullx2; + } + + UoverZorg = float(fullx1 * centerx); + UoverZstep = float(-fully1); + InvZorg = float((fullx1 - fullx2) * centerx); + InvZstep = float(fully2 - fully1); + } +} diff --git a/src/swrenderer/line/r_line.h b/src/swrenderer/line/r_line.h new file mode 100644 index 0000000000..95a9319696 --- /dev/null +++ b/src/swrenderer/line/r_line.h @@ -0,0 +1,28 @@ + +#pragma once + +namespace swrenderer +{ + struct FWallCoords + { + FVector2 tleft; // coords at left of wall in view space rx1,ry1 + FVector2 tright; // coords at right of wall in view space rx2,ry2 + + float sz1, 
sz2; // depth at left, right of wall in screen space yb1,yb2 + short sx1, sx2; // x coords at left, right of wall in screen space xb1,xb2 + + bool Init(const DVector2 &pt1, const DVector2 &pt2, double too_close); + }; + + struct FWallTmapVals + { + float UoverZorg, UoverZstep; + float InvZorg, InvZstep; + + void InitFromWallCoords(const FWallCoords *wallc); + void InitFromLine(const DVector2 &left, const DVector2 &right); + }; + + extern FWallCoords WallC; + extern FWallTmapVals WallT; +} diff --git a/src/swrenderer/line/r_walldraw.cpp b/src/swrenderer/line/r_walldraw.cpp new file mode 100644 index 0000000000..da0206eeca --- /dev/null +++ b/src/swrenderer/line/r_walldraw.cpp @@ -0,0 +1,604 @@ +/* +** Wall drawing stuff +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. 
+** +*/ + +#include +#include + +#include "doomdef.h" +#include "doomstat.h" +#include "doomdata.h" + +#include "swrenderer/r_main.h" +#include "r_sky.h" +#include "v_video.h" + +#include "m_swap.h" +#include "a_sharedglobal.h" +#include "d_net.h" +#include "g_level.h" +#include "r_walldraw.h" +#include "v_palette.h" +#include "r_data/colormaps.h" +#include "gl/dynlights/gl_dynlight.h" +#include "swrenderer/drawers/r_drawers.h" +#include "swrenderer/drawers/r_draw.h" +#include "swrenderer/segments/r_drawsegment.h" +#include "swrenderer/scene/r_bsp.h" +#include "swrenderer/scene/r_segs.h" +#include "swrenderer/scene/r_3dfloors.h" +#include "swrenderer/line/r_walldraw.h" +#include "swrenderer/line/r_wallsetup.h" + +namespace swrenderer +{ + using namespace drawerargs; + + extern FTexture *rw_pic; + + static const uint8_t *R_GetColumn(FTexture *tex, int col) + { + int width; + + // If the texture's width isn't a power of 2, then we need to make it a + // positive offset for proper clamping. + if (col < 0 && (width = tex->GetWidth()) != (1 << tex->WidthBits)) + { + col = width + (col % width); + } + + if (r_swtruecolor) + return (const uint8_t *)tex->GetColumnBgra(col, nullptr); + else + return tex->GetColumn(col, nullptr); + } + + WallSampler::WallSampler(int y1, float swal, double yrepeat, fixed_t xoffset, double xmagnitude, FTexture *texture, const BYTE*(*getcol)(FTexture *texture, int x)) + { + xoffset += FLOAT2FIXED(xmagnitude * 0.5); + + if (!r_swtruecolor) + { + height = texture->GetHeight(); + + int uv_fracbits = 32 - texture->HeightBits; + if (uv_fracbits != 32) + { + uv_max = height << uv_fracbits; + + // Find start uv in [0-base_height[ range. + // Not using xs_ToFixed because it rounds the result and we need something that always rounds down to stay within the range. 
+ double uv_stepd = swal * yrepeat; + double v = (dc_texturemid + uv_stepd * (y1 - CenterY + 0.5)) / height; + v = v - floor(v); + v *= height; + v *= (1 << uv_fracbits); + + uv_pos = (uint32_t)v; + uv_step = xs_ToFixed(uv_fracbits, uv_stepd); + if (uv_step == 0) // To prevent divide by zero elsewhere + uv_step = 1; + } + else + { // Hack for one pixel tall textures + uv_pos = 0; + uv_step = 0; + uv_max = 1; + } + + source = getcol(texture, xoffset >> FRACBITS); + source2 = nullptr; + texturefracx = 0; + } + else + { + // Normalize to 0-1 range: + double uv_stepd = swal * yrepeat; + double v = (dc_texturemid + uv_stepd * (y1 - CenterY + 0.5)) / texture->GetHeight(); + v = v - floor(v); + double v_step = uv_stepd / texture->GetHeight(); + + if (isnan(v) || isnan(v_step)) // this should never happen, but it apparently does.. + { + uv_stepd = 0.0; + v = 0.0; + v_step = 0.0; + } + + // Convert to uint32: + uv_pos = (uint32_t)(v * 0x100000000LL); + uv_step = (uint32_t)(v_step * 0x100000000LL); + uv_max = 0; + + // Texture mipmap and filter selection: + if (getcol != R_GetColumn) + { + source = getcol(texture, xoffset >> FRACBITS); + source2 = nullptr; + height = texture->GetHeight(); + texturefracx = 0; + } + else + { + double ymagnitude = fabs(uv_stepd); + double magnitude = MAX(ymagnitude, xmagnitude); + double min_lod = -1000.0; + double lod = MAX(log2(magnitude) + r_lod_bias, min_lod); + bool magnifying = lod < 0.0f; + + int mipmap_offset = 0; + int mip_width = texture->GetWidth(); + int mip_height = texture->GetHeight(); + if (r_mipmap && texture->Mipmapped() && mip_width > 1 && mip_height > 1) + { + uint32_t xpos = (uint32_t)((((uint64_t)xoffset) << FRACBITS) / mip_width); + + int level = (int)lod; + while (level > 0 && mip_width > 1 && mip_height > 1) + { + mipmap_offset += mip_width * mip_height; + level--; + mip_width = MAX(mip_width >> 1, 1); + mip_height = MAX(mip_height >> 1, 1); + } + xoffset = (xpos >> FRACBITS) * mip_width; + } + + const uint32_t *pixels 
= texture->GetPixelsBgra() + mipmap_offset; + + bool filter_nearest = (magnifying && !r_magfilter) || (!magnifying && !r_minfilter); + if (filter_nearest) + { + int tx = (xoffset >> FRACBITS) % mip_width; + if (tx < 0) + tx += mip_width; + source = (BYTE*)(pixels + tx * mip_height); + source2 = nullptr; + height = mip_height; + texturefracx = 0; + } + else + { + xoffset -= FRACUNIT / 2; + int tx0 = (xoffset >> FRACBITS) % mip_width; + if (tx0 < 0) + tx0 += mip_width; + int tx1 = (tx0 + 1) % mip_width; + source = (BYTE*)(pixels + tx0 * mip_height); + source2 = (BYTE*)(pixels + tx1 * mip_height); + height = mip_height; + texturefracx = (xoffset >> (FRACBITS - 4)) & 15; + } + } + } + } + + // Draw a column with support for non-power-of-two ranges + static void Draw1Column(int x, int y1, int y2, WallSampler &sampler, DrawerFunc draw1column) + { + if (r_dynlights) + { + // Find column position in view space + float w1 = 1.0f / WallC.sz1; + float w2 = 1.0f / WallC.sz2; + float t = (x - WallC.sx1 + 0.5f) / (WallC.sx2 - WallC.sx1); + float wcol = w1 * (1.0f - t) + w2 * t; + float zcol = 1.0f / wcol; + dc_viewpos.X = (float)((x + 0.5 - CenterX) / CenterX * zcol); + dc_viewpos.Y = zcol; + dc_viewpos.Z = (float)((CenterY - y1 - 0.5) / InvZtoScale * zcol); + dc_viewpos_step.Z = (float)(-zcol / InvZtoScale); + + static TriLight lightbuffer[64 * 1024]; + static int nextlightindex = 0; + + // Setup lights for column + dc_num_lights = 0; + dc_lights = lightbuffer + nextlightindex; + FLightNode *cur_node = dc_light_list; + while (cur_node && nextlightindex < 64 * 1024) + { + if (!(cur_node->lightsource->flags2&MF2_DORMANT)) + { + double lightX = cur_node->lightsource->X() - ViewPos.X; + double lightY = cur_node->lightsource->Y() - ViewPos.Y; + double lightZ = cur_node->lightsource->Z() - ViewPos.Z; + + float lx = (float)(lightX * ViewSin - lightY * ViewCos) - dc_viewpos.X; + float ly = (float)(lightX * ViewTanCos + lightY * ViewTanSin) - dc_viewpos.Y; + float lz = (float)lightZ; + 
+ // Precalculate the constant part of the dot here so the drawer doesn't have to. + float lconstant = lx * lx + ly * ly; + + // Include light only if it touches this column + float radius = cur_node->lightsource->GetRadius(); + if (radius * radius >= lconstant) + { + uint32_t red = cur_node->lightsource->GetRed(); + uint32_t green = cur_node->lightsource->GetGreen(); + uint32_t blue = cur_node->lightsource->GetBlue(); + + nextlightindex++; + auto &light = dc_lights[dc_num_lights++]; + light.x = lconstant; + light.z = lz; + light.radius = 256.0f / cur_node->lightsource->GetRadius(); + light.color = (red << 16) | (green << 8) | blue; + } + } + + cur_node = cur_node->nextLight; + } + + if (nextlightindex == 64 * 1024) + nextlightindex = 0; + } + else + { + dc_num_lights = 0; + } + + if (r_swtruecolor) + { + int count = y2 - y1; + + dc_source = sampler.source; + dc_source2 = sampler.source2; + dc_texturefracx = sampler.texturefracx; + dc_dest = (ylookup[y1] + x) * 4 + dc_destorg; + dc_count = count; + dc_iscale = sampler.uv_step; + dc_texturefrac = sampler.uv_pos; + dc_textureheight = sampler.height; + (R_Drawers()->*draw1column)(); + + uint64_t step64 = sampler.uv_step; + uint64_t pos64 = sampler.uv_pos; + sampler.uv_pos = (uint32_t)(pos64 + step64 * count); + } + else + { + if (sampler.uv_max == 0 || sampler.uv_step == 0) // power of two + { + int count = y2 - y1; + + dc_source = sampler.source; + dc_source2 = sampler.source2; + dc_texturefracx = sampler.texturefracx; + dc_dest = (ylookup[y1] + x) + dc_destorg; + dc_count = count; + dc_iscale = sampler.uv_step; + dc_texturefrac = sampler.uv_pos; + (R_Drawers()->*draw1column)(); + + uint64_t step64 = sampler.uv_step; + uint64_t pos64 = sampler.uv_pos; + sampler.uv_pos = (uint32_t)(pos64 + step64 * count); + } + else + { + uint32_t uv_pos = sampler.uv_pos; + + uint32_t left = y2 - y1; + while (left > 0) + { + uint32_t available = sampler.uv_max - uv_pos; + uint32_t next_uv_wrap = available / sampler.uv_step; + if 
(available % sampler.uv_step != 0) + next_uv_wrap++; + uint32_t count = MIN(left, next_uv_wrap); + + dc_source = sampler.source; + dc_source2 = sampler.source2; + dc_texturefracx = sampler.texturefracx; + dc_dest = (ylookup[y1] + x) + dc_destorg; + dc_count = count; + dc_iscale = sampler.uv_step; + dc_texturefrac = uv_pos; + (R_Drawers()->*draw1column)(); + + left -= count; + uv_pos += sampler.uv_step * count; + if (uv_pos >= sampler.uv_max) + uv_pos -= sampler.uv_max; + } + + sampler.uv_pos = uv_pos; + } + } + } + + static void ProcessWallWorker( + int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade, + const BYTE *(*getcol)(FTexture *tex, int x), DrawerFunc drawcolumn) + { + if (rw_pic->UseType == FTexture::TEX_Null) + return; + + fixed_t xoffset = rw_offset; + + rw_pic->GetHeight(); // To ensure that rw_pic->HeightBits has been set + int fracbits = 32 - rw_pic->HeightBits; + if (fracbits == 32) + { // Hack for one pixel tall textures + fracbits = 0; + yrepeat = 0; + dc_texturemid = 0; + } + + dc_wall_fracbits = r_swtruecolor ? 
FRACBITS : fracbits; + + bool fixed = (fixedcolormap != NULL || fixedlightlev >= 0); + if (fixed) + { + dc_wall_colormap[0] = dc_colormap; + dc_wall_colormap[1] = dc_colormap; + dc_wall_colormap[2] = dc_colormap; + dc_wall_colormap[3] = dc_colormap; + dc_wall_light[0] = 0; + dc_wall_light[1] = 0; + dc_wall_light[2] = 0; + dc_wall_light[3] = 0; + } + + if (fixedcolormap) + R_SetColorMapLight(fixedcolormap, 0, 0); + else + R_SetColorMapLight(basecolormap, 0, 0); + + float light = rw_light; + + double xmagnitude = 1.0; + + for (int x = x1; x < x2; x++, light += rw_lightstep) + { + int y1 = uwal[x]; + int y2 = dwal[x]; + if (y2 <= y1) + continue; + + if (!fixed) + R_SetColorMapLight(basecolormap, light, wallshade); + + if (x + 1 < x2) xmagnitude = fabs(FIXED2DBL(lwal[x + 1]) - FIXED2DBL(lwal[x])); + + WallSampler sampler(y1, swal[x], yrepeat, lwal[x] + xoffset, xmagnitude, rw_pic, getcol); + Draw1Column(x, y1, y2, sampler, drawcolumn); + } + + NetUpdate(); + } + + static void ProcessNormalWall(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade, const BYTE *(*getcol)(FTexture *tex, int x) = R_GetColumn) + { + ProcessWallWorker(x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, getcol, &SWPixelFormatDrawers::DrawWallColumn); + } + + static void ProcessMaskedWall(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade, const BYTE *(*getcol)(FTexture *tex, int x) = R_GetColumn) + { + if (!rw_pic->bMasked) // Textures that aren't masked can use the faster ProcessNormalWall. 
+ { + ProcessNormalWall(x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, getcol); + } + else + { + ProcessWallWorker(x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, getcol, &SWPixelFormatDrawers::DrawWallMaskedColumn); + } + } + + static void ProcessTranslucentWall(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade, const BYTE *(*getcol)(FTexture *tex, int x) = R_GetColumn) + { + DrawerFunc drawcol1 = R_GetTransMaskDrawer(); + if (drawcol1 == nullptr) + { + // The current translucency is unsupported, so draw with regular ProcessMaskedWall instead. + ProcessMaskedWall(x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, getcol); + } + else + { + ProcessWallWorker(x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, getcol, drawcol1); + } + } + + static void ProcessStripedWall(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade) + { + FDynamicColormap *startcolormap = basecolormap; + bool fogginess = foggy; + + short most1[MAXWIDTH], most2[MAXWIDTH], most3[MAXWIDTH]; + short *up, *down; + + up = uwal; + down = most1; + + assert(WallC.sx1 <= x1); + assert(WallC.sx2 >= x2); + + // kg3D - fake floors instead of zdoom light list + for (unsigned int i = 0; i < frontsector->e->XFloor.lightlist.Size(); i++) + { + int j = R_CreateWallSegmentYSloped(most3, frontsector->e->XFloor.lightlist[i].plane, &WallC, curline, MirrorFlags & RF_XFLIP); + if (j != 3) + { + for (int j = x1; j < x2; ++j) + { + down[j] = clamp(most3[j], up[j], dwal[j]); + } + ProcessNormalWall(x1, x2, up, down, swal, lwal, yrepeat, wallshade); + up = down; + down = (down == most1) ? 
most2 : most1; + } + + lightlist_t *lit = &frontsector->e->XFloor.lightlist[i]; + basecolormap = lit->extra_colormap; + wallshade = LIGHT2SHADE(curline->sidedef->GetLightLevel(fogginess, *lit->p_lightlevel, lit->lightsource != NULL) + r_actualextralight); + } + + ProcessNormalWall(x1, x2, up, dwal, swal, lwal, yrepeat, wallshade); + basecolormap = startcolormap; + } + + static void ProcessWall(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade, bool mask) + { + if (mask) + { + if (colfunc == basecolfunc) + { + ProcessMaskedWall(x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade); + } + else + { + ProcessTranslucentWall(x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade); + } + } + else + { + if (fixedcolormap != NULL || fixedlightlev >= 0 || !(frontsector->e && frontsector->e->XFloor.lightlist.Size())) + { + ProcessNormalWall(x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade); + } + else + { + ProcessStripedWall(x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade); + } + } + } + + //============================================================================= + // + // ProcessWallNP2 + // + // This is a wrapper around ProcessWall that helps it tile textures whose heights + // are not powers of 2. It divides the wall into texture-sized strips and calls + // ProcessNormalWall for each of those. Since only one repetition of the texture fits + // in each strip, ProcessWall will not tile. 
+ // + //============================================================================= + + static void ProcessWallNP2(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, double top, double bot, int wallshade, bool mask) + { + short most1[MAXWIDTH], most2[MAXWIDTH], most3[MAXWIDTH]; + short *up, *down; + double texheight = rw_pic->GetHeight(); + double partition; + double scaledtexheight = texheight / yrepeat; + + if (yrepeat >= 0) + { // normal orientation: draw strips from top to bottom + partition = top - fmod(top - dc_texturemid / yrepeat - ViewPos.Z, scaledtexheight); + if (partition == top) + { + partition -= scaledtexheight; + } + up = uwal; + down = most1; + dc_texturemid = (partition - ViewPos.Z) * yrepeat + texheight; + while (partition > bot) + { + int j = R_CreateWallSegmentY(most3, partition - ViewPos.Z, &WallC); + if (j != 3) + { + for (int j = x1; j < x2; ++j) + { + down[j] = clamp(most3[j], up[j], dwal[j]); + } + ProcessWall(x1, x2, up, down, swal, lwal, yrepeat, wallshade, mask); + up = down; + down = (down == most1) ? most2 : most1; + } + partition -= scaledtexheight; + dc_texturemid -= texheight; + } + ProcessWall(x1, x2, up, dwal, swal, lwal, yrepeat, wallshade, mask); + } + else + { // upside down: draw strips from bottom to top + partition = bot - fmod(bot - dc_texturemid / yrepeat - ViewPos.Z, scaledtexheight); + up = most1; + down = dwal; + dc_texturemid = (partition - ViewPos.Z) * yrepeat + texheight; + while (partition < top) + { + int j = R_CreateWallSegmentY(most3, partition - ViewPos.Z, &WallC); + if (j != 12) + { + for (int j = x1; j < x2; ++j) + { + up[j] = clamp(most3[j], uwal[j], down[j]); + } + ProcessWall(x1, x2, up, down, swal, lwal, yrepeat, wallshade, mask); + down = up; + up = (up == most1) ? 
most2 : most1; + } + partition -= scaledtexheight; + dc_texturemid -= texheight; + } + ProcessWall(x1, x2, uwal, down, swal, lwal, yrepeat, wallshade, mask); + } + } + + void R_DrawDrawSeg(drawseg_t *ds, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade) + { + if (rw_pic->GetHeight() != 1 << rw_pic->HeightBits) + { + double frontcz1 = ds->curline->frontsector->ceilingplane.ZatPoint(ds->curline->v1); + double frontfz1 = ds->curline->frontsector->floorplane.ZatPoint(ds->curline->v1); + double frontcz2 = ds->curline->frontsector->ceilingplane.ZatPoint(ds->curline->v2); + double frontfz2 = ds->curline->frontsector->floorplane.ZatPoint(ds->curline->v2); + double top = MAX(frontcz1, frontcz2); + double bot = MIN(frontfz1, frontfz2); + if (fake3D & FAKE3D_CLIPTOP) + { + top = MIN(top, sclipTop); + } + if (fake3D & FAKE3D_CLIPBOTTOM) + { + bot = MAX(bot, sclipBottom); + } + ProcessWallNP2(x1, x2, uwal, dwal, swal, lwal, yrepeat, top, bot, wallshade, true); + } + else + { + ProcessWall(x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, true); + } + } + + + void R_DrawWallSegment(FTexture *rw_pic, int x1, int x2, short *walltop, short *wallbottom, float *swall, fixed_t *lwall, double yscale, double top, double bottom, bool mask, int wallshade, FLightNode *light_list) + { + dc_light_list = light_list; + if (rw_pic->GetHeight() != 1 << rw_pic->HeightBits) + { + ProcessWallNP2(x1, x2, walltop, wallbottom, swall, lwall, yscale, top, bottom, wallshade, false); + } + else + { + ProcessWall(x1, x2, walltop, wallbottom, swall, lwall, yscale, wallshade, false); + } + dc_light_list = nullptr; + } + + void R_DrawSkySegment(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade, const BYTE *(*getcol)(FTexture *tex, int x)) + { + ProcessNormalWall(x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, getcol); + } +} diff --git a/src/swrenderer/scene/r_walldraw.h 
b/src/swrenderer/line/r_walldraw.h similarity index 97% rename from src/swrenderer/scene/r_walldraw.h rename to src/swrenderer/line/r_walldraw.h index 1a1a50504b..9ca312fb04 100644 --- a/src/swrenderer/scene/r_walldraw.h +++ b/src/swrenderer/line/r_walldraw.h @@ -6,6 +6,8 @@ struct FLightNode; namespace swrenderer { + struct drawseg_t; + struct WallSampler { WallSampler() { } diff --git a/src/swrenderer/line/r_wallsetup.cpp b/src/swrenderer/line/r_wallsetup.cpp new file mode 100644 index 0000000000..90ba7793a1 --- /dev/null +++ b/src/swrenderer/line/r_wallsetup.cpp @@ -0,0 +1,227 @@ + +#include +#include +#include "templates.h" +#include "i_system.h" +#include "doomdef.h" +#include "doomstat.h" +#include "doomdata.h" +#include "p_lnspec.h" +#include "r_sky.h" +#include "v_video.h" +#include "m_swap.h" +#include "w_wad.h" +#include "stats.h" +#include "a_sharedglobal.h" +#include "d_net.h" +#include "g_level.h" +#include "r_wallsetup.h" +#include "v_palette.h" +#include "r_data/colormaps.h" +#include "r_walldraw.h" +#include "swrenderer/r_main.h" +#include "swrenderer/r_memory.h" +#include "swrenderer/line/r_line.h" + +namespace swrenderer +{ + short walltop[MAXWIDTH]; + short wallbottom[MAXWIDTH]; + short wallupper[MAXWIDTH]; + short walllower[MAXWIDTH]; + float swall[MAXWIDTH]; + fixed_t lwall[MAXWIDTH]; + double lwallscale; + + int R_CreateWallSegmentY(short *outbuf, double z, const FWallCoords *wallc) + { + return R_CreateWallSegmentY(outbuf, z, z, wallc); + } + + int R_CreateWallSegmentY(short *outbuf, double z1, double z2, const FWallCoords *wallc) + { + float y1 = (float)(CenterY - z1 * InvZtoScale / wallc->sz1); + float y2 = (float)(CenterY - z2 * InvZtoScale / wallc->sz2); + + if (y1 < 0 && y2 < 0) // entire line is above screen + { + memset(&outbuf[wallc->sx1], 0, (wallc->sx2 - wallc->sx1) * sizeof(outbuf[0])); + return 3; + } + else if (y1 > viewheight && y2 > viewheight) // entire line is below screen + { + fillshort(&outbuf[wallc->sx1], wallc->sx2 - 
wallc->sx1, viewheight); + return 12; + } + + if (wallc->sx2 <= wallc->sx1) + return 0; + + float rcp_delta = 1.0f / (wallc->sx2 - wallc->sx1); + if (y1 >= 0.0f && y2 >= 0.0f && xs_RoundToInt(y1) <= viewheight && xs_RoundToInt(y2) <= viewheight) + { + for (int x = wallc->sx1; x < wallc->sx2; x++) + { + float t = (x - wallc->sx1) * rcp_delta; + float y = y1 * (1.0f - t) + y2 * t; + outbuf[x] = (short)xs_RoundToInt(y); + } + } + else + { + for (int x = wallc->sx1; x < wallc->sx2; x++) + { + float t = (x - wallc->sx1) * rcp_delta; + float y = y1 * (1.0f - t) + y2 * t; + outbuf[x] = (short)clamp(xs_RoundToInt(y), 0, viewheight); + } + } + + return 0; + } + + int R_CreateWallSegmentYSloped(short *outbuf, const secplane_t &plane, const FWallCoords *wallc, seg_t *curline, bool xflip) + { + if (!plane.isSlope()) + { + return R_CreateWallSegmentY(outbuf, plane.Zat0() - ViewPos.Z, wallc); + } + else + { + // Get Z coordinates at both ends of the line + double x, y, den, z1, z2; + if (xflip) + { + x = curline->v2->fX(); + y = curline->v2->fY(); + if (wallc->sx1 == 0 && 0 != (den = wallc->tleft.X - wallc->tright.X + wallc->tleft.Y - wallc->tright.Y)) + { + double frac = (wallc->tleft.Y + wallc->tleft.X) / den; + x -= frac * (x - curline->v1->fX()); + y -= frac * (y - curline->v1->fY()); + } + z1 = plane.ZatPoint(x, y) - ViewPos.Z; + + if (wallc->sx2 > wallc->sx1 + 1) + { + x = curline->v1->fX(); + y = curline->v1->fY(); + if (wallc->sx2 == viewwidth && 0 != (den = wallc->tleft.X - wallc->tright.X - wallc->tleft.Y + wallc->tright.Y)) + { + double frac = (wallc->tright.Y - wallc->tright.X) / den; + x += frac * (curline->v2->fX() - x); + y += frac * (curline->v2->fY() - y); + } + z2 = plane.ZatPoint(x, y) - ViewPos.Z; + } + else + { + z2 = z1; + } + } + else + { + x = curline->v1->fX(); + y = curline->v1->fY(); + if (wallc->sx1 == 0 && 0 != (den = wallc->tleft.X - wallc->tright.X + wallc->tleft.Y - wallc->tright.Y)) + { + double frac = (wallc->tleft.Y + wallc->tleft.X) / den; + x 
+= frac * (curline->v2->fX() - x); + y += frac * (curline->v2->fY() - y); + } + z1 = plane.ZatPoint(x, y) - ViewPos.Z; + + if (wallc->sx2 > wallc->sx1 + 1) + { + x = curline->v2->fX(); + y = curline->v2->fY(); + if (wallc->sx2 == viewwidth && 0 != (den = wallc->tleft.X - wallc->tright.X - wallc->tleft.Y + wallc->tright.Y)) + { + double frac = (wallc->tright.Y - wallc->tright.X) / den; + x -= frac * (x - curline->v1->fX()); + y -= frac * (y - curline->v1->fY()); + } + z2 = plane.ZatPoint(x, y) - ViewPos.Z; + } + else + { + z2 = z1; + } + } + + return R_CreateWallSegmentY(outbuf, z1, z2, wallc); + } + } + + void PrepWall(float *vstep, fixed_t *upos, double walxrepeat, int x1, int x2) + { + float uOverZ = WallT.UoverZorg + WallT.UoverZstep * (float)(x1 + 0.5 - CenterX); + float invZ = WallT.InvZorg + WallT.InvZstep * (float)(x1 + 0.5 - CenterX); + float uGradient = WallT.UoverZstep; + float zGradient = WallT.InvZstep; + float xrepeat = (float)walxrepeat; + float depthScale = (float)(WallT.InvZstep * WallTMapScale2); + float depthOrg = (float)(-WallT.UoverZstep * WallTMapScale2); + + if (xrepeat < 0.0f) + { + for (int x = x1; x < x2; x++) + { + float u = uOverZ / invZ; + + upos[x] = (fixed_t)((xrepeat - u * xrepeat) * FRACUNIT); + vstep[x] = depthOrg + u * depthScale; + + uOverZ += uGradient; + invZ += zGradient; + } + } + else + { + for (int x = x1; x < x2; x++) + { + float u = uOverZ / invZ; + + upos[x] = (fixed_t)(u * xrepeat * FRACUNIT); + vstep[x] = depthOrg + u * depthScale; + + uOverZ += uGradient; + invZ += zGradient; + } + } + } + + void PrepLWall(fixed_t *upos, double walxrepeat, int x1, int x2) + { + float uOverZ = WallT.UoverZorg + WallT.UoverZstep * (float)(x1 + 0.5 - CenterX); + float invZ = WallT.InvZorg + WallT.InvZstep * (float)(x1 + 0.5 - CenterX); + float uGradient = WallT.UoverZstep; + float zGradient = WallT.InvZstep; + float xrepeat = (float)walxrepeat; + + if (xrepeat < 0.0f) + { + for (int x = x1; x < x2; x++) + { + float u = uOverZ / invZ * 
xrepeat - xrepeat; + + upos[x] = (fixed_t)(u * FRACUNIT); + + uOverZ += uGradient; + invZ += zGradient; + } + } + else + { + for (int x = x1; x < x2; x++) + { + float u = uOverZ / invZ * xrepeat; + + upos[x] = (fixed_t)(u * FRACUNIT); + + uOverZ += uGradient; + invZ += zGradient; + } + } + } +} diff --git a/src/swrenderer/line/r_wallsetup.h b/src/swrenderer/line/r_wallsetup.h new file mode 100644 index 0000000000..4e483601f7 --- /dev/null +++ b/src/swrenderer/line/r_wallsetup.h @@ -0,0 +1,24 @@ + +#pragma once + +#include "r_defs.h" + +namespace swrenderer +{ + struct FWallCoords; + + extern short walltop[MAXWIDTH]; + extern short wallbottom[MAXWIDTH]; + extern short wallupper[MAXWIDTH]; + extern short walllower[MAXWIDTH]; + extern float swall[MAXWIDTH]; + extern fixed_t lwall[MAXWIDTH]; + extern double lwallscale; + + int R_CreateWallSegmentY(short *outbuf, double z1, double z2, const FWallCoords *wallc); + int R_CreateWallSegmentYSloped(short *outbuf, const secplane_t &plane, const FWallCoords *wallc, seg_t *line, bool xflip); + int R_CreateWallSegmentY(short *outbuf, double z, const FWallCoords *wallc); + + void PrepWall(float *swall, fixed_t *lwall, double walxrepeat, int x1, int x2); + void PrepLWall(fixed_t *lwall, double walxrepeat, int x1, int x2); +} diff --git a/src/swrenderer/plane/r_flatplane.cpp b/src/swrenderer/plane/r_flatplane.cpp index 354396cc7b..6d3206f23f 100644 --- a/src/swrenderer/plane/r_flatplane.cpp +++ b/src/swrenderer/plane/r_flatplane.cpp @@ -24,7 +24,6 @@ #include "r_data/colormaps.h" #include "swrenderer/drawers/r_draw_rgba.h" #include "gl/dynlights/gl_dynlight.h" -#include "swrenderer/scene/r_walldraw.h" #include "swrenderer/segments/r_clipsegment.h" #include "swrenderer/segments/r_drawsegment.h" #include "swrenderer/scene/r_portal.h" diff --git a/src/swrenderer/plane/r_fogboundary.cpp b/src/swrenderer/plane/r_fogboundary.cpp index fb140db719..03eed8604d 100644 --- a/src/swrenderer/plane/r_fogboundary.cpp +++ 
b/src/swrenderer/plane/r_fogboundary.cpp @@ -24,7 +24,6 @@ #include "r_data/colormaps.h" #include "swrenderer/drawers/r_draw_rgba.h" #include "gl/dynlights/gl_dynlight.h" -#include "swrenderer/scene/r_walldraw.h" #include "swrenderer/segments/r_clipsegment.h" #include "swrenderer/segments/r_drawsegment.h" #include "swrenderer/scene/r_portal.h" diff --git a/src/swrenderer/plane/r_skyplane.cpp b/src/swrenderer/plane/r_skyplane.cpp index 5ad928ffaf..e048751cc8 100644 --- a/src/swrenderer/plane/r_skyplane.cpp +++ b/src/swrenderer/plane/r_skyplane.cpp @@ -24,9 +24,10 @@ #include "r_data/colormaps.h" #include "swrenderer/drawers/r_draw_rgba.h" #include "gl/dynlights/gl_dynlight.h" -#include "swrenderer/scene/r_walldraw.h" #include "swrenderer/segments/r_clipsegment.h" #include "swrenderer/segments/r_drawsegment.h" +#include "swrenderer/line/r_wallsetup.h" +#include "swrenderer/line/r_walldraw.h" #include "swrenderer/scene/r_portal.h" #include "swrenderer/r_memory.h" diff --git a/src/swrenderer/plane/r_slopeplane.cpp b/src/swrenderer/plane/r_slopeplane.cpp index 3742f2f40e..4eb085ecf2 100644 --- a/src/swrenderer/plane/r_slopeplane.cpp +++ b/src/swrenderer/plane/r_slopeplane.cpp @@ -24,7 +24,6 @@ #include "r_data/colormaps.h" #include "swrenderer/drawers/r_draw_rgba.h" #include "gl/dynlights/gl_dynlight.h" -#include "swrenderer/scene/r_walldraw.h" #include "swrenderer/segments/r_clipsegment.h" #include "swrenderer/segments/r_drawsegment.h" #include "swrenderer/scene/r_portal.h" diff --git a/src/swrenderer/scene/r_bsp.cpp b/src/swrenderer/scene/r_bsp.cpp index debee35c68..125a3ae218 100644 --- a/src/swrenderer/scene/r_bsp.cpp +++ b/src/swrenderer/scene/r_bsp.cpp @@ -37,6 +37,7 @@ #include "swrenderer/plane/r_visibleplane.h" #include "swrenderer/things/r_particle.h" #include "swrenderer/segments/r_clipsegment.h" +#include "swrenderer/line/r_wallsetup.h" #include "r_things.h" #include "r_3dfloors.h" #include "r_portal.h" @@ -76,19 +77,12 @@ extern bool rw_prepped; extern bool 
rw_havehigh, rw_havelow; extern int rw_floorstat, rw_ceilstat; extern bool rw_mustmarkfloor, rw_mustmarkceiling; -extern short walltop[MAXWIDTH]; // [RH] record max extents of wall -extern short wallbottom[MAXWIDTH]; -extern short wallupper[MAXWIDTH]; -extern short walllower[MAXWIDTH]; double rw_backcz1, rw_backcz2; double rw_backfz1, rw_backfz2; double rw_frontcz1, rw_frontcz2; double rw_frontfz1, rw_frontfz2; -FWallCoords WallC; -FWallTmapVals WallT; - static BYTE FakeSide; int WindowLeft, WindowRight; @@ -435,12 +429,12 @@ void R_AddLine (seg_t *line) if (rw_frontcz1 > rw_backcz1 || rw_frontcz2 > rw_backcz2) { rw_havehigh = true; - R_CreateWallSegmentYSloped (wallupper, backsector->ceilingplane, &WallC); + R_CreateWallSegmentYSloped (wallupper, backsector->ceilingplane, &WallC, curline, MirrorFlags & RF_XFLIP); } if (rw_frontfz1 < rw_backfz1 || rw_frontfz2 < rw_backfz2) { rw_havelow = true; - R_CreateWallSegmentYSloped (walllower, backsector->floorplane, &WallC); + R_CreateWallSegmentYSloped (walllower, backsector->floorplane, &WallC, curline, MirrorFlags & RF_XFLIP); } // Portal @@ -541,8 +535,8 @@ void R_AddLine (seg_t *line) } else { - rw_ceilstat = R_CreateWallSegmentYSloped (walltop, frontsector->ceilingplane, &WallC); - rw_floorstat = R_CreateWallSegmentYSloped (wallbottom, frontsector->floorplane, &WallC); + rw_ceilstat = R_CreateWallSegmentYSloped (walltop, frontsector->ceilingplane, &WallC, curline, MirrorFlags & RF_XFLIP); + rw_floorstat = R_CreateWallSegmentYSloped (wallbottom, frontsector->floorplane, &WallC, curline, MirrorFlags & RF_XFLIP); // [RH] treat off-screen walls as solid #if 0 // Maybe later... @@ -568,125 +562,26 @@ void R_AddLine (seg_t *line) } } -// -// FWallCoords :: Init -// -// Transform and clip coordinates. 
Returns true if it was clipped away -// -bool FWallCoords::Init(const DVector2 &pt1, const DVector2 &pt2, double too_close) -{ - tleft.X = float(pt1.X * ViewSin - pt1.Y * ViewCos); - tright.X = float(pt2.X * ViewSin - pt2.Y * ViewCos); - tleft.Y = float(pt1.X * ViewTanCos + pt1.Y * ViewTanSin); - tright.Y = float(pt2.X * ViewTanCos + pt2.Y * ViewTanSin); - - if (MirrorFlags & RF_XFLIP) - { - float t = -tleft.X; - tleft.X = -tright.X; - tright.X = t; - swapvalues(tleft.Y, tright.Y); - } - - if (tleft.X >= -tleft.Y) - { - if (tleft.X > tleft.Y) return true; // left edge is off the right side - if (tleft.Y == 0) return true; - sx1 = xs_RoundToInt(CenterX + tleft.X * CenterX / tleft.Y); - sz1 = tleft.Y; - } - else - { - if (tright.X < -tright.Y) return true; // wall is off the left side - float den = tleft.X - tright.X - tright.Y + tleft.Y; - if (den == 0) return true; - sx1 = 0; - sz1 = tleft.Y + (tright.Y - tleft.Y) * (tleft.X + tleft.Y) / den; - } - - if (sz1 < too_close) - return true; - - if (tright.X <= tright.Y) - { - if (tright.X < -tright.Y) return true; // right edge is off the left side - if (tright.Y == 0) return true; - sx2 = xs_RoundToInt(CenterX + tright.X * CenterX / tright.Y); - sz2 = tright.Y; - } - else - { - if (tleft.X > tleft.Y) return true; // wall is off the right side - float den = tright.Y - tleft.Y - tright.X + tleft.X; - if (den == 0) return true; - sx2 = viewwidth; - sz2 = tleft.Y + (tright.Y - tleft.Y) * (tleft.X - tleft.Y) / den; - } - - if (sz2 < too_close || sx2 <= sx1) - return true; - - return false; -} - -void FWallTmapVals::InitFromWallCoords(const FWallCoords *wallc) -{ - const FVector2 *left = &wallc->tleft; - const FVector2 *right = &wallc->tright; - - if (MirrorFlags & RF_XFLIP) - { - swapvalues(left, right); - } - UoverZorg = left->X * centerx; - UoverZstep = -left->Y; - InvZorg = (left->X - right->X) * centerx; - InvZstep = right->Y - left->Y; -} - -void FWallTmapVals::InitFromLine(const DVector2 &left, const DVector2 &right) 
-{ // Coordinates should have already had viewx,viewy subtracted - double fullx1 = left.X * ViewSin - left.Y * ViewCos; - double fullx2 = right.X * ViewSin - right.Y * ViewCos; - double fully1 = left.X * ViewTanCos + left.Y * ViewTanSin; - double fully2 = right.X * ViewTanCos + right.Y * ViewTanSin; - - if (MirrorFlags & RF_XFLIP) - { - fullx1 = -fullx1; - fullx2 = -fullx2; - } - - UoverZorg = float(fullx1 * centerx); - UoverZstep = float(-fully1); - InvZorg = float((fullx1 - fullx2) * centerx); - InvZstep = float(fully2 - fully1); -} - -// -// R_CheckBBox // Checks BSP node/subtree bounding box. // Returns true if some part of the bbox might be visible. -// -extern "C" const int checkcoord[12][4] = -{ - {3,0,2,1}, - {3,0,2,0}, - {3,1,2,0}, - {0}, - {2,0,2,1}, - {0,0,0,0}, - {3,1,3,0}, - {0}, - {2,0,3,1}, - {2,1,3,1}, - {2,1,3,0} -}; - - static bool R_CheckBBox (float *bspcoord) // killough 1/28/98: static { + static const int checkcoord[12][4] = + { + { 3,0,2,1 }, + { 3,0,2,0 }, + { 3,1,2,0 }, + { 0 }, + { 2,0,2,1 }, + { 0,0,0,0 }, + { 3,1,3,0 }, + { 0 }, + { 2,0,3,1 }, + { 2,1,3,1 }, + { 2,1,3,0 } + }; + int boxx; int boxy; int boxpos; diff --git a/src/swrenderer/scene/r_bsp.h b/src/swrenderer/scene/r_bsp.h index 6996a684b2..3ab94b2882 100644 --- a/src/swrenderer/scene/r_bsp.h +++ b/src/swrenderer/scene/r_bsp.h @@ -37,29 +37,6 @@ namespace swrenderer // the texture calculations. 
#define TOO_CLOSE_Z (3072.0 / (1<<12)) -struct FWallCoords -{ - FVector2 tleft; // coords at left of wall in view space rx1,ry1 - FVector2 tright; // coords at right of wall in view space rx2,ry2 - - float sz1, sz2; // depth at left, right of wall in screen space yb1,yb2 - short sx1, sx2; // x coords at left, right of wall in screen space xb1,xb2 - - bool Init(const DVector2 &pt1, const DVector2 &pt2, double too_close); -}; - -struct FWallTmapVals -{ - float UoverZorg, UoverZstep; - float InvZorg, InvZstep; - - void InitFromWallCoords(const FWallCoords *wallc); - void InitFromLine(const DVector2 &left, const DVector2 &right); -}; - -extern FWallCoords WallC; -extern FWallTmapVals WallT; - enum { FAKED_Center, @@ -78,8 +55,6 @@ extern sector_t* backsector; extern int WindowLeft, WindowRight; extern WORD MirrorFlags; -typedef void (*drawfunc_t) (int start, int stop); - void R_RenderBSPNode (void *node); // killough 4/13/98: fake floors/ceilings for deep water / fake ceilings: diff --git a/src/swrenderer/scene/r_portal.cpp b/src/swrenderer/scene/r_portal.cpp index acb87dbb23..26dbb2e1cf 100644 --- a/src/swrenderer/scene/r_portal.cpp +++ b/src/swrenderer/scene/r_portal.cpp @@ -38,6 +38,7 @@ #include "swrenderer/segments/r_clipsegment.h" #include "swrenderer/segments/r_drawsegment.h" #include "swrenderer/plane/r_visibleplane.h" +#include "swrenderer/scene/r_bsp.h" #include "swrenderer/r_main.h" #include "swrenderer/r_memory.h" diff --git a/src/swrenderer/scene/r_segs.cpp b/src/swrenderer/scene/r_segs.cpp index 0510129739..575e82ef4a 100644 --- a/src/swrenderer/scene/r_segs.cpp +++ b/src/swrenderer/scene/r_segs.cpp @@ -48,7 +48,9 @@ #include "swrenderer/drawers/r_draw.h" #include "v_palette.h" #include "r_data/colormaps.h" -#include "r_walldraw.h" +#include "swrenderer/line/r_line.h" +#include "swrenderer/line/r_wallsetup.h" +#include "swrenderer/line/r_walldraw.h" #include "swrenderer/segments/r_drawsegment.h" #include "r_portal.h" #include 
"swrenderer/things/r_wallsprite.h" @@ -93,13 +95,6 @@ fixed_t rw_offset_mid; fixed_t rw_offset_bottom; -short walltop[MAXWIDTH]; // [RH] record max extents of wall -short wallbottom[MAXWIDTH]; -short wallupper[MAXWIDTH]; -short walllower[MAXWIDTH]; -float swall[MAXWIDTH]; -fixed_t lwall[MAXWIDTH]; -double lwallscale; // // regular wall @@ -198,13 +193,13 @@ void ClipMidtex(int x1, int x2) { short most[MAXWIDTH]; - R_CreateWallSegmentYSloped(most, curline->frontsector->ceilingplane, &WallC); + R_CreateWallSegmentYSloped(most, curline->frontsector->ceilingplane, &WallC, curline, MirrorFlags & RF_XFLIP); for (int i = x1; i < x2; ++i) { if (wallupper[i] < most[i]) wallupper[i] = most[i]; } - R_CreateWallSegmentYSloped(most, curline->frontsector->floorplane, &WallC); + R_CreateWallSegmentYSloped(most, curline->frontsector->floorplane, &WallC, curline, MirrorFlags & RF_XFLIP); for (int i = x1; i < x2; ++i) { if (walllower[i] > most[i]) @@ -1327,7 +1322,7 @@ void R_NewWall (bool needlights) // wall but nothing to draw for it. // Recalculate walltop so that the wall is clipped by the back sector's // ceiling instead of the front sector's ceiling. - R_CreateWallSegmentYSloped (walltop, backsector->ceilingplane, &WallC); + R_CreateWallSegmentYSloped (walltop, backsector->ceilingplane, &WallC, curline, MirrorFlags & RF_XFLIP); } // Putting sky ceilings on the front and back of a line alters the way unpegged // positioning works. 
@@ -1856,191 +1851,5 @@ bool R_StoreWallRange (int start, int stop) return !(fake3D & FAKE3D_FAKEMASK); } -int R_CreateWallSegmentY(short *outbuf, double z1, double z2, const FWallCoords *wallc) -{ - float y1 = (float)(CenterY - z1 * InvZtoScale / wallc->sz1); - float y2 = (float)(CenterY - z2 * InvZtoScale / wallc->sz2); - - if (y1 < 0 && y2 < 0) // entire line is above screen - { - memset(&outbuf[wallc->sx1], 0, (wallc->sx2 - wallc->sx1) * sizeof(outbuf[0])); - return 3; - } - else if (y1 > viewheight && y2 > viewheight) // entire line is below screen - { - fillshort(&outbuf[wallc->sx1], wallc->sx2 - wallc->sx1, viewheight); - return 12; - } - - if (wallc->sx2 <= wallc->sx1) - return 0; - - float rcp_delta = 1.0f / (wallc->sx2 - wallc->sx1); - if (y1 >= 0.0f && y2 >= 0.0f && xs_RoundToInt(y1) <= viewheight && xs_RoundToInt(y2) <= viewheight) - { - for (int x = wallc->sx1; x < wallc->sx2; x++) - { - float t = (x - wallc->sx1) * rcp_delta; - float y = y1 * (1.0f - t) + y2 * t; - outbuf[x] = (short)xs_RoundToInt(y); - } - } - else - { - for (int x = wallc->sx1; x < wallc->sx2; x++) - { - float t = (x - wallc->sx1) * rcp_delta; - float y = y1 * (1.0f - t) + y2 * t; - outbuf[x] = (short)clamp(xs_RoundToInt(y), 0, viewheight); - } - } - - return 0; -} - -int R_CreateWallSegmentYSloped(short *outbuf, const secplane_t &plane, const FWallCoords *wallc) -{ - if (!plane.isSlope()) - { - return R_CreateWallSegmentY(outbuf, plane.Zat0() - ViewPos.Z, wallc); - } - else - { - // Get Z coordinates at both ends of the line - double x, y, den, z1, z2; - if (MirrorFlags & RF_XFLIP) - { - x = curline->v2->fX(); - y = curline->v2->fY(); - if (wallc->sx1 == 0 && 0 != (den = wallc->tleft.X - wallc->tright.X + wallc->tleft.Y - wallc->tright.Y)) - { - double frac = (wallc->tleft.Y + wallc->tleft.X) / den; - x -= frac * (x - curline->v1->fX()); - y -= frac * (y - curline->v1->fY()); - } - z1 = plane.ZatPoint(x, y) - ViewPos.Z; - - if (wallc->sx2 > wallc->sx1 + 1) - { - x = 
curline->v1->fX(); - y = curline->v1->fY(); - if (wallc->sx2 == viewwidth && 0 != (den = wallc->tleft.X - wallc->tright.X - wallc->tleft.Y + wallc->tright.Y)) - { - double frac = (wallc->tright.Y - wallc->tright.X) / den; - x += frac * (curline->v2->fX() - x); - y += frac * (curline->v2->fY() - y); - } - z2 = plane.ZatPoint(x, y) - ViewPos.Z; - } - else - { - z2 = z1; - } - } - else - { - x = curline->v1->fX(); - y = curline->v1->fY(); - if (wallc->sx1 == 0 && 0 != (den = wallc->tleft.X - wallc->tright.X + wallc->tleft.Y - wallc->tright.Y)) - { - double frac = (wallc->tleft.Y + wallc->tleft.X) / den; - x += frac * (curline->v2->fX() - x); - y += frac * (curline->v2->fY() - y); - } - z1 = plane.ZatPoint(x, y) - ViewPos.Z; - - if (wallc->sx2 > wallc->sx1 + 1) - { - x = curline->v2->fX(); - y = curline->v2->fY(); - if (wallc->sx2 == viewwidth && 0 != (den = wallc->tleft.X - wallc->tright.X - wallc->tleft.Y + wallc->tright.Y)) - { - double frac = (wallc->tright.Y - wallc->tright.X) / den; - x -= frac * (x - curline->v1->fX()); - y -= frac * (y - curline->v1->fY()); - } - z2 = plane.ZatPoint(x, y) - ViewPos.Z; - } - else - { - z2 = z1; - } - } - - return R_CreateWallSegmentY(outbuf, z1, z2, wallc); - } -} - -void PrepWall(float *vstep, fixed_t *upos, double walxrepeat, int x1, int x2) -{ - float uOverZ = WallT.UoverZorg + WallT.UoverZstep * (float)(x1 + 0.5 - CenterX); - float invZ = WallT.InvZorg + WallT.InvZstep * (float)(x1 + 0.5 - CenterX); - float uGradient = WallT.UoverZstep; - float zGradient = WallT.InvZstep; - float xrepeat = (float)walxrepeat; - float depthScale = (float)(WallT.InvZstep * WallTMapScale2); - float depthOrg = (float)(-WallT.UoverZstep * WallTMapScale2); - - if (xrepeat < 0.0f) - { - for (int x = x1; x < x2; x++) - { - float u = uOverZ / invZ; - - upos[x] = (fixed_t)((xrepeat - u * xrepeat) * FRACUNIT); - vstep[x] = depthOrg + u * depthScale; - - uOverZ += uGradient; - invZ += zGradient; - } - } - else - { - for (int x = x1; x < x2; x++) - { - 
float u = uOverZ / invZ; - - upos[x] = (fixed_t)(u * xrepeat * FRACUNIT); - vstep[x] = depthOrg + u * depthScale; - - uOverZ += uGradient; - invZ += zGradient; - } - } -} - -void PrepLWall(fixed_t *upos, double walxrepeat, int x1, int x2) -{ - float uOverZ = WallT.UoverZorg + WallT.UoverZstep * (float)(x1 + 0.5 - CenterX); - float invZ = WallT.InvZorg + WallT.InvZstep * (float)(x1 + 0.5 - CenterX); - float uGradient = WallT.UoverZstep; - float zGradient = WallT.InvZstep; - float xrepeat = (float)walxrepeat; - - if (xrepeat < 0.0f) - { - for (int x = x1; x < x2; x++) - { - float u = uOverZ / invZ * xrepeat - xrepeat; - - upos[x] = (fixed_t)(u * FRACUNIT); - - uOverZ += uGradient; - invZ += zGradient; - } - } - else - { - for (int x = x1; x < x2; x++) - { - float u = uOverZ / invZ * xrepeat; - - upos[x] = (fixed_t)(u * FRACUNIT); - - uOverZ += uGradient; - invZ += zGradient; - } - } -} } diff --git a/src/swrenderer/scene/r_segs.h b/src/swrenderer/scene/r_segs.h index f4ca36e4f2..6f0fce65f2 100644 --- a/src/swrenderer/scene/r_segs.h +++ b/src/swrenderer/scene/r_segs.h @@ -32,25 +32,10 @@ struct visplane_t; bool R_StoreWallRange(int start, int stop); void R_RenderMaskedSegRange (drawseg_t *ds, int x1, int x2); -int R_CreateWallSegmentY (short *outbuf, double z1, double z2, const FWallCoords *wallc); -int R_CreateWallSegmentYSloped (short *outbuf, const secplane_t &plane, const FWallCoords *wallc); -inline int R_CreateWallSegmentY(short *outbuf, double z, const FWallCoords *wallc) -{ - return R_CreateWallSegmentY(outbuf, z, z, wallc); -} -void PrepWall (float *swall, fixed_t *lwall, double walxrepeat, int x1, int x2); -void PrepLWall (fixed_t *lwall, double walxrepeat, int x1, int x2); void R_RenderSegLoop (); -extern short walltop[MAXWIDTH]; // [RH] record max extents of wall -extern short wallbottom[MAXWIDTH]; -extern short wallupper[MAXWIDTH]; -extern short walllower[MAXWIDTH]; -extern float swall[MAXWIDTH]; -extern fixed_t lwall[MAXWIDTH]; -extern double lwallscale; 
extern float rw_light; // [RH] Scale lights with viewsize adjustments extern float rw_lightstep; extern float rw_lightleft; diff --git a/src/swrenderer/scene/r_walldraw.cpp b/src/swrenderer/scene/r_walldraw.cpp deleted file mode 100644 index 21b7348729..0000000000 --- a/src/swrenderer/scene/r_walldraw.cpp +++ /dev/null @@ -1,603 +0,0 @@ -/* -** Wall drawing stuff free of Build pollution -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 'as-is', without any express or implied -** warranty. In no event will the authors be held liable for any damages -** arising from the use of this software. -** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. -** 3. This notice may not be removed or altered from any source distribution. 
-** -*/ - -#include -#include - -#include "doomdef.h" -#include "doomstat.h" -#include "doomdata.h" - -#include "swrenderer/r_main.h" -#include "r_sky.h" -#include "v_video.h" - -#include "m_swap.h" -#include "a_sharedglobal.h" -#include "d_net.h" -#include "g_level.h" -#include "swrenderer/drawers/r_draw.h" -#include "r_bsp.h" -#include "r_segs.h" -#include "r_3dfloors.h" -#include "v_palette.h" -#include "r_data/colormaps.h" -#include "gl/dynlights/gl_dynlight.h" -#include "swrenderer/drawers/r_drawers.h" -#include "r_walldraw.h" -#include "swrenderer/segments/r_drawsegment.h" - -namespace swrenderer -{ - using namespace drawerargs; - - extern FTexture *rw_pic; - -static const uint8_t *R_GetColumn(FTexture *tex, int col) -{ - int width; - - // If the texture's width isn't a power of 2, then we need to make it a - // positive offset for proper clamping. - if (col < 0 && (width = tex->GetWidth()) != (1 << tex->WidthBits)) - { - col = width + (col % width); - } - - if (r_swtruecolor) - return (const uint8_t *)tex->GetColumnBgra(col, nullptr); - else - return tex->GetColumn(col, nullptr); -} - -WallSampler::WallSampler(int y1, float swal, double yrepeat, fixed_t xoffset, double xmagnitude, FTexture *texture, const BYTE*(*getcol)(FTexture *texture, int x)) -{ - xoffset += FLOAT2FIXED(xmagnitude * 0.5); - - if (!r_swtruecolor) - { - height = texture->GetHeight(); - - int uv_fracbits = 32 - texture->HeightBits; - if (uv_fracbits != 32) - { - uv_max = height << uv_fracbits; - - // Find start uv in [0-base_height[ range. - // Not using xs_ToFixed because it rounds the result and we need something that always rounds down to stay within the range. 
- double uv_stepd = swal * yrepeat; - double v = (dc_texturemid + uv_stepd * (y1 - CenterY + 0.5)) / height; - v = v - floor(v); - v *= height; - v *= (1 << uv_fracbits); - - uv_pos = (uint32_t)v; - uv_step = xs_ToFixed(uv_fracbits, uv_stepd); - if (uv_step == 0) // To prevent divide by zero elsewhere - uv_step = 1; - } - else - { // Hack for one pixel tall textures - uv_pos = 0; - uv_step = 0; - uv_max = 1; - } - - source = getcol(texture, xoffset >> FRACBITS); - source2 = nullptr; - texturefracx = 0; - } - else - { - // Normalize to 0-1 range: - double uv_stepd = swal * yrepeat; - double v = (dc_texturemid + uv_stepd * (y1 - CenterY + 0.5)) / texture->GetHeight(); - v = v - floor(v); - double v_step = uv_stepd / texture->GetHeight(); - - if (isnan(v) || isnan(v_step)) // this should never happen, but it apparently does.. - { - uv_stepd = 0.0; - v = 0.0; - v_step = 0.0; - } - - // Convert to uint32: - uv_pos = (uint32_t)(v * 0x100000000LL); - uv_step = (uint32_t)(v_step * 0x100000000LL); - uv_max = 0; - - // Texture mipmap and filter selection: - if (getcol != R_GetColumn) - { - source = getcol(texture, xoffset >> FRACBITS); - source2 = nullptr; - height = texture->GetHeight(); - texturefracx = 0; - } - else - { - double ymagnitude = fabs(uv_stepd); - double magnitude = MAX(ymagnitude, xmagnitude); - double min_lod = -1000.0; - double lod = MAX(log2(magnitude) + r_lod_bias, min_lod); - bool magnifying = lod < 0.0f; - - int mipmap_offset = 0; - int mip_width = texture->GetWidth(); - int mip_height = texture->GetHeight(); - if (r_mipmap && texture->Mipmapped() && mip_width > 1 && mip_height > 1) - { - uint32_t xpos = (uint32_t)((((uint64_t)xoffset) << FRACBITS) / mip_width); - - int level = (int)lod; - while (level > 0 && mip_width > 1 && mip_height > 1) - { - mipmap_offset += mip_width * mip_height; - level--; - mip_width = MAX(mip_width >> 1, 1); - mip_height = MAX(mip_height >> 1, 1); - } - xoffset = (xpos >> FRACBITS) * mip_width; - } - - const uint32_t *pixels 
= texture->GetPixelsBgra() + mipmap_offset; - - bool filter_nearest = (magnifying && !r_magfilter) || (!magnifying && !r_minfilter); - if (filter_nearest) - { - int tx = (xoffset >> FRACBITS) % mip_width; - if (tx < 0) - tx += mip_width; - source = (BYTE*)(pixels + tx * mip_height); - source2 = nullptr; - height = mip_height; - texturefracx = 0; - } - else - { - xoffset -= FRACUNIT / 2; - int tx0 = (xoffset >> FRACBITS) % mip_width; - if (tx0 < 0) - tx0 += mip_width; - int tx1 = (tx0 + 1) % mip_width; - source = (BYTE*)(pixels + tx0 * mip_height); - source2 = (BYTE*)(pixels + tx1 * mip_height); - height = mip_height; - texturefracx = (xoffset >> (FRACBITS - 4)) & 15; - } - } - } -} - -// Draw a column with support for non-power-of-two ranges -static void Draw1Column(int x, int y1, int y2, WallSampler &sampler, DrawerFunc draw1column) -{ - if (r_dynlights) - { - // Find column position in view space - float w1 = 1.0f / WallC.sz1; - float w2 = 1.0f / WallC.sz2; - float t = (x - WallC.sx1 + 0.5f) / (WallC.sx2 - WallC.sx1); - float wcol = w1 * (1.0f - t) + w2 * t; - float zcol = 1.0f / wcol; - dc_viewpos.X = (float)((x + 0.5 - CenterX) / CenterX * zcol); - dc_viewpos.Y = zcol; - dc_viewpos.Z = (float)((CenterY - y1 - 0.5) / InvZtoScale * zcol); - dc_viewpos_step.Z = (float)(-zcol / InvZtoScale); - - static TriLight lightbuffer[64 * 1024]; - static int nextlightindex = 0; - - // Setup lights for column - dc_num_lights = 0; - dc_lights = lightbuffer + nextlightindex; - FLightNode *cur_node = dc_light_list; - while (cur_node && nextlightindex < 64 * 1024) - { - if (!(cur_node->lightsource->flags2&MF2_DORMANT)) - { - double lightX = cur_node->lightsource->X() - ViewPos.X; - double lightY = cur_node->lightsource->Y() - ViewPos.Y; - double lightZ = cur_node->lightsource->Z() - ViewPos.Z; - - float lx = (float)(lightX * ViewSin - lightY * ViewCos) - dc_viewpos.X; - float ly = (float)(lightX * ViewTanCos + lightY * ViewTanSin) - dc_viewpos.Y; - float lz = (float)lightZ; - - // 
Precalculate the constant part of the dot here so the drawer doesn't have to. - float lconstant = lx * lx + ly * ly; - - // Include light only if it touches this column - float radius = cur_node->lightsource->GetRadius(); - if (radius * radius >= lconstant) - { - uint32_t red = cur_node->lightsource->GetRed(); - uint32_t green = cur_node->lightsource->GetGreen(); - uint32_t blue = cur_node->lightsource->GetBlue(); - - nextlightindex++; - auto &light = dc_lights[dc_num_lights++]; - light.x = lconstant; - light.z = lz; - light.radius = 256.0f / cur_node->lightsource->GetRadius(); - light.color = (red << 16) | (green << 8) | blue; - } - } - - cur_node = cur_node->nextLight; - } - - if (nextlightindex == 64 * 1024) - nextlightindex = 0; - } - else - { - dc_num_lights = 0; - } - - if (r_swtruecolor) - { - int count = y2 - y1; - - dc_source = sampler.source; - dc_source2 = sampler.source2; - dc_texturefracx = sampler.texturefracx; - dc_dest = (ylookup[y1] + x) * 4 + dc_destorg; - dc_count = count; - dc_iscale = sampler.uv_step; - dc_texturefrac = sampler.uv_pos; - dc_textureheight = sampler.height; - (R_Drawers()->*draw1column)(); - - uint64_t step64 = sampler.uv_step; - uint64_t pos64 = sampler.uv_pos; - sampler.uv_pos = (uint32_t)(pos64 + step64 * count); - } - else - { - if (sampler.uv_max == 0 || sampler.uv_step == 0) // power of two - { - int count = y2 - y1; - - dc_source = sampler.source; - dc_source2 = sampler.source2; - dc_texturefracx = sampler.texturefracx; - dc_dest = (ylookup[y1] + x) + dc_destorg; - dc_count = count; - dc_iscale = sampler.uv_step; - dc_texturefrac = sampler.uv_pos; - (R_Drawers()->*draw1column)(); - - uint64_t step64 = sampler.uv_step; - uint64_t pos64 = sampler.uv_pos; - sampler.uv_pos = (uint32_t)(pos64 + step64 * count); - } - else - { - uint32_t uv_pos = sampler.uv_pos; - - uint32_t left = y2 - y1; - while (left > 0) - { - uint32_t available = sampler.uv_max - uv_pos; - uint32_t next_uv_wrap = available / sampler.uv_step; - if 
(available % sampler.uv_step != 0) - next_uv_wrap++; - uint32_t count = MIN(left, next_uv_wrap); - - dc_source = sampler.source; - dc_source2 = sampler.source2; - dc_texturefracx = sampler.texturefracx; - dc_dest = (ylookup[y1] + x) + dc_destorg; - dc_count = count; - dc_iscale = sampler.uv_step; - dc_texturefrac = uv_pos; - (R_Drawers()->*draw1column)(); - - left -= count; - uv_pos += sampler.uv_step * count; - if (uv_pos >= sampler.uv_max) - uv_pos -= sampler.uv_max; - } - - sampler.uv_pos = uv_pos; - } - } -} - -static void ProcessWallWorker( - int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade, - const BYTE *(*getcol)(FTexture *tex, int x), DrawerFunc drawcolumn) -{ - if (rw_pic->UseType == FTexture::TEX_Null) - return; - - fixed_t xoffset = rw_offset; - - rw_pic->GetHeight(); // To ensure that rw_pic->HeightBits has been set - int fracbits = 32 - rw_pic->HeightBits; - if (fracbits == 32) - { // Hack for one pixel tall textures - fracbits = 0; - yrepeat = 0; - dc_texturemid = 0; - } - - dc_wall_fracbits = r_swtruecolor ? 
FRACBITS : fracbits; - - bool fixed = (fixedcolormap != NULL || fixedlightlev >= 0); - if (fixed) - { - dc_wall_colormap[0] = dc_colormap; - dc_wall_colormap[1] = dc_colormap; - dc_wall_colormap[2] = dc_colormap; - dc_wall_colormap[3] = dc_colormap; - dc_wall_light[0] = 0; - dc_wall_light[1] = 0; - dc_wall_light[2] = 0; - dc_wall_light[3] = 0; - } - - if (fixedcolormap) - R_SetColorMapLight(fixedcolormap, 0, 0); - else - R_SetColorMapLight(basecolormap, 0, 0); - - float light = rw_light; - - double xmagnitude = 1.0; - - for (int x = x1; x < x2; x++, light += rw_lightstep) - { - int y1 = uwal[x]; - int y2 = dwal[x]; - if (y2 <= y1) - continue; - - if (!fixed) - R_SetColorMapLight(basecolormap, light, wallshade); - - if (x + 1 < x2) xmagnitude = fabs(FIXED2DBL(lwal[x + 1]) - FIXED2DBL(lwal[x])); - - WallSampler sampler(y1, swal[x], yrepeat, lwal[x] + xoffset, xmagnitude, rw_pic, getcol); - Draw1Column(x, y1, y2, sampler, drawcolumn); - } - - NetUpdate(); -} - -static void ProcessNormalWall(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade, const BYTE *(*getcol)(FTexture *tex, int x) = R_GetColumn) -{ - ProcessWallWorker(x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, getcol, &SWPixelFormatDrawers::DrawWallColumn); -} - -static void ProcessMaskedWall(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade, const BYTE *(*getcol)(FTexture *tex, int x) = R_GetColumn) -{ - if (!rw_pic->bMasked) // Textures that aren't masked can use the faster ProcessNormalWall. 
- { - ProcessNormalWall(x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, getcol); - } - else - { - ProcessWallWorker(x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, getcol, &SWPixelFormatDrawers::DrawWallMaskedColumn); - } -} - -static void ProcessTranslucentWall(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade, const BYTE *(*getcol)(FTexture *tex, int x) = R_GetColumn) -{ - DrawerFunc drawcol1 = R_GetTransMaskDrawer(); - if (drawcol1 == nullptr) - { - // The current translucency is unsupported, so draw with regular ProcessMaskedWall instead. - ProcessMaskedWall(x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, getcol); - } - else - { - ProcessWallWorker(x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, getcol, drawcol1); - } -} - -static void ProcessStripedWall(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade) -{ - FDynamicColormap *startcolormap = basecolormap; - bool fogginess = foggy; - - short most1[MAXWIDTH], most2[MAXWIDTH], most3[MAXWIDTH]; - short *up, *down; - - up = uwal; - down = most1; - - assert(WallC.sx1 <= x1); - assert(WallC.sx2 >= x2); - - // kg3D - fake floors instead of zdoom light list - for (unsigned int i = 0; i < frontsector->e->XFloor.lightlist.Size(); i++) - { - int j = R_CreateWallSegmentYSloped (most3, frontsector->e->XFloor.lightlist[i].plane, &WallC); - if (j != 3) - { - for (int j = x1; j < x2; ++j) - { - down[j] = clamp (most3[j], up[j], dwal[j]); - } - ProcessNormalWall (x1, x2, up, down, swal, lwal, yrepeat, wallshade); - up = down; - down = (down == most1) ? 
most2 : most1; - } - - lightlist_t *lit = &frontsector->e->XFloor.lightlist[i]; - basecolormap = lit->extra_colormap; - wallshade = LIGHT2SHADE(curline->sidedef->GetLightLevel(fogginess, *lit->p_lightlevel, lit->lightsource != NULL) + r_actualextralight); - } - - ProcessNormalWall (x1, x2, up, dwal, swal, lwal, yrepeat, wallshade); - basecolormap = startcolormap; -} - -static void ProcessWall(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade, bool mask) -{ - if (mask) - { - if (colfunc == basecolfunc) - { - ProcessMaskedWall(x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade); - } - else - { - ProcessTranslucentWall(x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade); - } - } - else - { - if (fixedcolormap != NULL || fixedlightlev >= 0 || !(frontsector->e && frontsector->e->XFloor.lightlist.Size())) - { - ProcessNormalWall(x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade); - } - else - { - ProcessStripedWall(x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade); - } - } -} - -//============================================================================= -// -// ProcessWallNP2 -// -// This is a wrapper around ProcessWall that helps it tile textures whose heights -// are not powers of 2. It divides the wall into texture-sized strips and calls -// ProcessNormalWall for each of those. Since only one repetition of the texture fits -// in each strip, ProcessWall will not tile. 
-// -//============================================================================= - -static void ProcessWallNP2(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, double top, double bot, int wallshade, bool mask) -{ - short most1[MAXWIDTH], most2[MAXWIDTH], most3[MAXWIDTH]; - short *up, *down; - double texheight = rw_pic->GetHeight(); - double partition; - double scaledtexheight = texheight / yrepeat; - - if (yrepeat >= 0) - { // normal orientation: draw strips from top to bottom - partition = top - fmod(top - dc_texturemid / yrepeat - ViewPos.Z, scaledtexheight); - if (partition == top) - { - partition -= scaledtexheight; - } - up = uwal; - down = most1; - dc_texturemid = (partition - ViewPos.Z) * yrepeat + texheight; - while (partition > bot) - { - int j = R_CreateWallSegmentY(most3, partition - ViewPos.Z, &WallC); - if (j != 3) - { - for (int j = x1; j < x2; ++j) - { - down[j] = clamp(most3[j], up[j], dwal[j]); - } - ProcessWall(x1, x2, up, down, swal, lwal, yrepeat, wallshade, mask); - up = down; - down = (down == most1) ? most2 : most1; - } - partition -= scaledtexheight; - dc_texturemid -= texheight; - } - ProcessWall(x1, x2, up, dwal, swal, lwal, yrepeat, wallshade, mask); - } - else - { // upside down: draw strips from bottom to top - partition = bot - fmod(bot - dc_texturemid / yrepeat - ViewPos.Z, scaledtexheight); - up = most1; - down = dwal; - dc_texturemid = (partition - ViewPos.Z) * yrepeat + texheight; - while (partition < top) - { - int j = R_CreateWallSegmentY(most3, partition - ViewPos.Z, &WallC); - if (j != 12) - { - for (int j = x1; j < x2; ++j) - { - up[j] = clamp(most3[j], uwal[j], down[j]); - } - ProcessWall(x1, x2, up, down, swal, lwal, yrepeat, wallshade, mask); - down = up; - up = (up == most1) ? 
most2 : most1; - } - partition -= scaledtexheight; - dc_texturemid -= texheight; - } - ProcessWall(x1, x2, uwal, down, swal, lwal, yrepeat, wallshade, mask); - } -} - -void R_DrawDrawSeg(drawseg_t *ds, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade) -{ - if (rw_pic->GetHeight() != 1 << rw_pic->HeightBits) - { - double frontcz1 = ds->curline->frontsector->ceilingplane.ZatPoint(ds->curline->v1); - double frontfz1 = ds->curline->frontsector->floorplane.ZatPoint(ds->curline->v1); - double frontcz2 = ds->curline->frontsector->ceilingplane.ZatPoint(ds->curline->v2); - double frontfz2 = ds->curline->frontsector->floorplane.ZatPoint(ds->curline->v2); - double top = MAX(frontcz1, frontcz2); - double bot = MIN(frontfz1, frontfz2); - if (fake3D & FAKE3D_CLIPTOP) - { - top = MIN(top, sclipTop); - } - if (fake3D & FAKE3D_CLIPBOTTOM) - { - bot = MAX(bot, sclipBottom); - } - ProcessWallNP2(x1, x2, uwal, dwal, swal, lwal, yrepeat, top, bot, wallshade, true); - } - else - { - ProcessWall(x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, true); - } -} - - -void R_DrawWallSegment(FTexture *rw_pic, int x1, int x2, short *walltop, short *wallbottom, float *swall, fixed_t *lwall, double yscale, double top, double bottom, bool mask, int wallshade, FLightNode *light_list) -{ - dc_light_list = light_list; - if (rw_pic->GetHeight() != 1 << rw_pic->HeightBits) - { - ProcessWallNP2(x1, x2, walltop, wallbottom, swall, lwall, yscale, top, bottom, wallshade, false); - } - else - { - ProcessWall(x1, x2, walltop, wallbottom, swall, lwall, yscale, wallshade, false); - } - dc_light_list = nullptr; -} - -void R_DrawSkySegment(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade, const BYTE *(*getcol)(FTexture *tex, int x)) -{ - ProcessNormalWall(x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, getcol); -} - -} \ No newline at end of file diff --git a/src/swrenderer/segments/r_drawsegment.h 
b/src/swrenderer/segments/r_drawsegment.h index 7174794a3a..234a14bc09 100644 --- a/src/swrenderer/segments/r_drawsegment.h +++ b/src/swrenderer/segments/r_drawsegment.h @@ -1,7 +1,7 @@ #pragma once -#include "swrenderer/scene/r_bsp.h" +#include "swrenderer/line/r_line.h" namespace swrenderer { diff --git a/src/swrenderer/things/r_decal.cpp b/src/swrenderer/things/r_decal.cpp index c4a90b414d..5aa67a550c 100644 --- a/src/swrenderer/things/r_decal.cpp +++ b/src/swrenderer/things/r_decal.cpp @@ -23,7 +23,8 @@ #include "swrenderer/drawers/r_draw.h" #include "v_palette.h" #include "r_data/colormaps.h" -#include "swrenderer/scene/r_walldraw.h" +#include "swrenderer/line/r_wallsetup.h" +#include "swrenderer/line/r_walldraw.h" #include "swrenderer/segments/r_drawsegment.h" #include "swrenderer/scene/r_portal.h" #include "r_wallsprite.h" diff --git a/src/swrenderer/things/r_visiblesprite.h b/src/swrenderer/things/r_visiblesprite.h index 01f7ab3af2..090a757068 100644 --- a/src/swrenderer/things/r_visiblesprite.h +++ b/src/swrenderer/things/r_visiblesprite.h @@ -1,7 +1,7 @@ #pragma once -#include "swrenderer/scene/r_bsp.h" +#include "swrenderer/line/r_line.h" struct particle_t; struct FVoxel; diff --git a/src/swrenderer/things/r_wallsprite.cpp b/src/swrenderer/things/r_wallsprite.cpp index 9d7e03c2a4..400c1e1639 100644 --- a/src/swrenderer/things/r_wallsprite.cpp +++ b/src/swrenderer/things/r_wallsprite.cpp @@ -41,6 +41,8 @@ #include "r_voxel.h" #include "swrenderer/segments/r_drawsegment.h" #include "swrenderer/scene/r_portal.h" +#include "swrenderer/line/r_wallsetup.h" +#include "swrenderer/line/r_walldraw.h" #include "swrenderer/r_memory.h" namespace swrenderer From c5281df6916c8a92d964babca3279bfbc9c513f9 Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Sun, 1 Jan 2017 11:33:54 -0500 Subject: [PATCH 621/912] - We're on version 1.3pre now. 
--- src/version.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/version.h b/src/version.h index 6e28224920..635b3a3910 100644 --- a/src/version.h +++ b/src/version.h @@ -41,12 +41,12 @@ const char *GetVersionString(); /** Lots of different version numbers **/ -#define VERSIONSTR "1.2pre" +#define VERSIONSTR "1.3pre" // The version as seen in the Windows resource -#define RC_FILEVERSION 1,1,9999,0 -#define RC_PRODUCTVERSION 1,1,9999,0 -#define RC_PRODUCTVERSION2 "1.2pre" +#define RC_FILEVERSION 1,2,9999,0 +#define RC_PRODUCTVERSION 1,2,9999,0 +#define RC_PRODUCTVERSION2 "1.3pre" // Version identifier for network games. // Bump it every time you do a release unless you're certain you From 34bd21449b8c46baff40e033c86ae76e1bddd71a Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 2 Jan 2017 03:12:51 +0100 Subject: [PATCH 622/912] Move line working variables to r_line --- src/swrenderer/line/r_line.cpp | 37 +++++++++++ src/swrenderer/line/r_line.h | 37 +++++++++++ src/swrenderer/scene/r_bsp.cpp | 10 --- src/swrenderer/scene/r_segs.cpp | 107 +++++++------------------------- src/swrenderer/scene/r_segs.h | 2 +- 5 files changed, 98 insertions(+), 95 deletions(-) diff --git a/src/swrenderer/line/r_line.cpp b/src/swrenderer/line/r_line.cpp index 4edd23cb99..60107f2a1a 100644 --- a/src/swrenderer/line/r_line.cpp +++ b/src/swrenderer/line/r_line.cpp @@ -28,6 +28,43 @@ namespace swrenderer FWallCoords WallC; FWallTmapVals WallT; + double rw_backcz1; + double rw_backcz2; + double rw_backfz1; + double rw_backfz2; + double rw_frontcz1; + double rw_frontcz2; + double rw_frontfz1; + double rw_frontfz2; + + fixed_t rw_offset_top; + fixed_t rw_offset_mid; + fixed_t rw_offset_bottom; + + int rw_ceilstat, rw_floorstat; + bool rw_mustmarkfloor, rw_mustmarkceiling; + bool rw_prepped; + bool rw_markportal; + bool rw_havehigh; + bool rw_havelow; + + float rw_light; + float rw_lightstep; + float rw_lightleft; + + fixed_t rw_offset; + double 
rw_midtexturemid; + double rw_toptexturemid; + double rw_bottomtexturemid; + double rw_midtexturescalex; + double rw_midtexturescaley; + double rw_toptexturescalex; + double rw_toptexturescaley; + double rw_bottomtexturescalex; + double rw_bottomtexturescaley; + + FTexture *rw_pic; + // Transform and clip coordinates. Returns true if it was clipped away bool FWallCoords::Init(const DVector2 &pt1, const DVector2 &pt2, double too_close) { diff --git a/src/swrenderer/line/r_line.h b/src/swrenderer/line/r_line.h index 95a9319696..345b5e9942 100644 --- a/src/swrenderer/line/r_line.h +++ b/src/swrenderer/line/r_line.h @@ -25,4 +25,41 @@ namespace swrenderer extern FWallCoords WallC; extern FWallTmapVals WallT; + + extern double rw_backcz1; + extern double rw_backcz2; + extern double rw_backfz1; + extern double rw_backfz2; + extern double rw_frontcz1; + extern double rw_frontcz2; + extern double rw_frontfz1; + extern double rw_frontfz2; + + extern fixed_t rw_offset_top; + extern fixed_t rw_offset_mid; + extern fixed_t rw_offset_bottom; + + extern int rw_ceilstat, rw_floorstat; + extern bool rw_mustmarkfloor, rw_mustmarkceiling; + extern bool rw_prepped; + extern bool rw_markportal; + extern bool rw_havehigh; + extern bool rw_havelow; + + extern float rw_light; + extern float rw_lightstep; + extern float rw_lightleft; + + extern fixed_t rw_offset; + extern double rw_midtexturemid; + extern double rw_toptexturemid; + extern double rw_bottomtexturemid; + extern double rw_midtexturescalex; + extern double rw_midtexturescaley; + extern double rw_toptexturescalex; + extern double rw_toptexturescaley; + extern double rw_bottomtexturescalex; + extern double rw_bottomtexturescaley; + + extern FTexture *rw_pic; } diff --git a/src/swrenderer/scene/r_bsp.cpp b/src/swrenderer/scene/r_bsp.cpp index 125a3ae218..111f09c1c9 100644 --- a/src/swrenderer/scene/r_bsp.cpp +++ b/src/swrenderer/scene/r_bsp.cpp @@ -73,16 +73,6 @@ int doorclosed; bool r_fakingunderwater; -extern bool rw_prepped; 
-extern bool rw_havehigh, rw_havelow; -extern int rw_floorstat, rw_ceilstat; -extern bool rw_mustmarkfloor, rw_mustmarkceiling; - -double rw_backcz1, rw_backcz2; -double rw_backfz1, rw_backfz2; -double rw_frontcz1, rw_frontcz2; -double rw_frontfz1, rw_frontfz2; - static BYTE FakeSide; int WindowLeft, WindowRight; diff --git a/src/swrenderer/scene/r_segs.cpp b/src/swrenderer/scene/r_segs.cpp index 575e82ef4a..ca921efa6b 100644 --- a/src/swrenderer/scene/r_segs.cpp +++ b/src/swrenderer/scene/r_segs.cpp @@ -90,48 +90,12 @@ bool markceiling; FTexture *toptexture; FTexture *bottomtexture; FTexture *midtexture; -fixed_t rw_offset_top; -fixed_t rw_offset_mid; -fixed_t rw_offset_bottom; // // regular wall // -extern double rw_backcz1, rw_backcz2; -extern double rw_backfz1, rw_backfz2; -extern double rw_frontcz1, rw_frontcz2; -extern double rw_frontfz1, rw_frontfz2; - -int rw_ceilstat, rw_floorstat; -bool rw_mustmarkfloor, rw_mustmarkceiling; -bool rw_prepped; -bool rw_markportal; -bool rw_havehigh; -bool rw_havelow; - -float rw_light; // [RH] Scale lights with viewsize adjustments -float rw_lightstep; -float rw_lightleft; - -static double rw_frontlowertop; - -static int rw_x; -static int rw_stopx; -fixed_t rw_offset; -static double rw_scalestep; -static double rw_midtexturemid; -static double rw_toptexturemid; -static double rw_bottomtexturemid; -static double rw_midtexturescalex; -static double rw_midtexturescaley; -static double rw_toptexturescalex; -static double rw_toptexturescaley; -static double rw_bottomtexturescalex; -static double rw_bottomtexturescaley; - -FTexture *rw_pic; static fixed_t *maskedtexturecol; @@ -153,41 +117,6 @@ inline bool IsFogBoundary (sector_t *front, sector_t *back) (front->GetTexture(sector_t::ceiling) != skyflatnum || back->GetTexture(sector_t::ceiling) != skyflatnum); } - -// -// R_RenderMaskedSegRange -// -float *MaskedSWall; -float MaskedScaleY; - -static void BlastMaskedColumn (FTexture *tex) -{ - // calculate lighting - if 
(fixedcolormap == NULL && fixedlightlev < 0) - { - R_SetColorMapLight(basecolormap, rw_light, wallshade); - } - - dc_iscale = xs_Fix<16>::ToFix(MaskedSWall[dc_x] * MaskedScaleY); - if (sprflipvert) - sprtopscreen = CenterY + dc_texturemid * spryscale; - else - sprtopscreen = CenterY - dc_texturemid * spryscale; - - // killough 1/25/98: here's where Medusa came in, because - // it implicitly assumed that the column was all one patch. - // Originally, Doom did not construct complete columns for - // multipatched textures, so there were no header or trailer - // bytes in the column referred to below, which explains - // the Medusa effect. The fix is to construct true columns - // when forming multipatched textures (see r_data.c). - - // draw the texture - R_DrawMaskedColumn(tex, maskedtexturecol[dc_x]); - rw_light += rw_lightstep; - spryscale += rw_scalestep; -} - // Clip a midtexture to the floor and ceiling of the sector in front of it. void ClipMidtex(int x1, int x2) { @@ -288,11 +217,11 @@ void R_RenderMaskedSegRange (drawseg_t *ds, int x1, int x2) goto clearfog; } - MaskedSWall = (float *)(openings + ds->swall) - ds->x1; - MaskedScaleY = ds->yscale; + float *MaskedSWall = (float *)(openings + ds->swall) - ds->x1; + float MaskedScaleY = ds->yscale; maskedtexturecol = (fixed_t *)(openings + ds->maskedtexturecol) - ds->x1; spryscale = ds->iscale + ds->iscalestep * (x1 - ds->x1); - rw_scalestep = ds->iscalestep; + float rw_scalestep = ds->iscalestep; if (fixedlightlev >= 0) R_SetColorMapLight((r_fullbrightignoresectorcolor) ? 
&FullNormalLight : basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); @@ -422,7 +351,21 @@ void R_RenderMaskedSegRange (drawseg_t *ds, int x1, int x2) { for (dc_x = x1; dc_x < x2; ++dc_x) { - BlastMaskedColumn (tex); + if (fixedcolormap == nullptr && fixedlightlev < 0) + { + R_SetColorMapLight(basecolormap, rw_light, wallshade); + } + + dc_iscale = xs_Fix<16>::ToFix(MaskedSWall[dc_x] * MaskedScaleY); + if (sprflipvert) + sprtopscreen = CenterY + dc_texturemid * spryscale; + else + sprtopscreen = CenterY - dc_texturemid * spryscale; + + R_DrawMaskedColumn(tex, maskedtexturecol[dc_x]); + + rw_light += rw_lightstep; + spryscale += rw_scalestep; } } } @@ -530,8 +473,7 @@ void R_RenderFakeWall(drawseg_t *ds, int x1, int x2, F3DFloor *rover) mceilingclip = openings + ds->sprtopclip - ds->x1; spryscale = ds->iscale + ds->iscalestep * (x1 - ds->x1); - rw_scalestep = ds->iscalestep; - MaskedSWall = (float *)(openings + ds->swall) - ds->x1; + float *MaskedSWall = (float *)(openings + ds->swall) - ds->x1; // find positioning side_t *scaledside; @@ -1020,10 +962,8 @@ void R_RenderFakeWallRange (drawseg_t *ds, int x1, int x2) // CALLED: CORE LOOPING ROUTINE. 
// -void R_RenderSegLoop () +void R_RenderSegLoop(int x1, int x2) { - int x1 = rw_x; - int x2 = rw_stopx; int x; double xscale; double yscale; @@ -1305,7 +1245,7 @@ void R_NewWall (bool needlights) { // two-sided line // hack to allow height changes in outdoor areas - rw_frontlowertop = frontsector->GetPlaneTexZ(sector_t::ceiling); + double rw_frontlowertop = frontsector->GetPlaneTexZ(sector_t::ceiling); if (frontsector->GetTexture(sector_t::ceiling) == skyflatnum && backsector->GetTexture(sector_t::ceiling) == skyflatnum) @@ -1587,10 +1527,9 @@ bool R_StoreWallRange (int start, int stop) draw_segment->tmapvals = WallT; draw_segment->siz1 = 1 / WallC.sz1; draw_segment->siz2 = 1 / WallC.sz2; - draw_segment->x1 = rw_x = start; + draw_segment->x1 = start; draw_segment->x2 = stop; draw_segment->curline = curline; - rw_stopx = stop; draw_segment->bFogBoundary = false; draw_segment->bFakeBoundary = false; if(fake3D & 7) draw_segment->fake = 1; @@ -1788,7 +1727,7 @@ bool R_StoreWallRange (int start, int stop) } } - R_RenderSegLoop (); + R_RenderSegLoop (start, stop); if(fake3D & 7) { return !(fake3D & FAKE3D_FAKEMASK); diff --git a/src/swrenderer/scene/r_segs.h b/src/swrenderer/scene/r_segs.h index 6f0fce65f2..c11275cb4e 100644 --- a/src/swrenderer/scene/r_segs.h +++ b/src/swrenderer/scene/r_segs.h @@ -34,7 +34,7 @@ void R_RenderMaskedSegRange (drawseg_t *ds, int x1, int x2); -void R_RenderSegLoop (); +void R_RenderSegLoop(int x1, int x2); extern float rw_light; // [RH] Scale lights with viewsize adjustments extern float rw_lightstep; From 388da85937a9f8263b7dd9a4b4fb896dc5e95c2b Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 2 Jan 2017 03:46:48 +0100 Subject: [PATCH 623/912] Move some more vars --- src/swrenderer/line/r_line.cpp | 4 ++++ src/swrenderer/line/r_line.h | 4 ++++ src/swrenderer/scene/r_bsp.cpp | 3 --- src/swrenderer/scene/r_bsp.h | 3 --- 4 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/swrenderer/line/r_line.cpp 
b/src/swrenderer/line/r_line.cpp index 60107f2a1a..b2a1a3fa2e 100644 --- a/src/swrenderer/line/r_line.cpp +++ b/src/swrenderer/line/r_line.cpp @@ -25,6 +25,10 @@ namespace swrenderer { + seg_t *curline; + side_t *sidedef; + line_t *linedef; + FWallCoords WallC; FWallTmapVals WallT; diff --git a/src/swrenderer/line/r_line.h b/src/swrenderer/line/r_line.h index 345b5e9942..d4be544936 100644 --- a/src/swrenderer/line/r_line.h +++ b/src/swrenderer/line/r_line.h @@ -23,6 +23,10 @@ namespace swrenderer void InitFromLine(const DVector2 &left, const DVector2 &right); }; + extern seg_t *curline; + extern side_t *sidedef; + extern line_t *linedef; + extern FWallCoords WallC; extern FWallTmapVals WallT; diff --git a/src/swrenderer/scene/r_bsp.cpp b/src/swrenderer/scene/r_bsp.cpp index 111f09c1c9..6c0c1be1cc 100644 --- a/src/swrenderer/scene/r_bsp.cpp +++ b/src/swrenderer/scene/r_bsp.cpp @@ -62,9 +62,6 @@ namespace swrenderer { using namespace drawerargs; -seg_t* curline; -side_t* sidedef; -line_t* linedef; sector_t* frontsector; sector_t* backsector; diff --git a/src/swrenderer/scene/r_bsp.h b/src/swrenderer/scene/r_bsp.h index 3ab94b2882..7530718b84 100644 --- a/src/swrenderer/scene/r_bsp.h +++ b/src/swrenderer/scene/r_bsp.h @@ -46,9 +46,6 @@ enum extern subsector_t *InSubsector; -extern seg_t* curline; -extern side_t* sidedef; -extern line_t* linedef; extern sector_t* frontsector; extern sector_t* backsector; From c61e9c7fe2e3445270c8712efa3df44dcd2b8dc7 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 2 Jan 2017 06:52:50 +0100 Subject: [PATCH 624/912] Add attenuated point lights --- src/swrenderer/drawers/r_draw.cpp | 1 + src/swrenderer/drawers/r_draw.h | 1 + src/swrenderer/drawers/r_draw_pal.cpp | 28 +++++++++++++++---- src/swrenderer/line/r_walldraw.cpp | 12 +++++++- src/swrenderer/plane/r_flatplane.cpp | 12 ++++++-- .../fixedfunction/drawspancodegen.cpp | 17 +++++++++-- .../fixedfunction/drawwallcodegen.cpp | 17 +++++++++-- tools/drawergen/ssa/ssa_bool.cpp | 5 
++++ tools/drawergen/ssa/ssa_bool.h | 1 + 9 files changed, 80 insertions(+), 14 deletions(-) diff --git a/src/swrenderer/drawers/r_draw.cpp b/src/swrenderer/drawers/r_draw.cpp index 99a37a18af..413e7e1564 100644 --- a/src/swrenderer/drawers/r_draw.cpp +++ b/src/swrenderer/drawers/r_draw.cpp @@ -102,6 +102,7 @@ namespace swrenderer uint8_t *dc_destorg; int dc_destheight; int dc_count; + FVector3 dc_normal; FVector3 dc_viewpos; FVector3 dc_viewpos_step; TriLight *dc_lights; diff --git a/src/swrenderer/drawers/r_draw.h b/src/swrenderer/drawers/r_draw.h index 6715589b3a..1952146584 100644 --- a/src/swrenderer/drawers/r_draw.h +++ b/src/swrenderer/drawers/r_draw.h @@ -66,6 +66,7 @@ namespace swrenderer extern uint8_t *dc_destorg; extern int dc_destheight; extern int dc_count; + extern FVector3 dc_normal; extern FVector3 dc_viewpos; extern FVector3 dc_viewpos_step; extern TriLight *dc_lights; diff --git a/src/swrenderer/drawers/r_draw_pal.cpp b/src/swrenderer/drawers/r_draw_pal.cpp index 94250ccd55..84b9745dd4 100644 --- a/src/swrenderer/drawers/r_draw_pal.cpp +++ b/src/swrenderer/drawers/r_draw_pal.cpp @@ -128,12 +128,21 @@ namespace swrenderer // L = light-pos // dist = sqrt(dot(L, L)) - // attenuation = 1 - MIN(dist * (1/radius), 1) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) float Lxy2 = lights[i].x; // L.x*L.x + L.y*L.y float Lz = lights[i].z - viewpos_z; float dist2 = Lxy2 + Lz * Lz; - float dist = dist2 * _mm_cvtss_f32(_mm_rsqrt_ss(_mm_load_ss(&dist2))); - uint32_t attenuation = (uint32_t)(256.0f - MIN(dist * lights[i].radius, 256.0f)); + float rcp_dist = _mm_cvtss_f32(_mm_rsqrt_ss(_mm_load_ss(&dist2))); + float dist = dist2 * rcp_dist; + float distance_attenuation = (256.0f - MIN(dist * lights[i].radius, 256.0f)); + + // The simple light type + float simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + float point_attenuation = lights[i].y * rcp_dist * distance_attenuation; + uint32_t 
attenuation = (uint32_t)(lights[i].y == 0.0f ? simple_attenuation : point_attenuation); lit_r += (light_color_r * material_r * attenuation) >> 16; lit_g += (light_color_g * material_g * attenuation) >> 16; @@ -1733,8 +1742,17 @@ namespace swrenderer float Lyz2 = lights[i].y; // L.y*L.y + L.z*L.z float Lx = lights[i].x - viewpos_x; float dist2 = Lyz2 + Lx * Lx; - float dist = dist2 * _mm_cvtss_f32(_mm_rsqrt_ss(_mm_load_ss(&dist2))); - uint32_t attenuation = (uint32_t)(256.0f - MIN(dist * lights[i].radius, 256.0f)); + float rcp_dist = _mm_cvtss_f32(_mm_rsqrt_ss(_mm_load_ss(&dist2))); + float dist = dist2 * rcp_dist; + float distance_attenuation = (256.0f - MIN(dist * lights[i].radius, 256.0f)); + + // The simple light type + float simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + float point_attenuation = lights[i].z * rcp_dist * distance_attenuation; + uint32_t attenuation = (uint32_t)(lights[i].z == 0.0f ? simple_attenuation : point_attenuation); lit_r += (light_color_r * material_r * attenuation) >> 16; lit_g += (light_color_g * material_g * attenuation) >> 16; diff --git a/src/swrenderer/line/r_walldraw.cpp b/src/swrenderer/line/r_walldraw.cpp index da0206eeca..3f22e62b70 100644 --- a/src/swrenderer/line/r_walldraw.cpp +++ b/src/swrenderer/line/r_walldraw.cpp @@ -227,11 +227,13 @@ namespace swrenderer float lz = (float)lightZ; // Precalculate the constant part of the dot here so the drawer doesn't have to. + bool is_point_light = (cur_node->lightsource->flags4 & MF4_ATTENUATE) != 0; float lconstant = lx * lx + ly * ly; + float nlconstant = is_point_light ? 
lx * dc_normal.X + ly * dc_normal.Y : 0.0f; // Include light only if it touches this column float radius = cur_node->lightsource->GetRadius(); - if (radius * radius >= lconstant) + if (radius * radius >= lconstant && nlconstant >= 0.0f) { uint32_t red = cur_node->lightsource->GetRed(); uint32_t green = cur_node->lightsource->GetGreen(); @@ -240,6 +242,7 @@ namespace swrenderer nextlightindex++; auto &light = dc_lights[dc_num_lights++]; light.x = lconstant; + light.y = nlconstant; light.z = lz; light.radius = 256.0f / cur_node->lightsource->GetRadius(); light.color = (red << 16) | (green << 8) | blue; @@ -365,6 +368,13 @@ namespace swrenderer else R_SetColorMapLight(basecolormap, 0, 0); + float dx = WallC.tright.X - WallC.tleft.X; + float dy = WallC.tright.Y - WallC.tleft.Y; + float length = sqrt(dx * dx + dy * dy); + dc_normal.X = dy / length; + dc_normal.Y = -dx / length; + dc_normal.Z = 0.0f; + float light = rw_light; double xmagnitude = 1.0; diff --git a/src/swrenderer/plane/r_flatplane.cpp b/src/swrenderer/plane/r_flatplane.cpp index 6d3206f23f..e45e03a695 100644 --- a/src/swrenderer/plane/r_flatplane.cpp +++ b/src/swrenderer/plane/r_flatplane.cpp @@ -255,8 +255,13 @@ namespace swrenderer static TriLight lightbuffer[64 * 1024]; static int nextlightindex = 0; + + // Plane normal + dc_normal.X = 0.0f; + dc_normal.Y = 0.0f; + dc_normal.Z = (y >= CenterY) ? 1.0f : -1.0f; - // Setup lights for column + // Setup lights for row dc_num_lights = 0; dc_lights = lightbuffer + nextlightindex; visplane_light *cur_node = ds_light_list; @@ -271,11 +276,13 @@ namespace swrenderer float lz = (float)lightZ - dc_viewpos.Z; // Precalculate the constant part of the dot here so the drawer doesn't have to. + bool is_point_light = (cur_node->lightsource->flags4 & MF4_ATTENUATE) != 0; float lconstant = ly * ly + lz * lz; + float nlconstant = is_point_light ? 
lz * dc_normal.Z : 0.0f; // Include light only if it touches this row float radius = cur_node->lightsource->GetRadius(); - if (radius * radius >= lconstant) + if (radius * radius >= lconstant && nlconstant >= 0.0f) { uint32_t red = cur_node->lightsource->GetRed(); uint32_t green = cur_node->lightsource->GetGreen(); @@ -285,6 +292,7 @@ namespace swrenderer auto &light = dc_lights[dc_num_lights++]; light.x = lx; light.y = lconstant; + light.z = nlconstant; light.radius = 256.0f / radius; light.color = (red << 16) | (green << 8) | blue; } diff --git a/tools/drawergen/fixedfunction/drawspancodegen.cpp b/tools/drawergen/fixedfunction/drawspancodegen.cpp index 2272acf8db..0fdadaa84f 100644 --- a/tools/drawergen/fixedfunction/drawspancodegen.cpp +++ b/tools/drawergen/fixedfunction/drawspancodegen.cpp @@ -248,7 +248,7 @@ SSAVec4i DrawSpanCodegen::Shade(SSAVec4i fg, bool isSimpleShade) SSAVec4i light_color = SSAUBytePtr(SSAValue(dynlights[light_index][0]).v).load_vec4ub(true); SSAFloat light_x = dynlights[light_index][1].load(true); SSAFloat light_y = dynlights[light_index][2].load(true); - //SSAFloat light_z = dynlights[light_index][3].load(true); + SSAFloat light_z = dynlights[light_index][3].load(true); SSAFloat light_rcp_radius = dynlights[light_index][4].load(true); // L = light-pos @@ -256,8 +256,19 @@ SSAVec4i DrawSpanCodegen::Shade(SSAVec4i fg, bool isSimpleShade) // attenuation = 1 - MIN(dist * (1/radius), 1) SSAFloat Lyz2 = light_y; // L.y*L.y + L.z*L.z SSAFloat Lx = light_x - viewpos_x; - SSAFloat dist = SSAFloat::fastsqrt(Lyz2 + Lx * Lx); - SSAInt attenuation = SSAInt(SSAFloat(256.0f) - SSAFloat::MIN(dist * light_rcp_radius, SSAFloat(256.0f)), true); + SSAFloat dist2 = Lyz2 + Lx * Lx; + SSAFloat rcp_dist = SSAFloat::rsqrt(dist2); + SSAFloat dist = dist2 * rcp_dist; + SSAFloat distance_attenuation = SSAFloat(256.0f) - SSAFloat::MIN(dist * light_rcp_radius, SSAFloat(256.0f)); + + // The simple light type + SSAFloat simple_attenuation = distance_attenuation; + + // 
The point light type + // diffuse = dot(N,L) * attenuation + SSAFloat point_attenuation = light_z * rcp_dist * distance_attenuation; + + SSAInt attenuation = SSAInt((light_z == SSAFloat(0.0f)).select(simple_attenuation, point_attenuation), true); SSAVec4i contribution = (light_color * fg * attenuation) >> 16; stack_lit_color.store(lit_color + contribution); diff --git a/tools/drawergen/fixedfunction/drawwallcodegen.cpp b/tools/drawergen/fixedfunction/drawwallcodegen.cpp index 055b132d1a..9207fb63a7 100644 --- a/tools/drawergen/fixedfunction/drawwallcodegen.cpp +++ b/tools/drawergen/fixedfunction/drawwallcodegen.cpp @@ -179,7 +179,7 @@ SSAVec4i DrawWallCodegen::Shade(SSAVec4i fg, bool isSimpleShade) { SSAVec4i light_color = SSAUBytePtr(SSAValue(dynlights[light_index][0]).v).load_vec4ub(true); SSAFloat light_x = dynlights[light_index][1].load(true); - //SSAFloat light_y = dynlights[light_index][2].load(true); + SSAFloat light_y = dynlights[light_index][2].load(true); SSAFloat light_z = dynlights[light_index][3].load(true); SSAFloat light_rcp_radius = dynlights[light_index][4].load(true); @@ -188,8 +188,19 @@ SSAVec4i DrawWallCodegen::Shade(SSAVec4i fg, bool isSimpleShade) // attenuation = 1 - MIN(dist * (1/radius), 1) SSAFloat Lxy2 = light_x; // L.x*L.x + L.y*L.y SSAFloat Lz = light_z - z; - SSAFloat dist = SSAFloat::fastsqrt(Lxy2 + Lz * Lz); - SSAInt attenuation = SSAInt(SSAFloat(256.0f) - SSAFloat::MIN(dist * light_rcp_radius, SSAFloat(256.0f)), true); + SSAFloat dist2 = Lxy2 + Lz * Lz; + SSAFloat rcp_dist = SSAFloat::rsqrt(dist2); + SSAFloat dist = dist2 * rcp_dist; + SSAFloat distance_attenuation = SSAFloat(256.0f) - SSAFloat::MIN(dist * light_rcp_radius, SSAFloat(256.0f)); + + // The simple light type + SSAFloat simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + SSAFloat point_attenuation = light_y * rcp_dist * distance_attenuation; + + SSAInt attenuation = SSAInt((light_y == 
SSAFloat(0.0f)).select(simple_attenuation, point_attenuation), true); SSAVec4i contribution = (light_color * fg * attenuation) >> 16; stack_lit_color.store(lit_color + contribution); diff --git a/tools/drawergen/ssa/ssa_bool.cpp b/tools/drawergen/ssa/ssa_bool.cpp index 7ce99b827a..6eac90afb7 100644 --- a/tools/drawergen/ssa/ssa_bool.cpp +++ b/tools/drawergen/ssa/ssa_bool.cpp @@ -58,6 +58,11 @@ SSAInt SSABool::select(SSAInt a, SSAInt b) return SSAValue::from_llvm(SSAScope::builder().CreateSelect(v, a.v, b.v, SSAScope::hint())); } +SSAFloat SSABool::select(SSAFloat a, SSAFloat b) +{ + return SSAValue::from_llvm(SSAScope::builder().CreateSelect(v, a.v, b.v, SSAScope::hint())); +} + SSAUByte SSABool::select(SSAUByte a, SSAUByte b) { return SSAValue::from_llvm(SSAScope::builder().CreateSelect(v, a.v, b.v, SSAScope::hint())); diff --git a/tools/drawergen/ssa/ssa_bool.h b/tools/drawergen/ssa/ssa_bool.h index 728c7b7e8e..fbf5192d36 100644 --- a/tools/drawergen/ssa/ssa_bool.h +++ b/tools/drawergen/ssa/ssa_bool.h @@ -42,6 +42,7 @@ public: SSAInt zext_int(); SSAInt select(SSAInt a, SSAInt b); + SSAFloat select(SSAFloat a, SSAFloat b); SSAUByte select(SSAUByte a, SSAUByte b); SSAVec4i select(SSAVec4i a, SSAVec4i b); From b3c52541324f52a4b68da8bc58032eba511439fb Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 2 Jan 2017 07:55:23 +0100 Subject: [PATCH 625/912] Change doom lights to be attenuated --- .../static/filter/doom.doom1/gldefs.txt | 478 +++++++++++------- .../static/filter/doom.doom2/gldefs.txt | 478 +++++++++++------- 2 files changed, 590 insertions(+), 366 deletions(-) diff --git a/wadsrc_lights/static/filter/doom.doom1/gldefs.txt b/wadsrc_lights/static/filter/doom.doom1/gldefs.txt index 6c037b49f6..1d5e3aba09 100644 --- a/wadsrc_lights/static/filter/doom.doom1/gldefs.txt +++ b/wadsrc_lights/static/filter/doom.doom1/gldefs.txt @@ -10,17 +10,19 @@ flickerlight BPUFF1 { color 0.5 0.5 0.0 - size 6 - secondarySize 8 + size 9 + secondarySize 12 chance 0.8 + 
attenuate 1 } flickerlight BPUFF2 { color 0.5 0.5 0.0 - size 3 - secondarySize 4 + size 4 + secondarySize 6 chance 0.8 + attenuate 1 } object BulletPuff @@ -33,31 +35,35 @@ object BulletPuff pointlight ROCKET { color 1.0 0.7 0.0 - size 56 + size 84 + attenuate 1 } flickerlight ROCKET_X1 { color 1.0 0.7 0.5 - size 64 - secondarySize 72 + size 96 + secondarySize 108 chance 0.3 + attenuate 1 } flickerlight ROCKET_X2 { color 0.5 0.3 0.2 - size 80 - secondarySize 88 + size 120 + secondarySize 132 chance 0.3 + attenuate 1 } flickerlight ROCKET_X3 { color 0.3 0.1 0.1 - size 96 - secondarySize 104 + size 144 + secondarySize 156 chance 0.3 + attenuate 1 } object Rocket @@ -73,39 +79,44 @@ object Rocket pointlight PLASMABALL { color 0.5 0.5 1.0 - size 56 + size 84 + attenuate 1 } flickerlight PLASMA_X1 { color 0.5 0.5 1.0 - size 64 - secondarySize 72 + size 96 + secondarySize 108 chance 0.4 + attenuate 1 } flickerlight PLASMA_X2 { color 0.4 0.4 0.8 - size 80 - secondarySize 88 + size 120 + secondarySize 132 chance 0.4 + attenuate 1 } flickerlight PLASMA_X3 { color 0.25 0.25 0.5 - size 64 - secondarySize 72 + size 96 + secondarySize 108 chance 0.4 + attenuate 1 } flickerlight PLASMA_X4 { color 0.1 0.1 0.2 - size 8 - secondarySize 16 + size 12 + secondarySize 24 chance 0.4 + attenuate 1 } object PlasmaBall @@ -124,39 +135,44 @@ object PlasmaBall pointlight PLASMABALL1 { color 0.1 1.0 0.0 - size 56 + size 84 + attenuate 1 } flickerlight PLASMA1_X1 { color 0.2 1.0 0.2 - size 64 - secondarySize 72 + size 96 + secondarySize 108 chance 0.4 + attenuate 1 } flickerlight PLASMA1_X2 { color 0.2 0.8 0.2 - size 80 - secondarySize 88 + size 120 + secondarySize 132 chance 0.4 + attenuate 1 } flickerlight PLASMA1_X3 { color 0.1 0.5 0.1 - size 64 - secondarySize 72 + size 96 + secondarySize 108 chance 0.4 + attenuate 1 } flickerlight PLASMA1_X4 { color 0.0 0.2 0.0 - size 8 - secondarySize 16 + size 12 + secondarySize 24 chance 0.4 + attenuate 1 } object PlasmaBall1 @@ -175,31 +191,35 @@ 
object PlasmaBall1 pointlight PLASMABALL2 { color 1.0 0.1 0.0 - size 56 + size 84 + attenuate 1 } flickerlight PLASMA1_X1 { color 0.9 0.2 0.2 - size 64 - secondarySize 72 + size 96 + secondarySize 108 chance 0.4 + attenuate 1 } flickerlight PLASMA1_X2 { color 0.6 0.2 0.2 - size 80 - secondarySize 88 + size 120 + secondarySize 132 chance 0.4 + attenuate 1 } flickerlight PLASMA1_X3 { color 0.2 0.0 0.0 - size 8 - secondarySize 16 + size 12 + secondarySize 24 chance 0.4 + attenuate 1 } object PlasmaBall2 @@ -216,47 +236,54 @@ object PlasmaBall2 pointlight BFGBALL { color 0.5 1.0 0.5 - size 80 + size 120 + attenuate 1 } flickerlight BFGBALL_X1 { color 0.5 1.0 0.5 - size 80 - secondarySize 88 + size 120 + secondarySize 132 chance 0.3 + attenuate 1 } flickerlight BFGBALL_X2 { color 0.6 1.0 0.6 - size 104 - secondarySize 112 + size 156 + secondarySize 168 chance 0.3 + attenuate 1 } flickerlight BFGBALL_X3 { color 0.7 1.0 0.7 - size 120 - secondarySize 128 + size 180 + secondarySize 192 chance 0.3 + attenuate 1 } flickerlight BFGBALL_X4 { color 0.4 0.7 0.4 - size 56 - secondarySize 64 + size 84 + secondarySize 96 chance 0.3 + attenuate 1 + attenuate 1 } flickerlight BFGBALL_X5 { color 0.1 0.3 0.1 - size 48 - secondarySize 56 + size 72 + secondarySize 84 chance 0.3 + attenuate 1 } object BFGBall @@ -291,11 +318,12 @@ object BFGExtra pulselight BARREL { color 0.0 0.5 0.0 - size 20 - secondarySize 21 + size 30 + secondarySize 31 interval 0.5 offset 0 36 0 dontlightself 1 + attenuate 1 } object ExplosiveBarrel @@ -311,8 +339,9 @@ object ExplosiveBarrel pointlight LAMP { color 1.0 1.0 0.8 - size 56 + size 84 offset 0 44 0 + attenuate 1 } object Column @@ -324,10 +353,11 @@ object Column pulselight SMALLLAMP { color 0.8 0.8 1.0 - size 56 - secondarySize 58 + size 84 + secondarySize 87 interval 0.4 offset 0 44 0 + attenuate 1 } object TechLamp2 @@ -339,10 +369,11 @@ object TechLamp2 pulselight BIGLAMP { color 0.8 0.8 1.0 - size 64 - secondarySize 66 + size 96 + secondarySize 99 
interval 0.4 offset 0 72 0 + attenuate 1 } object TechLamp @@ -354,10 +385,11 @@ object TechLamp flickerlight2 BIGREDTORCH { color 1.0 0.5 0.2 - size 60 - secondarySize 66 + size 90 + secondarySize 99 interval 0.1 offset 0 60 0 + attenuate 1 } object RedTorch @@ -369,10 +401,11 @@ object RedTorch flickerlight2 BIGGREENTORCH { color 0.3 1.0 0.3 - size 60 - secondarySize 66 + size 90 + secondarySize 99 interval 0.1 offset 0 60 0 + attenuate 1 } object GreenTorch @@ -384,10 +417,11 @@ object GreenTorch flickerlight2 BIGBLUETORCH { color 0.3 0.3 1.0 - size 60 - secondarySize 66 + size 90 + secondarySize 99 interval 0.1 offset 0 60 0 + attenuate 1 } object BlueTorch @@ -399,10 +433,11 @@ object BlueTorch flickerlight2 SMALLREDTORCH { color 1.0 0.5 0.2 - size 48 - secondarySize 54 + size 72 + secondarySize 81 interval 0.1 offset 0 35 0 + attenuate 1 } object ShortRedTorch @@ -414,10 +449,11 @@ object ShortRedTorch flickerlight2 SMALLGREENTORCH { color 0.3 1.0 0.3 - size 48 - secondarySize 54 + size 72 + secondarySize 81 interval 0.1 offset 0 35 0 + attenuate 1 } object ShortGreenTorch @@ -429,10 +465,11 @@ object ShortGreenTorch flickerlight2 SMALLBLUETORCH { color 0.3 0.3 1.0 - size 48 - secondarySize 54 + size 72 + secondarySize 81 interval 0.1 offset 0 35 0 + attenuate 1 } object ShortBlueTorch @@ -444,10 +481,11 @@ object ShortBlueTorch flickerlight2 FIREBARREL { color 1.0 0.9 0.4 - size 48 - secondarySize 54 + size 72 + secondarySize 81 interval 0.1 offset 0 32 0 + attenuate 1 } object BurningBarrel @@ -459,10 +497,11 @@ object BurningBarrel flickerlight2 SKULLCANDLES { color 1.0 1.0 0.3 - size 32 - secondarySize 34 + size 48 + secondarySize 51 interval 0.1 offset 0 24 0 + attenuate 1 } object HeadCandles @@ -474,8 +513,9 @@ object HeadCandles pointlight CANDLE { color 1.0 1.0 0.3 - size 16 + size 24 offset 0 16 0 + attenuate 1 } object Candlestick @@ -487,8 +527,9 @@ object Candlestick pointlight CANDELABRA { color 1.0 1.0 0.3 - size 48 + size 67 offset 0 52 0 + 
attenuate 1 } object Candelabra @@ -506,10 +547,11 @@ object Candelabra pulselight SOULSPHERE { color 0.3 0.3 1.0 - size 40 - secondarySize 42 + size 60 + secondarySize 63 interval 2.0 offset 0 16 0 + attenuate 1 } object SoulSphere @@ -521,10 +563,11 @@ object SoulSphere pulselight INVULN { color 0.3 1.0 0.3 - size 40 - secondarySize 42 + size 60 + secondarySize 63 interval 2.0 offset 0 16 0 + attenuate 1 } object InvulnerabilitySphere @@ -536,36 +579,41 @@ object InvulnerabilitySphere pointlight BLURSPHERE1 { color 1.0 0.0 0.0 - size 40 + size 60 offset 0 16 0 + attenuate 1 } pointlight BLURSPHERE2 { color 0.0 0.0 1.0 - size 32 + size 48 offset 0 16 0 + attenuate 1 } pointlight BLURSPHERE3 { color 0.0 0.0 1.0 - size 24 + size 36 offset 0 16 0 + attenuate 1 } pointlight BLURSPHERE4 { color 0.0 0.0 1.0 - size 16 + size 24 offset 0 16 0 + attenuate 1 } pointlight BLURSPHERE5 { color 0.0 0.0 1.0 - size 8 + size 12 offset 0 16 0 + attenuate 1 } object BlurSphere @@ -582,9 +630,11 @@ object BlurSphere pulselight HEALTHPOTION { color 0.0 0.0 0.6 - size 16 - secondarySize 18 + size 24 + secondarySize 27 interval 2.0 + attenuate 1 + offset 0 10 0 } object HealthBonus @@ -596,10 +646,12 @@ object HealthBonus pulselight ARMORBONUS { color 0.2 0.6 0.2 - size 16 - secondarySize 14 + size 24 + secondarySize 21 interval 1.0 dontlightself 1 + attenuate 1 + offset 0 10 0 } object ArmorBonus @@ -622,9 +674,11 @@ object BlueSkull pulselight YELLOWKEY { color 0.6 0.6 0.0 - size 16 - secondarySize 18 + size 24 + secondarySize 27 interval 2.0 + attenuate 1 + offset 0 10 0 } object YellowCard @@ -641,9 +695,11 @@ object YellowSkull pulselight REDKEY { color 0.6 0.0 0.0 - size 16 - secondarySize 18 + size 24 + secondarySize 27 interval 2.0 + attenuate 1 + offset 0 10 0 } object RedCard @@ -660,13 +716,17 @@ object RedSkull pointlight GREENARMOR1 { color 0.0 0.6 0.0 - size 48 + size 72 + attenuate 1 + offset 0 10 0 } pointlight GREENARMOR2 { color 0.0 0.6 0.0 - size 32 + size 48 + 
attenuate 1 + offset 0 10 0 } object GreenArmor @@ -679,13 +739,17 @@ object GreenArmor pointlight BLUEARMOR1 { color 0.0 0.0 0.6 - size 48 + size 72 + attenuate 1 + offset 0 10 0 } pointlight BLUEARMOR2 { color 0.0 0.0 0.6 - size 32 + size 48 + attenuate 1 + offset 0 10 0 } object BlueArmor @@ -704,10 +768,11 @@ object BlueArmor flickerlight2 ZOMBIEATK { color 1.0 0.8 0.2 - size 48 - secondarySize 56 + size 72 + secondarySize 84 interval 1 offset 0 40 0 + attenuate 1 } object ZombieMan @@ -736,32 +801,36 @@ object DoomPlayer pointlight IMPBALL { color 1.0 0.7 0.4 - size 64 + size 96 + attenuate 1 } // Doom imp fireball explosion flickerlight IMPBALL_X1 { color 0.7 0.4 0.25 - size 80 - secondarySize 88 + size 120 + secondarySize 132 chance 0.25 + attenuate 1 } flickerlight IMPBALL_X2 { color 0.4 0.2 0.1 - size 96 - secondarySize 104 + size 144 + secondarySize 156 chance 0.25 + attenuate 1 } flickerlight IMPBALL_X3 { color 0.2 0.1 0.0 - size 112 - secondarySize 120 + size 168 + secondarySize 180 chance 0.25 + attenuate 1 } object DoomImpBall @@ -777,9 +846,10 @@ object DoomImpBall pointlight SPECTRE { color 0.5 0.5 0.5 - size 48 + size 72 offset 0 24 0 subtractive 1 + attenuate 1 } /* @@ -793,33 +863,37 @@ object Spectre flickerlight CACOBALL { color 1.0 0.5 0.8 - size 56 - secondarySize 64 + size 84 + secondarySize 96 chance 0.5 + attenuate 1 } flickerlight CACOBALL_X1 { color 0.9 0.4 0.7 - size 72 - secondarySize 80 + size 108 + secondarySize 120 chance 0.25 + attenuate 1 } flickerlight CACOBALL_X2 { color 0.6 0.3 0.5 - size 88 - secondarySize 96 + size 132 + secondarySize 144 chance 0.25 + attenuate 1 } flickerlight CACOBALL_X3 { color 0.3 0.1 0.1 - size 104 - secondarySize 112 + size 156 + secondarySize 168 chance 0.25 + attenuate 1 } object CacodemonBall @@ -836,31 +910,35 @@ object CacodemonBall pointlight BARONBALL { color 0.0 1.0 0.0 - size 64 + size 96 + attenuate 1 } flickerlight BARONBALL_X1 { color 0.6 0.9 0.6 - size 80 - secondarySize 88 + size 120 + 
secondarySize 132 chance 0.25 + attenuate 1 } flickerlight BARONBALL_X2 { color 0.45 0.6 0.45 - size 96 - secondarySize 104 + size 144 + secondarySize 156 chance 0.25 + attenuate 1 } flickerlight BARONBALL_X3 { color 0.2 0.3 0.2 - size 112 - secondarySize 120 + size 168 + secondarySize 180 chance 0.25 + attenuate 1 } object BaronBall @@ -877,41 +955,46 @@ object BaronBall flickerlight LOSTSOUL { color 1.0 0.6 0.3 - size 56 + size 84 secondarysize 64 chance 0.1 + attenuate 1 } flickerlight LOSTSOUL_X1 { color 0.8 0.5 0.3 - size 72 - secondarySize 80 + size 108 + secondarySize 120 chance 0.25 + attenuate 1 } flickerlight LOSTSOUL_X2 { color 0.6 0.3 0.2 - size 88 - secondarySize 96 + size 132 + secondarySize 144 chance 0.25 + attenuate 1 } flickerlight LOSTSOUL_X3 { color 0.4 0.1 0.0 - size 104 - secondarySize 112 + size 156 + secondarySize 168 chance 0.25 + attenuate 1 } flickerlight LOSTSOUL_X4 { color 0.2 0.0 0.0 - size 112 - secondarySize 120 + size 168 + secondarySize 180 chance 0.25 + attenuate 1 } object LostSoul @@ -945,39 +1028,44 @@ object FatShot pointlight ARACHPLAS { color 0.6 1.0 0.4 - size 56 + size 84 + attenuate 1 } flickerlight ARACHPLAS_X1 { color 0.4 0.8 0.3 - size 72 - secondarySize 80 + size 108 + secondarySize 120 chance 0.3 + attenuate 1 } flickerlight ARACHPLAS_X2 { color 0.6 0.6 0.3 - size 88 - secondarySize 96 + size 132 + secondarySize 144 chance 0.3 + attenuate 1 } flickerlight ARACHPLAS_X3 { color 0.4 0.4 0.2 - size 48 - secondarySize 32 + size 72 + secondarySize 48 chance 0.3 + attenuate 1 } flickerlight ARACHPLAS_X4 { color 0.2 0.2 0.1 - size 24 - secondarySize 16 + size 36 + secondarySize 24 chance 0.3 + attenuate 1 } object ArachnotronPlasma @@ -996,31 +1084,35 @@ object ArachnotronPlasma pointlight TRACER { color 1.0 0.5 0.3 - size 48 + size 72 + attenuate 1 } flickerlight TRACER_X1 { color 1.0 0.5 0.2 - size 64 - secondarySize 72 + size 96 + secondarySize 108 chance 0.25 + attenuate 1 } flickerlight TRACER_X2 { color 0.6 0.3 0.1 - 
size 80 - secondarySize 88 + size 120 + secondarySize 132 chance 0.25 + attenuate 1 } flickerlight TRACER_X3 { color 0.3 0.1 0.0 - size 96 - secondarySize 104 + size 144 + secondarySize 156 chance 0.25 + attenuate 1 } object RevenantTracer @@ -1037,73 +1129,81 @@ object RevenantTracer flickerlight ARCHFIRE1 { color 1.0 1.0 0.5 - size 24 - secondarySize 32 + size 36 + secondarySize 48 chance 0.3 offset 0 8 0 + attenuate 1 } flickerlight ARCHFIRE2 { color 1.0 1.0 0.5 - size 40 - secondarySize 48 + size 60 + secondarySize 72 chance 0.3 offset 0 24 0 + attenuate 1 } flickerlight ARCHFIRE3 { color 1.0 1.0 0.5 - size 64 - secondarySize 72 + size 96 + secondarySize 108 chance 0.3 offset 0 32 0 + attenuate 1 } flickerlight ARCHFIRE4 { color 0.8 0.8 0.4 - size 64 - secondarySize 72 + size 96 + secondarySize 108 chance 0.3 offset 0 40 0 + attenuate 1 } flickerlight ARCHFIRE5 { color 0.8 0.8 0.4 - size 64 - secondarySize 72 + size 96 + secondarySize 108 chance 0.3 offset 0 48 0 + attenuate 1 } flickerlight ARCHFIRE6 { color 0.6 0.6 0.3 - size 48 - secondarySize 56 + size 72 + secondarySize 84 chance 0.3 offset 0 64 0 + attenuate 1 } flickerlight ARCHFIRE7 { color 0.4 0.4 0.2 - size 32 - secondarySize 40 + size 48 + secondarySize 60 chance 0.3 offset 0 72 0 + attenuate 1 } flickerlight ARCHFIRE8 { color 0.2 0.2 0.1 - size 16 - secondarySize 24 + size 24 + secondarySize 36 chance 0.3 offset 0 80 0 + attenuate 1 } object ArchvileFire @@ -1122,73 +1222,81 @@ object ArchvileFire flickerlight ARCHATK1 { color 1.0 1.0 0.4 - size 32 - secondarySize 48 + size 48 + secondarySize 72 chance 0.3 offset 0 80 0 + attenuate 1 } flickerlight ARCHATK2 { color 1.0 1.0 0.4 - size 56 - secondarySize 64 + size 84 + secondarySize 96 chance 0.3 offset 0 80 0 + attenuate 1 } flickerlight ARCHATK3 { color 1.0 1.0 0.4 - size 56 - secondarySize 64 + size 84 + secondarySize 96 chance 0.3 offset 0 64 0 + attenuate 1 } flickerlight ARCHATK4 { color 1.0 1.0 0.4 - size 64 - secondarySize 72 + size 96 + 
secondarySize 108 chance 0.3 offset 0 48 0 + attenuate 1 } flickerlight ARCHATK5 { color 1.0 1.0 0.4 - size 80 - secondarySize 88 + size 120 + secondarySize 132 chance 0.3 offset 0 40 0 + attenuate 1 } flickerlight ARCHATK6 { color 0.7 0.7 0.3 - size 96 - secondarySize 104 + size 144 + secondarySize 156 chance 0.3 offset 0 40 0 + attenuate 1 } flickerlight ARCHATK7 { color 0.3 0.3 0.1 - size 104 - secondarySize 112 + size 156 + secondarySize 168 chance 0.3 offset 0 40 0 + attenuate 1 } pulselight ARCHRES { color 0.6 0.3 0.3 - size 64 - secondarySize 70 + size 96 + secondarySize 105 interval 0.5 offset 0 36 0 + attenuate 1 } object Archvile @@ -1216,33 +1324,37 @@ object Archvile flickerlight DTFOG1 { color 0.4 1.0 0.4 - size 56 - secondarySize 64 + size 84 + secondarySize 96 chance 0.4 + attenuate 1 } flickerlight DTFOG2 { color 0.4 1.0 0.4 - size 40 - secondarySize 48 + size 60 + secondarySize 72 chance 0.4 + attenuate 1 } flickerlight DTFOG3 { color 0.4 1.0 0.4 - size 24 - secondarySize 32 + size 36 + secondarySize 48 chance 0.4 + attenuate 1 } flickerlight DTFOG4 { color 0.4 1.0 0.4 - size 10 - secondarySize 16 + size 15 + secondarySize 24 chance 0.4 + attenuate 1 } object TeleportFog diff --git a/wadsrc_lights/static/filter/doom.doom2/gldefs.txt b/wadsrc_lights/static/filter/doom.doom2/gldefs.txt index 6c037b49f6..1d5e3aba09 100644 --- a/wadsrc_lights/static/filter/doom.doom2/gldefs.txt +++ b/wadsrc_lights/static/filter/doom.doom2/gldefs.txt @@ -10,17 +10,19 @@ flickerlight BPUFF1 { color 0.5 0.5 0.0 - size 6 - secondarySize 8 + size 9 + secondarySize 12 chance 0.8 + attenuate 1 } flickerlight BPUFF2 { color 0.5 0.5 0.0 - size 3 - secondarySize 4 + size 4 + secondarySize 6 chance 0.8 + attenuate 1 } object BulletPuff @@ -33,31 +35,35 @@ object BulletPuff pointlight ROCKET { color 1.0 0.7 0.0 - size 56 + size 84 + attenuate 1 } flickerlight ROCKET_X1 { color 1.0 0.7 0.5 - size 64 - secondarySize 72 + size 96 + secondarySize 108 chance 0.3 + attenuate 1 } 
flickerlight ROCKET_X2 { color 0.5 0.3 0.2 - size 80 - secondarySize 88 + size 120 + secondarySize 132 chance 0.3 + attenuate 1 } flickerlight ROCKET_X3 { color 0.3 0.1 0.1 - size 96 - secondarySize 104 + size 144 + secondarySize 156 chance 0.3 + attenuate 1 } object Rocket @@ -73,39 +79,44 @@ object Rocket pointlight PLASMABALL { color 0.5 0.5 1.0 - size 56 + size 84 + attenuate 1 } flickerlight PLASMA_X1 { color 0.5 0.5 1.0 - size 64 - secondarySize 72 + size 96 + secondarySize 108 chance 0.4 + attenuate 1 } flickerlight PLASMA_X2 { color 0.4 0.4 0.8 - size 80 - secondarySize 88 + size 120 + secondarySize 132 chance 0.4 + attenuate 1 } flickerlight PLASMA_X3 { color 0.25 0.25 0.5 - size 64 - secondarySize 72 + size 96 + secondarySize 108 chance 0.4 + attenuate 1 } flickerlight PLASMA_X4 { color 0.1 0.1 0.2 - size 8 - secondarySize 16 + size 12 + secondarySize 24 chance 0.4 + attenuate 1 } object PlasmaBall @@ -124,39 +135,44 @@ object PlasmaBall pointlight PLASMABALL1 { color 0.1 1.0 0.0 - size 56 + size 84 + attenuate 1 } flickerlight PLASMA1_X1 { color 0.2 1.0 0.2 - size 64 - secondarySize 72 + size 96 + secondarySize 108 chance 0.4 + attenuate 1 } flickerlight PLASMA1_X2 { color 0.2 0.8 0.2 - size 80 - secondarySize 88 + size 120 + secondarySize 132 chance 0.4 + attenuate 1 } flickerlight PLASMA1_X3 { color 0.1 0.5 0.1 - size 64 - secondarySize 72 + size 96 + secondarySize 108 chance 0.4 + attenuate 1 } flickerlight PLASMA1_X4 { color 0.0 0.2 0.0 - size 8 - secondarySize 16 + size 12 + secondarySize 24 chance 0.4 + attenuate 1 } object PlasmaBall1 @@ -175,31 +191,35 @@ object PlasmaBall1 pointlight PLASMABALL2 { color 1.0 0.1 0.0 - size 56 + size 84 + attenuate 1 } flickerlight PLASMA1_X1 { color 0.9 0.2 0.2 - size 64 - secondarySize 72 + size 96 + secondarySize 108 chance 0.4 + attenuate 1 } flickerlight PLASMA1_X2 { color 0.6 0.2 0.2 - size 80 - secondarySize 88 + size 120 + secondarySize 132 chance 0.4 + attenuate 1 } flickerlight PLASMA1_X3 { color 0.2 0.0 
0.0 - size 8 - secondarySize 16 + size 12 + secondarySize 24 chance 0.4 + attenuate 1 } object PlasmaBall2 @@ -216,47 +236,54 @@ object PlasmaBall2 pointlight BFGBALL { color 0.5 1.0 0.5 - size 80 + size 120 + attenuate 1 } flickerlight BFGBALL_X1 { color 0.5 1.0 0.5 - size 80 - secondarySize 88 + size 120 + secondarySize 132 chance 0.3 + attenuate 1 } flickerlight BFGBALL_X2 { color 0.6 1.0 0.6 - size 104 - secondarySize 112 + size 156 + secondarySize 168 chance 0.3 + attenuate 1 } flickerlight BFGBALL_X3 { color 0.7 1.0 0.7 - size 120 - secondarySize 128 + size 180 + secondarySize 192 chance 0.3 + attenuate 1 } flickerlight BFGBALL_X4 { color 0.4 0.7 0.4 - size 56 - secondarySize 64 + size 84 + secondarySize 96 chance 0.3 + attenuate 1 + attenuate 1 } flickerlight BFGBALL_X5 { color 0.1 0.3 0.1 - size 48 - secondarySize 56 + size 72 + secondarySize 84 chance 0.3 + attenuate 1 } object BFGBall @@ -291,11 +318,12 @@ object BFGExtra pulselight BARREL { color 0.0 0.5 0.0 - size 20 - secondarySize 21 + size 30 + secondarySize 31 interval 0.5 offset 0 36 0 dontlightself 1 + attenuate 1 } object ExplosiveBarrel @@ -311,8 +339,9 @@ object ExplosiveBarrel pointlight LAMP { color 1.0 1.0 0.8 - size 56 + size 84 offset 0 44 0 + attenuate 1 } object Column @@ -324,10 +353,11 @@ object Column pulselight SMALLLAMP { color 0.8 0.8 1.0 - size 56 - secondarySize 58 + size 84 + secondarySize 87 interval 0.4 offset 0 44 0 + attenuate 1 } object TechLamp2 @@ -339,10 +369,11 @@ object TechLamp2 pulselight BIGLAMP { color 0.8 0.8 1.0 - size 64 - secondarySize 66 + size 96 + secondarySize 99 interval 0.4 offset 0 72 0 + attenuate 1 } object TechLamp @@ -354,10 +385,11 @@ object TechLamp flickerlight2 BIGREDTORCH { color 1.0 0.5 0.2 - size 60 - secondarySize 66 + size 90 + secondarySize 99 interval 0.1 offset 0 60 0 + attenuate 1 } object RedTorch @@ -369,10 +401,11 @@ object RedTorch flickerlight2 BIGGREENTORCH { color 0.3 1.0 0.3 - size 60 - secondarySize 66 + size 90 + secondarySize 
99 interval 0.1 offset 0 60 0 + attenuate 1 } object GreenTorch @@ -384,10 +417,11 @@ object GreenTorch flickerlight2 BIGBLUETORCH { color 0.3 0.3 1.0 - size 60 - secondarySize 66 + size 90 + secondarySize 99 interval 0.1 offset 0 60 0 + attenuate 1 } object BlueTorch @@ -399,10 +433,11 @@ object BlueTorch flickerlight2 SMALLREDTORCH { color 1.0 0.5 0.2 - size 48 - secondarySize 54 + size 72 + secondarySize 81 interval 0.1 offset 0 35 0 + attenuate 1 } object ShortRedTorch @@ -414,10 +449,11 @@ object ShortRedTorch flickerlight2 SMALLGREENTORCH { color 0.3 1.0 0.3 - size 48 - secondarySize 54 + size 72 + secondarySize 81 interval 0.1 offset 0 35 0 + attenuate 1 } object ShortGreenTorch @@ -429,10 +465,11 @@ object ShortGreenTorch flickerlight2 SMALLBLUETORCH { color 0.3 0.3 1.0 - size 48 - secondarySize 54 + size 72 + secondarySize 81 interval 0.1 offset 0 35 0 + attenuate 1 } object ShortBlueTorch @@ -444,10 +481,11 @@ object ShortBlueTorch flickerlight2 FIREBARREL { color 1.0 0.9 0.4 - size 48 - secondarySize 54 + size 72 + secondarySize 81 interval 0.1 offset 0 32 0 + attenuate 1 } object BurningBarrel @@ -459,10 +497,11 @@ object BurningBarrel flickerlight2 SKULLCANDLES { color 1.0 1.0 0.3 - size 32 - secondarySize 34 + size 48 + secondarySize 51 interval 0.1 offset 0 24 0 + attenuate 1 } object HeadCandles @@ -474,8 +513,9 @@ object HeadCandles pointlight CANDLE { color 1.0 1.0 0.3 - size 16 + size 24 offset 0 16 0 + attenuate 1 } object Candlestick @@ -487,8 +527,9 @@ object Candlestick pointlight CANDELABRA { color 1.0 1.0 0.3 - size 48 + size 67 offset 0 52 0 + attenuate 1 } object Candelabra @@ -506,10 +547,11 @@ object Candelabra pulselight SOULSPHERE { color 0.3 0.3 1.0 - size 40 - secondarySize 42 + size 60 + secondarySize 63 interval 2.0 offset 0 16 0 + attenuate 1 } object SoulSphere @@ -521,10 +563,11 @@ object SoulSphere pulselight INVULN { color 0.3 1.0 0.3 - size 40 - secondarySize 42 + size 60 + secondarySize 63 interval 2.0 offset 0 16 0 + 
attenuate 1 } object InvulnerabilitySphere @@ -536,36 +579,41 @@ object InvulnerabilitySphere pointlight BLURSPHERE1 { color 1.0 0.0 0.0 - size 40 + size 60 offset 0 16 0 + attenuate 1 } pointlight BLURSPHERE2 { color 0.0 0.0 1.0 - size 32 + size 48 offset 0 16 0 + attenuate 1 } pointlight BLURSPHERE3 { color 0.0 0.0 1.0 - size 24 + size 36 offset 0 16 0 + attenuate 1 } pointlight BLURSPHERE4 { color 0.0 0.0 1.0 - size 16 + size 24 offset 0 16 0 + attenuate 1 } pointlight BLURSPHERE5 { color 0.0 0.0 1.0 - size 8 + size 12 offset 0 16 0 + attenuate 1 } object BlurSphere @@ -582,9 +630,11 @@ object BlurSphere pulselight HEALTHPOTION { color 0.0 0.0 0.6 - size 16 - secondarySize 18 + size 24 + secondarySize 27 interval 2.0 + attenuate 1 + offset 0 10 0 } object HealthBonus @@ -596,10 +646,12 @@ object HealthBonus pulselight ARMORBONUS { color 0.2 0.6 0.2 - size 16 - secondarySize 14 + size 24 + secondarySize 21 interval 1.0 dontlightself 1 + attenuate 1 + offset 0 10 0 } object ArmorBonus @@ -622,9 +674,11 @@ object BlueSkull pulselight YELLOWKEY { color 0.6 0.6 0.0 - size 16 - secondarySize 18 + size 24 + secondarySize 27 interval 2.0 + attenuate 1 + offset 0 10 0 } object YellowCard @@ -641,9 +695,11 @@ object YellowSkull pulselight REDKEY { color 0.6 0.0 0.0 - size 16 - secondarySize 18 + size 24 + secondarySize 27 interval 2.0 + attenuate 1 + offset 0 10 0 } object RedCard @@ -660,13 +716,17 @@ object RedSkull pointlight GREENARMOR1 { color 0.0 0.6 0.0 - size 48 + size 72 + attenuate 1 + offset 0 10 0 } pointlight GREENARMOR2 { color 0.0 0.6 0.0 - size 32 + size 48 + attenuate 1 + offset 0 10 0 } object GreenArmor @@ -679,13 +739,17 @@ object GreenArmor pointlight BLUEARMOR1 { color 0.0 0.0 0.6 - size 48 + size 72 + attenuate 1 + offset 0 10 0 } pointlight BLUEARMOR2 { color 0.0 0.0 0.6 - size 32 + size 48 + attenuate 1 + offset 0 10 0 } object BlueArmor @@ -704,10 +768,11 @@ object BlueArmor flickerlight2 ZOMBIEATK { color 1.0 0.8 0.2 - size 48 - secondarySize 56 
+ size 72 + secondarySize 84 interval 1 offset 0 40 0 + attenuate 1 } object ZombieMan @@ -736,32 +801,36 @@ object DoomPlayer pointlight IMPBALL { color 1.0 0.7 0.4 - size 64 + size 96 + attenuate 1 } // Doom imp fireball explosion flickerlight IMPBALL_X1 { color 0.7 0.4 0.25 - size 80 - secondarySize 88 + size 120 + secondarySize 132 chance 0.25 + attenuate 1 } flickerlight IMPBALL_X2 { color 0.4 0.2 0.1 - size 96 - secondarySize 104 + size 144 + secondarySize 156 chance 0.25 + attenuate 1 } flickerlight IMPBALL_X3 { color 0.2 0.1 0.0 - size 112 - secondarySize 120 + size 168 + secondarySize 180 chance 0.25 + attenuate 1 } object DoomImpBall @@ -777,9 +846,10 @@ object DoomImpBall pointlight SPECTRE { color 0.5 0.5 0.5 - size 48 + size 72 offset 0 24 0 subtractive 1 + attenuate 1 } /* @@ -793,33 +863,37 @@ object Spectre flickerlight CACOBALL { color 1.0 0.5 0.8 - size 56 - secondarySize 64 + size 84 + secondarySize 96 chance 0.5 + attenuate 1 } flickerlight CACOBALL_X1 { color 0.9 0.4 0.7 - size 72 - secondarySize 80 + size 108 + secondarySize 120 chance 0.25 + attenuate 1 } flickerlight CACOBALL_X2 { color 0.6 0.3 0.5 - size 88 - secondarySize 96 + size 132 + secondarySize 144 chance 0.25 + attenuate 1 } flickerlight CACOBALL_X3 { color 0.3 0.1 0.1 - size 104 - secondarySize 112 + size 156 + secondarySize 168 chance 0.25 + attenuate 1 } object CacodemonBall @@ -836,31 +910,35 @@ object CacodemonBall pointlight BARONBALL { color 0.0 1.0 0.0 - size 64 + size 96 + attenuate 1 } flickerlight BARONBALL_X1 { color 0.6 0.9 0.6 - size 80 - secondarySize 88 + size 120 + secondarySize 132 chance 0.25 + attenuate 1 } flickerlight BARONBALL_X2 { color 0.45 0.6 0.45 - size 96 - secondarySize 104 + size 144 + secondarySize 156 chance 0.25 + attenuate 1 } flickerlight BARONBALL_X3 { color 0.2 0.3 0.2 - size 112 - secondarySize 120 + size 168 + secondarySize 180 chance 0.25 + attenuate 1 } object BaronBall @@ -877,41 +955,46 @@ object BaronBall flickerlight LOSTSOUL { color 
1.0 0.6 0.3 - size 56 + size 84 secondarysize 64 chance 0.1 + attenuate 1 } flickerlight LOSTSOUL_X1 { color 0.8 0.5 0.3 - size 72 - secondarySize 80 + size 108 + secondarySize 120 chance 0.25 + attenuate 1 } flickerlight LOSTSOUL_X2 { color 0.6 0.3 0.2 - size 88 - secondarySize 96 + size 132 + secondarySize 144 chance 0.25 + attenuate 1 } flickerlight LOSTSOUL_X3 { color 0.4 0.1 0.0 - size 104 - secondarySize 112 + size 156 + secondarySize 168 chance 0.25 + attenuate 1 } flickerlight LOSTSOUL_X4 { color 0.2 0.0 0.0 - size 112 - secondarySize 120 + size 168 + secondarySize 180 chance 0.25 + attenuate 1 } object LostSoul @@ -945,39 +1028,44 @@ object FatShot pointlight ARACHPLAS { color 0.6 1.0 0.4 - size 56 + size 84 + attenuate 1 } flickerlight ARACHPLAS_X1 { color 0.4 0.8 0.3 - size 72 - secondarySize 80 + size 108 + secondarySize 120 chance 0.3 + attenuate 1 } flickerlight ARACHPLAS_X2 { color 0.6 0.6 0.3 - size 88 - secondarySize 96 + size 132 + secondarySize 144 chance 0.3 + attenuate 1 } flickerlight ARACHPLAS_X3 { color 0.4 0.4 0.2 - size 48 - secondarySize 32 + size 72 + secondarySize 48 chance 0.3 + attenuate 1 } flickerlight ARACHPLAS_X4 { color 0.2 0.2 0.1 - size 24 - secondarySize 16 + size 36 + secondarySize 24 chance 0.3 + attenuate 1 } object ArachnotronPlasma @@ -996,31 +1084,35 @@ object ArachnotronPlasma pointlight TRACER { color 1.0 0.5 0.3 - size 48 + size 72 + attenuate 1 } flickerlight TRACER_X1 { color 1.0 0.5 0.2 - size 64 - secondarySize 72 + size 96 + secondarySize 108 chance 0.25 + attenuate 1 } flickerlight TRACER_X2 { color 0.6 0.3 0.1 - size 80 - secondarySize 88 + size 120 + secondarySize 132 chance 0.25 + attenuate 1 } flickerlight TRACER_X3 { color 0.3 0.1 0.0 - size 96 - secondarySize 104 + size 144 + secondarySize 156 chance 0.25 + attenuate 1 } object RevenantTracer @@ -1037,73 +1129,81 @@ object RevenantTracer flickerlight ARCHFIRE1 { color 1.0 1.0 0.5 - size 24 - secondarySize 32 + size 36 + secondarySize 48 chance 0.3 offset 0 
8 0 + attenuate 1 } flickerlight ARCHFIRE2 { color 1.0 1.0 0.5 - size 40 - secondarySize 48 + size 60 + secondarySize 72 chance 0.3 offset 0 24 0 + attenuate 1 } flickerlight ARCHFIRE3 { color 1.0 1.0 0.5 - size 64 - secondarySize 72 + size 96 + secondarySize 108 chance 0.3 offset 0 32 0 + attenuate 1 } flickerlight ARCHFIRE4 { color 0.8 0.8 0.4 - size 64 - secondarySize 72 + size 96 + secondarySize 108 chance 0.3 offset 0 40 0 + attenuate 1 } flickerlight ARCHFIRE5 { color 0.8 0.8 0.4 - size 64 - secondarySize 72 + size 96 + secondarySize 108 chance 0.3 offset 0 48 0 + attenuate 1 } flickerlight ARCHFIRE6 { color 0.6 0.6 0.3 - size 48 - secondarySize 56 + size 72 + secondarySize 84 chance 0.3 offset 0 64 0 + attenuate 1 } flickerlight ARCHFIRE7 { color 0.4 0.4 0.2 - size 32 - secondarySize 40 + size 48 + secondarySize 60 chance 0.3 offset 0 72 0 + attenuate 1 } flickerlight ARCHFIRE8 { color 0.2 0.2 0.1 - size 16 - secondarySize 24 + size 24 + secondarySize 36 chance 0.3 offset 0 80 0 + attenuate 1 } object ArchvileFire @@ -1122,73 +1222,81 @@ object ArchvileFire flickerlight ARCHATK1 { color 1.0 1.0 0.4 - size 32 - secondarySize 48 + size 48 + secondarySize 72 chance 0.3 offset 0 80 0 + attenuate 1 } flickerlight ARCHATK2 { color 1.0 1.0 0.4 - size 56 - secondarySize 64 + size 84 + secondarySize 96 chance 0.3 offset 0 80 0 + attenuate 1 } flickerlight ARCHATK3 { color 1.0 1.0 0.4 - size 56 - secondarySize 64 + size 84 + secondarySize 96 chance 0.3 offset 0 64 0 + attenuate 1 } flickerlight ARCHATK4 { color 1.0 1.0 0.4 - size 64 - secondarySize 72 + size 96 + secondarySize 108 chance 0.3 offset 0 48 0 + attenuate 1 } flickerlight ARCHATK5 { color 1.0 1.0 0.4 - size 80 - secondarySize 88 + size 120 + secondarySize 132 chance 0.3 offset 0 40 0 + attenuate 1 } flickerlight ARCHATK6 { color 0.7 0.7 0.3 - size 96 - secondarySize 104 + size 144 + secondarySize 156 chance 0.3 offset 0 40 0 + attenuate 1 } flickerlight ARCHATK7 { color 0.3 0.3 0.1 - size 104 - 
secondarySize 112 + size 156 + secondarySize 168 chance 0.3 offset 0 40 0 + attenuate 1 } pulselight ARCHRES { color 0.6 0.3 0.3 - size 64 - secondarySize 70 + size 96 + secondarySize 105 interval 0.5 offset 0 36 0 + attenuate 1 } object Archvile @@ -1216,33 +1324,37 @@ object Archvile flickerlight DTFOG1 { color 0.4 1.0 0.4 - size 56 - secondarySize 64 + size 84 + secondarySize 96 chance 0.4 + attenuate 1 } flickerlight DTFOG2 { color 0.4 1.0 0.4 - size 40 - secondarySize 48 + size 60 + secondarySize 72 chance 0.4 + attenuate 1 } flickerlight DTFOG3 { color 0.4 1.0 0.4 - size 24 - secondarySize 32 + size 36 + secondarySize 48 chance 0.4 + attenuate 1 } flickerlight DTFOG4 { color 0.4 1.0 0.4 - size 10 - secondarySize 16 + size 15 + secondarySize 24 chance 0.4 + attenuate 1 } object TeleportFog From 41a107a89f1fe8d99d82f7d33aeda5cd9d3bb9b0 Mon Sep 17 00:00:00 2001 From: "alexey.lysiuk" Date: Mon, 2 Jan 2017 17:57:26 +0200 Subject: [PATCH 626/912] Fixed compilation with GCC/Clang No more 'error: cannot jump from this goto statement to its label' because of 'note: jump bypasses variable initialization' --- src/swrenderer/scene/r_segs.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/swrenderer/scene/r_segs.cpp b/src/swrenderer/scene/r_segs.cpp index ca921efa6b..4d85a736d4 100644 --- a/src/swrenderer/scene/r_segs.cpp +++ b/src/swrenderer/scene/r_segs.cpp @@ -203,6 +203,8 @@ void R_RenderMaskedSegRange (drawseg_t *ds, int x1, int x2) mfloorclip = openings + ds->sprbottomclip - ds->x1; mceilingclip = openings + ds->sprtopclip - ds->x1; + float *MaskedSWall, MaskedScaleY, rw_scalestep; + // [RH] Draw fog partition if (ds->bFogBoundary) { @@ -217,11 +219,11 @@ void R_RenderMaskedSegRange (drawseg_t *ds, int x1, int x2) goto clearfog; } - float *MaskedSWall = (float *)(openings + ds->swall) - ds->x1; - float MaskedScaleY = ds->yscale; + MaskedSWall = (float *)(openings + ds->swall) - ds->x1; + MaskedScaleY = ds->yscale; maskedtexturecol = 
(fixed_t *)(openings + ds->maskedtexturecol) - ds->x1; spryscale = ds->iscale + ds->iscalestep * (x1 - ds->x1); - float rw_scalestep = ds->iscalestep; + rw_scalestep = ds->iscalestep; if (fixedlightlev >= 0) R_SetColorMapLight((r_fullbrightignoresectorcolor) ? &FullNormalLight : basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); From 338d338e2733f475b7f2ebfed7eff08d09007bcd Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Mon, 2 Jan 2017 17:55:24 -0500 Subject: [PATCH 627/912] - Moved multithreaded option from Truecolor menu to the Display menu since both the palette and truecolor drawer sets now use it. --- wadsrc/static/language.enu | 2 +- wadsrc/static/menudef.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/wadsrc/static/language.enu b/wadsrc/static/language.enu index c6de55f6f9..250de5f18b 100644 --- a/wadsrc/static/language.enu +++ b/wadsrc/static/language.enu @@ -1811,6 +1811,7 @@ DSPLYMNU_DIMCOLOR = "Dim color"; DSPLYMNU_MOVEBOB = "View bob amount while moving"; DSPLYMNU_STILLBOB = "View bob amount while not moving"; DSPLYMNU_BOBSPEED = "Weapon bob speed"; +DSPLYMNU_MULTITHREADED = "Multithreaded Drawing"; // HUD Options HUDMNU_TITLE = "HUD Options"; @@ -2753,7 +2754,6 @@ DSPLYMNU_TCOPT = "TrueColor Options"; TCMNU_TITLE = "TRUECOLOR OPTIONS"; -TCMNU_MULTITHREADED = "Multithreaded Drawing"; TCMNU_TRUECOLOR = "True color output"; TCMNU_MINFILTER = "Linear filter when downscaling"; TCMNU_MAGFILTER = "Linear filter when upscaling"; diff --git a/wadsrc/static/menudef.txt b/wadsrc/static/menudef.txt index 314534bb7d..d069065acc 100644 --- a/wadsrc/static/menudef.txt +++ b/wadsrc/static/menudef.txt @@ -675,7 +675,6 @@ OptionMenu "OpenGLOptions" OptionMenu "TrueColorOptions" { Title "$TCMNU_TITLE" - Option "$TCMNU_MULTITHREADED", "r_multithreaded", "OnOff" StaticText " " //Option "$TCMNU_TRUECOLOR", "swtruecolor", "OnOff" Option "$TCMNU_MINFILTER", "r_minfilter", "OnOff" @@ -705,6 +704,7 @@ OptionMenu "VideoOptions" Slider 
"$DSPLYMNU_PICKUPFADE", "pickup_fade_scalar", 0.0, 1.0, 0.05, 2 Slider "$DSPLYMNU_WATERFADE", "underwater_fade_scalar", 0.0, 1.0, 0.05, 2 Option "$DSPLYMNU_BLENDMETHOD", "r_blendmethod", "BlendMethods" + Option "$DSPLYMNU_MULTITHREADED", "r_multithreaded", "OnOff" StaticText " " Option "$DSPLYMNU_WIPETYPE", "wipetype", "Wipes" From 3e59ebb48d15e2aecd2744f2ba2ad169a5268f71 Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Mon, 2 Jan 2017 18:24:27 -0500 Subject: [PATCH 628/912] - Removed multithreaded from the menu completely. # Conflicts: # wadsrc/static/menudef.txt --- wadsrc/static/language.enu | 1 - wadsrc/static/menudef.txt | 1 - 2 files changed, 2 deletions(-) diff --git a/wadsrc/static/language.enu b/wadsrc/static/language.enu index 250de5f18b..ae500c1529 100644 --- a/wadsrc/static/language.enu +++ b/wadsrc/static/language.enu @@ -1811,7 +1811,6 @@ DSPLYMNU_DIMCOLOR = "Dim color"; DSPLYMNU_MOVEBOB = "View bob amount while moving"; DSPLYMNU_STILLBOB = "View bob amount while not moving"; DSPLYMNU_BOBSPEED = "Weapon bob speed"; -DSPLYMNU_MULTITHREADED = "Multithreaded Drawing"; // HUD Options HUDMNU_TITLE = "HUD Options"; diff --git a/wadsrc/static/menudef.txt b/wadsrc/static/menudef.txt index d069065acc..f88b34286d 100644 --- a/wadsrc/static/menudef.txt +++ b/wadsrc/static/menudef.txt @@ -704,7 +704,6 @@ OptionMenu "VideoOptions" Slider "$DSPLYMNU_PICKUPFADE", "pickup_fade_scalar", 0.0, 1.0, 0.05, 2 Slider "$DSPLYMNU_WATERFADE", "underwater_fade_scalar", 0.0, 1.0, 0.05, 2 Option "$DSPLYMNU_BLENDMETHOD", "r_blendmethod", "BlendMethods" - Option "$DSPLYMNU_MULTITHREADED", "r_multithreaded", "OnOff" StaticText " " Option "$DSPLYMNU_WIPETYPE", "wipetype", "Wipes" From 4125da9fc3d274784ffd50f65301ab03364c57d8 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 3 Jan 2017 04:29:06 +0100 Subject: [PATCH 629/912] Fix speed regression caused by the DrawerThread object being recreated every frame --- src/swrenderer/drawers/r_thread.cpp | 2 +- 
src/swrenderer/drawers/r_thread.h | 12 +----------- 2 files changed, 2 insertions(+), 12 deletions(-) diff --git a/src/swrenderer/drawers/r_thread.cpp b/src/swrenderer/drawers/r_thread.cpp index 1dea2b3566..d629a23f2a 100644 --- a/src/swrenderer/drawers/r_thread.cpp +++ b/src/swrenderer/drawers/r_thread.cpp @@ -114,7 +114,7 @@ void DrawerCommandQueue::Finish() // Do one thread ourselves: - DrawerThread thread; + static DrawerThread thread; thread.core = 0; thread.num_cores = (int)(queue->threads.size() + 1); diff --git a/src/swrenderer/drawers/r_thread.h b/src/swrenderer/drawers/r_thread.h index c3e818abb9..38d803d4aa 100644 --- a/src/swrenderer/drawers/r_thread.h +++ b/src/swrenderer/drawers/r_thread.h @@ -202,17 +202,7 @@ public: if (queue->threaded_render == 0 || !r_multithreaded) { T command(std::forward(args)...); - VectoredTryCatch(&command, - [](void *data) - { - T *c = (T*)data; - c->Execute(&Instance()->single_core_thread); - }, - [](void *data, const char *reason, bool fatal) - { - T *c = (T*)data; - ReportDrawerError(c, false, reason, fatal); - }); + command.Execute(&Instance()->single_core_thread); } else { From a9fbd421fb59b86600d3c2cb71c67d591c1b2c25 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 3 Jan 2017 07:13:40 +0100 Subject: [PATCH 630/912] Move line handling to r_line and drawseg drawing to r_drawsegment and then remove r_segs --- src/CMakeLists.txt | 1 - src/swrenderer/line/r_line.cpp | 1117 ++++++++++++ src/swrenderer/line/r_line.h | 12 + src/swrenderer/line/r_walldraw.cpp | 1 - src/swrenderer/plane/r_flatplane.cpp | 1 - src/swrenderer/plane/r_fogboundary.cpp | 1 - src/swrenderer/plane/r_skyplane.cpp | 1 - src/swrenderer/plane/r_slopeplane.cpp | 1 - src/swrenderer/r_main.cpp | 1 - src/swrenderer/r_main.h | 2 + src/swrenderer/scene/r_3dfloors.cpp | 1 - src/swrenderer/scene/r_bsp.cpp | 275 +-- src/swrenderer/scene/r_bsp.h | 12 +- src/swrenderer/scene/r_portal.cpp | 1 - src/swrenderer/scene/r_segs.cpp | 1796 ------------------- 
src/swrenderer/scene/r_segs.h | 52 - src/swrenderer/scene/r_things.cpp | 3 - src/swrenderer/segments/r_clipsegment.cpp | 1 - src/swrenderer/segments/r_drawsegment.cpp | 855 ++++++++- src/swrenderer/segments/r_drawsegment.h | 4 + src/swrenderer/segments/r_portalsegment.cpp | 1 - src/swrenderer/things/r_decal.cpp | 1 - src/swrenderer/things/r_particle.cpp | 1 - src/swrenderer/things/r_playersprite.cpp | 1 - src/swrenderer/things/r_wallsprite.cpp | 1 - 25 files changed, 2000 insertions(+), 2143 deletions(-) delete mode 100644 src/swrenderer/scene/r_segs.cpp delete mode 100644 src/swrenderer/scene/r_segs.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 54392b3398..d3eea4bab5 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -847,7 +847,6 @@ set( FASTMATH_PCH_SOURCES swrenderer/drawers/r_thread.cpp swrenderer/scene/r_3dfloors.cpp swrenderer/scene/r_bsp.cpp - swrenderer/scene/r_segs.cpp swrenderer/scene/r_things.cpp swrenderer/scene/r_portal.cpp swrenderer/line/r_line.cpp diff --git a/src/swrenderer/line/r_line.cpp b/src/swrenderer/line/r_line.cpp index b2a1a3fa2e..93d7a9bf41 100644 --- a/src/swrenderer/line/r_line.cpp +++ b/src/swrenderer/line/r_line.cpp @@ -7,6 +7,7 @@ #include "doomstat.h" #include "doomdata.h" #include "p_lnspec.h" +#include "p_setup.h" #include "r_sky.h" #include "v_video.h" #include "m_swap.h" @@ -21,10 +22,27 @@ #include "swrenderer/r_main.h" #include "swrenderer/r_memory.h" #include "swrenderer/scene/r_bsp.h" +#include "swrenderer/scene/r_3dfloors.h" +#include "swrenderer/scene/r_portal.h" #include "swrenderer/line/r_line.h" +#include "swrenderer/line/r_walldraw.h" +#include "swrenderer/line/r_wallsetup.h" +#include "swrenderer/drawers/r_draw.h" +#include "swrenderer/segments/r_clipsegment.h" +#include "swrenderer/segments/r_drawsegment.h" +#include "swrenderer/plane/r_visibleplane.h" +#include "swrenderer/things/r_decal.h" + +CVAR(Bool, r_fogboundary, true, 0) +CVAR(Bool, r_drawmirrors, true, 0) +EXTERN_CVAR(Bool, 
r_fullbrightignoresectorcolor); namespace swrenderer { + subsector_t *InSubsector; + sector_t *frontsector; + sector_t *backsector; + seg_t *curline; side_t *sidedef; line_t *linedef; @@ -69,6 +87,1105 @@ namespace swrenderer FTexture *rw_pic; + bool markfloor; // False if the back side is the same plane. + bool markceiling; + FTexture *toptexture; + FTexture *bottomtexture; + FTexture *midtexture; + + namespace + { + bool doorclosed; + int wallshade; + } + + void R_AddLine(seg_t *line) + { + static sector_t tempsec; // killough 3/8/98: ceiling/water hack + bool solid; + DVector2 pt1, pt2; + + curline = line; + + // [RH] Color if not texturing line + drawerargs::dc_color = (((int)(line - segs) * 8) + 4) & 255; + + pt1 = line->v1->fPos() - ViewPos; + pt2 = line->v2->fPos() - ViewPos; + + // Reject lines not facing viewer + if (pt1.Y * (pt1.X - pt2.X) + pt1.X * (pt2.Y - pt1.Y) >= 0) + return; + + if (WallC.Init(pt1, pt2, 32.0 / (1 << 12))) + return; + + if (WallC.sx1 >= WindowRight || WallC.sx2 <= WindowLeft) + return; + + if (line->linedef == NULL) + { + if (R_CheckClipWallSegment(WallC.sx1, WallC.sx2)) + { + InSubsector->flags |= SSECF_DRAWN; + } + return; + } + + // reject lines that aren't seen from the portal (if any) + // [ZZ] 10.01.2016: lines inside a skybox shouldn't be clipped, although this imposes some limitations on portals in skyboxes. + if (!CurrentPortalInSkybox && CurrentPortal && P_ClipLineToPortal(line->linedef, CurrentPortal->dst, ViewPos)) + return; + + vertex_t *v1, *v2; + v1 = line->linedef->v1; + v2 = line->linedef->v2; + + if ((v1 == line->v1 && v2 == line->v2) || (v2 == line->v1 && v1 == line->v2)) + { // The seg is the entire wall. + WallT.InitFromWallCoords(&WallC); + } + else + { // The seg is only part of the wall. 
+ if (line->linedef->sidedef[0] != line->sidedef) + { + swapvalues(v1, v2); + } + WallT.InitFromLine(v1->fPos() - ViewPos, v2->fPos() - ViewPos); + } + + if (!(fake3D & FAKE3D_FAKEBACK)) + { + backsector = line->backsector; + } + rw_frontcz1 = frontsector->ceilingplane.ZatPoint(line->v1); + rw_frontfz1 = frontsector->floorplane.ZatPoint(line->v1); + rw_frontcz2 = frontsector->ceilingplane.ZatPoint(line->v2); + rw_frontfz2 = frontsector->floorplane.ZatPoint(line->v2); + + rw_mustmarkfloor = rw_mustmarkceiling = false; + rw_havehigh = rw_havelow = false; + + // Single sided line? + if (backsector == NULL) + { + solid = true; + } + else + { + // kg3D - its fake, no transfer_heights + if (!(fake3D & FAKE3D_FAKEBACK)) + { // killough 3/8/98, 4/4/98: hack for invisible ceilings / deep water + backsector = R_FakeFlat(backsector, &tempsec, NULL, NULL, true); + } + doorclosed = false; // killough 4/16/98 + + rw_backcz1 = backsector->ceilingplane.ZatPoint(line->v1); + rw_backfz1 = backsector->floorplane.ZatPoint(line->v1); + rw_backcz2 = backsector->ceilingplane.ZatPoint(line->v2); + rw_backfz2 = backsector->floorplane.ZatPoint(line->v2); + + if (fake3D & FAKE3D_FAKEBACK) + { + if (rw_frontfz1 >= rw_backfz1 && rw_frontfz2 >= rw_backfz2) + { + fake3D |= FAKE3D_CLIPBOTFRONT; + } + if (rw_frontcz1 <= rw_backcz1 && rw_frontcz2 <= rw_backcz2) + { + fake3D |= FAKE3D_CLIPTOPFRONT; + } + } + + // Cannot make these walls solid, because it can result in + // sprite clipping problems for sprites near the wall + if (rw_frontcz1 > rw_backcz1 || rw_frontcz2 > rw_backcz2) + { + rw_havehigh = true; + R_CreateWallSegmentYSloped(wallupper, backsector->ceilingplane, &WallC, curline, MirrorFlags & RF_XFLIP); + } + if (rw_frontfz1 < rw_backfz1 || rw_frontfz2 < rw_backfz2) + { + rw_havelow = true; + R_CreateWallSegmentYSloped(walllower, backsector->floorplane, &WallC, curline, MirrorFlags & RF_XFLIP); + } + + // Portal + if (line->linedef->isVisualPortal() && line->sidedef == 
line->linedef->sidedef[0]) + { + solid = true; + } + // Closed door. + else if ((rw_backcz1 <= rw_frontfz1 && rw_backcz2 <= rw_frontfz2) || + (rw_backfz1 >= rw_frontcz1 && rw_backfz2 >= rw_frontcz2)) + { + solid = true; + } + else if ( + // properly render skies (consider door "open" if both ceilings are sky): + (backsector->GetTexture(sector_t::ceiling) != skyflatnum || frontsector->GetTexture(sector_t::ceiling) != skyflatnum) + + // if door is closed because back is shut: + && rw_backcz1 <= rw_backfz1 && rw_backcz2 <= rw_backfz2 + + // preserve a kind of transparent door/lift special effect: + && ((rw_backcz1 >= rw_frontcz1 && rw_backcz2 >= rw_frontcz2) || line->sidedef->GetTexture(side_t::top).isValid()) + && ((rw_backfz1 <= rw_frontfz1 && rw_backfz2 <= rw_frontfz2) || line->sidedef->GetTexture(side_t::bottom).isValid())) + { + // killough 1/18/98 -- This function is used to fix the automap bug which + // showed lines behind closed doors simply because the door had a dropoff. + // + // It assumes that Doom has already ruled out a door being closed because + // of front-back closure (e.g. front floor is taller than back ceiling). + + // This fixes the automap floor height bug -- killough 1/18/98: + // killough 4/7/98: optimize: save result in doorclosed for use in r_segs.c + doorclosed = true; + solid = true; + } + else if (frontsector->ceilingplane != backsector->ceilingplane || + frontsector->floorplane != backsector->floorplane) + { + // Window. 
+ solid = false; + } + else if (R_SkyboxCompare(frontsector, backsector)) + { + solid = false; + } + else if (backsector->lightlevel != frontsector->lightlevel + || backsector->GetTexture(sector_t::floor) != frontsector->GetTexture(sector_t::floor) + || backsector->GetTexture(sector_t::ceiling) != frontsector->GetTexture(sector_t::ceiling) + || curline->sidedef->GetTexture(side_t::mid).isValid() + + // killough 3/7/98: Take flats offsets into account: + || backsector->planes[sector_t::floor].xform != frontsector->planes[sector_t::floor].xform + || backsector->planes[sector_t::ceiling].xform != frontsector->planes[sector_t::ceiling].xform + + || backsector->GetPlaneLight(sector_t::floor) != frontsector->GetPlaneLight(sector_t::floor) + || backsector->GetPlaneLight(sector_t::ceiling) != frontsector->GetPlaneLight(sector_t::ceiling) + || backsector->GetVisFlags(sector_t::floor) != frontsector->GetVisFlags(sector_t::floor) + || backsector->GetVisFlags(sector_t::ceiling) != frontsector->GetVisFlags(sector_t::ceiling) + + // [RH] Also consider colormaps + || backsector->ColorMap != frontsector->ColorMap + + + + // kg3D - and fake lights + || (frontsector->e && frontsector->e->XFloor.lightlist.Size()) + || (backsector->e && backsector->e->XFloor.lightlist.Size()) + ) + { + solid = false; + } + else + { + // Reject empty lines used for triggers and special events. + // Identical floor and ceiling on both sides, identical light levels + // on both sides, and no middle texture. + + // When using GL nodes, do a clipping test for these lines so we can + // mark their subsectors as visible for automap texturing. 
+ if (hasglnodes && !(InSubsector->flags & SSECF_DRAWN)) + { + if (R_CheckClipWallSegment(WallC.sx1, WallC.sx2)) + { + InSubsector->flags |= SSECF_DRAWN; + } + } + return; + } + } + + rw_prepped = false; + + if (line->linedef->special == Line_Horizon) + { + // Be aware: Line_Horizon does not work properly with sloped planes + fillshort(walltop + WallC.sx1, WallC.sx2 - WallC.sx1, centery); + fillshort(wallbottom + WallC.sx1, WallC.sx2 - WallC.sx1, centery); + } + else + { + rw_ceilstat = R_CreateWallSegmentYSloped(walltop, frontsector->ceilingplane, &WallC, curline, MirrorFlags & RF_XFLIP); + rw_floorstat = R_CreateWallSegmentYSloped(wallbottom, frontsector->floorplane, &WallC, curline, MirrorFlags & RF_XFLIP); + + // [RH] treat off-screen walls as solid +#if 0 // Maybe later... + if (!solid) + { + if (rw_ceilstat == 12 && line->sidedef->GetTexture(side_t::top) != 0) + { + rw_mustmarkceiling = true; + solid = true; + } + if (rw_floorstat == 3 && line->sidedef->GetTexture(side_t::bottom) != 0) + { + rw_mustmarkfloor = true; + solid = true; + } + } +#endif + } + + if (R_ClipWallSegment(WallC.sx1, WallC.sx2, solid, R_StoreWallRange)) + { + InSubsector->flags |= SSECF_DRAWN; + } + } + + bool R_SkyboxCompare(sector_t *frontsector, sector_t *backsector) + { + FSectorPortal *frontc = frontsector->GetPortal(sector_t::ceiling); + FSectorPortal *frontf = frontsector->GetPortal(sector_t::floor); + FSectorPortal *backc = backsector->GetPortal(sector_t::ceiling); + FSectorPortal *backf = backsector->GetPortal(sector_t::floor); + + // return true if any of the planes has a linedef-based portal (unless both sides have the same one. + // Ideally this should also check thing based portals but the omission of this check had been abused to hell and back for those. + // (Note: This may require a compatibility option if some maps ran into this for line based portals as well.) 
+ if (!frontc->MergeAllowed()) return (frontc != backc); + if (!frontf->MergeAllowed()) return (frontf != backf); + if (!backc->MergeAllowed()) return true; + if (!backf->MergeAllowed()) return true; + return false; + } + + // A wall segment will be drawn between start and stop pixels (inclusive). + bool R_StoreWallRange(int start, int stop) + { + int i; + bool maskedtexture = false; + +#ifdef RANGECHECK + if (start >= viewwidth || start >= stop) + I_FatalError("Bad R_StoreWallRange: %i to %i", start, stop); +#endif + + drawseg_t *draw_segment = R_AddDrawSegment(); + + if (!rw_prepped) + { + rw_prepped = true; + R_NewWall(true); + } + + rw_offset = FLOAT2FIXED(sidedef->GetTextureXOffset(side_t::mid)); + rw_light = rw_lightleft + rw_lightstep * (start - WallC.sx1); + + draw_segment->CurrentPortalUniq = CurrentPortalUniq; + draw_segment->sx1 = WallC.sx1; + draw_segment->sx2 = WallC.sx2; + draw_segment->sz1 = WallC.sz1; + draw_segment->sz2 = WallC.sz2; + draw_segment->cx = WallC.tleft.X;; + draw_segment->cy = WallC.tleft.Y; + draw_segment->cdx = WallC.tright.X - WallC.tleft.X; + draw_segment->cdy = WallC.tright.Y - WallC.tleft.Y; + draw_segment->tmapvals = WallT; + draw_segment->siz1 = 1 / WallC.sz1; + draw_segment->siz2 = 1 / WallC.sz2; + draw_segment->x1 = start; + draw_segment->x2 = stop; + draw_segment->curline = curline; + draw_segment->bFogBoundary = false; + draw_segment->bFakeBoundary = false; + if (fake3D & 7) draw_segment->fake = 1; + else draw_segment->fake = 0; + + draw_segment->sprtopclip = draw_segment->sprbottomclip = draw_segment->maskedtexturecol = draw_segment->bkup = draw_segment->swall = -1; + + if (rw_markportal) + { + draw_segment->silhouette = SIL_BOTH; + } + else if (backsector == NULL) + { + draw_segment->sprtopclip = R_NewOpening(stop - start); + draw_segment->sprbottomclip = R_NewOpening(stop - start); + fillshort(openings + draw_segment->sprtopclip, stop - start, viewheight); + memset(openings + draw_segment->sprbottomclip, -1, (stop - 
start) * sizeof(short)); + draw_segment->silhouette = SIL_BOTH; + } + else + { + // two sided line + draw_segment->silhouette = 0; + + if (rw_frontfz1 > rw_backfz1 || rw_frontfz2 > rw_backfz2 || + backsector->floorplane.PointOnSide(ViewPos) < 0) + { + draw_segment->silhouette = SIL_BOTTOM; + } + + if (rw_frontcz1 < rw_backcz1 || rw_frontcz2 < rw_backcz2 || + backsector->ceilingplane.PointOnSide(ViewPos) < 0) + { + draw_segment->silhouette |= SIL_TOP; + } + + // killough 1/17/98: this test is required if the fix + // for the automap bug (r_bsp.c) is used, or else some + // sprites will be displayed behind closed doors. That + // fix prevents lines behind closed doors with dropoffs + // from being displayed on the automap. + // + // killough 4/7/98: make doorclosed external variable + + { + if (doorclosed || (rw_backcz1 <= rw_frontfz1 && rw_backcz2 <= rw_frontfz2)) + { + draw_segment->sprbottomclip = R_NewOpening(stop - start); + memset(openings + draw_segment->sprbottomclip, -1, (stop - start) * sizeof(short)); + draw_segment->silhouette |= SIL_BOTTOM; + } + if (doorclosed || (rw_backfz1 >= rw_frontcz1 && rw_backfz2 >= rw_frontcz2)) + { // killough 1/17/98, 2/8/98 + draw_segment->sprtopclip = R_NewOpening(stop - start); + fillshort(openings + draw_segment->sprtopclip, stop - start, viewheight); + draw_segment->silhouette |= SIL_TOP; + } + } + + if (!draw_segment->fake && r_3dfloors && backsector->e && backsector->e->XFloor.ffloors.Size()) { + for (i = 0; i < (int)backsector->e->XFloor.ffloors.Size(); i++) { + F3DFloor *rover = backsector->e->XFloor.ffloors[i]; + if (rover->flags & FF_RENDERSIDES && (!(rover->flags & FF_INVERTSIDES) || rover->flags & FF_ALLSIDES)) { + draw_segment->bFakeBoundary |= 1; + break; + } + } + } + if (!draw_segment->fake && r_3dfloors && frontsector->e && frontsector->e->XFloor.ffloors.Size()) { + for (i = 0; i < (int)frontsector->e->XFloor.ffloors.Size(); i++) { + F3DFloor *rover = frontsector->e->XFloor.ffloors[i]; + if (rover->flags & 
FF_RENDERSIDES && (rover->flags & FF_ALLSIDES || rover->flags & FF_INVERTSIDES)) { + draw_segment->bFakeBoundary |= 2; + break; + } + } + } + // kg3D - no for fakes + if (!draw_segment->fake) + // allocate space for masked texture tables, if needed + // [RH] Don't just allocate the space; fill it in too. + if ((TexMan(sidedef->GetTexture(side_t::mid), true)->UseType != FTexture::TEX_Null || draw_segment->bFakeBoundary || IsFogBoundary(frontsector, backsector)) && + (rw_ceilstat != 12 || !sidedef->GetTexture(side_t::top).isValid()) && + (rw_floorstat != 3 || !sidedef->GetTexture(side_t::bottom).isValid()) && + (WallC.sz1 >= TOO_CLOSE_Z && WallC.sz2 >= TOO_CLOSE_Z)) + { + float *swal; + fixed_t *lwal; + int i; + + maskedtexture = true; + + // kg3D - backup for mid and fake walls + draw_segment->bkup = R_NewOpening(stop - start); + memcpy(openings + draw_segment->bkup, &ceilingclip[start], sizeof(short)*(stop - start)); + + draw_segment->bFogBoundary = IsFogBoundary(frontsector, backsector); + if (sidedef->GetTexture(side_t::mid).isValid() || draw_segment->bFakeBoundary) + { + if (sidedef->GetTexture(side_t::mid).isValid()) + draw_segment->bFakeBoundary |= 4; // it is also mid texture + + // note: This should never have used the openings array to store its data! 
+ draw_segment->maskedtexturecol = R_NewOpening((stop - start) * 2); + draw_segment->swall = R_NewOpening((stop - start) * 2); + + lwal = (fixed_t *)(openings + draw_segment->maskedtexturecol); + swal = (float *)(openings + draw_segment->swall); + FTexture *pic = TexMan(sidedef->GetTexture(side_t::mid), true); + double yscale = pic->Scale.Y * sidedef->GetTextureYScale(side_t::mid); + fixed_t xoffset = FLOAT2FIXED(sidedef->GetTextureXOffset(side_t::mid)); + + if (pic->bWorldPanning) + { + xoffset = xs_RoundToInt(xoffset * lwallscale); + } + + for (i = start; i < stop; i++) + { + *lwal++ = lwall[i] + xoffset; + *swal++ = swall[i]; + } + + double istart = *((float *)(openings + draw_segment->swall)) * yscale; + double iend = *(swal - 1) * yscale; +#if 0 + ///This was for avoiding overflow when using fixed point. It might not be needed anymore. + const double mini = 3 / 65536.0; + if (istart < mini && istart >= 0) istart = mini; + if (istart > -mini && istart < 0) istart = -mini; + if (iend < mini && iend >= 0) iend = mini; + if (iend > -mini && iend < 0) iend = -mini; +#endif + istart = 1 / istart; + iend = 1 / iend; + draw_segment->yscale = (float)yscale; + draw_segment->iscale = (float)istart; + if (stop - start > 0) + { + draw_segment->iscalestep = float((iend - istart) / (stop - start)); + } + else + { + draw_segment->iscalestep = 0; + } + } + draw_segment->light = rw_light; + draw_segment->lightstep = rw_lightstep; + + // Masked midtextures should get the light level from the sector they reference, + // not from the current subsector, which is what the current wallshade value + // comes from. We make an exeption for polyobjects, however, since their "home" + // sector should be whichever one they move into. 
+ if (curline->sidedef->Flags & WALLF_POLYOBJ) + { + draw_segment->shade = wallshade; + } + else + { + draw_segment->shade = LIGHT2SHADE(curline->sidedef->GetLightLevel(foggy, curline->frontsector->lightlevel) + + r_actualextralight); + } + + if (draw_segment->bFogBoundary || draw_segment->maskedtexturecol != -1) + { + size_t drawsegnum = draw_segment - drawsegs; + InterestingDrawsegs.Push(drawsegnum); + } + } + } + + // render it + if (markceiling) + { + if (ceilingplane) + { // killough 4/11/98: add NULL ptr checks + ceilingplane = R_CheckPlane(ceilingplane, start, stop); + } + else + { + markceiling = false; + } + } + + if (markfloor) + { + if (floorplane) + { // killough 4/11/98: add NULL ptr checks + floorplane = R_CheckPlane(floorplane, start, stop); + } + else + { + markfloor = false; + } + } + + R_RenderSegLoop(start, stop); + + if (fake3D & 7) { + return !(fake3D & FAKE3D_FAKEMASK); + } + + // save sprite clipping info + if (((draw_segment->silhouette & SIL_TOP) || maskedtexture) && draw_segment->sprtopclip == -1) + { + draw_segment->sprtopclip = R_NewOpening(stop - start); + memcpy(openings + draw_segment->sprtopclip, &ceilingclip[start], sizeof(short)*(stop - start)); + } + + if (((draw_segment->silhouette & SIL_BOTTOM) || maskedtexture) && draw_segment->sprbottomclip == -1) + { + draw_segment->sprbottomclip = R_NewOpening(stop - start); + memcpy(openings + draw_segment->sprbottomclip, &floorclip[start], sizeof(short)*(stop - start)); + } + + if (maskedtexture && curline->sidedef->GetTexture(side_t::mid).isValid()) + { + draw_segment->silhouette |= SIL_TOP | SIL_BOTTOM; + } + + // [RH] Draw any decals bound to the seg + // [ZZ] Only if not an active mirror + if (!rw_markportal) + { + R_RenderDecals(curline->sidedef, draw_segment, wallshade); + } + + if (rw_markportal) + { + PortalDrawseg pds; + pds.src = curline->linedef; + pds.dst = curline->linedef->special == Line_Mirror ? 
curline->linedef : curline->linedef->getPortalDestination(); + pds.x1 = draw_segment->x1; + pds.x2 = draw_segment->x2; + pds.len = pds.x2 - pds.x1; + pds.ceilingclip.Resize(pds.len); + memcpy(&pds.ceilingclip[0], openings + draw_segment->sprtopclip, pds.len * sizeof(*openings)); + pds.floorclip.Resize(pds.len); + memcpy(&pds.floorclip[0], openings + draw_segment->sprbottomclip, pds.len * sizeof(*openings)); + + for (int i = 0; i < pds.x2 - pds.x1; i++) + { + if (pds.ceilingclip[i] < 0) + pds.ceilingclip[i] = 0; + if (pds.ceilingclip[i] >= viewheight) + pds.ceilingclip[i] = viewheight - 1; + if (pds.floorclip[i] < 0) + pds.floorclip[i] = 0; + if (pds.floorclip[i] >= viewheight) + pds.floorclip[i] = viewheight - 1; + } + + pds.mirror = curline->linedef->special == Line_Mirror; + WallPortals.Push(pds); + } + + return !(fake3D & FAKE3D_FAKEMASK); + } + + void R_NewWall(bool needlights) + { + double rowoffset; + double yrepeat; + + rw_markportal = false; + + sidedef = curline->sidedef; + linedef = curline->linedef; + + // mark the segment as visible for auto map + if (!r_dontmaplines) linedef->flags |= ML_MAPPED; + + midtexture = toptexture = bottomtexture = 0; + + if (sidedef == linedef->sidedef[0] && + (linedef->special == Line_Mirror && r_drawmirrors)) // [ZZ] compatibility with r_drawmirrors cvar that existed way before portals + { + markfloor = markceiling = true; // act like a one-sided wall here (todo: check how does this work with transparency) + rw_markportal = true; + } + else if (backsector == NULL) + { + // single sided line + // a single sided line is terminal, so it must mark ends + markfloor = markceiling = true; + // [RH] Horizon lines do not need to be textured + if (linedef->isVisualPortal()) + { + rw_markportal = true; + } + else if (linedef->special != Line_Horizon) + { + midtexture = TexMan(sidedef->GetTexture(side_t::mid), true); + rw_offset_mid = FLOAT2FIXED(sidedef->GetTextureXOffset(side_t::mid)); + rowoffset = 
sidedef->GetTextureYOffset(side_t::mid); + rw_midtexturescalex = sidedef->GetTextureXScale(side_t::mid); + rw_midtexturescaley = sidedef->GetTextureYScale(side_t::mid); + yrepeat = midtexture->Scale.Y * rw_midtexturescaley; + if (yrepeat >= 0) + { // normal orientation + if (linedef->flags & ML_DONTPEGBOTTOM) + { // bottom of texture at bottom + rw_midtexturemid = (frontsector->GetPlaneTexZ(sector_t::floor) - ViewPos.Z) * yrepeat + midtexture->GetHeight(); + } + else + { // top of texture at top + rw_midtexturemid = (frontsector->GetPlaneTexZ(sector_t::ceiling) - ViewPos.Z) * yrepeat; + if (rowoffset < 0 && midtexture != NULL) + { + rowoffset += midtexture->GetHeight(); + } + } + } + else + { // upside down + rowoffset = -rowoffset; + if (linedef->flags & ML_DONTPEGBOTTOM) + { // top of texture at bottom + rw_midtexturemid = (frontsector->GetPlaneTexZ(sector_t::floor) - ViewPos.Z) * yrepeat; + } + else + { // bottom of texture at top + rw_midtexturemid = (frontsector->GetPlaneTexZ(sector_t::ceiling) - ViewPos.Z) * yrepeat + midtexture->GetHeight(); + } + } + if (midtexture->bWorldPanning) + { + rw_midtexturemid += rowoffset * yrepeat; + } + else + { + // rowoffset is added outside the multiply so that it positions the texture + // by texels instead of world units. 
+ rw_midtexturemid += rowoffset; + } + } + } + else + { // two-sided line + // hack to allow height changes in outdoor areas + + double rw_frontlowertop = frontsector->GetPlaneTexZ(sector_t::ceiling); + + if (frontsector->GetTexture(sector_t::ceiling) == skyflatnum && + backsector->GetTexture(sector_t::ceiling) == skyflatnum) + { + if (rw_havehigh) + { // front ceiling is above back ceiling + memcpy(&walltop[WallC.sx1], &wallupper[WallC.sx1], (WallC.sx2 - WallC.sx1) * sizeof(walltop[0])); + rw_havehigh = false; + } + else if (rw_havelow && frontsector->ceilingplane != backsector->ceilingplane) + { // back ceiling is above front ceiling + // The check for rw_havelow is not Doom-compliant, but it avoids HoM that + // would otherwise occur because there is space made available for this + // wall but nothing to draw for it. + // Recalculate walltop so that the wall is clipped by the back sector's + // ceiling instead of the front sector's ceiling. + R_CreateWallSegmentYSloped(walltop, backsector->ceilingplane, &WallC, curline, MirrorFlags & RF_XFLIP); + } + // Putting sky ceilings on the front and back of a line alters the way unpegged + // positioning works. 
+ rw_frontlowertop = backsector->GetPlaneTexZ(sector_t::ceiling); + } + + if (linedef->isVisualPortal()) + { + markceiling = markfloor = true; + } + else if ((rw_backcz1 <= rw_frontfz1 && rw_backcz2 <= rw_frontfz2) || + (rw_backfz1 >= rw_frontcz1 && rw_backfz2 >= rw_frontcz2)) + { + // closed door + markceiling = markfloor = true; + } + else + { + markfloor = rw_mustmarkfloor + || backsector->floorplane != frontsector->floorplane + || backsector->lightlevel != frontsector->lightlevel + || backsector->GetTexture(sector_t::floor) != frontsector->GetTexture(sector_t::floor) + || backsector->GetPlaneLight(sector_t::floor) != frontsector->GetPlaneLight(sector_t::floor) + + // killough 3/7/98: Add checks for (x,y) offsets + || backsector->planes[sector_t::floor].xform != frontsector->planes[sector_t::floor].xform + || backsector->GetAlpha(sector_t::floor) != frontsector->GetAlpha(sector_t::floor) + + // killough 4/15/98: prevent 2s normals + // from bleeding through deep water + || frontsector->heightsec + + || backsector->GetVisFlags(sector_t::floor) != frontsector->GetVisFlags(sector_t::floor) + + // [RH] Add checks for colormaps + || backsector->ColorMap != frontsector->ColorMap + + + // kg3D - add fake lights + || (frontsector->e && frontsector->e->XFloor.lightlist.Size()) + || (backsector->e && backsector->e->XFloor.lightlist.Size()) + + || (sidedef->GetTexture(side_t::mid).isValid() && + ((linedef->flags & (ML_CLIP_MIDTEX | ML_WRAP_MIDTEX)) || + (sidedef->Flags & (WALLF_CLIP_MIDTEX | WALLF_WRAP_MIDTEX)))) + ; + + markceiling = (frontsector->GetTexture(sector_t::ceiling) != skyflatnum || + backsector->GetTexture(sector_t::ceiling) != skyflatnum) && + (rw_mustmarkceiling + || backsector->ceilingplane != frontsector->ceilingplane + || backsector->lightlevel != frontsector->lightlevel + || backsector->GetTexture(sector_t::ceiling) != frontsector->GetTexture(sector_t::ceiling) + + // killough 3/7/98: Add checks for (x,y) offsets + || 
backsector->planes[sector_t::ceiling].xform != frontsector->planes[sector_t::ceiling].xform + || backsector->GetAlpha(sector_t::ceiling) != frontsector->GetAlpha(sector_t::ceiling) + + // killough 4/15/98: prevent 2s normals + // from bleeding through fake ceilings + || (frontsector->heightsec && frontsector->GetTexture(sector_t::ceiling) != skyflatnum) + + || backsector->GetPlaneLight(sector_t::ceiling) != frontsector->GetPlaneLight(sector_t::ceiling) + || backsector->GetFlags(sector_t::ceiling) != frontsector->GetFlags(sector_t::ceiling) + + // [RH] Add check for colormaps + || backsector->ColorMap != frontsector->ColorMap + + // kg3D - add fake lights + || (frontsector->e && frontsector->e->XFloor.lightlist.Size()) + || (backsector->e && backsector->e->XFloor.lightlist.Size()) + + || (sidedef->GetTexture(side_t::mid).isValid() && + ((linedef->flags & (ML_CLIP_MIDTEX | ML_WRAP_MIDTEX)) || + (sidedef->Flags & (WALLF_CLIP_MIDTEX | WALLF_WRAP_MIDTEX)))) + ); + } + + if (rw_havehigh) + { // top texture + toptexture = TexMan(sidedef->GetTexture(side_t::top), true); + + rw_offset_top = FLOAT2FIXED(sidedef->GetTextureXOffset(side_t::top)); + rowoffset = sidedef->GetTextureYOffset(side_t::top); + rw_toptexturescalex = sidedef->GetTextureXScale(side_t::top); + rw_toptexturescaley = sidedef->GetTextureYScale(side_t::top); + yrepeat = toptexture->Scale.Y * rw_toptexturescaley; + if (yrepeat >= 0) + { // normal orientation + if (linedef->flags & ML_DONTPEGTOP) + { // top of texture at top + rw_toptexturemid = (frontsector->GetPlaneTexZ(sector_t::ceiling) - ViewPos.Z) * yrepeat; + if (rowoffset < 0 && toptexture != NULL) + { + rowoffset += toptexture->GetHeight(); + } + } + else + { // bottom of texture at bottom + rw_toptexturemid = (backsector->GetPlaneTexZ(sector_t::ceiling) - ViewPos.Z) * yrepeat + toptexture->GetHeight(); + } + } + else + { // upside down + rowoffset = -rowoffset; + if (linedef->flags & ML_DONTPEGTOP) + { // bottom of texture at top + rw_toptexturemid = 
(frontsector->GetPlaneTexZ(sector_t::ceiling) - ViewPos.Z) * yrepeat + toptexture->GetHeight(); + } + else + { // top of texture at bottom + rw_toptexturemid = (backsector->GetPlaneTexZ(sector_t::ceiling) - ViewPos.Z) * yrepeat; + } + } + if (toptexture->bWorldPanning) + { + rw_toptexturemid += rowoffset * yrepeat; + } + else + { + rw_toptexturemid += rowoffset; + } + } + if (rw_havelow) + { // bottom texture + bottomtexture = TexMan(sidedef->GetTexture(side_t::bottom), true); + + rw_offset_bottom = FLOAT2FIXED(sidedef->GetTextureXOffset(side_t::bottom)); + rowoffset = sidedef->GetTextureYOffset(side_t::bottom); + rw_bottomtexturescalex = sidedef->GetTextureXScale(side_t::bottom); + rw_bottomtexturescaley = sidedef->GetTextureYScale(side_t::bottom); + yrepeat = bottomtexture->Scale.Y * rw_bottomtexturescaley; + if (yrepeat >= 0) + { // normal orientation + if (linedef->flags & ML_DONTPEGBOTTOM) + { // bottom of texture at bottom + rw_bottomtexturemid = (rw_frontlowertop - ViewPos.Z) * yrepeat; + } + else + { // top of texture at top + rw_bottomtexturemid = (backsector->GetPlaneTexZ(sector_t::floor) - ViewPos.Z) * yrepeat; + if (rowoffset < 0 && bottomtexture != NULL) + { + rowoffset += bottomtexture->GetHeight(); + } + } + } + else + { // upside down + rowoffset = -rowoffset; + if (linedef->flags & ML_DONTPEGBOTTOM) + { // top of texture at bottom + rw_bottomtexturemid = (rw_frontlowertop - ViewPos.Z) * yrepeat; + } + else + { // bottom of texture at top + rw_bottomtexturemid = (backsector->GetPlaneTexZ(sector_t::floor) - ViewPos.Z) * yrepeat + bottomtexture->GetHeight(); + } + } + if (bottomtexture->bWorldPanning) + { + rw_bottomtexturemid += rowoffset * yrepeat; + } + else + { + rw_bottomtexturemid += rowoffset; + } + } + rw_markportal = linedef->isVisualPortal(); + } + + // if a floor / ceiling plane is on the wrong side of the view plane, + // it is definitely invisible and doesn't need to be marked. 
+ + // killough 3/7/98: add deep water check + if (frontsector->GetHeightSec() == NULL) + { + int planeside; + + planeside = frontsector->floorplane.PointOnSide(ViewPos); + if (frontsector->floorplane.fC() < 0) // 3D floors have the floor backwards + planeside = -planeside; + if (planeside <= 0) // above view plane + markfloor = false; + + if (frontsector->GetTexture(sector_t::ceiling) != skyflatnum) + { + planeside = frontsector->ceilingplane.PointOnSide(ViewPos); + if (frontsector->ceilingplane.fC() > 0) // 3D floors have the ceiling backwards + planeside = -planeside; + if (planeside <= 0) // below view plane + markceiling = false; + } + } + + FTexture *midtex = TexMan(sidedef->GetTexture(side_t::mid), true); + + bool segtextured = midtex != NULL || toptexture != NULL || bottomtexture != NULL; + + // calculate light table + if (needlights && (segtextured || (backsector && IsFogBoundary(frontsector, backsector)))) + { + lwallscale = + midtex ? (midtex->Scale.X * sidedef->GetTextureXScale(side_t::mid)) : + toptexture ? (toptexture->Scale.X * sidedef->GetTextureXScale(side_t::top)) : + bottomtexture ? 
(bottomtexture->Scale.X * sidedef->GetTextureXScale(side_t::bottom)) : + 1.; + + PrepWall(swall, lwall, sidedef->TexelLength * lwallscale, WallC.sx1, WallC.sx2); + + if (fixedcolormap == NULL && fixedlightlev < 0) + { + wallshade = LIGHT2SHADE(curline->sidedef->GetLightLevel(foggy, frontsector->lightlevel) + + r_actualextralight); + GlobVis = r_WallVisibility; + rw_lightleft = float(GlobVis / WallC.sz1); + rw_lightstep = float((GlobVis / WallC.sz2 - rw_lightleft) / (WallC.sx2 - WallC.sx1)); + } + else + { + rw_lightleft = 1; + rw_lightstep = 0; + } + } + } + + bool IsFogBoundary(sector_t *front, sector_t *back) + { + return r_fogboundary && fixedcolormap == NULL && front->ColorMap->Fade && + front->ColorMap->Fade != back->ColorMap->Fade && + (front->GetTexture(sector_t::ceiling) != skyflatnum || back->GetTexture(sector_t::ceiling) != skyflatnum); + } + + // Draws zero, one, or two textures for walls. + // Can draw or mark the starting pixel of floor and ceiling textures. + void R_RenderSegLoop(int x1, int x2) + { + int x; + double xscale; + double yscale; + fixed_t xoffset = rw_offset; + + if (fixedlightlev >= 0) + R_SetColorMapLight((r_fullbrightignoresectorcolor) ? &FullNormalLight : basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); + else if (fixedcolormap != NULL) + R_SetColorMapLight(fixedcolormap, 0, 0); + + // clip wall to the floor and ceiling + for (x = x1; x < x2; ++x) + { + if (walltop[x] < ceilingclip[x]) + { + walltop[x] = ceilingclip[x]; + } + if (wallbottom[x] > floorclip[x]) + { + wallbottom[x] = floorclip[x]; + } + } + + // mark ceiling areas + if (markceiling) + { + for (x = x1; x < x2; ++x) + { + short top = (fakeFloor && fake3D & 2) ? 
fakeFloor->ceilingclip[x] : ceilingclip[x]; + short bottom = MIN(walltop[x], floorclip[x]); + if (top < bottom) + { + ceilingplane->top[x] = top; + ceilingplane->bottom[x] = bottom; + } + } + } + + // mark floor areas + if (markfloor) + { + for (x = x1; x < x2; ++x) + { + short top = MAX(wallbottom[x], ceilingclip[x]); + short bottom = (fakeFloor && fake3D & 1) ? fakeFloor->floorclip[x] : floorclip[x]; + if (top < bottom) + { + assert(bottom <= viewheight); + floorplane->top[x] = top; + floorplane->bottom[x] = bottom; + } + } + } + + // kg3D - fake planes clipping + if (fake3D & FAKE3D_REFRESHCLIP) + { + if (fake3D & FAKE3D_CLIPBOTFRONT) + { + memcpy(fakeFloor->floorclip + x1, wallbottom + x1, (x2 - x1) * sizeof(short)); + } + else + { + for (x = x1; x < x2; ++x) + { + walllower[x] = MIN(MAX(walllower[x], ceilingclip[x]), wallbottom[x]); + } + memcpy(fakeFloor->floorclip + x1, walllower + x1, (x2 - x1) * sizeof(short)); + } + if (fake3D & FAKE3D_CLIPTOPFRONT) + { + memcpy(fakeFloor->ceilingclip + x1, walltop + x1, (x2 - x1) * sizeof(short)); + } + else + { + for (x = x1; x < x2; ++x) + { + wallupper[x] = MAX(MIN(wallupper[x], floorclip[x]), walltop[x]); + } + memcpy(fakeFloor->ceilingclip + x1, wallupper + x1, (x2 - x1) * sizeof(short)); + } + } + if (fake3D & 7) return; + + FLightNode *light_list = (curline && curline->sidedef) ? 
curline->sidedef->lighthead : nullptr; + + // draw the wall tiers + if (midtexture) + { // one sided line + if (midtexture->UseType != FTexture::TEX_Null && viewactive) + { + dc_texturemid = rw_midtexturemid; + rw_pic = midtexture; + xscale = rw_pic->Scale.X * rw_midtexturescalex; + yscale = rw_pic->Scale.Y * rw_midtexturescaley; + if (xscale != lwallscale) + { + PrepLWall(lwall, curline->sidedef->TexelLength*xscale, WallC.sx1, WallC.sx2); + lwallscale = xscale; + } + if (midtexture->bWorldPanning) + { + rw_offset = xs_RoundToInt(rw_offset_mid * xscale); + } + else + { + rw_offset = rw_offset_mid; + } + if (xscale < 0) + { + rw_offset = -rw_offset; + } + R_DrawWallSegment(rw_pic, x1, x2, walltop, wallbottom, swall, lwall, yscale, MAX(rw_frontcz1, rw_frontcz2), MIN(rw_frontfz1, rw_frontfz2), false, wallshade, light_list); + } + fillshort(ceilingclip + x1, x2 - x1, viewheight); + fillshort(floorclip + x1, x2 - x1, 0xffff); + } + else + { // two sided line + if (toptexture != NULL && toptexture->UseType != FTexture::TEX_Null) + { // top wall + for (x = x1; x < x2; ++x) + { + wallupper[x] = MAX(MIN(wallupper[x], floorclip[x]), walltop[x]); + } + if (viewactive) + { + dc_texturemid = rw_toptexturemid; + rw_pic = toptexture; + xscale = rw_pic->Scale.X * rw_toptexturescalex; + yscale = rw_pic->Scale.Y * rw_toptexturescaley; + if (xscale != lwallscale) + { + PrepLWall(lwall, curline->sidedef->TexelLength*xscale, WallC.sx1, WallC.sx2); + lwallscale = xscale; + } + if (toptexture->bWorldPanning) + { + rw_offset = xs_RoundToInt(rw_offset_top * xscale); + } + else + { + rw_offset = rw_offset_top; + } + if (xscale < 0) + { + rw_offset = -rw_offset; + } + R_DrawWallSegment(rw_pic, x1, x2, walltop, wallupper, swall, lwall, yscale, MAX(rw_frontcz1, rw_frontcz2), MIN(rw_backcz1, rw_backcz2), false, wallshade, light_list); + } + memcpy(ceilingclip + x1, wallupper + x1, (x2 - x1) * sizeof(short)); + } + else if (markceiling) + { // no top wall + memcpy(ceilingclip + x1, walltop + x1, 
(x2 - x1) * sizeof(short)); + } + + + if (bottomtexture != NULL && bottomtexture->UseType != FTexture::TEX_Null) + { // bottom wall + for (x = x1; x < x2; ++x) + { + walllower[x] = MIN(MAX(walllower[x], ceilingclip[x]), wallbottom[x]); + } + if (viewactive) + { + dc_texturemid = rw_bottomtexturemid; + rw_pic = bottomtexture; + xscale = rw_pic->Scale.X * rw_bottomtexturescalex; + yscale = rw_pic->Scale.Y * rw_bottomtexturescaley; + if (xscale != lwallscale) + { + PrepLWall(lwall, curline->sidedef->TexelLength*xscale, WallC.sx1, WallC.sx2); + lwallscale = xscale; + } + if (bottomtexture->bWorldPanning) + { + rw_offset = xs_RoundToInt(rw_offset_bottom * xscale); + } + else + { + rw_offset = rw_offset_bottom; + } + if (xscale < 0) + { + rw_offset = -rw_offset; + } + R_DrawWallSegment(rw_pic, x1, x2, walllower, wallbottom, swall, lwall, yscale, MAX(rw_backfz1, rw_backfz2), MIN(rw_frontfz1, rw_frontfz2), false, wallshade, light_list); + } + memcpy(floorclip + x1, walllower + x1, (x2 - x1) * sizeof(short)); + } + else if (markfloor) + { // no bottom wall + memcpy(floorclip + x1, wallbottom + x1, (x2 - x1) * sizeof(short)); + } + } + rw_offset = xoffset; + } + + //////////////////////////////////////////////////////////////////////////// + // Transform and clip coordinates. 
Returns true if it was clipped away bool FWallCoords::Init(const DVector2 &pt1, const DVector2 &pt2, double too_close) { diff --git a/src/swrenderer/line/r_line.h b/src/swrenderer/line/r_line.h index d4be544936..b530f3e3ee 100644 --- a/src/swrenderer/line/r_line.h +++ b/src/swrenderer/line/r_line.h @@ -23,6 +23,18 @@ namespace swrenderer void InitFromLine(const DVector2 &left, const DVector2 &right); }; + void R_AddLine(seg_t *line); + bool R_StoreWallRange(int start, int stop); + void R_NewWall(bool needlights); + void R_RenderSegLoop(int x1, int x2); + + bool IsFogBoundary(sector_t *front, sector_t *back); + bool R_SkyboxCompare(sector_t *frontsector, sector_t *backsector); + + extern subsector_t *InSubsector; + extern sector_t *frontsector; + extern sector_t *backsector; + extern seg_t *curline; extern side_t *sidedef; extern line_t *linedef; diff --git a/src/swrenderer/line/r_walldraw.cpp b/src/swrenderer/line/r_walldraw.cpp index 3f22e62b70..40173ce226 100644 --- a/src/swrenderer/line/r_walldraw.cpp +++ b/src/swrenderer/line/r_walldraw.cpp @@ -43,7 +43,6 @@ #include "swrenderer/drawers/r_draw.h" #include "swrenderer/segments/r_drawsegment.h" #include "swrenderer/scene/r_bsp.h" -#include "swrenderer/scene/r_segs.h" #include "swrenderer/scene/r_3dfloors.h" #include "swrenderer/line/r_walldraw.h" #include "swrenderer/line/r_wallsetup.h" diff --git a/src/swrenderer/plane/r_flatplane.cpp b/src/swrenderer/plane/r_flatplane.cpp index e45e03a695..7730fac2f6 100644 --- a/src/swrenderer/plane/r_flatplane.cpp +++ b/src/swrenderer/plane/r_flatplane.cpp @@ -18,7 +18,6 @@ #include "g_level.h" #include "swrenderer/scene/r_bsp.h" #include "r_flatplane.h" -#include "swrenderer/scene/r_segs.h" #include "swrenderer/scene/r_3dfloors.h" #include "v_palette.h" #include "r_data/colormaps.h" diff --git a/src/swrenderer/plane/r_fogboundary.cpp b/src/swrenderer/plane/r_fogboundary.cpp index 03eed8604d..5c020fbf9c 100644 --- a/src/swrenderer/plane/r_fogboundary.cpp +++ 
b/src/swrenderer/plane/r_fogboundary.cpp @@ -18,7 +18,6 @@ #include "g_level.h" #include "swrenderer/scene/r_bsp.h" #include "swrenderer/plane/r_fogboundary.h" -#include "swrenderer/scene/r_segs.h" #include "swrenderer/scene/r_3dfloors.h" #include "v_palette.h" #include "r_data/colormaps.h" diff --git a/src/swrenderer/plane/r_skyplane.cpp b/src/swrenderer/plane/r_skyplane.cpp index e048751cc8..fa4dcd3e84 100644 --- a/src/swrenderer/plane/r_skyplane.cpp +++ b/src/swrenderer/plane/r_skyplane.cpp @@ -18,7 +18,6 @@ #include "g_level.h" #include "swrenderer/scene/r_bsp.h" #include "r_skyplane.h" -#include "swrenderer/scene/r_segs.h" #include "swrenderer/scene/r_3dfloors.h" #include "v_palette.h" #include "r_data/colormaps.h" diff --git a/src/swrenderer/plane/r_slopeplane.cpp b/src/swrenderer/plane/r_slopeplane.cpp index 4eb085ecf2..0ba0298b7f 100644 --- a/src/swrenderer/plane/r_slopeplane.cpp +++ b/src/swrenderer/plane/r_slopeplane.cpp @@ -18,7 +18,6 @@ #include "g_level.h" #include "swrenderer/scene/r_bsp.h" #include "r_slopeplane.h" -#include "swrenderer/scene/r_segs.h" #include "swrenderer/scene/r_3dfloors.h" #include "v_palette.h" #include "r_data/colormaps.h" diff --git a/src/swrenderer/r_main.cpp b/src/swrenderer/r_main.cpp index 42e8e98dc8..48f9ea2324 100644 --- a/src/swrenderer/r_main.cpp +++ b/src/swrenderer/r_main.cpp @@ -42,7 +42,6 @@ #include "segments/r_drawsegment.h" #include "segments/r_portalsegment.h" #include "segments/r_clipsegment.h" -#include "scene/r_segs.h" #include "scene/r_3dfloors.h" #include "scene/r_portal.h" #include "r_sky.h" diff --git a/src/swrenderer/r_main.h b/src/swrenderer/r_main.h index 4f6b00a1b5..a6e198482d 100644 --- a/src/swrenderer/r_main.h +++ b/src/swrenderer/r_main.h @@ -136,6 +136,8 @@ void R_MultiresInit (void); extern void R_CopyStackedViewParameters(); +extern double globaluclip, globaldclip; + } #endif // __R_MAIN_H__ diff --git a/src/swrenderer/scene/r_3dfloors.cpp b/src/swrenderer/scene/r_3dfloors.cpp index 
5b487d8d8f..9a5cdadcbb 100644 --- a/src/swrenderer/scene/r_3dfloors.cpp +++ b/src/swrenderer/scene/r_3dfloors.cpp @@ -11,7 +11,6 @@ #include "c_dispatch.h" #include "swrenderer/r_main.h" #include "r_bsp.h" -#include "r_segs.h" #include "c_cvars.h" #include "r_3dfloors.h" diff --git a/src/swrenderer/scene/r_bsp.cpp b/src/swrenderer/scene/r_bsp.cpp index 6c0c1be1cc..1d0fcfd50e 100644 --- a/src/swrenderer/scene/r_bsp.cpp +++ b/src/swrenderer/scene/r_bsp.cpp @@ -49,7 +49,6 @@ #include "doomstat.h" #include "r_state.h" #include "r_bsp.h" -#include "r_segs.h" #include "v_palette.h" #include "r_sky.h" #include "po_man.h" @@ -62,11 +61,6 @@ namespace swrenderer { using namespace drawerargs; -sector_t* frontsector; -sector_t* backsector; - -// killough 4/7/98: indicates doors closed wrt automap bugfix: -int doorclosed; bool r_fakingunderwater; @@ -75,8 +69,16 @@ static BYTE FakeSide; int WindowLeft, WindowRight; WORD MirrorFlags; +visplane_t *floorplane; +visplane_t *ceilingplane; + +// Clip values are the solid pixel bounding the range. +// floorclip starts out SCREENHEIGHT and is just outside the range +// ceilingclip starts out 0 and is just inside the range +// +short floorclip[MAXWIDTH]; +short ceilingclip[MAXWIDTH]; -subsector_t *InSubsector; // // killough 3/7/98: Hack floor/ceiling heights for deep water etc. @@ -290,265 +292,6 @@ sector_t *R_FakeFlat(sector_t *sec, sector_t *tempsec, } -bool R_SkyboxCompare(sector_t *frontsector, sector_t *backsector) -{ - FSectorPortal *frontc = frontsector->GetPortal(sector_t::ceiling); - FSectorPortal *frontf = frontsector->GetPortal(sector_t::floor); - FSectorPortal *backc = backsector->GetPortal(sector_t::ceiling); - FSectorPortal *backf = backsector->GetPortal(sector_t::floor); - - // return true if any of the planes has a linedef-based portal (unless both sides have the same one. - // Ideally this should also check thing based portals but the omission of this check had been abused to hell and back for those. 
- // (Note: This may require a compatibility option if some maps ran into this for line based portals as well.) - if (!frontc->MergeAllowed()) return (frontc != backc); - if (!frontf->MergeAllowed()) return (frontf != backf); - if (!backc->MergeAllowed()) return true; - if (!backf->MergeAllowed()) return true; - return false; -} - -// -// R_AddLine -// Clips the given segment -// and adds any visible pieces to the line list. -// - -void R_AddLine (seg_t *line) -{ - static sector_t tempsec; // killough 3/8/98: ceiling/water hack - bool solid; - DVector2 pt1, pt2; - - curline = line; - - // [RH] Color if not texturing line - dc_color = (((int)(line - segs) * 8) + 4) & 255; - - pt1 = line->v1->fPos() - ViewPos; - pt2 = line->v2->fPos() - ViewPos; - - // Reject lines not facing viewer - if (pt1.Y * (pt1.X - pt2.X) + pt1.X * (pt2.Y - pt1.Y) >= 0) - return; - - if (WallC.Init(pt1, pt2, 32.0 / (1 << 12))) - return; - - if (WallC.sx1 >= WindowRight || WallC.sx2 <= WindowLeft) - return; - - if (line->linedef == NULL) - { - if (R_CheckClipWallSegment (WallC.sx1, WallC.sx2)) - { - InSubsector->flags |= SSECF_DRAWN; - } - return; - } - - // reject lines that aren't seen from the portal (if any) - // [ZZ] 10.01.2016: lines inside a skybox shouldn't be clipped, although this imposes some limitations on portals in skyboxes. - if (!CurrentPortalInSkybox && CurrentPortal && P_ClipLineToPortal(line->linedef, CurrentPortal->dst, ViewPos)) - return; - - vertex_t *v1, *v2; - v1 = line->linedef->v1; - v2 = line->linedef->v2; - - if ((v1 == line->v1 && v2 == line->v2) || (v2 == line->v1 && v1 == line->v2)) - { // The seg is the entire wall. - WallT.InitFromWallCoords(&WallC); - } - else - { // The seg is only part of the wall. 
- if (line->linedef->sidedef[0] != line->sidedef) - { - swapvalues (v1, v2); - } - WallT.InitFromLine(v1->fPos() - ViewPos, v2->fPos() - ViewPos); - } - - if (!(fake3D & FAKE3D_FAKEBACK)) - { - backsector = line->backsector; - } - rw_frontcz1 = frontsector->ceilingplane.ZatPoint(line->v1); - rw_frontfz1 = frontsector->floorplane.ZatPoint(line->v1); - rw_frontcz2 = frontsector->ceilingplane.ZatPoint(line->v2); - rw_frontfz2 = frontsector->floorplane.ZatPoint(line->v2); - - rw_mustmarkfloor = rw_mustmarkceiling = false; - rw_havehigh = rw_havelow = false; - - // Single sided line? - if (backsector == NULL) - { - solid = true; - } - else - { - // kg3D - its fake, no transfer_heights - if (!(fake3D & FAKE3D_FAKEBACK)) - { // killough 3/8/98, 4/4/98: hack for invisible ceilings / deep water - backsector = R_FakeFlat (backsector, &tempsec, NULL, NULL, true); - } - doorclosed = 0; // killough 4/16/98 - - rw_backcz1 = backsector->ceilingplane.ZatPoint(line->v1); - rw_backfz1 = backsector->floorplane.ZatPoint(line->v1); - rw_backcz2 = backsector->ceilingplane.ZatPoint(line->v2); - rw_backfz2 = backsector->floorplane.ZatPoint(line->v2); - - if (fake3D & FAKE3D_FAKEBACK) - { - if (rw_frontfz1 >= rw_backfz1 && rw_frontfz2 >= rw_backfz2) - { - fake3D |= FAKE3D_CLIPBOTFRONT; - } - if (rw_frontcz1 <= rw_backcz1 && rw_frontcz2 <= rw_backcz2) - { - fake3D |= FAKE3D_CLIPTOPFRONT; - } - } - - // Cannot make these walls solid, because it can result in - // sprite clipping problems for sprites near the wall - if (rw_frontcz1 > rw_backcz1 || rw_frontcz2 > rw_backcz2) - { - rw_havehigh = true; - R_CreateWallSegmentYSloped (wallupper, backsector->ceilingplane, &WallC, curline, MirrorFlags & RF_XFLIP); - } - if (rw_frontfz1 < rw_backfz1 || rw_frontfz2 < rw_backfz2) - { - rw_havelow = true; - R_CreateWallSegmentYSloped (walllower, backsector->floorplane, &WallC, curline, MirrorFlags & RF_XFLIP); - } - - // Portal - if (line->linedef->isVisualPortal() && line->sidedef == 
line->linedef->sidedef[0]) - { - solid = true; - } - // Closed door. - else if ((rw_backcz1 <= rw_frontfz1 && rw_backcz2 <= rw_frontfz2) || - (rw_backfz1 >= rw_frontcz1 && rw_backfz2 >= rw_frontcz2)) - { - solid = true; - } - else if ( - // properly render skies (consider door "open" if both ceilings are sky): - (backsector->GetTexture(sector_t::ceiling) != skyflatnum || frontsector->GetTexture(sector_t::ceiling) != skyflatnum) - - // if door is closed because back is shut: - && rw_backcz1 <= rw_backfz1 && rw_backcz2 <= rw_backfz2 - - // preserve a kind of transparent door/lift special effect: - && ((rw_backcz1 >= rw_frontcz1 && rw_backcz2 >= rw_frontcz2) || line->sidedef->GetTexture(side_t::top).isValid()) - && ((rw_backfz1 <= rw_frontfz1 && rw_backfz2 <= rw_frontfz2) || line->sidedef->GetTexture(side_t::bottom).isValid())) - { - // killough 1/18/98 -- This function is used to fix the automap bug which - // showed lines behind closed doors simply because the door had a dropoff. - // - // It assumes that Doom has already ruled out a door being closed because - // of front-back closure (e.g. front floor is taller than back ceiling). - - // This fixes the automap floor height bug -- killough 1/18/98: - // killough 4/7/98: optimize: save result in doorclosed for use in r_segs.c - doorclosed = true; - solid = true; - } - else if (frontsector->ceilingplane != backsector->ceilingplane || - frontsector->floorplane != backsector->floorplane) - { - // Window. 
- solid = false; - } - else if (R_SkyboxCompare(frontsector, backsector)) - { - solid = false; - } - else if (backsector->lightlevel != frontsector->lightlevel - || backsector->GetTexture(sector_t::floor) != frontsector->GetTexture(sector_t::floor) - || backsector->GetTexture(sector_t::ceiling) != frontsector->GetTexture(sector_t::ceiling) - || curline->sidedef->GetTexture(side_t::mid).isValid() - - // killough 3/7/98: Take flats offsets into account: - || backsector->planes[sector_t::floor].xform != frontsector->planes[sector_t::floor].xform - || backsector->planes[sector_t::ceiling].xform != frontsector->planes[sector_t::ceiling].xform - - || backsector->GetPlaneLight(sector_t::floor) != frontsector->GetPlaneLight(sector_t::floor) - || backsector->GetPlaneLight(sector_t::ceiling) != frontsector->GetPlaneLight(sector_t::ceiling) - || backsector->GetVisFlags(sector_t::floor) != frontsector->GetVisFlags(sector_t::floor) - || backsector->GetVisFlags(sector_t::ceiling) != frontsector->GetVisFlags(sector_t::ceiling) - - // [RH] Also consider colormaps - || backsector->ColorMap != frontsector->ColorMap - - - - // kg3D - and fake lights - || (frontsector->e && frontsector->e->XFloor.lightlist.Size()) - || (backsector->e && backsector->e->XFloor.lightlist.Size()) - ) - { - solid = false; - } - else - { - // Reject empty lines used for triggers and special events. - // Identical floor and ceiling on both sides, identical light levels - // on both sides, and no middle texture. - - // When using GL nodes, do a clipping test for these lines so we can - // mark their subsectors as visible for automap texturing. 
- if (hasglnodes && !(InSubsector->flags & SSECF_DRAWN)) - { - if (R_CheckClipWallSegment(WallC.sx1, WallC.sx2)) - { - InSubsector->flags |= SSECF_DRAWN; - } - } - return; - } - } - - rw_prepped = false; - - if (line->linedef->special == Line_Horizon) - { - // Be aware: Line_Horizon does not work properly with sloped planes - fillshort (walltop+WallC.sx1, WallC.sx2 - WallC.sx1, centery); - fillshort (wallbottom+WallC.sx1, WallC.sx2 - WallC.sx1, centery); - } - else - { - rw_ceilstat = R_CreateWallSegmentYSloped (walltop, frontsector->ceilingplane, &WallC, curline, MirrorFlags & RF_XFLIP); - rw_floorstat = R_CreateWallSegmentYSloped (wallbottom, frontsector->floorplane, &WallC, curline, MirrorFlags & RF_XFLIP); - - // [RH] treat off-screen walls as solid -#if 0 // Maybe later... - if (!solid) - { - if (rw_ceilstat == 12 && line->sidedef->GetTexture(side_t::top) != 0) - { - rw_mustmarkceiling = true; - solid = true; - } - if (rw_floorstat == 3 && line->sidedef->GetTexture(side_t::bottom) != 0) - { - rw_mustmarkfloor = true; - solid = true; - } - } -#endif - } - - if (R_ClipWallSegment(WallC.sx1, WallC.sx2, solid, R_StoreWallRange)) - { - InSubsector->flags |= SSECF_DRAWN; - } -} - // Checks BSP node/subtree bounding box. // Returns true if some part of the bbox might be visible. diff --git a/src/swrenderer/scene/r_bsp.h b/src/swrenderer/scene/r_bsp.h index 7530718b84..ef5d08db19 100644 --- a/src/swrenderer/scene/r_bsp.h +++ b/src/swrenderer/scene/r_bsp.h @@ -32,6 +32,8 @@ EXTERN_CVAR (Bool, r_drawflat) // [RH] Don't texture segs? namespace swrenderer { +struct visplane_t; + // The 3072 below is just an arbitrary value picked to avoid // drawing lines the player is too close to that would overflow // the texture calculations. 
@@ -44,11 +46,6 @@ enum FAKED_AboveCeiling }; -extern subsector_t *InSubsector; - -extern sector_t* frontsector; -extern sector_t* backsector; - extern int WindowLeft, WindowRight; extern WORD MirrorFlags; @@ -57,6 +54,11 @@ void R_RenderBSPNode (void *node); // killough 4/13/98: fake floors/ceilings for deep water / fake ceilings: sector_t *R_FakeFlat(sector_t *, sector_t *, int *, int *, bool); +extern visplane_t *floorplane; +extern visplane_t *ceilingplane; +extern short floorclip[MAXWIDTH]; +extern short ceilingclip[MAXWIDTH]; + } #endif diff --git a/src/swrenderer/scene/r_portal.cpp b/src/swrenderer/scene/r_portal.cpp index 26dbb2e1cf..03741b14a5 100644 --- a/src/swrenderer/scene/r_portal.cpp +++ b/src/swrenderer/scene/r_portal.cpp @@ -33,7 +33,6 @@ #include "r_utility.h" #include "r_things.h" #include "r_3dfloors.h" -#include "r_segs.h" #include "swrenderer/drawers/r_draw_rgba.h" #include "swrenderer/segments/r_clipsegment.h" #include "swrenderer/segments/r_drawsegment.h" diff --git a/src/swrenderer/scene/r_segs.cpp b/src/swrenderer/scene/r_segs.cpp deleted file mode 100644 index 4d85a736d4..0000000000 --- a/src/swrenderer/scene/r_segs.cpp +++ /dev/null @@ -1,1796 +0,0 @@ -// Emacs style mode select -*- C++ -*- -//----------------------------------------------------------------------------- -// -// $Id:$ -// -// Copyright (C) 1993-1996 by id Software, Inc. -// -// This source is available for distribution and/or modification -// only under the terms of the DOOM Source Code License as -// published by id Software. All rights reserved. -// -// The source is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// FITNESS FOR A PARTICULAR PURPOSE. See the DOOM Source Code License -// for more details. -// -// DESCRIPTION: -// All the clipping: columns, horizontal spans, sky columns. 
-// -//----------------------------------------------------------------------------- - -#include -#include - -#include "templates.h" -#include "i_system.h" - -#include "doomdef.h" -#include "doomstat.h" -#include "doomdata.h" -#include "p_lnspec.h" - -#include "swrenderer/r_main.h" -#include "swrenderer/scene/r_things.h" -#include "r_sky.h" -#include "v_video.h" - -#include "m_swap.h" -#include "w_wad.h" -#include "stats.h" -#include "a_sharedglobal.h" -#include "d_net.h" -#include "g_level.h" -#include "r_bsp.h" -#include "swrenderer/plane/r_fogboundary.h" -#include "r_segs.h" -#include "r_3dfloors.h" -#include "swrenderer/drawers/r_draw.h" -#include "v_palette.h" -#include "r_data/colormaps.h" -#include "swrenderer/line/r_line.h" -#include "swrenderer/line/r_wallsetup.h" -#include "swrenderer/line/r_walldraw.h" -#include "swrenderer/segments/r_drawsegment.h" -#include "r_portal.h" -#include "swrenderer/things/r_wallsprite.h" -#include "swrenderer/things/r_decal.h" -#include "swrenderer/r_memory.h" - -#define WALLYREPEAT 8 - - -CVAR(Bool, r_fogboundary, true, 0) -CVAR(Bool, r_drawmirrors, true, 0) -EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor); -EXTERN_CVAR(Bool, r_mipmap) - -namespace swrenderer -{ - using namespace drawerargs; - - namespace - { - int wallshade; - } - -#define HEIGHTBITS 12 -#define HEIGHTSHIFT (FRACBITS-HEIGHTBITS) - -extern double globaluclip, globaldclip; - - -// OPTIMIZE: closed two sided lines as single sided - -// killough 1/6/98: replaced globals with statics where appropriate - -static bool segtextured; // True if any of the segs textures might be visible. -bool markfloor; // False if the back side is the same plane. -bool markceiling; -FTexture *toptexture; -FTexture *bottomtexture; -FTexture *midtexture; - - - -// -// regular wall -// - -static fixed_t *maskedtexturecol; - -visplane_t *floorplane; -visplane_t *ceilingplane; - -// Clip values are the solid pixel bounding the range. 
-// floorclip starts out SCREENHEIGHT and is just outside the range -// ceilingclip starts out 0 and is just inside the range -// -short floorclip[MAXWIDTH]; -short ceilingclip[MAXWIDTH]; - - -inline bool IsFogBoundary (sector_t *front, sector_t *back) -{ - return r_fogboundary && fixedcolormap == NULL && front->ColorMap->Fade && - front->ColorMap->Fade != back->ColorMap->Fade && - (front->GetTexture(sector_t::ceiling) != skyflatnum || back->GetTexture(sector_t::ceiling) != skyflatnum); -} - -// Clip a midtexture to the floor and ceiling of the sector in front of it. -void ClipMidtex(int x1, int x2) -{ - short most[MAXWIDTH]; - - R_CreateWallSegmentYSloped(most, curline->frontsector->ceilingplane, &WallC, curline, MirrorFlags & RF_XFLIP); - for (int i = x1; i < x2; ++i) - { - if (wallupper[i] < most[i]) - wallupper[i] = most[i]; - } - R_CreateWallSegmentYSloped(most, curline->frontsector->floorplane, &WallC, curline, MirrorFlags & RF_XFLIP); - for (int i = x1; i < x2; ++i) - { - if (walllower[i] > most[i]) - walllower[i] = most[i]; - } -} - -void R_RenderFakeWallRange(drawseg_t *ds, int x1, int x2); - -void R_RenderMaskedSegRange (drawseg_t *ds, int x1, int x2) -{ - FTexture *tex; - int i; - sector_t tempsec; // killough 4/13/98 - double texheight, texheightscale; - bool notrelevant = false; - double rowoffset; - bool wrap = false; - - const sector_t *sec; - - sprflipvert = false; - - curline = ds->curline; - - bool visible = R_SetPatchStyle (LegacyRenderStyles[curline->linedef->flags & ML_ADDTRANS ? 
STYLE_Add : STYLE_Translucent], - (float)MIN(curline->linedef->alpha, 1.), 0, 0); - - if (!visible && !ds->bFogBoundary && !ds->bFakeBoundary) - { - return; - } - - NetUpdate (); - - frontsector = curline->frontsector; - backsector = curline->backsector; - - tex = TexMan(curline->sidedef->GetTexture(side_t::mid), true); - if (i_compatflags & COMPATF_MASKEDMIDTEX) - { - tex = tex->GetRawTexture(); - } - - // killough 4/13/98: get correct lightlevel for 2s normal textures - sec = R_FakeFlat (frontsector, &tempsec, NULL, NULL, false); - - basecolormap = sec->ColorMap; // [RH] Set basecolormap - - wallshade = ds->shade; - rw_lightstep = ds->lightstep; - rw_light = ds->light + (x1 - ds->x1) * rw_lightstep; - - if (fixedlightlev < 0) - { - if (!(fake3D & FAKE3D_CLIPTOP)) - { - sclipTop = sec->ceilingplane.ZatPoint(ViewPos); - } - for (i = frontsector->e->XFloor.lightlist.Size() - 1; i >= 0; i--) - { - if (sclipTop <= frontsector->e->XFloor.lightlist[i].plane.Zat0()) - { - lightlist_t *lit = &frontsector->e->XFloor.lightlist[i]; - basecolormap = lit->extra_colormap; - wallshade = LIGHT2SHADE(curline->sidedef->GetLightLevel(foggy, *lit->p_lightlevel, lit->lightsource != NULL) + r_actualextralight); - break; - } - } - } - - mfloorclip = openings + ds->sprbottomclip - ds->x1; - mceilingclip = openings + ds->sprtopclip - ds->x1; - - float *MaskedSWall, MaskedScaleY, rw_scalestep; - - // [RH] Draw fog partition - if (ds->bFogBoundary) - { - R_DrawFogBoundary (x1, x2, mceilingclip, mfloorclip, wallshade); - if (ds->maskedtexturecol == -1) - { - goto clearfog; - } - } - if ((ds->bFakeBoundary && !(ds->bFakeBoundary & 4)) || !visible) - { - goto clearfog; - } - - MaskedSWall = (float *)(openings + ds->swall) - ds->x1; - MaskedScaleY = ds->yscale; - maskedtexturecol = (fixed_t *)(openings + ds->maskedtexturecol) - ds->x1; - spryscale = ds->iscale + ds->iscalestep * (x1 - ds->x1); - rw_scalestep = ds->iscalestep; - - if (fixedlightlev >= 0) - 
R_SetColorMapLight((r_fullbrightignoresectorcolor) ? &FullNormalLight : basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); - else if (fixedcolormap != NULL) - R_SetColorMapLight(fixedcolormap, 0, 0); - - // find positioning - texheight = tex->GetScaledHeightDouble(); - texheightscale = fabs(curline->sidedef->GetTextureYScale(side_t::mid)); - if (texheightscale != 1) - { - texheight = texheight / texheightscale; - } - if (curline->linedef->flags & ML_DONTPEGBOTTOM) - { - dc_texturemid = MAX(frontsector->GetPlaneTexZ(sector_t::floor), backsector->GetPlaneTexZ(sector_t::floor)) + texheight; - } - else - { - dc_texturemid = MIN(frontsector->GetPlaneTexZ(sector_t::ceiling), backsector->GetPlaneTexZ(sector_t::ceiling)); - } - - rowoffset = curline->sidedef->GetTextureYOffset(side_t::mid); - - wrap = (curline->linedef->flags & ML_WRAP_MIDTEX) || (curline->sidedef->Flags & WALLF_WRAP_MIDTEX); - if (!wrap) - { // Texture does not wrap vertically. - double textop; - - if (MaskedScaleY < 0) - { - MaskedScaleY = -MaskedScaleY; - sprflipvert = true; - } - if (tex->bWorldPanning) - { - // rowoffset is added before the multiply so that the masked texture will - // still be positioned in world units rather than texels. - dc_texturemid += rowoffset - ViewPos.Z; - textop = dc_texturemid; - dc_texturemid *= MaskedScaleY; - } - else - { - // rowoffset is added outside the multiply so that it positions the texture - // by texels instead of world units. 
- textop = dc_texturemid + rowoffset / MaskedScaleY - ViewPos.Z; - dc_texturemid = (dc_texturemid - ViewPos.Z) * MaskedScaleY + rowoffset; - } - if (sprflipvert) - { - MaskedScaleY = -MaskedScaleY; - dc_texturemid -= tex->GetHeight() << FRACBITS; - } - - // [RH] Don't bother drawing segs that are completely offscreen - if (globaldclip * ds->sz1 < -textop && globaldclip * ds->sz2 < -textop) - { // Texture top is below the bottom of the screen - goto clearfog; - } - - if (globaluclip * ds->sz1 > texheight - textop && globaluclip * ds->sz2 > texheight - textop) - { // Texture bottom is above the top of the screen - goto clearfog; - } - - if ((fake3D & FAKE3D_CLIPBOTTOM) && textop < sclipBottom - ViewPos.Z) - { - notrelevant = true; - goto clearfog; - } - if ((fake3D & FAKE3D_CLIPTOP) && textop - texheight > sclipTop - ViewPos.Z) - { - notrelevant = true; - goto clearfog; - } - - WallC.sz1 = ds->sz1; - WallC.sz2 = ds->sz2; - WallC.sx1 = ds->sx1; - WallC.sx2 = ds->sx2; - - if (fake3D & FAKE3D_CLIPTOP) - { - R_CreateWallSegmentY(wallupper, textop < sclipTop - ViewPos.Z ? textop : sclipTop - ViewPos.Z, &WallC); - } - else - { - R_CreateWallSegmentY(wallupper, textop, &WallC); - } - if (fake3D & FAKE3D_CLIPBOTTOM) - { - R_CreateWallSegmentY(walllower, textop - texheight > sclipBottom - ViewPos.Z ? textop - texheight : sclipBottom - ViewPos.Z, &WallC); - } - else - { - R_CreateWallSegmentY(walllower, textop - texheight, &WallC); - } - - for (i = x1; i < x2; i++) - { - if (wallupper[i] < mceilingclip[i]) - wallupper[i] = mceilingclip[i]; - } - for (i = x1; i < x2; i++) - { - if (walllower[i] > mfloorclip[i]) - walllower[i] = mfloorclip[i]; - } - - if (CurrentSkybox) - { // Midtex clipping doesn't work properly with skyboxes, since you're normally below the floor - // or above the ceiling, so the appropriate end won't be clipped automatically when adding - // this drawseg. 
- if ((curline->linedef->flags & ML_CLIP_MIDTEX) || - (curline->sidedef->Flags & WALLF_CLIP_MIDTEX)) - { - ClipMidtex(x1, x2); - } - } - - mfloorclip = walllower; - mceilingclip = wallupper; - - // draw the columns one at a time - if (visible) - { - for (dc_x = x1; dc_x < x2; ++dc_x) - { - if (fixedcolormap == nullptr && fixedlightlev < 0) - { - R_SetColorMapLight(basecolormap, rw_light, wallshade); - } - - dc_iscale = xs_Fix<16>::ToFix(MaskedSWall[dc_x] * MaskedScaleY); - if (sprflipvert) - sprtopscreen = CenterY + dc_texturemid * spryscale; - else - sprtopscreen = CenterY - dc_texturemid * spryscale; - - R_DrawMaskedColumn(tex, maskedtexturecol[dc_x]); - - rw_light += rw_lightstep; - spryscale += rw_scalestep; - } - } - } - else - { // Texture does wrap vertically. - if (tex->bWorldPanning) - { - // rowoffset is added before the multiply so that the masked texture will - // still be positioned in world units rather than texels. - dc_texturemid = (dc_texturemid - ViewPos.Z + rowoffset) * MaskedScaleY; - } - else - { - // rowoffset is added outside the multiply so that it positions the texture - // by texels instead of world units. - dc_texturemid = (dc_texturemid - ViewPos.Z) * MaskedScaleY + rowoffset; - } - - WallC.sz1 = ds->sz1; - WallC.sz2 = ds->sz2; - WallC.sx1 = ds->sx1; - WallC.sx2 = ds->sx2; - - if (CurrentSkybox) - { // Midtex clipping doesn't work properly with skyboxes, since you're normally below the floor - // or above the ceiling, so the appropriate end won't be clipped automatically when adding - // this drawseg. 
- if ((curline->linedef->flags & ML_CLIP_MIDTEX) || - (curline->sidedef->Flags & WALLF_CLIP_MIDTEX)) - { - ClipMidtex(x1, x2); - } - } - - if (fake3D & FAKE3D_CLIPTOP) - { - R_CreateWallSegmentY(wallupper, sclipTop - ViewPos.Z, &WallC); - for (i = x1; i < x2; i++) - { - if (wallupper[i] < mceilingclip[i]) - wallupper[i] = mceilingclip[i]; - } - mceilingclip = wallupper; - } - if (fake3D & FAKE3D_CLIPBOTTOM) - { - R_CreateWallSegmentY(walllower, sclipBottom - ViewPos.Z, &WallC); - for (i = x1; i < x2; i++) - { - if (walllower[i] > mfloorclip[i]) - walllower[i] = mfloorclip[i]; - } - mfloorclip = walllower; - } - - rw_offset = 0; - rw_pic = tex; - R_DrawDrawSeg(ds, x1, x2, mceilingclip, mfloorclip, MaskedSWall, maskedtexturecol, ds->yscale, wallshade); - } - -clearfog: - R_FinishSetPatchStyle (); - if (ds->bFakeBoundary & 3) - { - R_RenderFakeWallRange(ds, x1, x2); - } - if (!notrelevant) - { - if (fake3D & FAKE3D_REFRESHCLIP) - { - if (!wrap) - { - assert(ds->bkup >= 0); - memcpy(openings + ds->sprtopclip, openings + ds->bkup, (ds->x2 - ds->x1) * 2); - } - } - else - { - fillshort(openings + ds->sprtopclip - ds->x1 + x1, x2 - x1, viewheight); - } - } - return; -} - -// kg3D - render one fake wall -void R_RenderFakeWall(drawseg_t *ds, int x1, int x2, F3DFloor *rover) -{ - int i; - double xscale; - double yscale; - - fixed_t Alpha = Scale(rover->alpha, OPAQUE, 255); - bool visible = R_SetPatchStyle (LegacyRenderStyles[rover->flags & FF_ADDITIVETRANS ? 
STYLE_Add : STYLE_Translucent], - Alpha, 0, 0); - - if(!visible) { - R_FinishSetPatchStyle(); - return; - } - - rw_lightstep = ds->lightstep; - rw_light = ds->light + (x1 - ds->x1) * rw_lightstep; - - mfloorclip = openings + ds->sprbottomclip - ds->x1; - mceilingclip = openings + ds->sprtopclip - ds->x1; - - spryscale = ds->iscale + ds->iscalestep * (x1 - ds->x1); - float *MaskedSWall = (float *)(openings + ds->swall) - ds->x1; - - // find positioning - side_t *scaledside; - side_t::ETexpart scaledpart; - if (rover->flags & FF_UPPERTEXTURE) - { - scaledside = curline->sidedef; - scaledpart = side_t::top; - } - else if (rover->flags & FF_LOWERTEXTURE) - { - scaledside = curline->sidedef; - scaledpart = side_t::bottom; - } - else - { - scaledside = rover->master->sidedef[0]; - scaledpart = side_t::mid; - } - xscale = rw_pic->Scale.X * scaledside->GetTextureXScale(scaledpart); - yscale = rw_pic->Scale.Y * scaledside->GetTextureYScale(scaledpart); - - double rowoffset = curline->sidedef->GetTextureYOffset(side_t::mid) + rover->master->sidedef[0]->GetTextureYOffset(side_t::mid); - double planez = rover->model->GetPlaneTexZ(sector_t::ceiling); - rw_offset = FLOAT2FIXED(curline->sidedef->GetTextureXOffset(side_t::mid) + rover->master->sidedef[0]->GetTextureXOffset(side_t::mid)); - if (rowoffset < 0) - { - rowoffset += rw_pic->GetHeight(); - } - dc_texturemid = (planez - ViewPos.Z) * yscale; - if (rw_pic->bWorldPanning) - { - // rowoffset is added before the multiply so that the masked texture will - // still be positioned in world units rather than texels. - - dc_texturemid = dc_texturemid + rowoffset * yscale; - rw_offset = xs_RoundToInt(rw_offset * xscale); - } - else - { - // rowoffset is added outside the multiply so that it positions the texture - // by texels instead of world units. - dc_texturemid += rowoffset; - } - - if (fixedlightlev >= 0) - R_SetColorMapLight((r_fullbrightignoresectorcolor) ? 
&FullNormalLight : basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); - else if (fixedcolormap != NULL) - R_SetColorMapLight(fixedcolormap, 0, 0); - - WallC.sz1 = ds->sz1; - WallC.sz2 = ds->sz2; - WallC.sx1 = ds->sx1; - WallC.sx2 = ds->sx2; - WallC.tleft.X = ds->cx; - WallC.tleft.Y = ds->cy; - WallC.tright.X = ds->cx + ds->cdx; - WallC.tright.Y = ds->cy + ds->cdy; - WallT = ds->tmapvals; - - R_CreateWallSegmentY(wallupper, sclipTop - ViewPos.Z, &WallC); - R_CreateWallSegmentY(walllower, sclipBottom - ViewPos.Z, &WallC); - - for (i = x1; i < x2; i++) - { - if (wallupper[i] < mceilingclip[i]) - wallupper[i] = mceilingclip[i]; - } - for (i = x1; i < x2; i++) - { - if (walllower[i] > mfloorclip[i]) - walllower[i] = mfloorclip[i]; - } - - PrepLWall (lwall, curline->sidedef->TexelLength*xscale, ds->sx1, ds->sx2); - R_DrawDrawSeg(ds, x1, x2, wallupper, walllower, MaskedSWall, lwall, yscale, wallshade); - R_FinishSetPatchStyle(); -} - -// kg3D - walls of fake floors -void R_RenderFakeWallRange (drawseg_t *ds, int x1, int x2) -{ - FTexture *const DONT_DRAW = ((FTexture*)(intptr_t)-1); - int i,j; - F3DFloor *rover, *fover = NULL; - int passed, last; - double floorHeight; - double ceilingHeight; - - sprflipvert = false; - curline = ds->curline; - - frontsector = curline->frontsector; - backsector = curline->backsector; - - if (backsector == NULL) - { - return; - } - if ((ds->bFakeBoundary & 3) == 2) - { - sector_t *sec = backsector; - backsector = frontsector; - frontsector = sec; - } - - floorHeight = backsector->CenterFloor(); - ceilingHeight = backsector->CenterCeiling(); - - // maybe fix clipheights - if (!(fake3D & FAKE3D_CLIPBOTTOM)) sclipBottom = floorHeight; - if (!(fake3D & FAKE3D_CLIPTOP)) sclipTop = ceilingHeight; - - // maybe not visible - if (sclipBottom >= frontsector->CenterCeiling()) return; - if (sclipTop <= frontsector->CenterFloor()) return; - - if (fake3D & FAKE3D_DOWN2UP) - { // bottom to viewz - last = 0; - for (i = backsector->e->XFloor.ffloors.Size() - 
1; i >= 0; i--) - { - rover = backsector->e->XFloor.ffloors[i]; - if (!(rover->flags & FF_EXISTS)) continue; - - // visible? - passed = 0; - if (!(rover->flags & FF_RENDERSIDES) || rover->top.plane->isSlope() || rover->bottom.plane->isSlope() || - rover->top.plane->Zat0() <= sclipBottom || - rover->bottom.plane->Zat0() >= ceilingHeight || - rover->top.plane->Zat0() <= floorHeight) - { - if (!i) - { - passed = 1; - } - else - { - continue; - } - } - - rw_pic = NULL; - if (rover->bottom.plane->Zat0() >= sclipTop || passed) - { - if (last) - { - break; - } - // maybe wall from inside rendering? - fover = NULL; - for (j = frontsector->e->XFloor.ffloors.Size() - 1; j >= 0; j--) - { - fover = frontsector->e->XFloor.ffloors[j]; - if (fover->model == rover->model) - { // never - fover = NULL; - break; - } - if (!(fover->flags & FF_EXISTS)) continue; - if (!(fover->flags & FF_RENDERSIDES)) continue; - // no sloped walls, it's bugged - if (fover->top.plane->isSlope() || fover->bottom.plane->isSlope()) continue; - - // visible? - if (fover->top.plane->Zat0() <= sclipBottom) continue; // no - if (fover->bottom.plane->Zat0() >= sclipTop) - { // no, last possible - fover = NULL; - break; - } - // it is, render inside? - if (!(fover->flags & (FF_BOTHPLANES|FF_INVERTPLANES))) - { // no - fover = NULL; - } - break; - } - // nothing - if (!fover || j == -1) - { - break; - } - // correct texture - if (fover->flags & rover->flags & FF_SWIMMABLE) - { // don't ever draw (but treat as something has been found) - rw_pic = DONT_DRAW; - } - else if(fover->flags & FF_UPPERTEXTURE) - { - rw_pic = TexMan(curline->sidedef->GetTexture(side_t::top), true); - } - else if(fover->flags & FF_LOWERTEXTURE) - { - rw_pic = TexMan(curline->sidedef->GetTexture(side_t::bottom), true); - } - else - { - rw_pic = TexMan(fover->master->sidedef[0]->GetTexture(side_t::mid), true); - } - } - else if (frontsector->e->XFloor.ffloors.Size()) - { - // maybe not visible? 
- fover = NULL; - for (j = frontsector->e->XFloor.ffloors.Size() - 1; j >= 0; j--) - { - fover = frontsector->e->XFloor.ffloors[j]; - if (fover->model == rover->model) // never - { - break; - } - if (!(fover->flags & FF_EXISTS)) continue; - if (!(fover->flags & FF_RENDERSIDES)) continue; - // no sloped walls, it's bugged - if (fover->top.plane->isSlope() || fover->bottom.plane->isSlope()) continue; - - // visible? - if (fover->top.plane->Zat0() <= sclipBottom) continue; // no - if (fover->bottom.plane->Zat0() >= sclipTop) - { // visible, last possible - fover = NULL; - break; - } - if ((fover->flags & FF_SOLID) == (rover->flags & FF_SOLID) && - !(!(fover->flags & FF_SOLID) && (fover->alpha == 255 || rover->alpha == 255)) - ) - { - break; - } - if (fover->flags & rover->flags & FF_SWIMMABLE) - { // don't ever draw (but treat as something has been found) - rw_pic = DONT_DRAW; - } - fover = NULL; // visible - break; - } - if (fover && j != -1) - { - fover = NULL; - last = 1; - continue; // not visible - } - } - if (!rw_pic) - { - fover = NULL; - if (rover->flags & FF_UPPERTEXTURE) - { - rw_pic = TexMan(curline->sidedef->GetTexture(side_t::top), true); - } - else if(rover->flags & FF_LOWERTEXTURE) - { - rw_pic = TexMan(curline->sidedef->GetTexture(side_t::bottom), true); - } - else - { - rw_pic = TexMan(rover->master->sidedef[0]->GetTexture(side_t::mid), true); - } - } - // correct colors now - basecolormap = frontsector->ColorMap; - wallshade = ds->shade; - if (fixedlightlev < 0) - { - if ((ds->bFakeBoundary & 3) == 2) - { - for (j = backsector->e->XFloor.lightlist.Size() - 1; j >= 0; j--) - { - if (sclipTop <= backsector->e->XFloor.lightlist[j].plane.Zat0()) - { - lightlist_t *lit = &backsector->e->XFloor.lightlist[j]; - basecolormap = lit->extra_colormap; - wallshade = LIGHT2SHADE(curline->sidedef->GetLightLevel(foggy, *lit->p_lightlevel, lit->lightsource != NULL) + r_actualextralight); - break; - } - } - } - else - { - for (j = 
frontsector->e->XFloor.lightlist.Size() - 1; j >= 0; j--) - { - if (sclipTop <= frontsector->e->XFloor.lightlist[j].plane.Zat0()) - { - lightlist_t *lit = &frontsector->e->XFloor.lightlist[j]; - basecolormap = lit->extra_colormap; - wallshade = LIGHT2SHADE(curline->sidedef->GetLightLevel(foggy, *lit->p_lightlevel, lit->lightsource != NULL) + r_actualextralight); - break; - } - } - } - } - if (rw_pic != DONT_DRAW) - { - R_RenderFakeWall(ds, x1, x2, fover ? fover : rover); - } - else rw_pic = NULL; - break; - } - } - else - { // top to viewz - for (i = 0; i < (int)backsector->e->XFloor.ffloors.Size(); i++) - { - rover = backsector->e->XFloor.ffloors[i]; - if (!(rover->flags & FF_EXISTS)) continue; - - // visible? - passed = 0; - if (!(rover->flags & FF_RENDERSIDES) || - rover->top.plane->isSlope() || rover->bottom.plane->isSlope() || - rover->bottom.plane->Zat0() >= sclipTop || - rover->top.plane->Zat0() <= floorHeight || - rover->bottom.plane->Zat0() >= ceilingHeight) - { - if ((unsigned)i == backsector->e->XFloor.ffloors.Size() - 1) - { - passed = 1; - } - else - { - continue; - } - } - rw_pic = NULL; - if (rover->top.plane->Zat0() <= sclipBottom || passed) - { // maybe wall from inside rendering? - fover = NULL; - for (j = 0; j < (int)frontsector->e->XFloor.ffloors.Size(); j++) - { - fover = frontsector->e->XFloor.ffloors[j]; - if (fover->model == rover->model) - { // never - fover = NULL; - break; - } - if (!(fover->flags & FF_EXISTS)) continue; - if (!(fover->flags & FF_RENDERSIDES)) continue; - // no sloped walls, it's bugged - if (fover->top.plane->isSlope() || fover->bottom.plane->isSlope()) continue; - - // visible? - if (fover->bottom.plane->Zat0() >= sclipTop) continue; // no - if (fover->top.plane->Zat0() <= sclipBottom) - { // no, last possible - fover = NULL; - break; - } - // it is, render inside? 
- if (!(fover->flags & (FF_BOTHPLANES|FF_INVERTPLANES))) - { // no - fover = NULL; - } - break; - } - // nothing - if (!fover || (unsigned)j == frontsector->e->XFloor.ffloors.Size()) - { - break; - } - // correct texture - if (fover->flags & rover->flags & FF_SWIMMABLE) - { - rw_pic = DONT_DRAW; // don't ever draw (but treat as something has been found) - } - else if (fover->flags & FF_UPPERTEXTURE) - { - rw_pic = TexMan(curline->sidedef->GetTexture(side_t::top), true); - } - else if (fover->flags & FF_LOWERTEXTURE) - { - rw_pic = TexMan(curline->sidedef->GetTexture(side_t::bottom), true); - } - else - { - rw_pic = TexMan(fover->master->sidedef[0]->GetTexture(side_t::mid), true); - } - } - else if (frontsector->e->XFloor.ffloors.Size()) - { // maybe not visible? - fover = NULL; - for (j = 0; j < (int)frontsector->e->XFloor.ffloors.Size(); j++) - { - fover = frontsector->e->XFloor.ffloors[j]; - if (fover->model == rover->model) - { // never - break; - } - if (!(fover->flags & FF_EXISTS)) continue; - if (!(fover->flags & FF_RENDERSIDES)) continue; - // no sloped walls, its bugged - if (fover->top.plane->isSlope() || fover->bottom.plane->isSlope()) continue; - - // visible? 
- if (fover->bottom.plane->Zat0() >= sclipTop) continue; // no - if (fover->top.plane->Zat0() <= sclipBottom) - { // visible, last possible - fover = NULL; - break; - } - if ((fover->flags & FF_SOLID) == (rover->flags & FF_SOLID) && - !(!(rover->flags & FF_SOLID) && (fover->alpha == 255 || rover->alpha == 255)) - ) - { - break; - } - if (fover->flags & rover->flags & FF_SWIMMABLE) - { // don't ever draw (but treat as something has been found) - rw_pic = DONT_DRAW; - } - fover = NULL; // visible - break; - } - if (fover && (unsigned)j != frontsector->e->XFloor.ffloors.Size()) - { // not visible - break; - } - } - if (rw_pic == NULL) - { - fover = NULL; - if (rover->flags & FF_UPPERTEXTURE) - { - rw_pic = TexMan(curline->sidedef->GetTexture(side_t::top), true); - } - else if (rover->flags & FF_LOWERTEXTURE) - { - rw_pic = TexMan(curline->sidedef->GetTexture(side_t::bottom), true); - } - else - { - rw_pic = TexMan(rover->master->sidedef[0]->GetTexture(side_t::mid), true); - } - } - // correct colors now - basecolormap = frontsector->ColorMap; - wallshade = ds->shade; - if (fixedlightlev < 0) - { - if ((ds->bFakeBoundary & 3) == 2) - { - for (j = backsector->e->XFloor.lightlist.Size() - 1; j >= 0; j--) - { - if (sclipTop <= backsector->e->XFloor.lightlist[j].plane.Zat0()) - { - lightlist_t *lit = &backsector->e->XFloor.lightlist[j]; - basecolormap = lit->extra_colormap; - wallshade = LIGHT2SHADE(curline->sidedef->GetLightLevel(foggy, *lit->p_lightlevel, lit->lightsource != NULL) + r_actualextralight); - break; - } - } - } - else - { - for (j = frontsector->e->XFloor.lightlist.Size() - 1; j >= 0; j--) - { - if(sclipTop <= frontsector->e->XFloor.lightlist[j].plane.Zat0()) - { - lightlist_t *lit = &frontsector->e->XFloor.lightlist[j]; - basecolormap = lit->extra_colormap; - wallshade = LIGHT2SHADE(curline->sidedef->GetLightLevel(foggy, *lit->p_lightlevel, lit->lightsource != NULL) + r_actualextralight); - break; - } - } - } - } - - if (rw_pic != DONT_DRAW) - { - 
R_RenderFakeWall(ds, x1, x2, fover ? fover : rover); - } - else - { - rw_pic = NULL; - } - break; - } - } - return; -} - -// -// R_RenderSegLoop -// Draws zero, one, or two textures for walls. -// Can draw or mark the starting pixel of floor and ceiling textures. -// CALLED: CORE LOOPING ROUTINE. -// - -void R_RenderSegLoop(int x1, int x2) -{ - int x; - double xscale; - double yscale; - fixed_t xoffset = rw_offset; - - if (fixedlightlev >= 0) - R_SetColorMapLight((r_fullbrightignoresectorcolor) ? &FullNormalLight : basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); - else if (fixedcolormap != NULL) - R_SetColorMapLight(fixedcolormap, 0, 0); - - // clip wall to the floor and ceiling - for (x = x1; x < x2; ++x) - { - if (walltop[x] < ceilingclip[x]) - { - walltop[x] = ceilingclip[x]; - } - if (wallbottom[x] > floorclip[x]) - { - wallbottom[x] = floorclip[x]; - } - } - - // mark ceiling areas - if (markceiling) - { - for (x = x1; x < x2; ++x) - { - short top = (fakeFloor && fake3D & 2) ? fakeFloor->ceilingclip[x] : ceilingclip[x]; - short bottom = MIN (walltop[x], floorclip[x]); - if (top < bottom) - { - ceilingplane->top[x] = top; - ceilingplane->bottom[x] = bottom; - } - } - } - - // mark floor areas - if (markfloor) - { - for (x = x1; x < x2; ++x) - { - short top = MAX (wallbottom[x], ceilingclip[x]); - short bottom = (fakeFloor && fake3D & 1) ? 
fakeFloor->floorclip[x] : floorclip[x]; - if (top < bottom) - { - assert (bottom <= viewheight); - floorplane->top[x] = top; - floorplane->bottom[x] = bottom; - } - } - } - - // kg3D - fake planes clipping - if (fake3D & FAKE3D_REFRESHCLIP) - { - if (fake3D & FAKE3D_CLIPBOTFRONT) - { - memcpy (fakeFloor->floorclip+x1, wallbottom+x1, (x2-x1)*sizeof(short)); - } - else - { - for (x = x1; x < x2; ++x) - { - walllower[x] = MIN (MAX (walllower[x], ceilingclip[x]), wallbottom[x]); - } - memcpy (fakeFloor->floorclip+x1, walllower+x1, (x2-x1)*sizeof(short)); - } - if (fake3D & FAKE3D_CLIPTOPFRONT) - { - memcpy (fakeFloor->ceilingclip+x1, walltop+x1, (x2-x1)*sizeof(short)); - } - else - { - for (x = x1; x < x2; ++x) - { - wallupper[x] = MAX (MIN (wallupper[x], floorclip[x]), walltop[x]); - } - memcpy (fakeFloor->ceilingclip+x1, wallupper+x1, (x2-x1)*sizeof(short)); - } - } - if(fake3D & 7) return; - - FLightNode *light_list = (curline && curline->sidedef) ? curline->sidedef->lighthead : nullptr; - - // draw the wall tiers - if (midtexture) - { // one sided line - if (midtexture->UseType != FTexture::TEX_Null && viewactive) - { - dc_texturemid = rw_midtexturemid; - rw_pic = midtexture; - xscale = rw_pic->Scale.X * rw_midtexturescalex; - yscale = rw_pic->Scale.Y * rw_midtexturescaley; - if (xscale != lwallscale) - { - PrepLWall (lwall, curline->sidedef->TexelLength*xscale, WallC.sx1, WallC.sx2); - lwallscale = xscale; - } - if (midtexture->bWorldPanning) - { - rw_offset = xs_RoundToInt(rw_offset_mid * xscale); - } - else - { - rw_offset = rw_offset_mid; - } - if (xscale < 0) - { - rw_offset = -rw_offset; - } - R_DrawWallSegment(rw_pic, x1, x2, walltop, wallbottom, swall, lwall, yscale, MAX(rw_frontcz1, rw_frontcz2), MIN(rw_frontfz1, rw_frontfz2), false, wallshade, light_list); - } - fillshort (ceilingclip+x1, x2-x1, viewheight); - fillshort (floorclip+x1, x2-x1, 0xffff); - } - else - { // two sided line - if (toptexture != NULL && toptexture->UseType != FTexture::TEX_Null) - 
{ // top wall - for (x = x1; x < x2; ++x) - { - wallupper[x] = MAX (MIN (wallupper[x], floorclip[x]), walltop[x]); - } - if (viewactive) - { - dc_texturemid = rw_toptexturemid; - rw_pic = toptexture; - xscale = rw_pic->Scale.X * rw_toptexturescalex; - yscale = rw_pic->Scale.Y * rw_toptexturescaley; - if (xscale != lwallscale) - { - PrepLWall (lwall, curline->sidedef->TexelLength*xscale, WallC.sx1, WallC.sx2); - lwallscale = xscale; - } - if (toptexture->bWorldPanning) - { - rw_offset = xs_RoundToInt(rw_offset_top * xscale); - } - else - { - rw_offset = rw_offset_top; - } - if (xscale < 0) - { - rw_offset = -rw_offset; - } - R_DrawWallSegment(rw_pic, x1, x2, walltop, wallupper, swall, lwall, yscale, MAX(rw_frontcz1, rw_frontcz2), MIN(rw_backcz1, rw_backcz2), false, wallshade, light_list); - } - memcpy (ceilingclip+x1, wallupper+x1, (x2-x1)*sizeof(short)); - } - else if (markceiling) - { // no top wall - memcpy (ceilingclip+x1, walltop+x1, (x2-x1)*sizeof(short)); - } - - - if (bottomtexture != NULL && bottomtexture->UseType != FTexture::TEX_Null) - { // bottom wall - for (x = x1; x < x2; ++x) - { - walllower[x] = MIN (MAX (walllower[x], ceilingclip[x]), wallbottom[x]); - } - if (viewactive) - { - dc_texturemid = rw_bottomtexturemid; - rw_pic = bottomtexture; - xscale = rw_pic->Scale.X * rw_bottomtexturescalex; - yscale = rw_pic->Scale.Y * rw_bottomtexturescaley; - if (xscale != lwallscale) - { - PrepLWall (lwall, curline->sidedef->TexelLength*xscale, WallC.sx1, WallC.sx2); - lwallscale = xscale; - } - if (bottomtexture->bWorldPanning) - { - rw_offset = xs_RoundToInt(rw_offset_bottom * xscale); - } - else - { - rw_offset = rw_offset_bottom; - } - if (xscale < 0) - { - rw_offset = -rw_offset; - } - R_DrawWallSegment(rw_pic, x1, x2, walllower, wallbottom, swall, lwall, yscale, MAX(rw_backfz1, rw_backfz2), MIN(rw_frontfz1, rw_frontfz2), false, wallshade, light_list); - } - memcpy (floorclip+x1, walllower+x1, (x2-x1)*sizeof(short)); - } - else if (markfloor) - { // no 
bottom wall - memcpy (floorclip+x1, wallbottom+x1, (x2-x1)*sizeof(short)); - } - } - rw_offset = xoffset; -} - -void R_NewWall (bool needlights) -{ - double rowoffset; - double yrepeat; - - rw_markportal = false; - - sidedef = curline->sidedef; - linedef = curline->linedef; - - // mark the segment as visible for auto map - if (!r_dontmaplines) linedef->flags |= ML_MAPPED; - - midtexture = toptexture = bottomtexture = 0; - - if (sidedef == linedef->sidedef[0] && - (linedef->special == Line_Mirror && r_drawmirrors)) // [ZZ] compatibility with r_drawmirrors cvar that existed way before portals - { - markfloor = markceiling = true; // act like a one-sided wall here (todo: check how does this work with transparency) - rw_markportal = true; - } - else if (backsector == NULL) - { - // single sided line - // a single sided line is terminal, so it must mark ends - markfloor = markceiling = true; - // [RH] Horizon lines do not need to be textured - if (linedef->isVisualPortal()) - { - rw_markportal = true; - } - else if (linedef->special != Line_Horizon) - { - midtexture = TexMan(sidedef->GetTexture(side_t::mid), true); - rw_offset_mid = FLOAT2FIXED(sidedef->GetTextureXOffset(side_t::mid)); - rowoffset = sidedef->GetTextureYOffset(side_t::mid); - rw_midtexturescalex = sidedef->GetTextureXScale(side_t::mid); - rw_midtexturescaley = sidedef->GetTextureYScale(side_t::mid); - yrepeat = midtexture->Scale.Y * rw_midtexturescaley; - if (yrepeat >= 0) - { // normal orientation - if (linedef->flags & ML_DONTPEGBOTTOM) - { // bottom of texture at bottom - rw_midtexturemid = (frontsector->GetPlaneTexZ(sector_t::floor) - ViewPos.Z) * yrepeat + midtexture->GetHeight(); - } - else - { // top of texture at top - rw_midtexturemid = (frontsector->GetPlaneTexZ(sector_t::ceiling) - ViewPos.Z) * yrepeat; - if (rowoffset < 0 && midtexture != NULL) - { - rowoffset += midtexture->GetHeight(); - } - } - } - else - { // upside down - rowoffset = -rowoffset; - if (linedef->flags & ML_DONTPEGBOTTOM) - 
{ // top of texture at bottom - rw_midtexturemid = (frontsector->GetPlaneTexZ(sector_t::floor) - ViewPos.Z) * yrepeat; - } - else - { // bottom of texture at top - rw_midtexturemid = (frontsector->GetPlaneTexZ(sector_t::ceiling) - ViewPos.Z) * yrepeat + midtexture->GetHeight(); - } - } - if (midtexture->bWorldPanning) - { - rw_midtexturemid += rowoffset * yrepeat; - } - else - { - // rowoffset is added outside the multiply so that it positions the texture - // by texels instead of world units. - rw_midtexturemid += rowoffset; - } - } - } - else - { // two-sided line - // hack to allow height changes in outdoor areas - - double rw_frontlowertop = frontsector->GetPlaneTexZ(sector_t::ceiling); - - if (frontsector->GetTexture(sector_t::ceiling) == skyflatnum && - backsector->GetTexture(sector_t::ceiling) == skyflatnum) - { - if (rw_havehigh) - { // front ceiling is above back ceiling - memcpy (&walltop[WallC.sx1], &wallupper[WallC.sx1], (WallC.sx2 - WallC.sx1)*sizeof(walltop[0])); - rw_havehigh = false; - } - else if (rw_havelow && frontsector->ceilingplane != backsector->ceilingplane) - { // back ceiling is above front ceiling - // The check for rw_havelow is not Doom-compliant, but it avoids HoM that - // would otherwise occur because there is space made available for this - // wall but nothing to draw for it. - // Recalculate walltop so that the wall is clipped by the back sector's - // ceiling instead of the front sector's ceiling. - R_CreateWallSegmentYSloped (walltop, backsector->ceilingplane, &WallC, curline, MirrorFlags & RF_XFLIP); - } - // Putting sky ceilings on the front and back of a line alters the way unpegged - // positioning works. 
- rw_frontlowertop = backsector->GetPlaneTexZ(sector_t::ceiling); - } - - if (linedef->isVisualPortal()) - { - markceiling = markfloor = true; - } - else if ((rw_backcz1 <= rw_frontfz1 && rw_backcz2 <= rw_frontfz2) || - (rw_backfz1 >= rw_frontcz1 && rw_backfz2 >= rw_frontcz2)) - { - // closed door - markceiling = markfloor = true; - } - else - { - markfloor = rw_mustmarkfloor - || backsector->floorplane != frontsector->floorplane - || backsector->lightlevel != frontsector->lightlevel - || backsector->GetTexture(sector_t::floor) != frontsector->GetTexture(sector_t::floor) - || backsector->GetPlaneLight(sector_t::floor) != frontsector->GetPlaneLight(sector_t::floor) - - // killough 3/7/98: Add checks for (x,y) offsets - || backsector->planes[sector_t::floor].xform != frontsector->planes[sector_t::floor].xform - || backsector->GetAlpha(sector_t::floor) != frontsector->GetAlpha(sector_t::floor) - - // killough 4/15/98: prevent 2s normals - // from bleeding through deep water - || frontsector->heightsec - - || backsector->GetVisFlags(sector_t::floor) != frontsector->GetVisFlags(sector_t::floor) - - // [RH] Add checks for colormaps - || backsector->ColorMap != frontsector->ColorMap - - - // kg3D - add fake lights - || (frontsector->e && frontsector->e->XFloor.lightlist.Size()) - || (backsector->e && backsector->e->XFloor.lightlist.Size()) - - || (sidedef->GetTexture(side_t::mid).isValid() && - ((linedef->flags & (ML_CLIP_MIDTEX|ML_WRAP_MIDTEX)) || - (sidedef->Flags & (WALLF_CLIP_MIDTEX|WALLF_WRAP_MIDTEX)))) - ; - - markceiling = (frontsector->GetTexture(sector_t::ceiling) != skyflatnum || - backsector->GetTexture(sector_t::ceiling) != skyflatnum) && - (rw_mustmarkceiling - || backsector->ceilingplane != frontsector->ceilingplane - || backsector->lightlevel != frontsector->lightlevel - || backsector->GetTexture(sector_t::ceiling) != frontsector->GetTexture(sector_t::ceiling) - - // killough 3/7/98: Add checks for (x,y) offsets - || 
backsector->planes[sector_t::ceiling].xform != frontsector->planes[sector_t::ceiling].xform - || backsector->GetAlpha(sector_t::ceiling) != frontsector->GetAlpha(sector_t::ceiling) - - // killough 4/15/98: prevent 2s normals - // from bleeding through fake ceilings - || (frontsector->heightsec && frontsector->GetTexture(sector_t::ceiling) != skyflatnum) - - || backsector->GetPlaneLight(sector_t::ceiling) != frontsector->GetPlaneLight(sector_t::ceiling) - || backsector->GetFlags(sector_t::ceiling) != frontsector->GetFlags(sector_t::ceiling) - - // [RH] Add check for colormaps - || backsector->ColorMap != frontsector->ColorMap - - // kg3D - add fake lights - || (frontsector->e && frontsector->e->XFloor.lightlist.Size()) - || (backsector->e && backsector->e->XFloor.lightlist.Size()) - - || (sidedef->GetTexture(side_t::mid).isValid() && - ((linedef->flags & (ML_CLIP_MIDTEX|ML_WRAP_MIDTEX)) || - (sidedef->Flags & (WALLF_CLIP_MIDTEX|WALLF_WRAP_MIDTEX)))) - ); - } - - if (rw_havehigh) - { // top texture - toptexture = TexMan(sidedef->GetTexture(side_t::top), true); - - rw_offset_top = FLOAT2FIXED(sidedef->GetTextureXOffset(side_t::top)); - rowoffset = sidedef->GetTextureYOffset(side_t::top); - rw_toptexturescalex =sidedef->GetTextureXScale(side_t::top); - rw_toptexturescaley =sidedef->GetTextureYScale(side_t::top); - yrepeat = toptexture->Scale.Y * rw_toptexturescaley; - if (yrepeat >= 0) - { // normal orientation - if (linedef->flags & ML_DONTPEGTOP) - { // top of texture at top - rw_toptexturemid = (frontsector->GetPlaneTexZ(sector_t::ceiling) - ViewPos.Z) * yrepeat; - if (rowoffset < 0 && toptexture != NULL) - { - rowoffset += toptexture->GetHeight(); - } - } - else - { // bottom of texture at bottom - rw_toptexturemid = (backsector->GetPlaneTexZ(sector_t::ceiling) - ViewPos.Z) * yrepeat + toptexture->GetHeight(); - } - } - else - { // upside down - rowoffset = -rowoffset; - if (linedef->flags & ML_DONTPEGTOP) - { // bottom of texture at top - rw_toptexturemid = 
(frontsector->GetPlaneTexZ(sector_t::ceiling) - ViewPos.Z) * yrepeat + toptexture->GetHeight(); - } - else - { // top of texture at bottom - rw_toptexturemid = (backsector->GetPlaneTexZ(sector_t::ceiling) - ViewPos.Z) * yrepeat; - } - } - if (toptexture->bWorldPanning) - { - rw_toptexturemid += rowoffset * yrepeat; - } - else - { - rw_toptexturemid += rowoffset; - } - } - if (rw_havelow) - { // bottom texture - bottomtexture = TexMan(sidedef->GetTexture(side_t::bottom), true); - - rw_offset_bottom = FLOAT2FIXED(sidedef->GetTextureXOffset(side_t::bottom)); - rowoffset = sidedef->GetTextureYOffset(side_t::bottom); - rw_bottomtexturescalex = sidedef->GetTextureXScale(side_t::bottom); - rw_bottomtexturescaley = sidedef->GetTextureYScale(side_t::bottom); - yrepeat = bottomtexture->Scale.Y * rw_bottomtexturescaley; - if (yrepeat >= 0) - { // normal orientation - if (linedef->flags & ML_DONTPEGBOTTOM) - { // bottom of texture at bottom - rw_bottomtexturemid = (rw_frontlowertop - ViewPos.Z) * yrepeat; - } - else - { // top of texture at top - rw_bottomtexturemid = (backsector->GetPlaneTexZ(sector_t::floor) - ViewPos.Z) * yrepeat; - if (rowoffset < 0 && bottomtexture != NULL) - { - rowoffset += bottomtexture->GetHeight(); - } - } - } - else - { // upside down - rowoffset = -rowoffset; - if (linedef->flags & ML_DONTPEGBOTTOM) - { // top of texture at bottom - rw_bottomtexturemid = (rw_frontlowertop - ViewPos.Z) * yrepeat; - } - else - { // bottom of texture at top - rw_bottomtexturemid = (backsector->GetPlaneTexZ(sector_t::floor) - ViewPos.Z) * yrepeat + bottomtexture->GetHeight(); - } - } - if (bottomtexture->bWorldPanning) - { - rw_bottomtexturemid += rowoffset * yrepeat; - } - else - { - rw_bottomtexturemid += rowoffset; - } - } - rw_markportal = linedef->isVisualPortal(); - } - - // if a floor / ceiling plane is on the wrong side of the view plane, - // it is definitely invisible and doesn't need to be marked. 
- - // killough 3/7/98: add deep water check - if (frontsector->GetHeightSec() == NULL) - { - int planeside; - - planeside = frontsector->floorplane.PointOnSide(ViewPos); - if (frontsector->floorplane.fC() < 0) // 3D floors have the floor backwards - planeside = -planeside; - if (planeside <= 0) // above view plane - markfloor = false; - - if (frontsector->GetTexture(sector_t::ceiling) != skyflatnum) - { - planeside = frontsector->ceilingplane.PointOnSide(ViewPos); - if (frontsector->ceilingplane.fC() > 0) // 3D floors have the ceiling backwards - planeside = -planeside; - if (planeside <= 0) // below view plane - markceiling = false; - } - } - - FTexture *midtex = TexMan(sidedef->GetTexture(side_t::mid), true); - - segtextured = midtex != NULL || toptexture != NULL || bottomtexture != NULL; - - // calculate light table - if (needlights && (segtextured || (backsector && IsFogBoundary(frontsector, backsector)))) - { - lwallscale = - midtex ? (midtex->Scale.X * sidedef->GetTextureXScale(side_t::mid)) : - toptexture ? (toptexture->Scale.X * sidedef->GetTextureXScale(side_t::top)) : - bottomtexture ? (bottomtexture->Scale.X * sidedef->GetTextureXScale(side_t::bottom)) : - 1.; - - PrepWall (swall, lwall, sidedef->TexelLength * lwallscale, WallC.sx1, WallC.sx2); - - if (fixedcolormap == NULL && fixedlightlev < 0) - { - wallshade = LIGHT2SHADE(curline->sidedef->GetLightLevel(foggy, frontsector->lightlevel) - + r_actualextralight); - GlobVis = r_WallVisibility; - rw_lightleft = float (GlobVis / WallC.sz1); - rw_lightstep = float((GlobVis / WallC.sz2 - rw_lightleft) / (WallC.sx2 - WallC.sx1)); - } - else - { - rw_lightleft = 1; - rw_lightstep = 0; - } - } -} - -// -// R_StoreWallRange -// A wall segment will be drawn between start and stop pixels (inclusive). 
-// - -bool R_StoreWallRange (int start, int stop) -{ - int i; - bool maskedtexture = false; - -#ifdef RANGECHECK - if (start >= viewwidth || start >= stop) - I_FatalError ("Bad R_StoreWallRange: %i to %i", start , stop); -#endif - - drawseg_t *draw_segment = R_AddDrawSegment(); - - if (!rw_prepped) - { - rw_prepped = true; - R_NewWall (true); - } - - rw_offset = FLOAT2FIXED(sidedef->GetTextureXOffset(side_t::mid)); - rw_light = rw_lightleft + rw_lightstep * (start - WallC.sx1); - - draw_segment->CurrentPortalUniq = CurrentPortalUniq; - draw_segment->sx1 = WallC.sx1; - draw_segment->sx2 = WallC.sx2; - draw_segment->sz1 = WallC.sz1; - draw_segment->sz2 = WallC.sz2; - draw_segment->cx = WallC.tleft.X;; - draw_segment->cy = WallC.tleft.Y; - draw_segment->cdx = WallC.tright.X - WallC.tleft.X; - draw_segment->cdy = WallC.tright.Y - WallC.tleft.Y; - draw_segment->tmapvals = WallT; - draw_segment->siz1 = 1 / WallC.sz1; - draw_segment->siz2 = 1 / WallC.sz2; - draw_segment->x1 = start; - draw_segment->x2 = stop; - draw_segment->curline = curline; - draw_segment->bFogBoundary = false; - draw_segment->bFakeBoundary = false; - if(fake3D & 7) draw_segment->fake = 1; - else draw_segment->fake = 0; - - draw_segment->sprtopclip = draw_segment->sprbottomclip = draw_segment->maskedtexturecol = draw_segment->bkup = draw_segment->swall = -1; - - if (rw_markportal) - { - draw_segment->silhouette = SIL_BOTH; - } - else if (backsector == NULL) - { - draw_segment->sprtopclip = R_NewOpening (stop - start); - draw_segment->sprbottomclip = R_NewOpening (stop - start); - fillshort (openings + draw_segment->sprtopclip, stop-start, viewheight); - memset (openings + draw_segment->sprbottomclip, -1, (stop-start)*sizeof(short)); - draw_segment->silhouette = SIL_BOTH; - } - else - { - // two sided line - draw_segment->silhouette = 0; - - if (rw_frontfz1 > rw_backfz1 || rw_frontfz2 > rw_backfz2 || - backsector->floorplane.PointOnSide(ViewPos) < 0) - { - draw_segment->silhouette = SIL_BOTTOM; - } - - 
if (rw_frontcz1 < rw_backcz1 || rw_frontcz2 < rw_backcz2 || - backsector->ceilingplane.PointOnSide(ViewPos) < 0) - { - draw_segment->silhouette |= SIL_TOP; - } - - // killough 1/17/98: this test is required if the fix - // for the automap bug (r_bsp.c) is used, or else some - // sprites will be displayed behind closed doors. That - // fix prevents lines behind closed doors with dropoffs - // from being displayed on the automap. - // - // killough 4/7/98: make doorclosed external variable - - { - extern int doorclosed; // killough 1/17/98, 2/8/98, 4/7/98 - if (doorclosed || (rw_backcz1 <= rw_frontfz1 && rw_backcz2 <= rw_frontfz2)) - { - draw_segment->sprbottomclip = R_NewOpening (stop - start); - memset (openings + draw_segment->sprbottomclip, -1, (stop-start)*sizeof(short)); - draw_segment->silhouette |= SIL_BOTTOM; - } - if (doorclosed || (rw_backfz1 >= rw_frontcz1 && rw_backfz2 >= rw_frontcz2)) - { // killough 1/17/98, 2/8/98 - draw_segment->sprtopclip = R_NewOpening (stop - start); - fillshort (openings + draw_segment->sprtopclip, stop - start, viewheight); - draw_segment->silhouette |= SIL_TOP; - } - } - - if(!draw_segment->fake && r_3dfloors && backsector->e && backsector->e->XFloor.ffloors.Size()) { - for(i = 0; i < (int)backsector->e->XFloor.ffloors.Size(); i++) { - F3DFloor *rover = backsector->e->XFloor.ffloors[i]; - if(rover->flags & FF_RENDERSIDES && (!(rover->flags & FF_INVERTSIDES) || rover->flags & FF_ALLSIDES)) { - draw_segment->bFakeBoundary |= 1; - break; - } - } - } - if(!draw_segment->fake && r_3dfloors && frontsector->e && frontsector->e->XFloor.ffloors.Size()) { - for(i = 0; i < (int)frontsector->e->XFloor.ffloors.Size(); i++) { - F3DFloor *rover = frontsector->e->XFloor.ffloors[i]; - if(rover->flags & FF_RENDERSIDES && (rover->flags & FF_ALLSIDES || rover->flags & FF_INVERTSIDES)) { - draw_segment->bFakeBoundary |= 2; - break; - } - } - } - // kg3D - no for fakes - if(!draw_segment->fake) - // allocate space for masked texture tables, if 
needed - // [RH] Don't just allocate the space; fill it in too. - if ((TexMan(sidedef->GetTexture(side_t::mid), true)->UseType != FTexture::TEX_Null || draw_segment->bFakeBoundary || IsFogBoundary (frontsector, backsector)) && - (rw_ceilstat != 12 || !sidedef->GetTexture(side_t::top).isValid()) && - (rw_floorstat != 3 || !sidedef->GetTexture(side_t::bottom).isValid()) && - (WallC.sz1 >= TOO_CLOSE_Z && WallC.sz2 >= TOO_CLOSE_Z)) - { - float *swal; - fixed_t *lwal; - int i; - - maskedtexture = true; - - // kg3D - backup for mid and fake walls - draw_segment->bkup = R_NewOpening(stop - start); - memcpy(openings + draw_segment->bkup, &ceilingclip[start], sizeof(short)*(stop - start)); - - draw_segment->bFogBoundary = IsFogBoundary (frontsector, backsector); - if (sidedef->GetTexture(side_t::mid).isValid() || draw_segment->bFakeBoundary) - { - if(sidedef->GetTexture(side_t::mid).isValid()) - draw_segment->bFakeBoundary |= 4; // it is also mid texture - - // note: This should never have used the openings array to store its data! - draw_segment->maskedtexturecol = R_NewOpening ((stop - start) * 2); - draw_segment->swall = R_NewOpening ((stop - start) * 2); - - lwal = (fixed_t *)(openings + draw_segment->maskedtexturecol); - swal = (float *)(openings + draw_segment->swall); - FTexture *pic = TexMan(sidedef->GetTexture(side_t::mid), true); - double yscale = pic->Scale.Y * sidedef->GetTextureYScale(side_t::mid); - fixed_t xoffset = FLOAT2FIXED(sidedef->GetTextureXOffset(side_t::mid)); - - if (pic->bWorldPanning) - { - xoffset = xs_RoundToInt(xoffset * lwallscale); - } - - for (i = start; i < stop; i++) - { - *lwal++ = lwall[i] + xoffset; - *swal++ = swall[i]; - } - - double istart = *((float *)(openings + draw_segment->swall)) * yscale; - double iend = *(swal - 1) * yscale; -#if 0 - ///This was for avoiding overflow when using fixed point. It might not be needed anymore. 
- const double mini = 3 / 65536.0; - if (istart < mini && istart >= 0) istart = mini; - if (istart > -mini && istart < 0) istart = -mini; - if (iend < mini && iend >= 0) iend = mini; - if (iend > -mini && iend < 0) iend = -mini; -#endif - istart = 1 / istart; - iend = 1 / iend; - draw_segment->yscale = (float)yscale; - draw_segment->iscale = (float)istart; - if (stop - start > 0) - { - draw_segment->iscalestep = float((iend - istart) / (stop - start)); - } - else - { - draw_segment->iscalestep = 0; - } - } - draw_segment->light = rw_light; - draw_segment->lightstep = rw_lightstep; - - // Masked midtextures should get the light level from the sector they reference, - // not from the current subsector, which is what the current wallshade value - // comes from. We make an exeption for polyobjects, however, since their "home" - // sector should be whichever one they move into. - if (curline->sidedef->Flags & WALLF_POLYOBJ) - { - draw_segment->shade = wallshade; - } - else - { - draw_segment->shade = LIGHT2SHADE(curline->sidedef->GetLightLevel(foggy, curline->frontsector->lightlevel) - + r_actualextralight); - } - - if (draw_segment->bFogBoundary || draw_segment->maskedtexturecol != -1) - { - size_t drawsegnum = draw_segment - drawsegs; - InterestingDrawsegs.Push (drawsegnum); - } - } - } - - // render it - if (markceiling) - { - if (ceilingplane) - { // killough 4/11/98: add NULL ptr checks - ceilingplane = R_CheckPlane (ceilingplane, start, stop); - } - else - { - markceiling = false; - } - } - - if (markfloor) - { - if (floorplane) - { // killough 4/11/98: add NULL ptr checks - floorplane = R_CheckPlane (floorplane, start, stop); - } - else - { - markfloor = false; - } - } - - R_RenderSegLoop (start, stop); - - if(fake3D & 7) { - return !(fake3D & FAKE3D_FAKEMASK); - } - - // save sprite clipping info - if ( ((draw_segment->silhouette & SIL_TOP) || maskedtexture) && draw_segment->sprtopclip == -1) - { - draw_segment->sprtopclip = R_NewOpening (stop - start); - memcpy 
(openings + draw_segment->sprtopclip, &ceilingclip[start], sizeof(short)*(stop-start)); - } - - if ( ((draw_segment->silhouette & SIL_BOTTOM) || maskedtexture) && draw_segment->sprbottomclip == -1) - { - draw_segment->sprbottomclip = R_NewOpening (stop - start); - memcpy (openings + draw_segment->sprbottomclip, &floorclip[start], sizeof(short)*(stop-start)); - } - - if (maskedtexture && curline->sidedef->GetTexture(side_t::mid).isValid()) - { - draw_segment->silhouette |= SIL_TOP | SIL_BOTTOM; - } - - // [RH] Draw any decals bound to the seg - // [ZZ] Only if not an active mirror - if (!rw_markportal) - { - R_RenderDecals(curline->sidedef, draw_segment, wallshade); - } - - if (rw_markportal) - { - PortalDrawseg pds; - pds.src = curline->linedef; - pds.dst = curline->linedef->special == Line_Mirror? curline->linedef : curline->linedef->getPortalDestination(); - pds.x1 = draw_segment->x1; - pds.x2 = draw_segment->x2; - pds.len = pds.x2 - pds.x1; - pds.ceilingclip.Resize(pds.len); - memcpy(&pds.ceilingclip[0], openings + draw_segment->sprtopclip, pds.len*sizeof(*openings)); - pds.floorclip.Resize(pds.len); - memcpy(&pds.floorclip[0], openings + draw_segment->sprbottomclip, pds.len*sizeof(*openings)); - - for (int i = 0; i < pds.x2-pds.x1; i++) - { - if (pds.ceilingclip[i] < 0) - pds.ceilingclip[i] = 0; - if (pds.ceilingclip[i] >= viewheight) - pds.ceilingclip[i] = viewheight-1; - if (pds.floorclip[i] < 0) - pds.floorclip[i] = 0; - if (pds.floorclip[i] >= viewheight) - pds.floorclip[i] = viewheight-1; - } - - pds.mirror = curline->linedef->special == Line_Mirror; - WallPortals.Push(pds); - } - - return !(fake3D & FAKE3D_FAKEMASK); -} - - -} diff --git a/src/swrenderer/scene/r_segs.h b/src/swrenderer/scene/r_segs.h deleted file mode 100644 index c11275cb4e..0000000000 --- a/src/swrenderer/scene/r_segs.h +++ /dev/null @@ -1,52 +0,0 @@ -// Emacs style mode select -*- C++ -*- -//----------------------------------------------------------------------------- -// -// $Id:$ -// 
-// Copyright (C) 1993-1996 by id Software, Inc. -// -// This source is available for distribution and/or modification -// only under the terms of the DOOM Source Code License as -// published by id Software. All rights reserved. -// -// The source is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// FITNESS FOR A PARTICULAR PURPOSE. See the DOOM Source Code License -// for more details. -// -// DESCRIPTION: -// Refresh module, drawing LineSegs from BSP. -// -//----------------------------------------------------------------------------- - - -#ifndef __R_SEGS_H__ -#define __R_SEGS_H__ - -namespace swrenderer -{ - -struct drawseg_t; -struct visplane_t; - -bool R_StoreWallRange(int start, int stop); -void R_RenderMaskedSegRange (drawseg_t *ds, int x1, int x2); - - - -void R_RenderSegLoop(int x1, int x2); - -extern float rw_light; // [RH] Scale lights with viewsize adjustments -extern float rw_lightstep; -extern float rw_lightleft; -extern fixed_t rw_offset; -extern FTexture *rw_pic; - -extern short floorclip[MAXWIDTH]; -extern short ceilingclip[MAXWIDTH]; -extern visplane_t *floorplane; -extern visplane_t *ceilingplane; - -} - -#endif diff --git a/src/swrenderer/scene/r_things.cpp b/src/swrenderer/scene/r_things.cpp index 0dbf47fa4c..f99fe515b6 100644 --- a/src/swrenderer/scene/r_things.cpp +++ b/src/swrenderer/scene/r_things.cpp @@ -56,7 +56,6 @@ #include "d_netinf.h" #include "p_effect.h" #include "r_bsp.h" -#include "r_segs.h" #include "r_3dfloors.h" #include "swrenderer/drawers/r_draw_rgba.h" #include "swrenderer/drawers/r_draw_pal.h" @@ -108,8 +107,6 @@ struct FCoverageBuffer unsigned int NumLists; }; -extern double globaluclip, globaldclip; - // // Sprite rotation 0 is facing the viewer, // rotation 1 is one angle turn CLOCKWISE around the axis. 
diff --git a/src/swrenderer/segments/r_clipsegment.cpp b/src/swrenderer/segments/r_clipsegment.cpp index d92633ea9d..c8c4acd066 100644 --- a/src/swrenderer/segments/r_clipsegment.cpp +++ b/src/swrenderer/segments/r_clipsegment.cpp @@ -16,7 +16,6 @@ #include "doomstat.h" #include "r_state.h" #include "swrenderer/scene/r_bsp.h" -#include "swrenderer/scene/r_segs.h" #include "v_palette.h" #include "r_sky.h" #include "po_man.h" diff --git a/src/swrenderer/segments/r_drawsegment.cpp b/src/swrenderer/segments/r_drawsegment.cpp index 9c7dde091c..2f2e7bd773 100644 --- a/src/swrenderer/segments/r_drawsegment.cpp +++ b/src/swrenderer/segments/r_drawsegment.cpp @@ -6,21 +6,25 @@ #include "i_system.h" #include "p_lnspec.h" #include "p_setup.h" -#include "swrenderer/r_main.h" -#include "swrenderer/drawers/r_draw.h" -#include "swrenderer/scene/r_things.h" -#include "swrenderer/scene/r_3dfloors.h" #include "a_sharedglobal.h" #include "g_level.h" #include "p_effect.h" #include "doomstat.h" #include "r_state.h" -#include "swrenderer/scene/r_bsp.h" -#include "swrenderer/scene/r_segs.h" #include "v_palette.h" #include "r_sky.h" #include "po_man.h" #include "r_data/colormaps.h" +#include "d_net.h" +#include "swrenderer/r_main.h" +#include "swrenderer/r_memory.h" +#include "swrenderer/drawers/r_draw.h" +#include "swrenderer/scene/r_things.h" +#include "swrenderer/scene/r_3dfloors.h" +#include "swrenderer/scene/r_bsp.h" +#include "swrenderer/line/r_wallsetup.h" +#include "swrenderer/line/r_walldraw.h" +#include "swrenderer/plane/r_fogboundary.h" #include "swrenderer/segments/r_drawsegment.h" namespace swrenderer @@ -73,4 +77,843 @@ namespace swrenderer return ds_p++; } + + // Clip a midtexture to the floor and ceiling of the sector in front of it. 
+ void ClipMidtex(int x1, int x2) + { + short most[MAXWIDTH]; + + R_CreateWallSegmentYSloped(most, curline->frontsector->ceilingplane, &WallC, curline, MirrorFlags & RF_XFLIP); + for (int i = x1; i < x2; ++i) + { + if (wallupper[i] < most[i]) + wallupper[i] = most[i]; + } + R_CreateWallSegmentYSloped(most, curline->frontsector->floorplane, &WallC, curline, MirrorFlags & RF_XFLIP); + for (int i = x1; i < x2; ++i) + { + if (walllower[i] > most[i]) + walllower[i] = most[i]; + } + } + + void R_RenderMaskedSegRange(drawseg_t *ds, int x1, int x2) + { + FTexture *tex; + int i; + sector_t tempsec; // killough 4/13/98 + double texheight, texheightscale; + bool notrelevant = false; + double rowoffset; + bool wrap = false; + + const sector_t *sec; + + sprflipvert = false; + + curline = ds->curline; + + bool visible = R_SetPatchStyle(LegacyRenderStyles[curline->linedef->flags & ML_ADDTRANS ? STYLE_Add : STYLE_Translucent], + (float)MIN(curline->linedef->alpha, 1.), 0, 0); + + if (!visible && !ds->bFogBoundary && !ds->bFakeBoundary) + { + return; + } + + NetUpdate(); + + frontsector = curline->frontsector; + backsector = curline->backsector; + + tex = TexMan(curline->sidedef->GetTexture(side_t::mid), true); + if (i_compatflags & COMPATF_MASKEDMIDTEX) + { + tex = tex->GetRawTexture(); + } + + // killough 4/13/98: get correct lightlevel for 2s normal textures + sec = R_FakeFlat(frontsector, &tempsec, nullptr, nullptr, false); + + basecolormap = sec->ColorMap; // [RH] Set basecolormap + + int wallshade = ds->shade; + rw_lightstep = ds->lightstep; + rw_light = ds->light + (x1 - ds->x1) * rw_lightstep; + + if (fixedlightlev < 0) + { + if (!(fake3D & FAKE3D_CLIPTOP)) + { + sclipTop = sec->ceilingplane.ZatPoint(ViewPos); + } + for (i = frontsector->e->XFloor.lightlist.Size() - 1; i >= 0; i--) + { + if (sclipTop <= frontsector->e->XFloor.lightlist[i].plane.Zat0()) + { + lightlist_t *lit = &frontsector->e->XFloor.lightlist[i]; + basecolormap = lit->extra_colormap; + wallshade = 
LIGHT2SHADE(curline->sidedef->GetLightLevel(foggy, *lit->p_lightlevel, lit->lightsource != nullptr) + r_actualextralight); + break; + } + } + } + + mfloorclip = openings + ds->sprbottomclip - ds->x1; + mceilingclip = openings + ds->sprtopclip - ds->x1; + + float *MaskedSWall, MaskedScaleY, rw_scalestep; + + // [RH] Draw fog partition + if (ds->bFogBoundary) + { + R_DrawFogBoundary(x1, x2, mceilingclip, mfloorclip, wallshade); + if (ds->maskedtexturecol == -1) + { + goto clearfog; + } + } + if ((ds->bFakeBoundary && !(ds->bFakeBoundary & 4)) || !visible) + { + goto clearfog; + } + + MaskedSWall = (float *)(openings + ds->swall) - ds->x1; + MaskedScaleY = ds->yscale; + fixed_t *maskedtexturecol = (fixed_t *)(openings + ds->maskedtexturecol) - ds->x1; + spryscale = ds->iscale + ds->iscalestep * (x1 - ds->x1); + rw_scalestep = ds->iscalestep; + + if (fixedlightlev >= 0) + R_SetColorMapLight((r_fullbrightignoresectorcolor) ? &FullNormalLight : basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); + else if (fixedcolormap != nullptr) + R_SetColorMapLight(fixedcolormap, 0, 0); + + // find positioning + texheight = tex->GetScaledHeightDouble(); + texheightscale = fabs(curline->sidedef->GetTextureYScale(side_t::mid)); + if (texheightscale != 1) + { + texheight = texheight / texheightscale; + } + if (curline->linedef->flags & ML_DONTPEGBOTTOM) + { + dc_texturemid = MAX(frontsector->GetPlaneTexZ(sector_t::floor), backsector->GetPlaneTexZ(sector_t::floor)) + texheight; + } + else + { + dc_texturemid = MIN(frontsector->GetPlaneTexZ(sector_t::ceiling), backsector->GetPlaneTexZ(sector_t::ceiling)); + } + + rowoffset = curline->sidedef->GetTextureYOffset(side_t::mid); + + wrap = (curline->linedef->flags & ML_WRAP_MIDTEX) || (curline->sidedef->Flags & WALLF_WRAP_MIDTEX); + if (!wrap) + { // Texture does not wrap vertically. 
+ double textop; + + if (MaskedScaleY < 0) + { + MaskedScaleY = -MaskedScaleY; + sprflipvert = true; + } + if (tex->bWorldPanning) + { + // rowoffset is added before the multiply so that the masked texture will + // still be positioned in world units rather than texels. + dc_texturemid += rowoffset - ViewPos.Z; + textop = dc_texturemid; + dc_texturemid *= MaskedScaleY; + } + else + { + // rowoffset is added outside the multiply so that it positions the texture + // by texels instead of world units. + textop = dc_texturemid + rowoffset / MaskedScaleY - ViewPos.Z; + dc_texturemid = (dc_texturemid - ViewPos.Z) * MaskedScaleY + rowoffset; + } + if (sprflipvert) + { + MaskedScaleY = -MaskedScaleY; + dc_texturemid -= tex->GetHeight() << FRACBITS; + } + + // [RH] Don't bother drawing segs that are completely offscreen + if (globaldclip * ds->sz1 < -textop && globaldclip * ds->sz2 < -textop) + { // Texture top is below the bottom of the screen + goto clearfog; + } + + if (globaluclip * ds->sz1 > texheight - textop && globaluclip * ds->sz2 > texheight - textop) + { // Texture bottom is above the top of the screen + goto clearfog; + } + + if ((fake3D & FAKE3D_CLIPBOTTOM) && textop < sclipBottom - ViewPos.Z) + { + notrelevant = true; + goto clearfog; + } + if ((fake3D & FAKE3D_CLIPTOP) && textop - texheight > sclipTop - ViewPos.Z) + { + notrelevant = true; + goto clearfog; + } + + WallC.sz1 = ds->sz1; + WallC.sz2 = ds->sz2; + WallC.sx1 = ds->sx1; + WallC.sx2 = ds->sx2; + + if (fake3D & FAKE3D_CLIPTOP) + { + R_CreateWallSegmentY(wallupper, textop < sclipTop - ViewPos.Z ? textop : sclipTop - ViewPos.Z, &WallC); + } + else + { + R_CreateWallSegmentY(wallupper, textop, &WallC); + } + if (fake3D & FAKE3D_CLIPBOTTOM) + { + R_CreateWallSegmentY(walllower, textop - texheight > sclipBottom - ViewPos.Z ? 
textop - texheight : sclipBottom - ViewPos.Z, &WallC); + } + else + { + R_CreateWallSegmentY(walllower, textop - texheight, &WallC); + } + + for (i = x1; i < x2; i++) + { + if (wallupper[i] < mceilingclip[i]) + wallupper[i] = mceilingclip[i]; + } + for (i = x1; i < x2; i++) + { + if (walllower[i] > mfloorclip[i]) + walllower[i] = mfloorclip[i]; + } + + if (CurrentSkybox) + { // Midtex clipping doesn't work properly with skyboxes, since you're normally below the floor + // or above the ceiling, so the appropriate end won't be clipped automatically when adding + // this drawseg. + if ((curline->linedef->flags & ML_CLIP_MIDTEX) || + (curline->sidedef->Flags & WALLF_CLIP_MIDTEX)) + { + ClipMidtex(x1, x2); + } + } + + mfloorclip = walllower; + mceilingclip = wallupper; + + // draw the columns one at a time + if (visible) + { + using namespace drawerargs; + for (dc_x = x1; dc_x < x2; ++dc_x) + { + if (fixedcolormap == nullptr && fixedlightlev < 0) + { + R_SetColorMapLight(basecolormap, rw_light, wallshade); + } + + dc_iscale = xs_Fix<16>::ToFix(MaskedSWall[dc_x] * MaskedScaleY); + if (sprflipvert) + sprtopscreen = CenterY + dc_texturemid * spryscale; + else + sprtopscreen = CenterY - dc_texturemid * spryscale; + + R_DrawMaskedColumn(tex, maskedtexturecol[dc_x]); + + rw_light += rw_lightstep; + spryscale += rw_scalestep; + } + } + } + else + { // Texture does wrap vertically. + if (tex->bWorldPanning) + { + // rowoffset is added before the multiply so that the masked texture will + // still be positioned in world units rather than texels. + dc_texturemid = (dc_texturemid - ViewPos.Z + rowoffset) * MaskedScaleY; + } + else + { + // rowoffset is added outside the multiply so that it positions the texture + // by texels instead of world units. 
+ dc_texturemid = (dc_texturemid - ViewPos.Z) * MaskedScaleY + rowoffset; + } + + WallC.sz1 = ds->sz1; + WallC.sz2 = ds->sz2; + WallC.sx1 = ds->sx1; + WallC.sx2 = ds->sx2; + + if (CurrentSkybox) + { // Midtex clipping doesn't work properly with skyboxes, since you're normally below the floor + // or above the ceiling, so the appropriate end won't be clipped automatically when adding + // this drawseg. + if ((curline->linedef->flags & ML_CLIP_MIDTEX) || + (curline->sidedef->Flags & WALLF_CLIP_MIDTEX)) + { + ClipMidtex(x1, x2); + } + } + + if (fake3D & FAKE3D_CLIPTOP) + { + R_CreateWallSegmentY(wallupper, sclipTop - ViewPos.Z, &WallC); + for (i = x1; i < x2; i++) + { + if (wallupper[i] < mceilingclip[i]) + wallupper[i] = mceilingclip[i]; + } + mceilingclip = wallupper; + } + if (fake3D & FAKE3D_CLIPBOTTOM) + { + R_CreateWallSegmentY(walllower, sclipBottom - ViewPos.Z, &WallC); + for (i = x1; i < x2; i++) + { + if (walllower[i] > mfloorclip[i]) + walllower[i] = mfloorclip[i]; + } + mfloorclip = walllower; + } + + rw_offset = 0; + rw_pic = tex; + R_DrawDrawSeg(ds, x1, x2, mceilingclip, mfloorclip, MaskedSWall, maskedtexturecol, ds->yscale, wallshade); + } + + clearfog: + R_FinishSetPatchStyle(); + if (ds->bFakeBoundary & 3) + { + R_RenderFakeWallRange(ds, x1, x2, wallshade); + } + if (!notrelevant) + { + if (fake3D & FAKE3D_REFRESHCLIP) + { + if (!wrap) + { + assert(ds->bkup >= 0); + memcpy(openings + ds->sprtopclip, openings + ds->bkup, (ds->x2 - ds->x1) * 2); + } + } + else + { + fillshort(openings + ds->sprtopclip - ds->x1 + x1, x2 - x1, viewheight); + } + } + return; + } + + // kg3D - render one fake wall + void R_RenderFakeWall(drawseg_t *ds, int x1, int x2, F3DFloor *rover, int wallshade) + { + int i; + double xscale; + double yscale; + + fixed_t Alpha = Scale(rover->alpha, OPAQUE, 255); + bool visible = R_SetPatchStyle(LegacyRenderStyles[rover->flags & FF_ADDITIVETRANS ? 
STYLE_Add : STYLE_Translucent], + Alpha, 0, 0); + + if (!visible) { + R_FinishSetPatchStyle(); + return; + } + + rw_lightstep = ds->lightstep; + rw_light = ds->light + (x1 - ds->x1) * rw_lightstep; + + mfloorclip = openings + ds->sprbottomclip - ds->x1; + mceilingclip = openings + ds->sprtopclip - ds->x1; + + spryscale = ds->iscale + ds->iscalestep * (x1 - ds->x1); + float *MaskedSWall = (float *)(openings + ds->swall) - ds->x1; + + // find positioning + side_t *scaledside; + side_t::ETexpart scaledpart; + if (rover->flags & FF_UPPERTEXTURE) + { + scaledside = curline->sidedef; + scaledpart = side_t::top; + } + else if (rover->flags & FF_LOWERTEXTURE) + { + scaledside = curline->sidedef; + scaledpart = side_t::bottom; + } + else + { + scaledside = rover->master->sidedef[0]; + scaledpart = side_t::mid; + } + xscale = rw_pic->Scale.X * scaledside->GetTextureXScale(scaledpart); + yscale = rw_pic->Scale.Y * scaledside->GetTextureYScale(scaledpart); + + double rowoffset = curline->sidedef->GetTextureYOffset(side_t::mid) + rover->master->sidedef[0]->GetTextureYOffset(side_t::mid); + double planez = rover->model->GetPlaneTexZ(sector_t::ceiling); + rw_offset = FLOAT2FIXED(curline->sidedef->GetTextureXOffset(side_t::mid) + rover->master->sidedef[0]->GetTextureXOffset(side_t::mid)); + if (rowoffset < 0) + { + rowoffset += rw_pic->GetHeight(); + } + dc_texturemid = (planez - ViewPos.Z) * yscale; + if (rw_pic->bWorldPanning) + { + // rowoffset is added before the multiply so that the masked texture will + // still be positioned in world units rather than texels. + + dc_texturemid = dc_texturemid + rowoffset * yscale; + rw_offset = xs_RoundToInt(rw_offset * xscale); + } + else + { + // rowoffset is added outside the multiply so that it positions the texture + // by texels instead of world units. + dc_texturemid += rowoffset; + } + + if (fixedlightlev >= 0) + R_SetColorMapLight((r_fullbrightignoresectorcolor) ? 
&FullNormalLight : basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); + else if (fixedcolormap != nullptr) + R_SetColorMapLight(fixedcolormap, 0, 0); + + WallC.sz1 = ds->sz1; + WallC.sz2 = ds->sz2; + WallC.sx1 = ds->sx1; + WallC.sx2 = ds->sx2; + WallC.tleft.X = ds->cx; + WallC.tleft.Y = ds->cy; + WallC.tright.X = ds->cx + ds->cdx; + WallC.tright.Y = ds->cy + ds->cdy; + WallT = ds->tmapvals; + + R_CreateWallSegmentY(wallupper, sclipTop - ViewPos.Z, &WallC); + R_CreateWallSegmentY(walllower, sclipBottom - ViewPos.Z, &WallC); + + for (i = x1; i < x2; i++) + { + if (wallupper[i] < mceilingclip[i]) + wallupper[i] = mceilingclip[i]; + } + for (i = x1; i < x2; i++) + { + if (walllower[i] > mfloorclip[i]) + walllower[i] = mfloorclip[i]; + } + + PrepLWall(lwall, curline->sidedef->TexelLength*xscale, ds->sx1, ds->sx2); + R_DrawDrawSeg(ds, x1, x2, wallupper, walllower, MaskedSWall, lwall, yscale, wallshade); + R_FinishSetPatchStyle(); + } + + // kg3D - walls of fake floors + void R_RenderFakeWallRange(drawseg_t *ds, int x1, int x2, int wallshade) + { + FTexture *const DONT_DRAW = ((FTexture*)(intptr_t)-1); + int i, j; + F3DFloor *rover, *fover = nullptr; + int passed, last; + double floorHeight; + double ceilingHeight; + + sprflipvert = false; + curline = ds->curline; + + frontsector = curline->frontsector; + backsector = curline->backsector; + + if (backsector == nullptr) + { + return; + } + if ((ds->bFakeBoundary & 3) == 2) + { + sector_t *sec = backsector; + backsector = frontsector; + frontsector = sec; + } + + floorHeight = backsector->CenterFloor(); + ceilingHeight = backsector->CenterCeiling(); + + // maybe fix clipheights + if (!(fake3D & FAKE3D_CLIPBOTTOM)) sclipBottom = floorHeight; + if (!(fake3D & FAKE3D_CLIPTOP)) sclipTop = ceilingHeight; + + // maybe not visible + if (sclipBottom >= frontsector->CenterCeiling()) return; + if (sclipTop <= frontsector->CenterFloor()) return; + + if (fake3D & FAKE3D_DOWN2UP) + { // bottom to viewz + last = 0; + for (i = 
backsector->e->XFloor.ffloors.Size() - 1; i >= 0; i--) + { + rover = backsector->e->XFloor.ffloors[i]; + if (!(rover->flags & FF_EXISTS)) continue; + + // visible? + passed = 0; + if (!(rover->flags & FF_RENDERSIDES) || rover->top.plane->isSlope() || rover->bottom.plane->isSlope() || + rover->top.plane->Zat0() <= sclipBottom || + rover->bottom.plane->Zat0() >= ceilingHeight || + rover->top.plane->Zat0() <= floorHeight) + { + if (!i) + { + passed = 1; + } + else + { + continue; + } + } + + rw_pic = nullptr; + if (rover->bottom.plane->Zat0() >= sclipTop || passed) + { + if (last) + { + break; + } + // maybe wall from inside rendering? + fover = nullptr; + for (j = frontsector->e->XFloor.ffloors.Size() - 1; j >= 0; j--) + { + fover = frontsector->e->XFloor.ffloors[j]; + if (fover->model == rover->model) + { // never + fover = nullptr; + break; + } + if (!(fover->flags & FF_EXISTS)) continue; + if (!(fover->flags & FF_RENDERSIDES)) continue; + // no sloped walls, it's bugged + if (fover->top.plane->isSlope() || fover->bottom.plane->isSlope()) continue; + + // visible? + if (fover->top.plane->Zat0() <= sclipBottom) continue; // no + if (fover->bottom.plane->Zat0() >= sclipTop) + { // no, last possible + fover = nullptr; + break; + } + // it is, render inside? 
+ if (!(fover->flags & (FF_BOTHPLANES | FF_INVERTPLANES))) + { // no + fover = nullptr; + } + break; + } + // nothing + if (!fover || j == -1) + { + break; + } + // correct texture + if (fover->flags & rover->flags & FF_SWIMMABLE) + { // don't ever draw (but treat as something has been found) + rw_pic = DONT_DRAW; + } + else if (fover->flags & FF_UPPERTEXTURE) + { + rw_pic = TexMan(curline->sidedef->GetTexture(side_t::top), true); + } + else if (fover->flags & FF_LOWERTEXTURE) + { + rw_pic = TexMan(curline->sidedef->GetTexture(side_t::bottom), true); + } + else + { + rw_pic = TexMan(fover->master->sidedef[0]->GetTexture(side_t::mid), true); + } + } + else if (frontsector->e->XFloor.ffloors.Size()) + { + // maybe not visible? + fover = nullptr; + for (j = frontsector->e->XFloor.ffloors.Size() - 1; j >= 0; j--) + { + fover = frontsector->e->XFloor.ffloors[j]; + if (fover->model == rover->model) // never + { + break; + } + if (!(fover->flags & FF_EXISTS)) continue; + if (!(fover->flags & FF_RENDERSIDES)) continue; + // no sloped walls, it's bugged + if (fover->top.plane->isSlope() || fover->bottom.plane->isSlope()) continue; + + // visible? 
+ if (fover->top.plane->Zat0() <= sclipBottom) continue; // no + if (fover->bottom.plane->Zat0() >= sclipTop) + { // visible, last possible + fover = nullptr; + break; + } + if ((fover->flags & FF_SOLID) == (rover->flags & FF_SOLID) && + !(!(fover->flags & FF_SOLID) && (fover->alpha == 255 || rover->alpha == 255)) + ) + { + break; + } + if (fover->flags & rover->flags & FF_SWIMMABLE) + { // don't ever draw (but treat as something has been found) + rw_pic = DONT_DRAW; + } + fover = nullptr; // visible + break; + } + if (fover && j != -1) + { + fover = nullptr; + last = 1; + continue; // not visible + } + } + if (!rw_pic) + { + fover = nullptr; + if (rover->flags & FF_UPPERTEXTURE) + { + rw_pic = TexMan(curline->sidedef->GetTexture(side_t::top), true); + } + else if (rover->flags & FF_LOWERTEXTURE) + { + rw_pic = TexMan(curline->sidedef->GetTexture(side_t::bottom), true); + } + else + { + rw_pic = TexMan(rover->master->sidedef[0]->GetTexture(side_t::mid), true); + } + } + // correct colors now + basecolormap = frontsector->ColorMap; + wallshade = ds->shade; + if (fixedlightlev < 0) + { + if ((ds->bFakeBoundary & 3) == 2) + { + for (j = backsector->e->XFloor.lightlist.Size() - 1; j >= 0; j--) + { + if (sclipTop <= backsector->e->XFloor.lightlist[j].plane.Zat0()) + { + lightlist_t *lit = &backsector->e->XFloor.lightlist[j]; + basecolormap = lit->extra_colormap; + wallshade = LIGHT2SHADE(curline->sidedef->GetLightLevel(foggy, *lit->p_lightlevel, lit->lightsource != nullptr) + r_actualextralight); + break; + } + } + } + else + { + for (j = frontsector->e->XFloor.lightlist.Size() - 1; j >= 0; j--) + { + if (sclipTop <= frontsector->e->XFloor.lightlist[j].plane.Zat0()) + { + lightlist_t *lit = &frontsector->e->XFloor.lightlist[j]; + basecolormap = lit->extra_colormap; + wallshade = LIGHT2SHADE(curline->sidedef->GetLightLevel(foggy, *lit->p_lightlevel, lit->lightsource != nullptr) + r_actualextralight); + break; + } + } + } + } + if (rw_pic != DONT_DRAW) + { + 
R_RenderFakeWall(ds, x1, x2, fover ? fover : rover, wallshade); + } + else rw_pic = nullptr; + break; + } + } + else + { // top to viewz + for (i = 0; i < (int)backsector->e->XFloor.ffloors.Size(); i++) + { + rover = backsector->e->XFloor.ffloors[i]; + if (!(rover->flags & FF_EXISTS)) continue; + + // visible? + passed = 0; + if (!(rover->flags & FF_RENDERSIDES) || + rover->top.plane->isSlope() || rover->bottom.plane->isSlope() || + rover->bottom.plane->Zat0() >= sclipTop || + rover->top.plane->Zat0() <= floorHeight || + rover->bottom.plane->Zat0() >= ceilingHeight) + { + if ((unsigned)i == backsector->e->XFloor.ffloors.Size() - 1) + { + passed = 1; + } + else + { + continue; + } + } + rw_pic = nullptr; + if (rover->top.plane->Zat0() <= sclipBottom || passed) + { // maybe wall from inside rendering? + fover = nullptr; + for (j = 0; j < (int)frontsector->e->XFloor.ffloors.Size(); j++) + { + fover = frontsector->e->XFloor.ffloors[j]; + if (fover->model == rover->model) + { // never + fover = nullptr; + break; + } + if (!(fover->flags & FF_EXISTS)) continue; + if (!(fover->flags & FF_RENDERSIDES)) continue; + // no sloped walls, it's bugged + if (fover->top.plane->isSlope() || fover->bottom.plane->isSlope()) continue; + + // visible? + if (fover->bottom.plane->Zat0() >= sclipTop) continue; // no + if (fover->top.plane->Zat0() <= sclipBottom) + { // no, last possible + fover = nullptr; + break; + } + // it is, render inside? 
+ if (!(fover->flags & (FF_BOTHPLANES | FF_INVERTPLANES))) + { // no + fover = nullptr; + } + break; + } + // nothing + if (!fover || (unsigned)j == frontsector->e->XFloor.ffloors.Size()) + { + break; + } + // correct texture + if (fover->flags & rover->flags & FF_SWIMMABLE) + { + rw_pic = DONT_DRAW; // don't ever draw (but treat as something has been found) + } + else if (fover->flags & FF_UPPERTEXTURE) + { + rw_pic = TexMan(curline->sidedef->GetTexture(side_t::top), true); + } + else if (fover->flags & FF_LOWERTEXTURE) + { + rw_pic = TexMan(curline->sidedef->GetTexture(side_t::bottom), true); + } + else + { + rw_pic = TexMan(fover->master->sidedef[0]->GetTexture(side_t::mid), true); + } + } + else if (frontsector->e->XFloor.ffloors.Size()) + { // maybe not visible? + fover = nullptr; + for (j = 0; j < (int)frontsector->e->XFloor.ffloors.Size(); j++) + { + fover = frontsector->e->XFloor.ffloors[j]; + if (fover->model == rover->model) + { // never + break; + } + if (!(fover->flags & FF_EXISTS)) continue; + if (!(fover->flags & FF_RENDERSIDES)) continue; + // no sloped walls, its bugged + if (fover->top.plane->isSlope() || fover->bottom.plane->isSlope()) continue; + + // visible? 
+ if (fover->bottom.plane->Zat0() >= sclipTop) continue; // no + if (fover->top.plane->Zat0() <= sclipBottom) + { // visible, last possible + fover = nullptr; + break; + } + if ((fover->flags & FF_SOLID) == (rover->flags & FF_SOLID) && + !(!(rover->flags & FF_SOLID) && (fover->alpha == 255 || rover->alpha == 255)) + ) + { + break; + } + if (fover->flags & rover->flags & FF_SWIMMABLE) + { // don't ever draw (but treat as something has been found) + rw_pic = DONT_DRAW; + } + fover = nullptr; // visible + break; + } + if (fover && (unsigned)j != frontsector->e->XFloor.ffloors.Size()) + { // not visible + break; + } + } + if (rw_pic == nullptr) + { + fover = nullptr; + if (rover->flags & FF_UPPERTEXTURE) + { + rw_pic = TexMan(curline->sidedef->GetTexture(side_t::top), true); + } + else if (rover->flags & FF_LOWERTEXTURE) + { + rw_pic = TexMan(curline->sidedef->GetTexture(side_t::bottom), true); + } + else + { + rw_pic = TexMan(rover->master->sidedef[0]->GetTexture(side_t::mid), true); + } + } + // correct colors now + basecolormap = frontsector->ColorMap; + wallshade = ds->shade; + if (fixedlightlev < 0) + { + if ((ds->bFakeBoundary & 3) == 2) + { + for (j = backsector->e->XFloor.lightlist.Size() - 1; j >= 0; j--) + { + if (sclipTop <= backsector->e->XFloor.lightlist[j].plane.Zat0()) + { + lightlist_t *lit = &backsector->e->XFloor.lightlist[j]; + basecolormap = lit->extra_colormap; + wallshade = LIGHT2SHADE(curline->sidedef->GetLightLevel(foggy, *lit->p_lightlevel, lit->lightsource != nullptr) + r_actualextralight); + break; + } + } + } + else + { + for (j = frontsector->e->XFloor.lightlist.Size() - 1; j >= 0; j--) + { + if (sclipTop <= frontsector->e->XFloor.lightlist[j].plane.Zat0()) + { + lightlist_t *lit = &frontsector->e->XFloor.lightlist[j]; + basecolormap = lit->extra_colormap; + wallshade = LIGHT2SHADE(curline->sidedef->GetLightLevel(foggy, *lit->p_lightlevel, lit->lightsource != nullptr) + r_actualextralight); + break; + } + } + } + } + + if (rw_pic != 
DONT_DRAW) + { + R_RenderFakeWall(ds, x1, x2, fover ? fover : rover, wallshade); + } + else + { + rw_pic = nullptr; + } + break; + } + } + return; + } } diff --git a/src/swrenderer/segments/r_drawsegment.h b/src/swrenderer/segments/r_drawsegment.h index 234a14bc09..c5c276e68d 100644 --- a/src/swrenderer/segments/r_drawsegment.h +++ b/src/swrenderer/segments/r_drawsegment.h @@ -45,4 +45,8 @@ namespace swrenderer void R_FreeDrawSegs(); drawseg_t *R_AddDrawSegment(); + void ClipMidtex(int x1, int x2); + void R_RenderMaskedSegRange(drawseg_t *ds, int x1, int x2); + void R_RenderFakeWall(drawseg_t *ds, int x1, int x2, F3DFloor *rover, int wallshade); + void R_RenderFakeWallRange(drawseg_t *ds, int x1, int x2, int wallshade); } diff --git a/src/swrenderer/segments/r_portalsegment.cpp b/src/swrenderer/segments/r_portalsegment.cpp index 22e137fff5..67db89b876 100644 --- a/src/swrenderer/segments/r_portalsegment.cpp +++ b/src/swrenderer/segments/r_portalsegment.cpp @@ -16,7 +16,6 @@ #include "doomstat.h" #include "r_state.h" #include "swrenderer/scene/r_bsp.h" -#include "swrenderer/scene/r_segs.h" #include "v_palette.h" #include "r_sky.h" #include "po_man.h" diff --git a/src/swrenderer/things/r_decal.cpp b/src/swrenderer/things/r_decal.cpp index 5aa67a550c..9576294e70 100644 --- a/src/swrenderer/things/r_decal.cpp +++ b/src/swrenderer/things/r_decal.cpp @@ -28,7 +28,6 @@ #include "swrenderer/segments/r_drawsegment.h" #include "swrenderer/scene/r_portal.h" #include "r_wallsprite.h" -#include "swrenderer/scene/r_segs.h" #include "swrenderer/r_memory.h" namespace swrenderer diff --git a/src/swrenderer/things/r_particle.cpp b/src/swrenderer/things/r_particle.cpp index 83ebd6db34..78f6c42f3f 100644 --- a/src/swrenderer/things/r_particle.cpp +++ b/src/swrenderer/things/r_particle.cpp @@ -28,7 +28,6 @@ #include "d_netinf.h" #include "p_effect.h" #include "swrenderer/scene/r_bsp.h" -#include "swrenderer/scene/r_segs.h" #include "swrenderer/scene/r_3dfloors.h" #include 
"swrenderer/drawers/r_draw_rgba.h" #include "swrenderer/drawers/r_draw_pal.h" diff --git a/src/swrenderer/things/r_playersprite.cpp b/src/swrenderer/things/r_playersprite.cpp index bc1df52461..5b8f4baaaa 100644 --- a/src/swrenderer/things/r_playersprite.cpp +++ b/src/swrenderer/things/r_playersprite.cpp @@ -28,7 +28,6 @@ #include "d_netinf.h" #include "p_effect.h" #include "swrenderer/scene/r_bsp.h" -#include "swrenderer/scene/r_segs.h" #include "swrenderer/scene/r_3dfloors.h" #include "swrenderer/drawers/r_draw_rgba.h" #include "swrenderer/drawers/r_draw_pal.h" diff --git a/src/swrenderer/things/r_wallsprite.cpp b/src/swrenderer/things/r_wallsprite.cpp index 400c1e1639..d72480fe86 100644 --- a/src/swrenderer/things/r_wallsprite.cpp +++ b/src/swrenderer/things/r_wallsprite.cpp @@ -28,7 +28,6 @@ #include "d_netinf.h" #include "p_effect.h" #include "swrenderer/scene/r_bsp.h" -#include "swrenderer/scene/r_segs.h" #include "swrenderer/scene/r_3dfloors.h" #include "swrenderer/drawers/r_draw_rgba.h" #include "swrenderer/drawers/r_draw_pal.h" From e7ab5dddb6a5b150abc81b96f45758eb50710c88 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 3 Jan 2017 07:17:54 +0100 Subject: [PATCH 631/912] Put add the copyright statement from the files the code originated from --- src/swrenderer/line/r_line.cpp | 12 ++++++++++++ src/swrenderer/line/r_line.h | 12 ++++++++++++ src/swrenderer/line/r_walldraw.h | 12 ++++++++++++ src/swrenderer/plane/r_flatplane.cpp | 12 ++++++++++++ src/swrenderer/plane/r_flatplane.h | 12 ++++++++++++ src/swrenderer/plane/r_fogboundary.cpp | 12 ++++++++++++ src/swrenderer/plane/r_fogboundary.h | 12 ++++++++++++ src/swrenderer/plane/r_skyplane.cpp | 12 ++++++++++++ src/swrenderer/plane/r_skyplane.h | 12 ++++++++++++ src/swrenderer/plane/r_slopeplane.cpp | 12 ++++++++++++ src/swrenderer/plane/r_slopeplane.h | 12 ++++++++++++ src/swrenderer/plane/r_visibleplane.cpp | 12 ++++++++++++ src/swrenderer/plane/r_visibleplane.h | 12 ++++++++++++ 
src/swrenderer/r_memory.cpp | 12 ++++++++++++ src/swrenderer/r_memory.h | 12 ++++++++++++ src/swrenderer/scene/r_portal.cpp | 12 ++++++++++++ src/swrenderer/scene/r_portal.h | 12 ++++++++++++ src/swrenderer/segments/r_clipsegment.cpp | 12 ++++++++++++ src/swrenderer/segments/r_clipsegment.h | 12 ++++++++++++ src/swrenderer/segments/r_drawsegment.cpp | 12 ++++++++++++ src/swrenderer/segments/r_drawsegment.h | 12 ++++++++++++ src/swrenderer/segments/r_portalsegment.cpp | 12 ++++++++++++ src/swrenderer/segments/r_portalsegment.h | 12 ++++++++++++ src/swrenderer/things/r_decal.cpp | 12 ++++++++++++ src/swrenderer/things/r_decal.h | 12 ++++++++++++ src/swrenderer/things/r_particle.cpp | 12 ++++++++++++ src/swrenderer/things/r_particle.h | 12 ++++++++++++ src/swrenderer/things/r_playersprite.cpp | 12 ++++++++++++ src/swrenderer/things/r_playersprite.h | 12 ++++++++++++ src/swrenderer/things/r_visiblesprite.cpp | 12 ++++++++++++ src/swrenderer/things/r_visiblesprite.h | 12 ++++++++++++ src/swrenderer/things/r_wallsprite.cpp | 12 ++++++++++++ src/swrenderer/things/r_wallsprite.h | 12 ++++++++++++ 33 files changed, 396 insertions(+) diff --git a/src/swrenderer/line/r_line.cpp b/src/swrenderer/line/r_line.cpp index 93d7a9bf41..849ae635f2 100644 --- a/src/swrenderer/line/r_line.cpp +++ b/src/swrenderer/line/r_line.cpp @@ -1,3 +1,15 @@ +// +// Copyright (C) 1993-1996 by id Software, Inc. +// +// This source is available for distribution and/or modification +// only under the terms of the DOOM Source Code License as +// published by id Software. All rights reserved. +// +// The source is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// FITNESS FOR A PARTICULAR PURPOSE. See the DOOM Source Code License +// for more details. 
+// #include #include diff --git a/src/swrenderer/line/r_line.h b/src/swrenderer/line/r_line.h index b530f3e3ee..c071e8e08c 100644 --- a/src/swrenderer/line/r_line.h +++ b/src/swrenderer/line/r_line.h @@ -1,3 +1,15 @@ +// +// Copyright (C) 1993-1996 by id Software, Inc. +// +// This source is available for distribution and/or modification +// only under the terms of the DOOM Source Code License as +// published by id Software. All rights reserved. +// +// The source is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// FITNESS FOR A PARTICULAR PURPOSE. See the DOOM Source Code License +// for more details. +// #pragma once diff --git a/src/swrenderer/line/r_walldraw.h b/src/swrenderer/line/r_walldraw.h index 9ca312fb04..8cc71f8df1 100644 --- a/src/swrenderer/line/r_walldraw.h +++ b/src/swrenderer/line/r_walldraw.h @@ -1,3 +1,15 @@ +// +// Copyright (C) 1993-1996 by id Software, Inc. +// +// This source is available for distribution and/or modification +// only under the terms of the DOOM Source Code License as +// published by id Software. All rights reserved. +// +// The source is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// FITNESS FOR A PARTICULAR PURPOSE. See the DOOM Source Code License +// for more details. +// #pragma once diff --git a/src/swrenderer/plane/r_flatplane.cpp b/src/swrenderer/plane/r_flatplane.cpp index 7730fac2f6..df8ecc19a8 100644 --- a/src/swrenderer/plane/r_flatplane.cpp +++ b/src/swrenderer/plane/r_flatplane.cpp @@ -1,3 +1,15 @@ +// +// Copyright (C) 1993-1996 by id Software, Inc. +// +// This source is available for distribution and/or modification +// only under the terms of the DOOM Source Code License as +// published by id Software. All rights reserved. 
+// +// The source is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// FITNESS FOR A PARTICULAR PURPOSE. See the DOOM Source Code License +// for more details. +// #include #include diff --git a/src/swrenderer/plane/r_flatplane.h b/src/swrenderer/plane/r_flatplane.h index 7164b140c3..c922df34e9 100644 --- a/src/swrenderer/plane/r_flatplane.h +++ b/src/swrenderer/plane/r_flatplane.h @@ -1,3 +1,15 @@ +// +// Copyright (C) 1993-1996 by id Software, Inc. +// +// This source is available for distribution and/or modification +// only under the terms of the DOOM Source Code License as +// published by id Software. All rights reserved. +// +// The source is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// FITNESS FOR A PARTICULAR PURPOSE. See the DOOM Source Code License +// for more details. +// #pragma once diff --git a/src/swrenderer/plane/r_fogboundary.cpp b/src/swrenderer/plane/r_fogboundary.cpp index 5c020fbf9c..68ec3097dc 100644 --- a/src/swrenderer/plane/r_fogboundary.cpp +++ b/src/swrenderer/plane/r_fogboundary.cpp @@ -1,3 +1,15 @@ +// +// Copyright (C) 1993-1996 by id Software, Inc. +// +// This source is available for distribution and/or modification +// only under the terms of the DOOM Source Code License as +// published by id Software. All rights reserved. +// +// The source is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// FITNESS FOR A PARTICULAR PURPOSE. See the DOOM Source Code License +// for more details. +// #include #include diff --git a/src/swrenderer/plane/r_fogboundary.h b/src/swrenderer/plane/r_fogboundary.h index f334e916b7..de872fd589 100644 --- a/src/swrenderer/plane/r_fogboundary.h +++ b/src/swrenderer/plane/r_fogboundary.h @@ -1,3 +1,15 @@ +// +// Copyright (C) 1993-1996 by id Software, Inc. 
+// +// This source is available for distribution and/or modification +// only under the terms of the DOOM Source Code License as +// published by id Software. All rights reserved. +// +// The source is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// FITNESS FOR A PARTICULAR PURPOSE. See the DOOM Source Code License +// for more details. +// #pragma once diff --git a/src/swrenderer/plane/r_skyplane.cpp b/src/swrenderer/plane/r_skyplane.cpp index fa4dcd3e84..1f391b331b 100644 --- a/src/swrenderer/plane/r_skyplane.cpp +++ b/src/swrenderer/plane/r_skyplane.cpp @@ -1,3 +1,15 @@ +// +// Copyright (C) 1993-1996 by id Software, Inc. +// +// This source is available for distribution and/or modification +// only under the terms of the DOOM Source Code License as +// published by id Software. All rights reserved. +// +// The source is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// FITNESS FOR A PARTICULAR PURPOSE. See the DOOM Source Code License +// for more details. +// #include #include diff --git a/src/swrenderer/plane/r_skyplane.h b/src/swrenderer/plane/r_skyplane.h index 0a7d55be46..2810a86933 100644 --- a/src/swrenderer/plane/r_skyplane.h +++ b/src/swrenderer/plane/r_skyplane.h @@ -1,3 +1,15 @@ +// +// Copyright (C) 1993-1996 by id Software, Inc. +// +// This source is available for distribution and/or modification +// only under the terms of the DOOM Source Code License as +// published by id Software. All rights reserved. +// +// The source is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// FITNESS FOR A PARTICULAR PURPOSE. See the DOOM Source Code License +// for more details. 
+// #pragma once diff --git a/src/swrenderer/plane/r_slopeplane.cpp b/src/swrenderer/plane/r_slopeplane.cpp index 0ba0298b7f..7a4a8c8db4 100644 --- a/src/swrenderer/plane/r_slopeplane.cpp +++ b/src/swrenderer/plane/r_slopeplane.cpp @@ -1,3 +1,15 @@ +// +// Copyright (C) 1993-1996 by id Software, Inc. +// +// This source is available for distribution and/or modification +// only under the terms of the DOOM Source Code License as +// published by id Software. All rights reserved. +// +// The source is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// FITNESS FOR A PARTICULAR PURPOSE. See the DOOM Source Code License +// for more details. +// #include #include diff --git a/src/swrenderer/plane/r_slopeplane.h b/src/swrenderer/plane/r_slopeplane.h index fc071b110c..d366c5feff 100644 --- a/src/swrenderer/plane/r_slopeplane.h +++ b/src/swrenderer/plane/r_slopeplane.h @@ -1,3 +1,15 @@ +// +// Copyright (C) 1993-1996 by id Software, Inc. +// +// This source is available for distribution and/or modification +// only under the terms of the DOOM Source Code License as +// published by id Software. All rights reserved. +// +// The source is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// FITNESS FOR A PARTICULAR PURPOSE. See the DOOM Source Code License +// for more details. +// #pragma once diff --git a/src/swrenderer/plane/r_visibleplane.cpp b/src/swrenderer/plane/r_visibleplane.cpp index f175f00631..3cee3e8694 100644 --- a/src/swrenderer/plane/r_visibleplane.cpp +++ b/src/swrenderer/plane/r_visibleplane.cpp @@ -1,3 +1,15 @@ +// +// Copyright (C) 1993-1996 by id Software, Inc. +// +// This source is available for distribution and/or modification +// only under the terms of the DOOM Source Code License as +// published by id Software. All rights reserved. 
+// +// The source is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// FITNESS FOR A PARTICULAR PURPOSE. See the DOOM Source Code License +// for more details. +// #include #include diff --git a/src/swrenderer/plane/r_visibleplane.h b/src/swrenderer/plane/r_visibleplane.h index 1c2ee717f2..c201c691a1 100644 --- a/src/swrenderer/plane/r_visibleplane.h +++ b/src/swrenderer/plane/r_visibleplane.h @@ -1,3 +1,15 @@ +// +// Copyright (C) 1993-1996 by id Software, Inc. +// +// This source is available for distribution and/or modification +// only under the terms of the DOOM Source Code License as +// published by id Software. All rights reserved. +// +// The source is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// FITNESS FOR A PARTICULAR PURPOSE. See the DOOM Source Code License +// for more details. +// #pragma once diff --git a/src/swrenderer/r_memory.cpp b/src/swrenderer/r_memory.cpp index 855d851be7..dd5fd83f88 100644 --- a/src/swrenderer/r_memory.cpp +++ b/src/swrenderer/r_memory.cpp @@ -1,3 +1,15 @@ +// +// Copyright (C) 1993-1996 by id Software, Inc. +// +// This source is available for distribution and/or modification +// only under the terms of the DOOM Source Code License as +// published by id Software. All rights reserved. +// +// The source is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// FITNESS FOR A PARTICULAR PURPOSE. See the DOOM Source Code License +// for more details. +// #include #include "templates.h" diff --git a/src/swrenderer/r_memory.h b/src/swrenderer/r_memory.h index 06a03c3ffc..fc0c845e29 100644 --- a/src/swrenderer/r_memory.h +++ b/src/swrenderer/r_memory.h @@ -1,3 +1,15 @@ +// +// Copyright (C) 1993-1996 by id Software, Inc. 
+// +// This source is available for distribution and/or modification +// only under the terms of the DOOM Source Code License as +// published by id Software. All rights reserved. +// +// The source is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// FITNESS FOR A PARTICULAR PURPOSE. See the DOOM Source Code License +// for more details. +// #pragma once diff --git a/src/swrenderer/scene/r_portal.cpp b/src/swrenderer/scene/r_portal.cpp index 03741b14a5..314891b61d 100644 --- a/src/swrenderer/scene/r_portal.cpp +++ b/src/swrenderer/scene/r_portal.cpp @@ -1,3 +1,15 @@ +// +// Copyright (C) 1993-1996 by id Software, Inc. +// +// This source is available for distribution and/or modification +// only under the terms of the DOOM Source Code License as +// published by id Software. All rights reserved. +// +// The source is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// FITNESS FOR A PARTICULAR PURPOSE. See the DOOM Source Code License +// for more details. +// #include #include diff --git a/src/swrenderer/scene/r_portal.h b/src/swrenderer/scene/r_portal.h index 42cec48913..e31cc9f2fb 100644 --- a/src/swrenderer/scene/r_portal.h +++ b/src/swrenderer/scene/r_portal.h @@ -1,3 +1,15 @@ +// +// Copyright (C) 1993-1996 by id Software, Inc. +// +// This source is available for distribution and/or modification +// only under the terms of the DOOM Source Code License as +// published by id Software. All rights reserved. +// +// The source is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// FITNESS FOR A PARTICULAR PURPOSE. See the DOOM Source Code License +// for more details. 
+// #pragma once diff --git a/src/swrenderer/segments/r_clipsegment.cpp b/src/swrenderer/segments/r_clipsegment.cpp index c8c4acd066..7a2d8b1f48 100644 --- a/src/swrenderer/segments/r_clipsegment.cpp +++ b/src/swrenderer/segments/r_clipsegment.cpp @@ -1,3 +1,15 @@ +// +// Copyright (C) 1993-1996 by id Software, Inc. +// +// This source is available for distribution and/or modification +// only under the terms of the DOOM Source Code License as +// published by id Software. All rights reserved. +// +// The source is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// FITNESS FOR A PARTICULAR PURPOSE. See the DOOM Source Code License +// for more details. +// #include #include "templates.h" diff --git a/src/swrenderer/segments/r_clipsegment.h b/src/swrenderer/segments/r_clipsegment.h index 3ebe464d83..3204531036 100644 --- a/src/swrenderer/segments/r_clipsegment.h +++ b/src/swrenderer/segments/r_clipsegment.h @@ -1,3 +1,15 @@ +// +// Copyright (C) 1993-1996 by id Software, Inc. +// +// This source is available for distribution and/or modification +// only under the terms of the DOOM Source Code License as +// published by id Software. All rights reserved. +// +// The source is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// FITNESS FOR A PARTICULAR PURPOSE. See the DOOM Source Code License +// for more details. +// #pragma once diff --git a/src/swrenderer/segments/r_drawsegment.cpp b/src/swrenderer/segments/r_drawsegment.cpp index 2f2e7bd773..311f5a867b 100644 --- a/src/swrenderer/segments/r_drawsegment.cpp +++ b/src/swrenderer/segments/r_drawsegment.cpp @@ -1,3 +1,15 @@ +// +// Copyright (C) 1993-1996 by id Software, Inc. +// +// This source is available for distribution and/or modification +// only under the terms of the DOOM Source Code License as +// published by id Software. All rights reserved. 
+// +// The source is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// FITNESS FOR A PARTICULAR PURPOSE. See the DOOM Source Code License +// for more details. +// #include #include "templates.h" diff --git a/src/swrenderer/segments/r_drawsegment.h b/src/swrenderer/segments/r_drawsegment.h index c5c276e68d..7ab957cd48 100644 --- a/src/swrenderer/segments/r_drawsegment.h +++ b/src/swrenderer/segments/r_drawsegment.h @@ -1,3 +1,15 @@ +// +// Copyright (C) 1993-1996 by id Software, Inc. +// +// This source is available for distribution and/or modification +// only under the terms of the DOOM Source Code License as +// published by id Software. All rights reserved. +// +// The source is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// FITNESS FOR A PARTICULAR PURPOSE. See the DOOM Source Code License +// for more details. +// #pragma once diff --git a/src/swrenderer/segments/r_portalsegment.cpp b/src/swrenderer/segments/r_portalsegment.cpp index 67db89b876..4e7a2b5336 100644 --- a/src/swrenderer/segments/r_portalsegment.cpp +++ b/src/swrenderer/segments/r_portalsegment.cpp @@ -1,3 +1,15 @@ +// +// Copyright (C) 1993-1996 by id Software, Inc. +// +// This source is available for distribution and/or modification +// only under the terms of the DOOM Source Code License as +// published by id Software. All rights reserved. +// +// The source is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// FITNESS FOR A PARTICULAR PURPOSE. See the DOOM Source Code License +// for more details. 
+// #include #include "templates.h" diff --git a/src/swrenderer/segments/r_portalsegment.h b/src/swrenderer/segments/r_portalsegment.h index dc4f501526..f1d757276e 100644 --- a/src/swrenderer/segments/r_portalsegment.h +++ b/src/swrenderer/segments/r_portalsegment.h @@ -1,3 +1,15 @@ +// +// Copyright (C) 1993-1996 by id Software, Inc. +// +// This source is available for distribution and/or modification +// only under the terms of the DOOM Source Code License as +// published by id Software. All rights reserved. +// +// The source is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// FITNESS FOR A PARTICULAR PURPOSE. See the DOOM Source Code License +// for more details. +// #pragma once diff --git a/src/swrenderer/things/r_decal.cpp b/src/swrenderer/things/r_decal.cpp index 9576294e70..f992cf13e5 100644 --- a/src/swrenderer/things/r_decal.cpp +++ b/src/swrenderer/things/r_decal.cpp @@ -1,3 +1,15 @@ +// +// Copyright (C) 1993-1996 by id Software, Inc. +// +// This source is available for distribution and/or modification +// only under the terms of the DOOM Source Code License as +// published by id Software. All rights reserved. +// +// The source is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// FITNESS FOR A PARTICULAR PURPOSE. See the DOOM Source Code License +// for more details. +// #include #include diff --git a/src/swrenderer/things/r_decal.h b/src/swrenderer/things/r_decal.h index a006e2fee6..74871c4b04 100644 --- a/src/swrenderer/things/r_decal.h +++ b/src/swrenderer/things/r_decal.h @@ -1,3 +1,15 @@ +// +// Copyright (C) 1993-1996 by id Software, Inc. +// +// This source is available for distribution and/or modification +// only under the terms of the DOOM Source Code License as +// published by id Software. All rights reserved. 
+// +// The source is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// FITNESS FOR A PARTICULAR PURPOSE. See the DOOM Source Code License +// for more details. +// #pragma once diff --git a/src/swrenderer/things/r_particle.cpp b/src/swrenderer/things/r_particle.cpp index 78f6c42f3f..d58e625c4f 100644 --- a/src/swrenderer/things/r_particle.cpp +++ b/src/swrenderer/things/r_particle.cpp @@ -1,3 +1,15 @@ +// +// Copyright (C) 1993-1996 by id Software, Inc. +// +// This source is available for distribution and/or modification +// only under the terms of the DOOM Source Code License as +// published by id Software. All rights reserved. +// +// The source is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// FITNESS FOR A PARTICULAR PURPOSE. See the DOOM Source Code License +// for more details. +// #include #include diff --git a/src/swrenderer/things/r_particle.h b/src/swrenderer/things/r_particle.h index 01db4dfa00..a0413d388c 100644 --- a/src/swrenderer/things/r_particle.h +++ b/src/swrenderer/things/r_particle.h @@ -1,3 +1,15 @@ +// +// Copyright (C) 1993-1996 by id Software, Inc. +// +// This source is available for distribution and/or modification +// only under the terms of the DOOM Source Code License as +// published by id Software. All rights reserved. +// +// The source is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// FITNESS FOR A PARTICULAR PURPOSE. See the DOOM Source Code License +// for more details. +// #pragma once diff --git a/src/swrenderer/things/r_playersprite.cpp b/src/swrenderer/things/r_playersprite.cpp index 5b8f4baaaa..22ae8e99c1 100644 --- a/src/swrenderer/things/r_playersprite.cpp +++ b/src/swrenderer/things/r_playersprite.cpp @@ -1,3 +1,15 @@ +// +// Copyright (C) 1993-1996 by id Software, Inc. 
+// +// This source is available for distribution and/or modification +// only under the terms of the DOOM Source Code License as +// published by id Software. All rights reserved. +// +// The source is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// FITNESS FOR A PARTICULAR PURPOSE. See the DOOM Source Code License +// for more details. +// #include #include diff --git a/src/swrenderer/things/r_playersprite.h b/src/swrenderer/things/r_playersprite.h index 21133532ba..4569a54361 100644 --- a/src/swrenderer/things/r_playersprite.h +++ b/src/swrenderer/things/r_playersprite.h @@ -1,3 +1,15 @@ +// +// Copyright (C) 1993-1996 by id Software, Inc. +// +// This source is available for distribution and/or modification +// only under the terms of the DOOM Source Code License as +// published by id Software. All rights reserved. +// +// The source is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// FITNESS FOR A PARTICULAR PURPOSE. See the DOOM Source Code License +// for more details. +// #pragma once diff --git a/src/swrenderer/things/r_visiblesprite.cpp b/src/swrenderer/things/r_visiblesprite.cpp index 3236e93374..567eae89d5 100644 --- a/src/swrenderer/things/r_visiblesprite.cpp +++ b/src/swrenderer/things/r_visiblesprite.cpp @@ -1,3 +1,15 @@ +// +// Copyright (C) 1993-1996 by id Software, Inc. +// +// This source is available for distribution and/or modification +// only under the terms of the DOOM Source Code License as +// published by id Software. All rights reserved. +// +// The source is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// FITNESS FOR A PARTICULAR PURPOSE. See the DOOM Source Code License +// for more details. 
+// #include #include diff --git a/src/swrenderer/things/r_visiblesprite.h b/src/swrenderer/things/r_visiblesprite.h index 090a757068..04ef665e63 100644 --- a/src/swrenderer/things/r_visiblesprite.h +++ b/src/swrenderer/things/r_visiblesprite.h @@ -1,3 +1,15 @@ +// +// Copyright (C) 1993-1996 by id Software, Inc. +// +// This source is available for distribution and/or modification +// only under the terms of the DOOM Source Code License as +// published by id Software. All rights reserved. +// +// The source is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// FITNESS FOR A PARTICULAR PURPOSE. See the DOOM Source Code License +// for more details. +// #pragma once diff --git a/src/swrenderer/things/r_wallsprite.cpp b/src/swrenderer/things/r_wallsprite.cpp index d72480fe86..4073917bb7 100644 --- a/src/swrenderer/things/r_wallsprite.cpp +++ b/src/swrenderer/things/r_wallsprite.cpp @@ -1,3 +1,15 @@ +// +// Copyright (C) 1993-1996 by id Software, Inc. +// +// This source is available for distribution and/or modification +// only under the terms of the DOOM Source Code License as +// published by id Software. All rights reserved. +// +// The source is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// FITNESS FOR A PARTICULAR PURPOSE. See the DOOM Source Code License +// for more details. +// #include #include diff --git a/src/swrenderer/things/r_wallsprite.h b/src/swrenderer/things/r_wallsprite.h index 3a731e5d0b..cecb4738db 100644 --- a/src/swrenderer/things/r_wallsprite.h +++ b/src/swrenderer/things/r_wallsprite.h @@ -1,3 +1,15 @@ +// +// Copyright (C) 1993-1996 by id Software, Inc. +// +// This source is available for distribution and/or modification +// only under the terms of the DOOM Source Code License as +// published by id Software. All rights reserved. 
+// +// The source is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// FITNESS FOR A PARTICULAR PURPOSE. See the DOOM Source Code License +// for more details. +// #pragma once From aa115340335d64081a6c25011e91f75f45a6e902 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 3 Jan 2017 18:55:12 +0100 Subject: [PATCH 632/912] Remove unused (and broken in swrenderer) back boolean from FakeFlat interface --- src/am_map.cpp | 2 +- src/gl/scene/gl_scene.cpp | 6 +++--- src/r_renderer.h | 2 +- src/swrenderer/r_swrenderer.cpp | 10 ++-------- src/swrenderer/r_swrenderer.h | 2 +- 5 files changed, 8 insertions(+), 14 deletions(-) diff --git a/src/am_map.cpp b/src/am_map.cpp index 8c250ee685..a30106d4e2 100644 --- a/src/am_map.cpp +++ b/src/am_map.cpp @@ -1934,7 +1934,7 @@ void AM_drawSubsectors() points[j].Y = float(f_y + (f_h - (pt.y - m_y) * scale)); } // For lighting and texture determination - sector_t *sec = Renderer->FakeFlat(subsectors[i].render_sector, &tempsec, &floorlight, &ceilinglight, false); + sector_t *sec = Renderer->FakeFlat(subsectors[i].render_sector, &tempsec, &floorlight, &ceilinglight); // Find texture origin. 
originpt.x = -sec->GetXOffset(sector_t::floor); originpt.y = sec->GetYOffset(sector_t::floor); diff --git a/src/gl/scene/gl_scene.cpp b/src/gl/scene/gl_scene.cpp index bedd7a7e7f..1cf4642502 100644 --- a/src/gl/scene/gl_scene.cpp +++ b/src/gl/scene/gl_scene.cpp @@ -1005,7 +1005,7 @@ struct FGLInterface : public FRenderer void StartSerialize(FSerializer &arc) override; void EndSerialize(FSerializer &arc) override; void RenderTextureView (FCanvasTexture *self, AActor *viewpoint, int fov) override; - sector_t *FakeFlat(sector_t *sec, sector_t *tempsec, int *floorlightlevel, int *ceilinglightlevel, bool back) override; + sector_t *FakeFlat(sector_t *sec, sector_t *tempsec, int *floorlightlevel, int *ceilinglightlevel) override; void SetFogParams(int _fogdensity, PalEntry _outsidefogcolor, int _outsidefogdensity, int _skyfog) override; void PreprocessLevel() override; void CleanLevelData() override; @@ -1362,7 +1362,7 @@ void FGLInterface::RenderTextureView (FCanvasTexture *tex, AActor *Viewpoint, in // //========================================================================== -sector_t *FGLInterface::FakeFlat(sector_t *sec, sector_t *tempsec, int *floorlightlevel, int *ceilinglightlevel, bool back) +sector_t *FGLInterface::FakeFlat(sector_t *sec, sector_t *tempsec, int *floorlightlevel, int *ceilinglightlevel) { if (floorlightlevel != NULL) { @@ -1372,7 +1372,7 @@ sector_t *FGLInterface::FakeFlat(sector_t *sec, sector_t *tempsec, int *floorlig { *ceilinglightlevel = sec->GetCeilingLight (); } - return gl_FakeFlat(sec, tempsec, back); + return gl_FakeFlat(sec, tempsec, false); } //=========================================================================== diff --git a/src/r_renderer.h b/src/r_renderer.h index 4236993d63..0b14058954 100644 --- a/src/r_renderer.h +++ b/src/r_renderer.h @@ -60,7 +60,7 @@ struct FRenderer virtual void SetupFrame(player_t *player) {} virtual void CopyStackedViewParameters() {} virtual void RenderTextureView (FCanvasTexture *tex, AActor 
*viewpoint, int fov) = 0; - virtual sector_t *FakeFlat(sector_t *sec, sector_t *tempsec, int *floorlightlevel, int *ceilinglightlevel, bool back) = 0; + virtual sector_t *FakeFlat(sector_t *sec, sector_t *tempsec, int *floorlightlevel, int *ceilinglightlevel) = 0; virtual void SetFogParams(int _fogdensity, PalEntry _outsidefogcolor, int _outsidefogdensity, int _skyfog) {} virtual void PreprocessLevel() {} virtual void CleanLevelData() {} diff --git a/src/swrenderer/r_swrenderer.cpp b/src/swrenderer/r_swrenderer.cpp index c368bf3d43..c68aed77ad 100644 --- a/src/swrenderer/r_swrenderer.cpp +++ b/src/swrenderer/r_swrenderer.cpp @@ -482,14 +482,8 @@ void FSoftwareRenderer::RenderTextureView (FCanvasTexture *tex, AActor *viewpoin realfixedcolormap = savecm; } -//========================================================================== -// -// -// -//========================================================================== - -sector_t *FSoftwareRenderer::FakeFlat(sector_t *sec, sector_t *tempsec, int *floorlightlevel, int *ceilinglightlevel, bool back) +sector_t *FSoftwareRenderer::FakeFlat(sector_t *sec, sector_t *tempsec, int *floorlightlevel, int *ceilinglightlevel) { - return R_FakeFlat(sec, tempsec, floorlightlevel, ceilinglightlevel, back); + return R_FakeFlat(sec, tempsec, floorlightlevel, ceilinglightlevel, nullptr, 0, 0, 0, 0); } diff --git a/src/swrenderer/r_swrenderer.h b/src/swrenderer/r_swrenderer.h index 812ec32b50..c09542959c 100644 --- a/src/swrenderer/r_swrenderer.h +++ b/src/swrenderer/r_swrenderer.h @@ -42,7 +42,7 @@ struct FSoftwareRenderer : public FRenderer void SetupFrame(player_t *player) override; void CopyStackedViewParameters() override; void RenderTextureView (FCanvasTexture *tex, AActor *viewpoint, int fov) override; - sector_t *FakeFlat(sector_t *sec, sector_t *tempsec, int *floorlightlevel, int *ceilinglightlevel, bool back) override; + sector_t *FakeFlat(sector_t *sec, sector_t *tempsec, int *floorlightlevel, int *ceilinglightlevel) 
override; void StateChanged(AActor *actor) override { From a4c0e299130f132a098cdc5acea08cd2f917c89d Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 3 Jan 2017 18:57:48 +0100 Subject: [PATCH 633/912] Moved r_line into a class and implemented proper parameter passing between r_line and r_bsp, r_walldraw, r_wallsetup, r_decal, r_wallsprite, r_fogboundary, r_portal and r_playersprite --- src/swrenderer/line/r_line.cpp | 102 ++++++--------------- src/swrenderer/line/r_line.h | 105 ++++++++++++---------- src/swrenderer/line/r_walldraw.cpp | 82 +++++++++-------- src/swrenderer/line/r_walldraw.h | 7 +- src/swrenderer/line/r_wallsetup.cpp | 4 +- src/swrenderer/line/r_wallsetup.h | 5 +- src/swrenderer/plane/r_fogboundary.cpp | 7 +- src/swrenderer/plane/r_fogboundary.h | 2 +- src/swrenderer/plane/r_skyplane.cpp | 11 +-- src/swrenderer/r_main.cpp | 3 +- src/swrenderer/scene/r_bsp.cpp | 37 ++++---- src/swrenderer/scene/r_bsp.h | 3 +- src/swrenderer/scene/r_portal.cpp | 6 +- src/swrenderer/segments/r_drawsegment.cpp | 23 +++-- src/swrenderer/things/r_decal.cpp | 34 +++++-- src/swrenderer/things/r_decal.h | 5 +- src/swrenderer/things/r_playersprite.cpp | 3 +- src/swrenderer/things/r_wallsprite.cpp | 13 +-- 18 files changed, 229 insertions(+), 223 deletions(-) diff --git a/src/swrenderer/line/r_line.cpp b/src/swrenderer/line/r_line.cpp index 849ae635f2..c88e17a97f 100644 --- a/src/swrenderer/line/r_line.cpp +++ b/src/swrenderer/line/r_line.cpp @@ -51,72 +51,16 @@ EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor); namespace swrenderer { - subsector_t *InSubsector; - sector_t *frontsector; - sector_t *backsector; - - seg_t *curline; - side_t *sidedef; - line_t *linedef; - - FWallCoords WallC; - FWallTmapVals WallT; - - double rw_backcz1; - double rw_backcz2; - double rw_backfz1; - double rw_backfz2; - double rw_frontcz1; - double rw_frontcz2; - double rw_frontfz1; - double rw_frontfz2; - - fixed_t rw_offset_top; - fixed_t rw_offset_mid; - fixed_t rw_offset_bottom; - - int 
rw_ceilstat, rw_floorstat; - bool rw_mustmarkfloor, rw_mustmarkceiling; - bool rw_prepped; - bool rw_markportal; - bool rw_havehigh; - bool rw_havelow; - - float rw_light; - float rw_lightstep; - float rw_lightleft; - - fixed_t rw_offset; - double rw_midtexturemid; - double rw_toptexturemid; - double rw_bottomtexturemid; - double rw_midtexturescalex; - double rw_midtexturescaley; - double rw_toptexturescalex; - double rw_toptexturescaley; - double rw_bottomtexturescalex; - double rw_bottomtexturescaley; - - FTexture *rw_pic; - - bool markfloor; // False if the back side is the same plane. - bool markceiling; - FTexture *toptexture; - FTexture *bottomtexture; - FTexture *midtexture; - - namespace - { - bool doorclosed; - int wallshade; - } - - void R_AddLine(seg_t *line) + void SWRenderLine::R_AddLine(seg_t *line, subsector_t *subsector, sector_t *sector, sector_t *fakebacksector) { static sector_t tempsec; // killough 3/8/98: ceiling/water hack bool solid; DVector2 pt1, pt2; + InSubsector = subsector; + frontsector = sector; + backsector = fakebacksector; + curline = line; // [RH] Color if not texturing line @@ -188,7 +132,7 @@ namespace swrenderer // kg3D - its fake, no transfer_heights if (!(fake3D & FAKE3D_FAKEBACK)) { // killough 3/8/98, 4/4/98: hack for invisible ceilings / deep water - backsector = R_FakeFlat(backsector, &tempsec, NULL, NULL, true); + backsector = R_FakeFlat(backsector, &tempsec, nullptr, nullptr, curline, WallC.sx1, WallC.sx2, rw_frontcz1, rw_frontcz2); } doorclosed = false; // killough 4/16/98 @@ -341,13 +285,19 @@ namespace swrenderer #endif } - if (R_ClipWallSegment(WallC.sx1, WallC.sx2, solid, R_StoreWallRange)) + static SWRenderLine *self = this; + bool visible = R_ClipWallSegment(WallC.sx1, WallC.sx2, solid, [](int x1, int x2) -> bool + { + return self->R_StoreWallRange(x1, x2); + }); + + if (visible) { InSubsector->flags |= SSECF_DRAWN; } } - bool R_SkyboxCompare(sector_t *frontsector, sector_t *backsector) + bool 
SWRenderLine::R_SkyboxCompare(sector_t *frontsector, sector_t *backsector) { FSectorPortal *frontc = frontsector->GetPortal(sector_t::ceiling); FSectorPortal *frontf = frontsector->GetPortal(sector_t::floor); @@ -365,7 +315,7 @@ namespace swrenderer } // A wall segment will be drawn between start and stop pixels (inclusive). - bool R_StoreWallRange(int start, int stop) + bool SWRenderLine::R_StoreWallRange(int start, int stop) { int i; bool maskedtexture = false; @@ -625,7 +575,7 @@ namespace swrenderer // [ZZ] Only if not an active mirror if (!rw_markportal) { - R_RenderDecals(curline->sidedef, draw_segment, wallshade); + R_RenderDecals(curline->sidedef, draw_segment, wallshade, rw_lightleft, rw_lightstep, curline, WallC); } if (rw_markportal) @@ -660,7 +610,7 @@ namespace swrenderer return !(fake3D & FAKE3D_FAKEMASK); } - void R_NewWall(bool needlights) + void SWRenderLine::R_NewWall(bool needlights) { double rowoffset; double yrepeat; @@ -968,7 +918,7 @@ namespace swrenderer bottomtexture ? (bottomtexture->Scale.X * sidedef->GetTextureXScale(side_t::bottom)) : 1.; - PrepWall(swall, lwall, sidedef->TexelLength * lwallscale, WallC.sx1, WallC.sx2); + PrepWall(swall, lwall, sidedef->TexelLength * lwallscale, WallC.sx1, WallC.sx2, WallT); if (fixedcolormap == NULL && fixedlightlev < 0) { @@ -986,7 +936,7 @@ namespace swrenderer } } - bool IsFogBoundary(sector_t *front, sector_t *back) + bool SWRenderLine::IsFogBoundary(sector_t *front, sector_t *back) { return r_fogboundary && fixedcolormap == NULL && front->ColorMap->Fade && front->ColorMap->Fade != back->ColorMap->Fade && @@ -995,7 +945,7 @@ namespace swrenderer // Draws zero, one, or two textures for walls. // Can draw or mark the starting pixel of floor and ceiling textures. 
- void R_RenderSegLoop(int x1, int x2) + void SWRenderLine::R_RenderSegLoop(int x1, int x2) { int x; double xscale; @@ -1094,7 +1044,7 @@ namespace swrenderer yscale = rw_pic->Scale.Y * rw_midtexturescaley; if (xscale != lwallscale) { - PrepLWall(lwall, curline->sidedef->TexelLength*xscale, WallC.sx1, WallC.sx2); + PrepLWall(lwall, curline->sidedef->TexelLength*xscale, WallC.sx1, WallC.sx2, WallT); lwallscale = xscale; } if (midtexture->bWorldPanning) @@ -1109,7 +1059,7 @@ namespace swrenderer { rw_offset = -rw_offset; } - R_DrawWallSegment(rw_pic, x1, x2, walltop, wallbottom, swall, lwall, yscale, MAX(rw_frontcz1, rw_frontcz2), MIN(rw_frontfz1, rw_frontfz2), false, wallshade, light_list); + R_DrawWallSegment(frontsector, curline, WallC, rw_pic, x1, x2, walltop, wallbottom, swall, lwall, yscale, MAX(rw_frontcz1, rw_frontcz2), MIN(rw_frontfz1, rw_frontfz2), false, wallshade, rw_offset, rw_light, rw_lightstep, light_list); } fillshort(ceilingclip + x1, x2 - x1, viewheight); fillshort(floorclip + x1, x2 - x1, 0xffff); @@ -1130,7 +1080,7 @@ namespace swrenderer yscale = rw_pic->Scale.Y * rw_toptexturescaley; if (xscale != lwallscale) { - PrepLWall(lwall, curline->sidedef->TexelLength*xscale, WallC.sx1, WallC.sx2); + PrepLWall(lwall, curline->sidedef->TexelLength*xscale, WallC.sx1, WallC.sx2, WallT); lwallscale = xscale; } if (toptexture->bWorldPanning) @@ -1145,7 +1095,7 @@ namespace swrenderer { rw_offset = -rw_offset; } - R_DrawWallSegment(rw_pic, x1, x2, walltop, wallupper, swall, lwall, yscale, MAX(rw_frontcz1, rw_frontcz2), MIN(rw_backcz1, rw_backcz2), false, wallshade, light_list); + R_DrawWallSegment(frontsector, curline, WallC, rw_pic, x1, x2, walltop, wallupper, swall, lwall, yscale, MAX(rw_frontcz1, rw_frontcz2), MIN(rw_backcz1, rw_backcz2), false, wallshade, rw_offset, rw_light, rw_lightstep, light_list); } memcpy(ceilingclip + x1, wallupper + x1, (x2 - x1) * sizeof(short)); } @@ -1169,7 +1119,7 @@ namespace swrenderer yscale = rw_pic->Scale.Y * 
rw_bottomtexturescaley; if (xscale != lwallscale) { - PrepLWall(lwall, curline->sidedef->TexelLength*xscale, WallC.sx1, WallC.sx2); + PrepLWall(lwall, curline->sidedef->TexelLength*xscale, WallC.sx1, WallC.sx2, WallT); lwallscale = xscale; } if (bottomtexture->bWorldPanning) @@ -1184,7 +1134,7 @@ namespace swrenderer { rw_offset = -rw_offset; } - R_DrawWallSegment(rw_pic, x1, x2, walllower, wallbottom, swall, lwall, yscale, MAX(rw_backfz1, rw_backfz2), MIN(rw_frontfz1, rw_frontfz2), false, wallshade, light_list); + R_DrawWallSegment(frontsector, curline, WallC, rw_pic, x1, x2, walllower, wallbottom, swall, lwall, yscale, MAX(rw_backfz1, rw_backfz2), MIN(rw_frontfz1, rw_frontfz2), false, wallshade, rw_offset, rw_light, rw_lightstep, light_list); } memcpy(floorclip + x1, walllower + x1, (x2 - x1) * sizeof(short)); } diff --git a/src/swrenderer/line/r_line.h b/src/swrenderer/line/r_line.h index c071e8e08c..fd245871df 100644 --- a/src/swrenderer/line/r_line.h +++ b/src/swrenderer/line/r_line.h @@ -35,59 +35,74 @@ namespace swrenderer void InitFromLine(const DVector2 &left, const DVector2 &right); }; - void R_AddLine(seg_t *line); - bool R_StoreWallRange(int start, int stop); - void R_NewWall(bool needlights); - void R_RenderSegLoop(int x1, int x2); + class SWRenderLine + { + public: + void R_AddLine(seg_t *line, subsector_t *subsector, sector_t *sector, sector_t *fakebacksector); - bool IsFogBoundary(sector_t *front, sector_t *back); - bool R_SkyboxCompare(sector_t *frontsector, sector_t *backsector); + private: + bool R_StoreWallRange(int start, int stop); + void R_NewWall(bool needlights); + void R_RenderSegLoop(int x1, int x2); - extern subsector_t *InSubsector; - extern sector_t *frontsector; - extern sector_t *backsector; + bool IsFogBoundary(sector_t *front, sector_t *back); + bool R_SkyboxCompare(sector_t *frontsector, sector_t *backsector); - extern seg_t *curline; - extern side_t *sidedef; - extern line_t *linedef; + subsector_t *InSubsector; + sector_t 
*frontsector; + sector_t *backsector; - extern FWallCoords WallC; - extern FWallTmapVals WallT; + seg_t *curline; + side_t *sidedef; + line_t *linedef; - extern double rw_backcz1; - extern double rw_backcz2; - extern double rw_backfz1; - extern double rw_backfz2; - extern double rw_frontcz1; - extern double rw_frontcz2; - extern double rw_frontfz1; - extern double rw_frontfz2; + FWallCoords WallC; + FWallTmapVals WallT; - extern fixed_t rw_offset_top; - extern fixed_t rw_offset_mid; - extern fixed_t rw_offset_bottom; + double rw_backcz1; + double rw_backcz2; + double rw_backfz1; + double rw_backfz2; + double rw_frontcz1; + double rw_frontcz2; + double rw_frontfz1; + double rw_frontfz2; - extern int rw_ceilstat, rw_floorstat; - extern bool rw_mustmarkfloor, rw_mustmarkceiling; - extern bool rw_prepped; - extern bool rw_markportal; - extern bool rw_havehigh; - extern bool rw_havelow; + fixed_t rw_offset_top; + fixed_t rw_offset_mid; + fixed_t rw_offset_bottom; - extern float rw_light; - extern float rw_lightstep; - extern float rw_lightleft; + int rw_ceilstat, rw_floorstat; + bool rw_mustmarkfloor, rw_mustmarkceiling; + bool rw_prepped; + bool rw_markportal; + bool rw_havehigh; + bool rw_havelow; - extern fixed_t rw_offset; - extern double rw_midtexturemid; - extern double rw_toptexturemid; - extern double rw_bottomtexturemid; - extern double rw_midtexturescalex; - extern double rw_midtexturescaley; - extern double rw_toptexturescalex; - extern double rw_toptexturescaley; - extern double rw_bottomtexturescalex; - extern double rw_bottomtexturescaley; + float rw_light; + float rw_lightstep; + float rw_lightleft; - extern FTexture *rw_pic; + fixed_t rw_offset; + double rw_midtexturemid; + double rw_toptexturemid; + double rw_bottomtexturemid; + double rw_midtexturescalex; + double rw_midtexturescaley; + double rw_toptexturescalex; + double rw_toptexturescaley; + double rw_bottomtexturescalex; + double rw_bottomtexturescaley; + + FTexture *rw_pic; + + bool doorclosed; + 
int wallshade; + + bool markfloor; // False if the back side is the same plane. + bool markceiling; + FTexture *toptexture; + FTexture *bottomtexture; + FTexture *midtexture; + }; } diff --git a/src/swrenderer/line/r_walldraw.cpp b/src/swrenderer/line/r_walldraw.cpp index 40173ce226..26f8e923d1 100644 --- a/src/swrenderer/line/r_walldraw.cpp +++ b/src/swrenderer/line/r_walldraw.cpp @@ -51,7 +51,10 @@ namespace swrenderer { using namespace drawerargs; - extern FTexture *rw_pic; + namespace + { + FTexture *rw_pic; + } static const uint8_t *R_GetColumn(FTexture *tex, int col) { @@ -191,9 +194,9 @@ namespace swrenderer } // Draw a column with support for non-power-of-two ranges - static void Draw1Column(int x, int y1, int y2, WallSampler &sampler, DrawerFunc draw1column) + static void Draw1Column(const FWallCoords &WallC, int x, int y1, int y2, WallSampler &sampler, DrawerFunc draw1column) { - if (r_dynlights) + if (r_dynlights && dc_light_list) { // Find column position in view space float w1 = 1.0f / WallC.sz1; @@ -330,14 +333,13 @@ namespace swrenderer } static void ProcessWallWorker( - int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade, + const FWallCoords &WallC, + int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade, fixed_t xoffset, float light, float lightstep, const BYTE *(*getcol)(FTexture *tex, int x), DrawerFunc drawcolumn) { if (rw_pic->UseType == FTexture::TEX_Null) return; - fixed_t xoffset = rw_offset; - rw_pic->GetHeight(); // To ensure that rw_pic->HeightBits has been set int fracbits = 32 - rw_pic->HeightBits; if (fracbits == 32) @@ -374,11 +376,9 @@ namespace swrenderer dc_normal.Y = -dx / length; dc_normal.Z = 0.0f; - float light = rw_light; - double xmagnitude = 1.0; - for (int x = x1; x < x2; x++, light += rw_lightstep) + for (int x = x1; x < x2; x++, light += lightstep) { int y1 = uwal[x]; int y2 = dwal[x]; @@ -391,44 +391,44 @@ namespace swrenderer 
if (x + 1 < x2) xmagnitude = fabs(FIXED2DBL(lwal[x + 1]) - FIXED2DBL(lwal[x])); WallSampler sampler(y1, swal[x], yrepeat, lwal[x] + xoffset, xmagnitude, rw_pic, getcol); - Draw1Column(x, y1, y2, sampler, drawcolumn); + Draw1Column(WallC, x, y1, y2, sampler, drawcolumn); } NetUpdate(); } - static void ProcessNormalWall(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade, const BYTE *(*getcol)(FTexture *tex, int x) = R_GetColumn) + static void ProcessNormalWall(const FWallCoords &WallC, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade, fixed_t xoffset, float light, float lightstep, const BYTE *(*getcol)(FTexture *tex, int x) = R_GetColumn) { - ProcessWallWorker(x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, getcol, &SWPixelFormatDrawers::DrawWallColumn); + ProcessWallWorker(WallC, x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, getcol, &SWPixelFormatDrawers::DrawWallColumn); } - static void ProcessMaskedWall(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade, const BYTE *(*getcol)(FTexture *tex, int x) = R_GetColumn) + static void ProcessMaskedWall(const FWallCoords &WallC, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade, fixed_t xoffset, float light, float lightstep, const BYTE *(*getcol)(FTexture *tex, int x) = R_GetColumn) { if (!rw_pic->bMasked) // Textures that aren't masked can use the faster ProcessNormalWall. 
{ - ProcessNormalWall(x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, getcol); + ProcessNormalWall(WallC, x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, getcol); } else { - ProcessWallWorker(x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, getcol, &SWPixelFormatDrawers::DrawWallMaskedColumn); + ProcessWallWorker(WallC, x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, getcol, &SWPixelFormatDrawers::DrawWallMaskedColumn); } } - static void ProcessTranslucentWall(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade, const BYTE *(*getcol)(FTexture *tex, int x) = R_GetColumn) + static void ProcessTranslucentWall(const FWallCoords &WallC, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade, fixed_t xoffset, float light, float lightstep, const BYTE *(*getcol)(FTexture *tex, int x) = R_GetColumn) { DrawerFunc drawcol1 = R_GetTransMaskDrawer(); if (drawcol1 == nullptr) { // The current translucency is unsupported, so draw with regular ProcessMaskedWall instead. 
- ProcessMaskedWall(x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, getcol); + ProcessMaskedWall(WallC, x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, getcol); } else { - ProcessWallWorker(x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, getcol, drawcol1); + ProcessWallWorker(WallC, x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, getcol, drawcol1); } } - static void ProcessStripedWall(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade) + static void ProcessStripedWall(sector_t *frontsector, seg_t *curline, const FWallCoords &WallC, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade, fixed_t xoffset, float light, float lightstep) { FDynamicColormap *startcolormap = basecolormap; bool fogginess = foggy; @@ -452,7 +452,7 @@ namespace swrenderer { down[j] = clamp(most3[j], up[j], dwal[j]); } - ProcessNormalWall(x1, x2, up, down, swal, lwal, yrepeat, wallshade); + ProcessNormalWall(WallC, x1, x2, up, down, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep); up = down; down = (down == most1) ? 
most2 : most1; } @@ -462,32 +462,32 @@ namespace swrenderer wallshade = LIGHT2SHADE(curline->sidedef->GetLightLevel(fogginess, *lit->p_lightlevel, lit->lightsource != NULL) + r_actualextralight); } - ProcessNormalWall(x1, x2, up, dwal, swal, lwal, yrepeat, wallshade); + ProcessNormalWall(WallC, x1, x2, up, dwal, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep); basecolormap = startcolormap; } - static void ProcessWall(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade, bool mask) + static void ProcessWall(sector_t *frontsector, seg_t *curline, const FWallCoords &WallC, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade, fixed_t xoffset, float light, float lightstep, bool mask) { if (mask) { if (colfunc == basecolfunc) { - ProcessMaskedWall(x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade); + ProcessMaskedWall(WallC, x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep); } else { - ProcessTranslucentWall(x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade); + ProcessTranslucentWall(WallC, x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep); } } else { if (fixedcolormap != NULL || fixedlightlev >= 0 || !(frontsector->e && frontsector->e->XFloor.lightlist.Size())) { - ProcessNormalWall(x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade); + ProcessNormalWall(WallC, x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep); } else { - ProcessStripedWall(x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade); + ProcessStripedWall(frontsector, curline, WallC, x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep); } } } @@ -503,7 +503,7 @@ namespace swrenderer // //============================================================================= - static void ProcessWallNP2(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, double top, double 
bot, int wallshade, bool mask) + static void ProcessWallNP2(sector_t *frontsector, seg_t *curline, const FWallCoords &WallC, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, double top, double bot, int wallshade, fixed_t xoffset, float light, float lightstep, bool mask) { short most1[MAXWIDTH], most2[MAXWIDTH], most3[MAXWIDTH]; short *up, *down; @@ -530,14 +530,14 @@ namespace swrenderer { down[j] = clamp(most3[j], up[j], dwal[j]); } - ProcessWall(x1, x2, up, down, swal, lwal, yrepeat, wallshade, mask); + ProcessWall(frontsector, curline, WallC, x1, x2, up, down, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, mask); up = down; down = (down == most1) ? most2 : most1; } partition -= scaledtexheight; dc_texturemid -= texheight; } - ProcessWall(x1, x2, up, dwal, swal, lwal, yrepeat, wallshade, mask); + ProcessWall(frontsector, curline, WallC, x1, x2, up, dwal, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, mask); } else { // upside down: draw strips from bottom to top @@ -554,19 +554,20 @@ namespace swrenderer { up[j] = clamp(most3[j], uwal[j], down[j]); } - ProcessWall(x1, x2, up, down, swal, lwal, yrepeat, wallshade, mask); + ProcessWall(frontsector, curline, WallC, x1, x2, up, down, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, mask); down = up; up = (up == most1) ? 
most2 : most1; } partition -= scaledtexheight; dc_texturemid -= texheight; } - ProcessWall(x1, x2, uwal, down, swal, lwal, yrepeat, wallshade, mask); + ProcessWall(frontsector, curline, WallC, x1, x2, uwal, down, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, mask); } } - void R_DrawDrawSeg(drawseg_t *ds, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade) + void R_DrawDrawSeg(sector_t *frontsector, seg_t *curline, const FWallCoords &WallC, FTexture *pic, drawseg_t *ds, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade, fixed_t xoffset, float light, float lightstep) { + rw_pic = pic; if (rw_pic->GetHeight() != 1 << rw_pic->HeightBits) { double frontcz1 = ds->curline->frontsector->ceilingplane.ZatPoint(ds->curline->v1); @@ -583,31 +584,34 @@ namespace swrenderer { bot = MAX(bot, sclipBottom); } - ProcessWallNP2(x1, x2, uwal, dwal, swal, lwal, yrepeat, top, bot, wallshade, true); + ProcessWallNP2(frontsector, curline, WallC, x1, x2, uwal, dwal, swal, lwal, yrepeat, top, bot, wallshade, xoffset, light, lightstep, true); } else { - ProcessWall(x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, true); + ProcessWall(frontsector, curline, WallC, x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, true); } } - void R_DrawWallSegment(FTexture *rw_pic, int x1, int x2, short *walltop, short *wallbottom, float *swall, fixed_t *lwall, double yscale, double top, double bottom, bool mask, int wallshade, FLightNode *light_list) + void R_DrawWallSegment(sector_t *frontsector, seg_t *curline, const FWallCoords &WallC, FTexture *pic, int x1, int x2, short *walltop, short *wallbottom, float *swall, fixed_t *lwall, double yscale, double top, double bottom, bool mask, int wallshade, fixed_t xoffset, float light, float lightstep, FLightNode *light_list) { + rw_pic = pic; dc_light_list = light_list; if (rw_pic->GetHeight() != 1 << rw_pic->HeightBits) { 
- ProcessWallNP2(x1, x2, walltop, wallbottom, swall, lwall, yscale, top, bottom, wallshade, false); + ProcessWallNP2(frontsector, curline, WallC, x1, x2, walltop, wallbottom, swall, lwall, yscale, top, bottom, wallshade, xoffset, light, lightstep, false); } else { - ProcessWall(x1, x2, walltop, wallbottom, swall, lwall, yscale, wallshade, false); + ProcessWall(frontsector, curline, WallC, x1, x2, walltop, wallbottom, swall, lwall, yscale, wallshade, xoffset, light, lightstep, false); } dc_light_list = nullptr; } - void R_DrawSkySegment(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade, const BYTE *(*getcol)(FTexture *tex, int x)) + void R_DrawSkySegment(FTexture *pic, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade, fixed_t xoffset, float light, float lightstep, const uint8_t *(*getcol)(FTexture *tex, int x)) { - ProcessNormalWall(x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, getcol); + rw_pic = pic; + FWallCoords wallC; // Not used. To do: don't use r_walldraw to draw the sky!! 
+ ProcessNormalWall(wallC, x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, getcol); } } diff --git a/src/swrenderer/line/r_walldraw.h b/src/swrenderer/line/r_walldraw.h index 8cc71f8df1..8ef23438f2 100644 --- a/src/swrenderer/line/r_walldraw.h +++ b/src/swrenderer/line/r_walldraw.h @@ -19,6 +19,7 @@ struct FLightNode; namespace swrenderer { struct drawseg_t; + struct FWallCoords; struct WallSampler { @@ -35,7 +36,7 @@ namespace swrenderer uint32_t height; }; - void R_DrawWallSegment(FTexture *rw_pic, int x1, int x2, short *walltop, short *wallbottom, float *swall, fixed_t *lwall, double yscale, double top, double bottom, bool mask, int wallshade, FLightNode *light_list); - void R_DrawSkySegment(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade, const uint8_t *(*getcol)(FTexture *tex, int col)); - void R_DrawDrawSeg(drawseg_t *ds, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade); + void R_DrawWallSegment(sector_t *frontsector, seg_t *curline, const FWallCoords &WallC, FTexture *rw_pic, int x1, int x2, short *walltop, short *wallbottom, float *swall, fixed_t *lwall, double yscale, double top, double bottom, bool mask, int wallshade, fixed_t xoffset, float light, float lightstep, FLightNode *light_list); + void R_DrawSkySegment(FTexture *rw_pic, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade, fixed_t xoffset, float light, float lightstep, const uint8_t *(*getcol)(FTexture *tex, int col)); + void R_DrawDrawSeg(sector_t *frontsector, seg_t *curline, const FWallCoords &WallC, FTexture *rw_pic, drawseg_t *ds, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade, fixed_t xoffset, float light, float lightstep); } diff --git a/src/swrenderer/line/r_wallsetup.cpp b/src/swrenderer/line/r_wallsetup.cpp index 90ba7793a1..ee32c1bd0d 100644 --- 
a/src/swrenderer/line/r_wallsetup.cpp +++ b/src/swrenderer/line/r_wallsetup.cpp @@ -153,7 +153,7 @@ namespace swrenderer } } - void PrepWall(float *vstep, fixed_t *upos, double walxrepeat, int x1, int x2) + void PrepWall(float *vstep, fixed_t *upos, double walxrepeat, int x1, int x2, const FWallTmapVals &WallT) { float uOverZ = WallT.UoverZorg + WallT.UoverZstep * (float)(x1 + 0.5 - CenterX); float invZ = WallT.InvZorg + WallT.InvZstep * (float)(x1 + 0.5 - CenterX); @@ -191,7 +191,7 @@ namespace swrenderer } } - void PrepLWall(fixed_t *upos, double walxrepeat, int x1, int x2) + void PrepLWall(fixed_t *upos, double walxrepeat, int x1, int x2, const FWallTmapVals &WallT) { float uOverZ = WallT.UoverZorg + WallT.UoverZstep * (float)(x1 + 0.5 - CenterX); float invZ = WallT.InvZorg + WallT.InvZstep * (float)(x1 + 0.5 - CenterX); diff --git a/src/swrenderer/line/r_wallsetup.h b/src/swrenderer/line/r_wallsetup.h index 4e483601f7..5ceac6f65e 100644 --- a/src/swrenderer/line/r_wallsetup.h +++ b/src/swrenderer/line/r_wallsetup.h @@ -6,6 +6,7 @@ namespace swrenderer { struct FWallCoords; + struct FWallTmapVals; extern short walltop[MAXWIDTH]; extern short wallbottom[MAXWIDTH]; @@ -19,6 +20,6 @@ namespace swrenderer int R_CreateWallSegmentYSloped(short *outbuf, const secplane_t &plane, const FWallCoords *wallc, seg_t *line, bool xflip); int R_CreateWallSegmentY(short *outbuf, double z, const FWallCoords *wallc); - void PrepWall(float *swall, fixed_t *lwall, double walxrepeat, int x1, int x2); - void PrepLWall(fixed_t *lwall, double walxrepeat, int x1, int x2); + void PrepWall(float *swall, fixed_t *lwall, double walxrepeat, int x1, int x2, const FWallTmapVals &WallT); + void PrepLWall(fixed_t *lwall, double walxrepeat, int x1, int x2, const FWallTmapVals &WallT); } diff --git a/src/swrenderer/plane/r_fogboundary.cpp b/src/swrenderer/plane/r_fogboundary.cpp index 68ec3097dc..bef238125e 100644 --- a/src/swrenderer/plane/r_fogboundary.cpp +++ 
b/src/swrenderer/plane/r_fogboundary.cpp @@ -51,14 +51,13 @@ namespace swrenderer short spanend[MAXHEIGHT]; } - void R_DrawFogBoundary(int x1, int x2, short *uclip, short *dclip, int wallshade) + void R_DrawFogBoundary(int x1, int x2, short *uclip, short *dclip, int wallshade, float lightleft, float lightstep) { // This is essentially the same as R_MapVisPlane but with an extra step // to create new horizontal spans whenever the light changes enough that // we need to use a new colormap. - double lightstep = rw_lightstep; - double light = rw_light + rw_lightstep*(x2 - x1 - 1); + float light = lightleft + lightstep*(x2 - x1 - 1); int x = x2 - 1; int t2 = uclip[x]; int b2 = dclip[x]; @@ -82,7 +81,7 @@ namespace swrenderer const int xr = x + 1; int stop; - light -= rw_lightstep; + light -= lightstep; lcolormap = GETPALOOKUP(light, wallshade); if (lcolormap != rcolormap) { diff --git a/src/swrenderer/plane/r_fogboundary.h b/src/swrenderer/plane/r_fogboundary.h index de872fd589..7eaff57b9a 100644 --- a/src/swrenderer/plane/r_fogboundary.h +++ b/src/swrenderer/plane/r_fogboundary.h @@ -17,6 +17,6 @@ namespace swrenderer { - void R_DrawFogBoundary(int x1, int x2, short *uclip, short *dclip, int wallshade); + void R_DrawFogBoundary(int x1, int x2, short *uclip, short *dclip, int wallshade, float lightleft, float lightstep); void R_DrawFogBoundarySection(int y, int y2, int x1); } diff --git a/src/swrenderer/plane/r_skyplane.cpp b/src/swrenderer/plane/r_skyplane.cpp index 1f391b331b..3cf4176c9d 100644 --- a/src/swrenderer/plane/r_skyplane.cpp +++ b/src/swrenderer/plane/r_skyplane.cpp @@ -453,10 +453,7 @@ namespace swrenderer lastskycol_bgra[x] = 0xffffffff; } - rw_pic = frontskytex; - rw_offset = 0; - - frontyScale = rw_pic->Scale.Y; + frontyScale = frontskytex->Scale.Y; dc_texturemid = skymid * frontyScale; if (1 << frontskytex->HeightBits == frontskytex->GetHeight()) @@ -466,8 +463,8 @@ namespace swrenderer lastskycol[x] = 0xffffffff; lastskycol_bgra[x] = 0xffffffff; } - 
R_DrawSkySegment(pl->left, pl->right, (short *)pl->top, (short *)pl->bottom, swall, lwall, - frontyScale, 0, backskytex == NULL ? R_GetOneSkyColumn : R_GetTwoSkyColumns); + R_DrawSkySegment(frontskytex, pl->left, pl->right, (short *)pl->top, (short *)pl->bottom, swall, lwall, + frontyScale, 0, 0, 0.0f, 0.0f, backskytex == NULL ? R_GetOneSkyColumn : R_GetTwoSkyColumns); } else { // The texture does not tile nicely @@ -504,7 +501,7 @@ namespace swrenderer lastskycol[x] = 0xffffffff; lastskycol_bgra[x] = 0xffffffff; } - R_DrawSkySegment(pl->left, pl->right, top, bot, swall, lwall, rw_pic->Scale.Y, 0, backskytex == NULL ? R_GetOneSkyColumn : R_GetTwoSkyColumns); + R_DrawSkySegment(frontskytex, pl->left, pl->right, top, bot, swall, lwall, frontskytex->Scale.Y, 0, 0, 0.0f, 0.0f, backskytex == NULL ? R_GetOneSkyColumn : R_GetTwoSkyColumns); yl = yh; yh += drawheight; dc_texturemid = iscale * (centery - yl - 1); diff --git a/src/swrenderer/r_main.cpp b/src/swrenderer/r_main.cpp index 48f9ea2324..1dbae64ba3 100644 --- a/src/swrenderer/r_main.cpp +++ b/src/swrenderer/r_main.cpp @@ -597,8 +597,7 @@ void R_RenderActorView (AActor *actor, bool dontmaplines) } // Link the polyobjects right before drawing the scene to reduce the amounts of calls to this function PO_LinkToSubsectors(); - InSubsector = NULL; - R_RenderBSPNode(nodes + numnodes - 1); // The head node is the last node output. 
+ R_RenderScene(); R_3D_ResetClip(); // reset clips (floor/ceiling) camera->renderflags = savedflags; WallCycles.Unclock(); diff --git a/src/swrenderer/scene/r_bsp.cpp b/src/swrenderer/scene/r_bsp.cpp index 1d0fcfd50e..f7e871572d 100644 --- a/src/swrenderer/scene/r_bsp.cpp +++ b/src/swrenderer/scene/r_bsp.cpp @@ -59,8 +59,13 @@ EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor); namespace swrenderer { - using namespace drawerargs; + namespace + { + subsector_t *InSubsector; + sector_t *frontsector; + SWRenderLine render_line; + } bool r_fakingunderwater; @@ -93,9 +98,7 @@ short ceilingclip[MAXWIDTH]; // killough 4/11/98, 4/13/98: fix bugs, add 'back' parameter // -sector_t *R_FakeFlat(sector_t *sec, sector_t *tempsec, - int *floorlightlevel, int *ceilinglightlevel, - bool back) +sector_t *R_FakeFlat(sector_t *sec, sector_t *tempsec, int *floorlightlevel, int *ceilinglightlevel, seg_t *backline, int backx1, int backx2, double frontcz1, double frontcz2) { // [RH] allow per-plane lighting if (floorlightlevel != NULL) @@ -182,13 +185,13 @@ sector_t *R_FakeFlat(sector_t *sec, sector_t *tempsec, // are underwater but not in a water sector themselves. // Only works if you cannot see the top surface of any deep water // sectors at the same time. 
- if (back && !r_fakingunderwater && curline->frontsector->heightsec == NULL) + if (backline && !r_fakingunderwater && backline->frontsector->heightsec == NULL) { - if (rw_frontcz1 <= s->floorplane.ZatPoint(curline->v1) && - rw_frontcz2 <= s->floorplane.ZatPoint(curline->v2)) + if (frontcz1 <= s->floorplane.ZatPoint(backline->v1) && + frontcz2 <= s->floorplane.ZatPoint(backline->v2)) { // Check that the window is actually visible - for (int z = WallC.sx1; z < WallC.sx2; ++z) + for (int z = backx1; z < backx2; ++z) { if (floorclip[z] > ceilingclip[z]) { @@ -211,7 +214,7 @@ sector_t *R_FakeFlat(sector_t *sec, sector_t *tempsec, } // killough 11/98: prevent sudden light changes from non-water sectors: - if ((underwater && !back) || doorunderwater) + if ((underwater && !backline) || doorunderwater) { // head-below-floor hack tempsec->SetTexture(sector_t::floor, diffTex ? sec->GetTexture(sector_t::floor) : s->GetTexture(sector_t::floor), false); tempsec->planes[sector_t::floor].xform = s->planes[sector_t::floor].xform; @@ -427,7 +430,7 @@ void R_FakeDrawLoop(subsector_t *sub) { if ((line->sidedef) && !(line->sidedef->Flags & WALLF_POLYOBJ)) { - R_AddLine (line); + render_line.R_AddLine (line, InSubsector, frontsector, nullptr); } line++; } @@ -489,8 +492,7 @@ void R_Subsector (subsector_t *sub) line = sub->firstline; // killough 3/8/98, 4/4/98: Deep water / fake ceiling effect - frontsector = R_FakeFlat(frontsector, &tempsec, &floorlightlevel, - &ceilinglightlevel, false); // killough 4/11/98 + frontsector = R_FakeFlat(frontsector, &tempsec, &floorlightlevel, &ceilinglightlevel, nullptr, 0, 0, 0, 0); fll = floorlightlevel; cll = ceilinglightlevel; @@ -757,20 +759,19 @@ void R_Subsector (subsector_t *sub) tempsec = *fakeFloor->model; tempsec.floorplane = *fakeFloor->top.plane; tempsec.ceilingplane = *fakeFloor->bottom.plane; - backsector = &tempsec; if (fakeFloor->validcount != validcount) { fakeFloor->validcount = validcount; R_3D_NewClip(); } - R_AddLine(line); // fake 
+ render_line.R_AddLine(line, InSubsector, frontsector, &tempsec); // fake } fakeFloor = NULL; fake3D = 0; floorplane = backupfp; ceilingplane = backupcp; } - R_AddLine (line); // now real + render_line.R_AddLine(line, InSubsector, frontsector, nullptr); // now real } line++; } @@ -780,6 +781,12 @@ void R_Subsector (subsector_t *sub) } } +void R_RenderScene() +{ + InSubsector = nullptr; + R_RenderBSPNode(nodes + numnodes - 1); // The head node is the last node output. +} + // // RenderBSPNode // Renders all subsectors below a given node, traversing subtree recursively. diff --git a/src/swrenderer/scene/r_bsp.h b/src/swrenderer/scene/r_bsp.h index ef5d08db19..d45075ba78 100644 --- a/src/swrenderer/scene/r_bsp.h +++ b/src/swrenderer/scene/r_bsp.h @@ -49,10 +49,11 @@ enum extern int WindowLeft, WindowRight; extern WORD MirrorFlags; +void R_RenderScene(); void R_RenderBSPNode (void *node); // killough 4/13/98: fake floors/ceilings for deep water / fake ceilings: -sector_t *R_FakeFlat(sector_t *, sector_t *, int *, int *, bool); +sector_t *R_FakeFlat(sector_t *sec, sector_t *tempsec, int *floorlightlevel, int *ceilinglightlevel, seg_t *backline, int backx1, int backx2, double frontcz1, double frontcz2); extern visplane_t *floorplane; extern visplane_t *ceilingplane; diff --git a/src/swrenderer/scene/r_portal.cpp b/src/swrenderer/scene/r_portal.cpp index 314891b61d..7d5a85f916 100644 --- a/src/swrenderer/scene/r_portal.cpp +++ b/src/swrenderer/scene/r_portal.cpp @@ -246,8 +246,7 @@ namespace swrenderer viewposStack.Push(ViewPos); visplaneStack.Push(pl); - InSubsector = nullptr; - R_RenderBSPNode(nodes + numnodes - 1); + R_RenderScene(); R_3D_ResetClip(); // reset clips (floor/ceiling) R_DrawPlanes(); @@ -466,8 +465,7 @@ namespace swrenderer memcpy(ceilingclip + pds->x1, &pds->ceilingclip[0], pds->len * sizeof(*ceilingclip)); memcpy(floorclip + pds->x1, &pds->floorclip[0], pds->len * sizeof(*floorclip)); - InSubsector = nullptr; - R_RenderBSPNode(nodes + numnodes - 1); + 
R_RenderScene(); R_3D_ResetClip(); // reset clips (floor/ceiling) if (!savedvisibility && camera) camera->renderflags &= ~RF_INVISIBLE; diff --git a/src/swrenderer/segments/r_drawsegment.cpp b/src/swrenderer/segments/r_drawsegment.cpp index 311f5a867b..6d174ce751 100644 --- a/src/swrenderer/segments/r_drawsegment.cpp +++ b/src/swrenderer/segments/r_drawsegment.cpp @@ -51,6 +51,19 @@ namespace swrenderer namespace { size_t MaxDrawSegs; + + sector_t *frontsector; + sector_t *backsector; + + seg_t *curline; + + FWallCoords WallC; + FWallTmapVals WallT; + + float rw_light; + float rw_lightstep; + fixed_t rw_offset; + FTexture *rw_pic; } void R_FreeDrawSegs() @@ -145,7 +158,7 @@ namespace swrenderer } // killough 4/13/98: get correct lightlevel for 2s normal textures - sec = R_FakeFlat(frontsector, &tempsec, nullptr, nullptr, false); + sec = R_FakeFlat(frontsector, &tempsec, nullptr, nullptr, nullptr, 0, 0, 0, 0); basecolormap = sec->ColorMap; // [RH] Set basecolormap @@ -179,7 +192,7 @@ namespace swrenderer // [RH] Draw fog partition if (ds->bFogBoundary) { - R_DrawFogBoundary(x1, x2, mceilingclip, mfloorclip, wallshade); + R_DrawFogBoundary(x1, x2, mceilingclip, mfloorclip, wallshade, rw_light, rw_lightstep); if (ds->maskedtexturecol == -1) { goto clearfog; @@ -397,7 +410,7 @@ namespace swrenderer rw_offset = 0; rw_pic = tex; - R_DrawDrawSeg(ds, x1, x2, mceilingclip, mfloorclip, MaskedSWall, maskedtexturecol, ds->yscale, wallshade); + R_DrawDrawSeg(frontsector, curline, WallC, rw_pic, ds, x1, x2, mceilingclip, mfloorclip, MaskedSWall, maskedtexturecol, ds->yscale, wallshade, rw_offset, rw_light, rw_lightstep); } clearfog: @@ -522,8 +535,8 @@ namespace swrenderer walllower[i] = mfloorclip[i]; } - PrepLWall(lwall, curline->sidedef->TexelLength*xscale, ds->sx1, ds->sx2); - R_DrawDrawSeg(ds, x1, x2, wallupper, walllower, MaskedSWall, lwall, yscale, wallshade); + PrepLWall(lwall, curline->sidedef->TexelLength*xscale, ds->sx1, ds->sx2, WallT); + R_DrawDrawSeg(frontsector, 
curline, WallC, rw_pic, ds, x1, x2, wallupper, walllower, MaskedSWall, lwall, yscale, wallshade, rw_offset, rw_light, rw_lightstep); R_FinishSetPatchStyle(); } diff --git a/src/swrenderer/things/r_decal.cpp b/src/swrenderer/things/r_decal.cpp index f992cf13e5..19281d12b9 100644 --- a/src/swrenderer/things/r_decal.cpp +++ b/src/swrenderer/things/r_decal.cpp @@ -44,11 +44,11 @@ namespace swrenderer { - void R_RenderDecals(side_t *sidedef, drawseg_t *draw_segment, int wallshade) + void R_RenderDecals(side_t *sidedef, drawseg_t *draw_segment, int wallshade, float lightleft, float lightstep, seg_t *curline, const FWallCoords &wallC) { for (DBaseDecal *decal = sidedef->AttachedDecals; decal != NULL; decal = decal->WallNext) { - R_RenderDecal(sidedef, decal, draw_segment, wallshade, 0); + R_RenderDecal(sidedef, decal, draw_segment, wallshade, lightleft, lightstep, curline, wallC, 0); } } @@ -56,7 +56,7 @@ namespace swrenderer // = 1: drawing masked textures (including sprites) // Currently, only pass = 0 is done or used - void R_RenderDecal(side_t *wall, DBaseDecal *decal, drawseg_t *clipper, int wallshade, int pass) + void R_RenderDecal(side_t *wall, DBaseDecal *decal, drawseg_t *clipper, int wallshade, float lightleft, float lightstep, seg_t *curline, FWallCoords WallC, int pass) { DVector2 decal_left, decal_right, decal_pos; int x1, x2; @@ -150,6 +150,7 @@ namespace swrenderer if (x1 >= clipper->x2 || x2 <= clipper->x1) goto done; + FWallTmapVals WallT; WallT.InitFromWallCoords(&WallC); // Get the top and bottom clipping arrays @@ -217,7 +218,7 @@ namespace swrenderer goto done; } - PrepWall(swall, lwall, WallSpriteTile->GetWidth(), x1, x2); + PrepWall(swall, lwall, WallSpriteTile->GetWidth(), x1, x2, WallT); if (flipx) { @@ -242,7 +243,7 @@ namespace swrenderer rereadcolormap = false; } - rw_light = rw_lightleft + (x1 - savecoord.sx1) * rw_lightstep; + float light = lightleft + (x1 - savecoord.sx1) * lightstep; if (fixedlightlev >= 0) 
R_SetColorMapLight((r_fullbrightignoresectorcolor) ? &FullNormalLight : usecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); else if (fixedcolormap != NULL) @@ -283,9 +284,10 @@ namespace swrenderer { if (calclighting) { // calculate lighting - R_SetColorMapLight(usecolormap, rw_light, wallshade); + R_SetColorMapLight(usecolormap, light, wallshade); } - R_WallSpriteColumn(x, maskedScaleY); + R_DecalColumn(x, maskedScaleY); + light += lightstep; x++; } } @@ -304,4 +306,22 @@ namespace swrenderer done: WallC = savecoord; } + + void R_DecalColumn(int x, float maskedScaleY) + { + using namespace drawerargs; + + dc_x = x; + + float iscale = swall[dc_x] * maskedScaleY; + dc_iscale = FLOAT2FIXED(iscale); + spryscale = 1 / iscale; + if (sprflipvert) + sprtopscreen = CenterY + dc_texturemid * spryscale; + else + sprtopscreen = CenterY - dc_texturemid * spryscale; + + dc_texturefrac = 0; + R_DrawMaskedColumn(WallSpriteTile, lwall[dc_x]); + } } diff --git a/src/swrenderer/things/r_decal.h b/src/swrenderer/things/r_decal.h index 74871c4b04..7bd4147fa0 100644 --- a/src/swrenderer/things/r_decal.h +++ b/src/swrenderer/things/r_decal.h @@ -20,6 +20,7 @@ namespace swrenderer { struct drawseg_t; - void R_RenderDecals(side_t *wall, drawseg_t *draw_segment, int wallshade); - void R_RenderDecal(side_t *wall, DBaseDecal *first, drawseg_t *clipper, int wallshade, int pass); + void R_RenderDecals(side_t *wall, drawseg_t *draw_segment, int wallshade, float lightleft, float lightstep, seg_t *curline, const FWallCoords &wallC); + void R_RenderDecal(side_t *wall, DBaseDecal *first, drawseg_t *clipper, int wallshade, float lightleft, float lightstep, seg_t *curline, FWallCoords wallC, int pass); + void R_DecalColumn(int x, float maskedScaleY); } diff --git a/src/swrenderer/things/r_playersprite.cpp b/src/swrenderer/things/r_playersprite.cpp index 22ae8e99c1..5dad94c707 100644 --- a/src/swrenderer/things/r_playersprite.cpp +++ b/src/swrenderer/things/r_playersprite.cpp @@ -123,8 +123,7 @@ 
namespace swrenderer else { // This used to use camera->Sector but due to interpolation that can be incorrect // when the interpolated viewpoint is in a different sector than the camera. - sec = R_FakeFlat(viewsector, &tempsec, &floorlight, - &ceilinglight, false); + sec = R_FakeFlat(viewsector, &tempsec, &floorlight, &ceilinglight, nullptr, 0, 0, 0, 0); // [RH] set basecolormap basecolormap = sec->ColorMap; diff --git a/src/swrenderer/things/r_wallsprite.cpp b/src/swrenderer/things/r_wallsprite.cpp index 4073917bb7..b4c60cb95d 100644 --- a/src/swrenderer/things/r_wallsprite.cpp +++ b/src/swrenderer/things/r_wallsprite.cpp @@ -141,8 +141,9 @@ namespace swrenderer x2 = MIN(spr->x2, spr->wallc.sx2); if (x1 >= x2) return; + FWallTmapVals WallT; WallT.InitFromWallCoords(&spr->wallc); - PrepWall(swall, lwall, spr->pic->GetWidth() << FRACBITS, x1, x2); + PrepWall(swall, lwall, spr->pic->GetWidth() << FRACBITS, x1, x2, WallT); iyscale = 1 / spr->yscale; dc_texturemid = (spr->gzt - ViewPos.Z) * iyscale; if (spr->renderflags & RF_XFLIP) @@ -168,9 +169,9 @@ namespace swrenderer int shade = LIGHT2SHADE(spr->sector->lightlevel + r_actualextralight); GlobVis = r_WallVisibility; - rw_lightleft = float(GlobVis / spr->wallc.sz1); - rw_lightstep = float((GlobVis / spr->wallc.sz2 - rw_lightleft) / (spr->wallc.sx2 - spr->wallc.sx1)); - rw_light = rw_lightleft + (x1 - spr->wallc.sx1) * rw_lightstep; + float lightleft = float(GlobVis / spr->wallc.sz1); + float lightstep = float((GlobVis / spr->wallc.sz2 - lightleft) / (spr->wallc.sx2 - spr->wallc.sx1)); + float light = lightleft + (x1 - spr->wallc.sx1) * lightstep; if (fixedlightlev >= 0) R_SetColorMapLight(usecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); else if (fixedcolormap != NULL) @@ -215,10 +216,11 @@ namespace swrenderer { if (calclighting) { // calculate lighting - R_SetColorMapLight(usecolormap, rw_light, shade); + R_SetColorMapLight(usecolormap, light, shade); } if (!R_ClipSpriteColumnWithPortals(spr)) R_WallSpriteColumn(x, 
maskedScaleY); + light += lightstep; x++; } } @@ -241,6 +243,5 @@ namespace swrenderer dc_texturefrac = 0; R_DrawMaskedColumn(WallSpriteTile, lwall[dc_x]); - rw_light += rw_lightstep; } } From 863f17ada96940bb47ca3d3f8d62ef49a708be19 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 3 Jan 2017 19:08:02 +0100 Subject: [PATCH 634/912] Fix function names --- src/swrenderer/line/r_line.cpp | 20 ++++++++++---------- src/swrenderer/line/r_line.h | 12 ++++++------ src/swrenderer/scene/r_bsp.cpp | 6 +++--- 3 files changed, 19 insertions(+), 19 deletions(-) diff --git a/src/swrenderer/line/r_line.cpp b/src/swrenderer/line/r_line.cpp index c88e17a97f..18db1752fb 100644 --- a/src/swrenderer/line/r_line.cpp +++ b/src/swrenderer/line/r_line.cpp @@ -51,7 +51,7 @@ EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor); namespace swrenderer { - void SWRenderLine::R_AddLine(seg_t *line, subsector_t *subsector, sector_t *sector, sector_t *fakebacksector) + void SWRenderLine::Render(seg_t *line, subsector_t *subsector, sector_t *sector, sector_t *fakebacksector) { static sector_t tempsec; // killough 3/8/98: ceiling/water hack bool solid; @@ -205,7 +205,7 @@ namespace swrenderer // Window. 
solid = false; } - else if (R_SkyboxCompare(frontsector, backsector)) + else if (SkyboxCompare(frontsector, backsector)) { solid = false; } @@ -288,7 +288,7 @@ namespace swrenderer static SWRenderLine *self = this; bool visible = R_ClipWallSegment(WallC.sx1, WallC.sx2, solid, [](int x1, int x2) -> bool { - return self->R_StoreWallRange(x1, x2); + return self->RenderWallSegment(x1, x2); }); if (visible) @@ -297,7 +297,7 @@ namespace swrenderer } } - bool SWRenderLine::R_SkyboxCompare(sector_t *frontsector, sector_t *backsector) + bool SWRenderLine::SkyboxCompare(sector_t *frontsector, sector_t *backsector) const { FSectorPortal *frontc = frontsector->GetPortal(sector_t::ceiling); FSectorPortal *frontf = frontsector->GetPortal(sector_t::floor); @@ -315,7 +315,7 @@ namespace swrenderer } // A wall segment will be drawn between start and stop pixels (inclusive). - bool SWRenderLine::R_StoreWallRange(int start, int stop) + bool SWRenderLine::RenderWallSegment(int start, int stop) { int i; bool maskedtexture = false; @@ -330,7 +330,7 @@ namespace swrenderer if (!rw_prepped) { rw_prepped = true; - R_NewWall(true); + SetWallVariables(true); } rw_offset = FLOAT2FIXED(sidedef->GetTextureXOffset(side_t::mid)); @@ -547,7 +547,7 @@ namespace swrenderer } } - R_RenderSegLoop(start, stop); + RenderWallSegmentTextures(start, stop); if (fake3D & 7) { return !(fake3D & FAKE3D_FAKEMASK); @@ -610,7 +610,7 @@ namespace swrenderer return !(fake3D & FAKE3D_FAKEMASK); } - void SWRenderLine::R_NewWall(bool needlights) + void SWRenderLine::SetWallVariables(bool needlights) { double rowoffset; double yrepeat; @@ -936,7 +936,7 @@ namespace swrenderer } } - bool SWRenderLine::IsFogBoundary(sector_t *front, sector_t *back) + bool SWRenderLine::IsFogBoundary(sector_t *front, sector_t *back) const { return r_fogboundary && fixedcolormap == NULL && front->ColorMap->Fade && front->ColorMap->Fade != back->ColorMap->Fade && @@ -945,7 +945,7 @@ namespace swrenderer // Draws zero, one, or two textures 
for walls. // Can draw or mark the starting pixel of floor and ceiling textures. - void SWRenderLine::R_RenderSegLoop(int x1, int x2) + void SWRenderLine::RenderWallSegmentTextures(int x1, int x2) { int x; double xscale; diff --git a/src/swrenderer/line/r_line.h b/src/swrenderer/line/r_line.h index fd245871df..7c677e072d 100644 --- a/src/swrenderer/line/r_line.h +++ b/src/swrenderer/line/r_line.h @@ -38,15 +38,15 @@ namespace swrenderer class SWRenderLine { public: - void R_AddLine(seg_t *line, subsector_t *subsector, sector_t *sector, sector_t *fakebacksector); + void Render(seg_t *line, subsector_t *subsector, sector_t *sector, sector_t *fakebacksector); private: - bool R_StoreWallRange(int start, int stop); - void R_NewWall(bool needlights); - void R_RenderSegLoop(int x1, int x2); + bool RenderWallSegment(int x1, int x2); + void SetWallVariables(bool needlights); + void RenderWallSegmentTextures(int x1, int x2); - bool IsFogBoundary(sector_t *front, sector_t *back); - bool R_SkyboxCompare(sector_t *frontsector, sector_t *backsector); + bool IsFogBoundary(sector_t *front, sector_t *back) const; + bool SkyboxCompare(sector_t *frontsector, sector_t *backsector) const; subsector_t *InSubsector; sector_t *frontsector; diff --git a/src/swrenderer/scene/r_bsp.cpp b/src/swrenderer/scene/r_bsp.cpp index f7e871572d..20b04ebaff 100644 --- a/src/swrenderer/scene/r_bsp.cpp +++ b/src/swrenderer/scene/r_bsp.cpp @@ -430,7 +430,7 @@ void R_FakeDrawLoop(subsector_t *sub) { if ((line->sidedef) && !(line->sidedef->Flags & WALLF_POLYOBJ)) { - render_line.R_AddLine (line, InSubsector, frontsector, nullptr); + render_line.Render(line, InSubsector, frontsector, nullptr); } line++; } @@ -764,14 +764,14 @@ void R_Subsector (subsector_t *sub) fakeFloor->validcount = validcount; R_3D_NewClip(); } - render_line.R_AddLine(line, InSubsector, frontsector, &tempsec); // fake + render_line.Render(line, InSubsector, frontsector, &tempsec); // fake } fakeFloor = NULL; fake3D = 0; floorplane = 
backupfp; ceilingplane = backupcp; } - render_line.R_AddLine(line, InSubsector, frontsector, nullptr); // now real + render_line.Render(line, InSubsector, frontsector, nullptr); // now real } line++; } From e25645df46bc78ff7318b43c4fb8a47b79c39d1a Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 3 Jan 2017 19:16:37 +0100 Subject: [PATCH 635/912] Move r_fogboundary to line folder --- src/CMakeLists.txt | 2 +- src/swrenderer/{plane => line}/r_fogboundary.cpp | 10 +++++----- src/swrenderer/{plane => line}/r_fogboundary.h | 2 -- src/swrenderer/segments/r_drawsegment.cpp | 2 +- 4 files changed, 7 insertions(+), 9 deletions(-) rename src/swrenderer/{plane => line}/r_fogboundary.cpp (98%) rename src/swrenderer/{plane => line}/r_fogboundary.h (95%) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index d3eea4bab5..7610d957cb 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -852,6 +852,7 @@ set( FASTMATH_PCH_SOURCES swrenderer/line/r_line.cpp swrenderer/line/r_walldraw.cpp swrenderer/line/r_wallsetup.cpp + swrenderer/line/r_fogboundary.cpp swrenderer/segments/r_clipsegment.cpp swrenderer/segments/r_drawsegment.cpp swrenderer/segments/r_portalsegment.cpp @@ -865,7 +866,6 @@ set( FASTMATH_PCH_SOURCES swrenderer/plane/r_skyplane.cpp swrenderer/plane/r_flatplane.cpp swrenderer/plane/r_slopeplane.cpp - swrenderer/plane/r_fogboundary.cpp polyrenderer/poly_renderer.cpp polyrenderer/scene/poly_scene.cpp polyrenderer/scene/poly_portal.cpp diff --git a/src/swrenderer/plane/r_fogboundary.cpp b/src/swrenderer/line/r_fogboundary.cpp similarity index 98% rename from src/swrenderer/plane/r_fogboundary.cpp rename to src/swrenderer/line/r_fogboundary.cpp index bef238125e..f5aff03529 100644 --- a/src/swrenderer/plane/r_fogboundary.cpp +++ b/src/swrenderer/line/r_fogboundary.cpp @@ -28,16 +28,16 @@ #include "cmdlib.h" #include "d_net.h" #include "g_level.h" -#include "swrenderer/scene/r_bsp.h" -#include "swrenderer/plane/r_fogboundary.h" -#include 
"swrenderer/scene/r_3dfloors.h" #include "v_palette.h" #include "r_data/colormaps.h" -#include "swrenderer/drawers/r_draw_rgba.h" #include "gl/dynlights/gl_dynlight.h" +#include "swrenderer/drawers/r_draw_rgba.h" +#include "swrenderer/scene/r_bsp.h" +#include "swrenderer/scene/r_3dfloors.h" +#include "swrenderer/scene/r_portal.h" #include "swrenderer/segments/r_clipsegment.h" #include "swrenderer/segments/r_drawsegment.h" -#include "swrenderer/scene/r_portal.h" +#include "swrenderer/line/r_fogboundary.h" #include "swrenderer/r_memory.h" #ifdef _MSC_VER diff --git a/src/swrenderer/plane/r_fogboundary.h b/src/swrenderer/line/r_fogboundary.h similarity index 95% rename from src/swrenderer/plane/r_fogboundary.h rename to src/swrenderer/line/r_fogboundary.h index 7eaff57b9a..bb7d267241 100644 --- a/src/swrenderer/plane/r_fogboundary.h +++ b/src/swrenderer/line/r_fogboundary.h @@ -13,8 +13,6 @@ #pragma once -#include "r_visibleplane.h" - namespace swrenderer { void R_DrawFogBoundary(int x1, int x2, short *uclip, short *dclip, int wallshade, float lightleft, float lightstep); diff --git a/src/swrenderer/segments/r_drawsegment.cpp b/src/swrenderer/segments/r_drawsegment.cpp index 6d174ce751..633eae15bb 100644 --- a/src/swrenderer/segments/r_drawsegment.cpp +++ b/src/swrenderer/segments/r_drawsegment.cpp @@ -36,7 +36,7 @@ #include "swrenderer/scene/r_bsp.h" #include "swrenderer/line/r_wallsetup.h" #include "swrenderer/line/r_walldraw.h" -#include "swrenderer/plane/r_fogboundary.h" +#include "swrenderer/line/r_fogboundary.h" #include "swrenderer/segments/r_drawsegment.h" namespace swrenderer From abdc7f9ff11a044186056feca1f7ae4e975399a6 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 3 Jan 2017 19:25:00 +0100 Subject: [PATCH 636/912] Move WindowLeft, WindowRight, MirrorFlags to r_portal --- src/swrenderer/line/r_walldraw.cpp | 1 + src/swrenderer/scene/r_bsp.cpp | 3 --- src/swrenderer/scene/r_bsp.h | 3 --- src/swrenderer/scene/r_portal.cpp | 3 +++ 
src/swrenderer/scene/r_portal.h | 3 +++ src/swrenderer/segments/r_drawsegment.cpp | 1 + 6 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/swrenderer/line/r_walldraw.cpp b/src/swrenderer/line/r_walldraw.cpp index 26f8e923d1..e486ffbcb7 100644 --- a/src/swrenderer/line/r_walldraw.cpp +++ b/src/swrenderer/line/r_walldraw.cpp @@ -44,6 +44,7 @@ #include "swrenderer/segments/r_drawsegment.h" #include "swrenderer/scene/r_bsp.h" #include "swrenderer/scene/r_3dfloors.h" +#include "swrenderer/scene/r_portal.h" #include "swrenderer/line/r_walldraw.h" #include "swrenderer/line/r_wallsetup.h" diff --git a/src/swrenderer/scene/r_bsp.cpp b/src/swrenderer/scene/r_bsp.cpp index 20b04ebaff..dc7747ec35 100644 --- a/src/swrenderer/scene/r_bsp.cpp +++ b/src/swrenderer/scene/r_bsp.cpp @@ -71,9 +71,6 @@ bool r_fakingunderwater; static BYTE FakeSide; -int WindowLeft, WindowRight; -WORD MirrorFlags; - visplane_t *floorplane; visplane_t *ceilingplane; diff --git a/src/swrenderer/scene/r_bsp.h b/src/swrenderer/scene/r_bsp.h index d45075ba78..e39b760955 100644 --- a/src/swrenderer/scene/r_bsp.h +++ b/src/swrenderer/scene/r_bsp.h @@ -46,9 +46,6 @@ enum FAKED_AboveCeiling }; -extern int WindowLeft, WindowRight; -extern WORD MirrorFlags; - void R_RenderScene(); void R_RenderBSPNode (void *node); diff --git a/src/swrenderer/scene/r_portal.cpp b/src/swrenderer/scene/r_portal.cpp index 7d5a85f916..dd8342a683 100644 --- a/src/swrenderer/scene/r_portal.cpp +++ b/src/swrenderer/scene/r_portal.cpp @@ -62,6 +62,9 @@ CVAR(Bool, r_skyboxes, true, 0) namespace swrenderer { + int WindowLeft, WindowRight; + uint16_t MirrorFlags; + PortalDrawseg *CurrentPortal = nullptr; int CurrentPortalUniq = 0; bool CurrentPortalInSkybox = false; diff --git a/src/swrenderer/scene/r_portal.h b/src/swrenderer/scene/r_portal.h index e31cc9f2fb..f7b1fb74c4 100644 --- a/src/swrenderer/scene/r_portal.h +++ b/src/swrenderer/scene/r_portal.h @@ -17,6 +17,9 @@ namespace swrenderer { + extern int WindowLeft, 
WindowRight; + extern uint16_t MirrorFlags; + extern PortalDrawseg* CurrentPortal; extern int CurrentPortalUniq; extern bool CurrentPortalInSkybox; diff --git a/src/swrenderer/segments/r_drawsegment.cpp b/src/swrenderer/segments/r_drawsegment.cpp index 633eae15bb..ca20dd0dbf 100644 --- a/src/swrenderer/segments/r_drawsegment.cpp +++ b/src/swrenderer/segments/r_drawsegment.cpp @@ -34,6 +34,7 @@ #include "swrenderer/scene/r_things.h" #include "swrenderer/scene/r_3dfloors.h" #include "swrenderer/scene/r_bsp.h" +#include "swrenderer/scene/r_portal.h" #include "swrenderer/line/r_wallsetup.h" #include "swrenderer/line/r_walldraw.h" #include "swrenderer/line/r_fogboundary.h" From ca9523acef72d45b18f803b959d1f986d095f66b Mon Sep 17 00:00:00 2001 From: "alexey.lysiuk" Date: Tue, 3 Jan 2017 09:45:53 +0200 Subject: [PATCH 637/912] Fixed compilation with GCC/Clang Fixes #175 --- src/tarray.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/tarray.h b/src/tarray.h index ca24649677..e40b88a7be 100644 --- a/src/tarray.h +++ b/src/tarray.h @@ -550,8 +550,8 @@ public: //////// TStaticArray() { - Count = 0; - Array = NULL; + this->Count = 0; + this->Array = NULL; } // This is not supposed to be copyable. TStaticArray(const TStaticArray &other) = delete; @@ -562,13 +562,13 @@ public: } void Clear() { - if (Array) delete[] Array; + if (this->Array) delete[] this->Array; } void Alloc(unsigned int amount) { Clear(); - Array = new T[amount]; - Count = Amount; + this->Array = new T[amount]; + this->Count = amount; } }; From f30b2ca80dd27203e3b3c677252940f71b7e3664 Mon Sep 17 00:00:00 2001 From: Christoph Oelckers Date: Tue, 3 Jan 2017 12:00:26 +0100 Subject: [PATCH 638/912] - disabled OpenGL 3.0 on the Open Source Mesa driver for Linux because it appears to be broken. 
--- src/gl/system/gl_interface.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gl/system/gl_interface.cpp b/src/gl/system/gl_interface.cpp index b0bc46ab27..f1babfb7f4 100644 --- a/src/gl/system/gl_interface.cpp +++ b/src/gl/system/gl_interface.cpp @@ -168,7 +168,7 @@ void gl_LoadExtensions() } // The minimum requirement for the modern render path are GL 3.0 + uniform buffers - if (gl_version < 3.0f || (gl_version < 3.1f && !CheckExtension("GL_ARB_uniform_buffer_object"))) + if (gl_version < 3.0f || (gl_version < 3.1f && !CheckExtension("GL_ARB_uniform_buffer_object") && strstr(gl.vendorstring, "X.Org") == nullptr)) { gl.legacyMode = true; gl.lightmethod = LM_LEGACY; From 37dab4a12cbe04b7df4503930f474f9eecf399dd Mon Sep 17 00:00:00 2001 From: Christoph Oelckers Date: Tue, 3 Jan 2017 16:00:25 +0100 Subject: [PATCH 639/912] - fixed: 'out' parameters must always allocate an address register, regardless of type. --- src/scripting/vm/vmbuilder.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/scripting/vm/vmbuilder.cpp b/src/scripting/vm/vmbuilder.cpp index 3e833af79c..4886dba8c5 100644 --- a/src/scripting/vm/vmbuilder.cpp +++ b/src/scripting/vm/vmbuilder.cpp @@ -857,14 +857,15 @@ void FFunctionBuildList::Build() // Allocate registers for the function's arguments and create local variable nodes before starting to resolve it. VMFunctionBuilder buildit(item.Func->GetImplicitArgs()); - for(unsigned i=0;iVariants[0].Proto->ArgumentTypes.Size();i++) + for (unsigned i = 0; i < item.Func->Variants[0].Proto->ArgumentTypes.Size(); i++) { auto type = item.Func->Variants[0].Proto->ArgumentTypes[i]; auto name = item.Func->Variants[0].ArgNames[i]; auto flags = item.Func->Variants[0].ArgFlags[i]; // this won't get resolved and won't get emitted. It is only needed so that the code generator can retrieve the necessary info about this argument to do its work. 
- auto local = new FxLocalVariableDeclaration(type, name, nullptr, flags, FScriptPosition()); - local->RegNum = buildit.Registers[type->GetRegType()].Get(type->GetRegCount()); + auto local = new FxLocalVariableDeclaration(type, name, nullptr, flags, FScriptPosition()); + if (!(flags & VARF_Out)) local->RegNum = buildit.Registers[type->GetRegType()].Get(type->GetRegCount()); + else local->RegNum = buildit.Registers[REGT_POINTER].Get(1); ctx.FunctionArgs.Push(local); } From 105f877e671b716a6e8158c69fa6b9b5a71f432f Mon Sep 17 00:00:00 2001 From: Christoph Oelckers Date: Tue, 3 Jan 2017 17:16:33 +0100 Subject: [PATCH 640/912] - fixed a few Freedoom light definitions. --- .../static/filter/doom.freedoom/gldefs.txt | 38 +++++++++---------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/wadsrc_lights/static/filter/doom.freedoom/gldefs.txt b/wadsrc_lights/static/filter/doom.freedoom/gldefs.txt index 1b501ca8e1..19aa9d0fde 100644 --- a/wadsrc_lights/static/filter/doom.freedoom/gldefs.txt +++ b/wadsrc_lights/static/filter/doom.freedoom/gldefs.txt @@ -337,7 +337,7 @@ object ExplosiveBarrel // Floor lamp pointlight LAMP { - color 1.0 1.0 0.8 + color 0.6 1.0 0.6 size 84 offset 0 44 0 attenuate 1 @@ -351,7 +351,7 @@ object Column // Short tech lamp pulselight SMALLLAMP { - color 0.8 0.8 1.0 + color 1.0 1.0 0.7 size 84 secondarySize 87 interval 0.4 @@ -367,7 +367,7 @@ object TechLamp2 // Tall tech lamp pulselight BIGLAMP { - color 0.8 0.8 1.0 + color 1.0 1.0 0.7 size 96 secondarySize 99 interval 0.4 @@ -542,6 +542,22 @@ object SoulSphere frame SOUL { light SOULSPHERE } } +// Soul Sphere +pulselight MEGASPHERE +{ + color 0.4 1.0 0.4 + size 60 + secondarySize 63 + interval 2.0 + offset 0 16 0 + attenuate 1 +} + +object MegaSphere +{ + frame MEGA { light MEGASPHERE } +} + // Invulnerability Sphere pulselight INVULN { @@ -609,22 +625,6 @@ object BlurSphere frame PINSD { light BLURSPHERE5 } } -// Health Potion -pulselight HEALTHPOTION -{ - color 0.0 0.0 0.6 - size 24 - 
secondarySize 27 - interval 2.0 - attenuate 1 - offset 0 10 0 -} - -object HealthBonus -{ - frame BON1 { light HEALTHPOTION } -} - // Armour Helmet pulselight ARMORBONUS { From 3fec305ca22b1334292ac450c03cca103985c5b3 Mon Sep 17 00:00:00 2001 From: Christoph Oelckers Date: Tue, 3 Jan 2017 18:31:01 +0100 Subject: [PATCH 641/912] - completed attenuation for Heretic. A few items were intentionally left unattenuated. --- .../static/filter/heretic/gldefs.txt | 408 +++++++++++------- 1 file changed, 253 insertions(+), 155 deletions(-) diff --git a/wadsrc_lights/static/filter/heretic/gldefs.txt b/wadsrc_lights/static/filter/heretic/gldefs.txt index ecfbc18ce3..85a2fd6e29 100644 --- a/wadsrc_lights/static/filter/heretic/gldefs.txt +++ b/wadsrc_lights/static/filter/heretic/gldefs.txt @@ -823,25 +823,28 @@ object RedAxe flickerlight DISCIPLEBALL { color 1.0 0.5 1.0 - size 32 - secondarySize 40 + size 48 + secondarySize 60 chance 0.3 + attenuate 1 } flickerlight DISCIPLEBALL_X1 { color 0.7 0.3 0.7 - size 16 - secondarySize 24 + size 24 + secondarySize 36 chance 0.3 + attenuate 1 } flickerlight DISCIPLEBALL_X2 { color 0.3 0.17 0.3 - size 8 - secondarySize 16 + size 12 + secondarySize 24 chance 0.3 + attenuate 1 } object WizardFX1 @@ -860,37 +863,41 @@ object WizardFX1 flickerlight IRONLICH1 { color 1.0 0.4 0.0 - size 64 - secondarySize 72 + size 96 + secondarySize 108 chance 0.3 offset 0 40 0 + attenuate 1 } flickerlight IRONLICH2 { color 1.0 0.7 0.0 - size 80 - secondarySize 88 + size 120 + secondarySize 132 chance 0.3 offset 0 44 0 + attenuate 1 } flickerlight IRONLICH3 { color 0.8 0.4 0.0 - size 64 - secondarySize 72 + size 96 + secondarySize 108 chance 0.3 offset 0 48 0 + attenuate 1 } flickerlight IRONLICH4 { color 0.4 0.0 0.0 - size 48 - secondarySize 56 + size 72 + secondarySize 84 chance 0.3 offset 0 40 0 + attenuate 1 } object Ironlich @@ -905,31 +912,36 @@ object Ironlich pointlight FROSTBALL { color 0.4 0.4 1.0 - size 48 + size 72 + attenuate 1 } pointlight 
FROSTBALL_X1 { color 0.4 0.4 1.0 - size 64 + size 96 + attenuate 1 } pointlight FROSTBALL_X2 { color 0.2 0.2 0.7 - size 56 + size 84 + attenuate 1 } pointlight FROSTBALL_X3 { color 0.1 0.1 0.4 - size 48 + size 72 + attenuate 1 } pointlight FROSTBALL_X4 { color 0.0 0.0 0.2 - size 40 + size 60 + attenuate 1 } object HeadFX1 @@ -948,7 +960,8 @@ object HeadFX1 pointlight FROSTSHARD { color 0.0 0.0 0.5 - size 32 + size 48 + attenuate 1 } object HeadFX2 @@ -962,41 +975,46 @@ object HeadFX2 flickerlight LICHFIRE { color 1.0 0.7 0.4 - size 48 - secondarySize 56 + size 72 + secondarySize 84 chance 0.5 + attenuate 1 } flickerlight LICHFIRE_X1 { color 0.9 0.4 0.3 - size 56 - secondarySize 64 + size 84 + secondarySize 96 chance 0.5 + attenuate 1 } flickerlight LICHFIRE_X2 { color 0.7 0.1 0.2 - size 48 - secondarySize 56 + size 72 + secondarySize 84 chance 0.5 + attenuate 1 } flickerlight LICHFIRE_X3 { color 0.4 0.1 0.1 - size 40 - secondarySize 48 + size 60 + secondarySize 72 chance 0.5 + attenuate 1 } flickerlight LICHFIRE_X4 { color 0.2 0.0 0.0 - size 32 - secondarySize 40 + size 48 + secondarySize 60 chance 0.5 + attenuate 1 } object HeadFX3 @@ -1015,33 +1033,37 @@ object HeadFX3 flickerlight CLINK_X1 { color 1.0 0.8 0.0 - size 40 - secondarySize 48 + size 60 + secondarySize 72 chance 0.5 + attenuate 1 } flickerlight CLINK_X2 { color 1.0 0.6 0.0 - size 64 - secondarySize 72 + size 96 + secondarySize 108 chance 0.5 + attenuate 1 } flickerlight CLINK_X3 { color 0.6 0.3 0.0 - size 56 - secondarySize 64 + size 88 + secondarySize 96 chance 0.5 + attenuate 1 } flickerlight CLINK_X4 { color 0.3 0.0 0.0 - size 48 - secondarySize 56 + size 72 + secondarySize 84 chance 0.5 + attenuate 1 } object Clink @@ -1056,10 +1078,11 @@ object Clink flickerlight BEASTATK { color 1.0 0.7 0.0 - size 56 - secondarySize 64 + size 84 + secondarySize 96 chance 0.5 offset 0 48 0 + attenuate 1 } object Beast @@ -1071,41 +1094,46 @@ object Beast flickerlight BEASTBALL { color 1.0 0.5 0.3 - size 48 - 
secondarySize 56 + size 72 + secondarySize 88 chance 0.3 + attenuate 1 } flickerlight BEASTBALL_X1 { color 0.8 0.4 0.24 - size 48 - secondarySize 56 + size 72 + secondarySize 84 chance 0.3 + attenuate 1 } flickerlight BEASTBALL_X2 { color 0.6 0.3 0.2 - size 40 - secondarySize 48 + size 60 + secondarySize 72 chance 0.3 + attenuate 1 } flickerlight BEASTBALL_X3 { color 0.4 0.2 0.1 - size 32 - secondarySize 40 + size 48 + secondarySize 60 chance 0.3 + attenuate 1 } flickerlight BEASTBALL_X4 { color 0.2 0.0 0.0 - size 28 - secondarySize 32 + size 42 + secondarySize 48 chance 0.3 + attenuate 1 } object BeastBall @@ -1124,39 +1152,44 @@ object BeastBall pointlight SNAKESHOT1 { color 0.5 0.3 1.0 - size 24 + size 36 + attenuate 1 } flickerlight SNAKESHOT1_X1 { color 0.5 0.3 1.0 - size 24 - secondarySize 26 + size 36 + secondarySize 39 chance 0.3 + attenuate 1 } flickerlight SNAKESHOT1_X2 { color 0.4 0.2 0.7 - size 24 - secondarySize 26 + size 36 + secondarySize 39 chance 0.3 + attenuate 1 } flickerlight SNAKESHOT1_X3 { color 0.3 0.1 0.5 - size 28 - secondarySize 30 + size 42 + secondarySize 45 chance 0.3 + attenuate 1 } flickerlight SNAKESHOT1_X4 { color 0.3 0.0 0.3 - size 26 - secondarySize 28 + size 39 + secondarySize 42 chance 0.3 + attenuate 1 } object SnakeProjA @@ -1176,31 +1209,35 @@ object SnakeProjA pointlight SNAKESHOT2 { color 1.0 0.6 0.4 - size 32 + size 48 + attenuate 1 } flickerlight SNAKESHOT2_X1 { color 1.0 0.6 0.4 - size 40 - secondarySize 48 + size 60 + secondarySize 72 chance 0.3 + attenuate 1 } flickerlight SNAKESHOT2_X2 { color 0.6 0.3 0.25 - size 48 - secondarySize 52 + size 72 + secondarySize 88 chance 0.3 + attenuate 1 } flickerlight SNAKESHOT2_X3 { color 0.3 0.1 0.1 - size 44 - secondarySize 48 + size 66 + secondarySize 72 chance 0.3 + attenuate 1 } object SnakeProjB @@ -1217,41 +1254,46 @@ object SnakeProjB flickerlight MAULBALL { color 1.0 0.7 0.5 - size 40 - secondarySize 48 + size 60 + secondarySize 72 chance 0.5 + attenuate 1 } flickerlight 
MAULBALL_X1 { color 0.8 0.6 0.4 - size 56 - secondarySize 64 + size 88 + secondarySize 96 chance 0.3 + attenuate 1 } flickerlight MAULBALL_X2 { color 0.8 0.4 0.3 - size 56 - secondarySize 64 + size 88 + secondarySize 96 chance 0.3 + attenuate 1 } flickerlight MAULBALL_X3 { color 0.6 0.3 0.2 - size 40 - secondarySize 48 + size 60 + secondarySize 72 chance 0.3 + attenuate 1 } flickerlight MAULBALL_X4 { color 0.4 0.1 0.0 - size 32 - secondarySize 40 + size 48 + secondarySize 60 chance 0.3 + attenuate 1 } object MinotaurFX1 @@ -1272,48 +1314,60 @@ pulselight MAULFLAME { color 1.0 0.7 0.5 size 1 - secondarySize 64 + secondarySize 96 interval 6.0 + offset 0 10 0 + attenuate 1 } flickerlight MAULFLAME_X1 { color 1.0 0.7 0.5 - size 48 - secondarySize 56 + size 72 + secondarySize 88 chance 0.3 + offset 0 10 0 + attenuate 1 } flickerlight MAULFLAME_X2 { color 1.0 0.7 0.5 - size 56 - secondarySize 64 + size 88 + secondarySize 96 chance 0.3 + offset 0 10 0 + attenuate 1 } flickerlight MAULFLAME_X3 { color 0.7 0.4 0.3 - size 64 - secondarySize 68 + size 96 + secondarySize 101 chance 0.3 + offset 0 10 0 + attenuate 1 } flickerlight MAULFLAME_X4 { color 0.5 0.3 0.1 - size 68 - secondarySize 72 + size 102 + secondarySize 108 chance 0.3 + offset 0 10 0 + attenuate 1 } flickerlight MAULFLAME_X5 { color 0.2 0.0 0.0 - size 72 - secondarySize 76 + size 108 + secondarySize 114 chance 0.3 + offset 0 10 0 + attenuate 1 } object MinotaurFX3 @@ -1337,41 +1391,46 @@ object MinotaurFX3 flickerlight SERPENTBALL { color 1.0 0.95 0.5 - size 56 - secondarySize 64 + size 588 + secondarySize 96 chance 0.5 + attenuate 1 } flickerlight SERPENTBALL_X1 { color 1.0 0.95 0.5 - size 64 - secondarySize 72 + size 96 + secondarySize 108 chance 0.5 + attenuate 1 } flickerlight SERPENTBALL_X2 { color 0.8 0.8 0.4 - size 72 - secondarySize 80 + size 108 + secondarySize 120 chance 0.5 + attenuate 1 } flickerlight SERPENTBALL_X3 { color 0.5 0.5 0.25 - size 88 - secondarySize 96 + size 132 + secondarySize 144 
chance 0.5 + attenuate 1 } flickerlight SERPENTBALL_X4 { color 0.2 0.2 0.1 - size 96 - secondarySize 104 + size 144 + secondarySize 156 chance 0.5 + attenuate 1 } object SorcererFX1 @@ -1391,49 +1450,55 @@ object SorcererFX1 flickerlight DSPARILBALL { color 0.5 0.5 1.0 - size 56 - secondarySize 64 + size 88 + secondarySize 96 chance 0.5 + attenuate 1 } flickerlight DSPARILBALL_X1 { color 0.5 0.5 1.0 - size 64 - secondarySize 72 + size 96 + secondarySize 108 chance 0.3 + attenuate 1 } flickerlight DSPARILBALL_X2 { color 0.4 0.4 0.8 - size 80 - secondarySize 88 + size 120 + secondarySize 132 chance 0.3 + attenuate 1 } flickerlight DSPARILBALL_X3 { color 0.3 0.3 0.6 - size 88 - secondarySize 92 + size 132 + secondarySize 138 chance 0.3 + attenuate 1 } flickerlight DSPARILBALL_X4 { color 0.2 0.2 0.4 - size 82 - secondarySize 86 + size 123 + secondarySize 138 chance 0.3 + attenuate 1 } flickerlight DSPARILBALL_X5 { color 0.1 0.1 0.2 - size 82 - secondarySize 86 + size 123 + secondarySize 138 chance 0.3 + attenuate 1 } object Sorcerer2FX1 @@ -1454,9 +1519,10 @@ object Sorcerer2FX1 flickerlight DSPARILATK { color 0.3 0.3 1.0 - size 64 - secondarySize 72 + size 96 + secondarySize 108 chance 0.5 + attenuate 1 } object Sorcerer2 @@ -1489,10 +1555,11 @@ object WallTorch flickerlight2 FIREBRAZ { color 1.0 0.8 0.0 - size 68 - secondarySize 76 + size 102 + secondarySize 114 interval 0.1 offset 0 48 0 + attenuate 1 } object FireBrazier @@ -1504,10 +1571,11 @@ object FireBrazier flickerlight2 SERPTORCH { color 1.0 0.8 0.0 - size 48 - secondarySize 56 + size 72 + secondarySize 88 interval 0.1 offset 0 48 0 + attenuate 1 } object SerpentTorch @@ -1519,9 +1587,10 @@ object SerpentTorch flickerlight2 CHANDELIER { color 1.0 1.0 0.0 - size 64 - secondarySize 68 + size 96 + secondarySize 102 interval 0.1 + attenuate 1 } object Chandelier @@ -1533,33 +1602,41 @@ object Chandelier flickerlight POD_X1 { color 0.0 1.0 0.0 - size 48 - secondarySize 56 + size 72 + secondarySize 88 chance 0.3 + 
offset 0 30 0 + attenuate 1 } flickerlight POD_X2 { color 0.0 0.7 0.0 - size 64 - secondarySize 72 + size 96 + secondarySize 108 chance 0.3 + offset 0 30 0 + attenuate 1 } flickerlight POD_X3 { color 0.0 0.4 0.0 - size 72 - secondarySize 80 + size 108 + secondarySize 120 chance 0.3 + offset 0 30 0 + attenuate 1 } flickerlight POD_X4 { color 0.0 0.2 0.0 - size 80 - secondarySize 88 + size 120 + secondarySize 132 chance 0.3 + offset 0 30 0 + attenuate 1 } object Pod @@ -1574,9 +1651,10 @@ object Pod flickerlight VOLCANOBALL1 { color 1.0 0.7 0.5 - size 56 - secondarySize 64 + size 88 + secondarySize 96 chance 0.5 + attenuate 1 } object VolcanoBlast @@ -1588,9 +1666,10 @@ object VolcanoBlast flickerlight VOLCANOBALL1 { color 1.0 0.5 0.0 - size 40 - secondarySize 48 + size 60 + secondarySize 72 chance 0.5 + attenuate 1 } object VolcanoTBlast @@ -1602,8 +1681,9 @@ object VolcanoTBlast pointlight BLUESTATUE { color 0.0 0.0 1.0 - size 32 + size 48 offset 0 64 0 + attenuate 1 } object KeyGizmoBlue @@ -1615,8 +1695,9 @@ object KeyGizmoBlue pointlight YELLOWSTATUE { color 1.0 1.0 0.0 - size 32 + size 48 offset 0 64 0 + attenuate 1 } object KeyGizmoYellow @@ -1628,8 +1709,9 @@ object KeyGizmoYellow pointlight GREENSTATUE { color 0.0 1.0 0.0 - size 32 + size 48 offset 0 64 0 + attenuate 1 } object KeyGizmoGreen @@ -1645,41 +1727,46 @@ object KeyGizmoGreen flickerlight TIMEBOMB_X1 { color 1.0 0.6 0.4 - size 48 - secondarySize 56 + size 72 + secondarySize 88 chance 0.3 + attenuate 1 } flickerlight TIMEBOMB_X1 { color 0.8 0.4 0.3 - size 56 - secondarySize 64 + size 88 + secondarySize 96 chance 0.3 + attenuate 1 } flickerlight TIMEBOMB_X1 { color 0.6 0.3 0.2 - size 64 - secondarySize 72 + size 96 + secondarySize 108 chance 0.3 + attenuate 1 } flickerlight TIMEBOMB_X1 { color 0.4 0.2 0.1 - size 72 - secondarySize 80 + size 108 + secondarySize 120 chance 0.3 + attenuate 1 } flickerlight TIMEBOMB_X1 { color 0.2 0.1 0.0 - size 80 - secondarySize 88 + size 120 + secondarySize 132 chance 
0.3 + attenuate 1 } object ActivatedTimeBomb @@ -1835,9 +1922,11 @@ object PhoenixRodHefty pulselight HYELLOWKEY { color 1.0 1.0 0.0 - size 24 - secondarySize 26 + size 36 + secondarySize 39 interval 2.0 + offset 0 16 0 + attenuate 1 } object KeyYellow { @@ -1848,9 +1937,11 @@ object KeyYellow pulselight HBLUEKEY { color 0.0 0.0 1.0 - size 24 - secondarySize 26 + size 36 + secondarySize 30 interval 2.0 + offset 0 16 0 + attenuate 1 } object KeyBlue @@ -1862,9 +1953,11 @@ object KeyBlue pulselight HGREENKEY { color 0.0 1.0 0.0 - size 24 - secondarySize 26 + size 36 + secondarySize 39 interval 2.0 + offset 0 16 0 + attenuate 1 } object KeyGreen @@ -1880,35 +1973,40 @@ object KeyGreen pointlight HTFOG1 { color 0.4 0.4 1.0 - size 64 + size 96 + attenuate 1 } pointlight HTFOG2 { color 0.4 0.4 1.0 - size 40 + size 60 + attenuate 1 } pointlight HTFOG3 { color 0.4 0.4 1.0 - size 16 + size 24 + attenuate 1 } flickerlight HTFOG4 { color 0.5 0.5 1.0 - size 40 - secondarySize 48 + size 60 + secondarySize 72 chance 0.4 + attenuate 1 } flickerlight HTFOG5 { color 0.5 0.5 1.0 - size 56 - secondarySize 64 + size 88 + secondarySize 96 chance 0.4 + attenuate 1 } object TeleportFog From b3cee51fd032f7888cd18b6f74f496808fe0e939 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 3 Jan 2017 20:53:40 +0100 Subject: [PATCH 642/912] Fixed clang errors about the gotos leaving certain variables uninitialized --- src/swrenderer/segments/r_drawsegment.cpp | 6 ++++-- src/swrenderer/things/r_decal.cpp | 3 ++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/swrenderer/segments/r_drawsegment.cpp b/src/swrenderer/segments/r_drawsegment.cpp index ca20dd0dbf..7884415dee 100644 --- a/src/swrenderer/segments/r_drawsegment.cpp +++ b/src/swrenderer/segments/r_drawsegment.cpp @@ -125,6 +125,9 @@ namespace swrenderer void R_RenderMaskedSegRange(drawseg_t *ds, int x1, int x2) { + float *MaskedSWall = nullptr, MaskedScaleY = 0, rw_scalestep = 0; + fixed_t *maskedtexturecol = nullptr; 
+ FTexture *tex; int i; sector_t tempsec; // killough 4/13/98 @@ -188,7 +191,6 @@ namespace swrenderer mfloorclip = openings + ds->sprbottomclip - ds->x1; mceilingclip = openings + ds->sprtopclip - ds->x1; - float *MaskedSWall, MaskedScaleY, rw_scalestep; // [RH] Draw fog partition if (ds->bFogBoundary) @@ -206,7 +208,7 @@ namespace swrenderer MaskedSWall = (float *)(openings + ds->swall) - ds->x1; MaskedScaleY = ds->yscale; - fixed_t *maskedtexturecol = (fixed_t *)(openings + ds->maskedtexturecol) - ds->x1; + maskedtexturecol = (fixed_t *)(openings + ds->maskedtexturecol) - ds->x1; spryscale = ds->iscale + ds->iscalestep * (x1 - ds->x1); rw_scalestep = ds->iscalestep; diff --git a/src/swrenderer/things/r_decal.cpp b/src/swrenderer/things/r_decal.cpp index 19281d12b9..3acc73d62d 100644 --- a/src/swrenderer/things/r_decal.cpp +++ b/src/swrenderer/things/r_decal.cpp @@ -68,6 +68,7 @@ namespace swrenderer bool calclighting; bool rereadcolormap; FDynamicColormap *usecolormap; + float light = 0; if (decal->RenderFlags & RF_INVISIBLE || !viewactive || !decal->PicNum.isValid()) return; @@ -243,7 +244,7 @@ namespace swrenderer rereadcolormap = false; } - float light = lightleft + (x1 - savecoord.sx1) * lightstep; + light = lightleft + (x1 - savecoord.sx1) * lightstep; if (fixedlightlev >= 0) R_SetColorMapLight((r_fullbrightignoresectorcolor) ? 
&FullNormalLight : usecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); else if (fixedcolormap != NULL) From dba81db1984c96417b55c59fae07fe1c07f49c6b Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 3 Jan 2017 21:16:21 +0100 Subject: [PATCH 643/912] Move material multiplication out of inner light loop --- src/swrenderer/drawers/r_draw_pal.cpp | 36 +++++++++---------- .../fixedfunction/drawspancodegen.cpp | 6 ++-- .../fixedfunction/drawwallcodegen.cpp | 6 ++-- 3 files changed, 24 insertions(+), 24 deletions(-) diff --git a/src/swrenderer/drawers/r_draw_pal.cpp b/src/swrenderer/drawers/r_draw_pal.cpp index 84b9745dd4..c0625e3fc3 100644 --- a/src/swrenderer/drawers/r_draw_pal.cpp +++ b/src/swrenderer/drawers/r_draw_pal.cpp @@ -112,9 +112,9 @@ namespace swrenderer uint8_t PalWall1Command::AddLights(const TriLight *lights, int num_lights, float viewpos_z, uint8_t fg, uint8_t material) { - uint32_t lit_r = GPalette.BaseColors[fg].r; - uint32_t lit_g = GPalette.BaseColors[fg].g; - uint32_t lit_b = GPalette.BaseColors[fg].b; + uint32_t lit_r = 0; + uint32_t lit_g = 0; + uint32_t lit_b = 0; uint32_t material_r = GPalette.BaseColors[material].r; uint32_t material_g = GPalette.BaseColors[material].g; @@ -144,14 +144,14 @@ namespace swrenderer float point_attenuation = lights[i].y * rcp_dist * distance_attenuation; uint32_t attenuation = (uint32_t)(lights[i].y == 0.0f ? 
simple_attenuation : point_attenuation); - lit_r += (light_color_r * material_r * attenuation) >> 16; - lit_g += (light_color_g * material_g * attenuation) >> 16; - lit_b += (light_color_b * material_b * attenuation) >> 16; + lit_r += (light_color_r * attenuation) >> 8; + lit_g += (light_color_g * attenuation) >> 8; + lit_b += (light_color_b * attenuation) >> 8; } - lit_r = MIN(lit_r, 255); - lit_g = MIN(lit_g, 255); - lit_b = MIN(lit_b, 255); + lit_r = MIN(GPalette.BaseColors[fg].r + ((lit_r * material_r) >> 8), 255); + lit_g = MIN(GPalette.BaseColors[fg].g + ((lit_g * material_g) >> 8), 255); + lit_b = MIN(GPalette.BaseColors[fg].b + ((lit_b * material_b) >> 8), 255); return RGB256k.All[((lit_r >> 2) << 12) | ((lit_g >> 2) << 6) | (lit_b >> 2)]; } @@ -1722,9 +1722,9 @@ namespace swrenderer uint8_t PalSpanCommand::AddLights(const TriLight *lights, int num_lights, float viewpos_x, uint8_t fg, uint8_t material) { - uint32_t lit_r = GPalette.BaseColors[fg].r; - uint32_t lit_g = GPalette.BaseColors[fg].g; - uint32_t lit_b = GPalette.BaseColors[fg].b; + uint32_t lit_r = 0; + uint32_t lit_g = 0; + uint32_t lit_b = 0; uint32_t material_r = GPalette.BaseColors[material].r; uint32_t material_g = GPalette.BaseColors[material].g; @@ -1754,14 +1754,14 @@ namespace swrenderer float point_attenuation = lights[i].z * rcp_dist * distance_attenuation; uint32_t attenuation = (uint32_t)(lights[i].z == 0.0f ? 
simple_attenuation : point_attenuation); - lit_r += (light_color_r * material_r * attenuation) >> 16; - lit_g += (light_color_g * material_g * attenuation) >> 16; - lit_b += (light_color_b * material_b * attenuation) >> 16; + lit_r += (light_color_r * attenuation) >> 8; + lit_g += (light_color_g * attenuation) >> 8; + lit_b += (light_color_b * attenuation) >> 8; } - lit_r = MIN(lit_r, 255); - lit_g = MIN(lit_g, 255); - lit_b = MIN(lit_b, 255); + lit_r = MIN(GPalette.BaseColors[fg].r + ((lit_r * material_r) >> 8), 255); + lit_g = MIN(GPalette.BaseColors[fg].g + ((lit_g * material_g) >> 8), 255); + lit_b = MIN(GPalette.BaseColors[fg].b + ((lit_b * material_b) >> 8), 255); return RGB256k.All[((lit_r >> 2) << 12) | ((lit_g >> 2) << 6) | (lit_b >> 2)]; } diff --git a/tools/drawergen/fixedfunction/drawspancodegen.cpp b/tools/drawergen/fixedfunction/drawspancodegen.cpp index 0fdadaa84f..f5a3b46c7b 100644 --- a/tools/drawergen/fixedfunction/drawspancodegen.cpp +++ b/tools/drawergen/fixedfunction/drawspancodegen.cpp @@ -237,7 +237,7 @@ SSAVec4i DrawSpanCodegen::Shade(SSAVec4i fg, bool isSimpleShade) else c = shade_bgra_advanced(fg, light, shade_constants); - stack_lit_color.store(c); + stack_lit_color.store(SSAVec4i(0)); stack_light_index.store(SSAInt(0)); SSAForBlock block; @@ -269,14 +269,14 @@ SSAVec4i DrawSpanCodegen::Shade(SSAVec4i fg, bool isSimpleShade) SSAFloat point_attenuation = light_z * rcp_dist * distance_attenuation; SSAInt attenuation = SSAInt((light_z == SSAFloat(0.0f)).select(simple_attenuation, point_attenuation), true); - SSAVec4i contribution = (light_color * fg * attenuation) >> 16; + SSAVec4i contribution = (light_color * attenuation) >> 8; stack_lit_color.store(lit_color + contribution); stack_light_index.store(light_index + 1); } block.end_block(); - return stack_lit_color.load(); + return c + ((stack_lit_color.load() * fg) >> 8); } SSAVec4i DrawSpanCodegen::Blend(SSAVec4i fg, SSAVec4i bg, DrawSpanVariant variant) diff --git 
a/tools/drawergen/fixedfunction/drawwallcodegen.cpp b/tools/drawergen/fixedfunction/drawwallcodegen.cpp index 9207fb63a7..df89432392 100644 --- a/tools/drawergen/fixedfunction/drawwallcodegen.cpp +++ b/tools/drawergen/fixedfunction/drawwallcodegen.cpp @@ -169,7 +169,7 @@ SSAVec4i DrawWallCodegen::Shade(SSAVec4i fg, bool isSimpleShade) else c = shade_bgra_advanced(fg, light, shade_constants); - stack_lit_color.store(c); + stack_lit_color.store(SSAVec4i(0)); stack_light_index.store(SSAInt(0)); SSAForBlock block; @@ -201,14 +201,14 @@ SSAVec4i DrawWallCodegen::Shade(SSAVec4i fg, bool isSimpleShade) SSAFloat point_attenuation = light_y * rcp_dist * distance_attenuation; SSAInt attenuation = SSAInt((light_y == SSAFloat(0.0f)).select(simple_attenuation, point_attenuation), true); - SSAVec4i contribution = (light_color * fg * attenuation) >> 16; + SSAVec4i contribution = (light_color * attenuation) >> 8; stack_lit_color.store(lit_color + contribution); stack_light_index.store(light_index + 1); } block.end_block(); - return stack_lit_color.load(); + return c + ((stack_lit_color.load() * fg) >> 8); } SSAVec4i DrawWallCodegen::Blend(SSAVec4i fg, SSAVec4i bg, DrawWallVariant variant) From 5caea5a256db5e02cd1c5a588c061026e0187556 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 4 Jan 2017 04:49:10 +0100 Subject: [PATCH 644/912] Fix clipping regression caused by splitting clip handling from line rendering --- src/swrenderer/line/r_line.cpp | 4 ++-- src/swrenderer/segments/r_clipsegment.cpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/swrenderer/line/r_line.cpp b/src/swrenderer/line/r_line.cpp index 18db1752fb..60269bf1b1 100644 --- a/src/swrenderer/line/r_line.cpp +++ b/src/swrenderer/line/r_line.cpp @@ -550,7 +550,7 @@ namespace swrenderer RenderWallSegmentTextures(start, stop); if (fake3D & 7) { - return !(fake3D & FAKE3D_FAKEMASK); + return (fake3D & FAKE3D_FAKEMASK) == 0; } // save sprite clipping info @@ -607,7 +607,7 @@ namespace 
swrenderer WallPortals.Push(pds); } - return !(fake3D & FAKE3D_FAKEMASK); + return (fake3D & FAKE3D_FAKEMASK) == 0; } void SWRenderLine::SetWallVariables(bool needlights) diff --git a/src/swrenderer/segments/r_clipsegment.cpp b/src/swrenderer/segments/r_clipsegment.cpp index 7a2d8b1f48..04bef89077 100644 --- a/src/swrenderer/segments/r_clipsegment.cpp +++ b/src/swrenderer/segments/r_clipsegment.cpp @@ -144,9 +144,9 @@ namespace swrenderer } // There is a fragment above *start. - if (!callback(first, start->first) && solid) + if (callback(first, start->first) && solid) { - start->first = first; + start->first = first; // Adjust the clip size for solid walls } } From c396e7f949f507d45a9e76afc3e6c163c4ed9a30 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 4 Jan 2017 05:10:16 +0100 Subject: [PATCH 645/912] Make r_bsp and r_line stop sharing floorplane and ceiling plane variables --- src/swrenderer/line/r_line.cpp | 4 +++- src/swrenderer/line/r_line.h | 6 +++++- src/swrenderer/scene/r_bsp.cpp | 25 ++++++++++--------------- src/swrenderer/scene/r_bsp.h | 2 -- 4 files changed, 18 insertions(+), 19 deletions(-) diff --git a/src/swrenderer/line/r_line.cpp b/src/swrenderer/line/r_line.cpp index 60269bf1b1..e01e9f34a9 100644 --- a/src/swrenderer/line/r_line.cpp +++ b/src/swrenderer/line/r_line.cpp @@ -51,7 +51,7 @@ EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor); namespace swrenderer { - void SWRenderLine::Render(seg_t *line, subsector_t *subsector, sector_t *sector, sector_t *fakebacksector) + void SWRenderLine::Render(seg_t *line, subsector_t *subsector, sector_t *sector, sector_t *fakebacksector, visplane_t *linefloorplane, visplane_t *lineceilingplane) { static sector_t tempsec; // killough 3/8/98: ceiling/water hack bool solid; @@ -60,6 +60,8 @@ namespace swrenderer InSubsector = subsector; frontsector = sector; backsector = fakebacksector; + floorplane = linefloorplane; + ceilingplane = lineceilingplane; curline = line; diff --git 
a/src/swrenderer/line/r_line.h b/src/swrenderer/line/r_line.h index 7c677e072d..18d46e8b4e 100644 --- a/src/swrenderer/line/r_line.h +++ b/src/swrenderer/line/r_line.h @@ -15,6 +15,8 @@ namespace swrenderer { + struct visplane_t; + struct FWallCoords { FVector2 tleft; // coords at left of wall in view space rx1,ry1 @@ -38,7 +40,7 @@ namespace swrenderer class SWRenderLine { public: - void Render(seg_t *line, subsector_t *subsector, sector_t *sector, sector_t *fakebacksector); + void Render(seg_t *line, subsector_t *subsector, sector_t *sector, sector_t *fakebacksector, visplane_t *floorplane, visplane_t *ceilingplane); private: bool RenderWallSegment(int x1, int x2); @@ -51,6 +53,8 @@ namespace swrenderer subsector_t *InSubsector; sector_t *frontsector; sector_t *backsector; + visplane_t *floorplane; + visplane_t *ceilingplane; seg_t *curline; side_t *sidedef; diff --git a/src/swrenderer/scene/r_bsp.cpp b/src/swrenderer/scene/r_bsp.cpp index dc7747ec35..0440b41bb9 100644 --- a/src/swrenderer/scene/r_bsp.cpp +++ b/src/swrenderer/scene/r_bsp.cpp @@ -63,17 +63,13 @@ namespace swrenderer { subsector_t *InSubsector; sector_t *frontsector; + uint8_t FakeSide; SWRenderLine render_line; } bool r_fakingunderwater; -static BYTE FakeSide; - -visplane_t *floorplane; -visplane_t *ceilingplane; - // Clip values are the solid pixel bounding the range. 
// floorclip starts out SCREENHEIGHT and is just outside the range // ceilingclip starts out 0 and is just inside the range @@ -415,7 +411,7 @@ static void R_AddPolyobjs(subsector_t *sub) } // kg3D - add fake segs, never rendered -void R_FakeDrawLoop(subsector_t *sub) +void R_FakeDrawLoop(subsector_t *sub, visplane_t *floorplane, visplane_t *ceilingplane) { int count; seg_t* line; @@ -427,7 +423,7 @@ void R_FakeDrawLoop(subsector_t *sub) { if ((line->sidedef) && !(line->sidedef->Flags & WALLF_POLYOBJ)) { - render_line.Render(line, InSubsector, frontsector, nullptr); + render_line.Render(line, InSubsector, frontsector, nullptr, floorplane, ceilingplane); } line++; } @@ -517,7 +513,7 @@ void R_Subsector (subsector_t *sub) portal = frontsector->ValidatePortal(sector_t::ceiling); - ceilingplane = frontsector->ceilingplane.PointOnSide(ViewPos) > 0 || + visplane_t *ceilingplane = frontsector->ceilingplane.PointOnSide(ViewPos) > 0 || frontsector->GetTexture(sector_t::ceiling) == skyflatnum || portal != NULL || (frontsector->heightsec && @@ -557,7 +553,7 @@ void R_Subsector (subsector_t *sub) // killough 10/98: add support for skies transferred from sidedefs portal = frontsector->ValidatePortal(sector_t::floor); - floorplane = frontsector->floorplane.PointOnSide(ViewPos) > 0 || // killough 3/7/98 + visplane_t *floorplane = frontsector->floorplane.PointOnSide(ViewPos) > 0 || // killough 3/7/98 frontsector->GetTexture(sector_t::floor) == skyflatnum || portal != NULL || (frontsector->heightsec && @@ -636,7 +632,7 @@ void R_Subsector (subsector_t *sub) if (floorplane) R_AddPlaneLights(floorplane, frontsector->lighthead); - R_FakeDrawLoop(sub); + R_FakeDrawLoop(sub, floorplane, ceilingplane); fake3D = 0; frontsector = sub->sector; } @@ -700,7 +696,7 @@ void R_Subsector (subsector_t *sub) if (ceilingplane) R_AddPlaneLights(ceilingplane, frontsector->lighthead); - R_FakeDrawLoop(sub); + R_FakeDrawLoop(sub, floorplane, ceilingplane); fake3D = 0; frontsector = sub->sector; } @@ 
-719,8 +715,7 @@ void R_Subsector (subsector_t *sub) // lightlevels on floor & ceiling lightlevels in the surrounding area. // [RH] Handle sprite lighting like Duke 3D: If the ceiling is a sky, sprites are lit by // it, otherwise they are lit by the floor. - R_AddSprites (sub->sector, frontsector->GetTexture(sector_t::ceiling) == skyflatnum ? - ceilinglightlevel : floorlightlevel, FakeSide); + R_AddSprites (sub->sector, frontsector->GetTexture(sector_t::ceiling) == skyflatnum ? ceilinglightlevel : floorlightlevel, FakeSide); // [RH] Add particles if ((unsigned int)(sub - subsectors) < (unsigned int)numsubsectors) @@ -761,14 +756,14 @@ void R_Subsector (subsector_t *sub) fakeFloor->validcount = validcount; R_3D_NewClip(); } - render_line.Render(line, InSubsector, frontsector, &tempsec); // fake + render_line.Render(line, InSubsector, frontsector, &tempsec, floorplane, ceilingplane); // fake } fakeFloor = NULL; fake3D = 0; floorplane = backupfp; ceilingplane = backupcp; } - render_line.Render(line, InSubsector, frontsector, nullptr); // now real + render_line.Render(line, InSubsector, frontsector, nullptr, floorplane, ceilingplane); // now real } line++; } diff --git a/src/swrenderer/scene/r_bsp.h b/src/swrenderer/scene/r_bsp.h index e39b760955..ddd1f8bad4 100644 --- a/src/swrenderer/scene/r_bsp.h +++ b/src/swrenderer/scene/r_bsp.h @@ -52,8 +52,6 @@ void R_RenderBSPNode (void *node); // killough 4/13/98: fake floors/ceilings for deep water / fake ceilings: sector_t *R_FakeFlat(sector_t *sec, sector_t *tempsec, int *floorlightlevel, int *ceilinglightlevel, seg_t *backline, int backx1, int backx2, double frontcz1, double frontcz2); -extern visplane_t *floorplane; -extern visplane_t *ceilingplane; extern short floorclip[MAXWIDTH]; extern short ceilingclip[MAXWIDTH]; From 28732d63d23bd471195535f68e4ba5211df6ee53 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 4 Jan 2017 15:39:47 +0100 Subject: [PATCH 646/912] Move r_bsp into a class --- 
src/swrenderer/line/r_line.cpp | 10 +- src/swrenderer/plane/r_visibleplane.cpp | 9 +- src/swrenderer/r_main.cpp | 23 +- src/swrenderer/r_swrenderer.cpp | 2 +- src/swrenderer/scene/r_3dfloors.cpp | 4 +- src/swrenderer/scene/r_bsp.cpp | 1393 ++++++++++----------- src/swrenderer/scene/r_bsp.h | 70 +- src/swrenderer/scene/r_portal.cpp | 10 +- src/swrenderer/segments/r_drawsegment.cpp | 2 +- src/swrenderer/things/r_decal.cpp | 8 +- src/swrenderer/things/r_particle.cpp | 2 + src/swrenderer/things/r_playersprite.cpp | 2 +- 12 files changed, 758 insertions(+), 777 deletions(-) diff --git a/src/swrenderer/line/r_line.cpp b/src/swrenderer/line/r_line.cpp index e01e9f34a9..19b22ce109 100644 --- a/src/swrenderer/line/r_line.cpp +++ b/src/swrenderer/line/r_line.cpp @@ -134,7 +134,7 @@ namespace swrenderer // kg3D - its fake, no transfer_heights if (!(fake3D & FAKE3D_FAKEBACK)) { // killough 3/8/98, 4/4/98: hack for invisible ceilings / deep water - backsector = R_FakeFlat(backsector, &tempsec, nullptr, nullptr, curline, WallC.sx1, WallC.sx2, rw_frontcz1, rw_frontcz2); + backsector = RenderBSP::Instance()->FakeFlat(backsector, &tempsec, nullptr, nullptr, curline, WallC.sx1, WallC.sx2, rw_frontcz1, rw_frontcz2); } doorclosed = false; // killough 4/16/98 @@ -447,7 +447,7 @@ namespace swrenderer // kg3D - backup for mid and fake walls draw_segment->bkup = R_NewOpening(stop - start); - memcpy(openings + draw_segment->bkup, &ceilingclip[start], sizeof(short)*(stop - start)); + memcpy(openings + draw_segment->bkup, &RenderBSP::Instance()->ceilingclip[start], sizeof(short)*(stop - start)); draw_segment->bFogBoundary = IsFogBoundary(frontsector, backsector); if (sidedef->GetTexture(side_t::mid).isValid() || draw_segment->bFakeBoundary) @@ -559,13 +559,13 @@ namespace swrenderer if (((draw_segment->silhouette & SIL_TOP) || maskedtexture) && draw_segment->sprtopclip == -1) { draw_segment->sprtopclip = R_NewOpening(stop - start); - memcpy(openings + draw_segment->sprtopclip, 
&ceilingclip[start], sizeof(short)*(stop - start)); + memcpy(openings + draw_segment->sprtopclip, &RenderBSP::Instance()->ceilingclip[start], sizeof(short)*(stop - start)); } if (((draw_segment->silhouette & SIL_BOTTOM) || maskedtexture) && draw_segment->sprbottomclip == -1) { draw_segment->sprbottomclip = R_NewOpening(stop - start); - memcpy(openings + draw_segment->sprbottomclip, &floorclip[start], sizeof(short)*(stop - start)); + memcpy(openings + draw_segment->sprbottomclip, &RenderBSP::Instance()->floorclip[start], sizeof(short)*(stop - start)); } if (maskedtexture && curline->sidedef->GetTexture(side_t::mid).isValid()) @@ -960,6 +960,8 @@ namespace swrenderer R_SetColorMapLight(fixedcolormap, 0, 0); // clip wall to the floor and ceiling + auto ceilingclip = RenderBSP::Instance()->ceilingclip; + auto floorclip = RenderBSP::Instance()->floorclip; for (x = x1; x < x2; ++x) { if (walltop[x] < ceilingclip[x]) diff --git a/src/swrenderer/plane/r_visibleplane.cpp b/src/swrenderer/plane/r_visibleplane.cpp index 3cee3e8694..e312f9cad8 100644 --- a/src/swrenderer/plane/r_visibleplane.cpp +++ b/src/swrenderer/plane/r_visibleplane.cpp @@ -474,12 +474,7 @@ namespace swrenderer if (pl->left >= pl->right) return; - if (r_drawflat) // no texture mapping - { - drawerargs::ds_color += 4; - R_DrawColoredPlane(pl); - } - else if (pl->picnum == skyflatnum) // sky flat + if (pl->picnum == skyflatnum) // sky flat { R_DrawSkyPlane(pl); } @@ -506,7 +501,7 @@ namespace swrenderer basecolormap = pl->colormap; - if (r_drawflat || (!pl->height.isSlope() && !tilt)) + if (!pl->height.isSlope() && !tilt) { R_DrawNormalPlane(pl, xscale, yscale, alpha, additive, masked); } diff --git a/src/swrenderer/r_main.cpp b/src/swrenderer/r_main.cpp index 1dbae64ba3..bdad72d98b 100644 --- a/src/swrenderer/r_main.cpp +++ b/src/swrenderer/r_main.cpp @@ -103,9 +103,7 @@ static void R_ShutdownRenderer(); // EXTERNAL DATA DECLARATIONS ---------------------------------------------- extern short *openings; 
-extern bool r_fakingunderwater; extern int fuzzviewheight; -extern subsector_t *InSubsector; // PRIVATE DATA DECLARATIONS ----------------------------------------------- @@ -555,24 +553,13 @@ void R_RenderActorView (AActor *actor, bool dontmaplines) R_ClearSprites (); // opening / clipping determination - // [RH] clip ceiling to console bottom - fillshort(floorclip, viewwidth, viewheight); - fillshort(ceilingclip, viewwidth, !screen->Accel2D && ConBottom > viewwindowy && !bRenderingToCanvas ? (ConBottom - viewwindowy) : 0); + RenderBSP::Instance()->ClearClip(); R_FreeOpenings(); NetUpdate (); - // [RH] Show off segs if r_drawflat is 1 - if (r_drawflat) - { - colfunc = &SWPixelFormatDrawers::FillColumn; - spanfunc = &SWPixelFormatDrawers::FillSpan; - } - else - { - colfunc = basecolfunc; - spanfunc = &SWPixelFormatDrawers::DrawSpan; - } + colfunc = basecolfunc; + spanfunc = &SWPixelFormatDrawers::DrawSpan; WindowLeft = 0; WindowRight = viewwidth; @@ -583,7 +570,7 @@ void R_RenderActorView (AActor *actor, bool dontmaplines) r_dontmaplines = dontmaplines; // [RH] Hack to make windows into underwater areas possible - r_fakingunderwater = false; + RenderBSP::Instance()->ResetFakingUnderwater(); // [RH] Setup particles for this frame P_FindParticleSubsectors (); @@ -597,7 +584,7 @@ void R_RenderActorView (AActor *actor, bool dontmaplines) } // Link the polyobjects right before drawing the scene to reduce the amounts of calls to this function PO_LinkToSubsectors(); - R_RenderScene(); + RenderBSP::Instance()->RenderScene(); R_3D_ResetClip(); // reset clips (floor/ceiling) camera->renderflags = savedflags; WallCycles.Unclock(); diff --git a/src/swrenderer/r_swrenderer.cpp b/src/swrenderer/r_swrenderer.cpp index c68aed77ad..497b8e6747 100644 --- a/src/swrenderer/r_swrenderer.cpp +++ b/src/swrenderer/r_swrenderer.cpp @@ -484,6 +484,6 @@ void FSoftwareRenderer::RenderTextureView (FCanvasTexture *tex, AActor *viewpoin sector_t *FSoftwareRenderer::FakeFlat(sector_t *sec, 
sector_t *tempsec, int *floorlightlevel, int *ceilinglightlevel) { - return R_FakeFlat(sec, tempsec, floorlightlevel, ceilinglightlevel, nullptr, 0, 0, 0, 0); + return RenderBSP::Instance()->FakeFlat(sec, tempsec, floorlightlevel, ceilinglightlevel, nullptr, 0, 0, 0, 0); } diff --git a/src/swrenderer/scene/r_3dfloors.cpp b/src/swrenderer/scene/r_3dfloors.cpp index 9a5cdadcbb..5d3bb78221 100644 --- a/src/swrenderer/scene/r_3dfloors.cpp +++ b/src/swrenderer/scene/r_3dfloors.cpp @@ -98,8 +98,8 @@ void R_3D_NewClip() curr = (ClipStack*)M_Malloc(sizeof(ClipStack)); curr->next = 0; - memcpy(curr->floorclip, floorclip, sizeof(short) * MAXWIDTH); - memcpy(curr->ceilingclip, ceilingclip, sizeof(short) * MAXWIDTH); + memcpy(curr->floorclip, RenderBSP::Instance()->floorclip, sizeof(short) * MAXWIDTH); + memcpy(curr->ceilingclip, RenderBSP::Instance()->ceilingclip, sizeof(short) * MAXWIDTH); curr->ffloor = fakeFloor; assert(fakeFloor->floorclip == NULL); assert(fakeFloor->ceilingclip == NULL); diff --git a/src/swrenderer/scene/r_bsp.cpp b/src/swrenderer/scene/r_bsp.cpp index 0440b41bb9..f36d0439b5 100644 --- a/src/swrenderer/scene/r_bsp.cpp +++ b/src/swrenderer/scene/r_bsp.cpp @@ -44,6 +44,7 @@ #include "a_sharedglobal.h" #include "g_level.h" #include "p_effect.h" +#include "c_console.h" // State. #include "doomstat.h" @@ -54,762 +55,746 @@ #include "po_man.h" #include "r_data/colormaps.h" -CVAR (Bool, r_drawflat, false, 0) // [RH] Don't texture segs? EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor); namespace swrenderer { - namespace + RenderBSP *RenderBSP::Instance() { - subsector_t *InSubsector; - sector_t *frontsector; - uint8_t FakeSide; - - SWRenderLine render_line; + static RenderBSP bsp; + return &bsp; } -bool r_fakingunderwater; - -// Clip values are the solid pixel bounding the range. 
-// floorclip starts out SCREENHEIGHT and is just outside the range -// ceilingclip starts out 0 and is just inside the range -// -short floorclip[MAXWIDTH]; -short ceilingclip[MAXWIDTH]; - - -// -// killough 3/7/98: Hack floor/ceiling heights for deep water etc. -// -// If player's view height is underneath fake floor, lower the -// drawn ceiling to be just under the floor height, and replace -// the drawn floor and ceiling textures, and light level, with -// the control sector's. -// -// Similar for ceiling, only reflected. -// -// killough 4/11/98, 4/13/98: fix bugs, add 'back' parameter -// - -sector_t *R_FakeFlat(sector_t *sec, sector_t *tempsec, int *floorlightlevel, int *ceilinglightlevel, seg_t *backline, int backx1, int backx2, double frontcz1, double frontcz2) -{ - // [RH] allow per-plane lighting - if (floorlightlevel != NULL) + sector_t *RenderBSP::FakeFlat(sector_t *sec, sector_t *tempsec, int *floorlightlevel, int *ceilinglightlevel, seg_t *backline, int backx1, int backx2, double frontcz1, double frontcz2) { - *floorlightlevel = sec->GetFloorLight (); - } + // If player's view height is underneath fake floor, lower the + // drawn ceiling to be just under the floor height, and replace + // the drawn floor and ceiling textures, and light level, with + // the control sector's. + // + // Similar for ceiling, only reflected. - if (ceilinglightlevel != NULL) - { - *ceilinglightlevel = sec->GetCeilingLight (); - } - - FakeSide = FAKED_Center; - - const sector_t *s = sec->GetHeightSec(); - if (s != NULL) - { - sector_t *heightsec = viewsector->heightsec; - bool underwater = r_fakingunderwater || - (heightsec && heightsec->floorplane.PointOnSide(ViewPos) <= 0); - bool doorunderwater = false; - int diffTex = (s->MoreFlags & SECF_CLIPFAKEPLANES); - - // Replace sector being drawn with a copy to be hacked - *tempsec = *sec; - - // Replace floor and ceiling height with control sector's heights. 
- if (diffTex) + // [RH] allow per-plane lighting + if (floorlightlevel != NULL) { - if (s->floorplane.CopyPlaneIfValid (&tempsec->floorplane, &sec->ceilingplane)) + *floorlightlevel = sec->GetFloorLight(); + } + + if (ceilinglightlevel != NULL) + { + *ceilinglightlevel = sec->GetCeilingLight(); + } + + FakeSide = FAKED_Center; + + const sector_t *s = sec->GetHeightSec(); + if (s != NULL) + { + sector_t *heightsec = viewsector->heightsec; + bool underwater = r_fakingunderwater || + (heightsec && heightsec->floorplane.PointOnSide(ViewPos) <= 0); + bool doorunderwater = false; + int diffTex = (s->MoreFlags & SECF_CLIPFAKEPLANES); + + // Replace sector being drawn with a copy to be hacked + *tempsec = *sec; + + // Replace floor and ceiling height with control sector's heights. + if (diffTex) { - tempsec->SetTexture(sector_t::floor, s->GetTexture(sector_t::floor), false); - } - else if (s->MoreFlags & SECF_FAKEFLOORONLY) - { - if (underwater) + if (s->floorplane.CopyPlaneIfValid(&tempsec->floorplane, &sec->ceilingplane)) { - tempsec->ColorMap = s->ColorMap; - if (!(s->MoreFlags & SECF_NOFAKELIGHT)) + tempsec->SetTexture(sector_t::floor, s->GetTexture(sector_t::floor), false); + } + else if (s->MoreFlags & SECF_FAKEFLOORONLY) + { + if (underwater) { - tempsec->lightlevel = s->lightlevel; - - if (floorlightlevel != NULL) + tempsec->ColorMap = s->ColorMap; + if (!(s->MoreFlags & SECF_NOFAKELIGHT)) { - *floorlightlevel = s->GetFloorLight (); + tempsec->lightlevel = s->lightlevel; + + if (floorlightlevel != NULL) + { + *floorlightlevel = s->GetFloorLight(); + } + + if (ceilinglightlevel != NULL) + { + *ceilinglightlevel = s->GetCeilingLight(); + } } + FakeSide = FAKED_BelowFloor; + return tempsec; + } + return sec; + } + } + else + { + tempsec->floorplane = s->floorplane; + } - if (ceilinglightlevel != NULL) + if (!(s->MoreFlags & SECF_FAKEFLOORONLY)) + { + if (diffTex) + { + if (s->ceilingplane.CopyPlaneIfValid(&tempsec->ceilingplane, &sec->floorplane)) + { + 
tempsec->SetTexture(sector_t::ceiling, s->GetTexture(sector_t::ceiling), false); + } + } + else + { + tempsec->ceilingplane = s->ceilingplane; + } + } + + double refceilz = s->ceilingplane.ZatPoint(ViewPos); + double orgceilz = sec->ceilingplane.ZatPoint(ViewPos); + +#if 1 + // [RH] Allow viewing underwater areas through doors/windows that + // are underwater but not in a water sector themselves. + // Only works if you cannot see the top surface of any deep water + // sectors at the same time. + if (backline && !r_fakingunderwater && backline->frontsector->heightsec == NULL) + { + if (frontcz1 <= s->floorplane.ZatPoint(backline->v1) && + frontcz2 <= s->floorplane.ZatPoint(backline->v2)) + { + // Check that the window is actually visible + for (int z = backx1; z < backx2; ++z) + { + if (floorclip[z] > ceilingclip[z]) { - *ceilinglightlevel = s->GetCeilingLight (); + doorunderwater = true; + r_fakingunderwater = true; + break; } } - FakeSide = FAKED_BelowFloor; - return tempsec; } - return sec; + } +#endif + + if (underwater || doorunderwater) + { + tempsec->floorplane = sec->floorplane; + tempsec->ceilingplane = s->floorplane; + tempsec->ceilingplane.FlipVert(); + tempsec->ceilingplane.ChangeHeight(-1 / 65536.); + tempsec->ColorMap = s->ColorMap; + } + + // killough 11/98: prevent sudden light changes from non-water sectors: + if ((underwater && !backline) || doorunderwater) + { // head-below-floor hack + tempsec->SetTexture(sector_t::floor, diffTex ? 
sec->GetTexture(sector_t::floor) : s->GetTexture(sector_t::floor), false); + tempsec->planes[sector_t::floor].xform = s->planes[sector_t::floor].xform; + + tempsec->ceilingplane = s->floorplane; + tempsec->ceilingplane.FlipVert(); + tempsec->ceilingplane.ChangeHeight(-1 / 65536.); + if (s->GetTexture(sector_t::ceiling) == skyflatnum) + { + tempsec->floorplane = tempsec->ceilingplane; + tempsec->floorplane.FlipVert(); + tempsec->floorplane.ChangeHeight(+1 / 65536.); + tempsec->SetTexture(sector_t::ceiling, tempsec->GetTexture(sector_t::floor), false); + tempsec->planes[sector_t::ceiling].xform = tempsec->planes[sector_t::floor].xform; + } + else + { + tempsec->SetTexture(sector_t::ceiling, diffTex ? s->GetTexture(sector_t::floor) : s->GetTexture(sector_t::ceiling), false); + tempsec->planes[sector_t::ceiling].xform = s->planes[sector_t::ceiling].xform; + } + + if (!(s->MoreFlags & SECF_NOFAKELIGHT)) + { + tempsec->lightlevel = s->lightlevel; + + if (floorlightlevel != NULL) + { + *floorlightlevel = s->GetFloorLight(); + } + + if (ceilinglightlevel != NULL) + { + *ceilinglightlevel = s->GetCeilingLight(); + } + } + FakeSide = FAKED_BelowFloor; + } + else if (heightsec && heightsec->ceilingplane.PointOnSide(ViewPos) <= 0 && + orgceilz > refceilz && !(s->MoreFlags & SECF_FAKEFLOORONLY)) + { // Above-ceiling hack + tempsec->ceilingplane = s->ceilingplane; + tempsec->floorplane = s->ceilingplane; + tempsec->floorplane.FlipVert(); + tempsec->floorplane.ChangeHeight(+1 / 65536.); + tempsec->ColorMap = s->ColorMap; + tempsec->ColorMap = s->ColorMap; + + tempsec->SetTexture(sector_t::ceiling, diffTex ? 
sec->GetTexture(sector_t::ceiling) : s->GetTexture(sector_t::ceiling), false); + tempsec->SetTexture(sector_t::floor, s->GetTexture(sector_t::ceiling), false); + tempsec->planes[sector_t::ceiling].xform = tempsec->planes[sector_t::floor].xform = s->planes[sector_t::ceiling].xform; + + if (s->GetTexture(sector_t::floor) != skyflatnum) + { + tempsec->ceilingplane = sec->ceilingplane; + tempsec->SetTexture(sector_t::floor, s->GetTexture(sector_t::floor), false); + tempsec->planes[sector_t::floor].xform = s->planes[sector_t::floor].xform; + } + + if (!(s->MoreFlags & SECF_NOFAKELIGHT)) + { + tempsec->lightlevel = s->lightlevel; + + if (floorlightlevel != NULL) + { + *floorlightlevel = s->GetFloorLight(); + } + + if (ceilinglightlevel != NULL) + { + *ceilinglightlevel = s->GetCeilingLight(); + } + } + FakeSide = FAKED_AboveCeiling; + } + sec = tempsec; // Use other sector + } + return sec; + } + + + + // Checks BSP node/subtree bounding box. + // Returns true if some part of the bbox might be visible. + bool RenderBSP::CheckBBox(float *bspcoord) + { + static const int checkcoord[12][4] = + { + { 3,0,2,1 }, + { 3,0,2,0 }, + { 3,1,2,0 }, + { 0 }, + { 2,0,2,1 }, + { 0,0,0,0 }, + { 3,1,3,0 }, + { 0 }, + { 2,0,3,1 }, + { 2,1,3,1 }, + { 2,1,3,0 } + }; + + int boxx; + int boxy; + int boxpos; + + double x1, y1, x2, y2; + double rx1, ry1, rx2, ry2; + int sx1, sx2; + + // Find the corners of the box + // that define the edges from current viewpoint. 
+ if (ViewPos.X <= bspcoord[BOXLEFT]) + boxx = 0; + else if (ViewPos.X < bspcoord[BOXRIGHT]) + boxx = 1; + else + boxx = 2; + + if (ViewPos.Y >= bspcoord[BOXTOP]) + boxy = 0; + else if (ViewPos.Y > bspcoord[BOXBOTTOM]) + boxy = 1; + else + boxy = 2; + + boxpos = (boxy << 2) + boxx; + if (boxpos == 5) + return true; + + x1 = bspcoord[checkcoord[boxpos][0]] - ViewPos.X; + y1 = bspcoord[checkcoord[boxpos][1]] - ViewPos.Y; + x2 = bspcoord[checkcoord[boxpos][2]] - ViewPos.X; + y2 = bspcoord[checkcoord[boxpos][3]] - ViewPos.Y; + + // check clip list for an open space + + // Sitting on a line? + if (y1 * (x1 - x2) + x1 * (y2 - y1) >= -EQUAL_EPSILON) + return true; + + rx1 = x1 * ViewSin - y1 * ViewCos; + rx2 = x2 * ViewSin - y2 * ViewCos; + ry1 = x1 * ViewTanCos + y1 * ViewTanSin; + ry2 = x2 * ViewTanCos + y2 * ViewTanSin; + + if (MirrorFlags & RF_XFLIP) + { + double t = -rx1; + rx1 = -rx2; + rx2 = t; + swapvalues(ry1, ry2); + } + + if (rx1 >= -ry1) + { + if (rx1 > ry1) return false; // left edge is off the right side + if (ry1 == 0) return false; + sx1 = xs_RoundToInt(CenterX + rx1 * CenterX / ry1); + } + else + { + if (rx2 < -ry2) return false; // wall is off the left side + if (rx1 - rx2 - ry2 + ry1 == 0) return false; // wall does not intersect view volume + sx1 = 0; + } + + if (rx2 <= ry2) + { + if (rx2 < -ry2) return false; // right edge is off the left side + if (ry2 == 0) return false; + sx2 = xs_RoundToInt(CenterX + rx2 * CenterX / ry2); + } + else + { + if (rx1 > ry1) return false; // wall is off the right side + if (ry2 - ry1 - rx2 + rx1 == 0) return false; // wall does not intersect view volume + sx2 = viewwidth; + } + + // Find the first clippost that touches the source post + // (adjacent pixels are touching). 
+ + return R_IsWallSegmentVisible(sx1, sx2); + } + + void RenderBSP::AddPolyobjs(subsector_t *sub) + { + if (sub->BSP == NULL || sub->BSP->bDirty) + { + sub->BuildPolyBSP(); + } + if (sub->BSP->Nodes.Size() == 0) + { + RenderSubsector(&sub->BSP->Subsectors[0]); + } + else + { + RenderBSPNode(&sub->BSP->Nodes.Last()); + } + } + + // kg3D - add fake segs, never rendered + void RenderBSP::FakeDrawLoop(subsector_t *sub, visplane_t *floorplane, visplane_t *ceilingplane) + { + int count; + seg_t* line; + + count = sub->numlines; + line = sub->firstline; + + while (count--) + { + if ((line->sidedef) && !(line->sidedef->Flags & WALLF_POLYOBJ)) + { + renderline.Render(line, InSubsector, frontsector, nullptr, floorplane, ceilingplane); + } + line++; + } + } + + void RenderBSP::RenderSubsector(subsector_t *sub) + { + // Determine floor/ceiling planes. + // Add sprites of things in sector. + // Draw one or more line segments. + + int count; + seg_t* line; + sector_t tempsec; // killough 3/7/98: deep water hack + int floorlightlevel; // killough 3/16/98: set floor lightlevel + int ceilinglightlevel; // killough 4/11/98 + bool outersubsector; + int fll, cll, position; + FSectorPortal *portal; + + // kg3D - fake floor stuff + visplane_t *backupfp; + visplane_t *backupcp; + //secplane_t templane; + lightlist_t *light; + + if (InSubsector != NULL) + { // InSubsector is not NULL. This means we are rendering from a mini-BSP. 
+ outersubsector = false; + } + else + { + outersubsector = true; + InSubsector = sub; + } + +#ifdef RANGECHECK + if (outersubsector && sub - subsectors >= (ptrdiff_t)numsubsectors) + I_Error("RenderSubsector: ss %ti with numss = %i", sub - subsectors, numsubsectors); +#endif + + assert(sub->sector != NULL); + + if (sub->polys) + { // Render the polyobjs in the subsector first + AddPolyobjs(sub); + if (outersubsector) + { + InSubsector = NULL; + } + return; + } + + frontsector = sub->sector; + frontsector->MoreFlags |= SECF_DRAWN; + count = sub->numlines; + line = sub->firstline; + + // killough 3/8/98, 4/4/98: Deep water / fake ceiling effect + frontsector = FakeFlat(frontsector, &tempsec, &floorlightlevel, &ceilinglightlevel, nullptr, 0, 0, 0, 0); + + fll = floorlightlevel; + cll = ceilinglightlevel; + + // [RH] set foggy flag + foggy = level.fadeto || frontsector->ColorMap->Fade || (level.flags & LEVEL_HASFADETABLE); + r_actualextralight = foggy ? 0 : extralight << 4; + + // kg3D - fake lights + if (fixedlightlev < 0 && frontsector->e && frontsector->e->XFloor.lightlist.Size()) + { + light = P_GetPlaneLight(frontsector, &frontsector->ceilingplane, false); + basecolormap = light->extra_colormap; + // If this is the real ceiling, don't discard plane lighting R_FakeFlat() + // accounted for. + if (light->p_lightlevel != &frontsector->lightlevel) + { + ceilinglightlevel = *light->p_lightlevel; } } else { - tempsec->floorplane = s->floorplane; + basecolormap = (r_fullbrightignoresectorcolor && fixedlightlev >= 0) ? 
&FullNormalLight : frontsector->ColorMap; } - if (!(s->MoreFlags & SECF_FAKEFLOORONLY)) + portal = frontsector->ValidatePortal(sector_t::ceiling); + + visplane_t *ceilingplane = frontsector->ceilingplane.PointOnSide(ViewPos) > 0 || + frontsector->GetTexture(sector_t::ceiling) == skyflatnum || + portal != NULL || + (frontsector->heightsec && + !(frontsector->heightsec->MoreFlags & SECF_IGNOREHEIGHTSEC) && + frontsector->heightsec->GetTexture(sector_t::floor) == skyflatnum) ? + R_FindPlane(frontsector->ceilingplane, // killough 3/8/98 + frontsector->GetTexture(sector_t::ceiling), + ceilinglightlevel + r_actualextralight, // killough 4/11/98 + frontsector->GetAlpha(sector_t::ceiling), + !!(frontsector->GetFlags(sector_t::ceiling) & PLANEF_ADDITIVE), + frontsector->planes[sector_t::ceiling].xform, + frontsector->sky, + portal + ) : NULL; + + if (ceilingplane) + R_AddPlaneLights(ceilingplane, frontsector->lighthead); + + if (fixedlightlev < 0 && frontsector->e && frontsector->e->XFloor.lightlist.Size()) { - if (diffTex) + light = P_GetPlaneLight(frontsector, &frontsector->floorplane, false); + basecolormap = light->extra_colormap; + // If this is the real floor, don't discard plane lighting R_FakeFlat() + // accounted for. + if (light->p_lightlevel != &frontsector->lightlevel) { - if (s->ceilingplane.CopyPlaneIfValid (&tempsec->ceilingplane, &sec->floorplane)) + floorlightlevel = *light->p_lightlevel; + } + } + else + { + basecolormap = (r_fullbrightignoresectorcolor && fixedlightlev >= 0) ? 
&FullNormalLight : frontsector->ColorMap; + } + + // killough 3/7/98: Add (x,y) offsets to flats, add deep water check + // killough 3/16/98: add floorlightlevel + // killough 10/98: add support for skies transferred from sidedefs + portal = frontsector->ValidatePortal(sector_t::floor); + + visplane_t *floorplane = frontsector->floorplane.PointOnSide(ViewPos) > 0 || // killough 3/7/98 + frontsector->GetTexture(sector_t::floor) == skyflatnum || + portal != NULL || + (frontsector->heightsec && + !(frontsector->heightsec->MoreFlags & SECF_IGNOREHEIGHTSEC) && + frontsector->heightsec->GetTexture(sector_t::ceiling) == skyflatnum) ? + R_FindPlane(frontsector->floorplane, + frontsector->GetTexture(sector_t::floor), + floorlightlevel + r_actualextralight, // killough 3/16/98 + frontsector->GetAlpha(sector_t::floor), + !!(frontsector->GetFlags(sector_t::floor) & PLANEF_ADDITIVE), + frontsector->planes[sector_t::floor].xform, + frontsector->sky, + portal + ) : NULL; + + if (floorplane) + R_AddPlaneLights(floorplane, frontsector->lighthead); + + // kg3D - fake planes rendering + if (r_3dfloors && frontsector->e && frontsector->e->XFloor.ffloors.Size()) + { + backupfp = floorplane; + backupcp = ceilingplane; + // first check all floors + for (int i = 0; i < (int)frontsector->e->XFloor.ffloors.Size(); i++) + { + fakeFloor = frontsector->e->XFloor.ffloors[i]; + if (!(fakeFloor->flags & FF_EXISTS)) continue; + if (!fakeFloor->model) continue; + if (fakeFloor->bottom.plane->isSlope()) continue; + if (!(fakeFloor->flags & FF_NOSHADE) || (fakeFloor->flags & (FF_RENDERPLANES | FF_RENDERSIDES))) { - tempsec->SetTexture(sector_t::ceiling, s->GetTexture(sector_t::ceiling), false); + R_3D_AddHeight(fakeFloor->top.plane, frontsector); } - } - else - { - tempsec->ceilingplane = s->ceilingplane; - } - } - - double refceilz = s->ceilingplane.ZatPoint(ViewPos); - double orgceilz = sec->ceilingplane.ZatPoint(ViewPos); - -#if 1 - // [RH] Allow viewing underwater areas through doors/windows that 
- // are underwater but not in a water sector themselves. - // Only works if you cannot see the top surface of any deep water - // sectors at the same time. - if (backline && !r_fakingunderwater && backline->frontsector->heightsec == NULL) - { - if (frontcz1 <= s->floorplane.ZatPoint(backline->v1) && - frontcz2 <= s->floorplane.ZatPoint(backline->v2)) - { - // Check that the window is actually visible - for (int z = backx1; z < backx2; ++z) + if (!(fakeFloor->flags & FF_RENDERPLANES)) continue; + if (fakeFloor->alpha == 0) continue; + if (fakeFloor->flags & FF_THISINSIDE && fakeFloor->flags & FF_INVERTSECTOR) continue; + fakeAlpha = MIN(Scale(fakeFloor->alpha, OPAQUE, 255), OPAQUE); + if (fakeFloor->validcount != validcount) { - if (floorclip[z] > ceilingclip[z]) + fakeFloor->validcount = validcount; + R_3D_NewClip(); + } + double fakeHeight = fakeFloor->top.plane->ZatPoint(frontsector->centerspot); + if (fakeHeight < ViewPos.Z && + fakeHeight > frontsector->floorplane.ZatPoint(frontsector->centerspot)) + { + fake3D = FAKE3D_FAKEFLOOR; + tempsec = *fakeFloor->model; + tempsec.floorplane = *fakeFloor->top.plane; + tempsec.ceilingplane = *fakeFloor->bottom.plane; + if (!(fakeFloor->flags & FF_THISINSIDE) && !(fakeFloor->flags & FF_INVERTSECTOR)) { - doorunderwater = true; - r_fakingunderwater = true; - break; + tempsec.SetTexture(sector_t::floor, tempsec.GetTexture(sector_t::ceiling)); + position = sector_t::ceiling; } + else position = sector_t::floor; + frontsector = &tempsec; + + if (fixedlightlev < 0 && sub->sector->e->XFloor.lightlist.Size()) + { + light = P_GetPlaneLight(sub->sector, &frontsector->floorplane, false); + basecolormap = light->extra_colormap; + floorlightlevel = *light->p_lightlevel; + } + + ceilingplane = NULL; + floorplane = R_FindPlane(frontsector->floorplane, + frontsector->GetTexture(sector_t::floor), + floorlightlevel + r_actualextralight, // killough 3/16/98 + frontsector->GetAlpha(sector_t::floor), + !!(fakeFloor->flags & 
FF_ADDITIVETRANS), + frontsector->planes[position].xform, + frontsector->sky, + NULL); + + if (floorplane) + R_AddPlaneLights(floorplane, frontsector->lighthead); + + FakeDrawLoop(sub, floorplane, ceilingplane); + fake3D = 0; + frontsector = sub->sector; } } - } -#endif + // and now ceilings + for (unsigned int i = 0; i < frontsector->e->XFloor.ffloors.Size(); i++) + { + fakeFloor = frontsector->e->XFloor.ffloors[i]; + if (!(fakeFloor->flags & FF_EXISTS)) continue; + if (!fakeFloor->model) continue; + if (fakeFloor->top.plane->isSlope()) continue; + if (!(fakeFloor->flags & FF_NOSHADE) || (fakeFloor->flags & (FF_RENDERPLANES | FF_RENDERSIDES))) + { + R_3D_AddHeight(fakeFloor->bottom.plane, frontsector); + } + if (!(fakeFloor->flags & FF_RENDERPLANES)) continue; + if (fakeFloor->alpha == 0) continue; + if (!(fakeFloor->flags & FF_THISINSIDE) && (fakeFloor->flags & (FF_SWIMMABLE | FF_INVERTSECTOR)) == (FF_SWIMMABLE | FF_INVERTSECTOR)) continue; + fakeAlpha = MIN(Scale(fakeFloor->alpha, OPAQUE, 255), OPAQUE); - if (underwater || doorunderwater) + if (fakeFloor->validcount != validcount) + { + fakeFloor->validcount = validcount; + R_3D_NewClip(); + } + double fakeHeight = fakeFloor->bottom.plane->ZatPoint(frontsector->centerspot); + if (fakeHeight > ViewPos.Z && + fakeHeight < frontsector->ceilingplane.ZatPoint(frontsector->centerspot)) + { + fake3D = FAKE3D_FAKECEILING; + tempsec = *fakeFloor->model; + tempsec.floorplane = *fakeFloor->top.plane; + tempsec.ceilingplane = *fakeFloor->bottom.plane; + if ((!(fakeFloor->flags & FF_THISINSIDE) && !(fakeFloor->flags & FF_INVERTSECTOR)) || + (fakeFloor->flags & FF_THISINSIDE && fakeFloor->flags & FF_INVERTSECTOR)) + { + tempsec.SetTexture(sector_t::ceiling, tempsec.GetTexture(sector_t::floor)); + position = sector_t::floor; + } + else position = sector_t::ceiling; + frontsector = &tempsec; + + tempsec.ceilingplane.ChangeHeight(-1 / 65536.); + if (fixedlightlev < 0 && sub->sector->e->XFloor.lightlist.Size()) + { + light = 
P_GetPlaneLight(sub->sector, &frontsector->ceilingplane, false); + basecolormap = light->extra_colormap; + ceilinglightlevel = *light->p_lightlevel; + } + tempsec.ceilingplane.ChangeHeight(1 / 65536.); + + floorplane = NULL; + ceilingplane = R_FindPlane(frontsector->ceilingplane, // killough 3/8/98 + frontsector->GetTexture(sector_t::ceiling), + ceilinglightlevel + r_actualextralight, // killough 4/11/98 + frontsector->GetAlpha(sector_t::ceiling), + !!(fakeFloor->flags & FF_ADDITIVETRANS), + frontsector->planes[position].xform, + frontsector->sky, + NULL); + + if (ceilingplane) + R_AddPlaneLights(ceilingplane, frontsector->lighthead); + + FakeDrawLoop(sub, floorplane, ceilingplane); + fake3D = 0; + frontsector = sub->sector; + } + } + fakeFloor = NULL; + floorplane = backupfp; + ceilingplane = backupcp; + } + + basecolormap = frontsector->ColorMap; + floorlightlevel = fll; + ceilinglightlevel = cll; + + // killough 9/18/98: Fix underwater slowdown, by passing real sector + // instead of fake one. Improve sprite lighting by basing sprite + // lightlevels on floor & ceiling lightlevels in the surrounding area. + // [RH] Handle sprite lighting like Duke 3D: If the ceiling is a sky, sprites are lit by + // it, otherwise they are lit by the floor. + R_AddSprites(sub->sector, frontsector->GetTexture(sector_t::ceiling) == skyflatnum ? ceilinglightlevel : floorlightlevel, FakeSide); + + // [RH] Add particles + if ((unsigned int)(sub - subsectors) < (unsigned int)numsubsectors) + { // Only do it for the main BSP. 
+ int shade = LIGHT2SHADE((floorlightlevel + ceilinglightlevel) / 2 + r_actualextralight); + for (WORD i = ParticlesInSubsec[(unsigned int)(sub - subsectors)]; i != NO_PARTICLE; i = Particles[i].snext) + { + R_ProjectParticle(Particles + i, subsectors[sub - subsectors].sector, shade, FakeSide); + } + } + + count = sub->numlines; + line = sub->firstline; + + while (count--) { - tempsec->floorplane = sec->floorplane; - tempsec->ceilingplane = s->floorplane; - tempsec->ceilingplane.FlipVert (); - tempsec->ceilingplane.ChangeHeight(-1 / 65536.); - tempsec->ColorMap = s->ColorMap; - } - - // killough 11/98: prevent sudden light changes from non-water sectors: - if ((underwater && !backline) || doorunderwater) - { // head-below-floor hack - tempsec->SetTexture(sector_t::floor, diffTex ? sec->GetTexture(sector_t::floor) : s->GetTexture(sector_t::floor), false); - tempsec->planes[sector_t::floor].xform = s->planes[sector_t::floor].xform; - - tempsec->ceilingplane = s->floorplane; - tempsec->ceilingplane.FlipVert (); - tempsec->ceilingplane.ChangeHeight (-1 / 65536.); - if (s->GetTexture(sector_t::ceiling) == skyflatnum) + if (!outersubsector || line->sidedef == NULL || !(line->sidedef->Flags & WALLF_POLYOBJ)) { - tempsec->floorplane = tempsec->ceilingplane; - tempsec->floorplane.FlipVert (); - tempsec->floorplane.ChangeHeight (+1 / 65536.); - tempsec->SetTexture(sector_t::ceiling, tempsec->GetTexture(sector_t::floor), false); - tempsec->planes[sector_t::ceiling].xform = tempsec->planes[sector_t::floor].xform; - } - else - { - tempsec->SetTexture(sector_t::ceiling, diffTex ? 
s->GetTexture(sector_t::floor) : s->GetTexture(sector_t::ceiling), false); - tempsec->planes[sector_t::ceiling].xform = s->planes[sector_t::ceiling].xform; - } - - if (!(s->MoreFlags & SECF_NOFAKELIGHT)) - { - tempsec->lightlevel = s->lightlevel; - - if (floorlightlevel != NULL) + // kg3D - fake planes bounding calculation + if (r_3dfloors && line->backsector && frontsector->e && line->backsector->e->XFloor.ffloors.Size()) { - *floorlightlevel = s->GetFloorLight (); - } - - if (ceilinglightlevel != NULL) - { - *ceilinglightlevel = s->GetCeilingLight (); + backupfp = floorplane; + backupcp = ceilingplane; + floorplane = NULL; + ceilingplane = NULL; + for (unsigned int i = 0; i < line->backsector->e->XFloor.ffloors.Size(); i++) + { + fakeFloor = line->backsector->e->XFloor.ffloors[i]; + if (!(fakeFloor->flags & FF_EXISTS)) continue; + if (!(fakeFloor->flags & FF_RENDERPLANES)) continue; + if (!fakeFloor->model) continue; + fake3D = FAKE3D_FAKEBACK; + tempsec = *fakeFloor->model; + tempsec.floorplane = *fakeFloor->top.plane; + tempsec.ceilingplane = *fakeFloor->bottom.plane; + if (fakeFloor->validcount != validcount) + { + fakeFloor->validcount = validcount; + R_3D_NewClip(); + } + renderline.Render(line, InSubsector, frontsector, &tempsec, floorplane, ceilingplane); // fake + } + fakeFloor = NULL; + fake3D = 0; + floorplane = backupfp; + ceilingplane = backupcp; } + renderline.Render(line, InSubsector, frontsector, nullptr, floorplane, ceilingplane); // now real } - FakeSide = FAKED_BelowFloor; + line++; } - else if (heightsec && heightsec->ceilingplane.PointOnSide(ViewPos) <= 0 && - orgceilz > refceilz && !(s->MoreFlags & SECF_FAKEFLOORONLY)) - { // Above-ceiling hack - tempsec->ceilingplane = s->ceilingplane; - tempsec->floorplane = s->ceilingplane; - tempsec->floorplane.FlipVert (); - tempsec->floorplane.ChangeHeight (+1 / 65536.); - tempsec->ColorMap = s->ColorMap; - tempsec->ColorMap = s->ColorMap; - - tempsec->SetTexture(sector_t::ceiling, diffTex ? 
sec->GetTexture(sector_t::ceiling) : s->GetTexture(sector_t::ceiling), false); - tempsec->SetTexture(sector_t::floor, s->GetTexture(sector_t::ceiling), false); - tempsec->planes[sector_t::ceiling].xform = tempsec->planes[sector_t::floor].xform = s->planes[sector_t::ceiling].xform; - - if (s->GetTexture(sector_t::floor) != skyflatnum) - { - tempsec->ceilingplane = sec->ceilingplane; - tempsec->SetTexture(sector_t::floor, s->GetTexture(sector_t::floor), false); - tempsec->planes[sector_t::floor].xform = s->planes[sector_t::floor].xform; - } - - if (!(s->MoreFlags & SECF_NOFAKELIGHT)) - { - tempsec->lightlevel = s->lightlevel; - - if (floorlightlevel != NULL) - { - *floorlightlevel = s->GetFloorLight (); - } - - if (ceilinglightlevel != NULL) - { - *ceilinglightlevel = s->GetCeilingLight (); - } - } - FakeSide = FAKED_AboveCeiling; - } - sec = tempsec; // Use other sector - } - return sec; -} - - - -// Checks BSP node/subtree bounding box. -// Returns true if some part of the bbox might be visible. -static bool R_CheckBBox (float *bspcoord) // killough 1/28/98: static -{ - static const int checkcoord[12][4] = - { - { 3,0,2,1 }, - { 3,0,2,0 }, - { 3,1,2,0 }, - { 0 }, - { 2,0,2,1 }, - { 0,0,0,0 }, - { 3,1,3,0 }, - { 0 }, - { 2,0,3,1 }, - { 2,1,3,1 }, - { 2,1,3,0 } - }; - - int boxx; - int boxy; - int boxpos; - - double x1, y1, x2, y2; - double rx1, ry1, rx2, ry2; - int sx1, sx2; - - // Find the corners of the box - // that define the edges from current viewpoint. 
- if (ViewPos.X <= bspcoord[BOXLEFT]) - boxx = 0; - else if (ViewPos.X < bspcoord[BOXRIGHT]) - boxx = 1; - else - boxx = 2; - - if (ViewPos.Y >= bspcoord[BOXTOP]) - boxy = 0; - else if (ViewPos.Y > bspcoord[BOXBOTTOM]) - boxy = 1; - else - boxy = 2; - - boxpos = (boxy<<2)+boxx; - if (boxpos == 5) - return true; - - x1 = bspcoord[checkcoord[boxpos][0]] - ViewPos.X; - y1 = bspcoord[checkcoord[boxpos][1]] - ViewPos.Y; - x2 = bspcoord[checkcoord[boxpos][2]] - ViewPos.X; - y2 = bspcoord[checkcoord[boxpos][3]] - ViewPos.Y; - - // check clip list for an open space - - // Sitting on a line? - if (y1 * (x1 - x2) + x1 * (y2 - y1) >= -EQUAL_EPSILON) - return true; - - rx1 = x1 * ViewSin - y1 * ViewCos; - rx2 = x2 * ViewSin - y2 * ViewCos; - ry1 = x1 * ViewTanCos + y1 * ViewTanSin; - ry2 = x2 * ViewTanCos + y2 * ViewTanSin; - - if (MirrorFlags & RF_XFLIP) - { - double t = -rx1; - rx1 = -rx2; - rx2 = t; - swapvalues(ry1, ry2); - } - - if (rx1 >= -ry1) - { - if (rx1 > ry1) return false; // left edge is off the right side - if (ry1 == 0) return false; - sx1 = xs_RoundToInt(CenterX + rx1 * CenterX / ry1); - } - else - { - if (rx2 < -ry2) return false; // wall is off the left side - if (rx1 - rx2 - ry2 + ry1 == 0) return false; // wall does not intersect view volume - sx1 = 0; - } - - if (rx2 <= ry2) - { - if (rx2 < -ry2) return false; // right edge is off the left side - if (ry2 == 0) return false; - sx2 = xs_RoundToInt(CenterX + rx2 * CenterX / ry2); - } - else - { - if (rx1 > ry1) return false; // wall is off the right side - if (ry2 - ry1 - rx2 + rx1 == 0) return false; // wall does not intersect view volume - sx2 = viewwidth; - } - - // Find the first clippost that touches the source post - // (adjacent pixels are touching). 
- - return R_IsWallSegmentVisible(sx1, sx2); -} - - -void R_Subsector (subsector_t *sub); -static void R_AddPolyobjs(subsector_t *sub) -{ - if (sub->BSP == NULL || sub->BSP->bDirty) - { - sub->BuildPolyBSP(); - } - if (sub->BSP->Nodes.Size() == 0) - { - R_Subsector(&sub->BSP->Subsectors[0]); - } - else - { - R_RenderBSPNode(&sub->BSP->Nodes.Last()); - } -} - -// kg3D - add fake segs, never rendered -void R_FakeDrawLoop(subsector_t *sub, visplane_t *floorplane, visplane_t *ceilingplane) -{ - int count; - seg_t* line; - - count = sub->numlines; - line = sub->firstline; - - while (count--) - { - if ((line->sidedef) && !(line->sidedef->Flags & WALLF_POLYOBJ)) - { - render_line.Render(line, InSubsector, frontsector, nullptr, floorplane, ceilingplane); - } - line++; - } -} - -// -// R_Subsector -// Determine floor/ceiling planes. -// Add sprites of things in sector. -// Draw one or more line segments. -// -void R_Subsector (subsector_t *sub) -{ - int count; - seg_t* line; - sector_t tempsec; // killough 3/7/98: deep water hack - int floorlightlevel; // killough 3/16/98: set floor lightlevel - int ceilinglightlevel; // killough 4/11/98 - bool outersubsector; - int fll, cll, position; - FSectorPortal *portal; - - // kg3D - fake floor stuff - visplane_t *backupfp; - visplane_t *backupcp; - //secplane_t templane; - lightlist_t *light; - - if (InSubsector != NULL) - { // InSubsector is not NULL. This means we are rendering from a mini-BSP. 
- outersubsector = false; - } - else - { - outersubsector = true; - InSubsector = sub; - } - -#ifdef RANGECHECK - if (outersubsector && sub - subsectors >= (ptrdiff_t)numsubsectors) - I_Error ("R_Subsector: ss %ti with numss = %i", sub - subsectors, numsubsectors); -#endif - - assert(sub->sector != NULL); - - if (sub->polys) - { // Render the polyobjs in the subsector first - R_AddPolyobjs(sub); if (outersubsector) { InSubsector = NULL; } - return; } - frontsector = sub->sector; - frontsector->MoreFlags |= SECF_DRAWN; - count = sub->numlines; - line = sub->firstline; - - // killough 3/8/98, 4/4/98: Deep water / fake ceiling effect - frontsector = R_FakeFlat(frontsector, &tempsec, &floorlightlevel, &ceilinglightlevel, nullptr, 0, 0, 0, 0); - - fll = floorlightlevel; - cll = ceilinglightlevel; - - // [RH] set foggy flag - foggy = level.fadeto || frontsector->ColorMap->Fade || (level.flags & LEVEL_HASFADETABLE); - r_actualextralight = foggy ? 0 : extralight << 4; - - // kg3D - fake lights - if (fixedlightlev < 0 && frontsector->e && frontsector->e->XFloor.lightlist.Size()) + void RenderBSP::RenderScene() { - light = P_GetPlaneLight(frontsector, &frontsector->ceilingplane, false); - basecolormap = light->extra_colormap; - // If this is the real ceiling, don't discard plane lighting R_FakeFlat() - // accounted for. - if (light->p_lightlevel != &frontsector->lightlevel) + InSubsector = nullptr; + RenderBSPNode(nodes + numnodes - 1); // The head node is the last node output. + } + + // + // RenderBSPNode + // Renders all subsectors below a given node, traversing subtree recursively. + // Just call with BSP root and -1. + // killough 5/2/98: reformatted, removed tail recursion + + void RenderBSP::RenderBSPNode(void *node) + { + if (numnodes == 0) { - ceilinglightlevel = *light->p_lightlevel; - } - } - else - { - basecolormap = (r_fullbrightignoresectorcolor && fixedlightlev >= 0) ? 
&FullNormalLight : frontsector->ColorMap; - } - - portal = frontsector->ValidatePortal(sector_t::ceiling); - - visplane_t *ceilingplane = frontsector->ceilingplane.PointOnSide(ViewPos) > 0 || - frontsector->GetTexture(sector_t::ceiling) == skyflatnum || - portal != NULL || - (frontsector->heightsec && - !(frontsector->heightsec->MoreFlags & SECF_IGNOREHEIGHTSEC) && - frontsector->heightsec->GetTexture(sector_t::floor) == skyflatnum) ? - R_FindPlane(frontsector->ceilingplane, // killough 3/8/98 - frontsector->GetTexture(sector_t::ceiling), - ceilinglightlevel + r_actualextralight, // killough 4/11/98 - frontsector->GetAlpha(sector_t::ceiling), - !!(frontsector->GetFlags(sector_t::ceiling) & PLANEF_ADDITIVE), - frontsector->planes[sector_t::ceiling].xform, - frontsector->sky, - portal - ) : NULL; - - if (ceilingplane) - R_AddPlaneLights(ceilingplane, frontsector->lighthead); - - if (fixedlightlev < 0 && frontsector->e && frontsector->e->XFloor.lightlist.Size()) - { - light = P_GetPlaneLight(frontsector, &frontsector->floorplane, false); - basecolormap = light->extra_colormap; - // If this is the real floor, don't discard plane lighting R_FakeFlat() - // accounted for. - if (light->p_lightlevel != &frontsector->lightlevel) - { - floorlightlevel = *light->p_lightlevel; - } - } - else - { - basecolormap = (r_fullbrightignoresectorcolor && fixedlightlev >= 0) ? 
&FullNormalLight : frontsector->ColorMap; - } - - // killough 3/7/98: Add (x,y) offsets to flats, add deep water check - // killough 3/16/98: add floorlightlevel - // killough 10/98: add support for skies transferred from sidedefs - portal = frontsector->ValidatePortal(sector_t::floor); - - visplane_t *floorplane = frontsector->floorplane.PointOnSide(ViewPos) > 0 || // killough 3/7/98 - frontsector->GetTexture(sector_t::floor) == skyflatnum || - portal != NULL || - (frontsector->heightsec && - !(frontsector->heightsec->MoreFlags & SECF_IGNOREHEIGHTSEC) && - frontsector->heightsec->GetTexture(sector_t::ceiling) == skyflatnum) ? - R_FindPlane(frontsector->floorplane, - frontsector->GetTexture(sector_t::floor), - floorlightlevel + r_actualextralight, // killough 3/16/98 - frontsector->GetAlpha(sector_t::floor), - !!(frontsector->GetFlags(sector_t::floor) & PLANEF_ADDITIVE), - frontsector->planes[sector_t::floor].xform, - frontsector->sky, - portal - ) : NULL; - - if (floorplane) - R_AddPlaneLights(floorplane, frontsector->lighthead); - - // kg3D - fake planes rendering - if (r_3dfloors && frontsector->e && frontsector->e->XFloor.ffloors.Size()) - { - backupfp = floorplane; - backupcp = ceilingplane; - // first check all floors - for (int i = 0; i < (int)frontsector->e->XFloor.ffloors.Size(); i++) - { - fakeFloor = frontsector->e->XFloor.ffloors[i]; - if (!(fakeFloor->flags & FF_EXISTS)) continue; - if (!fakeFloor->model) continue; - if (fakeFloor->bottom.plane->isSlope()) continue; - if (!(fakeFloor->flags & FF_NOSHADE) || (fakeFloor->flags & (FF_RENDERPLANES|FF_RENDERSIDES))) - { - R_3D_AddHeight(fakeFloor->top.plane, frontsector); - } - if (!(fakeFloor->flags & FF_RENDERPLANES)) continue; - if (fakeFloor->alpha == 0) continue; - if (fakeFloor->flags & FF_THISINSIDE && fakeFloor->flags & FF_INVERTSECTOR) continue; - fakeAlpha = MIN(Scale(fakeFloor->alpha, OPAQUE, 255), OPAQUE); - if (fakeFloor->validcount != validcount) - { - fakeFloor->validcount = validcount; - 
R_3D_NewClip(); - } - double fakeHeight = fakeFloor->top.plane->ZatPoint(frontsector->centerspot); - if (fakeHeight < ViewPos.Z && - fakeHeight > frontsector->floorplane.ZatPoint(frontsector->centerspot)) - { - fake3D = FAKE3D_FAKEFLOOR; - tempsec = *fakeFloor->model; - tempsec.floorplane = *fakeFloor->top.plane; - tempsec.ceilingplane = *fakeFloor->bottom.plane; - if (!(fakeFloor->flags & FF_THISINSIDE) && !(fakeFloor->flags & FF_INVERTSECTOR)) - { - tempsec.SetTexture(sector_t::floor, tempsec.GetTexture(sector_t::ceiling)); - position = sector_t::ceiling; - } else position = sector_t::floor; - frontsector = &tempsec; - - if (fixedlightlev < 0 && sub->sector->e->XFloor.lightlist.Size()) - { - light = P_GetPlaneLight(sub->sector, &frontsector->floorplane, false); - basecolormap = light->extra_colormap; - floorlightlevel = *light->p_lightlevel; - } - - ceilingplane = NULL; - floorplane = R_FindPlane(frontsector->floorplane, - frontsector->GetTexture(sector_t::floor), - floorlightlevel + r_actualextralight, // killough 3/16/98 - frontsector->GetAlpha(sector_t::floor), - !!(fakeFloor->flags & FF_ADDITIVETRANS), - frontsector->planes[position].xform, - frontsector->sky, - NULL); - - if (floorplane) - R_AddPlaneLights(floorplane, frontsector->lighthead); - - R_FakeDrawLoop(sub, floorplane, ceilingplane); - fake3D = 0; - frontsector = sub->sector; - } - } - // and now ceilings - for (unsigned int i = 0; i < frontsector->e->XFloor.ffloors.Size(); i++) - { - fakeFloor = frontsector->e->XFloor.ffloors[i]; - if (!(fakeFloor->flags & FF_EXISTS)) continue; - if (!fakeFloor->model) continue; - if (fakeFloor->top.plane->isSlope()) continue; - if (!(fakeFloor->flags & FF_NOSHADE) || (fakeFloor->flags & (FF_RENDERPLANES|FF_RENDERSIDES))) - { - R_3D_AddHeight(fakeFloor->bottom.plane, frontsector); - } - if (!(fakeFloor->flags & FF_RENDERPLANES)) continue; - if (fakeFloor->alpha == 0) continue; - if (!(fakeFloor->flags & FF_THISINSIDE) && (fakeFloor->flags & 
(FF_SWIMMABLE|FF_INVERTSECTOR)) == (FF_SWIMMABLE|FF_INVERTSECTOR)) continue; - fakeAlpha = MIN(Scale(fakeFloor->alpha, OPAQUE, 255), OPAQUE); - - if (fakeFloor->validcount != validcount) - { - fakeFloor->validcount = validcount; - R_3D_NewClip(); - } - double fakeHeight = fakeFloor->bottom.plane->ZatPoint(frontsector->centerspot); - if (fakeHeight > ViewPos.Z && - fakeHeight < frontsector->ceilingplane.ZatPoint(frontsector->centerspot)) - { - fake3D = FAKE3D_FAKECEILING; - tempsec = *fakeFloor->model; - tempsec.floorplane = *fakeFloor->top.plane; - tempsec.ceilingplane = *fakeFloor->bottom.plane; - if ((!(fakeFloor->flags & FF_THISINSIDE) && !(fakeFloor->flags & FF_INVERTSECTOR)) || - (fakeFloor->flags & FF_THISINSIDE && fakeFloor->flags & FF_INVERTSECTOR)) - { - tempsec.SetTexture(sector_t::ceiling, tempsec.GetTexture(sector_t::floor)); - position = sector_t::floor; - } else position = sector_t::ceiling; - frontsector = &tempsec; - - tempsec.ceilingplane.ChangeHeight(-1 / 65536.); - if (fixedlightlev < 0 && sub->sector->e->XFloor.lightlist.Size()) - { - light = P_GetPlaneLight(sub->sector, &frontsector->ceilingplane, false); - basecolormap = light->extra_colormap; - ceilinglightlevel = *light->p_lightlevel; - } - tempsec.ceilingplane.ChangeHeight(1 / 65536.); - - floorplane = NULL; - ceilingplane = R_FindPlane(frontsector->ceilingplane, // killough 3/8/98 - frontsector->GetTexture(sector_t::ceiling), - ceilinglightlevel + r_actualextralight, // killough 4/11/98 - frontsector->GetAlpha(sector_t::ceiling), - !!(fakeFloor->flags & FF_ADDITIVETRANS), - frontsector->planes[position].xform, - frontsector->sky, - NULL); - - if (ceilingplane) - R_AddPlaneLights(ceilingplane, frontsector->lighthead); - - R_FakeDrawLoop(sub, floorplane, ceilingplane); - fake3D = 0; - frontsector = sub->sector; - } - } - fakeFloor = NULL; - floorplane = backupfp; - ceilingplane = backupcp; - } - - basecolormap = frontsector->ColorMap; - floorlightlevel = fll; - ceilinglightlevel = cll; - - 
// killough 9/18/98: Fix underwater slowdown, by passing real sector - // instead of fake one. Improve sprite lighting by basing sprite - // lightlevels on floor & ceiling lightlevels in the surrounding area. - // [RH] Handle sprite lighting like Duke 3D: If the ceiling is a sky, sprites are lit by - // it, otherwise they are lit by the floor. - R_AddSprites (sub->sector, frontsector->GetTexture(sector_t::ceiling) == skyflatnum ? ceilinglightlevel : floorlightlevel, FakeSide); - - // [RH] Add particles - if ((unsigned int)(sub - subsectors) < (unsigned int)numsubsectors) - { // Only do it for the main BSP. - int shade = LIGHT2SHADE((floorlightlevel + ceilinglightlevel)/2 + r_actualextralight); - for (WORD i = ParticlesInSubsec[(unsigned int)(sub-subsectors)]; i != NO_PARTICLE; i = Particles[i].snext) - { - R_ProjectParticle (Particles + i, subsectors[sub-subsectors].sector, shade, FakeSide); - } - } - - count = sub->numlines; - line = sub->firstline; - - while (count--) - { - if (!outersubsector || line->sidedef == NULL || !(line->sidedef->Flags & WALLF_POLYOBJ)) - { - // kg3D - fake planes bounding calculation - if (r_3dfloors && line->backsector && frontsector->e && line->backsector->e->XFloor.ffloors.Size()) - { - backupfp = floorplane; - backupcp = ceilingplane; - floorplane = NULL; - ceilingplane = NULL; - for (unsigned int i = 0; i < line->backsector->e->XFloor.ffloors.Size(); i++) - { - fakeFloor = line->backsector->e->XFloor.ffloors[i]; - if (!(fakeFloor->flags & FF_EXISTS)) continue; - if (!(fakeFloor->flags & FF_RENDERPLANES)) continue; - if (!fakeFloor->model) continue; - fake3D = FAKE3D_FAKEBACK; - tempsec = *fakeFloor->model; - tempsec.floorplane = *fakeFloor->top.plane; - tempsec.ceilingplane = *fakeFloor->bottom.plane; - if (fakeFloor->validcount != validcount) - { - fakeFloor->validcount = validcount; - R_3D_NewClip(); - } - render_line.Render(line, InSubsector, frontsector, &tempsec, floorplane, ceilingplane); // fake - } - fakeFloor = NULL; - 
fake3D = 0; - floorplane = backupfp; - ceilingplane = backupcp; - } - render_line.Render(line, InSubsector, frontsector, nullptr, floorplane, ceilingplane); // now real - } - line++; - } - if (outersubsector) - { - InSubsector = NULL; - } -} - -void R_RenderScene() -{ - InSubsector = nullptr; - R_RenderBSPNode(nodes + numnodes - 1); // The head node is the last node output. -} - -// -// RenderBSPNode -// Renders all subsectors below a given node, traversing subtree recursively. -// Just call with BSP root and -1. -// killough 5/2/98: reformatted, removed tail recursion - -void R_RenderBSPNode (void *node) -{ - if (numnodes == 0) - { - R_Subsector (subsectors); - return; - } - while (!((size_t)node & 1)) // Keep going until found a subsector - { - node_t *bsp = (node_t *)node; - - // Decide which side the view point is on. - int side = R_PointOnSide (ViewPos, bsp); - - // Recursively divide front space (toward the viewer). - R_RenderBSPNode (bsp->children[side]); - - // Possibly divide back space (away from the viewer). - side ^= 1; - if (!R_CheckBBox (bsp->bbox[side])) + RenderSubsector(subsectors); return; + } + while (!((size_t)node & 1)) // Keep going until found a subsector + { + node_t *bsp = (node_t *)node; - node = bsp->children[side]; + // Decide which side the view point is on. + int side = R_PointOnSide(ViewPos, bsp); + + // Recursively divide front space (toward the viewer). + RenderBSPNode(bsp->children[side]); + + // Possibly divide back space (away from the viewer). + side ^= 1; + if (!CheckBBox(bsp->bbox[side])) + return; + + node = bsp->children[side]; + } + RenderSubsector((subsector_t *)((BYTE *)node - 1)); } - R_Subsector ((subsector_t *)((BYTE *)node - 1)); -} + void RenderBSP::ClearClip() + { + // clip ceiling to console bottom + fillshort(floorclip, viewwidth, viewheight); + fillshort(ceilingclip, viewwidth, !screen->Accel2D && ConBottom > viewwindowy && !bRenderingToCanvas ? 
(ConBottom - viewwindowy) : 0); + } } diff --git a/src/swrenderer/scene/r_bsp.h b/src/swrenderer/scene/r_bsp.h index ddd1f8bad4..1cc149792b 100644 --- a/src/swrenderer/scene/r_bsp.h +++ b/src/swrenderer/scene/r_bsp.h @@ -1,7 +1,3 @@ -// Emacs style mode select -*- C++ -*- -//----------------------------------------------------------------------------- -// -// $Id:$ // // Copyright (C) 1993-1996 by id Software, Inc. // @@ -14,47 +10,57 @@ // FITNESS FOR A PARTICULAR PURPOSE. See the DOOM Source Code License // for more details. // -// DESCRIPTION: -// Refresh module, BSP traversal and handling. -// -//----------------------------------------------------------------------------- - -#ifndef __R_BSP__ -#define __R_BSP__ +#pragma once #include "tarray.h" #include #include "r_defs.h" - -EXTERN_CVAR (Bool, r_drawflat) // [RH] Don't texture segs? +#include "swrenderer/line/r_line.h" namespace swrenderer { + struct visplane_t; -struct visplane_t; + // The 3072 below is just an arbitrary value picked to avoid + // drawing lines the player is too close to that would overflow + // the texture calculations. + #define TOO_CLOSE_Z (3072.0 / (1<<12)) -// The 3072 below is just an arbitrary value picked to avoid -// drawing lines the player is too close to that would overflow -// the texture calculations. 
-#define TOO_CLOSE_Z (3072.0 / (1<<12)) + enum + { + FAKED_Center, + FAKED_BelowFloor, + FAKED_AboveCeiling + }; -enum -{ - FAKED_Center, - FAKED_BelowFloor, - FAKED_AboveCeiling -}; + class RenderBSP + { + public: + static RenderBSP *Instance(); -void R_RenderScene(); -void R_RenderBSPNode (void *node); + void ClearClip(); + void RenderScene(); -// killough 4/13/98: fake floors/ceilings for deep water / fake ceilings: -sector_t *R_FakeFlat(sector_t *sec, sector_t *tempsec, int *floorlightlevel, int *ceilinglightlevel, seg_t *backline, int backx1, int backx2, double frontcz1, double frontcz2); + void ResetFakingUnderwater() { r_fakingunderwater = false; } + sector_t *FakeFlat(sector_t *sec, sector_t *tempsec, int *floorlightlevel, int *ceilinglightlevel, seg_t *backline, int backx1, int backx2, double frontcz1, double frontcz2); -extern short floorclip[MAXWIDTH]; -extern short ceilingclip[MAXWIDTH]; + short floorclip[MAXWIDTH]; + short ceilingclip[MAXWIDTH]; + private: + void RenderBSPNode(void *node); + void RenderSubsector(subsector_t *sub); + + bool CheckBBox(float *bspcoord); + void AddPolyobjs(subsector_t *sub); + void FakeDrawLoop(subsector_t *sub, visplane_t *floorplane, visplane_t *ceilingplane); + + subsector_t *InSubsector; + sector_t *frontsector; + uint8_t FakeSide; + bool r_fakingunderwater; + + SWRenderLine renderline; + }; } - -#endif diff --git a/src/swrenderer/scene/r_portal.cpp b/src/swrenderer/scene/r_portal.cpp index dd8342a683..794f11eca9 100644 --- a/src/swrenderer/scene/r_portal.cpp +++ b/src/swrenderer/scene/r_portal.cpp @@ -204,6 +204,8 @@ namespace swrenderer WindowLeft = pl->left; WindowRight = pl->right; + auto ceilingclip = RenderBSP::Instance()->ceilingclip; + auto floorclip = RenderBSP::Instance()->floorclip; for (i = pl->left; i < pl->right; i++) { if (pl->top[i] == 0x7fff) @@ -249,7 +251,7 @@ namespace swrenderer viewposStack.Push(ViewPos); visplaneStack.Push(pl); - R_RenderScene(); + RenderBSP::Instance()->RenderScene(); 
R_3D_ResetClip(); // reset clips (floor/ceiling) R_DrawPlanes(); @@ -464,11 +466,13 @@ namespace swrenderer R_3D_EnterSkybox(); // push 3D floor height map CurrentPortalInSkybox = false; // first portal in a skybox should set this variable to false for proper clipping in skyboxes. - // first pass, set clipping + // first pass, set clipping + auto ceilingclip = RenderBSP::Instance()->ceilingclip; + auto floorclip = RenderBSP::Instance()->floorclip; memcpy(ceilingclip + pds->x1, &pds->ceilingclip[0], pds->len * sizeof(*ceilingclip)); memcpy(floorclip + pds->x1, &pds->floorclip[0], pds->len * sizeof(*floorclip)); - R_RenderScene(); + RenderBSP::Instance()->RenderScene(); R_3D_ResetClip(); // reset clips (floor/ceiling) if (!savedvisibility && camera) camera->renderflags &= ~RF_INVISIBLE; diff --git a/src/swrenderer/segments/r_drawsegment.cpp b/src/swrenderer/segments/r_drawsegment.cpp index 7884415dee..2887a26f77 100644 --- a/src/swrenderer/segments/r_drawsegment.cpp +++ b/src/swrenderer/segments/r_drawsegment.cpp @@ -162,7 +162,7 @@ namespace swrenderer } // killough 4/13/98: get correct lightlevel for 2s normal textures - sec = R_FakeFlat(frontsector, &tempsec, nullptr, nullptr, nullptr, 0, 0, 0, 0); + sec = RenderBSP::Instance()->FakeFlat(frontsector, &tempsec, nullptr, nullptr, nullptr, 0, 0, 0, 0); basecolormap = sec->ColorMap; // [RH] Set basecolormap diff --git a/src/swrenderer/things/r_decal.cpp b/src/swrenderer/things/r_decal.cpp index 3acc73d62d..ba08cfd016 100644 --- a/src/swrenderer/things/r_decal.cpp +++ b/src/swrenderer/things/r_decal.cpp @@ -170,7 +170,7 @@ namespace swrenderer else if (pass == 0) { mceilingclip = walltop; - mfloorclip = ceilingclip; + mfloorclip = RenderBSP::Instance()->ceilingclip; needrepeat = 1; } else @@ -186,7 +186,7 @@ namespace swrenderer goto done; } mceilingclip = walltop; - mfloorclip = ceilingclip; + mfloorclip = RenderBSP::Instance()->ceilingclip; break; case RF_CLIPMID: @@ -203,7 +203,7 @@ namespace swrenderer { goto done; 
} - mceilingclip = floorclip; + mceilingclip = RenderBSP::Instance()->floorclip; mfloorclip = wallbottom; break; } @@ -296,7 +296,7 @@ namespace swrenderer // If this sprite is RF_CLIPFULL on a two-sided line, needrepeat will // be set 1 if we need to draw on the lower wall. In all other cases, // needrepeat will be 0, and the while will fail. - mceilingclip = floorclip; + mceilingclip = RenderBSP::Instance()->floorclip; mfloorclip = wallbottom; R_FinishSetPatchStyle(); } while (needrepeat--); diff --git a/src/swrenderer/things/r_particle.cpp b/src/swrenderer/things/r_particle.cpp index d58e625c4f..9878d362b6 100644 --- a/src/swrenderer/things/r_particle.cpp +++ b/src/swrenderer/things/r_particle.cpp @@ -114,6 +114,8 @@ namespace swrenderer // entered, we don't need to clip it to drawsegs like a normal sprite. // Clip particles behind walls. + auto ceilingclip = RenderBSP::Instance()->ceilingclip; + auto floorclip = RenderBSP::Instance()->floorclip; if (y1 < ceilingclip[x1]) y1 = ceilingclip[x1]; if (y1 < ceilingclip[x2 - 1]) y1 = ceilingclip[x2 - 1]; if (y2 >= floorclip[x1]) y2 = floorclip[x1] - 1; diff --git a/src/swrenderer/things/r_playersprite.cpp b/src/swrenderer/things/r_playersprite.cpp index 5dad94c707..1316a40ca2 100644 --- a/src/swrenderer/things/r_playersprite.cpp +++ b/src/swrenderer/things/r_playersprite.cpp @@ -123,7 +123,7 @@ namespace swrenderer else { // This used to use camera->Sector but due to interpolation that can be incorrect // when the interpolated viewpoint is in a different sector than the camera. 
- sec = R_FakeFlat(viewsector, &tempsec, &floorlight, &ceilinglight, nullptr, 0, 0, 0, 0); + sec = RenderBSP::Instance()->FakeFlat(viewsector, &tempsec, &floorlight, &ceilinglight, nullptr, 0, 0, 0, 0); // [RH] set basecolormap basecolormap = sec->ColorMap; From 5f8075f726563a8ed70537077cdd5efd334c24ba Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 4 Jan 2017 18:54:14 +0100 Subject: [PATCH 647/912] Convert r_3dfloors to a class --- src/swrenderer/line/r_line.cpp | 44 ++-- src/swrenderer/line/r_walldraw.cpp | 9 +- src/swrenderer/plane/r_visibleplane.cpp | 11 +- src/swrenderer/r_main.cpp | 9 +- src/swrenderer/r_swrenderer.cpp | 10 +- src/swrenderer/scene/r_3dfloors.cpp | 290 +++++++++++----------- src/swrenderer/scene/r_3dfloors.h | 123 ++++----- src/swrenderer/scene/r_bsp.cpp | 118 ++++----- src/swrenderer/scene/r_bsp.h | 1 + src/swrenderer/scene/r_portal.cpp | 14 +- src/swrenderer/scene/r_things.cpp | 91 +++---- src/swrenderer/segments/r_drawsegment.cpp | 83 ++++--- 12 files changed, 404 insertions(+), 399 deletions(-) diff --git a/src/swrenderer/line/r_line.cpp b/src/swrenderer/line/r_line.cpp index 19b22ce109..feb261d671 100644 --- a/src/swrenderer/line/r_line.cpp +++ b/src/swrenderer/line/r_line.cpp @@ -112,7 +112,9 @@ namespace swrenderer WallT.InitFromLine(v1->fPos() - ViewPos, v2->fPos() - ViewPos); } - if (!(fake3D & FAKE3D_FAKEBACK)) + Clip3DFloors *clip3d = Clip3DFloors::Instance(); + + if (!(clip3d->fake3D & FAKE3D_FAKEBACK)) { backsector = line->backsector; } @@ -132,7 +134,7 @@ namespace swrenderer else { // kg3D - its fake, no transfer_heights - if (!(fake3D & FAKE3D_FAKEBACK)) + if (!(clip3d->fake3D & FAKE3D_FAKEBACK)) { // killough 3/8/98, 4/4/98: hack for invisible ceilings / deep water backsector = RenderBSP::Instance()->FakeFlat(backsector, &tempsec, nullptr, nullptr, curline, WallC.sx1, WallC.sx2, rw_frontcz1, rw_frontcz2); } @@ -143,15 +145,15 @@ namespace swrenderer rw_backcz2 = backsector->ceilingplane.ZatPoint(line->v2); rw_backfz2 
= backsector->floorplane.ZatPoint(line->v2); - if (fake3D & FAKE3D_FAKEBACK) + if (clip3d->fake3D & FAKE3D_FAKEBACK) { if (rw_frontfz1 >= rw_backfz1 && rw_frontfz2 >= rw_backfz2) { - fake3D |= FAKE3D_CLIPBOTFRONT; + clip3d->fake3D |= FAKE3D_CLIPBOTFRONT; } if (rw_frontcz1 <= rw_backcz1 && rw_frontcz2 <= rw_backcz2) { - fake3D |= FAKE3D_CLIPTOPFRONT; + clip3d->fake3D |= FAKE3D_CLIPTOPFRONT; } } @@ -355,7 +357,9 @@ namespace swrenderer draw_segment->curline = curline; draw_segment->bFogBoundary = false; draw_segment->bFakeBoundary = false; - if (fake3D & 7) draw_segment->fake = 1; + + Clip3DFloors *clip3d = Clip3DFloors::Instance(); + if (clip3d->fake3D & FAKE3D_FAKEMASK) draw_segment->fake = 1; else draw_segment->fake = 0; draw_segment->sprtopclip = draw_segment->sprbottomclip = draw_segment->maskedtexturecol = draw_segment->bkup = draw_segment->swall = -1; @@ -551,8 +555,8 @@ namespace swrenderer RenderWallSegmentTextures(start, stop); - if (fake3D & 7) { - return (fake3D & FAKE3D_FAKEMASK) == 0; + if (clip3d->fake3D & FAKE3D_FAKEMASK) { + return (clip3d->fake3D & FAKE3D_FAKEMASK) == 0; } // save sprite clipping info @@ -609,7 +613,7 @@ namespace swrenderer WallPortals.Push(pds); } - return (fake3D & FAKE3D_FAKEMASK) == 0; + return (clip3d->fake3D & FAKE3D_FAKEMASK) == 0; } void SWRenderLine::SetWallVariables(bool needlights) @@ -974,12 +978,14 @@ namespace swrenderer } } + Clip3DFloors *clip3d = Clip3DFloors::Instance(); + // mark ceiling areas if (markceiling) { for (x = x1; x < x2; ++x) { - short top = (fakeFloor && fake3D & 2) ? fakeFloor->ceilingclip[x] : ceilingclip[x]; + short top = (clip3d->fakeFloor && clip3d->fake3D & FAKE3D_FAKECEILING) ? clip3d->fakeFloor->ceilingclip[x] : ceilingclip[x]; short bottom = MIN(walltop[x], floorclip[x]); if (top < bottom) { @@ -995,7 +1001,7 @@ namespace swrenderer for (x = x1; x < x2; ++x) { short top = MAX(wallbottom[x], ceilingclip[x]); - short bottom = (fakeFloor && fake3D & 1) ? 
fakeFloor->floorclip[x] : floorclip[x]; + short bottom = (clip3d->fakeFloor && clip3d->fake3D & FAKE3D_FAKEFLOOR) ? clip3d->fakeFloor->floorclip[x] : floorclip[x]; if (top < bottom) { assert(bottom <= viewheight); @@ -1006,11 +1012,11 @@ namespace swrenderer } // kg3D - fake planes clipping - if (fake3D & FAKE3D_REFRESHCLIP) + if (clip3d->fake3D & FAKE3D_REFRESHCLIP) { - if (fake3D & FAKE3D_CLIPBOTFRONT) + if (clip3d->fake3D & FAKE3D_CLIPBOTFRONT) { - memcpy(fakeFloor->floorclip + x1, wallbottom + x1, (x2 - x1) * sizeof(short)); + memcpy(clip3d->fakeFloor->floorclip + x1, wallbottom + x1, (x2 - x1) * sizeof(short)); } else { @@ -1018,11 +1024,11 @@ namespace swrenderer { walllower[x] = MIN(MAX(walllower[x], ceilingclip[x]), wallbottom[x]); } - memcpy(fakeFloor->floorclip + x1, walllower + x1, (x2 - x1) * sizeof(short)); + memcpy(clip3d->fakeFloor->floorclip + x1, walllower + x1, (x2 - x1) * sizeof(short)); } - if (fake3D & FAKE3D_CLIPTOPFRONT) + if (clip3d->fake3D & FAKE3D_CLIPTOPFRONT) { - memcpy(fakeFloor->ceilingclip + x1, walltop + x1, (x2 - x1) * sizeof(short)); + memcpy(clip3d->fakeFloor->ceilingclip + x1, walltop + x1, (x2 - x1) * sizeof(short)); } else { @@ -1030,10 +1036,10 @@ namespace swrenderer { wallupper[x] = MAX(MIN(wallupper[x], floorclip[x]), walltop[x]); } - memcpy(fakeFloor->ceilingclip + x1, wallupper + x1, (x2 - x1) * sizeof(short)); + memcpy(clip3d->fakeFloor->ceilingclip + x1, wallupper + x1, (x2 - x1) * sizeof(short)); } } - if (fake3D & 7) return; + if (clip3d->fake3D & FAKE3D_FAKEMASK) return; FLightNode *light_list = (curline && curline->sidedef) ? 
curline->sidedef->lighthead : nullptr; diff --git a/src/swrenderer/line/r_walldraw.cpp b/src/swrenderer/line/r_walldraw.cpp index e486ffbcb7..981ae2dde2 100644 --- a/src/swrenderer/line/r_walldraw.cpp +++ b/src/swrenderer/line/r_walldraw.cpp @@ -577,13 +577,14 @@ namespace swrenderer double frontfz2 = ds->curline->frontsector->floorplane.ZatPoint(ds->curline->v2); double top = MAX(frontcz1, frontcz2); double bot = MIN(frontfz1, frontfz2); - if (fake3D & FAKE3D_CLIPTOP) + Clip3DFloors *clip3d = Clip3DFloors::Instance(); + if (clip3d->fake3D & FAKE3D_CLIPTOP) { - top = MIN(top, sclipTop); + top = MIN(top, clip3d->sclipTop); } - if (fake3D & FAKE3D_CLIPBOTTOM) + if (clip3d->fake3D & FAKE3D_CLIPBOTTOM) { - bot = MAX(bot, sclipBottom); + bot = MAX(bot, clip3d->sclipBottom); } ProcessWallNP2(frontsector, curline, WallC, x1, x2, uwal, dwal, swal, lwal, yrepeat, top, bot, wallshade, xoffset, light, lightstep, true); } diff --git a/src/swrenderer/plane/r_visibleplane.cpp b/src/swrenderer/plane/r_visibleplane.cpp index e312f9cad8..14c830fc97 100644 --- a/src/swrenderer/plane/r_visibleplane.cpp +++ b/src/swrenderer/plane/r_visibleplane.cpp @@ -241,7 +241,8 @@ namespace swrenderer // kg3D - hack, store alpha in sky // i know there is ->alpha, but this also allows to identify fake plane // and ->alpha is for stacked sectors - if (fake3D & (FAKE3D_FAKEFLOOR | FAKE3D_FAKECEILING)) sky = 0x80000000 | fakeAlpha; + Clip3DFloors *clip3d = Clip3DFloors::Instance(); + if (clip3d->fake3D & (FAKE3D_FAKEFLOOR | FAKE3D_FAKECEILING)) sky = 0x80000000 | clip3d->fakeAlpha; else sky = 0; // not skyflatnum so it can't be a sky portal = nullptr; alpha = OPAQUE; @@ -299,7 +300,7 @@ namespace swrenderer sky == check->sky && CurrentPortalUniq == check->CurrentPortalUniq && MirrorFlags == check->MirrorFlags && - CurrentSkybox == check->CurrentSkybox && + Clip3DFloors::Instance()->CurrentSkybox == check->CurrentSkybox && ViewPos == check->viewpos ) { @@ -326,7 +327,7 @@ namespace swrenderer 
check->Additive = additive; check->CurrentPortalUniq = CurrentPortalUniq; check->MirrorFlags = MirrorFlags; - check->CurrentSkybox = CurrentSkybox; + check->CurrentSkybox = Clip3DFloors::Instance()->CurrentSkybox; fillshort(check->top, viewwidth, 0x7fff); @@ -426,7 +427,7 @@ namespace swrenderer for (pl = visplanes[i]; pl; pl = pl->next) { // kg3D - draw only correct planes - if (pl->CurrentPortalUniq != CurrentPortalUniq || pl->CurrentSkybox != CurrentSkybox) + if (pl->CurrentPortalUniq != CurrentPortalUniq || pl->CurrentSkybox != Clip3DFloors::Instance()->CurrentSkybox) continue; // kg3D - draw only real planes now if (pl->sky >= 0) { @@ -452,7 +453,7 @@ namespace swrenderer { for (pl = visplanes[i]; pl; pl = pl->next) { - if (pl->CurrentSkybox != CurrentSkybox || pl->CurrentPortalUniq != CurrentPortalUniq) + if (pl->CurrentSkybox != Clip3DFloors::Instance()->CurrentSkybox || pl->CurrentPortalUniq != CurrentPortalUniq) continue; if (pl->sky < 0 && pl->height.Zat0() == height) diff --git a/src/swrenderer/r_main.cpp b/src/swrenderer/r_main.cpp index bdad72d98b..cfb9daf51b 100644 --- a/src/swrenderer/r_main.cpp +++ b/src/swrenderer/r_main.cpp @@ -390,7 +390,7 @@ static void R_ShutdownRenderer() { R_DeinitSprites(); R_DeinitPlanes(); - fakeActive = 0; + Clip3DFloors::Instance()->Cleanup(); // Free openings if (openings != NULL) { @@ -540,8 +540,9 @@ void R_RenderActorView (AActor *actor, bool dontmaplines) MaskedCycles.Reset(); WallScanCycles.Reset(); - fakeActive = 0; // kg3D - reset fake floor indicator - R_3D_ResetClip(); // reset clips (floor/ceiling) + Clip3DFloors *clip3d = Clip3DFloors::Instance(); + clip3d->fakeActive = false; // kg3D - reset fake floor indicator + clip3d->ResetClip(); // reset clips (floor/ceiling) R_SetupBuffer (); R_SetupFrame (actor); @@ -585,7 +586,7 @@ void R_RenderActorView (AActor *actor, bool dontmaplines) // Link the polyobjects right before drawing the scene to reduce the amounts of calls to this function PO_LinkToSubsectors(); 
RenderBSP::Instance()->RenderScene(); - R_3D_ResetClip(); // reset clips (floor/ceiling) + Clip3DFloors::Instance()->ResetClip(); // reset clips (floor/ceiling) camera->renderflags = savedflags; WallCycles.Unclock(); diff --git a/src/swrenderer/r_swrenderer.cpp b/src/swrenderer/r_swrenderer.cpp index 497b8e6747..c1240bc6b9 100644 --- a/src/swrenderer/r_swrenderer.cpp +++ b/src/swrenderer/r_swrenderer.cpp @@ -345,15 +345,7 @@ void FSoftwareRenderer::OnModeSet () void FSoftwareRenderer::ErrorCleanup () { - fakeActive = 0; - fake3D = 0; - while (CurrentSkybox) - { - R_3D_DeleteHeights(); - R_3D_LeaveSkybox(); - } - R_3D_ResetClip(); - R_3D_DeleteHeights(); + Clip3DFloors::Instance()->Cleanup(); } //=========================================================================== diff --git a/src/swrenderer/scene/r_3dfloors.cpp b/src/swrenderer/scene/r_3dfloors.cpp index 5d3bb78221..dbfa6dfae4 100644 --- a/src/swrenderer/scene/r_3dfloors.cpp +++ b/src/swrenderer/scene/r_3dfloors.cpp @@ -18,148 +18,154 @@ CVAR(Int, r_3dfloors, true, 0); namespace swrenderer { - -// external variables -int fake3D; -F3DFloor *fakeFloor; -fixed_t fakeHeight; -fixed_t fakeAlpha; -int fakeActive = 0; -double sclipBottom; -double sclipTop; -HeightLevel *height_top = NULL; -HeightLevel *height_cur = NULL; -int CurrentMirror = 0; -int CurrentSkybox = 0; - -// private variables -int height_max = -1; -TArray toplist; -ClipStack *clip_top = NULL; -ClipStack *clip_cur = NULL; - -void R_3D_DeleteHeights() -{ - height_cur = height_top; - while(height_cur) { - height_top = height_cur; - height_cur = height_cur->next; - M_Free(height_top); - } - height_max = -1; - height_top = height_cur = NULL; -} - -void R_3D_AddHeight(secplane_t *add, sector_t *sec) -{ - HeightLevel *near; - HeightLevel *curr; - - double height = add->ZatPoint(ViewPos); - if(height >= sec->CenterCeiling()) return; - if(height <= sec->CenterFloor()) return; - - fakeActive = 1; - - if(height_max >= 0) { - near = height_top; - while(near && 
near->height < height) near = near->next; - if(near) { - if(near->height == height) return; - curr = (HeightLevel*)M_Malloc(sizeof(HeightLevel)); - curr->height = height; - curr->prev = near->prev; - curr->next = near; - if(near->prev) near->prev->next = curr; - else height_top = curr; - near->prev = curr; - } else { - curr = (HeightLevel*)M_Malloc(sizeof(HeightLevel)); - curr->height = height; - curr->prev = height_cur; - curr->next = NULL; - height_cur->next = curr; - height_cur = curr; - } - } else { - height_top = height_cur = (HeightLevel*)M_Malloc(sizeof(HeightLevel)); - height_top->height = height; - height_top->prev = NULL; - height_top->next = NULL; - } - height_max++; -} - -void R_3D_NewClip() -{ - ClipStack *curr; -// extern short floorclip[MAXWIDTH]; -// extern short ceilingclip[MAXWIDTH]; - - curr = (ClipStack*)M_Malloc(sizeof(ClipStack)); - curr->next = 0; - memcpy(curr->floorclip, RenderBSP::Instance()->floorclip, sizeof(short) * MAXWIDTH); - memcpy(curr->ceilingclip, RenderBSP::Instance()->ceilingclip, sizeof(short) * MAXWIDTH); - curr->ffloor = fakeFloor; - assert(fakeFloor->floorclip == NULL); - assert(fakeFloor->ceilingclip == NULL); - fakeFloor->floorclip = curr->floorclip; - fakeFloor->ceilingclip = curr->ceilingclip; - if(clip_top) { - clip_cur->next = curr; - clip_cur = curr; - } else { - clip_top = clip_cur = curr; - } -} - -void R_3D_ResetClip() -{ - clip_cur = clip_top; - while(clip_cur) + Clip3DFloors *Clip3DFloors::Instance() { - assert(clip_cur->ffloor->floorclip != NULL); - assert(clip_cur->ffloor->ceilingclip != NULL); - clip_cur->ffloor->ceilingclip = clip_cur->ffloor->floorclip = NULL; - clip_top = clip_cur; - clip_cur = clip_cur->next; - M_Free(clip_top); + static Clip3DFloors clip; + return &clip; + } + + void Clip3DFloors::Cleanup() + { + fakeActive = false; + fake3D = 0; + while (CurrentSkybox) + { + DeleteHeights(); + LeaveSkybox(); + } + ResetClip(); + DeleteHeights(); + } + + void Clip3DFloors::DeleteHeights() + { + 
height_cur = height_top; + while (height_cur) + { + height_top = height_cur; + height_cur = height_cur->next; + M_Free(height_top); + } + height_max = -1; + height_top = height_cur = nullptr; + } + + void Clip3DFloors::AddHeight(secplane_t *add, sector_t *sec) + { + HeightLevel *near; + HeightLevel *curr; + + double height = add->ZatPoint(ViewPos); + if (height >= sec->CenterCeiling()) return; + if (height <= sec->CenterFloor()) return; + + fakeActive = true; + + if (height_max >= 0) + { + near = height_top; + while (near && near->height < height) near = near->next; + if (near) + { + if (near->height == height) return; + curr = (HeightLevel*)M_Malloc(sizeof(HeightLevel)); + curr->height = height; + curr->prev = near->prev; + curr->next = near; + if (near->prev) near->prev->next = curr; + else height_top = curr; + near->prev = curr; + } + else + { + curr = (HeightLevel*)M_Malloc(sizeof(HeightLevel)); + curr->height = height; + curr->prev = height_cur; + curr->next = nullptr; + height_cur->next = curr; + height_cur = curr; + } + } + else + { + height_top = height_cur = (HeightLevel*)M_Malloc(sizeof(HeightLevel)); + height_top->height = height; + height_top->prev = nullptr; + height_top->next = nullptr; + } + height_max++; + } + + void Clip3DFloors::NewClip() + { + ClipStack *curr; + + curr = (ClipStack*)M_Malloc(sizeof(ClipStack)); + curr->next = 0; + memcpy(curr->floorclip, RenderBSP::Instance()->floorclip, sizeof(short) * MAXWIDTH); + memcpy(curr->ceilingclip, RenderBSP::Instance()->ceilingclip, sizeof(short) * MAXWIDTH); + curr->ffloor = fakeFloor; + assert(fakeFloor->floorclip == nullptr); + assert(fakeFloor->ceilingclip == nullptr); + fakeFloor->floorclip = curr->floorclip; + fakeFloor->ceilingclip = curr->ceilingclip; + if (clip_top) + { + clip_cur->next = curr; + clip_cur = curr; + } + else + { + clip_top = clip_cur = curr; + } + } + + void Clip3DFloors::ResetClip() + { + clip_cur = clip_top; + while (clip_cur) + { + assert(clip_cur->ffloor->floorclip != 
nullptr); + assert(clip_cur->ffloor->ceilingclip != nullptr); + clip_cur->ffloor->ceilingclip = clip_cur->ffloor->floorclip = nullptr; + clip_top = clip_cur; + clip_cur = clip_cur->next; + M_Free(clip_top); + } + clip_cur = clip_top = nullptr; + } + + void Clip3DFloors::EnterSkybox() + { + HeightStack current; + + current.height_top = height_top; + current.height_cur = height_cur; + current.height_max = height_max; + + toplist.Push(current); + + height_top = nullptr; + height_cur = nullptr; + height_max = -1; + + CurrentSkybox++; + } + + void Clip3DFloors::LeaveSkybox() + { + HeightStack current; + + current.height_top = nullptr; + current.height_cur = nullptr; + current.height_max = -1; + + toplist.Pop(current); + + height_top = current.height_top; + height_cur = current.height_cur; + height_max = current.height_max; + + CurrentSkybox--; } - clip_cur = clip_top = NULL; -} - -void R_3D_EnterSkybox() -{ - HeightStack current; - - current.height_top = height_top; - current.height_cur = height_cur; - current.height_max = height_max; - - toplist.Push(current); - - height_top = NULL; - height_cur = NULL; - height_max = -1; - - CurrentSkybox++; -} - -void R_3D_LeaveSkybox() -{ - HeightStack current; - - current.height_top = NULL; - current.height_cur = NULL; - current.height_max = -1; - - toplist.Pop(current); - - height_top = current.height_top; - height_cur = current.height_cur; - height_max = current.height_max; - - CurrentSkybox--; -} - } diff --git a/src/swrenderer/scene/r_3dfloors.h b/src/swrenderer/scene/r_3dfloors.h index a703ae19a4..ca8f9830b6 100644 --- a/src/swrenderer/scene/r_3dfloors.h +++ b/src/swrenderer/scene/r_3dfloors.h @@ -1,5 +1,5 @@ -#ifndef SOFT_FAKE3D_H -#define SOFT_FAKE3D_H + +#pragma once #include "p_3dfloors.h" @@ -7,70 +7,77 @@ EXTERN_CVAR(Int, r_3dfloors); namespace swrenderer { + struct HeightLevel + { + double height; + struct HeightLevel *prev; + struct HeightLevel *next; + }; -// special types + struct HeightStack + { + HeightLevel 
*height_top; + HeightLevel *height_cur; + int height_max; + }; -struct HeightLevel -{ - double height; - struct HeightLevel *prev; - struct HeightLevel *next; -}; + struct ClipStack + { + short floorclip[MAXWIDTH]; + short ceilingclip[MAXWIDTH]; + F3DFloor *ffloor; + ClipStack *next; + }; -struct HeightStack -{ - HeightLevel *height_top; - HeightLevel *height_cur; - int height_max; -}; + enum Fake3DOpaque + { + // BSP stage: + FAKE3D_FAKEFLOOR = 1, // fake floor, mark seg as FAKE + FAKE3D_FAKECEILING = 2, // fake ceiling, mark seg as FAKE + FAKE3D_FAKEBACK = 4, // RenderLine with fake backsector, mark seg as FAKE + FAKE3D_FAKEMASK = 7, + FAKE3D_CLIPBOTFRONT = 8, // use front sector clipping info (bottom) + FAKE3D_CLIPTOPFRONT = 16, // use front sector clipping info (top) + }; -struct ClipStack -{ - short floorclip[MAXWIDTH]; - short ceilingclip[MAXWIDTH]; - F3DFloor *ffloor; - ClipStack *next; -}; + enum Fake3DTranslucent + { + // sorting stage: + FAKE3D_CLIPBOTTOM = 1, // clip bottom + FAKE3D_CLIPTOP = 2, // clip top + FAKE3D_REFRESHCLIP = 4, // refresh clip info + FAKE3D_DOWN2UP = 8, // rendering from down to up (floors) + }; -// external varialbes + class Clip3DFloors + { + public: + static Clip3DFloors *Instance(); -// fake3D flags: -enum -{ - // BSP stage: - FAKE3D_FAKEFLOOR = 1, // fake floor, mark seg as FAKE - FAKE3D_FAKECEILING = 2, // fake ceiling, mark seg as FAKE - FAKE3D_FAKEBACK = 4, // R_AddLine with fake backsector, mark seg as FAKE - FAKE3D_FAKEMASK = 7, - FAKE3D_CLIPBOTFRONT = 8, // use front sector clipping info (bottom) - FAKE3D_CLIPTOPFRONT = 16, // use front sector clipping info (top) + void Cleanup(); - // sorting stage: - FAKE3D_CLIPBOTTOM = 1, // clip bottom - FAKE3D_CLIPTOP = 2, // clip top - FAKE3D_REFRESHCLIP = 4, // refresh clip info - FAKE3D_DOWN2UP = 8, // rendering from down to up (floors) -}; + void DeleteHeights(); + void AddHeight(secplane_t *add, sector_t *sec); + void NewClip(); + void ResetClip(); + void EnterSkybox(); + void 
LeaveSkybox(); -extern int fake3D; -extern F3DFloor *fakeFloor; -extern fixed_t fakeAlpha; -extern int fakeActive; -extern double sclipBottom; -extern double sclipTop; -extern HeightLevel *height_top; -extern HeightLevel *height_cur; -extern int CurrentMirror; -extern int CurrentSkybox; + int fake3D = 0; -// functions -void R_3D_DeleteHeights(); -void R_3D_AddHeight(secplane_t *add, sector_t *sec); -void R_3D_NewClip(); -void R_3D_ResetClip(); -void R_3D_EnterSkybox(); -void R_3D_LeaveSkybox(); + F3DFloor *fakeFloor = nullptr; + fixed_t fakeAlpha = 0; + bool fakeActive = false; + double sclipBottom = 0; + double sclipTop = 0; + HeightLevel *height_top = nullptr; + HeightLevel *height_cur = nullptr; + int CurrentSkybox = 0; + private: + int height_max = -1; + TArray toplist; + ClipStack *clip_top = nullptr; + ClipStack *clip_cur = nullptr; + }; } - -#endif diff --git a/src/swrenderer/scene/r_bsp.cpp b/src/swrenderer/scene/r_bsp.cpp index f36d0439b5..5b7daa8b23 100644 --- a/src/swrenderer/scene/r_bsp.cpp +++ b/src/swrenderer/scene/r_bsp.cpp @@ -554,35 +554,38 @@ namespace swrenderer { backupfp = floorplane; backupcp = ceilingplane; + + Clip3DFloors *clip3d = Clip3DFloors::Instance(); + // first check all floors for (int i = 0; i < (int)frontsector->e->XFloor.ffloors.Size(); i++) { - fakeFloor = frontsector->e->XFloor.ffloors[i]; - if (!(fakeFloor->flags & FF_EXISTS)) continue; - if (!fakeFloor->model) continue; - if (fakeFloor->bottom.plane->isSlope()) continue; - if (!(fakeFloor->flags & FF_NOSHADE) || (fakeFloor->flags & (FF_RENDERPLANES | FF_RENDERSIDES))) + clip3d->fakeFloor = frontsector->e->XFloor.ffloors[i]; + if (!(clip3d->fakeFloor->flags & FF_EXISTS)) continue; + if (!clip3d->fakeFloor->model) continue; + if (clip3d->fakeFloor->bottom.plane->isSlope()) continue; + if (!(clip3d->fakeFloor->flags & FF_NOSHADE) || (clip3d->fakeFloor->flags & (FF_RENDERPLANES | FF_RENDERSIDES))) { - R_3D_AddHeight(fakeFloor->top.plane, frontsector); + 
clip3d->AddHeight(clip3d->fakeFloor->top.plane, frontsector); } - if (!(fakeFloor->flags & FF_RENDERPLANES)) continue; - if (fakeFloor->alpha == 0) continue; - if (fakeFloor->flags & FF_THISINSIDE && fakeFloor->flags & FF_INVERTSECTOR) continue; - fakeAlpha = MIN(Scale(fakeFloor->alpha, OPAQUE, 255), OPAQUE); - if (fakeFloor->validcount != validcount) + if (!(clip3d->fakeFloor->flags & FF_RENDERPLANES)) continue; + if (clip3d->fakeFloor->alpha == 0) continue; + if (clip3d->fakeFloor->flags & FF_THISINSIDE && clip3d->fakeFloor->flags & FF_INVERTSECTOR) continue; + clip3d->fakeAlpha = MIN(Scale(clip3d->fakeFloor->alpha, OPAQUE, 255), OPAQUE); + if (clip3d->fakeFloor->validcount != validcount) { - fakeFloor->validcount = validcount; - R_3D_NewClip(); + clip3d->fakeFloor->validcount = validcount; + clip3d->NewClip(); } - double fakeHeight = fakeFloor->top.plane->ZatPoint(frontsector->centerspot); + double fakeHeight = clip3d->fakeFloor->top.plane->ZatPoint(frontsector->centerspot); if (fakeHeight < ViewPos.Z && fakeHeight > frontsector->floorplane.ZatPoint(frontsector->centerspot)) { - fake3D = FAKE3D_FAKEFLOOR; - tempsec = *fakeFloor->model; - tempsec.floorplane = *fakeFloor->top.plane; - tempsec.ceilingplane = *fakeFloor->bottom.plane; - if (!(fakeFloor->flags & FF_THISINSIDE) && !(fakeFloor->flags & FF_INVERTSECTOR)) + clip3d->fake3D = FAKE3D_FAKEFLOOR; + tempsec = *clip3d->fakeFloor->model; + tempsec.floorplane = *clip3d->fakeFloor->top.plane; + tempsec.ceilingplane = *clip3d->fakeFloor->bottom.plane; + if (!(clip3d->fakeFloor->flags & FF_THISINSIDE) && !(clip3d->fakeFloor->flags & FF_INVERTSECTOR)) { tempsec.SetTexture(sector_t::floor, tempsec.GetTexture(sector_t::ceiling)); position = sector_t::ceiling; @@ -602,7 +605,7 @@ namespace swrenderer frontsector->GetTexture(sector_t::floor), floorlightlevel + r_actualextralight, // killough 3/16/98 frontsector->GetAlpha(sector_t::floor), - !!(fakeFloor->flags & FF_ADDITIVETRANS), + !!(clip3d->fakeFloor->flags & 
FF_ADDITIVETRANS), frontsector->planes[position].xform, frontsector->sky, NULL); @@ -611,41 +614,41 @@ namespace swrenderer R_AddPlaneLights(floorplane, frontsector->lighthead); FakeDrawLoop(sub, floorplane, ceilingplane); - fake3D = 0; + clip3d->fake3D = 0; frontsector = sub->sector; } } // and now ceilings for (unsigned int i = 0; i < frontsector->e->XFloor.ffloors.Size(); i++) { - fakeFloor = frontsector->e->XFloor.ffloors[i]; - if (!(fakeFloor->flags & FF_EXISTS)) continue; - if (!fakeFloor->model) continue; - if (fakeFloor->top.plane->isSlope()) continue; - if (!(fakeFloor->flags & FF_NOSHADE) || (fakeFloor->flags & (FF_RENDERPLANES | FF_RENDERSIDES))) + clip3d->fakeFloor = frontsector->e->XFloor.ffloors[i]; + if (!(clip3d->fakeFloor->flags & FF_EXISTS)) continue; + if (!clip3d->fakeFloor->model) continue; + if (clip3d->fakeFloor->top.plane->isSlope()) continue; + if (!(clip3d->fakeFloor->flags & FF_NOSHADE) || (clip3d->fakeFloor->flags & (FF_RENDERPLANES | FF_RENDERSIDES))) { - R_3D_AddHeight(fakeFloor->bottom.plane, frontsector); + clip3d->AddHeight(clip3d->fakeFloor->bottom.plane, frontsector); } - if (!(fakeFloor->flags & FF_RENDERPLANES)) continue; - if (fakeFloor->alpha == 0) continue; - if (!(fakeFloor->flags & FF_THISINSIDE) && (fakeFloor->flags & (FF_SWIMMABLE | FF_INVERTSECTOR)) == (FF_SWIMMABLE | FF_INVERTSECTOR)) continue; - fakeAlpha = MIN(Scale(fakeFloor->alpha, OPAQUE, 255), OPAQUE); + if (!(clip3d->fakeFloor->flags & FF_RENDERPLANES)) continue; + if (clip3d->fakeFloor->alpha == 0) continue; + if (!(clip3d->fakeFloor->flags & FF_THISINSIDE) && (clip3d->fakeFloor->flags & (FF_SWIMMABLE | FF_INVERTSECTOR)) == (FF_SWIMMABLE | FF_INVERTSECTOR)) continue; + clip3d->fakeAlpha = MIN(Scale(clip3d->fakeFloor->alpha, OPAQUE, 255), OPAQUE); - if (fakeFloor->validcount != validcount) + if (clip3d->fakeFloor->validcount != validcount) { - fakeFloor->validcount = validcount; - R_3D_NewClip(); + clip3d->fakeFloor->validcount = validcount; + clip3d->NewClip(); 
} - double fakeHeight = fakeFloor->bottom.plane->ZatPoint(frontsector->centerspot); + double fakeHeight = clip3d->fakeFloor->bottom.plane->ZatPoint(frontsector->centerspot); if (fakeHeight > ViewPos.Z && fakeHeight < frontsector->ceilingplane.ZatPoint(frontsector->centerspot)) { - fake3D = FAKE3D_FAKECEILING; - tempsec = *fakeFloor->model; - tempsec.floorplane = *fakeFloor->top.plane; - tempsec.ceilingplane = *fakeFloor->bottom.plane; - if ((!(fakeFloor->flags & FF_THISINSIDE) && !(fakeFloor->flags & FF_INVERTSECTOR)) || - (fakeFloor->flags & FF_THISINSIDE && fakeFloor->flags & FF_INVERTSECTOR)) + clip3d->fake3D = FAKE3D_FAKECEILING; + tempsec = *clip3d->fakeFloor->model; + tempsec.floorplane = *clip3d->fakeFloor->top.plane; + tempsec.ceilingplane = *clip3d->fakeFloor->bottom.plane; + if ((!(clip3d->fakeFloor->flags & FF_THISINSIDE) && !(clip3d->fakeFloor->flags & FF_INVERTSECTOR)) || + (clip3d->fakeFloor->flags & FF_THISINSIDE && clip3d->fakeFloor->flags & FF_INVERTSECTOR)) { tempsec.SetTexture(sector_t::ceiling, tempsec.GetTexture(sector_t::floor)); position = sector_t::floor; @@ -667,7 +670,7 @@ namespace swrenderer frontsector->GetTexture(sector_t::ceiling), ceilinglightlevel + r_actualextralight, // killough 4/11/98 frontsector->GetAlpha(sector_t::ceiling), - !!(fakeFloor->flags & FF_ADDITIVETRANS), + !!(clip3d->fakeFloor->flags & FF_ADDITIVETRANS), frontsector->planes[position].xform, frontsector->sky, NULL); @@ -676,11 +679,11 @@ namespace swrenderer R_AddPlaneLights(ceilingplane, frontsector->lighthead); FakeDrawLoop(sub, floorplane, ceilingplane); - fake3D = 0; + clip3d->fake3D = 0; frontsector = sub->sector; } } - fakeFloor = NULL; + clip3d->fakeFloor = NULL; floorplane = backupfp; ceilingplane = backupcp; } @@ -720,25 +723,26 @@ namespace swrenderer backupcp = ceilingplane; floorplane = NULL; ceilingplane = NULL; + Clip3DFloors *clip3d = Clip3DFloors::Instance(); for (unsigned int i = 0; i < line->backsector->e->XFloor.ffloors.Size(); i++) { - fakeFloor 
= line->backsector->e->XFloor.ffloors[i]; - if (!(fakeFloor->flags & FF_EXISTS)) continue; - if (!(fakeFloor->flags & FF_RENDERPLANES)) continue; - if (!fakeFloor->model) continue; - fake3D = FAKE3D_FAKEBACK; - tempsec = *fakeFloor->model; - tempsec.floorplane = *fakeFloor->top.plane; - tempsec.ceilingplane = *fakeFloor->bottom.plane; - if (fakeFloor->validcount != validcount) + clip3d->fakeFloor = line->backsector->e->XFloor.ffloors[i]; + if (!(clip3d->fakeFloor->flags & FF_EXISTS)) continue; + if (!(clip3d->fakeFloor->flags & FF_RENDERPLANES)) continue; + if (!clip3d->fakeFloor->model) continue; + clip3d->fake3D = FAKE3D_FAKEBACK; + tempsec = *clip3d->fakeFloor->model; + tempsec.floorplane = *clip3d->fakeFloor->top.plane; + tempsec.ceilingplane = *clip3d->fakeFloor->bottom.plane; + if (clip3d->fakeFloor->validcount != validcount) { - fakeFloor->validcount = validcount; - R_3D_NewClip(); + clip3d->fakeFloor->validcount = validcount; + clip3d->NewClip(); } renderline.Render(line, InSubsector, frontsector, &tempsec, floorplane, ceilingplane); // fake } - fakeFloor = NULL; - fake3D = 0; + clip3d->fakeFloor = NULL; + clip3d->fake3D = 0; floorplane = backupfp; ceilingplane = backupcp; } diff --git a/src/swrenderer/scene/r_bsp.h b/src/swrenderer/scene/r_bsp.h index 1cc149792b..b8e2a50818 100644 --- a/src/swrenderer/scene/r_bsp.h +++ b/src/swrenderer/scene/r_bsp.h @@ -17,6 +17,7 @@ #include #include "r_defs.h" #include "swrenderer/line/r_line.h" +#include "swrenderer/scene/r_3dfloors.h" namespace swrenderer { diff --git a/src/swrenderer/scene/r_portal.cpp b/src/swrenderer/scene/r_portal.cpp index 794f11eca9..794c3916b7 100644 --- a/src/swrenderer/scene/r_portal.cpp +++ b/src/swrenderer/scene/r_portal.cpp @@ -117,7 +117,7 @@ namespace swrenderer if (visplanes[MAXVISPLANES] == nullptr) return; - R_3D_EnterSkybox(); + Clip3DFloors::Instance()->EnterSkybox(); CurrentPortalInSkybox = true; int savedextralight = extralight; @@ -252,7 +252,7 @@ namespace swrenderer 
visplaneStack.Push(pl); RenderBSP::Instance()->RenderScene(); - R_3D_ResetClip(); // reset clips (floor/ceiling) + Clip3DFloors::Instance()->ResetClip(); // reset clips (floor/ceiling) R_DrawPlanes(); port->mFlags &= ~PORTSF_INSKYBOX; @@ -303,9 +303,9 @@ namespace swrenderer R_SetViewAngle(); CurrentPortalInSkybox = false; - R_3D_LeaveSkybox(); + Clip3DFloors::Instance()->LeaveSkybox(); - if (fakeActive) return; + if (Clip3DFloors::Instance()->fakeActive) return; for (*freehead = visplanes[MAXVISPLANES], visplanes[MAXVISPLANES] = nullptr; *freehead; ) freehead = &(*freehead)->next; @@ -463,7 +463,7 @@ namespace swrenderer } // some portals have height differences, account for this here - R_3D_EnterSkybox(); // push 3D floor height map + Clip3DFloors::Instance()->EnterSkybox(); // push 3D floor height map CurrentPortalInSkybox = false; // first portal in a skybox should set this variable to false for proper clipping in skyboxes. // first pass, set clipping @@ -473,7 +473,7 @@ namespace swrenderer memcpy(floorclip + pds->x1, &pds->floorclip[0], pds->len * sizeof(*floorclip)); RenderBSP::Instance()->RenderScene(); - R_3D_ResetClip(); // reset clips (floor/ceiling) + Clip3DFloors::Instance()->ResetClip(); // reset clips (floor/ceiling) if (!savedvisibility && camera) camera->renderflags &= ~RF_INVISIBLE; PlaneCycles.Clock(); @@ -501,7 +501,7 @@ namespace swrenderer NetUpdate(); - R_3D_LeaveSkybox(); // pop 3D floor height map + Clip3DFloors::Instance()->LeaveSkybox(); // pop 3D floor height map CurrentPortalUniq = prevuniq2; // draw a red line around a portal if it's being highlighted diff --git a/src/swrenderer/scene/r_things.cpp b/src/swrenderer/scene/r_things.cpp index f99fe515b6..bd4e9b02b3 100644 --- a/src/swrenderer/scene/r_things.cpp +++ b/src/swrenderer/scene/r_things.cpp @@ -1315,12 +1315,14 @@ void R_DrawSprite (vissprite_t *spr) F3DFloor *rover; FDynamicColormap *mybasecolormap; + Clip3DFloors *clip3d = Clip3DFloors::Instance(); + // [RH] Check for particles 
if (!spr->bIsVoxel && spr->pic == NULL) { // kg3D - reject invisible parts - if ((fake3D & FAKE3D_CLIPBOTTOM) && spr->gpos.Z <= sclipBottom) return; - if ((fake3D & FAKE3D_CLIPTOP) && spr->gpos.Z >= sclipTop) return; + if ((clip3d->fake3D & FAKE3D_CLIPBOTTOM) && spr->gpos.Z <= clip3d->sclipBottom) return; + if ((clip3d->fake3D & FAKE3D_CLIPTOP) && spr->gpos.Z >= clip3d->sclipTop) return; R_DrawParticle (spr); return; } @@ -1337,25 +1339,25 @@ void R_DrawSprite (vissprite_t *spr) return; // kg3D - reject invisible parts - if ((fake3D & FAKE3D_CLIPBOTTOM) && spr->gzt <= sclipBottom) return; - if ((fake3D & FAKE3D_CLIPTOP) && spr->gzb >= sclipTop) return; + if ((clip3d->fake3D & FAKE3D_CLIPBOTTOM) && spr->gzt <= clip3d->sclipBottom) return; + if ((clip3d->fake3D & FAKE3D_CLIPTOP) && spr->gzb >= clip3d->sclipTop) return; // kg3D - correct colors now if (!fixedcolormap && fixedlightlev < 0 && spr->sector->e && spr->sector->e->XFloor.lightlist.Size()) { - if (!(fake3D & FAKE3D_CLIPTOP)) + if (!(clip3d->fake3D & FAKE3D_CLIPTOP)) { - sclipTop = spr->sector->ceilingplane.ZatPoint(ViewPos); + clip3d->sclipTop = spr->sector->ceilingplane.ZatPoint(ViewPos); } sector_t *sec = NULL; for (i = spr->sector->e->XFloor.lightlist.Size() - 1; i >= 0; i--) { - if (sclipTop <= spr->sector->e->XFloor.lightlist[i].plane.Zat0()) + if (clip3d->sclipTop <= spr->sector->e->XFloor.lightlist[i].plane.Zat0()) { rover = spr->sector->e->XFloor.lightlist[i].caster; if (rover) { - if (rover->flags & FF_DOUBLESHADOW && sclipTop <= rover->bottom.plane->Zat0()) + if (rover->flags & FF_DOUBLESHADOW && clip3d->sclipTop <= rover->bottom.plane->Zat0()) { break; } @@ -1501,15 +1503,15 @@ void R_DrawSprite (vissprite_t *spr) } } - if (fake3D & FAKE3D_CLIPBOTTOM) + if (clip3d->fake3D & FAKE3D_CLIPBOTTOM) { if (!spr->bIsVoxel) { - double hz = sclipBottom; + double hz = clip3d->sclipBottom; if (spr->fakefloor) { double floorz = spr->fakefloor->top.plane->Zat0(); - if (ViewPos.Z > floorz && floorz == sclipBottom 
) + if (ViewPos.Z > floorz && floorz == clip3d->sclipBottom ) { hz = spr->fakefloor->bottom.plane->Zat0(); } @@ -1520,17 +1522,17 @@ void R_DrawSprite (vissprite_t *spr) botclip = MAX(0, h); } } - hzb = MAX(hzb, sclipBottom); + hzb = MAX(hzb, clip3d->sclipBottom); } - if (fake3D & FAKE3D_CLIPTOP) + if (clip3d->fake3D & FAKE3D_CLIPTOP) { if (!spr->bIsVoxel) { - double hz = sclipTop; + double hz = clip3d->sclipTop; if (spr->fakeceiling != NULL) { double ceilingZ = spr->fakeceiling->bottom.plane->Zat0(); - if (ViewPos.Z < ceilingZ && ceilingZ == sclipTop) + if (ViewPos.Z < ceilingZ && ceilingZ == clip3d->sclipTop) { hz = spr->fakeceiling->top.plane->Zat0(); } @@ -1541,31 +1543,9 @@ void R_DrawSprite (vissprite_t *spr) topclip = short(MIN(h, viewheight)); } } - hzt = MIN(hzt, sclipTop); + hzt = MIN(hzt, clip3d->sclipTop); } -#if 0 - // [RH] Sprites that were split by a drawseg should also be clipped - // by the sector's floor and ceiling. (Not sure how/if to handle this - // with fake floors, since those already do clipping.) - if (spr->bSplitSprite && - (spr->heightsec == NULL || (spr->heightsec->MoreFlags & SECF_IGNOREHEIGHTSEC))) - { - fixed_t h = spr->sector->floorplane.ZatPoint (spr->gx, spr->gy); - h = (centeryfrac - FixedMul (h-viewz, scale)) >> FRACBITS; - if (h < botclip) - { - botclip = MAX (0, h); - } - h = spr->sector->ceilingplane.ZatPoint (spr->gx, spr->gy); - h = (centeryfrac - FixedMul (h-viewz, scale)) >> FRACBITS; - if (h > topclip) - { - topclip = short(MIN(h, viewheight)); - } - } -#endif - if (topclip >= botclip) { spr->Style.BaseColormap = colormap; @@ -1752,7 +1732,7 @@ void R_DrawMaskedSingle (bool renew) if (renew) { - fake3D |= FAKE3D_REFRESHCLIP; + Clip3DFloors::Instance()->fake3D |= FAKE3D_REFRESHCLIP; } for (ds = ds_p; ds-- > firstdrawseg; ) // new -- killough { @@ -1775,7 +1755,8 @@ void R_DrawMasked (void) R_CollectPortals(); R_SortVisSprites (DrewAVoxel ? 
sv_compare2d : sv_compare, firstvissprite - vissprites); - if (height_top == NULL) + Clip3DFloors *clip3d = Clip3DFloors::Instance(); + if (clip3d->height_top == NULL) { // kg3D - no visible 3D floors, normal rendering R_DrawMaskedSingle(false); } @@ -1784,44 +1765,44 @@ void R_DrawMasked (void) HeightLevel *hl; // ceilings - for (hl = height_cur; hl != NULL && hl->height >= ViewPos.Z; hl = hl->prev) + for (hl = clip3d->height_cur; hl != NULL && hl->height >= ViewPos.Z; hl = hl->prev) { if (hl->next) { - fake3D = FAKE3D_CLIPBOTTOM | FAKE3D_CLIPTOP; - sclipTop = hl->next->height; + clip3d->fake3D = FAKE3D_CLIPBOTTOM | FAKE3D_CLIPTOP; + clip3d->sclipTop = hl->next->height; } else { - fake3D = FAKE3D_CLIPBOTTOM; + clip3d->fake3D = FAKE3D_CLIPBOTTOM; } - sclipBottom = hl->height; + clip3d->sclipBottom = hl->height; R_DrawMaskedSingle(true); R_DrawHeightPlanes(hl->height); } // floors - fake3D = FAKE3D_DOWN2UP | FAKE3D_CLIPTOP; - sclipTop = height_top->height; + clip3d->fake3D = FAKE3D_DOWN2UP | FAKE3D_CLIPTOP; + clip3d->sclipTop = clip3d->height_top->height; R_DrawMaskedSingle(true); - hl = height_top; - for (hl = height_top; hl != NULL && hl->height < ViewPos.Z; hl = hl->next) + hl = clip3d->height_top; + for (hl = clip3d->height_top; hl != NULL && hl->height < ViewPos.Z; hl = hl->next) { R_DrawHeightPlanes(hl->height); if (hl->next) { - fake3D = FAKE3D_DOWN2UP | FAKE3D_CLIPTOP | FAKE3D_CLIPBOTTOM; - sclipTop = hl->next->height; + clip3d->fake3D = FAKE3D_DOWN2UP | FAKE3D_CLIPTOP | FAKE3D_CLIPBOTTOM; + clip3d->sclipTop = hl->next->height; } else { - fake3D = FAKE3D_DOWN2UP | FAKE3D_CLIPBOTTOM; + clip3d->fake3D = FAKE3D_DOWN2UP | FAKE3D_CLIPBOTTOM; } - sclipBottom = hl->height; + clip3d->sclipBottom = hl->height; R_DrawMaskedSingle(true); } - R_3D_DeleteHeights(); - fake3D = 0; + clip3d->DeleteHeights(); + clip3d->fake3D = 0; } R_DrawPlayerSprites(); } diff --git a/src/swrenderer/segments/r_drawsegment.cpp b/src/swrenderer/segments/r_drawsegment.cpp index 
2887a26f77..1224dbe916 100644 --- a/src/swrenderer/segments/r_drawsegment.cpp +++ b/src/swrenderer/segments/r_drawsegment.cpp @@ -170,15 +170,17 @@ namespace swrenderer rw_lightstep = ds->lightstep; rw_light = ds->light + (x1 - ds->x1) * rw_lightstep; + Clip3DFloors *clip3d = Clip3DFloors::Instance(); + if (fixedlightlev < 0) { - if (!(fake3D & FAKE3D_CLIPTOP)) + if (!(clip3d->fake3D & FAKE3D_CLIPTOP)) { - sclipTop = sec->ceilingplane.ZatPoint(ViewPos); + clip3d->sclipTop = sec->ceilingplane.ZatPoint(ViewPos); } for (i = frontsector->e->XFloor.lightlist.Size() - 1; i >= 0; i--) { - if (sclipTop <= frontsector->e->XFloor.lightlist[i].plane.Zat0()) + if (clip3d->sclipTop <= frontsector->e->XFloor.lightlist[i].plane.Zat0()) { lightlist_t *lit = &frontsector->e->XFloor.lightlist[i]; basecolormap = lit->extra_colormap; @@ -277,12 +279,12 @@ namespace swrenderer goto clearfog; } - if ((fake3D & FAKE3D_CLIPBOTTOM) && textop < sclipBottom - ViewPos.Z) + if ((clip3d->fake3D & FAKE3D_CLIPBOTTOM) && textop < clip3d->sclipBottom - ViewPos.Z) { notrelevant = true; goto clearfog; } - if ((fake3D & FAKE3D_CLIPTOP) && textop - texheight > sclipTop - ViewPos.Z) + if ((clip3d->fake3D & FAKE3D_CLIPTOP) && textop - texheight > clip3d->sclipTop - ViewPos.Z) { notrelevant = true; goto clearfog; @@ -293,17 +295,17 @@ namespace swrenderer WallC.sx1 = ds->sx1; WallC.sx2 = ds->sx2; - if (fake3D & FAKE3D_CLIPTOP) + if (clip3d->fake3D & FAKE3D_CLIPTOP) { - R_CreateWallSegmentY(wallupper, textop < sclipTop - ViewPos.Z ? textop : sclipTop - ViewPos.Z, &WallC); + R_CreateWallSegmentY(wallupper, textop < clip3d->sclipTop - ViewPos.Z ? textop : clip3d->sclipTop - ViewPos.Z, &WallC); } else { R_CreateWallSegmentY(wallupper, textop, &WallC); } - if (fake3D & FAKE3D_CLIPBOTTOM) + if (clip3d->fake3D & FAKE3D_CLIPBOTTOM) { - R_CreateWallSegmentY(walllower, textop - texheight > sclipBottom - ViewPos.Z ? 
textop - texheight : sclipBottom - ViewPos.Z, &WallC); + R_CreateWallSegmentY(walllower, textop - texheight > clip3d->sclipBottom - ViewPos.Z ? textop - texheight : clip3d->sclipBottom - ViewPos.Z, &WallC); } else { @@ -321,7 +323,7 @@ namespace swrenderer walllower[i] = mfloorclip[i]; } - if (CurrentSkybox) + if (clip3d->CurrentSkybox) { // Midtex clipping doesn't work properly with skyboxes, since you're normally below the floor // or above the ceiling, so the appropriate end won't be clipped automatically when adding // this drawseg. @@ -379,7 +381,7 @@ namespace swrenderer WallC.sx1 = ds->sx1; WallC.sx2 = ds->sx2; - if (CurrentSkybox) + if (clip3d->CurrentSkybox) { // Midtex clipping doesn't work properly with skyboxes, since you're normally below the floor // or above the ceiling, so the appropriate end won't be clipped automatically when adding // this drawseg. @@ -390,9 +392,9 @@ namespace swrenderer } } - if (fake3D & FAKE3D_CLIPTOP) + if (clip3d->fake3D & FAKE3D_CLIPTOP) { - R_CreateWallSegmentY(wallupper, sclipTop - ViewPos.Z, &WallC); + R_CreateWallSegmentY(wallupper, clip3d->sclipTop - ViewPos.Z, &WallC); for (i = x1; i < x2; i++) { if (wallupper[i] < mceilingclip[i]) @@ -400,9 +402,9 @@ namespace swrenderer } mceilingclip = wallupper; } - if (fake3D & FAKE3D_CLIPBOTTOM) + if (clip3d->fake3D & FAKE3D_CLIPBOTTOM) { - R_CreateWallSegmentY(walllower, sclipBottom - ViewPos.Z, &WallC); + R_CreateWallSegmentY(walllower, clip3d->sclipBottom - ViewPos.Z, &WallC); for (i = x1; i < x2; i++) { if (walllower[i] > mfloorclip[i]) @@ -424,7 +426,7 @@ namespace swrenderer } if (!notrelevant) { - if (fake3D & FAKE3D_REFRESHCLIP) + if (clip3d->fake3D & FAKE3D_REFRESHCLIP) { if (!wrap) { @@ -524,8 +526,9 @@ namespace swrenderer WallC.tright.Y = ds->cy + ds->cdy; WallT = ds->tmapvals; - R_CreateWallSegmentY(wallupper, sclipTop - ViewPos.Z, &WallC); - R_CreateWallSegmentY(walllower, sclipBottom - ViewPos.Z, &WallC); + Clip3DFloors *clip3d = Clip3DFloors::Instance(); + 
R_CreateWallSegmentY(wallupper, clip3d->sclipTop - ViewPos.Z, &WallC); + R_CreateWallSegmentY(walllower, clip3d->sclipBottom - ViewPos.Z, &WallC); for (i = x1; i < x2; i++) { @@ -573,15 +576,17 @@ namespace swrenderer floorHeight = backsector->CenterFloor(); ceilingHeight = backsector->CenterCeiling(); + Clip3DFloors *clip3d = Clip3DFloors::Instance(); + // maybe fix clipheights - if (!(fake3D & FAKE3D_CLIPBOTTOM)) sclipBottom = floorHeight; - if (!(fake3D & FAKE3D_CLIPTOP)) sclipTop = ceilingHeight; + if (!(clip3d->fake3D & FAKE3D_CLIPBOTTOM)) clip3d->sclipBottom = floorHeight; + if (!(clip3d->fake3D & FAKE3D_CLIPTOP)) clip3d->sclipTop = ceilingHeight; // maybe not visible - if (sclipBottom >= frontsector->CenterCeiling()) return; - if (sclipTop <= frontsector->CenterFloor()) return; + if (clip3d->sclipBottom >= frontsector->CenterCeiling()) return; + if (clip3d->sclipTop <= frontsector->CenterFloor()) return; - if (fake3D & FAKE3D_DOWN2UP) + if (clip3d->fake3D & FAKE3D_DOWN2UP) { // bottom to viewz last = 0; for (i = backsector->e->XFloor.ffloors.Size() - 1; i >= 0; i--) @@ -592,7 +597,7 @@ namespace swrenderer // visible? passed = 0; if (!(rover->flags & FF_RENDERSIDES) || rover->top.plane->isSlope() || rover->bottom.plane->isSlope() || - rover->top.plane->Zat0() <= sclipBottom || + rover->top.plane->Zat0() <= clip3d->sclipBottom || rover->bottom.plane->Zat0() >= ceilingHeight || rover->top.plane->Zat0() <= floorHeight) { @@ -607,7 +612,7 @@ namespace swrenderer } rw_pic = nullptr; - if (rover->bottom.plane->Zat0() >= sclipTop || passed) + if (rover->bottom.plane->Zat0() >= clip3d->sclipTop || passed) { if (last) { @@ -629,8 +634,8 @@ namespace swrenderer if (fover->top.plane->isSlope() || fover->bottom.plane->isSlope()) continue; // visible? 
- if (fover->top.plane->Zat0() <= sclipBottom) continue; // no - if (fover->bottom.plane->Zat0() >= sclipTop) + if (fover->top.plane->Zat0() <= clip3d->sclipBottom) continue; // no + if (fover->bottom.plane->Zat0() >= clip3d->sclipTop) { // no, last possible fover = nullptr; break; @@ -682,8 +687,8 @@ namespace swrenderer if (fover->top.plane->isSlope() || fover->bottom.plane->isSlope()) continue; // visible? - if (fover->top.plane->Zat0() <= sclipBottom) continue; // no - if (fover->bottom.plane->Zat0() >= sclipTop) + if (fover->top.plane->Zat0() <= clip3d->sclipBottom) continue; // no + if (fover->bottom.plane->Zat0() >= clip3d->sclipTop) { // visible, last possible fover = nullptr; break; @@ -733,7 +738,7 @@ namespace swrenderer { for (j = backsector->e->XFloor.lightlist.Size() - 1; j >= 0; j--) { - if (sclipTop <= backsector->e->XFloor.lightlist[j].plane.Zat0()) + if (clip3d->sclipTop <= backsector->e->XFloor.lightlist[j].plane.Zat0()) { lightlist_t *lit = &backsector->e->XFloor.lightlist[j]; basecolormap = lit->extra_colormap; @@ -746,7 +751,7 @@ namespace swrenderer { for (j = frontsector->e->XFloor.lightlist.Size() - 1; j >= 0; j--) { - if (sclipTop <= frontsector->e->XFloor.lightlist[j].plane.Zat0()) + if (clip3d->sclipTop <= frontsector->e->XFloor.lightlist[j].plane.Zat0()) { lightlist_t *lit = &frontsector->e->XFloor.lightlist[j]; basecolormap = lit->extra_colormap; @@ -775,7 +780,7 @@ namespace swrenderer passed = 0; if (!(rover->flags & FF_RENDERSIDES) || rover->top.plane->isSlope() || rover->bottom.plane->isSlope() || - rover->bottom.plane->Zat0() >= sclipTop || + rover->bottom.plane->Zat0() >= clip3d->sclipTop || rover->top.plane->Zat0() <= floorHeight || rover->bottom.plane->Zat0() >= ceilingHeight) { @@ -789,7 +794,7 @@ namespace swrenderer } } rw_pic = nullptr; - if (rover->top.plane->Zat0() <= sclipBottom || passed) + if (rover->top.plane->Zat0() <= clip3d->sclipBottom || passed) { // maybe wall from inside rendering? 
fover = nullptr; for (j = 0; j < (int)frontsector->e->XFloor.ffloors.Size(); j++) @@ -806,8 +811,8 @@ namespace swrenderer if (fover->top.plane->isSlope() || fover->bottom.plane->isSlope()) continue; // visible? - if (fover->bottom.plane->Zat0() >= sclipTop) continue; // no - if (fover->top.plane->Zat0() <= sclipBottom) + if (fover->bottom.plane->Zat0() >= clip3d->sclipTop) continue; // no + if (fover->top.plane->Zat0() <= clip3d->sclipBottom) { // no, last possible fover = nullptr; break; @@ -858,8 +863,8 @@ namespace swrenderer if (fover->top.plane->isSlope() || fover->bottom.plane->isSlope()) continue; // visible? - if (fover->bottom.plane->Zat0() >= sclipTop) continue; // no - if (fover->top.plane->Zat0() <= sclipBottom) + if (fover->bottom.plane->Zat0() >= clip3d->sclipTop) continue; // no + if (fover->top.plane->Zat0() <= clip3d->sclipBottom) { // visible, last possible fover = nullptr; break; @@ -907,7 +912,7 @@ namespace swrenderer { for (j = backsector->e->XFloor.lightlist.Size() - 1; j >= 0; j--) { - if (sclipTop <= backsector->e->XFloor.lightlist[j].plane.Zat0()) + if (clip3d->sclipTop <= backsector->e->XFloor.lightlist[j].plane.Zat0()) { lightlist_t *lit = &backsector->e->XFloor.lightlist[j]; basecolormap = lit->extra_colormap; @@ -920,7 +925,7 @@ namespace swrenderer { for (j = frontsector->e->XFloor.lightlist.Size() - 1; j >= 0; j--) { - if (sclipTop <= frontsector->e->XFloor.lightlist[j].plane.Zat0()) + if (clip3d->sclipTop <= frontsector->e->XFloor.lightlist[j].plane.Zat0()) { lightlist_t *lit = &frontsector->e->XFloor.lightlist[j]; basecolormap = lit->extra_colormap; From 47ca45bf18054173396bafc44fcc7e8cdd274e5b Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 4 Jan 2017 19:03:33 +0100 Subject: [PATCH 648/912] Convert FakeSide to enum class --- src/swrenderer/scene/r_bsp.cpp | 8 ++++---- src/swrenderer/scene/r_bsp.h | 16 ++++++++-------- src/swrenderer/scene/r_things.cpp | 16 ++++++++-------- src/swrenderer/scene/r_things.h | 3 ++- 
src/swrenderer/things/r_particle.cpp | 6 +++--- src/swrenderer/things/r_particle.h | 3 ++- src/swrenderer/things/r_visiblesprite.h | 3 ++- src/swrenderer/things/r_wallsprite.cpp | 2 +- 8 files changed, 30 insertions(+), 27 deletions(-) diff --git a/src/swrenderer/scene/r_bsp.cpp b/src/swrenderer/scene/r_bsp.cpp index 5b7daa8b23..90866a3a45 100644 --- a/src/swrenderer/scene/r_bsp.cpp +++ b/src/swrenderer/scene/r_bsp.cpp @@ -85,7 +85,7 @@ namespace swrenderer *ceilinglightlevel = sec->GetCeilingLight(); } - FakeSide = FAKED_Center; + FakeSide = WaterFakeSide::Center; const sector_t *s = sec->GetHeightSec(); if (s != NULL) @@ -125,7 +125,7 @@ namespace swrenderer *ceilinglightlevel = s->GetCeilingLight(); } } - FakeSide = FAKED_BelowFloor; + FakeSide = WaterFakeSide::BelowFloor; return tempsec; } return sec; @@ -224,7 +224,7 @@ namespace swrenderer *ceilinglightlevel = s->GetCeilingLight(); } } - FakeSide = FAKED_BelowFloor; + FakeSide = WaterFakeSide::BelowFloor; } else if (heightsec && heightsec->ceilingplane.PointOnSide(ViewPos) <= 0 && orgceilz > refceilz && !(s->MoreFlags & SECF_FAKEFLOORONLY)) @@ -261,7 +261,7 @@ namespace swrenderer *ceilinglightlevel = s->GetCeilingLight(); } } - FakeSide = FAKED_AboveCeiling; + FakeSide = WaterFakeSide::AboveCeiling; } sec = tempsec; // Use other sector } diff --git a/src/swrenderer/scene/r_bsp.h b/src/swrenderer/scene/r_bsp.h index b8e2a50818..4a47da5935 100644 --- a/src/swrenderer/scene/r_bsp.h +++ b/src/swrenderer/scene/r_bsp.h @@ -28,11 +28,11 @@ namespace swrenderer // the texture calculations. 
#define TOO_CLOSE_Z (3072.0 / (1<<12)) - enum + enum class WaterFakeSide { - FAKED_Center, - FAKED_BelowFloor, - FAKED_AboveCeiling + Center, + BelowFloor, + AboveCeiling }; class RenderBSP @@ -57,10 +57,10 @@ namespace swrenderer void AddPolyobjs(subsector_t *sub); void FakeDrawLoop(subsector_t *sub, visplane_t *floorplane, visplane_t *ceilingplane); - subsector_t *InSubsector; - sector_t *frontsector; - uint8_t FakeSide; - bool r_fakingunderwater; + subsector_t *InSubsector = nullptr; + sector_t *frontsector = nullptr; + WaterFakeSide FakeSide = WaterFakeSide::Center; + bool r_fakingunderwater = false; SWRenderLine renderline; }; diff --git a/src/swrenderer/scene/r_things.cpp b/src/swrenderer/scene/r_things.cpp index bd4e9b02b3..3b00ccbd41 100644 --- a/src/swrenderer/scene/r_things.cpp +++ b/src/swrenderer/scene/r_things.cpp @@ -602,7 +602,7 @@ void R_DrawVisVoxel(vissprite_t *spr, int minslabz, int maxslabz, short *cliptop // R_ProjectSprite // Generates a vissprite for a thing if it might be visible. 
// -void R_ProjectSprite (AActor *thing, int fakeside, F3DFloor *fakefloor, F3DFloor *fakeceiling, sector_t *current_sector) +void R_ProjectSprite (AActor *thing, WaterFakeSide fakeside, F3DFloor *fakefloor, F3DFloor *fakeceiling, sector_t *current_sector) { double tr_x; double tr_y; @@ -817,12 +817,12 @@ void R_ProjectSprite (AActor *thing, int fakeside, F3DFloor *fakefloor, F3DFloor if (heightsec != NULL) // only clip things which are in special sectors { - if (fakeside == FAKED_AboveCeiling) + if (fakeside == WaterFakeSide::AboveCeiling) { if (gzt < heightsec->ceilingplane.ZatPoint(pos)) return; } - else if (fakeside == FAKED_BelowFloor) + else if (fakeside == WaterFakeSide::BelowFloor) { if (gzb >= heightsec->floorplane.ZatPoint(pos)) return; @@ -1035,7 +1035,7 @@ void R_ProjectSprite (AActor *thing, int fakeside, F3DFloor *fakefloor, F3DFloor // // killough 9/18/98: add lightlevel as parameter, fixing underwater lighting // [RH] Save which side of heightsec sprite is on here. -void R_AddSprites (sector_t *sec, int lightlevel, int fakeside) +void R_AddSprites (sector_t *sec, int lightlevel, WaterFakeSide fakeside) { F3DFloor *fakeceiling = NULL; F3DFloor *fakefloor = NULL; @@ -1447,12 +1447,12 @@ void R_DrawSprite (vissprite_t *spr) if (spr->heightsec && !(spr->heightsec->MoreFlags & SECF_IGNOREHEIGHTSEC)) { // only things in specially marked sectors - if (spr->FakeFlatStat != FAKED_AboveCeiling) + if (spr->FakeFlatStat != WaterFakeSide::AboveCeiling) { double hz = spr->heightsec->floorplane.ZatPoint(spr->gpos); int h = xs_RoundToInt(CenterY - (hz - ViewPos.Z) * scale); - if (spr->FakeFlatStat == FAKED_BelowFloor) + if (spr->FakeFlatStat == WaterFakeSide::BelowFloor) { // seen below floor: clip top if (!spr->bIsVoxel && h > topclip) { @@ -1469,12 +1469,12 @@ void R_DrawSprite (vissprite_t *spr) hzb = MAX(hzb, hz); } } - if (spr->FakeFlatStat != FAKED_BelowFloor && !(spr->heightsec->MoreFlags & SECF_FAKEFLOORONLY)) + if (spr->FakeFlatStat != 
WaterFakeSide::BelowFloor && !(spr->heightsec->MoreFlags & SECF_FAKEFLOORONLY)) { double hz = spr->heightsec->ceilingplane.ZatPoint(spr->gpos); int h = xs_RoundToInt(CenterY - (hz - ViewPos.Z) * scale); - if (spr->FakeFlatStat == FAKED_AboveCeiling) + if (spr->FakeFlatStat == WaterFakeSide::AboveCeiling) { // seen above ceiling: clip bottom if (!spr->bIsVoxel && h < botclip) { diff --git a/src/swrenderer/scene/r_things.h b/src/swrenderer/scene/r_things.h index 2354f7c0f4..5782ea02b6 100644 --- a/src/swrenderer/scene/r_things.h +++ b/src/swrenderer/scene/r_things.h @@ -24,6 +24,7 @@ #define __R_THINGS__ #include "swrenderer/things/r_visiblesprite.h" +#include "swrenderer/scene/r_bsp.h" struct particle_t; struct FVoxel; @@ -63,7 +64,7 @@ void R_DrawMaskedColumn (FTexture *texture, fixed_t column, bool unmasked = fals void R_CacheSprite (spritedef_t *sprite); void R_SortVisSprites (int (*compare)(const void *, const void *), size_t first); -void R_AddSprites (sector_t *sec, int lightlevel, int fakeside); +void R_AddSprites (sector_t *sec, int lightlevel, WaterFakeSide fakeside); void R_DrawSprites (); void R_ClearSprites (); void R_DrawMasked (); diff --git a/src/swrenderer/things/r_particle.cpp b/src/swrenderer/things/r_particle.cpp index 9878d362b6..4897937903 100644 --- a/src/swrenderer/things/r_particle.cpp +++ b/src/swrenderer/things/r_particle.cpp @@ -56,7 +56,7 @@ namespace swrenderer { - void R_ProjectParticle(particle_t *particle, const sector_t *sector, int shade, int fakeside) + void R_ProjectParticle(particle_t *particle, const sector_t *sector, int shade, WaterFakeSide fakeside) { double tr_x, tr_y; double tx, ty; @@ -134,7 +134,7 @@ namespace swrenderer if (heightsec) // only clip things which are in special sectors { - if (fakeside == FAKED_AboveCeiling) + if (fakeside == WaterFakeSide::AboveCeiling) { topplane = &sector->ceilingplane; botplane = &heightsec->ceilingplane; @@ -142,7 +142,7 @@ namespace swrenderer botpic =
heightsec->GetTexture(sector_t::ceiling); map = heightsec->ColorMap; } - else if (fakeside == FAKED_BelowFloor) + else if (fakeside == WaterFakeSide::BelowFloor) { topplane = &heightsec->floorplane; botplane = &sector->floorplane; diff --git a/src/swrenderer/things/r_particle.h b/src/swrenderer/things/r_particle.h index a0413d388c..257a96aa39 100644 --- a/src/swrenderer/things/r_particle.h +++ b/src/swrenderer/things/r_particle.h @@ -14,10 +14,11 @@ #pragma once #include "r_visiblesprite.h" +#include "swrenderer/scene/r_bsp.h" namespace swrenderer { - void R_ProjectParticle(particle_t *, const sector_t *sector, int shade, int fakeside); + void R_ProjectParticle(particle_t *, const sector_t *sector, int shade, WaterFakeSide fakeside); void R_DrawParticle(vissprite_t *); void R_DrawMaskedSegsBehindParticle(const vissprite_t *vis); } diff --git a/src/swrenderer/things/r_visiblesprite.h b/src/swrenderer/things/r_visiblesprite.h index 04ef665e63..96eda392cd 100644 --- a/src/swrenderer/things/r_visiblesprite.h +++ b/src/swrenderer/things/r_visiblesprite.h @@ -14,6 +14,7 @@ #pragma once #include "swrenderer/line/r_line.h" +#include "swrenderer/scene/r_bsp.h" struct particle_t; struct FVoxel; @@ -76,7 +77,7 @@ namespace swrenderer uint8_t bWallSprite : 1; // [RH] This is a wall sprite uint8_t bSplitSprite : 1; // [RH] Sprite was split by a drawseg uint8_t bInMirror : 1; // [RH] Sprite is "inside" a mirror - uint8_t FakeFlatStat; // [RH] which side of fake/floor ceiling sprite is on + WaterFakeSide FakeFlatStat; // [RH] which side of fake/floor ceiling sprite is on short renderflags; uint32_t Translation; // [RH] for color translation visstyle_t Style; diff --git a/src/swrenderer/things/r_wallsprite.cpp b/src/swrenderer/things/r_wallsprite.cpp index b4c60cb95d..34c398c641 100644 --- a/src/swrenderer/things/r_wallsprite.cpp +++ b/src/swrenderer/things/r_wallsprite.cpp @@ -118,7 +118,7 @@ namespace swrenderer vis->Style.RenderStyle = thing->RenderStyle; vis->FillColor =
thing->fillcolor; vis->Translation = thing->Translation; - vis->FakeFlatStat = 0; + vis->FakeFlatStat = WaterFakeSide::Center; vis->Style.Alpha = float(thing->Alpha); vis->fakefloor = NULL; vis->fakeceiling = NULL; From 4bcc34f01ce98a37fecdac2c9c6ba70507b692e7 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 4 Jan 2017 19:13:58 +0100 Subject: [PATCH 649/912] Move openings deinit to r_memory --- src/swrenderer/r_main.cpp | 9 +-------- src/swrenderer/r_memory.cpp | 9 +++++++++ src/swrenderer/r_memory.h | 1 + 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/src/swrenderer/r_main.cpp b/src/swrenderer/r_main.cpp index cfb9daf51b..a5978b5a8c 100644 --- a/src/swrenderer/r_main.cpp +++ b/src/swrenderer/r_main.cpp @@ -102,7 +102,6 @@ static void R_ShutdownRenderer(); // EXTERNAL DATA DECLARATIONS ---------------------------------------------- -extern short *openings; extern int fuzzviewheight; @@ -391,13 +390,7 @@ static void R_ShutdownRenderer() R_DeinitSprites(); R_DeinitPlanes(); Clip3DFloors::Instance()->Cleanup(); - // Free openings - if (openings != NULL) - { - M_Free (openings); - openings = NULL; - } - + R_DeinitOpenings(); R_FreeDrawSegs(); } diff --git a/src/swrenderer/r_memory.cpp b/src/swrenderer/r_memory.cpp index dd5fd83f88..0b3d3c1d4b 100644 --- a/src/swrenderer/r_memory.cpp +++ b/src/swrenderer/r_memory.cpp @@ -61,4 +61,13 @@ namespace swrenderer { lastopening = 0; } + + void R_DeinitOpenings() + { + if (openings != nullptr) + { + M_Free(openings); + openings = nullptr; + } + } } diff --git a/src/swrenderer/r_memory.h b/src/swrenderer/r_memory.h index fc0c845e29..5df7504807 100644 --- a/src/swrenderer/r_memory.h +++ b/src/swrenderer/r_memory.h @@ -19,4 +19,5 @@ namespace swrenderer ptrdiff_t R_NewOpening(ptrdiff_t len); void R_FreeOpenings(); + void R_DeinitOpenings(); } From e806b9424c7d407361959543f5d9026d2d4d3ec2 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 5 Jan 2017 04:55:26 +0100 Subject: [PATCH 650/912] Move 
r_portal into a class --- src/swrenderer/line/r_line.cpp | 33 ++++++---- src/swrenderer/line/r_walldraw.cpp | 4 +- src/swrenderer/plane/r_flatplane.cpp | 3 +- src/swrenderer/plane/r_skyplane.cpp | 8 ++- src/swrenderer/plane/r_slopeplane.cpp | 3 +- src/swrenderer/plane/r_visibleplane.cpp | 36 ++++++----- src/swrenderer/r_main.cpp | 24 +------- src/swrenderer/r_main.h | 2 - src/swrenderer/r_swrenderer.cpp | 5 +- src/swrenderer/scene/r_bsp.cpp | 2 +- src/swrenderer/scene/r_portal.cpp | 74 ++++++++++------------- src/swrenderer/scene/r_portal.h | 52 ++++++++++++---- src/swrenderer/scene/r_things.cpp | 47 ++++++++------ src/swrenderer/segments/r_drawsegment.cpp | 6 +- src/swrenderer/things/r_particle.cpp | 12 ++-- src/swrenderer/things/r_wallsprite.cpp | 12 ++-- 16 files changed, 179 insertions(+), 144 deletions(-) diff --git a/src/swrenderer/line/r_line.cpp b/src/swrenderer/line/r_line.cpp index feb261d671..813ff43c3a 100644 --- a/src/swrenderer/line/r_line.cpp +++ b/src/swrenderer/line/r_line.cpp @@ -78,7 +78,9 @@ namespace swrenderer if (WallC.Init(pt1, pt2, 32.0 / (1 << 12))) return; - if (WallC.sx1 >= WindowRight || WallC.sx2 <= WindowLeft) + RenderPortal *renderportal = RenderPortal::Instance(); + + if (WallC.sx1 >= renderportal->WindowRight || WallC.sx2 <= renderportal->WindowLeft) return; if (line->linedef == NULL) @@ -92,7 +94,7 @@ namespace swrenderer // reject lines that aren't seen from the portal (if any) // [ZZ] 10.01.2016: lines inside a skybox shouldn't be clipped, although this imposes some limitations on portals in skyboxes. 
- if (!CurrentPortalInSkybox && CurrentPortal && P_ClipLineToPortal(line->linedef, CurrentPortal->dst, ViewPos)) + if (!renderportal->CurrentPortalInSkybox && renderportal->CurrentPortal && P_ClipLineToPortal(line->linedef, renderportal->CurrentPortal->dst, ViewPos)) return; vertex_t *v1, *v2; @@ -162,12 +164,12 @@ namespace swrenderer if (rw_frontcz1 > rw_backcz1 || rw_frontcz2 > rw_backcz2) { rw_havehigh = true; - R_CreateWallSegmentYSloped(wallupper, backsector->ceilingplane, &WallC, curline, MirrorFlags & RF_XFLIP); + R_CreateWallSegmentYSloped(wallupper, backsector->ceilingplane, &WallC, curline, renderportal->MirrorFlags & RF_XFLIP); } if (rw_frontfz1 < rw_backfz1 || rw_frontfz2 < rw_backfz2) { rw_havelow = true; - R_CreateWallSegmentYSloped(walllower, backsector->floorplane, &WallC, curline, MirrorFlags & RF_XFLIP); + R_CreateWallSegmentYSloped(walllower, backsector->floorplane, &WallC, curline, renderportal->MirrorFlags & RF_XFLIP); } // Portal @@ -268,8 +270,8 @@ namespace swrenderer } else { - rw_ceilstat = R_CreateWallSegmentYSloped(walltop, frontsector->ceilingplane, &WallC, curline, MirrorFlags & RF_XFLIP); - rw_floorstat = R_CreateWallSegmentYSloped(wallbottom, frontsector->floorplane, &WallC, curline, MirrorFlags & RF_XFLIP); + rw_ceilstat = R_CreateWallSegmentYSloped(walltop, frontsector->ceilingplane, &WallC, curline, renderportal->MirrorFlags & RF_XFLIP); + rw_floorstat = R_CreateWallSegmentYSloped(wallbottom, frontsector->floorplane, &WallC, curline, renderportal->MirrorFlags & RF_XFLIP); // [RH] treat off-screen walls as solid #if 0 // Maybe later... 
@@ -339,8 +341,10 @@ namespace swrenderer rw_offset = FLOAT2FIXED(sidedef->GetTextureXOffset(side_t::mid)); rw_light = rw_lightleft + rw_lightstep * (start - WallC.sx1); + + RenderPortal *renderportal = RenderPortal::Instance(); - draw_segment->CurrentPortalUniq = CurrentPortalUniq; + draw_segment->CurrentPortalUniq = renderportal->CurrentPortalUniq; draw_segment->sx1 = WallC.sx1; draw_segment->sx2 = WallC.sx2; draw_segment->sz1 = WallC.sz1; @@ -715,7 +719,8 @@ namespace swrenderer // wall but nothing to draw for it. // Recalculate walltop so that the wall is clipped by the back sector's // ceiling instead of the front sector's ceiling. - R_CreateWallSegmentYSloped(walltop, backsector->ceilingplane, &WallC, curline, MirrorFlags & RF_XFLIP); + RenderPortal *renderportal = RenderPortal::Instance(); + R_CreateWallSegmentYSloped(walltop, backsector->ceilingplane, &WallC, curline, renderportal->MirrorFlags & RF_XFLIP); } // Putting sky ceilings on the front and back of a line alters the way unpegged // positioning works. 
@@ -1166,8 +1171,10 @@ namespace swrenderer tleft.Y = float(pt1.X * ViewTanCos + pt1.Y * ViewTanSin); tright.Y = float(pt2.X * ViewTanCos + pt2.Y * ViewTanSin); + + RenderPortal *renderportal = RenderPortal::Instance(); - if (MirrorFlags & RF_XFLIP) + if (renderportal->MirrorFlags & RF_XFLIP) { float t = -tleft.X; tleft.X = -tright.X; @@ -1222,8 +1229,10 @@ namespace swrenderer { const FVector2 *left = &wallc->tleft; const FVector2 *right = &wallc->tright; + + RenderPortal *renderportal = RenderPortal::Instance(); - if (MirrorFlags & RF_XFLIP) + if (renderportal->MirrorFlags & RF_XFLIP) { swapvalues(left, right); } @@ -1241,8 +1250,10 @@ namespace swrenderer double fullx2 = right.X * ViewSin - right.Y * ViewCos; double fully1 = left.X * ViewTanCos + left.Y * ViewTanSin; double fully2 = right.X * ViewTanCos + right.Y * ViewTanSin; + + RenderPortal *renderportal = RenderPortal::Instance(); - if (MirrorFlags & RF_XFLIP) + if (renderportal->MirrorFlags & RF_XFLIP) { fullx1 = -fullx1; fullx2 = -fullx2; diff --git a/src/swrenderer/line/r_walldraw.cpp b/src/swrenderer/line/r_walldraw.cpp index 981ae2dde2..29309f0223 100644 --- a/src/swrenderer/line/r_walldraw.cpp +++ b/src/swrenderer/line/r_walldraw.cpp @@ -442,11 +442,13 @@ namespace swrenderer assert(WallC.sx1 <= x1); assert(WallC.sx2 >= x2); + + RenderPortal *renderportal = RenderPortal::Instance(); // kg3D - fake floors instead of zdoom light list for (unsigned int i = 0; i < frontsector->e->XFloor.lightlist.Size(); i++) { - int j = R_CreateWallSegmentYSloped(most3, frontsector->e->XFloor.lightlist[i].plane, &WallC, curline, MirrorFlags & RF_XFLIP); + int j = R_CreateWallSegmentYSloped(most3, frontsector->e->XFloor.lightlist[i].plane, &WallC, curline, renderportal->MirrorFlags & RF_XFLIP); if (j != 3) { for (int j = x1; j < x2; ++j) diff --git a/src/swrenderer/plane/r_flatplane.cpp b/src/swrenderer/plane/r_flatplane.cpp index df8ecc19a8..ba88dd7189 100644 --- a/src/swrenderer/plane/r_flatplane.cpp +++ 
b/src/swrenderer/plane/r_flatplane.cpp @@ -93,7 +93,8 @@ namespace swrenderer ystep = -sin(planeang) / FocalLengthX; // [RH] flip for mirrors - if (MirrorFlags & RF_XFLIP) + RenderPortal *renderportal = RenderPortal::Instance(); + if (renderportal->MirrorFlags & RF_XFLIP) { xstep = -xstep; ystep = -ystep; diff --git a/src/swrenderer/plane/r_skyplane.cpp b/src/swrenderer/plane/r_skyplane.cpp index 3cf4176c9d..727a04e511 100644 --- a/src/swrenderer/plane/r_skyplane.cpp +++ b/src/swrenderer/plane/r_skyplane.cpp @@ -314,6 +314,8 @@ namespace swrenderer void R_DrawSkyColumnStripe(int start_x, int y1, int y2, int columns, double scale, double texturemid, double yrepeat) { using namespace drawerargs; + + RenderPortal *renderportal = RenderPortal::Instance(); uint32_t height = frontskytex->GetHeight(); @@ -327,7 +329,7 @@ namespace swrenderer uint32_t uv_step = (uint32_t)(v_step * 0x01000000); int x = start_x + i; - if (MirrorFlags & RF_XFLIP) + if (renderportal->MirrorFlags & RF_XFLIP) x = (viewwidth - x); uint32_t ang, angle1, angle2; @@ -431,8 +433,10 @@ namespace swrenderer { swall[x] = swal; } + + RenderPortal *renderportal = RenderPortal::Instance(); - if (MirrorFlags & RF_XFLIP) + if (renderportal->MirrorFlags & RF_XFLIP) { for (x = pl->left; x < pl->right; ++x) { diff --git a/src/swrenderer/plane/r_slopeplane.cpp b/src/swrenderer/plane/r_slopeplane.cpp index 7a4a8c8db4..e68cee2ab1 100644 --- a/src/swrenderer/plane/r_slopeplane.cpp +++ b/src/swrenderer/plane/r_slopeplane.cpp @@ -143,7 +143,8 @@ namespace swrenderer plane_su *= 4294967296.f; plane_sv *= 4294967296.f; - if (MirrorFlags & RF_XFLIP) + RenderPortal *renderportal = RenderPortal::Instance(); + if (renderportal->MirrorFlags & RF_XFLIP) { plane_su[0] = -plane_su[0]; plane_sv[0] = -plane_sv[0]; diff --git a/src/swrenderer/plane/r_visibleplane.cpp b/src/swrenderer/plane/r_visibleplane.cpp index 14c830fc97..d1b3084be0 100644 --- a/src/swrenderer/plane/r_visibleplane.cpp +++ 
b/src/swrenderer/plane/r_visibleplane.cpp @@ -250,6 +250,8 @@ namespace swrenderer // New visplane algorithm uses hash table -- killough hash = isskybox ? MAXVISPLANES : visplane_hash(picnum.GetIndex(), lightlevel, height); + + RenderPortal *renderportal = RenderPortal::Instance(); for (check = visplanes[hash]; check; check = check->next) // killough { @@ -260,9 +262,9 @@ namespace swrenderer if (portal->mType != PORTS_SKYVIEWPOINT) { // This skybox is really a stacked sector, so we need to // check even more. - if (check->extralight == stacked_extralight && - check->visibility == stacked_visibility && - check->viewpos == stacked_viewpos && + if (check->extralight == renderportal->stacked_extralight && + check->visibility == renderportal->stacked_visibility && + check->viewpos == renderportal->stacked_viewpos && ( // headache inducing logic... :( (portal->mType != PORTS_STACKEDSECTORTHING) || @@ -277,7 +279,7 @@ namespace swrenderer *xform == check->xform ) ) && - check->viewangle == stacked_angle + check->viewangle == renderportal->stacked_angle ) ) ) @@ -298,8 +300,8 @@ namespace swrenderer basecolormap == check->colormap && // [RH] Add more checks *xform == check->xform && sky == check->sky && - CurrentPortalUniq == check->CurrentPortalUniq && - MirrorFlags == check->MirrorFlags && + renderportal->CurrentPortalUniq == check->CurrentPortalUniq && + renderportal->MirrorFlags == check->MirrorFlags && Clip3DFloors::Instance()->CurrentSkybox == check->CurrentSkybox && ViewPos == check->viewpos ) @@ -319,14 +321,14 @@ namespace swrenderer check->portal = portal; check->left = viewwidth; // Was SCREENWIDTH -- killough 11/98 check->right = 0; - check->extralight = stacked_extralight; - check->visibility = stacked_visibility; - check->viewpos = stacked_viewpos; - check->viewangle = stacked_angle; + check->extralight = renderportal->stacked_extralight; + check->visibility = renderportal->stacked_visibility; + check->viewpos = renderportal->stacked_viewpos; + 
check->viewangle = renderportal->stacked_angle; check->Alpha = alpha; check->Additive = additive; - check->CurrentPortalUniq = CurrentPortalUniq; - check->MirrorFlags = MirrorFlags; + check->CurrentPortalUniq = renderportal->CurrentPortalUniq; + check->MirrorFlags = renderportal->MirrorFlags; check->CurrentSkybox = Clip3DFloors::Instance()->CurrentSkybox; fillshort(check->top, viewwidth, 0x7fff); @@ -421,13 +423,15 @@ namespace swrenderer int vpcount = 0; drawerargs::ds_color = 3; + + RenderPortal *renderportal = RenderPortal::Instance(); for (i = 0; i < MAXVISPLANES; i++) { for (pl = visplanes[i]; pl; pl = pl->next) { // kg3D - draw only correct planes - if (pl->CurrentPortalUniq != CurrentPortalUniq || pl->CurrentSkybox != Clip3DFloors::Instance()->CurrentSkybox) + if (pl->CurrentPortalUniq != renderportal->CurrentPortalUniq || pl->CurrentSkybox != Clip3DFloors::Instance()->CurrentSkybox) continue; // kg3D - draw only real planes now if (pl->sky >= 0) { @@ -448,19 +452,21 @@ namespace swrenderer DVector3 oViewPos = ViewPos; DAngle oViewAngle = ViewAngle; + + RenderPortal *renderportal = RenderPortal::Instance(); for (i = 0; i < MAXVISPLANES; i++) { for (pl = visplanes[i]; pl; pl = pl->next) { - if (pl->CurrentSkybox != Clip3DFloors::Instance()->CurrentSkybox || pl->CurrentPortalUniq != CurrentPortalUniq) + if (pl->CurrentSkybox != Clip3DFloors::Instance()->CurrentSkybox || pl->CurrentPortalUniq != renderportal->CurrentPortalUniq) continue; if (pl->sky < 0 && pl->height.Zat0() == height) { ViewPos = pl->viewpos; ViewAngle = pl->viewangle; - MirrorFlags = pl->MirrorFlags; + renderportal->MirrorFlags = pl->MirrorFlags; R_DrawSinglePlane(pl, pl->sky & 0x7FFFFFFF, pl->Additive, true); } diff --git a/src/swrenderer/r_main.cpp b/src/swrenderer/r_main.cpp index a5978b5a8c..71031486ac 100644 --- a/src/swrenderer/r_main.cpp +++ b/src/swrenderer/r_main.cpp @@ -394,20 +394,6 @@ static void R_ShutdownRenderer() R_FreeDrawSegs(); } 
-//========================================================================== -// -// R_CopyStackedViewParameters -// -//========================================================================== - -void R_CopyStackedViewParameters() -{ - stacked_viewpos = ViewPos; - stacked_angle = ViewAngle; - stacked_extralight = extralight; - stacked_visibility = R_GetVisibility(); -} - //========================================================================== // // R_SetupColormap @@ -555,11 +541,7 @@ void R_RenderActorView (AActor *actor, bool dontmaplines) colfunc = basecolfunc; spanfunc = &SWPixelFormatDrawers::DrawSpan; - WindowLeft = 0; - WindowRight = viewwidth; - MirrorFlags = 0; - CurrentPortal = NULL; - CurrentPortalUniq = 0; + RenderPortal::Instance()->SetMainPortal(); r_dontmaplines = dontmaplines; @@ -589,10 +571,10 @@ void R_RenderActorView (AActor *actor, bool dontmaplines) { PlaneCycles.Clock(); R_DrawPlanes(); - R_DrawPortals(); + RenderPortal::Instance()->RenderPlanePortals(); PlaneCycles.Unclock(); - R_DrawWallPortals(); + RenderPortal::Instance()->RenderLinePortals(); NetUpdate (); diff --git a/src/swrenderer/r_main.h b/src/swrenderer/r_main.h index a6e198482d..c0a8ac345c 100644 --- a/src/swrenderer/r_main.h +++ b/src/swrenderer/r_main.h @@ -134,8 +134,6 @@ void R_MultiresInit (void); -extern void R_CopyStackedViewParameters(); - extern double globaluclip, globaldclip; } diff --git a/src/swrenderer/r_swrenderer.cpp b/src/swrenderer/r_swrenderer.cpp index c1240bc6b9..f08a2e026f 100644 --- a/src/swrenderer/r_swrenderer.cpp +++ b/src/swrenderer/r_swrenderer.cpp @@ -38,9 +38,10 @@ #include "v_palette.h" #include "v_video.h" #include "m_png.h" -#include "scene/r_bsp.h" #include "r_swrenderer.h" +#include "scene/r_bsp.h" #include "scene/r_3dfloors.h" +#include "scene/r_portal.h" #include "textures/textures.h" #include "r_data/voxels.h" #include "drawers/r_draw_rgba.h" @@ -395,7 +396,7 @@ void FSoftwareRenderer::SetupFrame(player_t *player) void 
FSoftwareRenderer::CopyStackedViewParameters() { - R_CopyStackedViewParameters(); + RenderPortal::Instance()->CopyStackedViewParameters(); } //========================================================================== diff --git a/src/swrenderer/scene/r_bsp.cpp b/src/swrenderer/scene/r_bsp.cpp index 90866a3a45..1e390b313b 100644 --- a/src/swrenderer/scene/r_bsp.cpp +++ b/src/swrenderer/scene/r_bsp.cpp @@ -333,7 +333,7 @@ namespace swrenderer ry1 = x1 * ViewTanCos + y1 * ViewTanSin; ry2 = x2 * ViewTanCos + y2 * ViewTanSin; - if (MirrorFlags & RF_XFLIP) + if (RenderPortal::Instance()->MirrorFlags & RF_XFLIP) { double t = -rx1; rx1 = -rx2; diff --git a/src/swrenderer/scene/r_portal.cpp b/src/swrenderer/scene/r_portal.cpp index 794c3916b7..092eac4c9f 100644 --- a/src/swrenderer/scene/r_portal.cpp +++ b/src/swrenderer/scene/r_portal.cpp @@ -62,31 +62,12 @@ CVAR(Bool, r_skyboxes, true, 0) namespace swrenderer { - int WindowLeft, WindowRight; - uint16_t MirrorFlags; - - PortalDrawseg *CurrentPortal = nullptr; - int CurrentPortalUniq = 0; - bool CurrentPortalInSkybox = false; - - // These are copies of the main parameters used when drawing stacked sectors. - // When you change the main parameters, you should copy them here too *unless* - // you are changing them to draw a stacked sector. Otherwise, stacked sectors - // won't draw in skyboxes properly. - int stacked_extralight; - double stacked_visibility; - DVector3 stacked_viewpos; - DAngle stacked_angle; - - namespace + RenderPortal *RenderPortal::Instance() { - int numskyboxes; // For ADD_STAT(skyboxes) + static RenderPortal renderportal; + return &renderportal; } - //========================================================================== - // - // R_DrawPortals - // // Draws any recorded sky boxes and then frees them. // // The process: @@ -102,16 +83,8 @@ namespace swrenderer // 8. Repeat for any other sky boxes. // 9. Put the camera back where it was to begin with. 
// - //========================================================================== - - void R_DrawPortals() + void RenderPortal::RenderPlanePortals() { - static TArray interestingStack; - static TArray drawsegStack; - static TArray visspriteStack; - static TArray viewposStack; - static TArray visplaneStack; - numskyboxes = 0; if (visplanes[MAXVISPLANES] == nullptr) @@ -164,7 +137,7 @@ namespace swrenderer ViewPos = sky->InterpolatedPosition(r_TicFracF); ViewAngle = savedangle + (sky->PrevAngles.Yaw + deltaangle(sky->PrevAngles.Yaw, sky->Angles.Yaw) * r_TicFracF); - R_CopyStackedViewParameters(); + CopyStackedViewParameters(); break; } @@ -311,21 +284,21 @@ namespace swrenderer freehead = &(*freehead)->next; } - void R_DrawWallPortals() + void RenderPortal::RenderLinePortals() { // [RH] Walk through mirrors // [ZZ] Merged with portals size_t lastportal = WallPortals.Size(); for (unsigned int i = 0; i < lastportal; i++) { - R_EnterPortal(&WallPortals[i], 0); + RenderLinePortal(&WallPortals[i], 0); } CurrentPortal = nullptr; CurrentPortalUniq = 0; } - void R_EnterPortal(PortalDrawseg* pds, int depth) + void RenderPortal::RenderLinePortal(PortalDrawseg* pds, int depth) { // [ZZ] check depth. fill portal with black if it's exceeding the visual recursion limit, and continue like nothing happened. 
if (depth >= r_portal_recursions) @@ -364,7 +337,7 @@ namespace swrenderer } if (r_highlight_portals) - R_HighlightPortal(pds); + RenderLinePortalHighlight(pds); return; } @@ -441,7 +414,7 @@ namespace swrenderer ViewTanSin = FocalTangent * ViewSin; ViewTanCos = FocalTangent * ViewCos; - R_CopyStackedViewParameters(); + CopyStackedViewParameters(); validcount++; PortalDrawseg* prevpds = CurrentPortal; @@ -478,7 +451,7 @@ namespace swrenderer PlaneCycles.Clock(); R_DrawPlanes(); - R_DrawPortals(); + RenderPlanePortals(); PlaneCycles.Unclock(); double vzp = ViewPos.Z; @@ -488,7 +461,7 @@ namespace swrenderer unsigned int portalsAtEnd = WallPortals.Size(); for (; portalsAtStart < portalsAtEnd; portalsAtStart++) { - R_EnterPortal(&WallPortals[portalsAtStart], depth + 1); + RenderLinePortal(&WallPortals[portalsAtStart], depth + 1); } int prevuniq2 = CurrentPortalUniq; CurrentPortalUniq = prevuniq; @@ -506,7 +479,7 @@ namespace swrenderer // draw a red line around a portal if it's being highlighted if (r_highlight_portals) - R_HighlightPortal(pds); + RenderLinePortalHighlight(pds); CurrentPortal = prevpds; MirrorFlags = prevmf; @@ -516,7 +489,7 @@ namespace swrenderer ViewPath[1] = savedpath[1]; } - void R_HighlightPortal(PortalDrawseg* pds) + void RenderPortal::RenderLinePortalHighlight(PortalDrawseg* pds) { // [ZZ] NO OVERFLOW CHECKS HERE // I believe it won't break. if it does, blame me. 
:( @@ -555,11 +528,28 @@ namespace swrenderer else *(pixels + Ybottom * RenderTarget->GetPitch() + x) = color; } } + + void RenderPortal::CopyStackedViewParameters() + { + stacked_viewpos = ViewPos; + stacked_angle = ViewAngle; + stacked_extralight = extralight; + stacked_visibility = R_GetVisibility(); + } + + void RenderPortal::SetMainPortal() + { + WindowLeft = 0; + WindowRight = viewwidth; + MirrorFlags = 0; + CurrentPortal = nullptr; + CurrentPortalUniq = 0; + } } ADD_STAT(skyboxes) { FString out; - out.Format("%d skybox planes", swrenderer::numskyboxes); + out.Format("%d skybox planes", swrenderer::RenderPortal::Instance()->numskyboxes); return out; } diff --git a/src/swrenderer/scene/r_portal.h b/src/swrenderer/scene/r_portal.h index f7b1fb74c4..1781026189 100644 --- a/src/swrenderer/scene/r_portal.h +++ b/src/swrenderer/scene/r_portal.h @@ -17,20 +17,46 @@ namespace swrenderer { - extern int WindowLeft, WindowRight; - extern uint16_t MirrorFlags; + struct visplane_t; - extern PortalDrawseg* CurrentPortal; - extern int CurrentPortalUniq; - extern bool CurrentPortalInSkybox; + class RenderPortal + { + public: + static RenderPortal *Instance(); + + void SetMainPortal(); + void CopyStackedViewParameters(); + + void RenderPlanePortals(); + void RenderLinePortals(); + + int WindowLeft = 0; + int WindowRight = 0; + uint16_t MirrorFlags = 0; - extern int stacked_extralight; - extern double stacked_visibility; - extern DVector3 stacked_viewpos; - extern DAngle stacked_angle; + PortalDrawseg* CurrentPortal = nullptr; + int CurrentPortalUniq = 0; + bool CurrentPortalInSkybox = false; - void R_DrawPortals(); - void R_DrawWallPortals(); - void R_EnterPortal(PortalDrawseg* pds, int depth); - void R_HighlightPortal(PortalDrawseg* pds); + // These are copies of the main parameters used when drawing stacked sectors. + // When you change the main parameters, you should copy them here too *unless* + // you are changing them to draw a stacked sector. 
Otherwise, stacked sectors + // won't draw in skyboxes properly. + int stacked_extralight = 0; + double stacked_visibility = 0.0; + DVector3 stacked_viewpos; + DAngle stacked_angle; + + int numskyboxes = 0; // For ADD_STAT(skyboxes) + + private: + void RenderLinePortal(PortalDrawseg* pds, int depth); + void RenderLinePortalHighlight(PortalDrawseg* pds); + + TArray interestingStack; + TArray drawsegStack; + TArray visspriteStack; + TArray viewposStack; + TArray visplaneStack; + }; } diff --git a/src/swrenderer/scene/r_things.cpp b/src/swrenderer/scene/r_things.cpp index 3b00ccbd41..54b93caeef 100644 --- a/src/swrenderer/scene/r_things.cpp +++ b/src/swrenderer/scene/r_things.cpp @@ -427,14 +427,16 @@ static inline void R_CollectPortals() bool R_ClipSpriteColumnWithPortals(vissprite_t* spr) { + RenderPortal *renderportal = RenderPortal::Instance(); + // [ZZ] 10.01.2016: don't clip sprites from the root of a skybox. - if (CurrentPortalInSkybox) + if (renderportal->CurrentPortalInSkybox) return false; for (drawseg_t *seg : portaldrawsegs) { // ignore segs from other portals - if (seg->CurrentPortalUniq != CurrentPortalUniq) + if (seg->CurrentPortalUniq != renderportal->CurrentPortalUniq) continue; // (all checks that are already done in R_CollectPortals have been removed for performance reasons.) @@ -636,10 +638,12 @@ void R_ProjectSprite (AActor *thing, WaterFakeSide fakeside, F3DFloor *fakefloor { return; } + + RenderPortal *renderportal = RenderPortal::Instance(); // [ZZ] Or less definitely not visible (hue) // [ZZ] 10.01.2016: don't try to clip stuff inside a skybox against the current portal. 
- if (!CurrentPortalInSkybox && CurrentPortal && !!P_PointOnLineSidePrecise(thing->Pos(), CurrentPortal->dst)) + if (!renderportal->CurrentPortalInSkybox && renderportal->CurrentPortal && !!P_PointOnLineSidePrecise(thing->Pos(), renderportal->CurrentPortal->dst)) return; // [RH] Interpolate the sprite's position to make it look smooth @@ -776,7 +780,7 @@ void R_ProjectSprite (AActor *thing, WaterFakeSide fakeside, F3DFloor *fakefloor tx = tr_x * ViewSin - tr_y * ViewCos; // [RH] Flip for mirrors - if (MirrorFlags & RF_XFLIP) + if (renderportal->MirrorFlags & RF_XFLIP) { tx = -tx; } @@ -847,7 +851,7 @@ void R_ProjectSprite (AActor *thing, WaterFakeSide fakeside, F3DFloor *fakefloor } // [RH] Flip for mirrors - renderflags ^= MirrorFlags & RF_XFLIP; + renderflags ^= renderportal->MirrorFlags & RF_XFLIP; // calculate edges of the shape const double thingxscalemul = spriteScale.X / tex->Scale.X; @@ -857,14 +861,14 @@ void R_ProjectSprite (AActor *thing, WaterFakeSide fakeside, F3DFloor *fakefloor x1 = centerx + xs_RoundToInt(dtx1); // off the right side? - if (x1 >= WindowRight) + if (x1 >= renderportal->WindowRight) return; tx += tex->GetWidth() * thingxscalemul; x2 = centerx + xs_RoundToInt(tx * xscale); // off the left side or too small? - if ((x2 < WindowLeft || x2 <= x1)) + if ((x2 < renderportal->WindowLeft || x2 <= x1)) return; xscale = spriteScale.X * xscale / tex->Scale.X; @@ -875,14 +879,14 @@ void R_ProjectSprite (AActor *thing, WaterFakeSide fakeside, F3DFloor *fakefloor // store information in a vissprite vis = R_NewVisSprite(); - vis->CurrentPortalUniq = CurrentPortalUniq; + vis->CurrentPortalUniq = renderportal->CurrentPortalUniq; vis->xscale = FLOAT2FIXED(xscale); vis->yscale = float(InvZtoScale * yscale / tz); vis->idepth = float(1 / tz); vis->floorclip = thing->Floorclip / yscale; vis->texturemid = tex->TopOffset - (ViewPos.Z - pos.Z + thing->Floorclip) / yscale; - vis->x1 = x1 < WindowLeft ? WindowLeft : x1; - vis->x2 = x2 > WindowRight ? 
WindowRight : x2; + vis->x1 = x1 < renderportal->WindowLeft ? renderportal->WindowLeft : x1; + vis->x2 = x2 > renderportal->WindowRight ? renderportal->WindowRight : x2; vis->Angle = thing->Angles.Yaw; if (renderflags & RF_XFLIP) @@ -902,11 +906,11 @@ void R_ProjectSprite (AActor *thing, WaterFakeSide fakeside, F3DFloor *fakefloor { vis = R_NewVisSprite(); - vis->CurrentPortalUniq = CurrentPortalUniq; + vis->CurrentPortalUniq = renderportal->CurrentPortalUniq; vis->xscale = FLOAT2FIXED(xscale); vis->yscale = (float)yscale; - vis->x1 = WindowLeft; - vis->x2 = WindowRight; + vis->x1 = renderportal->WindowLeft; + vis->x2 = renderportal->WindowRight; vis->idepth = 1 / MINZ; vis->floorclip = thing->Floorclip; @@ -946,7 +950,7 @@ void R_ProjectSprite (AActor *thing, WaterFakeSide fakeside, F3DFloor *fakefloor vis->fakefloor = fakefloor; vis->fakeceiling = fakeceiling; vis->Style.ColormapNum = 0; - vis->bInMirror = MirrorFlags & RF_XFLIP; + vis->bInMirror = renderportal->MirrorFlags & RF_XFLIP; vis->bSplitSprite = false; if (voxel != NULL) @@ -1604,15 +1608,20 @@ void R_DrawSprite (vissprite_t *spr) neardepth = ds->sz2, fardepth = ds->sz1; } } + + // Check if sprite is in front of draw seg: if ((!spr->bWallSprite && neardepth > spr->depth) || ((spr->bWallSprite || fardepth > spr->depth) && (spr->gpos.Y - ds->curline->v1->fY()) * (ds->curline->v2->fX() - ds->curline->v1->fX()) - (spr->gpos.X - ds->curline->v1->fX()) * (ds->curline->v2->fY() - ds->curline->v1->fY()) <= 0)) { + RenderPortal *renderportal = RenderPortal::Instance(); + // seg is behind sprite, so draw the mid texture if it has one - if (ds->CurrentPortalUniq == CurrentPortalUniq && // [ZZ] instead, portal uniq check is made here + if (ds->CurrentPortalUniq == renderportal->CurrentPortalUniq && // [ZZ] instead, portal uniq check is made here (ds->maskedtexturecol != -1 || ds->bFogBoundary)) R_RenderMaskedSegRange (ds, r1, r2); + continue; } @@ -1711,13 +1720,11 @@ void R_DrawMaskedSingle (bool renew) drawseg_t 
*ds; int i; -#if 0 - R_SplitVisSprites (); -#endif + RenderPortal *renderportal = RenderPortal::Instance(); for (i = vsprcount; i > 0; i--) { - if (spritesorter[i-1]->CurrentPortalUniq != CurrentPortalUniq) + if (spritesorter[i-1]->CurrentPortalUniq != renderportal->CurrentPortalUniq) continue; // probably another time R_DrawSprite (spritesorter[i-1]); } @@ -1737,7 +1744,7 @@ void R_DrawMaskedSingle (bool renew) for (ds = ds_p; ds-- > firstdrawseg; ) // new -- killough { // [ZZ] the same as above - if (ds->CurrentPortalUniq != CurrentPortalUniq) + if (ds->CurrentPortalUniq != renderportal->CurrentPortalUniq) continue; // kg3D - no fake segs if (ds->fake) continue; diff --git a/src/swrenderer/segments/r_drawsegment.cpp b/src/swrenderer/segments/r_drawsegment.cpp index 1224dbe916..c8ac27d9f9 100644 --- a/src/swrenderer/segments/r_drawsegment.cpp +++ b/src/swrenderer/segments/r_drawsegment.cpp @@ -109,13 +109,15 @@ namespace swrenderer { short most[MAXWIDTH]; - R_CreateWallSegmentYSloped(most, curline->frontsector->ceilingplane, &WallC, curline, MirrorFlags & RF_XFLIP); + RenderPortal *renderportal = RenderPortal::Instance(); + + R_CreateWallSegmentYSloped(most, curline->frontsector->ceilingplane, &WallC, curline, renderportal->MirrorFlags & RF_XFLIP); for (int i = x1; i < x2; ++i) { if (wallupper[i] < most[i]) wallupper[i] = most[i]; } - R_CreateWallSegmentYSloped(most, curline->frontsector->floorplane, &WallC, curline, MirrorFlags & RF_XFLIP); + R_CreateWallSegmentYSloped(most, curline->frontsector->floorplane, &WallC, curline, renderportal->MirrorFlags & RF_XFLIP); for (int i = x1; i < x2; ++i) { if (walllower[i] > most[i]) diff --git a/src/swrenderer/things/r_particle.cpp b/src/swrenderer/things/r_particle.cpp index 4897937903..2dbc6562c7 100644 --- a/src/swrenderer/things/r_particle.cpp +++ b/src/swrenderer/things/r_particle.cpp @@ -66,9 +66,11 @@ namespace swrenderer vissprite_t* vis; sector_t* heightsec = NULL; FSWColormap* map; + + RenderPortal *renderportal = 
RenderPortal::Instance(); // [ZZ] Particle not visible through the portal plane - if (CurrentPortal && !!P_PointOnLineSide(particle->Pos, CurrentPortal->dst)) + if (renderportal->CurrentPortal && !!P_PointOnLineSide(particle->Pos, renderportal->CurrentPortal->dst)) return; // transform the origin point @@ -84,7 +86,7 @@ namespace swrenderer tx = tr_x * ViewSin - tr_y * ViewCos; // Flip for mirrors - if (MirrorFlags & RF_XFLIP) + if (renderportal->MirrorFlags & RF_XFLIP) { tx = viewwidth - tx - 1; } @@ -99,8 +101,8 @@ namespace swrenderer // calculate edges of the shape double psize = particle->size / 8.0; - x1 = MAX(WindowLeft, centerx + xs_RoundToInt((tx - psize) * xscale)); - x2 = MIN(WindowRight, centerx + xs_RoundToInt((tx + psize) * xscale)); + x1 = MAX(renderportal->WindowLeft, centerx + xs_RoundToInt((tx - psize) * xscale)); + x2 = MIN(renderportal->WindowRight, centerx + xs_RoundToInt((tx + psize) * xscale)); if (x1 >= x2) return; @@ -175,7 +177,7 @@ namespace swrenderer // store information in a vissprite vis = R_NewVisSprite(); - vis->CurrentPortalUniq = CurrentPortalUniq; + vis->CurrentPortalUniq = renderportal->CurrentPortalUniq; vis->heightsec = heightsec; vis->xscale = FLOAT2FIXED(xscale); vis->yscale = (float)xscale; diff --git a/src/swrenderer/things/r_wallsprite.cpp b/src/swrenderer/things/r_wallsprite.cpp index 34c398c641..7699edf706 100644 --- a/src/swrenderer/things/r_wallsprite.cpp +++ b/src/swrenderer/things/r_wallsprite.cpp @@ -86,8 +86,10 @@ namespace swrenderer // Is it off-screen? 
if (wallc.Init(left, right, TOO_CLOSE_Z)) return; + + RenderPortal *renderportal = RenderPortal::Instance(); - if (wallc.sx1 >= WindowRight || wallc.sx2 <= WindowLeft) + if (wallc.sx1 >= renderportal->WindowRight || wallc.sx2 <= renderportal->WindowLeft) return; // Sprite sorting should probably treat these as walls, not sprites, @@ -100,9 +102,9 @@ namespace swrenderer gzb = pos.Z + scale.Y * scaled_bo; vis = R_NewVisSprite(); - vis->CurrentPortalUniq = CurrentPortalUniq; - vis->x1 = wallc.sx1 < WindowLeft ? WindowLeft : wallc.sx1; - vis->x2 = wallc.sx2 >= WindowRight ? WindowRight : wallc.sx2; + vis->CurrentPortalUniq = renderportal->CurrentPortalUniq; + vis->x1 = wallc.sx1 < renderportal->WindowLeft ? renderportal->WindowLeft : wallc.sx1; + vis->x2 = wallc.sx2 >= renderportal->WindowRight ? renderportal->WindowRight : wallc.sx2; vis->yscale = (float)scale.Y; vis->idepth = float(1 / tz); vis->depth = (float)tz; @@ -122,7 +124,7 @@ namespace swrenderer vis->Style.Alpha = float(thing->Alpha); vis->fakefloor = NULL; vis->fakeceiling = NULL; - vis->bInMirror = MirrorFlags & RF_XFLIP; + vis->bInMirror = renderportal->MirrorFlags & RF_XFLIP; vis->pic = pic; vis->bIsVoxel = false; vis->bWallSprite = true; From 3b7bc2d1e505c1ad3c8d292793e77ed03f91635e Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 8 Jan 2017 04:54:11 +0100 Subject: [PATCH 651/912] Add NO_DRAWERGEN define --- src/swrenderer/drawers/r_drawers.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/swrenderer/drawers/r_drawers.cpp b/src/swrenderer/drawers/r_drawers.cpp index 57c33a5be5..544b8e779a 100644 --- a/src/swrenderer/drawers/r_drawers.cpp +++ b/src/swrenderer/drawers/r_drawers.cpp @@ -29,6 +29,8 @@ ///////////////////////////////////////////////////////////////////////////// +#if !defined(NO_DRAWERGEN) + extern "C" { void DrawColumn_SSE2(const DrawColumnArgs *, const WorkerThreadData *); @@ -150,11 +152,13 @@ extern "C" void TriFill32_13_SSE2(const TriDrawTriangleArgs *, 
WorkerThreadData *); void TriFill32_14_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); } +#endif ///////////////////////////////////////////////////////////////////////////// Drawers::Drawers() { +#if !defined(NO_DRAWERGEN) DrawColumn = DrawColumn_SSE2; DrawColumnAdd = DrawColumnAdd_SSE2; DrawColumnShaded = DrawColumnShaded_SSE2; @@ -245,6 +249,7 @@ Drawers::Drawers() TriFill32.push_back(TriFill32_12_SSE2); TriFill32.push_back(TriFill32_13_SSE2); TriFill32.push_back(TriFill32_14_SSE2); +#endif } Drawers *Drawers::Instance() From 407204ee2c06fae3b3adec90b188519877bda24d Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Sun, 8 Jan 2017 21:25:27 -0500 Subject: [PATCH 652/912] - Enabled Gnu CC ARM architecture check for Drawergen. --- tools/drawergen/drawergen.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/drawergen/drawergen.cpp b/tools/drawergen/drawergen.cpp index ecf87062e1..e753d61971 100644 --- a/tools/drawergen/drawergen.cpp +++ b/tools/drawergen/drawergen.cpp @@ -88,7 +88,11 @@ int main(int argc, char **argv) std::cout << "Target triple is " << triple << std::endl; +#ifdef __arm__ + std::string cpuName = "armv8"; +#else std::string cpuName = "pentium4"; +#endif std::string features; std::cout << "Compiling drawer code for " << cpuName << ".." 
<< std::endl; From b91e88a9a61970c6d43622f2558b5780f5fc7a84 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 9 Jan 2017 11:25:21 +0100 Subject: [PATCH 653/912] Remove unused intrinsic functions --- tools/drawergen/ssa/ssa_float.cpp | 81 ------------------------------- tools/drawergen/ssa/ssa_float.h | 10 ---- tools/drawergen/ssa/ssa_vec4f.cpp | 65 ------------------------- tools/drawergen/ssa/ssa_vec4f.h | 8 --- tools/drawergen/ssa/ssa_vec4i.cpp | 11 ++--- tools/drawergen/ssa/ssa_vec4i.h | 1 - tools/drawergen/ssa/ssa_vec8s.cpp | 24 +-------- tools/drawergen/ssa/ssa_vec8s.h | 2 - 8 files changed, 4 insertions(+), 198 deletions(-) diff --git a/tools/drawergen/ssa/ssa_float.cpp b/tools/drawergen/ssa/ssa_float.cpp index f537792fc9..4f93379c89 100644 --- a/tools/drawergen/ssa/ssa_float.cpp +++ b/tools/drawergen/ssa/ssa_float.cpp @@ -54,18 +54,6 @@ llvm::Type *SSAFloat::llvm_type() return llvm::Type::getFloatTy(SSAScope::context()); } -SSAFloat SSAFloat::sqrt(SSAFloat f) -{ - std::vector params; - params.push_back(SSAFloat::llvm_type()); - return SSAFloat::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::sqrt, params), f.v, SSAScope::hint())); -} - -SSAFloat SSAFloat::fastsqrt(SSAFloat f) -{ - return f * rsqrt(f); -} - SSAFloat SSAFloat::rsqrt(SSAFloat f) { llvm::Value *f_ss = SSAScope::builder().CreateInsertElement(llvm::UndefValue::get(SSAVec4f::llvm_type()), f.v, llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)0))); @@ -73,75 +61,6 @@ SSAFloat SSAFloat::rsqrt(SSAFloat f) return SSAFloat::from_llvm(SSAScope::builder().CreateExtractElement(f_ss, SSAInt(0).v, SSAScope::hint())); } -SSAFloat SSAFloat::sin(SSAFloat val) -{ - std::vector params; - params.push_back(SSAFloat::llvm_type()); - return SSAFloat::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::sin, params), val.v, SSAScope::hint())); -} - -SSAFloat SSAFloat::cos(SSAFloat val) -{ - std::vector params; - 
params.push_back(SSAFloat::llvm_type()); - return SSAFloat::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::cos, params), val.v, SSAScope::hint())); -} - -SSAFloat SSAFloat::pow(SSAFloat val, SSAFloat power) -{ - std::vector params; - params.push_back(SSAFloat::llvm_type()); - //params.push_back(SSAFloat::llvm_type()); - std::vector args; - args.push_back(val.v); - args.push_back(power.v); - return SSAFloat::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::pow, params), args, SSAScope::hint())); -} - -SSAFloat SSAFloat::exp(SSAFloat val) -{ - std::vector params; - params.push_back(SSAFloat::llvm_type()); - return SSAFloat::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::exp, params), val.v, SSAScope::hint())); -} - -SSAFloat SSAFloat::log(SSAFloat val) -{ - std::vector params; - params.push_back(SSAFloat::llvm_type()); - return SSAFloat::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::log, params), val.v, SSAScope::hint())); -} - -SSAFloat SSAFloat::fma(SSAFloat a, SSAFloat b, SSAFloat c) -{ - std::vector params; - params.push_back(SSAFloat::llvm_type()); - //params.push_back(SSAFloat::llvm_type()); - //params.push_back(SSAFloat::llvm_type()); - std::vector args; - args.push_back(a.v); - args.push_back(b.v); - args.push_back(c.v); - return SSAFloat::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::fma, params), args, SSAScope::hint())); -} - -/* This intrinsic isn't always available.. 
-SSAFloat SSAFloat::round(SSAFloat val) -{ - - std::vector params; - params.push_back(SSAFloat::llvm_type()); - return SSAFloat::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::round, params), val.v, SSAScope::hint())); -} -*/ - -SSAFloat SSAFloat::floor(SSAFloat val) -{ - std::vector params; - params.push_back(SSAFloat::llvm_type()); - return SSAFloat::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::floor, params), val.v, SSAScope::hint())); -} - SSAFloat SSAFloat::MIN(SSAFloat a, SSAFloat b) { return SSAFloat::from_llvm(SSAScope::builder().CreateSelect((a < b).v, a.v, b.v, SSAScope::hint())); diff --git a/tools/drawergen/ssa/ssa_float.h b/tools/drawergen/ssa/ssa_float.h index f7e2b93ef9..b3a35486f3 100644 --- a/tools/drawergen/ssa/ssa_float.h +++ b/tools/drawergen/ssa/ssa_float.h @@ -36,17 +36,7 @@ public: explicit SSAFloat(llvm::Value *v); static SSAFloat from_llvm(llvm::Value *v) { return SSAFloat(v); } static llvm::Type *llvm_type(); - static SSAFloat sqrt(SSAFloat f); - static SSAFloat fastsqrt(SSAFloat f); static SSAFloat rsqrt(SSAFloat f); - static SSAFloat sin(SSAFloat val); - static SSAFloat cos(SSAFloat val); - static SSAFloat pow(SSAFloat val, SSAFloat power); - static SSAFloat exp(SSAFloat val); - static SSAFloat log(SSAFloat val); - static SSAFloat fma(SSAFloat a, SSAFloat b, SSAFloat c); - //static SSAFloat round(SSAFloat val); - static SSAFloat floor(SSAFloat val); static SSAFloat MIN(SSAFloat a, SSAFloat b); static SSAFloat MAX(SSAFloat a, SSAFloat b); static SSAFloat clamp(SSAFloat a, SSAFloat b, SSAFloat c); diff --git a/tools/drawergen/ssa/ssa_vec4f.cpp b/tools/drawergen/ssa/ssa_vec4f.cpp index 5c40f74940..e58f167de6 100644 --- a/tools/drawergen/ssa/ssa_vec4f.cpp +++ b/tools/drawergen/ssa/ssa_vec4f.cpp @@ -111,71 +111,6 @@ SSAVec4f SSAVec4f::bitcast(SSAVec4i i32) return SSAVec4i::from_llvm(SSAScope::builder().CreateBitCast(i32.v, llvm_type(), SSAScope::hint())); } -SSAVec4f 
SSAVec4f::sqrt(SSAVec4f f) -{ - std::vector params; - params.push_back(SSAVec4f::llvm_type()); - return SSAFloat::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::sqrt, params), f.v, SSAScope::hint())); - //return SSAVec4f::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse_sqrt_ps), f.v, SSAScope::hint())); -} - -SSAVec4f SSAVec4f::rcp(SSAVec4f f) -{ - return SSAVec4f::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse_rcp_ps), f.v, SSAScope::hint())); -} - -SSAVec4f SSAVec4f::sin(SSAVec4f val) -{ - std::vector params; - params.push_back(SSAVec4f::llvm_type()); - return SSAFloat::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::sin, params), val.v, SSAScope::hint())); -} - -SSAVec4f SSAVec4f::cos(SSAVec4f val) -{ - std::vector params; - params.push_back(SSAVec4f::llvm_type()); - return SSAFloat::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::cos, params), val.v, SSAScope::hint())); -} - -SSAVec4f SSAVec4f::pow(SSAVec4f val, SSAVec4f power) -{ - std::vector params; - params.push_back(SSAVec4f::llvm_type()); - //params.push_back(SSAVec4f::llvm_type()); - std::vector args; - args.push_back(val.v); - args.push_back(power.v); - return SSAFloat::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::pow, params), args, SSAScope::hint())); -} - -SSAVec4f SSAVec4f::exp(SSAVec4f val) -{ - std::vector params; - params.push_back(SSAVec4f::llvm_type()); - return SSAFloat::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::exp, params), val.v, SSAScope::hint())); -} - -SSAVec4f SSAVec4f::log(SSAVec4f val) -{ - std::vector params; - params.push_back(SSAVec4f::llvm_type()); - return SSAFloat::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::log, params), val.v, SSAScope::hint())); -} - -SSAVec4f SSAVec4f::fma(SSAVec4f a, SSAVec4f b, SSAVec4f c) 
-{ - std::vector params; - params.push_back(SSAVec4f::llvm_type()); - //params.push_back(SSAVec4f::llvm_type()); - //params.push_back(SSAVec4f::llvm_type()); - std::vector args; - args.push_back(a.v); - args.push_back(b.v); - args.push_back(c.v); - return SSAFloat::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::fma, params), args, SSAScope::hint())); -} - void SSAVec4f::transpose(SSAVec4f &row0, SSAVec4f &row1, SSAVec4f &row2, SSAVec4f &row3) { SSAVec4f tmp0 = shuffle(row0, row1, 0x44);//_MM_SHUFFLE(1,0,1,0)); diff --git a/tools/drawergen/ssa/ssa_vec4f.h b/tools/drawergen/ssa/ssa_vec4f.h index d8a2c5d012..3011da9d00 100644 --- a/tools/drawergen/ssa/ssa_vec4f.h +++ b/tools/drawergen/ssa/ssa_vec4f.h @@ -43,14 +43,6 @@ public: SSAFloat operator[](int index) const; static SSAVec4f insert_element(SSAVec4f vec4f, SSAFloat value, int index); static SSAVec4f bitcast(SSAVec4i i32); - static SSAVec4f sqrt(SSAVec4f f); - static SSAVec4f rcp(SSAVec4f f); - static SSAVec4f sin(SSAVec4f val); - static SSAVec4f cos(SSAVec4f val); - static SSAVec4f pow(SSAVec4f val, SSAVec4f power); - static SSAVec4f exp(SSAVec4f val); - static SSAVec4f log(SSAVec4f val); - static SSAVec4f fma(SSAVec4f a, SSAVec4f b, SSAVec4f c); static void transpose(SSAVec4f &row0, SSAVec4f &row1, SSAVec4f &row2, SSAVec4f &row3); static SSAVec4f shuffle(const SSAVec4f &f0, int index0, int index1, int index2, int index3); static SSAVec4f shuffle(const SSAVec4f &f0, const SSAVec4f &f1, int index0, int index1, int index2, int index3); diff --git a/tools/drawergen/ssa/ssa_vec4i.cpp b/tools/drawergen/ssa/ssa_vec4i.cpp index 344b33e1ee..0e42124e94 100644 --- a/tools/drawergen/ssa/ssa_vec4i.cpp +++ b/tools/drawergen/ssa/ssa_vec4i.cpp @@ -89,11 +89,13 @@ SSAVec4i::SSAVec4i(SSAInt i0, SSAInt i1, SSAInt i2, SSAInt i3) #endif } +/* SSAVec4i::SSAVec4i(SSAVec4f f32) : v(0) { v = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_cvttps2dq), f32.v, SSAScope::hint()); } 
+*/ SSAInt SSAVec4i::operator[](SSAInt index) const { @@ -198,11 +200,6 @@ SSAVec4i SSAVec4i::combinelo(SSAVec8s a, SSAVec8s b) return SSAVec4i::bitcast(SSAVec8s::shuffle(a, b, 0, 8+0, 1, 8+1, 2, 8+2, 3, 8+3)); // _mm_unpacklo_epi16 } -SSAVec4i SSAVec4i::sqrt(SSAVec4i f) -{ - return SSAVec4i::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_sqrt_pd), f.v, SSAScope::hint())); -} - SSAVec4i operator+(const SSAVec4i &a, const SSAVec4i &b) { return SSAVec4i::from_llvm(SSAScope::builder().CreateAdd(a.v, b.v, SSAScope::hint())); @@ -265,9 +262,7 @@ SSAVec4i operator/(const SSAVec4i &a, int b) SSAVec4i operator<<(const SSAVec4i &a, int bits) { - //return SSAScope::builder().CreateShl(a.v, bits); - llvm::Value *values[2] = { a.v, llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)bits)) }; - return SSAVec4i::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_pslli_d), values, SSAScope::hint())); + return SSAInt::from_llvm(SSAScope::builder().CreateShl(a.v, bits, SSAScope::hint())); } SSAVec4i operator>>(const SSAVec4i &a, int bits) diff --git a/tools/drawergen/ssa/ssa_vec4i.h b/tools/drawergen/ssa/ssa_vec4i.h index 2035b54572..e8a3ec9a5a 100644 --- a/tools/drawergen/ssa/ssa_vec4i.h +++ b/tools/drawergen/ssa/ssa_vec4i.h @@ -55,7 +55,6 @@ public: static void extend(SSAVec16ub a, SSAVec4i &out0, SSAVec4i &out1, SSAVec4i &out2, SSAVec4i &out3); static SSAVec4i combinehi(SSAVec8s v0, SSAVec8s v1); static SSAVec4i combinelo(SSAVec8s v0, SSAVec8s v1); - static SSAVec4i sqrt(SSAVec4i f); static SSAVec4i from_llvm(llvm::Value *v) { return SSAVec4i(v); } static llvm::Type *llvm_type(); diff --git a/tools/drawergen/ssa/ssa_vec8s.cpp b/tools/drawergen/ssa/ssa_vec8s.cpp index 0950a289e5..552b8d69b5 100644 --- a/tools/drawergen/ssa/ssa_vec8s.cpp +++ b/tools/drawergen/ssa/ssa_vec8s.cpp @@ -106,26 +106,6 @@ SSAVec8s SSAVec8s::extendlo(SSAVec16ub a) return SSAVec8s::bitcast(SSAVec16ub::shuffle(a, 
SSAVec16ub((unsigned char)0), 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7)); // _mm_unpacklo_epi8 } -/* -SSAVec8s SSAVec8s::min_sse2(SSAVec8s a, SSAVec8s b) -{ - llvm::Value *values[2] = { a.v, b.v }; - return SSAVec8s::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_pmins_w), values, SSAScope::hint())); -} - -SSAVec8s SSAVec8s::max_sse2(SSAVec8s a, SSAVec8s b) -{ - llvm::Value *values[2] = { a.v, b.v }; - return SSAVec8s::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_pmaxs_w), values, SSAScope::hint())); -} -*/ - -SSAVec8s SSAVec8s::mulhi(SSAVec8s a, SSAVec8s b) -{ - llvm::Value *values[2] = { a.v, b.v }; - return SSAVec8s::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_pmulh_w), values, SSAScope::hint())); -} - SSAVec8s operator+(const SSAVec8s &a, const SSAVec8s &b) { return SSAVec8s::from_llvm(SSAScope::builder().CreateAdd(a.v, b.v, SSAScope::hint())); @@ -188,9 +168,7 @@ SSAVec8s operator/(const SSAVec8s &a, short b) SSAVec8s operator<<(const SSAVec8s &a, int bits) { - //return SSAScope::builder().CreateShl(a.v, bits); - llvm::Value *values[2] = { a.v, llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)bits)) }; - return SSAVec8s::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_pslli_d), values, SSAScope::hint())); + return SSAVec8s::from_llvm(SSAScope::builder().CreateShl(a.v, bits, SSAScope::hint())); } SSAVec8s operator>>(const SSAVec8s &a, int bits) diff --git a/tools/drawergen/ssa/ssa_vec8s.h b/tools/drawergen/ssa/ssa_vec8s.h index be320804ec..fdd56d972a 100644 --- a/tools/drawergen/ssa/ssa_vec8s.h +++ b/tools/drawergen/ssa/ssa_vec8s.h @@ -41,8 +41,6 @@ public: static SSAVec8s shuffle(const SSAVec8s &i0, const SSAVec8s &i1, int index0, int index1, int index2, int index3, int index4, int index5, int index6, int index7); static SSAVec8s extendhi(SSAVec16ub 
a); static SSAVec8s extendlo(SSAVec16ub a); - //static SSAVec8s min_sse2(SSAVec8s a, SSAVec8s b); - //static SSAVec8s max_sse2(SSAVec8s a, SSAVec8s b); static SSAVec8s mulhi(SSAVec8s a, SSAVec8s b); static SSAVec8s from_llvm(llvm::Value *v) { return SSAVec8s(v); } static llvm::Type *llvm_type(); From 0de30ebdd9521d2cafaaa9d529eca0206103102a Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 9 Jan 2017 12:11:44 +0100 Subject: [PATCH 654/912] Add arm neon intrinsics --- tools/drawergen/precomp.h | 6 ++++++ tools/drawergen/ssa/ssa_float.cpp | 4 ++++ tools/drawergen/ssa/ssa_vec16ub.cpp | 6 ++++++ tools/drawergen/ssa/ssa_vec4f.cpp | 4 ++++ tools/drawergen/ssa/ssa_vec4i.cpp | 4 ++++ tools/drawergen/ssa/ssa_vec8s.cpp | 6 ++++++ 6 files changed, 30 insertions(+) diff --git a/tools/drawergen/precomp.h b/tools/drawergen/precomp.h index f2bf67b70a..bb4f818df8 100644 --- a/tools/drawergen/precomp.h +++ b/tools/drawergen/precomp.h @@ -3,3 +3,9 @@ #include "llvm_include.h" #include "../../src/swrenderer/drawers/r_drawers.h" + +#ifdef __arm__ +#define ARM_TARGET +#else +#define X86_TARGET +#endif diff --git a/tools/drawergen/ssa/ssa_float.cpp b/tools/drawergen/ssa/ssa_float.cpp index 4f93379c89..19a6fcd0a3 100644 --- a/tools/drawergen/ssa/ssa_float.cpp +++ b/tools/drawergen/ssa/ssa_float.cpp @@ -56,9 +56,13 @@ llvm::Type *SSAFloat::llvm_type() SSAFloat SSAFloat::rsqrt(SSAFloat f) { +#ifdef ARM_TARGET + return SSAFloat::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::aarch64_neon_frsqrts), f.v, SSAScope::hint())); +#else llvm::Value *f_ss = SSAScope::builder().CreateInsertElement(llvm::UndefValue::get(SSAVec4f::llvm_type()), f.v, llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)0))); f_ss = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse_rsqrt_ss), f_ss, SSAScope::hint()); return SSAFloat::from_llvm(SSAScope::builder().CreateExtractElement(f_ss, SSAInt(0).v, SSAScope::hint())); +#endif } SSAFloat 
SSAFloat::MIN(SSAFloat a, SSAFloat b) diff --git a/tools/drawergen/ssa/ssa_vec16ub.cpp b/tools/drawergen/ssa/ssa_vec16ub.cpp index 0fceab8e87..cdf1e465cb 100644 --- a/tools/drawergen/ssa/ssa_vec16ub.cpp +++ b/tools/drawergen/ssa/ssa_vec16ub.cpp @@ -72,8 +72,14 @@ SSAVec16ub::SSAVec16ub(llvm::Value *v) SSAVec16ub::SSAVec16ub(SSAVec8s s0, SSAVec8s s1) : v(0) { +#ifdef ARM_TARGET + llvm::Value *int8x8_i0 = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::arm_neon_vqmovnsu, s0.v, SSAScope::hint()); + llvm::Value *int8x8_i1 = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::arm_neon_vqmovnsu, s1.v, SSAScope::hint()); + return shuffle(from_llvm(int8x8_i0), from_llvm(int8x8_i1), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); +#else llvm::Value *values[2] = { s0.v, s1.v }; v = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_packuswb_128), values, SSAScope::hint()); +#endif } llvm::Type *SSAVec16ub::llvm_type() diff --git a/tools/drawergen/ssa/ssa_vec4f.cpp b/tools/drawergen/ssa/ssa_vec4f.cpp index e58f167de6..d59400ea93 100644 --- a/tools/drawergen/ssa/ssa_vec4f.cpp +++ b/tools/drawergen/ssa/ssa_vec4f.cpp @@ -81,9 +81,13 @@ SSAVec4f::SSAVec4f(llvm::Value *v) SSAVec4f::SSAVec4f(SSAVec4i i32) : v(0) { +#ifdef ARM_TARGET + v = SSAScope::builder().CreateSIToFP(i32.v, llvm_type(), SSAScope::hint()); +#else //llvm::VectorType *m128type = llvm::VectorType::get(llvm::Type::getFloatTy(*context), 4); //return builder->CreateSIToFP(i32.v, m128type); v = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_cvtdq2ps), i32.v, SSAScope::hint()); +#endif } llvm::Type *SSAVec4f::llvm_type() diff --git a/tools/drawergen/ssa/ssa_vec4i.cpp b/tools/drawergen/ssa/ssa_vec4i.cpp index 0e42124e94..4b0d7766fb 100644 --- a/tools/drawergen/ssa/ssa_vec4i.cpp +++ b/tools/drawergen/ssa/ssa_vec4i.cpp @@ -93,7 +93,11 @@ SSAVec4i::SSAVec4i(SSAInt i0, SSAInt i1, SSAInt i2, SSAInt i3) 
SSAVec4i::SSAVec4i(SSAVec4f f32) : v(0) { +#ifdef ARM_TARGET + v = SSAScope::builder().CreateFPToSI(f32.v, llvm_type(), SSAScope::hint()); +#else v = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_cvttps2dq), f32.v, SSAScope::hint()); +#endif } */ diff --git a/tools/drawergen/ssa/ssa_vec8s.cpp b/tools/drawergen/ssa/ssa_vec8s.cpp index 552b8d69b5..f78c1dd250 100644 --- a/tools/drawergen/ssa/ssa_vec8s.cpp +++ b/tools/drawergen/ssa/ssa_vec8s.cpp @@ -62,8 +62,14 @@ SSAVec8s::SSAVec8s(llvm::Value *v) SSAVec8s::SSAVec8s(SSAVec4i i0, SSAVec4i i1) : v(0) { +#ifdef ARM_TARGET + llvm::Value *int16x4_i0 = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::arm_neon_vqmovns, i0.v, SSAScope::hint()); + llvm::Value *int16x4_i1 = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::arm_neon_vqmovns, i1.v, SSAScope::hint()); + return shuffle(from_llvm(int16x4_i0), from_llvm(int16x4_i1), 0, 1, 2, 3, 4, 5, 6, 7); +#else llvm::Value *values[2] = { i0.v, i1.v }; v = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_packssdw_128), values, SSAScope::hint()); +#endif } llvm::Type *SSAVec8s::llvm_type() From 579199a246985769ed95486c48c7952654ad7717 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 9 Jan 2017 13:53:02 +0100 Subject: [PATCH 655/912] Move R_DrawVisSprite to r_sprite and move R_DrawMaskedColumn to r_draw --- src/CMakeLists.txt | 1 + src/swrenderer/drawers/r_draw.cpp | 198 +++++++++++++++ src/swrenderer/drawers/r_draw.h | 7 + src/swrenderer/scene/r_things.cpp | 292 +--------------------- src/swrenderer/scene/r_things.h | 15 +- src/swrenderer/segments/r_drawsegment.cpp | 23 +- src/swrenderer/things/r_decal.cpp | 22 +- src/swrenderer/things/r_decal.h | 2 +- src/swrenderer/things/r_playersprite.cpp | 11 +- src/swrenderer/things/r_sprite.cpp | 131 ++++++++++ src/swrenderer/things/r_sprite.h | 21 ++ src/swrenderer/things/r_wallsprite.cpp | 20 +- src/swrenderer/things/r_wallsprite.h | 4 
+- src/v_draw.cpp | 24 +- 14 files changed, 412 insertions(+), 359 deletions(-) create mode 100644 src/swrenderer/things/r_sprite.cpp create mode 100644 src/swrenderer/things/r_sprite.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 3601815dc9..acb9c860e8 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -860,6 +860,7 @@ set( FASTMATH_PCH_SOURCES swrenderer/things/r_voxel.cpp swrenderer/things/r_particle.cpp swrenderer/things/r_playersprite.cpp + swrenderer/things/r_sprite.cpp swrenderer/things/r_wallsprite.cpp swrenderer/things/r_decal.cpp swrenderer/plane/r_visibleplane.cpp diff --git a/src/swrenderer/drawers/r_draw.cpp b/src/swrenderer/drawers/r_draw.cpp index 413e7e1564..5c281f8c8b 100644 --- a/src/swrenderer/drawers/r_draw.cpp +++ b/src/swrenderer/drawers/r_draw.cpp @@ -68,6 +68,9 @@ namespace swrenderer int fuzzpos; int fuzzviewheight; + short zeroarray[MAXWIDTH]; + short screenheightarray[MAXWIDTH]; + DrawerFunc colfunc; DrawerFunc basecolfunc; DrawerFunc fuzzcolfunc; @@ -630,4 +633,199 @@ namespace swrenderer if (dc_yl <= dc_yh) fuzzpos = (fuzzpos + dc_yh - dc_yl + 1) % FUZZTABLE; } + + void R_DrawMaskedColumn(int x, fixed_t iscale, FTexture *tex, fixed_t col, double spryscale, double sprtopscreen, bool sprflipvert, const short *mfloorclip, const short *mceilingclip, bool unmasked) + { + using namespace drawerargs; + + // Handle the linear filtered version in a different function to reduce chances of merge conflicts from zdoom. 
+ if (r_swtruecolor && !drawer_needs_pal_input) // To do: add support to R_DrawColumnHoriz_rgba + { + R_DrawMaskedColumnBgra(x, iscale, tex, col, spryscale, sprtopscreen, sprflipvert, mfloorclip, mceilingclip, unmasked); + return; + } + + dc_x = x; + dc_iscale = iscale; + + const FTexture::Span *span; + const BYTE *column; + if (r_swtruecolor && !drawer_needs_pal_input) + column = (const BYTE *)tex->GetColumnBgra(col >> FRACBITS, &span); + else + column = tex->GetColumn(col >> FRACBITS, &span); + + FTexture::Span unmaskedSpan[2]; + if (unmasked) + { + span = unmaskedSpan; + unmaskedSpan[0].TopOffset = 0; + unmaskedSpan[0].Length = tex->GetHeight(); + unmaskedSpan[1].TopOffset = 0; + unmaskedSpan[1].Length = 0; + } + + int pixelsize = r_swtruecolor ? 4 : 1; + + while (span->Length != 0) + { + const int length = span->Length; + const int top = span->TopOffset; + + // calculate unclipped screen coordinates for post + dc_yl = (int)(sprtopscreen + spryscale * top + 0.5); + dc_yh = (int)(sprtopscreen + spryscale * (top + length) + 0.5) - 1; + + if (sprflipvert) + { + swapvalues(dc_yl, dc_yh); + } + + if (dc_yh >= mfloorclip[dc_x]) + { + dc_yh = mfloorclip[dc_x] - 1; + } + if (dc_yl < mceilingclip[dc_x]) + { + dc_yl = mceilingclip[dc_x]; + } + + if (dc_yl <= dc_yh) + { + dc_texturefrac = FLOAT2FIXED((dc_yl + 0.5 - sprtopscreen) / spryscale); + dc_source = column; + dc_source2 = nullptr; + dc_dest = (ylookup[dc_yl] + dc_x) * pixelsize + dc_destorg; + dc_count = dc_yh - dc_yl + 1; + + fixed_t maxfrac = ((top + length) << FRACBITS) - 1; + dc_texturefrac = MAX(dc_texturefrac, 0); + dc_texturefrac = MIN(dc_texturefrac, maxfrac); + if (dc_iscale > 0) + dc_count = MIN(dc_count, (maxfrac - dc_texturefrac + dc_iscale - 1) / dc_iscale); + else if (dc_iscale < 0) + dc_count = MIN(dc_count, (dc_texturefrac - dc_iscale) / (-dc_iscale)); + + (R_Drawers()->*colfunc)(); + } + span++; + } + } + + void R_DrawMaskedColumnBgra(int x, fixed_t iscale, FTexture *tex, fixed_t col, double 
spryscale, double sprtopscreen, bool sprflipvert, const short *mfloorclip, const short *mceilingclip, bool unmasked) + { + using namespace drawerargs; + + dc_x = x; + dc_iscale = iscale; + + // Normalize to 0-1 range: + double uv_stepd = FIXED2DBL(dc_iscale); + double v_step = uv_stepd / tex->GetHeight(); + + // Convert to uint32: + dc_iscale = (uint32_t)(v_step * (1 << 30)); + + // Texture mipmap and filter selection: + fixed_t xoffset = col; + + double xmagnitude = 1.0; // To do: pass this into R_DrawMaskedColumn + double ymagnitude = fabs(uv_stepd); + double magnitude = MAX(ymagnitude, xmagnitude); + double min_lod = -1000.0; + double lod = MAX(log2(magnitude) + r_lod_bias, min_lod); + bool magnifying = lod < 0.0f; + + int mipmap_offset = 0; + int mip_width = tex->GetWidth(); + int mip_height = tex->GetHeight(); + uint32_t xpos = (uint32_t)((((uint64_t)xoffset) << FRACBITS) / mip_width); + if (r_mipmap && tex->Mipmapped() && mip_width > 1 && mip_height > 1) + { + int level = (int)lod; + while (level > 0 && mip_width > 1 && mip_height > 1) + { + mipmap_offset += mip_width * mip_height; + level--; + mip_width = MAX(mip_width >> 1, 1); + mip_height = MAX(mip_height >> 1, 1); + } + } + xoffset = (xpos >> FRACBITS) * mip_width; + + const uint32_t *pixels = tex->GetPixelsBgra() + mipmap_offset; + + bool filter_nearest = (magnifying && !r_magfilter) || (!magnifying && !r_minfilter); + if (filter_nearest) + { + xoffset = MAX(MIN(xoffset, (mip_width << FRACBITS) - 1), 0); + + int tx = xoffset >> FRACBITS; + dc_source = (BYTE*)(pixels + tx * mip_height); + dc_source2 = nullptr; + dc_textureheight = mip_height; + dc_texturefracx = 0; + } + else + { + xoffset = MAX(MIN(xoffset - (FRACUNIT / 2), (mip_width << FRACBITS) - 1), 0); + + int tx0 = xoffset >> FRACBITS; + int tx1 = MIN(tx0 + 1, mip_width - 1); + dc_source = (BYTE*)(pixels + tx0 * mip_height); + dc_source2 = (BYTE*)(pixels + tx1 * mip_height); + dc_textureheight = mip_height; + dc_texturefracx = (xoffset >> 
(FRACBITS - 4)) & 15; + } + + // Grab the posts we need to draw + const FTexture::Span *span; + tex->GetColumnBgra(col >> FRACBITS, &span); + FTexture::Span unmaskedSpan[2]; + if (unmasked) + { + span = unmaskedSpan; + unmaskedSpan[0].TopOffset = 0; + unmaskedSpan[0].Length = tex->GetHeight(); + unmaskedSpan[1].TopOffset = 0; + unmaskedSpan[1].Length = 0; + } + + // Draw each span post + while (span->Length != 0) + { + const int length = span->Length; + const int top = span->TopOffset; + + // calculate unclipped screen coordinates for post + dc_yl = (int)(sprtopscreen + spryscale * top + 0.5); + dc_yh = (int)(sprtopscreen + spryscale * (top + length) + 0.5) - 1; + + if (sprflipvert) + { + swapvalues(dc_yl, dc_yh); + } + + if (dc_yh >= mfloorclip[dc_x]) + { + dc_yh = mfloorclip[dc_x] - 1; + } + if (dc_yl < mceilingclip[dc_x]) + { + dc_yl = mceilingclip[dc_x]; + } + + if (dc_yl <= dc_yh) + { + dc_dest = (ylookup[dc_yl] + dc_x) * 4 + dc_destorg; + dc_count = dc_yh - dc_yl + 1; + + double v = ((dc_yl + 0.5 - sprtopscreen) / spryscale) / tex->GetHeight(); + dc_texturefrac = (uint32_t)(v * (1 << 30)); + + (R_Drawers()->*colfunc)(); + } + span++; + } + } } diff --git a/src/swrenderer/drawers/r_draw.h b/src/swrenderer/drawers/r_draw.h index 1952146584..bf26c8636a 100644 --- a/src/swrenderer/drawers/r_draw.h +++ b/src/swrenderer/drawers/r_draw.h @@ -114,6 +114,10 @@ namespace swrenderer extern uint8_t identitymap[256]; extern FDynamicColormap identitycolormap; + // Constant arrays used for psprite clipping and initializing clipping. + extern short zeroarray[MAXWIDTH]; + extern short screenheightarray[MAXWIDTH]; + // Spectre/Invisibility. 
#define FUZZTABLE 50 extern int fuzzoffset[FUZZTABLE + 1]; @@ -186,6 +190,9 @@ namespace swrenderer void R_SetSpanTexture(FTexture *tex); void R_SetSpanColormap(FDynamicColormap *colormap, int shade); + void R_DrawMaskedColumn(int x, fixed_t iscale, FTexture *texture, fixed_t column, double spryscale, double sprtopscreen, bool sprflipvert, const short *mfloorclip, const short *mceilingclip, bool unmasked = false); + void R_DrawMaskedColumnBgra(int x, fixed_t iscale, FTexture *tex, fixed_t column, double spryscale, double sprtopscreen, bool sprflipvert, const short *mfloorclip, const short *mceilingclip, bool unmasked); + extern DrawerFunc colfunc; extern DrawerFunc basecolfunc; extern DrawerFunc fuzzcolfunc; diff --git a/src/swrenderer/scene/r_things.cpp b/src/swrenderer/scene/r_things.cpp index 54b93caeef..57a0700fb3 100644 --- a/src/swrenderer/scene/r_things.cpp +++ b/src/swrenderer/scene/r_things.cpp @@ -71,6 +71,7 @@ #include "swrenderer/things/r_particle.h" #include "swrenderer/things/r_playersprite.h" #include "swrenderer/things/r_wallsprite.h" +#include "swrenderer/things/r_sprite.h" #include "swrenderer/r_memory.h" EXTERN_CVAR(Int, r_drawfuzz) @@ -123,12 +124,6 @@ int spriteshade; FTexture *WallSpriteTile; -// constant arrays -// used for psprite clipping and initializing clipping -short zeroarray[MAXWIDTH]; -short screenheightarray[MAXWIDTH]; - - // // INITIALIZATION FUNCTIONS // @@ -186,213 +181,6 @@ void R_ClearSprites (void) DrewAVoxel = false; } -// -// R_DrawMaskedColumn -// Used for sprites and masked mid textures. -// Masked means: partly transparent, i.e. stored -// in posts/runs of opaque pixels. 
-// -short* mfloorclip; -short* mceilingclip; - -double spryscale; -double sprtopscreen; - -bool sprflipvert; - -void R_DrawMaskedColumnBgra(FTexture *tex, fixed_t col, bool unmasked) -{ - fixed_t saved_iscale = dc_iscale; // Save this because we need to modify it for mipmaps - - // Normalize to 0-1 range: - double uv_stepd = FIXED2DBL(dc_iscale); - double v_step = uv_stepd / tex->GetHeight(); - - // Convert to uint32: - dc_iscale = (uint32_t)(v_step * (1 << 30)); - - // Texture mipmap and filter selection: - fixed_t xoffset = col; - - double xmagnitude = 1.0; // To do: pass this into R_DrawMaskedColumn - double ymagnitude = fabs(uv_stepd); - double magnitude = MAX(ymagnitude, xmagnitude); - double min_lod = -1000.0; - double lod = MAX(log2(magnitude) + r_lod_bias, min_lod); - bool magnifying = lod < 0.0f; - - int mipmap_offset = 0; - int mip_width = tex->GetWidth(); - int mip_height = tex->GetHeight(); - uint32_t xpos = (uint32_t)((((uint64_t)xoffset) << FRACBITS) / mip_width); - if (r_mipmap && tex->Mipmapped() && mip_width > 1 && mip_height > 1) - { - int level = (int)lod; - while (level > 0 && mip_width > 1 && mip_height > 1) - { - mipmap_offset += mip_width * mip_height; - level--; - mip_width = MAX(mip_width >> 1, 1); - mip_height = MAX(mip_height >> 1, 1); - } - } - xoffset = (xpos >> FRACBITS) * mip_width; - - const uint32_t *pixels = tex->GetPixelsBgra() + mipmap_offset; - - bool filter_nearest = (magnifying && !r_magfilter) || (!magnifying && !r_minfilter); - if (filter_nearest) - { - xoffset = MAX(MIN(xoffset, (mip_width << FRACBITS) - 1), 0); - - int tx = xoffset >> FRACBITS; - dc_source = (BYTE*)(pixels + tx * mip_height); - dc_source2 = nullptr; - dc_textureheight = mip_height; - dc_texturefracx = 0; - } - else - { - xoffset = MAX(MIN(xoffset - (FRACUNIT / 2), (mip_width << FRACBITS) - 1), 0); - - int tx0 = xoffset >> FRACBITS; - int tx1 = MIN(tx0 + 1, mip_width - 1); - dc_source = (BYTE*)(pixels + tx0 * mip_height); - dc_source2 = (BYTE*)(pixels + 
tx1 * mip_height); - dc_textureheight = mip_height; - dc_texturefracx = (xoffset >> (FRACBITS - 4)) & 15; - } - - // Grab the posts we need to draw - const FTexture::Span *span; - tex->GetColumnBgra(col >> FRACBITS, &span); - FTexture::Span unmaskedSpan[2]; - if (unmasked) - { - span = unmaskedSpan; - unmaskedSpan[0].TopOffset = 0; - unmaskedSpan[0].Length = tex->GetHeight(); - unmaskedSpan[1].TopOffset = 0; - unmaskedSpan[1].Length = 0; - } - - // Draw each span post - while (span->Length != 0) - { - const int length = span->Length; - const int top = span->TopOffset; - - // calculate unclipped screen coordinates for post - dc_yl = (int)(sprtopscreen + spryscale * top + 0.5); - dc_yh = (int)(sprtopscreen + spryscale * (top + length) + 0.5) - 1; - - if (sprflipvert) - { - swapvalues(dc_yl, dc_yh); - } - - if (dc_yh >= mfloorclip[dc_x]) - { - dc_yh = mfloorclip[dc_x] - 1; - } - if (dc_yl < mceilingclip[dc_x]) - { - dc_yl = mceilingclip[dc_x]; - } - - if (dc_yl <= dc_yh) - { - dc_dest = (ylookup[dc_yl] + dc_x) * 4 + dc_destorg; - dc_count = dc_yh - dc_yl + 1; - - double v = ((dc_yl + 0.5 - sprtopscreen) / spryscale) / tex->GetHeight(); - dc_texturefrac = (uint32_t)(v * (1 << 30)); - - (R_Drawers()->*colfunc)(); - } - span++; - } - - dc_iscale = saved_iscale; -} - -void R_DrawMaskedColumn (FTexture *tex, fixed_t col, bool unmasked) -{ - // Handle the linear filtered version in a different function to reduce chances of merge conflicts from zdoom. 
- if (r_swtruecolor && !drawer_needs_pal_input) // To do: add support to R_DrawColumnHoriz_rgba - { - R_DrawMaskedColumnBgra(tex, col, unmasked); - return; - } - - const FTexture::Span *span; - const BYTE *column; - if (r_swtruecolor && !drawer_needs_pal_input) - column = (const BYTE *)tex->GetColumnBgra(col >> FRACBITS, &span); - else - column = tex->GetColumn(col >> FRACBITS, &span); - - FTexture::Span unmaskedSpan[2]; - if (unmasked) - { - span = unmaskedSpan; - unmaskedSpan[0].TopOffset = 0; - unmaskedSpan[0].Length = tex->GetHeight(); - unmaskedSpan[1].TopOffset = 0; - unmaskedSpan[1].Length = 0; - } - - int pixelsize = r_swtruecolor ? 4 : 1; - - while (span->Length != 0) - { - const int length = span->Length; - const int top = span->TopOffset; - - // calculate unclipped screen coordinates for post - dc_yl = (int)(sprtopscreen + spryscale * top + 0.5); - dc_yh = (int)(sprtopscreen + spryscale * (top + length) + 0.5) - 1; - - if (sprflipvert) - { - swapvalues (dc_yl, dc_yh); - } - - if (dc_yh >= mfloorclip[dc_x]) - { - dc_yh = mfloorclip[dc_x] - 1; - } - if (dc_yl < mceilingclip[dc_x]) - { - dc_yl = mceilingclip[dc_x]; - } - - if (dc_yl <= dc_yh) - { - dc_texturefrac = FLOAT2FIXED((dc_yl + 0.5 - sprtopscreen) / spryscale); - dc_source = column; - dc_source2 = nullptr; - dc_dest = (ylookup[dc_yl] + dc_x) * pixelsize + dc_destorg; - dc_count = dc_yh - dc_yl + 1; - - fixed_t maxfrac = ((top + length) << FRACBITS) - 1; - dc_texturefrac = MAX(dc_texturefrac, 0); - dc_texturefrac = MIN(dc_texturefrac, maxfrac); - if (dc_iscale > 0) - dc_count = MIN(dc_count, (maxfrac - dc_texturefrac + dc_iscale - 1) / dc_iscale); - else if (dc_iscale < 0) - dc_count = MIN(dc_count, (dc_texturefrac - dc_iscale) / (-dc_iscale)); - - (R_Drawers()->*colfunc)(); - } - span++; - } -} - -// [ZZ] -// R_ClipSpriteColumnWithPortals -// - static TArray portaldrawsegs; static inline void R_CollectPortals() @@ -454,78 +242,6 @@ bool R_ClipSpriteColumnWithPortals(vissprite_t* spr) } -// -// 
R_DrawVisSprite -// mfloorclip and mceilingclip should also be set. -// -void R_DrawVisSprite (vissprite_t *vis) -{ - fixed_t frac; - FTexture *tex; - int x2; - fixed_t xiscale; - bool ispsprite = (!vis->sector && vis->gpos != FVector3(0, 0, 0)); - - if (vis->xscale == 0 || fabs(vis->yscale) < (1.0f / 32000.0f)) - { // scaled to 0; can't see - return; - } - - fixed_t centeryfrac = FLOAT2FIXED(CenterY); - R_SetColorMapLight(vis->Style.BaseColormap, 0, vis->Style.ColormapNum << FRACBITS); - - bool visible = R_SetPatchStyle (vis->Style.RenderStyle, vis->Style.Alpha, vis->Translation, vis->FillColor); - - if (vis->Style.RenderStyle == LegacyRenderStyles[STYLE_Shaded]) - { // For shaded sprites, R_SetPatchStyle sets a dc_colormap to an alpha table, but - // it is the brightest one. We need to get back to the proper light level for - // this sprite. - R_SetColorMapLight(dc_fcolormap, 0, vis->Style.ColormapNum << FRACBITS); - } - - if (visible) - { - tex = vis->pic; - spryscale = vis->yscale; - sprflipvert = false; - dc_iscale = FLOAT2FIXED(1 / vis->yscale); - frac = vis->startfrac; - xiscale = vis->xiscale; - dc_texturemid = vis->texturemid; - - if (vis->renderflags & RF_YFLIP) - { - sprflipvert = true; - spryscale = -spryscale; - dc_iscale = -dc_iscale; - dc_texturemid -= vis->pic->GetHeight(); - sprtopscreen = CenterY + dc_texturemid * spryscale; - } - else - { - sprflipvert = false; - sprtopscreen = CenterY - dc_texturemid * spryscale; - } - - dc_x = vis->x1; - x2 = vis->x2; - - if (dc_x < x2) - { - while (dc_x < x2) - { - if (ispsprite || !R_ClipSpriteColumnWithPortals(vis)) - R_DrawMaskedColumn (tex, frac, false); - dc_x++; - frac += xiscale; - } - } - } - - R_FinishSetPatchStyle (); - - NetUpdate (); -} #if 0 void R_DrawVisVoxel(vissprite_t *spr, int minslabz, int maxslabz, short *cliptop, short *clipbot) @@ -1663,15 +1379,13 @@ void R_DrawSprite (vissprite_t *spr) if (!spr->bIsVoxel) { - mfloorclip = clipbot; - mceilingclip = cliptop; if (!spr->bWallSprite) { - 
R_DrawVisSprite(spr); + R_DrawVisSprite(spr, clipbot, cliptop); } else { - R_DrawWallSprite(spr); + R_DrawWallSprite(spr, clipbot, cliptop); } } else diff --git a/src/swrenderer/scene/r_things.h b/src/swrenderer/scene/r_things.h index 5782ea02b6..97ef51cbd1 100644 --- a/src/swrenderer/scene/r_things.h +++ b/src/swrenderer/scene/r_things.h @@ -38,18 +38,6 @@ EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor); namespace swrenderer { -// Constant arrays used for psprite clipping -// and initializing clipping. -extern short zeroarray[MAXWIDTH]; -extern short screenheightarray[MAXWIDTH]; - -// vars for R_DrawMaskedColumn -extern short* mfloorclip; -extern short* mceilingclip; -extern double spryscale; -extern double sprtopscreen; -extern bool sprflipvert; - extern double pspritexscale; extern double pspritexiscale; extern double pspriteyscale; @@ -60,7 +48,6 @@ extern int spriteshade; bool R_ClipSpriteColumnWithPortals(vissprite_t* spr); -void R_DrawMaskedColumn (FTexture *texture, fixed_t column, bool unmasked = false); void R_CacheSprite (spritedef_t *sprite); void R_SortVisSprites (int (*compare)(const void *, const void *), size_t first); @@ -74,7 +61,7 @@ void R_CheckOffscreenBuffer(int width, int height, bool spansonly); enum { DVF_OFFSCREEN = 1, DVF_SPANSONLY = 2, DVF_MIRRORED = 4 }; void R_ClipVisSprite (vissprite_t *vis, int xl, int xh); -void R_DrawVisSprite(vissprite_t *vis); + } diff --git a/src/swrenderer/segments/r_drawsegment.cpp b/src/swrenderer/segments/r_drawsegment.cpp index c8ac27d9f9..fbbff4ec3f 100644 --- a/src/swrenderer/segments/r_drawsegment.cpp +++ b/src/swrenderer/segments/r_drawsegment.cpp @@ -140,7 +140,7 @@ namespace swrenderer const sector_t *sec; - sprflipvert = false; + bool sprflipvert = false; curline = ds->curline; @@ -192,8 +192,8 @@ namespace swrenderer } } - mfloorclip = openings + ds->sprbottomclip - ds->x1; - mceilingclip = openings + ds->sprtopclip - ds->x1; + short *mfloorclip = openings + ds->sprbottomclip - ds->x1; + short 
*mceilingclip = openings + ds->sprtopclip - ds->x1; // [RH] Draw fog partition @@ -213,7 +213,7 @@ namespace swrenderer MaskedSWall = (float *)(openings + ds->swall) - ds->x1; MaskedScaleY = ds->yscale; maskedtexturecol = (fixed_t *)(openings + ds->maskedtexturecol) - ds->x1; - spryscale = ds->iscale + ds->iscalestep * (x1 - ds->x1); + double spryscale = ds->iscale + ds->iscalestep * (x1 - ds->x1); rw_scalestep = ds->iscalestep; if (fixedlightlev >= 0) @@ -342,21 +342,21 @@ namespace swrenderer // draw the columns one at a time if (visible) { - using namespace drawerargs; - for (dc_x = x1; dc_x < x2; ++dc_x) + for (int x = x1; x < x2; ++x) { if (fixedcolormap == nullptr && fixedlightlev < 0) { R_SetColorMapLight(basecolormap, rw_light, wallshade); } - dc_iscale = xs_Fix<16>::ToFix(MaskedSWall[dc_x] * MaskedScaleY); + fixed_t iscale = xs_Fix<16>::ToFix(MaskedSWall[x] * MaskedScaleY); + double sprtopscreen; if (sprflipvert) sprtopscreen = CenterY + dc_texturemid * spryscale; else sprtopscreen = CenterY - dc_texturemid * spryscale; - R_DrawMaskedColumn(tex, maskedtexturecol[dc_x]); + R_DrawMaskedColumn(x, iscale, tex, maskedtexturecol[x], spryscale, sprtopscreen, sprflipvert, mfloorclip, mceilingclip); rw_light += rw_lightstep; spryscale += rw_scalestep; @@ -463,10 +463,10 @@ namespace swrenderer rw_lightstep = ds->lightstep; rw_light = ds->light + (x1 - ds->x1) * rw_lightstep; - mfloorclip = openings + ds->sprbottomclip - ds->x1; - mceilingclip = openings + ds->sprtopclip - ds->x1; + short *mfloorclip = openings + ds->sprbottomclip - ds->x1; + short *mceilingclip = openings + ds->sprtopclip - ds->x1; - spryscale = ds->iscale + ds->iscalestep * (x1 - ds->x1); + //double spryscale = ds->iscale + ds->iscalestep * (x1 - ds->x1); float *MaskedSWall = (float *)(openings + ds->swall) - ds->x1; // find positioning @@ -558,7 +558,6 @@ namespace swrenderer double floorHeight; double ceilingHeight; - sprflipvert = false; curline = ds->curline; frontsector = 
curline->frontsector; diff --git a/src/swrenderer/things/r_decal.cpp b/src/swrenderer/things/r_decal.cpp index ba08cfd016..043d6e57b6 100644 --- a/src/swrenderer/things/r_decal.cpp +++ b/src/swrenderer/things/r_decal.cpp @@ -39,7 +39,7 @@ #include "swrenderer/line/r_walldraw.h" #include "swrenderer/segments/r_drawsegment.h" #include "swrenderer/scene/r_portal.h" -#include "r_wallsprite.h" +#include "swrenderer/things/r_wallsprite.h" #include "swrenderer/r_memory.h" namespace swrenderer @@ -69,6 +69,8 @@ namespace swrenderer bool rereadcolormap; FDynamicColormap *usecolormap; float light = 0; + short *mfloorclip; + short *mceilingclip; if (decal->RenderFlags & RF_INVISIBLE || !viewactive || !decal->PicNum.isValid()) return; @@ -255,6 +257,7 @@ namespace swrenderer calclighting = true; // Draw it + bool sprflipvert; if (decal->RenderFlags & RF_YFLIP) { sprflipvert = true; @@ -287,7 +290,7 @@ namespace swrenderer { // calculate lighting R_SetColorMapLight(usecolormap, light, wallshade); } - R_DecalColumn(x, maskedScaleY); + R_DecalColumn(x, maskedScaleY, sprflipvert, mfloorclip, mceilingclip); light += lightstep; x++; } @@ -308,21 +311,16 @@ namespace swrenderer WallC = savecoord; } - void R_DecalColumn(int x, float maskedScaleY) + void R_DecalColumn(int x, float maskedScaleY, bool sprflipvert, const short *mfloorclip, const short *mceilingclip) { - using namespace drawerargs; - - dc_x = x; - - float iscale = swall[dc_x] * maskedScaleY; - dc_iscale = FLOAT2FIXED(iscale); - spryscale = 1 / iscale; + float iscale = swall[x] * maskedScaleY; + double spryscale = 1 / iscale; + double sprtopscreen; if (sprflipvert) sprtopscreen = CenterY + dc_texturemid * spryscale; else sprtopscreen = CenterY - dc_texturemid * spryscale; - dc_texturefrac = 0; - R_DrawMaskedColumn(WallSpriteTile, lwall[dc_x]); + R_DrawMaskedColumn(x, FLOAT2FIXED(iscale), WallSpriteTile, lwall[x], spryscale, sprtopscreen, sprflipvert, mfloorclip, mceilingclip); } } diff --git 
a/src/swrenderer/things/r_decal.h b/src/swrenderer/things/r_decal.h index 7bd4147fa0..5399fb3707 100644 --- a/src/swrenderer/things/r_decal.h +++ b/src/swrenderer/things/r_decal.h @@ -22,5 +22,5 @@ namespace swrenderer void R_RenderDecals(side_t *wall, drawseg_t *draw_segment, int wallshade, float lightleft, float lightstep, seg_t *curline, const FWallCoords &wallC); void R_RenderDecal(side_t *wall, DBaseDecal *first, drawseg_t *clipper, int wallshade, float lightleft, float lightstep, seg_t *curline, FWallCoords wallC, int pass); - void R_DecalColumn(int x, float maskedScaleY); + void R_DecalColumn(int x, float maskedScaleY, bool sprflipvert, const short *mfloorclip, const short *mceilingclip); } diff --git a/src/swrenderer/things/r_playersprite.cpp b/src/swrenderer/things/r_playersprite.cpp index 1316a40ca2..0af6c04214 100644 --- a/src/swrenderer/things/r_playersprite.cpp +++ b/src/swrenderer/things/r_playersprite.cpp @@ -52,6 +52,7 @@ #include "r_voxel.h" #include "swrenderer/segments/r_drawsegment.h" #include "swrenderer/scene/r_portal.h" +#include "swrenderer/things/r_sprite.h" #include "swrenderer/r_memory.h" EXTERN_CVAR(Bool, st_scale) @@ -137,10 +138,6 @@ namespace swrenderer lightnum = ((floorlight + ceilinglight) >> 1) + r_actualextralight; spriteshade = LIGHT2SHADE(lightnum) - 24 * FRACUNIT; - // clip to screen bounds - mfloorclip = screenheightarray; - mceilingclip = zeroarray; - if (camera->player != NULL) { double centerhack = CenterY; @@ -566,7 +563,11 @@ namespace swrenderer } } - R_DrawVisSprite(vis); + // clip to screen bounds + short *mfloorclip = screenheightarray; + short *mceilingclip = zeroarray; + + R_DrawVisSprite(vis, mfloorclip, mceilingclip); } void R_DrawRemainingPlayerSprites() diff --git a/src/swrenderer/things/r_sprite.cpp b/src/swrenderer/things/r_sprite.cpp new file mode 100644 index 0000000000..0a87d45e40 --- /dev/null +++ b/src/swrenderer/things/r_sprite.cpp @@ -0,0 +1,131 @@ +// +// Copyright (C) 1993-1996 by id Software, Inc. 
+// +// This source is available for distribution and/or modification +// only under the terms of the DOOM Source Code License as +// published by id Software. All rights reserved. +// +// The source is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// FITNESS FOR A PARTICULAR PURPOSE. See the DOOM Source Code License +// for more details. +// + +#include +#include +#include +#include "p_lnspec.h" +#include "templates.h" +#include "doomdef.h" +#include "m_swap.h" +#include "i_system.h" +#include "w_wad.h" +#include "swrenderer/r_main.h" +#include "swrenderer/scene/r_things.h" +#include "swrenderer/things/r_wallsprite.h" +#include "c_console.h" +#include "c_cvars.h" +#include "c_dispatch.h" +#include "doomstat.h" +#include "v_video.h" +#include "sc_man.h" +#include "s_sound.h" +#include "sbar.h" +#include "gi.h" +#include "r_sky.h" +#include "cmdlib.h" +#include "g_level.h" +#include "d_net.h" +#include "colormatcher.h" +#include "d_netinf.h" +#include "p_effect.h" +#include "swrenderer/scene/r_bsp.h" +#include "swrenderer/scene/r_3dfloors.h" +#include "swrenderer/drawers/r_draw_rgba.h" +#include "swrenderer/drawers/r_draw_pal.h" +#include "v_palette.h" +#include "r_data/r_translate.h" +#include "r_data/colormaps.h" +#include "r_data/voxels.h" +#include "p_local.h" +#include "p_maputl.h" +#include "r_voxel.h" +#include "swrenderer/segments/r_drawsegment.h" +#include "swrenderer/scene/r_portal.h" +#include "swrenderer/things/r_sprite.h" +#include "swrenderer/r_memory.h" + +namespace swrenderer +{ + void R_DrawVisSprite(vissprite_t *vis, const short *mfloorclip, const short *mceilingclip) + { + fixed_t frac; + FTexture *tex; + int x2; + fixed_t xiscale; + bool ispsprite = (!vis->sector && vis->gpos != FVector3(0, 0, 0)); + + double spryscale, sprtopscreen; + bool sprflipvert; + + if (vis->xscale == 0 || fabs(vis->yscale) < (1.0f / 32000.0f)) + { // scaled to 0; can't see + return; + } + + fixed_t 
centeryfrac = FLOAT2FIXED(CenterY); + R_SetColorMapLight(vis->Style.BaseColormap, 0, vis->Style.ColormapNum << FRACBITS); + + bool visible = R_SetPatchStyle(vis->Style.RenderStyle, vis->Style.Alpha, vis->Translation, vis->FillColor); + + if (vis->Style.RenderStyle == LegacyRenderStyles[STYLE_Shaded]) + { // For shaded sprites, R_SetPatchStyle sets a dc_colormap to an alpha table, but + // it is the brightest one. We need to get back to the proper light level for + // this sprite. + R_SetColorMapLight(drawerargs::dc_fcolormap, 0, vis->Style.ColormapNum << FRACBITS); + } + + if (visible) + { + tex = vis->pic; + spryscale = vis->yscale; + sprflipvert = false; + fixed_t iscale = FLOAT2FIXED(1 / vis->yscale); + frac = vis->startfrac; + xiscale = vis->xiscale; + dc_texturemid = vis->texturemid; + + if (vis->renderflags & RF_YFLIP) + { + sprflipvert = true; + spryscale = -spryscale; + iscale = -iscale; + dc_texturemid -= vis->pic->GetHeight(); + sprtopscreen = CenterY + dc_texturemid * spryscale; + } + else + { + sprflipvert = false; + sprtopscreen = CenterY - dc_texturemid * spryscale; + } + + int x = vis->x1; + x2 = vis->x2; + + if (x < x2) + { + while (x < x2) + { + if (ispsprite || !R_ClipSpriteColumnWithPortals(vis)) + R_DrawMaskedColumn(x, iscale, tex, frac, spryscale, sprtopscreen, sprflipvert, mfloorclip, mceilingclip, false); + x++; + frac += xiscale; + } + } + } + + R_FinishSetPatchStyle(); + + NetUpdate(); + } +} diff --git a/src/swrenderer/things/r_sprite.h b/src/swrenderer/things/r_sprite.h new file mode 100644 index 0000000000..5d8f898f6b --- /dev/null +++ b/src/swrenderer/things/r_sprite.h @@ -0,0 +1,21 @@ +// +// Copyright (C) 1993-1996 by id Software, Inc. +// +// This source is available for distribution and/or modification +// only under the terms of the DOOM Source Code License as +// published by id Software. All rights reserved. 
+// +// The source is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// FITNESS FOR A PARTICULAR PURPOSE. See the DOOM Source Code License +// for more details. +// + +#pragma once + +#include "r_visiblesprite.h" + +namespace swrenderer +{ + void R_DrawVisSprite(vissprite_t *vis, const short *mfloorclip, const short *mceilingclip); +} diff --git a/src/swrenderer/things/r_wallsprite.cpp b/src/swrenderer/things/r_wallsprite.cpp index 7699edf706..e04bdc6cbb 100644 --- a/src/swrenderer/things/r_wallsprite.cpp +++ b/src/swrenderer/things/r_wallsprite.cpp @@ -134,10 +134,11 @@ namespace swrenderer vis->wallc = wallc; } - void R_DrawWallSprite(vissprite_t *spr) + void R_DrawWallSprite(vissprite_t *spr, const short *mfloorclip, const short *mceilingclip) { int x1, x2; double iyscale; + bool sprflipvert; x1 = MAX(spr->x1, spr->wallc.sx1); x2 = MIN(spr->x2, spr->wallc.sx2); @@ -221,7 +222,7 @@ namespace swrenderer R_SetColorMapLight(usecolormap, light, shade); } if (!R_ClipSpriteColumnWithPortals(spr)) - R_WallSpriteColumn(x, maskedScaleY); + R_WallSpriteColumn(x, maskedScaleY, sprflipvert, mfloorclip, mceilingclip); light += lightstep; x++; } @@ -229,21 +230,16 @@ namespace swrenderer R_FinishSetPatchStyle(); } - void R_WallSpriteColumn(int x, float maskedScaleY) + void R_WallSpriteColumn(int x, float maskedScaleY, bool sprflipvert, const short *mfloorclip, const short *mceilingclip) { - using namespace drawerargs; - - dc_x = x; - - float iscale = swall[dc_x] * maskedScaleY; - dc_iscale = FLOAT2FIXED(iscale); - spryscale = 1 / iscale; + float iscale = swall[x] * maskedScaleY; + double spryscale = 1 / iscale; + double sprtopscreen; if (sprflipvert) sprtopscreen = CenterY + dc_texturemid * spryscale; else sprtopscreen = CenterY - dc_texturemid * spryscale; - dc_texturefrac = 0; - R_DrawMaskedColumn(WallSpriteTile, lwall[dc_x]); + R_DrawMaskedColumn(x, FLOAT2FIXED(iscale), WallSpriteTile, lwall[x], spryscale, 
sprtopscreen, sprflipvert, mfloorclip, mceilingclip); } } diff --git a/src/swrenderer/things/r_wallsprite.h b/src/swrenderer/things/r_wallsprite.h index cecb4738db..57b96db467 100644 --- a/src/swrenderer/things/r_wallsprite.h +++ b/src/swrenderer/things/r_wallsprite.h @@ -18,6 +18,6 @@ namespace swrenderer { void R_ProjectWallSprite(AActor *thing, const DVector3 &pos, FTextureID picnum, const DVector2 &scale, int renderflags); - void R_DrawWallSprite(vissprite_t *spr); - void R_WallSpriteColumn(int x, float maskedScaleY); + void R_DrawWallSprite(vissprite_t *spr, const short *mfloorclip, const short *mceilingclip); + void R_WallSpriteColumn(int x, float maskedScaleY, bool sprflipvert, const short *mfloorclip, const short *mceilingclip); } diff --git a/src/v_draw.cpp b/src/v_draw.cpp index 5e5722030c..07c3d2a13e 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -46,7 +46,6 @@ #include "swrenderer/drawers/r_draw.h" #include "swrenderer/drawers/r_draw_rgba.h" #include "swrenderer/r_main.h" -#include "swrenderer/scene/r_things.h" #endif #include "r_data/r_translate.h" #include "doomstat.h" @@ -212,24 +211,25 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) // There is not enough precision in the drawing routines to keep the full // precision for y0. 
:( + double sprtopscreen; modf(y0, &sprtopscreen); double yscale = parms.destheight / img->GetHeight(); double iyscale = 1 / yscale; - spryscale = yscale; + double spryscale = yscale; assert(spryscale > 0); - sprflipvert = false; - //dc_iscale = FLOAT2FIXED(iyscale); - //dc_texturemid = (-y0) * iyscale; - //dc_iscale = 0xffffffffu / (unsigned)spryscale; - dc_iscale = FLOAT2FIXED(1 / spryscale); - dc_texturemid = (CenterY - 1 - sprtopscreen) * dc_iscale / 65536; + bool sprflipvert = false; + fixed_t iscale = FLOAT2FIXED(1 / spryscale); + //dc_texturemid = (CenterY - 1 - sprtopscreen) * iscale / 65536; fixed_t frac = 0; double xiscale = img->GetWidth() / parms.destwidth; double x2 = x0 + parms.destwidth; + short *mfloorclip; + short *mceilingclip; + if (bottomclipper[0] != parms.dclip) { fillshort(bottomclipper, screen->GetWidth(), (short)parms.dclip); @@ -272,14 +272,14 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) x2 = parms.rclip; } - dc_x = int(x0); + int x = int(x0); int x2_i = int(x2); fixed_t xiscale_i = FLOAT2FIXED(xiscale); - while (dc_x < x2_i) + while (x < x2_i) { - R_DrawMaskedColumn(img, frac, !parms.masked); - dc_x++; + R_DrawMaskedColumn(x, iscale, img, frac, spryscale, sprtopscreen, sprflipvert, mfloorclip, mceilingclip, !parms.masked); + x++; frac += xiscale_i; } From 6d642b190644d2f7c64dd24987a2940a375a4cc6 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 9 Jan 2017 14:20:47 +0100 Subject: [PATCH 656/912] Move FCoverageBuffer to r_voxel --- src/swrenderer/scene/r_things.cpp | 669 +----------------------------- src/swrenderer/things/r_voxel.cpp | 618 +++++++++++++++++++++++++++ src/swrenderer/things/r_voxel.h | 25 ++ 3 files changed, 644 insertions(+), 668 deletions(-) diff --git a/src/swrenderer/scene/r_things.cpp b/src/swrenderer/scene/r_things.cpp index 57a0700fb3..c91120c7f3 100644 --- a/src/swrenderer/scene/r_things.cpp +++ b/src/swrenderer/scene/r_things.cpp @@ -19,12 +19,6 @@ // DESCRIPTION: // Refresh of things, 
i.e. objects represented by sprites. // -// This file contains some code from the Build Engine. -// -// "Build Engine & Tools" Copyright (c) 1993-1997 Ken Silverman -// Ken Silverman's official web site: "http://www.advsys.net/ken" -// See the included license file "BUILDLIC.TXT" for license info. -// //----------------------------------------------------------------------------- #include @@ -85,29 +79,6 @@ namespace swrenderer { using namespace drawerargs; -// [RH] A c-buffer. Used for keeping track of offscreen voxel spans. - -struct FCoverageBuffer -{ - struct Span - { - Span *NextSpan; - short Start, Stop; - }; - - FCoverageBuffer(int size); - ~FCoverageBuffer(); - - void Clear(); - void InsertSpan(int listnum, int start, int stop); - Span *AllocSpan(); - - FMemArena SpanArena; - Span **Spans; // [0..NumLists-1] span lists - Span *FreeSpans; - unsigned int NumLists; -}; - // // Sprite rotation 0 is facing the viewer, // rotation 1 is one angle turn CLOCKWISE around the axis. @@ -128,9 +99,6 @@ FTexture *WallSpriteTile; // INITIALIZATION FUNCTIONS // -int OffscreenBufferWidth, OffscreenBufferHeight; -BYTE *OffscreenColorBuffer; -FCoverageBuffer *OffscreenCoverageBuffer; // @@ -148,6 +116,7 @@ static int vsprcount; void R_DeinitSprites() { R_DeinitVisSprites(); + R_DeinitRenderVoxel(); // Free vissprites sorter if (spritesorter != NULL) @@ -156,19 +125,6 @@ void R_DeinitSprites() spritesortersize = 0; spritesorter = NULL; } - - // Free offscreen buffer - if (OffscreenColorBuffer != NULL) - { - delete[] OffscreenColorBuffer; - OffscreenColorBuffer = NULL; - } - if (OffscreenCoverageBuffer != NULL) - { - delete OffscreenCoverageBuffer; - OffscreenCoverageBuffer = NULL; - } - OffscreenBufferHeight = OffscreenBufferWidth = 0; } // @@ -241,81 +197,6 @@ bool R_ClipSpriteColumnWithPortals(vissprite_t* spr) return false; } - - -#if 0 -void R_DrawVisVoxel(vissprite_t *spr, int minslabz, int maxslabz, short *cliptop, short *clipbot) -{ - int flags = 0; - - // Do setup for 
blending. - R_SetColorMapLight(spr->Style.BaseColormap, 0, spr->Style.ColormapNum << FRACBITS); - bool visible = R_SetPatchStyle(spr->Style.RenderStyle, spr->Style.Alpha, spr->Translation, spr->FillColor); - - if (!visible) - { - return; - } - if (colfunc == fuzzcolfunc || colfunc == R_FillColumn) - { - flags = DVF_OFFSCREEN | DVF_SPANSONLY; - } - else if (colfunc != basecolfunc) - { - flags = DVF_OFFSCREEN; - } - if (flags != 0) - { - R_CheckOffscreenBuffer(RenderTarget->GetWidth(), RenderTarget->GetHeight(), !!(flags & DVF_SPANSONLY)); - } - if (spr->bInMirror) - { - flags |= DVF_MIRRORED; - } - - // Render the voxel, either directly to the screen or offscreen. - R_DrawVoxel(spr->pa.vpos, spr->pa.vang, spr->gpos, spr->Angle, - spr->xscale, FLOAT2FIXED(spr->yscale), spr->voxel, spr->Style.BaseColormap, spr->Style.ColormapNum, cliptop, clipbot, - minslabz, maxslabz, flags); - - // Blend the voxel, if that's what we need to do. - if ((flags & ~DVF_MIRRORED) != 0) - { - int pixelsize = r_swtruecolor ? 4 : 1; - for (int x = 0; x < viewwidth; ++x) - { - if (!(flags & DVF_SPANSONLY) && (x & 3) == 0) - { - rt_initcols(OffscreenColorBuffer + x * OffscreenBufferHeight); - } - for (FCoverageBuffer::Span *span = OffscreenCoverageBuffer->Spans[x]; span != NULL; span = span->NextSpan) - { - if (flags & DVF_SPANSONLY) - { - dc_x = x; - dc_yl = span->Start; - dc_yh = span->Stop - 1; - dc_count = span->Stop - span->Start; - dc_dest = (ylookup[span->Start] + x) * pixelsize + dc_destorg; - colfunc(); - } - else - { - rt_span_coverage(x, span->Start, span->Stop - 1); - } - } - if (!(flags & DVF_SPANSONLY) && (x & 3) == 3) - { - rt_draw4cols(x - 3); - } - } - } - - R_FinishSetPatchStyle(); - NetUpdate(); -} -#endif - // // R_ProjectSprite // Generates a vissprite for a thing if it might be visible. @@ -1535,552 +1416,4 @@ inline int sgn(int v) return v < 0 ? -1 : v > 0 ? 
1 : 0; } -#if 0 -void R_DrawVoxel(const FVector3 &globalpos, FAngle viewangle, - const FVector3 &dasprpos, DAngle dasprang, - fixed_t daxscale, fixed_t dayscale, FVoxel *voxobj, - FSWColormap *colormap, int colormapnum, short *daumost, short *dadmost, int minslabz, int maxslabz, int flags) -{ - int i, j, k, x, y, syoff, ggxstart, ggystart, nxoff; - fixed_t cosang, sinang, sprcosang, sprsinang; - int backx, backy, gxinc, gyinc; - int daxscalerecip, dayscalerecip, cnt, gxstart, gystart, dazscale; - int lx, rx, nx, ny, x1=0, y1=0, x2=0, y2=0, yinc=0; - int yoff, xs=0, ys=0, xe, ye, xi=0, yi=0, cbackx, cbacky, dagxinc, dagyinc; - kvxslab_t *voxptr, *voxend; - FVoxelMipLevel *mip; - int z1a[64], z2a[64], yplc[64]; - - const int nytooclose = centerxwide * 2100, nytoofar = 32768*32768 - 1048576; - const int xdimenscale = FLOAT2FIXED(centerxwide * YaspectMul / 160); - const double centerxwide_f = centerxwide; - const double centerxwidebig_f = centerxwide_f * 65536*65536*8; - - // Convert to Build's coordinate system. - fixed_t globalposx = xs_Fix<4>::ToFix(globalpos.X); - fixed_t globalposy = xs_Fix<4>::ToFix(-globalpos.Y); - fixed_t globalposz = xs_Fix<8>::ToFix(-globalpos.Z); - - fixed_t dasprx = xs_Fix<4>::ToFix(dasprpos.X); - fixed_t daspry = xs_Fix<4>::ToFix(-dasprpos.Y); - fixed_t dasprz = xs_Fix<8>::ToFix(-dasprpos.Z); - - // Shift the scales from 16 bits of fractional precision to 6. - // Also do some magic voodoo scaling to make them the right size. - daxscale = daxscale / (0xC000 >> 6); - dayscale = dayscale / (0xC000 >> 6); - if (daxscale <= 0 || dayscale <= 0) - { - // won't be visible. - return; - } - - angle_t viewang = viewangle.BAMs(); - cosang = FLOAT2FIXED(viewangle.Cos()) >> 2; - sinang = FLOAT2FIXED(-viewangle.Sin()) >> 2; - sprcosang = FLOAT2FIXED(dasprang.Cos()) >> 2; - sprsinang = FLOAT2FIXED(-dasprang.Sin()) >> 2; - - R_SetupDrawSlab(colormap, 0.0f, colormapnum << FRACBITS); - - int pixelsize = r_swtruecolor ? 
4 : 1; - - // Select mip level - i = abs(DMulScale6(dasprx - globalposx, cosang, daspry - globalposy, sinang)); - i = DivScale6(i, MIN(daxscale, dayscale)); - j = xs_Fix<13>::ToFix(FocalLengthX); - for (k = 0; i >= j && k < voxobj->NumMips; ++k) - { - i >>= 1; - } - if (k >= voxobj->NumMips) k = voxobj->NumMips - 1; - - mip = &voxobj->Mips[k]; if (mip->SlabData == NULL) return; - - minslabz >>= k; - maxslabz >>= k; - - daxscale <<= (k+8); dayscale <<= (k+8); - dazscale = FixedDiv(dayscale, FLOAT2FIXED(BaseYaspectMul)); - daxscale = fixed_t(daxscale / YaspectMul); - daxscale = Scale(daxscale, xdimenscale, centerxwide << 9); - dayscale = Scale(dayscale, FixedMul(xdimenscale, viewingrangerecip), centerxwide << 9); - - daxscalerecip = (1<<30) / daxscale; - dayscalerecip = (1<<30) / dayscale; - - fixed_t piv_x = fixed_t(mip->Pivot.X*256.); - fixed_t piv_y = fixed_t(mip->Pivot.Y*256.); - fixed_t piv_z = fixed_t(mip->Pivot.Z*256.); - - x = FixedMul(globalposx - dasprx, daxscalerecip); - y = FixedMul(globalposy - daspry, daxscalerecip); - backx = (DMulScale10(x, sprcosang, y, sprsinang) + piv_x) >> 8; - backy = (DMulScale10(y, sprcosang, x, -sprsinang) + piv_y) >> 8; - cbackx = clamp(backx, 0, mip->SizeX - 1); - cbacky = clamp(backy, 0, mip->SizeY - 1); - - sprcosang = MulScale14(daxscale, sprcosang); - sprsinang = MulScale14(daxscale, sprsinang); - - x = (dasprx - globalposx) - DMulScale18(piv_x, sprcosang, piv_y, -sprsinang); - y = (daspry - globalposy) - DMulScale18(piv_y, sprcosang, piv_x, sprsinang); - - cosang = FixedMul(cosang, dayscalerecip); - sinang = FixedMul(sinang, dayscalerecip); - - gxstart = y*cosang - x*sinang; - gystart = x*cosang + y*sinang; - gxinc = DMulScale10(sprsinang, cosang, sprcosang, -sinang); - gyinc = DMulScale10(sprcosang, cosang, sprsinang, sinang); - if ((abs(globalposz - dasprz) >> 10) >= abs(dazscale)) return; - - x = 0; y = 0; j = MAX(mip->SizeX, mip->SizeY); - fixed_t *ggxinc = (fixed_t *)alloca((j + 1) * sizeof(fixed_t) * 2); - fixed_t 
*ggyinc = ggxinc + (j + 1); - for (i = 0; i <= j; i++) - { - ggxinc[i] = x; x += gxinc; - ggyinc[i] = y; y += gyinc; - } - - syoff = DivScale21(globalposz - dasprz, FixedMul(dazscale, 0xE800)) + (piv_z << 7); - yoff = (abs(gxinc) + abs(gyinc)) >> 1; - - for (cnt = 0; cnt < 8; cnt++) - { - switch (cnt) - { - case 0: xs = 0; ys = 0; xi = 1; yi = 1; break; - case 1: xs = mip->SizeX-1; ys = 0; xi = -1; yi = 1; break; - case 2: xs = 0; ys = mip->SizeY-1; xi = 1; yi = -1; break; - case 3: xs = mip->SizeX-1; ys = mip->SizeY-1; xi = -1; yi = -1; break; - case 4: xs = 0; ys = cbacky; xi = 1; yi = 2; break; - case 5: xs = mip->SizeX-1; ys = cbacky; xi = -1; yi = 2; break; - case 6: xs = cbackx; ys = 0; xi = 2; yi = 1; break; - case 7: xs = cbackx; ys = mip->SizeY-1; xi = 2; yi = -1; break; - } - xe = cbackx; ye = cbacky; - if (cnt < 4) - { - if ((xi < 0) && (xe >= xs)) continue; - if ((xi > 0) && (xe <= xs)) continue; - if ((yi < 0) && (ye >= ys)) continue; - if ((yi > 0) && (ye <= ys)) continue; - } - else - { - if ((xi < 0) && (xe > xs)) continue; - if ((xi > 0) && (xe < xs)) continue; - if ((yi < 0) && (ye > ys)) continue; - if ((yi > 0) && (ye < ys)) continue; - xe += xi; ye += yi; - } - - i = sgn(ys - backy) + sgn(xs - backx) * 3 + 4; - switch(i) - { - case 6: case 7: x1 = 0; y1 = 0; break; - case 8: case 5: x1 = gxinc; y1 = gyinc; break; - case 0: case 3: x1 = gyinc; y1 = -gxinc; break; - case 2: case 1: x1 = gxinc+gyinc; y1 = gyinc-gxinc; break; - } - switch(i) - { - case 2: case 5: x2 = 0; y2 = 0; break; - case 0: case 1: x2 = gxinc; y2 = gyinc; break; - case 8: case 7: x2 = gyinc; y2 = -gxinc; break; - case 6: case 3: x2 = gxinc+gyinc; y2 = gyinc-gxinc; break; - } - BYTE oand = (1 << int(xs 0) { dagxinc = gxinc; dagyinc = FixedMul(gyinc, viewingrangerecip); } - else { dagxinc = -gxinc; dagyinc = -FixedMul(gyinc, viewingrangerecip); } - - /* Fix for non 90 degree viewing ranges */ - nxoff = FixedMul(x2 - x1, viewingrangerecip); - x1 = FixedMul(x1, viewingrangerecip); 
- - ggxstart = gxstart + ggyinc[ys]; - ggystart = gystart - ggxinc[ys]; - - for (x = xs; x != xe; x += xi) - { - BYTE *slabxoffs = &mip->SlabData[mip->OffsetX[x]]; - short *xyoffs = &mip->OffsetXY[x * (mip->SizeY + 1)]; - - nx = FixedMul(ggxstart + ggxinc[x], viewingrangerecip) + x1; - ny = ggystart + ggyinc[x]; - for (y = ys; y != ye; y += yi, nx += dagyinc, ny -= dagxinc) - { - if ((ny <= nytooclose) || (ny >= nytoofar)) continue; - voxptr = (kvxslab_t *)(slabxoffs + xyoffs[y]); - voxend = (kvxslab_t *)(slabxoffs + xyoffs[y+1]); - if (voxptr >= voxend) continue; - - lx = xs_RoundToInt(nx * centerxwide_f / (ny + y1)) + centerx; - if (lx < 0) lx = 0; - rx = xs_RoundToInt((nx + nxoff) * centerxwide_f / (ny + y2)) + centerx; - if (rx > viewwidth) rx = viewwidth; - if (rx <= lx) continue; - - if (flags & DVF_MIRRORED) - { - int t = viewwidth - lx; - lx = viewwidth - rx; - rx = t; - } - - fixed_t l1 = xs_RoundToInt(centerxwidebig_f / (ny - yoff)); - fixed_t l2 = xs_RoundToInt(centerxwidebig_f / (ny + yoff)); - for (; voxptr < voxend; voxptr = (kvxslab_t *)((BYTE *)voxptr + voxptr->zleng + 3)) - { - const BYTE *col = voxptr->col; - int zleng = voxptr->zleng; - int ztop = voxptr->ztop; - fixed_t z1, z2; - - if (ztop < minslabz) - { - int diff = minslabz - ztop; - ztop = minslabz; - col += diff; - zleng -= diff; - } - if (ztop + zleng > maxslabz) - { - int diff = ztop + zleng - maxslabz; - zleng -= diff; - } - if (zleng <= 0) continue; - - j = (ztop << 15) - syoff; - if (j < 0) - { - k = j + (zleng << 15); - if (k < 0) - { - if ((voxptr->backfacecull & oand32) == 0) continue; - z2 = MulScale32(l2, k) + centery; /* Below slab */ - } - else - { - if ((voxptr->backfacecull & oand) == 0) continue; /* Middle of slab */ - z2 = MulScale32(l1, k) + centery; - } - z1 = MulScale32(l1, j) + centery; - } - else - { - if ((voxptr->backfacecull & oand16) == 0) continue; - z1 = MulScale32(l2, j) + centery; /* Above slab */ - z2 = MulScale32(l1, j + (zleng << 15)) + centery; - } - - if 
(z2 <= z1) continue; - - if (zleng == 1) - { - yinc = 0; - } - else - { - if (z2-z1 >= 1024) yinc = FixedDiv(zleng, z2 - z1); - else yinc = (((1 << 24) - 1) / (z2 - z1)) * zleng >> 8; - } - // [RH] Clip each column separately, not just by the first one. - for (int stripwidth = MIN(countof(z1a), rx - lx), lxt = lx; - lxt < rx; - (lxt += countof(z1a)), stripwidth = MIN(countof(z1a), rx - lxt)) - { - // Calculate top and bottom pixels locations - for (int xxx = 0; xxx < stripwidth; ++xxx) - { - if (zleng == 1) - { - yplc[xxx] = 0; - z1a[xxx] = MAX(z1, daumost[lxt + xxx]); - } - else - { - if (z1 < daumost[lxt + xxx]) - { - yplc[xxx] = yinc * (daumost[lxt + xxx] - z1); - z1a[xxx] = daumost[lxt + xxx]; - } - else - { - yplc[xxx] = 0; - z1a[xxx] = z1; - } - } - z2a[xxx] = MIN(z2, dadmost[lxt + xxx]); - } - // Find top and bottom pixels that match and draw them as one strip - for (int xxl = 0, xxr; xxl < stripwidth; ) - { - if (z1a[xxl] >= z2a[xxl]) - { // No column here - xxl++; - continue; - } - int z1 = z1a[xxl]; - int z2 = z2a[xxl]; - // How many columns share the same extents? - for (xxr = xxl + 1; xxr < stripwidth; ++xxr) - { - if (z1a[xxr] != z1 || z2a[xxr] != z2) - break; - } - - if (!(flags & DVF_OFFSCREEN)) - { - // Draw directly to the screen. - R_DrawSlab(xxr - xxl, yplc[xxl], z2 - z1, yinc, col, (ylookup[z1] + lxt + xxl) * pixelsize + dc_destorg); - } - else - { - // Record the area covered and possibly draw to an offscreen buffer. 
- dc_yl = z1; - dc_yh = z2 - 1; - dc_count = z2 - z1; - dc_iscale = yinc; - for (int x = xxl; x < xxr; ++x) - { - OffscreenCoverageBuffer->InsertSpan(lxt + x, z1, z2); - if (!(flags & DVF_SPANSONLY)) - { - dc_x = lxt + x; - rt_initcols(OffscreenColorBuffer + (dc_x & ~3) * OffscreenBufferHeight); - dc_source = col; - dc_source2 = nullptr; - dc_texturefrac = yplc[xxl]; - hcolfunc_pre(); - } - } - } - xxl = xxr; - } - } - } - } - } - } -} -#endif - -//========================================================================== -// -// FCoverageBuffer Constructor -// -//========================================================================== - -FCoverageBuffer::FCoverageBuffer(int lists) - : Spans(NULL), FreeSpans(NULL) -{ - NumLists = lists; - Spans = new Span *[lists]; - memset(Spans, 0, sizeof(Span*)*lists); -} - -//========================================================================== -// -// FCoverageBuffer Destructor -// -//========================================================================== - -FCoverageBuffer::~FCoverageBuffer() -{ - if (Spans != NULL) - { - delete[] Spans; - } -} - -//========================================================================== -// -// FCoverageBuffer :: Clear -// -//========================================================================== - -void FCoverageBuffer::Clear() -{ - SpanArena.FreeAll(); - memset(Spans, 0, sizeof(Span*)*NumLists); - FreeSpans = NULL; -} - -//========================================================================== -// -// FCoverageBuffer :: InsertSpan -// -// start is inclusive. -// stop is exclusive. 
-// -//========================================================================== - -void FCoverageBuffer::InsertSpan(int listnum, int start, int stop) -{ - assert(unsigned(listnum) < NumLists); - assert(start < stop); - - Span **span_p = &Spans[listnum]; - Span *span; - - if (*span_p == NULL || (*span_p)->Start > stop) - { // This list is empty or the first entry is after this one, so we can just insert the span. - goto addspan; - } - - // Insert the new span in order, merging with existing ones. - while (*span_p != NULL) - { - if ((*span_p)->Stop < start) // ===== (existing span) - { // Span ends before this one starts. // ++++ (new span) - span_p = &(*span_p)->NextSpan; - continue; - } - - // Does the new span overlap or abut the existing one? - if ((*span_p)->Start <= start) - { - if ((*span_p)->Stop >= stop) // ============= - { // The existing span completely covers this one. // +++++ - return; - } -extend: // Extend the existing span with the new one. // ====== - span = *span_p; // +++++++ - span->Stop = stop; // (or) +++++ - - // Free up any spans we just covered up. - span_p = &(*span_p)->NextSpan; - while (*span_p != NULL && (*span_p)->Start <= stop && (*span_p)->Stop <= stop) - { - Span *span = *span_p; // ====== ====== - *span_p = span->NextSpan; // +++++++++++++ - span->NextSpan = FreeSpans; - FreeSpans = span; - } - if (*span_p != NULL && (*span_p)->Start <= stop) // ======= ======== - { // Our new span connects two existing spans. // ++++++++++++++ - // They should all be collapsed into a single span. - span->Stop = (*span_p)->Stop; - span = *span_p; - *span_p = span->NextSpan; - span->NextSpan = FreeSpans; - FreeSpans = span; - } - goto check; - } - else if ((*span_p)->Start <= stop) // ===== - { // The new span extends the existing span from // ++++ - // the beginning. 
// (or) ++++ - (*span_p)->Start = start; - if ((*span_p)->Stop < stop) - { // The new span also extends the existing span // ====== - // at the bottom // ++++++++++++++ - goto extend; - } - goto check; - } - else // ====== - { // No overlap, so insert a new span. // +++++ - goto addspan; - } - } - // Append a new span to the end of the list. -addspan: - span = AllocSpan(); - span->NextSpan = *span_p; - span->Start = start; - span->Stop = stop; - *span_p = span; -check: -#ifdef _DEBUG - // Validate the span list: Spans must be in order, and there must be - // at least one pixel between spans. - for (span = Spans[listnum]; span != NULL; span = span->NextSpan) - { - assert(span->Start < span->Stop); - if (span->NextSpan != NULL) - { - assert(span->Stop < span->NextSpan->Start); - } - } -#endif - ; -} - -//========================================================================== -// -// FCoverageBuffer :: AllocSpan -// -//========================================================================== - -FCoverageBuffer::Span *FCoverageBuffer::AllocSpan() -{ - Span *span; - - if (FreeSpans != NULL) - { - span = FreeSpans; - FreeSpans = span->NextSpan; - } - else - { - span = (Span *)SpanArena.Alloc(sizeof(Span)); - } - return span; -} - -//========================================================================== -// -// R_CheckOffscreenBuffer -// -// Allocates the offscreen coverage buffer and optionally the offscreen -// color buffer. If they already exist but are the wrong size, they will -// be reallocated. 
-// -//========================================================================== - -void R_CheckOffscreenBuffer(int width, int height, bool spansonly) -{ - if (OffscreenCoverageBuffer == NULL) - { - assert(OffscreenColorBuffer == NULL && "The color buffer cannot exist without the coverage buffer"); - OffscreenCoverageBuffer = new FCoverageBuffer(width); - } - else if (OffscreenCoverageBuffer->NumLists != (unsigned)width) - { - delete OffscreenCoverageBuffer; - OffscreenCoverageBuffer = new FCoverageBuffer(width); - if (OffscreenColorBuffer != NULL) - { - delete[] OffscreenColorBuffer; - OffscreenColorBuffer = NULL; - } - } - else - { - OffscreenCoverageBuffer->Clear(); - } - - if (!spansonly) - { - if (OffscreenColorBuffer == NULL) - { - OffscreenColorBuffer = new BYTE[width * height * 4]; - } - else if (OffscreenBufferWidth != width || OffscreenBufferHeight != height) - { - delete[] OffscreenColorBuffer; - OffscreenColorBuffer = new BYTE[width * height * 4]; - } - } - OffscreenBufferWidth = width; - OffscreenBufferHeight = height; -} - } diff --git a/src/swrenderer/things/r_voxel.cpp b/src/swrenderer/things/r_voxel.cpp index f0d29f4888..c02e9ac8f4 100644 --- a/src/swrenderer/things/r_voxel.cpp +++ b/src/swrenderer/things/r_voxel.cpp @@ -1,5 +1,6 @@ /* ** Voxel rendering +** Copyright (c) 1998-2016 Randy Heit ** Copyright (c) 2016 Magnus Norddahl ** ** This software is provided 'as-is', without any express or implied @@ -39,6 +40,13 @@ namespace swrenderer { + namespace + { + FCoverageBuffer *OffscreenCoverageBuffer; + int OffscreenBufferWidth, OffscreenBufferHeight; + uint8_t *OffscreenColorBuffer; + } + void R_DrawVisVoxel(vissprite_t *sprite, int minZ, int maxZ, short *cliptop, short *clipbottom) { R_SetColorMapLight(sprite->Style.BaseColormap, 0, sprite->Style.ColormapNum << FRACBITS); @@ -214,4 +222,614 @@ namespace swrenderer } } } + + void R_DeinitRenderVoxel() + { + // Free offscreen buffer + if (OffscreenColorBuffer != nullptr) + { + delete[] 
OffscreenColorBuffer; + OffscreenColorBuffer = nullptr; + } + if (OffscreenCoverageBuffer != nullptr) + { + delete OffscreenCoverageBuffer; + OffscreenCoverageBuffer = nullptr; + } + OffscreenBufferHeight = OffscreenBufferWidth = 0; + } + + void R_CheckOffscreenBuffer(int width, int height, bool spansonly) + { + // Allocates the offscreen coverage buffer and optionally the offscreen + // color buffer. If they already exist but are the wrong size, they will + // be reallocated. + + if (OffscreenCoverageBuffer == nullptr) + { + assert(OffscreenColorBuffer == nullptr && "The color buffer cannot exist without the coverage buffer"); + OffscreenCoverageBuffer = new FCoverageBuffer(width); + } + else if (OffscreenCoverageBuffer->NumLists != (unsigned)width) + { + delete OffscreenCoverageBuffer; + OffscreenCoverageBuffer = new FCoverageBuffer(width); + if (OffscreenColorBuffer != nullptr) + { + delete[] OffscreenColorBuffer; + OffscreenColorBuffer = nullptr; + } + } + else + { + OffscreenCoverageBuffer->Clear(); + } + + if (!spansonly) + { + if (OffscreenColorBuffer == nullptr) + { + OffscreenColorBuffer = new uint8_t[width * height * 4]; + } + else if (OffscreenBufferWidth != width || OffscreenBufferHeight != height) + { + delete[] OffscreenColorBuffer; + OffscreenColorBuffer = new uint8_t[width * height * 4]; + } + } + OffscreenBufferWidth = width; + OffscreenBufferHeight = height; + } + + //////////////////////////////////////////////////////////////////////////// + + FCoverageBuffer::FCoverageBuffer(int lists) + : Spans(nullptr), FreeSpans(nullptr) + { + NumLists = lists; + Spans = new Span *[lists]; + memset(Spans, 0, sizeof(Span*)*lists); + } + + FCoverageBuffer::~FCoverageBuffer() + { + if (Spans != nullptr) + { + delete[] Spans; + } + } + + void FCoverageBuffer::Clear() + { + SpanArena.FreeAll(); + memset(Spans, 0, sizeof(Span*)*NumLists); + FreeSpans = nullptr; + } + + void FCoverageBuffer::InsertSpan(int listnum, int start, int stop) + { + // start is inclusive. 
+ // stop is exclusive. + + assert(unsigned(listnum) < NumLists); + assert(start < stop); + + Span **span_p = &Spans[listnum]; + Span *span; + + if (*span_p == nullptr || (*span_p)->Start > stop) + { // This list is empty or the first entry is after this one, so we can just insert the span. + goto addspan; + } + + // Insert the new span in order, merging with existing ones. + while (*span_p != nullptr) + { + if ((*span_p)->Stop < start) // ===== (existing span) + { // Span ends before this one starts. // ++++ (new span) + span_p = &(*span_p)->NextSpan; + continue; + } + + // Does the new span overlap or abut the existing one? + if ((*span_p)->Start <= start) + { + if ((*span_p)->Stop >= stop) // ============= + { // The existing span completely covers this one. // +++++ + return; + } + extend: // Extend the existing span with the new one. // ====== + span = *span_p; // +++++++ + span->Stop = stop; // (or) +++++ + + // Free up any spans we just covered up. + span_p = &(*span_p)->NextSpan; + while (*span_p != nullptr && (*span_p)->Start <= stop && (*span_p)->Stop <= stop) + { + Span *span = *span_p; // ====== ====== + *span_p = span->NextSpan; // +++++++++++++ + span->NextSpan = FreeSpans; + FreeSpans = span; + } + if (*span_p != nullptr && (*span_p)->Start <= stop) // ======= ======== + { // Our new span connects two existing spans. // ++++++++++++++ + // They should all be collapsed into a single span. + span->Stop = (*span_p)->Stop; + span = *span_p; + *span_p = span->NextSpan; + span->NextSpan = FreeSpans; + FreeSpans = span; + } + goto check; + } + else if ((*span_p)->Start <= stop) // ===== + { // The new span extends the existing span from // ++++ + // the beginning. // (or) ++++ + (*span_p)->Start = start; + if ((*span_p)->Stop < stop) + { // The new span also extends the existing span // ====== + // at the bottom // ++++++++++++++ + goto extend; + } + goto check; + } + else // ====== + { // No overlap, so insert a new span. 
// +++++ + goto addspan; + } + } + // Append a new span to the end of the list. + addspan: + span = AllocSpan(); + span->NextSpan = *span_p; + span->Start = start; + span->Stop = stop; + *span_p = span; + check: +#ifdef _DEBUG + // Validate the span list: Spans must be in order, and there must be + // at least one pixel between spans. + for (span = Spans[listnum]; span != nullptr; span = span->NextSpan) + { + assert(span->Start < span->Stop); + if (span->NextSpan != nullptr) + { + assert(span->Stop < span->NextSpan->Start); + } + } +#endif + ; + } + + FCoverageBuffer::Span *FCoverageBuffer::AllocSpan() + { + Span *span; + + if (FreeSpans != nullptr) + { + span = FreeSpans; + FreeSpans = span->NextSpan; + } + else + { + span = (Span *)SpanArena.Alloc(sizeof(Span)); + } + return span; + } + + ///////////////////////////////////////////////////////////////////////// + // Old BUILD implementation follows: + // + // This file contains some code from the Build Engine. + // + // "Build Engine & Tools" Copyright (c) 1993-1997 Ken Silverman + // Ken Silverman's official web site: "http://www.advsys.net/ken" + // See the included license file "BUILDLIC.TXT" for license info. + +#if 0 + void R_DrawVisVoxel(vissprite_t *spr, int minslabz, int maxslabz, short *cliptop, short *clipbot) + { + int flags = 0; + + // Do setup for blending. 
+ R_SetColorMapLight(spr->Style.BaseColormap, 0, spr->Style.ColormapNum << FRACBITS); + bool visible = R_SetPatchStyle(spr->Style.RenderStyle, spr->Style.Alpha, spr->Translation, spr->FillColor); + + if (!visible) + { + return; + } + if (colfunc == fuzzcolfunc || colfunc == R_FillColumn) + { + flags = DVF_OFFSCREEN | DVF_SPANSONLY; + } + else if (colfunc != basecolfunc) + { + flags = DVF_OFFSCREEN; + } + if (flags != 0) + { + R_CheckOffscreenBuffer(RenderTarget->GetWidth(), RenderTarget->GetHeight(), !!(flags & DVF_SPANSONLY)); + } + if (spr->bInMirror) + { + flags |= DVF_MIRRORED; + } + + // Render the voxel, either directly to the screen or offscreen. + R_DrawVoxel(spr->pa.vpos, spr->pa.vang, spr->gpos, spr->Angle, + spr->xscale, FLOAT2FIXED(spr->yscale), spr->voxel, spr->Style.BaseColormap, spr->Style.ColormapNum, cliptop, clipbot, + minslabz, maxslabz, flags); + + // Blend the voxel, if that's what we need to do. + if ((flags & ~DVF_MIRRORED) != 0) + { + int pixelsize = r_swtruecolor ? 
4 : 1; + for (int x = 0; x < viewwidth; ++x) + { + if (!(flags & DVF_SPANSONLY) && (x & 3) == 0) + { + rt_initcols(OffscreenColorBuffer + x * OffscreenBufferHeight); + } + for (FCoverageBuffer::Span *span = OffscreenCoverageBuffer->Spans[x]; span != NULL; span = span->NextSpan) + { + if (flags & DVF_SPANSONLY) + { + dc_x = x; + dc_yl = span->Start; + dc_yh = span->Stop - 1; + dc_count = span->Stop - span->Start; + dc_dest = (ylookup[span->Start] + x) * pixelsize + dc_destorg; + colfunc(); + } + else + { + rt_span_coverage(x, span->Start, span->Stop - 1); + } + } + if (!(flags & DVF_SPANSONLY) && (x & 3) == 3) + { + rt_draw4cols(x - 3); + } + } + } + + R_FinishSetPatchStyle(); + NetUpdate(); + } + + void R_DrawVoxel(const FVector3 &globalpos, FAngle viewangle, + const FVector3 &dasprpos, DAngle dasprang, + fixed_t daxscale, fixed_t dayscale, FVoxel *voxobj, + FSWColormap *colormap, int colormapnum, short *daumost, short *dadmost, int minslabz, int maxslabz, int flags) + { + int i, j, k, x, y, syoff, ggxstart, ggystart, nxoff; + fixed_t cosang, sinang, sprcosang, sprsinang; + int backx, backy, gxinc, gyinc; + int daxscalerecip, dayscalerecip, cnt, gxstart, gystart, dazscale; + int lx, rx, nx, ny, x1=0, y1=0, x2=0, y2=0, yinc=0; + int yoff, xs=0, ys=0, xe, ye, xi=0, yi=0, cbackx, cbacky, dagxinc, dagyinc; + kvxslab_t *voxptr, *voxend; + FVoxelMipLevel *mip; + int z1a[64], z2a[64], yplc[64]; + + const int nytooclose = centerxwide * 2100, nytoofar = 32768*32768 - 1048576; + const int xdimenscale = FLOAT2FIXED(centerxwide * YaspectMul / 160); + const double centerxwide_f = centerxwide; + const double centerxwidebig_f = centerxwide_f * 65536*65536*8; + + // Convert to Build's coordinate system. 
+ fixed_t globalposx = xs_Fix<4>::ToFix(globalpos.X); + fixed_t globalposy = xs_Fix<4>::ToFix(-globalpos.Y); + fixed_t globalposz = xs_Fix<8>::ToFix(-globalpos.Z); + + fixed_t dasprx = xs_Fix<4>::ToFix(dasprpos.X); + fixed_t daspry = xs_Fix<4>::ToFix(-dasprpos.Y); + fixed_t dasprz = xs_Fix<8>::ToFix(-dasprpos.Z); + + // Shift the scales from 16 bits of fractional precision to 6. + // Also do some magic voodoo scaling to make them the right size. + daxscale = daxscale / (0xC000 >> 6); + dayscale = dayscale / (0xC000 >> 6); + if (daxscale <= 0 || dayscale <= 0) + { + // won't be visible. + return; + } + + angle_t viewang = viewangle.BAMs(); + cosang = FLOAT2FIXED(viewangle.Cos()) >> 2; + sinang = FLOAT2FIXED(-viewangle.Sin()) >> 2; + sprcosang = FLOAT2FIXED(dasprang.Cos()) >> 2; + sprsinang = FLOAT2FIXED(-dasprang.Sin()) >> 2; + + R_SetupDrawSlab(colormap, 0.0f, colormapnum << FRACBITS); + + int pixelsize = r_swtruecolor ? 4 : 1; + + // Select mip level + i = abs(DMulScale6(dasprx - globalposx, cosang, daspry - globalposy, sinang)); + i = DivScale6(i, MIN(daxscale, dayscale)); + j = xs_Fix<13>::ToFix(FocalLengthX); + for (k = 0; i >= j && k < voxobj->NumMips; ++k) + { + i >>= 1; + } + if (k >= voxobj->NumMips) k = voxobj->NumMips - 1; + + mip = &voxobj->Mips[k]; if (mip->SlabData == NULL) return; + + minslabz >>= k; + maxslabz >>= k; + + daxscale <<= (k+8); dayscale <<= (k+8); + dazscale = FixedDiv(dayscale, FLOAT2FIXED(BaseYaspectMul)); + daxscale = fixed_t(daxscale / YaspectMul); + daxscale = Scale(daxscale, xdimenscale, centerxwide << 9); + dayscale = Scale(dayscale, FixedMul(xdimenscale, viewingrangerecip), centerxwide << 9); + + daxscalerecip = (1<<30) / daxscale; + dayscalerecip = (1<<30) / dayscale; + + fixed_t piv_x = fixed_t(mip->Pivot.X*256.); + fixed_t piv_y = fixed_t(mip->Pivot.Y*256.); + fixed_t piv_z = fixed_t(mip->Pivot.Z*256.); + + x = FixedMul(globalposx - dasprx, daxscalerecip); + y = FixedMul(globalposy - daspry, daxscalerecip); + backx = 
(DMulScale10(x, sprcosang, y, sprsinang) + piv_x) >> 8; + backy = (DMulScale10(y, sprcosang, x, -sprsinang) + piv_y) >> 8; + cbackx = clamp(backx, 0, mip->SizeX - 1); + cbacky = clamp(backy, 0, mip->SizeY - 1); + + sprcosang = MulScale14(daxscale, sprcosang); + sprsinang = MulScale14(daxscale, sprsinang); + + x = (dasprx - globalposx) - DMulScale18(piv_x, sprcosang, piv_y, -sprsinang); + y = (daspry - globalposy) - DMulScale18(piv_y, sprcosang, piv_x, sprsinang); + + cosang = FixedMul(cosang, dayscalerecip); + sinang = FixedMul(sinang, dayscalerecip); + + gxstart = y*cosang - x*sinang; + gystart = x*cosang + y*sinang; + gxinc = DMulScale10(sprsinang, cosang, sprcosang, -sinang); + gyinc = DMulScale10(sprcosang, cosang, sprsinang, sinang); + if ((abs(globalposz - dasprz) >> 10) >= abs(dazscale)) return; + + x = 0; y = 0; j = MAX(mip->SizeX, mip->SizeY); + fixed_t *ggxinc = (fixed_t *)alloca((j + 1) * sizeof(fixed_t) * 2); + fixed_t *ggyinc = ggxinc + (j + 1); + for (i = 0; i <= j; i++) + { + ggxinc[i] = x; x += gxinc; + ggyinc[i] = y; y += gyinc; + } + + syoff = DivScale21(globalposz - dasprz, FixedMul(dazscale, 0xE800)) + (piv_z << 7); + yoff = (abs(gxinc) + abs(gyinc)) >> 1; + + for (cnt = 0; cnt < 8; cnt++) + { + switch (cnt) + { + case 0: xs = 0; ys = 0; xi = 1; yi = 1; break; + case 1: xs = mip->SizeX-1; ys = 0; xi = -1; yi = 1; break; + case 2: xs = 0; ys = mip->SizeY-1; xi = 1; yi = -1; break; + case 3: xs = mip->SizeX-1; ys = mip->SizeY-1; xi = -1; yi = -1; break; + case 4: xs = 0; ys = cbacky; xi = 1; yi = 2; break; + case 5: xs = mip->SizeX-1; ys = cbacky; xi = -1; yi = 2; break; + case 6: xs = cbackx; ys = 0; xi = 2; yi = 1; break; + case 7: xs = cbackx; ys = mip->SizeY-1; xi = 2; yi = -1; break; + } + xe = cbackx; ye = cbacky; + if (cnt < 4) + { + if ((xi < 0) && (xe >= xs)) continue; + if ((xi > 0) && (xe <= xs)) continue; + if ((yi < 0) && (ye >= ys)) continue; + if ((yi > 0) && (ye <= ys)) continue; + } + else + { + if ((xi < 0) && (xe > xs)) 
continue; + if ((xi > 0) && (xe < xs)) continue; + if ((yi < 0) && (ye > ys)) continue; + if ((yi > 0) && (ye < ys)) continue; + xe += xi; ye += yi; + } + + i = sgn(ys - backy) + sgn(xs - backx) * 3 + 4; + switch(i) + { + case 6: case 7: x1 = 0; y1 = 0; break; + case 8: case 5: x1 = gxinc; y1 = gyinc; break; + case 0: case 3: x1 = gyinc; y1 = -gxinc; break; + case 2: case 1: x1 = gxinc+gyinc; y1 = gyinc-gxinc; break; + } + switch(i) + { + case 2: case 5: x2 = 0; y2 = 0; break; + case 0: case 1: x2 = gxinc; y2 = gyinc; break; + case 8: case 7: x2 = gyinc; y2 = -gxinc; break; + case 6: case 3: x2 = gxinc+gyinc; y2 = gyinc-gxinc; break; + } + BYTE oand = (1 << int(xs 0) { dagxinc = gxinc; dagyinc = FixedMul(gyinc, viewingrangerecip); } + else { dagxinc = -gxinc; dagyinc = -FixedMul(gyinc, viewingrangerecip); } + + /* Fix for non 90 degree viewing ranges */ + nxoff = FixedMul(x2 - x1, viewingrangerecip); + x1 = FixedMul(x1, viewingrangerecip); + + ggxstart = gxstart + ggyinc[ys]; + ggystart = gystart - ggxinc[ys]; + + for (x = xs; x != xe; x += xi) + { + BYTE *slabxoffs = &mip->SlabData[mip->OffsetX[x]]; + short *xyoffs = &mip->OffsetXY[x * (mip->SizeY + 1)]; + + nx = FixedMul(ggxstart + ggxinc[x], viewingrangerecip) + x1; + ny = ggystart + ggyinc[x]; + for (y = ys; y != ye; y += yi, nx += dagyinc, ny -= dagxinc) + { + if ((ny <= nytooclose) || (ny >= nytoofar)) continue; + voxptr = (kvxslab_t *)(slabxoffs + xyoffs[y]); + voxend = (kvxslab_t *)(slabxoffs + xyoffs[y+1]); + if (voxptr >= voxend) continue; + + lx = xs_RoundToInt(nx * centerxwide_f / (ny + y1)) + centerx; + if (lx < 0) lx = 0; + rx = xs_RoundToInt((nx + nxoff) * centerxwide_f / (ny + y2)) + centerx; + if (rx > viewwidth) rx = viewwidth; + if (rx <= lx) continue; + + if (flags & DVF_MIRRORED) + { + int t = viewwidth - lx; + lx = viewwidth - rx; + rx = t; + } + + fixed_t l1 = xs_RoundToInt(centerxwidebig_f / (ny - yoff)); + fixed_t l2 = xs_RoundToInt(centerxwidebig_f / (ny + yoff)); + for (; voxptr < voxend; 
voxptr = (kvxslab_t *)((BYTE *)voxptr + voxptr->zleng + 3)) + { + const BYTE *col = voxptr->col; + int zleng = voxptr->zleng; + int ztop = voxptr->ztop; + fixed_t z1, z2; + + if (ztop < minslabz) + { + int diff = minslabz - ztop; + ztop = minslabz; + col += diff; + zleng -= diff; + } + if (ztop + zleng > maxslabz) + { + int diff = ztop + zleng - maxslabz; + zleng -= diff; + } + if (zleng <= 0) continue; + + j = (ztop << 15) - syoff; + if (j < 0) + { + k = j + (zleng << 15); + if (k < 0) + { + if ((voxptr->backfacecull & oand32) == 0) continue; + z2 = MulScale32(l2, k) + centery; /* Below slab */ + } + else + { + if ((voxptr->backfacecull & oand) == 0) continue; /* Middle of slab */ + z2 = MulScale32(l1, k) + centery; + } + z1 = MulScale32(l1, j) + centery; + } + else + { + if ((voxptr->backfacecull & oand16) == 0) continue; + z1 = MulScale32(l2, j) + centery; /* Above slab */ + z2 = MulScale32(l1, j + (zleng << 15)) + centery; + } + + if (z2 <= z1) continue; + + if (zleng == 1) + { + yinc = 0; + } + else + { + if (z2-z1 >= 1024) yinc = FixedDiv(zleng, z2 - z1); + else yinc = (((1 << 24) - 1) / (z2 - z1)) * zleng >> 8; + } + // [RH] Clip each column separately, not just by the first one. 
+ for (int stripwidth = MIN(countof(z1a), rx - lx), lxt = lx; + lxt < rx; + (lxt += countof(z1a)), stripwidth = MIN(countof(z1a), rx - lxt)) + { + // Calculate top and bottom pixels locations + for (int xxx = 0; xxx < stripwidth; ++xxx) + { + if (zleng == 1) + { + yplc[xxx] = 0; + z1a[xxx] = MAX(z1, daumost[lxt + xxx]); + } + else + { + if (z1 < daumost[lxt + xxx]) + { + yplc[xxx] = yinc * (daumost[lxt + xxx] - z1); + z1a[xxx] = daumost[lxt + xxx]; + } + else + { + yplc[xxx] = 0; + z1a[xxx] = z1; + } + } + z2a[xxx] = MIN(z2, dadmost[lxt + xxx]); + } + // Find top and bottom pixels that match and draw them as one strip + for (int xxl = 0, xxr; xxl < stripwidth; ) + { + if (z1a[xxl] >= z2a[xxl]) + { // No column here + xxl++; + continue; + } + int z1 = z1a[xxl]; + int z2 = z2a[xxl]; + // How many columns share the same extents? + for (xxr = xxl + 1; xxr < stripwidth; ++xxr) + { + if (z1a[xxr] != z1 || z2a[xxr] != z2) + break; + } + + if (!(flags & DVF_OFFSCREEN)) + { + // Draw directly to the screen. + R_DrawSlab(xxr - xxl, yplc[xxl], z2 - z1, yinc, col, (ylookup[z1] + lxt + xxl) * pixelsize + dc_destorg); + } + else + { + // Record the area covered and possibly draw to an offscreen buffer. 
+ dc_yl = z1; + dc_yh = z2 - 1; + dc_count = z2 - z1; + dc_iscale = yinc; + for (int x = xxl; x < xxr; ++x) + { + OffscreenCoverageBuffer->InsertSpan(lxt + x, z1, z2); + if (!(flags & DVF_SPANSONLY)) + { + dc_x = lxt + x; + rt_initcols(OffscreenColorBuffer + (dc_x & ~3) * OffscreenBufferHeight); + dc_source = col; + dc_source2 = nullptr; + dc_texturefrac = yplc[xxl]; + hcolfunc_pre(); + } + } + } + xxl = xxr; + } + } + } + } + } + } + } +#endif } diff --git a/src/swrenderer/things/r_voxel.h b/src/swrenderer/things/r_voxel.h index 4c1e3f0af7..a3dc12e912 100644 --- a/src/swrenderer/things/r_voxel.h +++ b/src/swrenderer/things/r_voxel.h @@ -1,5 +1,6 @@ /* ** Voxel rendering +** Copyright (c) 1998-2016 Randy Heit ** Copyright (c) 2016 Magnus Norddahl ** ** This software is provided 'as-is', without any express or implied @@ -33,4 +34,28 @@ namespace swrenderer kvxslab_t *R_GetSlabStart(const FVoxelMipLevel &mip, int x, int y); kvxslab_t *R_GetSlabEnd(const FVoxelMipLevel &mip, int x, int y); kvxslab_t *R_NextSlab(kvxslab_t *slab); + + void R_DeinitRenderVoxel(); + + // [RH] A c-buffer. Used for keeping track of offscreen voxel spans. 
+ struct FCoverageBuffer + { + struct Span + { + Span *NextSpan; + short Start, Stop; + }; + + FCoverageBuffer(int size); + ~FCoverageBuffer(); + + void Clear(); + void InsertSpan(int listnum, int start, int stop); + Span *AllocSpan(); + + FMemArena SpanArena; + Span **Spans; // [0..NumLists-1] span lists + Span *FreeSpans; + unsigned int NumLists; + }; } From da85de01deec91f5ebaed8e87d72063be34c2bfa Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 9 Jan 2017 14:22:00 +0100 Subject: [PATCH 657/912] Move prototype --- src/swrenderer/scene/r_things.h | 2 -- src/swrenderer/things/r_voxel.h | 1 + 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/src/swrenderer/scene/r_things.h b/src/swrenderer/scene/r_things.h index 97ef51cbd1..f0d9c098a5 100644 --- a/src/swrenderer/scene/r_things.h +++ b/src/swrenderer/scene/r_things.h @@ -56,8 +56,6 @@ void R_DrawSprites (); void R_ClearSprites (); void R_DrawMasked (); -void R_CheckOffscreenBuffer(int width, int height, bool spansonly); - enum { DVF_OFFSCREEN = 1, DVF_SPANSONLY = 2, DVF_MIRRORED = 4 }; void R_ClipVisSprite (vissprite_t *vis, int xl, int xh); diff --git a/src/swrenderer/things/r_voxel.h b/src/swrenderer/things/r_voxel.h index a3dc12e912..e3daa0d15a 100644 --- a/src/swrenderer/things/r_voxel.h +++ b/src/swrenderer/things/r_voxel.h @@ -35,6 +35,7 @@ namespace swrenderer kvxslab_t *R_GetSlabEnd(const FVoxelMipLevel &mip, int x, int y); kvxslab_t *R_NextSlab(kvxslab_t *slab); + void R_CheckOffscreenBuffer(int width, int height, bool spansonly); void R_DeinitRenderVoxel(); // [RH] A c-buffer. Used for keeping track of offscreen voxel spans. 
From 3b5dc96f4abead5e249b49103d68a4fd1b700347 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 9 Jan 2017 14:28:31 +0100 Subject: [PATCH 658/912] Move player sprite scale to r_playersprite and make it private --- src/polyrenderer/scene/poly_playersprite.cpp | 16 ++++++++++------ src/swrenderer/r_main.cpp | 5 ++--- src/swrenderer/scene/r_things.cpp | 3 --- src/swrenderer/scene/r_things.h | 4 ---- src/swrenderer/things/r_playersprite.cpp | 11 +++++++++++ src/swrenderer/things/r_playersprite.h | 2 ++ 6 files changed, 25 insertions(+), 16 deletions(-) diff --git a/src/polyrenderer/scene/poly_playersprite.cpp b/src/polyrenderer/scene/poly_playersprite.cpp index ded5a55b30..5baa7bda3e 100644 --- a/src/polyrenderer/scene/poly_playersprite.cpp +++ b/src/polyrenderer/scene/poly_playersprite.cpp @@ -139,18 +139,22 @@ void RenderPolyPlayerSprites::RenderSprite(DPSprite *sprite, AActor *owner, floa sy += wy; } + double pspritexscale = centerxwide / 160.0; + double pspriteyscale = pspritexscale * swrenderer::YaspectMul; + double pspritexiscale = 1 / pspritexscale; + // calculate edges of the shape double tx = sx - BaseXCenter; tx -= tex->GetScaledLeftOffset(); - int x1 = xs_RoundToInt(swrenderer::CenterX + tx * swrenderer::pspritexscale); + int x1 = xs_RoundToInt(swrenderer::CenterX + tx * pspritexscale); // off the right side if (x1 > viewwidth) return; tx += tex->GetScaledWidth(); - int x2 = xs_RoundToInt(swrenderer::CenterX + tx * swrenderer::pspritexscale); + int x2 = xs_RoundToInt(swrenderer::CenterX + tx * pspritexscale); // off the left side if (x2 <= 0) @@ -183,19 +187,19 @@ void RenderPolyPlayerSprites::RenderSprite(DPSprite *sprite, AActor *owner, floa int clipped_x1 = MAX(x1, 0); int clipped_x2 = MIN(x2, viewwidth); - double xscale = swrenderer::pspritexscale / tex->Scale.X; - double yscale = swrenderer::pspriteyscale / tex->Scale.Y; + double xscale = pspritexscale / tex->Scale.X; + double yscale = pspriteyscale / tex->Scale.Y; uint32_t translation = 0; // 
[RH] Use default colors double xiscale, startfrac; if (flip) { - xiscale = -swrenderer::pspritexiscale * tex->Scale.X; + xiscale = -pspritexiscale * tex->Scale.X; startfrac = 1; } else { - xiscale = swrenderer::pspritexiscale * tex->Scale.X; + xiscale = pspritexiscale * tex->Scale.X; startfrac = 0; } diff --git a/src/swrenderer/r_main.cpp b/src/swrenderer/r_main.cpp index 71031486ac..771653fff6 100644 --- a/src/swrenderer/r_main.cpp +++ b/src/swrenderer/r_main.cpp @@ -68,6 +68,7 @@ #include "version.h" #include "c_console.h" #include "r_memory.h" +#include "swrenderer/things/r_playersprite.h" CVAR (String, r_viewsize, "", CVAR_NOSET) CVAR (Bool, r_shadercolormaps, true, CVAR_ARCHIVE) @@ -345,9 +346,7 @@ void R_SWRSetWindow(int windowSize, int fullWidth, int fullHeight, int stHeight, WallTMapScale2 = IYaspectMul / CenterX; // psprite scales - pspritexscale = centerxwide / 160.0; - pspriteyscale = pspritexscale * YaspectMul; - pspritexiscale = 1 / pspritexscale; + R_SetupPlayerSpriteScale(); // thing clipping fillshort (screenheightarray, viewwidth, (short)viewheight); diff --git a/src/swrenderer/scene/r_things.cpp b/src/swrenderer/scene/r_things.cpp index c91120c7f3..c6b7ef90f8 100644 --- a/src/swrenderer/scene/r_things.cpp +++ b/src/swrenderer/scene/r_things.cpp @@ -85,9 +85,6 @@ namespace swrenderer // This is not the same as the angle, // which increases counter clockwise (protractor). 
// -double pspritexscale; -double pspritexiscale; -double pspriteyscale; fixed_t sky1scale; // [RH] Sky 1 scale factor fixed_t sky2scale; // [RH] Sky 2 scale factor diff --git a/src/swrenderer/scene/r_things.h b/src/swrenderer/scene/r_things.h index f0d9c098a5..ae7acce456 100644 --- a/src/swrenderer/scene/r_things.h +++ b/src/swrenderer/scene/r_things.h @@ -38,10 +38,6 @@ EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor); namespace swrenderer { -extern double pspritexscale; -extern double pspritexiscale; -extern double pspriteyscale; - extern FTexture *WallSpriteTile; extern int spriteshade; diff --git a/src/swrenderer/things/r_playersprite.cpp b/src/swrenderer/things/r_playersprite.cpp index 0af6c04214..bd45565775 100644 --- a/src/swrenderer/things/r_playersprite.cpp +++ b/src/swrenderer/things/r_playersprite.cpp @@ -74,6 +74,17 @@ namespace swrenderer TArray vispsprites; unsigned int vispspindex; + + double pspritexscale; + double pspritexiscale; + double pspriteyscale; + } + + void R_SetupPlayerSpriteScale() + { + pspritexscale = centerxwide / 160.0; + pspriteyscale = pspritexscale * YaspectMul; + pspritexiscale = 1 / pspritexscale; } void R_DrawPlayerSprites() diff --git a/src/swrenderer/things/r_playersprite.h b/src/swrenderer/things/r_playersprite.h index 4569a54361..68e174654a 100644 --- a/src/swrenderer/things/r_playersprite.h +++ b/src/swrenderer/things/r_playersprite.h @@ -17,6 +17,8 @@ namespace swrenderer { + void R_SetupPlayerSpriteScale(); + void R_DrawPlayerSprites(); void R_DrawPSprite(DPSprite *pspr, AActor *owner, float bobx, float boby, double wx, double wy, double ticfrac); void R_DrawRemainingPlayerSprites(); From 65fa8074332f712fe78724062addd53eea19db38 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 9 Jan 2017 14:46:57 +0100 Subject: [PATCH 659/912] Remove spriteshade global --- src/swrenderer/scene/r_things.cpp | 14 ++++++-------- src/swrenderer/scene/r_things.h | 2 -- src/swrenderer/things/r_playersprite.cpp | 6 +++--- 
src/swrenderer/things/r_playersprite.h | 2 +- src/swrenderer/things/r_wallsprite.cpp | 2 +- src/swrenderer/things/r_wallsprite.h | 2 +- 6 files changed, 12 insertions(+), 16 deletions(-) diff --git a/src/swrenderer/scene/r_things.cpp b/src/swrenderer/scene/r_things.cpp index c6b7ef90f8..e3d4482444 100644 --- a/src/swrenderer/scene/r_things.cpp +++ b/src/swrenderer/scene/r_things.cpp @@ -88,8 +88,6 @@ namespace swrenderer fixed_t sky1scale; // [RH] Sky 1 scale factor fixed_t sky2scale; // [RH] Sky 2 scale factor -int spriteshade; - FTexture *WallSpriteTile; // @@ -198,7 +196,7 @@ bool R_ClipSpriteColumnWithPortals(vissprite_t* spr) // R_ProjectSprite // Generates a vissprite for a thing if it might be visible. // -void R_ProjectSprite (AActor *thing, WaterFakeSide fakeside, F3DFloor *fakefloor, F3DFloor *fakeceiling, sector_t *current_sector) +void R_ProjectSprite (AActor *thing, WaterFakeSide fakeside, F3DFloor *fakefloor, F3DFloor *fakeceiling, sector_t *current_sector, int spriteshade) { double tr_x; double tr_y; @@ -357,7 +355,7 @@ void R_ProjectSprite (AActor *thing, WaterFakeSide fakeside, F3DFloor *fakefloor if ((renderflags & RF_SPRITETYPEMASK) == RF_WALLSPRITE) { - R_ProjectWallSprite(thing, pos, picnum, spriteScale, renderflags); + R_ProjectWallSprite(thing, pos, picnum, spriteScale, renderflags, spriteshade); return; } @@ -648,7 +646,7 @@ void R_AddSprites (sector_t *sec, int lightlevel, WaterFakeSide fakeside) // Well, now it will be done. sec->validcount = validcount; - spriteshade = LIGHT2SHADE(lightlevel + r_actualextralight); + int spriteshade = LIGHT2SHADE(lightlevel + r_actualextralight); // Handle all things in sector. for(auto p = sec->touching_renderthings; p != nullptr; p = p->m_snext) @@ -685,7 +683,7 @@ void R_AddSprites (sector_t *sec, int lightlevel, WaterFakeSide fakeside) if(rover->bottom.plane->ZatPoint(0., 0.) 
>= thing->Top()) fakeceiling = rover; } } - R_ProjectSprite (thing, fakeside, fakefloor, fakeceiling, sec); + R_ProjectSprite (thing, fakeside, fakefloor, fakeceiling, sec, spriteshade); fakeceiling = NULL; fakefloor = NULL; } @@ -1018,7 +1016,7 @@ void R_DrawSprite (vissprite_t *spr) } else { // diminished light - spriteshade = LIGHT2SHADE(sec->lightlevel + r_actualextralight); + int spriteshade = LIGHT2SHADE(sec->lightlevel + r_actualextralight); spr->Style.BaseColormap = mybasecolormap; spr->Style.ColormapNum = GETPALOOKUP(r_SpriteVisibility / MAX(MINZ, (double)spr->depth), spriteshade); } @@ -1349,7 +1347,7 @@ void R_DrawMaskedSingle (bool renew) void R_DrawHeightPlanes(double height); // kg3D - fake planes -void R_DrawMasked (void) +void R_DrawMasked () { R_CollectPortals(); R_SortVisSprites (DrewAVoxel ? sv_compare2d : sv_compare, firstvissprite - vissprites); diff --git a/src/swrenderer/scene/r_things.h b/src/swrenderer/scene/r_things.h index ae7acce456..f870c8f22f 100644 --- a/src/swrenderer/scene/r_things.h +++ b/src/swrenderer/scene/r_things.h @@ -40,8 +40,6 @@ namespace swrenderer extern FTexture *WallSpriteTile; -extern int spriteshade; - bool R_ClipSpriteColumnWithPortals(vissprite_t* spr); diff --git a/src/swrenderer/things/r_playersprite.cpp b/src/swrenderer/things/r_playersprite.cpp index bd45565775..3755236525 100644 --- a/src/swrenderer/things/r_playersprite.cpp +++ b/src/swrenderer/things/r_playersprite.cpp @@ -147,7 +147,7 @@ namespace swrenderer // get light level lightnum = ((floorlight + ceilinglight) >> 1) + r_actualextralight; - spriteshade = LIGHT2SHADE(lightnum) - 24 * FRACUNIT; + int spriteshade = LIGHT2SHADE(lightnum) - 24 * FRACUNIT; if (camera->player != NULL) { @@ -190,7 +190,7 @@ namespace swrenderer if ((psp->GetID() != PSP_TARGETCENTER || CrosshairImage == nullptr) && psp->GetCaller() != nullptr) { - R_DrawPSprite(psp, camera, bobx, boby, wx, wy, r_TicFracF); + R_DrawPSprite(psp, camera, bobx, boby, wx, wy, r_TicFracF, 
spriteshade); } psp = psp->GetNext(); @@ -200,7 +200,7 @@ namespace swrenderer } } - void R_DrawPSprite(DPSprite *pspr, AActor *owner, float bobx, float boby, double wx, double wy, double ticfrac) + void R_DrawPSprite(DPSprite *pspr, AActor *owner, float bobx, float boby, double wx, double wy, double ticfrac, int spriteshade) { double tx; int x1; diff --git a/src/swrenderer/things/r_playersprite.h b/src/swrenderer/things/r_playersprite.h index 68e174654a..a96b916661 100644 --- a/src/swrenderer/things/r_playersprite.h +++ b/src/swrenderer/things/r_playersprite.h @@ -20,6 +20,6 @@ namespace swrenderer void R_SetupPlayerSpriteScale(); void R_DrawPlayerSprites(); - void R_DrawPSprite(DPSprite *pspr, AActor *owner, float bobx, float boby, double wx, double wy, double ticfrac); + void R_DrawPSprite(DPSprite *pspr, AActor *owner, float bobx, float boby, double wx, double wy, double ticfrac, int spriteshade); void R_DrawRemainingPlayerSprites(); } diff --git a/src/swrenderer/things/r_wallsprite.cpp b/src/swrenderer/things/r_wallsprite.cpp index e04bdc6cbb..674bd12187 100644 --- a/src/swrenderer/things/r_wallsprite.cpp +++ b/src/swrenderer/things/r_wallsprite.cpp @@ -58,7 +58,7 @@ namespace swrenderer { - void R_ProjectWallSprite(AActor *thing, const DVector3 &pos, FTextureID picnum, const DVector2 &scale, int renderflags) + void R_ProjectWallSprite(AActor *thing, const DVector3 &pos, FTextureID picnum, const DVector2 &scale, int renderflags, int spriteshade) { FWallCoords wallc; double x1, x2; diff --git a/src/swrenderer/things/r_wallsprite.h b/src/swrenderer/things/r_wallsprite.h index 57b96db467..a3ed29170b 100644 --- a/src/swrenderer/things/r_wallsprite.h +++ b/src/swrenderer/things/r_wallsprite.h @@ -17,7 +17,7 @@ namespace swrenderer { - void R_ProjectWallSprite(AActor *thing, const DVector3 &pos, FTextureID picnum, const DVector2 &scale, int renderflags); + void R_ProjectWallSprite(AActor *thing, const DVector3 &pos, FTextureID picnum, const DVector2 &scale, int 
renderflags, int spriteshade); void R_DrawWallSprite(vissprite_t *spr, const short *mfloorclip, const short *mceilingclip); void R_WallSpriteColumn(int x, float maskedScaleY, bool sprflipvert, const short *mfloorclip, const short *mceilingclip); } From b9024f87a063931828a50f0579f5cab72bd939f5 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 9 Jan 2017 14:48:19 +0100 Subject: [PATCH 660/912] Remove unused globals --- src/swrenderer/scene/r_things.cpp | 9 --------- 1 file changed, 9 deletions(-) diff --git a/src/swrenderer/scene/r_things.cpp b/src/swrenderer/scene/r_things.cpp index e3d4482444..623e3758de 100644 --- a/src/swrenderer/scene/r_things.cpp +++ b/src/swrenderer/scene/r_things.cpp @@ -79,15 +79,6 @@ namespace swrenderer { using namespace drawerargs; -// -// Sprite rotation 0 is facing the viewer, -// rotation 1 is one angle turn CLOCKWISE around the axis. -// This is not the same as the angle, -// which increases counter clockwise (protractor). -// -fixed_t sky1scale; // [RH] Sky 1 scale factor -fixed_t sky2scale; // [RH] Sky 2 scale factor - FTexture *WallSpriteTile; // From bb2806c9cab1e2294ddfe5bcc020eae217aa49bf Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 9 Jan 2017 14:51:34 +0100 Subject: [PATCH 661/912] Remove WallSpriteTile global --- src/swrenderer/scene/r_things.cpp | 11 ----------- src/swrenderer/scene/r_things.h | 2 -- src/swrenderer/things/r_decal.cpp | 6 +++--- src/swrenderer/things/r_decal.h | 2 +- src/swrenderer/things/r_wallsprite.cpp | 6 +++--- src/swrenderer/things/r_wallsprite.h | 2 +- 6 files changed, 8 insertions(+), 21 deletions(-) diff --git a/src/swrenderer/scene/r_things.cpp b/src/swrenderer/scene/r_things.cpp index 623e3758de..9e129af872 100644 --- a/src/swrenderer/scene/r_things.cpp +++ b/src/swrenderer/scene/r_things.cpp @@ -79,17 +79,6 @@ namespace swrenderer { using namespace drawerargs; -FTexture *WallSpriteTile; - -// -// INITIALIZATION FUNCTIONS -// - - -// - -// GAME FUNCTIONS -// bool DrewAVoxel; 
static vissprite_t **spritesorter; diff --git a/src/swrenderer/scene/r_things.h b/src/swrenderer/scene/r_things.h index f870c8f22f..6db9f3f4e1 100644 --- a/src/swrenderer/scene/r_things.h +++ b/src/swrenderer/scene/r_things.h @@ -38,8 +38,6 @@ EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor); namespace swrenderer { -extern FTexture *WallSpriteTile; - bool R_ClipSpriteColumnWithPortals(vissprite_t* spr); diff --git a/src/swrenderer/things/r_decal.cpp b/src/swrenderer/things/r_decal.cpp index 043d6e57b6..1483e42120 100644 --- a/src/swrenderer/things/r_decal.cpp +++ b/src/swrenderer/things/r_decal.cpp @@ -115,7 +115,7 @@ namespace swrenderer } } - WallSpriteTile = TexMan(decal->PicNum, true); + FTexture *WallSpriteTile = TexMan(decal->PicNum, true); flipx = (BYTE)(decal->RenderFlags & RF_XFLIP); if (WallSpriteTile == NULL || WallSpriteTile->UseType == FTexture::TEX_Null) @@ -290,7 +290,7 @@ namespace swrenderer { // calculate lighting R_SetColorMapLight(usecolormap, light, wallshade); } - R_DecalColumn(x, maskedScaleY, sprflipvert, mfloorclip, mceilingclip); + R_DecalColumn(x, WallSpriteTile, maskedScaleY, sprflipvert, mfloorclip, mceilingclip); light += lightstep; x++; } @@ -311,7 +311,7 @@ namespace swrenderer WallC = savecoord; } - void R_DecalColumn(int x, float maskedScaleY, bool sprflipvert, const short *mfloorclip, const short *mceilingclip) + void R_DecalColumn(int x, FTexture *WallSpriteTile, float maskedScaleY, bool sprflipvert, const short *mfloorclip, const short *mceilingclip) { float iscale = swall[x] * maskedScaleY; double spryscale = 1 / iscale; diff --git a/src/swrenderer/things/r_decal.h b/src/swrenderer/things/r_decal.h index 5399fb3707..461675a71d 100644 --- a/src/swrenderer/things/r_decal.h +++ b/src/swrenderer/things/r_decal.h @@ -22,5 +22,5 @@ namespace swrenderer void R_RenderDecals(side_t *wall, drawseg_t *draw_segment, int wallshade, float lightleft, float lightstep, seg_t *curline, const FWallCoords &wallC); void R_RenderDecal(side_t *wall, 
DBaseDecal *first, drawseg_t *clipper, int wallshade, float lightleft, float lightstep, seg_t *curline, FWallCoords wallC, int pass); - void R_DecalColumn(int x, float maskedScaleY, bool sprflipvert, const short *mfloorclip, const short *mceilingclip); + void R_DecalColumn(int x, FTexture *WallSpriteTile, float maskedScaleY, bool sprflipvert, const short *mfloorclip, const short *mceilingclip); } diff --git a/src/swrenderer/things/r_wallsprite.cpp b/src/swrenderer/things/r_wallsprite.cpp index 674bd12187..fe1aabb64d 100644 --- a/src/swrenderer/things/r_wallsprite.cpp +++ b/src/swrenderer/things/r_wallsprite.cpp @@ -185,7 +185,7 @@ namespace swrenderer calclighting = true; // Draw it - WallSpriteTile = spr->pic; + FTexture *WallSpriteTile = spr->pic; if (spr->renderflags & RF_YFLIP) { sprflipvert = true; @@ -222,7 +222,7 @@ namespace swrenderer R_SetColorMapLight(usecolormap, light, shade); } if (!R_ClipSpriteColumnWithPortals(spr)) - R_WallSpriteColumn(x, maskedScaleY, sprflipvert, mfloorclip, mceilingclip); + R_WallSpriteColumn(x, WallSpriteTile, maskedScaleY, sprflipvert, mfloorclip, mceilingclip); light += lightstep; x++; } @@ -230,7 +230,7 @@ namespace swrenderer R_FinishSetPatchStyle(); } - void R_WallSpriteColumn(int x, float maskedScaleY, bool sprflipvert, const short *mfloorclip, const short *mceilingclip) + void R_WallSpriteColumn(int x, FTexture *WallSpriteTile, float maskedScaleY, bool sprflipvert, const short *mfloorclip, const short *mceilingclip) { float iscale = swall[x] * maskedScaleY; double spryscale = 1 / iscale; diff --git a/src/swrenderer/things/r_wallsprite.h b/src/swrenderer/things/r_wallsprite.h index a3ed29170b..2fe78a68e2 100644 --- a/src/swrenderer/things/r_wallsprite.h +++ b/src/swrenderer/things/r_wallsprite.h @@ -19,5 +19,5 @@ namespace swrenderer { void R_ProjectWallSprite(AActor *thing, const DVector3 &pos, FTextureID picnum, const DVector2 &scale, int renderflags, int spriteshade); void R_DrawWallSprite(vissprite_t *spr, const short 
*mfloorclip, const short *mceilingclip); - void R_WallSpriteColumn(int x, float maskedScaleY, bool sprflipvert, const short *mfloorclip, const short *mceilingclip); + void R_WallSpriteColumn(int x, FTexture *WallSpriteTile, float maskedScaleY, bool sprflipvert, const short *mfloorclip, const short *mceilingclip); } From 4eac238d26e6ece0b187ac0bd83cd13171b7f60a Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 9 Jan 2017 14:52:50 +0100 Subject: [PATCH 662/912] Remove commented out code --- src/swrenderer/scene/r_things.cpp | 129 ------------------------------ 1 file changed, 129 deletions(-) diff --git a/src/swrenderer/scene/r_things.cpp b/src/swrenderer/scene/r_things.cpp index 9e129af872..4f2da52f3e 100644 --- a/src/swrenderer/scene/r_things.cpp +++ b/src/swrenderer/scene/r_things.cpp @@ -696,135 +696,6 @@ static bool sv_compare2d(vissprite_t *a, vissprite_t *b) DVector2(b->deltax, b->deltay).LengthSquared(); } -#if 0 -static drawseg_t **drawsegsorter; -static int drawsegsortersize = 0; - -// Sort vissprites by leftmost column, left to right -static int sv_comparex (const void *arg1, const void *arg2) -{ - return (*(vissprite_t **)arg2)->x1 - (*(vissprite_t **)arg1)->x1; -} - -// Sort drawsegs by rightmost column, left to right -static int sd_comparex (const void *arg1, const void *arg2) -{ - return (*(drawseg_t **)arg2)->x2 - (*(drawseg_t **)arg1)->x2; -} - -// Split up vissprites that intersect drawsegs -void R_SplitVisSprites () -{ - size_t start, stop; - size_t numdrawsegs = ds_p - firstdrawseg; - size_t numsprites; - size_t spr, dseg, dseg2; - - if (!r_splitsprites) - return; - - if (numdrawsegs == 0 || vissprite_p - firstvissprite == 0) - return; - - // Sort drawsegs from left to right - if (numdrawsegs > drawsegsortersize) - { - if (drawsegsorter != NULL) - delete[] drawsegsorter; - drawsegsortersize = numdrawsegs * 2; - drawsegsorter = new drawseg_t *[drawsegsortersize]; - } - for (dseg = dseg2 = 0; dseg < numdrawsegs; ++dseg) - { - // Drawsegs 
that don't clip any sprites don't need to be considered. - if (firstdrawseg[dseg].silhouette) - { - drawsegsorter[dseg2++] = &firstdrawseg[dseg]; - } - } - numdrawsegs = dseg2; - if (numdrawsegs == 0) - { - return; - } - qsort (drawsegsorter, numdrawsegs, sizeof(drawseg_t *), sd_comparex); - - // Now sort vissprites from left to right, and walk them simultaneously - // with the drawsegs, splitting any that intersect. - start = firstvissprite - vissprites; - - int p = 0; - do - { - p++; - R_SortVisSprites (sv_comparex, start); - stop = vissprite_p - vissprites; - numsprites = stop - start; - - spr = dseg = 0; - do - { - vissprite_t *vis = spritesorter[spr], *vis2; - - // Skip drawsegs until we get to one that doesn't end before the sprite - // begins. - while (dseg < numdrawsegs && drawsegsorter[dseg]->x2 <= vis->x1) - { - dseg++; - } - // Now split the sprite against any drawsegs it intersects - for (dseg2 = dseg; dseg2 < numdrawsegs; dseg2++) - { - drawseg_t *ds = drawsegsorter[dseg2]; - - if (ds->x1 > vis->x2 || ds->x2 < vis->x1) - continue; - - if ((vis->idepth < ds->siz1) != (vis->idepth < ds->siz2)) - { // The drawseg is crossed; find the x where the intersection occurs - int cross = Scale (vis->idepth - ds->siz1, ds->sx2 - ds->sx1, ds->siz2 - ds->siz1) + ds->sx1 + 1; - -/* if (cross < ds->x1 || cross > ds->x2) - { // The original seg is crossed, but the drawseg is not - continue; - } -*/ if (cross <= vis->x1 || cross >= vis->x2) - { // Don't create 0-sized sprites - continue; - } - - vis->bSplitSprite = true; - - // Create a new vissprite for the right part of the sprite - vis2 = R_NewVisSprite (); - *vis2 = *vis; - vis2->startfrac += vis2->xiscale * (cross - vis2->x1); - vis->x2 = cross-1; - vis2->x1 = cross; - //vis2->alpha /= 2; - //vis2->RenderStyle = STYLE_Add; - - if (vis->idepth < ds->siz1) - { // Left is in back, right is in front - vis->sector = ds->curline->backsector; - vis2->sector = ds->curline->frontsector; - } - else - { // Right is in front, 
left is in back - vis->sector = ds->curline->frontsector; - vis2->sector = ds->curline->backsector; - } - } - } - } - while (dseg < numdrawsegs && ++spr < numsprites); - - // Repeat for any new sprites that were added. - } - while (start = stop, stop != vissprite_p - vissprites); -} -#endif - #ifdef __GNUC__ static void swap(vissprite_t *&a, vissprite_t *&b) { From 4c67a717f872a78c284b7c8c1aa1c883024f9d0d Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 9 Jan 2017 14:56:29 +0100 Subject: [PATCH 663/912] Remove drawerargs from r_things --- src/swrenderer/scene/r_things.cpp | 5 ++--- src/swrenderer/scene/r_things.h | 2 +- src/swrenderer/things/r_particle.cpp | 6 ++---- src/swrenderer/things/r_sprite.cpp | 2 +- src/swrenderer/things/r_wallsprite.cpp | 2 +- 5 files changed, 7 insertions(+), 10 deletions(-) diff --git a/src/swrenderer/scene/r_things.cpp b/src/swrenderer/scene/r_things.cpp index 4f2da52f3e..569e9be10b 100644 --- a/src/swrenderer/scene/r_things.cpp +++ b/src/swrenderer/scene/r_things.cpp @@ -77,7 +77,6 @@ CVAR(Bool, r_splitsprites, true, CVAR_ARCHIVE) namespace swrenderer { - using namespace drawerargs; bool DrewAVoxel; @@ -144,7 +143,7 @@ static inline void R_CollectPortals() } } -bool R_ClipSpriteColumnWithPortals(vissprite_t* spr) +bool R_ClipSpriteColumnWithPortals(int x, vissprite_t* spr) { RenderPortal *renderportal = RenderPortal::Instance(); @@ -165,7 +164,7 @@ bool R_ClipSpriteColumnWithPortals(vissprite_t* spr) continue; // now if current column is covered by this drawseg, we clip it away - if ((dc_x >= seg->x1) && (dc_x < seg->x2)) + if ((x >= seg->x1) && (x < seg->x2)) return true; } diff --git a/src/swrenderer/scene/r_things.h b/src/swrenderer/scene/r_things.h index 6db9f3f4e1..04685d8bbf 100644 --- a/src/swrenderer/scene/r_things.h +++ b/src/swrenderer/scene/r_things.h @@ -38,7 +38,7 @@ EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor); namespace swrenderer { -bool R_ClipSpriteColumnWithPortals(vissprite_t* spr); +bool 
R_ClipSpriteColumnWithPortals(int x, vissprite_t* spr); void R_CacheSprite (spritedef_t *sprite); diff --git a/src/swrenderer/things/r_particle.cpp b/src/swrenderer/things/r_particle.cpp index c6dd2d5df1..4daf1d799f 100644 --- a/src/swrenderer/things/r_particle.cpp +++ b/src/swrenderer/things/r_particle.cpp @@ -252,8 +252,7 @@ namespace swrenderer { for (int x = x1; x < (x1 + countbase); x++, fracposx += fracstepx) { - dc_x = x; - if (R_ClipSpriteColumnWithPortals(vis)) + if (R_ClipSpriteColumnWithPortals(x, vis)) continue; uint32_t *dest = ylookup[yl] + x + (uint32_t*)dc_destorg; DrawerCommandQueue::QueueCommand(dest, yl, spacing, ycount, fg, alpha, fracposx); @@ -263,8 +262,7 @@ namespace swrenderer { for (int x = x1; x < (x1 + countbase); x++, fracposx += fracstepx) { - dc_x = x; - if (R_ClipSpriteColumnWithPortals(vis)) + if (R_ClipSpriteColumnWithPortals(x, vis)) continue; uint8_t *dest = ylookup[yl] + x + dc_destorg; DrawerCommandQueue::QueueCommand(dest, yl, spacing, ycount, fg, alpha, fracposx); diff --git a/src/swrenderer/things/r_sprite.cpp b/src/swrenderer/things/r_sprite.cpp index 0a87d45e40..c998b35755 100644 --- a/src/swrenderer/things/r_sprite.cpp +++ b/src/swrenderer/things/r_sprite.cpp @@ -116,7 +116,7 @@ namespace swrenderer { while (x < x2) { - if (ispsprite || !R_ClipSpriteColumnWithPortals(vis)) + if (ispsprite || !R_ClipSpriteColumnWithPortals(x, vis)) R_DrawMaskedColumn(x, iscale, tex, frac, spryscale, sprtopscreen, sprflipvert, mfloorclip, mceilingclip, false); x++; frac += xiscale; diff --git a/src/swrenderer/things/r_wallsprite.cpp b/src/swrenderer/things/r_wallsprite.cpp index fe1aabb64d..d3ba5fee9e 100644 --- a/src/swrenderer/things/r_wallsprite.cpp +++ b/src/swrenderer/things/r_wallsprite.cpp @@ -221,7 +221,7 @@ namespace swrenderer { // calculate lighting R_SetColorMapLight(usecolormap, light, shade); } - if (!R_ClipSpriteColumnWithPortals(spr)) + if (!R_ClipSpriteColumnWithPortals(x, spr)) R_WallSpriteColumn(x, WallSpriteTile, 
maskedScaleY, sprflipvert, mfloorclip, mceilingclip); light += lightstep; x++; From 6c04439315e611d63a6190601d43981d4ae231c6 Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Mon, 9 Jan 2017 10:16:24 -0500 Subject: [PATCH 664/912] - Fixed compile errors with last merge. --- src/polyrenderer/scene/poly_sky.cpp | 1 + src/swrenderer/plane/r_skyplane.cpp | 3 ++- src/swrenderer/scene/r_bsp.cpp | 1 + src/swrenderer/scene/r_things.cpp | 1 + src/swrenderer/things/r_playersprite.cpp | 1 + 5 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/polyrenderer/scene/poly_sky.cpp b/src/polyrenderer/scene/poly_sky.cpp index 8c84901b86..82be3f2bc0 100644 --- a/src/polyrenderer/scene/poly_sky.cpp +++ b/src/polyrenderer/scene/poly_sky.cpp @@ -27,6 +27,7 @@ #include "poly_sky.h" #include "poly_portal.h" #include "r_sky.h" // for skyflatnum +#include "g_levellocals.h" PolySkyDome::PolySkyDome() { diff --git a/src/swrenderer/plane/r_skyplane.cpp b/src/swrenderer/plane/r_skyplane.cpp index 727a04e511..dd8c2ca044 100644 --- a/src/swrenderer/plane/r_skyplane.cpp +++ b/src/swrenderer/plane/r_skyplane.cpp @@ -41,6 +41,7 @@ #include "swrenderer/line/r_walldraw.h" #include "swrenderer/scene/r_portal.h" #include "swrenderer/r_memory.h" +#include "g_levellocals.h" CVAR(Bool, r_linearsky, false, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); EXTERN_CVAR(Int, r_skymode) @@ -115,7 +116,7 @@ namespace swrenderer else { // MBF's linedef-controlled skies // Sky Linedef - const line_t *l = &lines[(pl->sky & ~PL_SKYFLAT) - 1]; + const line_t *l = &level.lines[(pl->sky & ~PL_SKYFLAT) - 1]; // Sky transferred from first sidedef const side_t *s = l->sidedef[0]; diff --git a/src/swrenderer/scene/r_bsp.cpp b/src/swrenderer/scene/r_bsp.cpp index 1e390b313b..3a910f01b1 100644 --- a/src/swrenderer/scene/r_bsp.cpp +++ b/src/swrenderer/scene/r_bsp.cpp @@ -54,6 +54,7 @@ #include "r_sky.h" #include "po_man.h" #include "r_data/colormaps.h" +#include "g_levellocals.h" EXTERN_CVAR(Bool, 
r_fullbrightignoresectorcolor); diff --git a/src/swrenderer/scene/r_things.cpp b/src/swrenderer/scene/r_things.cpp index 569e9be10b..af503244dc 100644 --- a/src/swrenderer/scene/r_things.cpp +++ b/src/swrenderer/scene/r_things.cpp @@ -67,6 +67,7 @@ #include "swrenderer/things/r_wallsprite.h" #include "swrenderer/things/r_sprite.h" #include "swrenderer/r_memory.h" +#include "g_levellocals.h" EXTERN_CVAR(Int, r_drawfuzz) EXTERN_CVAR(Bool, r_drawvoxels) diff --git a/src/swrenderer/things/r_playersprite.cpp b/src/swrenderer/things/r_playersprite.cpp index 3755236525..d37320c39b 100644 --- a/src/swrenderer/things/r_playersprite.cpp +++ b/src/swrenderer/things/r_playersprite.cpp @@ -54,6 +54,7 @@ #include "swrenderer/scene/r_portal.h" #include "swrenderer/things/r_sprite.h" #include "swrenderer/r_memory.h" +#include "g_levellocals.h" EXTERN_CVAR(Bool, st_scale) EXTERN_CVAR(Bool, r_drawplayersprites) From 079f3bd78cbc7db05fc62356360cb72b19b12ecb Mon Sep 17 00:00:00 2001 From: "alexey.lysiuk" Date: Mon, 9 Jan 2017 17:26:19 +0200 Subject: [PATCH 665/912] Fixed compilation errors with GCC/Clang No more 'error: cannot jump from this goto statement to its label' --- src/swrenderer/segments/r_drawsegment.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/swrenderer/segments/r_drawsegment.cpp b/src/swrenderer/segments/r_drawsegment.cpp index fbbff4ec3f..ca875c6317 100644 --- a/src/swrenderer/segments/r_drawsegment.cpp +++ b/src/swrenderer/segments/r_drawsegment.cpp @@ -194,7 +194,7 @@ namespace swrenderer short *mfloorclip = openings + ds->sprbottomclip - ds->x1; short *mceilingclip = openings + ds->sprtopclip - ds->x1; - + double spryscale; // [RH] Draw fog partition if (ds->bFogBoundary) @@ -213,7 +213,7 @@ namespace swrenderer MaskedSWall = (float *)(openings + ds->swall) - ds->x1; MaskedScaleY = ds->yscale; maskedtexturecol = (fixed_t *)(openings + ds->maskedtexturecol) - ds->x1; - double spryscale = ds->iscale + ds->iscalestep * (x1 - ds->x1); + 
spryscale = ds->iscale + ds->iscalestep * (x1 - ds->x1); rw_scalestep = ds->iscalestep; if (fixedlightlev >= 0) From 8353c8850674dd4221d5be37dd4ce6abbe36279b Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 9 Jan 2017 18:55:21 +0100 Subject: [PATCH 666/912] ARM compile fixes --- tools/drawergen/ssa/ssa_vec16ub.cpp | 4 ++-- tools/drawergen/ssa/ssa_vec8s.cpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/drawergen/ssa/ssa_vec16ub.cpp b/tools/drawergen/ssa/ssa_vec16ub.cpp index cdf1e465cb..deb7635fd9 100644 --- a/tools/drawergen/ssa/ssa_vec16ub.cpp +++ b/tools/drawergen/ssa/ssa_vec16ub.cpp @@ -73,8 +73,8 @@ SSAVec16ub::SSAVec16ub(SSAVec8s s0, SSAVec8s s1) : v(0) { #ifdef ARM_TARGET - llvm::Value *int8x8_i0 = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::arm_neon_vqmovnsu, s0.v, SSAScope::hint()); - llvm::Value *int8x8_i1 = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::arm_neon_vqmovnsu, s1.v, SSAScope::hint()); + llvm::Value *int8x8_i0 = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::arm_neon_vqmovnsu), s0.v, SSAScope::hint()); + llvm::Value *int8x8_i1 = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::arm_neon_vqmovnsu), s1.v, SSAScope::hint()); return shuffle(from_llvm(int8x8_i0), from_llvm(int8x8_i1), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); #else llvm::Value *values[2] = { s0.v, s1.v }; diff --git a/tools/drawergen/ssa/ssa_vec8s.cpp b/tools/drawergen/ssa/ssa_vec8s.cpp index f78c1dd250..4664615d28 100644 --- a/tools/drawergen/ssa/ssa_vec8s.cpp +++ b/tools/drawergen/ssa/ssa_vec8s.cpp @@ -63,8 +63,8 @@ SSAVec8s::SSAVec8s(SSAVec4i i0, SSAVec4i i1) : v(0) { #ifdef ARM_TARGET - llvm::Value *int16x4_i0 = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::arm_neon_vqmovns, i0.v, SSAScope::hint()); - llvm::Value *int16x4_i1 = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::arm_neon_vqmovns, i1.v, 
SSAScope::hint()); + llvm::Value *int16x4_i0 = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::arm_neon_vqmovns), i0.v, SSAScope::hint()); + llvm::Value *int16x4_i1 = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::arm_neon_vqmovns), i1.v, SSAScope::hint()); return shuffle(from_llvm(int16x4_i0), from_llvm(int16x4_i1), 0, 1, 2, 3, 4, 5, 6, 7); #else llvm::Value *values[2] = { i0.v, i1.v }; From 8d3b056221f04aae1ffdbc094d6c6a287a5c7f2d Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 9 Jan 2017 20:46:30 +0100 Subject: [PATCH 667/912] Move ARM fixes --- tools/drawergen/ssa/ssa_vec16ub.cpp | 2 +- tools/drawergen/ssa/ssa_vec8s.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/drawergen/ssa/ssa_vec16ub.cpp b/tools/drawergen/ssa/ssa_vec16ub.cpp index deb7635fd9..10a0a9d187 100644 --- a/tools/drawergen/ssa/ssa_vec16ub.cpp +++ b/tools/drawergen/ssa/ssa_vec16ub.cpp @@ -75,7 +75,7 @@ SSAVec16ub::SSAVec16ub(SSAVec8s s0, SSAVec8s s1) #ifdef ARM_TARGET llvm::Value *int8x8_i0 = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::arm_neon_vqmovnsu), s0.v, SSAScope::hint()); llvm::Value *int8x8_i1 = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::arm_neon_vqmovnsu), s1.v, SSAScope::hint()); - return shuffle(from_llvm(int8x8_i0), from_llvm(int8x8_i1), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + v = shuffle(from_llvm(int8x8_i0), from_llvm(int8x8_i1), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15).v; #else llvm::Value *values[2] = { s0.v, s1.v }; v = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_packuswb_128), values, SSAScope::hint()); diff --git a/tools/drawergen/ssa/ssa_vec8s.cpp b/tools/drawergen/ssa/ssa_vec8s.cpp index 4664615d28..80d8817d1e 100644 --- a/tools/drawergen/ssa/ssa_vec8s.cpp +++ b/tools/drawergen/ssa/ssa_vec8s.cpp @@ -65,7 +65,7 @@ SSAVec8s::SSAVec8s(SSAVec4i i0, SSAVec4i i1) #ifdef ARM_TARGET llvm::Value 
*int16x4_i0 = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::arm_neon_vqmovns), i0.v, SSAScope::hint()); llvm::Value *int16x4_i1 = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::arm_neon_vqmovns), i1.v, SSAScope::hint()); - return shuffle(from_llvm(int16x4_i0), from_llvm(int16x4_i1), 0, 1, 2, 3, 4, 5, 6, 7); + v = shuffle(from_llvm(int16x4_i0), from_llvm(int16x4_i1), 0, 1, 2, 3, 4, 5, 6, 7).v; #else llvm::Value *values[2] = { i0.v, i1.v }; v = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_packssdw_128), values, SSAScope::hint()); From 7c04fa6e1bf59162527af12eb0bd09857ea7f037 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 9 Jan 2017 22:30:52 +0100 Subject: [PATCH 668/912] Link with arm libraries on unix systems --- tools/drawergen/CMakeLists.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/drawergen/CMakeLists.txt b/tools/drawergen/CMakeLists.txt index 6a5804e8a1..43caf016a5 100644 --- a/tools/drawergen/CMakeLists.txt +++ b/tools/drawergen/CMakeLists.txt @@ -27,7 +27,8 @@ if( NOT WIN32 ) set( LLVM_COMPONENTS core support asmparser asmprinter bitreader bitwriter codegen ipo irreader transformutils instrumentation profiledata runtimedyld object instcombine linker analysis selectiondag scalaropts vectorize executionengine - mc mcdisassembler mcparser mcjit target x86asmprinter x86info x86desc x86utils x86codegen ) + mc mcdisassembler mcparser mcjit target x86asmprinter x86info x86desc x86utils x86codegen + armasmprinter arminfo armdesc armutils armcodegen ) # Example LLVM_DIR folder: C:/Development/Environment/Src/llvm-3.9.0/build/lib/cmake/llvm find_package(LLVM REQUIRED CONFIG) From 3487be2c40f2f97e3ae5d3108b0b229a97c6ef77 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 10 Jan 2017 12:51:35 +0100 Subject: [PATCH 669/912] Fix drawergen linking on rpi --- tools/drawergen/CMakeLists.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff 
--git a/tools/drawergen/CMakeLists.txt b/tools/drawergen/CMakeLists.txt index 43caf016a5..215125ec2c 100644 --- a/tools/drawergen/CMakeLists.txt +++ b/tools/drawergen/CMakeLists.txt @@ -28,7 +28,7 @@ if( NOT WIN32 ) irreader transformutils instrumentation profiledata runtimedyld object instcombine linker analysis selectiondag scalaropts vectorize executionengine mc mcdisassembler mcparser mcjit target x86asmprinter x86info x86desc x86utils x86codegen - armasmprinter arminfo armdesc armutils armcodegen ) + armasmprinter arminfo armdesc armcodegen ) # Example LLVM_DIR folder: C:/Development/Environment/Src/llvm-3.9.0/build/lib/cmake/llvm find_package(LLVM REQUIRED CONFIG) @@ -37,6 +37,7 @@ if( NOT WIN32 ) llvm_map_components_to_libnames( llvm_libs ${LLVM_COMPONENTS} ) include_directories( ${LLVM_INCLUDE_DIRS} ) set( DRAWERGEN_LIBS ${DRAWERGEN_LIBS} ${llvm_libs} ) + set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti" ) else() set( LLVM_COMPONENTS core support asmparser asmprinter bitreader bitwriter codegen passes ipo irreader transformutils instrumentation profiledata debuginfocodeview runtimedyld From 2f64bfa5af241906d6db1ff04ca2a8257f1662c6 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 10 Jan 2017 13:11:12 +0100 Subject: [PATCH 670/912] Drop using intrinsics on ARM until after they actually got gdb to run on it. It is all a waste of time anyhow, because when my computer is literally a factor 100 times faster at building, what frame rate can one really expect? What should it run at? 320x200? 
--- tools/drawergen/drawergen.cpp | 2 +- tools/drawergen/ssa/ssa_float.cpp | 3 ++- tools/drawergen/ssa/ssa_vec16ub.cpp | 6 ++++++ tools/drawergen/ssa/ssa_vec8s.cpp | 6 ++++++ 4 files changed, 15 insertions(+), 2 deletions(-) diff --git a/tools/drawergen/drawergen.cpp b/tools/drawergen/drawergen.cpp index e753d61971..dcb039a404 100644 --- a/tools/drawergen/drawergen.cpp +++ b/tools/drawergen/drawergen.cpp @@ -89,7 +89,7 @@ int main(int argc, char **argv) std::cout << "Target triple is " << triple << std::endl; #ifdef __arm__ - std::string cpuName = "armv8"; + std::string cpuName = llvm::sys::getHostCPUName(); // "armv8"; #else std::string cpuName = "pentium4"; #endif diff --git a/tools/drawergen/ssa/ssa_float.cpp b/tools/drawergen/ssa/ssa_float.cpp index 19a6fcd0a3..6c597dc1c4 100644 --- a/tools/drawergen/ssa/ssa_float.cpp +++ b/tools/drawergen/ssa/ssa_float.cpp @@ -57,7 +57,8 @@ llvm::Type *SSAFloat::llvm_type() SSAFloat SSAFloat::rsqrt(SSAFloat f) { #ifdef ARM_TARGET - return SSAFloat::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::aarch64_neon_frsqrts), f.v, SSAScope::hint())); + //return SSAFloat::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::aarch64_neon_frsqrts), f.v, SSAScope::hint())); + return SSAFloat(1.0f) / (f * SSAFloat(0.01f)); #else llvm::Value *f_ss = SSAScope::builder().CreateInsertElement(llvm::UndefValue::get(SSAVec4f::llvm_type()), f.v, llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)0))); f_ss = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse_rsqrt_ss), f_ss, SSAScope::hint()); diff --git a/tools/drawergen/ssa/ssa_vec16ub.cpp b/tools/drawergen/ssa/ssa_vec16ub.cpp index 10a0a9d187..e64fe8e851 100644 --- a/tools/drawergen/ssa/ssa_vec16ub.cpp +++ b/tools/drawergen/ssa/ssa_vec16ub.cpp @@ -73,9 +73,15 @@ SSAVec16ub::SSAVec16ub(SSAVec8s s0, SSAVec8s s1) : v(0) { #ifdef ARM_TARGET + /* llvm::Value *int8x8_i0 = 
SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::arm_neon_vqmovnsu), s0.v, SSAScope::hint()); llvm::Value *int8x8_i1 = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::arm_neon_vqmovnsu), s1.v, SSAScope::hint()); v = shuffle(from_llvm(int8x8_i0), from_llvm(int8x8_i1), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15).v; + */ + // To do: add some clamping here + llvm::Value *int8x8_i0 = SSAScope::builder().CreateTrunc(s0.v, llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 8)); + llvm::Value *int8x8_i1 = SSAScope::builder().CreateTrunc(s1.v, llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 8)); + v = shuffle(from_llvm(int8x8_i0), from_llvm(int8x8_i1), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15).v; #else llvm::Value *values[2] = { s0.v, s1.v }; v = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_packuswb_128), values, SSAScope::hint()); diff --git a/tools/drawergen/ssa/ssa_vec8s.cpp b/tools/drawergen/ssa/ssa_vec8s.cpp index 80d8817d1e..795194ca5b 100644 --- a/tools/drawergen/ssa/ssa_vec8s.cpp +++ b/tools/drawergen/ssa/ssa_vec8s.cpp @@ -63,9 +63,15 @@ SSAVec8s::SSAVec8s(SSAVec4i i0, SSAVec4i i1) : v(0) { #ifdef ARM_TARGET + /* llvm::Value *int16x4_i0 = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::arm_neon_vqmovns), i0.v, SSAScope::hint()); llvm::Value *int16x4_i1 = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::arm_neon_vqmovns), i1.v, SSAScope::hint()); v = shuffle(from_llvm(int16x4_i0), from_llvm(int16x4_i1), 0, 1, 2, 3, 4, 5, 6, 7).v; + */ + // To do: add some clamping here + llvm::Value *int16x4_i0 = SSAScope::builder().CreateTrunc(i0.v, llvm::VectorType::get(llvm::Type::getInt16Ty(SSAScope::context()), 4)); + llvm::Value *int16x4_i1 = SSAScope::builder().CreateTrunc(i1.v, llvm::VectorType::get(llvm::Type::getInt16Ty(SSAScope::context()), 4)); + v = shuffle(from_llvm(int16x4_i0), from_llvm(int16x4_i1), 0, 1, 
2, 3, 4, 5, 6, 7).v; #else llvm::Value *values[2] = { i0.v, i1.v }; v = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_packssdw_128), values, SSAScope::hint()); From c4573fa3439c15d48f9950bf983ea372be2c6ab6 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 10 Jan 2017 13:39:57 +0100 Subject: [PATCH 671/912] Disable SSE on arm --- src/swrenderer/drawers/r_draw_rgba.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/swrenderer/drawers/r_draw_rgba.h b/src/swrenderer/drawers/r_draw_rgba.h index 0e43abbb51..ae49578796 100644 --- a/src/swrenderer/drawers/r_draw_rgba.h +++ b/src/swrenderer/drawers/r_draw_rgba.h @@ -27,6 +27,10 @@ #include "r_thread.h" #include "r_drawers.h" +#ifdef __arm__ +#define NO_SSE +#endif + #ifndef NO_SSE #include #endif From 8fc6660a4aeb5a42e6d282bb43073c5ae4a1826b Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 10 Jan 2017 13:45:14 +0100 Subject: [PATCH 672/912] Disable SSE stuff --- src/swrenderer/drawers/r_draw_pal.cpp | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/swrenderer/drawers/r_draw_pal.cpp b/src/swrenderer/drawers/r_draw_pal.cpp index c0625e3fc3..42e62ac237 100644 --- a/src/swrenderer/drawers/r_draw_pal.cpp +++ b/src/swrenderer/drawers/r_draw_pal.cpp @@ -33,7 +33,13 @@ ** */ +#ifdef __arm__ +#define NO_SSE +#endif + +#ifndef NO_SSE #include +#endif #include "templates.h" #include "doomtype.h" #include "doomdef.h" @@ -132,7 +138,11 @@ namespace swrenderer float Lxy2 = lights[i].x; // L.x*L.x + L.y*L.y float Lz = lights[i].z - viewpos_z; float dist2 = Lxy2 + Lz * Lz; +#ifdef NO_SSE + float rcp_dist = 1.0f / (dist2 * 0.01f); +#else float rcp_dist = _mm_cvtss_f32(_mm_rsqrt_ss(_mm_load_ss(&dist2))); +#endif float dist = dist2 * rcp_dist; float distance_attenuation = (256.0f - MIN(dist * lights[i].radius, 256.0f)); @@ -1742,7 +1752,11 @@ namespace swrenderer float Lyz2 = lights[i].y; // L.y*L.y + L.z*L.z float Lx = lights[i].x - viewpos_x; float dist2 = Lyz2 + 
Lx * Lx; +#ifdef NO_SSE + float rcp_dist = 1.0f / (dist2 * 0.01f); +#else float rcp_dist = _mm_cvtss_f32(_mm_rsqrt_ss(_mm_load_ss(&dist2))); +#endif float dist = dist2 * rcp_dist; float distance_attenuation = (256.0f - MIN(dist * lights[i].radius, 256.0f)); From 751bd120ac665ee75a5657f1a418aad9ef1ccd63 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 10 Jan 2017 15:14:55 +0100 Subject: [PATCH 673/912] Fix warning about potential uninitialized lightfiller reported by gcc --- src/swrenderer/drawers/r_draw_pal.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/swrenderer/drawers/r_draw_pal.cpp b/src/swrenderer/drawers/r_draw_pal.cpp index 42e62ac237..6cc4568637 100644 --- a/src/swrenderer/drawers/r_draw_pal.cpp +++ b/src/swrenderer/drawers/r_draw_pal.cpp @@ -2635,7 +2635,7 @@ namespace swrenderer const uint8_t **tiltlighting = thread->tiltlighting; double lstep; - uint8_t *lightfiller; + uint8_t *lightfiller = nullptr; int i = 0; if (width == 0 || lval == lend) @@ -2720,7 +2720,7 @@ namespace swrenderer } } } - for (; i <= width; i++) + for (; i < width; i++) { tiltlighting[i] = lightfiller; } From ece8e95853429f220016f9b5cdfefb7f0c344d38 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 10 Jan 2017 15:16:55 +0100 Subject: [PATCH 674/912] Revert range adjustment (can't make out if this is wrong or not - if it is, it is wrong across the entire function) --- src/swrenderer/drawers/r_draw_pal.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/swrenderer/drawers/r_draw_pal.cpp b/src/swrenderer/drawers/r_draw_pal.cpp index 6cc4568637..a55705cb7d 100644 --- a/src/swrenderer/drawers/r_draw_pal.cpp +++ b/src/swrenderer/drawers/r_draw_pal.cpp @@ -2720,7 +2720,7 @@ namespace swrenderer } } } - for (; i < width; i++) + for (; i <= width; i++) { tiltlighting[i] = lightfiller; } From 4be291fb617ec8ae361e22eeb2d74ae0ab9bb6be Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 10 Jan 2017 18:25:06 +0100 
Subject: [PATCH 675/912] Fix divide by zero (issue 0000022) --- src/swrenderer/drawers/r_draw.cpp | 1 + tools/drawergen/fixedfunction/drawcolumncodegen.cpp | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/swrenderer/drawers/r_draw.cpp b/src/swrenderer/drawers/r_draw.cpp index 5c281f8c8b..a7d40e63c3 100644 --- a/src/swrenderer/drawers/r_draw.cpp +++ b/src/swrenderer/drawers/r_draw.cpp @@ -647,6 +647,7 @@ namespace swrenderer dc_x = x; dc_iscale = iscale; + dc_textureheight = tex->GetHeight(); const FTexture::Span *span; const BYTE *column; diff --git a/tools/drawergen/fixedfunction/drawcolumncodegen.cpp b/tools/drawergen/fixedfunction/drawcolumncodegen.cpp index 6e00528ae4..1a4f806bf2 100644 --- a/tools/drawergen/fixedfunction/drawcolumncodegen.cpp +++ b/tools/drawergen/fixedfunction/drawcolumncodegen.cpp @@ -81,7 +81,6 @@ void DrawColumnCodegen::Generate(DrawColumnVariant variant, SSAValue args, SSAVa stack_frac.store(texturefrac + iscale * skipped_by_thread(dest_y, thread)); iscale = iscale * thread.num_cores; - one = (1 << 30) / textureheight; SSAIfBlock branch; branch.if_block(is_simple_shade); @@ -97,6 +96,7 @@ void DrawColumnCodegen::LoopShade(DrawColumnVariant variant, bool isSimpleShade) branch.if_block(is_nearest_filter); Loop(variant, isSimpleShade, true); branch.else_block(); + one = (1 << 30) / textureheight; stack_frac.store(stack_frac.load() - (one >> 1)); Loop(variant, isSimpleShade, false); branch.end_block(); From 361bb11b131a761b797c6bcb09909caae7be271b Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 10 Jan 2017 20:07:51 +0100 Subject: [PATCH 676/912] Remove -28 offset to fix issue 0000012 --- src/swrenderer/plane/r_skyplane.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/swrenderer/plane/r_skyplane.cpp b/src/swrenderer/plane/r_skyplane.cpp index dd8c2ca044..0079d5af5c 100644 --- a/src/swrenderer/plane/r_skyplane.cpp +++ b/src/swrenderer/plane/r_skyplane.cpp @@ -147,7 +147,7 @@ namespace 
swrenderer skyangle += FLOAT2FIXED(s->GetTextureXOffset(pos)); // Vertical offset allows careful sky positioning. - skymid = s->GetTextureYOffset(pos) - 28; + skymid = s->GetTextureYOffset(pos); // We sometimes flip the picture horizontally. // From 72762e583f295c87f762faa61fdf524b598002a0 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 11 Jan 2017 15:02:36 +0100 Subject: [PATCH 677/912] Move AddSprites to r_bsp and R_ProjectSprite to r_sprite --- src/swrenderer/scene/r_bsp.cpp | 119 +++++-- src/swrenderer/scene/r_bsp.h | 2 + src/swrenderer/scene/r_things.cpp | 507 +---------------------------- src/swrenderer/scene/r_things.h | 2 +- src/swrenderer/things/r_sprite.cpp | 431 ++++++++++++++++++++++++ src/swrenderer/things/r_sprite.h | 1 + 6 files changed, 525 insertions(+), 537 deletions(-) diff --git a/src/swrenderer/scene/r_bsp.cpp b/src/swrenderer/scene/r_bsp.cpp index 3a910f01b1..bd18ab41ef 100644 --- a/src/swrenderer/scene/r_bsp.cpp +++ b/src/swrenderer/scene/r_bsp.cpp @@ -35,6 +35,7 @@ #include "swrenderer/r_main.h" #include "swrenderer/drawers/r_draw.h" #include "swrenderer/plane/r_visibleplane.h" +#include "swrenderer/things/r_sprite.h" #include "swrenderer/things/r_particle.h" #include "swrenderer/segments/r_clipsegment.h" #include "swrenderer/line/r_wallsetup.h" @@ -76,12 +77,12 @@ namespace swrenderer // Similar for ceiling, only reflected. 
// [RH] allow per-plane lighting - if (floorlightlevel != NULL) + if (floorlightlevel != nullptr) { *floorlightlevel = sec->GetFloorLight(); } - if (ceilinglightlevel != NULL) + if (ceilinglightlevel != nullptr) { *ceilinglightlevel = sec->GetCeilingLight(); } @@ -89,7 +90,7 @@ namespace swrenderer FakeSide = WaterFakeSide::Center; const sector_t *s = sec->GetHeightSec(); - if (s != NULL) + if (s != nullptr) { sector_t *heightsec = viewsector->heightsec; bool underwater = r_fakingunderwater || @@ -116,12 +117,12 @@ namespace swrenderer { tempsec->lightlevel = s->lightlevel; - if (floorlightlevel != NULL) + if (floorlightlevel != nullptr) { *floorlightlevel = s->GetFloorLight(); } - if (ceilinglightlevel != NULL) + if (ceilinglightlevel != nullptr) { *ceilinglightlevel = s->GetCeilingLight(); } @@ -160,7 +161,7 @@ namespace swrenderer // are underwater but not in a water sector themselves. // Only works if you cannot see the top surface of any deep water // sectors at the same time. - if (backline && !r_fakingunderwater && backline->frontsector->heightsec == NULL) + if (backline && !r_fakingunderwater && backline->frontsector->heightsec == nullptr) { if (frontcz1 <= s->floorplane.ZatPoint(backline->v1) && frontcz2 <= s->floorplane.ZatPoint(backline->v2)) @@ -215,12 +216,12 @@ namespace swrenderer { tempsec->lightlevel = s->lightlevel; - if (floorlightlevel != NULL) + if (floorlightlevel != nullptr) { *floorlightlevel = s->GetFloorLight(); } - if (ceilinglightlevel != NULL) + if (ceilinglightlevel != nullptr) { *ceilinglightlevel = s->GetCeilingLight(); } @@ -252,12 +253,12 @@ namespace swrenderer { tempsec->lightlevel = s->lightlevel; - if (floorlightlevel != NULL) + if (floorlightlevel != nullptr) { *floorlightlevel = s->GetFloorLight(); } - if (ceilinglightlevel != NULL) + if (ceilinglightlevel != nullptr) { *ceilinglightlevel = s->GetCeilingLight(); } @@ -376,7 +377,7 @@ namespace swrenderer void RenderBSP::AddPolyobjs(subsector_t *sub) { - if (sub->BSP == NULL 
|| sub->BSP->bDirty) + if (sub->BSP == nullptr || sub->BSP->bDirty) { sub->BuildPolyBSP(); } @@ -430,8 +431,8 @@ namespace swrenderer //secplane_t templane; lightlist_t *light; - if (InSubsector != NULL) - { // InSubsector is not NULL. This means we are rendering from a mini-BSP. + if (InSubsector != nullptr) + { // InSubsector is not nullptr. This means we are rendering from a mini-BSP. outersubsector = false; } else @@ -445,14 +446,14 @@ namespace swrenderer I_Error("RenderSubsector: ss %ti with numss = %i", sub - subsectors, numsubsectors); #endif - assert(sub->sector != NULL); + assert(sub->sector != nullptr); if (sub->polys) { // Render the polyobjs in the subsector first AddPolyobjs(sub); if (outersubsector) { - InSubsector = NULL; + InSubsector = nullptr; } return; } @@ -493,7 +494,7 @@ namespace swrenderer visplane_t *ceilingplane = frontsector->ceilingplane.PointOnSide(ViewPos) > 0 || frontsector->GetTexture(sector_t::ceiling) == skyflatnum || - portal != NULL || + portal != nullptr || (frontsector->heightsec && !(frontsector->heightsec->MoreFlags & SECF_IGNOREHEIGHTSEC) && frontsector->heightsec->GetTexture(sector_t::floor) == skyflatnum) ? @@ -505,7 +506,7 @@ namespace swrenderer frontsector->planes[sector_t::ceiling].xform, frontsector->sky, portal - ) : NULL; + ) : nullptr; if (ceilingplane) R_AddPlaneLights(ceilingplane, frontsector->lighthead); @@ -533,7 +534,7 @@ namespace swrenderer visplane_t *floorplane = frontsector->floorplane.PointOnSide(ViewPos) > 0 || // killough 3/7/98 frontsector->GetTexture(sector_t::floor) == skyflatnum || - portal != NULL || + portal != nullptr || (frontsector->heightsec && !(frontsector->heightsec->MoreFlags & SECF_IGNOREHEIGHTSEC) && frontsector->heightsec->GetTexture(sector_t::ceiling) == skyflatnum) ? 
@@ -545,7 +546,7 @@ namespace swrenderer frontsector->planes[sector_t::floor].xform, frontsector->sky, portal - ) : NULL; + ) : nullptr; if (floorplane) R_AddPlaneLights(floorplane, frontsector->lighthead); @@ -601,7 +602,7 @@ namespace swrenderer floorlightlevel = *light->p_lightlevel; } - ceilingplane = NULL; + ceilingplane = nullptr; floorplane = R_FindPlane(frontsector->floorplane, frontsector->GetTexture(sector_t::floor), floorlightlevel + r_actualextralight, // killough 3/16/98 @@ -609,7 +610,7 @@ namespace swrenderer !!(clip3d->fakeFloor->flags & FF_ADDITIVETRANS), frontsector->planes[position].xform, frontsector->sky, - NULL); + nullptr); if (floorplane) R_AddPlaneLights(floorplane, frontsector->lighthead); @@ -666,7 +667,7 @@ namespace swrenderer } tempsec.ceilingplane.ChangeHeight(1 / 65536.); - floorplane = NULL; + floorplane = nullptr; ceilingplane = R_FindPlane(frontsector->ceilingplane, // killough 3/8/98 frontsector->GetTexture(sector_t::ceiling), ceilinglightlevel + r_actualextralight, // killough 4/11/98 @@ -674,7 +675,7 @@ namespace swrenderer !!(clip3d->fakeFloor->flags & FF_ADDITIVETRANS), frontsector->planes[position].xform, frontsector->sky, - NULL); + nullptr); if (ceilingplane) R_AddPlaneLights(ceilingplane, frontsector->lighthead); @@ -684,7 +685,7 @@ namespace swrenderer frontsector = sub->sector; } } - clip3d->fakeFloor = NULL; + clip3d->fakeFloor = nullptr; floorplane = backupfp; ceilingplane = backupcp; } @@ -698,7 +699,7 @@ namespace swrenderer // lightlevels on floor & ceiling lightlevels in the surrounding area. // [RH] Handle sprite lighting like Duke 3D: If the ceiling is a sky, sprites are lit by // it, otherwise they are lit by the floor. - R_AddSprites(sub->sector, frontsector->GetTexture(sector_t::ceiling) == skyflatnum ? ceilinglightlevel : floorlightlevel, FakeSide); + AddSprites(sub->sector, frontsector->GetTexture(sector_t::ceiling) == skyflatnum ? 
ceilinglightlevel : floorlightlevel, FakeSide); // [RH] Add particles if ((unsigned int)(sub - subsectors) < (unsigned int)numsubsectors) @@ -715,15 +716,15 @@ namespace swrenderer while (count--) { - if (!outersubsector || line->sidedef == NULL || !(line->sidedef->Flags & WALLF_POLYOBJ)) + if (!outersubsector || line->sidedef == nullptr || !(line->sidedef->Flags & WALLF_POLYOBJ)) { // kg3D - fake planes bounding calculation if (r_3dfloors && line->backsector && frontsector->e && line->backsector->e->XFloor.ffloors.Size()) { backupfp = floorplane; backupcp = ceilingplane; - floorplane = NULL; - ceilingplane = NULL; + floorplane = nullptr; + ceilingplane = nullptr; Clip3DFloors *clip3d = Clip3DFloors::Instance(); for (unsigned int i = 0; i < line->backsector->e->XFloor.ffloors.Size(); i++) { @@ -742,7 +743,7 @@ namespace swrenderer } renderline.Render(line, InSubsector, frontsector, &tempsec, floorplane, ceilingplane); // fake } - clip3d->fakeFloor = NULL; + clip3d->fakeFloor = nullptr; clip3d->fake3D = 0; floorplane = backupfp; ceilingplane = backupcp; @@ -753,7 +754,7 @@ namespace swrenderer } if (outersubsector) { - InSubsector = NULL; + InSubsector = nullptr; } } @@ -802,4 +803,62 @@ namespace swrenderer fillshort(floorclip, viewwidth, viewheight); fillshort(ceilingclip, viewwidth, !screen->Accel2D && ConBottom > viewwindowy && !bRenderingToCanvas ? (ConBottom - viewwindowy) : 0); } + + void RenderBSP::AddSprites(sector_t *sec, int lightlevel, WaterFakeSide fakeside) + { + F3DFloor *fakeceiling = nullptr; + F3DFloor *fakefloor = nullptr; + + // BSP is traversed by subsector. + // A sector might have been split into several + // subsectors during BSP building. + // Thus we check whether it was already added. + if (sec->touching_renderthings == nullptr || sec->validcount == validcount) + return; + + // Well, now it will be done. + sec->validcount = validcount; + + int spriteshade = LIGHT2SHADE(lightlevel + r_actualextralight); + + // Handle all things in sector. 
+ for (auto p = sec->touching_renderthings; p != nullptr; p = p->m_snext) + { + auto thing = p->m_thing; + if (thing->validcount == validcount) continue; + thing->validcount = validcount; + + FIntCVar *cvar = thing->GetClass()->distancecheck; + if (cvar != nullptr && *cvar >= 0) + { + double dist = (thing->Pos() - ViewPos).LengthSquared(); + double check = (double)**cvar; + if (dist >= check * check) + { + continue; + } + } + + // find fake level + for (auto rover : thing->Sector->e->XFloor.ffloors) + { + if (!(rover->flags & FF_EXISTS) || !(rover->flags & FF_RENDERPLANES)) continue; + if (!(rover->flags & FF_SOLID) || rover->alpha != 255) continue; + if (!fakefloor) + { + if (!rover->top.plane->isSlope()) + { + if (rover->top.plane->ZatPoint(0., 0.) <= thing->Z()) fakefloor = rover; + } + } + if (!rover->bottom.plane->isSlope()) + { + if (rover->bottom.plane->ZatPoint(0., 0.) >= thing->Top()) fakeceiling = rover; + } + } + R_ProjectSprite(thing, fakeside, fakefloor, fakeceiling, sec, spriteshade); + fakeceiling = nullptr; + fakefloor = nullptr; + } + } } diff --git a/src/swrenderer/scene/r_bsp.h b/src/swrenderer/scene/r_bsp.h index 4a47da5935..e673baff6b 100644 --- a/src/swrenderer/scene/r_bsp.h +++ b/src/swrenderer/scene/r_bsp.h @@ -57,6 +57,8 @@ namespace swrenderer void AddPolyobjs(subsector_t *sub); void FakeDrawLoop(subsector_t *sub, visplane_t *floorplane, visplane_t *ceilingplane); + void AddSprites(sector_t *sec, int lightlevel, WaterFakeSide fakeside); + subsector_t *InSubsector = nullptr; sector_t *frontsector = nullptr; WaterFakeSide FakeSide = WaterFakeSide::Center; diff --git a/src/swrenderer/scene/r_things.cpp b/src/swrenderer/scene/r_things.cpp index af503244dc..84dfc44ec7 100644 --- a/src/swrenderer/scene/r_things.cpp +++ b/src/swrenderer/scene/r_things.cpp @@ -66,6 +66,7 @@ #include "swrenderer/things/r_playersprite.h" #include "swrenderer/things/r_wallsprite.h" #include "swrenderer/things/r_sprite.h" +#include "swrenderer/plane/r_visibleplane.h" 
#include "swrenderer/r_memory.h" #include "g_levellocals.h" @@ -172,503 +173,6 @@ bool R_ClipSpriteColumnWithPortals(int x, vissprite_t* spr) return false; } -// -// R_ProjectSprite -// Generates a vissprite for a thing if it might be visible. -// -void R_ProjectSprite (AActor *thing, WaterFakeSide fakeside, F3DFloor *fakefloor, F3DFloor *fakeceiling, sector_t *current_sector, int spriteshade) -{ - double tr_x; - double tr_y; - - double gzt; // killough 3/27/98 - double gzb; // [RH] use bottom of sprite, not actor - double tx;// , tx2; - double tz; - - double xscale = 1, yscale = 1; - - int x1; - int x2; - - FTextureID picnum; - FTexture *tex; - FVoxelDef *voxel; - - vissprite_t* vis; - - fixed_t iscale; - - sector_t* heightsec; // killough 3/27/98 - - // Don't waste time projecting sprites that are definitely not visible. - if (thing == NULL || - (thing->renderflags & RF_INVISIBLE) || - !thing->RenderStyle.IsVisible(thing->Alpha) || - !thing->IsVisibleToPlayer() || - !thing->IsInsideVisibleAngles()) - { - return; - } - - RenderPortal *renderportal = RenderPortal::Instance(); - - // [ZZ] Or less definitely not visible (hue) - // [ZZ] 10.01.2016: don't try to clip stuff inside a skybox against the current portal. 
- if (!renderportal->CurrentPortalInSkybox && renderportal->CurrentPortal && !!P_PointOnLineSidePrecise(thing->Pos(), renderportal->CurrentPortal->dst)) - return; - - // [RH] Interpolate the sprite's position to make it look smooth - DVector3 pos = thing->InterpolatedPosition(r_TicFracF); - pos.Z += thing->GetBobOffset(r_TicFracF); - - tex = NULL; - voxel = NULL; - - int spritenum = thing->sprite; - DVector2 spriteScale = thing->Scale; - int renderflags = thing->renderflags; - if (spriteScale.Y < 0) - { - spriteScale.Y = -spriteScale.Y; - renderflags ^= RF_YFLIP; - } - if (thing->player != NULL) - { - P_CheckPlayerSprite(thing, spritenum, spriteScale); - } - - if (thing->picnum.isValid()) - { - picnum = thing->picnum; - - tex = TexMan(picnum); - if (tex->UseType == FTexture::TEX_Null) - { - return; - } - - if (tex->Rotations != 0xFFFF) - { - // choose a different rotation based on player view - spriteframe_t *sprframe = &SpriteFrames[tex->Rotations]; - DAngle ang = (pos - ViewPos).Angle(); - angle_t rot; - if (sprframe->Texture[0] == sprframe->Texture[1]) - { - if (thing->flags7 & MF7_SPRITEANGLE) - rot = (thing->SpriteAngle + 45.0 / 2 * 9).BAMs() >> 28; - else - rot = (ang - (thing->Angles.Yaw + thing->SpriteRotation) + 45.0 / 2 * 9).BAMs() >> 28; - } - else - { - if (thing->flags7 & MF7_SPRITEANGLE) - rot = (thing->SpriteAngle + (45.0 / 2 * 9 - 180.0 / 16)).BAMs() >> 28; - else - rot = (ang - (thing->Angles.Yaw + thing->SpriteRotation) + (45.0 / 2 * 9 - 180.0 / 16)).BAMs() >> 28; - } - picnum = sprframe->Texture[rot]; - if (sprframe->Flip & (1 << rot)) - { - renderflags ^= RF_XFLIP; - } - tex = TexMan[picnum]; // Do not animate the rotation - } - } - else - { - // decide which texture to use for the sprite - if ((unsigned)spritenum >= sprites.Size ()) - { - DPrintf (DMSG_ERROR, "R_ProjectSprite: invalid sprite number %u\n", spritenum); - return; - } - spritedef_t *sprdef = &sprites[spritenum]; - if (thing->frame >= sprdef->numframes) - { - // If there are no 
frames at all for this sprite, don't draw it. - return; - } - else - { - //picnum = SpriteFrames[sprdef->spriteframes + thing->frame].Texture[0]; - // choose a different rotation based on player view - spriteframe_t *sprframe = &SpriteFrames[sprdef->spriteframes + thing->frame]; - DAngle ang = (pos - ViewPos).Angle(); - angle_t rot; - if (sprframe->Texture[0] == sprframe->Texture[1]) - { - if (thing->flags7 & MF7_SPRITEANGLE) - rot = (thing->SpriteAngle + 45.0 / 2 * 9).BAMs() >> 28; - else - rot = (ang - (thing->Angles.Yaw + thing->SpriteRotation) + 45.0 / 2 * 9).BAMs() >> 28; - } - else - { - if (thing->flags7 & MF7_SPRITEANGLE) - rot = (thing->SpriteAngle + (45.0 / 2 * 9 - 180.0 / 16)).BAMs() >> 28; - else - rot = (ang - (thing->Angles.Yaw + thing->SpriteRotation) + (45.0 / 2 * 9 - 180.0 / 16)).BAMs() >> 28; - } - picnum = sprframe->Texture[rot]; - if (sprframe->Flip & (1 << rot)) - { - renderflags ^= RF_XFLIP; - } - tex = TexMan[picnum]; // Do not animate the rotation - if (r_drawvoxels) - { - voxel = sprframe->Voxel; - } - } - } - if (spriteScale.X < 0) - { - spriteScale.X = -spriteScale.X; - renderflags ^= RF_XFLIP; - } - if (voxel == NULL && (tex == NULL || tex->UseType == FTexture::TEX_Null)) - { - return; - } - - if ((renderflags & RF_SPRITETYPEMASK) == RF_WALLSPRITE) - { - R_ProjectWallSprite(thing, pos, picnum, spriteScale, renderflags, spriteshade); - return; - } - - // transform the origin point - tr_x = pos.X - ViewPos.X; - tr_y = pos.Y - ViewPos.Y; - - tz = tr_x * ViewTanCos + tr_y * ViewTanSin; - - // thing is behind view plane? - if (voxel == NULL && tz < MINZ) - return; - - tx = tr_x * ViewSin - tr_y * ViewCos; - - // [RH] Flip for mirrors - if (renderportal->MirrorFlags & RF_XFLIP) - { - tx = -tx; - } - //tx2 = tx >> 4; - - // too far off the side? 
- // if it's a voxel, it can be further off the side - if ((voxel == NULL && (fabs(tx / 64) > fabs(tz))) || - (voxel != NULL && (fabs(tx / 128) > fabs(tz)))) - { - return; - } - - if (voxel == NULL) - { - // [RH] Added scaling - int scaled_to = tex->GetScaledTopOffset(); - int scaled_bo = scaled_to - tex->GetScaledHeight(); - gzt = pos.Z + spriteScale.Y * scaled_to; - gzb = pos.Z + spriteScale.Y * scaled_bo; - } - else - { - xscale = spriteScale.X * voxel->Scale; - yscale = spriteScale.Y * voxel->Scale; - double piv = voxel->Voxel->Mips[0].Pivot.Z; - gzt = pos.Z + yscale * piv - thing->Floorclip; - gzb = pos.Z + yscale * (piv - voxel->Voxel->Mips[0].SizeZ); - if (gzt <= gzb) - return; - } - - // killough 3/27/98: exclude things totally separated - // from the viewer, by either water or fake ceilings - // killough 4/11/98: improve sprite clipping for underwater/fake ceilings - - heightsec = thing->Sector->GetHeightSec(); - - if (heightsec != NULL) // only clip things which are in special sectors - { - if (fakeside == WaterFakeSide::AboveCeiling) - { - if (gzt < heightsec->ceilingplane.ZatPoint(pos)) - return; - } - else if (fakeside == WaterFakeSide::BelowFloor) - { - if (gzb >= heightsec->floorplane.ZatPoint(pos)) - return; - } - else - { - if (gzt < heightsec->floorplane.ZatPoint(pos)) - return; - if (!(heightsec->MoreFlags & SECF_FAKEFLOORONLY) && gzb >= heightsec->ceilingplane.ZatPoint(pos)) - return; - } - } - - if (voxel == NULL) - { - xscale = CenterX / tz; - - // [RH] Reject sprites that are off the top or bottom of the screen - if (globaluclip * tz > ViewPos.Z - gzb || globaldclip * tz < ViewPos.Z - gzt) - { - return; - } - - // [RH] Flip for mirrors - renderflags ^= renderportal->MirrorFlags & RF_XFLIP; - - // calculate edges of the shape - const double thingxscalemul = spriteScale.X / tex->Scale.X; - - tx -= ((renderflags & RF_XFLIP) ? 
(tex->GetWidth() - tex->LeftOffset - 1) : tex->LeftOffset) * thingxscalemul; - double dtx1 = tx * xscale; - x1 = centerx + xs_RoundToInt(dtx1); - - // off the right side? - if (x1 >= renderportal->WindowRight) - return; - - tx += tex->GetWidth() * thingxscalemul; - x2 = centerx + xs_RoundToInt(tx * xscale); - - // off the left side or too small? - if ((x2 < renderportal->WindowLeft || x2 <= x1)) - return; - - xscale = spriteScale.X * xscale / tex->Scale.X; - iscale = (fixed_t)(FRACUNIT / xscale); // Round towards zero to avoid wrapping in edge cases - - double yscale = spriteScale.Y / tex->Scale.Y; - - // store information in a vissprite - vis = R_NewVisSprite(); - - vis->CurrentPortalUniq = renderportal->CurrentPortalUniq; - vis->xscale = FLOAT2FIXED(xscale); - vis->yscale = float(InvZtoScale * yscale / tz); - vis->idepth = float(1 / tz); - vis->floorclip = thing->Floorclip / yscale; - vis->texturemid = tex->TopOffset - (ViewPos.Z - pos.Z + thing->Floorclip) / yscale; - vis->x1 = x1 < renderportal->WindowLeft ? renderportal->WindowLeft : x1; - vis->x2 = x2 > renderportal->WindowRight ? renderportal->WindowRight : x2; - vis->Angle = thing->Angles.Yaw; - - if (renderflags & RF_XFLIP) - { - vis->startfrac = (tex->GetWidth() << FRACBITS) - 1; - vis->xiscale = -iscale; - } - else - { - vis->startfrac = 0; - vis->xiscale = iscale; - } - - vis->startfrac += (fixed_t)(vis->xiscale * (vis->x1 - centerx + 0.5 - dtx1)); - } - else - { - vis = R_NewVisSprite(); - - vis->CurrentPortalUniq = renderportal->CurrentPortalUniq; - vis->xscale = FLOAT2FIXED(xscale); - vis->yscale = (float)yscale; - vis->x1 = renderportal->WindowLeft; - vis->x2 = renderportal->WindowRight; - vis->idepth = 1 / MINZ; - vis->floorclip = thing->Floorclip; - - pos.Z -= thing->Floorclip; - - vis->Angle = thing->Angles.Yaw + voxel->AngleOffset; - - int voxelspin = (thing->flags & MF_DROPPED) ? 
voxel->DroppedSpin : voxel->PlacedSpin; - if (voxelspin != 0) - { - DAngle ang = double(I_FPSTime()) * voxelspin / 1000; - vis->Angle -= ang; - } - - vis->pa.vpos = { (float)ViewPos.X, (float)ViewPos.Y, (float)ViewPos.Z }; - vis->pa.vang = FAngle((float)ViewAngle.Degrees); - } - - // killough 3/27/98: save sector for special clipping later - vis->heightsec = heightsec; - vis->sector = thing->Sector; - - vis->depth = (float)tz; - vis->gpos = { (float)pos.X, (float)pos.Y, (float)pos.Z }; - vis->gzb = (float)gzb; // [RH] use gzb, not thing->z - vis->gzt = (float)gzt; // killough 3/27/98 - vis->deltax = float(pos.X - ViewPos.X); - vis->deltay = float(pos.Y - ViewPos.Y); - vis->renderflags = renderflags; - if(thing->flags5 & MF5_BRIGHT) - vis->renderflags |= RF_FULLBRIGHT; // kg3D - vis->Style.RenderStyle = thing->RenderStyle; - vis->FillColor = thing->fillcolor; - vis->Translation = thing->Translation; // [RH] thing translation table - vis->FakeFlatStat = fakeside; - vis->Style.Alpha = float(thing->Alpha); - vis->fakefloor = fakefloor; - vis->fakeceiling = fakeceiling; - vis->Style.ColormapNum = 0; - vis->bInMirror = renderportal->MirrorFlags & RF_XFLIP; - vis->bSplitSprite = false; - - if (voxel != NULL) - { - vis->voxel = voxel->Voxel; - vis->bIsVoxel = true; - vis->bWallSprite = false; - DrewAVoxel = true; - } - else - { - vis->pic = tex; - vis->bIsVoxel = false; - vis->bWallSprite = false; - } - - // The software renderer cannot invert the source without inverting the overlay - // too. That means if the source is inverted, we need to do the reverse of what - // the invert overlay flag says to do. - INTBOOL invertcolormap = (vis->Style.RenderStyle.Flags & STYLEF_InvertOverlay); - - if (vis->Style.RenderStyle.Flags & STYLEF_InvertSource) - { - invertcolormap = !invertcolormap; - } - - FDynamicColormap *mybasecolormap = basecolormap; - if (current_sector->sectornum != thing->Sector->sectornum) // compare sectornums to account for R_FakeFlat copies. 
- { - // Todo: The actor is from a different sector so we have to retrieve the proper basecolormap for that sector. - } - - // Sprites that are added to the scene must fade to black. - if (vis->Style.RenderStyle == LegacyRenderStyles[STYLE_Add] && mybasecolormap->Fade != 0) - { - mybasecolormap = GetSpecialLights(mybasecolormap->Color, 0, mybasecolormap->Desaturate); - } - - if (vis->Style.RenderStyle.Flags & STYLEF_FadeToBlack) - { - if (invertcolormap) - { // Fade to white - mybasecolormap = GetSpecialLights(mybasecolormap->Color, MAKERGB(255,255,255), mybasecolormap->Desaturate); - invertcolormap = false; - } - else - { // Fade to black - mybasecolormap = GetSpecialLights(mybasecolormap->Color, MAKERGB(0,0,0), mybasecolormap->Desaturate); - } - } - - // get light level - if (fixedcolormap != NULL) - { // fixed map - vis->Style.BaseColormap = fixedcolormap; - vis->Style.ColormapNum = 0; - } - else - { - if (invertcolormap) - { - mybasecolormap = GetSpecialLights(mybasecolormap->Color, mybasecolormap->Fade.InverseColor(), mybasecolormap->Desaturate); - } - if (fixedlightlev >= 0) - { - vis->Style.BaseColormap = mybasecolormap; - vis->Style.ColormapNum = fixedlightlev >> COLORMAPSHIFT; - } - else if (!foggy && ((renderflags & RF_FULLBRIGHT) || (thing->flags5 & MF5_BRIGHT))) - { // full bright - vis->Style.BaseColormap = (r_fullbrightignoresectorcolor) ? &FullNormalLight : mybasecolormap; - vis->Style.ColormapNum = 0; - } - else - { // diminished light - vis->Style.ColormapNum = GETPALOOKUP( - r_SpriteVisibility / MAX(tz, MINZ), spriteshade); - vis->Style.BaseColormap = mybasecolormap; - } - } -} - -// -// R_AddSprites -// During BSP traversal, this adds sprites by sector. -// -// killough 9/18/98: add lightlevel as parameter, fixing underwater lighting -// [RH] Save which side of heightsec sprite is on here. 
-void R_AddSprites (sector_t *sec, int lightlevel, WaterFakeSide fakeside) -{ - F3DFloor *fakeceiling = NULL; - F3DFloor *fakefloor = NULL; - - // BSP is traversed by subsector. - // A sector might have been split into several - // subsectors during BSP building. - // Thus we check whether it was already added. - if (sec->touching_renderthings == nullptr || sec->validcount == validcount) - return; - - // Well, now it will be done. - sec->validcount = validcount; - - int spriteshade = LIGHT2SHADE(lightlevel + r_actualextralight); - - // Handle all things in sector. - for(auto p = sec->touching_renderthings; p != nullptr; p = p->m_snext) - { - auto thing = p->m_thing; - if (thing->validcount == validcount) continue; - thing->validcount = validcount; - - FIntCVar *cvar = thing->GetClass()->distancecheck; - if (cvar != NULL && *cvar >= 0) - { - double dist = (thing->Pos() - ViewPos).LengthSquared(); - double check = (double)**cvar; - if (dist >= check * check) - { - continue; - } - } - - // find fake level - for(auto rover : thing->Sector->e->XFloor.ffloors) - { - if(!(rover->flags & FF_EXISTS) || !(rover->flags & FF_RENDERPLANES)) continue; - if(!(rover->flags & FF_SOLID) || rover->alpha != 255) continue; - if(!fakefloor) - { - if(!rover->top.plane->isSlope()) - { - if(rover->top.plane->ZatPoint(0., 0.) <= thing->Z()) fakefloor = rover; - } - } - if(!rover->bottom.plane->isSlope()) - { - if(rover->bottom.plane->ZatPoint(0., 0.) >= thing->Top()) fakeceiling = rover; - } - } - R_ProjectSprite (thing, fakeside, fakefloor, fakeceiling, sec, spriteshade); - fakeceiling = NULL; - fakefloor = NULL; - } -} - // // R_SortVisSprites @@ -1196,8 +700,6 @@ void R_DrawMaskedSingle (bool renew) } } -void R_DrawHeightPlanes(double height); // kg3D - fake planes - void R_DrawMasked () { R_CollectPortals(); @@ -1255,11 +757,4 @@ void R_DrawMasked () R_DrawPlayerSprites(); } -extern double BaseYaspectMul;; - -inline int sgn(int v) -{ - return v < 0 ? -1 : v > 0 ? 
1 : 0; -} - } diff --git a/src/swrenderer/scene/r_things.h b/src/swrenderer/scene/r_things.h index 04685d8bbf..e972f62fd0 100644 --- a/src/swrenderer/scene/r_things.h +++ b/src/swrenderer/scene/r_things.h @@ -43,7 +43,6 @@ bool R_ClipSpriteColumnWithPortals(int x, vissprite_t* spr); void R_CacheSprite (spritedef_t *sprite); void R_SortVisSprites (int (*compare)(const void *, const void *), size_t first); -void R_AddSprites (sector_t *sec, int lightlevel, WaterFakeSide fakeside); void R_DrawSprites (); void R_ClearSprites (); void R_DrawMasked (); @@ -52,6 +51,7 @@ enum { DVF_OFFSCREEN = 1, DVF_SPANSONLY = 2, DVF_MIRRORED = 4 }; void R_ClipVisSprite (vissprite_t *vis, int xl, int xh); +extern bool DrewAVoxel; } diff --git a/src/swrenderer/things/r_sprite.cpp b/src/swrenderer/things/r_sprite.cpp index c998b35755..8c1553f6de 100644 --- a/src/swrenderer/things/r_sprite.cpp +++ b/src/swrenderer/things/r_sprite.cpp @@ -55,8 +55,439 @@ #include "swrenderer/things/r_sprite.h" #include "swrenderer/r_memory.h" +EXTERN_CVAR(Bool, r_drawvoxels) + namespace swrenderer { + void R_ProjectSprite(AActor *thing, WaterFakeSide fakeside, F3DFloor *fakefloor, F3DFloor *fakeceiling, sector_t *current_sector, int spriteshade) + { + double tr_x; + double tr_y; + + double gzt; // killough 3/27/98 + double gzb; // [RH] use bottom of sprite, not actor + double tx;// , tx2; + double tz; + + double xscale = 1, yscale = 1; + + int x1; + int x2; + + FTextureID picnum; + FTexture *tex; + FVoxelDef *voxel; + + vissprite_t* vis; + + fixed_t iscale; + + sector_t* heightsec; // killough 3/27/98 + + // Don't waste time projecting sprites that are definitely not visible. 
+ if (thing == nullptr || + (thing->renderflags & RF_INVISIBLE) || + !thing->RenderStyle.IsVisible(thing->Alpha) || + !thing->IsVisibleToPlayer() || + !thing->IsInsideVisibleAngles()) + { + return; + } + + RenderPortal *renderportal = RenderPortal::Instance(); + + // [ZZ] Or less definitely not visible (hue) + // [ZZ] 10.01.2016: don't try to clip stuff inside a skybox against the current portal. + if (!renderportal->CurrentPortalInSkybox && renderportal->CurrentPortal && !!P_PointOnLineSidePrecise(thing->Pos(), renderportal->CurrentPortal->dst)) + return; + + // [RH] Interpolate the sprite's position to make it look smooth + DVector3 pos = thing->InterpolatedPosition(r_TicFracF); + pos.Z += thing->GetBobOffset(r_TicFracF); + + tex = nullptr; + voxel = nullptr; + + int spritenum = thing->sprite; + DVector2 spriteScale = thing->Scale; + int renderflags = thing->renderflags; + if (spriteScale.Y < 0) + { + spriteScale.Y = -spriteScale.Y; + renderflags ^= RF_YFLIP; + } + if (thing->player != nullptr) + { + P_CheckPlayerSprite(thing, spritenum, spriteScale); + } + + if (thing->picnum.isValid()) + { + picnum = thing->picnum; + + tex = TexMan(picnum); + if (tex->UseType == FTexture::TEX_Null) + { + return; + } + + if (tex->Rotations != 0xFFFF) + { + // choose a different rotation based on player view + spriteframe_t *sprframe = &SpriteFrames[tex->Rotations]; + DAngle ang = (pos - ViewPos).Angle(); + angle_t rot; + if (sprframe->Texture[0] == sprframe->Texture[1]) + { + if (thing->flags7 & MF7_SPRITEANGLE) + rot = (thing->SpriteAngle + 45.0 / 2 * 9).BAMs() >> 28; + else + rot = (ang - (thing->Angles.Yaw + thing->SpriteRotation) + 45.0 / 2 * 9).BAMs() >> 28; + } + else + { + if (thing->flags7 & MF7_SPRITEANGLE) + rot = (thing->SpriteAngle + (45.0 / 2 * 9 - 180.0 / 16)).BAMs() >> 28; + else + rot = (ang - (thing->Angles.Yaw + thing->SpriteRotation) + (45.0 / 2 * 9 - 180.0 / 16)).BAMs() >> 28; + } + picnum = sprframe->Texture[rot]; + if (sprframe->Flip & (1 << rot)) + { + 
renderflags ^= RF_XFLIP; + } + tex = TexMan[picnum]; // Do not animate the rotation + } + } + else + { + // decide which texture to use for the sprite + if ((unsigned)spritenum >= sprites.Size()) + { + DPrintf(DMSG_ERROR, "R_ProjectSprite: invalid sprite number %u\n", spritenum); + return; + } + spritedef_t *sprdef = &sprites[spritenum]; + if (thing->frame >= sprdef->numframes) + { + // If there are no frames at all for this sprite, don't draw it. + return; + } + else + { + //picnum = SpriteFrames[sprdef->spriteframes + thing->frame].Texture[0]; + // choose a different rotation based on player view + spriteframe_t *sprframe = &SpriteFrames[sprdef->spriteframes + thing->frame]; + DAngle ang = (pos - ViewPos).Angle(); + angle_t rot; + if (sprframe->Texture[0] == sprframe->Texture[1]) + { + if (thing->flags7 & MF7_SPRITEANGLE) + rot = (thing->SpriteAngle + 45.0 / 2 * 9).BAMs() >> 28; + else + rot = (ang - (thing->Angles.Yaw + thing->SpriteRotation) + 45.0 / 2 * 9).BAMs() >> 28; + } + else + { + if (thing->flags7 & MF7_SPRITEANGLE) + rot = (thing->SpriteAngle + (45.0 / 2 * 9 - 180.0 / 16)).BAMs() >> 28; + else + rot = (ang - (thing->Angles.Yaw + thing->SpriteRotation) + (45.0 / 2 * 9 - 180.0 / 16)).BAMs() >> 28; + } + picnum = sprframe->Texture[rot]; + if (sprframe->Flip & (1 << rot)) + { + renderflags ^= RF_XFLIP; + } + tex = TexMan[picnum]; // Do not animate the rotation + if (r_drawvoxels) + { + voxel = sprframe->Voxel; + } + } + } + if (spriteScale.X < 0) + { + spriteScale.X = -spriteScale.X; + renderflags ^= RF_XFLIP; + } + if (voxel == nullptr && (tex == nullptr || tex->UseType == FTexture::TEX_Null)) + { + return; + } + + if ((renderflags & RF_SPRITETYPEMASK) == RF_WALLSPRITE) + { + R_ProjectWallSprite(thing, pos, picnum, spriteScale, renderflags, spriteshade); + return; + } + + // transform the origin point + tr_x = pos.X - ViewPos.X; + tr_y = pos.Y - ViewPos.Y; + + tz = tr_x * ViewTanCos + tr_y * ViewTanSin; + + // thing is behind view plane? 
+ if (voxel == nullptr && tz < MINZ) + return; + + tx = tr_x * ViewSin - tr_y * ViewCos; + + // [RH] Flip for mirrors + if (renderportal->MirrorFlags & RF_XFLIP) + { + tx = -tx; + } + //tx2 = tx >> 4; + + // too far off the side? + // if it's a voxel, it can be further off the side + if ((voxel == nullptr && (fabs(tx / 64) > fabs(tz))) || + (voxel != nullptr && (fabs(tx / 128) > fabs(tz)))) + { + return; + } + + if (voxel == nullptr) + { + // [RH] Added scaling + int scaled_to = tex->GetScaledTopOffset(); + int scaled_bo = scaled_to - tex->GetScaledHeight(); + gzt = pos.Z + spriteScale.Y * scaled_to; + gzb = pos.Z + spriteScale.Y * scaled_bo; + } + else + { + xscale = spriteScale.X * voxel->Scale; + yscale = spriteScale.Y * voxel->Scale; + double piv = voxel->Voxel->Mips[0].Pivot.Z; + gzt = pos.Z + yscale * piv - thing->Floorclip; + gzb = pos.Z + yscale * (piv - voxel->Voxel->Mips[0].SizeZ); + if (gzt <= gzb) + return; + } + + // killough 3/27/98: exclude things totally separated + // from the viewer, by either water or fake ceilings + // killough 4/11/98: improve sprite clipping for underwater/fake ceilings + + heightsec = thing->Sector->GetHeightSec(); + + if (heightsec != nullptr) // only clip things which are in special sectors + { + if (fakeside == WaterFakeSide::AboveCeiling) + { + if (gzt < heightsec->ceilingplane.ZatPoint(pos)) + return; + } + else if (fakeside == WaterFakeSide::BelowFloor) + { + if (gzb >= heightsec->floorplane.ZatPoint(pos)) + return; + } + else + { + if (gzt < heightsec->floorplane.ZatPoint(pos)) + return; + if (!(heightsec->MoreFlags & SECF_FAKEFLOORONLY) && gzb >= heightsec->ceilingplane.ZatPoint(pos)) + return; + } + } + + if (voxel == nullptr) + { + xscale = CenterX / tz; + + // [RH] Reject sprites that are off the top or bottom of the screen + if (globaluclip * tz > ViewPos.Z - gzb || globaldclip * tz < ViewPos.Z - gzt) + { + return; + } + + // [RH] Flip for mirrors + renderflags ^= renderportal->MirrorFlags & RF_XFLIP; + + // 
calculate edges of the shape + const double thingxscalemul = spriteScale.X / tex->Scale.X; + + tx -= ((renderflags & RF_XFLIP) ? (tex->GetWidth() - tex->LeftOffset - 1) : tex->LeftOffset) * thingxscalemul; + double dtx1 = tx * xscale; + x1 = centerx + xs_RoundToInt(dtx1); + + // off the right side? + if (x1 >= renderportal->WindowRight) + return; + + tx += tex->GetWidth() * thingxscalemul; + x2 = centerx + xs_RoundToInt(tx * xscale); + + // off the left side or too small? + if ((x2 < renderportal->WindowLeft || x2 <= x1)) + return; + + xscale = spriteScale.X * xscale / tex->Scale.X; + iscale = (fixed_t)(FRACUNIT / xscale); // Round towards zero to avoid wrapping in edge cases + + double yscale = spriteScale.Y / tex->Scale.Y; + + // store information in a vissprite + vis = R_NewVisSprite(); + + vis->CurrentPortalUniq = renderportal->CurrentPortalUniq; + vis->xscale = FLOAT2FIXED(xscale); + vis->yscale = float(InvZtoScale * yscale / tz); + vis->idepth = float(1 / tz); + vis->floorclip = thing->Floorclip / yscale; + vis->texturemid = tex->TopOffset - (ViewPos.Z - pos.Z + thing->Floorclip) / yscale; + vis->x1 = x1 < renderportal->WindowLeft ? renderportal->WindowLeft : x1; + vis->x2 = x2 > renderportal->WindowRight ? 
renderportal->WindowRight : x2; + vis->Angle = thing->Angles.Yaw; + + if (renderflags & RF_XFLIP) + { + vis->startfrac = (tex->GetWidth() << FRACBITS) - 1; + vis->xiscale = -iscale; + } + else + { + vis->startfrac = 0; + vis->xiscale = iscale; + } + + vis->startfrac += (fixed_t)(vis->xiscale * (vis->x1 - centerx + 0.5 - dtx1)); + } + else + { + vis = R_NewVisSprite(); + + vis->CurrentPortalUniq = renderportal->CurrentPortalUniq; + vis->xscale = FLOAT2FIXED(xscale); + vis->yscale = (float)yscale; + vis->x1 = renderportal->WindowLeft; + vis->x2 = renderportal->WindowRight; + vis->idepth = 1 / MINZ; + vis->floorclip = thing->Floorclip; + + pos.Z -= thing->Floorclip; + + vis->Angle = thing->Angles.Yaw + voxel->AngleOffset; + + int voxelspin = (thing->flags & MF_DROPPED) ? voxel->DroppedSpin : voxel->PlacedSpin; + if (voxelspin != 0) + { + DAngle ang = double(I_FPSTime()) * voxelspin / 1000; + vis->Angle -= ang; + } + + vis->pa.vpos = { (float)ViewPos.X, (float)ViewPos.Y, (float)ViewPos.Z }; + vis->pa.vang = FAngle((float)ViewAngle.Degrees); + } + + // killough 3/27/98: save sector for special clipping later + vis->heightsec = heightsec; + vis->sector = thing->Sector; + + vis->depth = (float)tz; + vis->gpos = { (float)pos.X, (float)pos.Y, (float)pos.Z }; + vis->gzb = (float)gzb; // [RH] use gzb, not thing->z + vis->gzt = (float)gzt; // killough 3/27/98 + vis->deltax = float(pos.X - ViewPos.X); + vis->deltay = float(pos.Y - ViewPos.Y); + vis->renderflags = renderflags; + if (thing->flags5 & MF5_BRIGHT) + vis->renderflags |= RF_FULLBRIGHT; // kg3D + vis->Style.RenderStyle = thing->RenderStyle; + vis->FillColor = thing->fillcolor; + vis->Translation = thing->Translation; // [RH] thing translation table + vis->FakeFlatStat = fakeside; + vis->Style.Alpha = float(thing->Alpha); + vis->fakefloor = fakefloor; + vis->fakeceiling = fakeceiling; + vis->Style.ColormapNum = 0; + vis->bInMirror = renderportal->MirrorFlags & RF_XFLIP; + vis->bSplitSprite = false; + + if (voxel != 
nullptr) + { + vis->voxel = voxel->Voxel; + vis->bIsVoxel = true; + vis->bWallSprite = false; + DrewAVoxel = true; + } + else + { + vis->pic = tex; + vis->bIsVoxel = false; + vis->bWallSprite = false; + } + + // The software renderer cannot invert the source without inverting the overlay + // too. That means if the source is inverted, we need to do the reverse of what + // the invert overlay flag says to do. + INTBOOL invertcolormap = (vis->Style.RenderStyle.Flags & STYLEF_InvertOverlay); + + if (vis->Style.RenderStyle.Flags & STYLEF_InvertSource) + { + invertcolormap = !invertcolormap; + } + + FDynamicColormap *mybasecolormap = basecolormap; + if (current_sector->sectornum != thing->Sector->sectornum) // compare sectornums to account for R_FakeFlat copies. + { + // Todo: The actor is from a different sector so we have to retrieve the proper basecolormap for that sector. + } + + // Sprites that are added to the scene must fade to black. + if (vis->Style.RenderStyle == LegacyRenderStyles[STYLE_Add] && mybasecolormap->Fade != 0) + { + mybasecolormap = GetSpecialLights(mybasecolormap->Color, 0, mybasecolormap->Desaturate); + } + + if (vis->Style.RenderStyle.Flags & STYLEF_FadeToBlack) + { + if (invertcolormap) + { // Fade to white + mybasecolormap = GetSpecialLights(mybasecolormap->Color, MAKERGB(255, 255, 255), mybasecolormap->Desaturate); + invertcolormap = false; + } + else + { // Fade to black + mybasecolormap = GetSpecialLights(mybasecolormap->Color, MAKERGB(0, 0, 0), mybasecolormap->Desaturate); + } + } + + // get light level + if (fixedcolormap != nullptr) + { // fixed map + vis->Style.BaseColormap = fixedcolormap; + vis->Style.ColormapNum = 0; + } + else + { + if (invertcolormap) + { + mybasecolormap = GetSpecialLights(mybasecolormap->Color, mybasecolormap->Fade.InverseColor(), mybasecolormap->Desaturate); + } + if (fixedlightlev >= 0) + { + vis->Style.BaseColormap = mybasecolormap; + vis->Style.ColormapNum = fixedlightlev >> COLORMAPSHIFT; + } + else if 
(!foggy && ((renderflags & RF_FULLBRIGHT) || (thing->flags5 & MF5_BRIGHT))) + { // full bright + vis->Style.BaseColormap = (r_fullbrightignoresectorcolor) ? &FullNormalLight : mybasecolormap; + vis->Style.ColormapNum = 0; + } + else + { // diminished light + vis->Style.ColormapNum = GETPALOOKUP( + r_SpriteVisibility / MAX(tz, MINZ), spriteshade); + vis->Style.BaseColormap = mybasecolormap; + } + } + } + void R_DrawVisSprite(vissprite_t *vis, const short *mfloorclip, const short *mceilingclip) { fixed_t frac; diff --git a/src/swrenderer/things/r_sprite.h b/src/swrenderer/things/r_sprite.h index 5d8f898f6b..fd31ec759d 100644 --- a/src/swrenderer/things/r_sprite.h +++ b/src/swrenderer/things/r_sprite.h @@ -17,5 +17,6 @@ namespace swrenderer { + void R_ProjectSprite(AActor *thing, WaterFakeSide fakeside, F3DFloor *fakefloor, F3DFloor *fakeceiling, sector_t *current_sector, int spriteshade); void R_DrawVisSprite(vissprite_t *vis, const short *mfloorclip, const short *mceilingclip); } From f288d589ff8731247600289086edb5edbe750ece Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 11 Jan 2017 15:09:35 +0100 Subject: [PATCH 678/912] Move defines and enums to where they are used --- src/swrenderer/scene/r_things.h | 5 ----- src/swrenderer/things/r_playersprite.h | 3 +++ src/swrenderer/things/r_visiblesprite.h | 2 ++ src/swrenderer/things/r_voxel.h | 2 ++ 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/swrenderer/scene/r_things.h b/src/swrenderer/scene/r_things.h index e972f62fd0..1b3186acb3 100644 --- a/src/swrenderer/scene/r_things.h +++ b/src/swrenderer/scene/r_things.h @@ -29,9 +29,6 @@ struct particle_t; struct FVoxel; -#define MINZ double((2048*4) / double(1 << 20)) -#define BASEXCENTER (160) -#define BASEYCENTER (100) EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor); @@ -47,8 +44,6 @@ void R_DrawSprites (); void R_ClearSprites (); void R_DrawMasked (); -enum { DVF_OFFSCREEN = 1, DVF_SPANSONLY = 2, DVF_MIRRORED = 4 }; - void R_ClipVisSprite 
(vissprite_t *vis, int xl, int xh); extern bool DrewAVoxel; diff --git a/src/swrenderer/things/r_playersprite.h b/src/swrenderer/things/r_playersprite.h index a96b916661..c1a4b53b19 100644 --- a/src/swrenderer/things/r_playersprite.h +++ b/src/swrenderer/things/r_playersprite.h @@ -15,6 +15,9 @@ #include "r_visiblesprite.h" +#define BASEXCENTER (160) +#define BASEYCENTER (100) + namespace swrenderer { void R_SetupPlayerSpriteScale(); diff --git a/src/swrenderer/things/r_visiblesprite.h b/src/swrenderer/things/r_visiblesprite.h index 96eda392cd..db40aa1cdd 100644 --- a/src/swrenderer/things/r_visiblesprite.h +++ b/src/swrenderer/things/r_visiblesprite.h @@ -16,6 +16,8 @@ #include "swrenderer/line/r_line.h" #include "swrenderer/scene/r_bsp.h" +#define MINZ double((2048*4) / double(1 << 20)) + struct particle_t; struct FVoxel; diff --git a/src/swrenderer/things/r_voxel.h b/src/swrenderer/things/r_voxel.h index e3daa0d15a..7d566394d5 100644 --- a/src/swrenderer/things/r_voxel.h +++ b/src/swrenderer/things/r_voxel.h @@ -29,6 +29,8 @@ namespace swrenderer { struct vissprite_t; + enum { DVF_OFFSCREEN = 1, DVF_SPANSONLY = 2, DVF_MIRRORED = 4 }; + void R_DrawVisVoxel(vissprite_t *sprite, int minZ, int maxZ, short *cliptop, short *clipbottom); void R_FillBox(DVector3 origin, double extentX, double extentY, int color, short *cliptop, short *clipbottom, bool viewspace, bool pixelstretch); kvxslab_t *R_GetSlabStart(const FVoxelMipLevel &mip, int x, int y); From dce3a1c81cff8ec7f16b61d4218ceec504766b98 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 11 Jan 2017 15:41:42 +0100 Subject: [PATCH 679/912] Move remaining parts of r_things into r_visiblesprite --- src/CMakeLists.txt | 1 - src/polyrenderer/scene/poly_playersprite.cpp | 1 - src/swrenderer/drawers/r_draw_pal.cpp | 1 - src/swrenderer/line/r_fogboundary.cpp | 1 - src/swrenderer/plane/r_flatplane.cpp | 1 - src/swrenderer/plane/r_skyplane.cpp | 1 - src/swrenderer/plane/r_slopeplane.cpp | 1 - 
src/swrenderer/plane/r_visibleplane.cpp | 1 - src/swrenderer/r_main.cpp | 1 - src/swrenderer/scene/r_bsp.cpp | 1 - src/swrenderer/scene/r_portal.cpp | 2 +- src/swrenderer/scene/r_things.cpp | 760 ------------------- src/swrenderer/scene/r_things.h | 53 -- src/swrenderer/segments/r_clipsegment.cpp | 1 - src/swrenderer/segments/r_drawsegment.cpp | 4 +- src/swrenderer/segments/r_portalsegment.cpp | 1 - src/swrenderer/things/r_decal.cpp | 3 +- src/swrenderer/things/r_particle.cpp | 3 +- src/swrenderer/things/r_playersprite.cpp | 2 +- src/swrenderer/things/r_sprite.cpp | 2 +- src/swrenderer/things/r_visiblesprite.cpp | 656 ++++++++++++++++ src/swrenderer/things/r_visiblesprite.h | 14 + src/swrenderer/things/r_voxel.cpp | 6 +- src/swrenderer/things/r_voxel.h | 1 + src/swrenderer/things/r_wallsprite.cpp | 3 +- 25 files changed, 686 insertions(+), 835 deletions(-) delete mode 100644 src/swrenderer/scene/r_things.cpp delete mode 100644 src/swrenderer/scene/r_things.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 4415a027b0..74d1183564 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -847,7 +847,6 @@ set( FASTMATH_PCH_SOURCES swrenderer/drawers/r_thread.cpp swrenderer/scene/r_3dfloors.cpp swrenderer/scene/r_bsp.cpp - swrenderer/scene/r_things.cpp swrenderer/scene/r_portal.cpp swrenderer/line/r_line.cpp swrenderer/line/r_walldraw.cpp diff --git a/src/polyrenderer/scene/poly_playersprite.cpp b/src/polyrenderer/scene/poly_playersprite.cpp index 5baa7bda3e..03e22f1d1c 100644 --- a/src/polyrenderer/scene/poly_playersprite.cpp +++ b/src/polyrenderer/scene/poly_playersprite.cpp @@ -27,7 +27,6 @@ #include "r_data/r_translate.h" #include "poly_playersprite.h" #include "polyrenderer/poly_renderer.h" -#include "swrenderer/scene/r_things.h" // for pspritexscale EXTERN_CVAR(Bool, r_drawplayersprites) EXTERN_CVAR(Bool, r_deathcamera) diff --git a/src/swrenderer/drawers/r_draw_pal.cpp b/src/swrenderer/drawers/r_draw_pal.cpp index a55705cb7d..13f6219acf 100644 --- 
a/src/swrenderer/drawers/r_draw_pal.cpp +++ b/src/swrenderer/drawers/r_draw_pal.cpp @@ -46,7 +46,6 @@ #include "r_defs.h" #include "r_draw.h" #include "swrenderer/r_main.h" -#include "swrenderer/scene/r_things.h" #include "v_video.h" #include "r_draw_pal.h" diff --git a/src/swrenderer/line/r_fogboundary.cpp b/src/swrenderer/line/r_fogboundary.cpp index f5aff03529..1c8c92160a 100644 --- a/src/swrenderer/line/r_fogboundary.cpp +++ b/src/swrenderer/line/r_fogboundary.cpp @@ -19,7 +19,6 @@ #include "doomdef.h" #include "doomstat.h" #include "swrenderer/r_main.h" -#include "swrenderer/scene/r_things.h" #include "r_sky.h" #include "stats.h" #include "v_video.h" diff --git a/src/swrenderer/plane/r_flatplane.cpp b/src/swrenderer/plane/r_flatplane.cpp index ba88dd7189..af4c377975 100644 --- a/src/swrenderer/plane/r_flatplane.cpp +++ b/src/swrenderer/plane/r_flatplane.cpp @@ -19,7 +19,6 @@ #include "doomdef.h" #include "doomstat.h" #include "swrenderer/r_main.h" -#include "swrenderer/scene/r_things.h" #include "r_sky.h" #include "stats.h" #include "v_video.h" diff --git a/src/swrenderer/plane/r_skyplane.cpp b/src/swrenderer/plane/r_skyplane.cpp index 0079d5af5c..b31445c140 100644 --- a/src/swrenderer/plane/r_skyplane.cpp +++ b/src/swrenderer/plane/r_skyplane.cpp @@ -19,7 +19,6 @@ #include "doomdef.h" #include "doomstat.h" #include "swrenderer/r_main.h" -#include "swrenderer/scene/r_things.h" #include "r_sky.h" #include "stats.h" #include "v_video.h" diff --git a/src/swrenderer/plane/r_slopeplane.cpp b/src/swrenderer/plane/r_slopeplane.cpp index e68cee2ab1..85afd1d33e 100644 --- a/src/swrenderer/plane/r_slopeplane.cpp +++ b/src/swrenderer/plane/r_slopeplane.cpp @@ -19,7 +19,6 @@ #include "doomdef.h" #include "doomstat.h" #include "swrenderer/r_main.h" -#include "swrenderer/scene/r_things.h" #include "r_sky.h" #include "stats.h" #include "v_video.h" diff --git a/src/swrenderer/plane/r_visibleplane.cpp b/src/swrenderer/plane/r_visibleplane.cpp index d1b3084be0..df937ffdfa 
100644 --- a/src/swrenderer/plane/r_visibleplane.cpp +++ b/src/swrenderer/plane/r_visibleplane.cpp @@ -30,7 +30,6 @@ #include "gl/dynlights/gl_dynlight.h" #include "swrenderer/r_main.h" #include "swrenderer/scene/r_bsp.h" -#include "swrenderer/scene/r_things.h" #include "swrenderer/scene/r_3dfloors.h" #include "swrenderer/scene/r_portal.h" #include "swrenderer/plane/r_flatplane.h" diff --git a/src/swrenderer/r_main.cpp b/src/swrenderer/r_main.cpp index 771653fff6..630edc9aa2 100644 --- a/src/swrenderer/r_main.cpp +++ b/src/swrenderer/r_main.cpp @@ -35,7 +35,6 @@ #include "m_random.h" #include "m_bbox.h" #include "r_main.h" -#include "scene/r_things.h" #include "drawers/r_draw.h" #include "plane/r_flatplane.h" #include "scene/r_bsp.h" diff --git a/src/swrenderer/scene/r_bsp.cpp b/src/swrenderer/scene/r_bsp.cpp index bd18ab41ef..5aa15e3cc6 100644 --- a/src/swrenderer/scene/r_bsp.cpp +++ b/src/swrenderer/scene/r_bsp.cpp @@ -39,7 +39,6 @@ #include "swrenderer/things/r_particle.h" #include "swrenderer/segments/r_clipsegment.h" #include "swrenderer/line/r_wallsetup.h" -#include "r_things.h" #include "r_3dfloors.h" #include "r_portal.h" #include "a_sharedglobal.h" diff --git a/src/swrenderer/scene/r_portal.cpp b/src/swrenderer/scene/r_portal.cpp index 092eac4c9f..5ca458b309 100644 --- a/src/swrenderer/scene/r_portal.cpp +++ b/src/swrenderer/scene/r_portal.cpp @@ -43,12 +43,12 @@ #include "p_setup.h" #include "version.h" #include "r_utility.h" -#include "r_things.h" #include "r_3dfloors.h" #include "swrenderer/drawers/r_draw_rgba.h" #include "swrenderer/segments/r_clipsegment.h" #include "swrenderer/segments/r_drawsegment.h" #include "swrenderer/plane/r_visibleplane.h" +#include "swrenderer/things/r_visiblesprite.h" #include "swrenderer/scene/r_bsp.h" #include "swrenderer/r_main.h" #include "swrenderer/r_memory.h" diff --git a/src/swrenderer/scene/r_things.cpp b/src/swrenderer/scene/r_things.cpp deleted file mode 100644 index 84dfc44ec7..0000000000 --- 
a/src/swrenderer/scene/r_things.cpp +++ /dev/null @@ -1,760 +0,0 @@ -// Emacs style mode select -*- C++ -*- -//----------------------------------------------------------------------------- -// -// $Id:$ -// -// Copyright (C) 1993-1996 by id Software, Inc. -// -// This source is available for distribution and/or modification -// only under the terms of the DOOM Source Code License as -// published by id Software. All rights reserved. -// -// The source is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// FITNESS FOR A PARTICULAR PURPOSE. See the DOOM Source Code License -// for more details. -// -// $Log:$ -// -// DESCRIPTION: -// Refresh of things, i.e. objects represented by sprites. -// -//----------------------------------------------------------------------------- - -#include -#include -#include - -#include "p_lnspec.h" -#include "templates.h" -#include "doomdef.h" -#include "m_swap.h" -#include "i_system.h" -#include "w_wad.h" -#include "swrenderer/r_main.h" -#include "swrenderer/scene/r_things.h" -#include "c_console.h" -#include "c_cvars.h" -#include "c_dispatch.h" -#include "doomstat.h" -#include "v_video.h" -#include "sc_man.h" -#include "s_sound.h" -#include "sbar.h" -#include "gi.h" -#include "r_sky.h" -#include "cmdlib.h" -#include "g_level.h" -#include "d_net.h" -#include "colormatcher.h" -#include "d_netinf.h" -#include "p_effect.h" -#include "r_bsp.h" -#include "r_3dfloors.h" -#include "swrenderer/drawers/r_draw_rgba.h" -#include "swrenderer/drawers/r_draw_pal.h" -#include "v_palette.h" -#include "r_data/r_translate.h" -#include "r_data/colormaps.h" -#include "r_data/voxels.h" -#include "p_local.h" -#include "p_maputl.h" -#include "swrenderer/things/r_voxel.h" -#include "swrenderer/segments/r_drawsegment.h" -#include "r_portal.h" -#include "swrenderer/things/r_particle.h" -#include "swrenderer/things/r_playersprite.h" -#include "swrenderer/things/r_wallsprite.h" -#include 
"swrenderer/things/r_sprite.h" -#include "swrenderer/plane/r_visibleplane.h" -#include "swrenderer/r_memory.h" -#include "g_levellocals.h" - -EXTERN_CVAR(Int, r_drawfuzz) -EXTERN_CVAR(Bool, r_drawvoxels) -EXTERN_CVAR(Bool, r_blendmethod) - -CVAR(Bool, r_fullbrightignoresectorcolor, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); -CVAR(Bool, r_splitsprites, true, CVAR_ARCHIVE) - -namespace swrenderer -{ - -bool DrewAVoxel; - -static vissprite_t **spritesorter; -static int spritesortersize = 0; -static int vsprcount; - - - - -void R_DeinitSprites() -{ - R_DeinitVisSprites(); - R_DeinitRenderVoxel(); - - // Free vissprites sorter - if (spritesorter != NULL) - { - delete[] spritesorter; - spritesortersize = 0; - spritesorter = NULL; - } -} - -// -// R_ClearSprites -// Called at frame start. -// -void R_ClearSprites (void) -{ - R_ClearVisSprites(); - DrewAVoxel = false; -} - -static TArray portaldrawsegs; - -static inline void R_CollectPortals() -{ - // This function collects all drawsegs that may be of interest to R_ClipSpriteColumnWithPortals - // Having that function over the entire list of drawsegs can break down performance quite drastically. - // This is doing the costly stuff only once so that R_ClipSpriteColumnWithPortals can - // a) exit early if no relevant info is found and - // b) skip most of the collected drawsegs which have no portal attached. - portaldrawsegs.Clear(); - for (drawseg_t* seg = ds_p; seg-- > firstdrawseg; ) // copied code from killough below - { - // I don't know what makes this happen (some old top-down portal code or possibly skybox code? something adds null lines...) - // crashes at the first frame of the first map of Action2.wad - if (!seg->curline) continue; - - line_t* line = seg->curline->linedef; - // ignore minisegs from GL nodes. 
- if (!line) continue; - - // check if this line will clip sprites to itself - if (!line->isVisualPortal() && line->special != Line_Mirror) - continue; - - // don't clip sprites with portal's back side (it's transparent) - if (seg->curline->sidedef != line->sidedef[0]) - continue; - - portaldrawsegs.Push(seg); - } -} - -bool R_ClipSpriteColumnWithPortals(int x, vissprite_t* spr) -{ - RenderPortal *renderportal = RenderPortal::Instance(); - - // [ZZ] 10.01.2016: don't clip sprites from the root of a skybox. - if (renderportal->CurrentPortalInSkybox) - return false; - - for (drawseg_t *seg : portaldrawsegs) - { - // ignore segs from other portals - if (seg->CurrentPortalUniq != renderportal->CurrentPortalUniq) - continue; - - // (all checks that are already done in R_CollectPortals have been removed for performance reasons.) - - // don't clip if the sprite is in front of the portal - if (!P_PointOnLineSidePrecise(spr->gpos.X, spr->gpos.Y, seg->curline->linedef)) - continue; - - // now if current column is covered by this drawseg, we clip it away - if ((x >= seg->x1) && (x < seg->x2)) - return true; - } - - return false; -} - - -// -// R_SortVisSprites -// -// [RH] The old code for this function used a bubble sort, which was far less -// than optimal with large numbers of sprites. I changed it to use the -// stdlib qsort() function instead, and now it is a *lot* faster; the -// more vissprites that need to be sorted, the better the performance -// gain compared to the old function. -// -// Sort vissprites by depth, far to near - -// This is the standard version, which does a simple test based on depth. -static bool sv_compare(vissprite_t *a, vissprite_t *b) -{ - return a->idepth > b->idepth; -} - -// This is an alternate version, for when one or more voxel is in view. -// It does a 2D distance test based on whichever one is furthest from -// the viewpoint. 
-static bool sv_compare2d(vissprite_t *a, vissprite_t *b) -{ - return DVector2(a->deltax, a->deltay).LengthSquared() < - DVector2(b->deltax, b->deltay).LengthSquared(); -} - -#ifdef __GNUC__ -static void swap(vissprite_t *&a, vissprite_t *&b) -{ - vissprite_t *t = a; - a = b; - b = t; -} -#endif - -void R_SortVisSprites (bool (*compare)(vissprite_t *, vissprite_t *), size_t first) -{ - int i; - vissprite_t **spr; - - vsprcount = int(vissprite_p - &vissprites[first]); - - if (vsprcount == 0) - return; - - if (spritesortersize < MaxVisSprites) - { - if (spritesorter != NULL) - delete[] spritesorter; - spritesorter = new vissprite_t *[MaxVisSprites]; - spritesortersize = MaxVisSprites; - } - - if (!(i_compatflags & COMPATF_SPRITESORT)) - { - for (i = 0, spr = firstvissprite; i < vsprcount; i++, spr++) - { - spritesorter[i] = *spr; - } - } - else - { - // If the compatibility option is on sprites of equal distance need to - // be sorted in inverse order. This is most easily achieved by - // filling the sort array backwards before the sort. 
- for (i = 0, spr = firstvissprite + vsprcount-1; i < vsprcount; i++, spr--) - { - spritesorter[i] = *spr; - } - } - - std::stable_sort(&spritesorter[0], &spritesorter[vsprcount], compare); -} - -// -// R_DrawSprite -// -void R_DrawSprite (vissprite_t *spr) -{ - static short clipbot[MAXWIDTH]; - static short cliptop[MAXWIDTH]; - drawseg_t *ds; - int i; - int x1, x2; - int r1, r2; - short topclip, botclip; - short *clip1, *clip2; - FSWColormap *colormap = spr->Style.BaseColormap; - int colormapnum = spr->Style.ColormapNum; - F3DFloor *rover; - FDynamicColormap *mybasecolormap; - - Clip3DFloors *clip3d = Clip3DFloors::Instance(); - - // [RH] Check for particles - if (!spr->bIsVoxel && spr->pic == NULL) - { - // kg3D - reject invisible parts - if ((clip3d->fake3D & FAKE3D_CLIPBOTTOM) && spr->gpos.Z <= clip3d->sclipBottom) return; - if ((clip3d->fake3D & FAKE3D_CLIPTOP) && spr->gpos.Z >= clip3d->sclipTop) return; - R_DrawParticle (spr); - return; - } - - x1 = spr->x1; - x2 = spr->x2; - - // [RH] Quickly reject sprites with bad x ranges. - if (x1 >= x2) - return; - - // [RH] Sprites split behind a one-sided line can also be discarded. 
- if (spr->sector == NULL) - return; - - // kg3D - reject invisible parts - if ((clip3d->fake3D & FAKE3D_CLIPBOTTOM) && spr->gzt <= clip3d->sclipBottom) return; - if ((clip3d->fake3D & FAKE3D_CLIPTOP) && spr->gzb >= clip3d->sclipTop) return; - - // kg3D - correct colors now - if (!fixedcolormap && fixedlightlev < 0 && spr->sector->e && spr->sector->e->XFloor.lightlist.Size()) - { - if (!(clip3d->fake3D & FAKE3D_CLIPTOP)) - { - clip3d->sclipTop = spr->sector->ceilingplane.ZatPoint(ViewPos); - } - sector_t *sec = NULL; - for (i = spr->sector->e->XFloor.lightlist.Size() - 1; i >= 0; i--) - { - if (clip3d->sclipTop <= spr->sector->e->XFloor.lightlist[i].plane.Zat0()) - { - rover = spr->sector->e->XFloor.lightlist[i].caster; - if (rover) - { - if (rover->flags & FF_DOUBLESHADOW && clip3d->sclipTop <= rover->bottom.plane->Zat0()) - { - break; - } - sec = rover->model; - if (rover->flags & FF_FADEWALLS) - { - mybasecolormap = sec->ColorMap; - } - else - { - mybasecolormap = spr->sector->e->XFloor.lightlist[i].extra_colormap; - } - } - break; - } - } - // found new values, recalculate - if (sec) - { - INTBOOL invertcolormap = (spr->Style.RenderStyle.Flags & STYLEF_InvertOverlay); - - if (spr->Style.RenderStyle.Flags & STYLEF_InvertSource) - { - invertcolormap = !invertcolormap; - } - - // Sprites that are added to the scene must fade to black. 
- if (spr->Style.RenderStyle == LegacyRenderStyles[STYLE_Add] && mybasecolormap->Fade != 0) - { - mybasecolormap = GetSpecialLights(mybasecolormap->Color, 0, mybasecolormap->Desaturate); - } - - if (spr->Style.RenderStyle.Flags & STYLEF_FadeToBlack) - { - if (invertcolormap) - { // Fade to white - mybasecolormap = GetSpecialLights(mybasecolormap->Color, MAKERGB(255,255,255), mybasecolormap->Desaturate); - invertcolormap = false; - } - else - { // Fade to black - mybasecolormap = GetSpecialLights(mybasecolormap->Color, MAKERGB(0,0,0), mybasecolormap->Desaturate); - } - } - - // get light level - if (invertcolormap) - { - mybasecolormap = GetSpecialLights(mybasecolormap->Color, mybasecolormap->Fade.InverseColor(), mybasecolormap->Desaturate); - } - if (fixedlightlev >= 0) - { - spr->Style.BaseColormap = mybasecolormap; - spr->Style.ColormapNum = fixedlightlev >> COLORMAPSHIFT; - } - else if (!foggy && (spr->renderflags & RF_FULLBRIGHT)) - { // full bright - spr->Style.BaseColormap = (r_fullbrightignoresectorcolor) ? &FullNormalLight : mybasecolormap; - spr->Style.ColormapNum = 0; - } - else - { // diminished light - int spriteshade = LIGHT2SHADE(sec->lightlevel + r_actualextralight); - spr->Style.BaseColormap = mybasecolormap; - spr->Style.ColormapNum = GETPALOOKUP(r_SpriteVisibility / MAX(MINZ, (double)spr->depth), spriteshade); - } - } - } - - // [RH] Initialize the clipping arrays to their largest possible range - // instead of using a special "not clipped" value. This eliminates - // visual anomalies when looking down and should be faster, too. - topclip = 0; - botclip = viewheight; - - // killough 3/27/98: - // Clip the sprite against deep water and/or fake ceilings. 
- // [RH] rewrote this to be based on which part of the sector is really visible - - double scale = InvZtoScale * spr->idepth; - double hzb = DBL_MIN, hzt = DBL_MAX; - - if (spr->bIsVoxel && spr->floorclip != 0) - { - hzb = spr->gzb; - } - - if (spr->heightsec && !(spr->heightsec->MoreFlags & SECF_IGNOREHEIGHTSEC)) - { // only things in specially marked sectors - if (spr->FakeFlatStat != WaterFakeSide::AboveCeiling) - { - double hz = spr->heightsec->floorplane.ZatPoint(spr->gpos); - int h = xs_RoundToInt(CenterY - (hz - ViewPos.Z) * scale); - - if (spr->FakeFlatStat == WaterFakeSide::BelowFloor) - { // seen below floor: clip top - if (!spr->bIsVoxel && h > topclip) - { - topclip = short(MIN(h, viewheight)); - } - hzt = MIN(hzt, hz); - } - else - { // seen in the middle: clip bottom - if (!spr->bIsVoxel && h < botclip) - { - botclip = MAX (0, h); - } - hzb = MAX(hzb, hz); - } - } - if (spr->FakeFlatStat != WaterFakeSide::BelowFloor && !(spr->heightsec->MoreFlags & SECF_FAKEFLOORONLY)) - { - double hz = spr->heightsec->ceilingplane.ZatPoint(spr->gpos); - int h = xs_RoundToInt(CenterY - (hz - ViewPos.Z) * scale); - - if (spr->FakeFlatStat == WaterFakeSide::AboveCeiling) - { // seen above ceiling: clip bottom - if (!spr->bIsVoxel && h < botclip) - { - botclip = MAX (0, h); - } - hzb = MAX(hzb, hz); - } - else - { // seen in the middle: clip top - if (!spr->bIsVoxel && h > topclip) - { - topclip = MIN(h, viewheight); - } - hzt = MIN(hzt, hz); - } - } - } - // killough 3/27/98: end special clipping for deep water / fake ceilings - else if (!spr->bIsVoxel && spr->floorclip) - { // [RH] Move floorclip stuff from R_DrawVisSprite to here - //int clip = ((FLOAT2FIXED(CenterY) - FixedMul (spr->texturemid - (spr->pic->GetHeight() << FRACBITS) + spr->floorclip, spr->yscale)) >> FRACBITS); - int clip = xs_RoundToInt(CenterY - (spr->texturemid - spr->pic->GetHeight() + spr->floorclip) * spr->yscale); - if (clip < botclip) - { - botclip = MAX(0, clip); - } - } - - if 
(clip3d->fake3D & FAKE3D_CLIPBOTTOM) - { - if (!spr->bIsVoxel) - { - double hz = clip3d->sclipBottom; - if (spr->fakefloor) - { - double floorz = spr->fakefloor->top.plane->Zat0(); - if (ViewPos.Z > floorz && floorz == clip3d->sclipBottom ) - { - hz = spr->fakefloor->bottom.plane->Zat0(); - } - } - int h = xs_RoundToInt(CenterY - (hz - ViewPos.Z) * scale); - if (h < botclip) - { - botclip = MAX(0, h); - } - } - hzb = MAX(hzb, clip3d->sclipBottom); - } - if (clip3d->fake3D & FAKE3D_CLIPTOP) - { - if (!spr->bIsVoxel) - { - double hz = clip3d->sclipTop; - if (spr->fakeceiling != NULL) - { - double ceilingZ = spr->fakeceiling->bottom.plane->Zat0(); - if (ViewPos.Z < ceilingZ && ceilingZ == clip3d->sclipTop) - { - hz = spr->fakeceiling->top.plane->Zat0(); - } - } - int h = xs_RoundToInt(CenterY - (hz - ViewPos.Z) * scale); - if (h > topclip) - { - topclip = short(MIN(h, viewheight)); - } - } - hzt = MIN(hzt, clip3d->sclipTop); - } - - if (topclip >= botclip) - { - spr->Style.BaseColormap = colormap; - spr->Style.ColormapNum = colormapnum; - return; - } - - i = x2 - x1; - clip1 = clipbot + x1; - clip2 = cliptop + x1; - do - { - *clip1++ = botclip; - *clip2++ = topclip; - } while (--i); - - // Scan drawsegs from end to start for obscuring segs. - // The first drawseg that is closer than the sprite is the clip seg. 
- - // Modified by Lee Killough: - // (pointer check was originally nonportable - // and buggy, by going past LEFT end of array): - - // for (ds=ds_p-1 ; ds >= drawsegs ; ds--) old buggy code - - for (ds = ds_p; ds-- > firstdrawseg; ) // new -- killough - { - // [ZZ] portal handling here - //if (ds->CurrentPortalUniq != spr->CurrentPortalUniq) - // continue; - // [ZZ] WARNING: uncommenting the two above lines, totally breaks sprite clipping - - // kg3D - no clipping on fake segs - if (ds->fake) continue; - // determine if the drawseg obscures the sprite - if (ds->x1 >= x2 || ds->x2 <= x1 || - (!(ds->silhouette & SIL_BOTH) && ds->maskedtexturecol == -1 && - !ds->bFogBoundary) ) - { - // does not cover sprite - continue; - } - - r1 = MAX (ds->x1, x1); - r2 = MIN (ds->x2, x2); - - float neardepth, fardepth; - if (!spr->bWallSprite) - { - if (ds->sz1 < ds->sz2) - { - neardepth = ds->sz1, fardepth = ds->sz2; - } - else - { - neardepth = ds->sz2, fardepth = ds->sz1; - } - } - - - // Check if sprite is in front of draw seg: - if ((!spr->bWallSprite && neardepth > spr->depth) || ((spr->bWallSprite || fardepth > spr->depth) && - (spr->gpos.Y - ds->curline->v1->fY()) * (ds->curline->v2->fX() - ds->curline->v1->fX()) - - (spr->gpos.X - ds->curline->v1->fX()) * (ds->curline->v2->fY() - ds->curline->v1->fY()) <= 0)) - { - RenderPortal *renderportal = RenderPortal::Instance(); - - // seg is behind sprite, so draw the mid texture if it has one - if (ds->CurrentPortalUniq == renderportal->CurrentPortalUniq && // [ZZ] instead, portal uniq check is made here - (ds->maskedtexturecol != -1 || ds->bFogBoundary)) - R_RenderMaskedSegRange (ds, r1, r2); - - continue; - } - - // clip this piece of the sprite - // killough 3/27/98: optimized and made much shorter - // [RH] Optimized further (at least for VC++; - // other compilers should be at least as good as before) - - if (ds->silhouette & SIL_BOTTOM) //bottom sil - { - clip1 = clipbot + r1; - clip2 = openings + ds->sprbottomclip + r1 - 
ds->x1; - i = r2 - r1; - do - { - if (*clip1 > *clip2) - *clip1 = *clip2; - clip1++; - clip2++; - } while (--i); - } - - if (ds->silhouette & SIL_TOP) // top sil - { - clip1 = cliptop + r1; - clip2 = openings + ds->sprtopclip + r1 - ds->x1; - i = r2 - r1; - do - { - if (*clip1 < *clip2) - *clip1 = *clip2; - clip1++; - clip2++; - } while (--i); - } - } - - // all clipping has been performed, so draw the sprite - - if (!spr->bIsVoxel) - { - if (!spr->bWallSprite) - { - R_DrawVisSprite(spr, clipbot, cliptop); - } - else - { - R_DrawWallSprite(spr, clipbot, cliptop); - } - } - else - { - // If it is completely clipped away, don't bother drawing it. - if (cliptop[x2] >= clipbot[x2]) - { - for (i = x1; i < x2; ++i) - { - if (cliptop[i] < clipbot[i]) - { - break; - } - } - if (i == x2) - { - spr->Style.BaseColormap = colormap; - spr->Style.ColormapNum = colormapnum; - return; - } - } - // Add everything outside the left and right edges to the clipping array - // for R_DrawVisVoxel(). - if (x1 > 0) - { - fillshort(cliptop, x1, viewheight); - } - if (x2 < viewwidth - 1) - { - fillshort(cliptop + x2, viewwidth - x2, viewheight); - } - int minvoxely = spr->gzt <= hzt ? 0 : xs_RoundToInt((spr->gzt - hzt) / spr->yscale); - int maxvoxely = spr->gzb > hzb ? 
INT_MAX : xs_RoundToInt((spr->gzt - hzb) / spr->yscale); - R_DrawVisVoxel(spr, minvoxely, maxvoxely, cliptop, clipbot); - } - spr->Style.BaseColormap = colormap; - spr->Style.ColormapNum = colormapnum; -} - -// kg3D: -// R_DrawMasked contains sorting -// original renamed to R_DrawMaskedSingle - -void R_DrawMaskedSingle (bool renew) -{ - drawseg_t *ds; - int i; - - RenderPortal *renderportal = RenderPortal::Instance(); - - for (i = vsprcount; i > 0; i--) - { - if (spritesorter[i-1]->CurrentPortalUniq != renderportal->CurrentPortalUniq) - continue; // probably another time - R_DrawSprite (spritesorter[i-1]); - } - - // render any remaining masked mid textures - - // Modified by Lee Killough: - // (pointer check was originally nonportable - // and buggy, by going past LEFT end of array): - - // for (ds=ds_p-1 ; ds >= drawsegs ; ds--) old buggy code - - if (renew) - { - Clip3DFloors::Instance()->fake3D |= FAKE3D_REFRESHCLIP; - } - for (ds = ds_p; ds-- > firstdrawseg; ) // new -- killough - { - // [ZZ] the same as above - if (ds->CurrentPortalUniq != renderportal->CurrentPortalUniq) - continue; - // kg3D - no fake segs - if (ds->fake) continue; - if (ds->maskedtexturecol != -1 || ds->bFogBoundary) - { - R_RenderMaskedSegRange (ds, ds->x1, ds->x2); - } - } -} - -void R_DrawMasked () -{ - R_CollectPortals(); - R_SortVisSprites (DrewAVoxel ? 
sv_compare2d : sv_compare, firstvissprite - vissprites); - - Clip3DFloors *clip3d = Clip3DFloors::Instance(); - if (clip3d->height_top == NULL) - { // kg3D - no visible 3D floors, normal rendering - R_DrawMaskedSingle(false); - } - else - { // kg3D - correct sorting - HeightLevel *hl; - - // ceilings - for (hl = clip3d->height_cur; hl != NULL && hl->height >= ViewPos.Z; hl = hl->prev) - { - if (hl->next) - { - clip3d->fake3D = FAKE3D_CLIPBOTTOM | FAKE3D_CLIPTOP; - clip3d->sclipTop = hl->next->height; - } - else - { - clip3d->fake3D = FAKE3D_CLIPBOTTOM; - } - clip3d->sclipBottom = hl->height; - R_DrawMaskedSingle(true); - R_DrawHeightPlanes(hl->height); - } - - // floors - clip3d->fake3D = FAKE3D_DOWN2UP | FAKE3D_CLIPTOP; - clip3d->sclipTop = clip3d->height_top->height; - R_DrawMaskedSingle(true); - hl = clip3d->height_top; - for (hl = clip3d->height_top; hl != NULL && hl->height < ViewPos.Z; hl = hl->next) - { - R_DrawHeightPlanes(hl->height); - if (hl->next) - { - clip3d->fake3D = FAKE3D_DOWN2UP | FAKE3D_CLIPTOP | FAKE3D_CLIPBOTTOM; - clip3d->sclipTop = hl->next->height; - } - else - { - clip3d->fake3D = FAKE3D_DOWN2UP | FAKE3D_CLIPBOTTOM; - } - clip3d->sclipBottom = hl->height; - R_DrawMaskedSingle(true); - } - clip3d->DeleteHeights(); - clip3d->fake3D = 0; - } - R_DrawPlayerSprites(); -} - -} diff --git a/src/swrenderer/scene/r_things.h b/src/swrenderer/scene/r_things.h deleted file mode 100644 index 1b3186acb3..0000000000 --- a/src/swrenderer/scene/r_things.h +++ /dev/null @@ -1,53 +0,0 @@ -// Emacs style mode select -*- C++ -*- -//----------------------------------------------------------------------------- -// -// $Id:$ -// -// Copyright (C) 1993-1996 by id Software, Inc. -// -// This source is available for distribution and/or modification -// only under the terms of the DOOM Source Code License as -// published by id Software. All rights reserved. 
-// -// The source is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// FITNESS FOR A PARTICULAR PURPOSE. See the DOOM Source Code License -// for more details. -// -// DESCRIPTION: -// Rendering of moving objects, sprites. -// -//----------------------------------------------------------------------------- - - -#ifndef __R_THINGS__ -#define __R_THINGS__ - -#include "swrenderer/things/r_visiblesprite.h" -#include "swrenderer/scene/r_bsp.h" - -struct particle_t; -struct FVoxel; - - -EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor); - -namespace swrenderer -{ - -bool R_ClipSpriteColumnWithPortals(int x, vissprite_t* spr); - - -void R_CacheSprite (spritedef_t *sprite); -void R_SortVisSprites (int (*compare)(const void *, const void *), size_t first); -void R_DrawSprites (); -void R_ClearSprites (); -void R_DrawMasked (); - -void R_ClipVisSprite (vissprite_t *vis, int xl, int xh); - -extern bool DrewAVoxel; - -} - -#endif diff --git a/src/swrenderer/segments/r_clipsegment.cpp b/src/swrenderer/segments/r_clipsegment.cpp index 04bef89077..c1cc94de66 100644 --- a/src/swrenderer/segments/r_clipsegment.cpp +++ b/src/swrenderer/segments/r_clipsegment.cpp @@ -20,7 +20,6 @@ #include "p_setup.h" #include "swrenderer/r_main.h" #include "swrenderer/drawers/r_draw.h" -#include "swrenderer/scene/r_things.h" #include "swrenderer/scene/r_3dfloors.h" #include "a_sharedglobal.h" #include "g_level.h" diff --git a/src/swrenderer/segments/r_drawsegment.cpp b/src/swrenderer/segments/r_drawsegment.cpp index ca875c6317..18530e157d 100644 --- a/src/swrenderer/segments/r_drawsegment.cpp +++ b/src/swrenderer/segments/r_drawsegment.cpp @@ -31,7 +31,6 @@ #include "swrenderer/r_main.h" #include "swrenderer/r_memory.h" #include "swrenderer/drawers/r_draw.h" -#include "swrenderer/scene/r_things.h" #include "swrenderer/scene/r_3dfloors.h" #include "swrenderer/scene/r_bsp.h" #include "swrenderer/scene/r_portal.h" @@ -39,6 +38,9 @@ 
#include "swrenderer/line/r_walldraw.h" #include "swrenderer/line/r_fogboundary.h" #include "swrenderer/segments/r_drawsegment.h" +#include "swrenderer/things/r_visiblesprite.h" + +EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor); namespace swrenderer { diff --git a/src/swrenderer/segments/r_portalsegment.cpp b/src/swrenderer/segments/r_portalsegment.cpp index 4e7a2b5336..476e6b0a35 100644 --- a/src/swrenderer/segments/r_portalsegment.cpp +++ b/src/swrenderer/segments/r_portalsegment.cpp @@ -20,7 +20,6 @@ #include "p_setup.h" #include "swrenderer/r_main.h" #include "swrenderer/drawers/r_draw.h" -#include "swrenderer/scene/r_things.h" #include "swrenderer/scene/r_3dfloors.h" #include "a_sharedglobal.h" #include "g_level.h" diff --git a/src/swrenderer/things/r_decal.cpp b/src/swrenderer/things/r_decal.cpp index 1483e42120..9e2d0b8939 100644 --- a/src/swrenderer/things/r_decal.cpp +++ b/src/swrenderer/things/r_decal.cpp @@ -20,7 +20,6 @@ #include "doomdata.h" #include "p_lnspec.h" #include "swrenderer/r_main.h" -#include "swrenderer/scene/r_things.h" #include "r_sky.h" #include "v_video.h" #include "m_swap.h" @@ -42,6 +41,8 @@ #include "swrenderer/things/r_wallsprite.h" #include "swrenderer/r_memory.h" +EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor); + namespace swrenderer { void R_RenderDecals(side_t *sidedef, drawseg_t *draw_segment, int wallshade, float lightleft, float lightstep, seg_t *curline, const FWallCoords &wallC) diff --git a/src/swrenderer/things/r_particle.cpp b/src/swrenderer/things/r_particle.cpp index 4daf1d799f..b256ffc38f 100644 --- a/src/swrenderer/things/r_particle.cpp +++ b/src/swrenderer/things/r_particle.cpp @@ -21,7 +21,6 @@ #include "i_system.h" #include "w_wad.h" #include "swrenderer/r_main.h" -#include "swrenderer/scene/r_things.h" #include "swrenderer/things/r_particle.h" #include "c_console.h" #include "c_cvars.h" @@ -54,6 +53,8 @@ #include "swrenderer/scene/r_portal.h" #include "swrenderer/r_memory.h" +EXTERN_CVAR(Bool, 
r_fullbrightignoresectorcolor); + namespace swrenderer { void R_ProjectParticle(particle_t *particle, const sector_t *sector, int shade, WaterFakeSide fakeside) diff --git a/src/swrenderer/things/r_playersprite.cpp b/src/swrenderer/things/r_playersprite.cpp index d37320c39b..bf648e851c 100644 --- a/src/swrenderer/things/r_playersprite.cpp +++ b/src/swrenderer/things/r_playersprite.cpp @@ -21,7 +21,6 @@ #include "i_system.h" #include "w_wad.h" #include "swrenderer/r_main.h" -#include "swrenderer/scene/r_things.h" #include "swrenderer/things/r_playersprite.h" #include "c_console.h" #include "c_cvars.h" @@ -60,6 +59,7 @@ EXTERN_CVAR(Bool, st_scale) EXTERN_CVAR(Bool, r_drawplayersprites) EXTERN_CVAR(Bool, r_deathcamera) EXTERN_CVAR(Bool, r_shadercolormaps) +EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor) namespace swrenderer { diff --git a/src/swrenderer/things/r_sprite.cpp b/src/swrenderer/things/r_sprite.cpp index 8c1553f6de..e65aac6bfe 100644 --- a/src/swrenderer/things/r_sprite.cpp +++ b/src/swrenderer/things/r_sprite.cpp @@ -21,7 +21,6 @@ #include "i_system.h" #include "w_wad.h" #include "swrenderer/r_main.h" -#include "swrenderer/scene/r_things.h" #include "swrenderer/things/r_wallsprite.h" #include "c_console.h" #include "c_cvars.h" @@ -56,6 +55,7 @@ #include "swrenderer/r_memory.h" EXTERN_CVAR(Bool, r_drawvoxels) +EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor) namespace swrenderer { diff --git a/src/swrenderer/things/r_visiblesprite.cpp b/src/swrenderer/things/r_visiblesprite.cpp index 567eae89d5..492fa3eab0 100644 --- a/src/swrenderer/things/r_visiblesprite.cpp +++ b/src/swrenderer/things/r_visiblesprite.cpp @@ -20,8 +20,26 @@ #include "m_swap.h" #include "i_system.h" #include "w_wad.h" +#include "g_levellocals.h" +#include "p_maputl.h" #include "swrenderer/r_main.h" #include "swrenderer/things/r_visiblesprite.h" +#include "swrenderer/things/r_voxel.h" +#include "swrenderer/things/r_particle.h" +#include "swrenderer/things/r_sprite.h" +#include 
"swrenderer/things/r_wallsprite.h" +#include "swrenderer/things/r_playersprite.h" +#include "swrenderer/segments/r_drawsegment.h" +#include "swrenderer/scene/r_portal.h" +#include "swrenderer/plane/r_visibleplane.h" +#include "swrenderer/r_memory.h" + +EXTERN_CVAR(Int, r_drawfuzz) +EXTERN_CVAR(Bool, r_drawvoxels) +EXTERN_CVAR(Bool, r_blendmethod) + +CVAR(Bool, r_fullbrightignoresectorcolor, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); +CVAR(Bool, r_splitsprites, true, CVAR_ARCHIVE) namespace swrenderer { @@ -31,6 +49,17 @@ namespace swrenderer vissprite_t **vissprite_p; vissprite_t **lastvissprite; + bool DrewAVoxel; + + namespace + { + vissprite_t **spritesorter; + int spritesortersize = 0; + int vsprcount; + + TArray portaldrawsegs; + } + void R_DeinitVisSprites() { // Free vissprites @@ -73,4 +102,631 @@ namespace swrenderer vissprite_p++; return *(vissprite_p - 1); } + + void R_DeinitSprites() + { + R_DeinitVisSprites(); + R_DeinitRenderVoxel(); + + // Free vissprites sorter + if (spritesorter != nullptr) + { + delete[] spritesorter; + spritesortersize = 0; + spritesorter = nullptr; + } + } + + void R_ClearSprites() + { + R_ClearVisSprites(); + DrewAVoxel = false; + } + + void R_CollectPortals() + { + // This function collects all drawsegs that may be of interest to R_ClipSpriteColumnWithPortals + // Having that function over the entire list of drawsegs can break down performance quite drastically. + // This is doing the costly stuff only once so that R_ClipSpriteColumnWithPortals can + // a) exit early if no relevant info is found and + // b) skip most of the collected drawsegs which have no portal attached. + portaldrawsegs.Clear(); + for (drawseg_t* seg = ds_p; seg-- > firstdrawseg; ) // copied code from killough below + { + // I don't know what makes this happen (some old top-down portal code or possibly skybox code? something adds null lines...) 
+ // crashes at the first frame of the first map of Action2.wad + if (!seg->curline) continue; + + line_t* line = seg->curline->linedef; + // ignore minisegs from GL nodes. + if (!line) continue; + + // check if this line will clip sprites to itself + if (!line->isVisualPortal() && line->special != Line_Mirror) + continue; + + // don't clip sprites with portal's back side (it's transparent) + if (seg->curline->sidedef != line->sidedef[0]) + continue; + + portaldrawsegs.Push(seg); + } + } + + bool R_ClipSpriteColumnWithPortals(int x, vissprite_t* spr) + { + RenderPortal *renderportal = RenderPortal::Instance(); + + // [ZZ] 10.01.2016: don't clip sprites from the root of a skybox. + if (renderportal->CurrentPortalInSkybox) + return false; + + for (drawseg_t *seg : portaldrawsegs) + { + // ignore segs from other portals + if (seg->CurrentPortalUniq != renderportal->CurrentPortalUniq) + continue; + + // (all checks that are already done in R_CollectPortals have been removed for performance reasons.) + + // don't clip if the sprite is in front of the portal + if (!P_PointOnLineSidePrecise(spr->gpos.X, spr->gpos.Y, seg->curline->linedef)) + continue; + + // now if current column is covered by this drawseg, we clip it away + if ((x >= seg->x1) && (x < seg->x2)) + return true; + } + + return false; + } + + // This is the standard version, which does a simple test based on depth. + bool sv_compare(vissprite_t *a, vissprite_t *b) + { + return a->idepth > b->idepth; + } + + // This is an alternate version, for when one or more voxel is in view. + // It does a 2D distance test based on whichever one is furthest from + // the viewpoint. 
+ bool sv_compare2d(vissprite_t *a, vissprite_t *b) + { + return DVector2(a->deltax, a->deltay).LengthSquared() < DVector2(b->deltax, b->deltay).LengthSquared(); + } + + void R_SortVisSprites(bool(*compare)(vissprite_t *, vissprite_t *), size_t first) + { + int i; + vissprite_t **spr; + + vsprcount = int(vissprite_p - &vissprites[first]); + + if (vsprcount == 0) + return; + + if (spritesortersize < MaxVisSprites) + { + if (spritesorter != nullptr) + delete[] spritesorter; + spritesorter = new vissprite_t *[MaxVisSprites]; + spritesortersize = MaxVisSprites; + } + + if (!(i_compatflags & COMPATF_SPRITESORT)) + { + for (i = 0, spr = firstvissprite; i < vsprcount; i++, spr++) + { + spritesorter[i] = *spr; + } + } + else + { + // If the compatibility option is on sprites of equal distance need to + // be sorted in inverse order. This is most easily achieved by + // filling the sort array backwards before the sort. + for (i = 0, spr = firstvissprite + vsprcount - 1; i < vsprcount; i++, spr--) + { + spritesorter[i] = *spr; + } + } + + std::stable_sort(&spritesorter[0], &spritesorter[vsprcount], compare); + } + + void R_DrawSprite(vissprite_t *spr) + { + static short clipbot[MAXWIDTH]; + static short cliptop[MAXWIDTH]; + drawseg_t *ds; + int i; + int x1, x2; + int r1, r2; + short topclip, botclip; + short *clip1, *clip2; + FSWColormap *colormap = spr->Style.BaseColormap; + int colormapnum = spr->Style.ColormapNum; + F3DFloor *rover; + FDynamicColormap *mybasecolormap; + + Clip3DFloors *clip3d = Clip3DFloors::Instance(); + + // [RH] Check for particles + if (!spr->bIsVoxel && spr->pic == nullptr) + { + // kg3D - reject invisible parts + if ((clip3d->fake3D & FAKE3D_CLIPBOTTOM) && spr->gpos.Z <= clip3d->sclipBottom) return; + if ((clip3d->fake3D & FAKE3D_CLIPTOP) && spr->gpos.Z >= clip3d->sclipTop) return; + R_DrawParticle(spr); + return; + } + + x1 = spr->x1; + x2 = spr->x2; + + // [RH] Quickly reject sprites with bad x ranges. 
+ if (x1 >= x2) + return; + + // [RH] Sprites split behind a one-sided line can also be discarded. + if (spr->sector == nullptr) + return; + + // kg3D - reject invisible parts + if ((clip3d->fake3D & FAKE3D_CLIPBOTTOM) && spr->gzt <= clip3d->sclipBottom) return; + if ((clip3d->fake3D & FAKE3D_CLIPTOP) && spr->gzb >= clip3d->sclipTop) return; + + // kg3D - correct colors now + if (!fixedcolormap && fixedlightlev < 0 && spr->sector->e && spr->sector->e->XFloor.lightlist.Size()) + { + if (!(clip3d->fake3D & FAKE3D_CLIPTOP)) + { + clip3d->sclipTop = spr->sector->ceilingplane.ZatPoint(ViewPos); + } + sector_t *sec = nullptr; + for (i = spr->sector->e->XFloor.lightlist.Size() - 1; i >= 0; i--) + { + if (clip3d->sclipTop <= spr->sector->e->XFloor.lightlist[i].plane.Zat0()) + { + rover = spr->sector->e->XFloor.lightlist[i].caster; + if (rover) + { + if (rover->flags & FF_DOUBLESHADOW && clip3d->sclipTop <= rover->bottom.plane->Zat0()) + { + break; + } + sec = rover->model; + if (rover->flags & FF_FADEWALLS) + { + mybasecolormap = sec->ColorMap; + } + else + { + mybasecolormap = spr->sector->e->XFloor.lightlist[i].extra_colormap; + } + } + break; + } + } + // found new values, recalculate + if (sec) + { + INTBOOL invertcolormap = (spr->Style.RenderStyle.Flags & STYLEF_InvertOverlay); + + if (spr->Style.RenderStyle.Flags & STYLEF_InvertSource) + { + invertcolormap = !invertcolormap; + } + + // Sprites that are added to the scene must fade to black. 
+ if (spr->Style.RenderStyle == LegacyRenderStyles[STYLE_Add] && mybasecolormap->Fade != 0) + { + mybasecolormap = GetSpecialLights(mybasecolormap->Color, 0, mybasecolormap->Desaturate); + } + + if (spr->Style.RenderStyle.Flags & STYLEF_FadeToBlack) + { + if (invertcolormap) + { // Fade to white + mybasecolormap = GetSpecialLights(mybasecolormap->Color, MAKERGB(255, 255, 255), mybasecolormap->Desaturate); + invertcolormap = false; + } + else + { // Fade to black + mybasecolormap = GetSpecialLights(mybasecolormap->Color, MAKERGB(0, 0, 0), mybasecolormap->Desaturate); + } + } + + // get light level + if (invertcolormap) + { + mybasecolormap = GetSpecialLights(mybasecolormap->Color, mybasecolormap->Fade.InverseColor(), mybasecolormap->Desaturate); + } + if (fixedlightlev >= 0) + { + spr->Style.BaseColormap = mybasecolormap; + spr->Style.ColormapNum = fixedlightlev >> COLORMAPSHIFT; + } + else if (!foggy && (spr->renderflags & RF_FULLBRIGHT)) + { // full bright + spr->Style.BaseColormap = (r_fullbrightignoresectorcolor) ? &FullNormalLight : mybasecolormap; + spr->Style.ColormapNum = 0; + } + else + { // diminished light + int spriteshade = LIGHT2SHADE(sec->lightlevel + r_actualextralight); + spr->Style.BaseColormap = mybasecolormap; + spr->Style.ColormapNum = GETPALOOKUP(r_SpriteVisibility / MAX(MINZ, (double)spr->depth), spriteshade); + } + } + } + + // [RH] Initialize the clipping arrays to their largest possible range + // instead of using a special "not clipped" value. This eliminates + // visual anomalies when looking down and should be faster, too. + topclip = 0; + botclip = viewheight; + + // killough 3/27/98: + // Clip the sprite against deep water and/or fake ceilings. 
+ // [RH] rewrote this to be based on which part of the sector is really visible + + double scale = InvZtoScale * spr->idepth; + double hzb = DBL_MIN, hzt = DBL_MAX; + + if (spr->bIsVoxel && spr->floorclip != 0) + { + hzb = spr->gzb; + } + + if (spr->heightsec && !(spr->heightsec->MoreFlags & SECF_IGNOREHEIGHTSEC)) + { // only things in specially marked sectors + if (spr->FakeFlatStat != WaterFakeSide::AboveCeiling) + { + double hz = spr->heightsec->floorplane.ZatPoint(spr->gpos); + int h = xs_RoundToInt(CenterY - (hz - ViewPos.Z) * scale); + + if (spr->FakeFlatStat == WaterFakeSide::BelowFloor) + { // seen below floor: clip top + if (!spr->bIsVoxel && h > topclip) + { + topclip = short(MIN(h, viewheight)); + } + hzt = MIN(hzt, hz); + } + else + { // seen in the middle: clip bottom + if (!spr->bIsVoxel && h < botclip) + { + botclip = MAX(0, h); + } + hzb = MAX(hzb, hz); + } + } + if (spr->FakeFlatStat != WaterFakeSide::BelowFloor && !(spr->heightsec->MoreFlags & SECF_FAKEFLOORONLY)) + { + double hz = spr->heightsec->ceilingplane.ZatPoint(spr->gpos); + int h = xs_RoundToInt(CenterY - (hz - ViewPos.Z) * scale); + + if (spr->FakeFlatStat == WaterFakeSide::AboveCeiling) + { // seen above ceiling: clip bottom + if (!spr->bIsVoxel && h < botclip) + { + botclip = MAX(0, h); + } + hzb = MAX(hzb, hz); + } + else + { // seen in the middle: clip top + if (!spr->bIsVoxel && h > topclip) + { + topclip = MIN(h, viewheight); + } + hzt = MIN(hzt, hz); + } + } + } + // killough 3/27/98: end special clipping for deep water / fake ceilings + else if (!spr->bIsVoxel && spr->floorclip) + { // [RH] Move floorclip stuff from R_DrawVisSprite to here + //int clip = ((FLOAT2FIXED(CenterY) - FixedMul (spr->texturemid - (spr->pic->GetHeight() << FRACBITS) + spr->floorclip, spr->yscale)) >> FRACBITS); + int clip = xs_RoundToInt(CenterY - (spr->texturemid - spr->pic->GetHeight() + spr->floorclip) * spr->yscale); + if (clip < botclip) + { + botclip = MAX(0, clip); + } + } + + if (clip3d->fake3D 
& FAKE3D_CLIPBOTTOM) + { + if (!spr->bIsVoxel) + { + double hz = clip3d->sclipBottom; + if (spr->fakefloor) + { + double floorz = spr->fakefloor->top.plane->Zat0(); + if (ViewPos.Z > floorz && floorz == clip3d->sclipBottom) + { + hz = spr->fakefloor->bottom.plane->Zat0(); + } + } + int h = xs_RoundToInt(CenterY - (hz - ViewPos.Z) * scale); + if (h < botclip) + { + botclip = MAX(0, h); + } + } + hzb = MAX(hzb, clip3d->sclipBottom); + } + if (clip3d->fake3D & FAKE3D_CLIPTOP) + { + if (!spr->bIsVoxel) + { + double hz = clip3d->sclipTop; + if (spr->fakeceiling != nullptr) + { + double ceilingZ = spr->fakeceiling->bottom.plane->Zat0(); + if (ViewPos.Z < ceilingZ && ceilingZ == clip3d->sclipTop) + { + hz = spr->fakeceiling->top.plane->Zat0(); + } + } + int h = xs_RoundToInt(CenterY - (hz - ViewPos.Z) * scale); + if (h > topclip) + { + topclip = short(MIN(h, viewheight)); + } + } + hzt = MIN(hzt, clip3d->sclipTop); + } + + if (topclip >= botclip) + { + spr->Style.BaseColormap = colormap; + spr->Style.ColormapNum = colormapnum; + return; + } + + i = x2 - x1; + clip1 = clipbot + x1; + clip2 = cliptop + x1; + do + { + *clip1++ = botclip; + *clip2++ = topclip; + } while (--i); + + // Scan drawsegs from end to start for obscuring segs. + // The first drawseg that is closer than the sprite is the clip seg. 
+ + // Modified by Lee Killough: + // (pointer check was originally nonportable + // and buggy, by going past LEFT end of array): + + // for (ds=ds_p-1 ; ds >= drawsegs ; ds--) old buggy code + + for (ds = ds_p; ds-- > firstdrawseg; ) // new -- killough + { + // [ZZ] portal handling here + //if (ds->CurrentPortalUniq != spr->CurrentPortalUniq) + // continue; + // [ZZ] WARNING: uncommenting the two above lines, totally breaks sprite clipping + + // kg3D - no clipping on fake segs + if (ds->fake) continue; + // determine if the drawseg obscures the sprite + if (ds->x1 >= x2 || ds->x2 <= x1 || + (!(ds->silhouette & SIL_BOTH) && ds->maskedtexturecol == -1 && + !ds->bFogBoundary)) + { + // does not cover sprite + continue; + } + + r1 = MAX(ds->x1, x1); + r2 = MIN(ds->x2, x2); + + float neardepth, fardepth; + if (!spr->bWallSprite) + { + if (ds->sz1 < ds->sz2) + { + neardepth = ds->sz1, fardepth = ds->sz2; + } + else + { + neardepth = ds->sz2, fardepth = ds->sz1; + } + } + + + // Check if sprite is in front of draw seg: + if ((!spr->bWallSprite && neardepth > spr->depth) || ((spr->bWallSprite || fardepth > spr->depth) && + (spr->gpos.Y - ds->curline->v1->fY()) * (ds->curline->v2->fX() - ds->curline->v1->fX()) - + (spr->gpos.X - ds->curline->v1->fX()) * (ds->curline->v2->fY() - ds->curline->v1->fY()) <= 0)) + { + RenderPortal *renderportal = RenderPortal::Instance(); + + // seg is behind sprite, so draw the mid texture if it has one + if (ds->CurrentPortalUniq == renderportal->CurrentPortalUniq && // [ZZ] instead, portal uniq check is made here + (ds->maskedtexturecol != -1 || ds->bFogBoundary)) + R_RenderMaskedSegRange(ds, r1, r2); + + continue; + } + + // clip this piece of the sprite + // killough 3/27/98: optimized and made much shorter + // [RH] Optimized further (at least for VC++; + // other compilers should be at least as good as before) + + if (ds->silhouette & SIL_BOTTOM) //bottom sil + { + clip1 = clipbot + r1; + clip2 = openings + ds->sprbottomclip + r1 - 
ds->x1; + i = r2 - r1; + do + { + if (*clip1 > *clip2) + *clip1 = *clip2; + clip1++; + clip2++; + } while (--i); + } + + if (ds->silhouette & SIL_TOP) // top sil + { + clip1 = cliptop + r1; + clip2 = openings + ds->sprtopclip + r1 - ds->x1; + i = r2 - r1; + do + { + if (*clip1 < *clip2) + *clip1 = *clip2; + clip1++; + clip2++; + } while (--i); + } + } + + // all clipping has been performed, so draw the sprite + + if (!spr->bIsVoxel) + { + if (!spr->bWallSprite) + { + R_DrawVisSprite(spr, clipbot, cliptop); + } + else + { + R_DrawWallSprite(spr, clipbot, cliptop); + } + } + else + { + // If it is completely clipped away, don't bother drawing it. + if (cliptop[x2] >= clipbot[x2]) + { + for (i = x1; i < x2; ++i) + { + if (cliptop[i] < clipbot[i]) + { + break; + } + } + if (i == x2) + { + spr->Style.BaseColormap = colormap; + spr->Style.ColormapNum = colormapnum; + return; + } + } + // Add everything outside the left and right edges to the clipping array + // for R_DrawVisVoxel(). + if (x1 > 0) + { + fillshort(cliptop, x1, viewheight); + } + if (x2 < viewwidth - 1) + { + fillshort(cliptop + x2, viewwidth - x2, viewheight); + } + int minvoxely = spr->gzt <= hzt ? 0 : xs_RoundToInt((spr->gzt - hzt) / spr->yscale); + int maxvoxely = spr->gzb > hzb ? 
INT_MAX : xs_RoundToInt((spr->gzt - hzb) / spr->yscale); + R_DrawVisVoxel(spr, minvoxely, maxvoxely, cliptop, clipbot); + } + spr->Style.BaseColormap = colormap; + spr->Style.ColormapNum = colormapnum; + } + + void R_DrawMaskedSingle(bool renew) + { + RenderPortal *renderportal = RenderPortal::Instance(); + + for (int i = vsprcount; i > 0; i--) + { + if (spritesorter[i - 1]->CurrentPortalUniq != renderportal->CurrentPortalUniq) + continue; // probably another time + R_DrawSprite(spritesorter[i - 1]); + } + + // render any remaining masked mid textures + + // Modified by Lee Killough: + // (pointer check was originally nonportable + // and buggy, by going past LEFT end of array): + + // for (ds=ds_p-1 ; ds >= drawsegs ; ds--) old buggy code + + if (renew) + { + Clip3DFloors::Instance()->fake3D |= FAKE3D_REFRESHCLIP; + } + for (drawseg_t *ds = ds_p; ds-- > firstdrawseg; ) // new -- killough + { + // [ZZ] the same as above + if (ds->CurrentPortalUniq != renderportal->CurrentPortalUniq) + continue; + // kg3D - no fake segs + if (ds->fake) continue; + if (ds->maskedtexturecol != -1 || ds->bFogBoundary) + { + R_RenderMaskedSegRange(ds, ds->x1, ds->x2); + } + } + } + + void R_DrawMasked() + { + R_CollectPortals(); + R_SortVisSprites(DrewAVoxel ? 
sv_compare2d : sv_compare, firstvissprite - vissprites); + + Clip3DFloors *clip3d = Clip3DFloors::Instance(); + if (clip3d->height_top == nullptr) + { // kg3D - no visible 3D floors, normal rendering + R_DrawMaskedSingle(false); + } + else + { // kg3D - correct sorting + // ceilings + for (HeightLevel *hl = clip3d->height_cur; hl != nullptr && hl->height >= ViewPos.Z; hl = hl->prev) + { + if (hl->next) + { + clip3d->fake3D = FAKE3D_CLIPBOTTOM | FAKE3D_CLIPTOP; + clip3d->sclipTop = hl->next->height; + } + else + { + clip3d->fake3D = FAKE3D_CLIPBOTTOM; + } + clip3d->sclipBottom = hl->height; + R_DrawMaskedSingle(true); + R_DrawHeightPlanes(hl->height); + } + + // floors + clip3d->fake3D = FAKE3D_DOWN2UP | FAKE3D_CLIPTOP; + clip3d->sclipTop = clip3d->height_top->height; + R_DrawMaskedSingle(true); + for (HeightLevel *hl = clip3d->height_top; hl != nullptr && hl->height < ViewPos.Z; hl = hl->next) + { + R_DrawHeightPlanes(hl->height); + if (hl->next) + { + clip3d->fake3D = FAKE3D_DOWN2UP | FAKE3D_CLIPTOP | FAKE3D_CLIPBOTTOM; + clip3d->sclipTop = hl->next->height; + } + else + { + clip3d->fake3D = FAKE3D_DOWN2UP | FAKE3D_CLIPBOTTOM; + } + clip3d->sclipBottom = hl->height; + R_DrawMaskedSingle(true); + } + clip3d->DeleteHeights(); + clip3d->fake3D = 0; + } + R_DrawPlayerSprites(); + } } diff --git a/src/swrenderer/things/r_visiblesprite.h b/src/swrenderer/things/r_visiblesprite.h index db40aa1cdd..6a540d1ab3 100644 --- a/src/swrenderer/things/r_visiblesprite.h +++ b/src/swrenderer/things/r_visiblesprite.h @@ -92,7 +92,21 @@ namespace swrenderer extern vissprite_t **vissprites, **firstvissprite; extern vissprite_t **vissprite_p; + extern bool DrewAVoxel; + void R_DeinitVisSprites(); void R_ClearVisSprites(); vissprite_t *R_NewVisSprite(); + + void R_DeinitSprites(); + void R_ClearSprites(); + void R_CollectPortals(); + bool R_ClipSpriteColumnWithPortals(int x, vissprite_t* spr); + void R_SortVisSprites(bool(*compare)(vissprite_t *, vissprite_t *), size_t first); + void 
R_DrawSprite(vissprite_t *spr); + void R_DrawMaskedSingle(bool renew); + void R_DrawMasked(); + + bool sv_compare(vissprite_t *a, vissprite_t *b); + bool sv_compare2d(vissprite_t *a, vissprite_t *b); } diff --git a/src/swrenderer/things/r_voxel.cpp b/src/swrenderer/things/r_voxel.cpp index c02e9ac8f4..67f1e47e88 100644 --- a/src/swrenderer/things/r_voxel.cpp +++ b/src/swrenderer/things/r_voxel.cpp @@ -31,12 +31,12 @@ #include "r_data/sprites.h" #include "d_net.h" #include "po_man.h" -#include "swrenderer/scene/r_things.h" +#include "r_utility.h" #include "swrenderer/drawers/r_draw.h" #include "swrenderer/drawers/r_thread.h" -#include "r_utility.h" +#include "swrenderer/things/r_visiblesprite.h" +#include "swrenderer/things/r_voxel.h" #include "swrenderer/r_main.h" -#include "r_voxel.h" namespace swrenderer { diff --git a/src/swrenderer/things/r_voxel.h b/src/swrenderer/things/r_voxel.h index 7d566394d5..e8f97cdd96 100644 --- a/src/swrenderer/things/r_voxel.h +++ b/src/swrenderer/things/r_voxel.h @@ -24,6 +24,7 @@ #pragma once struct kvxslab_t; +struct FVoxelMipLevel; namespace swrenderer { diff --git a/src/swrenderer/things/r_wallsprite.cpp b/src/swrenderer/things/r_wallsprite.cpp index d3ba5fee9e..9ddd7bfd1f 100644 --- a/src/swrenderer/things/r_wallsprite.cpp +++ b/src/swrenderer/things/r_wallsprite.cpp @@ -21,7 +21,6 @@ #include "i_system.h" #include "w_wad.h" #include "swrenderer/r_main.h" -#include "swrenderer/scene/r_things.h" #include "swrenderer/things/r_wallsprite.h" #include "c_console.h" #include "c_cvars.h" @@ -56,6 +55,8 @@ #include "swrenderer/line/r_walldraw.h" #include "swrenderer/r_memory.h" +EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor); + namespace swrenderer { void R_ProjectWallSprite(AActor *thing, const DVector3 &pos, FTextureID picnum, const DVector2 &scale, int renderflags, int spriteshade) From 4b96d7377fd57098f1e19205f4eee35bd370b241 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 11 Jan 2017 17:59:29 +0100 Subject: [PATCH 
680/912] Detach voxel and wallsprite projection from (normal) sprite projection and move the type decision making to RenderBSP::AddSprites --- src/swrenderer/scene/r_bsp.cpp | 167 +++++++++++++- src/swrenderer/scene/r_bsp.h | 16 ++ src/swrenderer/things/r_sprite.cpp | 339 ++++++----------------------- src/swrenderer/things/r_sprite.h | 2 +- src/swrenderer/things/r_voxel.cpp | 179 +++++++++++++++ src/swrenderer/things/r_voxel.h | 1 + 6 files changed, 424 insertions(+), 280 deletions(-) diff --git a/src/swrenderer/scene/r_bsp.cpp b/src/swrenderer/scene/r_bsp.cpp index 5aa15e3cc6..67a8a3219c 100644 --- a/src/swrenderer/scene/r_bsp.cpp +++ b/src/swrenderer/scene/r_bsp.cpp @@ -36,6 +36,8 @@ #include "swrenderer/drawers/r_draw.h" #include "swrenderer/plane/r_visibleplane.h" #include "swrenderer/things/r_sprite.h" +#include "swrenderer/things/r_wallsprite.h" +#include "swrenderer/things/r_voxel.h" #include "swrenderer/things/r_particle.h" #include "swrenderer/segments/r_clipsegment.h" #include "swrenderer/line/r_wallsetup.h" @@ -45,6 +47,7 @@ #include "g_level.h" #include "p_effect.h" #include "c_console.h" +#include "p_maputl.h" // State. #include "doomstat.h" @@ -57,6 +60,7 @@ #include "g_levellocals.h" EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor); +EXTERN_CVAR(Bool, r_drawvoxels); namespace swrenderer { @@ -855,9 +859,170 @@ namespace swrenderer if (rover->bottom.plane->ZatPoint(0., 0.) 
>= thing->Top()) fakeceiling = rover; } } - R_ProjectSprite(thing, fakeside, fakefloor, fakeceiling, sec, spriteshade); + + if (IsPotentiallyVisible(thing)) + { + ThingSprite sprite; + if (GetThingSprite(thing, sprite)) + { + if ((sprite.renderflags & RF_SPRITETYPEMASK) == RF_WALLSPRITE) + { + R_ProjectWallSprite(thing, sprite.pos, sprite.picnum, sprite.spriteScale, sprite.renderflags, spriteshade); + } + else if (sprite.voxel) + { + R_ProjectVoxel(thing, sprite.pos, sprite.voxel, sprite.spriteScale, sprite.renderflags, fakeside, fakefloor, fakeceiling, sec, spriteshade); + } + else + { + R_ProjectSprite(thing, sprite.pos, sprite.tex, sprite.spriteScale, sprite.renderflags, fakeside, fakefloor, fakeceiling, sec, spriteshade); + } + } + } + fakeceiling = nullptr; fakefloor = nullptr; } } + + bool RenderBSP::IsPotentiallyVisible(AActor *thing) + { + // Don't waste time projecting sprites that are definitely not visible. + if (thing == nullptr || + (thing->renderflags & RF_INVISIBLE) || + !thing->RenderStyle.IsVisible(thing->Alpha) || + !thing->IsVisibleToPlayer() || + !thing->IsInsideVisibleAngles()) + { + return false; + } + + // [ZZ] Or less definitely not visible (hue) + // [ZZ] 10.01.2016: don't try to clip stuff inside a skybox against the current portal. 
+ RenderPortal *renderportal = RenderPortal::Instance(); + if (!renderportal->CurrentPortalInSkybox && renderportal->CurrentPortal && !!P_PointOnLineSidePrecise(thing->Pos(), renderportal->CurrentPortal->dst)) + return false; + + return true; + } + + bool RenderBSP::GetThingSprite(AActor *thing, ThingSprite &sprite) + { + sprite.pos = thing->InterpolatedPosition(r_TicFracF); + sprite.pos.Z += thing->GetBobOffset(r_TicFracF); + + sprite.spritenum = thing->sprite; + sprite.tex = nullptr; + sprite.voxel = nullptr; + sprite.spriteScale = thing->Scale; + sprite.renderflags = thing->renderflags; + + if (thing->picnum.isValid()) + { + sprite.picnum = thing->picnum; + + sprite.tex = TexMan(sprite.picnum); + if (sprite.tex->UseType == FTexture::TEX_Null) + { + return false; + } + + if (sprite.tex->Rotations != 0xFFFF) + { + // choose a different rotation based on player view + spriteframe_t *sprframe = &SpriteFrames[sprite.tex->Rotations]; + DAngle ang = (sprite.pos - ViewPos).Angle(); + angle_t rot; + if (sprframe->Texture[0] == sprframe->Texture[1]) + { + if (thing->flags7 & MF7_SPRITEANGLE) + rot = (thing->SpriteAngle + 45.0 / 2 * 9).BAMs() >> 28; + else + rot = (ang - (thing->Angles.Yaw + thing->SpriteRotation) + 45.0 / 2 * 9).BAMs() >> 28; + } + else + { + if (thing->flags7 & MF7_SPRITEANGLE) + rot = (thing->SpriteAngle + (45.0 / 2 * 9 - 180.0 / 16)).BAMs() >> 28; + else + rot = (ang - (thing->Angles.Yaw + thing->SpriteRotation) + (45.0 / 2 * 9 - 180.0 / 16)).BAMs() >> 28; + } + sprite.picnum = sprframe->Texture[rot]; + if (sprframe->Flip & (1 << rot)) + { + sprite.renderflags ^= RF_XFLIP; + } + sprite.tex = TexMan[sprite.picnum]; // Do not animate the rotation + } + } + else + { + // decide which texture to use for the sprite + if ((unsigned)sprite.spritenum >= sprites.Size()) + { + DPrintf(DMSG_ERROR, "R_ProjectSprite: invalid sprite number %u\n", sprite.spritenum); + return false; + } + spritedef_t *sprdef = &sprites[sprite.spritenum]; + if (thing->frame >= 
sprdef->numframes) + { + // If there are no frames at all for this sprite, don't draw it. + return false; + } + else + { + //picnum = SpriteFrames[sprdef->spriteframes + thing->frame].Texture[0]; + // choose a different rotation based on player view + spriteframe_t *sprframe = &SpriteFrames[sprdef->spriteframes + thing->frame]; + DAngle ang = (sprite.pos - ViewPos).Angle(); + angle_t rot; + if (sprframe->Texture[0] == sprframe->Texture[1]) + { + if (thing->flags7 & MF7_SPRITEANGLE) + rot = (thing->SpriteAngle + 45.0 / 2 * 9).BAMs() >> 28; + else + rot = (ang - (thing->Angles.Yaw + thing->SpriteRotation) + 45.0 / 2 * 9).BAMs() >> 28; + } + else + { + if (thing->flags7 & MF7_SPRITEANGLE) + rot = (thing->SpriteAngle + (45.0 / 2 * 9 - 180.0 / 16)).BAMs() >> 28; + else + rot = (ang - (thing->Angles.Yaw + thing->SpriteRotation) + (45.0 / 2 * 9 - 180.0 / 16)).BAMs() >> 28; + } + sprite.picnum = sprframe->Texture[rot]; + if (sprframe->Flip & (1 << rot)) + { + sprite.renderflags ^= RF_XFLIP; + } + sprite.tex = TexMan[sprite.picnum]; // Do not animate the rotation + if (r_drawvoxels) + { + sprite.voxel = sprframe->Voxel; + } + } + + if (sprite.voxel == nullptr && (sprite.tex == nullptr || sprite.tex->UseType == FTexture::TEX_Null)) + { + return false; + } + + if (sprite.spriteScale.Y < 0) + { + sprite.spriteScale.Y = -sprite.spriteScale.Y; + sprite.renderflags ^= RF_YFLIP; + } + if (thing->player != nullptr) + { + P_CheckPlayerSprite(thing, sprite.spritenum, sprite.spriteScale); + } + if (sprite.spriteScale.X < 0) + { + sprite.spriteScale.X = -sprite.spriteScale.X; + sprite.renderflags ^= RF_XFLIP; + } + } + + return true; + } } diff --git a/src/swrenderer/scene/r_bsp.h b/src/swrenderer/scene/r_bsp.h index e673baff6b..ebc59a865b 100644 --- a/src/swrenderer/scene/r_bsp.h +++ b/src/swrenderer/scene/r_bsp.h @@ -19,6 +19,8 @@ #include "swrenderer/line/r_line.h" #include "swrenderer/scene/r_3dfloors.h" +struct FVoxelDef; + namespace swrenderer { struct visplane_t; @@ -35,6 +37,17 
@@ namespace swrenderer AboveCeiling }; + struct ThingSprite + { + DVector3 pos; + int spritenum; + FTexture *tex; + FVoxelDef *voxel; + FTextureID picnum; + DVector2 spriteScale; + int renderflags; + }; + class RenderBSP { public: @@ -59,6 +72,9 @@ namespace swrenderer void AddSprites(sector_t *sec, int lightlevel, WaterFakeSide fakeside); + static bool IsPotentiallyVisible(AActor *thing); + static bool GetThingSprite(AActor *thing, ThingSprite &sprite); + subsector_t *InSubsector = nullptr; sector_t *frontsector = nullptr; WaterFakeSide FakeSide = WaterFakeSide::Center; diff --git a/src/swrenderer/things/r_sprite.cpp b/src/swrenderer/things/r_sprite.cpp index e65aac6bfe..7bc93e4ac0 100644 --- a/src/swrenderer/things/r_sprite.cpp +++ b/src/swrenderer/things/r_sprite.cpp @@ -47,194 +47,32 @@ #include "r_data/colormaps.h" #include "r_data/voxels.h" #include "p_local.h" -#include "p_maputl.h" #include "r_voxel.h" #include "swrenderer/segments/r_drawsegment.h" #include "swrenderer/scene/r_portal.h" #include "swrenderer/things/r_sprite.h" #include "swrenderer/r_memory.h" -EXTERN_CVAR(Bool, r_drawvoxels) EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor) namespace swrenderer { - void R_ProjectSprite(AActor *thing, WaterFakeSide fakeside, F3DFloor *fakefloor, F3DFloor *fakeceiling, sector_t *current_sector, int spriteshade) + void R_ProjectSprite(AActor *thing, const DVector3 &pos, FTexture *tex, const DVector2 &spriteScale, int renderflags, WaterFakeSide fakeside, F3DFloor *fakefloor, F3DFloor *fakeceiling, sector_t *current_sector, int spriteshade) { - double tr_x; - double tr_y; - - double gzt; // killough 3/27/98 - double gzb; // [RH] use bottom of sprite, not actor - double tx;// , tx2; - double tz; - - double xscale = 1, yscale = 1; - - int x1; - int x2; - - FTextureID picnum; - FTexture *tex; - FVoxelDef *voxel; - - vissprite_t* vis; - - fixed_t iscale; - - sector_t* heightsec; // killough 3/27/98 - - // Don't waste time projecting sprites that are definitely not 
visible. - if (thing == nullptr || - (thing->renderflags & RF_INVISIBLE) || - !thing->RenderStyle.IsVisible(thing->Alpha) || - !thing->IsVisibleToPlayer() || - !thing->IsInsideVisibleAngles()) - { - return; - } - - RenderPortal *renderportal = RenderPortal::Instance(); - - // [ZZ] Or less definitely not visible (hue) - // [ZZ] 10.01.2016: don't try to clip stuff inside a skybox against the current portal. - if (!renderportal->CurrentPortalInSkybox && renderportal->CurrentPortal && !!P_PointOnLineSidePrecise(thing->Pos(), renderportal->CurrentPortal->dst)) - return; - - // [RH] Interpolate the sprite's position to make it look smooth - DVector3 pos = thing->InterpolatedPosition(r_TicFracF); - pos.Z += thing->GetBobOffset(r_TicFracF); - - tex = nullptr; - voxel = nullptr; - - int spritenum = thing->sprite; - DVector2 spriteScale = thing->Scale; - int renderflags = thing->renderflags; - if (spriteScale.Y < 0) - { - spriteScale.Y = -spriteScale.Y; - renderflags ^= RF_YFLIP; - } - if (thing->player != nullptr) - { - P_CheckPlayerSprite(thing, spritenum, spriteScale); - } - - if (thing->picnum.isValid()) - { - picnum = thing->picnum; - - tex = TexMan(picnum); - if (tex->UseType == FTexture::TEX_Null) - { - return; - } - - if (tex->Rotations != 0xFFFF) - { - // choose a different rotation based on player view - spriteframe_t *sprframe = &SpriteFrames[tex->Rotations]; - DAngle ang = (pos - ViewPos).Angle(); - angle_t rot; - if (sprframe->Texture[0] == sprframe->Texture[1]) - { - if (thing->flags7 & MF7_SPRITEANGLE) - rot = (thing->SpriteAngle + 45.0 / 2 * 9).BAMs() >> 28; - else - rot = (ang - (thing->Angles.Yaw + thing->SpriteRotation) + 45.0 / 2 * 9).BAMs() >> 28; - } - else - { - if (thing->flags7 & MF7_SPRITEANGLE) - rot = (thing->SpriteAngle + (45.0 / 2 * 9 - 180.0 / 16)).BAMs() >> 28; - else - rot = (ang - (thing->Angles.Yaw + thing->SpriteRotation) + (45.0 / 2 * 9 - 180.0 / 16)).BAMs() >> 28; - } - picnum = sprframe->Texture[rot]; - if (sprframe->Flip & (1 << rot)) 
- { - renderflags ^= RF_XFLIP; - } - tex = TexMan[picnum]; // Do not animate the rotation - } - } - else - { - // decide which texture to use for the sprite - if ((unsigned)spritenum >= sprites.Size()) - { - DPrintf(DMSG_ERROR, "R_ProjectSprite: invalid sprite number %u\n", spritenum); - return; - } - spritedef_t *sprdef = &sprites[spritenum]; - if (thing->frame >= sprdef->numframes) - { - // If there are no frames at all for this sprite, don't draw it. - return; - } - else - { - //picnum = SpriteFrames[sprdef->spriteframes + thing->frame].Texture[0]; - // choose a different rotation based on player view - spriteframe_t *sprframe = &SpriteFrames[sprdef->spriteframes + thing->frame]; - DAngle ang = (pos - ViewPos).Angle(); - angle_t rot; - if (sprframe->Texture[0] == sprframe->Texture[1]) - { - if (thing->flags7 & MF7_SPRITEANGLE) - rot = (thing->SpriteAngle + 45.0 / 2 * 9).BAMs() >> 28; - else - rot = (ang - (thing->Angles.Yaw + thing->SpriteRotation) + 45.0 / 2 * 9).BAMs() >> 28; - } - else - { - if (thing->flags7 & MF7_SPRITEANGLE) - rot = (thing->SpriteAngle + (45.0 / 2 * 9 - 180.0 / 16)).BAMs() >> 28; - else - rot = (ang - (thing->Angles.Yaw + thing->SpriteRotation) + (45.0 / 2 * 9 - 180.0 / 16)).BAMs() >> 28; - } - picnum = sprframe->Texture[rot]; - if (sprframe->Flip & (1 << rot)) - { - renderflags ^= RF_XFLIP; - } - tex = TexMan[picnum]; // Do not animate the rotation - if (r_drawvoxels) - { - voxel = sprframe->Voxel; - } - } - } - if (spriteScale.X < 0) - { - spriteScale.X = -spriteScale.X; - renderflags ^= RF_XFLIP; - } - if (voxel == nullptr && (tex == nullptr || tex->UseType == FTexture::TEX_Null)) - { - return; - } - - if ((renderflags & RF_SPRITETYPEMASK) == RF_WALLSPRITE) - { - R_ProjectWallSprite(thing, pos, picnum, spriteScale, renderflags, spriteshade); - return; - } - // transform the origin point - tr_x = pos.X - ViewPos.X; - tr_y = pos.Y - ViewPos.Y; + double tr_x = pos.X - ViewPos.X; + double tr_y = pos.Y - ViewPos.Y; - tz = tr_x * ViewTanCos + 
tr_y * ViewTanSin; + double tz = tr_x * ViewTanCos + tr_y * ViewTanSin; // thing is behind view plane? - if (voxel == nullptr && tz < MINZ) + if (tz < MINZ) return; - tx = tr_x * ViewSin - tr_y * ViewCos; + double tx = tr_x * ViewSin - tr_y * ViewCos; // [RH] Flip for mirrors + RenderPortal *renderportal = RenderPortal::Instance(); if (renderportal->MirrorFlags & RF_XFLIP) { tx = -tx; @@ -242,37 +80,22 @@ namespace swrenderer //tx2 = tx >> 4; // too far off the side? - // if it's a voxel, it can be further off the side - if ((voxel == nullptr && (fabs(tx / 64) > fabs(tz))) || - (voxel != nullptr && (fabs(tx / 128) > fabs(tz)))) + if (fabs(tx / 64) > fabs(tz)) { return; } - if (voxel == nullptr) - { - // [RH] Added scaling - int scaled_to = tex->GetScaledTopOffset(); - int scaled_bo = scaled_to - tex->GetScaledHeight(); - gzt = pos.Z + spriteScale.Y * scaled_to; - gzb = pos.Z + spriteScale.Y * scaled_bo; - } - else - { - xscale = spriteScale.X * voxel->Scale; - yscale = spriteScale.Y * voxel->Scale; - double piv = voxel->Voxel->Mips[0].Pivot.Z; - gzt = pos.Z + yscale * piv - thing->Floorclip; - gzb = pos.Z + yscale * (piv - voxel->Voxel->Mips[0].SizeZ); - if (gzt <= gzb) - return; - } + // [RH] Added scaling + int scaled_to = tex->GetScaledTopOffset(); + int scaled_bo = scaled_to - tex->GetScaledHeight(); + double gzt = pos.Z + spriteScale.Y * scaled_to; + double gzb = pos.Z + spriteScale.Y * scaled_bo; // killough 3/27/98: exclude things totally separated // from the viewer, by either water or fake ceilings // killough 4/11/98: improve sprite clipping for underwater/fake ceilings - heightsec = thing->Sector->GetHeightSec(); + sector_t *heightsec = thing->Sector->GetHeightSec(); if (heightsec != nullptr) // only clip things which are in special sectors { @@ -295,95 +118,66 @@ namespace swrenderer } } - if (voxel == nullptr) + double xscale = CenterX / tz; + + // [RH] Reject sprites that are off the top or bottom of the screen + if (globaluclip * tz > ViewPos.Z - gzb 
|| globaldclip * tz < ViewPos.Z - gzt) { - xscale = CenterX / tz; + return; + } - // [RH] Reject sprites that are off the top or bottom of the screen - if (globaluclip * tz > ViewPos.Z - gzb || globaldclip * tz < ViewPos.Z - gzt) - { - return; - } + // [RH] Flip for mirrors + renderflags ^= renderportal->MirrorFlags & RF_XFLIP; - // [RH] Flip for mirrors - renderflags ^= renderportal->MirrorFlags & RF_XFLIP; + // calculate edges of the shape + const double thingxscalemul = spriteScale.X / tex->Scale.X; - // calculate edges of the shape - const double thingxscalemul = spriteScale.X / tex->Scale.X; + tx -= ((renderflags & RF_XFLIP) ? (tex->GetWidth() - tex->LeftOffset - 1) : tex->LeftOffset) * thingxscalemul; + double dtx1 = tx * xscale; + int x1 = centerx + xs_RoundToInt(dtx1); - tx -= ((renderflags & RF_XFLIP) ? (tex->GetWidth() - tex->LeftOffset - 1) : tex->LeftOffset) * thingxscalemul; - double dtx1 = tx * xscale; - x1 = centerx + xs_RoundToInt(dtx1); + // off the right side? + if (x1 >= renderportal->WindowRight) + return; - // off the right side? - if (x1 >= renderportal->WindowRight) - return; + tx += tex->GetWidth() * thingxscalemul; + int x2 = centerx + xs_RoundToInt(tx * xscale); - tx += tex->GetWidth() * thingxscalemul; - x2 = centerx + xs_RoundToInt(tx * xscale); + // off the left side or too small? + if ((x2 < renderportal->WindowLeft || x2 <= x1)) + return; - // off the left side or too small? 
- if ((x2 < renderportal->WindowLeft || x2 <= x1)) - return; + xscale = spriteScale.X * xscale / tex->Scale.X; + fixed_t iscale = (fixed_t)(FRACUNIT / xscale); // Round towards zero to avoid wrapping in edge cases - xscale = spriteScale.X * xscale / tex->Scale.X; - iscale = (fixed_t)(FRACUNIT / xscale); // Round towards zero to avoid wrapping in edge cases + double yscale = spriteScale.Y / tex->Scale.Y; - double yscale = spriteScale.Y / tex->Scale.Y; + // store information in a vissprite + vissprite_t *vis = R_NewVisSprite(); - // store information in a vissprite - vis = R_NewVisSprite(); + vis->CurrentPortalUniq = renderportal->CurrentPortalUniq; + vis->xscale = FLOAT2FIXED(xscale); + vis->yscale = float(InvZtoScale * yscale / tz); + vis->idepth = float(1 / tz); + vis->floorclip = thing->Floorclip / yscale; + vis->texturemid = tex->TopOffset - (ViewPos.Z - pos.Z + thing->Floorclip) / yscale; + vis->x1 = x1 < renderportal->WindowLeft ? renderportal->WindowLeft : x1; + vis->x2 = x2 > renderportal->WindowRight ? renderportal->WindowRight : x2; + vis->Angle = thing->Angles.Yaw; - vis->CurrentPortalUniq = renderportal->CurrentPortalUniq; - vis->xscale = FLOAT2FIXED(xscale); - vis->yscale = float(InvZtoScale * yscale / tz); - vis->idepth = float(1 / tz); - vis->floorclip = thing->Floorclip / yscale; - vis->texturemid = tex->TopOffset - (ViewPos.Z - pos.Z + thing->Floorclip) / yscale; - vis->x1 = x1 < renderportal->WindowLeft ? renderportal->WindowLeft : x1; - vis->x2 = x2 > renderportal->WindowRight ? 
renderportal->WindowRight : x2; - vis->Angle = thing->Angles.Yaw; - - if (renderflags & RF_XFLIP) - { - vis->startfrac = (tex->GetWidth() << FRACBITS) - 1; - vis->xiscale = -iscale; - } - else - { - vis->startfrac = 0; - vis->xiscale = iscale; - } - - vis->startfrac += (fixed_t)(vis->xiscale * (vis->x1 - centerx + 0.5 - dtx1)); + if (renderflags & RF_XFLIP) + { + vis->startfrac = (tex->GetWidth() << FRACBITS) - 1; + vis->xiscale = -iscale; } else { - vis = R_NewVisSprite(); - - vis->CurrentPortalUniq = renderportal->CurrentPortalUniq; - vis->xscale = FLOAT2FIXED(xscale); - vis->yscale = (float)yscale; - vis->x1 = renderportal->WindowLeft; - vis->x2 = renderportal->WindowRight; - vis->idepth = 1 / MINZ; - vis->floorclip = thing->Floorclip; - - pos.Z -= thing->Floorclip; - - vis->Angle = thing->Angles.Yaw + voxel->AngleOffset; - - int voxelspin = (thing->flags & MF_DROPPED) ? voxel->DroppedSpin : voxel->PlacedSpin; - if (voxelspin != 0) - { - DAngle ang = double(I_FPSTime()) * voxelspin / 1000; - vis->Angle -= ang; - } - - vis->pa.vpos = { (float)ViewPos.X, (float)ViewPos.Y, (float)ViewPos.Z }; - vis->pa.vang = FAngle((float)ViewAngle.Degrees); + vis->startfrac = 0; + vis->xiscale = iscale; } + vis->startfrac += (fixed_t)(vis->xiscale * (vis->x1 - centerx + 0.5 - dtx1)); + // killough 3/27/98: save sector for special clipping later vis->heightsec = heightsec; vis->sector = thing->Sector; @@ -408,19 +202,9 @@ namespace swrenderer vis->bInMirror = renderportal->MirrorFlags & RF_XFLIP; vis->bSplitSprite = false; - if (voxel != nullptr) - { - vis->voxel = voxel->Voxel; - vis->bIsVoxel = true; - vis->bWallSprite = false; - DrewAVoxel = true; - } - else - { - vis->pic = tex; - vis->bIsVoxel = false; - vis->bWallSprite = false; - } + vis->pic = tex; + vis->bIsVoxel = false; + vis->bWallSprite = false; // The software renderer cannot invert the source without inverting the overlay // too. 
That means if the source is inverted, we need to do the reverse of what @@ -481,8 +265,7 @@ namespace swrenderer } else { // diminished light - vis->Style.ColormapNum = GETPALOOKUP( - r_SpriteVisibility / MAX(tz, MINZ), spriteshade); + vis->Style.ColormapNum = GETPALOOKUP(r_SpriteVisibility / MAX(tz, MINZ), spriteshade); vis->Style.BaseColormap = mybasecolormap; } } diff --git a/src/swrenderer/things/r_sprite.h b/src/swrenderer/things/r_sprite.h index fd31ec759d..860885e675 100644 --- a/src/swrenderer/things/r_sprite.h +++ b/src/swrenderer/things/r_sprite.h @@ -17,6 +17,6 @@ namespace swrenderer { - void R_ProjectSprite(AActor *thing, WaterFakeSide fakeside, F3DFloor *fakefloor, F3DFloor *fakeceiling, sector_t *current_sector, int spriteshade); + void R_ProjectSprite(AActor *thing, const DVector3 &pos, FTexture *tex, const DVector2 &spriteScale, int renderflags, WaterFakeSide fakeside, F3DFloor *fakefloor, F3DFloor *fakeceiling, sector_t *current_sector, int spriteshade); void R_DrawVisSprite(vissprite_t *vis, const short *mfloorclip, const short *mceilingclip); } diff --git a/src/swrenderer/things/r_voxel.cpp b/src/swrenderer/things/r_voxel.cpp index 67f1e47e88..2e2e1dbb47 100644 --- a/src/swrenderer/things/r_voxel.cpp +++ b/src/swrenderer/things/r_voxel.cpp @@ -36,8 +36,11 @@ #include "swrenderer/drawers/r_thread.h" #include "swrenderer/things/r_visiblesprite.h" #include "swrenderer/things/r_voxel.h" +#include "swrenderer/scene/r_portal.h" #include "swrenderer/r_main.h" +EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor) + namespace swrenderer { namespace @@ -47,6 +50,182 @@ namespace swrenderer uint8_t *OffscreenColorBuffer; } + void R_ProjectVoxel(AActor *thing, DVector3 pos, FVoxelDef *voxel, const DVector2 &spriteScale, int renderflags, WaterFakeSide fakeside, F3DFloor *fakefloor, F3DFloor *fakeceiling, sector_t *current_sector, int spriteshade) + { + // transform the origin point + double tr_x = pos.X - ViewPos.X; + double tr_y = pos.Y - ViewPos.Y; + + double 
tz = tr_x * ViewTanCos + tr_y * ViewTanSin; + double tx = tr_x * ViewSin - tr_y * ViewCos; + + // [RH] Flip for mirrors + RenderPortal *renderportal = RenderPortal::Instance(); + if (renderportal->MirrorFlags & RF_XFLIP) + { + tx = -tx; + } + //tx2 = tx >> 4; + + // too far off the side? + if (fabs(tx / 128) > fabs(tz)) + { + return; + } + + double xscale = spriteScale.X * voxel->Scale; + double yscale = spriteScale.Y * voxel->Scale; + double piv = voxel->Voxel->Mips[0].Pivot.Z; + double gzt = pos.Z + yscale * piv - thing->Floorclip; + double gzb = pos.Z + yscale * (piv - voxel->Voxel->Mips[0].SizeZ); + if (gzt <= gzb) + return; + + // killough 3/27/98: exclude things totally separated + // from the viewer, by either water or fake ceilings + // killough 4/11/98: improve sprite clipping for underwater/fake ceilings + + sector_t *heightsec = thing->Sector->GetHeightSec(); + + if (heightsec != nullptr) // only clip things which are in special sectors + { + if (fakeside == WaterFakeSide::AboveCeiling) + { + if (gzt < heightsec->ceilingplane.ZatPoint(pos)) + return; + } + else if (fakeside == WaterFakeSide::BelowFloor) + { + if (gzb >= heightsec->floorplane.ZatPoint(pos)) + return; + } + else + { + if (gzt < heightsec->floorplane.ZatPoint(pos)) + return; + if (!(heightsec->MoreFlags & SECF_FAKEFLOORONLY) && gzb >= heightsec->ceilingplane.ZatPoint(pos)) + return; + } + } + + vissprite_t *vis = R_NewVisSprite(); + + vis->CurrentPortalUniq = renderportal->CurrentPortalUniq; + vis->xscale = FLOAT2FIXED(xscale); + vis->yscale = (float)yscale; + vis->x1 = renderportal->WindowLeft; + vis->x2 = renderportal->WindowRight; + vis->idepth = 1 / MINZ; + vis->floorclip = thing->Floorclip; + + pos.Z -= thing->Floorclip; + + vis->Angle = thing->Angles.Yaw + voxel->AngleOffset; + + int voxelspin = (thing->flags & MF_DROPPED) ? 
voxel->DroppedSpin : voxel->PlacedSpin; + if (voxelspin != 0) + { + DAngle ang = double(I_FPSTime()) * voxelspin / 1000; + vis->Angle -= ang; + } + + vis->pa.vpos = { (float)ViewPos.X, (float)ViewPos.Y, (float)ViewPos.Z }; + vis->pa.vang = FAngle((float)ViewAngle.Degrees); + + // killough 3/27/98: save sector for special clipping later + vis->heightsec = heightsec; + vis->sector = thing->Sector; + + vis->depth = (float)tz; + vis->gpos = { (float)pos.X, (float)pos.Y, (float)pos.Z }; + vis->gzb = (float)gzb; // [RH] use gzb, not thing->z + vis->gzt = (float)gzt; // killough 3/27/98 + vis->deltax = float(pos.X - ViewPos.X); + vis->deltay = float(pos.Y - ViewPos.Y); + vis->renderflags = renderflags; + if (thing->flags5 & MF5_BRIGHT) + vis->renderflags |= RF_FULLBRIGHT; // kg3D + vis->Style.RenderStyle = thing->RenderStyle; + vis->FillColor = thing->fillcolor; + vis->Translation = thing->Translation; // [RH] thing translation table + vis->FakeFlatStat = fakeside; + vis->Style.Alpha = float(thing->Alpha); + vis->fakefloor = fakefloor; + vis->fakeceiling = fakeceiling; + vis->Style.ColormapNum = 0; + vis->bInMirror = renderportal->MirrorFlags & RF_XFLIP; + vis->bSplitSprite = false; + + vis->voxel = voxel->Voxel; + vis->bIsVoxel = true; + vis->bWallSprite = false; + DrewAVoxel = true; + + // The software renderer cannot invert the source without inverting the overlay + // too. That means if the source is inverted, we need to do the reverse of what + // the invert overlay flag says to do. + INTBOOL invertcolormap = (vis->Style.RenderStyle.Flags & STYLEF_InvertOverlay); + + if (vis->Style.RenderStyle.Flags & STYLEF_InvertSource) + { + invertcolormap = !invertcolormap; + } + + FDynamicColormap *mybasecolormap = basecolormap; + if (current_sector->sectornum != thing->Sector->sectornum) // compare sectornums to account for R_FakeFlat copies. + { + // Todo: The actor is from a different sector so we have to retrieve the proper basecolormap for that sector. 
+ } + + // Sprites that are added to the scene must fade to black. + if (vis->Style.RenderStyle == LegacyRenderStyles[STYLE_Add] && mybasecolormap->Fade != 0) + { + mybasecolormap = GetSpecialLights(mybasecolormap->Color, 0, mybasecolormap->Desaturate); + } + + if (vis->Style.RenderStyle.Flags & STYLEF_FadeToBlack) + { + if (invertcolormap) + { // Fade to white + mybasecolormap = GetSpecialLights(mybasecolormap->Color, MAKERGB(255, 255, 255), mybasecolormap->Desaturate); + invertcolormap = false; + } + else + { // Fade to black + mybasecolormap = GetSpecialLights(mybasecolormap->Color, MAKERGB(0, 0, 0), mybasecolormap->Desaturate); + } + } + + // get light level + if (fixedcolormap != nullptr) + { // fixed map + vis->Style.BaseColormap = fixedcolormap; + vis->Style.ColormapNum = 0; + } + else + { + if (invertcolormap) + { + mybasecolormap = GetSpecialLights(mybasecolormap->Color, mybasecolormap->Fade.InverseColor(), mybasecolormap->Desaturate); + } + if (fixedlightlev >= 0) + { + vis->Style.BaseColormap = mybasecolormap; + vis->Style.ColormapNum = fixedlightlev >> COLORMAPSHIFT; + } + else if (!foggy && ((renderflags & RF_FULLBRIGHT) || (thing->flags5 & MF5_BRIGHT))) + { // full bright + vis->Style.BaseColormap = (r_fullbrightignoresectorcolor) ? 
&FullNormalLight : mybasecolormap; + vis->Style.ColormapNum = 0; + } + else + { // diminished light + vis->Style.ColormapNum = GETPALOOKUP(r_SpriteVisibility / MAX(tz, MINZ), spriteshade); + vis->Style.BaseColormap = mybasecolormap; + } + } + } + void R_DrawVisVoxel(vissprite_t *sprite, int minZ, int maxZ, short *cliptop, short *clipbottom) { R_SetColorMapLight(sprite->Style.BaseColormap, 0, sprite->Style.ColormapNum << FRACBITS); diff --git a/src/swrenderer/things/r_voxel.h b/src/swrenderer/things/r_voxel.h index e8f97cdd96..a9cb8e1ed2 100644 --- a/src/swrenderer/things/r_voxel.h +++ b/src/swrenderer/things/r_voxel.h @@ -32,6 +32,7 @@ namespace swrenderer enum { DVF_OFFSCREEN = 1, DVF_SPANSONLY = 2, DVF_MIRRORED = 4 }; + void R_ProjectVoxel(AActor *thing, DVector3 pos, FVoxelDef *voxel, const DVector2 &spriteScale, int renderflags, WaterFakeSide fakeside, F3DFloor *fakefloor, F3DFloor *fakeceiling, sector_t *current_sector, int spriteshade); void R_DrawVisVoxel(vissprite_t *sprite, int minZ, int maxZ, short *cliptop, short *clipbottom); void R_FillBox(DVector3 origin, double extentX, double extentY, int color, short *cliptop, short *clipbottom, bool viewspace, bool pixelstretch); kvxslab_t *R_GetSlabStart(const FVoxelMipLevel &mip, int x, int y); From 7dfb46b8d9f37cbf85332e3c983d00615e72c9c1 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 11 Jan 2017 18:10:51 +0100 Subject: [PATCH 681/912] Convert r_decal to a class --- src/swrenderer/line/r_line.cpp | 2 +- src/swrenderer/things/r_decal.cpp | 10 +++++----- src/swrenderer/things/r_decal.h | 12 +++++++++--- 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/src/swrenderer/line/r_line.cpp b/src/swrenderer/line/r_line.cpp index 813ff43c3a..02514367e7 100644 --- a/src/swrenderer/line/r_line.cpp +++ b/src/swrenderer/line/r_line.cpp @@ -585,7 +585,7 @@ namespace swrenderer // [ZZ] Only if not an active mirror if (!rw_markportal) { - R_RenderDecals(curline->sidedef, draw_segment, wallshade, 
rw_lightleft, rw_lightstep, curline, WallC); + RenderDecal::RenderDecals(curline->sidedef, draw_segment, wallshade, rw_lightleft, rw_lightstep, curline, WallC); } if (rw_markportal) diff --git a/src/swrenderer/things/r_decal.cpp b/src/swrenderer/things/r_decal.cpp index 9e2d0b8939..46140bc3e2 100644 --- a/src/swrenderer/things/r_decal.cpp +++ b/src/swrenderer/things/r_decal.cpp @@ -45,11 +45,11 @@ EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor); namespace swrenderer { - void R_RenderDecals(side_t *sidedef, drawseg_t *draw_segment, int wallshade, float lightleft, float lightstep, seg_t *curline, const FWallCoords &wallC) + void RenderDecal::RenderDecals(side_t *sidedef, drawseg_t *draw_segment, int wallshade, float lightleft, float lightstep, seg_t *curline, const FWallCoords &wallC) { for (DBaseDecal *decal = sidedef->AttachedDecals; decal != NULL; decal = decal->WallNext) { - R_RenderDecal(sidedef, decal, draw_segment, wallshade, lightleft, lightstep, curline, wallC, 0); + Render(sidedef, decal, draw_segment, wallshade, lightleft, lightstep, curline, wallC, 0); } } @@ -57,7 +57,7 @@ namespace swrenderer // = 1: drawing masked textures (including sprites) // Currently, only pass = 0 is done or used - void R_RenderDecal(side_t *wall, DBaseDecal *decal, drawseg_t *clipper, int wallshade, float lightleft, float lightstep, seg_t *curline, FWallCoords WallC, int pass) + void RenderDecal::Render(side_t *wall, DBaseDecal *decal, drawseg_t *clipper, int wallshade, float lightleft, float lightstep, seg_t *curline, FWallCoords WallC, int pass) { DVector2 decal_left, decal_right, decal_pos; int x1, x2; @@ -291,7 +291,7 @@ namespace swrenderer { // calculate lighting R_SetColorMapLight(usecolormap, light, wallshade); } - R_DecalColumn(x, WallSpriteTile, maskedScaleY, sprflipvert, mfloorclip, mceilingclip); + DrawColumn(x, WallSpriteTile, maskedScaleY, sprflipvert, mfloorclip, mceilingclip); light += lightstep; x++; } @@ -312,7 +312,7 @@ namespace swrenderer WallC = savecoord; 
} - void R_DecalColumn(int x, FTexture *WallSpriteTile, float maskedScaleY, bool sprflipvert, const short *mfloorclip, const short *mceilingclip) + void RenderDecal::DrawColumn(int x, FTexture *WallSpriteTile, float maskedScaleY, bool sprflipvert, const short *mfloorclip, const short *mceilingclip) { float iscale = swall[x] * maskedScaleY; double spryscale = 1 / iscale; diff --git a/src/swrenderer/things/r_decal.h b/src/swrenderer/things/r_decal.h index 461675a71d..4551e5c1ba 100644 --- a/src/swrenderer/things/r_decal.h +++ b/src/swrenderer/things/r_decal.h @@ -20,7 +20,13 @@ namespace swrenderer { struct drawseg_t; - void R_RenderDecals(side_t *wall, drawseg_t *draw_segment, int wallshade, float lightleft, float lightstep, seg_t *curline, const FWallCoords &wallC); - void R_RenderDecal(side_t *wall, DBaseDecal *first, drawseg_t *clipper, int wallshade, float lightleft, float lightstep, seg_t *curline, FWallCoords wallC, int pass); - void R_DecalColumn(int x, FTexture *WallSpriteTile, float maskedScaleY, bool sprflipvert, const short *mfloorclip, const short *mceilingclip); + class RenderDecal + { + public: + static void RenderDecals(side_t *wall, drawseg_t *draw_segment, int wallshade, float lightleft, float lightstep, seg_t *curline, const FWallCoords &wallC); + + private: + static void Render(side_t *wall, DBaseDecal *first, drawseg_t *clipper, int wallshade, float lightleft, float lightstep, seg_t *curline, FWallCoords wallC, int pass); + static void DrawColumn(int x, FTexture *WallSpriteTile, float maskedScaleY, bool sprflipvert, const short *mfloorclip, const short *mceilingclip); + }; } From deae5bb1a675be51d38fdc0fc39f69c144831533 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 11 Jan 2017 18:14:04 +0100 Subject: [PATCH 682/912] Convert r_particle to a class --- src/swrenderer/scene/r_bsp.cpp | 2 +- src/swrenderer/things/r_particle.cpp | 8 ++++---- src/swrenderer/things/r_particle.h | 12 +++++++++--- src/swrenderer/things/r_visiblesprite.cpp | 2 
+- 4 files changed, 15 insertions(+), 9 deletions(-) diff --git a/src/swrenderer/scene/r_bsp.cpp b/src/swrenderer/scene/r_bsp.cpp index 67a8a3219c..7c323fbbee 100644 --- a/src/swrenderer/scene/r_bsp.cpp +++ b/src/swrenderer/scene/r_bsp.cpp @@ -710,7 +710,7 @@ namespace swrenderer int shade = LIGHT2SHADE((floorlightlevel + ceilinglightlevel) / 2 + r_actualextralight); for (WORD i = ParticlesInSubsec[(unsigned int)(sub - subsectors)]; i != NO_PARTICLE; i = Particles[i].snext) { - R_ProjectParticle(Particles + i, subsectors[sub - subsectors].sector, shade, FakeSide); + RenderParticle::Project(Particles + i, subsectors[sub - subsectors].sector, shade, FakeSide); } } diff --git a/src/swrenderer/things/r_particle.cpp b/src/swrenderer/things/r_particle.cpp index b256ffc38f..4ad74b2506 100644 --- a/src/swrenderer/things/r_particle.cpp +++ b/src/swrenderer/things/r_particle.cpp @@ -57,7 +57,7 @@ EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor); namespace swrenderer { - void R_ProjectParticle(particle_t *particle, const sector_t *sector, int shade, WaterFakeSide fakeside) + void RenderParticle::Project(particle_t *particle, const sector_t *sector, int shade, WaterFakeSide fakeside) { double tr_x, tr_y; double tx, ty; @@ -222,7 +222,7 @@ namespace swrenderer } } - void R_DrawParticle(vissprite_t *vis) + void RenderParticle::Render(vissprite_t *vis) { using namespace drawerargs; @@ -236,7 +236,7 @@ namespace swrenderer if (ycount <= 0 || countbase <= 0) return; - R_DrawMaskedSegsBehindParticle(vis); + DrawMaskedSegsBehindParticle(vis); uint32_t fg = LightBgra::shade_pal_index_simple(color, LightBgra::calc_light_multiplier(LIGHTSCALE(0, vis->Style.ColormapNum << FRACBITS))); @@ -271,7 +271,7 @@ namespace swrenderer } } - void R_DrawMaskedSegsBehindParticle(const vissprite_t *vis) + void RenderParticle::DrawMaskedSegsBehindParticle(const vissprite_t *vis) { const int x1 = vis->x1; const int x2 = vis->x2; diff --git a/src/swrenderer/things/r_particle.h 
b/src/swrenderer/things/r_particle.h index 257a96aa39..3f9f89df32 100644 --- a/src/swrenderer/things/r_particle.h +++ b/src/swrenderer/things/r_particle.h @@ -18,7 +18,13 @@ namespace swrenderer { - void R_ProjectParticle(particle_t *, const sector_t *sector, int shade, WaterFakeSide fakeside); - void R_DrawParticle(vissprite_t *); - void R_DrawMaskedSegsBehindParticle(const vissprite_t *vis); + class RenderParticle + { + public: + static void Project(particle_t *, const sector_t *sector, int shade, WaterFakeSide fakeside); + static void Render(vissprite_t *); + + private: + static void DrawMaskedSegsBehindParticle(const vissprite_t *vis); + }; } diff --git a/src/swrenderer/things/r_visiblesprite.cpp b/src/swrenderer/things/r_visiblesprite.cpp index 492fa3eab0..bc27f35205 100644 --- a/src/swrenderer/things/r_visiblesprite.cpp +++ b/src/swrenderer/things/r_visiblesprite.cpp @@ -257,7 +257,7 @@ namespace swrenderer // kg3D - reject invisible parts if ((clip3d->fake3D & FAKE3D_CLIPBOTTOM) && spr->gpos.Z <= clip3d->sclipBottom) return; if ((clip3d->fake3D & FAKE3D_CLIPTOP) && spr->gpos.Z >= clip3d->sclipTop) return; - R_DrawParticle(spr); + RenderParticle::Render(spr); return; } From 164af7264f917907878d9fb5ed616041c6294dcb Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 11 Jan 2017 18:17:28 +0100 Subject: [PATCH 683/912] Convert r_sprite into a class --- src/swrenderer/scene/r_bsp.cpp | 2 +- src/swrenderer/things/r_playersprite.cpp | 2 +- src/swrenderer/things/r_sprite.cpp | 4 ++-- src/swrenderer/things/r_sprite.h | 8 ++++++-- src/swrenderer/things/r_visiblesprite.cpp | 2 +- 5 files changed, 11 insertions(+), 7 deletions(-) diff --git a/src/swrenderer/scene/r_bsp.cpp b/src/swrenderer/scene/r_bsp.cpp index 7c323fbbee..d701a0e6a5 100644 --- a/src/swrenderer/scene/r_bsp.cpp +++ b/src/swrenderer/scene/r_bsp.cpp @@ -875,7 +875,7 @@ namespace swrenderer } else { - R_ProjectSprite(thing, sprite.pos, sprite.tex, sprite.spriteScale, sprite.renderflags, fakeside, 
fakefloor, fakeceiling, sec, spriteshade); + RenderSprite::Project(thing, sprite.pos, sprite.tex, sprite.spriteScale, sprite.renderflags, fakeside, fakefloor, fakeceiling, sec, spriteshade); } } } diff --git a/src/swrenderer/things/r_playersprite.cpp b/src/swrenderer/things/r_playersprite.cpp index bf648e851c..dd86cafb86 100644 --- a/src/swrenderer/things/r_playersprite.cpp +++ b/src/swrenderer/things/r_playersprite.cpp @@ -579,7 +579,7 @@ namespace swrenderer short *mfloorclip = screenheightarray; short *mceilingclip = zeroarray; - R_DrawVisSprite(vis, mfloorclip, mceilingclip); + RenderSprite::Render(vis, mfloorclip, mceilingclip); } void R_DrawRemainingPlayerSprites() diff --git a/src/swrenderer/things/r_sprite.cpp b/src/swrenderer/things/r_sprite.cpp index 7bc93e4ac0..d5766611c0 100644 --- a/src/swrenderer/things/r_sprite.cpp +++ b/src/swrenderer/things/r_sprite.cpp @@ -57,7 +57,7 @@ EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor) namespace swrenderer { - void R_ProjectSprite(AActor *thing, const DVector3 &pos, FTexture *tex, const DVector2 &spriteScale, int renderflags, WaterFakeSide fakeside, F3DFloor *fakefloor, F3DFloor *fakeceiling, sector_t *current_sector, int spriteshade) + void RenderSprite::Project(AActor *thing, const DVector3 &pos, FTexture *tex, const DVector2 &spriteScale, int renderflags, WaterFakeSide fakeside, F3DFloor *fakefloor, F3DFloor *fakeceiling, sector_t *current_sector, int spriteshade) { // transform the origin point double tr_x = pos.X - ViewPos.X; @@ -271,7 +271,7 @@ namespace swrenderer } } - void R_DrawVisSprite(vissprite_t *vis, const short *mfloorclip, const short *mceilingclip) + void RenderSprite::Render(vissprite_t *vis, const short *mfloorclip, const short *mceilingclip) { fixed_t frac; FTexture *tex; diff --git a/src/swrenderer/things/r_sprite.h b/src/swrenderer/things/r_sprite.h index 860885e675..2711af7ecc 100644 --- a/src/swrenderer/things/r_sprite.h +++ b/src/swrenderer/things/r_sprite.h @@ -17,6 +17,10 @@ namespace 
swrenderer { - void R_ProjectSprite(AActor *thing, const DVector3 &pos, FTexture *tex, const DVector2 &spriteScale, int renderflags, WaterFakeSide fakeside, F3DFloor *fakefloor, F3DFloor *fakeceiling, sector_t *current_sector, int spriteshade); - void R_DrawVisSprite(vissprite_t *vis, const short *mfloorclip, const short *mceilingclip); + class RenderSprite + { + public: + static void Project(AActor *thing, const DVector3 &pos, FTexture *tex, const DVector2 &spriteScale, int renderflags, WaterFakeSide fakeside, F3DFloor *fakefloor, F3DFloor *fakeceiling, sector_t *current_sector, int spriteshade); + static void Render(vissprite_t *vis, const short *mfloorclip, const short *mceilingclip); + }; } diff --git a/src/swrenderer/things/r_visiblesprite.cpp b/src/swrenderer/things/r_visiblesprite.cpp index bc27f35205..2ac874d48e 100644 --- a/src/swrenderer/things/r_visiblesprite.cpp +++ b/src/swrenderer/things/r_visiblesprite.cpp @@ -595,7 +595,7 @@ namespace swrenderer { if (!spr->bWallSprite) { - R_DrawVisSprite(spr, clipbot, cliptop); + RenderSprite::Render(spr, clipbot, cliptop); } else { From 7bed0ffeb6c578a3e54412ff5e0752ef4ff6cdf6 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 11 Jan 2017 18:25:14 +0100 Subject: [PATCH 684/912] Convert r_voxel to a class --- src/swrenderer/scene/r_bsp.cpp | 2 +- src/swrenderer/things/r_visiblesprite.cpp | 4 +-- src/swrenderer/things/r_voxel.cpp | 38 +++++++++++------------ src/swrenderer/things/r_voxel.h | 37 +++++++++++++++------- 4 files changed, 46 insertions(+), 35 deletions(-) diff --git a/src/swrenderer/scene/r_bsp.cpp b/src/swrenderer/scene/r_bsp.cpp index d701a0e6a5..9daaa78789 100644 --- a/src/swrenderer/scene/r_bsp.cpp +++ b/src/swrenderer/scene/r_bsp.cpp @@ -871,7 +871,7 @@ namespace swrenderer } else if (sprite.voxel) { - R_ProjectVoxel(thing, sprite.pos, sprite.voxel, sprite.spriteScale, sprite.renderflags, fakeside, fakefloor, fakeceiling, sec, spriteshade); + RenderVoxel::Project(thing, sprite.pos, 
sprite.voxel, sprite.spriteScale, sprite.renderflags, fakeside, fakefloor, fakeceiling, sec, spriteshade); } else { diff --git a/src/swrenderer/things/r_visiblesprite.cpp b/src/swrenderer/things/r_visiblesprite.cpp index 2ac874d48e..8e5d7da117 100644 --- a/src/swrenderer/things/r_visiblesprite.cpp +++ b/src/swrenderer/things/r_visiblesprite.cpp @@ -106,7 +106,7 @@ namespace swrenderer void R_DeinitSprites() { R_DeinitVisSprites(); - R_DeinitRenderVoxel(); + RenderVoxel::Deinit(); // Free vissprites sorter if (spritesorter != nullptr) @@ -633,7 +633,7 @@ namespace swrenderer } int minvoxely = spr->gzt <= hzt ? 0 : xs_RoundToInt((spr->gzt - hzt) / spr->yscale); int maxvoxely = spr->gzb > hzb ? INT_MAX : xs_RoundToInt((spr->gzt - hzb) / spr->yscale); - R_DrawVisVoxel(spr, minvoxely, maxvoxely, cliptop, clipbot); + RenderVoxel::Render(spr, minvoxely, maxvoxely, cliptop, clipbot); } spr->Style.BaseColormap = colormap; spr->Style.ColormapNum = colormapnum; diff --git a/src/swrenderer/things/r_voxel.cpp b/src/swrenderer/things/r_voxel.cpp index 2e2e1dbb47..d3ae841284 100644 --- a/src/swrenderer/things/r_voxel.cpp +++ b/src/swrenderer/things/r_voxel.cpp @@ -43,14 +43,7 @@ EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor) namespace swrenderer { - namespace - { - FCoverageBuffer *OffscreenCoverageBuffer; - int OffscreenBufferWidth, OffscreenBufferHeight; - uint8_t *OffscreenColorBuffer; - } - - void R_ProjectVoxel(AActor *thing, DVector3 pos, FVoxelDef *voxel, const DVector2 &spriteScale, int renderflags, WaterFakeSide fakeside, F3DFloor *fakefloor, F3DFloor *fakeceiling, sector_t *current_sector, int spriteshade) + void RenderVoxel::Project(AActor *thing, DVector3 pos, FVoxelDef *voxel, const DVector2 &spriteScale, int renderflags, WaterFakeSide fakeside, F3DFloor *fakefloor, F3DFloor *fakeceiling, sector_t *current_sector, int spriteshade) { // transform the origin point double tr_x = pos.X - ViewPos.X; @@ -226,7 +219,7 @@ namespace swrenderer } } - void 
R_DrawVisVoxel(vissprite_t *sprite, int minZ, int maxZ, short *cliptop, short *clipbottom) + void RenderVoxel::Render(vissprite_t *sprite, int minZ, int maxZ, short *cliptop, short *clipbottom) { R_SetColorMapLight(sprite->Style.BaseColormap, 0, sprite->Style.ColormapNum << FRACBITS); bool visible = R_SetPatchStyle(sprite->Style.RenderStyle, sprite->Style.Alpha, sprite->Translation, sprite->FillColor); @@ -305,10 +298,10 @@ namespace swrenderer { for (int y = startY[index]; y != endY; y += stepY[index]) { - kvxslab_t *slab_start = R_GetSlabStart(mip, x, y); - kvxslab_t *slab_end = R_GetSlabEnd(mip, x, y); + kvxslab_t *slab_start = GetSlabStart(mip, x, y); + kvxslab_t *slab_end = GetSlabEnd(mip, x, y); - for (kvxslab_t *slab = slab_start; slab != slab_end; slab = R_NextSlab(slab)) + for (kvxslab_t *slab = slab_start; slab != slab_end; slab = NextSlab(slab)) { // To do: check slab->backfacecull @@ -327,7 +320,7 @@ namespace swrenderer voxel_pos.Y += dirY.X * x + dirY.Y * y; voxel_pos.Z += dirZ * z; - R_FillBox(voxel_pos, sprite_xscale, sprite_yscale, color, cliptop, clipbottom, false, false); + FillBox(voxel_pos, sprite_xscale, sprite_yscale, color, cliptop, clipbottom, false, false); } } } @@ -335,22 +328,22 @@ namespace swrenderer } } - kvxslab_t *R_GetSlabStart(const FVoxelMipLevel &mip, int x, int y) + kvxslab_t *RenderVoxel::GetSlabStart(const FVoxelMipLevel &mip, int x, int y) { return (kvxslab_t *)&mip.SlabData[mip.OffsetX[x] + (int)mip.OffsetXY[x * (mip.SizeY + 1) + y]]; } - kvxslab_t *R_GetSlabEnd(const FVoxelMipLevel &mip, int x, int y) + kvxslab_t *RenderVoxel::GetSlabEnd(const FVoxelMipLevel &mip, int x, int y) { - return R_GetSlabStart(mip, x, y + 1); + return GetSlabStart(mip, x, y + 1); } - kvxslab_t *R_NextSlab(kvxslab_t *slab) + kvxslab_t *RenderVoxel::NextSlab(kvxslab_t *slab) { return (kvxslab_t*)(((uint8_t*)slab) + 3 + slab->zleng); } - void R_FillBox(DVector3 origin, double extentX, double extentY, int color, short *cliptop, short *clipbottom, 
bool viewspace, bool pixelstretch) + void RenderVoxel::FillBox(DVector3 origin, double extentX, double extentY, int color, short *cliptop, short *clipbottom, bool viewspace, bool pixelstretch) { double viewX, viewY, viewZ; if (viewspace) @@ -402,7 +395,7 @@ namespace swrenderer } } - void R_DeinitRenderVoxel() + void RenderVoxel::Deinit() { // Free offscreen buffer if (OffscreenColorBuffer != nullptr) @@ -418,7 +411,7 @@ namespace swrenderer OffscreenBufferHeight = OffscreenBufferWidth = 0; } - void R_CheckOffscreenBuffer(int width, int height, bool spansonly) + void RenderVoxel::CheckOffscreenBuffer(int width, int height, bool spansonly) { // Allocates the offscreen coverage buffer and optionally the offscreen // color buffer. If they already exist but are the wrong size, they will @@ -460,6 +453,11 @@ namespace swrenderer OffscreenBufferHeight = height; } + FCoverageBuffer *RenderVoxel::OffscreenCoverageBuffer; + int RenderVoxel::OffscreenBufferWidth; + int RenderVoxel::OffscreenBufferHeight; + uint8_t *RenderVoxel::OffscreenColorBuffer; + //////////////////////////////////////////////////////////////////////////// FCoverageBuffer::FCoverageBuffer(int lists) diff --git a/src/swrenderer/things/r_voxel.h b/src/swrenderer/things/r_voxel.h index a9cb8e1ed2..93ec214e25 100644 --- a/src/swrenderer/things/r_voxel.h +++ b/src/swrenderer/things/r_voxel.h @@ -30,18 +30,6 @@ namespace swrenderer { struct vissprite_t; - enum { DVF_OFFSCREEN = 1, DVF_SPANSONLY = 2, DVF_MIRRORED = 4 }; - - void R_ProjectVoxel(AActor *thing, DVector3 pos, FVoxelDef *voxel, const DVector2 &spriteScale, int renderflags, WaterFakeSide fakeside, F3DFloor *fakefloor, F3DFloor *fakeceiling, sector_t *current_sector, int spriteshade); - void R_DrawVisVoxel(vissprite_t *sprite, int minZ, int maxZ, short *cliptop, short *clipbottom); - void R_FillBox(DVector3 origin, double extentX, double extentY, int color, short *cliptop, short *clipbottom, bool viewspace, bool pixelstretch); - kvxslab_t 
*R_GetSlabStart(const FVoxelMipLevel &mip, int x, int y); - kvxslab_t *R_GetSlabEnd(const FVoxelMipLevel &mip, int x, int y); - kvxslab_t *R_NextSlab(kvxslab_t *slab); - - void R_CheckOffscreenBuffer(int width, int height, bool spansonly); - void R_DeinitRenderVoxel(); - // [RH] A c-buffer. Used for keeping track of offscreen voxel spans. struct FCoverageBuffer { @@ -63,4 +51,29 @@ namespace swrenderer Span *FreeSpans; unsigned int NumLists; }; + + class RenderVoxel + { + public: + static void Project(AActor *thing, DVector3 pos, FVoxelDef *voxel, const DVector2 &spriteScale, int renderflags, WaterFakeSide fakeside, F3DFloor *fakefloor, F3DFloor *fakeceiling, sector_t *current_sector, int spriteshade); + static void Render(vissprite_t *sprite, int minZ, int maxZ, short *cliptop, short *clipbottom); + + static void Deinit(); + + private: + enum { DVF_OFFSCREEN = 1, DVF_SPANSONLY = 2, DVF_MIRRORED = 4 }; + + static void FillBox(DVector3 origin, double extentX, double extentY, int color, short *cliptop, short *clipbottom, bool viewspace, bool pixelstretch); + + static kvxslab_t *GetSlabStart(const FVoxelMipLevel &mip, int x, int y); + static kvxslab_t *GetSlabEnd(const FVoxelMipLevel &mip, int x, int y); + static kvxslab_t *NextSlab(kvxslab_t *slab); + + static void CheckOffscreenBuffer(int width, int height, bool spansonly); + + static FCoverageBuffer *OffscreenCoverageBuffer; + static int OffscreenBufferWidth; + static int OffscreenBufferHeight; + static uint8_t *OffscreenColorBuffer; + }; } From ee2811450d90b750a71bc146313e474f98c6add7 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 11 Jan 2017 18:28:19 +0100 Subject: [PATCH 685/912] Convert r_wallsprite to a class --- src/swrenderer/scene/r_bsp.cpp | 2 +- src/swrenderer/things/r_visiblesprite.cpp | 2 +- src/swrenderer/things/r_wallsprite.cpp | 8 ++++---- src/swrenderer/things/r_wallsprite.h | 12 +++++++++--- 4 files changed, 15 insertions(+), 9 deletions(-) diff --git a/src/swrenderer/scene/r_bsp.cpp 
b/src/swrenderer/scene/r_bsp.cpp index 9daaa78789..6faab056d1 100644 --- a/src/swrenderer/scene/r_bsp.cpp +++ b/src/swrenderer/scene/r_bsp.cpp @@ -867,7 +867,7 @@ namespace swrenderer { if ((sprite.renderflags & RF_SPRITETYPEMASK) == RF_WALLSPRITE) { - R_ProjectWallSprite(thing, sprite.pos, sprite.picnum, sprite.spriteScale, sprite.renderflags, spriteshade); + RenderWallSprite::Project(thing, sprite.pos, sprite.picnum, sprite.spriteScale, sprite.renderflags, spriteshade); } else if (sprite.voxel) { diff --git a/src/swrenderer/things/r_visiblesprite.cpp b/src/swrenderer/things/r_visiblesprite.cpp index 8e5d7da117..a8241e99a0 100644 --- a/src/swrenderer/things/r_visiblesprite.cpp +++ b/src/swrenderer/things/r_visiblesprite.cpp @@ -599,7 +599,7 @@ namespace swrenderer } else { - R_DrawWallSprite(spr, clipbot, cliptop); + RenderWallSprite::Render(spr, clipbot, cliptop); } } else diff --git a/src/swrenderer/things/r_wallsprite.cpp b/src/swrenderer/things/r_wallsprite.cpp index 9ddd7bfd1f..7158197a14 100644 --- a/src/swrenderer/things/r_wallsprite.cpp +++ b/src/swrenderer/things/r_wallsprite.cpp @@ -59,7 +59,7 @@ EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor); namespace swrenderer { - void R_ProjectWallSprite(AActor *thing, const DVector3 &pos, FTextureID picnum, const DVector2 &scale, int renderflags, int spriteshade) + void RenderWallSprite::Project(AActor *thing, const DVector3 &pos, FTextureID picnum, const DVector2 &scale, int renderflags, int spriteshade) { FWallCoords wallc; double x1, x2; @@ -135,7 +135,7 @@ namespace swrenderer vis->wallc = wallc; } - void R_DrawWallSprite(vissprite_t *spr, const short *mfloorclip, const short *mceilingclip) + void RenderWallSprite::Render(vissprite_t *spr, const short *mfloorclip, const short *mceilingclip) { int x1, x2; double iyscale; @@ -223,7 +223,7 @@ namespace swrenderer R_SetColorMapLight(usecolormap, light, shade); } if (!R_ClipSpriteColumnWithPortals(x, spr)) - R_WallSpriteColumn(x, WallSpriteTile, maskedScaleY, 
sprflipvert, mfloorclip, mceilingclip); + DrawColumn(x, WallSpriteTile, maskedScaleY, sprflipvert, mfloorclip, mceilingclip); light += lightstep; x++; } @@ -231,7 +231,7 @@ namespace swrenderer R_FinishSetPatchStyle(); } - void R_WallSpriteColumn(int x, FTexture *WallSpriteTile, float maskedScaleY, bool sprflipvert, const short *mfloorclip, const short *mceilingclip) + void RenderWallSprite::DrawColumn(int x, FTexture *WallSpriteTile, float maskedScaleY, bool sprflipvert, const short *mfloorclip, const short *mceilingclip) { float iscale = swall[x] * maskedScaleY; double spryscale = 1 / iscale; diff --git a/src/swrenderer/things/r_wallsprite.h b/src/swrenderer/things/r_wallsprite.h index 2fe78a68e2..6e6793045c 100644 --- a/src/swrenderer/things/r_wallsprite.h +++ b/src/swrenderer/things/r_wallsprite.h @@ -17,7 +17,13 @@ namespace swrenderer { - void R_ProjectWallSprite(AActor *thing, const DVector3 &pos, FTextureID picnum, const DVector2 &scale, int renderflags, int spriteshade); - void R_DrawWallSprite(vissprite_t *spr, const short *mfloorclip, const short *mceilingclip); - void R_WallSpriteColumn(int x, FTexture *WallSpriteTile, float maskedScaleY, bool sprflipvert, const short *mfloorclip, const short *mceilingclip); + class RenderWallSprite + { + public: + static void Project(AActor *thing, const DVector3 &pos, FTextureID picnum, const DVector2 &scale, int renderflags, int spriteshade); + static void Render(vissprite_t *spr, const short *mfloorclip, const short *mceilingclip); + + private: + static void DrawColumn(int x, FTexture *WallSpriteTile, float maskedScaleY, bool sprflipvert, const short *mfloorclip, const short *mceilingclip); + }; } From 48b4915f5be92c7ea24810df19be78a3226d2028 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 11 Jan 2017 18:35:20 +0100 Subject: [PATCH 686/912] Convert r_playersprite to a class --- src/swrenderer/r_main.cpp | 2 +- src/swrenderer/r_swrenderer.cpp | 2 +- src/swrenderer/things/r_playersprite.cpp | 32 
++++++++--------------- src/swrenderer/things/r_playersprite.h | 31 +++++++++++++++++++--- src/swrenderer/things/r_visiblesprite.cpp | 2 +- 5 files changed, 41 insertions(+), 28 deletions(-) diff --git a/src/swrenderer/r_main.cpp b/src/swrenderer/r_main.cpp index 630edc9aa2..e6a67473fd 100644 --- a/src/swrenderer/r_main.cpp +++ b/src/swrenderer/r_main.cpp @@ -345,7 +345,7 @@ void R_SWRSetWindow(int windowSize, int fullWidth, int fullHeight, int stHeight, WallTMapScale2 = IYaspectMul / CenterX; // psprite scales - R_SetupPlayerSpriteScale(); + RenderPlayerSprite::SetupSpriteScale(); // thing clipping fillshort (screenheightarray, viewwidth, (short)viewheight); diff --git a/src/swrenderer/r_swrenderer.cpp b/src/swrenderer/r_swrenderer.cpp index f08a2e026f..7c6fc6b3fc 100644 --- a/src/swrenderer/r_swrenderer.cpp +++ b/src/swrenderer/r_swrenderer.cpp @@ -294,7 +294,7 @@ void FSoftwareRenderer::DrawRemainingPlayerSprites() { if (!r_polyrenderer) { - R_DrawRemainingPlayerSprites(); + RenderPlayerSprite::RenderRemainingPlayerSprites(); } else { diff --git a/src/swrenderer/things/r_playersprite.cpp b/src/swrenderer/things/r_playersprite.cpp index dd86cafb86..07325bea3f 100644 --- a/src/swrenderer/things/r_playersprite.cpp +++ b/src/swrenderer/things/r_playersprite.cpp @@ -63,32 +63,23 @@ EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor) namespace swrenderer { - namespace - { - // Used to store a psprite's drawing information if it needs to be drawn later. 
- struct vispsp_t - { - vissprite_t *vis; - FDynamicColormap *basecolormap; - int x1; - }; + TArray RenderPlayerSprite::vispsprites; + unsigned int RenderPlayerSprite::vispspindex; - TArray vispsprites; - unsigned int vispspindex; + double RenderPlayerSprite::pspritexscale; + double RenderPlayerSprite::pspritexiscale; + double RenderPlayerSprite::pspriteyscale; - double pspritexscale; - double pspritexiscale; - double pspriteyscale; - } + TArray RenderPlayerSprite::avis; - void R_SetupPlayerSpriteScale() + void RenderPlayerSprite::SetupSpriteScale() { pspritexscale = centerxwide / 160.0; pspriteyscale = pspritexscale * YaspectMul; pspritexiscale = 1 / pspritexscale; } - void R_DrawPlayerSprites() + void RenderPlayerSprite::RenderPlayerSprites() { int i; int lightnum; @@ -191,7 +182,7 @@ namespace swrenderer if ((psp->GetID() != PSP_TARGETCENTER || CrosshairImage == nullptr) && psp->GetCaller() != nullptr) { - R_DrawPSprite(psp, camera, bobx, boby, wx, wy, r_TicFracF, spriteshade); + Render(psp, camera, bobx, boby, wx, wy, r_TicFracF, spriteshade); } psp = psp->GetNext(); @@ -201,7 +192,7 @@ namespace swrenderer } } - void R_DrawPSprite(DPSprite *pspr, AActor *owner, float bobx, float boby, double wx, double wy, double ticfrac, int spriteshade) + void RenderPlayerSprite::Render(DPSprite *pspr, AActor *owner, float bobx, float boby, double wx, double wy, double ticfrac, int spriteshade) { double tx; int x1; @@ -215,7 +206,6 @@ namespace swrenderer vissprite_t* vis; bool noaccel; double alpha = owner->Alpha; - static TArray avis; if (avis.Size() < vispspindex + 1) avis.Reserve(avis.Size() - vispspindex + 1); @@ -582,7 +572,7 @@ namespace swrenderer RenderSprite::Render(vis, mfloorclip, mceilingclip); } - void R_DrawRemainingPlayerSprites() + void RenderPlayerSprite::RenderRemainingPlayerSprites() { for (unsigned int i = 0; i < vispspindex; i++) { diff --git a/src/swrenderer/things/r_playersprite.h b/src/swrenderer/things/r_playersprite.h index c1a4b53b19..0bf01fc659 
100644 --- a/src/swrenderer/things/r_playersprite.h +++ b/src/swrenderer/things/r_playersprite.h @@ -20,9 +20,32 @@ namespace swrenderer { - void R_SetupPlayerSpriteScale(); + class RenderPlayerSprite + { + public: + static void SetupSpriteScale(); - void R_DrawPlayerSprites(); - void R_DrawPSprite(DPSprite *pspr, AActor *owner, float bobx, float boby, double wx, double wy, double ticfrac, int spriteshade); - void R_DrawRemainingPlayerSprites(); + static void RenderPlayerSprites(); + static void RenderRemainingPlayerSprites(); + + private: + static void Render(DPSprite *pspr, AActor *owner, float bobx, float boby, double wx, double wy, double ticfrac, int spriteshade); + + // Used to store a psprite's drawing information if it needs to be drawn later. + struct vispsp_t + { + vissprite_t *vis; + FDynamicColormap *basecolormap; + int x1; + }; + + static TArray vispsprites; + static unsigned int vispspindex; + + static double pspritexscale; + static double pspritexiscale; + static double pspriteyscale; + + static TArray avis; + }; } diff --git a/src/swrenderer/things/r_visiblesprite.cpp b/src/swrenderer/things/r_visiblesprite.cpp index a8241e99a0..9f327003ba 100644 --- a/src/swrenderer/things/r_visiblesprite.cpp +++ b/src/swrenderer/things/r_visiblesprite.cpp @@ -727,6 +727,6 @@ namespace swrenderer clip3d->DeleteHeights(); clip3d->fake3D = 0; } - R_DrawPlayerSprites(); + RenderPlayerSprite::RenderPlayerSprites(); } } From 8ed66791e794675599b6856fc6ea9d3245802503 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 11 Jan 2017 18:38:51 +0100 Subject: [PATCH 687/912] Change define to enum --- src/swrenderer/things/r_playersprite.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/swrenderer/things/r_playersprite.h b/src/swrenderer/things/r_playersprite.h index 0bf01fc659..293cf9d3a2 100644 --- a/src/swrenderer/things/r_playersprite.h +++ b/src/swrenderer/things/r_playersprite.h @@ -15,9 +15,6 @@ #include "r_visiblesprite.h" -#define 
BASEXCENTER (160) -#define BASEYCENTER (100) - namespace swrenderer { class RenderPlayerSprite @@ -31,6 +28,9 @@ namespace swrenderer private: static void Render(DPSprite *pspr, AActor *owner, float bobx, float boby, double wx, double wy, double ticfrac, int spriteshade); + enum { BASEXCENTER = 160 }; + enum { BASEYCENTER = 100 }; + // Used to store a psprite's drawing information if it needs to be drawn later. struct vispsp_t { From 32b91dd978943d8794bfaadeda7d6d591cbbb649 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 11 Jan 2017 19:33:02 +0100 Subject: [PATCH 688/912] Convert r_visiblesprite to classes --- src/swrenderer/r_main.cpp | 7 +- src/swrenderer/scene/r_portal.cpp | 18 +- src/swrenderer/things/r_particle.cpp | 6 +- src/swrenderer/things/r_sprite.cpp | 4 +- src/swrenderer/things/r_visiblesprite.cpp | 200 +++++++++++----------- src/swrenderer/things/r_visiblesprite.h | 66 +++++-- src/swrenderer/things/r_voxel.cpp | 4 +- src/swrenderer/things/r_wallsprite.cpp | 4 +- 8 files changed, 170 insertions(+), 139 deletions(-) diff --git a/src/swrenderer/r_main.cpp b/src/swrenderer/r_main.cpp index e6a67473fd..f90690d494 100644 --- a/src/swrenderer/r_main.cpp +++ b/src/swrenderer/r_main.cpp @@ -92,7 +92,6 @@ namespace swrenderer // EXTERNAL FUNCTION PROTOTYPES -------------------------------------------- void R_SpanInitData (); -void R_DeinitSprites(); // PUBLIC FUNCTION PROTOTYPES ---------------------------------------------- @@ -385,7 +384,7 @@ void R_InitRenderer() static void R_ShutdownRenderer() { - R_DeinitSprites(); + RenderTranslucent::Deinit(); R_DeinitPlanes(); Clip3DFloors::Instance()->Cleanup(); R_DeinitOpenings(); @@ -528,7 +527,7 @@ void R_RenderActorView (AActor *actor, bool dontmaplines) R_ClearClipSegs (0, viewwidth); R_ClearDrawSegs (); R_ClearPlanes (true); - R_ClearSprites (); + RenderTranslucent::Clear(); // opening / clipping determination RenderBSP::Instance()->ClearClip(); @@ -577,7 +576,7 @@ void R_RenderActorView (AActor *actor, 
bool dontmaplines) NetUpdate (); MaskedCycles.Clock(); - R_DrawMasked (); + RenderTranslucent::Render(); MaskedCycles.Unclock(); NetUpdate (); diff --git a/src/swrenderer/scene/r_portal.cpp b/src/swrenderer/scene/r_portal.cpp index 5ca458b309..d22777eea8 100644 --- a/src/swrenderer/scene/r_portal.cpp +++ b/src/swrenderer/scene/r_portal.cpp @@ -96,7 +96,7 @@ namespace swrenderer int savedextralight = extralight; DVector3 savedpos = ViewPos; DAngle savedangle = ViewAngle; - ptrdiff_t savedvissprite_p = vissprite_p - vissprites; + ptrdiff_t savedvissprite_p = VisibleSpriteList::vissprite_p - VisibleSpriteList::vissprites; ptrdiff_t savedds_p = ds_p - drawsegs; size_t savedinteresting = FirstInterestingDrawseg; double savedvisibility = R_GetVisibility(); @@ -212,14 +212,14 @@ namespace swrenderer memcpy(openings + draw_segment->sprbottomclip, floorclip + pl->left, (pl->right - pl->left) * sizeof(short)); memcpy(openings + draw_segment->sprtopclip, ceilingclip + pl->left, (pl->right - pl->left) * sizeof(short)); - firstvissprite = vissprite_p; + VisibleSpriteList::firstvissprite = VisibleSpriteList::vissprite_p; firstdrawseg = draw_segment; FirstInterestingDrawseg = InterestingDrawsegs.Size(); interestingStack.Push(FirstInterestingDrawseg); ptrdiff_t diffnum = firstdrawseg - drawsegs; drawsegStack.Push(diffnum); - diffnum = firstvissprite - vissprites; + diffnum = VisibleSpriteList::firstvissprite - VisibleSpriteList::vissprites; visspriteStack.Push(diffnum); viewposStack.Push(ViewPos); visplaneStack.Push(pl); @@ -242,15 +242,15 @@ namespace swrenderer drawsegStack.Pop(pd); firstdrawseg = drawsegs + pd; visspriteStack.Pop(pd); - firstvissprite = vissprites + pd; + VisibleSpriteList::firstvissprite = VisibleSpriteList::vissprites + pd; // Masked textures and planes need the view coordinates restored for proper positioning. 
viewposStack.Pop(ViewPos); - R_DrawMasked(); + RenderTranslucent::Render(); ds_p = firstdrawseg; - vissprite_p = firstvissprite; + VisibleSpriteList::vissprite_p = VisibleSpriteList::firstvissprite; visplaneStack.Pop(pl); if (pl->Alpha > 0 && pl->picnum != skyflatnum) @@ -260,8 +260,8 @@ namespace swrenderer *freehead = pl; freehead = &pl->next; } - firstvissprite = vissprites; - vissprite_p = vissprites + savedvissprite_p; + VisibleSpriteList::firstvissprite = VisibleSpriteList::vissprites; + VisibleSpriteList::vissprite_p = VisibleSpriteList::vissprites + savedvissprite_p; firstdrawseg = drawsegs; ds_p = drawsegs + savedds_p; InterestingDrawsegs.Resize((unsigned int)FirstInterestingDrawseg); @@ -469,7 +469,7 @@ namespace swrenderer NetUpdate(); MaskedCycles.Clock(); // [ZZ] count sprites in portals/mirrors along with normal ones. - R_DrawMasked(); // this is required since with portals there often will be cases when more than 80% of the view is inside a portal. + RenderTranslucent::Render(); // this is required since with portals there often will be cases when more than 80% of the view is inside a portal. 
MaskedCycles.Unclock(); NetUpdate(); diff --git a/src/swrenderer/things/r_particle.cpp b/src/swrenderer/things/r_particle.cpp index 4ad74b2506..8ec92569ff 100644 --- a/src/swrenderer/things/r_particle.cpp +++ b/src/swrenderer/things/r_particle.cpp @@ -177,7 +177,7 @@ namespace swrenderer return; // store information in a vissprite - vis = R_NewVisSprite(); + vis = VisibleSpriteList::Add(); vis->CurrentPortalUniq = renderportal->CurrentPortalUniq; vis->heightsec = heightsec; vis->xscale = FLOAT2FIXED(xscale); @@ -253,7 +253,7 @@ namespace swrenderer { for (int x = x1; x < (x1 + countbase); x++, fracposx += fracstepx) { - if (R_ClipSpriteColumnWithPortals(x, vis)) + if (RenderTranslucent::ClipSpriteColumnWithPortals(x, vis)) continue; uint32_t *dest = ylookup[yl] + x + (uint32_t*)dc_destorg; DrawerCommandQueue::QueueCommand(dest, yl, spacing, ycount, fg, alpha, fracposx); @@ -263,7 +263,7 @@ namespace swrenderer { for (int x = x1; x < (x1 + countbase); x++, fracposx += fracstepx) { - if (R_ClipSpriteColumnWithPortals(x, vis)) + if (RenderTranslucent::ClipSpriteColumnWithPortals(x, vis)) continue; uint8_t *dest = ylookup[yl] + x + dc_destorg; DrawerCommandQueue::QueueCommand(dest, yl, spacing, ycount, fg, alpha, fracposx); diff --git a/src/swrenderer/things/r_sprite.cpp b/src/swrenderer/things/r_sprite.cpp index d5766611c0..7751f859e7 100644 --- a/src/swrenderer/things/r_sprite.cpp +++ b/src/swrenderer/things/r_sprite.cpp @@ -153,7 +153,7 @@ namespace swrenderer double yscale = spriteScale.Y / tex->Scale.Y; // store information in a vissprite - vissprite_t *vis = R_NewVisSprite(); + vissprite_t *vis = VisibleSpriteList::Add(); vis->CurrentPortalUniq = renderportal->CurrentPortalUniq; vis->xscale = FLOAT2FIXED(xscale); @@ -330,7 +330,7 @@ namespace swrenderer { while (x < x2) { - if (ispsprite || !R_ClipSpriteColumnWithPortals(x, vis)) + if (ispsprite || !RenderTranslucent::ClipSpriteColumnWithPortals(x, vis)) R_DrawMaskedColumn(x, iscale, tex, frac, spryscale, 
sprtopscreen, sprflipvert, mfloorclip, mceilingclip, false); x++; frac += xiscale; diff --git a/src/swrenderer/things/r_visiblesprite.cpp b/src/swrenderer/things/r_visiblesprite.cpp index 9f327003ba..36ee18432b 100644 --- a/src/swrenderer/things/r_visiblesprite.cpp +++ b/src/swrenderer/things/r_visiblesprite.cpp @@ -43,24 +43,7 @@ CVAR(Bool, r_splitsprites, true, CVAR_ARCHIVE) namespace swrenderer { - int MaxVisSprites; - vissprite_t **vissprites; - vissprite_t **firstvissprite; - vissprite_t **vissprite_p; - vissprite_t **lastvissprite; - - bool DrewAVoxel; - - namespace - { - vissprite_t **spritesorter; - int spritesortersize = 0; - int vsprcount; - - TArray portaldrawsegs; - } - - void R_DeinitVisSprites() + void VisibleSpriteList::Deinit() { // Free vissprites for (int i = 0; i < MaxVisSprites; ++i) @@ -73,12 +56,12 @@ namespace swrenderer MaxVisSprites = 0; } - void R_ClearVisSprites() + void VisibleSpriteList::Clear() { vissprite_p = firstvissprite; } - vissprite_t *R_NewVisSprite() + vissprite_t *VisibleSpriteList::Add() { if (vissprite_p == lastvissprite) { @@ -103,27 +86,97 @@ namespace swrenderer return *(vissprite_p - 1); } - void R_DeinitSprites() - { - R_DeinitVisSprites(); - RenderVoxel::Deinit(); + int VisibleSpriteList::MaxVisSprites; + vissprite_t **VisibleSpriteList::vissprites; + vissprite_t **VisibleSpriteList::firstvissprite; + vissprite_t **VisibleSpriteList::vissprite_p; + vissprite_t **VisibleSpriteList::lastvissprite; - // Free vissprites sorter - if (spritesorter != nullptr) - { - delete[] spritesorter; - spritesortersize = 0; - spritesorter = nullptr; - } + ///////////////////////////////////////////////////////////////////////// + + void SortedVisibleSpriteList::Deinit() + { + delete[] spritesorter; + spritesortersize = 0; + spritesorter = nullptr; } - void R_ClearSprites() + // This is the standard version, which does a simple test based on depth. 
+ bool SortedVisibleSpriteList::sv_compare(vissprite_t *a, vissprite_t *b) { - R_ClearVisSprites(); + return a->idepth > b->idepth; + } + + // This is an alternate version, for when one or more voxel is in view. + // It does a 2D distance test based on whichever one is furthest from + // the viewpoint. + bool SortedVisibleSpriteList::sv_compare2d(vissprite_t *a, vissprite_t *b) + { + return DVector2(a->deltax, a->deltay).LengthSquared() < DVector2(b->deltax, b->deltay).LengthSquared(); + } + + void SortedVisibleSpriteList::Sort(bool(*compare)(vissprite_t *, vissprite_t *), size_t first) + { + int i; + vissprite_t **spr; + + vsprcount = int(VisibleSpriteList::vissprite_p - &VisibleSpriteList::vissprites[first]); + + if (vsprcount == 0) + return; + + if (spritesortersize < VisibleSpriteList::MaxVisSprites) + { + if (spritesorter != nullptr) + delete[] spritesorter; + spritesorter = new vissprite_t *[VisibleSpriteList::MaxVisSprites]; + spritesortersize = VisibleSpriteList::MaxVisSprites; + } + + if (!(i_compatflags & COMPATF_SPRITESORT)) + { + for (i = 0, spr = VisibleSpriteList::firstvissprite; i < vsprcount; i++, spr++) + { + spritesorter[i] = *spr; + } + } + else + { + // If the compatibility option is on sprites of equal distance need to + // be sorted in inverse order. This is most easily achieved by + // filling the sort array backwards before the sort. 
+ for (i = 0, spr = VisibleSpriteList::firstvissprite + vsprcount - 1; i < vsprcount; i++, spr--) + { + spritesorter[i] = *spr; + } + } + + std::stable_sort(&spritesorter[0], &spritesorter[vsprcount], compare); + } + + vissprite_t **SortedVisibleSpriteList::spritesorter; + int SortedVisibleSpriteList::spritesortersize = 0; + int SortedVisibleSpriteList::vsprcount; + + ///////////////////////////////////////////////////////////////////////// + + bool RenderTranslucent::DrewAVoxel; + TArray RenderTranslucent::portaldrawsegs; + + void RenderTranslucent::Deinit() + { + VisibleSpriteList::Deinit(); + SortedVisibleSpriteList::Deinit(); + RenderVoxel::Deinit(); + } + + void RenderTranslucent::Clear() + { + VisibleSpriteList::Clear(); DrewAVoxel = false; } - void R_CollectPortals() + void RenderTranslucent::CollectPortals() { // This function collects all drawsegs that may be of interest to R_ClipSpriteColumnWithPortals // Having that function over the entire list of drawsegs can break down performance quite drastically. @@ -153,7 +206,7 @@ namespace swrenderer } } - bool R_ClipSpriteColumnWithPortals(int x, vissprite_t* spr) + bool RenderTranslucent::ClipSpriteColumnWithPortals(int x, vissprite_t* spr) { RenderPortal *renderportal = RenderPortal::Instance(); @@ -181,60 +234,7 @@ namespace swrenderer return false; } - // This is the standard version, which does a simple test based on depth. - bool sv_compare(vissprite_t *a, vissprite_t *b) - { - return a->idepth > b->idepth; - } - - // This is an alternate version, for when one or more voxel is in view. - // It does a 2D distance test based on whichever one is furthest from - // the viewpoint. 
- bool sv_compare2d(vissprite_t *a, vissprite_t *b) - { - return DVector2(a->deltax, a->deltay).LengthSquared() < DVector2(b->deltax, b->deltay).LengthSquared(); - } - - void R_SortVisSprites(bool(*compare)(vissprite_t *, vissprite_t *), size_t first) - { - int i; - vissprite_t **spr; - - vsprcount = int(vissprite_p - &vissprites[first]); - - if (vsprcount == 0) - return; - - if (spritesortersize < MaxVisSprites) - { - if (spritesorter != nullptr) - delete[] spritesorter; - spritesorter = new vissprite_t *[MaxVisSprites]; - spritesortersize = MaxVisSprites; - } - - if (!(i_compatflags & COMPATF_SPRITESORT)) - { - for (i = 0, spr = firstvissprite; i < vsprcount; i++, spr++) - { - spritesorter[i] = *spr; - } - } - else - { - // If the compatibility option is on sprites of equal distance need to - // be sorted in inverse order. This is most easily achieved by - // filling the sort array backwards before the sort. - for (i = 0, spr = firstvissprite + vsprcount - 1; i < vsprcount; i++, spr--) - { - spritesorter[i] = *spr; - } - } - - std::stable_sort(&spritesorter[0], &spritesorter[vsprcount], compare); - } - - void R_DrawSprite(vissprite_t *spr) + void RenderTranslucent::DrawSprite(vissprite_t *spr) { static short clipbot[MAXWIDTH]; static short cliptop[MAXWIDTH]; @@ -639,15 +639,15 @@ namespace swrenderer spr->Style.ColormapNum = colormapnum; } - void R_DrawMaskedSingle(bool renew) + void RenderTranslucent::DrawMaskedSingle(bool renew) { RenderPortal *renderportal = RenderPortal::Instance(); - for (int i = vsprcount; i > 0; i--) + for (int i = SortedVisibleSpriteList::vsprcount; i > 0; i--) { - if (spritesorter[i - 1]->CurrentPortalUniq != renderportal->CurrentPortalUniq) + if (SortedVisibleSpriteList::spritesorter[i - 1]->CurrentPortalUniq != renderportal->CurrentPortalUniq) continue; // probably another time - R_DrawSprite(spritesorter[i - 1]); + DrawSprite(SortedVisibleSpriteList::spritesorter[i - 1]); } // render any remaining masked mid textures @@ -676,15 
+676,15 @@ namespace swrenderer } } - void R_DrawMasked() + void RenderTranslucent::Render() { - R_CollectPortals(); - R_SortVisSprites(DrewAVoxel ? sv_compare2d : sv_compare, firstvissprite - vissprites); + CollectPortals(); + SortedVisibleSpriteList::Sort(DrewAVoxel ? SortedVisibleSpriteList::sv_compare2d : SortedVisibleSpriteList::sv_compare, VisibleSpriteList::firstvissprite - VisibleSpriteList::vissprites); Clip3DFloors *clip3d = Clip3DFloors::Instance(); if (clip3d->height_top == nullptr) { // kg3D - no visible 3D floors, normal rendering - R_DrawMaskedSingle(false); + DrawMaskedSingle(false); } else { // kg3D - correct sorting @@ -701,14 +701,14 @@ namespace swrenderer clip3d->fake3D = FAKE3D_CLIPBOTTOM; } clip3d->sclipBottom = hl->height; - R_DrawMaskedSingle(true); + DrawMaskedSingle(true); R_DrawHeightPlanes(hl->height); } // floors clip3d->fake3D = FAKE3D_DOWN2UP | FAKE3D_CLIPTOP; clip3d->sclipTop = clip3d->height_top->height; - R_DrawMaskedSingle(true); + DrawMaskedSingle(true); for (HeightLevel *hl = clip3d->height_top; hl != nullptr && hl->height < ViewPos.Z; hl = hl->next) { R_DrawHeightPlanes(hl->height); @@ -722,7 +722,7 @@ namespace swrenderer clip3d->fake3D = FAKE3D_DOWN2UP | FAKE3D_CLIPBOTTOM; } clip3d->sclipBottom = hl->height; - R_DrawMaskedSingle(true); + DrawMaskedSingle(true); } clip3d->DeleteHeights(); clip3d->fake3D = 0; diff --git a/src/swrenderer/things/r_visiblesprite.h b/src/swrenderer/things/r_visiblesprite.h index 6a540d1ab3..b9405dc199 100644 --- a/src/swrenderer/things/r_visiblesprite.h +++ b/src/swrenderer/things/r_visiblesprite.h @@ -23,6 +23,8 @@ struct FVoxel; namespace swrenderer { + struct drawseg_t; + struct vissprite_t { struct posang @@ -88,25 +90,55 @@ namespace swrenderer vissprite_t() {} }; - extern int MaxVisSprites; - extern vissprite_t **vissprites, **firstvissprite; - extern vissprite_t **vissprite_p; + class VisibleSpriteList + { + public: + static int MaxVisSprites; + static vissprite_t **vissprites; + static 
vissprite_t **firstvissprite; + static vissprite_t **vissprite_p; - extern bool DrewAVoxel; + static void Deinit(); + static void Clear(); + static vissprite_t *Add(); - void R_DeinitVisSprites(); - void R_ClearVisSprites(); - vissprite_t *R_NewVisSprite(); + private: + static vissprite_t **lastvissprite; + }; - void R_DeinitSprites(); - void R_ClearSprites(); - void R_CollectPortals(); - bool R_ClipSpriteColumnWithPortals(int x, vissprite_t* spr); - void R_SortVisSprites(bool(*compare)(vissprite_t *, vissprite_t *), size_t first); - void R_DrawSprite(vissprite_t *spr); - void R_DrawMaskedSingle(bool renew); - void R_DrawMasked(); + class SortedVisibleSpriteList + { + public: + static void Deinit(); - bool sv_compare(vissprite_t *a, vissprite_t *b); - bool sv_compare2d(vissprite_t *a, vissprite_t *b); + static void Sort(bool(*compare)(vissprite_t *, vissprite_t *), size_t first); + + static bool sv_compare(vissprite_t *a, vissprite_t *b); + static bool sv_compare2d(vissprite_t *a, vissprite_t *b); + + static vissprite_t **spritesorter; + static int vsprcount; + + private: + static int spritesortersize; + }; + + class RenderTranslucent + { + public: + static void Deinit(); + static void Clear(); + static void Render(); + + static bool DrewAVoxel; + + static bool ClipSpriteColumnWithPortals(int x, vissprite_t* spr); + + private: + static void CollectPortals(); + static void DrawSprite(vissprite_t *spr); + static void DrawMaskedSingle(bool renew); + + static TArray portaldrawsegs; + }; } diff --git a/src/swrenderer/things/r_voxel.cpp b/src/swrenderer/things/r_voxel.cpp index d3ae841284..2d81f2e62c 100644 --- a/src/swrenderer/things/r_voxel.cpp +++ b/src/swrenderer/things/r_voxel.cpp @@ -101,7 +101,7 @@ namespace swrenderer } } - vissprite_t *vis = R_NewVisSprite(); + vissprite_t *vis = VisibleSpriteList::Add(); vis->CurrentPortalUniq = renderportal->CurrentPortalUniq; vis->xscale = FLOAT2FIXED(xscale); @@ -152,7 +152,7 @@ namespace swrenderer vis->voxel = 
voxel->Voxel; vis->bIsVoxel = true; vis->bWallSprite = false; - DrewAVoxel = true; + RenderTranslucent::DrewAVoxel = true; // The software renderer cannot invert the source without inverting the overlay // too. That means if the source is inverted, we need to do the reverse of what diff --git a/src/swrenderer/things/r_wallsprite.cpp b/src/swrenderer/things/r_wallsprite.cpp index 7158197a14..307f1bdf08 100644 --- a/src/swrenderer/things/r_wallsprite.cpp +++ b/src/swrenderer/things/r_wallsprite.cpp @@ -102,7 +102,7 @@ namespace swrenderer gzt = pos.Z + scale.Y * scaled_to; gzb = pos.Z + scale.Y * scaled_bo; - vis = R_NewVisSprite(); + vis = VisibleSpriteList::Add(); vis->CurrentPortalUniq = renderportal->CurrentPortalUniq; vis->x1 = wallc.sx1 < renderportal->WindowLeft ? renderportal->WindowLeft : wallc.sx1; vis->x2 = wallc.sx2 >= renderportal->WindowRight ? renderportal->WindowRight : wallc.sx2; @@ -222,7 +222,7 @@ namespace swrenderer { // calculate lighting R_SetColorMapLight(usecolormap, light, shade); } - if (!R_ClipSpriteColumnWithPortals(x, spr)) + if (!RenderTranslucent::ClipSpriteColumnWithPortals(x, spr)) DrawColumn(x, WallSpriteTile, maskedScaleY, sprflipvert, mfloorclip, mceilingclip); light += lightstep; x++; From 6f1836b68bdb22568a645f037c0826081bb24cde Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 11 Jan 2017 19:50:07 +0100 Subject: [PATCH 689/912] Move classes in r_visiblesprite to their own files --- src/CMakeLists.txt | 2 + src/swrenderer/r_main.cpp | 1 + src/swrenderer/scene/r_portal.cpp | 1 + src/swrenderer/scene/r_translucent.cpp | 616 ++++++++++++++++ src/swrenderer/scene/r_translucent.h | 46 ++ src/swrenderer/things/r_particle.cpp | 1 + src/swrenderer/things/r_sprite.cpp | 1 + src/swrenderer/things/r_visiblesprite.cpp | 693 ------------------ src/swrenderer/things/r_visiblesprite.h | 55 +- src/swrenderer/things/r_visiblespritelist.cpp | 146 ++++ src/swrenderer/things/r_visiblespritelist.h | 53 ++ src/swrenderer/things/r_voxel.cpp | 
1 + src/swrenderer/things/r_wallsprite.cpp | 1 + 13 files changed, 870 insertions(+), 747 deletions(-) create mode 100644 src/swrenderer/scene/r_translucent.cpp create mode 100644 src/swrenderer/scene/r_translucent.h create mode 100644 src/swrenderer/things/r_visiblespritelist.cpp create mode 100644 src/swrenderer/things/r_visiblespritelist.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 74d1183564..5865a4e93e 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -848,6 +848,7 @@ set( FASTMATH_PCH_SOURCES swrenderer/scene/r_3dfloors.cpp swrenderer/scene/r_bsp.cpp swrenderer/scene/r_portal.cpp + swrenderer/scene/r_translucent.cpp swrenderer/line/r_line.cpp swrenderer/line/r_walldraw.cpp swrenderer/line/r_wallsetup.cpp @@ -856,6 +857,7 @@ set( FASTMATH_PCH_SOURCES swrenderer/segments/r_drawsegment.cpp swrenderer/segments/r_portalsegment.cpp swrenderer/things/r_visiblesprite.cpp + swrenderer/things/r_visiblespritelist.cpp swrenderer/things/r_voxel.cpp swrenderer/things/r_particle.cpp swrenderer/things/r_playersprite.cpp diff --git a/src/swrenderer/r_main.cpp b/src/swrenderer/r_main.cpp index f90690d494..a72e8214ac 100644 --- a/src/swrenderer/r_main.cpp +++ b/src/swrenderer/r_main.cpp @@ -43,6 +43,7 @@ #include "segments/r_clipsegment.h" #include "scene/r_3dfloors.h" #include "scene/r_portal.h" +#include "scene/r_translucent.h" #include "r_sky.h" #include "drawers/r_draw_rgba.h" #include "st_stuff.h" diff --git a/src/swrenderer/scene/r_portal.cpp b/src/swrenderer/scene/r_portal.cpp index d22777eea8..daa6769adb 100644 --- a/src/swrenderer/scene/r_portal.cpp +++ b/src/swrenderer/scene/r_portal.cpp @@ -50,6 +50,7 @@ #include "swrenderer/plane/r_visibleplane.h" #include "swrenderer/things/r_visiblesprite.h" #include "swrenderer/scene/r_bsp.h" +#include "swrenderer/scene/r_translucent.h" #include "swrenderer/r_main.h" #include "swrenderer/r_memory.h" diff --git a/src/swrenderer/scene/r_translucent.cpp b/src/swrenderer/scene/r_translucent.cpp new file mode 
100644 index 0000000000..7b47965055 --- /dev/null +++ b/src/swrenderer/scene/r_translucent.cpp @@ -0,0 +1,616 @@ +// +// Copyright (C) 1993-1996 by id Software, Inc. +// +// This source is available for distribution and/or modification +// only under the terms of the DOOM Source Code License as +// published by id Software. All rights reserved. +// +// The source is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// FITNESS FOR A PARTICULAR PURPOSE. See the DOOM Source Code License +// for more details. +// + +#include +#include +#include +#include "p_lnspec.h" +#include "templates.h" +#include "doomdef.h" +#include "m_swap.h" +#include "i_system.h" +#include "w_wad.h" +#include "g_levellocals.h" +#include "p_maputl.h" +#include "swrenderer/r_main.h" +#include "swrenderer/things/r_visiblesprite.h" +#include "swrenderer/things/r_visiblespritelist.h" +#include "swrenderer/things/r_voxel.h" +#include "swrenderer/things/r_particle.h" +#include "swrenderer/things/r_sprite.h" +#include "swrenderer/things/r_wallsprite.h" +#include "swrenderer/things/r_playersprite.h" +#include "swrenderer/segments/r_drawsegment.h" +#include "swrenderer/scene/r_portal.h" +#include "swrenderer/scene/r_translucent.h" +#include "swrenderer/plane/r_visibleplane.h" +#include "swrenderer/r_memory.h" + +EXTERN_CVAR(Int, r_drawfuzz) +EXTERN_CVAR(Bool, r_drawvoxels) +EXTERN_CVAR(Bool, r_blendmethod) + +CVAR(Bool, r_fullbrightignoresectorcolor, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); + +namespace swrenderer +{ + bool RenderTranslucent::DrewAVoxel; + TArray RenderTranslucent::portaldrawsegs; + + void RenderTranslucent::Deinit() + { + VisibleSpriteList::Deinit(); + SortedVisibleSpriteList::Deinit(); + RenderVoxel::Deinit(); + } + + void RenderTranslucent::Clear() + { + VisibleSpriteList::Clear(); + DrewAVoxel = false; + } + + void RenderTranslucent::CollectPortals() + { + // This function collects all drawsegs that may be of interest 
to R_ClipSpriteColumnWithPortals + // Having that function over the entire list of drawsegs can break down performance quite drastically. + // This is doing the costly stuff only once so that R_ClipSpriteColumnWithPortals can + // a) exit early if no relevant info is found and + // b) skip most of the collected drawsegs which have no portal attached. + portaldrawsegs.Clear(); + for (drawseg_t* seg = ds_p; seg-- > firstdrawseg; ) // copied code from killough below + { + // I don't know what makes this happen (some old top-down portal code or possibly skybox code? something adds null lines...) + // crashes at the first frame of the first map of Action2.wad + if (!seg->curline) continue; + + line_t* line = seg->curline->linedef; + // ignore minisegs from GL nodes. + if (!line) continue; + + // check if this line will clip sprites to itself + if (!line->isVisualPortal() && line->special != Line_Mirror) + continue; + + // don't clip sprites with portal's back side (it's transparent) + if (seg->curline->sidedef != line->sidedef[0]) + continue; + + portaldrawsegs.Push(seg); + } + } + + bool RenderTranslucent::ClipSpriteColumnWithPortals(int x, vissprite_t* spr) + { + RenderPortal *renderportal = RenderPortal::Instance(); + + // [ZZ] 10.01.2016: don't clip sprites from the root of a skybox. + if (renderportal->CurrentPortalInSkybox) + return false; + + for (drawseg_t *seg : portaldrawsegs) + { + // ignore segs from other portals + if (seg->CurrentPortalUniq != renderportal->CurrentPortalUniq) + continue; + + // (all checks that are already done in R_CollectPortals have been removed for performance reasons.) 
+ + // don't clip if the sprite is in front of the portal + if (!P_PointOnLineSidePrecise(spr->gpos.X, spr->gpos.Y, seg->curline->linedef)) + continue; + + // now if current column is covered by this drawseg, we clip it away + if ((x >= seg->x1) && (x < seg->x2)) + return true; + } + + return false; + } + + void RenderTranslucent::DrawSprite(vissprite_t *spr) + { + static short clipbot[MAXWIDTH]; + static short cliptop[MAXWIDTH]; + drawseg_t *ds; + int i; + int x1, x2; + int r1, r2; + short topclip, botclip; + short *clip1, *clip2; + FSWColormap *colormap = spr->Style.BaseColormap; + int colormapnum = spr->Style.ColormapNum; + F3DFloor *rover; + FDynamicColormap *mybasecolormap; + + Clip3DFloors *clip3d = Clip3DFloors::Instance(); + + // [RH] Check for particles + if (!spr->bIsVoxel && spr->pic == nullptr) + { + // kg3D - reject invisible parts + if ((clip3d->fake3D & FAKE3D_CLIPBOTTOM) && spr->gpos.Z <= clip3d->sclipBottom) return; + if ((clip3d->fake3D & FAKE3D_CLIPTOP) && spr->gpos.Z >= clip3d->sclipTop) return; + RenderParticle::Render(spr); + return; + } + + x1 = spr->x1; + x2 = spr->x2; + + // [RH] Quickly reject sprites with bad x ranges. + if (x1 >= x2) + return; + + // [RH] Sprites split behind a one-sided line can also be discarded. 
+ if (spr->sector == nullptr) + return; + + // kg3D - reject invisible parts + if ((clip3d->fake3D & FAKE3D_CLIPBOTTOM) && spr->gzt <= clip3d->sclipBottom) return; + if ((clip3d->fake3D & FAKE3D_CLIPTOP) && spr->gzb >= clip3d->sclipTop) return; + + // kg3D - correct colors now + if (!fixedcolormap && fixedlightlev < 0 && spr->sector->e && spr->sector->e->XFloor.lightlist.Size()) + { + if (!(clip3d->fake3D & FAKE3D_CLIPTOP)) + { + clip3d->sclipTop = spr->sector->ceilingplane.ZatPoint(ViewPos); + } + sector_t *sec = nullptr; + for (i = spr->sector->e->XFloor.lightlist.Size() - 1; i >= 0; i--) + { + if (clip3d->sclipTop <= spr->sector->e->XFloor.lightlist[i].plane.Zat0()) + { + rover = spr->sector->e->XFloor.lightlist[i].caster; + if (rover) + { + if (rover->flags & FF_DOUBLESHADOW && clip3d->sclipTop <= rover->bottom.plane->Zat0()) + { + break; + } + sec = rover->model; + if (rover->flags & FF_FADEWALLS) + { + mybasecolormap = sec->ColorMap; + } + else + { + mybasecolormap = spr->sector->e->XFloor.lightlist[i].extra_colormap; + } + } + break; + } + } + // found new values, recalculate + if (sec) + { + INTBOOL invertcolormap = (spr->Style.RenderStyle.Flags & STYLEF_InvertOverlay); + + if (spr->Style.RenderStyle.Flags & STYLEF_InvertSource) + { + invertcolormap = !invertcolormap; + } + + // Sprites that are added to the scene must fade to black. 
+ if (spr->Style.RenderStyle == LegacyRenderStyles[STYLE_Add] && mybasecolormap->Fade != 0) + { + mybasecolormap = GetSpecialLights(mybasecolormap->Color, 0, mybasecolormap->Desaturate); + } + + if (spr->Style.RenderStyle.Flags & STYLEF_FadeToBlack) + { + if (invertcolormap) + { // Fade to white + mybasecolormap = GetSpecialLights(mybasecolormap->Color, MAKERGB(255, 255, 255), mybasecolormap->Desaturate); + invertcolormap = false; + } + else + { // Fade to black + mybasecolormap = GetSpecialLights(mybasecolormap->Color, MAKERGB(0, 0, 0), mybasecolormap->Desaturate); + } + } + + // get light level + if (invertcolormap) + { + mybasecolormap = GetSpecialLights(mybasecolormap->Color, mybasecolormap->Fade.InverseColor(), mybasecolormap->Desaturate); + } + if (fixedlightlev >= 0) + { + spr->Style.BaseColormap = mybasecolormap; + spr->Style.ColormapNum = fixedlightlev >> COLORMAPSHIFT; + } + else if (!foggy && (spr->renderflags & RF_FULLBRIGHT)) + { // full bright + spr->Style.BaseColormap = (r_fullbrightignoresectorcolor) ? &FullNormalLight : mybasecolormap; + spr->Style.ColormapNum = 0; + } + else + { // diminished light + int spriteshade = LIGHT2SHADE(sec->lightlevel + r_actualextralight); + spr->Style.BaseColormap = mybasecolormap; + spr->Style.ColormapNum = GETPALOOKUP(r_SpriteVisibility / MAX(MINZ, (double)spr->depth), spriteshade); + } + } + } + + // [RH] Initialize the clipping arrays to their largest possible range + // instead of using a special "not clipped" value. This eliminates + // visual anomalies when looking down and should be faster, too. + topclip = 0; + botclip = viewheight; + + // killough 3/27/98: + // Clip the sprite against deep water and/or fake ceilings. 
+ // [RH] rewrote this to be based on which part of the sector is really visible + + double scale = InvZtoScale * spr->idepth; + double hzb = DBL_MIN, hzt = DBL_MAX; + + if (spr->bIsVoxel && spr->floorclip != 0) + { + hzb = spr->gzb; + } + + if (spr->heightsec && !(spr->heightsec->MoreFlags & SECF_IGNOREHEIGHTSEC)) + { // only things in specially marked sectors + if (spr->FakeFlatStat != WaterFakeSide::AboveCeiling) + { + double hz = spr->heightsec->floorplane.ZatPoint(spr->gpos); + int h = xs_RoundToInt(CenterY - (hz - ViewPos.Z) * scale); + + if (spr->FakeFlatStat == WaterFakeSide::BelowFloor) + { // seen below floor: clip top + if (!spr->bIsVoxel && h > topclip) + { + topclip = short(MIN(h, viewheight)); + } + hzt = MIN(hzt, hz); + } + else + { // seen in the middle: clip bottom + if (!spr->bIsVoxel && h < botclip) + { + botclip = MAX(0, h); + } + hzb = MAX(hzb, hz); + } + } + if (spr->FakeFlatStat != WaterFakeSide::BelowFloor && !(spr->heightsec->MoreFlags & SECF_FAKEFLOORONLY)) + { + double hz = spr->heightsec->ceilingplane.ZatPoint(spr->gpos); + int h = xs_RoundToInt(CenterY - (hz - ViewPos.Z) * scale); + + if (spr->FakeFlatStat == WaterFakeSide::AboveCeiling) + { // seen above ceiling: clip bottom + if (!spr->bIsVoxel && h < botclip) + { + botclip = MAX(0, h); + } + hzb = MAX(hzb, hz); + } + else + { // seen in the middle: clip top + if (!spr->bIsVoxel && h > topclip) + { + topclip = MIN(h, viewheight); + } + hzt = MIN(hzt, hz); + } + } + } + // killough 3/27/98: end special clipping for deep water / fake ceilings + else if (!spr->bIsVoxel && spr->floorclip) + { // [RH] Move floorclip stuff from R_DrawVisSprite to here + //int clip = ((FLOAT2FIXED(CenterY) - FixedMul (spr->texturemid - (spr->pic->GetHeight() << FRACBITS) + spr->floorclip, spr->yscale)) >> FRACBITS); + int clip = xs_RoundToInt(CenterY - (spr->texturemid - spr->pic->GetHeight() + spr->floorclip) * spr->yscale); + if (clip < botclip) + { + botclip = MAX(0, clip); + } + } + + if (clip3d->fake3D 
& FAKE3D_CLIPBOTTOM) + { + if (!spr->bIsVoxel) + { + double hz = clip3d->sclipBottom; + if (spr->fakefloor) + { + double floorz = spr->fakefloor->top.plane->Zat0(); + if (ViewPos.Z > floorz && floorz == clip3d->sclipBottom) + { + hz = spr->fakefloor->bottom.plane->Zat0(); + } + } + int h = xs_RoundToInt(CenterY - (hz - ViewPos.Z) * scale); + if (h < botclip) + { + botclip = MAX(0, h); + } + } + hzb = MAX(hzb, clip3d->sclipBottom); + } + if (clip3d->fake3D & FAKE3D_CLIPTOP) + { + if (!spr->bIsVoxel) + { + double hz = clip3d->sclipTop; + if (spr->fakeceiling != nullptr) + { + double ceilingZ = spr->fakeceiling->bottom.plane->Zat0(); + if (ViewPos.Z < ceilingZ && ceilingZ == clip3d->sclipTop) + { + hz = spr->fakeceiling->top.plane->Zat0(); + } + } + int h = xs_RoundToInt(CenterY - (hz - ViewPos.Z) * scale); + if (h > topclip) + { + topclip = short(MIN(h, viewheight)); + } + } + hzt = MIN(hzt, clip3d->sclipTop); + } + + if (topclip >= botclip) + { + spr->Style.BaseColormap = colormap; + spr->Style.ColormapNum = colormapnum; + return; + } + + i = x2 - x1; + clip1 = clipbot + x1; + clip2 = cliptop + x1; + do + { + *clip1++ = botclip; + *clip2++ = topclip; + } while (--i); + + // Scan drawsegs from end to start for obscuring segs. + // The first drawseg that is closer than the sprite is the clip seg. 
+ + // Modified by Lee Killough: + // (pointer check was originally nonportable + // and buggy, by going past LEFT end of array): + + // for (ds=ds_p-1 ; ds >= drawsegs ; ds--) old buggy code + + for (ds = ds_p; ds-- > firstdrawseg; ) // new -- killough + { + // [ZZ] portal handling here + //if (ds->CurrentPortalUniq != spr->CurrentPortalUniq) + // continue; + // [ZZ] WARNING: uncommenting the two above lines, totally breaks sprite clipping + + // kg3D - no clipping on fake segs + if (ds->fake) continue; + // determine if the drawseg obscures the sprite + if (ds->x1 >= x2 || ds->x2 <= x1 || + (!(ds->silhouette & SIL_BOTH) && ds->maskedtexturecol == -1 && + !ds->bFogBoundary)) + { + // does not cover sprite + continue; + } + + r1 = MAX(ds->x1, x1); + r2 = MIN(ds->x2, x2); + + float neardepth, fardepth; + if (!spr->bWallSprite) + { + if (ds->sz1 < ds->sz2) + { + neardepth = ds->sz1, fardepth = ds->sz2; + } + else + { + neardepth = ds->sz2, fardepth = ds->sz1; + } + } + + + // Check if sprite is in front of draw seg: + if ((!spr->bWallSprite && neardepth > spr->depth) || ((spr->bWallSprite || fardepth > spr->depth) && + (spr->gpos.Y - ds->curline->v1->fY()) * (ds->curline->v2->fX() - ds->curline->v1->fX()) - + (spr->gpos.X - ds->curline->v1->fX()) * (ds->curline->v2->fY() - ds->curline->v1->fY()) <= 0)) + { + RenderPortal *renderportal = RenderPortal::Instance(); + + // seg is behind sprite, so draw the mid texture if it has one + if (ds->CurrentPortalUniq == renderportal->CurrentPortalUniq && // [ZZ] instead, portal uniq check is made here + (ds->maskedtexturecol != -1 || ds->bFogBoundary)) + R_RenderMaskedSegRange(ds, r1, r2); + + continue; + } + + // clip this piece of the sprite + // killough 3/27/98: optimized and made much shorter + // [RH] Optimized further (at least for VC++; + // other compilers should be at least as good as before) + + if (ds->silhouette & SIL_BOTTOM) //bottom sil + { + clip1 = clipbot + r1; + clip2 = openings + ds->sprbottomclip + r1 - 
ds->x1; + i = r2 - r1; + do + { + if (*clip1 > *clip2) + *clip1 = *clip2; + clip1++; + clip2++; + } while (--i); + } + + if (ds->silhouette & SIL_TOP) // top sil + { + clip1 = cliptop + r1; + clip2 = openings + ds->sprtopclip + r1 - ds->x1; + i = r2 - r1; + do + { + if (*clip1 < *clip2) + *clip1 = *clip2; + clip1++; + clip2++; + } while (--i); + } + } + + // all clipping has been performed, so draw the sprite + + if (!spr->bIsVoxel) + { + if (!spr->bWallSprite) + { + RenderSprite::Render(spr, clipbot, cliptop); + } + else + { + RenderWallSprite::Render(spr, clipbot, cliptop); + } + } + else + { + // If it is completely clipped away, don't bother drawing it. + if (cliptop[x2] >= clipbot[x2]) + { + for (i = x1; i < x2; ++i) + { + if (cliptop[i] < clipbot[i]) + { + break; + } + } + if (i == x2) + { + spr->Style.BaseColormap = colormap; + spr->Style.ColormapNum = colormapnum; + return; + } + } + // Add everything outside the left and right edges to the clipping array + // for R_DrawVisVoxel(). + if (x1 > 0) + { + fillshort(cliptop, x1, viewheight); + } + if (x2 < viewwidth - 1) + { + fillshort(cliptop + x2, viewwidth - x2, viewheight); + } + int minvoxely = spr->gzt <= hzt ? 0 : xs_RoundToInt((spr->gzt - hzt) / spr->yscale); + int maxvoxely = spr->gzb > hzb ? 
INT_MAX : xs_RoundToInt((spr->gzt - hzb) / spr->yscale); + RenderVoxel::Render(spr, minvoxely, maxvoxely, cliptop, clipbot); + } + spr->Style.BaseColormap = colormap; + spr->Style.ColormapNum = colormapnum; + } + + void RenderTranslucent::DrawMaskedSingle(bool renew) + { + RenderPortal *renderportal = RenderPortal::Instance(); + + for (int i = SortedVisibleSpriteList::vsprcount; i > 0; i--) + { + if (SortedVisibleSpriteList::spritesorter[i - 1]->CurrentPortalUniq != renderportal->CurrentPortalUniq) + continue; // probably another time + DrawSprite(SortedVisibleSpriteList::spritesorter[i - 1]); + } + + // render any remaining masked mid textures + + // Modified by Lee Killough: + // (pointer check was originally nonportable + // and buggy, by going past LEFT end of array): + + // for (ds=ds_p-1 ; ds >= drawsegs ; ds--) old buggy code + + if (renew) + { + Clip3DFloors::Instance()->fake3D |= FAKE3D_REFRESHCLIP; + } + for (drawseg_t *ds = ds_p; ds-- > firstdrawseg; ) // new -- killough + { + // [ZZ] the same as above + if (ds->CurrentPortalUniq != renderportal->CurrentPortalUniq) + continue; + // kg3D - no fake segs + if (ds->fake) continue; + if (ds->maskedtexturecol != -1 || ds->bFogBoundary) + { + R_RenderMaskedSegRange(ds, ds->x1, ds->x2); + } + } + } + + void RenderTranslucent::Render() + { + CollectPortals(); + SortedVisibleSpriteList::Sort(DrewAVoxel ? 
SortedVisibleSpriteList::sv_compare2d : SortedVisibleSpriteList::sv_compare, VisibleSpriteList::firstvissprite - VisibleSpriteList::vissprites); + + Clip3DFloors *clip3d = Clip3DFloors::Instance(); + if (clip3d->height_top == nullptr) + { // kg3D - no visible 3D floors, normal rendering + DrawMaskedSingle(false); + } + else + { // kg3D - correct sorting + // ceilings + for (HeightLevel *hl = clip3d->height_cur; hl != nullptr && hl->height >= ViewPos.Z; hl = hl->prev) + { + if (hl->next) + { + clip3d->fake3D = FAKE3D_CLIPBOTTOM | FAKE3D_CLIPTOP; + clip3d->sclipTop = hl->next->height; + } + else + { + clip3d->fake3D = FAKE3D_CLIPBOTTOM; + } + clip3d->sclipBottom = hl->height; + DrawMaskedSingle(true); + R_DrawHeightPlanes(hl->height); + } + + // floors + clip3d->fake3D = FAKE3D_DOWN2UP | FAKE3D_CLIPTOP; + clip3d->sclipTop = clip3d->height_top->height; + DrawMaskedSingle(true); + for (HeightLevel *hl = clip3d->height_top; hl != nullptr && hl->height < ViewPos.Z; hl = hl->next) + { + R_DrawHeightPlanes(hl->height); + if (hl->next) + { + clip3d->fake3D = FAKE3D_DOWN2UP | FAKE3D_CLIPTOP | FAKE3D_CLIPBOTTOM; + clip3d->sclipTop = hl->next->height; + } + else + { + clip3d->fake3D = FAKE3D_DOWN2UP | FAKE3D_CLIPBOTTOM; + } + clip3d->sclipBottom = hl->height; + DrawMaskedSingle(true); + } + clip3d->DeleteHeights(); + clip3d->fake3D = 0; + } + RenderPlayerSprite::RenderPlayerSprites(); + } +} diff --git a/src/swrenderer/scene/r_translucent.h b/src/swrenderer/scene/r_translucent.h new file mode 100644 index 0000000000..30ca656793 --- /dev/null +++ b/src/swrenderer/scene/r_translucent.h @@ -0,0 +1,46 @@ +// +// Copyright (C) 1993-1996 by id Software, Inc. +// +// This source is available for distribution and/or modification +// only under the terms of the DOOM Source Code License as +// published by id Software. All rights reserved. 
+// +// The source is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// FITNESS FOR A PARTICULAR PURPOSE. See the DOOM Source Code License +// for more details. +// + +#pragma once + +#include "tarray.h" + +#define MINZ double((2048*4) / double(1 << 20)) + +struct particle_t; +struct FVoxel; + +namespace swrenderer +{ + struct vissprite_t; + struct drawseg_t; + + class RenderTranslucent + { + public: + static void Deinit(); + static void Clear(); + static void Render(); + + static bool DrewAVoxel; + + static bool ClipSpriteColumnWithPortals(int x, vissprite_t* spr); + + private: + static void CollectPortals(); + static void DrawSprite(vissprite_t *spr); + static void DrawMaskedSingle(bool renew); + + static TArray portaldrawsegs; + }; +} diff --git a/src/swrenderer/things/r_particle.cpp b/src/swrenderer/things/r_particle.cpp index 8ec92569ff..fd4585aac7 100644 --- a/src/swrenderer/things/r_particle.cpp +++ b/src/swrenderer/things/r_particle.cpp @@ -40,6 +40,7 @@ #include "p_effect.h" #include "swrenderer/scene/r_bsp.h" #include "swrenderer/scene/r_3dfloors.h" +#include "swrenderer/scene/r_translucent.h" #include "swrenderer/drawers/r_draw_rgba.h" #include "swrenderer/drawers/r_draw_pal.h" #include "v_palette.h" diff --git a/src/swrenderer/things/r_sprite.cpp b/src/swrenderer/things/r_sprite.cpp index 7751f859e7..edd7a24749 100644 --- a/src/swrenderer/things/r_sprite.cpp +++ b/src/swrenderer/things/r_sprite.cpp @@ -40,6 +40,7 @@ #include "p_effect.h" #include "swrenderer/scene/r_bsp.h" #include "swrenderer/scene/r_3dfloors.h" +#include "swrenderer/scene/r_translucent.h" #include "swrenderer/drawers/r_draw_rgba.h" #include "swrenderer/drawers/r_draw_pal.h" #include "v_palette.h" diff --git a/src/swrenderer/things/r_visiblesprite.cpp b/src/swrenderer/things/r_visiblesprite.cpp index 36ee18432b..2eb52770e3 100644 --- a/src/swrenderer/things/r_visiblesprite.cpp +++ 
b/src/swrenderer/things/r_visiblesprite.cpp @@ -34,699 +34,6 @@ #include "swrenderer/plane/r_visibleplane.h" #include "swrenderer/r_memory.h" -EXTERN_CVAR(Int, r_drawfuzz) -EXTERN_CVAR(Bool, r_drawvoxels) -EXTERN_CVAR(Bool, r_blendmethod) - -CVAR(Bool, r_fullbrightignoresectorcolor, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); -CVAR(Bool, r_splitsprites, true, CVAR_ARCHIVE) - namespace swrenderer { - void VisibleSpriteList::Deinit() - { - // Free vissprites - for (int i = 0; i < MaxVisSprites; ++i) - { - delete vissprites[i]; - } - free(vissprites); - vissprites = nullptr; - vissprite_p = lastvissprite = nullptr; - MaxVisSprites = 0; - } - - void VisibleSpriteList::Clear() - { - vissprite_p = firstvissprite; - } - - vissprite_t *VisibleSpriteList::Add() - { - if (vissprite_p == lastvissprite) - { - ptrdiff_t firstvisspritenum = firstvissprite - vissprites; - ptrdiff_t prevvisspritenum = vissprite_p - vissprites; - - MaxVisSprites = MaxVisSprites ? MaxVisSprites * 2 : 128; - vissprites = (vissprite_t **)M_Realloc(vissprites, MaxVisSprites * sizeof(vissprite_t)); - lastvissprite = &vissprites[MaxVisSprites]; - firstvissprite = &vissprites[firstvisspritenum]; - vissprite_p = &vissprites[prevvisspritenum]; - DPrintf(DMSG_NOTIFY, "MaxVisSprites increased to %d\n", MaxVisSprites); - - // Allocate sprites from the new pile - for (vissprite_t **p = vissprite_p; p < lastvissprite; ++p) - { - *p = new vissprite_t; - } - } - - vissprite_p++; - return *(vissprite_p - 1); - } - - int VisibleSpriteList::MaxVisSprites; - vissprite_t **VisibleSpriteList::vissprites; - vissprite_t **VisibleSpriteList::firstvissprite; - vissprite_t **VisibleSpriteList::vissprite_p; - vissprite_t **VisibleSpriteList::lastvissprite; - - ///////////////////////////////////////////////////////////////////////// - - void SortedVisibleSpriteList::Deinit() - { - delete[] spritesorter; - spritesortersize = 0; - spritesorter = nullptr; - } - - // This is the standard version, which does a simple test based on 
depth. - bool SortedVisibleSpriteList::sv_compare(vissprite_t *a, vissprite_t *b) - { - return a->idepth > b->idepth; - } - - // This is an alternate version, for when one or more voxel is in view. - // It does a 2D distance test based on whichever one is furthest from - // the viewpoint. - bool SortedVisibleSpriteList::sv_compare2d(vissprite_t *a, vissprite_t *b) - { - return DVector2(a->deltax, a->deltay).LengthSquared() < DVector2(b->deltax, b->deltay).LengthSquared(); - } - - void SortedVisibleSpriteList::Sort(bool(*compare)(vissprite_t *, vissprite_t *), size_t first) - { - int i; - vissprite_t **spr; - - vsprcount = int(VisibleSpriteList::vissprite_p - &VisibleSpriteList::vissprites[first]); - - if (vsprcount == 0) - return; - - if (spritesortersize < VisibleSpriteList::MaxVisSprites) - { - if (spritesorter != nullptr) - delete[] spritesorter; - spritesorter = new vissprite_t *[VisibleSpriteList::MaxVisSprites]; - spritesortersize = VisibleSpriteList::MaxVisSprites; - } - - if (!(i_compatflags & COMPATF_SPRITESORT)) - { - for (i = 0, spr = VisibleSpriteList::firstvissprite; i < vsprcount; i++, spr++) - { - spritesorter[i] = *spr; - } - } - else - { - // If the compatibility option is on sprites of equal distance need to - // be sorted in inverse order. This is most easily achieved by - // filling the sort array backwards before the sort. 
- for (i = 0, spr = VisibleSpriteList::firstvissprite + vsprcount - 1; i < vsprcount; i++, spr--) - { - spritesorter[i] = *spr; - } - } - - std::stable_sort(&spritesorter[0], &spritesorter[vsprcount], compare); - } - - vissprite_t **SortedVisibleSpriteList::spritesorter; - int SortedVisibleSpriteList::spritesortersize = 0; - int SortedVisibleSpriteList::vsprcount; - - ///////////////////////////////////////////////////////////////////////// - - bool RenderTranslucent::DrewAVoxel; - TArray RenderTranslucent::portaldrawsegs; - - void RenderTranslucent::Deinit() - { - VisibleSpriteList::Deinit(); - SortedVisibleSpriteList::Deinit(); - RenderVoxel::Deinit(); - } - - void RenderTranslucent::Clear() - { - VisibleSpriteList::Clear(); - DrewAVoxel = false; - } - - void RenderTranslucent::CollectPortals() - { - // This function collects all drawsegs that may be of interest to R_ClipSpriteColumnWithPortals - // Having that function over the entire list of drawsegs can break down performance quite drastically. - // This is doing the costly stuff only once so that R_ClipSpriteColumnWithPortals can - // a) exit early if no relevant info is found and - // b) skip most of the collected drawsegs which have no portal attached. - portaldrawsegs.Clear(); - for (drawseg_t* seg = ds_p; seg-- > firstdrawseg; ) // copied code from killough below - { - // I don't know what makes this happen (some old top-down portal code or possibly skybox code? something adds null lines...) - // crashes at the first frame of the first map of Action2.wad - if (!seg->curline) continue; - - line_t* line = seg->curline->linedef; - // ignore minisegs from GL nodes. 
- if (!line) continue; - - // check if this line will clip sprites to itself - if (!line->isVisualPortal() && line->special != Line_Mirror) - continue; - - // don't clip sprites with portal's back side (it's transparent) - if (seg->curline->sidedef != line->sidedef[0]) - continue; - - portaldrawsegs.Push(seg); - } - } - - bool RenderTranslucent::ClipSpriteColumnWithPortals(int x, vissprite_t* spr) - { - RenderPortal *renderportal = RenderPortal::Instance(); - - // [ZZ] 10.01.2016: don't clip sprites from the root of a skybox. - if (renderportal->CurrentPortalInSkybox) - return false; - - for (drawseg_t *seg : portaldrawsegs) - { - // ignore segs from other portals - if (seg->CurrentPortalUniq != renderportal->CurrentPortalUniq) - continue; - - // (all checks that are already done in R_CollectPortals have been removed for performance reasons.) - - // don't clip if the sprite is in front of the portal - if (!P_PointOnLineSidePrecise(spr->gpos.X, spr->gpos.Y, seg->curline->linedef)) - continue; - - // now if current column is covered by this drawseg, we clip it away - if ((x >= seg->x1) && (x < seg->x2)) - return true; - } - - return false; - } - - void RenderTranslucent::DrawSprite(vissprite_t *spr) - { - static short clipbot[MAXWIDTH]; - static short cliptop[MAXWIDTH]; - drawseg_t *ds; - int i; - int x1, x2; - int r1, r2; - short topclip, botclip; - short *clip1, *clip2; - FSWColormap *colormap = spr->Style.BaseColormap; - int colormapnum = spr->Style.ColormapNum; - F3DFloor *rover; - FDynamicColormap *mybasecolormap; - - Clip3DFloors *clip3d = Clip3DFloors::Instance(); - - // [RH] Check for particles - if (!spr->bIsVoxel && spr->pic == nullptr) - { - // kg3D - reject invisible parts - if ((clip3d->fake3D & FAKE3D_CLIPBOTTOM) && spr->gpos.Z <= clip3d->sclipBottom) return; - if ((clip3d->fake3D & FAKE3D_CLIPTOP) && spr->gpos.Z >= clip3d->sclipTop) return; - RenderParticle::Render(spr); - return; - } - - x1 = spr->x1; - x2 = spr->x2; - - // [RH] Quickly reject sprites 
with bad x ranges. - if (x1 >= x2) - return; - - // [RH] Sprites split behind a one-sided line can also be discarded. - if (spr->sector == nullptr) - return; - - // kg3D - reject invisible parts - if ((clip3d->fake3D & FAKE3D_CLIPBOTTOM) && spr->gzt <= clip3d->sclipBottom) return; - if ((clip3d->fake3D & FAKE3D_CLIPTOP) && spr->gzb >= clip3d->sclipTop) return; - - // kg3D - correct colors now - if (!fixedcolormap && fixedlightlev < 0 && spr->sector->e && spr->sector->e->XFloor.lightlist.Size()) - { - if (!(clip3d->fake3D & FAKE3D_CLIPTOP)) - { - clip3d->sclipTop = spr->sector->ceilingplane.ZatPoint(ViewPos); - } - sector_t *sec = nullptr; - for (i = spr->sector->e->XFloor.lightlist.Size() - 1; i >= 0; i--) - { - if (clip3d->sclipTop <= spr->sector->e->XFloor.lightlist[i].plane.Zat0()) - { - rover = spr->sector->e->XFloor.lightlist[i].caster; - if (rover) - { - if (rover->flags & FF_DOUBLESHADOW && clip3d->sclipTop <= rover->bottom.plane->Zat0()) - { - break; - } - sec = rover->model; - if (rover->flags & FF_FADEWALLS) - { - mybasecolormap = sec->ColorMap; - } - else - { - mybasecolormap = spr->sector->e->XFloor.lightlist[i].extra_colormap; - } - } - break; - } - } - // found new values, recalculate - if (sec) - { - INTBOOL invertcolormap = (spr->Style.RenderStyle.Flags & STYLEF_InvertOverlay); - - if (spr->Style.RenderStyle.Flags & STYLEF_InvertSource) - { - invertcolormap = !invertcolormap; - } - - // Sprites that are added to the scene must fade to black. 
- if (spr->Style.RenderStyle == LegacyRenderStyles[STYLE_Add] && mybasecolormap->Fade != 0) - { - mybasecolormap = GetSpecialLights(mybasecolormap->Color, 0, mybasecolormap->Desaturate); - } - - if (spr->Style.RenderStyle.Flags & STYLEF_FadeToBlack) - { - if (invertcolormap) - { // Fade to white - mybasecolormap = GetSpecialLights(mybasecolormap->Color, MAKERGB(255, 255, 255), mybasecolormap->Desaturate); - invertcolormap = false; - } - else - { // Fade to black - mybasecolormap = GetSpecialLights(mybasecolormap->Color, MAKERGB(0, 0, 0), mybasecolormap->Desaturate); - } - } - - // get light level - if (invertcolormap) - { - mybasecolormap = GetSpecialLights(mybasecolormap->Color, mybasecolormap->Fade.InverseColor(), mybasecolormap->Desaturate); - } - if (fixedlightlev >= 0) - { - spr->Style.BaseColormap = mybasecolormap; - spr->Style.ColormapNum = fixedlightlev >> COLORMAPSHIFT; - } - else if (!foggy && (spr->renderflags & RF_FULLBRIGHT)) - { // full bright - spr->Style.BaseColormap = (r_fullbrightignoresectorcolor) ? &FullNormalLight : mybasecolormap; - spr->Style.ColormapNum = 0; - } - else - { // diminished light - int spriteshade = LIGHT2SHADE(sec->lightlevel + r_actualextralight); - spr->Style.BaseColormap = mybasecolormap; - spr->Style.ColormapNum = GETPALOOKUP(r_SpriteVisibility / MAX(MINZ, (double)spr->depth), spriteshade); - } - } - } - - // [RH] Initialize the clipping arrays to their largest possible range - // instead of using a special "not clipped" value. This eliminates - // visual anomalies when looking down and should be faster, too. - topclip = 0; - botclip = viewheight; - - // killough 3/27/98: - // Clip the sprite against deep water and/or fake ceilings. 
- // [RH] rewrote this to be based on which part of the sector is really visible - - double scale = InvZtoScale * spr->idepth; - double hzb = DBL_MIN, hzt = DBL_MAX; - - if (spr->bIsVoxel && spr->floorclip != 0) - { - hzb = spr->gzb; - } - - if (spr->heightsec && !(spr->heightsec->MoreFlags & SECF_IGNOREHEIGHTSEC)) - { // only things in specially marked sectors - if (spr->FakeFlatStat != WaterFakeSide::AboveCeiling) - { - double hz = spr->heightsec->floorplane.ZatPoint(spr->gpos); - int h = xs_RoundToInt(CenterY - (hz - ViewPos.Z) * scale); - - if (spr->FakeFlatStat == WaterFakeSide::BelowFloor) - { // seen below floor: clip top - if (!spr->bIsVoxel && h > topclip) - { - topclip = short(MIN(h, viewheight)); - } - hzt = MIN(hzt, hz); - } - else - { // seen in the middle: clip bottom - if (!spr->bIsVoxel && h < botclip) - { - botclip = MAX(0, h); - } - hzb = MAX(hzb, hz); - } - } - if (spr->FakeFlatStat != WaterFakeSide::BelowFloor && !(spr->heightsec->MoreFlags & SECF_FAKEFLOORONLY)) - { - double hz = spr->heightsec->ceilingplane.ZatPoint(spr->gpos); - int h = xs_RoundToInt(CenterY - (hz - ViewPos.Z) * scale); - - if (spr->FakeFlatStat == WaterFakeSide::AboveCeiling) - { // seen above ceiling: clip bottom - if (!spr->bIsVoxel && h < botclip) - { - botclip = MAX(0, h); - } - hzb = MAX(hzb, hz); - } - else - { // seen in the middle: clip top - if (!spr->bIsVoxel && h > topclip) - { - topclip = MIN(h, viewheight); - } - hzt = MIN(hzt, hz); - } - } - } - // killough 3/27/98: end special clipping for deep water / fake ceilings - else if (!spr->bIsVoxel && spr->floorclip) - { // [RH] Move floorclip stuff from R_DrawVisSprite to here - //int clip = ((FLOAT2FIXED(CenterY) - FixedMul (spr->texturemid - (spr->pic->GetHeight() << FRACBITS) + spr->floorclip, spr->yscale)) >> FRACBITS); - int clip = xs_RoundToInt(CenterY - (spr->texturemid - spr->pic->GetHeight() + spr->floorclip) * spr->yscale); - if (clip < botclip) - { - botclip = MAX(0, clip); - } - } - - if (clip3d->fake3D 
& FAKE3D_CLIPBOTTOM) - { - if (!spr->bIsVoxel) - { - double hz = clip3d->sclipBottom; - if (spr->fakefloor) - { - double floorz = spr->fakefloor->top.plane->Zat0(); - if (ViewPos.Z > floorz && floorz == clip3d->sclipBottom) - { - hz = spr->fakefloor->bottom.plane->Zat0(); - } - } - int h = xs_RoundToInt(CenterY - (hz - ViewPos.Z) * scale); - if (h < botclip) - { - botclip = MAX(0, h); - } - } - hzb = MAX(hzb, clip3d->sclipBottom); - } - if (clip3d->fake3D & FAKE3D_CLIPTOP) - { - if (!spr->bIsVoxel) - { - double hz = clip3d->sclipTop; - if (spr->fakeceiling != nullptr) - { - double ceilingZ = spr->fakeceiling->bottom.plane->Zat0(); - if (ViewPos.Z < ceilingZ && ceilingZ == clip3d->sclipTop) - { - hz = spr->fakeceiling->top.plane->Zat0(); - } - } - int h = xs_RoundToInt(CenterY - (hz - ViewPos.Z) * scale); - if (h > topclip) - { - topclip = short(MIN(h, viewheight)); - } - } - hzt = MIN(hzt, clip3d->sclipTop); - } - - if (topclip >= botclip) - { - spr->Style.BaseColormap = colormap; - spr->Style.ColormapNum = colormapnum; - return; - } - - i = x2 - x1; - clip1 = clipbot + x1; - clip2 = cliptop + x1; - do - { - *clip1++ = botclip; - *clip2++ = topclip; - } while (--i); - - // Scan drawsegs from end to start for obscuring segs. - // The first drawseg that is closer than the sprite is the clip seg. 
- - // Modified by Lee Killough: - // (pointer check was originally nonportable - // and buggy, by going past LEFT end of array): - - // for (ds=ds_p-1 ; ds >= drawsegs ; ds--) old buggy code - - for (ds = ds_p; ds-- > firstdrawseg; ) // new -- killough - { - // [ZZ] portal handling here - //if (ds->CurrentPortalUniq != spr->CurrentPortalUniq) - // continue; - // [ZZ] WARNING: uncommenting the two above lines, totally breaks sprite clipping - - // kg3D - no clipping on fake segs - if (ds->fake) continue; - // determine if the drawseg obscures the sprite - if (ds->x1 >= x2 || ds->x2 <= x1 || - (!(ds->silhouette & SIL_BOTH) && ds->maskedtexturecol == -1 && - !ds->bFogBoundary)) - { - // does not cover sprite - continue; - } - - r1 = MAX(ds->x1, x1); - r2 = MIN(ds->x2, x2); - - float neardepth, fardepth; - if (!spr->bWallSprite) - { - if (ds->sz1 < ds->sz2) - { - neardepth = ds->sz1, fardepth = ds->sz2; - } - else - { - neardepth = ds->sz2, fardepth = ds->sz1; - } - } - - - // Check if sprite is in front of draw seg: - if ((!spr->bWallSprite && neardepth > spr->depth) || ((spr->bWallSprite || fardepth > spr->depth) && - (spr->gpos.Y - ds->curline->v1->fY()) * (ds->curline->v2->fX() - ds->curline->v1->fX()) - - (spr->gpos.X - ds->curline->v1->fX()) * (ds->curline->v2->fY() - ds->curline->v1->fY()) <= 0)) - { - RenderPortal *renderportal = RenderPortal::Instance(); - - // seg is behind sprite, so draw the mid texture if it has one - if (ds->CurrentPortalUniq == renderportal->CurrentPortalUniq && // [ZZ] instead, portal uniq check is made here - (ds->maskedtexturecol != -1 || ds->bFogBoundary)) - R_RenderMaskedSegRange(ds, r1, r2); - - continue; - } - - // clip this piece of the sprite - // killough 3/27/98: optimized and made much shorter - // [RH] Optimized further (at least for VC++; - // other compilers should be at least as good as before) - - if (ds->silhouette & SIL_BOTTOM) //bottom sil - { - clip1 = clipbot + r1; - clip2 = openings + ds->sprbottomclip + r1 - 
ds->x1; - i = r2 - r1; - do - { - if (*clip1 > *clip2) - *clip1 = *clip2; - clip1++; - clip2++; - } while (--i); - } - - if (ds->silhouette & SIL_TOP) // top sil - { - clip1 = cliptop + r1; - clip2 = openings + ds->sprtopclip + r1 - ds->x1; - i = r2 - r1; - do - { - if (*clip1 < *clip2) - *clip1 = *clip2; - clip1++; - clip2++; - } while (--i); - } - } - - // all clipping has been performed, so draw the sprite - - if (!spr->bIsVoxel) - { - if (!spr->bWallSprite) - { - RenderSprite::Render(spr, clipbot, cliptop); - } - else - { - RenderWallSprite::Render(spr, clipbot, cliptop); - } - } - else - { - // If it is completely clipped away, don't bother drawing it. - if (cliptop[x2] >= clipbot[x2]) - { - for (i = x1; i < x2; ++i) - { - if (cliptop[i] < clipbot[i]) - { - break; - } - } - if (i == x2) - { - spr->Style.BaseColormap = colormap; - spr->Style.ColormapNum = colormapnum; - return; - } - } - // Add everything outside the left and right edges to the clipping array - // for R_DrawVisVoxel(). - if (x1 > 0) - { - fillshort(cliptop, x1, viewheight); - } - if (x2 < viewwidth - 1) - { - fillshort(cliptop + x2, viewwidth - x2, viewheight); - } - int minvoxely = spr->gzt <= hzt ? 0 : xs_RoundToInt((spr->gzt - hzt) / spr->yscale); - int maxvoxely = spr->gzb > hzb ? 
INT_MAX : xs_RoundToInt((spr->gzt - hzb) / spr->yscale); - RenderVoxel::Render(spr, minvoxely, maxvoxely, cliptop, clipbot); - } - spr->Style.BaseColormap = colormap; - spr->Style.ColormapNum = colormapnum; - } - - void RenderTranslucent::DrawMaskedSingle(bool renew) - { - RenderPortal *renderportal = RenderPortal::Instance(); - - for (int i = SortedVisibleSpriteList::vsprcount; i > 0; i--) - { - if (SortedVisibleSpriteList::spritesorter[i - 1]->CurrentPortalUniq != renderportal->CurrentPortalUniq) - continue; // probably another time - DrawSprite(SortedVisibleSpriteList::spritesorter[i - 1]); - } - - // render any remaining masked mid textures - - // Modified by Lee Killough: - // (pointer check was originally nonportable - // and buggy, by going past LEFT end of array): - - // for (ds=ds_p-1 ; ds >= drawsegs ; ds--) old buggy code - - if (renew) - { - Clip3DFloors::Instance()->fake3D |= FAKE3D_REFRESHCLIP; - } - for (drawseg_t *ds = ds_p; ds-- > firstdrawseg; ) // new -- killough - { - // [ZZ] the same as above - if (ds->CurrentPortalUniq != renderportal->CurrentPortalUniq) - continue; - // kg3D - no fake segs - if (ds->fake) continue; - if (ds->maskedtexturecol != -1 || ds->bFogBoundary) - { - R_RenderMaskedSegRange(ds, ds->x1, ds->x2); - } - } - } - - void RenderTranslucent::Render() - { - CollectPortals(); - SortedVisibleSpriteList::Sort(DrewAVoxel ? 
SortedVisibleSpriteList::sv_compare2d : SortedVisibleSpriteList::sv_compare, VisibleSpriteList::firstvissprite - VisibleSpriteList::vissprites); - - Clip3DFloors *clip3d = Clip3DFloors::Instance(); - if (clip3d->height_top == nullptr) - { // kg3D - no visible 3D floors, normal rendering - DrawMaskedSingle(false); - } - else - { // kg3D - correct sorting - // ceilings - for (HeightLevel *hl = clip3d->height_cur; hl != nullptr && hl->height >= ViewPos.Z; hl = hl->prev) - { - if (hl->next) - { - clip3d->fake3D = FAKE3D_CLIPBOTTOM | FAKE3D_CLIPTOP; - clip3d->sclipTop = hl->next->height; - } - else - { - clip3d->fake3D = FAKE3D_CLIPBOTTOM; - } - clip3d->sclipBottom = hl->height; - DrawMaskedSingle(true); - R_DrawHeightPlanes(hl->height); - } - - // floors - clip3d->fake3D = FAKE3D_DOWN2UP | FAKE3D_CLIPTOP; - clip3d->sclipTop = clip3d->height_top->height; - DrawMaskedSingle(true); - for (HeightLevel *hl = clip3d->height_top; hl != nullptr && hl->height < ViewPos.Z; hl = hl->next) - { - R_DrawHeightPlanes(hl->height); - if (hl->next) - { - clip3d->fake3D = FAKE3D_DOWN2UP | FAKE3D_CLIPTOP | FAKE3D_CLIPBOTTOM; - clip3d->sclipTop = hl->next->height; - } - else - { - clip3d->fake3D = FAKE3D_DOWN2UP | FAKE3D_CLIPBOTTOM; - } - clip3d->sclipBottom = hl->height; - DrawMaskedSingle(true); - } - clip3d->DeleteHeights(); - clip3d->fake3D = 0; - } - RenderPlayerSprite::RenderPlayerSprites(); - } } diff --git a/src/swrenderer/things/r_visiblesprite.h b/src/swrenderer/things/r_visiblesprite.h index b9405dc199..06e50f2922 100644 --- a/src/swrenderer/things/r_visiblesprite.h +++ b/src/swrenderer/things/r_visiblesprite.h @@ -15,6 +15,7 @@ #include "swrenderer/line/r_line.h" #include "swrenderer/scene/r_bsp.h" +#include "swrenderer/things/r_visiblespritelist.h" #define MINZ double((2048*4) / double(1 << 20)) @@ -23,8 +24,6 @@ struct FVoxel; namespace swrenderer { - struct drawseg_t; - struct vissprite_t { struct posang @@ -89,56 +88,4 @@ namespace swrenderer vissprite_t() {} }; - - class 
VisibleSpriteList - { - public: - static int MaxVisSprites; - static vissprite_t **vissprites; - static vissprite_t **firstvissprite; - static vissprite_t **vissprite_p; - - static void Deinit(); - static void Clear(); - static vissprite_t *Add(); - - private: - static vissprite_t **lastvissprite; - }; - - class SortedVisibleSpriteList - { - public: - static void Deinit(); - - static void Sort(bool(*compare)(vissprite_t *, vissprite_t *), size_t first); - - static bool sv_compare(vissprite_t *a, vissprite_t *b); - static bool sv_compare2d(vissprite_t *a, vissprite_t *b); - - static vissprite_t **spritesorter; - static int vsprcount; - - private: - static int spritesortersize; - }; - - class RenderTranslucent - { - public: - static void Deinit(); - static void Clear(); - static void Render(); - - static bool DrewAVoxel; - - static bool ClipSpriteColumnWithPortals(int x, vissprite_t* spr); - - private: - static void CollectPortals(); - static void DrawSprite(vissprite_t *spr); - static void DrawMaskedSingle(bool renew); - - static TArray portaldrawsegs; - }; } diff --git a/src/swrenderer/things/r_visiblespritelist.cpp b/src/swrenderer/things/r_visiblespritelist.cpp new file mode 100644 index 0000000000..fff4dce8eb --- /dev/null +++ b/src/swrenderer/things/r_visiblespritelist.cpp @@ -0,0 +1,146 @@ +// +// Copyright (C) 1993-1996 by id Software, Inc. +// +// This source is available for distribution and/or modification +// only under the terms of the DOOM Source Code License as +// published by id Software. All rights reserved. +// +// The source is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// FITNESS FOR A PARTICULAR PURPOSE. See the DOOM Source Code License +// for more details. 
+//
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <algorithm>
+#include "p_lnspec.h"
+#include "templates.h"
+#include "doomdef.h"
+#include "m_swap.h"
+#include "i_system.h"
+#include "w_wad.h"
+#include "g_levellocals.h"
+#include "p_maputl.h"
+#include "swrenderer/r_main.h"
+#include "swrenderer/things/r_visiblesprite.h"
+#include "swrenderer/things/r_visiblespritelist.h"
+#include "swrenderer/r_memory.h"
+
+namespace swrenderer
+{
+	void VisibleSpriteList::Deinit()
+	{
+		// Destroy every pooled sprite (each slot was filled with operator new by Add), then the table.
+		for (int i = 0; i < MaxVisSprites; ++i)
+		{
+			delete vissprites[i];
+		}
+		free(vissprites);
+		vissprites = nullptr;
+		firstvissprite = vissprite_p = lastvissprite = nullptr; // no cursor may keep pointing into the freed table
+		MaxVisSprites = 0;
+	}
+
+	void VisibleSpriteList::Clear()
+	{
+		vissprite_p = firstvissprite; // rewind the write cursor; pooled sprites stay allocated for reuse
+	}
+
+	vissprite_t *VisibleSpriteList::Add()
+	{
+		if (vissprite_p == lastvissprite) // pool exhausted (or never allocated): grow the table
+		{
+			ptrdiff_t firstvisspritenum = firstvissprite - vissprites; // cursors are kept as indices because
+			ptrdiff_t prevvisspritenum = vissprite_p - vissprites; // the table may move when it is reallocated
+
+			MaxVisSprites = MaxVisSprites ? MaxVisSprites * 2 : 128;
+			vissprites = (vissprite_t **)M_Realloc(vissprites, MaxVisSprites * sizeof(vissprite_t *)); // the table stores pointers
+			lastvissprite = &vissprites[MaxVisSprites];
+			firstvissprite = &vissprites[firstvisspritenum];
+			vissprite_p = &vissprites[prevvisspritenum];
+			DPrintf(DMSG_NOTIFY, "MaxVisSprites increased to %d\n", MaxVisSprites);
+
+			// Growth only happens with the cursor at the old end, so [vissprite_p, lastvissprite) is exactly the new slots.
+			for (vissprite_t **p = vissprite_p; p < lastvissprite; ++p)
+			{
+				*p = new vissprite_t;
+			}
+		}
+
+		vissprite_p++;
+		return *(vissprite_p - 1); // hand out the sprite at the cursor's previous position
+	}
+
+	int VisibleSpriteList::MaxVisSprites;
+	vissprite_t **VisibleSpriteList::vissprites;
+	vissprite_t **VisibleSpriteList::firstvissprite;
+	vissprite_t **VisibleSpriteList::vissprite_p;
+	vissprite_t **VisibleSpriteList::lastvissprite;
+
+	/////////////////////////////////////////////////////////////////////////
+
+	void SortedVisibleSpriteList::Deinit()
+	{
+		delete[] spritesorter; // the sort array holds borrowed pointers only; the sprites themselves are not freed here
+		spritesortersize = 0;
+		spritesorter = nullptr;
+	}
+
+	// This is the standard version: a sorts before b when a->idepth is greater than b->idepth.
+	bool SortedVisibleSpriteList::sv_compare(vissprite_t *a, vissprite_t *b)
+	{
+		return a->idepth > b->idepth;
+	}
+
+	// This is an alternate version, for when one or more voxel is in view.
+	// It compares the squared 2D lengths of (deltax, deltay), so no square root
+	// is needed; the sprite with the smaller length sorts first.
+	bool SortedVisibleSpriteList::sv_compare2d(vissprite_t *a, vissprite_t *b)
+	{
+		return DVector2(a->deltax, a->deltay).LengthSquared() < DVector2(b->deltax, b->deltay).LengthSquared();
+	}
+
+	void SortedVisibleSpriteList::Sort(bool(*compare)(vissprite_t *, vissprite_t *), size_t first)
+	{
+		// Input is [vissprites + first, vissprite_p). The result is left in
+		// spritesorter[0 .. vsprcount); the source list itself is not reordered.
+
+		vissprite_t **begin = &VisibleSpriteList::vissprites[first];
+		vsprcount = int(VisibleSpriteList::vissprite_p - begin);
+		if (vsprcount == 0)
+			return;
+
+		if (spritesortersize < VisibleSpriteList::MaxVisSprites)
+		{
+			// Resize the sort array to the pool size; delete[] on nullptr is a no-op, so no guard is needed.
+			delete[] spritesorter;
+			spritesorter = new vissprite_t *[VisibleSpriteList::MaxVisSprites];
+			spritesortersize = VisibleSpriteList::MaxVisSprites;
+		}
+
+		if (!(i_compatflags & COMPATF_SPRITESORT))
+		{
+			for (int i = 0; i < vsprcount; i++)
+			{
+				spritesorter[i] = begin[i];
+			}
+		}
+		else
+		{
+			// If the compatibility option is on sprites of equal distance need to
+			// be sorted in inverse order. This is most easily achieved by
+			// filling the sort array backwards before the sort.
+			for (int i = 0; i < vsprcount; i++)
+			{
+				spritesorter[i] = begin[vsprcount - 1 - i];
+			}
+		}
+
+		std::stable_sort(&spritesorter[0], &spritesorter[vsprcount], compare);
+	}
+
+	vissprite_t **SortedVisibleSpriteList::spritesorter;
+	int SortedVisibleSpriteList::spritesortersize = 0;
+	int SortedVisibleSpriteList::vsprcount;
+}
diff --git a/src/swrenderer/things/r_visiblespritelist.h b/src/swrenderer/things/r_visiblespritelist.h
new file mode 100644
index 0000000000..0e3fddfdc8
--- /dev/null
+++ b/src/swrenderer/things/r_visiblespritelist.h
@@ -0,0 +1,53 @@
+//
+// Copyright (C) 1993-1996 by id Software, Inc.
+//
+// This source is available for distribution and/or modification
+// only under the terms of the DOOM Source Code License as
+// published by id Software. All rights reserved.
+//
+// The source is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// FITNESS FOR A PARTICULAR PURPOSE. See the DOOM Source Code License
+// for more details.
+//
+
+#pragma once
+
+namespace swrenderer
+{
+	struct drawseg_t;
+	struct vissprite_t;
+
+	class VisibleSpriteList // growable pool of vissprite_t objects, addressed through a table of pointers
+	{
+	public:
+		static int MaxVisSprites; // number of slots in vissprites; Add() starts it at 128 and doubles it when full
+		static vissprite_t **vissprites; // base of the pointer table
+		static vissprite_t **firstvissprite; // start of the active range; Clear() rewinds vissprite_p to it
+		static vissprite_t **vissprite_p; // slot that the next Add() will hand out
+
+		static void Deinit(); // deletes every pooled sprite and frees the table
+		static void Clear(); // rewinds vissprite_p to firstvissprite without freeing anything
+		static vissprite_t *Add(); // returns the sprite at vissprite_p and advances it, growing the table when full
+
+	private:
+		static vissprite_t **lastvissprite; // one past the last slot of vissprites
+	};
+
+	class SortedVisibleSpriteList // scratch array holding the visible sprites in draw-sorted order
+	{
+	public:
+		static void Deinit(); // frees spritesorter (the array only, not the sprites it points to)
+
+		static void Sort(bool(*compare)(vissprite_t *, vissprite_t *), size_t first); // fills spritesorter and stable-sorts it with compare
+
+		static bool sv_compare(vissprite_t *a, vissprite_t *b); // true when a->idepth > b->idepth
+		static bool sv_compare2d(vissprite_t *a, vissprite_t *b); // true when a's squared (deltax, deltay) length is smaller than b's
+
+		static vissprite_t **spritesorter; // output of Sort()
+		static int vsprcount; // number of valid entries in spritesorter after Sort()
+
+	private:
+		static int spritesortersize; // allocated length of spritesorter
+	};
+}
From 3f8e5d26b8040b53b51a126acd97558253ffe676 Mon Sep 17 00:00:00 2001
From: Magnus Norddahl
Date: Wed, 11 Jan 2017 20:42:39 +0100
Subject: [PATCH 690/912] Rename RenderBSP to RenderOpaquePass and RenderTranslucent to RenderTranslucentPass
---
 src/CMakeLists.txt | 4 +--
src/CMakeLists.txt | 4 +-- src/swrenderer/line/r_fogboundary.cpp | 2 +- src/swrenderer/line/r_line.cpp | 14 ++++----- src/swrenderer/line/r_walldraw.cpp | 2 +- src/swrenderer/plane/r_flatplane.cpp | 2 +- src/swrenderer/plane/r_skyplane.cpp | 2 +- src/swrenderer/plane/r_slopeplane.cpp | 2 +- src/swrenderer/plane/r_visibleplane.cpp | 2 +- src/swrenderer/r_main.cpp | 16 +++++----- src/swrenderer/r_swrenderer.cpp | 4 +-- src/swrenderer/scene/r_3dfloors.cpp | 6 ++-- .../scene/{r_bsp.cpp => r_opaque_pass.cpp} | 30 +++++++++---------- .../scene/{r_bsp.h => r_opaque_pass.h} | 4 +-- src/swrenderer/scene/r_portal.cpp | 20 ++++++------- ...translucent.cpp => r_translucent_pass.cpp} | 20 ++++++------- .../{r_translucent.h => r_translucent_pass.h} | 2 +- src/swrenderer/segments/r_clipsegment.cpp | 2 +- src/swrenderer/segments/r_drawsegment.cpp | 4 +-- src/swrenderer/segments/r_portalsegment.cpp | 2 +- src/swrenderer/things/r_decal.cpp | 10 +++---- src/swrenderer/things/r_particle.cpp | 12 ++++---- src/swrenderer/things/r_particle.h | 2 +- src/swrenderer/things/r_playersprite.cpp | 4 +-- src/swrenderer/things/r_sprite.cpp | 6 ++-- src/swrenderer/things/r_visiblesprite.h | 2 +- src/swrenderer/things/r_voxel.cpp | 4 +-- src/swrenderer/things/r_wallsprite.cpp | 6 ++-- 27 files changed, 93 insertions(+), 93 deletions(-) rename src/swrenderer/scene/{r_bsp.cpp => r_opaque_pass.cpp} (97%) rename src/swrenderer/scene/{r_bsp.h => r_opaque_pass.h} (97%) rename src/swrenderer/scene/{r_translucent.cpp => r_translucent_pass.cpp} (97%) rename src/swrenderer/scene/{r_translucent.h => r_translucent_pass.h} (97%) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 5865a4e93e..3e6e01a673 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -846,9 +846,9 @@ set( FASTMATH_PCH_SOURCES swrenderer/drawers/r_drawers.cpp swrenderer/drawers/r_thread.cpp swrenderer/scene/r_3dfloors.cpp - swrenderer/scene/r_bsp.cpp + swrenderer/scene/r_opaque_pass.cpp swrenderer/scene/r_portal.cpp - 
swrenderer/scene/r_translucent.cpp + swrenderer/scene/r_translucent_pass.cpp swrenderer/line/r_line.cpp swrenderer/line/r_walldraw.cpp swrenderer/line/r_wallsetup.cpp diff --git a/src/swrenderer/line/r_fogboundary.cpp b/src/swrenderer/line/r_fogboundary.cpp index 1c8c92160a..edaa942655 100644 --- a/src/swrenderer/line/r_fogboundary.cpp +++ b/src/swrenderer/line/r_fogboundary.cpp @@ -31,7 +31,7 @@ #include "r_data/colormaps.h" #include "gl/dynlights/gl_dynlight.h" #include "swrenderer/drawers/r_draw_rgba.h" -#include "swrenderer/scene/r_bsp.h" +#include "swrenderer/scene/r_opaque_pass.h" #include "swrenderer/scene/r_3dfloors.h" #include "swrenderer/scene/r_portal.h" #include "swrenderer/segments/r_clipsegment.h" diff --git a/src/swrenderer/line/r_line.cpp b/src/swrenderer/line/r_line.cpp index 02514367e7..ccf65ef266 100644 --- a/src/swrenderer/line/r_line.cpp +++ b/src/swrenderer/line/r_line.cpp @@ -33,7 +33,7 @@ #include "r_data/colormaps.h" #include "swrenderer/r_main.h" #include "swrenderer/r_memory.h" -#include "swrenderer/scene/r_bsp.h" +#include "swrenderer/scene/r_opaque_pass.h" #include "swrenderer/scene/r_3dfloors.h" #include "swrenderer/scene/r_portal.h" #include "swrenderer/line/r_line.h" @@ -138,7 +138,7 @@ namespace swrenderer // kg3D - its fake, no transfer_heights if (!(clip3d->fake3D & FAKE3D_FAKEBACK)) { // killough 3/8/98, 4/4/98: hack for invisible ceilings / deep water - backsector = RenderBSP::Instance()->FakeFlat(backsector, &tempsec, nullptr, nullptr, curline, WallC.sx1, WallC.sx2, rw_frontcz1, rw_frontcz2); + backsector = RenderOpaquePass::Instance()->FakeFlat(backsector, &tempsec, nullptr, nullptr, curline, WallC.sx1, WallC.sx2, rw_frontcz1, rw_frontcz2); } doorclosed = false; // killough 4/16/98 @@ -455,7 +455,7 @@ namespace swrenderer // kg3D - backup for mid and fake walls draw_segment->bkup = R_NewOpening(stop - start); - memcpy(openings + draw_segment->bkup, &RenderBSP::Instance()->ceilingclip[start], sizeof(short)*(stop - start)); + 
memcpy(openings + draw_segment->bkup, &RenderOpaquePass::Instance()->ceilingclip[start], sizeof(short)*(stop - start)); draw_segment->bFogBoundary = IsFogBoundary(frontsector, backsector); if (sidedef->GetTexture(side_t::mid).isValid() || draw_segment->bFakeBoundary) @@ -567,13 +567,13 @@ namespace swrenderer if (((draw_segment->silhouette & SIL_TOP) || maskedtexture) && draw_segment->sprtopclip == -1) { draw_segment->sprtopclip = R_NewOpening(stop - start); - memcpy(openings + draw_segment->sprtopclip, &RenderBSP::Instance()->ceilingclip[start], sizeof(short)*(stop - start)); + memcpy(openings + draw_segment->sprtopclip, &RenderOpaquePass::Instance()->ceilingclip[start], sizeof(short)*(stop - start)); } if (((draw_segment->silhouette & SIL_BOTTOM) || maskedtexture) && draw_segment->sprbottomclip == -1) { draw_segment->sprbottomclip = R_NewOpening(stop - start); - memcpy(openings + draw_segment->sprbottomclip, &RenderBSP::Instance()->floorclip[start], sizeof(short)*(stop - start)); + memcpy(openings + draw_segment->sprbottomclip, &RenderOpaquePass::Instance()->floorclip[start], sizeof(short)*(stop - start)); } if (maskedtexture && curline->sidedef->GetTexture(side_t::mid).isValid()) @@ -969,8 +969,8 @@ namespace swrenderer R_SetColorMapLight(fixedcolormap, 0, 0); // clip wall to the floor and ceiling - auto ceilingclip = RenderBSP::Instance()->ceilingclip; - auto floorclip = RenderBSP::Instance()->floorclip; + auto ceilingclip = RenderOpaquePass::Instance()->ceilingclip; + auto floorclip = RenderOpaquePass::Instance()->floorclip; for (x = x1; x < x2; ++x) { if (walltop[x] < ceilingclip[x]) diff --git a/src/swrenderer/line/r_walldraw.cpp b/src/swrenderer/line/r_walldraw.cpp index 29309f0223..f0320383a1 100644 --- a/src/swrenderer/line/r_walldraw.cpp +++ b/src/swrenderer/line/r_walldraw.cpp @@ -42,7 +42,7 @@ #include "swrenderer/drawers/r_drawers.h" #include "swrenderer/drawers/r_draw.h" #include "swrenderer/segments/r_drawsegment.h" -#include 
"swrenderer/scene/r_bsp.h" +#include "swrenderer/scene/r_opaque_pass.h" #include "swrenderer/scene/r_3dfloors.h" #include "swrenderer/scene/r_portal.h" #include "swrenderer/line/r_walldraw.h" diff --git a/src/swrenderer/plane/r_flatplane.cpp b/src/swrenderer/plane/r_flatplane.cpp index af4c377975..af98b4169e 100644 --- a/src/swrenderer/plane/r_flatplane.cpp +++ b/src/swrenderer/plane/r_flatplane.cpp @@ -27,7 +27,7 @@ #include "cmdlib.h" #include "d_net.h" #include "g_level.h" -#include "swrenderer/scene/r_bsp.h" +#include "swrenderer/scene/r_opaque_pass.h" #include "r_flatplane.h" #include "swrenderer/scene/r_3dfloors.h" #include "v_palette.h" diff --git a/src/swrenderer/plane/r_skyplane.cpp b/src/swrenderer/plane/r_skyplane.cpp index b31445c140..0fbb5e5167 100644 --- a/src/swrenderer/plane/r_skyplane.cpp +++ b/src/swrenderer/plane/r_skyplane.cpp @@ -27,7 +27,7 @@ #include "cmdlib.h" #include "d_net.h" #include "g_level.h" -#include "swrenderer/scene/r_bsp.h" +#include "swrenderer/scene/r_opaque_pass.h" #include "r_skyplane.h" #include "swrenderer/scene/r_3dfloors.h" #include "v_palette.h" diff --git a/src/swrenderer/plane/r_slopeplane.cpp b/src/swrenderer/plane/r_slopeplane.cpp index 85afd1d33e..be101e2a29 100644 --- a/src/swrenderer/plane/r_slopeplane.cpp +++ b/src/swrenderer/plane/r_slopeplane.cpp @@ -27,7 +27,7 @@ #include "cmdlib.h" #include "d_net.h" #include "g_level.h" -#include "swrenderer/scene/r_bsp.h" +#include "swrenderer/scene/r_opaque_pass.h" #include "r_slopeplane.h" #include "swrenderer/scene/r_3dfloors.h" #include "v_palette.h" diff --git a/src/swrenderer/plane/r_visibleplane.cpp b/src/swrenderer/plane/r_visibleplane.cpp index df937ffdfa..948859cb28 100644 --- a/src/swrenderer/plane/r_visibleplane.cpp +++ b/src/swrenderer/plane/r_visibleplane.cpp @@ -29,7 +29,7 @@ #include "g_level.h" #include "gl/dynlights/gl_dynlight.h" #include "swrenderer/r_main.h" -#include "swrenderer/scene/r_bsp.h" +#include "swrenderer/scene/r_opaque_pass.h" #include 
"swrenderer/scene/r_3dfloors.h" #include "swrenderer/scene/r_portal.h" #include "swrenderer/plane/r_flatplane.h" diff --git a/src/swrenderer/r_main.cpp b/src/swrenderer/r_main.cpp index a72e8214ac..c55303becb 100644 --- a/src/swrenderer/r_main.cpp +++ b/src/swrenderer/r_main.cpp @@ -37,13 +37,13 @@ #include "r_main.h" #include "drawers/r_draw.h" #include "plane/r_flatplane.h" -#include "scene/r_bsp.h" +#include "scene/r_opaque_pass.h" #include "segments/r_drawsegment.h" #include "segments/r_portalsegment.h" #include "segments/r_clipsegment.h" #include "scene/r_3dfloors.h" #include "scene/r_portal.h" -#include "scene/r_translucent.h" +#include "scene/r_translucent_pass.h" #include "r_sky.h" #include "drawers/r_draw_rgba.h" #include "st_stuff.h" @@ -385,7 +385,7 @@ void R_InitRenderer() static void R_ShutdownRenderer() { - RenderTranslucent::Deinit(); + RenderTranslucentPass::Deinit(); R_DeinitPlanes(); Clip3DFloors::Instance()->Cleanup(); R_DeinitOpenings(); @@ -528,10 +528,10 @@ void R_RenderActorView (AActor *actor, bool dontmaplines) R_ClearClipSegs (0, viewwidth); R_ClearDrawSegs (); R_ClearPlanes (true); - RenderTranslucent::Clear(); + RenderTranslucentPass::Clear(); // opening / clipping determination - RenderBSP::Instance()->ClearClip(); + RenderOpaquePass::Instance()->ClearClip(); R_FreeOpenings(); NetUpdate (); @@ -544,7 +544,7 @@ void R_RenderActorView (AActor *actor, bool dontmaplines) r_dontmaplines = dontmaplines; // [RH] Hack to make windows into underwater areas possible - RenderBSP::Instance()->ResetFakingUnderwater(); + RenderOpaquePass::Instance()->ResetFakingUnderwater(); // [RH] Setup particles for this frame P_FindParticleSubsectors (); @@ -558,7 +558,7 @@ void R_RenderActorView (AActor *actor, bool dontmaplines) } // Link the polyobjects right before drawing the scene to reduce the amounts of calls to this function PO_LinkToSubsectors(); - RenderBSP::Instance()->RenderScene(); + RenderOpaquePass::Instance()->RenderScene(); 
Clip3DFloors::Instance()->ResetClip(); // reset clips (floor/ceiling) camera->renderflags = savedflags; WallCycles.Unclock(); @@ -577,7 +577,7 @@ void R_RenderActorView (AActor *actor, bool dontmaplines) NetUpdate (); MaskedCycles.Clock(); - RenderTranslucent::Render(); + RenderTranslucentPass::Render(); MaskedCycles.Unclock(); NetUpdate (); diff --git a/src/swrenderer/r_swrenderer.cpp b/src/swrenderer/r_swrenderer.cpp index 7c6fc6b3fc..d11d6ee89a 100644 --- a/src/swrenderer/r_swrenderer.cpp +++ b/src/swrenderer/r_swrenderer.cpp @@ -39,7 +39,7 @@ #include "v_video.h" #include "m_png.h" #include "r_swrenderer.h" -#include "scene/r_bsp.h" +#include "scene/r_opaque_pass.h" #include "scene/r_3dfloors.h" #include "scene/r_portal.h" #include "textures/textures.h" @@ -477,6 +477,6 @@ void FSoftwareRenderer::RenderTextureView (FCanvasTexture *tex, AActor *viewpoin sector_t *FSoftwareRenderer::FakeFlat(sector_t *sec, sector_t *tempsec, int *floorlightlevel, int *ceilinglightlevel) { - return RenderBSP::Instance()->FakeFlat(sec, tempsec, floorlightlevel, ceilinglightlevel, nullptr, 0, 0, 0, 0); + return RenderOpaquePass::Instance()->FakeFlat(sec, tempsec, floorlightlevel, ceilinglightlevel, nullptr, 0, 0, 0, 0); } diff --git a/src/swrenderer/scene/r_3dfloors.cpp b/src/swrenderer/scene/r_3dfloors.cpp index dbfa6dfae4..23b4adabad 100644 --- a/src/swrenderer/scene/r_3dfloors.cpp +++ b/src/swrenderer/scene/r_3dfloors.cpp @@ -10,7 +10,7 @@ #include "p_local.h" #include "c_dispatch.h" #include "swrenderer/r_main.h" -#include "r_bsp.h" +#include "r_opaque_pass.h" #include "c_cvars.h" #include "r_3dfloors.h" @@ -102,8 +102,8 @@ namespace swrenderer curr = (ClipStack*)M_Malloc(sizeof(ClipStack)); curr->next = 0; - memcpy(curr->floorclip, RenderBSP::Instance()->floorclip, sizeof(short) * MAXWIDTH); - memcpy(curr->ceilingclip, RenderBSP::Instance()->ceilingclip, sizeof(short) * MAXWIDTH); + memcpy(curr->floorclip, RenderOpaquePass::Instance()->floorclip, sizeof(short) * MAXWIDTH); + 
memcpy(curr->ceilingclip, RenderOpaquePass::Instance()->ceilingclip, sizeof(short) * MAXWIDTH); curr->ffloor = fakeFloor; assert(fakeFloor->floorclip == nullptr); assert(fakeFloor->ceilingclip == nullptr); diff --git a/src/swrenderer/scene/r_bsp.cpp b/src/swrenderer/scene/r_opaque_pass.cpp similarity index 97% rename from src/swrenderer/scene/r_bsp.cpp rename to src/swrenderer/scene/r_opaque_pass.cpp index 6faab056d1..337942cc4f 100644 --- a/src/swrenderer/scene/r_bsp.cpp +++ b/src/swrenderer/scene/r_opaque_pass.cpp @@ -52,7 +52,7 @@ // State. #include "doomstat.h" #include "r_state.h" -#include "r_bsp.h" +#include "r_opaque_pass.h" #include "v_palette.h" #include "r_sky.h" #include "po_man.h" @@ -64,13 +64,13 @@ EXTERN_CVAR(Bool, r_drawvoxels); namespace swrenderer { - RenderBSP *RenderBSP::Instance() + RenderOpaquePass *RenderOpaquePass::Instance() { - static RenderBSP bsp; - return &bsp; + static RenderOpaquePass instance; + return &instance; } - sector_t *RenderBSP::FakeFlat(sector_t *sec, sector_t *tempsec, int *floorlightlevel, int *ceilinglightlevel, seg_t *backline, int backx1, int backx2, double frontcz1, double frontcz2) + sector_t *RenderOpaquePass::FakeFlat(sector_t *sec, sector_t *tempsec, int *floorlightlevel, int *ceilinglightlevel, seg_t *backline, int backx1, int backx2, double frontcz1, double frontcz2) { // If player's view height is underneath fake floor, lower the // drawn ceiling to be just under the floor height, and replace @@ -277,7 +277,7 @@ namespace swrenderer // Checks BSP node/subtree bounding box. // Returns true if some part of the bbox might be visible. 
- bool RenderBSP::CheckBBox(float *bspcoord) + bool RenderOpaquePass::CheckBBox(float *bspcoord) { static const int checkcoord[12][4] = { @@ -378,7 +378,7 @@ namespace swrenderer return R_IsWallSegmentVisible(sx1, sx2); } - void RenderBSP::AddPolyobjs(subsector_t *sub) + void RenderOpaquePass::AddPolyobjs(subsector_t *sub) { if (sub->BSP == nullptr || sub->BSP->bDirty) { @@ -395,7 +395,7 @@ namespace swrenderer } // kg3D - add fake segs, never rendered - void RenderBSP::FakeDrawLoop(subsector_t *sub, visplane_t *floorplane, visplane_t *ceilingplane) + void RenderOpaquePass::FakeDrawLoop(subsector_t *sub, visplane_t *floorplane, visplane_t *ceilingplane) { int count; seg_t* line; @@ -413,7 +413,7 @@ namespace swrenderer } } - void RenderBSP::RenderSubsector(subsector_t *sub) + void RenderOpaquePass::RenderSubsector(subsector_t *sub) { // Determine floor/ceiling planes. // Add sprites of things in sector. @@ -761,7 +761,7 @@ namespace swrenderer } } - void RenderBSP::RenderScene() + void RenderOpaquePass::RenderScene() { InSubsector = nullptr; RenderBSPNode(nodes + numnodes - 1); // The head node is the last node output. @@ -773,7 +773,7 @@ namespace swrenderer // Just call with BSP root and -1. // killough 5/2/98: reformatted, removed tail recursion - void RenderBSP::RenderBSPNode(void *node) + void RenderOpaquePass::RenderBSPNode(void *node) { if (numnodes == 0) { @@ -800,14 +800,14 @@ namespace swrenderer RenderSubsector((subsector_t *)((BYTE *)node - 1)); } - void RenderBSP::ClearClip() + void RenderOpaquePass::ClearClip() { // clip ceiling to console bottom fillshort(floorclip, viewwidth, viewheight); fillshort(ceilingclip, viewwidth, !screen->Accel2D && ConBottom > viewwindowy && !bRenderingToCanvas ? 
(ConBottom - viewwindowy) : 0); } - void RenderBSP::AddSprites(sector_t *sec, int lightlevel, WaterFakeSide fakeside) + void RenderOpaquePass::AddSprites(sector_t *sec, int lightlevel, WaterFakeSide fakeside) { F3DFloor *fakeceiling = nullptr; F3DFloor *fakefloor = nullptr; @@ -885,7 +885,7 @@ namespace swrenderer } } - bool RenderBSP::IsPotentiallyVisible(AActor *thing) + bool RenderOpaquePass::IsPotentiallyVisible(AActor *thing) { // Don't waste time projecting sprites that are definitely not visible. if (thing == nullptr || @@ -906,7 +906,7 @@ namespace swrenderer return true; } - bool RenderBSP::GetThingSprite(AActor *thing, ThingSprite &sprite) + bool RenderOpaquePass::GetThingSprite(AActor *thing, ThingSprite &sprite) { sprite.pos = thing->InterpolatedPosition(r_TicFracF); sprite.pos.Z += thing->GetBobOffset(r_TicFracF); diff --git a/src/swrenderer/scene/r_bsp.h b/src/swrenderer/scene/r_opaque_pass.h similarity index 97% rename from src/swrenderer/scene/r_bsp.h rename to src/swrenderer/scene/r_opaque_pass.h index ebc59a865b..72c764d04c 100644 --- a/src/swrenderer/scene/r_bsp.h +++ b/src/swrenderer/scene/r_opaque_pass.h @@ -48,10 +48,10 @@ namespace swrenderer int renderflags; }; - class RenderBSP + class RenderOpaquePass { public: - static RenderBSP *Instance(); + static RenderOpaquePass *Instance(); void ClearClip(); void RenderScene(); diff --git a/src/swrenderer/scene/r_portal.cpp b/src/swrenderer/scene/r_portal.cpp index daa6769adb..88b9415ce4 100644 --- a/src/swrenderer/scene/r_portal.cpp +++ b/src/swrenderer/scene/r_portal.cpp @@ -49,8 +49,8 @@ #include "swrenderer/segments/r_drawsegment.h" #include "swrenderer/plane/r_visibleplane.h" #include "swrenderer/things/r_visiblesprite.h" -#include "swrenderer/scene/r_bsp.h" -#include "swrenderer/scene/r_translucent.h" +#include "swrenderer/scene/r_opaque_pass.h" +#include "swrenderer/scene/r_translucent_pass.h" #include "swrenderer/r_main.h" #include "swrenderer/r_memory.h" @@ -178,8 +178,8 @@ namespace 
swrenderer WindowLeft = pl->left; WindowRight = pl->right; - auto ceilingclip = RenderBSP::Instance()->ceilingclip; - auto floorclip = RenderBSP::Instance()->floorclip; + auto ceilingclip = RenderOpaquePass::Instance()->ceilingclip; + auto floorclip = RenderOpaquePass::Instance()->floorclip; for (i = pl->left; i < pl->right; i++) { if (pl->top[i] == 0x7fff) @@ -225,7 +225,7 @@ namespace swrenderer viewposStack.Push(ViewPos); visplaneStack.Push(pl); - RenderBSP::Instance()->RenderScene(); + RenderOpaquePass::Instance()->RenderScene(); Clip3DFloors::Instance()->ResetClip(); // reset clips (floor/ceiling) R_DrawPlanes(); @@ -248,7 +248,7 @@ namespace swrenderer // Masked textures and planes need the view coordinates restored for proper positioning. viewposStack.Pop(ViewPos); - RenderTranslucent::Render(); + RenderTranslucentPass::Render(); ds_p = firstdrawseg; VisibleSpriteList::vissprite_p = VisibleSpriteList::firstvissprite; @@ -441,12 +441,12 @@ namespace swrenderer CurrentPortalInSkybox = false; // first portal in a skybox should set this variable to false for proper clipping in skyboxes. // first pass, set clipping - auto ceilingclip = RenderBSP::Instance()->ceilingclip; - auto floorclip = RenderBSP::Instance()->floorclip; + auto ceilingclip = RenderOpaquePass::Instance()->ceilingclip; + auto floorclip = RenderOpaquePass::Instance()->floorclip; memcpy(ceilingclip + pds->x1, &pds->ceilingclip[0], pds->len * sizeof(*ceilingclip)); memcpy(floorclip + pds->x1, &pds->floorclip[0], pds->len * sizeof(*floorclip)); - RenderBSP::Instance()->RenderScene(); + RenderOpaquePass::Instance()->RenderScene(); Clip3DFloors::Instance()->ResetClip(); // reset clips (floor/ceiling) if (!savedvisibility && camera) camera->renderflags &= ~RF_INVISIBLE; @@ -470,7 +470,7 @@ namespace swrenderer NetUpdate(); MaskedCycles.Clock(); // [ZZ] count sprites in portals/mirrors along with normal ones. 
- RenderTranslucent::Render(); // this is required since with portals there often will be cases when more than 80% of the view is inside a portal. + RenderTranslucentPass::Render(); // this is required since with portals there often will be cases when more than 80% of the view is inside a portal. MaskedCycles.Unclock(); NetUpdate(); diff --git a/src/swrenderer/scene/r_translucent.cpp b/src/swrenderer/scene/r_translucent_pass.cpp similarity index 97% rename from src/swrenderer/scene/r_translucent.cpp rename to src/swrenderer/scene/r_translucent_pass.cpp index 7b47965055..3f5d69e03b 100644 --- a/src/swrenderer/scene/r_translucent.cpp +++ b/src/swrenderer/scene/r_translucent_pass.cpp @@ -32,7 +32,7 @@ #include "swrenderer/things/r_playersprite.h" #include "swrenderer/segments/r_drawsegment.h" #include "swrenderer/scene/r_portal.h" -#include "swrenderer/scene/r_translucent.h" +#include "swrenderer/scene/r_translucent_pass.h" #include "swrenderer/plane/r_visibleplane.h" #include "swrenderer/r_memory.h" @@ -44,23 +44,23 @@ CVAR(Bool, r_fullbrightignoresectorcolor, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG namespace swrenderer { - bool RenderTranslucent::DrewAVoxel; - TArray RenderTranslucent::portaldrawsegs; + bool RenderTranslucentPass::DrewAVoxel; + TArray RenderTranslucentPass::portaldrawsegs; - void RenderTranslucent::Deinit() + void RenderTranslucentPass::Deinit() { VisibleSpriteList::Deinit(); SortedVisibleSpriteList::Deinit(); RenderVoxel::Deinit(); } - void RenderTranslucent::Clear() + void RenderTranslucentPass::Clear() { VisibleSpriteList::Clear(); DrewAVoxel = false; } - void RenderTranslucent::CollectPortals() + void RenderTranslucentPass::CollectPortals() { // This function collects all drawsegs that may be of interest to R_ClipSpriteColumnWithPortals // Having that function over the entire list of drawsegs can break down performance quite drastically. 
@@ -90,7 +90,7 @@ namespace swrenderer } } - bool RenderTranslucent::ClipSpriteColumnWithPortals(int x, vissprite_t* spr) + bool RenderTranslucentPass::ClipSpriteColumnWithPortals(int x, vissprite_t* spr) { RenderPortal *renderportal = RenderPortal::Instance(); @@ -118,7 +118,7 @@ namespace swrenderer return false; } - void RenderTranslucent::DrawSprite(vissprite_t *spr) + void RenderTranslucentPass::DrawSprite(vissprite_t *spr) { static short clipbot[MAXWIDTH]; static short cliptop[MAXWIDTH]; @@ -523,7 +523,7 @@ namespace swrenderer spr->Style.ColormapNum = colormapnum; } - void RenderTranslucent::DrawMaskedSingle(bool renew) + void RenderTranslucentPass::DrawMaskedSingle(bool renew) { RenderPortal *renderportal = RenderPortal::Instance(); @@ -560,7 +560,7 @@ namespace swrenderer } } - void RenderTranslucent::Render() + void RenderTranslucentPass::Render() { CollectPortals(); SortedVisibleSpriteList::Sort(DrewAVoxel ? SortedVisibleSpriteList::sv_compare2d : SortedVisibleSpriteList::sv_compare, VisibleSpriteList::firstvissprite - VisibleSpriteList::vissprites); diff --git a/src/swrenderer/scene/r_translucent.h b/src/swrenderer/scene/r_translucent_pass.h similarity index 97% rename from src/swrenderer/scene/r_translucent.h rename to src/swrenderer/scene/r_translucent_pass.h index 30ca656793..359a5002d6 100644 --- a/src/swrenderer/scene/r_translucent.h +++ b/src/swrenderer/scene/r_translucent_pass.h @@ -25,7 +25,7 @@ namespace swrenderer struct vissprite_t; struct drawseg_t; - class RenderTranslucent + class RenderTranslucentPass { public: static void Deinit(); diff --git a/src/swrenderer/segments/r_clipsegment.cpp b/src/swrenderer/segments/r_clipsegment.cpp index c1cc94de66..66dbc354f4 100644 --- a/src/swrenderer/segments/r_clipsegment.cpp +++ b/src/swrenderer/segments/r_clipsegment.cpp @@ -26,7 +26,7 @@ #include "p_effect.h" #include "doomstat.h" #include "r_state.h" -#include "swrenderer/scene/r_bsp.h" +#include "swrenderer/scene/r_opaque_pass.h" #include 
"v_palette.h" #include "r_sky.h" #include "po_man.h" diff --git a/src/swrenderer/segments/r_drawsegment.cpp b/src/swrenderer/segments/r_drawsegment.cpp index 18530e157d..269154b9e3 100644 --- a/src/swrenderer/segments/r_drawsegment.cpp +++ b/src/swrenderer/segments/r_drawsegment.cpp @@ -32,7 +32,7 @@ #include "swrenderer/r_memory.h" #include "swrenderer/drawers/r_draw.h" #include "swrenderer/scene/r_3dfloors.h" -#include "swrenderer/scene/r_bsp.h" +#include "swrenderer/scene/r_opaque_pass.h" #include "swrenderer/scene/r_portal.h" #include "swrenderer/line/r_wallsetup.h" #include "swrenderer/line/r_walldraw.h" @@ -166,7 +166,7 @@ namespace swrenderer } // killough 4/13/98: get correct lightlevel for 2s normal textures - sec = RenderBSP::Instance()->FakeFlat(frontsector, &tempsec, nullptr, nullptr, nullptr, 0, 0, 0, 0); + sec = RenderOpaquePass::Instance()->FakeFlat(frontsector, &tempsec, nullptr, nullptr, nullptr, 0, 0, 0, 0); basecolormap = sec->ColorMap; // [RH] Set basecolormap diff --git a/src/swrenderer/segments/r_portalsegment.cpp b/src/swrenderer/segments/r_portalsegment.cpp index 476e6b0a35..a1bb2bb74a 100644 --- a/src/swrenderer/segments/r_portalsegment.cpp +++ b/src/swrenderer/segments/r_portalsegment.cpp @@ -26,7 +26,7 @@ #include "p_effect.h" #include "doomstat.h" #include "r_state.h" -#include "swrenderer/scene/r_bsp.h" +#include "swrenderer/scene/r_opaque_pass.h" #include "v_palette.h" #include "r_sky.h" #include "po_man.h" diff --git a/src/swrenderer/things/r_decal.cpp b/src/swrenderer/things/r_decal.cpp index 46140bc3e2..59db70b8ce 100644 --- a/src/swrenderer/things/r_decal.cpp +++ b/src/swrenderer/things/r_decal.cpp @@ -28,7 +28,7 @@ #include "a_sharedglobal.h" #include "d_net.h" #include "g_level.h" -#include "swrenderer/scene/r_bsp.h" +#include "swrenderer/scene/r_opaque_pass.h" #include "r_decal.h" #include "swrenderer/scene/r_3dfloors.h" #include "swrenderer/drawers/r_draw.h" @@ -173,7 +173,7 @@ namespace swrenderer else if (pass == 0) { 
mceilingclip = walltop; - mfloorclip = RenderBSP::Instance()->ceilingclip; + mfloorclip = RenderOpaquePass::Instance()->ceilingclip; needrepeat = 1; } else @@ -189,7 +189,7 @@ namespace swrenderer goto done; } mceilingclip = walltop; - mfloorclip = RenderBSP::Instance()->ceilingclip; + mfloorclip = RenderOpaquePass::Instance()->ceilingclip; break; case RF_CLIPMID: @@ -206,7 +206,7 @@ namespace swrenderer { goto done; } - mceilingclip = RenderBSP::Instance()->floorclip; + mceilingclip = RenderOpaquePass::Instance()->floorclip; mfloorclip = wallbottom; break; } @@ -300,7 +300,7 @@ namespace swrenderer // If this sprite is RF_CLIPFULL on a two-sided line, needrepeat will // be set 1 if we need to draw on the lower wall. In all other cases, // needrepeat will be 0, and the while will fail. - mceilingclip = RenderBSP::Instance()->floorclip; + mceilingclip = RenderOpaquePass::Instance()->floorclip; mfloorclip = wallbottom; R_FinishSetPatchStyle(); } while (needrepeat--); diff --git a/src/swrenderer/things/r_particle.cpp b/src/swrenderer/things/r_particle.cpp index fd4585aac7..079b229fc7 100644 --- a/src/swrenderer/things/r_particle.cpp +++ b/src/swrenderer/things/r_particle.cpp @@ -38,9 +38,9 @@ #include "colormatcher.h" #include "d_netinf.h" #include "p_effect.h" -#include "swrenderer/scene/r_bsp.h" +#include "swrenderer/scene/r_opaque_pass.h" #include "swrenderer/scene/r_3dfloors.h" -#include "swrenderer/scene/r_translucent.h" +#include "swrenderer/scene/r_translucent_pass.h" #include "swrenderer/drawers/r_draw_rgba.h" #include "swrenderer/drawers/r_draw_pal.h" #include "v_palette.h" @@ -118,8 +118,8 @@ namespace swrenderer // entered, we don't need to clip it to drawsegs like a normal sprite. // Clip particles behind walls. 
- auto ceilingclip = RenderBSP::Instance()->ceilingclip; - auto floorclip = RenderBSP::Instance()->floorclip; + auto ceilingclip = RenderOpaquePass::Instance()->ceilingclip; + auto floorclip = RenderOpaquePass::Instance()->floorclip; if (y1 < ceilingclip[x1]) y1 = ceilingclip[x1]; if (y1 < ceilingclip[x2 - 1]) y1 = ceilingclip[x2 - 1]; if (y2 >= floorclip[x1]) y2 = floorclip[x1] - 1; @@ -254,7 +254,7 @@ namespace swrenderer { for (int x = x1; x < (x1 + countbase); x++, fracposx += fracstepx) { - if (RenderTranslucent::ClipSpriteColumnWithPortals(x, vis)) + if (RenderTranslucentPass::ClipSpriteColumnWithPortals(x, vis)) continue; uint32_t *dest = ylookup[yl] + x + (uint32_t*)dc_destorg; DrawerCommandQueue::QueueCommand(dest, yl, spacing, ycount, fg, alpha, fracposx); @@ -264,7 +264,7 @@ namespace swrenderer { for (int x = x1; x < (x1 + countbase); x++, fracposx += fracstepx) { - if (RenderTranslucent::ClipSpriteColumnWithPortals(x, vis)) + if (RenderTranslucentPass::ClipSpriteColumnWithPortals(x, vis)) continue; uint8_t *dest = ylookup[yl] + x + dc_destorg; DrawerCommandQueue::QueueCommand(dest, yl, spacing, ycount, fg, alpha, fracposx); diff --git a/src/swrenderer/things/r_particle.h b/src/swrenderer/things/r_particle.h index 3f9f89df32..331716cec5 100644 --- a/src/swrenderer/things/r_particle.h +++ b/src/swrenderer/things/r_particle.h @@ -14,7 +14,7 @@ #pragma once #include "r_visiblesprite.h" -#include "swrenderer/scene/r_bsp.h" +#include "swrenderer/scene/r_opaque_pass.h" namespace swrenderer { diff --git a/src/swrenderer/things/r_playersprite.cpp b/src/swrenderer/things/r_playersprite.cpp index 07325bea3f..e53a9b2bc3 100644 --- a/src/swrenderer/things/r_playersprite.cpp +++ b/src/swrenderer/things/r_playersprite.cpp @@ -38,7 +38,7 @@ #include "colormatcher.h" #include "d_netinf.h" #include "p_effect.h" -#include "swrenderer/scene/r_bsp.h" +#include "swrenderer/scene/r_opaque_pass.h" #include "swrenderer/scene/r_3dfloors.h" #include 
"swrenderer/drawers/r_draw_rgba.h" #include "swrenderer/drawers/r_draw_pal.h" @@ -127,7 +127,7 @@ namespace swrenderer else { // This used to use camera->Sector but due to interpolation that can be incorrect // when the interpolated viewpoint is in a different sector than the camera. - sec = RenderBSP::Instance()->FakeFlat(viewsector, &tempsec, &floorlight, &ceilinglight, nullptr, 0, 0, 0, 0); + sec = RenderOpaquePass::Instance()->FakeFlat(viewsector, &tempsec, &floorlight, &ceilinglight, nullptr, 0, 0, 0, 0); // [RH] set basecolormap basecolormap = sec->ColorMap; diff --git a/src/swrenderer/things/r_sprite.cpp b/src/swrenderer/things/r_sprite.cpp index edd7a24749..c75720afe2 100644 --- a/src/swrenderer/things/r_sprite.cpp +++ b/src/swrenderer/things/r_sprite.cpp @@ -38,9 +38,9 @@ #include "colormatcher.h" #include "d_netinf.h" #include "p_effect.h" -#include "swrenderer/scene/r_bsp.h" +#include "swrenderer/scene/r_opaque_pass.h" #include "swrenderer/scene/r_3dfloors.h" -#include "swrenderer/scene/r_translucent.h" +#include "swrenderer/scene/r_translucent_pass.h" #include "swrenderer/drawers/r_draw_rgba.h" #include "swrenderer/drawers/r_draw_pal.h" #include "v_palette.h" @@ -331,7 +331,7 @@ namespace swrenderer { while (x < x2) { - if (ispsprite || !RenderTranslucent::ClipSpriteColumnWithPortals(x, vis)) + if (ispsprite || !RenderTranslucentPass::ClipSpriteColumnWithPortals(x, vis)) R_DrawMaskedColumn(x, iscale, tex, frac, spryscale, sprtopscreen, sprflipvert, mfloorclip, mceilingclip, false); x++; frac += xiscale; diff --git a/src/swrenderer/things/r_visiblesprite.h b/src/swrenderer/things/r_visiblesprite.h index 06e50f2922..f655b35cec 100644 --- a/src/swrenderer/things/r_visiblesprite.h +++ b/src/swrenderer/things/r_visiblesprite.h @@ -14,7 +14,7 @@ #pragma once #include "swrenderer/line/r_line.h" -#include "swrenderer/scene/r_bsp.h" +#include "swrenderer/scene/r_opaque_pass.h" #include "swrenderer/things/r_visiblespritelist.h" #define MINZ double((2048*4) / 
double(1 << 20)) diff --git a/src/swrenderer/things/r_voxel.cpp b/src/swrenderer/things/r_voxel.cpp index c55f7439e0..da9adc6be6 100644 --- a/src/swrenderer/things/r_voxel.cpp +++ b/src/swrenderer/things/r_voxel.cpp @@ -37,7 +37,7 @@ #include "swrenderer/things/r_visiblesprite.h" #include "swrenderer/things/r_voxel.h" #include "swrenderer/scene/r_portal.h" -#include "swrenderer/scene/r_translucent.h" +#include "swrenderer/scene/r_translucent_pass.h" #include "swrenderer/r_main.h" EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor) @@ -153,7 +153,7 @@ namespace swrenderer vis->voxel = voxel->Voxel; vis->bIsVoxel = true; vis->bWallSprite = false; - RenderTranslucent::DrewAVoxel = true; + RenderTranslucentPass::DrewAVoxel = true; // The software renderer cannot invert the source without inverting the overlay // too. That means if the source is inverted, we need to do the reverse of what diff --git a/src/swrenderer/things/r_wallsprite.cpp b/src/swrenderer/things/r_wallsprite.cpp index 8fc5ea0932..de4da612de 100644 --- a/src/swrenderer/things/r_wallsprite.cpp +++ b/src/swrenderer/things/r_wallsprite.cpp @@ -38,9 +38,9 @@ #include "colormatcher.h" #include "d_netinf.h" #include "p_effect.h" -#include "swrenderer/scene/r_bsp.h" +#include "swrenderer/scene/r_opaque_pass.h" #include "swrenderer/scene/r_3dfloors.h" -#include "swrenderer/scene/r_translucent.h" +#include "swrenderer/scene/r_translucent_pass.h" #include "swrenderer/drawers/r_draw_rgba.h" #include "swrenderer/drawers/r_draw_pal.h" #include "v_palette.h" @@ -223,7 +223,7 @@ namespace swrenderer { // calculate lighting R_SetColorMapLight(usecolormap, light, shade); } - if (!RenderTranslucent::ClipSpriteColumnWithPortals(x, spr)) + if (!RenderTranslucentPass::ClipSpriteColumnWithPortals(x, spr)) DrawColumn(x, WallSpriteTile, maskedScaleY, sprflipvert, mfloorclip, mceilingclip); light += lightstep; x++; From fc29958dc727c8628bf135f08bf941364f045ecf Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 11 Jan 2017 
21:33:28 +0100 Subject: [PATCH 691/912] Convert r_fogboundary to a class --- src/swrenderer/line/r_fogboundary.cpp | 15 ++++++--------- src/swrenderer/line/r_fogboundary.h | 13 +++++++++++-- src/swrenderer/segments/r_drawsegment.cpp | 2 +- 3 files changed, 18 insertions(+), 12 deletions(-) diff --git a/src/swrenderer/line/r_fogboundary.cpp b/src/swrenderer/line/r_fogboundary.cpp index edaa942655..ca1822a2c1 100644 --- a/src/swrenderer/line/r_fogboundary.cpp +++ b/src/swrenderer/line/r_fogboundary.cpp @@ -45,12 +45,7 @@ namespace swrenderer { - namespace - { - short spanend[MAXHEIGHT]; - } - - void R_DrawFogBoundary(int x1, int x2, short *uclip, short *dclip, int wallshade, float lightleft, float lightstep) + void RenderFogBoundary::Render(int x1, int x2, short *uclip, short *dclip, int wallshade, float lightleft, float lightstep) { // This is essentially the same as R_MapVisPlane but with an extra step // to create new horizontal spans whenever the light changes enough that @@ -87,7 +82,7 @@ namespace swrenderer if (t2 < b2 && rcolormap != 0) { // Colormap 0 is always the identity map, so rendering it is // just a waste of time. 
- R_DrawFogBoundarySection(t2, b2, xr); + RenderSection(t2, b2, xr); } if (t1 < t2) t2 = t1; if (b1 > b2) b2 = b1; @@ -139,15 +134,17 @@ namespace swrenderer } if (t2 < b2 && rcolormap != 0) { - R_DrawFogBoundarySection(t2, b2, x1); + RenderSection(t2, b2, x1); } } - void R_DrawFogBoundarySection(int y, int y2, int x1) + void RenderFogBoundary::RenderSection(int y, int y2, int x1) { for (; y < y2; ++y) { R_Drawers()->DrawFogBoundaryLine(y, x1, spanend[y]); } } + + short RenderFogBoundary::spanend[MAXHEIGHT]; } diff --git a/src/swrenderer/line/r_fogboundary.h b/src/swrenderer/line/r_fogboundary.h index bb7d267241..de48738a09 100644 --- a/src/swrenderer/line/r_fogboundary.h +++ b/src/swrenderer/line/r_fogboundary.h @@ -15,6 +15,15 @@ namespace swrenderer { - void R_DrawFogBoundary(int x1, int x2, short *uclip, short *dclip, int wallshade, float lightleft, float lightstep); - void R_DrawFogBoundarySection(int y, int y2, int x1); + class RenderFogBoundary + { + public: + static void Render(int x1, int x2, short *uclip, short *dclip, int wallshade, float lightleft, float lightstep); + + private: + static void RenderSection(int y, int y2, int x1); + + static short spanend[MAXHEIGHT]; + }; + } diff --git a/src/swrenderer/segments/r_drawsegment.cpp b/src/swrenderer/segments/r_drawsegment.cpp index 269154b9e3..99ad33cefe 100644 --- a/src/swrenderer/segments/r_drawsegment.cpp +++ b/src/swrenderer/segments/r_drawsegment.cpp @@ -201,7 +201,7 @@ namespace swrenderer // [RH] Draw fog partition if (ds->bFogBoundary) { - R_DrawFogBoundary(x1, x2, mceilingclip, mfloorclip, wallshade, rw_light, rw_lightstep); + RenderFogBoundary::Render(x1, x2, mceilingclip, mfloorclip, wallshade, rw_light, rw_lightstep); if (ds->maskedtexturecol == -1) { goto clearfog; From 0885ff44a0e9bc1678752ae327b50a4335eb39e2 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 11 Jan 2017 21:59:26 +0100 Subject: [PATCH 692/912] Convert r_flatplane and r_slopeplane into classes --- 
src/swrenderer/plane/r_flatplane.cpp | 55 +++++++++++-------------- src/swrenderer/plane/r_flatplane.h | 35 +++++++++++++--- src/swrenderer/plane/r_slopeplane.cpp | 16 ++----- src/swrenderer/plane/r_slopeplane.h | 17 +++++++- src/swrenderer/plane/r_visibleplane.cpp | 19 ++++----- src/swrenderer/plane/r_visibleplane.h | 13 +++++- src/swrenderer/r_main.cpp | 2 +- 7 files changed, 92 insertions(+), 65 deletions(-) diff --git a/src/swrenderer/plane/r_flatplane.cpp b/src/swrenderer/plane/r_flatplane.cpp index af98b4169e..3a1e3936cc 100644 --- a/src/swrenderer/plane/r_flatplane.cpp +++ b/src/swrenderer/plane/r_flatplane.cpp @@ -41,20 +41,7 @@ namespace swrenderer { - namespace - { - double planeheight; - bool plane_shade; - int planeshade; - fixed_t pviewx, pviewy; - float yslope[MAXHEIGHT]; - fixed_t xscale, yscale; - double xstepscale, ystepscale; - double basexfrac, baseyfrac; - visplane_light *ds_light_list; - } - - void R_DrawNormalPlane(visplane_t *pl, double _xscale, double _yscale, fixed_t alpha, bool additive, bool masked) + void RenderFlatPlane::Render(visplane_t *pl, double _xscale, double _yscale, fixed_t alpha, bool additive, bool masked) { using namespace drawerargs; @@ -189,18 +176,12 @@ namespace swrenderer } } - ds_light_list = pl->lights; + light_list = pl->lights; - R_MapVisPlane(pl, R_MapPlane, R_StepPlane); + RenderLines(pl); } - void R_StepPlane() - { - basexfrac -= xstepscale; - baseyfrac -= ystepscale; - } - - void R_MapPlane(int y, int x1, int x2) + void RenderFlatPlane::RenderLine(int y, int x1, int x2) { using namespace drawerargs; @@ -275,7 +256,7 @@ namespace swrenderer // Setup lights for row dc_num_lights = 0; dc_lights = lightbuffer + nextlightindex; - visplane_light *cur_node = ds_light_list; + visplane_light *cur_node = light_list; while (cur_node && nextlightindex < 64 * 1024) { double lightX = cur_node->lightsource->X() - ViewPos.X; @@ -326,17 +307,13 @@ namespace swrenderer (R_Drawers()->*spanfunc)(); } - void 
R_DrawColoredPlane(visplane_t *pl) + void RenderFlatPlane::StepColumn() { - R_MapVisPlane(pl, R_MapColoredPlane, nullptr); + basexfrac -= xstepscale; + baseyfrac -= ystepscale; } - void R_MapColoredPlane(int y, int x1, int x2) - { - R_Drawers()->DrawColoredSpan(y, x1, x2); - } - - void R_SetupPlaneSlope() + void RenderFlatPlane::SetupSlope() { int e, i; @@ -377,4 +354,18 @@ namespace swrenderer } while (++i < e); } } + + float RenderFlatPlane::yslope[MAXHEIGHT]; + + ///////////////////////////////////////////////////////////////////////// + + void RenderColoredPlane::Render(visplane_t *pl) + { + RenderLines(pl); + } + + void RenderColoredPlane::RenderLine(int y, int x1, int x2) + { + R_Drawers()->DrawColoredSpan(y, x1, x2); + } } diff --git a/src/swrenderer/plane/r_flatplane.h b/src/swrenderer/plane/r_flatplane.h index c922df34e9..246087c3d1 100644 --- a/src/swrenderer/plane/r_flatplane.h +++ b/src/swrenderer/plane/r_flatplane.h @@ -17,12 +17,35 @@ namespace swrenderer { - void R_SetupPlaneSlope(); + class RenderFlatPlane : PlaneRenderer + { + public: + void Render(visplane_t *pl, double _xscale, double _yscale, fixed_t alpha, bool additive, bool masked); - void R_DrawNormalPlane(visplane_t *pl, double _xscale, double _yscale, fixed_t alpha, bool additive, bool masked); - void R_MapPlane(int y, int x1, int x2); - void R_StepPlane(); + static void SetupSlope(); - void R_DrawColoredPlane(visplane_t *pl); - void R_MapColoredPlane(int y, int x1, int x2); + private: + void RenderLine(int y, int x1, int x2) override; + void StepColumn() override; + + double planeheight; + bool plane_shade; + int planeshade; + fixed_t pviewx, pviewy; + fixed_t xscale, yscale; + double xstepscale, ystepscale; + double basexfrac, baseyfrac; + visplane_light *light_list; + + static float yslope[MAXHEIGHT]; + }; + + class RenderColoredPlane : PlaneRenderer + { + public: + void Render(visplane_t *pl); + + private: + void RenderLine(int y, int x1, int x2) override; + }; } diff --git 
a/src/swrenderer/plane/r_slopeplane.cpp b/src/swrenderer/plane/r_slopeplane.cpp index be101e2a29..8a5897bb58 100644 --- a/src/swrenderer/plane/r_slopeplane.cpp +++ b/src/swrenderer/plane/r_slopeplane.cpp @@ -45,17 +45,7 @@ namespace swrenderer { - namespace - { - FVector3 plane_sz, plane_su, plane_sv; - float planelightfloat; - bool plane_shade; - int planeshade; - fixed_t pviewx, pviewy; - fixed_t xscale, yscale; - } - - void R_DrawTiltedPlane(visplane_t *pl, double _xscale, double _yscale, fixed_t alpha, bool additive, bool masked) + void RenderSlopePlane::Render(visplane_t *pl, double _xscale, double _yscale, fixed_t alpha, bool additive, bool masked) { using namespace drawerargs; @@ -182,10 +172,10 @@ namespace swrenderer plane_su[2] = plane_su[1] = plane_su[0] = 0; } - R_MapVisPlane(pl, R_MapTiltedPlane, nullptr); + RenderLines(pl); } - void R_MapTiltedPlane(int y, int x1, int x2) + void RenderSlopePlane::RenderLine(int y, int x1, int x2) { R_Drawers()->DrawTiltedSpan(y, x1, x2, plane_sz, plane_su, plane_sv, plane_shade, planeshade, planelightfloat, pviewx, pviewy); } diff --git a/src/swrenderer/plane/r_slopeplane.h b/src/swrenderer/plane/r_slopeplane.h index d366c5feff..9ea635a26f 100644 --- a/src/swrenderer/plane/r_slopeplane.h +++ b/src/swrenderer/plane/r_slopeplane.h @@ -17,6 +17,19 @@ namespace swrenderer { - void R_DrawTiltedPlane(visplane_t *pl, double _xscale, double _yscale, fixed_t alpha, bool additive, bool masked); - void R_MapTiltedPlane(int y, int x1, int x2); + class RenderSlopePlane : PlaneRenderer + { + public: + void Render(visplane_t *pl, double _xscale, double _yscale, fixed_t alpha, bool additive, bool masked); + + private: + void RenderLine(int y, int x1, int x2) override; + + FVector3 plane_sz, plane_su, plane_sv; + float planelightfloat; + bool plane_shade; + int planeshade; + fixed_t pviewx, pviewy; + fixed_t xscale, yscale; + }; } diff --git a/src/swrenderer/plane/r_visibleplane.cpp b/src/swrenderer/plane/r_visibleplane.cpp index 
948859cb28..a045d7a991 100644 --- a/src/swrenderer/plane/r_visibleplane.cpp +++ b/src/swrenderer/plane/r_visibleplane.cpp @@ -52,8 +52,6 @@ namespace swrenderer enum { max_plane_lights = 32 * 1024 }; visplane_light plane_lights[max_plane_lights]; int next_plane_light = 0; - - short spanend[MAXHEIGHT]; } void R_DeinitPlanes() @@ -509,17 +507,19 @@ namespace swrenderer if (!pl->height.isSlope() && !tilt) { - R_DrawNormalPlane(pl, xscale, yscale, alpha, additive, masked); + RenderFlatPlane renderer; + renderer.Render(pl, xscale, yscale, alpha, additive, masked); } else { - R_DrawTiltedPlane(pl, xscale, yscale, alpha, additive, masked); + RenderSlopePlane renderer; + renderer.Render(pl, xscale, yscale, alpha, additive, masked); } } NetUpdate(); } - void R_MapVisPlane(visplane_t *pl, void(*mapfunc)(int y, int x1, int x2), void(*stepfunc)()) + void PlaneRenderer::RenderLines(visplane_t *pl) { // t1/b1 are at x // t2/b2 are at x+1 @@ -547,14 +547,14 @@ namespace swrenderer { int y = t2++; int x2 = spanend[y]; - mapfunc(y, xr, x2); + RenderLine(y, xr, x2); } stop = MAX(b1, t2); while (b2 > stop) { int y = --b2; int x2 = spanend[y]; - mapfunc(y, xr, x2); + RenderLine(y, xr, x2); } // Mark any spans that have just opened @@ -572,15 +572,14 @@ namespace swrenderer t2 = pl->top[x]; b2 = pl->bottom[x]; - if (stepfunc) - stepfunc(); + StepColumn(); } // Draw any spans that are still open while (t2 < b2) { int y = --b2; int x2 = spanend[y]; - mapfunc(y, pl->left, x2); + RenderLine(y, pl->left, x2); } } } diff --git a/src/swrenderer/plane/r_visibleplane.h b/src/swrenderer/plane/r_visibleplane.h index c201c691a1..41f7d7bda2 100644 --- a/src/swrenderer/plane/r_visibleplane.h +++ b/src/swrenderer/plane/r_visibleplane.h @@ -87,5 +87,16 @@ namespace swrenderer int R_DrawPlanes(); void R_DrawHeightPlanes(double height); void R_DrawSinglePlane(visplane_t *pl, fixed_t alpha, bool additive, bool masked); - void R_MapVisPlane(visplane_t *pl, void(*mapfunc)(int y, int x1, int x2), 
void(*stepfunc)()); + + class PlaneRenderer + { + public: + void RenderLines(visplane_t *pl); + + virtual void RenderLine(int y, int x1, int x2) = 0; + virtual void StepColumn() { } + + private: + short spanend[MAXHEIGHT]; + }; } diff --git a/src/swrenderer/r_main.cpp b/src/swrenderer/r_main.cpp index c55303becb..e2f034b4ce 100644 --- a/src/swrenderer/r_main.cpp +++ b/src/swrenderer/r_main.cpp @@ -467,7 +467,7 @@ void R_SetupFreelook() globaluclip = -CenterY / InvZtoScale; globaldclip = (viewheight - CenterY) / InvZtoScale; - R_SetupPlaneSlope(); + RenderFlatPlane::SetupSlope(); } //========================================================================== From 9f8ac7e49855d68f7c075f862082c1b7f5b38e03 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 11 Jan 2017 22:09:06 +0100 Subject: [PATCH 693/912] Convert r_skyplane into a class --- src/swrenderer/plane/r_skyplane.cpp | 81 ++++++++++++------------- src/swrenderer/plane/r_skyplane.h | 45 +++++++++++--- src/swrenderer/plane/r_visibleplane.cpp | 2 +- 3 files changed, 78 insertions(+), 50 deletions(-) diff --git a/src/swrenderer/plane/r_skyplane.cpp b/src/swrenderer/plane/r_skyplane.cpp index 0fbb5e5167..9909f868e3 100644 --- a/src/swrenderer/plane/r_skyplane.cpp +++ b/src/swrenderer/plane/r_skyplane.cpp @@ -47,31 +47,7 @@ EXTERN_CVAR(Int, r_skymode) namespace swrenderer { - namespace - { - FTexture *frontskytex, *backskytex; - angle_t skyflip; - int frontpos, backpos; - double frontyScale; - fixed_t frontcyl, backcyl; - double skymid; - angle_t skyangle; - double frontiScale; - - // Allow for layer skies up to 512 pixels tall. This is overkill, - // since the most anyone can ever see of the sky is 500 pixels. - // We need 4 skybufs because R_DrawSkySegment can draw up to 4 columns at a time. 
- // Need two versions - one for true color and one for palette - #define MAXSKYBUF 3072 - uint8_t skybuf[4][512]; - uint32_t skybuf_bgra[MAXSKYBUF][512]; - uint32_t lastskycol[4]; - uint32_t lastskycol_bgra[MAXSKYBUF]; - int skycolplace; - int skycolplace_bgra; - } - - void R_DrawSkyPlane(visplane_t *pl) + void RenderSkyPlane::Render(visplane_t *pl) { FTextureID sky1tex, sky2tex; double frontdpos = 0, backdpos = 0; @@ -181,7 +157,7 @@ namespace swrenderer R_SetColorMapLight(fixedcolormap, 0, 0); } - R_DrawSky(pl); + DrawSky(pl); if (fakefixed) fixedcolormap = NULL; @@ -189,7 +165,7 @@ namespace swrenderer // Get a column of sky when there is only one sky texture. - const uint8_t *R_GetOneSkyColumn(FTexture *fronttex, int x) + const uint8_t *RenderSkyPlane::GetOneSkyColumn(FTexture *fronttex, int x) { int tx; if (r_linearsky) @@ -213,7 +189,7 @@ namespace swrenderer } // Get a column of sky when there are two overlapping sky textures - const uint8_t *R_GetTwoSkyColumns(FTexture *fronttex, int x) + const uint8_t *RenderSkyPlane::GetTwoSkyColumns(FTexture *fronttex, int x) { uint32_t ang, angle1, angle2; @@ -271,7 +247,7 @@ namespace swrenderer } else { - //return R_GetOneSkyColumn(fronttex, x); + //return GetOneSkyColumn(fronttex, x); for (i = skycolplace_bgra - 4; i < skycolplace_bgra; ++i) { int ic = (i % MAXSKYBUF); // i "checker" - can wrap around the ends of the array @@ -311,7 +287,7 @@ namespace swrenderer } } - void R_DrawSkyColumnStripe(int start_x, int y1, int y2, int columns, double scale, double texturemid, double yrepeat) + void RenderSkyPlane::DrawSkyColumnStripe(int start_x, int y1, int y2, int columns, double scale, double texturemid, double yrepeat) { using namespace drawerargs; @@ -376,12 +352,12 @@ namespace swrenderer R_Drawers()->DrawDoubleSkyColumn(solid_top, solid_bottom); } - void R_DrawSkyColumn(int start_x, int y1, int y2, int columns) + void RenderSkyPlane::DrawSkyColumn(int start_x, int y1, int y2, int columns) { if (1 << 
frontskytex->HeightBits == frontskytex->GetHeight()) { double texturemid = skymid * frontskytex->Scale.Y + frontskytex->GetHeight(); - R_DrawSkyColumnStripe(start_x, y1, y2, columns, frontskytex->Scale.Y, texturemid, frontskytex->Scale.Y); + DrawSkyColumnStripe(start_x, y1, y2, columns, frontskytex->Scale.Y, texturemid, frontskytex->Scale.Y); } else { @@ -392,11 +368,11 @@ namespace swrenderer double topfrac = fmod(skymid + iscale * (1 - CenterY), frontskytex->GetHeight()); if (topfrac < 0) topfrac += frontskytex->GetHeight(); double texturemid = topfrac - iscale * (1 - CenterY); - R_DrawSkyColumnStripe(start_x, y1, y2, columns, scale, texturemid, yrepeat); + DrawSkyColumnStripe(start_x, y1, y2, columns, scale, texturemid, yrepeat); } } - void R_DrawCapSky(visplane_t *pl) + void RenderSkyPlane::DrawCapSky(visplane_t *pl) { int x1 = pl->left; int x2 = pl->right; @@ -410,15 +386,15 @@ namespace swrenderer if (y2 <= y1) continue; - R_DrawSkyColumn(x, y1, y2, 1); + DrawSkyColumn(x, y1, y2, 1); } } - void R_DrawSky(visplane_t *pl) + void RenderSkyPlane::DrawSky(visplane_t *pl) { if (r_skymode == 2) { - R_DrawCapSky(pl); + DrawCapSky(pl); return; } @@ -468,17 +444,17 @@ namespace swrenderer lastskycol_bgra[x] = 0xffffffff; } R_DrawSkySegment(frontskytex, pl->left, pl->right, (short *)pl->top, (short *)pl->bottom, swall, lwall, - frontyScale, 0, 0, 0.0f, 0.0f, backskytex == NULL ? R_GetOneSkyColumn : R_GetTwoSkyColumns); + frontyScale, 0, 0, 0.0f, 0.0f, backskytex == NULL ? 
RenderSkyPlane::GetOneSkyColumn : RenderSkyPlane::GetTwoSkyColumns); } else { // The texture does not tile nicely frontyScale *= skyscale; frontiScale = 1 / frontyScale; - R_DrawSkyStriped(pl); + DrawSkyStriped(pl); } } - void R_DrawSkyStriped(visplane_t *pl) + void RenderSkyPlane::DrawSkyStriped(visplane_t *pl) { short drawheight = short(frontskytex->GetHeight() * frontyScale); double topfrac; @@ -505,10 +481,33 @@ namespace swrenderer lastskycol[x] = 0xffffffff; lastskycol_bgra[x] = 0xffffffff; } - R_DrawSkySegment(frontskytex, pl->left, pl->right, top, bot, swall, lwall, frontskytex->Scale.Y, 0, 0, 0.0f, 0.0f, backskytex == NULL ? R_GetOneSkyColumn : R_GetTwoSkyColumns); + R_DrawSkySegment(frontskytex, pl->left, pl->right, top, bot, swall, lwall, frontskytex->Scale.Y, 0, 0, 0.0f, 0.0f, backskytex == NULL ? RenderSkyPlane::GetOneSkyColumn : RenderSkyPlane::GetTwoSkyColumns); yl = yh; yh += drawheight; dc_texturemid = iscale * (centery - yl - 1); } } + + FTexture *RenderSkyPlane::frontskytex; + FTexture *RenderSkyPlane::backskytex; + angle_t RenderSkyPlane::skyflip; + int RenderSkyPlane::frontpos; + int RenderSkyPlane::backpos; + double RenderSkyPlane::frontyScale; + fixed_t RenderSkyPlane::frontcyl; + fixed_t RenderSkyPlane::backcyl; + double RenderSkyPlane::skymid; + angle_t RenderSkyPlane::skyangle; + double RenderSkyPlane::frontiScale; + + // Allow for layer skies up to 512 pixels tall. This is overkill, + // since the most anyone can ever see of the sky is 500 pixels. + // We need 4 skybufs because DrawSkySegment can draw up to 4 columns at a time. 
+ // Need two versions - one for true color and one for palette + uint8_t RenderSkyPlane::skybuf[4][512]; + uint32_t RenderSkyPlane::skybuf_bgra[MAXSKYBUF][512]; + uint32_t RenderSkyPlane::lastskycol[4]; + uint32_t RenderSkyPlane::lastskycol_bgra[MAXSKYBUF]; + int RenderSkyPlane::skycolplace; + int RenderSkyPlane::skycolplace_bgra; } diff --git a/src/swrenderer/plane/r_skyplane.h b/src/swrenderer/plane/r_skyplane.h index 2810a86933..3acf4d53a6 100644 --- a/src/swrenderer/plane/r_skyplane.h +++ b/src/swrenderer/plane/r_skyplane.h @@ -17,14 +17,43 @@ namespace swrenderer { - void R_DrawSkyPlane(visplane_t *pl); + class RenderSkyPlane + { + public: + static void Render(visplane_t *pl); - void R_DrawSky(visplane_t *pl); - void R_DrawSkyStriped(visplane_t *pl); - void R_DrawCapSky(visplane_t *pl); - void R_DrawSkyColumnStripe(int start_x, int y1, int y2, int columns, double scale, double texturemid, double yrepeat); - void R_DrawSkyColumn(int start_x, int y1, int y2, int columns); + private: + static void DrawSky(visplane_t *pl); + static void DrawSkyStriped(visplane_t *pl); + static void DrawCapSky(visplane_t *pl); + static void DrawSkyColumnStripe(int start_x, int y1, int y2, int columns, double scale, double texturemid, double yrepeat); + static void DrawSkyColumn(int start_x, int y1, int y2, int columns); - const uint8_t *R_GetOneSkyColumn(FTexture *fronttex, int x); - const uint8_t *R_GetTwoSkyColumns(FTexture *fronttex, int x); + static const uint8_t *GetOneSkyColumn(FTexture *fronttex, int x); + static const uint8_t *GetTwoSkyColumns(FTexture *fronttex, int x); + + static FTexture *frontskytex; + static FTexture *backskytex; + static angle_t skyflip; + static int frontpos; + static int backpos; + static double frontyScale; + static fixed_t frontcyl; + static fixed_t backcyl; + static double skymid; + static angle_t skyangle; + static double frontiScale; + + // Allow for layer skies up to 512 pixels tall. 
This is overkill, + // since the most anyone can ever see of the sky is 500 pixels. + // We need 4 skybufs because R_DrawSkySegment can draw up to 4 columns at a time. + // Need two versions - one for true color and one for palette + enum { MAXSKYBUF = 3072 }; + static uint8_t skybuf[4][512]; + static uint32_t skybuf_bgra[MAXSKYBUF][512]; + static uint32_t lastskycol[4]; + static uint32_t lastskycol_bgra[MAXSKYBUF]; + static int skycolplace; + static int skycolplace_bgra; + }; } diff --git a/src/swrenderer/plane/r_visibleplane.cpp b/src/swrenderer/plane/r_visibleplane.cpp index a045d7a991..be530aae37 100644 --- a/src/swrenderer/plane/r_visibleplane.cpp +++ b/src/swrenderer/plane/r_visibleplane.cpp @@ -480,7 +480,7 @@ namespace swrenderer if (pl->picnum == skyflatnum) // sky flat { - R_DrawSkyPlane(pl); + RenderSkyPlane::Render(pl); } else // regular flat { From 2988a5fe878453c271628bfa828188a9e2cca30f Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 11 Jan 2017 23:08:24 +0100 Subject: [PATCH 694/912] Convert r_visibleplane into classes --- src/swrenderer/line/r_line.cpp | 7 +- src/swrenderer/plane/r_visibleplane.cpp | 212 ++++++++++---------- src/swrenderer/plane/r_visibleplane.h | 38 ++-- src/swrenderer/r_main.cpp | 9 +- src/swrenderer/r_memory.cpp | 22 ++ src/swrenderer/r_memory.h | 5 + src/swrenderer/scene/r_opaque_pass.cpp | 16 +- src/swrenderer/scene/r_portal.cpp | 38 ++-- src/swrenderer/scene/r_translucent_pass.cpp | 4 +- 9 files changed, 195 insertions(+), 156 deletions(-) diff --git a/src/swrenderer/line/r_line.cpp b/src/swrenderer/line/r_line.cpp index ccf65ef266..805f6aac53 100644 --- a/src/swrenderer/line/r_line.cpp +++ b/src/swrenderer/line/r_line.cpp @@ -537,7 +537,7 @@ namespace swrenderer { if (ceilingplane) { // killough 4/11/98: add NULL ptr checks - ceilingplane = R_CheckPlane(ceilingplane, start, stop); + ceilingplane = VisiblePlaneList::Instance()->GetRange(ceilingplane, start, stop); } else { @@ -549,7 +549,7 @@ namespace swrenderer { 
if (floorplane) { // killough 4/11/98: add NULL ptr checks - floorplane = R_CheckPlane(floorplane, start, stop); + floorplane = VisiblePlaneList::Instance()->GetRange(floorplane, start, stop); } else { @@ -559,7 +559,8 @@ namespace swrenderer RenderWallSegmentTextures(start, stop); - if (clip3d->fake3D & FAKE3D_FAKEMASK) { + if (clip3d->fake3D & FAKE3D_FAKEMASK) + { return (clip3d->fake3D & FAKE3D_FAKEMASK) == 0; } diff --git a/src/swrenderer/plane/r_visibleplane.cpp b/src/swrenderer/plane/r_visibleplane.cpp index be530aae37..2f84e18a3e 100644 --- a/src/swrenderer/plane/r_visibleplane.cpp +++ b/src/swrenderer/plane/r_visibleplane.cpp @@ -29,6 +29,7 @@ #include "g_level.h" #include "gl/dynlights/gl_dynlight.h" #include "swrenderer/r_main.h" +#include "swrenderer/r_memory.h" #include "swrenderer/scene/r_opaque_pass.h" #include "swrenderer/scene/r_3dfloors.h" #include "swrenderer/scene/r_portal.h" @@ -42,19 +43,101 @@ CVAR(Bool, tilt, false, 0); namespace swrenderer { - // [RH] Allocate one extra for sky box planes. 
- visplane_t *visplanes[MAXVISPLANES + 1]; - visplane_t *freetail; - visplane_t **freehead = &freetail; - - namespace + void visplane_t::AddLights(FLightNode *node) { - enum { max_plane_lights = 32 * 1024 }; - visplane_light plane_lights[max_plane_lights]; - int next_plane_light = 0; + if (!r_dynlights) + return; + + while (node) + { + if (!(node->lightsource->flags2&MF2_DORMANT)) + { + bool found = false; + visplane_light *light_node = lights; + while (light_node) + { + if (light_node->lightsource == node->lightsource) + { + found = true; + break; + } + light_node = light_node->next; + } + if (!found) + { + visplane_light *newlight = R_NewPlaneLight(); + if (!newlight) + return; + + newlight->next = lights; + newlight->lightsource = node->lightsource; + lights = newlight; + } + } + node = node->nextLight; + } } - void R_DeinitPlanes() + void visplane_t::Render(fixed_t alpha, bool additive, bool masked) + { + if (left >= right) + return; + + if (picnum == skyflatnum) // sky flat + { + RenderSkyPlane::Render(this); + } + else // regular flat + { + FTexture *tex = TexMan(picnum, true); + + if (tex->UseType == FTexture::TEX_Null) + { + return; + } + + if (!masked && !additive) + { // If we're not supposed to see through this plane, draw it opaque. + alpha = OPAQUE; + } + else if (!tex->bMasked) + { // Don't waste time on a masked texture if it isn't really masked. 
+ masked = false; + } + R_SetSpanTexture(tex); + double xscale = xform.xScale * tex->Scale.X; + double yscale = xform.yScale * tex->Scale.Y; + + basecolormap = colormap; + + if (!height.isSlope() && !tilt) + { + RenderFlatPlane renderer; + renderer.Render(this, xscale, yscale, alpha, additive, masked); + } + else + { + RenderSlopePlane renderer; + renderer.Render(this, xscale, yscale, alpha, additive, masked); + } + } + NetUpdate(); + } + + VisiblePlaneList *VisiblePlaneList::Instance() + { + static VisiblePlaneList instance; + return &instance; + } + + VisiblePlaneList::VisiblePlaneList() + { + for (auto &plane : visplanes) + plane = nullptr; + freehead = &freetail; + } + + void VisiblePlaneList::Deinit() { // do not use R_ClearPlanes because at this point the screen pointer is no longer valid. for (int i = 0; i <= MAXVISPLANES; i++) // new code -- killough @@ -72,7 +155,7 @@ namespace swrenderer } } - visplane_t *new_visplane(unsigned hash) + visplane_t *VisiblePlaneList::Add(unsigned hash) { visplane_t *check = freetail; @@ -94,7 +177,7 @@ namespace swrenderer return check; } - void R_PlaneInitData() + void VisiblePlaneList::Init() { int i; visplane_t *pl; @@ -124,7 +207,7 @@ namespace swrenderer } } - void R_ClearPlanes(bool fullclear) + void VisiblePlaneList::Clear(bool fullclear) { int i; @@ -159,47 +242,10 @@ namespace swrenderer freehead = &(*freehead)->next; } } - - next_plane_light = 0; } } - void R_AddPlaneLights(visplane_t *plane, FLightNode *node) - { - if (!r_dynlights) - return; - - while (node) - { - if (!(node->lightsource->flags2&MF2_DORMANT)) - { - bool found = false; - visplane_light *light_node = plane->lights; - while (light_node) - { - if (light_node->lightsource == node->lightsource) - { - found = true; - break; - } - light_node = light_node->next; - } - if (!found) - { - if (next_plane_light == max_plane_lights) - return; - - visplane_light *newlight = &plane_lights[next_plane_light++]; - newlight->next = plane->lights; - 
newlight->lightsource = node->lightsource; - plane->lights = newlight; - } - } - node = node->nextLight; - } - } - - visplane_t *R_FindPlane(const secplane_t &height, FTextureID picnum, int lightlevel, double Alpha, bool additive, const FTransform &xxform, int sky, FSectorPortal *portal) + visplane_t *VisiblePlaneList::FindPlane(const secplane_t &height, FTextureID picnum, int lightlevel, double Alpha, bool additive, const FTransform &xxform, int sky, FSectorPortal *portal) { secplane_t plane; visplane_t *check; @@ -246,7 +292,7 @@ namespace swrenderer } // New visplane algorithm uses hash table -- killough - hash = isskybox ? MAXVISPLANES : visplane_hash(picnum.GetIndex(), lightlevel, height); + hash = isskybox ? MAXVISPLANES : CalcHash(picnum.GetIndex(), lightlevel, height); RenderPortal *renderportal = RenderPortal::Instance(); @@ -307,7 +353,7 @@ namespace swrenderer } } - check = new_visplane(hash); // killough + check = Add(hash); // killough check->height = plane; check->picnum = picnum; @@ -333,7 +379,7 @@ namespace swrenderer return check; } - visplane_t *R_CheckPlane(visplane_t *pl, int start, int stop) + visplane_t *VisiblePlaneList::GetRange(visplane_t *pl, int start, int stop) { int intrl, intrh; int unionl, unionh; @@ -384,9 +430,9 @@ namespace swrenderer } else { - hash = visplane_hash(pl->picnum.GetIndex(), pl->lightlevel, pl->height); + hash = CalcHash(pl->picnum.GetIndex(), pl->lightlevel, pl->height); } - visplane_t *new_pl = new_visplane(hash); + visplane_t *new_pl = Add(hash); new_pl->height = pl->height; new_pl->picnum = pl->picnum; @@ -413,7 +459,7 @@ namespace swrenderer return pl; } - int R_DrawPlanes() + int VisiblePlaneList::Render() { visplane_t *pl; int i; @@ -433,14 +479,14 @@ namespace swrenderer // kg3D - draw only real planes now if (pl->sky >= 0) { vpcount++; - R_DrawSinglePlane(pl, OPAQUE, false, false); + pl->Render(OPAQUE, false, false); } } } return vpcount; } - void R_DrawHeightPlanes(double height) + void 
VisiblePlaneList::RenderHeight(double height) { visplane_t *pl; int i; @@ -465,7 +511,7 @@ namespace swrenderer ViewAngle = pl->viewangle; renderportal->MirrorFlags = pl->MirrorFlags; - R_DrawSinglePlane(pl, pl->sky & 0x7FFFFFFF, pl->Additive, true); + pl->Render(pl->sky & 0x7FFFFFFF, pl->Additive, true); } } } @@ -473,52 +519,6 @@ namespace swrenderer ViewAngle = oViewAngle; } - void R_DrawSinglePlane(visplane_t *pl, fixed_t alpha, bool additive, bool masked) - { - if (pl->left >= pl->right) - return; - - if (pl->picnum == skyflatnum) // sky flat - { - RenderSkyPlane::Render(pl); - } - else // regular flat - { - FTexture *tex = TexMan(pl->picnum, true); - - if (tex->UseType == FTexture::TEX_Null) - { - return; - } - - if (!masked && !additive) - { // If we're not supposed to see through this plane, draw it opaque. - alpha = OPAQUE; - } - else if (!tex->bMasked) - { // Don't waste time on a masked texture if it isn't really masked. - masked = false; - } - R_SetSpanTexture(tex); - double xscale = pl->xform.xScale * tex->Scale.X; - double yscale = pl->xform.yScale * tex->Scale.Y; - - basecolormap = pl->colormap; - - if (!pl->height.isSlope() && !tilt) - { - RenderFlatPlane renderer; - renderer.Render(pl, xscale, yscale, alpha, additive, masked); - } - else - { - RenderSlopePlane renderer; - renderer.Render(pl, xscale, yscale, alpha, additive, masked); - } - } - NetUpdate(); - } - void PlaneRenderer::RenderLines(visplane_t *pl) { // t1/b1 are at x diff --git a/src/swrenderer/plane/r_visibleplane.h b/src/swrenderer/plane/r_visibleplane.h index 41f7d7bda2..ca4da07547 100644 --- a/src/swrenderer/plane/r_visibleplane.h +++ b/src/swrenderer/plane/r_visibleplane.h @@ -64,29 +64,37 @@ namespace swrenderer unsigned short *bottom; // [RH] bottom and top arrays are dynamically unsigned short pad; // allocated immediately after the unsigned short top[]; // visplane. 
+ + void AddLights(FLightNode *node); + void Render(fixed_t alpha, bool additive, bool masked); }; - #define MAXVISPLANES 128 /* must be a power of 2 */ - #define visplane_hash(picnum,lightlevel,height) ((unsigned)((picnum)*3+(lightlevel)+(FLOAT2FIXED((height).fD()))*7) & (MAXVISPLANES-1)) + class VisiblePlaneList + { + public: + static VisiblePlaneList *Instance(); - extern visplane_t *visplanes[MAXVISPLANES + 1]; - extern visplane_t *freetail; - extern visplane_t **freehead; + void Init(); + void Deinit(); + void Clear(bool fullclear); - void R_DeinitPlanes(); - visplane_t *new_visplane(unsigned hash); + visplane_t *FindPlane(const secplane_t &height, FTextureID picnum, int lightlevel, double Alpha, bool additive, const FTransform &xxform, int sky, FSectorPortal *portal); + visplane_t *GetRange(visplane_t *pl, int start, int stop); - void R_PlaneInitData(); - void R_ClearPlanes(bool fullclear); + int Render(); + void RenderHeight(double height); - void R_AddPlaneLights(visplane_t *plane, FLightNode *node); + enum { MAXVISPLANES = 128 }; // must be a power of 2 + visplane_t *visplanes[MAXVISPLANES + 1]; + visplane_t *freetail = nullptr; + visplane_t **freehead = nullptr; - visplane_t *R_FindPlane(const secplane_t &height, FTextureID picnum, int lightlevel, double Alpha, bool additive, const FTransform &xxform, int sky, FSectorPortal *portal); - visplane_t *R_CheckPlane(visplane_t *pl, int start, int stop); + private: + VisiblePlaneList(); + visplane_t *Add(unsigned hash); - int R_DrawPlanes(); - void R_DrawHeightPlanes(double height); - void R_DrawSinglePlane(visplane_t *pl, fixed_t alpha, bool additive, bool masked); + static unsigned CalcHash(int picnum, int lightlevel, const secplane_t &height) { return (unsigned)((picnum) * 3 + (lightlevel)+(FLOAT2FIXED((height).fD())) * 7) & (MAXVISPLANES - 1); } + }; class PlaneRenderer { diff --git a/src/swrenderer/r_main.cpp b/src/swrenderer/r_main.cpp index e2f034b4ce..cdbc083d5e 100644 --- a/src/swrenderer/r_main.cpp +++ 
b/src/swrenderer/r_main.cpp @@ -386,7 +386,7 @@ void R_InitRenderer() static void R_ShutdownRenderer() { RenderTranslucentPass::Deinit(); - R_DeinitPlanes(); + VisiblePlaneList::Instance()->Deinit(); Clip3DFloors::Instance()->Cleanup(); R_DeinitOpenings(); R_FreeDrawSegs(); @@ -527,7 +527,8 @@ void R_RenderActorView (AActor *actor, bool dontmaplines) // Clear buffers. R_ClearClipSegs (0, viewwidth); R_ClearDrawSegs (); - R_ClearPlanes (true); + VisiblePlaneList::Instance()->Clear(true); + R_FreePlaneLights(); RenderTranslucentPass::Clear(); // opening / clipping determination @@ -568,7 +569,7 @@ void R_RenderActorView (AActor *actor, bool dontmaplines) if (viewactive) { PlaneCycles.Clock(); - R_DrawPlanes(); + VisiblePlaneList::Instance()->Render(); RenderPortal::Instance()->RenderPlanePortals(); PlaneCycles.Unclock(); @@ -655,7 +656,7 @@ void R_RenderViewToCanvas (AActor *actor, DCanvas *canvas, void R_MultiresInit () { - R_PlaneInitData (); + VisiblePlaneList::Instance()->Init(); } diff --git a/src/swrenderer/r_memory.cpp b/src/swrenderer/r_memory.cpp index 0b3d3c1d4b..c77cada56c 100644 --- a/src/swrenderer/r_memory.cpp +++ b/src/swrenderer/r_memory.cpp @@ -20,6 +20,7 @@ #include "p_setup.h" #include "swrenderer/r_main.h" #include "swrenderer/drawers/r_draw.h" +#include "swrenderer/plane/r_visibleplane.h" #include "a_sharedglobal.h" #include "g_level.h" #include "p_effect.h" @@ -70,4 +71,25 @@ namespace swrenderer openings = nullptr; } } + + ///////////////////////////////////////////////////////////////////////// + + namespace + { + enum { max_plane_lights = 32 * 1024 }; + visplane_light plane_lights[max_plane_lights]; + int next_plane_light = 0; + } + + visplane_light *R_NewPlaneLight() + { + if (next_plane_light == max_plane_lights) + return nullptr; + return &plane_lights[next_plane_light++]; + } + + void R_FreePlaneLights() + { + next_plane_light = 0; + } } diff --git a/src/swrenderer/r_memory.h b/src/swrenderer/r_memory.h index 5df7504807..7914cdfcfe 100644 
--- a/src/swrenderer/r_memory.h +++ b/src/swrenderer/r_memory.h @@ -15,9 +15,14 @@ namespace swrenderer { + struct visplane_light; + extern short *openings; ptrdiff_t R_NewOpening(ptrdiff_t len); void R_FreeOpenings(); void R_DeinitOpenings(); + + visplane_light *R_NewPlaneLight(); + void R_FreePlaneLights(); } diff --git a/src/swrenderer/scene/r_opaque_pass.cpp b/src/swrenderer/scene/r_opaque_pass.cpp index 337942cc4f..99d79f10d7 100644 --- a/src/swrenderer/scene/r_opaque_pass.cpp +++ b/src/swrenderer/scene/r_opaque_pass.cpp @@ -501,7 +501,7 @@ namespace swrenderer (frontsector->heightsec && !(frontsector->heightsec->MoreFlags & SECF_IGNOREHEIGHTSEC) && frontsector->heightsec->GetTexture(sector_t::floor) == skyflatnum) ? - R_FindPlane(frontsector->ceilingplane, // killough 3/8/98 + VisiblePlaneList::Instance()->FindPlane(frontsector->ceilingplane, // killough 3/8/98 frontsector->GetTexture(sector_t::ceiling), ceilinglightlevel + r_actualextralight, // killough 4/11/98 frontsector->GetAlpha(sector_t::ceiling), @@ -512,7 +512,7 @@ namespace swrenderer ) : nullptr; if (ceilingplane) - R_AddPlaneLights(ceilingplane, frontsector->lighthead); + ceilingplane->AddLights(frontsector->lighthead); if (fixedlightlev < 0 && frontsector->e && frontsector->e->XFloor.lightlist.Size()) { @@ -541,7 +541,7 @@ namespace swrenderer (frontsector->heightsec && !(frontsector->heightsec->MoreFlags & SECF_IGNOREHEIGHTSEC) && frontsector->heightsec->GetTexture(sector_t::ceiling) == skyflatnum) ? 
- R_FindPlane(frontsector->floorplane, + VisiblePlaneList::Instance()->FindPlane(frontsector->floorplane, frontsector->GetTexture(sector_t::floor), floorlightlevel + r_actualextralight, // killough 3/16/98 frontsector->GetAlpha(sector_t::floor), @@ -552,7 +552,7 @@ namespace swrenderer ) : nullptr; if (floorplane) - R_AddPlaneLights(floorplane, frontsector->lighthead); + floorplane->AddLights(frontsector->lighthead); // kg3D - fake planes rendering if (r_3dfloors && frontsector->e && frontsector->e->XFloor.ffloors.Size()) @@ -606,7 +606,7 @@ namespace swrenderer } ceilingplane = nullptr; - floorplane = R_FindPlane(frontsector->floorplane, + floorplane = VisiblePlaneList::Instance()->FindPlane(frontsector->floorplane, frontsector->GetTexture(sector_t::floor), floorlightlevel + r_actualextralight, // killough 3/16/98 frontsector->GetAlpha(sector_t::floor), @@ -616,7 +616,7 @@ namespace swrenderer nullptr); if (floorplane) - R_AddPlaneLights(floorplane, frontsector->lighthead); + floorplane->AddLights(frontsector->lighthead); FakeDrawLoop(sub, floorplane, ceilingplane); clip3d->fake3D = 0; @@ -671,7 +671,7 @@ namespace swrenderer tempsec.ceilingplane.ChangeHeight(1 / 65536.); floorplane = nullptr; - ceilingplane = R_FindPlane(frontsector->ceilingplane, // killough 3/8/98 + ceilingplane = VisiblePlaneList::Instance()->FindPlane(frontsector->ceilingplane, // killough 3/8/98 frontsector->GetTexture(sector_t::ceiling), ceilinglightlevel + r_actualextralight, // killough 4/11/98 frontsector->GetAlpha(sector_t::ceiling), @@ -681,7 +681,7 @@ namespace swrenderer nullptr); if (ceilingplane) - R_AddPlaneLights(ceilingplane, frontsector->lighthead); + ceilingplane->AddLights(frontsector->lighthead); FakeDrawLoop(sub, floorplane, ceilingplane); clip3d->fake3D = 0; diff --git a/src/swrenderer/scene/r_portal.cpp b/src/swrenderer/scene/r_portal.cpp index 88b9415ce4..b235944d87 100644 --- a/src/swrenderer/scene/r_portal.cpp +++ b/src/swrenderer/scene/r_portal.cpp @@ -88,7 +88,9 @@ 
namespace swrenderer { numskyboxes = 0; - if (visplanes[MAXVISPLANES] == nullptr) + VisiblePlaneList *planes = VisiblePlaneList::Instance(); + + if (planes->visplanes[VisiblePlaneList::MAXVISPLANES] == nullptr) return; Clip3DFloors::Instance()->EnterSkybox(); @@ -107,19 +109,19 @@ namespace swrenderer int i; visplane_t *pl; - for (pl = visplanes[MAXVISPLANES]; pl != nullptr; pl = visplanes[MAXVISPLANES]) + for (pl = planes->visplanes[VisiblePlaneList::MAXVISPLANES]; pl != nullptr; pl = planes->visplanes[VisiblePlaneList::MAXVISPLANES]) { // Pop the visplane off the list now so that if this skybox adds more // skyboxes to the list, they will be drawn instead of skipped (because // new skyboxes go to the beginning of the list instead of the end). - visplanes[MAXVISPLANES] = pl->next; + planes->visplanes[VisiblePlaneList::MAXVISPLANES] = pl->next; pl->next = nullptr; if (pl->right < pl->left || !r_skyboxes || numskyboxes == MAX_SKYBOX_PLANES || pl->portal == nullptr) { - R_DrawSinglePlane(pl, OPAQUE, false, false); - *freehead = pl; - freehead = &pl->next; + pl->Render(OPAQUE, false, false); + *planes->freehead = pl; + planes->freehead = &pl->next; continue; } @@ -158,9 +160,9 @@ namespace swrenderer // not implemented yet default: - R_DrawSinglePlane(pl, OPAQUE, false, false); - *freehead = pl; - freehead = &pl->next; + pl->Render(OPAQUE, false, false); + *planes->freehead = pl; + planes->freehead = &pl->next; numskyboxes--; continue; } @@ -173,7 +175,7 @@ namespace swrenderer R_SetViewAngle(); validcount++; // Make sure we see all sprites - R_ClearPlanes(false); + planes->Clear(false); R_ClearClipSegs(pl->left, pl->right); WindowLeft = pl->left; WindowRight = pl->right; @@ -227,7 +229,7 @@ namespace swrenderer RenderOpaquePass::Instance()->RenderScene(); Clip3DFloors::Instance()->ResetClip(); // reset clips (floor/ceiling) - R_DrawPlanes(); + planes->Render(); port->mFlags &= ~PORTSF_INSKYBOX; if (port->mPartner > 0) sectorPortals[port->mPartner].mFlags &= 
~PORTSF_INSKYBOX; @@ -256,10 +258,10 @@ namespace swrenderer visplaneStack.Pop(pl); if (pl->Alpha > 0 && pl->picnum != skyflatnum) { - R_DrawSinglePlane(pl, pl->Alpha, pl->Additive, true); + pl->Render(pl->Alpha, pl->Additive, true); } - *freehead = pl; - freehead = &pl->next; + *planes->freehead = pl; + planes->freehead = &pl->next; } VisibleSpriteList::firstvissprite = VisibleSpriteList::vissprites; VisibleSpriteList::vissprite_p = VisibleSpriteList::vissprites + savedvissprite_p; @@ -281,8 +283,8 @@ namespace swrenderer if (Clip3DFloors::Instance()->fakeActive) return; - for (*freehead = visplanes[MAXVISPLANES], visplanes[MAXVISPLANES] = nullptr; *freehead; ) - freehead = &(*freehead)->next; + for (*planes->freehead = planes->visplanes[VisiblePlaneList::MAXVISPLANES], planes->visplanes[VisiblePlaneList::MAXVISPLANES] = nullptr; *planes->freehead; ) + planes->freehead = &(*planes->freehead)->next; } void RenderPortal::RenderLinePortals() @@ -421,7 +423,7 @@ namespace swrenderer PortalDrawseg* prevpds = CurrentPortal; CurrentPortal = pds; - R_ClearPlanes(false); + VisiblePlaneList::Instance()->Clear(false); R_ClearClipSegs(pds->x1, pds->x2); WindowLeft = pds->x1; @@ -451,7 +453,7 @@ namespace swrenderer if (!savedvisibility && camera) camera->renderflags &= ~RF_INVISIBLE; PlaneCycles.Clock(); - R_DrawPlanes(); + VisiblePlaneList::Instance()->Render(); RenderPlanePortals(); PlaneCycles.Unclock(); diff --git a/src/swrenderer/scene/r_translucent_pass.cpp b/src/swrenderer/scene/r_translucent_pass.cpp index 3f5d69e03b..48cf627dc8 100644 --- a/src/swrenderer/scene/r_translucent_pass.cpp +++ b/src/swrenderer/scene/r_translucent_pass.cpp @@ -586,7 +586,7 @@ namespace swrenderer } clip3d->sclipBottom = hl->height; DrawMaskedSingle(true); - R_DrawHeightPlanes(hl->height); + VisiblePlaneList::Instance()->RenderHeight(hl->height); } // floors @@ -595,7 +595,7 @@ namespace swrenderer DrawMaskedSingle(true); for (HeightLevel *hl = clip3d->height_top; hl != nullptr && hl->height 
< ViewPos.Z; hl = hl->next) { - R_DrawHeightPlanes(hl->height); + VisiblePlaneList::Instance()->RenderHeight(hl->height); if (hl->next) { clip3d->fake3D = FAKE3D_DOWN2UP | FAKE3D_CLIPTOP | FAKE3D_CLIPBOTTOM; From c80860cd5de5fb454f1337506a2c8356394b9293 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 11 Jan 2017 23:27:35 +0100 Subject: [PATCH 695/912] Split r_visibleplane into more files --- src/CMakeLists.txt | 2 + src/swrenderer/line/r_line.cpp | 1 + src/swrenderer/plane/r_flatplane.cpp | 1 + src/swrenderer/plane/r_flatplane.h | 4 +- src/swrenderer/plane/r_planerenderer.cpp | 99 +++++ src/swrenderer/plane/r_planerenderer.h | 34 ++ src/swrenderer/plane/r_slopeplane.cpp | 1 + src/swrenderer/plane/r_slopeplane.h | 2 +- src/swrenderer/plane/r_visibleplane.cpp | 459 -------------------- src/swrenderer/plane/r_visibleplane.h | 39 -- src/swrenderer/plane/r_visibleplanelist.cpp | 439 +++++++++++++++++++ src/swrenderer/plane/r_visibleplanelist.h | 51 +++ src/swrenderer/r_main.cpp | 1 + src/swrenderer/scene/r_opaque_pass.cpp | 1 + src/swrenderer/scene/r_portal.cpp | 1 + src/swrenderer/scene/r_translucent_pass.cpp | 1 + 16 files changed, 636 insertions(+), 500 deletions(-) create mode 100644 src/swrenderer/plane/r_planerenderer.cpp create mode 100644 src/swrenderer/plane/r_planerenderer.h create mode 100644 src/swrenderer/plane/r_visibleplanelist.cpp create mode 100644 src/swrenderer/plane/r_visibleplanelist.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 3e6e01a673..44c72406f0 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -865,7 +865,9 @@ set( FASTMATH_PCH_SOURCES swrenderer/things/r_wallsprite.cpp swrenderer/things/r_decal.cpp swrenderer/plane/r_visibleplane.cpp + swrenderer/plane/r_visibleplanelist.cpp swrenderer/plane/r_skyplane.cpp + swrenderer/plane/r_planerenderer.cpp swrenderer/plane/r_flatplane.cpp swrenderer/plane/r_slopeplane.cpp polyrenderer/poly_renderer.cpp diff --git a/src/swrenderer/line/r_line.cpp 
b/src/swrenderer/line/r_line.cpp index 805f6aac53..4122623b3f 100644 --- a/src/swrenderer/line/r_line.cpp +++ b/src/swrenderer/line/r_line.cpp @@ -43,6 +43,7 @@ #include "swrenderer/segments/r_clipsegment.h" #include "swrenderer/segments/r_drawsegment.h" #include "swrenderer/plane/r_visibleplane.h" +#include "swrenderer/plane/r_visibleplanelist.h" #include "swrenderer/things/r_decal.h" CVAR(Bool, r_fogboundary, true, 0) diff --git a/src/swrenderer/plane/r_flatplane.cpp b/src/swrenderer/plane/r_flatplane.cpp index 3a1e3936cc..760ae1b223 100644 --- a/src/swrenderer/plane/r_flatplane.cpp +++ b/src/swrenderer/plane/r_flatplane.cpp @@ -37,6 +37,7 @@ #include "swrenderer/segments/r_clipsegment.h" #include "swrenderer/segments/r_drawsegment.h" #include "swrenderer/scene/r_portal.h" +#include "swrenderer/plane/r_visibleplane.h" #include "swrenderer/r_memory.h" namespace swrenderer diff --git a/src/swrenderer/plane/r_flatplane.h b/src/swrenderer/plane/r_flatplane.h index 246087c3d1..c72b6402b3 100644 --- a/src/swrenderer/plane/r_flatplane.h +++ b/src/swrenderer/plane/r_flatplane.h @@ -13,10 +13,12 @@ #pragma once -#include "r_visibleplane.h" +#include "r_planerenderer.h" namespace swrenderer { + struct visplane_light; + class RenderFlatPlane : PlaneRenderer { public: diff --git a/src/swrenderer/plane/r_planerenderer.cpp b/src/swrenderer/plane/r_planerenderer.cpp new file mode 100644 index 0000000000..0a6d511bf0 --- /dev/null +++ b/src/swrenderer/plane/r_planerenderer.cpp @@ -0,0 +1,99 @@ +// +// Copyright (C) 1993-1996 by id Software, Inc. +// +// This source is available for distribution and/or modification +// only under the terms of the DOOM Source Code License as +// published by id Software. All rights reserved. +// +// The source is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// FITNESS FOR A PARTICULAR PURPOSE. See the DOOM Source Code License +// for more details. 
+// + +#include +#include + +#include "templates.h" +#include "i_system.h" +#include "w_wad.h" +#include "doomdef.h" +#include "doomstat.h" +#include "r_sky.h" +#include "stats.h" +#include "v_video.h" +#include "a_sharedglobal.h" +#include "c_console.h" +#include "cmdlib.h" +#include "d_net.h" +#include "g_level.h" +#include "gl/dynlights/gl_dynlight.h" +#include "swrenderer/plane/r_visibleplane.h" +#include "swrenderer/plane/r_planerenderer.h" + +namespace swrenderer +{ + void PlaneRenderer::RenderLines(visplane_t *pl) + { + // t1/b1 are at x + // t2/b2 are at x+1 + // spanend[y] is at the right edge + + int x = pl->right - 1; + int t2 = pl->top[x]; + int b2 = pl->bottom[x]; + + if (b2 > t2) + { + fillshort(spanend + t2, b2 - t2, x); + } + + for (--x; x >= pl->left; --x) + { + int t1 = pl->top[x]; + int b1 = pl->bottom[x]; + const int xr = x + 1; + int stop; + + // Draw any spans that have just closed + stop = MIN(t1, b2); + while (t2 < stop) + { + int y = t2++; + int x2 = spanend[y]; + RenderLine(y, xr, x2); + } + stop = MAX(b1, t2); + while (b2 > stop) + { + int y = --b2; + int x2 = spanend[y]; + RenderLine(y, xr, x2); + } + + // Mark any spans that have just opened + stop = MIN(t2, b1); + while (t1 < stop) + { + spanend[t1++] = x; + } + stop = MAX(b2, t2); + while (b1 > stop) + { + spanend[--b1] = x; + } + + t2 = pl->top[x]; + b2 = pl->bottom[x]; + + StepColumn(); + } + // Draw any spans that are still open + while (t2 < b2) + { + int y = --b2; + int x2 = spanend[y]; + RenderLine(y, pl->left, x2); + } + } +} diff --git a/src/swrenderer/plane/r_planerenderer.h b/src/swrenderer/plane/r_planerenderer.h new file mode 100644 index 0000000000..a13fb14e20 --- /dev/null +++ b/src/swrenderer/plane/r_planerenderer.h @@ -0,0 +1,34 @@ +// +// Copyright (C) 1993-1996 by id Software, Inc. +// +// This source is available for distribution and/or modification +// only under the terms of the DOOM Source Code License as +// published by id Software. All rights reserved. 
+// +// The source is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// FITNESS FOR A PARTICULAR PURPOSE. See the DOOM Source Code License +// for more details. +// + +#pragma once + +#include +#include "r_defs.h" + +namespace swrenderer +{ + struct visplane_t; + + class PlaneRenderer + { + public: + void RenderLines(visplane_t *pl); + + virtual void RenderLine(int y, int x1, int x2) = 0; + virtual void StepColumn() { } + + private: + short spanend[MAXHEIGHT]; + }; +} diff --git a/src/swrenderer/plane/r_slopeplane.cpp b/src/swrenderer/plane/r_slopeplane.cpp index 8a5897bb58..6317928a71 100644 --- a/src/swrenderer/plane/r_slopeplane.cpp +++ b/src/swrenderer/plane/r_slopeplane.cpp @@ -38,6 +38,7 @@ #include "swrenderer/segments/r_drawsegment.h" #include "swrenderer/scene/r_portal.h" #include "swrenderer/r_memory.h" +#include "swrenderer/plane/r_visibleplane.h" #ifdef _MSC_VER #pragma warning(disable:4244) diff --git a/src/swrenderer/plane/r_slopeplane.h b/src/swrenderer/plane/r_slopeplane.h index 9ea635a26f..6fad957098 100644 --- a/src/swrenderer/plane/r_slopeplane.h +++ b/src/swrenderer/plane/r_slopeplane.h @@ -13,7 +13,7 @@ #pragma once -#include "r_visibleplane.h" +#include "r_planerenderer.h" namespace swrenderer { diff --git a/src/swrenderer/plane/r_visibleplane.cpp b/src/swrenderer/plane/r_visibleplane.cpp index 2f84e18a3e..2fa0fc44d2 100644 --- a/src/swrenderer/plane/r_visibleplane.cpp +++ b/src/swrenderer/plane/r_visibleplane.cpp @@ -123,463 +123,4 @@ namespace swrenderer } NetUpdate(); } - - VisiblePlaneList *VisiblePlaneList::Instance() - { - static VisiblePlaneList instance; - return &instance; - } - - VisiblePlaneList::VisiblePlaneList() - { - for (auto &plane : visplanes) - plane = nullptr; - freehead = &freetail; - } - - void VisiblePlaneList::Deinit() - { - // do not use R_ClearPlanes because at this point the screen pointer is no longer valid. 
- for (int i = 0; i <= MAXVISPLANES; i++) // new code -- killough - { - for (*freehead = visplanes[i], visplanes[i] = nullptr; *freehead; ) - { - freehead = &(*freehead)->next; - } - } - for (visplane_t *pl = freetail; pl != nullptr; ) - { - visplane_t *next = pl->next; - free(pl); - pl = next; - } - } - - visplane_t *VisiblePlaneList::Add(unsigned hash) - { - visplane_t *check = freetail; - - if (check == nullptr) - { - check = (visplane_t *)M_Malloc(sizeof(*check) + 3 + sizeof(*check->top)*(MAXWIDTH * 2)); - memset(check, 0, sizeof(*check) + 3 + sizeof(*check->top)*(MAXWIDTH * 2)); - check->bottom = check->top + MAXWIDTH + 2; - } - else if (nullptr == (freetail = freetail->next)) - { - freehead = &freetail; - } - - check->lights = nullptr; - - check->next = visplanes[hash]; - visplanes[hash] = check; - return check; - } - - void VisiblePlaneList::Init() - { - int i; - visplane_t *pl; - - // Free all visplanes and let them be re-allocated as needed. - pl = freetail; - - while (pl) - { - visplane_t *next = pl->next; - M_Free(pl); - pl = next; - } - freetail = nullptr; - freehead = &freetail; - - for (i = 0; i < MAXVISPLANES; i++) - { - pl = visplanes[i]; - visplanes[i] = nullptr; - while (pl) - { - visplane_t *next = pl->next; - M_Free(pl); - pl = next; - } - } - } - - void VisiblePlaneList::Clear(bool fullclear) - { - int i; - - // Don't clear fake planes if not doing a full clear. 
- if (!fullclear) - { - for (i = 0; i <= MAXVISPLANES - 1; i++) // new code -- killough - { - for (visplane_t **probe = &visplanes[i]; *probe != nullptr; ) - { - if ((*probe)->sky < 0) - { // fake: move past it - probe = &(*probe)->next; - } - else - { // not fake: move to freelist - visplane_t *vis = *probe; - *freehead = vis; - *probe = vis->next; - vis->next = nullptr; - freehead = &vis->next; - } - } - } - } - else - { - for (i = 0; i <= MAXVISPLANES; i++) // new code -- killough - { - for (*freehead = visplanes[i], visplanes[i] = nullptr; *freehead; ) - { - freehead = &(*freehead)->next; - } - } - } - } - - visplane_t *VisiblePlaneList::FindPlane(const secplane_t &height, FTextureID picnum, int lightlevel, double Alpha, bool additive, const FTransform &xxform, int sky, FSectorPortal *portal) - { - secplane_t plane; - visplane_t *check; - unsigned hash; // killough - bool isskybox; - const FTransform *xform = &xxform; - fixed_t alpha = FLOAT2FIXED(Alpha); - //angle_t angle = (xform.Angle + xform.baseAngle).BAMs(); - - FTransform nulltransform; - - if (picnum == skyflatnum) // killough 10/98 - { // most skies map together - lightlevel = 0; - xform = &nulltransform; - nulltransform.xOffs = nulltransform.yOffs = nulltransform.baseyOffs = 0; - nulltransform.xScale = nulltransform.yScale = 1; - nulltransform.Angle = nulltransform.baseAngle = 0.0; - additive = false; - // [RH] Map floor skies and ceiling skies to separate visplanes. This isn't - // always necessary, but it is needed if a floor and ceiling sky are in the - // same column but separated by a wall. If they both try to reside in the - // same visplane, then only the floor sky will be drawn. 
- plane.set(0., 0., height.fC(), 0.); - isskybox = portal != nullptr && !(portal->mFlags & PORTSF_INSKYBOX); - } - else if (portal != nullptr && !(portal->mFlags & PORTSF_INSKYBOX)) - { - plane = height; - isskybox = true; - } - else - { - plane = height; - isskybox = false; - // kg3D - hack, store alpha in sky - // i know there is ->alpha, but this also allows to identify fake plane - // and ->alpha is for stacked sectors - Clip3DFloors *clip3d = Clip3DFloors::Instance(); - if (clip3d->fake3D & (FAKE3D_FAKEFLOOR | FAKE3D_FAKECEILING)) sky = 0x80000000 | clip3d->fakeAlpha; - else sky = 0; // not skyflatnum so it can't be a sky - portal = nullptr; - alpha = OPAQUE; - } - - // New visplane algorithm uses hash table -- killough - hash = isskybox ? MAXVISPLANES : CalcHash(picnum.GetIndex(), lightlevel, height); - - RenderPortal *renderportal = RenderPortal::Instance(); - - for (check = visplanes[hash]; check; check = check->next) // killough - { - if (isskybox) - { - if (portal == check->portal && plane == check->height) - { - if (portal->mType != PORTS_SKYVIEWPOINT) - { // This skybox is really a stacked sector, so we need to - // check even more. - if (check->extralight == renderportal->stacked_extralight && - check->visibility == renderportal->stacked_visibility && - check->viewpos == renderportal->stacked_viewpos && - ( - // headache inducing logic... 
:( - (portal->mType != PORTS_STACKEDSECTORTHING) || - ( - check->Alpha == alpha && - check->Additive == additive && - (alpha == 0 || // if alpha is > 0 everything needs to be checked - (plane == check->height && - picnum == check->picnum && - lightlevel == check->lightlevel && - basecolormap == check->colormap && // [RH] Add more checks - *xform == check->xform - ) - ) && - check->viewangle == renderportal->stacked_angle - ) - ) - ) - { - return check; - } - } - else - { - return check; - } - } - } - else - if (plane == check->height && - picnum == check->picnum && - lightlevel == check->lightlevel && - basecolormap == check->colormap && // [RH] Add more checks - *xform == check->xform && - sky == check->sky && - renderportal->CurrentPortalUniq == check->CurrentPortalUniq && - renderportal->MirrorFlags == check->MirrorFlags && - Clip3DFloors::Instance()->CurrentSkybox == check->CurrentSkybox && - ViewPos == check->viewpos - ) - { - return check; - } - } - - check = Add(hash); // killough - - check->height = plane; - check->picnum = picnum; - check->lightlevel = lightlevel; - check->xform = *xform; - check->colormap = basecolormap; // [RH] Save colormap - check->sky = sky; - check->portal = portal; - check->left = viewwidth; // Was SCREENWIDTH -- killough 11/98 - check->right = 0; - check->extralight = renderportal->stacked_extralight; - check->visibility = renderportal->stacked_visibility; - check->viewpos = renderportal->stacked_viewpos; - check->viewangle = renderportal->stacked_angle; - check->Alpha = alpha; - check->Additive = additive; - check->CurrentPortalUniq = renderportal->CurrentPortalUniq; - check->MirrorFlags = renderportal->MirrorFlags; - check->CurrentSkybox = Clip3DFloors::Instance()->CurrentSkybox; - - fillshort(check->top, viewwidth, 0x7fff); - - return check; - } - - visplane_t *VisiblePlaneList::GetRange(visplane_t *pl, int start, int stop) - { - int intrl, intrh; - int unionl, unionh; - int x; - - assert(start >= 0 && start < viewwidth); - 
assert(stop > start && stop <= viewwidth); - - if (start < pl->left) - { - intrl = pl->left; - unionl = start; - } - else - { - unionl = pl->left; - intrl = start; - } - - if (stop > pl->right) - { - intrh = pl->right; - unionh = stop; - } - else - { - unionh = pl->right; - intrh = stop; - } - - for (x = intrl; x < intrh && pl->top[x] == 0x7fff; x++) - ; - - if (x >= intrh) - { - // use the same visplane - pl->left = unionl; - pl->right = unionh; - } - else - { - // make a new visplane - unsigned hash; - - if (pl->portal != nullptr && !(pl->portal->mFlags & PORTSF_INSKYBOX) && viewactive) - { - hash = MAXVISPLANES; - } - else - { - hash = CalcHash(pl->picnum.GetIndex(), pl->lightlevel, pl->height); - } - visplane_t *new_pl = Add(hash); - - new_pl->height = pl->height; - new_pl->picnum = pl->picnum; - new_pl->lightlevel = pl->lightlevel; - new_pl->xform = pl->xform; - new_pl->colormap = pl->colormap; - new_pl->portal = pl->portal; - new_pl->extralight = pl->extralight; - new_pl->visibility = pl->visibility; - new_pl->viewpos = pl->viewpos; - new_pl->viewangle = pl->viewangle; - new_pl->sky = pl->sky; - new_pl->Alpha = pl->Alpha; - new_pl->Additive = pl->Additive; - new_pl->CurrentPortalUniq = pl->CurrentPortalUniq; - new_pl->MirrorFlags = pl->MirrorFlags; - new_pl->CurrentSkybox = pl->CurrentSkybox; - new_pl->lights = pl->lights; - pl = new_pl; - pl->left = start; - pl->right = stop; - fillshort(pl->top, viewwidth, 0x7fff); - } - return pl; - } - - int VisiblePlaneList::Render() - { - visplane_t *pl; - int i; - int vpcount = 0; - - drawerargs::ds_color = 3; - - RenderPortal *renderportal = RenderPortal::Instance(); - - for (i = 0; i < MAXVISPLANES; i++) - { - for (pl = visplanes[i]; pl; pl = pl->next) - { - // kg3D - draw only correct planes - if (pl->CurrentPortalUniq != renderportal->CurrentPortalUniq || pl->CurrentSkybox != Clip3DFloors::Instance()->CurrentSkybox) - continue; - // kg3D - draw only real planes now - if (pl->sky >= 0) { - vpcount++; - 
pl->Render(OPAQUE, false, false); - } - } - } - return vpcount; - } - - void VisiblePlaneList::RenderHeight(double height) - { - visplane_t *pl; - int i; - - drawerargs::ds_color = 3; - - DVector3 oViewPos = ViewPos; - DAngle oViewAngle = ViewAngle; - - RenderPortal *renderportal = RenderPortal::Instance(); - - for (i = 0; i < MAXVISPLANES; i++) - { - for (pl = visplanes[i]; pl; pl = pl->next) - { - if (pl->CurrentSkybox != Clip3DFloors::Instance()->CurrentSkybox || pl->CurrentPortalUniq != renderportal->CurrentPortalUniq) - continue; - - if (pl->sky < 0 && pl->height.Zat0() == height) - { - ViewPos = pl->viewpos; - ViewAngle = pl->viewangle; - renderportal->MirrorFlags = pl->MirrorFlags; - - pl->Render(pl->sky & 0x7FFFFFFF, pl->Additive, true); - } - } - } - ViewPos = oViewPos; - ViewAngle = oViewAngle; - } - - void PlaneRenderer::RenderLines(visplane_t *pl) - { - // t1/b1 are at x - // t2/b2 are at x+1 - // spanend[y] is at the right edge - - int x = pl->right - 1; - int t2 = pl->top[x]; - int b2 = pl->bottom[x]; - - if (b2 > t2) - { - fillshort(spanend + t2, b2 - t2, x); - } - - for (--x; x >= pl->left; --x) - { - int t1 = pl->top[x]; - int b1 = pl->bottom[x]; - const int xr = x + 1; - int stop; - - // Draw any spans that have just closed - stop = MIN(t1, b2); - while (t2 < stop) - { - int y = t2++; - int x2 = spanend[y]; - RenderLine(y, xr, x2); - } - stop = MAX(b1, t2); - while (b2 > stop) - { - int y = --b2; - int x2 = spanend[y]; - RenderLine(y, xr, x2); - } - - // Mark any spans that have just opened - stop = MIN(t2, b1); - while (t1 < stop) - { - spanend[t1++] = x; - } - stop = MAX(b2, t2); - while (b1 > stop) - { - spanend[--b1] = x; - } - - t2 = pl->top[x]; - b2 = pl->bottom[x]; - - StepColumn(); - } - // Draw any spans that are still open - while (t2 < b2) - { - int y = --b2; - int x2 = spanend[y]; - RenderLine(y, pl->left, x2); - } - } } diff --git a/src/swrenderer/plane/r_visibleplane.h b/src/swrenderer/plane/r_visibleplane.h index 
ca4da07547..262d9c4c18 100644 --- a/src/swrenderer/plane/r_visibleplane.h +++ b/src/swrenderer/plane/r_visibleplane.h @@ -68,43 +68,4 @@ namespace swrenderer void AddLights(FLightNode *node); void Render(fixed_t alpha, bool additive, bool masked); }; - - class VisiblePlaneList - { - public: - static VisiblePlaneList *Instance(); - - void Init(); - void Deinit(); - void Clear(bool fullclear); - - visplane_t *FindPlane(const secplane_t &height, FTextureID picnum, int lightlevel, double Alpha, bool additive, const FTransform &xxform, int sky, FSectorPortal *portal); - visplane_t *GetRange(visplane_t *pl, int start, int stop); - - int Render(); - void RenderHeight(double height); - - enum { MAXVISPLANES = 128 }; // must be a power of 2 - visplane_t *visplanes[MAXVISPLANES + 1]; - visplane_t *freetail = nullptr; - visplane_t **freehead = nullptr; - - private: - VisiblePlaneList(); - visplane_t *Add(unsigned hash); - - static unsigned CalcHash(int picnum, int lightlevel, const secplane_t &height) { return (unsigned)((picnum) * 3 + (lightlevel)+(FLOAT2FIXED((height).fD())) * 7) & (MAXVISPLANES - 1); } - }; - - class PlaneRenderer - { - public: - void RenderLines(visplane_t *pl); - - virtual void RenderLine(int y, int x1, int x2) = 0; - virtual void StepColumn() { } - - private: - short spanend[MAXHEIGHT]; - }; } diff --git a/src/swrenderer/plane/r_visibleplanelist.cpp b/src/swrenderer/plane/r_visibleplanelist.cpp new file mode 100644 index 0000000000..299f2f8627 --- /dev/null +++ b/src/swrenderer/plane/r_visibleplanelist.cpp @@ -0,0 +1,439 @@ +// +// Copyright (C) 1993-1996 by id Software, Inc. +// +// This source is available for distribution and/or modification +// only under the terms of the DOOM Source Code License as +// published by id Software. All rights reserved. +// +// The source is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// FITNESS FOR A PARTICULAR PURPOSE. 
See the DOOM Source Code License +// for more details. +// + +#include +#include + +#include "templates.h" +#include "i_system.h" +#include "w_wad.h" +#include "doomdef.h" +#include "doomstat.h" +#include "r_sky.h" +#include "stats.h" +#include "v_video.h" +#include "a_sharedglobal.h" +#include "c_console.h" +#include "cmdlib.h" +#include "d_net.h" +#include "g_level.h" +#include "gl/dynlights/gl_dynlight.h" +#include "swrenderer/r_main.h" +#include "swrenderer/r_memory.h" +#include "swrenderer/scene/r_opaque_pass.h" +#include "swrenderer/scene/r_3dfloors.h" +#include "swrenderer/scene/r_portal.h" +#include "swrenderer/plane/r_flatplane.h" +#include "swrenderer/plane/r_slopeplane.h" +#include "swrenderer/plane/r_skyplane.h" +#include "swrenderer/plane/r_visibleplane.h" +#include "swrenderer/plane/r_visibleplanelist.h" +#include "swrenderer/drawers/r_draw.h" + +namespace swrenderer +{ + VisiblePlaneList *VisiblePlaneList::Instance() + { + static VisiblePlaneList instance; + return &instance; + } + + VisiblePlaneList::VisiblePlaneList() + { + for (auto &plane : visplanes) + plane = nullptr; + freehead = &freetail; + } + + void VisiblePlaneList::Deinit() + { + // do not use R_ClearPlanes because at this point the screen pointer is no longer valid. 
+ for (int i = 0; i <= MAXVISPLANES; i++) // new code -- killough + { + for (*freehead = visplanes[i], visplanes[i] = nullptr; *freehead; ) + { + freehead = &(*freehead)->next; + } + } + for (visplane_t *pl = freetail; pl != nullptr; ) + { + visplane_t *next = pl->next; + free(pl); + pl = next; + } + } + + visplane_t *VisiblePlaneList::Add(unsigned hash) + { + visplane_t *check = freetail; + + if (check == nullptr) + { + check = (visplane_t *)M_Malloc(sizeof(*check) + 3 + sizeof(*check->top)*(MAXWIDTH * 2)); + memset(check, 0, sizeof(*check) + 3 + sizeof(*check->top)*(MAXWIDTH * 2)); + check->bottom = check->top + MAXWIDTH + 2; + } + else if (nullptr == (freetail = freetail->next)) + { + freehead = &freetail; + } + + check->lights = nullptr; + + check->next = visplanes[hash]; + visplanes[hash] = check; + return check; + } + + void VisiblePlaneList::Init() + { + int i; + visplane_t *pl; + + // Free all visplanes and let them be re-allocated as needed. + pl = freetail; + + while (pl) + { + visplane_t *next = pl->next; + M_Free(pl); + pl = next; + } + freetail = nullptr; + freehead = &freetail; + + for (i = 0; i < MAXVISPLANES; i++) + { + pl = visplanes[i]; + visplanes[i] = nullptr; + while (pl) + { + visplane_t *next = pl->next; + M_Free(pl); + pl = next; + } + } + } + + void VisiblePlaneList::Clear(bool fullclear) + { + int i; + + // Don't clear fake planes if not doing a full clear. 
+ if (!fullclear) + { + for (i = 0; i <= MAXVISPLANES - 1; i++) // new code -- killough + { + for (visplane_t **probe = &visplanes[i]; *probe != nullptr; ) + { + if ((*probe)->sky < 0) + { // fake: move past it + probe = &(*probe)->next; + } + else + { // not fake: move to freelist + visplane_t *vis = *probe; + *freehead = vis; + *probe = vis->next; + vis->next = nullptr; + freehead = &vis->next; + } + } + } + } + else + { + for (i = 0; i <= MAXVISPLANES; i++) // new code -- killough + { + for (*freehead = visplanes[i], visplanes[i] = nullptr; *freehead; ) + { + freehead = &(*freehead)->next; + } + } + } + } + + visplane_t *VisiblePlaneList::FindPlane(const secplane_t &height, FTextureID picnum, int lightlevel, double Alpha, bool additive, const FTransform &xxform, int sky, FSectorPortal *portal) + { + secplane_t plane; + visplane_t *check; + unsigned hash; // killough + bool isskybox; + const FTransform *xform = &xxform; + fixed_t alpha = FLOAT2FIXED(Alpha); + //angle_t angle = (xform.Angle + xform.baseAngle).BAMs(); + + FTransform nulltransform; + + if (picnum == skyflatnum) // killough 10/98 + { // most skies map together + lightlevel = 0; + xform = &nulltransform; + nulltransform.xOffs = nulltransform.yOffs = nulltransform.baseyOffs = 0; + nulltransform.xScale = nulltransform.yScale = 1; + nulltransform.Angle = nulltransform.baseAngle = 0.0; + additive = false; + // [RH] Map floor skies and ceiling skies to separate visplanes. This isn't + // always necessary, but it is needed if a floor and ceiling sky are in the + // same column but separated by a wall. If they both try to reside in the + // same visplane, then only the floor sky will be drawn. 
+ plane.set(0., 0., height.fC(), 0.); + isskybox = portal != nullptr && !(portal->mFlags & PORTSF_INSKYBOX); + } + else if (portal != nullptr && !(portal->mFlags & PORTSF_INSKYBOX)) + { + plane = height; + isskybox = true; + } + else + { + plane = height; + isskybox = false; + // kg3D - hack, store alpha in sky + // i know there is ->alpha, but this also allows to identify fake plane + // and ->alpha is for stacked sectors + Clip3DFloors *clip3d = Clip3DFloors::Instance(); + if (clip3d->fake3D & (FAKE3D_FAKEFLOOR | FAKE3D_FAKECEILING)) sky = 0x80000000 | clip3d->fakeAlpha; + else sky = 0; // not skyflatnum so it can't be a sky + portal = nullptr; + alpha = OPAQUE; + } + + // New visplane algorithm uses hash table -- killough + hash = isskybox ? MAXVISPLANES : CalcHash(picnum.GetIndex(), lightlevel, height); + + RenderPortal *renderportal = RenderPortal::Instance(); + + for (check = visplanes[hash]; check; check = check->next) // killough + { + if (isskybox) + { + if (portal == check->portal && plane == check->height) + { + if (portal->mType != PORTS_SKYVIEWPOINT) + { // This skybox is really a stacked sector, so we need to + // check even more. + if (check->extralight == renderportal->stacked_extralight && + check->visibility == renderportal->stacked_visibility && + check->viewpos == renderportal->stacked_viewpos && + ( + // headache inducing logic... 
:( + (portal->mType != PORTS_STACKEDSECTORTHING) || + ( + check->Alpha == alpha && + check->Additive == additive && + (alpha == 0 || // if alpha is > 0 everything needs to be checked + (plane == check->height && + picnum == check->picnum && + lightlevel == check->lightlevel && + basecolormap == check->colormap && // [RH] Add more checks + *xform == check->xform + ) + ) && + check->viewangle == renderportal->stacked_angle + ) + ) + ) + { + return check; + } + } + else + { + return check; + } + } + } + else + if (plane == check->height && + picnum == check->picnum && + lightlevel == check->lightlevel && + basecolormap == check->colormap && // [RH] Add more checks + *xform == check->xform && + sky == check->sky && + renderportal->CurrentPortalUniq == check->CurrentPortalUniq && + renderportal->MirrorFlags == check->MirrorFlags && + Clip3DFloors::Instance()->CurrentSkybox == check->CurrentSkybox && + ViewPos == check->viewpos + ) + { + return check; + } + } + + check = Add(hash); // killough + + check->height = plane; + check->picnum = picnum; + check->lightlevel = lightlevel; + check->xform = *xform; + check->colormap = basecolormap; // [RH] Save colormap + check->sky = sky; + check->portal = portal; + check->left = viewwidth; // Was SCREENWIDTH -- killough 11/98 + check->right = 0; + check->extralight = renderportal->stacked_extralight; + check->visibility = renderportal->stacked_visibility; + check->viewpos = renderportal->stacked_viewpos; + check->viewangle = renderportal->stacked_angle; + check->Alpha = alpha; + check->Additive = additive; + check->CurrentPortalUniq = renderportal->CurrentPortalUniq; + check->MirrorFlags = renderportal->MirrorFlags; + check->CurrentSkybox = Clip3DFloors::Instance()->CurrentSkybox; + + fillshort(check->top, viewwidth, 0x7fff); + + return check; + } + + visplane_t *VisiblePlaneList::GetRange(visplane_t *pl, int start, int stop) + { + int intrl, intrh; + int unionl, unionh; + int x; + + assert(start >= 0 && start < viewwidth); + 
assert(stop > start && stop <= viewwidth); + + if (start < pl->left) + { + intrl = pl->left; + unionl = start; + } + else + { + unionl = pl->left; + intrl = start; + } + + if (stop > pl->right) + { + intrh = pl->right; + unionh = stop; + } + else + { + unionh = pl->right; + intrh = stop; + } + + for (x = intrl; x < intrh && pl->top[x] == 0x7fff; x++) + ; + + if (x >= intrh) + { + // use the same visplane + pl->left = unionl; + pl->right = unionh; + } + else + { + // make a new visplane + unsigned hash; + + if (pl->portal != nullptr && !(pl->portal->mFlags & PORTSF_INSKYBOX) && viewactive) + { + hash = MAXVISPLANES; + } + else + { + hash = CalcHash(pl->picnum.GetIndex(), pl->lightlevel, pl->height); + } + visplane_t *new_pl = Add(hash); + + new_pl->height = pl->height; + new_pl->picnum = pl->picnum; + new_pl->lightlevel = pl->lightlevel; + new_pl->xform = pl->xform; + new_pl->colormap = pl->colormap; + new_pl->portal = pl->portal; + new_pl->extralight = pl->extralight; + new_pl->visibility = pl->visibility; + new_pl->viewpos = pl->viewpos; + new_pl->viewangle = pl->viewangle; + new_pl->sky = pl->sky; + new_pl->Alpha = pl->Alpha; + new_pl->Additive = pl->Additive; + new_pl->CurrentPortalUniq = pl->CurrentPortalUniq; + new_pl->MirrorFlags = pl->MirrorFlags; + new_pl->CurrentSkybox = pl->CurrentSkybox; + new_pl->lights = pl->lights; + pl = new_pl; + pl->left = start; + pl->right = stop; + fillshort(pl->top, viewwidth, 0x7fff); + } + return pl; + } + + int VisiblePlaneList::Render() + { + visplane_t *pl; + int i; + int vpcount = 0; + + drawerargs::ds_color = 3; + + RenderPortal *renderportal = RenderPortal::Instance(); + + for (i = 0; i < MAXVISPLANES; i++) + { + for (pl = visplanes[i]; pl; pl = pl->next) + { + // kg3D - draw only correct planes + if (pl->CurrentPortalUniq != renderportal->CurrentPortalUniq || pl->CurrentSkybox != Clip3DFloors::Instance()->CurrentSkybox) + continue; + // kg3D - draw only real planes now + if (pl->sky >= 0) { + vpcount++; + 
pl->Render(OPAQUE, false, false); + } + } + } + return vpcount; + } + + void VisiblePlaneList::RenderHeight(double height) + { + visplane_t *pl; + int i; + + drawerargs::ds_color = 3; + + DVector3 oViewPos = ViewPos; + DAngle oViewAngle = ViewAngle; + + RenderPortal *renderportal = RenderPortal::Instance(); + + for (i = 0; i < MAXVISPLANES; i++) + { + for (pl = visplanes[i]; pl; pl = pl->next) + { + if (pl->CurrentSkybox != Clip3DFloors::Instance()->CurrentSkybox || pl->CurrentPortalUniq != renderportal->CurrentPortalUniq) + continue; + + if (pl->sky < 0 && pl->height.Zat0() == height) + { + ViewPos = pl->viewpos; + ViewAngle = pl->viewangle; + renderportal->MirrorFlags = pl->MirrorFlags; + + pl->Render(pl->sky & 0x7FFFFFFF, pl->Additive, true); + } + } + } + ViewPos = oViewPos; + ViewAngle = oViewAngle; + } +} diff --git a/src/swrenderer/plane/r_visibleplanelist.h b/src/swrenderer/plane/r_visibleplanelist.h new file mode 100644 index 0000000000..3db52d5bbc --- /dev/null +++ b/src/swrenderer/plane/r_visibleplanelist.h @@ -0,0 +1,51 @@ +// +// Copyright (C) 1993-1996 by id Software, Inc. +// +// This source is available for distribution and/or modification +// only under the terms of the DOOM Source Code License as +// published by id Software. All rights reserved. +// +// The source is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// FITNESS FOR A PARTICULAR PURPOSE. See the DOOM Source Code License +// for more details. 
+// + +#pragma once + +#include +#include "r_defs.h" + +struct FSectorPortal; + +namespace swrenderer +{ + struct visplane_t; + + class VisiblePlaneList + { + public: + static VisiblePlaneList *Instance(); + + void Init(); + void Deinit(); + void Clear(bool fullclear); + + visplane_t *FindPlane(const secplane_t &height, FTextureID picnum, int lightlevel, double Alpha, bool additive, const FTransform &xxform, int sky, FSectorPortal *portal); + visplane_t *GetRange(visplane_t *pl, int start, int stop); + + int Render(); + void RenderHeight(double height); + + enum { MAXVISPLANES = 128 }; // must be a power of 2 + visplane_t *visplanes[MAXVISPLANES + 1]; + visplane_t *freetail = nullptr; + visplane_t **freehead = nullptr; + + private: + VisiblePlaneList(); + visplane_t *Add(unsigned hash); + + static unsigned CalcHash(int picnum, int lightlevel, const secplane_t &height) { return (unsigned)((picnum) * 3 + (lightlevel)+(FLOAT2FIXED((height).fD())) * 7) & (MAXVISPLANES - 1); } + }; +} diff --git a/src/swrenderer/r_main.cpp b/src/swrenderer/r_main.cpp index cdbc083d5e..f07b26162f 100644 --- a/src/swrenderer/r_main.cpp +++ b/src/swrenderer/r_main.cpp @@ -69,6 +69,7 @@ #include "c_console.h" #include "r_memory.h" #include "swrenderer/things/r_playersprite.h" +#include "swrenderer/plane/r_visibleplanelist.h" CVAR (String, r_viewsize, "", CVAR_NOSET) CVAR (Bool, r_shadercolormaps, true, CVAR_ARCHIVE) diff --git a/src/swrenderer/scene/r_opaque_pass.cpp b/src/swrenderer/scene/r_opaque_pass.cpp index 99d79f10d7..f43c269320 100644 --- a/src/swrenderer/scene/r_opaque_pass.cpp +++ b/src/swrenderer/scene/r_opaque_pass.cpp @@ -35,6 +35,7 @@ #include "swrenderer/r_main.h" #include "swrenderer/drawers/r_draw.h" #include "swrenderer/plane/r_visibleplane.h" +#include "swrenderer/plane/r_visibleplanelist.h" #include "swrenderer/things/r_sprite.h" #include "swrenderer/things/r_wallsprite.h" #include "swrenderer/things/r_voxel.h" diff --git a/src/swrenderer/scene/r_portal.cpp 
b/src/swrenderer/scene/r_portal.cpp index b235944d87..1ff62438af 100644 --- a/src/swrenderer/scene/r_portal.cpp +++ b/src/swrenderer/scene/r_portal.cpp @@ -48,6 +48,7 @@ #include "swrenderer/segments/r_clipsegment.h" #include "swrenderer/segments/r_drawsegment.h" #include "swrenderer/plane/r_visibleplane.h" +#include "swrenderer/plane/r_visibleplanelist.h" #include "swrenderer/things/r_visiblesprite.h" #include "swrenderer/scene/r_opaque_pass.h" #include "swrenderer/scene/r_translucent_pass.h" diff --git a/src/swrenderer/scene/r_translucent_pass.cpp b/src/swrenderer/scene/r_translucent_pass.cpp index 48cf627dc8..6b6db1caae 100644 --- a/src/swrenderer/scene/r_translucent_pass.cpp +++ b/src/swrenderer/scene/r_translucent_pass.cpp @@ -34,6 +34,7 @@ #include "swrenderer/scene/r_portal.h" #include "swrenderer/scene/r_translucent_pass.h" #include "swrenderer/plane/r_visibleplane.h" +#include "swrenderer/plane/r_visibleplanelist.h" #include "swrenderer/r_memory.h" EXTERN_CVAR(Int, r_drawfuzz) From 80e1844d6cacaf2dbc2f931587c53a4efcdf91bd Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 12 Jan 2017 16:21:46 +0100 Subject: [PATCH 696/912] Split r_main into r_viewport, r_scene and r_light --- src/CMakeLists.txt | 4 +- src/gl/system/gl_swframebuffer.cpp | 3 +- src/gl/system/gl_swwipe.cpp | 1 - src/polyrenderer/drawers/poly_draw_args.cpp | 2 +- src/polyrenderer/drawers/poly_triangle.cpp | 1 - src/polyrenderer/drawers/screen_triangle.cpp | 1 - src/polyrenderer/math/tri_matrix.cpp | 3 +- src/polyrenderer/poly_renderer.cpp | 2 + src/polyrenderer/poly_renderer.h | 1 - src/polyrenderer/scene/poly_decal.cpp | 2 + src/polyrenderer/scene/poly_particle.cpp | 1 + src/polyrenderer/scene/poly_plane.cpp | 1 + src/polyrenderer/scene/poly_playersprite.cpp | 3 + src/polyrenderer/scene/poly_portal.cpp | 1 + src/polyrenderer/scene/poly_scene.h | 1 - src/polyrenderer/scene/poly_sky.cpp | 1 + src/polyrenderer/scene/poly_sprite.cpp | 1 + src/polyrenderer/scene/poly_wall.cpp | 1 + 
src/r_state.h | 1 - src/r_utility.h | 5 + src/swrenderer/drawers/r_draw.cpp | 2 +- src/swrenderer/drawers/r_draw_pal.cpp | 3 +- src/swrenderer/drawers/r_draw_rgba.cpp | 3 +- src/swrenderer/drawers/r_thread.cpp | 1 - src/swrenderer/line/r_fogboundary.cpp | 2 +- src/swrenderer/line/r_line.cpp | 5 +- src/swrenderer/line/r_line.h | 8 + src/swrenderer/line/r_walldraw.cpp | 4 +- src/swrenderer/line/r_walldraw.h | 1 + src/swrenderer/line/r_wallsetup.cpp | 4 +- src/swrenderer/plane/r_flatplane.cpp | 4 +- src/swrenderer/plane/r_skyplane.cpp | 4 +- src/swrenderer/plane/r_slopeplane.cpp | 4 +- src/swrenderer/plane/r_visibleplane.cpp | 2 +- src/swrenderer/plane/r_visibleplanelist.cpp | 4 +- src/swrenderer/r_main.cpp | 751 ------------------ src/swrenderer/r_main.h | 141 ---- src/swrenderer/r_memory.cpp | 1 - src/swrenderer/r_swrenderer.cpp | 17 +- src/swrenderer/scene/r_3dfloors.cpp | 2 +- src/swrenderer/scene/r_light.cpp | 178 +++++ src/swrenderer/scene/r_light.h | 73 ++ src/swrenderer/scene/r_opaque_pass.cpp | 4 +- src/swrenderer/scene/r_portal.cpp | 4 +- src/swrenderer/scene/r_scene.cpp | 270 +++++++ src/swrenderer/scene/r_scene.h | 34 + src/swrenderer/scene/r_translucent_pass.cpp | 3 +- src/swrenderer/scene/r_viewport.cpp | 195 +++++ src/swrenderer/scene/r_viewport.h | 41 + src/swrenderer/segments/r_clipsegment.cpp | 1 - src/swrenderer/segments/r_drawsegment.cpp | 3 +- src/swrenderer/segments/r_portalsegment.cpp | 1 - src/swrenderer/things/r_decal.cpp | 4 +- src/swrenderer/things/r_particle.cpp | 3 +- src/swrenderer/things/r_playersprite.cpp | 4 +- src/swrenderer/things/r_playersprite.h | 2 + src/swrenderer/things/r_sprite.cpp | 4 +- src/swrenderer/things/r_visiblesprite.cpp | 1 - src/swrenderer/things/r_visiblespritelist.cpp | 1 - src/swrenderer/things/r_voxel.cpp | 4 +- src/swrenderer/things/r_wallsprite.cpp | 4 +- src/v_draw.cpp | 3 +- src/win32/fb_d3d9.cpp | 2 +- 63 files changed, 893 insertions(+), 945 deletions(-) delete mode 100644 src/swrenderer/r_main.cpp delete 
mode 100644 src/swrenderer/r_main.h create mode 100644 src/swrenderer/scene/r_light.cpp create mode 100644 src/swrenderer/scene/r_light.h create mode 100644 src/swrenderer/scene/r_scene.cpp create mode 100644 src/swrenderer/scene/r_scene.h create mode 100644 src/swrenderer/scene/r_viewport.cpp create mode 100644 src/swrenderer/scene/r_viewport.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 44c72406f0..3baba1c4b1 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -838,7 +838,6 @@ set( NOT_COMPILED_SOURCE_FILES set( FASTMATH_PCH_SOURCES swrenderer/r_swrenderer.cpp - swrenderer/r_main.cpp swrenderer/r_memory.cpp swrenderer/drawers/r_draw.cpp swrenderer/drawers/r_draw_pal.cpp @@ -846,9 +845,12 @@ set( FASTMATH_PCH_SOURCES swrenderer/drawers/r_drawers.cpp swrenderer/drawers/r_thread.cpp swrenderer/scene/r_3dfloors.cpp + swrenderer/scene/r_light.cpp swrenderer/scene/r_opaque_pass.cpp swrenderer/scene/r_portal.cpp + swrenderer/scene/r_scene.cpp swrenderer/scene/r_translucent_pass.cpp + swrenderer/scene/r_viewport.cpp swrenderer/line/r_line.cpp swrenderer/line/r_walldraw.cpp swrenderer/line/r_wallsetup.cpp diff --git a/src/gl/system/gl_swframebuffer.cpp b/src/gl/system/gl_swframebuffer.cpp index 0d9cdfce9d..1ad965c6a2 100644 --- a/src/gl/system/gl_swframebuffer.cpp +++ b/src/gl/system/gl_swframebuffer.cpp @@ -57,7 +57,6 @@ #include "v_pfx.h" #include "stats.h" #include "doomerrors.h" -#include "swrenderer/r_main.h" #include "r_data/r_translate.h" #include "f_wipe.h" #include "sbar.h" @@ -72,6 +71,8 @@ #include "gl/gl_functions.h" #include "gl_debug.h" +#include "swrenderer/scene/r_light.h" + CVAR(Int, gl_showpacks, 0, 0) #ifndef WIN32 // Defined in fb_d3d9 for Windows CVAR(Bool, vid_hwaalines, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG) diff --git a/src/gl/system/gl_swwipe.cpp b/src/gl/system/gl_swwipe.cpp index 2f51ae5a4f..7f8e44a669 100644 --- a/src/gl/system/gl_swwipe.cpp +++ b/src/gl/system/gl_swwipe.cpp @@ -53,7 +53,6 @@ #include "v_pfx.h" #include 
"stats.h" #include "doomerrors.h" -#include "swrenderer/r_main.h" #include "r_data/r_translate.h" #include "f_wipe.h" #include "sbar.h" diff --git a/src/polyrenderer/drawers/poly_draw_args.cpp b/src/polyrenderer/drawers/poly_draw_args.cpp index 60621553a6..9e9686e92c 100644 --- a/src/polyrenderer/drawers/poly_draw_args.cpp +++ b/src/polyrenderer/drawers/poly_draw_args.cpp @@ -34,7 +34,7 @@ #include "v_palette.h" #include "r_data/colormaps.h" #include "poly_draw_args.h" -#include "swrenderer/r_main.h" +#include "swrenderer/scene/r_viewport.h" void PolyDrawArgs::SetClipPlane(float a, float b, float c, float d) { diff --git a/src/polyrenderer/drawers/poly_triangle.cpp b/src/polyrenderer/drawers/poly_triangle.cpp index 5856b0267b..a289457151 100644 --- a/src/polyrenderer/drawers/poly_triangle.cpp +++ b/src/polyrenderer/drawers/poly_triangle.cpp @@ -35,7 +35,6 @@ #include "r_data/colormaps.h" #include "poly_triangle.h" #include "swrenderer/drawers/r_draw_rgba.h" -#include "swrenderer/r_main.h" #include "screen_triangle.h" CVAR(Bool, r_debug_trisetup, false, 0); diff --git a/src/polyrenderer/drawers/screen_triangle.cpp b/src/polyrenderer/drawers/screen_triangle.cpp index e164b6fe0e..d1858d5111 100644 --- a/src/polyrenderer/drawers/screen_triangle.cpp +++ b/src/polyrenderer/drawers/screen_triangle.cpp @@ -35,7 +35,6 @@ #include "r_data/colormaps.h" #include "poly_triangle.h" #include "swrenderer/drawers/r_draw_rgba.h" -#include "swrenderer/r_main.h" #include "screen_triangle.h" void ScreenTriangle::SetupNormal(const TriDrawTriangleArgs *args, WorkerThreadData *thread) diff --git a/src/polyrenderer/math/tri_matrix.cpp b/src/polyrenderer/math/tri_matrix.cpp index 72be0d0edf..1f26ef2ef3 100644 --- a/src/polyrenderer/math/tri_matrix.cpp +++ b/src/polyrenderer/math/tri_matrix.cpp @@ -33,10 +33,11 @@ #include "r_data/r_translate.h" #include "v_palette.h" #include "r_data/colormaps.h" +#include "r_utility.h" #include "tri_matrix.h" #include "polyrenderer/drawers/poly_triangle.h" 
#include "swrenderer/drawers/r_draw_rgba.h" -#include "swrenderer/r_main.h" +#include "swrenderer/scene/r_viewport.h" TriMatrix TriMatrix::null() { diff --git a/src/polyrenderer/poly_renderer.cpp b/src/polyrenderer/poly_renderer.cpp index 031dff683d..22d996e9fb 100644 --- a/src/polyrenderer/poly_renderer.cpp +++ b/src/polyrenderer/poly_renderer.cpp @@ -30,6 +30,8 @@ #include "gl/data/gl_data.h" #include "d_net.h" #include "po_man.h" +#include "swrenderer/scene/r_scene.h" +#include "swrenderer/scene/r_viewport.h" EXTERN_CVAR(Int, screenblocks) void InitGLRMapinfoData(); diff --git a/src/polyrenderer/poly_renderer.h b/src/polyrenderer/poly_renderer.h index c7de1b1cc8..368872d97e 100644 --- a/src/polyrenderer/poly_renderer.h +++ b/src/polyrenderer/poly_renderer.h @@ -28,7 +28,6 @@ #include #include "doomdata.h" #include "r_utility.h" -#include "swrenderer/r_main.h" #include "scene/poly_portal.h" #include "scene/poly_playersprite.h" #include "scene/poly_sky.h" diff --git a/src/polyrenderer/scene/poly_decal.cpp b/src/polyrenderer/scene/poly_decal.cpp index fbc95a15c3..c36b67f4a7 100644 --- a/src/polyrenderer/scene/poly_decal.cpp +++ b/src/polyrenderer/scene/poly_decal.cpp @@ -28,6 +28,8 @@ #include "poly_decal.h" #include "polyrenderer/poly_renderer.h" #include "a_sharedglobal.h" +#include "swrenderer/scene/r_scene.h" +#include "swrenderer/scene/r_light.h" void RenderPolyDecal::RenderWallDecals(const TriMatrix &worldToClip, const Vec4f &clipPlane, const seg_t *line, uint32_t subsectorDepth, uint32_t stencilValue) { diff --git a/src/polyrenderer/scene/poly_particle.cpp b/src/polyrenderer/scene/poly_particle.cpp index 275fd46403..e4380dae97 100644 --- a/src/polyrenderer/scene/poly_particle.cpp +++ b/src/polyrenderer/scene/poly_particle.cpp @@ -27,6 +27,7 @@ #include "r_data/r_translate.h" #include "poly_particle.h" #include "polyrenderer/poly_renderer.h" +#include "swrenderer/scene/r_light.h" void RenderPolyParticle::Render(const TriMatrix &worldToClip, const Vec4f 
&clipPlane, particle_t *particle, subsector_t *sub, uint32_t subsectorDepth, uint32_t stencilValue) { diff --git a/src/polyrenderer/scene/poly_plane.cpp b/src/polyrenderer/scene/poly_plane.cpp index 5a8396c616..9089881041 100644 --- a/src/polyrenderer/scene/poly_plane.cpp +++ b/src/polyrenderer/scene/poly_plane.cpp @@ -29,6 +29,7 @@ #include "poly_portal.h" #include "polyrenderer/poly_renderer.h" #include "r_sky.h" +#include "swrenderer/scene/r_light.h" EXTERN_CVAR(Int, r_3dfloors) diff --git a/src/polyrenderer/scene/poly_playersprite.cpp b/src/polyrenderer/scene/poly_playersprite.cpp index 03e22f1d1c..a3b29fa300 100644 --- a/src/polyrenderer/scene/poly_playersprite.cpp +++ b/src/polyrenderer/scene/poly_playersprite.cpp @@ -27,6 +27,9 @@ #include "r_data/r_translate.h" #include "poly_playersprite.h" #include "polyrenderer/poly_renderer.h" +#include "d_player.h" +#include "swrenderer/scene/r_viewport.h" +#include "swrenderer/scene/r_light.h" EXTERN_CVAR(Bool, r_drawplayersprites) EXTERN_CVAR(Bool, r_deathcamera) diff --git a/src/polyrenderer/scene/poly_portal.cpp b/src/polyrenderer/scene/poly_portal.cpp index 58bb5b0496..1aa43e9bce 100644 --- a/src/polyrenderer/scene/poly_portal.cpp +++ b/src/polyrenderer/scene/poly_portal.cpp @@ -28,6 +28,7 @@ #include "r_data/r_translate.h" #include "poly_portal.h" #include "polyrenderer/poly_renderer.h" +#include "swrenderer/scene/r_light.h" #include "gl/data/gl_data.h" extern bool r_showviewer; diff --git a/src/polyrenderer/scene/poly_scene.h b/src/polyrenderer/scene/poly_scene.h index fbc878fc7c..ba6fd5eea4 100644 --- a/src/polyrenderer/scene/poly_scene.h +++ b/src/polyrenderer/scene/poly_scene.h @@ -28,7 +28,6 @@ #include #include "doomdata.h" #include "r_utility.h" -#include "swrenderer/r_main.h" #include "polyrenderer/drawers/poly_triangle.h" #include "polyrenderer/math/poly_intersection.h" #include "poly_wall.h" diff --git a/src/polyrenderer/scene/poly_sky.cpp b/src/polyrenderer/scene/poly_sky.cpp index 
82be3f2bc0..41bca43ff3 100644 --- a/src/polyrenderer/scene/poly_sky.cpp +++ b/src/polyrenderer/scene/poly_sky.cpp @@ -28,6 +28,7 @@ #include "poly_portal.h" #include "r_sky.h" // for skyflatnum #include "g_levellocals.h" +#include "swrenderer/scene/r_light.h" PolySkyDome::PolySkyDome() { diff --git a/src/polyrenderer/scene/poly_sprite.cpp b/src/polyrenderer/scene/poly_sprite.cpp index d344ae78b9..ce0547f622 100644 --- a/src/polyrenderer/scene/poly_sprite.cpp +++ b/src/polyrenderer/scene/poly_sprite.cpp @@ -28,6 +28,7 @@ #include "poly_sprite.h" #include "polyrenderer/poly_renderer.h" #include "polyrenderer/math/poly_intersection.h" +#include "swrenderer/scene/r_light.h" EXTERN_CVAR(Float, transsouls) EXTERN_CVAR(Int, r_drawfuzz) diff --git a/src/polyrenderer/scene/poly_wall.cpp b/src/polyrenderer/scene/poly_wall.cpp index 5c912dec08..1bf7cf24b0 100644 --- a/src/polyrenderer/scene/poly_wall.cpp +++ b/src/polyrenderer/scene/poly_wall.cpp @@ -32,6 +32,7 @@ #include "poly_decal.h" #include "polyrenderer/poly_renderer.h" #include "r_sky.h" +#include "swrenderer/scene/r_light.h" EXTERN_CVAR(Bool, r_drawmirrors) diff --git a/src/r_state.h b/src/r_state.h index 7151c0441b..9cd8400b4e 100644 --- a/src/r_state.h +++ b/src/r_state.h @@ -68,7 +68,6 @@ extern int numgamesubsectors; extern AActor* camera; // [RH] camera instead of viewplayer extern sector_t* viewsector; // [RH] keep track of sector viewing from -namespace swrenderer { extern angle_t xtoviewangle[MAXWIDTH+1]; } extern DAngle FieldOfView; int R_FindSkin (const char *name, int pclass); // [RH] Find a skin diff --git a/src/r_utility.h b/src/r_utility.h index cb7bd50e27..488d456720 100644 --- a/src/r_utility.h +++ b/src/r_utility.h @@ -21,6 +21,11 @@ extern DAngle ViewPitch; extern DAngle ViewRoll; extern DVector3 ViewPath[2]; +extern double ViewCos; +extern double ViewSin; +extern int viewwindowx; +extern int viewwindowy; + extern "C" int centerx, centerxwide; extern "C" int centery; diff --git 
a/src/swrenderer/drawers/r_draw.cpp b/src/swrenderer/drawers/r_draw.cpp index a7d40e63c3..b9a4a3ef08 100644 --- a/src/swrenderer/drawers/r_draw.cpp +++ b/src/swrenderer/drawers/r_draw.cpp @@ -50,7 +50,7 @@ #include "r_draw_rgba.h" #include "r_draw_pal.h" #include "r_thread.h" -#include "swrenderer/r_main.h" +#include "swrenderer/scene/r_light.h" CVAR(Bool, r_dynlights, 1, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); diff --git a/src/swrenderer/drawers/r_draw_pal.cpp b/src/swrenderer/drawers/r_draw_pal.cpp index 13f6219acf..18dd88adb3 100644 --- a/src/swrenderer/drawers/r_draw_pal.cpp +++ b/src/swrenderer/drawers/r_draw_pal.cpp @@ -45,9 +45,10 @@ #include "doomdef.h" #include "r_defs.h" #include "r_draw.h" -#include "swrenderer/r_main.h" #include "v_video.h" #include "r_draw_pal.h" +#include "swrenderer/scene/r_viewport.h" +#include "swrenderer/scene/r_light.h" // [SP] r_blendmethod - false = rgb555 matching (ZDoom classic), true = rgb666 (refactored) CVAR(Bool, r_blendmethod, false, CVAR_GLOBALCONFIG | CVAR_ARCHIVE) diff --git a/src/swrenderer/drawers/r_draw_rgba.cpp b/src/swrenderer/drawers/r_draw_rgba.cpp index 309a3e2bd0..352a1538bb 100644 --- a/src/swrenderer/drawers/r_draw_rgba.cpp +++ b/src/swrenderer/drawers/r_draw_rgba.cpp @@ -27,7 +27,6 @@ #include "doomdef.h" #include "i_system.h" #include "w_wad.h" -#include "swrenderer/r_main.h" #include "v_video.h" #include "doomstat.h" #include "st_stuff.h" @@ -39,6 +38,8 @@ #include "r_draw_rgba.h" #include "r_drawers.h" #include "gl/data/gl_matrix.h" +#include "swrenderer/scene/r_viewport.h" +#include "swrenderer/scene/r_light.h" #include "gi.h" #include "stats.h" diff --git a/src/swrenderer/drawers/r_thread.cpp b/src/swrenderer/drawers/r_thread.cpp index d629a23f2a..c35d597772 100644 --- a/src/swrenderer/drawers/r_thread.cpp +++ b/src/swrenderer/drawers/r_thread.cpp @@ -25,7 +25,6 @@ #include "doomdef.h" #include "i_system.h" #include "w_wad.h" -#include "swrenderer/r_main.h" #include "v_video.h" #include "doomstat.h" 
#include "st_stuff.h" diff --git a/src/swrenderer/line/r_fogboundary.cpp b/src/swrenderer/line/r_fogboundary.cpp index ca1822a2c1..523ce4b08e 100644 --- a/src/swrenderer/line/r_fogboundary.cpp +++ b/src/swrenderer/line/r_fogboundary.cpp @@ -18,7 +18,6 @@ #include "w_wad.h" #include "doomdef.h" #include "doomstat.h" -#include "swrenderer/r_main.h" #include "r_sky.h" #include "stats.h" #include "v_video.h" @@ -38,6 +37,7 @@ #include "swrenderer/segments/r_drawsegment.h" #include "swrenderer/line/r_fogboundary.h" #include "swrenderer/r_memory.h" +#include "swrenderer/scene/r_light.h" #ifdef _MSC_VER #pragma warning(disable:4244) diff --git a/src/swrenderer/line/r_line.cpp b/src/swrenderer/line/r_line.cpp index 4122623b3f..5679b8e507 100644 --- a/src/swrenderer/line/r_line.cpp +++ b/src/swrenderer/line/r_line.cpp @@ -30,12 +30,15 @@ #include "g_level.h" #include "r_wallsetup.h" #include "v_palette.h" +#include "r_utility.h" #include "r_data/colormaps.h" -#include "swrenderer/r_main.h" #include "swrenderer/r_memory.h" #include "swrenderer/scene/r_opaque_pass.h" #include "swrenderer/scene/r_3dfloors.h" #include "swrenderer/scene/r_portal.h" +#include "swrenderer/scene/r_viewport.h" +#include "swrenderer/scene/r_light.h" +#include "swrenderer/scene/r_scene.h" #include "swrenderer/line/r_line.h" #include "swrenderer/line/r_walldraw.h" #include "swrenderer/line/r_wallsetup.h" diff --git a/src/swrenderer/line/r_line.h b/src/swrenderer/line/r_line.h index 18d46e8b4e..ae758ccabf 100644 --- a/src/swrenderer/line/r_line.h +++ b/src/swrenderer/line/r_line.h @@ -13,6 +13,14 @@ #pragma once +#include "vectors.h" + +struct seg_t; +struct subsector_t; +struct sector_t; +struct side_t; +struct line_t; + namespace swrenderer { struct visplane_t; diff --git a/src/swrenderer/line/r_walldraw.cpp b/src/swrenderer/line/r_walldraw.cpp index f0320383a1..c5e1e036ac 100644 --- a/src/swrenderer/line/r_walldraw.cpp +++ b/src/swrenderer/line/r_walldraw.cpp @@ -27,7 +27,6 @@ #include "doomstat.h" 
#include "doomdata.h" -#include "swrenderer/r_main.h" #include "r_sky.h" #include "v_video.h" @@ -45,6 +44,9 @@ #include "swrenderer/scene/r_opaque_pass.h" #include "swrenderer/scene/r_3dfloors.h" #include "swrenderer/scene/r_portal.h" +#include "swrenderer/scene/r_scene.h" +#include "swrenderer/scene/r_viewport.h" +#include "swrenderer/scene/r_light.h" #include "swrenderer/line/r_walldraw.h" #include "swrenderer/line/r_wallsetup.h" diff --git a/src/swrenderer/line/r_walldraw.h b/src/swrenderer/line/r_walldraw.h index 8ef23438f2..d3fad4ea89 100644 --- a/src/swrenderer/line/r_walldraw.h +++ b/src/swrenderer/line/r_walldraw.h @@ -15,6 +15,7 @@ class FTexture; struct FLightNode; +struct seg_t; namespace swrenderer { diff --git a/src/swrenderer/line/r_wallsetup.cpp b/src/swrenderer/line/r_wallsetup.cpp index ee32c1bd0d..e7a86700de 100644 --- a/src/swrenderer/line/r_wallsetup.cpp +++ b/src/swrenderer/line/r_wallsetup.cpp @@ -19,9 +19,11 @@ #include "v_palette.h" #include "r_data/colormaps.h" #include "r_walldraw.h" -#include "swrenderer/r_main.h" #include "swrenderer/r_memory.h" #include "swrenderer/line/r_line.h" +#include "swrenderer/scene/r_scene.h" +#include "swrenderer/scene/r_viewport.h" +#include "swrenderer/scene/r_light.h" namespace swrenderer { diff --git a/src/swrenderer/plane/r_flatplane.cpp b/src/swrenderer/plane/r_flatplane.cpp index 760ae1b223..9a9494833a 100644 --- a/src/swrenderer/plane/r_flatplane.cpp +++ b/src/swrenderer/plane/r_flatplane.cpp @@ -18,7 +18,6 @@ #include "w_wad.h" #include "doomdef.h" #include "doomstat.h" -#include "swrenderer/r_main.h" #include "r_sky.h" #include "stats.h" #include "v_video.h" @@ -37,6 +36,9 @@ #include "swrenderer/segments/r_clipsegment.h" #include "swrenderer/segments/r_drawsegment.h" #include "swrenderer/scene/r_portal.h" +#include "swrenderer/scene/r_scene.h" +#include "swrenderer/scene/r_viewport.h" +#include "swrenderer/scene/r_light.h" #include "swrenderer/plane/r_visibleplane.h" #include 
"swrenderer/r_memory.h" diff --git a/src/swrenderer/plane/r_skyplane.cpp b/src/swrenderer/plane/r_skyplane.cpp index 9909f868e3..ab8dedb973 100644 --- a/src/swrenderer/plane/r_skyplane.cpp +++ b/src/swrenderer/plane/r_skyplane.cpp @@ -18,7 +18,6 @@ #include "w_wad.h" #include "doomdef.h" #include "doomstat.h" -#include "swrenderer/r_main.h" #include "r_sky.h" #include "stats.h" #include "v_video.h" @@ -39,6 +38,9 @@ #include "swrenderer/line/r_wallsetup.h" #include "swrenderer/line/r_walldraw.h" #include "swrenderer/scene/r_portal.h" +#include "swrenderer/scene/r_scene.h" +#include "swrenderer/scene/r_viewport.h" +#include "swrenderer/scene/r_light.h" #include "swrenderer/r_memory.h" #include "g_levellocals.h" diff --git a/src/swrenderer/plane/r_slopeplane.cpp b/src/swrenderer/plane/r_slopeplane.cpp index 6317928a71..b2d42e1980 100644 --- a/src/swrenderer/plane/r_slopeplane.cpp +++ b/src/swrenderer/plane/r_slopeplane.cpp @@ -18,7 +18,6 @@ #include "w_wad.h" #include "doomdef.h" #include "doomstat.h" -#include "swrenderer/r_main.h" #include "r_sky.h" #include "stats.h" #include "v_video.h" @@ -37,6 +36,9 @@ #include "swrenderer/segments/r_clipsegment.h" #include "swrenderer/segments/r_drawsegment.h" #include "swrenderer/scene/r_portal.h" +#include "swrenderer/scene/r_scene.h" +#include "swrenderer/scene/r_viewport.h" +#include "swrenderer/scene/r_light.h" #include "swrenderer/r_memory.h" #include "swrenderer/plane/r_visibleplane.h" diff --git a/src/swrenderer/plane/r_visibleplane.cpp b/src/swrenderer/plane/r_visibleplane.cpp index 2fa0fc44d2..6731262f7e 100644 --- a/src/swrenderer/plane/r_visibleplane.cpp +++ b/src/swrenderer/plane/r_visibleplane.cpp @@ -28,11 +28,11 @@ #include "d_net.h" #include "g_level.h" #include "gl/dynlights/gl_dynlight.h" -#include "swrenderer/r_main.h" #include "swrenderer/r_memory.h" #include "swrenderer/scene/r_opaque_pass.h" #include "swrenderer/scene/r_3dfloors.h" #include "swrenderer/scene/r_portal.h" +#include 
"swrenderer/scene/r_light.h" #include "swrenderer/plane/r_flatplane.h" #include "swrenderer/plane/r_slopeplane.h" #include "swrenderer/plane/r_skyplane.h" diff --git a/src/swrenderer/plane/r_visibleplanelist.cpp b/src/swrenderer/plane/r_visibleplanelist.cpp index 299f2f8627..760b855609 100644 --- a/src/swrenderer/plane/r_visibleplanelist.cpp +++ b/src/swrenderer/plane/r_visibleplanelist.cpp @@ -28,11 +28,13 @@ #include "d_net.h" #include "g_level.h" #include "gl/dynlights/gl_dynlight.h" -#include "swrenderer/r_main.h" #include "swrenderer/r_memory.h" #include "swrenderer/scene/r_opaque_pass.h" #include "swrenderer/scene/r_3dfloors.h" #include "swrenderer/scene/r_portal.h" +#include "swrenderer/scene/r_scene.h" +#include "swrenderer/scene/r_viewport.h" +#include "swrenderer/scene/r_light.h" #include "swrenderer/plane/r_flatplane.h" #include "swrenderer/plane/r_slopeplane.h" #include "swrenderer/plane/r_skyplane.h" diff --git a/src/swrenderer/r_main.cpp b/src/swrenderer/r_main.cpp deleted file mode 100644 index f07b26162f..0000000000 --- a/src/swrenderer/r_main.cpp +++ /dev/null @@ -1,751 +0,0 @@ -// Emacs style mode select -*- C++ -*- -//----------------------------------------------------------------------------- -// -// $Id:$ -// -// Copyright (C) 1993-1996 by id Software, Inc. -// -// This source is available for distribution and/or modification -// only under the terms of the DOOM Source Code License as -// published by id Software. All rights reserved. -// -// The source is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// FITNESS FOR A PARTICULAR PURPOSE. See the DOOM Source Code License -// for more details. -// -// $Log:$ -// -// DESCRIPTION: -// Rendering main loop and setup functions, -// utility functions (BSP, geometry, trigonometry). -// See tables.c, too. 
-// -//----------------------------------------------------------------------------- - -// HEADER FILES ------------------------------------------------------------ - -#include -#include - -#include "templates.h" -#include "doomdef.h" -#include "d_net.h" -#include "doomstat.h" -#include "m_random.h" -#include "m_bbox.h" -#include "r_main.h" -#include "drawers/r_draw.h" -#include "plane/r_flatplane.h" -#include "scene/r_opaque_pass.h" -#include "segments/r_drawsegment.h" -#include "segments/r_portalsegment.h" -#include "segments/r_clipsegment.h" -#include "scene/r_3dfloors.h" -#include "scene/r_portal.h" -#include "scene/r_translucent_pass.h" -#include "r_sky.h" -#include "drawers/r_draw_rgba.h" -#include "st_stuff.h" -#include "c_cvars.h" -#include "c_dispatch.h" -#include "v_video.h" -#include "stats.h" -#include "i_video.h" -#include "i_system.h" -#include "a_sharedglobal.h" -#include "r_data/r_translate.h" -#include "p_3dmidtex.h" -#include "r_data/r_interpolate.h" -#include "v_palette.h" -#include "po_man.h" -#include "p_effect.h" -#include "st_start.h" -#include "v_font.h" -#include "r_data/colormaps.h" -#include "p_maputl.h" -#include "p_setup.h" -#include "version.h" -#include "c_console.h" -#include "r_memory.h" -#include "swrenderer/things/r_playersprite.h" -#include "swrenderer/plane/r_visibleplanelist.h" - -CVAR (String, r_viewsize, "", CVAR_NOSET) -CVAR (Bool, r_shadercolormaps, true, CVAR_ARCHIVE) - -EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor) - -namespace swrenderer -{ - -// MACROS ------------------------------------------------------------------ - -#if 0 -#define TEST_X 32343794 -#define TEST_Y 111387517 -#define TEST_Z 2164524 -#define TEST_ANGLE 2468347904 -#endif - - -// TYPES ------------------------------------------------------------------- - -// EXTERNAL FUNCTION PROTOTYPES -------------------------------------------- - -void R_SpanInitData (); - -// PUBLIC FUNCTION PROTOTYPES ---------------------------------------------- - -// PRIVATE 
FUNCTION PROTOTYPES --------------------------------------------- - -static void R_ShutdownRenderer(); - -// EXTERNAL DATA DECLARATIONS ---------------------------------------------- - -extern int fuzzviewheight; - - -// PRIVATE DATA DECLARATIONS ----------------------------------------------- - -static double CurrentVisibility = 8.f; -static double MaxVisForWall; -static double MaxVisForFloor; -bool r_dontmaplines; - -// PUBLIC DATA DEFINITIONS ------------------------------------------------- - -bool r_swtruecolor; - -double r_BaseVisibility; -double r_WallVisibility; -double r_FloorVisibility; -float r_TiltVisibility; -double r_SpriteVisibility; -double r_ParticleVisibility; - -double GlobVis; -fixed_t viewingrangerecip; -double FocalLengthX; -double FocalLengthY; -FDynamicColormap*basecolormap; // [RH] colormap currently drawing with -int fixedlightlev; -FSWColormap *fixedcolormap; -FSpecialColormap *realfixedcolormap; -double WallTMapScale2; - - -bool bRenderingToCanvas; // [RH] True if rendering to a special canvas -double globaluclip, globaldclip; -double CenterX, CenterY; -double YaspectMul; -double BaseYaspectMul; // yaspectmul without a forced aspect ratio -double IYaspectMul; -double InvZtoScale; - -// just for profiling purposes -int linecount; -int loopcount; - - -// -// precalculated math tables -// - -// The xtoviewangleangle[] table maps a screen pixel -// to the lowest viewangle that maps back to x ranges -// from clipangle to -clipangle. -angle_t xtoviewangle[MAXWIDTH+1]; - -bool foggy; // [RH] ignore extralight and fullbright? 
-int r_actualextralight; - -cycle_t WallCycles, PlaneCycles, MaskedCycles, WallScanCycles; - -// PRIVATE DATA DEFINITIONS ------------------------------------------------ - -static int lastcenteryfrac; - -// CODE -------------------------------------------------------------------- - -//========================================================================== -// -// R_InitTextureMapping -// -//========================================================================== - -void R_InitTextureMapping () -{ - int i; - - // Calc focallength so FieldOfView angles cover viewwidth. - FocalLengthX = CenterX / FocalTangent; - FocalLengthY = FocalLengthX * YaspectMul; - - // This is 1/FocalTangent before the widescreen extension of FOV. - viewingrangerecip = FLOAT2FIXED(1. / tan(FieldOfView.Radians() / 2)); - - - // Now generate xtoviewangle for sky texture mapping. - // [RH] Do not generate viewangletox, because texture mapping is no - // longer done with trig, so it's not needed. - const double slopestep = FocalTangent / centerx; - double slope; - - for (i = centerx, slope = 0; i <= viewwidth; i++, slope += slopestep) - { - xtoviewangle[i] = angle_t((2 * M_PI - atan(slope)) * (ANGLE_180 / M_PI)); - } - for (i = 0; i < centerx; i++) - { - xtoviewangle[i] = 0 - xtoviewangle[viewwidth - i - 1]; - } -} - -//========================================================================== -// -// R_SetVisibility -// -// Changes how rapidly things get dark with distance -// -//========================================================================== - -void R_SetVisibility(double vis) -{ - // Allow negative visibilities, just for novelty's sake - vis = clamp(vis, -204.7, 204.7); // (205 and larger do not work in 5:4 aspect ratio) - - CurrentVisibility = vis; - - if (FocalTangent == 0 || FocalLengthY == 0) - { // If r_visibility is called before the renderer is all set up, don't - // divide by zero. This will be called again later, and the proper - // values can be initialized then. 
- return; - } - - r_BaseVisibility = vis; - - // Prevent overflow on walls - if (r_BaseVisibility < 0 && r_BaseVisibility < -MaxVisForWall) - r_WallVisibility = -MaxVisForWall; - else if (r_BaseVisibility > 0 && r_BaseVisibility > MaxVisForWall) - r_WallVisibility = MaxVisForWall; - else - r_WallVisibility = r_BaseVisibility; - - r_WallVisibility = (InvZtoScale * SCREENWIDTH*AspectBaseHeight(WidescreenRatio) / - (viewwidth*SCREENHEIGHT*3)) * (r_WallVisibility * FocalTangent); - - // Prevent overflow on floors/ceilings. Note that the calculation of - // MaxVisForFloor means that planes less than two units from the player's - // view could still overflow, but there is no way to totally eliminate - // that while still using fixed point math. - if (r_BaseVisibility < 0 && r_BaseVisibility < -MaxVisForFloor) - r_FloorVisibility = -MaxVisForFloor; - else if (r_BaseVisibility > 0 && r_BaseVisibility > MaxVisForFloor) - r_FloorVisibility = MaxVisForFloor; - else - r_FloorVisibility = r_BaseVisibility; - - r_FloorVisibility = 160.0 * r_FloorVisibility / FocalLengthY; - - r_TiltVisibility = float(vis * FocalTangent * (16.f * 320.f) / viewwidth); - r_SpriteVisibility = r_WallVisibility; -} - -//========================================================================== -// -// R_GetVisibility -// -//========================================================================== - -double R_GetVisibility() -{ - return CurrentVisibility; -} - -//========================================================================== -// -// CCMD r_visibility -// -// Controls how quickly light ramps across a 1/z range. Set this, and it -// sets all the r_*Visibility variables (except r_SkyVisibilily, which is -// currently unused). 
-// -//========================================================================== - -CCMD (r_visibility) -{ - if (argv.argc() < 2) - { - Printf ("Visibility is %g\n", R_GetVisibility()); - } - else if (!netgame) - { - R_SetVisibility(atof(argv[1])); - } - else - { - Printf ("Visibility cannot be changed in net games.\n"); - } -} - -//========================================================================== -// -// R_SetWindow -// -//========================================================================== - -void R_SWRSetWindow(int windowSize, int fullWidth, int fullHeight, int stHeight, float trueratio) -{ - int virtheight, virtwidth, virtwidth2, virtheight2; - - if (!bRenderingToCanvas) - { // Set r_viewsize cvar to reflect the current view size - UCVarValue value; - char temp[16]; - - mysnprintf (temp, countof(temp), "%d x %d", viewwidth, viewheight); - value.String = temp; - r_viewsize.ForceSet (value, CVAR_String); - } - - fuzzviewheight = viewheight - 2; // Maximum row the fuzzer can draw to - - lastcenteryfrac = 1<<30; - CenterX = centerx; - CenterY = centery; - - virtwidth = virtwidth2 = fullWidth; - virtheight = virtheight2 = fullHeight; - - if (AspectTallerThanWide(trueratio)) - { - virtheight2 = virtheight2 * AspectMultiplier(trueratio) / 48; - } - else - { - virtwidth2 = virtwidth2 * AspectMultiplier(trueratio) / 48; - } - - if (AspectTallerThanWide(WidescreenRatio)) - { - virtheight = virtheight * AspectMultiplier(WidescreenRatio) / 48; - } - else - { - virtwidth = virtwidth * AspectMultiplier(WidescreenRatio) / 48; - } - - BaseYaspectMul = 320.0 * virtheight2 / (r_Yaspect * virtwidth2); - YaspectMul = 320.0 * virtheight / (r_Yaspect * virtwidth); - IYaspectMul = (double)virtwidth * r_Yaspect / 320.0 / virtheight; - InvZtoScale = YaspectMul * CenterX; - - WallTMapScale2 = IYaspectMul / CenterX; - - // psprite scales - RenderPlayerSprite::SetupSpriteScale(); - - // thing clipping - fillshort (screenheightarray, viewwidth, (short)viewheight); - - 
R_InitTextureMapping (); - - MaxVisForWall = (InvZtoScale * (SCREENWIDTH*r_Yaspect) / - (viewwidth*SCREENHEIGHT * FocalTangent)); - MaxVisForWall = 32767.0 / MaxVisForWall; - MaxVisForFloor = 32767.0 / (viewheight >> 2) * FocalLengthY / 160; - - // Reset r_*Visibility vars - R_SetVisibility(R_GetVisibility()); -} - -//========================================================================== -// -// R_Init -// -//========================================================================== - -void R_InitRenderer() -{ - atterm(R_ShutdownRenderer); - // viewwidth / viewheight are set by the defaults - fillshort (zeroarray, MAXWIDTH, 0); - - R_InitShadeMaps(); - R_InitColumnDrawers (); -} - -//========================================================================== -// -// R_ShutdownRenderer -// -//========================================================================== - -static void R_ShutdownRenderer() -{ - RenderTranslucentPass::Deinit(); - VisiblePlaneList::Instance()->Deinit(); - Clip3DFloors::Instance()->Cleanup(); - R_DeinitOpenings(); - R_FreeDrawSegs(); -} - -//========================================================================== -// -// R_SetupColormap -// -// Sets up special fixed colormaps -// -//========================================================================== - -void R_SetupColormap(player_t *player) -{ - realfixedcolormap = NULL; - fixedcolormap = NULL; - fixedlightlev = -1; - - if (player != NULL && camera == player->mo) - { - if (player->fixedcolormap >= 0 && player->fixedcolormap < (int)SpecialColormaps.Size()) - { - realfixedcolormap = &SpecialColormaps[player->fixedcolormap]; - if (RenderTarget == screen && (r_swtruecolor || ((DFrameBuffer *)screen->Accel2D && r_shadercolormaps))) - { - // Render everything fullbright. The copy to video memory will - // apply the special colormap, so it won't be restricted to the - // palette. 
- fixedcolormap = &realcolormaps; - } - else - { - fixedcolormap = &SpecialColormaps[player->fixedcolormap]; - } - } - else if (player->fixedlightlevel >= 0 && player->fixedlightlevel < NUMCOLORMAPS) - { - fixedlightlev = player->fixedlightlevel * 256; - // [SP] Emulate GZDoom's light-amp goggles. - if (r_fullbrightignoresectorcolor && fixedlightlev >= 0) - { - fixedcolormap = &FullNormalLight; - } - } - } - // [RH] Inverse light for shooting the Sigil - if (fixedcolormap == NULL && extralight == INT_MIN) - { - fixedcolormap = &SpecialColormaps[INVERSECOLORMAP]; - extralight = 0; - } -} - -//========================================================================== -// -// R_SetupFreelook -// -// [RH] freelook stuff -// -//========================================================================== - -void R_SetupFreelook() -{ - double dy; - - if (camera != NULL) - { - dy = FocalLengthY * (-ViewPitch).Tan(); - } - else - { - dy = 0; - } - - CenterY = (viewheight / 2.0) + dy; - centery = xs_ToInt(CenterY); - globaluclip = -CenterY / InvZtoScale; - globaldclip = (viewheight - CenterY) / InvZtoScale; - - RenderFlatPlane::SetupSlope(); -} - -//========================================================================== -// -// R_SetupBuffer -// -// Precalculate all row offsets and fuzz table. -// -//========================================================================== - -void R_SetupBuffer () -{ - using namespace drawerargs; - - static BYTE *lastbuff = NULL; - - int pitch = RenderTarget->GetPitch(); - int pixelsize = r_swtruecolor ? 
4 : 1; - BYTE *lineptr = RenderTarget->GetBuffer() + (viewwindowy*pitch + viewwindowx) * pixelsize; - - if (dc_pitch != pitch || lineptr != lastbuff) - { - if (dc_pitch != pitch) - { - dc_pitch = pitch; - R_InitFuzzTable (pitch); - } - dc_destorg = lineptr; - dc_destheight = RenderTarget->GetHeight() - viewwindowy; - for (int i = 0; i < RenderTarget->GetHeight(); i++) - { - ylookup[i] = i * pitch; - } - } -} - -//========================================================================== -// -// R_RenderActorView -// -//========================================================================== - -void R_RenderActorView (AActor *actor, bool dontmaplines) -{ - WallCycles.Reset(); - PlaneCycles.Reset(); - MaskedCycles.Reset(); - WallScanCycles.Reset(); - - Clip3DFloors *clip3d = Clip3DFloors::Instance(); - clip3d->fakeActive = false; // kg3D - reset fake floor indicator - clip3d->ResetClip(); // reset clips (floor/ceiling) - - R_SetupBuffer (); - R_SetupFrame (actor); - - // Clear buffers. - R_ClearClipSegs (0, viewwidth); - R_ClearDrawSegs (); - VisiblePlaneList::Instance()->Clear(true); - R_FreePlaneLights(); - RenderTranslucentPass::Clear(); - - // opening / clipping determination - RenderOpaquePass::Instance()->ClearClip(); - R_FreeOpenings(); - - NetUpdate (); - - colfunc = basecolfunc; - spanfunc = &SWPixelFormatDrawers::DrawSpan; - - RenderPortal::Instance()->SetMainPortal(); - - r_dontmaplines = dontmaplines; - - // [RH] Hack to make windows into underwater areas possible - RenderOpaquePass::Instance()->ResetFakingUnderwater(); - - // [RH] Setup particles for this frame - P_FindParticleSubsectors (); - - WallCycles.Clock(); - ActorRenderFlags savedflags = camera->renderflags; - // Never draw the player unless in chasecam mode - if (!r_showviewer) - { - camera->renderflags |= RF_INVISIBLE; - } - // Link the polyobjects right before drawing the scene to reduce the amounts of calls to this function - PO_LinkToSubsectors(); - 
RenderOpaquePass::Instance()->RenderScene(); - Clip3DFloors::Instance()->ResetClip(); // reset clips (floor/ceiling) - camera->renderflags = savedflags; - WallCycles.Unclock(); - - NetUpdate (); - - if (viewactive) - { - PlaneCycles.Clock(); - VisiblePlaneList::Instance()->Render(); - RenderPortal::Instance()->RenderPlanePortals(); - PlaneCycles.Unclock(); - - RenderPortal::Instance()->RenderLinePortals(); - - NetUpdate (); - - MaskedCycles.Clock(); - RenderTranslucentPass::Render(); - MaskedCycles.Unclock(); - - NetUpdate (); - } - WallPortals.Clear (); - interpolator.RestoreInterpolations (); - R_SetupBuffer (); - - // If we don't want shadered colormaps, NULL it now so that the - // copy to the screen does not use a special colormap shader. - if (!r_shadercolormaps && !r_swtruecolor) - { - realfixedcolormap = NULL; - } -} - -//========================================================================== -// -// R_RenderViewToCanvas -// -// Pre: Canvas is already locked. -// -//========================================================================== - -void R_RenderViewToCanvas (AActor *actor, DCanvas *canvas, - int x, int y, int width, int height, bool dontmaplines) -{ - const bool savedviewactive = viewactive; - const bool savedoutputformat = r_swtruecolor; - - if (r_swtruecolor != canvas->IsBgra()) - { - r_swtruecolor = canvas->IsBgra(); - R_InitColumnDrawers(); - } - - R_BeginDrawerCommands(); - - viewwidth = width; - RenderTarget = canvas; - bRenderingToCanvas = true; - - R_SetWindow (12, width, height, height, true); - viewwindowx = x; - viewwindowy = y; - viewactive = true; - - R_RenderActorView (actor, dontmaplines); - - R_EndDrawerCommands(); - - RenderTarget = screen; - bRenderingToCanvas = false; - R_ExecuteSetViewSize (); - screen->Lock (true); - R_SetupBuffer (); - screen->Unlock (); - - viewactive = savedviewactive; - r_swtruecolor = savedoutputformat; - - if (r_swtruecolor != canvas->IsBgra()) - { - R_InitColumnDrawers(); - } -} - 
-//========================================================================== -// -// R_MultiresInit -// -// Called from V_SetResolution() -// -//========================================================================== - -void R_MultiresInit () -{ - VisiblePlaneList::Instance()->Init(); -} - - -//========================================================================== -// -// STAT fps -// -// Displays statistics about rendering times -// -//========================================================================== - -ADD_STAT (fps) -{ - FString out; - out.Format("frame=%04.1f ms walls=%04.1f ms planes=%04.1f ms masked=%04.1f ms", - FrameCycles.TimeMS(), WallCycles.TimeMS(), PlaneCycles.TimeMS(), MaskedCycles.TimeMS()); - return out; -} - - -static double f_acc, w_acc,p_acc,m_acc; -static int acc_c; - -ADD_STAT (fps_accumulated) -{ - f_acc += FrameCycles.TimeMS(); - w_acc += WallCycles.TimeMS(); - p_acc += PlaneCycles.TimeMS(); - m_acc += MaskedCycles.TimeMS(); - acc_c++; - FString out; - out.Format("frame=%04.1f ms walls=%04.1f ms planes=%04.1f ms masked=%04.1f ms %d counts", - f_acc/acc_c, w_acc/acc_c, p_acc/acc_c, m_acc/acc_c, acc_c); - Printf(PRINT_LOG, "%s\n", out.GetChars()); - return out; -} - -//========================================================================== -// -// STAT wallcycles -// -// Displays the minimum number of cycles spent drawing walls -// -//========================================================================== - -static double bestwallcycles = HUGE_VAL; - -ADD_STAT (wallcycles) -{ - FString out; - double cycles = WallCycles.Time(); - if (cycles && cycles < bestwallcycles) - bestwallcycles = cycles; - out.Format ("%g", bestwallcycles); - return out; -} - -//========================================================================== -// -// CCMD clearwallcycles -// -// Resets the count of minimum wall drawing cycles -// -//========================================================================== - -CCMD (clearwallcycles) -{ - 
bestwallcycles = HUGE_VAL; -} - -#if 0 -// The replacement code for Build's wallscan doesn't have any timing calls so this does not work anymore. -static double bestscancycles = HUGE_VAL; - -ADD_STAT (scancycles) -{ - FString out; - double scancycles = WallScanCycles.Time(); - if (scancycles && scancycles < bestscancycles) - bestscancycles = scancycles; - out.Format ("%g", bestscancycles); - return out; -} - -CCMD (clearscancycles) -{ - bestscancycles = HUGE_VAL; -} -#endif - -} diff --git a/src/swrenderer/r_main.h b/src/swrenderer/r_main.h deleted file mode 100644 index c0a8ac345c..0000000000 --- a/src/swrenderer/r_main.h +++ /dev/null @@ -1,141 +0,0 @@ -// Emacs style mode select -*- C++ -*- -//----------------------------------------------------------------------------- -// -// $Id:$ -// -// Copyright (C) 1993-1996 by id Software, Inc. -// -// This source is available for distribution and/or modification -// only under the terms of the DOOM Source Code License as -// published by id Software. All rights reserved. -// -// The source is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// FITNESS FOR A PARTICULAR PURPOSE. See the DOOM Source Code License -// for more details. -// -// DESCRIPTION: -// System specific interface stuff. -// -//----------------------------------------------------------------------------- - - -#ifndef __R_MAIN_H__ -#define __R_MAIN_H__ - -#include "r_utility.h" -#include "d_player.h" -#include "v_palette.h" -#include "r_data/colormaps.h" - -extern double ViewCos; -extern double ViewSin; -extern int viewwindowx; -extern int viewwindowy; - -typedef BYTE lighttable_t; // This could be wider for >8 bit display. - -extern cycle_t FrameCycles; - -namespace swrenderer -{ - -extern cycle_t WallCycles, PlaneCycles, MaskedCycles, WallScanCycles; - -// -// POV related. 
-// -extern bool bRenderingToCanvas; -extern fixed_t viewingrangerecip; -extern double FocalLengthX, FocalLengthY; -extern double InvZtoScale; - -extern double WallTMapScale2; - - -extern double CenterX; -extern double CenterY; -extern double YaspectMul; -extern double IYaspectMul; - -extern FDynamicColormap*basecolormap; // [RH] Colormap for sector currently being drawn - -extern int linecount; -extern int loopcount; - -extern bool r_dontmaplines; - -// -// Lighting. -// -// [RH] This has changed significantly from Doom, which used lookup -// tables based on 1/z for walls and z for flats and only recognized -// 16 discrete light levels. The terminology I use is borrowed from Build. -// - -// The size of a single colormap, in bits -#define COLORMAPSHIFT 8 - -// Convert a light level into an unbounded colormap index (shade). Result is -// fixed point. Why the +12? I wish I knew, but experimentation indicates it -// is necessary in order to best reproduce Doom's original lighting. -#define LIGHT2SHADE(l) ((NUMCOLORMAPS*2*FRACUNIT)-(((l)+12)*(FRACUNIT*NUMCOLORMAPS/128))) - -// MAXLIGHTSCALE from original DOOM, divided by 2. -#define MAXLIGHTVIS (24.0) - -// Convert a shade and visibility to a clamped colormap index. -// Result is not fixed point. -// Change R_CalcTiltedLighting() when this changes. 
-#define GETPALOOKUP(vis,shade) (clamp (((shade)-FLOAT2FIXED(MIN(MAXLIGHTVIS,double(vis))))>>FRACBITS, 0, NUMCOLORMAPS-1)) - -// Calculate the light multiplier for dc_light/ds_light -// This is used instead of GETPALOOKUP when ds_colormap/dc_colormap is set to the base colormap -// Returns a value between 0 and 1 in fixed point -#define LIGHTSCALE(vis,shade) FLOAT2FIXED(clamp((FIXED2DBL(shade) - (MIN(MAXLIGHTVIS,double(vis)))) / NUMCOLORMAPS, 0.0, (NUMCOLORMAPS-1)/(double)NUMCOLORMAPS)) - -// Converts fixedlightlev into a shade value -#define FIXEDLIGHT2SHADE(lightlev) (((lightlev) >> COLORMAPSHIFT) << FRACBITS) - -extern bool r_swtruecolor; - -extern double GlobVis; - -void R_SetVisibility(double visibility); -double R_GetVisibility(); - -extern double r_BaseVisibility; -extern double r_WallVisibility; -extern double r_FloorVisibility; -extern float r_TiltVisibility; -extern double r_SpriteVisibility; - -extern int r_actualextralight; -extern bool foggy; -extern int fixedlightlev; -extern FSWColormap* fixedcolormap; -extern FSpecialColormap*realfixedcolormap; - -void R_InitTextureMapping (); - - -// -// REFRESH - the actual rendering functions. -// - -// Called by G_Drawer. 
-void R_RenderActorView (AActor *actor, bool dontmaplines = false); -void R_SetupBuffer (); - -void R_RenderViewToCanvas (AActor *actor, DCanvas *canvas, int x, int y, int width, int height, bool dontmaplines = false); - -// [RH] Initialize multires stuff for renderer -void R_MultiresInit (void); - - - -extern double globaluclip, globaldclip; - -} - -#endif // __R_MAIN_H__ diff --git a/src/swrenderer/r_memory.cpp b/src/swrenderer/r_memory.cpp index c77cada56c..0893f69369 100644 --- a/src/swrenderer/r_memory.cpp +++ b/src/swrenderer/r_memory.cpp @@ -18,7 +18,6 @@ #include "i_system.h" #include "p_lnspec.h" #include "p_setup.h" -#include "swrenderer/r_main.h" #include "swrenderer/drawers/r_draw.h" #include "swrenderer/plane/r_visibleplane.h" #include "a_sharedglobal.h" diff --git a/src/swrenderer/r_swrenderer.cpp b/src/swrenderer/r_swrenderer.cpp index d11d6ee89a..d1397f10b8 100644 --- a/src/swrenderer/r_swrenderer.cpp +++ b/src/swrenderer/r_swrenderer.cpp @@ -32,9 +32,12 @@ ** */ - -#include "r_main.h" +#include "swrenderer/scene/r_scene.h" +#include "swrenderer/scene/r_viewport.h" #include "swrenderer/things/r_playersprite.h" +#include "swrenderer/scene/r_scene.h" +#include "swrenderer/scene/r_viewport.h" +#include "swrenderer/scene/r_light.h" #include "v_palette.h" #include "v_video.h" #include "m_png.h" @@ -69,16 +72,6 @@ CUSTOM_CVAR(Bool, r_polyrenderer, 0, CVAR_ARCHIVE | CVAR_GLOBALCONFIG | CVAR_NOI } } -namespace swrenderer -{ - -void R_SWRSetWindow(int windowSize, int fullWidth, int fullHeight, int stHeight, float trueratio); -void R_SetupColormap(player_t *); -void R_SetupFreelook(); -void R_InitRenderer(); - -} - using namespace swrenderer; FSoftwareRenderer::FSoftwareRenderer() diff --git a/src/swrenderer/scene/r_3dfloors.cpp b/src/swrenderer/scene/r_3dfloors.cpp index 23b4adabad..3feb7b91c1 100644 --- a/src/swrenderer/scene/r_3dfloors.cpp +++ b/src/swrenderer/scene/r_3dfloors.cpp @@ -9,10 +9,10 @@ #include "doomdef.h" #include "p_local.h" #include 
"c_dispatch.h" -#include "swrenderer/r_main.h" #include "r_opaque_pass.h" #include "c_cvars.h" #include "r_3dfloors.h" +#include "r_utility.h" CVAR(Int, r_3dfloors, true, 0); diff --git a/src/swrenderer/scene/r_light.cpp b/src/swrenderer/scene/r_light.cpp new file mode 100644 index 0000000000..e5cfb3f4ff --- /dev/null +++ b/src/swrenderer/scene/r_light.cpp @@ -0,0 +1,178 @@ +// +// Copyright (C) 1993-1996 by id Software, Inc. +// +// This source is available for distribution and/or modification +// only under the terms of the DOOM Source Code License as +// published by id Software. All rights reserved. +// +// The source is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// FITNESS FOR A PARTICULAR PURPOSE. See the DOOM Source Code License +// for more details. +// + +#include +#include + +#include "templates.h" +#include "i_system.h" +#include "w_wad.h" +#include "doomdef.h" +#include "doomstat.h" +#include "r_sky.h" +#include "stats.h" +#include "v_video.h" +#include "a_sharedglobal.h" +#include "c_console.h" +#include "c_dispatch.h" +#include "cmdlib.h" +#include "d_net.h" +#include "g_level.h" +#include "r_utility.h" +#include "d_player.h" +#include "swrenderer/scene/r_light.h" +#include "swrenderer/scene/r_viewport.h" + +CVAR(Bool, r_shadercolormaps, true, CVAR_ARCHIVE) +EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor) + +namespace swrenderer +{ + double r_BaseVisibility; + double r_WallVisibility; + double r_FloorVisibility; + float r_TiltVisibility; + double r_SpriteVisibility; + double r_ParticleVisibility; + + double GlobVis; + + FDynamicColormap *basecolormap; // [RH] colormap currently drawing with + int fixedlightlev; + FSWColormap *fixedcolormap; + FSpecialColormap *realfixedcolormap; + + bool foggy; // [RH] ignore extralight and fullbright? 
+ int r_actualextralight; + + namespace + { + double CurrentVisibility = 8.f; + double MaxVisForWall; + double MaxVisForFloor; + } + + // Changes how rapidly things get dark with distance + void R_SetVisibility(double vis) + { + // Allow negative visibilities, just for novelty's sake + vis = clamp(vis, -204.7, 204.7); // (205 and larger do not work in 5:4 aspect ratio) + + CurrentVisibility = vis; + + if (FocalTangent == 0 || FocalLengthY == 0) + { // If r_visibility is called before the renderer is all set up, don't + // divide by zero. This will be called again later, and the proper + // values can be initialized then. + return; + } + + r_BaseVisibility = vis; + + MaxVisForWall = (InvZtoScale * (SCREENWIDTH*r_Yaspect) / (viewwidth*SCREENHEIGHT * FocalTangent)); + MaxVisForWall = 32767.0 / MaxVisForWall; + MaxVisForFloor = 32767.0 / (viewheight >> 2) * FocalLengthY / 160; + + // Prevent overflow on walls + if (r_BaseVisibility < 0 && r_BaseVisibility < -MaxVisForWall) + r_WallVisibility = -MaxVisForWall; + else if (r_BaseVisibility > 0 && r_BaseVisibility > MaxVisForWall) + r_WallVisibility = MaxVisForWall; + else + r_WallVisibility = r_BaseVisibility; + + r_WallVisibility = (InvZtoScale * SCREENWIDTH*AspectBaseHeight(WidescreenRatio) / + (viewwidth*SCREENHEIGHT * 3)) * (r_WallVisibility * FocalTangent); + + // Prevent overflow on floors/ceilings. Note that the calculation of + // MaxVisForFloor means that planes less than two units from the player's + // view could still overflow, but there is no way to totally eliminate + // that while still using fixed point math. 
+ if (r_BaseVisibility < 0 && r_BaseVisibility < -MaxVisForFloor) + r_FloorVisibility = -MaxVisForFloor; + else if (r_BaseVisibility > 0 && r_BaseVisibility > MaxVisForFloor) + r_FloorVisibility = MaxVisForFloor; + else + r_FloorVisibility = r_BaseVisibility; + + r_FloorVisibility = 160.0 * r_FloorVisibility / FocalLengthY; + + r_TiltVisibility = float(vis * FocalTangent * (16.f * 320.f) / viewwidth); + r_SpriteVisibility = r_WallVisibility; + } + + double R_GetVisibility() + { + return CurrentVisibility; + } + + void R_SetupColormap(player_t *player) + { + realfixedcolormap = NULL; + fixedcolormap = NULL; + fixedlightlev = -1; + + if (player != NULL && camera == player->mo) + { + if (player->fixedcolormap >= 0 && player->fixedcolormap < (int)SpecialColormaps.Size()) + { + realfixedcolormap = &SpecialColormaps[player->fixedcolormap]; + if (RenderTarget == screen && (r_swtruecolor || ((DFrameBuffer *)screen->Accel2D && r_shadercolormaps))) + { + // Render everything fullbright. The copy to video memory will + // apply the special colormap, so it won't be restricted to the + // palette. + fixedcolormap = &realcolormaps; + } + else + { + fixedcolormap = &SpecialColormaps[player->fixedcolormap]; + } + } + else if (player->fixedlightlevel >= 0 && player->fixedlightlevel < NUMCOLORMAPS) + { + fixedlightlev = player->fixedlightlevel * 256; + // [SP] Emulate GZDoom's light-amp goggles. + if (r_fullbrightignoresectorcolor && fixedlightlev >= 0) + { + fixedcolormap = &FullNormalLight; + } + } + } + // [RH] Inverse light for shooting the Sigil + if (fixedcolormap == NULL && extralight == INT_MIN) + { + fixedcolormap = &SpecialColormaps[INVERSECOLORMAP]; + extralight = 0; + } + } + + // Controls how quickly light ramps across a 1/z range. Set this, and it + // sets all the r_*Visibility variables (except r_SkyVisibility, which is + // currently unused). 
+ CCMD(r_visibility) + { + if (argv.argc() < 2) + { + Printf("Visibility is %g\n", R_GetVisibility()); + } + else if (!netgame) + { + R_SetVisibility(atof(argv[1])); + } + else + { + Printf("Visibility cannot be changed in net games.\n"); + } + } +} diff --git a/src/swrenderer/scene/r_light.h b/src/swrenderer/scene/r_light.h new file mode 100644 index 0000000000..eab64f0944 --- /dev/null +++ b/src/swrenderer/scene/r_light.h @@ -0,0 +1,73 @@ +// +// Copyright (C) 1993-1996 by id Software, Inc. +// +// This source is available for distribution and/or modification +// only under the terms of the DOOM Source Code License as +// published by id Software. All rights reserved. +// +// The source is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// FITNESS FOR A PARTICULAR PURPOSE. See the DOOM Source Code License +// for more details. +// + +#pragma once + +#include +#include "r_defs.h" +#include "v_palette.h" +#include "r_data/colormaps.h" +#include "r_utility.h" + +// Lighting. +// +// [RH] This has changed significantly from Doom, which used lookup +// tables based on 1/z for walls and z for flats and only recognized +// 16 discrete light levels. The terminology I use is borrowed from Build. + +// The size of a single colormap, in bits +#define COLORMAPSHIFT 8 + +// Convert a light level into an unbounded colormap index (shade). Result is +// fixed point. Why the +12? I wish I knew, but experimentation indicates it +// is necessary in order to best reproduce Doom's original lighting. +#define LIGHT2SHADE(l) ((NUMCOLORMAPS*2*FRACUNIT)-(((l)+12)*(FRACUNIT*NUMCOLORMAPS/128))) + +// MAXLIGHTSCALE from original DOOM, divided by 2. +#define MAXLIGHTVIS (24.0) + +// Convert a shade and visibility to a clamped colormap index. +// Result is not fixed point. +// Change R_CalcTiltedLighting() when this changes. 
+#define GETPALOOKUP(vis,shade) (clamp (((shade)-FLOAT2FIXED(MIN(MAXLIGHTVIS,double(vis))))>>FRACBITS, 0, NUMCOLORMAPS-1)) + +// Calculate the light multiplier for dc_light/ds_light +// This is used instead of GETPALOOKUP when ds_colormap/dc_colormap is set to the base colormap +// Returns a value between 0 and 1 in fixed point +#define LIGHTSCALE(vis,shade) FLOAT2FIXED(clamp((FIXED2DBL(shade) - (MIN(MAXLIGHTVIS,double(vis)))) / NUMCOLORMAPS, 0.0, (NUMCOLORMAPS-1)/(double)NUMCOLORMAPS)) + +// Converts fixedlightlev into a shade value +#define FIXEDLIGHT2SHADE(lightlev) (((lightlev) >> COLORMAPSHIFT) << FRACBITS) + +namespace swrenderer +{ + extern double r_BaseVisibility; + extern double r_WallVisibility; + extern double r_FloorVisibility; + extern float r_TiltVisibility; + extern double r_SpriteVisibility; + + extern double GlobVis; + + extern int r_actualextralight; + extern bool foggy; + extern int fixedlightlev; + extern FSWColormap *fixedcolormap; + extern FSpecialColormap *realfixedcolormap; + extern FDynamicColormap *basecolormap; // [RH] Colormap for sector currently being drawn + + void R_SetVisibility(double visibility); + double R_GetVisibility(); + + void R_SetupColormap(player_t *); +} diff --git a/src/swrenderer/scene/r_opaque_pass.cpp b/src/swrenderer/scene/r_opaque_pass.cpp index f43c269320..7cdc30834e 100644 --- a/src/swrenderer/scene/r_opaque_pass.cpp +++ b/src/swrenderer/scene/r_opaque_pass.cpp @@ -32,7 +32,6 @@ #include "p_lnspec.h" #include "p_setup.h" -#include "swrenderer/r_main.h" #include "swrenderer/drawers/r_draw.h" #include "swrenderer/plane/r_visibleplane.h" #include "swrenderer/plane/r_visibleplanelist.h" @@ -42,6 +41,9 @@ #include "swrenderer/things/r_particle.h" #include "swrenderer/segments/r_clipsegment.h" #include "swrenderer/line/r_wallsetup.h" +#include "swrenderer/scene/r_scene.h" +#include "swrenderer/scene/r_viewport.h" +#include "swrenderer/scene/r_light.h" #include "r_3dfloors.h" #include "r_portal.h" #include 
"a_sharedglobal.h" diff --git a/src/swrenderer/scene/r_portal.cpp b/src/swrenderer/scene/r_portal.cpp index 1ff62438af..6c89d9fc56 100644 --- a/src/swrenderer/scene/r_portal.cpp +++ b/src/swrenderer/scene/r_portal.cpp @@ -52,7 +52,9 @@ #include "swrenderer/things/r_visiblesprite.h" #include "swrenderer/scene/r_opaque_pass.h" #include "swrenderer/scene/r_translucent_pass.h" -#include "swrenderer/r_main.h" +#include "swrenderer/scene/r_scene.h" +#include "swrenderer/scene/r_viewport.h" +#include "swrenderer/scene/r_light.h" #include "swrenderer/r_memory.h" CVAR(Int, r_portal_recursions, 4, CVAR_ARCHIVE) diff --git a/src/swrenderer/scene/r_scene.cpp b/src/swrenderer/scene/r_scene.cpp new file mode 100644 index 0000000000..50e20be722 --- /dev/null +++ b/src/swrenderer/scene/r_scene.cpp @@ -0,0 +1,270 @@ +// +// Copyright (C) 1993-1996 by id Software, Inc. +// +// This source is available for distribution and/or modification +// only under the terms of the DOOM Source Code License as +// published by id Software. All rights reserved. +// +// The source is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// FITNESS FOR A PARTICULAR PURPOSE. See the DOOM Source Code License +// for more details. 
+// + +#include +#include + +#include "templates.h" +#include "i_system.h" +#include "w_wad.h" +#include "doomdef.h" +#include "doomstat.h" +#include "r_sky.h" +#include "stats.h" +#include "v_video.h" +#include "a_sharedglobal.h" +#include "c_console.h" +#include "c_dispatch.h" +#include "cmdlib.h" +#include "d_net.h" +#include "g_level.h" +#include "p_effect.h" +#include "po_man.h" +#include "r_data/r_interpolate.h" +#include "swrenderer/scene/r_scene.h" +#include "swrenderer/scene/r_viewport.h" +#include "swrenderer/scene/r_light.h" +#include "swrenderer/scene/r_3dfloors.h" +#include "swrenderer/scene/r_opaque_pass.h" +#include "swrenderer/scene/r_translucent_pass.h" +#include "swrenderer/scene/r_portal.h" +#include "swrenderer/segments/r_clipsegment.h" +#include "swrenderer/segments/r_drawsegment.h" +#include "swrenderer/segments/r_portalsegment.h" +#include "swrenderer/plane/r_visibleplanelist.h" +#include "swrenderer/drawers/r_draw.h" +#include "swrenderer/drawers/r_thread.h" +#include "swrenderer/r_memory.h" + +EXTERN_CVAR(Bool, r_shadercolormaps) + +namespace swrenderer +{ + cycle_t WallCycles, PlaneCycles, MaskedCycles, WallScanCycles; + bool r_dontmaplines; + + void R_RenderActorView(AActor *actor, bool dontmaplines) + { + WallCycles.Reset(); + PlaneCycles.Reset(); + MaskedCycles.Reset(); + WallScanCycles.Reset(); + + Clip3DFloors *clip3d = Clip3DFloors::Instance(); + clip3d->fakeActive = false; // kg3D - reset fake floor indicator + clip3d->ResetClip(); // reset clips (floor/ceiling) + + R_SetupBuffer(); + R_SetupFrame(actor); + + // Clear buffers. 
+ R_ClearClipSegs(0, viewwidth); + R_ClearDrawSegs(); + VisiblePlaneList::Instance()->Clear(true); + R_FreePlaneLights(); + RenderTranslucentPass::Clear(); + + // opening / clipping determination + RenderOpaquePass::Instance()->ClearClip(); + R_FreeOpenings(); + + NetUpdate(); + + colfunc = basecolfunc; + spanfunc = &SWPixelFormatDrawers::DrawSpan; + + RenderPortal::Instance()->SetMainPortal(); + + r_dontmaplines = dontmaplines; + + // [RH] Hack to make windows into underwater areas possible + RenderOpaquePass::Instance()->ResetFakingUnderwater(); + + // [RH] Setup particles for this frame + P_FindParticleSubsectors(); + + WallCycles.Clock(); + ActorRenderFlags savedflags = camera->renderflags; + // Never draw the player unless in chasecam mode + if (!r_showviewer) + { + camera->renderflags |= RF_INVISIBLE; + } + // Link the polyobjects right before drawing the scene to reduce the amounts of calls to this function + PO_LinkToSubsectors(); + RenderOpaquePass::Instance()->RenderScene(); + Clip3DFloors::Instance()->ResetClip(); // reset clips (floor/ceiling) + camera->renderflags = savedflags; + WallCycles.Unclock(); + + NetUpdate(); + + if (viewactive) + { + PlaneCycles.Clock(); + VisiblePlaneList::Instance()->Render(); + RenderPortal::Instance()->RenderPlanePortals(); + PlaneCycles.Unclock(); + + RenderPortal::Instance()->RenderLinePortals(); + + NetUpdate(); + + MaskedCycles.Clock(); + RenderTranslucentPass::Render(); + MaskedCycles.Unclock(); + + NetUpdate(); + } + WallPortals.Clear(); + interpolator.RestoreInterpolations(); + R_SetupBuffer(); + + // If we don't want shadered colormaps, NULL it now so that the + // copy to the screen does not use a special colormap shader. 
+ if (!r_shadercolormaps && !r_swtruecolor) + { + realfixedcolormap = NULL; + } + } + + void R_RenderViewToCanvas(AActor *actor, DCanvas *canvas, int x, int y, int width, int height, bool dontmaplines) + { + const bool savedviewactive = viewactive; + const bool savedoutputformat = r_swtruecolor; + + if (r_swtruecolor != canvas->IsBgra()) + { + r_swtruecolor = canvas->IsBgra(); + R_InitColumnDrawers(); + } + + R_BeginDrawerCommands(); + + viewwidth = width; + RenderTarget = canvas; + bRenderingToCanvas = true; + + R_SetWindow(12, width, height, height, true); + viewwindowx = x; + viewwindowy = y; + viewactive = true; + + R_RenderActorView(actor, dontmaplines); + + R_EndDrawerCommands(); + + RenderTarget = screen; + bRenderingToCanvas = false; + R_ExecuteSetViewSize(); + screen->Lock(true); + R_SetupBuffer(); + screen->Unlock(); + + viewactive = savedviewactive; + r_swtruecolor = savedoutputformat; + + if (r_swtruecolor != canvas->IsBgra()) + { + R_InitColumnDrawers(); + } + } + + void R_MultiresInit() + { + VisiblePlaneList::Instance()->Init(); + } + + void R_InitRenderer() + { + atterm(R_ShutdownRenderer); + // viewwidth / viewheight are set by the defaults + fillshort(zeroarray, MAXWIDTH, 0); + + R_InitShadeMaps(); + R_InitColumnDrawers(); + } + + void R_ShutdownRenderer() + { + RenderTranslucentPass::Deinit(); + VisiblePlaneList::Instance()->Deinit(); + Clip3DFloors::Instance()->Cleanup(); + R_DeinitOpenings(); + R_FreeDrawSegs(); + } + + ///////////////////////////////////////////////////////////////////////// + + ADD_STAT(fps) + { + FString out; + out.Format("frame=%04.1f ms walls=%04.1f ms planes=%04.1f ms masked=%04.1f ms", + FrameCycles.TimeMS(), WallCycles.TimeMS(), PlaneCycles.TimeMS(), MaskedCycles.TimeMS()); + return out; + } + + static double f_acc, w_acc, p_acc, m_acc; + static int acc_c; + + ADD_STAT(fps_accumulated) + { + f_acc += FrameCycles.TimeMS(); + w_acc += WallCycles.TimeMS(); + p_acc += PlaneCycles.TimeMS(); + m_acc += MaskedCycles.TimeMS(); 
+ acc_c++; + FString out; + out.Format("frame=%04.1f ms walls=%04.1f ms planes=%04.1f ms masked=%04.1f ms %d counts", + f_acc / acc_c, w_acc / acc_c, p_acc / acc_c, m_acc / acc_c, acc_c); + Printf(PRINT_LOG, "%s\n", out.GetChars()); + return out; + } + + static double bestwallcycles = HUGE_VAL; + + ADD_STAT(wallcycles) + { + FString out; + double cycles = WallCycles.Time(); + if (cycles && cycles < bestwallcycles) + bestwallcycles = cycles; + out.Format("%g", bestwallcycles); + return out; + } + + CCMD(clearwallcycles) + { + bestwallcycles = HUGE_VAL; + } + +#if 0 + // The replacement code for Build's wallscan doesn't have any timing calls so this does not work anymore. + static double bestscancycles = HUGE_VAL; + + ADD_STAT(scancycles) + { + FString out; + double scancycles = WallScanCycles.Time(); + if (scancycles && scancycles < bestscancycles) + bestscancycles = scancycles; + out.Format("%g", bestscancycles); + return out; + } + + CCMD(clearscancycles) + { + bestscancycles = HUGE_VAL; + } +#endif +} diff --git a/src/swrenderer/scene/r_scene.h b/src/swrenderer/scene/r_scene.h new file mode 100644 index 0000000000..5b5a951bce --- /dev/null +++ b/src/swrenderer/scene/r_scene.h @@ -0,0 +1,34 @@ +// +// Copyright (C) 1993-1996 by id Software, Inc. +// +// This source is available for distribution and/or modification +// only under the terms of the DOOM Source Code License as +// published by id Software. All rights reserved. +// +// The source is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// FITNESS FOR A PARTICULAR PURPOSE. See the DOOM Source Code License +// for more details. 
+// + +#pragma once + +#include +#include "r_defs.h" +#include "d_player.h" + +extern cycle_t FrameCycles; + +namespace swrenderer +{ + extern cycle_t WallCycles, PlaneCycles, MaskedCycles, WallScanCycles; + + extern bool r_dontmaplines; + + void R_RenderActorView(AActor *actor, bool dontmaplines = false); + void R_RenderViewToCanvas(AActor *actor, DCanvas *canvas, int x, int y, int width, int height, bool dontmaplines = false); + + void R_InitRenderer(); + void R_MultiresInit(); + void R_ShutdownRenderer(); +} diff --git a/src/swrenderer/scene/r_translucent_pass.cpp b/src/swrenderer/scene/r_translucent_pass.cpp index 6b6db1caae..9edbe972e8 100644 --- a/src/swrenderer/scene/r_translucent_pass.cpp +++ b/src/swrenderer/scene/r_translucent_pass.cpp @@ -22,7 +22,6 @@ #include "w_wad.h" #include "g_levellocals.h" #include "p_maputl.h" -#include "swrenderer/r_main.h" #include "swrenderer/things/r_visiblesprite.h" #include "swrenderer/things/r_visiblespritelist.h" #include "swrenderer/things/r_voxel.h" @@ -33,6 +32,8 @@ #include "swrenderer/segments/r_drawsegment.h" #include "swrenderer/scene/r_portal.h" #include "swrenderer/scene/r_translucent_pass.h" +#include "swrenderer/scene/r_viewport.h" +#include "swrenderer/scene/r_light.h" #include "swrenderer/plane/r_visibleplane.h" #include "swrenderer/plane/r_visibleplanelist.h" #include "swrenderer/r_memory.h" diff --git a/src/swrenderer/scene/r_viewport.cpp b/src/swrenderer/scene/r_viewport.cpp new file mode 100644 index 0000000000..7812fab652 --- /dev/null +++ b/src/swrenderer/scene/r_viewport.cpp @@ -0,0 +1,195 @@ +// +// Copyright (C) 1993-1996 by id Software, Inc. +// +// This source is available for distribution and/or modification +// only under the terms of the DOOM Source Code License as +// published by id Software. All rights reserved. +// +// The source is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// FITNESS FOR A PARTICULAR PURPOSE. 
See the DOOM Source Code License +// for more details. +// + +#include +#include + +#include "templates.h" +#include "i_system.h" +#include "w_wad.h" +#include "doomdef.h" +#include "doomstat.h" +#include "r_sky.h" +#include "stats.h" +#include "v_video.h" +#include "a_sharedglobal.h" +#include "c_console.h" +#include "cmdlib.h" +#include "d_net.h" +#include "g_level.h" +#include "r_utility.h" +#include "swrenderer/scene/r_viewport.h" +#include "swrenderer/scene/r_light.h" +#include "swrenderer/drawers/r_draw.h" +#include "swrenderer/things/r_playersprite.h" +#include "swrenderer/plane/r_flatplane.h" + +CVAR(String, r_viewsize, "", CVAR_NOSET) + +namespace swrenderer +{ + bool r_swtruecolor; + + fixed_t viewingrangerecip; + double FocalLengthX; + double FocalLengthY; + + bool bRenderingToCanvas; + double globaluclip, globaldclip; + double CenterX, CenterY; + double YaspectMul; + double BaseYaspectMul; // yaspectmul without a forced aspect ratio + double IYaspectMul; + double InvZtoScale; + + double WallTMapScale2; + + // The xtoviewangle[] table maps a screen pixel + // to the lowest viewangle that maps back to x ranges + // from clipangle to -clipangle. 
+ angle_t xtoviewangle[MAXWIDTH + 1]; + + void R_SWRSetWindow(int windowSize, int fullWidth, int fullHeight, int stHeight, float trueratio) + { + int virtheight, virtwidth, virtwidth2, virtheight2; + + if (!bRenderingToCanvas) + { // Set r_viewsize cvar to reflect the current view size + UCVarValue value; + char temp[16]; + + mysnprintf(temp, countof(temp), "%d x %d", viewwidth, viewheight); + value.String = temp; + r_viewsize.ForceSet(value, CVAR_String); + } + + fuzzviewheight = viewheight - 2; // Maximum row the fuzzer can draw to + + CenterX = centerx; + CenterY = centery; + + virtwidth = virtwidth2 = fullWidth; + virtheight = virtheight2 = fullHeight; + + if (AspectTallerThanWide(trueratio)) + { + virtheight2 = virtheight2 * AspectMultiplier(trueratio) / 48; + } + else + { + virtwidth2 = virtwidth2 * AspectMultiplier(trueratio) / 48; + } + + if (AspectTallerThanWide(WidescreenRatio)) + { + virtheight = virtheight * AspectMultiplier(WidescreenRatio) / 48; + } + else + { + virtwidth = virtwidth * AspectMultiplier(WidescreenRatio) / 48; + } + + BaseYaspectMul = 320.0 * virtheight2 / (r_Yaspect * virtwidth2); + YaspectMul = 320.0 * virtheight / (r_Yaspect * virtwidth); + IYaspectMul = (double)virtwidth * r_Yaspect / 320.0 / virtheight; + InvZtoScale = YaspectMul * CenterX; + + WallTMapScale2 = IYaspectMul / CenterX; + + // psprite scales + RenderPlayerSprite::SetupSpriteScale(); + + // thing clipping + fillshort(screenheightarray, viewwidth, (short)viewheight); + + R_InitTextureMapping(); + + // Reset r_*Visibility vars + R_SetVisibility(R_GetVisibility()); + } + + void R_SetupFreelook() + { + double dy; + + if (camera != NULL) + { + dy = FocalLengthY * (-ViewPitch).Tan(); + } + else + { + dy = 0; + } + + CenterY = (viewheight / 2.0) + dy; + centery = xs_ToInt(CenterY); + globaluclip = -CenterY / InvZtoScale; + globaldclip = (viewheight - CenterY) / InvZtoScale; + + RenderFlatPlane::SetupSlope(); + } + + void R_SetupBuffer() + { + using namespace drawerargs; + + 
static BYTE *lastbuff = NULL; + + int pitch = RenderTarget->GetPitch(); + int pixelsize = r_swtruecolor ? 4 : 1; + BYTE *lineptr = RenderTarget->GetBuffer() + (viewwindowy*pitch + viewwindowx) * pixelsize; + + if (dc_pitch != pitch || lineptr != lastbuff) + { + if (dc_pitch != pitch) + { + dc_pitch = pitch; + R_InitFuzzTable(pitch); + } + dc_destorg = lineptr; + dc_destheight = RenderTarget->GetHeight() - viewwindowy; + for (int i = 0; i < RenderTarget->GetHeight(); i++) + { + ylookup[i] = i * pitch; + } + } + } + + void R_InitTextureMapping() + { + int i; + + // Calc focallength so FieldOfView angles cover viewwidth. + FocalLengthX = CenterX / FocalTangent; + FocalLengthY = FocalLengthX * YaspectMul; + + // This is 1/FocalTangent before the widescreen extension of FOV. + viewingrangerecip = FLOAT2FIXED(1. / tan(FieldOfView.Radians() / 2)); + + + // Now generate xtoviewangle for sky texture mapping. + // [RH] Do not generate viewangletox, because texture mapping is no + // longer done with trig, so it's not needed. + const double slopestep = FocalTangent / centerx; + double slope; + + for (i = centerx, slope = 0; i <= viewwidth; i++, slope += slopestep) + { + xtoviewangle[i] = angle_t((2 * M_PI - atan(slope)) * (ANGLE_180 / M_PI)); + } + for (i = 0; i < centerx; i++) + { + xtoviewangle[i] = 0 - xtoviewangle[viewwidth - i - 1]; + } + } +} diff --git a/src/swrenderer/scene/r_viewport.h b/src/swrenderer/scene/r_viewport.h new file mode 100644 index 0000000000..2fa32988ed --- /dev/null +++ b/src/swrenderer/scene/r_viewport.h @@ -0,0 +1,41 @@ +// +// Copyright (C) 1993-1996 by id Software, Inc. +// +// This source is available for distribution and/or modification +// only under the terms of the DOOM Source Code License as +// published by id Software. All rights reserved. +// +// The source is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// FITNESS FOR A PARTICULAR PURPOSE. 
See the DOOM Source Code License +// for more details. +// + +#pragma once + +#include +#include "r_defs.h" + +namespace swrenderer +{ + extern bool bRenderingToCanvas; + extern fixed_t viewingrangerecip; + extern double FocalLengthX; + extern double FocalLengthY; + extern double InvZtoScale; + extern double WallTMapScale2; + extern double CenterX; + extern double CenterY; + extern double YaspectMul; + extern double IYaspectMul; + extern bool r_swtruecolor; + extern double globaluclip; + extern double globaldclip; + extern angle_t xtoviewangle[MAXWIDTH + 1]; + + void R_SWRSetWindow(int windowSize, int fullWidth, int fullHeight, int stHeight, float trueratio); + void R_InitTextureMapping(); + void R_SetupBuffer(); + void R_SetupFreelook(); + void R_InitRenderer(); +} diff --git a/src/swrenderer/segments/r_clipsegment.cpp b/src/swrenderer/segments/r_clipsegment.cpp index 66dbc354f4..593a253e38 100644 --- a/src/swrenderer/segments/r_clipsegment.cpp +++ b/src/swrenderer/segments/r_clipsegment.cpp @@ -18,7 +18,6 @@ #include "i_system.h" #include "p_lnspec.h" #include "p_setup.h" -#include "swrenderer/r_main.h" #include "swrenderer/drawers/r_draw.h" #include "swrenderer/scene/r_3dfloors.h" #include "a_sharedglobal.h" diff --git a/src/swrenderer/segments/r_drawsegment.cpp b/src/swrenderer/segments/r_drawsegment.cpp index 99ad33cefe..37356da0aa 100644 --- a/src/swrenderer/segments/r_drawsegment.cpp +++ b/src/swrenderer/segments/r_drawsegment.cpp @@ -28,7 +28,6 @@ #include "po_man.h" #include "r_data/colormaps.h" #include "d_net.h" -#include "swrenderer/r_main.h" #include "swrenderer/r_memory.h" #include "swrenderer/drawers/r_draw.h" #include "swrenderer/scene/r_3dfloors.h" @@ -39,6 +38,8 @@ #include "swrenderer/line/r_fogboundary.h" #include "swrenderer/segments/r_drawsegment.h" #include "swrenderer/things/r_visiblesprite.h" +#include "swrenderer/scene/r_light.h" +#include "swrenderer/scene/r_viewport.h" EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor); diff --git 
a/src/swrenderer/segments/r_portalsegment.cpp b/src/swrenderer/segments/r_portalsegment.cpp index a1bb2bb74a..302dbe3c9c 100644 --- a/src/swrenderer/segments/r_portalsegment.cpp +++ b/src/swrenderer/segments/r_portalsegment.cpp @@ -18,7 +18,6 @@ #include "i_system.h" #include "p_lnspec.h" #include "p_setup.h" -#include "swrenderer/r_main.h" #include "swrenderer/drawers/r_draw.h" #include "swrenderer/scene/r_3dfloors.h" #include "a_sharedglobal.h" diff --git a/src/swrenderer/things/r_decal.cpp b/src/swrenderer/things/r_decal.cpp index 59db70b8ce..e1f2da1c79 100644 --- a/src/swrenderer/things/r_decal.cpp +++ b/src/swrenderer/things/r_decal.cpp @@ -19,7 +19,6 @@ #include "doomstat.h" #include "doomdata.h" #include "p_lnspec.h" -#include "swrenderer/r_main.h" #include "r_sky.h" #include "v_video.h" #include "m_swap.h" @@ -38,6 +37,9 @@ #include "swrenderer/line/r_walldraw.h" #include "swrenderer/segments/r_drawsegment.h" #include "swrenderer/scene/r_portal.h" +#include "swrenderer/scene/r_scene.h" +#include "swrenderer/scene/r_viewport.h" +#include "swrenderer/scene/r_light.h" #include "swrenderer/things/r_wallsprite.h" #include "swrenderer/r_memory.h" diff --git a/src/swrenderer/things/r_particle.cpp b/src/swrenderer/things/r_particle.cpp index 079b229fc7..ae5b349785 100644 --- a/src/swrenderer/things/r_particle.cpp +++ b/src/swrenderer/things/r_particle.cpp @@ -20,7 +20,6 @@ #include "m_swap.h" #include "i_system.h" #include "w_wad.h" -#include "swrenderer/r_main.h" #include "swrenderer/things/r_particle.h" #include "c_console.h" #include "c_cvars.h" @@ -53,6 +52,8 @@ #include "swrenderer/segments/r_drawsegment.h" #include "swrenderer/scene/r_portal.h" #include "swrenderer/r_memory.h" +#include "swrenderer/scene/r_viewport.h" +#include "swrenderer/scene/r_light.h" EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor); diff --git a/src/swrenderer/things/r_playersprite.cpp b/src/swrenderer/things/r_playersprite.cpp index e53a9b2bc3..1efc780f09 100644 --- 
a/src/swrenderer/things/r_playersprite.cpp +++ b/src/swrenderer/things/r_playersprite.cpp @@ -20,7 +20,6 @@ #include "m_swap.h" #include "i_system.h" #include "w_wad.h" -#include "swrenderer/r_main.h" #include "swrenderer/things/r_playersprite.h" #include "c_console.h" #include "c_cvars.h" @@ -51,6 +50,9 @@ #include "r_voxel.h" #include "swrenderer/segments/r_drawsegment.h" #include "swrenderer/scene/r_portal.h" +#include "swrenderer/scene/r_scene.h" +#include "swrenderer/scene/r_viewport.h" +#include "swrenderer/scene/r_light.h" #include "swrenderer/things/r_sprite.h" #include "swrenderer/r_memory.h" #include "g_levellocals.h" diff --git a/src/swrenderer/things/r_playersprite.h b/src/swrenderer/things/r_playersprite.h index 293cf9d3a2..6030c2ad33 100644 --- a/src/swrenderer/things/r_playersprite.h +++ b/src/swrenderer/things/r_playersprite.h @@ -15,6 +15,8 @@ #include "r_visiblesprite.h" +class DPSprite; + namespace swrenderer { class RenderPlayerSprite diff --git a/src/swrenderer/things/r_sprite.cpp b/src/swrenderer/things/r_sprite.cpp index c75720afe2..f689d6eb03 100644 --- a/src/swrenderer/things/r_sprite.cpp +++ b/src/swrenderer/things/r_sprite.cpp @@ -20,7 +20,6 @@ #include "m_swap.h" #include "i_system.h" #include "w_wad.h" -#include "swrenderer/r_main.h" #include "swrenderer/things/r_wallsprite.h" #include "c_console.h" #include "c_cvars.h" @@ -51,6 +50,9 @@ #include "r_voxel.h" #include "swrenderer/segments/r_drawsegment.h" #include "swrenderer/scene/r_portal.h" +#include "swrenderer/scene/r_scene.h" +#include "swrenderer/scene/r_viewport.h" +#include "swrenderer/scene/r_light.h" #include "swrenderer/things/r_sprite.h" #include "swrenderer/r_memory.h" diff --git a/src/swrenderer/things/r_visiblesprite.cpp b/src/swrenderer/things/r_visiblesprite.cpp index 2eb52770e3..71f036c9a0 100644 --- a/src/swrenderer/things/r_visiblesprite.cpp +++ b/src/swrenderer/things/r_visiblesprite.cpp @@ -22,7 +22,6 @@ #include "w_wad.h" #include "g_levellocals.h" #include 
"p_maputl.h" -#include "swrenderer/r_main.h" #include "swrenderer/things/r_visiblesprite.h" #include "swrenderer/things/r_voxel.h" #include "swrenderer/things/r_particle.h" diff --git a/src/swrenderer/things/r_visiblespritelist.cpp b/src/swrenderer/things/r_visiblespritelist.cpp index fff4dce8eb..db6689c80e 100644 --- a/src/swrenderer/things/r_visiblespritelist.cpp +++ b/src/swrenderer/things/r_visiblespritelist.cpp @@ -22,7 +22,6 @@ #include "w_wad.h" #include "g_levellocals.h" #include "p_maputl.h" -#include "swrenderer/r_main.h" #include "swrenderer/things/r_visiblesprite.h" #include "swrenderer/things/r_visiblespritelist.h" #include "swrenderer/r_memory.h" diff --git a/src/swrenderer/things/r_voxel.cpp b/src/swrenderer/things/r_voxel.cpp index da9adc6be6..7770b46e4b 100644 --- a/src/swrenderer/things/r_voxel.cpp +++ b/src/swrenderer/things/r_voxel.cpp @@ -38,7 +38,9 @@ #include "swrenderer/things/r_voxel.h" #include "swrenderer/scene/r_portal.h" #include "swrenderer/scene/r_translucent_pass.h" -#include "swrenderer/r_main.h" +#include "swrenderer/scene/r_scene.h" +#include "swrenderer/scene/r_viewport.h" +#include "swrenderer/scene/r_light.h" EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor) diff --git a/src/swrenderer/things/r_wallsprite.cpp b/src/swrenderer/things/r_wallsprite.cpp index de4da612de..2d1455a1a5 100644 --- a/src/swrenderer/things/r_wallsprite.cpp +++ b/src/swrenderer/things/r_wallsprite.cpp @@ -20,7 +20,6 @@ #include "m_swap.h" #include "i_system.h" #include "w_wad.h" -#include "swrenderer/r_main.h" #include "swrenderer/things/r_wallsprite.h" #include "c_console.h" #include "c_cvars.h" @@ -52,6 +51,9 @@ #include "r_voxel.h" #include "swrenderer/segments/r_drawsegment.h" #include "swrenderer/scene/r_portal.h" +#include "swrenderer/scene/r_scene.h" +#include "swrenderer/scene/r_viewport.h" +#include "swrenderer/scene/r_light.h" #include "swrenderer/line/r_wallsetup.h" #include "swrenderer/line/r_walldraw.h" #include "swrenderer/r_memory.h" diff 
--git a/src/v_draw.cpp b/src/v_draw.cpp index a285fdd1ee..689546f8c8 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -45,7 +45,8 @@ #ifndef NO_SWRENDER #include "swrenderer/drawers/r_draw.h" #include "swrenderer/drawers/r_draw_rgba.h" -#include "swrenderer/r_main.h" +#include "swrenderer/scene/r_light.h" +#include "swrenderer/scene/r_viewport.h" #endif #include "r_data/r_translate.h" #include "doomstat.h" diff --git a/src/win32/fb_d3d9.cpp b/src/win32/fb_d3d9.cpp index cf627f4d7f..d28ef2246f 100644 --- a/src/win32/fb_d3d9.cpp +++ b/src/win32/fb_d3d9.cpp @@ -63,7 +63,6 @@ #include "v_pfx.h" #include "stats.h" #include "doomerrors.h" -#include "swrenderer/r_main.h" #include "r_data/r_translate.h" #include "f_wipe.h" #include "sbar.h" @@ -73,6 +72,7 @@ #include "w_wad.h" #include "r_data/colormaps.h" #include "SkylineBinPack.h" +#include "swrenderer/scene/r_light.h" // MACROS ------------------------------------------------------------------ From 40f79371f50da1a548dea54ffc936f43218cb14c Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 12 Jan 2017 18:38:27 +0100 Subject: [PATCH 697/912] Remove GlobVis global as it apparently wasn't very global! 
--- src/swrenderer/line/r_line.cpp | 2 +- src/swrenderer/plane/r_flatplane.h | 1 + src/swrenderer/scene/r_light.cpp | 2 -- src/swrenderer/scene/r_light.h | 2 -- src/swrenderer/things/r_wallsprite.cpp | 2 +- 5 files changed, 3 insertions(+), 6 deletions(-) diff --git a/src/swrenderer/line/r_line.cpp b/src/swrenderer/line/r_line.cpp index 5679b8e507..9398611cfb 100644 --- a/src/swrenderer/line/r_line.cpp +++ b/src/swrenderer/line/r_line.cpp @@ -940,7 +940,7 @@ namespace swrenderer { wallshade = LIGHT2SHADE(curline->sidedef->GetLightLevel(foggy, frontsector->lightlevel) + r_actualextralight); - GlobVis = r_WallVisibility; + double GlobVis = r_WallVisibility; rw_lightleft = float(GlobVis / WallC.sz1); rw_lightstep = float((GlobVis / WallC.sz2 - rw_lightleft) / (WallC.sx2 - WallC.sx1)); } diff --git a/src/swrenderer/plane/r_flatplane.h b/src/swrenderer/plane/r_flatplane.h index c72b6402b3..bd830f63b2 100644 --- a/src/swrenderer/plane/r_flatplane.h +++ b/src/swrenderer/plane/r_flatplane.h @@ -33,6 +33,7 @@ namespace swrenderer double planeheight; bool plane_shade; int planeshade; + double GlobVis; fixed_t pviewx, pviewy; fixed_t xscale, yscale; double xstepscale, ystepscale; diff --git a/src/swrenderer/scene/r_light.cpp b/src/swrenderer/scene/r_light.cpp index e5cfb3f4ff..7a45d93777 100644 --- a/src/swrenderer/scene/r_light.cpp +++ b/src/swrenderer/scene/r_light.cpp @@ -45,8 +45,6 @@ namespace swrenderer double r_SpriteVisibility; double r_ParticleVisibility; - double GlobVis; - FDynamicColormap *basecolormap; // [RH] colormap currently drawing with int fixedlightlev; FSWColormap *fixedcolormap; diff --git a/src/swrenderer/scene/r_light.h b/src/swrenderer/scene/r_light.h index eab64f0944..e39b630674 100644 --- a/src/swrenderer/scene/r_light.h +++ b/src/swrenderer/scene/r_light.h @@ -57,8 +57,6 @@ namespace swrenderer extern float r_TiltVisibility; extern double r_SpriteVisibility; - extern double GlobVis; - extern int r_actualextralight; extern bool foggy; extern int 
fixedlightlev; diff --git a/src/swrenderer/things/r_wallsprite.cpp b/src/swrenderer/things/r_wallsprite.cpp index 2d1455a1a5..e55b0fcc52 100644 --- a/src/swrenderer/things/r_wallsprite.cpp +++ b/src/swrenderer/things/r_wallsprite.cpp @@ -175,7 +175,7 @@ namespace swrenderer } int shade = LIGHT2SHADE(spr->sector->lightlevel + r_actualextralight); - GlobVis = r_WallVisibility; + double GlobVis = r_WallVisibility; float lightleft = float(GlobVis / spr->wallc.sz1); float lightstep = float((GlobVis / spr->wallc.sz2 - lightleft) / (spr->wallc.sx2 - spr->wallc.sx1)); float light = lightleft + (x1 - spr->wallc.sx1) * lightstep; From f912b4ab8bfb79c7b6d84d89f5fb03ecc32620b5 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 12 Jan 2017 19:09:13 +0100 Subject: [PATCH 698/912] Convert r_actualextralight global to a function --- src/swrenderer/line/r_line.cpp | 6 ++---- src/swrenderer/line/r_walldraw.cpp | 2 +- src/swrenderer/scene/r_light.cpp | 1 - src/swrenderer/scene/r_light.h | 3 ++- src/swrenderer/scene/r_opaque_pass.cpp | 13 ++++++------- src/swrenderer/scene/r_translucent_pass.cpp | 2 +- src/swrenderer/segments/r_drawsegment.cpp | 10 +++++----- src/swrenderer/things/r_playersprite.cpp | 3 +-- src/swrenderer/things/r_wallsprite.cpp | 2 +- 9 files changed, 19 insertions(+), 23 deletions(-) diff --git a/src/swrenderer/line/r_line.cpp b/src/swrenderer/line/r_line.cpp index 9398611cfb..fb5c956d2c 100644 --- a/src/swrenderer/line/r_line.cpp +++ b/src/swrenderer/line/r_line.cpp @@ -524,8 +524,7 @@ namespace swrenderer } else { - draw_segment->shade = LIGHT2SHADE(curline->sidedef->GetLightLevel(foggy, curline->frontsector->lightlevel) - + r_actualextralight); + draw_segment->shade = LIGHT2SHADE(curline->sidedef->GetLightLevel(foggy, curline->frontsector->lightlevel) + R_ActualExtraLight(foggy)); } if (draw_segment->bFogBoundary || draw_segment->maskedtexturecol != -1) @@ -938,8 +937,7 @@ namespace swrenderer if (fixedcolormap == NULL && fixedlightlev < 0) { - wallshade = 
LIGHT2SHADE(curline->sidedef->GetLightLevel(foggy, frontsector->lightlevel) - + r_actualextralight); + wallshade = LIGHT2SHADE(curline->sidedef->GetLightLevel(foggy, frontsector->lightlevel) + R_ActualExtraLight(foggy)); double GlobVis = r_WallVisibility; rw_lightleft = float(GlobVis / WallC.sz1); rw_lightstep = float((GlobVis / WallC.sz2 - rw_lightleft) / (WallC.sx2 - WallC.sx1)); diff --git a/src/swrenderer/line/r_walldraw.cpp b/src/swrenderer/line/r_walldraw.cpp index c5e1e036ac..b23a7e6022 100644 --- a/src/swrenderer/line/r_walldraw.cpp +++ b/src/swrenderer/line/r_walldraw.cpp @@ -464,7 +464,7 @@ namespace swrenderer lightlist_t *lit = &frontsector->e->XFloor.lightlist[i]; basecolormap = lit->extra_colormap; - wallshade = LIGHT2SHADE(curline->sidedef->GetLightLevel(fogginess, *lit->p_lightlevel, lit->lightsource != NULL) + r_actualextralight); + wallshade = LIGHT2SHADE(curline->sidedef->GetLightLevel(fogginess, *lit->p_lightlevel, lit->lightsource != NULL) + R_ActualExtraLight(foggy)); } ProcessNormalWall(WallC, x1, x2, up, dwal, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep); diff --git a/src/swrenderer/scene/r_light.cpp b/src/swrenderer/scene/r_light.cpp index 7a45d93777..7e82f33ca8 100644 --- a/src/swrenderer/scene/r_light.cpp +++ b/src/swrenderer/scene/r_light.cpp @@ -51,7 +51,6 @@ namespace swrenderer FSpecialColormap *realfixedcolormap; bool foggy; // [RH] ignore extralight and fullbright? 
- int r_actualextralight; namespace { diff --git a/src/swrenderer/scene/r_light.h b/src/swrenderer/scene/r_light.h index e39b630674..9f96856d9f 100644 --- a/src/swrenderer/scene/r_light.h +++ b/src/swrenderer/scene/r_light.h @@ -57,13 +57,14 @@ namespace swrenderer extern float r_TiltVisibility; extern double r_SpriteVisibility; - extern int r_actualextralight; extern bool foggy; extern int fixedlightlev; extern FSWColormap *fixedcolormap; extern FSpecialColormap *realfixedcolormap; extern FDynamicColormap *basecolormap; // [RH] Colormap for sector currently being drawn + inline int R_ActualExtraLight(bool fog) { return fog ? 0 : extralight << 4; } + void R_SetVisibility(double visibility); double R_GetVisibility(); diff --git a/src/swrenderer/scene/r_opaque_pass.cpp b/src/swrenderer/scene/r_opaque_pass.cpp index 7cdc30834e..1d15ac11e2 100644 --- a/src/swrenderer/scene/r_opaque_pass.cpp +++ b/src/swrenderer/scene/r_opaque_pass.cpp @@ -477,7 +477,6 @@ namespace swrenderer // [RH] set foggy flag foggy = level.fadeto || frontsector->ColorMap->Fade || (level.flags & LEVEL_HASFADETABLE); - r_actualextralight = foggy ? 0 : extralight << 4; // kg3D - fake lights if (fixedlightlev < 0 && frontsector->e && frontsector->e->XFloor.lightlist.Size()) @@ -506,7 +505,7 @@ namespace swrenderer frontsector->heightsec->GetTexture(sector_t::floor) == skyflatnum) ? VisiblePlaneList::Instance()->FindPlane(frontsector->ceilingplane, // killough 3/8/98 frontsector->GetTexture(sector_t::ceiling), - ceilinglightlevel + r_actualextralight, // killough 4/11/98 + ceilinglightlevel + R_ActualExtraLight(foggy), // killough 4/11/98 frontsector->GetAlpha(sector_t::ceiling), !!(frontsector->GetFlags(sector_t::ceiling) & PLANEF_ADDITIVE), frontsector->planes[sector_t::ceiling].xform, @@ -546,7 +545,7 @@ namespace swrenderer frontsector->heightsec->GetTexture(sector_t::ceiling) == skyflatnum) ? 
VisiblePlaneList::Instance()->FindPlane(frontsector->floorplane, frontsector->GetTexture(sector_t::floor), - floorlightlevel + r_actualextralight, // killough 3/16/98 + floorlightlevel + R_ActualExtraLight(foggy), // killough 3/16/98 frontsector->GetAlpha(sector_t::floor), !!(frontsector->GetFlags(sector_t::floor) & PLANEF_ADDITIVE), frontsector->planes[sector_t::floor].xform, @@ -611,7 +610,7 @@ namespace swrenderer ceilingplane = nullptr; floorplane = VisiblePlaneList::Instance()->FindPlane(frontsector->floorplane, frontsector->GetTexture(sector_t::floor), - floorlightlevel + r_actualextralight, // killough 3/16/98 + floorlightlevel + R_ActualExtraLight(foggy), // killough 3/16/98 frontsector->GetAlpha(sector_t::floor), !!(clip3d->fakeFloor->flags & FF_ADDITIVETRANS), frontsector->planes[position].xform, @@ -676,7 +675,7 @@ namespace swrenderer floorplane = nullptr; ceilingplane = VisiblePlaneList::Instance()->FindPlane(frontsector->ceilingplane, // killough 3/8/98 frontsector->GetTexture(sector_t::ceiling), - ceilinglightlevel + r_actualextralight, // killough 4/11/98 + ceilinglightlevel + R_ActualExtraLight(foggy), // killough 4/11/98 frontsector->GetAlpha(sector_t::ceiling), !!(clip3d->fakeFloor->flags & FF_ADDITIVETRANS), frontsector->planes[position].xform, @@ -710,7 +709,7 @@ namespace swrenderer // [RH] Add particles if ((unsigned int)(sub - subsectors) < (unsigned int)numsubsectors) { // Only do it for the main BSP. - int shade = LIGHT2SHADE((floorlightlevel + ceilinglightlevel) / 2 + r_actualextralight); + int shade = LIGHT2SHADE((floorlightlevel + ceilinglightlevel) / 2 + R_ActualExtraLight(foggy)); for (WORD i = ParticlesInSubsec[(unsigned int)(sub - subsectors)]; i != NO_PARTICLE; i = Particles[i].snext) { RenderParticle::Project(Particles + i, subsectors[sub - subsectors].sector, shade, FakeSide); @@ -825,7 +824,7 @@ namespace swrenderer // Well, now it will be done. 
sec->validcount = validcount; - int spriteshade = LIGHT2SHADE(lightlevel + r_actualextralight); + int spriteshade = LIGHT2SHADE(lightlevel + R_ActualExtraLight(foggy)); // Handle all things in sector. for (auto p = sec->touching_renderthings; p != nullptr; p = p->m_snext) diff --git a/src/swrenderer/scene/r_translucent_pass.cpp b/src/swrenderer/scene/r_translucent_pass.cpp index 9edbe972e8..6c3b4500c6 100644 --- a/src/swrenderer/scene/r_translucent_pass.cpp +++ b/src/swrenderer/scene/r_translucent_pass.cpp @@ -240,7 +240,7 @@ namespace swrenderer } else { // diminished light - int spriteshade = LIGHT2SHADE(sec->lightlevel + r_actualextralight); + int spriteshade = LIGHT2SHADE(sec->lightlevel + R_ActualExtraLight(foggy)); spr->Style.BaseColormap = mybasecolormap; spr->Style.ColormapNum = GETPALOOKUP(r_SpriteVisibility / MAX(MINZ, (double)spr->depth), spriteshade); } diff --git a/src/swrenderer/segments/r_drawsegment.cpp b/src/swrenderer/segments/r_drawsegment.cpp index 37356da0aa..b2829b884c 100644 --- a/src/swrenderer/segments/r_drawsegment.cpp +++ b/src/swrenderer/segments/r_drawsegment.cpp @@ -189,7 +189,7 @@ namespace swrenderer { lightlist_t *lit = &frontsector->e->XFloor.lightlist[i]; basecolormap = lit->extra_colormap; - wallshade = LIGHT2SHADE(curline->sidedef->GetLightLevel(foggy, *lit->p_lightlevel, lit->lightsource != nullptr) + r_actualextralight); + wallshade = LIGHT2SHADE(curline->sidedef->GetLightLevel(foggy, *lit->p_lightlevel, lit->lightsource != nullptr) + R_ActualExtraLight(foggy)); break; } } @@ -746,7 +746,7 @@ namespace swrenderer { lightlist_t *lit = &backsector->e->XFloor.lightlist[j]; basecolormap = lit->extra_colormap; - wallshade = LIGHT2SHADE(curline->sidedef->GetLightLevel(foggy, *lit->p_lightlevel, lit->lightsource != nullptr) + r_actualextralight); + wallshade = LIGHT2SHADE(curline->sidedef->GetLightLevel(foggy, *lit->p_lightlevel, lit->lightsource != nullptr) + R_ActualExtraLight(foggy)); break; } } @@ -759,7 +759,7 @@ namespace 
swrenderer { lightlist_t *lit = &frontsector->e->XFloor.lightlist[j]; basecolormap = lit->extra_colormap; - wallshade = LIGHT2SHADE(curline->sidedef->GetLightLevel(foggy, *lit->p_lightlevel, lit->lightsource != nullptr) + r_actualextralight); + wallshade = LIGHT2SHADE(curline->sidedef->GetLightLevel(foggy, *lit->p_lightlevel, lit->lightsource != nullptr) + R_ActualExtraLight(foggy)); break; } } @@ -920,7 +920,7 @@ namespace swrenderer { lightlist_t *lit = &backsector->e->XFloor.lightlist[j]; basecolormap = lit->extra_colormap; - wallshade = LIGHT2SHADE(curline->sidedef->GetLightLevel(foggy, *lit->p_lightlevel, lit->lightsource != nullptr) + r_actualextralight); + wallshade = LIGHT2SHADE(curline->sidedef->GetLightLevel(foggy, *lit->p_lightlevel, lit->lightsource != nullptr) + R_ActualExtraLight(foggy)); break; } } @@ -933,7 +933,7 @@ namespace swrenderer { lightlist_t *lit = &frontsector->e->XFloor.lightlist[j]; basecolormap = lit->extra_colormap; - wallshade = LIGHT2SHADE(curline->sidedef->GetLightLevel(foggy, *lit->p_lightlevel, lit->lightsource != nullptr) + r_actualextralight); + wallshade = LIGHT2SHADE(curline->sidedef->GetLightLevel(foggy, *lit->p_lightlevel, lit->lightsource != nullptr) + R_ActualExtraLight(foggy)); break; } } diff --git a/src/swrenderer/things/r_playersprite.cpp b/src/swrenderer/things/r_playersprite.cpp index 1efc780f09..72a383b58a 100644 --- a/src/swrenderer/things/r_playersprite.cpp +++ b/src/swrenderer/things/r_playersprite.cpp @@ -137,10 +137,9 @@ namespace swrenderer // [RH] set foggy flag foggy = (level.fadeto || basecolormap->Fade || (level.flags & LEVEL_HASFADETABLE)); - r_actualextralight = foggy ? 
0 : extralight << 4; // get light level - lightnum = ((floorlight + ceilinglight) >> 1) + r_actualextralight; + lightnum = ((floorlight + ceilinglight) >> 1) + R_ActualExtraLight(foggy); int spriteshade = LIGHT2SHADE(lightnum) - 24 * FRACUNIT; if (camera->player != NULL) diff --git a/src/swrenderer/things/r_wallsprite.cpp b/src/swrenderer/things/r_wallsprite.cpp index e55b0fcc52..b340897e04 100644 --- a/src/swrenderer/things/r_wallsprite.cpp +++ b/src/swrenderer/things/r_wallsprite.cpp @@ -174,7 +174,7 @@ namespace swrenderer rereadcolormap = false; } - int shade = LIGHT2SHADE(spr->sector->lightlevel + r_actualextralight); + int shade = LIGHT2SHADE(spr->sector->lightlevel + R_ActualExtraLight(foggy)); double GlobVis = r_WallVisibility; float lightleft = float(GlobVis / spr->wallc.sz1); float lightstep = float((GlobVis / spr->wallc.sz2 - lightleft) / (spr->wallc.sx2 - spr->wallc.sx1)); From 929e07697d60fcc2721f8ec5928f5eb52b187cb1 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 12 Jan 2017 20:13:21 +0100 Subject: [PATCH 699/912] Remove foggy global --- src/swrenderer/line/r_line.cpp | 12 ++++---- src/swrenderer/line/r_line.h | 4 ++- src/swrenderer/line/r_walldraw.cpp | 31 ++++++++++----------- src/swrenderer/line/r_walldraw.h | 4 +-- src/swrenderer/scene/r_light.cpp | 2 -- src/swrenderer/scene/r_light.h | 1 - src/swrenderer/scene/r_opaque_pass.cpp | 26 ++++++++--------- src/swrenderer/scene/r_opaque_pass.h | 4 +-- src/swrenderer/scene/r_portal.cpp | 1 + src/swrenderer/scene/r_translucent_pass.cpp | 4 +-- src/swrenderer/segments/r_drawsegment.cpp | 14 +++++----- src/swrenderer/segments/r_drawsegment.h | 1 + src/swrenderer/things/r_decal.cpp | 6 ++-- src/swrenderer/things/r_decal.h | 4 +-- src/swrenderer/things/r_particle.cpp | 3 +- src/swrenderer/things/r_particle.h | 2 +- src/swrenderer/things/r_playersprite.cpp | 6 ++-- src/swrenderer/things/r_sprite.cpp | 6 ++-- src/swrenderer/things/r_sprite.h | 2 +- src/swrenderer/things/r_visiblesprite.h | 2 ++ 
src/swrenderer/things/r_voxel.cpp | 6 ++-- src/swrenderer/things/r_voxel.h | 2 +- src/swrenderer/things/r_wallsprite.cpp | 7 +++-- src/swrenderer/things/r_wallsprite.h | 2 +- 24 files changed, 81 insertions(+), 71 deletions(-) diff --git a/src/swrenderer/line/r_line.cpp b/src/swrenderer/line/r_line.cpp index fb5c956d2c..c61caf2da7 100644 --- a/src/swrenderer/line/r_line.cpp +++ b/src/swrenderer/line/r_line.cpp @@ -55,7 +55,7 @@ EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor); namespace swrenderer { - void SWRenderLine::Render(seg_t *line, subsector_t *subsector, sector_t *sector, sector_t *fakebacksector, visplane_t *linefloorplane, visplane_t *lineceilingplane) + void SWRenderLine::Render(seg_t *line, subsector_t *subsector, sector_t *sector, sector_t *fakebacksector, visplane_t *linefloorplane, visplane_t *lineceilingplane, bool infog) { static sector_t tempsec; // killough 3/8/98: ceiling/water hack bool solid; @@ -66,6 +66,7 @@ namespace swrenderer backsector = fakebacksector; floorplane = linefloorplane; ceilingplane = lineceilingplane; + foggy = infog; curline = line; @@ -365,6 +366,7 @@ namespace swrenderer draw_segment->curline = curline; draw_segment->bFogBoundary = false; draw_segment->bFakeBoundary = false; + draw_segment->foggy = foggy; Clip3DFloors *clip3d = Clip3DFloors::Instance(); if (clip3d->fake3D & FAKE3D_FAKEMASK) draw_segment->fake = 1; @@ -589,7 +591,7 @@ namespace swrenderer // [ZZ] Only if not an active mirror if (!rw_markportal) { - RenderDecal::RenderDecals(curline->sidedef, draw_segment, wallshade, rw_lightleft, rw_lightstep, curline, WallC); + RenderDecal::RenderDecals(curline->sidedef, draw_segment, wallshade, rw_lightleft, rw_lightstep, curline, WallC, foggy); } if (rw_markportal) @@ -1077,7 +1079,7 @@ namespace swrenderer { rw_offset = -rw_offset; } - R_DrawWallSegment(frontsector, curline, WallC, rw_pic, x1, x2, walltop, wallbottom, swall, lwall, yscale, MAX(rw_frontcz1, rw_frontcz2), MIN(rw_frontfz1, rw_frontfz2), false, wallshade, 
rw_offset, rw_light, rw_lightstep, light_list); + R_DrawWallSegment(frontsector, curline, WallC, rw_pic, x1, x2, walltop, wallbottom, swall, lwall, yscale, MAX(rw_frontcz1, rw_frontcz2), MIN(rw_frontfz1, rw_frontfz2), false, wallshade, rw_offset, rw_light, rw_lightstep, light_list, foggy); } fillshort(ceilingclip + x1, x2 - x1, viewheight); fillshort(floorclip + x1, x2 - x1, 0xffff); @@ -1113,7 +1115,7 @@ namespace swrenderer { rw_offset = -rw_offset; } - R_DrawWallSegment(frontsector, curline, WallC, rw_pic, x1, x2, walltop, wallupper, swall, lwall, yscale, MAX(rw_frontcz1, rw_frontcz2), MIN(rw_backcz1, rw_backcz2), false, wallshade, rw_offset, rw_light, rw_lightstep, light_list); + R_DrawWallSegment(frontsector, curline, WallC, rw_pic, x1, x2, walltop, wallupper, swall, lwall, yscale, MAX(rw_frontcz1, rw_frontcz2), MIN(rw_backcz1, rw_backcz2), false, wallshade, rw_offset, rw_light, rw_lightstep, light_list, foggy); } memcpy(ceilingclip + x1, wallupper + x1, (x2 - x1) * sizeof(short)); } @@ -1152,7 +1154,7 @@ namespace swrenderer { rw_offset = -rw_offset; } - R_DrawWallSegment(frontsector, curline, WallC, rw_pic, x1, x2, walllower, wallbottom, swall, lwall, yscale, MAX(rw_backfz1, rw_backfz2), MIN(rw_frontfz1, rw_frontfz2), false, wallshade, rw_offset, rw_light, rw_lightstep, light_list); + R_DrawWallSegment(frontsector, curline, WallC, rw_pic, x1, x2, walllower, wallbottom, swall, lwall, yscale, MAX(rw_backfz1, rw_backfz2), MIN(rw_frontfz1, rw_frontfz2), false, wallshade, rw_offset, rw_light, rw_lightstep, light_list, foggy); } memcpy(floorclip + x1, walllower + x1, (x2 - x1) * sizeof(short)); } diff --git a/src/swrenderer/line/r_line.h b/src/swrenderer/line/r_line.h index ae758ccabf..0be56dff58 100644 --- a/src/swrenderer/line/r_line.h +++ b/src/swrenderer/line/r_line.h @@ -48,7 +48,7 @@ namespace swrenderer class SWRenderLine { public: - void Render(seg_t *line, subsector_t *subsector, sector_t *sector, sector_t *fakebacksector, visplane_t *floorplane, 
visplane_t *ceilingplane); + void Render(seg_t *line, subsector_t *subsector, sector_t *sector, sector_t *fakebacksector, visplane_t *floorplane, visplane_t *ceilingplane, bool foggy); private: bool RenderWallSegment(int x1, int x2); @@ -116,5 +116,7 @@ namespace swrenderer FTexture *toptexture; FTexture *bottomtexture; FTexture *midtexture; + + bool foggy; }; } diff --git a/src/swrenderer/line/r_walldraw.cpp b/src/swrenderer/line/r_walldraw.cpp index b23a7e6022..a33568472f 100644 --- a/src/swrenderer/line/r_walldraw.cpp +++ b/src/swrenderer/line/r_walldraw.cpp @@ -431,10 +431,9 @@ namespace swrenderer } } - static void ProcessStripedWall(sector_t *frontsector, seg_t *curline, const FWallCoords &WallC, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade, fixed_t xoffset, float light, float lightstep) + static void ProcessStripedWall(sector_t *frontsector, seg_t *curline, const FWallCoords &WallC, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade, fixed_t xoffset, float light, float lightstep, bool foggy) { FDynamicColormap *startcolormap = basecolormap; - bool fogginess = foggy; short most1[MAXWIDTH], most2[MAXWIDTH], most3[MAXWIDTH]; short *up, *down; @@ -464,14 +463,14 @@ namespace swrenderer lightlist_t *lit = &frontsector->e->XFloor.lightlist[i]; basecolormap = lit->extra_colormap; - wallshade = LIGHT2SHADE(curline->sidedef->GetLightLevel(fogginess, *lit->p_lightlevel, lit->lightsource != NULL) + R_ActualExtraLight(foggy)); + wallshade = LIGHT2SHADE(curline->sidedef->GetLightLevel(foggy, *lit->p_lightlevel, lit->lightsource != NULL) + R_ActualExtraLight(foggy)); } ProcessNormalWall(WallC, x1, x2, up, dwal, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep); basecolormap = startcolormap; } - static void ProcessWall(sector_t *frontsector, seg_t *curline, const FWallCoords &WallC, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double 
yrepeat, int wallshade, fixed_t xoffset, float light, float lightstep, bool mask) + static void ProcessWall(sector_t *frontsector, seg_t *curline, const FWallCoords &WallC, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade, fixed_t xoffset, float light, float lightstep, bool mask, bool foggy) { if (mask) { @@ -492,7 +491,7 @@ namespace swrenderer } else { - ProcessStripedWall(frontsector, curline, WallC, x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep); + ProcessStripedWall(frontsector, curline, WallC, x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, foggy); } } } @@ -508,7 +507,7 @@ namespace swrenderer // //============================================================================= - static void ProcessWallNP2(sector_t *frontsector, seg_t *curline, const FWallCoords &WallC, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, double top, double bot, int wallshade, fixed_t xoffset, float light, float lightstep, bool mask) + static void ProcessWallNP2(sector_t *frontsector, seg_t *curline, const FWallCoords &WallC, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, double top, double bot, int wallshade, fixed_t xoffset, float light, float lightstep, bool mask, bool foggy) { short most1[MAXWIDTH], most2[MAXWIDTH], most3[MAXWIDTH]; short *up, *down; @@ -535,14 +534,14 @@ namespace swrenderer { down[j] = clamp(most3[j], up[j], dwal[j]); } - ProcessWall(frontsector, curline, WallC, x1, x2, up, down, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, mask); + ProcessWall(frontsector, curline, WallC, x1, x2, up, down, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, mask, foggy); up = down; down = (down == most1) ? 
most2 : most1; } partition -= scaledtexheight; dc_texturemid -= texheight; } - ProcessWall(frontsector, curline, WallC, x1, x2, up, dwal, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, mask); + ProcessWall(frontsector, curline, WallC, x1, x2, up, dwal, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, mask, foggy); } else { // upside down: draw strips from bottom to top @@ -559,18 +558,18 @@ namespace swrenderer { up[j] = clamp(most3[j], uwal[j], down[j]); } - ProcessWall(frontsector, curline, WallC, x1, x2, up, down, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, mask); + ProcessWall(frontsector, curline, WallC, x1, x2, up, down, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, mask, foggy); down = up; up = (up == most1) ? most2 : most1; } partition -= scaledtexheight; dc_texturemid -= texheight; } - ProcessWall(frontsector, curline, WallC, x1, x2, uwal, down, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, mask); + ProcessWall(frontsector, curline, WallC, x1, x2, uwal, down, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, mask, foggy); } } - void R_DrawDrawSeg(sector_t *frontsector, seg_t *curline, const FWallCoords &WallC, FTexture *pic, drawseg_t *ds, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade, fixed_t xoffset, float light, float lightstep) + void R_DrawDrawSeg(sector_t *frontsector, seg_t *curline, const FWallCoords &WallC, FTexture *pic, drawseg_t *ds, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade, fixed_t xoffset, float light, float lightstep, bool foggy) { rw_pic = pic; if (rw_pic->GetHeight() != 1 << rw_pic->HeightBits) @@ -590,26 +589,26 @@ namespace swrenderer { bot = MAX(bot, clip3d->sclipBottom); } - ProcessWallNP2(frontsector, curline, WallC, x1, x2, uwal, dwal, swal, lwal, yrepeat, top, bot, wallshade, xoffset, light, lightstep, true); + ProcessWallNP2(frontsector, curline, 
WallC, x1, x2, uwal, dwal, swal, lwal, yrepeat, top, bot, wallshade, xoffset, light, lightstep, true, foggy); } else { - ProcessWall(frontsector, curline, WallC, x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, true); + ProcessWall(frontsector, curline, WallC, x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, true, foggy); } } - void R_DrawWallSegment(sector_t *frontsector, seg_t *curline, const FWallCoords &WallC, FTexture *pic, int x1, int x2, short *walltop, short *wallbottom, float *swall, fixed_t *lwall, double yscale, double top, double bottom, bool mask, int wallshade, fixed_t xoffset, float light, float lightstep, FLightNode *light_list) + void R_DrawWallSegment(sector_t *frontsector, seg_t *curline, const FWallCoords &WallC, FTexture *pic, int x1, int x2, short *walltop, short *wallbottom, float *swall, fixed_t *lwall, double yscale, double top, double bottom, bool mask, int wallshade, fixed_t xoffset, float light, float lightstep, FLightNode *light_list, bool foggy) { rw_pic = pic; dc_light_list = light_list; if (rw_pic->GetHeight() != 1 << rw_pic->HeightBits) { - ProcessWallNP2(frontsector, curline, WallC, x1, x2, walltop, wallbottom, swall, lwall, yscale, top, bottom, wallshade, xoffset, light, lightstep, false); + ProcessWallNP2(frontsector, curline, WallC, x1, x2, walltop, wallbottom, swall, lwall, yscale, top, bottom, wallshade, xoffset, light, lightstep, false, foggy); } else { - ProcessWall(frontsector, curline, WallC, x1, x2, walltop, wallbottom, swall, lwall, yscale, wallshade, xoffset, light, lightstep, false); + ProcessWall(frontsector, curline, WallC, x1, x2, walltop, wallbottom, swall, lwall, yscale, wallshade, xoffset, light, lightstep, false, foggy); } dc_light_list = nullptr; } diff --git a/src/swrenderer/line/r_walldraw.h b/src/swrenderer/line/r_walldraw.h index d3fad4ea89..797795099a 100644 --- a/src/swrenderer/line/r_walldraw.h +++ b/src/swrenderer/line/r_walldraw.h @@ -37,7 
+37,7 @@ namespace swrenderer uint32_t height; }; - void R_DrawWallSegment(sector_t *frontsector, seg_t *curline, const FWallCoords &WallC, FTexture *rw_pic, int x1, int x2, short *walltop, short *wallbottom, float *swall, fixed_t *lwall, double yscale, double top, double bottom, bool mask, int wallshade, fixed_t xoffset, float light, float lightstep, FLightNode *light_list); + void R_DrawWallSegment(sector_t *frontsector, seg_t *curline, const FWallCoords &WallC, FTexture *rw_pic, int x1, int x2, short *walltop, short *wallbottom, float *swall, fixed_t *lwall, double yscale, double top, double bottom, bool mask, int wallshade, fixed_t xoffset, float light, float lightstep, FLightNode *light_list, bool foggy); void R_DrawSkySegment(FTexture *rw_pic, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade, fixed_t xoffset, float light, float lightstep, const uint8_t *(*getcol)(FTexture *tex, int col)); - void R_DrawDrawSeg(sector_t *frontsector, seg_t *curline, const FWallCoords &WallC, FTexture *rw_pic, drawseg_t *ds, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade, fixed_t xoffset, float light, float lightstep); + void R_DrawDrawSeg(sector_t *frontsector, seg_t *curline, const FWallCoords &WallC, FTexture *rw_pic, drawseg_t *ds, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade, fixed_t xoffset, float light, float lightstep, bool foggy); } diff --git a/src/swrenderer/scene/r_light.cpp b/src/swrenderer/scene/r_light.cpp index 7e82f33ca8..d6c03b8317 100644 --- a/src/swrenderer/scene/r_light.cpp +++ b/src/swrenderer/scene/r_light.cpp @@ -50,8 +50,6 @@ namespace swrenderer FSWColormap *fixedcolormap; FSpecialColormap *realfixedcolormap; - bool foggy; // [RH] ignore extralight and fullbright? 
- namespace { double CurrentVisibility = 8.f; diff --git a/src/swrenderer/scene/r_light.h b/src/swrenderer/scene/r_light.h index 9f96856d9f..245eaafc91 100644 --- a/src/swrenderer/scene/r_light.h +++ b/src/swrenderer/scene/r_light.h @@ -57,7 +57,6 @@ namespace swrenderer extern float r_TiltVisibility; extern double r_SpriteVisibility; - extern bool foggy; extern int fixedlightlev; extern FSWColormap *fixedcolormap; extern FSpecialColormap *realfixedcolormap; diff --git a/src/swrenderer/scene/r_opaque_pass.cpp b/src/swrenderer/scene/r_opaque_pass.cpp index 1d15ac11e2..b49782517d 100644 --- a/src/swrenderer/scene/r_opaque_pass.cpp +++ b/src/swrenderer/scene/r_opaque_pass.cpp @@ -398,7 +398,7 @@ namespace swrenderer } // kg3D - add fake segs, never rendered - void RenderOpaquePass::FakeDrawLoop(subsector_t *sub, visplane_t *floorplane, visplane_t *ceilingplane) + void RenderOpaquePass::FakeDrawLoop(subsector_t *sub, visplane_t *floorplane, visplane_t *ceilingplane, bool foggy) { int count; seg_t* line; @@ -410,7 +410,7 @@ namespace swrenderer { if ((line->sidedef) && !(line->sidedef->Flags & WALLF_POLYOBJ)) { - renderline.Render(line, InSubsector, frontsector, nullptr, floorplane, ceilingplane); + renderline.Render(line, InSubsector, frontsector, nullptr, floorplane, ceilingplane, foggy); } line++; } @@ -476,7 +476,7 @@ namespace swrenderer cll = ceilinglightlevel; // [RH] set foggy flag - foggy = level.fadeto || frontsector->ColorMap->Fade || (level.flags & LEVEL_HASFADETABLE); + bool foggy = level.fadeto || frontsector->ColorMap->Fade || (level.flags & LEVEL_HASFADETABLE); // kg3D - fake lights if (fixedlightlev < 0 && frontsector->e && frontsector->e->XFloor.lightlist.Size()) @@ -620,7 +620,7 @@ namespace swrenderer if (floorplane) floorplane->AddLights(frontsector->lighthead); - FakeDrawLoop(sub, floorplane, ceilingplane); + FakeDrawLoop(sub, floorplane, ceilingplane, foggy); clip3d->fake3D = 0; frontsector = sub->sector; } @@ -685,7 +685,7 @@ namespace swrenderer 
if (ceilingplane) ceilingplane->AddLights(frontsector->lighthead); - FakeDrawLoop(sub, floorplane, ceilingplane); + FakeDrawLoop(sub, floorplane, ceilingplane, foggy); clip3d->fake3D = 0; frontsector = sub->sector; } @@ -704,7 +704,7 @@ namespace swrenderer // lightlevels on floor & ceiling lightlevels in the surrounding area. // [RH] Handle sprite lighting like Duke 3D: If the ceiling is a sky, sprites are lit by // it, otherwise they are lit by the floor. - AddSprites(sub->sector, frontsector->GetTexture(sector_t::ceiling) == skyflatnum ? ceilinglightlevel : floorlightlevel, FakeSide); + AddSprites(sub->sector, frontsector->GetTexture(sector_t::ceiling) == skyflatnum ? ceilinglightlevel : floorlightlevel, FakeSide, foggy); // [RH] Add particles if ((unsigned int)(sub - subsectors) < (unsigned int)numsubsectors) @@ -712,7 +712,7 @@ namespace swrenderer int shade = LIGHT2SHADE((floorlightlevel + ceilinglightlevel) / 2 + R_ActualExtraLight(foggy)); for (WORD i = ParticlesInSubsec[(unsigned int)(sub - subsectors)]; i != NO_PARTICLE; i = Particles[i].snext) { - RenderParticle::Project(Particles + i, subsectors[sub - subsectors].sector, shade, FakeSide); + RenderParticle::Project(Particles + i, subsectors[sub - subsectors].sector, shade, FakeSide, foggy); } } @@ -746,14 +746,14 @@ namespace swrenderer clip3d->fakeFloor->validcount = validcount; clip3d->NewClip(); } - renderline.Render(line, InSubsector, frontsector, &tempsec, floorplane, ceilingplane); // fake + renderline.Render(line, InSubsector, frontsector, &tempsec, floorplane, ceilingplane, foggy); // fake } clip3d->fakeFloor = nullptr; clip3d->fake3D = 0; floorplane = backupfp; ceilingplane = backupcp; } - renderline.Render(line, InSubsector, frontsector, nullptr, floorplane, ceilingplane); // now real + renderline.Render(line, InSubsector, frontsector, nullptr, floorplane, ceilingplane, foggy); // now real } line++; } @@ -809,7 +809,7 @@ namespace swrenderer fillshort(ceilingclip, viewwidth, !screen->Accel2D && 
ConBottom > viewwindowy && !bRenderingToCanvas ? (ConBottom - viewwindowy) : 0); } - void RenderOpaquePass::AddSprites(sector_t *sec, int lightlevel, WaterFakeSide fakeside) + void RenderOpaquePass::AddSprites(sector_t *sec, int lightlevel, WaterFakeSide fakeside, bool foggy) { F3DFloor *fakeceiling = nullptr; F3DFloor *fakefloor = nullptr; @@ -869,15 +869,15 @@ namespace swrenderer { if ((sprite.renderflags & RF_SPRITETYPEMASK) == RF_WALLSPRITE) { - RenderWallSprite::Project(thing, sprite.pos, sprite.picnum, sprite.spriteScale, sprite.renderflags, spriteshade); + RenderWallSprite::Project(thing, sprite.pos, sprite.picnum, sprite.spriteScale, sprite.renderflags, spriteshade, foggy); } else if (sprite.voxel) { - RenderVoxel::Project(thing, sprite.pos, sprite.voxel, sprite.spriteScale, sprite.renderflags, fakeside, fakefloor, fakeceiling, sec, spriteshade); + RenderVoxel::Project(thing, sprite.pos, sprite.voxel, sprite.spriteScale, sprite.renderflags, fakeside, fakefloor, fakeceiling, sec, spriteshade, foggy); } else { - RenderSprite::Project(thing, sprite.pos, sprite.tex, sprite.spriteScale, sprite.renderflags, fakeside, fakefloor, fakeceiling, sec, spriteshade); + RenderSprite::Project(thing, sprite.pos, sprite.tex, sprite.spriteScale, sprite.renderflags, fakeside, fakefloor, fakeceiling, sec, spriteshade, foggy); } } } diff --git a/src/swrenderer/scene/r_opaque_pass.h b/src/swrenderer/scene/r_opaque_pass.h index 72c764d04c..4ea6a64658 100644 --- a/src/swrenderer/scene/r_opaque_pass.h +++ b/src/swrenderer/scene/r_opaque_pass.h @@ -68,9 +68,9 @@ namespace swrenderer bool CheckBBox(float *bspcoord); void AddPolyobjs(subsector_t *sub); - void FakeDrawLoop(subsector_t *sub, visplane_t *floorplane, visplane_t *ceilingplane); + void FakeDrawLoop(subsector_t *sub, visplane_t *floorplane, visplane_t *ceilingplane, bool foggy); - void AddSprites(sector_t *sec, int lightlevel, WaterFakeSide fakeside); + void AddSprites(sector_t *sec, int lightlevel, WaterFakeSide fakeside, 
bool foggy); static bool IsPotentiallyVisible(AActor *thing); static bool GetThingSprite(AActor *thing, ThingSprite &sprite); diff --git a/src/swrenderer/scene/r_portal.cpp b/src/swrenderer/scene/r_portal.cpp index 6c89d9fc56..308390c7c7 100644 --- a/src/swrenderer/scene/r_portal.cpp +++ b/src/swrenderer/scene/r_portal.cpp @@ -215,6 +215,7 @@ namespace swrenderer draw_segment->bFogBoundary = false; draw_segment->curline = nullptr; draw_segment->fake = 0; + draw_segment->foggy = false; memcpy(openings + draw_segment->sprbottomclip, floorclip + pl->left, (pl->right - pl->left) * sizeof(short)); memcpy(openings + draw_segment->sprtopclip, ceilingclip + pl->left, (pl->right - pl->left) * sizeof(short)); diff --git a/src/swrenderer/scene/r_translucent_pass.cpp b/src/swrenderer/scene/r_translucent_pass.cpp index 6c3b4500c6..1d46cb8314 100644 --- a/src/swrenderer/scene/r_translucent_pass.cpp +++ b/src/swrenderer/scene/r_translucent_pass.cpp @@ -233,14 +233,14 @@ namespace swrenderer spr->Style.BaseColormap = mybasecolormap; spr->Style.ColormapNum = fixedlightlev >> COLORMAPSHIFT; } - else if (!foggy && (spr->renderflags & RF_FULLBRIGHT)) + else if (!spr->foggy && (spr->renderflags & RF_FULLBRIGHT)) { // full bright spr->Style.BaseColormap = (r_fullbrightignoresectorcolor) ? 
&FullNormalLight : mybasecolormap; spr->Style.ColormapNum = 0; } else { // diminished light - int spriteshade = LIGHT2SHADE(sec->lightlevel + R_ActualExtraLight(foggy)); + int spriteshade = LIGHT2SHADE(sec->lightlevel + R_ActualExtraLight(spr->foggy)); spr->Style.BaseColormap = mybasecolormap; spr->Style.ColormapNum = GETPALOOKUP(r_SpriteVisibility / MAX(MINZ, (double)spr->depth), spriteshade); } diff --git a/src/swrenderer/segments/r_drawsegment.cpp b/src/swrenderer/segments/r_drawsegment.cpp index b2829b884c..7d38eb2ec9 100644 --- a/src/swrenderer/segments/r_drawsegment.cpp +++ b/src/swrenderer/segments/r_drawsegment.cpp @@ -189,7 +189,7 @@ namespace swrenderer { lightlist_t *lit = &frontsector->e->XFloor.lightlist[i]; basecolormap = lit->extra_colormap; - wallshade = LIGHT2SHADE(curline->sidedef->GetLightLevel(foggy, *lit->p_lightlevel, lit->lightsource != nullptr) + R_ActualExtraLight(foggy)); + wallshade = LIGHT2SHADE(curline->sidedef->GetLightLevel(ds->foggy, *lit->p_lightlevel, lit->lightsource != nullptr) + R_ActualExtraLight(ds->foggy)); break; } } @@ -420,7 +420,7 @@ namespace swrenderer rw_offset = 0; rw_pic = tex; - R_DrawDrawSeg(frontsector, curline, WallC, rw_pic, ds, x1, x2, mceilingclip, mfloorclip, MaskedSWall, maskedtexturecol, ds->yscale, wallshade, rw_offset, rw_light, rw_lightstep); + R_DrawDrawSeg(frontsector, curline, WallC, rw_pic, ds, x1, x2, mceilingclip, mfloorclip, MaskedSWall, maskedtexturecol, ds->yscale, wallshade, rw_offset, rw_light, rw_lightstep, ds->foggy); } clearfog: @@ -547,7 +547,7 @@ namespace swrenderer } PrepLWall(lwall, curline->sidedef->TexelLength*xscale, ds->sx1, ds->sx2, WallT); - R_DrawDrawSeg(frontsector, curline, WallC, rw_pic, ds, x1, x2, wallupper, walllower, MaskedSWall, lwall, yscale, wallshade, rw_offset, rw_light, rw_lightstep); + R_DrawDrawSeg(frontsector, curline, WallC, rw_pic, ds, x1, x2, wallupper, walllower, MaskedSWall, lwall, yscale, wallshade, rw_offset, rw_light, rw_lightstep, ds->foggy); 
R_FinishSetPatchStyle(); } @@ -746,7 +746,7 @@ namespace swrenderer { lightlist_t *lit = &backsector->e->XFloor.lightlist[j]; basecolormap = lit->extra_colormap; - wallshade = LIGHT2SHADE(curline->sidedef->GetLightLevel(foggy, *lit->p_lightlevel, lit->lightsource != nullptr) + R_ActualExtraLight(foggy)); + wallshade = LIGHT2SHADE(curline->sidedef->GetLightLevel(ds->foggy, *lit->p_lightlevel, lit->lightsource != nullptr) + R_ActualExtraLight(ds->foggy)); break; } } @@ -759,7 +759,7 @@ namespace swrenderer { lightlist_t *lit = &frontsector->e->XFloor.lightlist[j]; basecolormap = lit->extra_colormap; - wallshade = LIGHT2SHADE(curline->sidedef->GetLightLevel(foggy, *lit->p_lightlevel, lit->lightsource != nullptr) + R_ActualExtraLight(foggy)); + wallshade = LIGHT2SHADE(curline->sidedef->GetLightLevel(ds->foggy, *lit->p_lightlevel, lit->lightsource != nullptr) + R_ActualExtraLight(ds->foggy)); break; } } @@ -920,7 +920,7 @@ namespace swrenderer { lightlist_t *lit = &backsector->e->XFloor.lightlist[j]; basecolormap = lit->extra_colormap; - wallshade = LIGHT2SHADE(curline->sidedef->GetLightLevel(foggy, *lit->p_lightlevel, lit->lightsource != nullptr) + R_ActualExtraLight(foggy)); + wallshade = LIGHT2SHADE(curline->sidedef->GetLightLevel(ds->foggy, *lit->p_lightlevel, lit->lightsource != nullptr) + R_ActualExtraLight(ds->foggy)); break; } } @@ -933,7 +933,7 @@ namespace swrenderer { lightlist_t *lit = &frontsector->e->XFloor.lightlist[j]; basecolormap = lit->extra_colormap; - wallshade = LIGHT2SHADE(curline->sidedef->GetLightLevel(foggy, *lit->p_lightlevel, lit->lightsource != nullptr) + R_ActualExtraLight(foggy)); + wallshade = LIGHT2SHADE(curline->sidedef->GetLightLevel(ds->foggy, *lit->p_lightlevel, lit->lightsource != nullptr) + R_ActualExtraLight(ds->foggy)); break; } } diff --git a/src/swrenderer/segments/r_drawsegment.h b/src/swrenderer/segments/r_drawsegment.h index 7ab957cd48..eaa1283a07 100644 --- a/src/swrenderer/segments/r_drawsegment.h +++ 
b/src/swrenderer/segments/r_drawsegment.h @@ -32,6 +32,7 @@ namespace swrenderer uint8_t bFogBoundary; uint8_t bFakeBoundary; // for fake walls int shade; + bool foggy; // Pointers to lists for sprite clipping, all three adjusted so [x1] is first value. ptrdiff_t sprtopclip; // type short diff --git a/src/swrenderer/things/r_decal.cpp b/src/swrenderer/things/r_decal.cpp index e1f2da1c79..7011bb08b0 100644 --- a/src/swrenderer/things/r_decal.cpp +++ b/src/swrenderer/things/r_decal.cpp @@ -47,11 +47,11 @@ EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor); namespace swrenderer { - void RenderDecal::RenderDecals(side_t *sidedef, drawseg_t *draw_segment, int wallshade, float lightleft, float lightstep, seg_t *curline, const FWallCoords &wallC) + void RenderDecal::RenderDecals(side_t *sidedef, drawseg_t *draw_segment, int wallshade, float lightleft, float lightstep, seg_t *curline, const FWallCoords &wallC, bool foggy) { for (DBaseDecal *decal = sidedef->AttachedDecals; decal != NULL; decal = decal->WallNext) { - Render(sidedef, decal, draw_segment, wallshade, lightleft, lightstep, curline, wallC, 0); + Render(sidedef, decal, draw_segment, wallshade, lightleft, lightstep, curline, wallC, foggy, 0); } } @@ -59,7 +59,7 @@ namespace swrenderer // = 1: drawing masked textures (including sprites) // Currently, only pass = 0 is done or used - void RenderDecal::Render(side_t *wall, DBaseDecal *decal, drawseg_t *clipper, int wallshade, float lightleft, float lightstep, seg_t *curline, FWallCoords WallC, int pass) + void RenderDecal::Render(side_t *wall, DBaseDecal *decal, drawseg_t *clipper, int wallshade, float lightleft, float lightstep, seg_t *curline, FWallCoords WallC, bool foggy, int pass) { DVector2 decal_left, decal_right, decal_pos; int x1, x2; diff --git a/src/swrenderer/things/r_decal.h b/src/swrenderer/things/r_decal.h index 4551e5c1ba..c16c2047ef 100644 --- a/src/swrenderer/things/r_decal.h +++ b/src/swrenderer/things/r_decal.h @@ -23,10 +23,10 @@ namespace 
swrenderer class RenderDecal { public: - static void RenderDecals(side_t *wall, drawseg_t *draw_segment, int wallshade, float lightleft, float lightstep, seg_t *curline, const FWallCoords &wallC); + static void RenderDecals(side_t *wall, drawseg_t *draw_segment, int wallshade, float lightleft, float lightstep, seg_t *curline, const FWallCoords &wallC, bool foggy); private: - static void Render(side_t *wall, DBaseDecal *first, drawseg_t *clipper, int wallshade, float lightleft, float lightstep, seg_t *curline, FWallCoords wallC, int pass); + static void Render(side_t *wall, DBaseDecal *first, drawseg_t *clipper, int wallshade, float lightleft, float lightstep, seg_t *curline, FWallCoords wallC, bool foggy, int pass); static void DrawColumn(int x, FTexture *WallSpriteTile, float maskedScaleY, bool sprflipvert, const short *mfloorclip, const short *mceilingclip); }; } diff --git a/src/swrenderer/things/r_particle.cpp b/src/swrenderer/things/r_particle.cpp index ae5b349785..b20acbcf67 100644 --- a/src/swrenderer/things/r_particle.cpp +++ b/src/swrenderer/things/r_particle.cpp @@ -59,7 +59,7 @@ EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor); namespace swrenderer { - void RenderParticle::Project(particle_t *particle, const sector_t *sector, int shade, WaterFakeSide fakeside) + void RenderParticle::Project(particle_t *particle, const sector_t *sector, int shade, WaterFakeSide fakeside, bool foggy) { double tr_x, tr_y; double tx, ty; @@ -200,6 +200,7 @@ namespace swrenderer vis->FakeFlatStat = fakeside; vis->floorclip = 0; vis->Style.ColormapNum = 0; + vis->foggy = foggy; if (fixedlightlev >= 0) { diff --git a/src/swrenderer/things/r_particle.h b/src/swrenderer/things/r_particle.h index 331716cec5..d8aa2952a7 100644 --- a/src/swrenderer/things/r_particle.h +++ b/src/swrenderer/things/r_particle.h @@ -21,7 +21,7 @@ namespace swrenderer class RenderParticle { public: - static void Project(particle_t *, const sector_t *sector, int shade, WaterFakeSide fakeside); + static 
void Project(particle_t *, const sector_t *sector, int shade, WaterFakeSide fakeside, bool foggy); static void Render(vissprite_t *); private: diff --git a/src/swrenderer/things/r_playersprite.cpp b/src/swrenderer/things/r_playersprite.cpp index 72a383b58a..17c1d4da7a 100644 --- a/src/swrenderer/things/r_playersprite.cpp +++ b/src/swrenderer/things/r_playersprite.cpp @@ -136,7 +136,7 @@ namespace swrenderer } // [RH] set foggy flag - foggy = (level.fadeto || basecolormap->Fade || (level.flags & LEVEL_HASFADETABLE)); + bool foggy = (level.fadeto || basecolormap->Fade || (level.flags & LEVEL_HASFADETABLE)); // get light level lightnum = ((floorlight + ceilinglight) >> 1) + R_ActualExtraLight(foggy); @@ -478,7 +478,7 @@ namespace swrenderer vis->Style.BaseColormap = (r_fullbrightignoresectorcolor) ? &FullNormalLight : mybasecolormap; vis->Style.ColormapNum = fixedlightlev >> COLORMAPSHIFT; } - else if (!foggy && pspr->GetState()->GetFullbright()) + else if (!vis->foggy && pspr->GetState()->GetFullbright()) { // full bright vis->Style.BaseColormap = (r_fullbrightignoresectorcolor) ? 
&FullNormalLight : mybasecolormap; // [RH] use basecolormap vis->Style.ColormapNum = 0; @@ -535,7 +535,7 @@ namespace swrenderer // [SP] If emulating GZDoom fullbright, disable acceleration if (r_fullbrightignoresectorcolor && fixedlightlev >= 0) mybasecolormap = &FullNormalLight; - if (r_fullbrightignoresectorcolor && !foggy && pspr->GetState()->GetFullbright()) + if (r_fullbrightignoresectorcolor && !vis->foggy && pspr->GetState()->GetFullbright()) mybasecolormap = &FullNormalLight; colormap_to_use = mybasecolormap; } diff --git a/src/swrenderer/things/r_sprite.cpp b/src/swrenderer/things/r_sprite.cpp index f689d6eb03..75033349cc 100644 --- a/src/swrenderer/things/r_sprite.cpp +++ b/src/swrenderer/things/r_sprite.cpp @@ -60,7 +60,7 @@ EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor) namespace swrenderer { - void RenderSprite::Project(AActor *thing, const DVector3 &pos, FTexture *tex, const DVector2 &spriteScale, int renderflags, WaterFakeSide fakeside, F3DFloor *fakefloor, F3DFloor *fakeceiling, sector_t *current_sector, int spriteshade) + void RenderSprite::Project(AActor *thing, const DVector3 &pos, FTexture *tex, const DVector2 &spriteScale, int renderflags, WaterFakeSide fakeside, F3DFloor *fakefloor, F3DFloor *fakeceiling, sector_t *current_sector, int spriteshade, bool foggy) { // transform the origin point double tr_x = pos.X - ViewPos.X; @@ -209,6 +209,8 @@ namespace swrenderer vis->bIsVoxel = false; vis->bWallSprite = false; + vis->foggy = foggy; + // The software renderer cannot invert the source without inverting the overlay // too. That means if the source is inverted, we need to do the reverse of what // the invert overlay flag says to do. 
@@ -261,7 +263,7 @@ namespace swrenderer vis->Style.BaseColormap = mybasecolormap; vis->Style.ColormapNum = fixedlightlev >> COLORMAPSHIFT; } - else if (!foggy && ((renderflags & RF_FULLBRIGHT) || (thing->flags5 & MF5_BRIGHT))) + else if (!vis->foggy && ((renderflags & RF_FULLBRIGHT) || (thing->flags5 & MF5_BRIGHT))) { // full bright vis->Style.BaseColormap = (r_fullbrightignoresectorcolor) ? &FullNormalLight : mybasecolormap; vis->Style.ColormapNum = 0; diff --git a/src/swrenderer/things/r_sprite.h b/src/swrenderer/things/r_sprite.h index 2711af7ecc..c345d85f23 100644 --- a/src/swrenderer/things/r_sprite.h +++ b/src/swrenderer/things/r_sprite.h @@ -20,7 +20,7 @@ namespace swrenderer class RenderSprite { public: - static void Project(AActor *thing, const DVector3 &pos, FTexture *tex, const DVector2 &spriteScale, int renderflags, WaterFakeSide fakeside, F3DFloor *fakefloor, F3DFloor *fakeceiling, sector_t *current_sector, int spriteshade); + static void Project(AActor *thing, const DVector3 &pos, FTexture *tex, const DVector2 &spriteScale, int renderflags, WaterFakeSide fakeside, F3DFloor *fakefloor, F3DFloor *fakeceiling, sector_t *current_sector, int spriteshade, bool foggy); static void Render(vissprite_t *vis, const short *mfloorclip, const short *mceilingclip); }; } diff --git a/src/swrenderer/things/r_visiblesprite.h b/src/swrenderer/things/r_visiblesprite.h index f655b35cec..b7718991d1 100644 --- a/src/swrenderer/things/r_visiblesprite.h +++ b/src/swrenderer/things/r_visiblesprite.h @@ -86,6 +86,8 @@ namespace swrenderer visstyle_t Style; int CurrentPortalUniq; // [ZZ] to identify the portal that this thing is in. used for clipping. 
+ bool foggy; + vissprite_t() {} }; } diff --git a/src/swrenderer/things/r_voxel.cpp b/src/swrenderer/things/r_voxel.cpp index 7770b46e4b..9519482889 100644 --- a/src/swrenderer/things/r_voxel.cpp +++ b/src/swrenderer/things/r_voxel.cpp @@ -46,7 +46,7 @@ EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor) namespace swrenderer { - void RenderVoxel::Project(AActor *thing, DVector3 pos, FVoxelDef *voxel, const DVector2 &spriteScale, int renderflags, WaterFakeSide fakeside, F3DFloor *fakefloor, F3DFloor *fakeceiling, sector_t *current_sector, int spriteshade) + void RenderVoxel::Project(AActor *thing, DVector3 pos, FVoxelDef *voxel, const DVector2 &spriteScale, int renderflags, WaterFakeSide fakeside, F3DFloor *fakefloor, F3DFloor *fakeceiling, sector_t *current_sector, int spriteshade, bool foggy) { // transform the origin point double tr_x = pos.X - ViewPos.X; @@ -155,6 +155,8 @@ namespace swrenderer vis->voxel = voxel->Voxel; vis->bIsVoxel = true; vis->bWallSprite = false; + vis->foggy = foggy; + RenderTranslucentPass::DrewAVoxel = true; // The software renderer cannot invert the source without inverting the overlay @@ -209,7 +211,7 @@ namespace swrenderer vis->Style.BaseColormap = mybasecolormap; vis->Style.ColormapNum = fixedlightlev >> COLORMAPSHIFT; } - else if (!foggy && ((renderflags & RF_FULLBRIGHT) || (thing->flags5 & MF5_BRIGHT))) + else if (!vis->foggy && ((renderflags & RF_FULLBRIGHT) || (thing->flags5 & MF5_BRIGHT))) { // full bright vis->Style.BaseColormap = (r_fullbrightignoresectorcolor) ? 
&FullNormalLight : mybasecolormap; vis->Style.ColormapNum = 0; diff --git a/src/swrenderer/things/r_voxel.h b/src/swrenderer/things/r_voxel.h index 93ec214e25..56180d7585 100644 --- a/src/swrenderer/things/r_voxel.h +++ b/src/swrenderer/things/r_voxel.h @@ -55,7 +55,7 @@ namespace swrenderer class RenderVoxel { public: - static void Project(AActor *thing, DVector3 pos, FVoxelDef *voxel, const DVector2 &spriteScale, int renderflags, WaterFakeSide fakeside, F3DFloor *fakefloor, F3DFloor *fakeceiling, sector_t *current_sector, int spriteshade); + static void Project(AActor *thing, DVector3 pos, FVoxelDef *voxel, const DVector2 &spriteScale, int renderflags, WaterFakeSide fakeside, F3DFloor *fakefloor, F3DFloor *fakeceiling, sector_t *current_sector, int spriteshade, bool foggy); static void Render(vissprite_t *sprite, int minZ, int maxZ, short *cliptop, short *clipbottom); static void Deinit(); diff --git a/src/swrenderer/things/r_wallsprite.cpp b/src/swrenderer/things/r_wallsprite.cpp index b340897e04..4dd4823a3c 100644 --- a/src/swrenderer/things/r_wallsprite.cpp +++ b/src/swrenderer/things/r_wallsprite.cpp @@ -62,7 +62,7 @@ EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor); namespace swrenderer { - void RenderWallSprite::Project(AActor *thing, const DVector3 &pos, FTextureID picnum, const DVector2 &scale, int renderflags, int spriteshade) + void RenderWallSprite::Project(AActor *thing, const DVector3 &pos, FTextureID picnum, const DVector2 &scale, int renderflags, int spriteshade, bool foggy) { FWallCoords wallc; double x1, x2; @@ -136,6 +136,7 @@ namespace swrenderer r_SpriteVisibility / MAX(tz, MINZ), spriteshade); vis->Style.BaseColormap = basecolormap; vis->wallc = wallc; + vis->foggy = foggy; } void RenderWallSprite::Render(vissprite_t *spr, const short *mfloorclip, const short *mceilingclip) @@ -174,7 +175,7 @@ namespace swrenderer rereadcolormap = false; } - int shade = LIGHT2SHADE(spr->sector->lightlevel + R_ActualExtraLight(foggy)); + int shade = 
LIGHT2SHADE(spr->sector->lightlevel + R_ActualExtraLight(spr->foggy)); double GlobVis = r_WallVisibility; float lightleft = float(GlobVis / spr->wallc.sz1); float lightstep = float((GlobVis / spr->wallc.sz2 - lightleft) / (spr->wallc.sx2 - spr->wallc.sx1)); @@ -183,7 +184,7 @@ namespace swrenderer R_SetColorMapLight(usecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); else if (fixedcolormap != NULL) R_SetColorMapLight(fixedcolormap, 0, 0); - else if (!foggy && (spr->renderflags & RF_FULLBRIGHT)) + else if (!spr->foggy && (spr->renderflags & RF_FULLBRIGHT)) R_SetColorMapLight((r_fullbrightignoresectorcolor) ? &FullNormalLight : usecolormap, 0, 0); else calclighting = true; diff --git a/src/swrenderer/things/r_wallsprite.h b/src/swrenderer/things/r_wallsprite.h index 6e6793045c..57a19a1229 100644 --- a/src/swrenderer/things/r_wallsprite.h +++ b/src/swrenderer/things/r_wallsprite.h @@ -20,7 +20,7 @@ namespace swrenderer class RenderWallSprite { public: - static void Project(AActor *thing, const DVector3 &pos, FTextureID picnum, const DVector2 &scale, int renderflags, int spriteshade); + static void Project(AActor *thing, const DVector3 &pos, FTextureID picnum, const DVector2 &scale, int renderflags, int spriteshade, bool foggy); static void Render(vissprite_t *spr, const short *mfloorclip, const short *mceilingclip); private: From 9e940b4287a4ddae4f405784daf22f8117031f89 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 12 Jan 2017 21:29:19 +0100 Subject: [PATCH 700/912] Remove basecolormap global --- src/polyrenderer/scene/poly_plane.cpp | 2 +- src/swrenderer/drawers/r_draw.cpp | 14 +---- src/swrenderer/drawers/r_draw.h | 7 +-- src/swrenderer/drawers/r_draw_pal.cpp | 2 +- src/swrenderer/drawers/r_draw_pal.h | 6 +- src/swrenderer/drawers/r_draw_rgba.h | 2 +- src/swrenderer/line/r_fogboundary.cpp | 2 +- src/swrenderer/line/r_fogboundary.h | 2 +- src/swrenderer/line/r_line.cpp | 11 ++-- src/swrenderer/line/r_line.h | 4 +- src/swrenderer/line/r_walldraw.cpp | 63 
++++++++++----------- src/swrenderer/line/r_walldraw.h | 6 +- src/swrenderer/plane/r_flatplane.cpp | 3 +- src/swrenderer/plane/r_flatplane.h | 3 +- src/swrenderer/plane/r_skyplane.cpp | 4 +- src/swrenderer/plane/r_slopeplane.cpp | 6 +- src/swrenderer/plane/r_slopeplane.h | 3 +- src/swrenderer/plane/r_visibleplane.cpp | 6 +- src/swrenderer/plane/r_visibleplanelist.cpp | 2 +- src/swrenderer/plane/r_visibleplanelist.h | 2 +- src/swrenderer/scene/r_light.cpp | 1 - src/swrenderer/scene/r_light.h | 1 - src/swrenderer/scene/r_opaque_pass.cpp | 35 +++++++----- src/swrenderer/scene/r_opaque_pass.h | 4 +- src/swrenderer/segments/r_drawsegment.cpp | 29 +++++----- src/swrenderer/segments/r_drawsegment.h | 2 +- src/swrenderer/things/r_decal.cpp | 10 ++-- src/swrenderer/things/r_decal.h | 4 +- src/swrenderer/things/r_playersprite.cpp | 5 +- src/swrenderer/things/r_playersprite.h | 2 +- src/swrenderer/things/r_sprite.cpp | 8 +-- src/swrenderer/things/r_sprite.h | 2 +- src/swrenderer/things/r_voxel.cpp | 8 ++- src/swrenderer/things/r_voxel.h | 2 +- src/swrenderer/things/r_wallsprite.cpp | 11 ++-- src/swrenderer/things/r_wallsprite.h | 2 +- src/v_draw.cpp | 6 +- 37 files changed, 140 insertions(+), 142 deletions(-) diff --git a/src/polyrenderer/scene/poly_plane.cpp b/src/polyrenderer/scene/poly_plane.cpp index 9089881041..275310c31f 100644 --- a/src/polyrenderer/scene/poly_plane.cpp +++ b/src/polyrenderer/scene/poly_plane.cpp @@ -100,7 +100,7 @@ void RenderPolyPlane::Render3DFloor(const TriMatrix &worldToClip, const Vec4f &c if (swrenderer::fixedlightlev < 0 && sub->sector->e->XFloor.lightlist.Size()) { lightlist_t *light = P_GetPlaneLight(sub->sector, &sub->sector->ceilingplane, false); - swrenderer::basecolormap = light->extra_colormap; + //basecolormap = light->extra_colormap; lightlevel = *light->p_lightlevel; } diff --git a/src/swrenderer/drawers/r_draw.cpp b/src/swrenderer/drawers/r_draw.cpp index b9a4a3ef08..f90b63af17 100644 --- a/src/swrenderer/drawers/r_draw.cpp +++ 
b/src/swrenderer/drawers/r_draw.cpp @@ -375,11 +375,9 @@ namespace swrenderer default: return 0; } } - - FDynamicColormap *basecolormapsave; } - bool R_SetPatchStyle(FRenderStyle style, fixed_t alpha, int translation, uint32_t color) + bool R_SetPatchStyle(FRenderStyle style, fixed_t alpha, int translation, uint32_t color, FDynamicColormap *&basecolormap) { using namespace drawerargs; @@ -428,7 +426,6 @@ namespace swrenderer } } } - basecolormapsave = basecolormap; // Check for special modes if (style.BlendOp == STYLEOP_Fuzz) @@ -488,14 +485,9 @@ namespace swrenderer return true; } - bool R_SetPatchStyle(FRenderStyle style, float alpha, int translation, uint32_t color) + bool R_SetPatchStyle(FRenderStyle style, float alpha, int translation, uint32_t color, FDynamicColormap *&basecolormap) { - return R_SetPatchStyle(style, FLOAT2FIXED(alpha), translation, color); - } - - void R_FinishSetPatchStyle() - { - basecolormap = basecolormapsave; + return R_SetPatchStyle(style, FLOAT2FIXED(alpha), translation, color, basecolormap); } DrawerFunc R_GetTransMaskDrawer() diff --git a/src/swrenderer/drawers/r_draw.h b/src/swrenderer/drawers/r_draw.h index bf26c8636a..fd77d1c32a 100644 --- a/src/swrenderer/drawers/r_draw.h +++ b/src/swrenderer/drawers/r_draw.h @@ -162,7 +162,7 @@ namespace swrenderer virtual void DrawSpanAddClamp() = 0; virtual void DrawSpanMaskedAddClamp() = 0; virtual void FillSpan() = 0; - virtual void DrawTiltedSpan(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy) = 0; + virtual void DrawTiltedSpan(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy, FDynamicColormap *basecolormap) = 0; virtual void DrawColoredSpan(int y, int x1, int x2) = 0; virtual void DrawFogBoundaryLine(int y, int 
x1, int x2) = 0; }; @@ -175,9 +175,8 @@ namespace swrenderer void R_InitShadeMaps(); void R_InitFuzzTable(int fuzzoff); - bool R_SetPatchStyle(FRenderStyle style, fixed_t alpha, int translation, uint32_t color); - bool R_SetPatchStyle(FRenderStyle style, float alpha, int translation, uint32_t color); - void R_FinishSetPatchStyle(); // Call this after finished drawing the current thing, in case its style was STYLE_Shade + bool R_SetPatchStyle(FRenderStyle style, fixed_t alpha, int translation, uint32_t color, FDynamicColormap *&basecolormap); + bool R_SetPatchStyle(FRenderStyle style, float alpha, int translation, uint32_t color, FDynamicColormap *&basecolormap); DrawerFunc R_GetTransMaskDrawer(); void R_UpdateFuzzPos(); diff --git a/src/swrenderer/drawers/r_draw_pal.cpp b/src/swrenderer/drawers/r_draw_pal.cpp index 18dd88adb3..4e4783f3f1 100644 --- a/src/swrenderer/drawers/r_draw_pal.cpp +++ b/src/swrenderer/drawers/r_draw_pal.cpp @@ -2480,7 +2480,7 @@ namespace swrenderer ///////////////////////////////////////////////////////////////////////// - DrawTiltedSpanPalCommand::DrawTiltedSpanPalCommand(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy) + DrawTiltedSpanPalCommand::DrawTiltedSpanPalCommand(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy, FDynamicColormap *basecolormap) : y(y), x1(x1), x2(x2), plane_sz(plane_sz), plane_su(plane_su), plane_sv(plane_sv), plane_shade(plane_shade), planeshade(planeshade), planelightfloat(planelightfloat), pviewx(pviewx), pviewy(pviewy) { using namespace drawerargs; diff --git a/src/swrenderer/drawers/r_draw_pal.h b/src/swrenderer/drawers/r_draw_pal.h index 06cbe1ea64..a96660f849 100644 --- a/src/swrenderer/drawers/r_draw_pal.h +++ 
b/src/swrenderer/drawers/r_draw_pal.h @@ -162,7 +162,7 @@ namespace swrenderer class DrawTiltedSpanPalCommand : public DrawerCommand { public: - DrawTiltedSpanPalCommand(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy); + DrawTiltedSpanPalCommand(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy, FDynamicColormap *basecolormap); void Execute(DrawerThread *thread) override; FString DebugInfo() override { return "DrawTiltedSpanPalCommand"; } @@ -276,9 +276,9 @@ namespace swrenderer void DrawSpanMaskedAddClamp() override { DrawerCommandQueue::QueueCommand(); } void FillSpan() override { DrawerCommandQueue::QueueCommand(); } - void DrawTiltedSpan(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy) override + void DrawTiltedSpan(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy, FDynamicColormap *basecolormap) override { - DrawerCommandQueue::QueueCommand(y, x1, x2, plane_sz, plane_su, plane_sv, plane_shade, planeshade, planelightfloat, pviewx, pviewy); + DrawerCommandQueue::QueueCommand(y, x1, x2, plane_sz, plane_su, plane_sv, plane_shade, planeshade, planelightfloat, pviewx, pviewy, basecolormap); } void DrawColoredSpan(int y, int x1, int x2) override { DrawerCommandQueue::QueueCommand(y, x1, x2); } diff --git a/src/swrenderer/drawers/r_draw_rgba.h b/src/swrenderer/drawers/r_draw_rgba.h index ae49578796..e1ed703505 100644 --- a/src/swrenderer/drawers/r_draw_rgba.h +++ b/src/swrenderer/drawers/r_draw_rgba.h @@ -380,7 
+380,7 @@ namespace swrenderer void DrawSpanMaskedAddClamp() override { DrawerCommandQueue::QueueCommand(); } void FillSpan() override { DrawerCommandQueue::QueueCommand(); } - void DrawTiltedSpan(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy) override + void DrawTiltedSpan(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy, FDynamicColormap *basecolormap) override { DrawerCommandQueue::QueueCommand(y, x1, x2, plane_sz, plane_su, plane_sv, plane_shade, planeshade, planelightfloat, pviewx, pviewy); } diff --git a/src/swrenderer/line/r_fogboundary.cpp b/src/swrenderer/line/r_fogboundary.cpp index 523ce4b08e..18cf94055c 100644 --- a/src/swrenderer/line/r_fogboundary.cpp +++ b/src/swrenderer/line/r_fogboundary.cpp @@ -45,7 +45,7 @@ namespace swrenderer { - void RenderFogBoundary::Render(int x1, int x2, short *uclip, short *dclip, int wallshade, float lightleft, float lightstep) + void RenderFogBoundary::Render(int x1, int x2, short *uclip, short *dclip, int wallshade, float lightleft, float lightstep, FDynamicColormap *basecolormap) { // This is essentially the same as R_MapVisPlane but with an extra step // to create new horizontal spans whenever the light changes enough that diff --git a/src/swrenderer/line/r_fogboundary.h b/src/swrenderer/line/r_fogboundary.h index de48738a09..ba7cd00440 100644 --- a/src/swrenderer/line/r_fogboundary.h +++ b/src/swrenderer/line/r_fogboundary.h @@ -18,7 +18,7 @@ namespace swrenderer class RenderFogBoundary { public: - static void Render(int x1, int x2, short *uclip, short *dclip, int wallshade, float lightleft, float lightstep); + static void Render(int x1, int x2, short *uclip, short *dclip, int wallshade, float lightleft, float lightstep, FDynamicColormap 
*basecolormap); private: static void RenderSection(int y, int y2, int x1); diff --git a/src/swrenderer/line/r_line.cpp b/src/swrenderer/line/r_line.cpp index c61caf2da7..dee5f9250b 100644 --- a/src/swrenderer/line/r_line.cpp +++ b/src/swrenderer/line/r_line.cpp @@ -55,7 +55,7 @@ EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor); namespace swrenderer { - void SWRenderLine::Render(seg_t *line, subsector_t *subsector, sector_t *sector, sector_t *fakebacksector, visplane_t *linefloorplane, visplane_t *lineceilingplane, bool infog) + void SWRenderLine::Render(seg_t *line, subsector_t *subsector, sector_t *sector, sector_t *fakebacksector, visplane_t *linefloorplane, visplane_t *lineceilingplane, bool infog, FDynamicColormap *colormap) { static sector_t tempsec; // killough 3/8/98: ceiling/water hack bool solid; @@ -67,6 +67,7 @@ namespace swrenderer floorplane = linefloorplane; ceilingplane = lineceilingplane; foggy = infog; + basecolormap = colormap; curline = line; @@ -591,7 +592,7 @@ namespace swrenderer // [ZZ] Only if not an active mirror if (!rw_markportal) { - RenderDecal::RenderDecals(curline->sidedef, draw_segment, wallshade, rw_lightleft, rw_lightstep, curline, WallC, foggy); + RenderDecal::RenderDecals(curline->sidedef, draw_segment, wallshade, rw_lightleft, rw_lightstep, curline, WallC, foggy, basecolormap); } if (rw_markportal) @@ -1079,7 +1080,7 @@ namespace swrenderer { rw_offset = -rw_offset; } - R_DrawWallSegment(frontsector, curline, WallC, rw_pic, x1, x2, walltop, wallbottom, swall, lwall, yscale, MAX(rw_frontcz1, rw_frontcz2), MIN(rw_frontfz1, rw_frontfz2), false, wallshade, rw_offset, rw_light, rw_lightstep, light_list, foggy); + R_DrawWallSegment(frontsector, curline, WallC, rw_pic, x1, x2, walltop, wallbottom, swall, lwall, yscale, MAX(rw_frontcz1, rw_frontcz2), MIN(rw_frontfz1, rw_frontfz2), false, wallshade, rw_offset, rw_light, rw_lightstep, light_list, foggy, basecolormap); } fillshort(ceilingclip + x1, x2 - x1, viewheight); fillshort(floorclip 
+ x1, x2 - x1, 0xffff); @@ -1115,7 +1116,7 @@ namespace swrenderer { rw_offset = -rw_offset; } - R_DrawWallSegment(frontsector, curline, WallC, rw_pic, x1, x2, walltop, wallupper, swall, lwall, yscale, MAX(rw_frontcz1, rw_frontcz2), MIN(rw_backcz1, rw_backcz2), false, wallshade, rw_offset, rw_light, rw_lightstep, light_list, foggy); + R_DrawWallSegment(frontsector, curline, WallC, rw_pic, x1, x2, walltop, wallupper, swall, lwall, yscale, MAX(rw_frontcz1, rw_frontcz2), MIN(rw_backcz1, rw_backcz2), false, wallshade, rw_offset, rw_light, rw_lightstep, light_list, foggy, basecolormap); } memcpy(ceilingclip + x1, wallupper + x1, (x2 - x1) * sizeof(short)); } @@ -1154,7 +1155,7 @@ namespace swrenderer { rw_offset = -rw_offset; } - R_DrawWallSegment(frontsector, curline, WallC, rw_pic, x1, x2, walllower, wallbottom, swall, lwall, yscale, MAX(rw_backfz1, rw_backfz2), MIN(rw_frontfz1, rw_frontfz2), false, wallshade, rw_offset, rw_light, rw_lightstep, light_list, foggy); + R_DrawWallSegment(frontsector, curline, WallC, rw_pic, x1, x2, walllower, wallbottom, swall, lwall, yscale, MAX(rw_backfz1, rw_backfz2), MIN(rw_frontfz1, rw_frontfz2), false, wallshade, rw_offset, rw_light, rw_lightstep, light_list, foggy, basecolormap); } memcpy(floorclip + x1, walllower + x1, (x2 - x1) * sizeof(short)); } diff --git a/src/swrenderer/line/r_line.h b/src/swrenderer/line/r_line.h index 0be56dff58..c1ee23898d 100644 --- a/src/swrenderer/line/r_line.h +++ b/src/swrenderer/line/r_line.h @@ -20,6 +20,7 @@ struct subsector_t; struct sector_t; struct side_t; struct line_t; +struct FDynamicColormap; namespace swrenderer { @@ -48,7 +49,7 @@ namespace swrenderer class SWRenderLine { public: - void Render(seg_t *line, subsector_t *subsector, sector_t *sector, sector_t *fakebacksector, visplane_t *floorplane, visplane_t *ceilingplane, bool foggy); + void Render(seg_t *line, subsector_t *subsector, sector_t *sector, sector_t *fakebacksector, visplane_t *floorplane, visplane_t *ceilingplane, bool foggy, 
FDynamicColormap *basecolormap); private: bool RenderWallSegment(int x1, int x2); @@ -118,5 +119,6 @@ namespace swrenderer FTexture *midtexture; bool foggy; + FDynamicColormap *basecolormap; }; } diff --git a/src/swrenderer/line/r_walldraw.cpp b/src/swrenderer/line/r_walldraw.cpp index a33568472f..5a13689ac4 100644 --- a/src/swrenderer/line/r_walldraw.cpp +++ b/src/swrenderer/line/r_walldraw.cpp @@ -337,7 +337,7 @@ namespace swrenderer static void ProcessWallWorker( const FWallCoords &WallC, - int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade, fixed_t xoffset, float light, float lightstep, + int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade, fixed_t xoffset, float light, float lightstep, FDynamicColormap *basecolormap, const BYTE *(*getcol)(FTexture *tex, int x), DrawerFunc drawcolumn) { if (rw_pic->UseType == FTexture::TEX_Null) @@ -400,41 +400,39 @@ namespace swrenderer NetUpdate(); } - static void ProcessNormalWall(const FWallCoords &WallC, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade, fixed_t xoffset, float light, float lightstep, const BYTE *(*getcol)(FTexture *tex, int x) = R_GetColumn) + static void ProcessNormalWall(const FWallCoords &WallC, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade, fixed_t xoffset, float light, float lightstep, FDynamicColormap *basecolormap, const BYTE *(*getcol)(FTexture *tex, int x) = R_GetColumn) { - ProcessWallWorker(WallC, x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, getcol, &SWPixelFormatDrawers::DrawWallColumn); + ProcessWallWorker(WallC, x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, basecolormap, getcol, &SWPixelFormatDrawers::DrawWallColumn); } - static void ProcessMaskedWall(const FWallCoords &WallC, int x1, int x2, short *uwal, short *dwal, float 
*swal, fixed_t *lwal, double yrepeat, int wallshade, fixed_t xoffset, float light, float lightstep, const BYTE *(*getcol)(FTexture *tex, int x) = R_GetColumn) + static void ProcessMaskedWall(const FWallCoords &WallC, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade, fixed_t xoffset, float light, float lightstep, FDynamicColormap *basecolormap, const BYTE *(*getcol)(FTexture *tex, int x) = R_GetColumn) { if (!rw_pic->bMasked) // Textures that aren't masked can use the faster ProcessNormalWall. { - ProcessNormalWall(WallC, x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, getcol); + ProcessNormalWall(WallC, x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, basecolormap, getcol); } else { - ProcessWallWorker(WallC, x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, getcol, &SWPixelFormatDrawers::DrawWallMaskedColumn); + ProcessWallWorker(WallC, x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, basecolormap, getcol, &SWPixelFormatDrawers::DrawWallMaskedColumn); } } - static void ProcessTranslucentWall(const FWallCoords &WallC, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade, fixed_t xoffset, float light, float lightstep, const BYTE *(*getcol)(FTexture *tex, int x) = R_GetColumn) + static void ProcessTranslucentWall(const FWallCoords &WallC, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade, fixed_t xoffset, float light, float lightstep, FDynamicColormap *basecolormap, const BYTE *(*getcol)(FTexture *tex, int x) = R_GetColumn) { DrawerFunc drawcol1 = R_GetTransMaskDrawer(); if (drawcol1 == nullptr) { // The current translucency is unsupported, so draw with regular ProcessMaskedWall instead. 
- ProcessMaskedWall(WallC, x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, getcol); + ProcessMaskedWall(WallC, x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, basecolormap, getcol); } else { - ProcessWallWorker(WallC, x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, getcol, drawcol1); + ProcessWallWorker(WallC, x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, basecolormap, getcol, drawcol1); } } - static void ProcessStripedWall(sector_t *frontsector, seg_t *curline, const FWallCoords &WallC, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade, fixed_t xoffset, float light, float lightstep, bool foggy) + static void ProcessStripedWall(sector_t *frontsector, seg_t *curline, const FWallCoords &WallC, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade, fixed_t xoffset, float light, float lightstep, bool foggy, FDynamicColormap *basecolormap) { - FDynamicColormap *startcolormap = basecolormap; - short most1[MAXWIDTH], most2[MAXWIDTH], most3[MAXWIDTH]; short *up, *down; @@ -456,7 +454,7 @@ namespace swrenderer { down[j] = clamp(most3[j], up[j], dwal[j]); } - ProcessNormalWall(WallC, x1, x2, up, down, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep); + ProcessNormalWall(WallC, x1, x2, up, down, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, basecolormap); up = down; down = (down == most1) ? 
most2 : most1; } @@ -466,32 +464,31 @@ namespace swrenderer wallshade = LIGHT2SHADE(curline->sidedef->GetLightLevel(foggy, *lit->p_lightlevel, lit->lightsource != NULL) + R_ActualExtraLight(foggy)); } - ProcessNormalWall(WallC, x1, x2, up, dwal, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep); - basecolormap = startcolormap; + ProcessNormalWall(WallC, x1, x2, up, dwal, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, basecolormap); } - static void ProcessWall(sector_t *frontsector, seg_t *curline, const FWallCoords &WallC, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade, fixed_t xoffset, float light, float lightstep, bool mask, bool foggy) + static void ProcessWall(sector_t *frontsector, seg_t *curline, const FWallCoords &WallC, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade, fixed_t xoffset, float light, float lightstep, bool mask, bool foggy, FDynamicColormap *basecolormap) { if (mask) { if (colfunc == basecolfunc) { - ProcessMaskedWall(WallC, x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep); + ProcessMaskedWall(WallC, x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, basecolormap); } else { - ProcessTranslucentWall(WallC, x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep); + ProcessTranslucentWall(WallC, x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, basecolormap); } } else { if (fixedcolormap != NULL || fixedlightlev >= 0 || !(frontsector->e && frontsector->e->XFloor.lightlist.Size())) { - ProcessNormalWall(WallC, x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep); + ProcessNormalWall(WallC, x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, basecolormap); } else { - ProcessStripedWall(frontsector, curline, WallC, x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, 
xoffset, light, lightstep, foggy); + ProcessStripedWall(frontsector, curline, WallC, x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, foggy, basecolormap); } } } @@ -507,7 +504,7 @@ namespace swrenderer // //============================================================================= - static void ProcessWallNP2(sector_t *frontsector, seg_t *curline, const FWallCoords &WallC, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, double top, double bot, int wallshade, fixed_t xoffset, float light, float lightstep, bool mask, bool foggy) + static void ProcessWallNP2(sector_t *frontsector, seg_t *curline, const FWallCoords &WallC, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, double top, double bot, int wallshade, fixed_t xoffset, float light, float lightstep, bool mask, bool foggy, FDynamicColormap *basecolormap) { short most1[MAXWIDTH], most2[MAXWIDTH], most3[MAXWIDTH]; short *up, *down; @@ -534,14 +531,14 @@ namespace swrenderer { down[j] = clamp(most3[j], up[j], dwal[j]); } - ProcessWall(frontsector, curline, WallC, x1, x2, up, down, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, mask, foggy); + ProcessWall(frontsector, curline, WallC, x1, x2, up, down, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, mask, foggy, basecolormap); up = down; down = (down == most1) ? 
most2 : most1; } partition -= scaledtexheight; dc_texturemid -= texheight; } - ProcessWall(frontsector, curline, WallC, x1, x2, up, dwal, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, mask, foggy); + ProcessWall(frontsector, curline, WallC, x1, x2, up, dwal, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, mask, foggy, basecolormap); } else { // upside down: draw strips from bottom to top @@ -558,18 +555,18 @@ namespace swrenderer { up[j] = clamp(most3[j], uwal[j], down[j]); } - ProcessWall(frontsector, curline, WallC, x1, x2, up, down, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, mask, foggy); + ProcessWall(frontsector, curline, WallC, x1, x2, up, down, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, mask, foggy, basecolormap); down = up; up = (up == most1) ? most2 : most1; } partition -= scaledtexheight; dc_texturemid -= texheight; } - ProcessWall(frontsector, curline, WallC, x1, x2, uwal, down, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, mask, foggy); + ProcessWall(frontsector, curline, WallC, x1, x2, uwal, down, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, mask, foggy, basecolormap); } } - void R_DrawDrawSeg(sector_t *frontsector, seg_t *curline, const FWallCoords &WallC, FTexture *pic, drawseg_t *ds, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade, fixed_t xoffset, float light, float lightstep, bool foggy) + void R_DrawDrawSeg(sector_t *frontsector, seg_t *curline, const FWallCoords &WallC, FTexture *pic, drawseg_t *ds, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade, fixed_t xoffset, float light, float lightstep, bool foggy, FDynamicColormap *basecolormap) { rw_pic = pic; if (rw_pic->GetHeight() != 1 << rw_pic->HeightBits) @@ -589,34 +586,34 @@ namespace swrenderer { bot = MAX(bot, clip3d->sclipBottom); } - ProcessWallNP2(frontsector, curline, WallC, x1, x2, uwal, dwal, swal, lwal, 
yrepeat, top, bot, wallshade, xoffset, light, lightstep, true, foggy); + ProcessWallNP2(frontsector, curline, WallC, x1, x2, uwal, dwal, swal, lwal, yrepeat, top, bot, wallshade, xoffset, light, lightstep, true, foggy, basecolormap); } else { - ProcessWall(frontsector, curline, WallC, x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, true, foggy); + ProcessWall(frontsector, curline, WallC, x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, true, foggy, basecolormap); } } - void R_DrawWallSegment(sector_t *frontsector, seg_t *curline, const FWallCoords &WallC, FTexture *pic, int x1, int x2, short *walltop, short *wallbottom, float *swall, fixed_t *lwall, double yscale, double top, double bottom, bool mask, int wallshade, fixed_t xoffset, float light, float lightstep, FLightNode *light_list, bool foggy) + void R_DrawWallSegment(sector_t *frontsector, seg_t *curline, const FWallCoords &WallC, FTexture *pic, int x1, int x2, short *walltop, short *wallbottom, float *swall, fixed_t *lwall, double yscale, double top, double bottom, bool mask, int wallshade, fixed_t xoffset, float light, float lightstep, FLightNode *light_list, bool foggy, FDynamicColormap *basecolormap) { rw_pic = pic; dc_light_list = light_list; if (rw_pic->GetHeight() != 1 << rw_pic->HeightBits) { - ProcessWallNP2(frontsector, curline, WallC, x1, x2, walltop, wallbottom, swall, lwall, yscale, top, bottom, wallshade, xoffset, light, lightstep, false, foggy); + ProcessWallNP2(frontsector, curline, WallC, x1, x2, walltop, wallbottom, swall, lwall, yscale, top, bottom, wallshade, xoffset, light, lightstep, false, foggy, basecolormap); } else { - ProcessWall(frontsector, curline, WallC, x1, x2, walltop, wallbottom, swall, lwall, yscale, wallshade, xoffset, light, lightstep, false, foggy); + ProcessWall(frontsector, curline, WallC, x1, x2, walltop, wallbottom, swall, lwall, yscale, wallshade, xoffset, light, lightstep, false, foggy, basecolormap); } 
dc_light_list = nullptr; } - void R_DrawSkySegment(FTexture *pic, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade, fixed_t xoffset, float light, float lightstep, const uint8_t *(*getcol)(FTexture *tex, int x)) + void R_DrawSkySegment(FTexture *pic, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade, fixed_t xoffset, float light, float lightstep, FDynamicColormap *basecolormap, const uint8_t *(*getcol)(FTexture *tex, int x)) { rw_pic = pic; FWallCoords wallC; // Not used. To do: don't use r_walldraw to draw the sky!! - ProcessNormalWall(wallC, x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, getcol); + ProcessNormalWall(wallC, x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, basecolormap, getcol); } } diff --git a/src/swrenderer/line/r_walldraw.h b/src/swrenderer/line/r_walldraw.h index 797795099a..d2d3ffbc1a 100644 --- a/src/swrenderer/line/r_walldraw.h +++ b/src/swrenderer/line/r_walldraw.h @@ -37,7 +37,7 @@ namespace swrenderer uint32_t height; }; - void R_DrawWallSegment(sector_t *frontsector, seg_t *curline, const FWallCoords &WallC, FTexture *rw_pic, int x1, int x2, short *walltop, short *wallbottom, float *swall, fixed_t *lwall, double yscale, double top, double bottom, bool mask, int wallshade, fixed_t xoffset, float light, float lightstep, FLightNode *light_list, bool foggy); - void R_DrawSkySegment(FTexture *rw_pic, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade, fixed_t xoffset, float light, float lightstep, const uint8_t *(*getcol)(FTexture *tex, int col)); - void R_DrawDrawSeg(sector_t *frontsector, seg_t *curline, const FWallCoords &WallC, FTexture *rw_pic, drawseg_t *ds, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade, fixed_t xoffset, float light, float lightstep, bool foggy); + void 
R_DrawWallSegment(sector_t *frontsector, seg_t *curline, const FWallCoords &WallC, FTexture *rw_pic, int x1, int x2, short *walltop, short *wallbottom, float *swall, fixed_t *lwall, double yscale, double top, double bottom, bool mask, int wallshade, fixed_t xoffset, float light, float lightstep, FLightNode *light_list, bool foggy, FDynamicColormap *basecolormap); + void R_DrawSkySegment(FTexture *rw_pic, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade, fixed_t xoffset, float light, float lightstep, FDynamicColormap *basecolormap, const uint8_t *(*getcol)(FTexture *tex, int col)); + void R_DrawDrawSeg(sector_t *frontsector, seg_t *curline, const FWallCoords &WallC, FTexture *rw_pic, drawseg_t *ds, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade, fixed_t xoffset, float light, float lightstep, bool foggy, FDynamicColormap *basecolormap); } diff --git a/src/swrenderer/plane/r_flatplane.cpp b/src/swrenderer/plane/r_flatplane.cpp index 9a9494833a..4c020468e1 100644 --- a/src/swrenderer/plane/r_flatplane.cpp +++ b/src/swrenderer/plane/r_flatplane.cpp @@ -44,7 +44,7 @@ namespace swrenderer { - void RenderFlatPlane::Render(visplane_t *pl, double _xscale, double _yscale, fixed_t alpha, bool additive, bool masked) + void RenderFlatPlane::Render(visplane_t *pl, double _xscale, double _yscale, fixed_t alpha, bool additive, bool masked, FDynamicColormap *colormap) { using namespace drawerargs; @@ -105,6 +105,7 @@ namespace swrenderer planeheight = fabs(pl->height.Zat0() - ViewPos.Z); + basecolormap = colormap; GlobVis = r_FloorVisibility / planeheight; ds_light = 0; if (fixedlightlev >= 0) diff --git a/src/swrenderer/plane/r_flatplane.h b/src/swrenderer/plane/r_flatplane.h index bd830f63b2..c4077a908c 100644 --- a/src/swrenderer/plane/r_flatplane.h +++ b/src/swrenderer/plane/r_flatplane.h @@ -22,7 +22,7 @@ namespace swrenderer class RenderFlatPlane : PlaneRenderer { public: - 
void Render(visplane_t *pl, double _xscale, double _yscale, fixed_t alpha, bool additive, bool masked); + void Render(visplane_t *pl, double _xscale, double _yscale, fixed_t alpha, bool additive, bool masked, FDynamicColormap *basecolormap); static void SetupSlope(); @@ -34,6 +34,7 @@ namespace swrenderer bool plane_shade; int planeshade; double GlobVis; + FDynamicColormap *basecolormap; fixed_t pviewx, pviewy; fixed_t xscale, yscale; double xstepscale, ystepscale; diff --git a/src/swrenderer/plane/r_skyplane.cpp b/src/swrenderer/plane/r_skyplane.cpp index ab8dedb973..14b494a812 100644 --- a/src/swrenderer/plane/r_skyplane.cpp +++ b/src/swrenderer/plane/r_skyplane.cpp @@ -446,7 +446,7 @@ namespace swrenderer lastskycol_bgra[x] = 0xffffffff; } R_DrawSkySegment(frontskytex, pl->left, pl->right, (short *)pl->top, (short *)pl->bottom, swall, lwall, - frontyScale, 0, 0, 0.0f, 0.0f, backskytex == NULL ? RenderSkyPlane::GetOneSkyColumn : RenderSkyPlane::GetTwoSkyColumns); + frontyScale, 0, 0, 0.0f, 0.0f, nullptr, backskytex == nullptr ? RenderSkyPlane::GetOneSkyColumn : RenderSkyPlane::GetTwoSkyColumns); } else { // The texture does not tile nicely @@ -483,7 +483,7 @@ namespace swrenderer lastskycol[x] = 0xffffffff; lastskycol_bgra[x] = 0xffffffff; } - R_DrawSkySegment(frontskytex, pl->left, pl->right, top, bot, swall, lwall, frontskytex->Scale.Y, 0, 0, 0.0f, 0.0f, backskytex == NULL ? RenderSkyPlane::GetOneSkyColumn : RenderSkyPlane::GetTwoSkyColumns); + R_DrawSkySegment(frontskytex, pl->left, pl->right, top, bot, swall, lwall, frontskytex->Scale.Y, 0, 0, 0.0f, 0.0f, nullptr, backskytex == nullptr ? 
RenderSkyPlane::GetOneSkyColumn : RenderSkyPlane::GetTwoSkyColumns); yl = yh; yh += drawheight; dc_texturemid = iscale * (centery - yl - 1); diff --git a/src/swrenderer/plane/r_slopeplane.cpp b/src/swrenderer/plane/r_slopeplane.cpp index b2d42e1980..9857dc11bb 100644 --- a/src/swrenderer/plane/r_slopeplane.cpp +++ b/src/swrenderer/plane/r_slopeplane.cpp @@ -48,7 +48,7 @@ namespace swrenderer { - void RenderSlopePlane::Render(visplane_t *pl, double _xscale, double _yscale, fixed_t alpha, bool additive, bool masked) + void RenderSlopePlane::Render(visplane_t *pl, double _xscale, double _yscale, fixed_t alpha, bool additive, bool masked, FDynamicColormap *colormap) { using namespace drawerargs; @@ -148,6 +148,8 @@ namespace swrenderer if (pl->height.fC() > 0) planelightfloat = -planelightfloat; + basecolormap = colormap; + if (fixedlightlev >= 0) { R_SetDSColorMapLight(basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); @@ -180,6 +182,6 @@ namespace swrenderer void RenderSlopePlane::RenderLine(int y, int x1, int x2) { - R_Drawers()->DrawTiltedSpan(y, x1, x2, plane_sz, plane_su, plane_sv, plane_shade, planeshade, planelightfloat, pviewx, pviewy); + R_Drawers()->DrawTiltedSpan(y, x1, x2, plane_sz, plane_su, plane_sv, plane_shade, planeshade, planelightfloat, pviewx, pviewy, basecolormap); } } diff --git a/src/swrenderer/plane/r_slopeplane.h b/src/swrenderer/plane/r_slopeplane.h index 6fad957098..7dc6e7838f 100644 --- a/src/swrenderer/plane/r_slopeplane.h +++ b/src/swrenderer/plane/r_slopeplane.h @@ -20,7 +20,7 @@ namespace swrenderer class RenderSlopePlane : PlaneRenderer { public: - void Render(visplane_t *pl, double _xscale, double _yscale, fixed_t alpha, bool additive, bool masked); + void Render(visplane_t *pl, double _xscale, double _yscale, fixed_t alpha, bool additive, bool masked, FDynamicColormap *basecolormap); private: void RenderLine(int y, int x1, int x2) override; @@ -31,5 +31,6 @@ namespace swrenderer int planeshade; fixed_t pviewx, pviewy; fixed_t xscale, 
yscale; + FDynamicColormap *basecolormap; }; } diff --git a/src/swrenderer/plane/r_visibleplane.cpp b/src/swrenderer/plane/r_visibleplane.cpp index 6731262f7e..fbb1e37a00 100644 --- a/src/swrenderer/plane/r_visibleplane.cpp +++ b/src/swrenderer/plane/r_visibleplane.cpp @@ -108,17 +108,15 @@ namespace swrenderer double xscale = xform.xScale * tex->Scale.X; double yscale = xform.yScale * tex->Scale.Y; - basecolormap = colormap; - if (!height.isSlope() && !tilt) { RenderFlatPlane renderer; - renderer.Render(this, xscale, yscale, alpha, additive, masked); + renderer.Render(this, xscale, yscale, alpha, additive, masked, colormap); } else { RenderSlopePlane renderer; - renderer.Render(this, xscale, yscale, alpha, additive, masked); + renderer.Render(this, xscale, yscale, alpha, additive, masked, colormap); } } NetUpdate(); diff --git a/src/swrenderer/plane/r_visibleplanelist.cpp b/src/swrenderer/plane/r_visibleplanelist.cpp index 760b855609..7cdfdc4254 100644 --- a/src/swrenderer/plane/r_visibleplanelist.cpp +++ b/src/swrenderer/plane/r_visibleplanelist.cpp @@ -165,7 +165,7 @@ namespace swrenderer } } - visplane_t *VisiblePlaneList::FindPlane(const secplane_t &height, FTextureID picnum, int lightlevel, double Alpha, bool additive, const FTransform &xxform, int sky, FSectorPortal *portal) + visplane_t *VisiblePlaneList::FindPlane(const secplane_t &height, FTextureID picnum, int lightlevel, double Alpha, bool additive, const FTransform &xxform, int sky, FSectorPortal *portal, FDynamicColormap *basecolormap) { secplane_t plane; visplane_t *check; diff --git a/src/swrenderer/plane/r_visibleplanelist.h b/src/swrenderer/plane/r_visibleplanelist.h index 3db52d5bbc..a38fe2ffd9 100644 --- a/src/swrenderer/plane/r_visibleplanelist.h +++ b/src/swrenderer/plane/r_visibleplanelist.h @@ -31,7 +31,7 @@ namespace swrenderer void Deinit(); void Clear(bool fullclear); - visplane_t *FindPlane(const secplane_t &height, FTextureID picnum, int lightlevel, double Alpha, bool additive, const 
FTransform &xxform, int sky, FSectorPortal *portal); + visplane_t *FindPlane(const secplane_t &height, FTextureID picnum, int lightlevel, double Alpha, bool additive, const FTransform &xxform, int sky, FSectorPortal *portal, FDynamicColormap *basecolormap); visplane_t *GetRange(visplane_t *pl, int start, int stop); int Render(); diff --git a/src/swrenderer/scene/r_light.cpp b/src/swrenderer/scene/r_light.cpp index d6c03b8317..7ef86ca257 100644 --- a/src/swrenderer/scene/r_light.cpp +++ b/src/swrenderer/scene/r_light.cpp @@ -45,7 +45,6 @@ namespace swrenderer double r_SpriteVisibility; double r_ParticleVisibility; - FDynamicColormap *basecolormap; // [RH] colormap currently drawing with int fixedlightlev; FSWColormap *fixedcolormap; FSpecialColormap *realfixedcolormap; diff --git a/src/swrenderer/scene/r_light.h b/src/swrenderer/scene/r_light.h index 245eaafc91..50b4fae842 100644 --- a/src/swrenderer/scene/r_light.h +++ b/src/swrenderer/scene/r_light.h @@ -60,7 +60,6 @@ namespace swrenderer extern int fixedlightlev; extern FSWColormap *fixedcolormap; extern FSpecialColormap *realfixedcolormap; - extern FDynamicColormap *basecolormap; // [RH] Colormap for sector currently being drawn inline int R_ActualExtraLight(bool fog) { return fog ? 
0 : extralight << 4; } diff --git a/src/swrenderer/scene/r_opaque_pass.cpp b/src/swrenderer/scene/r_opaque_pass.cpp index b49782517d..7db41d6f80 100644 --- a/src/swrenderer/scene/r_opaque_pass.cpp +++ b/src/swrenderer/scene/r_opaque_pass.cpp @@ -398,7 +398,7 @@ namespace swrenderer } // kg3D - add fake segs, never rendered - void RenderOpaquePass::FakeDrawLoop(subsector_t *sub, visplane_t *floorplane, visplane_t *ceilingplane, bool foggy) + void RenderOpaquePass::FakeDrawLoop(subsector_t *sub, visplane_t *floorplane, visplane_t *ceilingplane, bool foggy, FDynamicColormap *basecolormap) { int count; seg_t* line; @@ -410,7 +410,7 @@ namespace swrenderer { if ((line->sidedef) && !(line->sidedef->Flags & WALLF_POLYOBJ)) { - renderline.Render(line, InSubsector, frontsector, nullptr, floorplane, ceilingplane, foggy); + renderline.Render(line, InSubsector, frontsector, nullptr, floorplane, ceilingplane, foggy, basecolormap); } line++; } @@ -479,6 +479,7 @@ namespace swrenderer bool foggy = level.fadeto || frontsector->ColorMap->Fade || (level.flags & LEVEL_HASFADETABLE); // kg3D - fake lights + FDynamicColormap *basecolormap; if (fixedlightlev < 0 && frontsector->e && frontsector->e->XFloor.lightlist.Size()) { light = P_GetPlaneLight(frontsector, &frontsector->ceilingplane, false); @@ -510,7 +511,8 @@ namespace swrenderer !!(frontsector->GetFlags(sector_t::ceiling) & PLANEF_ADDITIVE), frontsector->planes[sector_t::ceiling].xform, frontsector->sky, - portal + portal, + basecolormap ) : nullptr; if (ceilingplane) @@ -550,7 +552,8 @@ namespace swrenderer !!(frontsector->GetFlags(sector_t::floor) & PLANEF_ADDITIVE), frontsector->planes[sector_t::floor].xform, frontsector->sky, - portal + portal, + basecolormap ) : nullptr; if (floorplane) @@ -615,12 +618,13 @@ namespace swrenderer !!(clip3d->fakeFloor->flags & FF_ADDITIVETRANS), frontsector->planes[position].xform, frontsector->sky, - nullptr); + nullptr, + basecolormap); if (floorplane) 
floorplane->AddLights(frontsector->lighthead); - FakeDrawLoop(sub, floorplane, ceilingplane, foggy); + FakeDrawLoop(sub, floorplane, ceilingplane, foggy, basecolormap); clip3d->fake3D = 0; frontsector = sub->sector; } @@ -680,12 +684,13 @@ namespace swrenderer !!(clip3d->fakeFloor->flags & FF_ADDITIVETRANS), frontsector->planes[position].xform, frontsector->sky, - nullptr); + nullptr, + basecolormap); if (ceilingplane) ceilingplane->AddLights(frontsector->lighthead); - FakeDrawLoop(sub, floorplane, ceilingplane, foggy); + FakeDrawLoop(sub, floorplane, ceilingplane, foggy, basecolormap); clip3d->fake3D = 0; frontsector = sub->sector; } @@ -704,7 +709,7 @@ namespace swrenderer // lightlevels on floor & ceiling lightlevels in the surrounding area. // [RH] Handle sprite lighting like Duke 3D: If the ceiling is a sky, sprites are lit by // it, otherwise they are lit by the floor. - AddSprites(sub->sector, frontsector->GetTexture(sector_t::ceiling) == skyflatnum ? ceilinglightlevel : floorlightlevel, FakeSide, foggy); + AddSprites(sub->sector, frontsector->GetTexture(sector_t::ceiling) == skyflatnum ? 
ceilinglightlevel : floorlightlevel, FakeSide, foggy, basecolormap); // [RH] Add particles if ((unsigned int)(sub - subsectors) < (unsigned int)numsubsectors) @@ -746,14 +751,14 @@ namespace swrenderer clip3d->fakeFloor->validcount = validcount; clip3d->NewClip(); } - renderline.Render(line, InSubsector, frontsector, &tempsec, floorplane, ceilingplane, foggy); // fake + renderline.Render(line, InSubsector, frontsector, &tempsec, floorplane, ceilingplane, foggy, basecolormap); // fake } clip3d->fakeFloor = nullptr; clip3d->fake3D = 0; floorplane = backupfp; ceilingplane = backupcp; } - renderline.Render(line, InSubsector, frontsector, nullptr, floorplane, ceilingplane, foggy); // now real + renderline.Render(line, InSubsector, frontsector, nullptr, floorplane, ceilingplane, foggy, basecolormap); // now real } line++; } @@ -809,7 +814,7 @@ namespace swrenderer fillshort(ceilingclip, viewwidth, !screen->Accel2D && ConBottom > viewwindowy && !bRenderingToCanvas ? (ConBottom - viewwindowy) : 0); } - void RenderOpaquePass::AddSprites(sector_t *sec, int lightlevel, WaterFakeSide fakeside, bool foggy) + void RenderOpaquePass::AddSprites(sector_t *sec, int lightlevel, WaterFakeSide fakeside, bool foggy, FDynamicColormap *basecolormap) { F3DFloor *fakeceiling = nullptr; F3DFloor *fakefloor = nullptr; @@ -869,15 +874,15 @@ namespace swrenderer { if ((sprite.renderflags & RF_SPRITETYPEMASK) == RF_WALLSPRITE) { - RenderWallSprite::Project(thing, sprite.pos, sprite.picnum, sprite.spriteScale, sprite.renderflags, spriteshade, foggy); + RenderWallSprite::Project(thing, sprite.pos, sprite.picnum, sprite.spriteScale, sprite.renderflags, spriteshade, foggy, basecolormap); } else if (sprite.voxel) { - RenderVoxel::Project(thing, sprite.pos, sprite.voxel, sprite.spriteScale, sprite.renderflags, fakeside, fakefloor, fakeceiling, sec, spriteshade, foggy); + RenderVoxel::Project(thing, sprite.pos, sprite.voxel, sprite.spriteScale, sprite.renderflags, fakeside, fakefloor, fakeceiling, sec, 
spriteshade, foggy, basecolormap); } else { - RenderSprite::Project(thing, sprite.pos, sprite.tex, sprite.spriteScale, sprite.renderflags, fakeside, fakefloor, fakeceiling, sec, spriteshade, foggy); + RenderSprite::Project(thing, sprite.pos, sprite.tex, sprite.spriteScale, sprite.renderflags, fakeside, fakefloor, fakeceiling, sec, spriteshade, foggy, basecolormap); } } } diff --git a/src/swrenderer/scene/r_opaque_pass.h b/src/swrenderer/scene/r_opaque_pass.h index 4ea6a64658..e0a2440929 100644 --- a/src/swrenderer/scene/r_opaque_pass.h +++ b/src/swrenderer/scene/r_opaque_pass.h @@ -68,9 +68,9 @@ namespace swrenderer bool CheckBBox(float *bspcoord); void AddPolyobjs(subsector_t *sub); - void FakeDrawLoop(subsector_t *sub, visplane_t *floorplane, visplane_t *ceilingplane, bool foggy); + void FakeDrawLoop(subsector_t *sub, visplane_t *floorplane, visplane_t *ceilingplane, bool foggy, FDynamicColormap *basecolormap); - void AddSprites(sector_t *sec, int lightlevel, WaterFakeSide fakeside, bool foggy); + void AddSprites(sector_t *sec, int lightlevel, WaterFakeSide fakeside, bool foggy, FDynamicColormap *basecolormap); static bool IsPotentiallyVisible(AActor *thing); static bool GetThingSprite(AActor *thing, ThingSprite &sprite); diff --git a/src/swrenderer/segments/r_drawsegment.cpp b/src/swrenderer/segments/r_drawsegment.cpp index 7d38eb2ec9..5b7503c38f 100644 --- a/src/swrenderer/segments/r_drawsegment.cpp +++ b/src/swrenderer/segments/r_drawsegment.cpp @@ -147,8 +147,9 @@ namespace swrenderer curline = ds->curline; + FDynamicColormap *patchstylecolormap = nullptr; bool visible = R_SetPatchStyle(LegacyRenderStyles[curline->linedef->flags & ML_ADDTRANS ? 
STYLE_Add : STYLE_Translucent], - (float)MIN(curline->linedef->alpha, 1.), 0, 0); + (float)MIN(curline->linedef->alpha, 1.), 0, 0, patchstylecolormap); if (!visible && !ds->bFogBoundary && !ds->bFakeBoundary) { @@ -169,7 +170,7 @@ namespace swrenderer // killough 4/13/98: get correct lightlevel for 2s normal textures sec = RenderOpaquePass::Instance()->FakeFlat(frontsector, &tempsec, nullptr, nullptr, nullptr, 0, 0, 0, 0); - basecolormap = sec->ColorMap; // [RH] Set basecolormap + FDynamicColormap *basecolormap = sec->ColorMap; // [RH] Set basecolormap int wallshade = ds->shade; rw_lightstep = ds->lightstep; @@ -202,7 +203,7 @@ namespace swrenderer // [RH] Draw fog partition if (ds->bFogBoundary) { - RenderFogBoundary::Render(x1, x2, mceilingclip, mfloorclip, wallshade, rw_light, rw_lightstep); + RenderFogBoundary::Render(x1, x2, mceilingclip, mfloorclip, wallshade, rw_light, rw_lightstep, basecolormap); if (ds->maskedtexturecol == -1) { goto clearfog; @@ -420,11 +421,10 @@ namespace swrenderer rw_offset = 0; rw_pic = tex; - R_DrawDrawSeg(frontsector, curline, WallC, rw_pic, ds, x1, x2, mceilingclip, mfloorclip, MaskedSWall, maskedtexturecol, ds->yscale, wallshade, rw_offset, rw_light, rw_lightstep, ds->foggy); + R_DrawDrawSeg(frontsector, curline, WallC, rw_pic, ds, x1, x2, mceilingclip, mfloorclip, MaskedSWall, maskedtexturecol, ds->yscale, wallshade, rw_offset, rw_light, rw_lightstep, ds->foggy, basecolormap); } clearfog: - R_FinishSetPatchStyle(); if (ds->bFakeBoundary & 3) { R_RenderFakeWallRange(ds, x1, x2, wallshade); @@ -448,7 +448,7 @@ namespace swrenderer } // kg3D - render one fake wall - void R_RenderFakeWall(drawseg_t *ds, int x1, int x2, F3DFloor *rover, int wallshade) + void R_RenderFakeWall(drawseg_t *ds, int x1, int x2, F3DFloor *rover, int wallshade, FDynamicColormap *basecolormap) { int i; double xscale; @@ -456,12 +456,10 @@ namespace swrenderer fixed_t Alpha = Scale(rover->alpha, OPAQUE, 255); bool visible = 
R_SetPatchStyle(LegacyRenderStyles[rover->flags & FF_ADDITIVETRANS ? STYLE_Add : STYLE_Translucent], - Alpha, 0, 0); + Alpha, 0, 0, basecolormap); - if (!visible) { - R_FinishSetPatchStyle(); + if (!visible) return; - } rw_lightstep = ds->lightstep; rw_light = ds->light + (x1 - ds->x1) * rw_lightstep; @@ -547,8 +545,7 @@ namespace swrenderer } PrepLWall(lwall, curline->sidedef->TexelLength*xscale, ds->sx1, ds->sx2, WallT); - R_DrawDrawSeg(frontsector, curline, WallC, rw_pic, ds, x1, x2, wallupper, walllower, MaskedSWall, lwall, yscale, wallshade, rw_offset, rw_light, rw_lightstep, ds->foggy); - R_FinishSetPatchStyle(); + R_DrawDrawSeg(frontsector, curline, WallC, rw_pic, ds, x1, x2, wallupper, walllower, MaskedSWall, lwall, yscale, wallshade, rw_offset, rw_light, rw_lightstep, ds->foggy, basecolormap); } // kg3D - walls of fake floors @@ -734,7 +731,7 @@ namespace swrenderer } } // correct colors now - basecolormap = frontsector->ColorMap; + FDynamicColormap *basecolormap = frontsector->ColorMap; wallshade = ds->shade; if (fixedlightlev < 0) { @@ -767,7 +764,7 @@ namespace swrenderer } if (rw_pic != DONT_DRAW) { - R_RenderFakeWall(ds, x1, x2, fover ? fover : rover, wallshade); + R_RenderFakeWall(ds, x1, x2, fover ? fover : rover, wallshade, basecolormap); } else rw_pic = nullptr; break; @@ -908,7 +905,7 @@ namespace swrenderer } } // correct colors now - basecolormap = frontsector->ColorMap; + FDynamicColormap *basecolormap = frontsector->ColorMap; wallshade = ds->shade; if (fixedlightlev < 0) { @@ -942,7 +939,7 @@ namespace swrenderer if (rw_pic != DONT_DRAW) { - R_RenderFakeWall(ds, x1, x2, fover ? fover : rover, wallshade); + R_RenderFakeWall(ds, x1, x2, fover ? 
fover : rover, wallshade, basecolormap); } else { diff --git a/src/swrenderer/segments/r_drawsegment.h b/src/swrenderer/segments/r_drawsegment.h index eaa1283a07..5533ec4300 100644 --- a/src/swrenderer/segments/r_drawsegment.h +++ b/src/swrenderer/segments/r_drawsegment.h @@ -60,6 +60,6 @@ namespace swrenderer drawseg_t *R_AddDrawSegment(); void ClipMidtex(int x1, int x2); void R_RenderMaskedSegRange(drawseg_t *ds, int x1, int x2); - void R_RenderFakeWall(drawseg_t *ds, int x1, int x2, F3DFloor *rover, int wallshade); + void R_RenderFakeWall(drawseg_t *ds, int x1, int x2, F3DFloor *rover, int wallshade, FDynamicColormap *basecolormap); void R_RenderFakeWallRange(drawseg_t *ds, int x1, int x2, int wallshade); } diff --git a/src/swrenderer/things/r_decal.cpp b/src/swrenderer/things/r_decal.cpp index 7011bb08b0..1f0c02d0ac 100644 --- a/src/swrenderer/things/r_decal.cpp +++ b/src/swrenderer/things/r_decal.cpp @@ -47,11 +47,11 @@ EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor); namespace swrenderer { - void RenderDecal::RenderDecals(side_t *sidedef, drawseg_t *draw_segment, int wallshade, float lightleft, float lightstep, seg_t *curline, const FWallCoords &wallC, bool foggy) + void RenderDecal::RenderDecals(side_t *sidedef, drawseg_t *draw_segment, int wallshade, float lightleft, float lightstep, seg_t *curline, const FWallCoords &wallC, bool foggy, FDynamicColormap *basecolormap) { for (DBaseDecal *decal = sidedef->AttachedDecals; decal != NULL; decal = decal->WallNext) { - Render(sidedef, decal, draw_segment, wallshade, lightleft, lightstep, curline, wallC, foggy, 0); + Render(sidedef, decal, draw_segment, wallshade, lightleft, lightstep, curline, wallC, foggy, basecolormap, 0); } } @@ -59,7 +59,7 @@ namespace swrenderer // = 1: drawing masked textures (including sprites) // Currently, only pass = 0 is done or used - void RenderDecal::Render(side_t *wall, DBaseDecal *decal, drawseg_t *clipper, int wallshade, float lightleft, float lightstep, seg_t *curline, 
FWallCoords WallC, bool foggy, int pass) + void RenderDecal::Render(side_t *wall, DBaseDecal *decal, drawseg_t *clipper, int wallshade, float lightleft, float lightstep, seg_t *curline, FWallCoords WallC, bool foggy, FDynamicColormap *basecolormap, int pass) { DVector2 decal_left, decal_right, decal_pos; int x1, x2; @@ -277,7 +277,7 @@ namespace swrenderer { int x = x1; - bool visible = R_SetPatchStyle(decal->RenderStyle, (float)decal->Alpha, decal->Translation, decal->AlphaColor); + bool visible = R_SetPatchStyle(decal->RenderStyle, (float)decal->Alpha, decal->Translation, decal->AlphaColor, basecolormap); // R_SetPatchStyle can modify basecolormap. if (rereadcolormap) @@ -304,12 +304,10 @@ namespace swrenderer // needrepeat will be 0, and the while will fail. mceilingclip = RenderOpaquePass::Instance()->floorclip; mfloorclip = wallbottom; - R_FinishSetPatchStyle(); } while (needrepeat--); colfunc = basecolfunc; - R_FinishSetPatchStyle(); done: WallC = savecoord; } diff --git a/src/swrenderer/things/r_decal.h b/src/swrenderer/things/r_decal.h index c16c2047ef..2bee90036d 100644 --- a/src/swrenderer/things/r_decal.h +++ b/src/swrenderer/things/r_decal.h @@ -23,10 +23,10 @@ namespace swrenderer class RenderDecal { public: - static void RenderDecals(side_t *wall, drawseg_t *draw_segment, int wallshade, float lightleft, float lightstep, seg_t *curline, const FWallCoords &wallC, bool foggy); + static void RenderDecals(side_t *wall, drawseg_t *draw_segment, int wallshade, float lightleft, float lightstep, seg_t *curline, const FWallCoords &wallC, bool foggy, FDynamicColormap *basecolormap); private: - static void Render(side_t *wall, DBaseDecal *first, drawseg_t *clipper, int wallshade, float lightleft, float lightstep, seg_t *curline, FWallCoords wallC, bool foggy, int pass); + static void Render(side_t *wall, DBaseDecal *first, drawseg_t *clipper, int wallshade, float lightleft, float lightstep, seg_t *curline, FWallCoords wallC, bool foggy, FDynamicColormap 
*basecolormap, int pass); static void DrawColumn(int x, FTexture *WallSpriteTile, float maskedScaleY, bool sprflipvert, const short *mfloorclip, const short *mceilingclip); }; } diff --git a/src/swrenderer/things/r_playersprite.cpp b/src/swrenderer/things/r_playersprite.cpp index 17c1d4da7a..b94ce19417 100644 --- a/src/swrenderer/things/r_playersprite.cpp +++ b/src/swrenderer/things/r_playersprite.cpp @@ -99,6 +99,7 @@ namespace swrenderer (r_deathcamera && camera->health <= 0)) return; + FDynamicColormap *basecolormap; if (fixedlightlev < 0 && viewsector->e && viewsector->e->XFloor.lightlist.Size()) { for (i = viewsector->e->XFloor.lightlist.Size() - 1; i >= 0; i--) @@ -183,7 +184,7 @@ namespace swrenderer if ((psp->GetID() != PSP_TARGETCENTER || CrosshairImage == nullptr) && psp->GetCaller() != nullptr) { - Render(psp, camera, bobx, boby, wx, wy, r_TicFracF, spriteshade); + Render(psp, camera, bobx, boby, wx, wy, r_TicFracF, spriteshade, basecolormap); } psp = psp->GetNext(); @@ -193,7 +194,7 @@ namespace swrenderer } } - void RenderPlayerSprite::Render(DPSprite *pspr, AActor *owner, float bobx, float boby, double wx, double wy, double ticfrac, int spriteshade) + void RenderPlayerSprite::Render(DPSprite *pspr, AActor *owner, float bobx, float boby, double wx, double wy, double ticfrac, int spriteshade, FDynamicColormap *basecolormap) { double tx; int x1; diff --git a/src/swrenderer/things/r_playersprite.h b/src/swrenderer/things/r_playersprite.h index 6030c2ad33..b1eebe4310 100644 --- a/src/swrenderer/things/r_playersprite.h +++ b/src/swrenderer/things/r_playersprite.h @@ -28,7 +28,7 @@ namespace swrenderer static void RenderRemainingPlayerSprites(); private: - static void Render(DPSprite *pspr, AActor *owner, float bobx, float boby, double wx, double wy, double ticfrac, int spriteshade); + static void Render(DPSprite *pspr, AActor *owner, float bobx, float boby, double wx, double wy, double ticfrac, int spriteshade, FDynamicColormap *basecolormap); enum { 
BASEXCENTER = 160 }; enum { BASEYCENTER = 100 }; diff --git a/src/swrenderer/things/r_sprite.cpp b/src/swrenderer/things/r_sprite.cpp index 75033349cc..57e6d46d1a 100644 --- a/src/swrenderer/things/r_sprite.cpp +++ b/src/swrenderer/things/r_sprite.cpp @@ -60,7 +60,7 @@ EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor) namespace swrenderer { - void RenderSprite::Project(AActor *thing, const DVector3 &pos, FTexture *tex, const DVector2 &spriteScale, int renderflags, WaterFakeSide fakeside, F3DFloor *fakefloor, F3DFloor *fakeceiling, sector_t *current_sector, int spriteshade, bool foggy) + void RenderSprite::Project(AActor *thing, const DVector3 &pos, FTexture *tex, const DVector2 &spriteScale, int renderflags, WaterFakeSide fakeside, F3DFloor *fakefloor, F3DFloor *fakeceiling, sector_t *current_sector, int spriteshade, bool foggy, FDynamicColormap *basecolormap) { // transform the origin point double tr_x = pos.X - ViewPos.X; @@ -295,7 +295,9 @@ namespace swrenderer fixed_t centeryfrac = FLOAT2FIXED(CenterY); R_SetColorMapLight(vis->Style.BaseColormap, 0, vis->Style.ColormapNum << FRACBITS); - bool visible = R_SetPatchStyle(vis->Style.RenderStyle, vis->Style.Alpha, vis->Translation, vis->FillColor); + FDynamicColormap *basecolormap = static_cast(vis->Style.BaseColormap); + + bool visible = R_SetPatchStyle(vis->Style.RenderStyle, vis->Style.Alpha, vis->Translation, vis->FillColor, basecolormap); if (vis->Style.RenderStyle == LegacyRenderStyles[STYLE_Shaded]) { // For shaded sprites, R_SetPatchStyle sets a dc_colormap to an alpha table, but @@ -343,8 +345,6 @@ namespace swrenderer } } - R_FinishSetPatchStyle(); - NetUpdate(); } } diff --git a/src/swrenderer/things/r_sprite.h b/src/swrenderer/things/r_sprite.h index c345d85f23..7c5a68a078 100644 --- a/src/swrenderer/things/r_sprite.h +++ b/src/swrenderer/things/r_sprite.h @@ -20,7 +20,7 @@ namespace swrenderer class RenderSprite { public: - static void Project(AActor *thing, const DVector3 &pos, FTexture *tex, const 
DVector2 &spriteScale, int renderflags, WaterFakeSide fakeside, F3DFloor *fakefloor, F3DFloor *fakeceiling, sector_t *current_sector, int spriteshade, bool foggy); + static void Project(AActor *thing, const DVector3 &pos, FTexture *tex, const DVector2 &spriteScale, int renderflags, WaterFakeSide fakeside, F3DFloor *fakefloor, F3DFloor *fakeceiling, sector_t *current_sector, int spriteshade, bool foggy, FDynamicColormap *basecolormap); static void Render(vissprite_t *vis, const short *mfloorclip, const short *mceilingclip); }; } diff --git a/src/swrenderer/things/r_voxel.cpp b/src/swrenderer/things/r_voxel.cpp index 9519482889..c7cdee6607 100644 --- a/src/swrenderer/things/r_voxel.cpp +++ b/src/swrenderer/things/r_voxel.cpp @@ -46,7 +46,7 @@ EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor) namespace swrenderer { - void RenderVoxel::Project(AActor *thing, DVector3 pos, FVoxelDef *voxel, const DVector2 &spriteScale, int renderflags, WaterFakeSide fakeside, F3DFloor *fakefloor, F3DFloor *fakeceiling, sector_t *current_sector, int spriteshade, bool foggy) + void RenderVoxel::Project(AActor *thing, DVector3 pos, FVoxelDef *voxel, const DVector2 &spriteScale, int renderflags, WaterFakeSide fakeside, F3DFloor *fakefloor, F3DFloor *fakeceiling, sector_t *current_sector, int spriteshade, bool foggy, FDynamicColormap *basecolormap) { // transform the origin point double tr_x = pos.X - ViewPos.X; @@ -206,6 +206,7 @@ namespace swrenderer { mybasecolormap = GetSpecialLights(mybasecolormap->Color, mybasecolormap->Fade.InverseColor(), mybasecolormap->Desaturate); } + if (fixedlightlev >= 0) { vis->Style.BaseColormap = mybasecolormap; @@ -226,8 +227,11 @@ namespace swrenderer void RenderVoxel::Render(vissprite_t *sprite, int minZ, int maxZ, short *cliptop, short *clipbottom) { + FDynamicColormap *basecolormap = static_cast(sprite->Style.BaseColormap); + R_SetColorMapLight(sprite->Style.BaseColormap, 0, sprite->Style.ColormapNum << FRACBITS); - bool visible = 
R_SetPatchStyle(sprite->Style.RenderStyle, sprite->Style.Alpha, sprite->Translation, sprite->FillColor); + + bool visible = R_SetPatchStyle(sprite->Style.RenderStyle, sprite->Style.Alpha, sprite->Translation, sprite->FillColor, basecolormap); if (!visible) return; diff --git a/src/swrenderer/things/r_voxel.h b/src/swrenderer/things/r_voxel.h index 56180d7585..3788d0b192 100644 --- a/src/swrenderer/things/r_voxel.h +++ b/src/swrenderer/things/r_voxel.h @@ -55,7 +55,7 @@ namespace swrenderer class RenderVoxel { public: - static void Project(AActor *thing, DVector3 pos, FVoxelDef *voxel, const DVector2 &spriteScale, int renderflags, WaterFakeSide fakeside, F3DFloor *fakefloor, F3DFloor *fakeceiling, sector_t *current_sector, int spriteshade, bool foggy); + static void Project(AActor *thing, DVector3 pos, FVoxelDef *voxel, const DVector2 &spriteScale, int renderflags, WaterFakeSide fakeside, F3DFloor *fakefloor, F3DFloor *fakeceiling, sector_t *current_sector, int spriteshade, bool foggy, FDynamicColormap *basecolormap); static void Render(vissprite_t *sprite, int minZ, int maxZ, short *cliptop, short *clipbottom); static void Deinit(); diff --git a/src/swrenderer/things/r_wallsprite.cpp b/src/swrenderer/things/r_wallsprite.cpp index 4dd4823a3c..06f1b0bed0 100644 --- a/src/swrenderer/things/r_wallsprite.cpp +++ b/src/swrenderer/things/r_wallsprite.cpp @@ -62,7 +62,7 @@ EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor); namespace swrenderer { - void RenderWallSprite::Project(AActor *thing, const DVector3 &pos, FTextureID picnum, const DVector2 &scale, int renderflags, int spriteshade, bool foggy) + void RenderWallSprite::Project(AActor *thing, const DVector3 &pos, FTextureID picnum, const DVector2 &scale, int renderflags, int spriteshade, bool foggy, FDynamicColormap *basecolormap) { FWallCoords wallc; double x1, x2; @@ -165,7 +165,7 @@ namespace swrenderer } // Prepare lighting bool calclighting = false; - FDynamicColormap *usecolormap = basecolormap; + FSWColormap 
*usecolormap = spr->Style.BaseColormap; bool rereadcolormap = true; // Decals that are added to the scene must fade to black. @@ -206,12 +206,14 @@ namespace swrenderer int x = x1; - bool visible = R_SetPatchStyle(spr->Style.RenderStyle, spr->Style.Alpha, spr->Translation, spr->FillColor); + FDynamicColormap *basecolormap = static_cast(spr->Style.BaseColormap); + + bool visible = R_SetPatchStyle(spr->Style.RenderStyle, spr->Style.Alpha, spr->Translation, spr->FillColor, basecolormap); // R_SetPatchStyle can modify basecolormap. if (rereadcolormap) { - usecolormap = basecolormap; + usecolormap = spr->Style.BaseColormap; } if (!visible) @@ -232,7 +234,6 @@ namespace swrenderer x++; } } - R_FinishSetPatchStyle(); } void RenderWallSprite::DrawColumn(int x, FTexture *WallSpriteTile, float maskedScaleY, bool sprflipvert, const short *mfloorclip, const short *mceilingclip) diff --git a/src/swrenderer/things/r_wallsprite.h b/src/swrenderer/things/r_wallsprite.h index 57a19a1229..d478bf83f7 100644 --- a/src/swrenderer/things/r_wallsprite.h +++ b/src/swrenderer/things/r_wallsprite.h @@ -20,7 +20,7 @@ namespace swrenderer class RenderWallSprite { public: - static void Project(AActor *thing, const DVector3 &pos, FTextureID picnum, const DVector2 &scale, int renderflags, int spriteshade, bool foggy); + static void Project(AActor *thing, const DVector3 &pos, FTextureID picnum, const DVector2 &scale, int renderflags, int spriteshade, bool foggy, FDynamicColormap *basecolormap); static void Render(vissprite_t *spr, const short *mfloorclip, const short *mceilingclip); private: diff --git a/src/v_draw.cpp b/src/v_draw.cpp index 689546f8c8..5a2a2485af 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -189,10 +189,11 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) fixedcolormap = dc_fcolormap; bool visible; + FDynamicColormap *basecolormap = nullptr; if (r_swtruecolor) - visible = R_SetPatchStyle(parms.style, parms.Alpha, -1, parms.fillcolor); + visible = 
R_SetPatchStyle(parms.style, parms.Alpha, -1, parms.fillcolor, basecolormap); else - visible = R_SetPatchStyle(parms.style, parms.Alpha, 0, parms.fillcolor); + visible = R_SetPatchStyle(parms.style, parms.Alpha, 0, parms.fillcolor, basecolormap); BYTE *destorgsave = dc_destorg; int destheightsave = dc_destheight; @@ -287,7 +288,6 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) CenterY = centeryback; } - R_FinishSetPatchStyle (); dc_destorg = destorgsave; dc_destheight = destheightsave; From bd8d2f501f958ca2889ab84908b0070927ebf6b9 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 12 Jan 2017 22:11:25 +0100 Subject: [PATCH 701/912] Remove dc_light_list --- src/swrenderer/drawers/r_draw.cpp | 1 - src/swrenderer/drawers/r_draw.h | 1 - src/swrenderer/line/r_walldraw.cpp | 63 +++++++++++++++--------------- 3 files changed, 31 insertions(+), 34 deletions(-) diff --git a/src/swrenderer/drawers/r_draw.cpp b/src/swrenderer/drawers/r_draw.cpp index f90b63af17..8a2df4bfd3 100644 --- a/src/swrenderer/drawers/r_draw.cpp +++ b/src/swrenderer/drawers/r_draw.cpp @@ -57,7 +57,6 @@ CVAR(Bool, r_dynlights, 1, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); namespace swrenderer { double dc_texturemid; - FLightNode *dc_light_list; int ylookup[MAXHEIGHT]; uint8_t shadetables[NUMCOLORMAPS * 16 * 256]; diff --git a/src/swrenderer/drawers/r_draw.h b/src/swrenderer/drawers/r_draw.h index fd77d1c32a..6dff625d30 100644 --- a/src/swrenderer/drawers/r_draw.h +++ b/src/swrenderer/drawers/r_draw.h @@ -36,7 +36,6 @@ namespace swrenderer }; extern double dc_texturemid; - extern FLightNode *dc_light_list; namespace drawerargs { diff --git a/src/swrenderer/line/r_walldraw.cpp b/src/swrenderer/line/r_walldraw.cpp index 5a13689ac4..4503392c00 100644 --- a/src/swrenderer/line/r_walldraw.cpp +++ b/src/swrenderer/line/r_walldraw.cpp @@ -197,9 +197,9 @@ namespace swrenderer } // Draw a column with support for non-power-of-two ranges - static void Draw1Column(const FWallCoords &WallC, int x, 
int y1, int y2, WallSampler &sampler, DrawerFunc draw1column) + static void Draw1Column(const FWallCoords &WallC, int x, int y1, int y2, WallSampler &sampler, FLightNode *light_list, DrawerFunc draw1column) { - if (r_dynlights && dc_light_list) + if (r_dynlights && light_list) { // Find column position in view space float w1 = 1.0f / WallC.sz1; @@ -218,7 +218,7 @@ namespace swrenderer // Setup lights for column dc_num_lights = 0; dc_lights = lightbuffer + nextlightindex; - FLightNode *cur_node = dc_light_list; + FLightNode *cur_node = light_list; while (cur_node && nextlightindex < 64 * 1024) { if (!(cur_node->lightsource->flags2&MF2_DORMANT)) @@ -338,6 +338,7 @@ namespace swrenderer static void ProcessWallWorker( const FWallCoords &WallC, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade, fixed_t xoffset, float light, float lightstep, FDynamicColormap *basecolormap, + FLightNode *light_list, const BYTE *(*getcol)(FTexture *tex, int x), DrawerFunc drawcolumn) { if (rw_pic->UseType == FTexture::TEX_Null) @@ -394,44 +395,44 @@ namespace swrenderer if (x + 1 < x2) xmagnitude = fabs(FIXED2DBL(lwal[x + 1]) - FIXED2DBL(lwal[x])); WallSampler sampler(y1, swal[x], yrepeat, lwal[x] + xoffset, xmagnitude, rw_pic, getcol); - Draw1Column(WallC, x, y1, y2, sampler, drawcolumn); + Draw1Column(WallC, x, y1, y2, sampler, light_list, drawcolumn); } NetUpdate(); } - static void ProcessNormalWall(const FWallCoords &WallC, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade, fixed_t xoffset, float light, float lightstep, FDynamicColormap *basecolormap, const BYTE *(*getcol)(FTexture *tex, int x) = R_GetColumn) + static void ProcessNormalWall(const FWallCoords &WallC, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade, fixed_t xoffset, float light, float lightstep, FDynamicColormap *basecolormap, FLightNode *light_list, const BYTE 
*(*getcol)(FTexture *tex, int x) = R_GetColumn) { - ProcessWallWorker(WallC, x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, basecolormap, getcol, &SWPixelFormatDrawers::DrawWallColumn); + ProcessWallWorker(WallC, x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, basecolormap, light_list, getcol, &SWPixelFormatDrawers::DrawWallColumn); } - static void ProcessMaskedWall(const FWallCoords &WallC, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade, fixed_t xoffset, float light, float lightstep, FDynamicColormap *basecolormap, const BYTE *(*getcol)(FTexture *tex, int x) = R_GetColumn) + static void ProcessMaskedWall(const FWallCoords &WallC, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade, fixed_t xoffset, float light, float lightstep, FDynamicColormap *basecolormap, FLightNode *light_list, const BYTE *(*getcol)(FTexture *tex, int x) = R_GetColumn) { if (!rw_pic->bMasked) // Textures that aren't masked can use the faster ProcessNormalWall. 
{ - ProcessNormalWall(WallC, x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, basecolormap, getcol); + ProcessNormalWall(WallC, x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, basecolormap, light_list, getcol); } else { - ProcessWallWorker(WallC, x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, basecolormap, getcol, &SWPixelFormatDrawers::DrawWallMaskedColumn); + ProcessWallWorker(WallC, x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, basecolormap, light_list, getcol, &SWPixelFormatDrawers::DrawWallMaskedColumn); } } - static void ProcessTranslucentWall(const FWallCoords &WallC, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade, fixed_t xoffset, float light, float lightstep, FDynamicColormap *basecolormap, const BYTE *(*getcol)(FTexture *tex, int x) = R_GetColumn) + static void ProcessTranslucentWall(const FWallCoords &WallC, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade, fixed_t xoffset, float light, float lightstep, FDynamicColormap *basecolormap, FLightNode *light_list, const BYTE *(*getcol)(FTexture *tex, int x) = R_GetColumn) { DrawerFunc drawcol1 = R_GetTransMaskDrawer(); if (drawcol1 == nullptr) { // The current translucency is unsupported, so draw with regular ProcessMaskedWall instead. 
- ProcessMaskedWall(WallC, x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, basecolormap, getcol); + ProcessMaskedWall(WallC, x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, basecolormap, light_list, getcol); } else { - ProcessWallWorker(WallC, x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, basecolormap, getcol, drawcol1); + ProcessWallWorker(WallC, x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, basecolormap, light_list, getcol, drawcol1); } } - static void ProcessStripedWall(sector_t *frontsector, seg_t *curline, const FWallCoords &WallC, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade, fixed_t xoffset, float light, float lightstep, bool foggy, FDynamicColormap *basecolormap) + static void ProcessStripedWall(sector_t *frontsector, seg_t *curline, const FWallCoords &WallC, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade, fixed_t xoffset, float light, float lightstep, bool foggy, FDynamicColormap *basecolormap, FLightNode *light_list) { short most1[MAXWIDTH], most2[MAXWIDTH], most3[MAXWIDTH]; short *up, *down; @@ -454,7 +455,7 @@ namespace swrenderer { down[j] = clamp(most3[j], up[j], dwal[j]); } - ProcessNormalWall(WallC, x1, x2, up, down, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, basecolormap); + ProcessNormalWall(WallC, x1, x2, up, down, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, basecolormap, light_list); up = down; down = (down == most1) ? 
most2 : most1; } @@ -464,31 +465,31 @@ namespace swrenderer wallshade = LIGHT2SHADE(curline->sidedef->GetLightLevel(foggy, *lit->p_lightlevel, lit->lightsource != NULL) + R_ActualExtraLight(foggy)); } - ProcessNormalWall(WallC, x1, x2, up, dwal, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, basecolormap); + ProcessNormalWall(WallC, x1, x2, up, dwal, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, basecolormap, light_list); } - static void ProcessWall(sector_t *frontsector, seg_t *curline, const FWallCoords &WallC, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade, fixed_t xoffset, float light, float lightstep, bool mask, bool foggy, FDynamicColormap *basecolormap) + static void ProcessWall(sector_t *frontsector, seg_t *curline, const FWallCoords &WallC, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade, fixed_t xoffset, float light, float lightstep, bool mask, bool foggy, FDynamicColormap *basecolormap, FLightNode *light_list) { if (mask) { if (colfunc == basecolfunc) { - ProcessMaskedWall(WallC, x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, basecolormap); + ProcessMaskedWall(WallC, x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, basecolormap, light_list); } else { - ProcessTranslucentWall(WallC, x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, basecolormap); + ProcessTranslucentWall(WallC, x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, basecolormap, light_list); } } else { if (fixedcolormap != NULL || fixedlightlev >= 0 || !(frontsector->e && frontsector->e->XFloor.lightlist.Size())) { - ProcessNormalWall(WallC, x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, basecolormap); + ProcessNormalWall(WallC, x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, 
basecolormap, light_list); } else { - ProcessStripedWall(frontsector, curline, WallC, x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, foggy, basecolormap); + ProcessStripedWall(frontsector, curline, WallC, x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, foggy, basecolormap, light_list); } } } @@ -504,7 +505,7 @@ namespace swrenderer // //============================================================================= - static void ProcessWallNP2(sector_t *frontsector, seg_t *curline, const FWallCoords &WallC, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, double top, double bot, int wallshade, fixed_t xoffset, float light, float lightstep, bool mask, bool foggy, FDynamicColormap *basecolormap) + static void ProcessWallNP2(sector_t *frontsector, seg_t *curline, const FWallCoords &WallC, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, double top, double bot, int wallshade, fixed_t xoffset, float light, float lightstep, bool mask, bool foggy, FDynamicColormap *basecolormap, FLightNode *light_list) { short most1[MAXWIDTH], most2[MAXWIDTH], most3[MAXWIDTH]; short *up, *down; @@ -531,14 +532,14 @@ namespace swrenderer { down[j] = clamp(most3[j], up[j], dwal[j]); } - ProcessWall(frontsector, curline, WallC, x1, x2, up, down, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, mask, foggy, basecolormap); + ProcessWall(frontsector, curline, WallC, x1, x2, up, down, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, mask, foggy, basecolormap, light_list); up = down; down = (down == most1) ? 
most2 : most1; } partition -= scaledtexheight; dc_texturemid -= texheight; } - ProcessWall(frontsector, curline, WallC, x1, x2, up, dwal, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, mask, foggy, basecolormap); + ProcessWall(frontsector, curline, WallC, x1, x2, up, dwal, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, mask, foggy, basecolormap, light_list); } else { // upside down: draw strips from bottom to top @@ -555,14 +556,14 @@ namespace swrenderer { up[j] = clamp(most3[j], uwal[j], down[j]); } - ProcessWall(frontsector, curline, WallC, x1, x2, up, down, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, mask, foggy, basecolormap); + ProcessWall(frontsector, curline, WallC, x1, x2, up, down, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, mask, foggy, basecolormap, light_list); down = up; up = (up == most1) ? most2 : most1; } partition -= scaledtexheight; dc_texturemid -= texheight; } - ProcessWall(frontsector, curline, WallC, x1, x2, uwal, down, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, mask, foggy, basecolormap); + ProcessWall(frontsector, curline, WallC, x1, x2, uwal, down, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, mask, foggy, basecolormap, light_list); } } @@ -586,11 +587,11 @@ namespace swrenderer { bot = MAX(bot, clip3d->sclipBottom); } - ProcessWallNP2(frontsector, curline, WallC, x1, x2, uwal, dwal, swal, lwal, yrepeat, top, bot, wallshade, xoffset, light, lightstep, true, foggy, basecolormap); + ProcessWallNP2(frontsector, curline, WallC, x1, x2, uwal, dwal, swal, lwal, yrepeat, top, bot, wallshade, xoffset, light, lightstep, true, foggy, basecolormap, nullptr); } else { - ProcessWall(frontsector, curline, WallC, x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, true, foggy, basecolormap); + ProcessWall(frontsector, curline, WallC, x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, true, foggy, basecolormap, 
nullptr); } } @@ -598,22 +599,20 @@ namespace swrenderer void R_DrawWallSegment(sector_t *frontsector, seg_t *curline, const FWallCoords &WallC, FTexture *pic, int x1, int x2, short *walltop, short *wallbottom, float *swall, fixed_t *lwall, double yscale, double top, double bottom, bool mask, int wallshade, fixed_t xoffset, float light, float lightstep, FLightNode *light_list, bool foggy, FDynamicColormap *basecolormap) { rw_pic = pic; - dc_light_list = light_list; if (rw_pic->GetHeight() != 1 << rw_pic->HeightBits) { - ProcessWallNP2(frontsector, curline, WallC, x1, x2, walltop, wallbottom, swall, lwall, yscale, top, bottom, wallshade, xoffset, light, lightstep, false, foggy, basecolormap); + ProcessWallNP2(frontsector, curline, WallC, x1, x2, walltop, wallbottom, swall, lwall, yscale, top, bottom, wallshade, xoffset, light, lightstep, false, foggy, basecolormap, light_list); } else { - ProcessWall(frontsector, curline, WallC, x1, x2, walltop, wallbottom, swall, lwall, yscale, wallshade, xoffset, light, lightstep, false, foggy, basecolormap); + ProcessWall(frontsector, curline, WallC, x1, x2, walltop, wallbottom, swall, lwall, yscale, wallshade, xoffset, light, lightstep, false, foggy, basecolormap, light_list); } - dc_light_list = nullptr; } void R_DrawSkySegment(FTexture *pic, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade, fixed_t xoffset, float light, float lightstep, FDynamicColormap *basecolormap, const uint8_t *(*getcol)(FTexture *tex, int x)) { rw_pic = pic; FWallCoords wallC; // Not used. To do: don't use r_walldraw to draw the sky!! 
- ProcessNormalWall(wallC, x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, basecolormap, getcol); + ProcessNormalWall(wallC, x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, basecolormap, nullptr, getcol); } } From 9723078121cde7c68740b34a2ba7c38ce8c72bd5 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 12 Jan 2017 22:52:17 +0100 Subject: [PATCH 702/912] Removed dc_texturemid --- src/swrenderer/drawers/r_draw.cpp | 2 - src/swrenderer/drawers/r_draw.h | 2 - src/swrenderer/line/r_line.cpp | 9 +-- src/swrenderer/line/r_walldraw.cpp | 82 +++++++++++------------ src/swrenderer/line/r_walldraw.h | 8 +-- src/swrenderer/plane/r_skyplane.cpp | 10 +-- src/swrenderer/segments/r_drawsegment.cpp | 36 +++++----- src/swrenderer/things/r_decal.cpp | 12 ++-- src/swrenderer/things/r_decal.h | 2 +- src/swrenderer/things/r_sprite.cpp | 8 +-- src/swrenderer/things/r_wallsprite.cpp | 12 ++-- src/swrenderer/things/r_wallsprite.h | 2 +- 12 files changed, 90 insertions(+), 95 deletions(-) diff --git a/src/swrenderer/drawers/r_draw.cpp b/src/swrenderer/drawers/r_draw.cpp index 8a2df4bfd3..994bb5aa0b 100644 --- a/src/swrenderer/drawers/r_draw.cpp +++ b/src/swrenderer/drawers/r_draw.cpp @@ -56,8 +56,6 @@ CVAR(Bool, r_dynlights, 1, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); namespace swrenderer { - double dc_texturemid; - int ylookup[MAXHEIGHT]; uint8_t shadetables[NUMCOLORMAPS * 16 * 256]; FDynamicColormap ShadeFakeColormap[16]; diff --git a/src/swrenderer/drawers/r_draw.h b/src/swrenderer/drawers/r_draw.h index 6dff625d30..8d586afdd1 100644 --- a/src/swrenderer/drawers/r_draw.h +++ b/src/swrenderer/drawers/r_draw.h @@ -35,8 +35,6 @@ namespace swrenderer bool simple_shade; }; - extern double dc_texturemid; - namespace drawerargs { extern int dc_pitch; diff --git a/src/swrenderer/line/r_line.cpp b/src/swrenderer/line/r_line.cpp index dee5f9250b..416fa7efb2 100644 --- a/src/swrenderer/line/r_line.cpp +++ 
b/src/swrenderer/line/r_line.cpp @@ -1059,7 +1059,6 @@ namespace swrenderer { // one sided line if (midtexture->UseType != FTexture::TEX_Null && viewactive) { - dc_texturemid = rw_midtexturemid; rw_pic = midtexture; xscale = rw_pic->Scale.X * rw_midtexturescalex; yscale = rw_pic->Scale.Y * rw_midtexturescaley; @@ -1080,7 +1079,7 @@ namespace swrenderer { rw_offset = -rw_offset; } - R_DrawWallSegment(frontsector, curline, WallC, rw_pic, x1, x2, walltop, wallbottom, swall, lwall, yscale, MAX(rw_frontcz1, rw_frontcz2), MIN(rw_frontfz1, rw_frontfz2), false, wallshade, rw_offset, rw_light, rw_lightstep, light_list, foggy, basecolormap); + R_DrawWallSegment(frontsector, curline, WallC, rw_pic, x1, x2, walltop, wallbottom, rw_midtexturemid, swall, lwall, yscale, MAX(rw_frontcz1, rw_frontcz2), MIN(rw_frontfz1, rw_frontfz2), false, wallshade, rw_offset, rw_light, rw_lightstep, light_list, foggy, basecolormap); } fillshort(ceilingclip + x1, x2 - x1, viewheight); fillshort(floorclip + x1, x2 - x1, 0xffff); @@ -1095,7 +1094,6 @@ namespace swrenderer } if (viewactive) { - dc_texturemid = rw_toptexturemid; rw_pic = toptexture; xscale = rw_pic->Scale.X * rw_toptexturescalex; yscale = rw_pic->Scale.Y * rw_toptexturescaley; @@ -1116,7 +1114,7 @@ namespace swrenderer { rw_offset = -rw_offset; } - R_DrawWallSegment(frontsector, curline, WallC, rw_pic, x1, x2, walltop, wallupper, swall, lwall, yscale, MAX(rw_frontcz1, rw_frontcz2), MIN(rw_backcz1, rw_backcz2), false, wallshade, rw_offset, rw_light, rw_lightstep, light_list, foggy, basecolormap); + R_DrawWallSegment(frontsector, curline, WallC, rw_pic, x1, x2, walltop, wallupper, rw_toptexturemid, swall, lwall, yscale, MAX(rw_frontcz1, rw_frontcz2), MIN(rw_backcz1, rw_backcz2), false, wallshade, rw_offset, rw_light, rw_lightstep, light_list, foggy, basecolormap); } memcpy(ceilingclip + x1, wallupper + x1, (x2 - x1) * sizeof(short)); } @@ -1134,7 +1132,6 @@ namespace swrenderer } if (viewactive) { - dc_texturemid = rw_bottomtexturemid; 
rw_pic = bottomtexture; xscale = rw_pic->Scale.X * rw_bottomtexturescalex; yscale = rw_pic->Scale.Y * rw_bottomtexturescaley; @@ -1155,7 +1152,7 @@ namespace swrenderer { rw_offset = -rw_offset; } - R_DrawWallSegment(frontsector, curline, WallC, rw_pic, x1, x2, walllower, wallbottom, swall, lwall, yscale, MAX(rw_backfz1, rw_backfz2), MIN(rw_frontfz1, rw_frontfz2), false, wallshade, rw_offset, rw_light, rw_lightstep, light_list, foggy, basecolormap); + R_DrawWallSegment(frontsector, curline, WallC, rw_pic, x1, x2, walllower, wallbottom, rw_bottomtexturemid, swall, lwall, yscale, MAX(rw_backfz1, rw_backfz2), MIN(rw_frontfz1, rw_frontfz2), false, wallshade, rw_offset, rw_light, rw_lightstep, light_list, foggy, basecolormap); } memcpy(floorclip + x1, walllower + x1, (x2 - x1) * sizeof(short)); } diff --git a/src/swrenderer/line/r_walldraw.cpp b/src/swrenderer/line/r_walldraw.cpp index 4503392c00..391d54ce3b 100644 --- a/src/swrenderer/line/r_walldraw.cpp +++ b/src/swrenderer/line/r_walldraw.cpp @@ -76,7 +76,7 @@ namespace swrenderer return tex->GetColumn(col, nullptr); } - WallSampler::WallSampler(int y1, float swal, double yrepeat, fixed_t xoffset, double xmagnitude, FTexture *texture, const BYTE*(*getcol)(FTexture *texture, int x)) + WallSampler::WallSampler(int y1, double texturemid, float swal, double yrepeat, fixed_t xoffset, double xmagnitude, FTexture *texture, const BYTE*(*getcol)(FTexture *texture, int x)) { xoffset += FLOAT2FIXED(xmagnitude * 0.5); @@ -92,7 +92,7 @@ namespace swrenderer // Find start uv in [0-base_height[ range. // Not using xs_ToFixed because it rounds the result and we need something that always rounds down to stay within the range. 
double uv_stepd = swal * yrepeat; - double v = (dc_texturemid + uv_stepd * (y1 - CenterY + 0.5)) / height; + double v = (texturemid + uv_stepd * (y1 - CenterY + 0.5)) / height; v = v - floor(v); v *= height; v *= (1 << uv_fracbits); @@ -117,7 +117,7 @@ namespace swrenderer { // Normalize to 0-1 range: double uv_stepd = swal * yrepeat; - double v = (dc_texturemid + uv_stepd * (y1 - CenterY + 0.5)) / texture->GetHeight(); + double v = (texturemid + uv_stepd * (y1 - CenterY + 0.5)) / texture->GetHeight(); v = v - floor(v); double v_step = uv_stepd / texture->GetHeight(); @@ -337,7 +337,7 @@ namespace swrenderer static void ProcessWallWorker( const FWallCoords &WallC, - int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade, fixed_t xoffset, float light, float lightstep, FDynamicColormap *basecolormap, + int x1, int x2, short *uwal, short *dwal, double texturemid, float *swal, fixed_t *lwal, double yrepeat, int wallshade, fixed_t xoffset, float light, float lightstep, FDynamicColormap *basecolormap, FLightNode *light_list, const BYTE *(*getcol)(FTexture *tex, int x), DrawerFunc drawcolumn) { @@ -350,7 +350,7 @@ namespace swrenderer { // Hack for one pixel tall textures fracbits = 0; yrepeat = 0; - dc_texturemid = 0; + texturemid = 0; } dc_wall_fracbits = r_swtruecolor ? 
FRACBITS : fracbits; @@ -394,45 +394,45 @@ namespace swrenderer if (x + 1 < x2) xmagnitude = fabs(FIXED2DBL(lwal[x + 1]) - FIXED2DBL(lwal[x])); - WallSampler sampler(y1, swal[x], yrepeat, lwal[x] + xoffset, xmagnitude, rw_pic, getcol); + WallSampler sampler(y1, texturemid, swal[x], yrepeat, lwal[x] + xoffset, xmagnitude, rw_pic, getcol); Draw1Column(WallC, x, y1, y2, sampler, light_list, drawcolumn); } NetUpdate(); } - static void ProcessNormalWall(const FWallCoords &WallC, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade, fixed_t xoffset, float light, float lightstep, FDynamicColormap *basecolormap, FLightNode *light_list, const BYTE *(*getcol)(FTexture *tex, int x) = R_GetColumn) + static void ProcessNormalWall(const FWallCoords &WallC, int x1, int x2, short *uwal, short *dwal, double texturemid, float *swal, fixed_t *lwal, double yrepeat, int wallshade, fixed_t xoffset, float light, float lightstep, FDynamicColormap *basecolormap, FLightNode *light_list, const BYTE *(*getcol)(FTexture *tex, int x) = R_GetColumn) { - ProcessWallWorker(WallC, x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, basecolormap, light_list, getcol, &SWPixelFormatDrawers::DrawWallColumn); + ProcessWallWorker(WallC, x1, x2, uwal, dwal, texturemid, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, basecolormap, light_list, getcol, &SWPixelFormatDrawers::DrawWallColumn); } - static void ProcessMaskedWall(const FWallCoords &WallC, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade, fixed_t xoffset, float light, float lightstep, FDynamicColormap *basecolormap, FLightNode *light_list, const BYTE *(*getcol)(FTexture *tex, int x) = R_GetColumn) + static void ProcessMaskedWall(const FWallCoords &WallC, int x1, int x2, short *uwal, short *dwal, double texturemid, float *swal, fixed_t *lwal, double yrepeat, int wallshade, fixed_t xoffset, float light, float 
lightstep, FDynamicColormap *basecolormap, FLightNode *light_list, const BYTE *(*getcol)(FTexture *tex, int x) = R_GetColumn) { if (!rw_pic->bMasked) // Textures that aren't masked can use the faster ProcessNormalWall. { - ProcessNormalWall(WallC, x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, basecolormap, light_list, getcol); + ProcessNormalWall(WallC, x1, x2, uwal, dwal, texturemid, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, basecolormap, light_list, getcol); } else { - ProcessWallWorker(WallC, x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, basecolormap, light_list, getcol, &SWPixelFormatDrawers::DrawWallMaskedColumn); + ProcessWallWorker(WallC, x1, x2, uwal, dwal, texturemid, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, basecolormap, light_list, getcol, &SWPixelFormatDrawers::DrawWallMaskedColumn); } } - static void ProcessTranslucentWall(const FWallCoords &WallC, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade, fixed_t xoffset, float light, float lightstep, FDynamicColormap *basecolormap, FLightNode *light_list, const BYTE *(*getcol)(FTexture *tex, int x) = R_GetColumn) + static void ProcessTranslucentWall(const FWallCoords &WallC, int x1, int x2, short *uwal, short *dwal, double texturemid, float *swal, fixed_t *lwal, double yrepeat, int wallshade, fixed_t xoffset, float light, float lightstep, FDynamicColormap *basecolormap, FLightNode *light_list, const BYTE *(*getcol)(FTexture *tex, int x) = R_GetColumn) { DrawerFunc drawcol1 = R_GetTransMaskDrawer(); if (drawcol1 == nullptr) { // The current translucency is unsupported, so draw with regular ProcessMaskedWall instead. 
- ProcessMaskedWall(WallC, x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, basecolormap, light_list, getcol); + ProcessMaskedWall(WallC, x1, x2, uwal, dwal, texturemid, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, basecolormap, light_list, getcol); } else { - ProcessWallWorker(WallC, x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, basecolormap, light_list, getcol, drawcol1); + ProcessWallWorker(WallC, x1, x2, uwal, dwal, texturemid, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, basecolormap, light_list, getcol, drawcol1); } } - static void ProcessStripedWall(sector_t *frontsector, seg_t *curline, const FWallCoords &WallC, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade, fixed_t xoffset, float light, float lightstep, bool foggy, FDynamicColormap *basecolormap, FLightNode *light_list) + static void ProcessStripedWall(sector_t *frontsector, seg_t *curline, const FWallCoords &WallC, int x1, int x2, short *uwal, short *dwal, double texturemid, float *swal, fixed_t *lwal, double yrepeat, int wallshade, fixed_t xoffset, float light, float lightstep, bool foggy, FDynamicColormap *basecolormap, FLightNode *light_list) { short most1[MAXWIDTH], most2[MAXWIDTH], most3[MAXWIDTH]; short *up, *down; @@ -455,7 +455,7 @@ namespace swrenderer { down[j] = clamp(most3[j], up[j], dwal[j]); } - ProcessNormalWall(WallC, x1, x2, up, down, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, basecolormap, light_list); + ProcessNormalWall(WallC, x1, x2, up, down, texturemid, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, basecolormap, light_list); up = down; down = (down == most1) ? 
most2 : most1; } @@ -465,31 +465,31 @@ namespace swrenderer wallshade = LIGHT2SHADE(curline->sidedef->GetLightLevel(foggy, *lit->p_lightlevel, lit->lightsource != NULL) + R_ActualExtraLight(foggy)); } - ProcessNormalWall(WallC, x1, x2, up, dwal, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, basecolormap, light_list); + ProcessNormalWall(WallC, x1, x2, up, dwal, texturemid, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, basecolormap, light_list); } - static void ProcessWall(sector_t *frontsector, seg_t *curline, const FWallCoords &WallC, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade, fixed_t xoffset, float light, float lightstep, bool mask, bool foggy, FDynamicColormap *basecolormap, FLightNode *light_list) + static void ProcessWall(sector_t *frontsector, seg_t *curline, const FWallCoords &WallC, int x1, int x2, short *uwal, short *dwal, double texturemid, float *swal, fixed_t *lwal, double yrepeat, int wallshade, fixed_t xoffset, float light, float lightstep, bool mask, bool foggy, FDynamicColormap *basecolormap, FLightNode *light_list) { if (mask) { if (colfunc == basecolfunc) { - ProcessMaskedWall(WallC, x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, basecolormap, light_list); + ProcessMaskedWall(WallC, x1, x2, uwal, dwal, texturemid, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, basecolormap, light_list); } else { - ProcessTranslucentWall(WallC, x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, basecolormap, light_list); + ProcessTranslucentWall(WallC, x1, x2, uwal, dwal, texturemid, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, basecolormap, light_list); } } else { if (fixedcolormap != NULL || fixedlightlev >= 0 || !(frontsector->e && frontsector->e->XFloor.lightlist.Size())) { - ProcessNormalWall(WallC, x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, basecolormap, 
light_list); + ProcessNormalWall(WallC, x1, x2, uwal, dwal, texturemid, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, basecolormap, light_list); } else { - ProcessStripedWall(frontsector, curline, WallC, x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, foggy, basecolormap, light_list); + ProcessStripedWall(frontsector, curline, WallC, x1, x2, uwal, dwal, texturemid, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, foggy, basecolormap, light_list); } } } @@ -505,7 +505,7 @@ namespace swrenderer // //============================================================================= - static void ProcessWallNP2(sector_t *frontsector, seg_t *curline, const FWallCoords &WallC, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, double top, double bot, int wallshade, fixed_t xoffset, float light, float lightstep, bool mask, bool foggy, FDynamicColormap *basecolormap, FLightNode *light_list) + static void ProcessWallNP2(sector_t *frontsector, seg_t *curline, const FWallCoords &WallC, int x1, int x2, short *uwal, short *dwal, double texturemid, float *swal, fixed_t *lwal, double yrepeat, double top, double bot, int wallshade, fixed_t xoffset, float light, float lightstep, bool mask, bool foggy, FDynamicColormap *basecolormap, FLightNode *light_list) { short most1[MAXWIDTH], most2[MAXWIDTH], most3[MAXWIDTH]; short *up, *down; @@ -515,14 +515,14 @@ namespace swrenderer if (yrepeat >= 0) { // normal orientation: draw strips from top to bottom - partition = top - fmod(top - dc_texturemid / yrepeat - ViewPos.Z, scaledtexheight); + partition = top - fmod(top - texturemid / yrepeat - ViewPos.Z, scaledtexheight); if (partition == top) { partition -= scaledtexheight; } up = uwal; down = most1; - dc_texturemid = (partition - ViewPos.Z) * yrepeat + texheight; + texturemid = (partition - ViewPos.Z) * yrepeat + texheight; while (partition > bot) { int j = R_CreateWallSegmentY(most3, partition - ViewPos.Z, 
&WallC); @@ -532,21 +532,21 @@ namespace swrenderer { down[j] = clamp(most3[j], up[j], dwal[j]); } - ProcessWall(frontsector, curline, WallC, x1, x2, up, down, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, mask, foggy, basecolormap, light_list); + ProcessWall(frontsector, curline, WallC, x1, x2, up, down, texturemid, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, mask, foggy, basecolormap, light_list); up = down; down = (down == most1) ? most2 : most1; } partition -= scaledtexheight; - dc_texturemid -= texheight; + texturemid -= texheight; } - ProcessWall(frontsector, curline, WallC, x1, x2, up, dwal, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, mask, foggy, basecolormap, light_list); + ProcessWall(frontsector, curline, WallC, x1, x2, up, dwal, texturemid, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, mask, foggy, basecolormap, light_list); } else { // upside down: draw strips from bottom to top - partition = bot - fmod(bot - dc_texturemid / yrepeat - ViewPos.Z, scaledtexheight); + partition = bot - fmod(bot - texturemid / yrepeat - ViewPos.Z, scaledtexheight); up = most1; down = dwal; - dc_texturemid = (partition - ViewPos.Z) * yrepeat + texheight; + texturemid = (partition - ViewPos.Z) * yrepeat + texheight; while (partition < top) { int j = R_CreateWallSegmentY(most3, partition - ViewPos.Z, &WallC); @@ -556,18 +556,18 @@ namespace swrenderer { up[j] = clamp(most3[j], uwal[j], down[j]); } - ProcessWall(frontsector, curline, WallC, x1, x2, up, down, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, mask, foggy, basecolormap, light_list); + ProcessWall(frontsector, curline, WallC, x1, x2, up, down, texturemid, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, mask, foggy, basecolormap, light_list); down = up; up = (up == most1) ? 
most2 : most1; } partition -= scaledtexheight; - dc_texturemid -= texheight; + texturemid -= texheight; } - ProcessWall(frontsector, curline, WallC, x1, x2, uwal, down, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, mask, foggy, basecolormap, light_list); + ProcessWall(frontsector, curline, WallC, x1, x2, uwal, down, texturemid, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, mask, foggy, basecolormap, light_list); } } - void R_DrawDrawSeg(sector_t *frontsector, seg_t *curline, const FWallCoords &WallC, FTexture *pic, drawseg_t *ds, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade, fixed_t xoffset, float light, float lightstep, bool foggy, FDynamicColormap *basecolormap) + void R_DrawDrawSeg(sector_t *frontsector, seg_t *curline, const FWallCoords &WallC, FTexture *pic, drawseg_t *ds, int x1, int x2, short *uwal, short *dwal, double texturemid, float *swal, fixed_t *lwal, double yrepeat, int wallshade, fixed_t xoffset, float light, float lightstep, bool foggy, FDynamicColormap *basecolormap) { rw_pic = pic; if (rw_pic->GetHeight() != 1 << rw_pic->HeightBits) @@ -587,32 +587,32 @@ namespace swrenderer { bot = MAX(bot, clip3d->sclipBottom); } - ProcessWallNP2(frontsector, curline, WallC, x1, x2, uwal, dwal, swal, lwal, yrepeat, top, bot, wallshade, xoffset, light, lightstep, true, foggy, basecolormap, nullptr); + ProcessWallNP2(frontsector, curline, WallC, x1, x2, uwal, dwal, texturemid, swal, lwal, yrepeat, top, bot, wallshade, xoffset, light, lightstep, true, foggy, basecolormap, nullptr); } else { - ProcessWall(frontsector, curline, WallC, x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, true, foggy, basecolormap, nullptr); + ProcessWall(frontsector, curline, WallC, x1, x2, uwal, dwal, texturemid, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, true, foggy, basecolormap, nullptr); } } - void R_DrawWallSegment(sector_t *frontsector, seg_t *curline, 
const FWallCoords &WallC, FTexture *pic, int x1, int x2, short *walltop, short *wallbottom, float *swall, fixed_t *lwall, double yscale, double top, double bottom, bool mask, int wallshade, fixed_t xoffset, float light, float lightstep, FLightNode *light_list, bool foggy, FDynamicColormap *basecolormap) + void R_DrawWallSegment(sector_t *frontsector, seg_t *curline, const FWallCoords &WallC, FTexture *pic, int x1, int x2, short *walltop, short *wallbottom, double texturemid, float *swall, fixed_t *lwall, double yscale, double top, double bottom, bool mask, int wallshade, fixed_t xoffset, float light, float lightstep, FLightNode *light_list, bool foggy, FDynamicColormap *basecolormap) { rw_pic = pic; if (rw_pic->GetHeight() != 1 << rw_pic->HeightBits) { - ProcessWallNP2(frontsector, curline, WallC, x1, x2, walltop, wallbottom, swall, lwall, yscale, top, bottom, wallshade, xoffset, light, lightstep, false, foggy, basecolormap, light_list); + ProcessWallNP2(frontsector, curline, WallC, x1, x2, walltop, wallbottom, texturemid, swall, lwall, yscale, top, bottom, wallshade, xoffset, light, lightstep, false, foggy, basecolormap, light_list); } else { - ProcessWall(frontsector, curline, WallC, x1, x2, walltop, wallbottom, swall, lwall, yscale, wallshade, xoffset, light, lightstep, false, foggy, basecolormap, light_list); + ProcessWall(frontsector, curline, WallC, x1, x2, walltop, wallbottom, texturemid, swall, lwall, yscale, wallshade, xoffset, light, lightstep, false, foggy, basecolormap, light_list); } } - void R_DrawSkySegment(FTexture *pic, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade, fixed_t xoffset, float light, float lightstep, FDynamicColormap *basecolormap, const uint8_t *(*getcol)(FTexture *tex, int x)) + void R_DrawSkySegment(FTexture *pic, int x1, int x2, short *uwal, short *dwal, double texturemid, float *swal, fixed_t *lwal, double yrepeat, int wallshade, fixed_t xoffset, float light, float lightstep, 
FDynamicColormap *basecolormap, const uint8_t *(*getcol)(FTexture *tex, int x)) { rw_pic = pic; FWallCoords wallC; // Not used. To do: don't use r_walldraw to draw the sky!! - ProcessNormalWall(wallC, x1, x2, uwal, dwal, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, basecolormap, nullptr, getcol); + ProcessNormalWall(wallC, x1, x2, uwal, dwal, texturemid, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, basecolormap, nullptr, getcol); } } diff --git a/src/swrenderer/line/r_walldraw.h b/src/swrenderer/line/r_walldraw.h index d2d3ffbc1a..f7debf908f 100644 --- a/src/swrenderer/line/r_walldraw.h +++ b/src/swrenderer/line/r_walldraw.h @@ -25,7 +25,7 @@ namespace swrenderer struct WallSampler { WallSampler() { } - WallSampler(int y1, float swal, double yrepeat, fixed_t xoffset, double xmagnitude, FTexture *texture, const BYTE*(*getcol)(FTexture *texture, int x)); + WallSampler(int y1, double texturemid, float swal, double yrepeat, fixed_t xoffset, double xmagnitude, FTexture *texture, const BYTE*(*getcol)(FTexture *texture, int x)); uint32_t uv_pos; uint32_t uv_step; @@ -37,7 +37,7 @@ namespace swrenderer uint32_t height; }; - void R_DrawWallSegment(sector_t *frontsector, seg_t *curline, const FWallCoords &WallC, FTexture *rw_pic, int x1, int x2, short *walltop, short *wallbottom, float *swall, fixed_t *lwall, double yscale, double top, double bottom, bool mask, int wallshade, fixed_t xoffset, float light, float lightstep, FLightNode *light_list, bool foggy, FDynamicColormap *basecolormap); - void R_DrawSkySegment(FTexture *rw_pic, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, int wallshade, fixed_t xoffset, float light, float lightstep, FDynamicColormap *basecolormap, const uint8_t *(*getcol)(FTexture *tex, int col)); - void R_DrawDrawSeg(sector_t *frontsector, seg_t *curline, const FWallCoords &WallC, FTexture *rw_pic, drawseg_t *ds, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, 
double yrepeat, int wallshade, fixed_t xoffset, float light, float lightstep, bool foggy, FDynamicColormap *basecolormap); + void R_DrawWallSegment(sector_t *frontsector, seg_t *curline, const FWallCoords &WallC, FTexture *rw_pic, int x1, int x2, short *walltop, short *wallbottom, double texturemid, float *swall, fixed_t *lwall, double yscale, double top, double bottom, bool mask, int wallshade, fixed_t xoffset, float light, float lightstep, FLightNode *light_list, bool foggy, FDynamicColormap *basecolormap); + void R_DrawSkySegment(FTexture *rw_pic, int x1, int x2, short *uwal, short *dwal, double texturemid, float *swal, fixed_t *lwal, double yrepeat, int wallshade, fixed_t xoffset, float light, float lightstep, FDynamicColormap *basecolormap, const uint8_t *(*getcol)(FTexture *tex, int col)); + void R_DrawDrawSeg(sector_t *frontsector, seg_t *curline, const FWallCoords &WallC, FTexture *rw_pic, drawseg_t *ds, int x1, int x2, short *uwal, short *dwal, double texturemid, float *swal, fixed_t *lwal, double yrepeat, int wallshade, fixed_t xoffset, float light, float lightstep, bool foggy, FDynamicColormap *basecolormap); } diff --git a/src/swrenderer/plane/r_skyplane.cpp b/src/swrenderer/plane/r_skyplane.cpp index 14b494a812..2b73150630 100644 --- a/src/swrenderer/plane/r_skyplane.cpp +++ b/src/swrenderer/plane/r_skyplane.cpp @@ -436,7 +436,7 @@ namespace swrenderer } frontyScale = frontskytex->Scale.Y; - dc_texturemid = skymid * frontyScale; + double texturemid = skymid * frontyScale; if (1 << frontskytex->HeightBits == frontskytex->GetHeight()) { // The texture tiles nicely @@ -445,7 +445,7 @@ namespace swrenderer lastskycol[x] = 0xffffffff; lastskycol_bgra[x] = 0xffffffff; } - R_DrawSkySegment(frontskytex, pl->left, pl->right, (short *)pl->top, (short *)pl->bottom, swall, lwall, + R_DrawSkySegment(frontskytex, pl->left, pl->right, (short *)pl->top, (short *)pl->bottom, texturemid, swall, lwall, frontyScale, 0, 0, 0.0f, 0.0f, nullptr, backskytex == nullptr ? 
RenderSkyPlane::GetOneSkyColumn : RenderSkyPlane::GetTwoSkyColumns); } else @@ -469,7 +469,7 @@ namespace swrenderer if (topfrac < 0) topfrac += frontskytex->GetHeight(); yl = 0; yh = short((frontskytex->GetHeight() - topfrac) * frontyScale); - dc_texturemid = topfrac - iscale * (1 - CenterY); + double texturemid = topfrac - iscale * (1 - CenterY); while (yl < viewheight) { @@ -483,10 +483,10 @@ namespace swrenderer lastskycol[x] = 0xffffffff; lastskycol_bgra[x] = 0xffffffff; } - R_DrawSkySegment(frontskytex, pl->left, pl->right, top, bot, swall, lwall, frontskytex->Scale.Y, 0, 0, 0.0f, 0.0f, nullptr, backskytex == nullptr ? RenderSkyPlane::GetOneSkyColumn : RenderSkyPlane::GetTwoSkyColumns); + R_DrawSkySegment(frontskytex, pl->left, pl->right, top, bot, texturemid, swall, lwall, frontskytex->Scale.Y, 0, 0, 0.0f, 0.0f, nullptr, backskytex == nullptr ? RenderSkyPlane::GetOneSkyColumn : RenderSkyPlane::GetTwoSkyColumns); yl = yh; yh += drawheight; - dc_texturemid = iscale * (centery - yl - 1); + texturemid = iscale * (centery - yl - 1); } } diff --git a/src/swrenderer/segments/r_drawsegment.cpp b/src/swrenderer/segments/r_drawsegment.cpp index 5b7503c38f..68f54b05f4 100644 --- a/src/swrenderer/segments/r_drawsegment.cpp +++ b/src/swrenderer/segments/r_drawsegment.cpp @@ -232,13 +232,15 @@ namespace swrenderer { texheight = texheight / texheightscale; } + + double texturemid; if (curline->linedef->flags & ML_DONTPEGBOTTOM) { - dc_texturemid = MAX(frontsector->GetPlaneTexZ(sector_t::floor), backsector->GetPlaneTexZ(sector_t::floor)) + texheight; + texturemid = MAX(frontsector->GetPlaneTexZ(sector_t::floor), backsector->GetPlaneTexZ(sector_t::floor)) + texheight; } else { - dc_texturemid = MIN(frontsector->GetPlaneTexZ(sector_t::ceiling), backsector->GetPlaneTexZ(sector_t::ceiling)); + texturemid = MIN(frontsector->GetPlaneTexZ(sector_t::ceiling), backsector->GetPlaneTexZ(sector_t::ceiling)); } rowoffset = curline->sidedef->GetTextureYOffset(side_t::mid); @@ -257,21 
+259,21 @@ namespace swrenderer { // rowoffset is added before the multiply so that the masked texture will // still be positioned in world units rather than texels. - dc_texturemid += rowoffset - ViewPos.Z; - textop = dc_texturemid; - dc_texturemid *= MaskedScaleY; + texturemid += rowoffset - ViewPos.Z; + textop = texturemid; + texturemid *= MaskedScaleY; } else { // rowoffset is added outside the multiply so that it positions the texture // by texels instead of world units. - textop = dc_texturemid + rowoffset / MaskedScaleY - ViewPos.Z; - dc_texturemid = (dc_texturemid - ViewPos.Z) * MaskedScaleY + rowoffset; + textop = texturemid + rowoffset / MaskedScaleY - ViewPos.Z; + texturemid = (texturemid - ViewPos.Z) * MaskedScaleY + rowoffset; } if (sprflipvert) { MaskedScaleY = -MaskedScaleY; - dc_texturemid -= tex->GetHeight() << FRACBITS; + texturemid -= tex->GetHeight() << FRACBITS; } // [RH] Don't bother drawing segs that are completely offscreen @@ -356,9 +358,9 @@ namespace swrenderer fixed_t iscale = xs_Fix<16>::ToFix(MaskedSWall[x] * MaskedScaleY); double sprtopscreen; if (sprflipvert) - sprtopscreen = CenterY + dc_texturemid * spryscale; + sprtopscreen = CenterY + texturemid * spryscale; else - sprtopscreen = CenterY - dc_texturemid * spryscale; + sprtopscreen = CenterY - texturemid * spryscale; R_DrawMaskedColumn(x, iscale, tex, maskedtexturecol[x], spryscale, sprtopscreen, sprflipvert, mfloorclip, mceilingclip); @@ -373,13 +375,13 @@ namespace swrenderer { // rowoffset is added before the multiply so that the masked texture will // still be positioned in world units rather than texels. - dc_texturemid = (dc_texturemid - ViewPos.Z + rowoffset) * MaskedScaleY; + texturemid = (texturemid - ViewPos.Z + rowoffset) * MaskedScaleY; } else { // rowoffset is added outside the multiply so that it positions the texture // by texels instead of world units. 
- dc_texturemid = (dc_texturemid - ViewPos.Z) * MaskedScaleY + rowoffset; + texturemid = (texturemid - ViewPos.Z) * MaskedScaleY + rowoffset; } WallC.sz1 = ds->sz1; @@ -421,7 +423,7 @@ namespace swrenderer rw_offset = 0; rw_pic = tex; - R_DrawDrawSeg(frontsector, curline, WallC, rw_pic, ds, x1, x2, mceilingclip, mfloorclip, MaskedSWall, maskedtexturecol, ds->yscale, wallshade, rw_offset, rw_light, rw_lightstep, ds->foggy, basecolormap); + R_DrawDrawSeg(frontsector, curline, WallC, rw_pic, ds, x1, x2, mceilingclip, mfloorclip, texturemid, MaskedSWall, maskedtexturecol, ds->yscale, wallshade, rw_offset, rw_light, rw_lightstep, ds->foggy, basecolormap); } clearfog: @@ -498,20 +500,20 @@ namespace swrenderer { rowoffset += rw_pic->GetHeight(); } - dc_texturemid = (planez - ViewPos.Z) * yscale; + double texturemid = (planez - ViewPos.Z) * yscale; if (rw_pic->bWorldPanning) { // rowoffset is added before the multiply so that the masked texture will // still be positioned in world units rather than texels. - dc_texturemid = dc_texturemid + rowoffset * yscale; + texturemid = texturemid + rowoffset * yscale; rw_offset = xs_RoundToInt(rw_offset * xscale); } else { // rowoffset is added outside the multiply so that it positions the texture // by texels instead of world units. 
- dc_texturemid += rowoffset; + texturemid += rowoffset; } if (fixedlightlev >= 0) @@ -545,7 +547,7 @@ namespace swrenderer } PrepLWall(lwall, curline->sidedef->TexelLength*xscale, ds->sx1, ds->sx2, WallT); - R_DrawDrawSeg(frontsector, curline, WallC, rw_pic, ds, x1, x2, wallupper, walllower, MaskedSWall, lwall, yscale, wallshade, rw_offset, rw_light, rw_lightstep, ds->foggy, basecolormap); + R_DrawDrawSeg(frontsector, curline, WallC, rw_pic, ds, x1, x2, wallupper, walllower, texturemid, MaskedSWall, lwall, yscale, wallshade, rw_offset, rw_light, rw_lightstep, ds->foggy, basecolormap); } // kg3D - walls of fake floors diff --git a/src/swrenderer/things/r_decal.cpp b/src/swrenderer/things/r_decal.cpp index 1f0c02d0ac..554864670c 100644 --- a/src/swrenderer/things/r_decal.cpp +++ b/src/swrenderer/things/r_decal.cpp @@ -214,7 +214,7 @@ namespace swrenderer } yscale = decal->ScaleY; - dc_texturemid = WallSpriteTile->TopOffset + (zpos - ViewPos.Z) / yscale; + double texturemid = WallSpriteTile->TopOffset + (zpos - ViewPos.Z) / yscale; // Clip sprite to drawseg x1 = MAX(clipper->x1, x1); @@ -265,7 +265,7 @@ namespace swrenderer { sprflipvert = true; yscale = -yscale; - dc_texturemid -= WallSpriteTile->GetHeight(); + texturemid -= WallSpriteTile->GetHeight(); } else { @@ -293,7 +293,7 @@ namespace swrenderer { // calculate lighting R_SetColorMapLight(usecolormap, light, wallshade); } - DrawColumn(x, WallSpriteTile, maskedScaleY, sprflipvert, mfloorclip, mceilingclip); + DrawColumn(x, WallSpriteTile, texturemid, maskedScaleY, sprflipvert, mfloorclip, mceilingclip); light += lightstep; x++; } @@ -312,15 +312,15 @@ namespace swrenderer WallC = savecoord; } - void RenderDecal::DrawColumn(int x, FTexture *WallSpriteTile, float maskedScaleY, bool sprflipvert, const short *mfloorclip, const short *mceilingclip) + void RenderDecal::DrawColumn(int x, FTexture *WallSpriteTile, double texturemid, float maskedScaleY, bool sprflipvert, const short *mfloorclip, const short 
*mceilingclip) { float iscale = swall[x] * maskedScaleY; double spryscale = 1 / iscale; double sprtopscreen; if (sprflipvert) - sprtopscreen = CenterY + dc_texturemid * spryscale; + sprtopscreen = CenterY + texturemid * spryscale; else - sprtopscreen = CenterY - dc_texturemid * spryscale; + sprtopscreen = CenterY - texturemid * spryscale; R_DrawMaskedColumn(x, FLOAT2FIXED(iscale), WallSpriteTile, lwall[x], spryscale, sprtopscreen, sprflipvert, mfloorclip, mceilingclip); } diff --git a/src/swrenderer/things/r_decal.h b/src/swrenderer/things/r_decal.h index 2bee90036d..1bd9f85b70 100644 --- a/src/swrenderer/things/r_decal.h +++ b/src/swrenderer/things/r_decal.h @@ -27,6 +27,6 @@ namespace swrenderer private: static void Render(side_t *wall, DBaseDecal *first, drawseg_t *clipper, int wallshade, float lightleft, float lightstep, seg_t *curline, FWallCoords wallC, bool foggy, FDynamicColormap *basecolormap, int pass); - static void DrawColumn(int x, FTexture *WallSpriteTile, float maskedScaleY, bool sprflipvert, const short *mfloorclip, const short *mceilingclip); + static void DrawColumn(int x, FTexture *WallSpriteTile, double texturemid, float maskedScaleY, bool sprflipvert, const short *mfloorclip, const short *mceilingclip); }; } diff --git a/src/swrenderer/things/r_sprite.cpp b/src/swrenderer/things/r_sprite.cpp index 57e6d46d1a..c7ec7fe2d4 100644 --- a/src/swrenderer/things/r_sprite.cpp +++ b/src/swrenderer/things/r_sprite.cpp @@ -314,20 +314,20 @@ namespace swrenderer fixed_t iscale = FLOAT2FIXED(1 / vis->yscale); frac = vis->startfrac; xiscale = vis->xiscale; - dc_texturemid = vis->texturemid; + double texturemid = vis->texturemid; if (vis->renderflags & RF_YFLIP) { sprflipvert = true; spryscale = -spryscale; iscale = -iscale; - dc_texturemid -= vis->pic->GetHeight(); - sprtopscreen = CenterY + dc_texturemid * spryscale; + texturemid -= vis->pic->GetHeight(); + sprtopscreen = CenterY + texturemid * spryscale; } else { sprflipvert = false; - sprtopscreen = 
CenterY - dc_texturemid * spryscale; + sprtopscreen = CenterY - texturemid * spryscale; } int x = vis->x1; diff --git a/src/swrenderer/things/r_wallsprite.cpp b/src/swrenderer/things/r_wallsprite.cpp index 06f1b0bed0..6f4fd79ec7 100644 --- a/src/swrenderer/things/r_wallsprite.cpp +++ b/src/swrenderer/things/r_wallsprite.cpp @@ -153,7 +153,7 @@ namespace swrenderer WallT.InitFromWallCoords(&spr->wallc); PrepWall(swall, lwall, spr->pic->GetWidth() << FRACBITS, x1, x2, WallT); iyscale = 1 / spr->yscale; - dc_texturemid = (spr->gzt - ViewPos.Z) * iyscale; + double texturemid = (spr->gzt - ViewPos.Z) * iyscale; if (spr->renderflags & RF_XFLIP) { int right = (spr->pic->GetWidth() << FRACBITS) - 1; @@ -195,7 +195,7 @@ namespace swrenderer { sprflipvert = true; iyscale = -iyscale; - dc_texturemid -= spr->pic->GetHeight(); + texturemid -= spr->pic->GetHeight(); } else { @@ -229,22 +229,22 @@ namespace swrenderer R_SetColorMapLight(usecolormap, light, shade); } if (!RenderTranslucentPass::ClipSpriteColumnWithPortals(x, spr)) - DrawColumn(x, WallSpriteTile, maskedScaleY, sprflipvert, mfloorclip, mceilingclip); + DrawColumn(x, WallSpriteTile, texturemid, maskedScaleY, sprflipvert, mfloorclip, mceilingclip); light += lightstep; x++; } } } - void RenderWallSprite::DrawColumn(int x, FTexture *WallSpriteTile, float maskedScaleY, bool sprflipvert, const short *mfloorclip, const short *mceilingclip) + void RenderWallSprite::DrawColumn(int x, FTexture *WallSpriteTile, double texturemid, float maskedScaleY, bool sprflipvert, const short *mfloorclip, const short *mceilingclip) { float iscale = swall[x] * maskedScaleY; double spryscale = 1 / iscale; double sprtopscreen; if (sprflipvert) - sprtopscreen = CenterY + dc_texturemid * spryscale; + sprtopscreen = CenterY + texturemid * spryscale; else - sprtopscreen = CenterY - dc_texturemid * spryscale; + sprtopscreen = CenterY - texturemid * spryscale; R_DrawMaskedColumn(x, FLOAT2FIXED(iscale), WallSpriteTile, lwall[x], spryscale, 
sprtopscreen, sprflipvert, mfloorclip, mceilingclip); } diff --git a/src/swrenderer/things/r_wallsprite.h b/src/swrenderer/things/r_wallsprite.h index d478bf83f7..350912cd1b 100644 --- a/src/swrenderer/things/r_wallsprite.h +++ b/src/swrenderer/things/r_wallsprite.h @@ -24,6 +24,6 @@ namespace swrenderer static void Render(vissprite_t *spr, const short *mfloorclip, const short *mceilingclip); private: - static void DrawColumn(int x, FTexture *WallSpriteTile, float maskedScaleY, bool sprflipvert, const short *mfloorclip, const short *mceilingclip); + static void DrawColumn(int x, FTexture *WallSpriteTile, double texturemid, float maskedScaleY, bool sprflipvert, const short *mfloorclip, const short *mceilingclip); }; } From 5d9cc6a706cc032803fc12e67d79762fd1d7f21f Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 13 Jan 2017 13:23:00 +0100 Subject: [PATCH 703/912] let 'forcenoskystretch' also apply to sky mode 2 --- src/swrenderer/plane/r_skyplane.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/swrenderer/plane/r_skyplane.cpp b/src/swrenderer/plane/r_skyplane.cpp index 2b73150630..a4c8bec2cd 100644 --- a/src/swrenderer/plane/r_skyplane.cpp +++ b/src/swrenderer/plane/r_skyplane.cpp @@ -394,7 +394,7 @@ namespace swrenderer void RenderSkyPlane::DrawSky(visplane_t *pl) { - if (r_skymode == 2) + if (r_skymode == 2 && !(level.flags & LEVEL_FORCETILEDSKY)) { DrawCapSky(pl); return; From 8209c4f392d15603caeacffa2de03f20453b32c2 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 13 Jan 2017 13:27:31 +0100 Subject: [PATCH 704/912] Fix compile error --- src/polyrenderer/scene/poly_wallsprite.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/polyrenderer/scene/poly_wallsprite.cpp b/src/polyrenderer/scene/poly_wallsprite.cpp index 197f528ed1..ba09c77c1f 100644 --- a/src/polyrenderer/scene/poly_wallsprite.cpp +++ b/src/polyrenderer/scene/poly_wallsprite.cpp @@ -27,6 +27,7 @@ #include "r_data/r_translate.h" #include 
"poly_wallsprite.h" #include "polyrenderer/poly_renderer.h" +#include "swrenderer/scene/r_light.h" void RenderPolyWallSprite::Render(const TriMatrix &worldToClip, const Vec4f &clipPlane, AActor *thing, subsector_t *sub, uint32_t subsectorDepth, uint32_t stencilValue) { From e57c6e98a80315c203ce6d01e7395011bcdedb75 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 13 Jan 2017 13:31:33 +0100 Subject: [PATCH 705/912] Fix typo --- src/win32/zdoom.rc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/win32/zdoom.rc b/src/win32/zdoom.rc index 50f85d8539..cede1e894d 100644 --- a/src/win32/zdoom.rc +++ b/src/win32/zdoom.rc @@ -74,7 +74,7 @@ BEGIN " VALUE ""CompanyName"", "" ""\r\n" " VALUE ""FileDescription"", ""QZDoom""\r\n" " VALUE ""FileVersion"", RC_FILEVERSION2\r\n" - " VALUE ""InternalName"", "QZDoom""\r\n" + " VALUE ""InternalName"", ""QZDoom""\r\n" " VALUE ""LegalCopyright"", ""Copyright \\u00A9 1993-1996 id Software, 1998-2010 Randy Heit, 2002-2010 Christoph Oelckers, et al.""\r\n" " VALUE ""LegalTrademarks"", ""DoomR is a Registered Trademark of id Software, Inc.""\r\n" " VALUE ""OriginalFilename"", ""qzdoom.exe""\r\n" From 8260bbbe77cc0fee62b190b27362b2f769678969 Mon Sep 17 00:00:00 2001 From: "alexey.lysiuk" Date: Fri, 13 Jan 2017 15:44:23 +0200 Subject: [PATCH 706/912] Fixed compilation with GCC/Clang No more 'error: cannot jump from this goto statement to its label / jump bypasses variable initialization' --- src/swrenderer/things/r_decal.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/swrenderer/things/r_decal.cpp b/src/swrenderer/things/r_decal.cpp index 554864670c..203401ec48 100644 --- a/src/swrenderer/things/r_decal.cpp +++ b/src/swrenderer/things/r_decal.cpp @@ -147,6 +147,8 @@ namespace swrenderer decal_left = decal_pos - edge_left * angvec - ViewPos; decal_right = decal_pos + edge_right * angvec - ViewPos; + double texturemid; + if (WallC.Init(decal_left, decal_right, TOO_CLOSE_Z)) goto done; @@ 
-214,7 +216,7 @@ namespace swrenderer } yscale = decal->ScaleY; - double texturemid = WallSpriteTile->TopOffset + (zpos - ViewPos.Z) / yscale; + texturemid = WallSpriteTile->TopOffset + (zpos - ViewPos.Z) / yscale; // Clip sprite to drawseg x1 = MAX(clipper->x1, x1); From e1506df8a6838959cb682683d9e115f46bed8497 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 13 Jan 2017 15:22:22 +0100 Subject: [PATCH 707/912] Change globvis to be picked up from the r_light values in softpoly --- src/polyrenderer/scene/poly_decal.cpp | 1 + src/polyrenderer/scene/poly_particle.cpp | 2 ++ src/polyrenderer/scene/poly_plane.cpp | 2 ++ src/polyrenderer/scene/poly_scene.cpp | 2 ++ src/polyrenderer/scene/poly_sky.cpp | 1 + src/polyrenderer/scene/poly_sprite.cpp | 1 + src/polyrenderer/scene/poly_wall.cpp | 1 + src/polyrenderer/scene/poly_wallsprite.cpp | 1 + src/swrenderer/drawers/r_drawers.h | 1 + src/swrenderer/scene/r_light.cpp | 1 - tools/drawergen/fixedfunction/drawtrianglecodegen.cpp | 5 +++-- tools/drawergen/llvmdrawers.cpp | 1 + 12 files changed, 16 insertions(+), 3 deletions(-) diff --git a/src/polyrenderer/scene/poly_decal.cpp b/src/polyrenderer/scene/poly_decal.cpp index c36b67f4a7..03e33c0155 100644 --- a/src/polyrenderer/scene/poly_decal.cpp +++ b/src/polyrenderer/scene/poly_decal.cpp @@ -137,6 +137,7 @@ void RenderPolyDecal::Render(const TriMatrix &worldToClip, const Vec4f &clipPlan args.uniforms.flags = 0; args.SetColormap(front->ColorMap); args.SetTexture(tex, decal->Translation, true); + args.uniforms.globvis = (float)swrenderer::r_WallVisibility; if (fullbrightSprite || swrenderer::fixedlightlev >= 0 || swrenderer::fixedcolormap) { args.uniforms.light = 256; diff --git a/src/polyrenderer/scene/poly_particle.cpp b/src/polyrenderer/scene/poly_particle.cpp index e4380dae97..b5d7220c00 100644 --- a/src/polyrenderer/scene/poly_particle.cpp +++ b/src/polyrenderer/scene/poly_particle.cpp @@ -73,6 +73,8 @@ void RenderPolyParticle::Render(const TriMatrix &worldToClip, 
const Vec4f &clipP PolyDrawArgs args; + args.uniforms.globvis = (float)swrenderer::r_SpriteVisibility; + if (fullbrightSprite || swrenderer::fixedlightlev >= 0 || swrenderer::fixedcolormap) { args.uniforms.light = 256; diff --git a/src/polyrenderer/scene/poly_plane.cpp b/src/polyrenderer/scene/poly_plane.cpp index 275310c31f..86dd89fd1e 100644 --- a/src/polyrenderer/scene/poly_plane.cpp +++ b/src/polyrenderer/scene/poly_plane.cpp @@ -107,6 +107,7 @@ void RenderPolyPlane::Render3DFloor(const TriMatrix &worldToClip, const Vec4f &c UVTransform xform(ceiling ? fakeFloor->top.model->planes[sector_t::ceiling].xform : fakeFloor->top.model->planes[sector_t::floor].xform, tex); PolyDrawArgs args; + args.uniforms.globvis = (float)swrenderer::r_TiltVisibility * 48.0f; args.uniforms.light = (uint32_t)(lightlevel / 255.0f * 256.0f); if (swrenderer::fixedlightlev >= 0 || swrenderer::fixedcolormap) args.uniforms.light = 256; @@ -300,6 +301,7 @@ void RenderPolyPlane::Render(const TriMatrix &worldToClip, const Vec4f &clipPlan UVTransform transform(ceiling ? 
frontsector->planes[sector_t::ceiling].xform : frontsector->planes[sector_t::floor].xform, tex); PolyDrawArgs args; + args.uniforms.globvis = (float)swrenderer::r_TiltVisibility * 48.0f; args.uniforms.light = (uint32_t)(frontsector->lightlevel / 255.0f * 256.0f); if (swrenderer::fixedlightlev >= 0 || swrenderer::fixedcolormap) args.uniforms.light = 256; diff --git a/src/polyrenderer/scene/poly_scene.cpp b/src/polyrenderer/scene/poly_scene.cpp index 51ce53a26f..8744940b36 100644 --- a/src/polyrenderer/scene/poly_scene.cpp +++ b/src/polyrenderer/scene/poly_scene.cpp @@ -29,6 +29,7 @@ #include "polyrenderer/scene/poly_scene.h" #include "polyrenderer/poly_renderer.h" #include "gl/data/gl_data.h" +#include "swrenderer/scene/r_light.h" CVAR(Bool, r_debug_cull, 0, 0) EXTERN_CVAR(Int, r_portal_recursions) @@ -240,6 +241,7 @@ void RenderPolyScene::RenderPortals(int portalDepth) PolyDrawArgs args; args.objectToClip = &WorldToClip; args.mode = TriangleDrawMode::Fan; + args.uniforms.globvis = (float)swrenderer::r_WallVisibility; args.uniforms.color = 0; args.uniforms.light = 256; args.uniforms.flags = TriUniforms::fixed_light; diff --git a/src/polyrenderer/scene/poly_sky.cpp b/src/polyrenderer/scene/poly_sky.cpp index 41bca43ff3..e102cde50f 100644 --- a/src/polyrenderer/scene/poly_sky.cpp +++ b/src/polyrenderer/scene/poly_sky.cpp @@ -55,6 +55,7 @@ void PolySkyDome::Render(const TriMatrix &worldToClip) int rc = mRows + 1; PolyDrawArgs args; + args.uniforms.globvis = (float)swrenderer::r_WallVisibility; args.uniforms.light = 256; args.uniforms.flags = 0; args.uniforms.subsectorDepth = RenderPolyScene::SkySubsectorDepth; diff --git a/src/polyrenderer/scene/poly_sprite.cpp b/src/polyrenderer/scene/poly_sprite.cpp index ce0547f622..c6cf88ee1a 100644 --- a/src/polyrenderer/scene/poly_sprite.cpp +++ b/src/polyrenderer/scene/poly_sprite.cpp @@ -138,6 +138,7 @@ void RenderPolySprite::Render(const TriMatrix &worldToClip, const Vec4f &clipPla bool fullbrightSprite = ((thing->renderflags 
& RF_FULLBRIGHT) || (thing->flags5 & MF5_BRIGHT)); PolyDrawArgs args; + args.uniforms.globvis = (float)swrenderer::r_SpriteVisibility; args.uniforms.flags = 0; if (fullbrightSprite || swrenderer::fixedlightlev >= 0 || swrenderer::fixedcolormap) { diff --git a/src/polyrenderer/scene/poly_wall.cpp b/src/polyrenderer/scene/poly_wall.cpp index 1bf7cf24b0..009c5fca7e 100644 --- a/src/polyrenderer/scene/poly_wall.cpp +++ b/src/polyrenderer/scene/poly_wall.cpp @@ -247,6 +247,7 @@ void RenderPolyWall::Render(const TriMatrix &worldToClip, const Vec4f &clipPlane } PolyDrawArgs args; + args.uniforms.globvis = (float)swrenderer::r_WallVisibility; args.uniforms.light = (uint32_t)(GetLightLevel() / 255.0f * 256.0f); args.uniforms.flags = 0; args.uniforms.subsectorDepth = SubsectorDepth; diff --git a/src/polyrenderer/scene/poly_wallsprite.cpp b/src/polyrenderer/scene/poly_wallsprite.cpp index ba09c77c1f..4fc9e4994c 100644 --- a/src/polyrenderer/scene/poly_wallsprite.cpp +++ b/src/polyrenderer/scene/poly_wallsprite.cpp @@ -100,6 +100,7 @@ void RenderPolyWallSprite::Render(const TriMatrix &worldToClip, const Vec4f &cli bool fullbrightSprite = ((thing->renderflags & RF_FULLBRIGHT) || (thing->flags5 & MF5_BRIGHT)); PolyDrawArgs args; + args.uniforms.globvis = (float)swrenderer::r_WallVisibility; if (fullbrightSprite || swrenderer::fixedlightlev >= 0 || swrenderer::fixedcolormap) { args.uniforms.light = 256; diff --git a/src/swrenderer/drawers/r_drawers.h b/src/swrenderer/drawers/r_drawers.h index 0a79f26d53..0492930d96 100644 --- a/src/swrenderer/drawers/r_drawers.h +++ b/src/swrenderer/drawers/r_drawers.h @@ -230,6 +230,7 @@ struct TriUniforms uint16_t fade_green; uint16_t fade_blue; uint16_t desaturate; + float globvis; uint32_t flags; enum Flags { diff --git a/src/swrenderer/scene/r_light.cpp b/src/swrenderer/scene/r_light.cpp index 7ef86ca257..4059a73bf0 100644 --- a/src/swrenderer/scene/r_light.cpp +++ b/src/swrenderer/scene/r_light.cpp @@ -43,7 +43,6 @@ namespace swrenderer 
double r_FloorVisibility; float r_TiltVisibility; double r_SpriteVisibility; - double r_ParticleVisibility; int fixedlightlev; FSWColormap *fixedcolormap; diff --git a/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp b/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp index c5f7405f5c..6f3a725fab 100644 --- a/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp +++ b/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp @@ -613,7 +613,6 @@ void DrawTriangleCodegen::CalculateGradients() } shade = (64.0f - (SSAFloat(light * 255 / 256) + 12.0f) * 32.0f / 128.0f) / 32.0f; - globVis = SSAFloat(1706.0f / 32.0f); } void DrawTriangleCodegen::LoadArgs(SSAValue args, SSAValue thread_data) @@ -671,7 +670,9 @@ void DrawTriangleCodegen::LoadUniforms(SSAValue uniforms) SSAShort fade_green = uniforms[0][11].load(true); SSAShort fade_blue = uniforms[0][12].load(true); SSAShort desaturate = uniforms[0][13].load(true); - SSAInt flags = uniforms[0][14].load(true); + globVis = uniforms[0][14].load(true); + globVis = globVis * SSAFloat(1.0f / 32.0f); + SSAInt flags = uniforms[0][15].load(true); shade_constants.light = SSAVec4i(light_blue.zext_int(), light_green.zext_int(), light_red.zext_int(), light_alpha.zext_int()); shade_constants.fade = SSAVec4i(fade_blue.zext_int(), fade_green.zext_int(), fade_red.zext_int(), fade_alpha.zext_int()); shade_constants.desaturate = desaturate.zext_int(); diff --git a/tools/drawergen/llvmdrawers.cpp b/tools/drawergen/llvmdrawers.cpp index 0ce8ee8305..995b5b8496 100644 --- a/tools/drawergen/llvmdrawers.cpp +++ b/tools/drawergen/llvmdrawers.cpp @@ -360,6 +360,7 @@ llvm::Type *LLVMDrawers::GetTriUniformsStruct(llvm::LLVMContext &context) elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_green; elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_blue; elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t desaturate; + elements.push_back(llvm::Type::getFloatTy(context)); // float globvis; 
elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t flags; elements.push_back(GetTriMatrixStruct(context)); // TriMatrix objectToClip TriUniformsStruct = llvm::StructType::create(context, elements, "TriUniforms", false)->getPointerTo(); From edd9b6c69ce4ef85817a8cdbaeb1bafcca969699 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 13 Jan 2017 15:43:06 +0100 Subject: [PATCH 708/912] Change the shape of the particle texture --- src/swrenderer/drawers/r_draw.cpp | 18 ++++++++++++++++++ src/swrenderer/drawers/r_draw.h | 3 +++ src/swrenderer/drawers/r_draw_pal.cpp | 23 ----------------------- src/swrenderer/drawers/r_draw_rgba.cpp | 23 ----------------------- src/swrenderer/scene/r_viewport.cpp | 2 ++ 5 files changed, 23 insertions(+), 46 deletions(-) diff --git a/src/swrenderer/drawers/r_draw.cpp b/src/swrenderer/drawers/r_draw.cpp index 994bb5aa0b..081c379a71 100644 --- a/src/swrenderer/drawers/r_draw.cpp +++ b/src/swrenderer/drawers/r_draw.cpp @@ -65,6 +65,8 @@ namespace swrenderer int fuzzpos; int fuzzviewheight; + uint32_t particle_texture[16 * 16]; + short zeroarray[MAXWIDTH]; short screenheightarray[MAXWIDTH]; @@ -233,6 +235,22 @@ namespace swrenderer } } + void R_InitParticleTexture() + { + for (int y = 0; y < 16; y++) + { + for (int x = 0; x < 16; x++) + { + float dx = (8 - x) / 8.0f; + float dy = (8 - y) / 8.0f; + float dist = sqrt(dx * dx + dy * dy); + float alpha = clamp(3.0f - dist * 3.0f, 0.0f, 1.0f); + + particle_texture[x + y * 16] = (int)(alpha * 64.0f + 0.5f); + } + } + } + namespace { bool R_SetBlendFunc(int op, fixed_t fglevel, fixed_t bglevel, int flags) diff --git a/src/swrenderer/drawers/r_draw.h b/src/swrenderer/drawers/r_draw.h index 8d586afdd1..30bf467715 100644 --- a/src/swrenderer/drawers/r_draw.h +++ b/src/swrenderer/drawers/r_draw.h @@ -121,6 +121,8 @@ namespace swrenderer extern int fuzzpos; extern int fuzzviewheight; + extern uint32_t particle_texture[16 * 16]; + extern bool r_swtruecolor; class SWPixelFormatDrawers 
@@ -171,6 +173,7 @@ namespace swrenderer void R_InitColumnDrawers(); void R_InitShadeMaps(); void R_InitFuzzTable(int fuzzoff); + void R_InitParticleTexture(); bool R_SetPatchStyle(FRenderStyle style, fixed_t alpha, int translation, uint32_t color, FDynamicColormap *&basecolormap); bool R_SetPatchStyle(FRenderStyle style, float alpha, int translation, uint32_t color, FDynamicColormap *&basecolormap); diff --git a/src/swrenderer/drawers/r_draw_pal.cpp b/src/swrenderer/drawers/r_draw_pal.cpp index 4e4783f3f1..f577f64252 100644 --- a/src/swrenderer/drawers/r_draw_pal.cpp +++ b/src/swrenderer/drawers/r_draw_pal.cpp @@ -2768,29 +2768,6 @@ namespace swrenderer ///////////////////////////////////////////////////////////////////////////// - namespace - { - static uint32_t particle_texture[16 * 16] = - { - 1 * 1, 2 * 1, 3 * 1, 4 * 1, 5 * 1, 6 * 1, 7 * 1, 8 * 1, 8 * 1, 7 * 1, 6 * 1, 5 * 1, 4 * 1, 3 * 1, 2 * 1, 1 * 1, - 1 * 2, 2 * 2, 3 * 2, 4 * 2, 5 * 2, 6 * 2, 7 * 2, 8 * 2, 8 * 2, 7 * 2, 6 * 2, 5 * 2, 4 * 2, 3 * 2, 2 * 2, 1 * 2, - 1 * 3, 2 * 3, 3 * 3, 4 * 3, 5 * 3, 6 * 3, 7 * 3, 8 * 3, 8 * 3, 7 * 3, 6 * 3, 5 * 3, 4 * 3, 3 * 3, 2 * 3, 1 * 3, - 1 * 4, 2 * 4, 3 * 4, 4 * 4, 5 * 4, 6 * 4, 7 * 4, 8 * 4, 8 * 4, 7 * 4, 6 * 4, 5 * 4, 4 * 4, 3 * 4, 2 * 4, 1 * 4, - 1 * 5, 2 * 5, 3 * 5, 4 * 5, 5 * 5, 6 * 5, 7 * 5, 8 * 5, 8 * 5, 7 * 5, 6 * 5, 5 * 5, 4 * 5, 3 * 5, 2 * 5, 1 * 5, - 1 * 6, 2 * 6, 3 * 6, 4 * 6, 5 * 6, 6 * 6, 7 * 6, 8 * 6, 8 * 6, 7 * 6, 6 * 6, 5 * 6, 4 * 6, 3 * 6, 2 * 6, 1 * 6, - 1 * 7, 2 * 7, 3 * 7, 4 * 7, 5 * 7, 6 * 7, 7 * 7, 8 * 7, 8 * 7, 7 * 7, 6 * 7, 5 * 7, 4 * 7, 3 * 7, 2 * 7, 1 * 7, - 1 * 8, 2 * 8, 3 * 8, 4 * 8, 5 * 8, 6 * 8, 7 * 8, 8 * 8, 8 * 8, 7 * 8, 6 * 8, 5 * 8, 4 * 8, 3 * 8, 2 * 8, 1 * 8, - 1 * 8, 2 * 8, 3 * 8, 4 * 8, 5 * 8, 6 * 8, 7 * 8, 8 * 8, 8 * 8, 7 * 8, 6 * 8, 5 * 8, 4 * 8, 3 * 8, 2 * 8, 1 * 8, - 1 * 7, 2 * 7, 3 * 7, 4 * 7, 5 * 7, 6 * 7, 7 * 7, 8 * 7, 8 * 7, 7 * 7, 6 * 7, 5 * 7, 4 * 7, 3 * 7, 2 * 7, 1 * 7, - 1 * 6, 2 * 6, 3 * 6, 4 * 6, 5 * 6, 6 * 6, 7 * 6, 8 
* 6, 8 * 6, 7 * 6, 6 * 6, 5 * 6, 4 * 6, 3 * 6, 2 * 6, 1 * 6, - 1 * 5, 2 * 5, 3 * 5, 4 * 5, 5 * 5, 6 * 5, 7 * 5, 8 * 5, 8 * 5, 7 * 5, 6 * 5, 5 * 5, 4 * 5, 3 * 5, 2 * 5, 1 * 5, - 1 * 4, 2 * 4, 3 * 4, 4 * 4, 5 * 4, 6 * 4, 7 * 4, 8 * 4, 8 * 4, 7 * 4, 6 * 4, 5 * 4, 4 * 4, 3 * 4, 2 * 4, 1 * 4, - 1 * 3, 2 * 3, 3 * 3, 4 * 3, 5 * 3, 6 * 3, 7 * 3, 8 * 3, 8 * 3, 7 * 3, 6 * 3, 5 * 3, 4 * 3, 3 * 3, 2 * 3, 1 * 3, - 1 * 2, 2 * 2, 3 * 2, 4 * 2, 5 * 2, 6 * 2, 7 * 2, 8 * 2, 8 * 2, 7 * 2, 6 * 2, 5 * 2, 4 * 2, 3 * 2, 2 * 2, 1 * 2, - 1 * 1, 2 * 1, 3 * 1, 4 * 1, 5 * 1, 6 * 1, 7 * 1, 8 * 1, 8 * 1, 7 * 1, 6 * 1, 5 * 1, 4 * 1, 3 * 1, 2 * 1, 1 * 1 - }; - } - DrawParticleColumnPalCommand::DrawParticleColumnPalCommand(uint8_t *dest, int dest_y, int pitch, int count, uint32_t fg, uint32_t alpha, uint32_t fracposx) { _dest = dest; diff --git a/src/swrenderer/drawers/r_draw_rgba.cpp b/src/swrenderer/drawers/r_draw_rgba.cpp index 352a1538bb..6dbd676b2e 100644 --- a/src/swrenderer/drawers/r_draw_rgba.cpp +++ b/src/swrenderer/drawers/r_draw_rgba.cpp @@ -942,29 +942,6 @@ namespace swrenderer ///////////////////////////////////////////////////////////////////////////// - namespace - { - static uint32_t particle_texture[16 * 16] = - { - 1 * 1, 2 * 1, 3 * 1, 4 * 1, 5 * 1, 6 * 1, 7 * 1, 8 * 1, 8 * 1, 7 * 1, 6 * 1, 5 * 1, 4 * 1, 3 * 1, 2 * 1, 1 * 1, - 1 * 2, 2 * 2, 3 * 2, 4 * 2, 5 * 2, 6 * 2, 7 * 2, 8 * 2, 8 * 2, 7 * 2, 6 * 2, 5 * 2, 4 * 2, 3 * 2, 2 * 2, 1 * 2, - 1 * 3, 2 * 3, 3 * 3, 4 * 3, 5 * 3, 6 * 3, 7 * 3, 8 * 3, 8 * 3, 7 * 3, 6 * 3, 5 * 3, 4 * 3, 3 * 3, 2 * 3, 1 * 3, - 1 * 4, 2 * 4, 3 * 4, 4 * 4, 5 * 4, 6 * 4, 7 * 4, 8 * 4, 8 * 4, 7 * 4, 6 * 4, 5 * 4, 4 * 4, 3 * 4, 2 * 4, 1 * 4, - 1 * 5, 2 * 5, 3 * 5, 4 * 5, 5 * 5, 6 * 5, 7 * 5, 8 * 5, 8 * 5, 7 * 5, 6 * 5, 5 * 5, 4 * 5, 3 * 5, 2 * 5, 1 * 5, - 1 * 6, 2 * 6, 3 * 6, 4 * 6, 5 * 6, 6 * 6, 7 * 6, 8 * 6, 8 * 6, 7 * 6, 6 * 6, 5 * 6, 4 * 6, 3 * 6, 2 * 6, 1 * 6, - 1 * 7, 2 * 7, 3 * 7, 4 * 7, 5 * 7, 6 * 7, 7 * 7, 8 * 7, 8 * 7, 7 * 7, 6 * 7, 5 * 7, 4 * 7, 3 * 
7, 2 * 7, 1 * 7, - 1 * 8, 2 * 8, 3 * 8, 4 * 8, 5 * 8, 6 * 8, 7 * 8, 8 * 8, 8 * 8, 7 * 8, 6 * 8, 5 * 8, 4 * 8, 3 * 8, 2 * 8, 1 * 8, - 1 * 8, 2 * 8, 3 * 8, 4 * 8, 5 * 8, 6 * 8, 7 * 8, 8 * 8, 8 * 8, 7 * 8, 6 * 8, 5 * 8, 4 * 8, 3 * 8, 2 * 8, 1 * 8, - 1 * 7, 2 * 7, 3 * 7, 4 * 7, 5 * 7, 6 * 7, 7 * 7, 8 * 7, 8 * 7, 7 * 7, 6 * 7, 5 * 7, 4 * 7, 3 * 7, 2 * 7, 1 * 7, - 1 * 6, 2 * 6, 3 * 6, 4 * 6, 5 * 6, 6 * 6, 7 * 6, 8 * 6, 8 * 6, 7 * 6, 6 * 6, 5 * 6, 4 * 6, 3 * 6, 2 * 6, 1 * 6, - 1 * 5, 2 * 5, 3 * 5, 4 * 5, 5 * 5, 6 * 5, 7 * 5, 8 * 5, 8 * 5, 7 * 5, 6 * 5, 5 * 5, 4 * 5, 3 * 5, 2 * 5, 1 * 5, - 1 * 4, 2 * 4, 3 * 4, 4 * 4, 5 * 4, 6 * 4, 7 * 4, 8 * 4, 8 * 4, 7 * 4, 6 * 4, 5 * 4, 4 * 4, 3 * 4, 2 * 4, 1 * 4, - 1 * 3, 2 * 3, 3 * 3, 4 * 3, 5 * 3, 6 * 3, 7 * 3, 8 * 3, 8 * 3, 7 * 3, 6 * 3, 5 * 3, 4 * 3, 3 * 3, 2 * 3, 1 * 3, - 1 * 2, 2 * 2, 3 * 2, 4 * 2, 5 * 2, 6 * 2, 7 * 2, 8 * 2, 8 * 2, 7 * 2, 6 * 2, 5 * 2, 4 * 2, 3 * 2, 2 * 2, 1 * 2, - 1 * 1, 2 * 1, 3 * 1, 4 * 1, 5 * 1, 6 * 1, 7 * 1, 8 * 1, 8 * 1, 7 * 1, 6 * 1, 5 * 1, 4 * 1, 3 * 1, 2 * 1, 1 * 1 - }; - } - DrawParticleColumnRGBACommand::DrawParticleColumnRGBACommand(uint32_t *dest, int dest_y, int pitch, int count, uint32_t fg, uint32_t alpha, uint32_t fracposx) { _dest = dest; diff --git a/src/swrenderer/scene/r_viewport.cpp b/src/swrenderer/scene/r_viewport.cpp index 7812fab652..8d79237d25 100644 --- a/src/swrenderer/scene/r_viewport.cpp +++ b/src/swrenderer/scene/r_viewport.cpp @@ -163,6 +163,8 @@ namespace swrenderer ylookup[i] = i * pitch; } } + + R_InitParticleTexture(); } void R_InitTextureMapping() From ca046d26c5a2db9bc1594b0da4cfd62b6a8a6043 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 13 Jan 2017 16:12:43 +0100 Subject: [PATCH 709/912] Make particle texture slightly higher quality --- src/swrenderer/drawers/r_draw.cpp | 17 +++++++++-------- src/swrenderer/drawers/r_draw.h | 3 ++- src/swrenderer/drawers/r_draw_pal.cpp | 6 +++--- src/swrenderer/drawers/r_draw_rgba.cpp | 6 +++--- 
src/swrenderer/things/r_particle.cpp | 2 +- 5 files changed, 18 insertions(+), 16 deletions(-) diff --git a/src/swrenderer/drawers/r_draw.cpp b/src/swrenderer/drawers/r_draw.cpp index 081c379a71..46c0ec7846 100644 --- a/src/swrenderer/drawers/r_draw.cpp +++ b/src/swrenderer/drawers/r_draw.cpp @@ -65,7 +65,7 @@ namespace swrenderer int fuzzpos; int fuzzviewheight; - uint32_t particle_texture[16 * 16]; + uint32_t particle_texture[PARTICLE_TEXTURE_SIZE * PARTICLE_TEXTURE_SIZE]; short zeroarray[MAXWIDTH]; short screenheightarray[MAXWIDTH]; @@ -237,16 +237,17 @@ namespace swrenderer void R_InitParticleTexture() { - for (int y = 0; y < 16; y++) + float center = PARTICLE_TEXTURE_SIZE * 0.5f; + for (int y = 0; y < PARTICLE_TEXTURE_SIZE; y++) { - for (int x = 0; x < 16; x++) + for (int x = 0; x < PARTICLE_TEXTURE_SIZE; x++) { - float dx = (8 - x) / 8.0f; - float dy = (8 - y) / 8.0f; - float dist = sqrt(dx * dx + dy * dy); - float alpha = clamp(3.0f - dist * 3.0f, 0.0f, 1.0f); + float dx = (center - x - 0.5f) / center; + float dy = (center - y - 0.5f) / center; + float dist2 = dx * dx + dy * dy; + float alpha = clamp(1.1f - dist2 * 1.1f, 0.0f, 1.0f); - particle_texture[x + y * 16] = (int)(alpha * 64.0f + 0.5f); + particle_texture[x + y * PARTICLE_TEXTURE_SIZE] = (int)(alpha * 128.0f + 0.5f); } } } diff --git a/src/swrenderer/drawers/r_draw.h b/src/swrenderer/drawers/r_draw.h index 30bf467715..f7facda887 100644 --- a/src/swrenderer/drawers/r_draw.h +++ b/src/swrenderer/drawers/r_draw.h @@ -121,7 +121,8 @@ namespace swrenderer extern int fuzzpos; extern int fuzzviewheight; - extern uint32_t particle_texture[16 * 16]; + #define PARTICLE_TEXTURE_SIZE 64 + extern uint32_t particle_texture[PARTICLE_TEXTURE_SIZE * PARTICLE_TEXTURE_SIZE]; extern bool r_swtruecolor; diff --git a/src/swrenderer/drawers/r_draw_pal.cpp b/src/swrenderer/drawers/r_draw_pal.cpp index f577f64252..b0938572d3 100644 --- a/src/swrenderer/drawers/r_draw_pal.cpp +++ b/src/swrenderer/drawers/r_draw_pal.cpp @@ 
-2788,10 +2788,10 @@ namespace swrenderer uint8_t *dest = thread->dest_for_thread(_dest_y, _pitch, _dest); int pitch = _pitch * thread->num_cores; - const uint32_t *source = &particle_texture[(_fracposx >> FRACBITS) * 16]; + const uint32_t *source = &particle_texture[(_fracposx >> FRACBITS) * PARTICLE_TEXTURE_SIZE]; uint32_t particle_alpha = _alpha; - uint32_t fracstep = 16 * FRACUNIT / _count; + uint32_t fracstep = PARTICLE_TEXTURE_SIZE * FRACUNIT / _count; uint32_t fracpos = fracstep * thread->skipped_by_thread(_dest_y) + fracstep / 2; fracstep *= thread->num_cores; @@ -2801,7 +2801,7 @@ namespace swrenderer for (int y = 0; y < count; y++) { - uint32_t alpha = (source[fracpos >> FRACBITS] * particle_alpha) >> 6; + uint32_t alpha = (source[fracpos >> FRACBITS] * particle_alpha) >> 7; uint32_t inv_alpha = 256 - alpha; int bg = *dest; diff --git a/src/swrenderer/drawers/r_draw_rgba.cpp b/src/swrenderer/drawers/r_draw_rgba.cpp index 6dbd676b2e..434497f99f 100644 --- a/src/swrenderer/drawers/r_draw_rgba.cpp +++ b/src/swrenderer/drawers/r_draw_rgba.cpp @@ -962,10 +962,10 @@ namespace swrenderer uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, _dest); int pitch = _pitch * thread->num_cores; - const uint32_t *source = &particle_texture[(_fracposx >> FRACBITS) * 16]; + const uint32_t *source = &particle_texture[(_fracposx >> FRACBITS) * PARTICLE_TEXTURE_SIZE]; uint32_t particle_alpha = _alpha; - uint32_t fracstep = 16 * FRACUNIT / _count; + uint32_t fracstep = PARTICLE_TEXTURE_SIZE * FRACUNIT / _count; uint32_t fracpos = fracstep * thread->skipped_by_thread(_dest_y) + fracstep / 2; fracstep *= thread->num_cores; @@ -975,7 +975,7 @@ namespace swrenderer for (int y = 0; y < count; y++) { - uint32_t alpha = (source[fracpos >> FRACBITS] * particle_alpha) >> 6; + uint32_t alpha = (source[fracpos >> FRACBITS] * particle_alpha) >> 7; uint32_t inv_alpha = 256 - alpha; uint32_t bg_red = (*dest >> 16) & 0xff; diff --git a/src/swrenderer/things/r_particle.cpp 
b/src/swrenderer/things/r_particle.cpp index b20acbcf67..e8d1cb1b72 100644 --- a/src/swrenderer/things/r_particle.cpp +++ b/src/swrenderer/things/r_particle.cpp @@ -249,7 +249,7 @@ namespace swrenderer spacing = RenderTarget->GetPitch(); - uint32_t fracstepx = 16 * FRACUNIT / countbase; + uint32_t fracstepx = PARTICLE_TEXTURE_SIZE * FRACUNIT / countbase; uint32_t fracposx = fracstepx / 2; if (r_swtruecolor) From 5ea28897af3e9a7ca26da84d73298198f17841e4 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 14 Jan 2017 02:25:02 +0100 Subject: [PATCH 710/912] Move r_scene into a class --- src/swrenderer/line/r_line.cpp | 2 +- src/swrenderer/r_swrenderer.cpp | 10 +++++----- src/swrenderer/scene/r_scene.cpp | 23 ++++++++++++++--------- src/swrenderer/scene/r_scene.h | 23 ++++++++++++++++------- src/swrenderer/scene/r_viewport.h | 1 - 5 files changed, 36 insertions(+), 23 deletions(-) diff --git a/src/swrenderer/line/r_line.cpp b/src/swrenderer/line/r_line.cpp index 416fa7efb2..da506602c0 100644 --- a/src/swrenderer/line/r_line.cpp +++ b/src/swrenderer/line/r_line.cpp @@ -638,7 +638,7 @@ namespace swrenderer linedef = curline->linedef; // mark the segment as visible for auto map - if (!r_dontmaplines) linedef->flags |= ML_MAPPED; + if (!RenderScene::Instance()->DontMapLines()) linedef->flags |= ML_MAPPED; midtexture = toptexture = bottomtexture = 0; diff --git a/src/swrenderer/r_swrenderer.cpp b/src/swrenderer/r_swrenderer.cpp index d1397f10b8..a315385a3a 100644 --- a/src/swrenderer/r_swrenderer.cpp +++ b/src/swrenderer/r_swrenderer.cpp @@ -93,7 +93,7 @@ void FSoftwareRenderer::Init() gl_ParseDefs(); r_swtruecolor = screen->IsBgra(); - R_InitRenderer(); + RenderScene::Instance()->Init(); } //========================================================================== @@ -224,7 +224,7 @@ void FSoftwareRenderer::RenderView(player_t *player) } R_BeginDrawerCommands(); - R_RenderActorView (player->mo); + RenderScene::Instance()->RenderActorView(player->mo); // [RH] Let 
cameras draw onto textures that were visible this frame. FCanvasTextureInfo::UpdateAll (); @@ -268,7 +268,7 @@ void FSoftwareRenderer::WriteSavePic (player_t *player, FileWriter *file, int wi if (r_polyrenderer) PolyRenderer::Instance()->RenderViewToCanvas(player->mo, pic, 0, 0, width, height, true); else - R_RenderViewToCanvas (player->mo, pic, 0, 0, width, height); + RenderScene::Instance()->RenderViewToCanvas (player->mo, pic, 0, 0, width, height); screen->GetFlashedPalette (palette); M_CreatePNG (file, pic->GetBuffer(), palette, SS_PAL, width, height, pic->GetPitch()); pic->Unlock (); @@ -323,7 +323,7 @@ bool FSoftwareRenderer::RequireGLNodes() void FSoftwareRenderer::OnModeSet () { - R_MultiresInit (); + RenderScene::Instance()->ScreenResized(); RenderTarget = screen; screen->Lock (true); @@ -413,7 +413,7 @@ void FSoftwareRenderer::RenderTextureView (FCanvasTexture *tex, AActor *viewpoin if (r_polyrenderer) PolyRenderer::Instance()->RenderViewToCanvas(viewpoint, Canvas, 0, 0, tex->GetWidth(), tex->GetHeight(), tex->bFirstUpdate); else - R_RenderViewToCanvas (viewpoint, Canvas, 0, 0, tex->GetWidth(), tex->GetHeight(), tex->bFirstUpdate); + RenderScene::Instance()->RenderViewToCanvas(viewpoint, Canvas, 0, 0, tex->GetWidth(), tex->GetHeight(), tex->bFirstUpdate); R_SetFOV (savedfov); if (Canvas->IsBgra()) diff --git a/src/swrenderer/scene/r_scene.cpp b/src/swrenderer/scene/r_scene.cpp index 50e20be722..75946f367c 100644 --- a/src/swrenderer/scene/r_scene.cpp +++ b/src/swrenderer/scene/r_scene.cpp @@ -51,9 +51,14 @@ EXTERN_CVAR(Bool, r_shadercolormaps) namespace swrenderer { cycle_t WallCycles, PlaneCycles, MaskedCycles, WallScanCycles; - bool r_dontmaplines; + + RenderScene *RenderScene::Instance() + { + static RenderScene instance; + return &instance; + } - void R_RenderActorView(AActor *actor, bool dontmaplines) + void RenderScene::RenderActorView(AActor *actor, bool dontmaplines) { WallCycles.Reset(); PlaneCycles.Reset(); @@ -85,7 +90,7 @@ namespace swrenderer 
RenderPortal::Instance()->SetMainPortal(); - r_dontmaplines = dontmaplines; + this->dontmaplines = dontmaplines; // [RH] Hack to make windows into underwater areas possible RenderOpaquePass::Instance()->ResetFakingUnderwater(); @@ -138,7 +143,7 @@ namespace swrenderer } } - void R_RenderViewToCanvas(AActor *actor, DCanvas *canvas, int x, int y, int width, int height, bool dontmaplines) + void RenderScene::RenderViewToCanvas(AActor *actor, DCanvas *canvas, int x, int y, int width, int height, bool dontmaplines) { const bool savedviewactive = viewactive; const bool savedoutputformat = r_swtruecolor; @@ -160,7 +165,7 @@ namespace swrenderer viewwindowy = y; viewactive = true; - R_RenderActorView(actor, dontmaplines); + RenderActorView(actor, dontmaplines); R_EndDrawerCommands(); @@ -180,14 +185,14 @@ namespace swrenderer } } - void R_MultiresInit() + void RenderScene::ScreenResized() { VisiblePlaneList::Instance()->Init(); } - void R_InitRenderer() + void RenderScene::Init() { - atterm(R_ShutdownRenderer); + atterm([]() { RenderScene::Instance()->Deinit(); }); // viewwidth / viewheight are set by the defaults fillshort(zeroarray, MAXWIDTH, 0); @@ -195,7 +200,7 @@ namespace swrenderer R_InitColumnDrawers(); } - void R_ShutdownRenderer() + void RenderScene::Deinit() { RenderTranslucentPass::Deinit(); VisiblePlaneList::Instance()->Deinit(); diff --git a/src/swrenderer/scene/r_scene.h b/src/swrenderer/scene/r_scene.h index 5b5a951bce..1ad345890a 100644 --- a/src/swrenderer/scene/r_scene.h +++ b/src/swrenderer/scene/r_scene.h @@ -22,13 +22,22 @@ extern cycle_t FrameCycles; namespace swrenderer { extern cycle_t WallCycles, PlaneCycles, MaskedCycles, WallScanCycles; + + class RenderScene + { + public: + static RenderScene *Instance(); - extern bool r_dontmaplines; + void Init(); + void ScreenResized(); + void Deinit(); + + void RenderActorView(AActor *actor, bool dontmaplines = false); + void RenderViewToCanvas(AActor *actor, DCanvas *canvas, int x, int y, int width, int 
height, bool dontmaplines = false); + + bool DontMapLines() const { return dontmaplines; } - void R_RenderActorView(AActor *actor, bool dontmaplines = false); - void R_RenderViewToCanvas(AActor *actor, DCanvas *canvas, int x, int y, int width, int height, bool dontmaplines = false); - - void R_InitRenderer(); - void R_MultiresInit(); - void R_ShutdownRenderer(); + private: + bool dontmaplines = false; + }; } diff --git a/src/swrenderer/scene/r_viewport.h b/src/swrenderer/scene/r_viewport.h index 2fa32988ed..6638a85922 100644 --- a/src/swrenderer/scene/r_viewport.h +++ b/src/swrenderer/scene/r_viewport.h @@ -37,5 +37,4 @@ namespace swrenderer void R_InitTextureMapping(); void R_SetupBuffer(); void R_SetupFreelook(); - void R_InitRenderer(); } From 1b043bb46c75efa6e068ee01b803cb79ea0220dd Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 15 Jan 2017 01:36:57 +0100 Subject: [PATCH 711/912] Minor cleanup in r_swrenderer --- src/swrenderer/r_swrenderer.cpp | 179 ++++++++++--------------------- src/swrenderer/r_swrenderer.h | 28 +---- src/swrenderer/scene/r_scene.cpp | 5 + 3 files changed, 65 insertions(+), 147 deletions(-) diff --git a/src/swrenderer/r_swrenderer.cpp b/src/swrenderer/r_swrenderer.cpp index a315385a3a..75348772f7 100644 --- a/src/swrenderer/r_swrenderer.cpp +++ b/src/swrenderer/r_swrenderer.cpp @@ -54,6 +54,9 @@ void gl_ParseDefs(); void gl_InitData(); +void gl_SetActorLights(AActor *); +void gl_PreprocessLevel(); +void gl_CleanLevelData(); EXTERN_CVAR(Bool, r_shadercolormaps) EXTERN_CVAR(Float, maxviewpitch) // [SP] CVAR from GZDoom @@ -82,12 +85,6 @@ FSoftwareRenderer::~FSoftwareRenderer() { } -//========================================================================== -// -// DCanvas :: Init -// -//========================================================================== - void FSoftwareRenderer::Init() { gl_ParseDefs(); @@ -96,38 +93,28 @@ void FSoftwareRenderer::Init() RenderScene::Instance()->Init(); } 
-//========================================================================== -// -// DCanvas :: UsesColormap -// -//========================================================================== - bool FSoftwareRenderer::UsesColormap() const { return true; } -//=========================================================================== -// -// Texture precaching -// -//=========================================================================== - void FSoftwareRenderer::PrecacheTexture(FTexture *tex, int cache) { + bool isbgra = screen->IsBgra(); + if (tex != NULL) { if (cache & FTextureManager::HIT_Columnmode) { const FTexture::Span *spanp; - if (r_swtruecolor) + if (isbgra) tex->GetColumnBgra(0, &spanp); else tex->GetColumn(0, &spanp); } else if (cache != 0) { - if (r_swtruecolor) + if (isbgra) tex->GetPixelsBgra(); else tex->GetPixels (); @@ -188,12 +175,6 @@ void FSoftwareRenderer::Precache(BYTE *texhitlist, TMap &act } } -//=========================================================================== -// -// Render the view -// -//=========================================================================== - void FSoftwareRenderer::RenderView(player_t *player) { if (r_polyrenderer) @@ -202,7 +183,6 @@ void FSoftwareRenderer::RenderView(player_t *player) r_swtruecolor = screen->IsBgra(); PolyRenderer::Instance()->RenderActorView(player->mo, false); - FCanvasTextureInfo::UpdateAll(); // Apply special colormap if the target cannot do it if (realfixedcolormap && r_swtruecolor && !(r_shadercolormaps && screen->Accel2D)) @@ -213,36 +193,32 @@ void FSoftwareRenderer::RenderView(player_t *player) } r_swtruecolor = saved_swtruecolor; - - return; - } - if (r_swtruecolor != screen->IsBgra()) + FCanvasTextureInfo::UpdateAll(); + } + else { - r_swtruecolor = screen->IsBgra(); - R_InitColumnDrawers(); + if (r_swtruecolor != screen->IsBgra()) + { + r_swtruecolor = screen->IsBgra(); + R_InitColumnDrawers(); + } + + R_BeginDrawerCommands(); + 
RenderScene::Instance()->RenderActorView(player->mo); + + // Apply special colormap if the target cannot do it + if (realfixedcolormap && r_swtruecolor && !(r_shadercolormaps && screen->Accel2D)) + { + DrawerCommandQueue::QueueCommand(realfixedcolormap, screen); + } + + R_EndDrawerCommands(); + + FCanvasTextureInfo::UpdateAll(); } - - R_BeginDrawerCommands(); - RenderScene::Instance()->RenderActorView(player->mo); - // [RH] Let cameras draw onto textures that were visible this frame. - FCanvasTextureInfo::UpdateAll (); - - // Apply special colormap if the target cannot do it - if (realfixedcolormap && r_swtruecolor && !(r_shadercolormaps && screen->Accel2D)) - { - DrawerCommandQueue::QueueCommand(realfixedcolormap, screen); - } - - R_EndDrawerCommands(); } -//========================================================================== -// -// -// -//========================================================================== - void FSoftwareRenderer::RemapVoxels() { for (unsigned i=0; iScreenResized(); - - RenderTarget = screen; - screen->Lock (true); - R_SetupBuffer (); - screen->Unlock (); } -//=========================================================================== -// -// -// -//=========================================================================== - void FSoftwareRenderer::ErrorCleanup () { Clip3DFloors::Instance()->Cleanup(); } -//=========================================================================== -// -// -// -//=========================================================================== - void FSoftwareRenderer::ClearBuffer(int color) { - // [SP] For now, for truecolor, this just outputs black. We'll figure out how to get something more meaningful - // later when this actually matters more. This is just to clear HOMs for now. 
if (!r_swtruecolor) + { memset(RenderTarget->GetBuffer(), color, RenderTarget->GetPitch() * RenderTarget->GetHeight()); + } else - memset(RenderTarget->GetBuffer(), 0, RenderTarget->GetPitch() * RenderTarget->GetHeight() * 4); + { + uint32_t bgracolor = GPalette.BaseColors[color].d; + int size = RenderTarget->GetPitch() * RenderTarget->GetHeight(); + uint32_t *dest = (uint32_t *)RenderTarget->GetBuffer(); + for (int i = 0; i < size; i++) + dest[i] = bgracolor; + } } -//=========================================================================== -// -// -// -//=========================================================================== - void FSoftwareRenderer::SetWindow (int windowSize, int fullWidth, int fullHeight, int stHeight, float trueratio) { R_SWRSetWindow(windowSize, fullWidth, fullHeight, stHeight, trueratio); } -//=========================================================================== -// -// -// -//=========================================================================== - void FSoftwareRenderer::SetupFrame(player_t *player) { R_SetupColormap(player); R_SetupFreelook(); } -//========================================================================== -// -// R_CopyStackedViewParameters -// -//========================================================================== - void FSoftwareRenderer::CopyStackedViewParameters() { RenderPortal::Instance()->CopyStackedViewParameters(); } -//========================================================================== -// -// -// -//========================================================================== - void FSoftwareRenderer::RenderTextureView (FCanvasTexture *tex, AActor *viewpoint, int fov) { BYTE *Pixels = r_swtruecolor ? 
(BYTE*)tex->GetPixelsBgra() : (BYTE*)tex->GetPixels(); @@ -410,10 +325,12 @@ void FSoftwareRenderer::RenderTextureView (FCanvasTexture *tex, AActor *viewpoin DAngle savedfov = FieldOfView; R_SetFOV ((double)fov); + if (r_polyrenderer) PolyRenderer::Instance()->RenderViewToCanvas(viewpoint, Canvas, 0, 0, tex->GetWidth(), tex->GetHeight(), tex->bFirstUpdate); else RenderScene::Instance()->RenderViewToCanvas(viewpoint, Canvas, 0, 0, tex->GetWidth(), tex->GetHeight(), tex->bFirstUpdate); + R_SetFOV (savedfov); if (Canvas->IsBgra()) @@ -473,3 +390,17 @@ sector_t *FSoftwareRenderer::FakeFlat(sector_t *sec, sector_t *tempsec, int *flo return RenderOpaquePass::Instance()->FakeFlat(sec, tempsec, floorlightlevel, ceilinglightlevel, nullptr, 0, 0, 0, 0); } +void FSoftwareRenderer::StateChanged(AActor *actor) +{ + gl_SetActorLights(actor); +} + +void FSoftwareRenderer::PreprocessLevel() +{ + gl_PreprocessLevel(); +} + +void FSoftwareRenderer::CleanLevelData() +{ + gl_CleanLevelData(); +} diff --git a/src/swrenderer/r_swrenderer.h b/src/swrenderer/r_swrenderer.h index c09542959c..87496882ff 100644 --- a/src/swrenderer/r_swrenderer.h +++ b/src/swrenderer/r_swrenderer.h @@ -1,12 +1,8 @@ -#ifndef __R_SWRENDERER_H -#define __R_SWRENDERER_H + +#pragma once #include "r_renderer.h" -void gl_SetActorLights(AActor *); -void gl_PreprocessLevel(); -void gl_CleanLevelData(); - struct FSoftwareRenderer : public FRenderer { FSoftwareRenderer(); @@ -44,22 +40,8 @@ struct FSoftwareRenderer : public FRenderer void RenderTextureView (FCanvasTexture *tex, AActor *viewpoint, int fov) override; sector_t *FakeFlat(sector_t *sec, sector_t *tempsec, int *floorlightlevel, int *ceilinglightlevel) override; - void StateChanged(AActor *actor) override - { - gl_SetActorLights(actor); - } - - void PreprocessLevel() override - { - gl_PreprocessLevel(); - } - - void CleanLevelData() override - { - gl_CleanLevelData(); - } + void StateChanged(AActor *actor) override; + void PreprocessLevel() override; + void 
CleanLevelData() override; }; - - -#endif diff --git a/src/swrenderer/scene/r_scene.cpp b/src/swrenderer/scene/r_scene.cpp index 75946f367c..15dfa265a6 100644 --- a/src/swrenderer/scene/r_scene.cpp +++ b/src/swrenderer/scene/r_scene.cpp @@ -188,6 +188,11 @@ namespace swrenderer void RenderScene::ScreenResized() { VisiblePlaneList::Instance()->Init(); + + RenderTarget = screen; + screen->Lock(true); + R_SetupBuffer(); + screen->Unlock(); } void RenderScene::Init() From 3093aaadc9143e41c0abab8c4dd6f378c3dbca60 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 15 Jan 2017 01:54:25 +0100 Subject: [PATCH 712/912] Remove SetupFrame and CopyStackedViewParameters from FRenderer interface --- src/polyrenderer/poly_renderer.cpp | 5 ++++- src/r_renderer.h | 2 -- src/r_utility.cpp | 3 --- src/swrenderer/r_swrenderer.cpp | 11 ----------- src/swrenderer/r_swrenderer.h | 2 -- src/swrenderer/scene/r_light.cpp | 6 +++++- src/swrenderer/scene/r_light.h | 2 +- src/swrenderer/scene/r_scene.cpp | 4 ++++ 8 files changed, 14 insertions(+), 21 deletions(-) diff --git a/src/polyrenderer/poly_renderer.cpp b/src/polyrenderer/poly_renderer.cpp index 22d996e9fb..67f997c614 100644 --- a/src/polyrenderer/poly_renderer.cpp +++ b/src/polyrenderer/poly_renderer.cpp @@ -32,6 +32,7 @@ #include "po_man.h" #include "swrenderer/scene/r_scene.h" #include "swrenderer/scene/r_viewport.h" +#include "swrenderer/scene/r_light.h" EXTERN_CVAR(Int, screenblocks) void InitGLRMapinfoData(); @@ -81,7 +82,9 @@ void PolyRenderer::RenderActorView(AActor *actor, bool dontmaplines) P_FindParticleSubsectors(); PO_LinkToSubsectors(); R_SetupFrame(actor); - + swrenderer::R_SetupColormap(actor); + swrenderer::R_SetupFreelook(); + ActorRenderFlags savedflags = camera->renderflags; // Never draw the player unless in chasecam mode if (!r_showviewer) diff --git a/src/r_renderer.h b/src/r_renderer.h index 0b14058954..92ac3ef0cd 100644 --- a/src/r_renderer.h +++ b/src/r_renderer.h @@ -57,8 +57,6 @@ struct FRenderer 
virtual void ClearBuffer(int color) = 0; virtual void Init() = 0; virtual void SetWindow (int windowSize, int fullWidth, int fullHeight, int stHeight, float trueratio) {} - virtual void SetupFrame(player_t *player) {} - virtual void CopyStackedViewParameters() {} virtual void RenderTextureView (FCanvasTexture *tex, AActor *viewpoint, int fov) = 0; virtual sector_t *FakeFlat(sector_t *sec, sector_t *tempsec, int *floorlightlevel, int *ceilinglightlevel) = 0; virtual void SetFogParams(int _fogdensity, PalEntry _outsidefogcolor, int _outsidefogdensity, int _skyfog) {} diff --git a/src/r_utility.cpp b/src/r_utility.cpp index c28422dffa..03ba0756f7 100644 --- a/src/r_utility.cpp +++ b/src/r_utility.cpp @@ -910,9 +910,6 @@ void R_SetupFrame (AActor *actor) } } - Renderer->CopyStackedViewParameters(); - Renderer->SetupFrame(player); - validcount++; if (RenderTarget == screen && r_clearbuffer != 0) diff --git a/src/swrenderer/r_swrenderer.cpp b/src/swrenderer/r_swrenderer.cpp index 75348772f7..fff9a36f70 100644 --- a/src/swrenderer/r_swrenderer.cpp +++ b/src/swrenderer/r_swrenderer.cpp @@ -302,17 +302,6 @@ void FSoftwareRenderer::SetWindow (int windowSize, int fullWidth, int fullHeight R_SWRSetWindow(windowSize, fullWidth, fullHeight, stHeight, trueratio); } -void FSoftwareRenderer::SetupFrame(player_t *player) -{ - R_SetupColormap(player); - R_SetupFreelook(); -} - -void FSoftwareRenderer::CopyStackedViewParameters() -{ - RenderPortal::Instance()->CopyStackedViewParameters(); -} - void FSoftwareRenderer::RenderTextureView (FCanvasTexture *tex, AActor *viewpoint, int fov) { BYTE *Pixels = r_swtruecolor ? 
(BYTE*)tex->GetPixelsBgra() : (BYTE*)tex->GetPixels(); diff --git a/src/swrenderer/r_swrenderer.h b/src/swrenderer/r_swrenderer.h index 87496882ff..51d2b1d163 100644 --- a/src/swrenderer/r_swrenderer.h +++ b/src/swrenderer/r_swrenderer.h @@ -35,8 +35,6 @@ struct FSoftwareRenderer : public FRenderer void ClearBuffer(int color) override; void Init() override; void SetWindow (int windowSize, int fullWidth, int fullHeight, int stHeight, float trueratio) override; - void SetupFrame(player_t *player) override; - void CopyStackedViewParameters() override; void RenderTextureView (FCanvasTexture *tex, AActor *viewpoint, int fov) override; sector_t *FakeFlat(sector_t *sec, sector_t *tempsec, int *floorlightlevel, int *ceilinglightlevel) override; diff --git a/src/swrenderer/scene/r_light.cpp b/src/swrenderer/scene/r_light.cpp index 4059a73bf0..a1bcc75ded 100644 --- a/src/swrenderer/scene/r_light.cpp +++ b/src/swrenderer/scene/r_light.cpp @@ -109,8 +109,12 @@ namespace swrenderer return CurrentVisibility; } - void R_SetupColormap(player_t *player) + void R_SetupColormap(AActor *actor) { + player_t *player = actor->player; + if (camera && camera->player != 0) + player = camera->player; + realfixedcolormap = NULL; fixedcolormap = NULL; fixedlightlev = -1; diff --git a/src/swrenderer/scene/r_light.h b/src/swrenderer/scene/r_light.h index 50b4fae842..26469b37c8 100644 --- a/src/swrenderer/scene/r_light.h +++ b/src/swrenderer/scene/r_light.h @@ -66,5 +66,5 @@ namespace swrenderer void R_SetVisibility(double visibility); double R_GetVisibility(); - void R_SetupColormap(player_t *); + void R_SetupColormap(AActor *actor); } diff --git a/src/swrenderer/scene/r_scene.cpp b/src/swrenderer/scene/r_scene.cpp index 15dfa265a6..54271d867f 100644 --- a/src/swrenderer/scene/r_scene.cpp +++ b/src/swrenderer/scene/r_scene.cpp @@ -71,6 +71,10 @@ namespace swrenderer R_SetupBuffer(); R_SetupFrame(actor); + R_SetupColormap(actor); + R_SetupFreelook(); + + 
RenderPortal::Instance()->CopyStackedViewParameters(); // Clear buffers. R_ClearClipSegs(0, viewwidth); From 1e7015643d1917d03c07525659264c26dd2a0f99 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 15 Jan 2017 02:04:49 +0100 Subject: [PATCH 713/912] Remove ErrorCleanup from FRenderer interface --- src/d_main.cpp | 1 - src/r_renderer.h | 1 - src/swrenderer/r_swrenderer.cpp | 5 ----- src/swrenderer/r_swrenderer.h | 1 - src/swrenderer/scene/r_scene.cpp | 2 +- 5 files changed, 1 insertion(+), 9 deletions(-) diff --git a/src/d_main.cpp b/src/d_main.cpp index 8879232604..2241c35c5e 100644 --- a/src/d_main.cpp +++ b/src/d_main.cpp @@ -961,7 +961,6 @@ void D_ErrorCleanup () menuactive = MENU_Off; } insave = false; - Renderer->ErrorCleanup(); } //========================================================================== diff --git a/src/r_renderer.h b/src/r_renderer.h index 92ac3ef0cd..d8f54ec5ba 100644 --- a/src/r_renderer.h +++ b/src/r_renderer.h @@ -53,7 +53,6 @@ struct FRenderer virtual int GetMaxViewPitch(bool down) = 0; // return value is in plain degrees virtual void OnModeSet () {} - virtual void ErrorCleanup () {} virtual void ClearBuffer(int color) = 0; virtual void Init() = 0; virtual void SetWindow (int windowSize, int fullWidth, int fullHeight, int stHeight, float trueratio) {} diff --git a/src/swrenderer/r_swrenderer.cpp b/src/swrenderer/r_swrenderer.cpp index fff9a36f70..907b770511 100644 --- a/src/swrenderer/r_swrenderer.cpp +++ b/src/swrenderer/r_swrenderer.cpp @@ -276,11 +276,6 @@ void FSoftwareRenderer::OnModeSet () RenderScene::Instance()->ScreenResized(); } -void FSoftwareRenderer::ErrorCleanup () -{ - Clip3DFloors::Instance()->Cleanup(); -} - void FSoftwareRenderer::ClearBuffer(int color) { if (!r_swtruecolor) diff --git a/src/swrenderer/r_swrenderer.h b/src/swrenderer/r_swrenderer.h index 51d2b1d163..d116a1a953 100644 --- a/src/swrenderer/r_swrenderer.h +++ b/src/swrenderer/r_swrenderer.h @@ -31,7 +31,6 @@ struct FSoftwareRenderer : public 
FRenderer bool RequireGLNodes() override; void OnModeSet () override; - void ErrorCleanup () override; void ClearBuffer(int color) override; void Init() override; void SetWindow (int windowSize, int fullWidth, int fullHeight, int stHeight, float trueratio) override; diff --git a/src/swrenderer/scene/r_scene.cpp b/src/swrenderer/scene/r_scene.cpp index 54271d867f..c9bfcb8bc5 100644 --- a/src/swrenderer/scene/r_scene.cpp +++ b/src/swrenderer/scene/r_scene.cpp @@ -66,7 +66,7 @@ namespace swrenderer WallScanCycles.Reset(); Clip3DFloors *clip3d = Clip3DFloors::Instance(); - clip3d->fakeActive = false; // kg3D - reset fake floor indicator + clip3d->Cleanup(); clip3d->ResetClip(); // reset clips (floor/ceiling) R_SetupBuffer(); From 4c12ba740f01be98ea59a99390fa934a01045d76 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 15 Jan 2017 02:46:43 +0100 Subject: [PATCH 714/912] Remove SetWindow from FRenderer interface --- src/polyrenderer/poly_renderer.cpp | 34 ++++++++++++++++++++++ src/polyrenderer/poly_renderer.h | 3 +- src/r_renderer.h | 1 - src/r_utility.cpp | 6 +--- src/swrenderer/r_swrenderer.cpp | 45 ++---------------------------- src/swrenderer/r_swrenderer.h | 1 - src/swrenderer/scene/r_scene.cpp | 36 ++++++++++++++++++++++++ src/swrenderer/scene/r_scene.h | 1 + 8 files changed, 77 insertions(+), 50 deletions(-) diff --git a/src/polyrenderer/poly_renderer.cpp b/src/polyrenderer/poly_renderer.cpp index 67f997c614..ceabfceffc 100644 --- a/src/polyrenderer/poly_renderer.cpp +++ b/src/polyrenderer/poly_renderer.cpp @@ -30,10 +30,13 @@ #include "gl/data/gl_data.h" #include "d_net.h" #include "po_man.h" +#include "st_stuff.h" #include "swrenderer/scene/r_scene.h" #include "swrenderer/scene/r_viewport.h" #include "swrenderer/scene/r_light.h" +#include "swrenderer/drawers/r_draw_rgba.h" +EXTERN_CVAR(Bool, r_shadercolormaps) EXTERN_CVAR(Int, screenblocks) void InitGLRMapinfoData(); extern bool r_showviewer; @@ -46,6 +49,33 @@ PolyRenderer *PolyRenderer::Instance() 
return &scene; } +void PolyRenderer::RenderView(player_t *player) +{ + using namespace swrenderer; + + bool saved_swtruecolor = r_swtruecolor; + r_swtruecolor = screen->IsBgra(); + + int width = SCREENWIDTH; + int height = SCREENHEIGHT; + int stHeight = ST_Y; + float trueratio; + ActiveRatio(width, height, &trueratio); + R_SWRSetWindow(setblocks, width, height, stHeight, trueratio); + + RenderActorView(player->mo, false); + + // Apply special colormap if the target cannot do it + if (realfixedcolormap && r_swtruecolor && !(r_shadercolormaps && screen->Accel2D)) + { + R_BeginDrawerCommands(); + DrawerCommandQueue::QueueCommand(realfixedcolormap, screen); + R_EndDrawerCommands(); + } + + r_swtruecolor = saved_swtruecolor; +} + void PolyRenderer::RenderViewToCanvas(AActor *actor, DCanvas *canvas, int x, int y, int width, int height, bool dontmaplines) { const bool savedviewactive = viewactive; @@ -55,6 +85,7 @@ void PolyRenderer::RenderViewToCanvas(AActor *actor, DCanvas *canvas, int x, int RenderTarget = canvas; swrenderer::bRenderingToCanvas = true; R_SetWindow(12, width, height, height, true); + swrenderer::R_SWRSetWindow(12, width, height, height, WidescreenRatio); viewwindowx = x; viewwindowy = y; viewactive = true; @@ -69,6 +100,9 @@ void PolyRenderer::RenderViewToCanvas(AActor *actor, DCanvas *canvas, int x, int RenderTarget = screen; swrenderer::bRenderingToCanvas = false; R_ExecuteSetViewSize(); + float trueratio; + ActiveRatio(width, height, &trueratio); + swrenderer::R_SWRSetWindow(setblocks, width, height, height, WidescreenRatio); viewactive = savedviewactive; swrenderer::r_swtruecolor = savedoutputformat; } diff --git a/src/polyrenderer/poly_renderer.h b/src/polyrenderer/poly_renderer.h index 368872d97e..75e192e9c0 100644 --- a/src/polyrenderer/poly_renderer.h +++ b/src/polyrenderer/poly_renderer.h @@ -38,8 +38,8 @@ class DCanvas; class PolyRenderer { public: + void RenderView(player_t *player); void RenderViewToCanvas(AActor *actor, DCanvas *canvas, int 
x, int y, int width, int height, bool dontmaplines); - void RenderActorView(AActor *actor, bool dontmaplines); void RenderRemainingPlayerSprites(); static PolyRenderer *Instance(); @@ -52,6 +52,7 @@ public: bool DontMapLines = false; private: + void RenderActorView(AActor *actor, bool dontmaplines); void ClearBuffers(); void SetSceneViewport(); void SetupPerspectiveMatrix(); diff --git a/src/r_renderer.h b/src/r_renderer.h index d8f54ec5ba..54e6d9dacd 100644 --- a/src/r_renderer.h +++ b/src/r_renderer.h @@ -55,7 +55,6 @@ struct FRenderer virtual void OnModeSet () {} virtual void ClearBuffer(int color) = 0; virtual void Init() = 0; - virtual void SetWindow (int windowSize, int fullWidth, int fullHeight, int stHeight, float trueratio) {} virtual void RenderTextureView (FCanvasTexture *tex, AActor *viewpoint, int fov) = 0; virtual sector_t *FakeFlat(sector_t *sec, sector_t *tempsec, int *floorlightlevel, int *ceilinglightlevel) = 0; virtual void SetFogParams(int _fogdensity, PalEntry _outsidefogcolor, int _outsidefogdensity, int _skyfog) {} diff --git a/src/r_utility.cpp b/src/r_utility.cpp index 03ba0756f7..29119754b5 100644 --- a/src/r_utility.cpp +++ b/src/r_utility.cpp @@ -201,8 +201,6 @@ void R_SetViewSize (int blocks) void R_SetWindow (int windowSize, int fullWidth, int fullHeight, int stHeight, bool renderingToCanvas) { - float trueratio; - if (windowSize >= 11) { viewwidth = fullWidth; @@ -224,11 +222,10 @@ void R_SetWindow (int windowSize, int fullWidth, int fullHeight, int stHeight, b if (renderingToCanvas) { WidescreenRatio = fullWidth / (float)fullHeight; - trueratio = WidescreenRatio; } else { - WidescreenRatio = ActiveRatio(fullWidth, fullHeight, &trueratio); + WidescreenRatio = ActiveRatio(fullWidth, fullHeight); } DrawFSHUD = (windowSize == 11); @@ -258,7 +255,6 @@ void R_SetWindow (int windowSize, int fullWidth, int fullHeight, int stHeight, b if (fov > 170.) 
fov = 170.; } FocalTangent = tan(fov.Radians() / 2); - Renderer->SetWindow(windowSize, fullWidth, fullHeight, stHeight, trueratio); } //========================================================================== diff --git a/src/swrenderer/r_swrenderer.cpp b/src/swrenderer/r_swrenderer.cpp index 907b770511..85131c9757 100644 --- a/src/swrenderer/r_swrenderer.cpp +++ b/src/swrenderer/r_swrenderer.cpp @@ -178,45 +178,11 @@ void FSoftwareRenderer::Precache(BYTE *texhitlist, TMap &act void FSoftwareRenderer::RenderView(player_t *player) { if (r_polyrenderer) - { - bool saved_swtruecolor = r_swtruecolor; - r_swtruecolor = screen->IsBgra(); - - PolyRenderer::Instance()->RenderActorView(player->mo, false); - - // Apply special colormap if the target cannot do it - if (realfixedcolormap && r_swtruecolor && !(r_shadercolormaps && screen->Accel2D)) - { - R_BeginDrawerCommands(); - DrawerCommandQueue::QueueCommand(realfixedcolormap, screen); - R_EndDrawerCommands(); - } - - r_swtruecolor = saved_swtruecolor; - - FCanvasTextureInfo::UpdateAll(); - } + PolyRenderer::Instance()->RenderView(player); else - { - if (r_swtruecolor != screen->IsBgra()) - { - r_swtruecolor = screen->IsBgra(); - R_InitColumnDrawers(); - } + RenderScene::Instance()->RenderView(player); - R_BeginDrawerCommands(); - RenderScene::Instance()->RenderActorView(player->mo); - - // Apply special colormap if the target cannot do it - if (realfixedcolormap && r_swtruecolor && !(r_shadercolormaps && screen->Accel2D)) - { - DrawerCommandQueue::QueueCommand(realfixedcolormap, screen); - } - - R_EndDrawerCommands(); - - FCanvasTextureInfo::UpdateAll(); - } + FCanvasTextureInfo::UpdateAll(); } void FSoftwareRenderer::RemapVoxels() @@ -292,11 +258,6 @@ void FSoftwareRenderer::ClearBuffer(int color) } } -void FSoftwareRenderer::SetWindow (int windowSize, int fullWidth, int fullHeight, int stHeight, float trueratio) -{ - R_SWRSetWindow(windowSize, fullWidth, fullHeight, stHeight, trueratio); -} - void 
FSoftwareRenderer::RenderTextureView (FCanvasTexture *tex, AActor *viewpoint, int fov) { BYTE *Pixels = r_swtruecolor ? (BYTE*)tex->GetPixelsBgra() : (BYTE*)tex->GetPixels(); diff --git a/src/swrenderer/r_swrenderer.h b/src/swrenderer/r_swrenderer.h index d116a1a953..5b2a6eedca 100644 --- a/src/swrenderer/r_swrenderer.h +++ b/src/swrenderer/r_swrenderer.h @@ -33,7 +33,6 @@ struct FSoftwareRenderer : public FRenderer void OnModeSet () override; void ClearBuffer(int color) override; void Init() override; - void SetWindow (int windowSize, int fullWidth, int fullHeight, int stHeight, float trueratio) override; void RenderTextureView (FCanvasTexture *tex, AActor *viewpoint, int fov) override; sector_t *FakeFlat(sector_t *sec, sector_t *tempsec, int *floorlightlevel, int *ceilinglightlevel) override; diff --git a/src/swrenderer/scene/r_scene.cpp b/src/swrenderer/scene/r_scene.cpp index c9bfcb8bc5..77b0edf142 100644 --- a/src/swrenderer/scene/r_scene.cpp +++ b/src/swrenderer/scene/r_scene.cpp @@ -30,6 +30,7 @@ #include "g_level.h" #include "p_effect.h" #include "po_man.h" +#include "st_stuff.h" #include "r_data/r_interpolate.h" #include "swrenderer/scene/r_scene.h" #include "swrenderer/scene/r_viewport.h" @@ -43,6 +44,7 @@ #include "swrenderer/segments/r_portalsegment.h" #include "swrenderer/plane/r_visibleplanelist.h" #include "swrenderer/drawers/r_draw.h" +#include "swrenderer/drawers/r_draw_rgba.h" #include "swrenderer/drawers/r_thread.h" #include "swrenderer/r_memory.h" @@ -58,6 +60,34 @@ namespace swrenderer return &instance; } + void RenderScene::RenderView(player_t *player) + { + int width = SCREENWIDTH; + int height = SCREENHEIGHT; + int stHeight = ST_Y; + float trueratio; + ActiveRatio(width, height, &trueratio); + R_SWRSetWindow(setblocks, width, height, stHeight, trueratio); + + if (r_swtruecolor != screen->IsBgra()) + { + r_swtruecolor = screen->IsBgra(); + R_InitColumnDrawers(); + } + + R_BeginDrawerCommands(); + + RenderActorView(player->mo); + + // Apply 
special colormap if the target cannot do it + if (realfixedcolormap && r_swtruecolor && !(r_shadercolormaps && screen->Accel2D)) + { + DrawerCommandQueue::QueueCommand(realfixedcolormap, screen); + } + + R_EndDrawerCommands(); + } + void RenderScene::RenderActorView(AActor *actor, bool dontmaplines) { WallCycles.Reset(); @@ -165,6 +195,8 @@ namespace swrenderer bRenderingToCanvas = true; R_SetWindow(12, width, height, height, true); + R_SWRSetWindow(12, width, height, height, WidescreenRatio); + viewwindowx = x; viewwindowy = y; viewactive = true; @@ -175,7 +207,11 @@ namespace swrenderer RenderTarget = screen; bRenderingToCanvas = false; + R_ExecuteSetViewSize(); + float trueratio; + ActiveRatio(width, height, &trueratio); + R_SWRSetWindow(setblocks, width, height, height, WidescreenRatio); screen->Lock(true); R_SetupBuffer(); screen->Unlock(); diff --git a/src/swrenderer/scene/r_scene.h b/src/swrenderer/scene/r_scene.h index 1ad345890a..8432838134 100644 --- a/src/swrenderer/scene/r_scene.h +++ b/src/swrenderer/scene/r_scene.h @@ -33,6 +33,7 @@ namespace swrenderer void Deinit(); void RenderActorView(AActor *actor, bool dontmaplines = false); + void RenderView(player_t *player); void RenderViewToCanvas(AActor *actor, DCanvas *canvas, int x, int y, int width, int height, bool dontmaplines = false); bool DontMapLines() const { return dontmaplines; } From aaee6e333f291719eb0022e91605f5951ba7659d Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 15 Jan 2017 02:50:37 +0100 Subject: [PATCH 715/912] Mark PrecacheTexture private and remove unneeded declarations --- src/swrenderer/r_swrenderer.h | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/src/swrenderer/r_swrenderer.h b/src/swrenderer/r_swrenderer.h index 5b2a6eedca..aedff62146 100644 --- a/src/swrenderer/r_swrenderer.h +++ b/src/swrenderer/r_swrenderer.h @@ -9,28 +9,27 @@ struct FSoftwareRenderer : public FRenderer ~FSoftwareRenderer(); // Can be overridden so that 
the colormaps for sector color/fade won't be built. - virtual bool UsesColormap() const override; + bool UsesColormap() const override; - // precache one texture - void PrecacheTexture(FTexture *tex, int cache); - virtual void Precache(BYTE *texhitlist, TMap &actorhitlist) override; + // precache textures + void Precache(BYTE *texhitlist, TMap &actorhitlist) override; // render 3D view - virtual void RenderView(player_t *player) override; + void RenderView(player_t *player) override; // Remap voxel palette - virtual void RemapVoxels() override; + void RemapVoxels() override; // renders view to a savegame picture - virtual void WriteSavePic (player_t *player, FileWriter *file, int width, int height) override; + void WriteSavePic (player_t *player, FileWriter *file, int width, int height) override; // draws player sprites with hardware acceleration (only useful for software rendering) - virtual void DrawRemainingPlayerSprites() override; + void DrawRemainingPlayerSprites() override; - virtual int GetMaxViewPitch(bool down) override; + int GetMaxViewPitch(bool down) override; bool RequireGLNodes() override; - void OnModeSet () override; + void OnModeSet() override; void ClearBuffer(int color) override; void Init() override; void RenderTextureView (FCanvasTexture *tex, AActor *viewpoint, int fov) override; @@ -40,4 +39,6 @@ struct FSoftwareRenderer : public FRenderer void PreprocessLevel() override; void CleanLevelData() override; +private: + void PrecacheTexture(FTexture *tex, int cache); }; From f9175561bb042ea65d211f99567dbc93384b031d Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 15 Jan 2017 03:19:03 +0100 Subject: [PATCH 716/912] Change RenderTarget global to be an internal swrenderer variable --- src/d_main.cpp | 2 -- src/polyrenderer/poly_renderer.cpp | 12 ++++++---- src/polyrenderer/scene/poly_playersprite.cpp | 6 ++--- src/r_utility.cpp | 4 +--- src/r_utility.h | 2 -- src/swrenderer/r_swrenderer.cpp | 13 +---------- src/swrenderer/scene/r_scene.cpp 
| 24 ++++++++++++++++++++ src/swrenderer/scene/r_scene.h | 3 +++ src/swrenderer/scene/r_viewport.cpp | 1 + src/swrenderer/scene/r_viewport.h | 1 + 10 files changed, 42 insertions(+), 26 deletions(-) diff --git a/src/d_main.cpp b/src/d_main.cpp index 2241c35c5e..dc2248d193 100644 --- a/src/d_main.cpp +++ b/src/d_main.cpp @@ -694,8 +694,6 @@ void D_Display () } } - RenderTarget = screen; - // change the view size if needed if (setsizeneeded && StatusBar != NULL) { diff --git a/src/polyrenderer/poly_renderer.cpp b/src/polyrenderer/poly_renderer.cpp index ceabfceffc..6e2e54be3d 100644 --- a/src/polyrenderer/poly_renderer.cpp +++ b/src/polyrenderer/poly_renderer.cpp @@ -53,6 +53,8 @@ void PolyRenderer::RenderView(player_t *player) { using namespace swrenderer; + swrenderer::RenderTarget = screen; + bool saved_swtruecolor = r_swtruecolor; r_swtruecolor = screen->IsBgra(); @@ -82,7 +84,7 @@ void PolyRenderer::RenderViewToCanvas(AActor *actor, DCanvas *canvas, int x, int const bool savedoutputformat = swrenderer::r_swtruecolor; viewwidth = width; - RenderTarget = canvas; + swrenderer::RenderTarget = canvas; swrenderer::bRenderingToCanvas = true; R_SetWindow(12, width, height, height, true); swrenderer::R_SWRSetWindow(12, width, height, height, WidescreenRatio); @@ -97,7 +99,7 @@ void PolyRenderer::RenderViewToCanvas(AActor *actor, DCanvas *canvas, int x, int canvas->Unlock(); - RenderTarget = screen; + swrenderer::RenderTarget = screen; swrenderer::bRenderingToCanvas = false; R_ExecuteSetViewSize(); float trueratio; @@ -153,8 +155,8 @@ void PolyRenderer::RenderRemainingPlayerSprites() void PolyRenderer::ClearBuffers() { PolyVertexBuffer::Clear(); - PolyStencilBuffer::Instance()->Clear(RenderTarget->GetWidth(), RenderTarget->GetHeight(), 0); - PolySubsectorGBuffer::Instance()->Resize(RenderTarget->GetPitch(), RenderTarget->GetHeight()); + PolyStencilBuffer::Instance()->Clear(swrenderer::RenderTarget->GetWidth(), swrenderer::RenderTarget->GetHeight(), 0); + 
PolySubsectorGBuffer::Instance()->Resize(swrenderer::RenderTarget->GetPitch(), swrenderer::RenderTarget->GetHeight()); NextStencilValue = 0; SeenLinePortals.clear(); SeenMirrors.clear(); @@ -162,6 +164,8 @@ void PolyRenderer::ClearBuffers() void PolyRenderer::SetSceneViewport() { + using namespace swrenderer; + if (RenderTarget == screen) // Rendering to screen { int height; diff --git a/src/polyrenderer/scene/poly_playersprite.cpp b/src/polyrenderer/scene/poly_playersprite.cpp index a3b29fa300..e9665c8576 100644 --- a/src/polyrenderer/scene/poly_playersprite.cpp +++ b/src/polyrenderer/scene/poly_playersprite.cpp @@ -165,12 +165,12 @@ void RenderPolyPlayerSprites::RenderSprite(DPSprite *sprite, AActor *owner, floa double texturemid = (BaseYCenter - sy) * tex->Scale.Y + tex->TopOffset; // Adjust PSprite for fullscreen views - if (camera->player && (RenderTarget != screen || viewheight == RenderTarget->GetHeight() || (RenderTarget->GetWidth() > (BaseXCenter * 2) && !st_scale))) + if (camera->player && (swrenderer::RenderTarget != screen || viewheight == swrenderer::RenderTarget->GetHeight() || (swrenderer::RenderTarget->GetWidth() > (BaseXCenter * 2) && !st_scale))) { AWeapon *weapon = dyn_cast(sprite->GetCaller()); if (weapon != nullptr && weapon->YAdjust != 0) { - if (RenderTarget != screen || viewheight == RenderTarget->GetHeight()) + if (swrenderer::RenderTarget != screen || viewheight == swrenderer::RenderTarget->GetHeight()) { texturemid -= weapon->YAdjust; } @@ -338,7 +338,7 @@ void RenderPolyPlayerSprites::RenderSprite(DPSprite *sprite, AActor *owner, floa // Check for hardware-assisted 2D. If it's available, and this sprite is not // fuzzy, don't draw it until after the switch to 2D mode. 
- if (!noaccel && RenderTarget == screen && (DFrameBuffer *)screen->Accel2D) + if (!noaccel && swrenderer::RenderTarget == screen && (DFrameBuffer *)screen->Accel2D) { FRenderStyle style = visstyle.RenderStyle; style.CheckFuzz(); diff --git a/src/r_utility.cpp b/src/r_utility.cpp index 29119754b5..34ad10a6e5 100644 --- a/src/r_utility.cpp +++ b/src/r_utility.cpp @@ -100,8 +100,6 @@ CUSTOM_CVAR(Float, r_quakeintensity, 1.0f, CVAR_ARCHIVE | CVAR_GLOBALCONFIG) else if (self > 1.f) self = 1.f; } -DCanvas *RenderTarget; // [RH] canvas to render to - int viewwindowx; int viewwindowy; @@ -908,7 +906,7 @@ void R_SetupFrame (AActor *actor) validcount++; - if (RenderTarget == screen && r_clearbuffer != 0) + if (r_clearbuffer != 0) { int color; int hom = r_clearbuffer; diff --git a/src/r_utility.h b/src/r_utility.h index 488d456720..ac31a6f5b5 100644 --- a/src/r_utility.h +++ b/src/r_utility.h @@ -12,8 +12,6 @@ class FSerializer; // There a 0-31, i.e. 32 LUT in the COLORMAP lump. #define NUMCOLORMAPS 32 -extern DCanvas *RenderTarget; - extern DVector3 ViewPos; extern DVector3 ViewActorPos; extern DAngle ViewAngle; diff --git a/src/swrenderer/r_swrenderer.cpp b/src/swrenderer/r_swrenderer.cpp index 85131c9757..e64b7f0b75 100644 --- a/src/swrenderer/r_swrenderer.cpp +++ b/src/swrenderer/r_swrenderer.cpp @@ -244,18 +244,7 @@ void FSoftwareRenderer::OnModeSet () void FSoftwareRenderer::ClearBuffer(int color) { - if (!r_swtruecolor) - { - memset(RenderTarget->GetBuffer(), color, RenderTarget->GetPitch() * RenderTarget->GetHeight()); - } - else - { - uint32_t bgracolor = GPalette.BaseColors[color].d; - int size = RenderTarget->GetPitch() * RenderTarget->GetHeight(); - uint32_t *dest = (uint32_t *)RenderTarget->GetBuffer(); - for (int i = 0; i < size; i++) - dest[i] = bgracolor; - } + RenderScene::Instance()->SetClearColor(color); } void FSoftwareRenderer::RenderTextureView (FCanvasTexture *tex, AActor *viewpoint, int fov) diff --git a/src/swrenderer/scene/r_scene.cpp 
b/src/swrenderer/scene/r_scene.cpp index 77b0edf142..2dee1eb274 100644 --- a/src/swrenderer/scene/r_scene.cpp +++ b/src/swrenderer/scene/r_scene.cpp @@ -49,6 +49,7 @@ #include "swrenderer/r_memory.h" EXTERN_CVAR(Bool, r_shadercolormaps) +EXTERN_CVAR(Int, r_clearbuffer) namespace swrenderer { @@ -60,8 +61,15 @@ namespace swrenderer return &instance; } + void RenderScene::SetClearColor(int color) + { + clearcolor = color; + } + void RenderScene::RenderView(player_t *player) { + RenderTarget = screen; + int width = SCREENWIDTH; int height = SCREENHEIGHT; int stHeight = ST_Y; @@ -75,6 +83,22 @@ namespace swrenderer R_InitColumnDrawers(); } + if (r_clearbuffer != 0) + { + if (!r_swtruecolor) + { + memset(RenderTarget->GetBuffer(), clearcolor, RenderTarget->GetPitch() * RenderTarget->GetHeight()); + } + else + { + uint32_t bgracolor = GPalette.BaseColors[clearcolor].d; + int size = RenderTarget->GetPitch() * RenderTarget->GetHeight(); + uint32_t *dest = (uint32_t *)RenderTarget->GetBuffer(); + for (int i = 0; i < size; i++) + dest[i] = bgracolor; + } + } + R_BeginDrawerCommands(); RenderActorView(player->mo); diff --git a/src/swrenderer/scene/r_scene.h b/src/swrenderer/scene/r_scene.h index 8432838134..31e074c08c 100644 --- a/src/swrenderer/scene/r_scene.h +++ b/src/swrenderer/scene/r_scene.h @@ -31,6 +31,8 @@ namespace swrenderer void Init(); void ScreenResized(); void Deinit(); + + void SetClearColor(int color); void RenderActorView(AActor *actor, bool dontmaplines = false); void RenderView(player_t *player); @@ -40,5 +42,6 @@ namespace swrenderer private: bool dontmaplines = false; + int clearcolor = 0; }; } diff --git a/src/swrenderer/scene/r_viewport.cpp b/src/swrenderer/scene/r_viewport.cpp index 8d79237d25..b45a7ccd61 100644 --- a/src/swrenderer/scene/r_viewport.cpp +++ b/src/swrenderer/scene/r_viewport.cpp @@ -44,6 +44,7 @@ namespace swrenderer double FocalLengthX; double FocalLengthY; + DCanvas *RenderTarget; bool bRenderingToCanvas; double globaluclip, 
globaldclip; double CenterX, CenterY; diff --git a/src/swrenderer/scene/r_viewport.h b/src/swrenderer/scene/r_viewport.h index 6638a85922..3cf3ad0ef5 100644 --- a/src/swrenderer/scene/r_viewport.h +++ b/src/swrenderer/scene/r_viewport.h @@ -18,6 +18,7 @@ namespace swrenderer { + extern DCanvas *RenderTarget; extern bool bRenderingToCanvas; extern fixed_t viewingrangerecip; extern double FocalLengthX; From 42535b17f1f66604e645fbe82f1e5533d7d4eebc Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 15 Jan 2017 03:21:35 +0100 Subject: [PATCH 717/912] Rename FRenderer::ClearBuffer to SetClearColor --- src/gl/scene/gl_scene.cpp | 4 ++-- src/r_renderer.h | 2 +- src/r_utility.cpp | 2 +- src/swrenderer/r_swrenderer.cpp | 2 +- src/swrenderer/r_swrenderer.h | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/gl/scene/gl_scene.cpp b/src/gl/scene/gl_scene.cpp index 7f1432e142..9a9a994bdd 100644 --- a/src/gl/scene/gl_scene.cpp +++ b/src/gl/scene/gl_scene.cpp @@ -1013,7 +1013,7 @@ struct FGLInterface : public FRenderer bool RequireGLNodes() override; int GetMaxViewPitch(bool down) override; - void ClearBuffer(int color) override; + void SetClearColor(int color) override; void Init() override; }; @@ -1263,7 +1263,7 @@ int FGLInterface::GetMaxViewPitch(bool down) // //=========================================================================== -void FGLInterface::ClearBuffer(int color) +void FGLInterface::SetClearColor(int color) { PalEntry pe = GPalette.BaseColors[color]; GLRenderer->mSceneClearColor[0] = pe.r / 255.f; diff --git a/src/r_renderer.h b/src/r_renderer.h index 54e6d9dacd..9f1e94428e 100644 --- a/src/r_renderer.h +++ b/src/r_renderer.h @@ -53,7 +53,7 @@ struct FRenderer virtual int GetMaxViewPitch(bool down) = 0; // return value is in plain degrees virtual void OnModeSet () {} - virtual void ClearBuffer(int color) = 0; + virtual void SetClearColor(int color) = 0; virtual void Init() = 0; virtual void RenderTextureView (FCanvasTexture *tex, 
AActor *viewpoint, int fov) = 0; virtual sector_t *FakeFlat(sector_t *sec, sector_t *tempsec, int *floorlightlevel, int *ceilinglightlevel) = 0; diff --git a/src/r_utility.cpp b/src/r_utility.cpp index 34ad10a6e5..025b948043 100644 --- a/src/r_utility.cpp +++ b/src/r_utility.cpp @@ -931,7 +931,7 @@ void R_SetupFrame (AActor *actor) { color = pr_hom(); } - Renderer->ClearBuffer(color); + Renderer->SetClearColor(color); } } diff --git a/src/swrenderer/r_swrenderer.cpp b/src/swrenderer/r_swrenderer.cpp index e64b7f0b75..db0ede2e60 100644 --- a/src/swrenderer/r_swrenderer.cpp +++ b/src/swrenderer/r_swrenderer.cpp @@ -242,7 +242,7 @@ void FSoftwareRenderer::OnModeSet () RenderScene::Instance()->ScreenResized(); } -void FSoftwareRenderer::ClearBuffer(int color) +void FSoftwareRenderer::SetClearColor(int color) { RenderScene::Instance()->SetClearColor(color); } diff --git a/src/swrenderer/r_swrenderer.h b/src/swrenderer/r_swrenderer.h index aedff62146..c4d81c1d08 100644 --- a/src/swrenderer/r_swrenderer.h +++ b/src/swrenderer/r_swrenderer.h @@ -30,7 +30,7 @@ struct FSoftwareRenderer : public FRenderer bool RequireGLNodes() override; void OnModeSet() override; - void ClearBuffer(int color) override; + void SetClearColor(int color) override; void Init() override; void RenderTextureView (FCanvasTexture *tex, AActor *viewpoint, int fov) override; sector_t *FakeFlat(sector_t *sec, sector_t *tempsec, int *floorlightlevel, int *ceilinglightlevel) override; From 601ddb270dbaa8c05708bd7474f09dc4873a7e71 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 15 Jan 2017 04:06:52 +0100 Subject: [PATCH 718/912] Create a class for the viewport --- src/polyrenderer/poly_renderer.cpp | 8 ++++---- src/swrenderer/scene/r_scene.cpp | 19 +++++++++---------- src/swrenderer/scene/r_viewport.cpp | 18 +++++++++++++----- src/swrenderer/scene/r_viewport.h | 16 ++++++++++++---- 4 files changed, 38 insertions(+), 23 deletions(-) diff --git a/src/polyrenderer/poly_renderer.cpp 
b/src/polyrenderer/poly_renderer.cpp index 6e2e54be3d..06aa3cb0ed 100644 --- a/src/polyrenderer/poly_renderer.cpp +++ b/src/polyrenderer/poly_renderer.cpp @@ -63,7 +63,7 @@ void PolyRenderer::RenderView(player_t *player) int stHeight = ST_Y; float trueratio; ActiveRatio(width, height, &trueratio); - R_SWRSetWindow(setblocks, width, height, stHeight, trueratio); + RenderViewport::Instance()->SetViewport(width, height, trueratio); RenderActorView(player->mo, false); @@ -87,7 +87,7 @@ void PolyRenderer::RenderViewToCanvas(AActor *actor, DCanvas *canvas, int x, int swrenderer::RenderTarget = canvas; swrenderer::bRenderingToCanvas = true; R_SetWindow(12, width, height, height, true); - swrenderer::R_SWRSetWindow(12, width, height, height, WidescreenRatio); + swrenderer::RenderViewport::Instance()->SetViewport(width, height, WidescreenRatio); viewwindowx = x; viewwindowy = y; viewactive = true; @@ -104,7 +104,7 @@ void PolyRenderer::RenderViewToCanvas(AActor *actor, DCanvas *canvas, int x, int R_ExecuteSetViewSize(); float trueratio; ActiveRatio(width, height, &trueratio); - swrenderer::R_SWRSetWindow(setblocks, width, height, height, WidescreenRatio); + swrenderer::RenderViewport::Instance()->SetViewport(width, height, WidescreenRatio); viewactive = savedviewactive; swrenderer::r_swtruecolor = savedoutputformat; } @@ -119,7 +119,7 @@ void PolyRenderer::RenderActorView(AActor *actor, bool dontmaplines) PO_LinkToSubsectors(); R_SetupFrame(actor); swrenderer::R_SetupColormap(actor); - swrenderer::R_SetupFreelook(); + swrenderer::RenderViewport::Instance()->SetupFreelook(); ActorRenderFlags savedflags = camera->renderflags; // Never draw the player unless in chasecam mode diff --git a/src/swrenderer/scene/r_scene.cpp b/src/swrenderer/scene/r_scene.cpp index 2dee1eb274..f2f56b8333 100644 --- a/src/swrenderer/scene/r_scene.cpp +++ b/src/swrenderer/scene/r_scene.cpp @@ -72,10 +72,9 @@ namespace swrenderer int width = SCREENWIDTH; int height = SCREENHEIGHT; - int stHeight = 
ST_Y; float trueratio; ActiveRatio(width, height, &trueratio); - R_SWRSetWindow(setblocks, width, height, stHeight, trueratio); + RenderViewport::Instance()->SetViewport(width, height, trueratio); if (r_swtruecolor != screen->IsBgra()) { @@ -123,10 +122,9 @@ namespace swrenderer clip3d->Cleanup(); clip3d->ResetClip(); // reset clips (floor/ceiling) - R_SetupBuffer(); R_SetupFrame(actor); R_SetupColormap(actor); - R_SetupFreelook(); + RenderViewport::Instance()->SetupFreelook(); RenderPortal::Instance()->CopyStackedViewParameters(); @@ -191,7 +189,6 @@ namespace swrenderer } WallPortals.Clear(); interpolator.RestoreInterpolations(); - R_SetupBuffer(); // If we don't want shadered colormaps, NULL it now so that the // copy to the screen does not use a special colormap shader. @@ -219,11 +216,10 @@ namespace swrenderer bRenderingToCanvas = true; R_SetWindow(12, width, height, height, true); - R_SWRSetWindow(12, width, height, height, WidescreenRatio); - viewwindowx = x; viewwindowy = y; viewactive = true; + RenderViewport::Instance()->SetViewport(width, height, WidescreenRatio); RenderActorView(actor, dontmaplines); @@ -235,9 +231,8 @@ namespace swrenderer R_ExecuteSetViewSize(); float trueratio; ActiveRatio(width, height, &trueratio); - R_SWRSetWindow(setblocks, width, height, height, WidescreenRatio); screen->Lock(true); - R_SetupBuffer(); + RenderViewport::Instance()->SetViewport(width, height, trueratio); screen->Unlock(); viewactive = savedviewactive; @@ -254,8 +249,12 @@ namespace swrenderer VisiblePlaneList::Instance()->Init(); RenderTarget = screen; + int width = SCREENWIDTH; + int height = SCREENHEIGHT; + float trueratio; + ActiveRatio(width, height, &trueratio); screen->Lock(true); - R_SetupBuffer(); + RenderViewport::Instance()->SetViewport(SCREENWIDTH, SCREENHEIGHT, trueratio); screen->Unlock(); } diff --git a/src/swrenderer/scene/r_viewport.cpp b/src/swrenderer/scene/r_viewport.cpp index b45a7ccd61..df5b37f086 100644 --- 
a/src/swrenderer/scene/r_viewport.cpp +++ b/src/swrenderer/scene/r_viewport.cpp @@ -60,7 +60,13 @@ namespace swrenderer // from clipangle to -clipangle. angle_t xtoviewangle[MAXWIDTH + 1]; - void R_SWRSetWindow(int windowSize, int fullWidth, int fullHeight, int stHeight, float trueratio) + RenderViewport *RenderViewport::Instance() + { + static RenderViewport instance; + return &instance; + } + + void RenderViewport::SetViewport(int fullWidth, int fullHeight, float trueratio) { int virtheight, virtwidth, virtwidth2, virtheight2; @@ -113,13 +119,15 @@ namespace swrenderer // thing clipping fillshort(screenheightarray, viewwidth, (short)viewheight); - R_InitTextureMapping(); + InitTextureMapping(); // Reset r_*Visibility vars R_SetVisibility(R_GetVisibility()); + + SetupBuffer(); } - void R_SetupFreelook() + void RenderViewport::SetupFreelook() { double dy; @@ -140,7 +148,7 @@ namespace swrenderer RenderFlatPlane::SetupSlope(); } - void R_SetupBuffer() + void RenderViewport::SetupBuffer() { using namespace drawerargs; @@ -168,7 +176,7 @@ namespace swrenderer R_InitParticleTexture(); } - void R_InitTextureMapping() + void RenderViewport::InitTextureMapping() { int i; diff --git a/src/swrenderer/scene/r_viewport.h b/src/swrenderer/scene/r_viewport.h index 3cf3ad0ef5..c6445dba2f 100644 --- a/src/swrenderer/scene/r_viewport.h +++ b/src/swrenderer/scene/r_viewport.h @@ -34,8 +34,16 @@ namespace swrenderer extern double globaldclip; extern angle_t xtoviewangle[MAXWIDTH + 1]; - void R_SWRSetWindow(int windowSize, int fullWidth, int fullHeight, int stHeight, float trueratio); - void R_InitTextureMapping(); - void R_SetupBuffer(); - void R_SetupFreelook(); + class RenderViewport + { + public: + static RenderViewport *Instance(); + + void SetViewport(int width, int height, float trueratio); + void SetupFreelook(); + + private: + void InitTextureMapping(); + void SetupBuffer(); + }; } From e02aece40ad55a16bb9c7024ef736af0fcdf09a1 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl 
Date: Sun, 15 Jan 2017 21:45:21 +0100 Subject: [PATCH 719/912] Added a generic memory allocator for memory needed for a frame --- src/swrenderer/plane/r_visibleplane.cpp | 5 +-- src/swrenderer/r_memory.cpp | 48 +++++++++++++++++-------- src/swrenderer/r_memory.h | 44 +++++++++++++++++++++-- src/swrenderer/scene/r_scene.cpp | 3 +- 4 files changed, 78 insertions(+), 22 deletions(-) diff --git a/src/swrenderer/plane/r_visibleplane.cpp b/src/swrenderer/plane/r_visibleplane.cpp index fbb1e37a00..a1928bbb8e 100644 --- a/src/swrenderer/plane/r_visibleplane.cpp +++ b/src/swrenderer/plane/r_visibleplane.cpp @@ -65,10 +65,7 @@ namespace swrenderer } if (!found) { - visplane_light *newlight = R_NewPlaneLight(); - if (!newlight) - return; - + visplane_light *newlight = RenderMemory::NewObject(); newlight->next = lights; newlight->lightsource = node->lightsource; lights = newlight; diff --git a/src/swrenderer/r_memory.cpp b/src/swrenderer/r_memory.cpp index 0893f69369..b4faa44cd2 100644 --- a/src/swrenderer/r_memory.cpp +++ b/src/swrenderer/r_memory.cpp @@ -72,23 +72,43 @@ namespace swrenderer } ///////////////////////////////////////////////////////////////////////// - - namespace + + void *RenderMemory::AllocBytes(int size) { - enum { max_plane_lights = 32 * 1024 }; - visplane_light plane_lights[max_plane_lights]; - int next_plane_light = 0; - } + size = (size + 15) / 16 * 16; // 16-byte align + + if (UsedBlocks.empty() || UsedBlocks.back()->Position + size > BlockSize) + { + if (!FreeBlocks.empty()) + { + auto block = std::move(FreeBlocks.back()); + block->Position = 0; + FreeBlocks.pop_back(); + UsedBlocks.push_back(std::move(block)); + } + else + { + UsedBlocks.push_back(std::make_unique()); + } + } + + auto &block = UsedBlocks.back(); + void *data = block->Data + block->Position; + block->Position += size; - visplane_light *R_NewPlaneLight() - { - if (next_plane_light == max_plane_lights) - return nullptr; - return &plane_lights[next_plane_light++]; + return data; } - - 
void R_FreePlaneLights() + + void RenderMemory::Clear() { - next_plane_light = 0; + while (!UsedBlocks.empty()) + { + auto block = std::move(UsedBlocks.back()); + UsedBlocks.pop_back(); + FreeBlocks.push_back(std::move(block)); + } } + + std::vector> RenderMemory::UsedBlocks; + std::vector> RenderMemory::FreeBlocks; } diff --git a/src/swrenderer/r_memory.h b/src/swrenderer/r_memory.h index 7914cdfcfe..0bf86f2cda 100644 --- a/src/swrenderer/r_memory.h +++ b/src/swrenderer/r_memory.h @@ -13,6 +13,8 @@ #pragma once +#include + namespace swrenderer { struct visplane_light; @@ -22,7 +24,43 @@ namespace swrenderer ptrdiff_t R_NewOpening(ptrdiff_t len); void R_FreeOpenings(); void R_DeinitOpenings(); - - visplane_light *R_NewPlaneLight(); - void R_FreePlaneLights(); + + // Memory needed for the duration of a frame rendering + class RenderMemory + { + public: + static void Clear(); + + template + static T *AllocMemory(int size = 1) + { + return (T*)AllocBytes(sizeof(T) * size); + } + + template + static T *NewObject(Types &&... 
args) + { + void *ptr = AllocBytes(sizeof(T)); + return new (ptr)T(std::forward(args)...); + } + + private: + static void *AllocBytes(int size); + + enum { BlockSize = 1024 * 1024 }; + + struct MemoryBlock + { + MemoryBlock() : Data(new uint8_t[BlockSize]), Position(0) { } + ~MemoryBlock() { delete[] Data; } + + MemoryBlock(const MemoryBlock &) = delete; + MemoryBlock &operator=(const MemoryBlock &) = delete; + + uint8_t *Data; + uint32_t Position; + }; + static std::vector> UsedBlocks; + static std::vector> FreeBlocks; + }; } diff --git a/src/swrenderer/scene/r_scene.cpp b/src/swrenderer/scene/r_scene.cpp index f2f56b8333..71740aefca 100644 --- a/src/swrenderer/scene/r_scene.cpp +++ b/src/swrenderer/scene/r_scene.cpp @@ -117,6 +117,8 @@ namespace swrenderer PlaneCycles.Reset(); MaskedCycles.Reset(); WallScanCycles.Reset(); + + RenderMemory::Clear(); Clip3DFloors *clip3d = Clip3DFloors::Instance(); clip3d->Cleanup(); @@ -132,7 +134,6 @@ namespace swrenderer R_ClearClipSegs(0, viewwidth); R_ClearDrawSegs(); VisiblePlaneList::Instance()->Clear(true); - R_FreePlaneLights(); RenderTranslucentPass::Clear(); // opening / clipping determination From 74e1cea9c3930cadb58f4ac702fdf387f711e847 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 15 Jan 2017 22:21:21 +0100 Subject: [PATCH 720/912] Removed openings array --- src/swrenderer/line/r_line.cpp | 55 +++++++++++---------- src/swrenderer/r_memory.cpp | 40 --------------- src/swrenderer/r_memory.h | 8 --- src/swrenderer/scene/r_portal.cpp | 11 +++-- src/swrenderer/scene/r_scene.cpp | 2 - src/swrenderer/scene/r_translucent_pass.cpp | 10 ++-- src/swrenderer/segments/r_drawsegment.cpp | 22 ++++----- src/swrenderer/segments/r_drawsegment.h | 10 ++-- src/swrenderer/things/r_decal.cpp | 8 +-- 9 files changed, 60 insertions(+), 106 deletions(-) diff --git a/src/swrenderer/line/r_line.cpp b/src/swrenderer/line/r_line.cpp index da506602c0..d7ad75b65e 100644 --- a/src/swrenderer/line/r_line.cpp +++ 
b/src/swrenderer/line/r_line.cpp @@ -373,7 +373,11 @@ namespace swrenderer if (clip3d->fake3D & FAKE3D_FAKEMASK) draw_segment->fake = 1; else draw_segment->fake = 0; - draw_segment->sprtopclip = draw_segment->sprbottomclip = draw_segment->maskedtexturecol = draw_segment->bkup = draw_segment->swall = -1; + draw_segment->sprtopclip = nullptr; + draw_segment->sprbottomclip = nullptr; + draw_segment->maskedtexturecol = nullptr; + draw_segment->bkup = nullptr; + draw_segment->swall = nullptr; if (rw_markportal) { @@ -381,10 +385,10 @@ namespace swrenderer } else if (backsector == NULL) { - draw_segment->sprtopclip = R_NewOpening(stop - start); - draw_segment->sprbottomclip = R_NewOpening(stop - start); - fillshort(openings + draw_segment->sprtopclip, stop - start, viewheight); - memset(openings + draw_segment->sprbottomclip, -1, (stop - start) * sizeof(short)); + draw_segment->sprtopclip = RenderMemory::AllocMemory(stop - start); + draw_segment->sprbottomclip = RenderMemory::AllocMemory(stop - start); + fillshort(draw_segment->sprtopclip, stop - start, viewheight); + memset(draw_segment->sprbottomclip, -1, (stop - start) * sizeof(short)); draw_segment->silhouette = SIL_BOTH; } else @@ -415,14 +419,14 @@ namespace swrenderer { if (doorclosed || (rw_backcz1 <= rw_frontfz1 && rw_backcz2 <= rw_frontfz2)) { - draw_segment->sprbottomclip = R_NewOpening(stop - start); - memset(openings + draw_segment->sprbottomclip, -1, (stop - start) * sizeof(short)); + draw_segment->sprbottomclip = RenderMemory::AllocMemory(stop - start); + memset(draw_segment->sprbottomclip, -1, (stop - start) * sizeof(short)); draw_segment->silhouette |= SIL_BOTTOM; } if (doorclosed || (rw_backfz1 >= rw_frontcz1 && rw_backfz2 >= rw_frontcz2)) { // killough 1/17/98, 2/8/98 - draw_segment->sprtopclip = R_NewOpening(stop - start); - fillshort(openings + draw_segment->sprtopclip, stop - start, viewheight); + draw_segment->sprtopclip = RenderMemory::AllocMemory(stop - start); + 
fillshort(draw_segment->sprtopclip, stop - start, viewheight); draw_segment->silhouette |= SIL_TOP; } } @@ -461,8 +465,8 @@ namespace swrenderer maskedtexture = true; // kg3D - backup for mid and fake walls - draw_segment->bkup = R_NewOpening(stop - start); - memcpy(openings + draw_segment->bkup, &RenderOpaquePass::Instance()->ceilingclip[start], sizeof(short)*(stop - start)); + draw_segment->bkup = RenderMemory::AllocMemory(stop - start); + memcpy(draw_segment->bkup, &RenderOpaquePass::Instance()->ceilingclip[start], sizeof(short)*(stop - start)); draw_segment->bFogBoundary = IsFogBoundary(frontsector, backsector); if (sidedef->GetTexture(side_t::mid).isValid() || draw_segment->bFakeBoundary) @@ -470,12 +474,11 @@ namespace swrenderer if (sidedef->GetTexture(side_t::mid).isValid()) draw_segment->bFakeBoundary |= 4; // it is also mid texture - // note: This should never have used the openings array to store its data! - draw_segment->maskedtexturecol = R_NewOpening((stop - start) * 2); - draw_segment->swall = R_NewOpening((stop - start) * 2); + draw_segment->maskedtexturecol = RenderMemory::AllocMemory(stop - start); + draw_segment->swall = RenderMemory::AllocMemory(stop - start); - lwal = (fixed_t *)(openings + draw_segment->maskedtexturecol); - swal = (float *)(openings + draw_segment->swall); + lwal = draw_segment->maskedtexturecol; + swal = draw_segment->swall; FTexture *pic = TexMan(sidedef->GetTexture(side_t::mid), true); double yscale = pic->Scale.Y * sidedef->GetTextureYScale(side_t::mid); fixed_t xoffset = FLOAT2FIXED(sidedef->GetTextureXOffset(side_t::mid)); @@ -491,7 +494,7 @@ namespace swrenderer *swal++ = swall[i]; } - double istart = *((float *)(openings + draw_segment->swall)) * yscale; + double istart = draw_segment->swall[0] * yscale; double iend = *(swal - 1) * yscale; #if 0 ///This was for avoiding overflow when using fixed point. It might not be needed anymore. 
@@ -530,7 +533,7 @@ namespace swrenderer draw_segment->shade = LIGHT2SHADE(curline->sidedef->GetLightLevel(foggy, curline->frontsector->lightlevel) + R_ActualExtraLight(foggy)); } - if (draw_segment->bFogBoundary || draw_segment->maskedtexturecol != -1) + if (draw_segment->bFogBoundary || draw_segment->maskedtexturecol != nullptr) { size_t drawsegnum = draw_segment - drawsegs; InterestingDrawsegs.Push(drawsegnum); @@ -571,16 +574,16 @@ namespace swrenderer } // save sprite clipping info - if (((draw_segment->silhouette & SIL_TOP) || maskedtexture) && draw_segment->sprtopclip == -1) + if (((draw_segment->silhouette & SIL_TOP) || maskedtexture) && draw_segment->sprtopclip == nullptr) { - draw_segment->sprtopclip = R_NewOpening(stop - start); - memcpy(openings + draw_segment->sprtopclip, &RenderOpaquePass::Instance()->ceilingclip[start], sizeof(short)*(stop - start)); + draw_segment->sprtopclip = RenderMemory::AllocMemory(stop - start); + memcpy(draw_segment->sprtopclip, &RenderOpaquePass::Instance()->ceilingclip[start], sizeof(short)*(stop - start)); } - if (((draw_segment->silhouette & SIL_BOTTOM) || maskedtexture) && draw_segment->sprbottomclip == -1) + if (((draw_segment->silhouette & SIL_BOTTOM) || maskedtexture) && draw_segment->sprbottomclip == nullptr) { - draw_segment->sprbottomclip = R_NewOpening(stop - start); - memcpy(openings + draw_segment->sprbottomclip, &RenderOpaquePass::Instance()->floorclip[start], sizeof(short)*(stop - start)); + draw_segment->sprbottomclip = RenderMemory::AllocMemory(stop - start); + memcpy(draw_segment->sprbottomclip, &RenderOpaquePass::Instance()->floorclip[start], sizeof(short)*(stop - start)); } if (maskedtexture && curline->sidedef->GetTexture(side_t::mid).isValid()) @@ -604,9 +607,9 @@ namespace swrenderer pds.x2 = draw_segment->x2; pds.len = pds.x2 - pds.x1; pds.ceilingclip.Resize(pds.len); - memcpy(&pds.ceilingclip[0], openings + draw_segment->sprtopclip, pds.len * sizeof(*openings)); + memcpy(&pds.ceilingclip[0], 
draw_segment->sprtopclip, pds.len * sizeof(short)); pds.floorclip.Resize(pds.len); - memcpy(&pds.floorclip[0], openings + draw_segment->sprbottomclip, pds.len * sizeof(*openings)); + memcpy(&pds.floorclip[0], draw_segment->sprbottomclip, pds.len * sizeof(short)); for (int i = 0; i < pds.x2 - pds.x1; i++) { diff --git a/src/swrenderer/r_memory.cpp b/src/swrenderer/r_memory.cpp index b4faa44cd2..5b102c09bd 100644 --- a/src/swrenderer/r_memory.cpp +++ b/src/swrenderer/r_memory.cpp @@ -33,46 +33,6 @@ namespace swrenderer { - short *openings; - - namespace - { - size_t maxopenings; - ptrdiff_t lastopening; - } - - ptrdiff_t R_NewOpening(ptrdiff_t len) - { - ptrdiff_t res = lastopening; - len = (len + 1) & ~1; // only return DWORD aligned addresses because some code stores fixed_t's and floats in openings... - lastopening += len; - if ((size_t)lastopening > maxopenings) - { - do - maxopenings = maxopenings ? maxopenings * 2 : 16384; - while ((size_t)lastopening > maxopenings); - openings = (short *)M_Realloc(openings, maxopenings * sizeof(*openings)); - DPrintf(DMSG_NOTIFY, "MaxOpenings increased to %zu\n", maxopenings); - } - return res; - } - - void R_FreeOpenings() - { - lastopening = 0; - } - - void R_DeinitOpenings() - { - if (openings != nullptr) - { - M_Free(openings); - openings = nullptr; - } - } - - ///////////////////////////////////////////////////////////////////////// - void *RenderMemory::AllocBytes(int size) { size = (size + 15) / 16 * 16; // 16-byte align diff --git a/src/swrenderer/r_memory.h b/src/swrenderer/r_memory.h index 0bf86f2cda..dcd00a7a8c 100644 --- a/src/swrenderer/r_memory.h +++ b/src/swrenderer/r_memory.h @@ -17,14 +17,6 @@ namespace swrenderer { - struct visplane_light; - - extern short *openings; - - ptrdiff_t R_NewOpening(ptrdiff_t len); - void R_FreeOpenings(); - void R_DeinitOpenings(); - // Memory needed for the duration of a frame rendering class RenderMemory { diff --git a/src/swrenderer/scene/r_portal.cpp 
b/src/swrenderer/scene/r_portal.cpp index 308390c7c7..fbe1ad915b 100644 --- a/src/swrenderer/scene/r_portal.cpp +++ b/src/swrenderer/scene/r_portal.cpp @@ -209,15 +209,16 @@ namespace swrenderer draw_segment->x1 = pl->left; draw_segment->x2 = pl->right; draw_segment->silhouette = SIL_BOTH; - draw_segment->sprbottomclip = R_NewOpening(pl->right - pl->left); - draw_segment->sprtopclip = R_NewOpening(pl->right - pl->left); - draw_segment->maskedtexturecol = ds_p->swall = -1; + draw_segment->sprbottomclip = RenderMemory::AllocMemory(pl->right - pl->left); + draw_segment->sprtopclip = RenderMemory::AllocMemory(pl->right - pl->left); + draw_segment->maskedtexturecol = nullptr; + draw_segment->swall = nullptr; draw_segment->bFogBoundary = false; draw_segment->curline = nullptr; draw_segment->fake = 0; draw_segment->foggy = false; - memcpy(openings + draw_segment->sprbottomclip, floorclip + pl->left, (pl->right - pl->left) * sizeof(short)); - memcpy(openings + draw_segment->sprtopclip, ceilingclip + pl->left, (pl->right - pl->left) * sizeof(short)); + memcpy(draw_segment->sprbottomclip, floorclip + pl->left, (pl->right - pl->left) * sizeof(short)); + memcpy(draw_segment->sprtopclip, ceilingclip + pl->left, (pl->right - pl->left) * sizeof(short)); VisibleSpriteList::firstvissprite = VisibleSpriteList::vissprite_p; firstdrawseg = draw_segment; diff --git a/src/swrenderer/scene/r_scene.cpp b/src/swrenderer/scene/r_scene.cpp index 71740aefca..f33c5fa7c2 100644 --- a/src/swrenderer/scene/r_scene.cpp +++ b/src/swrenderer/scene/r_scene.cpp @@ -138,7 +138,6 @@ namespace swrenderer // opening / clipping determination RenderOpaquePass::Instance()->ClearClip(); - R_FreeOpenings(); NetUpdate(); @@ -274,7 +273,6 @@ namespace swrenderer RenderTranslucentPass::Deinit(); VisiblePlaneList::Instance()->Deinit(); Clip3DFloors::Instance()->Cleanup(); - R_DeinitOpenings(); R_FreeDrawSegs(); } diff --git a/src/swrenderer/scene/r_translucent_pass.cpp b/src/swrenderer/scene/r_translucent_pass.cpp 
index 1d46cb8314..1490b9dfad 100644 --- a/src/swrenderer/scene/r_translucent_pass.cpp +++ b/src/swrenderer/scene/r_translucent_pass.cpp @@ -402,7 +402,7 @@ namespace swrenderer if (ds->fake) continue; // determine if the drawseg obscures the sprite if (ds->x1 >= x2 || ds->x2 <= x1 || - (!(ds->silhouette & SIL_BOTH) && ds->maskedtexturecol == -1 && + (!(ds->silhouette & SIL_BOTH) && ds->maskedtexturecol == nullptr && !ds->bFogBoundary)) { // does not cover sprite @@ -435,7 +435,7 @@ namespace swrenderer // seg is behind sprite, so draw the mid texture if it has one if (ds->CurrentPortalUniq == renderportal->CurrentPortalUniq && // [ZZ] instead, portal uniq check is made here - (ds->maskedtexturecol != -1 || ds->bFogBoundary)) + (ds->maskedtexturecol != nullptr || ds->bFogBoundary)) R_RenderMaskedSegRange(ds, r1, r2); continue; @@ -449,7 +449,7 @@ namespace swrenderer if (ds->silhouette & SIL_BOTTOM) //bottom sil { clip1 = clipbot + r1; - clip2 = openings + ds->sprbottomclip + r1 - ds->x1; + clip2 = ds->sprbottomclip + r1 - ds->x1; i = r2 - r1; do { @@ -463,7 +463,7 @@ namespace swrenderer if (ds->silhouette & SIL_TOP) // top sil { clip1 = cliptop + r1; - clip2 = openings + ds->sprtopclip + r1 - ds->x1; + clip2 = ds->sprtopclip + r1 - ds->x1; i = r2 - r1; do { @@ -555,7 +555,7 @@ namespace swrenderer continue; // kg3D - no fake segs if (ds->fake) continue; - if (ds->maskedtexturecol != -1 || ds->bFogBoundary) + if (ds->maskedtexturecol != nullptr || ds->bFogBoundary) { R_RenderMaskedSegRange(ds, ds->x1, ds->x2); } diff --git a/src/swrenderer/segments/r_drawsegment.cpp b/src/swrenderer/segments/r_drawsegment.cpp index 68f54b05f4..160290068e 100644 --- a/src/swrenderer/segments/r_drawsegment.cpp +++ b/src/swrenderer/segments/r_drawsegment.cpp @@ -196,15 +196,15 @@ namespace swrenderer } } - short *mfloorclip = openings + ds->sprbottomclip - ds->x1; - short *mceilingclip = openings + ds->sprtopclip - ds->x1; + short *mfloorclip = ds->sprbottomclip - ds->x1; + short 
*mceilingclip = ds->sprtopclip - ds->x1; double spryscale; // [RH] Draw fog partition if (ds->bFogBoundary) { RenderFogBoundary::Render(x1, x2, mceilingclip, mfloorclip, wallshade, rw_light, rw_lightstep, basecolormap); - if (ds->maskedtexturecol == -1) + if (ds->maskedtexturecol == nullptr) { goto clearfog; } @@ -214,9 +214,9 @@ namespace swrenderer goto clearfog; } - MaskedSWall = (float *)(openings + ds->swall) - ds->x1; + MaskedSWall = ds->swall - ds->x1; MaskedScaleY = ds->yscale; - maskedtexturecol = (fixed_t *)(openings + ds->maskedtexturecol) - ds->x1; + maskedtexturecol = ds->maskedtexturecol - ds->x1; spryscale = ds->iscale + ds->iscalestep * (x1 - ds->x1); rw_scalestep = ds->iscalestep; @@ -437,13 +437,13 @@ namespace swrenderer { if (!wrap) { - assert(ds->bkup >= 0); - memcpy(openings + ds->sprtopclip, openings + ds->bkup, (ds->x2 - ds->x1) * 2); + assert(ds->bkup != nullptr); + memcpy(ds->sprtopclip, ds->bkup, (ds->x2 - ds->x1) * 2); } } else { - fillshort(openings + ds->sprtopclip - ds->x1 + x1, x2 - x1, viewheight); + fillshort(ds->sprtopclip - ds->x1 + x1, x2 - x1, viewheight); } } return; @@ -466,11 +466,11 @@ namespace swrenderer rw_lightstep = ds->lightstep; rw_light = ds->light + (x1 - ds->x1) * rw_lightstep; - short *mfloorclip = openings + ds->sprbottomclip - ds->x1; - short *mceilingclip = openings + ds->sprtopclip - ds->x1; + short *mfloorclip = ds->sprbottomclip - ds->x1; + short *mceilingclip = ds->sprtopclip - ds->x1; //double spryscale = ds->iscale + ds->iscalestep * (x1 - ds->x1); - float *MaskedSWall = (float *)(openings + ds->swall) - ds->x1; + float *MaskedSWall = ds->swall - ds->x1; // find positioning side_t *scaledside; diff --git a/src/swrenderer/segments/r_drawsegment.h b/src/swrenderer/segments/r_drawsegment.h index 5533ec4300..28d1df8d5a 100644 --- a/src/swrenderer/segments/r_drawsegment.h +++ b/src/swrenderer/segments/r_drawsegment.h @@ -35,11 +35,11 @@ namespace swrenderer bool foggy; // Pointers to lists for sprite 
clipping, all three adjusted so [x1] is first value. - ptrdiff_t sprtopclip; // type short - ptrdiff_t sprbottomclip; // type short - ptrdiff_t maskedtexturecol; // type short - ptrdiff_t swall; // type float - ptrdiff_t bkup; // sprtopclip backup, for mid and fake textures + short *sprtopclip; + short *sprbottomclip; + fixed_t *maskedtexturecol; + float *swall; + short *bkup; // sprtopclip backup, for mid and fake textures FWallTmapVals tmapvals; diff --git a/src/swrenderer/things/r_decal.cpp b/src/swrenderer/things/r_decal.cpp index 203401ec48..9e17b95c8b 100644 --- a/src/swrenderer/things/r_decal.cpp +++ b/src/swrenderer/things/r_decal.cpp @@ -182,8 +182,8 @@ namespace swrenderer } else { - mceilingclip = openings + clipper->sprtopclip - clipper->x1; - mfloorclip = openings + clipper->sprbottomclip - clipper->x1; + mceilingclip = clipper->sprtopclip - clipper->x1; + mfloorclip = clipper->sprbottomclip - clipper->x1; } break; @@ -201,8 +201,8 @@ namespace swrenderer { goto done; } - mceilingclip = openings + clipper->sprtopclip - clipper->x1; - mfloorclip = openings + clipper->sprbottomclip - clipper->x1; + mceilingclip = clipper->sprtopclip - clipper->x1; + mfloorclip = clipper->sprbottomclip - clipper->x1; break; case RF_CLIPLOWER: From f6cc75fad58e05d2bf0f332e5fa40e2d47ea2916 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 15 Jan 2017 22:57:42 +0100 Subject: [PATCH 721/912] Convert r_clipsegment into a class --- src/swrenderer/line/r_line.cpp | 6 +++--- src/swrenderer/scene/r_opaque_pass.cpp | 2 +- src/swrenderer/scene/r_portal.cpp | 4 ++-- src/swrenderer/scene/r_scene.cpp | 2 +- src/swrenderer/segments/r_clipsegment.cpp | 19 +++++++------------ src/swrenderer/segments/r_clipsegment.h | 23 +++++++++++++++++++---- 6 files changed, 33 insertions(+), 23 deletions(-) diff --git a/src/swrenderer/line/r_line.cpp b/src/swrenderer/line/r_line.cpp index d7ad75b65e..af25166bc8 100644 --- a/src/swrenderer/line/r_line.cpp +++ b/src/swrenderer/line/r_line.cpp @@ 
-91,7 +91,7 @@ namespace swrenderer if (line->linedef == NULL) { - if (R_CheckClipWallSegment(WallC.sx1, WallC.sx2)) + if (RenderClipSegment::Instance()->Check(WallC.sx1, WallC.sx2)) { InSubsector->flags |= SSECF_DRAWN; } @@ -257,7 +257,7 @@ namespace swrenderer // mark their subsectors as visible for automap texturing. if (hasglnodes && !(InSubsector->flags & SSECF_DRAWN)) { - if (R_CheckClipWallSegment(WallC.sx1, WallC.sx2)) + if (RenderClipSegment::Instance()->Check(WallC.sx1, WallC.sx2)) { InSubsector->flags |= SSECF_DRAWN; } @@ -298,7 +298,7 @@ namespace swrenderer } static SWRenderLine *self = this; - bool visible = R_ClipWallSegment(WallC.sx1, WallC.sx2, solid, [](int x1, int x2) -> bool + bool visible = RenderClipSegment::Instance()->Clip(WallC.sx1, WallC.sx2, solid, [](int x1, int x2) -> bool { return self->RenderWallSegment(x1, x2); }); diff --git a/src/swrenderer/scene/r_opaque_pass.cpp b/src/swrenderer/scene/r_opaque_pass.cpp index 7db41d6f80..8c70ab1fed 100644 --- a/src/swrenderer/scene/r_opaque_pass.cpp +++ b/src/swrenderer/scene/r_opaque_pass.cpp @@ -378,7 +378,7 @@ namespace swrenderer // Find the first clippost that touches the source post // (adjacent pixels are touching). 
- return R_IsWallSegmentVisible(sx1, sx2); + return RenderClipSegment::Instance()->IsVisible(sx1, sx2); } void RenderOpaquePass::AddPolyobjs(subsector_t *sub) diff --git a/src/swrenderer/scene/r_portal.cpp b/src/swrenderer/scene/r_portal.cpp index fbe1ad915b..69c9217161 100644 --- a/src/swrenderer/scene/r_portal.cpp +++ b/src/swrenderer/scene/r_portal.cpp @@ -179,7 +179,7 @@ namespace swrenderer validcount++; // Make sure we see all sprites planes->Clear(false); - R_ClearClipSegs(pl->left, pl->right); + RenderClipSegment::Instance()->Clear(pl->left, pl->right); WindowLeft = pl->left; WindowRight = pl->right; @@ -429,7 +429,7 @@ namespace swrenderer CurrentPortal = pds; VisiblePlaneList::Instance()->Clear(false); - R_ClearClipSegs(pds->x1, pds->x2); + RenderClipSegment::Instance()->Clear(pds->x1, pds->x2); WindowLeft = pds->x1; WindowRight = pds->x2; diff --git a/src/swrenderer/scene/r_scene.cpp b/src/swrenderer/scene/r_scene.cpp index f33c5fa7c2..dfbf63a96e 100644 --- a/src/swrenderer/scene/r_scene.cpp +++ b/src/swrenderer/scene/r_scene.cpp @@ -131,7 +131,7 @@ namespace swrenderer RenderPortal::Instance()->CopyStackedViewParameters(); // Clear buffers. 
- R_ClearClipSegs(0, viewwidth); + RenderClipSegment::Instance()->Clear(0, viewwidth); R_ClearDrawSegs(); VisiblePlaneList::Instance()->Clear(true); RenderTranslucentPass::Clear(); diff --git a/src/swrenderer/segments/r_clipsegment.cpp b/src/swrenderer/segments/r_clipsegment.cpp index 593a253e38..517d2ee07e 100644 --- a/src/swrenderer/segments/r_clipsegment.cpp +++ b/src/swrenderer/segments/r_clipsegment.cpp @@ -34,18 +34,13 @@ namespace swrenderer { - namespace + RenderClipSegment *RenderClipSegment::Instance() { - struct cliprange_t - { - short first, last; - }; - - cliprange_t *newend; // newend is one past the last valid seg - cliprange_t solidsegs[MAXWIDTH / 2 + 2]; + static RenderClipSegment instance; + return &instance; } - void R_ClearClipSegs(short left, short right) + void RenderClipSegment::Clear(short left, short right) { solidsegs[0].first = -0x7fff; solidsegs[0].last = left; @@ -54,7 +49,7 @@ namespace swrenderer newend = solidsegs+2; } - bool R_CheckClipWallSegment(int first, int last) + bool RenderClipSegment::Check(int first, int last) { cliprange_t *start; @@ -78,7 +73,7 @@ namespace swrenderer return false; } - bool R_IsWallSegmentVisible(int sx1, int sx2) + bool RenderClipSegment::IsVisible(int sx1, int sx2) { // Does not cross a pixel. 
if (sx2 <= sx1) @@ -97,7 +92,7 @@ namespace swrenderer return true; } - bool R_ClipWallSegment(int first, int last, bool solid, VisibleSegmentCallback callback) + bool RenderClipSegment::Clip(int first, int last, bool solid, VisibleSegmentCallback callback) { cliprange_t *next, *start; int i, j; diff --git a/src/swrenderer/segments/r_clipsegment.h b/src/swrenderer/segments/r_clipsegment.h index 3204531036..33ec0525de 100644 --- a/src/swrenderer/segments/r_clipsegment.h +++ b/src/swrenderer/segments/r_clipsegment.h @@ -17,8 +17,23 @@ namespace swrenderer { typedef bool(*VisibleSegmentCallback)(int x1, int x2); - void R_ClearClipSegs(short left, short right); - bool R_ClipWallSegment(int x1, int x2, bool solid, VisibleSegmentCallback callback); - bool R_CheckClipWallSegment(int first, int last); - bool R_IsWallSegmentVisible(int x1, int x2); + class RenderClipSegment + { + public: + static RenderClipSegment *Instance(); + + void Clear(short left, short right); + bool Clip(int x1, int x2, bool solid, VisibleSegmentCallback callback); + bool Check(int first, int last); + bool IsVisible(int x1, int x2); + + private: + struct cliprange_t + { + short first, last; + }; + + cliprange_t *newend; // newend is one past the last valid seg + cliprange_t solidsegs[MAXWIDTH / 2 + 2]; + }; } From cd9043fd9474f07f9f7c308cd16ea5f74b4d516e Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 15 Jan 2017 23:03:58 +0100 Subject: [PATCH 722/912] Make RenderActorView private --- src/swrenderer/scene/r_scene.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/swrenderer/scene/r_scene.h b/src/swrenderer/scene/r_scene.h index 31e074c08c..599ef55fbe 100644 --- a/src/swrenderer/scene/r_scene.h +++ b/src/swrenderer/scene/r_scene.h @@ -34,13 +34,14 @@ namespace swrenderer void SetClearColor(int color); - void RenderActorView(AActor *actor, bool dontmaplines = false); void RenderView(player_t *player); void RenderViewToCanvas(AActor *actor, DCanvas *canvas, int x, int 
y, int width, int height, bool dontmaplines = false); bool DontMapLines() const { return dontmaplines; } private: + void RenderActorView(AActor *actor, bool dontmaplines = false); + bool dontmaplines = false; int clearcolor = 0; }; From 57d8b0e34cb46d95ac15cf9b75b0c1618f84477e Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 16 Jan 2017 03:46:05 +0100 Subject: [PATCH 723/912] Rewrite VisibleSpriteList to use TArray --- src/swrenderer/scene/r_portal.cpp | 12 +- src/swrenderer/scene/r_portal.h | 1 - src/swrenderer/scene/r_translucent_pass.cpp | 13 +- src/swrenderer/things/r_particle.cpp | 5 +- src/swrenderer/things/r_sprite.cpp | 4 +- src/swrenderer/things/r_visiblespritelist.cpp | 124 ++++++------------ src/swrenderer/things/r_visiblespritelist.h | 35 ++--- src/swrenderer/things/r_voxel.cpp | 8 +- src/swrenderer/things/r_wallsprite.cpp | 5 +- 9 files changed, 75 insertions(+), 132 deletions(-) diff --git a/src/swrenderer/scene/r_portal.cpp b/src/swrenderer/scene/r_portal.cpp index 69c9217161..e799f377c9 100644 --- a/src/swrenderer/scene/r_portal.cpp +++ b/src/swrenderer/scene/r_portal.cpp @@ -102,7 +102,6 @@ namespace swrenderer int savedextralight = extralight; DVector3 savedpos = ViewPos; DAngle savedangle = ViewAngle; - ptrdiff_t savedvissprite_p = VisibleSpriteList::vissprite_p - VisibleSpriteList::vissprites; ptrdiff_t savedds_p = ds_p - drawsegs; size_t savedinteresting = FirstInterestingDrawseg; double savedvisibility = R_GetVisibility(); @@ -220,15 +219,13 @@ namespace swrenderer memcpy(draw_segment->sprbottomclip, floorclip + pl->left, (pl->right - pl->left) * sizeof(short)); memcpy(draw_segment->sprtopclip, ceilingclip + pl->left, (pl->right - pl->left) * sizeof(short)); - VisibleSpriteList::firstvissprite = VisibleSpriteList::vissprite_p; firstdrawseg = draw_segment; FirstInterestingDrawseg = InterestingDrawsegs.Size(); interestingStack.Push(FirstInterestingDrawseg); ptrdiff_t diffnum = firstdrawseg - drawsegs; drawsegStack.Push(diffnum); - 
diffnum = VisibleSpriteList::firstvissprite - VisibleSpriteList::vissprites; - visspriteStack.Push(diffnum); + VisibleSpriteList::Instance()->PushPortal(); viewposStack.Push(ViewPos); visplaneStack.Push(pl); @@ -249,8 +246,6 @@ namespace swrenderer drawsegStack.Pop(pd); firstdrawseg = drawsegs + pd; - visspriteStack.Pop(pd); - VisibleSpriteList::firstvissprite = VisibleSpriteList::vissprites + pd; // Masked textures and planes need the view coordinates restored for proper positioning. viewposStack.Pop(ViewPos); @@ -258,7 +253,8 @@ namespace swrenderer RenderTranslucentPass::Render(); ds_p = firstdrawseg; - VisibleSpriteList::vissprite_p = VisibleSpriteList::firstvissprite; + + VisibleSpriteList::Instance()->PopPortal(); visplaneStack.Pop(pl); if (pl->Alpha > 0 && pl->picnum != skyflatnum) @@ -268,8 +264,6 @@ namespace swrenderer *planes->freehead = pl; planes->freehead = &pl->next; } - VisibleSpriteList::firstvissprite = VisibleSpriteList::vissprites; - VisibleSpriteList::vissprite_p = VisibleSpriteList::vissprites + savedvissprite_p; firstdrawseg = drawsegs; ds_p = drawsegs + savedds_p; InterestingDrawsegs.Resize((unsigned int)FirstInterestingDrawseg); diff --git a/src/swrenderer/scene/r_portal.h b/src/swrenderer/scene/r_portal.h index 1781026189..654468d722 100644 --- a/src/swrenderer/scene/r_portal.h +++ b/src/swrenderer/scene/r_portal.h @@ -55,7 +55,6 @@ namespace swrenderer TArray interestingStack; TArray drawsegStack; - TArray visspriteStack; TArray viewposStack; TArray visplaneStack; }; diff --git a/src/swrenderer/scene/r_translucent_pass.cpp b/src/swrenderer/scene/r_translucent_pass.cpp index 1490b9dfad..591b57588b 100644 --- a/src/swrenderer/scene/r_translucent_pass.cpp +++ b/src/swrenderer/scene/r_translucent_pass.cpp @@ -51,14 +51,12 @@ namespace swrenderer void RenderTranslucentPass::Deinit() { - VisibleSpriteList::Deinit(); - SortedVisibleSpriteList::Deinit(); RenderVoxel::Deinit(); } void RenderTranslucentPass::Clear() { - VisibleSpriteList::Clear(); 
+ VisibleSpriteList::Instance()->Clear(); DrewAVoxel = false; } @@ -529,11 +527,12 @@ namespace swrenderer { RenderPortal *renderportal = RenderPortal::Instance(); - for (int i = SortedVisibleSpriteList::vsprcount; i > 0; i--) + auto &sortedSprites = VisibleSpriteList::Instance()->SortedSprites; + for (int i = sortedSprites.Size(); i > 0; i--) { - if (SortedVisibleSpriteList::spritesorter[i - 1]->CurrentPortalUniq != renderportal->CurrentPortalUniq) + if (sortedSprites[i - 1]->CurrentPortalUniq != renderportal->CurrentPortalUniq) continue; // probably another time - DrawSprite(SortedVisibleSpriteList::spritesorter[i - 1]); + DrawSprite(sortedSprites[i - 1]); } // render any remaining masked mid textures @@ -565,7 +564,7 @@ namespace swrenderer void RenderTranslucentPass::Render() { CollectPortals(); - SortedVisibleSpriteList::Sort(DrewAVoxel ? SortedVisibleSpriteList::sv_compare2d : SortedVisibleSpriteList::sv_compare, VisibleSpriteList::firstvissprite - VisibleSpriteList::vissprites); + VisibleSpriteList::Instance()->Sort(DrewAVoxel); Clip3DFloors *clip3d = Clip3DFloors::Instance(); if (clip3d->height_top == nullptr) diff --git a/src/swrenderer/things/r_particle.cpp b/src/swrenderer/things/r_particle.cpp index e8d1cb1b72..60363e9807 100644 --- a/src/swrenderer/things/r_particle.cpp +++ b/src/swrenderer/things/r_particle.cpp @@ -66,7 +66,6 @@ namespace swrenderer double tz, tiz; double xscale, yscale; int x1, x2, y1, y2; - vissprite_t* vis; sector_t* heightsec = NULL; FSWColormap* map; @@ -179,7 +178,7 @@ namespace swrenderer return; // store information in a vissprite - vis = VisibleSpriteList::Add(); + vissprite_t *vis = RenderMemory::NewObject(); vis->CurrentPortalUniq = renderportal->CurrentPortalUniq; vis->heightsec = heightsec; vis->xscale = FLOAT2FIXED(xscale); @@ -223,6 +222,8 @@ namespace swrenderer vis->Style.ColormapNum = GETPALOOKUP(tiz * r_SpriteVisibility * 0.5, shade); vis->Style.BaseColormap = map; } + + VisibleSpriteList::Instance()->Push(vis); } 
void RenderParticle::Render(vissprite_t *vis) diff --git a/src/swrenderer/things/r_sprite.cpp b/src/swrenderer/things/r_sprite.cpp index c7ec7fe2d4..d1c55165fa 100644 --- a/src/swrenderer/things/r_sprite.cpp +++ b/src/swrenderer/things/r_sprite.cpp @@ -156,7 +156,7 @@ namespace swrenderer double yscale = spriteScale.Y / tex->Scale.Y; // store information in a vissprite - vissprite_t *vis = VisibleSpriteList::Add(); + vissprite_t *vis = RenderMemory::NewObject(); vis->CurrentPortalUniq = renderportal->CurrentPortalUniq; vis->xscale = FLOAT2FIXED(xscale); @@ -274,6 +274,8 @@ namespace swrenderer vis->Style.BaseColormap = mybasecolormap; } } + + VisibleSpriteList::Instance()->Push(vis); } void RenderSprite::Render(vissprite_t *vis, const short *mfloorclip, const short *mceilingclip) diff --git a/src/swrenderer/things/r_visiblespritelist.cpp b/src/swrenderer/things/r_visiblespritelist.cpp index db6689c80e..084f7b49ca 100644 --- a/src/swrenderer/things/r_visiblespritelist.cpp +++ b/src/swrenderer/things/r_visiblespritelist.cpp @@ -28,118 +28,78 @@ namespace swrenderer { - void VisibleSpriteList::Deinit() + VisibleSpriteList *VisibleSpriteList::Instance() { - // Free vissprites - for (int i = 0; i < MaxVisSprites; ++i) - { - delete vissprites[i]; - } - free(vissprites); - vissprites = nullptr; - vissprite_p = lastvissprite = nullptr; - MaxVisSprites = 0; + static VisibleSpriteList instance; + return &instance; } void VisibleSpriteList::Clear() { - vissprite_p = firstvissprite; + Sprites.Clear(); + StartIndices.Clear(); + SortedSprites.Clear(); } - vissprite_t *VisibleSpriteList::Add() + void VisibleSpriteList::PushPortal() { - if (vissprite_p == lastvissprite) - { - ptrdiff_t firstvisspritenum = firstvissprite - vissprites; - ptrdiff_t prevvisspritenum = vissprite_p - vissprites; - - MaxVisSprites = MaxVisSprites ? 
MaxVisSprites * 2 : 128; - vissprites = (vissprite_t **)M_Realloc(vissprites, MaxVisSprites * sizeof(vissprite_t)); - lastvissprite = &vissprites[MaxVisSprites]; - firstvissprite = &vissprites[firstvisspritenum]; - vissprite_p = &vissprites[prevvisspritenum]; - DPrintf(DMSG_NOTIFY, "MaxVisSprites increased to %d\n", MaxVisSprites); - - // Allocate sprites from the new pile - for (vissprite_t **p = vissprite_p; p < lastvissprite; ++p) - { - *p = new vissprite_t; - } - } - - vissprite_p++; - return *(vissprite_p - 1); + StartIndices.Push(Sprites.Size()); } - int VisibleSpriteList::MaxVisSprites; - vissprite_t **VisibleSpriteList::vissprites; - vissprite_t **VisibleSpriteList::firstvissprite; - vissprite_t **VisibleSpriteList::vissprite_p; - vissprite_t **VisibleSpriteList::lastvissprite; - - ///////////////////////////////////////////////////////////////////////// - - void SortedVisibleSpriteList::Deinit() + void VisibleSpriteList::PopPortal() { - delete[] spritesorter; - spritesortersize = 0; - spritesorter = nullptr; + Sprites.Resize(StartIndices.Last()); + StartIndices.Pop(); } - // This is the standard version, which does a simple test based on depth. - bool SortedVisibleSpriteList::sv_compare(vissprite_t *a, vissprite_t *b) + void VisibleSpriteList::Push(vissprite_t *sprite) { - return a->idepth > b->idepth; + Sprites.Push(sprite); } - // This is an alternate version, for when one or more voxel is in view. - // It does a 2D distance test based on whichever one is furthest from - // the viewpoint. - bool SortedVisibleSpriteList::sv_compare2d(vissprite_t *a, vissprite_t *b) + void VisibleSpriteList::Sort(bool compare2d) { - return DVector2(a->deltax, a->deltay).LengthSquared() < DVector2(b->deltax, b->deltay).LengthSquared(); - } + size_t first = StartIndices.Size() == 0 ? 
0 : StartIndices.Last(); + size_t count = Sprites.Size() - first; - void SortedVisibleSpriteList::Sort(bool(*compare)(vissprite_t *, vissprite_t *), size_t first) - { - int i; - vissprite_t **spr; + SortedSprites.Resize(count); - vsprcount = int(VisibleSpriteList::vissprite_p - &VisibleSpriteList::vissprites[first]); - - if (vsprcount == 0) + if (count == 0) return; - if (spritesortersize < VisibleSpriteList::MaxVisSprites) - { - if (spritesorter != nullptr) - delete[] spritesorter; - spritesorter = new vissprite_t *[VisibleSpriteList::MaxVisSprites]; - spritesortersize = VisibleSpriteList::MaxVisSprites; - } - if (!(i_compatflags & COMPATF_SPRITESORT)) { - for (i = 0, spr = VisibleSpriteList::firstvissprite; i < vsprcount; i++, spr++) - { - spritesorter[i] = *spr; - } + for (size_t i = 0; i < count; i++) + SortedSprites[i] = Sprites[first + i]; } else { // If the compatibility option is on sprites of equal distance need to // be sorted in inverse order. This is most easily achieved by // filling the sort array backwards before the sort. - for (i = 0, spr = VisibleSpriteList::firstvissprite + vsprcount - 1; i < vsprcount; i++, spr--) - { - spritesorter[i] = *spr; - } + for (size_t i = 0; i < count; i++) + SortedSprites[i] = Sprites[first + count - i - 1]; } - std::stable_sort(&spritesorter[0], &spritesorter[vsprcount], compare); - } + if (compare2d) + { + // This is an alternate version, for when one or more voxel is in view. + // It does a 2D distance test based on whichever one is furthest from + // the viewpoint. 
- vissprite_t **SortedVisibleSpriteList::spritesorter; - int SortedVisibleSpriteList::spritesortersize = 0; - int SortedVisibleSpriteList::vsprcount; + std::stable_sort(&SortedSprites[0], &SortedSprites[count], [](vissprite_t *a, vissprite_t *b) -> bool + { + return DVector2(a->deltax, a->deltay).LengthSquared() < DVector2(b->deltax, b->deltay).LengthSquared(); + }); + } + else + { + // This is the standard version, which does a simple test based on depth. + + std::stable_sort(&SortedSprites[0], &SortedSprites[count], [](vissprite_t *a, vissprite_t *b) -> bool + { + return a->idepth > b->idepth; + }); + } + } } diff --git a/src/swrenderer/things/r_visiblespritelist.h b/src/swrenderer/things/r_visiblespritelist.h index 0e3fddfdc8..0dd43e0009 100644 --- a/src/swrenderer/things/r_visiblespritelist.h +++ b/src/swrenderer/things/r_visiblespritelist.h @@ -21,33 +21,18 @@ namespace swrenderer class VisibleSpriteList { public: - static int MaxVisSprites; - static vissprite_t **vissprites; - static vissprite_t **firstvissprite; - static vissprite_t **vissprite_p; + static VisibleSpriteList *Instance(); - static void Deinit(); - static void Clear(); - static vissprite_t *Add(); + void Clear(); + void PushPortal(); + void PopPortal(); + void Push(vissprite_t *sprite); + void Sort(bool compare2d); + + TArray SortedSprites; private: - static vissprite_t **lastvissprite; - }; - - class SortedVisibleSpriteList - { - public: - static void Deinit(); - - static void Sort(bool(*compare)(vissprite_t *, vissprite_t *), size_t first); - - static bool sv_compare(vissprite_t *a, vissprite_t *b); - static bool sv_compare2d(vissprite_t *a, vissprite_t *b); - - static vissprite_t **spritesorter; - static int vsprcount; - - private: - static int spritesortersize; + TArray Sprites; + TArray StartIndices; }; } diff --git a/src/swrenderer/things/r_voxel.cpp b/src/swrenderer/things/r_voxel.cpp index c7cdee6607..6c4d3322f4 100644 --- a/src/swrenderer/things/r_voxel.cpp +++ 
b/src/swrenderer/things/r_voxel.cpp @@ -41,6 +41,7 @@ #include "swrenderer/scene/r_scene.h" #include "swrenderer/scene/r_viewport.h" #include "swrenderer/scene/r_light.h" +#include "swrenderer/r_memory.h" EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor) @@ -104,7 +105,7 @@ namespace swrenderer } } - vissprite_t *vis = VisibleSpriteList::Add(); + vissprite_t *vis = RenderMemory::NewObject(); vis->CurrentPortalUniq = renderportal->CurrentPortalUniq; vis->xscale = FLOAT2FIXED(xscale); @@ -157,8 +158,6 @@ namespace swrenderer vis->bWallSprite = false; vis->foggy = foggy; - RenderTranslucentPass::DrewAVoxel = true; - // The software renderer cannot invert the source without inverting the overlay // too. That means if the source is inverted, we need to do the reverse of what // the invert overlay flag says to do. @@ -223,6 +222,9 @@ namespace swrenderer vis->Style.BaseColormap = mybasecolormap; } } + + VisibleSpriteList::Instance()->Push(vis); + RenderTranslucentPass::DrewAVoxel = true; } void RenderVoxel::Render(vissprite_t *sprite, int minZ, int maxZ, short *cliptop, short *clipbottom) diff --git a/src/swrenderer/things/r_wallsprite.cpp b/src/swrenderer/things/r_wallsprite.cpp index 6f4fd79ec7..319dcfc891 100644 --- a/src/swrenderer/things/r_wallsprite.cpp +++ b/src/swrenderer/things/r_wallsprite.cpp @@ -72,7 +72,6 @@ namespace swrenderer DAngle ang = thing->Angles.Yaw + 90; double angcos = ang.Cos(); double angsin = ang.Sin(); - vissprite_t *vis; // Determine left and right edges of sprite. The sprite's angle is its normal, // so the edges are 90 degrees each side of it. @@ -105,7 +104,7 @@ namespace swrenderer gzt = pos.Z + scale.Y * scaled_to; gzb = pos.Z + scale.Y * scaled_bo; - vis = VisibleSpriteList::Add(); + vissprite_t *vis = RenderMemory::NewObject(); vis->CurrentPortalUniq = renderportal->CurrentPortalUniq; vis->x1 = wallc.sx1 < renderportal->WindowLeft ? renderportal->WindowLeft : wallc.sx1; vis->x2 = wallc.sx2 >= renderportal->WindowRight ? 
renderportal->WindowRight : wallc.sx2; @@ -137,6 +136,8 @@ namespace swrenderer vis->Style.BaseColormap = basecolormap; vis->wallc = wallc; vis->foggy = foggy; + + VisibleSpriteList::Instance()->Push(vis); } void RenderWallSprite::Render(vissprite_t *spr, const short *mfloorclip, const short *mceilingclip) From 55131a7a6dc9f1ec29b5e1b757f744789eaa077d Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 16 Jan 2017 05:26:22 +0100 Subject: [PATCH 724/912] Rename vissprite_t to VisibleSprite, convert it into a base class and lower all variables if possible. Remove unused fields and unions. --- src/swrenderer/drawers/r_draw.h | 2 - src/swrenderer/scene/r_translucent_pass.cpp | 42 ++++----- src/swrenderer/scene/r_translucent_pass.h | 6 +- src/swrenderer/things/r_particle.cpp | 18 ++-- src/swrenderer/things/r_particle.h | 15 +++- src/swrenderer/things/r_playersprite.cpp | 11 +-- src/swrenderer/things/r_playersprite.h | 5 +- src/swrenderer/things/r_sprite.cpp | 14 +-- src/swrenderer/things/r_sprite.h | 15 +++- src/swrenderer/things/r_visiblesprite.h | 88 +++++++------------ src/swrenderer/things/r_visiblespritelist.cpp | 14 +-- src/swrenderer/things/r_visiblespritelist.h | 10 +-- src/swrenderer/things/r_voxel.cpp | 12 +-- src/swrenderer/things/r_voxel.h | 23 ++++- src/swrenderer/things/r_wallsprite.cpp | 13 ++- src/swrenderer/things/r_wallsprite.h | 10 ++- 16 files changed, 150 insertions(+), 148 deletions(-) diff --git a/src/swrenderer/drawers/r_draw.h b/src/swrenderer/drawers/r_draw.h index f7facda887..e6fa8ea4d0 100644 --- a/src/swrenderer/drawers/r_draw.h +++ b/src/swrenderer/drawers/r_draw.h @@ -19,8 +19,6 @@ EXTERN_CVAR(Bool, r_dynlights); namespace swrenderer { - struct vissprite_t; - struct ShadeConstants { uint16_t light_alpha; diff --git a/src/swrenderer/scene/r_translucent_pass.cpp b/src/swrenderer/scene/r_translucent_pass.cpp index 591b57588b..252c31f561 100644 --- a/src/swrenderer/scene/r_translucent_pass.cpp +++ 
b/src/swrenderer/scene/r_translucent_pass.cpp @@ -90,7 +90,7 @@ namespace swrenderer } } - bool RenderTranslucentPass::ClipSpriteColumnWithPortals(int x, vissprite_t* spr) + bool RenderTranslucentPass::ClipSpriteColumnWithPortals(int x, VisibleSprite *spr) { RenderPortal *renderportal = RenderPortal::Instance(); @@ -118,7 +118,7 @@ namespace swrenderer return false; } - void RenderTranslucentPass::DrawSprite(vissprite_t *spr) + void RenderTranslucentPass::DrawSprite(VisibleSprite *spr) { static short clipbot[MAXWIDTH]; static short cliptop[MAXWIDTH]; @@ -136,12 +136,13 @@ namespace swrenderer Clip3DFloors *clip3d = Clip3DFloors::Instance(); // [RH] Check for particles - if (!spr->bIsVoxel && spr->pic == nullptr) + if (spr->IsParticle()) { // kg3D - reject invisible parts if ((clip3d->fake3D & FAKE3D_CLIPBOTTOM) && spr->gpos.Z <= clip3d->sclipBottom) return; if ((clip3d->fake3D & FAKE3D_CLIPTOP) && spr->gpos.Z >= clip3d->sclipTop) return; - RenderParticle::Render(spr); + + spr->Render(nullptr, nullptr, 0, 0); return; } @@ -258,7 +259,7 @@ namespace swrenderer double scale = InvZtoScale * spr->idepth; double hzb = DBL_MIN, hzt = DBL_MAX; - if (spr->bIsVoxel && spr->floorclip != 0) + if (spr->IsVoxel() && spr->floorclip != 0) { hzb = spr->gzb; } @@ -272,7 +273,7 @@ namespace swrenderer if (spr->FakeFlatStat == WaterFakeSide::BelowFloor) { // seen below floor: clip top - if (!spr->bIsVoxel && h > topclip) + if (!spr->IsVoxel() && h > topclip) { topclip = short(MIN(h, viewheight)); } @@ -280,7 +281,7 @@ namespace swrenderer } else { // seen in the middle: clip bottom - if (!spr->bIsVoxel && h < botclip) + if (!spr->IsVoxel() && h < botclip) { botclip = MAX(0, h); } @@ -294,7 +295,7 @@ namespace swrenderer if (spr->FakeFlatStat == WaterFakeSide::AboveCeiling) { // seen above ceiling: clip bottom - if (!spr->bIsVoxel && h < botclip) + if (!spr->IsVoxel() && h < botclip) { botclip = MAX(0, h); } @@ -302,7 +303,7 @@ namespace swrenderer } else { // seen in the middle: clip 
top - if (!spr->bIsVoxel && h > topclip) + if (!spr->IsVoxel() && h > topclip) { topclip = MIN(h, viewheight); } @@ -311,7 +312,7 @@ namespace swrenderer } } // killough 3/27/98: end special clipping for deep water / fake ceilings - else if (!spr->bIsVoxel && spr->floorclip) + else if (!spr->IsVoxel() && spr->floorclip) { // [RH] Move floorclip stuff from R_DrawVisSprite to here //int clip = ((FLOAT2FIXED(CenterY) - FixedMul (spr->texturemid - (spr->pic->GetHeight() << FRACBITS) + spr->floorclip, spr->yscale)) >> FRACBITS); int clip = xs_RoundToInt(CenterY - (spr->texturemid - spr->pic->GetHeight() + spr->floorclip) * spr->yscale); @@ -323,7 +324,7 @@ namespace swrenderer if (clip3d->fake3D & FAKE3D_CLIPBOTTOM) { - if (!spr->bIsVoxel) + if (!spr->IsVoxel()) { double hz = clip3d->sclipBottom; if (spr->fakefloor) @@ -344,7 +345,7 @@ namespace swrenderer } if (clip3d->fake3D & FAKE3D_CLIPTOP) { - if (!spr->bIsVoxel) + if (!spr->IsVoxel()) { double hz = clip3d->sclipTop; if (spr->fakeceiling != nullptr) @@ -411,7 +412,7 @@ namespace swrenderer r2 = MIN(ds->x2, x2); float neardepth, fardepth; - if (!spr->bWallSprite) + if (!spr->IsWallSprite()) { if (ds->sz1 < ds->sz2) { @@ -425,7 +426,7 @@ namespace swrenderer // Check if sprite is in front of draw seg: - if ((!spr->bWallSprite && neardepth > spr->depth) || ((spr->bWallSprite || fardepth > spr->depth) && + if ((!spr->IsWallSprite() && neardepth > spr->depth) || ((spr->IsWallSprite() || fardepth > spr->depth) && (spr->gpos.Y - ds->curline->v1->fY()) * (ds->curline->v2->fX() - ds->curline->v1->fX()) - (spr->gpos.X - ds->curline->v1->fX()) * (ds->curline->v2->fY() - ds->curline->v1->fY()) <= 0)) { @@ -475,16 +476,9 @@ namespace swrenderer // all clipping has been performed, so draw the sprite - if (!spr->bIsVoxel) + if (!spr->IsVoxel()) { - if (!spr->bWallSprite) - { - RenderSprite::Render(spr, clipbot, cliptop); - } - else - { - RenderWallSprite::Render(spr, clipbot, cliptop); - } + spr->Render(clipbot, cliptop, 0, 0); } 
else { @@ -517,7 +511,7 @@ namespace swrenderer } int minvoxely = spr->gzt <= hzt ? 0 : xs_RoundToInt((spr->gzt - hzt) / spr->yscale); int maxvoxely = spr->gzb > hzb ? INT_MAX : xs_RoundToInt((spr->gzt - hzb) / spr->yscale); - RenderVoxel::Render(spr, minvoxely, maxvoxely, cliptop, clipbot); + spr->Render(cliptop, clipbot, minvoxely, maxvoxely); } spr->Style.BaseColormap = colormap; spr->Style.ColormapNum = colormapnum; diff --git a/src/swrenderer/scene/r_translucent_pass.h b/src/swrenderer/scene/r_translucent_pass.h index 359a5002d6..2e94198b64 100644 --- a/src/swrenderer/scene/r_translucent_pass.h +++ b/src/swrenderer/scene/r_translucent_pass.h @@ -22,7 +22,7 @@ struct FVoxel; namespace swrenderer { - struct vissprite_t; + class VisibleSprite; struct drawseg_t; class RenderTranslucentPass @@ -34,11 +34,11 @@ namespace swrenderer static bool DrewAVoxel; - static bool ClipSpriteColumnWithPortals(int x, vissprite_t* spr); + static bool ClipSpriteColumnWithPortals(int x, VisibleSprite *spr); private: static void CollectPortals(); - static void DrawSprite(vissprite_t *spr); + static void DrawSprite(VisibleSprite *spr); static void DrawMaskedSingle(bool renew); static TArray portaldrawsegs; diff --git a/src/swrenderer/things/r_particle.cpp b/src/swrenderer/things/r_particle.cpp index 60363e9807..3620c916b0 100644 --- a/src/swrenderer/things/r_particle.cpp +++ b/src/swrenderer/things/r_particle.cpp @@ -178,7 +178,7 @@ namespace swrenderer return; // store information in a vissprite - vissprite_t *vis = RenderMemory::NewObject(); + RenderParticle *vis = RenderMemory::NewObject(); vis->CurrentPortalUniq = renderportal->CurrentPortalUniq; vis->heightsec = heightsec; vis->xscale = FLOAT2FIXED(xscale); @@ -194,7 +194,6 @@ namespace swrenderer vis->Translation = 0; vis->startfrac = 255 & (particle->color >> 24); vis->pic = NULL; - vis->bIsVoxel = false; vis->renderflags = (short)(particle->alpha * 255.0f + 0.5f); vis->FakeFlatStat = fakeside; vis->floorclip = 0; @@ -226,10 
+225,12 @@ namespace swrenderer VisibleSpriteList::Instance()->Push(vis); } - void RenderParticle::Render(vissprite_t *vis) + void RenderParticle::Render(short *cliptop, short *clipbottom, int minZ, int maxZ) { using namespace drawerargs; + auto vis = this; + int spacing; BYTE color = vis->Style.BaseColormap->Maps[vis->startfrac]; int yl = vis->y1; @@ -240,7 +241,7 @@ namespace swrenderer if (ycount <= 0 || countbase <= 0) return; - DrawMaskedSegsBehindParticle(vis); + DrawMaskedSegsBehindParticle(); uint32_t fg = LightBgra::shade_pal_index_simple(color, LightBgra::calc_light_multiplier(LIGHTSCALE(0, vis->Style.ColormapNum << FRACBITS))); @@ -275,11 +276,8 @@ namespace swrenderer } } - void RenderParticle::DrawMaskedSegsBehindParticle(const vissprite_t *vis) + void RenderParticle::DrawMaskedSegsBehindParticle() { - const int x1 = vis->x1; - const int x2 = vis->x2; - // Draw any masked textures behind this particle so that when the // particle is drawn, it will be in front of them. for (unsigned int p = InterestingDrawsegs.Size(); p-- > FirstInterestingDrawseg; ) @@ -291,10 +289,10 @@ namespace swrenderer { continue; } - if ((ds->siz2 - ds->siz1) * ((x2 + x1) / 2 - ds->sx1) / (ds->sx2 - ds->sx1) + ds->siz1 < vis->idepth) + if ((ds->siz2 - ds->siz1) * ((x2 + x1) / 2 - ds->sx1) / (ds->sx2 - ds->sx1) + ds->siz1 < idepth) { // [ZZ] only draw stuff that's inside the same portal as the particle, other portals will care for themselves - if (ds->CurrentPortalUniq == vis->CurrentPortalUniq) + if (ds->CurrentPortalUniq == CurrentPortalUniq) R_RenderMaskedSegRange(ds, MAX(ds->x1, x1), MIN(ds->x2, x2)); } } diff --git a/src/swrenderer/things/r_particle.h b/src/swrenderer/things/r_particle.h index d8aa2952a7..964b2ac164 100644 --- a/src/swrenderer/things/r_particle.h +++ b/src/swrenderer/things/r_particle.h @@ -18,13 +18,22 @@ namespace swrenderer { - class RenderParticle + class RenderParticle : public VisibleSprite { public: static void Project(particle_t *, const sector_t 
*sector, int shade, WaterFakeSide fakeside, bool foggy); - static void Render(vissprite_t *); + + bool IsParticle() const override { return true; } + void Render(short *cliptop, short *clipbottom, int minZ, int maxZ) override; private: - static void DrawMaskedSegsBehindParticle(const vissprite_t *vis); + void DrawMaskedSegsBehindParticle(); + + fixed_t xscale; + fixed_t startfrac; // horizontal position of x1 + int y1, y2; + + uint32_t Translation; + uint32_t FillColor; }; } diff --git a/src/swrenderer/things/r_playersprite.cpp b/src/swrenderer/things/r_playersprite.cpp index b94ce19417..0632af0741 100644 --- a/src/swrenderer/things/r_playersprite.cpp +++ b/src/swrenderer/things/r_playersprite.cpp @@ -72,7 +72,7 @@ namespace swrenderer double RenderPlayerSprite::pspritexiscale; double RenderPlayerSprite::pspriteyscale; - TArray RenderPlayerSprite::avis; + TArray RenderPlayerSprite::avis; void RenderPlayerSprite::SetupSpriteScale() { @@ -205,7 +205,6 @@ namespace swrenderer FTextureID picnum; WORD flip; FTexture* tex; - vissprite_t* vis; bool noaccel; double alpha = owner->Alpha; @@ -273,7 +272,7 @@ namespace swrenderer return; // store information in a vissprite - vis = &avis[vispspindex]; + RenderSprite *vis = &avis[vispspindex]; vis->renderflags = owner->renderflags; vis->floorclip = 0; @@ -571,16 +570,14 @@ namespace swrenderer short *mfloorclip = screenheightarray; short *mceilingclip = zeroarray; - RenderSprite::Render(vis, mfloorclip, mceilingclip); + vis->Render(mfloorclip, mceilingclip, 0, 0); } void RenderPlayerSprite::RenderRemainingPlayerSprites() { for (unsigned int i = 0; i < vispspindex; i++) { - vissprite_t *vis; - - vis = vispsprites[i].vis; + RenderSprite *vis = vispsprites[i].vis; FDynamicColormap *colormap = vispsprites[i].basecolormap; bool flip = vis->xiscale < 0; FSpecialColormap *special = NULL; diff --git a/src/swrenderer/things/r_playersprite.h b/src/swrenderer/things/r_playersprite.h index b1eebe4310..75a4e7ba67 100644 --- 
a/src/swrenderer/things/r_playersprite.h +++ b/src/swrenderer/things/r_playersprite.h @@ -14,6 +14,7 @@ #pragma once #include "r_visiblesprite.h" +#include "r_sprite.h" class DPSprite; @@ -36,7 +37,7 @@ namespace swrenderer // Used to store a psprite's drawing information if it needs to be drawn later. struct vispsp_t { - vissprite_t *vis; + RenderSprite *vis; FDynamicColormap *basecolormap; int x1; }; @@ -48,6 +49,6 @@ namespace swrenderer static double pspritexiscale; static double pspriteyscale; - static TArray avis; + static TArray avis; }; } diff --git a/src/swrenderer/things/r_sprite.cpp b/src/swrenderer/things/r_sprite.cpp index d1c55165fa..95c95a208d 100644 --- a/src/swrenderer/things/r_sprite.cpp +++ b/src/swrenderer/things/r_sprite.cpp @@ -156,7 +156,7 @@ namespace swrenderer double yscale = spriteScale.Y / tex->Scale.Y; // store information in a vissprite - vissprite_t *vis = RenderMemory::NewObject(); + RenderSprite *vis = RenderMemory::NewObject(); vis->CurrentPortalUniq = renderportal->CurrentPortalUniq; vis->xscale = FLOAT2FIXED(xscale); @@ -166,7 +166,7 @@ namespace swrenderer vis->texturemid = tex->TopOffset - (ViewPos.Z - pos.Z + thing->Floorclip) / yscale; vis->x1 = x1 < renderportal->WindowLeft ? renderportal->WindowLeft : x1; vis->x2 = x2 > renderportal->WindowRight ? 
renderportal->WindowRight : x2; - vis->Angle = thing->Angles.Yaw; + //vis->Angle = thing->Angles.Yaw; if (renderflags & RF_XFLIP) { @@ -202,12 +202,10 @@ namespace swrenderer vis->fakefloor = fakefloor; vis->fakeceiling = fakeceiling; vis->Style.ColormapNum = 0; - vis->bInMirror = renderportal->MirrorFlags & RF_XFLIP; - vis->bSplitSprite = false; + //vis->bInMirror = renderportal->MirrorFlags & RF_XFLIP; + //vis->bSplitSprite = false; vis->pic = tex; - vis->bIsVoxel = false; - vis->bWallSprite = false; vis->foggy = foggy; @@ -278,8 +276,10 @@ namespace swrenderer VisibleSpriteList::Instance()->Push(vis); } - void RenderSprite::Render(vissprite_t *vis, const short *mfloorclip, const short *mceilingclip) + void RenderSprite::Render(short *mfloorclip, short *mceilingclip, int, int) { + auto vis = this; + fixed_t frac; FTexture *tex; int x2; diff --git a/src/swrenderer/things/r_sprite.h b/src/swrenderer/things/r_sprite.h index 7c5a68a078..d45fd39550 100644 --- a/src/swrenderer/things/r_sprite.h +++ b/src/swrenderer/things/r_sprite.h @@ -17,10 +17,21 @@ namespace swrenderer { - class RenderSprite + class RenderSprite : public VisibleSprite { public: static void Project(AActor *thing, const DVector3 &pos, FTexture *tex, const DVector2 &spriteScale, int renderflags, WaterFakeSide fakeside, F3DFloor *fakefloor, F3DFloor *fakeceiling, sector_t *current_sector, int spriteshade, bool foggy, FDynamicColormap *basecolormap); - static void Render(vissprite_t *vis, const short *mfloorclip, const short *mceilingclip); + + void Render(short *cliptop, short *clipbottom, int minZ, int maxZ) override; + + private: + fixed_t xscale; + fixed_t startfrac; // horizontal position of x1 + fixed_t xiscale; // negative if flipped + + uint32_t Translation; + uint32_t FillColor; + + friend class RenderPlayerSprite; // To do: detach sprite from playersprite! 
}; } diff --git a/src/swrenderer/things/r_visiblesprite.h b/src/swrenderer/things/r_visiblesprite.h index b7718991d1..95b8028b1c 100644 --- a/src/swrenderer/things/r_visiblesprite.h +++ b/src/swrenderer/things/r_visiblesprite.h @@ -24,70 +24,44 @@ struct FVoxel; namespace swrenderer { - struct vissprite_t + class VisibleSprite { - struct posang - { - FVector3 vpos; // view origin - FAngle vang; // view angle - }; + public: + virtual bool IsParticle() const { return false; } + virtual bool IsVoxel() const { return false; } + virtual bool IsWallSprite() const { return false; } + + virtual void Render(short *cliptop, short *clipbottom, int minZ, int maxZ) = 0; + + FTexture *pic; short x1, x2; - FVector3 gpos; // origin in world coordinates - union - { - struct - { - float gzb, gzt; // global bottom / top for silhouette clipping - }; - struct - { - int y1, y2; // top / bottom of particle on screen - }; - }; - DAngle Angle; - fixed_t xscale; - float yscale; - float depth; - float idepth; // 1/z - float deltax, deltay; - uint32_t FillColor; + float gzb, gzt; // global bottom / top for silhouette clipping + double floorclip; - union - { - FTexture *pic; - struct FVoxel *voxel; - }; - union - { - // Used by face sprites - struct - { - double texturemid; - fixed_t startfrac; // horizontal position of x1 - fixed_t xiscale; // negative if flipped - }; - // Used by wall sprites - FWallCoords wallc; - // Used by voxels - posang pa; - }; - sector_t *heightsec; // killough 3/27/98: height sector for underwater/fake ceiling - sector_t *sector; // [RH] sector this sprite is in - F3DFloor *fakefloor; + + double texturemid; // floorclip + float yscale; // voxel and floorclip + + sector_t *heightsec; // height sector for underwater/fake ceiling + WaterFakeSide FakeFlatStat; // which side of fake/floor ceiling sprite is on + + F3DFloor *fakefloor; // 3d floor clipping F3DFloor *fakeceiling; - uint8_t bIsVoxel : 1; // [RH] Use voxel instead of pic - uint8_t bWallSprite : 1; // [RH] This 
is a wall sprite - uint8_t bSplitSprite : 1; // [RH] Sprite was split by a drawseg - uint8_t bInMirror : 1; // [RH] Sprite is "inside" a mirror - WaterFakeSide FakeFlatStat; // [RH] which side of fake/floor ceiling sprite is on - short renderflags; - uint32_t Translation; // [RH] for color translation + + FVector3 gpos; // origin in world coordinates + sector_t *sector; // sector this sprite is in + + // Light shared calculation? visstyle_t Style; - int CurrentPortalUniq; // [ZZ] to identify the portal that this thing is in. used for clipping. - bool foggy; + short renderflags; - vissprite_t() {} + float depth; // Sort (draw segments), also light + + float deltax, deltay; // Sort (2d/voxel version) + float idepth; // Sort (non-voxel version) + + int CurrentPortalUniq; // to identify the portal that this thing is in. used for clipping. }; } diff --git a/src/swrenderer/things/r_visiblespritelist.cpp b/src/swrenderer/things/r_visiblespritelist.cpp index 084f7b49ca..2ec36e9788 100644 --- a/src/swrenderer/things/r_visiblespritelist.cpp +++ b/src/swrenderer/things/r_visiblespritelist.cpp @@ -52,15 +52,15 @@ namespace swrenderer StartIndices.Pop(); } - void VisibleSpriteList::Push(vissprite_t *sprite) + void VisibleSpriteList::Push(VisibleSprite *sprite) { Sprites.Push(sprite); } void VisibleSpriteList::Sort(bool compare2d) { - size_t first = StartIndices.Size() == 0 ? 0 : StartIndices.Last(); - size_t count = Sprites.Size() - first; + unsigned int first = StartIndices.Size() == 0 ? 0 : StartIndices.Last(); + unsigned int count = Sprites.Size() - first; SortedSprites.Resize(count); @@ -69,7 +69,7 @@ namespace swrenderer if (!(i_compatflags & COMPATF_SPRITESORT)) { - for (size_t i = 0; i < count; i++) + for (unsigned int i = 0; i < count; i++) SortedSprites[i] = Sprites[first + i]; } else @@ -77,7 +77,7 @@ namespace swrenderer // If the compatibility option is on sprites of equal distance need to // be sorted in inverse order. 
This is most easily achieved by // filling the sort array backwards before the sort. - for (size_t i = 0; i < count; i++) + for (unsigned int i = 0; i < count; i++) SortedSprites[i] = Sprites[first + count - i - 1]; } @@ -87,7 +87,7 @@ namespace swrenderer // It does a 2D distance test based on whichever one is furthest from // the viewpoint. - std::stable_sort(&SortedSprites[0], &SortedSprites[count], [](vissprite_t *a, vissprite_t *b) -> bool + std::stable_sort(&SortedSprites[0], &SortedSprites[count], [](VisibleSprite *a, VisibleSprite *b) -> bool { return DVector2(a->deltax, a->deltay).LengthSquared() < DVector2(b->deltax, b->deltay).LengthSquared(); }); @@ -96,7 +96,7 @@ namespace swrenderer { // This is the standard version, which does a simple test based on depth. - std::stable_sort(&SortedSprites[0], &SortedSprites[count], [](vissprite_t *a, vissprite_t *b) -> bool + std::stable_sort(&SortedSprites[0], &SortedSprites[count], [](VisibleSprite *a, VisibleSprite *b) -> bool { return a->idepth > b->idepth; }); diff --git a/src/swrenderer/things/r_visiblespritelist.h b/src/swrenderer/things/r_visiblespritelist.h index 0dd43e0009..7add812ae1 100644 --- a/src/swrenderer/things/r_visiblespritelist.h +++ b/src/swrenderer/things/r_visiblespritelist.h @@ -16,7 +16,7 @@ namespace swrenderer { struct drawseg_t; - struct vissprite_t; + class VisibleSprite; class VisibleSpriteList { @@ -26,13 +26,13 @@ namespace swrenderer void Clear(); void PushPortal(); void PopPortal(); - void Push(vissprite_t *sprite); + void Push(VisibleSprite *sprite); void Sort(bool compare2d); - TArray SortedSprites; + TArray SortedSprites; private: - TArray Sprites; - TArray StartIndices; + TArray Sprites; + TArray StartIndices; }; } diff --git a/src/swrenderer/things/r_voxel.cpp b/src/swrenderer/things/r_voxel.cpp index 6c4d3322f4..7ff15c7be3 100644 --- a/src/swrenderer/things/r_voxel.cpp +++ b/src/swrenderer/things/r_voxel.cpp @@ -105,7 +105,7 @@ namespace swrenderer } } - vissprite_t *vis = 
RenderMemory::NewObject(); + RenderVoxel *vis = RenderMemory::NewObject(); vis->CurrentPortalUniq = renderportal->CurrentPortalUniq; vis->xscale = FLOAT2FIXED(xscale); @@ -150,12 +150,10 @@ namespace swrenderer vis->fakefloor = fakefloor; vis->fakeceiling = fakeceiling; vis->Style.ColormapNum = 0; - vis->bInMirror = renderportal->MirrorFlags & RF_XFLIP; - vis->bSplitSprite = false; + //vis->bInMirror = renderportal->MirrorFlags & RF_XFLIP; + //vis->bSplitSprite = false; vis->voxel = voxel->Voxel; - vis->bIsVoxel = true; - vis->bWallSprite = false; vis->foggy = foggy; // The software renderer cannot invert the source without inverting the overlay @@ -227,8 +225,10 @@ namespace swrenderer RenderTranslucentPass::DrewAVoxel = true; } - void RenderVoxel::Render(vissprite_t *sprite, int minZ, int maxZ, short *cliptop, short *clipbottom) + void RenderVoxel::Render(short *cliptop, short *clipbottom, int minZ, int maxZ) { + auto sprite = this; + FDynamicColormap *basecolormap = static_cast(sprite->Style.BaseColormap); R_SetColorMapLight(sprite->Style.BaseColormap, 0, sprite->Style.ColormapNum << FRACBITS); diff --git a/src/swrenderer/things/r_voxel.h b/src/swrenderer/things/r_voxel.h index 3788d0b192..57992fe834 100644 --- a/src/swrenderer/things/r_voxel.h +++ b/src/swrenderer/things/r_voxel.h @@ -25,11 +25,10 @@ struct kvxslab_t; struct FVoxelMipLevel; +struct FVoxel; namespace swrenderer { - struct vissprite_t; - // [RH] A c-buffer. Used for keeping track of offscreen voxel spans. 
struct FCoverageBuffer { @@ -52,15 +51,31 @@ namespace swrenderer unsigned int NumLists; }; - class RenderVoxel + class RenderVoxel : public VisibleSprite { public: static void Project(AActor *thing, DVector3 pos, FVoxelDef *voxel, const DVector2 &spriteScale, int renderflags, WaterFakeSide fakeside, F3DFloor *fakefloor, F3DFloor *fakeceiling, sector_t *current_sector, int spriteshade, bool foggy, FDynamicColormap *basecolormap); - static void Render(vissprite_t *sprite, int minZ, int maxZ, short *cliptop, short *clipbottom); static void Deinit(); + bool IsVoxel() const override { return true; } + void Render(short *cliptop, short *clipbottom, int minZ, int maxZ) override; + private: + struct posang + { + FVector3 vpos; // view origin + FAngle vang; // view angle + }; + + posang pa; + DAngle Angle; + fixed_t xscale; + FVoxel *voxel; + + uint32_t Translation; + uint32_t FillColor; + enum { DVF_OFFSCREEN = 1, DVF_SPANSONLY = 2, DVF_MIRRORED = 4 }; static void FillBox(DVector3 origin, double extentX, double extentY, int color, short *cliptop, short *clipbottom, bool viewspace, bool pixelstretch); diff --git a/src/swrenderer/things/r_wallsprite.cpp b/src/swrenderer/things/r_wallsprite.cpp index 319dcfc891..3f8ecbaa79 100644 --- a/src/swrenderer/things/r_wallsprite.cpp +++ b/src/swrenderer/things/r_wallsprite.cpp @@ -104,7 +104,7 @@ namespace swrenderer gzt = pos.Z + scale.Y * scaled_to; gzb = pos.Z + scale.Y * scaled_bo; - vissprite_t *vis = RenderMemory::NewObject(); + RenderWallSprite *vis = RenderMemory::NewObject(); vis->CurrentPortalUniq = renderportal->CurrentPortalUniq; vis->x1 = wallc.sx1 < renderportal->WindowLeft ? renderportal->WindowLeft : wallc.sx1; vis->x2 = wallc.sx2 >= renderportal->WindowRight ? 
renderportal->WindowRight : wallc.sx2; @@ -127,12 +127,9 @@ namespace swrenderer vis->Style.Alpha = float(thing->Alpha); vis->fakefloor = NULL; vis->fakeceiling = NULL; - vis->bInMirror = renderportal->MirrorFlags & RF_XFLIP; + //vis->bInMirror = renderportal->MirrorFlags & RF_XFLIP; vis->pic = pic; - vis->bIsVoxel = false; - vis->bWallSprite = true; - vis->Style.ColormapNum = GETPALOOKUP( - r_SpriteVisibility / MAX(tz, MINZ), spriteshade); + vis->Style.ColormapNum = GETPALOOKUP(r_SpriteVisibility / MAX(tz, MINZ), spriteshade); vis->Style.BaseColormap = basecolormap; vis->wallc = wallc; vis->foggy = foggy; @@ -140,8 +137,10 @@ namespace swrenderer VisibleSpriteList::Instance()->Push(vis); } - void RenderWallSprite::Render(vissprite_t *spr, const short *mfloorclip, const short *mceilingclip) + void RenderWallSprite::Render(short *mfloorclip, short *mceilingclip, int, int) { + auto spr = this; + int x1, x2; double iyscale; bool sprflipvert; diff --git a/src/swrenderer/things/r_wallsprite.h b/src/swrenderer/things/r_wallsprite.h index 350912cd1b..9791da678b 100644 --- a/src/swrenderer/things/r_wallsprite.h +++ b/src/swrenderer/things/r_wallsprite.h @@ -17,13 +17,19 @@ namespace swrenderer { - class RenderWallSprite + class RenderWallSprite : public VisibleSprite { public: static void Project(AActor *thing, const DVector3 &pos, FTextureID picnum, const DVector2 &scale, int renderflags, int spriteshade, bool foggy, FDynamicColormap *basecolormap); - static void Render(vissprite_t *spr, const short *mfloorclip, const short *mceilingclip); + + bool IsWallSprite() const override { return true; } + void Render(short *cliptop, short *clipbottom, int minZ, int maxZ) override; private: static void DrawColumn(int x, FTexture *WallSpriteTile, double texturemid, float maskedScaleY, bool sprflipvert, const short *mfloorclip, const short *mceilingclip); + + FWallCoords wallc; + uint32_t Translation; + uint32_t FillColor; }; } From 433eb77c37177878954e4ef874f4b8a5790719f4 Mon Sep 17 
00:00:00 2001 From: Magnus Norddahl Date: Mon, 16 Jan 2017 05:43:56 +0100 Subject: [PATCH 725/912] Moved DrawSprite to VisibleSprite and marked all its variables as protected --- src/swrenderer/scene/r_translucent_pass.cpp | 408 +----------------- src/swrenderer/scene/r_translucent_pass.h | 1 - src/swrenderer/things/r_particle.h | 3 + src/swrenderer/things/r_sprite.h | 1 + src/swrenderer/things/r_visiblesprite.cpp | 407 ++++++++++++++++- src/swrenderer/things/r_visiblesprite.h | 12 +- src/swrenderer/things/r_visiblespritelist.cpp | 4 +- src/swrenderer/things/r_voxel.h | 1 + src/swrenderer/things/r_wallsprite.h | 1 + 9 files changed, 428 insertions(+), 410 deletions(-) diff --git a/src/swrenderer/scene/r_translucent_pass.cpp b/src/swrenderer/scene/r_translucent_pass.cpp index 252c31f561..cbacf11e1b 100644 --- a/src/swrenderer/scene/r_translucent_pass.cpp +++ b/src/swrenderer/scene/r_translucent_pass.cpp @@ -107,7 +107,7 @@ namespace swrenderer // (all checks that are already done in R_CollectPortals have been removed for performance reasons.) 
// don't clip if the sprite is in front of the portal - if (!P_PointOnLineSidePrecise(spr->gpos.X, spr->gpos.Y, seg->curline->linedef)) + if (!P_PointOnLineSidePrecise(spr->WorldPos().X, spr->WorldPos().Y, seg->curline->linedef)) continue; // now if current column is covered by this drawseg, we clip it away @@ -118,405 +118,6 @@ namespace swrenderer return false; } - void RenderTranslucentPass::DrawSprite(VisibleSprite *spr) - { - static short clipbot[MAXWIDTH]; - static short cliptop[MAXWIDTH]; - drawseg_t *ds; - int i; - int x1, x2; - int r1, r2; - short topclip, botclip; - short *clip1, *clip2; - FSWColormap *colormap = spr->Style.BaseColormap; - int colormapnum = spr->Style.ColormapNum; - F3DFloor *rover; - FDynamicColormap *mybasecolormap; - - Clip3DFloors *clip3d = Clip3DFloors::Instance(); - - // [RH] Check for particles - if (spr->IsParticle()) - { - // kg3D - reject invisible parts - if ((clip3d->fake3D & FAKE3D_CLIPBOTTOM) && spr->gpos.Z <= clip3d->sclipBottom) return; - if ((clip3d->fake3D & FAKE3D_CLIPTOP) && spr->gpos.Z >= clip3d->sclipTop) return; - - spr->Render(nullptr, nullptr, 0, 0); - return; - } - - x1 = spr->x1; - x2 = spr->x2; - - // [RH] Quickly reject sprites with bad x ranges. - if (x1 >= x2) - return; - - // [RH] Sprites split behind a one-sided line can also be discarded. 
- if (spr->sector == nullptr) - return; - - // kg3D - reject invisible parts - if ((clip3d->fake3D & FAKE3D_CLIPBOTTOM) && spr->gzt <= clip3d->sclipBottom) return; - if ((clip3d->fake3D & FAKE3D_CLIPTOP) && spr->gzb >= clip3d->sclipTop) return; - - // kg3D - correct colors now - if (!fixedcolormap && fixedlightlev < 0 && spr->sector->e && spr->sector->e->XFloor.lightlist.Size()) - { - if (!(clip3d->fake3D & FAKE3D_CLIPTOP)) - { - clip3d->sclipTop = spr->sector->ceilingplane.ZatPoint(ViewPos); - } - sector_t *sec = nullptr; - for (i = spr->sector->e->XFloor.lightlist.Size() - 1; i >= 0; i--) - { - if (clip3d->sclipTop <= spr->sector->e->XFloor.lightlist[i].plane.Zat0()) - { - rover = spr->sector->e->XFloor.lightlist[i].caster; - if (rover) - { - if (rover->flags & FF_DOUBLESHADOW && clip3d->sclipTop <= rover->bottom.plane->Zat0()) - { - break; - } - sec = rover->model; - if (rover->flags & FF_FADEWALLS) - { - mybasecolormap = sec->ColorMap; - } - else - { - mybasecolormap = spr->sector->e->XFloor.lightlist[i].extra_colormap; - } - } - break; - } - } - // found new values, recalculate - if (sec) - { - INTBOOL invertcolormap = (spr->Style.RenderStyle.Flags & STYLEF_InvertOverlay); - - if (spr->Style.RenderStyle.Flags & STYLEF_InvertSource) - { - invertcolormap = !invertcolormap; - } - - // Sprites that are added to the scene must fade to black. 
- if (spr->Style.RenderStyle == LegacyRenderStyles[STYLE_Add] && mybasecolormap->Fade != 0) - { - mybasecolormap = GetSpecialLights(mybasecolormap->Color, 0, mybasecolormap->Desaturate); - } - - if (spr->Style.RenderStyle.Flags & STYLEF_FadeToBlack) - { - if (invertcolormap) - { // Fade to white - mybasecolormap = GetSpecialLights(mybasecolormap->Color, MAKERGB(255, 255, 255), mybasecolormap->Desaturate); - invertcolormap = false; - } - else - { // Fade to black - mybasecolormap = GetSpecialLights(mybasecolormap->Color, MAKERGB(0, 0, 0), mybasecolormap->Desaturate); - } - } - - // get light level - if (invertcolormap) - { - mybasecolormap = GetSpecialLights(mybasecolormap->Color, mybasecolormap->Fade.InverseColor(), mybasecolormap->Desaturate); - } - if (fixedlightlev >= 0) - { - spr->Style.BaseColormap = mybasecolormap; - spr->Style.ColormapNum = fixedlightlev >> COLORMAPSHIFT; - } - else if (!spr->foggy && (spr->renderflags & RF_FULLBRIGHT)) - { // full bright - spr->Style.BaseColormap = (r_fullbrightignoresectorcolor) ? &FullNormalLight : mybasecolormap; - spr->Style.ColormapNum = 0; - } - else - { // diminished light - int spriteshade = LIGHT2SHADE(sec->lightlevel + R_ActualExtraLight(spr->foggy)); - spr->Style.BaseColormap = mybasecolormap; - spr->Style.ColormapNum = GETPALOOKUP(r_SpriteVisibility / MAX(MINZ, (double)spr->depth), spriteshade); - } - } - } - - // [RH] Initialize the clipping arrays to their largest possible range - // instead of using a special "not clipped" value. This eliminates - // visual anomalies when looking down and should be faster, too. - topclip = 0; - botclip = viewheight; - - // killough 3/27/98: - // Clip the sprite against deep water and/or fake ceilings. 
- // [RH] rewrote this to be based on which part of the sector is really visible - - double scale = InvZtoScale * spr->idepth; - double hzb = DBL_MIN, hzt = DBL_MAX; - - if (spr->IsVoxel() && spr->floorclip != 0) - { - hzb = spr->gzb; - } - - if (spr->heightsec && !(spr->heightsec->MoreFlags & SECF_IGNOREHEIGHTSEC)) - { // only things in specially marked sectors - if (spr->FakeFlatStat != WaterFakeSide::AboveCeiling) - { - double hz = spr->heightsec->floorplane.ZatPoint(spr->gpos); - int h = xs_RoundToInt(CenterY - (hz - ViewPos.Z) * scale); - - if (spr->FakeFlatStat == WaterFakeSide::BelowFloor) - { // seen below floor: clip top - if (!spr->IsVoxel() && h > topclip) - { - topclip = short(MIN(h, viewheight)); - } - hzt = MIN(hzt, hz); - } - else - { // seen in the middle: clip bottom - if (!spr->IsVoxel() && h < botclip) - { - botclip = MAX(0, h); - } - hzb = MAX(hzb, hz); - } - } - if (spr->FakeFlatStat != WaterFakeSide::BelowFloor && !(spr->heightsec->MoreFlags & SECF_FAKEFLOORONLY)) - { - double hz = spr->heightsec->ceilingplane.ZatPoint(spr->gpos); - int h = xs_RoundToInt(CenterY - (hz - ViewPos.Z) * scale); - - if (spr->FakeFlatStat == WaterFakeSide::AboveCeiling) - { // seen above ceiling: clip bottom - if (!spr->IsVoxel() && h < botclip) - { - botclip = MAX(0, h); - } - hzb = MAX(hzb, hz); - } - else - { // seen in the middle: clip top - if (!spr->IsVoxel() && h > topclip) - { - topclip = MIN(h, viewheight); - } - hzt = MIN(hzt, hz); - } - } - } - // killough 3/27/98: end special clipping for deep water / fake ceilings - else if (!spr->IsVoxel() && spr->floorclip) - { // [RH] Move floorclip stuff from R_DrawVisSprite to here - //int clip = ((FLOAT2FIXED(CenterY) - FixedMul (spr->texturemid - (spr->pic->GetHeight() << FRACBITS) + spr->floorclip, spr->yscale)) >> FRACBITS); - int clip = xs_RoundToInt(CenterY - (spr->texturemid - spr->pic->GetHeight() + spr->floorclip) * spr->yscale); - if (clip < botclip) - { - botclip = MAX(0, clip); - } - } - - if 
(clip3d->fake3D & FAKE3D_CLIPBOTTOM) - { - if (!spr->IsVoxel()) - { - double hz = clip3d->sclipBottom; - if (spr->fakefloor) - { - double floorz = spr->fakefloor->top.plane->Zat0(); - if (ViewPos.Z > floorz && floorz == clip3d->sclipBottom) - { - hz = spr->fakefloor->bottom.plane->Zat0(); - } - } - int h = xs_RoundToInt(CenterY - (hz - ViewPos.Z) * scale); - if (h < botclip) - { - botclip = MAX(0, h); - } - } - hzb = MAX(hzb, clip3d->sclipBottom); - } - if (clip3d->fake3D & FAKE3D_CLIPTOP) - { - if (!spr->IsVoxel()) - { - double hz = clip3d->sclipTop; - if (spr->fakeceiling != nullptr) - { - double ceilingZ = spr->fakeceiling->bottom.plane->Zat0(); - if (ViewPos.Z < ceilingZ && ceilingZ == clip3d->sclipTop) - { - hz = spr->fakeceiling->top.plane->Zat0(); - } - } - int h = xs_RoundToInt(CenterY - (hz - ViewPos.Z) * scale); - if (h > topclip) - { - topclip = short(MIN(h, viewheight)); - } - } - hzt = MIN(hzt, clip3d->sclipTop); - } - - if (topclip >= botclip) - { - spr->Style.BaseColormap = colormap; - spr->Style.ColormapNum = colormapnum; - return; - } - - i = x2 - x1; - clip1 = clipbot + x1; - clip2 = cliptop + x1; - do - { - *clip1++ = botclip; - *clip2++ = topclip; - } while (--i); - - // Scan drawsegs from end to start for obscuring segs. - // The first drawseg that is closer than the sprite is the clip seg. 
- - // Modified by Lee Killough: - // (pointer check was originally nonportable - // and buggy, by going past LEFT end of array): - - // for (ds=ds_p-1 ; ds >= drawsegs ; ds--) old buggy code - - for (ds = ds_p; ds-- > firstdrawseg; ) // new -- killough - { - // [ZZ] portal handling here - //if (ds->CurrentPortalUniq != spr->CurrentPortalUniq) - // continue; - // [ZZ] WARNING: uncommenting the two above lines, totally breaks sprite clipping - - // kg3D - no clipping on fake segs - if (ds->fake) continue; - // determine if the drawseg obscures the sprite - if (ds->x1 >= x2 || ds->x2 <= x1 || - (!(ds->silhouette & SIL_BOTH) && ds->maskedtexturecol == nullptr && - !ds->bFogBoundary)) - { - // does not cover sprite - continue; - } - - r1 = MAX(ds->x1, x1); - r2 = MIN(ds->x2, x2); - - float neardepth, fardepth; - if (!spr->IsWallSprite()) - { - if (ds->sz1 < ds->sz2) - { - neardepth = ds->sz1, fardepth = ds->sz2; - } - else - { - neardepth = ds->sz2, fardepth = ds->sz1; - } - } - - - // Check if sprite is in front of draw seg: - if ((!spr->IsWallSprite() && neardepth > spr->depth) || ((spr->IsWallSprite() || fardepth > spr->depth) && - (spr->gpos.Y - ds->curline->v1->fY()) * (ds->curline->v2->fX() - ds->curline->v1->fX()) - - (spr->gpos.X - ds->curline->v1->fX()) * (ds->curline->v2->fY() - ds->curline->v1->fY()) <= 0)) - { - RenderPortal *renderportal = RenderPortal::Instance(); - - // seg is behind sprite, so draw the mid texture if it has one - if (ds->CurrentPortalUniq == renderportal->CurrentPortalUniq && // [ZZ] instead, portal uniq check is made here - (ds->maskedtexturecol != nullptr || ds->bFogBoundary)) - R_RenderMaskedSegRange(ds, r1, r2); - - continue; - } - - // clip this piece of the sprite - // killough 3/27/98: optimized and made much shorter - // [RH] Optimized further (at least for VC++; - // other compilers should be at least as good as before) - - if (ds->silhouette & SIL_BOTTOM) //bottom sil - { - clip1 = clipbot + r1; - clip2 = ds->sprbottomclip + 
r1 - ds->x1; - i = r2 - r1; - do - { - if (*clip1 > *clip2) - *clip1 = *clip2; - clip1++; - clip2++; - } while (--i); - } - - if (ds->silhouette & SIL_TOP) // top sil - { - clip1 = cliptop + r1; - clip2 = ds->sprtopclip + r1 - ds->x1; - i = r2 - r1; - do - { - if (*clip1 < *clip2) - *clip1 = *clip2; - clip1++; - clip2++; - } while (--i); - } - } - - // all clipping has been performed, so draw the sprite - - if (!spr->IsVoxel()) - { - spr->Render(clipbot, cliptop, 0, 0); - } - else - { - // If it is completely clipped away, don't bother drawing it. - if (cliptop[x2] >= clipbot[x2]) - { - for (i = x1; i < x2; ++i) - { - if (cliptop[i] < clipbot[i]) - { - break; - } - } - if (i == x2) - { - spr->Style.BaseColormap = colormap; - spr->Style.ColormapNum = colormapnum; - return; - } - } - // Add everything outside the left and right edges to the clipping array - // for R_DrawVisVoxel(). - if (x1 > 0) - { - fillshort(cliptop, x1, viewheight); - } - if (x2 < viewwidth - 1) - { - fillshort(cliptop + x2, viewwidth - x2, viewheight); - } - int minvoxely = spr->gzt <= hzt ? 0 : xs_RoundToInt((spr->gzt - hzt) / spr->yscale); - int maxvoxely = spr->gzb > hzb ? 
INT_MAX : xs_RoundToInt((spr->gzt - hzb) / spr->yscale); - spr->Render(cliptop, clipbot, minvoxely, maxvoxely); - } - spr->Style.BaseColormap = colormap; - spr->Style.ColormapNum = colormapnum; - } - void RenderTranslucentPass::DrawMaskedSingle(bool renew) { RenderPortal *renderportal = RenderPortal::Instance(); @@ -524,9 +125,10 @@ namespace swrenderer auto &sortedSprites = VisibleSpriteList::Instance()->SortedSprites; for (int i = sortedSprites.Size(); i > 0; i--) { - if (sortedSprites[i - 1]->CurrentPortalUniq != renderportal->CurrentPortalUniq) - continue; // probably another time - DrawSprite(sortedSprites[i - 1]); + if (sortedSprites[i - 1]->IsCurrentPortalUniq(renderportal->CurrentPortalUniq)) + { + sortedSprites[i - 1]->Render(); + } } // render any remaining masked mid textures diff --git a/src/swrenderer/scene/r_translucent_pass.h b/src/swrenderer/scene/r_translucent_pass.h index 2e94198b64..e3744db994 100644 --- a/src/swrenderer/scene/r_translucent_pass.h +++ b/src/swrenderer/scene/r_translucent_pass.h @@ -38,7 +38,6 @@ namespace swrenderer private: static void CollectPortals(); - static void DrawSprite(VisibleSprite *spr); static void DrawMaskedSingle(bool renew); static TArray portaldrawsegs; diff --git a/src/swrenderer/things/r_particle.h b/src/swrenderer/things/r_particle.h index 964b2ac164..244d1a8eee 100644 --- a/src/swrenderer/things/r_particle.h +++ b/src/swrenderer/things/r_particle.h @@ -16,6 +16,8 @@ #include "r_visiblesprite.h" #include "swrenderer/scene/r_opaque_pass.h" +struct particle_t; + namespace swrenderer { class RenderParticle : public VisibleSprite @@ -23,6 +25,7 @@ namespace swrenderer public: static void Project(particle_t *, const sector_t *sector, int shade, WaterFakeSide fakeside, bool foggy); + protected: bool IsParticle() const override { return true; } void Render(short *cliptop, short *clipbottom, int minZ, int maxZ) override; diff --git a/src/swrenderer/things/r_sprite.h b/src/swrenderer/things/r_sprite.h index 
d45fd39550..595a5d2a83 100644 --- a/src/swrenderer/things/r_sprite.h +++ b/src/swrenderer/things/r_sprite.h @@ -22,6 +22,7 @@ namespace swrenderer public: static void Project(AActor *thing, const DVector3 &pos, FTexture *tex, const DVector2 &spriteScale, int renderflags, WaterFakeSide fakeside, F3DFloor *fakefloor, F3DFloor *fakeceiling, sector_t *current_sector, int spriteshade, bool foggy, FDynamicColormap *basecolormap); + protected: void Render(short *cliptop, short *clipbottom, int minZ, int maxZ) override; private: diff --git a/src/swrenderer/things/r_visiblesprite.cpp b/src/swrenderer/things/r_visiblesprite.cpp index 71f036c9a0..d0551377c8 100644 --- a/src/swrenderer/things/r_visiblesprite.cpp +++ b/src/swrenderer/things/r_visiblesprite.cpp @@ -29,10 +29,415 @@ #include "swrenderer/things/r_wallsprite.h" #include "swrenderer/things/r_playersprite.h" #include "swrenderer/segments/r_drawsegment.h" -#include "swrenderer/scene/r_portal.h" #include "swrenderer/plane/r_visibleplane.h" +#include "swrenderer/scene/r_portal.h" +#include "swrenderer/scene/r_light.h" +#include "swrenderer/scene/r_viewport.h" #include "swrenderer/r_memory.h" +EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor); + namespace swrenderer { + void VisibleSprite::Render() + { + static short clipbot[MAXWIDTH]; + static short cliptop[MAXWIDTH]; + + VisibleSprite *spr = this; + + drawseg_t *ds; + int i; + int x1, x2; + int r1, r2; + short topclip, botclip; + short *clip1, *clip2; + FSWColormap *colormap = spr->Style.BaseColormap; + int colormapnum = spr->Style.ColormapNum; + F3DFloor *rover; + FDynamicColormap *mybasecolormap; + + Clip3DFloors *clip3d = Clip3DFloors::Instance(); + + // [RH] Check for particles + if (spr->IsParticle()) + { + // kg3D - reject invisible parts + if ((clip3d->fake3D & FAKE3D_CLIPBOTTOM) && spr->gpos.Z <= clip3d->sclipBottom) return; + if ((clip3d->fake3D & FAKE3D_CLIPTOP) && spr->gpos.Z >= clip3d->sclipTop) return; + + spr->Render(nullptr, nullptr, 0, 0); + return; + } 
+ + x1 = spr->x1; + x2 = spr->x2; + + // [RH] Quickly reject sprites with bad x ranges. + if (x1 >= x2) + return; + + // [RH] Sprites split behind a one-sided line can also be discarded. + if (spr->sector == nullptr) + return; + + // kg3D - reject invisible parts + if ((clip3d->fake3D & FAKE3D_CLIPBOTTOM) && spr->gzt <= clip3d->sclipBottom) return; + if ((clip3d->fake3D & FAKE3D_CLIPTOP) && spr->gzb >= clip3d->sclipTop) return; + + // kg3D - correct colors now + if (!fixedcolormap && fixedlightlev < 0 && spr->sector->e && spr->sector->e->XFloor.lightlist.Size()) + { + if (!(clip3d->fake3D & FAKE3D_CLIPTOP)) + { + clip3d->sclipTop = spr->sector->ceilingplane.ZatPoint(ViewPos); + } + sector_t *sec = nullptr; + for (i = spr->sector->e->XFloor.lightlist.Size() - 1; i >= 0; i--) + { + if (clip3d->sclipTop <= spr->sector->e->XFloor.lightlist[i].plane.Zat0()) + { + rover = spr->sector->e->XFloor.lightlist[i].caster; + if (rover) + { + if (rover->flags & FF_DOUBLESHADOW && clip3d->sclipTop <= rover->bottom.plane->Zat0()) + { + break; + } + sec = rover->model; + if (rover->flags & FF_FADEWALLS) + { + mybasecolormap = sec->ColorMap; + } + else + { + mybasecolormap = spr->sector->e->XFloor.lightlist[i].extra_colormap; + } + } + break; + } + } + // found new values, recalculate + if (sec) + { + INTBOOL invertcolormap = (spr->Style.RenderStyle.Flags & STYLEF_InvertOverlay); + + if (spr->Style.RenderStyle.Flags & STYLEF_InvertSource) + { + invertcolormap = !invertcolormap; + } + + // Sprites that are added to the scene must fade to black. 
+ if (spr->Style.RenderStyle == LegacyRenderStyles[STYLE_Add] && mybasecolormap->Fade != 0) + { + mybasecolormap = GetSpecialLights(mybasecolormap->Color, 0, mybasecolormap->Desaturate); + } + + if (spr->Style.RenderStyle.Flags & STYLEF_FadeToBlack) + { + if (invertcolormap) + { // Fade to white + mybasecolormap = GetSpecialLights(mybasecolormap->Color, MAKERGB(255, 255, 255), mybasecolormap->Desaturate); + invertcolormap = false; + } + else + { // Fade to black + mybasecolormap = GetSpecialLights(mybasecolormap->Color, MAKERGB(0, 0, 0), mybasecolormap->Desaturate); + } + } + + // get light level + if (invertcolormap) + { + mybasecolormap = GetSpecialLights(mybasecolormap->Color, mybasecolormap->Fade.InverseColor(), mybasecolormap->Desaturate); + } + if (fixedlightlev >= 0) + { + spr->Style.BaseColormap = mybasecolormap; + spr->Style.ColormapNum = fixedlightlev >> COLORMAPSHIFT; + } + else if (!spr->foggy && (spr->renderflags & RF_FULLBRIGHT)) + { // full bright + spr->Style.BaseColormap = (r_fullbrightignoresectorcolor) ? &FullNormalLight : mybasecolormap; + spr->Style.ColormapNum = 0; + } + else + { // diminished light + int spriteshade = LIGHT2SHADE(sec->lightlevel + R_ActualExtraLight(spr->foggy)); + spr->Style.BaseColormap = mybasecolormap; + spr->Style.ColormapNum = GETPALOOKUP(r_SpriteVisibility / MAX(MINZ, (double)spr->depth), spriteshade); + } + } + } + + // [RH] Initialize the clipping arrays to their largest possible range + // instead of using a special "not clipped" value. This eliminates + // visual anomalies when looking down and should be faster, too. + topclip = 0; + botclip = viewheight; + + // killough 3/27/98: + // Clip the sprite against deep water and/or fake ceilings. 
+ // [RH] rewrote this to be based on which part of the sector is really visible + + double scale = InvZtoScale * spr->idepth; + double hzb = DBL_MIN, hzt = DBL_MAX; + + if (spr->IsVoxel() && spr->floorclip != 0) + { + hzb = spr->gzb; + } + + if (spr->heightsec && !(spr->heightsec->MoreFlags & SECF_IGNOREHEIGHTSEC)) + { // only things in specially marked sectors + if (spr->FakeFlatStat != WaterFakeSide::AboveCeiling) + { + double hz = spr->heightsec->floorplane.ZatPoint(spr->gpos); + int h = xs_RoundToInt(CenterY - (hz - ViewPos.Z) * scale); + + if (spr->FakeFlatStat == WaterFakeSide::BelowFloor) + { // seen below floor: clip top + if (!spr->IsVoxel() && h > topclip) + { + topclip = short(MIN(h, viewheight)); + } + hzt = MIN(hzt, hz); + } + else + { // seen in the middle: clip bottom + if (!spr->IsVoxel() && h < botclip) + { + botclip = MAX(0, h); + } + hzb = MAX(hzb, hz); + } + } + if (spr->FakeFlatStat != WaterFakeSide::BelowFloor && !(spr->heightsec->MoreFlags & SECF_FAKEFLOORONLY)) + { + double hz = spr->heightsec->ceilingplane.ZatPoint(spr->gpos); + int h = xs_RoundToInt(CenterY - (hz - ViewPos.Z) * scale); + + if (spr->FakeFlatStat == WaterFakeSide::AboveCeiling) + { // seen above ceiling: clip bottom + if (!spr->IsVoxel() && h < botclip) + { + botclip = MAX(0, h); + } + hzb = MAX(hzb, hz); + } + else + { // seen in the middle: clip top + if (!spr->IsVoxel() && h > topclip) + { + topclip = MIN(h, viewheight); + } + hzt = MIN(hzt, hz); + } + } + } + // killough 3/27/98: end special clipping for deep water / fake ceilings + else if (!spr->IsVoxel() && spr->floorclip) + { // [RH] Move floorclip stuff from R_DrawVisSprite to here + //int clip = ((FLOAT2FIXED(CenterY) - FixedMul (spr->texturemid - (spr->pic->GetHeight() << FRACBITS) + spr->floorclip, spr->yscale)) >> FRACBITS); + int clip = xs_RoundToInt(CenterY - (spr->texturemid - spr->pic->GetHeight() + spr->floorclip) * spr->yscale); + if (clip < botclip) + { + botclip = MAX(0, clip); + } + } + + if 
(clip3d->fake3D & FAKE3D_CLIPBOTTOM) + { + if (!spr->IsVoxel()) + { + double hz = clip3d->sclipBottom; + if (spr->fakefloor) + { + double floorz = spr->fakefloor->top.plane->Zat0(); + if (ViewPos.Z > floorz && floorz == clip3d->sclipBottom) + { + hz = spr->fakefloor->bottom.plane->Zat0(); + } + } + int h = xs_RoundToInt(CenterY - (hz - ViewPos.Z) * scale); + if (h < botclip) + { + botclip = MAX(0, h); + } + } + hzb = MAX(hzb, clip3d->sclipBottom); + } + if (clip3d->fake3D & FAKE3D_CLIPTOP) + { + if (!spr->IsVoxel()) + { + double hz = clip3d->sclipTop; + if (spr->fakeceiling != nullptr) + { + double ceilingZ = spr->fakeceiling->bottom.plane->Zat0(); + if (ViewPos.Z < ceilingZ && ceilingZ == clip3d->sclipTop) + { + hz = spr->fakeceiling->top.plane->Zat0(); + } + } + int h = xs_RoundToInt(CenterY - (hz - ViewPos.Z) * scale); + if (h > topclip) + { + topclip = short(MIN(h, viewheight)); + } + } + hzt = MIN(hzt, clip3d->sclipTop); + } + + if (topclip >= botclip) + { + spr->Style.BaseColormap = colormap; + spr->Style.ColormapNum = colormapnum; + return; + } + + i = x2 - x1; + clip1 = clipbot + x1; + clip2 = cliptop + x1; + do + { + *clip1++ = botclip; + *clip2++ = topclip; + } while (--i); + + // Scan drawsegs from end to start for obscuring segs. + // The first drawseg that is closer than the sprite is the clip seg. 
+ + // Modified by Lee Killough: + // (pointer check was originally nonportable + // and buggy, by going past LEFT end of array): + + // for (ds=ds_p-1 ; ds >= drawsegs ; ds--) old buggy code + + for (ds = ds_p; ds-- > firstdrawseg; ) // new -- killough + { + // [ZZ] portal handling here + //if (ds->CurrentPortalUniq != spr->CurrentPortalUniq) + // continue; + // [ZZ] WARNING: uncommenting the two above lines, totally breaks sprite clipping + + // kg3D - no clipping on fake segs + if (ds->fake) continue; + // determine if the drawseg obscures the sprite + if (ds->x1 >= x2 || ds->x2 <= x1 || + (!(ds->silhouette & SIL_BOTH) && ds->maskedtexturecol == nullptr && + !ds->bFogBoundary)) + { + // does not cover sprite + continue; + } + + r1 = MAX(ds->x1, x1); + r2 = MIN(ds->x2, x2); + + float neardepth, fardepth; + if (!spr->IsWallSprite()) + { + if (ds->sz1 < ds->sz2) + { + neardepth = ds->sz1, fardepth = ds->sz2; + } + else + { + neardepth = ds->sz2, fardepth = ds->sz1; + } + } + + + // Check if sprite is in front of draw seg: + if ((!spr->IsWallSprite() && neardepth > spr->depth) || ((spr->IsWallSprite() || fardepth > spr->depth) && + (spr->gpos.Y - ds->curline->v1->fY()) * (ds->curline->v2->fX() - ds->curline->v1->fX()) - + (spr->gpos.X - ds->curline->v1->fX()) * (ds->curline->v2->fY() - ds->curline->v1->fY()) <= 0)) + { + RenderPortal *renderportal = RenderPortal::Instance(); + + // seg is behind sprite, so draw the mid texture if it has one + if (ds->CurrentPortalUniq == renderportal->CurrentPortalUniq && // [ZZ] instead, portal uniq check is made here + (ds->maskedtexturecol != nullptr || ds->bFogBoundary)) + R_RenderMaskedSegRange(ds, r1, r2); + + continue; + } + + // clip this piece of the sprite + // killough 3/27/98: optimized and made much shorter + // [RH] Optimized further (at least for VC++; + // other compilers should be at least as good as before) + + if (ds->silhouette & SIL_BOTTOM) //bottom sil + { + clip1 = clipbot + r1; + clip2 = ds->sprbottomclip + 
r1 - ds->x1; + i = r2 - r1; + do + { + if (*clip1 > *clip2) + *clip1 = *clip2; + clip1++; + clip2++; + } while (--i); + } + + if (ds->silhouette & SIL_TOP) // top sil + { + clip1 = cliptop + r1; + clip2 = ds->sprtopclip + r1 - ds->x1; + i = r2 - r1; + do + { + if (*clip1 < *clip2) + *clip1 = *clip2; + clip1++; + clip2++; + } while (--i); + } + } + + // all clipping has been performed, so draw the sprite + + if (!spr->IsVoxel()) + { + spr->Render(clipbot, cliptop, 0, 0); + } + else + { + // If it is completely clipped away, don't bother drawing it. + if (cliptop[x2] >= clipbot[x2]) + { + for (i = x1; i < x2; ++i) + { + if (cliptop[i] < clipbot[i]) + { + break; + } + } + if (i == x2) + { + spr->Style.BaseColormap = colormap; + spr->Style.ColormapNum = colormapnum; + return; + } + } + // Add everything outside the left and right edges to the clipping array + // for R_DrawVisVoxel(). + if (x1 > 0) + { + fillshort(cliptop, x1, viewheight); + } + if (x2 < viewwidth - 1) + { + fillshort(cliptop + x2, viewwidth - x2, viewheight); + } + int minvoxely = spr->gzt <= hzt ? 0 : xs_RoundToInt((spr->gzt - hzt) / spr->yscale); + int maxvoxely = spr->gzb > hzb ? 
INT_MAX : xs_RoundToInt((spr->gzt - hzb) / spr->yscale); + spr->Render(cliptop, clipbot, minvoxely, maxvoxely); + } + spr->Style.BaseColormap = colormap; + spr->Style.ColormapNum = colormapnum; + } } diff --git a/src/swrenderer/things/r_visiblesprite.h b/src/swrenderer/things/r_visiblesprite.h index 95b8028b1c..41d50aff16 100644 --- a/src/swrenderer/things/r_visiblesprite.h +++ b/src/swrenderer/things/r_visiblesprite.h @@ -19,14 +19,20 @@ #define MINZ double((2048*4) / double(1 << 20)) -struct particle_t; -struct FVoxel; - namespace swrenderer { class VisibleSprite { public: + void Render(); + + bool IsCurrentPortalUniq(int portalUniq) const { return CurrentPortalUniq == portalUniq; } + const FVector3 &WorldPos() const { return gpos; } + + double SortDist2D() const { return DVector2(deltax, deltay).LengthSquared(); } + float SortDist() const { return idepth; } + + protected: virtual bool IsParticle() const { return false; } virtual bool IsVoxel() const { return false; } virtual bool IsWallSprite() const { return false; } diff --git a/src/swrenderer/things/r_visiblespritelist.cpp b/src/swrenderer/things/r_visiblespritelist.cpp index 2ec36e9788..395499ab14 100644 --- a/src/swrenderer/things/r_visiblespritelist.cpp +++ b/src/swrenderer/things/r_visiblespritelist.cpp @@ -89,7 +89,7 @@ namespace swrenderer std::stable_sort(&SortedSprites[0], &SortedSprites[count], [](VisibleSprite *a, VisibleSprite *b) -> bool { - return DVector2(a->deltax, a->deltay).LengthSquared() < DVector2(b->deltax, b->deltay).LengthSquared(); + return a->SortDist2D() < b->SortDist2D(); }); } else @@ -98,7 +98,7 @@ namespace swrenderer std::stable_sort(&SortedSprites[0], &SortedSprites[count], [](VisibleSprite *a, VisibleSprite *b) -> bool { - return a->idepth > b->idepth; + return a->SortDist() > b->SortDist(); }); } } diff --git a/src/swrenderer/things/r_voxel.h b/src/swrenderer/things/r_voxel.h index 57992fe834..d237d9b66e 100644 --- a/src/swrenderer/things/r_voxel.h +++ 
b/src/swrenderer/things/r_voxel.h @@ -58,6 +58,7 @@ namespace swrenderer static void Deinit(); + protected: bool IsVoxel() const override { return true; } void Render(short *cliptop, short *clipbottom, int minZ, int maxZ) override; diff --git a/src/swrenderer/things/r_wallsprite.h b/src/swrenderer/things/r_wallsprite.h index 9791da678b..2123948669 100644 --- a/src/swrenderer/things/r_wallsprite.h +++ b/src/swrenderer/things/r_wallsprite.h @@ -22,6 +22,7 @@ namespace swrenderer public: static void Project(AActor *thing, const DVector3 &pos, FTextureID picnum, const DVector2 &scale, int renderflags, int spriteshade, bool foggy, FDynamicColormap *basecolormap); + protected: bool IsWallSprite() const override { return true; } void Render(short *cliptop, short *clipbottom, int minZ, int maxZ) override; From 6c76c8534b8b059da5f9e2f3f72a9967e51e2237 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 16 Jan 2017 16:23:02 +0100 Subject: [PATCH 726/912] Change visstyle_t back to how it was in ZDoom and stop using it internally in the swrenderer --- src/g_inventory/a_artifacts.cpp | 3 +- src/polyrenderer/scene/poly_playersprite.cpp | 103 ++++++++------- src/polyrenderer/scene/poly_playersprite.h | 8 +- src/polyrenderer/scene/poly_sprite.cpp | 33 ++--- src/polyrenderer/scene/poly_sprite.h | 2 +- src/r_defs.h | 5 +- src/swrenderer/things/r_particle.cpp | 22 ++-- src/swrenderer/things/r_playersprite.cpp | 130 +++++++++---------- src/swrenderer/things/r_sprite.cpp | 40 +++--- src/swrenderer/things/r_visiblesprite.cpp | 36 ++--- src/swrenderer/things/r_visiblesprite.h | 9 +- src/swrenderer/things/r_voxel.cpp | 42 +++--- src/swrenderer/things/r_wallsprite.cpp | 18 +-- 13 files changed, 231 insertions(+), 220 deletions(-) diff --git a/src/g_inventory/a_artifacts.cpp b/src/g_inventory/a_artifacts.cpp index 80c423b453..f8e34c9a73 100644 --- a/src/g_inventory/a_artifacts.cpp +++ b/src/g_inventory/a_artifacts.cpp @@ -791,8 +791,7 @@ int APowerInvisibility::AlterWeaponSprite 
(visstyle_t *vis) if ((vis->Alpha < 0.25f && special1 > 0) || (vis->Alpha == 0)) { vis->Alpha = clamp((1.f - float(Strength/100)), 0.f, 1.f); - vis->BaseColormap = &SpecialColormaps[INVERSECOLORMAP]; - vis->ColormapNum = 0; + vis->colormap = SpecialColormaps[INVERSECOLORMAP].Colormap; } return -1; // This item is valid so another one shouldn't reset the translucency } diff --git a/src/polyrenderer/scene/poly_playersprite.cpp b/src/polyrenderer/scene/poly_playersprite.cpp index e9665c8576..640d902528 100644 --- a/src/polyrenderer/scene/poly_playersprite.cpp +++ b/src/polyrenderer/scene/poly_playersprite.cpp @@ -213,36 +213,36 @@ void RenderPolyPlayerSprites::RenderSprite(DPSprite *sprite, AActor *owner, floa FDynamicColormap *basecolormap = viewsector->ColorMap; FDynamicColormap *colormap_to_use = basecolormap; - visstyle_t visstyle; - visstyle.ColormapNum = 0; - visstyle.BaseColormap = basecolormap; - visstyle.Alpha = 0; - visstyle.RenderStyle = STYLE_Normal; + int ColormapNum = 0; + FSWColormap *BaseColormap = basecolormap; + float Alpha = 0; + FRenderStyle RenderStyle; + RenderStyle = STYLE_Normal; bool foggy = false; int actualextralight = foggy ? 0 : extralight << 4; int spriteshade = LIGHT2SHADE(owner->Sector->lightlevel + actualextralight); double minz = double((2048 * 4) / double(1 << 20)); - visstyle.ColormapNum = GETPALOOKUP(swrenderer::r_SpriteVisibility / minz, spriteshade); + ColormapNum = GETPALOOKUP(swrenderer::r_SpriteVisibility / minz, spriteshade); if (sprite->GetID() < PSP_TARGETCENTER) { - visstyle.Alpha = float(owner->Alpha); - visstyle.RenderStyle = owner->RenderStyle; + Alpha = float(owner->Alpha); + RenderStyle = owner->RenderStyle; // The software renderer cannot invert the source without inverting the overlay // too. That means if the source is inverted, we need to do the reverse of what // the invert overlay flag says to do. 
- INTBOOL invertcolormap = (visstyle.RenderStyle.Flags & STYLEF_InvertOverlay); + INTBOOL invertcolormap = (RenderStyle.Flags & STYLEF_InvertOverlay); - if (visstyle.RenderStyle.Flags & STYLEF_InvertSource) + if (RenderStyle.Flags & STYLEF_InvertSource) { invertcolormap = !invertcolormap; } FDynamicColormap *mybasecolormap = basecolormap; - if (visstyle.RenderStyle.Flags & STYLEF_FadeToBlack) + if (RenderStyle.Flags & STYLEF_FadeToBlack) { if (invertcolormap) { // Fade to white @@ -258,8 +258,8 @@ void RenderPolyPlayerSprites::RenderSprite(DPSprite *sprite, AActor *owner, floa /* if (swrenderer::realfixedcolormap != nullptr && (!swrenderer::r_swtruecolor || (r_shadercolormaps && screen->Accel2D))) { // fixed color - visstyle.BaseColormap = swrenderer::realfixedcolormap; - visstyle.ColormapNum = 0; + BaseColormap = swrenderer::realfixedcolormap; + ColormapNum = 0; } else { @@ -269,49 +269,46 @@ void RenderPolyPlayerSprites::RenderSprite(DPSprite *sprite, AActor *owner, floa } if (swrenderer::fixedlightlev >= 0) { - visstyle.BaseColormap = (r_fullbrightignoresectorcolor) ? &FullNormalLight : mybasecolormap; - visstyle.ColormapNum = swrenderer::fixedlightlev >> COLORMAPSHIFT; + BaseColormap = (r_fullbrightignoresectorcolor) ? &FullNormalLight : mybasecolormap; + ColormapNum = swrenderer::fixedlightlev >> COLORMAPSHIFT; } else if (!foggy && sprite->GetState()->GetFullbright()) { // full bright - visstyle.BaseColormap = (r_fullbrightignoresectorcolor) ? &FullNormalLight : mybasecolormap; // [RH] use basecolormap - visstyle.ColormapNum = 0; + BaseColormap = (r_fullbrightignoresectorcolor) ? 
&FullNormalLight : mybasecolormap; // [RH] use basecolormap + ColormapNum = 0; } else { // local light - visstyle.BaseColormap = mybasecolormap; - visstyle.ColormapNum = GETPALOOKUP(0, spriteshade); + BaseColormap = mybasecolormap; + ColormapNum = GETPALOOKUP(0, spriteshade); } } */ if (camera->Inventory != nullptr) { - BYTE oldcolormapnum = visstyle.ColormapNum; - FSWColormap *oldcolormap = visstyle.BaseColormap; + visstyle_t visstyle; + visstyle.Alpha = Alpha; + visstyle.RenderStyle = RenderStyle; + visstyle.colormap = nullptr; // Same as the GL render is doing. + camera->Inventory->AlterWeaponSprite(&visstyle); - if (visstyle.BaseColormap != oldcolormap || visstyle.ColormapNum != oldcolormapnum) + + RenderStyle = visstyle.RenderStyle; + Alpha = visstyle.Alpha; + + // Only bother checking for the one type it changes it to until this has been ZScript'ed.. + if (visstyle.colormap == SpecialColormaps[INVERSECOLORMAP].Colormap) { - // The colormap has changed. Is it one we can easily identify? - // If not, then don't bother trying to identify it for - // hardware accelerated drawing. - if (visstyle.BaseColormap < &SpecialColormaps[0] || - visstyle.BaseColormap > &SpecialColormaps.Last()) - { - noaccel = true; - } - // Has the basecolormap changed? If so, we can't hardware accelerate it, - // since we don't know what it is anymore. - else if (visstyle.BaseColormap != mybasecolormap) - { - noaccel = true; - } + BaseColormap = &SpecialColormaps[INVERSECOLORMAP]; + ColormapNum = 0; } } + // If we're drawing with a special colormap, but shaders for them are disabled, do // not accelerate. 
- if (!r_shadercolormaps && (visstyle.BaseColormap >= &SpecialColormaps[0] && - visstyle.BaseColormap <= &SpecialColormaps.Last())) + if (!r_shadercolormaps && (BaseColormap >= &SpecialColormaps[0] && + BaseColormap <= &SpecialColormaps.Last())) { noaccel = true; } @@ -340,7 +337,7 @@ void RenderPolyPlayerSprites::RenderSprite(DPSprite *sprite, AActor *owner, floa // fuzzy, don't draw it until after the switch to 2D mode. if (!noaccel && swrenderer::RenderTarget == screen && (DFrameBuffer *)screen->Accel2D) { - FRenderStyle style = visstyle.RenderStyle; + FRenderStyle style = RenderStyle; style.CheckFuzz(); if (style.BlendOp != STYLEOP_Fuzz) { @@ -352,7 +349,10 @@ void RenderPolyPlayerSprites::RenderSprite(DPSprite *sprite, AActor *owner, floa screenSprite.Height = tex->GetHeight() * yscale; screenSprite.Translation = TranslationToTable(translation); screenSprite.Flip = xiscale < 0; - screenSprite.visstyle = visstyle; + screenSprite.Alpha = Alpha; + screenSprite.RenderStyle = RenderStyle; + screenSprite.BaseColormap = BaseColormap; + screenSprite.ColormapNum = ColormapNum; screenSprite.Colormap = colormap_to_use; ScreenSprites.push_back(screenSprite); return; @@ -362,11 +362,11 @@ void RenderPolyPlayerSprites::RenderSprite(DPSprite *sprite, AActor *owner, floa // To do: draw sprite same way as R_DrawVisSprite(vis) here // Draw the fuzzy weapon: - FRenderStyle style = visstyle.RenderStyle; + FRenderStyle style = RenderStyle; style.CheckFuzz(); if (style.BlendOp == STYLEOP_Fuzz) { - visstyle.RenderStyle = LegacyRenderStyles[STYLE_Shadow]; + RenderStyle = LegacyRenderStyles[STYLE_Shadow]; PolyScreenSprite screenSprite; screenSprite.Pic = tex; @@ -376,7 +376,10 @@ void RenderPolyPlayerSprites::RenderSprite(DPSprite *sprite, AActor *owner, floa screenSprite.Height = tex->GetHeight() * yscale; screenSprite.Translation = TranslationToTable(translation); screenSprite.Flip = xiscale < 0; - screenSprite.visstyle = visstyle; + screenSprite.Alpha = Alpha; + 
screenSprite.RenderStyle = RenderStyle; + screenSprite.BaseColormap = BaseColormap; + screenSprite.ColormapNum = ColormapNum; screenSprite.Colormap = colormap_to_use; ScreenSprites.push_back(screenSprite); } @@ -388,16 +391,16 @@ void PolyScreenSprite::Render() FColormapStyle colormapstyle; PalEntry overlay = 0; bool usecolormapstyle = false; - if (visstyle.BaseColormap >= &SpecialColormaps[0] && - visstyle.BaseColormap < &SpecialColormaps[SpecialColormaps.Size()]) + if (BaseColormap >= &SpecialColormaps[0] && + BaseColormap < &SpecialColormaps[SpecialColormaps.Size()]) { - special = static_cast(visstyle.BaseColormap); + special = static_cast(BaseColormap); } else if (Colormap->Color == PalEntry(255, 255, 255) && Colormap->Desaturate == 0) { overlay = Colormap->Fade; - overlay.a = BYTE(visstyle.ColormapNum * 255 / NUMCOLORMAPS); + overlay.a = BYTE(ColormapNum * 255 / NUMCOLORMAPS); } else { @@ -405,7 +408,7 @@ void PolyScreenSprite::Render() colormapstyle.Color = Colormap->Color; colormapstyle.Fade = Colormap->Fade; colormapstyle.Desaturate = Colormap->Desaturate; - colormapstyle.FadeLevel = visstyle.ColormapNum / float(NUMCOLORMAPS); + colormapstyle.FadeLevel = ColormapNum / float(NUMCOLORMAPS); } screen->DrawTexture(Pic, @@ -421,8 +424,8 @@ void PolyScreenSprite::Render() DTA_ClipTop, viewwindowy, DTA_ClipRight, viewwindowx + viewwidth, DTA_ClipBottom, viewwindowy + viewheight, - DTA_AlphaF, visstyle.Alpha, - DTA_RenderStyle, visstyle.RenderStyle, + DTA_AlphaF, Alpha, + DTA_RenderStyle, RenderStyle, DTA_FillColor, FillColor, DTA_SpecialColormap, special, DTA_ColorOverlay, overlay.d, diff --git a/src/polyrenderer/scene/poly_playersprite.h b/src/polyrenderer/scene/poly_playersprite.h index e7384e2007..97c6e61183 100644 --- a/src/polyrenderer/scene/poly_playersprite.h +++ b/src/polyrenderer/scene/poly_playersprite.h @@ -22,8 +22,11 @@ #pragma once +#include "r_defs.h" + class PolyScreenSprite; class DPSprite; +struct FSWColormap; class RenderPolyPlayerSprites { @@ 
-53,7 +56,10 @@ public: double Height = 0.0; FRemapTable *Translation = nullptr; bool Flip = false; - visstyle_t visstyle; + float Alpha = 1; + FRenderStyle RenderStyle; + FSWColormap *BaseColormap = nullptr; + int ColormapNum = 0; uint32_t FillColor = 0; FDynamicColormap *Colormap = nullptr; }; diff --git a/src/polyrenderer/scene/poly_sprite.cpp b/src/polyrenderer/scene/poly_sprite.cpp index c6cf88ee1a..91e35765cc 100644 --- a/src/polyrenderer/scene/poly_sprite.cpp +++ b/src/polyrenderer/scene/poly_sprite.cpp @@ -290,6 +290,7 @@ bool RenderPolySprite::IsThingCulled(AActor *thing) return false; } +#if 0 visstyle_t RenderPolySprite::GetSpriteVisStyle(AActor *thing, double z) { visstyle_t visstyle; @@ -298,16 +299,17 @@ visstyle_t RenderPolySprite::GetSpriteVisStyle(AActor *thing, double z) int actualextralight = foggy ? 0 : extralight << 4; int spriteshade = LIGHT2SHADE(thing->Sector->lightlevel + actualextralight); - visstyle.RenderStyle = thing->RenderStyle; - visstyle.Alpha = float(thing->Alpha); - visstyle.ColormapNum = 0; + FRenderStyle RenderStyle; + RenderStyle = thing->RenderStyle; + float Alpha = float(thing->Alpha); + int ColormapNum = 0; // The software renderer cannot invert the source without inverting the overlay // too. That means if the source is inverted, we need to do the reverse of what // the invert overlay flag says to do. - bool invertcolormap = (visstyle.RenderStyle.Flags & STYLEF_InvertOverlay) != 0; + bool invertcolormap = (RenderStyle.Flags & STYLEF_InvertOverlay) != 0; - if (visstyle.RenderStyle.Flags & STYLEF_InvertSource) + if (RenderStyle.Flags & STYLEF_InvertSource) { invertcolormap = !invertcolormap; } @@ -315,12 +317,12 @@ visstyle_t RenderPolySprite::GetSpriteVisStyle(AActor *thing, double z) FDynamicColormap *mybasecolormap = thing->Sector->ColorMap; // Sprites that are added to the scene must fade to black. 
- if (visstyle.RenderStyle == LegacyRenderStyles[STYLE_Add] && mybasecolormap->Fade != 0) + if (RenderStyle == LegacyRenderStyles[STYLE_Add] && mybasecolormap->Fade != 0) { mybasecolormap = GetSpecialLights(mybasecolormap->Color, 0, mybasecolormap->Desaturate); } - if (visstyle.RenderStyle.Flags & STYLEF_FadeToBlack) + if (RenderStyle.Flags & STYLEF_FadeToBlack) { if (invertcolormap) { // Fade to white @@ -336,8 +338,8 @@ visstyle_t RenderPolySprite::GetSpriteVisStyle(AActor *thing, double z) // get light level if (swrenderer::fixedcolormap != nullptr) { // fixed map - visstyle.BaseColormap = swrenderer::fixedcolormap; - visstyle.ColormapNum = 0; + BaseColormap = swrenderer::fixedcolormap; + ColormapNum = 0; } else { @@ -347,24 +349,25 @@ visstyle_t RenderPolySprite::GetSpriteVisStyle(AActor *thing, double z) } if (swrenderer::fixedlightlev >= 0) { - visstyle.BaseColormap = mybasecolormap; - visstyle.ColormapNum = swrenderer::fixedlightlev >> COLORMAPSHIFT; + BaseColormap = mybasecolormap; + ColormapNum = swrenderer::fixedlightlev >> COLORMAPSHIFT; } else if (!foggy && ((thing->renderflags & RF_FULLBRIGHT) || (thing->flags5 & MF5_BRIGHT))) { // full bright - visstyle.BaseColormap = mybasecolormap; - visstyle.ColormapNum = 0; + BaseColormap = mybasecolormap; + ColormapNum = 0; } else { // diminished light double minz = double((2048 * 4) / double(1 << 20)); - visstyle.ColormapNum = GETPALOOKUP(swrenderer::r_SpriteVisibility / MAX(z, minz), spriteshade); - visstyle.BaseColormap = mybasecolormap; + ColormapNum = GETPALOOKUP(swrenderer::r_SpriteVisibility / MAX(z, minz), spriteshade); + BaseColormap = mybasecolormap; } } return visstyle; } +#endif FTexture *RenderPolySprite::GetSpriteTexture(AActor *thing, /*out*/ bool &flipX) { diff --git a/src/polyrenderer/scene/poly_sprite.h b/src/polyrenderer/scene/poly_sprite.h index fe38dad22e..a61c7f4b5f 100644 --- a/src/polyrenderer/scene/poly_sprite.h +++ b/src/polyrenderer/scene/poly_sprite.h @@ -36,5 +36,5 @@ public: static 
FTexture *GetSpriteTexture(AActor *thing, /*out*/ bool &flipX); private: - visstyle_t GetSpriteVisStyle(AActor *thing, double z); + //visstyle_t GetSpriteVisStyle(AActor *thing, double z); }; diff --git a/src/r_defs.h b/src/r_defs.h index 7f4026d41b..81f6869f9f 100644 --- a/src/r_defs.h +++ b/src/r_defs.h @@ -1484,14 +1484,11 @@ struct FMiniBSP // typedef BYTE lighttable_t; // This could be wider for >8 bit display. -struct FSWColormap; // This encapsulates the fields of vissprite_t that can be altered by AlterWeaponSprite struct visstyle_t { - int ColormapNum; // Which colormap is rendered - FSWColormap *BaseColormap; // Base colormap used together with ColormapNum - lighttable_t *colormap; // [SP] Restored from GZDoom - will this work? + lighttable_t *colormap; float Alpha; FRenderStyle RenderStyle; }; diff --git a/src/swrenderer/things/r_particle.cpp b/src/swrenderer/things/r_particle.cpp index 3620c916b0..3174b52ea3 100644 --- a/src/swrenderer/things/r_particle.cpp +++ b/src/swrenderer/things/r_particle.cpp @@ -197,29 +197,29 @@ namespace swrenderer vis->renderflags = (short)(particle->alpha * 255.0f + 0.5f); vis->FakeFlatStat = fakeside; vis->floorclip = 0; - vis->Style.ColormapNum = 0; + vis->ColormapNum = 0; vis->foggy = foggy; if (fixedlightlev >= 0) { - vis->Style.BaseColormap = map; - vis->Style.ColormapNum = fixedlightlev >> COLORMAPSHIFT; + vis->BaseColormap = map; + vis->ColormapNum = fixedlightlev >> COLORMAPSHIFT; } else if (fixedcolormap) { - vis->Style.BaseColormap = fixedcolormap; - vis->Style.ColormapNum = 0; + vis->BaseColormap = fixedcolormap; + vis->ColormapNum = 0; } else if (particle->bright) { - vis->Style.BaseColormap = (r_fullbrightignoresectorcolor) ? &FullNormalLight : map; - vis->Style.ColormapNum = 0; + vis->BaseColormap = (r_fullbrightignoresectorcolor) ? &FullNormalLight : map; + vis->ColormapNum = 0; } else { // Particles are slightly more visible than regular sprites. 
- vis->Style.ColormapNum = GETPALOOKUP(tiz * r_SpriteVisibility * 0.5, shade); - vis->Style.BaseColormap = map; + vis->ColormapNum = GETPALOOKUP(tiz * r_SpriteVisibility * 0.5, shade); + vis->BaseColormap = map; } VisibleSpriteList::Instance()->Push(vis); @@ -232,7 +232,7 @@ namespace swrenderer auto vis = this; int spacing; - BYTE color = vis->Style.BaseColormap->Maps[vis->startfrac]; + BYTE color = vis->BaseColormap->Maps[vis->startfrac]; int yl = vis->y1; int ycount = vis->y2 - yl + 1; int x1 = vis->x1; @@ -243,7 +243,7 @@ namespace swrenderer DrawMaskedSegsBehindParticle(); - uint32_t fg = LightBgra::shade_pal_index_simple(color, LightBgra::calc_light_multiplier(LIGHTSCALE(0, vis->Style.ColormapNum << FRACBITS))); + uint32_t fg = LightBgra::shade_pal_index_simple(color, LightBgra::calc_light_multiplier(LIGHTSCALE(0, vis->ColormapNum << FRACBITS))); // vis->renderflags holds translucency level (0-255) fixed_t fglevel = ((vis->renderflags + 1) << 8) & ~0x3ff; diff --git a/src/swrenderer/things/r_playersprite.cpp b/src/swrenderer/things/r_playersprite.cpp index 0632af0741..3e9b136aa7 100644 --- a/src/swrenderer/things/r_playersprite.cpp +++ b/src/swrenderer/things/r_playersprite.cpp @@ -305,7 +305,7 @@ namespace swrenderer vis->yscale = float(pspriteyscale / tex->Scale.Y); vis->Translation = 0; // [RH] Use default colors vis->pic = tex; - vis->Style.ColormapNum = 0; + vis->ColormapNum = 0; // If flip is used, provided that it's not already flipped (that would just invert itself) // (It's an XOR...) 
@@ -335,42 +335,42 @@ namespace swrenderer if (pspr->Flags & PSPF_FORCESTYLE) { - vis->Style.RenderStyle = LegacyRenderStyles[rs]; + vis->RenderStyle = LegacyRenderStyles[rs]; } else if (owner->RenderStyle == LegacyRenderStyles[STYLE_Fuzzy]) { - vis->Style.RenderStyle = LegacyRenderStyles[STYLE_Fuzzy]; + vis->RenderStyle = LegacyRenderStyles[STYLE_Fuzzy]; } else if (owner->RenderStyle == LegacyRenderStyles[STYLE_OptFuzzy]) { - vis->Style.RenderStyle = LegacyRenderStyles[STYLE_OptFuzzy]; - vis->Style.RenderStyle.CheckFuzz(); + vis->RenderStyle = LegacyRenderStyles[STYLE_OptFuzzy]; + vis->RenderStyle.CheckFuzz(); } else if (owner->RenderStyle == LegacyRenderStyles[STYLE_Subtract]) { - vis->Style.RenderStyle = LegacyRenderStyles[STYLE_Subtract]; + vis->RenderStyle = LegacyRenderStyles[STYLE_Subtract]; } else { - vis->Style.RenderStyle = LegacyRenderStyles[rs]; + vis->RenderStyle = LegacyRenderStyles[rs]; } } else { - vis->Style.RenderStyle = owner->RenderStyle; + vis->RenderStyle = owner->RenderStyle; } // Set the alpha based on if using the overlay's own or not. Also adjust // and override the alpha if not forced. 
if (pspr->Flags & PSPF_ALPHA) { - if (vis->Style.RenderStyle == LegacyRenderStyles[STYLE_Fuzzy]) + if (vis->RenderStyle == LegacyRenderStyles[STYLE_Fuzzy]) { alpha = owner->Alpha; } - else if (vis->Style.RenderStyle == LegacyRenderStyles[STYLE_OptFuzzy]) + else if (vis->RenderStyle == LegacyRenderStyles[STYLE_OptFuzzy]) { - FRenderStyle style = vis->Style.RenderStyle; + FRenderStyle style = vis->RenderStyle; style.CheckFuzz(); switch (style.BlendOp) { @@ -384,15 +384,15 @@ namespace swrenderer } } - else if (vis->Style.RenderStyle == LegacyRenderStyles[STYLE_Subtract]) + else if (vis->RenderStyle == LegacyRenderStyles[STYLE_Subtract]) { alpha = owner->Alpha; } - else if (vis->Style.RenderStyle == LegacyRenderStyles[STYLE_Add] || - vis->Style.RenderStyle == LegacyRenderStyles[STYLE_Translucent] || - vis->Style.RenderStyle == LegacyRenderStyles[STYLE_TranslucentStencil] || - vis->Style.RenderStyle == LegacyRenderStyles[STYLE_AddStencil] || - vis->Style.RenderStyle == LegacyRenderStyles[STYLE_AddShaded]) + else if (vis->RenderStyle == LegacyRenderStyles[STYLE_Add] || + vis->RenderStyle == LegacyRenderStyles[STYLE_Translucent] || + vis->RenderStyle == LegacyRenderStyles[STYLE_TranslucentStencil] || + vis->RenderStyle == LegacyRenderStyles[STYLE_AddStencil] || + vis->RenderStyle == LegacyRenderStyles[STYLE_AddShaded]) { alpha = owner->Alpha * pspr->alpha; } @@ -405,10 +405,10 @@ namespace swrenderer // Should normal renderstyle come out on top at the end and we desire alpha, // switch it to translucent. Normal never applies any sort of alpha. 
if ((pspr->Flags & PSPF_ALPHA) && - vis->Style.RenderStyle == LegacyRenderStyles[STYLE_Normal] && - vis->Style.Alpha < 1.0) + vis->RenderStyle == LegacyRenderStyles[STYLE_Normal] && + vis->Alpha < 1.0) { - vis->Style.RenderStyle = LegacyRenderStyles[STYLE_Translucent]; + vis->RenderStyle = LegacyRenderStyles[STYLE_Translucent]; alpha = owner->Alpha * pspr->alpha; } @@ -417,22 +417,22 @@ namespace swrenderer if (pspr->Flags & PSPF_FORCEALPHA) { //Due to lack of != operators... - if (vis->Style.RenderStyle == LegacyRenderStyles[STYLE_Fuzzy] || - vis->Style.RenderStyle == LegacyRenderStyles[STYLE_SoulTrans] || - vis->Style.RenderStyle == LegacyRenderStyles[STYLE_Stencil]) + if (vis->RenderStyle == LegacyRenderStyles[STYLE_Fuzzy] || + vis->RenderStyle == LegacyRenderStyles[STYLE_SoulTrans] || + vis->RenderStyle == LegacyRenderStyles[STYLE_Stencil]) { } else { alpha = pspr->alpha; - vis->Style.RenderStyle.Flags |= STYLEF_ForceAlpha; + vis->RenderStyle.Flags |= STYLEF_ForceAlpha; } } - vis->Style.Alpha = clamp(float(alpha), 0.f, 1.f); + vis->Alpha = clamp(float(alpha), 0.f, 1.f); // Due to how some of the effects are handled, going to 0 or less causes some // weirdness to display. There's no point rendering it anyway if it's 0. - if (vis->Style.Alpha <= 0.) + if (vis->Alpha <= 0.) return; //----------------------------------------------------------------------------- @@ -440,16 +440,16 @@ namespace swrenderer // The software renderer cannot invert the source without inverting the overlay // too. That means if the source is inverted, we need to do the reverse of what // the invert overlay flag says to do. 
- INTBOOL invertcolormap = (vis->Style.RenderStyle.Flags & STYLEF_InvertOverlay); + INTBOOL invertcolormap = (vis->RenderStyle.Flags & STYLEF_InvertOverlay); - if (vis->Style.RenderStyle.Flags & STYLEF_InvertSource) + if (vis->RenderStyle.Flags & STYLEF_InvertSource) { invertcolormap = !invertcolormap; } FDynamicColormap *mybasecolormap = basecolormap; - if (vis->Style.RenderStyle.Flags & STYLEF_FadeToBlack) + if (vis->RenderStyle.Flags & STYLEF_FadeToBlack) { if (invertcolormap) { // Fade to white @@ -464,8 +464,8 @@ namespace swrenderer if (realfixedcolormap != nullptr && (!r_swtruecolor || (r_shadercolormaps && screen->Accel2D))) { // fixed color - vis->Style.BaseColormap = realfixedcolormap; - vis->Style.ColormapNum = 0; + vis->BaseColormap = realfixedcolormap; + vis->ColormapNum = 0; } else { @@ -475,47 +475,43 @@ namespace swrenderer } if (fixedlightlev >= 0) { - vis->Style.BaseColormap = (r_fullbrightignoresectorcolor) ? &FullNormalLight : mybasecolormap; - vis->Style.ColormapNum = fixedlightlev >> COLORMAPSHIFT; + vis->BaseColormap = (r_fullbrightignoresectorcolor) ? &FullNormalLight : mybasecolormap; + vis->ColormapNum = fixedlightlev >> COLORMAPSHIFT; } else if (!vis->foggy && pspr->GetState()->GetFullbright()) { // full bright - vis->Style.BaseColormap = (r_fullbrightignoresectorcolor) ? &FullNormalLight : mybasecolormap; // [RH] use basecolormap - vis->Style.ColormapNum = 0; + vis->BaseColormap = (r_fullbrightignoresectorcolor) ? 
&FullNormalLight : mybasecolormap; // [RH] use basecolormap + vis->ColormapNum = 0; } else { // local light - vis->Style.BaseColormap = mybasecolormap; - vis->Style.ColormapNum = GETPALOOKUP(0, spriteshade); + vis->BaseColormap = mybasecolormap; + vis->ColormapNum = GETPALOOKUP(0, spriteshade); } } if (camera->Inventory != nullptr) { - BYTE oldcolormapnum = vis->Style.ColormapNum; - FSWColormap *oldcolormap = vis->Style.BaseColormap; - camera->Inventory->AlterWeaponSprite(&vis->Style); - if (vis->Style.BaseColormap != oldcolormap || vis->Style.ColormapNum != oldcolormapnum) + visstyle_t visstyle; + visstyle.Alpha = vis->Alpha; + visstyle.RenderStyle = vis->RenderStyle; + visstyle.colormap = nullptr; // Same as the GL render is doing. + + camera->Inventory->AlterWeaponSprite(&visstyle); + + vis->RenderStyle = visstyle.RenderStyle; + vis->Alpha = visstyle.Alpha; + + // Only bother checking for the one type it changes it to until this has been ZScript'ed.. + if (visstyle.colormap == SpecialColormaps[INVERSECOLORMAP].Colormap) { - // The colormap has changed. Is it one we can easily identify? - // If not, then don't bother trying to identify it for - // hardware accelerated drawing. - if (vis->Style.BaseColormap < &SpecialColormaps[0] || - vis->Style.BaseColormap > &SpecialColormaps.Last()) - { - noaccel = true; - } - // Has the basecolormap changed? If so, we can't hardware accelerate it, - // since we don't know what it is anymore. - else if (vis->Style.BaseColormap != mybasecolormap) - { - noaccel = true; - } + vis->BaseColormap = &SpecialColormaps[INVERSECOLORMAP]; + vis->ColormapNum = 0; } } // If we're drawing with a special colormap, but shaders for them are disabled, do // not accelerate. 
- if (!r_shadercolormaps && (vis->Style.BaseColormap >= &SpecialColormaps[0] && - vis->Style.BaseColormap <= &SpecialColormaps.Last())) + if (!r_shadercolormaps && (vis->BaseColormap >= &SpecialColormaps[0] && + vis->BaseColormap <= &SpecialColormaps.Last())) { noaccel = true; } @@ -543,15 +539,15 @@ namespace swrenderer { colormap_to_use = basecolormap; - vis->Style.BaseColormap = basecolormap; - vis->Style.ColormapNum = 0; + vis->BaseColormap = basecolormap; + vis->ColormapNum = 0; } // Check for hardware-assisted 2D. If it's available, and this sprite is not // fuzzy, don't draw it until after the switch to 2D mode. if (!noaccel && RenderTarget == screen && (DFrameBuffer *)screen->Accel2D) { - FRenderStyle style = vis->Style.RenderStyle; + FRenderStyle style = vis->RenderStyle; style.CheckFuzz(); if (style.BlendOp != STYLEOP_Fuzz) { @@ -585,16 +581,16 @@ namespace swrenderer FColormapStyle colormapstyle; bool usecolormapstyle = false; - if (vis->Style.BaseColormap >= &SpecialColormaps[0] && - vis->Style.BaseColormap < &SpecialColormaps[SpecialColormaps.Size()]) + if (vis->BaseColormap >= &SpecialColormaps[0] && + vis->BaseColormap < &SpecialColormaps[SpecialColormaps.Size()]) { - special = static_cast(vis->Style.BaseColormap); + special = static_cast(vis->BaseColormap); } else if (colormap->Color == PalEntry(255, 255, 255) && colormap->Desaturate == 0) { overlay = colormap->Fade; - overlay.a = BYTE(vis->Style.ColormapNum * 255 / NUMCOLORMAPS); + overlay.a = BYTE(vis->ColormapNum * 255 / NUMCOLORMAPS); } else { @@ -602,7 +598,7 @@ namespace swrenderer colormapstyle.Color = colormap->Color; colormapstyle.Fade = colormap->Fade; colormapstyle.Desaturate = colormap->Desaturate; - colormapstyle.FadeLevel = vis->Style.ColormapNum / float(NUMCOLORMAPS); + colormapstyle.FadeLevel = vis->ColormapNum / float(NUMCOLORMAPS); } screen->DrawTexture(vis->pic, viewwindowx + vispsprites[i].x1, @@ -617,8 +613,8 @@ namespace swrenderer DTA_ClipTop, viewwindowy, DTA_ClipRight, 
viewwindowx + viewwidth, DTA_ClipBottom, viewwindowy + viewheight, - DTA_AlphaF, vis->Style.Alpha, - DTA_RenderStyle, vis->Style.RenderStyle, + DTA_AlphaF, vis->Alpha, + DTA_RenderStyle, vis->RenderStyle, DTA_FillColor, vis->FillColor, DTA_SpecialColormap, special, DTA_ColorOverlay, overlay.d, diff --git a/src/swrenderer/things/r_sprite.cpp b/src/swrenderer/things/r_sprite.cpp index 95c95a208d..664358698a 100644 --- a/src/swrenderer/things/r_sprite.cpp +++ b/src/swrenderer/things/r_sprite.cpp @@ -194,14 +194,14 @@ namespace swrenderer vis->renderflags = renderflags; if (thing->flags5 & MF5_BRIGHT) vis->renderflags |= RF_FULLBRIGHT; // kg3D - vis->Style.RenderStyle = thing->RenderStyle; + vis->RenderStyle = thing->RenderStyle; vis->FillColor = thing->fillcolor; vis->Translation = thing->Translation; // [RH] thing translation table vis->FakeFlatStat = fakeside; - vis->Style.Alpha = float(thing->Alpha); + vis->Alpha = float(thing->Alpha); vis->fakefloor = fakefloor; vis->fakeceiling = fakeceiling; - vis->Style.ColormapNum = 0; + vis->ColormapNum = 0; //vis->bInMirror = renderportal->MirrorFlags & RF_XFLIP; //vis->bSplitSprite = false; @@ -212,9 +212,9 @@ namespace swrenderer // The software renderer cannot invert the source without inverting the overlay // too. That means if the source is inverted, we need to do the reverse of what // the invert overlay flag says to do. - INTBOOL invertcolormap = (vis->Style.RenderStyle.Flags & STYLEF_InvertOverlay); + INTBOOL invertcolormap = (vis->RenderStyle.Flags & STYLEF_InvertOverlay); - if (vis->Style.RenderStyle.Flags & STYLEF_InvertSource) + if (vis->RenderStyle.Flags & STYLEF_InvertSource) { invertcolormap = !invertcolormap; } @@ -226,12 +226,12 @@ namespace swrenderer } // Sprites that are added to the scene must fade to black. 
- if (vis->Style.RenderStyle == LegacyRenderStyles[STYLE_Add] && mybasecolormap->Fade != 0) + if (vis->RenderStyle == LegacyRenderStyles[STYLE_Add] && mybasecolormap->Fade != 0) { mybasecolormap = GetSpecialLights(mybasecolormap->Color, 0, mybasecolormap->Desaturate); } - if (vis->Style.RenderStyle.Flags & STYLEF_FadeToBlack) + if (vis->RenderStyle.Flags & STYLEF_FadeToBlack) { if (invertcolormap) { // Fade to white @@ -247,8 +247,8 @@ namespace swrenderer // get light level if (fixedcolormap != nullptr) { // fixed map - vis->Style.BaseColormap = fixedcolormap; - vis->Style.ColormapNum = 0; + vis->BaseColormap = fixedcolormap; + vis->ColormapNum = 0; } else { @@ -258,18 +258,18 @@ namespace swrenderer } if (fixedlightlev >= 0) { - vis->Style.BaseColormap = mybasecolormap; - vis->Style.ColormapNum = fixedlightlev >> COLORMAPSHIFT; + vis->BaseColormap = mybasecolormap; + vis->ColormapNum = fixedlightlev >> COLORMAPSHIFT; } else if (!vis->foggy && ((renderflags & RF_FULLBRIGHT) || (thing->flags5 & MF5_BRIGHT))) { // full bright - vis->Style.BaseColormap = (r_fullbrightignoresectorcolor) ? &FullNormalLight : mybasecolormap; - vis->Style.ColormapNum = 0; + vis->BaseColormap = (r_fullbrightignoresectorcolor) ? 
&FullNormalLight : mybasecolormap; + vis->ColormapNum = 0; } else { // diminished light - vis->Style.ColormapNum = GETPALOOKUP(r_SpriteVisibility / MAX(tz, MINZ), spriteshade); - vis->Style.BaseColormap = mybasecolormap; + vis->ColormapNum = GETPALOOKUP(r_SpriteVisibility / MAX(tz, MINZ), spriteshade); + vis->BaseColormap = mybasecolormap; } } @@ -295,17 +295,17 @@ namespace swrenderer } fixed_t centeryfrac = FLOAT2FIXED(CenterY); - R_SetColorMapLight(vis->Style.BaseColormap, 0, vis->Style.ColormapNum << FRACBITS); + R_SetColorMapLight(vis->BaseColormap, 0, vis->ColormapNum << FRACBITS); - FDynamicColormap *basecolormap = static_cast(vis->Style.BaseColormap); + FDynamicColormap *basecolormap = static_cast(vis->BaseColormap); - bool visible = R_SetPatchStyle(vis->Style.RenderStyle, vis->Style.Alpha, vis->Translation, vis->FillColor, basecolormap); + bool visible = R_SetPatchStyle(vis->RenderStyle, vis->Alpha, vis->Translation, vis->FillColor, basecolormap); - if (vis->Style.RenderStyle == LegacyRenderStyles[STYLE_Shaded]) + if (vis->RenderStyle == LegacyRenderStyles[STYLE_Shaded]) { // For shaded sprites, R_SetPatchStyle sets a dc_colormap to an alpha table, but // it is the brightest one. We need to get back to the proper light level for // this sprite. 
- R_SetColorMapLight(drawerargs::dc_fcolormap, 0, vis->Style.ColormapNum << FRACBITS); + R_SetColorMapLight(drawerargs::dc_fcolormap, 0, vis->ColormapNum << FRACBITS); } if (visible) diff --git a/src/swrenderer/things/r_visiblesprite.cpp b/src/swrenderer/things/r_visiblesprite.cpp index d0551377c8..a350cda64a 100644 --- a/src/swrenderer/things/r_visiblesprite.cpp +++ b/src/swrenderer/things/r_visiblesprite.cpp @@ -52,8 +52,8 @@ namespace swrenderer int r1, r2; short topclip, botclip; short *clip1, *clip2; - FSWColormap *colormap = spr->Style.BaseColormap; - int colormapnum = spr->Style.ColormapNum; + FSWColormap *colormap = spr->BaseColormap; + int colormapnum = spr->ColormapNum; F3DFloor *rover; FDynamicColormap *mybasecolormap; @@ -120,20 +120,20 @@ namespace swrenderer // found new values, recalculate if (sec) { - INTBOOL invertcolormap = (spr->Style.RenderStyle.Flags & STYLEF_InvertOverlay); + INTBOOL invertcolormap = (spr->RenderStyle.Flags & STYLEF_InvertOverlay); - if (spr->Style.RenderStyle.Flags & STYLEF_InvertSource) + if (spr->RenderStyle.Flags & STYLEF_InvertSource) { invertcolormap = !invertcolormap; } // Sprites that are added to the scene must fade to black. 
- if (spr->Style.RenderStyle == LegacyRenderStyles[STYLE_Add] && mybasecolormap->Fade != 0) + if (spr->RenderStyle == LegacyRenderStyles[STYLE_Add] && mybasecolormap->Fade != 0) { mybasecolormap = GetSpecialLights(mybasecolormap->Color, 0, mybasecolormap->Desaturate); } - if (spr->Style.RenderStyle.Flags & STYLEF_FadeToBlack) + if (spr->RenderStyle.Flags & STYLEF_FadeToBlack) { if (invertcolormap) { // Fade to white @@ -153,19 +153,19 @@ namespace swrenderer } if (fixedlightlev >= 0) { - spr->Style.BaseColormap = mybasecolormap; - spr->Style.ColormapNum = fixedlightlev >> COLORMAPSHIFT; + spr->BaseColormap = mybasecolormap; + spr->ColormapNum = fixedlightlev >> COLORMAPSHIFT; } else if (!spr->foggy && (spr->renderflags & RF_FULLBRIGHT)) { // full bright - spr->Style.BaseColormap = (r_fullbrightignoresectorcolor) ? &FullNormalLight : mybasecolormap; - spr->Style.ColormapNum = 0; + spr->BaseColormap = (r_fullbrightignoresectorcolor) ? &FullNormalLight : mybasecolormap; + spr->ColormapNum = 0; } else { // diminished light int spriteshade = LIGHT2SHADE(sec->lightlevel + R_ActualExtraLight(spr->foggy)); - spr->Style.BaseColormap = mybasecolormap; - spr->Style.ColormapNum = GETPALOOKUP(r_SpriteVisibility / MAX(MINZ, (double)spr->depth), spriteshade); + spr->BaseColormap = mybasecolormap; + spr->ColormapNum = GETPALOOKUP(r_SpriteVisibility / MAX(MINZ, (double)spr->depth), spriteshade); } } } @@ -291,8 +291,8 @@ namespace swrenderer if (topclip >= botclip) { - spr->Style.BaseColormap = colormap; - spr->Style.ColormapNum = colormapnum; + spr->BaseColormap = colormap; + spr->ColormapNum = colormapnum; return; } @@ -418,8 +418,8 @@ namespace swrenderer } if (i == x2) { - spr->Style.BaseColormap = colormap; - spr->Style.ColormapNum = colormapnum; + spr->BaseColormap = colormap; + spr->ColormapNum = colormapnum; return; } } @@ -437,7 +437,7 @@ namespace swrenderer int maxvoxely = spr->gzb > hzb ? 
INT_MAX : xs_RoundToInt((spr->gzt - hzb) / spr->yscale); spr->Render(cliptop, clipbot, minvoxely, maxvoxely); } - spr->Style.BaseColormap = colormap; - spr->Style.ColormapNum = colormapnum; + spr->BaseColormap = colormap; + spr->ColormapNum = colormapnum; } } diff --git a/src/swrenderer/things/r_visiblesprite.h b/src/swrenderer/things/r_visiblesprite.h index 41d50aff16..2339ca3825 100644 --- a/src/swrenderer/things/r_visiblesprite.h +++ b/src/swrenderer/things/r_visiblesprite.h @@ -19,11 +19,15 @@ #define MINZ double((2048*4) / double(1 << 20)) +struct FSWColormap; + namespace swrenderer { class VisibleSprite { public: + virtual ~VisibleSprite() { } + void Render(); bool IsCurrentPortalUniq(int portalUniq) const { return CurrentPortalUniq == portalUniq; } @@ -59,7 +63,10 @@ namespace swrenderer sector_t *sector; // sector this sprite is in // Light shared calculation? - visstyle_t Style; + int ColormapNum; // Which colormap is rendered + FSWColormap *BaseColormap; // Base colormap used together with ColormapNum + float Alpha; + FRenderStyle RenderStyle; bool foggy; short renderflags; diff --git a/src/swrenderer/things/r_voxel.cpp b/src/swrenderer/things/r_voxel.cpp index 7ff15c7be3..fa6f35e854 100644 --- a/src/swrenderer/things/r_voxel.cpp +++ b/src/swrenderer/things/r_voxel.cpp @@ -142,14 +142,14 @@ namespace swrenderer vis->renderflags = renderflags; if (thing->flags5 & MF5_BRIGHT) vis->renderflags |= RF_FULLBRIGHT; // kg3D - vis->Style.RenderStyle = thing->RenderStyle; + vis->RenderStyle = thing->RenderStyle; vis->FillColor = thing->fillcolor; vis->Translation = thing->Translation; // [RH] thing translation table vis->FakeFlatStat = fakeside; - vis->Style.Alpha = float(thing->Alpha); + vis->Alpha = float(thing->Alpha); vis->fakefloor = fakefloor; vis->fakeceiling = fakeceiling; - vis->Style.ColormapNum = 0; + vis->ColormapNum = 0; //vis->bInMirror = renderportal->MirrorFlags & RF_XFLIP; //vis->bSplitSprite = false; @@ -159,9 +159,9 @@ namespace swrenderer // The 
software renderer cannot invert the source without inverting the overlay // too. That means if the source is inverted, we need to do the reverse of what // the invert overlay flag says to do. - INTBOOL invertcolormap = (vis->Style.RenderStyle.Flags & STYLEF_InvertOverlay); + INTBOOL invertcolormap = (vis->RenderStyle.Flags & STYLEF_InvertOverlay); - if (vis->Style.RenderStyle.Flags & STYLEF_InvertSource) + if (vis->RenderStyle.Flags & STYLEF_InvertSource) { invertcolormap = !invertcolormap; } @@ -173,12 +173,12 @@ namespace swrenderer } // Sprites that are added to the scene must fade to black. - if (vis->Style.RenderStyle == LegacyRenderStyles[STYLE_Add] && mybasecolormap->Fade != 0) + if (vis->RenderStyle == LegacyRenderStyles[STYLE_Add] && mybasecolormap->Fade != 0) { mybasecolormap = GetSpecialLights(mybasecolormap->Color, 0, mybasecolormap->Desaturate); } - if (vis->Style.RenderStyle.Flags & STYLEF_FadeToBlack) + if (vis->RenderStyle.Flags & STYLEF_FadeToBlack) { if (invertcolormap) { // Fade to white @@ -194,8 +194,8 @@ namespace swrenderer // get light level if (fixedcolormap != nullptr) { // fixed map - vis->Style.BaseColormap = fixedcolormap; - vis->Style.ColormapNum = 0; + vis->BaseColormap = fixedcolormap; + vis->ColormapNum = 0; } else { @@ -206,18 +206,18 @@ namespace swrenderer if (fixedlightlev >= 0) { - vis->Style.BaseColormap = mybasecolormap; - vis->Style.ColormapNum = fixedlightlev >> COLORMAPSHIFT; + vis->BaseColormap = mybasecolormap; + vis->ColormapNum = fixedlightlev >> COLORMAPSHIFT; } else if (!vis->foggy && ((renderflags & RF_FULLBRIGHT) || (thing->flags5 & MF5_BRIGHT))) { // full bright - vis->Style.BaseColormap = (r_fullbrightignoresectorcolor) ? &FullNormalLight : mybasecolormap; - vis->Style.ColormapNum = 0; + vis->BaseColormap = (r_fullbrightignoresectorcolor) ? 
&FullNormalLight : mybasecolormap; + vis->ColormapNum = 0; } else { // diminished light - vis->Style.ColormapNum = GETPALOOKUP(r_SpriteVisibility / MAX(tz, MINZ), spriteshade); - vis->Style.BaseColormap = mybasecolormap; + vis->ColormapNum = GETPALOOKUP(r_SpriteVisibility / MAX(tz, MINZ), spriteshade); + vis->BaseColormap = mybasecolormap; } } @@ -229,11 +229,11 @@ namespace swrenderer { auto sprite = this; - FDynamicColormap *basecolormap = static_cast<FDynamicColormap*>(sprite->Style.BaseColormap); + FDynamicColormap *basecolormap = static_cast<FDynamicColormap*>(sprite->BaseColormap); - R_SetColorMapLight(sprite->Style.BaseColormap, 0, sprite->Style.ColormapNum << FRACBITS); + R_SetColorMapLight(sprite->BaseColormap, 0, sprite->ColormapNum << FRACBITS); - bool visible = R_SetPatchStyle(sprite->Style.RenderStyle, sprite->Style.Alpha, sprite->Translation, sprite->FillColor, basecolormap); + bool visible = R_SetPatchStyle(sprite->RenderStyle, sprite->Alpha, sprite->Translation, sprite->FillColor, basecolormap); if (!visible) return; @@ -620,8 +620,8 @@ namespace swrenderer int flags = 0; // Do setup for blending. - R_SetColorMapLight(spr->Style.BaseColormap, 0, spr->Style.ColormapNum << FRACBITS); - bool visible = R_SetPatchStyle(spr->Style.RenderStyle, spr->Style.Alpha, spr->Translation, spr->FillColor); + R_SetColorMapLight(spr->BaseColormap, 0, spr->ColormapNum << FRACBITS); + bool visible = R_SetPatchStyle(spr->RenderStyle, spr->Alpha, spr->Translation, spr->FillColor); if (!visible) { @@ -646,7 +646,7 @@ namespace swrenderer // Render the voxel, either directly to the screen or offscreen. R_DrawVoxel(spr->pa.vpos, spr->pa.vang, spr->gpos, spr->Angle, - spr->xscale, FLOAT2FIXED(spr->yscale), spr->voxel, spr->Style.BaseColormap, spr->Style.ColormapNum, cliptop, clipbot, + spr->xscale, FLOAT2FIXED(spr->yscale), spr->voxel, spr->BaseColormap, spr->ColormapNum, cliptop, clipbot, minslabz, maxslabz, flags); // Blend the voxel, if that's what we need to do. 
diff --git a/src/swrenderer/things/r_wallsprite.cpp b/src/swrenderer/things/r_wallsprite.cpp index 3f8ecbaa79..1a520cfe76 100644 --- a/src/swrenderer/things/r_wallsprite.cpp +++ b/src/swrenderer/things/r_wallsprite.cpp @@ -120,17 +120,17 @@ namespace swrenderer vis->deltay = float(pos.Y - ViewPos.Y); vis->renderflags = renderflags; if (thing->flags5 & MF5_BRIGHT) vis->renderflags |= RF_FULLBRIGHT; // kg3D - vis->Style.RenderStyle = thing->RenderStyle; + vis->RenderStyle = thing->RenderStyle; vis->FillColor = thing->fillcolor; vis->Translation = thing->Translation; vis->FakeFlatStat = WaterFakeSide::Center; - vis->Style.Alpha = float(thing->Alpha); + vis->Alpha = float(thing->Alpha); vis->fakefloor = NULL; vis->fakeceiling = NULL; //vis->bInMirror = renderportal->MirrorFlags & RF_XFLIP; vis->pic = pic; - vis->Style.ColormapNum = GETPALOOKUP(r_SpriteVisibility / MAX(tz, MINZ), spriteshade); - vis->Style.BaseColormap = basecolormap; + vis->ColormapNum = GETPALOOKUP(r_SpriteVisibility / MAX(tz, MINZ), spriteshade); + vis->BaseColormap = basecolormap; vis->wallc = wallc; vis->foggy = foggy; @@ -165,11 +165,11 @@ namespace swrenderer } // Prepare lighting bool calclighting = false; - FSWColormap *usecolormap = spr->Style.BaseColormap; + FSWColormap *usecolormap = spr->BaseColormap; bool rereadcolormap = true; // Decals that are added to the scene must fade to black. 
- if (spr->Style.RenderStyle == LegacyRenderStyles[STYLE_Add] && usecolormap->Fade != 0) + if (spr->RenderStyle == LegacyRenderStyles[STYLE_Add] && usecolormap->Fade != 0) { usecolormap = GetSpecialLights(usecolormap->Color, 0, usecolormap->Desaturate); rereadcolormap = false; @@ -206,14 +206,14 @@ namespace swrenderer int x = x1; - FDynamicColormap *basecolormap = static_cast<FDynamicColormap*>(spr->Style.BaseColormap); + FDynamicColormap *basecolormap = static_cast<FDynamicColormap*>(spr->BaseColormap); - bool visible = R_SetPatchStyle(spr->Style.RenderStyle, spr->Style.Alpha, spr->Translation, spr->FillColor, basecolormap); + bool visible = R_SetPatchStyle(spr->RenderStyle, spr->Alpha, spr->Translation, spr->FillColor, basecolormap); // R_SetPatchStyle can modify basecolormap. if (rereadcolormap) { - usecolormap = spr->Style.BaseColormap; + usecolormap = spr->BaseColormap; } if (!visible) From 906c944895a23925fb7ace9af5c39be0ae7b493b Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 17 Jan 2017 01:30:12 +0100 Subject: [PATCH 727/912] Merge colormap selection into one function --- src/swrenderer/things/r_particle.cpp | 25 +------ src/swrenderer/things/r_playersprite.cpp | 62 +++------------- src/swrenderer/things/r_sprite.cpp | 49 +----------- src/swrenderer/things/r_visiblesprite.cpp | 90 +++++++++++++---------- src/swrenderer/things/r_visiblesprite.h | 2 + src/swrenderer/things/r_voxel.cpp | 46 +----------- src/swrenderer/things/r_wallsprite.cpp | 4 +- 7 files changed, 74 insertions(+), 204 deletions(-) diff --git a/src/swrenderer/things/r_particle.cpp b/src/swrenderer/things/r_particle.cpp index 3174b52ea3..4cf4803ce7 100644 --- a/src/swrenderer/things/r_particle.cpp +++ b/src/swrenderer/things/r_particle.cpp @@ -67,7 +67,6 @@ namespace swrenderer double xscale, yscale; int x1, x2, y1, y2; sector_t* heightsec = NULL; - FSWColormap* map; RenderPortal *renderportal = RenderPortal::Instance(); @@ -135,6 +134,7 @@ namespace swrenderer const secplane_t *botplane; FTextureID toppic; FTextureID 
botpic; + FDynamicColormap *map; if (heightsec) // only clip things which are in special sectors { @@ -200,27 +200,8 @@ namespace swrenderer vis->ColormapNum = 0; vis->foggy = foggy; - if (fixedlightlev >= 0) - { - vis->BaseColormap = map; - vis->ColormapNum = fixedlightlev >> COLORMAPSHIFT; - } - else if (fixedcolormap) - { - vis->BaseColormap = fixedcolormap; - vis->ColormapNum = 0; - } - else if (particle->bright) - { - vis->BaseColormap = (r_fullbrightignoresectorcolor) ? &FullNormalLight : map; - vis->ColormapNum = 0; - } - else - { - // Particles are slightly more visible than regular sprites. - vis->ColormapNum = GETPALOOKUP(tiz * r_SpriteVisibility * 0.5, shade); - vis->BaseColormap = map; - } + // Particles are slightly more visible than regular sprites. + vis->SetColormap(tiz * r_SpriteVisibility * 0.5, shade, map, particle->bright != 0, false, false); VisibleSpriteList::Instance()->Push(vis); } diff --git a/src/swrenderer/things/r_playersprite.cpp b/src/swrenderer/things/r_playersprite.cpp index 337603793c..0d35fdea49 100644 --- a/src/swrenderer/things/r_playersprite.cpp +++ b/src/swrenderer/things/r_playersprite.cpp @@ -440,55 +440,20 @@ namespace swrenderer // The software renderer cannot invert the source without inverting the overlay // too. That means if the source is inverted, we need to do the reverse of what // the invert overlay flag says to do. 
- INTBOOL invertcolormap = (vis->RenderStyle.Flags & STYLEF_InvertOverlay); + bool invertcolormap = (vis->RenderStyle.Flags & STYLEF_InvertOverlay) != 0; if (vis->RenderStyle.Flags & STYLEF_InvertSource) { invertcolormap = !invertcolormap; } - FDynamicColormap *mybasecolormap = basecolormap; + bool fullbright = !vis->foggy && pspr->GetState()->GetFullbright(); + bool fadeToBlack = (vis->RenderStyle.Flags & STYLEF_FadeToBlack) != 0; - if (vis->RenderStyle.Flags & STYLEF_FadeToBlack) - { - if (invertcolormap) - { // Fade to white - mybasecolormap = GetSpecialLights(mybasecolormap->Color, MAKERGB(255, 255, 255), mybasecolormap->Desaturate); - invertcolormap = false; - } - else - { // Fade to black - mybasecolormap = GetSpecialLights(mybasecolormap->Color, MAKERGB(0, 0, 0), mybasecolormap->Desaturate); - } - } + vis->SetColormap(0, spriteshade, basecolormap, fullbright, invertcolormap, fadeToBlack); + + colormap_to_use = (FDynamicColormap*)vis->BaseColormap; - if (realfixedcolormap != nullptr && (!r_swtruecolor || (r_shadercolormaps && screen->Accel2D))) - { // fixed color - vis->BaseColormap = realfixedcolormap; - vis->ColormapNum = 0; - } - else - { - if (invertcolormap) - { - mybasecolormap = GetSpecialLights(mybasecolormap->Color, mybasecolormap->Fade.InverseColor(), mybasecolormap->Desaturate); - } - if (fixedlightlev >= 0) - { - vis->BaseColormap = (r_fullbrightignoresectorcolor) ? &FullNormalLight : mybasecolormap; - vis->ColormapNum = fixedlightlev >> COLORMAPSHIFT; - } - else if (!vis->foggy && pspr->GetState()->GetFullbright()) - { // full bright - vis->BaseColormap = (r_fullbrightignoresectorcolor) ? 
&FullNormalLight : mybasecolormap; // [RH] use basecolormap - vis->ColormapNum = 0; - } - else - { // local light - vis->BaseColormap = mybasecolormap; - vis->ColormapNum = GETPALOOKUP(0, spriteshade); - } - } if (camera->Inventory != nullptr) { visstyle_t visstyle; @@ -509,10 +474,7 @@ namespace swrenderer { vis->BaseColormap = &SpecialColormaps[INVERSECOLORMAP]; vis->ColormapNum = 0; - if (vis->BaseColormap->Maps < mybasecolormap->Maps || vis->BaseColormap->Maps >= mybasecolormap->Maps + NUMCOLORMAPS * 256) - { - noaccel = true; - } + noaccel = true; } } // If we're drawing with a special colormap, but shaders for them are disabled, do @@ -523,24 +485,18 @@ namespace swrenderer noaccel = true; } // If drawing with a BOOM colormap, disable acceleration. - if (mybasecolormap == &NormalLight && NormalLight.Maps != realcolormaps.Maps) + if (vis->BaseColormap == &NormalLight && NormalLight.Maps != realcolormaps.Maps) { noaccel = true; } // If the main colormap has fixed lights, and this sprite is being drawn with that // colormap, disable acceleration so that the lights can remain fixed. if (!noaccel && realfixedcolormap == nullptr && - NormalLightHasFixedLights && mybasecolormap == &NormalLight && + NormalLightHasFixedLights && vis->BaseColormap == &NormalLight && vis->pic->UseBasePalette()) { noaccel = true; } - // [SP] If emulating GZDoom fullbright, disable acceleration - if (r_fullbrightignoresectorcolor && fixedlightlev >= 0) - mybasecolormap = &FullNormalLight; - if (r_fullbrightignoresectorcolor && !vis->foggy && pspr->GetState()->GetFullbright()) - mybasecolormap = &FullNormalLight; - colormap_to_use = mybasecolormap; } else { diff --git a/src/swrenderer/things/r_sprite.cpp b/src/swrenderer/things/r_sprite.cpp index 664358698a..e58e5b428e 100644 --- a/src/swrenderer/things/r_sprite.cpp +++ b/src/swrenderer/things/r_sprite.cpp @@ -212,12 +212,9 @@ namespace swrenderer // The software renderer cannot invert the source without inverting the overlay // too. 
That means if the source is inverted, we need to do the reverse of what // the invert overlay flag says to do. - INTBOOL invertcolormap = (vis->RenderStyle.Flags & STYLEF_InvertOverlay); - + bool invertcolormap = (vis->RenderStyle.Flags & STYLEF_InvertOverlay) != 0; if (vis->RenderStyle.Flags & STYLEF_InvertSource) - { invertcolormap = !invertcolormap; - } FDynamicColormap *mybasecolormap = basecolormap; if (current_sector->sectornum != thing->Sector->sectornum) // compare sectornums to account for R_FakeFlat copies. @@ -225,53 +222,15 @@ namespace swrenderer // Todo: The actor is from a different sector so we have to retrieve the proper basecolormap for that sector. } - // Sprites that are added to the scene must fade to black. if (vis->RenderStyle == LegacyRenderStyles[STYLE_Add] && mybasecolormap->Fade != 0) { mybasecolormap = GetSpecialLights(mybasecolormap->Color, 0, mybasecolormap->Desaturate); } - if (vis->RenderStyle.Flags & STYLEF_FadeToBlack) - { - if (invertcolormap) - { // Fade to white - mybasecolormap = GetSpecialLights(mybasecolormap->Color, MAKERGB(255, 255, 255), mybasecolormap->Desaturate); - invertcolormap = false; - } - else - { // Fade to black - mybasecolormap = GetSpecialLights(mybasecolormap->Color, MAKERGB(0, 0, 0), mybasecolormap->Desaturate); - } - } + bool fullbright = !vis->foggy && ((renderflags & RF_FULLBRIGHT) || (thing->flags5 & MF5_BRIGHT)); + bool fadeToBlack = (vis->RenderStyle.Flags & STYLEF_FadeToBlack) != 0; - // get light level - if (fixedcolormap != nullptr) - { // fixed map - vis->BaseColormap = fixedcolormap; - vis->ColormapNum = 0; - } - else - { - if (invertcolormap) - { - mybasecolormap = GetSpecialLights(mybasecolormap->Color, mybasecolormap->Fade.InverseColor(), mybasecolormap->Desaturate); - } - if (fixedlightlev >= 0) - { - vis->BaseColormap = mybasecolormap; - vis->ColormapNum = fixedlightlev >> COLORMAPSHIFT; - } - else if (!vis->foggy && ((renderflags & RF_FULLBRIGHT) || (thing->flags5 & MF5_BRIGHT))) - { // full 
bright - vis->BaseColormap = (r_fullbrightignoresectorcolor) ? &FullNormalLight : mybasecolormap; - vis->ColormapNum = 0; - } - else - { // diminished light - vis->ColormapNum = GETPALOOKUP(r_SpriteVisibility / MAX(tz, MINZ), spriteshade); - vis->BaseColormap = mybasecolormap; - } - } + vis->SetColormap(r_SpriteVisibility / MAX(tz, MINZ), spriteshade, mybasecolormap, fullbright, invertcolormap, fadeToBlack); VisibleSpriteList::Instance()->Push(vis); } diff --git a/src/swrenderer/things/r_visiblesprite.cpp b/src/swrenderer/things/r_visiblesprite.cpp index a350cda64a..5007cd10d1 100644 --- a/src/swrenderer/things/r_visiblesprite.cpp +++ b/src/swrenderer/things/r_visiblesprite.cpp @@ -55,7 +55,6 @@ namespace swrenderer FSWColormap *colormap = spr->BaseColormap; int colormapnum = spr->ColormapNum; F3DFloor *rover; - FDynamicColormap *mybasecolormap; Clip3DFloors *clip3d = Clip3DFloors::Instance(); @@ -93,6 +92,7 @@ namespace swrenderer clip3d->sclipTop = spr->sector->ceilingplane.ZatPoint(ViewPos); } sector_t *sec = nullptr; + FDynamicColormap *mybasecolormap = nullptr; for (i = spr->sector->e->XFloor.lightlist.Size() - 1; i >= 0; i--) { if (clip3d->sclipTop <= spr->sector->e->XFloor.lightlist[i].plane.Zat0()) @@ -120,53 +120,21 @@ namespace swrenderer // found new values, recalculate if (sec) { - INTBOOL invertcolormap = (spr->RenderStyle.Flags & STYLEF_InvertOverlay); - + bool invertcolormap = (spr->RenderStyle.Flags & STYLEF_InvertOverlay) != 0; if (spr->RenderStyle.Flags & STYLEF_InvertSource) - { invertcolormap = !invertcolormap; - } - // Sprites that are added to the scene must fade to black. 
- if (spr->RenderStyle == LegacyRenderStyles[STYLE_Add] && mybasecolormap->Fade != 0) + if (RenderStyle == LegacyRenderStyles[STYLE_Add] && mybasecolormap->Fade != 0) { mybasecolormap = GetSpecialLights(mybasecolormap->Color, 0, mybasecolormap->Desaturate); } - if (spr->RenderStyle.Flags & STYLEF_FadeToBlack) - { - if (invertcolormap) - { // Fade to white - mybasecolormap = GetSpecialLights(mybasecolormap->Color, MAKERGB(255, 255, 255), mybasecolormap->Desaturate); - invertcolormap = false; - } - else - { // Fade to black - mybasecolormap = GetSpecialLights(mybasecolormap->Color, MAKERGB(0, 0, 0), mybasecolormap->Desaturate); - } - } + bool isFullBright = !foggy && (renderflags & RF_FULLBRIGHT); + bool fadeToBlack = spr->RenderStyle == LegacyRenderStyles[STYLE_Add] && mybasecolormap->Fade != 0; - // get light level - if (invertcolormap) - { - mybasecolormap = GetSpecialLights(mybasecolormap->Color, mybasecolormap->Fade.InverseColor(), mybasecolormap->Desaturate); - } - if (fixedlightlev >= 0) - { - spr->BaseColormap = mybasecolormap; - spr->ColormapNum = fixedlightlev >> COLORMAPSHIFT; - } - else if (!spr->foggy && (spr->renderflags & RF_FULLBRIGHT)) - { // full bright - spr->BaseColormap = (r_fullbrightignoresectorcolor) ? 
&FullNormalLight : mybasecolormap; - spr->ColormapNum = 0; - } - else - { // diminished light - int spriteshade = LIGHT2SHADE(sec->lightlevel + R_ActualExtraLight(spr->foggy)); - spr->BaseColormap = mybasecolormap; - spr->ColormapNum = GETPALOOKUP(r_SpriteVisibility / MAX(MINZ, (double)spr->depth), spriteshade); - } + int spriteshade = LIGHT2SHADE(sec->lightlevel + R_ActualExtraLight(spr->foggy)); + + SetColormap(r_SpriteVisibility / MAX(MINZ, (double)spr->depth), spriteshade, mybasecolormap, isFullBright, invertcolormap, fadeToBlack); } } @@ -440,4 +408,46 @@ namespace swrenderer spr->BaseColormap = colormap; spr->ColormapNum = colormapnum; } + + void VisibleSprite::SetColormap(double visibility, int shade, FDynamicColormap *basecolormap, bool fullbright, bool invertColormap, bool fadeToBlack) + { + if (fadeToBlack) + { + if (invertColormap) // Fade to white + { + basecolormap = GetSpecialLights(basecolormap->Color, MAKERGB(255, 255, 255), basecolormap->Desaturate); + invertColormap = false; + } + else // Fade to black + { + basecolormap = GetSpecialLights(basecolormap->Color, MAKERGB(0, 0, 0), basecolormap->Desaturate); + } + } + + if (invertColormap) + { + basecolormap = GetSpecialLights(basecolormap->Color, basecolormap->Fade.InverseColor(), basecolormap->Desaturate); + } + + if (fixedcolormap) + { + BaseColormap = fixedcolormap; + ColormapNum = 0; + } + else if (fixedlightlev >= 0) + { + BaseColormap = (r_fullbrightignoresectorcolor) ? &FullNormalLight : basecolormap; + ColormapNum = fixedlightlev >> COLORMAPSHIFT; + } + else if (fullbright) + { + BaseColormap = (r_fullbrightignoresectorcolor) ? 
&FullNormalLight : basecolormap; + ColormapNum = 0; + } + else + { + BaseColormap = basecolormap; + ColormapNum = GETPALOOKUP(visibility, shade); + } + } } diff --git a/src/swrenderer/things/r_visiblesprite.h b/src/swrenderer/things/r_visiblesprite.h index 2339ca3825..73b84ebae8 100644 --- a/src/swrenderer/things/r_visiblesprite.h +++ b/src/swrenderer/things/r_visiblesprite.h @@ -37,6 +37,8 @@ namespace swrenderer float SortDist() const { return idepth; } protected: + void SetColormap(double visibility, int shade, FDynamicColormap *basecolormap, bool fullbright, bool invertColormap, bool fadeToBlack); + virtual bool IsParticle() const { return false; } virtual bool IsVoxel() const { return false; } virtual bool IsWallSprite() const { return false; } diff --git a/src/swrenderer/things/r_voxel.cpp b/src/swrenderer/things/r_voxel.cpp index fa6f35e854..b34caf1e49 100644 --- a/src/swrenderer/things/r_voxel.cpp +++ b/src/swrenderer/things/r_voxel.cpp @@ -159,7 +159,7 @@ namespace swrenderer // The software renderer cannot invert the source without inverting the overlay // too. That means if the source is inverted, we need to do the reverse of what // the invert overlay flag says to do. 
- INTBOOL invertcolormap = (vis->RenderStyle.Flags & STYLEF_InvertOverlay); + bool invertcolormap = (vis->RenderStyle.Flags & STYLEF_InvertOverlay) != 0; if (vis->RenderStyle.Flags & STYLEF_InvertSource) { @@ -178,48 +178,10 @@ namespace swrenderer mybasecolormap = GetSpecialLights(mybasecolormap->Color, 0, mybasecolormap->Desaturate); } - if (vis->RenderStyle.Flags & STYLEF_FadeToBlack) - { - if (invertcolormap) - { // Fade to white - mybasecolormap = GetSpecialLights(mybasecolormap->Color, MAKERGB(255, 255, 255), mybasecolormap->Desaturate); - invertcolormap = false; - } - else - { // Fade to black - mybasecolormap = GetSpecialLights(mybasecolormap->Color, MAKERGB(0, 0, 0), mybasecolormap->Desaturate); - } - } + bool fullbright = !vis->foggy && ((renderflags & RF_FULLBRIGHT) || (thing->flags5 & MF5_BRIGHT)); + bool fadeToBlack = (vis->RenderStyle.Flags & STYLEF_FadeToBlack) != 0; - // get light level - if (fixedcolormap != nullptr) - { // fixed map - vis->BaseColormap = fixedcolormap; - vis->ColormapNum = 0; - } - else - { - if (invertcolormap) - { - mybasecolormap = GetSpecialLights(mybasecolormap->Color, mybasecolormap->Fade.InverseColor(), mybasecolormap->Desaturate); - } - - if (fixedlightlev >= 0) - { - vis->BaseColormap = mybasecolormap; - vis->ColormapNum = fixedlightlev >> COLORMAPSHIFT; - } - else if (!vis->foggy && ((renderflags & RF_FULLBRIGHT) || (thing->flags5 & MF5_BRIGHT))) - { // full bright - vis->BaseColormap = (r_fullbrightignoresectorcolor) ? 
&FullNormalLight : mybasecolormap; - vis->ColormapNum = 0; - } - else - { // diminished light - vis->ColormapNum = GETPALOOKUP(r_SpriteVisibility / MAX(tz, MINZ), spriteshade); - vis->BaseColormap = mybasecolormap; - } - } + vis->SetColormap(r_SpriteVisibility / MAX(tz, MINZ), spriteshade, mybasecolormap, fullbright, invertcolormap, fadeToBlack); VisibleSpriteList::Instance()->Push(vis); RenderTranslucentPass::DrewAVoxel = true; diff --git a/src/swrenderer/things/r_wallsprite.cpp b/src/swrenderer/things/r_wallsprite.cpp index 1a520cfe76..44477c1e14 100644 --- a/src/swrenderer/things/r_wallsprite.cpp +++ b/src/swrenderer/things/r_wallsprite.cpp @@ -129,11 +129,11 @@ namespace swrenderer vis->fakeceiling = NULL; //vis->bInMirror = renderportal->MirrorFlags & RF_XFLIP; vis->pic = pic; - vis->ColormapNum = GETPALOOKUP(r_SpriteVisibility / MAX(tz, MINZ), spriteshade); - vis->BaseColormap = basecolormap; vis->wallc = wallc; vis->foggy = foggy; + vis->SetColormap(r_SpriteVisibility / MAX(tz, MINZ), spriteshade, basecolormap, false, false, false); + VisibleSpriteList::Instance()->Push(vis); } From e154ff888deef310a891c84e95cf79cd3b988e22 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 17 Jan 2017 01:43:45 +0100 Subject: [PATCH 728/912] Implement the todo that someone left in the source code --- src/swrenderer/things/r_sprite.cpp | 9 ++++----- src/swrenderer/things/r_voxel.cpp | 9 ++++----- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/src/swrenderer/things/r_sprite.cpp b/src/swrenderer/things/r_sprite.cpp index e58e5b428e..6de827674a 100644 --- a/src/swrenderer/things/r_sprite.cpp +++ b/src/swrenderer/things/r_sprite.cpp @@ -216,21 +216,20 @@ namespace swrenderer if (vis->RenderStyle.Flags & STYLEF_InvertSource) invertcolormap = !invertcolormap; - FDynamicColormap *mybasecolormap = basecolormap; if (current_sector->sectornum != thing->Sector->sectornum) // compare sectornums to account for R_FakeFlat copies. 
{ - // Todo: The actor is from a different sector so we have to retrieve the proper basecolormap for that sector. + basecolormap = thing->Sector->ColorMap; } - if (vis->RenderStyle == LegacyRenderStyles[STYLE_Add] && mybasecolormap->Fade != 0) + if (vis->RenderStyle == LegacyRenderStyles[STYLE_Add] && basecolormap->Fade != 0) { - mybasecolormap = GetSpecialLights(mybasecolormap->Color, 0, mybasecolormap->Desaturate); + basecolormap = GetSpecialLights(basecolormap->Color, 0, basecolormap->Desaturate); } bool fullbright = !vis->foggy && ((renderflags & RF_FULLBRIGHT) || (thing->flags5 & MF5_BRIGHT)); bool fadeToBlack = (vis->RenderStyle.Flags & STYLEF_FadeToBlack) != 0; - vis->SetColormap(r_SpriteVisibility / MAX(tz, MINZ), spriteshade, mybasecolormap, fullbright, invertcolormap, fadeToBlack); + vis->SetColormap(r_SpriteVisibility / MAX(tz, MINZ), spriteshade, basecolormap, fullbright, invertcolormap, fadeToBlack); VisibleSpriteList::Instance()->Push(vis); } diff --git a/src/swrenderer/things/r_voxel.cpp b/src/swrenderer/things/r_voxel.cpp index b34caf1e49..664df761ef 100644 --- a/src/swrenderer/things/r_voxel.cpp +++ b/src/swrenderer/things/r_voxel.cpp @@ -166,22 +166,21 @@ namespace swrenderer invertcolormap = !invertcolormap; } - FDynamicColormap *mybasecolormap = basecolormap; if (current_sector->sectornum != thing->Sector->sectornum) // compare sectornums to account for R_FakeFlat copies. { - // Todo: The actor is from a different sector so we have to retrieve the proper basecolormap for that sector. + basecolormap = thing->Sector->ColorMap; } // Sprites that are added to the scene must fade to black. 
- if (vis->RenderStyle == LegacyRenderStyles[STYLE_Add] && mybasecolormap->Fade != 0) + if (vis->RenderStyle == LegacyRenderStyles[STYLE_Add] && basecolormap->Fade != 0) { - mybasecolormap = GetSpecialLights(mybasecolormap->Color, 0, mybasecolormap->Desaturate); + basecolormap = GetSpecialLights(basecolormap->Color, 0, basecolormap->Desaturate); } bool fullbright = !vis->foggy && ((renderflags & RF_FULLBRIGHT) || (thing->flags5 & MF5_BRIGHT)); bool fadeToBlack = (vis->RenderStyle.Flags & STYLEF_FadeToBlack) != 0; - vis->SetColormap(r_SpriteVisibility / MAX(tz, MINZ), spriteshade, mybasecolormap, fullbright, invertcolormap, fadeToBlack); + vis->SetColormap(r_SpriteVisibility / MAX(tz, MINZ), spriteshade, basecolormap, fullbright, invertcolormap, fadeToBlack); VisibleSpriteList::Instance()->Push(vis); RenderTranslucentPass::DrewAVoxel = true; From 8e72e094ce15ed3bbaaf62709a47eb0a8f6fcfbd Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 17 Jan 2017 02:16:13 +0100 Subject: [PATCH 729/912] Fix floor brightness affects sprites not in sector --- src/swrenderer/scene/r_opaque_pass.cpp | 14 +++++++++++--- src/swrenderer/things/r_sprite.cpp | 5 ----- src/swrenderer/things/r_voxel.cpp | 5 ----- 3 files changed, 11 insertions(+), 13 deletions(-) diff --git a/src/swrenderer/scene/r_opaque_pass.cpp b/src/swrenderer/scene/r_opaque_pass.cpp index 8c70ab1fed..5b064e4565 100644 --- a/src/swrenderer/scene/r_opaque_pass.cpp +++ b/src/swrenderer/scene/r_opaque_pass.cpp @@ -872,17 +872,25 @@ namespace swrenderer ThingSprite sprite; if (GetThingSprite(thing, sprite)) { + FDynamicColormap *thingColormap = basecolormap; + if (sec->sectornum != thing->Sector->sectornum) // compare sectornums to account for R_FakeFlat copies. + { + int lightlevel = thing->Sector->GetTexture(sector_t::ceiling) == skyflatnum ? 
thing->Sector->GetCeilingLight() : thing->Sector->GetFloorLight(); + spriteshade = LIGHT2SHADE(lightlevel + R_ActualExtraLight(foggy)); + thingColormap = thing->Sector->ColorMap; + } + if ((sprite.renderflags & RF_SPRITETYPEMASK) == RF_WALLSPRITE) { - RenderWallSprite::Project(thing, sprite.pos, sprite.picnum, sprite.spriteScale, sprite.renderflags, spriteshade, foggy, basecolormap); + RenderWallSprite::Project(thing, sprite.pos, sprite.picnum, sprite.spriteScale, sprite.renderflags, spriteshade, foggy, thingColormap); } else if (sprite.voxel) { - RenderVoxel::Project(thing, sprite.pos, sprite.voxel, sprite.spriteScale, sprite.renderflags, fakeside, fakefloor, fakeceiling, sec, spriteshade, foggy, basecolormap); + RenderVoxel::Project(thing, sprite.pos, sprite.voxel, sprite.spriteScale, sprite.renderflags, fakeside, fakefloor, fakeceiling, sec, spriteshade, foggy, thingColormap); } else { - RenderSprite::Project(thing, sprite.pos, sprite.tex, sprite.spriteScale, sprite.renderflags, fakeside, fakefloor, fakeceiling, sec, spriteshade, foggy, basecolormap); + RenderSprite::Project(thing, sprite.pos, sprite.tex, sprite.spriteScale, sprite.renderflags, fakeside, fakefloor, fakeceiling, sec, spriteshade, foggy, thingColormap); } } } diff --git a/src/swrenderer/things/r_sprite.cpp b/src/swrenderer/things/r_sprite.cpp index 6de827674a..d237348f49 100644 --- a/src/swrenderer/things/r_sprite.cpp +++ b/src/swrenderer/things/r_sprite.cpp @@ -216,11 +216,6 @@ namespace swrenderer if (vis->RenderStyle.Flags & STYLEF_InvertSource) invertcolormap = !invertcolormap; - if (current_sector->sectornum != thing->Sector->sectornum) // compare sectornums to account for R_FakeFlat copies. 
- { - basecolormap = thing->Sector->ColorMap; - } - if (vis->RenderStyle == LegacyRenderStyles[STYLE_Add] && basecolormap->Fade != 0) { basecolormap = GetSpecialLights(basecolormap->Color, 0, basecolormap->Desaturate); diff --git a/src/swrenderer/things/r_voxel.cpp b/src/swrenderer/things/r_voxel.cpp index 664df761ef..7706da69b2 100644 --- a/src/swrenderer/things/r_voxel.cpp +++ b/src/swrenderer/things/r_voxel.cpp @@ -166,11 +166,6 @@ namespace swrenderer invertcolormap = !invertcolormap; } - if (current_sector->sectornum != thing->Sector->sectornum) // compare sectornums to account for R_FakeFlat copies. - { - basecolormap = thing->Sector->ColorMap; - } - // Sprites that are added to the scene must fade to black. if (vis->RenderStyle == LegacyRenderStyles[STYLE_Add] && basecolormap->Fade != 0) { From cc0c0f0236a2c23e0d5851ef21067b6e0e3532c3 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 17 Jan 2017 02:27:59 +0100 Subject: [PATCH 730/912] Fix not copying the shade variable first --- src/swrenderer/scene/r_opaque_pass.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/swrenderer/scene/r_opaque_pass.cpp b/src/swrenderer/scene/r_opaque_pass.cpp index 5b064e4565..2255266c96 100644 --- a/src/swrenderer/scene/r_opaque_pass.cpp +++ b/src/swrenderer/scene/r_opaque_pass.cpp @@ -873,24 +873,25 @@ namespace swrenderer if (GetThingSprite(thing, sprite)) { FDynamicColormap *thingColormap = basecolormap; + int thingShade = spriteshade; if (sec->sectornum != thing->Sector->sectornum) // compare sectornums to account for R_FakeFlat copies. { int lightlevel = thing->Sector->GetTexture(sector_t::ceiling) == skyflatnum ? 
thing->Sector->GetCeilingLight() : thing->Sector->GetFloorLight(); - spriteshade = LIGHT2SHADE(lightlevel + R_ActualExtraLight(foggy)); + thingShade = LIGHT2SHADE(lightlevel + R_ActualExtraLight(foggy)); thingColormap = thing->Sector->ColorMap; } if ((sprite.renderflags & RF_SPRITETYPEMASK) == RF_WALLSPRITE) { - RenderWallSprite::Project(thing, sprite.pos, sprite.picnum, sprite.spriteScale, sprite.renderflags, spriteshade, foggy, thingColormap); + RenderWallSprite::Project(thing, sprite.pos, sprite.picnum, sprite.spriteScale, sprite.renderflags, thingShade, foggy, thingColormap); } else if (sprite.voxel) { - RenderVoxel::Project(thing, sprite.pos, sprite.voxel, sprite.spriteScale, sprite.renderflags, fakeside, fakefloor, fakeceiling, sec, spriteshade, foggy, thingColormap); + RenderVoxel::Project(thing, sprite.pos, sprite.voxel, sprite.spriteScale, sprite.renderflags, fakeside, fakefloor, fakeceiling, sec, thingShade, foggy, thingColormap); } else { - RenderSprite::Project(thing, sprite.pos, sprite.tex, sprite.spriteScale, sprite.renderflags, fakeside, fakefloor, fakeceiling, sec, spriteshade, foggy, thingColormap); + RenderSprite::Project(thing, sprite.pos, sprite.tex, sprite.spriteScale, sprite.renderflags, fakeside, fakefloor, fakeceiling, sec, thingShade, foggy, thingColormap); } } } From 9e0ae211978775aea5d9a6ca9553f46e88a80fd5 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 17 Jan 2017 02:32:23 +0100 Subject: [PATCH 731/912] Move fakeceiling and fakefloor into the inner loop --- src/swrenderer/scene/r_opaque_pass.cpp | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/swrenderer/scene/r_opaque_pass.cpp b/src/swrenderer/scene/r_opaque_pass.cpp index 2255266c96..05ebc1b1aa 100644 --- a/src/swrenderer/scene/r_opaque_pass.cpp +++ b/src/swrenderer/scene/r_opaque_pass.cpp @@ -816,9 +816,6 @@ namespace swrenderer void RenderOpaquePass::AddSprites(sector_t *sec, int lightlevel, WaterFakeSide fakeside, bool foggy, FDynamicColormap 
*basecolormap) { - F3DFloor *fakeceiling = nullptr; - F3DFloor *fakefloor = nullptr; - // BSP is traversed by subsector. // A sector might have been split into several // subsectors during BSP building. @@ -850,6 +847,8 @@ namespace swrenderer } // find fake level + F3DFloor *fakeceiling = nullptr; + F3DFloor *fakefloor = nullptr; for (auto rover : thing->Sector->e->XFloor.ffloors) { if (!(rover->flags & FF_EXISTS) || !(rover->flags & FF_RENDERPLANES)) continue; @@ -895,9 +894,6 @@ namespace swrenderer } } } - - fakeceiling = nullptr; - fakefloor = nullptr; } } From 8788a9e78811ead83d142720b1b2469997bfe471 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 19 Jan 2017 00:02:51 +0100 Subject: [PATCH 732/912] - Change all sky drawing code use to use the sky drawers - Add support for drawing repeating skies in the sky drawers - Add the old 4 column sky fade optimization to the 1 column variant (fixes speed regression when 4col was removed) - Remove skyplane globals - Remove walldraw code used to draw the old skies --- src/swrenderer/drawers/r_draw.h | 4 +- src/swrenderer/drawers/r_draw_pal.cpp | 280 ++++++++++++++++++------ src/swrenderer/drawers/r_draw_pal.h | 7 +- src/swrenderer/drawers/r_draw_rgba.cpp | 2 +- src/swrenderer/drawers/r_draw_rgba.h | 6 +- src/swrenderer/line/r_walldraw.cpp | 156 ++++++------- src/swrenderer/line/r_walldraw.h | 3 +- src/swrenderer/plane/r_skyplane.cpp | 253 +-------------------- src/swrenderer/plane/r_skyplane.h | 45 ++-- src/swrenderer/plane/r_visibleplane.cpp | 3 +- 10 files changed, 314 insertions(+), 445 deletions(-) diff --git a/src/swrenderer/drawers/r_draw.h b/src/swrenderer/drawers/r_draw.h index e6fa8ea4d0..8ce1d904fe 100644 --- a/src/swrenderer/drawers/r_draw.h +++ b/src/swrenderer/drawers/r_draw.h @@ -134,8 +134,8 @@ namespace swrenderer virtual void DrawWallAddClampColumn() = 0; virtual void DrawWallSubClampColumn() = 0; virtual void DrawWallRevSubClampColumn() = 0; - virtual void DrawSingleSkyColumn(uint32_t 
solid_top, uint32_t solid_bottom) = 0; - virtual void DrawDoubleSkyColumn(uint32_t solid_top, uint32_t solid_bottom) = 0; + virtual void DrawSingleSkyColumn(uint32_t solid_top, uint32_t solid_bottom, bool fadeSky) = 0; + virtual void DrawDoubleSkyColumn(uint32_t solid_top, uint32_t solid_bottom, bool fadeSky) = 0; virtual void DrawColumn() = 0; virtual void FillColumn() = 0; virtual void FillAddColumn() = 0; diff --git a/src/swrenderer/drawers/r_draw_pal.cpp b/src/swrenderer/drawers/r_draw_pal.cpp index b0938572d3..ef77ee1eb0 100644 --- a/src/swrenderer/drawers/r_draw_pal.cpp +++ b/src/swrenderer/drawers/r_draw_pal.cpp @@ -560,7 +560,7 @@ namespace swrenderer ///////////////////////////////////////////////////////////////////////// - PalSkyCommand::PalSkyCommand(uint32_t solid_top, uint32_t solid_bottom) : solid_top(solid_top), solid_bottom(solid_bottom) + PalSkyCommand::PalSkyCommand(uint32_t solid_top, uint32_t solid_bottom, bool fadeSky) : solid_top(solid_top), solid_bottom(solid_bottom), fadeSky(fadeSky) { using namespace drawerargs; @@ -588,7 +588,39 @@ namespace swrenderer int32_t frac = _texturefrac[0]; int32_t fracstep = _iscale[0]; + // Find bands for top solid color, top fade, center textured, bottom fade, bottom solid color: int start_fade = 2; // How fast it should fade out + int fade_length = (1 << (24 - start_fade)); + int start_fadetop_y = (-frac) / fracstep; + int end_fadetop_y = (fade_length - frac) / fracstep; + int start_fadebottom_y = ((2 << 24) - fade_length - frac) / fracstep; + int end_fadebottom_y = ((2 << 24) - frac) / fracstep; + start_fadetop_y = clamp(start_fadetop_y, 0, count); + end_fadetop_y = clamp(end_fadetop_y, 0, count); + start_fadebottom_y = clamp(start_fadebottom_y, 0, count); + end_fadebottom_y = clamp(end_fadebottom_y, 0, count); + + int num_cores = thread->num_cores; + int skipped = thread->skipped_by_thread(_dest_y); + dest = thread->dest_for_thread(_dest_y, pitch, dest); + frac += fracstep * skipped; + fracstep *= 
num_cores; + pitch *= num_cores; + + if (!fadeSky) + { + count = thread->count_for_thread(_dest_y, count); + + for (int index = 0; index < count; index++) + { + uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; + *dest = source0[sample_index]; + dest += pitch; + frac += fracstep; + } + + return; + } int solid_top_r = RPART(solid_top); int solid_top_g = GPART(solid_top); @@ -596,49 +628,83 @@ namespace swrenderer int solid_bottom_r = RPART(solid_bottom); int solid_bottom_g = GPART(solid_bottom); int solid_bottom_b = BPART(solid_bottom); + uint8_t solid_top_fill = RGB32k.RGB[(solid_top_r >> 3)][(solid_top_g >> 3)][(solid_top_b >> 3)]; + uint8_t solid_bottom_fill = RGB32k.RGB[(solid_bottom_r >> 3)][(solid_bottom_g >> 3)][(solid_bottom_b >> 3)]; - count = thread->count_for_thread(_dest_y, count); - if (count <= 0) - return; + const uint32_t *palette = (const uint32_t *)GPalette.BaseColors; - int skipped = thread->skipped_by_thread(_dest_y); - dest = thread->dest_for_thread(_dest_y, pitch, dest); - frac += fracstep * skipped; - fracstep *= thread->num_cores; - pitch *= thread->num_cores; + int index = skipped; - for (int index = 0; index < count; index++) + // Top solid color: + while (index < start_fadetop_y) + { + *dest = solid_top_fill; + dest += pitch; + frac += fracstep; + index += num_cores; + } + + // Top fade: + while (index < end_fadetop_y) { uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; uint8_t fg = source0[sample_index]; + uint32_t c = palette[fg]; int alpha_top = MAX(MIN(frac >> (16 - start_fade), 256), 0); - int alpha_bottom = MAX(MIN(((2 << 24) - frac) >> (16 - start_fade), 256), 0); - - if (alpha_top == 256 && alpha_bottom == 256) - { - *dest = fg; - } - else - { - int inv_alpha_top = 256 - alpha_top; - int inv_alpha_bottom = 256 - alpha_bottom; - - const auto &c = GPalette.BaseColors[fg]; - int c_red = c.r; - int c_green = c.g; - int c_blue = c.b; - c_red = (c_red 
* alpha_top + solid_top_r * inv_alpha_top) >> 8; - c_green = (c_green * alpha_top + solid_top_g * inv_alpha_top) >> 8; - c_blue = (c_blue * alpha_top + solid_top_b * inv_alpha_top) >> 8; - c_red = (c_red * alpha_bottom + solid_bottom_r * inv_alpha_bottom) >> 8; - c_green = (c_green * alpha_bottom + solid_bottom_g * inv_alpha_bottom) >> 8; - c_blue = (c_blue * alpha_bottom + solid_bottom_b * inv_alpha_bottom) >> 8; - *dest = RGB256k.RGB[(c_red >> 2)][(c_green >> 2)][(c_blue >> 2)]; - } + int inv_alpha_top = 256 - alpha_top; + int c_red = RPART(c); + int c_green = GPART(c); + int c_blue = BPART(c); + c_red = (c_red * alpha_top + solid_top_r * inv_alpha_top) >> 8; + c_green = (c_green * alpha_top + solid_top_g * inv_alpha_top) >> 8; + c_blue = (c_blue * alpha_top + solid_top_b * inv_alpha_top) >> 8; + *dest = RGB256k.RGB[(c_red >> 2)][(c_green >> 2)][(c_blue >> 2)]; frac += fracstep; dest += pitch; + index += num_cores; + } + + // Textured center: + while (index < start_fadebottom_y) + { + uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; + *dest = source0[sample_index]; + + frac += fracstep; + dest += pitch; + index += num_cores; + } + + // Fade bottom: + while (index < end_fadebottom_y) + { + uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; + uint8_t fg = source0[sample_index]; + + uint32_t c = palette[fg]; + int alpha_bottom = MAX(MIN(((2 << 24) - frac) >> (16 - start_fade), 256), 0); + int inv_alpha_bottom = 256 - alpha_bottom; + int c_red = RPART(c); + int c_green = GPART(c); + int c_blue = BPART(c); + c_red = (c_red * alpha_bottom + solid_bottom_r * inv_alpha_bottom) >> 8; + c_green = (c_green * alpha_bottom + solid_bottom_g * inv_alpha_bottom) >> 8; + c_blue = (c_blue * alpha_bottom + solid_bottom_b * inv_alpha_bottom) >> 8; + *dest = RGB256k.RGB[(c_red >> 2)][(c_green >> 2)][(c_blue >> 2)]; + + frac += fracstep; + dest += pitch; + index += num_cores; + } + + // Bottom 
solid color: + while (index < count) + { + *dest = solid_bottom_fill; + dest += pitch; + index += num_cores; } } @@ -655,7 +721,46 @@ namespace swrenderer int32_t frac = _texturefrac[0]; int32_t fracstep = _iscale[0]; + // Find bands for top solid color, top fade, center textured, bottom fade, bottom solid color: int start_fade = 2; // How fast it should fade out + int fade_length = (1 << (24 - start_fade)); + int start_fadetop_y = (-frac) / fracstep; + int end_fadetop_y = (fade_length - frac) / fracstep; + int start_fadebottom_y = ((2 << 24) - fade_length - frac) / fracstep; + int end_fadebottom_y = ((2 << 24) - frac) / fracstep; + start_fadetop_y = clamp(start_fadetop_y, 0, count); + end_fadetop_y = clamp(end_fadetop_y, 0, count); + start_fadebottom_y = clamp(start_fadebottom_y, 0, count); + end_fadebottom_y = clamp(end_fadebottom_y, 0, count); + + int num_cores = thread->num_cores; + int skipped = thread->skipped_by_thread(_dest_y); + dest = thread->dest_for_thread(_dest_y, pitch, dest); + frac += fracstep * skipped; + fracstep *= num_cores; + pitch *= num_cores; + + if (!fadeSky) + { + count = thread->count_for_thread(_dest_y, count); + + for (int index = 0; index < count; index++) + { + uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; + uint8_t fg = source0[sample_index]; + if (fg == 0) + { + uint32_t sample_index2 = MIN(sample_index, maxtextureheight1); + fg = source1[sample_index2]; + } + + *dest = fg; + dest += pitch; + frac += fracstep; + } + + return; + } int solid_top_r = RPART(solid_top); int solid_top_g = GPART(solid_top); @@ -663,18 +768,24 @@ namespace swrenderer int solid_bottom_r = RPART(solid_bottom); int solid_bottom_g = GPART(solid_bottom); int solid_bottom_b = BPART(solid_bottom); + uint8_t solid_top_fill = RGB32k.RGB[(solid_top_r >> 3)][(solid_top_g >> 3)][(solid_top_b >> 3)]; + uint8_t solid_bottom_fill = RGB32k.RGB[(solid_bottom_r >> 3)][(solid_bottom_g >> 3)][(solid_bottom_b >> 3)]; - count = 
thread->count_for_thread(_dest_y, count); - if (count <= 0) - return; + const uint32_t *palette = (const uint32_t *)GPalette.BaseColors; - int skipped = thread->skipped_by_thread(_dest_y); - dest = thread->dest_for_thread(_dest_y, pitch, dest); - frac += fracstep * skipped; - fracstep *= thread->num_cores; - pitch *= thread->num_cores; + int index = skipped; - for (int index = 0; index < count; index++) + // Top solid color: + while (index < start_fadetop_y) + { + *dest = solid_top_fill; + dest += pitch; + frac += fracstep; + index += num_cores; + } + + // Top fade: + while (index < end_fadetop_y) { uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; uint8_t fg = source0[sample_index]; @@ -684,33 +795,72 @@ namespace swrenderer fg = source1[sample_index2]; } + uint32_t c = palette[fg]; int alpha_top = MAX(MIN(frac >> (16 - start_fade), 256), 0); - int alpha_bottom = MAX(MIN(((2 << 24) - frac) >> (16 - start_fade), 256), 0); - - if (alpha_top == 256 && alpha_bottom == 256) - { - *dest = fg; - } - else - { - int inv_alpha_top = 256 - alpha_top; - int inv_alpha_bottom = 256 - alpha_bottom; - - const auto &c = GPalette.BaseColors[fg]; - int c_red = c.r; - int c_green = c.g; - int c_blue = c.b; - c_red = (c_red * alpha_top + solid_top_r * inv_alpha_top) >> 8; - c_green = (c_green * alpha_top + solid_top_g * inv_alpha_top) >> 8; - c_blue = (c_blue * alpha_top + solid_top_b * inv_alpha_top) >> 8; - c_red = (c_red * alpha_bottom + solid_bottom_r * inv_alpha_bottom) >> 8; - c_green = (c_green * alpha_bottom + solid_bottom_g * inv_alpha_bottom) >> 8; - c_blue = (c_blue * alpha_bottom + solid_bottom_b * inv_alpha_bottom) >> 8; - *dest = RGB256k.RGB[(c_red >> 2)][(c_green >> 2)][(c_blue >> 2)]; - } + int inv_alpha_top = 256 - alpha_top; + int c_red = RPART(c); + int c_green = GPART(c); + int c_blue = BPART(c); + c_red = (c_red * alpha_top + solid_top_r * inv_alpha_top) >> 8; + c_green = (c_green * alpha_top + solid_top_g * 
inv_alpha_top) >> 8; + c_blue = (c_blue * alpha_top + solid_top_b * inv_alpha_top) >> 8; + *dest = RGB256k.RGB[(c_red >> 2)][(c_green >> 2)][(c_blue >> 2)]; frac += fracstep; dest += pitch; + index += num_cores; + } + + // Textured center: + while (index < start_fadebottom_y) + { + uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; + uint8_t fg = source0[sample_index]; + if (fg == 0) + { + uint32_t sample_index2 = MIN(sample_index, maxtextureheight1); + fg = source1[sample_index2]; + } + *dest = fg; + + frac += fracstep; + dest += pitch; + index += num_cores; + } + + // Fade bottom: + while (index < end_fadebottom_y) + { + uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; + uint8_t fg = source0[sample_index]; + if (fg == 0) + { + uint32_t sample_index2 = MIN(sample_index, maxtextureheight1); + fg = source1[sample_index2]; + } + + uint32_t c = palette[fg]; + int alpha_bottom = MAX(MIN(((2 << 24) - frac) >> (16 - start_fade), 256), 0); + int inv_alpha_bottom = 256 - alpha_bottom; + int c_red = RPART(c); + int c_green = GPART(c); + int c_blue = BPART(c); + c_red = (c_red * alpha_bottom + solid_bottom_r * inv_alpha_bottom) >> 8; + c_green = (c_green * alpha_bottom + solid_bottom_g * inv_alpha_bottom) >> 8; + c_blue = (c_blue * alpha_bottom + solid_bottom_b * inv_alpha_bottom) >> 8; + *dest = RGB256k.RGB[(c_red >> 2)][(c_green >> 2)][(c_blue >> 2)]; + + frac += fracstep; + dest += pitch; + index += num_cores; + } + + // Bottom solid color: + while (index < count) + { + *dest = solid_bottom_fill; + dest += pitch; + index += num_cores; } } diff --git a/src/swrenderer/drawers/r_draw_pal.h b/src/swrenderer/drawers/r_draw_pal.h index a96660f849..e2d33f63e4 100644 --- a/src/swrenderer/drawers/r_draw_pal.h +++ b/src/swrenderer/drawers/r_draw_pal.h @@ -42,12 +42,13 @@ namespace swrenderer class PalSkyCommand : public DrawerCommand { public: - PalSkyCommand(uint32_t solid_top, uint32_t 
solid_bottom); + PalSkyCommand(uint32_t solid_top, uint32_t solid_bottom, bool fadeSky); FString DebugInfo() override { return "PalSkyCommand"; } protected: uint32_t solid_top; uint32_t solid_bottom; + bool fadeSky; uint8_t *_dest; int _count; @@ -249,8 +250,8 @@ namespace swrenderer void DrawWallAddClampColumn() override { DrawerCommandQueue::QueueCommand(); } void DrawWallSubClampColumn() override { DrawerCommandQueue::QueueCommand(); } void DrawWallRevSubClampColumn() override { DrawerCommandQueue::QueueCommand(); } - void DrawSingleSkyColumn(uint32_t solid_top, uint32_t solid_bottom) override { DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); } - void DrawDoubleSkyColumn(uint32_t solid_top, uint32_t solid_bottom) override { DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); } + void DrawSingleSkyColumn(uint32_t solid_top, uint32_t solid_bottom, bool fadeSky) override { DrawerCommandQueue::QueueCommand(solid_top, solid_bottom, fadeSky); } + void DrawDoubleSkyColumn(uint32_t solid_top, uint32_t solid_bottom, bool fadeSky) override { DrawerCommandQueue::QueueCommand(solid_top, solid_bottom, fadeSky); } void DrawColumn() override { DrawerCommandQueue::QueueCommand(); } void FillColumn() override { DrawerCommandQueue::QueueCommand(); } void FillAddColumn() override { DrawerCommandQueue::QueueCommand(); } diff --git a/src/swrenderer/drawers/r_draw_rgba.cpp b/src/swrenderer/drawers/r_draw_rgba.cpp index 434497f99f..a520d28ae1 100644 --- a/src/swrenderer/drawers/r_draw_rgba.cpp +++ b/src/swrenderer/drawers/r_draw_rgba.cpp @@ -310,7 +310,7 @@ namespace swrenderer return d; } - DrawSkyLLVMCommand::DrawSkyLLVMCommand(uint32_t solid_top, uint32_t solid_bottom) + DrawSkyLLVMCommand::DrawSkyLLVMCommand(uint32_t solid_top, uint32_t solid_bottom, bool fadeSky) { using namespace drawerargs; diff --git a/src/swrenderer/drawers/r_draw_rgba.h b/src/swrenderer/drawers/r_draw_rgba.h index e1ed703505..0d40c63edc 100644 --- a/src/swrenderer/drawers/r_draw_rgba.h +++ 
b/src/swrenderer/drawers/r_draw_rgba.h @@ -157,7 +157,7 @@ namespace swrenderer WorkerThreadData ThreadData(DrawerThread *thread); public: - DrawSkyLLVMCommand(uint32_t solid_top, uint32_t solid_bottom); + DrawSkyLLVMCommand(uint32_t solid_top, uint32_t solid_bottom, bool fadeSky); FString DebugInfo() override; }; @@ -353,8 +353,8 @@ namespace swrenderer void DrawWallAddClampColumn() override { DrawerCommandQueue::QueueCommand(); } void DrawWallSubClampColumn() override { DrawerCommandQueue::QueueCommand(); } void DrawWallRevSubClampColumn() override { DrawerCommandQueue::QueueCommand(); } - void DrawSingleSkyColumn(uint32_t solid_top, uint32_t solid_bottom) override { DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); } - void DrawDoubleSkyColumn(uint32_t solid_top, uint32_t solid_bottom) override { DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); } + void DrawSingleSkyColumn(uint32_t solid_top, uint32_t solid_bottom, bool skyFade) override { DrawerCommandQueue::QueueCommand(solid_top, solid_bottom, skyFade); } + void DrawDoubleSkyColumn(uint32_t solid_top, uint32_t solid_bottom, bool skyFade) override { DrawerCommandQueue::QueueCommand(solid_top, solid_bottom, skyFade); } void DrawColumn() override { DrawerCommandQueue::QueueCommand(); } void FillColumn() override { DrawerCommandQueue::QueueCommand(); } void FillAddColumn() override { DrawerCommandQueue::QueueCommand(); } diff --git a/src/swrenderer/line/r_walldraw.cpp b/src/swrenderer/line/r_walldraw.cpp index 391d54ce3b..43aaa38bcd 100644 --- a/src/swrenderer/line/r_walldraw.cpp +++ b/src/swrenderer/line/r_walldraw.cpp @@ -59,24 +59,7 @@ namespace swrenderer FTexture *rw_pic; } - static const uint8_t *R_GetColumn(FTexture *tex, int col) - { - int width; - - // If the texture's width isn't a power of 2, then we need to make it a - // positive offset for proper clamping. 
- if (col < 0 && (width = tex->GetWidth()) != (1 << tex->WidthBits)) - { - col = width + (col % width); - } - - if (r_swtruecolor) - return (const uint8_t *)tex->GetColumnBgra(col, nullptr); - else - return tex->GetColumn(col, nullptr); - } - - WallSampler::WallSampler(int y1, double texturemid, float swal, double yrepeat, fixed_t xoffset, double xmagnitude, FTexture *texture, const BYTE*(*getcol)(FTexture *texture, int x)) + WallSampler::WallSampler(int y1, double texturemid, float swal, double yrepeat, fixed_t xoffset, double xmagnitude, FTexture *texture) { xoffset += FLOAT2FIXED(xmagnitude * 0.5); @@ -109,7 +92,21 @@ namespace swrenderer uv_max = 1; } - source = getcol(texture, xoffset >> FRACBITS); + int col = xoffset >> FRACBITS; + + // If the texture's width isn't a power of 2, then we need to make it a + // positive offset for proper clamping. + int width; + if (col < 0 && (width = texture->GetWidth()) != (1 << texture->WidthBits)) + { + col = width + (col % width); + } + + if (r_swtruecolor) + source = (const uint8_t *)texture->GetColumnBgra(col, nullptr); + else + source = texture->GetColumn(col, nullptr); + source2 = nullptr; texturefracx = 0; } @@ -134,64 +131,54 @@ namespace swrenderer uv_max = 0; // Texture mipmap and filter selection: - if (getcol != R_GetColumn) + double ymagnitude = fabs(uv_stepd); + double magnitude = MAX(ymagnitude, xmagnitude); + double min_lod = -1000.0; + double lod = MAX(log2(magnitude) + r_lod_bias, min_lod); + bool magnifying = lod < 0.0f; + + int mipmap_offset = 0; + int mip_width = texture->GetWidth(); + int mip_height = texture->GetHeight(); + if (r_mipmap && texture->Mipmapped() && mip_width > 1 && mip_height > 1) { - source = getcol(texture, xoffset >> FRACBITS); + uint32_t xpos = (uint32_t)((((uint64_t)xoffset) << FRACBITS) / mip_width); + + int level = (int)lod; + while (level > 0 && mip_width > 1 && mip_height > 1) + { + mipmap_offset += mip_width * mip_height; + level--; + mip_width = MAX(mip_width >> 1, 1); + 
mip_height = MAX(mip_height >> 1, 1); + } + xoffset = (xpos >> FRACBITS) * mip_width; + } + + const uint32_t *pixels = texture->GetPixelsBgra() + mipmap_offset; + + bool filter_nearest = (magnifying && !r_magfilter) || (!magnifying && !r_minfilter); + if (filter_nearest) + { + int tx = (xoffset >> FRACBITS) % mip_width; + if (tx < 0) + tx += mip_width; + source = (BYTE*)(pixels + tx * mip_height); source2 = nullptr; - height = texture->GetHeight(); + height = mip_height; texturefracx = 0; } else { - double ymagnitude = fabs(uv_stepd); - double magnitude = MAX(ymagnitude, xmagnitude); - double min_lod = -1000.0; - double lod = MAX(log2(magnitude) + r_lod_bias, min_lod); - bool magnifying = lod < 0.0f; - - int mipmap_offset = 0; - int mip_width = texture->GetWidth(); - int mip_height = texture->GetHeight(); - if (r_mipmap && texture->Mipmapped() && mip_width > 1 && mip_height > 1) - { - uint32_t xpos = (uint32_t)((((uint64_t)xoffset) << FRACBITS) / mip_width); - - int level = (int)lod; - while (level > 0 && mip_width > 1 && mip_height > 1) - { - mipmap_offset += mip_width * mip_height; - level--; - mip_width = MAX(mip_width >> 1, 1); - mip_height = MAX(mip_height >> 1, 1); - } - xoffset = (xpos >> FRACBITS) * mip_width; - } - - const uint32_t *pixels = texture->GetPixelsBgra() + mipmap_offset; - - bool filter_nearest = (magnifying && !r_magfilter) || (!magnifying && !r_minfilter); - if (filter_nearest) - { - int tx = (xoffset >> FRACBITS) % mip_width; - if (tx < 0) - tx += mip_width; - source = (BYTE*)(pixels + tx * mip_height); - source2 = nullptr; - height = mip_height; - texturefracx = 0; - } - else - { - xoffset -= FRACUNIT / 2; - int tx0 = (xoffset >> FRACBITS) % mip_width; - if (tx0 < 0) - tx0 += mip_width; - int tx1 = (tx0 + 1) % mip_width; - source = (BYTE*)(pixels + tx0 * mip_height); - source2 = (BYTE*)(pixels + tx1 * mip_height); - height = mip_height; - texturefracx = (xoffset >> (FRACBITS - 4)) & 15; - } + xoffset -= FRACUNIT / 2; + int tx0 = (xoffset >> 
FRACBITS) % mip_width; + if (tx0 < 0) + tx0 += mip_width; + int tx1 = (tx0 + 1) % mip_width; + source = (BYTE*)(pixels + tx0 * mip_height); + source2 = (BYTE*)(pixels + tx1 * mip_height); + height = mip_height; + texturefracx = (xoffset >> (FRACBITS - 4)) & 15; } } } @@ -339,7 +326,7 @@ namespace swrenderer const FWallCoords &WallC, int x1, int x2, short *uwal, short *dwal, double texturemid, float *swal, fixed_t *lwal, double yrepeat, int wallshade, fixed_t xoffset, float light, float lightstep, FDynamicColormap *basecolormap, FLightNode *light_list, - const BYTE *(*getcol)(FTexture *tex, int x), DrawerFunc drawcolumn) + DrawerFunc drawcolumn) { if (rw_pic->UseType == FTexture::TEX_Null) return; @@ -394,41 +381,41 @@ namespace swrenderer if (x + 1 < x2) xmagnitude = fabs(FIXED2DBL(lwal[x + 1]) - FIXED2DBL(lwal[x])); - WallSampler sampler(y1, texturemid, swal[x], yrepeat, lwal[x] + xoffset, xmagnitude, rw_pic, getcol); + WallSampler sampler(y1, texturemid, swal[x], yrepeat, lwal[x] + xoffset, xmagnitude, rw_pic); Draw1Column(WallC, x, y1, y2, sampler, light_list, drawcolumn); } NetUpdate(); } - static void ProcessNormalWall(const FWallCoords &WallC, int x1, int x2, short *uwal, short *dwal, double texturemid, float *swal, fixed_t *lwal, double yrepeat, int wallshade, fixed_t xoffset, float light, float lightstep, FDynamicColormap *basecolormap, FLightNode *light_list, const BYTE *(*getcol)(FTexture *tex, int x) = R_GetColumn) + static void ProcessNormalWall(const FWallCoords &WallC, int x1, int x2, short *uwal, short *dwal, double texturemid, float *swal, fixed_t *lwal, double yrepeat, int wallshade, fixed_t xoffset, float light, float lightstep, FDynamicColormap *basecolormap, FLightNode *light_list) { - ProcessWallWorker(WallC, x1, x2, uwal, dwal, texturemid, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, basecolormap, light_list, getcol, &SWPixelFormatDrawers::DrawWallColumn); + ProcessWallWorker(WallC, x1, x2, uwal, dwal, texturemid, swal, lwal, 
yrepeat, wallshade, xoffset, light, lightstep, basecolormap, light_list, &SWPixelFormatDrawers::DrawWallColumn); } - static void ProcessMaskedWall(const FWallCoords &WallC, int x1, int x2, short *uwal, short *dwal, double texturemid, float *swal, fixed_t *lwal, double yrepeat, int wallshade, fixed_t xoffset, float light, float lightstep, FDynamicColormap *basecolormap, FLightNode *light_list, const BYTE *(*getcol)(FTexture *tex, int x) = R_GetColumn) + static void ProcessMaskedWall(const FWallCoords &WallC, int x1, int x2, short *uwal, short *dwal, double texturemid, float *swal, fixed_t *lwal, double yrepeat, int wallshade, fixed_t xoffset, float light, float lightstep, FDynamicColormap *basecolormap, FLightNode *light_list) { if (!rw_pic->bMasked) // Textures that aren't masked can use the faster ProcessNormalWall. { - ProcessNormalWall(WallC, x1, x2, uwal, dwal, texturemid, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, basecolormap, light_list, getcol); + ProcessNormalWall(WallC, x1, x2, uwal, dwal, texturemid, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, basecolormap, light_list); } else { - ProcessWallWorker(WallC, x1, x2, uwal, dwal, texturemid, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, basecolormap, light_list, getcol, &SWPixelFormatDrawers::DrawWallMaskedColumn); + ProcessWallWorker(WallC, x1, x2, uwal, dwal, texturemid, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, basecolormap, light_list, &SWPixelFormatDrawers::DrawWallMaskedColumn); } } - static void ProcessTranslucentWall(const FWallCoords &WallC, int x1, int x2, short *uwal, short *dwal, double texturemid, float *swal, fixed_t *lwal, double yrepeat, int wallshade, fixed_t xoffset, float light, float lightstep, FDynamicColormap *basecolormap, FLightNode *light_list, const BYTE *(*getcol)(FTexture *tex, int x) = R_GetColumn) + static void ProcessTranslucentWall(const FWallCoords &WallC, int x1, int x2, short *uwal, short *dwal, double texturemid, 
float *swal, fixed_t *lwal, double yrepeat, int wallshade, fixed_t xoffset, float light, float lightstep, FDynamicColormap *basecolormap, FLightNode *light_list) { DrawerFunc drawcol1 = R_GetTransMaskDrawer(); if (drawcol1 == nullptr) { // The current translucency is unsupported, so draw with regular ProcessMaskedWall instead. - ProcessMaskedWall(WallC, x1, x2, uwal, dwal, texturemid, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, basecolormap, light_list, getcol); + ProcessMaskedWall(WallC, x1, x2, uwal, dwal, texturemid, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, basecolormap, light_list); } else { - ProcessWallWorker(WallC, x1, x2, uwal, dwal, texturemid, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, basecolormap, light_list, getcol, drawcol1); + ProcessWallWorker(WallC, x1, x2, uwal, dwal, texturemid, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, basecolormap, light_list, drawcol1); } } @@ -608,11 +595,4 @@ namespace swrenderer ProcessWall(frontsector, curline, WallC, x1, x2, walltop, wallbottom, texturemid, swall, lwall, yscale, wallshade, xoffset, light, lightstep, false, foggy, basecolormap, light_list); } } - - void R_DrawSkySegment(FTexture *pic, int x1, int x2, short *uwal, short *dwal, double texturemid, float *swal, fixed_t *lwal, double yrepeat, int wallshade, fixed_t xoffset, float light, float lightstep, FDynamicColormap *basecolormap, const uint8_t *(*getcol)(FTexture *tex, int x)) - { - rw_pic = pic; - FWallCoords wallC; // Not used. To do: don't use r_walldraw to draw the sky!! 
- ProcessNormalWall(wallC, x1, x2, uwal, dwal, texturemid, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, basecolormap, nullptr, getcol); - } } diff --git a/src/swrenderer/line/r_walldraw.h b/src/swrenderer/line/r_walldraw.h index f7debf908f..a74a21a896 100644 --- a/src/swrenderer/line/r_walldraw.h +++ b/src/swrenderer/line/r_walldraw.h @@ -25,7 +25,7 @@ namespace swrenderer struct WallSampler { WallSampler() { } - WallSampler(int y1, double texturemid, float swal, double yrepeat, fixed_t xoffset, double xmagnitude, FTexture *texture, const BYTE*(*getcol)(FTexture *texture, int x)); + WallSampler(int y1, double texturemid, float swal, double yrepeat, fixed_t xoffset, double xmagnitude, FTexture *texture); uint32_t uv_pos; uint32_t uv_step; @@ -38,6 +38,5 @@ namespace swrenderer }; void R_DrawWallSegment(sector_t *frontsector, seg_t *curline, const FWallCoords &WallC, FTexture *rw_pic, int x1, int x2, short *walltop, short *wallbottom, double texturemid, float *swall, fixed_t *lwall, double yscale, double top, double bottom, bool mask, int wallshade, fixed_t xoffset, float light, float lightstep, FLightNode *light_list, bool foggy, FDynamicColormap *basecolormap); - void R_DrawSkySegment(FTexture *rw_pic, int x1, int x2, short *uwal, short *dwal, double texturemid, float *swal, fixed_t *lwal, double yrepeat, int wallshade, fixed_t xoffset, float light, float lightstep, FDynamicColormap *basecolormap, const uint8_t *(*getcol)(FTexture *tex, int col)); void R_DrawDrawSeg(sector_t *frontsector, seg_t *curline, const FWallCoords &WallC, FTexture *rw_pic, drawseg_t *ds, int x1, int x2, short *uwal, short *dwal, double texturemid, float *swal, fixed_t *lwal, double yrepeat, int wallshade, fixed_t xoffset, float light, float lightstep, bool foggy, FDynamicColormap *basecolormap); } diff --git a/src/swrenderer/plane/r_skyplane.cpp b/src/swrenderer/plane/r_skyplane.cpp index a4c8bec2cd..f2602eb1be 100644 --- a/src/swrenderer/plane/r_skyplane.cpp +++ 
b/src/swrenderer/plane/r_skyplane.cpp @@ -165,130 +165,6 @@ namespace swrenderer fixedcolormap = NULL; } - - // Get a column of sky when there is only one sky texture. - const uint8_t *RenderSkyPlane::GetOneSkyColumn(FTexture *fronttex, int x) - { - int tx; - if (r_linearsky) - { - angle_t xangle = (angle_t)((0.5 - x / (double)viewwidth) * FocalTangent * ANGLE_90); - angle_t column = (skyangle + xangle) ^ skyflip; - tx = (UMulScale16(column, frontcyl) + frontpos) >> FRACBITS; - } - else - { - angle_t column = (skyangle + xtoviewangle[x]) ^ skyflip; - tx = (UMulScale16(column, frontcyl) + frontpos) >> FRACBITS; - } - - if (!r_swtruecolor) - return fronttex->GetColumn(tx, NULL); - else - { - return (const uint8_t *)fronttex->GetColumnBgra(tx, NULL); - } - } - - // Get a column of sky when there are two overlapping sky textures - const uint8_t *RenderSkyPlane::GetTwoSkyColumns(FTexture *fronttex, int x) - { - uint32_t ang, angle1, angle2; - - if (r_linearsky) - { - angle_t xangle = (angle_t)((0.5 - x / (double)viewwidth) * FocalTangent * ANGLE_90); - ang = (skyangle + xangle) ^ skyflip; - } - else - { - ang = (skyangle + xtoviewangle[x]) ^ skyflip; - } - angle1 = (uint32_t)((UMulScale16(ang, frontcyl) + frontpos) >> FRACBITS); - angle2 = (uint32_t)((UMulScale16(ang, backcyl) + backpos) >> FRACBITS); - - // Check if this column has already been built. If so, there's - // no reason to waste time building it again. - uint32_t skycol = (angle1 << 16) | angle2; - int i; - - if (!r_swtruecolor) - { - for (i = 0; i < 4; ++i) - { - if (lastskycol[i] == skycol) - { - return skybuf[i]; - } - } - - lastskycol[skycolplace] = skycol; - uint8_t *composite = skybuf[skycolplace]; - skycolplace = (skycolplace + 1) & 3; - - // The ordering of the following code has been tuned to allow VC++ to optimize - // it well. In particular, this arrangement lets it keep count in a register - // instead of on the stack. 
- const uint8_t *front = fronttex->GetColumn(angle1, NULL); - const uint8_t *back = backskytex->GetColumn(angle2, NULL); - - int count = MIN(512, MIN(backskytex->GetHeight(), fronttex->GetHeight())); - i = 0; - do - { - if (front[i]) - { - composite[i] = front[i]; - } - else - { - composite[i] = back[i]; - } - } while (++i, --count); - return composite; - } - else - { - //return GetOneSkyColumn(fronttex, x); - for (i = skycolplace_bgra - 4; i < skycolplace_bgra; ++i) - { - int ic = (i % MAXSKYBUF); // i "checker" - can wrap around the ends of the array - if (lastskycol_bgra[ic] == skycol) - { - return (uint8_t*)(skybuf_bgra[ic]); - } - } - - lastskycol_bgra[skycolplace_bgra] = skycol; - uint32_t *composite = skybuf_bgra[skycolplace_bgra]; - skycolplace_bgra = (skycolplace_bgra + 1) % MAXSKYBUF; - - // The ordering of the following code has been tuned to allow VC++ to optimize - // it well. In particular, this arrangement lets it keep count in a register - // instead of on the stack. - const uint32_t *front = (const uint32_t *)fronttex->GetColumnBgra(angle1, NULL); - const uint32_t *back = (const uint32_t *)backskytex->GetColumnBgra(angle2, NULL); - - //[SP] Paletted version is used for comparison only - const uint8_t *frontcompare = fronttex->GetColumn(angle1, NULL); - - int count = MIN(512, MIN(backskytex->GetHeight(), fronttex->GetHeight())); - i = 0; - do - { - if (frontcompare[i]) - { - composite[i] = front[i]; - } - else - { - composite[i] = back[i]; - } - } while (++i, --count); - return (uint8_t*)composite; - } - } - void RenderSkyPlane::DrawSkyColumnStripe(int start_x, int y1, int y2, int columns, double scale, double texturemid, double yrepeat) { using namespace drawerargs; @@ -348,10 +224,12 @@ namespace swrenderer uint32_t solid_top = frontskytex->GetSkyCapColor(false); uint32_t solid_bottom = frontskytex->GetSkyCapColor(true); + bool fadeSky = (r_skymode == 2 && !(level.flags & LEVEL_FORCETILEDSKY)); + if (!backskytex) - 
R_Drawers()->DrawSingleSkyColumn(solid_top, solid_bottom); + R_Drawers()->DrawSingleSkyColumn(solid_top, solid_bottom, fadeSky); else - R_Drawers()->DrawDoubleSkyColumn(solid_top, solid_bottom); + R_Drawers()->DrawDoubleSkyColumn(solid_top, solid_bottom, fadeSky); } void RenderSkyPlane::DrawSkyColumn(int start_x, int y1, int y2, int columns) @@ -374,7 +252,7 @@ namespace swrenderer } } - void RenderSkyPlane::DrawCapSky(visplane_t *pl) + void RenderSkyPlane::DrawSky(visplane_t *pl) { int x1 = pl->left; int x2 = pl->right; @@ -391,125 +269,4 @@ namespace swrenderer DrawSkyColumn(x, y1, y2, 1); } } - - void RenderSkyPlane::DrawSky(visplane_t *pl) - { - if (r_skymode == 2 && !(level.flags & LEVEL_FORCETILEDSKY)) - { - DrawCapSky(pl); - return; - } - - int x; - float swal; - - if (pl->left >= pl->right) - return; - - swal = skyiscale; - for (x = pl->left; x < pl->right; ++x) - { - swall[x] = swal; - } - - RenderPortal *renderportal = RenderPortal::Instance(); - - if (renderportal->MirrorFlags & RF_XFLIP) - { - for (x = pl->left; x < pl->right; ++x) - { - lwall[x] = (viewwidth - x) << FRACBITS; - } - } - else - { - for (x = pl->left; x < pl->right; ++x) - { - lwall[x] = x << FRACBITS; - } - } - - for (x = 0; x < 4; ++x) - { - lastskycol[x] = 0xffffffff; - lastskycol_bgra[x] = 0xffffffff; - } - - frontyScale = frontskytex->Scale.Y; - double texturemid = skymid * frontyScale; - - if (1 << frontskytex->HeightBits == frontskytex->GetHeight()) - { // The texture tiles nicely - for (x = 0; x < 4; ++x) - { - lastskycol[x] = 0xffffffff; - lastskycol_bgra[x] = 0xffffffff; - } - R_DrawSkySegment(frontskytex, pl->left, pl->right, (short *)pl->top, (short *)pl->bottom, texturemid, swall, lwall, - frontyScale, 0, 0, 0.0f, 0.0f, nullptr, backskytex == nullptr ? 
RenderSkyPlane::GetOneSkyColumn : RenderSkyPlane::GetTwoSkyColumns); - } - else - { // The texture does not tile nicely - frontyScale *= skyscale; - frontiScale = 1 / frontyScale; - DrawSkyStriped(pl); - } - } - - void RenderSkyPlane::DrawSkyStriped(visplane_t *pl) - { - short drawheight = short(frontskytex->GetHeight() * frontyScale); - double topfrac; - double iscale = frontiScale; - short top[MAXWIDTH], bot[MAXWIDTH]; - short yl, yh; - int x; - - topfrac = fmod(skymid + iscale * (1 - CenterY), frontskytex->GetHeight()); - if (topfrac < 0) topfrac += frontskytex->GetHeight(); - yl = 0; - yh = short((frontskytex->GetHeight() - topfrac) * frontyScale); - double texturemid = topfrac - iscale * (1 - CenterY); - - while (yl < viewheight) - { - for (x = pl->left; x < pl->right; ++x) - { - top[x] = MAX(yl, (short)pl->top[x]); - bot[x] = MIN(yh, (short)pl->bottom[x]); - } - for (x = 0; x < 4; ++x) - { - lastskycol[x] = 0xffffffff; - lastskycol_bgra[x] = 0xffffffff; - } - R_DrawSkySegment(frontskytex, pl->left, pl->right, top, bot, texturemid, swall, lwall, frontskytex->Scale.Y, 0, 0, 0.0f, 0.0f, nullptr, backskytex == nullptr ? RenderSkyPlane::GetOneSkyColumn : RenderSkyPlane::GetTwoSkyColumns); - yl = yh; - yh += drawheight; - texturemid = iscale * (centery - yl - 1); - } - } - - FTexture *RenderSkyPlane::frontskytex; - FTexture *RenderSkyPlane::backskytex; - angle_t RenderSkyPlane::skyflip; - int RenderSkyPlane::frontpos; - int RenderSkyPlane::backpos; - double RenderSkyPlane::frontyScale; - fixed_t RenderSkyPlane::frontcyl; - fixed_t RenderSkyPlane::backcyl; - double RenderSkyPlane::skymid; - angle_t RenderSkyPlane::skyangle; - double RenderSkyPlane::frontiScale; - - // Allow for layer skies up to 512 pixels tall. This is overkill, - // since the most anyone can ever see of the sky is 500 pixels. - // We need 4 skybufs because DrawSkySegment can draw up to 4 columns at a time. 
- // Need two versions - one for true color and one for palette - uint8_t RenderSkyPlane::skybuf[4][512]; - uint32_t RenderSkyPlane::skybuf_bgra[MAXSKYBUF][512]; - uint32_t RenderSkyPlane::lastskycol[4]; - uint32_t RenderSkyPlane::lastskycol_bgra[MAXSKYBUF]; - int RenderSkyPlane::skycolplace; - int RenderSkyPlane::skycolplace_bgra; } diff --git a/src/swrenderer/plane/r_skyplane.h b/src/swrenderer/plane/r_skyplane.h index 3acf4d53a6..8deacaf188 100644 --- a/src/swrenderer/plane/r_skyplane.h +++ b/src/swrenderer/plane/r_skyplane.h @@ -20,40 +20,21 @@ namespace swrenderer class RenderSkyPlane { public: - static void Render(visplane_t *pl); + void Render(visplane_t *pl); private: - static void DrawSky(visplane_t *pl); - static void DrawSkyStriped(visplane_t *pl); - static void DrawCapSky(visplane_t *pl); - static void DrawSkyColumnStripe(int start_x, int y1, int y2, int columns, double scale, double texturemid, double yrepeat); - static void DrawSkyColumn(int start_x, int y1, int y2, int columns); + void DrawSky(visplane_t *pl); + void DrawSkyColumnStripe(int start_x, int y1, int y2, int columns, double scale, double texturemid, double yrepeat); + void DrawSkyColumn(int start_x, int y1, int y2, int columns); - static const uint8_t *GetOneSkyColumn(FTexture *fronttex, int x); - static const uint8_t *GetTwoSkyColumns(FTexture *fronttex, int x); - - static FTexture *frontskytex; - static FTexture *backskytex; - static angle_t skyflip; - static int frontpos; - static int backpos; - static double frontyScale; - static fixed_t frontcyl; - static fixed_t backcyl; - static double skymid; - static angle_t skyangle; - static double frontiScale; - - // Allow for layer skies up to 512 pixels tall. This is overkill, - // since the most anyone can ever see of the sky is 500 pixels. - // We need 4 skybufs because R_DrawSkySegment can draw up to 4 columns at a time. 
- // Need two versions - one for true color and one for palette - enum { MAXSKYBUF = 3072 }; - static uint8_t skybuf[4][512]; - static uint32_t skybuf_bgra[MAXSKYBUF][512]; - static uint32_t lastskycol[4]; - static uint32_t lastskycol_bgra[MAXSKYBUF]; - static int skycolplace; - static int skycolplace_bgra; + FTexture *frontskytex = nullptr; + FTexture *backskytex = nullptr; + angle_t skyflip = 0; + int frontpos = 0; + int backpos = 0; + fixed_t frontcyl = 0; + fixed_t backcyl = 0; + double skymid = 0.0; + angle_t skyangle = 0; }; } diff --git a/src/swrenderer/plane/r_visibleplane.cpp b/src/swrenderer/plane/r_visibleplane.cpp index a1928bbb8e..a1d7dc8ddd 100644 --- a/src/swrenderer/plane/r_visibleplane.cpp +++ b/src/swrenderer/plane/r_visibleplane.cpp @@ -82,7 +82,8 @@ namespace swrenderer if (picnum == skyflatnum) // sky flat { - RenderSkyPlane::Render(this); + RenderSkyPlane renderer; + renderer.Render(this); } else // regular flat { From 111b5c54691f5886a7283fbbe1fe8cfe7ba97bbb Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 19 Jan 2017 00:12:54 +0100 Subject: [PATCH 733/912] Add support for repeating skies in the TC sky drawer --- src/swrenderer/drawers/r_draw_rgba.cpp | 1 + src/swrenderer/drawers/r_drawers.h | 5 ++++ .../fixedfunction/drawskycodegen.cpp | 23 +++++++++++++++---- .../drawergen/fixedfunction/drawskycodegen.h | 4 +++- tools/drawergen/llvmdrawers.cpp | 2 +- 5 files changed, 29 insertions(+), 6 deletions(-) diff --git a/src/swrenderer/drawers/r_draw_rgba.cpp b/src/swrenderer/drawers/r_draw_rgba.cpp index a520d28ae1..fcf94e5da1 100644 --- a/src/swrenderer/drawers/r_draw_rgba.cpp +++ b/src/swrenderer/drawers/r_draw_rgba.cpp @@ -329,6 +329,7 @@ namespace swrenderer args.textureheight1 = dc_wall_sourceheight[1]; args.top_color = solid_top; args.bottom_color = solid_bottom; + args.flags = fadeSky ? 
DrawSkyArgs::fade_sky : 0; DetectRangeError(args.dest, args.dest_y, args.count); } diff --git a/src/swrenderer/drawers/r_drawers.h b/src/swrenderer/drawers/r_drawers.h index 0492930d96..2d251478c6 100644 --- a/src/swrenderer/drawers/r_drawers.h +++ b/src/swrenderer/drawers/r_drawers.h @@ -200,6 +200,11 @@ struct DrawSkyArgs uint32_t textureheight1; uint32_t top_color; uint32_t bottom_color; + uint32_t flags; + enum Flags + { + fade_sky = 1 + }; FString ToString(); }; diff --git a/tools/drawergen/fixedfunction/drawskycodegen.cpp b/tools/drawergen/fixedfunction/drawskycodegen.cpp index 3bd23e5290..9fdd33a17c 100644 --- a/tools/drawergen/fixedfunction/drawskycodegen.cpp +++ b/tools/drawergen/fixedfunction/drawskycodegen.cpp @@ -47,12 +47,15 @@ void DrawSkyCodegen::Generate(DrawSkyVariant variant, SSAValue args, SSAValue th maxtextureheight1 = textureheight1 - 1; top_color = SSAVec4i::unpack(args[0][22].load(true)); bottom_color = SSAVec4i::unpack(args[0][23].load(true)); + SSAInt flags = args[0][24].load(true); thread.core = thread_data[0][0].load(true); thread.num_cores = thread_data[0][1].load(true); thread.pass_start_y = thread_data[0][2].load(true); thread.pass_end_y = thread_data[0][3].load(true); + is_fade_sky = (flags & DrawSkyArgs::fade_sky) == SSAInt(DrawSkyArgs::fade_sky); + count = count_for_thread(dest_y, count, thread); dest = dest_for_thread(dest_y, pitch, dest, thread); @@ -61,10 +64,15 @@ void DrawSkyCodegen::Generate(DrawSkyVariant variant, SSAValue args, SSAValue th stack_frac.store(texturefrac + iscale * skipped_by_thread(dest_y, thread)); fracstep = iscale * thread.num_cores; - Loop(variant); + SSAIfBlock branch; + branch.if_block(is_fade_sky); + Loop(variant, true); + branch.else_block(); + Loop(variant, false); + branch.end_block(); } -void DrawSkyCodegen::Loop(DrawSkyVariant variant) +void DrawSkyCodegen::Loop(DrawSkyVariant variant, bool fade_sky) { stack_index.store(SSAInt(0)); { @@ -76,8 +84,15 @@ void DrawSkyCodegen::Loop(DrawSkyVariant 
variant) SSAInt offset = index * pitch * 4; - SSAVec4i color = FadeOut(frac, Sample(frac, variant)); - dest[offset].store_vec4ub(color); + if (fade_sky) + { + SSAVec4i color = FadeOut(frac, Sample(frac, variant)); + dest[offset].store_vec4ub(color); + } + else + { + dest[offset].store_vec4ub(Sample(frac, variant)); + } stack_index.store(index.add(SSAInt(1), true, true)); stack_frac.store(frac + fracstep); diff --git a/tools/drawergen/fixedfunction/drawskycodegen.h b/tools/drawergen/fixedfunction/drawskycodegen.h index 6cd700203f..463c8ca232 100644 --- a/tools/drawergen/fixedfunction/drawskycodegen.h +++ b/tools/drawergen/fixedfunction/drawskycodegen.h @@ -36,7 +36,7 @@ public: void Generate(DrawSkyVariant variant, SSAValue args, SSAValue thread_data); private: - void Loop(DrawSkyVariant variant); + void Loop(DrawSkyVariant variant, bool fade_sky); SSAVec4i Sample(SSAInt frac, DrawSkyVariant variant); SSAVec4i FadeOut(SSAInt frac, SSAVec4i color); @@ -57,4 +57,6 @@ private: SSAWorkerThread thread; SSAInt fracstep; + + SSABool is_fade_sky; }; diff --git a/tools/drawergen/llvmdrawers.cpp b/tools/drawergen/llvmdrawers.cpp index 995b5b8496..7822597020 100644 --- a/tools/drawergen/llvmdrawers.cpp +++ b/tools/drawergen/llvmdrawers.cpp @@ -280,7 +280,7 @@ llvm::Type *LLVMDrawers::GetDrawSkyArgsStruct(llvm::LLVMContext &context) elements.push_back(llvm::Type::getInt8PtrTy(context)); for (int i = 0; i < 8; i++) elements.push_back(llvm::Type::getInt8PtrTy(context)); - for (int i = 0; i < 15; i++) + for (int i = 0; i < 16; i++) elements.push_back(llvm::Type::getInt32Ty(context)); DrawSkyArgsStruct = llvm::StructType::create(context, elements, "DrawSkyArgs", false)->getPointerTo(); return DrawSkyArgsStruct; From a92771431bb8189edc82d23f0e721f82ee7aa13c Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 19 Jan 2017 01:47:58 +0100 Subject: [PATCH 734/912] Changed visible plane list to use the shared frame memory allocator instead of using its own internal free list --- 
src/swrenderer/plane/r_visibleplane.cpp | 12 +++ src/swrenderer/plane/r_visibleplane.h | 48 +++++----- src/swrenderer/plane/r_visibleplanelist.cpp | 97 +++------------------ src/swrenderer/plane/r_visibleplanelist.h | 4 - src/swrenderer/scene/r_portal.cpp | 9 +- src/swrenderer/scene/r_scene.cpp | 3 - 6 files changed, 49 insertions(+), 124 deletions(-) diff --git a/src/swrenderer/plane/r_visibleplane.cpp b/src/swrenderer/plane/r_visibleplane.cpp index a1d7dc8ddd..755af34615 100644 --- a/src/swrenderer/plane/r_visibleplane.cpp +++ b/src/swrenderer/plane/r_visibleplane.cpp @@ -43,6 +43,18 @@ CVAR(Bool, tilt, false, 0); namespace swrenderer { + visplane_t::visplane_t() + { + picnum.SetNull(); + height.set(0.0, 0.0, 1.0, 0.0); + + bottom = RenderMemory::AllocMemory(viewwidth); + top = RenderMemory::AllocMemory(viewwidth); + + fillshort(bottom, viewwidth, 0); + fillshort(top, viewwidth, 0x7fff); + } + void visplane_t::AddLights(FLightNode *node) { if (!r_dynlights) diff --git a/src/swrenderer/plane/r_visibleplane.h b/src/swrenderer/plane/r_visibleplane.h index f905a47588..6c06fc8020 100644 --- a/src/swrenderer/plane/r_visibleplane.h +++ b/src/swrenderer/plane/r_visibleplane.h @@ -15,6 +15,8 @@ #include #include "r_defs.h" +#include "r_state.h" +#include "swrenderer/r_memory.h" class ADynamicLight; struct FLightNode; @@ -31,40 +33,42 @@ namespace swrenderer struct visplane_t { - visplane_t *next; // Next visplane in hash chain -- killough + visplane_t(); - FDynamicColormap *colormap; // [RH] Support multiple colormaps - FSectorPortal *portal; // [RH] Support sky boxes - visplane_light *lights; + void AddLights(FLightNode *node); + void Render(fixed_t alpha, bool additive, bool masked); + + visplane_t *next = nullptr; // Next visplane in hash chain -- killough + + FDynamicColormap *colormap = nullptr; // [RH] Support multiple colormaps + FSectorPortal *portal = nullptr; // [RH] Support sky boxes + visplane_light *lights = nullptr; FTransform xform; secplane_t height; 
FTextureID picnum; - int lightlevel; - int left, right; - int sky; + int lightlevel = 0; + int left = viewwidth; + int right = 0; + int sky = 0; // [RH] This set of variables copies information from the time when the // visplane is created. They are only used by stacks so that you can // have stacked sectors inside a skybox. If the visplane is not for a // stack, then they are unused. - int extralight; - double visibility; - DVector3 viewpos; - DAngle viewangle; - fixed_t Alpha; - bool Additive; + int extralight = 0; + double visibility = 0.0; + DVector3 viewpos = { 0.0, 0.0, 0.0 }; + DAngle viewangle = { 0.0 }; + fixed_t Alpha = 0; + bool Additive = false; // kg3D - keep track of mirror and skybox owner - int CurrentSkybox; - int CurrentPortalUniq; // mirror counter, counts all of them - int MirrorFlags; // this is not related to CurrentMirror + int CurrentSkybox = 0; + int CurrentPortalUniq = 0; // mirror counter, counts all of them + int MirrorFlags = 0; // this is not related to CurrentMirror - unsigned short *bottom; // [RH] bottom and top arrays are dynamically - unsigned short pad; // allocated immediately after the - unsigned short top[]; // visplane. - - void AddLights(FLightNode *node); - void Render(fixed_t alpha, bool additive, bool masked); + uint16_t *bottom = nullptr; + uint16_t *top = nullptr; }; } diff --git a/src/swrenderer/plane/r_visibleplanelist.cpp b/src/swrenderer/plane/r_visibleplanelist.cpp index 7cdfdc4254..b6e6de1cbc 100644 --- a/src/swrenderer/plane/r_visibleplanelist.cpp +++ b/src/swrenderer/plane/r_visibleplanelist.cpp @@ -54,87 +54,22 @@ namespace swrenderer { for (auto &plane : visplanes) plane = nullptr; - freehead = &freetail; - } - - void VisiblePlaneList::Deinit() - { - // do not use R_ClearPlanes because at this point the screen pointer is no longer valid. 
- for (int i = 0; i <= MAXVISPLANES; i++) // new code -- killough - { - for (*freehead = visplanes[i], visplanes[i] = nullptr; *freehead; ) - { - freehead = &(*freehead)->next; - } - } - for (visplane_t *pl = freetail; pl != nullptr; ) - { - visplane_t *next = pl->next; - free(pl); - pl = next; - } } visplane_t *VisiblePlaneList::Add(unsigned hash) { - visplane_t *check = freetail; - - if (check == nullptr) - { - check = (visplane_t *)M_Malloc(sizeof(*check) + 3 + sizeof(*check->top)*(MAXWIDTH * 2)); - memset(check, 0, sizeof(*check) + 3 + sizeof(*check->top)*(MAXWIDTH * 2)); - check->bottom = check->top + MAXWIDTH + 2; - } - else if (nullptr == (freetail = freetail->next)) - { - freehead = &freetail; - } - - check->lights = nullptr; - - check->next = visplanes[hash]; - visplanes[hash] = check; - return check; - } - - void VisiblePlaneList::Init() - { - int i; - visplane_t *pl; - - // Free all visplanes and let them be re-allocated as needed. - pl = freetail; - - while (pl) - { - visplane_t *next = pl->next; - M_Free(pl); - pl = next; - } - freetail = nullptr; - freehead = &freetail; - - for (i = 0; i < MAXVISPLANES; i++) - { - pl = visplanes[i]; - visplanes[i] = nullptr; - while (pl) - { - visplane_t *next = pl->next; - M_Free(pl); - pl = next; - } - } + visplane_t *newplane = RenderMemory::NewObject(); + newplane->next = visplanes[hash]; + visplanes[hash] = newplane; + return newplane; } void VisiblePlaneList::Clear(bool fullclear) { - int i; - // Don't clear fake planes if not doing a full clear. 
if (!fullclear) { - for (i = 0; i <= MAXVISPLANES - 1; i++) // new code -- killough + for (int i = 0; i <= MAXVISPLANES - 1; i++) { for (visplane_t **probe = &visplanes[i]; *probe != nullptr; ) { @@ -143,25 +78,18 @@ namespace swrenderer probe = &(*probe)->next; } else - { // not fake: move to freelist + { // not fake: move from list visplane_t *vis = *probe; - *freehead = vis; *probe = vis->next; vis->next = nullptr; - freehead = &vis->next; } } } } else { - for (i = 0; i <= MAXVISPLANES; i++) // new code -- killough - { - for (*freehead = visplanes[i], visplanes[i] = nullptr; *freehead; ) - { - freehead = &(*freehead)->next; - } - } + for (int i = 0; i <= MAXVISPLANES; i++) + visplanes[i] = nullptr; } } @@ -282,8 +210,6 @@ namespace swrenderer check->colormap = basecolormap; // [RH] Save colormap check->sky = sky; check->portal = portal; - check->left = viewwidth; // Was SCREENWIDTH -- killough 11/98 - check->right = 0; check->extralight = renderportal->stacked_extralight; check->visibility = renderportal->stacked_visibility; check->viewpos = renderportal->stacked_viewpos; @@ -294,8 +220,6 @@ namespace swrenderer check->MirrorFlags = renderportal->MirrorFlags; check->CurrentSkybox = Clip3DFloors::Instance()->CurrentSkybox; - fillshort(check->top, viewwidth, 0x7fff); - return check; } @@ -330,8 +254,8 @@ namespace swrenderer intrh = stop; } - for (x = intrl; x < intrh && pl->top[x] == 0x7fff; x++) - ; + x = intrl; + while (x < intrh && pl->top[x] == 0x7fff) x++; if (x >= intrh) { @@ -374,7 +298,6 @@ namespace swrenderer pl = new_pl; pl->left = start; pl->right = stop; - fillshort(pl->top, viewwidth, 0x7fff); } return pl; } diff --git a/src/swrenderer/plane/r_visibleplanelist.h b/src/swrenderer/plane/r_visibleplanelist.h index a38fe2ffd9..46197c99c4 100644 --- a/src/swrenderer/plane/r_visibleplanelist.h +++ b/src/swrenderer/plane/r_visibleplanelist.h @@ -27,8 +27,6 @@ namespace swrenderer public: static VisiblePlaneList *Instance(); - void Init(); - void Deinit(); 
void Clear(bool fullclear); visplane_t *FindPlane(const secplane_t &height, FTextureID picnum, int lightlevel, double Alpha, bool additive, const FTransform &xxform, int sky, FSectorPortal *portal, FDynamicColormap *basecolormap); @@ -39,8 +37,6 @@ namespace swrenderer enum { MAXVISPLANES = 128 }; // must be a power of 2 visplane_t *visplanes[MAXVISPLANES + 1]; - visplane_t *freetail = nullptr; - visplane_t **freehead = nullptr; private: VisiblePlaneList(); diff --git a/src/swrenderer/scene/r_portal.cpp b/src/swrenderer/scene/r_portal.cpp index 2ab01b0bb0..0111d74bee 100644 --- a/src/swrenderer/scene/r_portal.cpp +++ b/src/swrenderer/scene/r_portal.cpp @@ -123,8 +123,6 @@ namespace swrenderer if (pl->right < pl->left || !r_skyboxes || numskyboxes == MAX_SKYBOX_PLANES || pl->portal == nullptr) { pl->Render(OPAQUE, false, false); - *planes->freehead = pl; - planes->freehead = &pl->next; continue; } @@ -164,8 +162,6 @@ namespace swrenderer default: pl->Render(OPAQUE, false, false); - *planes->freehead = pl; - planes->freehead = &pl->next; numskyboxes--; continue; } @@ -262,8 +258,6 @@ namespace swrenderer { pl->Render(pl->Alpha, pl->Additive, true); } - *planes->freehead = pl; - planes->freehead = &pl->next; } firstdrawseg = drawsegs; ds_p = drawsegs + savedds_p; @@ -283,8 +277,7 @@ namespace swrenderer if (Clip3DFloors::Instance()->fakeActive) return; - for (*planes->freehead = planes->visplanes[VisiblePlaneList::MAXVISPLANES], planes->visplanes[VisiblePlaneList::MAXVISPLANES] = nullptr; *planes->freehead; ) - planes->freehead = &(*planes->freehead)->next; + planes->visplanes[VisiblePlaneList::MAXVISPLANES] = nullptr; } void RenderPortal::RenderLinePortals() diff --git a/src/swrenderer/scene/r_scene.cpp b/src/swrenderer/scene/r_scene.cpp index dfbf63a96e..451bd64989 100644 --- a/src/swrenderer/scene/r_scene.cpp +++ b/src/swrenderer/scene/r_scene.cpp @@ -246,8 +246,6 @@ namespace swrenderer void RenderScene::ScreenResized() { - VisiblePlaneList::Instance()->Init(); - 
RenderTarget = screen; int width = SCREENWIDTH; int height = SCREENHEIGHT; @@ -271,7 +269,6 @@ namespace swrenderer void RenderScene::Deinit() { RenderTranslucentPass::Deinit(); - VisiblePlaneList::Instance()->Deinit(); Clip3DFloors::Instance()->Cleanup(); R_FreeDrawSegs(); } From 9eef7f9b323ae4a1e5423720d6d01774358c09ba Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 19 Jan 2017 03:02:32 +0100 Subject: [PATCH 735/912] Make visplanes hash list private --- src/swrenderer/plane/r_visibleplanelist.cpp | 21 +++++++++++++++++++++ src/swrenderer/plane/r_visibleplanelist.h | 10 +++++++--- src/swrenderer/scene/r_portal.cpp | 18 +++++------------- 3 files changed, 33 insertions(+), 16 deletions(-) diff --git a/src/swrenderer/plane/r_visibleplanelist.cpp b/src/swrenderer/plane/r_visibleplanelist.cpp index b6e6de1cbc..d9849f2cc6 100644 --- a/src/swrenderer/plane/r_visibleplanelist.cpp +++ b/src/swrenderer/plane/r_visibleplanelist.cpp @@ -302,6 +302,27 @@ namespace swrenderer return pl; } + bool VisiblePlaneList::HasPortalPlanes() const + { + return visplanes[MAXVISPLANES] != nullptr; + } + + visplane_t *VisiblePlaneList::PopFirstPortalPlane() + { + visplane_t *pl = visplanes[VisiblePlaneList::MAXVISPLANES]; + if (pl) + { + visplanes[VisiblePlaneList::MAXVISPLANES] = pl->next; + pl->next = nullptr; + } + return pl; + } + + void VisiblePlaneList::ClearPortalPlanes() + { + visplanes[VisiblePlaneList::MAXVISPLANES] = nullptr; + } + int VisiblePlaneList::Render() { visplane_t *pl; diff --git a/src/swrenderer/plane/r_visibleplanelist.h b/src/swrenderer/plane/r_visibleplanelist.h index 46197c99c4..64266c80de 100644 --- a/src/swrenderer/plane/r_visibleplanelist.h +++ b/src/swrenderer/plane/r_visibleplanelist.h @@ -32,16 +32,20 @@ namespace swrenderer visplane_t *FindPlane(const secplane_t &height, FTextureID picnum, int lightlevel, double Alpha, bool additive, const FTransform &xxform, int sky, FSectorPortal *portal, FDynamicColormap *basecolormap); visplane_t 
*GetRange(visplane_t *pl, int start, int stop); + bool HasPortalPlanes() const; + visplane_t *PopFirstPortalPlane(); + void ClearPortalPlanes(); + int Render(); void RenderHeight(double height); - enum { MAXVISPLANES = 128 }; // must be a power of 2 - visplane_t *visplanes[MAXVISPLANES + 1]; - private: VisiblePlaneList(); visplane_t *Add(unsigned hash); + enum { MAXVISPLANES = 128 }; // must be a power of 2 + visplane_t *visplanes[MAXVISPLANES + 1]; + static unsigned CalcHash(int picnum, int lightlevel, const secplane_t &height) { return (unsigned)((picnum) * 3 + (lightlevel)+(FLOAT2FIXED((height).fD())) * 7) & (MAXVISPLANES - 1); } }; } diff --git a/src/swrenderer/scene/r_portal.cpp b/src/swrenderer/scene/r_portal.cpp index 0111d74bee..d05a8c4cae 100644 --- a/src/swrenderer/scene/r_portal.cpp +++ b/src/swrenderer/scene/r_portal.cpp @@ -94,7 +94,7 @@ namespace swrenderer VisiblePlaneList *planes = VisiblePlaneList::Instance(); - if (planes->visplanes[VisiblePlaneList::MAXVISPLANES] == nullptr) + if (!planes->HasPortalPlanes()) return; Clip3DFloors::Instance()->EnterSkybox(); @@ -109,17 +109,8 @@ namespace swrenderer AActor *savedcamera = camera; sector_t *savedsector = viewsector; - int i; - visplane_t *pl; - - for (pl = planes->visplanes[VisiblePlaneList::MAXVISPLANES]; pl != nullptr; pl = planes->visplanes[VisiblePlaneList::MAXVISPLANES]) + for (visplane_t *pl = planes->PopFirstPortalPlane(); pl != nullptr; pl = planes->PopFirstPortalPlane()) { - // Pop the visplane off the list now so that if this skybox adds more - // skyboxes to the list, they will be drawn instead of skipped (because - // new skyboxes go to the beginning of the list instead of the end). 
- planes->visplanes[VisiblePlaneList::MAXVISPLANES] = pl->next; - pl->next = nullptr; - if (pl->right < pl->left || !r_skyboxes || numskyboxes == MAX_SKYBOX_PLANES || pl->portal == nullptr) { pl->Render(OPAQUE, false, false); @@ -181,7 +172,7 @@ namespace swrenderer auto ceilingclip = RenderOpaquePass::Instance()->ceilingclip; auto floorclip = RenderOpaquePass::Instance()->floorclip; - for (i = pl->left; i < pl->right; i++) + for (int i = pl->left; i < pl->right; i++) { if (pl->top[i] == 0x7fff) { @@ -253,6 +244,7 @@ namespace swrenderer VisibleSpriteList::Instance()->PopPortal(); + visplane_t *pl; visplaneStack.Pop(pl); if (pl->Alpha > 0 && pl->picnum != skyflatnum) { @@ -277,7 +269,7 @@ namespace swrenderer if (Clip3DFloors::Instance()->fakeActive) return; - planes->visplanes[VisiblePlaneList::MAXVISPLANES] = nullptr; + planes->ClearPortalPlanes(); } void RenderPortal::RenderLinePortals() From e94cb3f11421599b4981d5d522fa31f3b54db6c0 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 19 Jan 2017 03:11:49 +0100 Subject: [PATCH 736/912] Rename visplane_t to VisiblePlane --- src/swrenderer/line/r_line.cpp | 2 +- src/swrenderer/line/r_line.h | 8 +++---- src/swrenderer/plane/r_flatplane.cpp | 6 +++--- src/swrenderer/plane/r_flatplane.h | 8 +++---- src/swrenderer/plane/r_planerenderer.cpp | 2 +- src/swrenderer/plane/r_planerenderer.h | 4 ++-- src/swrenderer/plane/r_skyplane.cpp | 4 ++-- src/swrenderer/plane/r_skyplane.h | 4 ++-- src/swrenderer/plane/r_slopeplane.cpp | 2 +- src/swrenderer/plane/r_slopeplane.h | 2 +- src/swrenderer/plane/r_visibleplane.cpp | 10 ++++----- src/swrenderer/plane/r_visibleplane.h | 12 +++++------ src/swrenderer/plane/r_visibleplanelist.cpp | 24 ++++++++++----------- src/swrenderer/plane/r_visibleplanelist.h | 12 +++++------ src/swrenderer/scene/r_opaque_pass.cpp | 10 ++++----- src/swrenderer/scene/r_opaque_pass.h | 4 ++-- src/swrenderer/scene/r_portal.cpp | 4 ++-- src/swrenderer/scene/r_portal.h | 4 ++-- 18 files changed, 61 
insertions(+), 61 deletions(-) diff --git a/src/swrenderer/line/r_line.cpp b/src/swrenderer/line/r_line.cpp index 51eeb76ff5..95adff8aa2 100644 --- a/src/swrenderer/line/r_line.cpp +++ b/src/swrenderer/line/r_line.cpp @@ -56,7 +56,7 @@ EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor); namespace swrenderer { - void SWRenderLine::Render(seg_t *line, subsector_t *subsector, sector_t *sector, sector_t *fakebacksector, visplane_t *linefloorplane, visplane_t *lineceilingplane, bool infog, FDynamicColormap *colormap) + void SWRenderLine::Render(seg_t *line, subsector_t *subsector, sector_t *sector, sector_t *fakebacksector, VisiblePlane *linefloorplane, VisiblePlane *lineceilingplane, bool infog, FDynamicColormap *colormap) { static sector_t tempsec; // killough 3/8/98: ceiling/water hack bool solid; diff --git a/src/swrenderer/line/r_line.h b/src/swrenderer/line/r_line.h index c1ee23898d..cb48eb4bbd 100644 --- a/src/swrenderer/line/r_line.h +++ b/src/swrenderer/line/r_line.h @@ -24,7 +24,7 @@ struct FDynamicColormap; namespace swrenderer { - struct visplane_t; + struct VisiblePlane; struct FWallCoords { @@ -49,7 +49,7 @@ namespace swrenderer class SWRenderLine { public: - void Render(seg_t *line, subsector_t *subsector, sector_t *sector, sector_t *fakebacksector, visplane_t *floorplane, visplane_t *ceilingplane, bool foggy, FDynamicColormap *basecolormap); + void Render(seg_t *line, subsector_t *subsector, sector_t *sector, sector_t *fakebacksector, VisiblePlane *floorplane, VisiblePlane *ceilingplane, bool foggy, FDynamicColormap *basecolormap); private: bool RenderWallSegment(int x1, int x2); @@ -62,8 +62,8 @@ namespace swrenderer subsector_t *InSubsector; sector_t *frontsector; sector_t *backsector; - visplane_t *floorplane; - visplane_t *ceilingplane; + VisiblePlane *floorplane; + VisiblePlane *ceilingplane; seg_t *curline; side_t *sidedef; diff --git a/src/swrenderer/plane/r_flatplane.cpp b/src/swrenderer/plane/r_flatplane.cpp index 4c020468e1..72e25dc1e7 100644 --- 
a/src/swrenderer/plane/r_flatplane.cpp +++ b/src/swrenderer/plane/r_flatplane.cpp @@ -44,7 +44,7 @@ namespace swrenderer { - void RenderFlatPlane::Render(visplane_t *pl, double _xscale, double _yscale, fixed_t alpha, bool additive, bool masked, FDynamicColormap *colormap) + void RenderFlatPlane::Render(VisiblePlane *pl, double _xscale, double _yscale, fixed_t alpha, bool additive, bool masked, FDynamicColormap *colormap) { using namespace drawerargs; @@ -260,7 +260,7 @@ namespace swrenderer // Setup lights for row dc_num_lights = 0; dc_lights = lightbuffer + nextlightindex; - visplane_light *cur_node = light_list; + VisiblePlaneLight *cur_node = light_list; while (cur_node && nextlightindex < 64 * 1024) { double lightX = cur_node->lightsource->X() - ViewPos.X; @@ -363,7 +363,7 @@ namespace swrenderer ///////////////////////////////////////////////////////////////////////// - void RenderColoredPlane::Render(visplane_t *pl) + void RenderColoredPlane::Render(VisiblePlane *pl) { RenderLines(pl); } diff --git a/src/swrenderer/plane/r_flatplane.h b/src/swrenderer/plane/r_flatplane.h index c4077a908c..249fe7f724 100644 --- a/src/swrenderer/plane/r_flatplane.h +++ b/src/swrenderer/plane/r_flatplane.h @@ -17,12 +17,12 @@ namespace swrenderer { - struct visplane_light; + struct VisiblePlaneLight; class RenderFlatPlane : PlaneRenderer { public: - void Render(visplane_t *pl, double _xscale, double _yscale, fixed_t alpha, bool additive, bool masked, FDynamicColormap *basecolormap); + void Render(VisiblePlane *pl, double _xscale, double _yscale, fixed_t alpha, bool additive, bool masked, FDynamicColormap *basecolormap); static void SetupSlope(); @@ -39,7 +39,7 @@ namespace swrenderer fixed_t xscale, yscale; double xstepscale, ystepscale; double basexfrac, baseyfrac; - visplane_light *light_list; + VisiblePlaneLight *light_list; static float yslope[MAXHEIGHT]; }; @@ -47,7 +47,7 @@ namespace swrenderer class RenderColoredPlane : PlaneRenderer { public: - void Render(visplane_t 
*pl); + void Render(VisiblePlane *pl); private: void RenderLine(int y, int x1, int x2) override; diff --git a/src/swrenderer/plane/r_planerenderer.cpp b/src/swrenderer/plane/r_planerenderer.cpp index 0a6d511bf0..a3f47c5eac 100644 --- a/src/swrenderer/plane/r_planerenderer.cpp +++ b/src/swrenderer/plane/r_planerenderer.cpp @@ -33,7 +33,7 @@ namespace swrenderer { - void PlaneRenderer::RenderLines(visplane_t *pl) + void PlaneRenderer::RenderLines(VisiblePlane *pl) { // t1/b1 are at x // t2/b2 are at x+1 diff --git a/src/swrenderer/plane/r_planerenderer.h b/src/swrenderer/plane/r_planerenderer.h index a13fb14e20..97bc17f2b9 100644 --- a/src/swrenderer/plane/r_planerenderer.h +++ b/src/swrenderer/plane/r_planerenderer.h @@ -18,12 +18,12 @@ namespace swrenderer { - struct visplane_t; + struct VisiblePlane; class PlaneRenderer { public: - void RenderLines(visplane_t *pl); + void RenderLines(VisiblePlane *pl); virtual void RenderLine(int y, int x1, int x2) = 0; virtual void StepColumn() { } diff --git a/src/swrenderer/plane/r_skyplane.cpp b/src/swrenderer/plane/r_skyplane.cpp index f2602eb1be..451d894b6b 100644 --- a/src/swrenderer/plane/r_skyplane.cpp +++ b/src/swrenderer/plane/r_skyplane.cpp @@ -49,7 +49,7 @@ EXTERN_CVAR(Int, r_skymode) namespace swrenderer { - void RenderSkyPlane::Render(visplane_t *pl) + void RenderSkyPlane::Render(VisiblePlane *pl) { FTextureID sky1tex, sky2tex; double frontdpos = 0, backdpos = 0; @@ -252,7 +252,7 @@ namespace swrenderer } } - void RenderSkyPlane::DrawSky(visplane_t *pl) + void RenderSkyPlane::DrawSky(VisiblePlane *pl) { int x1 = pl->left; int x2 = pl->right; diff --git a/src/swrenderer/plane/r_skyplane.h b/src/swrenderer/plane/r_skyplane.h index 8deacaf188..8ea638a27b 100644 --- a/src/swrenderer/plane/r_skyplane.h +++ b/src/swrenderer/plane/r_skyplane.h @@ -20,10 +20,10 @@ namespace swrenderer class RenderSkyPlane { public: - void Render(visplane_t *pl); + void Render(VisiblePlane *pl); private: - void DrawSky(visplane_t *pl); + 
void DrawSky(VisiblePlane *pl); void DrawSkyColumnStripe(int start_x, int y1, int y2, int columns, double scale, double texturemid, double yrepeat); void DrawSkyColumn(int start_x, int y1, int y2, int columns); diff --git a/src/swrenderer/plane/r_slopeplane.cpp b/src/swrenderer/plane/r_slopeplane.cpp index 9857dc11bb..115b689bae 100644 --- a/src/swrenderer/plane/r_slopeplane.cpp +++ b/src/swrenderer/plane/r_slopeplane.cpp @@ -48,7 +48,7 @@ namespace swrenderer { - void RenderSlopePlane::Render(visplane_t *pl, double _xscale, double _yscale, fixed_t alpha, bool additive, bool masked, FDynamicColormap *colormap) + void RenderSlopePlane::Render(VisiblePlane *pl, double _xscale, double _yscale, fixed_t alpha, bool additive, bool masked, FDynamicColormap *colormap) { using namespace drawerargs; diff --git a/src/swrenderer/plane/r_slopeplane.h b/src/swrenderer/plane/r_slopeplane.h index 7dc6e7838f..253e3b7cc3 100644 --- a/src/swrenderer/plane/r_slopeplane.h +++ b/src/swrenderer/plane/r_slopeplane.h @@ -20,7 +20,7 @@ namespace swrenderer class RenderSlopePlane : PlaneRenderer { public: - void Render(visplane_t *pl, double _xscale, double _yscale, fixed_t alpha, bool additive, bool masked, FDynamicColormap *basecolormap); + void Render(VisiblePlane *pl, double _xscale, double _yscale, fixed_t alpha, bool additive, bool masked, FDynamicColormap *basecolormap); private: void RenderLine(int y, int x1, int x2) override; diff --git a/src/swrenderer/plane/r_visibleplane.cpp b/src/swrenderer/plane/r_visibleplane.cpp index 755af34615..3a7df8300d 100644 --- a/src/swrenderer/plane/r_visibleplane.cpp +++ b/src/swrenderer/plane/r_visibleplane.cpp @@ -43,7 +43,7 @@ CVAR(Bool, tilt, false, 0); namespace swrenderer { - visplane_t::visplane_t() + VisiblePlane::VisiblePlane() { picnum.SetNull(); height.set(0.0, 0.0, 1.0, 0.0); @@ -55,7 +55,7 @@ namespace swrenderer fillshort(top, viewwidth, 0x7fff); } - void visplane_t::AddLights(FLightNode *node) + void VisiblePlane::AddLights(FLightNode 
*node) { if (!r_dynlights) return; @@ -65,7 +65,7 @@ namespace swrenderer if (!(node->lightsource->flags2&MF2_DORMANT)) { bool found = false; - visplane_light *light_node = lights; + VisiblePlaneLight *light_node = lights; while (light_node) { if (light_node->lightsource == node->lightsource) @@ -77,7 +77,7 @@ namespace swrenderer } if (!found) { - visplane_light *newlight = RenderMemory::NewObject(); + VisiblePlaneLight *newlight = RenderMemory::NewObject(); newlight->next = lights; newlight->lightsource = node->lightsource; lights = newlight; @@ -87,7 +87,7 @@ namespace swrenderer } } - void visplane_t::Render(fixed_t alpha, bool additive, bool masked) + void VisiblePlane::Render(fixed_t alpha, bool additive, bool masked) { if (left >= right) return; diff --git a/src/swrenderer/plane/r_visibleplane.h b/src/swrenderer/plane/r_visibleplane.h index 6c06fc8020..d26f0fdfd1 100644 --- a/src/swrenderer/plane/r_visibleplane.h +++ b/src/swrenderer/plane/r_visibleplane.h @@ -25,24 +25,24 @@ struct FSectorPortal; namespace swrenderer { - struct visplane_light + struct VisiblePlaneLight { ADynamicLight *lightsource; - visplane_light *next; + VisiblePlaneLight *next; }; - struct visplane_t + struct VisiblePlane { - visplane_t(); + VisiblePlane(); void AddLights(FLightNode *node); void Render(fixed_t alpha, bool additive, bool masked); - visplane_t *next = nullptr; // Next visplane in hash chain -- killough + VisiblePlane *next = nullptr; // Next visplane in hash chain -- killough FDynamicColormap *colormap = nullptr; // [RH] Support multiple colormaps FSectorPortal *portal = nullptr; // [RH] Support sky boxes - visplane_light *lights = nullptr; + VisiblePlaneLight *lights = nullptr; FTransform xform; secplane_t height; diff --git a/src/swrenderer/plane/r_visibleplanelist.cpp b/src/swrenderer/plane/r_visibleplanelist.cpp index d9849f2cc6..966329e974 100644 --- a/src/swrenderer/plane/r_visibleplanelist.cpp +++ b/src/swrenderer/plane/r_visibleplanelist.cpp @@ -56,9 +56,9 @@ 
namespace swrenderer plane = nullptr; } - visplane_t *VisiblePlaneList::Add(unsigned hash) + VisiblePlane *VisiblePlaneList::Add(unsigned hash) { - visplane_t *newplane = RenderMemory::NewObject(); + VisiblePlane *newplane = RenderMemory::NewObject(); newplane->next = visplanes[hash]; visplanes[hash] = newplane; return newplane; @@ -71,7 +71,7 @@ namespace swrenderer { for (int i = 0; i <= MAXVISPLANES - 1; i++) { - for (visplane_t **probe = &visplanes[i]; *probe != nullptr; ) + for (VisiblePlane **probe = &visplanes[i]; *probe != nullptr; ) { if ((*probe)->sky < 0) { // fake: move past it @@ -79,7 +79,7 @@ namespace swrenderer } else { // not fake: move from list - visplane_t *vis = *probe; + VisiblePlane *vis = *probe; *probe = vis->next; vis->next = nullptr; } @@ -93,10 +93,10 @@ namespace swrenderer } } - visplane_t *VisiblePlaneList::FindPlane(const secplane_t &height, FTextureID picnum, int lightlevel, double Alpha, bool additive, const FTransform &xxform, int sky, FSectorPortal *portal, FDynamicColormap *basecolormap) + VisiblePlane *VisiblePlaneList::FindPlane(const secplane_t &height, FTextureID picnum, int lightlevel, double Alpha, bool additive, const FTransform &xxform, int sky, FSectorPortal *portal, FDynamicColormap *basecolormap) { secplane_t plane; - visplane_t *check; + VisiblePlane *check; unsigned hash; // killough bool isskybox; const FTransform *xform = &xxform; @@ -223,7 +223,7 @@ namespace swrenderer return check; } - visplane_t *VisiblePlaneList::GetRange(visplane_t *pl, int start, int stop) + VisiblePlane *VisiblePlaneList::GetRange(VisiblePlane *pl, int start, int stop) { int intrl, intrh; int unionl, unionh; @@ -276,7 +276,7 @@ namespace swrenderer { hash = CalcHash(pl->picnum.GetIndex(), pl->lightlevel, pl->height); } - visplane_t *new_pl = Add(hash); + VisiblePlane *new_pl = Add(hash); new_pl->height = pl->height; new_pl->picnum = pl->picnum; @@ -307,9 +307,9 @@ namespace swrenderer return visplanes[MAXVISPLANES] != nullptr; } - 
visplane_t *VisiblePlaneList::PopFirstPortalPlane() + VisiblePlane *VisiblePlaneList::PopFirstPortalPlane() { - visplane_t *pl = visplanes[VisiblePlaneList::MAXVISPLANES]; + VisiblePlane *pl = visplanes[VisiblePlaneList::MAXVISPLANES]; if (pl) { visplanes[VisiblePlaneList::MAXVISPLANES] = pl->next; @@ -325,7 +325,7 @@ namespace swrenderer int VisiblePlaneList::Render() { - visplane_t *pl; + VisiblePlane *pl; int i; int vpcount = 0; @@ -352,7 +352,7 @@ namespace swrenderer void VisiblePlaneList::RenderHeight(double height) { - visplane_t *pl; + VisiblePlane *pl; int i; drawerargs::ds_color = 3; diff --git a/src/swrenderer/plane/r_visibleplanelist.h b/src/swrenderer/plane/r_visibleplanelist.h index 64266c80de..92f3609e95 100644 --- a/src/swrenderer/plane/r_visibleplanelist.h +++ b/src/swrenderer/plane/r_visibleplanelist.h @@ -20,7 +20,7 @@ struct FSectorPortal; namespace swrenderer { - struct visplane_t; + struct VisiblePlane; class VisiblePlaneList { @@ -29,11 +29,11 @@ namespace swrenderer void Clear(bool fullclear); - visplane_t *FindPlane(const secplane_t &height, FTextureID picnum, int lightlevel, double Alpha, bool additive, const FTransform &xxform, int sky, FSectorPortal *portal, FDynamicColormap *basecolormap); - visplane_t *GetRange(visplane_t *pl, int start, int stop); + VisiblePlane *FindPlane(const secplane_t &height, FTextureID picnum, int lightlevel, double Alpha, bool additive, const FTransform &xxform, int sky, FSectorPortal *portal, FDynamicColormap *basecolormap); + VisiblePlane *GetRange(VisiblePlane *pl, int start, int stop); bool HasPortalPlanes() const; - visplane_t *PopFirstPortalPlane(); + VisiblePlane *PopFirstPortalPlane(); void ClearPortalPlanes(); int Render(); @@ -41,10 +41,10 @@ namespace swrenderer private: VisiblePlaneList(); - visplane_t *Add(unsigned hash); + VisiblePlane *Add(unsigned hash); enum { MAXVISPLANES = 128 }; // must be a power of 2 - visplane_t *visplanes[MAXVISPLANES + 1]; + VisiblePlane *visplanes[MAXVISPLANES + 1]; 
static unsigned CalcHash(int picnum, int lightlevel, const secplane_t &height) { return (unsigned)((picnum) * 3 + (lightlevel)+(FLOAT2FIXED((height).fD())) * 7) & (MAXVISPLANES - 1); } }; diff --git a/src/swrenderer/scene/r_opaque_pass.cpp b/src/swrenderer/scene/r_opaque_pass.cpp index 05ebc1b1aa..2018fe38f3 100644 --- a/src/swrenderer/scene/r_opaque_pass.cpp +++ b/src/swrenderer/scene/r_opaque_pass.cpp @@ -398,7 +398,7 @@ namespace swrenderer } // kg3D - add fake segs, never rendered - void RenderOpaquePass::FakeDrawLoop(subsector_t *sub, visplane_t *floorplane, visplane_t *ceilingplane, bool foggy, FDynamicColormap *basecolormap) + void RenderOpaquePass::FakeDrawLoop(subsector_t *sub, VisiblePlane *floorplane, VisiblePlane *ceilingplane, bool foggy, FDynamicColormap *basecolormap) { int count; seg_t* line; @@ -432,8 +432,8 @@ namespace swrenderer FSectorPortal *portal; // kg3D - fake floor stuff - visplane_t *backupfp; - visplane_t *backupcp; + VisiblePlane *backupfp; + VisiblePlane *backupcp; //secplane_t templane; lightlist_t *light; @@ -498,7 +498,7 @@ namespace swrenderer portal = frontsector->ValidatePortal(sector_t::ceiling); - visplane_t *ceilingplane = frontsector->ceilingplane.PointOnSide(ViewPos) > 0 || + VisiblePlane *ceilingplane = frontsector->ceilingplane.PointOnSide(ViewPos) > 0 || frontsector->GetTexture(sector_t::ceiling) == skyflatnum || portal != nullptr || (frontsector->heightsec && @@ -539,7 +539,7 @@ namespace swrenderer // killough 10/98: add support for skies transferred from sidedefs portal = frontsector->ValidatePortal(sector_t::floor); - visplane_t *floorplane = frontsector->floorplane.PointOnSide(ViewPos) > 0 || // killough 3/7/98 + VisiblePlane *floorplane = frontsector->floorplane.PointOnSide(ViewPos) > 0 || // killough 3/7/98 frontsector->GetTexture(sector_t::floor) == skyflatnum || portal != nullptr || (frontsector->heightsec && diff --git a/src/swrenderer/scene/r_opaque_pass.h b/src/swrenderer/scene/r_opaque_pass.h index 
e0a2440929..a009cc772f 100644 --- a/src/swrenderer/scene/r_opaque_pass.h +++ b/src/swrenderer/scene/r_opaque_pass.h @@ -23,7 +23,7 @@ struct FVoxelDef; namespace swrenderer { - struct visplane_t; + struct VisiblePlane; // The 3072 below is just an arbitrary value picked to avoid // drawing lines the player is too close to that would overflow @@ -68,7 +68,7 @@ namespace swrenderer bool CheckBBox(float *bspcoord); void AddPolyobjs(subsector_t *sub); - void FakeDrawLoop(subsector_t *sub, visplane_t *floorplane, visplane_t *ceilingplane, bool foggy, FDynamicColormap *basecolormap); + void FakeDrawLoop(subsector_t *sub, VisiblePlane *floorplane, VisiblePlane *ceilingplane, bool foggy, FDynamicColormap *basecolormap); void AddSprites(sector_t *sec, int lightlevel, WaterFakeSide fakeside, bool foggy, FDynamicColormap *basecolormap); diff --git a/src/swrenderer/scene/r_portal.cpp b/src/swrenderer/scene/r_portal.cpp index d05a8c4cae..158ef7062c 100644 --- a/src/swrenderer/scene/r_portal.cpp +++ b/src/swrenderer/scene/r_portal.cpp @@ -109,7 +109,7 @@ namespace swrenderer AActor *savedcamera = camera; sector_t *savedsector = viewsector; - for (visplane_t *pl = planes->PopFirstPortalPlane(); pl != nullptr; pl = planes->PopFirstPortalPlane()) + for (VisiblePlane *pl = planes->PopFirstPortalPlane(); pl != nullptr; pl = planes->PopFirstPortalPlane()) { if (pl->right < pl->left || !r_skyboxes || numskyboxes == MAX_SKYBOX_PLANES || pl->portal == nullptr) { @@ -244,7 +244,7 @@ namespace swrenderer VisibleSpriteList::Instance()->PopPortal(); - visplane_t *pl; + VisiblePlane *pl; visplaneStack.Pop(pl); if (pl->Alpha > 0 && pl->picnum != skyflatnum) { diff --git a/src/swrenderer/scene/r_portal.h b/src/swrenderer/scene/r_portal.h index 654468d722..74bd4e3490 100644 --- a/src/swrenderer/scene/r_portal.h +++ b/src/swrenderer/scene/r_portal.h @@ -17,7 +17,7 @@ namespace swrenderer { - struct visplane_t; + struct VisiblePlane; class RenderPortal { @@ -56,6 +56,6 @@ namespace swrenderer 
TArray interestingStack; TArray drawsegStack; TArray viewposStack; - TArray visplaneStack; + TArray visplaneStack; }; } From 112085ebff3f96a52b0af3ad4a9f3720b6e1108c Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 19 Jan 2017 03:19:31 +0100 Subject: [PATCH 737/912] Split Clear into two functions --- src/swrenderer/plane/r_visibleplanelist.cpp | 39 ++++++++++----------- src/swrenderer/plane/r_visibleplanelist.h | 3 +- src/swrenderer/scene/r_portal.cpp | 4 +-- src/swrenderer/scene/r_scene.cpp | 2 +- 4 files changed, 23 insertions(+), 25 deletions(-) diff --git a/src/swrenderer/plane/r_visibleplanelist.cpp b/src/swrenderer/plane/r_visibleplanelist.cpp index 966329e974..b774b21120 100644 --- a/src/swrenderer/plane/r_visibleplanelist.cpp +++ b/src/swrenderer/plane/r_visibleplanelist.cpp @@ -64,33 +64,30 @@ namespace swrenderer return newplane; } - void VisiblePlaneList::Clear(bool fullclear) + void VisiblePlaneList::Clear() { - // Don't clear fake planes if not doing a full clear. - if (!fullclear) + for (int i = 0; i <= MAXVISPLANES; i++) + visplanes[i] = nullptr; + } + + void VisiblePlaneList::ClearKeepFakePlanes() + { + for (int i = 0; i <= MAXVISPLANES - 1; i++) { - for (int i = 0; i <= MAXVISPLANES - 1; i++) + for (VisiblePlane **probe = &visplanes[i]; *probe != nullptr; ) { - for (VisiblePlane **probe = &visplanes[i]; *probe != nullptr; ) - { - if ((*probe)->sky < 0) - { // fake: move past it - probe = &(*probe)->next; - } - else - { // not fake: move from list - VisiblePlane *vis = *probe; - *probe = vis->next; - vis->next = nullptr; - } + if ((*probe)->sky < 0) + { // fake: move past it + probe = &(*probe)->next; + } + else + { // not fake: move from list + VisiblePlane *vis = *probe; + *probe = vis->next; + vis->next = nullptr; } } } - else - { - for (int i = 0; i <= MAXVISPLANES; i++) - visplanes[i] = nullptr; - } } VisiblePlane *VisiblePlaneList::FindPlane(const secplane_t &height, FTextureID picnum, int lightlevel, double Alpha, bool additive, 
const FTransform &xxform, int sky, FSectorPortal *portal, FDynamicColormap *basecolormap) diff --git a/src/swrenderer/plane/r_visibleplanelist.h b/src/swrenderer/plane/r_visibleplanelist.h index 92f3609e95..3a3134fbda 100644 --- a/src/swrenderer/plane/r_visibleplanelist.h +++ b/src/swrenderer/plane/r_visibleplanelist.h @@ -27,7 +27,8 @@ namespace swrenderer public: static VisiblePlaneList *Instance(); - void Clear(bool fullclear); + void Clear(); + void ClearKeepFakePlanes(); VisiblePlane *FindPlane(const secplane_t &height, FTextureID picnum, int lightlevel, double Alpha, bool additive, const FTransform &xxform, int sky, FSectorPortal *portal, FDynamicColormap *basecolormap); VisiblePlane *GetRange(VisiblePlane *pl, int start, int stop); diff --git a/src/swrenderer/scene/r_portal.cpp b/src/swrenderer/scene/r_portal.cpp index 158ef7062c..4d18bcfd4b 100644 --- a/src/swrenderer/scene/r_portal.cpp +++ b/src/swrenderer/scene/r_portal.cpp @@ -165,7 +165,7 @@ namespace swrenderer R_SetViewAngle(); validcount++; // Make sure we see all sprites - planes->Clear(false); + planes->ClearKeepFakePlanes(); RenderClipSegment::Instance()->Clear(pl->left, pl->right); WindowLeft = pl->left; WindowRight = pl->right; @@ -408,7 +408,7 @@ namespace swrenderer PortalDrawseg* prevpds = CurrentPortal; CurrentPortal = pds; - VisiblePlaneList::Instance()->Clear(false); + VisiblePlaneList::Instance()->ClearKeepFakePlanes(); RenderClipSegment::Instance()->Clear(pds->x1, pds->x2); WindowLeft = pds->x1; diff --git a/src/swrenderer/scene/r_scene.cpp b/src/swrenderer/scene/r_scene.cpp index 451bd64989..659505f0da 100644 --- a/src/swrenderer/scene/r_scene.cpp +++ b/src/swrenderer/scene/r_scene.cpp @@ -133,7 +133,7 @@ namespace swrenderer // Clear buffers. 
RenderClipSegment::Instance()->Clear(0, viewwidth); R_ClearDrawSegs(); - VisiblePlaneList::Instance()->Clear(true); + VisiblePlaneList::Instance()->Clear(); RenderTranslucentPass::Clear(); // opening / clipping determination From 8af97cbbd394b370e5044cafc7cac7e30310076c Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 19 Jan 2017 03:31:51 +0100 Subject: [PATCH 738/912] Removed file was still present in CMakeLists.txt --- src/CMakeLists.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index e37e655be4..a2daa2decf 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1195,7 +1195,6 @@ set (PCH_SOURCES w_wad.cpp wi_stuff.cpp zstrformat.cpp - g_inventory/a_armor.cpp g_inventory/a_keys.cpp g_inventory/a_pickups.cpp g_inventory/a_weapons.cpp From 62fb5d87c851c3de72658a4f2ccee59f1f911c7e Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Thu, 19 Jan 2017 18:48:21 -0500 Subject: [PATCH 739/912] - fixed: Remove ccmd should check if an object is actually an inventory object before attempting to check its owner. (Ooops!) --- src/d_net.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/d_net.cpp b/src/d_net.cpp index d9eca7a43f..6e1d4436ed 100644 --- a/src/d_net.cpp +++ b/src/d_net.cpp @@ -2110,7 +2110,7 @@ static int RemoveClass(const PClass *cls) continue; } // [SP] Don't remove owned inventory objects. - if (static_cast(actor)->Owner != NULL) + if (actor->IsKindOf(RUNTIME_CLASS(AInventory)) && static_cast(actor)->Owner != NULL) { continue; } From 07409f49973988e76815d5cf3cdb008b79920f38 Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Sat, 21 Jan 2017 01:20:24 -0500 Subject: [PATCH 740/912] - fixed compile error with status bar code in poly renderer. 
--- src/polyrenderer/poly_renderer.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/polyrenderer/poly_renderer.cpp b/src/polyrenderer/poly_renderer.cpp index 06aa3cb0ed..bf30e5ef84 100644 --- a/src/polyrenderer/poly_renderer.cpp +++ b/src/polyrenderer/poly_renderer.cpp @@ -24,6 +24,7 @@ #include "templates.h" #include "doomdef.h" #include "sbar.h" +#include "st_stuff.h" #include "r_data/r_translate.h" #include "r_data/r_interpolate.h" #include "poly_renderer.h" @@ -60,7 +61,7 @@ void PolyRenderer::RenderView(player_t *player) int width = SCREENWIDTH; int height = SCREENHEIGHT; - int stHeight = ST_Y; + int stHeight = gST_Y; float trueratio; ActiveRatio(width, height, &trueratio); RenderViewport::Instance()->SetViewport(width, height, trueratio); From 33b69a27ae8ecf57fb85b80f2a66cde44d3a315f Mon Sep 17 00:00:00 2001 From: "alexey.lysiuk" Date: Sun, 22 Jan 2017 11:14:48 +0200 Subject: [PATCH 741/912] Restored HAVE_MMX definition in CMake --- src/CMakeLists.txt | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 765114b385..ffeba6d49d 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -705,6 +705,25 @@ else() set( OTHER_SYSTEM_SOURCES ${PLAT_WIN32_SOURCES} ${PLAT_OSX_SOURCES} ${PLAT_COCOA_SOURCES} ) endif() +if( HAVE_MMX ) + add_definitions( -DHAVE_MMX=1 ) + + set( SYSTEM_SOURCES ${SYSTEM_SOURCES} + gl/hqnx_asm/hq2x_asm.cpp + gl/hqnx_asm/hq3x_asm.cpp + gl/hqnx_asm/hq4x_asm.cpp + gl/hqnx_asm/hqnx_asm_Image.cpp) + + if( ZD_CMAKE_COMPILER_IS_GNUCXX_COMPATIBLE ) + set_source_files_properties( + gl/hqnx_asm/hq2x_asm.cpp + gl/hqnx_asm/hq3x_asm.cpp + gl/hqnx_asm/hq4x_asm.cpp + gl/textures/gl_hqresize.cpp + PROPERTIES COMPILE_FLAGS "-mmmx" ) + endif( ZD_CMAKE_COMPILER_IS_GNUCXX_COMPATIBLE ) +endif( HAVE_MMX ) + add_custom_command( OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/xlat_parser.c ${CMAKE_CURRENT_BINARY_DIR}/xlat_parser.h COMMAND lemon -C${CMAKE_CURRENT_BINARY_DIR} 
${CMAKE_CURRENT_SOURCE_DIR}/xlat/xlat_parser.y DEPENDS lemon ${CMAKE_CURRENT_SOURCE_DIR}/xlat/xlat_parser.y ) From 55d9392fb8f33fb3860dce1b8121e5949950aa69 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 24 Jan 2017 01:43:45 +0100 Subject: [PATCH 742/912] Add fallback code to Linux target so if OpenGL is either unavailable or can't be used it falls back to the old software SDL FB --- src/gl/system/gl_swframebuffer.cpp | 54 ++++--- src/gl/system/gl_swframebuffer.h | 2 +- src/posix/sdl/hardware.cpp | 5 +- src/posix/sdl/sdlglvideo.cpp | 22 ++- src/posix/sdl/sdlglvideo.h | 14 +- src/posix/sdl/sdlvideo.cpp | 243 ----------------------------- src/posix/sdl/sdlvideo.h | 61 ++++++-- 7 files changed, 108 insertions(+), 293 deletions(-) diff --git a/src/gl/system/gl_swframebuffer.cpp b/src/gl/system/gl_swframebuffer.cpp index 1ad965c6a2..7bcb71f6bc 100644 --- a/src/gl/system/gl_swframebuffer.cpp +++ b/src/gl/system/gl_swframebuffer.cpp @@ -149,27 +149,6 @@ const char *const OpenGLSWFrameBuffer::ShaderDefines[OpenGLSWFrameBuffer::NUM_SH OpenGLSWFrameBuffer::OpenGLSWFrameBuffer(void *hMonitor, int width, int height, int bits, int refreshHz, bool fullscreen, bool bgra) : Super(hMonitor, ClampWidth(width), ClampHeight(height), bits, refreshHz, fullscreen, bgra) { - // To do: this needs to cooperate with the same static in OpenGLFrameBuffer::InitializeState - static bool first = true; - if (first) - { - ogl_LoadFunctions(); - } - gl_LoadExtensions(); - InitializeState(); - if (first) - { - gl_PrintStartupLog(); - first = false; - } - - // SetVSync needs to be at the very top to workaround a bug in Nvidia's OpenGL driver. - // If wglSwapIntervalEXT is called after glBindFramebuffer in a frame the setting is not changed! 
- Super::SetVSync(vid_vsync); - - Debug = std::make_shared(); - Debug->Update(); - VertexBuffer = nullptr; IndexBuffer = nullptr; FBTexture = nullptr; @@ -181,7 +160,7 @@ OpenGLSWFrameBuffer::OpenGLSWFrameBuffer(void *hMonitor, int width, int height, { Shaders[i] = nullptr; } - VSync = vid_vsync; + BlendingRect.left = 0; BlendingRect.top = 0; BlendingRect.right = Width; @@ -214,12 +193,37 @@ OpenGLSWFrameBuffer::OpenGLSWFrameBuffer(void *hMonitor, int width, int height, memcpy(SourcePalette, GPalette.BaseColors, sizeof(PalEntry) * 256); + // To do: this needs to cooperate with the same static in OpenGLFrameBuffer::InitializeState + static bool first = true; + if (first) + { + ogl_LoadFunctions(); + } + gl_LoadExtensions(); + InitializeState(); + if (first) + { + gl_PrintStartupLog(); + first = false; + } + + if (!glGetString) + return; + + // SetVSync needs to be at the very top to workaround a bug in Nvidia's OpenGL driver. + // If wglSwapIntervalEXT is called after glBindFramebuffer in a frame the setting is not changed! 
+ Super::SetVSync(vid_vsync); + + Debug = std::make_shared(); + Debug->Update(); + //Windowed = !(static_cast(Video)->GoFullscreen(fullscreen)); TrueHeight = height; - CreateResources(); - SetInitialState(); + Valid = CreateResources(); + if (Valid) + SetInitialState(); } OpenGLSWFrameBuffer::~OpenGLSWFrameBuffer() @@ -1077,7 +1081,7 @@ int OpenGLSWFrameBuffer::GetPageCount() bool OpenGLSWFrameBuffer::IsValid() { - return true; + return Valid; } //========================================================================== diff --git a/src/gl/system/gl_swframebuffer.h b/src/gl/system/gl_swframebuffer.h index a4ba4e4267..e211b3e51f 100644 --- a/src/gl/system/gl_swframebuffer.h +++ b/src/gl/system/gl_swframebuffer.h @@ -422,6 +422,7 @@ private: template static void SafeRelease(T &x) { if (x != nullptr) { delete x; x = nullptr; } } + bool Valid = false; std::shared_ptr Debug; std::unique_ptr StreamVertexBuffer, StreamVertexBufferBurn; @@ -452,7 +453,6 @@ private: bool UpdatePending; bool NeedPalUpdate; bool NeedGammaUpdate; - bool VSync; LTRBRect BlendingRect; int In2D; bool InScene; diff --git a/src/posix/sdl/hardware.cpp b/src/posix/sdl/hardware.cpp index d8e714ccc4..84508b728e 100644 --- a/src/posix/sdl/hardware.cpp +++ b/src/posix/sdl/hardware.cpp @@ -56,8 +56,6 @@ EXTERN_CVAR (Bool, fullscreen) EXTERN_CVAR (Bool, swtruecolor) EXTERN_CVAR (Float, vid_winscale) -CVAR (Bool, vid_sdl, 0, 0); - IVideo *Video; extern int NewWidth, NewHeight, NewBits, DisplayBits; @@ -122,8 +120,7 @@ void I_InitGraphics () ticker.SetGenericRepDefault (val, CVAR_Bool); //currentrenderer = vid_renderer; - if (currentrenderer==1 || vid_sdl==0) Video = new SDLGLVideo(0); - else Video = new SDLVideo (0); + Video = new SDLGLVideo(0); if (Video == NULL) I_FatalError ("Failed to initialize display"); diff --git a/src/posix/sdl/sdlglvideo.cpp b/src/posix/sdl/sdlglvideo.cpp index e8b61ef026..18f1edf166 100644 --- a/src/posix/sdl/sdlglvideo.cpp +++ b/src/posix/sdl/sdlglvideo.cpp @@ -13,6 +13,7 @@ 
#include "c_console.h" #include "sdlglvideo.h" +#include "sdlvideo.h" #include "gl/system/gl_system.h" #include "r_defs.h" #include "gl/gl_functions.h" @@ -29,6 +30,7 @@ // TYPES ------------------------------------------------------------------- +IMPLEMENT_CLASS(SDLBaseFB, true, false) IMPLEMENT_CLASS(SDLGLFB, true, false) struct MiniModeInfo @@ -171,15 +173,15 @@ DFrameBuffer *SDLGLVideo::CreateFrameBuffer (int width, int height, bool bgra, b if (old != NULL) { // Reuse the old framebuffer if its attributes are the same - SDLGLFB *fb = static_cast (old); + SDLBaseFB *fb = static_cast (old); if (fb->Width == width && fb->Height == height) { - bool fsnow = (SDL_GetWindowFlags (fb->Screen) & SDL_WINDOW_FULLSCREEN_DESKTOP) != 0; + bool fsnow = (SDL_GetWindowFlags (fb->GetSDLWindow()) & SDL_WINDOW_FULLSCREEN_DESKTOP) != 0; if (fsnow != fullscreen) { - SDL_SetWindowFullscreen (fb->Screen, fullscreen ? SDL_WINDOW_FULLSCREEN_DESKTOP : 0); + SDL_SetWindowFullscreen (fb->GetSDLWindow(), fullscreen ? 
SDL_WINDOW_FULLSCREEN_DESKTOP : 0); } return old; } @@ -192,11 +194,17 @@ DFrameBuffer *SDLGLVideo::CreateFrameBuffer (int width, int height, bool bgra, b // flashAmount = 0; } - SDLGLFB *fb; + SDLBaseFB *fb; if (vid_renderer == 1) - fb = new OpenGLFrameBuffer (0, width, height, 32, 60, fullscreen); + { + fb = new OpenGLFrameBuffer(0, width, height, 32, 60, fullscreen); + } else - fb = (SDLGLFB*)CreateGLSWFrameBuffer (width, height, bgra, fullscreen); + { + fb = (SDLBaseFB*)CreateGLSWFrameBuffer(width, height, bgra, fullscreen); + if (!fb->IsValid()) + fb = new SDLFB(width, height, bgra, fullscreen, nullptr); + } retry = 0; @@ -240,7 +248,7 @@ DFrameBuffer *SDLGLVideo::CreateFrameBuffer (int width, int height, bool bgra, b } ++retry; - fb = static_cast(CreateFrameBuffer (width, height, false, fullscreen, NULL)); + fb = static_cast(CreateFrameBuffer (width, height, false, fullscreen, NULL)); } // fb->SetFlash (flashColor, flashAmount); diff --git a/src/posix/sdl/sdlglvideo.h b/src/posix/sdl/sdlglvideo.h index 01e70caac3..7acb0fa1be 100644 --- a/src/posix/sdl/sdlglvideo.h +++ b/src/posix/sdl/sdlglvideo.h @@ -34,9 +34,17 @@ private: int IteratorMode; int IteratorBits; }; -class SDLGLFB : public DFrameBuffer + +class SDLBaseFB : public DFrameBuffer { - DECLARE_CLASS(SDLGLFB, DFrameBuffer) + DECLARE_CLASS(SDLBaseFB, DFrameBuffer) +public: + virtual SDL_Window *GetSDLWindow() = 0; +}; + +class SDLGLFB : public SDLBaseFB +{ + DECLARE_CLASS(SDLGLFB, SDLBaseFB) public: // this must have the same parameters as the Windows version, even if they are not used! 
SDLGLFB (void *hMonitor, int width, int height, int, int, bool fullscreen, bool bgra); @@ -61,6 +69,8 @@ public: int GetClientWidth(); int GetClientHeight(); + SDL_Window *GetSDLWindow() override { return Screen; } + protected: bool CanUpdate(); void SetGammaTable(WORD *tbl); diff --git a/src/posix/sdl/sdlvideo.cpp b/src/posix/sdl/sdlvideo.cpp index 8c5c370753..c88d688a5e 100644 --- a/src/posix/sdl/sdlvideo.cpp +++ b/src/posix/sdl/sdlvideo.cpp @@ -24,61 +24,6 @@ // TYPES ------------------------------------------------------------------- -class SDLFB : public DFrameBuffer -{ - DECLARE_CLASS(SDLFB, DFrameBuffer) -public: - SDLFB (int width, int height, bool bgra, bool fullscreen, SDL_Window *oldwin); - ~SDLFB (); - - bool Lock (bool buffer); - void Unlock (); - bool Relock (); - void ForceBuffering (bool force); - bool IsValid (); - void Update (); - PalEntry *GetPalette (); - void GetFlashedPalette (PalEntry pal[256]); - void UpdatePalette (); - bool SetGamma (float gamma); - bool SetFlash (PalEntry rgb, int amount); - void GetFlash (PalEntry &rgb, int &amount); - void SetFullscreen (bool fullscreen); - int GetPageCount (); - bool IsFullscreen (); - - friend class SDLVideo; - - virtual void SetVSync (bool vsync); - virtual void ScaleCoordsFromWindow(SWORD &x, SWORD &y); - -private: - PalEntry SourcePalette[256]; - BYTE GammaTable[3][256]; - PalEntry Flash; - int FlashAmount; - float Gamma; - bool UpdatePending; - - SDL_Window *Screen; - SDL_Renderer *Renderer; - union - { - SDL_Texture *Texture; - SDL_Surface *Surface; - }; - - bool UsingRenderer; - bool NeedPalUpdate; - bool NeedGammaUpdate; - bool NotPaletted; - - void UpdateColors (); - void ResetSDLRenderer (); - - SDLFB () {} -}; - IMPLEMENT_CLASS(SDLFB, false, false) struct MiniModeInfo @@ -132,72 +77,6 @@ CUSTOM_CVAR (Float, bgamma, 1.f, CVAR_ARCHIVE|CVAR_GLOBALCONFIG) // PRIVATE DATA DEFINITIONS ------------------------------------------------ -// Dummy screen sizes to pass when windowed -static 
MiniModeInfo WinModes[] = -{ - { 320, 200 }, - { 320, 240 }, - { 400, 225 }, // 16:9 - { 400, 300 }, - { 480, 270 }, // 16:9 - { 480, 360 }, - { 512, 288 }, // 16:9 - { 512, 384 }, - { 640, 360 }, // 16:9 - { 640, 400 }, - { 640, 480 }, - { 720, 480 }, // 16:10 - { 720, 540 }, - { 800, 450 }, // 16:9 - { 800, 480 }, - { 800, 500 }, // 16:10 - { 800, 600 }, - { 848, 480 }, // 16:9 - { 960, 600 }, // 16:10 - { 960, 720 }, - { 1024, 576 }, // 16:9 - { 1024, 600 }, // 17:10 - { 1024, 640 }, // 16:10 - { 1024, 768 }, - { 1088, 612 }, // 16:9 - { 1152, 648 }, // 16:9 - { 1152, 720 }, // 16:10 - { 1152, 864 }, - { 1280, 540 }, // 21:9 - { 1280, 720 }, // 16:9 - { 1280, 854 }, - { 1280, 800 }, // 16:10 - { 1280, 960 }, - { 1280, 1024 }, // 5:4 - { 1360, 768 }, // 16:9 - { 1366, 768 }, - { 1400, 787 }, // 16:9 - { 1400, 875 }, // 16:10 - { 1400, 1050 }, - { 1440, 900 }, - { 1440, 960 }, - { 1440, 1080 }, - { 1600, 900 }, // 16:9 - { 1600, 1000 }, // 16:10 - { 1600, 1200 }, - { 1680, 1050 }, // 16:10 - { 1920, 1080 }, - { 1920, 1200 }, - { 2048, 1536 }, - { 2560, 1080 }, // 21:9 - { 2560, 1440 }, - { 2560, 1600 }, - { 2560, 2048 }, - { 2880, 1800 }, - { 3200, 1800 }, - { 3440, 1440 }, // 21:9 - { 3840, 2160 }, - { 3840, 2400 }, - { 4096, 2160 }, - { 5120, 2160 }, // 21:9 - { 5120, 2880 } -}; - static cycle_t BlitCycles; static cycle_t SDLFlipCycles; @@ -228,128 +107,6 @@ void ScaleWithAspect (int &w, int &h, int Width, int Height) h = y; } -SDLVideo::SDLVideo (int parm) -{ - IteratorBits = 0; -} - -SDLVideo::~SDLVideo () -{ -} - -void SDLVideo::StartModeIterator (int bits, bool fs) -{ - IteratorMode = 0; - IteratorBits = bits; -} - -bool SDLVideo::NextMode (int *width, int *height, bool *letterbox) -{ - if (IteratorBits != 8) - return false; - - if ((unsigned)IteratorMode < sizeof(WinModes)/sizeof(WinModes[0])) - { - *width = WinModes[IteratorMode].Width; - *height = WinModes[IteratorMode].Height; - ++IteratorMode; - return true; - } - return false; -} - -DFrameBuffer 
*SDLVideo::CreateFrameBuffer (int width, int height, bool bgra, bool fullscreen, DFrameBuffer *old) -{ - static int retry = 0; - static int owidth, oheight; - - PalEntry flashColor; - int flashAmount; - - SDL_Window *oldwin = NULL; - - if (old != NULL) - { // Reuse the old framebuffer if its attributes are the same - SDLFB *fb = static_cast (old); - if (fb->Width == width && - fb->Height == height && - fb->Bgra == bgra) - { - bool fsnow = (SDL_GetWindowFlags (fb->Screen) & SDL_WINDOW_FULLSCREEN_DESKTOP) != 0; - - if (fsnow != fullscreen) - { - fb->SetFullscreen (fullscreen); - } - return old; - } - - oldwin = fb->Screen; - fb->Screen = NULL; - - old->GetFlash (flashColor, flashAmount); - old->ObjectFlags |= OF_YesReallyDelete; - if (screen == old) screen = NULL; - delete old; - } - else - { - flashColor = 0; - flashAmount = 0; - } - - SDLFB *fb = new SDLFB (width, height, bgra, fullscreen, oldwin); - - // If we could not create the framebuffer, try again with slightly - // different parameters in this order: - // 1. Try with the closest size - // 2. Try in the opposite screen mode with the original size - // 3. Try in the opposite screen mode with the closest size - // This is a somewhat confusing mass of recursion here. - - while (fb == NULL || !fb->IsValid ()) - { - if (fb != NULL) - { - delete fb; - } - - switch (retry) - { - case 0: - owidth = width; - oheight = height; - case 2: - // Try a different resolution. Hopefully that will work. - I_ClosestResolution (&width, &height, 8); - break; - - case 1: - // Try changing fullscreen mode. Maybe that will work. - width = owidth; - height = oheight; - fullscreen = !fullscreen; - break; - - default: - // I give up! 
- I_FatalError ("Could not create new screen (%d x %d)", owidth, oheight); - } - - ++retry; - fb = static_cast(CreateFrameBuffer (width, height, bgra, fullscreen, NULL)); - } - retry = 0; - - fb->SetFlash (flashColor, flashAmount); - - return fb; -} - -void SDLVideo::SetWindowedScale (float scale) -{ -} - // FrameBuffer implementation ----------------------------------------------- SDLFB::SDLFB (int width, int height, bool bgra, bool fullscreen, SDL_Window *oldwin) diff --git a/src/posix/sdl/sdlvideo.h b/src/posix/sdl/sdlvideo.h index 385733bc15..d0888daba6 100644 --- a/src/posix/sdl/sdlvideo.h +++ b/src/posix/sdl/sdlvideo.h @@ -1,21 +1,60 @@ #include "hardware.h" #include "v_video.h" +#include "sdlglvideo.h" -class SDLVideo : public IVideo +class SDLFB : public DFrameBuffer { - public: - SDLVideo (int parm); - ~SDLVideo (); + DECLARE_CLASS(SDLFB, SDLBaseFB) +public: + SDLFB(int width, int height, bool bgra, bool fullscreen, SDL_Window *oldwin); + ~SDLFB(); - EDisplayType GetDisplayType () { return DISPLAY_Both; } - void SetWindowedScale (float scale); + bool Lock(bool buffer); + void Unlock(); + bool Relock(); + void ForceBuffering(bool force); + bool IsValid(); + void Update(); + PalEntry *GetPalette(); + void GetFlashedPalette(PalEntry pal[256]); + void UpdatePalette(); + bool SetGamma(float gamma); + bool SetFlash(PalEntry rgb, int amount); + void GetFlash(PalEntry &rgb, int &amount); + void SetFullscreen(bool fullscreen); + int GetPageCount(); + bool IsFullscreen(); - DFrameBuffer *CreateFrameBuffer (int width, int height, bool bgra, bool fs, DFrameBuffer *old); + friend class SDLGLVideo; - void StartModeIterator (int bits, bool fs); - bool NextMode (int *width, int *height, bool *letterbox); + virtual void SetVSync(bool vsync); + virtual void ScaleCoordsFromWindow(SWORD &x, SWORD &y); + + SDL_Window *GetSDLWindow() override { return Screen; } private: - int IteratorMode; - int IteratorBits; + PalEntry SourcePalette[256]; + BYTE GammaTable[3][256]; + PalEntry 
Flash; + int FlashAmount; + float Gamma; + bool UpdatePending; + + SDL_Window *Screen; + SDL_Renderer *Renderer; + union + { + SDL_Texture *Texture; + SDL_Surface *Surface; + }; + + bool UsingRenderer; + bool NeedPalUpdate; + bool NeedGammaUpdate; + bool NotPaletted; + + void UpdateColors(); + void ResetSDLRenderer(); + + SDLFB() {} }; From 07acd9375b0612adeb691ecab5438e87794cf4a1 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 24 Jan 2017 02:28:32 +0100 Subject: [PATCH 743/912] The memset antipattern cannot be used on classes with a virtual table --- src/swrenderer/things/r_particle.cpp | 1 - src/swrenderer/things/r_particle.h | 10 +++--- src/swrenderer/things/r_playersprite.cpp | 1 - src/swrenderer/things/r_sprite.cpp | 1 - src/swrenderer/things/r_sprite.h | 10 +++--- src/swrenderer/things/r_visiblesprite.h | 43 ++++++++++++------------ src/swrenderer/things/r_voxel.h | 14 ++++---- src/swrenderer/things/r_wallsprite.h | 4 +-- 8 files changed, 41 insertions(+), 43 deletions(-) diff --git a/src/swrenderer/things/r_particle.cpp b/src/swrenderer/things/r_particle.cpp index 510d05b7f6..ed32c2a005 100644 --- a/src/swrenderer/things/r_particle.cpp +++ b/src/swrenderer/things/r_particle.cpp @@ -179,7 +179,6 @@ namespace swrenderer // store information in a vissprite RenderParticle *vis = RenderMemory::NewObject(); - memset(vis, 0, sizeof(*vis)); vis->CurrentPortalUniq = renderportal->CurrentPortalUniq; vis->heightsec = heightsec; diff --git a/src/swrenderer/things/r_particle.h b/src/swrenderer/things/r_particle.h index 244d1a8eee..2115220749 100644 --- a/src/swrenderer/things/r_particle.h +++ b/src/swrenderer/things/r_particle.h @@ -32,11 +32,11 @@ namespace swrenderer private: void DrawMaskedSegsBehindParticle(); - fixed_t xscale; - fixed_t startfrac; // horizontal position of x1 - int y1, y2; + fixed_t xscale = 0; + fixed_t startfrac = 0; // horizontal position of x1 + int y1 = 0, y2 = 0; - uint32_t Translation; - uint32_t FillColor; + uint32_t Translation = 
0; + uint32_t FillColor = 0; }; } diff --git a/src/swrenderer/things/r_playersprite.cpp b/src/swrenderer/things/r_playersprite.cpp index 4cba801b7e..a259ff1b3f 100644 --- a/src/swrenderer/things/r_playersprite.cpp +++ b/src/swrenderer/things/r_playersprite.cpp @@ -273,7 +273,6 @@ namespace swrenderer // store information in a vissprite RenderSprite *vis = &avis[vispspindex]; - memset(vis, 0, sizeof(*vis)); vis->renderflags = owner->renderflags; vis->floorclip = 0; diff --git a/src/swrenderer/things/r_sprite.cpp b/src/swrenderer/things/r_sprite.cpp index 3776540434..d237348f49 100644 --- a/src/swrenderer/things/r_sprite.cpp +++ b/src/swrenderer/things/r_sprite.cpp @@ -157,7 +157,6 @@ namespace swrenderer // store information in a vissprite RenderSprite *vis = RenderMemory::NewObject(); - memset(vis, 0, sizeof(*vis)); vis->CurrentPortalUniq = renderportal->CurrentPortalUniq; vis->xscale = FLOAT2FIXED(xscale); diff --git a/src/swrenderer/things/r_sprite.h b/src/swrenderer/things/r_sprite.h index 595a5d2a83..86462786ee 100644 --- a/src/swrenderer/things/r_sprite.h +++ b/src/swrenderer/things/r_sprite.h @@ -26,12 +26,12 @@ namespace swrenderer void Render(short *cliptop, short *clipbottom, int minZ, int maxZ) override; private: - fixed_t xscale; - fixed_t startfrac; // horizontal position of x1 - fixed_t xiscale; // negative if flipped + fixed_t xscale = 0; + fixed_t startfrac = 0; // horizontal position of x1 + fixed_t xiscale = 0; // negative if flipped - uint32_t Translation; - uint32_t FillColor; + uint32_t Translation = 0; + uint32_t FillColor = 0; friend class RenderPlayerSprite; // To do: detach sprite from playersprite! 
}; diff --git a/src/swrenderer/things/r_visiblesprite.h b/src/swrenderer/things/r_visiblesprite.h index 73b84ebae8..bfe8a84e0b 100644 --- a/src/swrenderer/things/r_visiblesprite.h +++ b/src/swrenderer/things/r_visiblesprite.h @@ -26,6 +26,7 @@ namespace swrenderer class VisibleSprite { public: + VisibleSprite() { RenderStyle = STYLE_Normal; } virtual ~VisibleSprite() { } void Render(); @@ -45,38 +46,38 @@ namespace swrenderer virtual void Render(short *cliptop, short *clipbottom, int minZ, int maxZ) = 0; - FTexture *pic; + FTexture *pic = nullptr; - short x1, x2; - float gzb, gzt; // global bottom / top for silhouette clipping + short x1 = 0, x2 = 0; + float gzb = 0.0f, gzt = 0.0f; // global bottom / top for silhouette clipping - double floorclip; + double floorclip = 0.0; - double texturemid; // floorclip - float yscale; // voxel and floorclip + double texturemid = 0.0; // floorclip + float yscale = 0.0f; // voxel and floorclip - sector_t *heightsec; // height sector for underwater/fake ceiling - WaterFakeSide FakeFlatStat; // which side of fake/floor ceiling sprite is on + sector_t *heightsec = nullptr; // height sector for underwater/fake ceiling + WaterFakeSide FakeFlatStat = WaterFakeSide::Center; // which side of fake/floor ceiling sprite is on - F3DFloor *fakefloor; // 3d floor clipping - F3DFloor *fakeceiling; + F3DFloor *fakefloor = nullptr; // 3d floor clipping + F3DFloor *fakeceiling = nullptr; - FVector3 gpos; // origin in world coordinates - sector_t *sector; // sector this sprite is in + FVector3 gpos = { 0.0f, 0.0f, 0.0f }; // origin in world coordinates + sector_t *sector = nullptr; // sector this sprite is in // Light shared calculation? 
- int ColormapNum; // Which colormap is rendered - FSWColormap *BaseColormap; // Base colormap used together with ColormapNum - float Alpha; + int ColormapNum = 0; // Which colormap is rendered + FSWColormap *BaseColormap = nullptr; // Base colormap used together with ColormapNum + float Alpha = 0.0f; FRenderStyle RenderStyle; - bool foggy; - short renderflags; + bool foggy = false; + short renderflags = 0; - float depth; // Sort (draw segments), also light + float depth = 0.0f; // Sort (draw segments), also light - float deltax, deltay; // Sort (2d/voxel version) - float idepth; // Sort (non-voxel version) + float deltax = 0.0f, deltay = 0.0f; // Sort (2d/voxel version) + float idepth = 0.0f; // Sort (non-voxel version) - int CurrentPortalUniq; // to identify the portal that this thing is in. used for clipping. + int CurrentPortalUniq = 0; // to identify the portal that this thing is in. used for clipping. }; } diff --git a/src/swrenderer/things/r_voxel.h b/src/swrenderer/things/r_voxel.h index d237d9b66e..ae41ea7ee3 100644 --- a/src/swrenderer/things/r_voxel.h +++ b/src/swrenderer/things/r_voxel.h @@ -65,17 +65,17 @@ namespace swrenderer private: struct posang { - FVector3 vpos; // view origin - FAngle vang; // view angle + FVector3 vpos = { 0.0f, 0.0f, 0.0f }; // view origin + FAngle vang = { 0.0f }; // view angle }; posang pa; - DAngle Angle; - fixed_t xscale; - FVoxel *voxel; + DAngle Angle = { 0.0 }; + fixed_t xscale = 0; + FVoxel *voxel = nullptr; - uint32_t Translation; - uint32_t FillColor; + uint32_t Translation = 0; + uint32_t FillColor = 0; enum { DVF_OFFSCREEN = 1, DVF_SPANSONLY = 2, DVF_MIRRORED = 4 }; diff --git a/src/swrenderer/things/r_wallsprite.h b/src/swrenderer/things/r_wallsprite.h index 2123948669..a68454b2e4 100644 --- a/src/swrenderer/things/r_wallsprite.h +++ b/src/swrenderer/things/r_wallsprite.h @@ -30,7 +30,7 @@ namespace swrenderer static void DrawColumn(int x, FTexture *WallSpriteTile, double texturemid, float maskedScaleY, bool 
sprflipvert, const short *mfloorclip, const short *mceilingclip); FWallCoords wallc; - uint32_t Translation; - uint32_t FillColor; + uint32_t Translation = 0; + uint32_t FillColor = 0; }; } From ba6094be2e90436ce23ba1b2494f1ecb9c6962ff Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 24 Jan 2017 04:15:54 +0100 Subject: [PATCH 744/912] Detach player sprites from VisibleSprite --- src/swrenderer/drawers/r_draw.cpp | 2 +- src/swrenderer/r_swrenderer.cpp | 2 +- src/swrenderer/scene/r_translucent_pass.cpp | 3 +- src/swrenderer/scene/r_viewport.cpp | 2 +- src/swrenderer/things/r_particle.cpp | 7 +- src/swrenderer/things/r_playersprite.cpp | 313 +++++++++++--------- src/swrenderer/things/r_playersprite.h | 84 ++++-- src/swrenderer/things/r_sprite.cpp | 12 +- src/swrenderer/things/r_visiblesprite.cpp | 20 +- src/swrenderer/things/r_visiblesprite.h | 15 +- src/swrenderer/things/r_voxel.cpp | 8 +- src/swrenderer/things/r_wallsprite.cpp | 8 +- 12 files changed, 284 insertions(+), 192 deletions(-) diff --git a/src/swrenderer/drawers/r_draw.cpp b/src/swrenderer/drawers/r_draw.cpp index 46c0ec7846..cf3e4edbb5 100644 --- a/src/swrenderer/drawers/r_draw.cpp +++ b/src/swrenderer/drawers/r_draw.cpp @@ -452,7 +452,7 @@ namespace swrenderer else if (style == LegacyRenderStyles[STYLE_Shaded]) { // Shaded drawer only gets 16 levels of alpha because it saves memory. 
- if ((alpha >>= 12) == 0) + if ((alpha >>= 12) == 0 || basecolormap == nullptr) return false; colfunc = &SWPixelFormatDrawers::DrawShadedColumn; drawer_needs_pal_input = true; diff --git a/src/swrenderer/r_swrenderer.cpp b/src/swrenderer/r_swrenderer.cpp index db0ede2e60..e8e1d2c12b 100644 --- a/src/swrenderer/r_swrenderer.cpp +++ b/src/swrenderer/r_swrenderer.cpp @@ -217,7 +217,7 @@ void FSoftwareRenderer::DrawRemainingPlayerSprites() { if (!r_polyrenderer) { - RenderPlayerSprite::RenderRemainingPlayerSprites(); + RenderPlayerSprites::Instance()->RenderRemaining(); } else { diff --git a/src/swrenderer/scene/r_translucent_pass.cpp b/src/swrenderer/scene/r_translucent_pass.cpp index cbacf11e1b..3cf4998f16 100644 --- a/src/swrenderer/scene/r_translucent_pass.cpp +++ b/src/swrenderer/scene/r_translucent_pass.cpp @@ -208,6 +208,7 @@ namespace swrenderer clip3d->DeleteHeights(); clip3d->fake3D = 0; } - RenderPlayerSprite::RenderPlayerSprites(); + + RenderPlayerSprites::Instance()->Render(); } } diff --git a/src/swrenderer/scene/r_viewport.cpp b/src/swrenderer/scene/r_viewport.cpp index df5b37f086..c925094923 100644 --- a/src/swrenderer/scene/r_viewport.cpp +++ b/src/swrenderer/scene/r_viewport.cpp @@ -114,7 +114,7 @@ namespace swrenderer WallTMapScale2 = IYaspectMul / CenterX; // psprite scales - RenderPlayerSprite::SetupSpriteScale(); + RenderPlayerSprites::Instance()->SetupSpriteScale(); // thing clipping fillshort(screenheightarray, viewwidth, (short)viewheight); diff --git a/src/swrenderer/things/r_particle.cpp b/src/swrenderer/things/r_particle.cpp index ed32c2a005..775e9358e7 100644 --- a/src/swrenderer/things/r_particle.cpp +++ b/src/swrenderer/things/r_particle.cpp @@ -198,11 +198,10 @@ namespace swrenderer vis->renderflags = (short)(particle->alpha * 255.0f + 0.5f); vis->FakeFlatStat = fakeside; vis->floorclip = 0; - vis->ColormapNum = 0; vis->foggy = foggy; // Particles are slightly more visible than regular sprites. 
- vis->SetColormap(tiz * r_SpriteVisibility * 0.5, shade, map, particle->bright != 0, false, false); + vis->Light.SetColormap(tiz * r_SpriteVisibility * 0.5, shade, map, particle->bright != 0, false, false); VisibleSpriteList::Instance()->Push(vis); } @@ -214,7 +213,7 @@ namespace swrenderer auto vis = this; int spacing; - BYTE color = vis->BaseColormap->Maps[vis->startfrac]; + BYTE color = vis->Light.BaseColormap->Maps[vis->startfrac]; int yl = vis->y1; int ycount = vis->y2 - yl + 1; int x1 = vis->x1; @@ -225,7 +224,7 @@ namespace swrenderer DrawMaskedSegsBehindParticle(); - uint32_t fg = LightBgra::shade_pal_index_simple(color, LightBgra::calc_light_multiplier(LIGHTSCALE(0, vis->ColormapNum << FRACBITS))); + uint32_t fg = LightBgra::shade_pal_index_simple(color, LightBgra::calc_light_multiplier(LIGHTSCALE(0, vis->Light.ColormapNum << FRACBITS))); // vis->renderflags holds translucency level (0-255) fixed_t fglevel = ((vis->renderflags + 1) << 8) & ~0x3ff; diff --git a/src/swrenderer/things/r_playersprite.cpp b/src/swrenderer/things/r_playersprite.cpp index a259ff1b3f..69da6b9477 100644 --- a/src/swrenderer/things/r_playersprite.cpp +++ b/src/swrenderer/things/r_playersprite.cpp @@ -65,23 +65,20 @@ EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor) namespace swrenderer { - TArray RenderPlayerSprite::vispsprites; - unsigned int RenderPlayerSprite::vispspindex; + RenderPlayerSprites *RenderPlayerSprites::Instance() + { + static RenderPlayerSprites instance; + return &instance; + } - double RenderPlayerSprite::pspritexscale; - double RenderPlayerSprite::pspritexiscale; - double RenderPlayerSprite::pspriteyscale; - - TArray RenderPlayerSprite::avis; - - void RenderPlayerSprite::SetupSpriteScale() + void RenderPlayerSprites::SetupSpriteScale() { pspritexscale = centerxwide / 160.0; pspriteyscale = pspritexscale * YaspectMul; pspritexiscale = 1 / pspritexscale; } - void RenderPlayerSprite::RenderPlayerSprites() + void RenderPlayerSprites::Render() { int i; int lightnum; 
@@ -184,7 +181,7 @@ namespace swrenderer if ((psp->GetID() != PSP_TARGETCENTER || CrosshairImage == nullptr) && psp->GetCaller() != nullptr) { - Render(psp, camera, bobx, boby, wx, wy, r_TicFracF, spriteshade, basecolormap); + RenderSprite(psp, camera, bobx, boby, wx, wy, r_TicFracF, spriteshade, basecolormap, foggy); } psp = psp->GetNext(); @@ -194,7 +191,7 @@ namespace swrenderer } } - void RenderPlayerSprite::Render(DPSprite *pspr, AActor *owner, float bobx, float boby, double wx, double wy, double ticfrac, int spriteshade, FDynamicColormap *basecolormap) + void RenderPlayerSprites::RenderSprite(DPSprite *pspr, AActor *owner, float bobx, float boby, double wx, double wy, double ticfrac, int spriteshade, FDynamicColormap *basecolormap, bool foggy) { double tx; int x1; @@ -208,9 +205,6 @@ namespace swrenderer bool noaccel; double alpha = owner->Alpha; - if (avis.Size() < vispspindex + 1) - avis.Reserve(avis.Size() - vispspindex + 1); - // decide which patch to use if ((unsigned)pspr->GetSprite() >= (unsigned)sprites.Size()) { @@ -272,12 +266,11 @@ namespace swrenderer return; // store information in a vissprite - RenderSprite *vis = &avis[vispspindex]; + NoAccelPlayerSprite vis; - vis->renderflags = owner->renderflags; - vis->floorclip = 0; + vis.renderflags = owner->renderflags; - vis->texturemid = (BASEYCENTER - sy) * tex->Scale.Y + tex->TopOffset; + vis.texturemid = (BASEYCENTER - sy) * tex->Scale.Y + tex->TopOffset; if (camera->player && (RenderTarget != screen || viewheight == RenderTarget->GetHeight() || @@ -288,41 +281,39 @@ namespace swrenderer { if (RenderTarget != screen || viewheight == RenderTarget->GetHeight()) { - vis->texturemid -= weapon->YAdjust; + vis.texturemid -= weapon->YAdjust; } else { - vis->texturemid -= StatusBar->GetDisplacement() * weapon->YAdjust; + vis.texturemid -= StatusBar->GetDisplacement() * weapon->YAdjust; } } } if (pspr->GetID() < PSP_TARGETCENTER) { // Move the weapon down for 1280x1024. 
- vis->texturemid -= AspectPspriteOffset(WidescreenRatio); + vis.texturemid -= AspectPspriteOffset(WidescreenRatio); } - vis->x1 = x1 < 0 ? 0 : x1; - vis->x2 = x2 >= viewwidth ? viewwidth : x2; - vis->xscale = FLOAT2FIXED(pspritexscale / tex->Scale.X); - vis->yscale = float(pspriteyscale / tex->Scale.Y); - vis->Translation = 0; // [RH] Use default colors - vis->pic = tex; - vis->ColormapNum = 0; + vis.x1 = x1 < 0 ? 0 : x1; + vis.x2 = x2 >= viewwidth ? viewwidth : x2; + vis.xscale = FLOAT2FIXED(pspritexscale / tex->Scale.X); + vis.yscale = float(pspriteyscale / tex->Scale.Y); + vis.pic = tex; // If flip is used, provided that it's not already flipped (that would just invert itself) // (It's an XOR...) if (!(flip) != !(pspr->Flags & PSPF_FLIP)) { - vis->xiscale = -FLOAT2FIXED(pspritexiscale * tex->Scale.X); - vis->startfrac = (tex->GetWidth() << FRACBITS) - 1; + vis.xiscale = -FLOAT2FIXED(pspritexiscale * tex->Scale.X); + vis.startfrac = (tex->GetWidth() << FRACBITS) - 1; } else { - vis->xiscale = FLOAT2FIXED(pspritexiscale * tex->Scale.X); - vis->startfrac = 0; + vis.xiscale = FLOAT2FIXED(pspritexiscale * tex->Scale.X); + vis.startfrac = 0; } - if (vis->x1 > x1) - vis->startfrac += vis->xiscale*(vis->x1 - x1); + if (vis.x1 > x1) + vis.startfrac += vis.xiscale*(vis.x1 - x1); noaccel = false; FDynamicColormap *colormap_to_use = nullptr; @@ -336,42 +327,42 @@ namespace swrenderer if (pspr->Flags & PSPF_FORCESTYLE) { - vis->RenderStyle = LegacyRenderStyles[rs]; + vis.RenderStyle = LegacyRenderStyles[rs]; } else if (owner->RenderStyle == LegacyRenderStyles[STYLE_Fuzzy]) { - vis->RenderStyle = LegacyRenderStyles[STYLE_Fuzzy]; + vis.RenderStyle = LegacyRenderStyles[STYLE_Fuzzy]; } else if (owner->RenderStyle == LegacyRenderStyles[STYLE_OptFuzzy]) { - vis->RenderStyle = LegacyRenderStyles[STYLE_OptFuzzy]; - vis->RenderStyle.CheckFuzz(); + vis.RenderStyle = LegacyRenderStyles[STYLE_OptFuzzy]; + vis.RenderStyle.CheckFuzz(); } else if (owner->RenderStyle == 
LegacyRenderStyles[STYLE_Subtract]) { - vis->RenderStyle = LegacyRenderStyles[STYLE_Subtract]; + vis.RenderStyle = LegacyRenderStyles[STYLE_Subtract]; } else { - vis->RenderStyle = LegacyRenderStyles[rs]; + vis.RenderStyle = LegacyRenderStyles[rs]; } } else { - vis->RenderStyle = owner->RenderStyle; + vis.RenderStyle = owner->RenderStyle; } // Set the alpha based on if using the overlay's own or not. Also adjust // and override the alpha if not forced. if (pspr->Flags & PSPF_ALPHA) { - if (vis->RenderStyle == LegacyRenderStyles[STYLE_Fuzzy]) + if (vis.RenderStyle == LegacyRenderStyles[STYLE_Fuzzy]) { alpha = owner->Alpha; } - else if (vis->RenderStyle == LegacyRenderStyles[STYLE_OptFuzzy]) + else if (vis.RenderStyle == LegacyRenderStyles[STYLE_OptFuzzy]) { - FRenderStyle style = vis->RenderStyle; + FRenderStyle style = vis.RenderStyle; style.CheckFuzz(); switch (style.BlendOp) { @@ -385,15 +376,15 @@ namespace swrenderer } } - else if (vis->RenderStyle == LegacyRenderStyles[STYLE_Subtract]) + else if (vis.RenderStyle == LegacyRenderStyles[STYLE_Subtract]) { alpha = owner->Alpha; } - else if (vis->RenderStyle == LegacyRenderStyles[STYLE_Add] || - vis->RenderStyle == LegacyRenderStyles[STYLE_Translucent] || - vis->RenderStyle == LegacyRenderStyles[STYLE_TranslucentStencil] || - vis->RenderStyle == LegacyRenderStyles[STYLE_AddStencil] || - vis->RenderStyle == LegacyRenderStyles[STYLE_AddShaded]) + else if (vis.RenderStyle == LegacyRenderStyles[STYLE_Add] || + vis.RenderStyle == LegacyRenderStyles[STYLE_Translucent] || + vis.RenderStyle == LegacyRenderStyles[STYLE_TranslucentStencil] || + vis.RenderStyle == LegacyRenderStyles[STYLE_AddStencil] || + vis.RenderStyle == LegacyRenderStyles[STYLE_AddShaded]) { alpha = owner->Alpha * pspr->alpha; } @@ -406,10 +397,10 @@ namespace swrenderer // Should normal renderstyle come out on top at the end and we desire alpha, // switch it to translucent. Normal never applies any sort of alpha. 
if ((pspr->Flags & PSPF_ALPHA) && - vis->RenderStyle == LegacyRenderStyles[STYLE_Normal] && - vis->Alpha < 1.0) + vis.RenderStyle == LegacyRenderStyles[STYLE_Normal] && + vis.Alpha < 1.0) { - vis->RenderStyle = LegacyRenderStyles[STYLE_Translucent]; + vis.RenderStyle = LegacyRenderStyles[STYLE_Translucent]; alpha = owner->Alpha * pspr->alpha; } @@ -418,22 +409,22 @@ namespace swrenderer if (pspr->Flags & PSPF_FORCEALPHA) { //Due to lack of != operators... - if (vis->RenderStyle == LegacyRenderStyles[STYLE_Fuzzy] || - vis->RenderStyle == LegacyRenderStyles[STYLE_SoulTrans] || - vis->RenderStyle == LegacyRenderStyles[STYLE_Stencil]) + if (vis.RenderStyle == LegacyRenderStyles[STYLE_Fuzzy] || + vis.RenderStyle == LegacyRenderStyles[STYLE_SoulTrans] || + vis.RenderStyle == LegacyRenderStyles[STYLE_Stencil]) { } else { alpha = pspr->alpha; - vis->RenderStyle.Flags |= STYLEF_ForceAlpha; + vis.RenderStyle.Flags |= STYLEF_ForceAlpha; } } - vis->Alpha = clamp(float(alpha), 0.f, 1.f); + vis.Alpha = clamp(float(alpha), 0.f, 1.f); // Due to how some of the effects are handled, going to 0 or less causes some // weirdness to display. There's no point rendering it anyway if it's 0. - if (vis->Alpha <= 0.) + if (vis.Alpha <= 0.) return; //----------------------------------------------------------------------------- @@ -441,60 +432,60 @@ namespace swrenderer // The software renderer cannot invert the source without inverting the overlay // too. That means if the source is inverted, we need to do the reverse of what // the invert overlay flag says to do. 
- bool invertcolormap = (vis->RenderStyle.Flags & STYLEF_InvertOverlay) != 0; + bool invertcolormap = (vis.RenderStyle.Flags & STYLEF_InvertOverlay) != 0; - if (vis->RenderStyle.Flags & STYLEF_InvertSource) + if (vis.RenderStyle.Flags & STYLEF_InvertSource) { invertcolormap = !invertcolormap; } - bool fullbright = !vis->foggy && pspr->GetState()->GetFullbright(); - bool fadeToBlack = (vis->RenderStyle.Flags & STYLEF_FadeToBlack) != 0; + bool fullbright = !foggy && pspr->GetState()->GetFullbright(); + bool fadeToBlack = (vis.RenderStyle.Flags & STYLEF_FadeToBlack) != 0; - vis->SetColormap(0, spriteshade, basecolormap, fullbright, invertcolormap, fadeToBlack); + vis.Light.SetColormap(0, spriteshade, basecolormap, fullbright, invertcolormap, fadeToBlack); - colormap_to_use = (FDynamicColormap*)vis->BaseColormap; + colormap_to_use = (FDynamicColormap*)vis.Light.BaseColormap; if (camera->Inventory != nullptr) { visstyle_t visstyle; - visstyle.Alpha = vis->Alpha; + visstyle.Alpha = vis.Alpha; visstyle.RenderStyle = STYLE_Count; visstyle.Invert = false; camera->Inventory->AlterWeaponSprite(&visstyle); - vis->Alpha = visstyle.Alpha; + vis.Alpha = visstyle.Alpha; if (visstyle.RenderStyle != STYLE_Count) { - vis->RenderStyle = visstyle.RenderStyle; + vis.RenderStyle = visstyle.RenderStyle; } if (visstyle.Invert) { - vis->BaseColormap = &SpecialColormaps[INVERSECOLORMAP]; - vis->ColormapNum = 0; + vis.Light.BaseColormap = &SpecialColormaps[INVERSECOLORMAP]; + vis.Light.ColormapNum = 0; noaccel = true; } } // If we're drawing with a special colormap, but shaders for them are disabled, do // not accelerate. - if (!r_shadercolormaps && (vis->BaseColormap >= &SpecialColormaps[0] && - vis->BaseColormap <= &SpecialColormaps.Last())) + if (!r_shadercolormaps && (vis.Light.BaseColormap >= &SpecialColormaps[0] && + vis.Light.BaseColormap <= &SpecialColormaps.Last())) { noaccel = true; } // If drawing with a BOOM colormap, disable acceleration. 
- if (vis->BaseColormap == &NormalLight && NormalLight.Maps != realcolormaps.Maps) + if (vis.Light.BaseColormap == &NormalLight && NormalLight.Maps != realcolormaps.Maps) { noaccel = true; } // If the main colormap has fixed lights, and this sprite is being drawn with that // colormap, disable acceleration so that the lights can remain fixed. if (!noaccel && realfixedcolormap == nullptr && - NormalLightHasFixedLights && vis->BaseColormap == &NormalLight && - vis->pic->UseBasePalette()) + NormalLightHasFixedLights && vis.Light.BaseColormap == &NormalLight && + vis.pic->UseBasePalette()) { noaccel = true; } @@ -503,89 +494,145 @@ namespace swrenderer { colormap_to_use = basecolormap; - vis->BaseColormap = basecolormap; - vis->ColormapNum = 0; + vis.Light.BaseColormap = basecolormap; + vis.Light.ColormapNum = 0; } // Check for hardware-assisted 2D. If it's available, and this sprite is not // fuzzy, don't draw it until after the switch to 2D mode. if (!noaccel && RenderTarget == screen && (DFrameBuffer *)screen->Accel2D) { - FRenderStyle style = vis->RenderStyle; + FRenderStyle style = vis.RenderStyle; style.CheckFuzz(); if (style.BlendOp != STYLEOP_Fuzz) { - if (vispsprites.Size() < vispspindex + 1) - vispsprites.Reserve(vispsprites.Size() - vispspindex + 1); + HWAccelPlayerSprite accelSprite; - vispsprites[vispspindex].vis = vis; - vispsprites[vispspindex].basecolormap = colormap_to_use; - vispsprites[vispspindex].x1 = x1; - vispspindex++; + accelSprite.pic = vis.pic; + accelSprite.texturemid = vis.texturemid; + accelSprite.yscale = vis.yscale; + accelSprite.xscale = vis.xscale; + + accelSprite.Alpha = vis.Alpha; + accelSprite.RenderStyle = vis.RenderStyle; + accelSprite.Translation = vis.Translation; + accelSprite.FillColor = vis.FillColor; + + accelSprite.basecolormap = colormap_to_use; + accelSprite.x1 = x1; + accelSprite.flip = vis.xiscale < 0; + + if (vis.Light.BaseColormap >= &SpecialColormaps[0] && + vis.Light.BaseColormap < 
&SpecialColormaps[SpecialColormaps.Size()]) + { + accelSprite.special = static_cast(vis.Light.BaseColormap); + } + else if (colormap_to_use->Color == PalEntry(255, 255, 255) && + colormap_to_use->Desaturate == 0) + { + accelSprite.overlay = colormap_to_use->Fade; + accelSprite.overlay.a = BYTE(vis.Light.ColormapNum * 255 / NUMCOLORMAPS); + } + else + { + accelSprite.usecolormapstyle = true; + accelSprite.colormapstyle.Color = colormap_to_use->Color; + accelSprite.colormapstyle.Fade = colormap_to_use->Fade; + accelSprite.colormapstyle.Desaturate = colormap_to_use->Desaturate; + accelSprite.colormapstyle.FadeLevel = vis.Light.ColormapNum / float(NUMCOLORMAPS); + } + + AcceleratedSprites.Push(accelSprite); return; } } - // clip to screen bounds - short *mfloorclip = screenheightarray; - short *mceilingclip = zeroarray; - - vis->Render(mfloorclip, mceilingclip, 0, 0); + vis.Render(); } - void RenderPlayerSprite::RenderRemainingPlayerSprites() + void RenderPlayerSprites::RenderRemaining() { - for (unsigned int i = 0; i < vispspindex; i++) + for (const HWAccelPlayerSprite &sprite : AcceleratedSprites) { - RenderSprite *vis = vispsprites[i].vis; - FDynamicColormap *colormap = vispsprites[i].basecolormap; - bool flip = vis->xiscale < 0; - FSpecialColormap *special = NULL; - PalEntry overlay = 0; - FColormapStyle colormapstyle; - bool usecolormapstyle = false; - - if (vis->BaseColormap >= &SpecialColormaps[0] && - vis->BaseColormap < &SpecialColormaps[SpecialColormaps.Size()]) - { - special = static_cast(vis->BaseColormap); - } - else if (colormap->Color == PalEntry(255, 255, 255) && - colormap->Desaturate == 0) - { - overlay = colormap->Fade; - overlay.a = BYTE(vis->ColormapNum * 255 / NUMCOLORMAPS); - } - else - { - usecolormapstyle = true; - colormapstyle.Color = colormap->Color; - colormapstyle.Fade = colormap->Fade; - colormapstyle.Desaturate = colormap->Desaturate; - colormapstyle.FadeLevel = vis->ColormapNum / float(NUMCOLORMAPS); - } - screen->DrawTexture(vis->pic, 
- viewwindowx + vispsprites[i].x1, - viewwindowy + viewheight / 2 - vis->texturemid * vis->yscale - 0.5, - DTA_DestWidthF, FIXED2DBL(vis->pic->GetWidth() * vis->xscale), - DTA_DestHeightF, vis->pic->GetHeight() * vis->yscale, - DTA_Translation, TranslationToTable(vis->Translation), - DTA_FlipX, flip, + screen->DrawTexture(sprite.pic, + viewwindowx + sprite.x1, + viewwindowy + viewheight / 2 - sprite.texturemid * sprite.yscale - 0.5, + DTA_DestWidthF, FIXED2DBL(sprite.pic->GetWidth() * sprite.xscale), + DTA_DestHeightF, sprite.pic->GetHeight() * sprite.yscale, + DTA_Translation, TranslationToTable(sprite.Translation), + DTA_FlipX, sprite.flip, DTA_TopOffset, 0, DTA_LeftOffset, 0, DTA_ClipLeft, viewwindowx, DTA_ClipTop, viewwindowy, DTA_ClipRight, viewwindowx + viewwidth, DTA_ClipBottom, viewwindowy + viewheight, - DTA_AlphaF, vis->Alpha, - DTA_RenderStyle, vis->RenderStyle, - DTA_FillColor, vis->FillColor, - DTA_SpecialColormap, special, - DTA_ColorOverlay, overlay.d, - DTA_ColormapStyle, usecolormapstyle ? &colormapstyle : NULL, + DTA_AlphaF, sprite.Alpha, + DTA_RenderStyle, sprite.RenderStyle, + DTA_FillColor, sprite.FillColor, + DTA_SpecialColormap, sprite.special, + DTA_ColorOverlay, sprite.overlay.d, + DTA_ColormapStyle, sprite.usecolormapstyle ? 
&sprite.colormapstyle : nullptr, TAG_DONE); } - vispspindex = 0; + AcceleratedSprites.Clear(); + } + + ///////////////////////////////////////////////////////////////////////// + + void NoAccelPlayerSprite::Render() + { + if (xscale == 0 || fabs(yscale) < (1.0f / 32000.0f)) + { // scaled to 0; can't see + return; + } + + R_SetColorMapLight(Light.BaseColormap, 0, Light.ColormapNum << FRACBITS); + + FDynamicColormap *basecolormap = static_cast(Light.BaseColormap); + + bool visible = R_SetPatchStyle(RenderStyle, Alpha, Translation, FillColor, basecolormap); + + if (RenderStyle == LegacyRenderStyles[STYLE_Shaded]) + { // For shaded sprites, R_SetPatchStyle sets a dc_colormap to an alpha table, but + // it is the brightest one. We need to get back to the proper light level for + // this sprite. + R_SetColorMapLight(drawerargs::dc_fcolormap, 0, Light.ColormapNum << FRACBITS); + } + + if (!visible) + return; + + double spryscale = yscale; + bool sprflipvert = false; + fixed_t iscale = FLOAT2FIXED(1 / yscale); + + double sprtopscreen; + if (renderflags & RF_YFLIP) + { + sprflipvert = true; + spryscale = -spryscale; + iscale = -iscale; + sprtopscreen = CenterY + (texturemid - pic->GetHeight()) * spryscale; + } + else + { + sprflipvert = false; + sprtopscreen = CenterY - texturemid * spryscale; + } + + // clip to screen bounds + short *mfloorclip = screenheightarray; + short *mceilingclip = zeroarray; + + fixed_t frac = startfrac; + for (int x = x1; x < x2; x++) + { + R_DrawMaskedColumn(x, iscale, pic, frac, spryscale, sprtopscreen, sprflipvert, mfloorclip, mceilingclip, false); + frac += xiscale; + } + + NetUpdate(); } } diff --git a/src/swrenderer/things/r_playersprite.h b/src/swrenderer/things/r_playersprite.h index 75a4e7ba67..b12bbd8d35 100644 --- a/src/swrenderer/things/r_playersprite.h +++ b/src/swrenderer/things/r_playersprite.h @@ -14,41 +14,83 @@ #pragma once #include "r_visiblesprite.h" -#include "r_sprite.h" +#include "r_data/colormaps.h" class DPSprite; 
namespace swrenderer { - class RenderPlayerSprite + class NoAccelPlayerSprite { public: - static void SetupSpriteScale(); + short x1 = 0; + short x2 = 0; - static void RenderPlayerSprites(); - static void RenderRemainingPlayerSprites(); + double texturemid = 0.0; + + fixed_t xscale = 0; + float yscale = 0.0f; + + FTexture *pic = nullptr; + + fixed_t xiscale = 0; + fixed_t startfrac = 0; + + float Alpha = 0.0f; + FRenderStyle RenderStyle; + uint32_t Translation = 0; + uint32_t FillColor = 0; + + ColormapLight Light; + + short renderflags = 0; + + void Render(); + }; + + class HWAccelPlayerSprite + { + public: + FTexture *pic = nullptr; + double texturemid = 0.0; + float yscale = 0.0f; + fixed_t xscale = 0; + + float Alpha = 0.0f; + FRenderStyle RenderStyle; + uint32_t Translation = 0; + uint32_t FillColor = 0; + + FDynamicColormap *basecolormap = nullptr; + int x1 = 0; + + bool flip = false; + FSpecialColormap *special = nullptr; + PalEntry overlay = 0; + FColormapStyle colormapstyle; + bool usecolormapstyle = false; + }; + + class RenderPlayerSprites + { + public: + static RenderPlayerSprites *Instance(); + + void SetupSpriteScale(); + + void Render(); + void RenderRemaining(); private: - static void Render(DPSprite *pspr, AActor *owner, float bobx, float boby, double wx, double wy, double ticfrac, int spriteshade, FDynamicColormap *basecolormap); + void RenderSprite(DPSprite *pspr, AActor *owner, float bobx, float boby, double wx, double wy, double ticfrac, int spriteshade, FDynamicColormap *basecolormap, bool foggy); enum { BASEXCENTER = 160 }; enum { BASEYCENTER = 100 }; - // Used to store a psprite's drawing information if it needs to be drawn later. 
- struct vispsp_t - { - RenderSprite *vis; - FDynamicColormap *basecolormap; - int x1; - }; + TArray AcceleratedSprites; - static TArray vispsprites; - static unsigned int vispspindex; - - static double pspritexscale; - static double pspritexiscale; - static double pspriteyscale; - - static TArray avis; + double pspritexscale = 0.0; + double pspritexiscale = 0.0; + double pspriteyscale = 0.0; }; } diff --git a/src/swrenderer/things/r_sprite.cpp b/src/swrenderer/things/r_sprite.cpp index d237348f49..1c91c75b05 100644 --- a/src/swrenderer/things/r_sprite.cpp +++ b/src/swrenderer/things/r_sprite.cpp @@ -201,7 +201,6 @@ namespace swrenderer vis->Alpha = float(thing->Alpha); vis->fakefloor = fakefloor; vis->fakeceiling = fakeceiling; - vis->ColormapNum = 0; //vis->bInMirror = renderportal->MirrorFlags & RF_XFLIP; //vis->bSplitSprite = false; @@ -224,7 +223,7 @@ namespace swrenderer bool fullbright = !vis->foggy && ((renderflags & RF_FULLBRIGHT) || (thing->flags5 & MF5_BRIGHT)); bool fadeToBlack = (vis->RenderStyle.Flags & STYLEF_FadeToBlack) != 0; - vis->SetColormap(r_SpriteVisibility / MAX(tz, MINZ), spriteshade, basecolormap, fullbright, invertcolormap, fadeToBlack); + vis->Light.SetColormap(r_SpriteVisibility / MAX(tz, MINZ), spriteshade, basecolormap, fullbright, invertcolormap, fadeToBlack); VisibleSpriteList::Instance()->Push(vis); } @@ -237,7 +236,6 @@ namespace swrenderer FTexture *tex; int x2; fixed_t xiscale; - bool ispsprite = (!vis->sector && vis->gpos != FVector3(0, 0, 0)); double spryscale, sprtopscreen; bool sprflipvert; @@ -248,9 +246,9 @@ namespace swrenderer } fixed_t centeryfrac = FLOAT2FIXED(CenterY); - R_SetColorMapLight(vis->BaseColormap, 0, vis->ColormapNum << FRACBITS); + R_SetColorMapLight(vis->Light.BaseColormap, 0, vis->Light.ColormapNum << FRACBITS); - FDynamicColormap *basecolormap = static_cast(vis->BaseColormap); + FDynamicColormap *basecolormap = static_cast(vis->Light.BaseColormap); bool visible = R_SetPatchStyle(vis->RenderStyle, 
vis->Alpha, vis->Translation, vis->FillColor, basecolormap); @@ -258,7 +256,7 @@ namespace swrenderer { // For shaded sprites, R_SetPatchStyle sets a dc_colormap to an alpha table, but // it is the brightest one. We need to get back to the proper light level for // this sprite. - R_SetColorMapLight(drawerargs::dc_fcolormap, 0, vis->ColormapNum << FRACBITS); + R_SetColorMapLight(drawerargs::dc_fcolormap, 0, vis->Light.ColormapNum << FRACBITS); } if (visible) @@ -292,7 +290,7 @@ namespace swrenderer { while (x < x2) { - if (ispsprite || !RenderTranslucentPass::ClipSpriteColumnWithPortals(x, vis)) + if (!RenderTranslucentPass::ClipSpriteColumnWithPortals(x, vis)) R_DrawMaskedColumn(x, iscale, tex, frac, spryscale, sprtopscreen, sprflipvert, mfloorclip, mceilingclip, false); x++; frac += xiscale; diff --git a/src/swrenderer/things/r_visiblesprite.cpp b/src/swrenderer/things/r_visiblesprite.cpp index 5007cd10d1..73cf909f45 100644 --- a/src/swrenderer/things/r_visiblesprite.cpp +++ b/src/swrenderer/things/r_visiblesprite.cpp @@ -52,8 +52,8 @@ namespace swrenderer int r1, r2; short topclip, botclip; short *clip1, *clip2; - FSWColormap *colormap = spr->BaseColormap; - int colormapnum = spr->ColormapNum; + FSWColormap *colormap = spr->Light.BaseColormap; + int colormapnum = spr->Light.ColormapNum; F3DFloor *rover; Clip3DFloors *clip3d = Clip3DFloors::Instance(); @@ -134,7 +134,7 @@ namespace swrenderer int spriteshade = LIGHT2SHADE(sec->lightlevel + R_ActualExtraLight(spr->foggy)); - SetColormap(r_SpriteVisibility / MAX(MINZ, (double)spr->depth), spriteshade, mybasecolormap, isFullBright, invertcolormap, fadeToBlack); + Light.SetColormap(r_SpriteVisibility / MAX(MINZ, (double)spr->depth), spriteshade, mybasecolormap, isFullBright, invertcolormap, fadeToBlack); } } @@ -259,8 +259,8 @@ namespace swrenderer if (topclip >= botclip) { - spr->BaseColormap = colormap; - spr->ColormapNum = colormapnum; + spr->Light.BaseColormap = colormap; + spr->Light.ColormapNum = colormapnum; 
return; } @@ -386,8 +386,8 @@ namespace swrenderer } if (i == x2) { - spr->BaseColormap = colormap; - spr->ColormapNum = colormapnum; + spr->Light.BaseColormap = colormap; + spr->Light.ColormapNum = colormapnum; return; } } @@ -405,11 +405,11 @@ namespace swrenderer int maxvoxely = spr->gzb > hzb ? INT_MAX : xs_RoundToInt((spr->gzt - hzb) / spr->yscale); spr->Render(cliptop, clipbot, minvoxely, maxvoxely); } - spr->BaseColormap = colormap; - spr->ColormapNum = colormapnum; + spr->Light.BaseColormap = colormap; + spr->Light.ColormapNum = colormapnum; } - void VisibleSprite::SetColormap(double visibility, int shade, FDynamicColormap *basecolormap, bool fullbright, bool invertColormap, bool fadeToBlack) + void ColormapLight::SetColormap(double visibility, int shade, FDynamicColormap *basecolormap, bool fullbright, bool invertColormap, bool fadeToBlack) { if (fadeToBlack) { diff --git a/src/swrenderer/things/r_visiblesprite.h b/src/swrenderer/things/r_visiblesprite.h index bfe8a84e0b..edc83667eb 100644 --- a/src/swrenderer/things/r_visiblesprite.h +++ b/src/swrenderer/things/r_visiblesprite.h @@ -23,6 +23,15 @@ struct FSWColormap; namespace swrenderer { + class ColormapLight + { + public: + int ColormapNum = 0; + FSWColormap *BaseColormap = nullptr; + + void SetColormap(double visibility, int shade, FDynamicColormap *basecolormap, bool fullbright, bool invertColormap, bool fadeToBlack); + }; + class VisibleSprite { public: @@ -38,8 +47,6 @@ namespace swrenderer float SortDist() const { return idepth; } protected: - void SetColormap(double visibility, int shade, FDynamicColormap *basecolormap, bool fullbright, bool invertColormap, bool fadeToBlack); - virtual bool IsParticle() const { return false; } virtual bool IsVoxel() const { return false; } virtual bool IsWallSprite() const { return false; } @@ -65,9 +72,7 @@ namespace swrenderer FVector3 gpos = { 0.0f, 0.0f, 0.0f }; // origin in world coordinates sector_t *sector = nullptr; // sector this sprite is in - // Light 
shared calculation? - int ColormapNum = 0; // Which colormap is rendered - FSWColormap *BaseColormap = nullptr; // Base colormap used together with ColormapNum + ColormapLight Light; float Alpha = 0.0f; FRenderStyle RenderStyle; bool foggy = false; diff --git a/src/swrenderer/things/r_voxel.cpp b/src/swrenderer/things/r_voxel.cpp index 7706da69b2..a4b44e0087 100644 --- a/src/swrenderer/things/r_voxel.cpp +++ b/src/swrenderer/things/r_voxel.cpp @@ -149,7 +149,7 @@ namespace swrenderer vis->Alpha = float(thing->Alpha); vis->fakefloor = fakefloor; vis->fakeceiling = fakeceiling; - vis->ColormapNum = 0; + vis->Light.ColormapNum = 0; //vis->bInMirror = renderportal->MirrorFlags & RF_XFLIP; //vis->bSplitSprite = false; @@ -175,7 +175,7 @@ namespace swrenderer bool fullbright = !vis->foggy && ((renderflags & RF_FULLBRIGHT) || (thing->flags5 & MF5_BRIGHT)); bool fadeToBlack = (vis->RenderStyle.Flags & STYLEF_FadeToBlack) != 0; - vis->SetColormap(r_SpriteVisibility / MAX(tz, MINZ), spriteshade, basecolormap, fullbright, invertcolormap, fadeToBlack); + vis->Light.SetColormap(r_SpriteVisibility / MAX(tz, MINZ), spriteshade, basecolormap, fullbright, invertcolormap, fadeToBlack); VisibleSpriteList::Instance()->Push(vis); RenderTranslucentPass::DrewAVoxel = true; @@ -185,9 +185,9 @@ namespace swrenderer { auto sprite = this; - FDynamicColormap *basecolormap = static_cast(sprite->BaseColormap); + FDynamicColormap *basecolormap = static_cast(sprite->Light.BaseColormap); - R_SetColorMapLight(sprite->BaseColormap, 0, sprite->ColormapNum << FRACBITS); + R_SetColorMapLight(sprite->Light.BaseColormap, 0, sprite->Light.ColormapNum << FRACBITS); bool visible = R_SetPatchStyle(sprite->RenderStyle, sprite->Alpha, sprite->Translation, sprite->FillColor, basecolormap); if (!visible) diff --git a/src/swrenderer/things/r_wallsprite.cpp b/src/swrenderer/things/r_wallsprite.cpp index 44477c1e14..76691e07d2 100644 --- a/src/swrenderer/things/r_wallsprite.cpp +++ 
b/src/swrenderer/things/r_wallsprite.cpp @@ -132,7 +132,7 @@ namespace swrenderer vis->wallc = wallc; vis->foggy = foggy; - vis->SetColormap(r_SpriteVisibility / MAX(tz, MINZ), spriteshade, basecolormap, false, false, false); + vis->Light.SetColormap(r_SpriteVisibility / MAX(tz, MINZ), spriteshade, basecolormap, false, false, false); VisibleSpriteList::Instance()->Push(vis); } @@ -165,7 +165,7 @@ namespace swrenderer } // Prepare lighting bool calclighting = false; - FSWColormap *usecolormap = spr->BaseColormap; + FSWColormap *usecolormap = spr->Light.BaseColormap; bool rereadcolormap = true; // Decals that are added to the scene must fade to black. @@ -206,14 +206,14 @@ namespace swrenderer int x = x1; - FDynamicColormap *basecolormap = static_cast(spr->BaseColormap); + FDynamicColormap *basecolormap = static_cast(spr->Light.BaseColormap); bool visible = R_SetPatchStyle(spr->RenderStyle, spr->Alpha, spr->Translation, spr->FillColor, basecolormap); // R_SetPatchStyle can modify basecolormap. 
if (rereadcolormap) { - usecolormap = spr->BaseColormap; + usecolormap = spr->Light.BaseColormap; } if (!visible) From ca8f71b5612f13451d5f9a87e5a66edd88adf5f6 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 24 Jan 2017 04:19:43 +0100 Subject: [PATCH 745/912] Remove the need for RenderPlayerSprites::SetupSpriteScale --- src/swrenderer/scene/r_viewport.cpp | 3 --- src/swrenderer/things/r_playersprite.cpp | 11 ++++------- src/swrenderer/things/r_playersprite.h | 6 ------ 3 files changed, 4 insertions(+), 16 deletions(-) diff --git a/src/swrenderer/scene/r_viewport.cpp b/src/swrenderer/scene/r_viewport.cpp index c925094923..d2df38f221 100644 --- a/src/swrenderer/scene/r_viewport.cpp +++ b/src/swrenderer/scene/r_viewport.cpp @@ -113,9 +113,6 @@ namespace swrenderer WallTMapScale2 = IYaspectMul / CenterX; - // psprite scales - RenderPlayerSprites::Instance()->SetupSpriteScale(); - // thing clipping fillshort(screenheightarray, viewwidth, (short)viewheight); diff --git a/src/swrenderer/things/r_playersprite.cpp b/src/swrenderer/things/r_playersprite.cpp index 69da6b9477..649df88586 100644 --- a/src/swrenderer/things/r_playersprite.cpp +++ b/src/swrenderer/things/r_playersprite.cpp @@ -71,13 +71,6 @@ namespace swrenderer return &instance; } - void RenderPlayerSprites::SetupSpriteScale() - { - pspritexscale = centerxwide / 160.0; - pspriteyscale = pspritexscale * YaspectMul; - pspritexiscale = 1 / pspritexscale; - } - void RenderPlayerSprites::Render() { int i; @@ -248,6 +241,10 @@ namespace swrenderer sy += wy; } + double pspritexscale = centerxwide / 160.0; + double pspriteyscale = pspritexscale * YaspectMul; + double pspritexiscale = 1 / pspritexscale; + // calculate edges of the shape tx = sx - BASEXCENTER; diff --git a/src/swrenderer/things/r_playersprite.h b/src/swrenderer/things/r_playersprite.h index b12bbd8d35..7c821f7425 100644 --- a/src/swrenderer/things/r_playersprite.h +++ b/src/swrenderer/things/r_playersprite.h @@ -76,8 +76,6 @@ namespace 
swrenderer public: static RenderPlayerSprites *Instance(); - void SetupSpriteScale(); - void Render(); void RenderRemaining(); @@ -88,9 +86,5 @@ namespace swrenderer enum { BASEYCENTER = 100 }; TArray AcceleratedSprites; - - double pspritexscale = 0.0; - double pspritexiscale = 0.0; - double pspriteyscale = 0.0; }; } From f94cced13d53ea7e860d6ab48da06ff965c8effa Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 24 Jan 2017 04:24:04 +0100 Subject: [PATCH 746/912] Move ColormapLight to r_light --- src/swrenderer/scene/r_light.cpp | 44 +++++++++++++++++++++++ src/swrenderer/scene/r_light.h | 11 ++++++ src/swrenderer/things/r_visiblesprite.cpp | 42 ---------------------- src/swrenderer/things/r_visiblesprite.h | 12 +------ 4 files changed, 56 insertions(+), 53 deletions(-) diff --git a/src/swrenderer/scene/r_light.cpp b/src/swrenderer/scene/r_light.cpp index a1bcc75ded..028927c5b5 100644 --- a/src/swrenderer/scene/r_light.cpp +++ b/src/swrenderer/scene/r_light.cpp @@ -172,4 +172,48 @@ namespace swrenderer Printf("Visibility cannot be changed in net games.\n"); } } + + ///////////////////////////////////////////////////////////////////////// + + void ColormapLight::SetColormap(double visibility, int shade, FDynamicColormap *basecolormap, bool fullbright, bool invertColormap, bool fadeToBlack) + { + if (fadeToBlack) + { + if (invertColormap) // Fade to white + { + basecolormap = GetSpecialLights(basecolormap->Color, MAKERGB(255, 255, 255), basecolormap->Desaturate); + invertColormap = false; + } + else // Fade to black + { + basecolormap = GetSpecialLights(basecolormap->Color, MAKERGB(0, 0, 0), basecolormap->Desaturate); + } + } + + if (invertColormap) + { + basecolormap = GetSpecialLights(basecolormap->Color, basecolormap->Fade.InverseColor(), basecolormap->Desaturate); + } + + if (fixedcolormap) + { + BaseColormap = fixedcolormap; + ColormapNum = 0; + } + else if (fixedlightlev >= 0) + { + BaseColormap = (r_fullbrightignoresectorcolor) ? 
&FullNormalLight : basecolormap; + ColormapNum = fixedlightlev >> COLORMAPSHIFT; + } + else if (fullbright) + { + BaseColormap = (r_fullbrightignoresectorcolor) ? &FullNormalLight : basecolormap; + ColormapNum = 0; + } + else + { + BaseColormap = basecolormap; + ColormapNum = GETPALOOKUP(visibility, shade); + } + } } diff --git a/src/swrenderer/scene/r_light.h b/src/swrenderer/scene/r_light.h index 26469b37c8..44f1b49f52 100644 --- a/src/swrenderer/scene/r_light.h +++ b/src/swrenderer/scene/r_light.h @@ -49,6 +49,8 @@ // Converts fixedlightlev into a shade value #define FIXEDLIGHT2SHADE(lightlev) (((lightlev) >> COLORMAPSHIFT) << FRACBITS) +struct FSWColormap; + namespace swrenderer { extern double r_BaseVisibility; @@ -67,4 +69,13 @@ namespace swrenderer double R_GetVisibility(); void R_SetupColormap(AActor *actor); + + class ColormapLight + { + public: + int ColormapNum = 0; + FSWColormap *BaseColormap = nullptr; + + void SetColormap(double visibility, int shade, FDynamicColormap *basecolormap, bool fullbright, bool invertColormap, bool fadeToBlack); + }; } diff --git a/src/swrenderer/things/r_visiblesprite.cpp b/src/swrenderer/things/r_visiblesprite.cpp index 73cf909f45..0f1088d809 100644 --- a/src/swrenderer/things/r_visiblesprite.cpp +++ b/src/swrenderer/things/r_visiblesprite.cpp @@ -408,46 +408,4 @@ namespace swrenderer spr->Light.BaseColormap = colormap; spr->Light.ColormapNum = colormapnum; } - - void ColormapLight::SetColormap(double visibility, int shade, FDynamicColormap *basecolormap, bool fullbright, bool invertColormap, bool fadeToBlack) - { - if (fadeToBlack) - { - if (invertColormap) // Fade to white - { - basecolormap = GetSpecialLights(basecolormap->Color, MAKERGB(255, 255, 255), basecolormap->Desaturate); - invertColormap = false; - } - else // Fade to black - { - basecolormap = GetSpecialLights(basecolormap->Color, MAKERGB(0, 0, 0), basecolormap->Desaturate); - } - } - - if (invertColormap) - { - basecolormap = 
GetSpecialLights(basecolormap->Color, basecolormap->Fade.InverseColor(), basecolormap->Desaturate); - } - - if (fixedcolormap) - { - BaseColormap = fixedcolormap; - ColormapNum = 0; - } - else if (fixedlightlev >= 0) - { - BaseColormap = (r_fullbrightignoresectorcolor) ? &FullNormalLight : basecolormap; - ColormapNum = fixedlightlev >> COLORMAPSHIFT; - } - else if (fullbright) - { - BaseColormap = (r_fullbrightignoresectorcolor) ? &FullNormalLight : basecolormap; - ColormapNum = 0; - } - else - { - BaseColormap = basecolormap; - ColormapNum = GETPALOOKUP(visibility, shade); - } - } } diff --git a/src/swrenderer/things/r_visiblesprite.h b/src/swrenderer/things/r_visiblesprite.h index edc83667eb..87abe767a4 100644 --- a/src/swrenderer/things/r_visiblesprite.h +++ b/src/swrenderer/things/r_visiblesprite.h @@ -14,24 +14,14 @@ #pragma once #include "swrenderer/line/r_line.h" +#include "swrenderer/scene/r_light.h" #include "swrenderer/scene/r_opaque_pass.h" #include "swrenderer/things/r_visiblespritelist.h" #define MINZ double((2048*4) / double(1 << 20)) -struct FSWColormap; - namespace swrenderer { - class ColormapLight - { - public: - int ColormapNum = 0; - FSWColormap *BaseColormap = nullptr; - - void SetColormap(double visibility, int shade, FDynamicColormap *basecolormap, bool fullbright, bool invertColormap, bool fadeToBlack); - }; - class VisibleSprite { public: From 946ab93ff6e066f99d884058f1fde9be45c6da41 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 24 Jan 2017 05:00:11 +0100 Subject: [PATCH 747/912] Remove unused friend declaration --- src/swrenderer/things/r_sprite.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/swrenderer/things/r_sprite.h b/src/swrenderer/things/r_sprite.h index 86462786ee..b6298d571b 100644 --- a/src/swrenderer/things/r_sprite.h +++ b/src/swrenderer/things/r_sprite.h @@ -32,7 +32,5 @@ namespace swrenderer uint32_t Translation = 0; uint32_t FillColor = 0; - - friend class RenderPlayerSprite; // To do: detach sprite from 
playersprite! }; } From 12271cbfb5c445f8959c0cc9f416710057020bc0 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 24 Jan 2017 05:31:39 +0100 Subject: [PATCH 748/912] Remove the 1000 portal segment limit and make WallPortals private to RenderPortal --- src/swrenderer/line/r_line.cpp | 26 +------------------- src/swrenderer/scene/r_portal.cpp | 10 ++++++-- src/swrenderer/scene/r_portal.h | 3 +++ src/swrenderer/scene/r_scene.cpp | 1 - src/swrenderer/segments/r_portalsegment.cpp | 27 ++++++++++++++++++++- src/swrenderer/segments/r_portalsegment.h | 20 +++++++-------- 6 files changed, 48 insertions(+), 39 deletions(-) diff --git a/src/swrenderer/line/r_line.cpp b/src/swrenderer/line/r_line.cpp index 95adff8aa2..077daa72a7 100644 --- a/src/swrenderer/line/r_line.cpp +++ b/src/swrenderer/line/r_line.cpp @@ -601,31 +601,7 @@ namespace swrenderer if (rw_markportal) { - PortalDrawseg pds; - pds.src = curline->linedef; - pds.dst = curline->linedef->special == Line_Mirror ? curline->linedef : curline->linedef->getPortalDestination(); - pds.x1 = draw_segment->x1; - pds.x2 = draw_segment->x2; - pds.len = pds.x2 - pds.x1; - pds.ceilingclip.Resize(pds.len); - memcpy(&pds.ceilingclip[0], draw_segment->sprtopclip, pds.len * sizeof(short)); - pds.floorclip.Resize(pds.len); - memcpy(&pds.floorclip[0], draw_segment->sprbottomclip, pds.len * sizeof(short)); - - for (int i = 0; i < pds.x2 - pds.x1; i++) - { - if (pds.ceilingclip[i] < 0) - pds.ceilingclip[i] = 0; - if (pds.ceilingclip[i] >= viewheight) - pds.ceilingclip[i] = viewheight - 1; - if (pds.floorclip[i] < 0) - pds.floorclip[i] = 0; - if (pds.floorclip[i] >= viewheight) - pds.floorclip[i] = viewheight - 1; - } - - pds.mirror = curline->linedef->special == Line_Mirror; - WallPortals.Push(pds); + RenderPortal::Instance()->AddLinePortal(curline->linedef, draw_segment->x1, draw_segment->x2, draw_segment->sprtopclip, draw_segment->sprbottomclip); } return (clip3d->fake3D & FAKE3D_FAKEMASK) == 0; diff --git 
a/src/swrenderer/scene/r_portal.cpp b/src/swrenderer/scene/r_portal.cpp index 4d18bcfd4b..82e72ab456 100644 --- a/src/swrenderer/scene/r_portal.cpp +++ b/src/swrenderer/scene/r_portal.cpp @@ -279,7 +279,7 @@ namespace swrenderer size_t lastportal = WallPortals.Size(); for (unsigned int i = 0; i < lastportal; i++) { - RenderLinePortal(&WallPortals[i], 0); + RenderLinePortal(WallPortals[i], 0); } CurrentPortal = nullptr; @@ -449,7 +449,7 @@ namespace swrenderer unsigned int portalsAtEnd = WallPortals.Size(); for (; portalsAtStart < portalsAtEnd; portalsAtStart++) { - RenderLinePortal(&WallPortals[portalsAtStart], depth + 1); + RenderLinePortal(WallPortals[portalsAtStart], depth + 1); } int prevuniq2 = CurrentPortalUniq; CurrentPortalUniq = prevuniq; @@ -532,6 +532,12 @@ namespace swrenderer MirrorFlags = 0; CurrentPortal = nullptr; CurrentPortalUniq = 0; + WallPortals.Clear(); + } + + void RenderPortal::AddLinePortal(line_t *linedef, int x1, int x2, const short *topclip, const short *bottomclip) + { + WallPortals.Push(RenderMemory::NewObject(linedef, x1, x2, topclip, bottomclip)); } } diff --git a/src/swrenderer/scene/r_portal.h b/src/swrenderer/scene/r_portal.h index 74bd4e3490..f62cab3eb0 100644 --- a/src/swrenderer/scene/r_portal.h +++ b/src/swrenderer/scene/r_portal.h @@ -29,6 +29,8 @@ namespace swrenderer void RenderPlanePortals(); void RenderLinePortals(); + + void AddLinePortal(line_t *linedef, int x1, int x2, const short *topclip, const short *bottomclip); int WindowLeft = 0; int WindowRight = 0; @@ -57,5 +59,6 @@ namespace swrenderer TArray drawsegStack; TArray viewposStack; TArray visplaneStack; + TArray WallPortals; }; } diff --git a/src/swrenderer/scene/r_scene.cpp b/src/swrenderer/scene/r_scene.cpp index 659505f0da..be23aae322 100644 --- a/src/swrenderer/scene/r_scene.cpp +++ b/src/swrenderer/scene/r_scene.cpp @@ -187,7 +187,6 @@ namespace swrenderer NetUpdate(); } - WallPortals.Clear(); interpolator.RestoreInterpolations(); // If we don't want shadered 
colormaps, NULL it now so that the diff --git a/src/swrenderer/segments/r_portalsegment.cpp b/src/swrenderer/segments/r_portalsegment.cpp index 302dbe3c9c..dbc8df9768 100644 --- a/src/swrenderer/segments/r_portalsegment.cpp +++ b/src/swrenderer/segments/r_portalsegment.cpp @@ -31,8 +31,33 @@ #include "po_man.h" #include "r_data/colormaps.h" #include "swrenderer/segments/r_portalsegment.h" +#include "swrenderer/r_memory.h" namespace swrenderer { - TArray WallPortals(1000); // note: this array needs to go away as reallocation can cause crashes. + PortalDrawseg::PortalDrawseg(line_t *linedef, int x1, int x2, const short *topclip, const short *bottomclip) : x1(x1), x2(x2) + { + src = linedef; + dst = linedef->special == Line_Mirror ? linedef : linedef->getPortalDestination(); + len = x2 - x1; + + ceilingclip = RenderMemory::AllocMemory(len); + floorclip = RenderMemory::AllocMemory(len); + memcpy(ceilingclip, topclip, len * sizeof(short)); + memcpy(floorclip, bottomclip, len * sizeof(short)); + + for (int i = 0; i < x2 - x1; i++) + { + if (ceilingclip[i] < 0) + ceilingclip[i] = 0; + if (ceilingclip[i] >= viewheight) + ceilingclip[i] = viewheight - 1; + if (floorclip[i] < 0) + floorclip[i] = 0; + if (floorclip[i] >= viewheight) + floorclip[i] = viewheight - 1; + } + + mirror = linedef->special == Line_Mirror; + } } diff --git a/src/swrenderer/segments/r_portalsegment.h b/src/swrenderer/segments/r_portalsegment.h index f1d757276e..9c6f644dfb 100644 --- a/src/swrenderer/segments/r_portalsegment.h +++ b/src/swrenderer/segments/r_portalsegment.h @@ -18,18 +18,18 @@ namespace swrenderer /* portal structure, this is used in r_ code in order to store drawsegs with portals (and mirrors) */ struct PortalDrawseg { - line_t* src; // source line (the one drawn) this doesn't change over render loops - line_t* dst; // destination line (the one that the portal is linked with, equals 'src' for mirrors) + PortalDrawseg(line_t *linedef, int x1, int x2, const short *topclip, const short 
*bottomclip); - int x1; // drawseg x1 - int x2; // drawseg x2 + line_t* src = nullptr; // source line (the one drawn) this doesn't change over render loops + line_t* dst = nullptr; // destination line (the one that the portal is linked with, equals 'src' for mirrors) - int len; - TArray ceilingclip; - TArray floorclip; + int x1 = 0; // drawseg x1 + int x2 = 0; // drawseg x2 - bool mirror; // true if this is a mirror (src should equal dst) + int len = 0; + short *ceilingclip = nullptr; + short *floorclip = nullptr; + + bool mirror = false; // true if this is a mirror (src should equal dst) }; - - extern TArray WallPortals; } From ac74a7a1e0bd24f69ca36d7775f6a6dc56634441 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 24 Jan 2017 06:50:17 +0100 Subject: [PATCH 749/912] Refactor wall setup into ProjectedWallLine and ProjectWallTexcoords --- src/swrenderer/line/r_line.cpp | 91 +++++++++-------------- src/swrenderer/line/r_line.h | 5 +- src/swrenderer/line/r_walldraw.cpp | 34 ++++----- src/swrenderer/line/r_wallsetup.cpp | 58 +++++++-------- src/swrenderer/line/r_wallsetup.h | 42 ++++++++--- src/swrenderer/segments/r_drawsegment.cpp | 66 ++++++++-------- src/swrenderer/things/r_decal.cpp | 20 ++--- src/swrenderer/things/r_wallsprite.cpp | 8 +- 8 files changed, 164 insertions(+), 160 deletions(-) diff --git a/src/swrenderer/line/r_line.cpp b/src/swrenderer/line/r_line.cpp index 077daa72a7..d702813606 100644 --- a/src/swrenderer/line/r_line.cpp +++ b/src/swrenderer/line/r_line.cpp @@ -171,12 +171,12 @@ namespace swrenderer if (rw_frontcz1 > rw_backcz1 || rw_frontcz2 > rw_backcz2) { rw_havehigh = true; - R_CreateWallSegmentYSloped(wallupper, backsector->ceilingplane, &WallC, curline, renderportal->MirrorFlags & RF_XFLIP); + wallupper.Project(backsector->ceilingplane, &WallC, curline, renderportal->MirrorFlags & RF_XFLIP); } if (rw_frontfz1 < rw_backfz1 || rw_frontfz2 < rw_backfz2) { rw_havelow = true; - R_CreateWallSegmentYSloped(walllower, 
backsector->floorplane, &WallC, curline, renderportal->MirrorFlags & RF_XFLIP); + walllower.Project(backsector->floorplane, &WallC, curline, renderportal->MirrorFlags & RF_XFLIP); } // Portal @@ -272,30 +272,13 @@ namespace swrenderer if (line->linedef->special == Line_Horizon) { // Be aware: Line_Horizon does not work properly with sloped planes - fillshort(walltop + WallC.sx1, WallC.sx2 - WallC.sx1, centery); - fillshort(wallbottom + WallC.sx1, WallC.sx2 - WallC.sx1, centery); + fillshort(walltop.ScreenY + WallC.sx1, WallC.sx2 - WallC.sx1, centery); + fillshort(wallbottom.ScreenY + WallC.sx1, WallC.sx2 - WallC.sx1, centery); } else { - rw_ceilstat = R_CreateWallSegmentYSloped(walltop, frontsector->ceilingplane, &WallC, curline, renderportal->MirrorFlags & RF_XFLIP); - rw_floorstat = R_CreateWallSegmentYSloped(wallbottom, frontsector->floorplane, &WallC, curline, renderportal->MirrorFlags & RF_XFLIP); - - // [RH] treat off-screen walls as solid -#if 0 // Maybe later... - if (!solid) - { - if (rw_ceilstat == 12 && line->sidedef->GetTexture(side_t::top) != 0) - { - rw_mustmarkceiling = true; - solid = true; - } - if (rw_floorstat == 3 && line->sidedef->GetTexture(side_t::bottom) != 0) - { - rw_mustmarkfloor = true; - solid = true; - } - } -#endif + rw_ceilstat = walltop.Project(frontsector->ceilingplane, &WallC, curline, renderportal->MirrorFlags & RF_XFLIP); + rw_floorstat = wallbottom.Project(frontsector->floorplane, &WallC, curline, renderportal->MirrorFlags & RF_XFLIP); } static SWRenderLine *self = this; @@ -455,8 +438,8 @@ namespace swrenderer // allocate space for masked texture tables, if needed // [RH] Don't just allocate the space; fill it in too. 
if ((TexMan(sidedef->GetTexture(side_t::mid), true)->UseType != FTexture::TEX_Null || draw_segment->bFakeBoundary || IsFogBoundary(frontsector, backsector)) && - (rw_ceilstat != 12 || !sidedef->GetTexture(side_t::top).isValid()) && - (rw_floorstat != 3 || !sidedef->GetTexture(side_t::bottom).isValid()) && + (rw_ceilstat != ProjectedWallCull::OutsideBelow || !sidedef->GetTexture(side_t::top).isValid()) && + (rw_floorstat != ProjectedWallCull::OutsideAbove || !sidedef->GetTexture(side_t::bottom).isValid()) && (WallC.sz1 >= TOO_CLOSE_Z && WallC.sz2 >= TOO_CLOSE_Z)) { float *swal; @@ -491,8 +474,8 @@ namespace swrenderer for (i = start; i < stop; i++) { - *lwal++ = lwall[i] + xoffset; - *swal++ = swall[i]; + *lwal++ = walltexcoords.UPos[i] + xoffset; + *swal++ = walltexcoords.VStep[i]; } double istart = draw_segment->swall[0] * yscale; @@ -696,7 +679,7 @@ namespace swrenderer { if (rw_havehigh) { // front ceiling is above back ceiling - memcpy(&walltop[WallC.sx1], &wallupper[WallC.sx1], (WallC.sx2 - WallC.sx1) * sizeof(walltop[0])); + memcpy(&walltop.ScreenY[WallC.sx1], &wallupper.ScreenY[WallC.sx1], (WallC.sx2 - WallC.sx1) * sizeof(walltop.ScreenY[0])); rw_havehigh = false; } else if (rw_havelow && frontsector->ceilingplane != backsector->ceilingplane) @@ -707,7 +690,7 @@ namespace swrenderer // Recalculate walltop so that the wall is clipped by the back sector's // ceiling instead of the front sector's ceiling. RenderPortal *renderportal = RenderPortal::Instance(); - R_CreateWallSegmentYSloped(walltop, backsector->ceilingplane, &WallC, curline, renderportal->MirrorFlags & RF_XFLIP); + walltop.Project(backsector->ceilingplane, &WallC, curline, renderportal->MirrorFlags & RF_XFLIP); } // Putting sky ceilings on the front and back of a line alters the way unpegged // positioning works. @@ -916,7 +899,7 @@ namespace swrenderer bottomtexture ? 
(bottomtexture->Scale.X * sidedef->GetTextureXScale(side_t::bottom)) : 1.; - PrepWall(swall, lwall, sidedef->TexelLength * lwallscale, WallC.sx1, WallC.sx2, WallT); + walltexcoords.Project(sidedef->TexelLength * lwallscale, WallC.sx1, WallC.sx2, WallT); if (fixedcolormap == NULL && fixedlightlev < 0) { @@ -959,13 +942,13 @@ namespace swrenderer auto floorclip = RenderOpaquePass::Instance()->floorclip; for (x = x1; x < x2; ++x) { - if (walltop[x] < ceilingclip[x]) + if (walltop.ScreenY[x] < ceilingclip[x]) { - walltop[x] = ceilingclip[x]; + walltop.ScreenY[x] = ceilingclip[x]; } - if (wallbottom[x] > floorclip[x]) + if (wallbottom.ScreenY[x] > floorclip[x]) { - wallbottom[x] = floorclip[x]; + wallbottom.ScreenY[x] = floorclip[x]; } } @@ -977,7 +960,7 @@ namespace swrenderer for (x = x1; x < x2; ++x) { short top = (clip3d->fakeFloor && clip3d->fake3D & FAKE3D_FAKECEILING) ? clip3d->fakeFloor->ceilingclip[x] : ceilingclip[x]; - short bottom = MIN(walltop[x], floorclip[x]); + short bottom = MIN(walltop.ScreenY[x], floorclip[x]); if (top < bottom) { ceilingplane->top[x] = top; @@ -991,7 +974,7 @@ namespace swrenderer { for (x = x1; x < x2; ++x) { - short top = MAX(wallbottom[x], ceilingclip[x]); + short top = MAX(wallbottom.ScreenY[x], ceilingclip[x]); short bottom = (clip3d->fakeFloor && clip3d->fake3D & FAKE3D_FAKEFLOOR) ? 
clip3d->fakeFloor->floorclip[x] : floorclip[x]; if (top < bottom) { @@ -1007,27 +990,27 @@ namespace swrenderer { if (clip3d->fake3D & FAKE3D_CLIPBOTFRONT) { - memcpy(clip3d->fakeFloor->floorclip + x1, wallbottom + x1, (x2 - x1) * sizeof(short)); + memcpy(clip3d->fakeFloor->floorclip + x1, wallbottom.ScreenY + x1, (x2 - x1) * sizeof(short)); } else { for (x = x1; x < x2; ++x) { - walllower[x] = MIN(MAX(walllower[x], ceilingclip[x]), wallbottom[x]); + walllower.ScreenY[x] = MIN(MAX(walllower.ScreenY[x], ceilingclip[x]), wallbottom.ScreenY[x]); } - memcpy(clip3d->fakeFloor->floorclip + x1, walllower + x1, (x2 - x1) * sizeof(short)); + memcpy(clip3d->fakeFloor->floorclip + x1, walllower.ScreenY + x1, (x2 - x1) * sizeof(short)); } if (clip3d->fake3D & FAKE3D_CLIPTOPFRONT) { - memcpy(clip3d->fakeFloor->ceilingclip + x1, walltop + x1, (x2 - x1) * sizeof(short)); + memcpy(clip3d->fakeFloor->ceilingclip + x1, walltop.ScreenY + x1, (x2 - x1) * sizeof(short)); } else { for (x = x1; x < x2; ++x) { - wallupper[x] = MAX(MIN(wallupper[x], floorclip[x]), walltop[x]); + wallupper.ScreenY[x] = MAX(MIN(wallupper.ScreenY[x], floorclip[x]), walltop.ScreenY[x]); } - memcpy(clip3d->fakeFloor->ceilingclip + x1, wallupper + x1, (x2 - x1) * sizeof(short)); + memcpy(clip3d->fakeFloor->ceilingclip + x1, wallupper.ScreenY + x1, (x2 - x1) * sizeof(short)); } } if (clip3d->fake3D & FAKE3D_FAKEMASK) return; @@ -1044,7 +1027,7 @@ namespace swrenderer yscale = rw_pic->Scale.Y * rw_midtexturescaley; if (xscale != lwallscale) { - PrepLWall(lwall, curline->sidedef->TexelLength*xscale, WallC.sx1, WallC.sx2, WallT); + walltexcoords.ProjectPos(curline->sidedef->TexelLength*xscale, WallC.sx1, WallC.sx2, WallT); lwallscale = xscale; } if (midtexture->bWorldPanning) @@ -1059,7 +1042,7 @@ namespace swrenderer { rw_offset = -rw_offset; } - R_DrawWallSegment(frontsector, curline, WallC, rw_pic, x1, x2, walltop, wallbottom, rw_midtexturemid, swall, lwall, yscale, MAX(rw_frontcz1, rw_frontcz2), MIN(rw_frontfz1, 
rw_frontfz2), false, wallshade, rw_offset, rw_light, rw_lightstep, light_list, foggy, basecolormap); + R_DrawWallSegment(frontsector, curline, WallC, rw_pic, x1, x2, walltop.ScreenY, wallbottom.ScreenY, rw_midtexturemid, walltexcoords.VStep, walltexcoords.UPos, yscale, MAX(rw_frontcz1, rw_frontcz2), MIN(rw_frontfz1, rw_frontfz2), false, wallshade, rw_offset, rw_light, rw_lightstep, light_list, foggy, basecolormap); } fillshort(ceilingclip + x1, x2 - x1, viewheight); fillshort(floorclip + x1, x2 - x1, 0xffff); @@ -1070,7 +1053,7 @@ namespace swrenderer { // top wall for (x = x1; x < x2; ++x) { - wallupper[x] = MAX(MIN(wallupper[x], floorclip[x]), walltop[x]); + wallupper.ScreenY[x] = MAX(MIN(wallupper.ScreenY[x], floorclip[x]), walltop.ScreenY[x]); } if (viewactive) { @@ -1079,7 +1062,7 @@ namespace swrenderer yscale = rw_pic->Scale.Y * rw_toptexturescaley; if (xscale != lwallscale) { - PrepLWall(lwall, curline->sidedef->TexelLength*xscale, WallC.sx1, WallC.sx2, WallT); + walltexcoords.ProjectPos(curline->sidedef->TexelLength*xscale, WallC.sx1, WallC.sx2, WallT); lwallscale = xscale; } if (toptexture->bWorldPanning) @@ -1094,13 +1077,13 @@ namespace swrenderer { rw_offset = -rw_offset; } - R_DrawWallSegment(frontsector, curline, WallC, rw_pic, x1, x2, walltop, wallupper, rw_toptexturemid, swall, lwall, yscale, MAX(rw_frontcz1, rw_frontcz2), MIN(rw_backcz1, rw_backcz2), false, wallshade, rw_offset, rw_light, rw_lightstep, light_list, foggy, basecolormap); + R_DrawWallSegment(frontsector, curline, WallC, rw_pic, x1, x2, walltop.ScreenY, wallupper.ScreenY, rw_toptexturemid, walltexcoords.VStep, walltexcoords.UPos, yscale, MAX(rw_frontcz1, rw_frontcz2), MIN(rw_backcz1, rw_backcz2), false, wallshade, rw_offset, rw_light, rw_lightstep, light_list, foggy, basecolormap); } - memcpy(ceilingclip + x1, wallupper + x1, (x2 - x1) * sizeof(short)); + memcpy(ceilingclip + x1, wallupper.ScreenY + x1, (x2 - x1) * sizeof(short)); } else if (markceiling) { // no top wall - 
memcpy(ceilingclip + x1, walltop + x1, (x2 - x1) * sizeof(short)); + memcpy(ceilingclip + x1, walltop.ScreenY + x1, (x2 - x1) * sizeof(short)); } @@ -1108,7 +1091,7 @@ namespace swrenderer { // bottom wall for (x = x1; x < x2; ++x) { - walllower[x] = MIN(MAX(walllower[x], ceilingclip[x]), wallbottom[x]); + walllower.ScreenY[x] = MIN(MAX(walllower.ScreenY[x], ceilingclip[x]), wallbottom.ScreenY[x]); } if (viewactive) { @@ -1117,7 +1100,7 @@ namespace swrenderer yscale = rw_pic->Scale.Y * rw_bottomtexturescaley; if (xscale != lwallscale) { - PrepLWall(lwall, curline->sidedef->TexelLength*xscale, WallC.sx1, WallC.sx2, WallT); + walltexcoords.ProjectPos(curline->sidedef->TexelLength*xscale, WallC.sx1, WallC.sx2, WallT); lwallscale = xscale; } if (bottomtexture->bWorldPanning) @@ -1132,13 +1115,13 @@ namespace swrenderer { rw_offset = -rw_offset; } - R_DrawWallSegment(frontsector, curline, WallC, rw_pic, x1, x2, walllower, wallbottom, rw_bottomtexturemid, swall, lwall, yscale, MAX(rw_backfz1, rw_backfz2), MIN(rw_frontfz1, rw_frontfz2), false, wallshade, rw_offset, rw_light, rw_lightstep, light_list, foggy, basecolormap); + R_DrawWallSegment(frontsector, curline, WallC, rw_pic, x1, x2, walllower.ScreenY, wallbottom.ScreenY, rw_bottomtexturemid, walltexcoords.VStep, walltexcoords.UPos, yscale, MAX(rw_backfz1, rw_backfz2), MIN(rw_frontfz1, rw_frontfz2), false, wallshade, rw_offset, rw_light, rw_lightstep, light_list, foggy, basecolormap); } - memcpy(floorclip + x1, walllower + x1, (x2 - x1) * sizeof(short)); + memcpy(floorclip + x1, walllower.ScreenY + x1, (x2 - x1) * sizeof(short)); } else if (markfloor) { // no bottom wall - memcpy(floorclip + x1, wallbottom + x1, (x2 - x1) * sizeof(short)); + memcpy(floorclip + x1, wallbottom.ScreenY + x1, (x2 - x1) * sizeof(short)); } } rw_offset = xoffset; diff --git a/src/swrenderer/line/r_line.h b/src/swrenderer/line/r_line.h index cb48eb4bbd..51ec10787e 100644 --- a/src/swrenderer/line/r_line.h +++ b/src/swrenderer/line/r_line.h @@ 
-14,6 +14,7 @@ #pragma once #include "vectors.h" +#include "r_wallsetup.h" struct seg_t; struct subsector_t; @@ -85,7 +86,7 @@ namespace swrenderer fixed_t rw_offset_mid; fixed_t rw_offset_bottom; - int rw_ceilstat, rw_floorstat; + ProjectedWallCull rw_ceilstat, rw_floorstat; bool rw_mustmarkfloor, rw_mustmarkceiling; bool rw_prepped; bool rw_markportal; @@ -120,5 +121,7 @@ namespace swrenderer bool foggy; FDynamicColormap *basecolormap; + + double lwallscale; }; } diff --git a/src/swrenderer/line/r_walldraw.cpp b/src/swrenderer/line/r_walldraw.cpp index 43aaa38bcd..d6c475a81c 100644 --- a/src/swrenderer/line/r_walldraw.cpp +++ b/src/swrenderer/line/r_walldraw.cpp @@ -421,11 +421,11 @@ namespace swrenderer static void ProcessStripedWall(sector_t *frontsector, seg_t *curline, const FWallCoords &WallC, int x1, int x2, short *uwal, short *dwal, double texturemid, float *swal, fixed_t *lwal, double yrepeat, int wallshade, fixed_t xoffset, float light, float lightstep, bool foggy, FDynamicColormap *basecolormap, FLightNode *light_list) { - short most1[MAXWIDTH], most2[MAXWIDTH], most3[MAXWIDTH]; + ProjectedWallLine most1, most2, most3; short *up, *down; up = uwal; - down = most1; + down = most1.ScreenY; assert(WallC.sx1 <= x1); assert(WallC.sx2 >= x2); @@ -435,16 +435,16 @@ namespace swrenderer // kg3D - fake floors instead of zdoom light list for (unsigned int i = 0; i < frontsector->e->XFloor.lightlist.Size(); i++) { - int j = R_CreateWallSegmentYSloped(most3, frontsector->e->XFloor.lightlist[i].plane, &WallC, curline, renderportal->MirrorFlags & RF_XFLIP); - if (j != 3) + ProjectedWallCull j = most3.Project(frontsector->e->XFloor.lightlist[i].plane, &WallC, curline, renderportal->MirrorFlags & RF_XFLIP); + if (j != ProjectedWallCull::OutsideAbove) { for (int j = x1; j < x2; ++j) { - down[j] = clamp(most3[j], up[j], dwal[j]); + down[j] = clamp(most3.ScreenY[j], up[j], dwal[j]); } ProcessNormalWall(WallC, x1, x2, up, down, texturemid, swal, lwal, yrepeat, wallshade, 
xoffset, light, lightstep, basecolormap, light_list); up = down; - down = (down == most1) ? most2 : most1; + down = (down == most1.ScreenY) ? most2.ScreenY : most1.ScreenY; } lightlist_t *lit = &frontsector->e->XFloor.lightlist[i]; @@ -494,7 +494,7 @@ namespace swrenderer static void ProcessWallNP2(sector_t *frontsector, seg_t *curline, const FWallCoords &WallC, int x1, int x2, short *uwal, short *dwal, double texturemid, float *swal, fixed_t *lwal, double yrepeat, double top, double bot, int wallshade, fixed_t xoffset, float light, float lightstep, bool mask, bool foggy, FDynamicColormap *basecolormap, FLightNode *light_list) { - short most1[MAXWIDTH], most2[MAXWIDTH], most3[MAXWIDTH]; + ProjectedWallLine most1, most2, most3; short *up, *down; double texheight = rw_pic->GetHeight(); double partition; @@ -508,20 +508,20 @@ namespace swrenderer partition -= scaledtexheight; } up = uwal; - down = most1; + down = most1.ScreenY; texturemid = (partition - ViewPos.Z) * yrepeat + texheight; while (partition > bot) { - int j = R_CreateWallSegmentY(most3, partition - ViewPos.Z, &WallC); - if (j != 3) + ProjectedWallCull j = most3.Project(partition - ViewPos.Z, &WallC); + if (j != ProjectedWallCull::OutsideAbove) { for (int j = x1; j < x2; ++j) { - down[j] = clamp(most3[j], up[j], dwal[j]); + down[j] = clamp(most3.ScreenY[j], up[j], dwal[j]); } ProcessWall(frontsector, curline, WallC, x1, x2, up, down, texturemid, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, mask, foggy, basecolormap, light_list); up = down; - down = (down == most1) ? most2 : most1; + down = (down == most1.ScreenY) ? 
most2.ScreenY : most1.ScreenY; } partition -= scaledtexheight; texturemid -= texheight; @@ -531,21 +531,21 @@ namespace swrenderer else { // upside down: draw strips from bottom to top partition = bot - fmod(bot - texturemid / yrepeat - ViewPos.Z, scaledtexheight); - up = most1; + up = most1.ScreenY; down = dwal; texturemid = (partition - ViewPos.Z) * yrepeat + texheight; while (partition < top) { - int j = R_CreateWallSegmentY(most3, partition - ViewPos.Z, &WallC); - if (j != 12) + ProjectedWallCull j = most3.Project(partition - ViewPos.Z, &WallC); + if (j != ProjectedWallCull::OutsideBelow) { for (int j = x1; j < x2; ++j) { - up[j] = clamp(most3[j], uwal[j], down[j]); + up[j] = clamp(most3.ScreenY[j], uwal[j], down[j]); } ProcessWall(frontsector, curline, WallC, x1, x2, up, down, texturemid, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, mask, foggy, basecolormap, light_list); down = up; - up = (up == most1) ? most2 : most1; + up = (up == most1.ScreenY) ? most2.ScreenY : most1.ScreenY; } partition -= scaledtexheight; texturemid -= texheight; diff --git a/src/swrenderer/line/r_wallsetup.cpp b/src/swrenderer/line/r_wallsetup.cpp index e7a86700de..4c34f3483c 100644 --- a/src/swrenderer/line/r_wallsetup.cpp +++ b/src/swrenderer/line/r_wallsetup.cpp @@ -27,37 +27,35 @@ namespace swrenderer { - short walltop[MAXWIDTH]; - short wallbottom[MAXWIDTH]; - short wallupper[MAXWIDTH]; - short walllower[MAXWIDTH]; - float swall[MAXWIDTH]; - fixed_t lwall[MAXWIDTH]; - double lwallscale; + ProjectedWallLine walltop; + ProjectedWallLine wallbottom; + ProjectedWallLine wallupper; + ProjectedWallLine walllower; + ProjectedWallTexcoords walltexcoords; - int R_CreateWallSegmentY(short *outbuf, double z, const FWallCoords *wallc) + ProjectedWallCull ProjectedWallLine::Project(double z, const FWallCoords *wallc) { - return R_CreateWallSegmentY(outbuf, z, z, wallc); + return Project(z, z, wallc); } - int R_CreateWallSegmentY(short *outbuf, double z1, double z2, const 
FWallCoords *wallc) + ProjectedWallCull ProjectedWallLine::Project(double z1, double z2, const FWallCoords *wallc) { float y1 = (float)(CenterY - z1 * InvZtoScale / wallc->sz1); float y2 = (float)(CenterY - z2 * InvZtoScale / wallc->sz2); if (y1 < 0 && y2 < 0) // entire line is above screen { - memset(&outbuf[wallc->sx1], 0, (wallc->sx2 - wallc->sx1) * sizeof(outbuf[0])); - return 3; + memset(&ScreenY[wallc->sx1], 0, (wallc->sx2 - wallc->sx1) * sizeof(ScreenY[0])); + return ProjectedWallCull::OutsideAbove; } else if (y1 > viewheight && y2 > viewheight) // entire line is below screen { - fillshort(&outbuf[wallc->sx1], wallc->sx2 - wallc->sx1, viewheight); - return 12; + fillshort(&ScreenY[wallc->sx1], wallc->sx2 - wallc->sx1, viewheight); + return ProjectedWallCull::OutsideBelow; } if (wallc->sx2 <= wallc->sx1) - return 0; + return ProjectedWallCull::Visible; float rcp_delta = 1.0f / (wallc->sx2 - wallc->sx1); if (y1 >= 0.0f && y2 >= 0.0f && xs_RoundToInt(y1) <= viewheight && xs_RoundToInt(y2) <= viewheight) @@ -66,7 +64,7 @@ namespace swrenderer { float t = (x - wallc->sx1) * rcp_delta; float y = y1 * (1.0f - t) + y2 * t; - outbuf[x] = (short)xs_RoundToInt(y); + ScreenY[x] = (short)xs_RoundToInt(y); } } else @@ -75,18 +73,18 @@ namespace swrenderer { float t = (x - wallc->sx1) * rcp_delta; float y = y1 * (1.0f - t) + y2 * t; - outbuf[x] = (short)clamp(xs_RoundToInt(y), 0, viewheight); + ScreenY[x] = (short)clamp(xs_RoundToInt(y), 0, viewheight); } } - return 0; + return ProjectedWallCull::Visible; } - int R_CreateWallSegmentYSloped(short *outbuf, const secplane_t &plane, const FWallCoords *wallc, seg_t *curline, bool xflip) + ProjectedWallCull ProjectedWallLine::Project(const secplane_t &plane, const FWallCoords *wallc, seg_t *curline, bool xflip) { if (!plane.isSlope()) { - return R_CreateWallSegmentY(outbuf, plane.Zat0() - ViewPos.Z, wallc); + return Project(plane.Zat0() - ViewPos.Z, wallc); } else { @@ -151,11 +149,13 @@ namespace swrenderer } } - return 
R_CreateWallSegmentY(outbuf, z1, z2, wallc); + return Project(z1, z2, wallc); } } - void PrepWall(float *vstep, fixed_t *upos, double walxrepeat, int x1, int x2, const FWallTmapVals &WallT) + ///////////////////////////////////////////////////////////////////////// + + void ProjectedWallTexcoords::Project(double walxrepeat, int x1, int x2, const FWallTmapVals &WallT) { float uOverZ = WallT.UoverZorg + WallT.UoverZstep * (float)(x1 + 0.5 - CenterX); float invZ = WallT.InvZorg + WallT.InvZstep * (float)(x1 + 0.5 - CenterX); @@ -171,8 +171,8 @@ namespace swrenderer { float u = uOverZ / invZ; - upos[x] = (fixed_t)((xrepeat - u * xrepeat) * FRACUNIT); - vstep[x] = depthOrg + u * depthScale; + UPos[x] = (fixed_t)((xrepeat - u * xrepeat) * FRACUNIT); + VStep[x] = depthOrg + u * depthScale; uOverZ += uGradient; invZ += zGradient; @@ -184,8 +184,8 @@ namespace swrenderer { float u = uOverZ / invZ; - upos[x] = (fixed_t)(u * xrepeat * FRACUNIT); - vstep[x] = depthOrg + u * depthScale; + UPos[x] = (fixed_t)(u * xrepeat * FRACUNIT); + VStep[x] = depthOrg + u * depthScale; uOverZ += uGradient; invZ += zGradient; @@ -193,7 +193,7 @@ namespace swrenderer } } - void PrepLWall(fixed_t *upos, double walxrepeat, int x1, int x2, const FWallTmapVals &WallT) + void ProjectedWallTexcoords::ProjectPos(double walxrepeat, int x1, int x2, const FWallTmapVals &WallT) { float uOverZ = WallT.UoverZorg + WallT.UoverZstep * (float)(x1 + 0.5 - CenterX); float invZ = WallT.InvZorg + WallT.InvZstep * (float)(x1 + 0.5 - CenterX); @@ -207,7 +207,7 @@ namespace swrenderer { float u = uOverZ / invZ * xrepeat - xrepeat; - upos[x] = (fixed_t)(u * FRACUNIT); + UPos[x] = (fixed_t)(u * FRACUNIT); uOverZ += uGradient; invZ += zGradient; @@ -219,7 +219,7 @@ namespace swrenderer { float u = uOverZ / invZ * xrepeat; - upos[x] = (fixed_t)(u * FRACUNIT); + UPos[x] = (fixed_t)(u * FRACUNIT); uOverZ += uGradient; invZ += zGradient; diff --git a/src/swrenderer/line/r_wallsetup.h b/src/swrenderer/line/r_wallsetup.h 
index 5ceac6f65e..628a9029b6 100644 --- a/src/swrenderer/line/r_wallsetup.h +++ b/src/swrenderer/line/r_wallsetup.h @@ -8,18 +8,36 @@ namespace swrenderer struct FWallCoords; struct FWallTmapVals; - extern short walltop[MAXWIDTH]; - extern short wallbottom[MAXWIDTH]; - extern short wallupper[MAXWIDTH]; - extern short walllower[MAXWIDTH]; - extern float swall[MAXWIDTH]; - extern fixed_t lwall[MAXWIDTH]; - extern double lwallscale; + enum class ProjectedWallCull + { + Visible, + OutsideAbove, + OutsideBelow + }; - int R_CreateWallSegmentY(short *outbuf, double z1, double z2, const FWallCoords *wallc); - int R_CreateWallSegmentYSloped(short *outbuf, const secplane_t &plane, const FWallCoords *wallc, seg_t *line, bool xflip); - int R_CreateWallSegmentY(short *outbuf, double z, const FWallCoords *wallc); + class ProjectedWallLine + { + public: + short ScreenY[MAXWIDTH]; - void PrepWall(float *swall, fixed_t *lwall, double walxrepeat, int x1, int x2, const FWallTmapVals &WallT); - void PrepLWall(fixed_t *lwall, double walxrepeat, int x1, int x2, const FWallTmapVals &WallT); + ProjectedWallCull Project(double z1, double z2, const FWallCoords *wallc); + ProjectedWallCull Project(const secplane_t &plane, const FWallCoords *wallc, seg_t *line, bool xflip); + ProjectedWallCull Project(double z, const FWallCoords *wallc); + }; + + class ProjectedWallTexcoords + { + public: + float VStep[MAXWIDTH]; // swall + fixed_t UPos[MAXWIDTH]; // lwall + + void Project(double walxrepeat, int x1, int x2, const FWallTmapVals &WallT); + void ProjectPos(double walxrepeat, int x1, int x2, const FWallTmapVals &WallT); + }; + + extern ProjectedWallLine walltop; + extern ProjectedWallLine wallbottom; + extern ProjectedWallLine wallupper; + extern ProjectedWallLine walllower; + extern ProjectedWallTexcoords walltexcoords; } diff --git a/src/swrenderer/segments/r_drawsegment.cpp b/src/swrenderer/segments/r_drawsegment.cpp index 160290068e..e5864f8050 100644 --- 
a/src/swrenderer/segments/r_drawsegment.cpp +++ b/src/swrenderer/segments/r_drawsegment.cpp @@ -110,21 +110,21 @@ namespace swrenderer // Clip a midtexture to the floor and ceiling of the sector in front of it. void ClipMidtex(int x1, int x2) { - short most[MAXWIDTH]; + ProjectedWallLine most; RenderPortal *renderportal = RenderPortal::Instance(); - R_CreateWallSegmentYSloped(most, curline->frontsector->ceilingplane, &WallC, curline, renderportal->MirrorFlags & RF_XFLIP); + most.Project(curline->frontsector->ceilingplane, &WallC, curline, renderportal->MirrorFlags & RF_XFLIP); for (int i = x1; i < x2; ++i) { - if (wallupper[i] < most[i]) - wallupper[i] = most[i]; + if (wallupper.ScreenY[i] < most.ScreenY[i]) + wallupper.ScreenY[i] = most.ScreenY[i]; } - R_CreateWallSegmentYSloped(most, curline->frontsector->floorplane, &WallC, curline, renderportal->MirrorFlags & RF_XFLIP); + most.Project(curline->frontsector->floorplane, &WallC, curline, renderportal->MirrorFlags & RF_XFLIP); for (int i = x1; i < x2; ++i) { - if (walllower[i] > most[i]) - walllower[i] = most[i]; + if (walllower.ScreenY[i] > most.ScreenY[i]) + walllower.ScreenY[i] = most.ScreenY[i]; } } @@ -305,30 +305,30 @@ namespace swrenderer if (clip3d->fake3D & FAKE3D_CLIPTOP) { - R_CreateWallSegmentY(wallupper, textop < clip3d->sclipTop - ViewPos.Z ? textop : clip3d->sclipTop - ViewPos.Z, &WallC); + wallupper.Project(textop < clip3d->sclipTop - ViewPos.Z ? textop : clip3d->sclipTop - ViewPos.Z, &WallC); } else { - R_CreateWallSegmentY(wallupper, textop, &WallC); + wallupper.Project(textop, &WallC); } if (clip3d->fake3D & FAKE3D_CLIPBOTTOM) { - R_CreateWallSegmentY(walllower, textop - texheight > clip3d->sclipBottom - ViewPos.Z ? textop - texheight : clip3d->sclipBottom - ViewPos.Z, &WallC); + walllower.Project(textop - texheight > clip3d->sclipBottom - ViewPos.Z ? 
textop - texheight : clip3d->sclipBottom - ViewPos.Z, &WallC); } else { - R_CreateWallSegmentY(walllower, textop - texheight, &WallC); + walllower.Project(textop - texheight, &WallC); } for (i = x1; i < x2; i++) { - if (wallupper[i] < mceilingclip[i]) - wallupper[i] = mceilingclip[i]; + if (wallupper.ScreenY[i] < mceilingclip[i]) + wallupper.ScreenY[i] = mceilingclip[i]; } for (i = x1; i < x2; i++) { - if (walllower[i] > mfloorclip[i]) - walllower[i] = mfloorclip[i]; + if (walllower.ScreenY[i] > mfloorclip[i]) + walllower.ScreenY[i] = mfloorclip[i]; } if (clip3d->CurrentSkybox) @@ -342,8 +342,8 @@ namespace swrenderer } } - mfloorclip = walllower; - mceilingclip = wallupper; + mfloorclip = walllower.ScreenY; + mceilingclip = wallupper.ScreenY; // draw the columns one at a time if (visible) @@ -402,23 +402,23 @@ namespace swrenderer if (clip3d->fake3D & FAKE3D_CLIPTOP) { - R_CreateWallSegmentY(wallupper, clip3d->sclipTop - ViewPos.Z, &WallC); + wallupper.Project(clip3d->sclipTop - ViewPos.Z, &WallC); for (i = x1; i < x2; i++) { - if (wallupper[i] < mceilingclip[i]) - wallupper[i] = mceilingclip[i]; + if (wallupper.ScreenY[i] < mceilingclip[i]) + wallupper.ScreenY[i] = mceilingclip[i]; } - mceilingclip = wallupper; + mceilingclip = wallupper.ScreenY; } if (clip3d->fake3D & FAKE3D_CLIPBOTTOM) { - R_CreateWallSegmentY(walllower, clip3d->sclipBottom - ViewPos.Z, &WallC); + walllower.Project(clip3d->sclipBottom - ViewPos.Z, &WallC); for (i = x1; i < x2; i++) { - if (walllower[i] > mfloorclip[i]) - walllower[i] = mfloorclip[i]; + if (walllower.ScreenY[i] > mfloorclip[i]) + walllower.ScreenY[i] = mfloorclip[i]; } - mfloorclip = walllower; + mfloorclip = walllower.ScreenY; } rw_offset = 0; @@ -532,22 +532,22 @@ namespace swrenderer WallT = ds->tmapvals; Clip3DFloors *clip3d = Clip3DFloors::Instance(); - R_CreateWallSegmentY(wallupper, clip3d->sclipTop - ViewPos.Z, &WallC); - R_CreateWallSegmentY(walllower, clip3d->sclipBottom - ViewPos.Z, &WallC); + 
wallupper.Project(clip3d->sclipTop - ViewPos.Z, &WallC); + walllower.Project(clip3d->sclipBottom - ViewPos.Z, &WallC); for (i = x1; i < x2; i++) { - if (wallupper[i] < mceilingclip[i]) - wallupper[i] = mceilingclip[i]; + if (wallupper.ScreenY[i] < mceilingclip[i]) + wallupper.ScreenY[i] = mceilingclip[i]; } for (i = x1; i < x2; i++) { - if (walllower[i] > mfloorclip[i]) - walllower[i] = mfloorclip[i]; + if (walllower.ScreenY[i] > mfloorclip[i]) + walllower.ScreenY[i] = mfloorclip[i]; } - PrepLWall(lwall, curline->sidedef->TexelLength*xscale, ds->sx1, ds->sx2, WallT); - R_DrawDrawSeg(frontsector, curline, WallC, rw_pic, ds, x1, x2, wallupper, walllower, texturemid, MaskedSWall, lwall, yscale, wallshade, rw_offset, rw_light, rw_lightstep, ds->foggy, basecolormap); + walltexcoords.ProjectPos(curline->sidedef->TexelLength*xscale, ds->sx1, ds->sx2, WallT); + R_DrawDrawSeg(frontsector, curline, WallC, rw_pic, ds, x1, x2, wallupper.ScreenY, walllower.ScreenY, texturemid, MaskedSWall, walltexcoords.UPos, yscale, wallshade, rw_offset, rw_light, rw_lightstep, ds->foggy, basecolormap); } // kg3D - walls of fake floors diff --git a/src/swrenderer/things/r_decal.cpp b/src/swrenderer/things/r_decal.cpp index 9e17b95c8b..c25341c09e 100644 --- a/src/swrenderer/things/r_decal.cpp +++ b/src/swrenderer/things/r_decal.cpp @@ -171,12 +171,12 @@ namespace swrenderer { goto done; } - mceilingclip = walltop; - mfloorclip = wallbottom; + mceilingclip = walltop.ScreenY; + mfloorclip = wallbottom.ScreenY; } else if (pass == 0) { - mceilingclip = walltop; + mceilingclip = walltop.ScreenY; mfloorclip = RenderOpaquePass::Instance()->ceilingclip; needrepeat = 1; } @@ -192,7 +192,7 @@ namespace swrenderer { goto done; } - mceilingclip = walltop; + mceilingclip = walltop.ScreenY; mfloorclip = RenderOpaquePass::Instance()->ceilingclip; break; @@ -211,7 +211,7 @@ namespace swrenderer goto done; } mceilingclip = RenderOpaquePass::Instance()->floorclip; - mfloorclip = wallbottom; + mfloorclip = 
wallbottom.ScreenY; break; } @@ -226,7 +226,7 @@ namespace swrenderer goto done; } - PrepWall(swall, lwall, WallSpriteTile->GetWidth(), x1, x2, WallT); + walltexcoords.Project(WallSpriteTile->GetWidth(), x1, x2, WallT); if (flipx) { @@ -235,7 +235,7 @@ namespace swrenderer for (i = x1; i < x2; i++) { - lwall[i] = right - lwall[i]; + walltexcoords.UPos[i] = right - walltexcoords.UPos[i]; } } @@ -305,7 +305,7 @@ namespace swrenderer // be set 1 if we need to draw on the lower wall. In all other cases, // needrepeat will be 0, and the while will fail. mceilingclip = RenderOpaquePass::Instance()->floorclip; - mfloorclip = wallbottom; + mfloorclip = wallbottom.ScreenY; } while (needrepeat--); colfunc = basecolfunc; @@ -316,7 +316,7 @@ namespace swrenderer void RenderDecal::DrawColumn(int x, FTexture *WallSpriteTile, double texturemid, float maskedScaleY, bool sprflipvert, const short *mfloorclip, const short *mceilingclip) { - float iscale = swall[x] * maskedScaleY; + float iscale = walltexcoords.VStep[x] * maskedScaleY; double spryscale = 1 / iscale; double sprtopscreen; if (sprflipvert) @@ -324,6 +324,6 @@ namespace swrenderer else sprtopscreen = CenterY - texturemid * spryscale; - R_DrawMaskedColumn(x, FLOAT2FIXED(iscale), WallSpriteTile, lwall[x], spryscale, sprtopscreen, sprflipvert, mfloorclip, mceilingclip); + R_DrawMaskedColumn(x, FLOAT2FIXED(iscale), WallSpriteTile, walltexcoords.UPos[x], spryscale, sprtopscreen, sprflipvert, mfloorclip, mceilingclip); } } diff --git a/src/swrenderer/things/r_wallsprite.cpp b/src/swrenderer/things/r_wallsprite.cpp index 76691e07d2..8951724b30 100644 --- a/src/swrenderer/things/r_wallsprite.cpp +++ b/src/swrenderer/things/r_wallsprite.cpp @@ -151,7 +151,7 @@ namespace swrenderer return; FWallTmapVals WallT; WallT.InitFromWallCoords(&spr->wallc); - PrepWall(swall, lwall, spr->pic->GetWidth() << FRACBITS, x1, x2, WallT); + walltexcoords.Project(spr->pic->GetWidth() << FRACBITS, x1, x2, WallT); iyscale = 1 / spr->yscale; double 
texturemid = (spr->gzt - ViewPos.Z) * iyscale; if (spr->renderflags & RF_XFLIP) @@ -160,7 +160,7 @@ namespace swrenderer for (int i = x1; i < x2; i++) { - lwall[i] = right - lwall[i]; + walltexcoords.UPos[i] = right - walltexcoords.UPos[i]; } } // Prepare lighting @@ -238,7 +238,7 @@ namespace swrenderer void RenderWallSprite::DrawColumn(int x, FTexture *WallSpriteTile, double texturemid, float maskedScaleY, bool sprflipvert, const short *mfloorclip, const short *mceilingclip) { - float iscale = swall[x] * maskedScaleY; + float iscale = walltexcoords.VStep[x] * maskedScaleY; double spryscale = 1 / iscale; double sprtopscreen; if (sprflipvert) @@ -246,6 +246,6 @@ namespace swrenderer else sprtopscreen = CenterY - texturemid * spryscale; - R_DrawMaskedColumn(x, FLOAT2FIXED(iscale), WallSpriteTile, lwall[x], spryscale, sprtopscreen, sprflipvert, mfloorclip, mceilingclip); + R_DrawMaskedColumn(x, FLOAT2FIXED(iscale), WallSpriteTile, walltexcoords.UPos[x], spryscale, sprtopscreen, sprflipvert, mfloorclip, mceilingclip); } } From b256f6ed898b09bff455523f03bcdbe4f01aacc7 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 24 Jan 2017 07:06:47 +0100 Subject: [PATCH 750/912] Make wallsetup globals local to where they are used --- src/swrenderer/line/r_line.cpp | 2 +- src/swrenderer/line/r_line.h | 6 +++++ src/swrenderer/line/r_wallsetup.cpp | 6 ----- src/swrenderer/line/r_wallsetup.h | 6 ----- src/swrenderer/segments/r_drawsegment.cpp | 4 ++++ src/swrenderer/things/r_decal.cpp | 27 ++++++++++++----------- src/swrenderer/things/r_decal.h | 7 +++--- src/swrenderer/things/r_wallsprite.cpp | 8 +++++-- src/swrenderer/things/r_wallsprite.h | 4 +++- 9 files changed, 38 insertions(+), 32 deletions(-) diff --git a/src/swrenderer/line/r_line.cpp b/src/swrenderer/line/r_line.cpp index d702813606..983e3494f9 100644 --- a/src/swrenderer/line/r_line.cpp +++ b/src/swrenderer/line/r_line.cpp @@ -579,7 +579,7 @@ namespace swrenderer // [ZZ] Only if not an active mirror if 
(!rw_markportal) { - RenderDecal::RenderDecals(curline->sidedef, draw_segment, wallshade, rw_lightleft, rw_lightstep, curline, WallC, foggy, basecolormap); + RenderDecal::RenderDecals(curline->sidedef, draw_segment, wallshade, rw_lightleft, rw_lightstep, curline, WallC, foggy, basecolormap, walltop.ScreenY, wallbottom.ScreenY); } if (rw_markportal) diff --git a/src/swrenderer/line/r_line.h b/src/swrenderer/line/r_line.h index 51ec10787e..3da9e0d854 100644 --- a/src/swrenderer/line/r_line.h +++ b/src/swrenderer/line/r_line.h @@ -123,5 +123,11 @@ namespace swrenderer FDynamicColormap *basecolormap; double lwallscale; + + ProjectedWallLine walltop; + ProjectedWallLine wallbottom; + ProjectedWallLine wallupper; + ProjectedWallLine walllower; + ProjectedWallTexcoords walltexcoords; }; } diff --git a/src/swrenderer/line/r_wallsetup.cpp b/src/swrenderer/line/r_wallsetup.cpp index 4c34f3483c..2c9c824422 100644 --- a/src/swrenderer/line/r_wallsetup.cpp +++ b/src/swrenderer/line/r_wallsetup.cpp @@ -27,12 +27,6 @@ namespace swrenderer { - ProjectedWallLine walltop; - ProjectedWallLine wallbottom; - ProjectedWallLine wallupper; - ProjectedWallLine walllower; - ProjectedWallTexcoords walltexcoords; - ProjectedWallCull ProjectedWallLine::Project(double z, const FWallCoords *wallc) { return Project(z, z, wallc); diff --git a/src/swrenderer/line/r_wallsetup.h b/src/swrenderer/line/r_wallsetup.h index 628a9029b6..1234a33dcf 100644 --- a/src/swrenderer/line/r_wallsetup.h +++ b/src/swrenderer/line/r_wallsetup.h @@ -34,10 +34,4 @@ namespace swrenderer void Project(double walxrepeat, int x1, int x2, const FWallTmapVals &WallT); void ProjectPos(double walxrepeat, int x1, int x2, const FWallTmapVals &WallT); }; - - extern ProjectedWallLine walltop; - extern ProjectedWallLine wallbottom; - extern ProjectedWallLine wallupper; - extern ProjectedWallLine walllower; - extern ProjectedWallTexcoords walltexcoords; } diff --git a/src/swrenderer/segments/r_drawsegment.cpp 
b/src/swrenderer/segments/r_drawsegment.cpp index e5864f8050..da42923337 100644 --- a/src/swrenderer/segments/r_drawsegment.cpp +++ b/src/swrenderer/segments/r_drawsegment.cpp @@ -68,6 +68,9 @@ namespace swrenderer float rw_lightstep; fixed_t rw_offset; FTexture *rw_pic; + + ProjectedWallLine wallupper; + ProjectedWallLine walllower; } void R_FreeDrawSegs() @@ -546,6 +549,7 @@ namespace swrenderer walllower.ScreenY[i] = mfloorclip[i]; } + ProjectedWallTexcoords walltexcoords; walltexcoords.ProjectPos(curline->sidedef->TexelLength*xscale, ds->sx1, ds->sx2, WallT); R_DrawDrawSeg(frontsector, curline, WallC, rw_pic, ds, x1, x2, wallupper.ScreenY, walllower.ScreenY, texturemid, MaskedSWall, walltexcoords.UPos, yscale, wallshade, rw_offset, rw_light, rw_lightstep, ds->foggy, basecolormap); } diff --git a/src/swrenderer/things/r_decal.cpp b/src/swrenderer/things/r_decal.cpp index c25341c09e..3e082aeb4a 100644 --- a/src/swrenderer/things/r_decal.cpp +++ b/src/swrenderer/things/r_decal.cpp @@ -47,11 +47,11 @@ EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor); namespace swrenderer { - void RenderDecal::RenderDecals(side_t *sidedef, drawseg_t *draw_segment, int wallshade, float lightleft, float lightstep, seg_t *curline, const FWallCoords &wallC, bool foggy, FDynamicColormap *basecolormap) + void RenderDecal::RenderDecals(side_t *sidedef, drawseg_t *draw_segment, int wallshade, float lightleft, float lightstep, seg_t *curline, const FWallCoords &wallC, bool foggy, FDynamicColormap *basecolormap, const short *walltop, const short *wallbottom) { for (DBaseDecal *decal = sidedef->AttachedDecals; decal != NULL; decal = decal->WallNext) { - Render(sidedef, decal, draw_segment, wallshade, lightleft, lightstep, curline, wallC, foggy, basecolormap, 0); + Render(sidedef, decal, draw_segment, wallshade, lightleft, lightstep, curline, wallC, foggy, basecolormap, walltop, wallbottom, 0); } } @@ -59,7 +59,7 @@ namespace swrenderer // = 1: drawing masked textures (including sprites) // 
Currently, only pass = 0 is done or used - void RenderDecal::Render(side_t *wall, DBaseDecal *decal, drawseg_t *clipper, int wallshade, float lightleft, float lightstep, seg_t *curline, FWallCoords WallC, bool foggy, FDynamicColormap *basecolormap, int pass) + void RenderDecal::Render(side_t *wall, DBaseDecal *decal, drawseg_t *clipper, int wallshade, float lightleft, float lightstep, seg_t *curline, FWallCoords WallC, bool foggy, FDynamicColormap *basecolormap, const short *walltop, const short *wallbottom, int pass) { DVector2 decal_left, decal_right, decal_pos; int x1, x2; @@ -72,8 +72,8 @@ namespace swrenderer bool rereadcolormap; FDynamicColormap *usecolormap; float light = 0; - short *mfloorclip; - short *mceilingclip; + const short *mfloorclip; + const short *mceilingclip; if (decal->RenderFlags & RF_INVISIBLE || !viewactive || !decal->PicNum.isValid()) return; @@ -171,12 +171,12 @@ namespace swrenderer { goto done; } - mceilingclip = walltop.ScreenY; - mfloorclip = wallbottom.ScreenY; + mceilingclip = walltop; + mfloorclip = wallbottom; } else if (pass == 0) { - mceilingclip = walltop.ScreenY; + mceilingclip = walltop; mfloorclip = RenderOpaquePass::Instance()->ceilingclip; needrepeat = 1; } @@ -192,7 +192,7 @@ namespace swrenderer { goto done; } - mceilingclip = walltop.ScreenY; + mceilingclip = walltop; mfloorclip = RenderOpaquePass::Instance()->ceilingclip; break; @@ -211,7 +211,7 @@ namespace swrenderer goto done; } mceilingclip = RenderOpaquePass::Instance()->floorclip; - mfloorclip = wallbottom.ScreenY; + mfloorclip = wallbottom; break; } @@ -226,6 +226,7 @@ namespace swrenderer goto done; } + ProjectedWallTexcoords walltexcoords; walltexcoords.Project(WallSpriteTile->GetWidth(), x1, x2, WallT); if (flipx) @@ -295,7 +296,7 @@ namespace swrenderer { // calculate lighting R_SetColorMapLight(usecolormap, light, wallshade); } - DrawColumn(x, WallSpriteTile, texturemid, maskedScaleY, sprflipvert, mfloorclip, mceilingclip); + DrawColumn(x, WallSpriteTile, 
walltexcoords, texturemid, maskedScaleY, sprflipvert, mfloorclip, mceilingclip); light += lightstep; x++; } @@ -305,7 +306,7 @@ namespace swrenderer // be set 1 if we need to draw on the lower wall. In all other cases, // needrepeat will be 0, and the while will fail. mceilingclip = RenderOpaquePass::Instance()->floorclip; - mfloorclip = wallbottom.ScreenY; + mfloorclip = wallbottom; } while (needrepeat--); colfunc = basecolfunc; @@ -314,7 +315,7 @@ namespace swrenderer WallC = savecoord; } - void RenderDecal::DrawColumn(int x, FTexture *WallSpriteTile, double texturemid, float maskedScaleY, bool sprflipvert, const short *mfloorclip, const short *mceilingclip) + void RenderDecal::DrawColumn(int x, FTexture *WallSpriteTile, const ProjectedWallTexcoords &walltexcoords, double texturemid, float maskedScaleY, bool sprflipvert, const short *mfloorclip, const short *mceilingclip) { float iscale = walltexcoords.VStep[x] * maskedScaleY; double spryscale = 1 / iscale; diff --git a/src/swrenderer/things/r_decal.h b/src/swrenderer/things/r_decal.h index 1bd9f85b70..20670c6f47 100644 --- a/src/swrenderer/things/r_decal.h +++ b/src/swrenderer/things/r_decal.h @@ -19,14 +19,15 @@ class DBaseDecal; namespace swrenderer { struct drawseg_t; + class ProjectedWallTexcoords; class RenderDecal { public: - static void RenderDecals(side_t *wall, drawseg_t *draw_segment, int wallshade, float lightleft, float lightstep, seg_t *curline, const FWallCoords &wallC, bool foggy, FDynamicColormap *basecolormap); + static void RenderDecals(side_t *wall, drawseg_t *draw_segment, int wallshade, float lightleft, float lightstep, seg_t *curline, const FWallCoords &wallC, bool foggy, FDynamicColormap *basecolormap, const short *walltop, const short *wallbottom); private: - static void Render(side_t *wall, DBaseDecal *first, drawseg_t *clipper, int wallshade, float lightleft, float lightstep, seg_t *curline, FWallCoords wallC, bool foggy, FDynamicColormap *basecolormap, int pass); - static void 
DrawColumn(int x, FTexture *WallSpriteTile, double texturemid, float maskedScaleY, bool sprflipvert, const short *mfloorclip, const short *mceilingclip); + static void Render(side_t *wall, DBaseDecal *first, drawseg_t *clipper, int wallshade, float lightleft, float lightstep, seg_t *curline, FWallCoords wallC, bool foggy, FDynamicColormap *basecolormap, const short *walltop, const short *wallbottom, int pass); + static void DrawColumn(int x, FTexture *WallSpriteTile, const ProjectedWallTexcoords &walltexcoords, double texturemid, float maskedScaleY, bool sprflipvert, const short *mfloorclip, const short *mceilingclip); }; } diff --git a/src/swrenderer/things/r_wallsprite.cpp b/src/swrenderer/things/r_wallsprite.cpp index 8951724b30..a6da5ff0e8 100644 --- a/src/swrenderer/things/r_wallsprite.cpp +++ b/src/swrenderer/things/r_wallsprite.cpp @@ -149,9 +149,13 @@ namespace swrenderer x2 = MIN(spr->x2, spr->wallc.sx2); if (x1 >= x2) return; + FWallTmapVals WallT; WallT.InitFromWallCoords(&spr->wallc); + + ProjectedWallTexcoords walltexcoords; walltexcoords.Project(spr->pic->GetWidth() << FRACBITS, x1, x2, WallT); + iyscale = 1 / spr->yscale; double texturemid = (spr->gzt - ViewPos.Z) * iyscale; if (spr->renderflags & RF_XFLIP) @@ -229,14 +233,14 @@ namespace swrenderer R_SetColorMapLight(usecolormap, light, shade); } if (!RenderTranslucentPass::ClipSpriteColumnWithPortals(x, spr)) - DrawColumn(x, WallSpriteTile, texturemid, maskedScaleY, sprflipvert, mfloorclip, mceilingclip); + DrawColumn(x, WallSpriteTile, walltexcoords, texturemid, maskedScaleY, sprflipvert, mfloorclip, mceilingclip); light += lightstep; x++; } } } - void RenderWallSprite::DrawColumn(int x, FTexture *WallSpriteTile, double texturemid, float maskedScaleY, bool sprflipvert, const short *mfloorclip, const short *mceilingclip) + void RenderWallSprite::DrawColumn(int x, FTexture *WallSpriteTile, const ProjectedWallTexcoords &walltexcoords, double texturemid, float maskedScaleY, bool sprflipvert, const 
short *mfloorclip, const short *mceilingclip) { float iscale = walltexcoords.VStep[x] * maskedScaleY; double spryscale = 1 / iscale; diff --git a/src/swrenderer/things/r_wallsprite.h b/src/swrenderer/things/r_wallsprite.h index a68454b2e4..6c2976d361 100644 --- a/src/swrenderer/things/r_wallsprite.h +++ b/src/swrenderer/things/r_wallsprite.h @@ -17,6 +17,8 @@ namespace swrenderer { + class ProjectedWallTexcoords; + class RenderWallSprite : public VisibleSprite { public: @@ -27,7 +29,7 @@ namespace swrenderer void Render(short *cliptop, short *clipbottom, int minZ, int maxZ) override; private: - static void DrawColumn(int x, FTexture *WallSpriteTile, double texturemid, float maskedScaleY, bool sprflipvert, const short *mfloorclip, const short *mceilingclip); + static void DrawColumn(int x, FTexture *WallSpriteTile, const ProjectedWallTexcoords &walltexcoords, double texturemid, float maskedScaleY, bool sprflipvert, const short *mfloorclip, const short *mceilingclip); FWallCoords wallc; uint32_t Translation = 0; From 86d9594d6e3aa9aec9dd482e36c4c17e12b4de4f Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 24 Jan 2017 08:41:35 +0100 Subject: [PATCH 751/912] Convert r_walldraw to a class --- src/swrenderer/line/r_line.cpp | 12 +- src/swrenderer/line/r_walldraw.cpp | 127 +++++++++------------- src/swrenderer/line/r_walldraw.h | 66 ++++++++++- src/swrenderer/segments/r_drawsegment.cpp | 34 +++++- src/swrenderer/segments/r_drawsegment.h | 1 + 5 files changed, 158 insertions(+), 82 deletions(-) diff --git a/src/swrenderer/line/r_line.cpp b/src/swrenderer/line/r_line.cpp index 983e3494f9..b163fac529 100644 --- a/src/swrenderer/line/r_line.cpp +++ b/src/swrenderer/line/r_line.cpp @@ -1042,7 +1042,9 @@ namespace swrenderer { rw_offset = -rw_offset; } - R_DrawWallSegment(frontsector, curline, WallC, rw_pic, x1, x2, walltop.ScreenY, wallbottom.ScreenY, rw_midtexturemid, walltexcoords.VStep, walltexcoords.UPos, yscale, MAX(rw_frontcz1, rw_frontcz2), MIN(rw_frontfz1, 
rw_frontfz2), false, wallshade, rw_offset, rw_light, rw_lightstep, light_list, foggy, basecolormap); + + RenderWallPart renderWallpart; + renderWallpart.Render(frontsector, curline, WallC, rw_pic, x1, x2, walltop.ScreenY, wallbottom.ScreenY, rw_midtexturemid, walltexcoords.VStep, walltexcoords.UPos, yscale, MAX(rw_frontcz1, rw_frontcz2), MIN(rw_frontfz1, rw_frontfz2), false, wallshade, rw_offset, rw_light, rw_lightstep, light_list, foggy, basecolormap); } fillshort(ceilingclip + x1, x2 - x1, viewheight); fillshort(floorclip + x1, x2 - x1, 0xffff); @@ -1077,7 +1079,9 @@ namespace swrenderer { rw_offset = -rw_offset; } - R_DrawWallSegment(frontsector, curline, WallC, rw_pic, x1, x2, walltop.ScreenY, wallupper.ScreenY, rw_toptexturemid, walltexcoords.VStep, walltexcoords.UPos, yscale, MAX(rw_frontcz1, rw_frontcz2), MIN(rw_backcz1, rw_backcz2), false, wallshade, rw_offset, rw_light, rw_lightstep, light_list, foggy, basecolormap); + + RenderWallPart renderWallpart; + renderWallpart.Render(frontsector, curline, WallC, rw_pic, x1, x2, walltop.ScreenY, wallupper.ScreenY, rw_toptexturemid, walltexcoords.VStep, walltexcoords.UPos, yscale, MAX(rw_frontcz1, rw_frontcz2), MIN(rw_backcz1, rw_backcz2), false, wallshade, rw_offset, rw_light, rw_lightstep, light_list, foggy, basecolormap); } memcpy(ceilingclip + x1, wallupper.ScreenY + x1, (x2 - x1) * sizeof(short)); } @@ -1115,7 +1119,9 @@ namespace swrenderer { rw_offset = -rw_offset; } - R_DrawWallSegment(frontsector, curline, WallC, rw_pic, x1, x2, walllower.ScreenY, wallbottom.ScreenY, rw_bottomtexturemid, walltexcoords.VStep, walltexcoords.UPos, yscale, MAX(rw_backfz1, rw_backfz2), MIN(rw_frontfz1, rw_frontfz2), false, wallshade, rw_offset, rw_light, rw_lightstep, light_list, foggy, basecolormap); + + RenderWallPart renderWallpart; + renderWallpart.Render(frontsector, curline, WallC, rw_pic, x1, x2, walllower.ScreenY, wallbottom.ScreenY, rw_bottomtexturemid, walltexcoords.VStep, walltexcoords.UPos, yscale, MAX(rw_backfz1, 
rw_backfz2), MIN(rw_frontfz1, rw_frontfz2), false, wallshade, rw_offset, rw_light, rw_lightstep, light_list, foggy, basecolormap); } memcpy(floorclip + x1, walllower.ScreenY + x1, (x2 - x1) * sizeof(short)); } diff --git a/src/swrenderer/line/r_walldraw.cpp b/src/swrenderer/line/r_walldraw.cpp index d6c475a81c..30020295a6 100644 --- a/src/swrenderer/line/r_walldraw.cpp +++ b/src/swrenderer/line/r_walldraw.cpp @@ -52,13 +52,6 @@ namespace swrenderer { - using namespace drawerargs; - - namespace - { - FTexture *rw_pic; - } - WallSampler::WallSampler(int y1, double texturemid, float swal, double yrepeat, fixed_t xoffset, double xmagnitude, FTexture *texture) { xoffset += FLOAT2FIXED(xmagnitude * 0.5); @@ -184,8 +177,10 @@ namespace swrenderer } // Draw a column with support for non-power-of-two ranges - static void Draw1Column(const FWallCoords &WallC, int x, int y1, int y2, WallSampler &sampler, FLightNode *light_list, DrawerFunc draw1column) + void RenderWallPart::Draw1Column(int x, int y1, int y2, WallSampler &sampler, DrawerFunc draw1column) { + using namespace drawerargs; + if (r_dynlights && light_list) { // Find column position in view space @@ -322,12 +317,10 @@ namespace swrenderer } } - static void ProcessWallWorker( - const FWallCoords &WallC, - int x1, int x2, short *uwal, short *dwal, double texturemid, float *swal, fixed_t *lwal, double yrepeat, int wallshade, fixed_t xoffset, float light, float lightstep, FDynamicColormap *basecolormap, - FLightNode *light_list, - DrawerFunc drawcolumn) + void RenderWallPart::ProcessWallWorker(const short *uwal, const short *dwal, double texturemid, float *swal, fixed_t *lwal, DrawerFunc drawcolumn) { + using namespace drawerargs; + if (rw_pic->UseType == FTexture::TEX_Null) return; @@ -382,47 +375,48 @@ namespace swrenderer if (x + 1 < x2) xmagnitude = fabs(FIXED2DBL(lwal[x + 1]) - FIXED2DBL(lwal[x])); WallSampler sampler(y1, texturemid, swal[x], yrepeat, lwal[x] + xoffset, xmagnitude, rw_pic); - Draw1Column(WallC, x, 
y1, y2, sampler, light_list, drawcolumn); + Draw1Column(x, y1, y2, sampler, drawcolumn); } NetUpdate(); } - static void ProcessNormalWall(const FWallCoords &WallC, int x1, int x2, short *uwal, short *dwal, double texturemid, float *swal, fixed_t *lwal, double yrepeat, int wallshade, fixed_t xoffset, float light, float lightstep, FDynamicColormap *basecolormap, FLightNode *light_list) + void RenderWallPart::ProcessNormalWall(const short *uwal, const short *dwal, double texturemid, float *swal, fixed_t *lwal) { - ProcessWallWorker(WallC, x1, x2, uwal, dwal, texturemid, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, basecolormap, light_list, &SWPixelFormatDrawers::DrawWallColumn); + ProcessWallWorker(uwal, dwal, texturemid, swal, lwal, &SWPixelFormatDrawers::DrawWallColumn); } - static void ProcessMaskedWall(const FWallCoords &WallC, int x1, int x2, short *uwal, short *dwal, double texturemid, float *swal, fixed_t *lwal, double yrepeat, int wallshade, fixed_t xoffset, float light, float lightstep, FDynamicColormap *basecolormap, FLightNode *light_list) + void RenderWallPart::ProcessMaskedWall(const short *uwal, const short *dwal, double texturemid, float *swal, fixed_t *lwal) { if (!rw_pic->bMasked) // Textures that aren't masked can use the faster ProcessNormalWall. 
{ - ProcessNormalWall(WallC, x1, x2, uwal, dwal, texturemid, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, basecolormap, light_list); + ProcessNormalWall(uwal, dwal, texturemid, swal, lwal); } else { - ProcessWallWorker(WallC, x1, x2, uwal, dwal, texturemid, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, basecolormap, light_list, &SWPixelFormatDrawers::DrawWallMaskedColumn); + ProcessWallWorker(uwal, dwal, texturemid, swal, lwal, &SWPixelFormatDrawers::DrawWallMaskedColumn); } } - static void ProcessTranslucentWall(const FWallCoords &WallC, int x1, int x2, short *uwal, short *dwal, double texturemid, float *swal, fixed_t *lwal, double yrepeat, int wallshade, fixed_t xoffset, float light, float lightstep, FDynamicColormap *basecolormap, FLightNode *light_list) + void RenderWallPart::ProcessTranslucentWall(const short *uwal, const short *dwal, double texturemid, float *swal, fixed_t *lwal) { DrawerFunc drawcol1 = R_GetTransMaskDrawer(); if (drawcol1 == nullptr) { // The current translucency is unsupported, so draw with regular ProcessMaskedWall instead. 
- ProcessMaskedWall(WallC, x1, x2, uwal, dwal, texturemid, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, basecolormap, light_list); + ProcessMaskedWall(uwal, dwal, texturemid, swal, lwal); } else { - ProcessWallWorker(WallC, x1, x2, uwal, dwal, texturemid, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, basecolormap, light_list, drawcol1); + ProcessWallWorker(uwal, dwal, texturemid, swal, lwal, drawcol1); } } - static void ProcessStripedWall(sector_t *frontsector, seg_t *curline, const FWallCoords &WallC, int x1, int x2, short *uwal, short *dwal, double texturemid, float *swal, fixed_t *lwal, double yrepeat, int wallshade, fixed_t xoffset, float light, float lightstep, bool foggy, FDynamicColormap *basecolormap, FLightNode *light_list) + void RenderWallPart::ProcessStripedWall(const short *uwal, const short *dwal, double texturemid, float *swal, fixed_t *lwal) { ProjectedWallLine most1, most2, most3; - short *up, *down; + const short *up; + short *down; up = uwal; down = most1.ScreenY; @@ -442,7 +436,7 @@ namespace swrenderer { down[j] = clamp(most3.ScreenY[j], up[j], dwal[j]); } - ProcessNormalWall(WallC, x1, x2, up, down, texturemid, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, basecolormap, light_list); + ProcessNormalWall(up, down, texturemid, swal, lwal); up = down; down = (down == most1.ScreenY) ? 
most2.ScreenY : most1.ScreenY; } @@ -452,31 +446,31 @@ namespace swrenderer wallshade = LIGHT2SHADE(curline->sidedef->GetLightLevel(foggy, *lit->p_lightlevel, lit->lightsource != NULL) + R_ActualExtraLight(foggy)); } - ProcessNormalWall(WallC, x1, x2, up, dwal, texturemid, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, basecolormap, light_list); + ProcessNormalWall(up, dwal, texturemid, swal, lwal); } - static void ProcessWall(sector_t *frontsector, seg_t *curline, const FWallCoords &WallC, int x1, int x2, short *uwal, short *dwal, double texturemid, float *swal, fixed_t *lwal, double yrepeat, int wallshade, fixed_t xoffset, float light, float lightstep, bool mask, bool foggy, FDynamicColormap *basecolormap, FLightNode *light_list) + void RenderWallPart::ProcessWall(const short *uwal, const short *dwal, double texturemid, float *swal, fixed_t *lwal) { if (mask) { if (colfunc == basecolfunc) { - ProcessMaskedWall(WallC, x1, x2, uwal, dwal, texturemid, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, basecolormap, light_list); + ProcessMaskedWall(uwal, dwal, texturemid, swal, lwal); } else { - ProcessTranslucentWall(WallC, x1, x2, uwal, dwal, texturemid, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, basecolormap, light_list); + ProcessTranslucentWall(uwal, dwal, texturemid, swal, lwal); } } else { if (fixedcolormap != NULL || fixedlightlev >= 0 || !(frontsector->e && frontsector->e->XFloor.lightlist.Size())) { - ProcessNormalWall(WallC, x1, x2, uwal, dwal, texturemid, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, basecolormap, light_list); + ProcessNormalWall(uwal, dwal, texturemid, swal, lwal); } else { - ProcessStripedWall(frontsector, curline, WallC, x1, x2, uwal, dwal, texturemid, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, foggy, basecolormap, light_list); + ProcessStripedWall(uwal, dwal, texturemid, swal, lwal); } } } @@ -492,10 +486,9 @@ namespace swrenderer // 
//============================================================================= - static void ProcessWallNP2(sector_t *frontsector, seg_t *curline, const FWallCoords &WallC, int x1, int x2, short *uwal, short *dwal, double texturemid, float *swal, fixed_t *lwal, double yrepeat, double top, double bot, int wallshade, fixed_t xoffset, float light, float lightstep, bool mask, bool foggy, FDynamicColormap *basecolormap, FLightNode *light_list) + void RenderWallPart::ProcessWallNP2(const short *uwal, const short *dwal, double texturemid, float *swal, fixed_t *lwal, double top, double bot) { ProjectedWallLine most1, most2, most3; - short *up, *down; double texheight = rw_pic->GetHeight(); double partition; double scaledtexheight = texheight / yrepeat; @@ -507,8 +500,8 @@ namespace swrenderer { partition -= scaledtexheight; } - up = uwal; - down = most1.ScreenY; + const short *up = uwal; + short *down = most1.ScreenY; texturemid = (partition - ViewPos.Z) * yrepeat + texheight; while (partition > bot) { @@ -519,20 +512,20 @@ namespace swrenderer { down[j] = clamp(most3.ScreenY[j], up[j], dwal[j]); } - ProcessWall(frontsector, curline, WallC, x1, x2, up, down, texturemid, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, mask, foggy, basecolormap, light_list); + ProcessWall(up, down, texturemid, swal, lwal); up = down; down = (down == most1.ScreenY) ? 
most2.ScreenY : most1.ScreenY; } partition -= scaledtexheight; texturemid -= texheight; } - ProcessWall(frontsector, curline, WallC, x1, x2, up, dwal, texturemid, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, mask, foggy, basecolormap, light_list); + ProcessWall(up, dwal, texturemid, swal, lwal); } else { // upside down: draw strips from bottom to top partition = bot - fmod(bot - texturemid / yrepeat - ViewPos.Z, scaledtexheight); - up = most1.ScreenY; - down = dwal; + short *up = most1.ScreenY; + const short *down = dwal; texturemid = (partition - ViewPos.Z) * yrepeat + texheight; while (partition < top) { @@ -543,56 +536,42 @@ namespace swrenderer { up[j] = clamp(most3.ScreenY[j], uwal[j], down[j]); } - ProcessWall(frontsector, curline, WallC, x1, x2, up, down, texturemid, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, mask, foggy, basecolormap, light_list); + ProcessWall(up, down, texturemid, swal, lwal); down = up; up = (up == most1.ScreenY) ? most2.ScreenY : most1.ScreenY; } partition -= scaledtexheight; texturemid -= texheight; } - ProcessWall(frontsector, curline, WallC, x1, x2, uwal, down, texturemid, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, mask, foggy, basecolormap, light_list); + ProcessWall(uwal, down, texturemid, swal, lwal); } } - void R_DrawDrawSeg(sector_t *frontsector, seg_t *curline, const FWallCoords &WallC, FTexture *pic, drawseg_t *ds, int x1, int x2, short *uwal, short *dwal, double texturemid, float *swal, fixed_t *lwal, double yrepeat, int wallshade, fixed_t xoffset, float light, float lightstep, bool foggy, FDynamicColormap *basecolormap) + void RenderWallPart::Render(sector_t *frontsector, seg_t *curline, const FWallCoords &WallC, FTexture *pic, int x1, int x2, const short *walltop, const short *wallbottom, double texturemid, float *swall, fixed_t *lwall, double yscale, double top, double bottom, bool mask, int wallshade, fixed_t xoffset, float light, float lightstep, FLightNode *light_list, bool 
foggy, FDynamicColormap *basecolormap) { - rw_pic = pic; + this->x1 = x1; + this->x2 = x2; + this->frontsector = frontsector; + this->curline = curline; + this->WallC = WallC; + this->yrepeat = yscale; + this->wallshade = wallshade; + this->xoffset = xoffset; + this->light = light; + this->lightstep = lightstep; + this->foggy = foggy; + this->basecolormap = basecolormap; + this->light_list = light_list; + this->rw_pic = pic; + this->mask = mask; + if (rw_pic->GetHeight() != 1 << rw_pic->HeightBits) { - double frontcz1 = ds->curline->frontsector->ceilingplane.ZatPoint(ds->curline->v1); - double frontfz1 = ds->curline->frontsector->floorplane.ZatPoint(ds->curline->v1); - double frontcz2 = ds->curline->frontsector->ceilingplane.ZatPoint(ds->curline->v2); - double frontfz2 = ds->curline->frontsector->floorplane.ZatPoint(ds->curline->v2); - double top = MAX(frontcz1, frontcz2); - double bot = MIN(frontfz1, frontfz2); - Clip3DFloors *clip3d = Clip3DFloors::Instance(); - if (clip3d->fake3D & FAKE3D_CLIPTOP) - { - top = MIN(top, clip3d->sclipTop); - } - if (clip3d->fake3D & FAKE3D_CLIPBOTTOM) - { - bot = MAX(bot, clip3d->sclipBottom); - } - ProcessWallNP2(frontsector, curline, WallC, x1, x2, uwal, dwal, texturemid, swal, lwal, yrepeat, top, bot, wallshade, xoffset, light, lightstep, true, foggy, basecolormap, nullptr); + ProcessWallNP2(walltop, wallbottom, texturemid, swall, lwall, top, bottom); } else { - ProcessWall(frontsector, curline, WallC, x1, x2, uwal, dwal, texturemid, swal, lwal, yrepeat, wallshade, xoffset, light, lightstep, true, foggy, basecolormap, nullptr); - } - } - - - void R_DrawWallSegment(sector_t *frontsector, seg_t *curline, const FWallCoords &WallC, FTexture *pic, int x1, int x2, short *walltop, short *wallbottom, double texturemid, float *swall, fixed_t *lwall, double yscale, double top, double bottom, bool mask, int wallshade, fixed_t xoffset, float light, float lightstep, FLightNode *light_list, bool foggy, FDynamicColormap *basecolormap) - { - 
rw_pic = pic; - if (rw_pic->GetHeight() != 1 << rw_pic->HeightBits) - { - ProcessWallNP2(frontsector, curline, WallC, x1, x2, walltop, wallbottom, texturemid, swall, lwall, yscale, top, bottom, wallshade, xoffset, light, lightstep, false, foggy, basecolormap, light_list); - } - else - { - ProcessWall(frontsector, curline, WallC, x1, x2, walltop, wallbottom, texturemid, swall, lwall, yscale, wallshade, xoffset, light, lightstep, false, foggy, basecolormap, light_list); + ProcessWall(walltop, wallbottom, texturemid, swall, lwall); } } } diff --git a/src/swrenderer/line/r_walldraw.h b/src/swrenderer/line/r_walldraw.h index a74a21a896..a1c41980f2 100644 --- a/src/swrenderer/line/r_walldraw.h +++ b/src/swrenderer/line/r_walldraw.h @@ -13,14 +13,77 @@ #pragma once +#include "swrenderer/drawers/r_draw.h" +#include "r_line.h" + class FTexture; struct FLightNode; struct seg_t; +struct FLightNode; +struct FDynamicColormap; namespace swrenderer { struct drawseg_t; struct FWallCoords; + class ProjectedWallLine; + class ProjectedWallTexcoords; + struct WallSampler; + + class RenderWallPart + { + public: + void Render( + sector_t *frontsector, + seg_t *curline, + const FWallCoords &WallC, + FTexture *rw_pic, + int x1, + int x2, + const short *walltop, + const short *wallbottom, + double texturemid, + float *swall, + fixed_t *lwall, + double yscale, + double top, + double bottom, + bool mask, + int wallshade, + fixed_t xoffset, + float light, + float lightstep, + FLightNode *light_list, + bool foggy, + FDynamicColormap *basecolormap); + + private: + void ProcessWallNP2(const short *uwal, const short *dwal, double texturemid, float *swal, fixed_t *lwal, double top, double bot); + void ProcessWall(const short *uwal, const short *dwal, double texturemid, float *swal, fixed_t *lwal); + void ProcessStripedWall(const short *uwal, const short *dwal, double texturemid, float *swal, fixed_t *lwal); + void ProcessTranslucentWall(const short *uwal, const short *dwal, double texturemid, 
float *swal, fixed_t *lwal); + void ProcessMaskedWall(const short *uwal, const short *dwal, double texturemid, float *swal, fixed_t *lwal); + void ProcessNormalWall(const short *uwal, const short *dwal, double texturemid, float *swal, fixed_t *lwal); + void ProcessWallWorker(const short *uwal, const short *dwal, double texturemid, float *swal, fixed_t *lwal, DrawerFunc drawcolumn); + void Draw1Column(int x, int y1, int y2, WallSampler &sampler, DrawerFunc draw1column); + + int x1 = 0; + int x2 = 0; + FTexture *rw_pic = nullptr; + sector_t *frontsector = nullptr; + seg_t *curline = nullptr; + FWallCoords WallC; + + double yrepeat = 0.0; + int wallshade = 0; + fixed_t xoffset = 0; + float light = 0.0f; + float lightstep = 0.0f; + bool foggy = false; + FDynamicColormap *basecolormap = nullptr; + FLightNode *light_list = nullptr; + bool mask = false; + }; struct WallSampler { @@ -36,7 +99,4 @@ namespace swrenderer uint32_t texturefracx; uint32_t height; }; - - void R_DrawWallSegment(sector_t *frontsector, seg_t *curline, const FWallCoords &WallC, FTexture *rw_pic, int x1, int x2, short *walltop, short *wallbottom, double texturemid, float *swall, fixed_t *lwall, double yscale, double top, double bottom, bool mask, int wallshade, fixed_t xoffset, float light, float lightstep, FLightNode *light_list, bool foggy, FDynamicColormap *basecolormap); - void R_DrawDrawSeg(sector_t *frontsector, seg_t *curline, const FWallCoords &WallC, FTexture *rw_pic, drawseg_t *ds, int x1, int x2, short *uwal, short *dwal, double texturemid, float *swal, fixed_t *lwal, double yrepeat, int wallshade, fixed_t xoffset, float light, float lightstep, bool foggy, FDynamicColormap *basecolormap); } diff --git a/src/swrenderer/segments/r_drawsegment.cpp b/src/swrenderer/segments/r_drawsegment.cpp index da42923337..9cb485831e 100644 --- a/src/swrenderer/segments/r_drawsegment.cpp +++ b/src/swrenderer/segments/r_drawsegment.cpp @@ -131,6 +131,26 @@ namespace swrenderer } } + void 
R_GetMaskedWallTopBottom(drawseg_t *ds, double &top, double &bot) + { + double frontcz1 = ds->curline->frontsector->ceilingplane.ZatPoint(ds->curline->v1); + double frontfz1 = ds->curline->frontsector->floorplane.ZatPoint(ds->curline->v1); + double frontcz2 = ds->curline->frontsector->ceilingplane.ZatPoint(ds->curline->v2); + double frontfz2 = ds->curline->frontsector->floorplane.ZatPoint(ds->curline->v2); + top = MAX(frontcz1, frontcz2); + bot = MIN(frontfz1, frontfz2); + + Clip3DFloors *clip3d = Clip3DFloors::Instance(); + if (clip3d->fake3D & FAKE3D_CLIPTOP) + { + top = MIN(top, clip3d->sclipTop); + } + if (clip3d->fake3D & FAKE3D_CLIPBOTTOM) + { + bot = MAX(bot, clip3d->sclipBottom); + } + } + void R_RenderMaskedSegRange(drawseg_t *ds, int x1, int x2) { float *MaskedSWall = nullptr, MaskedScaleY = 0, rw_scalestep = 0; @@ -426,7 +446,12 @@ namespace swrenderer rw_offset = 0; rw_pic = tex; - R_DrawDrawSeg(frontsector, curline, WallC, rw_pic, ds, x1, x2, mceilingclip, mfloorclip, texturemid, MaskedSWall, maskedtexturecol, ds->yscale, wallshade, rw_offset, rw_light, rw_lightstep, ds->foggy, basecolormap); + + double top, bot; + R_GetMaskedWallTopBottom(ds, top, bot); + + RenderWallPart renderWallpart; + renderWallpart.Render(frontsector, curline, WallC, rw_pic, x1, x2, mceilingclip, mfloorclip, texturemid, MaskedSWall, maskedtexturecol, ds->yscale, top, bot, true, wallshade, rw_offset, rw_light, rw_lightstep, nullptr, ds->foggy, basecolormap); } clearfog: @@ -551,7 +576,12 @@ namespace swrenderer ProjectedWallTexcoords walltexcoords; walltexcoords.ProjectPos(curline->sidedef->TexelLength*xscale, ds->sx1, ds->sx2, WallT); - R_DrawDrawSeg(frontsector, curline, WallC, rw_pic, ds, x1, x2, wallupper.ScreenY, walllower.ScreenY, texturemid, MaskedSWall, walltexcoords.UPos, yscale, wallshade, rw_offset, rw_light, rw_lightstep, ds->foggy, basecolormap); + + double top, bot; + R_GetMaskedWallTopBottom(ds, top, bot); + + RenderWallPart renderWallpart; + 
renderWallpart.Render(frontsector, curline, WallC, rw_pic, x1, x2, wallupper.ScreenY, walllower.ScreenY, texturemid, MaskedSWall, walltexcoords.UPos, yscale, top, bot, true, wallshade, rw_offset, rw_light, rw_lightstep, nullptr, ds->foggy, basecolormap); } // kg3D - walls of fake floors diff --git a/src/swrenderer/segments/r_drawsegment.h b/src/swrenderer/segments/r_drawsegment.h index 28d1df8d5a..2b276ffaf3 100644 --- a/src/swrenderer/segments/r_drawsegment.h +++ b/src/swrenderer/segments/r_drawsegment.h @@ -62,4 +62,5 @@ namespace swrenderer void R_RenderMaskedSegRange(drawseg_t *ds, int x1, int x2); void R_RenderFakeWall(drawseg_t *ds, int x1, int x2, F3DFloor *rover, int wallshade, FDynamicColormap *basecolormap); void R_RenderFakeWallRange(drawseg_t *ds, int x1, int x2, int wallshade); + void R_GetMaskedWallTopBottom(drawseg_t *ds, double &top, double &bot); } From 934f6a88bbdd8dc918998d93b71237ac4172d148 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 24 Jan 2017 17:10:28 +0100 Subject: [PATCH 752/912] Fix compile error --- src/posix/sdl/sdlvideo.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/posix/sdl/sdlvideo.h b/src/posix/sdl/sdlvideo.h index d0888daba6..33192800de 100644 --- a/src/posix/sdl/sdlvideo.h +++ b/src/posix/sdl/sdlvideo.h @@ -2,7 +2,7 @@ #include "v_video.h" #include "sdlglvideo.h" -class SDLFB : public DFrameBuffer +class SDLFB : public SDLBaseFB { DECLARE_CLASS(SDLFB, SDLBaseFB) public: From eb2b5269f9020048312ae8b7eb166fedf3620a32 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 25 Jan 2017 03:28:11 +0100 Subject: [PATCH 753/912] Fix linux compile errors --- src/posix/sdl/sdlglvideo.cpp | 2 +- src/posix/sdl/sdlglvideo.h | 3 +++ src/posix/sdl/sdlvideo.cpp | 2 +- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/posix/sdl/sdlglvideo.cpp b/src/posix/sdl/sdlglvideo.cpp index 18f1edf166..576ca570b1 100644 --- a/src/posix/sdl/sdlglvideo.cpp +++ b/src/posix/sdl/sdlglvideo.cpp @@ -326,7 
+326,7 @@ bool SDLGLVideo::InitHardware (bool allowsoftware, int multisample) // FrameBuffer implementation ----------------------------------------------- SDLGLFB::SDLGLFB (void *, int width, int height, int, int, bool fullscreen, bool bgra) - : DFrameBuffer (width, height, bgra) + : SDLBaseFB (width, height, bgra) { int i; diff --git a/src/posix/sdl/sdlglvideo.h b/src/posix/sdl/sdlglvideo.h index 7acb0fa1be..c3fde256c9 100644 --- a/src/posix/sdl/sdlglvideo.h +++ b/src/posix/sdl/sdlglvideo.h @@ -39,7 +39,10 @@ class SDLBaseFB : public DFrameBuffer { DECLARE_CLASS(SDLBaseFB, DFrameBuffer) public: + using DFrameBuffer::DFrameBuffer; virtual SDL_Window *GetSDLWindow() = 0; + + friend class SDLGLVideo; }; class SDLGLFB : public SDLBaseFB diff --git a/src/posix/sdl/sdlvideo.cpp b/src/posix/sdl/sdlvideo.cpp index c88d688a5e..8594eb648b 100644 --- a/src/posix/sdl/sdlvideo.cpp +++ b/src/posix/sdl/sdlvideo.cpp @@ -110,7 +110,7 @@ void ScaleWithAspect (int &w, int &h, int Width, int Height) // FrameBuffer implementation ----------------------------------------------- SDLFB::SDLFB (int width, int height, bool bgra, bool fullscreen, SDL_Window *oldwin) - : DFrameBuffer (width, height, bgra) + : SDLBaseFB (width, height, bgra) { int i; From 348b73eb83803e8b3618e80aeeb8e08992df708a Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 25 Jan 2017 03:45:57 +0100 Subject: [PATCH 754/912] Fix gcc warning --- src/swrenderer/plane/r_visibleplanelist.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/swrenderer/plane/r_visibleplanelist.cpp b/src/swrenderer/plane/r_visibleplanelist.cpp index b774b21120..a91d4a0195 100644 --- a/src/swrenderer/plane/r_visibleplanelist.cpp +++ b/src/swrenderer/plane/r_visibleplanelist.cpp @@ -137,7 +137,7 @@ namespace swrenderer } // New visplane algorithm uses hash table -- killough - hash = isskybox ? MAXVISPLANES : CalcHash(picnum.GetIndex(), lightlevel, height); + hash = isskybox ? 
((unsigned)MAXVISPLANES) : CalcHash(picnum.GetIndex(), lightlevel, height); RenderPortal *renderportal = RenderPortal::Instance(); From 9c4b11b6714a09ce8136ad2730737c511cc46e98 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 25 Jan 2017 07:18:26 +0100 Subject: [PATCH 755/912] Add OpenGL ES support to Linux target and enable it for ARM devices --- src/gl/system/gl_interface.cpp | 177 +++++++++++++++---------- src/gl/system/gl_interface.h | 1 + src/gl/system/gl_swframebuffer.cpp | 100 +++++++++++--- src/gl/system/gl_swframebuffer.h | 4 + src/posix/sdl/sdlglvideo.cpp | 20 +++ wadsrc/static/shaders/glsl/swshader.fp | 6 +- 6 files changed, 217 insertions(+), 91 deletions(-) diff --git a/src/gl/system/gl_interface.cpp b/src/gl/system/gl_interface.cpp index 7f98c7f3c5..485876e7a9 100644 --- a/src/gl/system/gl_interface.cpp +++ b/src/gl/system/gl_interface.cpp @@ -130,8 +130,16 @@ void gl_LoadExtensions() InitContext(); CollectExtensions(); - const char *version = Args->CheckValue("-glversion"); const char *glversion = (const char*)glGetString(GL_VERSION); + gl.es = false; + + if (glversion && strlen(glversion) > 10 && memcmp(glversion, "OpenGL ES ", 10) == 0) + { + glversion += 10; + gl.es = true; + } + + const char *version = Args->CheckValue("-glversion"); if (version == NULL) { @@ -147,90 +155,117 @@ void gl_LoadExtensions() float gl_version = (float)strtod(version, NULL) + 0.01f; - // Don't even start if it's lower than 2.0 or no framebuffers are available (The framebuffer extension is needed for glGenerateMipmapsEXT!) 
- if ((gl_version < 2.0f || !CheckExtension("GL_EXT_framebuffer_object")) && gl_version < 3.0f) + if (gl.es) { - I_FatalError("Unsupported OpenGL version.\nAt least OpenGL 2.0 with framebuffer support is required to run " GAMENAME ".\n"); - } + if (gl_version < 2.0f) + { + I_FatalError("Unsupported OpenGL ES version.\nAt least OpenGL ES 2.0 is required to run " GAMENAME ".\n"); + } + + const char *glslversion = (const char*)glGetString(GL_SHADING_LANGUAGE_VERSION); + if (glslversion && strlen(glslversion) > 18 && memcmp(glslversion, "OpenGL ES GLSL ES ", 10) == 0) + { + glslversion += 18; + } + + // add 0.01 to account for roundoff errors making the number a tad smaller than the actual version + gl.glslversion = strtod(glslversion, NULL) + 0.01f; + gl.vendorstring = (char*)glGetString(GL_VENDOR); - // add 0.01 to account for roundoff errors making the number a tad smaller than the actual version - gl.glslversion = strtod((char*)glGetString(GL_SHADING_LANGUAGE_VERSION), NULL) + 0.01f; - - gl.vendorstring = (char*)glGetString(GL_VENDOR); - - // first test for optional features - if (CheckExtension("GL_ARB_texture_compression")) gl.flags |= RFL_TEXTURE_COMPRESSION; - if (CheckExtension("GL_EXT_texture_compression_s3tc")) gl.flags |= RFL_TEXTURE_COMPRESSION_S3TC; - - if ((gl_version >= 3.3f || CheckExtension("GL_ARB_sampler_objects")) && !Args->CheckParm("-nosampler")) - { - gl.flags |= RFL_SAMPLER_OBJECTS; - } - - // The minimum requirement for the modern render path are GL 3.0 + uniform buffers. Also exclude the Linux Mesa driver at GL 3.0 because it errors out on shader compilation. 
- if (gl_version < 3.0f || (gl_version < 3.1f && (!CheckExtension("GL_ARB_uniform_buffer_object") || strstr(gl.vendorstring, "X.Org") != nullptr))) - { - gl.legacyMode = true; - gl.lightmethod = LM_LEGACY; - gl.buffermethod = BM_LEGACY; - gl.glslversion = 0; - gl.flags |= RFL_NO_CLIP_PLANES; - } - else - { + // Use the slowest/oldest modern path for now gl.legacyMode = false; gl.lightmethod = LM_DEFERRED; gl.buffermethod = BM_DEFERRED; - if (gl_version < 4.f) + } + else + { + // Don't even start if it's lower than 2.0 or no framebuffers are available (The framebuffer extension is needed for glGenerateMipmapsEXT!) + if ((gl_version < 2.0f || !CheckExtension("GL_EXT_framebuffer_object")) && gl_version < 3.0f) { -#ifdef _WIN32 - if (strstr(gl.vendorstring, "ATI Tech")) - { - gl.flags |= RFL_NO_CLIP_PLANES; // gl_ClipDistance is horribly broken on ATI GL3 drivers for Windows. - } -#endif + I_FatalError("Unsupported OpenGL version.\nAt least OpenGL 2.0 with framebuffer support is required to run " GAMENAME ".\n"); } - else if (gl_version < 4.5f) + + gl.es = false; + + // add 0.01 to account for roundoff errors making the number a tad smaller than the actual version + gl.glslversion = strtod((char*)glGetString(GL_SHADING_LANGUAGE_VERSION), NULL) + 0.01f; + + gl.vendorstring = (char*)glGetString(GL_VENDOR); + + // first test for optional features + if (CheckExtension("GL_ARB_texture_compression")) gl.flags |= RFL_TEXTURE_COMPRESSION; + if (CheckExtension("GL_EXT_texture_compression_s3tc")) gl.flags |= RFL_TEXTURE_COMPRESSION_S3TC; + + if ((gl_version >= 3.3f || CheckExtension("GL_ARB_sampler_objects")) && !Args->CheckParm("-nosampler")) { - // don't use GL 4.x features when running a GL 3.x context. - if (CheckExtension("GL_ARB_buffer_storage")) - { - // work around a problem with older AMD drivers: Their implementation of shader storage buffer objects is piss-poor and does not match uniform buffers even closely. 
- // Recent drivers, GL 4.4 don't have this problem, these can easily be recognized by also supporting the GL_ARB_buffer_storage extension. - if (CheckExtension("GL_ARB_shader_storage_buffer_object")) - { - // Shader storage buffer objects are broken on current Intel drivers. - if (strstr(gl.vendorstring, "Intel") == NULL) - { - gl.flags |= RFL_SHADER_STORAGE_BUFFER; - } - } - gl.flags |= RFL_BUFFER_STORAGE; - gl.lightmethod = LM_DIRECT; - gl.buffermethod = BM_PERSISTENT; - } + gl.flags |= RFL_SAMPLER_OBJECTS; + } + + // The minimum requirement for the modern render path are GL 3.0 + uniform buffers. Also exclude the Linux Mesa driver at GL 3.0 because it errors out on shader compilation. + if (gl_version < 3.0f || (gl_version < 3.1f && (!CheckExtension("GL_ARB_uniform_buffer_object") || strstr(gl.vendorstring, "X.Org") != nullptr))) + { + gl.legacyMode = true; + gl.lightmethod = LM_LEGACY; + gl.buffermethod = BM_LEGACY; + gl.glslversion = 0; + gl.flags |= RFL_NO_CLIP_PLANES; } else { - // Assume that everything works without problems on GL 4.5 drivers where these things are core features. - gl.flags |= RFL_SHADER_STORAGE_BUFFER | RFL_BUFFER_STORAGE; - gl.lightmethod = LM_DIRECT; - gl.buffermethod = BM_PERSISTENT; - } + gl.legacyMode = false; + gl.lightmethod = LM_DEFERRED; + gl.buffermethod = BM_DEFERRED; + if (gl_version < 4.f) + { +#ifdef _WIN32 + if (strstr(gl.vendorstring, "ATI Tech")) + { + gl.flags |= RFL_NO_CLIP_PLANES; // gl_ClipDistance is horribly broken on ATI GL3 drivers for Windows. + } +#endif + } + else if (gl_version < 4.5f) + { + // don't use GL 4.x features when running a GL 3.x context. + if (CheckExtension("GL_ARB_buffer_storage")) + { + // work around a problem with older AMD drivers: Their implementation of shader storage buffer objects is piss-poor and does not match uniform buffers even closely. + // Recent drivers, GL 4.4 don't have this problem, these can easily be recognized by also supporting the GL_ARB_buffer_storage extension. 
+ if (CheckExtension("GL_ARB_shader_storage_buffer_object")) + { + // Shader storage buffer objects are broken on current Intel drivers. + if (strstr(gl.vendorstring, "Intel") == NULL) + { + gl.flags |= RFL_SHADER_STORAGE_BUFFER; + } + } + gl.flags |= RFL_BUFFER_STORAGE; + gl.lightmethod = LM_DIRECT; + gl.buffermethod = BM_PERSISTENT; + } + } + else + { + // Assume that everything works without problems on GL 4.5 drivers where these things are core features. + gl.flags |= RFL_SHADER_STORAGE_BUFFER | RFL_BUFFER_STORAGE; + gl.lightmethod = LM_DIRECT; + gl.buffermethod = BM_PERSISTENT; + } - if (gl_version >= 4.3f || CheckExtension("GL_ARB_invalidate_subdata")) gl.flags |= RFL_INVALIDATE_BUFFER; - if (gl_version >= 4.3f || CheckExtension("GL_KHR_debug")) gl.flags |= RFL_DEBUG; + if (gl_version >= 4.3f || CheckExtension("GL_ARB_invalidate_subdata")) gl.flags |= RFL_INVALIDATE_BUFFER; + if (gl_version >= 4.3f || CheckExtension("GL_KHR_debug")) gl.flags |= RFL_DEBUG; - const char *lm = Args->CheckValue("-lightmethod"); - if (lm != NULL) - { - if (!stricmp(lm, "deferred") && gl.lightmethod == LM_DIRECT) gl.lightmethod = LM_DEFERRED; - } + const char *lm = Args->CheckValue("-lightmethod"); + if (lm != NULL) + { + if (!stricmp(lm, "deferred") && gl.lightmethod == LM_DIRECT) gl.lightmethod = LM_DEFERRED; + } - lm = Args->CheckValue("-buffermethod"); - if (lm != NULL) - { - if (!stricmp(lm, "deferred") && gl.buffermethod == BM_PERSISTENT) gl.buffermethod = BM_DEFERRED; + lm = Args->CheckValue("-buffermethod"); + if (lm != NULL) + { + if (!stricmp(lm, "deferred") && gl.buffermethod == BM_PERSISTENT) gl.buffermethod = BM_DEFERRED; + } } } diff --git a/src/gl/system/gl_interface.h b/src/gl/system/gl_interface.h index edaec58c05..a839e13b46 100644 --- a/src/gl/system/gl_interface.h +++ b/src/gl/system/gl_interface.h @@ -66,6 +66,7 @@ struct RenderContext int max_texturesize; char * vendorstring; bool legacyMode; + bool es; int MaxLights() const { diff --git 
a/src/gl/system/gl_swframebuffer.cpp b/src/gl/system/gl_swframebuffer.cpp index 7bcb71f6bc..2250588d3a 100644 --- a/src/gl/system/gl_swframebuffer.cpp +++ b/src/gl/system/gl_swframebuffer.cpp @@ -303,8 +303,11 @@ OpenGLSWFrameBuffer::HWPixelShader::~HWPixelShader() bool OpenGLSWFrameBuffer::CreateFrameBuffer(const FString &name, int width, int height, HWFrameBuffer **outFramebuffer) { auto fb = std::make_unique(); + + GLint format = GL_RGBA16F; + if (gl.es) format = GL_RGB; - if (!CreateTexture(name, width, height, 1, GL_RGBA16F, &fb->Texture)) + if (!CreateTexture(name, width, height, 1, format, &fb->Texture)) { outFramebuffer = nullptr; return false; @@ -329,7 +332,7 @@ bool OpenGLSWFrameBuffer::CreateFrameBuffer(const FString &name, int width, int if (result != GL_FRAMEBUFFER_COMPLETE) { - //Printf("Framebuffer is not complete"); + Printf("Framebuffer is not complete\n"); outFramebuffer = nullptr; return false; } @@ -351,6 +354,7 @@ bool OpenGLSWFrameBuffer::CreatePixelShader(FString vertexsrc, FString fragments FString prefix; prefix.AppendFormat("#version %d\n%s\n#line 0\n", shaderVersion, defines.GetChars()); + //Printf("Shader prefix: %s", prefix.GetChars()); vertexsrc = prefix + vertexsrc; fragmentsrc = prefix + fragmentsrc; @@ -400,7 +404,7 @@ bool OpenGLSWFrameBuffer::CreatePixelShader(FString vertexsrc, FString fragments GLsizei length = 0; buffer[0] = 0; glGetProgramInfoLog(shader->Program, 10000, &length, buffer); - //Printf("Shader compile failed: %s", buffer); + //Printf("Shader link failed: %s", buffer); *outShader = nullptr; return false; @@ -485,8 +489,9 @@ bool OpenGLSWFrameBuffer::CreateTexture(const FString &name, int width, int heig GLenum srcformat; switch (format) { + case GL_RGB: srcformat = GL_RGB; break; case GL_R8: srcformat = GL_RED; break; - case GL_RGBA8: srcformat = GL_BGRA; break; + case GL_RGBA8: srcformat = gl.es ? 
GL_RGBA : GL_BGRA; break; case GL_RGBA16F: srcformat = GL_RGBA; break; case GL_COMPRESSED_RGB_S3TC_DXT1_EXT: srcformat = GL_RGB; break; case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT: srcformat = GL_RGBA; break; @@ -743,6 +748,8 @@ void OpenGLSWFrameBuffer::Present() void OpenGLSWFrameBuffer::SetInitialState() { + if (gl.es) UseMappedMemBuffer = false; + AlphaBlendEnabled = false; AlphaBlendOp = GL_FUNC_ADD; AlphaSrcBlend = 0; @@ -787,12 +794,11 @@ bool OpenGLSWFrameBuffer::CreateResources() { Atlases = nullptr; if (!LoadShaders()) - { return false; - } if (!CreateFrameBuffer("OutputFB", Width, Height, &OutputFB)) return false; + glBindFramebuffer(GL_FRAMEBUFFER, OutputFB->Framebuffer); if (!CreateFBTexture() || @@ -1312,6 +1318,23 @@ void OpenGLSWFrameBuffer::BindFBBuffer() } } +void OpenGLSWFrameBuffer::BgraToRgba(uint32_t *dest, const uint32_t *src, int width, int height, int srcpitch) +{ + for (int y = 0; y < height; y++) + { + for (int x = 0; x < width; x++) + { + uint32_t r = RPART(src[x]); + uint32_t g = GPART(src[x]); + uint32_t b = BPART(src[x]); + uint32_t a = APART(src[x]); + dest[x] = r | (g << 8) | (b << 16) | (a << 24); + } + dest += width; + src += srcpitch; + } +} + void OpenGLSWFrameBuffer::Draw3DPart(bool copy3d) { if (copy3d) @@ -1327,7 +1350,11 @@ void OpenGLSWFrameBuffer::Draw3DPart(bool copy3d) uint8_t *dest = (uint8_t*)MapBuffer(GL_PIXEL_UNPACK_BUFFER, size); if (dest) { - if (Pitch == Width) + if (gl.es && pixelsize == 4) + { + BgraToRgba((uint32_t*)dest, (const uint32_t *)MemBuffer, Width, Height, Pitch); + } + else if (Pitch == Width) { memcpy(dest, MemBuffer, Width * Height * pixelsize); } @@ -1354,7 +1381,7 @@ void OpenGLSWFrameBuffer::Draw3DPart(bool copy3d) glGetIntegerv(GL_TEXTURE_BINDING_2D, &oldBinding); glBindTexture(GL_TEXTURE_2D, FBTexture->Texture); if (IsBgra()) - glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, Width, Height, GL_BGRA, GL_UNSIGNED_BYTE, 0); + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, Width, Height, gl.es ? 
GL_RGBA : GL_BGRA, GL_UNSIGNED_BYTE, 0); else glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, Width, Height, GL_RED, GL_UNSIGNED_BYTE, 0); glBindTexture(GL_TEXTURE_2D, oldBinding); @@ -1462,6 +1489,8 @@ void OpenGLSWFrameBuffer::UploadPalette() glBufferData(GL_PIXEL_UNPACK_BUFFER, 256 * 4, nullptr, GL_STREAM_DRAW); glBindBuffer(GL_PIXEL_UNPACK_BUFFER, PaletteTexture->Buffers[1]); glBufferData(GL_PIXEL_UNPACK_BUFFER, 256 * 4, nullptr, GL_STREAM_DRAW); + + if (gl.es) PaletteTexture->MapBuffer.resize(256 * 4); } else { @@ -1469,7 +1498,7 @@ void OpenGLSWFrameBuffer::UploadPalette() PaletteTexture->CurrentBuffer = (PaletteTexture->CurrentBuffer + 1) & 1; } - uint8_t *pix = (uint8_t*)MapBuffer(GL_PIXEL_UNPACK_BUFFER, 256 * 4); + uint8_t *pix = gl.es ? PaletteTexture->MapBuffer.data() : (uint8_t*)MapBuffer(GL_PIXEL_UNPACK_BUFFER, 256 * 4); if (pix) { int i; @@ -1490,11 +1519,21 @@ void OpenGLSWFrameBuffer::UploadPalette() pix[2] = SourcePalette[i].r; pix[3] = 255; } - glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER); + if (gl.es) + { + uint8_t *tempbuffer = PaletteTexture->MapBuffer.data(); + BgraToRgba((uint32_t*)tempbuffer, (const uint32_t *)tempbuffer, 256, 1, 256); + glBufferSubData(GL_PIXEL_UNPACK_BUFFER, 0, 256 * 4, tempbuffer); + } + else + { + glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER); + } + GLint oldBinding = 0; glGetIntegerv(GL_TEXTURE_BINDING_2D, &oldBinding); glBindTexture(GL_TEXTURE_2D, PaletteTexture->Texture); - glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 256, 1, GL_BGRA, GL_UNSIGNED_BYTE, 0); + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 256, 1, gl.es ? 
GL_RGBA : GL_BGRA, GL_UNSIGNED_BYTE, 0); glBindTexture(GL_TEXTURE_2D, oldBinding); BorderColor = ColorXRGB(SourcePalette[255].r, SourcePalette[255].g, SourcePalette[255].b); } @@ -2094,9 +2133,13 @@ bool OpenGLSWFrameBuffer::OpenGLTex::Update() glBindBuffer(GL_PIXEL_UNPACK_BUFFER, Box->Owner->Tex->Buffers[Box->Owner->Tex->CurrentBuffer]); glBufferData(GL_PIXEL_UNPACK_BUFFER, buffersize, nullptr, GL_STREAM_DRAW); Box->Owner->Tex->CurrentBuffer = (Box->Owner->Tex->CurrentBuffer + 1) & 1; + + static std::vector tempbuffer; + if (gl.es) + tempbuffer.resize(buffersize); int pitch = (rect.right - rect.left) * bytesPerPixel; - uint8_t *bits = (uint8_t *)MapBuffer(GL_PIXEL_UNPACK_BUFFER, buffersize); + uint8_t *bits = gl.es ? tempbuffer.data() : (uint8_t *)MapBuffer(GL_PIXEL_UNPACK_BUFFER, buffersize); dest = bits; if (!dest) { @@ -2140,13 +2183,21 @@ bool OpenGLSWFrameBuffer::OpenGLTex::Update() // Clear bottom padding row. memset(dest, 0, numbytes); } + + if (gl.es && format == GL_RGBA8) + { + BgraToRgba((uint32_t*)bits, (const uint32_t *)bits, rect.right - rect.left, rect.bottom - rect.top, rect.right - rect.left); + } - glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER); + if (gl.es) + glBufferSubData(GL_PIXEL_UNPACK_BUFFER, 0, buffersize, bits); + else + glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER); GLint oldBinding = 0; glGetIntegerv(GL_TEXTURE_BINDING_2D, &oldBinding); glBindTexture(GL_TEXTURE_2D, Box->Owner->Tex->Texture); if (format == GL_RGBA8) - glTexSubImage2D(GL_TEXTURE_2D, 0, rect.left, rect.top, rect.right - rect.left, rect.bottom - rect.top, GL_BGRA, GL_UNSIGNED_BYTE, 0); + glTexSubImage2D(GL_TEXTURE_2D, 0, rect.left, rect.top, rect.right - rect.left, rect.bottom - rect.top, gl.es ? 
GL_RGBA : GL_BGRA, GL_UNSIGNED_BYTE, 0); else glTexSubImage2D(GL_TEXTURE_2D, 0, rect.left, rect.top, rect.right - rect.left, rect.bottom - rect.top, GL_RED, GL_UNSIGNED_BYTE, 0); glBindTexture(GL_TEXTURE_2D, oldBinding); @@ -2302,8 +2353,12 @@ bool OpenGLSWFrameBuffer::OpenGLPal::Update() } int numEntries = MIN(Remap->NumEntries, RoundedPaletteSize); + + std::vector &tempbuffer = Tex->MapBuffer; + if (gl.es) + tempbuffer.resize(numEntries * 4); - buff = (uint32_t *)MapBuffer(GL_PIXEL_UNPACK_BUFFER, numEntries * 4); + buff = gl.es ? (uint32_t*)tempbuffer.data() : (uint32_t *)MapBuffer(GL_PIXEL_UNPACK_BUFFER, numEntries * 4); if (buff == nullptr) { return false; @@ -2326,12 +2381,21 @@ bool OpenGLSWFrameBuffer::OpenGLPal::Update() i = numEntries - 1; BorderColor = ColorARGB(pal[i].a, pal[i - 1].r, pal[i - 1].g, pal[i - 1].b); } - - glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER); + + if (gl.es) + { + BgraToRgba((uint32_t*)buff, (const uint32_t *)buff, numEntries, 1, numEntries); + glBufferSubData(GL_PIXEL_UNPACK_BUFFER, 0, numEntries * 4, buff); + } + else + { + glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER); + } + GLint oldBinding = 0; glGetIntegerv(GL_TEXTURE_BINDING_2D, &oldBinding); glBindTexture(GL_TEXTURE_2D, Tex->Texture); - glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, numEntries, 1, GL_BGRA, GL_UNSIGNED_BYTE, 0); + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, numEntries, 1, gl.es ? 
GL_RGBA : GL_BGRA, GL_UNSIGNED_BYTE, 0); glBindTexture(GL_TEXTURE_2D, oldBinding); glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); diff --git a/src/gl/system/gl_swframebuffer.h b/src/gl/system/gl_swframebuffer.h index e211b3e51f..6e69e94217 100644 --- a/src/gl/system/gl_swframebuffer.h +++ b/src/gl/system/gl_swframebuffer.h @@ -121,6 +121,8 @@ private: int WrapS = 0; int WrapT = 0; int Format = 0; + + std::vector MapBuffer; }; class HWFrameBuffer @@ -193,6 +195,8 @@ private: void DrawLineList(int count); void DrawTriangleList(int minIndex, int numVertices, int startIndex, int primitiveCount); void Present(); + + static void BgraToRgba(uint32_t *dest, const uint32_t *src, int width, int height, int srcpitch); void BindFBBuffer(); void *MappedMemBuffer = nullptr; diff --git a/src/posix/sdl/sdlglvideo.cpp b/src/posix/sdl/sdlglvideo.cpp index 576ca570b1..51a15b8c4a 100644 --- a/src/posix/sdl/sdlglvideo.cpp +++ b/src/posix/sdl/sdlglvideo.cpp @@ -63,6 +63,18 @@ CUSTOM_CVAR(Bool, gl_debug, false, CVAR_ARCHIVE | CVAR_GLOBALCONFIG | CVAR_NOINI Printf("This won't take effect until " GAMENAME " is restarted.\n"); } +#ifdef __arm__ +CUSTOM_CVAR(Bool, gl_es, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG | CVAR_NOINITCALL) +{ + Printf("This won't take effect until " GAMENAME " is restarted.\n"); +} +#else +CUSTOM_CVAR(Bool, gl_es, false, CVAR_ARCHIVE | CVAR_GLOBALCONFIG | CVAR_NOINITCALL) +{ + Printf("This won't take effect until " GAMENAME " is restarted.\n"); +} +#endif + // PRIVATE DATA DEFINITIONS ------------------------------------------------ // Dummy screen sizes to pass when windowed @@ -303,6 +315,14 @@ bool SDLGLVideo::SetupPixelFormat(bool allowsoftware, int multisample) } if (gl_debug) SDL_GL_SetAttribute(SDL_GL_CONTEXT_FLAGS, SDL_GL_CONTEXT_DEBUG_FLAG); + + if (gl_es) + { + SDL_GL_SetAttribute(SDL_GL_CONTEXT_PROFILE_MASK, SDL_GL_CONTEXT_PROFILE_ES); + SDL_GL_SetAttribute(SDL_GL_CONTEXT_MAJOR_VERSION, 2); + SDL_GL_SetAttribute(SDL_GL_CONTEXT_MINOR_VERSION, 0); + } + return true; 
} diff --git a/wadsrc/static/shaders/glsl/swshader.fp b/wadsrc/static/shaders/glsl/swshader.fp index 01d539c88f..e33389f29b 100644 --- a/wadsrc/static/shaders/glsl/swshader.fp +++ b/wadsrc/static/shaders/glsl/swshader.fp @@ -1,4 +1,6 @@ +precision mediump float; + in vec4 PixelColor0; in vec4 PixelColor1; in vec4 PixelTexCoord0; @@ -75,7 +77,7 @@ vec4 SpecialColormap(vec2 tex_coord, vec4 start, vec4 end) vec4 range = end - start; // We can't store values greater than 1.0 in a color register, so we multiply // the final result by 2 and expect the caller to divide the start and end by 2. - color.rgb = 2 * (start + Grayscale(color) * range).rgb; + color.rgb = 2.0 * (start + Grayscale(color) * range).rgb; // Duplicate alpha semantics of NormalColor. color.a = start.a + color.a * end.a; return color; @@ -120,7 +122,7 @@ vec4 BurnWipe(vec4 coord) { vec4 color = texture(NewScreen, coord.xy); vec4 alpha = texture(Burn, coord.zw); - color.a = alpha.r * 2; + color.a = alpha.r * 2.0; return color; } From 9d9395c8559aa53d7a4e558853b20d4214e6e47f Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 26 Jan 2017 06:59:20 +0100 Subject: [PATCH 756/912] Updated the copyright --- src/swrenderer/line/r_walldraw.cpp | 33 +++++++++++------------------- 1 file changed, 12 insertions(+), 21 deletions(-) diff --git a/src/swrenderer/line/r_walldraw.cpp b/src/swrenderer/line/r_walldraw.cpp index 30020295a6..66c036a3f7 100644 --- a/src/swrenderer/line/r_walldraw.cpp +++ b/src/swrenderer/line/r_walldraw.cpp @@ -1,24 +1,15 @@ -/* -** Wall drawing stuff -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 'as-is', without any express or implied -** warranty. In no event will the authors be held liable for any damages -** arising from the use of this software. -** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. 
The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. -** 3. This notice may not be removed or altered from any source distribution. -** -*/ +// +// Copyright (C) 1993-1996 by id Software, Inc. +// +// This source is available for distribution and/or modification +// only under the terms of the DOOM Source Code License as +// published by id Software. All rights reserved. +// +// The source is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// FITNESS FOR A PARTICULAR PURPOSE. See the DOOM Source Code License +// for more details. +// #include #include From 2821c157956fea39188b0c8cdb1fbfaade923336 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 26 Jan 2017 07:03:27 +0100 Subject: [PATCH 757/912] Rename drawseg_t to DrawSegment --- src/swrenderer/line/r_line.cpp | 2 +- src/swrenderer/line/r_walldraw.h | 2 +- src/swrenderer/scene/r_portal.cpp | 2 +- src/swrenderer/scene/r_translucent_pass.cpp | 8 ++++---- src/swrenderer/scene/r_translucent_pass.h | 4 ++-- src/swrenderer/segments/r_drawsegment.cpp | 20 ++++++++++---------- src/swrenderer/segments/r_drawsegment.h | 18 +++++++++--------- src/swrenderer/things/r_decal.cpp | 4 ++-- src/swrenderer/things/r_decal.h | 6 +++--- src/swrenderer/things/r_particle.cpp | 2 +- src/swrenderer/things/r_visiblesprite.cpp | 2 +- src/swrenderer/things/r_visiblespritelist.h | 2 +- 12 files changed, 36 insertions(+), 36 deletions(-) diff --git a/src/swrenderer/line/r_line.cpp b/src/swrenderer/line/r_line.cpp index b163fac529..2547ee48b9 100644 --- a/src/swrenderer/line/r_line.cpp +++ b/src/swrenderer/line/r_line.cpp @@ -321,7 +321,7 @@ 
namespace swrenderer I_FatalError("Bad R_StoreWallRange: %i to %i", start, stop); #endif - drawseg_t *draw_segment = R_AddDrawSegment(); + DrawSegment *draw_segment = R_AddDrawSegment(); if (!rw_prepped) { diff --git a/src/swrenderer/line/r_walldraw.h b/src/swrenderer/line/r_walldraw.h index a1c41980f2..b199e51b58 100644 --- a/src/swrenderer/line/r_walldraw.h +++ b/src/swrenderer/line/r_walldraw.h @@ -24,7 +24,7 @@ struct FDynamicColormap; namespace swrenderer { - struct drawseg_t; + struct DrawSegment; struct FWallCoords; class ProjectedWallLine; class ProjectedWallTexcoords; diff --git a/src/swrenderer/scene/r_portal.cpp b/src/swrenderer/scene/r_portal.cpp index 82e72ab456..74d4eb2464 100644 --- a/src/swrenderer/scene/r_portal.cpp +++ b/src/swrenderer/scene/r_portal.cpp @@ -187,7 +187,7 @@ namespace swrenderer } // Create a drawseg to clip sprites to the sky plane - drawseg_t *draw_segment = R_AddDrawSegment(); + DrawSegment *draw_segment = R_AddDrawSegment(); draw_segment->CurrentPortalUniq = CurrentPortalUniq; draw_segment->siz1 = INT_MAX; draw_segment->siz2 = INT_MAX; diff --git a/src/swrenderer/scene/r_translucent_pass.cpp b/src/swrenderer/scene/r_translucent_pass.cpp index 3cf4998f16..8330852b01 100644 --- a/src/swrenderer/scene/r_translucent_pass.cpp +++ b/src/swrenderer/scene/r_translucent_pass.cpp @@ -47,7 +47,7 @@ CVAR(Bool, r_fullbrightignoresectorcolor, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG namespace swrenderer { bool RenderTranslucentPass::DrewAVoxel; - TArray RenderTranslucentPass::portaldrawsegs; + TArray RenderTranslucentPass::portaldrawsegs; void RenderTranslucentPass::Deinit() { @@ -68,7 +68,7 @@ namespace swrenderer // a) exit early if no relevant info is found and // b) skip most of the collected drawsegs which have no portal attached. 
portaldrawsegs.Clear(); - for (drawseg_t* seg = ds_p; seg-- > firstdrawseg; ) // copied code from killough below + for (DrawSegment* seg = ds_p; seg-- > firstdrawseg; ) // copied code from killough below { // I don't know what makes this happen (some old top-down portal code or possibly skybox code? something adds null lines...) // crashes at the first frame of the first map of Action2.wad @@ -98,7 +98,7 @@ namespace swrenderer if (renderportal->CurrentPortalInSkybox) return false; - for (drawseg_t *seg : portaldrawsegs) + for (DrawSegment *seg : portaldrawsegs) { // ignore segs from other portals if (seg->CurrentPortalUniq != renderportal->CurrentPortalUniq) @@ -143,7 +143,7 @@ namespace swrenderer { Clip3DFloors::Instance()->fake3D |= FAKE3D_REFRESHCLIP; } - for (drawseg_t *ds = ds_p; ds-- > firstdrawseg; ) // new -- killough + for (DrawSegment *ds = ds_p; ds-- > firstdrawseg; ) // new -- killough { // [ZZ] the same as above if (ds->CurrentPortalUniq != renderportal->CurrentPortalUniq) diff --git a/src/swrenderer/scene/r_translucent_pass.h b/src/swrenderer/scene/r_translucent_pass.h index e3744db994..6bd675af18 100644 --- a/src/swrenderer/scene/r_translucent_pass.h +++ b/src/swrenderer/scene/r_translucent_pass.h @@ -23,7 +23,7 @@ struct FVoxel; namespace swrenderer { class VisibleSprite; - struct drawseg_t; + struct DrawSegment; class RenderTranslucentPass { @@ -40,6 +40,6 @@ namespace swrenderer static void CollectPortals(); static void DrawMaskedSingle(bool renew); - static TArray portaldrawsegs; + static TArray portaldrawsegs; }; } diff --git a/src/swrenderer/segments/r_drawsegment.cpp b/src/swrenderer/segments/r_drawsegment.cpp index 9cb485831e..ff38422934 100644 --- a/src/swrenderer/segments/r_drawsegment.cpp +++ b/src/swrenderer/segments/r_drawsegment.cpp @@ -45,9 +45,9 @@ EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor); namespace swrenderer { - drawseg_t *firstdrawseg; - drawseg_t *ds_p; - drawseg_t *drawsegs; + DrawSegment *firstdrawseg; + DrawSegment 
*ds_p; + DrawSegment *drawsegs; size_t FirstInterestingDrawseg; TArray InterestingDrawsegs; @@ -87,20 +87,20 @@ namespace swrenderer if (drawsegs == nullptr) { MaxDrawSegs = 256; // [RH] Default. Increased as needed. - firstdrawseg = drawsegs = (drawseg_t *)M_Malloc (MaxDrawSegs * sizeof(drawseg_t)); + firstdrawseg = drawsegs = (DrawSegment *)M_Malloc (MaxDrawSegs * sizeof(DrawSegment)); } FirstInterestingDrawseg = 0; InterestingDrawsegs.Clear (); ds_p = drawsegs; } - drawseg_t *R_AddDrawSegment() + DrawSegment *R_AddDrawSegment() { if (ds_p == &drawsegs[MaxDrawSegs]) { // [RH] Grab some more drawsegs size_t newdrawsegs = MaxDrawSegs ? MaxDrawSegs * 2 : 32; ptrdiff_t firstofs = firstdrawseg - drawsegs; - drawsegs = (drawseg_t *)M_Realloc(drawsegs, newdrawsegs * sizeof(drawseg_t)); + drawsegs = (DrawSegment *)M_Realloc(drawsegs, newdrawsegs * sizeof(DrawSegment)); firstdrawseg = drawsegs + firstofs; ds_p = drawsegs + MaxDrawSegs; MaxDrawSegs = newdrawsegs; @@ -131,7 +131,7 @@ namespace swrenderer } } - void R_GetMaskedWallTopBottom(drawseg_t *ds, double &top, double &bot) + void R_GetMaskedWallTopBottom(DrawSegment *ds, double &top, double &bot) { double frontcz1 = ds->curline->frontsector->ceilingplane.ZatPoint(ds->curline->v1); double frontfz1 = ds->curline->frontsector->floorplane.ZatPoint(ds->curline->v1); @@ -151,7 +151,7 @@ namespace swrenderer } } - void R_RenderMaskedSegRange(drawseg_t *ds, int x1, int x2) + void R_RenderMaskedSegRange(DrawSegment *ds, int x1, int x2) { float *MaskedSWall = nullptr, MaskedScaleY = 0, rw_scalestep = 0; fixed_t *maskedtexturecol = nullptr; @@ -478,7 +478,7 @@ namespace swrenderer } // kg3D - render one fake wall - void R_RenderFakeWall(drawseg_t *ds, int x1, int x2, F3DFloor *rover, int wallshade, FDynamicColormap *basecolormap) + void R_RenderFakeWall(DrawSegment *ds, int x1, int x2, F3DFloor *rover, int wallshade, FDynamicColormap *basecolormap) { int i; double xscale; @@ -585,7 +585,7 @@ namespace swrenderer } // kg3D - 
walls of fake floors - void R_RenderFakeWallRange(drawseg_t *ds, int x1, int x2, int wallshade) + void R_RenderFakeWallRange(DrawSegment *ds, int x1, int x2, int wallshade) { FTexture *const DONT_DRAW = ((FTexture*)(intptr_t)-1); int i, j; diff --git a/src/swrenderer/segments/r_drawsegment.h b/src/swrenderer/segments/r_drawsegment.h index 2b276ffaf3..65af091cba 100644 --- a/src/swrenderer/segments/r_drawsegment.h +++ b/src/swrenderer/segments/r_drawsegment.h @@ -17,7 +17,7 @@ namespace swrenderer { - struct drawseg_t + struct DrawSegment { seg_t *curline; float light, lightstep; @@ -47,9 +47,9 @@ namespace swrenderer int CurrentPortalUniq; // [ZZ] to identify the portal that this drawseg is in. used for sprite clipping. }; - extern drawseg_t *firstdrawseg; - extern drawseg_t *ds_p; - extern drawseg_t *drawsegs; + extern DrawSegment *firstdrawseg; + extern DrawSegment *ds_p; + extern DrawSegment *drawsegs; extern TArray InterestingDrawsegs; // drawsegs that have something drawn on them extern size_t FirstInterestingDrawseg; @@ -57,10 +57,10 @@ namespace swrenderer void R_ClearDrawSegs(); void R_FreeDrawSegs(); - drawseg_t *R_AddDrawSegment(); + DrawSegment *R_AddDrawSegment(); void ClipMidtex(int x1, int x2); - void R_RenderMaskedSegRange(drawseg_t *ds, int x1, int x2); - void R_RenderFakeWall(drawseg_t *ds, int x1, int x2, F3DFloor *rover, int wallshade, FDynamicColormap *basecolormap); - void R_RenderFakeWallRange(drawseg_t *ds, int x1, int x2, int wallshade); - void R_GetMaskedWallTopBottom(drawseg_t *ds, double &top, double &bot); + void R_RenderMaskedSegRange(DrawSegment *ds, int x1, int x2); + void R_RenderFakeWall(DrawSegment *ds, int x1, int x2, F3DFloor *rover, int wallshade, FDynamicColormap *basecolormap); + void R_RenderFakeWallRange(DrawSegment *ds, int x1, int x2, int wallshade); + void R_GetMaskedWallTopBottom(DrawSegment *ds, double &top, double &bot); } diff --git a/src/swrenderer/things/r_decal.cpp b/src/swrenderer/things/r_decal.cpp index 
3e082aeb4a..23f987c63e 100644 --- a/src/swrenderer/things/r_decal.cpp +++ b/src/swrenderer/things/r_decal.cpp @@ -47,7 +47,7 @@ EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor); namespace swrenderer { - void RenderDecal::RenderDecals(side_t *sidedef, drawseg_t *draw_segment, int wallshade, float lightleft, float lightstep, seg_t *curline, const FWallCoords &wallC, bool foggy, FDynamicColormap *basecolormap, const short *walltop, const short *wallbottom) + void RenderDecal::RenderDecals(side_t *sidedef, DrawSegment *draw_segment, int wallshade, float lightleft, float lightstep, seg_t *curline, const FWallCoords &wallC, bool foggy, FDynamicColormap *basecolormap, const short *walltop, const short *wallbottom) { for (DBaseDecal *decal = sidedef->AttachedDecals; decal != NULL; decal = decal->WallNext) { @@ -59,7 +59,7 @@ namespace swrenderer // = 1: drawing masked textures (including sprites) // Currently, only pass = 0 is done or used - void RenderDecal::Render(side_t *wall, DBaseDecal *decal, drawseg_t *clipper, int wallshade, float lightleft, float lightstep, seg_t *curline, FWallCoords WallC, bool foggy, FDynamicColormap *basecolormap, const short *walltop, const short *wallbottom, int pass) + void RenderDecal::Render(side_t *wall, DBaseDecal *decal, DrawSegment *clipper, int wallshade, float lightleft, float lightstep, seg_t *curline, FWallCoords WallC, bool foggy, FDynamicColormap *basecolormap, const short *walltop, const short *wallbottom, int pass) { DVector2 decal_left, decal_right, decal_pos; int x1, x2; diff --git a/src/swrenderer/things/r_decal.h b/src/swrenderer/things/r_decal.h index 20670c6f47..56e63f8dc7 100644 --- a/src/swrenderer/things/r_decal.h +++ b/src/swrenderer/things/r_decal.h @@ -18,16 +18,16 @@ class DBaseDecal; namespace swrenderer { - struct drawseg_t; + struct DrawSegment; class ProjectedWallTexcoords; class RenderDecal { public: - static void RenderDecals(side_t *wall, drawseg_t *draw_segment, int wallshade, float lightleft, float 
lightstep, seg_t *curline, const FWallCoords &wallC, bool foggy, FDynamicColormap *basecolormap, const short *walltop, const short *wallbottom); + static void RenderDecals(side_t *wall, DrawSegment *draw_segment, int wallshade, float lightleft, float lightstep, seg_t *curline, const FWallCoords &wallC, bool foggy, FDynamicColormap *basecolormap, const short *walltop, const short *wallbottom); private: - static void Render(side_t *wall, DBaseDecal *first, drawseg_t *clipper, int wallshade, float lightleft, float lightstep, seg_t *curline, FWallCoords wallC, bool foggy, FDynamicColormap *basecolormap, const short *walltop, const short *wallbottom, int pass); + static void Render(side_t *wall, DBaseDecal *first, DrawSegment *clipper, int wallshade, float lightleft, float lightstep, seg_t *curline, FWallCoords wallC, bool foggy, FDynamicColormap *basecolormap, const short *walltop, const short *wallbottom, int pass); static void DrawColumn(int x, FTexture *WallSpriteTile, const ProjectedWallTexcoords &walltexcoords, double texturemid, float maskedScaleY, bool sprflipvert, const short *mfloorclip, const short *mceilingclip); }; } diff --git a/src/swrenderer/things/r_particle.cpp b/src/swrenderer/things/r_particle.cpp index 775e9358e7..2867708b70 100644 --- a/src/swrenderer/things/r_particle.cpp +++ b/src/swrenderer/things/r_particle.cpp @@ -263,7 +263,7 @@ namespace swrenderer // particle is drawn, it will be in front of them. 
for (unsigned int p = InterestingDrawsegs.Size(); p-- > FirstInterestingDrawseg; ) { - drawseg_t *ds = &drawsegs[InterestingDrawsegs[p]]; + DrawSegment *ds = &drawsegs[InterestingDrawsegs[p]]; // kg3D - no fake segs if (ds->fake) continue; if (ds->x1 >= x2 || ds->x2 <= x1) diff --git a/src/swrenderer/things/r_visiblesprite.cpp b/src/swrenderer/things/r_visiblesprite.cpp index 0f1088d809..f1e8faadd8 100644 --- a/src/swrenderer/things/r_visiblesprite.cpp +++ b/src/swrenderer/things/r_visiblesprite.cpp @@ -46,7 +46,7 @@ namespace swrenderer VisibleSprite *spr = this; - drawseg_t *ds; + DrawSegment *ds; int i; int x1, x2; int r1, r2; diff --git a/src/swrenderer/things/r_visiblespritelist.h b/src/swrenderer/things/r_visiblespritelist.h index 7add812ae1..d70fc88d4e 100644 --- a/src/swrenderer/things/r_visiblespritelist.h +++ b/src/swrenderer/things/r_visiblespritelist.h @@ -15,7 +15,7 @@ namespace swrenderer { - struct drawseg_t; + struct DrawSegment; class VisibleSprite; class VisibleSpriteList From 6899b351a20a6e1b99e546c16b560be146564d9d Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 26 Jan 2017 08:01:44 +0100 Subject: [PATCH 758/912] Create class for draw segment list --- src/swrenderer/line/r_line.cpp | 6 +- src/swrenderer/scene/r_portal.cpp | 29 +++++----- src/swrenderer/scene/r_scene.cpp | 4 +- src/swrenderer/scene/r_translucent_pass.cpp | 15 ++--- src/swrenderer/segments/r_drawsegment.cpp | 56 ++++++------------- src/swrenderer/segments/r_drawsegment.h | 62 ++++++++++++++++----- src/swrenderer/things/r_particle.cpp | 10 +++- src/swrenderer/things/r_visiblesprite.cpp | 8 ++- 8 files changed, 104 insertions(+), 86 deletions(-) diff --git a/src/swrenderer/line/r_line.cpp b/src/swrenderer/line/r_line.cpp index 2547ee48b9..8f7dfd0a7d 100644 --- a/src/swrenderer/line/r_line.cpp +++ b/src/swrenderer/line/r_line.cpp @@ -321,7 +321,7 @@ namespace swrenderer I_FatalError("Bad R_StoreWallRange: %i to %i", start, stop); #endif - DrawSegment *draw_segment = 
R_AddDrawSegment(); + DrawSegment *draw_segment = DrawSegmentList::Instance()->Add(); if (!rw_prepped) { @@ -519,8 +519,8 @@ namespace swrenderer if (draw_segment->bFogBoundary || draw_segment->maskedtexturecol != nullptr) { - size_t drawsegnum = draw_segment - drawsegs; - InterestingDrawsegs.Push(drawsegnum); + size_t drawsegnum = draw_segment - DrawSegmentList::Instance()->drawsegs; + DrawSegmentList::Instance()->InterestingDrawsegs.Push(drawsegnum); } } } diff --git a/src/swrenderer/scene/r_portal.cpp b/src/swrenderer/scene/r_portal.cpp index 74d4eb2464..c6685ddaef 100644 --- a/src/swrenderer/scene/r_portal.cpp +++ b/src/swrenderer/scene/r_portal.cpp @@ -93,6 +93,7 @@ namespace swrenderer numskyboxes = 0; VisiblePlaneList *planes = VisiblePlaneList::Instance(); + DrawSegmentList *drawseglist = DrawSegmentList::Instance(); if (!planes->HasPortalPlanes()) return; @@ -103,8 +104,8 @@ namespace swrenderer int savedextralight = extralight; DVector3 savedpos = ViewPos; DAngle savedangle = ViewAngle; - ptrdiff_t savedds_p = ds_p - drawsegs; - size_t savedinteresting = FirstInterestingDrawseg; + ptrdiff_t savedds_p = drawseglist->ds_p - drawseglist->drawsegs; + size_t savedinteresting = drawseglist->FirstInterestingDrawseg; double savedvisibility = R_GetVisibility(); AActor *savedcamera = camera; sector_t *savedsector = viewsector; @@ -187,7 +188,7 @@ namespace swrenderer } // Create a drawseg to clip sprites to the sky plane - DrawSegment *draw_segment = R_AddDrawSegment(); + DrawSegment *draw_segment = drawseglist->Add(); draw_segment->CurrentPortalUniq = CurrentPortalUniq; draw_segment->siz1 = INT_MAX; draw_segment->siz2 = INT_MAX; @@ -207,11 +208,11 @@ namespace swrenderer memcpy(draw_segment->sprbottomclip, floorclip + pl->left, (pl->right - pl->left) * sizeof(short)); memcpy(draw_segment->sprtopclip, ceilingclip + pl->left, (pl->right - pl->left) * sizeof(short)); - firstdrawseg = draw_segment; - FirstInterestingDrawseg = InterestingDrawsegs.Size(); + 
drawseglist->firstdrawseg = draw_segment; + drawseglist->FirstInterestingDrawseg = drawseglist->InterestingDrawsegs.Size(); - interestingStack.Push(FirstInterestingDrawseg); - ptrdiff_t diffnum = firstdrawseg - drawsegs; + interestingStack.Push(drawseglist->FirstInterestingDrawseg); + ptrdiff_t diffnum = drawseglist->firstdrawseg - drawseglist->drawsegs; drawsegStack.Push(diffnum); VisibleSpriteList::Instance()->PushPortal(); viewposStack.Push(ViewPos); @@ -228,19 +229,19 @@ namespace swrenderer // Draw all the masked textures in a second pass, in the reverse order they // were added. This must be done separately from the previous step for the // sake of nested skyboxes. - while (interestingStack.Pop(FirstInterestingDrawseg)) + while (interestingStack.Pop(drawseglist->FirstInterestingDrawseg)) { ptrdiff_t pd = 0; drawsegStack.Pop(pd); - firstdrawseg = drawsegs + pd; + drawseglist->firstdrawseg = drawseglist->drawsegs + pd; // Masked textures and planes need the view coordinates restored for proper positioning. 
viewposStack.Pop(ViewPos); RenderTranslucentPass::Render(); - ds_p = firstdrawseg; + drawseglist->ds_p = drawseglist->firstdrawseg; VisibleSpriteList::Instance()->PopPortal(); @@ -251,10 +252,10 @@ namespace swrenderer pl->Render(pl->Alpha, pl->Additive, true); } } - firstdrawseg = drawsegs; - ds_p = drawsegs + savedds_p; - InterestingDrawsegs.Resize((unsigned int)FirstInterestingDrawseg); - FirstInterestingDrawseg = savedinteresting; + drawseglist->firstdrawseg = drawseglist->drawsegs; + drawseglist->ds_p = drawseglist->drawsegs + savedds_p; + drawseglist->InterestingDrawsegs.Resize((unsigned int)drawseglist->FirstInterestingDrawseg); + drawseglist->FirstInterestingDrawseg = savedinteresting; camera = savedcamera; viewsector = savedsector; diff --git a/src/swrenderer/scene/r_scene.cpp b/src/swrenderer/scene/r_scene.cpp index be23aae322..2d1e1d7bb8 100644 --- a/src/swrenderer/scene/r_scene.cpp +++ b/src/swrenderer/scene/r_scene.cpp @@ -132,7 +132,7 @@ namespace swrenderer // Clear buffers. RenderClipSegment::Instance()->Clear(0, viewwidth); - R_ClearDrawSegs(); + DrawSegmentList::Instance()->Clear(); VisiblePlaneList::Instance()->Clear(); RenderTranslucentPass::Clear(); @@ -269,7 +269,7 @@ namespace swrenderer { RenderTranslucentPass::Deinit(); Clip3DFloors::Instance()->Cleanup(); - R_FreeDrawSegs(); + DrawSegmentList::Instance()->Deinit(); } ///////////////////////////////////////////////////////////////////////// diff --git a/src/swrenderer/scene/r_translucent_pass.cpp b/src/swrenderer/scene/r_translucent_pass.cpp index 8330852b01..d95fda1e0b 100644 --- a/src/swrenderer/scene/r_translucent_pass.cpp +++ b/src/swrenderer/scene/r_translucent_pass.cpp @@ -68,7 +68,8 @@ namespace swrenderer // a) exit early if no relevant info is found and // b) skip most of the collected drawsegs which have no portal attached. 
portaldrawsegs.Clear(); - for (DrawSegment* seg = ds_p; seg-- > firstdrawseg; ) // copied code from killough below + DrawSegmentList *drawseglist = DrawSegmentList::Instance(); + for (DrawSegment* seg = drawseglist->ds_p; seg-- > drawseglist->firstdrawseg; ) { // I don't know what makes this happen (some old top-down portal code or possibly skybox code? something adds null lines...) // crashes at the first frame of the first map of Action2.wad @@ -133,17 +134,12 @@ namespace swrenderer // render any remaining masked mid textures - // Modified by Lee Killough: - // (pointer check was originally nonportable - // and buggy, by going past LEFT end of array): - - // for (ds=ds_p-1 ; ds >= drawsegs ; ds--) old buggy code - if (renew) { Clip3DFloors::Instance()->fake3D |= FAKE3D_REFRESHCLIP; } - for (DrawSegment *ds = ds_p; ds-- > firstdrawseg; ) // new -- killough + DrawSegmentList *drawseglist = DrawSegmentList::Instance(); + for (DrawSegment *ds = drawseglist->ds_p; ds-- > drawseglist->firstdrawseg; ) { // [ZZ] the same as above if (ds->CurrentPortalUniq != renderportal->CurrentPortalUniq) @@ -152,7 +148,8 @@ namespace swrenderer if (ds->fake) continue; if (ds->maskedtexturecol != nullptr || ds->bFogBoundary) { - R_RenderMaskedSegRange(ds, ds->x1, ds->x2); + RenderDrawSegment renderer; + renderer.Render(ds, ds->x1, ds->x2); } } } diff --git a/src/swrenderer/segments/r_drawsegment.cpp b/src/swrenderer/segments/r_drawsegment.cpp index ff38422934..586195aa4e 100644 --- a/src/swrenderer/segments/r_drawsegment.cpp +++ b/src/swrenderer/segments/r_drawsegment.cpp @@ -45,35 +45,13 @@ EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor); namespace swrenderer { - DrawSegment *firstdrawseg; - DrawSegment *ds_p; - DrawSegment *drawsegs; - - size_t FirstInterestingDrawseg; - TArray InterestingDrawsegs; - - namespace + DrawSegmentList *DrawSegmentList::Instance() { - size_t MaxDrawSegs; - - sector_t *frontsector; - sector_t *backsector; - - seg_t *curline; - - FWallCoords WallC; - 
FWallTmapVals WallT; - - float rw_light; - float rw_lightstep; - fixed_t rw_offset; - FTexture *rw_pic; - - ProjectedWallLine wallupper; - ProjectedWallLine walllower; + static DrawSegmentList instance; + return &instance; } - void R_FreeDrawSegs() + void DrawSegmentList::Deinit() { if (drawsegs != nullptr) { @@ -82,7 +60,7 @@ namespace swrenderer } } - void R_ClearDrawSegs() + void DrawSegmentList::Clear() { if (drawsegs == nullptr) { @@ -94,7 +72,7 @@ namespace swrenderer ds_p = drawsegs; } - DrawSegment *R_AddDrawSegment() + DrawSegment *DrawSegmentList::Add() { if (ds_p == &drawsegs[MaxDrawSegs]) { // [RH] Grab some more drawsegs @@ -110,8 +88,10 @@ namespace swrenderer return ds_p++; } + ///////////////////////////////////////////////////////////////////////// + // Clip a midtexture to the floor and ceiling of the sector in front of it. - void ClipMidtex(int x1, int x2) + void RenderDrawSegment::ClipMidtex(int x1, int x2) { ProjectedWallLine most; @@ -131,7 +111,7 @@ namespace swrenderer } } - void R_GetMaskedWallTopBottom(DrawSegment *ds, double &top, double &bot) + void RenderDrawSegment::GetMaskedWallTopBottom(DrawSegment *ds, double &top, double &bot) { double frontcz1 = ds->curline->frontsector->ceilingplane.ZatPoint(ds->curline->v1); double frontfz1 = ds->curline->frontsector->floorplane.ZatPoint(ds->curline->v1); @@ -151,7 +131,7 @@ namespace swrenderer } } - void R_RenderMaskedSegRange(DrawSegment *ds, int x1, int x2) + void RenderDrawSegment::Render(DrawSegment *ds, int x1, int x2) { float *MaskedSWall = nullptr, MaskedScaleY = 0, rw_scalestep = 0; fixed_t *maskedtexturecol = nullptr; @@ -448,7 +428,7 @@ namespace swrenderer rw_pic = tex; double top, bot; - R_GetMaskedWallTopBottom(ds, top, bot); + GetMaskedWallTopBottom(ds, top, bot); RenderWallPart renderWallpart; renderWallpart.Render(frontsector, curline, WallC, rw_pic, x1, x2, mceilingclip, mfloorclip, texturemid, MaskedSWall, maskedtexturecol, ds->yscale, top, bot, true, wallshade, rw_offset, 
rw_light, rw_lightstep, nullptr, ds->foggy, basecolormap); @@ -457,7 +437,7 @@ namespace swrenderer clearfog: if (ds->bFakeBoundary & 3) { - R_RenderFakeWallRange(ds, x1, x2, wallshade); + RenderFakeWallRange(ds, x1, x2, wallshade); } if (!notrelevant) { @@ -478,7 +458,7 @@ namespace swrenderer } // kg3D - render one fake wall - void R_RenderFakeWall(DrawSegment *ds, int x1, int x2, F3DFloor *rover, int wallshade, FDynamicColormap *basecolormap) + void RenderDrawSegment::RenderFakeWall(DrawSegment *ds, int x1, int x2, F3DFloor *rover, int wallshade, FDynamicColormap *basecolormap) { int i; double xscale; @@ -578,14 +558,14 @@ namespace swrenderer walltexcoords.ProjectPos(curline->sidedef->TexelLength*xscale, ds->sx1, ds->sx2, WallT); double top, bot; - R_GetMaskedWallTopBottom(ds, top, bot); + GetMaskedWallTopBottom(ds, top, bot); RenderWallPart renderWallpart; renderWallpart.Render(frontsector, curline, WallC, rw_pic, x1, x2, wallupper.ScreenY, walllower.ScreenY, texturemid, MaskedSWall, walltexcoords.UPos, yscale, top, bot, true, wallshade, rw_offset, rw_light, rw_lightstep, nullptr, ds->foggy, basecolormap); } // kg3D - walls of fake floors - void R_RenderFakeWallRange(DrawSegment *ds, int x1, int x2, int wallshade) + void RenderDrawSegment::RenderFakeWallRange(DrawSegment *ds, int x1, int x2, int wallshade) { FTexture *const DONT_DRAW = ((FTexture*)(intptr_t)-1); int i, j; @@ -800,7 +780,7 @@ namespace swrenderer } if (rw_pic != DONT_DRAW) { - R_RenderFakeWall(ds, x1, x2, fover ? fover : rover, wallshade, basecolormap); + RenderFakeWall(ds, x1, x2, fover ? fover : rover, wallshade, basecolormap); } else rw_pic = nullptr; break; @@ -975,7 +955,7 @@ namespace swrenderer if (rw_pic != DONT_DRAW) { - R_RenderFakeWall(ds, x1, x2, fover ? fover : rover, wallshade, basecolormap); + RenderFakeWall(ds, x1, x2, fover ? 
fover : rover, wallshade, basecolormap); } else { diff --git a/src/swrenderer/segments/r_drawsegment.h b/src/swrenderer/segments/r_drawsegment.h index 65af091cba..a56dae7729 100644 --- a/src/swrenderer/segments/r_drawsegment.h +++ b/src/swrenderer/segments/r_drawsegment.h @@ -46,21 +46,53 @@ namespace swrenderer int fake; // ident fake drawseg, don't draw and clip sprites backups int CurrentPortalUniq; // [ZZ] to identify the portal that this drawseg is in. used for sprite clipping. }; - - extern DrawSegment *firstdrawseg; - extern DrawSegment *ds_p; - extern DrawSegment *drawsegs; - extern TArray InterestingDrawsegs; // drawsegs that have something drawn on them - extern size_t FirstInterestingDrawseg; - - void R_ClearDrawSegs(); - void R_FreeDrawSegs(); + class DrawSegmentList + { + public: + static DrawSegmentList *Instance(); - DrawSegment *R_AddDrawSegment(); - void ClipMidtex(int x1, int x2); - void R_RenderMaskedSegRange(DrawSegment *ds, int x1, int x2); - void R_RenderFakeWall(DrawSegment *ds, int x1, int x2, F3DFloor *rover, int wallshade, FDynamicColormap *basecolormap); - void R_RenderFakeWallRange(DrawSegment *ds, int x1, int x2, int wallshade); - void R_GetMaskedWallTopBottom(DrawSegment *ds, double &top, double &bot); + DrawSegment *firstdrawseg = nullptr; + DrawSegment *ds_p = nullptr; + DrawSegment *drawsegs = nullptr; + + TArray InterestingDrawsegs; // drawsegs that have something drawn on them + size_t FirstInterestingDrawseg = 0; + + void Clear(); + void Deinit(); + + DrawSegment *Add(); + + private: + size_t MaxDrawSegs = 0; + }; + + class RenderDrawSegment + { + public: + void Render(DrawSegment *ds, int x1, int x2); + + private: + void ClipMidtex(int x1, int x2); + void RenderFakeWall(DrawSegment *ds, int x1, int x2, F3DFloor *rover, int wallshade, FDynamicColormap *basecolormap); + void RenderFakeWallRange(DrawSegment *ds, int x1, int x2, int wallshade); + void GetMaskedWallTopBottom(DrawSegment *ds, double &top, double &bot); + + sector_t 
*frontsector = nullptr; + sector_t *backsector = nullptr; + + seg_t *curline = nullptr; + + FWallCoords WallC; + FWallTmapVals WallT; + + float rw_light = 0.0f; + float rw_lightstep = 0.0f; + fixed_t rw_offset = 0; + FTexture *rw_pic = nullptr; + + ProjectedWallLine wallupper; + ProjectedWallLine walllower; + }; } diff --git a/src/swrenderer/things/r_particle.cpp b/src/swrenderer/things/r_particle.cpp index 2867708b70..919f1c08c9 100644 --- a/src/swrenderer/things/r_particle.cpp +++ b/src/swrenderer/things/r_particle.cpp @@ -261,9 +261,10 @@ namespace swrenderer { // Draw any masked textures behind this particle so that when the // particle is drawn, it will be in front of them. - for (unsigned int p = InterestingDrawsegs.Size(); p-- > FirstInterestingDrawseg; ) + DrawSegmentList *segmentlist = DrawSegmentList::Instance(); + for (unsigned int p = segmentlist->InterestingDrawsegs.Size(); p-- > segmentlist->FirstInterestingDrawseg; ) { - DrawSegment *ds = &drawsegs[InterestingDrawsegs[p]]; + DrawSegment *ds = &segmentlist->drawsegs[segmentlist->InterestingDrawsegs[p]]; // kg3D - no fake segs if (ds->fake) continue; if (ds->x1 >= x2 || ds->x2 <= x1) @@ -274,7 +275,10 @@ namespace swrenderer { // [ZZ] only draw stuff that's inside the same portal as the particle, other portals will care for themselves if (ds->CurrentPortalUniq == CurrentPortalUniq) - R_RenderMaskedSegRange(ds, MAX(ds->x1, x1), MIN(ds->x2, x2)); + { + RenderDrawSegment renderer; + renderer.Render(ds, MAX(ds->x1, x1), MIN(ds->x2, x2)); + } } } } diff --git a/src/swrenderer/things/r_visiblesprite.cpp b/src/swrenderer/things/r_visiblesprite.cpp index f1e8faadd8..ff70344f52 100644 --- a/src/swrenderer/things/r_visiblesprite.cpp +++ b/src/swrenderer/things/r_visiblesprite.cpp @@ -282,7 +282,8 @@ namespace swrenderer // for (ds=ds_p-1 ; ds >= drawsegs ; ds--) old buggy code - for (ds = ds_p; ds-- > firstdrawseg; ) // new -- killough + DrawSegmentList *segmentlist = DrawSegmentList::Instance(); + for (ds = 
segmentlist->ds_p; ds-- > segmentlist->firstdrawseg; ) // new -- killough { // [ZZ] portal handling here //if (ds->CurrentPortalUniq != spr->CurrentPortalUniq) @@ -327,7 +328,10 @@ namespace swrenderer // seg is behind sprite, so draw the mid texture if it has one if (ds->CurrentPortalUniq == renderportal->CurrentPortalUniq && // [ZZ] instead, portal uniq check is made here (ds->maskedtexturecol != nullptr || ds->bFogBoundary)) - R_RenderMaskedSegRange(ds, r1, r2); + { + RenderDrawSegment renderer; + renderer.Render(ds, r1, r2); + } continue; } From 29dcea49d716dfe5ff72fdc766e7877eba5bd2b2 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 26 Jan 2017 08:13:39 +0100 Subject: [PATCH 759/912] Move RenderDrawSegment to its own file --- src/CMakeLists.txt | 1 + src/swrenderer/line/r_renderdrawsegment.cpp | 925 ++++++++++++++++++++ src/swrenderer/line/r_renderdrawsegment.h | 47 + src/swrenderer/scene/r_translucent_pass.cpp | 1 + src/swrenderer/segments/r_drawsegment.cpp | 881 ------------------- src/swrenderer/segments/r_drawsegment.h | 28 - src/swrenderer/things/r_particle.cpp | 17 +- src/swrenderer/things/r_visiblesprite.cpp | 1 + src/swrenderer/things/r_voxel.h | 2 + 9 files changed, 986 insertions(+), 917 deletions(-) create mode 100644 src/swrenderer/line/r_renderdrawsegment.cpp create mode 100644 src/swrenderer/line/r_renderdrawsegment.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index b5c2cc949c..47c4f7e585 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -863,6 +863,7 @@ set( FASTMATH_PCH_SOURCES swrenderer/line/r_walldraw.cpp swrenderer/line/r_wallsetup.cpp swrenderer/line/r_fogboundary.cpp + swrenderer/line/r_renderdrawsegment.cpp swrenderer/segments/r_clipsegment.cpp swrenderer/segments/r_drawsegment.cpp swrenderer/segments/r_portalsegment.cpp diff --git a/src/swrenderer/line/r_renderdrawsegment.cpp b/src/swrenderer/line/r_renderdrawsegment.cpp new file mode 100644 index 0000000000..8e61606934 --- /dev/null +++ 
b/src/swrenderer/line/r_renderdrawsegment.cpp @@ -0,0 +1,925 @@ +// +// Copyright (C) 1993-1996 by id Software, Inc. +// +// This source is available for distribution and/or modification +// only under the terms of the DOOM Source Code License as +// published by id Software. All rights reserved. +// +// The source is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// FITNESS FOR A PARTICULAR PURPOSE. See the DOOM Source Code License +// for more details. +// + +#include +#include "templates.h" +#include "doomdef.h" +#include "m_bbox.h" +#include "i_system.h" +#include "p_lnspec.h" +#include "p_setup.h" +#include "a_sharedglobal.h" +#include "g_level.h" +#include "p_effect.h" +#include "doomstat.h" +#include "r_state.h" +#include "v_palette.h" +#include "r_sky.h" +#include "po_man.h" +#include "r_data/colormaps.h" +#include "d_net.h" +#include "swrenderer/r_memory.h" +#include "swrenderer/drawers/r_draw.h" +#include "swrenderer/scene/r_3dfloors.h" +#include "swrenderer/scene/r_opaque_pass.h" +#include "swrenderer/scene/r_portal.h" +#include "swrenderer/line/r_wallsetup.h" +#include "swrenderer/line/r_walldraw.h" +#include "swrenderer/line/r_fogboundary.h" +#include "swrenderer/line/r_renderdrawsegment.h" +#include "swrenderer/segments/r_drawsegment.h" +#include "swrenderer/things/r_visiblesprite.h" +#include "swrenderer/scene/r_light.h" +#include "swrenderer/scene/r_viewport.h" + +EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor); + +namespace swrenderer +{ + void RenderDrawSegment::Render(DrawSegment *ds, int x1, int x2) + { + float *MaskedSWall = nullptr, MaskedScaleY = 0, rw_scalestep = 0; + fixed_t *maskedtexturecol = nullptr; + + FTexture *tex; + int i; + sector_t tempsec; // killough 4/13/98 + double texheight, texheightscale; + bool notrelevant = false; + double rowoffset; + bool wrap = false; + + const sector_t *sec; + + bool sprflipvert = false; + + curline = ds->curline; + + 
FDynamicColormap *patchstylecolormap = nullptr; + bool visible = R_SetPatchStyle(LegacyRenderStyles[curline->linedef->flags & ML_ADDTRANS ? STYLE_Add : STYLE_Translucent], + (float)MIN(curline->linedef->alpha, 1.), 0, 0, patchstylecolormap); + + if (!visible && !ds->bFogBoundary && !ds->bFakeBoundary) + { + return; + } + + NetUpdate(); + + frontsector = curline->frontsector; + backsector = curline->backsector; + + tex = TexMan(curline->sidedef->GetTexture(side_t::mid), true); + if (i_compatflags & COMPATF_MASKEDMIDTEX) + { + tex = tex->GetRawTexture(); + } + + // killough 4/13/98: get correct lightlevel for 2s normal textures + sec = RenderOpaquePass::Instance()->FakeFlat(frontsector, &tempsec, nullptr, nullptr, nullptr, 0, 0, 0, 0); + + FDynamicColormap *basecolormap = sec->ColorMap; // [RH] Set basecolormap + + int wallshade = ds->shade; + rw_lightstep = ds->lightstep; + rw_light = ds->light + (x1 - ds->x1) * rw_lightstep; + + Clip3DFloors *clip3d = Clip3DFloors::Instance(); + + if (fixedlightlev < 0) + { + if (!(clip3d->fake3D & FAKE3D_CLIPTOP)) + { + clip3d->sclipTop = sec->ceilingplane.ZatPoint(ViewPos); + } + for (i = frontsector->e->XFloor.lightlist.Size() - 1; i >= 0; i--) + { + if (clip3d->sclipTop <= frontsector->e->XFloor.lightlist[i].plane.Zat0()) + { + lightlist_t *lit = &frontsector->e->XFloor.lightlist[i]; + basecolormap = lit->extra_colormap; + wallshade = LIGHT2SHADE(curline->sidedef->GetLightLevel(ds->foggy, *lit->p_lightlevel, lit->lightsource != nullptr) + R_ActualExtraLight(ds->foggy)); + break; + } + } + } + + short *mfloorclip = ds->sprbottomclip - ds->x1; + short *mceilingclip = ds->sprtopclip - ds->x1; + double spryscale; + + // [RH] Draw fog partition + if (ds->bFogBoundary) + { + RenderFogBoundary::Render(x1, x2, mceilingclip, mfloorclip, wallshade, rw_light, rw_lightstep, basecolormap); + if (ds->maskedtexturecol == nullptr) + { + goto clearfog; + } + } + if ((ds->bFakeBoundary && !(ds->bFakeBoundary & 4)) || !visible) + { + goto 
clearfog; + } + + MaskedSWall = ds->swall - ds->x1; + MaskedScaleY = ds->yscale; + maskedtexturecol = ds->maskedtexturecol - ds->x1; + spryscale = ds->iscale + ds->iscalestep * (x1 - ds->x1); + rw_scalestep = ds->iscalestep; + + if (fixedlightlev >= 0) + R_SetColorMapLight((r_fullbrightignoresectorcolor) ? &FullNormalLight : basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); + else if (fixedcolormap != nullptr) + R_SetColorMapLight(fixedcolormap, 0, 0); + + // find positioning + texheight = tex->GetScaledHeightDouble(); + texheightscale = fabs(curline->sidedef->GetTextureYScale(side_t::mid)); + if (texheightscale != 1) + { + texheight = texheight / texheightscale; + } + + double texturemid; + if (curline->linedef->flags & ML_DONTPEGBOTTOM) + { + texturemid = MAX(frontsector->GetPlaneTexZ(sector_t::floor), backsector->GetPlaneTexZ(sector_t::floor)) + texheight; + } + else + { + texturemid = MIN(frontsector->GetPlaneTexZ(sector_t::ceiling), backsector->GetPlaneTexZ(sector_t::ceiling)); + } + + rowoffset = curline->sidedef->GetTextureYOffset(side_t::mid); + + wrap = (curline->linedef->flags & ML_WRAP_MIDTEX) || (curline->sidedef->Flags & WALLF_WRAP_MIDTEX); + if (!wrap) + { // Texture does not wrap vertically. + double textop; + + if (MaskedScaleY < 0) + { + MaskedScaleY = -MaskedScaleY; + sprflipvert = true; + } + if (tex->bWorldPanning) + { + // rowoffset is added before the multiply so that the masked texture will + // still be positioned in world units rather than texels. + texturemid += rowoffset - ViewPos.Z; + textop = texturemid; + texturemid *= MaskedScaleY; + } + else + { + // rowoffset is added outside the multiply so that it positions the texture + // by texels instead of world units. 
+ textop = texturemid + rowoffset / MaskedScaleY - ViewPos.Z; + texturemid = (texturemid - ViewPos.Z) * MaskedScaleY + rowoffset; + } + if (sprflipvert) + { + MaskedScaleY = -MaskedScaleY; + texturemid -= tex->GetHeight() << FRACBITS; + } + + // [RH] Don't bother drawing segs that are completely offscreen + if (globaldclip * ds->sz1 < -textop && globaldclip * ds->sz2 < -textop) + { // Texture top is below the bottom of the screen + goto clearfog; + } + + if (globaluclip * ds->sz1 > texheight - textop && globaluclip * ds->sz2 > texheight - textop) + { // Texture bottom is above the top of the screen + goto clearfog; + } + + if ((clip3d->fake3D & FAKE3D_CLIPBOTTOM) && textop < clip3d->sclipBottom - ViewPos.Z) + { + notrelevant = true; + goto clearfog; + } + if ((clip3d->fake3D & FAKE3D_CLIPTOP) && textop - texheight > clip3d->sclipTop - ViewPos.Z) + { + notrelevant = true; + goto clearfog; + } + + WallC.sz1 = ds->sz1; + WallC.sz2 = ds->sz2; + WallC.sx1 = ds->sx1; + WallC.sx2 = ds->sx2; + + if (clip3d->fake3D & FAKE3D_CLIPTOP) + { + wallupper.Project(textop < clip3d->sclipTop - ViewPos.Z ? textop : clip3d->sclipTop - ViewPos.Z, &WallC); + } + else + { + wallupper.Project(textop, &WallC); + } + if (clip3d->fake3D & FAKE3D_CLIPBOTTOM) + { + walllower.Project(textop - texheight > clip3d->sclipBottom - ViewPos.Z ? textop - texheight : clip3d->sclipBottom - ViewPos.Z, &WallC); + } + else + { + walllower.Project(textop - texheight, &WallC); + } + + for (i = x1; i < x2; i++) + { + if (wallupper.ScreenY[i] < mceilingclip[i]) + wallupper.ScreenY[i] = mceilingclip[i]; + } + for (i = x1; i < x2; i++) + { + if (walllower.ScreenY[i] > mfloorclip[i]) + walllower.ScreenY[i] = mfloorclip[i]; + } + + if (clip3d->CurrentSkybox) + { // Midtex clipping doesn't work properly with skyboxes, since you're normally below the floor + // or above the ceiling, so the appropriate end won't be clipped automatically when adding + // this drawseg. 
+ if ((curline->linedef->flags & ML_CLIP_MIDTEX) || + (curline->sidedef->Flags & WALLF_CLIP_MIDTEX)) + { + ClipMidtex(x1, x2); + } + } + + mfloorclip = walllower.ScreenY; + mceilingclip = wallupper.ScreenY; + + // draw the columns one at a time + if (visible) + { + for (int x = x1; x < x2; ++x) + { + if (fixedcolormap == nullptr && fixedlightlev < 0) + { + R_SetColorMapLight(basecolormap, rw_light, wallshade); + } + + fixed_t iscale = xs_Fix<16>::ToFix(MaskedSWall[x] * MaskedScaleY); + double sprtopscreen; + if (sprflipvert) + sprtopscreen = CenterY + texturemid * spryscale; + else + sprtopscreen = CenterY - texturemid * spryscale; + + R_DrawMaskedColumn(x, iscale, tex, maskedtexturecol[x], spryscale, sprtopscreen, sprflipvert, mfloorclip, mceilingclip); + + rw_light += rw_lightstep; + spryscale += rw_scalestep; + } + } + } + else + { // Texture does wrap vertically. + if (tex->bWorldPanning) + { + // rowoffset is added before the multiply so that the masked texture will + // still be positioned in world units rather than texels. + texturemid = (texturemid - ViewPos.Z + rowoffset) * MaskedScaleY; + } + else + { + // rowoffset is added outside the multiply so that it positions the texture + // by texels instead of world units. + texturemid = (texturemid - ViewPos.Z) * MaskedScaleY + rowoffset; + } + + WallC.sz1 = ds->sz1; + WallC.sz2 = ds->sz2; + WallC.sx1 = ds->sx1; + WallC.sx2 = ds->sx2; + + if (clip3d->CurrentSkybox) + { // Midtex clipping doesn't work properly with skyboxes, since you're normally below the floor + // or above the ceiling, so the appropriate end won't be clipped automatically when adding + // this drawseg. 
+ if ((curline->linedef->flags & ML_CLIP_MIDTEX) || + (curline->sidedef->Flags & WALLF_CLIP_MIDTEX)) + { + ClipMidtex(x1, x2); + } + } + + if (clip3d->fake3D & FAKE3D_CLIPTOP) + { + wallupper.Project(clip3d->sclipTop - ViewPos.Z, &WallC); + for (i = x1; i < x2; i++) + { + if (wallupper.ScreenY[i] < mceilingclip[i]) + wallupper.ScreenY[i] = mceilingclip[i]; + } + mceilingclip = wallupper.ScreenY; + } + if (clip3d->fake3D & FAKE3D_CLIPBOTTOM) + { + walllower.Project(clip3d->sclipBottom - ViewPos.Z, &WallC); + for (i = x1; i < x2; i++) + { + if (walllower.ScreenY[i] > mfloorclip[i]) + walllower.ScreenY[i] = mfloorclip[i]; + } + mfloorclip = walllower.ScreenY; + } + + rw_offset = 0; + rw_pic = tex; + + double top, bot; + GetMaskedWallTopBottom(ds, top, bot); + + RenderWallPart renderWallpart; + renderWallpart.Render(frontsector, curline, WallC, rw_pic, x1, x2, mceilingclip, mfloorclip, texturemid, MaskedSWall, maskedtexturecol, ds->yscale, top, bot, true, wallshade, rw_offset, rw_light, rw_lightstep, nullptr, ds->foggy, basecolormap); + } + + clearfog: + if (ds->bFakeBoundary & 3) + { + RenderFakeWallRange(ds, x1, x2, wallshade); + } + if (!notrelevant) + { + if (clip3d->fake3D & FAKE3D_REFRESHCLIP) + { + if (!wrap) + { + assert(ds->bkup != nullptr); + memcpy(ds->sprtopclip, ds->bkup, (ds->x2 - ds->x1) * 2); + } + } + else + { + fillshort(ds->sprtopclip - ds->x1 + x1, x2 - x1, viewheight); + } + } + return; + } + + // kg3D - render one fake wall + void RenderDrawSegment::RenderFakeWall(DrawSegment *ds, int x1, int x2, F3DFloor *rover, int wallshade, FDynamicColormap *basecolormap) + { + int i; + double xscale; + double yscale; + + fixed_t Alpha = Scale(rover->alpha, OPAQUE, 255); + bool visible = R_SetPatchStyle(LegacyRenderStyles[rover->flags & FF_ADDITIVETRANS ? 
STYLE_Add : STYLE_Translucent], + Alpha, 0, 0, basecolormap); + + if (!visible) + return; + + rw_lightstep = ds->lightstep; + rw_light = ds->light + (x1 - ds->x1) * rw_lightstep; + + short *mfloorclip = ds->sprbottomclip - ds->x1; + short *mceilingclip = ds->sprtopclip - ds->x1; + + //double spryscale = ds->iscale + ds->iscalestep * (x1 - ds->x1); + float *MaskedSWall = ds->swall - ds->x1; + + // find positioning + side_t *scaledside; + side_t::ETexpart scaledpart; + if (rover->flags & FF_UPPERTEXTURE) + { + scaledside = curline->sidedef; + scaledpart = side_t::top; + } + else if (rover->flags & FF_LOWERTEXTURE) + { + scaledside = curline->sidedef; + scaledpart = side_t::bottom; + } + else + { + scaledside = rover->master->sidedef[0]; + scaledpart = side_t::mid; + } + xscale = rw_pic->Scale.X * scaledside->GetTextureXScale(scaledpart); + yscale = rw_pic->Scale.Y * scaledside->GetTextureYScale(scaledpart); + + double rowoffset = curline->sidedef->GetTextureYOffset(side_t::mid) + rover->master->sidedef[0]->GetTextureYOffset(side_t::mid); + double planez = rover->model->GetPlaneTexZ(sector_t::ceiling); + rw_offset = FLOAT2FIXED(curline->sidedef->GetTextureXOffset(side_t::mid) + rover->master->sidedef[0]->GetTextureXOffset(side_t::mid)); + if (rowoffset < 0) + { + rowoffset += rw_pic->GetHeight(); + } + double texturemid = (planez - ViewPos.Z) * yscale; + if (rw_pic->bWorldPanning) + { + // rowoffset is added before the multiply so that the masked texture will + // still be positioned in world units rather than texels. + + texturemid = texturemid + rowoffset * yscale; + rw_offset = xs_RoundToInt(rw_offset * xscale); + } + else + { + // rowoffset is added outside the multiply so that it positions the texture + // by texels instead of world units. + texturemid += rowoffset; + } + + if (fixedlightlev >= 0) + R_SetColorMapLight((r_fullbrightignoresectorcolor) ? 
&FullNormalLight : basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); + else if (fixedcolormap != nullptr) + R_SetColorMapLight(fixedcolormap, 0, 0); + + WallC.sz1 = ds->sz1; + WallC.sz2 = ds->sz2; + WallC.sx1 = ds->sx1; + WallC.sx2 = ds->sx2; + WallC.tleft.X = ds->cx; + WallC.tleft.Y = ds->cy; + WallC.tright.X = ds->cx + ds->cdx; + WallC.tright.Y = ds->cy + ds->cdy; + WallT = ds->tmapvals; + + Clip3DFloors *clip3d = Clip3DFloors::Instance(); + wallupper.Project(clip3d->sclipTop - ViewPos.Z, &WallC); + walllower.Project(clip3d->sclipBottom - ViewPos.Z, &WallC); + + for (i = x1; i < x2; i++) + { + if (wallupper.ScreenY[i] < mceilingclip[i]) + wallupper.ScreenY[i] = mceilingclip[i]; + } + for (i = x1; i < x2; i++) + { + if (walllower.ScreenY[i] > mfloorclip[i]) + walllower.ScreenY[i] = mfloorclip[i]; + } + + ProjectedWallTexcoords walltexcoords; + walltexcoords.ProjectPos(curline->sidedef->TexelLength*xscale, ds->sx1, ds->sx2, WallT); + + double top, bot; + GetMaskedWallTopBottom(ds, top, bot); + + RenderWallPart renderWallpart; + renderWallpart.Render(frontsector, curline, WallC, rw_pic, x1, x2, wallupper.ScreenY, walllower.ScreenY, texturemid, MaskedSWall, walltexcoords.UPos, yscale, top, bot, true, wallshade, rw_offset, rw_light, rw_lightstep, nullptr, ds->foggy, basecolormap); + } + + // kg3D - walls of fake floors + void RenderDrawSegment::RenderFakeWallRange(DrawSegment *ds, int x1, int x2, int wallshade) + { + FTexture *const DONT_DRAW = ((FTexture*)(intptr_t)-1); + int i, j; + F3DFloor *rover, *fover = nullptr; + int passed, last; + double floorHeight; + double ceilingHeight; + + curline = ds->curline; + + frontsector = curline->frontsector; + backsector = curline->backsector; + + if (backsector == nullptr) + { + return; + } + if ((ds->bFakeBoundary & 3) == 2) + { + sector_t *sec = backsector; + backsector = frontsector; + frontsector = sec; + } + + floorHeight = backsector->CenterFloor(); + ceilingHeight = backsector->CenterCeiling(); + + Clip3DFloors 
*clip3d = Clip3DFloors::Instance(); + + // maybe fix clipheights + if (!(clip3d->fake3D & FAKE3D_CLIPBOTTOM)) clip3d->sclipBottom = floorHeight; + if (!(clip3d->fake3D & FAKE3D_CLIPTOP)) clip3d->sclipTop = ceilingHeight; + + // maybe not visible + if (clip3d->sclipBottom >= frontsector->CenterCeiling()) return; + if (clip3d->sclipTop <= frontsector->CenterFloor()) return; + + if (clip3d->fake3D & FAKE3D_DOWN2UP) + { // bottom to viewz + last = 0; + for (i = backsector->e->XFloor.ffloors.Size() - 1; i >= 0; i--) + { + rover = backsector->e->XFloor.ffloors[i]; + if (!(rover->flags & FF_EXISTS)) continue; + + // visible? + passed = 0; + if (!(rover->flags & FF_RENDERSIDES) || rover->top.plane->isSlope() || rover->bottom.plane->isSlope() || + rover->top.plane->Zat0() <= clip3d->sclipBottom || + rover->bottom.plane->Zat0() >= ceilingHeight || + rover->top.plane->Zat0() <= floorHeight) + { + if (!i) + { + passed = 1; + } + else + { + continue; + } + } + + rw_pic = nullptr; + if (rover->bottom.plane->Zat0() >= clip3d->sclipTop || passed) + { + if (last) + { + break; + } + // maybe wall from inside rendering? + fover = nullptr; + for (j = frontsector->e->XFloor.ffloors.Size() - 1; j >= 0; j--) + { + fover = frontsector->e->XFloor.ffloors[j]; + if (fover->model == rover->model) + { // never + fover = nullptr; + break; + } + if (!(fover->flags & FF_EXISTS)) continue; + if (!(fover->flags & FF_RENDERSIDES)) continue; + // no sloped walls, it's bugged + if (fover->top.plane->isSlope() || fover->bottom.plane->isSlope()) continue; + + // visible? + if (fover->top.plane->Zat0() <= clip3d->sclipBottom) continue; // no + if (fover->bottom.plane->Zat0() >= clip3d->sclipTop) + { // no, last possible + fover = nullptr; + break; + } + // it is, render inside? 
+ if (!(fover->flags & (FF_BOTHPLANES | FF_INVERTPLANES))) + { // no + fover = nullptr; + } + break; + } + // nothing + if (!fover || j == -1) + { + break; + } + // correct texture + if (fover->flags & rover->flags & FF_SWIMMABLE) + { // don't ever draw (but treat as something has been found) + rw_pic = DONT_DRAW; + } + else if (fover->flags & FF_UPPERTEXTURE) + { + rw_pic = TexMan(curline->sidedef->GetTexture(side_t::top), true); + } + else if (fover->flags & FF_LOWERTEXTURE) + { + rw_pic = TexMan(curline->sidedef->GetTexture(side_t::bottom), true); + } + else + { + rw_pic = TexMan(fover->master->sidedef[0]->GetTexture(side_t::mid), true); + } + } + else if (frontsector->e->XFloor.ffloors.Size()) + { + // maybe not visible? + fover = nullptr; + for (j = frontsector->e->XFloor.ffloors.Size() - 1; j >= 0; j--) + { + fover = frontsector->e->XFloor.ffloors[j]; + if (fover->model == rover->model) // never + { + break; + } + if (!(fover->flags & FF_EXISTS)) continue; + if (!(fover->flags & FF_RENDERSIDES)) continue; + // no sloped walls, it's bugged + if (fover->top.plane->isSlope() || fover->bottom.plane->isSlope()) continue; + + // visible? 
+ if (fover->top.plane->Zat0() <= clip3d->sclipBottom) continue; // no + if (fover->bottom.plane->Zat0() >= clip3d->sclipTop) + { // visible, last possible + fover = nullptr; + break; + } + if ((fover->flags & FF_SOLID) == (rover->flags & FF_SOLID) && + !(!(fover->flags & FF_SOLID) && (fover->alpha == 255 || rover->alpha == 255)) + ) + { + break; + } + if (fover->flags & rover->flags & FF_SWIMMABLE) + { // don't ever draw (but treat as something has been found) + rw_pic = DONT_DRAW; + } + fover = nullptr; // visible + break; + } + if (fover && j != -1) + { + fover = nullptr; + last = 1; + continue; // not visible + } + } + if (!rw_pic) + { + fover = nullptr; + if (rover->flags & FF_UPPERTEXTURE) + { + rw_pic = TexMan(curline->sidedef->GetTexture(side_t::top), true); + } + else if (rover->flags & FF_LOWERTEXTURE) + { + rw_pic = TexMan(curline->sidedef->GetTexture(side_t::bottom), true); + } + else + { + rw_pic = TexMan(rover->master->sidedef[0]->GetTexture(side_t::mid), true); + } + } + // correct colors now + FDynamicColormap *basecolormap = frontsector->ColorMap; + wallshade = ds->shade; + if (fixedlightlev < 0) + { + if ((ds->bFakeBoundary & 3) == 2) + { + for (j = backsector->e->XFloor.lightlist.Size() - 1; j >= 0; j--) + { + if (clip3d->sclipTop <= backsector->e->XFloor.lightlist[j].plane.Zat0()) + { + lightlist_t *lit = &backsector->e->XFloor.lightlist[j]; + basecolormap = lit->extra_colormap; + wallshade = LIGHT2SHADE(curline->sidedef->GetLightLevel(ds->foggy, *lit->p_lightlevel, lit->lightsource != nullptr) + R_ActualExtraLight(ds->foggy)); + break; + } + } + } + else + { + for (j = frontsector->e->XFloor.lightlist.Size() - 1; j >= 0; j--) + { + if (clip3d->sclipTop <= frontsector->e->XFloor.lightlist[j].plane.Zat0()) + { + lightlist_t *lit = &frontsector->e->XFloor.lightlist[j]; + basecolormap = lit->extra_colormap; + wallshade = LIGHT2SHADE(curline->sidedef->GetLightLevel(ds->foggy, *lit->p_lightlevel, lit->lightsource != nullptr) + 
R_ActualExtraLight(ds->foggy)); + break; + } + } + } + } + if (rw_pic != DONT_DRAW) + { + RenderFakeWall(ds, x1, x2, fover ? fover : rover, wallshade, basecolormap); + } + else rw_pic = nullptr; + break; + } + } + else + { // top to viewz + for (i = 0; i < (int)backsector->e->XFloor.ffloors.Size(); i++) + { + rover = backsector->e->XFloor.ffloors[i]; + if (!(rover->flags & FF_EXISTS)) continue; + + // visible? + passed = 0; + if (!(rover->flags & FF_RENDERSIDES) || + rover->top.plane->isSlope() || rover->bottom.plane->isSlope() || + rover->bottom.plane->Zat0() >= clip3d->sclipTop || + rover->top.plane->Zat0() <= floorHeight || + rover->bottom.plane->Zat0() >= ceilingHeight) + { + if ((unsigned)i == backsector->e->XFloor.ffloors.Size() - 1) + { + passed = 1; + } + else + { + continue; + } + } + rw_pic = nullptr; + if (rover->top.plane->Zat0() <= clip3d->sclipBottom || passed) + { // maybe wall from inside rendering? + fover = nullptr; + for (j = 0; j < (int)frontsector->e->XFloor.ffloors.Size(); j++) + { + fover = frontsector->e->XFloor.ffloors[j]; + if (fover->model == rover->model) + { // never + fover = nullptr; + break; + } + if (!(fover->flags & FF_EXISTS)) continue; + if (!(fover->flags & FF_RENDERSIDES)) continue; + // no sloped walls, it's bugged + if (fover->top.plane->isSlope() || fover->bottom.plane->isSlope()) continue; + + // visible? + if (fover->bottom.plane->Zat0() >= clip3d->sclipTop) continue; // no + if (fover->top.plane->Zat0() <= clip3d->sclipBottom) + { // no, last possible + fover = nullptr; + break; + } + // it is, render inside? 
+ if (!(fover->flags & (FF_BOTHPLANES | FF_INVERTPLANES))) + { // no + fover = nullptr; + } + break; + } + // nothing + if (!fover || (unsigned)j == frontsector->e->XFloor.ffloors.Size()) + { + break; + } + // correct texture + if (fover->flags & rover->flags & FF_SWIMMABLE) + { + rw_pic = DONT_DRAW; // don't ever draw (but treat as something has been found) + } + else if (fover->flags & FF_UPPERTEXTURE) + { + rw_pic = TexMan(curline->sidedef->GetTexture(side_t::top), true); + } + else if (fover->flags & FF_LOWERTEXTURE) + { + rw_pic = TexMan(curline->sidedef->GetTexture(side_t::bottom), true); + } + else + { + rw_pic = TexMan(fover->master->sidedef[0]->GetTexture(side_t::mid), true); + } + } + else if (frontsector->e->XFloor.ffloors.Size()) + { // maybe not visible? + fover = nullptr; + for (j = 0; j < (int)frontsector->e->XFloor.ffloors.Size(); j++) + { + fover = frontsector->e->XFloor.ffloors[j]; + if (fover->model == rover->model) + { // never + break; + } + if (!(fover->flags & FF_EXISTS)) continue; + if (!(fover->flags & FF_RENDERSIDES)) continue; + // no sloped walls, its bugged + if (fover->top.plane->isSlope() || fover->bottom.plane->isSlope()) continue; + + // visible? 
+ if (fover->bottom.plane->Zat0() >= clip3d->sclipTop) continue; // no + if (fover->top.plane->Zat0() <= clip3d->sclipBottom) + { // visible, last possible + fover = nullptr; + break; + } + if ((fover->flags & FF_SOLID) == (rover->flags & FF_SOLID) && + !(!(rover->flags & FF_SOLID) && (fover->alpha == 255 || rover->alpha == 255)) + ) + { + break; + } + if (fover->flags & rover->flags & FF_SWIMMABLE) + { // don't ever draw (but treat as something has been found) + rw_pic = DONT_DRAW; + } + fover = nullptr; // visible + break; + } + if (fover && (unsigned)j != frontsector->e->XFloor.ffloors.Size()) + { // not visible + break; + } + } + if (rw_pic == nullptr) + { + fover = nullptr; + if (rover->flags & FF_UPPERTEXTURE) + { + rw_pic = TexMan(curline->sidedef->GetTexture(side_t::top), true); + } + else if (rover->flags & FF_LOWERTEXTURE) + { + rw_pic = TexMan(curline->sidedef->GetTexture(side_t::bottom), true); + } + else + { + rw_pic = TexMan(rover->master->sidedef[0]->GetTexture(side_t::mid), true); + } + } + // correct colors now + FDynamicColormap *basecolormap = frontsector->ColorMap; + wallshade = ds->shade; + if (fixedlightlev < 0) + { + if ((ds->bFakeBoundary & 3) == 2) + { + for (j = backsector->e->XFloor.lightlist.Size() - 1; j >= 0; j--) + { + if (clip3d->sclipTop <= backsector->e->XFloor.lightlist[j].plane.Zat0()) + { + lightlist_t *lit = &backsector->e->XFloor.lightlist[j]; + basecolormap = lit->extra_colormap; + wallshade = LIGHT2SHADE(curline->sidedef->GetLightLevel(ds->foggy, *lit->p_lightlevel, lit->lightsource != nullptr) + R_ActualExtraLight(ds->foggy)); + break; + } + } + } + else + { + for (j = frontsector->e->XFloor.lightlist.Size() - 1; j >= 0; j--) + { + if (clip3d->sclipTop <= frontsector->e->XFloor.lightlist[j].plane.Zat0()) + { + lightlist_t *lit = &frontsector->e->XFloor.lightlist[j]; + basecolormap = lit->extra_colormap; + wallshade = LIGHT2SHADE(curline->sidedef->GetLightLevel(ds->foggy, *lit->p_lightlevel, lit->lightsource != nullptr) + 
R_ActualExtraLight(ds->foggy)); + break; + } + } + } + } + + if (rw_pic != DONT_DRAW) + { + RenderFakeWall(ds, x1, x2, fover ? fover : rover, wallshade, basecolormap); + } + else + { + rw_pic = nullptr; + } + break; + } + } + return; + } + + // Clip a midtexture to the floor and ceiling of the sector in front of it. + void RenderDrawSegment::ClipMidtex(int x1, int x2) + { + ProjectedWallLine most; + + RenderPortal *renderportal = RenderPortal::Instance(); + + most.Project(curline->frontsector->ceilingplane, &WallC, curline, renderportal->MirrorFlags & RF_XFLIP); + for (int i = x1; i < x2; ++i) + { + if (wallupper.ScreenY[i] < most.ScreenY[i]) + wallupper.ScreenY[i] = most.ScreenY[i]; + } + most.Project(curline->frontsector->floorplane, &WallC, curline, renderportal->MirrorFlags & RF_XFLIP); + for (int i = x1; i < x2; ++i) + { + if (walllower.ScreenY[i] > most.ScreenY[i]) + walllower.ScreenY[i] = most.ScreenY[i]; + } + } + + void RenderDrawSegment::GetMaskedWallTopBottom(DrawSegment *ds, double &top, double &bot) + { + double frontcz1 = ds->curline->frontsector->ceilingplane.ZatPoint(ds->curline->v1); + double frontfz1 = ds->curline->frontsector->floorplane.ZatPoint(ds->curline->v1); + double frontcz2 = ds->curline->frontsector->ceilingplane.ZatPoint(ds->curline->v2); + double frontfz2 = ds->curline->frontsector->floorplane.ZatPoint(ds->curline->v2); + top = MAX(frontcz1, frontcz2); + bot = MIN(frontfz1, frontfz2); + + Clip3DFloors *clip3d = Clip3DFloors::Instance(); + if (clip3d->fake3D & FAKE3D_CLIPTOP) + { + top = MIN(top, clip3d->sclipTop); + } + if (clip3d->fake3D & FAKE3D_CLIPBOTTOM) + { + bot = MAX(bot, clip3d->sclipBottom); + } + } +} diff --git a/src/swrenderer/line/r_renderdrawsegment.h b/src/swrenderer/line/r_renderdrawsegment.h new file mode 100644 index 0000000000..b3a3ee6c00 --- /dev/null +++ b/src/swrenderer/line/r_renderdrawsegment.h @@ -0,0 +1,47 @@ +// +// Copyright (C) 1993-1996 by id Software, Inc. 
+// +// This source is available for distribution and/or modification +// only under the terms of the DOOM Source Code License as +// published by id Software. All rights reserved. +// +// The source is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// FITNESS FOR A PARTICULAR PURPOSE. See the DOOM Source Code License +// for more details. +// + +#pragma once + +#include "swrenderer/segments/r_drawsegment.h" + +namespace swrenderer +{ + class RenderDrawSegment + { + public: + void Render(DrawSegment *ds, int x1, int x2); + + private: + void ClipMidtex(int x1, int x2); + void RenderFakeWall(DrawSegment *ds, int x1, int x2, F3DFloor *rover, int wallshade, FDynamicColormap *basecolormap); + void RenderFakeWallRange(DrawSegment *ds, int x1, int x2, int wallshade); + void GetMaskedWallTopBottom(DrawSegment *ds, double &top, double &bot); + + sector_t *frontsector = nullptr; + sector_t *backsector = nullptr; + + seg_t *curline = nullptr; + + FWallCoords WallC; + FWallTmapVals WallT; + + float rw_light = 0.0f; + float rw_lightstep = 0.0f; + fixed_t rw_offset = 0; + FTexture *rw_pic = nullptr; + + ProjectedWallLine wallupper; + ProjectedWallLine walllower; + }; +} diff --git a/src/swrenderer/scene/r_translucent_pass.cpp b/src/swrenderer/scene/r_translucent_pass.cpp index d95fda1e0b..21e9a50dfd 100644 --- a/src/swrenderer/scene/r_translucent_pass.cpp +++ b/src/swrenderer/scene/r_translucent_pass.cpp @@ -36,6 +36,7 @@ #include "swrenderer/scene/r_light.h" #include "swrenderer/plane/r_visibleplane.h" #include "swrenderer/plane/r_visibleplanelist.h" +#include "swrenderer/line/r_renderdrawsegment.h" #include "swrenderer/r_memory.h" EXTERN_CVAR(Int, r_drawfuzz) diff --git a/src/swrenderer/segments/r_drawsegment.cpp b/src/swrenderer/segments/r_drawsegment.cpp index 586195aa4e..a9e1479024 100644 --- a/src/swrenderer/segments/r_drawsegment.cpp +++ b/src/swrenderer/segments/r_drawsegment.cpp @@ -41,8 +41,6 @@ 
#include "swrenderer/scene/r_light.h" #include "swrenderer/scene/r_viewport.h" -EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor); - namespace swrenderer { DrawSegmentList *DrawSegmentList::Instance() @@ -87,883 +85,4 @@ namespace swrenderer return ds_p++; } - - ///////////////////////////////////////////////////////////////////////// - - // Clip a midtexture to the floor and ceiling of the sector in front of it. - void RenderDrawSegment::ClipMidtex(int x1, int x2) - { - ProjectedWallLine most; - - RenderPortal *renderportal = RenderPortal::Instance(); - - most.Project(curline->frontsector->ceilingplane, &WallC, curline, renderportal->MirrorFlags & RF_XFLIP); - for (int i = x1; i < x2; ++i) - { - if (wallupper.ScreenY[i] < most.ScreenY[i]) - wallupper.ScreenY[i] = most.ScreenY[i]; - } - most.Project(curline->frontsector->floorplane, &WallC, curline, renderportal->MirrorFlags & RF_XFLIP); - for (int i = x1; i < x2; ++i) - { - if (walllower.ScreenY[i] > most.ScreenY[i]) - walllower.ScreenY[i] = most.ScreenY[i]; - } - } - - void RenderDrawSegment::GetMaskedWallTopBottom(DrawSegment *ds, double &top, double &bot) - { - double frontcz1 = ds->curline->frontsector->ceilingplane.ZatPoint(ds->curline->v1); - double frontfz1 = ds->curline->frontsector->floorplane.ZatPoint(ds->curline->v1); - double frontcz2 = ds->curline->frontsector->ceilingplane.ZatPoint(ds->curline->v2); - double frontfz2 = ds->curline->frontsector->floorplane.ZatPoint(ds->curline->v2); - top = MAX(frontcz1, frontcz2); - bot = MIN(frontfz1, frontfz2); - - Clip3DFloors *clip3d = Clip3DFloors::Instance(); - if (clip3d->fake3D & FAKE3D_CLIPTOP) - { - top = MIN(top, clip3d->sclipTop); - } - if (clip3d->fake3D & FAKE3D_CLIPBOTTOM) - { - bot = MAX(bot, clip3d->sclipBottom); - } - } - - void RenderDrawSegment::Render(DrawSegment *ds, int x1, int x2) - { - float *MaskedSWall = nullptr, MaskedScaleY = 0, rw_scalestep = 0; - fixed_t *maskedtexturecol = nullptr; - - FTexture *tex; - int i; - sector_t tempsec; // 
killough 4/13/98 - double texheight, texheightscale; - bool notrelevant = false; - double rowoffset; - bool wrap = false; - - const sector_t *sec; - - bool sprflipvert = false; - - curline = ds->curline; - - FDynamicColormap *patchstylecolormap = nullptr; - bool visible = R_SetPatchStyle(LegacyRenderStyles[curline->linedef->flags & ML_ADDTRANS ? STYLE_Add : STYLE_Translucent], - (float)MIN(curline->linedef->alpha, 1.), 0, 0, patchstylecolormap); - - if (!visible && !ds->bFogBoundary && !ds->bFakeBoundary) - { - return; - } - - NetUpdate(); - - frontsector = curline->frontsector; - backsector = curline->backsector; - - tex = TexMan(curline->sidedef->GetTexture(side_t::mid), true); - if (i_compatflags & COMPATF_MASKEDMIDTEX) - { - tex = tex->GetRawTexture(); - } - - // killough 4/13/98: get correct lightlevel for 2s normal textures - sec = RenderOpaquePass::Instance()->FakeFlat(frontsector, &tempsec, nullptr, nullptr, nullptr, 0, 0, 0, 0); - - FDynamicColormap *basecolormap = sec->ColorMap; // [RH] Set basecolormap - - int wallshade = ds->shade; - rw_lightstep = ds->lightstep; - rw_light = ds->light + (x1 - ds->x1) * rw_lightstep; - - Clip3DFloors *clip3d = Clip3DFloors::Instance(); - - if (fixedlightlev < 0) - { - if (!(clip3d->fake3D & FAKE3D_CLIPTOP)) - { - clip3d->sclipTop = sec->ceilingplane.ZatPoint(ViewPos); - } - for (i = frontsector->e->XFloor.lightlist.Size() - 1; i >= 0; i--) - { - if (clip3d->sclipTop <= frontsector->e->XFloor.lightlist[i].plane.Zat0()) - { - lightlist_t *lit = &frontsector->e->XFloor.lightlist[i]; - basecolormap = lit->extra_colormap; - wallshade = LIGHT2SHADE(curline->sidedef->GetLightLevel(ds->foggy, *lit->p_lightlevel, lit->lightsource != nullptr) + R_ActualExtraLight(ds->foggy)); - break; - } - } - } - - short *mfloorclip = ds->sprbottomclip - ds->x1; - short *mceilingclip = ds->sprtopclip - ds->x1; - double spryscale; - - // [RH] Draw fog partition - if (ds->bFogBoundary) - { - RenderFogBoundary::Render(x1, x2, mceilingclip, 
mfloorclip, wallshade, rw_light, rw_lightstep, basecolormap); - if (ds->maskedtexturecol == nullptr) - { - goto clearfog; - } - } - if ((ds->bFakeBoundary && !(ds->bFakeBoundary & 4)) || !visible) - { - goto clearfog; - } - - MaskedSWall = ds->swall - ds->x1; - MaskedScaleY = ds->yscale; - maskedtexturecol = ds->maskedtexturecol - ds->x1; - spryscale = ds->iscale + ds->iscalestep * (x1 - ds->x1); - rw_scalestep = ds->iscalestep; - - if (fixedlightlev >= 0) - R_SetColorMapLight((r_fullbrightignoresectorcolor) ? &FullNormalLight : basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); - else if (fixedcolormap != nullptr) - R_SetColorMapLight(fixedcolormap, 0, 0); - - // find positioning - texheight = tex->GetScaledHeightDouble(); - texheightscale = fabs(curline->sidedef->GetTextureYScale(side_t::mid)); - if (texheightscale != 1) - { - texheight = texheight / texheightscale; - } - - double texturemid; - if (curline->linedef->flags & ML_DONTPEGBOTTOM) - { - texturemid = MAX(frontsector->GetPlaneTexZ(sector_t::floor), backsector->GetPlaneTexZ(sector_t::floor)) + texheight; - } - else - { - texturemid = MIN(frontsector->GetPlaneTexZ(sector_t::ceiling), backsector->GetPlaneTexZ(sector_t::ceiling)); - } - - rowoffset = curline->sidedef->GetTextureYOffset(side_t::mid); - - wrap = (curline->linedef->flags & ML_WRAP_MIDTEX) || (curline->sidedef->Flags & WALLF_WRAP_MIDTEX); - if (!wrap) - { // Texture does not wrap vertically. - double textop; - - if (MaskedScaleY < 0) - { - MaskedScaleY = -MaskedScaleY; - sprflipvert = true; - } - if (tex->bWorldPanning) - { - // rowoffset is added before the multiply so that the masked texture will - // still be positioned in world units rather than texels. - texturemid += rowoffset - ViewPos.Z; - textop = texturemid; - texturemid *= MaskedScaleY; - } - else - { - // rowoffset is added outside the multiply so that it positions the texture - // by texels instead of world units. 
- textop = texturemid + rowoffset / MaskedScaleY - ViewPos.Z; - texturemid = (texturemid - ViewPos.Z) * MaskedScaleY + rowoffset; - } - if (sprflipvert) - { - MaskedScaleY = -MaskedScaleY; - texturemid -= tex->GetHeight() << FRACBITS; - } - - // [RH] Don't bother drawing segs that are completely offscreen - if (globaldclip * ds->sz1 < -textop && globaldclip * ds->sz2 < -textop) - { // Texture top is below the bottom of the screen - goto clearfog; - } - - if (globaluclip * ds->sz1 > texheight - textop && globaluclip * ds->sz2 > texheight - textop) - { // Texture bottom is above the top of the screen - goto clearfog; - } - - if ((clip3d->fake3D & FAKE3D_CLIPBOTTOM) && textop < clip3d->sclipBottom - ViewPos.Z) - { - notrelevant = true; - goto clearfog; - } - if ((clip3d->fake3D & FAKE3D_CLIPTOP) && textop - texheight > clip3d->sclipTop - ViewPos.Z) - { - notrelevant = true; - goto clearfog; - } - - WallC.sz1 = ds->sz1; - WallC.sz2 = ds->sz2; - WallC.sx1 = ds->sx1; - WallC.sx2 = ds->sx2; - - if (clip3d->fake3D & FAKE3D_CLIPTOP) - { - wallupper.Project(textop < clip3d->sclipTop - ViewPos.Z ? textop : clip3d->sclipTop - ViewPos.Z, &WallC); - } - else - { - wallupper.Project(textop, &WallC); - } - if (clip3d->fake3D & FAKE3D_CLIPBOTTOM) - { - walllower.Project(textop - texheight > clip3d->sclipBottom - ViewPos.Z ? textop - texheight : clip3d->sclipBottom - ViewPos.Z, &WallC); - } - else - { - walllower.Project(textop - texheight, &WallC); - } - - for (i = x1; i < x2; i++) - { - if (wallupper.ScreenY[i] < mceilingclip[i]) - wallupper.ScreenY[i] = mceilingclip[i]; - } - for (i = x1; i < x2; i++) - { - if (walllower.ScreenY[i] > mfloorclip[i]) - walllower.ScreenY[i] = mfloorclip[i]; - } - - if (clip3d->CurrentSkybox) - { // Midtex clipping doesn't work properly with skyboxes, since you're normally below the floor - // or above the ceiling, so the appropriate end won't be clipped automatically when adding - // this drawseg. 
- if ((curline->linedef->flags & ML_CLIP_MIDTEX) || - (curline->sidedef->Flags & WALLF_CLIP_MIDTEX)) - { - ClipMidtex(x1, x2); - } - } - - mfloorclip = walllower.ScreenY; - mceilingclip = wallupper.ScreenY; - - // draw the columns one at a time - if (visible) - { - for (int x = x1; x < x2; ++x) - { - if (fixedcolormap == nullptr && fixedlightlev < 0) - { - R_SetColorMapLight(basecolormap, rw_light, wallshade); - } - - fixed_t iscale = xs_Fix<16>::ToFix(MaskedSWall[x] * MaskedScaleY); - double sprtopscreen; - if (sprflipvert) - sprtopscreen = CenterY + texturemid * spryscale; - else - sprtopscreen = CenterY - texturemid * spryscale; - - R_DrawMaskedColumn(x, iscale, tex, maskedtexturecol[x], spryscale, sprtopscreen, sprflipvert, mfloorclip, mceilingclip); - - rw_light += rw_lightstep; - spryscale += rw_scalestep; - } - } - } - else - { // Texture does wrap vertically. - if (tex->bWorldPanning) - { - // rowoffset is added before the multiply so that the masked texture will - // still be positioned in world units rather than texels. - texturemid = (texturemid - ViewPos.Z + rowoffset) * MaskedScaleY; - } - else - { - // rowoffset is added outside the multiply so that it positions the texture - // by texels instead of world units. - texturemid = (texturemid - ViewPos.Z) * MaskedScaleY + rowoffset; - } - - WallC.sz1 = ds->sz1; - WallC.sz2 = ds->sz2; - WallC.sx1 = ds->sx1; - WallC.sx2 = ds->sx2; - - if (clip3d->CurrentSkybox) - { // Midtex clipping doesn't work properly with skyboxes, since you're normally below the floor - // or above the ceiling, so the appropriate end won't be clipped automatically when adding - // this drawseg. 
- if ((curline->linedef->flags & ML_CLIP_MIDTEX) || - (curline->sidedef->Flags & WALLF_CLIP_MIDTEX)) - { - ClipMidtex(x1, x2); - } - } - - if (clip3d->fake3D & FAKE3D_CLIPTOP) - { - wallupper.Project(clip3d->sclipTop - ViewPos.Z, &WallC); - for (i = x1; i < x2; i++) - { - if (wallupper.ScreenY[i] < mceilingclip[i]) - wallupper.ScreenY[i] = mceilingclip[i]; - } - mceilingclip = wallupper.ScreenY; - } - if (clip3d->fake3D & FAKE3D_CLIPBOTTOM) - { - walllower.Project(clip3d->sclipBottom - ViewPos.Z, &WallC); - for (i = x1; i < x2; i++) - { - if (walllower.ScreenY[i] > mfloorclip[i]) - walllower.ScreenY[i] = mfloorclip[i]; - } - mfloorclip = walllower.ScreenY; - } - - rw_offset = 0; - rw_pic = tex; - - double top, bot; - GetMaskedWallTopBottom(ds, top, bot); - - RenderWallPart renderWallpart; - renderWallpart.Render(frontsector, curline, WallC, rw_pic, x1, x2, mceilingclip, mfloorclip, texturemid, MaskedSWall, maskedtexturecol, ds->yscale, top, bot, true, wallshade, rw_offset, rw_light, rw_lightstep, nullptr, ds->foggy, basecolormap); - } - - clearfog: - if (ds->bFakeBoundary & 3) - { - RenderFakeWallRange(ds, x1, x2, wallshade); - } - if (!notrelevant) - { - if (clip3d->fake3D & FAKE3D_REFRESHCLIP) - { - if (!wrap) - { - assert(ds->bkup != nullptr); - memcpy(ds->sprtopclip, ds->bkup, (ds->x2 - ds->x1) * 2); - } - } - else - { - fillshort(ds->sprtopclip - ds->x1 + x1, x2 - x1, viewheight); - } - } - return; - } - - // kg3D - render one fake wall - void RenderDrawSegment::RenderFakeWall(DrawSegment *ds, int x1, int x2, F3DFloor *rover, int wallshade, FDynamicColormap *basecolormap) - { - int i; - double xscale; - double yscale; - - fixed_t Alpha = Scale(rover->alpha, OPAQUE, 255); - bool visible = R_SetPatchStyle(LegacyRenderStyles[rover->flags & FF_ADDITIVETRANS ? 
STYLE_Add : STYLE_Translucent], - Alpha, 0, 0, basecolormap); - - if (!visible) - return; - - rw_lightstep = ds->lightstep; - rw_light = ds->light + (x1 - ds->x1) * rw_lightstep; - - short *mfloorclip = ds->sprbottomclip - ds->x1; - short *mceilingclip = ds->sprtopclip - ds->x1; - - //double spryscale = ds->iscale + ds->iscalestep * (x1 - ds->x1); - float *MaskedSWall = ds->swall - ds->x1; - - // find positioning - side_t *scaledside; - side_t::ETexpart scaledpart; - if (rover->flags & FF_UPPERTEXTURE) - { - scaledside = curline->sidedef; - scaledpart = side_t::top; - } - else if (rover->flags & FF_LOWERTEXTURE) - { - scaledside = curline->sidedef; - scaledpart = side_t::bottom; - } - else - { - scaledside = rover->master->sidedef[0]; - scaledpart = side_t::mid; - } - xscale = rw_pic->Scale.X * scaledside->GetTextureXScale(scaledpart); - yscale = rw_pic->Scale.Y * scaledside->GetTextureYScale(scaledpart); - - double rowoffset = curline->sidedef->GetTextureYOffset(side_t::mid) + rover->master->sidedef[0]->GetTextureYOffset(side_t::mid); - double planez = rover->model->GetPlaneTexZ(sector_t::ceiling); - rw_offset = FLOAT2FIXED(curline->sidedef->GetTextureXOffset(side_t::mid) + rover->master->sidedef[0]->GetTextureXOffset(side_t::mid)); - if (rowoffset < 0) - { - rowoffset += rw_pic->GetHeight(); - } - double texturemid = (planez - ViewPos.Z) * yscale; - if (rw_pic->bWorldPanning) - { - // rowoffset is added before the multiply so that the masked texture will - // still be positioned in world units rather than texels. - - texturemid = texturemid + rowoffset * yscale; - rw_offset = xs_RoundToInt(rw_offset * xscale); - } - else - { - // rowoffset is added outside the multiply so that it positions the texture - // by texels instead of world units. - texturemid += rowoffset; - } - - if (fixedlightlev >= 0) - R_SetColorMapLight((r_fullbrightignoresectorcolor) ? 
&FullNormalLight : basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); - else if (fixedcolormap != nullptr) - R_SetColorMapLight(fixedcolormap, 0, 0); - - WallC.sz1 = ds->sz1; - WallC.sz2 = ds->sz2; - WallC.sx1 = ds->sx1; - WallC.sx2 = ds->sx2; - WallC.tleft.X = ds->cx; - WallC.tleft.Y = ds->cy; - WallC.tright.X = ds->cx + ds->cdx; - WallC.tright.Y = ds->cy + ds->cdy; - WallT = ds->tmapvals; - - Clip3DFloors *clip3d = Clip3DFloors::Instance(); - wallupper.Project(clip3d->sclipTop - ViewPos.Z, &WallC); - walllower.Project(clip3d->sclipBottom - ViewPos.Z, &WallC); - - for (i = x1; i < x2; i++) - { - if (wallupper.ScreenY[i] < mceilingclip[i]) - wallupper.ScreenY[i] = mceilingclip[i]; - } - for (i = x1; i < x2; i++) - { - if (walllower.ScreenY[i] > mfloorclip[i]) - walllower.ScreenY[i] = mfloorclip[i]; - } - - ProjectedWallTexcoords walltexcoords; - walltexcoords.ProjectPos(curline->sidedef->TexelLength*xscale, ds->sx1, ds->sx2, WallT); - - double top, bot; - GetMaskedWallTopBottom(ds, top, bot); - - RenderWallPart renderWallpart; - renderWallpart.Render(frontsector, curline, WallC, rw_pic, x1, x2, wallupper.ScreenY, walllower.ScreenY, texturemid, MaskedSWall, walltexcoords.UPos, yscale, top, bot, true, wallshade, rw_offset, rw_light, rw_lightstep, nullptr, ds->foggy, basecolormap); - } - - // kg3D - walls of fake floors - void RenderDrawSegment::RenderFakeWallRange(DrawSegment *ds, int x1, int x2, int wallshade) - { - FTexture *const DONT_DRAW = ((FTexture*)(intptr_t)-1); - int i, j; - F3DFloor *rover, *fover = nullptr; - int passed, last; - double floorHeight; - double ceilingHeight; - - curline = ds->curline; - - frontsector = curline->frontsector; - backsector = curline->backsector; - - if (backsector == nullptr) - { - return; - } - if ((ds->bFakeBoundary & 3) == 2) - { - sector_t *sec = backsector; - backsector = frontsector; - frontsector = sec; - } - - floorHeight = backsector->CenterFloor(); - ceilingHeight = backsector->CenterCeiling(); - - Clip3DFloors 
*clip3d = Clip3DFloors::Instance(); - - // maybe fix clipheights - if (!(clip3d->fake3D & FAKE3D_CLIPBOTTOM)) clip3d->sclipBottom = floorHeight; - if (!(clip3d->fake3D & FAKE3D_CLIPTOP)) clip3d->sclipTop = ceilingHeight; - - // maybe not visible - if (clip3d->sclipBottom >= frontsector->CenterCeiling()) return; - if (clip3d->sclipTop <= frontsector->CenterFloor()) return; - - if (clip3d->fake3D & FAKE3D_DOWN2UP) - { // bottom to viewz - last = 0; - for (i = backsector->e->XFloor.ffloors.Size() - 1; i >= 0; i--) - { - rover = backsector->e->XFloor.ffloors[i]; - if (!(rover->flags & FF_EXISTS)) continue; - - // visible? - passed = 0; - if (!(rover->flags & FF_RENDERSIDES) || rover->top.plane->isSlope() || rover->bottom.plane->isSlope() || - rover->top.plane->Zat0() <= clip3d->sclipBottom || - rover->bottom.plane->Zat0() >= ceilingHeight || - rover->top.plane->Zat0() <= floorHeight) - { - if (!i) - { - passed = 1; - } - else - { - continue; - } - } - - rw_pic = nullptr; - if (rover->bottom.plane->Zat0() >= clip3d->sclipTop || passed) - { - if (last) - { - break; - } - // maybe wall from inside rendering? - fover = nullptr; - for (j = frontsector->e->XFloor.ffloors.Size() - 1; j >= 0; j--) - { - fover = frontsector->e->XFloor.ffloors[j]; - if (fover->model == rover->model) - { // never - fover = nullptr; - break; - } - if (!(fover->flags & FF_EXISTS)) continue; - if (!(fover->flags & FF_RENDERSIDES)) continue; - // no sloped walls, it's bugged - if (fover->top.plane->isSlope() || fover->bottom.plane->isSlope()) continue; - - // visible? - if (fover->top.plane->Zat0() <= clip3d->sclipBottom) continue; // no - if (fover->bottom.plane->Zat0() >= clip3d->sclipTop) - { // no, last possible - fover = nullptr; - break; - } - // it is, render inside? 
- if (!(fover->flags & (FF_BOTHPLANES | FF_INVERTPLANES))) - { // no - fover = nullptr; - } - break; - } - // nothing - if (!fover || j == -1) - { - break; - } - // correct texture - if (fover->flags & rover->flags & FF_SWIMMABLE) - { // don't ever draw (but treat as something has been found) - rw_pic = DONT_DRAW; - } - else if (fover->flags & FF_UPPERTEXTURE) - { - rw_pic = TexMan(curline->sidedef->GetTexture(side_t::top), true); - } - else if (fover->flags & FF_LOWERTEXTURE) - { - rw_pic = TexMan(curline->sidedef->GetTexture(side_t::bottom), true); - } - else - { - rw_pic = TexMan(fover->master->sidedef[0]->GetTexture(side_t::mid), true); - } - } - else if (frontsector->e->XFloor.ffloors.Size()) - { - // maybe not visible? - fover = nullptr; - for (j = frontsector->e->XFloor.ffloors.Size() - 1; j >= 0; j--) - { - fover = frontsector->e->XFloor.ffloors[j]; - if (fover->model == rover->model) // never - { - break; - } - if (!(fover->flags & FF_EXISTS)) continue; - if (!(fover->flags & FF_RENDERSIDES)) continue; - // no sloped walls, it's bugged - if (fover->top.plane->isSlope() || fover->bottom.plane->isSlope()) continue; - - // visible? 
- if (fover->top.plane->Zat0() <= clip3d->sclipBottom) continue; // no - if (fover->bottom.plane->Zat0() >= clip3d->sclipTop) - { // visible, last possible - fover = nullptr; - break; - } - if ((fover->flags & FF_SOLID) == (rover->flags & FF_SOLID) && - !(!(fover->flags & FF_SOLID) && (fover->alpha == 255 || rover->alpha == 255)) - ) - { - break; - } - if (fover->flags & rover->flags & FF_SWIMMABLE) - { // don't ever draw (but treat as something has been found) - rw_pic = DONT_DRAW; - } - fover = nullptr; // visible - break; - } - if (fover && j != -1) - { - fover = nullptr; - last = 1; - continue; // not visible - } - } - if (!rw_pic) - { - fover = nullptr; - if (rover->flags & FF_UPPERTEXTURE) - { - rw_pic = TexMan(curline->sidedef->GetTexture(side_t::top), true); - } - else if (rover->flags & FF_LOWERTEXTURE) - { - rw_pic = TexMan(curline->sidedef->GetTexture(side_t::bottom), true); - } - else - { - rw_pic = TexMan(rover->master->sidedef[0]->GetTexture(side_t::mid), true); - } - } - // correct colors now - FDynamicColormap *basecolormap = frontsector->ColorMap; - wallshade = ds->shade; - if (fixedlightlev < 0) - { - if ((ds->bFakeBoundary & 3) == 2) - { - for (j = backsector->e->XFloor.lightlist.Size() - 1; j >= 0; j--) - { - if (clip3d->sclipTop <= backsector->e->XFloor.lightlist[j].plane.Zat0()) - { - lightlist_t *lit = &backsector->e->XFloor.lightlist[j]; - basecolormap = lit->extra_colormap; - wallshade = LIGHT2SHADE(curline->sidedef->GetLightLevel(ds->foggy, *lit->p_lightlevel, lit->lightsource != nullptr) + R_ActualExtraLight(ds->foggy)); - break; - } - } - } - else - { - for (j = frontsector->e->XFloor.lightlist.Size() - 1; j >= 0; j--) - { - if (clip3d->sclipTop <= frontsector->e->XFloor.lightlist[j].plane.Zat0()) - { - lightlist_t *lit = &frontsector->e->XFloor.lightlist[j]; - basecolormap = lit->extra_colormap; - wallshade = LIGHT2SHADE(curline->sidedef->GetLightLevel(ds->foggy, *lit->p_lightlevel, lit->lightsource != nullptr) + 
R_ActualExtraLight(ds->foggy)); - break; - } - } - } - } - if (rw_pic != DONT_DRAW) - { - RenderFakeWall(ds, x1, x2, fover ? fover : rover, wallshade, basecolormap); - } - else rw_pic = nullptr; - break; - } - } - else - { // top to viewz - for (i = 0; i < (int)backsector->e->XFloor.ffloors.Size(); i++) - { - rover = backsector->e->XFloor.ffloors[i]; - if (!(rover->flags & FF_EXISTS)) continue; - - // visible? - passed = 0; - if (!(rover->flags & FF_RENDERSIDES) || - rover->top.plane->isSlope() || rover->bottom.plane->isSlope() || - rover->bottom.plane->Zat0() >= clip3d->sclipTop || - rover->top.plane->Zat0() <= floorHeight || - rover->bottom.plane->Zat0() >= ceilingHeight) - { - if ((unsigned)i == backsector->e->XFloor.ffloors.Size() - 1) - { - passed = 1; - } - else - { - continue; - } - } - rw_pic = nullptr; - if (rover->top.plane->Zat0() <= clip3d->sclipBottom || passed) - { // maybe wall from inside rendering? - fover = nullptr; - for (j = 0; j < (int)frontsector->e->XFloor.ffloors.Size(); j++) - { - fover = frontsector->e->XFloor.ffloors[j]; - if (fover->model == rover->model) - { // never - fover = nullptr; - break; - } - if (!(fover->flags & FF_EXISTS)) continue; - if (!(fover->flags & FF_RENDERSIDES)) continue; - // no sloped walls, it's bugged - if (fover->top.plane->isSlope() || fover->bottom.plane->isSlope()) continue; - - // visible? - if (fover->bottom.plane->Zat0() >= clip3d->sclipTop) continue; // no - if (fover->top.plane->Zat0() <= clip3d->sclipBottom) - { // no, last possible - fover = nullptr; - break; - } - // it is, render inside? 
- if (!(fover->flags & (FF_BOTHPLANES | FF_INVERTPLANES))) - { // no - fover = nullptr; - } - break; - } - // nothing - if (!fover || (unsigned)j == frontsector->e->XFloor.ffloors.Size()) - { - break; - } - // correct texture - if (fover->flags & rover->flags & FF_SWIMMABLE) - { - rw_pic = DONT_DRAW; // don't ever draw (but treat as something has been found) - } - else if (fover->flags & FF_UPPERTEXTURE) - { - rw_pic = TexMan(curline->sidedef->GetTexture(side_t::top), true); - } - else if (fover->flags & FF_LOWERTEXTURE) - { - rw_pic = TexMan(curline->sidedef->GetTexture(side_t::bottom), true); - } - else - { - rw_pic = TexMan(fover->master->sidedef[0]->GetTexture(side_t::mid), true); - } - } - else if (frontsector->e->XFloor.ffloors.Size()) - { // maybe not visible? - fover = nullptr; - for (j = 0; j < (int)frontsector->e->XFloor.ffloors.Size(); j++) - { - fover = frontsector->e->XFloor.ffloors[j]; - if (fover->model == rover->model) - { // never - break; - } - if (!(fover->flags & FF_EXISTS)) continue; - if (!(fover->flags & FF_RENDERSIDES)) continue; - // no sloped walls, its bugged - if (fover->top.plane->isSlope() || fover->bottom.plane->isSlope()) continue; - - // visible? 
- if (fover->bottom.plane->Zat0() >= clip3d->sclipTop) continue; // no - if (fover->top.plane->Zat0() <= clip3d->sclipBottom) - { // visible, last possible - fover = nullptr; - break; - } - if ((fover->flags & FF_SOLID) == (rover->flags & FF_SOLID) && - !(!(rover->flags & FF_SOLID) && (fover->alpha == 255 || rover->alpha == 255)) - ) - { - break; - } - if (fover->flags & rover->flags & FF_SWIMMABLE) - { // don't ever draw (but treat as something has been found) - rw_pic = DONT_DRAW; - } - fover = nullptr; // visible - break; - } - if (fover && (unsigned)j != frontsector->e->XFloor.ffloors.Size()) - { // not visible - break; - } - } - if (rw_pic == nullptr) - { - fover = nullptr; - if (rover->flags & FF_UPPERTEXTURE) - { - rw_pic = TexMan(curline->sidedef->GetTexture(side_t::top), true); - } - else if (rover->flags & FF_LOWERTEXTURE) - { - rw_pic = TexMan(curline->sidedef->GetTexture(side_t::bottom), true); - } - else - { - rw_pic = TexMan(rover->master->sidedef[0]->GetTexture(side_t::mid), true); - } - } - // correct colors now - FDynamicColormap *basecolormap = frontsector->ColorMap; - wallshade = ds->shade; - if (fixedlightlev < 0) - { - if ((ds->bFakeBoundary & 3) == 2) - { - for (j = backsector->e->XFloor.lightlist.Size() - 1; j >= 0; j--) - { - if (clip3d->sclipTop <= backsector->e->XFloor.lightlist[j].plane.Zat0()) - { - lightlist_t *lit = &backsector->e->XFloor.lightlist[j]; - basecolormap = lit->extra_colormap; - wallshade = LIGHT2SHADE(curline->sidedef->GetLightLevel(ds->foggy, *lit->p_lightlevel, lit->lightsource != nullptr) + R_ActualExtraLight(ds->foggy)); - break; - } - } - } - else - { - for (j = frontsector->e->XFloor.lightlist.Size() - 1; j >= 0; j--) - { - if (clip3d->sclipTop <= frontsector->e->XFloor.lightlist[j].plane.Zat0()) - { - lightlist_t *lit = &frontsector->e->XFloor.lightlist[j]; - basecolormap = lit->extra_colormap; - wallshade = LIGHT2SHADE(curline->sidedef->GetLightLevel(ds->foggy, *lit->p_lightlevel, lit->lightsource != nullptr) + 
R_ActualExtraLight(ds->foggy)); - break; - } - } - } - } - - if (rw_pic != DONT_DRAW) - { - RenderFakeWall(ds, x1, x2, fover ? fover : rover, wallshade, basecolormap); - } - else - { - rw_pic = nullptr; - } - break; - } - } - return; - } } diff --git a/src/swrenderer/segments/r_drawsegment.h b/src/swrenderer/segments/r_drawsegment.h index a56dae7729..6282936ac4 100644 --- a/src/swrenderer/segments/r_drawsegment.h +++ b/src/swrenderer/segments/r_drawsegment.h @@ -67,32 +67,4 @@ namespace swrenderer private: size_t MaxDrawSegs = 0; }; - - class RenderDrawSegment - { - public: - void Render(DrawSegment *ds, int x1, int x2); - - private: - void ClipMidtex(int x1, int x2); - void RenderFakeWall(DrawSegment *ds, int x1, int x2, F3DFloor *rover, int wallshade, FDynamicColormap *basecolormap); - void RenderFakeWallRange(DrawSegment *ds, int x1, int x2, int wallshade); - void GetMaskedWallTopBottom(DrawSegment *ds, double &top, double &bot); - - sector_t *frontsector = nullptr; - sector_t *backsector = nullptr; - - seg_t *curline = nullptr; - - FWallCoords WallC; - FWallTmapVals WallT; - - float rw_light = 0.0f; - float rw_lightstep = 0.0f; - fixed_t rw_offset = 0; - FTexture *rw_pic = nullptr; - - ProjectedWallLine wallupper; - ProjectedWallLine walllower; - }; } diff --git a/src/swrenderer/things/r_particle.cpp b/src/swrenderer/things/r_particle.cpp index 919f1c08c9..47a36a719f 100644 --- a/src/swrenderer/things/r_particle.cpp +++ b/src/swrenderer/things/r_particle.cpp @@ -20,7 +20,6 @@ #include "m_swap.h" #include "i_system.h" #include "w_wad.h" -#include "swrenderer/things/r_particle.h" #include "c_console.h" #include "c_cvars.h" #include "c_dispatch.h" @@ -37,11 +36,6 @@ #include "colormatcher.h" #include "d_netinf.h" #include "p_effect.h" -#include "swrenderer/scene/r_opaque_pass.h" -#include "swrenderer/scene/r_3dfloors.h" -#include "swrenderer/scene/r_translucent_pass.h" -#include "swrenderer/drawers/r_draw_rgba.h" -#include "swrenderer/drawers/r_draw_pal.h" 
#include "v_palette.h" #include "r_data/r_translate.h" #include "r_data/colormaps.h" @@ -49,11 +43,18 @@ #include "p_local.h" #include "p_maputl.h" #include "r_voxel.h" -#include "swrenderer/segments/r_drawsegment.h" +#include "swrenderer/scene/r_opaque_pass.h" +#include "swrenderer/scene/r_3dfloors.h" +#include "swrenderer/scene/r_translucent_pass.h" #include "swrenderer/scene/r_portal.h" -#include "swrenderer/r_memory.h" #include "swrenderer/scene/r_viewport.h" #include "swrenderer/scene/r_light.h" +#include "swrenderer/segments/r_drawsegment.h" +#include "swrenderer/line/r_renderdrawsegment.h" +#include "swrenderer/things/r_particle.h" +#include "swrenderer/drawers/r_draw_rgba.h" +#include "swrenderer/drawers/r_draw_pal.h" +#include "swrenderer/r_memory.h" EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor); diff --git a/src/swrenderer/things/r_visiblesprite.cpp b/src/swrenderer/things/r_visiblesprite.cpp index ff70344f52..c11014b358 100644 --- a/src/swrenderer/things/r_visiblesprite.cpp +++ b/src/swrenderer/things/r_visiblesprite.cpp @@ -29,6 +29,7 @@ #include "swrenderer/things/r_wallsprite.h" #include "swrenderer/things/r_playersprite.h" #include "swrenderer/segments/r_drawsegment.h" +#include "swrenderer/line/r_renderdrawsegment.h" #include "swrenderer/plane/r_visibleplane.h" #include "swrenderer/scene/r_portal.h" #include "swrenderer/scene/r_light.h" diff --git a/src/swrenderer/things/r_voxel.h b/src/swrenderer/things/r_voxel.h index ae41ea7ee3..838bcf61d4 100644 --- a/src/swrenderer/things/r_voxel.h +++ b/src/swrenderer/things/r_voxel.h @@ -23,6 +23,8 @@ #pragma once +#include "r_visiblesprite.h" + struct kvxslab_t; struct FVoxelMipLevel; struct FVoxel; From 8b0304c1e3075ff8d32c557d6cb0551737c3fde9 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 26 Jan 2017 08:24:44 +0100 Subject: [PATCH 760/912] Remove static from RenderFogBoundary --- src/swrenderer/line/r_fogboundary.cpp | 2 -- src/swrenderer/line/r_fogboundary.h | 7 +++---- 
src/swrenderer/line/r_renderdrawsegment.cpp | 3 ++- 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/src/swrenderer/line/r_fogboundary.cpp b/src/swrenderer/line/r_fogboundary.cpp index 18cf94055c..d0a36f948b 100644 --- a/src/swrenderer/line/r_fogboundary.cpp +++ b/src/swrenderer/line/r_fogboundary.cpp @@ -145,6 +145,4 @@ namespace swrenderer R_Drawers()->DrawFogBoundaryLine(y, x1, spanend[y]); } } - - short RenderFogBoundary::spanend[MAXHEIGHT]; } diff --git a/src/swrenderer/line/r_fogboundary.h b/src/swrenderer/line/r_fogboundary.h index ba7cd00440..cfa0e139ab 100644 --- a/src/swrenderer/line/r_fogboundary.h +++ b/src/swrenderer/line/r_fogboundary.h @@ -18,12 +18,11 @@ namespace swrenderer class RenderFogBoundary { public: - static void Render(int x1, int x2, short *uclip, short *dclip, int wallshade, float lightleft, float lightstep, FDynamicColormap *basecolormap); + void Render(int x1, int x2, short *uclip, short *dclip, int wallshade, float lightleft, float lightstep, FDynamicColormap *basecolormap); private: - static void RenderSection(int y, int y2, int x1); + void RenderSection(int y, int y2, int x1); - static short spanend[MAXHEIGHT]; + short spanend[MAXHEIGHT]; }; - } diff --git a/src/swrenderer/line/r_renderdrawsegment.cpp b/src/swrenderer/line/r_renderdrawsegment.cpp index 8e61606934..07a7817958 100644 --- a/src/swrenderer/line/r_renderdrawsegment.cpp +++ b/src/swrenderer/line/r_renderdrawsegment.cpp @@ -48,6 +48,7 @@ namespace swrenderer { void RenderDrawSegment::Render(DrawSegment *ds, int x1, int x2) { + RenderFogBoundary renderfog; float *MaskedSWall = nullptr, MaskedScaleY = 0, rw_scalestep = 0; fixed_t *maskedtexturecol = nullptr; @@ -121,7 +122,7 @@ namespace swrenderer // [RH] Draw fog partition if (ds->bFogBoundary) { - RenderFogBoundary::Render(x1, x2, mceilingclip, mfloorclip, wallshade, rw_light, rw_lightstep, basecolormap); + renderfog.Render(x1, x2, mceilingclip, mfloorclip, wallshade, rw_light, rw_lightstep, basecolormap); if 
(ds->maskedtexturecol == nullptr) { goto clearfog; From 34f85569b485692bfbd2b9d4022d82ba7bbc3cb5 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 26 Jan 2017 08:36:28 +0100 Subject: [PATCH 761/912] Change RenderTranslucentPass to be a singleton --- src/swrenderer/scene/r_portal.cpp | 4 ++-- src/swrenderer/scene/r_scene.cpp | 6 +++--- src/swrenderer/scene/r_translucent_pass.cpp | 7 +++++-- src/swrenderer/scene/r_translucent_pass.h | 18 ++++++++++-------- src/swrenderer/things/r_particle.cpp | 6 ++++-- src/swrenderer/things/r_sprite.cpp | 4 +++- src/swrenderer/things/r_voxel.cpp | 2 +- src/swrenderer/things/r_wallsprite.cpp | 4 +++- 8 files changed, 31 insertions(+), 20 deletions(-) diff --git a/src/swrenderer/scene/r_portal.cpp b/src/swrenderer/scene/r_portal.cpp index c6685ddaef..af2832f01b 100644 --- a/src/swrenderer/scene/r_portal.cpp +++ b/src/swrenderer/scene/r_portal.cpp @@ -239,7 +239,7 @@ namespace swrenderer // Masked textures and planes need the view coordinates restored for proper positioning. viewposStack.Pop(ViewPos); - RenderTranslucentPass::Render(); + RenderTranslucentPass::Instance()->Render(); drawseglist->ds_p = drawseglist->firstdrawseg; @@ -458,7 +458,7 @@ namespace swrenderer NetUpdate(); MaskedCycles.Clock(); // [ZZ] count sprites in portals/mirrors along with normal ones. - RenderTranslucentPass::Render(); // this is required since with portals there often will be cases when more than 80% of the view is inside a portal. + RenderTranslucentPass::Instance()->Render(); // this is required since with portals there often will be cases when more than 80% of the view is inside a portal. 
MaskedCycles.Unclock(); NetUpdate(); diff --git a/src/swrenderer/scene/r_scene.cpp b/src/swrenderer/scene/r_scene.cpp index 2d1e1d7bb8..a85120eb30 100644 --- a/src/swrenderer/scene/r_scene.cpp +++ b/src/swrenderer/scene/r_scene.cpp @@ -134,7 +134,7 @@ namespace swrenderer RenderClipSegment::Instance()->Clear(0, viewwidth); DrawSegmentList::Instance()->Clear(); VisiblePlaneList::Instance()->Clear(); - RenderTranslucentPass::Clear(); + RenderTranslucentPass::Instance()->Clear(); // opening / clipping determination RenderOpaquePass::Instance()->ClearClip(); @@ -182,7 +182,7 @@ namespace swrenderer NetUpdate(); MaskedCycles.Clock(); - RenderTranslucentPass::Render(); + RenderTranslucentPass::Instance()->Render(); MaskedCycles.Unclock(); NetUpdate(); @@ -267,7 +267,7 @@ namespace swrenderer void RenderScene::Deinit() { - RenderTranslucentPass::Deinit(); + RenderTranslucentPass::Instance()->Deinit(); Clip3DFloors::Instance()->Cleanup(); DrawSegmentList::Instance()->Deinit(); } diff --git a/src/swrenderer/scene/r_translucent_pass.cpp b/src/swrenderer/scene/r_translucent_pass.cpp index 21e9a50dfd..862b47a713 100644 --- a/src/swrenderer/scene/r_translucent_pass.cpp +++ b/src/swrenderer/scene/r_translucent_pass.cpp @@ -47,8 +47,11 @@ CVAR(Bool, r_fullbrightignoresectorcolor, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG namespace swrenderer { - bool RenderTranslucentPass::DrewAVoxel; - TArray RenderTranslucentPass::portaldrawsegs; + RenderTranslucentPass *RenderTranslucentPass::Instance() + { + static RenderTranslucentPass instance; + return &instance; + } void RenderTranslucentPass::Deinit() { diff --git a/src/swrenderer/scene/r_translucent_pass.h b/src/swrenderer/scene/r_translucent_pass.h index 6bd675af18..6e5ea778d6 100644 --- a/src/swrenderer/scene/r_translucent_pass.h +++ b/src/swrenderer/scene/r_translucent_pass.h @@ -28,18 +28,20 @@ namespace swrenderer class RenderTranslucentPass { public: - static void Deinit(); - static void Clear(); - static void Render(); + static 
RenderTranslucentPass *Instance(); - static bool DrewAVoxel; + void Deinit(); + void Clear(); + void Render(); - static bool ClipSpriteColumnWithPortals(int x, VisibleSprite *spr); + bool DrewAVoxel = false; + + bool ClipSpriteColumnWithPortals(int x, VisibleSprite *spr); private: - static void CollectPortals(); - static void DrawMaskedSingle(bool renew); + void CollectPortals(); + void DrawMaskedSingle(bool renew); - static TArray portaldrawsegs; + TArray portaldrawsegs; }; } diff --git a/src/swrenderer/things/r_particle.cpp b/src/swrenderer/things/r_particle.cpp index 47a36a719f..f7a7ba94bc 100644 --- a/src/swrenderer/things/r_particle.cpp +++ b/src/swrenderer/things/r_particle.cpp @@ -236,11 +236,13 @@ namespace swrenderer uint32_t fracstepx = PARTICLE_TEXTURE_SIZE * FRACUNIT / countbase; uint32_t fracposx = fracstepx / 2; + RenderTranslucentPass *translucentPass = RenderTranslucentPass::Instance(); + if (r_swtruecolor) { for (int x = x1; x < (x1 + countbase); x++, fracposx += fracstepx) { - if (RenderTranslucentPass::ClipSpriteColumnWithPortals(x, vis)) + if (translucentPass->ClipSpriteColumnWithPortals(x, vis)) continue; uint32_t *dest = ylookup[yl] + x + (uint32_t*)dc_destorg; DrawerCommandQueue::QueueCommand(dest, yl, spacing, ycount, fg, alpha, fracposx); @@ -250,7 +252,7 @@ namespace swrenderer { for (int x = x1; x < (x1 + countbase); x++, fracposx += fracstepx) { - if (RenderTranslucentPass::ClipSpriteColumnWithPortals(x, vis)) + if (translucentPass->ClipSpriteColumnWithPortals(x, vis)) continue; uint8_t *dest = ylookup[yl] + x + dc_destorg; DrawerCommandQueue::QueueCommand(dest, yl, spacing, ycount, fg, alpha, fracposx); diff --git a/src/swrenderer/things/r_sprite.cpp b/src/swrenderer/things/r_sprite.cpp index 1c91c75b05..126a6942e3 100644 --- a/src/swrenderer/things/r_sprite.cpp +++ b/src/swrenderer/things/r_sprite.cpp @@ -288,9 +288,11 @@ namespace swrenderer if (x < x2) { + RenderTranslucentPass *translucentPass = RenderTranslucentPass::Instance(); + 
while (x < x2) { - if (!RenderTranslucentPass::ClipSpriteColumnWithPortals(x, vis)) + if (!translucentPass->ClipSpriteColumnWithPortals(x, vis)) R_DrawMaskedColumn(x, iscale, tex, frac, spryscale, sprtopscreen, sprflipvert, mfloorclip, mceilingclip, false); x++; frac += xiscale; diff --git a/src/swrenderer/things/r_voxel.cpp b/src/swrenderer/things/r_voxel.cpp index a4b44e0087..481c2c378c 100644 --- a/src/swrenderer/things/r_voxel.cpp +++ b/src/swrenderer/things/r_voxel.cpp @@ -178,7 +178,7 @@ namespace swrenderer vis->Light.SetColormap(r_SpriteVisibility / MAX(tz, MINZ), spriteshade, basecolormap, fullbright, invertcolormap, fadeToBlack); VisibleSpriteList::Instance()->Push(vis); - RenderTranslucentPass::DrewAVoxel = true; + RenderTranslucentPass::Instance()->DrewAVoxel = true; } void RenderVoxel::Render(short *cliptop, short *clipbottom, int minZ, int maxZ) diff --git a/src/swrenderer/things/r_wallsprite.cpp b/src/swrenderer/things/r_wallsprite.cpp index a6da5ff0e8..09d882b0f4 100644 --- a/src/swrenderer/things/r_wallsprite.cpp +++ b/src/swrenderer/things/r_wallsprite.cpp @@ -226,13 +226,15 @@ namespace swrenderer } else { + RenderTranslucentPass *translucentPass = RenderTranslucentPass::Instance(); + while (x < x2) { if (calclighting) { // calculate lighting R_SetColorMapLight(usecolormap, light, shade); } - if (!RenderTranslucentPass::ClipSpriteColumnWithPortals(x, spr)) + if (!translucentPass->ClipSpriteColumnWithPortals(x, spr)) DrawColumn(x, WallSpriteTile, walltexcoords, texturemid, maskedScaleY, sprflipvert, mfloorclip, mceilingclip); light += lightstep; x++; From 933f2d116a79b0860dad867dd41b462a26ea7764 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 26 Jan 2017 08:39:44 +0100 Subject: [PATCH 762/912] Move DrewAVoxel to VisibleSpriteList --- src/swrenderer/scene/r_translucent_pass.cpp | 3 +-- src/swrenderer/scene/r_translucent_pass.h | 2 -- src/swrenderer/things/r_visiblespritelist.cpp | 9 +++++++-- src/swrenderer/things/r_visiblespritelist.h | 
5 +++-- src/swrenderer/things/r_voxel.cpp | 3 +-- 5 files changed, 12 insertions(+), 10 deletions(-) diff --git a/src/swrenderer/scene/r_translucent_pass.cpp b/src/swrenderer/scene/r_translucent_pass.cpp index 862b47a713..67b0600955 100644 --- a/src/swrenderer/scene/r_translucent_pass.cpp +++ b/src/swrenderer/scene/r_translucent_pass.cpp @@ -61,7 +61,6 @@ namespace swrenderer void RenderTranslucentPass::Clear() { VisibleSpriteList::Instance()->Clear(); - DrewAVoxel = false; } void RenderTranslucentPass::CollectPortals() @@ -161,7 +160,7 @@ namespace swrenderer void RenderTranslucentPass::Render() { CollectPortals(); - VisibleSpriteList::Instance()->Sort(DrewAVoxel); + VisibleSpriteList::Instance()->Sort(); Clip3DFloors *clip3d = Clip3DFloors::Instance(); if (clip3d->height_top == nullptr) diff --git a/src/swrenderer/scene/r_translucent_pass.h b/src/swrenderer/scene/r_translucent_pass.h index 6e5ea778d6..64695e0db8 100644 --- a/src/swrenderer/scene/r_translucent_pass.h +++ b/src/swrenderer/scene/r_translucent_pass.h @@ -34,8 +34,6 @@ namespace swrenderer void Clear(); void Render(); - bool DrewAVoxel = false; - bool ClipSpriteColumnWithPortals(int x, VisibleSprite *spr); private: diff --git a/src/swrenderer/things/r_visiblespritelist.cpp b/src/swrenderer/things/r_visiblespritelist.cpp index 395499ab14..76706d7bc9 100644 --- a/src/swrenderer/things/r_visiblespritelist.cpp +++ b/src/swrenderer/things/r_visiblespritelist.cpp @@ -39,6 +39,7 @@ namespace swrenderer Sprites.Clear(); StartIndices.Clear(); SortedSprites.Clear(); + DrewAVoxel = false; } void VisibleSpriteList::PushPortal() @@ -52,13 +53,17 @@ namespace swrenderer StartIndices.Pop(); } - void VisibleSpriteList::Push(VisibleSprite *sprite) + void VisibleSpriteList::Push(VisibleSprite *sprite, bool isVoxel) { Sprites.Push(sprite); + if (isVoxel) + DrewAVoxel = true; } - void VisibleSpriteList::Sort(bool compare2d) + void VisibleSpriteList::Sort() { + bool compare2d = DrewAVoxel; + unsigned int first = 
StartIndices.Size() == 0 ? 0 : StartIndices.Last(); unsigned int count = Sprites.Size() - first; diff --git a/src/swrenderer/things/r_visiblespritelist.h b/src/swrenderer/things/r_visiblespritelist.h index d70fc88d4e..99370c2503 100644 --- a/src/swrenderer/things/r_visiblespritelist.h +++ b/src/swrenderer/things/r_visiblespritelist.h @@ -26,13 +26,14 @@ namespace swrenderer void Clear(); void PushPortal(); void PopPortal(); - void Push(VisibleSprite *sprite); - void Sort(bool compare2d); + void Push(VisibleSprite *sprite, bool isVoxel = false); + void Sort(); TArray SortedSprites; private: TArray Sprites; TArray StartIndices; + bool DrewAVoxel = false; }; } diff --git a/src/swrenderer/things/r_voxel.cpp b/src/swrenderer/things/r_voxel.cpp index 481c2c378c..8d30f50c96 100644 --- a/src/swrenderer/things/r_voxel.cpp +++ b/src/swrenderer/things/r_voxel.cpp @@ -177,8 +177,7 @@ namespace swrenderer vis->Light.SetColormap(r_SpriteVisibility / MAX(tz, MINZ), spriteshade, basecolormap, fullbright, invertcolormap, fadeToBlack); - VisibleSpriteList::Instance()->Push(vis); - RenderTranslucentPass::Instance()->DrewAVoxel = true; + VisibleSpriteList::Instance()->Push(vis, true); } void RenderVoxel::Render(short *cliptop, short *clipbottom, int minZ, int maxZ) From 4bbf1ba11cf5e261956647e83a277970e89187be Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 26 Jan 2017 09:49:07 +0100 Subject: [PATCH 763/912] Move visibility stuff into LightVisibility --- src/polyrenderer/scene/poly_decal.cpp | 2 +- src/polyrenderer/scene/poly_particle.cpp | 2 +- src/polyrenderer/scene/poly_plane.cpp | 4 +- src/polyrenderer/scene/poly_playersprite.cpp | 2 +- src/polyrenderer/scene/poly_portal.cpp | 6 +- src/polyrenderer/scene/poly_scene.cpp | 2 +- src/polyrenderer/scene/poly_sky.cpp | 2 +- src/polyrenderer/scene/poly_sprite.cpp | 2 +- src/polyrenderer/scene/poly_wall.cpp | 2 +- src/polyrenderer/scene/poly_wallsprite.cpp | 2 +- src/swrenderer/line/r_line.cpp | 2 +- 
src/swrenderer/plane/r_flatplane.cpp | 2 +- src/swrenderer/plane/r_slopeplane.cpp | 2 +- src/swrenderer/scene/r_light.cpp | 127 +++++++++---------- src/swrenderer/scene/r_light.h | 45 +++++-- src/swrenderer/scene/r_portal.cpp | 10 +- src/swrenderer/scene/r_viewport.cpp | 3 +- src/swrenderer/things/r_particle.cpp | 3 +- src/swrenderer/things/r_sprite.cpp | 2 +- src/swrenderer/things/r_visiblesprite.cpp | 2 +- src/swrenderer/things/r_voxel.cpp | 2 +- src/swrenderer/things/r_wallsprite.cpp | 4 +- 22 files changed, 121 insertions(+), 109 deletions(-) diff --git a/src/polyrenderer/scene/poly_decal.cpp b/src/polyrenderer/scene/poly_decal.cpp index 03e33c0155..5595819727 100644 --- a/src/polyrenderer/scene/poly_decal.cpp +++ b/src/polyrenderer/scene/poly_decal.cpp @@ -137,7 +137,7 @@ void RenderPolyDecal::Render(const TriMatrix &worldToClip, const Vec4f &clipPlan args.uniforms.flags = 0; args.SetColormap(front->ColorMap); args.SetTexture(tex, decal->Translation, true); - args.uniforms.globvis = (float)swrenderer::r_WallVisibility; + args.uniforms.globvis = (float)swrenderer::LightVisibility::Instance()->WallGlobVis(); if (fullbrightSprite || swrenderer::fixedlightlev >= 0 || swrenderer::fixedcolormap) { args.uniforms.light = 256; diff --git a/src/polyrenderer/scene/poly_particle.cpp b/src/polyrenderer/scene/poly_particle.cpp index b5d7220c00..c49e2e5317 100644 --- a/src/polyrenderer/scene/poly_particle.cpp +++ b/src/polyrenderer/scene/poly_particle.cpp @@ -73,7 +73,7 @@ void RenderPolyParticle::Render(const TriMatrix &worldToClip, const Vec4f &clipP PolyDrawArgs args; - args.uniforms.globvis = (float)swrenderer::r_SpriteVisibility; + args.uniforms.globvis = (float)swrenderer::LightVisibility::Instance()->ParticleGlobVis(); if (fullbrightSprite || swrenderer::fixedlightlev >= 0 || swrenderer::fixedcolormap) { diff --git a/src/polyrenderer/scene/poly_plane.cpp b/src/polyrenderer/scene/poly_plane.cpp index 86dd89fd1e..eae3fced22 100644 --- 
a/src/polyrenderer/scene/poly_plane.cpp +++ b/src/polyrenderer/scene/poly_plane.cpp @@ -107,7 +107,7 @@ void RenderPolyPlane::Render3DFloor(const TriMatrix &worldToClip, const Vec4f &c UVTransform xform(ceiling ? fakeFloor->top.model->planes[sector_t::ceiling].xform : fakeFloor->top.model->planes[sector_t::floor].xform, tex); PolyDrawArgs args; - args.uniforms.globvis = (float)swrenderer::r_TiltVisibility * 48.0f; + args.uniforms.globvis = (float)swrenderer::LightVisibility::Instance()->SlopePlaneGlobVis() * 48.0f; args.uniforms.light = (uint32_t)(lightlevel / 255.0f * 256.0f); if (swrenderer::fixedlightlev >= 0 || swrenderer::fixedcolormap) args.uniforms.light = 256; @@ -301,7 +301,7 @@ void RenderPolyPlane::Render(const TriMatrix &worldToClip, const Vec4f &clipPlan UVTransform transform(ceiling ? frontsector->planes[sector_t::ceiling].xform : frontsector->planes[sector_t::floor].xform, tex); PolyDrawArgs args; - args.uniforms.globvis = (float)swrenderer::r_TiltVisibility * 48.0f; + args.uniforms.globvis = (float)swrenderer::LightVisibility::Instance()->SlopePlaneGlobVis() * 48.0f; args.uniforms.light = (uint32_t)(frontsector->lightlevel / 255.0f * 256.0f); if (swrenderer::fixedlightlev >= 0 || swrenderer::fixedcolormap) args.uniforms.light = 256; diff --git a/src/polyrenderer/scene/poly_playersprite.cpp b/src/polyrenderer/scene/poly_playersprite.cpp index a1a46deb60..3333cf9a77 100644 --- a/src/polyrenderer/scene/poly_playersprite.cpp +++ b/src/polyrenderer/scene/poly_playersprite.cpp @@ -223,7 +223,7 @@ void RenderPolyPlayerSprites::RenderSprite(DPSprite *sprite, AActor *owner, floa int actualextralight = foggy ? 
0 : extralight << 4; int spriteshade = LIGHT2SHADE(owner->Sector->lightlevel + actualextralight); double minz = double((2048 * 4) / double(1 << 20)); - ColormapNum = GETPALOOKUP(swrenderer::r_SpriteVisibility / minz, spriteshade); + ColormapNum = GETPALOOKUP(swrenderer::LightVisibility::Instance()->SpriteGlobVis() / minz, spriteshade); if (sprite->GetID() < PSP_TARGETCENTER) { diff --git a/src/polyrenderer/scene/poly_portal.cpp b/src/polyrenderer/scene/poly_portal.cpp index 9737aa90fe..9ef0f3bdd8 100644 --- a/src/polyrenderer/scene/poly_portal.cpp +++ b/src/polyrenderer/scene/poly_portal.cpp @@ -90,7 +90,7 @@ void PolyDrawSectorPortal::SaveGlobals() savedextralight = extralight; savedpos = ViewPos; savedangle = ViewAngle; - savedvisibility = swrenderer::R_GetVisibility(); + savedvisibility = swrenderer::LightVisibility::Instance()->GetVisibility(); savedcamera = camera; savedsector = viewsector; @@ -99,7 +99,7 @@ void PolyDrawSectorPortal::SaveGlobals() // Don't let gun flashes brighten the sky box AActor *sky = Portal->mSkybox; extralight = 0; - swrenderer::R_SetVisibility(sky->args[0] * 0.25f); + swrenderer::LightVisibility::Instance()->SetVisibility(sky->args[0] * 0.25f); ViewPos = sky->InterpolatedPosition(r_TicFracF); ViewAngle = savedangle + (sky->PrevAngles.Yaw + deltaangle(sky->PrevAngles.Yaw, sky->Angles.Yaw) * r_TicFracF); } @@ -127,7 +127,7 @@ void PolyDrawSectorPortal::RestoreGlobals() camera = savedcamera; viewsector = savedsector; ViewPos = savedpos; - swrenderer::R_SetVisibility(savedvisibility); + swrenderer::LightVisibility::Instance()->SetVisibility(savedvisibility); extralight = savedextralight; ViewAngle = savedangle; R_SetViewAngle(); diff --git a/src/polyrenderer/scene/poly_scene.cpp b/src/polyrenderer/scene/poly_scene.cpp index 8744940b36..9685c4241f 100644 --- a/src/polyrenderer/scene/poly_scene.cpp +++ b/src/polyrenderer/scene/poly_scene.cpp @@ -241,7 +241,7 @@ void RenderPolyScene::RenderPortals(int portalDepth) PolyDrawArgs args; 
args.objectToClip = &WorldToClip; args.mode = TriangleDrawMode::Fan; - args.uniforms.globvis = (float)swrenderer::r_WallVisibility; + args.uniforms.globvis = (float)swrenderer::LightVisibility::Instance()->WallGlobVis(); args.uniforms.color = 0; args.uniforms.light = 256; args.uniforms.flags = TriUniforms::fixed_light; diff --git a/src/polyrenderer/scene/poly_sky.cpp b/src/polyrenderer/scene/poly_sky.cpp index e102cde50f..c4d0d7320a 100644 --- a/src/polyrenderer/scene/poly_sky.cpp +++ b/src/polyrenderer/scene/poly_sky.cpp @@ -55,7 +55,7 @@ void PolySkyDome::Render(const TriMatrix &worldToClip) int rc = mRows + 1; PolyDrawArgs args; - args.uniforms.globvis = (float)swrenderer::r_WallVisibility; + args.uniforms.globvis = (float)swrenderer::LightVisibility::Instance()->WallGlobVis(); args.uniforms.light = 256; args.uniforms.flags = 0; args.uniforms.subsectorDepth = RenderPolyScene::SkySubsectorDepth; diff --git a/src/polyrenderer/scene/poly_sprite.cpp b/src/polyrenderer/scene/poly_sprite.cpp index 91e35765cc..ff84ce5e36 100644 --- a/src/polyrenderer/scene/poly_sprite.cpp +++ b/src/polyrenderer/scene/poly_sprite.cpp @@ -138,7 +138,7 @@ void RenderPolySprite::Render(const TriMatrix &worldToClip, const Vec4f &clipPla bool fullbrightSprite = ((thing->renderflags & RF_FULLBRIGHT) || (thing->flags5 & MF5_BRIGHT)); PolyDrawArgs args; - args.uniforms.globvis = (float)swrenderer::r_SpriteVisibility; + args.uniforms.globvis = (float)swrenderer::LightVisibility::Instance()->SpriteGlobVis(); args.uniforms.flags = 0; if (fullbrightSprite || swrenderer::fixedlightlev >= 0 || swrenderer::fixedcolormap) { diff --git a/src/polyrenderer/scene/poly_wall.cpp b/src/polyrenderer/scene/poly_wall.cpp index 009c5fca7e..64de4c6753 100644 --- a/src/polyrenderer/scene/poly_wall.cpp +++ b/src/polyrenderer/scene/poly_wall.cpp @@ -247,7 +247,7 @@ void RenderPolyWall::Render(const TriMatrix &worldToClip, const Vec4f &clipPlane } PolyDrawArgs args; - args.uniforms.globvis = 
(float)swrenderer::r_WallVisibility; + args.uniforms.globvis = (float)swrenderer::LightVisibility::Instance()->WallGlobVis(); args.uniforms.light = (uint32_t)(GetLightLevel() / 255.0f * 256.0f); args.uniforms.flags = 0; args.uniforms.subsectorDepth = SubsectorDepth; diff --git a/src/polyrenderer/scene/poly_wallsprite.cpp b/src/polyrenderer/scene/poly_wallsprite.cpp index 4fc9e4994c..6025a274e5 100644 --- a/src/polyrenderer/scene/poly_wallsprite.cpp +++ b/src/polyrenderer/scene/poly_wallsprite.cpp @@ -100,7 +100,7 @@ void RenderPolyWallSprite::Render(const TriMatrix &worldToClip, const Vec4f &cli bool fullbrightSprite = ((thing->renderflags & RF_FULLBRIGHT) || (thing->flags5 & MF5_BRIGHT)); PolyDrawArgs args; - args.uniforms.globvis = (float)swrenderer::r_WallVisibility; + args.uniforms.globvis = (float)swrenderer::LightVisibility::Instance()->WallGlobVis(); if (fullbrightSprite || swrenderer::fixedlightlev >= 0 || swrenderer::fixedcolormap) { args.uniforms.light = 256; diff --git a/src/swrenderer/line/r_line.cpp b/src/swrenderer/line/r_line.cpp index 8f7dfd0a7d..84c1470f8b 100644 --- a/src/swrenderer/line/r_line.cpp +++ b/src/swrenderer/line/r_line.cpp @@ -904,7 +904,7 @@ namespace swrenderer if (fixedcolormap == NULL && fixedlightlev < 0) { wallshade = LIGHT2SHADE(curline->sidedef->GetLightLevel(foggy, frontsector->lightlevel) + R_ActualExtraLight(foggy)); - double GlobVis = r_WallVisibility; + double GlobVis = LightVisibility::Instance()->WallGlobVis(); rw_lightleft = float(GlobVis / WallC.sz1); rw_lightstep = float((GlobVis / WallC.sz2 - rw_lightleft) / (WallC.sx2 - WallC.sx1)); } diff --git a/src/swrenderer/plane/r_flatplane.cpp b/src/swrenderer/plane/r_flatplane.cpp index 72e25dc1e7..08fad4ce34 100644 --- a/src/swrenderer/plane/r_flatplane.cpp +++ b/src/swrenderer/plane/r_flatplane.cpp @@ -106,7 +106,7 @@ namespace swrenderer planeheight = fabs(pl->height.Zat0() - ViewPos.Z); basecolormap = colormap; - GlobVis = r_FloorVisibility / planeheight; + GlobVis = 
LightVisibility::Instance()->FlatPlaneGlobVis() / planeheight; ds_light = 0; if (fixedlightlev >= 0) { diff --git a/src/swrenderer/plane/r_slopeplane.cpp b/src/swrenderer/plane/r_slopeplane.cpp index 115b689bae..d697fecad7 100644 --- a/src/swrenderer/plane/r_slopeplane.cpp +++ b/src/swrenderer/plane/r_slopeplane.cpp @@ -143,7 +143,7 @@ namespace swrenderer plane_sz[0] = -plane_sz[0]; } - planelightfloat = (r_TiltVisibility * lxscale * lyscale) / (fabs(pl->height.ZatPoint(ViewPos) - ViewPos.Z)) / 65536.f; + planelightfloat = (LightVisibility::Instance()->SlopePlaneGlobVis() * lxscale * lyscale) / (fabs(pl->height.ZatPoint(ViewPos) - ViewPos.Z)) / 65536.f; if (pl->height.fC() > 0) planelightfloat = -planelightfloat; diff --git a/src/swrenderer/scene/r_light.cpp b/src/swrenderer/scene/r_light.cpp index 028927c5b5..a57daaea85 100644 --- a/src/swrenderer/scene/r_light.cpp +++ b/src/swrenderer/scene/r_light.cpp @@ -38,77 +38,10 @@ EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor) namespace swrenderer { - double r_BaseVisibility; - double r_WallVisibility; - double r_FloorVisibility; - float r_TiltVisibility; - double r_SpriteVisibility; - int fixedlightlev; FSWColormap *fixedcolormap; FSpecialColormap *realfixedcolormap; - namespace - { - double CurrentVisibility = 8.f; - double MaxVisForWall; - double MaxVisForFloor; - } - - // Changes how rapidly things get dark with distance - void R_SetVisibility(double vis) - { - // Allow negative visibilities, just for novelty's sake - vis = clamp(vis, -204.7, 204.7); // (205 and larger do not work in 5:4 aspect ratio) - - CurrentVisibility = vis; - - if (FocalTangent == 0 || FocalLengthY == 0) - { // If r_visibility is called before the renderer is all set up, don't - // divide by zero. This will be called again later, and the proper - // values can be initialized then. 
- return; - } - - r_BaseVisibility = vis; - - MaxVisForWall = (InvZtoScale * (SCREENWIDTH*r_Yaspect) / (viewwidth*SCREENHEIGHT * FocalTangent)); - MaxVisForWall = 32767.0 / MaxVisForWall; - MaxVisForFloor = 32767.0 / (viewheight >> 2) * FocalLengthY / 160; - - // Prevent overflow on walls - if (r_BaseVisibility < 0 && r_BaseVisibility < -MaxVisForWall) - r_WallVisibility = -MaxVisForWall; - else if (r_BaseVisibility > 0 && r_BaseVisibility > MaxVisForWall) - r_WallVisibility = MaxVisForWall; - else - r_WallVisibility = r_BaseVisibility; - - r_WallVisibility = (InvZtoScale * SCREENWIDTH*AspectBaseHeight(WidescreenRatio) / - (viewwidth*SCREENHEIGHT * 3)) * (r_WallVisibility * FocalTangent); - - // Prevent overflow on floors/ceilings. Note that the calculation of - // MaxVisForFloor means that planes less than two units from the player's - // view could still overflow, but there is no way to totally eliminate - // that while still using fixed point math. - if (r_BaseVisibility < 0 && r_BaseVisibility < -MaxVisForFloor) - r_FloorVisibility = -MaxVisForFloor; - else if (r_BaseVisibility > 0 && r_BaseVisibility > MaxVisForFloor) - r_FloorVisibility = MaxVisForFloor; - else - r_FloorVisibility = r_BaseVisibility; - - r_FloorVisibility = 160.0 * r_FloorVisibility / FocalLengthY; - - r_TiltVisibility = float(vis * FocalTangent * (16.f * 320.f) / viewwidth); - r_SpriteVisibility = r_WallVisibility; - } - - double R_GetVisibility() - { - return CurrentVisibility; - } - void R_SetupColormap(AActor *actor) { player_t *player = actor->player; @@ -154,6 +87,62 @@ namespace swrenderer } } + ///////////////////////////////////////////////////////////////////////// + + LightVisibility *LightVisibility::Instance() + { + static LightVisibility instance; + return &instance; + } + + // Changes how rapidly things get dark with distance + void LightVisibility::SetVisibility(double vis) + { + // Allow negative visibilities, just for novelty's sake + vis = clamp(vis, -204.7, 204.7); // (205 
and larger do not work in 5:4 aspect ratio) + + CurrentVisibility = vis; + + if (FocalTangent == 0 || FocalLengthY == 0) + { // If r_visibility is called before the renderer is all set up, don't + // divide by zero. This will be called again later, and the proper + // values can be initialized then. + return; + } + + BaseVisibility = vis; + + MaxVisForWall = (InvZtoScale * (SCREENWIDTH*r_Yaspect) / (viewwidth*SCREENHEIGHT * FocalTangent)); + MaxVisForWall = 32767.0 / MaxVisForWall; + MaxVisForFloor = 32767.0 / (viewheight >> 2) * FocalLengthY / 160; + + // Prevent overflow on walls + if (BaseVisibility < 0 && BaseVisibility < -MaxVisForWall) + WallVisibility = -MaxVisForWall; + else if (BaseVisibility > 0 && BaseVisibility > MaxVisForWall) + WallVisibility = MaxVisForWall; + else + WallVisibility = BaseVisibility; + + WallVisibility = (InvZtoScale * SCREENWIDTH*AspectBaseHeight(WidescreenRatio) / + (viewwidth*SCREENHEIGHT * 3)) * (WallVisibility * FocalTangent); + + // Prevent overflow on floors/ceilings. Note that the calculation of + // MaxVisForFloor means that planes less than two units from the player's + // view could still overflow, but there is no way to totally eliminate + // that while still using fixed point math. + if (BaseVisibility < 0 && BaseVisibility < -MaxVisForFloor) + FloorVisibility = -MaxVisForFloor; + else if (BaseVisibility > 0 && BaseVisibility > MaxVisForFloor) + FloorVisibility = MaxVisForFloor; + else + FloorVisibility = BaseVisibility; + + FloorVisibility = 160.0 * FloorVisibility / FocalLengthY; + + TiltVisibility = float(vis * FocalTangent * (16.f * 320.f) / viewwidth); + } + // Controls how quickly light ramps across a 1/z range. Set this, and it // sets all the r_*Visibility variables (except r_SkyVisibilily, which is // currently unused). 
@@ -161,11 +150,11 @@ namespace swrenderer { if (argv.argc() < 2) { - Printf("Visibility is %g\n", R_GetVisibility()); + Printf("Visibility is %g\n", LightVisibility::Instance()->GetVisibility()); } else if (!netgame) { - R_SetVisibility(atof(argv[1])); + LightVisibility::Instance()->SetVisibility(atof(argv[1])); } else { diff --git a/src/swrenderer/scene/r_light.h b/src/swrenderer/scene/r_light.h index 44f1b49f52..04a12c0ac7 100644 --- a/src/swrenderer/scene/r_light.h +++ b/src/swrenderer/scene/r_light.h @@ -18,6 +18,7 @@ #include "v_palette.h" #include "r_data/colormaps.h" #include "r_utility.h" +#include "r_viewport.h" // Lighting. // @@ -53,23 +54,43 @@ struct FSWColormap; namespace swrenderer { - extern double r_BaseVisibility; - extern double r_WallVisibility; - extern double r_FloorVisibility; - extern float r_TiltVisibility; - extern double r_SpriteVisibility; - extern int fixedlightlev; extern FSWColormap *fixedcolormap; extern FSpecialColormap *realfixedcolormap; - inline int R_ActualExtraLight(bool fog) { return fog ? 
0 : extralight << 4; } - - void R_SetVisibility(double visibility); - double R_GetVisibility(); - void R_SetupColormap(AActor *actor); + class LightVisibility + { + public: + static LightVisibility *Instance(); + + void SetVisibility(double visibility); + double GetVisibility() const { return CurrentVisibility; } + + double WallGlobVis() const { return WallVisibility; } + double SpriteGlobVis() const { return WallVisibility; } + double ParticleGlobVis() const { return WallVisibility * 0.5; } + double FlatPlaneGlobVis() const { return FloorVisibility; } + double SlopePlaneGlobVis() const { return TiltVisibility; } + + // The vis value to pass into the GETPALOOKUP or LIGHTSCALE macros + double WallVis(double screenZ) const { return WallGlobVis() / screenZ; } + double SpriteVis(double screenZ) const { return WallGlobVis() / screenZ; } + double ParticleVis(double screenZ) const { return WallGlobVis() / screenZ; } + double FlatPlaneVis(int screenY, double planeZ) const { return FlatPlaneGlobVis() / fabs(planeZ - ViewPos.Z) * fabs(CenterY - screenY); } + + private: + double BaseVisibility = 0.0; + double WallVisibility = 0.0; + double FloorVisibility = 0.0; + float TiltVisibility = 0.0f; + + double CurrentVisibility = 8.f; + double MaxVisForWall = 0.0; + double MaxVisForFloor = 0.0; + }; + class ColormapLight { public: @@ -78,4 +99,6 @@ namespace swrenderer void SetColormap(double visibility, int shade, FDynamicColormap *basecolormap, bool fullbright, bool invertColormap, bool fadeToBlack); }; + + inline int R_ActualExtraLight(bool fog) { return fog ? 
0 : extralight << 4; } } diff --git a/src/swrenderer/scene/r_portal.cpp b/src/swrenderer/scene/r_portal.cpp index af2832f01b..be18ff59f9 100644 --- a/src/swrenderer/scene/r_portal.cpp +++ b/src/swrenderer/scene/r_portal.cpp @@ -106,7 +106,7 @@ namespace swrenderer DAngle savedangle = ViewAngle; ptrdiff_t savedds_p = drawseglist->ds_p - drawseglist->drawsegs; size_t savedinteresting = drawseglist->FirstInterestingDrawseg; - double savedvisibility = R_GetVisibility(); + double savedvisibility = LightVisibility::Instance()->GetVisibility(); AActor *savedcamera = camera; sector_t *savedsector = viewsector; @@ -128,7 +128,7 @@ namespace swrenderer // Don't let gun flashes brighten the sky box AActor *sky = port->mSkybox; extralight = 0; - R_SetVisibility(sky->args[0] * 0.25f); + LightVisibility::Instance()->SetVisibility(sky->args[0] * 0.25f); ViewPos = sky->InterpolatedPosition(r_TicFracF); ViewAngle = savedangle + (sky->PrevAngles.Yaw + deltaangle(sky->PrevAngles.Yaw, sky->Angles.Yaw) * r_TicFracF); @@ -141,7 +141,7 @@ namespace swrenderer case PORTS_PORTAL: case PORTS_LINKEDPORTAL: extralight = pl->extralight; - R_SetVisibility(pl->visibility); + LightVisibility::Instance()->SetVisibility(pl->visibility); ViewPos.X = pl->viewpos.X + port->mDisplacement.X; ViewPos.Y = pl->viewpos.Y + port->mDisplacement.Y; ViewPos.Z = pl->viewpos.Z; @@ -260,7 +260,7 @@ namespace swrenderer camera = savedcamera; viewsector = savedsector; ViewPos = savedpos; - R_SetVisibility(savedvisibility); + LightVisibility::Instance()->SetVisibility(savedvisibility); extralight = savedextralight; ViewAngle = savedangle; R_SetViewAngle(); @@ -523,7 +523,7 @@ namespace swrenderer stacked_viewpos = ViewPos; stacked_angle = ViewAngle; stacked_extralight = extralight; - stacked_visibility = R_GetVisibility(); + stacked_visibility = LightVisibility::Instance()->GetVisibility(); } void RenderPortal::SetMainPortal() diff --git a/src/swrenderer/scene/r_viewport.cpp b/src/swrenderer/scene/r_viewport.cpp 
index d2df38f221..1753ab0b86 100644 --- a/src/swrenderer/scene/r_viewport.cpp +++ b/src/swrenderer/scene/r_viewport.cpp @@ -119,7 +119,8 @@ namespace swrenderer InitTextureMapping(); // Reset r_*Visibility vars - R_SetVisibility(R_GetVisibility()); + LightVisibility *visibility = LightVisibility::Instance(); + visibility->SetVisibility(visibility->GetVisibility()); SetupBuffer(); } diff --git a/src/swrenderer/things/r_particle.cpp b/src/swrenderer/things/r_particle.cpp index f7a7ba94bc..5d978fd9f5 100644 --- a/src/swrenderer/things/r_particle.cpp +++ b/src/swrenderer/things/r_particle.cpp @@ -201,8 +201,7 @@ namespace swrenderer vis->floorclip = 0; vis->foggy = foggy; - // Particles are slightly more visible than regular sprites. - vis->Light.SetColormap(tiz * r_SpriteVisibility * 0.5, shade, map, particle->bright != 0, false, false); + vis->Light.SetColormap(tiz * LightVisibility::Instance()->ParticleGlobVis(), shade, map, particle->bright != 0, false, false); VisibleSpriteList::Instance()->Push(vis); } diff --git a/src/swrenderer/things/r_sprite.cpp b/src/swrenderer/things/r_sprite.cpp index 126a6942e3..28082b2737 100644 --- a/src/swrenderer/things/r_sprite.cpp +++ b/src/swrenderer/things/r_sprite.cpp @@ -223,7 +223,7 @@ namespace swrenderer bool fullbright = !vis->foggy && ((renderflags & RF_FULLBRIGHT) || (thing->flags5 & MF5_BRIGHT)); bool fadeToBlack = (vis->RenderStyle.Flags & STYLEF_FadeToBlack) != 0; - vis->Light.SetColormap(r_SpriteVisibility / MAX(tz, MINZ), spriteshade, basecolormap, fullbright, invertcolormap, fadeToBlack); + vis->Light.SetColormap(LightVisibility::Instance()->SpriteGlobVis() / MAX(tz, MINZ), spriteshade, basecolormap, fullbright, invertcolormap, fadeToBlack); VisibleSpriteList::Instance()->Push(vis); } diff --git a/src/swrenderer/things/r_visiblesprite.cpp b/src/swrenderer/things/r_visiblesprite.cpp index c11014b358..f364a6cbdf 100644 --- a/src/swrenderer/things/r_visiblesprite.cpp +++ b/src/swrenderer/things/r_visiblesprite.cpp @@ 
-135,7 +135,7 @@ namespace swrenderer int spriteshade = LIGHT2SHADE(sec->lightlevel + R_ActualExtraLight(spr->foggy)); - Light.SetColormap(r_SpriteVisibility / MAX(MINZ, (double)spr->depth), spriteshade, mybasecolormap, isFullBright, invertcolormap, fadeToBlack); + Light.SetColormap(LightVisibility::Instance()->SpriteGlobVis() / MAX(MINZ, (double)spr->depth), spriteshade, mybasecolormap, isFullBright, invertcolormap, fadeToBlack); } } diff --git a/src/swrenderer/things/r_voxel.cpp b/src/swrenderer/things/r_voxel.cpp index 8d30f50c96..cf0d4acd9e 100644 --- a/src/swrenderer/things/r_voxel.cpp +++ b/src/swrenderer/things/r_voxel.cpp @@ -175,7 +175,7 @@ namespace swrenderer bool fullbright = !vis->foggy && ((renderflags & RF_FULLBRIGHT) || (thing->flags5 & MF5_BRIGHT)); bool fadeToBlack = (vis->RenderStyle.Flags & STYLEF_FadeToBlack) != 0; - vis->Light.SetColormap(r_SpriteVisibility / MAX(tz, MINZ), spriteshade, basecolormap, fullbright, invertcolormap, fadeToBlack); + vis->Light.SetColormap(LightVisibility::Instance()->SpriteGlobVis() / MAX(tz, MINZ), spriteshade, basecolormap, fullbright, invertcolormap, fadeToBlack); VisibleSpriteList::Instance()->Push(vis, true); } diff --git a/src/swrenderer/things/r_wallsprite.cpp b/src/swrenderer/things/r_wallsprite.cpp index 09d882b0f4..e5b95c31d1 100644 --- a/src/swrenderer/things/r_wallsprite.cpp +++ b/src/swrenderer/things/r_wallsprite.cpp @@ -132,7 +132,7 @@ namespace swrenderer vis->wallc = wallc; vis->foggy = foggy; - vis->Light.SetColormap(r_SpriteVisibility / MAX(tz, MINZ), spriteshade, basecolormap, false, false, false); + vis->Light.SetColormap(LightVisibility::Instance()->SpriteGlobVis() / MAX(tz, MINZ), spriteshade, basecolormap, false, false, false); VisibleSpriteList::Instance()->Push(vis); } @@ -180,7 +180,7 @@ namespace swrenderer } int shade = LIGHT2SHADE(spr->sector->lightlevel + R_ActualExtraLight(spr->foggy)); - double GlobVis = r_WallVisibility; + double GlobVis = LightVisibility::Instance()->WallGlobVis(); 
float lightleft = float(GlobVis / spr->wallc.sz1); float lightstep = float((GlobVis / spr->wallc.sz2 - lightleft) / (spr->wallc.sx2 - spr->wallc.sx1)); float light = lightleft + (x1 - spr->wallc.sx1) * lightstep; From ed05a2edd39c4919339bdcffd9e6af6a23a420a3 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 26 Jan 2017 10:22:54 +0100 Subject: [PATCH 764/912] Create CameraLight class --- src/gl/system/gl_swframebuffer.cpp | 2 +- src/polyrenderer/poly_renderer.cpp | 7 +++-- src/polyrenderer/scene/poly_decal.cpp | 4 ++- src/polyrenderer/scene/poly_particle.cpp | 3 +- src/polyrenderer/scene/poly_plane.cpp | 10 +++++-- src/polyrenderer/scene/poly_playersprite.cpp | 5 ++-- src/polyrenderer/scene/poly_sprite.cpp | 3 +- src/polyrenderer/scene/poly_wall.cpp | 3 +- src/polyrenderer/scene/poly_wallsprite.cpp | 3 +- src/swrenderer/drawers/r_draw.cpp | 7 +++-- src/swrenderer/line/r_line.cpp | 14 ++++++---- src/swrenderer/line/r_renderdrawsegment.cpp | 28 +++++++++++-------- src/swrenderer/line/r_walldraw.cpp | 10 ++++--- src/swrenderer/plane/r_flatplane.cpp | 9 +++--- src/swrenderer/plane/r_skyplane.cpp | 11 ++++---- src/swrenderer/plane/r_slopeplane.cpp | 9 +++--- src/swrenderer/r_swrenderer.cpp | 6 ++-- src/swrenderer/scene/r_light.cpp | 29 +++++++++++--------- src/swrenderer/scene/r_light.h | 14 +++++++--- src/swrenderer/scene/r_opaque_pass.cpp | 13 +++++---- src/swrenderer/scene/r_scene.cpp | 8 +++--- src/swrenderer/things/r_decal.cpp | 9 +++--- src/swrenderer/things/r_playersprite.cpp | 6 ++-- src/swrenderer/things/r_visiblesprite.cpp | 3 +- src/swrenderer/things/r_wallsprite.cpp | 9 +++--- src/v_draw.cpp | 2 +- src/win32/fb_d3d9.cpp | 2 +- 27 files changed, 133 insertions(+), 96 deletions(-) diff --git a/src/gl/system/gl_swframebuffer.cpp b/src/gl/system/gl_swframebuffer.cpp index 2250588d3a..9ea7066f8f 100644 --- a/src/gl/system/gl_swframebuffer.cpp +++ b/src/gl/system/gl_swframebuffer.cpp @@ -1409,7 +1409,7 @@ void OpenGLSWFrameBuffer::Draw3DPart(bool copy3d) 
uint32_t color0, color1; if (Accel2D) { - auto &map = swrenderer::realfixedcolormap; + auto &map = swrenderer::CameraLight::Instance()->realfixedcolormap; if (map == nullptr) { color0 = 0; diff --git a/src/polyrenderer/poly_renderer.cpp b/src/polyrenderer/poly_renderer.cpp index bf30e5ef84..be1fe0074c 100644 --- a/src/polyrenderer/poly_renderer.cpp +++ b/src/polyrenderer/poly_renderer.cpp @@ -69,10 +69,11 @@ void PolyRenderer::RenderView(player_t *player) RenderActorView(player->mo, false); // Apply special colormap if the target cannot do it - if (realfixedcolormap && r_swtruecolor && !(r_shadercolormaps && screen->Accel2D)) + CameraLight *cameraLight = CameraLight::Instance(); + if (cameraLight->realfixedcolormap && r_swtruecolor && !(r_shadercolormaps && screen->Accel2D)) { R_BeginDrawerCommands(); - DrawerCommandQueue::QueueCommand(realfixedcolormap, screen); + DrawerCommandQueue::QueueCommand(cameraLight->realfixedcolormap, screen); R_EndDrawerCommands(); } @@ -119,7 +120,7 @@ void PolyRenderer::RenderActorView(AActor *actor, bool dontmaplines) P_FindParticleSubsectors(); PO_LinkToSubsectors(); R_SetupFrame(actor); - swrenderer::R_SetupColormap(actor); + swrenderer::CameraLight::Instance()->SetCamera(actor); swrenderer::RenderViewport::Instance()->SetupFreelook(); ActorRenderFlags savedflags = camera->renderflags; diff --git a/src/polyrenderer/scene/poly_decal.cpp b/src/polyrenderer/scene/poly_decal.cpp index 5595819727..78e43df9eb 100644 --- a/src/polyrenderer/scene/poly_decal.cpp +++ b/src/polyrenderer/scene/poly_decal.cpp @@ -133,12 +133,14 @@ void RenderPolyDecal::Render(const TriMatrix &worldToClip, const Vec4f &clipPlan bool fullbrightSprite = (decal->RenderFlags & RF_FULLBRIGHT) == RF_FULLBRIGHT; + swrenderer::CameraLight *cameraLight = swrenderer::CameraLight::Instance(); + PolyDrawArgs args; args.uniforms.flags = 0; args.SetColormap(front->ColorMap); args.SetTexture(tex, decal->Translation, true); args.uniforms.globvis = 
(float)swrenderer::LightVisibility::Instance()->WallGlobVis(); - if (fullbrightSprite || swrenderer::fixedlightlev >= 0 || swrenderer::fixedcolormap) + if (fullbrightSprite || cameraLight->fixedlightlev >= 0 || cameraLight->fixedcolormap) { args.uniforms.light = 256; args.uniforms.flags |= TriUniforms::fixed_light; diff --git a/src/polyrenderer/scene/poly_particle.cpp b/src/polyrenderer/scene/poly_particle.cpp index c49e2e5317..bb21275f98 100644 --- a/src/polyrenderer/scene/poly_particle.cpp +++ b/src/polyrenderer/scene/poly_particle.cpp @@ -70,12 +70,13 @@ void RenderPolyParticle::Render(const TriMatrix &worldToClip, const Vec4f &clipP // int color = (particle->color >> 24) & 0xff; // pal index, I think bool fullbrightSprite = particle->bright != 0; + swrenderer::CameraLight *cameraLight = swrenderer::CameraLight::Instance(); PolyDrawArgs args; args.uniforms.globvis = (float)swrenderer::LightVisibility::Instance()->ParticleGlobVis(); - if (fullbrightSprite || swrenderer::fixedlightlev >= 0 || swrenderer::fixedcolormap) + if (fullbrightSprite || cameraLight->fixedlightlev >= 0 || cameraLight->fixedcolormap) { args.uniforms.light = 256; args.uniforms.flags = TriUniforms::fixed_light; diff --git a/src/polyrenderer/scene/poly_plane.cpp b/src/polyrenderer/scene/poly_plane.cpp index eae3fced22..1fcbad9c99 100644 --- a/src/polyrenderer/scene/poly_plane.cpp +++ b/src/polyrenderer/scene/poly_plane.cpp @@ -96,8 +96,10 @@ void RenderPolyPlane::Render3DFloor(const TriMatrix &worldToClip, const Vec4f &c if (tex->UseType == FTexture::TEX_Null) return; + swrenderer::CameraLight *cameraLight = swrenderer::CameraLight::Instance(); + int lightlevel = 255; - if (swrenderer::fixedlightlev < 0 && sub->sector->e->XFloor.lightlist.Size()) + if (cameraLight->fixedlightlev < 0 && sub->sector->e->XFloor.lightlist.Size()) { lightlist_t *light = P_GetPlaneLight(sub->sector, &sub->sector->ceilingplane, false); //basecolormap = light->extra_colormap; @@ -109,7 +111,7 @@ void 
RenderPolyPlane::Render3DFloor(const TriMatrix &worldToClip, const Vec4f &c PolyDrawArgs args; args.uniforms.globvis = (float)swrenderer::LightVisibility::Instance()->SlopePlaneGlobVis() * 48.0f; args.uniforms.light = (uint32_t)(lightlevel / 255.0f * 256.0f); - if (swrenderer::fixedlightlev >= 0 || swrenderer::fixedcolormap) + if (cameraLight->fixedlightlev >= 0 || cameraLight->fixedcolormap) args.uniforms.light = 256; args.uniforms.flags = 0; args.uniforms.subsectorDepth = subsectorDepth; @@ -300,10 +302,12 @@ void RenderPolyPlane::Render(const TriMatrix &worldToClip, const Vec4f &clipPlan UVTransform transform(ceiling ? frontsector->planes[sector_t::ceiling].xform : frontsector->planes[sector_t::floor].xform, tex); + swrenderer::CameraLight *cameraLight = swrenderer::CameraLight::Instance(); + PolyDrawArgs args; args.uniforms.globvis = (float)swrenderer::LightVisibility::Instance()->SlopePlaneGlobVis() * 48.0f; args.uniforms.light = (uint32_t)(frontsector->lightlevel / 255.0f * 256.0f); - if (swrenderer::fixedlightlev >= 0 || swrenderer::fixedcolormap) + if (cameraLight->fixedlightlev >= 0 || cameraLight->fixedcolormap) args.uniforms.light = 256; args.uniforms.flags = 0; args.uniforms.subsectorDepth = isSky ? RenderPolyScene::SkySubsectorDepth : subsectorDepth; diff --git a/src/polyrenderer/scene/poly_playersprite.cpp b/src/polyrenderer/scene/poly_playersprite.cpp index 3333cf9a77..dd8fc48509 100644 --- a/src/polyrenderer/scene/poly_playersprite.cpp +++ b/src/polyrenderer/scene/poly_playersprite.cpp @@ -326,14 +326,15 @@ void RenderPolyPlayerSprites::RenderSprite(DPSprite *sprite, AActor *owner, floa } // If the main colormap has fixed lights, and this sprite is being drawn with that // colormap, disable acceleration so that the lights can remain fixed. 
- if (!noaccel && swrenderer::realfixedcolormap == nullptr && + swrenderer::CameraLight *cameraLight = swrenderer::CameraLight::Instance(); + if (!noaccel && cameraLight->realfixedcolormap == nullptr && NormalLightHasFixedLights && mybasecolormap == &NormalLight && tex->UseBasePalette()) { noaccel = true; } // [SP] If emulating GZDoom fullbright, disable acceleration - if (r_fullbrightignoresectorcolor && swrenderer::fixedlightlev >= 0) + if (r_fullbrightignoresectorcolor && cameraLight->fixedlightlev >= 0) mybasecolormap = &FullNormalLight; if (r_fullbrightignoresectorcolor && !foggy && sprite->GetState()->GetFullbright()) mybasecolormap = &FullNormalLight; diff --git a/src/polyrenderer/scene/poly_sprite.cpp b/src/polyrenderer/scene/poly_sprite.cpp index ff84ce5e36..0b9b7a9860 100644 --- a/src/polyrenderer/scene/poly_sprite.cpp +++ b/src/polyrenderer/scene/poly_sprite.cpp @@ -136,11 +136,12 @@ void RenderPolySprite::Render(const TriMatrix &worldToClip, const Vec4f &clipPla } bool fullbrightSprite = ((thing->renderflags & RF_FULLBRIGHT) || (thing->flags5 & MF5_BRIGHT)); + swrenderer::CameraLight *cameraLight = swrenderer::CameraLight::Instance(); PolyDrawArgs args; args.uniforms.globvis = (float)swrenderer::LightVisibility::Instance()->SpriteGlobVis(); args.uniforms.flags = 0; - if (fullbrightSprite || swrenderer::fixedlightlev >= 0 || swrenderer::fixedcolormap) + if (fullbrightSprite || cameraLight->fixedlightlev >= 0 || cameraLight->fixedcolormap) { args.uniforms.light = 256; args.uniforms.flags |= TriUniforms::fixed_light; diff --git a/src/polyrenderer/scene/poly_wall.cpp b/src/polyrenderer/scene/poly_wall.cpp index 64de4c6753..18df84744f 100644 --- a/src/polyrenderer/scene/poly_wall.cpp +++ b/src/polyrenderer/scene/poly_wall.cpp @@ -352,7 +352,8 @@ FTexture *RenderPolyWall::GetTexture() int RenderPolyWall::GetLightLevel() { - if (swrenderer::fixedlightlev >= 0 || swrenderer::fixedcolormap) + swrenderer::CameraLight *cameraLight = 
swrenderer::CameraLight::Instance(); + if (cameraLight->fixedlightlev >= 0 || cameraLight->fixedcolormap) { return 255; } diff --git a/src/polyrenderer/scene/poly_wallsprite.cpp b/src/polyrenderer/scene/poly_wallsprite.cpp index 6025a274e5..1d321fbc0c 100644 --- a/src/polyrenderer/scene/poly_wallsprite.cpp +++ b/src/polyrenderer/scene/poly_wallsprite.cpp @@ -98,10 +98,11 @@ void RenderPolyWallSprite::Render(const TriMatrix &worldToClip, const Vec4f &cli } bool fullbrightSprite = ((thing->renderflags & RF_FULLBRIGHT) || (thing->flags5 & MF5_BRIGHT)); + swrenderer::CameraLight *cameraLight = swrenderer::CameraLight::Instance(); PolyDrawArgs args; args.uniforms.globvis = (float)swrenderer::LightVisibility::Instance()->WallGlobVis(); - if (fullbrightSprite || swrenderer::fixedlightlev >= 0 || swrenderer::fixedcolormap) + if (fullbrightSprite || cameraLight->fixedlightlev >= 0 || cameraLight->fixedcolormap) { args.uniforms.light = 256; args.uniforms.flags = TriUniforms::fixed_light; diff --git a/src/swrenderer/drawers/r_draw.cpp b/src/swrenderer/drawers/r_draw.cpp index cf3e4edbb5..36b8407457 100644 --- a/src/swrenderer/drawers/r_draw.cpp +++ b/src/swrenderer/drawers/r_draw.cpp @@ -456,11 +456,12 @@ namespace swrenderer return false; colfunc = &SWPixelFormatDrawers::DrawShadedColumn; drawer_needs_pal_input = true; - dc_color = fixedcolormap ? fixedcolormap->Maps[APART(color)] : basecolormap->Maps[APART(color)]; + CameraLight *cameraLight = CameraLight::Instance(); + dc_color = cameraLight->fixedcolormap ? 
cameraLight->fixedcolormap->Maps[APART(color)] : basecolormap->Maps[APART(color)]; basecolormap = &ShadeFakeColormap[16 - alpha]; - if (fixedlightlev >= 0 && fixedcolormap == NULL) + if (cameraLight->fixedlightlev >= 0 && cameraLight->fixedcolormap == NULL) { - R_SetColorMapLight(basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); + R_SetColorMapLight(basecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->fixedlightlev)); } else { diff --git a/src/swrenderer/line/r_line.cpp b/src/swrenderer/line/r_line.cpp index 84c1470f8b..5f16f2b319 100644 --- a/src/swrenderer/line/r_line.cpp +++ b/src/swrenderer/line/r_line.cpp @@ -901,7 +901,8 @@ namespace swrenderer walltexcoords.Project(sidedef->TexelLength * lwallscale, WallC.sx1, WallC.sx2, WallT); - if (fixedcolormap == NULL && fixedlightlev < 0) + CameraLight *cameraLight = CameraLight::Instance(); + if (cameraLight->fixedcolormap == nullptr && cameraLight->fixedlightlev < 0) { wallshade = LIGHT2SHADE(curline->sidedef->GetLightLevel(foggy, frontsector->lightlevel) + R_ActualExtraLight(foggy)); double GlobVis = LightVisibility::Instance()->WallGlobVis(); @@ -918,7 +919,7 @@ namespace swrenderer bool SWRenderLine::IsFogBoundary(sector_t *front, sector_t *back) const { - return r_fogboundary && fixedcolormap == NULL && front->ColorMap->Fade && + return r_fogboundary && CameraLight::Instance()->fixedcolormap == nullptr && front->ColorMap->Fade && front->ColorMap->Fade != back->ColorMap->Fade && (front->GetTexture(sector_t::ceiling) != skyflatnum || back->GetTexture(sector_t::ceiling) != skyflatnum); } @@ -932,10 +933,11 @@ namespace swrenderer double yscale; fixed_t xoffset = rw_offset; - if (fixedlightlev >= 0) - R_SetColorMapLight((r_fullbrightignoresectorcolor) ? 
&FullNormalLight : basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); - else if (fixedcolormap != NULL) - R_SetColorMapLight(fixedcolormap, 0, 0); + CameraLight *cameraLight = CameraLight::Instance(); + if (cameraLight->fixedlightlev >= 0) + R_SetColorMapLight((r_fullbrightignoresectorcolor) ? &FullNormalLight : basecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->fixedlightlev)); + else if (cameraLight->fixedcolormap != nullptr) + R_SetColorMapLight(cameraLight->fixedcolormap, 0, 0); // clip wall to the floor and ceiling auto ceilingclip = RenderOpaquePass::Instance()->ceilingclip; diff --git a/src/swrenderer/line/r_renderdrawsegment.cpp b/src/swrenderer/line/r_renderdrawsegment.cpp index 07a7817958..142162496d 100644 --- a/src/swrenderer/line/r_renderdrawsegment.cpp +++ b/src/swrenderer/line/r_renderdrawsegment.cpp @@ -97,7 +97,8 @@ namespace swrenderer Clip3DFloors *clip3d = Clip3DFloors::Instance(); - if (fixedlightlev < 0) + CameraLight *cameraLight = CameraLight::Instance(); + if (cameraLight->fixedlightlev < 0) { if (!(clip3d->fake3D & FAKE3D_CLIPTOP)) { @@ -139,10 +140,10 @@ namespace swrenderer spryscale = ds->iscale + ds->iscalestep * (x1 - ds->x1); rw_scalestep = ds->iscalestep; - if (fixedlightlev >= 0) - R_SetColorMapLight((r_fullbrightignoresectorcolor) ? &FullNormalLight : basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); - else if (fixedcolormap != nullptr) - R_SetColorMapLight(fixedcolormap, 0, 0); + if (cameraLight->fixedlightlev >= 0) + R_SetColorMapLight((r_fullbrightignoresectorcolor) ? 
&FullNormalLight : basecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->fixedlightlev)); + else if (cameraLight->fixedcolormap != nullptr) + R_SetColorMapLight(cameraLight->fixedcolormap, 0, 0); // find positioning texheight = tex->GetScaledHeightDouble(); @@ -269,7 +270,7 @@ namespace swrenderer { for (int x = x1; x < x2; ++x) { - if (fixedcolormap == nullptr && fixedlightlev < 0) + if (cameraLight->fixedcolormap == nullptr && cameraLight->fixedlightlev < 0) { R_SetColorMapLight(basecolormap, rw_light, wallshade); } @@ -440,10 +441,11 @@ namespace swrenderer texturemid += rowoffset; } - if (fixedlightlev >= 0) - R_SetColorMapLight((r_fullbrightignoresectorcolor) ? &FullNormalLight : basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); - else if (fixedcolormap != nullptr) - R_SetColorMapLight(fixedcolormap, 0, 0); + CameraLight *cameraLight = CameraLight::Instance(); + if (cameraLight->fixedlightlev >= 0) + R_SetColorMapLight((r_fullbrightignoresectorcolor) ? &FullNormalLight : basecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->fixedlightlev)); + else if (cameraLight->fixedcolormap != nullptr) + R_SetColorMapLight(cameraLight->fixedcolormap, 0, 0); WallC.sz1 = ds->sz1; WallC.sz2 = ds->sz2; @@ -665,7 +667,8 @@ namespace swrenderer // correct colors now FDynamicColormap *basecolormap = frontsector->ColorMap; wallshade = ds->shade; - if (fixedlightlev < 0) + CameraLight *cameraLight = CameraLight::Instance(); + if (cameraLight->fixedlightlev < 0) { if ((ds->bFakeBoundary & 3) == 2) { @@ -839,7 +842,8 @@ namespace swrenderer // correct colors now FDynamicColormap *basecolormap = frontsector->ColorMap; wallshade = ds->shade; - if (fixedlightlev < 0) + CameraLight *cameraLight = CameraLight::Instance(); + if (cameraLight->fixedlightlev < 0) { if ((ds->bFakeBoundary & 3) == 2) { diff --git a/src/swrenderer/line/r_walldraw.cpp b/src/swrenderer/line/r_walldraw.cpp index 66c036a3f7..75907c2615 100644 --- a/src/swrenderer/line/r_walldraw.cpp +++ b/src/swrenderer/line/r_walldraw.cpp @@ 
-326,7 +326,8 @@ namespace swrenderer dc_wall_fracbits = r_swtruecolor ? FRACBITS : fracbits; - bool fixed = (fixedcolormap != NULL || fixedlightlev >= 0); + CameraLight *cameraLight = CameraLight::Instance(); + bool fixed = (cameraLight->fixedcolormap != NULL || cameraLight->fixedlightlev >= 0); if (fixed) { dc_wall_colormap[0] = dc_colormap; @@ -339,8 +340,8 @@ namespace swrenderer dc_wall_light[3] = 0; } - if (fixedcolormap) - R_SetColorMapLight(fixedcolormap, 0, 0); + if (cameraLight->fixedcolormap) + R_SetColorMapLight(cameraLight->fixedcolormap, 0, 0); else R_SetColorMapLight(basecolormap, 0, 0); @@ -455,7 +456,8 @@ namespace swrenderer } else { - if (fixedcolormap != NULL || fixedlightlev >= 0 || !(frontsector->e && frontsector->e->XFloor.lightlist.Size())) + CameraLight *cameraLight = CameraLight::Instance(); + if (cameraLight->fixedcolormap != NULL || cameraLight->fixedlightlev >= 0 || !(frontsector->e && frontsector->e->XFloor.lightlist.Size())) { ProcessNormalWall(uwal, dwal, texturemid, swal, lwal); } diff --git a/src/swrenderer/plane/r_flatplane.cpp b/src/swrenderer/plane/r_flatplane.cpp index 08fad4ce34..dff82a5e45 100644 --- a/src/swrenderer/plane/r_flatplane.cpp +++ b/src/swrenderer/plane/r_flatplane.cpp @@ -108,14 +108,15 @@ namespace swrenderer basecolormap = colormap; GlobVis = LightVisibility::Instance()->FlatPlaneGlobVis() / planeheight; ds_light = 0; - if (fixedlightlev >= 0) + CameraLight *cameraLight = CameraLight::Instance(); + if (cameraLight->fixedlightlev >= 0) { - R_SetDSColorMapLight(basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); + R_SetDSColorMapLight(basecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->fixedlightlev)); plane_shade = false; } - else if (fixedcolormap) + else if (cameraLight->fixedcolormap) { - R_SetDSColorMapLight(fixedcolormap, 0, 0); + R_SetDSColorMapLight(cameraLight->fixedcolormap, 0, 0); plane_shade = false; } else diff --git a/src/swrenderer/plane/r_skyplane.cpp b/src/swrenderer/plane/r_skyplane.cpp index 
451d894b6b..d07c25b445 100644 --- a/src/swrenderer/plane/r_skyplane.cpp +++ b/src/swrenderer/plane/r_skyplane.cpp @@ -148,21 +148,22 @@ namespace swrenderer } bool fakefixed = false; - if (fixedcolormap) + CameraLight *cameraLight = CameraLight::Instance(); + if (cameraLight->fixedcolormap) { - R_SetColorMapLight(fixedcolormap, 0, 0); + R_SetColorMapLight(cameraLight->fixedcolormap, 0, 0); } else { fakefixed = true; - fixedcolormap = &NormalLight; - R_SetColorMapLight(fixedcolormap, 0, 0); + cameraLight->fixedcolormap = &NormalLight; + R_SetColorMapLight(cameraLight->fixedcolormap, 0, 0); } DrawSky(pl); if (fakefixed) - fixedcolormap = NULL; + cameraLight->fixedcolormap = nullptr; } void RenderSkyPlane::DrawSkyColumnStripe(int start_x, int y1, int y2, int columns, double scale, double texturemid, double yrepeat) diff --git a/src/swrenderer/plane/r_slopeplane.cpp b/src/swrenderer/plane/r_slopeplane.cpp index d697fecad7..510697eeb1 100644 --- a/src/swrenderer/plane/r_slopeplane.cpp +++ b/src/swrenderer/plane/r_slopeplane.cpp @@ -150,14 +150,15 @@ namespace swrenderer basecolormap = colormap; - if (fixedlightlev >= 0) + CameraLight *cameraLight = CameraLight::Instance(); + if (cameraLight->fixedlightlev >= 0) { - R_SetDSColorMapLight(basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); + R_SetDSColorMapLight(basecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->fixedlightlev)); plane_shade = false; } - else if (fixedcolormap) + else if (cameraLight->fixedcolormap) { - R_SetDSColorMapLight(fixedcolormap, 0, 0); + R_SetDSColorMapLight(cameraLight->fixedcolormap, 0, 0); plane_shade = false; } else diff --git a/src/swrenderer/r_swrenderer.cpp b/src/swrenderer/r_swrenderer.cpp index e8e1d2c12b..a558e04c37 100644 --- a/src/swrenderer/r_swrenderer.cpp +++ b/src/swrenderer/r_swrenderer.cpp @@ -254,8 +254,7 @@ void FSoftwareRenderer::RenderTextureView (FCanvasTexture *tex, AActor *viewpoin // curse Doom's overuse of global variables in the renderer. 
// These get clobbered by rendering to a camera texture but they need to be preserved so the final rendering can be done with the correct palette. - FSWColormap *savecolormap = fixedcolormap; - FSpecialColormap *savecm = realfixedcolormap; + CameraLight savedCameraLight = *CameraLight::Instance(); DAngle savedfov = FieldOfView; R_SetFOV ((double)fov); @@ -315,8 +314,7 @@ void FSoftwareRenderer::RenderTextureView (FCanvasTexture *tex, AActor *viewpoin tex->SetUpdated(); - fixedcolormap = savecolormap; - realfixedcolormap = savecm; + *CameraLight::Instance() = savedCameraLight; } sector_t *FSoftwareRenderer::FakeFlat(sector_t *sec, sector_t *tempsec, int *floorlightlevel, int *ceilinglightlevel) diff --git a/src/swrenderer/scene/r_light.cpp b/src/swrenderer/scene/r_light.cpp index a57daaea85..851fab7500 100644 --- a/src/swrenderer/scene/r_light.cpp +++ b/src/swrenderer/scene/r_light.cpp @@ -38,21 +38,23 @@ EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor) namespace swrenderer { - int fixedlightlev; - FSWColormap *fixedcolormap; - FSpecialColormap *realfixedcolormap; + CameraLight *CameraLight::Instance() + { + static CameraLight instance; + return &instance; + } - void R_SetupColormap(AActor *actor) + void CameraLight::SetCamera(AActor *actor) { player_t *player = actor->player; - if (camera && camera->player != 0) + if (camera && camera->player != nullptr) player = camera->player; - realfixedcolormap = NULL; - fixedcolormap = NULL; + realfixedcolormap = nullptr; + fixedcolormap = nullptr; fixedlightlev = -1; - if (player != NULL && camera == player->mo) + if (player != nullptr && camera == player->mo) { if (player->fixedcolormap >= 0 && player->fixedcolormap < (int)SpecialColormaps.Size()) { @@ -80,7 +82,7 @@ namespace swrenderer } } // [RH] Inverse light for shooting the Sigil - if (fixedcolormap == NULL && extralight == INT_MIN) + if (fixedcolormap == nullptr && extralight == INT_MIN) { fixedcolormap = &SpecialColormaps[INVERSECOLORMAP]; extralight = 0; @@ -184,15 
+186,16 @@ namespace swrenderer basecolormap = GetSpecialLights(basecolormap->Color, basecolormap->Fade.InverseColor(), basecolormap->Desaturate); } - if (fixedcolormap) + CameraLight *cameraLight = CameraLight::Instance(); + if (cameraLight->fixedcolormap) { - BaseColormap = fixedcolormap; + BaseColormap = cameraLight->fixedcolormap; ColormapNum = 0; } - else if (fixedlightlev >= 0) + else if (cameraLight->fixedlightlev >= 0) { BaseColormap = (r_fullbrightignoresectorcolor) ? &FullNormalLight : basecolormap; - ColormapNum = fixedlightlev >> COLORMAPSHIFT; + ColormapNum = cameraLight->fixedlightlev >> COLORMAPSHIFT; } else if (fullbright) { diff --git a/src/swrenderer/scene/r_light.h b/src/swrenderer/scene/r_light.h index 04a12c0ac7..cebb34e9ed 100644 --- a/src/swrenderer/scene/r_light.h +++ b/src/swrenderer/scene/r_light.h @@ -54,11 +54,17 @@ struct FSWColormap; namespace swrenderer { - extern int fixedlightlev; - extern FSWColormap *fixedcolormap; - extern FSpecialColormap *realfixedcolormap; + class CameraLight + { + public: + static CameraLight *Instance(); - void R_SetupColormap(AActor *actor); + int fixedlightlev = 0; + FSWColormap *fixedcolormap = nullptr; + FSpecialColormap *realfixedcolormap = nullptr; + + void SetCamera(AActor *actor); + }; class LightVisibility { diff --git a/src/swrenderer/scene/r_opaque_pass.cpp b/src/swrenderer/scene/r_opaque_pass.cpp index 2018fe38f3..2391d05428 100644 --- a/src/swrenderer/scene/r_opaque_pass.cpp +++ b/src/swrenderer/scene/r_opaque_pass.cpp @@ -479,8 +479,9 @@ namespace swrenderer bool foggy = level.fadeto || frontsector->ColorMap->Fade || (level.flags & LEVEL_HASFADETABLE); // kg3D - fake lights + CameraLight *cameraLight = CameraLight::Instance(); FDynamicColormap *basecolormap; - if (fixedlightlev < 0 && frontsector->e && frontsector->e->XFloor.lightlist.Size()) + if (cameraLight->fixedlightlev < 0 && frontsector->e && frontsector->e->XFloor.lightlist.Size()) { light = P_GetPlaneLight(frontsector, 
&frontsector->ceilingplane, false); basecolormap = light->extra_colormap; @@ -493,7 +494,7 @@ namespace swrenderer } else { - basecolormap = (r_fullbrightignoresectorcolor && fixedlightlev >= 0) ? &FullNormalLight : frontsector->ColorMap; + basecolormap = (r_fullbrightignoresectorcolor && cameraLight->fixedlightlev >= 0) ? &FullNormalLight : frontsector->ColorMap; } portal = frontsector->ValidatePortal(sector_t::ceiling); @@ -518,7 +519,7 @@ namespace swrenderer if (ceilingplane) ceilingplane->AddLights(frontsector->lighthead); - if (fixedlightlev < 0 && frontsector->e && frontsector->e->XFloor.lightlist.Size()) + if (cameraLight->fixedlightlev < 0 && frontsector->e && frontsector->e->XFloor.lightlist.Size()) { light = P_GetPlaneLight(frontsector, &frontsector->floorplane, false); basecolormap = light->extra_colormap; @@ -531,7 +532,7 @@ namespace swrenderer } else { - basecolormap = (r_fullbrightignoresectorcolor && fixedlightlev >= 0) ? &FullNormalLight : frontsector->ColorMap; + basecolormap = (r_fullbrightignoresectorcolor && cameraLight->fixedlightlev >= 0) ? 
&FullNormalLight : frontsector->ColorMap; } // killough 3/7/98: Add (x,y) offsets to flats, add deep water check @@ -603,7 +604,7 @@ namespace swrenderer else position = sector_t::floor; frontsector = &tempsec; - if (fixedlightlev < 0 && sub->sector->e->XFloor.lightlist.Size()) + if (cameraLight->fixedlightlev < 0 && sub->sector->e->XFloor.lightlist.Size()) { light = P_GetPlaneLight(sub->sector, &frontsector->floorplane, false); basecolormap = light->extra_colormap; @@ -668,7 +669,7 @@ namespace swrenderer frontsector = &tempsec; tempsec.ceilingplane.ChangeHeight(-1 / 65536.); - if (fixedlightlev < 0 && sub->sector->e->XFloor.lightlist.Size()) + if (cameraLight->fixedlightlev < 0 && sub->sector->e->XFloor.lightlist.Size()) { light = P_GetPlaneLight(sub->sector, &frontsector->ceilingplane, false); basecolormap = light->extra_colormap; diff --git a/src/swrenderer/scene/r_scene.cpp b/src/swrenderer/scene/r_scene.cpp index a85120eb30..1941bd4cbb 100644 --- a/src/swrenderer/scene/r_scene.cpp +++ b/src/swrenderer/scene/r_scene.cpp @@ -103,9 +103,9 @@ namespace swrenderer RenderActorView(player->mo); // Apply special colormap if the target cannot do it - if (realfixedcolormap && r_swtruecolor && !(r_shadercolormaps && screen->Accel2D)) + if (CameraLight::Instance()->realfixedcolormap && r_swtruecolor && !(r_shadercolormaps && screen->Accel2D)) { - DrawerCommandQueue::QueueCommand(realfixedcolormap, screen); + DrawerCommandQueue::QueueCommand(CameraLight::Instance()->realfixedcolormap, screen); } R_EndDrawerCommands(); @@ -125,7 +125,7 @@ namespace swrenderer clip3d->ResetClip(); // reset clips (floor/ceiling) R_SetupFrame(actor); - R_SetupColormap(actor); + CameraLight::Instance()->SetCamera(actor); RenderViewport::Instance()->SetupFreelook(); RenderPortal::Instance()->CopyStackedViewParameters(); @@ -193,7 +193,7 @@ namespace swrenderer // copy to the screen does not use a special colormap shader. 
if (!r_shadercolormaps && !r_swtruecolor) { - realfixedcolormap = NULL; + CameraLight::Instance()->realfixedcolormap = NULL; } } diff --git a/src/swrenderer/things/r_decal.cpp b/src/swrenderer/things/r_decal.cpp index 23f987c63e..eb34f7c3a9 100644 --- a/src/swrenderer/things/r_decal.cpp +++ b/src/swrenderer/things/r_decal.cpp @@ -253,10 +253,11 @@ namespace swrenderer } light = lightleft + (x1 - savecoord.sx1) * lightstep; - if (fixedlightlev >= 0) - R_SetColorMapLight((r_fullbrightignoresectorcolor) ? &FullNormalLight : usecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); - else if (fixedcolormap != NULL) - R_SetColorMapLight(fixedcolormap, 0, 0); + CameraLight *cameraLight = CameraLight::Instance(); + if (cameraLight->fixedlightlev >= 0) + R_SetColorMapLight((r_fullbrightignoresectorcolor) ? &FullNormalLight : usecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->fixedlightlev)); + else if (cameraLight->fixedcolormap != NULL) + R_SetColorMapLight(cameraLight->fixedcolormap, 0, 0); else if (!foggy && (decal->RenderFlags & RF_FULLBRIGHT)) R_SetColorMapLight((r_fullbrightignoresectorcolor) ? &FullNormalLight : usecolormap, 0, 0); else diff --git a/src/swrenderer/things/r_playersprite.cpp b/src/swrenderer/things/r_playersprite.cpp index 649df88586..a1041808ea 100644 --- a/src/swrenderer/things/r_playersprite.cpp +++ b/src/swrenderer/things/r_playersprite.cpp @@ -90,7 +90,8 @@ namespace swrenderer return; FDynamicColormap *basecolormap; - if (fixedlightlev < 0 && viewsector->e && viewsector->e->XFloor.lightlist.Size()) + CameraLight *cameraLight = CameraLight::Instance(); + if (cameraLight->fixedlightlev < 0 && viewsector->e && viewsector->e->XFloor.lightlist.Size()) { for (i = viewsector->e->XFloor.lightlist.Size() - 1; i >= 0; i--) { @@ -480,7 +481,8 @@ namespace swrenderer } // If the main colormap has fixed lights, and this sprite is being drawn with that // colormap, disable acceleration so that the lights can remain fixed. 
- if (!noaccel && realfixedcolormap == nullptr && + CameraLight *cameraLight = CameraLight::Instance(); + if (!noaccel && cameraLight->realfixedcolormap == nullptr && NormalLightHasFixedLights && vis.Light.BaseColormap == &NormalLight && vis.pic->UseBasePalette()) { diff --git a/src/swrenderer/things/r_visiblesprite.cpp b/src/swrenderer/things/r_visiblesprite.cpp index f364a6cbdf..b2c0b054a1 100644 --- a/src/swrenderer/things/r_visiblesprite.cpp +++ b/src/swrenderer/things/r_visiblesprite.cpp @@ -86,7 +86,8 @@ namespace swrenderer if ((clip3d->fake3D & FAKE3D_CLIPTOP) && spr->gzb >= clip3d->sclipTop) return; // kg3D - correct colors now - if (!fixedcolormap && fixedlightlev < 0 && spr->sector->e && spr->sector->e->XFloor.lightlist.Size()) + CameraLight *cameraLight = CameraLight::Instance(); + if (!cameraLight->fixedcolormap && cameraLight->fixedlightlev < 0 && spr->sector->e && spr->sector->e->XFloor.lightlist.Size()) { if (!(clip3d->fake3D & FAKE3D_CLIPTOP)) { diff --git a/src/swrenderer/things/r_wallsprite.cpp b/src/swrenderer/things/r_wallsprite.cpp index e5b95c31d1..f4587e6a1f 100644 --- a/src/swrenderer/things/r_wallsprite.cpp +++ b/src/swrenderer/things/r_wallsprite.cpp @@ -184,10 +184,11 @@ namespace swrenderer float lightleft = float(GlobVis / spr->wallc.sz1); float lightstep = float((GlobVis / spr->wallc.sz2 - lightleft) / (spr->wallc.sx2 - spr->wallc.sx1)); float light = lightleft + (x1 - spr->wallc.sx1) * lightstep; - if (fixedlightlev >= 0) - R_SetColorMapLight(usecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); - else if (fixedcolormap != NULL) - R_SetColorMapLight(fixedcolormap, 0, 0); + CameraLight *cameraLight = CameraLight::Instance(); + if (cameraLight->fixedlightlev >= 0) + R_SetColorMapLight(usecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->fixedlightlev)); + else if (cameraLight->fixedcolormap != NULL) + R_SetColorMapLight(cameraLight->fixedcolormap, 0, 0); else if (!spr->foggy && (spr->renderflags & RF_FULLBRIGHT)) 
R_SetColorMapLight((r_fullbrightignoresectorcolor) ? &FullNormalLight : usecolormap, 0, 0); else diff --git a/src/v_draw.cpp b/src/v_draw.cpp index 296cc29b13..567799d478 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -197,7 +197,7 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) R_SetTranslationMap(identitymap); } - fixedcolormap = dc_fcolormap; + CameraLight::Instance()->fixedcolormap = dc_fcolormap; bool visible; FDynamicColormap *basecolormap = nullptr; if (r_swtruecolor) diff --git a/src/win32/fb_d3d9.cpp b/src/win32/fb_d3d9.cpp index d28ef2246f..e04ab1dc12 100644 --- a/src/win32/fb_d3d9.cpp +++ b/src/win32/fb_d3d9.cpp @@ -1405,7 +1405,7 @@ void D3DFB::Draw3DPart(bool copy3d) D3DCOLOR color0, color1; if (Accel2D) { - auto &map = swrenderer::realfixedcolormap; + auto &map = swrenderer::CameraLight::Instance()->realfixedcolormap; if (map == NULL) { color0 = 0; From 6a826f37bd6b244d2a4c483e75ccbf43e071080d Mon Sep 17 00:00:00 2001 From: "alexey.lysiuk" Date: Thu, 26 Jan 2017 22:21:22 +0200 Subject: [PATCH 765/912] Fixed compilation with GCC/Clang No more 'error: cannot jump from this goto statement to its label' --- src/swrenderer/things/r_decal.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/swrenderer/things/r_decal.cpp b/src/swrenderer/things/r_decal.cpp index eb34f7c3a9..c6843a7fc8 100644 --- a/src/swrenderer/things/r_decal.cpp +++ b/src/swrenderer/things/r_decal.cpp @@ -147,6 +147,7 @@ namespace swrenderer decal_left = decal_pos - edge_left * angvec - ViewPos; decal_right = decal_pos + edge_right * angvec - ViewPos; + CameraLight *cameraLight; double texturemid; if (WallC.Init(decal_left, decal_right, TOO_CLOSE_Z)) @@ -253,7 +254,7 @@ namespace swrenderer } light = lightleft + (x1 - savecoord.sx1) * lightstep; - CameraLight *cameraLight = CameraLight::Instance(); + cameraLight = CameraLight::Instance(); if (cameraLight->fixedlightlev >= 0) R_SetColorMapLight((r_fullbrightignoresectorcolor) ? 
&FullNormalLight : usecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->fixedlightlev)); else if (cameraLight->fixedcolormap != NULL) From f9eb06a22e2983367954a204e00baae194a6aaed Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Sat, 28 Jan 2017 00:13:27 -0500 Subject: [PATCH 766/912] - fixed: If GLRenderer is uninitialized (i.e. using software renderer, or during startup), gl_paltonemap_* CVARs would crash the game. --- src/gl/renderer/gl_postprocess.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/gl/renderer/gl_postprocess.cpp b/src/gl/renderer/gl_postprocess.cpp index c504088f4d..72121d338f 100644 --- a/src/gl/renderer/gl_postprocess.cpp +++ b/src/gl/renderer/gl_postprocess.cpp @@ -135,12 +135,14 @@ CUSTOM_CVAR(Float, gl_ssao_exponent, 1.8f, 0) CUSTOM_CVAR(Float, gl_paltonemap_powtable, 2.0f, CVAR_ARCHIVE | CVAR_NOINITCALL) { - GLRenderer->ClearTonemapPalette(); + if (GLRenderer) + GLRenderer->ClearTonemapPalette(); } CUSTOM_CVAR(Bool, gl_paltonemap_reverselookup, true, CVAR_ARCHIVE | CVAR_NOINITCALL) { - GLRenderer->ClearTonemapPalette(); + if (GLRenderer) + GLRenderer->ClearTonemapPalette(); } From 5f38b156354e78b83096a6e868ac4b11f66fe3a1 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 28 Jan 2017 07:08:59 +0100 Subject: [PATCH 767/912] Move colfunc family of globals into a DrawerStyle class and localize its usage --- src/swrenderer/drawers/r_draw.cpp | 624 +++++++++++--------- src/swrenderer/drawers/r_draw.h | 42 +- src/swrenderer/line/r_line.cpp | 9 +- src/swrenderer/line/r_renderdrawsegment.cpp | 12 +- src/swrenderer/line/r_walldraw.cpp | 7 +- src/swrenderer/line/r_walldraw.h | 3 + src/swrenderer/plane/r_flatplane.cpp | 58 +- src/swrenderer/plane/r_flatplane.h | 3 + src/swrenderer/scene/r_scene.cpp | 3 - src/swrenderer/things/r_decal.cpp | 11 +- src/swrenderer/things/r_decal.h | 3 +- src/swrenderer/things/r_playersprite.cpp | 5 +- src/swrenderer/things/r_sprite.cpp | 5 +- src/swrenderer/things/r_voxel.cpp | 3 +- 
src/swrenderer/things/r_wallsprite.cpp | 9 +- src/swrenderer/things/r_wallsprite.h | 3 +- src/v_draw.cpp | 7 +- 17 files changed, 416 insertions(+), 391 deletions(-) diff --git a/src/swrenderer/drawers/r_draw.cpp b/src/swrenderer/drawers/r_draw.cpp index 36b8407457..1824ba03ef 100644 --- a/src/swrenderer/drawers/r_draw.cpp +++ b/src/swrenderer/drawers/r_draw.cpp @@ -70,12 +70,6 @@ namespace swrenderer short zeroarray[MAXWIDTH]; short screenheightarray[MAXWIDTH]; - DrawerFunc colfunc; - DrawerFunc basecolfunc; - DrawerFunc fuzzcolfunc; - DrawerFunc transcolfunc; - DrawerFunc spanfunc; - namespace drawerargs { int dc_pitch; @@ -160,11 +154,6 @@ namespace swrenderer active_drawers = &tc_drawers; else active_drawers = &pal_drawers; - - colfunc = basecolfunc = &SWPixelFormatDrawers::DrawColumn; - fuzzcolfunc = &SWPixelFormatDrawers::DrawFuzzColumn; - transcolfunc = &SWPixelFormatDrawers::DrawTranslatedColumn; - spanfunc = &SWPixelFormatDrawers::DrawSpan; } void R_InitShadeMaps() @@ -252,282 +241,6 @@ namespace swrenderer } } - namespace - { - bool R_SetBlendFunc(int op, fixed_t fglevel, fixed_t bglevel, int flags) - { - using namespace drawerargs; - - // r_drawtrans is a seriously bad thing to turn off. I wonder if I should - // just remove it completely. 
- if (!r_drawtrans || (op == STYLEOP_Add && fglevel == FRACUNIT && bglevel == 0 && !(flags & STYLEF_InvertSource))) - { - if (flags & STYLEF_ColorIsFixed) - { - colfunc = &SWPixelFormatDrawers::FillColumn; - } - else if (dc_translation == NULL) - { - colfunc = basecolfunc; - } - else - { - colfunc = transcolfunc; - drawer_needs_pal_input = true; - } - return true; - } - if (flags & STYLEF_InvertSource) - { - dc_srcblend = Col2RGB8_Inverse[fglevel >> 10]; - dc_destblend = Col2RGB8_LessPrecision[bglevel >> 10]; - dc_srcalpha = fglevel; - dc_destalpha = bglevel; - } - else if (op == STYLEOP_Add && fglevel + bglevel <= FRACUNIT) - { - dc_srcblend = Col2RGB8[fglevel >> 10]; - dc_destblend = Col2RGB8[bglevel >> 10]; - dc_srcalpha = fglevel; - dc_destalpha = bglevel; - } - else - { - dc_srcblend = Col2RGB8_LessPrecision[fglevel >> 10]; - dc_destblend = Col2RGB8_LessPrecision[bglevel >> 10]; - dc_srcalpha = fglevel; - dc_destalpha = bglevel; - } - switch (op) - { - case STYLEOP_Add: - if (fglevel == 0 && bglevel == FRACUNIT) - { - return false; - } - if (fglevel + bglevel <= FRACUNIT) - { // Colors won't overflow when added - if (flags & STYLEF_ColorIsFixed) - { - colfunc = &SWPixelFormatDrawers::FillAddColumn; - } - else if (dc_translation == NULL) - { - colfunc = &SWPixelFormatDrawers::DrawAddColumn; - } - else - { - colfunc = &SWPixelFormatDrawers::DrawTranslatedAddColumn; - drawer_needs_pal_input = true; - } - } - else - { // Colors might overflow when added - if (flags & STYLEF_ColorIsFixed) - { - colfunc = &SWPixelFormatDrawers::FillAddClampColumn; - } - else if (dc_translation == NULL) - { - colfunc = &SWPixelFormatDrawers::DrawAddClampColumn; - } - else - { - colfunc = &SWPixelFormatDrawers::DrawAddClampTranslatedColumn; - drawer_needs_pal_input = true; - } - } - return true; - - case STYLEOP_Sub: - if (flags & STYLEF_ColorIsFixed) - { - colfunc = &SWPixelFormatDrawers::FillSubClampColumn; - } - else if (dc_translation == NULL) - { - colfunc = 
&SWPixelFormatDrawers::DrawSubClampColumn; - } - else - { - colfunc = &SWPixelFormatDrawers::DrawSubClampTranslatedColumn; - drawer_needs_pal_input = true; - } - return true; - - case STYLEOP_RevSub: - if (fglevel == 0 && bglevel == FRACUNIT) - { - return false; - } - if (flags & STYLEF_ColorIsFixed) - { - colfunc = &SWPixelFormatDrawers::FillRevSubClampColumn; - } - else if (dc_translation == NULL) - { - colfunc = &SWPixelFormatDrawers::DrawRevSubClampColumn; - } - else - { - colfunc = &SWPixelFormatDrawers::DrawRevSubClampTranslatedColumn; - drawer_needs_pal_input = true; - } - return true; - - default: - return false; - } - } - - fixed_t GetAlpha(int type, fixed_t alpha) - { - switch (type) - { - case STYLEALPHA_Zero: return 0; - case STYLEALPHA_One: return OPAQUE; - case STYLEALPHA_Src: return alpha; - case STYLEALPHA_InvSrc: return OPAQUE - alpha; - default: return 0; - } - } - } - - bool R_SetPatchStyle(FRenderStyle style, fixed_t alpha, int translation, uint32_t color, FDynamicColormap *&basecolormap) - { - using namespace drawerargs; - - fixed_t fglevel, bglevel; - - drawer_needs_pal_input = false; - - style.CheckFuzz(); - - if (style.BlendOp == STYLEOP_Shadow) - { - style = LegacyRenderStyles[STYLE_TranslucentStencil]; - alpha = TRANSLUC33; - color = 0; - } - - if (style.Flags & STYLEF_ForceAlpha) - { - alpha = clamp(alpha, 0, OPAQUE); - } - else if (style.Flags & STYLEF_TransSoulsAlpha) - { - alpha = fixed_t(transsouls * OPAQUE); - } - else if (style.Flags & STYLEF_Alpha1) - { - alpha = FRACUNIT; - } - else - { - alpha = clamp(alpha, 0, OPAQUE); - } - - if (translation != -1) - { - dc_translation = NULL; - if (translation != 0) - { - FRemapTable *table = TranslationToTable(translation); - if (table != NULL && !table->Inactive) - { - if (r_swtruecolor) - dc_translation = (uint8_t*)table->Palette; - else - dc_translation = table->Remap; - } - } - } - - // Check for special modes - if (style.BlendOp == STYLEOP_Fuzz) - { - colfunc = fuzzcolfunc; - return 
true; - } - else if (style == LegacyRenderStyles[STYLE_Shaded]) - { - // Shaded drawer only gets 16 levels of alpha because it saves memory. - if ((alpha >>= 12) == 0 || basecolormap == nullptr) - return false; - colfunc = &SWPixelFormatDrawers::DrawShadedColumn; - drawer_needs_pal_input = true; - CameraLight *cameraLight = CameraLight::Instance(); - dc_color = cameraLight->fixedcolormap ? cameraLight->fixedcolormap->Maps[APART(color)] : basecolormap->Maps[APART(color)]; - basecolormap = &ShadeFakeColormap[16 - alpha]; - if (cameraLight->fixedlightlev >= 0 && cameraLight->fixedcolormap == NULL) - { - R_SetColorMapLight(basecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->fixedlightlev)); - } - else - { - R_SetColorMapLight(basecolormap, 0, 0); - } - return true; - } - - fglevel = GetAlpha(style.SrcAlpha, alpha); - bglevel = GetAlpha(style.DestAlpha, alpha); - - if (style.Flags & STYLEF_ColorIsFixed) - { - uint32_t x = fglevel >> 10; - uint32_t r = RPART(color); - uint32_t g = GPART(color); - uint32_t b = BPART(color); - // dc_color is used by the rt_* routines. It is indexed into dc_srcblend. - dc_color = RGB256k.RGB[r >> 2][g >> 2][b >> 2]; - if (style.Flags & STYLEF_InvertSource) - { - r = 255 - r; - g = 255 - g; - b = 255 - b; - } - uint32_t alpha = clamp(fglevel >> (FRACBITS - 8), 0, 255); - dc_srccolor_bgra = (alpha << 24) | (r << 16) | (g << 8) | b; - // dc_srccolor is used by the R_Fill* routines. It is premultiplied - // with the alpha. 
- dc_srccolor = ((((r*x) >> 4) << 20) | ((g*x) >> 4) | ((((b)*x) >> 4) << 10)) & 0x3feffbff; - R_SetColorMapLight(&identitycolormap, 0, 0); - } - - if (!R_SetBlendFunc(style.BlendOp, fglevel, bglevel, style.Flags)) - { - return false; - } - return true; - } - - bool R_SetPatchStyle(FRenderStyle style, float alpha, int translation, uint32_t color, FDynamicColormap *&basecolormap) - { - return R_SetPatchStyle(style, FLOAT2FIXED(alpha), translation, color, basecolormap); - } - - DrawerFunc R_GetTransMaskDrawer() - { - if (colfunc == &SWPixelFormatDrawers::DrawAddColumn) - { - return &SWPixelFormatDrawers::DrawWallAddColumn; - } - if (colfunc == &SWPixelFormatDrawers::DrawAddClampColumn) - { - return &SWPixelFormatDrawers::DrawWallAddClampColumn; - } - if (colfunc == &SWPixelFormatDrawers::DrawSubClampColumn) - { - return &SWPixelFormatDrawers::DrawWallSubClampColumn; - } - if (colfunc == &SWPixelFormatDrawers::DrawRevSubClampColumn) - { - return &SWPixelFormatDrawers::DrawWallRevSubClampColumn; - } - return nullptr; - } - void R_SetColorMapLight(FSWColormap *base_colormap, float light, int shade) { using namespace drawerargs; @@ -643,14 +356,14 @@ namespace swrenderer fuzzpos = (fuzzpos + dc_yh - dc_yl + 1) % FUZZTABLE; } - void R_DrawMaskedColumn(int x, fixed_t iscale, FTexture *tex, fixed_t col, double spryscale, double sprtopscreen, bool sprflipvert, const short *mfloorclip, const short *mceilingclip, bool unmasked) + void DrawerStyle::DrawMaskedColumn(int x, fixed_t iscale, FTexture *tex, fixed_t col, double spryscale, double sprtopscreen, bool sprflipvert, const short *mfloorclip, const short *mceilingclip, bool unmasked) { using namespace drawerargs; // Handle the linear filtered version in a different function to reduce chances of merge conflicts from zdoom. 
if (r_swtruecolor && !drawer_needs_pal_input) // To do: add support to R_DrawColumnHoriz_rgba { - R_DrawMaskedColumnBgra(x, iscale, tex, col, spryscale, sprtopscreen, sprflipvert, mfloorclip, mceilingclip, unmasked); + DrawMaskedColumnBgra(x, iscale, tex, col, spryscale, sprtopscreen, sprflipvert, mfloorclip, mceilingclip, unmasked); return; } @@ -722,7 +435,7 @@ namespace swrenderer } } - void R_DrawMaskedColumnBgra(int x, fixed_t iscale, FTexture *tex, fixed_t col, double spryscale, double sprtopscreen, bool sprflipvert, const short *mfloorclip, const short *mceilingclip, bool unmasked) + void DrawerStyle::DrawMaskedColumnBgra(int x, fixed_t iscale, FTexture *tex, fixed_t col, double spryscale, double sprtopscreen, bool sprflipvert, const short *mfloorclip, const short *mceilingclip, bool unmasked) { using namespace drawerargs; @@ -838,4 +551,335 @@ namespace swrenderer span++; } } + + bool DrawerStyle::SetBlendFunc(int op, fixed_t fglevel, fixed_t bglevel, int flags) + { + using namespace drawerargs; + + // r_drawtrans is a seriously bad thing to turn off. I wonder if I should + // just remove it completely. 
+ if (!r_drawtrans || (op == STYLEOP_Add && fglevel == FRACUNIT && bglevel == 0 && !(flags & STYLEF_InvertSource))) + { + if (flags & STYLEF_ColorIsFixed) + { + colfunc = &SWPixelFormatDrawers::FillColumn; + } + else if (dc_translation == NULL) + { + colfunc = basecolfunc; + } + else + { + colfunc = transcolfunc; + drawer_needs_pal_input = true; + } + return true; + } + if (flags & STYLEF_InvertSource) + { + dc_srcblend = Col2RGB8_Inverse[fglevel >> 10]; + dc_destblend = Col2RGB8_LessPrecision[bglevel >> 10]; + dc_srcalpha = fglevel; + dc_destalpha = bglevel; + } + else if (op == STYLEOP_Add && fglevel + bglevel <= FRACUNIT) + { + dc_srcblend = Col2RGB8[fglevel >> 10]; + dc_destblend = Col2RGB8[bglevel >> 10]; + dc_srcalpha = fglevel; + dc_destalpha = bglevel; + } + else + { + dc_srcblend = Col2RGB8_LessPrecision[fglevel >> 10]; + dc_destblend = Col2RGB8_LessPrecision[bglevel >> 10]; + dc_srcalpha = fglevel; + dc_destalpha = bglevel; + } + switch (op) + { + case STYLEOP_Add: + if (fglevel == 0 && bglevel == FRACUNIT) + { + return false; + } + if (fglevel + bglevel <= FRACUNIT) + { // Colors won't overflow when added + if (flags & STYLEF_ColorIsFixed) + { + colfunc = &SWPixelFormatDrawers::FillAddColumn; + } + else if (dc_translation == NULL) + { + colfunc = &SWPixelFormatDrawers::DrawAddColumn; + } + else + { + colfunc = &SWPixelFormatDrawers::DrawTranslatedAddColumn; + drawer_needs_pal_input = true; + } + } + else + { // Colors might overflow when added + if (flags & STYLEF_ColorIsFixed) + { + colfunc = &SWPixelFormatDrawers::FillAddClampColumn; + } + else if (dc_translation == NULL) + { + colfunc = &SWPixelFormatDrawers::DrawAddClampColumn; + } + else + { + colfunc = &SWPixelFormatDrawers::DrawAddClampTranslatedColumn; + drawer_needs_pal_input = true; + } + } + return true; + + case STYLEOP_Sub: + if (flags & STYLEF_ColorIsFixed) + { + colfunc = &SWPixelFormatDrawers::FillSubClampColumn; + } + else if (dc_translation == NULL) + { + colfunc = 
&SWPixelFormatDrawers::DrawSubClampColumn; + } + else + { + colfunc = &SWPixelFormatDrawers::DrawSubClampTranslatedColumn; + drawer_needs_pal_input = true; + } + return true; + + case STYLEOP_RevSub: + if (fglevel == 0 && bglevel == FRACUNIT) + { + return false; + } + if (flags & STYLEF_ColorIsFixed) + { + colfunc = &SWPixelFormatDrawers::FillRevSubClampColumn; + } + else if (dc_translation == NULL) + { + colfunc = &SWPixelFormatDrawers::DrawRevSubClampColumn; + } + else + { + colfunc = &SWPixelFormatDrawers::DrawRevSubClampTranslatedColumn; + drawer_needs_pal_input = true; + } + return true; + + default: + return false; + } + } + + fixed_t DrawerStyle::GetAlpha(int type, fixed_t alpha) + { + switch (type) + { + case STYLEALPHA_Zero: return 0; + case STYLEALPHA_One: return OPAQUE; + case STYLEALPHA_Src: return alpha; + case STYLEALPHA_InvSrc: return OPAQUE - alpha; + default: return 0; + } + } + + bool DrawerStyle::SetPatchStyle(FRenderStyle style, fixed_t alpha, int translation, uint32_t color, FDynamicColormap *&basecolormap) + { + using namespace drawerargs; + + fixed_t fglevel, bglevel; + + drawer_needs_pal_input = false; + + style.CheckFuzz(); + + if (style.BlendOp == STYLEOP_Shadow) + { + style = LegacyRenderStyles[STYLE_TranslucentStencil]; + alpha = TRANSLUC33; + color = 0; + } + + if (style.Flags & STYLEF_ForceAlpha) + { + alpha = clamp(alpha, 0, OPAQUE); + } + else if (style.Flags & STYLEF_TransSoulsAlpha) + { + alpha = fixed_t(transsouls * OPAQUE); + } + else if (style.Flags & STYLEF_Alpha1) + { + alpha = FRACUNIT; + } + else + { + alpha = clamp(alpha, 0, OPAQUE); + } + + if (translation != -1) + { + dc_translation = NULL; + if (translation != 0) + { + FRemapTable *table = TranslationToTable(translation); + if (table != NULL && !table->Inactive) + { + if (r_swtruecolor) + dc_translation = (uint8_t*)table->Palette; + else + dc_translation = table->Remap; + } + } + } + + // Check for special modes + if (style.BlendOp == STYLEOP_Fuzz) + { + colfunc = 
fuzzcolfunc; + return true; + } + else if (style == LegacyRenderStyles[STYLE_Shaded]) + { + // Shaded drawer only gets 16 levels of alpha because it saves memory. + if ((alpha >>= 12) == 0 || basecolormap == nullptr) + return false; + colfunc = &SWPixelFormatDrawers::DrawShadedColumn; + drawer_needs_pal_input = true; + CameraLight *cameraLight = CameraLight::Instance(); + dc_color = cameraLight->fixedcolormap ? cameraLight->fixedcolormap->Maps[APART(color)] : basecolormap->Maps[APART(color)]; + basecolormap = &ShadeFakeColormap[16 - alpha]; + if (cameraLight->fixedlightlev >= 0 && cameraLight->fixedcolormap == NULL) + { + R_SetColorMapLight(basecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->fixedlightlev)); + } + else + { + R_SetColorMapLight(basecolormap, 0, 0); + } + return true; + } + + fglevel = GetAlpha(style.SrcAlpha, alpha); + bglevel = GetAlpha(style.DestAlpha, alpha); + + if (style.Flags & STYLEF_ColorIsFixed) + { + uint32_t x = fglevel >> 10; + uint32_t r = RPART(color); + uint32_t g = GPART(color); + uint32_t b = BPART(color); + // dc_color is used by the rt_* routines. It is indexed into dc_srcblend. + dc_color = RGB256k.RGB[r >> 2][g >> 2][b >> 2]; + if (style.Flags & STYLEF_InvertSource) + { + r = 255 - r; + g = 255 - g; + b = 255 - b; + } + uint32_t alpha = clamp(fglevel >> (FRACBITS - 8), 0, 255); + dc_srccolor_bgra = (alpha << 24) | (r << 16) | (g << 8) | b; + // dc_srccolor is used by the R_Fill* routines. It is premultiplied + // with the alpha. 
+ dc_srccolor = ((((r*x) >> 4) << 20) | ((g*x) >> 4) | ((((b)*x) >> 4) << 10)) & 0x3feffbff; + R_SetColorMapLight(&identitycolormap, 0, 0); + } + + if (!DrawerStyle::SetBlendFunc(style.BlendOp, fglevel, bglevel, style.Flags)) + { + return false; + } + return true; + } + + bool DrawerStyle::SetPatchStyle(FRenderStyle style, float alpha, int translation, uint32_t color, FDynamicColormap *&basecolormap) + { + return SetPatchStyle(style, FLOAT2FIXED(alpha), translation, color, basecolormap); + } + + DrawerFunc DrawerStyle::GetTransMaskDrawer() + { + if (colfunc == &SWPixelFormatDrawers::DrawAddColumn) + { + return &SWPixelFormatDrawers::DrawWallAddColumn; + } + if (colfunc == &SWPixelFormatDrawers::DrawAddClampColumn) + { + return &SWPixelFormatDrawers::DrawWallAddClampColumn; + } + if (colfunc == &SWPixelFormatDrawers::DrawSubClampColumn) + { + return &SWPixelFormatDrawers::DrawWallSubClampColumn; + } + if (colfunc == &SWPixelFormatDrawers::DrawRevSubClampColumn) + { + return &SWPixelFormatDrawers::DrawWallRevSubClampColumn; + } + return nullptr; + } + + void DrawerStyle::SetSpanStyle(bool masked, bool additive, fixed_t alpha) + { + using namespace drawerargs; + + if (masked) + { + if (alpha < OPAQUE || additive) + { + if (!additive) + { + spanfunc = &SWPixelFormatDrawers::DrawSpanMaskedTranslucent; + dc_srcblend = Col2RGB8[alpha >> 10]; + dc_destblend = Col2RGB8[(OPAQUE - alpha) >> 10]; + dc_srcalpha = alpha; + dc_destalpha = OPAQUE - alpha; + } + else + { + spanfunc = &SWPixelFormatDrawers::DrawSpanMaskedAddClamp; + dc_srcblend = Col2RGB8_LessPrecision[alpha >> 10]; + dc_destblend = Col2RGB8_LessPrecision[FRACUNIT >> 10]; + dc_srcalpha = alpha; + dc_destalpha = FRACUNIT; + } + } + else + { + spanfunc = &SWPixelFormatDrawers::DrawSpanMasked; + } + } + else + { + if (alpha < OPAQUE || additive) + { + if (!additive) + { + spanfunc = &SWPixelFormatDrawers::DrawSpanTranslucent; + dc_srcblend = Col2RGB8[alpha >> 10]; + dc_destblend = Col2RGB8[(OPAQUE - alpha) >> 10]; + 
dc_srcalpha = alpha; + dc_destalpha = OPAQUE - alpha; + } + else + { + spanfunc = &SWPixelFormatDrawers::DrawSpanAddClamp; + dc_srcblend = Col2RGB8_LessPrecision[alpha >> 10]; + dc_destblend = Col2RGB8_LessPrecision[FRACUNIT >> 10]; + dc_srcalpha = alpha; + dc_destalpha = FRACUNIT; + } + } + else + { + spanfunc = &SWPixelFormatDrawers::DrawSpan; + } + } + } } diff --git a/src/swrenderer/drawers/r_draw.h b/src/swrenderer/drawers/r_draw.h index 8ce1d904fe..d6a6fd17e2 100644 --- a/src/swrenderer/drawers/r_draw.h +++ b/src/swrenderer/drawers/r_draw.h @@ -174,10 +174,6 @@ namespace swrenderer void R_InitFuzzTable(int fuzzoff); void R_InitParticleTexture(); - bool R_SetPatchStyle(FRenderStyle style, fixed_t alpha, int translation, uint32_t color, FDynamicColormap *&basecolormap); - bool R_SetPatchStyle(FRenderStyle style, float alpha, int translation, uint32_t color, FDynamicColormap *&basecolormap); - DrawerFunc R_GetTransMaskDrawer(); - void R_UpdateFuzzPos(); // Sets dc_colormap and dc_light to their appropriate values depending on the output format (pal vs true color) @@ -188,12 +184,36 @@ namespace swrenderer void R_SetSpanTexture(FTexture *tex); void R_SetSpanColormap(FDynamicColormap *colormap, int shade); - void R_DrawMaskedColumn(int x, fixed_t iscale, FTexture *texture, fixed_t column, double spryscale, double sprtopscreen, bool sprflipvert, const short *mfloorclip, const short *mceilingclip, bool unmasked = false); - void R_DrawMaskedColumnBgra(int x, fixed_t iscale, FTexture *tex, fixed_t column, double spryscale, double sprtopscreen, bool sprflipvert, const short *mfloorclip, const short *mceilingclip, bool unmasked); + class DrawerStyle + { + public: + DrawerStyle() + { + colfunc = &SWPixelFormatDrawers::DrawColumn; + basecolfunc = &SWPixelFormatDrawers::DrawColumn; + fuzzcolfunc = &SWPixelFormatDrawers::DrawFuzzColumn; + transcolfunc = &SWPixelFormatDrawers::DrawTranslatedColumn; + spanfunc = &SWPixelFormatDrawers::DrawSpan; + } - extern DrawerFunc 
colfunc; - extern DrawerFunc basecolfunc; - extern DrawerFunc fuzzcolfunc; - extern DrawerFunc transcolfunc; - extern DrawerFunc spanfunc; + bool SetPatchStyle(FRenderStyle style, fixed_t alpha, int translation, uint32_t color, FDynamicColormap *&basecolormap); + bool SetPatchStyle(FRenderStyle style, float alpha, int translation, uint32_t color, FDynamicColormap *&basecolormap); + void SetSpanStyle(bool masked, bool additive, fixed_t alpha); + + void DrawMaskedColumn(int x, fixed_t iscale, FTexture *texture, fixed_t column, double spryscale, double sprtopscreen, bool sprflipvert, const short *mfloorclip, const short *mceilingclip, bool unmasked = false); + + DrawerFunc GetTransMaskDrawer(); + + DrawerFunc colfunc; + DrawerFunc basecolfunc; + DrawerFunc fuzzcolfunc; + DrawerFunc transcolfunc; + DrawerFunc spanfunc; + + private: + void DrawMaskedColumnBgra(int x, fixed_t iscale, FTexture *tex, fixed_t column, double spryscale, double sprtopscreen, bool sprflipvert, const short *mfloorclip, const short *mceilingclip, bool unmasked); + + bool SetBlendFunc(int op, fixed_t fglevel, fixed_t bglevel, int flags); + static fixed_t GetAlpha(int type, fixed_t alpha); + }; } diff --git a/src/swrenderer/line/r_line.cpp b/src/swrenderer/line/r_line.cpp index 5f16f2b319..09b1965e43 100644 --- a/src/swrenderer/line/r_line.cpp +++ b/src/swrenderer/line/r_line.cpp @@ -1045,8 +1045,9 @@ namespace swrenderer rw_offset = -rw_offset; } + DrawerStyle drawerstyle; RenderWallPart renderWallpart; - renderWallpart.Render(frontsector, curline, WallC, rw_pic, x1, x2, walltop.ScreenY, wallbottom.ScreenY, rw_midtexturemid, walltexcoords.VStep, walltexcoords.UPos, yscale, MAX(rw_frontcz1, rw_frontcz2), MIN(rw_frontfz1, rw_frontfz2), false, wallshade, rw_offset, rw_light, rw_lightstep, light_list, foggy, basecolormap); + renderWallpart.Render(drawerstyle, frontsector, curline, WallC, rw_pic, x1, x2, walltop.ScreenY, wallbottom.ScreenY, rw_midtexturemid, walltexcoords.VStep, walltexcoords.UPos, 
yscale, MAX(rw_frontcz1, rw_frontcz2), MIN(rw_frontfz1, rw_frontfz2), false, wallshade, rw_offset, rw_light, rw_lightstep, light_list, foggy, basecolormap); } fillshort(ceilingclip + x1, x2 - x1, viewheight); fillshort(floorclip + x1, x2 - x1, 0xffff); @@ -1082,8 +1083,9 @@ namespace swrenderer rw_offset = -rw_offset; } + DrawerStyle drawerstyle; RenderWallPart renderWallpart; - renderWallpart.Render(frontsector, curline, WallC, rw_pic, x1, x2, walltop.ScreenY, wallupper.ScreenY, rw_toptexturemid, walltexcoords.VStep, walltexcoords.UPos, yscale, MAX(rw_frontcz1, rw_frontcz2), MIN(rw_backcz1, rw_backcz2), false, wallshade, rw_offset, rw_light, rw_lightstep, light_list, foggy, basecolormap); + renderWallpart.Render(drawerstyle, frontsector, curline, WallC, rw_pic, x1, x2, walltop.ScreenY, wallupper.ScreenY, rw_toptexturemid, walltexcoords.VStep, walltexcoords.UPos, yscale, MAX(rw_frontcz1, rw_frontcz2), MIN(rw_backcz1, rw_backcz2), false, wallshade, rw_offset, rw_light, rw_lightstep, light_list, foggy, basecolormap); } memcpy(ceilingclip + x1, wallupper.ScreenY + x1, (x2 - x1) * sizeof(short)); } @@ -1122,8 +1124,9 @@ namespace swrenderer rw_offset = -rw_offset; } + DrawerStyle drawerstyle; RenderWallPart renderWallpart; - renderWallpart.Render(frontsector, curline, WallC, rw_pic, x1, x2, walllower.ScreenY, wallbottom.ScreenY, rw_bottomtexturemid, walltexcoords.VStep, walltexcoords.UPos, yscale, MAX(rw_backfz1, rw_backfz2), MIN(rw_frontfz1, rw_frontfz2), false, wallshade, rw_offset, rw_light, rw_lightstep, light_list, foggy, basecolormap); + renderWallpart.Render(drawerstyle, frontsector, curline, WallC, rw_pic, x1, x2, walllower.ScreenY, wallbottom.ScreenY, rw_bottomtexturemid, walltexcoords.VStep, walltexcoords.UPos, yscale, MAX(rw_backfz1, rw_backfz2), MIN(rw_frontfz1, rw_frontfz2), false, wallshade, rw_offset, rw_light, rw_lightstep, light_list, foggy, basecolormap); } memcpy(floorclip + x1, walllower.ScreenY + x1, (x2 - x1) * sizeof(short)); } diff --git 
a/src/swrenderer/line/r_renderdrawsegment.cpp b/src/swrenderer/line/r_renderdrawsegment.cpp index 142162496d..214ed52b11 100644 --- a/src/swrenderer/line/r_renderdrawsegment.cpp +++ b/src/swrenderer/line/r_renderdrawsegment.cpp @@ -67,7 +67,8 @@ namespace swrenderer curline = ds->curline; FDynamicColormap *patchstylecolormap = nullptr; - bool visible = R_SetPatchStyle(LegacyRenderStyles[curline->linedef->flags & ML_ADDTRANS ? STYLE_Add : STYLE_Translucent], + DrawerStyle drawerstyle; + bool visible = drawerstyle.SetPatchStyle(LegacyRenderStyles[curline->linedef->flags & ML_ADDTRANS ? STYLE_Add : STYLE_Translucent], (float)MIN(curline->linedef->alpha, 1.), 0, 0, patchstylecolormap); if (!visible && !ds->bFogBoundary && !ds->bFakeBoundary) @@ -282,7 +283,7 @@ namespace swrenderer else sprtopscreen = CenterY - texturemid * spryscale; - R_DrawMaskedColumn(x, iscale, tex, maskedtexturecol[x], spryscale, sprtopscreen, sprflipvert, mfloorclip, mceilingclip); + drawerstyle.DrawMaskedColumn(x, iscale, tex, maskedtexturecol[x], spryscale, sprtopscreen, sprflipvert, mfloorclip, mceilingclip); rw_light += rw_lightstep; spryscale += rw_scalestep; @@ -348,7 +349,7 @@ namespace swrenderer GetMaskedWallTopBottom(ds, top, bot); RenderWallPart renderWallpart; - renderWallpart.Render(frontsector, curline, WallC, rw_pic, x1, x2, mceilingclip, mfloorclip, texturemid, MaskedSWall, maskedtexturecol, ds->yscale, top, bot, true, wallshade, rw_offset, rw_light, rw_lightstep, nullptr, ds->foggy, basecolormap); + renderWallpart.Render(drawerstyle, frontsector, curline, WallC, rw_pic, x1, x2, mceilingclip, mfloorclip, texturemid, MaskedSWall, maskedtexturecol, ds->yscale, top, bot, true, wallshade, rw_offset, rw_light, rw_lightstep, nullptr, ds->foggy, basecolormap); } clearfog: @@ -382,7 +383,8 @@ namespace swrenderer double yscale; fixed_t Alpha = Scale(rover->alpha, OPAQUE, 255); - bool visible = R_SetPatchStyle(LegacyRenderStyles[rover->flags & FF_ADDITIVETRANS ? 
STYLE_Add : STYLE_Translucent], + DrawerStyle drawerstyle; + bool visible = drawerstyle.SetPatchStyle(LegacyRenderStyles[rover->flags & FF_ADDITIVETRANS ? STYLE_Add : STYLE_Translucent], Alpha, 0, 0, basecolormap); if (!visible) @@ -479,7 +481,7 @@ namespace swrenderer GetMaskedWallTopBottom(ds, top, bot); RenderWallPart renderWallpart; - renderWallpart.Render(frontsector, curline, WallC, rw_pic, x1, x2, wallupper.ScreenY, walllower.ScreenY, texturemid, MaskedSWall, walltexcoords.UPos, yscale, top, bot, true, wallshade, rw_offset, rw_light, rw_lightstep, nullptr, ds->foggy, basecolormap); + renderWallpart.Render(drawerstyle, frontsector, curline, WallC, rw_pic, x1, x2, wallupper.ScreenY, walllower.ScreenY, texturemid, MaskedSWall, walltexcoords.UPos, yscale, top, bot, true, wallshade, rw_offset, rw_light, rw_lightstep, nullptr, ds->foggy, basecolormap); } // kg3D - walls of fake floors diff --git a/src/swrenderer/line/r_walldraw.cpp b/src/swrenderer/line/r_walldraw.cpp index 75907c2615..da5e251cbd 100644 --- a/src/swrenderer/line/r_walldraw.cpp +++ b/src/swrenderer/line/r_walldraw.cpp @@ -392,7 +392,7 @@ namespace swrenderer void RenderWallPart::ProcessTranslucentWall(const short *uwal, const short *dwal, double texturemid, float *swal, fixed_t *lwal) { - DrawerFunc drawcol1 = R_GetTransMaskDrawer(); + DrawerFunc drawcol1 = drawerstyle.GetTransMaskDrawer(); if (drawcol1 == nullptr) { // The current translucency is unsupported, so draw with regular ProcessMaskedWall instead. 
@@ -445,7 +445,7 @@ namespace swrenderer { if (mask) { - if (colfunc == basecolfunc) + if (drawerstyle.colfunc == drawerstyle.basecolfunc) { ProcessMaskedWall(uwal, dwal, texturemid, swal, lwal); } @@ -540,8 +540,9 @@ namespace swrenderer } } - void RenderWallPart::Render(sector_t *frontsector, seg_t *curline, const FWallCoords &WallC, FTexture *pic, int x1, int x2, const short *walltop, const short *wallbottom, double texturemid, float *swall, fixed_t *lwall, double yscale, double top, double bottom, bool mask, int wallshade, fixed_t xoffset, float light, float lightstep, FLightNode *light_list, bool foggy, FDynamicColormap *basecolormap) + void RenderWallPart::Render(const DrawerStyle &drawerstyle, sector_t *frontsector, seg_t *curline, const FWallCoords &WallC, FTexture *pic, int x1, int x2, const short *walltop, const short *wallbottom, double texturemid, float *swall, fixed_t *lwall, double yscale, double top, double bottom, bool mask, int wallshade, fixed_t xoffset, float light, float lightstep, FLightNode *light_list, bool foggy, FDynamicColormap *basecolormap) { + this->drawerstyle = drawerstyle; this->x1 = x1; this->x2 = x2; this->frontsector = frontsector; diff --git a/src/swrenderer/line/r_walldraw.h b/src/swrenderer/line/r_walldraw.h index b199e51b58..e5d7d11538 100644 --- a/src/swrenderer/line/r_walldraw.h +++ b/src/swrenderer/line/r_walldraw.h @@ -34,6 +34,7 @@ namespace swrenderer { public: void Render( + const DrawerStyle &drawerstyle, sector_t *frontsector, seg_t *curline, const FWallCoords &WallC, @@ -83,6 +84,8 @@ namespace swrenderer FDynamicColormap *basecolormap = nullptr; FLightNode *light_list = nullptr; bool mask = false; + + DrawerStyle drawerstyle; }; struct WallSampler diff --git a/src/swrenderer/plane/r_flatplane.cpp b/src/swrenderer/plane/r_flatplane.cpp index dff82a5e45..2beae7756f 100644 --- a/src/swrenderer/plane/r_flatplane.cpp +++ b/src/swrenderer/plane/r_flatplane.cpp @@ -125,61 +125,7 @@ namespace swrenderer planeshade = 
LIGHT2SHADE(pl->lightlevel); } - if (spanfunc != &SWPixelFormatDrawers::FillSpan) - { - if (masked) - { - if (alpha < OPAQUE || additive) - { - if (!additive) - { - spanfunc = &SWPixelFormatDrawers::DrawSpanMaskedTranslucent; - dc_srcblend = Col2RGB8[alpha >> 10]; - dc_destblend = Col2RGB8[(OPAQUE - alpha) >> 10]; - dc_srcalpha = alpha; - dc_destalpha = OPAQUE - alpha; - } - else - { - spanfunc = &SWPixelFormatDrawers::DrawSpanMaskedAddClamp; - dc_srcblend = Col2RGB8_LessPrecision[alpha >> 10]; - dc_destblend = Col2RGB8_LessPrecision[FRACUNIT >> 10]; - dc_srcalpha = alpha; - dc_destalpha = FRACUNIT; - } - } - else - { - spanfunc = &SWPixelFormatDrawers::DrawSpanMasked; - } - } - else - { - if (alpha < OPAQUE || additive) - { - if (!additive) - { - spanfunc = &SWPixelFormatDrawers::DrawSpanTranslucent; - dc_srcblend = Col2RGB8[alpha >> 10]; - dc_destblend = Col2RGB8[(OPAQUE - alpha) >> 10]; - dc_srcalpha = alpha; - dc_destalpha = OPAQUE - alpha; - } - else - { - spanfunc = &SWPixelFormatDrawers::DrawSpanAddClamp; - dc_srcblend = Col2RGB8_LessPrecision[alpha >> 10]; - dc_destblend = Col2RGB8_LessPrecision[FRACUNIT >> 10]; - dc_srcalpha = alpha; - dc_destalpha = FRACUNIT; - } - } - else - { - spanfunc = &SWPixelFormatDrawers::DrawSpan; - } - } - } + drawerstyle.SetSpanStyle(masked, additive, alpha); light_list = pl->lights; @@ -309,7 +255,7 @@ namespace swrenderer ds_x1 = x1; ds_x2 = x2; - (R_Drawers()->*spanfunc)(); + (R_Drawers()->*drawerstyle.spanfunc)(); } void RenderFlatPlane::StepColumn() diff --git a/src/swrenderer/plane/r_flatplane.h b/src/swrenderer/plane/r_flatplane.h index 249fe7f724..4dd189dabc 100644 --- a/src/swrenderer/plane/r_flatplane.h +++ b/src/swrenderer/plane/r_flatplane.h @@ -14,6 +14,7 @@ #pragma once #include "r_planerenderer.h" +#include "swrenderer/drawers/r_draw.h" namespace swrenderer { @@ -41,6 +42,8 @@ namespace swrenderer double basexfrac, baseyfrac; VisiblePlaneLight *light_list; + DrawerStyle drawerstyle; + static float 
yslope[MAXHEIGHT]; }; diff --git a/src/swrenderer/scene/r_scene.cpp b/src/swrenderer/scene/r_scene.cpp index 1941bd4cbb..4914fbe251 100644 --- a/src/swrenderer/scene/r_scene.cpp +++ b/src/swrenderer/scene/r_scene.cpp @@ -141,9 +141,6 @@ namespace swrenderer NetUpdate(); - colfunc = basecolfunc; - spanfunc = &SWPixelFormatDrawers::DrawSpan; - RenderPortal::Instance()->SetMainPortal(); this->dontmaplines = dontmaplines; diff --git a/src/swrenderer/things/r_decal.cpp b/src/swrenderer/things/r_decal.cpp index c6843a7fc8..3d93f0146d 100644 --- a/src/swrenderer/things/r_decal.cpp +++ b/src/swrenderer/things/r_decal.cpp @@ -282,7 +282,8 @@ namespace swrenderer { int x = x1; - bool visible = R_SetPatchStyle(decal->RenderStyle, (float)decal->Alpha, decal->Translation, decal->AlphaColor, basecolormap); + DrawerStyle drawerstyle; + bool visible = drawerstyle.SetPatchStyle(decal->RenderStyle, (float)decal->Alpha, decal->Translation, decal->AlphaColor, basecolormap); // R_SetPatchStyle can modify basecolormap. 
if (rereadcolormap) @@ -298,7 +299,7 @@ namespace swrenderer { // calculate lighting R_SetColorMapLight(usecolormap, light, wallshade); } - DrawColumn(x, WallSpriteTile, walltexcoords, texturemid, maskedScaleY, sprflipvert, mfloorclip, mceilingclip); + DrawColumn(drawerstyle, x, WallSpriteTile, walltexcoords, texturemid, maskedScaleY, sprflipvert, mfloorclip, mceilingclip); light += lightstep; x++; } @@ -311,13 +312,11 @@ namespace swrenderer mfloorclip = wallbottom; } while (needrepeat--); - colfunc = basecolfunc; - done: WallC = savecoord; } - void RenderDecal::DrawColumn(int x, FTexture *WallSpriteTile, const ProjectedWallTexcoords &walltexcoords, double texturemid, float maskedScaleY, bool sprflipvert, const short *mfloorclip, const short *mceilingclip) + void RenderDecal::DrawColumn(DrawerStyle &drawerstyle, int x, FTexture *WallSpriteTile, const ProjectedWallTexcoords &walltexcoords, double texturemid, float maskedScaleY, bool sprflipvert, const short *mfloorclip, const short *mceilingclip) { float iscale = walltexcoords.VStep[x] * maskedScaleY; double spryscale = 1 / iscale; @@ -327,6 +326,6 @@ namespace swrenderer else sprtopscreen = CenterY - texturemid * spryscale; - R_DrawMaskedColumn(x, FLOAT2FIXED(iscale), WallSpriteTile, walltexcoords.UPos[x], spryscale, sprtopscreen, sprflipvert, mfloorclip, mceilingclip); + drawerstyle.DrawMaskedColumn(x, FLOAT2FIXED(iscale), WallSpriteTile, walltexcoords.UPos[x], spryscale, sprtopscreen, sprflipvert, mfloorclip, mceilingclip); } } diff --git a/src/swrenderer/things/r_decal.h b/src/swrenderer/things/r_decal.h index 56e63f8dc7..760d7664a8 100644 --- a/src/swrenderer/things/r_decal.h +++ b/src/swrenderer/things/r_decal.h @@ -20,6 +20,7 @@ namespace swrenderer { struct DrawSegment; class ProjectedWallTexcoords; + class DrawerStyle; class RenderDecal { @@ -28,6 +29,6 @@ namespace swrenderer private: static void Render(side_t *wall, DBaseDecal *first, DrawSegment *clipper, int wallshade, float lightleft, float lightstep, 
seg_t *curline, FWallCoords wallC, bool foggy, FDynamicColormap *basecolormap, const short *walltop, const short *wallbottom, int pass); - static void DrawColumn(int x, FTexture *WallSpriteTile, const ProjectedWallTexcoords &walltexcoords, double texturemid, float maskedScaleY, bool sprflipvert, const short *mfloorclip, const short *mceilingclip); + static void DrawColumn(DrawerStyle &drawerstyle, int x, FTexture *WallSpriteTile, const ProjectedWallTexcoords &walltexcoords, double texturemid, float maskedScaleY, bool sprflipvert, const short *mfloorclip, const short *mceilingclip); }; } diff --git a/src/swrenderer/things/r_playersprite.cpp b/src/swrenderer/things/r_playersprite.cpp index a1041808ea..2380c7a452 100644 --- a/src/swrenderer/things/r_playersprite.cpp +++ b/src/swrenderer/things/r_playersprite.cpp @@ -591,7 +591,8 @@ namespace swrenderer FDynamicColormap *basecolormap = static_cast(Light.BaseColormap); - bool visible = R_SetPatchStyle(RenderStyle, Alpha, Translation, FillColor, basecolormap); + DrawerStyle drawerstyle; + bool visible = drawerstyle.SetPatchStyle(RenderStyle, Alpha, Translation, FillColor, basecolormap); if (RenderStyle == LegacyRenderStyles[STYLE_Shaded]) { // For shaded sprites, R_SetPatchStyle sets a dc_colormap to an alpha table, but @@ -628,7 +629,7 @@ namespace swrenderer fixed_t frac = startfrac; for (int x = x1; x < x2; x++) { - R_DrawMaskedColumn(x, iscale, pic, frac, spryscale, sprtopscreen, sprflipvert, mfloorclip, mceilingclip, false); + drawerstyle.DrawMaskedColumn(x, iscale, pic, frac, spryscale, sprtopscreen, sprflipvert, mfloorclip, mceilingclip, false); frac += xiscale; } diff --git a/src/swrenderer/things/r_sprite.cpp b/src/swrenderer/things/r_sprite.cpp index 28082b2737..575cb8369d 100644 --- a/src/swrenderer/things/r_sprite.cpp +++ b/src/swrenderer/things/r_sprite.cpp @@ -250,7 +250,8 @@ namespace swrenderer FDynamicColormap *basecolormap = static_cast(vis->Light.BaseColormap); - bool visible = 
R_SetPatchStyle(vis->RenderStyle, vis->Alpha, vis->Translation, vis->FillColor, basecolormap); + DrawerStyle drawerstyle; + bool visible = drawerstyle.SetPatchStyle(vis->RenderStyle, vis->Alpha, vis->Translation, vis->FillColor, basecolormap); if (vis->RenderStyle == LegacyRenderStyles[STYLE_Shaded]) { // For shaded sprites, R_SetPatchStyle sets a dc_colormap to an alpha table, but @@ -293,7 +294,7 @@ namespace swrenderer while (x < x2) { if (!translucentPass->ClipSpriteColumnWithPortals(x, vis)) - R_DrawMaskedColumn(x, iscale, tex, frac, spryscale, sprtopscreen, sprflipvert, mfloorclip, mceilingclip, false); + drawerstyle.DrawMaskedColumn(x, iscale, tex, frac, spryscale, sprtopscreen, sprflipvert, mfloorclip, mceilingclip, false); x++; frac += xiscale; } diff --git a/src/swrenderer/things/r_voxel.cpp b/src/swrenderer/things/r_voxel.cpp index cf0d4acd9e..aa6b8206ea 100644 --- a/src/swrenderer/things/r_voxel.cpp +++ b/src/swrenderer/things/r_voxel.cpp @@ -188,7 +188,8 @@ namespace swrenderer R_SetColorMapLight(sprite->Light.BaseColormap, 0, sprite->Light.ColormapNum << FRACBITS); - bool visible = R_SetPatchStyle(sprite->RenderStyle, sprite->Alpha, sprite->Translation, sprite->FillColor, basecolormap); + DrawerStyle drawerstyle; + bool visible = drawerstyle.SetPatchStyle(sprite->RenderStyle, sprite->Alpha, sprite->Translation, sprite->FillColor, basecolormap); if (!visible) return; diff --git a/src/swrenderer/things/r_wallsprite.cpp b/src/swrenderer/things/r_wallsprite.cpp index f4587e6a1f..f599098d37 100644 --- a/src/swrenderer/things/r_wallsprite.cpp +++ b/src/swrenderer/things/r_wallsprite.cpp @@ -213,7 +213,8 @@ namespace swrenderer FDynamicColormap *basecolormap = static_cast(spr->Light.BaseColormap); - bool visible = R_SetPatchStyle(spr->RenderStyle, spr->Alpha, spr->Translation, spr->FillColor, basecolormap); + DrawerStyle drawerstyle; + bool visible = drawerstyle.SetPatchStyle(spr->RenderStyle, spr->Alpha, spr->Translation, spr->FillColor, basecolormap); // 
R_SetPatchStyle can modify basecolormap. if (rereadcolormap) @@ -236,14 +237,14 @@ namespace swrenderer R_SetColorMapLight(usecolormap, light, shade); } if (!translucentPass->ClipSpriteColumnWithPortals(x, spr)) - DrawColumn(x, WallSpriteTile, walltexcoords, texturemid, maskedScaleY, sprflipvert, mfloorclip, mceilingclip); + DrawColumn(drawerstyle, x, WallSpriteTile, walltexcoords, texturemid, maskedScaleY, sprflipvert, mfloorclip, mceilingclip); light += lightstep; x++; } } } - void RenderWallSprite::DrawColumn(int x, FTexture *WallSpriteTile, const ProjectedWallTexcoords &walltexcoords, double texturemid, float maskedScaleY, bool sprflipvert, const short *mfloorclip, const short *mceilingclip) + void RenderWallSprite::DrawColumn(DrawerStyle &drawerstyle, int x, FTexture *WallSpriteTile, const ProjectedWallTexcoords &walltexcoords, double texturemid, float maskedScaleY, bool sprflipvert, const short *mfloorclip, const short *mceilingclip) { float iscale = walltexcoords.VStep[x] * maskedScaleY; double spryscale = 1 / iscale; @@ -253,6 +254,6 @@ namespace swrenderer else sprtopscreen = CenterY - texturemid * spryscale; - R_DrawMaskedColumn(x, FLOAT2FIXED(iscale), WallSpriteTile, walltexcoords.UPos[x], spryscale, sprtopscreen, sprflipvert, mfloorclip, mceilingclip); + drawerstyle.DrawMaskedColumn(x, FLOAT2FIXED(iscale), WallSpriteTile, walltexcoords.UPos[x], spryscale, sprtopscreen, sprflipvert, mfloorclip, mceilingclip); } } diff --git a/src/swrenderer/things/r_wallsprite.h b/src/swrenderer/things/r_wallsprite.h index 6c2976d361..1e10cd273e 100644 --- a/src/swrenderer/things/r_wallsprite.h +++ b/src/swrenderer/things/r_wallsprite.h @@ -18,6 +18,7 @@ namespace swrenderer { class ProjectedWallTexcoords; + class DrawerStyle; class RenderWallSprite : public VisibleSprite { @@ -29,7 +30,7 @@ namespace swrenderer void Render(short *cliptop, short *clipbottom, int minZ, int maxZ) override; private: - static void DrawColumn(int x, FTexture *WallSpriteTile, const 
ProjectedWallTexcoords &walltexcoords, double texturemid, float maskedScaleY, bool sprflipvert, const short *mfloorclip, const short *mceilingclip); + static void DrawColumn(DrawerStyle &drawerstyle, int x, FTexture *WallSpriteTile, const ProjectedWallTexcoords &walltexcoords, double texturemid, float maskedScaleY, bool sprflipvert, const short *mfloorclip, const short *mceilingclip); FWallCoords wallc; uint32_t Translation = 0; diff --git a/src/v_draw.cpp b/src/v_draw.cpp index 567799d478..6e4f417287 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -200,10 +200,11 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) CameraLight::Instance()->fixedcolormap = dc_fcolormap; bool visible; FDynamicColormap *basecolormap = nullptr; + DrawerStyle drawerstyle; if (r_swtruecolor) - visible = R_SetPatchStyle(parms.style, parms.Alpha, -1, parms.fillcolor, basecolormap); + visible = drawerstyle.SetPatchStyle(parms.style, parms.Alpha, -1, parms.fillcolor, basecolormap); else - visible = R_SetPatchStyle(parms.style, parms.Alpha, 0, parms.fillcolor, basecolormap); + visible = drawerstyle.SetPatchStyle(parms.style, parms.Alpha, 0, parms.fillcolor, basecolormap); BYTE *destorgsave = dc_destorg; int destheightsave = dc_destheight; @@ -291,7 +292,7 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) while (x < x2_i) { - R_DrawMaskedColumn(x, iscale, img, frac, spryscale, sprtopscreen, sprflipvert, mfloorclip, mceilingclip, !parms.masked); + drawerstyle.DrawMaskedColumn(x, iscale, img, frac, spryscale, sprtopscreen, sprflipvert, mfloorclip, mceilingclip, !parms.masked); x++; frac += xiscale_i; } From fe40ad200ef8fad62e2f9b97aa1cc79fab16bb4f Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 28 Jan 2017 07:13:52 +0100 Subject: [PATCH 768/912] Remove the goto in RenderDecal::Render! 
--- src/swrenderer/things/r_decal.cpp | 22 +++++++++------------- src/swrenderer/things/r_decal.h | 2 +- 2 files changed, 10 insertions(+), 14 deletions(-) diff --git a/src/swrenderer/things/r_decal.cpp b/src/swrenderer/things/r_decal.cpp index 3d93f0146d..bf47283152 100644 --- a/src/swrenderer/things/r_decal.cpp +++ b/src/swrenderer/things/r_decal.cpp @@ -59,7 +59,7 @@ namespace swrenderer // = 1: drawing masked textures (including sprites) // Currently, only pass = 0 is done or used - void RenderDecal::Render(side_t *wall, DBaseDecal *decal, DrawSegment *clipper, int wallshade, float lightleft, float lightstep, seg_t *curline, FWallCoords WallC, bool foggy, FDynamicColormap *basecolormap, const short *walltop, const short *wallbottom, int pass) + void RenderDecal::Render(side_t *wall, DBaseDecal *decal, DrawSegment *clipper, int wallshade, float lightleft, float lightstep, seg_t *curline, const FWallCoords &savecoord, bool foggy, FDynamicColormap *basecolormap, const short *walltop, const short *wallbottom, int pass) { DVector2 decal_left, decal_right, decal_pos; int x1, x2; @@ -130,8 +130,6 @@ namespace swrenderer // to a wall, we use the wall's angle instead of the decal's. This is // pretty much the same as what R_AddLine() does. 
- FWallCoords savecoord = WallC; - double edge_right = WallSpriteTile->GetWidth(); double edge_left = WallSpriteTile->LeftOffset; edge_right = (edge_right - edge_left) * decal->ScaleX; @@ -150,14 +148,15 @@ namespace swrenderer CameraLight *cameraLight; double texturemid; + FWallCoords WallC; if (WallC.Init(decal_left, decal_right, TOO_CLOSE_Z)) - goto done; + return; x1 = WallC.sx1; x2 = WallC.sx2; if (x1 >= clipper->x2 || x2 <= clipper->x1) - goto done; + return; FWallTmapVals WallT; WallT.InitFromWallCoords(&WallC); @@ -170,7 +169,7 @@ namespace swrenderer { if (pass != 0) { - goto done; + return; } mceilingclip = walltop; mfloorclip = wallbottom; @@ -191,7 +190,7 @@ namespace swrenderer case RF_CLIPUPPER: if (pass != 0) { - goto done; + return; } mceilingclip = walltop; mfloorclip = RenderOpaquePass::Instance()->ceilingclip; @@ -200,7 +199,7 @@ namespace swrenderer case RF_CLIPMID: if (curline->backsector != NULL && pass != 2) { - goto done; + return; } mceilingclip = clipper->sprtopclip - clipper->x1; mfloorclip = clipper->sprbottomclip - clipper->x1; @@ -209,7 +208,7 @@ namespace swrenderer case RF_CLIPLOWER: if (pass != 0) { - goto done; + return; } mceilingclip = RenderOpaquePass::Instance()->floorclip; mfloorclip = wallbottom; @@ -224,7 +223,7 @@ namespace swrenderer x2 = MIN(clipper->x2, x2); if (x1 >= x2) { - goto done; + return; } ProjectedWallTexcoords walltexcoords; @@ -311,9 +310,6 @@ namespace swrenderer mceilingclip = RenderOpaquePass::Instance()->floorclip; mfloorclip = wallbottom; } while (needrepeat--); - - done: - WallC = savecoord; } void RenderDecal::DrawColumn(DrawerStyle &drawerstyle, int x, FTexture *WallSpriteTile, const ProjectedWallTexcoords &walltexcoords, double texturemid, float maskedScaleY, bool sprflipvert, const short *mfloorclip, const short *mceilingclip) diff --git a/src/swrenderer/things/r_decal.h b/src/swrenderer/things/r_decal.h index 760d7664a8..b70c08ddf8 100644 --- a/src/swrenderer/things/r_decal.h +++ 
b/src/swrenderer/things/r_decal.h @@ -28,7 +28,7 @@ namespace swrenderer static void RenderDecals(side_t *wall, DrawSegment *draw_segment, int wallshade, float lightleft, float lightstep, seg_t *curline, const FWallCoords &wallC, bool foggy, FDynamicColormap *basecolormap, const short *walltop, const short *wallbottom); private: - static void Render(side_t *wall, DBaseDecal *first, DrawSegment *clipper, int wallshade, float lightleft, float lightstep, seg_t *curline, FWallCoords wallC, bool foggy, FDynamicColormap *basecolormap, const short *walltop, const short *wallbottom, int pass); + static void Render(side_t *wall, DBaseDecal *first, DrawSegment *clipper, int wallshade, float lightleft, float lightstep, seg_t *curline, const FWallCoords &wallC, bool foggy, FDynamicColormap *basecolormap, const short *walltop, const short *wallbottom, int pass); static void DrawColumn(DrawerStyle &drawerstyle, int x, FTexture *WallSpriteTile, const ProjectedWallTexcoords &walltexcoords, double texturemid, float maskedScaleY, bool sprflipvert, const short *mfloorclip, const short *mceilingclip); }; } From 9eebe3e9406946c0feef8269ce14f2ac99252ffc Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 28 Jan 2017 08:04:11 +0100 Subject: [PATCH 769/912] Remove unused drawer args --- src/swrenderer/drawers/r_draw.cpp | 3 --- src/swrenderer/drawers/r_draw.h | 4 ---- 2 files changed, 7 deletions(-) diff --git a/src/swrenderer/drawers/r_draw.cpp b/src/swrenderer/drawers/r_draw.cpp index 1824ba03ef..88a477df91 100644 --- a/src/swrenderer/drawers/r_draw.cpp +++ b/src/swrenderer/drawers/r_draw.cpp @@ -131,9 +131,6 @@ namespace swrenderer bool ds_source_mipmapped; int ds_color; bool drawer_needs_pal_input; - unsigned int dc_tspans[4][MAXHEIGHT]; - unsigned int *dc_ctspan[4]; - unsigned int *horizspan[4]; } namespace diff --git a/src/swrenderer/drawers/r_draw.h b/src/swrenderer/drawers/r_draw.h index d6a6fd17e2..f4dd0378eb 100644 --- a/src/swrenderer/drawers/r_draw.h +++ 
b/src/swrenderer/drawers/r_draw.h @@ -97,10 +97,6 @@ namespace swrenderer extern const uint8_t *ds_source; extern bool ds_source_mipmapped; extern int ds_color; - - extern unsigned int dc_tspans[4][MAXHEIGHT]; - extern unsigned int *dc_ctspan[4]; - extern unsigned int *horizspan[4]; } extern int ylookup[MAXHEIGHT]; From dbf9cd5de514428557deff5bf80eca68a02afa52 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 28 Jan 2017 08:17:31 +0100 Subject: [PATCH 770/912] Remove a few more drawerargs --- src/swrenderer/drawers/r_draw.cpp | 34 ++++++++++++------------ src/swrenderer/drawers/r_draw.h | 9 +++---- src/swrenderer/things/r_playersprite.cpp | 10 +------ src/swrenderer/things/r_sprite.cpp | 9 +------ src/v_draw.cpp | 1 - 5 files changed, 23 insertions(+), 40 deletions(-) diff --git a/src/swrenderer/drawers/r_draw.cpp b/src/swrenderer/drawers/r_draw.cpp index 88a477df91..885c3fc36f 100644 --- a/src/swrenderer/drawers/r_draw.cpp +++ b/src/swrenderer/drawers/r_draw.cpp @@ -130,7 +130,6 @@ namespace swrenderer const uint8_t *ds_source; bool ds_source_mipmapped; int ds_color; - bool drawer_needs_pal_input; } namespace @@ -242,19 +241,18 @@ namespace swrenderer { using namespace drawerargs; - dc_fcolormap = base_colormap; if (r_swtruecolor) { - dc_shade_constants.light_red = dc_fcolormap->Color.r * 256 / 255; - dc_shade_constants.light_green = dc_fcolormap->Color.g * 256 / 255; - dc_shade_constants.light_blue = dc_fcolormap->Color.b * 256 / 255; - dc_shade_constants.light_alpha = dc_fcolormap->Color.a * 256 / 255; - dc_shade_constants.fade_red = dc_fcolormap->Fade.r; - dc_shade_constants.fade_green = dc_fcolormap->Fade.g; - dc_shade_constants.fade_blue = dc_fcolormap->Fade.b; - dc_shade_constants.fade_alpha = dc_fcolormap->Fade.a; - dc_shade_constants.desaturate = MIN(abs(dc_fcolormap->Desaturate), 255) * 255 / 256; - dc_shade_constants.simple_shade = (dc_fcolormap->Color.d == 0x00ffffff && dc_fcolormap->Fade.d == 0x00000000 && dc_fcolormap->Desaturate == 0); + 
dc_shade_constants.light_red = base_colormap->Color.r * 256 / 255; + dc_shade_constants.light_green = base_colormap->Color.g * 256 / 255; + dc_shade_constants.light_blue = base_colormap->Color.b * 256 / 255; + dc_shade_constants.light_alpha = base_colormap->Color.a * 256 / 255; + dc_shade_constants.fade_red = base_colormap->Fade.r; + dc_shade_constants.fade_green = base_colormap->Fade.g; + dc_shade_constants.fade_blue = base_colormap->Fade.b; + dc_shade_constants.fade_alpha = base_colormap->Fade.a; + dc_shade_constants.desaturate = MIN(abs(base_colormap->Desaturate), 255) * 255 / 256; + dc_shade_constants.simple_shade = (base_colormap->Color.d == 0x00ffffff && base_colormap->Fade.d == 0x00000000 && base_colormap->Desaturate == 0); dc_colormap = base_colormap->Maps; dc_light = LIGHTSCALE(light, shade); } @@ -687,7 +685,7 @@ namespace swrenderer } } - bool DrawerStyle::SetPatchStyle(FRenderStyle style, fixed_t alpha, int translation, uint32_t color, FDynamicColormap *&basecolormap) + bool DrawerStyle::SetPatchStyle(FRenderStyle style, fixed_t alpha, int translation, uint32_t color, FDynamicColormap *&basecolormap, fixed_t shadedlightshade) { using namespace drawerargs; @@ -755,11 +753,13 @@ namespace swrenderer basecolormap = &ShadeFakeColormap[16 - alpha]; if (cameraLight->fixedlightlev >= 0 && cameraLight->fixedcolormap == NULL) { - R_SetColorMapLight(basecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->fixedlightlev)); + fixed_t shade = shadedlightshade; + if (shade == 0) shade = FIXEDLIGHT2SHADE(cameraLight->fixedlightlev); + R_SetColorMapLight(basecolormap, 0, shade); } else { - R_SetColorMapLight(basecolormap, 0, 0); + R_SetColorMapLight(basecolormap, 0, shadedlightshade); } return true; } @@ -796,9 +796,9 @@ namespace swrenderer return true; } - bool DrawerStyle::SetPatchStyle(FRenderStyle style, float alpha, int translation, uint32_t color, FDynamicColormap *&basecolormap) + bool DrawerStyle::SetPatchStyle(FRenderStyle style, float alpha, int translation, uint32_t color, 
FDynamicColormap *&basecolormap, fixed_t shadedlightshade) { - return SetPatchStyle(style, FLOAT2FIXED(alpha), translation, color, basecolormap); + return SetPatchStyle(style, FLOAT2FIXED(alpha), translation, color, basecolormap, shadedlightshade); } DrawerFunc DrawerStyle::GetTransMaskDrawer() diff --git a/src/swrenderer/drawers/r_draw.h b/src/swrenderer/drawers/r_draw.h index f4dd0378eb..20e77d3460 100644 --- a/src/swrenderer/drawers/r_draw.h +++ b/src/swrenderer/drawers/r_draw.h @@ -37,7 +37,6 @@ namespace swrenderer { extern int dc_pitch; extern lighttable_t *dc_colormap; - extern FSWColormap *dc_fcolormap; extern ShadeConstants dc_shade_constants; extern fixed_t dc_light; extern int dc_x; @@ -67,8 +66,6 @@ namespace swrenderer extern TriLight *dc_lights; extern int dc_num_lights; - extern bool drawer_needs_pal_input; - extern uint32_t dc_wall_texturefrac[4]; extern uint32_t dc_wall_iscale[4]; extern uint8_t *dc_wall_colormap[4]; @@ -192,8 +189,8 @@ namespace swrenderer spanfunc = &SWPixelFormatDrawers::DrawSpan; } - bool SetPatchStyle(FRenderStyle style, fixed_t alpha, int translation, uint32_t color, FDynamicColormap *&basecolormap); - bool SetPatchStyle(FRenderStyle style, float alpha, int translation, uint32_t color, FDynamicColormap *&basecolormap); + bool SetPatchStyle(FRenderStyle style, fixed_t alpha, int translation, uint32_t color, FDynamicColormap *&basecolormap, fixed_t shadedlightshade = 0); + bool SetPatchStyle(FRenderStyle style, float alpha, int translation, uint32_t color, FDynamicColormap *&basecolormap, fixed_t shadedlightshade = 0); void SetSpanStyle(bool masked, bool additive, fixed_t alpha); void DrawMaskedColumn(int x, fixed_t iscale, FTexture *texture, fixed_t column, double spryscale, double sprtopscreen, bool sprflipvert, const short *mfloorclip, const short *mceilingclip, bool unmasked = false); @@ -211,5 +208,7 @@ namespace swrenderer bool SetBlendFunc(int op, fixed_t fglevel, fixed_t bglevel, int flags); static fixed_t GetAlpha(int 
type, fixed_t alpha); + + bool drawer_needs_pal_input = false; }; } diff --git a/src/swrenderer/things/r_playersprite.cpp b/src/swrenderer/things/r_playersprite.cpp index 2380c7a452..3e7f745646 100644 --- a/src/swrenderer/things/r_playersprite.cpp +++ b/src/swrenderer/things/r_playersprite.cpp @@ -592,15 +592,7 @@ namespace swrenderer FDynamicColormap *basecolormap = static_cast(Light.BaseColormap); DrawerStyle drawerstyle; - bool visible = drawerstyle.SetPatchStyle(RenderStyle, Alpha, Translation, FillColor, basecolormap); - - if (RenderStyle == LegacyRenderStyles[STYLE_Shaded]) - { // For shaded sprites, R_SetPatchStyle sets a dc_colormap to an alpha table, but - // it is the brightest one. We need to get back to the proper light level for - // this sprite. - R_SetColorMapLight(drawerargs::dc_fcolormap, 0, Light.ColormapNum << FRACBITS); - } - + bool visible = drawerstyle.SetPatchStyle(RenderStyle, Alpha, Translation, FillColor, basecolormap, Light.ColormapNum << FRACBITS); if (!visible) return; diff --git a/src/swrenderer/things/r_sprite.cpp b/src/swrenderer/things/r_sprite.cpp index 575cb8369d..ac2cedb2a5 100644 --- a/src/swrenderer/things/r_sprite.cpp +++ b/src/swrenderer/things/r_sprite.cpp @@ -251,14 +251,7 @@ namespace swrenderer FDynamicColormap *basecolormap = static_cast(vis->Light.BaseColormap); DrawerStyle drawerstyle; - bool visible = drawerstyle.SetPatchStyle(vis->RenderStyle, vis->Alpha, vis->Translation, vis->FillColor, basecolormap); - - if (vis->RenderStyle == LegacyRenderStyles[STYLE_Shaded]) - { // For shaded sprites, R_SetPatchStyle sets a dc_colormap to an alpha table, but - // it is the brightest one. We need to get back to the proper light level for - // this sprite. 
- R_SetColorMapLight(drawerargs::dc_fcolormap, 0, vis->Light.ColormapNum << FRACBITS); - } + bool visible = drawerstyle.SetPatchStyle(vis->RenderStyle, vis->Alpha, vis->Translation, vis->FillColor, basecolormap, vis->Light.ColormapNum << FRACBITS); if (visible) { diff --git a/src/v_draw.cpp b/src/v_draw.cpp index 6e4f417287..a3d206726b 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -197,7 +197,6 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) R_SetTranslationMap(identitymap); } - CameraLight::Instance()->fixedcolormap = dc_fcolormap; bool visible; FDynamicColormap *basecolormap = nullptr; DrawerStyle drawerstyle; From bd35d1d39f24b83917f2436d25ccb3e2e1ddf37d Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 28 Jan 2017 08:19:21 +0100 Subject: [PATCH 771/912] Remove ds_fcolormap --- src/swrenderer/drawers/r_draw.cpp | 22 ++++++++++------------ src/swrenderer/drawers/r_draw.h | 1 - 2 files changed, 10 insertions(+), 13 deletions(-) diff --git a/src/swrenderer/drawers/r_draw.cpp b/src/swrenderer/drawers/r_draw.cpp index 885c3fc36f..ae2e9343a1 100644 --- a/src/swrenderer/drawers/r_draw.cpp +++ b/src/swrenderer/drawers/r_draw.cpp @@ -116,7 +116,6 @@ namespace swrenderer int ds_x1; int ds_x2; lighttable_t * ds_colormap; - FSWColormap *ds_fcolormap; ShadeConstants ds_shade_constants; dsfixed_t ds_light; dsfixed_t ds_xfrac; @@ -266,19 +265,18 @@ namespace swrenderer { using namespace drawerargs; - ds_fcolormap = base_colormap; if (r_swtruecolor) { - ds_shade_constants.light_red = ds_fcolormap->Color.r * 256 / 255; - ds_shade_constants.light_green = ds_fcolormap->Color.g * 256 / 255; - ds_shade_constants.light_blue = ds_fcolormap->Color.b * 256 / 255; - ds_shade_constants.light_alpha = ds_fcolormap->Color.a * 256 / 255; - ds_shade_constants.fade_red = ds_fcolormap->Fade.r; - ds_shade_constants.fade_green = ds_fcolormap->Fade.g; - ds_shade_constants.fade_blue = ds_fcolormap->Fade.b; - ds_shade_constants.fade_alpha = ds_fcolormap->Fade.a; - 
ds_shade_constants.desaturate = MIN(abs(ds_fcolormap->Desaturate), 255) * 255 / 256; - ds_shade_constants.simple_shade = (ds_fcolormap->Color.d == 0x00ffffff && ds_fcolormap->Fade.d == 0x00000000 && ds_fcolormap->Desaturate == 0); + ds_shade_constants.light_red = base_colormap->Color.r * 256 / 255; + ds_shade_constants.light_green = base_colormap->Color.g * 256 / 255; + ds_shade_constants.light_blue = base_colormap->Color.b * 256 / 255; + ds_shade_constants.light_alpha = base_colormap->Color.a * 256 / 255; + ds_shade_constants.fade_red = base_colormap->Fade.r; + ds_shade_constants.fade_green = base_colormap->Fade.g; + ds_shade_constants.fade_blue = base_colormap->Fade.b; + ds_shade_constants.fade_alpha = base_colormap->Fade.a; + ds_shade_constants.desaturate = MIN(abs(base_colormap->Desaturate), 255) * 255 / 256; + ds_shade_constants.simple_shade = (base_colormap->Color.d == 0x00ffffff && base_colormap->Fade.d == 0x00000000 && base_colormap->Desaturate == 0); ds_colormap = base_colormap->Maps; ds_light = LIGHTSCALE(light, shade); } diff --git a/src/swrenderer/drawers/r_draw.h b/src/swrenderer/drawers/r_draw.h index 20e77d3460..7264643f7b 100644 --- a/src/swrenderer/drawers/r_draw.h +++ b/src/swrenderer/drawers/r_draw.h @@ -80,7 +80,6 @@ namespace swrenderer extern int ds_x1; extern int ds_x2; extern lighttable_t * ds_colormap; - extern FSWColormap *ds_fcolormap; extern ShadeConstants ds_shade_constants; extern dsfixed_t ds_light; extern dsfixed_t ds_xfrac; From 8fceb60532fd65e86ffaf76687009dc1dac2191f Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 28 Jan 2017 08:40:31 +0100 Subject: [PATCH 772/912] Removed unused variable --- src/swrenderer/drawers/r_draw.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/swrenderer/drawers/r_draw.cpp b/src/swrenderer/drawers/r_draw.cpp index ae2e9343a1..de5b34deae 100644 --- a/src/swrenderer/drawers/r_draw.cpp +++ b/src/swrenderer/drawers/r_draw.cpp @@ -74,7 +74,6 @@ namespace swrenderer { int dc_pitch; 
lighttable_t *dc_colormap; - FSWColormap *dc_fcolormap; ShadeConstants dc_shade_constants; fixed_t dc_light; int dc_x; @@ -292,7 +291,6 @@ namespace swrenderer if (r_swtruecolor) { - dc_fcolormap = nullptr; dc_colormap = nullptr; dc_translation = translation; dc_shade_constants.light_red = 256; @@ -309,7 +307,6 @@ namespace swrenderer } else { - dc_fcolormap = nullptr; dc_colormap = translation; } } From 2f9453bc86a41621685660f8f150520d8e78d61f Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Sat, 28 Jan 2017 09:53:24 -0500 Subject: [PATCH 773/912] - Fully implemented Graf's level.PreserveSectorColor() changes into the software renderers. --- src/polyrenderer/scene/poly_playersprite.cpp | 10 +++++----- src/swrenderer/line/r_line.cpp | 3 +-- src/swrenderer/line/r_renderdrawsegment.cpp | 7 +++---- src/swrenderer/scene/r_light.cpp | 8 ++++---- src/swrenderer/scene/r_opaque_pass.cpp | 5 ++--- src/swrenderer/scene/r_translucent_pass.cpp | 2 -- src/swrenderer/things/r_decal.cpp | 7 +++---- src/swrenderer/things/r_particle.cpp | 2 -- src/swrenderer/things/r_playersprite.cpp | 1 - src/swrenderer/things/r_sprite.cpp | 2 -- src/swrenderer/things/r_visiblesprite.cpp | 2 -- src/swrenderer/things/r_voxel.cpp | 2 -- src/swrenderer/things/r_wallsprite.cpp | 5 ++--- 13 files changed, 20 insertions(+), 36 deletions(-) diff --git a/src/polyrenderer/scene/poly_playersprite.cpp b/src/polyrenderer/scene/poly_playersprite.cpp index dd8fc48509..b4b5189331 100644 --- a/src/polyrenderer/scene/poly_playersprite.cpp +++ b/src/polyrenderer/scene/poly_playersprite.cpp @@ -30,11 +30,11 @@ #include "d_player.h" #include "swrenderer/scene/r_viewport.h" #include "swrenderer/scene/r_light.h" +#include "g_levellocals.h" EXTERN_CVAR(Bool, r_drawplayersprites) EXTERN_CVAR(Bool, r_deathcamera) EXTERN_CVAR(Bool, st_scale) -EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor) EXTERN_CVAR(Bool, r_shadercolormaps) void RenderPolyPlayerSprites::Render() @@ -269,12 +269,12 @@ void 
RenderPolyPlayerSprites::RenderSprite(DPSprite *sprite, AActor *owner, floa } if (swrenderer::fixedlightlev >= 0) { - BaseColormap = (r_fullbrightignoresectorcolor) ? &FullNormalLight : mybasecolormap; + BaseColormap = (!level.PreserveSectorColor()) ? &FullNormalLight : mybasecolormap; ColormapNum = swrenderer::fixedlightlev >> COLORMAPSHIFT; } else if (!foggy && sprite->GetState()->GetFullbright()) { // full bright - BaseColormap = (r_fullbrightignoresectorcolor) ? &FullNormalLight : mybasecolormap; // [RH] use basecolormap + BaseColormap = (!level.PreserveSectorColor()) ? &FullNormalLight : mybasecolormap; // [RH] use basecolormap ColormapNum = 0; } else @@ -334,9 +334,9 @@ void RenderPolyPlayerSprites::RenderSprite(DPSprite *sprite, AActor *owner, floa noaccel = true; } // [SP] If emulating GZDoom fullbright, disable acceleration - if (r_fullbrightignoresectorcolor && cameraLight->fixedlightlev >= 0) + if (!level.PreserveSectorColor() && cameraLight->fixedlightlev >= 0) mybasecolormap = &FullNormalLight; - if (r_fullbrightignoresectorcolor && !foggy && sprite->GetState()->GetFullbright()) + if (!level.PreserveSectorColor() && !foggy && sprite->GetState()->GetFullbright()) mybasecolormap = &FullNormalLight; colormap_to_use = mybasecolormap; } diff --git a/src/swrenderer/line/r_line.cpp b/src/swrenderer/line/r_line.cpp index 09b1965e43..e0975a0afd 100644 --- a/src/swrenderer/line/r_line.cpp +++ b/src/swrenderer/line/r_line.cpp @@ -52,7 +52,6 @@ CVAR(Bool, r_fogboundary, true, 0) CVAR(Bool, r_drawmirrors, true, 0) -EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor); namespace swrenderer { @@ -935,7 +934,7 @@ namespace swrenderer CameraLight *cameraLight = CameraLight::Instance(); if (cameraLight->fixedlightlev >= 0) - R_SetColorMapLight((r_fullbrightignoresectorcolor) ? &FullNormalLight : basecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->fixedlightlev)); + R_SetColorMapLight((!level.PreserveSectorColor()) ? 
&FullNormalLight : basecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->fixedlightlev)); else if (cameraLight->fixedcolormap != nullptr) R_SetColorMapLight(cameraLight->fixedcolormap, 0, 0); diff --git a/src/swrenderer/line/r_renderdrawsegment.cpp b/src/swrenderer/line/r_renderdrawsegment.cpp index 214ed52b11..a381781c73 100644 --- a/src/swrenderer/line/r_renderdrawsegment.cpp +++ b/src/swrenderer/line/r_renderdrawsegment.cpp @@ -41,8 +41,7 @@ #include "swrenderer/things/r_visiblesprite.h" #include "swrenderer/scene/r_light.h" #include "swrenderer/scene/r_viewport.h" - -EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor); +#include "g_levellocals.h" namespace swrenderer { @@ -142,7 +141,7 @@ namespace swrenderer rw_scalestep = ds->iscalestep; if (cameraLight->fixedlightlev >= 0) - R_SetColorMapLight((r_fullbrightignoresectorcolor) ? &FullNormalLight : basecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->fixedlightlev)); + R_SetColorMapLight((!level.PreserveSectorColor()) ? &FullNormalLight : basecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->fixedlightlev)); else if (cameraLight->fixedcolormap != nullptr) R_SetColorMapLight(cameraLight->fixedcolormap, 0, 0); @@ -445,7 +444,7 @@ namespace swrenderer CameraLight *cameraLight = CameraLight::Instance(); if (cameraLight->fixedlightlev >= 0) - R_SetColorMapLight((r_fullbrightignoresectorcolor) ? &FullNormalLight : basecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->fixedlightlev)); + R_SetColorMapLight((!level.PreserveSectorColor()) ? 
&FullNormalLight : basecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->fixedlightlev)); else if (cameraLight->fixedcolormap != nullptr) R_SetColorMapLight(cameraLight->fixedcolormap, 0, 0); diff --git a/src/swrenderer/scene/r_light.cpp b/src/swrenderer/scene/r_light.cpp index 851fab7500..f214e52613 100644 --- a/src/swrenderer/scene/r_light.cpp +++ b/src/swrenderer/scene/r_light.cpp @@ -32,9 +32,9 @@ #include "d_player.h" #include "swrenderer/scene/r_light.h" #include "swrenderer/scene/r_viewport.h" +#include "g_levellocals.h" CVAR(Bool, r_shadercolormaps, true, CVAR_ARCHIVE) -EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor) namespace swrenderer { @@ -75,7 +75,7 @@ namespace swrenderer { fixedlightlev = player->fixedlightlevel * 256; // [SP] Emulate GZDoom's light-amp goggles. - if (r_fullbrightignoresectorcolor && fixedlightlev >= 0) + if (!level.PreserveSectorColor() && fixedlightlev >= 0) { fixedcolormap = &FullNormalLight; } @@ -194,12 +194,12 @@ namespace swrenderer } else if (cameraLight->fixedlightlev >= 0) { - BaseColormap = (r_fullbrightignoresectorcolor) ? &FullNormalLight : basecolormap; + BaseColormap = (!level.PreserveSectorColor()) ? &FullNormalLight : basecolormap; ColormapNum = cameraLight->fixedlightlev >> COLORMAPSHIFT; } else if (fullbright) { - BaseColormap = (r_fullbrightignoresectorcolor) ? &FullNormalLight : basecolormap; + BaseColormap = (!level.PreserveSectorColor()) ? 
&FullNormalLight : basecolormap; ColormapNum = 0; } else diff --git a/src/swrenderer/scene/r_opaque_pass.cpp b/src/swrenderer/scene/r_opaque_pass.cpp index 2391d05428..2cb5a5993d 100644 --- a/src/swrenderer/scene/r_opaque_pass.cpp +++ b/src/swrenderer/scene/r_opaque_pass.cpp @@ -62,7 +62,6 @@ #include "r_data/colormaps.h" #include "g_levellocals.h" -EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor); EXTERN_CVAR(Bool, r_drawvoxels); namespace swrenderer @@ -494,7 +493,7 @@ namespace swrenderer } else { - basecolormap = (r_fullbrightignoresectorcolor && cameraLight->fixedlightlev >= 0) ? &FullNormalLight : frontsector->ColorMap; + basecolormap = (!level.PreserveSectorColor() && cameraLight->fixedlightlev >= 0) ? &FullNormalLight : frontsector->ColorMap; } portal = frontsector->ValidatePortal(sector_t::ceiling); @@ -532,7 +531,7 @@ namespace swrenderer } else { - basecolormap = (r_fullbrightignoresectorcolor && cameraLight->fixedlightlev >= 0) ? &FullNormalLight : frontsector->ColorMap; + basecolormap = (!level.PreserveSectorColor() && cameraLight->fixedlightlev >= 0) ? 
&FullNormalLight : frontsector->ColorMap; } // killough 3/7/98: Add (x,y) offsets to flats, add deep water check diff --git a/src/swrenderer/scene/r_translucent_pass.cpp b/src/swrenderer/scene/r_translucent_pass.cpp index 67b0600955..69444f560f 100644 --- a/src/swrenderer/scene/r_translucent_pass.cpp +++ b/src/swrenderer/scene/r_translucent_pass.cpp @@ -43,8 +43,6 @@ EXTERN_CVAR(Int, r_drawfuzz) EXTERN_CVAR(Bool, r_drawvoxels) EXTERN_CVAR(Bool, r_blendmethod) -CVAR(Bool, r_fullbrightignoresectorcolor, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); - namespace swrenderer { RenderTranslucentPass *RenderTranslucentPass::Instance() diff --git a/src/swrenderer/things/r_decal.cpp b/src/swrenderer/things/r_decal.cpp index bf47283152..37c83faa5d 100644 --- a/src/swrenderer/things/r_decal.cpp +++ b/src/swrenderer/things/r_decal.cpp @@ -42,8 +42,7 @@ #include "swrenderer/scene/r_light.h" #include "swrenderer/things/r_wallsprite.h" #include "swrenderer/r_memory.h" - -EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor); +#include "g_levellocals.h" namespace swrenderer { @@ -255,11 +254,11 @@ namespace swrenderer light = lightleft + (x1 - savecoord.sx1) * lightstep; cameraLight = CameraLight::Instance(); if (cameraLight->fixedlightlev >= 0) - R_SetColorMapLight((r_fullbrightignoresectorcolor) ? &FullNormalLight : usecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->fixedlightlev)); + R_SetColorMapLight((!level.PreserveSectorColor()) ? &FullNormalLight : usecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->fixedlightlev)); else if (cameraLight->fixedcolormap != NULL) R_SetColorMapLight(cameraLight->fixedcolormap, 0, 0); else if (!foggy && (decal->RenderFlags & RF_FULLBRIGHT)) - R_SetColorMapLight((r_fullbrightignoresectorcolor) ? &FullNormalLight : usecolormap, 0, 0); + R_SetColorMapLight((!level.PreserveSectorColor()) ? 
&FullNormalLight : usecolormap, 0, 0); else calclighting = true; diff --git a/src/swrenderer/things/r_particle.cpp b/src/swrenderer/things/r_particle.cpp index 5d978fd9f5..a63028e84f 100644 --- a/src/swrenderer/things/r_particle.cpp +++ b/src/swrenderer/things/r_particle.cpp @@ -56,8 +56,6 @@ #include "swrenderer/drawers/r_draw_pal.h" #include "swrenderer/r_memory.h" -EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor); - namespace swrenderer { void RenderParticle::Project(particle_t *particle, const sector_t *sector, int shade, WaterFakeSide fakeside, bool foggy) diff --git a/src/swrenderer/things/r_playersprite.cpp b/src/swrenderer/things/r_playersprite.cpp index 3e7f745646..ac703ab319 100644 --- a/src/swrenderer/things/r_playersprite.cpp +++ b/src/swrenderer/things/r_playersprite.cpp @@ -61,7 +61,6 @@ EXTERN_CVAR(Bool, st_scale) EXTERN_CVAR(Bool, r_drawplayersprites) EXTERN_CVAR(Bool, r_deathcamera) EXTERN_CVAR(Bool, r_shadercolormaps) -EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor) namespace swrenderer { diff --git a/src/swrenderer/things/r_sprite.cpp b/src/swrenderer/things/r_sprite.cpp index ac2cedb2a5..531c83b1ae 100644 --- a/src/swrenderer/things/r_sprite.cpp +++ b/src/swrenderer/things/r_sprite.cpp @@ -56,8 +56,6 @@ #include "swrenderer/things/r_sprite.h" #include "swrenderer/r_memory.h" -EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor) - namespace swrenderer { void RenderSprite::Project(AActor *thing, const DVector3 &pos, FTexture *tex, const DVector2 &spriteScale, int renderflags, WaterFakeSide fakeside, F3DFloor *fakefloor, F3DFloor *fakeceiling, sector_t *current_sector, int spriteshade, bool foggy, FDynamicColormap *basecolormap) diff --git a/src/swrenderer/things/r_visiblesprite.cpp b/src/swrenderer/things/r_visiblesprite.cpp index b2c0b054a1..14406be39b 100644 --- a/src/swrenderer/things/r_visiblesprite.cpp +++ b/src/swrenderer/things/r_visiblesprite.cpp @@ -36,8 +36,6 @@ #include "swrenderer/scene/r_viewport.h" #include "swrenderer/r_memory.h" 
-EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor); - namespace swrenderer { void VisibleSprite::Render() diff --git a/src/swrenderer/things/r_voxel.cpp b/src/swrenderer/things/r_voxel.cpp index aa6b8206ea..73509dbac7 100644 --- a/src/swrenderer/things/r_voxel.cpp +++ b/src/swrenderer/things/r_voxel.cpp @@ -43,8 +43,6 @@ #include "swrenderer/scene/r_light.h" #include "swrenderer/r_memory.h" -EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor) - namespace swrenderer { void RenderVoxel::Project(AActor *thing, DVector3 pos, FVoxelDef *voxel, const DVector2 &spriteScale, int renderflags, WaterFakeSide fakeside, F3DFloor *fakefloor, F3DFloor *fakeceiling, sector_t *current_sector, int spriteshade, bool foggy, FDynamicColormap *basecolormap) diff --git a/src/swrenderer/things/r_wallsprite.cpp b/src/swrenderer/things/r_wallsprite.cpp index f599098d37..a6a6000583 100644 --- a/src/swrenderer/things/r_wallsprite.cpp +++ b/src/swrenderer/things/r_wallsprite.cpp @@ -57,8 +57,7 @@ #include "swrenderer/line/r_wallsetup.h" #include "swrenderer/line/r_walldraw.h" #include "swrenderer/r_memory.h" - -EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor); +#include "g_levellocals.h" namespace swrenderer { @@ -190,7 +189,7 @@ namespace swrenderer else if (cameraLight->fixedcolormap != NULL) R_SetColorMapLight(cameraLight->fixedcolormap, 0, 0); else if (!spr->foggy && (spr->renderflags & RF_FULLBRIGHT)) - R_SetColorMapLight((r_fullbrightignoresectorcolor) ? &FullNormalLight : usecolormap, 0, 0); + R_SetColorMapLight((!level.PreserveSectorColor()) ? 
&FullNormalLight : usecolormap, 0, 0); else calclighting = true; From 4afac0f2ccecb57e1cf4f996263b1a30dd5ad70c Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 28 Jan 2017 16:36:39 +0100 Subject: [PATCH 774/912] Force all drawing to go through DrawerStyle --- src/swrenderer/drawers/r_draw.cpp | 24 +++++----- src/swrenderer/drawers/r_draw.h | 50 ++++++++++++++++----- src/swrenderer/line/r_fogboundary.cpp | 10 ++--- src/swrenderer/line/r_fogboundary.h | 3 ++ src/swrenderer/line/r_line.cpp | 9 ++-- src/swrenderer/line/r_renderdrawsegment.cpp | 10 ++--- src/swrenderer/line/r_walldraw.cpp | 12 ++--- src/swrenderer/plane/r_flatplane.cpp | 14 +++--- src/swrenderer/plane/r_flatplane.h | 4 +- src/swrenderer/plane/r_skyplane.cpp | 8 ++-- src/swrenderer/plane/r_skyplane.h | 3 ++ src/swrenderer/plane/r_slopeplane.cpp | 12 ++--- src/swrenderer/plane/r_slopeplane.h | 4 +- src/swrenderer/plane/r_visibleplane.cpp | 5 +-- src/swrenderer/things/r_decal.cpp | 21 +++++---- src/swrenderer/things/r_playersprite.cpp | 4 +- src/swrenderer/things/r_sprite.cpp | 5 +-- src/swrenderer/things/r_voxel.cpp | 10 ++--- src/swrenderer/things/r_voxel.h | 4 +- src/swrenderer/things/r_wallsprite.cpp | 11 ++--- src/v_draw.cpp | 18 ++++---- 21 files changed, 145 insertions(+), 96 deletions(-) diff --git a/src/swrenderer/drawers/r_draw.cpp b/src/swrenderer/drawers/r_draw.cpp index de5b34deae..25c74f9c58 100644 --- a/src/swrenderer/drawers/r_draw.cpp +++ b/src/swrenderer/drawers/r_draw.cpp @@ -137,7 +137,7 @@ namespace swrenderer SWTruecolorDrawers tc_drawers; } - SWPixelFormatDrawers *R_Drawers() + SWPixelFormatDrawers *DrawerStyle::Drawers() const { return active_drawers; } @@ -235,7 +235,7 @@ namespace swrenderer } } - void R_SetColorMapLight(FSWColormap *base_colormap, float light, int shade) + void DrawerStyle::SetColorMapLight(FSWColormap *base_colormap, float light, int shade) { using namespace drawerargs; @@ -260,7 +260,7 @@ namespace swrenderer } } - void R_SetDSColorMapLight(FSWColormap 
*base_colormap, float light, int shade) + void DrawerStyle::SetDSColorMapLight(FSWColormap *base_colormap, float light, int shade) { using namespace drawerargs; @@ -285,7 +285,7 @@ namespace swrenderer } } - void R_SetTranslationMap(lighttable_t *translation) + void DrawerStyle::SetTranslationMap(lighttable_t *translation) { using namespace drawerargs; @@ -311,7 +311,7 @@ namespace swrenderer } } - void R_SetSpanTexture(FTexture *tex) + void DrawerStyle::SetSpanTexture(FTexture *tex) { using namespace drawerargs; @@ -331,9 +331,9 @@ namespace swrenderer ds_source_mipmapped = tex->Mipmapped() && tex->GetWidth() > 1 && tex->GetHeight() > 1; } - void R_SetSpanColormap(FDynamicColormap *colormap, int shade) + void DrawerStyle::SetSpanColormap(FDynamicColormap *colormap, int shade) { - R_SetDSColorMapLight(colormap, 0, shade); + SetDSColorMapLight(colormap, 0, shade); } void R_UpdateFuzzPos() @@ -419,7 +419,7 @@ namespace swrenderer else if (dc_iscale < 0) dc_count = MIN(dc_count, (dc_texturefrac - dc_iscale) / (-dc_iscale)); - (R_Drawers()->*colfunc)(); + (Drawers()->*colfunc)(); } span++; } @@ -536,7 +536,7 @@ namespace swrenderer double v = ((dc_yl + 0.5 - sprtopscreen) / spryscale) / tex->GetHeight(); dc_texturefrac = (uint32_t)(v * (1 << 30)); - (R_Drawers()->*colfunc)(); + (Drawers()->*colfunc)(); } span++; } @@ -750,11 +750,11 @@ namespace swrenderer { fixed_t shade = shadedlightshade; if (shade == 0) FIXEDLIGHT2SHADE(cameraLight->fixedlightlev); - R_SetColorMapLight(basecolormap, 0, shade); + SetColorMapLight(basecolormap, 0, shade); } else { - R_SetColorMapLight(basecolormap, 0, shadedlightshade); + SetColorMapLight(basecolormap, 0, shadedlightshade); } return true; } @@ -781,7 +781,7 @@ namespace swrenderer // dc_srccolor is used by the R_Fill* routines. It is premultiplied // with the alpha. 
dc_srccolor = ((((r*x) >> 4) << 20) | ((g*x) >> 4) | ((((b)*x) >> 4) << 10)) & 0x3feffbff; - R_SetColorMapLight(&identitycolormap, 0, 0); + SetColorMapLight(&identitycolormap, 0, 0); } if (!DrawerStyle::SetBlendFunc(style.BlendOp, fglevel, bglevel, style.Flags)) diff --git a/src/swrenderer/drawers/r_draw.h b/src/swrenderer/drawers/r_draw.h index 7264643f7b..bb3158331e 100644 --- a/src/swrenderer/drawers/r_draw.h +++ b/src/swrenderer/drawers/r_draw.h @@ -159,8 +159,6 @@ namespace swrenderer typedef void(SWPixelFormatDrawers::*DrawerFunc)(); - SWPixelFormatDrawers *R_Drawers(); - void R_InitColumnDrawers(); void R_InitShadeMaps(); void R_InitFuzzTable(int fuzzoff); @@ -168,14 +166,6 @@ namespace swrenderer void R_UpdateFuzzPos(); - // Sets dc_colormap and dc_light to their appropriate values depending on the output format (pal vs true color) - void R_SetColorMapLight(FSWColormap *base_colormap, float light, int shade); - void R_SetDSColorMapLight(FSWColormap *base_colormap, float light, int shade); - void R_SetTranslationMap(lighttable_t *translation); - - void R_SetSpanTexture(FTexture *tex); - void R_SetSpanColormap(FDynamicColormap *colormap, int shade); - class DrawerStyle { public: @@ -194,6 +184,14 @@ namespace swrenderer void DrawMaskedColumn(int x, fixed_t iscale, FTexture *texture, fixed_t column, double spryscale, double sprtopscreen, bool sprflipvert, const short *mfloorclip, const short *mceilingclip, bool unmasked = false); + // Sets dc_colormap and dc_light to their appropriate values depending on the output format (pal vs true color) + void SetColorMapLight(FSWColormap *base_colormap, float light, int shade); + void SetDSColorMapLight(FSWColormap *base_colormap, float light, int shade); + void SetTranslationMap(lighttable_t *translation); + + void SetSpanTexture(FTexture *tex); + void SetSpanColormap(FDynamicColormap *colormap, int shade); + DrawerFunc GetTransMaskDrawer(); DrawerFunc colfunc; @@ -202,6 +200,38 @@ namespace swrenderer DrawerFunc 
transcolfunc; DrawerFunc spanfunc; + void DrawTiltedSpan(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy, FDynamicColormap *basecolormap) + { + Drawers()->DrawTiltedSpan(y, x1, x2, plane_sz, plane_su, plane_sv, plane_shade, planeshade, planelightfloat, pviewx, pviewy, basecolormap); + } + + void DrawFogBoundaryLine(int y, int x1, int x2) + { + Drawers()->DrawFogBoundaryLine(y, x1, x2); + } + + void DrawColoredSpan(int y, int x1, int x2) + { + Drawers()->DrawColoredSpan(y, x1, x2); + } + + void DrawSingleSkyColumn(uint32_t solid_top, uint32_t solid_bottom, bool fadeSky) + { + Drawers()->DrawSingleSkyColumn(solid_top, solid_bottom, fadeSky); + } + + void DrawDoubleSkyColumn(uint32_t solid_top, uint32_t solid_bottom, bool fadeSky) + { + Drawers()->DrawDoubleSkyColumn(solid_top, solid_bottom, fadeSky); + } + + void FillColumn() + { + Drawers()->FillColumn(); + } + + SWPixelFormatDrawers *Drawers() const; + private: void DrawMaskedColumnBgra(int x, fixed_t iscale, FTexture *tex, fixed_t column, double spryscale, double sprtopscreen, bool sprflipvert, const short *mfloorclip, const short *mceilingclip, bool unmasked); diff --git a/src/swrenderer/line/r_fogboundary.cpp b/src/swrenderer/line/r_fogboundary.cpp index d0a36f948b..f9928aa183 100644 --- a/src/swrenderer/line/r_fogboundary.cpp +++ b/src/swrenderer/line/r_fogboundary.cpp @@ -64,7 +64,7 @@ namespace swrenderer fillshort(spanend + t2, b2 - t2, x); } - R_SetColorMapLight(basecolormap, (float)light, wallshade); + drawerstyle.SetColorMapLight(basecolormap, (float)light, wallshade); uint8_t *fake_dc_colormap = basecolormap->Maps + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT); @@ -91,7 +91,7 @@ namespace swrenderer fillshort(spanend + t2, b2 - t2, x); } rcolormap = lcolormap; - R_SetColorMapLight(basecolormap, (float)light, wallshade); + 
drawerstyle.SetColorMapLight(basecolormap, (float)light, wallshade); fake_dc_colormap = basecolormap->Maps + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT); } else @@ -102,13 +102,13 @@ namespace swrenderer while (t2 < stop) { int y = t2++; - R_Drawers()->DrawFogBoundaryLine(y, xr, spanend[y]); + drawerstyle.DrawFogBoundaryLine(y, xr, spanend[y]); } stop = MAX(b1, t2); while (b2 > stop) { int y = --b2; - R_Drawers()->DrawFogBoundaryLine(y, xr, spanend[y]); + drawerstyle.DrawFogBoundaryLine(y, xr, spanend[y]); } } else @@ -142,7 +142,7 @@ namespace swrenderer { for (; y < y2; ++y) { - R_Drawers()->DrawFogBoundaryLine(y, x1, spanend[y]); + drawerstyle.DrawFogBoundaryLine(y, x1, spanend[y]); } } } diff --git a/src/swrenderer/line/r_fogboundary.h b/src/swrenderer/line/r_fogboundary.h index cfa0e139ab..92a42c817e 100644 --- a/src/swrenderer/line/r_fogboundary.h +++ b/src/swrenderer/line/r_fogboundary.h @@ -13,6 +13,8 @@ #pragma once +#include "swrenderer/drawers/r_draw.h" + namespace swrenderer { class RenderFogBoundary @@ -24,5 +26,6 @@ namespace swrenderer void RenderSection(int y, int y2, int x1); short spanend[MAXHEIGHT]; + DrawerStyle drawerstyle; }; } diff --git a/src/swrenderer/line/r_line.cpp b/src/swrenderer/line/r_line.cpp index e0975a0afd..b4a43d7607 100644 --- a/src/swrenderer/line/r_line.cpp +++ b/src/swrenderer/line/r_line.cpp @@ -932,11 +932,13 @@ namespace swrenderer double yscale; fixed_t xoffset = rw_offset; + DrawerStyle drawerstyle; + CameraLight *cameraLight = CameraLight::Instance(); if (cameraLight->fixedlightlev >= 0) - R_SetColorMapLight((!level.PreserveSectorColor()) ? &FullNormalLight : basecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->fixedlightlev)); + drawerstyle.SetColorMapLight((!level.PreserveSectorColor()) ? 
&FullNormalLight : basecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->fixedlightlev)); else if (cameraLight->fixedcolormap != nullptr) - R_SetColorMapLight(cameraLight->fixedcolormap, 0, 0); + drawerstyle.SetColorMapLight(cameraLight->fixedcolormap, 0, 0); // clip wall to the floor and ceiling auto ceilingclip = RenderOpaquePass::Instance()->ceilingclip; @@ -1044,7 +1046,6 @@ namespace swrenderer rw_offset = -rw_offset; } - DrawerStyle drawerstyle; RenderWallPart renderWallpart; renderWallpart.Render(drawerstyle, frontsector, curline, WallC, rw_pic, x1, x2, walltop.ScreenY, wallbottom.ScreenY, rw_midtexturemid, walltexcoords.VStep, walltexcoords.UPos, yscale, MAX(rw_frontcz1, rw_frontcz2), MIN(rw_frontfz1, rw_frontfz2), false, wallshade, rw_offset, rw_light, rw_lightstep, light_list, foggy, basecolormap); } @@ -1082,7 +1083,6 @@ namespace swrenderer rw_offset = -rw_offset; } - DrawerStyle drawerstyle; RenderWallPart renderWallpart; renderWallpart.Render(drawerstyle, frontsector, curline, WallC, rw_pic, x1, x2, walltop.ScreenY, wallupper.ScreenY, rw_toptexturemid, walltexcoords.VStep, walltexcoords.UPos, yscale, MAX(rw_frontcz1, rw_frontcz2), MIN(rw_backcz1, rw_backcz2), false, wallshade, rw_offset, rw_light, rw_lightstep, light_list, foggy, basecolormap); } @@ -1123,7 +1123,6 @@ namespace swrenderer rw_offset = -rw_offset; } - DrawerStyle drawerstyle; RenderWallPart renderWallpart; renderWallpart.Render(drawerstyle, frontsector, curline, WallC, rw_pic, x1, x2, walllower.ScreenY, wallbottom.ScreenY, rw_bottomtexturemid, walltexcoords.VStep, walltexcoords.UPos, yscale, MAX(rw_backfz1, rw_backfz2), MIN(rw_frontfz1, rw_frontfz2), false, wallshade, rw_offset, rw_light, rw_lightstep, light_list, foggy, basecolormap); } diff --git a/src/swrenderer/line/r_renderdrawsegment.cpp b/src/swrenderer/line/r_renderdrawsegment.cpp index a381781c73..7c81ec8ba9 100644 --- a/src/swrenderer/line/r_renderdrawsegment.cpp +++ b/src/swrenderer/line/r_renderdrawsegment.cpp @@ -141,9 +141,9 @@ 
namespace swrenderer rw_scalestep = ds->iscalestep; if (cameraLight->fixedlightlev >= 0) - R_SetColorMapLight((!level.PreserveSectorColor()) ? &FullNormalLight : basecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->fixedlightlev)); + drawerstyle.SetColorMapLight((!level.PreserveSectorColor()) ? &FullNormalLight : basecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->fixedlightlev)); else if (cameraLight->fixedcolormap != nullptr) - R_SetColorMapLight(cameraLight->fixedcolormap, 0, 0); + drawerstyle.SetColorMapLight(cameraLight->fixedcolormap, 0, 0); // find positioning texheight = tex->GetScaledHeightDouble(); @@ -272,7 +272,7 @@ namespace swrenderer { if (cameraLight->fixedcolormap == nullptr && cameraLight->fixedlightlev < 0) { - R_SetColorMapLight(basecolormap, rw_light, wallshade); + drawerstyle.SetColorMapLight(basecolormap, rw_light, wallshade); } fixed_t iscale = xs_Fix<16>::ToFix(MaskedSWall[x] * MaskedScaleY); @@ -444,9 +444,9 @@ namespace swrenderer CameraLight *cameraLight = CameraLight::Instance(); if (cameraLight->fixedlightlev >= 0) - R_SetColorMapLight((!level.PreserveSectorColor()) ? &FullNormalLight : basecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->fixedlightlev)); + drawerstyle.SetColorMapLight((!level.PreserveSectorColor()) ? 
&FullNormalLight : basecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->fixedlightlev)); else if (cameraLight->fixedcolormap != nullptr) - R_SetColorMapLight(cameraLight->fixedcolormap, 0, 0); + drawerstyle.SetColorMapLight(cameraLight->fixedcolormap, 0, 0); WallC.sz1 = ds->sz1; WallC.sz2 = ds->sz2; diff --git a/src/swrenderer/line/r_walldraw.cpp b/src/swrenderer/line/r_walldraw.cpp index da5e251cbd..8c17fa6214 100644 --- a/src/swrenderer/line/r_walldraw.cpp +++ b/src/swrenderer/line/r_walldraw.cpp @@ -250,7 +250,7 @@ namespace swrenderer dc_iscale = sampler.uv_step; dc_texturefrac = sampler.uv_pos; dc_textureheight = sampler.height; - (R_Drawers()->*draw1column)(); + (drawerstyle.Drawers()->*draw1column)(); uint64_t step64 = sampler.uv_step; uint64_t pos64 = sampler.uv_pos; @@ -269,7 +269,7 @@ namespace swrenderer dc_count = count; dc_iscale = sampler.uv_step; dc_texturefrac = sampler.uv_pos; - (R_Drawers()->*draw1column)(); + (drawerstyle.Drawers()->*draw1column)(); uint64_t step64 = sampler.uv_step; uint64_t pos64 = sampler.uv_pos; @@ -295,7 +295,7 @@ namespace swrenderer dc_count = count; dc_iscale = sampler.uv_step; dc_texturefrac = uv_pos; - (R_Drawers()->*draw1column)(); + (drawerstyle.Drawers()->*draw1column)(); left -= count; uv_pos += sampler.uv_step * count; @@ -341,9 +341,9 @@ namespace swrenderer } if (cameraLight->fixedcolormap) - R_SetColorMapLight(cameraLight->fixedcolormap, 0, 0); + drawerstyle.SetColorMapLight(cameraLight->fixedcolormap, 0, 0); else - R_SetColorMapLight(basecolormap, 0, 0); + drawerstyle.SetColorMapLight(basecolormap, 0, 0); float dx = WallC.tright.X - WallC.tleft.X; float dy = WallC.tright.Y - WallC.tleft.Y; @@ -362,7 +362,7 @@ namespace swrenderer continue; if (!fixed) - R_SetColorMapLight(basecolormap, light, wallshade); + drawerstyle.SetColorMapLight(basecolormap, light, wallshade); if (x + 1 < x2) xmagnitude = fabs(FIXED2DBL(lwal[x + 1]) - FIXED2DBL(lwal[x])); diff --git a/src/swrenderer/plane/r_flatplane.cpp 
b/src/swrenderer/plane/r_flatplane.cpp index 2beae7756f..54850bc6fc 100644 --- a/src/swrenderer/plane/r_flatplane.cpp +++ b/src/swrenderer/plane/r_flatplane.cpp @@ -44,7 +44,7 @@ namespace swrenderer { - void RenderFlatPlane::Render(VisiblePlane *pl, double _xscale, double _yscale, fixed_t alpha, bool additive, bool masked, FDynamicColormap *colormap) + void RenderFlatPlane::Render(VisiblePlane *pl, double _xscale, double _yscale, fixed_t alpha, bool additive, bool masked, FDynamicColormap *colormap, FTexture *texture) { using namespace drawerargs; @@ -53,6 +53,8 @@ namespace swrenderer return; } + drawerstyle.SetSpanTexture(texture); + double planeang = (pl->xform.Angle + pl->xform.baseAngle).Radians(); double xstep, ystep, leftxfrac, leftyfrac, rightxfrac, rightyfrac; double x; @@ -111,12 +113,12 @@ namespace swrenderer CameraLight *cameraLight = CameraLight::Instance(); if (cameraLight->fixedlightlev >= 0) { - R_SetDSColorMapLight(basecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->fixedlightlev)); + drawerstyle.SetDSColorMapLight(basecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->fixedlightlev)); plane_shade = false; } else if (cameraLight->fixedcolormap) { - R_SetDSColorMapLight(cameraLight->fixedcolormap, 0, 0); + drawerstyle.SetDSColorMapLight(cameraLight->fixedcolormap, 0, 0); plane_shade = false; } else @@ -184,7 +186,7 @@ namespace swrenderer if (plane_shade) { // Determine lighting based on the span's distance from the viewer. 
- R_SetDSColorMapLight(basecolormap, (float)(GlobVis * fabs(CenterY - y)), planeshade); + drawerstyle.SetDSColorMapLight(basecolormap, (float)(GlobVis * fabs(CenterY - y)), planeshade); } if (r_dynlights) @@ -255,7 +257,7 @@ namespace swrenderer ds_x1 = x1; ds_x2 = x2; - (R_Drawers()->*drawerstyle.spanfunc)(); + (drawerstyle.Drawers()->*drawerstyle.spanfunc)(); } void RenderFlatPlane::StepColumn() @@ -317,6 +319,6 @@ namespace swrenderer void RenderColoredPlane::RenderLine(int y, int x1, int x2) { - R_Drawers()->DrawColoredSpan(y, x1, x2); + drawerstyle.DrawColoredSpan(y, x1, x2); } } diff --git a/src/swrenderer/plane/r_flatplane.h b/src/swrenderer/plane/r_flatplane.h index 4dd189dabc..87efcfdeb2 100644 --- a/src/swrenderer/plane/r_flatplane.h +++ b/src/swrenderer/plane/r_flatplane.h @@ -23,7 +23,7 @@ namespace swrenderer class RenderFlatPlane : PlaneRenderer { public: - void Render(VisiblePlane *pl, double _xscale, double _yscale, fixed_t alpha, bool additive, bool masked, FDynamicColormap *basecolormap); + void Render(VisiblePlane *pl, double _xscale, double _yscale, fixed_t alpha, bool additive, bool masked, FDynamicColormap *basecolormap, FTexture *texture); static void SetupSlope(); @@ -54,5 +54,7 @@ namespace swrenderer private: void RenderLine(int y, int x1, int x2) override; + + DrawerStyle drawerstyle; }; } diff --git a/src/swrenderer/plane/r_skyplane.cpp b/src/swrenderer/plane/r_skyplane.cpp index d07c25b445..72ea005591 100644 --- a/src/swrenderer/plane/r_skyplane.cpp +++ b/src/swrenderer/plane/r_skyplane.cpp @@ -151,13 +151,13 @@ namespace swrenderer CameraLight *cameraLight = CameraLight::Instance(); if (cameraLight->fixedcolormap) { - R_SetColorMapLight(cameraLight->fixedcolormap, 0, 0); + drawerstyle.SetColorMapLight(cameraLight->fixedcolormap, 0, 0); } else { fakefixed = true; cameraLight->fixedcolormap = &NormalLight; - R_SetColorMapLight(cameraLight->fixedcolormap, 0, 0); + drawerstyle.SetColorMapLight(cameraLight->fixedcolormap, 0, 0); } 
DrawSky(pl); @@ -228,9 +228,9 @@ namespace swrenderer bool fadeSky = (r_skymode == 2 && !(level.flags & LEVEL_FORCETILEDSKY)); if (!backskytex) - R_Drawers()->DrawSingleSkyColumn(solid_top, solid_bottom, fadeSky); + drawerstyle.DrawSingleSkyColumn(solid_top, solid_bottom, fadeSky); else - R_Drawers()->DrawDoubleSkyColumn(solid_top, solid_bottom, fadeSky); + drawerstyle.DrawDoubleSkyColumn(solid_top, solid_bottom, fadeSky); } void RenderSkyPlane::DrawSkyColumn(int start_x, int y1, int y2, int columns) diff --git a/src/swrenderer/plane/r_skyplane.h b/src/swrenderer/plane/r_skyplane.h index 8ea638a27b..da6fd689ef 100644 --- a/src/swrenderer/plane/r_skyplane.h +++ b/src/swrenderer/plane/r_skyplane.h @@ -14,6 +14,7 @@ #pragma once #include "r_visibleplane.h" +#include "swrenderer/drawers/r_draw.h" namespace swrenderer { @@ -36,5 +37,7 @@ namespace swrenderer fixed_t backcyl = 0; double skymid = 0.0; angle_t skyangle = 0; + + DrawerStyle drawerstyle; }; } diff --git a/src/swrenderer/plane/r_slopeplane.cpp b/src/swrenderer/plane/r_slopeplane.cpp index 510697eeb1..03817b3ae6 100644 --- a/src/swrenderer/plane/r_slopeplane.cpp +++ b/src/swrenderer/plane/r_slopeplane.cpp @@ -48,7 +48,7 @@ namespace swrenderer { - void RenderSlopePlane::Render(VisiblePlane *pl, double _xscale, double _yscale, fixed_t alpha, bool additive, bool masked, FDynamicColormap *colormap) + void RenderSlopePlane::Render(VisiblePlane *pl, double _xscale, double _yscale, fixed_t alpha, bool additive, bool masked, FDynamicColormap *colormap, FTexture *texture) { using namespace drawerargs; @@ -72,6 +72,8 @@ namespace swrenderer return; } + drawerstyle.SetSpanTexture(texture); + lxscale = _xscale * ifloatpow2[ds_xbits]; lyscale = _yscale * ifloatpow2[ds_ybits]; xscale = 64.f / lxscale; @@ -153,17 +155,17 @@ namespace swrenderer CameraLight *cameraLight = CameraLight::Instance(); if (cameraLight->fixedlightlev >= 0) { - R_SetDSColorMapLight(basecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->fixedlightlev)); + 
drawerstyle.SetDSColorMapLight(basecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->fixedlightlev)); plane_shade = false; } else if (cameraLight->fixedcolormap) { - R_SetDSColorMapLight(cameraLight->fixedcolormap, 0, 0); + drawerstyle.SetDSColorMapLight(cameraLight->fixedcolormap, 0, 0); plane_shade = false; } else { - R_SetDSColorMapLight(basecolormap, 0, 0); + drawerstyle.SetDSColorMapLight(basecolormap, 0, 0); plane_shade = true; planeshade = LIGHT2SHADE(pl->lightlevel); } @@ -183,6 +185,6 @@ namespace swrenderer void RenderSlopePlane::RenderLine(int y, int x1, int x2) { - R_Drawers()->DrawTiltedSpan(y, x1, x2, plane_sz, plane_su, plane_sv, plane_shade, planeshade, planelightfloat, pviewx, pviewy, basecolormap); + drawerstyle.DrawTiltedSpan(y, x1, x2, plane_sz, plane_su, plane_sv, plane_shade, planeshade, planelightfloat, pviewx, pviewy, basecolormap); } } diff --git a/src/swrenderer/plane/r_slopeplane.h b/src/swrenderer/plane/r_slopeplane.h index 253e3b7cc3..8a95967432 100644 --- a/src/swrenderer/plane/r_slopeplane.h +++ b/src/swrenderer/plane/r_slopeplane.h @@ -14,13 +14,14 @@ #pragma once #include "r_planerenderer.h" +#include "swrenderer/drawers/r_draw.h" namespace swrenderer { class RenderSlopePlane : PlaneRenderer { public: - void Render(VisiblePlane *pl, double _xscale, double _yscale, fixed_t alpha, bool additive, bool masked, FDynamicColormap *basecolormap); + void Render(VisiblePlane *pl, double _xscale, double _yscale, fixed_t alpha, bool additive, bool masked, FDynamicColormap *basecolormap, FTexture *texture); private: void RenderLine(int y, int x1, int x2) override; @@ -32,5 +33,6 @@ namespace swrenderer fixed_t pviewx, pviewy; fixed_t xscale, yscale; FDynamicColormap *basecolormap; + DrawerStyle drawerstyle; }; } diff --git a/src/swrenderer/plane/r_visibleplane.cpp b/src/swrenderer/plane/r_visibleplane.cpp index 3a7df8300d..8f6d93d378 100644 --- a/src/swrenderer/plane/r_visibleplane.cpp +++ b/src/swrenderer/plane/r_visibleplane.cpp @@ -114,19 +114,18 @@ 
namespace swrenderer { // Don't waste time on a masked texture if it isn't really masked. masked = false; } - R_SetSpanTexture(tex); double xscale = xform.xScale * tex->Scale.X; double yscale = xform.yScale * tex->Scale.Y; if (!height.isSlope() && !tilt) { RenderFlatPlane renderer; - renderer.Render(this, xscale, yscale, alpha, additive, masked, colormap); + renderer.Render(this, xscale, yscale, alpha, additive, masked, colormap, tex); } else { RenderSlopePlane renderer; - renderer.Render(this, xscale, yscale, alpha, additive, masked, colormap); + renderer.Render(this, xscale, yscale, alpha, additive, masked, colormap, tex); } } NetUpdate(); diff --git a/src/swrenderer/things/r_decal.cpp b/src/swrenderer/things/r_decal.cpp index 37c83faa5d..1e15c55926 100644 --- a/src/swrenderer/things/r_decal.cpp +++ b/src/swrenderer/things/r_decal.cpp @@ -252,15 +252,8 @@ namespace swrenderer } light = lightleft + (x1 - savecoord.sx1) * lightstep; + cameraLight = CameraLight::Instance(); - if (cameraLight->fixedlightlev >= 0) - R_SetColorMapLight((!level.PreserveSectorColor()) ? &FullNormalLight : usecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->fixedlightlev)); - else if (cameraLight->fixedcolormap != NULL) - R_SetColorMapLight(cameraLight->fixedcolormap, 0, 0); - else if (!foggy && (decal->RenderFlags & RF_FULLBRIGHT)) - R_SetColorMapLight((!level.PreserveSectorColor()) ? &FullNormalLight : usecolormap, 0, 0); - else - calclighting = true; // Draw it bool sprflipvert; @@ -281,6 +274,16 @@ namespace swrenderer int x = x1; DrawerStyle drawerstyle; + + if (cameraLight->fixedlightlev >= 0) + drawerstyle.SetColorMapLight((!level.PreserveSectorColor()) ? &FullNormalLight : usecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->fixedlightlev)); + else if (cameraLight->fixedcolormap != NULL) + drawerstyle.SetColorMapLight(cameraLight->fixedcolormap, 0, 0); + else if (!foggy && (decal->RenderFlags & RF_FULLBRIGHT)) + drawerstyle.SetColorMapLight((!level.PreserveSectorColor()) ? 
&FullNormalLight : usecolormap, 0, 0); + else + calclighting = true; + bool visible = drawerstyle.SetPatchStyle(decal->RenderStyle, (float)decal->Alpha, decal->Translation, decal->AlphaColor, basecolormap); // R_SetPatchStyle can modify basecolormap. @@ -295,7 +298,7 @@ namespace swrenderer { if (calclighting) { // calculate lighting - R_SetColorMapLight(usecolormap, light, wallshade); + drawerstyle.SetColorMapLight(usecolormap, light, wallshade); } DrawColumn(drawerstyle, x, WallSpriteTile, walltexcoords, texturemid, maskedScaleY, sprflipvert, mfloorclip, mceilingclip); light += lightstep; diff --git a/src/swrenderer/things/r_playersprite.cpp b/src/swrenderer/things/r_playersprite.cpp index ac703ab319..31c31973b6 100644 --- a/src/swrenderer/things/r_playersprite.cpp +++ b/src/swrenderer/things/r_playersprite.cpp @@ -586,11 +586,11 @@ namespace swrenderer return; } - R_SetColorMapLight(Light.BaseColormap, 0, Light.ColormapNum << FRACBITS); + DrawerStyle drawerstyle; + drawerstyle.SetColorMapLight(Light.BaseColormap, 0, Light.ColormapNum << FRACBITS); FDynamicColormap *basecolormap = static_cast(Light.BaseColormap); - DrawerStyle drawerstyle; bool visible = drawerstyle.SetPatchStyle(RenderStyle, Alpha, Translation, FillColor, basecolormap, Light.ColormapNum << FRACBITS); if (!visible) return; diff --git a/src/swrenderer/things/r_sprite.cpp b/src/swrenderer/things/r_sprite.cpp index 531c83b1ae..292307389a 100644 --- a/src/swrenderer/things/r_sprite.cpp +++ b/src/swrenderer/things/r_sprite.cpp @@ -243,12 +243,11 @@ namespace swrenderer return; } - fixed_t centeryfrac = FLOAT2FIXED(CenterY); - R_SetColorMapLight(vis->Light.BaseColormap, 0, vis->Light.ColormapNum << FRACBITS); + DrawerStyle drawerstyle; + drawerstyle.SetColorMapLight(vis->Light.BaseColormap, 0, vis->Light.ColormapNum << FRACBITS); FDynamicColormap *basecolormap = static_cast(vis->Light.BaseColormap); - DrawerStyle drawerstyle; bool visible = drawerstyle.SetPatchStyle(vis->RenderStyle, vis->Alpha, 
vis->Translation, vis->FillColor, basecolormap, vis->Light.ColormapNum << FRACBITS); if (visible) diff --git a/src/swrenderer/things/r_voxel.cpp b/src/swrenderer/things/r_voxel.cpp index 73509dbac7..0cd4d4d2b4 100644 --- a/src/swrenderer/things/r_voxel.cpp +++ b/src/swrenderer/things/r_voxel.cpp @@ -184,9 +184,9 @@ namespace swrenderer FDynamicColormap *basecolormap = static_cast(sprite->Light.BaseColormap); - R_SetColorMapLight(sprite->Light.BaseColormap, 0, sprite->Light.ColormapNum << FRACBITS); - DrawerStyle drawerstyle; + drawerstyle.SetColorMapLight(sprite->Light.BaseColormap, 0, sprite->Light.ColormapNum << FRACBITS); + bool visible = drawerstyle.SetPatchStyle(sprite->RenderStyle, sprite->Alpha, sprite->Translation, sprite->FillColor, basecolormap); if (!visible) return; @@ -285,7 +285,7 @@ namespace swrenderer voxel_pos.Y += dirY.X * x + dirY.Y * y; voxel_pos.Z += dirZ * z; - FillBox(voxel_pos, sprite_xscale, sprite_yscale, color, cliptop, clipbottom, false, false); + FillBox(drawerstyle, voxel_pos, sprite_xscale, sprite_yscale, color, cliptop, clipbottom, false, false); } } } @@ -308,7 +308,7 @@ namespace swrenderer return (kvxslab_t*)(((uint8_t*)slab) + 3 + slab->zleng); } - void RenderVoxel::FillBox(DVector3 origin, double extentX, double extentY, int color, short *cliptop, short *clipbottom, bool viewspace, bool pixelstretch) + void RenderVoxel::FillBox(DrawerStyle &drawerstyle, DVector3 origin, double extentX, double extentY, int color, short *cliptop, short *clipbottom, bool viewspace, bool pixelstretch) { double viewX, viewY, viewZ; if (viewspace) @@ -354,7 +354,7 @@ namespace swrenderer dc_dest = dc_destorg + (dc_pitch * columnY1 + x) * pixelsize; dc_color = color; dc_count = columnY2 - columnY1; - R_Drawers()->FillColumn(); + drawerstyle.FillColumn(); } } } diff --git a/src/swrenderer/things/r_voxel.h b/src/swrenderer/things/r_voxel.h index 838bcf61d4..26a84de5ef 100644 --- a/src/swrenderer/things/r_voxel.h +++ b/src/swrenderer/things/r_voxel.h @@ 
-31,6 +31,8 @@ struct FVoxel; namespace swrenderer { + class DrawerStyle; + // [RH] A c-buffer. Used for keeping track of offscreen voxel spans. struct FCoverageBuffer { @@ -81,7 +83,7 @@ namespace swrenderer enum { DVF_OFFSCREEN = 1, DVF_SPANSONLY = 2, DVF_MIRRORED = 4 }; - static void FillBox(DVector3 origin, double extentX, double extentY, int color, short *cliptop, short *clipbottom, bool viewspace, bool pixelstretch); + static void FillBox(DrawerStyle &drawerstyle, DVector3 origin, double extentX, double extentY, int color, short *cliptop, short *clipbottom, bool viewspace, bool pixelstretch); static kvxslab_t *GetSlabStart(const FVoxelMipLevel &mip, int x, int y); static kvxslab_t *GetSlabEnd(const FVoxelMipLevel &mip, int x, int y); diff --git a/src/swrenderer/things/r_wallsprite.cpp b/src/swrenderer/things/r_wallsprite.cpp index a6a6000583..78ab9f6403 100644 --- a/src/swrenderer/things/r_wallsprite.cpp +++ b/src/swrenderer/things/r_wallsprite.cpp @@ -178,6 +178,8 @@ namespace swrenderer rereadcolormap = false; } + DrawerStyle drawerstyle; + int shade = LIGHT2SHADE(spr->sector->lightlevel + R_ActualExtraLight(spr->foggy)); double GlobVis = LightVisibility::Instance()->WallGlobVis(); float lightleft = float(GlobVis / spr->wallc.sz1); @@ -185,11 +187,11 @@ namespace swrenderer float light = lightleft + (x1 - spr->wallc.sx1) * lightstep; CameraLight *cameraLight = CameraLight::Instance(); if (cameraLight->fixedlightlev >= 0) - R_SetColorMapLight(usecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->fixedlightlev)); + drawerstyle.SetColorMapLight(usecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->fixedlightlev)); else if (cameraLight->fixedcolormap != NULL) - R_SetColorMapLight(cameraLight->fixedcolormap, 0, 0); + drawerstyle.SetColorMapLight(cameraLight->fixedcolormap, 0, 0); else if (!spr->foggy && (spr->renderflags & RF_FULLBRIGHT)) - R_SetColorMapLight((!level.PreserveSectorColor()) ? 
&FullNormalLight : usecolormap, 0, 0); + drawerstyle.SetColorMapLight((!level.PreserveSectorColor()) ? &FullNormalLight : usecolormap, 0, 0); else calclighting = true; @@ -212,7 +214,6 @@ namespace swrenderer FDynamicColormap *basecolormap = static_cast(spr->Light.BaseColormap); - DrawerStyle drawerstyle; bool visible = drawerstyle.SetPatchStyle(spr->RenderStyle, spr->Alpha, spr->Translation, spr->FillColor, basecolormap); // R_SetPatchStyle can modify basecolormap. @@ -233,7 +234,7 @@ namespace swrenderer { if (calclighting) { // calculate lighting - R_SetColorMapLight(usecolormap, light, shade); + drawerstyle.SetColorMapLight(usecolormap, light, shade); } if (!translucentPass->ClipSpriteColumnWithPortals(x, spr)) DrawColumn(drawerstyle, x, WallSpriteTile, walltexcoords, texturemid, maskedScaleY, sprflipvert, mfloorclip, mceilingclip); diff --git a/src/v_draw.cpp b/src/v_draw.cpp index a3d206726b..754f1f26c9 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -185,21 +185,22 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) translation = parms.remap->Remap; } + DrawerStyle drawerstyle; + if (translation != NULL) { - R_SetTranslationMap((lighttable_t *)translation); + drawerstyle.SetTranslationMap((lighttable_t *)translation); } else { if (r_swtruecolor) - R_SetTranslationMap(nullptr); + drawerstyle.SetTranslationMap(nullptr); else - R_SetTranslationMap(identitymap); + drawerstyle.SetTranslationMap(identitymap); } bool visible; FDynamicColormap *basecolormap = nullptr; - DrawerStyle drawerstyle; if (r_swtruecolor) visible = drawerstyle.SetPatchStyle(parms.style, parms.Alpha, -1, parms.fillcolor, basecolormap); else @@ -1388,11 +1389,12 @@ void DCanvas::FillSimplePoly(FTexture *tex, FVector2 *points, int npoints, sinrot = sin(rotation.Radians()); // Setup constant texture mapping parameters. 
- R_SetSpanTexture(tex); + DrawerStyle drawerstyle; + drawerstyle.SetSpanTexture(tex); if (colormap) - R_SetSpanColormap(colormap, clamp(shade >> FRACBITS, 0, NUMCOLORMAPS - 1)); + drawerstyle.SetSpanColormap(colormap, clamp(shade >> FRACBITS, 0, NUMCOLORMAPS - 1)); else - R_SetSpanColormap(&identitycolormap, 0); + drawerstyle.SetSpanColormap(&identitycolormap, 0); if (ds_xbits != 0) { scalex = double(1u << (32 - ds_xbits)) / scalex; @@ -1491,7 +1493,7 @@ void DCanvas::FillSimplePoly(FTexture *tex, FVector2 *points, int npoints, ds_xfrac = xs_RoundToInt(tex.X * scalex); ds_yfrac = xs_RoundToInt(tex.Y * scaley); - R_Drawers()->DrawSpan(); + (drawerstyle.Drawers()->*drawerstyle.spanfunc)(); #endif } x += xinc; From 2fb82aaa9f9719f31f88ad9dabc464a625a7bf0f Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Sat, 28 Jan 2017 17:37:57 -0500 Subject: [PATCH 775/912] Revert "- Fully implemented Graf's level.PreserveSectorColor() changes into the software renderers." This reverts commit 2f9453bc86a41621685660f8f150520d8e78d61f. 
# Conflicts: # src/swrenderer/line/r_line.cpp # src/swrenderer/line/r_renderdrawsegment.cpp # src/swrenderer/things/r_decal.cpp # src/swrenderer/things/r_wallsprite.cpp --- src/polyrenderer/scene/poly_playersprite.cpp | 10 +++++----- src/swrenderer/line/r_line.cpp | 3 ++- src/swrenderer/line/r_renderdrawsegment.cpp | 7 ++++--- src/swrenderer/scene/r_light.cpp | 8 ++++---- src/swrenderer/scene/r_opaque_pass.cpp | 5 +++-- src/swrenderer/scene/r_translucent_pass.cpp | 2 ++ src/swrenderer/things/r_decal.cpp | 3 ++- src/swrenderer/things/r_particle.cpp | 2 ++ src/swrenderer/things/r_playersprite.cpp | 1 + src/swrenderer/things/r_sprite.cpp | 2 ++ src/swrenderer/things/r_visiblesprite.cpp | 2 ++ src/swrenderer/things/r_voxel.cpp | 2 ++ src/swrenderer/things/r_wallsprite.cpp | 5 +++-- 13 files changed, 34 insertions(+), 18 deletions(-) diff --git a/src/polyrenderer/scene/poly_playersprite.cpp b/src/polyrenderer/scene/poly_playersprite.cpp index b4b5189331..dd8fc48509 100644 --- a/src/polyrenderer/scene/poly_playersprite.cpp +++ b/src/polyrenderer/scene/poly_playersprite.cpp @@ -30,11 +30,11 @@ #include "d_player.h" #include "swrenderer/scene/r_viewport.h" #include "swrenderer/scene/r_light.h" -#include "g_levellocals.h" EXTERN_CVAR(Bool, r_drawplayersprites) EXTERN_CVAR(Bool, r_deathcamera) EXTERN_CVAR(Bool, st_scale) +EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor) EXTERN_CVAR(Bool, r_shadercolormaps) void RenderPolyPlayerSprites::Render() @@ -269,12 +269,12 @@ void RenderPolyPlayerSprites::RenderSprite(DPSprite *sprite, AActor *owner, floa } if (swrenderer::fixedlightlev >= 0) { - BaseColormap = (!level.PreserveSectorColor()) ? &FullNormalLight : mybasecolormap; + BaseColormap = (r_fullbrightignoresectorcolor) ? &FullNormalLight : mybasecolormap; ColormapNum = swrenderer::fixedlightlev >> COLORMAPSHIFT; } else if (!foggy && sprite->GetState()->GetFullbright()) { // full bright - BaseColormap = (!level.PreserveSectorColor()) ? 
&FullNormalLight : mybasecolormap; // [RH] use basecolormap + BaseColormap = (r_fullbrightignoresectorcolor) ? &FullNormalLight : mybasecolormap; // [RH] use basecolormap ColormapNum = 0; } else @@ -334,9 +334,9 @@ void RenderPolyPlayerSprites::RenderSprite(DPSprite *sprite, AActor *owner, floa noaccel = true; } // [SP] If emulating GZDoom fullbright, disable acceleration - if (!level.PreserveSectorColor() && cameraLight->fixedlightlev >= 0) + if (r_fullbrightignoresectorcolor && cameraLight->fixedlightlev >= 0) mybasecolormap = &FullNormalLight; - if (!level.PreserveSectorColor() && !foggy && sprite->GetState()->GetFullbright()) + if (r_fullbrightignoresectorcolor && !foggy && sprite->GetState()->GetFullbright()) mybasecolormap = &FullNormalLight; colormap_to_use = mybasecolormap; } diff --git a/src/swrenderer/line/r_line.cpp b/src/swrenderer/line/r_line.cpp index b4a43d7607..ace69ea3b6 100644 --- a/src/swrenderer/line/r_line.cpp +++ b/src/swrenderer/line/r_line.cpp @@ -52,6 +52,7 @@ CVAR(Bool, r_fogboundary, true, 0) CVAR(Bool, r_drawmirrors, true, 0) +EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor); namespace swrenderer { @@ -936,7 +937,7 @@ namespace swrenderer CameraLight *cameraLight = CameraLight::Instance(); if (cameraLight->fixedlightlev >= 0) - drawerstyle.SetColorMapLight((!level.PreserveSectorColor()) ? &FullNormalLight : basecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->fixedlightlev)); + drawerstyle.SetColorMapLight((r_fullbrightignoresectorcolor) ? 
&FullNormalLight : basecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->fixedlightlev)); else if (cameraLight->fixedcolormap != nullptr) drawerstyle.SetColorMapLight(cameraLight->fixedcolormap, 0, 0); diff --git a/src/swrenderer/line/r_renderdrawsegment.cpp b/src/swrenderer/line/r_renderdrawsegment.cpp index 7c81ec8ba9..2de9d81662 100644 --- a/src/swrenderer/line/r_renderdrawsegment.cpp +++ b/src/swrenderer/line/r_renderdrawsegment.cpp @@ -41,7 +41,8 @@ #include "swrenderer/things/r_visiblesprite.h" #include "swrenderer/scene/r_light.h" #include "swrenderer/scene/r_viewport.h" -#include "g_levellocals.h" + +EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor); namespace swrenderer { @@ -141,7 +142,7 @@ namespace swrenderer rw_scalestep = ds->iscalestep; if (cameraLight->fixedlightlev >= 0) - drawerstyle.SetColorMapLight((!level.PreserveSectorColor()) ? &FullNormalLight : basecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->fixedlightlev)); + drawerstyle.SetColorMapLight((r_fullbrightignoresectorcolor) ? &FullNormalLight : basecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->fixedlightlev)); else if (cameraLight->fixedcolormap != nullptr) drawerstyle.SetColorMapLight(cameraLight->fixedcolormap, 0, 0); @@ -444,7 +445,7 @@ namespace swrenderer CameraLight *cameraLight = CameraLight::Instance(); if (cameraLight->fixedlightlev >= 0) - drawerstyle.SetColorMapLight((!level.PreserveSectorColor()) ? &FullNormalLight : basecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->fixedlightlev)); + drawerstyle.SetColorMapLight((r_fullbrightignoresectorcolor) ? 
&FullNormalLight : basecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->fixedlightlev)); else if (cameraLight->fixedcolormap != nullptr) drawerstyle.SetColorMapLight(cameraLight->fixedcolormap, 0, 0); diff --git a/src/swrenderer/scene/r_light.cpp b/src/swrenderer/scene/r_light.cpp index f214e52613..851fab7500 100644 --- a/src/swrenderer/scene/r_light.cpp +++ b/src/swrenderer/scene/r_light.cpp @@ -32,9 +32,9 @@ #include "d_player.h" #include "swrenderer/scene/r_light.h" #include "swrenderer/scene/r_viewport.h" -#include "g_levellocals.h" CVAR(Bool, r_shadercolormaps, true, CVAR_ARCHIVE) +EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor) namespace swrenderer { @@ -75,7 +75,7 @@ namespace swrenderer { fixedlightlev = player->fixedlightlevel * 256; // [SP] Emulate GZDoom's light-amp goggles. - if (!level.PreserveSectorColor() && fixedlightlev >= 0) + if (r_fullbrightignoresectorcolor && fixedlightlev >= 0) { fixedcolormap = &FullNormalLight; } @@ -194,12 +194,12 @@ namespace swrenderer } else if (cameraLight->fixedlightlev >= 0) { - BaseColormap = (!level.PreserveSectorColor()) ? &FullNormalLight : basecolormap; + BaseColormap = (r_fullbrightignoresectorcolor) ? &FullNormalLight : basecolormap; ColormapNum = cameraLight->fixedlightlev >> COLORMAPSHIFT; } else if (fullbright) { - BaseColormap = (!level.PreserveSectorColor()) ? &FullNormalLight : basecolormap; + BaseColormap = (r_fullbrightignoresectorcolor) ? 
&FullNormalLight : basecolormap; ColormapNum = 0; } else diff --git a/src/swrenderer/scene/r_opaque_pass.cpp b/src/swrenderer/scene/r_opaque_pass.cpp index 2cb5a5993d..2391d05428 100644 --- a/src/swrenderer/scene/r_opaque_pass.cpp +++ b/src/swrenderer/scene/r_opaque_pass.cpp @@ -62,6 +62,7 @@ #include "r_data/colormaps.h" #include "g_levellocals.h" +EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor); EXTERN_CVAR(Bool, r_drawvoxels); namespace swrenderer @@ -493,7 +494,7 @@ namespace swrenderer } else { - basecolormap = (!level.PreserveSectorColor() && cameraLight->fixedlightlev >= 0) ? &FullNormalLight : frontsector->ColorMap; + basecolormap = (r_fullbrightignoresectorcolor && cameraLight->fixedlightlev >= 0) ? &FullNormalLight : frontsector->ColorMap; } portal = frontsector->ValidatePortal(sector_t::ceiling); @@ -531,7 +532,7 @@ namespace swrenderer } else { - basecolormap = (!level.PreserveSectorColor() && cameraLight->fixedlightlev >= 0) ? &FullNormalLight : frontsector->ColorMap; + basecolormap = (r_fullbrightignoresectorcolor && cameraLight->fixedlightlev >= 0) ? 
&FullNormalLight : frontsector->ColorMap; } // killough 3/7/98: Add (x,y) offsets to flats, add deep water check diff --git a/src/swrenderer/scene/r_translucent_pass.cpp b/src/swrenderer/scene/r_translucent_pass.cpp index 69444f560f..67b0600955 100644 --- a/src/swrenderer/scene/r_translucent_pass.cpp +++ b/src/swrenderer/scene/r_translucent_pass.cpp @@ -43,6 +43,8 @@ EXTERN_CVAR(Int, r_drawfuzz) EXTERN_CVAR(Bool, r_drawvoxels) EXTERN_CVAR(Bool, r_blendmethod) +CVAR(Bool, r_fullbrightignoresectorcolor, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); + namespace swrenderer { RenderTranslucentPass *RenderTranslucentPass::Instance() diff --git a/src/swrenderer/things/r_decal.cpp b/src/swrenderer/things/r_decal.cpp index 1e15c55926..6c0fea63fe 100644 --- a/src/swrenderer/things/r_decal.cpp +++ b/src/swrenderer/things/r_decal.cpp @@ -42,7 +42,8 @@ #include "swrenderer/scene/r_light.h" #include "swrenderer/things/r_wallsprite.h" #include "swrenderer/r_memory.h" -#include "g_levellocals.h" + +EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor); namespace swrenderer { diff --git a/src/swrenderer/things/r_particle.cpp b/src/swrenderer/things/r_particle.cpp index a63028e84f..5d978fd9f5 100644 --- a/src/swrenderer/things/r_particle.cpp +++ b/src/swrenderer/things/r_particle.cpp @@ -56,6 +56,8 @@ #include "swrenderer/drawers/r_draw_pal.h" #include "swrenderer/r_memory.h" +EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor); + namespace swrenderer { void RenderParticle::Project(particle_t *particle, const sector_t *sector, int shade, WaterFakeSide fakeside, bool foggy) diff --git a/src/swrenderer/things/r_playersprite.cpp b/src/swrenderer/things/r_playersprite.cpp index 31c31973b6..b4b76cd621 100644 --- a/src/swrenderer/things/r_playersprite.cpp +++ b/src/swrenderer/things/r_playersprite.cpp @@ -61,6 +61,7 @@ EXTERN_CVAR(Bool, st_scale) EXTERN_CVAR(Bool, r_drawplayersprites) EXTERN_CVAR(Bool, r_deathcamera) EXTERN_CVAR(Bool, r_shadercolormaps) +EXTERN_CVAR(Bool, 
r_fullbrightignoresectorcolor) namespace swrenderer { diff --git a/src/swrenderer/things/r_sprite.cpp b/src/swrenderer/things/r_sprite.cpp index 292307389a..600d4bc716 100644 --- a/src/swrenderer/things/r_sprite.cpp +++ b/src/swrenderer/things/r_sprite.cpp @@ -56,6 +56,8 @@ #include "swrenderer/things/r_sprite.h" #include "swrenderer/r_memory.h" +EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor) + namespace swrenderer { void RenderSprite::Project(AActor *thing, const DVector3 &pos, FTexture *tex, const DVector2 &spriteScale, int renderflags, WaterFakeSide fakeside, F3DFloor *fakefloor, F3DFloor *fakeceiling, sector_t *current_sector, int spriteshade, bool foggy, FDynamicColormap *basecolormap) diff --git a/src/swrenderer/things/r_visiblesprite.cpp b/src/swrenderer/things/r_visiblesprite.cpp index 14406be39b..b2c0b054a1 100644 --- a/src/swrenderer/things/r_visiblesprite.cpp +++ b/src/swrenderer/things/r_visiblesprite.cpp @@ -36,6 +36,8 @@ #include "swrenderer/scene/r_viewport.h" #include "swrenderer/r_memory.h" +EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor); + namespace swrenderer { void VisibleSprite::Render() diff --git a/src/swrenderer/things/r_voxel.cpp b/src/swrenderer/things/r_voxel.cpp index 0cd4d4d2b4..d7f4c047d8 100644 --- a/src/swrenderer/things/r_voxel.cpp +++ b/src/swrenderer/things/r_voxel.cpp @@ -43,6 +43,8 @@ #include "swrenderer/scene/r_light.h" #include "swrenderer/r_memory.h" +EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor) + namespace swrenderer { void RenderVoxel::Project(AActor *thing, DVector3 pos, FVoxelDef *voxel, const DVector2 &spriteScale, int renderflags, WaterFakeSide fakeside, F3DFloor *fakefloor, F3DFloor *fakeceiling, sector_t *current_sector, int spriteshade, bool foggy, FDynamicColormap *basecolormap) diff --git a/src/swrenderer/things/r_wallsprite.cpp b/src/swrenderer/things/r_wallsprite.cpp index 78ab9f6403..297dd96cda 100644 --- a/src/swrenderer/things/r_wallsprite.cpp +++ b/src/swrenderer/things/r_wallsprite.cpp @@ -57,7 
+57,8 @@ #include "swrenderer/line/r_wallsetup.h" #include "swrenderer/line/r_walldraw.h" #include "swrenderer/r_memory.h" -#include "g_levellocals.h" + +EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor); namespace swrenderer { @@ -191,7 +192,7 @@ namespace swrenderer else if (cameraLight->fixedcolormap != NULL) drawerstyle.SetColorMapLight(cameraLight->fixedcolormap, 0, 0); else if (!spr->foggy && (spr->renderflags & RF_FULLBRIGHT)) - drawerstyle.SetColorMapLight((!level.PreserveSectorColor()) ? &FullNormalLight : usecolormap, 0, 0); + drawerstyle.SetColorMapLight((r_fullbrightignoresectorcolor) ? &FullNormalLight : usecolormap, 0, 0); else calclighting = true; From 92bd752935b239e67405ba05559550b9c7b3be49 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 29 Jan 2017 07:49:04 +0100 Subject: [PATCH 776/912] Move drawerargs globals into a class and make them local --- src/CMakeLists.txt | 1 + src/gl/system/gl_swframebuffer.cpp | 4 +- src/gl/system/gl_swframebuffer.h | 2 +- src/swrenderer/drawers/r_draw.cpp | 723 +------------------- src/swrenderer/drawers/r_draw.h | 226 +----- src/swrenderer/drawers/r_draw_pal.cpp | 157 ++--- src/swrenderer/drawers/r_draw_pal.h | 157 +++-- src/swrenderer/drawers/r_draw_rgba.cpp | 266 ++++--- src/swrenderer/drawers/r_draw_rgba.h | 101 +-- src/swrenderer/drawers/r_thread.h | 35 - src/swrenderer/line/r_fogboundary.cpp | 10 +- src/swrenderer/line/r_fogboundary.h | 2 +- src/swrenderer/line/r_line.cpp | 18 +- src/swrenderer/line/r_renderdrawsegment.cpp | 24 +- src/swrenderer/line/r_walldraw.cpp | 114 ++- src/swrenderer/line/r_walldraw.h | 6 +- src/swrenderer/plane/r_flatplane.cpp | 81 ++- src/swrenderer/plane/r_flatplane.h | 6 +- src/swrenderer/plane/r_skyplane.cpp | 30 +- src/swrenderer/plane/r_skyplane.h | 4 +- src/swrenderer/plane/r_slopeplane.cpp | 25 +- src/swrenderer/plane/r_slopeplane.h | 4 +- src/swrenderer/plane/r_visibleplanelist.cpp | 4 - src/swrenderer/scene/r_viewport.cpp | 6 +- src/swrenderer/scene/r_viewport.h | 4 + 
src/swrenderer/things/r_decal.cpp | 18 +- src/swrenderer/things/r_decal.h | 4 +- src/swrenderer/things/r_particle.cpp | 2 - src/swrenderer/things/r_playersprite.cpp | 8 +- src/swrenderer/things/r_sprite.cpp | 8 +- src/swrenderer/things/r_voxel.cpp | 20 +- src/swrenderer/things/r_voxel.h | 4 +- src/swrenderer/things/r_wallsprite.cpp | 18 +- src/swrenderer/things/r_wallsprite.h | 4 +- src/v_draw.cpp | 52 +- 35 files changed, 606 insertions(+), 1542 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 47c4f7e585..a37649086e 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -850,6 +850,7 @@ set( FASTMATH_PCH_SOURCES swrenderer/drawers/r_draw.cpp swrenderer/drawers/r_draw_pal.cpp swrenderer/drawers/r_draw_rgba.cpp + swrenderer/drawers/r_drawerargs.cpp swrenderer/drawers/r_drawers.cpp swrenderer/drawers/r_thread.cpp swrenderer/scene/r_3dfloors.cpp diff --git a/src/gl/system/gl_swframebuffer.cpp b/src/gl/system/gl_swframebuffer.cpp index 9ea7066f8f..09a8d5664f 100644 --- a/src/gl/system/gl_swframebuffer.cpp +++ b/src/gl/system/gl_swframebuffer.cpp @@ -2948,7 +2948,7 @@ void OpenGLSWFrameBuffer::FlatFill(int left, int top, int right, int bottom, FTe void OpenGLSWFrameBuffer::FillSimplePoly(FTexture *texture, FVector2 *points, int npoints, double originx, double originy, double scalex, double scaley, - DAngle rotation, FDynamicColormap *colormap, int lightlevel, int bottomclip) + DAngle rotation, FDynamicColormap *colormap, PalEntry flatcolor, int lightlevel, int bottomclip) { // Use an equation similar to player sprites to determine shade double fadelevel = clamp((LIGHT2SHADE(lightlevel) / 65536. 
- 12) / NUMCOLORMAPS, 0.0, 1.0); @@ -2969,7 +2969,7 @@ void OpenGLSWFrameBuffer::FillSimplePoly(FTexture *texture, FVector2 *points, in } if (In2D < 2) { - Super::FillSimplePoly(texture, points, npoints, originx, originy, scalex, scaley, rotation, colormap, lightlevel, bottomclip); + Super::FillSimplePoly(texture, points, npoints, originx, originy, scalex, scaley, rotation, colormap, lightlevel, flatcolor, bottomclip); return; } if (!InScene) diff --git a/src/gl/system/gl_swframebuffer.h b/src/gl/system/gl_swframebuffer.h index 6e69e94217..a9e7aa888f 100644 --- a/src/gl/system/gl_swframebuffer.h +++ b/src/gl/system/gl_swframebuffer.h @@ -59,7 +59,7 @@ public: void FlatFill(int left, int top, int right, int bottom, FTexture *src, bool local_origin) override; void DrawLine(int x0, int y0, int x1, int y1, int palColor, uint32 realcolor) override; void DrawPixel(int x, int y, int palcolor, uint32 rgbcolor) override; - void FillSimplePoly(FTexture *tex, FVector2 *points, int npoints, double originx, double originy, double scalex, double scaley, DAngle rotation, FDynamicColormap *colormap, int lightlevel, int bottomclip) override; + void FillSimplePoly(FTexture *tex, FVector2 *points, int npoints, double originx, double originy, double scalex, double scaley, DAngle rotation, FDynamicColormap *colormap, PalEntry flatcolor, int lightlevel, int bottomclip) override; bool WipeStartScreen(int type) override; void WipeEndScreen() override; bool WipeDo(int ticks) override; diff --git a/src/swrenderer/drawers/r_draw.cpp b/src/swrenderer/drawers/r_draw.cpp index 25c74f9c58..b569aac0d2 100644 --- a/src/swrenderer/drawers/r_draw.cpp +++ b/src/swrenderer/drawers/r_draw.cpp @@ -50,6 +50,7 @@ #include "r_draw_rgba.h" #include "r_draw_pal.h" #include "r_thread.h" +#include "r_drawerargs.h" #include "swrenderer/scene/r_light.h" CVAR(Bool, r_dynlights, 1, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); @@ -70,86 +71,6 @@ namespace swrenderer short zeroarray[MAXWIDTH]; short 
screenheightarray[MAXWIDTH]; - namespace drawerargs - { - int dc_pitch; - lighttable_t *dc_colormap; - ShadeConstants dc_shade_constants; - fixed_t dc_light; - int dc_x; - int dc_yl; - int dc_yh; - fixed_t dc_iscale; - fixed_t dc_texturefrac; - uint32_t dc_textureheight; - int dc_color; - uint32_t dc_srccolor; - uint32_t dc_srccolor_bgra; - uint32_t *dc_srcblend; - uint32_t *dc_destblend; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; - const uint8_t *dc_source; - const uint8_t *dc_source2; - uint32_t dc_texturefracx; - uint8_t *dc_translation; - uint8_t *dc_dest; - uint8_t *dc_destorg; - int dc_destheight; - int dc_count; - FVector3 dc_normal; - FVector3 dc_viewpos; - FVector3 dc_viewpos_step; - TriLight *dc_lights; - int dc_num_lights; - uint32_t dc_wall_texturefrac[4]; - uint32_t dc_wall_iscale[4]; - uint8_t *dc_wall_colormap[4]; - fixed_t dc_wall_light[4]; - const uint8_t *dc_wall_source[4]; - const uint8_t *dc_wall_source2[4]; - uint32_t dc_wall_texturefracx[4]; - uint32_t dc_wall_sourceheight[4]; - int dc_wall_fracbits; - int ds_y; - int ds_x1; - int ds_x2; - lighttable_t * ds_colormap; - ShadeConstants ds_shade_constants; - dsfixed_t ds_light; - dsfixed_t ds_xfrac; - dsfixed_t ds_yfrac; - dsfixed_t ds_xstep; - dsfixed_t ds_ystep; - int ds_xbits; - int ds_ybits; - fixed_t ds_alpha; - double ds_lod; - const uint8_t *ds_source; - bool ds_source_mipmapped; - int ds_color; - } - - namespace - { - SWPixelFormatDrawers *active_drawers; - SWPalDrawers pal_drawers; - SWTruecolorDrawers tc_drawers; - } - - SWPixelFormatDrawers *DrawerStyle::Drawers() const - { - return active_drawers; - } - - void R_InitColumnDrawers() - { - if (r_swtruecolor) - active_drawers = &tc_drawers; - else - active_drawers = &pal_drawers; - } - void R_InitShadeMaps() { int i, j; @@ -235,643 +156,11 @@ namespace swrenderer } } - void DrawerStyle::SetColorMapLight(FSWColormap *base_colormap, float light, int shade) + void R_UpdateFuzzPos(const DrawerArgs &args) { - using namespace drawerargs; - 
- if (r_swtruecolor) - { - dc_shade_constants.light_red = base_colormap->Color.r * 256 / 255; - dc_shade_constants.light_green = base_colormap->Color.g * 256 / 255; - dc_shade_constants.light_blue = base_colormap->Color.b * 256 / 255; - dc_shade_constants.light_alpha = base_colormap->Color.a * 256 / 255; - dc_shade_constants.fade_red = base_colormap->Fade.r; - dc_shade_constants.fade_green = base_colormap->Fade.g; - dc_shade_constants.fade_blue = base_colormap->Fade.b; - dc_shade_constants.fade_alpha = base_colormap->Fade.a; - dc_shade_constants.desaturate = MIN(abs(base_colormap->Desaturate), 255) * 255 / 256; - dc_shade_constants.simple_shade = (base_colormap->Color.d == 0x00ffffff && base_colormap->Fade.d == 0x00000000 && base_colormap->Desaturate == 0); - dc_colormap = base_colormap->Maps; - dc_light = LIGHTSCALE(light, shade); - } - else - { - dc_colormap = base_colormap->Maps + (GETPALOOKUP(light, shade) << COLORMAPSHIFT); - } - } - - void DrawerStyle::SetDSColorMapLight(FSWColormap *base_colormap, float light, int shade) - { - using namespace drawerargs; - - if (r_swtruecolor) - { - ds_shade_constants.light_red = base_colormap->Color.r * 256 / 255; - ds_shade_constants.light_green = base_colormap->Color.g * 256 / 255; - ds_shade_constants.light_blue = base_colormap->Color.b * 256 / 255; - ds_shade_constants.light_alpha = base_colormap->Color.a * 256 / 255; - ds_shade_constants.fade_red = base_colormap->Fade.r; - ds_shade_constants.fade_green = base_colormap->Fade.g; - ds_shade_constants.fade_blue = base_colormap->Fade.b; - ds_shade_constants.fade_alpha = base_colormap->Fade.a; - ds_shade_constants.desaturate = MIN(abs(base_colormap->Desaturate), 255) * 255 / 256; - ds_shade_constants.simple_shade = (base_colormap->Color.d == 0x00ffffff && base_colormap->Fade.d == 0x00000000 && base_colormap->Desaturate == 0); - ds_colormap = base_colormap->Maps; - ds_light = LIGHTSCALE(light, shade); - } - else - { - ds_colormap = base_colormap->Maps + (GETPALOOKUP(light, 
shade) << COLORMAPSHIFT); - } - } - - void DrawerStyle::SetTranslationMap(lighttable_t *translation) - { - using namespace drawerargs; - - if (r_swtruecolor) - { - dc_colormap = nullptr; - dc_translation = translation; - dc_shade_constants.light_red = 256; - dc_shade_constants.light_green = 256; - dc_shade_constants.light_blue = 256; - dc_shade_constants.light_alpha = 256; - dc_shade_constants.fade_red = 0; - dc_shade_constants.fade_green = 0; - dc_shade_constants.fade_blue = 0; - dc_shade_constants.fade_alpha = 256; - dc_shade_constants.desaturate = 0; - dc_shade_constants.simple_shade = true; - dc_light = 0; - } - else - { - dc_colormap = translation; - } - } - - void DrawerStyle::SetSpanTexture(FTexture *tex) - { - using namespace drawerargs; - - tex->GetWidth(); - ds_xbits = tex->WidthBits; - ds_ybits = tex->HeightBits; - if ((1 << ds_xbits) > tex->GetWidth()) - { - ds_xbits--; - } - if ((1 << ds_ybits) > tex->GetHeight()) - { - ds_ybits--; - } - - ds_source = r_swtruecolor ? (const uint8_t*)tex->GetPixelsBgra() : tex->GetPixels(); - ds_source_mipmapped = tex->Mipmapped() && tex->GetWidth() > 1 && tex->GetHeight() > 1; - } - - void DrawerStyle::SetSpanColormap(FDynamicColormap *colormap, int shade) - { - SetDSColorMapLight(colormap, 0, shade); - } - - void R_UpdateFuzzPos() - { - using namespace drawerargs; - - dc_yl = MAX(dc_yl, 1); - dc_yh = MIN(dc_yh, fuzzviewheight); - if (dc_yl <= dc_yh) - fuzzpos = (fuzzpos + dc_yh - dc_yl + 1) % FUZZTABLE; - } - - void DrawerStyle::DrawMaskedColumn(int x, fixed_t iscale, FTexture *tex, fixed_t col, double spryscale, double sprtopscreen, bool sprflipvert, const short *mfloorclip, const short *mceilingclip, bool unmasked) - { - using namespace drawerargs; - - // Handle the linear filtered version in a different function to reduce chances of merge conflicts from zdoom. 
- if (r_swtruecolor && !drawer_needs_pal_input) // To do: add support to R_DrawColumnHoriz_rgba - { - DrawMaskedColumnBgra(x, iscale, tex, col, spryscale, sprtopscreen, sprflipvert, mfloorclip, mceilingclip, unmasked); - return; - } - - dc_x = x; - dc_iscale = iscale; - dc_textureheight = tex->GetHeight(); - - const FTexture::Span *span; - const BYTE *column; - if (r_swtruecolor && !drawer_needs_pal_input) - column = (const BYTE *)tex->GetColumnBgra(col >> FRACBITS, &span); - else - column = tex->GetColumn(col >> FRACBITS, &span); - - FTexture::Span unmaskedSpan[2]; - if (unmasked) - { - span = unmaskedSpan; - unmaskedSpan[0].TopOffset = 0; - unmaskedSpan[0].Length = tex->GetHeight(); - unmaskedSpan[1].TopOffset = 0; - unmaskedSpan[1].Length = 0; - } - - int pixelsize = r_swtruecolor ? 4 : 1; - - while (span->Length != 0) - { - const int length = span->Length; - const int top = span->TopOffset; - - // calculate unclipped screen coordinates for post - dc_yl = (int)(sprtopscreen + spryscale * top + 0.5); - dc_yh = (int)(sprtopscreen + spryscale * (top + length) + 0.5) - 1; - - if (sprflipvert) - { - swapvalues(dc_yl, dc_yh); - } - - if (dc_yh >= mfloorclip[dc_x]) - { - dc_yh = mfloorclip[dc_x] - 1; - } - if (dc_yl < mceilingclip[dc_x]) - { - dc_yl = mceilingclip[dc_x]; - } - - if (dc_yl <= dc_yh) - { - dc_texturefrac = FLOAT2FIXED((dc_yl + 0.5 - sprtopscreen) / spryscale); - dc_source = column; - dc_source2 = nullptr; - dc_dest = (ylookup[dc_yl] + dc_x) * pixelsize + dc_destorg; - dc_count = dc_yh - dc_yl + 1; - - fixed_t maxfrac = ((top + length) << FRACBITS) - 1; - dc_texturefrac = MAX(dc_texturefrac, 0); - dc_texturefrac = MIN(dc_texturefrac, maxfrac); - if (dc_iscale > 0) - dc_count = MIN(dc_count, (maxfrac - dc_texturefrac + dc_iscale - 1) / dc_iscale); - else if (dc_iscale < 0) - dc_count = MIN(dc_count, (dc_texturefrac - dc_iscale) / (-dc_iscale)); - - (Drawers()->*colfunc)(); - } - span++; - } - } - - void DrawerStyle::DrawMaskedColumnBgra(int x, fixed_t 
iscale, FTexture *tex, fixed_t col, double spryscale, double sprtopscreen, bool sprflipvert, const short *mfloorclip, const short *mceilingclip, bool unmasked) - { - using namespace drawerargs; - - dc_x = x; - dc_iscale = iscale; - - // Normalize to 0-1 range: - double uv_stepd = FIXED2DBL(dc_iscale); - double v_step = uv_stepd / tex->GetHeight(); - - // Convert to uint32: - dc_iscale = (uint32_t)(v_step * (1 << 30)); - - // Texture mipmap and filter selection: - fixed_t xoffset = col; - - double xmagnitude = 1.0; // To do: pass this into R_DrawMaskedColumn - double ymagnitude = fabs(uv_stepd); - double magnitude = MAX(ymagnitude, xmagnitude); - double min_lod = -1000.0; - double lod = MAX(log2(magnitude) + r_lod_bias, min_lod); - bool magnifying = lod < 0.0f; - - int mipmap_offset = 0; - int mip_width = tex->GetWidth(); - int mip_height = tex->GetHeight(); - uint32_t xpos = (uint32_t)((((uint64_t)xoffset) << FRACBITS) / mip_width); - if (r_mipmap && tex->Mipmapped() && mip_width > 1 && mip_height > 1) - { - int level = (int)lod; - while (level > 0 && mip_width > 1 && mip_height > 1) - { - mipmap_offset += mip_width * mip_height; - level--; - mip_width = MAX(mip_width >> 1, 1); - mip_height = MAX(mip_height >> 1, 1); - } - } - xoffset = (xpos >> FRACBITS) * mip_width; - - const uint32_t *pixels = tex->GetPixelsBgra() + mipmap_offset; - - bool filter_nearest = (magnifying && !r_magfilter) || (!magnifying && !r_minfilter); - if (filter_nearest) - { - xoffset = MAX(MIN(xoffset, (mip_width << FRACBITS) - 1), 0); - - int tx = xoffset >> FRACBITS; - dc_source = (BYTE*)(pixels + tx * mip_height); - dc_source2 = nullptr; - dc_textureheight = mip_height; - dc_texturefracx = 0; - } - else - { - xoffset = MAX(MIN(xoffset - (FRACUNIT / 2), (mip_width << FRACBITS) - 1), 0); - - int tx0 = xoffset >> FRACBITS; - int tx1 = MIN(tx0 + 1, mip_width - 1); - dc_source = (BYTE*)(pixels + tx0 * mip_height); - dc_source2 = (BYTE*)(pixels + tx1 * mip_height); - dc_textureheight = 
mip_height; - dc_texturefracx = (xoffset >> (FRACBITS - 4)) & 15; - } - - // Grab the posts we need to draw - const FTexture::Span *span; - tex->GetColumnBgra(col >> FRACBITS, &span); - FTexture::Span unmaskedSpan[2]; - if (unmasked) - { - span = unmaskedSpan; - unmaskedSpan[0].TopOffset = 0; - unmaskedSpan[0].Length = tex->GetHeight(); - unmaskedSpan[1].TopOffset = 0; - unmaskedSpan[1].Length = 0; - } - - // Draw each span post - while (span->Length != 0) - { - const int length = span->Length; - const int top = span->TopOffset; - - // calculate unclipped screen coordinates for post - dc_yl = (int)(sprtopscreen + spryscale * top + 0.5); - dc_yh = (int)(sprtopscreen + spryscale * (top + length) + 0.5) - 1; - - if (sprflipvert) - { - swapvalues(dc_yl, dc_yh); - } - - if (dc_yh >= mfloorclip[dc_x]) - { - dc_yh = mfloorclip[dc_x] - 1; - } - if (dc_yl < mceilingclip[dc_x]) - { - dc_yl = mceilingclip[dc_x]; - } - - if (dc_yl <= dc_yh) - { - dc_dest = (ylookup[dc_yl] + dc_x) * 4 + dc_destorg; - dc_count = dc_yh - dc_yl + 1; - - double v = ((dc_yl + 0.5 - sprtopscreen) / spryscale) / tex->GetHeight(); - dc_texturefrac = (uint32_t)(v * (1 << 30)); - - (Drawers()->*colfunc)(); - } - span++; - } - } - - bool DrawerStyle::SetBlendFunc(int op, fixed_t fglevel, fixed_t bglevel, int flags) - { - using namespace drawerargs; - - // r_drawtrans is a seriously bad thing to turn off. I wonder if I should - // just remove it completely. 
- if (!r_drawtrans || (op == STYLEOP_Add && fglevel == FRACUNIT && bglevel == 0 && !(flags & STYLEF_InvertSource))) - { - if (flags & STYLEF_ColorIsFixed) - { - colfunc = &SWPixelFormatDrawers::FillColumn; - } - else if (dc_translation == NULL) - { - colfunc = basecolfunc; - } - else - { - colfunc = transcolfunc; - drawer_needs_pal_input = true; - } - return true; - } - if (flags & STYLEF_InvertSource) - { - dc_srcblend = Col2RGB8_Inverse[fglevel >> 10]; - dc_destblend = Col2RGB8_LessPrecision[bglevel >> 10]; - dc_srcalpha = fglevel; - dc_destalpha = bglevel; - } - else if (op == STYLEOP_Add && fglevel + bglevel <= FRACUNIT) - { - dc_srcblend = Col2RGB8[fglevel >> 10]; - dc_destblend = Col2RGB8[bglevel >> 10]; - dc_srcalpha = fglevel; - dc_destalpha = bglevel; - } - else - { - dc_srcblend = Col2RGB8_LessPrecision[fglevel >> 10]; - dc_destblend = Col2RGB8_LessPrecision[bglevel >> 10]; - dc_srcalpha = fglevel; - dc_destalpha = bglevel; - } - switch (op) - { - case STYLEOP_Add: - if (fglevel == 0 && bglevel == FRACUNIT) - { - return false; - } - if (fglevel + bglevel <= FRACUNIT) - { // Colors won't overflow when added - if (flags & STYLEF_ColorIsFixed) - { - colfunc = &SWPixelFormatDrawers::FillAddColumn; - } - else if (dc_translation == NULL) - { - colfunc = &SWPixelFormatDrawers::DrawAddColumn; - } - else - { - colfunc = &SWPixelFormatDrawers::DrawTranslatedAddColumn; - drawer_needs_pal_input = true; - } - } - else - { // Colors might overflow when added - if (flags & STYLEF_ColorIsFixed) - { - colfunc = &SWPixelFormatDrawers::FillAddClampColumn; - } - else if (dc_translation == NULL) - { - colfunc = &SWPixelFormatDrawers::DrawAddClampColumn; - } - else - { - colfunc = &SWPixelFormatDrawers::DrawAddClampTranslatedColumn; - drawer_needs_pal_input = true; - } - } - return true; - - case STYLEOP_Sub: - if (flags & STYLEF_ColorIsFixed) - { - colfunc = &SWPixelFormatDrawers::FillSubClampColumn; - } - else if (dc_translation == NULL) - { - colfunc = 
&SWPixelFormatDrawers::DrawSubClampColumn; - } - else - { - colfunc = &SWPixelFormatDrawers::DrawSubClampTranslatedColumn; - drawer_needs_pal_input = true; - } - return true; - - case STYLEOP_RevSub: - if (fglevel == 0 && bglevel == FRACUNIT) - { - return false; - } - if (flags & STYLEF_ColorIsFixed) - { - colfunc = &SWPixelFormatDrawers::FillRevSubClampColumn; - } - else if (dc_translation == NULL) - { - colfunc = &SWPixelFormatDrawers::DrawRevSubClampColumn; - } - else - { - colfunc = &SWPixelFormatDrawers::DrawRevSubClampTranslatedColumn; - drawer_needs_pal_input = true; - } - return true; - - default: - return false; - } - } - - fixed_t DrawerStyle::GetAlpha(int type, fixed_t alpha) - { - switch (type) - { - case STYLEALPHA_Zero: return 0; - case STYLEALPHA_One: return OPAQUE; - case STYLEALPHA_Src: return alpha; - case STYLEALPHA_InvSrc: return OPAQUE - alpha; - default: return 0; - } - } - - bool DrawerStyle::SetPatchStyle(FRenderStyle style, fixed_t alpha, int translation, uint32_t color, FDynamicColormap *&basecolormap, fixed_t shadedlightshade) - { - using namespace drawerargs; - - fixed_t fglevel, bglevel; - - drawer_needs_pal_input = false; - - style.CheckFuzz(); - - if (style.BlendOp == STYLEOP_Shadow) - { - style = LegacyRenderStyles[STYLE_TranslucentStencil]; - alpha = TRANSLUC33; - color = 0; - } - - if (style.Flags & STYLEF_ForceAlpha) - { - alpha = clamp(alpha, 0, OPAQUE); - } - else if (style.Flags & STYLEF_TransSoulsAlpha) - { - alpha = fixed_t(transsouls * OPAQUE); - } - else if (style.Flags & STYLEF_Alpha1) - { - alpha = FRACUNIT; - } - else - { - alpha = clamp(alpha, 0, OPAQUE); - } - - if (translation != -1) - { - dc_translation = NULL; - if (translation != 0) - { - FRemapTable *table = TranslationToTable(translation); - if (table != NULL && !table->Inactive) - { - if (r_swtruecolor) - dc_translation = (uint8_t*)table->Palette; - else - dc_translation = table->Remap; - } - } - } - - // Check for special modes - if (style.BlendOp == 
STYLEOP_Fuzz) - { - colfunc = fuzzcolfunc; - return true; - } - else if (style == LegacyRenderStyles[STYLE_Shaded]) - { - // Shaded drawer only gets 16 levels of alpha because it saves memory. - if ((alpha >>= 12) == 0 || basecolormap == nullptr) - return false; - colfunc = &SWPixelFormatDrawers::DrawShadedColumn; - drawer_needs_pal_input = true; - CameraLight *cameraLight = CameraLight::Instance(); - dc_color = cameraLight->fixedcolormap ? cameraLight->fixedcolormap->Maps[APART(color)] : basecolormap->Maps[APART(color)]; - basecolormap = &ShadeFakeColormap[16 - alpha]; - if (cameraLight->fixedlightlev >= 0 && cameraLight->fixedcolormap == NULL) - { - fixed_t shade = shadedlightshade; - if (shade == 0) FIXEDLIGHT2SHADE(cameraLight->fixedlightlev); - SetColorMapLight(basecolormap, 0, shade); - } - else - { - SetColorMapLight(basecolormap, 0, shadedlightshade); - } - return true; - } - - fglevel = GetAlpha(style.SrcAlpha, alpha); - bglevel = GetAlpha(style.DestAlpha, alpha); - - if (style.Flags & STYLEF_ColorIsFixed) - { - uint32_t x = fglevel >> 10; - uint32_t r = RPART(color); - uint32_t g = GPART(color); - uint32_t b = BPART(color); - // dc_color is used by the rt_* routines. It is indexed into dc_srcblend. - dc_color = RGB256k.RGB[r >> 2][g >> 2][b >> 2]; - if (style.Flags & STYLEF_InvertSource) - { - r = 255 - r; - g = 255 - g; - b = 255 - b; - } - uint32_t alpha = clamp(fglevel >> (FRACBITS - 8), 0, 255); - dc_srccolor_bgra = (alpha << 24) | (r << 16) | (g << 8) | b; - // dc_srccolor is used by the R_Fill* routines. It is premultiplied - // with the alpha. 
- dc_srccolor = ((((r*x) >> 4) << 20) | ((g*x) >> 4) | ((((b)*x) >> 4) << 10)) & 0x3feffbff; - SetColorMapLight(&identitycolormap, 0, 0); - } - - if (!DrawerStyle::SetBlendFunc(style.BlendOp, fglevel, bglevel, style.Flags)) - { - return false; - } - return true; - } - - bool DrawerStyle::SetPatchStyle(FRenderStyle style, float alpha, int translation, uint32_t color, FDynamicColormap *&basecolormap, fixed_t shadedlightshade) - { - return SetPatchStyle(style, FLOAT2FIXED(alpha), translation, color, basecolormap, shadedlightshade); - } - - DrawerFunc DrawerStyle::GetTransMaskDrawer() - { - if (colfunc == &SWPixelFormatDrawers::DrawAddColumn) - { - return &SWPixelFormatDrawers::DrawWallAddColumn; - } - if (colfunc == &SWPixelFormatDrawers::DrawAddClampColumn) - { - return &SWPixelFormatDrawers::DrawWallAddClampColumn; - } - if (colfunc == &SWPixelFormatDrawers::DrawSubClampColumn) - { - return &SWPixelFormatDrawers::DrawWallSubClampColumn; - } - if (colfunc == &SWPixelFormatDrawers::DrawRevSubClampColumn) - { - return &SWPixelFormatDrawers::DrawWallRevSubClampColumn; - } - return nullptr; - } - - void DrawerStyle::SetSpanStyle(bool masked, bool additive, fixed_t alpha) - { - using namespace drawerargs; - - if (masked) - { - if (alpha < OPAQUE || additive) - { - if (!additive) - { - spanfunc = &SWPixelFormatDrawers::DrawSpanMaskedTranslucent; - dc_srcblend = Col2RGB8[alpha >> 10]; - dc_destblend = Col2RGB8[(OPAQUE - alpha) >> 10]; - dc_srcalpha = alpha; - dc_destalpha = OPAQUE - alpha; - } - else - { - spanfunc = &SWPixelFormatDrawers::DrawSpanMaskedAddClamp; - dc_srcblend = Col2RGB8_LessPrecision[alpha >> 10]; - dc_destblend = Col2RGB8_LessPrecision[FRACUNIT >> 10]; - dc_srcalpha = alpha; - dc_destalpha = FRACUNIT; - } - } - else - { - spanfunc = &SWPixelFormatDrawers::DrawSpanMasked; - } - } - else - { - if (alpha < OPAQUE || additive) - { - if (!additive) - { - spanfunc = &SWPixelFormatDrawers::DrawSpanTranslucent; - dc_srcblend = Col2RGB8[alpha >> 10]; - 
dc_destblend = Col2RGB8[(OPAQUE - alpha) >> 10]; - dc_srcalpha = alpha; - dc_destalpha = OPAQUE - alpha; - } - else - { - spanfunc = &SWPixelFormatDrawers::DrawSpanAddClamp; - dc_srcblend = Col2RGB8_LessPrecision[alpha >> 10]; - dc_destblend = Col2RGB8_LessPrecision[FRACUNIT >> 10]; - dc_srcalpha = alpha; - dc_destalpha = FRACUNIT; - } - } - else - { - spanfunc = &SWPixelFormatDrawers::DrawSpan; - } - } + int yl = MAX(args.dc_yl, 1); + int yh = MIN(args.dc_yh, fuzzviewheight); + if (yl <= yh) + fuzzpos = (fuzzpos + yh - yl + 1) % FUZZTABLE; } } diff --git a/src/swrenderer/drawers/r_draw.h b/src/swrenderer/drawers/r_draw.h index bb3158331e..efedc277bd 100644 --- a/src/swrenderer/drawers/r_draw.h +++ b/src/swrenderer/drawers/r_draw.h @@ -19,81 +19,7 @@ EXTERN_CVAR(Bool, r_dynlights); namespace swrenderer { - struct ShadeConstants - { - uint16_t light_alpha; - uint16_t light_red; - uint16_t light_green; - uint16_t light_blue; - uint16_t fade_alpha; - uint16_t fade_red; - uint16_t fade_green; - uint16_t fade_blue; - uint16_t desaturate; - bool simple_shade; - }; - - namespace drawerargs - { - extern int dc_pitch; - extern lighttable_t *dc_colormap; - extern ShadeConstants dc_shade_constants; - extern fixed_t dc_light; - extern int dc_x; - extern int dc_yl; - extern int dc_yh; - extern fixed_t dc_iscale; - extern fixed_t dc_texturefrac; - extern uint32_t dc_textureheight; - extern int dc_color; - extern uint32_t dc_srccolor; - extern uint32_t dc_srccolor_bgra; - extern uint32_t *dc_srcblend; - extern uint32_t *dc_destblend; - extern fixed_t dc_srcalpha; - extern fixed_t dc_destalpha; - extern const uint8_t *dc_source; - extern const uint8_t *dc_source2; - extern uint32_t dc_texturefracx; - extern uint8_t *dc_translation; - extern uint8_t *dc_dest; - extern uint8_t *dc_destorg; - extern int dc_destheight; - extern int dc_count; - extern FVector3 dc_normal; - extern FVector3 dc_viewpos; - extern FVector3 dc_viewpos_step; - extern TriLight *dc_lights; - extern int 
dc_num_lights; - - extern uint32_t dc_wall_texturefrac[4]; - extern uint32_t dc_wall_iscale[4]; - extern uint8_t *dc_wall_colormap[4]; - extern fixed_t dc_wall_light[4]; - extern const uint8_t *dc_wall_source[4]; - extern const uint8_t *dc_wall_source2[4]; - extern uint32_t dc_wall_texturefracx[4]; - extern uint32_t dc_wall_sourceheight[4]; - extern int dc_wall_fracbits; - - extern int ds_y; - extern int ds_x1; - extern int ds_x2; - extern lighttable_t * ds_colormap; - extern ShadeConstants ds_shade_constants; - extern dsfixed_t ds_light; - extern dsfixed_t ds_xfrac; - extern dsfixed_t ds_yfrac; - extern dsfixed_t ds_xstep; - extern dsfixed_t ds_ystep; - extern int ds_xbits; - extern int ds_ybits; - extern fixed_t ds_alpha; - extern double ds_lod; - extern const uint8_t *ds_source; - extern bool ds_source_mipmapped; - extern int ds_color; - } + class DrawerArgs; extern int ylookup[MAXHEIGHT]; extern uint8_t shadetables[/*NUMCOLORMAPS*16*256*/]; @@ -120,124 +46,46 @@ namespace swrenderer { public: virtual ~SWPixelFormatDrawers() { } - virtual void DrawWallColumn() = 0; - virtual void DrawWallMaskedColumn() = 0; - virtual void DrawWallAddColumn() = 0; - virtual void DrawWallAddClampColumn() = 0; - virtual void DrawWallSubClampColumn() = 0; - virtual void DrawWallRevSubClampColumn() = 0; - virtual void DrawSingleSkyColumn(uint32_t solid_top, uint32_t solid_bottom, bool fadeSky) = 0; - virtual void DrawDoubleSkyColumn(uint32_t solid_top, uint32_t solid_bottom, bool fadeSky) = 0; - virtual void DrawColumn() = 0; - virtual void FillColumn() = 0; - virtual void FillAddColumn() = 0; - virtual void FillAddClampColumn() = 0; - virtual void FillSubClampColumn() = 0; - virtual void FillRevSubClampColumn() = 0; - virtual void DrawFuzzColumn() = 0; - virtual void DrawAddColumn() = 0; - virtual void DrawTranslatedColumn() = 0; - virtual void DrawTranslatedAddColumn() = 0; - virtual void DrawShadedColumn() = 0; - virtual void DrawAddClampColumn() = 0; - virtual void 
DrawAddClampTranslatedColumn() = 0; - virtual void DrawSubClampColumn() = 0; - virtual void DrawSubClampTranslatedColumn() = 0; - virtual void DrawRevSubClampColumn() = 0; - virtual void DrawRevSubClampTranslatedColumn() = 0; - virtual void DrawSpan() = 0; - virtual void DrawSpanMasked() = 0; - virtual void DrawSpanTranslucent() = 0; - virtual void DrawSpanMaskedTranslucent() = 0; - virtual void DrawSpanAddClamp() = 0; - virtual void DrawSpanMaskedAddClamp() = 0; - virtual void FillSpan() = 0; - virtual void DrawTiltedSpan(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy, FDynamicColormap *basecolormap) = 0; - virtual void DrawColoredSpan(int y, int x1, int x2) = 0; - virtual void DrawFogBoundaryLine(int y, int x1, int x2) = 0; + virtual void DrawWallColumn(const DrawerArgs &args) = 0; + virtual void DrawWallMaskedColumn(const DrawerArgs &args) = 0; + virtual void DrawWallAddColumn(const DrawerArgs &args) = 0; + virtual void DrawWallAddClampColumn(const DrawerArgs &args) = 0; + virtual void DrawWallSubClampColumn(const DrawerArgs &args) = 0; + virtual void DrawWallRevSubClampColumn(const DrawerArgs &args) = 0; + virtual void DrawSingleSkyColumn(const DrawerArgs &args, uint32_t solid_top, uint32_t solid_bottom, bool fadeSky) = 0; + virtual void DrawDoubleSkyColumn(const DrawerArgs &args, uint32_t solid_top, uint32_t solid_bottom, bool fadeSky) = 0; + virtual void DrawColumn(const DrawerArgs &args) = 0; + virtual void FillColumn(const DrawerArgs &args) = 0; + virtual void FillAddColumn(const DrawerArgs &args) = 0; + virtual void FillAddClampColumn(const DrawerArgs &args) = 0; + virtual void FillSubClampColumn(const DrawerArgs &args) = 0; + virtual void FillRevSubClampColumn(const DrawerArgs &args) = 0; + virtual void DrawFuzzColumn(const DrawerArgs &args) = 0; + virtual void DrawAddColumn(const DrawerArgs &args) = 0; + virtual 
void DrawTranslatedColumn(const DrawerArgs &args) = 0; + virtual void DrawTranslatedAddColumn(const DrawerArgs &args) = 0; + virtual void DrawShadedColumn(const DrawerArgs &args) = 0; + virtual void DrawAddClampColumn(const DrawerArgs &args) = 0; + virtual void DrawAddClampTranslatedColumn(const DrawerArgs &args) = 0; + virtual void DrawSubClampColumn(const DrawerArgs &args) = 0; + virtual void DrawSubClampTranslatedColumn(const DrawerArgs &args) = 0; + virtual void DrawRevSubClampColumn(const DrawerArgs &args) = 0; + virtual void DrawRevSubClampTranslatedColumn(const DrawerArgs &args) = 0; + virtual void DrawSpan(const DrawerArgs &args) = 0; + virtual void DrawSpanMasked(const DrawerArgs &args) = 0; + virtual void DrawSpanTranslucent(const DrawerArgs &args) = 0; + virtual void DrawSpanMaskedTranslucent(const DrawerArgs &args) = 0; + virtual void DrawSpanAddClamp(const DrawerArgs &args) = 0; + virtual void DrawSpanMaskedAddClamp(const DrawerArgs &args) = 0; + virtual void FillSpan(const DrawerArgs &args) = 0; + virtual void DrawTiltedSpan(const DrawerArgs &args, int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy, FDynamicColormap *basecolormap) = 0; + virtual void DrawColoredSpan(const DrawerArgs &args, int y, int x1, int x2) = 0; + virtual void DrawFogBoundaryLine(const DrawerArgs &args, int y, int x1, int x2) = 0; }; - typedef void(SWPixelFormatDrawers::*DrawerFunc)(); - - void R_InitColumnDrawers(); void R_InitShadeMaps(); void R_InitFuzzTable(int fuzzoff); void R_InitParticleTexture(); - void R_UpdateFuzzPos(); - - class DrawerStyle - { - public: - DrawerStyle() - { - colfunc = &SWPixelFormatDrawers::DrawColumn; - basecolfunc = &SWPixelFormatDrawers::DrawColumn; - fuzzcolfunc = &SWPixelFormatDrawers::DrawFuzzColumn; - transcolfunc = &SWPixelFormatDrawers::DrawTranslatedColumn; - spanfunc = &SWPixelFormatDrawers::DrawSpan; - } 
- - bool SetPatchStyle(FRenderStyle style, fixed_t alpha, int translation, uint32_t color, FDynamicColormap *&basecolormap, fixed_t shadedlightshade = 0); - bool SetPatchStyle(FRenderStyle style, float alpha, int translation, uint32_t color, FDynamicColormap *&basecolormap, fixed_t shadedlightshade = 0); - void SetSpanStyle(bool masked, bool additive, fixed_t alpha); - - void DrawMaskedColumn(int x, fixed_t iscale, FTexture *texture, fixed_t column, double spryscale, double sprtopscreen, bool sprflipvert, const short *mfloorclip, const short *mceilingclip, bool unmasked = false); - - // Sets dc_colormap and dc_light to their appropriate values depending on the output format (pal vs true color) - void SetColorMapLight(FSWColormap *base_colormap, float light, int shade); - void SetDSColorMapLight(FSWColormap *base_colormap, float light, int shade); - void SetTranslationMap(lighttable_t *translation); - - void SetSpanTexture(FTexture *tex); - void SetSpanColormap(FDynamicColormap *colormap, int shade); - - DrawerFunc GetTransMaskDrawer(); - - DrawerFunc colfunc; - DrawerFunc basecolfunc; - DrawerFunc fuzzcolfunc; - DrawerFunc transcolfunc; - DrawerFunc spanfunc; - - void DrawTiltedSpan(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy, FDynamicColormap *basecolormap) - { - Drawers()->DrawTiltedSpan(y, x1, x2, plane_sz, plane_su, plane_sv, plane_shade, planeshade, planelightfloat, pviewx, pviewy, basecolormap); - } - - void DrawFogBoundaryLine(int y, int x1, int x2) - { - Drawers()->DrawFogBoundaryLine(y, x1, x2); - } - - void DrawColoredSpan(int y, int x1, int x2) - { - Drawers()->DrawColoredSpan(y, x1, x2); - } - - void DrawSingleSkyColumn(uint32_t solid_top, uint32_t solid_bottom, bool fadeSky) - { - Drawers()->DrawSingleSkyColumn(solid_top, solid_bottom, fadeSky); - } - - void DrawDoubleSkyColumn(uint32_t solid_top, uint32_t 
solid_bottom, bool fadeSky) - { - Drawers()->DrawDoubleSkyColumn(solid_top, solid_bottom, fadeSky); - } - - void FillColumn() - { - Drawers()->FillColumn(); - } - - SWPixelFormatDrawers *Drawers() const; - - private: - void DrawMaskedColumnBgra(int x, fixed_t iscale, FTexture *tex, fixed_t column, double spryscale, double sprtopscreen, bool sprflipvert, const short *mfloorclip, const short *mceilingclip, bool unmasked); - - bool SetBlendFunc(int op, fixed_t fglevel, fixed_t bglevel, int flags); - static fixed_t GetAlpha(int type, fixed_t alpha); - - bool drawer_needs_pal_input = false; - }; + void R_UpdateFuzzPos(const DrawerArgs &args); } diff --git a/src/swrenderer/drawers/r_draw_pal.cpp b/src/swrenderer/drawers/r_draw_pal.cpp index ef77ee1eb0..18abbb195e 100644 --- a/src/swrenderer/drawers/r_draw_pal.cpp +++ b/src/swrenderer/drawers/r_draw_pal.cpp @@ -96,24 +96,23 @@ CVAR(Bool, r_blendmethod, false, CVAR_GLOBALCONFIG | CVAR_ARCHIVE) namespace swrenderer { - PalWall1Command::PalWall1Command() + PalWall1Command::PalWall1Command(const DrawerArgs &args) { - using namespace drawerargs; - - _iscale = dc_iscale; - _texturefrac = dc_texturefrac; - _colormap = dc_colormap; - _count = dc_count; - _source = dc_source; - _dest = dc_dest; - _fracbits = dc_wall_fracbits; + _iscale = args.dc_iscale; + _texturefrac = args.dc_texturefrac; + _colormap = args.dc_colormap; + _count = args.dc_count; + _source = args.dc_source; + _dest = args.Dest(); + _dest_y = args.DestY(); + _fracbits = args.dc_wall_fracbits; _pitch = dc_pitch; - _srcblend = dc_srcblend; - _destblend = dc_destblend; - _dynlights = dc_lights; - _num_dynlights = dc_num_lights; - _viewpos_z = dc_viewpos.Z; - _step_viewpos_z = dc_viewpos_step.Z; + _srcblend = args.dc_srcblend; + _destblend = args.dc_destblend; + _dynlights = args.dc_lights; + _num_dynlights = args.dc_num_lights; + _viewpos_z = args.dc_viewpos.Z; + _step_viewpos_z = args.dc_viewpos_step.Z; } uint8_t PalWall1Command::AddLights(const TriLight *lights, 
int num_lights, float viewpos_z, uint8_t fg, uint8_t material) @@ -560,20 +559,19 @@ namespace swrenderer ///////////////////////////////////////////////////////////////////////// - PalSkyCommand::PalSkyCommand(uint32_t solid_top, uint32_t solid_bottom, bool fadeSky) : solid_top(solid_top), solid_bottom(solid_bottom), fadeSky(fadeSky) + PalSkyCommand::PalSkyCommand(const DrawerArgs &args, uint32_t solid_top, uint32_t solid_bottom, bool fadeSky) : solid_top(solid_top), solid_bottom(solid_bottom), fadeSky(fadeSky) { - using namespace drawerargs; - - _dest = dc_dest; - _count = dc_count; + _dest = args.Dest(); + _dest_y = args.DestY(); + _count = args.dc_count; _pitch = dc_pitch; for (int col = 0; col < 4; col++) { - _source[col] = dc_wall_source[col]; - _source2[col] = dc_wall_source2[col]; - _sourceheight[col] = dc_wall_sourceheight[col]; - _iscale[col] = dc_wall_iscale[col]; - _texturefrac[col] = dc_wall_texturefrac[col]; + _source[col] = args.dc_wall_source[col]; + _source2[col] = args.dc_wall_source2[col]; + _sourceheight[col] = args.dc_wall_sourceheight[col]; + _iscale[col] = args.dc_wall_iscale[col]; + _texturefrac[col] = args.dc_wall_texturefrac[col]; } } @@ -866,24 +864,23 @@ namespace swrenderer ///////////////////////////////////////////////////////////////////////// - PalColumnCommand::PalColumnCommand() + PalColumnCommand::PalColumnCommand(const DrawerArgs &args) { - using namespace drawerargs; - - _count = dc_count; - _dest = dc_dest; + _count = args.dc_count; + _dest = args.Dest(); + _dest_y = args.DestY(); _pitch = dc_pitch; - _iscale = dc_iscale; - _texturefrac = dc_texturefrac; - _colormap = dc_colormap; - _source = dc_source; - _translation = dc_translation; - _color = dc_color; - _srcblend = dc_srcblend; - _destblend = dc_destblend; - _srccolor = dc_srccolor; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; + _iscale = args.dc_iscale; + _texturefrac = args.dc_texturefrac; + _colormap = args.dc_colormap; + _source = args.dc_source; + 
_translation = args.dc_translation; + _color = args.dc_color; + _srcblend = args.dc_srcblend; + _destblend = args.dc_destblend; + _srccolor = args.dc_srccolor; + _srcalpha = args.dc_srcalpha; + _destalpha = args.dc_destalpha; } void DrawColumnPalCommand::Execute(DrawerThread *thread) @@ -1764,13 +1761,11 @@ namespace swrenderer ///////////////////////////////////////////////////////////////////////// - DrawFuzzColumnPalCommand::DrawFuzzColumnPalCommand() + DrawFuzzColumnPalCommand::DrawFuzzColumnPalCommand(const DrawerArgs &args) { - using namespace drawerargs; - - _yl = dc_yl; - _yh = dc_yh; - _x = dc_x; + _yl = args.dc_yl; + _yh = args.dc_yh; + _x = args.dc_x; _destorg = dc_destorg; _pitch = dc_pitch; _fuzzpos = fuzzpos; @@ -1853,31 +1848,29 @@ namespace swrenderer ///////////////////////////////////////////////////////////////////////// - PalSpanCommand::PalSpanCommand() + PalSpanCommand::PalSpanCommand(const DrawerArgs &args) { - using namespace drawerargs; - - _source = ds_source; - _colormap = ds_colormap; - _xfrac = ds_xfrac; - _yfrac = ds_yfrac; - _y = ds_y; - _x1 = ds_x1; - _x2 = ds_x2; + _source = args.ds_source; + _colormap = args.ds_colormap; + _xfrac = args.ds_xfrac; + _yfrac = args.ds_yfrac; + _y = args.ds_y; + _x1 = args.ds_x1; + _x2 = args.ds_x2; _destorg = dc_destorg; - _xstep = ds_xstep; - _ystep = ds_ystep; - _xbits = ds_xbits; - _ybits = ds_ybits; - _srcblend = dc_srcblend; - _destblend = dc_destblend; - _color = ds_color; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; - _dynlights = dc_lights; - _num_dynlights = dc_num_lights; - _viewpos_x = dc_viewpos.X; - _step_viewpos_x = dc_viewpos_step.X; + _xstep = args.ds_xstep; + _ystep = args.ds_ystep; + _xbits = args.ds_xbits; + _ybits = args.ds_ybits; + _srcblend = args.dc_srcblend; + _destblend = args.dc_destblend; + _color = args.ds_color; + _srcalpha = args.dc_srcalpha; + _destalpha = args.dc_destalpha; + _dynlights = args.dc_lights; + _num_dynlights = args.dc_num_lights; + _viewpos_x = 
args.dc_viewpos.X; + _step_viewpos_x = args.dc_viewpos_step.X; } uint8_t PalSpanCommand::AddLights(const TriLight *lights, int num_lights, float viewpos_x, uint8_t fg, uint8_t material) @@ -2630,16 +2623,14 @@ namespace swrenderer ///////////////////////////////////////////////////////////////////////// - DrawTiltedSpanPalCommand::DrawTiltedSpanPalCommand(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy, FDynamicColormap *basecolormap) + DrawTiltedSpanPalCommand::DrawTiltedSpanPalCommand(const DrawerArgs &args, int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy, FDynamicColormap *basecolormap) : y(y), x1(x1), x2(x2), plane_sz(plane_sz), plane_su(plane_su), plane_sv(plane_sv), plane_shade(plane_shade), planeshade(planeshade), planelightfloat(planelightfloat), pviewx(pviewx), pviewy(pviewy) { - using namespace drawerargs; - - _colormap = ds_colormap; + _colormap = args.ds_colormap; _destorg = dc_destorg; - _ybits = ds_ybits; - _xbits = ds_xbits; - _source = ds_source; + _ybits = args.ds_ybits; + _xbits = args.ds_xbits; + _source = args.ds_source; basecolormapdata = basecolormap->Maps; } @@ -2878,10 +2869,9 @@ namespace swrenderer ///////////////////////////////////////////////////////////////////////// - DrawColoredSpanPalCommand::DrawColoredSpanPalCommand(int y, int x1, int x2) : y(y), x1(x1), x2(x2) + DrawColoredSpanPalCommand::DrawColoredSpanPalCommand(const DrawerArgs &args, int y, int x1, int x2) : PalSpanCommand(args), y(y), x1(x1), x2(x2) { - using namespace drawerargs; - color = ds_color; + color = args.ds_color; destorg = dc_destorg; } @@ -2895,10 +2885,9 @@ namespace swrenderer ///////////////////////////////////////////////////////////////////////// - 
DrawFogBoundaryLinePalCommand::DrawFogBoundaryLinePalCommand(int y, int x1, int x2) : y(y), x1(x1), x2(x2) + DrawFogBoundaryLinePalCommand::DrawFogBoundaryLinePalCommand(const DrawerArgs &args, int y, int x1, int x2) : PalSpanCommand(args), y(y), x1(x1), x2(x2) { - using namespace drawerargs; - _colormap = dc_colormap; + _colormap = args.dc_colormap; _destorg = dc_destorg; } diff --git a/src/swrenderer/drawers/r_draw_pal.h b/src/swrenderer/drawers/r_draw_pal.h index e2d33f63e4..ea798ef3c1 100644 --- a/src/swrenderer/drawers/r_draw_pal.h +++ b/src/swrenderer/drawers/r_draw_pal.h @@ -4,13 +4,14 @@ #include "r_draw.h" #include "v_palette.h" #include "r_thread.h" +#include "r_drawerargs.h" namespace swrenderer { class PalWall1Command : public DrawerCommand { public: - PalWall1Command(); + PalWall1Command(const DrawerArgs &args); FString DebugInfo() override { return "PalWallCommand"; } protected: @@ -22,6 +23,7 @@ namespace swrenderer int _count; const uint8_t *_source; uint8_t *_dest; + int _dest_y; int _fracbits; int _pitch; uint32_t *_srcblend; @@ -32,17 +34,17 @@ namespace swrenderer float _step_viewpos_z; }; - class DrawWall1PalCommand : public PalWall1Command { public: void Execute(DrawerThread *thread) override; }; - class DrawWallMasked1PalCommand : public PalWall1Command { public: void Execute(DrawerThread *thread) override; }; - class DrawWallAdd1PalCommand : public PalWall1Command { public: void Execute(DrawerThread *thread) override; }; - class DrawWallAddClamp1PalCommand : public PalWall1Command { public: void Execute(DrawerThread *thread) override; }; - class DrawWallSubClamp1PalCommand : public PalWall1Command { public: void Execute(DrawerThread *thread) override; }; - class DrawWallRevSubClamp1PalCommand : public PalWall1Command { public: void Execute(DrawerThread *thread) override; }; + class DrawWall1PalCommand : public PalWall1Command { public: using PalWall1Command::PalWall1Command; void Execute(DrawerThread *thread) override; }; + class 
DrawWallMasked1PalCommand : public PalWall1Command { public: using PalWall1Command::PalWall1Command; void Execute(DrawerThread *thread) override; }; + class DrawWallAdd1PalCommand : public PalWall1Command { public: using PalWall1Command::PalWall1Command; void Execute(DrawerThread *thread) override; }; + class DrawWallAddClamp1PalCommand : public PalWall1Command { public: using PalWall1Command::PalWall1Command; void Execute(DrawerThread *thread) override; }; + class DrawWallSubClamp1PalCommand : public PalWall1Command { public: using PalWall1Command::PalWall1Command; void Execute(DrawerThread *thread) override; }; + class DrawWallRevSubClamp1PalCommand : public PalWall1Command { public: using PalWall1Command::PalWall1Command; void Execute(DrawerThread *thread) override; }; class PalSkyCommand : public DrawerCommand { public: - PalSkyCommand(uint32_t solid_top, uint32_t solid_bottom, bool fadeSky); + PalSkyCommand(const DrawerArgs &args, uint32_t solid_top, uint32_t solid_bottom, bool fadeSky); FString DebugInfo() override { return "PalSkyCommand"; } protected: @@ -51,6 +53,7 @@ namespace swrenderer bool fadeSky; uint8_t *_dest; + int _dest_y; int _count; int _pitch; const uint8_t *_source[4]; @@ -66,12 +69,13 @@ namespace swrenderer class PalColumnCommand : public DrawerCommand { public: - PalColumnCommand(); + PalColumnCommand(const DrawerArgs &args); FString DebugInfo() override { return "PalColumnCommand"; } protected: int _count; uint8_t *_dest; + int _dest_y; int _pitch; fixed_t _iscale; fixed_t _texturefrac; @@ -86,27 +90,27 @@ namespace swrenderer fixed_t _destalpha; }; - class DrawColumnPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; }; - class FillColumnPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; }; - class FillColumnAddPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; }; - class FillColumnAddClampPalCommand : public 
PalColumnCommand { public: void Execute(DrawerThread *thread) override; }; - class FillColumnSubClampPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; }; - class FillColumnRevSubClampPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; }; - class DrawColumnAddPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; }; - class DrawColumnTranslatedPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; }; - class DrawColumnTlatedAddPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; }; - class DrawColumnShadedPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; }; - class DrawColumnAddClampPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; }; - class DrawColumnAddClampTranslatedPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; }; - class DrawColumnSubClampPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; }; - class DrawColumnSubClampTranslatedPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; }; - class DrawColumnRevSubClampPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; }; - class DrawColumnRevSubClampTranslatedPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; }; + class DrawColumnPalCommand : public PalColumnCommand { public: using PalColumnCommand::PalColumnCommand; void Execute(DrawerThread *thread) override; }; + class FillColumnPalCommand : public PalColumnCommand { public: using PalColumnCommand::PalColumnCommand; void Execute(DrawerThread *thread) override; }; + class FillColumnAddPalCommand : public PalColumnCommand { public: using PalColumnCommand::PalColumnCommand; void 
Execute(DrawerThread *thread) override; }; + class FillColumnAddClampPalCommand : public PalColumnCommand { public: using PalColumnCommand::PalColumnCommand; void Execute(DrawerThread *thread) override; }; + class FillColumnSubClampPalCommand : public PalColumnCommand { public: using PalColumnCommand::PalColumnCommand; void Execute(DrawerThread *thread) override; }; + class FillColumnRevSubClampPalCommand : public PalColumnCommand { public: using PalColumnCommand::PalColumnCommand; void Execute(DrawerThread *thread) override; }; + class DrawColumnAddPalCommand : public PalColumnCommand { public: using PalColumnCommand::PalColumnCommand; void Execute(DrawerThread *thread) override; }; + class DrawColumnTranslatedPalCommand : public PalColumnCommand { public: using PalColumnCommand::PalColumnCommand; void Execute(DrawerThread *thread) override; }; + class DrawColumnTlatedAddPalCommand : public PalColumnCommand { public: using PalColumnCommand::PalColumnCommand; void Execute(DrawerThread *thread) override; }; + class DrawColumnShadedPalCommand : public PalColumnCommand { public: using PalColumnCommand::PalColumnCommand; void Execute(DrawerThread *thread) override; }; + class DrawColumnAddClampPalCommand : public PalColumnCommand { public: using PalColumnCommand::PalColumnCommand; void Execute(DrawerThread *thread) override; }; + class DrawColumnAddClampTranslatedPalCommand : public PalColumnCommand { public: using PalColumnCommand::PalColumnCommand; void Execute(DrawerThread *thread) override; }; + class DrawColumnSubClampPalCommand : public PalColumnCommand { public: using PalColumnCommand::PalColumnCommand; void Execute(DrawerThread *thread) override; }; + class DrawColumnSubClampTranslatedPalCommand : public PalColumnCommand { public: using PalColumnCommand::PalColumnCommand; void Execute(DrawerThread *thread) override; }; + class DrawColumnRevSubClampPalCommand : public PalColumnCommand { public: using PalColumnCommand::PalColumnCommand; void Execute(DrawerThread 
*thread) override; }; + class DrawColumnRevSubClampTranslatedPalCommand : public PalColumnCommand { public: using PalColumnCommand::PalColumnCommand; void Execute(DrawerThread *thread) override; }; class DrawFuzzColumnPalCommand : public DrawerCommand { public: - DrawFuzzColumnPalCommand(); + DrawFuzzColumnPalCommand(const DrawerArgs &args); void Execute(DrawerThread *thread) override; FString DebugInfo() override { return "DrawFuzzColumnPalCommand"; } @@ -123,7 +127,7 @@ namespace swrenderer class PalSpanCommand : public DrawerCommand { public: - PalSpanCommand(); + PalSpanCommand(const DrawerArgs &args); FString DebugInfo() override { return "PalSpanCommand"; } protected: @@ -152,18 +156,18 @@ namespace swrenderer float _step_viewpos_x; }; - class DrawSpanPalCommand : public PalSpanCommand { public: void Execute(DrawerThread *thread) override; }; - class DrawSpanMaskedPalCommand : public PalSpanCommand { public: void Execute(DrawerThread *thread) override; }; - class DrawSpanTranslucentPalCommand : public PalSpanCommand { public: void Execute(DrawerThread *thread) override; }; - class DrawSpanMaskedTranslucentPalCommand : public PalSpanCommand { public: void Execute(DrawerThread *thread) override; }; - class DrawSpanAddClampPalCommand : public PalSpanCommand { public: void Execute(DrawerThread *thread) override; }; - class DrawSpanMaskedAddClampPalCommand : public PalSpanCommand { public: void Execute(DrawerThread *thread) override; }; - class FillSpanPalCommand : public PalSpanCommand { public: void Execute(DrawerThread *thread) override; }; + class DrawSpanPalCommand : public PalSpanCommand { public: using PalSpanCommand::PalSpanCommand; void Execute(DrawerThread *thread) override; }; + class DrawSpanMaskedPalCommand : public PalSpanCommand { public: using PalSpanCommand::PalSpanCommand; void Execute(DrawerThread *thread) override; }; + class DrawSpanTranslucentPalCommand : public PalSpanCommand { public: using PalSpanCommand::PalSpanCommand; void 
Execute(DrawerThread *thread) override; }; + class DrawSpanMaskedTranslucentPalCommand : public PalSpanCommand { public: using PalSpanCommand::PalSpanCommand; void Execute(DrawerThread *thread) override; }; + class DrawSpanAddClampPalCommand : public PalSpanCommand { public: using PalSpanCommand::PalSpanCommand; void Execute(DrawerThread *thread) override; }; + class DrawSpanMaskedAddClampPalCommand : public PalSpanCommand { public: using PalSpanCommand::PalSpanCommand; void Execute(DrawerThread *thread) override; }; + class FillSpanPalCommand : public PalSpanCommand { public: using PalSpanCommand::PalSpanCommand; void Execute(DrawerThread *thread) override; }; class DrawTiltedSpanPalCommand : public DrawerCommand { public: - DrawTiltedSpanPalCommand(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy, FDynamicColormap *basecolormap); + DrawTiltedSpanPalCommand(const DrawerArgs &args, int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy, FDynamicColormap *basecolormap); void Execute(DrawerThread *thread) override; FString DebugInfo() override { return "DrawTiltedSpanPalCommand"; } @@ -193,7 +197,7 @@ namespace swrenderer class DrawColoredSpanPalCommand : public PalSpanCommand { public: - DrawColoredSpanPalCommand(int y, int x1, int x2); + DrawColoredSpanPalCommand(const DrawerArgs &args, int y, int x1, int x2); void Execute(DrawerThread *thread) override; FString DebugInfo() override { return "DrawColoredSpanPalCommand"; } @@ -208,7 +212,7 @@ namespace swrenderer class DrawFogBoundaryLinePalCommand : public PalSpanCommand { public: - DrawFogBoundaryLinePalCommand(int y, int x1, int x2); + DrawFogBoundaryLinePalCommand(const DrawerArgs &args, int y, int x1, int x2); void Execute(DrawerThread 
*thread) override; private: @@ -226,6 +230,7 @@ namespace swrenderer private: uint8_t *_dest; + int _dest_y; int _pitch; int _count; uint32_t _fg; @@ -236,53 +241,53 @@ namespace swrenderer class SWPalDrawers : public SWPixelFormatDrawers { public: - void DrawWallColumn() override { DrawerCommandQueue::QueueCommand(); } - void DrawWallMaskedColumn() override { DrawerCommandQueue::QueueCommand(); } + void DrawWallColumn(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawWallMaskedColumn(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawWallAddColumn() override + void DrawWallAddColumn(const DrawerArgs &args) override { - if (drawerargs::dc_num_lights == 0) - DrawerCommandQueue::QueueCommand(); + if (args.dc_num_lights == 0) + DrawerCommandQueue::QueueCommand(args); else - DrawerCommandQueue::QueueCommand(); + DrawerCommandQueue::QueueCommand(args); } - void DrawWallAddClampColumn() override { DrawerCommandQueue::QueueCommand(); } - void DrawWallSubClampColumn() override { DrawerCommandQueue::QueueCommand(); } - void DrawWallRevSubClampColumn() override { DrawerCommandQueue::QueueCommand(); } - void DrawSingleSkyColumn(uint32_t solid_top, uint32_t solid_bottom, bool fadeSky) override { DrawerCommandQueue::QueueCommand(solid_top, solid_bottom, fadeSky); } - void DrawDoubleSkyColumn(uint32_t solid_top, uint32_t solid_bottom, bool fadeSky) override { DrawerCommandQueue::QueueCommand(solid_top, solid_bottom, fadeSky); } - void DrawColumn() override { DrawerCommandQueue::QueueCommand(); } - void FillColumn() override { DrawerCommandQueue::QueueCommand(); } - void FillAddColumn() override { DrawerCommandQueue::QueueCommand(); } - void FillAddClampColumn() override { DrawerCommandQueue::QueueCommand(); } - void FillSubClampColumn() override { DrawerCommandQueue::QueueCommand(); } - void FillRevSubClampColumn() override { DrawerCommandQueue::QueueCommand(); } - void DrawFuzzColumn() override { 
DrawerCommandQueue::QueueCommand(); R_UpdateFuzzPos(); } - void DrawAddColumn() override { DrawerCommandQueue::QueueCommand(); } - void DrawTranslatedColumn() override { DrawerCommandQueue::QueueCommand(); } - void DrawTranslatedAddColumn() override { DrawerCommandQueue::QueueCommand(); } - void DrawShadedColumn() override { DrawerCommandQueue::QueueCommand(); } - void DrawAddClampColumn() override { DrawerCommandQueue::QueueCommand(); } - void DrawAddClampTranslatedColumn() override { DrawerCommandQueue::QueueCommand(); } - void DrawSubClampColumn() override { DrawerCommandQueue::QueueCommand(); } - void DrawSubClampTranslatedColumn() override { DrawerCommandQueue::QueueCommand(); } - void DrawRevSubClampColumn() override { DrawerCommandQueue::QueueCommand(); } - void DrawRevSubClampTranslatedColumn() override { DrawerCommandQueue::QueueCommand(); } - void DrawSpan() override { DrawerCommandQueue::QueueCommand(); } - void DrawSpanMasked() override { DrawerCommandQueue::QueueCommand(); } - void DrawSpanTranslucent() override { DrawerCommandQueue::QueueCommand(); } - void DrawSpanMaskedTranslucent() override { DrawerCommandQueue::QueueCommand(); } - void DrawSpanAddClamp() override { DrawerCommandQueue::QueueCommand(); } - void DrawSpanMaskedAddClamp() override { DrawerCommandQueue::QueueCommand(); } - void FillSpan() override { DrawerCommandQueue::QueueCommand(); } + void DrawWallAddClampColumn(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawWallSubClampColumn(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawWallRevSubClampColumn(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawSingleSkyColumn(const DrawerArgs &args, uint32_t solid_top, uint32_t solid_bottom, bool fadeSky) override { DrawerCommandQueue::QueueCommand(args, solid_top, solid_bottom, fadeSky); } + void DrawDoubleSkyColumn(const DrawerArgs &args, uint32_t solid_top, uint32_t 
solid_bottom, bool fadeSky) override { DrawerCommandQueue::QueueCommand(args, solid_top, solid_bottom, fadeSky); } + void DrawColumn(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void FillColumn(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void FillAddColumn(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void FillAddClampColumn(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void FillSubClampColumn(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void FillRevSubClampColumn(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawFuzzColumn(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); R_UpdateFuzzPos(args); } + void DrawAddColumn(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawTranslatedColumn(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawTranslatedAddColumn(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawShadedColumn(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawAddClampColumn(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawAddClampTranslatedColumn(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawSubClampColumn(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawSubClampTranslatedColumn(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawRevSubClampColumn(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawRevSubClampTranslatedColumn(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawSpan(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + 
void DrawSpanMasked(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawSpanTranslucent(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawSpanMaskedTranslucent(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawSpanAddClamp(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawSpanMaskedAddClamp(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void FillSpan(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawTiltedSpan(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy, FDynamicColormap *basecolormap) override + void DrawTiltedSpan(const DrawerArgs &args, int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy, FDynamicColormap *basecolormap) override { - DrawerCommandQueue::QueueCommand(y, x1, x2, plane_sz, plane_su, plane_sv, plane_shade, planeshade, planelightfloat, pviewx, pviewy, basecolormap); + DrawerCommandQueue::QueueCommand(args, y, x1, x2, plane_sz, plane_su, plane_sv, plane_shade, planeshade, planelightfloat, pviewx, pviewy, basecolormap); } - void DrawColoredSpan(int y, int x1, int x2) override { DrawerCommandQueue::QueueCommand(y, x1, x2); } - void DrawFogBoundaryLine(int y, int x1, int x2) override { DrawerCommandQueue::QueueCommand(y, x1, x2); } + void DrawColoredSpan(const DrawerArgs &args, int y, int x1, int x2) override { DrawerCommandQueue::QueueCommand(args, y, x1, x2); } + void DrawFogBoundaryLine(const DrawerArgs &args, int y, int x1, int x2) override { DrawerCommandQueue::QueueCommand(args, y, x1, x2); } }; } diff --git a/src/swrenderer/drawers/r_draw_rgba.cpp 
b/src/swrenderer/drawers/r_draw_rgba.cpp index fcf94e5da1..e096c97c85 100644 --- a/src/swrenderer/drawers/r_draw_rgba.cpp +++ b/src/swrenderer/drawers/r_draw_rgba.cpp @@ -60,44 +60,42 @@ CVAR(Float, r_lod_bias, -1.5, 0); // To do: add CVAR_ARCHIVE | CVAR_GLOBALCONFIG namespace swrenderer { - DrawSpanLLVMCommand::DrawSpanLLVMCommand() + DrawSpanLLVMCommand::DrawSpanLLVMCommand(const DrawerArgs &drawerargs) { - using namespace drawerargs; - - args.xfrac = ds_xfrac; - args.yfrac = ds_yfrac; - args.xstep = ds_xstep; - args.ystep = ds_ystep; - args.x1 = ds_x1; - args.x2 = ds_x2; - args.y = ds_y; - args.xbits = ds_xbits; - args.ybits = ds_ybits; + args.xfrac = drawerargs.ds_xfrac; + args.yfrac = drawerargs.ds_yfrac; + args.xstep = drawerargs.ds_xstep; + args.ystep = drawerargs.ds_ystep; + args.x1 = drawerargs.ds_x1; + args.x2 = drawerargs.ds_x2; + args.y = drawerargs.ds_y; + args.xbits = drawerargs.ds_xbits; + args.ybits = drawerargs.ds_ybits; args.destorg = (uint32_t*)dc_destorg; args.destpitch = dc_pitch; - args.source = (const uint32_t*)ds_source; - args.light = LightBgra::calc_light_multiplier(ds_light); - args.light_red = ds_shade_constants.light_red; - args.light_green = ds_shade_constants.light_green; - args.light_blue = ds_shade_constants.light_blue; - args.light_alpha = ds_shade_constants.light_alpha; - args.fade_red = ds_shade_constants.fade_red; - args.fade_green = ds_shade_constants.fade_green; - args.fade_blue = ds_shade_constants.fade_blue; - args.fade_alpha = ds_shade_constants.fade_alpha; - args.desaturate = ds_shade_constants.desaturate; - args.srcalpha = dc_srcalpha >> (FRACBITS - 8); - args.destalpha = dc_destalpha >> (FRACBITS - 8); + args.source = (const uint32_t*)drawerargs.ds_source; + args.light = LightBgra::calc_light_multiplier(drawerargs.ds_light); + args.light_red = drawerargs.ds_shade_constants.light_red; + args.light_green = drawerargs.ds_shade_constants.light_green; + args.light_blue = drawerargs.ds_shade_constants.light_blue; + 
args.light_alpha = drawerargs.ds_shade_constants.light_alpha; + args.fade_red = drawerargs.ds_shade_constants.fade_red; + args.fade_green = drawerargs.ds_shade_constants.fade_green; + args.fade_blue = drawerargs.ds_shade_constants.fade_blue; + args.fade_alpha = drawerargs.ds_shade_constants.fade_alpha; + args.desaturate = drawerargs.ds_shade_constants.desaturate; + args.srcalpha = drawerargs.dc_srcalpha >> (FRACBITS - 8); + args.destalpha = drawerargs.dc_destalpha >> (FRACBITS - 8); args.flags = 0; - if (ds_shade_constants.simple_shade) + if (drawerargs.ds_shade_constants.simple_shade) args.flags |= DrawSpanArgs::simple_shade; - if (!sampler_setup(args.source, args.xbits, args.ybits, ds_source_mipmapped)) + if (!sampler_setup(drawerargs.ds_lod, args.source, args.xbits, args.ybits, drawerargs.ds_source_mipmapped)) args.flags |= DrawSpanArgs::nearest_filter; - args.viewpos_x = dc_viewpos.X; - args.step_viewpos_x = dc_viewpos_step.X; - args.dynlights = dc_lights; - args.num_dynlights = dc_num_lights; + args.viewpos_x = drawerargs.dc_viewpos.X; + args.step_viewpos_x = drawerargs.dc_viewpos_step.X; + args.dynlights = drawerargs.dc_lights; + args.num_dynlights = drawerargs.dc_num_lights; } void DrawSpanLLVMCommand::Execute(DrawerThread *thread) @@ -112,14 +110,12 @@ namespace swrenderer return "DrawSpan\n" + args.ToString(); } - bool DrawSpanLLVMCommand::sampler_setup(const uint32_t * &source, int &xbits, int &ybits, bool mipmapped) + bool DrawSpanLLVMCommand::sampler_setup(double lod, const uint32_t * &source, int &xbits, int &ybits, bool mipmapped) { - using namespace drawerargs; - - bool magnifying = ds_lod < 0.0f; + bool magnifying = lod < 0.0; if (r_mipmap && mipmapped) { - int level = (int)ds_lod; + int level = (int)lod; while (level > 0) { if (xbits <= 2 || ybits <= 2) @@ -184,44 +180,40 @@ namespace swrenderer return d; } - DrawWall1LLVMCommand::DrawWall1LLVMCommand() + DrawWall1LLVMCommand::DrawWall1LLVMCommand(const DrawerArgs &drawerargs) { - using namespace 
drawerargs; - - args.dest = (uint32_t*)dc_dest; - args.dest_y = _dest_y; + args.dest = (uint32_t*)drawerargs.Dest(); + args.dest_y = drawerargs.DestY(); args.pitch = dc_pitch; - args.count = dc_count; - args.texturefrac[0] = dc_texturefrac; - args.texturefracx[0] = dc_texturefracx; - args.iscale[0] = dc_iscale; - args.textureheight[0] = dc_textureheight; - args.source[0] = (const uint32 *)dc_source; - args.source2[0] = (const uint32 *)dc_source2; - args.light[0] = LightBgra::calc_light_multiplier(dc_light); - args.light_red = dc_shade_constants.light_red; - args.light_green = dc_shade_constants.light_green; - args.light_blue = dc_shade_constants.light_blue; - args.light_alpha = dc_shade_constants.light_alpha; - args.fade_red = dc_shade_constants.fade_red; - args.fade_green = dc_shade_constants.fade_green; - args.fade_blue = dc_shade_constants.fade_blue; - args.fade_alpha = dc_shade_constants.fade_alpha; - args.desaturate = dc_shade_constants.desaturate; - args.srcalpha = dc_srcalpha >> (FRACBITS - 8); - args.destalpha = dc_destalpha >> (FRACBITS - 8); + args.count = drawerargs.dc_count; + args.texturefrac[0] = drawerargs.dc_texturefrac; + args.texturefracx[0] = drawerargs.dc_texturefracx; + args.iscale[0] = drawerargs.dc_iscale; + args.textureheight[0] = drawerargs.dc_textureheight; + args.source[0] = (const uint32 *)drawerargs.dc_source; + args.source2[0] = (const uint32 *)drawerargs.dc_source2; + args.light[0] = LightBgra::calc_light_multiplier(drawerargs.dc_light); + args.light_red = drawerargs.dc_shade_constants.light_red; + args.light_green = drawerargs.dc_shade_constants.light_green; + args.light_blue = drawerargs.dc_shade_constants.light_blue; + args.light_alpha = drawerargs.dc_shade_constants.light_alpha; + args.fade_red = drawerargs.dc_shade_constants.fade_red; + args.fade_green = drawerargs.dc_shade_constants.fade_green; + args.fade_blue = drawerargs.dc_shade_constants.fade_blue; + args.fade_alpha = drawerargs.dc_shade_constants.fade_alpha; + 
args.desaturate = drawerargs.dc_shade_constants.desaturate; + args.srcalpha = drawerargs.dc_srcalpha >> (FRACBITS - 8); + args.destalpha = drawerargs.dc_destalpha >> (FRACBITS - 8); args.flags = 0; - if (dc_shade_constants.simple_shade) + if (drawerargs.dc_shade_constants.simple_shade) args.flags |= DrawWallArgs::simple_shade; if (args.source2[0] == nullptr) args.flags |= DrawWallArgs::nearest_filter; - args.z = dc_viewpos.Z; - args.step_z = dc_viewpos_step.Z; - args.dynlights = dc_lights; - args.num_dynlights = dc_num_lights; - - DetectRangeError(args.dest, args.dest_y, args.count); + args.z = drawerargs.dc_viewpos.Z; + args.step_z = drawerargs.dc_viewpos_step.Z; + args.dynlights = drawerargs.dc_lights; + args.num_dynlights = drawerargs.dc_num_lights; } void DrawWall1LLVMCommand::Execute(DrawerThread *thread) @@ -252,44 +244,40 @@ namespace swrenderer return "DrawColumn\n" + args.ToString(); } - DrawColumnLLVMCommand::DrawColumnLLVMCommand() + DrawColumnLLVMCommand::DrawColumnLLVMCommand(const DrawerArgs &drawerargs) { - using namespace drawerargs; - - args.dest = (uint32_t*)dc_dest; - args.source = dc_source; - args.source2 = dc_source2; - args.colormap = dc_colormap; - args.translation = dc_translation; + args.dest = (uint32_t*)drawerargs.Dest(); + args.source = drawerargs.dc_source; + args.source2 = drawerargs.dc_source2; + args.colormap = drawerargs.dc_colormap; + args.translation = drawerargs.dc_translation; args.basecolors = (const uint32_t *)GPalette.BaseColors; args.pitch = dc_pitch; - args.count = dc_count; - args.dest_y = _dest_y; - args.iscale = dc_iscale; - args.texturefracx = dc_texturefracx; - args.textureheight = dc_textureheight; - args.texturefrac = dc_texturefrac; - args.light = LightBgra::calc_light_multiplier(dc_light); - args.color = LightBgra::shade_pal_index_simple(dc_color, args.light); - args.srccolor = dc_srccolor_bgra; - args.srcalpha = dc_srcalpha >> (FRACBITS - 8); - args.destalpha = dc_destalpha >> (FRACBITS - 8); - args.light_red = 
dc_shade_constants.light_red; - args.light_green = dc_shade_constants.light_green; - args.light_blue = dc_shade_constants.light_blue; - args.light_alpha = dc_shade_constants.light_alpha; - args.fade_red = dc_shade_constants.fade_red; - args.fade_green = dc_shade_constants.fade_green; - args.fade_blue = dc_shade_constants.fade_blue; - args.fade_alpha = dc_shade_constants.fade_alpha; - args.desaturate = dc_shade_constants.desaturate; + args.count = drawerargs.dc_count; + args.dest_y = drawerargs.DestY(); + args.iscale = drawerargs.dc_iscale; + args.texturefracx = drawerargs.dc_texturefracx; + args.textureheight = drawerargs.dc_textureheight; + args.texturefrac = drawerargs.dc_texturefrac; + args.light = LightBgra::calc_light_multiplier(drawerargs.dc_light); + args.color = LightBgra::shade_pal_index_simple(drawerargs.dc_color, args.light); + args.srccolor = drawerargs.dc_srccolor_bgra; + args.srcalpha = drawerargs.dc_srcalpha >> (FRACBITS - 8); + args.destalpha = drawerargs.dc_destalpha >> (FRACBITS - 8); + args.light_red = drawerargs.dc_shade_constants.light_red; + args.light_green = drawerargs.dc_shade_constants.light_green; + args.light_blue = drawerargs.dc_shade_constants.light_blue; + args.light_alpha = drawerargs.dc_shade_constants.light_alpha; + args.fade_red = drawerargs.dc_shade_constants.fade_red; + args.fade_green = drawerargs.dc_shade_constants.fade_green; + args.fade_blue = drawerargs.dc_shade_constants.fade_blue; + args.fade_alpha = drawerargs.dc_shade_constants.fade_alpha; + args.desaturate = drawerargs.dc_shade_constants.desaturate; args.flags = 0; - if (dc_shade_constants.simple_shade) + if (drawerargs.dc_shade_constants.simple_shade) args.flags |= DrawColumnArgs::simple_shade; if (args.source2 == nullptr) args.flags |= DrawColumnArgs::nearest_filter; - - DetectRangeError(args.dest, args.dest_y, args.count); } void DrawColumnLLVMCommand::Execute(DrawerThread *thread) @@ -310,28 +298,24 @@ namespace swrenderer return d; } - 
DrawSkyLLVMCommand::DrawSkyLLVMCommand(uint32_t solid_top, uint32_t solid_bottom, bool fadeSky) + DrawSkyLLVMCommand::DrawSkyLLVMCommand(const DrawerArgs &drawerargs, uint32_t solid_top, uint32_t solid_bottom, bool fadeSky) { - using namespace drawerargs; - - args.dest = (uint32_t*)dc_dest; - args.dest_y = _dest_y; - args.count = dc_count; + args.dest = (uint32_t*)drawerargs.Dest(); + args.dest_y = drawerargs.DestY(); + args.count = drawerargs.dc_count; args.pitch = dc_pitch; for (int i = 0; i < 4; i++) { - args.texturefrac[i] = dc_wall_texturefrac[i]; - args.iscale[i] = dc_wall_iscale[i]; - args.source0[i] = (const uint32_t *)dc_wall_source[i]; - args.source1[i] = (const uint32_t *)dc_wall_source2[i]; + args.texturefrac[i] = drawerargs.dc_wall_texturefrac[i]; + args.iscale[i] = drawerargs.dc_wall_iscale[i]; + args.source0[i] = (const uint32_t *)drawerargs.dc_wall_source[i]; + args.source1[i] = (const uint32_t *)drawerargs.dc_wall_source2[i]; } - args.textureheight0 = dc_wall_sourceheight[0]; - args.textureheight1 = dc_wall_sourceheight[1]; + args.textureheight0 = drawerargs.dc_wall_sourceheight[0]; + args.textureheight1 = drawerargs.dc_wall_sourceheight[1]; args.top_color = solid_top; args.bottom_color = solid_bottom; args.flags = fadeSky ? 
DrawSkyArgs::fade_sky : 0; - - DetectRangeError(args.dest, args.dest_y, args.count); } FString DrawSkyLLVMCommand::DebugInfo() @@ -341,13 +325,11 @@ namespace swrenderer ///////////////////////////////////////////////////////////////////////////// - DrawFuzzColumnRGBACommand::DrawFuzzColumnRGBACommand() + DrawFuzzColumnRGBACommand::DrawFuzzColumnRGBACommand(const DrawerArgs &drawerargs) { - using namespace drawerargs; - - _x = dc_x; - _yl = dc_yl; - _yh = dc_yh; + _x = drawerargs.dc_x; + _yl = drawerargs.dc_yl; + _yh = drawerargs.dc_yh; _destorg = dc_destorg; _pitch = dc_pitch; _fuzzpos = fuzzpos; @@ -451,16 +433,14 @@ namespace swrenderer ///////////////////////////////////////////////////////////////////////////// - FillSpanRGBACommand::FillSpanRGBACommand() + FillSpanRGBACommand::FillSpanRGBACommand(const DrawerArgs &drawerargs) { - using namespace drawerargs; - - _x1 = ds_x1; - _x2 = ds_x2; - _y = ds_y; + _x1 = drawerargs.ds_x1; + _x2 = drawerargs.ds_x2; + _y = drawerargs.ds_y; _destorg = dc_destorg; - _light = ds_light; - _color = ds_color; + _light = drawerargs.ds_light; + _color = drawerargs.ds_color; } void FillSpanRGBACommand::Execute(DrawerThread *thread) @@ -483,17 +463,15 @@ namespace swrenderer ///////////////////////////////////////////////////////////////////////////// - DrawFogBoundaryLineRGBACommand::DrawFogBoundaryLineRGBACommand(int y, int x, int x2) + DrawFogBoundaryLineRGBACommand::DrawFogBoundaryLineRGBACommand(const DrawerArgs &drawerargs, int y, int x, int x2) { - using namespace drawerargs; - _y = y; _x = x; _x2 = x2; _destorg = dc_destorg; - _light = dc_light; - _shade_constants = dc_shade_constants; + _light = drawerargs.dc_light; + _shade_constants = drawerargs.dc_shade_constants; } void DrawFogBoundaryLineRGBACommand::Execute(DrawerThread *thread) @@ -553,16 +531,14 @@ namespace swrenderer ///////////////////////////////////////////////////////////////////////////// - DrawTiltedSpanRGBACommand::DrawTiltedSpanRGBACommand(int y, int x1, 
int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy) + DrawTiltedSpanRGBACommand::DrawTiltedSpanRGBACommand(const DrawerArgs &drawerargs, int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy) { - using namespace drawerargs; - _x1 = x1; _x2 = x2; _y = y; _destorg = dc_destorg; - _light = ds_light; - _shade_constants = ds_shade_constants; + _light = drawerargs.ds_light; + _shade_constants = drawerargs.ds_shade_constants; _plane_sz = plane_sz; _plane_su = plane_su; _plane_sv = plane_sv; @@ -571,9 +547,9 @@ namespace swrenderer _planelightfloat = planelightfloat; _pviewx = pviewx; _pviewy = pviewy; - _source = (const uint32_t*)ds_source; - _xbits = ds_xbits; - _ybits = ds_ybits; + _source = (const uint32_t*)drawerargs.ds_source; + _xbits = drawerargs.ds_xbits; + _ybits = drawerargs.ds_ybits; } void DrawTiltedSpanRGBACommand::Execute(DrawerThread *thread) @@ -689,17 +665,15 @@ namespace swrenderer ///////////////////////////////////////////////////////////////////////////// - DrawColoredSpanRGBACommand::DrawColoredSpanRGBACommand(int y, int x1, int x2) + DrawColoredSpanRGBACommand::DrawColoredSpanRGBACommand(const DrawerArgs &drawerargs, int y, int x1, int x2) { - using namespace drawerargs; - _y = y; _x1 = x1; _x2 = x2; _destorg = dc_destorg; - _light = ds_light; - _color = ds_color; + _light = drawerargs.ds_light; + _color = drawerargs.ds_color; } void DrawColoredSpanRGBACommand::Execute(DrawerThread *thread) @@ -726,10 +700,8 @@ namespace swrenderer ///////////////////////////////////////////////////////////////////////////// - FillTransColumnRGBACommand::FillTransColumnRGBACommand(int x, int y1, int y2, int color, int a) + FillTransColumnRGBACommand::FillTransColumnRGBACommand(const DrawerArgs &drawerargs, int x, 
int y1, int y2, int color, int a) { - using namespace drawerargs; - _x = x; _y1 = y1; _y2 = y2; diff --git a/src/swrenderer/drawers/r_draw_rgba.h b/src/swrenderer/drawers/r_draw_rgba.h index 0d40c63edc..1d3717a1cb 100644 --- a/src/swrenderer/drawers/r_draw_rgba.h +++ b/src/swrenderer/drawers/r_draw_rgba.h @@ -26,6 +26,7 @@ #include "v_palette.h" #include "r_thread.h" #include "r_drawers.h" +#include "r_drawerargs.h" #ifdef __arm__ #define NO_SSE @@ -79,7 +80,7 @@ namespace swrenderer class DrawSpanLLVMCommand : public DrawerCommand { public: - DrawSpanLLVMCommand(); + DrawSpanLLVMCommand(const DrawerArgs &drawerargs); void Execute(DrawerThread *thread) override; FString DebugInfo() override; @@ -88,36 +89,41 @@ namespace swrenderer DrawSpanArgs args; private: - inline static bool sampler_setup(const uint32_t * &source, int &xbits, int &ybits, bool mipmapped); + inline static bool sampler_setup(double lod, const uint32_t * &source, int &xbits, int &ybits, bool mipmapped); }; class DrawSpanMaskedLLVMCommand : public DrawSpanLLVMCommand { public: + using DrawSpanLLVMCommand::DrawSpanLLVMCommand; void Execute(DrawerThread *thread) override; }; class DrawSpanTranslucentLLVMCommand : public DrawSpanLLVMCommand { public: + using DrawSpanLLVMCommand::DrawSpanLLVMCommand; void Execute(DrawerThread *thread) override; }; class DrawSpanMaskedTranslucentLLVMCommand : public DrawSpanLLVMCommand { public: + using DrawSpanLLVMCommand::DrawSpanLLVMCommand; void Execute(DrawerThread *thread) override; }; class DrawSpanAddClampLLVMCommand : public DrawSpanLLVMCommand { public: + using DrawSpanLLVMCommand::DrawSpanLLVMCommand; void Execute(DrawerThread *thread) override; }; class DrawSpanMaskedAddClampLLVMCommand : public DrawSpanLLVMCommand { public: + using DrawSpanLLVMCommand::DrawSpanLLVMCommand; void Execute(DrawerThread *thread) override; }; @@ -129,7 +135,7 @@ namespace swrenderer WorkerThreadData ThreadData(DrawerThread *thread); public: - DrawWall1LLVMCommand(); + 
DrawWall1LLVMCommand(const DrawerArgs &drawerargs); void Execute(DrawerThread *thread) override; FString DebugInfo() override; @@ -144,7 +150,7 @@ namespace swrenderer FString DebugInfo() override; public: - DrawColumnLLVMCommand(); + DrawColumnLLVMCommand(const DrawerArgs &drawerargs); void Execute(DrawerThread *thread) override; }; @@ -157,7 +163,7 @@ namespace swrenderer WorkerThreadData ThreadData(DrawerThread *thread); public: - DrawSkyLLVMCommand(uint32_t solid_top, uint32_t solid_bottom, bool fadeSky); + DrawSkyLLVMCommand(const DrawerArgs &drawerargs, uint32_t solid_top, uint32_t solid_bottom, bool fadeSky); FString DebugInfo() override; }; @@ -195,7 +201,7 @@ namespace swrenderer int _fuzzviewheight; public: - DrawFuzzColumnRGBACommand(); + DrawFuzzColumnRGBACommand(const DrawerArgs &drawerargs); void Execute(DrawerThread *thread) override; FString DebugInfo() override; }; @@ -210,7 +216,7 @@ namespace swrenderer int _color; public: - FillSpanRGBACommand(); + FillSpanRGBACommand(const DrawerArgs &drawerargs); void Execute(DrawerThread *thread) override; FString DebugInfo() override; }; @@ -225,7 +231,7 @@ namespace swrenderer ShadeConstants _shade_constants; public: - DrawFogBoundaryLineRGBACommand(int y, int x, int x2); + DrawFogBoundaryLineRGBACommand(const DrawerArgs &drawerargs, int y, int x, int x2); void Execute(DrawerThread *thread) override; FString DebugInfo() override; }; @@ -251,7 +257,7 @@ namespace swrenderer const uint32_t * RESTRICT _source; public: - DrawTiltedSpanRGBACommand(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy); + DrawTiltedSpanRGBACommand(const DrawerArgs &drawerargs, int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy); void Execute(DrawerThread 
*thread) override; FString DebugInfo() override; }; @@ -266,7 +272,7 @@ namespace swrenderer int _color; public: - DrawColoredSpanRGBACommand(int y, int x1, int x2); + DrawColoredSpanRGBACommand(const DrawerArgs &drawerargs, int y, int x1, int x2); void Execute(DrawerThread *thread) override; FString DebugInfo() override; @@ -284,7 +290,7 @@ namespace swrenderer fixed_t _light; public: - FillTransColumnRGBACommand(int x, int y1, int y2, int color, int a); + FillTransColumnRGBACommand(const DrawerArgs &drawerargs, int x, int y1, int y2, int color, int a); void Execute(DrawerThread *thread) override; FString DebugInfo() override; }; @@ -335,6 +341,7 @@ namespace swrenderer private: uint32_t *_dest; + int _dest_y; int _pitch; int _count; uint32_t _fg; @@ -347,46 +354,46 @@ namespace swrenderer class SWTruecolorDrawers : public SWPixelFormatDrawers { public: - void DrawWallColumn() override { DrawerCommandQueue::QueueCommand(); } - void DrawWallMaskedColumn() override { DrawerCommandQueue::QueueCommand(); } - void DrawWallAddColumn() override { DrawerCommandQueue::QueueCommand(); } - void DrawWallAddClampColumn() override { DrawerCommandQueue::QueueCommand(); } - void DrawWallSubClampColumn() override { DrawerCommandQueue::QueueCommand(); } - void DrawWallRevSubClampColumn() override { DrawerCommandQueue::QueueCommand(); } - void DrawSingleSkyColumn(uint32_t solid_top, uint32_t solid_bottom, bool skyFade) override { DrawerCommandQueue::QueueCommand(solid_top, solid_bottom, skyFade); } - void DrawDoubleSkyColumn(uint32_t solid_top, uint32_t solid_bottom, bool skyFade) override { DrawerCommandQueue::QueueCommand(solid_top, solid_bottom, skyFade); } - void DrawColumn() override { DrawerCommandQueue::QueueCommand(); } - void FillColumn() override { DrawerCommandQueue::QueueCommand(); } - void FillAddColumn() override { DrawerCommandQueue::QueueCommand(); } - void FillAddClampColumn() override { DrawerCommandQueue::QueueCommand(); } - void FillSubClampColumn() override { 
DrawerCommandQueue::QueueCommand(); } - void FillRevSubClampColumn() override { DrawerCommandQueue::QueueCommand(); } - void DrawFuzzColumn() override { DrawerCommandQueue::QueueCommand(); R_UpdateFuzzPos(); } - void DrawAddColumn() override { DrawerCommandQueue::QueueCommand(); } - void DrawTranslatedColumn() override { DrawerCommandQueue::QueueCommand(); } - void DrawTranslatedAddColumn() override { DrawerCommandQueue::QueueCommand(); } - void DrawShadedColumn() override { DrawerCommandQueue::QueueCommand(); } - void DrawAddClampColumn() override { DrawerCommandQueue::QueueCommand(); } - void DrawAddClampTranslatedColumn() override { DrawerCommandQueue::QueueCommand(); } - void DrawSubClampColumn() override { DrawerCommandQueue::QueueCommand(); } - void DrawSubClampTranslatedColumn() override { DrawerCommandQueue::QueueCommand(); } - void DrawRevSubClampColumn() override { DrawerCommandQueue::QueueCommand(); } - void DrawRevSubClampTranslatedColumn() override { DrawerCommandQueue::QueueCommand(); } - void DrawSpan() override { DrawerCommandQueue::QueueCommand(); } - void DrawSpanMasked() override { DrawerCommandQueue::QueueCommand(); } - void DrawSpanTranslucent() override { DrawerCommandQueue::QueueCommand(); } - void DrawSpanMaskedTranslucent() override { DrawerCommandQueue::QueueCommand(); } - void DrawSpanAddClamp() override { DrawerCommandQueue::QueueCommand(); } - void DrawSpanMaskedAddClamp() override { DrawerCommandQueue::QueueCommand(); } - void FillSpan() override { DrawerCommandQueue::QueueCommand(); } + void DrawWallColumn(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawWallMaskedColumn(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawWallAddColumn(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawWallAddClampColumn(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawWallSubClampColumn(const 
DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawWallRevSubClampColumn(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawSingleSkyColumn(const DrawerArgs &args, uint32_t solid_top, uint32_t solid_bottom, bool skyFade) override { DrawerCommandQueue::QueueCommand(args, solid_top, solid_bottom, skyFade); } + void DrawDoubleSkyColumn(const DrawerArgs &args, uint32_t solid_top, uint32_t solid_bottom, bool skyFade) override { DrawerCommandQueue::QueueCommand(args, solid_top, solid_bottom, skyFade); } + void DrawColumn(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void FillColumn(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void FillAddColumn(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void FillAddClampColumn(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void FillSubClampColumn(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void FillRevSubClampColumn(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawFuzzColumn(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); R_UpdateFuzzPos(args); } + void DrawAddColumn(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawTranslatedColumn(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawTranslatedAddColumn(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawShadedColumn(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawAddClampColumn(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawAddClampTranslatedColumn(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawSubClampColumn(const DrawerArgs &args) override { 
DrawerCommandQueue::QueueCommand(args); } + void DrawSubClampTranslatedColumn(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawRevSubClampColumn(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawRevSubClampTranslatedColumn(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawSpan(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawSpanMasked(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawSpanTranslucent(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawSpanMaskedTranslucent(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawSpanAddClamp(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawSpanMaskedAddClamp(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void FillSpan(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawTiltedSpan(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy, FDynamicColormap *basecolormap) override + void DrawTiltedSpan(const DrawerArgs &args, int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy, FDynamicColormap *basecolormap) override { - DrawerCommandQueue::QueueCommand(y, x1, x2, plane_sz, plane_su, plane_sv, plane_shade, planeshade, planelightfloat, pviewx, pviewy); + DrawerCommandQueue::QueueCommand(args, y, x1, x2, plane_sz, plane_su, plane_sv, plane_shade, planeshade, planelightfloat, pviewx, pviewy); } - void DrawColoredSpan(int y, int x1, int x2) override { DrawerCommandQueue::QueueCommand(y, x1, x2); } - 
void DrawFogBoundaryLine(int y, int x1, int x2) override { DrawerCommandQueue::QueueCommand(y, x1, x2); } + void DrawColoredSpan(const DrawerArgs &args, int y, int x1, int x2) override { DrawerCommandQueue::QueueCommand(args, y, x1, x2); } + void DrawFogBoundaryLine(const DrawerArgs &args, int y, int x1, int x2) override { DrawerCommandQueue::QueueCommand(args, y, x1, x2); } }; ///////////////////////////////////////////////////////////////////////////// diff --git a/src/swrenderer/drawers/r_thread.h b/src/swrenderer/drawers/r_thread.h index 38d803d4aa..8de57a32d1 100644 --- a/src/swrenderer/drawers/r_thread.h +++ b/src/swrenderer/drawers/r_thread.h @@ -108,42 +108,7 @@ public: // Task to be executed by each worker thread class DrawerCommand { -protected: - int _dest_y; - - void DetectRangeError(uint32_t *&dest, int &dest_y, int &count) - { -#if defined(_MSC_VER) && defined(_DEBUG) - if (dest_y < 0 || count < 0 || dest_y + count > swrenderer::drawerargs::dc_destheight) - __debugbreak(); // Buffer overrun detected! 
-#endif - - if (dest_y < 0) - { - count += dest_y; - dest_y = 0; - dest = (uint32_t*)swrenderer::drawerargs::dc_destorg; - } - else if (dest_y >= swrenderer::drawerargs::dc_destheight) - { - dest_y = 0; - count = 0; - } - - if (count < 0 || count > MAXHEIGHT) count = 0; - if (dest_y + count >= swrenderer::drawerargs::dc_destheight) - count = swrenderer::drawerargs::dc_destheight - dest_y; - } - public: - DrawerCommand() - { - if (swrenderer::r_swtruecolor) - _dest_y = static_cast((swrenderer::drawerargs::dc_dest - swrenderer::drawerargs::dc_destorg) / (swrenderer::drawerargs::dc_pitch * 4)); - else - _dest_y = static_cast((swrenderer::drawerargs::dc_dest - swrenderer::drawerargs::dc_destorg) / (swrenderer::drawerargs::dc_pitch)); - } - virtual ~DrawerCommand() { } virtual void Execute(DrawerThread *thread) = 0; diff --git a/src/swrenderer/line/r_fogboundary.cpp b/src/swrenderer/line/r_fogboundary.cpp index f9928aa183..f585109a46 100644 --- a/src/swrenderer/line/r_fogboundary.cpp +++ b/src/swrenderer/line/r_fogboundary.cpp @@ -64,7 +64,7 @@ namespace swrenderer fillshort(spanend + t2, b2 - t2, x); } - drawerstyle.SetColorMapLight(basecolormap, (float)light, wallshade); + drawerargs.SetColorMapLight(basecolormap, (float)light, wallshade); uint8_t *fake_dc_colormap = basecolormap->Maps + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT); @@ -91,7 +91,7 @@ namespace swrenderer fillshort(spanend + t2, b2 - t2, x); } rcolormap = lcolormap; - drawerstyle.SetColorMapLight(basecolormap, (float)light, wallshade); + drawerargs.SetColorMapLight(basecolormap, (float)light, wallshade); fake_dc_colormap = basecolormap->Maps + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT); } else @@ -102,13 +102,13 @@ namespace swrenderer while (t2 < stop) { int y = t2++; - drawerstyle.DrawFogBoundaryLine(y, xr, spanend[y]); + drawerargs.DrawFogBoundaryLine(y, xr, spanend[y]); } stop = MAX(b1, t2); while (b2 > stop) { int y = --b2; - drawerstyle.DrawFogBoundaryLine(y, xr, spanend[y]); + 
drawerargs.DrawFogBoundaryLine(y, xr, spanend[y]); } } else @@ -142,7 +142,7 @@ namespace swrenderer { for (; y < y2; ++y) { - drawerstyle.DrawFogBoundaryLine(y, x1, spanend[y]); + drawerargs.DrawFogBoundaryLine(y, x1, spanend[y]); } } } diff --git a/src/swrenderer/line/r_fogboundary.h b/src/swrenderer/line/r_fogboundary.h index 92a42c817e..55f0e504ec 100644 --- a/src/swrenderer/line/r_fogboundary.h +++ b/src/swrenderer/line/r_fogboundary.h @@ -26,6 +26,6 @@ namespace swrenderer void RenderSection(int y, int y2, int x1); short spanend[MAXHEIGHT]; - DrawerStyle drawerstyle; + DrawerArgs drawerargs; }; } diff --git a/src/swrenderer/line/r_line.cpp b/src/swrenderer/line/r_line.cpp index ace69ea3b6..3fbe7f0917 100644 --- a/src/swrenderer/line/r_line.cpp +++ b/src/swrenderer/line/r_line.cpp @@ -72,9 +72,6 @@ namespace swrenderer curline = line; - // [RH] Color if not texturing line - drawerargs::dc_color = (((int)(line - segs) * 8) + 4) & 255; - pt1 = line->v1->fPos() - ViewPos; pt2 = line->v2->fPos() - ViewPos; @@ -933,13 +930,16 @@ namespace swrenderer double yscale; fixed_t xoffset = rw_offset; - DrawerStyle drawerstyle; + DrawerArgs drawerargs; + + // [RH] Color if not texturing line + drawerargs.dc_color = (((int)(curline - segs) * 8) + 4) & 255; CameraLight *cameraLight = CameraLight::Instance(); if (cameraLight->fixedlightlev >= 0) - drawerstyle.SetColorMapLight((r_fullbrightignoresectorcolor) ? &FullNormalLight : basecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->fixedlightlev)); + drawerargs.SetColorMapLight((r_fullbrightignoresectorcolor) ? 
&FullNormalLight : basecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->fixedlightlev)); else if (cameraLight->fixedcolormap != nullptr) - drawerstyle.SetColorMapLight(cameraLight->fixedcolormap, 0, 0); + drawerargs.SetColorMapLight(cameraLight->fixedcolormap, 0, 0); // clip wall to the floor and ceiling auto ceilingclip = RenderOpaquePass::Instance()->ceilingclip; @@ -1048,7 +1048,7 @@ namespace swrenderer } RenderWallPart renderWallpart; - renderWallpart.Render(drawerstyle, frontsector, curline, WallC, rw_pic, x1, x2, walltop.ScreenY, wallbottom.ScreenY, rw_midtexturemid, walltexcoords.VStep, walltexcoords.UPos, yscale, MAX(rw_frontcz1, rw_frontcz2), MIN(rw_frontfz1, rw_frontfz2), false, wallshade, rw_offset, rw_light, rw_lightstep, light_list, foggy, basecolormap); + renderWallpart.Render(drawerargs, frontsector, curline, WallC, rw_pic, x1, x2, walltop.ScreenY, wallbottom.ScreenY, rw_midtexturemid, walltexcoords.VStep, walltexcoords.UPos, yscale, MAX(rw_frontcz1, rw_frontcz2), MIN(rw_frontfz1, rw_frontfz2), false, wallshade, rw_offset, rw_light, rw_lightstep, light_list, foggy, basecolormap); } fillshort(ceilingclip + x1, x2 - x1, viewheight); fillshort(floorclip + x1, x2 - x1, 0xffff); @@ -1085,7 +1085,7 @@ namespace swrenderer } RenderWallPart renderWallpart; - renderWallpart.Render(drawerstyle, frontsector, curline, WallC, rw_pic, x1, x2, walltop.ScreenY, wallupper.ScreenY, rw_toptexturemid, walltexcoords.VStep, walltexcoords.UPos, yscale, MAX(rw_frontcz1, rw_frontcz2), MIN(rw_backcz1, rw_backcz2), false, wallshade, rw_offset, rw_light, rw_lightstep, light_list, foggy, basecolormap); + renderWallpart.Render(drawerargs, frontsector, curline, WallC, rw_pic, x1, x2, walltop.ScreenY, wallupper.ScreenY, rw_toptexturemid, walltexcoords.VStep, walltexcoords.UPos, yscale, MAX(rw_frontcz1, rw_frontcz2), MIN(rw_backcz1, rw_backcz2), false, wallshade, rw_offset, rw_light, rw_lightstep, light_list, foggy, basecolormap); } memcpy(ceilingclip + x1, wallupper.ScreenY + x1, (x2 - 
x1) * sizeof(short)); } @@ -1125,7 +1125,7 @@ namespace swrenderer } RenderWallPart renderWallpart; - renderWallpart.Render(drawerstyle, frontsector, curline, WallC, rw_pic, x1, x2, walllower.ScreenY, wallbottom.ScreenY, rw_bottomtexturemid, walltexcoords.VStep, walltexcoords.UPos, yscale, MAX(rw_backfz1, rw_backfz2), MIN(rw_frontfz1, rw_frontfz2), false, wallshade, rw_offset, rw_light, rw_lightstep, light_list, foggy, basecolormap); + renderWallpart.Render(drawerargs, frontsector, curline, WallC, rw_pic, x1, x2, walllower.ScreenY, wallbottom.ScreenY, rw_bottomtexturemid, walltexcoords.VStep, walltexcoords.UPos, yscale, MAX(rw_backfz1, rw_backfz2), MIN(rw_frontfz1, rw_frontfz2), false, wallshade, rw_offset, rw_light, rw_lightstep, light_list, foggy, basecolormap); } memcpy(floorclip + x1, walllower.ScreenY + x1, (x2 - x1) * sizeof(short)); } diff --git a/src/swrenderer/line/r_renderdrawsegment.cpp b/src/swrenderer/line/r_renderdrawsegment.cpp index 2de9d81662..76dc170f1a 100644 --- a/src/swrenderer/line/r_renderdrawsegment.cpp +++ b/src/swrenderer/line/r_renderdrawsegment.cpp @@ -67,8 +67,8 @@ namespace swrenderer curline = ds->curline; FDynamicColormap *patchstylecolormap = nullptr; - DrawerStyle drawerstyle; - bool visible = drawerstyle.SetPatchStyle(LegacyRenderStyles[curline->linedef->flags & ML_ADDTRANS ? STYLE_Add : STYLE_Translucent], + DrawerArgs drawerargs; + bool visible = drawerargs.SetPatchStyle(LegacyRenderStyles[curline->linedef->flags & ML_ADDTRANS ? STYLE_Add : STYLE_Translucent], (float)MIN(curline->linedef->alpha, 1.), 0, 0, patchstylecolormap); if (!visible && !ds->bFogBoundary && !ds->bFakeBoundary) @@ -142,9 +142,9 @@ namespace swrenderer rw_scalestep = ds->iscalestep; if (cameraLight->fixedlightlev >= 0) - drawerstyle.SetColorMapLight((r_fullbrightignoresectorcolor) ? &FullNormalLight : basecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->fixedlightlev)); + drawerargs.SetColorMapLight((r_fullbrightignoresectorcolor) ? 
&FullNormalLight : basecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->fixedlightlev)); else if (cameraLight->fixedcolormap != nullptr) - drawerstyle.SetColorMapLight(cameraLight->fixedcolormap, 0, 0); + drawerargs.SetColorMapLight(cameraLight->fixedcolormap, 0, 0); // find positioning texheight = tex->GetScaledHeightDouble(); @@ -273,7 +273,7 @@ namespace swrenderer { if (cameraLight->fixedcolormap == nullptr && cameraLight->fixedlightlev < 0) { - drawerstyle.SetColorMapLight(basecolormap, rw_light, wallshade); + drawerargs.SetColorMapLight(basecolormap, rw_light, wallshade); } fixed_t iscale = xs_Fix<16>::ToFix(MaskedSWall[x] * MaskedScaleY); @@ -283,7 +283,7 @@ namespace swrenderer else sprtopscreen = CenterY - texturemid * spryscale; - drawerstyle.DrawMaskedColumn(x, iscale, tex, maskedtexturecol[x], spryscale, sprtopscreen, sprflipvert, mfloorclip, mceilingclip); + drawerargs.DrawMaskedColumn(x, iscale, tex, maskedtexturecol[x], spryscale, sprtopscreen, sprflipvert, mfloorclip, mceilingclip); rw_light += rw_lightstep; spryscale += rw_scalestep; @@ -349,7 +349,7 @@ namespace swrenderer GetMaskedWallTopBottom(ds, top, bot); RenderWallPart renderWallpart; - renderWallpart.Render(drawerstyle, frontsector, curline, WallC, rw_pic, x1, x2, mceilingclip, mfloorclip, texturemid, MaskedSWall, maskedtexturecol, ds->yscale, top, bot, true, wallshade, rw_offset, rw_light, rw_lightstep, nullptr, ds->foggy, basecolormap); + renderWallpart.Render(drawerargs, frontsector, curline, WallC, rw_pic, x1, x2, mceilingclip, mfloorclip, texturemid, MaskedSWall, maskedtexturecol, ds->yscale, top, bot, true, wallshade, rw_offset, rw_light, rw_lightstep, nullptr, ds->foggy, basecolormap); } clearfog: @@ -383,8 +383,8 @@ namespace swrenderer double yscale; fixed_t Alpha = Scale(rover->alpha, OPAQUE, 255); - DrawerStyle drawerstyle; - bool visible = drawerstyle.SetPatchStyle(LegacyRenderStyles[rover->flags & FF_ADDITIVETRANS ? 
STYLE_Add : STYLE_Translucent], + DrawerArgs drawerargs; + bool visible = drawerargs.SetPatchStyle(LegacyRenderStyles[rover->flags & FF_ADDITIVETRANS ? STYLE_Add : STYLE_Translucent], Alpha, 0, 0, basecolormap); if (!visible) @@ -445,9 +445,9 @@ namespace swrenderer CameraLight *cameraLight = CameraLight::Instance(); if (cameraLight->fixedlightlev >= 0) - drawerstyle.SetColorMapLight((r_fullbrightignoresectorcolor) ? &FullNormalLight : basecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->fixedlightlev)); + drawerargs.SetColorMapLight((r_fullbrightignoresectorcolor) ? &FullNormalLight : basecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->fixedlightlev)); else if (cameraLight->fixedcolormap != nullptr) - drawerstyle.SetColorMapLight(cameraLight->fixedcolormap, 0, 0); + drawerargs.SetColorMapLight(cameraLight->fixedcolormap, 0, 0); WallC.sz1 = ds->sz1; WallC.sz2 = ds->sz2; @@ -481,7 +481,7 @@ namespace swrenderer GetMaskedWallTopBottom(ds, top, bot); RenderWallPart renderWallpart; - renderWallpart.Render(drawerstyle, frontsector, curline, WallC, rw_pic, x1, x2, wallupper.ScreenY, walllower.ScreenY, texturemid, MaskedSWall, walltexcoords.UPos, yscale, top, bot, true, wallshade, rw_offset, rw_light, rw_lightstep, nullptr, ds->foggy, basecolormap); + renderWallpart.Render(drawerargs, frontsector, curline, WallC, rw_pic, x1, x2, wallupper.ScreenY, walllower.ScreenY, texturemid, MaskedSWall, walltexcoords.UPos, yscale, top, bot, true, wallshade, rw_offset, rw_light, rw_lightstep, nullptr, ds->foggy, basecolormap); } // kg3D - walls of fake floors diff --git a/src/swrenderer/line/r_walldraw.cpp b/src/swrenderer/line/r_walldraw.cpp index 8c17fa6214..2a28b23127 100644 --- a/src/swrenderer/line/r_walldraw.cpp +++ b/src/swrenderer/line/r_walldraw.cpp @@ -170,8 +170,6 @@ namespace swrenderer // Draw a column with support for non-power-of-two ranges void RenderWallPart::Draw1Column(int x, int y1, int y2, WallSampler &sampler, DrawerFunc draw1column) { - using namespace drawerargs; - if 
(r_dynlights && light_list) { // Find column position in view space @@ -180,17 +178,17 @@ namespace swrenderer float t = (x - WallC.sx1 + 0.5f) / (WallC.sx2 - WallC.sx1); float wcol = w1 * (1.0f - t) + w2 * t; float zcol = 1.0f / wcol; - dc_viewpos.X = (float)((x + 0.5 - CenterX) / CenterX * zcol); - dc_viewpos.Y = zcol; - dc_viewpos.Z = (float)((CenterY - y1 - 0.5) / InvZtoScale * zcol); - dc_viewpos_step.Z = (float)(-zcol / InvZtoScale); + drawerargs.dc_viewpos.X = (float)((x + 0.5 - CenterX) / CenterX * zcol); + drawerargs.dc_viewpos.Y = zcol; + drawerargs.dc_viewpos.Z = (float)((CenterY - y1 - 0.5) / InvZtoScale * zcol); + drawerargs.dc_viewpos_step.Z = (float)(-zcol / InvZtoScale); static TriLight lightbuffer[64 * 1024]; static int nextlightindex = 0; // Setup lights for column - dc_num_lights = 0; - dc_lights = lightbuffer + nextlightindex; + drawerargs.dc_num_lights = 0; + drawerargs.dc_lights = lightbuffer + nextlightindex; FLightNode *cur_node = light_list; while (cur_node && nextlightindex < 64 * 1024) { @@ -200,14 +198,14 @@ namespace swrenderer double lightY = cur_node->lightsource->Y() - ViewPos.Y; double lightZ = cur_node->lightsource->Z() - ViewPos.Z; - float lx = (float)(lightX * ViewSin - lightY * ViewCos) - dc_viewpos.X; - float ly = (float)(lightX * ViewTanCos + lightY * ViewTanSin) - dc_viewpos.Y; + float lx = (float)(lightX * ViewSin - lightY * ViewCos) - drawerargs.dc_viewpos.X; + float ly = (float)(lightX * ViewTanCos + lightY * ViewTanSin) - drawerargs.dc_viewpos.Y; float lz = (float)lightZ; // Precalculate the constant part of the dot here so the drawer doesn't have to. bool is_point_light = (cur_node->lightsource->flags4 & MF4_ATTENUATE) != 0; float lconstant = lx * lx + ly * ly; - float nlconstant = is_point_light ? lx * dc_normal.X + ly * dc_normal.Y : 0.0f; + float nlconstant = is_point_light ? 
lx * drawerargs.dc_normal.X + ly * drawerargs.dc_normal.Y : 0.0f; // Include light only if it touches this column float radius = cur_node->lightsource->GetRadius(); @@ -218,7 +216,7 @@ namespace swrenderer uint32_t blue = cur_node->lightsource->GetBlue(); nextlightindex++; - auto &light = dc_lights[dc_num_lights++]; + auto &light = drawerargs.dc_lights[drawerargs.dc_num_lights++]; light.x = lconstant; light.y = nlconstant; light.z = lz; @@ -235,22 +233,22 @@ namespace swrenderer } else { - dc_num_lights = 0; + drawerargs.dc_num_lights = 0; } if (r_swtruecolor) { int count = y2 - y1; - dc_source = sampler.source; - dc_source2 = sampler.source2; - dc_texturefracx = sampler.texturefracx; - dc_dest = (ylookup[y1] + x) * 4 + dc_destorg; - dc_count = count; - dc_iscale = sampler.uv_step; - dc_texturefrac = sampler.uv_pos; - dc_textureheight = sampler.height; - (drawerstyle.Drawers()->*draw1column)(); + drawerargs.dc_source = sampler.source; + drawerargs.dc_source2 = sampler.source2; + drawerargs.dc_texturefracx = sampler.texturefracx; + drawerargs.SetDest(x, y1); + drawerargs.dc_count = count; + drawerargs.dc_iscale = sampler.uv_step; + drawerargs.dc_texturefrac = sampler.uv_pos; + drawerargs.dc_textureheight = sampler.height; + (drawerargs.Drawers()->*draw1column)(drawerargs); uint64_t step64 = sampler.uv_step; uint64_t pos64 = sampler.uv_pos; @@ -262,14 +260,14 @@ namespace swrenderer { int count = y2 - y1; - dc_source = sampler.source; - dc_source2 = sampler.source2; - dc_texturefracx = sampler.texturefracx; - dc_dest = (ylookup[y1] + x) + dc_destorg; - dc_count = count; - dc_iscale = sampler.uv_step; - dc_texturefrac = sampler.uv_pos; - (drawerstyle.Drawers()->*draw1column)(); + drawerargs.dc_source = sampler.source; + drawerargs.dc_source2 = sampler.source2; + drawerargs.dc_texturefracx = sampler.texturefracx; + drawerargs.SetDest(x, y1); + drawerargs.dc_count = count; + drawerargs.dc_iscale = sampler.uv_step; + drawerargs.dc_texturefrac = sampler.uv_pos; + 
(drawerargs.Drawers()->*draw1column)(drawerargs); uint64_t step64 = sampler.uv_step; uint64_t pos64 = sampler.uv_pos; @@ -288,14 +286,14 @@ namespace swrenderer next_uv_wrap++; uint32_t count = MIN(left, next_uv_wrap); - dc_source = sampler.source; - dc_source2 = sampler.source2; - dc_texturefracx = sampler.texturefracx; - dc_dest = (ylookup[y1] + x) + dc_destorg; - dc_count = count; - dc_iscale = sampler.uv_step; - dc_texturefrac = uv_pos; - (drawerstyle.Drawers()->*draw1column)(); + drawerargs.dc_source = sampler.source; + drawerargs.dc_source2 = sampler.source2; + drawerargs.dc_texturefracx = sampler.texturefracx; + drawerargs.SetDest(x, y1); + drawerargs.dc_count = count; + drawerargs.dc_iscale = sampler.uv_step; + drawerargs.dc_texturefrac = uv_pos; + (drawerargs.Drawers()->*draw1column)(drawerargs); left -= count; uv_pos += sampler.uv_step * count; @@ -310,8 +308,6 @@ namespace swrenderer void RenderWallPart::ProcessWallWorker(const short *uwal, const short *dwal, double texturemid, float *swal, fixed_t *lwal, DrawerFunc drawcolumn) { - using namespace drawerargs; - if (rw_pic->UseType == FTexture::TEX_Null) return; @@ -324,33 +320,33 @@ namespace swrenderer texturemid = 0; } - dc_wall_fracbits = r_swtruecolor ? FRACBITS : fracbits; + drawerargs.dc_wall_fracbits = r_swtruecolor ? 
FRACBITS : fracbits; CameraLight *cameraLight = CameraLight::Instance(); bool fixed = (cameraLight->fixedcolormap != NULL || cameraLight->fixedlightlev >= 0); if (fixed) { - dc_wall_colormap[0] = dc_colormap; - dc_wall_colormap[1] = dc_colormap; - dc_wall_colormap[2] = dc_colormap; - dc_wall_colormap[3] = dc_colormap; - dc_wall_light[0] = 0; - dc_wall_light[1] = 0; - dc_wall_light[2] = 0; - dc_wall_light[3] = 0; + drawerargs.dc_wall_colormap[0] = drawerargs.dc_colormap; + drawerargs.dc_wall_colormap[1] = drawerargs.dc_colormap; + drawerargs.dc_wall_colormap[2] = drawerargs.dc_colormap; + drawerargs.dc_wall_colormap[3] = drawerargs.dc_colormap; + drawerargs.dc_wall_light[0] = 0; + drawerargs.dc_wall_light[1] = 0; + drawerargs.dc_wall_light[2] = 0; + drawerargs.dc_wall_light[3] = 0; } if (cameraLight->fixedcolormap) - drawerstyle.SetColorMapLight(cameraLight->fixedcolormap, 0, 0); + drawerargs.SetColorMapLight(cameraLight->fixedcolormap, 0, 0); else - drawerstyle.SetColorMapLight(basecolormap, 0, 0); + drawerargs.SetColorMapLight(basecolormap, 0, 0); float dx = WallC.tright.X - WallC.tleft.X; float dy = WallC.tright.Y - WallC.tleft.Y; float length = sqrt(dx * dx + dy * dy); - dc_normal.X = dy / length; - dc_normal.Y = -dx / length; - dc_normal.Z = 0.0f; + drawerargs.dc_normal.X = dy / length; + drawerargs.dc_normal.Y = -dx / length; + drawerargs.dc_normal.Z = 0.0f; double xmagnitude = 1.0; @@ -362,7 +358,7 @@ namespace swrenderer continue; if (!fixed) - drawerstyle.SetColorMapLight(basecolormap, light, wallshade); + drawerargs.SetColorMapLight(basecolormap, light, wallshade); if (x + 1 < x2) xmagnitude = fabs(FIXED2DBL(lwal[x + 1]) - FIXED2DBL(lwal[x])); @@ -392,7 +388,7 @@ namespace swrenderer void RenderWallPart::ProcessTranslucentWall(const short *uwal, const short *dwal, double texturemid, float *swal, fixed_t *lwal) { - DrawerFunc drawcol1 = drawerstyle.GetTransMaskDrawer(); + DrawerFunc drawcol1 = drawerargs.GetTransMaskDrawer(); if (drawcol1 == nullptr) { // 
The current translucency is unsupported, so draw with regular ProcessMaskedWall instead. @@ -445,7 +441,7 @@ namespace swrenderer { if (mask) { - if (drawerstyle.colfunc == drawerstyle.basecolfunc) + if (drawerargs.colfunc == drawerargs.basecolfunc) { ProcessMaskedWall(uwal, dwal, texturemid, swal, lwal); } @@ -540,9 +536,9 @@ namespace swrenderer } } - void RenderWallPart::Render(const DrawerStyle &drawerstyle, sector_t *frontsector, seg_t *curline, const FWallCoords &WallC, FTexture *pic, int x1, int x2, const short *walltop, const short *wallbottom, double texturemid, float *swall, fixed_t *lwall, double yscale, double top, double bottom, bool mask, int wallshade, fixed_t xoffset, float light, float lightstep, FLightNode *light_list, bool foggy, FDynamicColormap *basecolormap) + void RenderWallPart::Render(const DrawerArgs &drawerargs, sector_t *frontsector, seg_t *curline, const FWallCoords &WallC, FTexture *pic, int x1, int x2, const short *walltop, const short *wallbottom, double texturemid, float *swall, fixed_t *lwall, double yscale, double top, double bottom, bool mask, int wallshade, fixed_t xoffset, float light, float lightstep, FLightNode *light_list, bool foggy, FDynamicColormap *basecolormap) { - this->drawerstyle = drawerstyle; + this->drawerargs = drawerargs; this->x1 = x1; this->x2 = x2; this->frontsector = frontsector; diff --git a/src/swrenderer/line/r_walldraw.h b/src/swrenderer/line/r_walldraw.h index e5d7d11538..24060129ce 100644 --- a/src/swrenderer/line/r_walldraw.h +++ b/src/swrenderer/line/r_walldraw.h @@ -13,7 +13,7 @@ #pragma once -#include "swrenderer/drawers/r_draw.h" +#include "swrenderer/drawers/r_drawerargs.h" #include "r_line.h" class FTexture; @@ -34,7 +34,7 @@ namespace swrenderer { public: void Render( - const DrawerStyle &drawerstyle, + const DrawerArgs &drawerargs, sector_t *frontsector, seg_t *curline, const FWallCoords &WallC, @@ -85,7 +85,7 @@ namespace swrenderer FLightNode *light_list = nullptr; bool mask = false; - 
DrawerStyle drawerstyle; + DrawerArgs drawerargs; }; struct WallSampler diff --git a/src/swrenderer/plane/r_flatplane.cpp b/src/swrenderer/plane/r_flatplane.cpp index 54850bc6fc..c8e14e5da7 100644 --- a/src/swrenderer/plane/r_flatplane.cpp +++ b/src/swrenderer/plane/r_flatplane.cpp @@ -46,21 +46,20 @@ namespace swrenderer { void RenderFlatPlane::Render(VisiblePlane *pl, double _xscale, double _yscale, fixed_t alpha, bool additive, bool masked, FDynamicColormap *colormap, FTexture *texture) { - using namespace drawerargs; - if (alpha <= 0) { return; } - drawerstyle.SetSpanTexture(texture); + drawerargs.ds_color = 3; + drawerargs.SetSpanTexture(texture); double planeang = (pl->xform.Angle + pl->xform.baseAngle).Radians(); double xstep, ystep, leftxfrac, leftyfrac, rightxfrac, rightyfrac; double x; - xscale = xs_ToFixed(32 - ds_xbits, _xscale); - yscale = xs_ToFixed(32 - ds_ybits, _yscale); + xscale = xs_ToFixed(32 - drawerargs.ds_xbits, _xscale); + yscale = xs_ToFixed(32 - drawerargs.ds_ybits, _yscale); if (planeang != 0) { double cosine = cos(planeang), sine = sin(planeang); @@ -109,16 +108,16 @@ namespace swrenderer basecolormap = colormap; GlobVis = LightVisibility::Instance()->FlatPlaneGlobVis() / planeheight; - ds_light = 0; + drawerargs.ds_light = 0; CameraLight *cameraLight = CameraLight::Instance(); if (cameraLight->fixedlightlev >= 0) { - drawerstyle.SetDSColorMapLight(basecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->fixedlightlev)); + drawerargs.SetDSColorMapLight(basecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->fixedlightlev)); plane_shade = false; } else if (cameraLight->fixedcolormap) { - drawerstyle.SetDSColorMapLight(cameraLight->fixedcolormap, 0, 0); + drawerargs.SetDSColorMapLight(cameraLight->fixedcolormap, 0, 0); plane_shade = false; } else @@ -127,7 +126,7 @@ namespace swrenderer planeshade = LIGHT2SHADE(pl->lightlevel); } - drawerstyle.SetSpanStyle(masked, additive, alpha); + drawerargs.SetSpanStyle(masked, additive, alpha); light_list = pl->lights; 
@@ -136,8 +135,6 @@ namespace swrenderer void RenderFlatPlane::RenderLine(int y, int x1, int x2) { - using namespace drawerargs; - double distance; #ifdef RANGECHECK @@ -152,25 +149,25 @@ namespace swrenderer distance = planeheight * yslope[y]; - if (ds_xbits != 0) + if (drawerargs.ds_xbits != 0) { - ds_xstep = xs_ToFixed(32 - ds_xbits, distance * xstepscale); - ds_xfrac = xs_ToFixed(32 - ds_xbits, distance * basexfrac) + pviewx; + drawerargs.ds_xstep = xs_ToFixed(32 - drawerargs.ds_xbits, distance * xstepscale); + drawerargs.ds_xfrac = xs_ToFixed(32 - drawerargs.ds_xbits, distance * basexfrac) + pviewx; } else { - ds_xstep = 0; - ds_xfrac = 0; + drawerargs.ds_xstep = 0; + drawerargs.ds_xfrac = 0; } - if (ds_ybits != 0) + if (drawerargs.ds_ybits != 0) { - ds_ystep = xs_ToFixed(32 - ds_ybits, distance * ystepscale); - ds_yfrac = xs_ToFixed(32 - ds_ybits, distance * baseyfrac) + pviewy; + drawerargs.ds_ystep = xs_ToFixed(32 - drawerargs.ds_ybits, distance * ystepscale); + drawerargs.ds_yfrac = xs_ToFixed(32 - drawerargs.ds_ybits, distance * baseyfrac) + pviewy; } else { - ds_ystep = 0; - ds_yfrac = 0; + drawerargs.ds_ystep = 0; + drawerargs.ds_yfrac = 0; } if (r_swtruecolor) @@ -180,35 +177,35 @@ namespace swrenderer double ymagnitude = fabs(xstepscale * (distance2 - distance) * FocalLengthX); double magnitude = MAX(ymagnitude, xmagnitude); double min_lod = -1000.0; - ds_lod = MAX(log2(magnitude) + r_lod_bias, min_lod); + drawerargs.ds_lod = MAX(log2(magnitude) + r_lod_bias, min_lod); } if (plane_shade) { // Determine lighting based on the span's distance from the viewer. 
- drawerstyle.SetDSColorMapLight(basecolormap, (float)(GlobVis * fabs(CenterY - y)), planeshade); + drawerargs.SetDSColorMapLight(basecolormap, (float)(GlobVis * fabs(CenterY - y)), planeshade); } if (r_dynlights) { // Find row position in view space float zspan = (float)(planeheight / (fabs(y + 0.5 - CenterY) / InvZtoScale)); - dc_viewpos.X = (float)((x1 + 0.5 - CenterX) / CenterX * zspan); - dc_viewpos.Y = zspan; - dc_viewpos.Z = (float)((CenterY - y - 0.5) / InvZtoScale * zspan); - dc_viewpos_step.X = (float)(zspan / CenterX); + drawerargs.dc_viewpos.X = (float)((x1 + 0.5 - CenterX) / CenterX * zspan); + drawerargs.dc_viewpos.Y = zspan; + drawerargs.dc_viewpos.Z = (float)((CenterY - y - 0.5) / InvZtoScale * zspan); + drawerargs.dc_viewpos_step.X = (float)(zspan / CenterX); static TriLight lightbuffer[64 * 1024]; static int nextlightindex = 0; // Plane normal - dc_normal.X = 0.0f; - dc_normal.Y = 0.0f; - dc_normal.Z = (y >= CenterY) ? 1.0f : -1.0f; + drawerargs.dc_normal.X = 0.0f; + drawerargs.dc_normal.Y = 0.0f; + drawerargs.dc_normal.Z = (y >= CenterY) ? 1.0f : -1.0f; // Setup lights for row - dc_num_lights = 0; - dc_lights = lightbuffer + nextlightindex; + drawerargs.dc_num_lights = 0; + drawerargs.dc_lights = lightbuffer + nextlightindex; VisiblePlaneLight *cur_node = light_list; while (cur_node && nextlightindex < 64 * 1024) { @@ -217,13 +214,13 @@ namespace swrenderer double lightZ = cur_node->lightsource->Z() - ViewPos.Z; float lx = (float)(lightX * ViewSin - lightY * ViewCos); - float ly = (float)(lightX * ViewTanCos + lightY * ViewTanSin) - dc_viewpos.Y; - float lz = (float)lightZ - dc_viewpos.Z; + float ly = (float)(lightX * ViewTanCos + lightY * ViewTanSin) - drawerargs.dc_viewpos.Y; + float lz = (float)lightZ - drawerargs.dc_viewpos.Z; // Precalculate the constant part of the dot here so the drawer doesn't have to. 
bool is_point_light = (cur_node->lightsource->flags4 & MF4_ATTENUATE) != 0; float lconstant = ly * ly + lz * lz; - float nlconstant = is_point_light ? lz * dc_normal.Z : 0.0f; + float nlconstant = is_point_light ? lz * drawerargs.dc_normal.Z : 0.0f; // Include light only if it touches this row float radius = cur_node->lightsource->GetRadius(); @@ -234,7 +231,7 @@ namespace swrenderer uint32_t blue = cur_node->lightsource->GetBlue(); nextlightindex++; - auto &light = dc_lights[dc_num_lights++]; + auto &light = drawerargs.dc_lights[drawerargs.dc_num_lights++]; light.x = lx; light.y = lconstant; light.z = nlconstant; @@ -250,14 +247,14 @@ namespace swrenderer } else { - dc_num_lights = 0; + drawerargs.dc_num_lights = 0; } - ds_y = y; - ds_x1 = x1; - ds_x2 = x2; + drawerargs.ds_y = y; + drawerargs.ds_x1 = x1; + drawerargs.ds_x2 = x2; - (drawerstyle.Drawers()->*drawerstyle.spanfunc)(); + (drawerargs.Drawers()->*drawerargs.spanfunc)(drawerargs); } void RenderFlatPlane::StepColumn() @@ -319,6 +316,6 @@ namespace swrenderer void RenderColoredPlane::RenderLine(int y, int x1, int x2) { - drawerstyle.DrawColoredSpan(y, x1, x2); + drawerargs.DrawColoredSpan(y, x1, x2); } } diff --git a/src/swrenderer/plane/r_flatplane.h b/src/swrenderer/plane/r_flatplane.h index 87efcfdeb2..a47ccca272 100644 --- a/src/swrenderer/plane/r_flatplane.h +++ b/src/swrenderer/plane/r_flatplane.h @@ -14,7 +14,7 @@ #pragma once #include "r_planerenderer.h" -#include "swrenderer/drawers/r_draw.h" +#include "swrenderer/drawers/r_drawerargs.h" namespace swrenderer { @@ -42,7 +42,7 @@ namespace swrenderer double basexfrac, baseyfrac; VisiblePlaneLight *light_list; - DrawerStyle drawerstyle; + DrawerArgs drawerargs; static float yslope[MAXHEIGHT]; }; @@ -55,6 +55,6 @@ namespace swrenderer private: void RenderLine(int y, int x1, int x2) override; - DrawerStyle drawerstyle; + DrawerArgs drawerargs; }; } diff --git a/src/swrenderer/plane/r_skyplane.cpp b/src/swrenderer/plane/r_skyplane.cpp index 
72ea005591..9b7284b7c9 100644 --- a/src/swrenderer/plane/r_skyplane.cpp +++ b/src/swrenderer/plane/r_skyplane.cpp @@ -151,13 +151,13 @@ namespace swrenderer CameraLight *cameraLight = CameraLight::Instance(); if (cameraLight->fixedcolormap) { - drawerstyle.SetColorMapLight(cameraLight->fixedcolormap, 0, 0); + drawerargs.SetColorMapLight(cameraLight->fixedcolormap, 0, 0); } else { fakefixed = true; cameraLight->fixedcolormap = &NormalLight; - drawerstyle.SetColorMapLight(cameraLight->fixedcolormap, 0, 0); + drawerargs.SetColorMapLight(cameraLight->fixedcolormap, 0, 0); } DrawSky(pl); @@ -168,8 +168,6 @@ namespace swrenderer void RenderSkyPlane::DrawSkyColumnStripe(int start_x, int y1, int y2, int columns, double scale, double texturemid, double yrepeat) { - using namespace drawerargs; - RenderPortal *renderportal = RenderPortal::Instance(); uint32_t height = frontskytex->GetHeight(); @@ -203,24 +201,24 @@ namespace swrenderer if (r_swtruecolor) { - dc_wall_source[i] = (const uint8_t *)frontskytex->GetColumnBgra(angle1, nullptr); - dc_wall_source2[i] = backskytex ? (const uint8_t *)backskytex->GetColumnBgra(angle2, nullptr) : nullptr; + drawerargs.dc_wall_source[i] = (const uint8_t *)frontskytex->GetColumnBgra(angle1, nullptr); + drawerargs.dc_wall_source2[i] = backskytex ? (const uint8_t *)backskytex->GetColumnBgra(angle2, nullptr) : nullptr; } else { - dc_wall_source[i] = (const uint8_t *)frontskytex->GetColumn(angle1, nullptr); - dc_wall_source2[i] = backskytex ? (const uint8_t *)backskytex->GetColumn(angle2, nullptr) : nullptr; + drawerargs.dc_wall_source[i] = (const uint8_t *)frontskytex->GetColumn(angle1, nullptr); + drawerargs.dc_wall_source2[i] = backskytex ? (const uint8_t *)backskytex->GetColumn(angle2, nullptr) : nullptr; } - dc_wall_iscale[i] = uv_step; - dc_wall_texturefrac[i] = uv_pos; + drawerargs.dc_wall_iscale[i] = uv_step; + drawerargs.dc_wall_texturefrac[i] = uv_pos; } - dc_wall_sourceheight[0] = height; - dc_wall_sourceheight[1] = backskytex ? 
backskytex->GetHeight() : height; + drawerargs.dc_wall_sourceheight[0] = height; + drawerargs.dc_wall_sourceheight[1] = backskytex ? backskytex->GetHeight() : height; int pixelsize = r_swtruecolor ? 4 : 1; - dc_dest = (ylookup[y1] + start_x) * pixelsize + dc_destorg; - dc_count = y2 - y1; + drawerargs.SetDest(start_x, y1); + drawerargs.dc_count = y2 - y1; uint32_t solid_top = frontskytex->GetSkyCapColor(false); uint32_t solid_bottom = frontskytex->GetSkyCapColor(true); @@ -228,9 +226,9 @@ namespace swrenderer bool fadeSky = (r_skymode == 2 && !(level.flags & LEVEL_FORCETILEDSKY)); if (!backskytex) - drawerstyle.DrawSingleSkyColumn(solid_top, solid_bottom, fadeSky); + drawerargs.DrawSingleSkyColumn(solid_top, solid_bottom, fadeSky); else - drawerstyle.DrawDoubleSkyColumn(solid_top, solid_bottom, fadeSky); + drawerargs.DrawDoubleSkyColumn(solid_top, solid_bottom, fadeSky); } void RenderSkyPlane::DrawSkyColumn(int start_x, int y1, int y2, int columns) diff --git a/src/swrenderer/plane/r_skyplane.h b/src/swrenderer/plane/r_skyplane.h index da6fd689ef..969b8deae9 100644 --- a/src/swrenderer/plane/r_skyplane.h +++ b/src/swrenderer/plane/r_skyplane.h @@ -14,7 +14,7 @@ #pragma once #include "r_visibleplane.h" -#include "swrenderer/drawers/r_draw.h" +#include "swrenderer/drawers/r_drawerargs.h" namespace swrenderer { @@ -38,6 +38,6 @@ namespace swrenderer double skymid = 0.0; angle_t skyangle = 0; - DrawerStyle drawerstyle; + DrawerArgs drawerargs; }; } diff --git a/src/swrenderer/plane/r_slopeplane.cpp b/src/swrenderer/plane/r_slopeplane.cpp index 03817b3ae6..599c279b46 100644 --- a/src/swrenderer/plane/r_slopeplane.cpp +++ b/src/swrenderer/plane/r_slopeplane.cpp @@ -50,8 +50,6 @@ namespace swrenderer { void RenderSlopePlane::Render(VisiblePlane *pl, double _xscale, double _yscale, fixed_t alpha, bool additive, bool masked, FDynamicColormap *colormap, FTexture *texture) { - using namespace drawerargs; - static const float ifloatpow2[16] = { // ifloatpow2[i] = 1 / (1 << i) 
@@ -72,16 +70,17 @@ namespace swrenderer return; } - drawerstyle.SetSpanTexture(texture); + drawerargs.ds_color = 3; + drawerargs.SetSpanTexture(texture); - lxscale = _xscale * ifloatpow2[ds_xbits]; - lyscale = _yscale * ifloatpow2[ds_ybits]; + lxscale = _xscale * ifloatpow2[drawerargs.ds_xbits]; + lyscale = _yscale * ifloatpow2[drawerargs.ds_ybits]; xscale = 64.f / lxscale; yscale = 64.f / lyscale; zeroheight = pl->height.ZatPoint(ViewPos); - pviewx = xs_ToFixed(32 - ds_xbits, pl->xform.xOffs * pl->xform.xScale); - pviewy = xs_ToFixed(32 - ds_ybits, pl->xform.yOffs * pl->xform.yScale); + pviewx = xs_ToFixed(32 - drawerargs.ds_xbits, pl->xform.xOffs * pl->xform.xScale); + pviewy = xs_ToFixed(32 - drawerargs.ds_ybits, pl->xform.yOffs * pl->xform.yScale); planeang = (pl->xform.Angle + pl->xform.baseAngle).Radians(); // p is the texture origin in view space @@ -155,27 +154,27 @@ namespace swrenderer CameraLight *cameraLight = CameraLight::Instance(); if (cameraLight->fixedlightlev >= 0) { - drawerstyle.SetDSColorMapLight(basecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->fixedlightlev)); + drawerargs.SetDSColorMapLight(basecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->fixedlightlev)); plane_shade = false; } else if (cameraLight->fixedcolormap) { - drawerstyle.SetDSColorMapLight(cameraLight->fixedcolormap, 0, 0); + drawerargs.SetDSColorMapLight(cameraLight->fixedcolormap, 0, 0); plane_shade = false; } else { - drawerstyle.SetDSColorMapLight(basecolormap, 0, 0); + drawerargs.SetDSColorMapLight(basecolormap, 0, 0); plane_shade = true; planeshade = LIGHT2SHADE(pl->lightlevel); } // Hack in support for 1 x Z and Z x 1 texture sizes - if (ds_ybits == 0) + if (drawerargs.ds_ybits == 0) { plane_sv[2] = plane_sv[1] = plane_sv[0] = 0; } - if (ds_xbits == 0) + if (drawerargs.ds_xbits == 0) { plane_su[2] = plane_su[1] = plane_su[0] = 0; } @@ -185,6 +184,6 @@ namespace swrenderer void RenderSlopePlane::RenderLine(int y, int x1, int x2) { - drawerstyle.DrawTiltedSpan(y, x1, x2, plane_sz, 
plane_su, plane_sv, plane_shade, planeshade, planelightfloat, pviewx, pviewy, basecolormap); + drawerargs.DrawTiltedSpan(y, x1, x2, plane_sz, plane_su, plane_sv, plane_shade, planeshade, planelightfloat, pviewx, pviewy, basecolormap); } } diff --git a/src/swrenderer/plane/r_slopeplane.h b/src/swrenderer/plane/r_slopeplane.h index 8a95967432..dd0820ab2a 100644 --- a/src/swrenderer/plane/r_slopeplane.h +++ b/src/swrenderer/plane/r_slopeplane.h @@ -14,7 +14,7 @@ #pragma once #include "r_planerenderer.h" -#include "swrenderer/drawers/r_draw.h" +#include "swrenderer/drawers/r_drawerargs.h" namespace swrenderer { @@ -33,6 +33,6 @@ namespace swrenderer fixed_t pviewx, pviewy; fixed_t xscale, yscale; FDynamicColormap *basecolormap; - DrawerStyle drawerstyle; + DrawerArgs drawerargs; }; } diff --git a/src/swrenderer/plane/r_visibleplanelist.cpp b/src/swrenderer/plane/r_visibleplanelist.cpp index a91d4a0195..97378c1aec 100644 --- a/src/swrenderer/plane/r_visibleplanelist.cpp +++ b/src/swrenderer/plane/r_visibleplanelist.cpp @@ -326,8 +326,6 @@ namespace swrenderer int i; int vpcount = 0; - drawerargs::ds_color = 3; - RenderPortal *renderportal = RenderPortal::Instance(); for (i = 0; i < MAXVISPLANES; i++) @@ -352,8 +350,6 @@ namespace swrenderer VisiblePlane *pl; int i; - drawerargs::ds_color = 3; - DVector3 oViewPos = ViewPos; DAngle oViewAngle = ViewAngle; diff --git a/src/swrenderer/scene/r_viewport.cpp b/src/swrenderer/scene/r_viewport.cpp index 1753ab0b86..dc4a246b8d 100644 --- a/src/swrenderer/scene/r_viewport.cpp +++ b/src/swrenderer/scene/r_viewport.cpp @@ -55,6 +55,10 @@ namespace swrenderer double WallTMapScale2; + uint8_t *dc_destorg; + int dc_destheight; + int dc_pitch; + // The xtoviewangleangle[] table maps a screen pixel // to the lowest viewangle that maps back to x ranges // from clipangle to -clipangle. 
@@ -148,8 +152,6 @@ namespace swrenderer void RenderViewport::SetupBuffer() { - using namespace drawerargs; - static BYTE *lastbuff = NULL; int pitch = RenderTarget->GetPitch(); diff --git a/src/swrenderer/scene/r_viewport.h b/src/swrenderer/scene/r_viewport.h index c6445dba2f..c036adb5a9 100644 --- a/src/swrenderer/scene/r_viewport.h +++ b/src/swrenderer/scene/r_viewport.h @@ -34,6 +34,10 @@ namespace swrenderer extern double globaldclip; extern angle_t xtoviewangle[MAXWIDTH + 1]; + extern uint8_t *dc_destorg; + extern int dc_destheight; + extern int dc_pitch; + class RenderViewport { public: diff --git a/src/swrenderer/things/r_decal.cpp b/src/swrenderer/things/r_decal.cpp index 6c0fea63fe..9a831f59b3 100644 --- a/src/swrenderer/things/r_decal.cpp +++ b/src/swrenderer/things/r_decal.cpp @@ -274,18 +274,18 @@ namespace swrenderer { int x = x1; - DrawerStyle drawerstyle; + DrawerArgs drawerargs; if (cameraLight->fixedlightlev >= 0) - drawerstyle.SetColorMapLight((!level.PreserveSectorColor()) ? &FullNormalLight : usecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->fixedlightlev)); + drawerargs.SetColorMapLight((r_fullbrightignoresectorcolor) ? &FullNormalLight : usecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->fixedlightlev)); else if (cameraLight->fixedcolormap != NULL) - drawerstyle.SetColorMapLight(cameraLight->fixedcolormap, 0, 0); + drawerargs.SetColorMapLight(cameraLight->fixedcolormap, 0, 0); else if (!foggy && (decal->RenderFlags & RF_FULLBRIGHT)) - drawerstyle.SetColorMapLight((!level.PreserveSectorColor()) ? &FullNormalLight : usecolormap, 0, 0); + drawerargs.SetColorMapLight((r_fullbrightignoresectorcolor) ? 
&FullNormalLight : usecolormap, 0, 0); else calclighting = true; - bool visible = drawerstyle.SetPatchStyle(decal->RenderStyle, (float)decal->Alpha, decal->Translation, decal->AlphaColor, basecolormap); + bool visible = drawerargs.SetPatchStyle(decal->RenderStyle, (float)decal->Alpha, decal->Translation, decal->AlphaColor, basecolormap); // R_SetPatchStyle can modify basecolormap. if (rereadcolormap) @@ -299,9 +299,9 @@ namespace swrenderer { if (calclighting) { // calculate lighting - drawerstyle.SetColorMapLight(usecolormap, light, wallshade); + drawerargs.SetColorMapLight(usecolormap, light, wallshade); } - DrawColumn(drawerstyle, x, WallSpriteTile, walltexcoords, texturemid, maskedScaleY, sprflipvert, mfloorclip, mceilingclip); + DrawColumn(drawerargs, x, WallSpriteTile, walltexcoords, texturemid, maskedScaleY, sprflipvert, mfloorclip, mceilingclip); light += lightstep; x++; } @@ -315,7 +315,7 @@ namespace swrenderer } while (needrepeat--); } - void RenderDecal::DrawColumn(DrawerStyle &drawerstyle, int x, FTexture *WallSpriteTile, const ProjectedWallTexcoords &walltexcoords, double texturemid, float maskedScaleY, bool sprflipvert, const short *mfloorclip, const short *mceilingclip) + void RenderDecal::DrawColumn(DrawerArgs &drawerargs, int x, FTexture *WallSpriteTile, const ProjectedWallTexcoords &walltexcoords, double texturemid, float maskedScaleY, bool sprflipvert, const short *mfloorclip, const short *mceilingclip) { float iscale = walltexcoords.VStep[x] * maskedScaleY; double spryscale = 1 / iscale; @@ -325,6 +325,6 @@ namespace swrenderer else sprtopscreen = CenterY - texturemid * spryscale; - drawerstyle.DrawMaskedColumn(x, FLOAT2FIXED(iscale), WallSpriteTile, walltexcoords.UPos[x], spryscale, sprtopscreen, sprflipvert, mfloorclip, mceilingclip); + drawerargs.DrawMaskedColumn(x, FLOAT2FIXED(iscale), WallSpriteTile, walltexcoords.UPos[x], spryscale, sprtopscreen, sprflipvert, mfloorclip, mceilingclip); } } diff --git a/src/swrenderer/things/r_decal.h 
b/src/swrenderer/things/r_decal.h index b70c08ddf8..7124369885 100644 --- a/src/swrenderer/things/r_decal.h +++ b/src/swrenderer/things/r_decal.h @@ -20,7 +20,7 @@ namespace swrenderer { struct DrawSegment; class ProjectedWallTexcoords; - class DrawerStyle; + class DrawerArgs; class RenderDecal { @@ -29,6 +29,6 @@ namespace swrenderer private: static void Render(side_t *wall, DBaseDecal *first, DrawSegment *clipper, int wallshade, float lightleft, float lightstep, seg_t *curline, const FWallCoords &wallC, bool foggy, FDynamicColormap *basecolormap, const short *walltop, const short *wallbottom, int pass); - static void DrawColumn(DrawerStyle &drawerstyle, int x, FTexture *WallSpriteTile, const ProjectedWallTexcoords &walltexcoords, double texturemid, float maskedScaleY, bool sprflipvert, const short *mfloorclip, const short *mceilingclip); + static void DrawColumn(DrawerArgs &drawerargs, int x, FTexture *WallSpriteTile, const ProjectedWallTexcoords &walltexcoords, double texturemid, float maskedScaleY, bool sprflipvert, const short *mfloorclip, const short *mceilingclip); }; } diff --git a/src/swrenderer/things/r_particle.cpp b/src/swrenderer/things/r_particle.cpp index 5d978fd9f5..461964314c 100644 --- a/src/swrenderer/things/r_particle.cpp +++ b/src/swrenderer/things/r_particle.cpp @@ -208,8 +208,6 @@ namespace swrenderer void RenderParticle::Render(short *cliptop, short *clipbottom, int minZ, int maxZ) { - using namespace drawerargs; - auto vis = this; int spacing; diff --git a/src/swrenderer/things/r_playersprite.cpp b/src/swrenderer/things/r_playersprite.cpp index b4b76cd621..051fd403c5 100644 --- a/src/swrenderer/things/r_playersprite.cpp +++ b/src/swrenderer/things/r_playersprite.cpp @@ -587,12 +587,12 @@ namespace swrenderer return; } - DrawerStyle drawerstyle; - drawerstyle.SetColorMapLight(Light.BaseColormap, 0, Light.ColormapNum << FRACBITS); + DrawerArgs drawerargs; + drawerargs.SetColorMapLight(Light.BaseColormap, 0, Light.ColormapNum << FRACBITS); 
FDynamicColormap *basecolormap = static_cast(Light.BaseColormap); - bool visible = drawerstyle.SetPatchStyle(RenderStyle, Alpha, Translation, FillColor, basecolormap, Light.ColormapNum << FRACBITS); + bool visible = drawerargs.SetPatchStyle(RenderStyle, Alpha, Translation, FillColor, basecolormap, Light.ColormapNum << FRACBITS); if (!visible) return; @@ -621,7 +621,7 @@ namespace swrenderer fixed_t frac = startfrac; for (int x = x1; x < x2; x++) { - drawerstyle.DrawMaskedColumn(x, iscale, pic, frac, spryscale, sprtopscreen, sprflipvert, mfloorclip, mceilingclip, false); + drawerargs.DrawMaskedColumn(x, iscale, pic, frac, spryscale, sprtopscreen, sprflipvert, mfloorclip, mceilingclip, false); frac += xiscale; } diff --git a/src/swrenderer/things/r_sprite.cpp b/src/swrenderer/things/r_sprite.cpp index 600d4bc716..18aebe8201 100644 --- a/src/swrenderer/things/r_sprite.cpp +++ b/src/swrenderer/things/r_sprite.cpp @@ -245,12 +245,12 @@ namespace swrenderer return; } - DrawerStyle drawerstyle; - drawerstyle.SetColorMapLight(vis->Light.BaseColormap, 0, vis->Light.ColormapNum << FRACBITS); + DrawerArgs drawerargs; + drawerargs.SetColorMapLight(vis->Light.BaseColormap, 0, vis->Light.ColormapNum << FRACBITS); FDynamicColormap *basecolormap = static_cast(vis->Light.BaseColormap); - bool visible = drawerstyle.SetPatchStyle(vis->RenderStyle, vis->Alpha, vis->Translation, vis->FillColor, basecolormap, vis->Light.ColormapNum << FRACBITS); + bool visible = drawerargs.SetPatchStyle(vis->RenderStyle, vis->Alpha, vis->Translation, vis->FillColor, basecolormap, vis->Light.ColormapNum << FRACBITS); if (visible) { @@ -286,7 +286,7 @@ namespace swrenderer while (x < x2) { if (!translucentPass->ClipSpriteColumnWithPortals(x, vis)) - drawerstyle.DrawMaskedColumn(x, iscale, tex, frac, spryscale, sprtopscreen, sprflipvert, mfloorclip, mceilingclip, false); + drawerargs.DrawMaskedColumn(x, iscale, tex, frac, spryscale, sprtopscreen, sprflipvert, mfloorclip, mceilingclip, false); x++; frac += 
xiscale; } diff --git a/src/swrenderer/things/r_voxel.cpp b/src/swrenderer/things/r_voxel.cpp index d7f4c047d8..d4ce963043 100644 --- a/src/swrenderer/things/r_voxel.cpp +++ b/src/swrenderer/things/r_voxel.cpp @@ -33,6 +33,7 @@ #include "po_man.h" #include "r_utility.h" #include "swrenderer/drawers/r_draw.h" +#include "swrenderer/drawers/r_drawerargs.h" #include "swrenderer/drawers/r_thread.h" #include "swrenderer/things/r_visiblesprite.h" #include "swrenderer/things/r_voxel.h" @@ -186,10 +187,10 @@ namespace swrenderer FDynamicColormap *basecolormap = static_cast(sprite->Light.BaseColormap); - DrawerStyle drawerstyle; - drawerstyle.SetColorMapLight(sprite->Light.BaseColormap, 0, sprite->Light.ColormapNum << FRACBITS); + DrawerArgs drawerargs; + drawerargs.SetColorMapLight(sprite->Light.BaseColormap, 0, sprite->Light.ColormapNum << FRACBITS); - bool visible = drawerstyle.SetPatchStyle(sprite->RenderStyle, sprite->Alpha, sprite->Translation, sprite->FillColor, basecolormap); + bool visible = drawerargs.SetPatchStyle(sprite->RenderStyle, sprite->Alpha, sprite->Translation, sprite->FillColor, basecolormap); if (!visible) return; @@ -287,7 +288,7 @@ namespace swrenderer voxel_pos.Y += dirY.X * x + dirY.Y * y; voxel_pos.Z += dirZ * z; - FillBox(drawerstyle, voxel_pos, sprite_xscale, sprite_yscale, color, cliptop, clipbottom, false, false); + FillBox(drawerargs, voxel_pos, sprite_xscale, sprite_yscale, color, cliptop, clipbottom, false, false); } } } @@ -310,7 +311,7 @@ namespace swrenderer return (kvxslab_t*)(((uint8_t*)slab) + 3 + slab->zleng); } - void RenderVoxel::FillBox(DrawerStyle &drawerstyle, DVector3 origin, double extentX, double extentY, int color, short *cliptop, short *clipbottom, bool viewspace, bool pixelstretch) + void RenderVoxel::FillBox(DrawerArgs &drawerargs, DVector3 origin, double extentX, double extentY, int color, short *cliptop, short *clipbottom, bool viewspace, bool pixelstretch) { double viewX, viewY, viewZ; if (viewspace) @@ -352,11 +353,10 
@@ namespace swrenderer int columnY2 = MIN(y2, (int)clipbottom[x]); if (columnY1 < columnY2) { - using namespace drawerargs; - dc_dest = dc_destorg + (dc_pitch * columnY1 + x) * pixelsize; - dc_color = color; - dc_count = columnY2 - columnY1; - drawerstyle.FillColumn(); + drawerargs.SetDest(x, columnY1); + drawerargs.dc_color = color; + drawerargs.dc_count = columnY2 - columnY1; + drawerargs.FillColumn(); } } } diff --git a/src/swrenderer/things/r_voxel.h b/src/swrenderer/things/r_voxel.h index 26a84de5ef..7762330a21 100644 --- a/src/swrenderer/things/r_voxel.h +++ b/src/swrenderer/things/r_voxel.h @@ -31,7 +31,7 @@ struct FVoxel; namespace swrenderer { - class DrawerStyle; + class DrawerArgs; // [RH] A c-buffer. Used for keeping track of offscreen voxel spans. struct FCoverageBuffer @@ -83,7 +83,7 @@ namespace swrenderer enum { DVF_OFFSCREEN = 1, DVF_SPANSONLY = 2, DVF_MIRRORED = 4 }; - static void FillBox(DrawerStyle &drawerstyle, DVector3 origin, double extentX, double extentY, int color, short *cliptop, short *clipbottom, bool viewspace, bool pixelstretch); + static void FillBox(DrawerArgs &drawerargs, DVector3 origin, double extentX, double extentY, int color, short *cliptop, short *clipbottom, bool viewspace, bool pixelstretch); static kvxslab_t *GetSlabStart(const FVoxelMipLevel &mip, int x, int y); static kvxslab_t *GetSlabEnd(const FVoxelMipLevel &mip, int x, int y); diff --git a/src/swrenderer/things/r_wallsprite.cpp b/src/swrenderer/things/r_wallsprite.cpp index 297dd96cda..bad1f07e17 100644 --- a/src/swrenderer/things/r_wallsprite.cpp +++ b/src/swrenderer/things/r_wallsprite.cpp @@ -179,7 +179,7 @@ namespace swrenderer rereadcolormap = false; } - DrawerStyle drawerstyle; + DrawerArgs drawerargs; int shade = LIGHT2SHADE(spr->sector->lightlevel + R_ActualExtraLight(spr->foggy)); double GlobVis = LightVisibility::Instance()->WallGlobVis(); @@ -188,11 +188,11 @@ namespace swrenderer float light = lightleft + (x1 - spr->wallc.sx1) * lightstep; CameraLight 
*cameraLight = CameraLight::Instance(); if (cameraLight->fixedlightlev >= 0) - drawerstyle.SetColorMapLight(usecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->fixedlightlev)); + drawerargs.SetColorMapLight(usecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->fixedlightlev)); else if (cameraLight->fixedcolormap != NULL) - drawerstyle.SetColorMapLight(cameraLight->fixedcolormap, 0, 0); + drawerargs.SetColorMapLight(cameraLight->fixedcolormap, 0, 0); else if (!spr->foggy && (spr->renderflags & RF_FULLBRIGHT)) - drawerstyle.SetColorMapLight((r_fullbrightignoresectorcolor) ? &FullNormalLight : usecolormap, 0, 0); + drawerargs.SetColorMapLight((r_fullbrightignoresectorcolor) ? &FullNormalLight : usecolormap, 0, 0); else calclighting = true; @@ -215,7 +215,7 @@ namespace swrenderer FDynamicColormap *basecolormap = static_cast(spr->Light.BaseColormap); - bool visible = drawerstyle.SetPatchStyle(spr->RenderStyle, spr->Alpha, spr->Translation, spr->FillColor, basecolormap); + bool visible = drawerargs.SetPatchStyle(spr->RenderStyle, spr->Alpha, spr->Translation, spr->FillColor, basecolormap); // R_SetPatchStyle can modify basecolormap. 
if (rereadcolormap) @@ -235,17 +235,17 @@ namespace swrenderer { if (calclighting) { // calculate lighting - drawerstyle.SetColorMapLight(usecolormap, light, shade); + drawerargs.SetColorMapLight(usecolormap, light, shade); } if (!translucentPass->ClipSpriteColumnWithPortals(x, spr)) - DrawColumn(drawerstyle, x, WallSpriteTile, walltexcoords, texturemid, maskedScaleY, sprflipvert, mfloorclip, mceilingclip); + DrawColumn(drawerargs, x, WallSpriteTile, walltexcoords, texturemid, maskedScaleY, sprflipvert, mfloorclip, mceilingclip); light += lightstep; x++; } } } - void RenderWallSprite::DrawColumn(DrawerStyle &drawerstyle, int x, FTexture *WallSpriteTile, const ProjectedWallTexcoords &walltexcoords, double texturemid, float maskedScaleY, bool sprflipvert, const short *mfloorclip, const short *mceilingclip) + void RenderWallSprite::DrawColumn(DrawerArgs &drawerargs, int x, FTexture *WallSpriteTile, const ProjectedWallTexcoords &walltexcoords, double texturemid, float maskedScaleY, bool sprflipvert, const short *mfloorclip, const short *mceilingclip) { float iscale = walltexcoords.VStep[x] * maskedScaleY; double spryscale = 1 / iscale; @@ -255,6 +255,6 @@ namespace swrenderer else sprtopscreen = CenterY - texturemid * spryscale; - drawerstyle.DrawMaskedColumn(x, FLOAT2FIXED(iscale), WallSpriteTile, walltexcoords.UPos[x], spryscale, sprtopscreen, sprflipvert, mfloorclip, mceilingclip); + drawerargs.DrawMaskedColumn(x, FLOAT2FIXED(iscale), WallSpriteTile, walltexcoords.UPos[x], spryscale, sprtopscreen, sprflipvert, mfloorclip, mceilingclip); } } diff --git a/src/swrenderer/things/r_wallsprite.h b/src/swrenderer/things/r_wallsprite.h index 1e10cd273e..bc3fb3b413 100644 --- a/src/swrenderer/things/r_wallsprite.h +++ b/src/swrenderer/things/r_wallsprite.h @@ -18,7 +18,7 @@ namespace swrenderer { class ProjectedWallTexcoords; - class DrawerStyle; + class DrawerArgs; class RenderWallSprite : public VisibleSprite { @@ -30,7 +30,7 @@ namespace swrenderer void Render(short 
*cliptop, short *clipbottom, int minZ, int maxZ) override; private: - static void DrawColumn(DrawerStyle &drawerstyle, int x, FTexture *WallSpriteTile, const ProjectedWallTexcoords &walltexcoords, double texturemid, float maskedScaleY, bool sprflipvert, const short *mfloorclip, const short *mceilingclip); + static void DrawColumn(DrawerArgs &drawerargs, int x, FTexture *WallSpriteTile, const ProjectedWallTexcoords &walltexcoords, double texturemid, float maskedScaleY, bool sprflipvert, const short *mfloorclip, const short *mceilingclip); FWallCoords wallc; uint32_t Translation = 0; diff --git a/src/v_draw.cpp b/src/v_draw.cpp index 61023f0c81..d4e077eb62 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -143,7 +143,6 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) { #ifndef NO_SWRENDER using namespace swrenderer; - using namespace drawerargs; static short bottomclipper[MAXWIDTH], topclipper[MAXWIDTH]; const BYTE *translation = NULL; @@ -185,26 +184,26 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) translation = parms.remap->Remap; } - DrawerStyle drawerstyle; + DrawerArgs drawerargs; if (translation != NULL) { - drawerstyle.SetTranslationMap((lighttable_t *)translation); + drawerargs.SetTranslationMap((lighttable_t *)translation); } else { if (r_swtruecolor) - drawerstyle.SetTranslationMap(nullptr); + drawerargs.SetTranslationMap(nullptr); else - drawerstyle.SetTranslationMap(identitymap); + drawerargs.SetTranslationMap(identitymap); } bool visible; FDynamicColormap *basecolormap = nullptr; if (r_swtruecolor) - visible = drawerstyle.SetPatchStyle(parms.style, parms.Alpha, -1, parms.fillcolor, basecolormap); + visible = drawerargs.SetPatchStyle(parms.style, parms.Alpha, -1, parms.fillcolor, basecolormap); else - visible = drawerstyle.SetPatchStyle(parms.style, parms.Alpha, 0, parms.fillcolor, basecolormap); + visible = drawerargs.SetPatchStyle(parms.style, parms.Alpha, 0, parms.fillcolor, basecolormap); BYTE *destorgsave = 
dc_destorg; int destheightsave = dc_destheight; @@ -292,7 +291,7 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) while (x < x2_i) { - drawerstyle.DrawMaskedColumn(x, iscale, img, frac, spryscale, sprtopscreen, sprflipvert, mfloorclip, mceilingclip, !parms.masked); + drawerargs.DrawMaskedColumn(x, iscale, img, frac, spryscale, sprtopscreen, sprflipvert, mfloorclip, mceilingclip, !parms.masked); x++; frac += xiscale_i; } @@ -1322,7 +1321,6 @@ void DCanvas::FillSimplePoly(FTexture *tex, FVector2 *points, int npoints, { #ifndef NO_SWRENDER using namespace swrenderer; - using namespace drawerargs; // Use an equation similar to player sprites to determine shade fixed_t shade = LIGHT2SHADE(lightlevel) - 12*FRACUNIT; @@ -1389,31 +1387,31 @@ void DCanvas::FillSimplePoly(FTexture *tex, FVector2 *points, int npoints, sinrot = sin(rotation.Radians()); // Setup constant texture mapping parameters. - DrawerStyle drawerstyle; - drawerstyle.SetSpanTexture(tex); + DrawerArgs drawerargs; + drawerargs.SetSpanTexture(tex); if (colormap) - drawerstyle.SetSpanColormap(colormap, clamp(shade >> FRACBITS, 0, NUMCOLORMAPS - 1)); + drawerargs.SetSpanColormap(colormap, clamp(shade >> FRACBITS, 0, NUMCOLORMAPS - 1)); else - drawerstyle.SetSpanColormap(&identitycolormap, 0); - if (ds_xbits != 0) + drawerargs.SetSpanColormap(&identitycolormap, 0); + if (drawerargs.ds_xbits != 0) { - scalex = double(1u << (32 - ds_xbits)) / scalex; - ds_xstep = xs_RoundToInt(cosrot * scalex); + scalex = double(1u << (32 - drawerargs.ds_xbits)) / scalex; + drawerargs.ds_xstep = xs_RoundToInt(cosrot * scalex); } else { // Texture is one pixel wide. 
scalex = 0; - ds_xstep = 0; + drawerargs.ds_xstep = 0; } - if (ds_ybits != 0) + if (drawerargs.ds_ybits != 0) { - scaley = double(1u << (32 - ds_ybits)) / scaley; - ds_ystep = xs_RoundToInt(sinrot * scaley); + scaley = double(1u << (32 - drawerargs.ds_ybits)) / scaley; + drawerargs.ds_ystep = xs_RoundToInt(sinrot * scaley); } else { // Texture is one pixel tall. scaley = 0; - ds_ystep = 0; + drawerargs.ds_ystep = 0; } // Travel down the right edge and create an outline of that edge. @@ -1479,9 +1477,9 @@ void DCanvas::FillSimplePoly(FTexture *tex, FVector2 *points, int npoints, #if 0 memset(this->Buffer + y * this->Pitch + x1, (int)tex, x2 - x1); #else - ds_y = y; - ds_x1 = x1; - ds_x2 = x2 - 1; + drawerargs.ds_y = y; + drawerargs.ds_x1 = x1; + drawerargs.ds_x2 = x2 - 1; DVector2 tex(x1 - originx, y - originy); if (dorotate) @@ -1490,10 +1488,10 @@ void DCanvas::FillSimplePoly(FTexture *tex, FVector2 *points, int npoints, tex.X = t * cosrot - tex.Y * sinrot; tex.Y = tex.Y * cosrot + t * sinrot; } - ds_xfrac = xs_RoundToInt(tex.X * scalex); - ds_yfrac = xs_RoundToInt(tex.Y * scaley); + drawerargs.ds_xfrac = xs_RoundToInt(tex.X * scalex); + drawerargs.ds_yfrac = xs_RoundToInt(tex.Y * scaley); - (drawerstyle.Drawers()->*drawerstyle.spanfunc)(); + (drawerargs.Drawers()->*drawerargs.spanfunc)(drawerargs); #endif } x += xinc; From 7c7d6e99e9101a8153b2f845448fe2b3ff3e70e7 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 29 Jan 2017 10:05:37 +0100 Subject: [PATCH 777/912] Split DrawerArgs into WallDrawerArgs, ColumnDrawerArgs, SpanDrawerArgs and SkyDrawerArgs --- src/swrenderer/drawers/r_draw.cpp | 2 +- src/swrenderer/drawers/r_draw.h | 76 +- src/swrenderer/drawers/r_draw_pal.cpp | 16 +- src/swrenderer/drawers/r_draw_pal.h | 86 +-- src/swrenderer/drawers/r_draw_rgba.cpp | 18 +- src/swrenderer/drawers/r_draw_rgba.h | 88 +-- src/swrenderer/drawers/r_drawerargs.cpp | 733 ++++++++++++++++++++ src/swrenderer/drawers/r_drawerargs.h | 215 ++++++ 
src/swrenderer/line/r_fogboundary.h | 2 +- src/swrenderer/line/r_line.cpp | 2 +- src/swrenderer/line/r_renderdrawsegment.cpp | 27 +- src/swrenderer/line/r_walldraw.cpp | 8 +- src/swrenderer/line/r_walldraw.h | 8 +- src/swrenderer/plane/r_flatplane.h | 4 +- src/swrenderer/plane/r_skyplane.h | 2 +- src/swrenderer/plane/r_slopeplane.h | 2 +- src/swrenderer/things/r_decal.cpp | 4 +- src/swrenderer/things/r_decal.h | 4 +- src/swrenderer/things/r_playersprite.cpp | 2 +- src/swrenderer/things/r_sprite.cpp | 2 +- src/swrenderer/things/r_voxel.cpp | 4 +- src/swrenderer/things/r_voxel.h | 4 +- src/swrenderer/things/r_wallsprite.cpp | 4 +- src/swrenderer/things/r_wallsprite.h | 4 +- src/v_draw.cpp | 4 +- 25 files changed, 1142 insertions(+), 179 deletions(-) create mode 100644 src/swrenderer/drawers/r_drawerargs.cpp create mode 100644 src/swrenderer/drawers/r_drawerargs.h diff --git a/src/swrenderer/drawers/r_draw.cpp b/src/swrenderer/drawers/r_draw.cpp index b569aac0d2..db7cb8a6ba 100644 --- a/src/swrenderer/drawers/r_draw.cpp +++ b/src/swrenderer/drawers/r_draw.cpp @@ -156,7 +156,7 @@ namespace swrenderer } } - void R_UpdateFuzzPos(const DrawerArgs &args) + void R_UpdateFuzzPos(const ColumnDrawerArgs &args) { int yl = MAX(args.dc_yl, 1); int yh = MIN(args.dc_yh, fuzzviewheight); diff --git a/src/swrenderer/drawers/r_draw.h b/src/swrenderer/drawers/r_draw.h index efedc277bd..f34601b57f 100644 --- a/src/swrenderer/drawers/r_draw.h +++ b/src/swrenderer/drawers/r_draw.h @@ -20,6 +20,10 @@ EXTERN_CVAR(Bool, r_dynlights); namespace swrenderer { class DrawerArgs; + class SkyDrawerArgs; + class WallDrawerArgs; + class SpanDrawerArgs; + class ColumnDrawerArgs; extern int ylookup[MAXHEIGHT]; extern uint8_t shadetables[/*NUMCOLORMAPS*16*256*/]; @@ -46,46 +50,46 @@ namespace swrenderer { public: virtual ~SWPixelFormatDrawers() { } - virtual void DrawWallColumn(const DrawerArgs &args) = 0; - virtual void DrawWallMaskedColumn(const DrawerArgs &args) = 0; - virtual void 
DrawWallAddColumn(const DrawerArgs &args) = 0; - virtual void DrawWallAddClampColumn(const DrawerArgs &args) = 0; - virtual void DrawWallSubClampColumn(const DrawerArgs &args) = 0; - virtual void DrawWallRevSubClampColumn(const DrawerArgs &args) = 0; - virtual void DrawSingleSkyColumn(const DrawerArgs &args, uint32_t solid_top, uint32_t solid_bottom, bool fadeSky) = 0; - virtual void DrawDoubleSkyColumn(const DrawerArgs &args, uint32_t solid_top, uint32_t solid_bottom, bool fadeSky) = 0; - virtual void DrawColumn(const DrawerArgs &args) = 0; - virtual void FillColumn(const DrawerArgs &args) = 0; - virtual void FillAddColumn(const DrawerArgs &args) = 0; - virtual void FillAddClampColumn(const DrawerArgs &args) = 0; - virtual void FillSubClampColumn(const DrawerArgs &args) = 0; - virtual void FillRevSubClampColumn(const DrawerArgs &args) = 0; - virtual void DrawFuzzColumn(const DrawerArgs &args) = 0; - virtual void DrawAddColumn(const DrawerArgs &args) = 0; - virtual void DrawTranslatedColumn(const DrawerArgs &args) = 0; - virtual void DrawTranslatedAddColumn(const DrawerArgs &args) = 0; - virtual void DrawShadedColumn(const DrawerArgs &args) = 0; - virtual void DrawAddClampColumn(const DrawerArgs &args) = 0; - virtual void DrawAddClampTranslatedColumn(const DrawerArgs &args) = 0; - virtual void DrawSubClampColumn(const DrawerArgs &args) = 0; - virtual void DrawSubClampTranslatedColumn(const DrawerArgs &args) = 0; - virtual void DrawRevSubClampColumn(const DrawerArgs &args) = 0; - virtual void DrawRevSubClampTranslatedColumn(const DrawerArgs &args) = 0; - virtual void DrawSpan(const DrawerArgs &args) = 0; - virtual void DrawSpanMasked(const DrawerArgs &args) = 0; - virtual void DrawSpanTranslucent(const DrawerArgs &args) = 0; - virtual void DrawSpanMaskedTranslucent(const DrawerArgs &args) = 0; - virtual void DrawSpanAddClamp(const DrawerArgs &args) = 0; - virtual void DrawSpanMaskedAddClamp(const DrawerArgs &args) = 0; - virtual void FillSpan(const DrawerArgs &args) 
= 0; - virtual void DrawTiltedSpan(const DrawerArgs &args, int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy, FDynamicColormap *basecolormap) = 0; - virtual void DrawColoredSpan(const DrawerArgs &args, int y, int x1, int x2) = 0; - virtual void DrawFogBoundaryLine(const DrawerArgs &args, int y, int x1, int x2) = 0; + virtual void DrawWallColumn(const WallDrawerArgs &args) = 0; + virtual void DrawWallMaskedColumn(const WallDrawerArgs &args) = 0; + virtual void DrawWallAddColumn(const WallDrawerArgs &args) = 0; + virtual void DrawWallAddClampColumn(const WallDrawerArgs &args) = 0; + virtual void DrawWallSubClampColumn(const WallDrawerArgs &args) = 0; + virtual void DrawWallRevSubClampColumn(const WallDrawerArgs &args) = 0; + virtual void DrawSingleSkyColumn(const SkyDrawerArgs &args, uint32_t solid_top, uint32_t solid_bottom, bool fadeSky) = 0; + virtual void DrawDoubleSkyColumn(const SkyDrawerArgs &args, uint32_t solid_top, uint32_t solid_bottom, bool fadeSky) = 0; + virtual void DrawColumn(const ColumnDrawerArgs &args) = 0; + virtual void FillColumn(const ColumnDrawerArgs &args) = 0; + virtual void FillAddColumn(const ColumnDrawerArgs &args) = 0; + virtual void FillAddClampColumn(const ColumnDrawerArgs &args) = 0; + virtual void FillSubClampColumn(const ColumnDrawerArgs &args) = 0; + virtual void FillRevSubClampColumn(const ColumnDrawerArgs &args) = 0; + virtual void DrawFuzzColumn(const ColumnDrawerArgs &args) = 0; + virtual void DrawAddColumn(const ColumnDrawerArgs &args) = 0; + virtual void DrawTranslatedColumn(const ColumnDrawerArgs &args) = 0; + virtual void DrawTranslatedAddColumn(const ColumnDrawerArgs &args) = 0; + virtual void DrawShadedColumn(const ColumnDrawerArgs &args) = 0; + virtual void DrawAddClampColumn(const ColumnDrawerArgs &args) = 0; + virtual void DrawAddClampTranslatedColumn(const ColumnDrawerArgs &args) = 
0; + virtual void DrawSubClampColumn(const ColumnDrawerArgs &args) = 0; + virtual void DrawSubClampTranslatedColumn(const ColumnDrawerArgs &args) = 0; + virtual void DrawRevSubClampColumn(const ColumnDrawerArgs &args) = 0; + virtual void DrawRevSubClampTranslatedColumn(const ColumnDrawerArgs &args) = 0; + virtual void DrawSpan(const SpanDrawerArgs &args) = 0; + virtual void DrawSpanMasked(const SpanDrawerArgs &args) = 0; + virtual void DrawSpanTranslucent(const SpanDrawerArgs &args) = 0; + virtual void DrawSpanMaskedTranslucent(const SpanDrawerArgs &args) = 0; + virtual void DrawSpanAddClamp(const SpanDrawerArgs &args) = 0; + virtual void DrawSpanMaskedAddClamp(const SpanDrawerArgs &args) = 0; + virtual void FillSpan(const SpanDrawerArgs &args) = 0; + virtual void DrawTiltedSpan(const SpanDrawerArgs &args, int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy, FDynamicColormap *basecolormap) = 0; + virtual void DrawColoredSpan(const SpanDrawerArgs &args, int y, int x1, int x2) = 0; + virtual void DrawFogBoundaryLine(const SpanDrawerArgs &args, int y, int x1, int x2) = 0; }; void R_InitShadeMaps(); void R_InitFuzzTable(int fuzzoff); void R_InitParticleTexture(); - void R_UpdateFuzzPos(const DrawerArgs &args); + void R_UpdateFuzzPos(const ColumnDrawerArgs &args); } diff --git a/src/swrenderer/drawers/r_draw_pal.cpp b/src/swrenderer/drawers/r_draw_pal.cpp index 18abbb195e..0ae0e294da 100644 --- a/src/swrenderer/drawers/r_draw_pal.cpp +++ b/src/swrenderer/drawers/r_draw_pal.cpp @@ -96,7 +96,7 @@ CVAR(Bool, r_blendmethod, false, CVAR_GLOBALCONFIG | CVAR_ARCHIVE) namespace swrenderer { - PalWall1Command::PalWall1Command(const DrawerArgs &args) + PalWall1Command::PalWall1Command(const WallDrawerArgs &args) { _iscale = args.dc_iscale; _texturefrac = args.dc_texturefrac; @@ -559,7 +559,7 @@ namespace swrenderer 
///////////////////////////////////////////////////////////////////////// - PalSkyCommand::PalSkyCommand(const DrawerArgs &args, uint32_t solid_top, uint32_t solid_bottom, bool fadeSky) : solid_top(solid_top), solid_bottom(solid_bottom), fadeSky(fadeSky) + PalSkyCommand::PalSkyCommand(const SkyDrawerArgs &args, uint32_t solid_top, uint32_t solid_bottom, bool fadeSky) : solid_top(solid_top), solid_bottom(solid_bottom), fadeSky(fadeSky) { _dest = args.Dest(); _dest_y = args.DestY(); @@ -864,7 +864,7 @@ namespace swrenderer ///////////////////////////////////////////////////////////////////////// - PalColumnCommand::PalColumnCommand(const DrawerArgs &args) + PalColumnCommand::PalColumnCommand(const ColumnDrawerArgs &args) { _count = args.dc_count; _dest = args.Dest(); @@ -1761,7 +1761,7 @@ namespace swrenderer ///////////////////////////////////////////////////////////////////////// - DrawFuzzColumnPalCommand::DrawFuzzColumnPalCommand(const DrawerArgs &args) + DrawFuzzColumnPalCommand::DrawFuzzColumnPalCommand(const ColumnDrawerArgs &args) { _yl = args.dc_yl; _yh = args.dc_yh; @@ -1848,7 +1848,7 @@ namespace swrenderer ///////////////////////////////////////////////////////////////////////// - PalSpanCommand::PalSpanCommand(const DrawerArgs &args) + PalSpanCommand::PalSpanCommand(const SpanDrawerArgs &args) { _source = args.ds_source; _colormap = args.ds_colormap; @@ -2623,7 +2623,7 @@ namespace swrenderer ///////////////////////////////////////////////////////////////////////// - DrawTiltedSpanPalCommand::DrawTiltedSpanPalCommand(const DrawerArgs &args, int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy, FDynamicColormap *basecolormap) + DrawTiltedSpanPalCommand::DrawTiltedSpanPalCommand(const SpanDrawerArgs &args, int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, 
int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy, FDynamicColormap *basecolormap) : y(y), x1(x1), x2(x2), plane_sz(plane_sz), plane_su(plane_su), plane_sv(plane_sv), plane_shade(plane_shade), planeshade(planeshade), planelightfloat(planelightfloat), pviewx(pviewx), pviewy(pviewy) { _colormap = args.ds_colormap; @@ -2869,7 +2869,7 @@ namespace swrenderer ///////////////////////////////////////////////////////////////////////// - DrawColoredSpanPalCommand::DrawColoredSpanPalCommand(const DrawerArgs &args, int y, int x1, int x2) : PalSpanCommand(args), y(y), x1(x1), x2(x2) + DrawColoredSpanPalCommand::DrawColoredSpanPalCommand(const SpanDrawerArgs &args, int y, int x1, int x2) : PalSpanCommand(args), y(y), x1(x1), x2(x2) { color = args.ds_color; destorg = dc_destorg; @@ -2885,7 +2885,7 @@ namespace swrenderer ///////////////////////////////////////////////////////////////////////// - DrawFogBoundaryLinePalCommand::DrawFogBoundaryLinePalCommand(const DrawerArgs &args, int y, int x1, int x2) : PalSpanCommand(args), y(y), x1(x1), x2(x2) + DrawFogBoundaryLinePalCommand::DrawFogBoundaryLinePalCommand(const SpanDrawerArgs &args, int y, int x1, int x2) : PalSpanCommand(args), y(y), x1(x1), x2(x2) { _colormap = args.dc_colormap; _destorg = dc_destorg; diff --git a/src/swrenderer/drawers/r_draw_pal.h b/src/swrenderer/drawers/r_draw_pal.h index ea798ef3c1..c8c3dacb60 100644 --- a/src/swrenderer/drawers/r_draw_pal.h +++ b/src/swrenderer/drawers/r_draw_pal.h @@ -11,7 +11,7 @@ namespace swrenderer class PalWall1Command : public DrawerCommand { public: - PalWall1Command(const DrawerArgs &args); + PalWall1Command(const WallDrawerArgs &args); FString DebugInfo() override { return "PalWallCommand"; } protected: @@ -44,7 +44,7 @@ namespace swrenderer class PalSkyCommand : public DrawerCommand { public: - PalSkyCommand(const DrawerArgs &args, uint32_t solid_top, uint32_t solid_bottom, bool fadeSky); + PalSkyCommand(const SkyDrawerArgs &args, uint32_t solid_top, 
uint32_t solid_bottom, bool fadeSky); FString DebugInfo() override { return "PalSkyCommand"; } protected: @@ -69,7 +69,7 @@ namespace swrenderer class PalColumnCommand : public DrawerCommand { public: - PalColumnCommand(const DrawerArgs &args); + PalColumnCommand(const ColumnDrawerArgs &args); FString DebugInfo() override { return "PalColumnCommand"; } protected: @@ -110,7 +110,7 @@ namespace swrenderer class DrawFuzzColumnPalCommand : public DrawerCommand { public: - DrawFuzzColumnPalCommand(const DrawerArgs &args); + DrawFuzzColumnPalCommand(const ColumnDrawerArgs &args); void Execute(DrawerThread *thread) override; FString DebugInfo() override { return "DrawFuzzColumnPalCommand"; } @@ -127,7 +127,7 @@ namespace swrenderer class PalSpanCommand : public DrawerCommand { public: - PalSpanCommand(const DrawerArgs &args); + PalSpanCommand(const SpanDrawerArgs &args); FString DebugInfo() override { return "PalSpanCommand"; } protected: @@ -167,7 +167,7 @@ namespace swrenderer class DrawTiltedSpanPalCommand : public DrawerCommand { public: - DrawTiltedSpanPalCommand(const DrawerArgs &args, int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy, FDynamicColormap *basecolormap); + DrawTiltedSpanPalCommand(const SpanDrawerArgs &args, int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy, FDynamicColormap *basecolormap); void Execute(DrawerThread *thread) override; FString DebugInfo() override { return "DrawTiltedSpanPalCommand"; } @@ -197,7 +197,7 @@ namespace swrenderer class DrawColoredSpanPalCommand : public PalSpanCommand { public: - DrawColoredSpanPalCommand(const DrawerArgs &args, int y, int x1, int x2); + DrawColoredSpanPalCommand(const SpanDrawerArgs &args, int y, int x1, int x2); void 
Execute(DrawerThread *thread) override; FString DebugInfo() override { return "DrawColoredSpanPalCommand"; } @@ -212,7 +212,7 @@ namespace swrenderer class DrawFogBoundaryLinePalCommand : public PalSpanCommand { public: - DrawFogBoundaryLinePalCommand(const DrawerArgs &args, int y, int x1, int x2); + DrawFogBoundaryLinePalCommand(const SpanDrawerArgs &args, int y, int x1, int x2); void Execute(DrawerThread *thread) override; private: @@ -241,10 +241,10 @@ namespace swrenderer class SWPalDrawers : public SWPixelFormatDrawers { public: - void DrawWallColumn(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawWallMaskedColumn(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawWallColumn(const WallDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawWallMaskedColumn(const WallDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawWallAddColumn(const DrawerArgs &args) override + void DrawWallAddColumn(const WallDrawerArgs &args) override { if (args.dc_num_lights == 0) DrawerCommandQueue::QueueCommand(args); @@ -252,42 +252,42 @@ namespace swrenderer DrawerCommandQueue::QueueCommand(args); } - void DrawWallAddClampColumn(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawWallSubClampColumn(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawWallRevSubClampColumn(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawSingleSkyColumn(const DrawerArgs &args, uint32_t solid_top, uint32_t solid_bottom, bool fadeSky) override { DrawerCommandQueue::QueueCommand(args, solid_top, solid_bottom, fadeSky); } - void DrawDoubleSkyColumn(const DrawerArgs &args, uint32_t solid_top, uint32_t solid_bottom, bool fadeSky) override { DrawerCommandQueue::QueueCommand(args, solid_top, solid_bottom, fadeSky); } - void DrawColumn(const DrawerArgs &args) override { 
DrawerCommandQueue::QueueCommand(args); } - void FillColumn(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void FillAddColumn(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void FillAddClampColumn(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void FillSubClampColumn(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void FillRevSubClampColumn(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawFuzzColumn(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); R_UpdateFuzzPos(args); } - void DrawAddColumn(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawTranslatedColumn(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawTranslatedAddColumn(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawShadedColumn(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawAddClampColumn(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawAddClampTranslatedColumn(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawSubClampColumn(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawSubClampTranslatedColumn(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawRevSubClampColumn(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawRevSubClampTranslatedColumn(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawSpan(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawSpanMasked(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawSpanTranslucent(const DrawerArgs &args) override { 
DrawerCommandQueue::QueueCommand(args); } - void DrawSpanMaskedTranslucent(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawSpanAddClamp(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawSpanMaskedAddClamp(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void FillSpan(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawWallAddClampColumn(const WallDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawWallSubClampColumn(const WallDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawWallRevSubClampColumn(const WallDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawSingleSkyColumn(const SkyDrawerArgs &args, uint32_t solid_top, uint32_t solid_bottom, bool fadeSky) override { DrawerCommandQueue::QueueCommand(args, solid_top, solid_bottom, fadeSky); } + void DrawDoubleSkyColumn(const SkyDrawerArgs &args, uint32_t solid_top, uint32_t solid_bottom, bool fadeSky) override { DrawerCommandQueue::QueueCommand(args, solid_top, solid_bottom, fadeSky); } + void DrawColumn(const ColumnDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void FillColumn(const ColumnDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void FillAddColumn(const ColumnDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void FillAddClampColumn(const ColumnDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void FillSubClampColumn(const ColumnDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void FillRevSubClampColumn(const ColumnDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawFuzzColumn(const ColumnDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); R_UpdateFuzzPos(args); } + void DrawAddColumn(const ColumnDrawerArgs &args) 
override { DrawerCommandQueue::QueueCommand(args); } + void DrawTranslatedColumn(const ColumnDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawTranslatedAddColumn(const ColumnDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawShadedColumn(const ColumnDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawAddClampColumn(const ColumnDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawAddClampTranslatedColumn(const ColumnDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawSubClampColumn(const ColumnDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawSubClampTranslatedColumn(const ColumnDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawRevSubClampColumn(const ColumnDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawRevSubClampTranslatedColumn(const ColumnDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawSpan(const SpanDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawSpanMasked(const SpanDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawSpanTranslucent(const SpanDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawSpanMaskedTranslucent(const SpanDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawSpanAddClamp(const SpanDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawSpanMaskedAddClamp(const SpanDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void FillSpan(const SpanDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawTiltedSpan(const DrawerArgs &args, int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float 
planelightfloat, fixed_t pviewx, fixed_t pviewy, FDynamicColormap *basecolormap) override + void DrawTiltedSpan(const SpanDrawerArgs &args, int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy, FDynamicColormap *basecolormap) override { DrawerCommandQueue::QueueCommand(args, y, x1, x2, plane_sz, plane_su, plane_sv, plane_shade, planeshade, planelightfloat, pviewx, pviewy, basecolormap); } - void DrawColoredSpan(const DrawerArgs &args, int y, int x1, int x2) override { DrawerCommandQueue::QueueCommand(args, y, x1, x2); } - void DrawFogBoundaryLine(const DrawerArgs &args, int y, int x1, int x2) override { DrawerCommandQueue::QueueCommand(args, y, x1, x2); } + void DrawColoredSpan(const SpanDrawerArgs &args, int y, int x1, int x2) override { DrawerCommandQueue::QueueCommand(args, y, x1, x2); } + void DrawFogBoundaryLine(const SpanDrawerArgs &args, int y, int x1, int x2) override { DrawerCommandQueue::QueueCommand(args, y, x1, x2); } }; } diff --git a/src/swrenderer/drawers/r_draw_rgba.cpp b/src/swrenderer/drawers/r_draw_rgba.cpp index e096c97c85..77e74eebd8 100644 --- a/src/swrenderer/drawers/r_draw_rgba.cpp +++ b/src/swrenderer/drawers/r_draw_rgba.cpp @@ -60,7 +60,7 @@ CVAR(Float, r_lod_bias, -1.5, 0); // To do: add CVAR_ARCHIVE | CVAR_GLOBALCONFIG namespace swrenderer { - DrawSpanLLVMCommand::DrawSpanLLVMCommand(const DrawerArgs &drawerargs) + DrawSpanLLVMCommand::DrawSpanLLVMCommand(const SpanDrawerArgs &drawerargs) { args.xfrac = drawerargs.ds_xfrac; args.yfrac = drawerargs.ds_yfrac; @@ -180,7 +180,7 @@ namespace swrenderer return d; } - DrawWall1LLVMCommand::DrawWall1LLVMCommand(const DrawerArgs &drawerargs) + DrawWall1LLVMCommand::DrawWall1LLVMCommand(const WallDrawerArgs &drawerargs) { args.dest = (uint32_t*)drawerargs.Dest(); args.dest_y = drawerargs.DestY(); @@ -244,7 +244,7 @@ namespace swrenderer return "DrawColumn\n" + 
args.ToString(); } - DrawColumnLLVMCommand::DrawColumnLLVMCommand(const DrawerArgs &drawerargs) + DrawColumnLLVMCommand::DrawColumnLLVMCommand(const ColumnDrawerArgs &drawerargs) { args.dest = (uint32_t*)drawerargs.Dest(); args.source = drawerargs.dc_source; @@ -298,7 +298,7 @@ namespace swrenderer return d; } - DrawSkyLLVMCommand::DrawSkyLLVMCommand(const DrawerArgs &drawerargs, uint32_t solid_top, uint32_t solid_bottom, bool fadeSky) + DrawSkyLLVMCommand::DrawSkyLLVMCommand(const SkyDrawerArgs &drawerargs, uint32_t solid_top, uint32_t solid_bottom, bool fadeSky) { args.dest = (uint32_t*)drawerargs.Dest(); args.dest_y = drawerargs.DestY(); @@ -325,7 +325,7 @@ namespace swrenderer ///////////////////////////////////////////////////////////////////////////// - DrawFuzzColumnRGBACommand::DrawFuzzColumnRGBACommand(const DrawerArgs &drawerargs) + DrawFuzzColumnRGBACommand::DrawFuzzColumnRGBACommand(const ColumnDrawerArgs &drawerargs) { _x = drawerargs.dc_x; _yl = drawerargs.dc_yl; @@ -433,7 +433,7 @@ namespace swrenderer ///////////////////////////////////////////////////////////////////////////// - FillSpanRGBACommand::FillSpanRGBACommand(const DrawerArgs &drawerargs) + FillSpanRGBACommand::FillSpanRGBACommand(const SpanDrawerArgs &drawerargs) { _x1 = drawerargs.ds_x1; _x2 = drawerargs.ds_x2; @@ -463,7 +463,7 @@ namespace swrenderer ///////////////////////////////////////////////////////////////////////////// - DrawFogBoundaryLineRGBACommand::DrawFogBoundaryLineRGBACommand(const DrawerArgs &drawerargs, int y, int x, int x2) + DrawFogBoundaryLineRGBACommand::DrawFogBoundaryLineRGBACommand(const SpanDrawerArgs &drawerargs, int y, int x, int x2) { _y = y; _x = x; @@ -531,7 +531,7 @@ namespace swrenderer ///////////////////////////////////////////////////////////////////////////// - DrawTiltedSpanRGBACommand::DrawTiltedSpanRGBACommand(const DrawerArgs &drawerargs, int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool 
plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy) + DrawTiltedSpanRGBACommand::DrawTiltedSpanRGBACommand(const SpanDrawerArgs &drawerargs, int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy) { _x1 = x1; _x2 = x2; @@ -665,7 +665,7 @@ namespace swrenderer ///////////////////////////////////////////////////////////////////////////// - DrawColoredSpanRGBACommand::DrawColoredSpanRGBACommand(const DrawerArgs &drawerargs, int y, int x1, int x2) + DrawColoredSpanRGBACommand::DrawColoredSpanRGBACommand(const SpanDrawerArgs &drawerargs, int y, int x1, int x2) { _y = y; _x1 = x1; diff --git a/src/swrenderer/drawers/r_draw_rgba.h b/src/swrenderer/drawers/r_draw_rgba.h index 1d3717a1cb..ae0b259db3 100644 --- a/src/swrenderer/drawers/r_draw_rgba.h +++ b/src/swrenderer/drawers/r_draw_rgba.h @@ -80,7 +80,7 @@ namespace swrenderer class DrawSpanLLVMCommand : public DrawerCommand { public: - DrawSpanLLVMCommand(const DrawerArgs &drawerargs); + DrawSpanLLVMCommand(const SpanDrawerArgs &drawerargs); void Execute(DrawerThread *thread) override; FString DebugInfo() override; @@ -135,7 +135,7 @@ namespace swrenderer WorkerThreadData ThreadData(DrawerThread *thread); public: - DrawWall1LLVMCommand(const DrawerArgs &drawerargs); + DrawWall1LLVMCommand(const WallDrawerArgs &drawerargs); void Execute(DrawerThread *thread) override; FString DebugInfo() override; @@ -150,7 +150,7 @@ namespace swrenderer FString DebugInfo() override; public: - DrawColumnLLVMCommand(const DrawerArgs &drawerargs); + DrawColumnLLVMCommand(const ColumnDrawerArgs &drawerargs); void Execute(DrawerThread *thread) override; }; @@ -163,7 +163,7 @@ namespace swrenderer WorkerThreadData ThreadData(DrawerThread *thread); public: - DrawSkyLLVMCommand(const DrawerArgs &drawerargs, uint32_t solid_top, uint32_t solid_bottom, bool fadeSky); + 
DrawSkyLLVMCommand(const SkyDrawerArgs &drawerargs, uint32_t solid_top, uint32_t solid_bottom, bool fadeSky); FString DebugInfo() override; }; @@ -201,7 +201,7 @@ namespace swrenderer int _fuzzviewheight; public: - DrawFuzzColumnRGBACommand(const DrawerArgs &drawerargs); + DrawFuzzColumnRGBACommand(const ColumnDrawerArgs &drawerargs); void Execute(DrawerThread *thread) override; FString DebugInfo() override; }; @@ -216,7 +216,7 @@ namespace swrenderer int _color; public: - FillSpanRGBACommand(const DrawerArgs &drawerargs); + FillSpanRGBACommand(const SpanDrawerArgs &drawerargs); void Execute(DrawerThread *thread) override; FString DebugInfo() override; }; @@ -231,7 +231,7 @@ namespace swrenderer ShadeConstants _shade_constants; public: - DrawFogBoundaryLineRGBACommand(const DrawerArgs &drawerargs, int y, int x, int x2); + DrawFogBoundaryLineRGBACommand(const SpanDrawerArgs &drawerargs, int y, int x, int x2); void Execute(DrawerThread *thread) override; FString DebugInfo() override; }; @@ -257,7 +257,7 @@ namespace swrenderer const uint32_t * RESTRICT _source; public: - DrawTiltedSpanRGBACommand(const DrawerArgs &drawerargs, int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy); + DrawTiltedSpanRGBACommand(const SpanDrawerArgs &drawerargs, int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy); void Execute(DrawerThread *thread) override; FString DebugInfo() override; }; @@ -272,7 +272,7 @@ namespace swrenderer int _color; public: - DrawColoredSpanRGBACommand(const DrawerArgs &drawerargs, int y, int x1, int x2); + DrawColoredSpanRGBACommand(const SpanDrawerArgs &drawerargs, int y, int x1, int x2); void Execute(DrawerThread *thread) override; FString DebugInfo() override; @@ -354,46 +354,46 @@ 
namespace swrenderer class SWTruecolorDrawers : public SWPixelFormatDrawers { public: - void DrawWallColumn(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawWallMaskedColumn(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawWallAddColumn(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawWallAddClampColumn(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawWallSubClampColumn(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawWallRevSubClampColumn(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawSingleSkyColumn(const DrawerArgs &args, uint32_t solid_top, uint32_t solid_bottom, bool skyFade) override { DrawerCommandQueue::QueueCommand(args, solid_top, solid_bottom, skyFade); } - void DrawDoubleSkyColumn(const DrawerArgs &args, uint32_t solid_top, uint32_t solid_bottom, bool skyFade) override { DrawerCommandQueue::QueueCommand(args, solid_top, solid_bottom, skyFade); } - void DrawColumn(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void FillColumn(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void FillAddColumn(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void FillAddClampColumn(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void FillSubClampColumn(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void FillRevSubClampColumn(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawFuzzColumn(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); R_UpdateFuzzPos(args); } - void DrawAddColumn(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawTranslatedColumn(const DrawerArgs &args) override { 
DrawerCommandQueue::QueueCommand(args); } - void DrawTranslatedAddColumn(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawShadedColumn(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawAddClampColumn(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawAddClampTranslatedColumn(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawSubClampColumn(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawSubClampTranslatedColumn(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawRevSubClampColumn(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawRevSubClampTranslatedColumn(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawSpan(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawSpanMasked(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawSpanTranslucent(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawSpanMaskedTranslucent(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawSpanAddClamp(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawSpanMaskedAddClamp(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void FillSpan(const DrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawWallColumn(const WallDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawWallMaskedColumn(const WallDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawWallAddColumn(const WallDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawWallAddClampColumn(const WallDrawerArgs &args) override { 
DrawerCommandQueue::QueueCommand(args); } + void DrawWallSubClampColumn(const WallDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawWallRevSubClampColumn(const WallDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawSingleSkyColumn(const SkyDrawerArgs &args, uint32_t solid_top, uint32_t solid_bottom, bool skyFade) override { DrawerCommandQueue::QueueCommand(args, solid_top, solid_bottom, skyFade); } + void DrawDoubleSkyColumn(const SkyDrawerArgs &args, uint32_t solid_top, uint32_t solid_bottom, bool skyFade) override { DrawerCommandQueue::QueueCommand(args, solid_top, solid_bottom, skyFade); } + void DrawColumn(const ColumnDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void FillColumn(const ColumnDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void FillAddColumn(const ColumnDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void FillAddClampColumn(const ColumnDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void FillSubClampColumn(const ColumnDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void FillRevSubClampColumn(const ColumnDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawFuzzColumn(const ColumnDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); R_UpdateFuzzPos(args); } + void DrawAddColumn(const ColumnDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawTranslatedColumn(const ColumnDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawTranslatedAddColumn(const ColumnDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawShadedColumn(const ColumnDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawAddClampColumn(const ColumnDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void 
DrawAddClampTranslatedColumn(const ColumnDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawSubClampColumn(const ColumnDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawSubClampTranslatedColumn(const ColumnDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawRevSubClampColumn(const ColumnDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawRevSubClampTranslatedColumn(const ColumnDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawSpan(const SpanDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawSpanMasked(const SpanDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawSpanTranslucent(const SpanDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawSpanMaskedTranslucent(const SpanDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawSpanAddClamp(const SpanDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawSpanMaskedAddClamp(const SpanDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void FillSpan(const SpanDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawTiltedSpan(const DrawerArgs &args, int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy, FDynamicColormap *basecolormap) override + void DrawTiltedSpan(const SpanDrawerArgs &args, int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy, FDynamicColormap *basecolormap) override { DrawerCommandQueue::QueueCommand(args, y, x1, x2, plane_sz, plane_su, plane_sv, plane_shade, planeshade, planelightfloat, pviewx, 
pviewy); } - void DrawColoredSpan(const DrawerArgs &args, int y, int x1, int x2) override { DrawerCommandQueue::QueueCommand(args, y, x1, x2); } - void DrawFogBoundaryLine(const DrawerArgs &args, int y, int x1, int x2) override { DrawerCommandQueue::QueueCommand(args, y, x1, x2); } + void DrawColoredSpan(const SpanDrawerArgs &args, int y, int x1, int x2) override { DrawerCommandQueue::QueueCommand(args, y, x1, x2); } + void DrawFogBoundaryLine(const SpanDrawerArgs &args, int y, int x1, int x2) override { DrawerCommandQueue::QueueCommand(args, y, x1, x2); } }; ///////////////////////////////////////////////////////////////////////////// diff --git a/src/swrenderer/drawers/r_drawerargs.cpp b/src/swrenderer/drawers/r_drawerargs.cpp new file mode 100644 index 0000000000..af08e8d56f --- /dev/null +++ b/src/swrenderer/drawers/r_drawerargs.cpp @@ -0,0 +1,733 @@ +/* +** r_drawerargs.cpp +** +**--------------------------------------------------------------------------- +** Copyright 1998-2016 Randy Heit +** Copyright 2016 Magnus Norddahl +** All rights reserved. +** +** Redistribution and use in source and binary forms, with or without +** modification, are permitted provided that the following conditions +** are met: +** +** 1. Redistributions of source code must retain the above copyright +** notice, this list of conditions and the following disclaimer. +** 2. Redistributions in binary form must reproduce the above copyright +** notice, this list of conditions and the following disclaimer in the +** documentation and/or other materials provided with the distribution. +** 3. The name of the author may not be used to endorse or promote products +** derived from this software without specific prior written permission. +** +** THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +** IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +** OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+** IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +** INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +** NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +** DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +** THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +** (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +** THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**--------------------------------------------------------------------------- +** +*/ + +#include +#include "r_drawerargs.h" +#include "r_draw_pal.h" +#include "r_draw_rgba.h" + +namespace swrenderer +{ + namespace + { + SWPixelFormatDrawers *active_drawers; + SWPalDrawers pal_drawers; + SWTruecolorDrawers tc_drawers; + } + + void R_InitColumnDrawers() + { + if (r_swtruecolor) + active_drawers = &tc_drawers; + else + active_drawers = &pal_drawers; + } + + SWPixelFormatDrawers *DrawerArgs::Drawers() const + { + return active_drawers; + } + + DrawerArgs::DrawerArgs() + { + colfunc = &SWPixelFormatDrawers::DrawColumn; + basecolfunc = &SWPixelFormatDrawers::DrawColumn; + fuzzcolfunc = &SWPixelFormatDrawers::DrawFuzzColumn; + transcolfunc = &SWPixelFormatDrawers::DrawTranslatedColumn; + spanfunc = &SWPixelFormatDrawers::DrawSpan; + } + + void DrawerArgs::SetColorMapLight(FSWColormap *base_colormap, float light, int shade) + { + if (r_swtruecolor) + { + dc_shade_constants.light_red = base_colormap->Color.r * 256 / 255; + dc_shade_constants.light_green = base_colormap->Color.g * 256 / 255; + dc_shade_constants.light_blue = base_colormap->Color.b * 256 / 255; + dc_shade_constants.light_alpha = base_colormap->Color.a * 256 / 255; + dc_shade_constants.fade_red = base_colormap->Fade.r; + dc_shade_constants.fade_green = base_colormap->Fade.g; + dc_shade_constants.fade_blue = base_colormap->Fade.b; + dc_shade_constants.fade_alpha = base_colormap->Fade.a; + 
dc_shade_constants.desaturate = MIN(abs(base_colormap->Desaturate), 255) * 255 / 256; + dc_shade_constants.simple_shade = (base_colormap->Color.d == 0x00ffffff && base_colormap->Fade.d == 0x00000000 && base_colormap->Desaturate == 0); + dc_colormap = base_colormap->Maps; + dc_light = LIGHTSCALE(light, shade); + } + else + { + dc_colormap = base_colormap->Maps + (GETPALOOKUP(light, shade) << COLORMAPSHIFT); + } + } + + void SpanDrawerArgs::SetDSColorMapLight(FSWColormap *base_colormap, float light, int shade) + { + if (r_swtruecolor) + { + ds_shade_constants.light_red = base_colormap->Color.r * 256 / 255; + ds_shade_constants.light_green = base_colormap->Color.g * 256 / 255; + ds_shade_constants.light_blue = base_colormap->Color.b * 256 / 255; + ds_shade_constants.light_alpha = base_colormap->Color.a * 256 / 255; + ds_shade_constants.fade_red = base_colormap->Fade.r; + ds_shade_constants.fade_green = base_colormap->Fade.g; + ds_shade_constants.fade_blue = base_colormap->Fade.b; + ds_shade_constants.fade_alpha = base_colormap->Fade.a; + ds_shade_constants.desaturate = MIN(abs(base_colormap->Desaturate), 255) * 255 / 256; + ds_shade_constants.simple_shade = (base_colormap->Color.d == 0x00ffffff && base_colormap->Fade.d == 0x00000000 && base_colormap->Desaturate == 0); + ds_colormap = base_colormap->Maps; + ds_light = LIGHTSCALE(light, shade); + } + else + { + ds_colormap = base_colormap->Maps + (GETPALOOKUP(light, shade) << COLORMAPSHIFT); + } + } + + void DrawerArgs::SetTranslationMap(lighttable_t *translation) + { + if (r_swtruecolor) + { + dc_colormap = nullptr; + dc_translation = translation; + dc_shade_constants.light_red = 256; + dc_shade_constants.light_green = 256; + dc_shade_constants.light_blue = 256; + dc_shade_constants.light_alpha = 256; + dc_shade_constants.fade_red = 0; + dc_shade_constants.fade_green = 0; + dc_shade_constants.fade_blue = 0; + dc_shade_constants.fade_alpha = 256; + dc_shade_constants.desaturate = 0; + dc_shade_constants.simple_shade = 
true; + dc_light = 0; + } + else + { + dc_colormap = translation; + } + } + + void SpanDrawerArgs::SetSpanTexture(FTexture *tex) + { + tex->GetWidth(); + ds_xbits = tex->WidthBits; + ds_ybits = tex->HeightBits; + if ((1 << ds_xbits) > tex->GetWidth()) + { + ds_xbits--; + } + if ((1 << ds_ybits) > tex->GetHeight()) + { + ds_ybits--; + } + + ds_source = r_swtruecolor ? (const uint8_t*)tex->GetPixelsBgra() : tex->GetPixels(); + ds_source_mipmapped = tex->Mipmapped() && tex->GetWidth() > 1 && tex->GetHeight() > 1; + } + + void SpanDrawerArgs::SetSpanColormap(FDynamicColormap *colormap, int shade) + { + SetDSColorMapLight(colormap, 0, shade); + } + + void ColumnDrawerArgs::DrawMaskedColumn(int x, fixed_t iscale, FTexture *tex, fixed_t col, double spryscale, double sprtopscreen, bool sprflipvert, const short *mfloorclip, const short *mceilingclip, bool unmasked) + { + // Handle the linear filtered version in a different function to reduce chances of merge conflicts from zdoom. + if (r_swtruecolor && !drawer_needs_pal_input) // To do: add support to R_DrawColumnHoriz_rgba + { + DrawMaskedColumnBgra(x, iscale, tex, col, spryscale, sprtopscreen, sprflipvert, mfloorclip, mceilingclip, unmasked); + return; + } + + dc_x = x; + dc_iscale = iscale; + dc_textureheight = tex->GetHeight(); + + const FTexture::Span *span; + const BYTE *column; + if (r_swtruecolor && !drawer_needs_pal_input) + column = (const BYTE *)tex->GetColumnBgra(col >> FRACBITS, &span); + else + column = tex->GetColumn(col >> FRACBITS, &span); + + FTexture::Span unmaskedSpan[2]; + if (unmasked) + { + span = unmaskedSpan; + unmaskedSpan[0].TopOffset = 0; + unmaskedSpan[0].Length = tex->GetHeight(); + unmaskedSpan[1].TopOffset = 0; + unmaskedSpan[1].Length = 0; + } + + int pixelsize = r_swtruecolor ? 
4 : 1; + + while (span->Length != 0) + { + const int length = span->Length; + const int top = span->TopOffset; + + // calculate unclipped screen coordinates for post + dc_yl = (int)(sprtopscreen + spryscale * top + 0.5); + dc_yh = (int)(sprtopscreen + spryscale * (top + length) + 0.5) - 1; + + if (sprflipvert) + { + swapvalues(dc_yl, dc_yh); + } + + if (dc_yh >= mfloorclip[dc_x]) + { + dc_yh = mfloorclip[dc_x] - 1; + } + if (dc_yl < mceilingclip[dc_x]) + { + dc_yl = mceilingclip[dc_x]; + } + + if (dc_yl <= dc_yh) + { + dc_texturefrac = FLOAT2FIXED((dc_yl + 0.5 - sprtopscreen) / spryscale); + dc_source = column; + dc_source2 = nullptr; + SetDest(dc_x, dc_yl); + dc_count = dc_yh - dc_yl + 1; + + fixed_t maxfrac = ((top + length) << FRACBITS) - 1; + dc_texturefrac = MAX(dc_texturefrac, 0); + dc_texturefrac = MIN(dc_texturefrac, maxfrac); + if (dc_iscale > 0) + dc_count = MIN(dc_count, (maxfrac - dc_texturefrac + dc_iscale - 1) / dc_iscale); + else if (dc_iscale < 0) + dc_count = MIN(dc_count, (dc_texturefrac - dc_iscale) / (-dc_iscale)); + + (Drawers()->*colfunc)(*this); + } + span++; + } + } + + void ColumnDrawerArgs::DrawMaskedColumnBgra(int x, fixed_t iscale, FTexture *tex, fixed_t col, double spryscale, double sprtopscreen, bool sprflipvert, const short *mfloorclip, const short *mceilingclip, bool unmasked) + { + dc_x = x; + dc_iscale = iscale; + + // Normalize to 0-1 range: + double uv_stepd = FIXED2DBL(dc_iscale); + double v_step = uv_stepd / tex->GetHeight(); + + // Convert to uint32: + dc_iscale = (uint32_t)(v_step * (1 << 30)); + + // Texture mipmap and filter selection: + fixed_t xoffset = col; + + double xmagnitude = 1.0; // To do: pass this into R_DrawMaskedColumn + double ymagnitude = fabs(uv_stepd); + double magnitude = MAX(ymagnitude, xmagnitude); + double min_lod = -1000.0; + double lod = MAX(log2(magnitude) + r_lod_bias, min_lod); + bool magnifying = lod < 0.0f; + + int mipmap_offset = 0; + int mip_width = tex->GetWidth(); + int mip_height = 
tex->GetHeight(); + uint32_t xpos = (uint32_t)((((uint64_t)xoffset) << FRACBITS) / mip_width); + if (r_mipmap && tex->Mipmapped() && mip_width > 1 && mip_height > 1) + { + int level = (int)lod; + while (level > 0 && mip_width > 1 && mip_height > 1) + { + mipmap_offset += mip_width * mip_height; + level--; + mip_width = MAX(mip_width >> 1, 1); + mip_height = MAX(mip_height >> 1, 1); + } + } + xoffset = (xpos >> FRACBITS) * mip_width; + + const uint32_t *pixels = tex->GetPixelsBgra() + mipmap_offset; + + bool filter_nearest = (magnifying && !r_magfilter) || (!magnifying && !r_minfilter); + if (filter_nearest) + { + xoffset = MAX(MIN(xoffset, (mip_width << FRACBITS) - 1), 0); + + int tx = xoffset >> FRACBITS; + dc_source = (BYTE*)(pixels + tx * mip_height); + dc_source2 = nullptr; + dc_textureheight = mip_height; + dc_texturefracx = 0; + } + else + { + xoffset = MAX(MIN(xoffset - (FRACUNIT / 2), (mip_width << FRACBITS) - 1), 0); + + int tx0 = xoffset >> FRACBITS; + int tx1 = MIN(tx0 + 1, mip_width - 1); + dc_source = (BYTE*)(pixels + tx0 * mip_height); + dc_source2 = (BYTE*)(pixels + tx1 * mip_height); + dc_textureheight = mip_height; + dc_texturefracx = (xoffset >> (FRACBITS - 4)) & 15; + } + + // Grab the posts we need to draw + const FTexture::Span *span; + tex->GetColumnBgra(col >> FRACBITS, &span); + FTexture::Span unmaskedSpan[2]; + if (unmasked) + { + span = unmaskedSpan; + unmaskedSpan[0].TopOffset = 0; + unmaskedSpan[0].Length = tex->GetHeight(); + unmaskedSpan[1].TopOffset = 0; + unmaskedSpan[1].Length = 0; + } + + // Draw each span post + while (span->Length != 0) + { + const int length = span->Length; + const int top = span->TopOffset; + + // calculate unclipped screen coordinates for post + dc_yl = (int)(sprtopscreen + spryscale * top + 0.5); + dc_yh = (int)(sprtopscreen + spryscale * (top + length) + 0.5) - 1; + + if (sprflipvert) + { + swapvalues(dc_yl, dc_yh); + } + + if (dc_yh >= mfloorclip[dc_x]) + { + dc_yh = mfloorclip[dc_x] - 1; + } + if (dc_yl < 
mceilingclip[dc_x]) + { + dc_yl = mceilingclip[dc_x]; + } + + if (dc_yl <= dc_yh) + { + SetDest(dc_x, dc_yl); + dc_count = dc_yh - dc_yl + 1; + + double v = ((dc_yl + 0.5 - sprtopscreen) / spryscale) / tex->GetHeight(); + dc_texturefrac = (uint32_t)(v * (1 << 30)); + + (Drawers()->*colfunc)(*this); + } + span++; + } + } + + bool DrawerArgs::SetBlendFunc(int op, fixed_t fglevel, fixed_t bglevel, int flags) + { + // r_drawtrans is a seriously bad thing to turn off. I wonder if I should + // just remove it completely. + if (!r_drawtrans || (op == STYLEOP_Add && fglevel == FRACUNIT && bglevel == 0 && !(flags & STYLEF_InvertSource))) + { + if (flags & STYLEF_ColorIsFixed) + { + colfunc = &SWPixelFormatDrawers::FillColumn; + } + else if (dc_translation == NULL) + { + colfunc = basecolfunc; + } + else + { + colfunc = transcolfunc; + drawer_needs_pal_input = true; + } + return true; + } + if (flags & STYLEF_InvertSource) + { + dc_srcblend = Col2RGB8_Inverse[fglevel >> 10]; + dc_destblend = Col2RGB8_LessPrecision[bglevel >> 10]; + dc_srcalpha = fglevel; + dc_destalpha = bglevel; + } + else if (op == STYLEOP_Add && fglevel + bglevel <= FRACUNIT) + { + dc_srcblend = Col2RGB8[fglevel >> 10]; + dc_destblend = Col2RGB8[bglevel >> 10]; + dc_srcalpha = fglevel; + dc_destalpha = bglevel; + } + else + { + dc_srcblend = Col2RGB8_LessPrecision[fglevel >> 10]; + dc_destblend = Col2RGB8_LessPrecision[bglevel >> 10]; + dc_srcalpha = fglevel; + dc_destalpha = bglevel; + } + switch (op) + { + case STYLEOP_Add: + if (fglevel == 0 && bglevel == FRACUNIT) + { + return false; + } + if (fglevel + bglevel <= FRACUNIT) + { // Colors won't overflow when added + if (flags & STYLEF_ColorIsFixed) + { + colfunc = &SWPixelFormatDrawers::FillAddColumn; + } + else if (dc_translation == NULL) + { + colfunc = &SWPixelFormatDrawers::DrawAddColumn; + } + else + { + colfunc = &SWPixelFormatDrawers::DrawTranslatedAddColumn; + drawer_needs_pal_input = true; + } + } + else + { // Colors might overflow when added 
+ if (flags & STYLEF_ColorIsFixed) + { + colfunc = &SWPixelFormatDrawers::FillAddClampColumn; + } + else if (dc_translation == NULL) + { + colfunc = &SWPixelFormatDrawers::DrawAddClampColumn; + } + else + { + colfunc = &SWPixelFormatDrawers::DrawAddClampTranslatedColumn; + drawer_needs_pal_input = true; + } + } + return true; + + case STYLEOP_Sub: + if (flags & STYLEF_ColorIsFixed) + { + colfunc = &SWPixelFormatDrawers::FillSubClampColumn; + } + else if (dc_translation == NULL) + { + colfunc = &SWPixelFormatDrawers::DrawSubClampColumn; + } + else + { + colfunc = &SWPixelFormatDrawers::DrawSubClampTranslatedColumn; + drawer_needs_pal_input = true; + } + return true; + + case STYLEOP_RevSub: + if (fglevel == 0 && bglevel == FRACUNIT) + { + return false; + } + if (flags & STYLEF_ColorIsFixed) + { + colfunc = &SWPixelFormatDrawers::FillRevSubClampColumn; + } + else if (dc_translation == NULL) + { + colfunc = &SWPixelFormatDrawers::DrawRevSubClampColumn; + } + else + { + colfunc = &SWPixelFormatDrawers::DrawRevSubClampTranslatedColumn; + drawer_needs_pal_input = true; + } + return true; + + default: + return false; + } + } + + fixed_t DrawerArgs::GetAlpha(int type, fixed_t alpha) + { + switch (type) + { + case STYLEALPHA_Zero: return 0; + case STYLEALPHA_One: return OPAQUE; + case STYLEALPHA_Src: return alpha; + case STYLEALPHA_InvSrc: return OPAQUE - alpha; + default: return 0; + } + } + + bool DrawerArgs::SetPatchStyle(FRenderStyle style, fixed_t alpha, int translation, uint32_t color, FDynamicColormap *&basecolormap, fixed_t shadedlightshade) + { + fixed_t fglevel, bglevel; + + drawer_needs_pal_input = false; + + style.CheckFuzz(); + + if (style.BlendOp == STYLEOP_Shadow) + { + style = LegacyRenderStyles[STYLE_TranslucentStencil]; + alpha = TRANSLUC33; + color = 0; + } + + if (style.Flags & STYLEF_ForceAlpha) + { + alpha = clamp(alpha, 0, OPAQUE); + } + else if (style.Flags & STYLEF_TransSoulsAlpha) + { + alpha = fixed_t(transsouls * OPAQUE); + } + else if 
(style.Flags & STYLEF_Alpha1)
+		{
+			alpha = FRACUNIT;
+		}
+		else
+		{
+			alpha = clamp(alpha, 0, OPAQUE);
+		}
+
+		if (translation != -1) // -1 keeps whatever dc_translation is already set
+		{
+			dc_translation = NULL;
+			if (translation != 0) // 0 leaves the translation cleared
+			{
+				FRemapTable *table = TranslationToTable(translation);
+				if (table != NULL && !table->Inactive)
+				{
+					if (r_swtruecolor)
+						dc_translation = (uint8_t*)table->Palette; // true color reads the table's Palette
+					else
+						dc_translation = table->Remap; // paletted output reads the Remap table
+				}
+			}
+		}
+
+		// Check for special modes
+		if (style.BlendOp == STYLEOP_Fuzz)
+		{
+			colfunc = fuzzcolfunc;
+			return true;
+		}
+		else if (style == LegacyRenderStyles[STYLE_Shaded])
+		{
+			// Shaded drawer only gets 16 levels of alpha because it saves memory.
+			if ((alpha >>= 12) == 0 || basecolormap == nullptr)
+				return false;
+			colfunc = &SWPixelFormatDrawers::DrawShadedColumn;
+			drawer_needs_pal_input = true;
+			CameraLight *cameraLight = CameraLight::Instance();
+			dc_color = cameraLight->fixedcolormap ? cameraLight->fixedcolormap->Maps[APART(color)] : basecolormap->Maps[APART(color)];
+			basecolormap = &ShadeFakeColormap[16 - alpha];
+			if (cameraLight->fixedlightlev >= 0 && cameraLight->fixedcolormap == NULL)
+			{
+				fixed_t shade = shadedlightshade;
+				if (shade == 0) shade = FIXEDLIGHT2SHADE(cameraLight->fixedlightlev); // no shade passed in: use the camera's fixed light level (result was previously discarded)
+				SetColorMapLight(basecolormap, 0, shade);
+			}
+			else
+			{
+				SetColorMapLight(basecolormap, 0, shadedlightshade);
+			}
+			return true;
+		}
+
+		fglevel = GetAlpha(style.SrcAlpha, alpha);
+		bglevel = GetAlpha(style.DestAlpha, alpha);
+
+		if (style.Flags & STYLEF_ColorIsFixed)
+		{
+			uint32_t x = fglevel >> 10;
+			uint32_t r = RPART(color);
+			uint32_t g = GPART(color);
+			uint32_t b = BPART(color);
+			// dc_color is used by the rt_* routines. It is indexed into dc_srcblend.
+ dc_color = RGB256k.RGB[r >> 2][g >> 2][b >> 2]; + if (style.Flags & STYLEF_InvertSource) + { + r = 255 - r; + g = 255 - g; + b = 255 - b; + } + uint32_t alpha = clamp(fglevel >> (FRACBITS - 8), 0, 255); + dc_srccolor_bgra = (alpha << 24) | (r << 16) | (g << 8) | b; + // dc_srccolor is used by the R_Fill* routines. It is premultiplied + // with the alpha. + dc_srccolor = ((((r*x) >> 4) << 20) | ((g*x) >> 4) | ((((b)*x) >> 4) << 10)) & 0x3feffbff; + SetColorMapLight(&identitycolormap, 0, 0); + } + + if (!DrawerArgs::SetBlendFunc(style.BlendOp, fglevel, bglevel, style.Flags)) + { + return false; + } + return true; + } + + bool DrawerArgs::SetPatchStyle(FRenderStyle style, float alpha, int translation, uint32_t color, FDynamicColormap *&basecolormap, fixed_t shadedlightshade) + { + return SetPatchStyle(style, FLOAT2FIXED(alpha), translation, color, basecolormap, shadedlightshade); + } + + void WallDrawerArgs::SetDest(int x, int y) + { + int pixelsize = r_swtruecolor ? 4 : 1; + dc_dest = dc_destorg + (ylookup[y] + x) * pixelsize; + dc_dest_y = y; + } + + WallDrawerFunc WallDrawerArgs::GetTransMaskDrawer() + { + if (colfunc == &SWPixelFormatDrawers::DrawAddColumn) + { + return &SWPixelFormatDrawers::DrawWallAddColumn; + } + if (colfunc == &SWPixelFormatDrawers::DrawAddClampColumn) + { + return &SWPixelFormatDrawers::DrawWallAddClampColumn; + } + if (colfunc == &SWPixelFormatDrawers::DrawSubClampColumn) + { + return &SWPixelFormatDrawers::DrawWallSubClampColumn; + } + if (colfunc == &SWPixelFormatDrawers::DrawRevSubClampColumn) + { + return &SWPixelFormatDrawers::DrawWallRevSubClampColumn; + } + return nullptr; + } + + void DrawerArgs::SetSpanStyle(bool masked, bool additive, fixed_t alpha) + { + if (masked) + { + if (alpha < OPAQUE || additive) + { + if (!additive) + { + spanfunc = &SWPixelFormatDrawers::DrawSpanMaskedTranslucent; + dc_srcblend = Col2RGB8[alpha >> 10]; + dc_destblend = Col2RGB8[(OPAQUE - alpha) >> 10]; + dc_srcalpha = alpha; + dc_destalpha = OPAQUE - 
alpha; + } + else + { + spanfunc = &SWPixelFormatDrawers::DrawSpanMaskedAddClamp; + dc_srcblend = Col2RGB8_LessPrecision[alpha >> 10]; + dc_destblend = Col2RGB8_LessPrecision[FRACUNIT >> 10]; + dc_srcalpha = alpha; + dc_destalpha = FRACUNIT; + } + } + else + { + spanfunc = &SWPixelFormatDrawers::DrawSpanMasked; + } + } + else + { + if (alpha < OPAQUE || additive) + { + if (!additive) + { + spanfunc = &SWPixelFormatDrawers::DrawSpanTranslucent; + dc_srcblend = Col2RGB8[alpha >> 10]; + dc_destblend = Col2RGB8[(OPAQUE - alpha) >> 10]; + dc_srcalpha = alpha; + dc_destalpha = OPAQUE - alpha; + } + else + { + spanfunc = &SWPixelFormatDrawers::DrawSpanAddClamp; + dc_srcblend = Col2RGB8_LessPrecision[alpha >> 10]; + dc_destblend = Col2RGB8_LessPrecision[FRACUNIT >> 10]; + dc_srcalpha = alpha; + dc_destalpha = FRACUNIT; + } + } + else + { + spanfunc = &SWPixelFormatDrawers::DrawSpan; + } + } + } + + void SpanDrawerArgs::DrawTiltedSpan(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy, FDynamicColormap *basecolormap) + { + Drawers()->DrawTiltedSpan(*this, y, x1, x2, plane_sz, plane_su, plane_sv, plane_shade, planeshade, planelightfloat, pviewx, pviewy, basecolormap); + } + + void SpanDrawerArgs::DrawFogBoundaryLine(int y, int x1, int x2) + { + Drawers()->DrawFogBoundaryLine(*this, y, x1, x2); + } + + void SpanDrawerArgs::DrawColoredSpan(int y, int x1, int x2) + { + Drawers()->DrawColoredSpan(*this, y, x1, x2); + } + + void SkyDrawerArgs::DrawSingleSkyColumn(uint32_t solid_top, uint32_t solid_bottom, bool fadeSky) + { + Drawers()->DrawSingleSkyColumn(*this, solid_top, solid_bottom, fadeSky); + } + + void SkyDrawerArgs::DrawDoubleSkyColumn(uint32_t solid_top, uint32_t solid_bottom, bool fadeSky) + { + Drawers()->DrawDoubleSkyColumn(*this, solid_top, solid_bottom, fadeSky); + } + + void SkyDrawerArgs::SetDest(int x, int y) + { + int 
pixelsize = r_swtruecolor ? 4 : 1; + dc_dest = dc_destorg + (ylookup[y] + x) * pixelsize; + dc_dest_y = y; + } + + void ColumnDrawerArgs::FillColumn() + { + Drawers()->FillColumn(*this); + } + + void ColumnDrawerArgs::SetDest(int x, int y) + { + int pixelsize = r_swtruecolor ? 4 : 1; + dc_dest = dc_destorg + (ylookup[y] + x) * pixelsize; + dc_dest_y = y; + } +} diff --git a/src/swrenderer/drawers/r_drawerargs.h b/src/swrenderer/drawers/r_drawerargs.h new file mode 100644 index 0000000000..053051c8c5 --- /dev/null +++ b/src/swrenderer/drawers/r_drawerargs.h @@ -0,0 +1,215 @@ + +#pragma once + +#include "templates.h" +#include "doomtype.h" +#include "doomdef.h" +#include "r_defs.h" +#include "r_draw.h" +#include "v_video.h" +#include "r_data/colormaps.h" +#include "r_data/r_translate.h" +#include "swrenderer/scene/r_light.h" + +struct FSWColormap; +struct FLightNode; +struct TriLight; + +namespace swrenderer +{ + class SWPixelFormatDrawers; + class DrawerArgs; + + struct ShadeConstants + { + uint16_t light_alpha; + uint16_t light_red; + uint16_t light_green; + uint16_t light_blue; + uint16_t fade_alpha; + uint16_t fade_red; + uint16_t fade_green; + uint16_t fade_blue; + uint16_t desaturate; + bool simple_shade; + }; + + typedef void(SWPixelFormatDrawers::*DrawerFunc)(const DrawerArgs &args); + typedef void(SWPixelFormatDrawers::*WallDrawerFunc)(const WallDrawerArgs &args); + typedef void(SWPixelFormatDrawers::*ColumnDrawerFunc)(const ColumnDrawerArgs &args); + typedef void(SWPixelFormatDrawers::*SpanDrawerFunc)(const SpanDrawerArgs &args); + + class DrawerArgs + { + public: + DrawerArgs(); + + bool SetPatchStyle(FRenderStyle style, fixed_t alpha, int translation, uint32_t color, FDynamicColormap *&basecolormap, fixed_t shadedlightshade = 0); + bool SetPatchStyle(FRenderStyle style, float alpha, int translation, uint32_t color, FDynamicColormap *&basecolormap, fixed_t shadedlightshade = 0); + void SetSpanStyle(bool masked, bool additive, fixed_t alpha); + + // Sets 
dc_colormap and dc_light to their appropriate values depending on the output format (pal vs true color) + void SetColorMapLight(FSWColormap *base_colormap, float light, int shade); + void SetTranslationMap(lighttable_t *translation); + + SWPixelFormatDrawers *Drawers() const; + + ColumnDrawerFunc colfunc; + ColumnDrawerFunc basecolfunc; + ColumnDrawerFunc fuzzcolfunc; + ColumnDrawerFunc transcolfunc; + SpanDrawerFunc spanfunc; + + uint8_t *dc_colormap; + ShadeConstants dc_shade_constants; + fixed_t dc_light = 0; + + uint8_t *dc_translation; + + uint32_t *dc_srcblend; + uint32_t *dc_destblend; + fixed_t dc_srcalpha; + fixed_t dc_destalpha; + + int dc_color = 0; + uint32_t dc_srccolor; + uint32_t dc_srccolor_bgra; + + protected: + bool drawer_needs_pal_input = false; + + private: + bool SetBlendFunc(int op, fixed_t fglevel, fixed_t bglevel, int flags); + static fixed_t GetAlpha(int type, fixed_t alpha); + }; + + class SkyDrawerArgs : public DrawerArgs + { + public: + const uint8_t *dc_wall_source[4]; + const uint8_t *dc_wall_source2[4]; + uint32_t dc_wall_sourceheight[4]; + uint32_t dc_wall_texturefrac[4]; + uint32_t dc_wall_iscale[4]; + int dc_count; + + void SetDest(int x, int y); + + uint8_t *Dest() const { return dc_dest; } + int DestY() const { return dc_dest_y; } + + void DrawSingleSkyColumn(uint32_t solid_top, uint32_t solid_bottom, bool fadeSky); + void DrawDoubleSkyColumn(uint32_t solid_top, uint32_t solid_bottom, bool fadeSky); + + private: + uint8_t *dc_dest = nullptr; + int dc_dest_y = 0; + }; + + class WallDrawerArgs : public DrawerArgs + { + public: + void SetDest(int x, int y); + + WallDrawerFunc GetTransMaskDrawer(); + + uint8_t *Dest() const { return dc_dest; } + int DestY() const { return dc_dest_y; } + + fixed_t dc_iscale; + fixed_t dc_texturefrac; + uint32_t dc_texturefracx; + uint32_t dc_textureheight; + const uint8_t *dc_source; + const uint8_t *dc_source2; + int dc_count; + + uint32_t dc_wall_texturefrac[4]; + uint32_t dc_wall_iscale[4]; + 
uint8_t *dc_wall_colormap[4]; + fixed_t dc_wall_light[4]; + const uint8_t *dc_wall_source[4]; + const uint8_t *dc_wall_source2[4]; + uint32_t dc_wall_texturefracx[4]; + uint32_t dc_wall_sourceheight[4]; + int dc_wall_fracbits; + + FVector3 dc_normal; + FVector3 dc_viewpos; + FVector3 dc_viewpos_step; + TriLight *dc_lights = nullptr; + int dc_num_lights = 0; + + private: + uint8_t *dc_dest = nullptr; + int dc_dest_y = 0; + }; + + class SpanDrawerArgs : public DrawerArgs + { + public: + void SetDSColorMapLight(FSWColormap *base_colormap, float light, int shade); + + void SetSpanTexture(FTexture *tex); + void SetSpanColormap(FDynamicColormap *colormap, int shade); + + void DrawTiltedSpan(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy, FDynamicColormap *basecolormap); + void DrawColoredSpan(int y, int x1, int x2); + void DrawFogBoundaryLine(int y, int x1, int x2); + + int ds_y; + int ds_x1; + int ds_x2; + uint8_t * ds_colormap; + ShadeConstants ds_shade_constants; + dsfixed_t ds_light; + dsfixed_t ds_xfrac; + dsfixed_t ds_yfrac; + dsfixed_t ds_xstep; + dsfixed_t ds_ystep; + int ds_xbits; + int ds_ybits; + fixed_t ds_alpha; + double ds_lod; + const uint8_t *ds_source; + bool ds_source_mipmapped; + int ds_color = 0; + + FVector3 dc_normal; + FVector3 dc_viewpos; + FVector3 dc_viewpos_step; + TriLight *dc_lights = nullptr; + int dc_num_lights = 0; + }; + + class ColumnDrawerArgs : public DrawerArgs + { + public: + void DrawMaskedColumn(int x, fixed_t iscale, FTexture *texture, fixed_t column, double spryscale, double sprtopscreen, bool sprflipvert, const short *mfloorclip, const short *mceilingclip, bool unmasked = false); + void FillColumn(); + + void SetDest(int x, int y); + + uint8_t *Dest() const { return dc_dest; } + int DestY() const { return dc_dest_y; } + + int dc_x; + int dc_yl; + int dc_yh; + fixed_t dc_iscale; + fixed_t 
dc_texturefrac; + uint32_t dc_textureheight; + const uint8_t *dc_source; + const uint8_t *dc_source2; + uint32_t dc_texturefracx; + int dc_count; + + private: + void DrawMaskedColumnBgra(int x, fixed_t iscale, FTexture *tex, fixed_t column, double spryscale, double sprtopscreen, bool sprflipvert, const short *mfloorclip, const short *mceilingclip, bool unmasked); + + uint8_t *dc_dest = nullptr; + int dc_dest_y = 0; + }; + + void R_InitColumnDrawers(); +} diff --git a/src/swrenderer/line/r_fogboundary.h b/src/swrenderer/line/r_fogboundary.h index 55f0e504ec..4213cb146d 100644 --- a/src/swrenderer/line/r_fogboundary.h +++ b/src/swrenderer/line/r_fogboundary.h @@ -26,6 +26,6 @@ namespace swrenderer void RenderSection(int y, int y2, int x1); short spanend[MAXHEIGHT]; - DrawerArgs drawerargs; + SpanDrawerArgs drawerargs; }; } diff --git a/src/swrenderer/line/r_line.cpp b/src/swrenderer/line/r_line.cpp index 3fbe7f0917..6b604d8271 100644 --- a/src/swrenderer/line/r_line.cpp +++ b/src/swrenderer/line/r_line.cpp @@ -930,7 +930,7 @@ namespace swrenderer double yscale; fixed_t xoffset = rw_offset; - DrawerArgs drawerargs; + WallDrawerArgs drawerargs; // [RH] Color if not texturing line drawerargs.dc_color = (((int)(curline - segs) * 8) + 4) & 255; diff --git a/src/swrenderer/line/r_renderdrawsegment.cpp b/src/swrenderer/line/r_renderdrawsegment.cpp index 76dc170f1a..faa635dd7e 100644 --- a/src/swrenderer/line/r_renderdrawsegment.cpp +++ b/src/swrenderer/line/r_renderdrawsegment.cpp @@ -67,8 +67,13 @@ namespace swrenderer curline = ds->curline; FDynamicColormap *patchstylecolormap = nullptr; - DrawerArgs drawerargs; - bool visible = drawerargs.SetPatchStyle(LegacyRenderStyles[curline->linedef->flags & ML_ADDTRANS ? STYLE_Add : STYLE_Translucent], + + WallDrawerArgs walldrawerargs; + walldrawerargs.SetPatchStyle(LegacyRenderStyles[curline->linedef->flags & ML_ADDTRANS ? 
STYLE_Add : STYLE_Translucent], + (float)MIN(curline->linedef->alpha, 1.), 0, 0, patchstylecolormap); + + ColumnDrawerArgs columndrawerargs; + bool visible = columndrawerargs.SetPatchStyle(LegacyRenderStyles[curline->linedef->flags & ML_ADDTRANS ? STYLE_Add : STYLE_Translucent], (float)MIN(curline->linedef->alpha, 1.), 0, 0, patchstylecolormap); if (!visible && !ds->bFogBoundary && !ds->bFakeBoundary) @@ -142,9 +147,15 @@ namespace swrenderer rw_scalestep = ds->iscalestep; if (cameraLight->fixedlightlev >= 0) - drawerargs.SetColorMapLight((r_fullbrightignoresectorcolor) ? &FullNormalLight : basecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->fixedlightlev)); + { + walldrawerargs.SetColorMapLight((r_fullbrightignoresectorcolor) ? &FullNormalLight : basecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->fixedlightlev)); + columndrawerargs.SetColorMapLight((r_fullbrightignoresectorcolor) ? &FullNormalLight : basecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->fixedlightlev)); + } else if (cameraLight->fixedcolormap != nullptr) - drawerargs.SetColorMapLight(cameraLight->fixedcolormap, 0, 0); + { + walldrawerargs.SetColorMapLight(cameraLight->fixedcolormap, 0, 0); + columndrawerargs.SetColorMapLight(cameraLight->fixedcolormap, 0, 0); + } // find positioning texheight = tex->GetScaledHeightDouble(); @@ -273,7 +284,7 @@ namespace swrenderer { if (cameraLight->fixedcolormap == nullptr && cameraLight->fixedlightlev < 0) { - drawerargs.SetColorMapLight(basecolormap, rw_light, wallshade); + columndrawerargs.SetColorMapLight(basecolormap, rw_light, wallshade); } fixed_t iscale = xs_Fix<16>::ToFix(MaskedSWall[x] * MaskedScaleY); @@ -283,7 +294,7 @@ namespace swrenderer else sprtopscreen = CenterY - texturemid * spryscale; - drawerargs.DrawMaskedColumn(x, iscale, tex, maskedtexturecol[x], spryscale, sprtopscreen, sprflipvert, mfloorclip, mceilingclip); + columndrawerargs.DrawMaskedColumn(x, iscale, tex, maskedtexturecol[x], spryscale, sprtopscreen, sprflipvert, mfloorclip, mceilingclip); rw_light 
+= rw_lightstep; spryscale += rw_scalestep; @@ -349,7 +360,7 @@ namespace swrenderer GetMaskedWallTopBottom(ds, top, bot); RenderWallPart renderWallpart; - renderWallpart.Render(drawerargs, frontsector, curline, WallC, rw_pic, x1, x2, mceilingclip, mfloorclip, texturemid, MaskedSWall, maskedtexturecol, ds->yscale, top, bot, true, wallshade, rw_offset, rw_light, rw_lightstep, nullptr, ds->foggy, basecolormap); + renderWallpart.Render(walldrawerargs, frontsector, curline, WallC, rw_pic, x1, x2, mceilingclip, mfloorclip, texturemid, MaskedSWall, maskedtexturecol, ds->yscale, top, bot, true, wallshade, rw_offset, rw_light, rw_lightstep, nullptr, ds->foggy, basecolormap); } clearfog: @@ -383,7 +394,7 @@ namespace swrenderer double yscale; fixed_t Alpha = Scale(rover->alpha, OPAQUE, 255); - DrawerArgs drawerargs; + WallDrawerArgs drawerargs; bool visible = drawerargs.SetPatchStyle(LegacyRenderStyles[rover->flags & FF_ADDITIVETRANS ? STYLE_Add : STYLE_Translucent], Alpha, 0, 0, basecolormap); diff --git a/src/swrenderer/line/r_walldraw.cpp b/src/swrenderer/line/r_walldraw.cpp index 2a28b23127..796788a0e7 100644 --- a/src/swrenderer/line/r_walldraw.cpp +++ b/src/swrenderer/line/r_walldraw.cpp @@ -168,7 +168,7 @@ namespace swrenderer } // Draw a column with support for non-power-of-two ranges - void RenderWallPart::Draw1Column(int x, int y1, int y2, WallSampler &sampler, DrawerFunc draw1column) + void RenderWallPart::Draw1Column(int x, int y1, int y2, WallSampler &sampler, WallDrawerFunc draw1column) { if (r_dynlights && light_list) { @@ -306,7 +306,7 @@ namespace swrenderer } } - void RenderWallPart::ProcessWallWorker(const short *uwal, const short *dwal, double texturemid, float *swal, fixed_t *lwal, DrawerFunc drawcolumn) + void RenderWallPart::ProcessWallWorker(const short *uwal, const short *dwal, double texturemid, float *swal, fixed_t *lwal, WallDrawerFunc drawcolumn) { if (rw_pic->UseType == FTexture::TEX_Null) return; @@ -388,7 +388,7 @@ namespace swrenderer void 
RenderWallPart::ProcessTranslucentWall(const short *uwal, const short *dwal, double texturemid, float *swal, fixed_t *lwal) { - DrawerFunc drawcol1 = drawerargs.GetTransMaskDrawer(); + WallDrawerFunc drawcol1 = drawerargs.GetTransMaskDrawer(); if (drawcol1 == nullptr) { // The current translucency is unsupported, so draw with regular ProcessMaskedWall instead. @@ -536,7 +536,7 @@ namespace swrenderer } } - void RenderWallPart::Render(const DrawerArgs &drawerargs, sector_t *frontsector, seg_t *curline, const FWallCoords &WallC, FTexture *pic, int x1, int x2, const short *walltop, const short *wallbottom, double texturemid, float *swall, fixed_t *lwall, double yscale, double top, double bottom, bool mask, int wallshade, fixed_t xoffset, float light, float lightstep, FLightNode *light_list, bool foggy, FDynamicColormap *basecolormap) + void RenderWallPart::Render(const WallDrawerArgs &drawerargs, sector_t *frontsector, seg_t *curline, const FWallCoords &WallC, FTexture *pic, int x1, int x2, const short *walltop, const short *wallbottom, double texturemid, float *swall, fixed_t *lwall, double yscale, double top, double bottom, bool mask, int wallshade, fixed_t xoffset, float light, float lightstep, FLightNode *light_list, bool foggy, FDynamicColormap *basecolormap) { this->drawerargs = drawerargs; this->x1 = x1; diff --git a/src/swrenderer/line/r_walldraw.h b/src/swrenderer/line/r_walldraw.h index 24060129ce..8b2c24491f 100644 --- a/src/swrenderer/line/r_walldraw.h +++ b/src/swrenderer/line/r_walldraw.h @@ -34,7 +34,7 @@ namespace swrenderer { public: void Render( - const DrawerArgs &drawerargs, + const WallDrawerArgs &drawerargs, sector_t *frontsector, seg_t *curline, const FWallCoords &WallC, @@ -65,8 +65,8 @@ namespace swrenderer void ProcessTranslucentWall(const short *uwal, const short *dwal, double texturemid, float *swal, fixed_t *lwal); void ProcessMaskedWall(const short *uwal, const short *dwal, double texturemid, float *swal, fixed_t *lwal); void 
ProcessNormalWall(const short *uwal, const short *dwal, double texturemid, float *swal, fixed_t *lwal); - void ProcessWallWorker(const short *uwal, const short *dwal, double texturemid, float *swal, fixed_t *lwal, DrawerFunc drawcolumn); - void Draw1Column(int x, int y1, int y2, WallSampler &sampler, DrawerFunc draw1column); + void ProcessWallWorker(const short *uwal, const short *dwal, double texturemid, float *swal, fixed_t *lwal, WallDrawerFunc drawcolumn); + void Draw1Column(int x, int y1, int y2, WallSampler &sampler, WallDrawerFunc draw1column); int x1 = 0; int x2 = 0; @@ -85,7 +85,7 @@ namespace swrenderer FLightNode *light_list = nullptr; bool mask = false; - DrawerArgs drawerargs; + WallDrawerArgs drawerargs; }; struct WallSampler diff --git a/src/swrenderer/plane/r_flatplane.h b/src/swrenderer/plane/r_flatplane.h index a47ccca272..385305cd09 100644 --- a/src/swrenderer/plane/r_flatplane.h +++ b/src/swrenderer/plane/r_flatplane.h @@ -42,7 +42,7 @@ namespace swrenderer double basexfrac, baseyfrac; VisiblePlaneLight *light_list; - DrawerArgs drawerargs; + SpanDrawerArgs drawerargs; static float yslope[MAXHEIGHT]; }; @@ -55,6 +55,6 @@ namespace swrenderer private: void RenderLine(int y, int x1, int x2) override; - DrawerArgs drawerargs; + SpanDrawerArgs drawerargs; }; } diff --git a/src/swrenderer/plane/r_skyplane.h b/src/swrenderer/plane/r_skyplane.h index 969b8deae9..e62032a9df 100644 --- a/src/swrenderer/plane/r_skyplane.h +++ b/src/swrenderer/plane/r_skyplane.h @@ -38,6 +38,6 @@ namespace swrenderer double skymid = 0.0; angle_t skyangle = 0; - DrawerArgs drawerargs; + SkyDrawerArgs drawerargs; }; } diff --git a/src/swrenderer/plane/r_slopeplane.h b/src/swrenderer/plane/r_slopeplane.h index dd0820ab2a..ccdfb316d2 100644 --- a/src/swrenderer/plane/r_slopeplane.h +++ b/src/swrenderer/plane/r_slopeplane.h @@ -33,6 +33,6 @@ namespace swrenderer fixed_t pviewx, pviewy; fixed_t xscale, yscale; FDynamicColormap *basecolormap; - DrawerArgs drawerargs; + 
SpanDrawerArgs drawerargs; }; } diff --git a/src/swrenderer/things/r_decal.cpp b/src/swrenderer/things/r_decal.cpp index 9a831f59b3..ad830101f9 100644 --- a/src/swrenderer/things/r_decal.cpp +++ b/src/swrenderer/things/r_decal.cpp @@ -274,7 +274,7 @@ namespace swrenderer { int x = x1; - DrawerArgs drawerargs; + ColumnDrawerArgs drawerargs; if (cameraLight->fixedlightlev >= 0) drawerargs.SetColorMapLight((r_fullbrightignoresectorcolor) ? &FullNormalLight : usecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->fixedlightlev)); @@ -315,7 +315,7 @@ namespace swrenderer } while (needrepeat--); } - void RenderDecal::DrawColumn(DrawerArgs &drawerargs, int x, FTexture *WallSpriteTile, const ProjectedWallTexcoords &walltexcoords, double texturemid, float maskedScaleY, bool sprflipvert, const short *mfloorclip, const short *mceilingclip) + void RenderDecal::DrawColumn(ColumnDrawerArgs &drawerargs, int x, FTexture *WallSpriteTile, const ProjectedWallTexcoords &walltexcoords, double texturemid, float maskedScaleY, bool sprflipvert, const short *mfloorclip, const short *mceilingclip) { float iscale = walltexcoords.VStep[x] * maskedScaleY; double spryscale = 1 / iscale; diff --git a/src/swrenderer/things/r_decal.h b/src/swrenderer/things/r_decal.h index 7124369885..00f2326516 100644 --- a/src/swrenderer/things/r_decal.h +++ b/src/swrenderer/things/r_decal.h @@ -20,7 +20,7 @@ namespace swrenderer { struct DrawSegment; class ProjectedWallTexcoords; - class DrawerArgs; + class ColumnDrawerArgs; class RenderDecal { @@ -29,6 +29,6 @@ namespace swrenderer private: static void Render(side_t *wall, DBaseDecal *first, DrawSegment *clipper, int wallshade, float lightleft, float lightstep, seg_t *curline, const FWallCoords &wallC, bool foggy, FDynamicColormap *basecolormap, const short *walltop, const short *wallbottom, int pass); - static void DrawColumn(DrawerArgs &drawerargs, int x, FTexture *WallSpriteTile, const ProjectedWallTexcoords &walltexcoords, double texturemid, float maskedScaleY, 
bool sprflipvert, const short *mfloorclip, const short *mceilingclip); + static void DrawColumn(ColumnDrawerArgs &drawerargs, int x, FTexture *WallSpriteTile, const ProjectedWallTexcoords &walltexcoords, double texturemid, float maskedScaleY, bool sprflipvert, const short *mfloorclip, const short *mceilingclip); }; } diff --git a/src/swrenderer/things/r_playersprite.cpp b/src/swrenderer/things/r_playersprite.cpp index 051fd403c5..2d937bef70 100644 --- a/src/swrenderer/things/r_playersprite.cpp +++ b/src/swrenderer/things/r_playersprite.cpp @@ -587,7 +587,7 @@ namespace swrenderer return; } - DrawerArgs drawerargs; + ColumnDrawerArgs drawerargs; drawerargs.SetColorMapLight(Light.BaseColormap, 0, Light.ColormapNum << FRACBITS); FDynamicColormap *basecolormap = static_cast(Light.BaseColormap); diff --git a/src/swrenderer/things/r_sprite.cpp b/src/swrenderer/things/r_sprite.cpp index 18aebe8201..5110f550fb 100644 --- a/src/swrenderer/things/r_sprite.cpp +++ b/src/swrenderer/things/r_sprite.cpp @@ -245,7 +245,7 @@ namespace swrenderer return; } - DrawerArgs drawerargs; + ColumnDrawerArgs drawerargs; drawerargs.SetColorMapLight(vis->Light.BaseColormap, 0, vis->Light.ColormapNum << FRACBITS); FDynamicColormap *basecolormap = static_cast(vis->Light.BaseColormap); diff --git a/src/swrenderer/things/r_voxel.cpp b/src/swrenderer/things/r_voxel.cpp index d4ce963043..bc6e8ad65c 100644 --- a/src/swrenderer/things/r_voxel.cpp +++ b/src/swrenderer/things/r_voxel.cpp @@ -187,7 +187,7 @@ namespace swrenderer FDynamicColormap *basecolormap = static_cast(sprite->Light.BaseColormap); - DrawerArgs drawerargs; + ColumnDrawerArgs drawerargs; drawerargs.SetColorMapLight(sprite->Light.BaseColormap, 0, sprite->Light.ColormapNum << FRACBITS); bool visible = drawerargs.SetPatchStyle(sprite->RenderStyle, sprite->Alpha, sprite->Translation, sprite->FillColor, basecolormap); @@ -311,7 +311,7 @@ namespace swrenderer return (kvxslab_t*)(((uint8_t*)slab) + 3 + slab->zleng); } - void 
RenderVoxel::FillBox(DrawerArgs &drawerargs, DVector3 origin, double extentX, double extentY, int color, short *cliptop, short *clipbottom, bool viewspace, bool pixelstretch) + void RenderVoxel::FillBox(ColumnDrawerArgs &drawerargs, DVector3 origin, double extentX, double extentY, int color, short *cliptop, short *clipbottom, bool viewspace, bool pixelstretch) { double viewX, viewY, viewZ; if (viewspace) diff --git a/src/swrenderer/things/r_voxel.h b/src/swrenderer/things/r_voxel.h index 7762330a21..f08f33654c 100644 --- a/src/swrenderer/things/r_voxel.h +++ b/src/swrenderer/things/r_voxel.h @@ -31,7 +31,7 @@ struct FVoxel; namespace swrenderer { - class DrawerArgs; + class ColumnDrawerArgs; // [RH] A c-buffer. Used for keeping track of offscreen voxel spans. struct FCoverageBuffer @@ -83,7 +83,7 @@ namespace swrenderer enum { DVF_OFFSCREEN = 1, DVF_SPANSONLY = 2, DVF_MIRRORED = 4 }; - static void FillBox(DrawerArgs &drawerargs, DVector3 origin, double extentX, double extentY, int color, short *cliptop, short *clipbottom, bool viewspace, bool pixelstretch); + static void FillBox(ColumnDrawerArgs &drawerargs, DVector3 origin, double extentX, double extentY, int color, short *cliptop, short *clipbottom, bool viewspace, bool pixelstretch); static kvxslab_t *GetSlabStart(const FVoxelMipLevel &mip, int x, int y); static kvxslab_t *GetSlabEnd(const FVoxelMipLevel &mip, int x, int y); diff --git a/src/swrenderer/things/r_wallsprite.cpp b/src/swrenderer/things/r_wallsprite.cpp index bad1f07e17..94a0d294c2 100644 --- a/src/swrenderer/things/r_wallsprite.cpp +++ b/src/swrenderer/things/r_wallsprite.cpp @@ -179,7 +179,7 @@ namespace swrenderer rereadcolormap = false; } - DrawerArgs drawerargs; + ColumnDrawerArgs drawerargs; int shade = LIGHT2SHADE(spr->sector->lightlevel + R_ActualExtraLight(spr->foggy)); double GlobVis = LightVisibility::Instance()->WallGlobVis(); @@ -245,7 +245,7 @@ namespace swrenderer } } - void RenderWallSprite::DrawColumn(DrawerArgs &drawerargs, int x, 
FTexture *WallSpriteTile, const ProjectedWallTexcoords &walltexcoords, double texturemid, float maskedScaleY, bool sprflipvert, const short *mfloorclip, const short *mceilingclip) + void RenderWallSprite::DrawColumn(ColumnDrawerArgs &drawerargs, int x, FTexture *WallSpriteTile, const ProjectedWallTexcoords &walltexcoords, double texturemid, float maskedScaleY, bool sprflipvert, const short *mfloorclip, const short *mceilingclip) { float iscale = walltexcoords.VStep[x] * maskedScaleY; double spryscale = 1 / iscale; diff --git a/src/swrenderer/things/r_wallsprite.h b/src/swrenderer/things/r_wallsprite.h index bc3fb3b413..ffdb36997a 100644 --- a/src/swrenderer/things/r_wallsprite.h +++ b/src/swrenderer/things/r_wallsprite.h @@ -18,7 +18,7 @@ namespace swrenderer { class ProjectedWallTexcoords; - class DrawerArgs; + class ColumnDrawerArgs; class RenderWallSprite : public VisibleSprite { @@ -30,7 +30,7 @@ namespace swrenderer void Render(short *cliptop, short *clipbottom, int minZ, int maxZ) override; private: - static void DrawColumn(DrawerArgs &drawerargs, int x, FTexture *WallSpriteTile, const ProjectedWallTexcoords &walltexcoords, double texturemid, float maskedScaleY, bool sprflipvert, const short *mfloorclip, const short *mceilingclip); + static void DrawColumn(ColumnDrawerArgs &drawerargs, int x, FTexture *WallSpriteTile, const ProjectedWallTexcoords &walltexcoords, double texturemid, float maskedScaleY, bool sprflipvert, const short *mfloorclip, const short *mceilingclip); FWallCoords wallc; uint32_t Translation = 0; diff --git a/src/v_draw.cpp b/src/v_draw.cpp index d4e077eb62..c5240c34ae 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -184,7 +184,7 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) translation = parms.remap->Remap; } - DrawerArgs drawerargs; + ColumnDrawerArgs drawerargs; if (translation != NULL) { @@ -1387,7 +1387,7 @@ void DCanvas::FillSimplePoly(FTexture *tex, FVector2 *points, int npoints, sinrot = 
sin(rotation.Radians()); // Setup constant texture mapping parameters. - DrawerArgs drawerargs; + SpanDrawerArgs drawerargs; drawerargs.SetSpanTexture(tex); if (colormap) drawerargs.SetSpanColormap(colormap, clamp(shade >> FRACBITS, 0, NUMCOLORMAPS - 1)); From 162f469630a25b937581bb6a48f1a5479c4095bf Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 29 Jan 2017 10:25:32 +0100 Subject: [PATCH 778/912] Remove unused variables from the old 4 column drawers --- src/swrenderer/drawers/r_draw_pal.cpp | 28 ++++----- src/swrenderer/drawers/r_draw_pal.h | 10 +-- src/swrenderer/drawers/r_draw_rgba.cpp | 11 ++-- src/swrenderer/drawers/r_drawerargs.h | 18 ++---- src/swrenderer/line/r_walldraw.cpp | 11 ---- src/swrenderer/plane/r_skyplane.cpp | 85 +++++++++++++------------- src/swrenderer/plane/r_skyplane.h | 4 +- 7 files changed, 70 insertions(+), 97 deletions(-) diff --git a/src/swrenderer/drawers/r_draw_pal.cpp b/src/swrenderer/drawers/r_draw_pal.cpp index 0ae0e294da..0043ed98f1 100644 --- a/src/swrenderer/drawers/r_draw_pal.cpp +++ b/src/swrenderer/drawers/r_draw_pal.cpp @@ -565,14 +565,12 @@ namespace swrenderer _dest_y = args.DestY(); _count = args.dc_count; _pitch = dc_pitch; - for (int col = 0; col < 4; col++) - { - _source[col] = args.dc_wall_source[col]; - _source2[col] = args.dc_wall_source2[col]; - _sourceheight[col] = args.dc_wall_sourceheight[col]; - _iscale[col] = args.dc_wall_iscale[col]; - _texturefrac[col] = args.dc_wall_texturefrac[col]; - } + _source = args.dc_wall_source; + _source2 = args.dc_wall_source2; + _sourceheight[0] = args.dc_wall_sourceheight[0]; + _sourceheight[1] = args.dc_wall_sourceheight[1]; + _iscale = args.dc_wall_iscale; + _texturefrac = args.dc_wall_texturefrac; } void DrawSingleSky1PalCommand::Execute(DrawerThread *thread) @@ -580,11 +578,11 @@ namespace swrenderer uint8_t *dest = _dest; int count = _count; int pitch = _pitch; - const uint8_t *source0 = _source[0]; + const uint8_t *source0 = _source; int textureheight0 = 
_sourceheight[0]; - int32_t frac = _texturefrac[0]; - int32_t fracstep = _iscale[0]; + int32_t frac = _texturefrac; + int32_t fracstep = _iscale; // Find bands for top solid color, top fade, center textured, bottom fade, bottom solid color: int start_fade = 2; // How fast it should fade out @@ -711,13 +709,13 @@ namespace swrenderer uint8_t *dest = _dest; int count = _count; int pitch = _pitch; - const uint8_t *source0 = _source[0]; - const uint8_t *source1 = _source2[0]; + const uint8_t *source0 = _source; + const uint8_t *source1 = _source2; int textureheight0 = _sourceheight[0]; uint32_t maxtextureheight1 = _sourceheight[1] - 1; - int32_t frac = _texturefrac[0]; - int32_t fracstep = _iscale[0]; + int32_t frac = _texturefrac; + int32_t fracstep = _iscale; // Find bands for top solid color, top fade, center textured, bottom fade, bottom solid color: int start_fade = 2; // How fast it should fade out diff --git a/src/swrenderer/drawers/r_draw_pal.h b/src/swrenderer/drawers/r_draw_pal.h index c8c3dacb60..8dbdd51455 100644 --- a/src/swrenderer/drawers/r_draw_pal.h +++ b/src/swrenderer/drawers/r_draw_pal.h @@ -56,11 +56,11 @@ namespace swrenderer int _dest_y; int _count; int _pitch; - const uint8_t *_source[4]; - const uint8_t *_source2[4]; - int _sourceheight[4]; - uint32_t _iscale[4]; - uint32_t _texturefrac[4]; + const uint8_t *_source; + const uint8_t *_source2; + int _sourceheight[2]; + uint32_t _iscale; + uint32_t _texturefrac; }; class DrawSingleSky1PalCommand : public PalSkyCommand { public: using PalSkyCommand::PalSkyCommand; void Execute(DrawerThread *thread) override; }; diff --git a/src/swrenderer/drawers/r_draw_rgba.cpp b/src/swrenderer/drawers/r_draw_rgba.cpp index 77e74eebd8..21c5d8484e 100644 --- a/src/swrenderer/drawers/r_draw_rgba.cpp +++ b/src/swrenderer/drawers/r_draw_rgba.cpp @@ -304,13 +304,10 @@ namespace swrenderer args.dest_y = drawerargs.DestY(); args.count = drawerargs.dc_count; args.pitch = dc_pitch; - for (int i = 0; i < 4; i++) - { - 
args.texturefrac[i] = drawerargs.dc_wall_texturefrac[i]; - args.iscale[i] = drawerargs.dc_wall_iscale[i]; - args.source0[i] = (const uint32_t *)drawerargs.dc_wall_source[i]; - args.source1[i] = (const uint32_t *)drawerargs.dc_wall_source2[i]; - } + args.texturefrac[0] = drawerargs.dc_wall_texturefrac; + args.iscale[0] = drawerargs.dc_wall_iscale; + args.source0[0] = (const uint32_t *)drawerargs.dc_wall_source; + args.source1[0] = (const uint32_t *)drawerargs.dc_wall_source2; args.textureheight0 = drawerargs.dc_wall_sourceheight[0]; args.textureheight1 = drawerargs.dc_wall_sourceheight[1]; args.top_color = solid_top; diff --git a/src/swrenderer/drawers/r_drawerargs.h b/src/swrenderer/drawers/r_drawerargs.h index 053051c8c5..d478958ed8 100644 --- a/src/swrenderer/drawers/r_drawerargs.h +++ b/src/swrenderer/drawers/r_drawerargs.h @@ -86,11 +86,11 @@ namespace swrenderer class SkyDrawerArgs : public DrawerArgs { public: - const uint8_t *dc_wall_source[4]; - const uint8_t *dc_wall_source2[4]; - uint32_t dc_wall_sourceheight[4]; - uint32_t dc_wall_texturefrac[4]; - uint32_t dc_wall_iscale[4]; + const uint8_t *dc_wall_source; + const uint8_t *dc_wall_source2; + uint32_t dc_wall_sourceheight[2]; + uint32_t dc_wall_texturefrac; + uint32_t dc_wall_iscale; int dc_count; void SetDest(int x, int y); @@ -124,14 +124,6 @@ namespace swrenderer const uint8_t *dc_source2; int dc_count; - uint32_t dc_wall_texturefrac[4]; - uint32_t dc_wall_iscale[4]; - uint8_t *dc_wall_colormap[4]; - fixed_t dc_wall_light[4]; - const uint8_t *dc_wall_source[4]; - const uint8_t *dc_wall_source2[4]; - uint32_t dc_wall_texturefracx[4]; - uint32_t dc_wall_sourceheight[4]; int dc_wall_fracbits; FVector3 dc_normal; diff --git a/src/swrenderer/line/r_walldraw.cpp b/src/swrenderer/line/r_walldraw.cpp index 796788a0e7..f84ef4a133 100644 --- a/src/swrenderer/line/r_walldraw.cpp +++ b/src/swrenderer/line/r_walldraw.cpp @@ -324,17 +324,6 @@ namespace swrenderer CameraLight *cameraLight = CameraLight::Instance(); 
bool fixed = (cameraLight->fixedcolormap != NULL || cameraLight->fixedlightlev >= 0); - if (fixed) - { - drawerargs.dc_wall_colormap[0] = drawerargs.dc_colormap; - drawerargs.dc_wall_colormap[1] = drawerargs.dc_colormap; - drawerargs.dc_wall_colormap[2] = drawerargs.dc_colormap; - drawerargs.dc_wall_colormap[3] = drawerargs.dc_colormap; - drawerargs.dc_wall_light[0] = 0; - drawerargs.dc_wall_light[1] = 0; - drawerargs.dc_wall_light[2] = 0; - drawerargs.dc_wall_light[3] = 0; - } if (cameraLight->fixedcolormap) drawerargs.SetColorMapLight(cameraLight->fixedcolormap, 0, 0); diff --git a/src/swrenderer/plane/r_skyplane.cpp b/src/swrenderer/plane/r_skyplane.cpp index 9b7284b7c9..a151e4e86c 100644 --- a/src/swrenderer/plane/r_skyplane.cpp +++ b/src/swrenderer/plane/r_skyplane.cpp @@ -166,53 +166,50 @@ namespace swrenderer cameraLight->fixedcolormap = nullptr; } - void RenderSkyPlane::DrawSkyColumnStripe(int start_x, int y1, int y2, int columns, double scale, double texturemid, double yrepeat) + void RenderSkyPlane::DrawSkyColumnStripe(int start_x, int y1, int y2, double scale, double texturemid, double yrepeat) { RenderPortal *renderportal = RenderPortal::Instance(); uint32_t height = frontskytex->GetHeight(); - for (int i = 0; i < columns; i++) + double uv_stepd = skyiscale * yrepeat; + double v = (texturemid + uv_stepd * (y1 - CenterY + 0.5)) / height; + double v_step = uv_stepd / height; + + uint32_t uv_pos = (uint32_t)(v * 0x01000000); + uint32_t uv_step = (uint32_t)(v_step * 0x01000000); + + int x = start_x; + if (renderportal->MirrorFlags & RF_XFLIP) + x = (viewwidth - x); + + uint32_t ang, angle1, angle2; + + if (r_linearsky) { - double uv_stepd = skyiscale * yrepeat; - double v = (texturemid + uv_stepd * (y1 - CenterY + 0.5)) / height; - double v_step = uv_stepd / height; - - uint32_t uv_pos = (uint32_t)(v * 0x01000000); - uint32_t uv_step = (uint32_t)(v_step * 0x01000000); - - int x = start_x + i; - if (renderportal->MirrorFlags & RF_XFLIP) - x = (viewwidth - 
x); - - uint32_t ang, angle1, angle2; - - if (r_linearsky) - { - angle_t xangle = (angle_t)((0.5 - x / (double)viewwidth) * FocalTangent * ANGLE_90); - ang = (skyangle + xangle) ^ skyflip; - } - else - { - ang = (skyangle + xtoviewangle[x]) ^ skyflip; - } - angle1 = (uint32_t)((UMulScale16(ang, frontcyl) + frontpos) >> FRACBITS); - angle2 = (uint32_t)((UMulScale16(ang, backcyl) + backpos) >> FRACBITS); - - if (r_swtruecolor) - { - drawerargs.dc_wall_source[i] = (const uint8_t *)frontskytex->GetColumnBgra(angle1, nullptr); - drawerargs.dc_wall_source2[i] = backskytex ? (const uint8_t *)backskytex->GetColumnBgra(angle2, nullptr) : nullptr; - } - else - { - drawerargs.dc_wall_source[i] = (const uint8_t *)frontskytex->GetColumn(angle1, nullptr); - drawerargs.dc_wall_source2[i] = backskytex ? (const uint8_t *)backskytex->GetColumn(angle2, nullptr) : nullptr; - } - - drawerargs.dc_wall_iscale[i] = uv_step; - drawerargs.dc_wall_texturefrac[i] = uv_pos; + angle_t xangle = (angle_t)((0.5 - x / (double)viewwidth) * FocalTangent * ANGLE_90); + ang = (skyangle + xangle) ^ skyflip; } + else + { + ang = (skyangle + xtoviewangle[x]) ^ skyflip; + } + angle1 = (uint32_t)((UMulScale16(ang, frontcyl) + frontpos) >> FRACBITS); + angle2 = (uint32_t)((UMulScale16(ang, backcyl) + backpos) >> FRACBITS); + + if (r_swtruecolor) + { + drawerargs.dc_wall_source = (const uint8_t *)frontskytex->GetColumnBgra(angle1, nullptr); + drawerargs.dc_wall_source2 = backskytex ? (const uint8_t *)backskytex->GetColumnBgra(angle2, nullptr) : nullptr; + } + else + { + drawerargs.dc_wall_source = (const uint8_t *)frontskytex->GetColumn(angle1, nullptr); + drawerargs.dc_wall_source2 = backskytex ? (const uint8_t *)backskytex->GetColumn(angle2, nullptr) : nullptr; + } + + drawerargs.dc_wall_iscale = uv_step; + drawerargs.dc_wall_texturefrac = uv_pos; drawerargs.dc_wall_sourceheight[0] = height; drawerargs.dc_wall_sourceheight[1] = backskytex ? 
backskytex->GetHeight() : height; @@ -231,12 +228,12 @@ namespace swrenderer drawerargs.DrawDoubleSkyColumn(solid_top, solid_bottom, fadeSky); } - void RenderSkyPlane::DrawSkyColumn(int start_x, int y1, int y2, int columns) + void RenderSkyPlane::DrawSkyColumn(int start_x, int y1, int y2) { if (1 << frontskytex->HeightBits == frontskytex->GetHeight()) { double texturemid = skymid * frontskytex->Scale.Y + frontskytex->GetHeight(); - DrawSkyColumnStripe(start_x, y1, y2, columns, frontskytex->Scale.Y, texturemid, frontskytex->Scale.Y); + DrawSkyColumnStripe(start_x, y1, y2, frontskytex->Scale.Y, texturemid, frontskytex->Scale.Y); } else { @@ -247,7 +244,7 @@ namespace swrenderer double topfrac = fmod(skymid + iscale * (1 - CenterY), frontskytex->GetHeight()); if (topfrac < 0) topfrac += frontskytex->GetHeight(); double texturemid = topfrac - iscale * (1 - CenterY); - DrawSkyColumnStripe(start_x, y1, y2, columns, scale, texturemid, yrepeat); + DrawSkyColumnStripe(start_x, y1, y2, scale, texturemid, yrepeat); } } @@ -265,7 +262,7 @@ namespace swrenderer if (y2 <= y1) continue; - DrawSkyColumn(x, y1, y2, 1); + DrawSkyColumn(x, y1, y2); } } } diff --git a/src/swrenderer/plane/r_skyplane.h b/src/swrenderer/plane/r_skyplane.h index e62032a9df..03a62d27e6 100644 --- a/src/swrenderer/plane/r_skyplane.h +++ b/src/swrenderer/plane/r_skyplane.h @@ -25,8 +25,8 @@ namespace swrenderer private: void DrawSky(VisiblePlane *pl); - void DrawSkyColumnStripe(int start_x, int y1, int y2, int columns, double scale, double texturemid, double yrepeat); - void DrawSkyColumn(int start_x, int y1, int y2, int columns); + void DrawSkyColumnStripe(int start_x, int y1, int y2, double scale, double texturemid, double yrepeat); + void DrawSkyColumn(int start_x, int y1, int y2); FTexture *frontskytex = nullptr; FTexture *backskytex = nullptr; From fd459b1f13bb0796c13b0887d721e179a76175e5 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 30 Jan 2017 04:35:28 +0100 Subject: [PATCH 779/912] 
Remove redundant span drawer args --- src/swrenderer/drawers/r_draw_pal.cpp | 4 ++-- src/swrenderer/drawers/r_draw_rgba.cpp | 30 ++++++++++++------------- src/swrenderer/drawers/r_drawerargs.cpp | 28 ----------------------- src/swrenderer/drawers/r_drawerargs.h | 6 ----- src/swrenderer/plane/r_flatplane.cpp | 8 +++---- src/swrenderer/plane/r_slopeplane.cpp | 6 ++--- src/v_draw.cpp | 4 ++-- 7 files changed, 26 insertions(+), 60 deletions(-) diff --git a/src/swrenderer/drawers/r_draw_pal.cpp b/src/swrenderer/drawers/r_draw_pal.cpp index 0043ed98f1..0248363cbf 100644 --- a/src/swrenderer/drawers/r_draw_pal.cpp +++ b/src/swrenderer/drawers/r_draw_pal.cpp @@ -1849,7 +1849,7 @@ namespace swrenderer PalSpanCommand::PalSpanCommand(const SpanDrawerArgs &args) { _source = args.ds_source; - _colormap = args.ds_colormap; + _colormap = args.dc_colormap; _xfrac = args.ds_xfrac; _yfrac = args.ds_yfrac; _y = args.ds_y; @@ -2624,7 +2624,7 @@ namespace swrenderer DrawTiltedSpanPalCommand::DrawTiltedSpanPalCommand(const SpanDrawerArgs &args, int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy, FDynamicColormap *basecolormap) : y(y), x1(x1), x2(x2), plane_sz(plane_sz), plane_su(plane_su), plane_sv(plane_sv), plane_shade(plane_shade), planeshade(planeshade), planelightfloat(planelightfloat), pviewx(pviewx), pviewy(pviewy) { - _colormap = args.ds_colormap; + _colormap = args.dc_colormap; _destorg = dc_destorg; _ybits = args.ds_ybits; _xbits = args.ds_xbits; diff --git a/src/swrenderer/drawers/r_draw_rgba.cpp b/src/swrenderer/drawers/r_draw_rgba.cpp index 21c5d8484e..cf3ccf48fc 100644 --- a/src/swrenderer/drawers/r_draw_rgba.cpp +++ b/src/swrenderer/drawers/r_draw_rgba.cpp @@ -74,20 +74,20 @@ namespace swrenderer args.destorg = (uint32_t*)dc_destorg; args.destpitch = dc_pitch; args.source = (const uint32_t*)drawerargs.ds_source; - args.light = 
LightBgra::calc_light_multiplier(drawerargs.ds_light); - args.light_red = drawerargs.ds_shade_constants.light_red; - args.light_green = drawerargs.ds_shade_constants.light_green; - args.light_blue = drawerargs.ds_shade_constants.light_blue; - args.light_alpha = drawerargs.ds_shade_constants.light_alpha; - args.fade_red = drawerargs.ds_shade_constants.fade_red; - args.fade_green = drawerargs.ds_shade_constants.fade_green; - args.fade_blue = drawerargs.ds_shade_constants.fade_blue; - args.fade_alpha = drawerargs.ds_shade_constants.fade_alpha; - args.desaturate = drawerargs.ds_shade_constants.desaturate; + args.light = LightBgra::calc_light_multiplier(drawerargs.dc_light); + args.light_red = drawerargs.dc_shade_constants.light_red; + args.light_green = drawerargs.dc_shade_constants.light_green; + args.light_blue = drawerargs.dc_shade_constants.light_blue; + args.light_alpha = drawerargs.dc_shade_constants.light_alpha; + args.fade_red = drawerargs.dc_shade_constants.fade_red; + args.fade_green = drawerargs.dc_shade_constants.fade_green; + args.fade_blue = drawerargs.dc_shade_constants.fade_blue; + args.fade_alpha = drawerargs.dc_shade_constants.fade_alpha; + args.desaturate = drawerargs.dc_shade_constants.desaturate; args.srcalpha = drawerargs.dc_srcalpha >> (FRACBITS - 8); args.destalpha = drawerargs.dc_destalpha >> (FRACBITS - 8); args.flags = 0; - if (drawerargs.ds_shade_constants.simple_shade) + if (drawerargs.dc_shade_constants.simple_shade) args.flags |= DrawSpanArgs::simple_shade; if (!sampler_setup(drawerargs.ds_lod, args.source, args.xbits, args.ybits, drawerargs.ds_source_mipmapped)) args.flags |= DrawSpanArgs::nearest_filter; @@ -436,7 +436,7 @@ namespace swrenderer _x2 = drawerargs.ds_x2; _y = drawerargs.ds_y; _destorg = dc_destorg; - _light = drawerargs.ds_light; + _light = drawerargs.dc_light; _color = drawerargs.ds_color; } @@ -534,8 +534,8 @@ namespace swrenderer _x2 = x2; _y = y; _destorg = dc_destorg; - _light = drawerargs.ds_light; - _shade_constants 
= drawerargs.ds_shade_constants; + _light = drawerargs.dc_light; + _shade_constants = drawerargs.dc_shade_constants; _plane_sz = plane_sz; _plane_su = plane_su; _plane_sv = plane_sv; @@ -669,7 +669,7 @@ namespace swrenderer _x2 = x2; _destorg = dc_destorg; - _light = drawerargs.ds_light; + _light = drawerargs.dc_light; _color = drawerargs.ds_color; } diff --git a/src/swrenderer/drawers/r_drawerargs.cpp b/src/swrenderer/drawers/r_drawerargs.cpp index af08e8d56f..19a4c3a4ad 100644 --- a/src/swrenderer/drawers/r_drawerargs.cpp +++ b/src/swrenderer/drawers/r_drawerargs.cpp @@ -91,29 +91,6 @@ namespace swrenderer } } - void SpanDrawerArgs::SetDSColorMapLight(FSWColormap *base_colormap, float light, int shade) - { - if (r_swtruecolor) - { - ds_shade_constants.light_red = base_colormap->Color.r * 256 / 255; - ds_shade_constants.light_green = base_colormap->Color.g * 256 / 255; - ds_shade_constants.light_blue = base_colormap->Color.b * 256 / 255; - ds_shade_constants.light_alpha = base_colormap->Color.a * 256 / 255; - ds_shade_constants.fade_red = base_colormap->Fade.r; - ds_shade_constants.fade_green = base_colormap->Fade.g; - ds_shade_constants.fade_blue = base_colormap->Fade.b; - ds_shade_constants.fade_alpha = base_colormap->Fade.a; - ds_shade_constants.desaturate = MIN(abs(base_colormap->Desaturate), 255) * 255 / 256; - ds_shade_constants.simple_shade = (base_colormap->Color.d == 0x00ffffff && base_colormap->Fade.d == 0x00000000 && base_colormap->Desaturate == 0); - ds_colormap = base_colormap->Maps; - ds_light = LIGHTSCALE(light, shade); - } - else - { - ds_colormap = base_colormap->Maps + (GETPALOOKUP(light, shade) << COLORMAPSHIFT); - } - } - void DrawerArgs::SetTranslationMap(lighttable_t *translation) { if (r_swtruecolor) @@ -156,11 +133,6 @@ namespace swrenderer ds_source_mipmapped = tex->Mipmapped() && tex->GetWidth() > 1 && tex->GetHeight() > 1; } - void SpanDrawerArgs::SetSpanColormap(FDynamicColormap *colormap, int shade) - { - SetDSColorMapLight(colormap, 
0, shade); - } - void ColumnDrawerArgs::DrawMaskedColumn(int x, fixed_t iscale, FTexture *tex, fixed_t col, double spryscale, double sprtopscreen, bool sprflipvert, const short *mfloorclip, const short *mceilingclip, bool unmasked) { // Handle the linear filtered version in a different function to reduce chances of merge conflicts from zdoom. diff --git a/src/swrenderer/drawers/r_drawerargs.h b/src/swrenderer/drawers/r_drawerargs.h index d478958ed8..57e741299a 100644 --- a/src/swrenderer/drawers/r_drawerargs.h +++ b/src/swrenderer/drawers/r_drawerargs.h @@ -140,10 +140,7 @@ namespace swrenderer class SpanDrawerArgs : public DrawerArgs { public: - void SetDSColorMapLight(FSWColormap *base_colormap, float light, int shade); - void SetSpanTexture(FTexture *tex); - void SetSpanColormap(FDynamicColormap *colormap, int shade); void DrawTiltedSpan(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy, FDynamicColormap *basecolormap); void DrawColoredSpan(int y, int x1, int x2); @@ -152,9 +149,6 @@ namespace swrenderer int ds_y; int ds_x1; int ds_x2; - uint8_t * ds_colormap; - ShadeConstants ds_shade_constants; - dsfixed_t ds_light; dsfixed_t ds_xfrac; dsfixed_t ds_yfrac; dsfixed_t ds_xstep; diff --git a/src/swrenderer/plane/r_flatplane.cpp b/src/swrenderer/plane/r_flatplane.cpp index c8e14e5da7..0035540736 100644 --- a/src/swrenderer/plane/r_flatplane.cpp +++ b/src/swrenderer/plane/r_flatplane.cpp @@ -108,16 +108,16 @@ namespace swrenderer basecolormap = colormap; GlobVis = LightVisibility::Instance()->FlatPlaneGlobVis() / planeheight; - drawerargs.ds_light = 0; + drawerargs.dc_light = 0; CameraLight *cameraLight = CameraLight::Instance(); if (cameraLight->fixedlightlev >= 0) { - drawerargs.SetDSColorMapLight(basecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->fixedlightlev)); + drawerargs.SetColorMapLight(basecolormap, 0, 
FIXEDLIGHT2SHADE(cameraLight->fixedlightlev)); plane_shade = false; } else if (cameraLight->fixedcolormap) { - drawerargs.SetDSColorMapLight(cameraLight->fixedcolormap, 0, 0); + drawerargs.SetColorMapLight(cameraLight->fixedcolormap, 0, 0); plane_shade = false; } else @@ -183,7 +183,7 @@ namespace swrenderer if (plane_shade) { // Determine lighting based on the span's distance from the viewer. - drawerargs.SetDSColorMapLight(basecolormap, (float)(GlobVis * fabs(CenterY - y)), planeshade); + drawerargs.SetColorMapLight(basecolormap, (float)(GlobVis * fabs(CenterY - y)), planeshade); } if (r_dynlights) diff --git a/src/swrenderer/plane/r_slopeplane.cpp b/src/swrenderer/plane/r_slopeplane.cpp index 599c279b46..363ff9edfe 100644 --- a/src/swrenderer/plane/r_slopeplane.cpp +++ b/src/swrenderer/plane/r_slopeplane.cpp @@ -154,17 +154,17 @@ namespace swrenderer CameraLight *cameraLight = CameraLight::Instance(); if (cameraLight->fixedlightlev >= 0) { - drawerargs.SetDSColorMapLight(basecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->fixedlightlev)); + drawerargs.SetColorMapLight(basecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->fixedlightlev)); plane_shade = false; } else if (cameraLight->fixedcolormap) { - drawerargs.SetDSColorMapLight(cameraLight->fixedcolormap, 0, 0); + drawerargs.SetColorMapLight(cameraLight->fixedcolormap, 0, 0); plane_shade = false; } else { - drawerargs.SetDSColorMapLight(basecolormap, 0, 0); + drawerargs.SetColorMapLight(basecolormap, 0, 0); plane_shade = true; planeshade = LIGHT2SHADE(pl->lightlevel); } diff --git a/src/v_draw.cpp b/src/v_draw.cpp index c5240c34ae..a016272d06 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -1390,9 +1390,9 @@ void DCanvas::FillSimplePoly(FTexture *tex, FVector2 *points, int npoints, SpanDrawerArgs drawerargs; drawerargs.SetSpanTexture(tex); if (colormap) - drawerargs.SetSpanColormap(colormap, clamp(shade >> FRACBITS, 0, NUMCOLORMAPS - 1)); + drawerargs.SetColorMapLight(colormap, 0, clamp(shade >> FRACBITS, 0, 
NUMCOLORMAPS - 1)); else - drawerargs.SetSpanColormap(&identitycolormap, 0); + drawerargs.SetColorMapLight(&identitycolormap, 0, 0); if (drawerargs.ds_xbits != 0) { scalex = double(1u << (32 - drawerargs.ds_xbits)) / scalex; From 81a20213a7014eba520b4599c0517116e2b5bf0d Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 30 Jan 2017 05:07:07 +0100 Subject: [PATCH 780/912] Force colormap/light selection to go through function calls --- src/swrenderer/drawers/r_draw_pal.cpp | 12 +-- src/swrenderer/drawers/r_draw_rgba.cpp | 86 +++++++++++----------- src/swrenderer/drawers/r_drawerargs.cpp | 98 ++++++++++++++----------- src/swrenderer/drawers/r_drawerargs.h | 18 +++-- src/swrenderer/plane/r_flatplane.cpp | 2 +- 5 files changed, 119 insertions(+), 97 deletions(-) diff --git a/src/swrenderer/drawers/r_draw_pal.cpp b/src/swrenderer/drawers/r_draw_pal.cpp index 0248363cbf..b778b27ebc 100644 --- a/src/swrenderer/drawers/r_draw_pal.cpp +++ b/src/swrenderer/drawers/r_draw_pal.cpp @@ -100,7 +100,7 @@ namespace swrenderer { _iscale = args.dc_iscale; _texturefrac = args.dc_texturefrac; - _colormap = args.dc_colormap; + _colormap = args.Colormap(); _count = args.dc_count; _source = args.dc_source; _dest = args.Dest(); @@ -870,9 +870,9 @@ namespace swrenderer _pitch = dc_pitch; _iscale = args.dc_iscale; _texturefrac = args.dc_texturefrac; - _colormap = args.dc_colormap; + _colormap = args.Colormap(); _source = args.dc_source; - _translation = args.dc_translation; + _translation = args.TranslationMap(); _color = args.dc_color; _srcblend = args.dc_srcblend; _destblend = args.dc_destblend; @@ -1849,7 +1849,7 @@ namespace swrenderer PalSpanCommand::PalSpanCommand(const SpanDrawerArgs &args) { _source = args.ds_source; - _colormap = args.dc_colormap; + _colormap = args.Colormap(); _xfrac = args.ds_xfrac; _yfrac = args.ds_yfrac; _y = args.ds_y; @@ -2624,7 +2624,7 @@ namespace swrenderer DrawTiltedSpanPalCommand::DrawTiltedSpanPalCommand(const SpanDrawerArgs &args, int y, int x1, 
int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy, FDynamicColormap *basecolormap) : y(y), x1(x1), x2(x2), plane_sz(plane_sz), plane_su(plane_su), plane_sv(plane_sv), plane_shade(plane_shade), planeshade(planeshade), planelightfloat(planelightfloat), pviewx(pviewx), pviewy(pviewy) { - _colormap = args.dc_colormap; + _colormap = args.Colormap(); _destorg = dc_destorg; _ybits = args.ds_ybits; _xbits = args.ds_xbits; @@ -2885,7 +2885,7 @@ namespace swrenderer DrawFogBoundaryLinePalCommand::DrawFogBoundaryLinePalCommand(const SpanDrawerArgs &args, int y, int x1, int x2) : PalSpanCommand(args), y(y), x1(x1), x2(x2) { - _colormap = args.dc_colormap; + _colormap = args.Colormap(); _destorg = dc_destorg; } diff --git a/src/swrenderer/drawers/r_draw_rgba.cpp b/src/swrenderer/drawers/r_draw_rgba.cpp index cf3ccf48fc..4e71ffcde5 100644 --- a/src/swrenderer/drawers/r_draw_rgba.cpp +++ b/src/swrenderer/drawers/r_draw_rgba.cpp @@ -62,6 +62,7 @@ namespace swrenderer { DrawSpanLLVMCommand::DrawSpanLLVMCommand(const SpanDrawerArgs &drawerargs) { + auto shade_constants = drawerargs.ColormapConstants(); args.xfrac = drawerargs.ds_xfrac; args.yfrac = drawerargs.ds_yfrac; args.xstep = drawerargs.ds_xstep; @@ -74,20 +75,20 @@ namespace swrenderer args.destorg = (uint32_t*)dc_destorg; args.destpitch = dc_pitch; args.source = (const uint32_t*)drawerargs.ds_source; - args.light = LightBgra::calc_light_multiplier(drawerargs.dc_light); - args.light_red = drawerargs.dc_shade_constants.light_red; - args.light_green = drawerargs.dc_shade_constants.light_green; - args.light_blue = drawerargs.dc_shade_constants.light_blue; - args.light_alpha = drawerargs.dc_shade_constants.light_alpha; - args.fade_red = drawerargs.dc_shade_constants.fade_red; - args.fade_green = drawerargs.dc_shade_constants.fade_green; - args.fade_blue = drawerargs.dc_shade_constants.fade_blue; - 
args.fade_alpha = drawerargs.dc_shade_constants.fade_alpha; - args.desaturate = drawerargs.dc_shade_constants.desaturate; + args.light = LightBgra::calc_light_multiplier(drawerargs.Light()); + args.light_red = shade_constants.light_red; + args.light_green = shade_constants.light_green; + args.light_blue = shade_constants.light_blue; + args.light_alpha = shade_constants.light_alpha; + args.fade_red = shade_constants.fade_red; + args.fade_green = shade_constants.fade_green; + args.fade_blue = shade_constants.fade_blue; + args.fade_alpha = shade_constants.fade_alpha; + args.desaturate = shade_constants.desaturate; args.srcalpha = drawerargs.dc_srcalpha >> (FRACBITS - 8); args.destalpha = drawerargs.dc_destalpha >> (FRACBITS - 8); args.flags = 0; - if (drawerargs.dc_shade_constants.simple_shade) + if (shade_constants.simple_shade) args.flags |= DrawSpanArgs::simple_shade; if (!sampler_setup(drawerargs.ds_lod, args.source, args.xbits, args.ybits, drawerargs.ds_source_mipmapped)) args.flags |= DrawSpanArgs::nearest_filter; @@ -182,6 +183,7 @@ namespace swrenderer DrawWall1LLVMCommand::DrawWall1LLVMCommand(const WallDrawerArgs &drawerargs) { + auto shade_constants = drawerargs.ColormapConstants(); args.dest = (uint32_t*)drawerargs.Dest(); args.dest_y = drawerargs.DestY(); args.pitch = dc_pitch; @@ -192,20 +194,20 @@ namespace swrenderer args.textureheight[0] = drawerargs.dc_textureheight; args.source[0] = (const uint32 *)drawerargs.dc_source; args.source2[0] = (const uint32 *)drawerargs.dc_source2; - args.light[0] = LightBgra::calc_light_multiplier(drawerargs.dc_light); - args.light_red = drawerargs.dc_shade_constants.light_red; - args.light_green = drawerargs.dc_shade_constants.light_green; - args.light_blue = drawerargs.dc_shade_constants.light_blue; - args.light_alpha = drawerargs.dc_shade_constants.light_alpha; - args.fade_red = drawerargs.dc_shade_constants.fade_red; - args.fade_green = drawerargs.dc_shade_constants.fade_green; - args.fade_blue = 
drawerargs.dc_shade_constants.fade_blue; - args.fade_alpha = drawerargs.dc_shade_constants.fade_alpha; - args.desaturate = drawerargs.dc_shade_constants.desaturate; + args.light[0] = LightBgra::calc_light_multiplier(drawerargs.Light()); + args.light_red = shade_constants.light_red; + args.light_green = shade_constants.light_green; + args.light_blue = shade_constants.light_blue; + args.light_alpha = shade_constants.light_alpha; + args.fade_red = shade_constants.fade_red; + args.fade_green = shade_constants.fade_green; + args.fade_blue = shade_constants.fade_blue; + args.fade_alpha = shade_constants.fade_alpha; + args.desaturate = shade_constants.desaturate; args.srcalpha = drawerargs.dc_srcalpha >> (FRACBITS - 8); args.destalpha = drawerargs.dc_destalpha >> (FRACBITS - 8); args.flags = 0; - if (drawerargs.dc_shade_constants.simple_shade) + if (shade_constants.simple_shade) args.flags |= DrawWallArgs::simple_shade; if (args.source2[0] == nullptr) args.flags |= DrawWallArgs::nearest_filter; @@ -246,11 +248,13 @@ namespace swrenderer DrawColumnLLVMCommand::DrawColumnLLVMCommand(const ColumnDrawerArgs &drawerargs) { + auto shade_constants = drawerargs.ColormapConstants(); + args.dest = (uint32_t*)drawerargs.Dest(); args.source = drawerargs.dc_source; args.source2 = drawerargs.dc_source2; - args.colormap = drawerargs.dc_colormap; - args.translation = drawerargs.dc_translation; + args.colormap = drawerargs.Colormap(); + args.translation = drawerargs.TranslationMap(); args.basecolors = (const uint32_t *)GPalette.BaseColors; args.pitch = dc_pitch; args.count = drawerargs.dc_count; @@ -259,22 +263,22 @@ namespace swrenderer args.texturefracx = drawerargs.dc_texturefracx; args.textureheight = drawerargs.dc_textureheight; args.texturefrac = drawerargs.dc_texturefrac; - args.light = LightBgra::calc_light_multiplier(drawerargs.dc_light); + args.light = LightBgra::calc_light_multiplier(drawerargs.Light()); args.color = LightBgra::shade_pal_index_simple(drawerargs.dc_color, 
args.light); args.srccolor = drawerargs.dc_srccolor_bgra; args.srcalpha = drawerargs.dc_srcalpha >> (FRACBITS - 8); args.destalpha = drawerargs.dc_destalpha >> (FRACBITS - 8); - args.light_red = drawerargs.dc_shade_constants.light_red; - args.light_green = drawerargs.dc_shade_constants.light_green; - args.light_blue = drawerargs.dc_shade_constants.light_blue; - args.light_alpha = drawerargs.dc_shade_constants.light_alpha; - args.fade_red = drawerargs.dc_shade_constants.fade_red; - args.fade_green = drawerargs.dc_shade_constants.fade_green; - args.fade_blue = drawerargs.dc_shade_constants.fade_blue; - args.fade_alpha = drawerargs.dc_shade_constants.fade_alpha; - args.desaturate = drawerargs.dc_shade_constants.desaturate; + args.light_red = shade_constants.light_red; + args.light_green = shade_constants.light_green; + args.light_blue = shade_constants.light_blue; + args.light_alpha = shade_constants.light_alpha; + args.fade_red = shade_constants.fade_red; + args.fade_green = shade_constants.fade_green; + args.fade_blue = shade_constants.fade_blue; + args.fade_alpha = shade_constants.fade_alpha; + args.desaturate = shade_constants.desaturate; args.flags = 0; - if (drawerargs.dc_shade_constants.simple_shade) + if (shade_constants.simple_shade) args.flags |= DrawColumnArgs::simple_shade; if (args.source2 == nullptr) args.flags |= DrawColumnArgs::nearest_filter; @@ -436,7 +440,7 @@ namespace swrenderer _x2 = drawerargs.ds_x2; _y = drawerargs.ds_y; _destorg = dc_destorg; - _light = drawerargs.dc_light; + _light = drawerargs.Light(); _color = drawerargs.ds_color; } @@ -467,8 +471,8 @@ namespace swrenderer _x2 = x2; _destorg = dc_destorg; - _light = drawerargs.dc_light; - _shade_constants = drawerargs.dc_shade_constants; + _light = drawerargs.Light(); + _shade_constants = drawerargs.ColormapConstants(); } void DrawFogBoundaryLineRGBACommand::Execute(DrawerThread *thread) @@ -534,8 +538,8 @@ namespace swrenderer _x2 = x2; _y = y; _destorg = dc_destorg; - _light = 
drawerargs.dc_light; - _shade_constants = drawerargs.dc_shade_constants; + _light = drawerargs.Light(); + _shade_constants = drawerargs.ColormapConstants(); _plane_sz = plane_sz; _plane_su = plane_su; _plane_sv = plane_sv; @@ -669,7 +673,7 @@ namespace swrenderer _x2 = x2; _destorg = dc_destorg; - _light = drawerargs.dc_light; + _light = drawerargs.Light(); _color = drawerargs.ds_color; } diff --git a/src/swrenderer/drawers/r_drawerargs.cpp b/src/swrenderer/drawers/r_drawerargs.cpp index 19a4c3a4ad..f98db9a43c 100644 --- a/src/swrenderer/drawers/r_drawerargs.cpp +++ b/src/swrenderer/drawers/r_drawerargs.cpp @@ -70,51 +70,65 @@ namespace swrenderer void DrawerArgs::SetColorMapLight(FSWColormap *base_colormap, float light, int shade) { - if (r_swtruecolor) - { - dc_shade_constants.light_red = base_colormap->Color.r * 256 / 255; - dc_shade_constants.light_green = base_colormap->Color.g * 256 / 255; - dc_shade_constants.light_blue = base_colormap->Color.b * 256 / 255; - dc_shade_constants.light_alpha = base_colormap->Color.a * 256 / 255; - dc_shade_constants.fade_red = base_colormap->Fade.r; - dc_shade_constants.fade_green = base_colormap->Fade.g; - dc_shade_constants.fade_blue = base_colormap->Fade.b; - dc_shade_constants.fade_alpha = base_colormap->Fade.a; - dc_shade_constants.desaturate = MIN(abs(base_colormap->Desaturate), 255) * 255 / 256; - dc_shade_constants.simple_shade = (base_colormap->Color.d == 0x00ffffff && base_colormap->Fade.d == 0x00000000 && base_colormap->Desaturate == 0); - dc_colormap = base_colormap->Maps; - dc_light = LIGHTSCALE(light, shade); - } - else - { - dc_colormap = base_colormap->Maps + (GETPALOOKUP(light, shade) << COLORMAPSHIFT); - } + mBaseColormap = base_colormap; + mTranslation = nullptr; + mLight = light; + mShade = shade; } void DrawerArgs::SetTranslationMap(lighttable_t *translation) { - if (r_swtruecolor) + mBaseColormap = nullptr; + mTranslation = translation; + } + + uint8_t *DrawerArgs::Colormap() const + { + if 
(mBaseColormap) { - dc_colormap = nullptr; - dc_translation = translation; - dc_shade_constants.light_red = 256; - dc_shade_constants.light_green = 256; - dc_shade_constants.light_blue = 256; - dc_shade_constants.light_alpha = 256; - dc_shade_constants.fade_red = 0; - dc_shade_constants.fade_green = 0; - dc_shade_constants.fade_blue = 0; - dc_shade_constants.fade_alpha = 256; - dc_shade_constants.desaturate = 0; - dc_shade_constants.simple_shade = true; - dc_light = 0; + if (r_swtruecolor) + return mBaseColormap->Maps; + else + return mBaseColormap->Maps + (GETPALOOKUP(mLight, mShade) << COLORMAPSHIFT); } else { - dc_colormap = translation; + return mTranslation; } } + ShadeConstants DrawerArgs::ColormapConstants() const + { + ShadeConstants shadeConstants; + if (mBaseColormap) + { + shadeConstants.light_red = mBaseColormap->Color.r * 256 / 255; + shadeConstants.light_green = mBaseColormap->Color.g * 256 / 255; + shadeConstants.light_blue = mBaseColormap->Color.b * 256 / 255; + shadeConstants.light_alpha = mBaseColormap->Color.a * 256 / 255; + shadeConstants.fade_red = mBaseColormap->Fade.r; + shadeConstants.fade_green = mBaseColormap->Fade.g; + shadeConstants.fade_blue = mBaseColormap->Fade.b; + shadeConstants.fade_alpha = mBaseColormap->Fade.a; + shadeConstants.desaturate = MIN(abs(mBaseColormap->Desaturate), 255) * 255 / 256; + shadeConstants.simple_shade = (mBaseColormap->Color.d == 0x00ffffff && mBaseColormap->Fade.d == 0x00000000 && mBaseColormap->Desaturate == 0); + } + else + { + shadeConstants.light_red = 256; + shadeConstants.light_green = 256; + shadeConstants.light_blue = 256; + shadeConstants.light_alpha = 256; + shadeConstants.fade_red = 0; + shadeConstants.fade_green = 0; + shadeConstants.fade_blue = 0; + shadeConstants.fade_alpha = 256; + shadeConstants.desaturate = 0; + shadeConstants.simple_shade = true; + } + return shadeConstants; + } + void SpanDrawerArgs::SetSpanTexture(FTexture *tex) { tex->GetWidth(); @@ -335,7 +349,7 @@ namespace swrenderer 
{ colfunc = &SWPixelFormatDrawers::FillColumn; } - else if (dc_translation == NULL) + else if (mTranslation == nullptr) { colfunc = basecolfunc; } @@ -380,7 +394,7 @@ namespace swrenderer { colfunc = &SWPixelFormatDrawers::FillAddColumn; } - else if (dc_translation == NULL) + else if (mTranslation == nullptr) { colfunc = &SWPixelFormatDrawers::DrawAddColumn; } @@ -396,7 +410,7 @@ namespace swrenderer { colfunc = &SWPixelFormatDrawers::FillAddClampColumn; } - else if (dc_translation == NULL) + else if (mTranslation == nullptr) { colfunc = &SWPixelFormatDrawers::DrawAddClampColumn; } @@ -413,7 +427,7 @@ namespace swrenderer { colfunc = &SWPixelFormatDrawers::FillSubClampColumn; } - else if (dc_translation == NULL) + else if (mTranslation == nullptr) { colfunc = &SWPixelFormatDrawers::DrawSubClampColumn; } @@ -433,7 +447,7 @@ namespace swrenderer { colfunc = &SWPixelFormatDrawers::FillRevSubClampColumn; } - else if (dc_translation == NULL) + else if (mTranslation == nullptr) { colfunc = &SWPixelFormatDrawers::DrawRevSubClampColumn; } @@ -495,16 +509,16 @@ namespace swrenderer if (translation != -1) { - dc_translation = NULL; + mTranslation = nullptr; if (translation != 0) { FRemapTable *table = TranslationToTable(translation); if (table != NULL && !table->Inactive) { if (r_swtruecolor) - dc_translation = (uint8_t*)table->Palette; + mTranslation = (uint8_t*)table->Palette; else - dc_translation = table->Remap; + mTranslation = table->Remap; } } } diff --git a/src/swrenderer/drawers/r_drawerargs.h b/src/swrenderer/drawers/r_drawerargs.h index 57e741299a..3e8fd0ee5e 100644 --- a/src/swrenderer/drawers/r_drawerargs.h +++ b/src/swrenderer/drawers/r_drawerargs.h @@ -48,10 +48,15 @@ namespace swrenderer bool SetPatchStyle(FRenderStyle style, float alpha, int translation, uint32_t color, FDynamicColormap *&basecolormap, fixed_t shadedlightshade = 0); void SetSpanStyle(bool masked, bool additive, fixed_t alpha); - // Sets dc_colormap and dc_light to their appropriate values 
depending on the output format (pal vs true color) void SetColorMapLight(FSWColormap *base_colormap, float light, int shade); void SetTranslationMap(lighttable_t *translation); + uint8_t *Colormap() const; + uint8_t *TranslationMap() const { return mTranslation; } + + ShadeConstants ColormapConstants() const; + fixed_t Light() const { return LIGHTSCALE(mLight, mShade); } + SWPixelFormatDrawers *Drawers() const; ColumnDrawerFunc colfunc; @@ -60,12 +65,6 @@ namespace swrenderer ColumnDrawerFunc transcolfunc; SpanDrawerFunc spanfunc; - uint8_t *dc_colormap; - ShadeConstants dc_shade_constants; - fixed_t dc_light = 0; - - uint8_t *dc_translation; - uint32_t *dc_srcblend; uint32_t *dc_destblend; fixed_t dc_srcalpha; @@ -81,6 +80,11 @@ namespace swrenderer private: bool SetBlendFunc(int op, fixed_t fglevel, fixed_t bglevel, int flags); static fixed_t GetAlpha(int type, fixed_t alpha); + + FSWColormap *mBaseColormap = nullptr; + float mLight = 0.0f; + int mShade = 0; + uint8_t *mTranslation = nullptr; }; class SkyDrawerArgs : public DrawerArgs diff --git a/src/swrenderer/plane/r_flatplane.cpp b/src/swrenderer/plane/r_flatplane.cpp index 0035540736..45ffa790e8 100644 --- a/src/swrenderer/plane/r_flatplane.cpp +++ b/src/swrenderer/plane/r_flatplane.cpp @@ -108,7 +108,7 @@ namespace swrenderer basecolormap = colormap; GlobVis = LightVisibility::Instance()->FlatPlaneGlobVis() / planeheight; - drawerargs.dc_light = 0; + CameraLight *cameraLight = CameraLight::Instance(); if (cameraLight->fixedlightlev >= 0) { From 36a23d60b81c0550ce87f2204a3ea86629fcbeca Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 30 Jan 2017 05:26:57 +0100 Subject: [PATCH 781/912] Make spanfunc private and local to SpanDrawerArgs --- src/swrenderer/drawers/r_drawerargs.cpp | 11 ++++++++++- src/swrenderer/drawers/r_drawerargs.h | 9 +++++++-- src/swrenderer/plane/r_flatplane.cpp | 2 +- src/v_draw.cpp | 2 +- 4 files changed, 19 insertions(+), 5 deletions(-) diff --git 
a/src/swrenderer/drawers/r_drawerargs.cpp b/src/swrenderer/drawers/r_drawerargs.cpp index f98db9a43c..880166d303 100644 --- a/src/swrenderer/drawers/r_drawerargs.cpp +++ b/src/swrenderer/drawers/r_drawerargs.cpp @@ -65,6 +65,10 @@ namespace swrenderer basecolfunc = &SWPixelFormatDrawers::DrawColumn; fuzzcolfunc = &SWPixelFormatDrawers::DrawFuzzColumn; transcolfunc = &SWPixelFormatDrawers::DrawTranslatedColumn; + } + + SpanDrawerArgs::SpanDrawerArgs() + { spanfunc = &SWPixelFormatDrawers::DrawSpan; } @@ -617,7 +621,7 @@ namespace swrenderer return nullptr; } - void DrawerArgs::SetSpanStyle(bool masked, bool additive, fixed_t alpha) + void SpanDrawerArgs::SetSpanStyle(bool masked, bool additive, fixed_t alpha) { if (masked) { @@ -673,6 +677,11 @@ namespace swrenderer } } + void SpanDrawerArgs::DrawSpan() + { + (Drawers()->*spanfunc)(*this); + } + void SpanDrawerArgs::DrawTiltedSpan(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy, FDynamicColormap *basecolormap) { Drawers()->DrawTiltedSpan(*this, y, x1, x2, plane_sz, plane_su, plane_sv, plane_shade, planeshade, planelightfloat, pviewx, pviewy, basecolormap); diff --git a/src/swrenderer/drawers/r_drawerargs.h b/src/swrenderer/drawers/r_drawerargs.h index 3e8fd0ee5e..8ce0f821ed 100644 --- a/src/swrenderer/drawers/r_drawerargs.h +++ b/src/swrenderer/drawers/r_drawerargs.h @@ -46,7 +46,6 @@ namespace swrenderer bool SetPatchStyle(FRenderStyle style, fixed_t alpha, int translation, uint32_t color, FDynamicColormap *&basecolormap, fixed_t shadedlightshade = 0); bool SetPatchStyle(FRenderStyle style, float alpha, int translation, uint32_t color, FDynamicColormap *&basecolormap, fixed_t shadedlightshade = 0); - void SetSpanStyle(bool masked, bool additive, fixed_t alpha); void SetColorMapLight(FSWColormap *base_colormap, float light, int shade); void SetTranslationMap(lighttable_t 
*translation); @@ -63,7 +62,6 @@ namespace swrenderer ColumnDrawerFunc basecolfunc; ColumnDrawerFunc fuzzcolfunc; ColumnDrawerFunc transcolfunc; - SpanDrawerFunc spanfunc; uint32_t *dc_srcblend; uint32_t *dc_destblend; @@ -144,8 +142,12 @@ namespace swrenderer class SpanDrawerArgs : public DrawerArgs { public: + SpanDrawerArgs(); + + void SetSpanStyle(bool masked, bool additive, fixed_t alpha); void SetSpanTexture(FTexture *tex); + void DrawSpan(); void DrawTiltedSpan(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy, FDynamicColormap *basecolormap); void DrawColoredSpan(int y, int x1, int x2); void DrawFogBoundaryLine(int y, int x1, int x2); @@ -170,6 +172,9 @@ namespace swrenderer FVector3 dc_viewpos_step; TriLight *dc_lights = nullptr; int dc_num_lights = 0; + + private: + SpanDrawerFunc spanfunc; }; class ColumnDrawerArgs : public DrawerArgs diff --git a/src/swrenderer/plane/r_flatplane.cpp b/src/swrenderer/plane/r_flatplane.cpp index 45ffa790e8..0663d02bfa 100644 --- a/src/swrenderer/plane/r_flatplane.cpp +++ b/src/swrenderer/plane/r_flatplane.cpp @@ -254,7 +254,7 @@ namespace swrenderer drawerargs.ds_x1 = x1; drawerargs.ds_x2 = x2; - (drawerargs.Drawers()->*drawerargs.spanfunc)(drawerargs); + drawerargs.DrawSpan(); } void RenderFlatPlane::StepColumn() diff --git a/src/v_draw.cpp b/src/v_draw.cpp index a016272d06..f7e9520ef2 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -1491,7 +1491,7 @@ void DCanvas::FillSimplePoly(FTexture *tex, FVector2 *points, int npoints, drawerargs.ds_xfrac = xs_RoundToInt(tex.X * scalex); drawerargs.ds_yfrac = xs_RoundToInt(tex.Y * scaley); - (drawerargs.Drawers()->*drawerargs.spanfunc)(drawerargs); + drawerargs.DrawSpan(); #endif } x += xinc; From e17c8c16788018f5a4acddb502081963a9d5126b Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 30 Jan 2017 11:25:25 +0100 Subject: [PATCH 
782/912] Split wall drawer selection from sprite --- src/swrenderer/drawers/r_drawerargs.cpp | 80 +++++++------ src/swrenderer/drawers/r_drawerargs.h | 123 +++++++++++--------- src/swrenderer/line/r_line.cpp | 3 +- src/swrenderer/line/r_renderdrawsegment.cpp | 19 ++- src/swrenderer/line/r_walldraw.cpp | 56 ++------- src/swrenderer/line/r_walldraw.h | 2 - 6 files changed, 132 insertions(+), 151 deletions(-) diff --git a/src/swrenderer/drawers/r_drawerargs.cpp b/src/swrenderer/drawers/r_drawerargs.cpp index 880166d303..24f25f3b88 100644 --- a/src/swrenderer/drawers/r_drawerargs.cpp +++ b/src/swrenderer/drawers/r_drawerargs.cpp @@ -59,7 +59,7 @@ namespace swrenderer return active_drawers; } - DrawerArgs::DrawerArgs() + ColumnDrawerArgs::ColumnDrawerArgs() { colfunc = &SWPixelFormatDrawers::DrawColumn; basecolfunc = &SWPixelFormatDrawers::DrawColumn; @@ -343,7 +343,7 @@ namespace swrenderer } } - bool DrawerArgs::SetBlendFunc(int op, fixed_t fglevel, fixed_t bglevel, int flags) + bool ColumnDrawerArgs::SetBlendFunc(int op, fixed_t fglevel, fixed_t bglevel, int flags) { // r_drawtrans is a seriously bad thing to turn off. I wonder if I should // just remove it completely. 
@@ -353,7 +353,7 @@ namespace swrenderer { colfunc = &SWPixelFormatDrawers::FillColumn; } - else if (mTranslation == nullptr) + else if (TranslationMap() == nullptr) { colfunc = basecolfunc; } @@ -398,7 +398,7 @@ namespace swrenderer { colfunc = &SWPixelFormatDrawers::FillAddColumn; } - else if (mTranslation == nullptr) + else if (TranslationMap() == nullptr) { colfunc = &SWPixelFormatDrawers::DrawAddColumn; } @@ -414,7 +414,7 @@ namespace swrenderer { colfunc = &SWPixelFormatDrawers::FillAddClampColumn; } - else if (mTranslation == nullptr) + else if (TranslationMap() == nullptr) { colfunc = &SWPixelFormatDrawers::DrawAddClampColumn; } @@ -431,7 +431,7 @@ namespace swrenderer { colfunc = &SWPixelFormatDrawers::FillSubClampColumn; } - else if (mTranslation == nullptr) + else if (TranslationMap() == nullptr) { colfunc = &SWPixelFormatDrawers::DrawSubClampColumn; } @@ -451,7 +451,7 @@ namespace swrenderer { colfunc = &SWPixelFormatDrawers::FillRevSubClampColumn; } - else if (mTranslation == nullptr) + else if (TranslationMap() == nullptr) { colfunc = &SWPixelFormatDrawers::DrawRevSubClampColumn; } @@ -467,7 +467,7 @@ namespace swrenderer } } - fixed_t DrawerArgs::GetAlpha(int type, fixed_t alpha) + fixed_t ColumnDrawerArgs::GetAlpha(int type, fixed_t alpha) { switch (type) { @@ -479,7 +479,7 @@ namespace swrenderer } } - bool DrawerArgs::SetPatchStyle(FRenderStyle style, fixed_t alpha, int translation, uint32_t color, FDynamicColormap *&basecolormap, fixed_t shadedlightshade) + bool ColumnDrawerArgs::SetPatchStyle(FRenderStyle style, fixed_t alpha, int translation, uint32_t color, FDynamicColormap *&basecolormap, fixed_t shadedlightshade) { fixed_t fglevel, bglevel; @@ -513,16 +513,16 @@ namespace swrenderer if (translation != -1) { - mTranslation = nullptr; + SetTranslationMap(nullptr); if (translation != 0) { FRemapTable *table = TranslationToTable(translation); if (table != NULL && !table->Inactive) { if (r_swtruecolor) - mTranslation = (uint8_t*)table->Palette; + 
SetTranslationMap((uint8_t*)table->Palette); else - mTranslation = table->Remap; + SetTranslationMap(table->Remap); } } } @@ -581,14 +581,14 @@ namespace swrenderer SetColorMapLight(&identitycolormap, 0, 0); } - if (!DrawerArgs::SetBlendFunc(style.BlendOp, fglevel, bglevel, style.Flags)) + if (!ColumnDrawerArgs::SetBlendFunc(style.BlendOp, fglevel, bglevel, style.Flags)) { return false; } return true; } - bool DrawerArgs::SetPatchStyle(FRenderStyle style, float alpha, int translation, uint32_t color, FDynamicColormap *&basecolormap, fixed_t shadedlightshade) + bool ColumnDrawerArgs::SetPatchStyle(FRenderStyle style, float alpha, int translation, uint32_t color, FDynamicColormap *&basecolormap, fixed_t shadedlightshade) { return SetPatchStyle(style, FLOAT2FIXED(alpha), translation, color, basecolormap, shadedlightshade); } @@ -600,27 +600,6 @@ namespace swrenderer dc_dest_y = y; } - WallDrawerFunc WallDrawerArgs::GetTransMaskDrawer() - { - if (colfunc == &SWPixelFormatDrawers::DrawAddColumn) - { - return &SWPixelFormatDrawers::DrawWallAddColumn; - } - if (colfunc == &SWPixelFormatDrawers::DrawAddClampColumn) - { - return &SWPixelFormatDrawers::DrawWallAddClampColumn; - } - if (colfunc == &SWPixelFormatDrawers::DrawSubClampColumn) - { - return &SWPixelFormatDrawers::DrawWallSubClampColumn; - } - if (colfunc == &SWPixelFormatDrawers::DrawRevSubClampColumn) - { - return &SWPixelFormatDrawers::DrawWallRevSubClampColumn; - } - return nullptr; - } - void SpanDrawerArgs::SetSpanStyle(bool masked, bool additive, fixed_t alpha) { if (masked) @@ -677,6 +656,37 @@ namespace swrenderer } } + void WallDrawerArgs::SetStyle(bool masked, bool additive, fixed_t alpha) + { + if (alpha < OPAQUE || additive) + { + if (!additive) + { + wallfunc = &SWPixelFormatDrawers::DrawWallAddColumn; + dc_srcblend = Col2RGB8[alpha >> 10]; + dc_destblend = Col2RGB8[(OPAQUE - alpha) >> 10]; + dc_srcalpha = alpha; + dc_destalpha = OPAQUE - alpha; + } + else + { + wallfunc = 
&SWPixelFormatDrawers::DrawWallAddClampColumn; + dc_srcblend = Col2RGB8_LessPrecision[alpha >> 10]; + dc_destblend = Col2RGB8_LessPrecision[FRACUNIT >> 10]; + dc_srcalpha = alpha; + dc_destalpha = FRACUNIT; + } + } + else if (masked) + { + wallfunc = &SWPixelFormatDrawers::DrawWallMaskedColumn; + } + else + { + wallfunc = &SWPixelFormatDrawers::DrawWallColumn; + } + } + void SpanDrawerArgs::DrawSpan() { (Drawers()->*spanfunc)(*this); diff --git a/src/swrenderer/drawers/r_drawerargs.h b/src/swrenderer/drawers/r_drawerargs.h index 8ce0f821ed..ca624a6626 100644 --- a/src/swrenderer/drawers/r_drawerargs.h +++ b/src/swrenderer/drawers/r_drawerargs.h @@ -42,11 +42,6 @@ namespace swrenderer class DrawerArgs { public: - DrawerArgs(); - - bool SetPatchStyle(FRenderStyle style, fixed_t alpha, int translation, uint32_t color, FDynamicColormap *&basecolormap, fixed_t shadedlightshade = 0); - bool SetPatchStyle(FRenderStyle style, float alpha, int translation, uint32_t color, FDynamicColormap *&basecolormap, fixed_t shadedlightshade = 0); - void SetColorMapLight(FSWColormap *base_colormap, float light, int shade); void SetTranslationMap(lighttable_t *translation); @@ -58,27 +53,7 @@ namespace swrenderer SWPixelFormatDrawers *Drawers() const; - ColumnDrawerFunc colfunc; - ColumnDrawerFunc basecolfunc; - ColumnDrawerFunc fuzzcolfunc; - ColumnDrawerFunc transcolfunc; - - uint32_t *dc_srcblend; - uint32_t *dc_destblend; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; - - int dc_color = 0; - uint32_t dc_srccolor; - uint32_t dc_srccolor_bgra; - - protected: - bool drawer_needs_pal_input = false; - private: - bool SetBlendFunc(int op, fixed_t fglevel, fixed_t bglevel, int flags); - static fixed_t GetAlpha(int type, fixed_t alpha); - FSWColormap *mBaseColormap = nullptr; float mLight = 0.0f; int mShade = 0; @@ -108,37 +83,6 @@ namespace swrenderer int dc_dest_y = 0; }; - class WallDrawerArgs : public DrawerArgs - { - public: - void SetDest(int x, int y); - - WallDrawerFunc 
GetTransMaskDrawer(); - - uint8_t *Dest() const { return dc_dest; } - int DestY() const { return dc_dest_y; } - - fixed_t dc_iscale; - fixed_t dc_texturefrac; - uint32_t dc_texturefracx; - uint32_t dc_textureheight; - const uint8_t *dc_source; - const uint8_t *dc_source2; - int dc_count; - - int dc_wall_fracbits; - - FVector3 dc_normal; - FVector3 dc_viewpos; - FVector3 dc_viewpos_step; - TriLight *dc_lights = nullptr; - int dc_num_lights = 0; - - private: - uint8_t *dc_dest = nullptr; - int dc_dest_y = 0; - }; - class SpanDrawerArgs : public DrawerArgs { public: @@ -152,6 +96,11 @@ namespace swrenderer void DrawColoredSpan(int y, int x1, int x2); void DrawFogBoundaryLine(int y, int x1, int x2); + uint32_t *dc_srcblend; + uint32_t *dc_destblend; + fixed_t dc_srcalpha; + fixed_t dc_destalpha; + int ds_y; int ds_x1; int ds_x2; @@ -177,9 +126,51 @@ namespace swrenderer SpanDrawerFunc spanfunc; }; + class WallDrawerArgs : public DrawerArgs + { + public: + void SetStyle(bool masked, bool additive, fixed_t alpha); + void SetDest(int x, int y); + + uint8_t *Dest() const { return dc_dest; } + int DestY() const { return dc_dest_y; } + + uint32_t *dc_srcblend; + uint32_t *dc_destblend; + fixed_t dc_srcalpha; + fixed_t dc_destalpha; + + fixed_t dc_iscale; + fixed_t dc_texturefrac; + uint32_t dc_texturefracx; + uint32_t dc_textureheight; + const uint8_t *dc_source; + const uint8_t *dc_source2; + int dc_count; + + int dc_wall_fracbits; + + FVector3 dc_normal; + FVector3 dc_viewpos; + FVector3 dc_viewpos_step; + TriLight *dc_lights = nullptr; + int dc_num_lights = 0; + + WallDrawerFunc wallfunc = nullptr; + + private: + uint8_t *dc_dest = nullptr; + int dc_dest_y = 0; + }; + class ColumnDrawerArgs : public DrawerArgs { public: + ColumnDrawerArgs(); + + bool SetPatchStyle(FRenderStyle style, fixed_t alpha, int translation, uint32_t color, FDynamicColormap *&basecolormap, fixed_t shadedlightshade = 0); + bool SetPatchStyle(FRenderStyle style, float alpha, int translation, uint32_t 
color, FDynamicColormap *&basecolormap, fixed_t shadedlightshade = 0); + void DrawMaskedColumn(int x, fixed_t iscale, FTexture *texture, fixed_t column, double spryscale, double sprtopscreen, bool sprflipvert, const short *mfloorclip, const short *mceilingclip, bool unmasked = false); void FillColumn(); @@ -191,19 +182,37 @@ namespace swrenderer int dc_x; int dc_yl; int dc_yh; + fixed_t dc_iscale; fixed_t dc_texturefrac; + uint32_t dc_texturefracx; uint32_t dc_textureheight; const uint8_t *dc_source; const uint8_t *dc_source2; - uint32_t dc_texturefracx; int dc_count; + int dc_color = 0; + uint32_t dc_srccolor; + uint32_t dc_srccolor_bgra; + + uint32_t *dc_srcblend; + uint32_t *dc_destblend; + fixed_t dc_srcalpha; + fixed_t dc_destalpha; + + ColumnDrawerFunc colfunc; + ColumnDrawerFunc basecolfunc; + ColumnDrawerFunc fuzzcolfunc; + ColumnDrawerFunc transcolfunc; + private: + bool SetBlendFunc(int op, fixed_t fglevel, fixed_t bglevel, int flags); + static fixed_t GetAlpha(int type, fixed_t alpha); void DrawMaskedColumnBgra(int x, fixed_t iscale, FTexture *tex, fixed_t column, double spryscale, double sprtopscreen, bool sprflipvert, const short *mfloorclip, const short *mceilingclip, bool unmasked); uint8_t *dc_dest = nullptr; int dc_dest_y = 0; + bool drawer_needs_pal_input = false; }; void R_InitColumnDrawers(); diff --git a/src/swrenderer/line/r_line.cpp b/src/swrenderer/line/r_line.cpp index 6b604d8271..7c1a6b78a9 100644 --- a/src/swrenderer/line/r_line.cpp +++ b/src/swrenderer/line/r_line.cpp @@ -932,8 +932,7 @@ namespace swrenderer WallDrawerArgs drawerargs; - // [RH] Color if not texturing line - drawerargs.dc_color = (((int)(curline - segs) * 8) + 4) & 255; + drawerargs.SetStyle(false, false, OPAQUE); CameraLight *cameraLight = CameraLight::Instance(); if (cameraLight->fixedlightlev >= 0) diff --git a/src/swrenderer/line/r_renderdrawsegment.cpp b/src/swrenderer/line/r_renderdrawsegment.cpp index faa635dd7e..8096056bcd 100644 --- 
a/src/swrenderer/line/r_renderdrawsegment.cpp +++ b/src/swrenderer/line/r_renderdrawsegment.cpp @@ -66,15 +66,15 @@ namespace swrenderer curline = ds->curline; - FDynamicColormap *patchstylecolormap = nullptr; + float alpha = (float)MIN(curline->linedef->alpha, 1.); + bool additive = (curline->linedef->flags & ML_ADDTRANS) != 0; WallDrawerArgs walldrawerargs; - walldrawerargs.SetPatchStyle(LegacyRenderStyles[curline->linedef->flags & ML_ADDTRANS ? STYLE_Add : STYLE_Translucent], - (float)MIN(curline->linedef->alpha, 1.), 0, 0, patchstylecolormap); + walldrawerargs.SetStyle(true, additive, FLOAT2FIXED(alpha)); ColumnDrawerArgs columndrawerargs; - bool visible = columndrawerargs.SetPatchStyle(LegacyRenderStyles[curline->linedef->flags & ML_ADDTRANS ? STYLE_Add : STYLE_Translucent], - (float)MIN(curline->linedef->alpha, 1.), 0, 0, patchstylecolormap); + FDynamicColormap *patchstylecolormap = nullptr; + bool visible = columndrawerargs.SetPatchStyle(LegacyRenderStyles[additive ? STYLE_Add : STYLE_Translucent], alpha, 0, 0, patchstylecolormap); if (!visible && !ds->bFogBoundary && !ds->bFakeBoundary) { @@ -394,13 +394,12 @@ namespace swrenderer double yscale; fixed_t Alpha = Scale(rover->alpha, OPAQUE, 255); - WallDrawerArgs drawerargs; - bool visible = drawerargs.SetPatchStyle(LegacyRenderStyles[rover->flags & FF_ADDITIVETRANS ? 
STYLE_Add : STYLE_Translucent], - Alpha, 0, 0, basecolormap); - - if (!visible) + if (Alpha <= 0) return; + WallDrawerArgs drawerargs; + drawerargs.SetStyle(true, (rover->flags & FF_ADDITIVETRANS) != 0, Alpha); + rw_lightstep = ds->lightstep; rw_light = ds->light + (x1 - ds->x1) * rw_lightstep; diff --git a/src/swrenderer/line/r_walldraw.cpp b/src/swrenderer/line/r_walldraw.cpp index f84ef4a133..3f9fd6cc9d 100644 --- a/src/swrenderer/line/r_walldraw.cpp +++ b/src/swrenderer/line/r_walldraw.cpp @@ -360,33 +360,7 @@ namespace swrenderer void RenderWallPart::ProcessNormalWall(const short *uwal, const short *dwal, double texturemid, float *swal, fixed_t *lwal) { - ProcessWallWorker(uwal, dwal, texturemid, swal, lwal, &SWPixelFormatDrawers::DrawWallColumn); - } - - void RenderWallPart::ProcessMaskedWall(const short *uwal, const short *dwal, double texturemid, float *swal, fixed_t *lwal) - { - if (!rw_pic->bMasked) // Textures that aren't masked can use the faster ProcessNormalWall. - { - ProcessNormalWall(uwal, dwal, texturemid, swal, lwal); - } - else - { - ProcessWallWorker(uwal, dwal, texturemid, swal, lwal, &SWPixelFormatDrawers::DrawWallMaskedColumn); - } - } - - void RenderWallPart::ProcessTranslucentWall(const short *uwal, const short *dwal, double texturemid, float *swal, fixed_t *lwal) - { - WallDrawerFunc drawcol1 = drawerargs.GetTransMaskDrawer(); - if (drawcol1 == nullptr) - { - // The current translucency is unsupported, so draw with regular ProcessMaskedWall instead. 
- ProcessMaskedWall(uwal, dwal, texturemid, swal, lwal); - } - else - { - ProcessWallWorker(uwal, dwal, texturemid, swal, lwal, drawcol1); - } + ProcessWallWorker(uwal, dwal, texturemid, swal, lwal, drawerargs.wallfunc); } void RenderWallPart::ProcessStripedWall(const short *uwal, const short *dwal, double texturemid, float *swal, fixed_t *lwal) @@ -428,28 +402,20 @@ namespace swrenderer void RenderWallPart::ProcessWall(const short *uwal, const short *dwal, double texturemid, float *swal, fixed_t *lwal) { - if (mask) + // Textures that aren't masked can use the faster ProcessNormalWall. + if (!rw_pic->bMasked && drawerargs.wallfunc == &SWPixelFormatDrawers::DrawWallMaskedColumn) { - if (drawerargs.colfunc == drawerargs.basecolfunc) - { - ProcessMaskedWall(uwal, dwal, texturemid, swal, lwal); - } - else - { - ProcessTranslucentWall(uwal, dwal, texturemid, swal, lwal); - } + drawerargs.SetStyle(true, false, OPAQUE); + } + + CameraLight *cameraLight = CameraLight::Instance(); + if (cameraLight->fixedcolormap != NULL || cameraLight->fixedlightlev >= 0 || !(frontsector->e && frontsector->e->XFloor.lightlist.Size())) + { + ProcessNormalWall(uwal, dwal, texturemid, swal, lwal); } else { - CameraLight *cameraLight = CameraLight::Instance(); - if (cameraLight->fixedcolormap != NULL || cameraLight->fixedlightlev >= 0 || !(frontsector->e && frontsector->e->XFloor.lightlist.Size())) - { - ProcessNormalWall(uwal, dwal, texturemid, swal, lwal); - } - else - { - ProcessStripedWall(uwal, dwal, texturemid, swal, lwal); - } + ProcessStripedWall(uwal, dwal, texturemid, swal, lwal); } } diff --git a/src/swrenderer/line/r_walldraw.h b/src/swrenderer/line/r_walldraw.h index 8b2c24491f..72f9511309 100644 --- a/src/swrenderer/line/r_walldraw.h +++ b/src/swrenderer/line/r_walldraw.h @@ -62,8 +62,6 @@ namespace swrenderer void ProcessWallNP2(const short *uwal, const short *dwal, double texturemid, float *swal, fixed_t *lwal, double top, double bot); void ProcessWall(const short *uwal, const 
short *dwal, double texturemid, float *swal, fixed_t *lwal); void ProcessStripedWall(const short *uwal, const short *dwal, double texturemid, float *swal, fixed_t *lwal); - void ProcessTranslucentWall(const short *uwal, const short *dwal, double texturemid, float *swal, fixed_t *lwal); - void ProcessMaskedWall(const short *uwal, const short *dwal, double texturemid, float *swal, fixed_t *lwal); void ProcessNormalWall(const short *uwal, const short *dwal, double texturemid, float *swal, fixed_t *lwal); void ProcessWallWorker(const short *uwal, const short *dwal, double texturemid, float *swal, fixed_t *lwal, WallDrawerFunc drawcolumn); void Draw1Column(int x, int y1, int y2, WallSampler &sampler, WallDrawerFunc draw1column); From 2f365e7d2b8dad9291effab9a6ee5be0dff2d992 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 30 Jan 2017 11:27:58 +0100 Subject: [PATCH 783/912] Rename ColumnDrawerArgs to SpriteDrawerArgs --- src/swrenderer/drawers/r_draw.cpp | 2 +- src/swrenderer/drawers/r_draw.h | 38 ++++++++++----------- src/swrenderer/drawers/r_draw_pal.cpp | 4 +-- src/swrenderer/drawers/r_draw_pal.h | 38 ++++++++++----------- src/swrenderer/drawers/r_draw_rgba.cpp | 4 +-- src/swrenderer/drawers/r_draw_rgba.h | 38 ++++++++++----------- src/swrenderer/drawers/r_drawerargs.cpp | 20 +++++------ src/swrenderer/drawers/r_drawerargs.h | 14 ++++---- src/swrenderer/line/r_renderdrawsegment.cpp | 2 +- src/swrenderer/things/r_decal.cpp | 4 +-- src/swrenderer/things/r_decal.h | 4 +-- src/swrenderer/things/r_playersprite.cpp | 2 +- src/swrenderer/things/r_sprite.cpp | 2 +- src/swrenderer/things/r_voxel.cpp | 4 +-- src/swrenderer/things/r_voxel.h | 4 +-- src/swrenderer/things/r_wallsprite.cpp | 4 +-- src/swrenderer/things/r_wallsprite.h | 4 +-- src/v_draw.cpp | 2 +- 18 files changed, 95 insertions(+), 95 deletions(-) diff --git a/src/swrenderer/drawers/r_draw.cpp b/src/swrenderer/drawers/r_draw.cpp index db7cb8a6ba..2625db1a00 100644 --- a/src/swrenderer/drawers/r_draw.cpp 
+++ b/src/swrenderer/drawers/r_draw.cpp @@ -156,7 +156,7 @@ namespace swrenderer } } - void R_UpdateFuzzPos(const ColumnDrawerArgs &args) + void R_UpdateFuzzPos(const SpriteDrawerArgs &args) { int yl = MAX(args.dc_yl, 1); int yh = MIN(args.dc_yh, fuzzviewheight); diff --git a/src/swrenderer/drawers/r_draw.h b/src/swrenderer/drawers/r_draw.h index f34601b57f..2ab9000583 100644 --- a/src/swrenderer/drawers/r_draw.h +++ b/src/swrenderer/drawers/r_draw.h @@ -23,7 +23,7 @@ namespace swrenderer class SkyDrawerArgs; class WallDrawerArgs; class SpanDrawerArgs; - class ColumnDrawerArgs; + class SpriteDrawerArgs; extern int ylookup[MAXHEIGHT]; extern uint8_t shadetables[/*NUMCOLORMAPS*16*256*/]; @@ -58,23 +58,23 @@ namespace swrenderer virtual void DrawWallRevSubClampColumn(const WallDrawerArgs &args) = 0; virtual void DrawSingleSkyColumn(const SkyDrawerArgs &args, uint32_t solid_top, uint32_t solid_bottom, bool fadeSky) = 0; virtual void DrawDoubleSkyColumn(const SkyDrawerArgs &args, uint32_t solid_top, uint32_t solid_bottom, bool fadeSky) = 0; - virtual void DrawColumn(const ColumnDrawerArgs &args) = 0; - virtual void FillColumn(const ColumnDrawerArgs &args) = 0; - virtual void FillAddColumn(const ColumnDrawerArgs &args) = 0; - virtual void FillAddClampColumn(const ColumnDrawerArgs &args) = 0; - virtual void FillSubClampColumn(const ColumnDrawerArgs &args) = 0; - virtual void FillRevSubClampColumn(const ColumnDrawerArgs &args) = 0; - virtual void DrawFuzzColumn(const ColumnDrawerArgs &args) = 0; - virtual void DrawAddColumn(const ColumnDrawerArgs &args) = 0; - virtual void DrawTranslatedColumn(const ColumnDrawerArgs &args) = 0; - virtual void DrawTranslatedAddColumn(const ColumnDrawerArgs &args) = 0; - virtual void DrawShadedColumn(const ColumnDrawerArgs &args) = 0; - virtual void DrawAddClampColumn(const ColumnDrawerArgs &args) = 0; - virtual void DrawAddClampTranslatedColumn(const ColumnDrawerArgs &args) = 0; - virtual void DrawSubClampColumn(const ColumnDrawerArgs 
&args) = 0; - virtual void DrawSubClampTranslatedColumn(const ColumnDrawerArgs &args) = 0; - virtual void DrawRevSubClampColumn(const ColumnDrawerArgs &args) = 0; - virtual void DrawRevSubClampTranslatedColumn(const ColumnDrawerArgs &args) = 0; + virtual void DrawColumn(const SpriteDrawerArgs &args) = 0; + virtual void FillColumn(const SpriteDrawerArgs &args) = 0; + virtual void FillAddColumn(const SpriteDrawerArgs &args) = 0; + virtual void FillAddClampColumn(const SpriteDrawerArgs &args) = 0; + virtual void FillSubClampColumn(const SpriteDrawerArgs &args) = 0; + virtual void FillRevSubClampColumn(const SpriteDrawerArgs &args) = 0; + virtual void DrawFuzzColumn(const SpriteDrawerArgs &args) = 0; + virtual void DrawAddColumn(const SpriteDrawerArgs &args) = 0; + virtual void DrawTranslatedColumn(const SpriteDrawerArgs &args) = 0; + virtual void DrawTranslatedAddColumn(const SpriteDrawerArgs &args) = 0; + virtual void DrawShadedColumn(const SpriteDrawerArgs &args) = 0; + virtual void DrawAddClampColumn(const SpriteDrawerArgs &args) = 0; + virtual void DrawAddClampTranslatedColumn(const SpriteDrawerArgs &args) = 0; + virtual void DrawSubClampColumn(const SpriteDrawerArgs &args) = 0; + virtual void DrawSubClampTranslatedColumn(const SpriteDrawerArgs &args) = 0; + virtual void DrawRevSubClampColumn(const SpriteDrawerArgs &args) = 0; + virtual void DrawRevSubClampTranslatedColumn(const SpriteDrawerArgs &args) = 0; virtual void DrawSpan(const SpanDrawerArgs &args) = 0; virtual void DrawSpanMasked(const SpanDrawerArgs &args) = 0; virtual void DrawSpanTranslucent(const SpanDrawerArgs &args) = 0; @@ -91,5 +91,5 @@ namespace swrenderer void R_InitFuzzTable(int fuzzoff); void R_InitParticleTexture(); - void R_UpdateFuzzPos(const ColumnDrawerArgs &args); + void R_UpdateFuzzPos(const SpriteDrawerArgs &args); } diff --git a/src/swrenderer/drawers/r_draw_pal.cpp b/src/swrenderer/drawers/r_draw_pal.cpp index b778b27ebc..2304cff090 100644 --- a/src/swrenderer/drawers/r_draw_pal.cpp 
+++ b/src/swrenderer/drawers/r_draw_pal.cpp @@ -862,7 +862,7 @@ namespace swrenderer ///////////////////////////////////////////////////////////////////////// - PalColumnCommand::PalColumnCommand(const ColumnDrawerArgs &args) + PalColumnCommand::PalColumnCommand(const SpriteDrawerArgs &args) { _count = args.dc_count; _dest = args.Dest(); @@ -1759,7 +1759,7 @@ namespace swrenderer ///////////////////////////////////////////////////////////////////////// - DrawFuzzColumnPalCommand::DrawFuzzColumnPalCommand(const ColumnDrawerArgs &args) + DrawFuzzColumnPalCommand::DrawFuzzColumnPalCommand(const SpriteDrawerArgs &args) { _yl = args.dc_yl; _yh = args.dc_yh; diff --git a/src/swrenderer/drawers/r_draw_pal.h b/src/swrenderer/drawers/r_draw_pal.h index 8dbdd51455..50dd0ef828 100644 --- a/src/swrenderer/drawers/r_draw_pal.h +++ b/src/swrenderer/drawers/r_draw_pal.h @@ -69,7 +69,7 @@ namespace swrenderer class PalColumnCommand : public DrawerCommand { public: - PalColumnCommand(const ColumnDrawerArgs &args); + PalColumnCommand(const SpriteDrawerArgs &args); FString DebugInfo() override { return "PalColumnCommand"; } protected: @@ -110,7 +110,7 @@ namespace swrenderer class DrawFuzzColumnPalCommand : public DrawerCommand { public: - DrawFuzzColumnPalCommand(const ColumnDrawerArgs &args); + DrawFuzzColumnPalCommand(const SpriteDrawerArgs &args); void Execute(DrawerThread *thread) override; FString DebugInfo() override { return "DrawFuzzColumnPalCommand"; } @@ -257,23 +257,23 @@ namespace swrenderer void DrawWallRevSubClampColumn(const WallDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } void DrawSingleSkyColumn(const SkyDrawerArgs &args, uint32_t solid_top, uint32_t solid_bottom, bool fadeSky) override { DrawerCommandQueue::QueueCommand(args, solid_top, solid_bottom, fadeSky); } void DrawDoubleSkyColumn(const SkyDrawerArgs &args, uint32_t solid_top, uint32_t solid_bottom, bool fadeSky) override { DrawerCommandQueue::QueueCommand(args, solid_top, 
solid_bottom, fadeSky); } - void DrawColumn(const ColumnDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void FillColumn(const ColumnDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void FillAddColumn(const ColumnDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void FillAddClampColumn(const ColumnDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void FillSubClampColumn(const ColumnDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void FillRevSubClampColumn(const ColumnDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawFuzzColumn(const ColumnDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); R_UpdateFuzzPos(args); } - void DrawAddColumn(const ColumnDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawTranslatedColumn(const ColumnDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawTranslatedAddColumn(const ColumnDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawShadedColumn(const ColumnDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawAddClampColumn(const ColumnDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawAddClampTranslatedColumn(const ColumnDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawSubClampColumn(const ColumnDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawSubClampTranslatedColumn(const ColumnDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawRevSubClampColumn(const ColumnDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawRevSubClampTranslatedColumn(const ColumnDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawColumn(const SpriteDrawerArgs &args) override { 
DrawerCommandQueue::QueueCommand(args); } + void FillColumn(const SpriteDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void FillAddColumn(const SpriteDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void FillAddClampColumn(const SpriteDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void FillSubClampColumn(const SpriteDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void FillRevSubClampColumn(const SpriteDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawFuzzColumn(const SpriteDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); R_UpdateFuzzPos(args); } + void DrawAddColumn(const SpriteDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawTranslatedColumn(const SpriteDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawTranslatedAddColumn(const SpriteDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawShadedColumn(const SpriteDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawAddClampColumn(const SpriteDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawAddClampTranslatedColumn(const SpriteDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawSubClampColumn(const SpriteDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawSubClampTranslatedColumn(const SpriteDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawRevSubClampColumn(const SpriteDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawRevSubClampTranslatedColumn(const SpriteDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } void DrawSpan(const SpanDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } void DrawSpanMasked(const SpanDrawerArgs &args) override { 
DrawerCommandQueue::QueueCommand(args); } void DrawSpanTranslucent(const SpanDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } diff --git a/src/swrenderer/drawers/r_draw_rgba.cpp b/src/swrenderer/drawers/r_draw_rgba.cpp index 4e71ffcde5..183d347c1e 100644 --- a/src/swrenderer/drawers/r_draw_rgba.cpp +++ b/src/swrenderer/drawers/r_draw_rgba.cpp @@ -246,7 +246,7 @@ namespace swrenderer return "DrawColumn\n" + args.ToString(); } - DrawColumnLLVMCommand::DrawColumnLLVMCommand(const ColumnDrawerArgs &drawerargs) + DrawColumnLLVMCommand::DrawColumnLLVMCommand(const SpriteDrawerArgs &drawerargs) { auto shade_constants = drawerargs.ColormapConstants(); @@ -326,7 +326,7 @@ namespace swrenderer ///////////////////////////////////////////////////////////////////////////// - DrawFuzzColumnRGBACommand::DrawFuzzColumnRGBACommand(const ColumnDrawerArgs &drawerargs) + DrawFuzzColumnRGBACommand::DrawFuzzColumnRGBACommand(const SpriteDrawerArgs &drawerargs) { _x = drawerargs.dc_x; _yl = drawerargs.dc_yl; diff --git a/src/swrenderer/drawers/r_draw_rgba.h b/src/swrenderer/drawers/r_draw_rgba.h index ae0b259db3..671de80fed 100644 --- a/src/swrenderer/drawers/r_draw_rgba.h +++ b/src/swrenderer/drawers/r_draw_rgba.h @@ -150,7 +150,7 @@ namespace swrenderer FString DebugInfo() override; public: - DrawColumnLLVMCommand(const ColumnDrawerArgs &drawerargs); + DrawColumnLLVMCommand(const SpriteDrawerArgs &drawerargs); void Execute(DrawerThread *thread) override; }; @@ -201,7 +201,7 @@ namespace swrenderer int _fuzzviewheight; public: - DrawFuzzColumnRGBACommand(const ColumnDrawerArgs &drawerargs); + DrawFuzzColumnRGBACommand(const SpriteDrawerArgs &drawerargs); void Execute(DrawerThread *thread) override; FString DebugInfo() override; }; @@ -362,23 +362,23 @@ namespace swrenderer void DrawWallRevSubClampColumn(const WallDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } void DrawSingleSkyColumn(const SkyDrawerArgs &args, uint32_t solid_top, uint32_t 
solid_bottom, bool skyFade) override { DrawerCommandQueue::QueueCommand(args, solid_top, solid_bottom, skyFade); } void DrawDoubleSkyColumn(const SkyDrawerArgs &args, uint32_t solid_top, uint32_t solid_bottom, bool skyFade) override { DrawerCommandQueue::QueueCommand(args, solid_top, solid_bottom, skyFade); } - void DrawColumn(const ColumnDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void FillColumn(const ColumnDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void FillAddColumn(const ColumnDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void FillAddClampColumn(const ColumnDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void FillSubClampColumn(const ColumnDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void FillRevSubClampColumn(const ColumnDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawFuzzColumn(const ColumnDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); R_UpdateFuzzPos(args); } - void DrawAddColumn(const ColumnDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawTranslatedColumn(const ColumnDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawTranslatedAddColumn(const ColumnDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawShadedColumn(const ColumnDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawAddClampColumn(const ColumnDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawAddClampTranslatedColumn(const ColumnDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawSubClampColumn(const ColumnDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawSubClampTranslatedColumn(const ColumnDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void 
DrawRevSubClampColumn(const ColumnDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawRevSubClampTranslatedColumn(const ColumnDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawColumn(const SpriteDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void FillColumn(const SpriteDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void FillAddColumn(const SpriteDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void FillAddClampColumn(const SpriteDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void FillSubClampColumn(const SpriteDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void FillRevSubClampColumn(const SpriteDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawFuzzColumn(const SpriteDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); R_UpdateFuzzPos(args); } + void DrawAddColumn(const SpriteDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawTranslatedColumn(const SpriteDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawTranslatedAddColumn(const SpriteDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawShadedColumn(const SpriteDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawAddClampColumn(const SpriteDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawAddClampTranslatedColumn(const SpriteDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawSubClampColumn(const SpriteDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawSubClampTranslatedColumn(const SpriteDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawRevSubClampColumn(const SpriteDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + 
void DrawRevSubClampTranslatedColumn(const SpriteDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } void DrawSpan(const SpanDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } void DrawSpanMasked(const SpanDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } void DrawSpanTranslucent(const SpanDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } diff --git a/src/swrenderer/drawers/r_drawerargs.cpp b/src/swrenderer/drawers/r_drawerargs.cpp index 24f25f3b88..4c7f421c20 100644 --- a/src/swrenderer/drawers/r_drawerargs.cpp +++ b/src/swrenderer/drawers/r_drawerargs.cpp @@ -59,7 +59,7 @@ namespace swrenderer return active_drawers; } - ColumnDrawerArgs::ColumnDrawerArgs() + SpriteDrawerArgs::SpriteDrawerArgs() { colfunc = &SWPixelFormatDrawers::DrawColumn; basecolfunc = &SWPixelFormatDrawers::DrawColumn; @@ -151,7 +151,7 @@ namespace swrenderer ds_source_mipmapped = tex->Mipmapped() && tex->GetWidth() > 1 && tex->GetHeight() > 1; } - void ColumnDrawerArgs::DrawMaskedColumn(int x, fixed_t iscale, FTexture *tex, fixed_t col, double spryscale, double sprtopscreen, bool sprflipvert, const short *mfloorclip, const short *mceilingclip, bool unmasked) + void SpriteDrawerArgs::DrawMaskedColumn(int x, fixed_t iscale, FTexture *tex, fixed_t col, double spryscale, double sprtopscreen, bool sprflipvert, const short *mfloorclip, const short *mceilingclip, bool unmasked) { // Handle the linear filtered version in a different function to reduce chances of merge conflicts from zdoom. 
if (r_swtruecolor && !drawer_needs_pal_input) // To do: add support to R_DrawColumnHoriz_rgba @@ -228,7 +228,7 @@ namespace swrenderer } } - void ColumnDrawerArgs::DrawMaskedColumnBgra(int x, fixed_t iscale, FTexture *tex, fixed_t col, double spryscale, double sprtopscreen, bool sprflipvert, const short *mfloorclip, const short *mceilingclip, bool unmasked) + void SpriteDrawerArgs::DrawMaskedColumnBgra(int x, fixed_t iscale, FTexture *tex, fixed_t col, double spryscale, double sprtopscreen, bool sprflipvert, const short *mfloorclip, const short *mceilingclip, bool unmasked) { dc_x = x; dc_iscale = iscale; @@ -343,7 +343,7 @@ namespace swrenderer } } - bool ColumnDrawerArgs::SetBlendFunc(int op, fixed_t fglevel, fixed_t bglevel, int flags) + bool SpriteDrawerArgs::SetBlendFunc(int op, fixed_t fglevel, fixed_t bglevel, int flags) { // r_drawtrans is a seriously bad thing to turn off. I wonder if I should // just remove it completely. @@ -467,7 +467,7 @@ namespace swrenderer } } - fixed_t ColumnDrawerArgs::GetAlpha(int type, fixed_t alpha) + fixed_t SpriteDrawerArgs::GetAlpha(int type, fixed_t alpha) { switch (type) { @@ -479,7 +479,7 @@ namespace swrenderer } } - bool ColumnDrawerArgs::SetPatchStyle(FRenderStyle style, fixed_t alpha, int translation, uint32_t color, FDynamicColormap *&basecolormap, fixed_t shadedlightshade) + bool SpriteDrawerArgs::SetPatchStyle(FRenderStyle style, fixed_t alpha, int translation, uint32_t color, FDynamicColormap *&basecolormap, fixed_t shadedlightshade) { fixed_t fglevel, bglevel; @@ -581,14 +581,14 @@ namespace swrenderer SetColorMapLight(&identitycolormap, 0, 0); } - if (!ColumnDrawerArgs::SetBlendFunc(style.BlendOp, fglevel, bglevel, style.Flags)) + if (!SpriteDrawerArgs::SetBlendFunc(style.BlendOp, fglevel, bglevel, style.Flags)) { return false; } return true; } - bool ColumnDrawerArgs::SetPatchStyle(FRenderStyle style, float alpha, int translation, uint32_t color, FDynamicColormap *&basecolormap, fixed_t shadedlightshade) + bool 
SpriteDrawerArgs::SetPatchStyle(FRenderStyle style, float alpha, int translation, uint32_t color, FDynamicColormap *&basecolormap, fixed_t shadedlightshade) { return SetPatchStyle(style, FLOAT2FIXED(alpha), translation, color, basecolormap, shadedlightshade); } @@ -724,12 +724,12 @@ namespace swrenderer dc_dest_y = y; } - void ColumnDrawerArgs::FillColumn() + void SpriteDrawerArgs::FillColumn() { Drawers()->FillColumn(*this); } - void ColumnDrawerArgs::SetDest(int x, int y) + void SpriteDrawerArgs::SetDest(int x, int y) { int pixelsize = r_swtruecolor ? 4 : 1; dc_dest = dc_destorg + (ylookup[y] + x) * pixelsize; diff --git a/src/swrenderer/drawers/r_drawerargs.h b/src/swrenderer/drawers/r_drawerargs.h index ca624a6626..619b2f7a3b 100644 --- a/src/swrenderer/drawers/r_drawerargs.h +++ b/src/swrenderer/drawers/r_drawerargs.h @@ -36,7 +36,7 @@ namespace swrenderer typedef void(SWPixelFormatDrawers::*DrawerFunc)(const DrawerArgs &args); typedef void(SWPixelFormatDrawers::*WallDrawerFunc)(const WallDrawerArgs &args); - typedef void(SWPixelFormatDrawers::*ColumnDrawerFunc)(const ColumnDrawerArgs &args); + typedef void(SWPixelFormatDrawers::*SpriteDrawerFunc)(const SpriteDrawerArgs &args); typedef void(SWPixelFormatDrawers::*SpanDrawerFunc)(const SpanDrawerArgs &args); class DrawerArgs @@ -163,10 +163,10 @@ namespace swrenderer int dc_dest_y = 0; }; - class ColumnDrawerArgs : public DrawerArgs + class SpriteDrawerArgs : public DrawerArgs { public: - ColumnDrawerArgs(); + SpriteDrawerArgs(); bool SetPatchStyle(FRenderStyle style, fixed_t alpha, int translation, uint32_t color, FDynamicColormap *&basecolormap, fixed_t shadedlightshade = 0); bool SetPatchStyle(FRenderStyle style, float alpha, int translation, uint32_t color, FDynamicColormap *&basecolormap, fixed_t shadedlightshade = 0); @@ -200,10 +200,10 @@ namespace swrenderer fixed_t dc_srcalpha; fixed_t dc_destalpha; - ColumnDrawerFunc colfunc; - ColumnDrawerFunc basecolfunc; - ColumnDrawerFunc fuzzcolfunc; - 
ColumnDrawerFunc transcolfunc; + SpriteDrawerFunc colfunc; + SpriteDrawerFunc basecolfunc; + SpriteDrawerFunc fuzzcolfunc; + SpriteDrawerFunc transcolfunc; private: bool SetBlendFunc(int op, fixed_t fglevel, fixed_t bglevel, int flags); diff --git a/src/swrenderer/line/r_renderdrawsegment.cpp b/src/swrenderer/line/r_renderdrawsegment.cpp index 8096056bcd..bdd587aa8c 100644 --- a/src/swrenderer/line/r_renderdrawsegment.cpp +++ b/src/swrenderer/line/r_renderdrawsegment.cpp @@ -72,7 +72,7 @@ namespace swrenderer WallDrawerArgs walldrawerargs; walldrawerargs.SetStyle(true, additive, FLOAT2FIXED(alpha)); - ColumnDrawerArgs columndrawerargs; + SpriteDrawerArgs columndrawerargs; FDynamicColormap *patchstylecolormap = nullptr; bool visible = columndrawerargs.SetPatchStyle(LegacyRenderStyles[additive ? STYLE_Add : STYLE_Translucent], alpha, 0, 0, patchstylecolormap); diff --git a/src/swrenderer/things/r_decal.cpp b/src/swrenderer/things/r_decal.cpp index ad830101f9..9653a91fab 100644 --- a/src/swrenderer/things/r_decal.cpp +++ b/src/swrenderer/things/r_decal.cpp @@ -274,7 +274,7 @@ namespace swrenderer { int x = x1; - ColumnDrawerArgs drawerargs; + SpriteDrawerArgs drawerargs; if (cameraLight->fixedlightlev >= 0) drawerargs.SetColorMapLight((r_fullbrightignoresectorcolor) ? 
&FullNormalLight : usecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->fixedlightlev)); @@ -315,7 +315,7 @@ namespace swrenderer } while (needrepeat--); } - void RenderDecal::DrawColumn(ColumnDrawerArgs &drawerargs, int x, FTexture *WallSpriteTile, const ProjectedWallTexcoords &walltexcoords, double texturemid, float maskedScaleY, bool sprflipvert, const short *mfloorclip, const short *mceilingclip) + void RenderDecal::DrawColumn(SpriteDrawerArgs &drawerargs, int x, FTexture *WallSpriteTile, const ProjectedWallTexcoords &walltexcoords, double texturemid, float maskedScaleY, bool sprflipvert, const short *mfloorclip, const short *mceilingclip) { float iscale = walltexcoords.VStep[x] * maskedScaleY; double spryscale = 1 / iscale; diff --git a/src/swrenderer/things/r_decal.h b/src/swrenderer/things/r_decal.h index 00f2326516..4d14a85c91 100644 --- a/src/swrenderer/things/r_decal.h +++ b/src/swrenderer/things/r_decal.h @@ -20,7 +20,7 @@ namespace swrenderer { struct DrawSegment; class ProjectedWallTexcoords; - class ColumnDrawerArgs; + class SpriteDrawerArgs; class RenderDecal { @@ -29,6 +29,6 @@ namespace swrenderer private: static void Render(side_t *wall, DBaseDecal *first, DrawSegment *clipper, int wallshade, float lightleft, float lightstep, seg_t *curline, const FWallCoords &wallC, bool foggy, FDynamicColormap *basecolormap, const short *walltop, const short *wallbottom, int pass); - static void DrawColumn(ColumnDrawerArgs &drawerargs, int x, FTexture *WallSpriteTile, const ProjectedWallTexcoords &walltexcoords, double texturemid, float maskedScaleY, bool sprflipvert, const short *mfloorclip, const short *mceilingclip); + static void DrawColumn(SpriteDrawerArgs &drawerargs, int x, FTexture *WallSpriteTile, const ProjectedWallTexcoords &walltexcoords, double texturemid, float maskedScaleY, bool sprflipvert, const short *mfloorclip, const short *mceilingclip); }; } diff --git a/src/swrenderer/things/r_playersprite.cpp b/src/swrenderer/things/r_playersprite.cpp index 
2d937bef70..938c45900c 100644 --- a/src/swrenderer/things/r_playersprite.cpp +++ b/src/swrenderer/things/r_playersprite.cpp @@ -587,7 +587,7 @@ namespace swrenderer return; } - ColumnDrawerArgs drawerargs; + SpriteDrawerArgs drawerargs; drawerargs.SetColorMapLight(Light.BaseColormap, 0, Light.ColormapNum << FRACBITS); FDynamicColormap *basecolormap = static_cast(Light.BaseColormap); diff --git a/src/swrenderer/things/r_sprite.cpp b/src/swrenderer/things/r_sprite.cpp index 5110f550fb..f3df8d2a46 100644 --- a/src/swrenderer/things/r_sprite.cpp +++ b/src/swrenderer/things/r_sprite.cpp @@ -245,7 +245,7 @@ namespace swrenderer return; } - ColumnDrawerArgs drawerargs; + SpriteDrawerArgs drawerargs; drawerargs.SetColorMapLight(vis->Light.BaseColormap, 0, vis->Light.ColormapNum << FRACBITS); FDynamicColormap *basecolormap = static_cast(vis->Light.BaseColormap); diff --git a/src/swrenderer/things/r_voxel.cpp b/src/swrenderer/things/r_voxel.cpp index bc6e8ad65c..0d35a1768d 100644 --- a/src/swrenderer/things/r_voxel.cpp +++ b/src/swrenderer/things/r_voxel.cpp @@ -187,7 +187,7 @@ namespace swrenderer FDynamicColormap *basecolormap = static_cast(sprite->Light.BaseColormap); - ColumnDrawerArgs drawerargs; + SpriteDrawerArgs drawerargs; drawerargs.SetColorMapLight(sprite->Light.BaseColormap, 0, sprite->Light.ColormapNum << FRACBITS); bool visible = drawerargs.SetPatchStyle(sprite->RenderStyle, sprite->Alpha, sprite->Translation, sprite->FillColor, basecolormap); @@ -311,7 +311,7 @@ namespace swrenderer return (kvxslab_t*)(((uint8_t*)slab) + 3 + slab->zleng); } - void RenderVoxel::FillBox(ColumnDrawerArgs &drawerargs, DVector3 origin, double extentX, double extentY, int color, short *cliptop, short *clipbottom, bool viewspace, bool pixelstretch) + void RenderVoxel::FillBox(SpriteDrawerArgs &drawerargs, DVector3 origin, double extentX, double extentY, int color, short *cliptop, short *clipbottom, bool viewspace, bool pixelstretch) { double viewX, viewY, viewZ; if (viewspace) diff 
--git a/src/swrenderer/things/r_voxel.h b/src/swrenderer/things/r_voxel.h index f08f33654c..7d5fd621f0 100644 --- a/src/swrenderer/things/r_voxel.h +++ b/src/swrenderer/things/r_voxel.h @@ -31,7 +31,7 @@ struct FVoxel; namespace swrenderer { - class ColumnDrawerArgs; + class SpriteDrawerArgs; // [RH] A c-buffer. Used for keeping track of offscreen voxel spans. struct FCoverageBuffer @@ -83,7 +83,7 @@ namespace swrenderer enum { DVF_OFFSCREEN = 1, DVF_SPANSONLY = 2, DVF_MIRRORED = 4 }; - static void FillBox(ColumnDrawerArgs &drawerargs, DVector3 origin, double extentX, double extentY, int color, short *cliptop, short *clipbottom, bool viewspace, bool pixelstretch); + static void FillBox(SpriteDrawerArgs &drawerargs, DVector3 origin, double extentX, double extentY, int color, short *cliptop, short *clipbottom, bool viewspace, bool pixelstretch); static kvxslab_t *GetSlabStart(const FVoxelMipLevel &mip, int x, int y); static kvxslab_t *GetSlabEnd(const FVoxelMipLevel &mip, int x, int y); diff --git a/src/swrenderer/things/r_wallsprite.cpp b/src/swrenderer/things/r_wallsprite.cpp index 94a0d294c2..25e33d9ce4 100644 --- a/src/swrenderer/things/r_wallsprite.cpp +++ b/src/swrenderer/things/r_wallsprite.cpp @@ -179,7 +179,7 @@ namespace swrenderer rereadcolormap = false; } - ColumnDrawerArgs drawerargs; + SpriteDrawerArgs drawerargs; int shade = LIGHT2SHADE(spr->sector->lightlevel + R_ActualExtraLight(spr->foggy)); double GlobVis = LightVisibility::Instance()->WallGlobVis(); @@ -245,7 +245,7 @@ namespace swrenderer } } - void RenderWallSprite::DrawColumn(ColumnDrawerArgs &drawerargs, int x, FTexture *WallSpriteTile, const ProjectedWallTexcoords &walltexcoords, double texturemid, float maskedScaleY, bool sprflipvert, const short *mfloorclip, const short *mceilingclip) + void RenderWallSprite::DrawColumn(SpriteDrawerArgs &drawerargs, int x, FTexture *WallSpriteTile, const ProjectedWallTexcoords &walltexcoords, double texturemid, float maskedScaleY, bool sprflipvert, const 
short *mfloorclip, const short *mceilingclip) { float iscale = walltexcoords.VStep[x] * maskedScaleY; double spryscale = 1 / iscale; diff --git a/src/swrenderer/things/r_wallsprite.h b/src/swrenderer/things/r_wallsprite.h index ffdb36997a..1a0c87b70e 100644 --- a/src/swrenderer/things/r_wallsprite.h +++ b/src/swrenderer/things/r_wallsprite.h @@ -18,7 +18,7 @@ namespace swrenderer { class ProjectedWallTexcoords; - class ColumnDrawerArgs; + class SpriteDrawerArgs; class RenderWallSprite : public VisibleSprite { @@ -30,7 +30,7 @@ namespace swrenderer void Render(short *cliptop, short *clipbottom, int minZ, int maxZ) override; private: - static void DrawColumn(ColumnDrawerArgs &drawerargs, int x, FTexture *WallSpriteTile, const ProjectedWallTexcoords &walltexcoords, double texturemid, float maskedScaleY, bool sprflipvert, const short *mfloorclip, const short *mceilingclip); + static void DrawColumn(SpriteDrawerArgs &drawerargs, int x, FTexture *WallSpriteTile, const ProjectedWallTexcoords &walltexcoords, double texturemid, float maskedScaleY, bool sprflipvert, const short *mfloorclip, const short *mceilingclip); FWallCoords wallc; uint32_t Translation = 0; diff --git a/src/v_draw.cpp b/src/v_draw.cpp index f7e9520ef2..1736837f13 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -184,7 +184,7 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) translation = parms.remap->Remap; } - ColumnDrawerArgs drawerargs; + SpriteDrawerArgs drawerargs; if (translation != NULL) { From c486892c4a4e66d4fa3d2302aa623a9a4808753f Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 30 Jan 2017 11:43:15 +0100 Subject: [PATCH 784/912] Make colfunc, wallfunc, spanfunc private and remove the rest --- src/swrenderer/drawers/r_drawerargs.cpp | 19 +++++++++++++------ src/swrenderer/drawers/r_drawerargs.h | 15 ++++++++------- src/swrenderer/line/r_walldraw.cpp | 16 ++++++++-------- src/swrenderer/line/r_walldraw.h | 4 ++-- 4 files changed, 31 insertions(+), 23 deletions(-) 
diff --git a/src/swrenderer/drawers/r_drawerargs.cpp b/src/swrenderer/drawers/r_drawerargs.cpp index 4c7f421c20..c7e5bede54 100644 --- a/src/swrenderer/drawers/r_drawerargs.cpp +++ b/src/swrenderer/drawers/r_drawerargs.cpp @@ -62,9 +62,6 @@ namespace swrenderer SpriteDrawerArgs::SpriteDrawerArgs() { colfunc = &SWPixelFormatDrawers::DrawColumn; - basecolfunc = &SWPixelFormatDrawers::DrawColumn; - fuzzcolfunc = &SWPixelFormatDrawers::DrawFuzzColumn; - transcolfunc = &SWPixelFormatDrawers::DrawTranslatedColumn; } SpanDrawerArgs::SpanDrawerArgs() @@ -355,11 +352,11 @@ namespace swrenderer } else if (TranslationMap() == nullptr) { - colfunc = basecolfunc; + colfunc = &SWPixelFormatDrawers::DrawColumn; } else { - colfunc = transcolfunc; + colfunc = &SWPixelFormatDrawers::DrawTranslatedColumn; drawer_needs_pal_input = true; } return true; @@ -530,7 +527,7 @@ namespace swrenderer // Check for special modes if (style.BlendOp == STYLEOP_Fuzz) { - colfunc = fuzzcolfunc; + colfunc = &SWPixelFormatDrawers::DrawFuzzColumn; return true; } else if (style == LegacyRenderStyles[STYLE_Shaded]) @@ -600,6 +597,11 @@ namespace swrenderer dc_dest_y = y; } + void WallDrawerArgs::DrawColumn() + { + (Drawers()->*wallfunc)(*this); + } + void SpanDrawerArgs::SetSpanStyle(bool masked, bool additive, fixed_t alpha) { if (masked) @@ -687,6 +689,11 @@ namespace swrenderer } } + bool WallDrawerArgs::IsMaskedDrawer() const + { + return wallfunc == &SWPixelFormatDrawers::DrawWallMaskedColumn; + } + void SpanDrawerArgs::DrawSpan() { (Drawers()->*spanfunc)(*this); diff --git a/src/swrenderer/drawers/r_drawerargs.h b/src/swrenderer/drawers/r_drawerargs.h index 619b2f7a3b..f1ff939a09 100644 --- a/src/swrenderer/drawers/r_drawerargs.h +++ b/src/swrenderer/drawers/r_drawerargs.h @@ -132,6 +132,10 @@ namespace swrenderer void SetStyle(bool masked, bool additive, fixed_t alpha); void SetDest(int x, int y); + bool IsMaskedDrawer() const; + + void DrawColumn(); + uint8_t *Dest() const { return dc_dest; } int 
DestY() const { return dc_dest_y; } @@ -156,11 +160,11 @@ namespace swrenderer TriLight *dc_lights = nullptr; int dc_num_lights = 0; - WallDrawerFunc wallfunc = nullptr; - private: uint8_t *dc_dest = nullptr; int dc_dest_y = 0; + + WallDrawerFunc wallfunc = nullptr; }; class SpriteDrawerArgs : public DrawerArgs @@ -200,11 +204,6 @@ namespace swrenderer fixed_t dc_srcalpha; fixed_t dc_destalpha; - SpriteDrawerFunc colfunc; - SpriteDrawerFunc basecolfunc; - SpriteDrawerFunc fuzzcolfunc; - SpriteDrawerFunc transcolfunc; - private: bool SetBlendFunc(int op, fixed_t fglevel, fixed_t bglevel, int flags); static fixed_t GetAlpha(int type, fixed_t alpha); @@ -213,6 +212,8 @@ namespace swrenderer uint8_t *dc_dest = nullptr; int dc_dest_y = 0; bool drawer_needs_pal_input = false; + + SpriteDrawerFunc colfunc; }; void R_InitColumnDrawers(); diff --git a/src/swrenderer/line/r_walldraw.cpp b/src/swrenderer/line/r_walldraw.cpp index 3f9fd6cc9d..bf18426efd 100644 --- a/src/swrenderer/line/r_walldraw.cpp +++ b/src/swrenderer/line/r_walldraw.cpp @@ -168,7 +168,7 @@ namespace swrenderer } // Draw a column with support for non-power-of-two ranges - void RenderWallPart::Draw1Column(int x, int y1, int y2, WallSampler &sampler, WallDrawerFunc draw1column) + void RenderWallPart::Draw1Column(int x, int y1, int y2, WallSampler &sampler) { if (r_dynlights && light_list) { @@ -248,7 +248,7 @@ namespace swrenderer drawerargs.dc_iscale = sampler.uv_step; drawerargs.dc_texturefrac = sampler.uv_pos; drawerargs.dc_textureheight = sampler.height; - (drawerargs.Drawers()->*draw1column)(drawerargs); + drawerargs.DrawColumn(); uint64_t step64 = sampler.uv_step; uint64_t pos64 = sampler.uv_pos; @@ -267,7 +267,7 @@ namespace swrenderer drawerargs.dc_count = count; drawerargs.dc_iscale = sampler.uv_step; drawerargs.dc_texturefrac = sampler.uv_pos; - (drawerargs.Drawers()->*draw1column)(drawerargs); + drawerargs.DrawColumn(); uint64_t step64 = sampler.uv_step; uint64_t pos64 = sampler.uv_pos; @@ -293,7 
+293,7 @@ namespace swrenderer drawerargs.dc_count = count; drawerargs.dc_iscale = sampler.uv_step; drawerargs.dc_texturefrac = uv_pos; - (drawerargs.Drawers()->*draw1column)(drawerargs); + drawerargs.DrawColumn(); left -= count; uv_pos += sampler.uv_step * count; @@ -306,7 +306,7 @@ namespace swrenderer } } - void RenderWallPart::ProcessWallWorker(const short *uwal, const short *dwal, double texturemid, float *swal, fixed_t *lwal, WallDrawerFunc drawcolumn) + void RenderWallPart::ProcessWallWorker(const short *uwal, const short *dwal, double texturemid, float *swal, fixed_t *lwal) { if (rw_pic->UseType == FTexture::TEX_Null) return; @@ -352,7 +352,7 @@ namespace swrenderer if (x + 1 < x2) xmagnitude = fabs(FIXED2DBL(lwal[x + 1]) - FIXED2DBL(lwal[x])); WallSampler sampler(y1, texturemid, swal[x], yrepeat, lwal[x] + xoffset, xmagnitude, rw_pic); - Draw1Column(x, y1, y2, sampler, drawcolumn); + Draw1Column(x, y1, y2, sampler); } NetUpdate(); @@ -360,7 +360,7 @@ namespace swrenderer void RenderWallPart::ProcessNormalWall(const short *uwal, const short *dwal, double texturemid, float *swal, fixed_t *lwal) { - ProcessWallWorker(uwal, dwal, texturemid, swal, lwal, drawerargs.wallfunc); + ProcessWallWorker(uwal, dwal, texturemid, swal, lwal); } void RenderWallPart::ProcessStripedWall(const short *uwal, const short *dwal, double texturemid, float *swal, fixed_t *lwal) @@ -403,7 +403,7 @@ namespace swrenderer void RenderWallPart::ProcessWall(const short *uwal, const short *dwal, double texturemid, float *swal, fixed_t *lwal) { // Textures that aren't masked can use the faster ProcessNormalWall. 
- if (!rw_pic->bMasked && drawerargs.wallfunc == &SWPixelFormatDrawers::DrawWallMaskedColumn) + if (!rw_pic->bMasked && drawerargs.IsMaskedDrawer()) { drawerargs.SetStyle(true, false, OPAQUE); } diff --git a/src/swrenderer/line/r_walldraw.h b/src/swrenderer/line/r_walldraw.h index 72f9511309..76e3bccd68 100644 --- a/src/swrenderer/line/r_walldraw.h +++ b/src/swrenderer/line/r_walldraw.h @@ -63,8 +63,8 @@ namespace swrenderer void ProcessWall(const short *uwal, const short *dwal, double texturemid, float *swal, fixed_t *lwal); void ProcessStripedWall(const short *uwal, const short *dwal, double texturemid, float *swal, fixed_t *lwal); void ProcessNormalWall(const short *uwal, const short *dwal, double texturemid, float *swal, fixed_t *lwal); - void ProcessWallWorker(const short *uwal, const short *dwal, double texturemid, float *swal, fixed_t *lwal, WallDrawerFunc drawcolumn); - void Draw1Column(int x, int y1, int y2, WallSampler &sampler, WallDrawerFunc draw1column); + void ProcessWallWorker(const short *uwal, const short *dwal, double texturemid, float *swal, fixed_t *lwal); + void Draw1Column(int x, int y1, int y2, WallSampler &sampler); int x1 = 0; int x2 = 0; From c574b0ad3f3b8bd07ca0ab9e23ea5e438c7ef289 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 30 Jan 2017 11:53:11 +0100 Subject: [PATCH 785/912] Remove R_InitColumnDrawers and only allow DrawerArgs classes to call drawers --- src/swrenderer/drawers/r_drawerargs.cpp | 24 ++++++--------- src/swrenderer/drawers/r_drawerargs.h | 40 ++++++++++++------------- src/swrenderer/scene/r_scene.cpp | 8 ----- src/v_draw.cpp | 6 +--- 4 files changed, 29 insertions(+), 49 deletions(-) diff --git a/src/swrenderer/drawers/r_drawerargs.cpp b/src/swrenderer/drawers/r_drawerargs.cpp index c7e5bede54..82c0675042 100644 --- a/src/swrenderer/drawers/r_drawerargs.cpp +++ b/src/swrenderer/drawers/r_drawerargs.cpp @@ -39,24 +39,18 @@ namespace swrenderer { - namespace - { - SWPixelFormatDrawers *active_drawers; - 
SWPalDrawers pal_drawers; - SWTruecolorDrawers tc_drawers; - } - - void R_InitColumnDrawers() + SWPixelFormatDrawers *DrawerArgs::Drawers() { if (r_swtruecolor) - active_drawers = &tc_drawers; + { + static SWTruecolorDrawers tc_drawers; + return &tc_drawers; + } else - active_drawers = &pal_drawers; - } - - SWPixelFormatDrawers *DrawerArgs::Drawers() const - { - return active_drawers; + { + static SWPalDrawers pal_drawers; + return &pal_drawers; + } } SpriteDrawerArgs::SpriteDrawerArgs() diff --git a/src/swrenderer/drawers/r_drawerargs.h b/src/swrenderer/drawers/r_drawerargs.h index f1ff939a09..5ae0dadd0f 100644 --- a/src/swrenderer/drawers/r_drawerargs.h +++ b/src/swrenderer/drawers/r_drawerargs.h @@ -19,25 +19,7 @@ namespace swrenderer { class SWPixelFormatDrawers; class DrawerArgs; - - struct ShadeConstants - { - uint16_t light_alpha; - uint16_t light_red; - uint16_t light_green; - uint16_t light_blue; - uint16_t fade_alpha; - uint16_t fade_red; - uint16_t fade_green; - uint16_t fade_blue; - uint16_t desaturate; - bool simple_shade; - }; - - typedef void(SWPixelFormatDrawers::*DrawerFunc)(const DrawerArgs &args); - typedef void(SWPixelFormatDrawers::*WallDrawerFunc)(const WallDrawerArgs &args); - typedef void(SWPixelFormatDrawers::*SpriteDrawerFunc)(const SpriteDrawerArgs &args); - typedef void(SWPixelFormatDrawers::*SpanDrawerFunc)(const SpanDrawerArgs &args); + struct ShadeConstants; class DrawerArgs { @@ -51,7 +33,8 @@ namespace swrenderer ShadeConstants ColormapConstants() const; fixed_t Light() const { return LIGHTSCALE(mLight, mShade); } - SWPixelFormatDrawers *Drawers() const; + protected: + static SWPixelFormatDrawers *Drawers(); private: FSWColormap *mBaseColormap = nullptr; @@ -123,6 +106,7 @@ namespace swrenderer int dc_num_lights = 0; private: + typedef void(SWPixelFormatDrawers::*SpanDrawerFunc)(const SpanDrawerArgs &args); SpanDrawerFunc spanfunc; }; @@ -164,6 +148,7 @@ namespace swrenderer uint8_t *dc_dest = nullptr; int dc_dest_y = 0; + typedef 
void(SWPixelFormatDrawers::*WallDrawerFunc)(const WallDrawerArgs &args); WallDrawerFunc wallfunc = nullptr; }; @@ -213,8 +198,21 @@ namespace swrenderer int dc_dest_y = 0; bool drawer_needs_pal_input = false; + typedef void(SWPixelFormatDrawers::*SpriteDrawerFunc)(const SpriteDrawerArgs &args); SpriteDrawerFunc colfunc; }; - void R_InitColumnDrawers(); + struct ShadeConstants + { + uint16_t light_alpha; + uint16_t light_red; + uint16_t light_green; + uint16_t light_blue; + uint16_t fade_alpha; + uint16_t fade_red; + uint16_t fade_green; + uint16_t fade_blue; + uint16_t desaturate; + bool simple_shade; + }; } diff --git a/src/swrenderer/scene/r_scene.cpp b/src/swrenderer/scene/r_scene.cpp index 4914fbe251..24e04d5e88 100644 --- a/src/swrenderer/scene/r_scene.cpp +++ b/src/swrenderer/scene/r_scene.cpp @@ -79,7 +79,6 @@ namespace swrenderer if (r_swtruecolor != screen->IsBgra()) { r_swtruecolor = screen->IsBgra(); - R_InitColumnDrawers(); } if (r_clearbuffer != 0) @@ -202,7 +201,6 @@ namespace swrenderer if (r_swtruecolor != canvas->IsBgra()) { r_swtruecolor = canvas->IsBgra(); - R_InitColumnDrawers(); } R_BeginDrawerCommands(); @@ -233,11 +231,6 @@ namespace swrenderer viewactive = savedviewactive; r_swtruecolor = savedoutputformat; - - if (r_swtruecolor != canvas->IsBgra()) - { - R_InitColumnDrawers(); - } } void RenderScene::ScreenResized() @@ -259,7 +252,6 @@ namespace swrenderer fillshort(zeroarray, MAXWIDTH, 0); R_InitShadeMaps(); - R_InitColumnDrawers(); } void RenderScene::Deinit() diff --git a/src/v_draw.cpp b/src/v_draw.cpp index 1736837f13..9ad73942d4 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -147,11 +147,7 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) static short bottomclipper[MAXWIDTH], topclipper[MAXWIDTH]; const BYTE *translation = NULL; - if (r_swtruecolor != IsBgra()) - { - r_swtruecolor = IsBgra(); - R_InitColumnDrawers(); - } + r_swtruecolor = IsBgra(); if (APART(parms.colorOverlay) != 0) { From 
98fa2976fae0ffe76791102d5deff598146a96ef Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 30 Jan 2017 12:46:17 +0100 Subject: [PATCH 786/912] Clean up the SpanDrawerArgs interface a bit --- src/swrenderer/drawers/r_draw_pal.cpp | 38 +++++++------- src/swrenderer/drawers/r_draw_rgba.cpp | 42 ++++++++-------- src/swrenderer/drawers/r_drawerargs.cpp | 4 +- src/swrenderer/drawers/r_drawerargs.h | 67 +++++++++++++++++-------- src/swrenderer/plane/r_flatplane.cpp | 39 +++++++------- src/swrenderer/plane/r_slopeplane.cpp | 16 +++--- src/v_draw.cpp | 28 +++++------ 7 files changed, 130 insertions(+), 104 deletions(-) diff --git a/src/swrenderer/drawers/r_draw_pal.cpp b/src/swrenderer/drawers/r_draw_pal.cpp index 2304cff090..6e3da73de6 100644 --- a/src/swrenderer/drawers/r_draw_pal.cpp +++ b/src/swrenderer/drawers/r_draw_pal.cpp @@ -1848,23 +1848,23 @@ namespace swrenderer PalSpanCommand::PalSpanCommand(const SpanDrawerArgs &args) { - _source = args.ds_source; + _source = args.TexturePixels(); _colormap = args.Colormap(); - _xfrac = args.ds_xfrac; - _yfrac = args.ds_yfrac; - _y = args.ds_y; - _x1 = args.ds_x1; - _x2 = args.ds_x2; + _xfrac = args.TextureUPos(); + _yfrac = args.TextureVPos(); + _y = args.DestY(); + _x1 = args.DestX1(); + _x2 = args.DestX2(); _destorg = dc_destorg; - _xstep = args.ds_xstep; - _ystep = args.ds_ystep; - _xbits = args.ds_xbits; - _ybits = args.ds_ybits; - _srcblend = args.dc_srcblend; - _destblend = args.dc_destblend; - _color = args.ds_color; - _srcalpha = args.dc_srcalpha; - _destalpha = args.dc_destalpha; + _xstep = args.TextureUStep(); + _ystep = args.TextureVStep(); + _xbits = args.TextureWidthBits(); + _ybits = args.TextureHeightBits(); + _srcblend = args.SrcBlend(); + _destblend = args.DestBlend(); + _color = args.SolidColor(); + _srcalpha = args.SrcAlpha(); + _destalpha = args.DestAlpha(); _dynlights = args.dc_lights; _num_dynlights = args.dc_num_lights; _viewpos_x = args.dc_viewpos.X; @@ -2626,9 +2626,9 @@ namespace swrenderer { 
_colormap = args.Colormap(); _destorg = dc_destorg; - _ybits = args.ds_ybits; - _xbits = args.ds_xbits; - _source = args.ds_source; + _ybits = args.TextureHeightBits(); + _xbits = args.TextureWidthBits(); + _source = args.TexturePixels(); basecolormapdata = basecolormap->Maps; } @@ -2869,7 +2869,7 @@ namespace swrenderer DrawColoredSpanPalCommand::DrawColoredSpanPalCommand(const SpanDrawerArgs &args, int y, int x1, int x2) : PalSpanCommand(args), y(y), x1(x1), x2(x2) { - color = args.ds_color; + color = args.SolidColor(); destorg = dc_destorg; } diff --git a/src/swrenderer/drawers/r_draw_rgba.cpp b/src/swrenderer/drawers/r_draw_rgba.cpp index 183d347c1e..ab932cdcac 100644 --- a/src/swrenderer/drawers/r_draw_rgba.cpp +++ b/src/swrenderer/drawers/r_draw_rgba.cpp @@ -63,18 +63,18 @@ namespace swrenderer DrawSpanLLVMCommand::DrawSpanLLVMCommand(const SpanDrawerArgs &drawerargs) { auto shade_constants = drawerargs.ColormapConstants(); - args.xfrac = drawerargs.ds_xfrac; - args.yfrac = drawerargs.ds_yfrac; - args.xstep = drawerargs.ds_xstep; - args.ystep = drawerargs.ds_ystep; - args.x1 = drawerargs.ds_x1; - args.x2 = drawerargs.ds_x2; - args.y = drawerargs.ds_y; - args.xbits = drawerargs.ds_xbits; - args.ybits = drawerargs.ds_ybits; + args.xfrac = drawerargs.TextureUPos(); + args.yfrac = drawerargs.TextureVPos(); + args.xstep = drawerargs.TextureUStep(); + args.ystep = drawerargs.TextureVStep(); + args.x1 = drawerargs.DestX1(); + args.x2 = drawerargs.DestX2(); + args.y = drawerargs.DestY(); + args.xbits = drawerargs.TextureWidthBits(); + args.ybits = drawerargs.TextureHeightBits(); args.destorg = (uint32_t*)dc_destorg; args.destpitch = dc_pitch; - args.source = (const uint32_t*)drawerargs.ds_source; + args.source = (const uint32_t*)drawerargs.TexturePixels(); args.light = LightBgra::calc_light_multiplier(drawerargs.Light()); args.light_red = shade_constants.light_red; args.light_green = shade_constants.light_green; @@ -85,12 +85,12 @@ namespace swrenderer args.fade_blue 
= shade_constants.fade_blue; args.fade_alpha = shade_constants.fade_alpha; args.desaturate = shade_constants.desaturate; - args.srcalpha = drawerargs.dc_srcalpha >> (FRACBITS - 8); - args.destalpha = drawerargs.dc_destalpha >> (FRACBITS - 8); + args.srcalpha = drawerargs.SrcAlpha() >> (FRACBITS - 8); + args.destalpha = drawerargs.DestAlpha() >> (FRACBITS - 8); args.flags = 0; if (shade_constants.simple_shade) args.flags |= DrawSpanArgs::simple_shade; - if (!sampler_setup(drawerargs.ds_lod, args.source, args.xbits, args.ybits, drawerargs.ds_source_mipmapped)) + if (!sampler_setup(drawerargs.TextureLOD(), args.source, args.xbits, args.ybits, drawerargs.MipmappedTexture())) args.flags |= DrawSpanArgs::nearest_filter; args.viewpos_x = drawerargs.dc_viewpos.X; @@ -436,12 +436,12 @@ namespace swrenderer FillSpanRGBACommand::FillSpanRGBACommand(const SpanDrawerArgs &drawerargs) { - _x1 = drawerargs.ds_x1; - _x2 = drawerargs.ds_x2; - _y = drawerargs.ds_y; + _x1 = drawerargs.DestX1(); + _x2 = drawerargs.DestX2(); + _y = drawerargs.DestY(); _destorg = dc_destorg; _light = drawerargs.Light(); - _color = drawerargs.ds_color; + _color = drawerargs.SolidColor(); } void FillSpanRGBACommand::Execute(DrawerThread *thread) @@ -548,9 +548,9 @@ namespace swrenderer _planelightfloat = planelightfloat; _pviewx = pviewx; _pviewy = pviewy; - _source = (const uint32_t*)drawerargs.ds_source; - _xbits = drawerargs.ds_xbits; - _ybits = drawerargs.ds_ybits; + _source = (const uint32_t*)drawerargs.TexturePixels(); + _xbits = drawerargs.TextureWidthBits(); + _ybits = drawerargs.TextureHeightBits(); } void DrawTiltedSpanRGBACommand::Execute(DrawerThread *thread) @@ -674,7 +674,7 @@ namespace swrenderer _destorg = dc_destorg; _light = drawerargs.Light(); - _color = drawerargs.ds_color; + _color = drawerargs.SolidColor(); } void DrawColoredSpanRGBACommand::Execute(DrawerThread *thread) diff --git a/src/swrenderer/drawers/r_drawerargs.cpp b/src/swrenderer/drawers/r_drawerargs.cpp index 
82c0675042..d2dff94826 100644 --- a/src/swrenderer/drawers/r_drawerargs.cpp +++ b/src/swrenderer/drawers/r_drawerargs.cpp @@ -124,7 +124,7 @@ namespace swrenderer return shadeConstants; } - void SpanDrawerArgs::SetSpanTexture(FTexture *tex) + void SpanDrawerArgs::SetTexture(FTexture *tex) { tex->GetWidth(); ds_xbits = tex->WidthBits; @@ -596,7 +596,7 @@ namespace swrenderer (Drawers()->*wallfunc)(*this); } - void SpanDrawerArgs::SetSpanStyle(bool masked, bool additive, fixed_t alpha) + void SpanDrawerArgs::SetStyle(bool masked, bool additive, fixed_t alpha) { if (masked) { diff --git a/src/swrenderer/drawers/r_drawerargs.h b/src/swrenderer/drawers/r_drawerargs.h index 5ae0dadd0f..58f44931b4 100644 --- a/src/swrenderer/drawers/r_drawerargs.h +++ b/src/swrenderer/drawers/r_drawerargs.h @@ -71,33 +71,40 @@ namespace swrenderer public: SpanDrawerArgs(); - void SetSpanStyle(bool masked, bool additive, fixed_t alpha); - void SetSpanTexture(FTexture *tex); + void SetStyle(bool masked, bool additive, fixed_t alpha); + void SetDestY(int y) { ds_y = y; } + void SetDestX1(int x) { ds_x1 = x; } + void SetDestX2(int x) { ds_x2 = x; } + void SetTexture(FTexture *tex); + void SetTextureLOD(double lod) { ds_lod = lod; } + void SetTextureUPos(dsfixed_t xfrac) { ds_xfrac = xfrac; } + void SetTextureVPos(dsfixed_t yfrac) { ds_yfrac = yfrac; } + void SetTextureUStep(dsfixed_t xstep) { ds_xstep = xstep; } + void SetTextureVStep(dsfixed_t vstep) { ds_ystep = vstep; } + void SetSolidColor(int colorIndex) { ds_color = colorIndex; } void DrawSpan(); void DrawTiltedSpan(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy, FDynamicColormap *basecolormap); void DrawColoredSpan(int y, int x1, int x2); void DrawFogBoundaryLine(int y, int x1, int x2); - uint32_t *dc_srcblend; - uint32_t *dc_destblend; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; - - int ds_y; 
- int ds_x1; - int ds_x2; - dsfixed_t ds_xfrac; - dsfixed_t ds_yfrac; - dsfixed_t ds_xstep; - dsfixed_t ds_ystep; - int ds_xbits; - int ds_ybits; - fixed_t ds_alpha; - double ds_lod; - const uint8_t *ds_source; - bool ds_source_mipmapped; - int ds_color = 0; + uint32_t *SrcBlend() const { return dc_srcblend; } + uint32_t *DestBlend() const { return dc_destblend; } + fixed_t SrcAlpha() const { return dc_srcalpha; } + fixed_t DestAlpha() const { return dc_destalpha; } + int DestY() const { return ds_y; } + int DestX1() const { return ds_x1; } + int DestX2() const { return ds_x2; } + dsfixed_t TextureUPos() const { return ds_xfrac; } + dsfixed_t TextureVPos() const { return ds_yfrac; } + dsfixed_t TextureUStep() const { return ds_xstep; } + dsfixed_t TextureVStep() const { return ds_ystep; } + int SolidColor() const { return ds_color; } + int TextureWidthBits() const { return ds_xbits; } + int TextureHeightBits() const { return ds_ybits; } + const uint8_t *TexturePixels() const { return ds_source; } + bool MipmappedTexture() const { return ds_source_mipmapped; } + double TextureLOD() const { return ds_lod; } FVector3 dc_normal; FVector3 dc_viewpos; @@ -108,6 +115,24 @@ namespace swrenderer private: typedef void(SWPixelFormatDrawers::*SpanDrawerFunc)(const SpanDrawerArgs &args); SpanDrawerFunc spanfunc; + + int ds_y; + int ds_x1; + int ds_x2; + int ds_xbits; + int ds_ybits; + const uint8_t *ds_source; + bool ds_source_mipmapped; + dsfixed_t ds_xfrac; + dsfixed_t ds_yfrac; + dsfixed_t ds_xstep; + dsfixed_t ds_ystep; + uint32_t *dc_srcblend; + uint32_t *dc_destblend; + fixed_t dc_srcalpha; + fixed_t dc_destalpha; + int ds_color = 0; + double ds_lod; }; class WallDrawerArgs : public DrawerArgs diff --git a/src/swrenderer/plane/r_flatplane.cpp b/src/swrenderer/plane/r_flatplane.cpp index 0663d02bfa..895734ea6c 100644 --- a/src/swrenderer/plane/r_flatplane.cpp +++ b/src/swrenderer/plane/r_flatplane.cpp @@ -51,15 +51,15 @@ namespace swrenderer return; } - drawerargs.ds_color
= 3; - drawerargs.SetSpanTexture(texture); + drawerargs.SetSolidColor(3); + drawerargs.SetTexture(texture); double planeang = (pl->xform.Angle + pl->xform.baseAngle).Radians(); double xstep, ystep, leftxfrac, leftyfrac, rightxfrac, rightyfrac; double x; - xscale = xs_ToFixed(32 - drawerargs.ds_xbits, _xscale); - yscale = xs_ToFixed(32 - drawerargs.ds_ybits, _yscale); + xscale = xs_ToFixed(32 - drawerargs.TextureWidthBits(), _xscale); + yscale = xs_ToFixed(32 - drawerargs.TextureHeightBits(), _yscale); if (planeang != 0) { double cosine = cos(planeang), sine = sin(planeang); @@ -126,7 +126,7 @@ namespace swrenderer planeshade = LIGHT2SHADE(pl->lightlevel); } - drawerargs.SetSpanStyle(masked, additive, alpha); + drawerargs.SetStyle(masked, additive, alpha); light_list = pl->lights; @@ -149,25 +149,26 @@ namespace swrenderer distance = planeheight * yslope[y]; - if (drawerargs.ds_xbits != 0) + if (drawerargs.TextureWidthBits() != 0) { - drawerargs.ds_xstep = xs_ToFixed(32 - drawerargs.ds_xbits, distance * xstepscale); - drawerargs.ds_xfrac = xs_ToFixed(32 - drawerargs.ds_xbits, distance * basexfrac) + pviewx; + drawerargs.SetTextureUStep(xs_ToFixed(32 - drawerargs.TextureWidthBits(), distance * xstepscale)); + drawerargs.SetTextureUPos(xs_ToFixed(32 - drawerargs.TextureWidthBits(), distance * basexfrac) + pviewx); } else { - drawerargs.ds_xstep = 0; - drawerargs.ds_xfrac = 0; + drawerargs.SetTextureUStep(0); + drawerargs.SetTextureUPos(0); } - if (drawerargs.ds_ybits != 0) + + if (drawerargs.TextureHeightBits() != 0) { - drawerargs.ds_ystep = xs_ToFixed(32 - drawerargs.ds_ybits, distance * ystepscale); - drawerargs.ds_yfrac = xs_ToFixed(32 - drawerargs.ds_ybits, distance * baseyfrac) + pviewy; + drawerargs.SetTextureVStep(xs_ToFixed(32 - drawerargs.TextureHeightBits(), distance * ystepscale)); + drawerargs.SetTextureVPos(xs_ToFixed(32 - drawerargs.TextureHeightBits(), distance * baseyfrac) + pviewy); } else { - drawerargs.ds_ystep = 0; - drawerargs.ds_yfrac = 0; + 
drawerargs.SetTextureVStep(0); + drawerargs.SetTextureVPos(0); } if (r_swtruecolor) @@ -177,7 +178,7 @@ namespace swrenderer double ymagnitude = fabs(xstepscale * (distance2 - distance) * FocalLengthX); double magnitude = MAX(ymagnitude, xmagnitude); double min_lod = -1000.0; - drawerargs.ds_lod = MAX(log2(magnitude) + r_lod_bias, min_lod); + drawerargs.SetTextureLOD(MAX(log2(magnitude) + r_lod_bias, min_lod)); } if (plane_shade) @@ -250,9 +251,9 @@ namespace swrenderer drawerargs.dc_num_lights = 0; } - drawerargs.ds_y = y; - drawerargs.ds_x1 = x1; - drawerargs.ds_x2 = x2; + drawerargs.SetDestY(y); + drawerargs.SetDestX1(x1); + drawerargs.SetDestX2(x2); drawerargs.DrawSpan(); } diff --git a/src/swrenderer/plane/r_slopeplane.cpp b/src/swrenderer/plane/r_slopeplane.cpp index 363ff9edfe..cdee76e0c9 100644 --- a/src/swrenderer/plane/r_slopeplane.cpp +++ b/src/swrenderer/plane/r_slopeplane.cpp @@ -70,17 +70,17 @@ namespace swrenderer return; } - drawerargs.ds_color = 3; - drawerargs.SetSpanTexture(texture); + drawerargs.SetSolidColor(3); + drawerargs.SetTexture(texture); - lxscale = _xscale * ifloatpow2[drawerargs.ds_xbits]; - lyscale = _yscale * ifloatpow2[drawerargs.ds_ybits]; + lxscale = _xscale * ifloatpow2[drawerargs.TextureWidthBits()]; + lyscale = _yscale * ifloatpow2[drawerargs.TextureHeightBits()]; xscale = 64.f / lxscale; yscale = 64.f / lyscale; zeroheight = pl->height.ZatPoint(ViewPos); - pviewx = xs_ToFixed(32 - drawerargs.ds_xbits, pl->xform.xOffs * pl->xform.xScale); - pviewy = xs_ToFixed(32 - drawerargs.ds_ybits, pl->xform.yOffs * pl->xform.yScale); + pviewx = xs_ToFixed(32 - drawerargs.TextureWidthBits(), pl->xform.xOffs * pl->xform.xScale); + pviewy = xs_ToFixed(32 - drawerargs.TextureHeightBits(), pl->xform.yOffs * pl->xform.yScale); planeang = (pl->xform.Angle + pl->xform.baseAngle).Radians(); // p is the texture origin in view space @@ -170,11 +170,11 @@ namespace swrenderer } // Hack in support for 1 x Z and Z x 1 texture sizes - if 
(drawerargs.ds_ybits == 0) + if (drawerargs.TextureHeightBits() == 0) { plane_sv[2] = plane_sv[1] = plane_sv[0] = 0; } - if (drawerargs.ds_xbits == 0) + if (drawerargs.TextureWidthBits() == 0) { plane_su[2] = plane_su[1] = plane_su[0] = 0; } diff --git a/src/v_draw.cpp b/src/v_draw.cpp index 9ad73942d4..574fb09dd7 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -1384,30 +1384,30 @@ void DCanvas::FillSimplePoly(FTexture *tex, FVector2 *points, int npoints, // Setup constant texture mapping parameters. SpanDrawerArgs drawerargs; - drawerargs.SetSpanTexture(tex); + drawerargs.SetTexture(tex); if (colormap) drawerargs.SetColorMapLight(colormap, 0, clamp(shade >> FRACBITS, 0, NUMCOLORMAPS - 1)); else drawerargs.SetColorMapLight(&identitycolormap, 0, 0); - if (drawerargs.ds_xbits != 0) + if (drawerargs.TextureWidthBits() != 0) { - scalex = double(1u << (32 - drawerargs.ds_xbits)) / scalex; - drawerargs.ds_xstep = xs_RoundToInt(cosrot * scalex); + scalex = double(1u << (32 - drawerargs.TextureWidthBits())) / scalex; + drawerargs.SetTextureUStep(xs_RoundToInt(cosrot * scalex)); } else { // Texture is one pixel wide. scalex = 0; - drawerargs.ds_xstep = 0; + drawerargs.SetTextureUStep(0); } - if (drawerargs.ds_ybits != 0) + if (drawerargs.TextureHeightBits() != 0) { - scaley = double(1u << (32 - drawerargs.ds_ybits)) / scaley; - drawerargs.ds_ystep = xs_RoundToInt(sinrot * scaley); + scaley = double(1u << (32 - drawerargs.TextureHeightBits())) / scaley; + drawerargs.SetTextureVStep(xs_RoundToInt(sinrot * scaley)); } else { // Texture is one pixel tall. scaley = 0; - drawerargs.ds_ystep = 0; + drawerargs.SetTextureVStep(0); } // Travel down the right edge and create an outline of that edge. 
@@ -1473,9 +1473,9 @@ void DCanvas::FillSimplePoly(FTexture *tex, FVector2 *points, int npoints, #if 0 memset(this->Buffer + y * this->Pitch + x1, (int)tex, x2 - x1); #else - drawerargs.ds_y = y; - drawerargs.ds_x1 = x1; - drawerargs.ds_x2 = x2 - 1; + drawerargs.SetDestY(y); + drawerargs.SetDestX1(x1); + drawerargs.SetDestX2(x2 - 1); DVector2 tex(x1 - originx, y - originy); if (dorotate) @@ -1484,8 +1484,8 @@ void DCanvas::FillSimplePoly(FTexture *tex, FVector2 *points, int npoints, tex.X = t * cosrot - tex.Y * sinrot; tex.Y = tex.Y * cosrot + t * sinrot; } - drawerargs.ds_xfrac = xs_RoundToInt(tex.X * scalex); - drawerargs.ds_yfrac = xs_RoundToInt(tex.Y * scaley); + drawerargs.SetTextureUPos(xs_RoundToInt(tex.X * scalex)); + drawerargs.SetTextureVPos(xs_RoundToInt(tex.Y * scaley)); drawerargs.DrawSpan(); #endif From 0c61b566559d2799f50e080b5f15299e6c1fb47e Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 30 Jan 2017 20:21:18 +0100 Subject: [PATCH 787/912] Clean up SkyDrawerArgs interface --- src/swrenderer/drawers/r_draw.h | 4 +-- src/swrenderer/drawers/r_draw_pal.cpp | 19 ++++++----- src/swrenderer/drawers/r_draw_pal.h | 6 ++-- src/swrenderer/drawers/r_draw_rgba.cpp | 22 ++++++------- src/swrenderer/drawers/r_draw_rgba.h | 6 ++-- src/swrenderer/drawers/r_drawerargs.cpp | 41 +++++++++++++++++++++--- src/swrenderer/drawers/r_drawerargs.h | 42 +++++++++++++++++++------ src/swrenderer/plane/r_skyplane.cpp | 35 ++++++--------------- 8 files changed, 110 insertions(+), 65 deletions(-) diff --git a/src/swrenderer/drawers/r_draw.h b/src/swrenderer/drawers/r_draw.h index 2ab9000583..625e67ead8 100644 --- a/src/swrenderer/drawers/r_draw.h +++ b/src/swrenderer/drawers/r_draw.h @@ -56,8 +56,8 @@ namespace swrenderer virtual void DrawWallAddClampColumn(const WallDrawerArgs &args) = 0; virtual void DrawWallSubClampColumn(const WallDrawerArgs &args) = 0; virtual void DrawWallRevSubClampColumn(const WallDrawerArgs &args) = 0; - virtual void DrawSingleSkyColumn(const 
SkyDrawerArgs &args, uint32_t solid_top, uint32_t solid_bottom, bool fadeSky) = 0; - virtual void DrawDoubleSkyColumn(const SkyDrawerArgs &args, uint32_t solid_top, uint32_t solid_bottom, bool fadeSky) = 0; + virtual void DrawSingleSkyColumn(const SkyDrawerArgs &args) = 0; + virtual void DrawDoubleSkyColumn(const SkyDrawerArgs &args) = 0; virtual void DrawColumn(const SpriteDrawerArgs &args) = 0; virtual void FillColumn(const SpriteDrawerArgs &args) = 0; virtual void FillAddColumn(const SpriteDrawerArgs &args) = 0; diff --git a/src/swrenderer/drawers/r_draw_pal.cpp b/src/swrenderer/drawers/r_draw_pal.cpp index 6e3da73de6..9051126ea5 100644 --- a/src/swrenderer/drawers/r_draw_pal.cpp +++ b/src/swrenderer/drawers/r_draw_pal.cpp @@ -559,18 +559,21 @@ namespace swrenderer ///////////////////////////////////////////////////////////////////////// - PalSkyCommand::PalSkyCommand(const SkyDrawerArgs &args, uint32_t solid_top, uint32_t solid_bottom, bool fadeSky) : solid_top(solid_top), solid_bottom(solid_bottom), fadeSky(fadeSky) + PalSkyCommand::PalSkyCommand(const SkyDrawerArgs &args) { _dest = args.Dest(); _dest_y = args.DestY(); - _count = args.dc_count; + _count = args.Count(); _pitch = dc_pitch; - _source = args.dc_wall_source; - _source2 = args.dc_wall_source2; - _sourceheight[0] = args.dc_wall_sourceheight[0]; - _sourceheight[1] = args.dc_wall_sourceheight[1]; - _iscale = args.dc_wall_iscale; - _texturefrac = args.dc_wall_texturefrac; + _source = args.FrontTexturePixels(); + _source2 = args.BackTexturePixels(); + _sourceheight[0] = args.FrontTextureHeight(); + _sourceheight[1] = args.BackTextureHeight(); + _iscale = args.TextureVStep(); + _texturefrac = args.TextureVPos(); + solid_top = args.SolidTopColor(); + solid_bottom = args.SolidBottomColor(); + fadeSky = args.FadeSky(); } void DrawSingleSky1PalCommand::Execute(DrawerThread *thread) diff --git a/src/swrenderer/drawers/r_draw_pal.h b/src/swrenderer/drawers/r_draw_pal.h index 50dd0ef828..ca99f18496 100644 --- 
a/src/swrenderer/drawers/r_draw_pal.h +++ b/src/swrenderer/drawers/r_draw_pal.h @@ -44,7 +44,7 @@ namespace swrenderer class PalSkyCommand : public DrawerCommand { public: - PalSkyCommand(const SkyDrawerArgs &args, uint32_t solid_top, uint32_t solid_bottom, bool fadeSky); + PalSkyCommand(const SkyDrawerArgs &args); FString DebugInfo() override { return "PalSkyCommand"; } protected: @@ -255,8 +255,8 @@ namespace swrenderer void DrawWallAddClampColumn(const WallDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } void DrawWallSubClampColumn(const WallDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } void DrawWallRevSubClampColumn(const WallDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawSingleSkyColumn(const SkyDrawerArgs &args, uint32_t solid_top, uint32_t solid_bottom, bool fadeSky) override { DrawerCommandQueue::QueueCommand(args, solid_top, solid_bottom, fadeSky); } - void DrawDoubleSkyColumn(const SkyDrawerArgs &args, uint32_t solid_top, uint32_t solid_bottom, bool fadeSky) override { DrawerCommandQueue::QueueCommand(args, solid_top, solid_bottom, fadeSky); } + void DrawSingleSkyColumn(const SkyDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawDoubleSkyColumn(const SkyDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } void DrawColumn(const SpriteDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } void FillColumn(const SpriteDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } void FillAddColumn(const SpriteDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } diff --git a/src/swrenderer/drawers/r_draw_rgba.cpp b/src/swrenderer/drawers/r_draw_rgba.cpp index ab932cdcac..897990cdb5 100644 --- a/src/swrenderer/drawers/r_draw_rgba.cpp +++ b/src/swrenderer/drawers/r_draw_rgba.cpp @@ -302,21 +302,21 @@ namespace swrenderer return d; } - DrawSkyLLVMCommand::DrawSkyLLVMCommand(const 
SkyDrawerArgs &drawerargs, uint32_t solid_top, uint32_t solid_bottom, bool fadeSky) + DrawSkyLLVMCommand::DrawSkyLLVMCommand(const SkyDrawerArgs &drawerargs) { args.dest = (uint32_t*)drawerargs.Dest(); args.dest_y = drawerargs.DestY(); - args.count = drawerargs.dc_count; + args.count = drawerargs.Count(); args.pitch = dc_pitch; - args.texturefrac[0] = drawerargs.dc_wall_texturefrac; - args.iscale[0] = drawerargs.dc_wall_iscale; - args.source0[0] = (const uint32_t *)drawerargs.dc_wall_source; - args.source1[0] = (const uint32_t *)drawerargs.dc_wall_source2; - args.textureheight0 = drawerargs.dc_wall_sourceheight[0]; - args.textureheight1 = drawerargs.dc_wall_sourceheight[1]; - args.top_color = solid_top; - args.bottom_color = solid_bottom; - args.flags = fadeSky ? DrawSkyArgs::fade_sky : 0; + args.texturefrac[0] = drawerargs.TextureVPos(); + args.iscale[0] = drawerargs.TextureVStep(); + args.source0[0] = (const uint32_t *)drawerargs.FrontTexturePixels(); + args.source1[0] = (const uint32_t *)drawerargs.BackTexturePixels(); + args.textureheight0 = drawerargs.FrontTextureHeight(); + args.textureheight1 = drawerargs.BackTextureHeight(); + args.top_color = drawerargs.SolidTopColor(); + args.bottom_color = drawerargs.SolidBottomColor(); + args.flags = drawerargs.FadeSky() ? 
DrawSkyArgs::fade_sky : 0; } FString DrawSkyLLVMCommand::DebugInfo() diff --git a/src/swrenderer/drawers/r_draw_rgba.h b/src/swrenderer/drawers/r_draw_rgba.h index 671de80fed..37ae0e25db 100644 --- a/src/swrenderer/drawers/r_draw_rgba.h +++ b/src/swrenderer/drawers/r_draw_rgba.h @@ -163,7 +163,7 @@ namespace swrenderer WorkerThreadData ThreadData(DrawerThread *thread); public: - DrawSkyLLVMCommand(const SkyDrawerArgs &drawerargs, uint32_t solid_top, uint32_t solid_bottom, bool fadeSky); + DrawSkyLLVMCommand(const SkyDrawerArgs &drawerargs); FString DebugInfo() override; }; @@ -360,8 +360,8 @@ namespace swrenderer void DrawWallAddClampColumn(const WallDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } void DrawWallSubClampColumn(const WallDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } void DrawWallRevSubClampColumn(const WallDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawSingleSkyColumn(const SkyDrawerArgs &args, uint32_t solid_top, uint32_t solid_bottom, bool skyFade) override { DrawerCommandQueue::QueueCommand(args, solid_top, solid_bottom, skyFade); } - void DrawDoubleSkyColumn(const SkyDrawerArgs &args, uint32_t solid_top, uint32_t solid_bottom, bool skyFade) override { DrawerCommandQueue::QueueCommand(args, solid_top, solid_bottom, skyFade); } + void DrawSingleSkyColumn(const SkyDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawDoubleSkyColumn(const SkyDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } void DrawColumn(const SpriteDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } void FillColumn(const SpriteDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } void FillAddColumn(const SpriteDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } diff --git a/src/swrenderer/drawers/r_drawerargs.cpp b/src/swrenderer/drawers/r_drawerargs.cpp index d2dff94826..ab469d2f36 100644 --- 
a/src/swrenderer/drawers/r_drawerargs.cpp +++ b/src/swrenderer/drawers/r_drawerargs.cpp @@ -708,14 +708,14 @@ namespace swrenderer Drawers()->DrawColoredSpan(*this, y, x1, x2); } - void SkyDrawerArgs::DrawSingleSkyColumn(uint32_t solid_top, uint32_t solid_bottom, bool fadeSky) + void SkyDrawerArgs::DrawSingleSkyColumn() { - Drawers()->DrawSingleSkyColumn(*this, solid_top, solid_bottom, fadeSky); + Drawers()->DrawSingleSkyColumn(*this); } - void SkyDrawerArgs::DrawDoubleSkyColumn(uint32_t solid_top, uint32_t solid_bottom, bool fadeSky) + void SkyDrawerArgs::DrawDoubleSkyColumn() { - Drawers()->DrawDoubleSkyColumn(*this, solid_top, solid_bottom, fadeSky); + Drawers()->DrawDoubleSkyColumn(*this); } void SkyDrawerArgs::SetDest(int x, int y) @@ -725,6 +725,39 @@ namespace swrenderer dc_dest_y = y; } + void SkyDrawerArgs::SetFrontTexture(FTexture *texture, uint32_t column) + { + if (r_swtruecolor) + { + dc_source = (const uint8_t *)texture->GetColumnBgra(column, nullptr); + dc_sourceheight = texture->GetHeight(); + } + else + { + dc_source = texture->GetColumn(column, nullptr); + dc_sourceheight = texture->GetHeight(); + } + } + + void SkyDrawerArgs::SetBackTexture(FTexture *texture, uint32_t column) + { + if (texture == nullptr) + { + dc_source2 = nullptr; + dc_sourceheight2 = 1; + } + else if (r_swtruecolor) + { + dc_source2 = (const uint8_t *)texture->GetColumnBgra(column, nullptr); + dc_sourceheight2 = texture->GetHeight(); + } + else + { + dc_source2 = texture->GetColumn(column, nullptr); + dc_sourceheight2 = texture->GetHeight(); + } + } + void SpriteDrawerArgs::FillColumn() { Drawers()->FillColumn(*this); diff --git a/src/swrenderer/drawers/r_drawerargs.h b/src/swrenderer/drawers/r_drawerargs.h index 58f44931b4..7de839d28d 100644 --- a/src/swrenderer/drawers/r_drawerargs.h +++ b/src/swrenderer/drawers/r_drawerargs.h @@ -46,24 +46,48 @@ namespace swrenderer class SkyDrawerArgs : public DrawerArgs { public: - const uint8_t *dc_wall_source; - const uint8_t 
*dc_wall_source2; - uint32_t dc_wall_sourceheight[2]; - uint32_t dc_wall_texturefrac; - uint32_t dc_wall_iscale; - int dc_count; - void SetDest(int x, int y); + void SetCount(int count) { dc_count = count; } + void SetFrontTexture(FTexture *texture, uint32_t column); + void SetBackTexture(FTexture *texture, uint32_t column); + void SetTextureVPos(uint32_t texturefrac) { dc_texturefrac = texturefrac; } + void SetTextureVStep(uint32_t iscale) { dc_iscale = iscale; } + void SetSolidTop(uint32_t color) { solid_top = color; } + void SetSolidBottom(uint32_t color) { solid_bottom = color; } + void SetFadeSky(bool enable) { fadeSky = enable; } uint8_t *Dest() const { return dc_dest; } int DestY() const { return dc_dest_y; } + int Count() const { return dc_count; } - void DrawSingleSkyColumn(uint32_t solid_top, uint32_t solid_bottom, bool fadeSky); - void DrawDoubleSkyColumn(uint32_t solid_top, uint32_t solid_bottom, bool fadeSky); + uint32_t TextureVPos() const { return dc_texturefrac; } + uint32_t TextureVStep() const { return dc_iscale; } + + uint32_t SolidTopColor() const { return solid_top; } + uint32_t SolidBottomColor() const { return solid_bottom; } + bool FadeSky() const { return fadeSky; } + + const uint8_t *FrontTexturePixels() const { return dc_source; } + const uint8_t *BackTexturePixels() const { return dc_source2; } + int FrontTextureHeight() const { return dc_sourceheight; } + int BackTextureHeight() const { return dc_sourceheight; } + + void DrawSingleSkyColumn(); + void DrawDoubleSkyColumn(); private: uint8_t *dc_dest = nullptr; int dc_dest_y = 0; + int dc_count = 0; + const uint8_t *dc_source; + const uint8_t *dc_source2; + uint32_t dc_sourceheight; + uint32_t dc_sourceheight2; + uint32_t dc_texturefrac; + uint32_t dc_iscale; + uint32_t solid_top; + uint32_t solid_bottom; + bool fadeSky; }; class SpanDrawerArgs : public DrawerArgs diff --git a/src/swrenderer/plane/r_skyplane.cpp b/src/swrenderer/plane/r_skyplane.cpp index a151e4e86c..e0e12c091e 100644 --- 
a/src/swrenderer/plane/r_skyplane.cpp +++ b/src/swrenderer/plane/r_skyplane.cpp @@ -197,35 +197,20 @@ namespace swrenderer angle1 = (uint32_t)((UMulScale16(ang, frontcyl) + frontpos) >> FRACBITS); angle2 = (uint32_t)((UMulScale16(ang, backcyl) + backpos) >> FRACBITS); - if (r_swtruecolor) - { - drawerargs.dc_wall_source = (const uint8_t *)frontskytex->GetColumnBgra(angle1, nullptr); - drawerargs.dc_wall_source2 = backskytex ? (const uint8_t *)backskytex->GetColumnBgra(angle2, nullptr) : nullptr; - } - else - { - drawerargs.dc_wall_source = (const uint8_t *)frontskytex->GetColumn(angle1, nullptr); - drawerargs.dc_wall_source2 = backskytex ? (const uint8_t *)backskytex->GetColumn(angle2, nullptr) : nullptr; - } - - drawerargs.dc_wall_iscale = uv_step; - drawerargs.dc_wall_texturefrac = uv_pos; - - drawerargs.dc_wall_sourceheight[0] = height; - drawerargs.dc_wall_sourceheight[1] = backskytex ? backskytex->GetHeight() : height; - int pixelsize = r_swtruecolor ? 4 : 1; + drawerargs.SetFrontTexture(frontskytex, angle1); + drawerargs.SetBackTexture(backskytex, angle2); + drawerargs.SetTextureVStep(uv_step); + drawerargs.SetTextureVPos(uv_pos); drawerargs.SetDest(start_x, y1); - drawerargs.dc_count = y2 - y1; - - uint32_t solid_top = frontskytex->GetSkyCapColor(false); - uint32_t solid_bottom = frontskytex->GetSkyCapColor(true); - - bool fadeSky = (r_skymode == 2 && !(level.flags & LEVEL_FORCETILEDSKY)); + drawerargs.SetCount(y2 - y1); + drawerargs.SetFadeSky(r_skymode == 2 && !(level.flags & LEVEL_FORCETILEDSKY)); + drawerargs.SetSolidTop(frontskytex->GetSkyCapColor(false)); + drawerargs.SetSolidBottom(frontskytex->GetSkyCapColor(true)); if (!backskytex) - drawerargs.DrawSingleSkyColumn(solid_top, solid_bottom, fadeSky); + drawerargs.DrawSingleSkyColumn(); else - drawerargs.DrawDoubleSkyColumn(solid_top, solid_bottom, fadeSky); + drawerargs.DrawDoubleSkyColumn(); } void RenderSkyPlane::DrawSkyColumn(int start_x, int y1, int y2) From 
eadeccd70951de61392a6da1e6700a06b80f31e7 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 30 Jan 2017 20:23:50 +0100 Subject: [PATCH 788/912] Fix typo --- src/swrenderer/drawers/r_drawerargs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/swrenderer/drawers/r_drawerargs.h b/src/swrenderer/drawers/r_drawerargs.h index 7de839d28d..ca6d202ea2 100644 --- a/src/swrenderer/drawers/r_drawerargs.h +++ b/src/swrenderer/drawers/r_drawerargs.h @@ -122,7 +122,7 @@ namespace swrenderer dsfixed_t TextureUPos() const { return ds_xfrac; } dsfixed_t TextureVPos() const { return ds_yfrac; } dsfixed_t TextureUStep() const { return ds_xstep; } - dsfixed_t TextureVStep() const { return ds_xstep; } + dsfixed_t TextureVStep() const { return ds_ystep; } int SolidColor() const { return ds_color; } int TextureWidthBits() const { return ds_xbits; } int TextureHeightBits() const { return ds_ybits; } From 4fdacfe96e54c4a955c3f7bde1d30ceb24b4d052 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 31 Jan 2017 13:26:06 +0100 Subject: [PATCH 789/912] Fix crash in palette mode --- src/swrenderer/drawers/r_drawerargs.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/swrenderer/drawers/r_drawerargs.cpp b/src/swrenderer/drawers/r_drawerargs.cpp index ab469d2f36..b789829524 100644 --- a/src/swrenderer/drawers/r_drawerargs.cpp +++ b/src/swrenderer/drawers/r_drawerargs.cpp @@ -73,7 +73,6 @@ namespace swrenderer void DrawerArgs::SetTranslationMap(lighttable_t *translation) { - mBaseColormap = nullptr; mTranslation = translation; } From e78e76a593760b34dbba83822a94b1e367277d3e Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 1 Feb 2017 16:02:21 +0100 Subject: [PATCH 790/912] Move r_viewport variables into a class --- src/polyrenderer/drawers/poly_draw_args.cpp | 5 +- src/polyrenderer/math/tri_matrix.cpp | 5 +- src/polyrenderer/poly_renderer.cpp | 47 +++++++++++-------- src/polyrenderer/scene/poly_decal.cpp | 2 +- 
src/polyrenderer/scene/poly_particle.cpp | 2 +- src/polyrenderer/scene/poly_playersprite.cpp | 14 +++--- src/polyrenderer/scene/poly_sky.cpp | 2 +- src/polyrenderer/scene/poly_sprite.cpp | 2 +- src/swrenderer/drawers/r_draw.h | 2 - src/swrenderer/drawers/r_draw_pal.cpp | 18 ++++---- src/swrenderer/drawers/r_draw_rgba.cpp | 26 +++++------ src/swrenderer/drawers/r_drawerargs.cpp | 38 ++++++++++------ src/swrenderer/line/r_line.cpp | 5 +- src/swrenderer/line/r_renderdrawsegment.cpp | 9 ++-- src/swrenderer/line/r_walldraw.cpp | 22 +++++---- src/swrenderer/line/r_wallsetup.cpp | 22 +++++---- src/swrenderer/plane/r_flatplane.cpp | 36 ++++++++------- src/swrenderer/plane/r_skyplane.cpp | 10 ++-- src/swrenderer/plane/r_slopeplane.cpp | 14 +++--- src/swrenderer/r_swrenderer.cpp | 10 ++-- src/swrenderer/scene/r_light.cpp | 14 +++--- src/swrenderer/scene/r_light.h | 2 +- src/swrenderer/scene/r_opaque_pass.cpp | 9 ++-- src/swrenderer/scene/r_portal.cpp | 30 ++++++------ src/swrenderer/scene/r_scene.cpp | 48 +++++++++++--------- src/swrenderer/scene/r_viewport.cpp | 26 ----------- src/swrenderer/scene/r_viewport.h | 46 +++++++++++-------- src/swrenderer/things/r_decal.cpp | 6 ++- src/swrenderer/things/r_particle.cpp | 16 ++++--- src/swrenderer/things/r_playersprite.cpp | 32 +++++++------ src/swrenderer/things/r_sprite.cpp | 14 ++++-- src/swrenderer/things/r_visiblesprite.cpp | 14 +++--- src/swrenderer/things/r_voxel.cpp | 15 +++--- src/swrenderer/things/r_wallsprite.cpp | 6 ++- src/v_draw.cpp | 44 ++++++++++-------- 35 files changed, 336 insertions(+), 277 deletions(-) diff --git a/src/polyrenderer/drawers/poly_draw_args.cpp b/src/polyrenderer/drawers/poly_draw_args.cpp index 9e9686e92c..81ca52a1bc 100644 --- a/src/polyrenderer/drawers/poly_draw_args.cpp +++ b/src/polyrenderer/drawers/poly_draw_args.cpp @@ -48,7 +48,8 @@ void PolyDrawArgs::SetTexture(FTexture *texture) { textureWidth = texture->GetWidth(); textureHeight = texture->GetHeight(); - if (swrenderer::r_swtruecolor) + 
auto viewport = swrenderer::RenderViewport::Instance(); + if (viewport->r_swtruecolor) texturePixels = (const uint8_t *)texture->GetPixelsBgra(); else texturePixels = texture->GetPixels(); @@ -62,7 +63,7 @@ void PolyDrawArgs::SetTexture(FTexture *texture, uint32_t translationID, bool fo FRemapTable *table = TranslationToTable(translationID); if (table != nullptr && !table->Inactive) { - if (swrenderer::r_swtruecolor) + if (swrenderer::RenderViewport::Instance()->r_swtruecolor) translation = (uint8_t*)table->Palette; else translation = table->Remap; diff --git a/src/polyrenderer/math/tri_matrix.cpp b/src/polyrenderer/math/tri_matrix.cpp index 1f26ef2ef3..cd6109539b 100644 --- a/src/polyrenderer/math/tri_matrix.cpp +++ b/src/polyrenderer/math/tri_matrix.cpp @@ -146,11 +146,12 @@ TriMatrix TriMatrix::worldToView() TriMatrix TriMatrix::viewToClip() { + auto viewport = swrenderer::RenderViewport::Instance(); float near = 5.0f; float far = 65536.0f; float width = (float)(FocalTangent * near); - float top = (float)(swrenderer::CenterY / swrenderer::InvZtoScale * near); - float bottom = (float)(top - viewheight / swrenderer::InvZtoScale * near); + float top = (float)(viewport->CenterY / viewport->InvZtoScale * near); + float bottom = (float)(top - viewheight / viewport->InvZtoScale * near); return frustum(-width, width, bottom, top, near, far); } diff --git a/src/polyrenderer/poly_renderer.cpp b/src/polyrenderer/poly_renderer.cpp index be1fe0074c..9d5475dccc 100644 --- a/src/polyrenderer/poly_renderer.cpp +++ b/src/polyrenderer/poly_renderer.cpp @@ -53,47 +53,51 @@ PolyRenderer *PolyRenderer::Instance() void PolyRenderer::RenderView(player_t *player) { using namespace swrenderer; + + auto viewport = RenderViewport::Instance(); - swrenderer::RenderTarget = screen; + viewport->RenderTarget = screen; - bool saved_swtruecolor = r_swtruecolor; - r_swtruecolor = screen->IsBgra(); + bool saved_swtruecolor = viewport->r_swtruecolor; + viewport->r_swtruecolor = screen->IsBgra(); 
int width = SCREENWIDTH; int height = SCREENHEIGHT; int stHeight = gST_Y; float trueratio; ActiveRatio(width, height, &trueratio); - RenderViewport::Instance()->SetViewport(width, height, trueratio); + viewport->SetViewport(width, height, trueratio); RenderActorView(player->mo, false); // Apply special colormap if the target cannot do it CameraLight *cameraLight = CameraLight::Instance(); - if (cameraLight->realfixedcolormap && r_swtruecolor && !(r_shadercolormaps && screen->Accel2D)) + if (cameraLight->realfixedcolormap && viewport->r_swtruecolor && !(r_shadercolormaps && screen->Accel2D)) { R_BeginDrawerCommands(); DrawerCommandQueue::QueueCommand(cameraLight->realfixedcolormap, screen); R_EndDrawerCommands(); } - r_swtruecolor = saved_swtruecolor; + viewport->r_swtruecolor = saved_swtruecolor; } void PolyRenderer::RenderViewToCanvas(AActor *actor, DCanvas *canvas, int x, int y, int width, int height, bool dontmaplines) { + auto viewport = swrenderer::RenderViewport::Instance(); + const bool savedviewactive = viewactive; - const bool savedoutputformat = swrenderer::r_swtruecolor; + const bool savedoutputformat = viewport->r_swtruecolor; viewwidth = width; - swrenderer::RenderTarget = canvas; - swrenderer::bRenderingToCanvas = true; + viewport->RenderTarget = canvas; + viewport->bRenderingToCanvas = true; R_SetWindow(12, width, height, height, true); - swrenderer::RenderViewport::Instance()->SetViewport(width, height, WidescreenRatio); + viewport->SetViewport(width, height, WidescreenRatio); viewwindowx = x; viewwindowy = y; viewactive = true; - swrenderer::r_swtruecolor = canvas->IsBgra(); + viewport->r_swtruecolor = canvas->IsBgra(); canvas->Lock(true); @@ -101,14 +105,14 @@ void PolyRenderer::RenderViewToCanvas(AActor *actor, DCanvas *canvas, int x, int canvas->Unlock(); - swrenderer::RenderTarget = screen; - swrenderer::bRenderingToCanvas = false; + viewport->RenderTarget = screen; + viewport->bRenderingToCanvas = false; R_ExecuteSetViewSize(); float 
trueratio; ActiveRatio(width, height, &trueratio); - swrenderer::RenderViewport::Instance()->SetViewport(width, height, WidescreenRatio); + viewport->SetViewport(width, height, WidescreenRatio); viewactive = savedviewactive; - swrenderer::r_swtruecolor = savedoutputformat; + viewport->r_swtruecolor = savedoutputformat; } void PolyRenderer::RenderActorView(AActor *actor, bool dontmaplines) @@ -157,8 +161,9 @@ void PolyRenderer::RenderRemainingPlayerSprites() void PolyRenderer::ClearBuffers() { PolyVertexBuffer::Clear(); - PolyStencilBuffer::Instance()->Clear(swrenderer::RenderTarget->GetWidth(), swrenderer::RenderTarget->GetHeight(), 0); - PolySubsectorGBuffer::Instance()->Resize(swrenderer::RenderTarget->GetPitch(), swrenderer::RenderTarget->GetHeight()); + auto viewport = swrenderer::RenderViewport::Instance(); + PolyStencilBuffer::Instance()->Clear(viewport->RenderTarget->GetWidth(), viewport->RenderTarget->GetHeight(), 0); + PolySubsectorGBuffer::Instance()->Resize(viewport->RenderTarget->GetPitch(), viewport->RenderTarget->GetHeight()); NextStencilValue = 0; SeenLinePortals.clear(); SeenMirrors.clear(); @@ -167,8 +172,10 @@ void PolyRenderer::ClearBuffers() void PolyRenderer::SetSceneViewport() { using namespace swrenderer; + + auto viewport = RenderViewport::Instance(); - if (RenderTarget == screen) // Rendering to screen + if (viewport->RenderTarget == screen) // Rendering to screen { int height; if (screenblocks >= 10) @@ -177,11 +184,11 @@ void PolyRenderer::SetSceneViewport() height = (screenblocks*SCREENHEIGHT / 10) & ~7; int bottom = SCREENHEIGHT - (height + viewwindowy - ((height - viewheight) / 2)); - PolyTriangleDrawer::set_viewport(viewwindowx, SCREENHEIGHT - bottom - height, viewwidth, height, RenderTarget); + PolyTriangleDrawer::set_viewport(viewwindowx, SCREENHEIGHT - bottom - height, viewwidth, height, viewport->RenderTarget); } else // Rendering to camera texture { - PolyTriangleDrawer::set_viewport(0, 0, RenderTarget->GetWidth(), 
RenderTarget->GetHeight(), RenderTarget); + PolyTriangleDrawer::set_viewport(0, 0, viewport->RenderTarget->GetWidth(), viewport->RenderTarget->GetHeight(), viewport->RenderTarget); } } diff --git a/src/polyrenderer/scene/poly_decal.cpp b/src/polyrenderer/scene/poly_decal.cpp index 78e43df9eb..c658a76fa5 100644 --- a/src/polyrenderer/scene/poly_decal.cpp +++ b/src/polyrenderer/scene/poly_decal.cpp @@ -151,7 +151,7 @@ void RenderPolyDecal::Render(const TriMatrix &worldToClip, const Vec4f &clipPlan } args.uniforms.subsectorDepth = subsectorDepth; - if (swrenderer::r_swtruecolor) + if (swrenderer::RenderViewport::Instance()->r_swtruecolor) { args.uniforms.color = 0xff000000 | decal->AlphaColor; } diff --git a/src/polyrenderer/scene/poly_particle.cpp b/src/polyrenderer/scene/poly_particle.cpp index bb21275f98..e9a80dc436 100644 --- a/src/polyrenderer/scene/poly_particle.cpp +++ b/src/polyrenderer/scene/poly_particle.cpp @@ -90,7 +90,7 @@ void RenderPolyParticle::Render(const TriMatrix &worldToClip, const Vec4f &clipP uint32_t alpha = (uint32_t)clamp(particle->alpha * 255.0f + 0.5f, 0.0f, 255.0f); - if (swrenderer::r_swtruecolor) + if (swrenderer::RenderViewport::Instance()->r_swtruecolor) { args.uniforms.color = (alpha << 24) | (particle->color & 0xffffff); } diff --git a/src/polyrenderer/scene/poly_playersprite.cpp b/src/polyrenderer/scene/poly_playersprite.cpp index dd8fc48509..0dee44f73b 100644 --- a/src/polyrenderer/scene/poly_playersprite.cpp +++ b/src/polyrenderer/scene/poly_playersprite.cpp @@ -140,23 +140,25 @@ void RenderPolyPlayerSprites::RenderSprite(DPSprite *sprite, AActor *owner, floa sx += wx; sy += wy; } + + auto viewport = swrenderer::RenderViewport::Instance(); double pspritexscale = centerxwide / 160.0; - double pspriteyscale = pspritexscale * swrenderer::YaspectMul; + double pspriteyscale = pspritexscale * viewport->YaspectMul; double pspritexiscale = 1 / pspritexscale; // calculate edges of the shape double tx = sx - BaseXCenter; tx -= 
tex->GetScaledLeftOffset(); - int x1 = xs_RoundToInt(swrenderer::CenterX + tx * pspritexscale); + int x1 = xs_RoundToInt(viewport->CenterX + tx * pspritexscale); // off the right side if (x1 > viewwidth) return; tx += tex->GetScaledWidth(); - int x2 = xs_RoundToInt(swrenderer::CenterX + tx * pspritexscale); + int x2 = xs_RoundToInt(viewport->CenterX + tx * pspritexscale); // off the left side if (x2 <= 0) @@ -165,12 +167,12 @@ void RenderPolyPlayerSprites::RenderSprite(DPSprite *sprite, AActor *owner, floa double texturemid = (BaseYCenter - sy) * tex->Scale.Y + tex->TopOffset; // Adjust PSprite for fullscreen views - if (camera->player && (swrenderer::RenderTarget != screen || viewheight == swrenderer::RenderTarget->GetHeight() || (swrenderer::RenderTarget->GetWidth() > (BaseXCenter * 2) && !st_scale))) + if (camera->player && (viewport->RenderTarget != screen || viewheight == viewport->RenderTarget->GetHeight() || (viewport->RenderTarget->GetWidth() > (BaseXCenter * 2) && !st_scale))) { AWeapon *weapon = dyn_cast(sprite->GetCaller()); if (weapon != nullptr && weapon->YAdjust != 0) { - if (swrenderer::RenderTarget != screen || viewheight == swrenderer::RenderTarget->GetHeight()) + if (viewport->RenderTarget != screen || viewheight == viewport->RenderTarget->GetHeight()) { texturemid -= weapon->YAdjust; } @@ -343,7 +345,7 @@ void RenderPolyPlayerSprites::RenderSprite(DPSprite *sprite, AActor *owner, floa // Check for hardware-assisted 2D. If it's available, and this sprite is not // fuzzy, don't draw it until after the switch to 2D mode. 
- if (!noaccel && swrenderer::RenderTarget == screen && (DFrameBuffer *)screen->Accel2D) + if (!noaccel && swrenderer::RenderViewport::Instance()->RenderTarget == screen && (DFrameBuffer *)screen->Accel2D) { FRenderStyle style = RenderStyle; style.CheckFuzz(); diff --git a/src/polyrenderer/scene/poly_sky.cpp b/src/polyrenderer/scene/poly_sky.cpp index c4d0d7320a..d4ebc57967 100644 --- a/src/polyrenderer/scene/poly_sky.cpp +++ b/src/polyrenderer/scene/poly_sky.cpp @@ -94,7 +94,7 @@ void PolySkyDome::RenderRow(PolyDrawArgs &args, int row, uint32_t capcolor) void PolySkyDome::RenderCapColorRow(PolyDrawArgs &args, FTexture *skytex, int row, bool bottomCap) { uint32_t solid = skytex->GetSkyCapColor(bottomCap); - if (!swrenderer::r_swtruecolor) + if (!swrenderer::RenderViewport::Instance()->r_swtruecolor) solid = RGB32k.RGB[(RPART(solid) >> 3)][(GPART(solid) >> 3)][(BPART(solid) >> 3)]; args.vinput = &mVertices[mPrimStart[row]]; diff --git a/src/polyrenderer/scene/poly_sprite.cpp b/src/polyrenderer/scene/poly_sprite.cpp index 0b9b7a9860..43ec99d90b 100644 --- a/src/polyrenderer/scene/poly_sprite.cpp +++ b/src/polyrenderer/scene/poly_sprite.cpp @@ -250,7 +250,7 @@ void RenderPolySprite::Render(const TriMatrix &worldToClip, const Vec4f &clipPla args.SetTexture(tex, thing->Translation, true); } - if (!swrenderer::r_swtruecolor) + if (!swrenderer::RenderViewport::Instance()->r_swtruecolor) { uint32_t r = (args.uniforms.color >> 16) & 0xff; uint32_t g = (args.uniforms.color >> 8) & 0xff; diff --git a/src/swrenderer/drawers/r_draw.h b/src/swrenderer/drawers/r_draw.h index 625e67ead8..d6ec7a149a 100644 --- a/src/swrenderer/drawers/r_draw.h +++ b/src/swrenderer/drawers/r_draw.h @@ -44,8 +44,6 @@ namespace swrenderer #define PARTICLE_TEXTURE_SIZE 64 extern uint32_t particle_texture[PARTICLE_TEXTURE_SIZE * PARTICLE_TEXTURE_SIZE]; - extern bool r_swtruecolor; - class SWPixelFormatDrawers { public: diff --git a/src/swrenderer/drawers/r_draw_pal.cpp 
b/src/swrenderer/drawers/r_draw_pal.cpp index 9051126ea5..e7906d3192 100644 --- a/src/swrenderer/drawers/r_draw_pal.cpp +++ b/src/swrenderer/drawers/r_draw_pal.cpp @@ -106,7 +106,7 @@ namespace swrenderer _dest = args.Dest(); _dest_y = args.DestY(); _fracbits = args.dc_wall_fracbits; - _pitch = dc_pitch; + _pitch = RenderViewport::Instance()->dc_pitch; _srcblend = args.dc_srcblend; _destblend = args.dc_destblend; _dynlights = args.dc_lights; @@ -564,7 +564,7 @@ namespace swrenderer _dest = args.Dest(); _dest_y = args.DestY(); _count = args.Count(); - _pitch = dc_pitch; + _pitch = RenderViewport::Instance()->dc_pitch; _source = args.FrontTexturePixels(); _source2 = args.BackTexturePixels(); _sourceheight[0] = args.FrontTextureHeight(); @@ -870,7 +870,7 @@ namespace swrenderer _count = args.dc_count; _dest = args.Dest(); _dest_y = args.DestY(); - _pitch = dc_pitch; + _pitch = RenderViewport::Instance()->dc_pitch; _iscale = args.dc_iscale; _texturefrac = args.dc_texturefrac; _colormap = args.Colormap(); @@ -1767,8 +1767,8 @@ namespace swrenderer _yl = args.dc_yl; _yh = args.dc_yh; _x = args.dc_x; - _destorg = dc_destorg; - _pitch = dc_pitch; + _destorg = RenderViewport::Instance()->dc_destorg; + _pitch = RenderViewport::Instance()->dc_pitch; _fuzzpos = fuzzpos; _fuzzviewheight = fuzzviewheight; } @@ -1858,7 +1858,7 @@ namespace swrenderer _y = args.DestY(); _x1 = args.DestX1(); _x2 = args.DestX2(); - _destorg = dc_destorg; + _destorg = RenderViewport::Instance()->dc_destorg; _xstep = args.TextureUStep(); _ystep = args.TextureVStep(); _xbits = args.TextureWidthBits(); @@ -2628,7 +2628,7 @@ namespace swrenderer : y(y), x1(x1), x2(x2), plane_sz(plane_sz), plane_su(plane_su), plane_sv(plane_sv), plane_shade(plane_shade), planeshade(planeshade), planelightfloat(planelightfloat), pviewx(pviewx), pviewy(pviewy) { _colormap = args.Colormap(); - _destorg = dc_destorg; + _destorg = RenderViewport::Instance()->dc_destorg; _ybits = args.TextureHeightBits(); _xbits = 
args.TextureWidthBits(); _source = args.TexturePixels(); @@ -2873,7 +2873,7 @@ namespace swrenderer DrawColoredSpanPalCommand::DrawColoredSpanPalCommand(const SpanDrawerArgs &args, int y, int x1, int x2) : PalSpanCommand(args), y(y), x1(x1), x2(x2) { color = args.SolidColor(); - destorg = dc_destorg; + destorg = RenderViewport::Instance()->dc_destorg; } void DrawColoredSpanPalCommand::Execute(DrawerThread *thread) @@ -2889,7 +2889,7 @@ namespace swrenderer DrawFogBoundaryLinePalCommand::DrawFogBoundaryLinePalCommand(const SpanDrawerArgs &args, int y, int x1, int x2) : PalSpanCommand(args), y(y), x1(x1), x2(x2) { _colormap = args.Colormap(); - _destorg = dc_destorg; + _destorg = RenderViewport::Instance()->dc_destorg; } void DrawFogBoundaryLinePalCommand::Execute(DrawerThread *thread) diff --git a/src/swrenderer/drawers/r_draw_rgba.cpp b/src/swrenderer/drawers/r_draw_rgba.cpp index 897990cdb5..59521bb065 100644 --- a/src/swrenderer/drawers/r_draw_rgba.cpp +++ b/src/swrenderer/drawers/r_draw_rgba.cpp @@ -72,8 +72,8 @@ namespace swrenderer args.y = drawerargs.DestY(); args.xbits = drawerargs.TextureWidthBits(); args.ybits = drawerargs.TextureHeightBits(); - args.destorg = (uint32_t*)dc_destorg; - args.destpitch = dc_pitch; + args.destorg = (uint32_t*)RenderViewport::Instance()->dc_destorg; + args.destpitch = RenderViewport::Instance()->dc_pitch; args.source = (const uint32_t*)drawerargs.TexturePixels(); args.light = LightBgra::calc_light_multiplier(drawerargs.Light()); args.light_red = shade_constants.light_red; @@ -186,7 +186,7 @@ namespace swrenderer auto shade_constants = drawerargs.ColormapConstants(); args.dest = (uint32_t*)drawerargs.Dest(); args.dest_y = drawerargs.DestY(); - args.pitch = dc_pitch; + args.pitch = RenderViewport::Instance()->dc_pitch; args.count = drawerargs.dc_count; args.texturefrac[0] = drawerargs.dc_texturefrac; args.texturefracx[0] = drawerargs.dc_texturefracx; @@ -256,7 +256,7 @@ namespace swrenderer args.colormap = drawerargs.Colormap(); 
args.translation = drawerargs.TranslationMap(); args.basecolors = (const uint32_t *)GPalette.BaseColors; - args.pitch = dc_pitch; + args.pitch = RenderViewport::Instance()->dc_pitch; args.count = drawerargs.dc_count; args.dest_y = drawerargs.DestY(); args.iscale = drawerargs.dc_iscale; @@ -307,7 +307,7 @@ namespace swrenderer args.dest = (uint32_t*)drawerargs.Dest(); args.dest_y = drawerargs.DestY(); args.count = drawerargs.Count(); - args.pitch = dc_pitch; + args.pitch = RenderViewport::Instance()->dc_pitch; args.texturefrac[0] = drawerargs.TextureVPos(); args.iscale[0] = drawerargs.TextureVStep(); args.source0[0] = (const uint32_t *)drawerargs.FrontTexturePixels(); @@ -331,8 +331,8 @@ namespace swrenderer _x = drawerargs.dc_x; _yl = drawerargs.dc_yl; _yh = drawerargs.dc_yh; - _destorg = dc_destorg; - _pitch = dc_pitch; + _destorg = RenderViewport::Instance()->dc_destorg; + _pitch = RenderViewport::Instance()->dc_pitch; _fuzzpos = fuzzpos; _fuzzviewheight = fuzzviewheight; } @@ -439,7 +439,7 @@ namespace swrenderer _x1 = drawerargs.DestX1(); _x2 = drawerargs.DestX2(); _y = drawerargs.DestY(); - _destorg = dc_destorg; + _destorg = RenderViewport::Instance()->dc_destorg; _light = drawerargs.Light(); _color = drawerargs.SolidColor(); } @@ -470,7 +470,7 @@ namespace swrenderer _x = x; _x2 = x2; - _destorg = dc_destorg; + _destorg = RenderViewport::Instance()->dc_destorg; _light = drawerargs.Light(); _shade_constants = drawerargs.ColormapConstants(); } @@ -537,7 +537,7 @@ namespace swrenderer _x1 = x1; _x2 = x2; _y = y; - _destorg = dc_destorg; + _destorg = RenderViewport::Instance()->dc_destorg; _light = drawerargs.Light(); _shade_constants = drawerargs.ColormapConstants(); _plane_sz = plane_sz; @@ -672,7 +672,7 @@ namespace swrenderer _x1 = x1; _x2 = x2; - _destorg = dc_destorg; + _destorg = RenderViewport::Instance()->dc_destorg; _light = drawerargs.Light(); _color = drawerargs.SolidColor(); } @@ -709,8 +709,8 @@ namespace swrenderer _color = color; _a = a; - 
_destorg = dc_destorg; - _pitch = dc_pitch; + _destorg = RenderViewport::Instance()->dc_destorg; + _pitch = RenderViewport::Instance()->dc_pitch; } void FillTransColumnRGBACommand::Execute(DrawerThread *thread) diff --git a/src/swrenderer/drawers/r_drawerargs.cpp b/src/swrenderer/drawers/r_drawerargs.cpp index b789829524..8da9e3d848 100644 --- a/src/swrenderer/drawers/r_drawerargs.cpp +++ b/src/swrenderer/drawers/r_drawerargs.cpp @@ -41,7 +41,7 @@ namespace swrenderer { SWPixelFormatDrawers *DrawerArgs::Drawers() { - if (r_swtruecolor) + if (RenderViewport::Instance()->r_swtruecolor) { static SWTruecolorDrawers tc_drawers; return &tc_drawers; @@ -80,7 +80,7 @@ namespace swrenderer { if (mBaseColormap) { - if (r_swtruecolor) + if (RenderViewport::Instance()->r_swtruecolor) return mBaseColormap->Maps; else return mBaseColormap->Maps + (GETPALOOKUP(mLight, mShade) << COLORMAPSHIFT); @@ -137,14 +137,17 @@ namespace swrenderer ds_ybits--; } - ds_source = r_swtruecolor ? (const uint8_t*)tex->GetPixelsBgra() : tex->GetPixels(); + auto viewport = RenderViewport::Instance(); + ds_source = viewport->r_swtruecolor ? (const uint8_t*)tex->GetPixelsBgra() : tex->GetPixels(); ds_source_mipmapped = tex->Mipmapped() && tex->GetWidth() > 1 && tex->GetHeight() > 1; } void SpriteDrawerArgs::DrawMaskedColumn(int x, fixed_t iscale, FTexture *tex, fixed_t col, double spryscale, double sprtopscreen, bool sprflipvert, const short *mfloorclip, const short *mceilingclip, bool unmasked) { + auto viewport = RenderViewport::Instance(); + // Handle the linear filtered version in a different function to reduce chances of merge conflicts from zdoom. 
- if (r_swtruecolor && !drawer_needs_pal_input) // To do: add support to R_DrawColumnHoriz_rgba + if (viewport->r_swtruecolor && !drawer_needs_pal_input) // To do: add support to R_DrawColumnHoriz_rgba { DrawMaskedColumnBgra(x, iscale, tex, col, spryscale, sprtopscreen, sprflipvert, mfloorclip, mceilingclip, unmasked); return; @@ -156,7 +159,7 @@ namespace swrenderer const FTexture::Span *span; const BYTE *column; - if (r_swtruecolor && !drawer_needs_pal_input) + if (viewport->r_swtruecolor && !drawer_needs_pal_input) column = (const BYTE *)tex->GetColumnBgra(col >> FRACBITS, &span); else column = tex->GetColumn(col >> FRACBITS, &span); @@ -171,7 +174,7 @@ namespace swrenderer unmaskedSpan[1].Length = 0; } - int pixelsize = r_swtruecolor ? 4 : 1; + int pixelsize = viewport->r_swtruecolor ? 4 : 1; while (span->Length != 0) { @@ -500,6 +503,8 @@ namespace swrenderer { alpha = clamp(alpha, 0, OPAQUE); } + + auto viewport = RenderViewport::Instance(); if (translation != -1) { @@ -509,7 +514,7 @@ namespace swrenderer FRemapTable *table = TranslationToTable(translation); if (table != NULL && !table->Inactive) { - if (r_swtruecolor) + if (viewport->r_swtruecolor) SetTranslationMap((uint8_t*)table->Palette); else SetTranslationMap(table->Remap); @@ -585,8 +590,9 @@ namespace swrenderer void WallDrawerArgs::SetDest(int x, int y) { - int pixelsize = r_swtruecolor ? 4 : 1; - dc_dest = dc_destorg + (ylookup[y] + x) * pixelsize; + auto viewport = RenderViewport::Instance(); + int pixelsize = viewport->r_swtruecolor ? 4 : 1; + dc_dest = viewport->dc_destorg + (ylookup[y] + x) * pixelsize; dc_dest_y = y; } @@ -719,14 +725,15 @@ namespace swrenderer void SkyDrawerArgs::SetDest(int x, int y) { - int pixelsize = r_swtruecolor ? 4 : 1; - dc_dest = dc_destorg + (ylookup[y] + x) * pixelsize; + auto viewport = RenderViewport::Instance(); + int pixelsize = viewport->r_swtruecolor ? 
4 : 1; + dc_dest = viewport->dc_destorg + (ylookup[y] + x) * pixelsize; dc_dest_y = y; } void SkyDrawerArgs::SetFrontTexture(FTexture *texture, uint32_t column) { - if (r_swtruecolor) + if (RenderViewport::Instance()->r_swtruecolor) { dc_source = (const uint8_t *)texture->GetColumnBgra(column, nullptr); dc_sourceheight = texture->GetHeight(); @@ -745,7 +752,7 @@ namespace swrenderer dc_source2 = nullptr; dc_sourceheight2 = 1; } - else if (r_swtruecolor) + else if (RenderViewport::Instance()->r_swtruecolor) { dc_source2 = (const uint8_t *)texture->GetColumnBgra(column, nullptr); dc_sourceheight2 = texture->GetHeight(); @@ -764,8 +771,9 @@ namespace swrenderer void SpriteDrawerArgs::SetDest(int x, int y) { - int pixelsize = r_swtruecolor ? 4 : 1; - dc_dest = dc_destorg + (ylookup[y] + x) * pixelsize; + auto viewport = RenderViewport::Instance(); + int pixelsize = viewport->r_swtruecolor ? 4 : 1; + dc_dest = viewport->dc_destorg + (ylookup[y] + x) * pixelsize; dc_dest_y = y; } } diff --git a/src/swrenderer/line/r_line.cpp b/src/swrenderer/line/r_line.cpp index 7c1a6b78a9..dea8f62e73 100644 --- a/src/swrenderer/line/r_line.cpp +++ b/src/swrenderer/line/r_line.cpp @@ -1148,6 +1148,7 @@ namespace swrenderer tright.Y = float(pt2.X * ViewTanCos + pt2.Y * ViewTanSin); RenderPortal *renderportal = RenderPortal::Instance(); + auto viewport = RenderViewport::Instance(); if (renderportal->MirrorFlags & RF_XFLIP) { @@ -1161,7 +1162,7 @@ namespace swrenderer { if (tleft.X > tleft.Y) return true; // left edge is off the right side if (tleft.Y == 0) return true; - sx1 = xs_RoundToInt(CenterX + tleft.X * CenterX / tleft.Y); + sx1 = xs_RoundToInt(viewport->CenterX + tleft.X * viewport->CenterX / tleft.Y); sz1 = tleft.Y; } else @@ -1180,7 +1181,7 @@ namespace swrenderer { if (tright.X < -tright.Y) return true; // right edge is off the left side if (tright.Y == 0) return true; - sx2 = xs_RoundToInt(CenterX + tright.X * CenterX / tright.Y); + sx2 = xs_RoundToInt(viewport->CenterX + 
tright.X * viewport->CenterX / tright.Y); sz2 = tright.Y; } else diff --git a/src/swrenderer/line/r_renderdrawsegment.cpp b/src/swrenderer/line/r_renderdrawsegment.cpp index bdd587aa8c..42f46da5ea 100644 --- a/src/swrenderer/line/r_renderdrawsegment.cpp +++ b/src/swrenderer/line/r_renderdrawsegment.cpp @@ -48,6 +48,7 @@ namespace swrenderer { void RenderDrawSegment::Render(DrawSegment *ds, int x1, int x2) { + auto viewport = RenderViewport::Instance(); RenderFogBoundary renderfog; float *MaskedSWall = nullptr, MaskedScaleY = 0, rw_scalestep = 0; fixed_t *maskedtexturecol = nullptr; @@ -209,12 +210,12 @@ namespace swrenderer } // [RH] Don't bother drawing segs that are completely offscreen - if (globaldclip * ds->sz1 < -textop && globaldclip * ds->sz2 < -textop) + if (viewport->globaldclip * ds->sz1 < -textop && viewport->globaldclip * ds->sz2 < -textop) { // Texture top is below the bottom of the screen goto clearfog; } - if (globaluclip * ds->sz1 > texheight - textop && globaluclip * ds->sz2 > texheight - textop) + if (viewport->globaluclip * ds->sz1 > texheight - textop && viewport->globaluclip * ds->sz2 > texheight - textop) { // Texture bottom is above the top of the screen goto clearfog; } @@ -290,9 +291,9 @@ namespace swrenderer fixed_t iscale = xs_Fix<16>::ToFix(MaskedSWall[x] * MaskedScaleY); double sprtopscreen; if (sprflipvert) - sprtopscreen = CenterY + texturemid * spryscale; + sprtopscreen = viewport->CenterY + texturemid * spryscale; else - sprtopscreen = CenterY - texturemid * spryscale; + sprtopscreen = viewport->CenterY - texturemid * spryscale; columndrawerargs.DrawMaskedColumn(x, iscale, tex, maskedtexturecol[x], spryscale, sprtopscreen, sprflipvert, mfloorclip, mceilingclip); diff --git a/src/swrenderer/line/r_walldraw.cpp b/src/swrenderer/line/r_walldraw.cpp index bf18426efd..e8dedc6e18 100644 --- a/src/swrenderer/line/r_walldraw.cpp +++ b/src/swrenderer/line/r_walldraw.cpp @@ -45,9 +45,11 @@ namespace swrenderer { WallSampler::WallSampler(int 
y1, double texturemid, float swal, double yrepeat, fixed_t xoffset, double xmagnitude, FTexture *texture) { + auto viewport = RenderViewport::Instance(); + xoffset += FLOAT2FIXED(xmagnitude * 0.5); - if (!r_swtruecolor) + if (!viewport->r_swtruecolor) { height = texture->GetHeight(); @@ -59,7 +61,7 @@ namespace swrenderer // Find start uv in [0-base_height[ range. // Not using xs_ToFixed because it rounds the result and we need something that always rounds down to stay within the range. double uv_stepd = swal * yrepeat; - double v = (texturemid + uv_stepd * (y1 - CenterY + 0.5)) / height; + double v = (texturemid + uv_stepd * (y1 - viewport->CenterY + 0.5)) / height; v = v - floor(v); v *= height; v *= (1 << uv_fracbits); @@ -86,7 +88,7 @@ namespace swrenderer col = width + (col % width); } - if (r_swtruecolor) + if (viewport->r_swtruecolor) source = (const uint8_t *)texture->GetColumnBgra(col, nullptr); else source = texture->GetColumn(col, nullptr); @@ -98,7 +100,7 @@ namespace swrenderer { // Normalize to 0-1 range: double uv_stepd = swal * yrepeat; - double v = (texturemid + uv_stepd * (y1 - CenterY + 0.5)) / texture->GetHeight(); + double v = (texturemid + uv_stepd * (y1 - viewport->CenterY + 0.5)) / texture->GetHeight(); v = v - floor(v); double v_step = uv_stepd / texture->GetHeight(); @@ -172,16 +174,18 @@ namespace swrenderer { if (r_dynlights && light_list) { + auto viewport = RenderViewport::Instance(); + // Find column position in view space float w1 = 1.0f / WallC.sz1; float w2 = 1.0f / WallC.sz2; float t = (x - WallC.sx1 + 0.5f) / (WallC.sx2 - WallC.sx1); float wcol = w1 * (1.0f - t) + w2 * t; float zcol = 1.0f / wcol; - drawerargs.dc_viewpos.X = (float)((x + 0.5 - CenterX) / CenterX * zcol); + drawerargs.dc_viewpos.X = (float)((x + 0.5 - viewport->CenterX) / viewport->CenterX * zcol); drawerargs.dc_viewpos.Y = zcol; - drawerargs.dc_viewpos.Z = (float)((CenterY - y1 - 0.5) / InvZtoScale * zcol); - drawerargs.dc_viewpos_step.Z = (float)(-zcol / 
InvZtoScale); + drawerargs.dc_viewpos.Z = (float)((viewport->CenterY - y1 - 0.5) / viewport->InvZtoScale * zcol); + drawerargs.dc_viewpos_step.Z = (float)(-zcol / viewport->InvZtoScale); static TriLight lightbuffer[64 * 1024]; static int nextlightindex = 0; @@ -236,7 +240,7 @@ namespace swrenderer drawerargs.dc_num_lights = 0; } - if (r_swtruecolor) + if (RenderViewport::Instance()->r_swtruecolor) { int count = y2 - y1; @@ -320,7 +324,7 @@ namespace swrenderer texturemid = 0; } - drawerargs.dc_wall_fracbits = r_swtruecolor ? FRACBITS : fracbits; + drawerargs.dc_wall_fracbits = RenderViewport::Instance()->r_swtruecolor ? FRACBITS : fracbits; CameraLight *cameraLight = CameraLight::Instance(); bool fixed = (cameraLight->fixedcolormap != NULL || cameraLight->fixedlightlev >= 0); diff --git a/src/swrenderer/line/r_wallsetup.cpp b/src/swrenderer/line/r_wallsetup.cpp index 2c9c824422..33388586f0 100644 --- a/src/swrenderer/line/r_wallsetup.cpp +++ b/src/swrenderer/line/r_wallsetup.cpp @@ -34,8 +34,10 @@ namespace swrenderer ProjectedWallCull ProjectedWallLine::Project(double z1, double z2, const FWallCoords *wallc) { - float y1 = (float)(CenterY - z1 * InvZtoScale / wallc->sz1); - float y2 = (float)(CenterY - z2 * InvZtoScale / wallc->sz2); + auto viewport = RenderViewport::Instance(); + + float y1 = (float)(viewport->CenterY - z1 * viewport->InvZtoScale / wallc->sz1); + float y2 = (float)(viewport->CenterY - z2 * viewport->InvZtoScale / wallc->sz2); if (y1 < 0 && y2 < 0) // entire line is above screen { @@ -151,13 +153,15 @@ namespace swrenderer void ProjectedWallTexcoords::Project(double walxrepeat, int x1, int x2, const FWallTmapVals &WallT) { - float uOverZ = WallT.UoverZorg + WallT.UoverZstep * (float)(x1 + 0.5 - CenterX); - float invZ = WallT.InvZorg + WallT.InvZstep * (float)(x1 + 0.5 - CenterX); + auto viewport = RenderViewport::Instance(); + + float uOverZ = WallT.UoverZorg + WallT.UoverZstep * (float)(x1 + 0.5 - viewport->CenterX); + float invZ = WallT.InvZorg 
+ WallT.InvZstep * (float)(x1 + 0.5 - viewport->CenterX); float uGradient = WallT.UoverZstep; float zGradient = WallT.InvZstep; float xrepeat = (float)walxrepeat; - float depthScale = (float)(WallT.InvZstep * WallTMapScale2); - float depthOrg = (float)(-WallT.UoverZstep * WallTMapScale2); + float depthScale = (float)(WallT.InvZstep * viewport->WallTMapScale2); + float depthOrg = (float)(-WallT.UoverZstep * viewport->WallTMapScale2); if (xrepeat < 0.0f) { @@ -189,8 +193,10 @@ namespace swrenderer void ProjectedWallTexcoords::ProjectPos(double walxrepeat, int x1, int x2, const FWallTmapVals &WallT) { - float uOverZ = WallT.UoverZorg + WallT.UoverZstep * (float)(x1 + 0.5 - CenterX); - float invZ = WallT.InvZorg + WallT.InvZstep * (float)(x1 + 0.5 - CenterX); + auto viewport = RenderViewport::Instance(); + + float uOverZ = WallT.UoverZorg + WallT.UoverZstep * (float)(x1 + 0.5 - viewport->CenterX); + float invZ = WallT.InvZorg + WallT.InvZstep * (float)(x1 + 0.5 - viewport->CenterX); float uGradient = WallT.UoverZstep; float zGradient = WallT.InvZstep; float xrepeat = (float)walxrepeat; diff --git a/src/swrenderer/plane/r_flatplane.cpp b/src/swrenderer/plane/r_flatplane.cpp index 895734ea6c..cccac6d9d6 100644 --- a/src/swrenderer/plane/r_flatplane.cpp +++ b/src/swrenderer/plane/r_flatplane.cpp @@ -78,9 +78,11 @@ namespace swrenderer // left to right mapping planeang += (ViewAngle - 90).Radians(); + auto viewport = RenderViewport::Instance(); + // Scale will be unit scale at FocalLengthX (normally SCREENWIDTH/2) distance - xstep = cos(planeang) / FocalLengthX; - ystep = -sin(planeang) / FocalLengthX; + xstep = cos(planeang) / viewport->FocalLengthX; + ystep = -sin(planeang) / viewport->FocalLengthX; // [RH] flip for mirrors RenderPortal *renderportal = RenderPortal::Instance(); @@ -170,12 +172,14 @@ namespace swrenderer drawerargs.SetTextureVStep(0); drawerargs.SetTextureVPos(0); } + + auto viewport = RenderViewport::Instance(); - if (r_swtruecolor) + if 
(viewport->r_swtruecolor) { double distance2 = planeheight * yslope[(y + 1 < viewheight) ? y + 1 : y - 1]; - double xmagnitude = fabs(ystepscale * (distance2 - distance) * FocalLengthX); - double ymagnitude = fabs(xstepscale * (distance2 - distance) * FocalLengthX); + double xmagnitude = fabs(ystepscale * (distance2 - distance) * viewport->FocalLengthX); + double ymagnitude = fabs(xstepscale * (distance2 - distance) * viewport->FocalLengthX); double magnitude = MAX(ymagnitude, xmagnitude); double min_lod = -1000.0; drawerargs.SetTextureLOD(MAX(log2(magnitude) + r_lod_bias, min_lod)); @@ -184,17 +188,17 @@ namespace swrenderer if (plane_shade) { // Determine lighting based on the span's distance from the viewer. - drawerargs.SetColorMapLight(basecolormap, (float)(GlobVis * fabs(CenterY - y)), planeshade); + drawerargs.SetColorMapLight(basecolormap, (float)(GlobVis * fabs(viewport->CenterY - y)), planeshade); } if (r_dynlights) { // Find row position in view space - float zspan = (float)(planeheight / (fabs(y + 0.5 - CenterY) / InvZtoScale)); - drawerargs.dc_viewpos.X = (float)((x1 + 0.5 - CenterX) / CenterX * zspan); + float zspan = (float)(planeheight / (fabs(y + 0.5 - viewport->CenterY) / viewport->InvZtoScale)); + drawerargs.dc_viewpos.X = (float)((x1 + 0.5 - viewport->CenterX) / viewport->CenterX * zspan); drawerargs.dc_viewpos.Y = zspan; - drawerargs.dc_viewpos.Z = (float)((CenterY - y - 0.5) / InvZtoScale * zspan); - drawerargs.dc_viewpos_step.X = (float)(zspan / CenterX); + drawerargs.dc_viewpos.Z = (float)((viewport->CenterY - y - 0.5) / viewport->InvZtoScale * zspan); + drawerargs.dc_viewpos_step.X = (float)(zspan / viewport->CenterX); static TriLight lightbuffer[64 * 1024]; static int nextlightindex = 0; @@ -202,7 +206,7 @@ namespace swrenderer // Plane normal drawerargs.dc_normal.X = 0.0f; drawerargs.dc_normal.Y = 0.0f; - drawerargs.dc_normal.Z = (y >= CenterY) ? 1.0f : -1.0f; + drawerargs.dc_normal.Z = (y >= viewport->CenterY) ? 
1.0f : -1.0f; // Setup lights for row drawerargs.dc_num_lights = 0; @@ -266,13 +270,13 @@ namespace swrenderer void RenderFlatPlane::SetupSlope() { - int e, i; + auto viewport = RenderViewport::Instance(); - i = 0; - e = viewheight; - float focus = float(FocalLengthY); + int i = 0; + int e = viewheight; + float focus = float(viewport->FocalLengthY); float den; - float cy = float(CenterY); + float cy = float(viewport->CenterY); if (i < centery) { den = cy - i - 0.5f; diff --git a/src/swrenderer/plane/r_skyplane.cpp b/src/swrenderer/plane/r_skyplane.cpp index e0e12c091e..f7b4222f2a 100644 --- a/src/swrenderer/plane/r_skyplane.cpp +++ b/src/swrenderer/plane/r_skyplane.cpp @@ -169,11 +169,12 @@ namespace swrenderer void RenderSkyPlane::DrawSkyColumnStripe(int start_x, int y1, int y2, double scale, double texturemid, double yrepeat) { RenderPortal *renderportal = RenderPortal::Instance(); + auto viewport = RenderViewport::Instance(); uint32_t height = frontskytex->GetHeight(); double uv_stepd = skyiscale * yrepeat; - double v = (texturemid + uv_stepd * (y1 - CenterY + 0.5)) / height; + double v = (texturemid + uv_stepd * (y1 - viewport->CenterY + 0.5)) / height; double v_step = uv_stepd / height; uint32_t uv_pos = (uint32_t)(v * 0x01000000); @@ -192,7 +193,7 @@ namespace swrenderer } else { - ang = (skyangle + xtoviewangle[x]) ^ skyflip; + ang = (skyangle + viewport->xtoviewangle[x]) ^ skyflip; } angle1 = (uint32_t)((UMulScale16(ang, frontcyl) + frontpos) >> FRACBITS); angle2 = (uint32_t)((UMulScale16(ang, backcyl) + backpos) >> FRACBITS); @@ -222,13 +223,14 @@ namespace swrenderer } else { + auto viewport = RenderViewport::Instance(); double yrepeat = frontskytex->Scale.Y; double scale = frontskytex->Scale.Y * skyscale; double iscale = 1 / scale; short drawheight = short(frontskytex->GetHeight() * scale); - double topfrac = fmod(skymid + iscale * (1 - CenterY), frontskytex->GetHeight()); + double topfrac = fmod(skymid + iscale * (1 - viewport->CenterY), 
frontskytex->GetHeight()); if (topfrac < 0) topfrac += frontskytex->GetHeight(); - double texturemid = topfrac - iscale * (1 - CenterY); + double texturemid = topfrac - iscale * (1 - viewport->CenterY); DrawSkyColumnStripe(start_x, y1, y2, scale, texturemid, yrepeat); } } diff --git a/src/swrenderer/plane/r_slopeplane.cpp b/src/swrenderer/plane/r_slopeplane.cpp index cdee76e0c9..93e98f2d8b 100644 --- a/src/swrenderer/plane/r_slopeplane.cpp +++ b/src/swrenderer/plane/r_slopeplane.cpp @@ -69,6 +69,8 @@ namespace swrenderer { return; } + + auto viewport = RenderViewport::Instance(); drawerargs.SetSolidColor(3); drawerargs.SetTexture(texture); @@ -124,13 +126,13 @@ namespace swrenderer plane_sv = p ^ n; plane_sz = m ^ n; - plane_su.Z *= FocalLengthX; - plane_sv.Z *= FocalLengthX; - plane_sz.Z *= FocalLengthX; + plane_su.Z *= viewport->FocalLengthX; + plane_sv.Z *= viewport->FocalLengthX; + plane_sz.Z *= viewport->FocalLengthX; - plane_su.Y *= IYaspectMul; - plane_sv.Y *= IYaspectMul; - plane_sz.Y *= IYaspectMul; + plane_su.Y *= viewport->IYaspectMul; + plane_sv.Y *= viewport->IYaspectMul; + plane_sz.Y *= viewport->IYaspectMul; // Premultiply the texture vectors with the scale factors plane_su *= 4294967296.f; diff --git a/src/swrenderer/r_swrenderer.cpp b/src/swrenderer/r_swrenderer.cpp index a558e04c37..ab71584717 100644 --- a/src/swrenderer/r_swrenderer.cpp +++ b/src/swrenderer/r_swrenderer.cpp @@ -89,7 +89,7 @@ void FSoftwareRenderer::Init() { gl_ParseDefs(); - r_swtruecolor = screen->IsBgra(); + RenderViewport::Instance()->r_swtruecolor = screen->IsBgra(); RenderScene::Instance()->Init(); } @@ -249,8 +249,10 @@ void FSoftwareRenderer::SetClearColor(int color) void FSoftwareRenderer::RenderTextureView (FCanvasTexture *tex, AActor *viewpoint, int fov) { - BYTE *Pixels = r_swtruecolor ? (BYTE*)tex->GetPixelsBgra() : (BYTE*)tex->GetPixels(); - DSimpleCanvas *Canvas = r_swtruecolor ? 
tex->GetCanvasBgra() : tex->GetCanvas(); + auto viewport = RenderViewport::Instance(); + + BYTE *Pixels = viewport->r_swtruecolor ? (BYTE*)tex->GetPixelsBgra() : (BYTE*)tex->GetPixels(); + DSimpleCanvas *Canvas = viewport->r_swtruecolor ? tex->GetCanvasBgra() : tex->GetCanvas(); // curse Doom's overuse of global variables in the renderer. // These get clobbered by rendering to a camera texture but they need to be preserved so the final rendering can be done with the correct palette. @@ -289,7 +291,7 @@ void FSoftwareRenderer::RenderTextureView (FCanvasTexture *tex, AActor *viewpoin } } - if (r_swtruecolor) + if (viewport->r_swtruecolor) { // True color render still sometimes uses palette textures (for sprites, mostly). // We need to make sure that both pixel buffers contain data: diff --git a/src/swrenderer/scene/r_light.cpp b/src/swrenderer/scene/r_light.cpp index 851fab7500..2b0f860921 100644 --- a/src/swrenderer/scene/r_light.cpp +++ b/src/swrenderer/scene/r_light.cpp @@ -59,7 +59,8 @@ namespace swrenderer if (player->fixedcolormap >= 0 && player->fixedcolormap < (int)SpecialColormaps.Size()) { realfixedcolormap = &SpecialColormaps[player->fixedcolormap]; - if (RenderTarget == screen && (r_swtruecolor || ((DFrameBuffer *)screen->Accel2D && r_shadercolormaps))) + auto viewport = RenderViewport::Instance(); + if (viewport->RenderTarget == screen && (viewport->r_swtruecolor || ((DFrameBuffer *)screen->Accel2D && r_shadercolormaps))) { // Render everything fullbright. The copy to video memory will // apply the special colormap, so it won't be restricted to the @@ -105,7 +106,8 @@ namespace swrenderer CurrentVisibility = vis; - if (FocalTangent == 0 || FocalLengthY == 0) + auto viewport = RenderViewport::Instance(); + if (FocalTangent == 0 || viewport->FocalLengthY == 0) { // If r_visibility is called before the renderer is all set up, don't // divide by zero. This will be called again later, and the proper // values can be initialized then. 
@@ -114,9 +116,9 @@ namespace swrenderer BaseVisibility = vis; - MaxVisForWall = (InvZtoScale * (SCREENWIDTH*r_Yaspect) / (viewwidth*SCREENHEIGHT * FocalTangent)); + MaxVisForWall = (viewport->InvZtoScale * (SCREENWIDTH*r_Yaspect) / (viewwidth*SCREENHEIGHT * FocalTangent)); MaxVisForWall = 32767.0 / MaxVisForWall; - MaxVisForFloor = 32767.0 / (viewheight >> 2) * FocalLengthY / 160; + MaxVisForFloor = 32767.0 / (viewheight >> 2) * viewport->FocalLengthY / 160; // Prevent overflow on walls if (BaseVisibility < 0 && BaseVisibility < -MaxVisForWall) @@ -126,7 +128,7 @@ namespace swrenderer else WallVisibility = BaseVisibility; - WallVisibility = (InvZtoScale * SCREENWIDTH*AspectBaseHeight(WidescreenRatio) / + WallVisibility = (viewport->InvZtoScale * SCREENWIDTH*AspectBaseHeight(WidescreenRatio) / (viewwidth*SCREENHEIGHT * 3)) * (WallVisibility * FocalTangent); // Prevent overflow on floors/ceilings. Note that the calculation of @@ -140,7 +142,7 @@ namespace swrenderer else FloorVisibility = BaseVisibility; - FloorVisibility = 160.0 * FloorVisibility / FocalLengthY; + FloorVisibility = 160.0 * FloorVisibility / viewport->FocalLengthY; TiltVisibility = float(vis * FocalTangent * (16.f * 320.f) / viewwidth); } diff --git a/src/swrenderer/scene/r_light.h b/src/swrenderer/scene/r_light.h index cebb34e9ed..099700980d 100644 --- a/src/swrenderer/scene/r_light.h +++ b/src/swrenderer/scene/r_light.h @@ -84,7 +84,7 @@ namespace swrenderer double WallVis(double screenZ) const { return WallGlobVis() / screenZ; } double SpriteVis(double screenZ) const { return WallGlobVis() / screenZ; } double ParticleVis(double screenZ) const { return WallGlobVis() / screenZ; } - double FlatPlaneVis(int screenY, double planeZ) const { return FlatPlaneGlobVis() / fabs(planeZ - ViewPos.Z) * fabs(CenterY - screenY); } + double FlatPlaneVis(int screenY, double planeZ) const { return FlatPlaneGlobVis() / fabs(planeZ - ViewPos.Z) * fabs(RenderViewport::Instance()->CenterY - screenY); } private: double 
BaseVisibility = 0.0; diff --git a/src/swrenderer/scene/r_opaque_pass.cpp b/src/swrenderer/scene/r_opaque_pass.cpp index 2391d05428..a1c5caafcc 100644 --- a/src/swrenderer/scene/r_opaque_pass.cpp +++ b/src/swrenderer/scene/r_opaque_pass.cpp @@ -348,12 +348,14 @@ namespace swrenderer rx2 = t; swapvalues(ry1, ry2); } + + auto viewport = RenderViewport::Instance(); if (rx1 >= -ry1) { if (rx1 > ry1) return false; // left edge is off the right side if (ry1 == 0) return false; - sx1 = xs_RoundToInt(CenterX + rx1 * CenterX / ry1); + sx1 = xs_RoundToInt(viewport->CenterX + rx1 * viewport->CenterX / ry1); } else { @@ -366,7 +368,7 @@ namespace swrenderer { if (rx2 < -ry2) return false; // right edge is off the left side if (ry2 == 0) return false; - sx2 = xs_RoundToInt(CenterX + rx2 * CenterX / ry2); + sx2 = xs_RoundToInt(viewport->CenterX + rx2 * viewport->CenterX / ry2); } else { @@ -810,9 +812,10 @@ namespace swrenderer void RenderOpaquePass::ClearClip() { + auto viewport = RenderViewport::Instance(); // clip ceiling to console bottom fillshort(floorclip, viewwidth, viewheight); - fillshort(ceilingclip, viewwidth, !screen->Accel2D && ConBottom > viewwindowy && !bRenderingToCanvas ? (ConBottom - viewwindowy) : 0); + fillshort(ceilingclip, viewwidth, !screen->Accel2D && ConBottom > viewwindowy && !viewport->bRenderingToCanvas ? (ConBottom - viewwindowy) : 0); } void RenderOpaquePass::AddSprites(sector_t *sec, int lightlevel, WaterFakeSide fakeside, bool foggy, FDynamicColormap *basecolormap) diff --git a/src/swrenderer/scene/r_portal.cpp b/src/swrenderer/scene/r_portal.cpp index be18ff59f9..be45c1a015 100644 --- a/src/swrenderer/scene/r_portal.cpp +++ b/src/swrenderer/scene/r_portal.cpp @@ -289,22 +289,24 @@ namespace swrenderer void RenderPortal::RenderLinePortal(PortalDrawseg* pds, int depth) { + auto viewport = RenderViewport::Instance(); + // [ZZ] check depth. fill portal with black if it's exceeding the visual recursion limit, and continue like nothing happened. 
if (depth >= r_portal_recursions) { BYTE color = (BYTE)BestColor((DWORD *)GPalette.BaseColors, 0, 0, 0, 0, 255); - int spacing = RenderTarget->GetPitch(); + int spacing = viewport->RenderTarget->GetPitch(); for (int x = pds->x1; x < pds->x2; x++) { - if (x < 0 || x >= RenderTarget->GetWidth()) + if (x < 0 || x >= viewport->RenderTarget->GetWidth()) continue; int Ytop = pds->ceilingclip[x - pds->x1]; int Ybottom = pds->floorclip[x - pds->x1]; - if (r_swtruecolor) + if (viewport->r_swtruecolor) { - uint32_t *dest = (uint32_t*)RenderTarget->GetBuffer() + x + Ytop * spacing; + uint32_t *dest = (uint32_t*)viewport->RenderTarget->GetBuffer() + x + Ytop * spacing; uint32_t c = GPalette.BaseColors[color].d; for (int y = Ytop; y <= Ybottom; y++) @@ -315,7 +317,7 @@ namespace swrenderer } else { - BYTE *dest = RenderTarget->GetBuffer() + x + Ytop * spacing; + BYTE *dest = viewport->RenderTarget->GetBuffer() + x + Ytop * spacing; for (int y = Ytop; y <= Ybottom; y++) { @@ -483,16 +485,18 @@ namespace swrenderer // [ZZ] NO OVERFLOW CHECKS HERE // I believe it won't break. if it does, blame me. 
:( - if (r_swtruecolor) // Assuming this is just a debug function + auto viewport = RenderViewport::Instance(); + + if (viewport->r_swtruecolor) // Assuming this is just a debug function return; BYTE color = (BYTE)BestColor((DWORD *)GPalette.BaseColors, 255, 0, 0, 0, 255); - BYTE* pixels = RenderTarget->GetBuffer(); + BYTE* pixels = viewport->RenderTarget->GetBuffer(); // top edge for (int x = pds->x1; x < pds->x2; x++) { - if (x < 0 || x >= RenderTarget->GetWidth()) + if (x < 0 || x >= viewport->RenderTarget->GetWidth()) continue; int p = x - pds->x1; @@ -501,7 +505,7 @@ namespace swrenderer if (x == pds->x1 || x == pds->x2 - 1) { - RenderTarget->DrawLine(x, Ytop, x, Ybottom + 1, color, 0); + viewport->RenderTarget->DrawLine(x, Ytop, x, Ybottom + 1, color, 0); continue; } @@ -509,12 +513,12 @@ namespace swrenderer int YbottomPrev = pds->floorclip[p - 1]; if (abs(Ytop - YtopPrev) > 1) - RenderTarget->DrawLine(x, YtopPrev, x, Ytop, color, 0); - else *(pixels + Ytop * RenderTarget->GetPitch() + x) = color; + viewport->RenderTarget->DrawLine(x, YtopPrev, x, Ytop, color, 0); + else *(pixels + Ytop * viewport->RenderTarget->GetPitch() + x) = color; if (abs(Ybottom - YbottomPrev) > 1) - RenderTarget->DrawLine(x, YbottomPrev, x, Ybottom, color, 0); - else *(pixels + Ybottom * RenderTarget->GetPitch() + x) = color; + viewport->RenderTarget->DrawLine(x, YbottomPrev, x, Ybottom, color, 0); + else *(pixels + Ybottom * viewport->RenderTarget->GetPitch() + x) = color; } } diff --git a/src/swrenderer/scene/r_scene.cpp b/src/swrenderer/scene/r_scene.cpp index 24e04d5e88..63c3147078 100644 --- a/src/swrenderer/scene/r_scene.cpp +++ b/src/swrenderer/scene/r_scene.cpp @@ -68,30 +68,31 @@ namespace swrenderer void RenderScene::RenderView(player_t *player) { - RenderTarget = screen; + auto viewport = RenderViewport::Instance(); + viewport->RenderTarget = screen; int width = SCREENWIDTH; int height = SCREENHEIGHT; float trueratio; ActiveRatio(width, height, &trueratio); - 
RenderViewport::Instance()->SetViewport(width, height, trueratio); + viewport->SetViewport(width, height, trueratio); - if (r_swtruecolor != screen->IsBgra()) + if (viewport->r_swtruecolor != screen->IsBgra()) { - r_swtruecolor = screen->IsBgra(); + viewport->r_swtruecolor = screen->IsBgra(); } if (r_clearbuffer != 0) { - if (!r_swtruecolor) + if (!viewport->r_swtruecolor) { - memset(RenderTarget->GetBuffer(), clearcolor, RenderTarget->GetPitch() * RenderTarget->GetHeight()); + memset(viewport->RenderTarget->GetBuffer(), clearcolor, viewport->RenderTarget->GetPitch() * viewport->RenderTarget->GetHeight()); } else { uint32_t bgracolor = GPalette.BaseColors[clearcolor].d; - int size = RenderTarget->GetPitch() * RenderTarget->GetHeight(); - uint32_t *dest = (uint32_t *)RenderTarget->GetBuffer(); + int size = viewport->RenderTarget->GetPitch() * viewport->RenderTarget->GetHeight(); + uint32_t *dest = (uint32_t *)viewport->RenderTarget->GetBuffer(); for (int i = 0; i < size; i++) dest[i] = bgracolor; } @@ -102,7 +103,7 @@ namespace swrenderer RenderActorView(player->mo); // Apply special colormap if the target cannot do it - if (CameraLight::Instance()->realfixedcolormap && r_swtruecolor && !(r_shadercolormaps && screen->Accel2D)) + if (CameraLight::Instance()->realfixedcolormap && viewport->r_swtruecolor && !(r_shadercolormaps && screen->Accel2D)) { DrawerCommandQueue::QueueCommand(CameraLight::Instance()->realfixedcolormap, screen); } @@ -187,7 +188,7 @@ namespace swrenderer // If we don't want shadered colormaps, NULL it now so that the // copy to the screen does not use a special colormap shader. 
- if (!r_shadercolormaps && !r_swtruecolor) + if (!r_shadercolormaps && !RenderViewport::Instance()->r_swtruecolor) { CameraLight::Instance()->realfixedcolormap = NULL; } @@ -195,53 +196,56 @@ namespace swrenderer void RenderScene::RenderViewToCanvas(AActor *actor, DCanvas *canvas, int x, int y, int width, int height, bool dontmaplines) { + auto viewport = RenderViewport::Instance(); + const bool savedviewactive = viewactive; - const bool savedoutputformat = r_swtruecolor; + const bool savedoutputformat = viewport->r_swtruecolor; - if (r_swtruecolor != canvas->IsBgra()) + if (viewport->r_swtruecolor != canvas->IsBgra()) { - r_swtruecolor = canvas->IsBgra(); + viewport->r_swtruecolor = canvas->IsBgra(); } R_BeginDrawerCommands(); viewwidth = width; - RenderTarget = canvas; - bRenderingToCanvas = true; + viewport->RenderTarget = canvas; + viewport->bRenderingToCanvas = true; R_SetWindow(12, width, height, height, true); viewwindowx = x; viewwindowy = y; viewactive = true; - RenderViewport::Instance()->SetViewport(width, height, WidescreenRatio); + viewport->SetViewport(width, height, WidescreenRatio); RenderActorView(actor, dontmaplines); R_EndDrawerCommands(); - RenderTarget = screen; - bRenderingToCanvas = false; + viewport->RenderTarget = screen; + viewport->bRenderingToCanvas = false; R_ExecuteSetViewSize(); float trueratio; ActiveRatio(width, height, &trueratio); screen->Lock(true); - RenderViewport::Instance()->SetViewport(width, height, trueratio); + viewport->SetViewport(width, height, trueratio); screen->Unlock(); viewactive = savedviewactive; - r_swtruecolor = savedoutputformat; + viewport->r_swtruecolor = savedoutputformat; } void RenderScene::ScreenResized() { - RenderTarget = screen; + auto viewport = RenderViewport::Instance(); + viewport->RenderTarget = screen; int width = SCREENWIDTH; int height = SCREENHEIGHT; float trueratio; ActiveRatio(width, height, &trueratio); screen->Lock(true); - RenderViewport::Instance()->SetViewport(SCREENWIDTH, 
SCREENHEIGHT, trueratio); + viewport->SetViewport(SCREENWIDTH, SCREENHEIGHT, trueratio); screen->Unlock(); } diff --git a/src/swrenderer/scene/r_viewport.cpp b/src/swrenderer/scene/r_viewport.cpp index dc4a246b8d..c2d50839e9 100644 --- a/src/swrenderer/scene/r_viewport.cpp +++ b/src/swrenderer/scene/r_viewport.cpp @@ -38,32 +38,6 @@ CVAR(String, r_viewsize, "", CVAR_NOSET) namespace swrenderer { - bool r_swtruecolor; - - fixed_t viewingrangerecip; - double FocalLengthX; - double FocalLengthY; - - DCanvas *RenderTarget; - bool bRenderingToCanvas; - double globaluclip, globaldclip; - double CenterX, CenterY; - double YaspectMul; - double BaseYaspectMul; // yaspectmul without a forced aspect ratio - double IYaspectMul; - double InvZtoScale; - - double WallTMapScale2; - - uint8_t *dc_destorg; - int dc_destheight; - int dc_pitch; - - // The xtoviewangleangle[] table maps a screen pixel - // to the lowest viewangle that maps back to x ranges - // from clipangle to -clipangle. - angle_t xtoviewangle[MAXWIDTH + 1]; - RenderViewport *RenderViewport::Instance() { static RenderViewport instance; diff --git a/src/swrenderer/scene/r_viewport.h b/src/swrenderer/scene/r_viewport.h index c036adb5a9..f3d945bbbe 100644 --- a/src/swrenderer/scene/r_viewport.h +++ b/src/swrenderer/scene/r_viewport.h @@ -18,26 +18,6 @@ namespace swrenderer { - extern DCanvas *RenderTarget; - extern bool bRenderingToCanvas; - extern fixed_t viewingrangerecip; - extern double FocalLengthX; - extern double FocalLengthY; - extern double InvZtoScale; - extern double WallTMapScale2; - extern double CenterX; - extern double CenterY; - extern double YaspectMul; - extern double IYaspectMul; - extern bool r_swtruecolor; - extern double globaluclip; - extern double globaldclip; - extern angle_t xtoviewangle[MAXWIDTH + 1]; - - extern uint8_t *dc_destorg; - extern int dc_destheight; - extern int dc_pitch; - class RenderViewport { public: @@ -45,9 +25,35 @@ namespace swrenderer void SetViewport(int width, int 
height, float trueratio); void SetupFreelook(); + + DCanvas *RenderTarget = nullptr; + bool bRenderingToCanvas = false; + fixed_t viewingrangerecip = 0; + double FocalLengthX = 0.0; + double FocalLengthY = 0.0; + double InvZtoScale = 0.0; + double WallTMapScale2 = 0.0; + double CenterX = 0.0; + double CenterY = 0.0; + double YaspectMul = 0.0; + double IYaspectMul = 0.0; + bool r_swtruecolor = false; + double globaluclip = 0.0; + double globaldclip = 0.0; + + // The xtoviewangleangle[] table maps a screen pixel + // to the lowest viewangle that maps back to x ranges + // from clipangle to -clipangle. + angle_t xtoviewangle[MAXWIDTH + 1]; + + uint8_t *dc_destorg = nullptr; + int dc_destheight = 0; + int dc_pitch = 0; private: void InitTextureMapping(); void SetupBuffer(); + + double BaseYaspectMul = 0.0; // yaspectmul without a forced aspect ratio }; } diff --git a/src/swrenderer/things/r_decal.cpp b/src/swrenderer/things/r_decal.cpp index 9653a91fab..522448b206 100644 --- a/src/swrenderer/things/r_decal.cpp +++ b/src/swrenderer/things/r_decal.cpp @@ -317,13 +317,15 @@ namespace swrenderer void RenderDecal::DrawColumn(SpriteDrawerArgs &drawerargs, int x, FTexture *WallSpriteTile, const ProjectedWallTexcoords &walltexcoords, double texturemid, float maskedScaleY, bool sprflipvert, const short *mfloorclip, const short *mceilingclip) { + auto viewport = RenderViewport::Instance(); + float iscale = walltexcoords.VStep[x] * maskedScaleY; double spryscale = 1 / iscale; double sprtopscreen; if (sprflipvert) - sprtopscreen = CenterY + texturemid * spryscale; + sprtopscreen = viewport->CenterY + texturemid * spryscale; else - sprtopscreen = CenterY - texturemid * spryscale; + sprtopscreen = viewport->CenterY - texturemid * spryscale; drawerargs.DrawMaskedColumn(x, FLOAT2FIXED(iscale), WallSpriteTile, walltexcoords.UPos[x], spryscale, sprtopscreen, sprflipvert, mfloorclip, mceilingclip); } diff --git a/src/swrenderer/things/r_particle.cpp b/src/swrenderer/things/r_particle.cpp 
index 461964314c..72d3ce617c 100644 --- a/src/swrenderer/things/r_particle.cpp +++ b/src/swrenderer/things/r_particle.cpp @@ -108,11 +108,13 @@ namespace swrenderer if (x1 >= x2) return; + + auto viewport = RenderViewport::Instance(); yscale = xscale; // YaspectMul is not needed for particles as they should always be square ty = particle->Pos.Z - ViewPos.Z; - y1 = xs_RoundToInt(CenterY - (ty + psize) * yscale); - y2 = xs_RoundToInt(CenterY - (ty - psize) * yscale); + y1 = xs_RoundToInt(viewport->CenterY - (ty + psize) * yscale); + y2 = xs_RoundToInt(viewport->CenterY - (ty - psize) * yscale); // Clip the particle now. Because it's a point and projected as its subsector is // entered, we don't need to clip it to drawsegs like a normal sprite. @@ -227,21 +229,23 @@ namespace swrenderer // vis->renderflags holds translucency level (0-255) fixed_t fglevel = ((vis->renderflags + 1) << 8) & ~0x3ff; uint32_t alpha = fglevel * 256 / FRACUNIT; + + auto viewport = RenderViewport::Instance(); - spacing = RenderTarget->GetPitch(); + spacing = viewport->RenderTarget->GetPitch(); uint32_t fracstepx = PARTICLE_TEXTURE_SIZE * FRACUNIT / countbase; uint32_t fracposx = fracstepx / 2; RenderTranslucentPass *translucentPass = RenderTranslucentPass::Instance(); - if (r_swtruecolor) + if (viewport->r_swtruecolor) { for (int x = x1; x < (x1 + countbase); x++, fracposx += fracstepx) { if (translucentPass->ClipSpriteColumnWithPortals(x, vis)) continue; - uint32_t *dest = ylookup[yl] + x + (uint32_t*)dc_destorg; + uint32_t *dest = ylookup[yl] + x + (uint32_t*)viewport->dc_destorg; DrawerCommandQueue::QueueCommand(dest, yl, spacing, ycount, fg, alpha, fracposx); } } @@ -251,7 +255,7 @@ namespace swrenderer { if (translucentPass->ClipSpriteColumnWithPortals(x, vis)) continue; - uint8_t *dest = ylookup[yl] + x + dc_destorg; + uint8_t *dest = ylookup[yl] + x + viewport->dc_destorg; DrawerCommandQueue::QueueCommand(dest, yl, spacing, ycount, fg, alpha, fracposx); } } diff --git 
a/src/swrenderer/things/r_playersprite.cpp b/src/swrenderer/things/r_playersprite.cpp index 938c45900c..44a82eb22c 100644 --- a/src/swrenderer/things/r_playersprite.cpp +++ b/src/swrenderer/things/r_playersprite.cpp @@ -136,11 +136,13 @@ namespace swrenderer if (camera->player != NULL) { - double centerhack = CenterY; + auto viewport = RenderViewport::Instance(); + + double centerhack = viewport->CenterY; double wx, wy; float bobx, boby; - CenterY = viewheight / 2; + viewport->CenterY = viewheight / 2; P_BobWeapon(camera->player, &bobx, &boby, r_TicFracF); @@ -181,7 +183,7 @@ namespace swrenderer psp = psp->GetNext(); } - CenterY = centerhack; + viewport->CenterY = centerhack; } } @@ -241,23 +243,25 @@ namespace swrenderer sx += wx; sy += wy; } + + auto viewport = RenderViewport::Instance(); double pspritexscale = centerxwide / 160.0; - double pspriteyscale = pspritexscale * YaspectMul; + double pspriteyscale = pspritexscale * viewport->YaspectMul; double pspritexiscale = 1 / pspritexscale; // calculate edges of the shape tx = sx - BASEXCENTER; tx -= tex->GetScaledLeftOffset(); - x1 = xs_RoundToInt(CenterX + tx * pspritexscale); + x1 = xs_RoundToInt(viewport->CenterX + tx * pspritexscale); // off the right side if (x1 > viewwidth) return; tx += tex->GetScaledWidth(); - x2 = xs_RoundToInt(CenterX + tx * pspritexscale); + x2 = xs_RoundToInt(viewport->CenterX + tx * pspritexscale); // off the left side if (x2 <= 0) @@ -270,14 +274,14 @@ namespace swrenderer vis.texturemid = (BASEYCENTER - sy) * tex->Scale.Y + tex->TopOffset; - if (camera->player && (RenderTarget != screen || - viewheight == RenderTarget->GetHeight() || - (RenderTarget->GetWidth() > (BASEXCENTER * 2) && !st_scale))) + if (camera->player && (viewport->RenderTarget != screen || + viewheight == viewport->RenderTarget->GetHeight() || + (viewport->RenderTarget->GetWidth() > (BASEXCENTER * 2) && !st_scale))) { // Adjust PSprite for fullscreen views AWeapon *weapon = dyn_cast(pspr->GetCaller()); if (weapon != 
nullptr && weapon->YAdjust != 0) { - if (RenderTarget != screen || viewheight == RenderTarget->GetHeight()) + if (viewport->RenderTarget != screen || viewheight == viewport->RenderTarget->GetHeight()) { vis.texturemid -= weapon->YAdjust; } @@ -499,7 +503,7 @@ namespace swrenderer // Check for hardware-assisted 2D. If it's available, and this sprite is not // fuzzy, don't draw it until after the switch to 2D mode. - if (!noaccel && RenderTarget == screen && (DFrameBuffer *)screen->Accel2D) + if (!noaccel && viewport->RenderTarget == screen && (DFrameBuffer *)screen->Accel2D) { FRenderStyle style = vis.RenderStyle; style.CheckFuzz(); @@ -599,6 +603,8 @@ namespace swrenderer double spryscale = yscale; bool sprflipvert = false; fixed_t iscale = FLOAT2FIXED(1 / yscale); + + auto viewport = RenderViewport::Instance(); double sprtopscreen; if (renderflags & RF_YFLIP) @@ -606,12 +612,12 @@ namespace swrenderer sprflipvert = true; spryscale = -spryscale; iscale = -iscale; - sprtopscreen = CenterY + (texturemid - pic->GetHeight()) * spryscale; + sprtopscreen = viewport->CenterY + (texturemid - pic->GetHeight()) * spryscale; } else { sprflipvert = false; - sprtopscreen = CenterY - texturemid * spryscale; + sprtopscreen = viewport->CenterY - texturemid * spryscale; } // clip to screen bounds diff --git a/src/swrenderer/things/r_sprite.cpp b/src/swrenderer/things/r_sprite.cpp index f3df8d2a46..fd4fefc9b0 100644 --- a/src/swrenderer/things/r_sprite.cpp +++ b/src/swrenderer/things/r_sprite.cpp @@ -120,11 +120,13 @@ namespace swrenderer return; } } + + auto viewport = RenderViewport::Instance(); - double xscale = CenterX / tz; + double xscale = viewport->CenterX / tz; // [RH] Reject sprites that are off the top or bottom of the screen - if (globaluclip * tz > ViewPos.Z - gzb || globaldclip * tz < ViewPos.Z - gzt) + if (viewport->globaluclip * tz > ViewPos.Z - gzb || viewport->globaldclip * tz < ViewPos.Z - gzt) { return; } @@ -160,7 +162,7 @@ namespace swrenderer 
vis->CurrentPortalUniq = renderportal->CurrentPortalUniq; vis->xscale = FLOAT2FIXED(xscale); - vis->yscale = float(InvZtoScale * yscale / tz); + vis->yscale = float(viewport->InvZtoScale * yscale / tz); vis->idepth = float(1 / tz); vis->floorclip = thing->Floorclip / yscale; vis->texturemid = tex->TopOffset - (ViewPos.Z - pos.Z + thing->Floorclip) / yscale; @@ -262,18 +264,20 @@ namespace swrenderer xiscale = vis->xiscale; double texturemid = vis->texturemid; + auto viewport = RenderViewport::Instance(); + if (vis->renderflags & RF_YFLIP) { sprflipvert = true; spryscale = -spryscale; iscale = -iscale; texturemid -= vis->pic->GetHeight(); - sprtopscreen = CenterY + texturemid * spryscale; + sprtopscreen = viewport->CenterY + texturemid * spryscale; } else { sprflipvert = false; - sprtopscreen = CenterY - texturemid * spryscale; + sprtopscreen = viewport->CenterY - texturemid * spryscale; } int x = vis->x1; diff --git a/src/swrenderer/things/r_visiblesprite.cpp b/src/swrenderer/things/r_visiblesprite.cpp index b2c0b054a1..1b5a3b447d 100644 --- a/src/swrenderer/things/r_visiblesprite.cpp +++ b/src/swrenderer/things/r_visiblesprite.cpp @@ -149,8 +149,10 @@ namespace swrenderer // killough 3/27/98: // Clip the sprite against deep water and/or fake ceilings. 
// [RH] rewrote this to be based on which part of the sector is really visible + + auto viewport = RenderViewport::Instance(); - double scale = InvZtoScale * spr->idepth; + double scale = viewport->InvZtoScale * spr->idepth; double hzb = DBL_MIN, hzt = DBL_MAX; if (spr->IsVoxel() && spr->floorclip != 0) @@ -163,7 +165,7 @@ namespace swrenderer if (spr->FakeFlatStat != WaterFakeSide::AboveCeiling) { double hz = spr->heightsec->floorplane.ZatPoint(spr->gpos); - int h = xs_RoundToInt(CenterY - (hz - ViewPos.Z) * scale); + int h = xs_RoundToInt(viewport->CenterY - (hz - ViewPos.Z) * scale); if (spr->FakeFlatStat == WaterFakeSide::BelowFloor) { // seen below floor: clip top @@ -185,7 +187,7 @@ namespace swrenderer if (spr->FakeFlatStat != WaterFakeSide::BelowFloor && !(spr->heightsec->MoreFlags & SECF_FAKEFLOORONLY)) { double hz = spr->heightsec->ceilingplane.ZatPoint(spr->gpos); - int h = xs_RoundToInt(CenterY - (hz - ViewPos.Z) * scale); + int h = xs_RoundToInt(viewport->CenterY - (hz - ViewPos.Z) * scale); if (spr->FakeFlatStat == WaterFakeSide::AboveCeiling) { // seen above ceiling: clip bottom @@ -209,7 +211,7 @@ namespace swrenderer else if (!spr->IsVoxel() && spr->floorclip) { // [RH] Move floorclip stuff from R_DrawVisSprite to here //int clip = ((FLOAT2FIXED(CenterY) - FixedMul (spr->texturemid - (spr->pic->GetHeight() << FRACBITS) + spr->floorclip, spr->yscale)) >> FRACBITS); - int clip = xs_RoundToInt(CenterY - (spr->texturemid - spr->pic->GetHeight() + spr->floorclip) * spr->yscale); + int clip = xs_RoundToInt(viewport->CenterY - (spr->texturemid - spr->pic->GetHeight() + spr->floorclip) * spr->yscale); if (clip < botclip) { botclip = MAX(0, clip); @@ -229,7 +231,7 @@ namespace swrenderer hz = spr->fakefloor->bottom.plane->Zat0(); } } - int h = xs_RoundToInt(CenterY - (hz - ViewPos.Z) * scale); + int h = xs_RoundToInt(viewport->CenterY - (hz - ViewPos.Z) * scale); if (h < botclip) { botclip = MAX(0, h); @@ -250,7 +252,7 @@ namespace swrenderer hz = 
spr->fakeceiling->top.plane->Zat0(); } } - int h = xs_RoundToInt(CenterY - (hz - ViewPos.Z) * scale); + int h = xs_RoundToInt(viewport->CenterY - (hz - ViewPos.Z) * scale); if (h > topclip) { topclip = short(MIN(h, viewheight)); diff --git a/src/swrenderer/things/r_voxel.cpp b/src/swrenderer/things/r_voxel.cpp index 0d35a1768d..f513b59ee7 100644 --- a/src/swrenderer/things/r_voxel.cpp +++ b/src/swrenderer/things/r_voxel.cpp @@ -184,6 +184,7 @@ namespace swrenderer void RenderVoxel::Render(short *cliptop, short *clipbottom, int minZ, int maxZ) { auto sprite = this; + auto viewport = RenderViewport::Instance(); FDynamicColormap *basecolormap = static_cast(sprite->Light.BaseColormap); @@ -208,7 +209,7 @@ namespace swrenderer double viewCos = view_angle.Sin(); double logmip = fabs((view_origin.X - sprite_origin.X) * viewCos - (view_origin.Y - sprite_origin.Y) * viewSin); int miplevel = 0; - while (miplevel < voxel->NumMips - 1 && logmip >= FocalLengthX) + while (miplevel < voxel->NumMips - 1 && logmip >= viewport->FocalLengthX) { logmip *= 0.5; miplevel++; @@ -313,6 +314,8 @@ namespace swrenderer void RenderVoxel::FillBox(SpriteDrawerArgs &drawerargs, DVector3 origin, double extentX, double extentY, int color, short *cliptop, short *clipbottom, bool viewspace, bool pixelstretch) { + auto viewport = RenderViewport::Instance(); + double viewX, viewY, viewZ; if (viewspace) { @@ -333,17 +336,17 @@ namespace swrenderer if (viewZ < 0.01f) return; - double screenX = CenterX + viewX / viewZ * CenterX; - double screenY = CenterY - viewY / viewZ * InvZtoScale; - double screenExtentX = extentX / viewZ * CenterX; - double screenExtentY = pixelstretch ? screenExtentX * YaspectMul : screenExtentX; + double screenX = viewport->CenterX + viewX / viewZ * viewport->CenterX; + double screenY = viewport->CenterY - viewY / viewZ * viewport->InvZtoScale; + double screenExtentX = extentX / viewZ * viewport->CenterX; + double screenExtentY = pixelstretch ? 
screenExtentX * viewport->YaspectMul : screenExtentX; int x1 = MAX((int)(screenX - screenExtentX), 0); int x2 = MIN((int)(screenX + screenExtentX + 0.5f), viewwidth - 1); int y1 = MAX((int)(screenY - screenExtentY), 0); int y2 = MIN((int)(screenY + screenExtentY + 0.5f), viewheight - 1); - int pixelsize = r_swtruecolor ? 4 : 1; + int pixelsize = viewport->r_swtruecolor ? 4 : 1; if (y1 < y2) { diff --git a/src/swrenderer/things/r_wallsprite.cpp b/src/swrenderer/things/r_wallsprite.cpp index 25e33d9ce4..f4cb600d68 100644 --- a/src/swrenderer/things/r_wallsprite.cpp +++ b/src/swrenderer/things/r_wallsprite.cpp @@ -247,13 +247,15 @@ namespace swrenderer void RenderWallSprite::DrawColumn(SpriteDrawerArgs &drawerargs, int x, FTexture *WallSpriteTile, const ProjectedWallTexcoords &walltexcoords, double texturemid, float maskedScaleY, bool sprflipvert, const short *mfloorclip, const short *mceilingclip) { + auto viewport = RenderViewport::Instance(); + float iscale = walltexcoords.VStep[x] * maskedScaleY; double spryscale = 1 / iscale; double sprtopscreen; if (sprflipvert) - sprtopscreen = CenterY + texturemid * spryscale; + sprtopscreen = viewport->CenterY + texturemid * spryscale; else - sprtopscreen = CenterY - texturemid * spryscale; + sprtopscreen = viewport->CenterY - texturemid * spryscale; drawerargs.DrawMaskedColumn(x, FLOAT2FIXED(iscale), WallSpriteTile, walltexcoords.UPos[x], spryscale, sprtopscreen, sprflipvert, mfloorclip, mceilingclip); } diff --git a/src/v_draw.cpp b/src/v_draw.cpp index 574fb09dd7..f190d2cc26 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -147,7 +147,9 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) static short bottomclipper[MAXWIDTH], topclipper[MAXWIDTH]; const BYTE *translation = NULL; - r_swtruecolor = IsBgra(); + auto viewport = RenderViewport::Instance(); + + viewport->r_swtruecolor = IsBgra(); if (APART(parms.colorOverlay) != 0) { @@ -165,7 +167,7 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms 
&parms) parms.colorOverlay = PalEntry(parms.colorOverlay).InverseColor(); } // Note that this overrides DTA_Translation in software, but not in hardware. - if (!r_swtruecolor) + if (!viewport->r_swtruecolor) { FDynamicColormap *colormap = GetSpecialLights(MAKERGB(255, 255, 255), parms.colorOverlay & MAKEARGB(0, 255, 255, 255), 0); @@ -174,7 +176,7 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) } else if (parms.remap != NULL) { - if (r_swtruecolor) + if (viewport->r_swtruecolor) translation = (const BYTE*)parms.remap->Palette; else translation = parms.remap->Remap; @@ -188,7 +190,7 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) } else { - if (r_swtruecolor) + if (viewport->r_swtruecolor) drawerargs.SetTranslationMap(nullptr); else drawerargs.SetTranslationMap(identitymap); @@ -196,16 +198,16 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) bool visible; FDynamicColormap *basecolormap = nullptr; - if (r_swtruecolor) + if (viewport->r_swtruecolor) visible = drawerargs.SetPatchStyle(parms.style, parms.Alpha, -1, parms.fillcolor, basecolormap); else visible = drawerargs.SetPatchStyle(parms.style, parms.Alpha, 0, parms.fillcolor, basecolormap); - - BYTE *destorgsave = dc_destorg; - int destheightsave = dc_destheight; - dc_destorg = screen->GetBuffer(); - dc_destheight = screen->GetHeight(); - if (dc_destorg == NULL) + + BYTE *destorgsave = viewport->dc_destorg; + int destheightsave = viewport->dc_destheight; + viewport->dc_destorg = screen->GetBuffer(); + viewport->dc_destheight = screen->GetHeight(); + if (viewport->dc_destorg == NULL) { I_FatalError("Attempt to write to buffer of hardware canvas"); } @@ -215,8 +217,8 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) if (visible) { - double centeryback = CenterY; - CenterY = 0; + double centeryback = viewport->CenterY; + viewport->CenterY = 0; // There is not enough precision in the drawing routines to keep the full // precision for y0. 
:( @@ -292,11 +294,11 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) frac += xiscale_i; } - CenterY = centeryback; + viewport->CenterY = centeryback; } - dc_destorg = destorgsave; - dc_destheight = destheightsave; + viewport->dc_destorg = destorgsave; + viewport->dc_destheight = destheightsave; if (ticdup != 0 && menuactive == MENU_Off) { @@ -1367,10 +1369,12 @@ void DCanvas::FillSimplePoly(FTexture *tex, FVector2 *points, int npoints, { return; } + + auto viewport = RenderViewport::Instance(); - BYTE *destorgsave = dc_destorg; - dc_destorg = screen->GetBuffer(); - if (dc_destorg == NULL) + BYTE *destorgsave = viewport->dc_destorg; + viewport->dc_destorg = screen->GetBuffer(); + if (viewport->dc_destorg == NULL) { I_FatalError("Attempt to write to buffer of hardware canvas"); } @@ -1497,7 +1501,7 @@ void DCanvas::FillSimplePoly(FTexture *tex, FVector2 *points, int npoints, pt1 = pt2; pt2--; if (pt2 < 0) pt2 = npoints; } while (pt1 != botpt); - dc_destorg = destorgsave; + viewport->dc_destorg = destorgsave; #endif } From 93166fa150a35e57128f52c7e7ea4d7d46062ef7 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 1 Feb 2017 21:42:08 +0100 Subject: [PATCH 791/912] Remove r_swtruecolor variable as it is redundant --- src/polyrenderer/drawers/poly_draw_args.cpp | 4 ++-- src/polyrenderer/poly_renderer.cpp | 10 +-------- src/polyrenderer/scene/poly_decal.cpp | 2 +- src/polyrenderer/scene/poly_particle.cpp | 2 +- src/polyrenderer/scene/poly_sky.cpp | 2 +- src/polyrenderer/scene/poly_sprite.cpp | 2 +- src/swrenderer/drawers/r_drawerargs.cpp | 24 ++++++++++----------- src/swrenderer/line/r_walldraw.cpp | 8 +++---- src/swrenderer/plane/r_flatplane.cpp | 2 +- src/swrenderer/r_swrenderer.cpp | 7 +++--- src/swrenderer/scene/r_light.cpp | 2 +- src/swrenderer/scene/r_portal.cpp | 4 ++-- src/swrenderer/scene/r_scene.cpp | 18 +++------------- src/swrenderer/scene/r_viewport.cpp | 2 +- src/swrenderer/scene/r_viewport.h | 1 - 
src/swrenderer/things/r_particle.cpp | 2 +- src/swrenderer/things/r_voxel.cpp | 16 ++++++++------ src/v_draw.cpp | 10 ++++----- 18 files changed, 49 insertions(+), 69 deletions(-) diff --git a/src/polyrenderer/drawers/poly_draw_args.cpp b/src/polyrenderer/drawers/poly_draw_args.cpp index 81ca52a1bc..5e197da710 100644 --- a/src/polyrenderer/drawers/poly_draw_args.cpp +++ b/src/polyrenderer/drawers/poly_draw_args.cpp @@ -49,7 +49,7 @@ void PolyDrawArgs::SetTexture(FTexture *texture) textureWidth = texture->GetWidth(); textureHeight = texture->GetHeight(); auto viewport = swrenderer::RenderViewport::Instance(); - if (viewport->r_swtruecolor) + if (viewport->RenderTarget->IsBgra()) texturePixels = (const uint8_t *)texture->GetPixelsBgra(); else texturePixels = texture->GetPixels(); @@ -63,7 +63,7 @@ void PolyDrawArgs::SetTexture(FTexture *texture, uint32_t translationID, bool fo FRemapTable *table = TranslationToTable(translationID); if (table != nullptr && !table->Inactive) { - if (swrenderer::RenderViewport::Instance()->r_swtruecolor) + if (swrenderer::RenderViewport::Instance()->RenderTarget->IsBgra()) translation = (uint8_t*)table->Palette; else translation = table->Remap; diff --git a/src/polyrenderer/poly_renderer.cpp b/src/polyrenderer/poly_renderer.cpp index 9d5475dccc..0b3c060f14 100644 --- a/src/polyrenderer/poly_renderer.cpp +++ b/src/polyrenderer/poly_renderer.cpp @@ -58,9 +58,6 @@ void PolyRenderer::RenderView(player_t *player) viewport->RenderTarget = screen; - bool saved_swtruecolor = viewport->r_swtruecolor; - viewport->r_swtruecolor = screen->IsBgra(); - int width = SCREENWIDTH; int height = SCREENHEIGHT; int stHeight = gST_Y; @@ -72,14 +69,12 @@ void PolyRenderer::RenderView(player_t *player) // Apply special colormap if the target cannot do it CameraLight *cameraLight = CameraLight::Instance(); - if (cameraLight->realfixedcolormap && viewport->r_swtruecolor && !(r_shadercolormaps && screen->Accel2D)) + if (cameraLight->realfixedcolormap && 
viewport->RenderTarget->IsBgra() && !(r_shadercolormaps && screen->Accel2D)) { R_BeginDrawerCommands(); DrawerCommandQueue::QueueCommand(cameraLight->realfixedcolormap, screen); R_EndDrawerCommands(); } - - viewport->r_swtruecolor = saved_swtruecolor; } void PolyRenderer::RenderViewToCanvas(AActor *actor, DCanvas *canvas, int x, int y, int width, int height, bool dontmaplines) @@ -87,7 +82,6 @@ void PolyRenderer::RenderViewToCanvas(AActor *actor, DCanvas *canvas, int x, int auto viewport = swrenderer::RenderViewport::Instance(); const bool savedviewactive = viewactive; - const bool savedoutputformat = viewport->r_swtruecolor; viewwidth = width; viewport->RenderTarget = canvas; @@ -97,7 +91,6 @@ void PolyRenderer::RenderViewToCanvas(AActor *actor, DCanvas *canvas, int x, int viewwindowx = x; viewwindowy = y; viewactive = true; - viewport->r_swtruecolor = canvas->IsBgra(); canvas->Lock(true); @@ -112,7 +105,6 @@ void PolyRenderer::RenderViewToCanvas(AActor *actor, DCanvas *canvas, int x, int ActiveRatio(width, height, &trueratio); viewport->SetViewport(width, height, WidescreenRatio); viewactive = savedviewactive; - viewport->r_swtruecolor = savedoutputformat; } void PolyRenderer::RenderActorView(AActor *actor, bool dontmaplines) diff --git a/src/polyrenderer/scene/poly_decal.cpp b/src/polyrenderer/scene/poly_decal.cpp index c658a76fa5..024a2dd25f 100644 --- a/src/polyrenderer/scene/poly_decal.cpp +++ b/src/polyrenderer/scene/poly_decal.cpp @@ -151,7 +151,7 @@ void RenderPolyDecal::Render(const TriMatrix &worldToClip, const Vec4f &clipPlan } args.uniforms.subsectorDepth = subsectorDepth; - if (swrenderer::RenderViewport::Instance()->r_swtruecolor) + if (swrenderer::RenderViewport::Instance()->RenderTarget->IsBgra()) { args.uniforms.color = 0xff000000 | decal->AlphaColor; } diff --git a/src/polyrenderer/scene/poly_particle.cpp b/src/polyrenderer/scene/poly_particle.cpp index e9a80dc436..d83ac3cc4b 100644 --- a/src/polyrenderer/scene/poly_particle.cpp +++ 
b/src/polyrenderer/scene/poly_particle.cpp @@ -90,7 +90,7 @@ void RenderPolyParticle::Render(const TriMatrix &worldToClip, const Vec4f &clipP uint32_t alpha = (uint32_t)clamp(particle->alpha * 255.0f + 0.5f, 0.0f, 255.0f); - if (swrenderer::RenderViewport::Instance()->r_swtruecolor) + if (swrenderer::RenderViewport::Instance()->RenderTarget->IsBgra()) { args.uniforms.color = (alpha << 24) | (particle->color & 0xffffff); } diff --git a/src/polyrenderer/scene/poly_sky.cpp b/src/polyrenderer/scene/poly_sky.cpp index d4ebc57967..af24364fe9 100644 --- a/src/polyrenderer/scene/poly_sky.cpp +++ b/src/polyrenderer/scene/poly_sky.cpp @@ -94,7 +94,7 @@ void PolySkyDome::RenderRow(PolyDrawArgs &args, int row, uint32_t capcolor) void PolySkyDome::RenderCapColorRow(PolyDrawArgs &args, FTexture *skytex, int row, bool bottomCap) { uint32_t solid = skytex->GetSkyCapColor(bottomCap); - if (!swrenderer::RenderViewport::Instance()->r_swtruecolor) + if (!swrenderer::RenderViewport::Instance()->RenderTarget->IsBgra()) solid = RGB32k.RGB[(RPART(solid) >> 3)][(GPART(solid) >> 3)][(BPART(solid) >> 3)]; args.vinput = &mVertices[mPrimStart[row]]; diff --git a/src/polyrenderer/scene/poly_sprite.cpp b/src/polyrenderer/scene/poly_sprite.cpp index 43ec99d90b..3149f68a6b 100644 --- a/src/polyrenderer/scene/poly_sprite.cpp +++ b/src/polyrenderer/scene/poly_sprite.cpp @@ -250,7 +250,7 @@ void RenderPolySprite::Render(const TriMatrix &worldToClip, const Vec4f &clipPla args.SetTexture(tex, thing->Translation, true); } - if (!swrenderer::RenderViewport::Instance()->r_swtruecolor) + if (!swrenderer::RenderViewport::Instance()->RenderTarget->IsBgra()) { uint32_t r = (args.uniforms.color >> 16) & 0xff; uint32_t g = (args.uniforms.color >> 8) & 0xff; diff --git a/src/swrenderer/drawers/r_drawerargs.cpp b/src/swrenderer/drawers/r_drawerargs.cpp index 8da9e3d848..e29c127c0a 100644 --- a/src/swrenderer/drawers/r_drawerargs.cpp +++ b/src/swrenderer/drawers/r_drawerargs.cpp @@ -41,7 +41,7 @@ namespace 
swrenderer { SWPixelFormatDrawers *DrawerArgs::Drawers() { - if (RenderViewport::Instance()->r_swtruecolor) + if (RenderViewport::Instance()->RenderTarget->IsBgra()) { static SWTruecolorDrawers tc_drawers; return &tc_drawers; @@ -80,7 +80,7 @@ namespace swrenderer { if (mBaseColormap) { - if (RenderViewport::Instance()->r_swtruecolor) + if (RenderViewport::Instance()->RenderTarget->IsBgra()) return mBaseColormap->Maps; else return mBaseColormap->Maps + (GETPALOOKUP(mLight, mShade) << COLORMAPSHIFT); @@ -138,7 +138,7 @@ namespace swrenderer } auto viewport = RenderViewport::Instance(); - ds_source = viewport->r_swtruecolor ? (const uint8_t*)tex->GetPixelsBgra() : tex->GetPixels(); + ds_source = viewport->RenderTarget->IsBgra() ? (const uint8_t*)tex->GetPixelsBgra() : tex->GetPixels(); ds_source_mipmapped = tex->Mipmapped() && tex->GetWidth() > 1 && tex->GetHeight() > 1; } @@ -147,7 +147,7 @@ namespace swrenderer auto viewport = RenderViewport::Instance(); // Handle the linear filtered version in a different function to reduce chances of merge conflicts from zdoom. - if (viewport->r_swtruecolor && !drawer_needs_pal_input) // To do: add support to R_DrawColumnHoriz_rgba + if (viewport->RenderTarget->IsBgra() && !drawer_needs_pal_input) // To do: add support to R_DrawColumnHoriz_rgba { DrawMaskedColumnBgra(x, iscale, tex, col, spryscale, sprtopscreen, sprflipvert, mfloorclip, mceilingclip, unmasked); return; @@ -159,7 +159,7 @@ namespace swrenderer const FTexture::Span *span; const BYTE *column; - if (viewport->r_swtruecolor && !drawer_needs_pal_input) + if (viewport->RenderTarget->IsBgra() && !drawer_needs_pal_input) column = (const BYTE *)tex->GetColumnBgra(col >> FRACBITS, &span); else column = tex->GetColumn(col >> FRACBITS, &span); @@ -174,7 +174,7 @@ namespace swrenderer unmaskedSpan[1].Length = 0; } - int pixelsize = viewport->r_swtruecolor ? 4 : 1; + int pixelsize = viewport->RenderTarget->IsBgra() ? 
4 : 1; while (span->Length != 0) { @@ -514,7 +514,7 @@ namespace swrenderer FRemapTable *table = TranslationToTable(translation); if (table != NULL && !table->Inactive) { - if (viewport->r_swtruecolor) + if (viewport->RenderTarget->IsBgra()) SetTranslationMap((uint8_t*)table->Palette); else SetTranslationMap(table->Remap); @@ -591,7 +591,7 @@ namespace swrenderer void WallDrawerArgs::SetDest(int x, int y) { auto viewport = RenderViewport::Instance(); - int pixelsize = viewport->r_swtruecolor ? 4 : 1; + int pixelsize = viewport->RenderTarget->IsBgra() ? 4 : 1; dc_dest = viewport->dc_destorg + (ylookup[y] + x) * pixelsize; dc_dest_y = y; } @@ -726,14 +726,14 @@ namespace swrenderer void SkyDrawerArgs::SetDest(int x, int y) { auto viewport = RenderViewport::Instance(); - int pixelsize = viewport->r_swtruecolor ? 4 : 1; + int pixelsize = viewport->RenderTarget->IsBgra() ? 4 : 1; dc_dest = viewport->dc_destorg + (ylookup[y] + x) * pixelsize; dc_dest_y = y; } void SkyDrawerArgs::SetFrontTexture(FTexture *texture, uint32_t column) { - if (RenderViewport::Instance()->r_swtruecolor) + if (RenderViewport::Instance()->RenderTarget->IsBgra()) { dc_source = (const uint8_t *)texture->GetColumnBgra(column, nullptr); dc_sourceheight = texture->GetHeight(); @@ -752,7 +752,7 @@ namespace swrenderer dc_source2 = nullptr; dc_sourceheight2 = 1; } - else if (RenderViewport::Instance()->r_swtruecolor) + else if (RenderViewport::Instance()->RenderTarget->IsBgra()) { dc_source2 = (const uint8_t *)texture->GetColumnBgra(column, nullptr); dc_sourceheight2 = texture->GetHeight(); @@ -772,7 +772,7 @@ namespace swrenderer void SpriteDrawerArgs::SetDest(int x, int y) { auto viewport = RenderViewport::Instance(); - int pixelsize = viewport->r_swtruecolor ? 4 : 1; + int pixelsize = viewport->RenderTarget->IsBgra() ? 
4 : 1; dc_dest = viewport->dc_destorg + (ylookup[y] + x) * pixelsize; dc_dest_y = y; } diff --git a/src/swrenderer/line/r_walldraw.cpp b/src/swrenderer/line/r_walldraw.cpp index e8dedc6e18..2f3fc04e09 100644 --- a/src/swrenderer/line/r_walldraw.cpp +++ b/src/swrenderer/line/r_walldraw.cpp @@ -49,7 +49,7 @@ namespace swrenderer xoffset += FLOAT2FIXED(xmagnitude * 0.5); - if (!viewport->r_swtruecolor) + if (!viewport->RenderTarget->IsBgra()) { height = texture->GetHeight(); @@ -88,7 +88,7 @@ namespace swrenderer col = width + (col % width); } - if (viewport->r_swtruecolor) + if (viewport->RenderTarget->IsBgra()) source = (const uint8_t *)texture->GetColumnBgra(col, nullptr); else source = texture->GetColumn(col, nullptr); @@ -240,7 +240,7 @@ namespace swrenderer drawerargs.dc_num_lights = 0; } - if (RenderViewport::Instance()->r_swtruecolor) + if (RenderViewport::Instance()->RenderTarget->IsBgra()) { int count = y2 - y1; @@ -324,7 +324,7 @@ namespace swrenderer texturemid = 0; } - drawerargs.dc_wall_fracbits = RenderViewport::Instance()->r_swtruecolor ? FRACBITS : fracbits; + drawerargs.dc_wall_fracbits = RenderViewport::Instance()->RenderTarget->IsBgra() ? FRACBITS : fracbits; CameraLight *cameraLight = CameraLight::Instance(); bool fixed = (cameraLight->fixedcolormap != NULL || cameraLight->fixedlightlev >= 0); diff --git a/src/swrenderer/plane/r_flatplane.cpp b/src/swrenderer/plane/r_flatplane.cpp index cccac6d9d6..8c7c2704f9 100644 --- a/src/swrenderer/plane/r_flatplane.cpp +++ b/src/swrenderer/plane/r_flatplane.cpp @@ -175,7 +175,7 @@ namespace swrenderer auto viewport = RenderViewport::Instance(); - if (viewport->r_swtruecolor) + if (viewport->RenderTarget->IsBgra()) { double distance2 = planeheight * yslope[(y + 1 < viewheight) ? 
y + 1 : y - 1]; double xmagnitude = fabs(ystepscale * (distance2 - distance) * viewport->FocalLengthX); diff --git a/src/swrenderer/r_swrenderer.cpp b/src/swrenderer/r_swrenderer.cpp index ab71584717..a83289006f 100644 --- a/src/swrenderer/r_swrenderer.cpp +++ b/src/swrenderer/r_swrenderer.cpp @@ -89,7 +89,6 @@ void FSoftwareRenderer::Init() { gl_ParseDefs(); - RenderViewport::Instance()->r_swtruecolor = screen->IsBgra(); RenderScene::Instance()->Init(); } @@ -251,8 +250,8 @@ void FSoftwareRenderer::RenderTextureView (FCanvasTexture *tex, AActor *viewpoin { auto viewport = RenderViewport::Instance(); - BYTE *Pixels = viewport->r_swtruecolor ? (BYTE*)tex->GetPixelsBgra() : (BYTE*)tex->GetPixels(); - DSimpleCanvas *Canvas = viewport->r_swtruecolor ? tex->GetCanvasBgra() : tex->GetCanvas(); + BYTE *Pixels = viewport->RenderTarget->IsBgra() ? (BYTE*)tex->GetPixelsBgra() : (BYTE*)tex->GetPixels(); + DSimpleCanvas *Canvas = viewport->RenderTarget->IsBgra() ? tex->GetCanvasBgra() : tex->GetCanvas(); // curse Doom's overuse of global variables in the renderer. // These get clobbered by rendering to a camera texture but they need to be preserved so the final rendering can be done with the correct palette. @@ -291,7 +290,7 @@ void FSoftwareRenderer::RenderTextureView (FCanvasTexture *tex, AActor *viewpoin } } - if (viewport->r_swtruecolor) + if (viewport->RenderTarget->IsBgra()) { // True color render still sometimes uses palette textures (for sprites, mostly). 
// We need to make sure that both pixel buffers contain data: diff --git a/src/swrenderer/scene/r_light.cpp b/src/swrenderer/scene/r_light.cpp index 2b0f860921..076f8c9b00 100644 --- a/src/swrenderer/scene/r_light.cpp +++ b/src/swrenderer/scene/r_light.cpp @@ -60,7 +60,7 @@ namespace swrenderer { realfixedcolormap = &SpecialColormaps[player->fixedcolormap]; auto viewport = RenderViewport::Instance(); - if (viewport->RenderTarget == screen && (viewport->r_swtruecolor || ((DFrameBuffer *)screen->Accel2D && r_shadercolormaps))) + if (viewport->RenderTarget == screen && (viewport->RenderTarget->IsBgra() || ((DFrameBuffer *)screen->Accel2D && r_shadercolormaps))) { // Render everything fullbright. The copy to video memory will // apply the special colormap, so it won't be restricted to the diff --git a/src/swrenderer/scene/r_portal.cpp b/src/swrenderer/scene/r_portal.cpp index be45c1a015..b74337bf04 100644 --- a/src/swrenderer/scene/r_portal.cpp +++ b/src/swrenderer/scene/r_portal.cpp @@ -304,7 +304,7 @@ namespace swrenderer int Ytop = pds->ceilingclip[x - pds->x1]; int Ybottom = pds->floorclip[x - pds->x1]; - if (viewport->r_swtruecolor) + if (viewport->RenderTarget->IsBgra()) { uint32_t *dest = (uint32_t*)viewport->RenderTarget->GetBuffer() + x + Ytop * spacing; @@ -487,7 +487,7 @@ namespace swrenderer auto viewport = RenderViewport::Instance(); - if (viewport->r_swtruecolor) // Assuming this is just a debug function + if (viewport->RenderTarget->IsBgra()) // Assuming this is just a debug function return; BYTE color = (BYTE)BestColor((DWORD *)GPalette.BaseColors, 255, 0, 0, 0, 255); diff --git a/src/swrenderer/scene/r_scene.cpp b/src/swrenderer/scene/r_scene.cpp index 63c3147078..df44d9f16a 100644 --- a/src/swrenderer/scene/r_scene.cpp +++ b/src/swrenderer/scene/r_scene.cpp @@ -77,14 +77,9 @@ namespace swrenderer ActiveRatio(width, height, &trueratio); viewport->SetViewport(width, height, trueratio); - if (viewport->r_swtruecolor != screen->IsBgra()) - { - 
viewport->r_swtruecolor = screen->IsBgra(); - } - if (r_clearbuffer != 0) { - if (!viewport->r_swtruecolor) + if (!viewport->RenderTarget->IsBgra()) { memset(viewport->RenderTarget->GetBuffer(), clearcolor, viewport->RenderTarget->GetPitch() * viewport->RenderTarget->GetHeight()); } @@ -103,7 +98,7 @@ namespace swrenderer RenderActorView(player->mo); // Apply special colormap if the target cannot do it - if (CameraLight::Instance()->realfixedcolormap && viewport->r_swtruecolor && !(r_shadercolormaps && screen->Accel2D)) + if (CameraLight::Instance()->realfixedcolormap && viewport->RenderTarget->IsBgra() && !(r_shadercolormaps && screen->Accel2D)) { DrawerCommandQueue::QueueCommand(CameraLight::Instance()->realfixedcolormap, screen); } @@ -188,7 +183,7 @@ namespace swrenderer // If we don't want shadered colormaps, NULL it now so that the // copy to the screen does not use a special colormap shader. - if (!r_shadercolormaps && !RenderViewport::Instance()->r_swtruecolor) + if (!r_shadercolormaps && !RenderViewport::Instance()->RenderTarget->IsBgra()) { CameraLight::Instance()->realfixedcolormap = NULL; } @@ -199,12 +194,6 @@ namespace swrenderer auto viewport = RenderViewport::Instance(); const bool savedviewactive = viewactive; - const bool savedoutputformat = viewport->r_swtruecolor; - - if (viewport->r_swtruecolor != canvas->IsBgra()) - { - viewport->r_swtruecolor = canvas->IsBgra(); - } R_BeginDrawerCommands(); @@ -233,7 +222,6 @@ namespace swrenderer screen->Unlock(); viewactive = savedviewactive; - viewport->r_swtruecolor = savedoutputformat; } void RenderScene::ScreenResized() diff --git a/src/swrenderer/scene/r_viewport.cpp b/src/swrenderer/scene/r_viewport.cpp index c2d50839e9..677332bd82 100644 --- a/src/swrenderer/scene/r_viewport.cpp +++ b/src/swrenderer/scene/r_viewport.cpp @@ -129,7 +129,7 @@ namespace swrenderer static BYTE *lastbuff = NULL; int pitch = RenderTarget->GetPitch(); - int pixelsize = r_swtruecolor ? 
4 : 1; + int pixelsize = RenderTarget->IsBgra() ? 4 : 1; BYTE *lineptr = RenderTarget->GetBuffer() + (viewwindowy*pitch + viewwindowx) * pixelsize; if (dc_pitch != pitch || lineptr != lastbuff) diff --git a/src/swrenderer/scene/r_viewport.h b/src/swrenderer/scene/r_viewport.h index f3d945bbbe..44f28b6aa0 100644 --- a/src/swrenderer/scene/r_viewport.h +++ b/src/swrenderer/scene/r_viewport.h @@ -37,7 +37,6 @@ namespace swrenderer double CenterY = 0.0; double YaspectMul = 0.0; double IYaspectMul = 0.0; - bool r_swtruecolor = false; double globaluclip = 0.0; double globaldclip = 0.0; diff --git a/src/swrenderer/things/r_particle.cpp b/src/swrenderer/things/r_particle.cpp index 72d3ce617c..6d47fc1645 100644 --- a/src/swrenderer/things/r_particle.cpp +++ b/src/swrenderer/things/r_particle.cpp @@ -239,7 +239,7 @@ namespace swrenderer RenderTranslucentPass *translucentPass = RenderTranslucentPass::Instance(); - if (viewport->r_swtruecolor) + if (viewport->RenderTarget->IsBgra()) { for (int x = x1; x < (x1 + countbase); x++, fracposx += fracstepx) { diff --git a/src/swrenderer/things/r_voxel.cpp b/src/swrenderer/things/r_voxel.cpp index f513b59ee7..6da831163a 100644 --- a/src/swrenderer/things/r_voxel.cpp +++ b/src/swrenderer/things/r_voxel.cpp @@ -251,17 +251,19 @@ namespace swrenderer double dy = (view_origin.Y - sprite_origin.Y) / sprite_xscale; int backX = (int)(dx * spriteCos - dy * spriteSin + mip.Pivot.X); int backY = (int)(dy * spriteCos + dx * spriteSin + mip.Pivot.Y); - int endX = clamp(backX, 0, mip.SizeX - 1); - int endY = clamp(backY, 0, mip.SizeY - 1); - + //int endX = clamp(backX, 0, mip.SizeX - 1); + //int endY = clamp(backY, 0, mip.SizeY - 1); + int endX = mip.SizeX - 1;// clamp(backX, 0, mip.SizeX - 1); + int endY = mip.SizeY - 1;// clamp(backY, 0, mip.SizeY - 1); + // Draw the voxel cube: - for (int index = 0; index < 4; index++) + for (int index = 0; index < 1; index++) { - if ((stepX[index] < 0 && endX >= startX[index]) || + /*if ((stepX[index] < 0 && 
endX >= startX[index]) || (stepX[index] > 0 && endX <= startX[index]) || (stepY[index] < 0 && endY >= startY[index]) || - (stepY[index] > 0 && endY <= startY[index])) continue; + (stepY[index] > 0 && endY <= startY[index])) continue;*/ for (int x = startX[index]; x != endX; x += stepX[index]) { @@ -346,7 +348,7 @@ namespace swrenderer int y1 = MAX((int)(screenY - screenExtentY), 0); int y2 = MIN((int)(screenY + screenExtentY + 0.5f), viewheight - 1); - int pixelsize = viewport->r_swtruecolor ? 4 : 1; + int pixelsize = viewport->RenderTarget->IsBgra() ? 4 : 1; if (y1 < y2) { diff --git a/src/v_draw.cpp b/src/v_draw.cpp index f190d2cc26..bf3d402227 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -149,7 +149,7 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) auto viewport = RenderViewport::Instance(); - viewport->r_swtruecolor = IsBgra(); + viewport->RenderTarget = screen; if (APART(parms.colorOverlay) != 0) { @@ -167,7 +167,7 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) parms.colorOverlay = PalEntry(parms.colorOverlay).InverseColor(); } // Note that this overrides DTA_Translation in software, but not in hardware. 
- if (!viewport->r_swtruecolor) + if (!viewport->RenderTarget->IsBgra()) { FDynamicColormap *colormap = GetSpecialLights(MAKERGB(255, 255, 255), parms.colorOverlay & MAKEARGB(0, 255, 255, 255), 0); @@ -176,7 +176,7 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) } else if (parms.remap != NULL) { - if (viewport->r_swtruecolor) + if (viewport->RenderTarget->IsBgra()) translation = (const BYTE*)parms.remap->Palette; else translation = parms.remap->Remap; @@ -190,7 +190,7 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) } else { - if (viewport->r_swtruecolor) + if (viewport->RenderTarget->IsBgra()) drawerargs.SetTranslationMap(nullptr); else drawerargs.SetTranslationMap(identitymap); @@ -198,7 +198,7 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) bool visible; FDynamicColormap *basecolormap = nullptr; - if (viewport->r_swtruecolor) + if (viewport->RenderTarget->IsBgra()) visible = drawerargs.SetPatchStyle(parms.style, parms.Alpha, -1, parms.fillcolor, basecolormap); else visible = drawerargs.SetPatchStyle(parms.style, parms.Alpha, 0, parms.fillcolor, basecolormap); From 7b578bbb536a8666bd48c9279a34e24668e4024a Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 1 Feb 2017 21:52:29 +0100 Subject: [PATCH 792/912] Remove redundant dc_pitch --- src/swrenderer/drawers/r_draw_pal.cpp | 8 ++++---- src/swrenderer/drawers/r_draw_rgba.cpp | 12 ++++++------ src/swrenderer/scene/r_viewport.cpp | 20 ++++++-------------- src/swrenderer/scene/r_viewport.h | 1 - 4 files changed, 16 insertions(+), 25 deletions(-) diff --git a/src/swrenderer/drawers/r_draw_pal.cpp b/src/swrenderer/drawers/r_draw_pal.cpp index e7906d3192..f5b6e51ef6 100644 --- a/src/swrenderer/drawers/r_draw_pal.cpp +++ b/src/swrenderer/drawers/r_draw_pal.cpp @@ -106,7 +106,7 @@ namespace swrenderer _dest = args.Dest(); _dest_y = args.DestY(); _fracbits = args.dc_wall_fracbits; - _pitch = RenderViewport::Instance()->dc_pitch; + _pitch = 
RenderViewport::Instance()->RenderTarget->GetPitch(); _srcblend = args.dc_srcblend; _destblend = args.dc_destblend; _dynlights = args.dc_lights; @@ -564,7 +564,7 @@ namespace swrenderer _dest = args.Dest(); _dest_y = args.DestY(); _count = args.Count(); - _pitch = RenderViewport::Instance()->dc_pitch; + _pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); _source = args.FrontTexturePixels(); _source2 = args.BackTexturePixels(); _sourceheight[0] = args.FrontTextureHeight(); @@ -870,7 +870,7 @@ namespace swrenderer _count = args.dc_count; _dest = args.Dest(); _dest_y = args.DestY(); - _pitch = RenderViewport::Instance()->dc_pitch; + _pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); _iscale = args.dc_iscale; _texturefrac = args.dc_texturefrac; _colormap = args.Colormap(); @@ -1768,7 +1768,7 @@ namespace swrenderer _yh = args.dc_yh; _x = args.dc_x; _destorg = RenderViewport::Instance()->dc_destorg; - _pitch = RenderViewport::Instance()->dc_pitch; + _pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); _fuzzpos = fuzzpos; _fuzzviewheight = fuzzviewheight; } diff --git a/src/swrenderer/drawers/r_draw_rgba.cpp b/src/swrenderer/drawers/r_draw_rgba.cpp index 59521bb065..9d3a15ad82 100644 --- a/src/swrenderer/drawers/r_draw_rgba.cpp +++ b/src/swrenderer/drawers/r_draw_rgba.cpp @@ -73,7 +73,7 @@ namespace swrenderer args.xbits = drawerargs.TextureWidthBits(); args.ybits = drawerargs.TextureHeightBits(); args.destorg = (uint32_t*)RenderViewport::Instance()->dc_destorg; - args.destpitch = RenderViewport::Instance()->dc_pitch; + args.destpitch = RenderViewport::Instance()->RenderTarget->GetPitch(); args.source = (const uint32_t*)drawerargs.TexturePixels(); args.light = LightBgra::calc_light_multiplier(drawerargs.Light()); args.light_red = shade_constants.light_red; @@ -186,7 +186,7 @@ namespace swrenderer auto shade_constants = drawerargs.ColormapConstants(); args.dest = (uint32_t*)drawerargs.Dest(); args.dest_y = drawerargs.DestY(); - args.pitch 
= RenderViewport::Instance()->dc_pitch; + args.pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); args.count = drawerargs.dc_count; args.texturefrac[0] = drawerargs.dc_texturefrac; args.texturefracx[0] = drawerargs.dc_texturefracx; @@ -256,7 +256,7 @@ namespace swrenderer args.colormap = drawerargs.Colormap(); args.translation = drawerargs.TranslationMap(); args.basecolors = (const uint32_t *)GPalette.BaseColors; - args.pitch = RenderViewport::Instance()->dc_pitch; + args.pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); args.count = drawerargs.dc_count; args.dest_y = drawerargs.DestY(); args.iscale = drawerargs.dc_iscale; @@ -307,7 +307,7 @@ namespace swrenderer args.dest = (uint32_t*)drawerargs.Dest(); args.dest_y = drawerargs.DestY(); args.count = drawerargs.Count(); - args.pitch = RenderViewport::Instance()->dc_pitch; + args.pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); args.texturefrac[0] = drawerargs.TextureVPos(); args.iscale[0] = drawerargs.TextureVStep(); args.source0[0] = (const uint32_t *)drawerargs.FrontTexturePixels(); @@ -332,7 +332,7 @@ namespace swrenderer _yl = drawerargs.dc_yl; _yh = drawerargs.dc_yh; _destorg = RenderViewport::Instance()->dc_destorg; - _pitch = RenderViewport::Instance()->dc_pitch; + _pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); _fuzzpos = fuzzpos; _fuzzviewheight = fuzzviewheight; } @@ -710,7 +710,7 @@ namespace swrenderer _a = a; _destorg = RenderViewport::Instance()->dc_destorg; - _pitch = RenderViewport::Instance()->dc_pitch; + _pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); } void FillTransColumnRGBACommand::Execute(DrawerThread *thread) diff --git a/src/swrenderer/scene/r_viewport.cpp b/src/swrenderer/scene/r_viewport.cpp index 677332bd82..d4c8fae842 100644 --- a/src/swrenderer/scene/r_viewport.cpp +++ b/src/swrenderer/scene/r_viewport.cpp @@ -126,25 +126,17 @@ namespace swrenderer void RenderViewport::SetupBuffer() { - static BYTE *lastbuff = NULL; - 
int pitch = RenderTarget->GetPitch(); int pixelsize = RenderTarget->IsBgra() ? 4 : 1; BYTE *lineptr = RenderTarget->GetBuffer() + (viewwindowy*pitch + viewwindowx) * pixelsize; - if (dc_pitch != pitch || lineptr != lastbuff) + R_InitFuzzTable(pitch); + + dc_destorg = lineptr; + dc_destheight = RenderTarget->GetHeight() - viewwindowy; + for (int i = 0; i < RenderTarget->GetHeight(); i++) { - if (dc_pitch != pitch) - { - dc_pitch = pitch; - R_InitFuzzTable(pitch); - } - dc_destorg = lineptr; - dc_destheight = RenderTarget->GetHeight() - viewwindowy; - for (int i = 0; i < RenderTarget->GetHeight(); i++) - { - ylookup[i] = i * pitch; - } + ylookup[i] = i * pitch; } R_InitParticleTexture(); diff --git a/src/swrenderer/scene/r_viewport.h b/src/swrenderer/scene/r_viewport.h index 44f28b6aa0..a430dbd975 100644 --- a/src/swrenderer/scene/r_viewport.h +++ b/src/swrenderer/scene/r_viewport.h @@ -47,7 +47,6 @@ namespace swrenderer uint8_t *dc_destorg = nullptr; int dc_destheight = 0; - int dc_pitch = 0; private: void InitTextureMapping(); From bb0a223b80c5aa8442c16e9802a7f73a4bfba27c Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 1 Feb 2017 21:55:55 +0100 Subject: [PATCH 793/912] Remove unused dc_destheight --- src/swrenderer/scene/r_viewport.cpp | 1 - src/swrenderer/scene/r_viewport.h | 1 - src/v_draw.cpp | 3 --- 3 files changed, 5 deletions(-) diff --git a/src/swrenderer/scene/r_viewport.cpp b/src/swrenderer/scene/r_viewport.cpp index d4c8fae842..0c0e0e931b 100644 --- a/src/swrenderer/scene/r_viewport.cpp +++ b/src/swrenderer/scene/r_viewport.cpp @@ -133,7 +133,6 @@ namespace swrenderer R_InitFuzzTable(pitch); dc_destorg = lineptr; - dc_destheight = RenderTarget->GetHeight() - viewwindowy; for (int i = 0; i < RenderTarget->GetHeight(); i++) { ylookup[i] = i * pitch; diff --git a/src/swrenderer/scene/r_viewport.h b/src/swrenderer/scene/r_viewport.h index a430dbd975..0fc90bc514 100644 --- a/src/swrenderer/scene/r_viewport.h +++ b/src/swrenderer/scene/r_viewport.h @@ 
-46,7 +46,6 @@ namespace swrenderer angle_t xtoviewangle[MAXWIDTH + 1]; uint8_t *dc_destorg = nullptr; - int dc_destheight = 0; private: void InitTextureMapping(); diff --git a/src/v_draw.cpp b/src/v_draw.cpp index bf3d402227..1e284c7385 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -204,9 +204,7 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) visible = drawerargs.SetPatchStyle(parms.style, parms.Alpha, 0, parms.fillcolor, basecolormap); BYTE *destorgsave = viewport->dc_destorg; - int destheightsave = viewport->dc_destheight; viewport->dc_destorg = screen->GetBuffer(); - viewport->dc_destheight = screen->GetHeight(); if (viewport->dc_destorg == NULL) { I_FatalError("Attempt to write to buffer of hardware canvas"); @@ -298,7 +296,6 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) } viewport->dc_destorg = destorgsave; - viewport->dc_destheight = destheightsave; if (ticdup != 0 && menuactive == MENU_Off) { From ca93d7456ab9187c7e4b175195e691674159d966 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 1 Feb 2017 21:59:48 +0100 Subject: [PATCH 794/912] Change redundant bRenderingToCanvas into a helper function --- src/polyrenderer/poly_renderer.cpp | 2 -- src/swrenderer/scene/r_opaque_pass.cpp | 2 +- src/swrenderer/scene/r_scene.cpp | 2 -- src/swrenderer/scene/r_viewport.cpp | 2 +- src/swrenderer/scene/r_viewport.h | 3 ++- 5 files changed, 4 insertions(+), 7 deletions(-) diff --git a/src/polyrenderer/poly_renderer.cpp b/src/polyrenderer/poly_renderer.cpp index 0b3c060f14..f8bcedc7c1 100644 --- a/src/polyrenderer/poly_renderer.cpp +++ b/src/polyrenderer/poly_renderer.cpp @@ -85,7 +85,6 @@ void PolyRenderer::RenderViewToCanvas(AActor *actor, DCanvas *canvas, int x, int viewwidth = width; viewport->RenderTarget = canvas; - viewport->bRenderingToCanvas = true; R_SetWindow(12, width, height, height, true); viewport->SetViewport(width, height, WidescreenRatio); viewwindowx = x; @@ -99,7 +98,6 @@ void 
PolyRenderer::RenderViewToCanvas(AActor *actor, DCanvas *canvas, int x, int canvas->Unlock(); viewport->RenderTarget = screen; - viewport->bRenderingToCanvas = false; R_ExecuteSetViewSize(); float trueratio; ActiveRatio(width, height, &trueratio); diff --git a/src/swrenderer/scene/r_opaque_pass.cpp b/src/swrenderer/scene/r_opaque_pass.cpp index a1c5caafcc..46facc5e53 100644 --- a/src/swrenderer/scene/r_opaque_pass.cpp +++ b/src/swrenderer/scene/r_opaque_pass.cpp @@ -815,7 +815,7 @@ namespace swrenderer auto viewport = RenderViewport::Instance(); // clip ceiling to console bottom fillshort(floorclip, viewwidth, viewheight); - fillshort(ceilingclip, viewwidth, !screen->Accel2D && ConBottom > viewwindowy && !viewport->bRenderingToCanvas ? (ConBottom - viewwindowy) : 0); + fillshort(ceilingclip, viewwidth, !screen->Accel2D && ConBottom > viewwindowy && !viewport->RenderingToCanvas() ? (ConBottom - viewwindowy) : 0); } void RenderOpaquePass::AddSprites(sector_t *sec, int lightlevel, WaterFakeSide fakeside, bool foggy, FDynamicColormap *basecolormap) diff --git a/src/swrenderer/scene/r_scene.cpp b/src/swrenderer/scene/r_scene.cpp index df44d9f16a..84bcaee2bb 100644 --- a/src/swrenderer/scene/r_scene.cpp +++ b/src/swrenderer/scene/r_scene.cpp @@ -199,7 +199,6 @@ namespace swrenderer viewwidth = width; viewport->RenderTarget = canvas; - viewport->bRenderingToCanvas = true; R_SetWindow(12, width, height, height, true); viewwindowx = x; @@ -212,7 +211,6 @@ namespace swrenderer R_EndDrawerCommands(); viewport->RenderTarget = screen; - viewport->bRenderingToCanvas = false; R_ExecuteSetViewSize(); float trueratio; diff --git a/src/swrenderer/scene/r_viewport.cpp b/src/swrenderer/scene/r_viewport.cpp index 0c0e0e931b..89f758ad94 100644 --- a/src/swrenderer/scene/r_viewport.cpp +++ b/src/swrenderer/scene/r_viewport.cpp @@ -48,7 +48,7 @@ namespace swrenderer { int virtheight, virtwidth, virtwidth2, virtheight2; - if (!bRenderingToCanvas) + if (!RenderingToCanvas()) { // Set 
r_viewsize cvar to reflect the current view size UCVarValue value; char temp[16]; diff --git a/src/swrenderer/scene/r_viewport.h b/src/swrenderer/scene/r_viewport.h index 0fc90bc514..e2daba8bb2 100644 --- a/src/swrenderer/scene/r_viewport.h +++ b/src/swrenderer/scene/r_viewport.h @@ -27,7 +27,6 @@ namespace swrenderer void SetupFreelook(); DCanvas *RenderTarget = nullptr; - bool bRenderingToCanvas = false; fixed_t viewingrangerecip = 0; double FocalLengthX = 0.0; double FocalLengthY = 0.0; @@ -47,6 +46,8 @@ namespace swrenderer uint8_t *dc_destorg = nullptr; + bool RenderingToCanvas() const { return RenderTarget != screen; } + private: void InitTextureMapping(); void SetupBuffer(); From d91e6ccece5749b80c5c415b07b020326b263a82 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 1 Feb 2017 22:24:34 +0100 Subject: [PATCH 795/912] Remove dc_destorg and ylookup --- src/swrenderer/drawers/r_draw.cpp | 1 - src/swrenderer/drawers/r_draw.h | 1 - src/swrenderer/drawers/r_draw_pal.cpp | 32 ++++++++++++------------- src/swrenderer/drawers/r_draw_pal.h | 8 +++---- src/swrenderer/drawers/r_draw_rgba.cpp | 26 ++++++++++---------- src/swrenderer/drawers/r_draw_rgba.h | 8 +++---- src/swrenderer/drawers/r_drawerargs.cpp | 9 +++---- src/swrenderer/scene/r_viewport.cpp | 21 ++++++++-------- src/swrenderer/scene/r_viewport.h | 2 +- src/swrenderer/things/r_particle.cpp | 4 ++-- src/v_draw.cpp | 17 ------------- 11 files changed, 53 insertions(+), 76 deletions(-) diff --git a/src/swrenderer/drawers/r_draw.cpp b/src/swrenderer/drawers/r_draw.cpp index 2625db1a00..2e0539204d 100644 --- a/src/swrenderer/drawers/r_draw.cpp +++ b/src/swrenderer/drawers/r_draw.cpp @@ -57,7 +57,6 @@ CVAR(Bool, r_dynlights, 1, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); namespace swrenderer { - int ylookup[MAXHEIGHT]; uint8_t shadetables[NUMCOLORMAPS * 16 * 256]; FDynamicColormap ShadeFakeColormap[16]; uint8_t identitymap[256]; diff --git a/src/swrenderer/drawers/r_draw.h b/src/swrenderer/drawers/r_draw.h index 
d6ec7a149a..e813acaaee 100644 --- a/src/swrenderer/drawers/r_draw.h +++ b/src/swrenderer/drawers/r_draw.h @@ -25,7 +25,6 @@ namespace swrenderer class SpanDrawerArgs; class SpriteDrawerArgs; - extern int ylookup[MAXHEIGHT]; extern uint8_t shadetables[/*NUMCOLORMAPS*16*256*/]; extern FDynamicColormap ShadeFakeColormap[16]; extern uint8_t identitymap[256]; diff --git a/src/swrenderer/drawers/r_draw_pal.cpp b/src/swrenderer/drawers/r_draw_pal.cpp index f5b6e51ef6..539baedfda 100644 --- a/src/swrenderer/drawers/r_draw_pal.cpp +++ b/src/swrenderer/drawers/r_draw_pal.cpp @@ -1767,7 +1767,7 @@ namespace swrenderer _yl = args.dc_yl; _yh = args.dc_yh; _x = args.dc_x; - _destorg = RenderViewport::Instance()->dc_destorg; + _destorg = RenderViewport::Instance()->GetDest(0, 0); _pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); _fuzzpos = fuzzpos; _fuzzviewheight = fuzzviewheight; @@ -1786,7 +1786,7 @@ namespace swrenderer uint8_t *map = &NormalLight.Maps[6 * 256]; - uint8_t *dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + _x + _destorg); + uint8_t *dest = thread->dest_for_thread(yl, _pitch, yl * _pitch + _x + _destorg); int pitch = _pitch * thread->num_cores; int fuzzstep = thread->num_cores; @@ -1858,7 +1858,7 @@ namespace swrenderer _y = args.DestY(); _x1 = args.DestX1(); _x2 = args.DestX2(); - _destorg = RenderViewport::Instance()->dc_destorg; + _dest = RenderViewport::Instance()->GetDest(_x1, _y); _xstep = args.TextureUStep(); _ystep = args.TextureVStep(); _xbits = args.TextureWidthBits(); @@ -1942,7 +1942,7 @@ namespace swrenderer xfrac = _xfrac; yfrac = _yfrac; - dest = ylookup[_y] + _x1 + _destorg; + dest = _dest; count = _x2 - _x1 + 1; @@ -2030,7 +2030,7 @@ namespace swrenderer xfrac = _xfrac; yfrac = _yfrac; - dest = ylookup[_y] + _x1 + _destorg; + dest = _dest; count = _x2 - _x1 + 1; @@ -2104,7 +2104,7 @@ namespace swrenderer xfrac = _xfrac; yfrac = _yfrac; - dest = ylookup[_y] + _x1 + _destorg; + dest = _dest; count = _x2 - _x1 + 1; @@ -2227,7 
+2227,7 @@ namespace swrenderer xfrac = _xfrac; yfrac = _yfrac; - dest = ylookup[_y] + _x1 + _destorg; + dest = _dest; count = _x2 - _x1 + 1; @@ -2368,7 +2368,7 @@ namespace swrenderer xfrac = _xfrac; yfrac = _yfrac; - dest = ylookup[_y] + _x1 + _destorg; + dest = _dest; count = _x2 - _x1 + 1; @@ -2491,7 +2491,7 @@ namespace swrenderer xfrac = _xfrac; yfrac = _yfrac; - dest = ylookup[_y] + _x1 + _destorg; + dest = _dest; count = _x2 - _x1 + 1; @@ -2619,7 +2619,7 @@ namespace swrenderer if (thread->line_skipped_by_thread(_y)) return; - memset(ylookup[_y] + _x1 + _destorg, _color, _x2 - _x1 + 1); + memset(_dest, _color, _x2 - _x1 + 1); } ///////////////////////////////////////////////////////////////////////// @@ -2628,7 +2628,7 @@ namespace swrenderer : y(y), x1(x1), x2(x2), plane_sz(plane_sz), plane_su(plane_su), plane_sv(plane_sv), plane_shade(plane_shade), planeshade(planeshade), planelightfloat(planelightfloat), pviewx(pviewx), pviewy(pviewy) { _colormap = args.Colormap(); - _destorg = RenderViewport::Instance()->dc_destorg; + _dest = RenderViewport::Instance()->GetDest(x1, y); _ybits = args.TextureHeightBits(); _xbits = args.TextureWidthBits(); _source = args.TexturePixels(); @@ -2668,7 +2668,7 @@ namespace swrenderer uz = plane_su[2] + plane_su[1] * (centery - y) + plane_su[0] * (x1 - centerx); vz = plane_sv[2] + plane_sv[1] * (centery - y) + plane_sv[0] * (x1 - centerx); - fb = ylookup[y] + x1 + _destorg; + fb = _dest; uint8_t vshift = 32 - _ybits; uint8_t ushift = vshift - _xbits; @@ -2873,7 +2873,7 @@ namespace swrenderer DrawColoredSpanPalCommand::DrawColoredSpanPalCommand(const SpanDrawerArgs &args, int y, int x1, int x2) : PalSpanCommand(args), y(y), x1(x1), x2(x2) { color = args.SolidColor(); - destorg = RenderViewport::Instance()->dc_destorg; + dest = RenderViewport::Instance()->GetDest(x1, y); } void DrawColoredSpanPalCommand::Execute(DrawerThread *thread) @@ -2881,7 +2881,7 @@ namespace swrenderer if (thread->line_skipped_by_thread(y)) return; - 
memset(ylookup[y] + x1 + destorg, color, x2 - x1 + 1); + memset(_dest, color, x2 - x1 + 1); } ///////////////////////////////////////////////////////////////////////// @@ -2889,7 +2889,7 @@ namespace swrenderer DrawFogBoundaryLinePalCommand::DrawFogBoundaryLinePalCommand(const SpanDrawerArgs &args, int y, int x1, int x2) : PalSpanCommand(args), y(y), x1(x1), x2(x2) { _colormap = args.Colormap(); - _destorg = RenderViewport::Instance()->dc_destorg; + _dest = RenderViewport::Instance()->GetDest(x1, y); } void DrawFogBoundaryLinePalCommand::Execute(DrawerThread *thread) @@ -2898,7 +2898,7 @@ namespace swrenderer return; const uint8_t *colormap = _colormap; - uint8_t *dest = ylookup[y] + _destorg; + uint8_t *dest = _dest; int x = x1; do { diff --git a/src/swrenderer/drawers/r_draw_pal.h b/src/swrenderer/drawers/r_draw_pal.h index ca99f18496..f28007e73a 100644 --- a/src/swrenderer/drawers/r_draw_pal.h +++ b/src/swrenderer/drawers/r_draw_pal.h @@ -140,7 +140,7 @@ namespace swrenderer int _y; int _x1; int _x2; - uint8_t *_destorg; + uint8_t *_dest; dsfixed_t _xstep; dsfixed_t _ystep; int _xbits; @@ -187,7 +187,7 @@ namespace swrenderer fixed_t pviewy; const uint8_t *_colormap; - uint8_t *_destorg; + uint8_t *_dest; int _ybits; int _xbits; const uint8_t *_source; @@ -206,7 +206,7 @@ namespace swrenderer int x1; int x2; int color; - uint8_t *destorg; + uint8_t *dest; }; class DrawFogBoundaryLinePalCommand : public PalSpanCommand @@ -218,7 +218,7 @@ namespace swrenderer private: int y, x1, x2; const uint8_t *_colormap; - uint8_t *_destorg; + uint8_t *_dest; }; class DrawParticleColumnPalCommand : public DrawerCommand diff --git a/src/swrenderer/drawers/r_draw_rgba.cpp b/src/swrenderer/drawers/r_draw_rgba.cpp index 9d3a15ad82..4162f2f923 100644 --- a/src/swrenderer/drawers/r_draw_rgba.cpp +++ b/src/swrenderer/drawers/r_draw_rgba.cpp @@ -72,7 +72,7 @@ namespace swrenderer args.y = drawerargs.DestY(); args.xbits = drawerargs.TextureWidthBits(); args.ybits = 
drawerargs.TextureHeightBits(); - args.destorg = (uint32_t*)RenderViewport::Instance()->dc_destorg; + args.destorg = (uint32_t*)RenderViewport::Instance()->GetDest(0, 0); args.destpitch = RenderViewport::Instance()->RenderTarget->GetPitch(); args.source = (const uint32_t*)drawerargs.TexturePixels(); args.light = LightBgra::calc_light_multiplier(drawerargs.Light()); @@ -331,7 +331,7 @@ namespace swrenderer _x = drawerargs.dc_x; _yl = drawerargs.dc_yl; _yh = drawerargs.dc_yh; - _destorg = RenderViewport::Instance()->dc_destorg; + _destorg = RenderViewport::Instance()->GetDest(0, 0); _pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); _fuzzpos = fuzzpos; _fuzzviewheight = fuzzviewheight; @@ -348,7 +348,7 @@ namespace swrenderer if (count <= 0) return; - uint32_t *dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + _x + (uint32_t*)_destorg); + uint32_t *dest = thread->dest_for_thread(yl, _pitch, _pitch * yl + _x + (uint32_t*)_destorg); int pitch = _pitch * thread->num_cores; int fuzzstep = thread->num_cores; @@ -439,7 +439,7 @@ namespace swrenderer _x1 = drawerargs.DestX1(); _x2 = drawerargs.DestX2(); _y = drawerargs.DestY(); - _destorg = RenderViewport::Instance()->dc_destorg; + _dest = RenderViewport::Instance()->GetDest(_x1, _y); _light = drawerargs.Light(); _color = drawerargs.SolidColor(); } @@ -449,7 +449,7 @@ namespace swrenderer if (thread->line_skipped_by_thread(_y)) return; - uint32_t *dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; + uint32_t *dest = (uint32_t*)_dest; int count = (_x2 - _x1 + 1); uint32_t light = LightBgra::calc_light_multiplier(_light); uint32_t color = LightBgra::shade_pal_index_simple(_color, light); @@ -470,7 +470,7 @@ namespace swrenderer _x = x; _x2 = x2; - _destorg = RenderViewport::Instance()->dc_destorg; + _line = RenderViewport::Instance()->GetDest(0, y); _light = drawerargs.Light(); _shade_constants = drawerargs.ColormapConstants(); } @@ -484,7 +484,7 @@ namespace swrenderer int x = _x; int x2 = _x2; - uint32_t 
*dest = ylookup[y] + (uint32_t*)_destorg; + uint32_t *dest = (uint32_t*)_line; uint32_t light = LightBgra::calc_light_multiplier(_light); ShadeConstants constants = _shade_constants; @@ -537,7 +537,7 @@ namespace swrenderer _x1 = x1; _x2 = x2; _y = y; - _destorg = RenderViewport::Instance()->dc_destorg; + _dest = RenderViewport::Instance()->GetDest(_x1, _y); _light = drawerargs.Light(); _shade_constants = drawerargs.ColormapConstants(); _plane_sz = plane_sz; @@ -568,7 +568,7 @@ namespace swrenderer int source_width = 1 << _xbits; int source_height = 1 << _ybits; - uint32_t *dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; + uint32_t *dest = (uint32_t*)_dest; int count = _x2 - _x1 + 1; // Depth (Z) change across the span @@ -672,7 +672,7 @@ namespace swrenderer _x1 = x1; _x2 = x2; - _destorg = RenderViewport::Instance()->dc_destorg; + _dest = RenderViewport::Instance()->GetDest(_x1, _y); _light = drawerargs.Light(); _color = drawerargs.SolidColor(); } @@ -686,7 +686,7 @@ namespace swrenderer int x1 = _x1; int x2 = _x2; - uint32_t *dest = ylookup[y] + x1 + (uint32_t*)_destorg; + uint32_t *dest = (uint32_t*)_dest; int count = (x2 - x1 + 1); uint32_t light = LightBgra::calc_light_multiplier(_light); uint32_t color = LightBgra::shade_pal_index_simple(_color, light); @@ -709,7 +709,7 @@ namespace swrenderer _color = color; _a = a; - _destorg = RenderViewport::Instance()->dc_destorg; + _destorg = RenderViewport::Instance()->GetDest(0, 0); _pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); } @@ -738,7 +738,7 @@ namespace swrenderer fg_blue *= alpha; int spacing = _pitch * thread->num_cores; - uint32_t *dest = thread->dest_for_thread(y1, _pitch, ylookup[y1] + x + (uint32_t*)_destorg); + uint32_t *dest = thread->dest_for_thread(y1, _pitch, _pitch * y1 + x + (uint32_t*)_destorg); for (int y = 0; y < ycount; y++) { diff --git a/src/swrenderer/drawers/r_draw_rgba.h b/src/swrenderer/drawers/r_draw_rgba.h index 37ae0e25db..33c9ebcabf 100644 --- 
a/src/swrenderer/drawers/r_draw_rgba.h +++ b/src/swrenderer/drawers/r_draw_rgba.h @@ -211,7 +211,7 @@ namespace swrenderer int _x1; int _x2; int _y; - uint8_t * RESTRICT _destorg; + uint8_t * RESTRICT _dest; fixed_t _light; int _color; @@ -226,7 +226,7 @@ namespace swrenderer int _y; int _x; int _x2; - uint8_t * RESTRICT _destorg; + uint8_t * RESTRICT _line; fixed_t _light; ShadeConstants _shade_constants; @@ -241,7 +241,7 @@ namespace swrenderer int _x1; int _x2; int _y; - uint8_t * RESTRICT _destorg; + uint8_t * RESTRICT _dest; fixed_t _light; ShadeConstants _shade_constants; FVector3 _plane_sz; @@ -267,7 +267,7 @@ namespace swrenderer int _y; int _x1; int _x2; - uint8_t * RESTRICT _destorg; + uint8_t * RESTRICT _dest; fixed_t _light; int _color; diff --git a/src/swrenderer/drawers/r_drawerargs.cpp b/src/swrenderer/drawers/r_drawerargs.cpp index e29c127c0a..7bf3e04193 100644 --- a/src/swrenderer/drawers/r_drawerargs.cpp +++ b/src/swrenderer/drawers/r_drawerargs.cpp @@ -591,8 +591,7 @@ namespace swrenderer void WallDrawerArgs::SetDest(int x, int y) { auto viewport = RenderViewport::Instance(); - int pixelsize = viewport->RenderTarget->IsBgra() ? 4 : 1; - dc_dest = viewport->dc_destorg + (ylookup[y] + x) * pixelsize; + dc_dest = viewport->GetDest(x, y); dc_dest_y = y; } @@ -726,8 +725,7 @@ namespace swrenderer void SkyDrawerArgs::SetDest(int x, int y) { auto viewport = RenderViewport::Instance(); - int pixelsize = viewport->RenderTarget->IsBgra() ? 4 : 1; - dc_dest = viewport->dc_destorg + (ylookup[y] + x) * pixelsize; + dc_dest = viewport->GetDest(x, y); dc_dest_y = y; } @@ -772,8 +770,7 @@ namespace swrenderer void SpriteDrawerArgs::SetDest(int x, int y) { auto viewport = RenderViewport::Instance(); - int pixelsize = viewport->RenderTarget->IsBgra() ? 
4 : 1; - dc_dest = viewport->dc_destorg + (ylookup[y] + x) * pixelsize; + dc_dest = viewport->GetDest(x, y); dc_dest_y = y; } } diff --git a/src/swrenderer/scene/r_viewport.cpp b/src/swrenderer/scene/r_viewport.cpp index 89f758ad94..1fc4494a80 100644 --- a/src/swrenderer/scene/r_viewport.cpp +++ b/src/swrenderer/scene/r_viewport.cpp @@ -126,19 +126,18 @@ namespace swrenderer void RenderViewport::SetupBuffer() { + R_InitFuzzTable(RenderTarget->GetPitch()); + R_InitParticleTexture(); + } + + uint8_t *RenderViewport::GetDest(int x, int y) + { + x += viewwindowx; + y += viewwindowy; + int pitch = RenderTarget->GetPitch(); int pixelsize = RenderTarget->IsBgra() ? 4 : 1; - BYTE *lineptr = RenderTarget->GetBuffer() + (viewwindowy*pitch + viewwindowx) * pixelsize; - - R_InitFuzzTable(pitch); - - dc_destorg = lineptr; - for (int i = 0; i < RenderTarget->GetHeight(); i++) - { - ylookup[i] = i * pitch; - } - - R_InitParticleTexture(); + return RenderTarget->GetBuffer() + (x + y * pitch) * pixelsize; } void RenderViewport::InitTextureMapping() diff --git a/src/swrenderer/scene/r_viewport.h b/src/swrenderer/scene/r_viewport.h index e2daba8bb2..ea5436980f 100644 --- a/src/swrenderer/scene/r_viewport.h +++ b/src/swrenderer/scene/r_viewport.h @@ -44,7 +44,7 @@ namespace swrenderer // from clipangle to -clipangle. 
angle_t xtoviewangle[MAXWIDTH + 1]; - uint8_t *dc_destorg = nullptr; + uint8_t *GetDest(int x, int y); bool RenderingToCanvas() const { return RenderTarget != screen; } diff --git a/src/swrenderer/things/r_particle.cpp b/src/swrenderer/things/r_particle.cpp index 6d47fc1645..8ee6df916f 100644 --- a/src/swrenderer/things/r_particle.cpp +++ b/src/swrenderer/things/r_particle.cpp @@ -245,7 +245,7 @@ namespace swrenderer { if (translucentPass->ClipSpriteColumnWithPortals(x, vis)) continue; - uint32_t *dest = ylookup[yl] + x + (uint32_t*)viewport->dc_destorg; + uint32_t *dest = (uint32_t*)viewport->GetDest(x, yl); DrawerCommandQueue::QueueCommand(dest, yl, spacing, ycount, fg, alpha, fracposx); } } @@ -255,7 +255,7 @@ namespace swrenderer { if (translucentPass->ClipSpriteColumnWithPortals(x, vis)) continue; - uint8_t *dest = ylookup[yl] + x + viewport->dc_destorg; + uint8_t *dest = viewport->GetDest(x, yl); DrawerCommandQueue::QueueCommand(dest, yl, spacing, ycount, fg, alpha, fracposx); } } diff --git a/src/v_draw.cpp b/src/v_draw.cpp index 1e284c7385..7e5469fb34 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -203,13 +203,6 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) else visible = drawerargs.SetPatchStyle(parms.style, parms.Alpha, 0, parms.fillcolor, basecolormap); - BYTE *destorgsave = viewport->dc_destorg; - viewport->dc_destorg = screen->GetBuffer(); - if (viewport->dc_destorg == NULL) - { - I_FatalError("Attempt to write to buffer of hardware canvas"); - } - double x0 = parms.x - parms.left * parms.destwidth / parms.texwidth; double y0 = parms.y - parms.top * parms.destheight / parms.texheight; @@ -295,8 +288,6 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) viewport->CenterY = centeryback; } - viewport->dc_destorg = destorgsave; - if (ticdup != 0 && menuactive == MENU_Off) { NetUpdate(); @@ -1369,13 +1360,6 @@ void DCanvas::FillSimplePoly(FTexture *tex, FVector2 *points, int npoints, auto viewport = 
RenderViewport::Instance(); - BYTE *destorgsave = viewport->dc_destorg; - viewport->dc_destorg = screen->GetBuffer(); - if (viewport->dc_destorg == NULL) - { - I_FatalError("Attempt to write to buffer of hardware canvas"); - } - scalex /= tex->Scale.X; scaley /= tex->Scale.Y; @@ -1498,7 +1482,6 @@ void DCanvas::FillSimplePoly(FTexture *tex, FVector2 *points, int npoints, pt1 = pt2; pt2--; if (pt2 < 0) pt2 = npoints; } while (pt1 != botpt); - viewport->dc_destorg = destorgsave; #endif } From 69b73120997113bdb6863feca31a3456ceb47aa7 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 1 Feb 2017 23:23:10 +0100 Subject: [PATCH 796/912] Add some transform helpers on RenderViewport --- src/swrenderer/scene/r_viewport.cpp | 46 ++++++++++++++++++++++++++--- src/swrenderer/scene/r_viewport.h | 9 +++++- src/swrenderer/things/r_voxel.cpp | 33 +++++---------------- 3 files changed, 58 insertions(+), 30 deletions(-) diff --git a/src/swrenderer/scene/r_viewport.cpp b/src/swrenderer/scene/r_viewport.cpp index 1fc4494a80..24413b3c55 100644 --- a/src/swrenderer/scene/r_viewport.cpp +++ b/src/swrenderer/scene/r_viewport.cpp @@ -148,10 +148,6 @@ namespace swrenderer FocalLengthX = CenterX / FocalTangent; FocalLengthY = FocalLengthX * YaspectMul; - // This is 1/FocalTangent before the widescreen extension of FOV. - viewingrangerecip = FLOAT2FIXED(1. / tan(FieldOfView.Radians() / 2)); - - // Now generate xtoviewangle for sky texture mapping. // [RH] Do not generate viewangletox, because texture mapping is no // longer done with trig, so it's not needed. 
@@ -167,4 +163,46 @@ namespace swrenderer xtoviewangle[i] = 0 - xtoviewangle[viewwidth - i - 1]; } } + + DVector2 RenderViewport::PointWorldToView(const DVector2 &worldPos) const + { + double translatedX = worldPos.X - ViewPos.X; + double translatedY = worldPos.Y - ViewPos.Y; + return { + translatedX * ViewSin - translatedY * ViewCos, + translatedX * ViewTanCos + translatedY * ViewTanSin + }; + } + + DVector3 RenderViewport::PointWorldToView(const DVector3 &worldPos) const + { + double translatedX = worldPos.X - ViewPos.X; + double translatedY = worldPos.Y - ViewPos.Y; + double translatedZ = worldPos.Z - ViewPos.Z; + return { + translatedX * ViewSin - translatedY * ViewCos, + translatedZ, + translatedX * ViewTanCos + translatedY * ViewTanSin + }; + } + + DVector3 RenderViewport::PointWorldToScreen(const DVector3 &worldPos) const + { + return PointViewToScreen(PointWorldToView(worldPos)); + } + + DVector3 RenderViewport::PointViewToScreen(const DVector3 &viewPos) const + { + double screenX = CenterX + viewPos.X / viewPos.Z * CenterX; + double screenY = CenterY - viewPos.Y / viewPos.Z * InvZtoScale; + return { screenX, screenY, viewPos.Z }; + } + + DVector2 RenderViewport::ScaleViewToScreen(const DVector2 &scale, double viewZ, bool pixelstretch) const + { + double screenScaleX = scale.X / viewZ * CenterX; + double screenScaleY = scale.Y / viewZ * InvZtoScale; + if (!pixelstretch) screenScaleY /= YaspectMul; + return { screenScaleX, screenScaleY }; + } } diff --git a/src/swrenderer/scene/r_viewport.h b/src/swrenderer/scene/r_viewport.h index ea5436980f..d89dbec6ec 100644 --- a/src/swrenderer/scene/r_viewport.h +++ b/src/swrenderer/scene/r_viewport.h @@ -27,7 +27,7 @@ namespace swrenderer void SetupFreelook(); DCanvas *RenderTarget = nullptr; - fixed_t viewingrangerecip = 0; + double FocalLengthX = 0.0; double FocalLengthY = 0.0; double InvZtoScale = 0.0; @@ -48,6 +48,13 @@ namespace swrenderer bool RenderingToCanvas() const { return RenderTarget != screen; } + 
DVector3 PointWorldToView(const DVector3 &worldPos) const; + DVector3 PointWorldToScreen(const DVector3 &worldPos) const; + DVector3 PointViewToScreen(const DVector3 &viewPos) const; + + DVector2 PointWorldToView(const DVector2 &worldPos) const; + DVector2 ScaleViewToScreen(const DVector2 &scale, double viewZ, bool pixelstretch = true) const; + private: void InitTextureMapping(); void SetupBuffer(); diff --git a/src/swrenderer/things/r_voxel.cpp b/src/swrenderer/things/r_voxel.cpp index 6da831163a..7f30257bb9 100644 --- a/src/swrenderer/things/r_voxel.cpp +++ b/src/swrenderer/things/r_voxel.cpp @@ -318,35 +318,18 @@ namespace swrenderer { auto viewport = RenderViewport::Instance(); - double viewX, viewY, viewZ; - if (viewspace) - { - viewX = origin.X; - viewY = origin.Y; - viewZ = origin.Z; - } - else // world space - { - double translatedX = origin.X - ViewPos.X; - double translatedY = origin.Y - ViewPos.Y; - double translatedZ = origin.Z - ViewPos.Z; - viewX = translatedX * ViewSin - translatedY * ViewCos; - viewY = translatedZ; - viewZ = translatedX * ViewTanCos + translatedY * ViewTanSin; - } + DVector3 viewPos = viewport->PointWorldToView(origin); - if (viewZ < 0.01f) + if (viewPos.Z < 0.01f) return; - double screenX = viewport->CenterX + viewX / viewZ * viewport->CenterX; - double screenY = viewport->CenterY - viewY / viewZ * viewport->InvZtoScale; - double screenExtentX = extentX / viewZ * viewport->CenterX; - double screenExtentY = pixelstretch ? 
screenExtentX * viewport->YaspectMul : screenExtentX; + DVector3 screenPos = viewport->PointViewToScreen(viewPos); + DVector2 screenExtent = viewport->ScaleViewToScreen({ extentX, extentY }, viewPos.Z, pixelstretch); - int x1 = MAX((int)(screenX - screenExtentX), 0); - int x2 = MIN((int)(screenX + screenExtentX + 0.5f), viewwidth - 1); - int y1 = MAX((int)(screenY - screenExtentY), 0); - int y2 = MIN((int)(screenY + screenExtentY + 0.5f), viewheight - 1); + int x1 = MAX((int)(screenPos.X - screenExtent.X), 0); + int x2 = MIN((int)(screenPos.X + screenExtent.X + 0.5f), viewwidth - 1); + int y1 = MAX((int)(screenPos.Y - screenExtent.Y), 0); + int y2 = MIN((int)(screenPos.Y + screenExtent.Y + 0.5f), viewheight - 1); int pixelsize = viewport->RenderTarget->IsBgra() ? 4 : 1; From f3d968cf419d27a742e1bb417d7b8767174677f2 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 2 Feb 2017 11:16:18 +0100 Subject: [PATCH 797/912] Move wall drawer args into functions --- src/swrenderer/drawers/r_draw_pal.cpp | 14 +++--- src/swrenderer/drawers/r_draw_rgba.cpp | 18 +++---- src/swrenderer/drawers/r_drawerargs.cpp | 8 +-- src/swrenderer/drawers/r_drawerargs.h | 56 +++++++++++++++------ src/swrenderer/line/r_fogboundary.cpp | 4 +- src/swrenderer/line/r_line.cpp | 4 +- src/swrenderer/line/r_renderdrawsegment.cpp | 14 +++--- src/swrenderer/line/r_walldraw.cpp | 42 +++++++--------- src/swrenderer/plane/r_flatplane.cpp | 6 +-- src/swrenderer/plane/r_skyplane.cpp | 4 +- src/swrenderer/plane/r_slopeplane.cpp | 6 +-- src/swrenderer/things/r_decal.cpp | 8 +-- src/swrenderer/things/r_playersprite.cpp | 2 +- src/swrenderer/things/r_sprite.cpp | 2 +- src/swrenderer/things/r_voxel.cpp | 2 +- src/swrenderer/things/r_wallsprite.cpp | 8 +-- src/v_draw.cpp | 4 +- 17 files changed, 112 insertions(+), 90 deletions(-) diff --git a/src/swrenderer/drawers/r_draw_pal.cpp b/src/swrenderer/drawers/r_draw_pal.cpp index 539baedfda..81f7999d17 100644 --- a/src/swrenderer/drawers/r_draw_pal.cpp +++ 
b/src/swrenderer/drawers/r_draw_pal.cpp @@ -98,17 +98,17 @@ namespace swrenderer { PalWall1Command::PalWall1Command(const WallDrawerArgs &args) { - _iscale = args.dc_iscale; - _texturefrac = args.dc_texturefrac; + _iscale = args.TextureVStep(); + _texturefrac = args.TextureVPos(); _colormap = args.Colormap(); - _count = args.dc_count; - _source = args.dc_source; + _count = args.Count(); + _source = args.TexturePixels(); _dest = args.Dest(); _dest_y = args.DestY(); - _fracbits = args.dc_wall_fracbits; + _fracbits = args.TextureFracBits(); _pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - _srcblend = args.dc_srcblend; - _destblend = args.dc_destblend; + _srcblend = args.SrcBlend(); + _destblend = args.DestBlend(); _dynlights = args.dc_lights; _num_dynlights = args.dc_num_lights; _viewpos_z = args.dc_viewpos.Z; diff --git a/src/swrenderer/drawers/r_draw_rgba.cpp b/src/swrenderer/drawers/r_draw_rgba.cpp index 4162f2f923..0c5d9ec820 100644 --- a/src/swrenderer/drawers/r_draw_rgba.cpp +++ b/src/swrenderer/drawers/r_draw_rgba.cpp @@ -187,13 +187,13 @@ namespace swrenderer args.dest = (uint32_t*)drawerargs.Dest(); args.dest_y = drawerargs.DestY(); args.pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - args.count = drawerargs.dc_count; - args.texturefrac[0] = drawerargs.dc_texturefrac; - args.texturefracx[0] = drawerargs.dc_texturefracx; - args.iscale[0] = drawerargs.dc_iscale; - args.textureheight[0] = drawerargs.dc_textureheight; - args.source[0] = (const uint32 *)drawerargs.dc_source; - args.source2[0] = (const uint32 *)drawerargs.dc_source2; + args.count = drawerargs.Count(); + args.texturefrac[0] = drawerargs.TextureVPos(); + args.texturefracx[0] = drawerargs.TextureUPos(); + args.iscale[0] = drawerargs.TextureVStep(); + args.textureheight[0] = drawerargs.TextureHeight(); + args.source[0] = (const uint32 *)drawerargs.TexturePixels(); + args.source2[0] = (const uint32 *)drawerargs.TexturePixels2(); args.light[0] = 
LightBgra::calc_light_multiplier(drawerargs.Light()); args.light_red = shade_constants.light_red; args.light_green = shade_constants.light_green; @@ -204,8 +204,8 @@ namespace swrenderer args.fade_blue = shade_constants.fade_blue; args.fade_alpha = shade_constants.fade_alpha; args.desaturate = shade_constants.desaturate; - args.srcalpha = drawerargs.dc_srcalpha >> (FRACBITS - 8); - args.destalpha = drawerargs.dc_destalpha >> (FRACBITS - 8); + args.srcalpha = drawerargs.SrcAlpha() >> (FRACBITS - 8); + args.destalpha = drawerargs.DestAlpha() >> (FRACBITS - 8); args.flags = 0; if (shade_constants.simple_shade) args.flags |= DrawWallArgs::simple_shade; diff --git a/src/swrenderer/drawers/r_drawerargs.cpp b/src/swrenderer/drawers/r_drawerargs.cpp index 7bf3e04193..1b9b1556e7 100644 --- a/src/swrenderer/drawers/r_drawerargs.cpp +++ b/src/swrenderer/drawers/r_drawerargs.cpp @@ -63,7 +63,7 @@ namespace swrenderer spanfunc = &SWPixelFormatDrawers::DrawSpan; } - void DrawerArgs::SetColorMapLight(FSWColormap *base_colormap, float light, int shade) + void DrawerArgs::SetLight(FSWColormap *base_colormap, float light, int shade) { mBaseColormap = base_colormap; mTranslation = nullptr; @@ -542,11 +542,11 @@ namespace swrenderer { fixed_t shade = shadedlightshade; if (shade == 0) FIXEDLIGHT2SHADE(cameraLight->fixedlightlev); - SetColorMapLight(basecolormap, 0, shade); + SetLight(basecolormap, 0, shade); } else { - SetColorMapLight(basecolormap, 0, shadedlightshade); + SetLight(basecolormap, 0, shadedlightshade); } return true; } @@ -573,7 +573,7 @@ namespace swrenderer // dc_srccolor is used by the R_Fill* routines. It is premultiplied // with the alpha. 
dc_srccolor = ((((r*x) >> 4) << 20) | ((g*x) >> 4) | ((((b)*x) >> 4) << 10)) & 0x3feffbff; - SetColorMapLight(&identitycolormap, 0, 0); + SetLight(&identitycolormap, 0, 0); } if (!SpriteDrawerArgs::SetBlendFunc(style.BlendOp, fglevel, bglevel, style.Flags)) diff --git a/src/swrenderer/drawers/r_drawerargs.h b/src/swrenderer/drawers/r_drawerargs.h index ca6d202ea2..56e7c6afed 100644 --- a/src/swrenderer/drawers/r_drawerargs.h +++ b/src/swrenderer/drawers/r_drawerargs.h @@ -24,7 +24,7 @@ namespace swrenderer class DrawerArgs { public: - void SetColorMapLight(FSWColormap *base_colormap, float light, int shade); + void SetLight(FSWColormap *base_colormap, float light, int shade); void SetTranslationMap(lighttable_t *translation); uint8_t *Colormap() const; @@ -70,7 +70,7 @@ namespace swrenderer const uint8_t *FrontTexturePixels() const { return dc_source; } const uint8_t *BackTexturePixels() const { return dc_source2; } int FrontTextureHeight() const { return dc_sourceheight; } - int BackTextureHeight() const { return dc_sourceheight; } + int BackTextureHeight() const { return dc_sourceheight2; } void DrawSingleSkyColumn(); void DrawDoubleSkyColumn(); @@ -164,6 +164,17 @@ namespace swrenderer public: void SetStyle(bool masked, bool additive, fixed_t alpha); void SetDest(int x, int y); + void SetCount(int count) { dc_count = count; } + void SetTexture(const uint8_t *pixels, const uint8_t *pixels2, int height) + { + dc_source = pixels; + dc_source2 = pixels2; + dc_textureheight = height; + } + void SetTextureFracBits(int bits) { dc_wall_fracbits = bits; } + void SetTextureUPos(uint32_t pos) { dc_texturefracx = pos; } + void SetTextureVPos(fixed_t pos) { dc_texturefrac = pos; } + void SetTextureVStep(fixed_t step) { dc_iscale = step; } bool IsMaskedDrawer() const; @@ -171,21 +182,22 @@ namespace swrenderer uint8_t *Dest() const { return dc_dest; } int DestY() const { return dc_dest_y; } + int Count() const { return dc_count; } - uint32_t *dc_srcblend; - uint32_t 
*dc_destblend; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; + uint32_t *SrcBlend() const { return dc_srcblend; } + uint32_t *DestBlend() const { return dc_destblend; } + fixed_t SrcAlpha() const { return dc_srcalpha; } + fixed_t DestAlpha() const { return dc_destalpha; } - fixed_t dc_iscale; - fixed_t dc_texturefrac; - uint32_t dc_texturefracx; - uint32_t dc_textureheight; - const uint8_t *dc_source; - const uint8_t *dc_source2; - int dc_count; - - int dc_wall_fracbits; + uint32_t TextureUPos() const { return dc_texturefracx; } + fixed_t TextureVPos() const { return dc_texturefrac; } + fixed_t TextureVStep() const { return dc_iscale; } + uint32_t TextureHeight() const { return dc_textureheight; } + + const uint8_t *TexturePixels() const { return dc_source; } + const uint8_t *TexturePixels2() const { return dc_source2; } + + int TextureFracBits() const { return dc_wall_fracbits; } FVector3 dc_normal; FVector3 dc_viewpos; @@ -196,6 +208,20 @@ namespace swrenderer private: uint8_t *dc_dest = nullptr; int dc_dest_y = 0; + int dc_count; + + fixed_t dc_iscale; + fixed_t dc_texturefrac; + uint32_t dc_texturefracx; + uint32_t dc_textureheight; + const uint8_t *dc_source; + const uint8_t *dc_source2; + int dc_wall_fracbits; + + uint32_t *dc_srcblend; + uint32_t *dc_destblend; + fixed_t dc_srcalpha; + fixed_t dc_destalpha; typedef void(SWPixelFormatDrawers::*WallDrawerFunc)(const WallDrawerArgs &args); WallDrawerFunc wallfunc = nullptr; diff --git a/src/swrenderer/line/r_fogboundary.cpp b/src/swrenderer/line/r_fogboundary.cpp index f585109a46..b75ce82ce9 100644 --- a/src/swrenderer/line/r_fogboundary.cpp +++ b/src/swrenderer/line/r_fogboundary.cpp @@ -64,7 +64,7 @@ namespace swrenderer fillshort(spanend + t2, b2 - t2, x); } - drawerargs.SetColorMapLight(basecolormap, (float)light, wallshade); + drawerargs.SetLight(basecolormap, (float)light, wallshade); uint8_t *fake_dc_colormap = basecolormap->Maps + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT); @@ -91,7 +91,7 @@ 
namespace swrenderer fillshort(spanend + t2, b2 - t2, x); } rcolormap = lcolormap; - drawerargs.SetColorMapLight(basecolormap, (float)light, wallshade); + drawerargs.SetLight(basecolormap, (float)light, wallshade); fake_dc_colormap = basecolormap->Maps + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT); } else diff --git a/src/swrenderer/line/r_line.cpp b/src/swrenderer/line/r_line.cpp index dea8f62e73..0cc59826ed 100644 --- a/src/swrenderer/line/r_line.cpp +++ b/src/swrenderer/line/r_line.cpp @@ -936,9 +936,9 @@ namespace swrenderer CameraLight *cameraLight = CameraLight::Instance(); if (cameraLight->fixedlightlev >= 0) - drawerargs.SetColorMapLight((r_fullbrightignoresectorcolor) ? &FullNormalLight : basecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->fixedlightlev)); + drawerargs.SetLight((r_fullbrightignoresectorcolor) ? &FullNormalLight : basecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->fixedlightlev)); else if (cameraLight->fixedcolormap != nullptr) - drawerargs.SetColorMapLight(cameraLight->fixedcolormap, 0, 0); + drawerargs.SetLight(cameraLight->fixedcolormap, 0, 0); // clip wall to the floor and ceiling auto ceilingclip = RenderOpaquePass::Instance()->ceilingclip; diff --git a/src/swrenderer/line/r_renderdrawsegment.cpp b/src/swrenderer/line/r_renderdrawsegment.cpp index 42f46da5ea..ea1db392e1 100644 --- a/src/swrenderer/line/r_renderdrawsegment.cpp +++ b/src/swrenderer/line/r_renderdrawsegment.cpp @@ -149,13 +149,13 @@ namespace swrenderer if (cameraLight->fixedlightlev >= 0) { - walldrawerargs.SetColorMapLight((r_fullbrightignoresectorcolor) ? &FullNormalLight : basecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->fixedlightlev)); - columndrawerargs.SetColorMapLight((r_fullbrightignoresectorcolor) ? &FullNormalLight : basecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->fixedlightlev)); + walldrawerargs.SetLight((r_fullbrightignoresectorcolor) ? 
&FullNormalLight : basecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->fixedlightlev)); + columndrawerargs.SetLight((r_fullbrightignoresectorcolor) ? &FullNormalLight : basecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->fixedlightlev)); } else if (cameraLight->fixedcolormap != nullptr) { - walldrawerargs.SetColorMapLight(cameraLight->fixedcolormap, 0, 0); - columndrawerargs.SetColorMapLight(cameraLight->fixedcolormap, 0, 0); + walldrawerargs.SetLight(cameraLight->fixedcolormap, 0, 0); + columndrawerargs.SetLight(cameraLight->fixedcolormap, 0, 0); } // find positioning @@ -285,7 +285,7 @@ namespace swrenderer { if (cameraLight->fixedcolormap == nullptr && cameraLight->fixedlightlev < 0) { - columndrawerargs.SetColorMapLight(basecolormap, rw_light, wallshade); + columndrawerargs.SetLight(basecolormap, rw_light, wallshade); } fixed_t iscale = xs_Fix<16>::ToFix(MaskedSWall[x] * MaskedScaleY); @@ -456,9 +456,9 @@ namespace swrenderer CameraLight *cameraLight = CameraLight::Instance(); if (cameraLight->fixedlightlev >= 0) - drawerargs.SetColorMapLight((r_fullbrightignoresectorcolor) ? &FullNormalLight : basecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->fixedlightlev)); + drawerargs.SetLight((r_fullbrightignoresectorcolor) ? 
&FullNormalLight : basecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->fixedlightlev)); else if (cameraLight->fixedcolormap != nullptr) - drawerargs.SetColorMapLight(cameraLight->fixedcolormap, 0, 0); + drawerargs.SetLight(cameraLight->fixedcolormap, 0, 0); WallC.sz1 = ds->sz1; WallC.sz2 = ds->sz2; diff --git a/src/swrenderer/line/r_walldraw.cpp b/src/swrenderer/line/r_walldraw.cpp index 2f3fc04e09..8da0c4cb2f 100644 --- a/src/swrenderer/line/r_walldraw.cpp +++ b/src/swrenderer/line/r_walldraw.cpp @@ -244,14 +244,12 @@ namespace swrenderer { int count = y2 - y1; - drawerargs.dc_source = sampler.source; - drawerargs.dc_source2 = sampler.source2; - drawerargs.dc_texturefracx = sampler.texturefracx; + drawerargs.SetTexture(sampler.source, sampler.source2, sampler.height); + drawerargs.SetTextureUPos(sampler.texturefracx); drawerargs.SetDest(x, y1); - drawerargs.dc_count = count; - drawerargs.dc_iscale = sampler.uv_step; - drawerargs.dc_texturefrac = sampler.uv_pos; - drawerargs.dc_textureheight = sampler.height; + drawerargs.SetCount(count); + drawerargs.SetTextureVStep(sampler.uv_step); + drawerargs.SetTextureVPos(sampler.uv_pos); drawerargs.DrawColumn(); uint64_t step64 = sampler.uv_step; @@ -264,13 +262,12 @@ namespace swrenderer { int count = y2 - y1; - drawerargs.dc_source = sampler.source; - drawerargs.dc_source2 = sampler.source2; - drawerargs.dc_texturefracx = sampler.texturefracx; + drawerargs.SetTexture(sampler.source, sampler.source2, sampler.height); + drawerargs.SetTextureUPos(sampler.texturefracx); drawerargs.SetDest(x, y1); - drawerargs.dc_count = count; - drawerargs.dc_iscale = sampler.uv_step; - drawerargs.dc_texturefrac = sampler.uv_pos; + drawerargs.SetCount(count); + drawerargs.SetTextureVStep(sampler.uv_step); + drawerargs.SetTextureVPos(sampler.uv_pos); drawerargs.DrawColumn(); uint64_t step64 = sampler.uv_step; @@ -290,13 +287,12 @@ namespace swrenderer next_uv_wrap++; uint32_t count = MIN(left, next_uv_wrap); - drawerargs.dc_source = sampler.source; 
- drawerargs.dc_source2 = sampler.source2; - drawerargs.dc_texturefracx = sampler.texturefracx; + drawerargs.SetTexture(sampler.source, sampler.source2, sampler.height); + drawerargs.SetTextureUPos(sampler.texturefracx); drawerargs.SetDest(x, y1); - drawerargs.dc_count = count; - drawerargs.dc_iscale = sampler.uv_step; - drawerargs.dc_texturefrac = uv_pos; + drawerargs.SetCount(count); + drawerargs.SetTextureVStep(sampler.uv_step); + drawerargs.SetTextureVPos(uv_pos); drawerargs.DrawColumn(); left -= count; @@ -324,15 +320,15 @@ namespace swrenderer texturemid = 0; } - drawerargs.dc_wall_fracbits = RenderViewport::Instance()->RenderTarget->IsBgra() ? FRACBITS : fracbits; + drawerargs.SetTextureFracBits(RenderViewport::Instance()->RenderTarget->IsBgra() ? FRACBITS : fracbits); CameraLight *cameraLight = CameraLight::Instance(); bool fixed = (cameraLight->fixedcolormap != NULL || cameraLight->fixedlightlev >= 0); if (cameraLight->fixedcolormap) - drawerargs.SetColorMapLight(cameraLight->fixedcolormap, 0, 0); + drawerargs.SetLight(cameraLight->fixedcolormap, 0, 0); else - drawerargs.SetColorMapLight(basecolormap, 0, 0); + drawerargs.SetLight(basecolormap, 0, 0); float dx = WallC.tright.X - WallC.tleft.X; float dy = WallC.tright.Y - WallC.tleft.Y; @@ -351,7 +347,7 @@ namespace swrenderer continue; if (!fixed) - drawerargs.SetColorMapLight(basecolormap, light, wallshade); + drawerargs.SetLight(basecolormap, light, wallshade); if (x + 1 < x2) xmagnitude = fabs(FIXED2DBL(lwal[x + 1]) - FIXED2DBL(lwal[x])); diff --git a/src/swrenderer/plane/r_flatplane.cpp b/src/swrenderer/plane/r_flatplane.cpp index 8c7c2704f9..12cdc27fc5 100644 --- a/src/swrenderer/plane/r_flatplane.cpp +++ b/src/swrenderer/plane/r_flatplane.cpp @@ -114,12 +114,12 @@ namespace swrenderer CameraLight *cameraLight = CameraLight::Instance(); if (cameraLight->fixedlightlev >= 0) { - drawerargs.SetColorMapLight(basecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->fixedlightlev)); + drawerargs.SetLight(basecolormap, 
0, FIXEDLIGHT2SHADE(cameraLight->fixedlightlev)); plane_shade = false; } else if (cameraLight->fixedcolormap) { - drawerargs.SetColorMapLight(cameraLight->fixedcolormap, 0, 0); + drawerargs.SetLight(cameraLight->fixedcolormap, 0, 0); plane_shade = false; } else @@ -188,7 +188,7 @@ namespace swrenderer if (plane_shade) { // Determine lighting based on the span's distance from the viewer. - drawerargs.SetColorMapLight(basecolormap, (float)(GlobVis * fabs(viewport->CenterY - y)), planeshade); + drawerargs.SetLight(basecolormap, (float)(GlobVis * fabs(viewport->CenterY - y)), planeshade); } if (r_dynlights) diff --git a/src/swrenderer/plane/r_skyplane.cpp b/src/swrenderer/plane/r_skyplane.cpp index f7b4222f2a..882df107ff 100644 --- a/src/swrenderer/plane/r_skyplane.cpp +++ b/src/swrenderer/plane/r_skyplane.cpp @@ -151,13 +151,13 @@ namespace swrenderer CameraLight *cameraLight = CameraLight::Instance(); if (cameraLight->fixedcolormap) { - drawerargs.SetColorMapLight(cameraLight->fixedcolormap, 0, 0); + drawerargs.SetLight(cameraLight->fixedcolormap, 0, 0); } else { fakefixed = true; cameraLight->fixedcolormap = &NormalLight; - drawerargs.SetColorMapLight(cameraLight->fixedcolormap, 0, 0); + drawerargs.SetLight(cameraLight->fixedcolormap, 0, 0); } DrawSky(pl); diff --git a/src/swrenderer/plane/r_slopeplane.cpp b/src/swrenderer/plane/r_slopeplane.cpp index 93e98f2d8b..f175c9b344 100644 --- a/src/swrenderer/plane/r_slopeplane.cpp +++ b/src/swrenderer/plane/r_slopeplane.cpp @@ -156,17 +156,17 @@ namespace swrenderer CameraLight *cameraLight = CameraLight::Instance(); if (cameraLight->fixedlightlev >= 0) { - drawerargs.SetColorMapLight(basecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->fixedlightlev)); + drawerargs.SetLight(basecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->fixedlightlev)); plane_shade = false; } else if (cameraLight->fixedcolormap) { - drawerargs.SetColorMapLight(cameraLight->fixedcolormap, 0, 0); + drawerargs.SetLight(cameraLight->fixedcolormap, 0, 0); 
plane_shade = false; } else { - drawerargs.SetColorMapLight(basecolormap, 0, 0); + drawerargs.SetLight(basecolormap, 0, 0); plane_shade = true; planeshade = LIGHT2SHADE(pl->lightlevel); } diff --git a/src/swrenderer/things/r_decal.cpp b/src/swrenderer/things/r_decal.cpp index 522448b206..c902f20a1d 100644 --- a/src/swrenderer/things/r_decal.cpp +++ b/src/swrenderer/things/r_decal.cpp @@ -277,11 +277,11 @@ namespace swrenderer SpriteDrawerArgs drawerargs; if (cameraLight->fixedlightlev >= 0) - drawerargs.SetColorMapLight((r_fullbrightignoresectorcolor) ? &FullNormalLight : usecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->fixedlightlev)); + drawerargs.SetLight((r_fullbrightignoresectorcolor) ? &FullNormalLight : usecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->fixedlightlev)); else if (cameraLight->fixedcolormap != NULL) - drawerargs.SetColorMapLight(cameraLight->fixedcolormap, 0, 0); + drawerargs.SetLight(cameraLight->fixedcolormap, 0, 0); else if (!foggy && (decal->RenderFlags & RF_FULLBRIGHT)) - drawerargs.SetColorMapLight((r_fullbrightignoresectorcolor) ? &FullNormalLight : usecolormap, 0, 0); + drawerargs.SetLight((r_fullbrightignoresectorcolor) ? 
&FullNormalLight : usecolormap, 0, 0); else calclighting = true; @@ -299,7 +299,7 @@ namespace swrenderer { if (calclighting) { // calculate lighting - drawerargs.SetColorMapLight(usecolormap, light, wallshade); + drawerargs.SetLight(usecolormap, light, wallshade); } DrawColumn(drawerargs, x, WallSpriteTile, walltexcoords, texturemid, maskedScaleY, sprflipvert, mfloorclip, mceilingclip); light += lightstep; diff --git a/src/swrenderer/things/r_playersprite.cpp b/src/swrenderer/things/r_playersprite.cpp index 44a82eb22c..71f57960a8 100644 --- a/src/swrenderer/things/r_playersprite.cpp +++ b/src/swrenderer/things/r_playersprite.cpp @@ -592,7 +592,7 @@ namespace swrenderer } SpriteDrawerArgs drawerargs; - drawerargs.SetColorMapLight(Light.BaseColormap, 0, Light.ColormapNum << FRACBITS); + drawerargs.SetLight(Light.BaseColormap, 0, Light.ColormapNum << FRACBITS); FDynamicColormap *basecolormap = static_cast(Light.BaseColormap); diff --git a/src/swrenderer/things/r_sprite.cpp b/src/swrenderer/things/r_sprite.cpp index fd4fefc9b0..b83089b9e3 100644 --- a/src/swrenderer/things/r_sprite.cpp +++ b/src/swrenderer/things/r_sprite.cpp @@ -248,7 +248,7 @@ namespace swrenderer } SpriteDrawerArgs drawerargs; - drawerargs.SetColorMapLight(vis->Light.BaseColormap, 0, vis->Light.ColormapNum << FRACBITS); + drawerargs.SetLight(vis->Light.BaseColormap, 0, vis->Light.ColormapNum << FRACBITS); FDynamicColormap *basecolormap = static_cast(vis->Light.BaseColormap); diff --git a/src/swrenderer/things/r_voxel.cpp b/src/swrenderer/things/r_voxel.cpp index 7f30257bb9..b188368a33 100644 --- a/src/swrenderer/things/r_voxel.cpp +++ b/src/swrenderer/things/r_voxel.cpp @@ -189,7 +189,7 @@ namespace swrenderer FDynamicColormap *basecolormap = static_cast(sprite->Light.BaseColormap); SpriteDrawerArgs drawerargs; - drawerargs.SetColorMapLight(sprite->Light.BaseColormap, 0, sprite->Light.ColormapNum << FRACBITS); + drawerargs.SetLight(sprite->Light.BaseColormap, 0, sprite->Light.ColormapNum << 
FRACBITS); bool visible = drawerargs.SetPatchStyle(sprite->RenderStyle, sprite->Alpha, sprite->Translation, sprite->FillColor, basecolormap); if (!visible) diff --git a/src/swrenderer/things/r_wallsprite.cpp b/src/swrenderer/things/r_wallsprite.cpp index f4cb600d68..0f3c8800f3 100644 --- a/src/swrenderer/things/r_wallsprite.cpp +++ b/src/swrenderer/things/r_wallsprite.cpp @@ -188,11 +188,11 @@ namespace swrenderer float light = lightleft + (x1 - spr->wallc.sx1) * lightstep; CameraLight *cameraLight = CameraLight::Instance(); if (cameraLight->fixedlightlev >= 0) - drawerargs.SetColorMapLight(usecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->fixedlightlev)); + drawerargs.SetLight(usecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->fixedlightlev)); else if (cameraLight->fixedcolormap != NULL) - drawerargs.SetColorMapLight(cameraLight->fixedcolormap, 0, 0); + drawerargs.SetLight(cameraLight->fixedcolormap, 0, 0); else if (!spr->foggy && (spr->renderflags & RF_FULLBRIGHT)) - drawerargs.SetColorMapLight((r_fullbrightignoresectorcolor) ? &FullNormalLight : usecolormap, 0, 0); + drawerargs.SetLight((r_fullbrightignoresectorcolor) ? 
&FullNormalLight : usecolormap, 0, 0); else calclighting = true; @@ -235,7 +235,7 @@ namespace swrenderer { if (calclighting) { // calculate lighting - drawerargs.SetColorMapLight(usecolormap, light, shade); + drawerargs.SetLight(usecolormap, light, shade); } if (!translucentPass->ClipSpriteColumnWithPortals(x, spr)) DrawColumn(drawerargs, x, WallSpriteTile, walltexcoords, texturemid, maskedScaleY, sprflipvert, mfloorclip, mceilingclip); diff --git a/src/v_draw.cpp b/src/v_draw.cpp index 7e5469fb34..5fdd5c6904 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -1371,9 +1371,9 @@ void DCanvas::FillSimplePoly(FTexture *tex, FVector2 *points, int npoints, SpanDrawerArgs drawerargs; drawerargs.SetTexture(tex); if (colormap) - drawerargs.SetColorMapLight(colormap, 0, clamp(shade >> FRACBITS, 0, NUMCOLORMAPS - 1)); + drawerargs.SetLight(colormap, 0, clamp(shade >> FRACBITS, 0, NUMCOLORMAPS - 1)); else - drawerargs.SetColorMapLight(&identitycolormap, 0, 0); + drawerargs.SetLight(&identitycolormap, 0, 0); if (drawerargs.TextureWidthBits() != 0) { scalex = double(1u << (32 - drawerargs.TextureWidthBits())) / scalex; From d8c6f9acfd8dfb15b2bac849f041980f71dea830 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 2 Feb 2017 13:51:41 +0100 Subject: [PATCH 798/912] Limit access to sprite drawer variables --- src/swrenderer/drawers/r_draw.cpp | 4 +- src/swrenderer/drawers/r_draw_pal.cpp | 26 +++++----- src/swrenderer/drawers/r_draw_rgba.cpp | 28 +++++------ src/swrenderer/drawers/r_drawerargs.h | 68 +++++++++++++++++--------- src/swrenderer/things/r_voxel.cpp | 4 +- 5 files changed, 77 insertions(+), 53 deletions(-) diff --git a/src/swrenderer/drawers/r_draw.cpp b/src/swrenderer/drawers/r_draw.cpp index 2e0539204d..890b394df0 100644 --- a/src/swrenderer/drawers/r_draw.cpp +++ b/src/swrenderer/drawers/r_draw.cpp @@ -157,8 +157,8 @@ namespace swrenderer void R_UpdateFuzzPos(const SpriteDrawerArgs &args) { - int yl = MAX(args.dc_yl, 1); - int yh = MIN(args.dc_yh, 
fuzzviewheight); + int yl = MAX(args.FuzzY1(), 1); + int yh = MIN(args.FuzzY2(), fuzzviewheight); if (yl <= yh) fuzzpos = (fuzzpos + yh - yl + 1) % FUZZTABLE; } diff --git a/src/swrenderer/drawers/r_draw_pal.cpp b/src/swrenderer/drawers/r_draw_pal.cpp index 81f7999d17..856c5b7b77 100644 --- a/src/swrenderer/drawers/r_draw_pal.cpp +++ b/src/swrenderer/drawers/r_draw_pal.cpp @@ -867,21 +867,21 @@ namespace swrenderer PalColumnCommand::PalColumnCommand(const SpriteDrawerArgs &args) { - _count = args.dc_count; + _count = args.Count(); _dest = args.Dest(); _dest_y = args.DestY(); _pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - _iscale = args.dc_iscale; - _texturefrac = args.dc_texturefrac; + _iscale = args.TextureVStep(); + _texturefrac = args.TextureVPos(); _colormap = args.Colormap(); - _source = args.dc_source; + _source = args.TexturePixels(); _translation = args.TranslationMap(); - _color = args.dc_color; - _srcblend = args.dc_srcblend; - _destblend = args.dc_destblend; - _srccolor = args.dc_srccolor; - _srcalpha = args.dc_srcalpha; - _destalpha = args.dc_destalpha; + _color = args.SolidColor(); + _srcblend = args.SrcBlend(); + _destblend = args.DestBlend(); + _srccolor = args.SrcColorIndex(); + _srcalpha = args.SrcAlpha(); + _destalpha = args.DestAlpha(); } void DrawColumnPalCommand::Execute(DrawerThread *thread) @@ -1764,9 +1764,9 @@ namespace swrenderer DrawFuzzColumnPalCommand::DrawFuzzColumnPalCommand(const SpriteDrawerArgs &args) { - _yl = args.dc_yl; - _yh = args.dc_yh; - _x = args.dc_x; + _yl = args.FuzzY1(); + _yh = args.FuzzY2(); + _x = args.FuzzX(); _destorg = RenderViewport::Instance()->GetDest(0, 0); _pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); _fuzzpos = fuzzpos; diff --git a/src/swrenderer/drawers/r_draw_rgba.cpp b/src/swrenderer/drawers/r_draw_rgba.cpp index 0c5d9ec820..9806c6d8c2 100644 --- a/src/swrenderer/drawers/r_draw_rgba.cpp +++ b/src/swrenderer/drawers/r_draw_rgba.cpp @@ -251,23 +251,23 @@ namespace 
swrenderer auto shade_constants = drawerargs.ColormapConstants(); args.dest = (uint32_t*)drawerargs.Dest(); - args.source = drawerargs.dc_source; - args.source2 = drawerargs.dc_source2; + args.source = drawerargs.TexturePixels(); + args.source2 = drawerargs.TexturePixels2(); args.colormap = drawerargs.Colormap(); args.translation = drawerargs.TranslationMap(); args.basecolors = (const uint32_t *)GPalette.BaseColors; args.pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - args.count = drawerargs.dc_count; + args.count = drawerargs.Count(); args.dest_y = drawerargs.DestY(); - args.iscale = drawerargs.dc_iscale; - args.texturefracx = drawerargs.dc_texturefracx; - args.textureheight = drawerargs.dc_textureheight; - args.texturefrac = drawerargs.dc_texturefrac; + args.iscale = drawerargs.TextureVStep(); + args.texturefracx = drawerargs.TextureUPos(); + args.textureheight = drawerargs.TextureHeight(); + args.texturefrac = drawerargs.TextureVPos(); args.light = LightBgra::calc_light_multiplier(drawerargs.Light()); - args.color = LightBgra::shade_pal_index_simple(drawerargs.dc_color, args.light); - args.srccolor = drawerargs.dc_srccolor_bgra; - args.srcalpha = drawerargs.dc_srcalpha >> (FRACBITS - 8); - args.destalpha = drawerargs.dc_destalpha >> (FRACBITS - 8); + args.color = LightBgra::shade_pal_index_simple(drawerargs.SolidColor(), args.light); + args.srccolor = drawerargs.SrcColorBgra(); + args.srcalpha = drawerargs.SrcAlpha() >> (FRACBITS - 8); + args.destalpha = drawerargs.DestAlpha() >> (FRACBITS - 8); args.light_red = shade_constants.light_red; args.light_green = shade_constants.light_green; args.light_blue = shade_constants.light_blue; @@ -328,9 +328,9 @@ namespace swrenderer DrawFuzzColumnRGBACommand::DrawFuzzColumnRGBACommand(const SpriteDrawerArgs &drawerargs) { - _x = drawerargs.dc_x; - _yl = drawerargs.dc_yl; - _yh = drawerargs.dc_yh; + _x = drawerargs.FuzzX(); + _yl = drawerargs.FuzzY1(); + _yh = drawerargs.FuzzY2(); _destorg = 
RenderViewport::Instance()->GetDest(0, 0); _pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); _fuzzpos = fuzzpos; diff --git a/src/swrenderer/drawers/r_drawerargs.h b/src/swrenderer/drawers/r_drawerargs.h index 56e7c6afed..481792edc7 100644 --- a/src/swrenderer/drawers/r_drawerargs.h +++ b/src/swrenderer/drawers/r_drawerargs.h @@ -192,11 +192,11 @@ namespace swrenderer uint32_t TextureUPos() const { return dc_texturefracx; } fixed_t TextureVPos() const { return dc_texturefrac; } fixed_t TextureVStep() const { return dc_iscale; } - uint32_t TextureHeight() const { return dc_textureheight; } - + const uint8_t *TexturePixels() const { return dc_source; } const uint8_t *TexturePixels2() const { return dc_source2; } - + uint32_t TextureHeight() const { return dc_textureheight; } + int TextureFracBits() const { return dc_wall_fracbits; } FVector3 dc_normal; @@ -234,35 +234,37 @@ namespace swrenderer bool SetPatchStyle(FRenderStyle style, fixed_t alpha, int translation, uint32_t color, FDynamicColormap *&basecolormap, fixed_t shadedlightshade = 0); bool SetPatchStyle(FRenderStyle style, float alpha, int translation, uint32_t color, FDynamicColormap *&basecolormap, fixed_t shadedlightshade = 0); + void SetDest(int x, int y); + void SetCount(int count) { dc_count = count; } + void SetSolidColor(int color) { dc_color = color; } void DrawMaskedColumn(int x, fixed_t iscale, FTexture *texture, fixed_t column, double spryscale, double sprtopscreen, bool sprflipvert, const short *mfloorclip, const short *mceilingclip, bool unmasked = false); void FillColumn(); - void SetDest(int x, int y); - uint8_t *Dest() const { return dc_dest; } int DestY() const { return dc_dest_y; } + int Count() const { return dc_count; } - int dc_x; - int dc_yl; - int dc_yh; + int FuzzX() const { return dc_x; } + int FuzzY1() const { return dc_yl; } + int FuzzY2() const { return dc_yh; } - fixed_t dc_iscale; - fixed_t dc_texturefrac; - uint32_t dc_texturefracx; - uint32_t dc_textureheight; - 
const uint8_t *dc_source; - const uint8_t *dc_source2; - int dc_count; + uint32_t TextureUPos() const { return dc_texturefracx; } + fixed_t TextureVPos() const { return dc_texturefrac; } + fixed_t TextureVStep() const { return dc_iscale; } - int dc_color = 0; - uint32_t dc_srccolor; - uint32_t dc_srccolor_bgra; + int SolidColor() const { return dc_color; } + uint32_t SrcColorIndex() const { return dc_srccolor; } + uint32_t SrcColorBgra() const { return dc_srccolor_bgra; } - uint32_t *dc_srcblend; - uint32_t *dc_destblend; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; + const uint8_t *TexturePixels() const { return dc_source; } + const uint8_t *TexturePixels2() const { return dc_source2; } + uint32_t TextureHeight() const { return dc_textureheight; } + + uint32_t *SrcBlend() const { return dc_srcblend; } + uint32_t *DestBlend() const { return dc_destblend; } + fixed_t SrcAlpha() const { return dc_srcalpha; } + fixed_t DestAlpha() const { return dc_destalpha; } private: bool SetBlendFunc(int op, fixed_t fglevel, fixed_t bglevel, int flags); @@ -271,8 +273,30 @@ namespace swrenderer uint8_t *dc_dest = nullptr; int dc_dest_y = 0; + int dc_count = 0; + + fixed_t dc_iscale; + fixed_t dc_texturefrac; + uint32_t dc_texturefracx; + + uint32_t dc_textureheight = 0; + const uint8_t *dc_source = nullptr; + const uint8_t *dc_source2 = nullptr; bool drawer_needs_pal_input = false; + uint32_t *dc_srcblend = nullptr; + uint32_t *dc_destblend = nullptr; + fixed_t dc_srcalpha = OPAQUE; + fixed_t dc_destalpha = 0; + + int dc_x = 0; + int dc_yl = 0; + int dc_yh = 0; + + int dc_color = 0; + uint32_t dc_srccolor = 0; + uint32_t dc_srccolor_bgra = 0; + typedef void(SWPixelFormatDrawers::*SpriteDrawerFunc)(const SpriteDrawerArgs &args); SpriteDrawerFunc colfunc; }; diff --git a/src/swrenderer/things/r_voxel.cpp b/src/swrenderer/things/r_voxel.cpp index b188368a33..21006b1466 100644 --- a/src/swrenderer/things/r_voxel.cpp +++ b/src/swrenderer/things/r_voxel.cpp @@ -342,8 +342,8 @@ 
namespace swrenderer if (columnY1 < columnY2) { drawerargs.SetDest(x, columnY1); - drawerargs.dc_color = color; - drawerargs.dc_count = columnY2 - columnY1; + drawerargs.SetSolidColor(color); + drawerargs.SetCount(columnY2 - columnY1); drawerargs.FillColumn(); } } From e90b73539e01048a9c3832748123bf50ea60f9cd Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 2 Feb 2017 15:10:06 +0100 Subject: [PATCH 799/912] Move viewport drawing to its own folder --- src/CMakeLists.txt | 10 +- src/polyrenderer/math/tri_matrix.cpp | 2 +- src/polyrenderer/poly_renderer.cpp | 2 +- src/swrenderer/drawers/r_draw.cpp | 1 - src/swrenderer/drawers/r_draw_pal.cpp | 2 +- src/swrenderer/drawers/r_draw_pal.h | 5 +- src/swrenderer/drawers/r_draw_rgba.cpp | 2 +- src/swrenderer/drawers/r_draw_rgba.h | 5 +- src/swrenderer/drawers/r_drawerargs.h | 317 ------------------ src/swrenderer/line/r_fogboundary.h | 2 +- src/swrenderer/line/r_renderdrawsegment.cpp | 3 +- src/swrenderer/line/r_walldraw.cpp | 2 +- src/swrenderer/line/r_walldraw.h | 2 +- src/swrenderer/line/r_wallsetup.cpp | 2 +- src/swrenderer/plane/r_flatplane.cpp | 2 +- src/swrenderer/plane/r_flatplane.h | 2 +- src/swrenderer/plane/r_skyplane.cpp | 2 +- src/swrenderer/plane/r_skyplane.h | 2 +- src/swrenderer/plane/r_slopeplane.cpp | 2 +- src/swrenderer/plane/r_slopeplane.h | 2 +- src/swrenderer/plane/r_visibleplanelist.cpp | 2 +- src/swrenderer/r_swrenderer.cpp | 3 +- src/swrenderer/scene/r_light.cpp | 2 +- src/swrenderer/scene/r_light.h | 2 +- src/swrenderer/scene/r_opaque_pass.cpp | 2 +- src/swrenderer/scene/r_portal.cpp | 2 +- src/swrenderer/scene/r_scene.cpp | 2 +- src/swrenderer/scene/r_translucent_pass.cpp | 2 +- src/swrenderer/segments/r_drawsegment.cpp | 2 +- src/swrenderer/things/r_decal.cpp | 3 +- src/swrenderer/things/r_particle.cpp | 2 +- src/swrenderer/things/r_playersprite.cpp | 2 +- src/swrenderer/things/r_sprite.cpp | 2 +- src/swrenderer/things/r_visiblesprite.cpp | 2 +- src/swrenderer/things/r_wallsprite.cpp | 2 +- 
src/swrenderer/viewport/r_drawerargs.cpp | 94 ++++++ src/swrenderer/viewport/r_drawerargs.h | 59 ++++ src/swrenderer/viewport/r_skydrawer.cpp | 68 ++++ src/swrenderer/viewport/r_skydrawer.h | 58 ++++ src/swrenderer/viewport/r_spandrawer.cpp | 118 +++++++ src/swrenderer/viewport/r_spandrawer.h | 80 +++++ .../r_spritedrawer.cpp} | 278 +-------------- src/swrenderer/viewport/r_spritedrawer.h | 85 +++++ .../{scene => viewport}/r_viewport.cpp | 2 +- .../{scene => viewport}/r_viewport.h | 0 src/swrenderer/viewport/r_walldrawer.cpp | 66 ++++ src/swrenderer/viewport/r_walldrawer.h | 79 +++++ src/v_draw.cpp | 2 +- 48 files changed, 759 insertions(+), 631 deletions(-) delete mode 100644 src/swrenderer/drawers/r_drawerargs.h create mode 100644 src/swrenderer/viewport/r_drawerargs.cpp create mode 100644 src/swrenderer/viewport/r_drawerargs.h create mode 100644 src/swrenderer/viewport/r_skydrawer.cpp create mode 100644 src/swrenderer/viewport/r_skydrawer.h create mode 100644 src/swrenderer/viewport/r_spandrawer.cpp create mode 100644 src/swrenderer/viewport/r_spandrawer.h rename src/swrenderer/{drawers/r_drawerargs.cpp => viewport/r_spritedrawer.cpp} (66%) create mode 100644 src/swrenderer/viewport/r_spritedrawer.h rename src/swrenderer/{scene => viewport}/r_viewport.cpp (99%) rename src/swrenderer/{scene => viewport}/r_viewport.h (100%) create mode 100644 src/swrenderer/viewport/r_walldrawer.cpp create mode 100644 src/swrenderer/viewport/r_walldrawer.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index a37649086e..885e31356d 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -807,6 +807,7 @@ file( GLOB HEADER_FILES swrenderer/line/*.h swrenderer/plane/*.h swrenderer/things/*.h + swrenderer/viewport/*.h polyrenderer/*.h polyrenderer/math/*.h polyrenderer/drawers/*.h @@ -850,7 +851,6 @@ set( FASTMATH_PCH_SOURCES swrenderer/drawers/r_draw.cpp swrenderer/drawers/r_draw_pal.cpp swrenderer/drawers/r_draw_rgba.cpp - swrenderer/drawers/r_drawerargs.cpp 
swrenderer/drawers/r_drawers.cpp swrenderer/drawers/r_thread.cpp swrenderer/scene/r_3dfloors.cpp @@ -859,7 +859,12 @@ set( FASTMATH_PCH_SOURCES swrenderer/scene/r_portal.cpp swrenderer/scene/r_scene.cpp swrenderer/scene/r_translucent_pass.cpp - swrenderer/scene/r_viewport.cpp + swrenderer/viewport/r_drawerargs.cpp + swrenderer/viewport/r_skydrawer.cpp + swrenderer/viewport/r_spandrawer.cpp + swrenderer/viewport/r_spritedrawer.cpp + swrenderer/viewport/r_viewport.cpp + swrenderer/viewport/r_walldrawer.cpp swrenderer/line/r_line.cpp swrenderer/line/r_walldraw.cpp swrenderer/line/r_wallsetup.cpp @@ -1506,6 +1511,7 @@ source_group("Software Renderer\\Segments" REGULAR_EXPRESSION "^${CMAKE_CURRENT_ source_group("Software Renderer\\Line" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/swrenderer/line/.+") source_group("Software Renderer\\Plane" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/swrenderer/plane/.+") source_group("Software Renderer\\Things" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/swrenderer/things/.+") +source_group("Software Renderer\\Viewport" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/swrenderer/viewport/.+") source_group("Poly Renderer" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/polyrenderer/.+") source_group("Poly Renderer\\Math" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/polyrenderer/math/.+") source_group("Poly Renderer\\Drawers" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/polyrenderer/drawers/.+") diff --git a/src/polyrenderer/math/tri_matrix.cpp b/src/polyrenderer/math/tri_matrix.cpp index cd6109539b..daa549587a 100644 --- a/src/polyrenderer/math/tri_matrix.cpp +++ b/src/polyrenderer/math/tri_matrix.cpp @@ -37,7 +37,7 @@ #include "tri_matrix.h" #include "polyrenderer/drawers/poly_triangle.h" #include "swrenderer/drawers/r_draw_rgba.h" -#include "swrenderer/scene/r_viewport.h" +#include "swrenderer/viewport/r_viewport.h" TriMatrix TriMatrix::null() { diff --git a/src/polyrenderer/poly_renderer.cpp 
b/src/polyrenderer/poly_renderer.cpp index f8bcedc7c1..f8b27bd983 100644 --- a/src/polyrenderer/poly_renderer.cpp +++ b/src/polyrenderer/poly_renderer.cpp @@ -33,9 +33,9 @@ #include "po_man.h" #include "st_stuff.h" #include "swrenderer/scene/r_scene.h" -#include "swrenderer/scene/r_viewport.h" #include "swrenderer/scene/r_light.h" #include "swrenderer/drawers/r_draw_rgba.h" +#include "swrenderer/viewport/r_viewport.h" EXTERN_CVAR(Bool, r_shadercolormaps) EXTERN_CVAR(Int, screenblocks) diff --git a/src/swrenderer/drawers/r_draw.cpp b/src/swrenderer/drawers/r_draw.cpp index 890b394df0..d7b4237ce9 100644 --- a/src/swrenderer/drawers/r_draw.cpp +++ b/src/swrenderer/drawers/r_draw.cpp @@ -50,7 +50,6 @@ #include "r_draw_rgba.h" #include "r_draw_pal.h" #include "r_thread.h" -#include "r_drawerargs.h" #include "swrenderer/scene/r_light.h" CVAR(Bool, r_dynlights, 1, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); diff --git a/src/swrenderer/drawers/r_draw_pal.cpp b/src/swrenderer/drawers/r_draw_pal.cpp index 856c5b7b77..fad7c24362 100644 --- a/src/swrenderer/drawers/r_draw_pal.cpp +++ b/src/swrenderer/drawers/r_draw_pal.cpp @@ -47,7 +47,7 @@ #include "r_draw.h" #include "v_video.h" #include "r_draw_pal.h" -#include "swrenderer/scene/r_viewport.h" +#include "swrenderer/viewport/r_viewport.h" #include "swrenderer/scene/r_light.h" // [SP] r_blendmethod - false = rgb555 matching (ZDoom classic), true = rgb666 (refactored) diff --git a/src/swrenderer/drawers/r_draw_pal.h b/src/swrenderer/drawers/r_draw_pal.h index f28007e73a..cf955c607c 100644 --- a/src/swrenderer/drawers/r_draw_pal.h +++ b/src/swrenderer/drawers/r_draw_pal.h @@ -4,7 +4,10 @@ #include "r_draw.h" #include "v_palette.h" #include "r_thread.h" -#include "r_drawerargs.h" +#include "swrenderer/viewport/r_skydrawer.h" +#include "swrenderer/viewport/r_spandrawer.h" +#include "swrenderer/viewport/r_walldrawer.h" +#include "swrenderer/viewport/r_spritedrawer.h" namespace swrenderer { diff --git a/src/swrenderer/drawers/r_draw_rgba.cpp 
b/src/swrenderer/drawers/r_draw_rgba.cpp index 9806c6d8c2..0b9af6b5e7 100644 --- a/src/swrenderer/drawers/r_draw_rgba.cpp +++ b/src/swrenderer/drawers/r_draw_rgba.cpp @@ -38,7 +38,7 @@ #include "r_draw_rgba.h" #include "r_drawers.h" #include "gl/data/gl_matrix.h" -#include "swrenderer/scene/r_viewport.h" +#include "swrenderer/viewport/r_viewport.h" #include "swrenderer/scene/r_light.h" #include "gi.h" diff --git a/src/swrenderer/drawers/r_draw_rgba.h b/src/swrenderer/drawers/r_draw_rgba.h index 33c9ebcabf..6ba67f5c1d 100644 --- a/src/swrenderer/drawers/r_draw_rgba.h +++ b/src/swrenderer/drawers/r_draw_rgba.h @@ -26,7 +26,10 @@ #include "v_palette.h" #include "r_thread.h" #include "r_drawers.h" -#include "r_drawerargs.h" +#include "swrenderer/viewport/r_skydrawer.h" +#include "swrenderer/viewport/r_spandrawer.h" +#include "swrenderer/viewport/r_walldrawer.h" +#include "swrenderer/viewport/r_spritedrawer.h" #ifdef __arm__ #define NO_SSE diff --git a/src/swrenderer/drawers/r_drawerargs.h b/src/swrenderer/drawers/r_drawerargs.h deleted file mode 100644 index 481792edc7..0000000000 --- a/src/swrenderer/drawers/r_drawerargs.h +++ /dev/null @@ -1,317 +0,0 @@ - -#pragma once - -#include "templates.h" -#include "doomtype.h" -#include "doomdef.h" -#include "r_defs.h" -#include "r_draw.h" -#include "v_video.h" -#include "r_data/colormaps.h" -#include "r_data/r_translate.h" -#include "swrenderer/scene/r_light.h" - -struct FSWColormap; -struct FLightNode; -struct TriLight; - -namespace swrenderer -{ - class SWPixelFormatDrawers; - class DrawerArgs; - struct ShadeConstants; - - class DrawerArgs - { - public: - void SetLight(FSWColormap *base_colormap, float light, int shade); - void SetTranslationMap(lighttable_t *translation); - - uint8_t *Colormap() const; - uint8_t *TranslationMap() const { return mTranslation; } - - ShadeConstants ColormapConstants() const; - fixed_t Light() const { return LIGHTSCALE(mLight, mShade); } - - protected: - static SWPixelFormatDrawers *Drawers(); 
- - private: - FSWColormap *mBaseColormap = nullptr; - float mLight = 0.0f; - int mShade = 0; - uint8_t *mTranslation = nullptr; - }; - - class SkyDrawerArgs : public DrawerArgs - { - public: - void SetDest(int x, int y); - void SetCount(int count) { dc_count = count; } - void SetFrontTexture(FTexture *texture, uint32_t column); - void SetBackTexture(FTexture *texture, uint32_t column); - void SetTextureVPos(uint32_t texturefrac) { dc_texturefrac = texturefrac; } - void SetTextureVStep(uint32_t iscale) { dc_iscale = iscale; } - void SetSolidTop(uint32_t color) { solid_top = color; } - void SetSolidBottom(uint32_t color) { solid_bottom = color; } - void SetFadeSky(bool enable) { fadeSky = enable; } - - uint8_t *Dest() const { return dc_dest; } - int DestY() const { return dc_dest_y; } - int Count() const { return dc_count; } - - uint32_t TextureVPos() const { return dc_texturefrac; } - uint32_t TextureVStep() const { return dc_iscale; } - - uint32_t SolidTopColor() const { return solid_top; } - uint32_t SolidBottomColor() const { return solid_bottom; } - bool FadeSky() const { return fadeSky; } - - const uint8_t *FrontTexturePixels() const { return dc_source; } - const uint8_t *BackTexturePixels() const { return dc_source2; } - int FrontTextureHeight() const { return dc_sourceheight; } - int BackTextureHeight() const { return dc_sourceheight2; } - - void DrawSingleSkyColumn(); - void DrawDoubleSkyColumn(); - - private: - uint8_t *dc_dest = nullptr; - int dc_dest_y = 0; - int dc_count = 0; - const uint8_t *dc_source; - const uint8_t *dc_source2; - uint32_t dc_sourceheight; - uint32_t dc_sourceheight2; - uint32_t dc_texturefrac; - uint32_t dc_iscale; - uint32_t solid_top; - uint32_t solid_bottom; - bool fadeSky; - }; - - class SpanDrawerArgs : public DrawerArgs - { - public: - SpanDrawerArgs(); - - void SetStyle(bool masked, bool additive, fixed_t alpha); - void SetDestY(int y) { ds_y = y; } - void SetDestX1(int x) { ds_x1 = x; } - void SetDestX2(int x) { ds_x2 = x; } 
- void SetTexture(FTexture *tex); - void SetTextureLOD(double lod) { ds_lod = lod; } - void SetTextureUPos(dsfixed_t xfrac) { ds_xfrac = xfrac; } - void SetTextureVPos(dsfixed_t yfrac) { ds_yfrac = yfrac; } - void SetTextureUStep(dsfixed_t xstep) { ds_xstep = xstep; } - void SetTextureVStep(dsfixed_t vstep) { ds_ystep = vstep; } - void SetSolidColor(int colorIndex) { ds_color = colorIndex; } - - void DrawSpan(); - void DrawTiltedSpan(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy, FDynamicColormap *basecolormap); - void DrawColoredSpan(int y, int x1, int x2); - void DrawFogBoundaryLine(int y, int x1, int x2); - - uint32_t *SrcBlend() const { return dc_srcblend; } - uint32_t *DestBlend() const { return dc_destblend; } - fixed_t SrcAlpha() const { return dc_srcalpha; } - fixed_t DestAlpha() const { return dc_destalpha; } - int DestY() const { return ds_y; } - int DestX1() const { return ds_x1; } - int DestX2() const { return ds_x2; } - dsfixed_t TextureUPos() const { return ds_xfrac; } - dsfixed_t TextureVPos() const { return ds_yfrac; } - dsfixed_t TextureUStep() const { return ds_xstep; } - dsfixed_t TextureVStep() const { return ds_ystep; } - int SolidColor() const { return ds_color; } - int TextureWidthBits() const { return ds_xbits; } - int TextureHeightBits() const { return ds_ybits; } - const uint8_t *TexturePixels() const { return ds_source; } - bool MipmappedTexture() const { return ds_source_mipmapped; } - double TextureLOD() const { return ds_lod; } - - FVector3 dc_normal; - FVector3 dc_viewpos; - FVector3 dc_viewpos_step; - TriLight *dc_lights = nullptr; - int dc_num_lights = 0; - - private: - typedef void(SWPixelFormatDrawers::*SpanDrawerFunc)(const SpanDrawerArgs &args); - SpanDrawerFunc spanfunc; - - int ds_y; - int ds_x1; - int ds_x2; - int ds_xbits; - int ds_ybits; - const uint8_t *ds_source; - bool 
ds_source_mipmapped; - dsfixed_t ds_xfrac; - dsfixed_t ds_yfrac; - dsfixed_t ds_xstep; - dsfixed_t ds_ystep; - uint32_t *dc_srcblend; - uint32_t *dc_destblend; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; - int ds_color = 0; - double ds_lod; - }; - - class WallDrawerArgs : public DrawerArgs - { - public: - void SetStyle(bool masked, bool additive, fixed_t alpha); - void SetDest(int x, int y); - void SetCount(int count) { dc_count = count; } - void SetTexture(const uint8_t *pixels, const uint8_t *pixels2, int height) - { - dc_source = pixels; - dc_source2 = pixels2; - dc_textureheight = height; - } - void SetTextureFracBits(int bits) { dc_wall_fracbits = bits; } - void SetTextureUPos(uint32_t pos) { dc_texturefracx = pos; } - void SetTextureVPos(fixed_t pos) { dc_texturefrac = pos; } - void SetTextureVStep(fixed_t step) { dc_iscale = step; } - - bool IsMaskedDrawer() const; - - void DrawColumn(); - - uint8_t *Dest() const { return dc_dest; } - int DestY() const { return dc_dest_y; } - int Count() const { return dc_count; } - - uint32_t *SrcBlend() const { return dc_srcblend; } - uint32_t *DestBlend() const { return dc_destblend; } - fixed_t SrcAlpha() const { return dc_srcalpha; } - fixed_t DestAlpha() const { return dc_destalpha; } - - uint32_t TextureUPos() const { return dc_texturefracx; } - fixed_t TextureVPos() const { return dc_texturefrac; } - fixed_t TextureVStep() const { return dc_iscale; } - - const uint8_t *TexturePixels() const { return dc_source; } - const uint8_t *TexturePixels2() const { return dc_source2; } - uint32_t TextureHeight() const { return dc_textureheight; } - - int TextureFracBits() const { return dc_wall_fracbits; } - - FVector3 dc_normal; - FVector3 dc_viewpos; - FVector3 dc_viewpos_step; - TriLight *dc_lights = nullptr; - int dc_num_lights = 0; - - private: - uint8_t *dc_dest = nullptr; - int dc_dest_y = 0; - int dc_count; - - fixed_t dc_iscale; - fixed_t dc_texturefrac; - uint32_t dc_texturefracx; - uint32_t dc_textureheight; - 
const uint8_t *dc_source; - const uint8_t *dc_source2; - int dc_wall_fracbits; - - uint32_t *dc_srcblend; - uint32_t *dc_destblend; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; - - typedef void(SWPixelFormatDrawers::*WallDrawerFunc)(const WallDrawerArgs &args); - WallDrawerFunc wallfunc = nullptr; - }; - - class SpriteDrawerArgs : public DrawerArgs - { - public: - SpriteDrawerArgs(); - - bool SetPatchStyle(FRenderStyle style, fixed_t alpha, int translation, uint32_t color, FDynamicColormap *&basecolormap, fixed_t shadedlightshade = 0); - bool SetPatchStyle(FRenderStyle style, float alpha, int translation, uint32_t color, FDynamicColormap *&basecolormap, fixed_t shadedlightshade = 0); - void SetDest(int x, int y); - void SetCount(int count) { dc_count = count; } - void SetSolidColor(int color) { dc_color = color; } - - void DrawMaskedColumn(int x, fixed_t iscale, FTexture *texture, fixed_t column, double spryscale, double sprtopscreen, bool sprflipvert, const short *mfloorclip, const short *mceilingclip, bool unmasked = false); - void FillColumn(); - - uint8_t *Dest() const { return dc_dest; } - int DestY() const { return dc_dest_y; } - int Count() const { return dc_count; } - - int FuzzX() const { return dc_x; } - int FuzzY1() const { return dc_yl; } - int FuzzY2() const { return dc_yh; } - - uint32_t TextureUPos() const { return dc_texturefracx; } - fixed_t TextureVPos() const { return dc_texturefrac; } - fixed_t TextureVStep() const { return dc_iscale; } - - int SolidColor() const { return dc_color; } - uint32_t SrcColorIndex() const { return dc_srccolor; } - uint32_t SrcColorBgra() const { return dc_srccolor_bgra; } - - const uint8_t *TexturePixels() const { return dc_source; } - const uint8_t *TexturePixels2() const { return dc_source2; } - uint32_t TextureHeight() const { return dc_textureheight; } - - uint32_t *SrcBlend() const { return dc_srcblend; } - uint32_t *DestBlend() const { return dc_destblend; } - fixed_t SrcAlpha() const { return dc_srcalpha; } - 
fixed_t DestAlpha() const { return dc_destalpha; } - - private: - bool SetBlendFunc(int op, fixed_t fglevel, fixed_t bglevel, int flags); - static fixed_t GetAlpha(int type, fixed_t alpha); - void DrawMaskedColumnBgra(int x, fixed_t iscale, FTexture *tex, fixed_t column, double spryscale, double sprtopscreen, bool sprflipvert, const short *mfloorclip, const short *mceilingclip, bool unmasked); - - uint8_t *dc_dest = nullptr; - int dc_dest_y = 0; - int dc_count = 0; - - fixed_t dc_iscale; - fixed_t dc_texturefrac; - uint32_t dc_texturefracx; - - uint32_t dc_textureheight = 0; - const uint8_t *dc_source = nullptr; - const uint8_t *dc_source2 = nullptr; - bool drawer_needs_pal_input = false; - - uint32_t *dc_srcblend = nullptr; - uint32_t *dc_destblend = nullptr; - fixed_t dc_srcalpha = OPAQUE; - fixed_t dc_destalpha = 0; - - int dc_x = 0; - int dc_yl = 0; - int dc_yh = 0; - - int dc_color = 0; - uint32_t dc_srccolor = 0; - uint32_t dc_srccolor_bgra = 0; - - typedef void(SWPixelFormatDrawers::*SpriteDrawerFunc)(const SpriteDrawerArgs &args); - SpriteDrawerFunc colfunc; - }; - - struct ShadeConstants - { - uint16_t light_alpha; - uint16_t light_red; - uint16_t light_green; - uint16_t light_blue; - uint16_t fade_alpha; - uint16_t fade_red; - uint16_t fade_green; - uint16_t fade_blue; - uint16_t desaturate; - bool simple_shade; - }; -} diff --git a/src/swrenderer/line/r_fogboundary.h b/src/swrenderer/line/r_fogboundary.h index 4213cb146d..487e773851 100644 --- a/src/swrenderer/line/r_fogboundary.h +++ b/src/swrenderer/line/r_fogboundary.h @@ -13,7 +13,7 @@ #pragma once -#include "swrenderer/drawers/r_draw.h" +#include "swrenderer/viewport/r_spandrawer.h" namespace swrenderer { diff --git a/src/swrenderer/line/r_renderdrawsegment.cpp b/src/swrenderer/line/r_renderdrawsegment.cpp index ea1db392e1..281a8e2c29 100644 --- a/src/swrenderer/line/r_renderdrawsegment.cpp +++ b/src/swrenderer/line/r_renderdrawsegment.cpp @@ -40,7 +40,8 @@ #include 
"swrenderer/segments/r_drawsegment.h" #include "swrenderer/things/r_visiblesprite.h" #include "swrenderer/scene/r_light.h" -#include "swrenderer/scene/r_viewport.h" +#include "swrenderer/viewport/r_viewport.h" +#include "swrenderer/viewport/r_spritedrawer.h" EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor); diff --git a/src/swrenderer/line/r_walldraw.cpp b/src/swrenderer/line/r_walldraw.cpp index 8da0c4cb2f..700f170819 100644 --- a/src/swrenderer/line/r_walldraw.cpp +++ b/src/swrenderer/line/r_walldraw.cpp @@ -36,8 +36,8 @@ #include "swrenderer/scene/r_3dfloors.h" #include "swrenderer/scene/r_portal.h" #include "swrenderer/scene/r_scene.h" -#include "swrenderer/scene/r_viewport.h" #include "swrenderer/scene/r_light.h" +#include "swrenderer/viewport/r_viewport.h" #include "swrenderer/line/r_walldraw.h" #include "swrenderer/line/r_wallsetup.h" diff --git a/src/swrenderer/line/r_walldraw.h b/src/swrenderer/line/r_walldraw.h index 76e3bccd68..bc5b2b3e9f 100644 --- a/src/swrenderer/line/r_walldraw.h +++ b/src/swrenderer/line/r_walldraw.h @@ -13,7 +13,7 @@ #pragma once -#include "swrenderer/drawers/r_drawerargs.h" +#include "swrenderer/viewport/r_walldrawer.h" #include "r_line.h" class FTexture; diff --git a/src/swrenderer/line/r_wallsetup.cpp b/src/swrenderer/line/r_wallsetup.cpp index 33388586f0..94c3a7aa44 100644 --- a/src/swrenderer/line/r_wallsetup.cpp +++ b/src/swrenderer/line/r_wallsetup.cpp @@ -22,8 +22,8 @@ #include "swrenderer/r_memory.h" #include "swrenderer/line/r_line.h" #include "swrenderer/scene/r_scene.h" -#include "swrenderer/scene/r_viewport.h" #include "swrenderer/scene/r_light.h" +#include "swrenderer/viewport/r_viewport.h" namespace swrenderer { diff --git a/src/swrenderer/plane/r_flatplane.cpp b/src/swrenderer/plane/r_flatplane.cpp index 12cdc27fc5..49a96797f2 100644 --- a/src/swrenderer/plane/r_flatplane.cpp +++ b/src/swrenderer/plane/r_flatplane.cpp @@ -37,9 +37,9 @@ #include "swrenderer/segments/r_drawsegment.h" #include 
"swrenderer/scene/r_portal.h" #include "swrenderer/scene/r_scene.h" -#include "swrenderer/scene/r_viewport.h" #include "swrenderer/scene/r_light.h" #include "swrenderer/plane/r_visibleplane.h" +#include "swrenderer/viewport/r_viewport.h" #include "swrenderer/r_memory.h" namespace swrenderer diff --git a/src/swrenderer/plane/r_flatplane.h b/src/swrenderer/plane/r_flatplane.h index 385305cd09..c164ba8623 100644 --- a/src/swrenderer/plane/r_flatplane.h +++ b/src/swrenderer/plane/r_flatplane.h @@ -14,7 +14,7 @@ #pragma once #include "r_planerenderer.h" -#include "swrenderer/drawers/r_drawerargs.h" +#include "swrenderer/viewport/r_spandrawer.h" namespace swrenderer { diff --git a/src/swrenderer/plane/r_skyplane.cpp b/src/swrenderer/plane/r_skyplane.cpp index 882df107ff..b4780065f4 100644 --- a/src/swrenderer/plane/r_skyplane.cpp +++ b/src/swrenderer/plane/r_skyplane.cpp @@ -39,8 +39,8 @@ #include "swrenderer/line/r_walldraw.h" #include "swrenderer/scene/r_portal.h" #include "swrenderer/scene/r_scene.h" -#include "swrenderer/scene/r_viewport.h" #include "swrenderer/scene/r_light.h" +#include "swrenderer/viewport/r_viewport.h" #include "swrenderer/r_memory.h" #include "g_levellocals.h" diff --git a/src/swrenderer/plane/r_skyplane.h b/src/swrenderer/plane/r_skyplane.h index 03a62d27e6..a1f8a45501 100644 --- a/src/swrenderer/plane/r_skyplane.h +++ b/src/swrenderer/plane/r_skyplane.h @@ -14,7 +14,7 @@ #pragma once #include "r_visibleplane.h" -#include "swrenderer/drawers/r_drawerargs.h" +#include "swrenderer/viewport/r_skydrawer.h" namespace swrenderer { diff --git a/src/swrenderer/plane/r_slopeplane.cpp b/src/swrenderer/plane/r_slopeplane.cpp index f175c9b344..ee56e5da2e 100644 --- a/src/swrenderer/plane/r_slopeplane.cpp +++ b/src/swrenderer/plane/r_slopeplane.cpp @@ -37,8 +37,8 @@ #include "swrenderer/segments/r_drawsegment.h" #include "swrenderer/scene/r_portal.h" #include "swrenderer/scene/r_scene.h" -#include "swrenderer/scene/r_viewport.h" #include 
"swrenderer/scene/r_light.h" +#include "swrenderer/viewport/r_viewport.h" #include "swrenderer/r_memory.h" #include "swrenderer/plane/r_visibleplane.h" diff --git a/src/swrenderer/plane/r_slopeplane.h b/src/swrenderer/plane/r_slopeplane.h index ccdfb316d2..b06f525a20 100644 --- a/src/swrenderer/plane/r_slopeplane.h +++ b/src/swrenderer/plane/r_slopeplane.h @@ -14,7 +14,7 @@ #pragma once #include "r_planerenderer.h" -#include "swrenderer/drawers/r_drawerargs.h" +#include "swrenderer/viewport/r_spandrawer.h" namespace swrenderer { diff --git a/src/swrenderer/plane/r_visibleplanelist.cpp b/src/swrenderer/plane/r_visibleplanelist.cpp index 97378c1aec..03eb386f6f 100644 --- a/src/swrenderer/plane/r_visibleplanelist.cpp +++ b/src/swrenderer/plane/r_visibleplanelist.cpp @@ -33,7 +33,6 @@ #include "swrenderer/scene/r_3dfloors.h" #include "swrenderer/scene/r_portal.h" #include "swrenderer/scene/r_scene.h" -#include "swrenderer/scene/r_viewport.h" #include "swrenderer/scene/r_light.h" #include "swrenderer/plane/r_flatplane.h" #include "swrenderer/plane/r_slopeplane.h" @@ -41,6 +40,7 @@ #include "swrenderer/plane/r_visibleplane.h" #include "swrenderer/plane/r_visibleplanelist.h" #include "swrenderer/drawers/r_draw.h" +#include "swrenderer/viewport/r_viewport.h" namespace swrenderer { diff --git a/src/swrenderer/r_swrenderer.cpp b/src/swrenderer/r_swrenderer.cpp index a83289006f..b334df7961 100644 --- a/src/swrenderer/r_swrenderer.cpp +++ b/src/swrenderer/r_swrenderer.cpp @@ -33,10 +33,9 @@ */ #include "swrenderer/scene/r_scene.h" -#include "swrenderer/scene/r_viewport.h" +#include "swrenderer/viewport/r_viewport.h" #include "swrenderer/things/r_playersprite.h" #include "swrenderer/scene/r_scene.h" -#include "swrenderer/scene/r_viewport.h" #include "swrenderer/scene/r_light.h" #include "v_palette.h" #include "v_video.h" diff --git a/src/swrenderer/scene/r_light.cpp b/src/swrenderer/scene/r_light.cpp index 076f8c9b00..633e34acba 100644 --- a/src/swrenderer/scene/r_light.cpp +++ 
b/src/swrenderer/scene/r_light.cpp @@ -31,7 +31,7 @@ #include "r_utility.h" #include "d_player.h" #include "swrenderer/scene/r_light.h" -#include "swrenderer/scene/r_viewport.h" +#include "swrenderer/viewport/r_viewport.h" CVAR(Bool, r_shadercolormaps, true, CVAR_ARCHIVE) EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor) diff --git a/src/swrenderer/scene/r_light.h b/src/swrenderer/scene/r_light.h index 099700980d..6059028a6f 100644 --- a/src/swrenderer/scene/r_light.h +++ b/src/swrenderer/scene/r_light.h @@ -18,7 +18,7 @@ #include "v_palette.h" #include "r_data/colormaps.h" #include "r_utility.h" -#include "r_viewport.h" +#include "swrenderer/viewport/r_viewport.h" // Lighting. // diff --git a/src/swrenderer/scene/r_opaque_pass.cpp b/src/swrenderer/scene/r_opaque_pass.cpp index 46facc5e53..ebd0d9ba87 100644 --- a/src/swrenderer/scene/r_opaque_pass.cpp +++ b/src/swrenderer/scene/r_opaque_pass.cpp @@ -42,8 +42,8 @@ #include "swrenderer/segments/r_clipsegment.h" #include "swrenderer/line/r_wallsetup.h" #include "swrenderer/scene/r_scene.h" -#include "swrenderer/scene/r_viewport.h" #include "swrenderer/scene/r_light.h" +#include "swrenderer/viewport/r_viewport.h" #include "r_3dfloors.h" #include "r_portal.h" #include "a_sharedglobal.h" diff --git a/src/swrenderer/scene/r_portal.cpp b/src/swrenderer/scene/r_portal.cpp index b74337bf04..dfa7a1bd1d 100644 --- a/src/swrenderer/scene/r_portal.cpp +++ b/src/swrenderer/scene/r_portal.cpp @@ -54,8 +54,8 @@ #include "swrenderer/scene/r_opaque_pass.h" #include "swrenderer/scene/r_translucent_pass.h" #include "swrenderer/scene/r_scene.h" -#include "swrenderer/scene/r_viewport.h" #include "swrenderer/scene/r_light.h" +#include "swrenderer/viewport/r_viewport.h" #include "swrenderer/r_memory.h" CVAR(Int, r_portal_recursions, 4, CVAR_ARCHIVE) diff --git a/src/swrenderer/scene/r_scene.cpp b/src/swrenderer/scene/r_scene.cpp index 84bcaee2bb..8fbe1efb2a 100644 --- a/src/swrenderer/scene/r_scene.cpp +++ b/src/swrenderer/scene/r_scene.cpp 
@@ -33,7 +33,6 @@ #include "st_stuff.h" #include "r_data/r_interpolate.h" #include "swrenderer/scene/r_scene.h" -#include "swrenderer/scene/r_viewport.h" #include "swrenderer/scene/r_light.h" #include "swrenderer/scene/r_3dfloors.h" #include "swrenderer/scene/r_opaque_pass.h" @@ -43,6 +42,7 @@ #include "swrenderer/segments/r_drawsegment.h" #include "swrenderer/segments/r_portalsegment.h" #include "swrenderer/plane/r_visibleplanelist.h" +#include "swrenderer/viewport/r_viewport.h" #include "swrenderer/drawers/r_draw.h" #include "swrenderer/drawers/r_draw_rgba.h" #include "swrenderer/drawers/r_thread.h" diff --git a/src/swrenderer/scene/r_translucent_pass.cpp b/src/swrenderer/scene/r_translucent_pass.cpp index 67b0600955..b8d64c141e 100644 --- a/src/swrenderer/scene/r_translucent_pass.cpp +++ b/src/swrenderer/scene/r_translucent_pass.cpp @@ -32,11 +32,11 @@ #include "swrenderer/segments/r_drawsegment.h" #include "swrenderer/scene/r_portal.h" #include "swrenderer/scene/r_translucent_pass.h" -#include "swrenderer/scene/r_viewport.h" #include "swrenderer/scene/r_light.h" #include "swrenderer/plane/r_visibleplane.h" #include "swrenderer/plane/r_visibleplanelist.h" #include "swrenderer/line/r_renderdrawsegment.h" +#include "swrenderer/viewport/r_viewport.h" #include "swrenderer/r_memory.h" EXTERN_CVAR(Int, r_drawfuzz) diff --git a/src/swrenderer/segments/r_drawsegment.cpp b/src/swrenderer/segments/r_drawsegment.cpp index a9e1479024..8ff4e658fa 100644 --- a/src/swrenderer/segments/r_drawsegment.cpp +++ b/src/swrenderer/segments/r_drawsegment.cpp @@ -39,7 +39,7 @@ #include "swrenderer/segments/r_drawsegment.h" #include "swrenderer/things/r_visiblesprite.h" #include "swrenderer/scene/r_light.h" -#include "swrenderer/scene/r_viewport.h" +#include "swrenderer/viewport/r_viewport.h" namespace swrenderer { diff --git a/src/swrenderer/things/r_decal.cpp b/src/swrenderer/things/r_decal.cpp index c902f20a1d..f1966f60a3 100644 --- a/src/swrenderer/things/r_decal.cpp +++ 
b/src/swrenderer/things/r_decal.cpp @@ -38,9 +38,10 @@ #include "swrenderer/segments/r_drawsegment.h" #include "swrenderer/scene/r_portal.h" #include "swrenderer/scene/r_scene.h" -#include "swrenderer/scene/r_viewport.h" #include "swrenderer/scene/r_light.h" #include "swrenderer/things/r_wallsprite.h" +#include "swrenderer/viewport/r_viewport.h" +#include "swrenderer/viewport/r_spritedrawer.h" #include "swrenderer/r_memory.h" EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor); diff --git a/src/swrenderer/things/r_particle.cpp b/src/swrenderer/things/r_particle.cpp index 8ee6df916f..dd078430be 100644 --- a/src/swrenderer/things/r_particle.cpp +++ b/src/swrenderer/things/r_particle.cpp @@ -47,11 +47,11 @@ #include "swrenderer/scene/r_3dfloors.h" #include "swrenderer/scene/r_translucent_pass.h" #include "swrenderer/scene/r_portal.h" -#include "swrenderer/scene/r_viewport.h" #include "swrenderer/scene/r_light.h" #include "swrenderer/segments/r_drawsegment.h" #include "swrenderer/line/r_renderdrawsegment.h" #include "swrenderer/things/r_particle.h" +#include "swrenderer/viewport/r_viewport.h" #include "swrenderer/drawers/r_draw_rgba.h" #include "swrenderer/drawers/r_draw_pal.h" #include "swrenderer/r_memory.h" diff --git a/src/swrenderer/things/r_playersprite.cpp b/src/swrenderer/things/r_playersprite.cpp index 71f57960a8..4df08a2623 100644 --- a/src/swrenderer/things/r_playersprite.cpp +++ b/src/swrenderer/things/r_playersprite.cpp @@ -51,9 +51,9 @@ #include "swrenderer/segments/r_drawsegment.h" #include "swrenderer/scene/r_portal.h" #include "swrenderer/scene/r_scene.h" -#include "swrenderer/scene/r_viewport.h" #include "swrenderer/scene/r_light.h" #include "swrenderer/things/r_sprite.h" +#include "swrenderer/viewport/r_viewport.h" #include "swrenderer/r_memory.h" #include "g_levellocals.h" diff --git a/src/swrenderer/things/r_sprite.cpp b/src/swrenderer/things/r_sprite.cpp index b83089b9e3..9130572640 100644 --- a/src/swrenderer/things/r_sprite.cpp +++ 
b/src/swrenderer/things/r_sprite.cpp @@ -51,9 +51,9 @@ #include "swrenderer/segments/r_drawsegment.h" #include "swrenderer/scene/r_portal.h" #include "swrenderer/scene/r_scene.h" -#include "swrenderer/scene/r_viewport.h" #include "swrenderer/scene/r_light.h" #include "swrenderer/things/r_sprite.h" +#include "swrenderer/viewport/r_viewport.h" #include "swrenderer/r_memory.h" EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor) diff --git a/src/swrenderer/things/r_visiblesprite.cpp b/src/swrenderer/things/r_visiblesprite.cpp index 1b5a3b447d..cdb4da2d01 100644 --- a/src/swrenderer/things/r_visiblesprite.cpp +++ b/src/swrenderer/things/r_visiblesprite.cpp @@ -33,7 +33,7 @@ #include "swrenderer/plane/r_visibleplane.h" #include "swrenderer/scene/r_portal.h" #include "swrenderer/scene/r_light.h" -#include "swrenderer/scene/r_viewport.h" +#include "swrenderer/viewport/r_viewport.h" #include "swrenderer/r_memory.h" EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor); diff --git a/src/swrenderer/things/r_wallsprite.cpp b/src/swrenderer/things/r_wallsprite.cpp index 0f3c8800f3..945faa073c 100644 --- a/src/swrenderer/things/r_wallsprite.cpp +++ b/src/swrenderer/things/r_wallsprite.cpp @@ -52,10 +52,10 @@ #include "swrenderer/segments/r_drawsegment.h" #include "swrenderer/scene/r_portal.h" #include "swrenderer/scene/r_scene.h" -#include "swrenderer/scene/r_viewport.h" #include "swrenderer/scene/r_light.h" #include "swrenderer/line/r_wallsetup.h" #include "swrenderer/line/r_walldraw.h" +#include "swrenderer/viewport/r_viewport.h" #include "swrenderer/r_memory.h" EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor); diff --git a/src/swrenderer/viewport/r_drawerargs.cpp b/src/swrenderer/viewport/r_drawerargs.cpp new file mode 100644 index 0000000000..d54afad6bf --- /dev/null +++ b/src/swrenderer/viewport/r_drawerargs.cpp @@ -0,0 +1,94 @@ +// +// Copyright (C) 1993-1996 by id Software, Inc. 
+// +// This source is available for distribution and/or modification +// only under the terms of the DOOM Source Code License as +// published by id Software. All rights reserved. +// +// The source is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// FITNESS FOR A PARTICULAR PURPOSE. See the DOOM Source Code License +// for more details. +// + +#include +#include "r_drawerargs.h" +#include "swrenderer/drawers/r_draw_pal.h" +#include "swrenderer/drawers/r_draw_rgba.h" + +namespace swrenderer +{ + SWPixelFormatDrawers *DrawerArgs::Drawers() + { + if (RenderViewport::Instance()->RenderTarget->IsBgra()) + { + static SWTruecolorDrawers tc_drawers; + return &tc_drawers; + } + else + { + static SWPalDrawers pal_drawers; + return &pal_drawers; + } + } + + void DrawerArgs::SetLight(FSWColormap *base_colormap, float light, int shade) + { + mBaseColormap = base_colormap; + mTranslation = nullptr; + mLight = light; + mShade = shade; + } + + void DrawerArgs::SetTranslationMap(lighttable_t *translation) + { + mTranslation = translation; + } + + uint8_t *DrawerArgs::Colormap() const + { + if (mBaseColormap) + { + if (RenderViewport::Instance()->RenderTarget->IsBgra()) + return mBaseColormap->Maps; + else + return mBaseColormap->Maps + (GETPALOOKUP(mLight, mShade) << COLORMAPSHIFT); + } + else + { + return mTranslation; + } + } + + ShadeConstants DrawerArgs::ColormapConstants() const + { + ShadeConstants shadeConstants; + if (mBaseColormap) + { + shadeConstants.light_red = mBaseColormap->Color.r * 256 / 255; + shadeConstants.light_green = mBaseColormap->Color.g * 256 / 255; + shadeConstants.light_blue = mBaseColormap->Color.b * 256 / 255; + shadeConstants.light_alpha = mBaseColormap->Color.a * 256 / 255; + shadeConstants.fade_red = mBaseColormap->Fade.r; + shadeConstants.fade_green = mBaseColormap->Fade.g; + shadeConstants.fade_blue = mBaseColormap->Fade.b; + shadeConstants.fade_alpha = mBaseColormap->Fade.a; + 
shadeConstants.desaturate = MIN(abs(mBaseColormap->Desaturate), 255) * 255 / 256; + shadeConstants.simple_shade = (mBaseColormap->Color.d == 0x00ffffff && mBaseColormap->Fade.d == 0x00000000 && mBaseColormap->Desaturate == 0); + } + else + { + shadeConstants.light_red = 256; + shadeConstants.light_green = 256; + shadeConstants.light_blue = 256; + shadeConstants.light_alpha = 256; + shadeConstants.fade_red = 0; + shadeConstants.fade_green = 0; + shadeConstants.fade_blue = 0; + shadeConstants.fade_alpha = 256; + shadeConstants.desaturate = 0; + shadeConstants.simple_shade = true; + } + return shadeConstants; + } +} diff --git a/src/swrenderer/viewport/r_drawerargs.h b/src/swrenderer/viewport/r_drawerargs.h new file mode 100644 index 0000000000..d36bc61a5b --- /dev/null +++ b/src/swrenderer/viewport/r_drawerargs.h @@ -0,0 +1,59 @@ + +#pragma once + +#include "templates.h" +#include "doomtype.h" +#include "doomdef.h" +#include "r_defs.h" +#include "swrenderer/drawers/r_draw.h" +#include "v_video.h" +#include "r_data/colormaps.h" +#include "r_data/r_translate.h" +#include "swrenderer/scene/r_light.h" + +struct FSWColormap; +struct FLightNode; +struct TriLight; + +namespace swrenderer +{ + class SWPixelFormatDrawers; + class DrawerArgs; + struct ShadeConstants; + + class DrawerArgs + { + public: + void SetLight(FSWColormap *base_colormap, float light, int shade); + void SetTranslationMap(lighttable_t *translation); + + uint8_t *Colormap() const; + uint8_t *TranslationMap() const { return mTranslation; } + + ShadeConstants ColormapConstants() const; + fixed_t Light() const { return LIGHTSCALE(mLight, mShade); } + + protected: + static SWPixelFormatDrawers *Drawers(); + + private: + FSWColormap *mBaseColormap = nullptr; + float mLight = 0.0f; + int mShade = 0; + uint8_t *mTranslation = nullptr; + }; + + struct ShadeConstants + { + uint16_t light_alpha; + uint16_t light_red; + uint16_t light_green; + uint16_t light_blue; + uint16_t fade_alpha; + uint16_t fade_red; + 
uint16_t fade_green; + uint16_t fade_blue; + uint16_t desaturate; + bool simple_shade; + }; +} diff --git a/src/swrenderer/viewport/r_skydrawer.cpp b/src/swrenderer/viewport/r_skydrawer.cpp new file mode 100644 index 0000000000..922324f641 --- /dev/null +++ b/src/swrenderer/viewport/r_skydrawer.cpp @@ -0,0 +1,68 @@ +// +// Copyright (C) 1993-1996 by id Software, Inc. +// +// This source is available for distribution and/or modification +// only under the terms of the DOOM Source Code License as +// published by id Software. All rights reserved. +// +// The source is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// FITNESS FOR A PARTICULAR PURPOSE. See the DOOM Source Code License +// for more details. +// + +#include +#include "r_skydrawer.h" + +namespace swrenderer +{ + void SkyDrawerArgs::DrawSingleSkyColumn() + { + Drawers()->DrawSingleSkyColumn(*this); + } + + void SkyDrawerArgs::DrawDoubleSkyColumn() + { + Drawers()->DrawDoubleSkyColumn(*this); + } + + void SkyDrawerArgs::SetDest(int x, int y) + { + auto viewport = RenderViewport::Instance(); + dc_dest = viewport->GetDest(x, y); + dc_dest_y = y; + } + + void SkyDrawerArgs::SetFrontTexture(FTexture *texture, uint32_t column) + { + if (RenderViewport::Instance()->RenderTarget->IsBgra()) + { + dc_source = (const uint8_t *)texture->GetColumnBgra(column, nullptr); + dc_sourceheight = texture->GetHeight(); + } + else + { + dc_source = texture->GetColumn(column, nullptr); + dc_sourceheight = texture->GetHeight(); + } + } + + void SkyDrawerArgs::SetBackTexture(FTexture *texture, uint32_t column) + { + if (texture == nullptr) + { + dc_source2 = nullptr; + dc_sourceheight2 = 1; + } + else if (RenderViewport::Instance()->RenderTarget->IsBgra()) + { + dc_source2 = (const uint8_t *)texture->GetColumnBgra(column, nullptr); + dc_sourceheight2 = texture->GetHeight(); + } + else + { + dc_source2 = texture->GetColumn(column, nullptr); + dc_sourceheight2 = 
texture->GetHeight(); + } + } +} diff --git a/src/swrenderer/viewport/r_skydrawer.h b/src/swrenderer/viewport/r_skydrawer.h new file mode 100644 index 0000000000..8321b4e040 --- /dev/null +++ b/src/swrenderer/viewport/r_skydrawer.h @@ -0,0 +1,58 @@ + +#pragma once + +#include "r_drawerargs.h" + +struct FSWColormap; +struct FLightNode; +struct TriLight; + +namespace swrenderer +{ + class SkyDrawerArgs : public DrawerArgs + { + public: + void SetDest(int x, int y); + void SetCount(int count) { dc_count = count; } + void SetFrontTexture(FTexture *texture, uint32_t column); + void SetBackTexture(FTexture *texture, uint32_t column); + void SetTextureVPos(uint32_t texturefrac) { dc_texturefrac = texturefrac; } + void SetTextureVStep(uint32_t iscale) { dc_iscale = iscale; } + void SetSolidTop(uint32_t color) { solid_top = color; } + void SetSolidBottom(uint32_t color) { solid_bottom = color; } + void SetFadeSky(bool enable) { fadeSky = enable; } + + uint8_t *Dest() const { return dc_dest; } + int DestY() const { return dc_dest_y; } + int Count() const { return dc_count; } + + uint32_t TextureVPos() const { return dc_texturefrac; } + uint32_t TextureVStep() const { return dc_iscale; } + + uint32_t SolidTopColor() const { return solid_top; } + uint32_t SolidBottomColor() const { return solid_bottom; } + bool FadeSky() const { return fadeSky; } + + const uint8_t *FrontTexturePixels() const { return dc_source; } + const uint8_t *BackTexturePixels() const { return dc_source2; } + int FrontTextureHeight() const { return dc_sourceheight; } + int BackTextureHeight() const { return dc_sourceheight2; } + + void DrawSingleSkyColumn(); + void DrawDoubleSkyColumn(); + + private: + uint8_t *dc_dest = nullptr; + int dc_dest_y = 0; + int dc_count = 0; + const uint8_t *dc_source; + const uint8_t *dc_source2; + uint32_t dc_sourceheight; + uint32_t dc_sourceheight2; + uint32_t dc_texturefrac; + uint32_t dc_iscale; + uint32_t solid_top; + uint32_t solid_bottom; + bool fadeSky; + }; +} diff 
--git a/src/swrenderer/viewport/r_spandrawer.cpp b/src/swrenderer/viewport/r_spandrawer.cpp new file mode 100644 index 0000000000..7d14886271 --- /dev/null +++ b/src/swrenderer/viewport/r_spandrawer.cpp @@ -0,0 +1,118 @@ +// +// Copyright (C) 1993-1996 by id Software, Inc. +// +// This source is available for distribution and/or modification +// only under the terms of the DOOM Source Code License as +// published by id Software. All rights reserved. +// +// The source is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// FITNESS FOR A PARTICULAR PURPOSE. See the DOOM Source Code License +// for more details. +// + +#include +#include "r_spandrawer.h" + +namespace swrenderer +{ + SpanDrawerArgs::SpanDrawerArgs() + { + spanfunc = &SWPixelFormatDrawers::DrawSpan; + } + + void SpanDrawerArgs::SetTexture(FTexture *tex) + { + tex->GetWidth(); + ds_xbits = tex->WidthBits; + ds_ybits = tex->HeightBits; + if ((1 << ds_xbits) > tex->GetWidth()) + { + ds_xbits--; + } + if ((1 << ds_ybits) > tex->GetHeight()) + { + ds_ybits--; + } + + auto viewport = RenderViewport::Instance(); + ds_source = viewport->RenderTarget->IsBgra() ? 
(const uint8_t*)tex->GetPixelsBgra() : tex->GetPixels(); + ds_source_mipmapped = tex->Mipmapped() && tex->GetWidth() > 1 && tex->GetHeight() > 1; + } + + void SpanDrawerArgs::SetStyle(bool masked, bool additive, fixed_t alpha) + { + if (masked) + { + if (alpha < OPAQUE || additive) + { + if (!additive) + { + spanfunc = &SWPixelFormatDrawers::DrawSpanMaskedTranslucent; + dc_srcblend = Col2RGB8[alpha >> 10]; + dc_destblend = Col2RGB8[(OPAQUE - alpha) >> 10]; + dc_srcalpha = alpha; + dc_destalpha = OPAQUE - alpha; + } + else + { + spanfunc = &SWPixelFormatDrawers::DrawSpanMaskedAddClamp; + dc_srcblend = Col2RGB8_LessPrecision[alpha >> 10]; + dc_destblend = Col2RGB8_LessPrecision[FRACUNIT >> 10]; + dc_srcalpha = alpha; + dc_destalpha = FRACUNIT; + } + } + else + { + spanfunc = &SWPixelFormatDrawers::DrawSpanMasked; + } + } + else + { + if (alpha < OPAQUE || additive) + { + if (!additive) + { + spanfunc = &SWPixelFormatDrawers::DrawSpanTranslucent; + dc_srcblend = Col2RGB8[alpha >> 10]; + dc_destblend = Col2RGB8[(OPAQUE - alpha) >> 10]; + dc_srcalpha = alpha; + dc_destalpha = OPAQUE - alpha; + } + else + { + spanfunc = &SWPixelFormatDrawers::DrawSpanAddClamp; + dc_srcblend = Col2RGB8_LessPrecision[alpha >> 10]; + dc_destblend = Col2RGB8_LessPrecision[FRACUNIT >> 10]; + dc_srcalpha = alpha; + dc_destalpha = FRACUNIT; + } + } + else + { + spanfunc = &SWPixelFormatDrawers::DrawSpan; + } + } + } + + void SpanDrawerArgs::DrawSpan() + { + (Drawers()->*spanfunc)(*this); + } + + void SpanDrawerArgs::DrawTiltedSpan(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy, FDynamicColormap *basecolormap) + { + Drawers()->DrawTiltedSpan(*this, y, x1, x2, plane_sz, plane_su, plane_sv, plane_shade, planeshade, planelightfloat, pviewx, pviewy, basecolormap); + } + + void SpanDrawerArgs::DrawFogBoundaryLine(int y, int x1, int x2) + { + 
Drawers()->DrawFogBoundaryLine(*this, y, x1, x2); + } + + void SpanDrawerArgs::DrawColoredSpan(int y, int x1, int x2) + { + Drawers()->DrawColoredSpan(*this, y, x1, x2); + } +} diff --git a/src/swrenderer/viewport/r_spandrawer.h b/src/swrenderer/viewport/r_spandrawer.h new file mode 100644 index 0000000000..dcb348f201 --- /dev/null +++ b/src/swrenderer/viewport/r_spandrawer.h @@ -0,0 +1,80 @@ + +#pragma once + +#include "r_drawerargs.h" + +struct FSWColormap; +struct FLightNode; +struct TriLight; + +namespace swrenderer +{ + class SpanDrawerArgs : public DrawerArgs + { + public: + SpanDrawerArgs(); + + void SetStyle(bool masked, bool additive, fixed_t alpha); + void SetDestY(int y) { ds_y = y; } + void SetDestX1(int x) { ds_x1 = x; } + void SetDestX2(int x) { ds_x2 = x; } + void SetTexture(FTexture *tex); + void SetTextureLOD(double lod) { ds_lod = lod; } + void SetTextureUPos(dsfixed_t xfrac) { ds_xfrac = xfrac; } + void SetTextureVPos(dsfixed_t yfrac) { ds_yfrac = yfrac; } + void SetTextureUStep(dsfixed_t xstep) { ds_xstep = xstep; } + void SetTextureVStep(dsfixed_t vstep) { ds_ystep = vstep; } + void SetSolidColor(int colorIndex) { ds_color = colorIndex; } + + void DrawSpan(); + void DrawTiltedSpan(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy, FDynamicColormap *basecolormap); + void DrawColoredSpan(int y, int x1, int x2); + void DrawFogBoundaryLine(int y, int x1, int x2); + + uint32_t *SrcBlend() const { return dc_srcblend; } + uint32_t *DestBlend() const { return dc_destblend; } + fixed_t SrcAlpha() const { return dc_srcalpha; } + fixed_t DestAlpha() const { return dc_destalpha; } + int DestY() const { return ds_y; } + int DestX1() const { return ds_x1; } + int DestX2() const { return ds_x2; } + dsfixed_t TextureUPos() const { return ds_xfrac; } + dsfixed_t TextureVPos() const { return ds_yfrac; } + dsfixed_t 
TextureUStep() const { return ds_xstep; } + dsfixed_t TextureVStep() const { return ds_ystep; } + int SolidColor() const { return ds_color; } + int TextureWidthBits() const { return ds_xbits; } + int TextureHeightBits() const { return ds_ybits; } + const uint8_t *TexturePixels() const { return ds_source; } + bool MipmappedTexture() const { return ds_source_mipmapped; } + double TextureLOD() const { return ds_lod; } + + FVector3 dc_normal; + FVector3 dc_viewpos; + FVector3 dc_viewpos_step; + TriLight *dc_lights = nullptr; + int dc_num_lights = 0; + + private: + typedef void(SWPixelFormatDrawers::*SpanDrawerFunc)(const SpanDrawerArgs &args); + SpanDrawerFunc spanfunc; + + int ds_y; + int ds_x1; + int ds_x2; + int ds_xbits; + int ds_ybits; + const uint8_t *ds_source; + bool ds_source_mipmapped; + dsfixed_t ds_xfrac; + dsfixed_t ds_yfrac; + dsfixed_t ds_xstep; + dsfixed_t ds_ystep; + uint32_t *dc_srcblend; + uint32_t *dc_destblend; + fixed_t dc_srcalpha; + fixed_t dc_destalpha; + int ds_color = 0; + double ds_lod; + }; +} diff --git a/src/swrenderer/drawers/r_drawerargs.cpp b/src/swrenderer/viewport/r_spritedrawer.cpp similarity index 66% rename from src/swrenderer/drawers/r_drawerargs.cpp rename to src/swrenderer/viewport/r_spritedrawer.cpp index 1b9b1556e7..95a494c31f 100644 --- a/src/swrenderer/drawers/r_drawerargs.cpp +++ b/src/swrenderer/viewport/r_spritedrawer.cpp @@ -1,5 +1,5 @@ /* -** r_drawerargs.cpp +** r_spritedrawer.cpp ** **--------------------------------------------------------------------------- ** Copyright 1998-2016 Randy Heit @@ -33,115 +33,15 @@ */ #include -#include "r_drawerargs.h" -#include "r_draw_pal.h" -#include "r_draw_rgba.h" +#include "r_spritedrawer.h" namespace swrenderer { - SWPixelFormatDrawers *DrawerArgs::Drawers() - { - if (RenderViewport::Instance()->RenderTarget->IsBgra()) - { - static SWTruecolorDrawers tc_drawers; - return &tc_drawers; - } - else - { - static SWPalDrawers pal_drawers; - return &pal_drawers; - } - } - 
SpriteDrawerArgs::SpriteDrawerArgs() { colfunc = &SWPixelFormatDrawers::DrawColumn; } - SpanDrawerArgs::SpanDrawerArgs() - { - spanfunc = &SWPixelFormatDrawers::DrawSpan; - } - - void DrawerArgs::SetLight(FSWColormap *base_colormap, float light, int shade) - { - mBaseColormap = base_colormap; - mTranslation = nullptr; - mLight = light; - mShade = shade; - } - - void DrawerArgs::SetTranslationMap(lighttable_t *translation) - { - mTranslation = translation; - } - - uint8_t *DrawerArgs::Colormap() const - { - if (mBaseColormap) - { - if (RenderViewport::Instance()->RenderTarget->IsBgra()) - return mBaseColormap->Maps; - else - return mBaseColormap->Maps + (GETPALOOKUP(mLight, mShade) << COLORMAPSHIFT); - } - else - { - return mTranslation; - } - } - - ShadeConstants DrawerArgs::ColormapConstants() const - { - ShadeConstants shadeConstants; - if (mBaseColormap) - { - shadeConstants.light_red = mBaseColormap->Color.r * 256 / 255; - shadeConstants.light_green = mBaseColormap->Color.g * 256 / 255; - shadeConstants.light_blue = mBaseColormap->Color.b * 256 / 255; - shadeConstants.light_alpha = mBaseColormap->Color.a * 256 / 255; - shadeConstants.fade_red = mBaseColormap->Fade.r; - shadeConstants.fade_green = mBaseColormap->Fade.g; - shadeConstants.fade_blue = mBaseColormap->Fade.b; - shadeConstants.fade_alpha = mBaseColormap->Fade.a; - shadeConstants.desaturate = MIN(abs(mBaseColormap->Desaturate), 255) * 255 / 256; - shadeConstants.simple_shade = (mBaseColormap->Color.d == 0x00ffffff && mBaseColormap->Fade.d == 0x00000000 && mBaseColormap->Desaturate == 0); - } - else - { - shadeConstants.light_red = 256; - shadeConstants.light_green = 256; - shadeConstants.light_blue = 256; - shadeConstants.light_alpha = 256; - shadeConstants.fade_red = 0; - shadeConstants.fade_green = 0; - shadeConstants.fade_blue = 0; - shadeConstants.fade_alpha = 256; - shadeConstants.desaturate = 0; - shadeConstants.simple_shade = true; - } - return shadeConstants; - } - - void 
SpanDrawerArgs::SetTexture(FTexture *tex) - { - tex->GetWidth(); - ds_xbits = tex->WidthBits; - ds_ybits = tex->HeightBits; - if ((1 << ds_xbits) > tex->GetWidth()) - { - ds_xbits--; - } - if ((1 << ds_ybits) > tex->GetHeight()) - { - ds_ybits--; - } - - auto viewport = RenderViewport::Instance(); - ds_source = viewport->RenderTarget->IsBgra() ? (const uint8_t*)tex->GetPixelsBgra() : tex->GetPixels(); - ds_source_mipmapped = tex->Mipmapped() && tex->GetWidth() > 1 && tex->GetHeight() > 1; - } - void SpriteDrawerArgs::DrawMaskedColumn(int x, fixed_t iscale, FTexture *tex, fixed_t col, double spryscale, double sprtopscreen, bool sprflipvert, const short *mfloorclip, const short *mceilingclip, bool unmasked) { auto viewport = RenderViewport::Instance(); @@ -588,180 +488,6 @@ namespace swrenderer return SetPatchStyle(style, FLOAT2FIXED(alpha), translation, color, basecolormap, shadedlightshade); } - void WallDrawerArgs::SetDest(int x, int y) - { - auto viewport = RenderViewport::Instance(); - dc_dest = viewport->GetDest(x, y); - dc_dest_y = y; - } - - void WallDrawerArgs::DrawColumn() - { - (Drawers()->*wallfunc)(*this); - } - - void SpanDrawerArgs::SetStyle(bool masked, bool additive, fixed_t alpha) - { - if (masked) - { - if (alpha < OPAQUE || additive) - { - if (!additive) - { - spanfunc = &SWPixelFormatDrawers::DrawSpanMaskedTranslucent; - dc_srcblend = Col2RGB8[alpha >> 10]; - dc_destblend = Col2RGB8[(OPAQUE - alpha) >> 10]; - dc_srcalpha = alpha; - dc_destalpha = OPAQUE - alpha; - } - else - { - spanfunc = &SWPixelFormatDrawers::DrawSpanMaskedAddClamp; - dc_srcblend = Col2RGB8_LessPrecision[alpha >> 10]; - dc_destblend = Col2RGB8_LessPrecision[FRACUNIT >> 10]; - dc_srcalpha = alpha; - dc_destalpha = FRACUNIT; - } - } - else - { - spanfunc = &SWPixelFormatDrawers::DrawSpanMasked; - } - } - else - { - if (alpha < OPAQUE || additive) - { - if (!additive) - { - spanfunc = &SWPixelFormatDrawers::DrawSpanTranslucent; - dc_srcblend = Col2RGB8[alpha >> 10]; - 
dc_destblend = Col2RGB8[(OPAQUE - alpha) >> 10]; - dc_srcalpha = alpha; - dc_destalpha = OPAQUE - alpha; - } - else - { - spanfunc = &SWPixelFormatDrawers::DrawSpanAddClamp; - dc_srcblend = Col2RGB8_LessPrecision[alpha >> 10]; - dc_destblend = Col2RGB8_LessPrecision[FRACUNIT >> 10]; - dc_srcalpha = alpha; - dc_destalpha = FRACUNIT; - } - } - else - { - spanfunc = &SWPixelFormatDrawers::DrawSpan; - } - } - } - - void WallDrawerArgs::SetStyle(bool masked, bool additive, fixed_t alpha) - { - if (alpha < OPAQUE || additive) - { - if (!additive) - { - wallfunc = &SWPixelFormatDrawers::DrawWallAddColumn; - dc_srcblend = Col2RGB8[alpha >> 10]; - dc_destblend = Col2RGB8[(OPAQUE - alpha) >> 10]; - dc_srcalpha = alpha; - dc_destalpha = OPAQUE - alpha; - } - else - { - wallfunc = &SWPixelFormatDrawers::DrawWallAddClampColumn; - dc_srcblend = Col2RGB8_LessPrecision[alpha >> 10]; - dc_destblend = Col2RGB8_LessPrecision[FRACUNIT >> 10]; - dc_srcalpha = alpha; - dc_destalpha = FRACUNIT; - } - } - else if (masked) - { - wallfunc = &SWPixelFormatDrawers::DrawWallMaskedColumn; - } - else - { - wallfunc = &SWPixelFormatDrawers::DrawWallColumn; - } - } - - bool WallDrawerArgs::IsMaskedDrawer() const - { - return wallfunc == &SWPixelFormatDrawers::DrawWallMaskedColumn; - } - - void SpanDrawerArgs::DrawSpan() - { - (Drawers()->*spanfunc)(*this); - } - - void SpanDrawerArgs::DrawTiltedSpan(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy, FDynamicColormap *basecolormap) - { - Drawers()->DrawTiltedSpan(*this, y, x1, x2, plane_sz, plane_su, plane_sv, plane_shade, planeshade, planelightfloat, pviewx, pviewy, basecolormap); - } - - void SpanDrawerArgs::DrawFogBoundaryLine(int y, int x1, int x2) - { - Drawers()->DrawFogBoundaryLine(*this, y, x1, x2); - } - - void SpanDrawerArgs::DrawColoredSpan(int y, int x1, int x2) - { - 
Drawers()->DrawColoredSpan(*this, y, x1, x2); - } - - void SkyDrawerArgs::DrawSingleSkyColumn() - { - Drawers()->DrawSingleSkyColumn(*this); - } - - void SkyDrawerArgs::DrawDoubleSkyColumn() - { - Drawers()->DrawDoubleSkyColumn(*this); - } - - void SkyDrawerArgs::SetDest(int x, int y) - { - auto viewport = RenderViewport::Instance(); - dc_dest = viewport->GetDest(x, y); - dc_dest_y = y; - } - - void SkyDrawerArgs::SetFrontTexture(FTexture *texture, uint32_t column) - { - if (RenderViewport::Instance()->RenderTarget->IsBgra()) - { - dc_source = (const uint8_t *)texture->GetColumnBgra(column, nullptr); - dc_sourceheight = texture->GetHeight(); - } - else - { - dc_source = texture->GetColumn(column, nullptr); - dc_sourceheight = texture->GetHeight(); - } - } - - void SkyDrawerArgs::SetBackTexture(FTexture *texture, uint32_t column) - { - if (texture == nullptr) - { - dc_source2 = nullptr; - dc_sourceheight2 = 1; - } - else if (RenderViewport::Instance()->RenderTarget->IsBgra()) - { - dc_source2 = (const uint8_t *)texture->GetColumnBgra(column, nullptr); - dc_sourceheight2 = texture->GetHeight(); - } - else - { - dc_source2 = texture->GetColumn(column, nullptr); - dc_sourceheight2 = texture->GetHeight(); - } - } - void SpriteDrawerArgs::FillColumn() { Drawers()->FillColumn(*this); diff --git a/src/swrenderer/viewport/r_spritedrawer.h b/src/swrenderer/viewport/r_spritedrawer.h new file mode 100644 index 0000000000..74c04c9d03 --- /dev/null +++ b/src/swrenderer/viewport/r_spritedrawer.h @@ -0,0 +1,85 @@ + +#pragma once + +#include "r_drawerargs.h" + +struct FSWColormap; +struct FLightNode; +struct TriLight; + +namespace swrenderer +{ + class SpriteDrawerArgs : public DrawerArgs + { + public: + SpriteDrawerArgs(); + + bool SetPatchStyle(FRenderStyle style, fixed_t alpha, int translation, uint32_t color, FDynamicColormap *&basecolormap, fixed_t shadedlightshade = 0); + bool SetPatchStyle(FRenderStyle style, float alpha, int translation, uint32_t color, FDynamicColormap 
*&basecolormap, fixed_t shadedlightshade = 0); + void SetDest(int x, int y); + void SetCount(int count) { dc_count = count; } + void SetSolidColor(int color) { dc_color = color; } + + void DrawMaskedColumn(int x, fixed_t iscale, FTexture *texture, fixed_t column, double spryscale, double sprtopscreen, bool sprflipvert, const short *mfloorclip, const short *mceilingclip, bool unmasked = false); + void FillColumn(); + + uint8_t *Dest() const { return dc_dest; } + int DestY() const { return dc_dest_y; } + int Count() const { return dc_count; } + + int FuzzX() const { return dc_x; } + int FuzzY1() const { return dc_yl; } + int FuzzY2() const { return dc_yh; } + + uint32_t TextureUPos() const { return dc_texturefracx; } + fixed_t TextureVPos() const { return dc_texturefrac; } + fixed_t TextureVStep() const { return dc_iscale; } + + int SolidColor() const { return dc_color; } + uint32_t SrcColorIndex() const { return dc_srccolor; } + uint32_t SrcColorBgra() const { return dc_srccolor_bgra; } + + const uint8_t *TexturePixels() const { return dc_source; } + const uint8_t *TexturePixels2() const { return dc_source2; } + uint32_t TextureHeight() const { return dc_textureheight; } + + uint32_t *SrcBlend() const { return dc_srcblend; } + uint32_t *DestBlend() const { return dc_destblend; } + fixed_t SrcAlpha() const { return dc_srcalpha; } + fixed_t DestAlpha() const { return dc_destalpha; } + + private: + bool SetBlendFunc(int op, fixed_t fglevel, fixed_t bglevel, int flags); + static fixed_t GetAlpha(int type, fixed_t alpha); + void DrawMaskedColumnBgra(int x, fixed_t iscale, FTexture *tex, fixed_t column, double spryscale, double sprtopscreen, bool sprflipvert, const short *mfloorclip, const short *mceilingclip, bool unmasked); + + uint8_t *dc_dest = nullptr; + int dc_dest_y = 0; + int dc_count = 0; + + fixed_t dc_iscale; + fixed_t dc_texturefrac; + uint32_t dc_texturefracx; + + uint32_t dc_textureheight = 0; + const uint8_t *dc_source = nullptr; + const uint8_t *dc_source2 
= nullptr; + bool drawer_needs_pal_input = false; + + uint32_t *dc_srcblend = nullptr; + uint32_t *dc_destblend = nullptr; + fixed_t dc_srcalpha = OPAQUE; + fixed_t dc_destalpha = 0; + + int dc_x = 0; + int dc_yl = 0; + int dc_yh = 0; + + int dc_color = 0; + uint32_t dc_srccolor = 0; + uint32_t dc_srccolor_bgra = 0; + + typedef void(SWPixelFormatDrawers::*SpriteDrawerFunc)(const SpriteDrawerArgs &args); + SpriteDrawerFunc colfunc; + }; +} diff --git a/src/swrenderer/scene/r_viewport.cpp b/src/swrenderer/viewport/r_viewport.cpp similarity index 99% rename from src/swrenderer/scene/r_viewport.cpp rename to src/swrenderer/viewport/r_viewport.cpp index 24413b3c55..62edf110e1 100644 --- a/src/swrenderer/scene/r_viewport.cpp +++ b/src/swrenderer/viewport/r_viewport.cpp @@ -28,7 +28,7 @@ #include "d_net.h" #include "g_level.h" #include "r_utility.h" -#include "swrenderer/scene/r_viewport.h" +#include "swrenderer/viewport/r_viewport.h" #include "swrenderer/scene/r_light.h" #include "swrenderer/drawers/r_draw.h" #include "swrenderer/things/r_playersprite.h" diff --git a/src/swrenderer/scene/r_viewport.h b/src/swrenderer/viewport/r_viewport.h similarity index 100% rename from src/swrenderer/scene/r_viewport.h rename to src/swrenderer/viewport/r_viewport.h diff --git a/src/swrenderer/viewport/r_walldrawer.cpp b/src/swrenderer/viewport/r_walldrawer.cpp new file mode 100644 index 0000000000..c30f86ffb6 --- /dev/null +++ b/src/swrenderer/viewport/r_walldrawer.cpp @@ -0,0 +1,66 @@ +// +// Copyright (C) 1993-1996 by id Software, Inc. +// +// This source is available for distribution and/or modification +// only under the terms of the DOOM Source Code License as +// published by id Software. All rights reserved. +// +// The source is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// FITNESS FOR A PARTICULAR PURPOSE. See the DOOM Source Code License +// for more details. 
+// + +#include +#include "r_walldrawer.h" + +namespace swrenderer +{ + void WallDrawerArgs::SetDest(int x, int y) + { + auto viewport = RenderViewport::Instance(); + dc_dest = viewport->GetDest(x, y); + dc_dest_y = y; + } + + void WallDrawerArgs::DrawColumn() + { + (Drawers()->*wallfunc)(*this); + } + + void WallDrawerArgs::SetStyle(bool masked, bool additive, fixed_t alpha) + { + if (alpha < OPAQUE || additive) + { + if (!additive) + { + wallfunc = &SWPixelFormatDrawers::DrawWallAddColumn; + dc_srcblend = Col2RGB8[alpha >> 10]; + dc_destblend = Col2RGB8[(OPAQUE - alpha) >> 10]; + dc_srcalpha = alpha; + dc_destalpha = OPAQUE - alpha; + } + else + { + wallfunc = &SWPixelFormatDrawers::DrawWallAddClampColumn; + dc_srcblend = Col2RGB8_LessPrecision[alpha >> 10]; + dc_destblend = Col2RGB8_LessPrecision[FRACUNIT >> 10]; + dc_srcalpha = alpha; + dc_destalpha = FRACUNIT; + } + } + else if (masked) + { + wallfunc = &SWPixelFormatDrawers::DrawWallMaskedColumn; + } + else + { + wallfunc = &SWPixelFormatDrawers::DrawWallColumn; + } + } + + bool WallDrawerArgs::IsMaskedDrawer() const + { + return wallfunc == &SWPixelFormatDrawers::DrawWallMaskedColumn; + } +} diff --git a/src/swrenderer/viewport/r_walldrawer.h b/src/swrenderer/viewport/r_walldrawer.h new file mode 100644 index 0000000000..678463b131 --- /dev/null +++ b/src/swrenderer/viewport/r_walldrawer.h @@ -0,0 +1,79 @@ + +#pragma once + +#include "r_drawerargs.h" + +struct FSWColormap; +struct FLightNode; +struct TriLight; + +namespace swrenderer +{ + class WallDrawerArgs : public DrawerArgs + { + public: + void SetStyle(bool masked, bool additive, fixed_t alpha); + void SetDest(int x, int y); + void SetCount(int count) { dc_count = count; } + void SetTexture(const uint8_t *pixels, const uint8_t *pixels2, int height) + { + dc_source = pixels; + dc_source2 = pixels2; + dc_textureheight = height; + } + void SetTextureFracBits(int bits) { dc_wall_fracbits = bits; } + void SetTextureUPos(uint32_t pos) { dc_texturefracx = 
pos; } + void SetTextureVPos(fixed_t pos) { dc_texturefrac = pos; } + void SetTextureVStep(fixed_t step) { dc_iscale = step; } + + bool IsMaskedDrawer() const; + + void DrawColumn(); + + uint8_t *Dest() const { return dc_dest; } + int DestY() const { return dc_dest_y; } + int Count() const { return dc_count; } + + uint32_t *SrcBlend() const { return dc_srcblend; } + uint32_t *DestBlend() const { return dc_destblend; } + fixed_t SrcAlpha() const { return dc_srcalpha; } + fixed_t DestAlpha() const { return dc_destalpha; } + + uint32_t TextureUPos() const { return dc_texturefracx; } + fixed_t TextureVPos() const { return dc_texturefrac; } + fixed_t TextureVStep() const { return dc_iscale; } + + const uint8_t *TexturePixels() const { return dc_source; } + const uint8_t *TexturePixels2() const { return dc_source2; } + uint32_t TextureHeight() const { return dc_textureheight; } + + int TextureFracBits() const { return dc_wall_fracbits; } + + FVector3 dc_normal; + FVector3 dc_viewpos; + FVector3 dc_viewpos_step; + TriLight *dc_lights = nullptr; + int dc_num_lights = 0; + + private: + uint8_t *dc_dest = nullptr; + int dc_dest_y = 0; + int dc_count; + + fixed_t dc_iscale; + fixed_t dc_texturefrac; + uint32_t dc_texturefracx; + uint32_t dc_textureheight; + const uint8_t *dc_source; + const uint8_t *dc_source2; + int dc_wall_fracbits; + + uint32_t *dc_srcblend; + uint32_t *dc_destblend; + fixed_t dc_srcalpha; + fixed_t dc_destalpha; + + typedef void(SWPixelFormatDrawers::*WallDrawerFunc)(const WallDrawerArgs &args); + WallDrawerFunc wallfunc = nullptr; + }; +} diff --git a/src/v_draw.cpp b/src/v_draw.cpp index 5fdd5c6904..eeca598fef 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -46,7 +46,7 @@ #include "swrenderer/drawers/r_draw.h" #include "swrenderer/drawers/r_draw_rgba.h" #include "swrenderer/scene/r_light.h" -#include "swrenderer/scene/r_viewport.h" +#include "swrenderer/viewport/r_viewport.h" #endif #include "r_data/r_translate.h" #include "doomstat.h" From 
7b4d9675c9fbc33332c7c9ae00db0a05407c7f14 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 2 Feb 2017 15:24:21 +0100 Subject: [PATCH 800/912] Fix compile errors --- src/polyrenderer/drawers/poly_draw_args.cpp | 2 +- src/polyrenderer/scene/poly_playersprite.cpp | 2 +- src/swrenderer/line/r_line.cpp | 2 +- src/swrenderer/line/r_renderdrawsegment.cpp | 2 +- src/swrenderer/things/r_decal.cpp | 2 +- src/swrenderer/things/r_playersprite.cpp | 2 +- src/swrenderer/things/r_sprite.cpp | 2 +- src/swrenderer/things/r_voxel.cpp | 6 +++--- src/swrenderer/things/r_wallsprite.cpp | 2 +- src/swrenderer/viewport/r_spritedrawer.cpp | 6 +++--- src/swrenderer/viewport/r_spritedrawer.h | 4 ++-- src/v_draw.cpp | 4 ++-- 12 files changed, 18 insertions(+), 18 deletions(-) diff --git a/src/polyrenderer/drawers/poly_draw_args.cpp b/src/polyrenderer/drawers/poly_draw_args.cpp index 5e197da710..1f35560647 100644 --- a/src/polyrenderer/drawers/poly_draw_args.cpp +++ b/src/polyrenderer/drawers/poly_draw_args.cpp @@ -34,7 +34,7 @@ #include "v_palette.h" #include "r_data/colormaps.h" #include "poly_draw_args.h" -#include "swrenderer/scene/r_viewport.h" +#include "swrenderer/viewport/r_viewport.h" void PolyDrawArgs::SetClipPlane(float a, float b, float c, float d) { diff --git a/src/polyrenderer/scene/poly_playersprite.cpp b/src/polyrenderer/scene/poly_playersprite.cpp index 0dee44f73b..9ebbc7aa1a 100644 --- a/src/polyrenderer/scene/poly_playersprite.cpp +++ b/src/polyrenderer/scene/poly_playersprite.cpp @@ -28,7 +28,7 @@ #include "poly_playersprite.h" #include "polyrenderer/poly_renderer.h" #include "d_player.h" -#include "swrenderer/scene/r_viewport.h" +#include "swrenderer/viewport/r_viewport.h" #include "swrenderer/scene/r_light.h" EXTERN_CVAR(Bool, r_drawplayersprites) diff --git a/src/swrenderer/line/r_line.cpp b/src/swrenderer/line/r_line.cpp index 0cc59826ed..6eda3ee4a4 100644 --- a/src/swrenderer/line/r_line.cpp +++ b/src/swrenderer/line/r_line.cpp @@ -37,9 +37,9 @@ #include 
"swrenderer/scene/r_opaque_pass.h" #include "swrenderer/scene/r_3dfloors.h" #include "swrenderer/scene/r_portal.h" -#include "swrenderer/scene/r_viewport.h" #include "swrenderer/scene/r_light.h" #include "swrenderer/scene/r_scene.h" +#include "swrenderer/viewport/r_viewport.h" #include "swrenderer/line/r_line.h" #include "swrenderer/line/r_walldraw.h" #include "swrenderer/line/r_wallsetup.h" diff --git a/src/swrenderer/line/r_renderdrawsegment.cpp b/src/swrenderer/line/r_renderdrawsegment.cpp index 281a8e2c29..9ff71cc1c7 100644 --- a/src/swrenderer/line/r_renderdrawsegment.cpp +++ b/src/swrenderer/line/r_renderdrawsegment.cpp @@ -76,7 +76,7 @@ namespace swrenderer SpriteDrawerArgs columndrawerargs; FDynamicColormap *patchstylecolormap = nullptr; - bool visible = columndrawerargs.SetPatchStyle(LegacyRenderStyles[additive ? STYLE_Add : STYLE_Translucent], alpha, 0, 0, patchstylecolormap); + bool visible = columndrawerargs.SetStyle(LegacyRenderStyles[additive ? STYLE_Add : STYLE_Translucent], alpha, 0, 0, patchstylecolormap); if (!visible && !ds->bFogBoundary && !ds->bFakeBoundary) { diff --git a/src/swrenderer/things/r_decal.cpp b/src/swrenderer/things/r_decal.cpp index f1966f60a3..4b37644de1 100644 --- a/src/swrenderer/things/r_decal.cpp +++ b/src/swrenderer/things/r_decal.cpp @@ -286,7 +286,7 @@ namespace swrenderer else calclighting = true; - bool visible = drawerargs.SetPatchStyle(decal->RenderStyle, (float)decal->Alpha, decal->Translation, decal->AlphaColor, basecolormap); + bool visible = drawerargs.SetStyle(decal->RenderStyle, (float)decal->Alpha, decal->Translation, decal->AlphaColor, basecolormap); // R_SetPatchStyle can modify basecolormap. 
if (rereadcolormap) diff --git a/src/swrenderer/things/r_playersprite.cpp b/src/swrenderer/things/r_playersprite.cpp index 4df08a2623..3799a7caad 100644 --- a/src/swrenderer/things/r_playersprite.cpp +++ b/src/swrenderer/things/r_playersprite.cpp @@ -596,7 +596,7 @@ namespace swrenderer FDynamicColormap *basecolormap = static_cast(Light.BaseColormap); - bool visible = drawerargs.SetPatchStyle(RenderStyle, Alpha, Translation, FillColor, basecolormap, Light.ColormapNum << FRACBITS); + bool visible = drawerargs.SetStyle(RenderStyle, Alpha, Translation, FillColor, basecolormap, Light.ColormapNum << FRACBITS); if (!visible) return; diff --git a/src/swrenderer/things/r_sprite.cpp b/src/swrenderer/things/r_sprite.cpp index 9130572640..897af41cd4 100644 --- a/src/swrenderer/things/r_sprite.cpp +++ b/src/swrenderer/things/r_sprite.cpp @@ -252,7 +252,7 @@ namespace swrenderer FDynamicColormap *basecolormap = static_cast(vis->Light.BaseColormap); - bool visible = drawerargs.SetPatchStyle(vis->RenderStyle, vis->Alpha, vis->Translation, vis->FillColor, basecolormap, vis->Light.ColormapNum << FRACBITS); + bool visible = drawerargs.SetStyle(vis->RenderStyle, vis->Alpha, vis->Translation, vis->FillColor, basecolormap, vis->Light.ColormapNum << FRACBITS); if (visible) { diff --git a/src/swrenderer/things/r_voxel.cpp b/src/swrenderer/things/r_voxel.cpp index 21006b1466..e3e101c4a4 100644 --- a/src/swrenderer/things/r_voxel.cpp +++ b/src/swrenderer/things/r_voxel.cpp @@ -33,15 +33,15 @@ #include "po_man.h" #include "r_utility.h" #include "swrenderer/drawers/r_draw.h" -#include "swrenderer/drawers/r_drawerargs.h" #include "swrenderer/drawers/r_thread.h" #include "swrenderer/things/r_visiblesprite.h" #include "swrenderer/things/r_voxel.h" #include "swrenderer/scene/r_portal.h" #include "swrenderer/scene/r_translucent_pass.h" #include "swrenderer/scene/r_scene.h" -#include "swrenderer/scene/r_viewport.h" #include "swrenderer/scene/r_light.h" +#include "swrenderer/viewport/r_viewport.h" 
+#include "swrenderer/viewport/r_spritedrawer.h" #include "swrenderer/r_memory.h" EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor) @@ -191,7 +191,7 @@ namespace swrenderer SpriteDrawerArgs drawerargs; drawerargs.SetLight(sprite->Light.BaseColormap, 0, sprite->Light.ColormapNum << FRACBITS); - bool visible = drawerargs.SetPatchStyle(sprite->RenderStyle, sprite->Alpha, sprite->Translation, sprite->FillColor, basecolormap); + bool visible = drawerargs.SetStyle(sprite->RenderStyle, sprite->Alpha, sprite->Translation, sprite->FillColor, basecolormap); if (!visible) return; diff --git a/src/swrenderer/things/r_wallsprite.cpp b/src/swrenderer/things/r_wallsprite.cpp index 945faa073c..57080743f8 100644 --- a/src/swrenderer/things/r_wallsprite.cpp +++ b/src/swrenderer/things/r_wallsprite.cpp @@ -215,7 +215,7 @@ namespace swrenderer FDynamicColormap *basecolormap = static_cast(spr->Light.BaseColormap); - bool visible = drawerargs.SetPatchStyle(spr->RenderStyle, spr->Alpha, spr->Translation, spr->FillColor, basecolormap); + bool visible = drawerargs.SetStyle(spr->RenderStyle, spr->Alpha, spr->Translation, spr->FillColor, basecolormap); // R_SetPatchStyle can modify basecolormap. 
if (rereadcolormap) diff --git a/src/swrenderer/viewport/r_spritedrawer.cpp b/src/swrenderer/viewport/r_spritedrawer.cpp index 95a494c31f..06532ff131 100644 --- a/src/swrenderer/viewport/r_spritedrawer.cpp +++ b/src/swrenderer/viewport/r_spritedrawer.cpp @@ -372,7 +372,7 @@ namespace swrenderer } } - bool SpriteDrawerArgs::SetPatchStyle(FRenderStyle style, fixed_t alpha, int translation, uint32_t color, FDynamicColormap *&basecolormap, fixed_t shadedlightshade) + bool SpriteDrawerArgs::SetStyle(FRenderStyle style, fixed_t alpha, int translation, uint32_t color, FDynamicColormap *&basecolormap, fixed_t shadedlightshade) { fixed_t fglevel, bglevel; @@ -483,9 +483,9 @@ namespace swrenderer return true; } - bool SpriteDrawerArgs::SetPatchStyle(FRenderStyle style, float alpha, int translation, uint32_t color, FDynamicColormap *&basecolormap, fixed_t shadedlightshade) + bool SpriteDrawerArgs::SetStyle(FRenderStyle style, float alpha, int translation, uint32_t color, FDynamicColormap *&basecolormap, fixed_t shadedlightshade) { - return SetPatchStyle(style, FLOAT2FIXED(alpha), translation, color, basecolormap, shadedlightshade); + return SetStyle(style, FLOAT2FIXED(alpha), translation, color, basecolormap, shadedlightshade); } void SpriteDrawerArgs::FillColumn() diff --git a/src/swrenderer/viewport/r_spritedrawer.h b/src/swrenderer/viewport/r_spritedrawer.h index 74c04c9d03..48911a179d 100644 --- a/src/swrenderer/viewport/r_spritedrawer.h +++ b/src/swrenderer/viewport/r_spritedrawer.h @@ -14,8 +14,8 @@ namespace swrenderer public: SpriteDrawerArgs(); - bool SetPatchStyle(FRenderStyle style, fixed_t alpha, int translation, uint32_t color, FDynamicColormap *&basecolormap, fixed_t shadedlightshade = 0); - bool SetPatchStyle(FRenderStyle style, float alpha, int translation, uint32_t color, FDynamicColormap *&basecolormap, fixed_t shadedlightshade = 0); + bool SetStyle(FRenderStyle style, fixed_t alpha, int translation, uint32_t color, FDynamicColormap *&basecolormap, fixed_t 
shadedlightshade = 0); + bool SetStyle(FRenderStyle style, float alpha, int translation, uint32_t color, FDynamicColormap *&basecolormap, fixed_t shadedlightshade = 0); void SetDest(int x, int y); void SetCount(int count) { dc_count = count; } void SetSolidColor(int color) { dc_color = color; } diff --git a/src/v_draw.cpp b/src/v_draw.cpp index eeca598fef..fe345a7ab8 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -199,9 +199,9 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) bool visible; FDynamicColormap *basecolormap = nullptr; if (viewport->RenderTarget->IsBgra()) - visible = drawerargs.SetPatchStyle(parms.style, parms.Alpha, -1, parms.fillcolor, basecolormap); + visible = drawerargs.SetStyle(parms.style, parms.Alpha, -1, parms.fillcolor, basecolormap); else - visible = drawerargs.SetPatchStyle(parms.style, parms.Alpha, 0, parms.fillcolor, basecolormap); + visible = drawerargs.SetStyle(parms.style, parms.Alpha, 0, parms.fillcolor, basecolormap); double x0 = parms.x - parms.left * parms.destwidth / parms.texwidth; double y0 = parms.y - parms.top * parms.destheight / parms.texheight; From c219969b68a08529e82cf334f6485dc153d98b95 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 2 Feb 2017 18:46:21 +0100 Subject: [PATCH 801/912] Fix blinking wall light regression --- src/swrenderer/line/r_walldraw.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/swrenderer/line/r_walldraw.cpp b/src/swrenderer/line/r_walldraw.cpp index 700f170819..f554249184 100644 --- a/src/swrenderer/line/r_walldraw.cpp +++ b/src/swrenderer/line/r_walldraw.cpp @@ -339,7 +339,8 @@ namespace swrenderer double xmagnitude = 1.0; - for (int x = x1; x < x2; x++, light += lightstep) + float curlight = light; + for (int x = x1; x < x2; x++, curlight += lightstep) { int y1 = uwal[x]; int y2 = dwal[x]; @@ -347,7 +348,7 @@ namespace swrenderer continue; if (!fixed) - drawerargs.SetLight(basecolormap, light, wallshade); + 
drawerargs.SetLight(basecolormap, curlight, wallshade); if (x + 1 < x2) xmagnitude = fabs(FIXED2DBL(lwal[x + 1]) - FIXED2DBL(lwal[x])); From 191438b23869ef30cf479cde3d636c43618b79be Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 3 Feb 2017 08:06:47 +0100 Subject: [PATCH 802/912] Move Drawers function to viewport --- src/swrenderer/viewport/r_drawerargs.cpp | 16 ---------------- src/swrenderer/viewport/r_drawerargs.h | 3 --- src/swrenderer/viewport/r_skydrawer.cpp | 4 ++-- src/swrenderer/viewport/r_spandrawer.cpp | 8 ++++---- src/swrenderer/viewport/r_spritedrawer.cpp | 6 +++--- src/swrenderer/viewport/r_viewport.cpp | 20 ++++++++++++++++++++ src/swrenderer/viewport/r_viewport.h | 13 +++++++++++++ src/swrenderer/viewport/r_walldrawer.cpp | 2 +- 8 files changed, 43 insertions(+), 29 deletions(-) diff --git a/src/swrenderer/viewport/r_drawerargs.cpp b/src/swrenderer/viewport/r_drawerargs.cpp index d54afad6bf..65477cd55f 100644 --- a/src/swrenderer/viewport/r_drawerargs.cpp +++ b/src/swrenderer/viewport/r_drawerargs.cpp @@ -13,25 +13,9 @@ #include #include "r_drawerargs.h" -#include "swrenderer/drawers/r_draw_pal.h" -#include "swrenderer/drawers/r_draw_rgba.h" namespace swrenderer { - SWPixelFormatDrawers *DrawerArgs::Drawers() - { - if (RenderViewport::Instance()->RenderTarget->IsBgra()) - { - static SWTruecolorDrawers tc_drawers; - return &tc_drawers; - } - else - { - static SWPalDrawers pal_drawers; - return &pal_drawers; - } - } - void DrawerArgs::SetLight(FSWColormap *base_colormap, float light, int shade) { mBaseColormap = base_colormap; diff --git a/src/swrenderer/viewport/r_drawerargs.h b/src/swrenderer/viewport/r_drawerargs.h index d36bc61a5b..3172a54401 100644 --- a/src/swrenderer/viewport/r_drawerargs.h +++ b/src/swrenderer/viewport/r_drawerargs.h @@ -33,9 +33,6 @@ namespace swrenderer ShadeConstants ColormapConstants() const; fixed_t Light() const { return LIGHTSCALE(mLight, mShade); } - protected: - static SWPixelFormatDrawers *Drawers(); - 
private: FSWColormap *mBaseColormap = nullptr; float mLight = 0.0f; diff --git a/src/swrenderer/viewport/r_skydrawer.cpp b/src/swrenderer/viewport/r_skydrawer.cpp index 922324f641..70a64e651e 100644 --- a/src/swrenderer/viewport/r_skydrawer.cpp +++ b/src/swrenderer/viewport/r_skydrawer.cpp @@ -18,12 +18,12 @@ namespace swrenderer { void SkyDrawerArgs::DrawSingleSkyColumn() { - Drawers()->DrawSingleSkyColumn(*this); + RenderViewport::Instance()->Drawers()->DrawSingleSkyColumn(*this); } void SkyDrawerArgs::DrawDoubleSkyColumn() { - Drawers()->DrawDoubleSkyColumn(*this); + RenderViewport::Instance()->Drawers()->DrawDoubleSkyColumn(*this); } void SkyDrawerArgs::SetDest(int x, int y) diff --git a/src/swrenderer/viewport/r_spandrawer.cpp b/src/swrenderer/viewport/r_spandrawer.cpp index 7d14886271..3c0acaab09 100644 --- a/src/swrenderer/viewport/r_spandrawer.cpp +++ b/src/swrenderer/viewport/r_spandrawer.cpp @@ -98,21 +98,21 @@ namespace swrenderer void SpanDrawerArgs::DrawSpan() { - (Drawers()->*spanfunc)(*this); + (RenderViewport::Instance()->Drawers()->*spanfunc)(*this); } void SpanDrawerArgs::DrawTiltedSpan(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy, FDynamicColormap *basecolormap) { - Drawers()->DrawTiltedSpan(*this, y, x1, x2, plane_sz, plane_su, plane_sv, plane_shade, planeshade, planelightfloat, pviewx, pviewy, basecolormap); + RenderViewport::Instance()->Drawers()->DrawTiltedSpan(*this, y, x1, x2, plane_sz, plane_su, plane_sv, plane_shade, planeshade, planelightfloat, pviewx, pviewy, basecolormap); } void SpanDrawerArgs::DrawFogBoundaryLine(int y, int x1, int x2) { - Drawers()->DrawFogBoundaryLine(*this, y, x1, x2); + RenderViewport::Instance()->Drawers()->DrawFogBoundaryLine(*this, y, x1, x2); } void SpanDrawerArgs::DrawColoredSpan(int y, int x1, int x2) { - Drawers()->DrawColoredSpan(*this, y, x1, x2); + 
RenderViewport::Instance()->Drawers()->DrawColoredSpan(*this, y, x1, x2); } } diff --git a/src/swrenderer/viewport/r_spritedrawer.cpp b/src/swrenderer/viewport/r_spritedrawer.cpp index 06532ff131..fee87e67e5 100644 --- a/src/swrenderer/viewport/r_spritedrawer.cpp +++ b/src/swrenderer/viewport/r_spritedrawer.cpp @@ -115,7 +115,7 @@ namespace swrenderer else if (dc_iscale < 0) dc_count = MIN(dc_count, (dc_texturefrac - dc_iscale) / (-dc_iscale)); - (Drawers()->*colfunc)(*this); + (RenderViewport::Instance()->Drawers()->*colfunc)(*this); } span++; } @@ -230,7 +230,7 @@ namespace swrenderer double v = ((dc_yl + 0.5 - sprtopscreen) / spryscale) / tex->GetHeight(); dc_texturefrac = (uint32_t)(v * (1 << 30)); - (Drawers()->*colfunc)(*this); + (RenderViewport::Instance()->Drawers()->*colfunc)(*this); } span++; } @@ -490,7 +490,7 @@ namespace swrenderer void SpriteDrawerArgs::FillColumn() { - Drawers()->FillColumn(*this); + RenderViewport::Instance()->Drawers()->FillColumn(*this); } void SpriteDrawerArgs::SetDest(int x, int y) diff --git a/src/swrenderer/viewport/r_viewport.cpp b/src/swrenderer/viewport/r_viewport.cpp index 62edf110e1..5cafc6f99f 100644 --- a/src/swrenderer/viewport/r_viewport.cpp +++ b/src/swrenderer/viewport/r_viewport.cpp @@ -33,6 +33,8 @@ #include "swrenderer/drawers/r_draw.h" #include "swrenderer/things/r_playersprite.h" #include "swrenderer/plane/r_flatplane.h" +#include "swrenderer/drawers/r_draw_pal.h" +#include "swrenderer/drawers/r_draw_rgba.h" CVAR(String, r_viewsize, "", CVAR_NOSET) @@ -44,6 +46,24 @@ namespace swrenderer return &instance; } + RenderViewport::RenderViewport() + { + tc_drawers = std::make_unique(); + pal_drawers = std::make_unique(); + } + + RenderViewport::~RenderViewport() + { + } + + SWPixelFormatDrawers *RenderViewport::Drawers() + { + if (RenderTarget->IsBgra()) + return tc_drawers.get(); + else + return pal_drawers.get(); + } + void RenderViewport::SetViewport(int fullWidth, int fullHeight, float trueratio) { int 
virtheight, virtwidth, virtwidth2, virtheight2; diff --git a/src/swrenderer/viewport/r_viewport.h b/src/swrenderer/viewport/r_viewport.h index d89dbec6ec..a61709d611 100644 --- a/src/swrenderer/viewport/r_viewport.h +++ b/src/swrenderer/viewport/r_viewport.h @@ -14,15 +14,23 @@ #pragma once #include +#include #include "r_defs.h" namespace swrenderer { + class SWPixelFormatDrawers; + class SWTruecolorDrawers; + class SWPalDrawers; + class RenderViewport { public: static RenderViewport *Instance(); + RenderViewport(); + ~RenderViewport(); + void SetViewport(int width, int height, float trueratio); void SetupFreelook(); @@ -54,11 +62,16 @@ namespace swrenderer DVector2 PointWorldToView(const DVector2 &worldPos) const; DVector2 ScaleViewToScreen(const DVector2 &scale, double viewZ, bool pixelstretch = true) const; + + SWPixelFormatDrawers *Drawers(); private: void InitTextureMapping(); void SetupBuffer(); double BaseYaspectMul = 0.0; // yaspectmul without a forced aspect ratio + + std::unique_ptr tc_drawers; + std::unique_ptr pal_drawers; }; } diff --git a/src/swrenderer/viewport/r_walldrawer.cpp b/src/swrenderer/viewport/r_walldrawer.cpp index c30f86ffb6..8672c4cf5d 100644 --- a/src/swrenderer/viewport/r_walldrawer.cpp +++ b/src/swrenderer/viewport/r_walldrawer.cpp @@ -25,7 +25,7 @@ namespace swrenderer void WallDrawerArgs::DrawColumn() { - (Drawers()->*wallfunc)(*this); + (RenderViewport::Instance()->Drawers()->*wallfunc)(*this); } void WallDrawerArgs::SetStyle(bool masked, bool additive, fixed_t alpha) From a0a40281b334604399848dd03101900dbf035e3f Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 3 Feb 2017 08:19:59 +0100 Subject: [PATCH 803/912] Simplify skyplane light selection --- src/swrenderer/plane/r_skyplane.cpp | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/src/swrenderer/plane/r_skyplane.cpp b/src/swrenderer/plane/r_skyplane.cpp index b4780065f4..85655c5942 100644 --- a/src/swrenderer/plane/r_skyplane.cpp +++ 
b/src/swrenderer/plane/r_skyplane.cpp @@ -147,7 +147,6 @@ namespace swrenderer backpos = int(fmod(backdpos, sky2cyl * 65536.0)); } - bool fakefixed = false; CameraLight *cameraLight = CameraLight::Instance(); if (cameraLight->fixedcolormap) { @@ -155,15 +154,10 @@ namespace swrenderer } else { - fakefixed = true; - cameraLight->fixedcolormap = &NormalLight; - drawerargs.SetLight(cameraLight->fixedcolormap, 0, 0); + drawerargs.SetLight(&NormalLight, 0, 0); } DrawSky(pl); - - if (fakefixed) - cameraLight->fixedcolormap = nullptr; } void RenderSkyPlane::DrawSkyColumnStripe(int start_x, int y1, int y2, double scale, double texturemid, double yrepeat) From 892350ac2ebc023bf9ba1ebaf8e1fcbd763e2012 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 3 Feb 2017 09:00:46 +0100 Subject: [PATCH 804/912] Add functions to CameraLight --- src/gl/system/gl_swframebuffer.cpp | 2 +- src/polyrenderer/poly_renderer.cpp | 4 +-- src/polyrenderer/scene/poly_decal.cpp | 2 +- src/polyrenderer/scene/poly_particle.cpp | 2 +- src/polyrenderer/scene/poly_plane.cpp | 6 ++--- src/polyrenderer/scene/poly_playersprite.cpp | 4 +-- src/polyrenderer/scene/poly_sprite.cpp | 2 +- src/polyrenderer/scene/poly_wall.cpp | 2 +- src/polyrenderer/scene/poly_wallsprite.cpp | 2 +- src/swrenderer/line/r_line.cpp | 12 ++++----- src/swrenderer/line/r_renderdrawsegment.cpp | 28 ++++++++++---------- src/swrenderer/line/r_walldraw.cpp | 8 +++--- src/swrenderer/plane/r_flatplane.cpp | 8 +++--- src/swrenderer/plane/r_skyplane.cpp | 4 +-- src/swrenderer/plane/r_slopeplane.cpp | 8 +++--- src/swrenderer/scene/r_light.cpp | 8 +++--- src/swrenderer/scene/r_light.h | 10 +++++-- src/swrenderer/scene/r_opaque_pass.cpp | 12 ++++----- src/swrenderer/scene/r_scene.cpp | 6 ++--- src/swrenderer/things/r_decal.cpp | 8 +++--- src/swrenderer/things/r_playersprite.cpp | 4 +-- src/swrenderer/things/r_visiblesprite.cpp | 2 +- src/swrenderer/things/r_wallsprite.cpp | 8 +++--- src/swrenderer/viewport/r_spritedrawer.cpp | 6 ++--- 
src/win32/fb_d3d9.cpp | 2 +- 25 files changed, 83 insertions(+), 77 deletions(-) diff --git a/src/gl/system/gl_swframebuffer.cpp b/src/gl/system/gl_swframebuffer.cpp index 09a8d5664f..04c2b7eb7b 100644 --- a/src/gl/system/gl_swframebuffer.cpp +++ b/src/gl/system/gl_swframebuffer.cpp @@ -1409,7 +1409,7 @@ void OpenGLSWFrameBuffer::Draw3DPart(bool copy3d) uint32_t color0, color1; if (Accel2D) { - auto &map = swrenderer::CameraLight::Instance()->realfixedcolormap; + auto map = swrenderer::CameraLight::Instance()->ShaderColormap(); if (map == nullptr) { color0 = 0; diff --git a/src/polyrenderer/poly_renderer.cpp b/src/polyrenderer/poly_renderer.cpp index f8b27bd983..1de84530db 100644 --- a/src/polyrenderer/poly_renderer.cpp +++ b/src/polyrenderer/poly_renderer.cpp @@ -69,10 +69,10 @@ void PolyRenderer::RenderView(player_t *player) // Apply special colormap if the target cannot do it CameraLight *cameraLight = CameraLight::Instance(); - if (cameraLight->realfixedcolormap && viewport->RenderTarget->IsBgra() && !(r_shadercolormaps && screen->Accel2D)) + if (cameraLight->ShaderColormap() && viewport->RenderTarget->IsBgra() && !(r_shadercolormaps && screen->Accel2D)) { R_BeginDrawerCommands(); - DrawerCommandQueue::QueueCommand(cameraLight->realfixedcolormap, screen); + DrawerCommandQueue::QueueCommand(cameraLight->ShaderColormap(), screen); R_EndDrawerCommands(); } } diff --git a/src/polyrenderer/scene/poly_decal.cpp b/src/polyrenderer/scene/poly_decal.cpp index 024a2dd25f..fad44862f6 100644 --- a/src/polyrenderer/scene/poly_decal.cpp +++ b/src/polyrenderer/scene/poly_decal.cpp @@ -140,7 +140,7 @@ void RenderPolyDecal::Render(const TriMatrix &worldToClip, const Vec4f &clipPlan args.SetColormap(front->ColorMap); args.SetTexture(tex, decal->Translation, true); args.uniforms.globvis = (float)swrenderer::LightVisibility::Instance()->WallGlobVis(); - if (fullbrightSprite || cameraLight->fixedlightlev >= 0 || cameraLight->fixedcolormap) + if (fullbrightSprite || 
cameraLight->FixedLightLevel() >= 0 || cameraLight->FixedColormap()) { args.uniforms.light = 256; args.uniforms.flags |= TriUniforms::fixed_light; diff --git a/src/polyrenderer/scene/poly_particle.cpp b/src/polyrenderer/scene/poly_particle.cpp index d83ac3cc4b..39f5514f45 100644 --- a/src/polyrenderer/scene/poly_particle.cpp +++ b/src/polyrenderer/scene/poly_particle.cpp @@ -76,7 +76,7 @@ void RenderPolyParticle::Render(const TriMatrix &worldToClip, const Vec4f &clipP args.uniforms.globvis = (float)swrenderer::LightVisibility::Instance()->ParticleGlobVis(); - if (fullbrightSprite || cameraLight->fixedlightlev >= 0 || cameraLight->fixedcolormap) + if (fullbrightSprite || cameraLight->FixedLightLevel() >= 0 || cameraLight->FixedColormap()) { args.uniforms.light = 256; args.uniforms.flags = TriUniforms::fixed_light; diff --git a/src/polyrenderer/scene/poly_plane.cpp b/src/polyrenderer/scene/poly_plane.cpp index 1fcbad9c99..2aec6b08df 100644 --- a/src/polyrenderer/scene/poly_plane.cpp +++ b/src/polyrenderer/scene/poly_plane.cpp @@ -99,7 +99,7 @@ void RenderPolyPlane::Render3DFloor(const TriMatrix &worldToClip, const Vec4f &c swrenderer::CameraLight *cameraLight = swrenderer::CameraLight::Instance(); int lightlevel = 255; - if (cameraLight->fixedlightlev < 0 && sub->sector->e->XFloor.lightlist.Size()) + if (cameraLight->FixedLightLevel() < 0 && sub->sector->e->XFloor.lightlist.Size()) { lightlist_t *light = P_GetPlaneLight(sub->sector, &sub->sector->ceilingplane, false); //basecolormap = light->extra_colormap; @@ -111,7 +111,7 @@ void RenderPolyPlane::Render3DFloor(const TriMatrix &worldToClip, const Vec4f &c PolyDrawArgs args; args.uniforms.globvis = (float)swrenderer::LightVisibility::Instance()->SlopePlaneGlobVis() * 48.0f; args.uniforms.light = (uint32_t)(lightlevel / 255.0f * 256.0f); - if (cameraLight->fixedlightlev >= 0 || cameraLight->fixedcolormap) + if (cameraLight->FixedLightLevel() >= 0 || cameraLight->FixedColormap()) args.uniforms.light = 256; 
args.uniforms.flags = 0; args.uniforms.subsectorDepth = subsectorDepth; @@ -307,7 +307,7 @@ void RenderPolyPlane::Render(const TriMatrix &worldToClip, const Vec4f &clipPlan PolyDrawArgs args; args.uniforms.globvis = (float)swrenderer::LightVisibility::Instance()->SlopePlaneGlobVis() * 48.0f; args.uniforms.light = (uint32_t)(frontsector->lightlevel / 255.0f * 256.0f); - if (cameraLight->fixedlightlev >= 0 || cameraLight->fixedcolormap) + if (cameraLight->FixedLightLevel() >= 0 || cameraLight->FixedColormap()) args.uniforms.light = 256; args.uniforms.flags = 0; args.uniforms.subsectorDepth = isSky ? RenderPolyScene::SkySubsectorDepth : subsectorDepth; diff --git a/src/polyrenderer/scene/poly_playersprite.cpp b/src/polyrenderer/scene/poly_playersprite.cpp index 9ebbc7aa1a..2bd8f2f65a 100644 --- a/src/polyrenderer/scene/poly_playersprite.cpp +++ b/src/polyrenderer/scene/poly_playersprite.cpp @@ -329,14 +329,14 @@ void RenderPolyPlayerSprites::RenderSprite(DPSprite *sprite, AActor *owner, floa // If the main colormap has fixed lights, and this sprite is being drawn with that // colormap, disable acceleration so that the lights can remain fixed. 
swrenderer::CameraLight *cameraLight = swrenderer::CameraLight::Instance(); - if (!noaccel && cameraLight->realfixedcolormap == nullptr && + if (!noaccel && cameraLight->ShaderColormap() == nullptr && NormalLightHasFixedLights && mybasecolormap == &NormalLight && tex->UseBasePalette()) { noaccel = true; } // [SP] If emulating GZDoom fullbright, disable acceleration - if (r_fullbrightignoresectorcolor && cameraLight->fixedlightlev >= 0) + if (r_fullbrightignoresectorcolor && cameraLight->FixedLightLevel() >= 0) mybasecolormap = &FullNormalLight; if (r_fullbrightignoresectorcolor && !foggy && sprite->GetState()->GetFullbright()) mybasecolormap = &FullNormalLight; diff --git a/src/polyrenderer/scene/poly_sprite.cpp b/src/polyrenderer/scene/poly_sprite.cpp index 3149f68a6b..0599a4b202 100644 --- a/src/polyrenderer/scene/poly_sprite.cpp +++ b/src/polyrenderer/scene/poly_sprite.cpp @@ -141,7 +141,7 @@ void RenderPolySprite::Render(const TriMatrix &worldToClip, const Vec4f &clipPla PolyDrawArgs args; args.uniforms.globvis = (float)swrenderer::LightVisibility::Instance()->SpriteGlobVis(); args.uniforms.flags = 0; - if (fullbrightSprite || cameraLight->fixedlightlev >= 0 || cameraLight->fixedcolormap) + if (fullbrightSprite || cameraLight->FixedLightLevel() >= 0 || cameraLight->FixedColormap()) { args.uniforms.light = 256; args.uniforms.flags |= TriUniforms::fixed_light; diff --git a/src/polyrenderer/scene/poly_wall.cpp b/src/polyrenderer/scene/poly_wall.cpp index 18df84744f..d8fb2a7ae6 100644 --- a/src/polyrenderer/scene/poly_wall.cpp +++ b/src/polyrenderer/scene/poly_wall.cpp @@ -353,7 +353,7 @@ FTexture *RenderPolyWall::GetTexture() int RenderPolyWall::GetLightLevel() { swrenderer::CameraLight *cameraLight = swrenderer::CameraLight::Instance(); - if (cameraLight->fixedlightlev >= 0 || cameraLight->fixedcolormap) + if (cameraLight->FixedLightLevel() >= 0 || cameraLight->FixedColormap()) { return 255; } diff --git a/src/polyrenderer/scene/poly_wallsprite.cpp 
b/src/polyrenderer/scene/poly_wallsprite.cpp index 1d321fbc0c..31aded9ac3 100644 --- a/src/polyrenderer/scene/poly_wallsprite.cpp +++ b/src/polyrenderer/scene/poly_wallsprite.cpp @@ -102,7 +102,7 @@ void RenderPolyWallSprite::Render(const TriMatrix &worldToClip, const Vec4f &cli PolyDrawArgs args; args.uniforms.globvis = (float)swrenderer::LightVisibility::Instance()->WallGlobVis(); - if (fullbrightSprite || cameraLight->fixedlightlev >= 0 || cameraLight->fixedcolormap) + if (fullbrightSprite || cameraLight->FixedLightLevel() >= 0 || cameraLight->FixedColormap()) { args.uniforms.light = 256; args.uniforms.flags = TriUniforms::fixed_light; diff --git a/src/swrenderer/line/r_line.cpp b/src/swrenderer/line/r_line.cpp index 6eda3ee4a4..cd6cb1962d 100644 --- a/src/swrenderer/line/r_line.cpp +++ b/src/swrenderer/line/r_line.cpp @@ -899,7 +899,7 @@ namespace swrenderer walltexcoords.Project(sidedef->TexelLength * lwallscale, WallC.sx1, WallC.sx2, WallT); CameraLight *cameraLight = CameraLight::Instance(); - if (cameraLight->fixedcolormap == nullptr && cameraLight->fixedlightlev < 0) + if (cameraLight->FixedColormap() == nullptr && cameraLight->FixedLightLevel() < 0) { wallshade = LIGHT2SHADE(curline->sidedef->GetLightLevel(foggy, frontsector->lightlevel) + R_ActualExtraLight(foggy)); double GlobVis = LightVisibility::Instance()->WallGlobVis(); @@ -916,7 +916,7 @@ namespace swrenderer bool SWRenderLine::IsFogBoundary(sector_t *front, sector_t *back) const { - return r_fogboundary && CameraLight::Instance()->fixedcolormap == nullptr && front->ColorMap->Fade && + return r_fogboundary && CameraLight::Instance()->FixedColormap() == nullptr && front->ColorMap->Fade && front->ColorMap->Fade != back->ColorMap->Fade && (front->GetTexture(sector_t::ceiling) != skyflatnum || back->GetTexture(sector_t::ceiling) != skyflatnum); } @@ -935,10 +935,10 @@ namespace swrenderer drawerargs.SetStyle(false, false, OPAQUE); CameraLight *cameraLight = CameraLight::Instance(); - if 
(cameraLight->fixedlightlev >= 0) - drawerargs.SetLight((r_fullbrightignoresectorcolor) ? &FullNormalLight : basecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->fixedlightlev)); - else if (cameraLight->fixedcolormap != nullptr) - drawerargs.SetLight(cameraLight->fixedcolormap, 0, 0); + if (cameraLight->FixedLightLevel() >= 0) + drawerargs.SetLight((r_fullbrightignoresectorcolor) ? &FullNormalLight : basecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->FixedLightLevel())); + else if (cameraLight->FixedColormap() != nullptr) + drawerargs.SetLight(cameraLight->FixedColormap(), 0, 0); // clip wall to the floor and ceiling auto ceilingclip = RenderOpaquePass::Instance()->ceilingclip; diff --git a/src/swrenderer/line/r_renderdrawsegment.cpp b/src/swrenderer/line/r_renderdrawsegment.cpp index 9ff71cc1c7..a3074f1d30 100644 --- a/src/swrenderer/line/r_renderdrawsegment.cpp +++ b/src/swrenderer/line/r_renderdrawsegment.cpp @@ -106,7 +106,7 @@ namespace swrenderer Clip3DFloors *clip3d = Clip3DFloors::Instance(); CameraLight *cameraLight = CameraLight::Instance(); - if (cameraLight->fixedlightlev < 0) + if (cameraLight->FixedLightLevel() < 0) { if (!(clip3d->fake3D & FAKE3D_CLIPTOP)) { @@ -148,15 +148,15 @@ namespace swrenderer spryscale = ds->iscale + ds->iscalestep * (x1 - ds->x1); rw_scalestep = ds->iscalestep; - if (cameraLight->fixedlightlev >= 0) + if (cameraLight->FixedLightLevel() >= 0) { - walldrawerargs.SetLight((r_fullbrightignoresectorcolor) ? &FullNormalLight : basecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->fixedlightlev)); - columndrawerargs.SetLight((r_fullbrightignoresectorcolor) ? &FullNormalLight : basecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->fixedlightlev)); + walldrawerargs.SetLight((r_fullbrightignoresectorcolor) ? &FullNormalLight : basecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->FixedLightLevel())); + columndrawerargs.SetLight((r_fullbrightignoresectorcolor) ? 
&FullNormalLight : basecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->FixedLightLevel())); } - else if (cameraLight->fixedcolormap != nullptr) + else if (cameraLight->FixedColormap() != nullptr) { - walldrawerargs.SetLight(cameraLight->fixedcolormap, 0, 0); - columndrawerargs.SetLight(cameraLight->fixedcolormap, 0, 0); + walldrawerargs.SetLight(cameraLight->FixedColormap(), 0, 0); + columndrawerargs.SetLight(cameraLight->FixedColormap(), 0, 0); } // find positioning @@ -284,7 +284,7 @@ namespace swrenderer { for (int x = x1; x < x2; ++x) { - if (cameraLight->fixedcolormap == nullptr && cameraLight->fixedlightlev < 0) + if (cameraLight->FixedColormap() == nullptr && cameraLight->FixedLightLevel() < 0) { columndrawerargs.SetLight(basecolormap, rw_light, wallshade); } @@ -456,10 +456,10 @@ namespace swrenderer } CameraLight *cameraLight = CameraLight::Instance(); - if (cameraLight->fixedlightlev >= 0) - drawerargs.SetLight((r_fullbrightignoresectorcolor) ? &FullNormalLight : basecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->fixedlightlev)); - else if (cameraLight->fixedcolormap != nullptr) - drawerargs.SetLight(cameraLight->fixedcolormap, 0, 0); + if (cameraLight->FixedLightLevel() >= 0) + drawerargs.SetLight((r_fullbrightignoresectorcolor) ? 
&FullNormalLight : basecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->FixedLightLevel())); + else if (cameraLight->FixedColormap() != nullptr) + drawerargs.SetLight(cameraLight->FixedColormap(), 0, 0); WallC.sz1 = ds->sz1; WallC.sz2 = ds->sz2; @@ -682,7 +682,7 @@ namespace swrenderer FDynamicColormap *basecolormap = frontsector->ColorMap; wallshade = ds->shade; CameraLight *cameraLight = CameraLight::Instance(); - if (cameraLight->fixedlightlev < 0) + if (cameraLight->FixedLightLevel() < 0) { if ((ds->bFakeBoundary & 3) == 2) { @@ -857,7 +857,7 @@ namespace swrenderer FDynamicColormap *basecolormap = frontsector->ColorMap; wallshade = ds->shade; CameraLight *cameraLight = CameraLight::Instance(); - if (cameraLight->fixedlightlev < 0) + if (cameraLight->FixedLightLevel() < 0) { if ((ds->bFakeBoundary & 3) == 2) { diff --git a/src/swrenderer/line/r_walldraw.cpp b/src/swrenderer/line/r_walldraw.cpp index f554249184..2261fa71a1 100644 --- a/src/swrenderer/line/r_walldraw.cpp +++ b/src/swrenderer/line/r_walldraw.cpp @@ -323,10 +323,10 @@ namespace swrenderer drawerargs.SetTextureFracBits(RenderViewport::Instance()->RenderTarget->IsBgra() ? 
FRACBITS : fracbits); CameraLight *cameraLight = CameraLight::Instance(); - bool fixed = (cameraLight->fixedcolormap != NULL || cameraLight->fixedlightlev >= 0); + bool fixed = (cameraLight->FixedColormap() != NULL || cameraLight->FixedLightLevel() >= 0); - if (cameraLight->fixedcolormap) - drawerargs.SetLight(cameraLight->fixedcolormap, 0, 0); + if (cameraLight->FixedColormap()) + drawerargs.SetLight(cameraLight->FixedColormap(), 0, 0); else drawerargs.SetLight(basecolormap, 0, 0); @@ -410,7 +410,7 @@ namespace swrenderer } CameraLight *cameraLight = CameraLight::Instance(); - if (cameraLight->fixedcolormap != NULL || cameraLight->fixedlightlev >= 0 || !(frontsector->e && frontsector->e->XFloor.lightlist.Size())) + if (cameraLight->FixedColormap() != NULL || cameraLight->FixedLightLevel() >= 0 || !(frontsector->e && frontsector->e->XFloor.lightlist.Size())) { ProcessNormalWall(uwal, dwal, texturemid, swal, lwal); } diff --git a/src/swrenderer/plane/r_flatplane.cpp b/src/swrenderer/plane/r_flatplane.cpp index 49a96797f2..3e630c13c8 100644 --- a/src/swrenderer/plane/r_flatplane.cpp +++ b/src/swrenderer/plane/r_flatplane.cpp @@ -112,14 +112,14 @@ namespace swrenderer GlobVis = LightVisibility::Instance()->FlatPlaneGlobVis() / planeheight; CameraLight *cameraLight = CameraLight::Instance(); - if (cameraLight->fixedlightlev >= 0) + if (cameraLight->FixedLightLevel() >= 0) { - drawerargs.SetLight(basecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->fixedlightlev)); + drawerargs.SetLight(basecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->FixedLightLevel())); plane_shade = false; } - else if (cameraLight->fixedcolormap) + else if (cameraLight->FixedColormap()) { - drawerargs.SetLight(cameraLight->fixedcolormap, 0, 0); + drawerargs.SetLight(cameraLight->FixedColormap(), 0, 0); plane_shade = false; } else diff --git a/src/swrenderer/plane/r_skyplane.cpp b/src/swrenderer/plane/r_skyplane.cpp index 85655c5942..091182c49b 100644 --- a/src/swrenderer/plane/r_skyplane.cpp +++ 
b/src/swrenderer/plane/r_skyplane.cpp @@ -148,9 +148,9 @@ namespace swrenderer } CameraLight *cameraLight = CameraLight::Instance(); - if (cameraLight->fixedcolormap) + if (cameraLight->FixedColormap()) { - drawerargs.SetLight(cameraLight->fixedcolormap, 0, 0); + drawerargs.SetLight(cameraLight->FixedColormap(), 0, 0); } else { diff --git a/src/swrenderer/plane/r_slopeplane.cpp b/src/swrenderer/plane/r_slopeplane.cpp index ee56e5da2e..c51bf0276e 100644 --- a/src/swrenderer/plane/r_slopeplane.cpp +++ b/src/swrenderer/plane/r_slopeplane.cpp @@ -154,14 +154,14 @@ namespace swrenderer basecolormap = colormap; CameraLight *cameraLight = CameraLight::Instance(); - if (cameraLight->fixedlightlev >= 0) + if (cameraLight->FixedLightLevel() >= 0) { - drawerargs.SetLight(basecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->fixedlightlev)); + drawerargs.SetLight(basecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->FixedLightLevel())); plane_shade = false; } - else if (cameraLight->fixedcolormap) + else if (cameraLight->FixedColormap()) { - drawerargs.SetLight(cameraLight->fixedcolormap, 0, 0); + drawerargs.SetLight(cameraLight->FixedColormap(), 0, 0); plane_shade = false; } else diff --git a/src/swrenderer/scene/r_light.cpp b/src/swrenderer/scene/r_light.cpp index 633e34acba..5be847bf8d 100644 --- a/src/swrenderer/scene/r_light.cpp +++ b/src/swrenderer/scene/r_light.cpp @@ -189,15 +189,15 @@ namespace swrenderer } CameraLight *cameraLight = CameraLight::Instance(); - if (cameraLight->fixedcolormap) + if (cameraLight->FixedColormap()) { - BaseColormap = cameraLight->fixedcolormap; + BaseColormap = cameraLight->FixedColormap(); ColormapNum = 0; } - else if (cameraLight->fixedlightlev >= 0) + else if (cameraLight->FixedLightLevel() >= 0) { BaseColormap = (r_fullbrightignoresectorcolor) ? 
&FullNormalLight : basecolormap; - ColormapNum = cameraLight->fixedlightlev >> COLORMAPSHIFT; + ColormapNum = cameraLight->FixedLightLevel() >> COLORMAPSHIFT; } else if (fullbright) { diff --git a/src/swrenderer/scene/r_light.h b/src/swrenderer/scene/r_light.h index 6059028a6f..3e4580d92f 100644 --- a/src/swrenderer/scene/r_light.h +++ b/src/swrenderer/scene/r_light.h @@ -59,11 +59,17 @@ namespace swrenderer public: static CameraLight *Instance(); + int FixedLightLevel() const { return fixedlightlev; } + FSWColormap *FixedColormap() const { return fixedcolormap; } + FSpecialColormap *ShaderColormap() const { return realfixedcolormap; } + + void SetCamera(AActor *actor); + void ClearShaderColormap() { realfixedcolormap = nullptr; } + + private: int fixedlightlev = 0; FSWColormap *fixedcolormap = nullptr; FSpecialColormap *realfixedcolormap = nullptr; - - void SetCamera(AActor *actor); }; class LightVisibility diff --git a/src/swrenderer/scene/r_opaque_pass.cpp b/src/swrenderer/scene/r_opaque_pass.cpp index ebd0d9ba87..58e804519d 100644 --- a/src/swrenderer/scene/r_opaque_pass.cpp +++ b/src/swrenderer/scene/r_opaque_pass.cpp @@ -483,7 +483,7 @@ namespace swrenderer // kg3D - fake lights CameraLight *cameraLight = CameraLight::Instance(); FDynamicColormap *basecolormap; - if (cameraLight->fixedlightlev < 0 && frontsector->e && frontsector->e->XFloor.lightlist.Size()) + if (cameraLight->FixedLightLevel() < 0 && frontsector->e && frontsector->e->XFloor.lightlist.Size()) { light = P_GetPlaneLight(frontsector, &frontsector->ceilingplane, false); basecolormap = light->extra_colormap; @@ -496,7 +496,7 @@ namespace swrenderer } else { - basecolormap = (r_fullbrightignoresectorcolor && cameraLight->fixedlightlev >= 0) ? &FullNormalLight : frontsector->ColorMap; + basecolormap = (r_fullbrightignoresectorcolor && cameraLight->FixedLightLevel() >= 0) ? 
&FullNormalLight : frontsector->ColorMap; } portal = frontsector->ValidatePortal(sector_t::ceiling); @@ -521,7 +521,7 @@ namespace swrenderer if (ceilingplane) ceilingplane->AddLights(frontsector->lighthead); - if (cameraLight->fixedlightlev < 0 && frontsector->e && frontsector->e->XFloor.lightlist.Size()) + if (cameraLight->FixedLightLevel() < 0 && frontsector->e && frontsector->e->XFloor.lightlist.Size()) { light = P_GetPlaneLight(frontsector, &frontsector->floorplane, false); basecolormap = light->extra_colormap; @@ -534,7 +534,7 @@ namespace swrenderer } else { - basecolormap = (r_fullbrightignoresectorcolor && cameraLight->fixedlightlev >= 0) ? &FullNormalLight : frontsector->ColorMap; + basecolormap = (r_fullbrightignoresectorcolor && cameraLight->FixedLightLevel() >= 0) ? &FullNormalLight : frontsector->ColorMap; } // killough 3/7/98: Add (x,y) offsets to flats, add deep water check @@ -606,7 +606,7 @@ namespace swrenderer else position = sector_t::floor; frontsector = &tempsec; - if (cameraLight->fixedlightlev < 0 && sub->sector->e->XFloor.lightlist.Size()) + if (cameraLight->FixedLightLevel() < 0 && sub->sector->e->XFloor.lightlist.Size()) { light = P_GetPlaneLight(sub->sector, &frontsector->floorplane, false); basecolormap = light->extra_colormap; @@ -671,7 +671,7 @@ namespace swrenderer frontsector = &tempsec; tempsec.ceilingplane.ChangeHeight(-1 / 65536.); - if (cameraLight->fixedlightlev < 0 && sub->sector->e->XFloor.lightlist.Size()) + if (cameraLight->FixedLightLevel() < 0 && sub->sector->e->XFloor.lightlist.Size()) { light = P_GetPlaneLight(sub->sector, &frontsector->ceilingplane, false); basecolormap = light->extra_colormap; diff --git a/src/swrenderer/scene/r_scene.cpp b/src/swrenderer/scene/r_scene.cpp index 8fbe1efb2a..f232f443f3 100644 --- a/src/swrenderer/scene/r_scene.cpp +++ b/src/swrenderer/scene/r_scene.cpp @@ -98,9 +98,9 @@ namespace swrenderer RenderActorView(player->mo); // Apply special colormap if the target cannot do it - if 
(CameraLight::Instance()->realfixedcolormap && viewport->RenderTarget->IsBgra() && !(r_shadercolormaps && screen->Accel2D)) + if (CameraLight::Instance()->ShaderColormap() && viewport->RenderTarget->IsBgra() && !(r_shadercolormaps && screen->Accel2D)) { - DrawerCommandQueue::QueueCommand(CameraLight::Instance()->realfixedcolormap, screen); + DrawerCommandQueue::QueueCommand(CameraLight::Instance()->ShaderColormap(), screen); } R_EndDrawerCommands(); @@ -185,7 +185,7 @@ namespace swrenderer // copy to the screen does not use a special colormap shader. if (!r_shadercolormaps && !RenderViewport::Instance()->RenderTarget->IsBgra()) { - CameraLight::Instance()->realfixedcolormap = NULL; + CameraLight::Instance()->ClearShaderColormap(); } } diff --git a/src/swrenderer/things/r_decal.cpp b/src/swrenderer/things/r_decal.cpp index 4b37644de1..098de1dc20 100644 --- a/src/swrenderer/things/r_decal.cpp +++ b/src/swrenderer/things/r_decal.cpp @@ -277,10 +277,10 @@ namespace swrenderer SpriteDrawerArgs drawerargs; - if (cameraLight->fixedlightlev >= 0) - drawerargs.SetLight((r_fullbrightignoresectorcolor) ? &FullNormalLight : usecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->fixedlightlev)); - else if (cameraLight->fixedcolormap != NULL) - drawerargs.SetLight(cameraLight->fixedcolormap, 0, 0); + if (cameraLight->FixedLightLevel() >= 0) + drawerargs.SetLight((r_fullbrightignoresectorcolor) ? &FullNormalLight : usecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->FixedLightLevel())); + else if (cameraLight->FixedColormap() != NULL) + drawerargs.SetLight(cameraLight->FixedColormap(), 0, 0); else if (!foggy && (decal->RenderFlags & RF_FULLBRIGHT)) drawerargs.SetLight((r_fullbrightignoresectorcolor) ? 
&FullNormalLight : usecolormap, 0, 0); else diff --git a/src/swrenderer/things/r_playersprite.cpp b/src/swrenderer/things/r_playersprite.cpp index 3799a7caad..0b8661e528 100644 --- a/src/swrenderer/things/r_playersprite.cpp +++ b/src/swrenderer/things/r_playersprite.cpp @@ -91,7 +91,7 @@ namespace swrenderer FDynamicColormap *basecolormap; CameraLight *cameraLight = CameraLight::Instance(); - if (cameraLight->fixedlightlev < 0 && viewsector->e && viewsector->e->XFloor.lightlist.Size()) + if (cameraLight->FixedLightLevel() < 0 && viewsector->e && viewsector->e->XFloor.lightlist.Size()) { for (i = viewsector->e->XFloor.lightlist.Size() - 1; i >= 0; i--) { @@ -486,7 +486,7 @@ namespace swrenderer // If the main colormap has fixed lights, and this sprite is being drawn with that // colormap, disable acceleration so that the lights can remain fixed. CameraLight *cameraLight = CameraLight::Instance(); - if (!noaccel && cameraLight->realfixedcolormap == nullptr && + if (!noaccel && cameraLight->ShaderColormap() == nullptr && NormalLightHasFixedLights && vis.Light.BaseColormap == &NormalLight && vis.pic->UseBasePalette()) { diff --git a/src/swrenderer/things/r_visiblesprite.cpp b/src/swrenderer/things/r_visiblesprite.cpp index cdb4da2d01..96be5e12c6 100644 --- a/src/swrenderer/things/r_visiblesprite.cpp +++ b/src/swrenderer/things/r_visiblesprite.cpp @@ -87,7 +87,7 @@ namespace swrenderer // kg3D - correct colors now CameraLight *cameraLight = CameraLight::Instance(); - if (!cameraLight->fixedcolormap && cameraLight->fixedlightlev < 0 && spr->sector->e && spr->sector->e->XFloor.lightlist.Size()) + if (!cameraLight->FixedColormap() && cameraLight->FixedLightLevel() < 0 && spr->sector->e && spr->sector->e->XFloor.lightlist.Size()) { if (!(clip3d->fake3D & FAKE3D_CLIPTOP)) { diff --git a/src/swrenderer/things/r_wallsprite.cpp b/src/swrenderer/things/r_wallsprite.cpp index 57080743f8..8c813f479e 100644 --- a/src/swrenderer/things/r_wallsprite.cpp +++ 
b/src/swrenderer/things/r_wallsprite.cpp @@ -187,10 +187,10 @@ namespace swrenderer float lightstep = float((GlobVis / spr->wallc.sz2 - lightleft) / (spr->wallc.sx2 - spr->wallc.sx1)); float light = lightleft + (x1 - spr->wallc.sx1) * lightstep; CameraLight *cameraLight = CameraLight::Instance(); - if (cameraLight->fixedlightlev >= 0) - drawerargs.SetLight(usecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->fixedlightlev)); - else if (cameraLight->fixedcolormap != NULL) - drawerargs.SetLight(cameraLight->fixedcolormap, 0, 0); + if (cameraLight->FixedLightLevel() >= 0) + drawerargs.SetLight(usecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->FixedLightLevel())); + else if (cameraLight->FixedColormap() != NULL) + drawerargs.SetLight(cameraLight->FixedColormap(), 0, 0); else if (!spr->foggy && (spr->renderflags & RF_FULLBRIGHT)) drawerargs.SetLight((r_fullbrightignoresectorcolor) ? &FullNormalLight : usecolormap, 0, 0); else diff --git a/src/swrenderer/viewport/r_spritedrawer.cpp b/src/swrenderer/viewport/r_spritedrawer.cpp index fee87e67e5..e0f0ccb3f3 100644 --- a/src/swrenderer/viewport/r_spritedrawer.cpp +++ b/src/swrenderer/viewport/r_spritedrawer.cpp @@ -436,12 +436,12 @@ namespace swrenderer colfunc = &SWPixelFormatDrawers::DrawShadedColumn; drawer_needs_pal_input = true; CameraLight *cameraLight = CameraLight::Instance(); - dc_color = cameraLight->fixedcolormap ? cameraLight->fixedcolormap->Maps[APART(color)] : basecolormap->Maps[APART(color)]; + dc_color = cameraLight->FixedColormap() ? 
cameraLight->FixedColormap()->Maps[APART(color)] : basecolormap->Maps[APART(color)]; basecolormap = &ShadeFakeColormap[16 - alpha]; - if (cameraLight->fixedlightlev >= 0 && cameraLight->fixedcolormap == NULL) + if (cameraLight->FixedLightLevel() >= 0 && !cameraLight->FixedColormap()) { fixed_t shade = shadedlightshade; - if (shade == 0) FIXEDLIGHT2SHADE(cameraLight->fixedlightlev); + if (shade == 0) FIXEDLIGHT2SHADE(cameraLight->FixedLightLevel()); SetLight(basecolormap, 0, shade); } else diff --git a/src/win32/fb_d3d9.cpp b/src/win32/fb_d3d9.cpp index a96fc7b88f..d364306a62 100644 --- a/src/win32/fb_d3d9.cpp +++ b/src/win32/fb_d3d9.cpp @@ -1405,7 +1405,7 @@ void D3DFB::Draw3DPart(bool copy3d) D3DCOLOR color0, color1; if (Accel2D) { - auto &map = swrenderer::CameraLight::Instance()->realfixedcolormap; + auto map = swrenderer::CameraLight::Instance()->ShaderColormap(); if (map == NULL) { color0 = 0; From 812cc61b16728d4ea7d713d31d680a8139bde320 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 3 Feb 2017 09:08:00 +0100 Subject: [PATCH 805/912] Fix some typos --- src/swrenderer/scene/r_light.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/swrenderer/scene/r_light.h b/src/swrenderer/scene/r_light.h index 3e4580d92f..fd919fba15 100644 --- a/src/swrenderer/scene/r_light.h +++ b/src/swrenderer/scene/r_light.h @@ -88,8 +88,8 @@ namespace swrenderer // The vis value to pass into the GETPALOOKUP or LIGHTSCALE macros double WallVis(double screenZ) const { return WallGlobVis() / screenZ; } - double SpriteVis(double screenZ) const { return WallGlobVis() / screenZ; } - double ParticleVis(double screenZ) const { return WallGlobVis() / screenZ; } + double SpriteVis(double screenZ) const { return SpriteGlobVis() / screenZ; } + double ParticleVis(double screenZ) const { return ParticleGlobVis() / screenZ; } double FlatPlaneVis(int screenY, double planeZ) const { return FlatPlaneGlobVis() / fabs(planeZ - ViewPos.Z) * 
fabs(RenderViewport::Instance()->CenterY - screenY); } private: From da346427d3cfb9b4bd2b154af8cc25f624970a33 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 3 Feb 2017 21:11:55 +0100 Subject: [PATCH 806/912] Change draw segment list to use TArray --- src/swrenderer/line/r_line.cpp | 8 +-- src/swrenderer/scene/r_portal.cpp | 30 +++-------- src/swrenderer/scene/r_portal.h | 2 - src/swrenderer/scene/r_scene.cpp | 1 - src/swrenderer/scene/r_translucent_pass.cpp | 9 +++- src/swrenderer/segments/r_drawsegment.cpp | 57 ++++++++++----------- src/swrenderer/segments/r_drawsegment.h | 24 +++++---- src/swrenderer/things/r_particle.cpp | 5 +- src/swrenderer/things/r_visiblesprite.cpp | 11 ++-- 9 files changed, 67 insertions(+), 80 deletions(-) diff --git a/src/swrenderer/line/r_line.cpp b/src/swrenderer/line/r_line.cpp index cd6cb1962d..d70ae98694 100644 --- a/src/swrenderer/line/r_line.cpp +++ b/src/swrenderer/line/r_line.cpp @@ -318,8 +318,6 @@ namespace swrenderer I_FatalError("Bad R_StoreWallRange: %i to %i", start, stop); #endif - DrawSegment *draw_segment = DrawSegmentList::Instance()->Add(); - if (!rw_prepped) { rw_prepped = true; @@ -331,6 +329,9 @@ namespace swrenderer RenderPortal *renderportal = RenderPortal::Instance(); + DrawSegment *draw_segment = RenderMemory::NewObject(); + DrawSegmentList::Instance()->Push(draw_segment); + draw_segment->CurrentPortalUniq = renderportal->CurrentPortalUniq; draw_segment->sx1 = WallC.sx1; draw_segment->sx2 = WallC.sx2; @@ -516,8 +517,7 @@ namespace swrenderer if (draw_segment->bFogBoundary || draw_segment->maskedtexturecol != nullptr) { - size_t drawsegnum = draw_segment - DrawSegmentList::Instance()->drawsegs; - DrawSegmentList::Instance()->InterestingDrawsegs.Push(drawsegnum); + DrawSegmentList::Instance()->PushInteresting(draw_segment); } } } diff --git a/src/swrenderer/scene/r_portal.cpp b/src/swrenderer/scene/r_portal.cpp index dfa7a1bd1d..d6c8022554 100644 --- a/src/swrenderer/scene/r_portal.cpp +++ 
b/src/swrenderer/scene/r_portal.cpp @@ -104,8 +104,6 @@ namespace swrenderer int savedextralight = extralight; DVector3 savedpos = ViewPos; DAngle savedangle = ViewAngle; - ptrdiff_t savedds_p = drawseglist->ds_p - drawseglist->drawsegs; - size_t savedinteresting = drawseglist->FirstInterestingDrawseg; double savedvisibility = LightVisibility::Instance()->GetVisibility(); AActor *savedcamera = camera; sector_t *savedsector = viewsector; @@ -188,7 +186,7 @@ namespace swrenderer } // Create a drawseg to clip sprites to the sky plane - DrawSegment *draw_segment = drawseglist->Add(); + DrawSegment *draw_segment = RenderMemory::NewObject(); draw_segment->CurrentPortalUniq = CurrentPortalUniq; draw_segment->siz1 = INT_MAX; draw_segment->siz2 = INT_MAX; @@ -207,13 +205,9 @@ namespace swrenderer draw_segment->foggy = false; memcpy(draw_segment->sprbottomclip, floorclip + pl->left, (pl->right - pl->left) * sizeof(short)); memcpy(draw_segment->sprtopclip, ceilingclip + pl->left, (pl->right - pl->left) * sizeof(short)); + drawseglist->Push(draw_segment); - drawseglist->firstdrawseg = draw_segment; - drawseglist->FirstInterestingDrawseg = drawseglist->InterestingDrawsegs.Size(); - - interestingStack.Push(drawseglist->FirstInterestingDrawseg); - ptrdiff_t diffnum = drawseglist->firstdrawseg - drawseglist->drawsegs; - drawsegStack.Push(diffnum); + drawseglist->PushPortal(); VisibleSpriteList::Instance()->PushPortal(); viewposStack.Push(ViewPos); visplaneStack.Push(pl); @@ -229,33 +223,23 @@ namespace swrenderer // Draw all the masked textures in a second pass, in the reverse order they // were added. This must be done separately from the previous step for the // sake of nested skyboxes. 
- while (interestingStack.Pop(drawseglist->FirstInterestingDrawseg)) + while (viewposStack.Size() > 0) { - ptrdiff_t pd = 0; - - drawsegStack.Pop(pd); - drawseglist->firstdrawseg = drawseglist->drawsegs + pd; - // Masked textures and planes need the view coordinates restored for proper positioning. viewposStack.Pop(ViewPos); RenderTranslucentPass::Instance()->Render(); - drawseglist->ds_p = drawseglist->firstdrawseg; - - VisibleSpriteList::Instance()->PopPortal(); - VisiblePlane *pl; visplaneStack.Pop(pl); if (pl->Alpha > 0 && pl->picnum != skyflatnum) { pl->Render(pl->Alpha, pl->Additive, true); } + + VisibleSpriteList::Instance()->PopPortal(); + drawseglist->PopPortal(); } - drawseglist->firstdrawseg = drawseglist->drawsegs; - drawseglist->ds_p = drawseglist->drawsegs + savedds_p; - drawseglist->InterestingDrawsegs.Resize((unsigned int)drawseglist->FirstInterestingDrawseg); - drawseglist->FirstInterestingDrawseg = savedinteresting; camera = savedcamera; viewsector = savedsector; diff --git a/src/swrenderer/scene/r_portal.h b/src/swrenderer/scene/r_portal.h index f62cab3eb0..f3e1b9bbef 100644 --- a/src/swrenderer/scene/r_portal.h +++ b/src/swrenderer/scene/r_portal.h @@ -55,8 +55,6 @@ namespace swrenderer void RenderLinePortal(PortalDrawseg* pds, int depth); void RenderLinePortalHighlight(PortalDrawseg* pds); - TArray interestingStack; - TArray drawsegStack; TArray viewposStack; TArray visplaneStack; TArray WallPortals; diff --git a/src/swrenderer/scene/r_scene.cpp b/src/swrenderer/scene/r_scene.cpp index f232f443f3..fae0762c6a 100644 --- a/src/swrenderer/scene/r_scene.cpp +++ b/src/swrenderer/scene/r_scene.cpp @@ -248,7 +248,6 @@ namespace swrenderer { RenderTranslucentPass::Instance()->Deinit(); Clip3DFloors::Instance()->Cleanup(); - DrawSegmentList::Instance()->Deinit(); } ///////////////////////////////////////////////////////////////////////// diff --git a/src/swrenderer/scene/r_translucent_pass.cpp b/src/swrenderer/scene/r_translucent_pass.cpp index 
b8d64c141e..dcf99f3f7e 100644 --- a/src/swrenderer/scene/r_translucent_pass.cpp +++ b/src/swrenderer/scene/r_translucent_pass.cpp @@ -72,8 +72,10 @@ namespace swrenderer // b) skip most of the collected drawsegs which have no portal attached. portaldrawsegs.Clear(); DrawSegmentList *drawseglist = DrawSegmentList::Instance(); - for (DrawSegment* seg = drawseglist->ds_p; seg-- > drawseglist->firstdrawseg; ) + for (auto index = drawseglist->BeginIndex(); index != drawseglist->EndIndex(); index++) { + DrawSegment *seg = drawseglist->Segment(index); + // I don't know what makes this happen (some old top-down portal code or possibly skybox code? something adds null lines...) // crashes at the first frame of the first map of Action2.wad if (!seg->curline) continue; @@ -141,9 +143,12 @@ namespace swrenderer { Clip3DFloors::Instance()->fake3D |= FAKE3D_REFRESHCLIP; } + DrawSegmentList *drawseglist = DrawSegmentList::Instance(); - for (DrawSegment *ds = drawseglist->ds_p; ds-- > drawseglist->firstdrawseg; ) + for (auto index = drawseglist->BeginIndex(); index != drawseglist->EndIndex(); index++) { + DrawSegment *ds = drawseglist->Segment(index); + // [ZZ] the same as above if (ds->CurrentPortalUniq != renderportal->CurrentPortalUniq) continue; diff --git a/src/swrenderer/segments/r_drawsegment.cpp b/src/swrenderer/segments/r_drawsegment.cpp index 8ff4e658fa..c4a03ac000 100644 --- a/src/swrenderer/segments/r_drawsegment.cpp +++ b/src/swrenderer/segments/r_drawsegment.cpp @@ -49,40 +49,39 @@ namespace swrenderer return &instance; } - void DrawSegmentList::Deinit() - { - if (drawsegs != nullptr) - { - M_Free(drawsegs); - drawsegs = nullptr; - } - } - void DrawSegmentList::Clear() { - if (drawsegs == nullptr) - { - MaxDrawSegs = 256; // [RH] Default. Increased as needed. 
- firstdrawseg = drawsegs = (DrawSegment *)M_Malloc (MaxDrawSegs * sizeof(DrawSegment)); - } - FirstInterestingDrawseg = 0; - InterestingDrawsegs.Clear (); - ds_p = drawsegs; + Segments.Clear(); + StartIndices.Clear(); + StartIndices.Push(0); + + InterestingSegments.Clear(); + StartInterestingIndices.Clear(); + StartInterestingIndices.Push(0); } - DrawSegment *DrawSegmentList::Add() + void DrawSegmentList::PushPortal() { - if (ds_p == &drawsegs[MaxDrawSegs]) - { // [RH] Grab some more drawsegs - size_t newdrawsegs = MaxDrawSegs ? MaxDrawSegs * 2 : 32; - ptrdiff_t firstofs = firstdrawseg - drawsegs; - drawsegs = (DrawSegment *)M_Realloc(drawsegs, newdrawsegs * sizeof(DrawSegment)); - firstdrawseg = drawsegs + firstofs; - ds_p = drawsegs + MaxDrawSegs; - MaxDrawSegs = newdrawsegs; - DPrintf(DMSG_NOTIFY, "MaxDrawSegs increased to %zu\n", MaxDrawSegs); - } + StartIndices.Push(Segments.Size()); + StartInterestingIndices.Push(InterestingSegments.Size()); + } - return ds_p++; + void DrawSegmentList::PopPortal() + { + Segments.Resize(StartIndices.Last()); + StartIndices.Pop(); + + StartInterestingIndices.Resize(StartInterestingIndices.Last()); + StartInterestingIndices.Pop(); + } + + void DrawSegmentList::Push(DrawSegment *segment) + { + Segments.Push(segment); + } + + void DrawSegmentList::PushInteresting(DrawSegment *segment) + { + InterestingSegments.Push(segment); } } diff --git a/src/swrenderer/segments/r_drawsegment.h b/src/swrenderer/segments/r_drawsegment.h index 6282936ac4..b44f190e43 100644 --- a/src/swrenderer/segments/r_drawsegment.h +++ b/src/swrenderer/segments/r_drawsegment.h @@ -52,19 +52,25 @@ namespace swrenderer public: static DrawSegmentList *Instance(); - DrawSegment *firstdrawseg = nullptr; - DrawSegment *ds_p = nullptr; - DrawSegment *drawsegs = nullptr; + unsigned int BeginIndex() const { return StartIndices.Last(); } + unsigned int EndIndex() const { return Segments.Size(); } + DrawSegment *Segment(int index) const { return Segments[Segments.Size() 
- 1 - index]; } - TArray InterestingDrawsegs; // drawsegs that have something drawn on them - size_t FirstInterestingDrawseg = 0; + unsigned int BeginInterestingIndex() const { return StartInterestingIndices.Last(); } + unsigned int EndInterestingIndex() const { return InterestingSegments.Size(); } + DrawSegment *InterestingSegment(int index) const { return InterestingSegments[Segments.Size() - 1 - index]; } void Clear(); - void Deinit(); - - DrawSegment *Add(); + void PushPortal(); + void PopPortal(); + void Push(DrawSegment *segment); + void PushInteresting(DrawSegment *segment); private: - size_t MaxDrawSegs = 0; + TArray Segments; + TArray StartIndices; + + TArray InterestingSegments; // drawsegs that have something drawn on them + TArray StartInterestingIndices; }; } diff --git a/src/swrenderer/things/r_particle.cpp b/src/swrenderer/things/r_particle.cpp index dd078430be..2b2eae50e2 100644 --- a/src/swrenderer/things/r_particle.cpp +++ b/src/swrenderer/things/r_particle.cpp @@ -266,9 +266,10 @@ namespace swrenderer // Draw any masked textures behind this particle so that when the // particle is drawn, it will be in front of them. 
DrawSegmentList *segmentlist = DrawSegmentList::Instance(); - for (unsigned int p = segmentlist->InterestingDrawsegs.Size(); p-- > segmentlist->FirstInterestingDrawseg; ) + for (unsigned int index = segmentlist->BeginInterestingIndex(); index != segmentlist->EndInterestingIndex(); index++) { - DrawSegment *ds = &segmentlist->drawsegs[segmentlist->InterestingDrawsegs[p]]; + DrawSegment *ds = segmentlist->InterestingSegment(index); + // kg3D - no fake segs if (ds->fake) continue; if (ds->x1 >= x2 || ds->x2 <= x1) diff --git a/src/swrenderer/things/r_visiblesprite.cpp b/src/swrenderer/things/r_visiblesprite.cpp index 96be5e12c6..7d19e14607 100644 --- a/src/swrenderer/things/r_visiblesprite.cpp +++ b/src/swrenderer/things/r_visiblesprite.cpp @@ -47,7 +47,6 @@ namespace swrenderer VisibleSprite *spr = this; - DrawSegment *ds; int i; int x1, x2; int r1, r2; @@ -280,15 +279,11 @@ namespace swrenderer // Scan drawsegs from end to start for obscuring segs. // The first drawseg that is closer than the sprite is the clip seg. 
- // Modified by Lee Killough: - // (pointer check was originally nonportable - // and buggy, by going past LEFT end of array): - - // for (ds=ds_p-1 ; ds >= drawsegs ; ds--) old buggy code - DrawSegmentList *segmentlist = DrawSegmentList::Instance(); - for (ds = segmentlist->ds_p; ds-- > segmentlist->firstdrawseg; ) // new -- killough + for (unsigned int index = segmentlist->BeginIndex(); index != segmentlist->EndIndex(); index++) { + DrawSegment *ds = segmentlist->Segment(index); + // [ZZ] portal handling here //if (ds->CurrentPortalUniq != spr->CurrentPortalUniq) // continue; From 4fadc4e9a30ff9306b6123faf1c1aa460190dbee Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 3 Feb 2017 21:25:51 +0100 Subject: [PATCH 807/912] Fix typo --- src/swrenderer/segments/r_drawsegment.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/swrenderer/segments/r_drawsegment.h b/src/swrenderer/segments/r_drawsegment.h index b44f190e43..e0710619ac 100644 --- a/src/swrenderer/segments/r_drawsegment.h +++ b/src/swrenderer/segments/r_drawsegment.h @@ -54,11 +54,11 @@ namespace swrenderer unsigned int BeginIndex() const { return StartIndices.Last(); } unsigned int EndIndex() const { return Segments.Size(); } - DrawSegment *Segment(int index) const { return Segments[Segments.Size() - 1 - index]; } + DrawSegment *Segment(unsigned int index) const { return Segments[Segments.Size() - 1 - index]; } unsigned int BeginInterestingIndex() const { return StartInterestingIndices.Last(); } unsigned int EndInterestingIndex() const { return InterestingSegments.Size(); } - DrawSegment *InterestingSegment(int index) const { return InterestingSegments[Segments.Size() - 1 - index]; } + DrawSegment *InterestingSegment(unsigned int index) const { return InterestingSegments[InterestingSegments.Size() - 1 - index]; } void Clear(); void PushPortal(); From 627a388d5783863a03fd6c1bbe15ae058920dc94 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 4 Feb 2017 00:25:37 +0100 
Subject: [PATCH 808/912] Move renderer singletons into a RenderThread class --- src/CMakeLists.txt | 1 + src/swrenderer/line/r_line.cpp | 75 ++++++++++--------- src/swrenderer/line/r_line.h | 10 ++- src/swrenderer/line/r_renderdrawsegment.cpp | 22 ++++-- src/swrenderer/line/r_renderdrawsegment.h | 5 ++ src/swrenderer/line/r_walldraw.cpp | 8 +- src/swrenderer/line/r_walldraw.h | 5 ++ src/swrenderer/plane/r_flatplane.cpp | 8 +- src/swrenderer/plane/r_flatplane.h | 4 + src/swrenderer/plane/r_skyplane.cpp | 8 +- src/swrenderer/plane/r_skyplane.h | 4 + src/swrenderer/plane/r_slopeplane.cpp | 10 ++- src/swrenderer/plane/r_slopeplane.h | 5 ++ src/swrenderer/plane/r_visibleplane.cpp | 8 +- src/swrenderer/plane/r_visibleplane.h | 4 +- src/swrenderer/plane/r_visibleplanelist.cpp | 26 +++---- src/swrenderer/plane/r_visibleplanelist.h | 5 +- src/swrenderer/r_renderthread.cpp | 70 +++++++++++++++++ src/swrenderer/r_renderthread.h | 57 ++++++++++++++ src/swrenderer/r_swrenderer.cpp | 16 ++-- src/swrenderer/r_swrenderer.h | 3 + src/swrenderer/scene/r_3dfloors.cpp | 10 +-- src/swrenderer/scene/r_3dfloors.h | 6 +- src/swrenderer/scene/r_opaque_pass.cpp | 32 ++++---- src/swrenderer/scene/r_opaque_pass.h | 7 +- src/swrenderer/scene/r_portal.cpp | 61 +++++++-------- src/swrenderer/scene/r_portal.h | 5 +- src/swrenderer/scene/r_scene.cpp | 41 +++++----- src/swrenderer/scene/r_scene.h | 6 +- src/swrenderer/scene/r_translucent_pass.cpp | 34 ++++----- src/swrenderer/scene/r_translucent_pass.h | 5 +- src/swrenderer/segments/r_clipsegment.cpp | 6 -- src/swrenderer/segments/r_clipsegment.h | 2 - src/swrenderer/segments/r_drawsegment.cpp | 5 +- src/swrenderer/segments/r_drawsegment.h | 4 +- src/swrenderer/things/r_decal.cpp | 23 +++--- src/swrenderer/things/r_decal.h | 6 +- src/swrenderer/things/r_particle.cpp | 23 +++--- src/swrenderer/things/r_particle.h | 6 +- src/swrenderer/things/r_playersprite.cpp | 8 +- src/swrenderer/things/r_playersprite.h | 4 +- src/swrenderer/things/r_sprite.cpp | 
11 +-- src/swrenderer/things/r_sprite.h | 4 +- src/swrenderer/things/r_visiblesprite.cpp | 17 +++-- src/swrenderer/things/r_visiblesprite.h | 6 +- src/swrenderer/things/r_visiblespritelist.cpp | 6 -- src/swrenderer/things/r_visiblespritelist.h | 2 - src/swrenderer/things/r_voxel.cpp | 9 ++- src/swrenderer/things/r_voxel.h | 4 +- src/swrenderer/things/r_wallsprite.cpp | 15 ++-- src/swrenderer/things/r_wallsprite.h | 4 +- 51 files changed, 468 insertions(+), 258 deletions(-) create mode 100644 src/swrenderer/r_renderthread.cpp create mode 100644 src/swrenderer/r_renderthread.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 885e31356d..3032b03b22 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -848,6 +848,7 @@ set( NOT_COMPILED_SOURCE_FILES set( FASTMATH_PCH_SOURCES swrenderer/r_swrenderer.cpp swrenderer/r_memory.cpp + swrenderer/r_renderthread.cpp swrenderer/drawers/r_draw.cpp swrenderer/drawers/r_draw_pal.cpp swrenderer/drawers/r_draw_rgba.cpp diff --git a/src/swrenderer/line/r_line.cpp b/src/swrenderer/line/r_line.cpp index d70ae98694..156c2f3af9 100644 --- a/src/swrenderer/line/r_line.cpp +++ b/src/swrenderer/line/r_line.cpp @@ -49,6 +49,7 @@ #include "swrenderer/plane/r_visibleplane.h" #include "swrenderer/plane/r_visibleplanelist.h" #include "swrenderer/things/r_decal.h" +#include "swrenderer/r_renderthread.h" CVAR(Bool, r_fogboundary, true, 0) CVAR(Bool, r_drawmirrors, true, 0) @@ -56,6 +57,11 @@ EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor); namespace swrenderer { + SWRenderLine::SWRenderLine(RenderThread *thread) + { + Thread = thread; + } + void SWRenderLine::Render(seg_t *line, subsector_t *subsector, sector_t *sector, sector_t *fakebacksector, VisiblePlane *linefloorplane, VisiblePlane *lineceilingplane, bool infog, FDynamicColormap *colormap) { static sector_t tempsec; // killough 3/8/98: ceiling/water hack @@ -79,17 +85,17 @@ namespace swrenderer if (pt1.Y * (pt1.X - pt2.X) + pt1.X * (pt2.Y - pt1.Y) >= 0) return; - if 
(WallC.Init(pt1, pt2, 32.0 / (1 << 12))) + if (WallC.Init(Thread, pt1, pt2, 32.0 / (1 << 12))) return; - RenderPortal *renderportal = RenderPortal::Instance(); + RenderPortal *renderportal = Thread->Portal.get(); if (WallC.sx1 >= renderportal->WindowRight || WallC.sx2 <= renderportal->WindowLeft) return; if (line->linedef == NULL) { - if (RenderClipSegment::Instance()->Check(WallC.sx1, WallC.sx2)) + if (Thread->ClipSegments->Check(WallC.sx1, WallC.sx2)) { InSubsector->flags |= SSECF_DRAWN; } @@ -107,7 +113,7 @@ namespace swrenderer if ((v1 == line->v1 && v2 == line->v2) || (v2 == line->v1 && v1 == line->v2)) { // The seg is the entire wall. - WallT.InitFromWallCoords(&WallC); + WallT.InitFromWallCoords(Thread, &WallC); } else { // The seg is only part of the wall. @@ -115,10 +121,10 @@ namespace swrenderer { swapvalues(v1, v2); } - WallT.InitFromLine(v1->fPos() - ViewPos, v2->fPos() - ViewPos); + WallT.InitFromLine(Thread, v1->fPos() - ViewPos, v2->fPos() - ViewPos); } - Clip3DFloors *clip3d = Clip3DFloors::Instance(); + Clip3DFloors *clip3d = Thread->Clip3DFloors.get(); if (!(clip3d->fake3D & FAKE3D_FAKEBACK)) { @@ -142,7 +148,7 @@ namespace swrenderer // kg3D - its fake, no transfer_heights if (!(clip3d->fake3D & FAKE3D_FAKEBACK)) { // killough 3/8/98, 4/4/98: hack for invisible ceilings / deep water - backsector = RenderOpaquePass::Instance()->FakeFlat(backsector, &tempsec, nullptr, nullptr, curline, WallC.sx1, WallC.sx2, rw_frontcz1, rw_frontcz2); + backsector = Thread->OpaquePass->FakeFlat(backsector, &tempsec, nullptr, nullptr, curline, WallC.sx1, WallC.sx2, rw_frontcz1, rw_frontcz2); } doorclosed = false; // killough 4/16/98 @@ -255,7 +261,7 @@ namespace swrenderer // mark their subsectors as visible for automap texturing. 
if (hasglnodes && !(InSubsector->flags & SSECF_DRAWN)) { - if (RenderClipSegment::Instance()->Check(WallC.sx1, WallC.sx2)) + if (Thread->ClipSegments->Check(WallC.sx1, WallC.sx2)) { InSubsector->flags |= SSECF_DRAWN; } @@ -279,7 +285,7 @@ namespace swrenderer } static SWRenderLine *self = this; - bool visible = RenderClipSegment::Instance()->Clip(WallC.sx1, WallC.sx2, solid, [](int x1, int x2) -> bool + bool visible = Thread->ClipSegments->Clip(WallC.sx1, WallC.sx2, solid, [](int x1, int x2) -> bool { return self->RenderWallSegment(x1, x2); }); @@ -327,10 +333,10 @@ namespace swrenderer rw_offset = FLOAT2FIXED(sidedef->GetTextureXOffset(side_t::mid)); rw_light = rw_lightleft + rw_lightstep * (start - WallC.sx1); - RenderPortal *renderportal = RenderPortal::Instance(); + RenderPortal *renderportal = Thread->Portal.get(); DrawSegment *draw_segment = RenderMemory::NewObject(); - DrawSegmentList::Instance()->Push(draw_segment); + Thread->DrawSegments->Push(draw_segment); draw_segment->CurrentPortalUniq = renderportal->CurrentPortalUniq; draw_segment->sx1 = WallC.sx1; @@ -351,7 +357,7 @@ namespace swrenderer draw_segment->bFakeBoundary = false; draw_segment->foggy = foggy; - Clip3DFloors *clip3d = Clip3DFloors::Instance(); + Clip3DFloors *clip3d = Thread->Clip3DFloors.get(); if (clip3d->fake3D & FAKE3D_FAKEMASK) draw_segment->fake = 1; else draw_segment->fake = 0; @@ -448,7 +454,7 @@ namespace swrenderer // kg3D - backup for mid and fake walls draw_segment->bkup = RenderMemory::AllocMemory(stop - start); - memcpy(draw_segment->bkup, &RenderOpaquePass::Instance()->ceilingclip[start], sizeof(short)*(stop - start)); + memcpy(draw_segment->bkup, &Thread->OpaquePass->ceilingclip[start], sizeof(short)*(stop - start)); draw_segment->bFogBoundary = IsFogBoundary(frontsector, backsector); if (sidedef->GetTexture(side_t::mid).isValid() || draw_segment->bFakeBoundary) @@ -517,7 +523,7 @@ namespace swrenderer if (draw_segment->bFogBoundary || draw_segment->maskedtexturecol != 
nullptr) { - DrawSegmentList::Instance()->PushInteresting(draw_segment); + Thread->DrawSegments->PushInteresting(draw_segment); } } } @@ -527,7 +533,7 @@ namespace swrenderer { if (ceilingplane) { // killough 4/11/98: add NULL ptr checks - ceilingplane = VisiblePlaneList::Instance()->GetRange(ceilingplane, start, stop); + ceilingplane = Thread->PlaneList->GetRange(ceilingplane, start, stop); } else { @@ -539,7 +545,7 @@ namespace swrenderer { if (floorplane) { // killough 4/11/98: add NULL ptr checks - floorplane = VisiblePlaneList::Instance()->GetRange(floorplane, start, stop); + floorplane = Thread->PlaneList->GetRange(floorplane, start, stop); } else { @@ -558,13 +564,13 @@ namespace swrenderer if (((draw_segment->silhouette & SIL_TOP) || maskedtexture) && draw_segment->sprtopclip == nullptr) { draw_segment->sprtopclip = RenderMemory::AllocMemory(stop - start); - memcpy(draw_segment->sprtopclip, &RenderOpaquePass::Instance()->ceilingclip[start], sizeof(short)*(stop - start)); + memcpy(draw_segment->sprtopclip, &Thread->OpaquePass->ceilingclip[start], sizeof(short)*(stop - start)); } if (((draw_segment->silhouette & SIL_BOTTOM) || maskedtexture) && draw_segment->sprbottomclip == nullptr) { draw_segment->sprbottomclip = RenderMemory::AllocMemory(stop - start); - memcpy(draw_segment->sprbottomclip, &RenderOpaquePass::Instance()->floorclip[start], sizeof(short)*(stop - start)); + memcpy(draw_segment->sprbottomclip, &Thread->OpaquePass->floorclip[start], sizeof(short)*(stop - start)); } if (maskedtexture && curline->sidedef->GetTexture(side_t::mid).isValid()) @@ -576,12 +582,12 @@ namespace swrenderer // [ZZ] Only if not an active mirror if (!rw_markportal) { - RenderDecal::RenderDecals(curline->sidedef, draw_segment, wallshade, rw_lightleft, rw_lightstep, curline, WallC, foggy, basecolormap, walltop.ScreenY, wallbottom.ScreenY); + RenderDecal::RenderDecals(Thread, curline->sidedef, draw_segment, wallshade, rw_lightleft, rw_lightstep, curline, WallC, foggy, 
basecolormap, walltop.ScreenY, wallbottom.ScreenY); } if (rw_markportal) { - RenderPortal::Instance()->AddLinePortal(curline->linedef, draw_segment->x1, draw_segment->x2, draw_segment->sprtopclip, draw_segment->sprbottomclip); + Thread->Portal->AddLinePortal(curline->linedef, draw_segment->x1, draw_segment->x2, draw_segment->sprtopclip, draw_segment->sprbottomclip); } return (clip3d->fake3D & FAKE3D_FAKEMASK) == 0; @@ -598,7 +604,7 @@ namespace swrenderer linedef = curline->linedef; // mark the segment as visible for auto map - if (!RenderScene::Instance()->DontMapLines()) linedef->flags |= ML_MAPPED; + if (!Thread->Scene->DontMapLines()) linedef->flags |= ML_MAPPED; midtexture = toptexture = bottomtexture = 0; @@ -686,8 +692,7 @@ namespace swrenderer // wall but nothing to draw for it. // Recalculate walltop so that the wall is clipped by the back sector's // ceiling instead of the front sector's ceiling. - RenderPortal *renderportal = RenderPortal::Instance(); - walltop.Project(backsector->ceilingplane, &WallC, curline, renderportal->MirrorFlags & RF_XFLIP); + walltop.Project(backsector->ceilingplane, &WallC, curline, Thread->Portal->MirrorFlags & RF_XFLIP); } // Putting sky ceilings on the front and back of a line alters the way unpegged // positioning works. 
@@ -941,8 +946,8 @@ namespace swrenderer drawerargs.SetLight(cameraLight->FixedColormap(), 0, 0); // clip wall to the floor and ceiling - auto ceilingclip = RenderOpaquePass::Instance()->ceilingclip; - auto floorclip = RenderOpaquePass::Instance()->floorclip; + auto ceilingclip = Thread->OpaquePass->ceilingclip; + auto floorclip = Thread->OpaquePass->floorclip; for (x = x1; x < x2; ++x) { if (walltop.ScreenY[x] < ceilingclip[x]) @@ -955,7 +960,7 @@ namespace swrenderer } } - Clip3DFloors *clip3d = Clip3DFloors::Instance(); + Clip3DFloors *clip3d = Thread->Clip3DFloors.get(); // mark ceiling areas if (markceiling) @@ -1046,7 +1051,7 @@ namespace swrenderer rw_offset = -rw_offset; } - RenderWallPart renderWallpart; + RenderWallPart renderWallpart(Thread); renderWallpart.Render(drawerargs, frontsector, curline, WallC, rw_pic, x1, x2, walltop.ScreenY, wallbottom.ScreenY, rw_midtexturemid, walltexcoords.VStep, walltexcoords.UPos, yscale, MAX(rw_frontcz1, rw_frontcz2), MIN(rw_frontfz1, rw_frontfz2), false, wallshade, rw_offset, rw_light, rw_lightstep, light_list, foggy, basecolormap); } fillshort(ceilingclip + x1, x2 - x1, viewheight); @@ -1083,7 +1088,7 @@ namespace swrenderer rw_offset = -rw_offset; } - RenderWallPart renderWallpart; + RenderWallPart renderWallpart(Thread); renderWallpart.Render(drawerargs, frontsector, curline, WallC, rw_pic, x1, x2, walltop.ScreenY, wallupper.ScreenY, rw_toptexturemid, walltexcoords.VStep, walltexcoords.UPos, yscale, MAX(rw_frontcz1, rw_frontcz2), MIN(rw_backcz1, rw_backcz2), false, wallshade, rw_offset, rw_light, rw_lightstep, light_list, foggy, basecolormap); } memcpy(ceilingclip + x1, wallupper.ScreenY + x1, (x2 - x1) * sizeof(short)); @@ -1123,7 +1128,7 @@ namespace swrenderer rw_offset = -rw_offset; } - RenderWallPart renderWallpart; + RenderWallPart renderWallpart(Thread); renderWallpart.Render(drawerargs, frontsector, curline, WallC, rw_pic, x1, x2, walllower.ScreenY, wallbottom.ScreenY, rw_bottomtexturemid, 
walltexcoords.VStep, walltexcoords.UPos, yscale, MAX(rw_backfz1, rw_backfz2), MIN(rw_frontfz1, rw_frontfz2), false, wallshade, rw_offset, rw_light, rw_lightstep, light_list, foggy, basecolormap); } memcpy(floorclip + x1, walllower.ScreenY + x1, (x2 - x1) * sizeof(short)); @@ -1139,7 +1144,7 @@ namespace swrenderer //////////////////////////////////////////////////////////////////////////// // Transform and clip coordinates. Returns true if it was clipped away - bool FWallCoords::Init(const DVector2 &pt1, const DVector2 &pt2, double too_close) + bool FWallCoords::Init(RenderThread *thread, const DVector2 &pt1, const DVector2 &pt2, double too_close) { tleft.X = float(pt1.X * ViewSin - pt1.Y * ViewCos); tright.X = float(pt2.X * ViewSin - pt2.Y * ViewCos); @@ -1147,7 +1152,7 @@ namespace swrenderer tleft.Y = float(pt1.X * ViewTanCos + pt1.Y * ViewTanSin); tright.Y = float(pt2.X * ViewTanCos + pt2.Y * ViewTanSin); - RenderPortal *renderportal = RenderPortal::Instance(); + RenderPortal *renderportal = thread->Portal.get(); auto viewport = RenderViewport::Instance(); if (renderportal->MirrorFlags & RF_XFLIP) @@ -1201,12 +1206,12 @@ namespace swrenderer ///////////////////////////////////////////////////////////////////////// - void FWallTmapVals::InitFromWallCoords(const FWallCoords *wallc) + void FWallTmapVals::InitFromWallCoords(RenderThread *thread, const FWallCoords *wallc) { const FVector2 *left = &wallc->tleft; const FVector2 *right = &wallc->tright; - RenderPortal *renderportal = RenderPortal::Instance(); + RenderPortal *renderportal = thread->Portal.get(); if (renderportal->MirrorFlags & RF_XFLIP) { @@ -1218,7 +1223,7 @@ namespace swrenderer InvZstep = right->Y - left->Y; } - void FWallTmapVals::InitFromLine(const DVector2 &left, const DVector2 &right) + void FWallTmapVals::InitFromLine(RenderThread *thread, const DVector2 &left, const DVector2 &right) { // Coordinates should have already had viewx,viewy subtracted @@ -1227,7 +1232,7 @@ namespace swrenderer double 
fully1 = left.X * ViewTanCos + left.Y * ViewTanSin; double fully2 = right.X * ViewTanCos + right.Y * ViewTanSin; - RenderPortal *renderportal = RenderPortal::Instance(); + RenderPortal *renderportal = thread->Portal.get(); if (renderportal->MirrorFlags & RF_XFLIP) { diff --git a/src/swrenderer/line/r_line.h b/src/swrenderer/line/r_line.h index 3da9e0d854..75b6d69b34 100644 --- a/src/swrenderer/line/r_line.h +++ b/src/swrenderer/line/r_line.h @@ -25,6 +25,7 @@ struct FDynamicColormap; namespace swrenderer { + class RenderThread; struct VisiblePlane; struct FWallCoords @@ -35,7 +36,7 @@ namespace swrenderer float sz1, sz2; // depth at left, right of wall in screen space yb1,yb2 short sx1, sx2; // x coords at left, right of wall in screen space xb1,xb2 - bool Init(const DVector2 &pt1, const DVector2 &pt2, double too_close); + bool Init(RenderThread *thread, const DVector2 &pt1, const DVector2 &pt2, double too_close); }; struct FWallTmapVals @@ -43,15 +44,18 @@ namespace swrenderer float UoverZorg, UoverZstep; float InvZorg, InvZstep; - void InitFromWallCoords(const FWallCoords *wallc); - void InitFromLine(const DVector2 &left, const DVector2 &right); + void InitFromWallCoords(RenderThread *thread, const FWallCoords *wallc); + void InitFromLine(RenderThread *thread, const DVector2 &left, const DVector2 &right); }; class SWRenderLine { public: + SWRenderLine(RenderThread *thread); void Render(seg_t *line, subsector_t *subsector, sector_t *sector, sector_t *fakebacksector, VisiblePlane *floorplane, VisiblePlane *ceilingplane, bool foggy, FDynamicColormap *basecolormap); + RenderThread *Thread = nullptr; + private: bool RenderWallSegment(int x1, int x2); void SetWallVariables(bool needlights); diff --git a/src/swrenderer/line/r_renderdrawsegment.cpp b/src/swrenderer/line/r_renderdrawsegment.cpp index a3074f1d30..203e02c447 100644 --- a/src/swrenderer/line/r_renderdrawsegment.cpp +++ b/src/swrenderer/line/r_renderdrawsegment.cpp @@ -29,6 +29,7 @@ #include 
"r_data/colormaps.h" #include "d_net.h" #include "swrenderer/r_memory.h" +#include "swrenderer/r_renderthread.h" #include "swrenderer/drawers/r_draw.h" #include "swrenderer/scene/r_3dfloors.h" #include "swrenderer/scene/r_opaque_pass.h" @@ -47,6 +48,11 @@ EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor); namespace swrenderer { + RenderDrawSegment::RenderDrawSegment(RenderThread *thread) + { + Thread = thread; + } + void RenderDrawSegment::Render(DrawSegment *ds, int x1, int x2) { auto viewport = RenderViewport::Instance(); @@ -95,7 +101,7 @@ namespace swrenderer } // killough 4/13/98: get correct lightlevel for 2s normal textures - sec = RenderOpaquePass::Instance()->FakeFlat(frontsector, &tempsec, nullptr, nullptr, nullptr, 0, 0, 0, 0); + sec = Thread->OpaquePass->FakeFlat(frontsector, &tempsec, nullptr, nullptr, nullptr, 0, 0, 0, 0); FDynamicColormap *basecolormap = sec->ColorMap; // [RH] Set basecolormap @@ -103,7 +109,7 @@ namespace swrenderer rw_lightstep = ds->lightstep; rw_light = ds->light + (x1 - ds->x1) * rw_lightstep; - Clip3DFloors *clip3d = Clip3DFloors::Instance(); + Clip3DFloors *clip3d = Thread->Clip3DFloors.get(); CameraLight *cameraLight = CameraLight::Instance(); if (cameraLight->FixedLightLevel() < 0) @@ -361,7 +367,7 @@ namespace swrenderer double top, bot; GetMaskedWallTopBottom(ds, top, bot); - RenderWallPart renderWallpart; + RenderWallPart renderWallpart(Thread); renderWallpart.Render(walldrawerargs, frontsector, curline, WallC, rw_pic, x1, x2, mceilingclip, mfloorclip, texturemid, MaskedSWall, maskedtexturecol, ds->yscale, top, bot, true, wallshade, rw_offset, rw_light, rw_lightstep, nullptr, ds->foggy, basecolormap); } @@ -471,7 +477,7 @@ namespace swrenderer WallC.tright.Y = ds->cy + ds->cdy; WallT = ds->tmapvals; - Clip3DFloors *clip3d = Clip3DFloors::Instance(); + Clip3DFloors *clip3d = Thread->Clip3DFloors.get(); wallupper.Project(clip3d->sclipTop - ViewPos.Z, &WallC); walllower.Project(clip3d->sclipBottom - ViewPos.Z, &WallC); @@ 
-492,7 +498,7 @@ namespace swrenderer double top, bot; GetMaskedWallTopBottom(ds, top, bot); - RenderWallPart renderWallpart; + RenderWallPart renderWallpart(Thread); renderWallpart.Render(drawerargs, frontsector, curline, WallC, rw_pic, x1, x2, wallupper.ScreenY, walllower.ScreenY, texturemid, MaskedSWall, walltexcoords.UPos, yscale, top, bot, true, wallshade, rw_offset, rw_light, rw_lightstep, nullptr, ds->foggy, basecolormap); } @@ -525,7 +531,7 @@ namespace swrenderer floorHeight = backsector->CenterFloor(); ceilingHeight = backsector->CenterCeiling(); - Clip3DFloors *clip3d = Clip3DFloors::Instance(); + Clip3DFloors *clip3d = Thread->Clip3DFloors.get(); // maybe fix clipheights if (!(clip3d->fake3D & FAKE3D_CLIPBOTTOM)) clip3d->sclipBottom = floorHeight; @@ -906,7 +912,7 @@ namespace swrenderer { ProjectedWallLine most; - RenderPortal *renderportal = RenderPortal::Instance(); + RenderPortal *renderportal = Thread->Portal.get(); most.Project(curline->frontsector->ceilingplane, &WallC, curline, renderportal->MirrorFlags & RF_XFLIP); for (int i = x1; i < x2; ++i) @@ -931,7 +937,7 @@ namespace swrenderer top = MAX(frontcz1, frontcz2); bot = MIN(frontfz1, frontfz2); - Clip3DFloors *clip3d = Clip3DFloors::Instance(); + Clip3DFloors *clip3d = Thread->Clip3DFloors.get(); if (clip3d->fake3D & FAKE3D_CLIPTOP) { top = MIN(top, clip3d->sclipTop); diff --git a/src/swrenderer/line/r_renderdrawsegment.h b/src/swrenderer/line/r_renderdrawsegment.h index b3a3ee6c00..a13d1341df 100644 --- a/src/swrenderer/line/r_renderdrawsegment.h +++ b/src/swrenderer/line/r_renderdrawsegment.h @@ -17,11 +17,16 @@ namespace swrenderer { + class RenderThread; + class RenderDrawSegment { public: + RenderDrawSegment(RenderThread *thread); void Render(DrawSegment *ds, int x1, int x2); + RenderThread *Thread = nullptr; + private: void ClipMidtex(int x1, int x2); void RenderFakeWall(DrawSegment *ds, int x1, int x2, F3DFloor *rover, int wallshade, FDynamicColormap *basecolormap); diff --git 
a/src/swrenderer/line/r_walldraw.cpp b/src/swrenderer/line/r_walldraw.cpp index 2261fa71a1..ec7ea38f60 100644 --- a/src/swrenderer/line/r_walldraw.cpp +++ b/src/swrenderer/line/r_walldraw.cpp @@ -40,6 +40,7 @@ #include "swrenderer/viewport/r_viewport.h" #include "swrenderer/line/r_walldraw.h" #include "swrenderer/line/r_wallsetup.h" +#include "swrenderer/r_renderthread.h" namespace swrenderer { @@ -376,7 +377,7 @@ namespace swrenderer assert(WallC.sx1 <= x1); assert(WallC.sx2 >= x2); - RenderPortal *renderportal = RenderPortal::Instance(); + RenderPortal *renderportal = Thread->Portal.get(); // kg3D - fake floors instead of zdoom light list for (unsigned int i = 0; i < frontsector->e->XFloor.lightlist.Size(); i++) @@ -520,4 +521,9 @@ namespace swrenderer ProcessWall(walltop, wallbottom, texturemid, swall, lwall); } } + + RenderWallPart::RenderWallPart(RenderThread *thread) + { + Thread = thread; + } } diff --git a/src/swrenderer/line/r_walldraw.h b/src/swrenderer/line/r_walldraw.h index bc5b2b3e9f..79d7ffc918 100644 --- a/src/swrenderer/line/r_walldraw.h +++ b/src/swrenderer/line/r_walldraw.h @@ -24,6 +24,7 @@ struct FDynamicColormap; namespace swrenderer { + class RenderThread; struct DrawSegment; struct FWallCoords; class ProjectedWallLine; @@ -33,6 +34,8 @@ namespace swrenderer class RenderWallPart { public: + RenderWallPart(RenderThread *thread); + void Render( const WallDrawerArgs &drawerargs, sector_t *frontsector, @@ -58,6 +61,8 @@ namespace swrenderer bool foggy, FDynamicColormap *basecolormap); + RenderThread *Thread = nullptr; + private: void ProcessWallNP2(const short *uwal, const short *dwal, double texturemid, float *swal, fixed_t *lwal, double top, double bot); void ProcessWall(const short *uwal, const short *dwal, double texturemid, float *swal, fixed_t *lwal); diff --git a/src/swrenderer/plane/r_flatplane.cpp b/src/swrenderer/plane/r_flatplane.cpp index 3e630c13c8..85e339e847 100644 --- a/src/swrenderer/plane/r_flatplane.cpp +++ 
b/src/swrenderer/plane/r_flatplane.cpp @@ -41,9 +41,15 @@ #include "swrenderer/plane/r_visibleplane.h" #include "swrenderer/viewport/r_viewport.h" #include "swrenderer/r_memory.h" +#include "swrenderer/r_renderthread.h" namespace swrenderer { + RenderFlatPlane::RenderFlatPlane(RenderThread *thread) + { + Thread = thread; + } + void RenderFlatPlane::Render(VisiblePlane *pl, double _xscale, double _yscale, fixed_t alpha, bool additive, bool masked, FDynamicColormap *colormap, FTexture *texture) { if (alpha <= 0) @@ -85,7 +91,7 @@ namespace swrenderer ystep = -sin(planeang) / viewport->FocalLengthX; // [RH] flip for mirrors - RenderPortal *renderportal = RenderPortal::Instance(); + RenderPortal *renderportal = Thread->Portal.get(); if (renderportal->MirrorFlags & RF_XFLIP) { xstep = -xstep; diff --git a/src/swrenderer/plane/r_flatplane.h b/src/swrenderer/plane/r_flatplane.h index c164ba8623..72a48d334c 100644 --- a/src/swrenderer/plane/r_flatplane.h +++ b/src/swrenderer/plane/r_flatplane.h @@ -18,15 +18,19 @@ namespace swrenderer { + class RenderThread; struct VisiblePlaneLight; class RenderFlatPlane : PlaneRenderer { public: + RenderFlatPlane(RenderThread *thread); void Render(VisiblePlane *pl, double _xscale, double _yscale, fixed_t alpha, bool additive, bool masked, FDynamicColormap *basecolormap, FTexture *texture); static void SetupSlope(); + RenderThread *Thread = nullptr; + private: void RenderLine(int y, int x1, int x2) override; void StepColumn() override; diff --git a/src/swrenderer/plane/r_skyplane.cpp b/src/swrenderer/plane/r_skyplane.cpp index 091182c49b..8eefdb708d 100644 --- a/src/swrenderer/plane/r_skyplane.cpp +++ b/src/swrenderer/plane/r_skyplane.cpp @@ -42,6 +42,7 @@ #include "swrenderer/scene/r_light.h" #include "swrenderer/viewport/r_viewport.h" #include "swrenderer/r_memory.h" +#include "swrenderer/r_renderthread.h" #include "g_levellocals.h" CVAR(Bool, r_linearsky, false, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); @@ -49,6 +50,11 @@ EXTERN_CVAR(Int, 
r_skymode) namespace swrenderer { + RenderSkyPlane::RenderSkyPlane(RenderThread *thread) + { + Thread = thread; + } + void RenderSkyPlane::Render(VisiblePlane *pl) { FTextureID sky1tex, sky2tex; @@ -162,7 +168,7 @@ namespace swrenderer void RenderSkyPlane::DrawSkyColumnStripe(int start_x, int y1, int y2, double scale, double texturemid, double yrepeat) { - RenderPortal *renderportal = RenderPortal::Instance(); + RenderPortal *renderportal = Thread->Portal.get(); auto viewport = RenderViewport::Instance(); uint32_t height = frontskytex->GetHeight(); diff --git a/src/swrenderer/plane/r_skyplane.h b/src/swrenderer/plane/r_skyplane.h index a1f8a45501..9a79b1e96b 100644 --- a/src/swrenderer/plane/r_skyplane.h +++ b/src/swrenderer/plane/r_skyplane.h @@ -21,8 +21,12 @@ namespace swrenderer class RenderSkyPlane { public: + RenderSkyPlane(RenderThread *thread); + void Render(VisiblePlane *pl); + RenderThread *Thread = nullptr; + private: void DrawSky(VisiblePlane *pl); void DrawSkyColumnStripe(int start_x, int y1, int y2, double scale, double texturemid, double yrepeat); diff --git a/src/swrenderer/plane/r_slopeplane.cpp b/src/swrenderer/plane/r_slopeplane.cpp index c51bf0276e..ccb7e6107b 100644 --- a/src/swrenderer/plane/r_slopeplane.cpp +++ b/src/swrenderer/plane/r_slopeplane.cpp @@ -39,8 +39,9 @@ #include "swrenderer/scene/r_scene.h" #include "swrenderer/scene/r_light.h" #include "swrenderer/viewport/r_viewport.h" -#include "swrenderer/r_memory.h" #include "swrenderer/plane/r_visibleplane.h" +#include "swrenderer/r_memory.h" +#include "swrenderer/r_renderthread.h" #ifdef _MSC_VER #pragma warning(disable:4244) @@ -48,6 +49,11 @@ namespace swrenderer { + RenderSlopePlane::RenderSlopePlane(RenderThread *thread) + { + Thread = thread; + } + void RenderSlopePlane::Render(VisiblePlane *pl, double _xscale, double _yscale, fixed_t alpha, bool additive, bool masked, FDynamicColormap *colormap, FTexture *texture) { static const float ifloatpow2[16] = @@ -138,7 +144,7 @@ namespace 
swrenderer plane_su *= 4294967296.f; plane_sv *= 4294967296.f; - RenderPortal *renderportal = RenderPortal::Instance(); + RenderPortal *renderportal = Thread->Portal.get(); if (renderportal->MirrorFlags & RF_XFLIP) { plane_su[0] = -plane_su[0]; diff --git a/src/swrenderer/plane/r_slopeplane.h b/src/swrenderer/plane/r_slopeplane.h index b06f525a20..b707a3f7c4 100644 --- a/src/swrenderer/plane/r_slopeplane.h +++ b/src/swrenderer/plane/r_slopeplane.h @@ -18,11 +18,16 @@ namespace swrenderer { + class RenderThread; + class RenderSlopePlane : PlaneRenderer { public: + RenderSlopePlane(RenderThread *thread); void Render(VisiblePlane *pl, double _xscale, double _yscale, fixed_t alpha, bool additive, bool masked, FDynamicColormap *basecolormap, FTexture *texture); + RenderThread *Thread = nullptr; + private: void RenderLine(int y, int x1, int x2) override; diff --git a/src/swrenderer/plane/r_visibleplane.cpp b/src/swrenderer/plane/r_visibleplane.cpp index 8f6d93d378..0d9e97ef38 100644 --- a/src/swrenderer/plane/r_visibleplane.cpp +++ b/src/swrenderer/plane/r_visibleplane.cpp @@ -87,14 +87,14 @@ namespace swrenderer } } - void VisiblePlane::Render(fixed_t alpha, bool additive, bool masked) + void VisiblePlane::Render(RenderThread *thread, fixed_t alpha, bool additive, bool masked) { if (left >= right) return; if (picnum == skyflatnum) // sky flat { - RenderSkyPlane renderer; + RenderSkyPlane renderer(thread); renderer.Render(this); } else // regular flat @@ -119,12 +119,12 @@ namespace swrenderer if (!height.isSlope() && !tilt) { - RenderFlatPlane renderer; + RenderFlatPlane renderer(thread); renderer.Render(this, xscale, yscale, alpha, additive, masked, colormap, tex); } else { - RenderSlopePlane renderer; + RenderSlopePlane renderer(thread); renderer.Render(this, xscale, yscale, alpha, additive, masked, colormap, tex); } } diff --git a/src/swrenderer/plane/r_visibleplane.h b/src/swrenderer/plane/r_visibleplane.h index d26f0fdfd1..5e1c9230ba 100644 --- 
a/src/swrenderer/plane/r_visibleplane.h +++ b/src/swrenderer/plane/r_visibleplane.h @@ -25,6 +25,8 @@ struct FSectorPortal; namespace swrenderer { + class RenderThread; + struct VisiblePlaneLight { ADynamicLight *lightsource; @@ -36,7 +38,7 @@ namespace swrenderer VisiblePlane(); void AddLights(FLightNode *node); - void Render(fixed_t alpha, bool additive, bool masked); + void Render(RenderThread *thread, fixed_t alpha, bool additive, bool masked); VisiblePlane *next = nullptr; // Next visplane in hash chain -- killough diff --git a/src/swrenderer/plane/r_visibleplanelist.cpp b/src/swrenderer/plane/r_visibleplanelist.cpp index 03eb386f6f..7ecd745bfc 100644 --- a/src/swrenderer/plane/r_visibleplanelist.cpp +++ b/src/swrenderer/plane/r_visibleplanelist.cpp @@ -41,13 +41,13 @@ #include "swrenderer/plane/r_visibleplanelist.h" #include "swrenderer/drawers/r_draw.h" #include "swrenderer/viewport/r_viewport.h" +#include "swrenderer/r_renderthread.h" namespace swrenderer { - VisiblePlaneList *VisiblePlaneList::Instance() + VisiblePlaneList::VisiblePlaneList(RenderThread *thread) { - static VisiblePlaneList instance; - return &instance; + Thread = thread; } VisiblePlaneList::VisiblePlaneList() @@ -129,7 +129,7 @@ namespace swrenderer // kg3D - hack, store alpha in sky // i know there is ->alpha, but this also allows to identify fake plane // and ->alpha is for stacked sectors - Clip3DFloors *clip3d = Clip3DFloors::Instance(); + Clip3DFloors *clip3d = Thread->Clip3DFloors.get(); if (clip3d->fake3D & (FAKE3D_FAKEFLOOR | FAKE3D_FAKECEILING)) sky = 0x80000000 | clip3d->fakeAlpha; else sky = 0; // not skyflatnum so it can't be a sky portal = nullptr; @@ -139,7 +139,7 @@ namespace swrenderer // New visplane algorithm uses hash table -- killough hash = isskybox ? 
((unsigned)MAXVISPLANES) : CalcHash(picnum.GetIndex(), lightlevel, height); - RenderPortal *renderportal = RenderPortal::Instance(); + RenderPortal *renderportal = Thread->Portal.get(); for (check = visplanes[hash]; check; check = check->next) // killough { @@ -190,7 +190,7 @@ namespace swrenderer sky == check->sky && renderportal->CurrentPortalUniq == check->CurrentPortalUniq && renderportal->MirrorFlags == check->MirrorFlags && - Clip3DFloors::Instance()->CurrentSkybox == check->CurrentSkybox && + Thread->Clip3DFloors->CurrentSkybox == check->CurrentSkybox && ViewPos == check->viewpos ) { @@ -215,7 +215,7 @@ namespace swrenderer check->Additive = additive; check->CurrentPortalUniq = renderportal->CurrentPortalUniq; check->MirrorFlags = renderportal->MirrorFlags; - check->CurrentSkybox = Clip3DFloors::Instance()->CurrentSkybox; + check->CurrentSkybox = Thread->Clip3DFloors->CurrentSkybox; return check; } @@ -326,19 +326,19 @@ namespace swrenderer int i; int vpcount = 0; - RenderPortal *renderportal = RenderPortal::Instance(); + RenderPortal *renderportal = Thread->Portal.get(); for (i = 0; i < MAXVISPLANES; i++) { for (pl = visplanes[i]; pl; pl = pl->next) { // kg3D - draw only correct planes - if (pl->CurrentPortalUniq != renderportal->CurrentPortalUniq || pl->CurrentSkybox != Clip3DFloors::Instance()->CurrentSkybox) + if (pl->CurrentPortalUniq != renderportal->CurrentPortalUniq || pl->CurrentSkybox != Thread->Clip3DFloors->CurrentSkybox) continue; // kg3D - draw only real planes now if (pl->sky >= 0) { vpcount++; - pl->Render(OPAQUE, false, false); + pl->Render(Thread, OPAQUE, false, false); } } } @@ -353,13 +353,13 @@ namespace swrenderer DVector3 oViewPos = ViewPos; DAngle oViewAngle = ViewAngle; - RenderPortal *renderportal = RenderPortal::Instance(); + RenderPortal *renderportal = Thread->Portal.get(); for (i = 0; i < MAXVISPLANES; i++) { for (pl = visplanes[i]; pl; pl = pl->next) { - if (pl->CurrentSkybox != Clip3DFloors::Instance()->CurrentSkybox || 
pl->CurrentPortalUniq != renderportal->CurrentPortalUniq) + if (pl->CurrentSkybox != Thread->Clip3DFloors->CurrentSkybox || pl->CurrentPortalUniq != renderportal->CurrentPortalUniq) continue; if (pl->sky < 0 && pl->height.Zat0() == height) @@ -368,7 +368,7 @@ namespace swrenderer ViewAngle = pl->viewangle; renderportal->MirrorFlags = pl->MirrorFlags; - pl->Render(pl->sky & 0x7FFFFFFF, pl->Additive, true); + pl->Render(Thread, pl->sky & 0x7FFFFFFF, pl->Additive, true); } } } diff --git a/src/swrenderer/plane/r_visibleplanelist.h b/src/swrenderer/plane/r_visibleplanelist.h index 3a3134fbda..e07a086fd9 100644 --- a/src/swrenderer/plane/r_visibleplanelist.h +++ b/src/swrenderer/plane/r_visibleplanelist.h @@ -20,12 +20,13 @@ struct FSectorPortal; namespace swrenderer { + class RenderThread; struct VisiblePlane; class VisiblePlaneList { public: - static VisiblePlaneList *Instance(); + VisiblePlaneList(RenderThread *thread); void Clear(); void ClearKeepFakePlanes(); @@ -40,6 +41,8 @@ namespace swrenderer int Render(); void RenderHeight(double height); + RenderThread *Thread = nullptr; + private: VisiblePlaneList(); VisiblePlane *Add(unsigned hash); diff --git a/src/swrenderer/r_renderthread.cpp b/src/swrenderer/r_renderthread.cpp new file mode 100644 index 0000000000..057c096328 --- /dev/null +++ b/src/swrenderer/r_renderthread.cpp @@ -0,0 +1,70 @@ +/* +** Renderer multithreading framework +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. 
If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. +** +*/ + +#include +#include "templates.h" +#include "doomdef.h" +#include "m_bbox.h" +#include "i_system.h" +#include "p_lnspec.h" +#include "p_setup.h" +#include "a_sharedglobal.h" +#include "g_level.h" +#include "p_effect.h" +#include "doomstat.h" +#include "r_state.h" +#include "v_palette.h" +#include "r_sky.h" +#include "po_man.h" +#include "r_data/colormaps.h" +#include "r_renderthread.h" +#include "swrenderer/things/r_visiblespritelist.h" +#include "swrenderer/scene/r_portal.h" +#include "swrenderer/scene/r_opaque_pass.h" +#include "swrenderer/scene/r_translucent_pass.h" +#include "swrenderer/scene/r_3dfloors.h" +#include "swrenderer/scene/r_scene.h" +#include "swrenderer/things/r_playersprite.h" +#include "swrenderer/plane/r_visibleplanelist.h" +#include "swrenderer/segments/r_drawsegment.h" +#include "swrenderer/segments/r_clipsegment.h" + +namespace swrenderer +{ + RenderThread::RenderThread() + { + OpaquePass = std::make_unique<RenderOpaquePass>(this); + TranslucentPass = std::make_unique<RenderTranslucentPass>(this); + SpriteList = std::make_unique<VisibleSpriteList>(); + Portal = std::make_unique<RenderPortal>(this); + Clip3DFloors = std::make_unique<swrenderer::Clip3DFloors>(this); + PlayerSprites = std::make_unique<RenderPlayerSprites>(this); + PlaneList = std::make_unique<VisiblePlaneList>(this); + Scene = std::make_unique<RenderScene>(this); + DrawSegments = std::make_unique<DrawSegmentList>(this); + ClipSegments = std::make_unique<RenderClipSegment>(); + } + + RenderThread::~RenderThread() + { + } +} diff --git a/src/swrenderer/r_renderthread.h b/src/swrenderer/r_renderthread.h new file mode 100644 index 0000000000..21bb4b6faf --- /dev/null +++ b/src/swrenderer/r_renderthread.h @@ -0,0 +1,57 @@ +/* +** Renderer multithreading framework +** Copyright (c) 2016 Magnus Norddahl +** +** This
software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. +** +*/ + +#pragma once + +#include <memory> + +namespace swrenderer +{ + class VisibleSpriteList; + class RenderPortal; + class RenderOpaquePass; + class RenderTranslucentPass; + class RenderPlayerSprites; + class RenderScene; + class Clip3DFloors; + class VisiblePlaneList; + class DrawSegmentList; + class RenderClipSegment; + + class RenderThread + { + public: + RenderThread(); + ~RenderThread(); + + std::unique_ptr<RenderOpaquePass> OpaquePass; + std::unique_ptr<RenderTranslucentPass> TranslucentPass; + std::unique_ptr<VisibleSpriteList> SpriteList; + std::unique_ptr<RenderPortal> Portal; + std::unique_ptr<swrenderer::Clip3DFloors> Clip3DFloors; + std::unique_ptr<RenderPlayerSprites> PlayerSprites; + std::unique_ptr<VisiblePlaneList> PlaneList; + std::unique_ptr<RenderScene> Scene; + std::unique_ptr<DrawSegmentList> DrawSegments; + std::unique_ptr<RenderClipSegment> ClipSegments; + }; +} diff --git a/src/swrenderer/r_swrenderer.cpp b/src/swrenderer/r_swrenderer.cpp index b334df7961..a12a5ae003 100644 --- a/src/swrenderer/r_swrenderer.cpp +++ b/src/swrenderer/r_swrenderer.cpp @@ -88,7 +88,7 @@ void FSoftwareRenderer::Init() { gl_ParseDefs(); - RenderScene::Instance()->Init(); + mMainThread.Scene->Init(); } bool FSoftwareRenderer::UsesColormap() const @@ -178,7 +178,7 @@ void FSoftwareRenderer::RenderView(player_t
*player) if (r_polyrenderer) PolyRenderer::Instance()->RenderView(player); else - RenderScene::Instance()->RenderView(player); + mMainThread.Scene->RenderView(player); FCanvasTextureInfo::UpdateAll(); } @@ -202,7 +202,7 @@ void FSoftwareRenderer::WriteSavePic (player_t *player, FileWriter *file, int wi if (r_polyrenderer) PolyRenderer::Instance()->RenderViewToCanvas(player->mo, pic, 0, 0, width, height, true); else - RenderScene::Instance()->RenderViewToCanvas (player->mo, pic, 0, 0, width, height); + mMainThread.Scene->RenderViewToCanvas (player->mo, pic, 0, 0, width, height); screen->GetFlashedPalette (palette); M_CreatePNG (file, pic->GetBuffer(), palette, SS_PAL, width, height, pic->GetPitch()); pic->Unlock (); @@ -215,7 +215,7 @@ void FSoftwareRenderer::DrawRemainingPlayerSprites() { if (!r_polyrenderer) { - RenderPlayerSprites::Instance()->RenderRemaining(); + mMainThread.PlayerSprites->RenderRemaining(); } else { @@ -237,12 +237,12 @@ bool FSoftwareRenderer::RequireGLNodes() void FSoftwareRenderer::OnModeSet () { - RenderScene::Instance()->ScreenResized(); + mMainThread.Scene->ScreenResized(); } void FSoftwareRenderer::SetClearColor(int color) { - RenderScene::Instance()->SetClearColor(color); + mMainThread.Scene->SetClearColor(color); } void FSoftwareRenderer::RenderTextureView (FCanvasTexture *tex, AActor *viewpoint, int fov) @@ -262,7 +262,7 @@ void FSoftwareRenderer::RenderTextureView (FCanvasTexture *tex, AActor *viewpoin if (r_polyrenderer) PolyRenderer::Instance()->RenderViewToCanvas(viewpoint, Canvas, 0, 0, tex->GetWidth(), tex->GetHeight(), tex->bFirstUpdate); else - RenderScene::Instance()->RenderViewToCanvas(viewpoint, Canvas, 0, 0, tex->GetWidth(), tex->GetHeight(), tex->bFirstUpdate); + mMainThread.Scene->RenderViewToCanvas(viewpoint, Canvas, 0, 0, tex->GetWidth(), tex->GetHeight(), tex->bFirstUpdate); R_SetFOV (savedfov); @@ -319,7 +319,7 @@ void FSoftwareRenderer::RenderTextureView (FCanvasTexture *tex, AActor *viewpoin sector_t 
*FSoftwareRenderer::FakeFlat(sector_t *sec, sector_t *tempsec, int *floorlightlevel, int *ceilinglightlevel) { - return RenderOpaquePass::Instance()->FakeFlat(sec, tempsec, floorlightlevel, ceilinglightlevel, nullptr, 0, 0, 0, 0); + return mMainThread.OpaquePass->FakeFlat(sec, tempsec, floorlightlevel, ceilinglightlevel, nullptr, 0, 0, 0, 0); } void FSoftwareRenderer::StateChanged(AActor *actor) diff --git a/src/swrenderer/r_swrenderer.h b/src/swrenderer/r_swrenderer.h index c4d81c1d08..9cdc0c9beb 100644 --- a/src/swrenderer/r_swrenderer.h +++ b/src/swrenderer/r_swrenderer.h @@ -2,6 +2,7 @@ #pragma once #include "r_renderer.h" +#include "r_renderthread.h" struct FSoftwareRenderer : public FRenderer { @@ -41,4 +42,6 @@ struct FSoftwareRenderer : public FRenderer private: void PrecacheTexture(FTexture *tex, int cache); + + swrenderer::RenderThread mMainThread; }; diff --git a/src/swrenderer/scene/r_3dfloors.cpp b/src/swrenderer/scene/r_3dfloors.cpp index 3feb7b91c1..69618f9f44 100644 --- a/src/swrenderer/scene/r_3dfloors.cpp +++ b/src/swrenderer/scene/r_3dfloors.cpp @@ -13,15 +13,15 @@ #include "c_cvars.h" #include "r_3dfloors.h" #include "r_utility.h" +#include "swrenderer/r_renderthread.h" CVAR(Int, r_3dfloors, true, 0); namespace swrenderer { - Clip3DFloors *Clip3DFloors::Instance() + Clip3DFloors::Clip3DFloors(RenderThread *thread) { - static Clip3DFloors clip; - return &clip; + Thread = thread; } void Clip3DFloors::Cleanup() @@ -102,8 +102,8 @@ namespace swrenderer curr = (ClipStack*)M_Malloc(sizeof(ClipStack)); curr->next = 0; - memcpy(curr->floorclip, RenderOpaquePass::Instance()->floorclip, sizeof(short) * MAXWIDTH); - memcpy(curr->ceilingclip, RenderOpaquePass::Instance()->ceilingclip, sizeof(short) * MAXWIDTH); + memcpy(curr->floorclip, Thread->OpaquePass->floorclip, sizeof(short) * MAXWIDTH); + memcpy(curr->ceilingclip, Thread->OpaquePass->ceilingclip, sizeof(short) * MAXWIDTH); curr->ffloor = fakeFloor; assert(fakeFloor->floorclip == nullptr); 
assert(fakeFloor->ceilingclip == nullptr); diff --git a/src/swrenderer/scene/r_3dfloors.h b/src/swrenderer/scene/r_3dfloors.h index ca8f9830b6..f49d6c80a8 100644 --- a/src/swrenderer/scene/r_3dfloors.h +++ b/src/swrenderer/scene/r_3dfloors.h @@ -7,6 +7,8 @@ EXTERN_CVAR(Int, r_3dfloors); namespace swrenderer { + class RenderThread; + struct HeightLevel { double height; @@ -52,7 +54,7 @@ namespace swrenderer class Clip3DFloors { public: - static Clip3DFloors *Instance(); + Clip3DFloors(RenderThread *thread); void Cleanup(); @@ -63,6 +65,8 @@ namespace swrenderer void EnterSkybox(); void LeaveSkybox(); + RenderThread *Thread = nullptr; + int fake3D = 0; F3DFloor *fakeFloor = nullptr; diff --git a/src/swrenderer/scene/r_opaque_pass.cpp b/src/swrenderer/scene/r_opaque_pass.cpp index 58e804519d..f2aca7a49f 100644 --- a/src/swrenderer/scene/r_opaque_pass.cpp +++ b/src/swrenderer/scene/r_opaque_pass.cpp @@ -44,6 +44,7 @@ #include "swrenderer/scene/r_scene.h" #include "swrenderer/scene/r_light.h" #include "swrenderer/viewport/r_viewport.h" +#include "swrenderer/r_renderthread.h" #include "r_3dfloors.h" #include "r_portal.h" #include "a_sharedglobal.h" @@ -67,10 +68,9 @@ EXTERN_CVAR(Bool, r_drawvoxels); namespace swrenderer { - RenderOpaquePass *RenderOpaquePass::Instance() + RenderOpaquePass::RenderOpaquePass(RenderThread *thread) : renderline(thread) { - static RenderOpaquePass instance; - return &instance; + Thread = thread; } sector_t *RenderOpaquePass::FakeFlat(sector_t *sec, sector_t *tempsec, int *floorlightlevel, int *ceilinglightlevel, seg_t *backline, int backx1, int backx2, double frontcz1, double frontcz2) @@ -341,7 +341,7 @@ namespace swrenderer ry1 = x1 * ViewTanCos + y1 * ViewTanSin; ry2 = x2 * ViewTanCos + y2 * ViewTanSin; - if (RenderPortal::Instance()->MirrorFlags & RF_XFLIP) + if (Thread->Portal->MirrorFlags & RF_XFLIP) { double t = -rx1; rx1 = -rx2; @@ -380,7 +380,7 @@ namespace swrenderer // Find the first clippost that touches the source post // 
(adjacent pixels are touching). - return RenderClipSegment::Instance()->IsVisible(sx1, sx2); + return Thread->ClipSegments->IsVisible(sx1, sx2); } void RenderOpaquePass::AddPolyobjs(subsector_t *sub) @@ -507,7 +507,7 @@ namespace swrenderer (frontsector->heightsec && !(frontsector->heightsec->MoreFlags & SECF_IGNOREHEIGHTSEC) && frontsector->heightsec->GetTexture(sector_t::floor) == skyflatnum) ? - VisiblePlaneList::Instance()->FindPlane(frontsector->ceilingplane, // killough 3/8/98 + Thread->PlaneList->FindPlane(frontsector->ceilingplane, // killough 3/8/98 frontsector->GetTexture(sector_t::ceiling), ceilinglightlevel + R_ActualExtraLight(foggy), // killough 4/11/98 frontsector->GetAlpha(sector_t::ceiling), @@ -548,7 +548,7 @@ namespace swrenderer (frontsector->heightsec && !(frontsector->heightsec->MoreFlags & SECF_IGNOREHEIGHTSEC) && frontsector->heightsec->GetTexture(sector_t::ceiling) == skyflatnum) ? - VisiblePlaneList::Instance()->FindPlane(frontsector->floorplane, + Thread->PlaneList->FindPlane(frontsector->floorplane, frontsector->GetTexture(sector_t::floor), floorlightlevel + R_ActualExtraLight(foggy), // killough 3/16/98 frontsector->GetAlpha(sector_t::floor), @@ -568,7 +568,7 @@ namespace swrenderer backupfp = floorplane; backupcp = ceilingplane; - Clip3DFloors *clip3d = Clip3DFloors::Instance(); + Clip3DFloors *clip3d = Thread->Clip3DFloors.get(); // first check all floors for (int i = 0; i < (int)frontsector->e->XFloor.ffloors.Size(); i++) @@ -614,7 +614,7 @@ namespace swrenderer } ceilingplane = nullptr; - floorplane = VisiblePlaneList::Instance()->FindPlane(frontsector->floorplane, + floorplane = Thread->PlaneList->FindPlane(frontsector->floorplane, frontsector->GetTexture(sector_t::floor), floorlightlevel + R_ActualExtraLight(foggy), // killough 3/16/98 frontsector->GetAlpha(sector_t::floor), @@ -680,7 +680,7 @@ namespace swrenderer tempsec.ceilingplane.ChangeHeight(1 / 65536.); floorplane = nullptr; - ceilingplane = 
VisiblePlaneList::Instance()->FindPlane(frontsector->ceilingplane, // killough 3/8/98 + ceilingplane = Thread->PlaneList->FindPlane(frontsector->ceilingplane, // killough 3/8/98 frontsector->GetTexture(sector_t::ceiling), ceilinglightlevel + R_ActualExtraLight(foggy), // killough 4/11/98 frontsector->GetAlpha(sector_t::ceiling), @@ -720,7 +720,7 @@ namespace swrenderer int shade = LIGHT2SHADE((floorlightlevel + ceilinglightlevel) / 2 + R_ActualExtraLight(foggy)); for (WORD i = ParticlesInSubsec[(unsigned int)(sub - subsectors)]; i != NO_PARTICLE; i = Particles[i].snext) { - RenderParticle::Project(Particles + i, subsectors[sub - subsectors].sector, shade, FakeSide, foggy); + RenderParticle::Project(Thread, Particles + i, subsectors[sub - subsectors].sector, shade, FakeSide, foggy); } } @@ -738,7 +738,7 @@ namespace swrenderer backupcp = ceilingplane; floorplane = nullptr; ceilingplane = nullptr; - Clip3DFloors *clip3d = Clip3DFloors::Instance(); + Clip3DFloors *clip3d = Thread->Clip3DFloors.get(); for (unsigned int i = 0; i < line->backsector->e->XFloor.ffloors.Size(); i++) { clip3d->fakeFloor = line->backsector->e->XFloor.ffloors[i]; @@ -886,15 +886,15 @@ namespace swrenderer if ((sprite.renderflags & RF_SPRITETYPEMASK) == RF_WALLSPRITE) { - RenderWallSprite::Project(thing, sprite.pos, sprite.picnum, sprite.spriteScale, sprite.renderflags, thingShade, foggy, thingColormap); + RenderWallSprite::Project(Thread, thing, sprite.pos, sprite.picnum, sprite.spriteScale, sprite.renderflags, thingShade, foggy, thingColormap); } else if (sprite.voxel) { - RenderVoxel::Project(thing, sprite.pos, sprite.voxel, sprite.spriteScale, sprite.renderflags, fakeside, fakefloor, fakeceiling, sec, thingShade, foggy, thingColormap); + RenderVoxel::Project(Thread, thing, sprite.pos, sprite.voxel, sprite.spriteScale, sprite.renderflags, fakeside, fakefloor, fakeceiling, sec, thingShade, foggy, thingColormap); } else { - RenderSprite::Project(thing, sprite.pos, sprite.tex, 
sprite.spriteScale, sprite.renderflags, fakeside, fakefloor, fakeceiling, sec, thingShade, foggy, thingColormap); + RenderSprite::Project(Thread, thing, sprite.pos, sprite.tex, sprite.spriteScale, sprite.renderflags, fakeside, fakefloor, fakeceiling, sec, thingShade, foggy, thingColormap); } } } @@ -915,7 +915,7 @@ namespace swrenderer // [ZZ] Or less definitely not visible (hue) // [ZZ] 10.01.2016: don't try to clip stuff inside a skybox against the current portal. - RenderPortal *renderportal = RenderPortal::Instance(); + RenderPortal *renderportal = Thread->Portal.get(); if (!renderportal->CurrentPortalInSkybox && renderportal->CurrentPortal && !!P_PointOnLineSidePrecise(thing->Pos(), renderportal->CurrentPortal->dst)) return false; diff --git a/src/swrenderer/scene/r_opaque_pass.h b/src/swrenderer/scene/r_opaque_pass.h index a009cc772f..fcb6a58595 100644 --- a/src/swrenderer/scene/r_opaque_pass.h +++ b/src/swrenderer/scene/r_opaque_pass.h @@ -23,6 +23,7 @@ struct FVoxelDef; namespace swrenderer { + class RenderThread; struct VisiblePlane; // The 3072 below is just an arbitrary value picked to avoid @@ -51,7 +52,7 @@ namespace swrenderer class RenderOpaquePass { public: - static RenderOpaquePass *Instance(); + RenderOpaquePass(RenderThread *thread); void ClearClip(); void RenderScene(); @@ -62,6 +63,8 @@ namespace swrenderer short floorclip[MAXWIDTH]; short ceilingclip[MAXWIDTH]; + RenderThread *Thread = nullptr; + private: void RenderBSPNode(void *node); void RenderSubsector(subsector_t *sub); @@ -72,7 +75,7 @@ namespace swrenderer void AddSprites(sector_t *sec, int lightlevel, WaterFakeSide fakeside, bool foggy, FDynamicColormap *basecolormap); - static bool IsPotentiallyVisible(AActor *thing); + bool IsPotentiallyVisible(AActor *thing); static bool GetThingSprite(AActor *thing, ThingSprite &sprite); subsector_t *InSubsector = nullptr; diff --git a/src/swrenderer/scene/r_portal.cpp b/src/swrenderer/scene/r_portal.cpp index d6c8022554..38a9ffc1b1 100644 --- 
a/src/swrenderer/scene/r_portal.cpp +++ b/src/swrenderer/scene/r_portal.cpp @@ -57,6 +57,7 @@ #include "swrenderer/scene/r_light.h" #include "swrenderer/viewport/r_viewport.h" #include "swrenderer/r_memory.h" +#include "swrenderer/r_renderthread.h" CVAR(Int, r_portal_recursions, 4, CVAR_ARCHIVE) CVAR(Bool, r_highlight_portals, false, CVAR_ARCHIVE) @@ -67,10 +68,9 @@ CVAR(Bool, r_skyboxes, true, 0) namespace swrenderer { - RenderPortal *RenderPortal::Instance() + RenderPortal::RenderPortal(RenderThread *thread) { - static RenderPortal renderportal; - return &renderportal; + Thread = thread; } // Draws any recorded sky boxes and then frees them. @@ -92,13 +92,13 @@ namespace swrenderer { numskyboxes = 0; - VisiblePlaneList *planes = VisiblePlaneList::Instance(); - DrawSegmentList *drawseglist = DrawSegmentList::Instance(); + VisiblePlaneList *planes = Thread->PlaneList.get(); + DrawSegmentList *drawseglist = Thread->DrawSegments.get(); if (!planes->HasPortalPlanes()) return; - Clip3DFloors::Instance()->EnterSkybox(); + Thread->Clip3DFloors->EnterSkybox(); CurrentPortalInSkybox = true; int savedextralight = extralight; @@ -112,7 +112,7 @@ namespace swrenderer { if (pl->right < pl->left || !r_skyboxes || numskyboxes == MAX_SKYBOX_PLANES || pl->portal == nullptr) { - pl->Render(OPAQUE, false, false); + pl->Render(Thread, OPAQUE, false, false); continue; } @@ -151,7 +151,7 @@ namespace swrenderer // not implemented yet default: - pl->Render(OPAQUE, false, false); + pl->Render(Thread, OPAQUE, false, false); numskyboxes--; continue; } @@ -165,12 +165,12 @@ namespace swrenderer validcount++; // Make sure we see all sprites planes->ClearKeepFakePlanes(); - RenderClipSegment::Instance()->Clear(pl->left, pl->right); + Thread->ClipSegments->Clear(pl->left, pl->right); WindowLeft = pl->left; WindowRight = pl->right; - auto ceilingclip = RenderOpaquePass::Instance()->ceilingclip; - auto floorclip = RenderOpaquePass::Instance()->floorclip; + auto ceilingclip = 
Thread->OpaquePass->ceilingclip; + auto floorclip = Thread->OpaquePass->floorclip; for (int i = pl->left; i < pl->right; i++) { if (pl->top[i] == 0x7fff) @@ -208,12 +208,12 @@ namespace swrenderer drawseglist->Push(draw_segment); drawseglist->PushPortal(); - VisibleSpriteList::Instance()->PushPortal(); + Thread->SpriteList->PushPortal(); viewposStack.Push(ViewPos); visplaneStack.Push(pl); - RenderOpaquePass::Instance()->RenderScene(); - Clip3DFloors::Instance()->ResetClip(); // reset clips (floor/ceiling) + Thread->OpaquePass->RenderScene(); + Thread->Clip3DFloors->ResetClip(); // reset clips (floor/ceiling) planes->Render(); port->mFlags &= ~PORTSF_INSKYBOX; @@ -228,16 +228,16 @@ namespace swrenderer // Masked textures and planes need the view coordinates restored for proper positioning. viewposStack.Pop(ViewPos); - RenderTranslucentPass::Instance()->Render(); + Thread->TranslucentPass->Render(); VisiblePlane *pl; visplaneStack.Pop(pl); if (pl->Alpha > 0 && pl->picnum != skyflatnum) { - pl->Render(pl->Alpha, pl->Additive, true); + pl->Render(Thread, pl->Alpha, pl->Additive, true); } - VisibleSpriteList::Instance()->PopPortal(); + Thread->SpriteList->PopPortal(); drawseglist->PopPortal(); } @@ -250,9 +250,9 @@ namespace swrenderer R_SetViewAngle(); CurrentPortalInSkybox = false; - Clip3DFloors::Instance()->LeaveSkybox(); + Thread->Clip3DFloors->LeaveSkybox(); - if (Clip3DFloors::Instance()->fakeActive) return; + if (Thread->Clip3DFloors->fakeActive) return; planes->ClearPortalPlanes(); } @@ -395,8 +395,8 @@ namespace swrenderer PortalDrawseg* prevpds = CurrentPortal; CurrentPortal = pds; - VisiblePlaneList::Instance()->ClearKeepFakePlanes(); - RenderClipSegment::Instance()->Clear(pds->x1, pds->x2); + Thread->PlaneList->ClearKeepFakePlanes(); + Thread->ClipSegments->Clear(pds->x1, pds->x2); WindowLeft = pds->x1; WindowRight = pds->x2; @@ -411,21 +411,21 @@ namespace swrenderer } // some portals have height differences, account for this here - 
Clip3DFloors::Instance()->EnterSkybox(); // push 3D floor height map + Thread->Clip3DFloors->EnterSkybox(); // push 3D floor height map CurrentPortalInSkybox = false; // first portal in a skybox should set this variable to false for proper clipping in skyboxes. // first pass, set clipping - auto ceilingclip = RenderOpaquePass::Instance()->ceilingclip; - auto floorclip = RenderOpaquePass::Instance()->floorclip; + auto ceilingclip = Thread->OpaquePass->ceilingclip; + auto floorclip = Thread->OpaquePass->floorclip; memcpy(ceilingclip + pds->x1, &pds->ceilingclip[0], pds->len * sizeof(*ceilingclip)); memcpy(floorclip + pds->x1, &pds->floorclip[0], pds->len * sizeof(*floorclip)); - RenderOpaquePass::Instance()->RenderScene(); - Clip3DFloors::Instance()->ResetClip(); // reset clips (floor/ceiling) + Thread->OpaquePass->RenderScene(); + Thread->Clip3DFloors->ResetClip(); // reset clips (floor/ceiling) if (!savedvisibility && camera) camera->renderflags &= ~RF_INVISIBLE; PlaneCycles.Clock(); - VisiblePlaneList::Instance()->Render(); + Thread->PlaneList->Render(); RenderPlanePortals(); PlaneCycles.Unclock(); @@ -444,12 +444,12 @@ namespace swrenderer NetUpdate(); MaskedCycles.Clock(); // [ZZ] count sprites in portals/mirrors along with normal ones. - RenderTranslucentPass::Instance()->Render(); // this is required since with portals there often will be cases when more than 80% of the view is inside a portal. + Thread->TranslucentPass->Render(); // this is required since with portals there often will be cases when more than 80% of the view is inside a portal. 
MaskedCycles.Unclock(); NetUpdate(); - Clip3DFloors::Instance()->LeaveSkybox(); // pop 3D floor height map + Thread->Clip3DFloors->LeaveSkybox(); // pop 3D floor height map CurrentPortalUniq = prevuniq2; // draw a red line around a portal if it's being highlighted @@ -529,10 +529,11 @@ namespace swrenderer WallPortals.Push(RenderMemory::NewObject(linedef, x1, x2, topclip, bottomclip)); } } - +/* ADD_STAT(skyboxes) { FString out; out.Format("%d skybox planes", swrenderer::RenderPortal::Instance()->numskyboxes); return out; } +*/ diff --git a/src/swrenderer/scene/r_portal.h b/src/swrenderer/scene/r_portal.h index f3e1b9bbef..0d65bfa749 100644 --- a/src/swrenderer/scene/r_portal.h +++ b/src/swrenderer/scene/r_portal.h @@ -17,12 +17,13 @@ namespace swrenderer { + class RenderThread; struct VisiblePlane; class RenderPortal { public: - static RenderPortal *Instance(); + RenderPortal(RenderThread *thread); void SetMainPortal(); void CopyStackedViewParameters(); @@ -31,6 +32,8 @@ namespace swrenderer void RenderLinePortals(); void AddLinePortal(line_t *linedef, int x1, int x2, const short *topclip, const short *bottomclip); + + RenderThread *Thread = nullptr; int WindowLeft = 0; int WindowRight = 0; diff --git a/src/swrenderer/scene/r_scene.cpp b/src/swrenderer/scene/r_scene.cpp index fae0762c6a..7e8d20f783 100644 --- a/src/swrenderer/scene/r_scene.cpp +++ b/src/swrenderer/scene/r_scene.cpp @@ -47,6 +47,7 @@ #include "swrenderer/drawers/r_draw_rgba.h" #include "swrenderer/drawers/r_thread.h" #include "swrenderer/r_memory.h" +#include "swrenderer/r_renderthread.h" EXTERN_CVAR(Bool, r_shadercolormaps) EXTERN_CVAR(Int, r_clearbuffer) @@ -55,10 +56,9 @@ namespace swrenderer { cycle_t WallCycles, PlaneCycles, MaskedCycles, WallScanCycles; - RenderScene *RenderScene::Instance() + RenderScene::RenderScene(RenderThread *thread) { - static RenderScene instance; - return &instance; + Thread = thread; } void RenderScene::SetClearColor(int color) @@ -115,7 +115,7 @@ namespace 
swrenderer RenderMemory::Clear(); - Clip3DFloors *clip3d = Clip3DFloors::Instance(); + Clip3DFloors *clip3d = Thread->Clip3DFloors.get(); clip3d->Cleanup(); clip3d->ResetClip(); // reset clips (floor/ceiling) @@ -123,25 +123,25 @@ namespace swrenderer CameraLight::Instance()->SetCamera(actor); RenderViewport::Instance()->SetupFreelook(); - RenderPortal::Instance()->CopyStackedViewParameters(); + Thread->Portal->CopyStackedViewParameters(); // Clear buffers. - RenderClipSegment::Instance()->Clear(0, viewwidth); - DrawSegmentList::Instance()->Clear(); - VisiblePlaneList::Instance()->Clear(); - RenderTranslucentPass::Instance()->Clear(); + Thread->ClipSegments->Clear(0, viewwidth); + Thread->DrawSegments->Clear(); + Thread->PlaneList->Clear(); + Thread->TranslucentPass->Clear(); // opening / clipping determination - RenderOpaquePass::Instance()->ClearClip(); + Thread->OpaquePass->ClearClip(); NetUpdate(); - RenderPortal::Instance()->SetMainPortal(); + Thread->Portal->SetMainPortal(); this->dontmaplines = dontmaplines; // [RH] Hack to make windows into underwater areas possible - RenderOpaquePass::Instance()->ResetFakingUnderwater(); + Thread->OpaquePass->ResetFakingUnderwater(); // [RH] Setup particles for this frame P_FindParticleSubsectors(); @@ -155,8 +155,8 @@ namespace swrenderer } // Link the polyobjects right before drawing the scene to reduce the amounts of calls to this function PO_LinkToSubsectors(); - RenderOpaquePass::Instance()->RenderScene(); - Clip3DFloors::Instance()->ResetClip(); // reset clips (floor/ceiling) + Thread->OpaquePass->RenderScene(); + Thread->Clip3DFloors->ResetClip(); // reset clips (floor/ceiling) camera->renderflags = savedflags; WallCycles.Unclock(); @@ -165,16 +165,16 @@ namespace swrenderer if (viewactive) { PlaneCycles.Clock(); - VisiblePlaneList::Instance()->Render(); - RenderPortal::Instance()->RenderPlanePortals(); + Thread->PlaneList->Render(); + Thread->Portal->RenderPlanePortals(); PlaneCycles.Unclock(); - 
RenderPortal::Instance()->RenderLinePortals(); + Thread->Portal->RenderLinePortals(); NetUpdate(); MaskedCycles.Clock(); - RenderTranslucentPass::Instance()->Render(); + Thread->TranslucentPass->Render(); MaskedCycles.Unclock(); NetUpdate(); @@ -237,7 +237,6 @@ namespace swrenderer void RenderScene::Init() { - atterm([]() { RenderScene::Instance()->Deinit(); }); // viewwidth / viewheight are set by the defaults fillshort(zeroarray, MAXWIDTH, 0); @@ -246,8 +245,8 @@ namespace swrenderer void RenderScene::Deinit() { - RenderTranslucentPass::Instance()->Deinit(); - Clip3DFloors::Instance()->Cleanup(); + Thread->TranslucentPass->Deinit(); + Thread->Clip3DFloors->Cleanup(); } ///////////////////////////////////////////////////////////////////////// diff --git a/src/swrenderer/scene/r_scene.h b/src/swrenderer/scene/r_scene.h index 599ef55fbe..231fd7a4a1 100644 --- a/src/swrenderer/scene/r_scene.h +++ b/src/swrenderer/scene/r_scene.h @@ -22,11 +22,13 @@ extern cycle_t FrameCycles; namespace swrenderer { extern cycle_t WallCycles, PlaneCycles, MaskedCycles, WallScanCycles; + + class RenderThread; class RenderScene { public: - static RenderScene *Instance(); + RenderScene(RenderThread *thread); void Init(); void ScreenResized(); @@ -39,6 +41,8 @@ namespace swrenderer bool DontMapLines() const { return dontmaplines; } + RenderThread *Thread = nullptr; + private: void RenderActorView(AActor *actor, bool dontmaplines = false); diff --git a/src/swrenderer/scene/r_translucent_pass.cpp b/src/swrenderer/scene/r_translucent_pass.cpp index dcf99f3f7e..d102d6c187 100644 --- a/src/swrenderer/scene/r_translucent_pass.cpp +++ b/src/swrenderer/scene/r_translucent_pass.cpp @@ -38,6 +38,7 @@ #include "swrenderer/line/r_renderdrawsegment.h" #include "swrenderer/viewport/r_viewport.h" #include "swrenderer/r_memory.h" +#include "swrenderer/r_renderthread.h" EXTERN_CVAR(Int, r_drawfuzz) EXTERN_CVAR(Bool, r_drawvoxels) @@ -47,10 +48,9 @@ CVAR(Bool, r_fullbrightignoresectorcolor, true, 
CVAR_ARCHIVE | CVAR_GLOBALCONFIG namespace swrenderer { - RenderTranslucentPass *RenderTranslucentPass::Instance() + RenderTranslucentPass::RenderTranslucentPass(RenderThread *thread) { - static RenderTranslucentPass instance; - return &instance; + Thread = thread; } void RenderTranslucentPass::Deinit() @@ -60,7 +60,7 @@ namespace swrenderer void RenderTranslucentPass::Clear() { - VisibleSpriteList::Instance()->Clear(); + Thread->SpriteList->Clear(); } void RenderTranslucentPass::CollectPortals() @@ -71,7 +71,7 @@ namespace swrenderer // a) exit early if no relevant info is found and // b) skip most of the collected drawsegs which have no portal attached. portaldrawsegs.Clear(); - DrawSegmentList *drawseglist = DrawSegmentList::Instance(); + DrawSegmentList *drawseglist = Thread->DrawSegments.get(); for (auto index = drawseglist->BeginIndex(); index != drawseglist->EndIndex(); index++) { DrawSegment *seg = drawseglist->Segment(index); @@ -98,7 +98,7 @@ namespace swrenderer bool RenderTranslucentPass::ClipSpriteColumnWithPortals(int x, VisibleSprite *spr) { - RenderPortal *renderportal = RenderPortal::Instance(); + RenderPortal *renderportal = Thread->Portal.get(); // [ZZ] 10.01.2016: don't clip sprites from the root of a skybox. 
if (renderportal->CurrentPortalInSkybox) @@ -126,14 +126,14 @@ namespace swrenderer void RenderTranslucentPass::DrawMaskedSingle(bool renew) { - RenderPortal *renderportal = RenderPortal::Instance(); + RenderPortal *renderportal = Thread->Portal.get(); - auto &sortedSprites = VisibleSpriteList::Instance()->SortedSprites; + auto &sortedSprites = Thread->SpriteList->SortedSprites; for (int i = sortedSprites.Size(); i > 0; i--) { if (sortedSprites[i - 1]->IsCurrentPortalUniq(renderportal->CurrentPortalUniq)) { - sortedSprites[i - 1]->Render(); + sortedSprites[i - 1]->Render(Thread); } } @@ -141,10 +141,10 @@ namespace swrenderer if (renew) { - Clip3DFloors::Instance()->fake3D |= FAKE3D_REFRESHCLIP; + Thread->Clip3DFloors->fake3D |= FAKE3D_REFRESHCLIP; } - DrawSegmentList *drawseglist = DrawSegmentList::Instance(); + DrawSegmentList *drawseglist = Thread->DrawSegments.get(); for (auto index = drawseglist->BeginIndex(); index != drawseglist->EndIndex(); index++) { DrawSegment *ds = drawseglist->Segment(index); @@ -156,7 +156,7 @@ namespace swrenderer if (ds->fake) continue; if (ds->maskedtexturecol != nullptr || ds->bFogBoundary) { - RenderDrawSegment renderer; + RenderDrawSegment renderer(Thread); renderer.Render(ds, ds->x1, ds->x2); } } @@ -165,9 +165,9 @@ namespace swrenderer void RenderTranslucentPass::Render() { CollectPortals(); - VisibleSpriteList::Instance()->Sort(); + Thread->SpriteList->Sort(); - Clip3DFloors *clip3d = Clip3DFloors::Instance(); + Clip3DFloors *clip3d = Thread->Clip3DFloors.get(); if (clip3d->height_top == nullptr) { // kg3D - no visible 3D floors, normal rendering DrawMaskedSingle(false); @@ -188,7 +188,7 @@ namespace swrenderer } clip3d->sclipBottom = hl->height; DrawMaskedSingle(true); - VisiblePlaneList::Instance()->RenderHeight(hl->height); + Thread->PlaneList->RenderHeight(hl->height); } // floors @@ -197,7 +197,7 @@ namespace swrenderer DrawMaskedSingle(true); for (HeightLevel *hl = clip3d->height_top; hl != nullptr && hl->height < 
ViewPos.Z; hl = hl->next) { - VisiblePlaneList::Instance()->RenderHeight(hl->height); + Thread->PlaneList->RenderHeight(hl->height); if (hl->next) { clip3d->fake3D = FAKE3D_DOWN2UP | FAKE3D_CLIPTOP | FAKE3D_CLIPBOTTOM; @@ -214,6 +214,6 @@ namespace swrenderer clip3d->fake3D = 0; } - RenderPlayerSprites::Instance()->Render(); + Thread->PlayerSprites->Render(); } } diff --git a/src/swrenderer/scene/r_translucent_pass.h b/src/swrenderer/scene/r_translucent_pass.h index 64695e0db8..44551b2867 100644 --- a/src/swrenderer/scene/r_translucent_pass.h +++ b/src/swrenderer/scene/r_translucent_pass.h @@ -22,13 +22,14 @@ struct FVoxel; namespace swrenderer { + class RenderThread; class VisibleSprite; struct DrawSegment; class RenderTranslucentPass { public: - static RenderTranslucentPass *Instance(); + RenderTranslucentPass(RenderThread *thread); void Deinit(); void Clear(); @@ -36,6 +37,8 @@ namespace swrenderer bool ClipSpriteColumnWithPortals(int x, VisibleSprite *spr); + RenderThread *Thread = nullptr; + private: void CollectPortals(); void DrawMaskedSingle(bool renew); diff --git a/src/swrenderer/segments/r_clipsegment.cpp b/src/swrenderer/segments/r_clipsegment.cpp index 517d2ee07e..6cfaaa2a0e 100644 --- a/src/swrenderer/segments/r_clipsegment.cpp +++ b/src/swrenderer/segments/r_clipsegment.cpp @@ -34,12 +34,6 @@ namespace swrenderer { - RenderClipSegment *RenderClipSegment::Instance() - { - static RenderClipSegment instance; - return &instance; - } - void RenderClipSegment::Clear(short left, short right) { solidsegs[0].first = -0x7fff; diff --git a/src/swrenderer/segments/r_clipsegment.h b/src/swrenderer/segments/r_clipsegment.h index 33ec0525de..b2fde462e4 100644 --- a/src/swrenderer/segments/r_clipsegment.h +++ b/src/swrenderer/segments/r_clipsegment.h @@ -20,8 +20,6 @@ namespace swrenderer class RenderClipSegment { public: - static RenderClipSegment *Instance(); - void Clear(short left, short right); bool Clip(int x1, int x2, bool solid, VisibleSegmentCallback 
callback); bool Check(int first, int last); diff --git a/src/swrenderer/segments/r_drawsegment.cpp b/src/swrenderer/segments/r_drawsegment.cpp index c4a03ac000..f84d48a7a1 100644 --- a/src/swrenderer/segments/r_drawsegment.cpp +++ b/src/swrenderer/segments/r_drawsegment.cpp @@ -43,10 +43,9 @@ namespace swrenderer { - DrawSegmentList *DrawSegmentList::Instance() + DrawSegmentList::DrawSegmentList(RenderThread *thread) { - static DrawSegmentList instance; - return &instance; + Thread = thread; } void DrawSegmentList::Clear() diff --git a/src/swrenderer/segments/r_drawsegment.h b/src/swrenderer/segments/r_drawsegment.h index e0710619ac..2fa41cd940 100644 --- a/src/swrenderer/segments/r_drawsegment.h +++ b/src/swrenderer/segments/r_drawsegment.h @@ -50,7 +50,7 @@ namespace swrenderer class DrawSegmentList { public: - static DrawSegmentList *Instance(); + DrawSegmentList(RenderThread *thread); unsigned int BeginIndex() const { return StartIndices.Last(); } unsigned int EndIndex() const { return Segments.Size(); } @@ -66,6 +66,8 @@ namespace swrenderer void Push(DrawSegment *segment); void PushInteresting(DrawSegment *segment); + RenderThread *Thread = nullptr; + private: TArray Segments; TArray StartIndices; diff --git a/src/swrenderer/things/r_decal.cpp b/src/swrenderer/things/r_decal.cpp index 098de1dc20..6e3fdb7618 100644 --- a/src/swrenderer/things/r_decal.cpp +++ b/src/swrenderer/things/r_decal.cpp @@ -43,16 +43,17 @@ #include "swrenderer/viewport/r_viewport.h" #include "swrenderer/viewport/r_spritedrawer.h" #include "swrenderer/r_memory.h" +#include "swrenderer/r_renderthread.h" EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor); namespace swrenderer { - void RenderDecal::RenderDecals(side_t *sidedef, DrawSegment *draw_segment, int wallshade, float lightleft, float lightstep, seg_t *curline, const FWallCoords &wallC, bool foggy, FDynamicColormap *basecolormap, const short *walltop, const short *wallbottom) + void RenderDecal::RenderDecals(RenderThread *thread, 
side_t *sidedef, DrawSegment *draw_segment, int wallshade, float lightleft, float lightstep, seg_t *curline, const FWallCoords &wallC, bool foggy, FDynamicColormap *basecolormap, const short *walltop, const short *wallbottom) { for (DBaseDecal *decal = sidedef->AttachedDecals; decal != NULL; decal = decal->WallNext) { - Render(sidedef, decal, draw_segment, wallshade, lightleft, lightstep, curline, wallC, foggy, basecolormap, walltop, wallbottom, 0); + Render(thread, sidedef, decal, draw_segment, wallshade, lightleft, lightstep, curline, wallC, foggy, basecolormap, walltop, wallbottom, 0); } } @@ -60,7 +61,7 @@ namespace swrenderer // = 1: drawing masked textures (including sprites) // Currently, only pass = 0 is done or used - void RenderDecal::Render(side_t *wall, DBaseDecal *decal, DrawSegment *clipper, int wallshade, float lightleft, float lightstep, seg_t *curline, const FWallCoords &savecoord, bool foggy, FDynamicColormap *basecolormap, const short *walltop, const short *wallbottom, int pass) + void RenderDecal::Render(RenderThread *thread, side_t *wall, DBaseDecal *decal, DrawSegment *clipper, int wallshade, float lightleft, float lightstep, seg_t *curline, const FWallCoords &savecoord, bool foggy, FDynamicColormap *basecolormap, const short *walltop, const short *wallbottom, int pass) { DVector2 decal_left, decal_right, decal_pos; int x1, x2; @@ -150,7 +151,7 @@ namespace swrenderer double texturemid; FWallCoords WallC; - if (WallC.Init(decal_left, decal_right, TOO_CLOSE_Z)) + if (WallC.Init(thread, decal_left, decal_right, TOO_CLOSE_Z)) return; x1 = WallC.sx1; @@ -160,7 +161,7 @@ namespace swrenderer return; FWallTmapVals WallT; - WallT.InitFromWallCoords(&WallC); + WallT.InitFromWallCoords(thread, &WallC); // Get the top and bottom clipping arrays switch (decal->RenderFlags & RF_CLIPMASK) @@ -178,7 +179,7 @@ namespace swrenderer else if (pass == 0) { mceilingclip = walltop; - mfloorclip = RenderOpaquePass::Instance()->ceilingclip; + mfloorclip = 
thread->OpaquePass->ceilingclip; needrepeat = 1; } else @@ -194,7 +195,7 @@ namespace swrenderer return; } mceilingclip = walltop; - mfloorclip = RenderOpaquePass::Instance()->ceilingclip; + mfloorclip = thread->OpaquePass->ceilingclip; break; case RF_CLIPMID: @@ -211,7 +212,7 @@ namespace swrenderer { return; } - mceilingclip = RenderOpaquePass::Instance()->floorclip; + mceilingclip = thread->OpaquePass->floorclip; mfloorclip = wallbottom; break; } @@ -302,7 +303,7 @@ namespace swrenderer { // calculate lighting drawerargs.SetLight(usecolormap, light, wallshade); } - DrawColumn(drawerargs, x, WallSpriteTile, walltexcoords, texturemid, maskedScaleY, sprflipvert, mfloorclip, mceilingclip); + DrawColumn(thread, drawerargs, x, WallSpriteTile, walltexcoords, texturemid, maskedScaleY, sprflipvert, mfloorclip, mceilingclip); light += lightstep; x++; } @@ -311,12 +312,12 @@ namespace swrenderer // If this sprite is RF_CLIPFULL on a two-sided line, needrepeat will // be set 1 if we need to draw on the lower wall. In all other cases, // needrepeat will be 0, and the while will fail. 
- mceilingclip = RenderOpaquePass::Instance()->floorclip; + mceilingclip = thread->OpaquePass->floorclip; mfloorclip = wallbottom; } while (needrepeat--); } - void RenderDecal::DrawColumn(SpriteDrawerArgs &drawerargs, int x, FTexture *WallSpriteTile, const ProjectedWallTexcoords &walltexcoords, double texturemid, float maskedScaleY, bool sprflipvert, const short *mfloorclip, const short *mceilingclip) + void RenderDecal::DrawColumn(RenderThread *thread, SpriteDrawerArgs &drawerargs, int x, FTexture *WallSpriteTile, const ProjectedWallTexcoords &walltexcoords, double texturemid, float maskedScaleY, bool sprflipvert, const short *mfloorclip, const short *mceilingclip) { auto viewport = RenderViewport::Instance(); diff --git a/src/swrenderer/things/r_decal.h b/src/swrenderer/things/r_decal.h index 4d14a85c91..4694014b75 100644 --- a/src/swrenderer/things/r_decal.h +++ b/src/swrenderer/things/r_decal.h @@ -25,10 +25,10 @@ namespace swrenderer class RenderDecal { public: - static void RenderDecals(side_t *wall, DrawSegment *draw_segment, int wallshade, float lightleft, float lightstep, seg_t *curline, const FWallCoords &wallC, bool foggy, FDynamicColormap *basecolormap, const short *walltop, const short *wallbottom); + static void RenderDecals(RenderThread *thread, side_t *wall, DrawSegment *draw_segment, int wallshade, float lightleft, float lightstep, seg_t *curline, const FWallCoords &wallC, bool foggy, FDynamicColormap *basecolormap, const short *walltop, const short *wallbottom); private: - static void Render(side_t *wall, DBaseDecal *first, DrawSegment *clipper, int wallshade, float lightleft, float lightstep, seg_t *curline, const FWallCoords &wallC, bool foggy, FDynamicColormap *basecolormap, const short *walltop, const short *wallbottom, int pass); - static void DrawColumn(SpriteDrawerArgs &drawerargs, int x, FTexture *WallSpriteTile, const ProjectedWallTexcoords &walltexcoords, double texturemid, float maskedScaleY, bool sprflipvert, const short *mfloorclip, 
const short *mceilingclip); + static void Render(RenderThread *thread, side_t *wall, DBaseDecal *first, DrawSegment *clipper, int wallshade, float lightleft, float lightstep, seg_t *curline, const FWallCoords &wallC, bool foggy, FDynamicColormap *basecolormap, const short *walltop, const short *wallbottom, int pass); + static void DrawColumn(RenderThread *thread, SpriteDrawerArgs &drawerargs, int x, FTexture *WallSpriteTile, const ProjectedWallTexcoords &walltexcoords, double texturemid, float maskedScaleY, bool sprflipvert, const short *mfloorclip, const short *mceilingclip); }; } diff --git a/src/swrenderer/things/r_particle.cpp b/src/swrenderer/things/r_particle.cpp index 2b2eae50e2..7d26723687 100644 --- a/src/swrenderer/things/r_particle.cpp +++ b/src/swrenderer/things/r_particle.cpp @@ -55,12 +55,13 @@ #include "swrenderer/drawers/r_draw_rgba.h" #include "swrenderer/drawers/r_draw_pal.h" #include "swrenderer/r_memory.h" +#include "swrenderer/r_renderthread.h" EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor); namespace swrenderer { - void RenderParticle::Project(particle_t *particle, const sector_t *sector, int shade, WaterFakeSide fakeside, bool foggy) + void RenderParticle::Project(RenderThread *thread, particle_t *particle, const sector_t *sector, int shade, WaterFakeSide fakeside, bool foggy) { double tr_x, tr_y; double tx, ty; @@ -69,7 +70,7 @@ namespace swrenderer int x1, x2, y1, y2; sector_t* heightsec = NULL; - RenderPortal *renderportal = RenderPortal::Instance(); + RenderPortal *renderportal = thread->Portal.get(); // [ZZ] Particle not visible through the portal plane if (renderportal->CurrentPortal && !!P_PointOnLineSide(particle->Pos, renderportal->CurrentPortal->dst)) @@ -120,8 +121,8 @@ namespace swrenderer // entered, we don't need to clip it to drawsegs like a normal sprite. // Clip particles behind walls. 
- auto ceilingclip = RenderOpaquePass::Instance()->ceilingclip; - auto floorclip = RenderOpaquePass::Instance()->floorclip; + auto ceilingclip = thread->OpaquePass->ceilingclip; + auto floorclip = thread->OpaquePass->floorclip; if (y1 < ceilingclip[x1]) y1 = ceilingclip[x1]; if (y1 < ceilingclip[x2 - 1]) y1 = ceilingclip[x2 - 1]; if (y2 >= floorclip[x1]) y2 = floorclip[x1] - 1; @@ -205,10 +206,10 @@ namespace swrenderer vis->Light.SetColormap(tiz * LightVisibility::Instance()->ParticleGlobVis(), shade, map, particle->bright != 0, false, false); - VisibleSpriteList::Instance()->Push(vis); + thread->SpriteList->Push(vis); } - void RenderParticle::Render(short *cliptop, short *clipbottom, int minZ, int maxZ) + void RenderParticle::Render(RenderThread *thread, short *cliptop, short *clipbottom, int minZ, int maxZ) { auto vis = this; @@ -222,7 +223,7 @@ namespace swrenderer if (ycount <= 0 || countbase <= 0) return; - DrawMaskedSegsBehindParticle(); + DrawMaskedSegsBehindParticle(thread); uint32_t fg = LightBgra::shade_pal_index_simple(color, LightBgra::calc_light_multiplier(LIGHTSCALE(0, vis->Light.ColormapNum << FRACBITS))); @@ -237,7 +238,7 @@ namespace swrenderer uint32_t fracstepx = PARTICLE_TEXTURE_SIZE * FRACUNIT / countbase; uint32_t fracposx = fracstepx / 2; - RenderTranslucentPass *translucentPass = RenderTranslucentPass::Instance(); + RenderTranslucentPass *translucentPass = thread->TranslucentPass.get(); if (viewport->RenderTarget->IsBgra()) { @@ -261,11 +262,11 @@ namespace swrenderer } } - void RenderParticle::DrawMaskedSegsBehindParticle() + void RenderParticle::DrawMaskedSegsBehindParticle(RenderThread *thread) { // Draw any masked textures behind this particle so that when the // particle is drawn, it will be in front of them. 
- DrawSegmentList *segmentlist = DrawSegmentList::Instance(); + DrawSegmentList *segmentlist = thread->DrawSegments.get(); for (unsigned int index = segmentlist->BeginInterestingIndex(); index != segmentlist->EndInterestingIndex(); index++) { DrawSegment *ds = segmentlist->InterestingSegment(index); @@ -281,7 +282,7 @@ namespace swrenderer // [ZZ] only draw stuff that's inside the same portal as the particle, other portals will care for themselves if (ds->CurrentPortalUniq == CurrentPortalUniq) { - RenderDrawSegment renderer; + RenderDrawSegment renderer(thread); renderer.Render(ds, MAX(ds->x1, x1), MIN(ds->x2, x2)); } } diff --git a/src/swrenderer/things/r_particle.h b/src/swrenderer/things/r_particle.h index 2115220749..af2f59e6cc 100644 --- a/src/swrenderer/things/r_particle.h +++ b/src/swrenderer/things/r_particle.h @@ -23,14 +23,14 @@ namespace swrenderer class RenderParticle : public VisibleSprite { public: - static void Project(particle_t *, const sector_t *sector, int shade, WaterFakeSide fakeside, bool foggy); + static void Project(RenderThread *thread, particle_t *, const sector_t *sector, int shade, WaterFakeSide fakeside, bool foggy); protected: bool IsParticle() const override { return true; } - void Render(short *cliptop, short *clipbottom, int minZ, int maxZ) override; + void Render(RenderThread *thread, short *cliptop, short *clipbottom, int minZ, int maxZ) override; private: - void DrawMaskedSegsBehindParticle(); + void DrawMaskedSegsBehindParticle(RenderThread *thread); fixed_t xscale = 0; fixed_t startfrac = 0; // horizontal position of x1 diff --git a/src/swrenderer/things/r_playersprite.cpp b/src/swrenderer/things/r_playersprite.cpp index 0b8661e528..3521f115fe 100644 --- a/src/swrenderer/things/r_playersprite.cpp +++ b/src/swrenderer/things/r_playersprite.cpp @@ -55,6 +55,7 @@ #include "swrenderer/things/r_sprite.h" #include "swrenderer/viewport/r_viewport.h" #include "swrenderer/r_memory.h" +#include "swrenderer/r_renderthread.h" #include 
"g_levellocals.h" EXTERN_CVAR(Bool, st_scale) @@ -65,10 +66,9 @@ EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor) namespace swrenderer { - RenderPlayerSprites *RenderPlayerSprites::Instance() + RenderPlayerSprites::RenderPlayerSprites(RenderThread *thread) { - static RenderPlayerSprites instance; - return &instance; + Thread = thread; } void RenderPlayerSprites::Render() @@ -121,7 +121,7 @@ namespace swrenderer else { // This used to use camera->Sector but due to interpolation that can be incorrect // when the interpolated viewpoint is in a different sector than the camera. - sec = RenderOpaquePass::Instance()->FakeFlat(viewsector, &tempsec, &floorlight, &ceilinglight, nullptr, 0, 0, 0, 0); + sec = Thread->OpaquePass->FakeFlat(viewsector, &tempsec, &floorlight, &ceilinglight, nullptr, 0, 0, 0, 0); // [RH] set basecolormap basecolormap = sec->ColorMap; diff --git a/src/swrenderer/things/r_playersprite.h b/src/swrenderer/things/r_playersprite.h index 7c821f7425..0a3d8903d1 100644 --- a/src/swrenderer/things/r_playersprite.h +++ b/src/swrenderer/things/r_playersprite.h @@ -74,11 +74,13 @@ namespace swrenderer class RenderPlayerSprites { public: - static RenderPlayerSprites *Instance(); + RenderPlayerSprites(RenderThread *thread); void Render(); void RenderRemaining(); + RenderThread *Thread = nullptr; + private: void RenderSprite(DPSprite *pspr, AActor *owner, float bobx, float boby, double wx, double wy, double ticfrac, int spriteshade, FDynamicColormap *basecolormap, bool foggy); diff --git a/src/swrenderer/things/r_sprite.cpp b/src/swrenderer/things/r_sprite.cpp index 897af41cd4..6b02a0ec1e 100644 --- a/src/swrenderer/things/r_sprite.cpp +++ b/src/swrenderer/things/r_sprite.cpp @@ -55,12 +55,13 @@ #include "swrenderer/things/r_sprite.h" #include "swrenderer/viewport/r_viewport.h" #include "swrenderer/r_memory.h" +#include "swrenderer/r_renderthread.h" EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor) namespace swrenderer { - void RenderSprite::Project(AActor *thing, 
const DVector3 &pos, FTexture *tex, const DVector2 &spriteScale, int renderflags, WaterFakeSide fakeside, F3DFloor *fakefloor, F3DFloor *fakeceiling, sector_t *current_sector, int spriteshade, bool foggy, FDynamicColormap *basecolormap) + void RenderSprite::Project(RenderThread *thread, AActor *thing, const DVector3 &pos, FTexture *tex, const DVector2 &spriteScale, int renderflags, WaterFakeSide fakeside, F3DFloor *fakefloor, F3DFloor *fakeceiling, sector_t *current_sector, int spriteshade, bool foggy, FDynamicColormap *basecolormap) { // transform the origin point double tr_x = pos.X - ViewPos.X; @@ -75,7 +76,7 @@ namespace swrenderer double tx = tr_x * ViewSin - tr_y * ViewCos; // [RH] Flip for mirrors - RenderPortal *renderportal = RenderPortal::Instance(); + RenderPortal *renderportal = thread->Portal.get(); if (renderportal->MirrorFlags & RF_XFLIP) { tx = -tx; @@ -227,10 +228,10 @@ namespace swrenderer vis->Light.SetColormap(LightVisibility::Instance()->SpriteGlobVis() / MAX(tz, MINZ), spriteshade, basecolormap, fullbright, invertcolormap, fadeToBlack); - VisibleSpriteList::Instance()->Push(vis); + thread->SpriteList->Push(vis); } - void RenderSprite::Render(short *mfloorclip, short *mceilingclip, int, int) + void RenderSprite::Render(RenderThread *thread, short *mfloorclip, short *mceilingclip, int, int) { auto vis = this; @@ -285,7 +286,7 @@ namespace swrenderer if (x < x2) { - RenderTranslucentPass *translucentPass = RenderTranslucentPass::Instance(); + RenderTranslucentPass *translucentPass = thread->TranslucentPass.get(); while (x < x2) { diff --git a/src/swrenderer/things/r_sprite.h b/src/swrenderer/things/r_sprite.h index b6298d571b..776af54fa8 100644 --- a/src/swrenderer/things/r_sprite.h +++ b/src/swrenderer/things/r_sprite.h @@ -20,10 +20,10 @@ namespace swrenderer class RenderSprite : public VisibleSprite { public: - static void Project(AActor *thing, const DVector3 &pos, FTexture *tex, const DVector2 &spriteScale, int renderflags, WaterFakeSide 
fakeside, F3DFloor *fakefloor, F3DFloor *fakeceiling, sector_t *current_sector, int spriteshade, bool foggy, FDynamicColormap *basecolormap); + static void Project(RenderThread *thread, AActor *thing, const DVector3 &pos, FTexture *tex, const DVector2 &spriteScale, int renderflags, WaterFakeSide fakeside, F3DFloor *fakefloor, F3DFloor *fakeceiling, sector_t *current_sector, int spriteshade, bool foggy, FDynamicColormap *basecolormap); protected: - void Render(short *cliptop, short *clipbottom, int minZ, int maxZ) override; + void Render(RenderThread *thread, short *cliptop, short *clipbottom, int minZ, int maxZ) override; private: fixed_t xscale = 0; diff --git a/src/swrenderer/things/r_visiblesprite.cpp b/src/swrenderer/things/r_visiblesprite.cpp index 7d19e14607..ac411db573 100644 --- a/src/swrenderer/things/r_visiblesprite.cpp +++ b/src/swrenderer/things/r_visiblesprite.cpp @@ -35,12 +35,13 @@ #include "swrenderer/scene/r_light.h" #include "swrenderer/viewport/r_viewport.h" #include "swrenderer/r_memory.h" +#include "swrenderer/r_renderthread.h" EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor); namespace swrenderer { - void VisibleSprite::Render() + void VisibleSprite::Render(RenderThread *thread) { static short clipbot[MAXWIDTH]; static short cliptop[MAXWIDTH]; @@ -56,7 +57,7 @@ namespace swrenderer int colormapnum = spr->Light.ColormapNum; F3DFloor *rover; - Clip3DFloors *clip3d = Clip3DFloors::Instance(); + Clip3DFloors *clip3d = thread->Clip3DFloors.get(); // [RH] Check for particles if (spr->IsParticle()) @@ -65,7 +66,7 @@ namespace swrenderer if ((clip3d->fake3D & FAKE3D_CLIPBOTTOM) && spr->gpos.Z <= clip3d->sclipBottom) return; if ((clip3d->fake3D & FAKE3D_CLIPTOP) && spr->gpos.Z >= clip3d->sclipTop) return; - spr->Render(nullptr, nullptr, 0, 0); + spr->Render(thread, nullptr, nullptr, 0, 0); return; } @@ -279,7 +280,7 @@ namespace swrenderer // Scan drawsegs from end to start for obscuring segs. 
// The first drawseg that is closer than the sprite is the clip seg. - DrawSegmentList *segmentlist = DrawSegmentList::Instance(); + DrawSegmentList *segmentlist = thread->DrawSegments.get(); for (unsigned int index = segmentlist->BeginIndex(); index != segmentlist->EndIndex(); index++) { DrawSegment *ds = segmentlist->Segment(index); @@ -322,13 +323,13 @@ namespace swrenderer (spr->gpos.Y - ds->curline->v1->fY()) * (ds->curline->v2->fX() - ds->curline->v1->fX()) - (spr->gpos.X - ds->curline->v1->fX()) * (ds->curline->v2->fY() - ds->curline->v1->fY()) <= 0)) { - RenderPortal *renderportal = RenderPortal::Instance(); + RenderPortal *renderportal = thread->Portal.get(); // seg is behind sprite, so draw the mid texture if it has one if (ds->CurrentPortalUniq == renderportal->CurrentPortalUniq && // [ZZ] instead, portal uniq check is made here (ds->maskedtexturecol != nullptr || ds->bFogBoundary)) { - RenderDrawSegment renderer; + RenderDrawSegment renderer(thread); renderer.Render(ds, r1, r2); } @@ -373,7 +374,7 @@ namespace swrenderer if (!spr->IsVoxel()) { - spr->Render(clipbot, cliptop, 0, 0); + spr->Render(thread, clipbot, cliptop, 0, 0); } else { @@ -406,7 +407,7 @@ namespace swrenderer } int minvoxely = spr->gzt <= hzt ? 0 : xs_RoundToInt((spr->gzt - hzt) / spr->yscale); int maxvoxely = spr->gzb > hzb ? 
INT_MAX : xs_RoundToInt((spr->gzt - hzb) / spr->yscale); - spr->Render(cliptop, clipbot, minvoxely, maxvoxely); + spr->Render(thread, cliptop, clipbot, minvoxely, maxvoxely); } spr->Light.BaseColormap = colormap; spr->Light.ColormapNum = colormapnum; diff --git a/src/swrenderer/things/r_visiblesprite.h b/src/swrenderer/things/r_visiblesprite.h index 87abe767a4..7bd5b9de2e 100644 --- a/src/swrenderer/things/r_visiblesprite.h +++ b/src/swrenderer/things/r_visiblesprite.h @@ -22,13 +22,15 @@ namespace swrenderer { + class RenderThread; + class VisibleSprite { public: VisibleSprite() { RenderStyle = STYLE_Normal; } virtual ~VisibleSprite() { } - void Render(); + void Render(RenderThread *thread); bool IsCurrentPortalUniq(int portalUniq) const { return CurrentPortalUniq == portalUniq; } const FVector3 &WorldPos() const { return gpos; } @@ -41,7 +43,7 @@ namespace swrenderer virtual bool IsVoxel() const { return false; } virtual bool IsWallSprite() const { return false; } - virtual void Render(short *cliptop, short *clipbottom, int minZ, int maxZ) = 0; + virtual void Render(RenderThread *thread, short *cliptop, short *clipbottom, int minZ, int maxZ) = 0; FTexture *pic = nullptr; diff --git a/src/swrenderer/things/r_visiblespritelist.cpp b/src/swrenderer/things/r_visiblespritelist.cpp index 76706d7bc9..eac6c91f30 100644 --- a/src/swrenderer/things/r_visiblespritelist.cpp +++ b/src/swrenderer/things/r_visiblespritelist.cpp @@ -28,12 +28,6 @@ namespace swrenderer { - VisibleSpriteList *VisibleSpriteList::Instance() - { - static VisibleSpriteList instance; - return &instance; - } - void VisibleSpriteList::Clear() { Sprites.Clear(); diff --git a/src/swrenderer/things/r_visiblespritelist.h b/src/swrenderer/things/r_visiblespritelist.h index 99370c2503..2f53514fa3 100644 --- a/src/swrenderer/things/r_visiblespritelist.h +++ b/src/swrenderer/things/r_visiblespritelist.h @@ -21,8 +21,6 @@ namespace swrenderer class VisibleSpriteList { public: - static VisibleSpriteList 
*Instance(); - void Clear(); void PushPortal(); void PopPortal(); diff --git a/src/swrenderer/things/r_voxel.cpp b/src/swrenderer/things/r_voxel.cpp index e3e101c4a4..0a2d8e074e 100644 --- a/src/swrenderer/things/r_voxel.cpp +++ b/src/swrenderer/things/r_voxel.cpp @@ -43,12 +43,13 @@ #include "swrenderer/viewport/r_viewport.h" #include "swrenderer/viewport/r_spritedrawer.h" #include "swrenderer/r_memory.h" +#include "swrenderer/r_renderthread.h" EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor) namespace swrenderer { - void RenderVoxel::Project(AActor *thing, DVector3 pos, FVoxelDef *voxel, const DVector2 &spriteScale, int renderflags, WaterFakeSide fakeside, F3DFloor *fakefloor, F3DFloor *fakeceiling, sector_t *current_sector, int spriteshade, bool foggy, FDynamicColormap *basecolormap) + void RenderVoxel::Project(RenderThread *thread, AActor *thing, DVector3 pos, FVoxelDef *voxel, const DVector2 &spriteScale, int renderflags, WaterFakeSide fakeside, F3DFloor *fakefloor, F3DFloor *fakeceiling, sector_t *current_sector, int spriteshade, bool foggy, FDynamicColormap *basecolormap) { // transform the origin point double tr_x = pos.X - ViewPos.X; @@ -58,7 +59,7 @@ namespace swrenderer double tx = tr_x * ViewSin - tr_y * ViewCos; // [RH] Flip for mirrors - RenderPortal *renderportal = RenderPortal::Instance(); + RenderPortal *renderportal = thread->Portal.get(); if (renderportal->MirrorFlags & RF_XFLIP) { tx = -tx; @@ -178,10 +179,10 @@ namespace swrenderer vis->Light.SetColormap(LightVisibility::Instance()->SpriteGlobVis() / MAX(tz, MINZ), spriteshade, basecolormap, fullbright, invertcolormap, fadeToBlack); - VisibleSpriteList::Instance()->Push(vis, true); + thread->SpriteList->Push(vis, true); } - void RenderVoxel::Render(short *cliptop, short *clipbottom, int minZ, int maxZ) + void RenderVoxel::Render(RenderThread *thread, short *cliptop, short *clipbottom, int minZ, int maxZ) { auto sprite = this; auto viewport = RenderViewport::Instance(); diff --git 
a/src/swrenderer/things/r_voxel.h b/src/swrenderer/things/r_voxel.h index 7d5fd621f0..0bca0fb87c 100644 --- a/src/swrenderer/things/r_voxel.h +++ b/src/swrenderer/things/r_voxel.h @@ -58,13 +58,13 @@ namespace swrenderer class RenderVoxel : public VisibleSprite { public: - static void Project(AActor *thing, DVector3 pos, FVoxelDef *voxel, const DVector2 &spriteScale, int renderflags, WaterFakeSide fakeside, F3DFloor *fakefloor, F3DFloor *fakeceiling, sector_t *current_sector, int spriteshade, bool foggy, FDynamicColormap *basecolormap); + static void Project(RenderThread *thread, AActor *thing, DVector3 pos, FVoxelDef *voxel, const DVector2 &spriteScale, int renderflags, WaterFakeSide fakeside, F3DFloor *fakefloor, F3DFloor *fakeceiling, sector_t *current_sector, int spriteshade, bool foggy, FDynamicColormap *basecolormap); static void Deinit(); protected: bool IsVoxel() const override { return true; } - void Render(short *cliptop, short *clipbottom, int minZ, int maxZ) override; + void Render(RenderThread *thread, short *cliptop, short *clipbottom, int minZ, int maxZ) override; private: struct posang diff --git a/src/swrenderer/things/r_wallsprite.cpp b/src/swrenderer/things/r_wallsprite.cpp index 8c813f479e..8b0dc16cc7 100644 --- a/src/swrenderer/things/r_wallsprite.cpp +++ b/src/swrenderer/things/r_wallsprite.cpp @@ -57,12 +57,13 @@ #include "swrenderer/line/r_walldraw.h" #include "swrenderer/viewport/r_viewport.h" #include "swrenderer/r_memory.h" +#include "swrenderer/r_renderthread.h" EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor); namespace swrenderer { - void RenderWallSprite::Project(AActor *thing, const DVector3 &pos, FTextureID picnum, const DVector2 &scale, int renderflags, int spriteshade, bool foggy, FDynamicColormap *basecolormap) + void RenderWallSprite::Project(RenderThread *thread, AActor *thing, const DVector3 &pos, FTextureID picnum, const DVector2 &scale, int renderflags, int spriteshade, bool foggy, FDynamicColormap *basecolormap) { 
FWallCoords wallc; double x1, x2; @@ -87,10 +88,10 @@ namespace swrenderer right.Y = right.Y + x2 * angsin; // Is it off-screen? - if (wallc.Init(left, right, TOO_CLOSE_Z)) + if (wallc.Init(thread, left, right, TOO_CLOSE_Z)) return; - RenderPortal *renderportal = RenderPortal::Instance(); + RenderPortal *renderportal = thread->Portal.get(); if (wallc.sx1 >= renderportal->WindowRight || wallc.sx2 <= renderportal->WindowLeft) return; @@ -134,10 +135,10 @@ namespace swrenderer vis->Light.SetColormap(LightVisibility::Instance()->SpriteGlobVis() / MAX(tz, MINZ), spriteshade, basecolormap, false, false, false); - VisibleSpriteList::Instance()->Push(vis); + thread->SpriteList->Push(vis); } - void RenderWallSprite::Render(short *mfloorclip, short *mceilingclip, int, int) + void RenderWallSprite::Render(RenderThread *thread, short *mfloorclip, short *mceilingclip, int, int) { auto spr = this; @@ -151,7 +152,7 @@ namespace swrenderer return; FWallTmapVals WallT; - WallT.InitFromWallCoords(&spr->wallc); + WallT.InitFromWallCoords(thread, &spr->wallc); ProjectedWallTexcoords walltexcoords; walltexcoords.Project(spr->pic->GetWidth() << FRACBITS, x1, x2, WallT); @@ -229,7 +230,7 @@ namespace swrenderer } else { - RenderTranslucentPass *translucentPass = RenderTranslucentPass::Instance(); + RenderTranslucentPass *translucentPass = thread->TranslucentPass.get(); while (x < x2) { diff --git a/src/swrenderer/things/r_wallsprite.h b/src/swrenderer/things/r_wallsprite.h index 1a0c87b70e..796a9a2bdc 100644 --- a/src/swrenderer/things/r_wallsprite.h +++ b/src/swrenderer/things/r_wallsprite.h @@ -23,11 +23,11 @@ namespace swrenderer class RenderWallSprite : public VisibleSprite { public: - static void Project(AActor *thing, const DVector3 &pos, FTextureID picnum, const DVector2 &scale, int renderflags, int spriteshade, bool foggy, FDynamicColormap *basecolormap); + static void Project(RenderThread *thread, AActor *thing, const DVector3 &pos, FTextureID picnum, const DVector2 &scale, int 
renderflags, int spriteshade, bool foggy, FDynamicColormap *basecolormap); protected: bool IsWallSprite() const override { return true; } - void Render(short *cliptop, short *clipbottom, int minZ, int maxZ) override; + void Render(RenderThread *thread, short *cliptop, short *clipbottom, int minZ, int maxZ) override; private: static void DrawColumn(SpriteDrawerArgs &drawerargs, int x, FTexture *WallSpriteTile, const ProjectedWallTexcoords &walltexcoords, double texturemid, float maskedScaleY, bool sprflipvert, const short *mfloorclip, const short *mceilingclip); From 45f623faf45419567970142c0a7adf3c38ba8918 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 4 Feb 2017 02:50:52 +0100 Subject: [PATCH 809/912] Move frame memory allocator into RenderThread --- src/swrenderer/line/r_line.cpp | 20 ++++++++++---------- src/swrenderer/plane/r_visibleplane.cpp | 11 ++++++----- src/swrenderer/plane/r_visibleplane.h | 4 ++-- src/swrenderer/plane/r_visibleplanelist.cpp | 2 +- src/swrenderer/r_memory.cpp | 3 --- src/swrenderer/r_memory.h | 12 ++++++------ src/swrenderer/r_renderthread.cpp | 2 ++ src/swrenderer/r_renderthread.h | 2 ++ src/swrenderer/scene/r_opaque_pass.cpp | 8 ++++---- src/swrenderer/scene/r_portal.cpp | 8 ++++---- src/swrenderer/scene/r_scene.cpp | 2 +- src/swrenderer/segments/r_portalsegment.cpp | 7 ++++--- src/swrenderer/segments/r_portalsegment.h | 4 +++- src/swrenderer/things/r_particle.cpp | 2 +- src/swrenderer/things/r_sprite.cpp | 2 +- src/swrenderer/things/r_voxel.cpp | 2 +- src/swrenderer/things/r_wallsprite.cpp | 2 +- 17 files changed, 49 insertions(+), 44 deletions(-) diff --git a/src/swrenderer/line/r_line.cpp b/src/swrenderer/line/r_line.cpp index 156c2f3af9..17a4758621 100644 --- a/src/swrenderer/line/r_line.cpp +++ b/src/swrenderer/line/r_line.cpp @@ -335,7 +335,7 @@ namespace swrenderer RenderPortal *renderportal = Thread->Portal.get(); - DrawSegment *draw_segment = RenderMemory::NewObject<DrawSegment>(); + DrawSegment *draw_segment =
Thread->FrameMemory->NewObject<DrawSegment>(); Thread->DrawSegments->Push(draw_segment); draw_segment->CurrentPortalUniq = renderportal->CurrentPortalUniq; @@ -373,8 +373,8 @@ namespace swrenderer } else if (backsector == NULL) { - draw_segment->sprtopclip = RenderMemory::AllocMemory<short>(stop - start); - draw_segment->sprbottomclip = RenderMemory::AllocMemory<short>(stop - start); + draw_segment->sprtopclip = Thread->FrameMemory->AllocMemory<short>(stop - start); + draw_segment->sprbottomclip = Thread->FrameMemory->AllocMemory<short>(stop - start); fillshort(draw_segment->sprtopclip, stop - start, viewheight); memset(draw_segment->sprbottomclip, -1, (stop - start) * sizeof(short)); draw_segment->silhouette = SIL_BOTH; @@ -407,13 +407,13 @@ namespace swrenderer { if (doorclosed || (rw_backcz1 <= rw_frontfz1 && rw_backcz2 <= rw_frontfz2)) { - draw_segment->sprbottomclip = RenderMemory::AllocMemory<short>(stop - start); + draw_segment->sprbottomclip = Thread->FrameMemory->AllocMemory<short>(stop - start); memset(draw_segment->sprbottomclip, -1, (stop - start) * sizeof(short)); draw_segment->silhouette |= SIL_BOTTOM; } if (doorclosed || (rw_backfz1 >= rw_frontcz1 && rw_backfz2 >= rw_frontcz2)) { // killough 1/17/98, 2/8/98 - draw_segment->sprtopclip = RenderMemory::AllocMemory<short>(stop - start); + draw_segment->sprtopclip = Thread->FrameMemory->AllocMemory<short>(stop - start); fillshort(draw_segment->sprtopclip, stop - start, viewheight); draw_segment->silhouette |= SIL_TOP; } @@ -453,7 +453,7 @@ namespace swrenderer maskedtexture = true; // kg3D - backup for mid and fake walls - draw_segment->bkup = RenderMemory::AllocMemory<short>(stop - start); + draw_segment->bkup = Thread->FrameMemory->AllocMemory<short>(stop - start); memcpy(draw_segment->bkup, &Thread->OpaquePass->ceilingclip[start], sizeof(short)*(stop - start)); draw_segment->bFogBoundary = IsFogBoundary(frontsector, backsector); @@ -462,8 +462,8 @@ namespace swrenderer if (sidedef->GetTexture(side_t::mid).isValid()) draw_segment->bFakeBoundary |= 4; // it is also mid texture - 
draw_segment->maskedtexturecol = RenderMemory::AllocMemory(stop - start); - draw_segment->swall = RenderMemory::AllocMemory(stop - start); + draw_segment->maskedtexturecol = Thread->FrameMemory->AllocMemory(stop - start); + draw_segment->swall = Thread->FrameMemory->AllocMemory(stop - start); lwal = draw_segment->maskedtexturecol; swal = draw_segment->swall; @@ -563,13 +563,13 @@ namespace swrenderer // save sprite clipping info if (((draw_segment->silhouette & SIL_TOP) || maskedtexture) && draw_segment->sprtopclip == nullptr) { - draw_segment->sprtopclip = RenderMemory::AllocMemory(stop - start); + draw_segment->sprtopclip = Thread->FrameMemory->AllocMemory(stop - start); memcpy(draw_segment->sprtopclip, &Thread->OpaquePass->ceilingclip[start], sizeof(short)*(stop - start)); } if (((draw_segment->silhouette & SIL_BOTTOM) || maskedtexture) && draw_segment->sprbottomclip == nullptr) { - draw_segment->sprbottomclip = RenderMemory::AllocMemory(stop - start); + draw_segment->sprbottomclip = Thread->FrameMemory->AllocMemory(stop - start); memcpy(draw_segment->sprbottomclip, &Thread->OpaquePass->floorclip[start], sizeof(short)*(stop - start)); } diff --git a/src/swrenderer/plane/r_visibleplane.cpp b/src/swrenderer/plane/r_visibleplane.cpp index 0d9e97ef38..a0a58c7e78 100644 --- a/src/swrenderer/plane/r_visibleplane.cpp +++ b/src/swrenderer/plane/r_visibleplane.cpp @@ -29,6 +29,7 @@ #include "g_level.h" #include "gl/dynlights/gl_dynlight.h" #include "swrenderer/r_memory.h" +#include "swrenderer/r_renderthread.h" #include "swrenderer/scene/r_opaque_pass.h" #include "swrenderer/scene/r_3dfloors.h" #include "swrenderer/scene/r_portal.h" @@ -43,19 +44,19 @@ CVAR(Bool, tilt, false, 0); namespace swrenderer { - VisiblePlane::VisiblePlane() + VisiblePlane::VisiblePlane(RenderThread *thread) { picnum.SetNull(); height.set(0.0, 0.0, 1.0, 0.0); - bottom = RenderMemory::AllocMemory(viewwidth); - top = RenderMemory::AllocMemory(viewwidth); + bottom = 
thread->FrameMemory->AllocMemory(viewwidth); + top = thread->FrameMemory->AllocMemory(viewwidth); fillshort(bottom, viewwidth, 0); fillshort(top, viewwidth, 0x7fff); } - void VisiblePlane::AddLights(FLightNode *node) + void VisiblePlane::AddLights(RenderThread *thread, FLightNode *node) { if (!r_dynlights) return; @@ -77,7 +78,7 @@ namespace swrenderer } if (!found) { - VisiblePlaneLight *newlight = RenderMemory::NewObject(); + VisiblePlaneLight *newlight = thread->FrameMemory->NewObject(); newlight->next = lights; newlight->lightsource = node->lightsource; lights = newlight; diff --git a/src/swrenderer/plane/r_visibleplane.h b/src/swrenderer/plane/r_visibleplane.h index 5e1c9230ba..970eee6326 100644 --- a/src/swrenderer/plane/r_visibleplane.h +++ b/src/swrenderer/plane/r_visibleplane.h @@ -35,9 +35,9 @@ namespace swrenderer struct VisiblePlane { - VisiblePlane(); + VisiblePlane(RenderThread *thread); - void AddLights(FLightNode *node); + void AddLights(RenderThread *thread, FLightNode *node); void Render(RenderThread *thread, fixed_t alpha, bool additive, bool masked); VisiblePlane *next = nullptr; // Next visplane in hash chain -- killough diff --git a/src/swrenderer/plane/r_visibleplanelist.cpp b/src/swrenderer/plane/r_visibleplanelist.cpp index 7ecd745bfc..18c1f57e45 100644 --- a/src/swrenderer/plane/r_visibleplanelist.cpp +++ b/src/swrenderer/plane/r_visibleplanelist.cpp @@ -58,7 +58,7 @@ namespace swrenderer VisiblePlane *VisiblePlaneList::Add(unsigned hash) { - VisiblePlane *newplane = RenderMemory::NewObject(); + VisiblePlane *newplane = Thread->FrameMemory->NewObject(Thread); newplane->next = visplanes[hash]; visplanes[hash] = newplane; return newplane; diff --git a/src/swrenderer/r_memory.cpp b/src/swrenderer/r_memory.cpp index 5b102c09bd..11c5b0b3b4 100644 --- a/src/swrenderer/r_memory.cpp +++ b/src/swrenderer/r_memory.cpp @@ -68,7 +68,4 @@ namespace swrenderer FreeBlocks.push_back(std::move(block)); } } - - std::vector> RenderMemory::UsedBlocks; - 
std::vector<std::unique_ptr<MemoryBlock>> RenderMemory::FreeBlocks; } diff --git a/src/swrenderer/r_memory.h b/src/swrenderer/r_memory.h index dcd00a7a8c..f01d52a4dc 100644 --- a/src/swrenderer/r_memory.h +++ b/src/swrenderer/r_memory.h @@ -21,23 +21,23 @@ namespace swrenderer class RenderMemory { public: - static void Clear(); + void Clear(); template<typename T> - static T *AllocMemory(int size = 1) + T *AllocMemory(int size = 1) { return (T*)AllocBytes(sizeof(T) * size); } template<typename T, typename... Types> - static T *NewObject(Types &&... args) + T *NewObject(Types &&... args) { void *ptr = AllocBytes(sizeof(T)); return new (ptr)T(std::forward<Types>(args)...); } private: - static void *AllocBytes(int size); + void *AllocBytes(int size); enum { BlockSize = 1024 * 1024 }; @@ -52,7 +52,7 @@ namespace swrenderer uint8_t *Data; uint32_t Position; }; - static std::vector<std::unique_ptr<MemoryBlock>> UsedBlocks; - static std::vector<std::unique_ptr<MemoryBlock>> FreeBlocks; + std::vector<std::unique_ptr<MemoryBlock>> UsedBlocks; + std::vector<std::unique_ptr<MemoryBlock>> FreeBlocks; }; } diff --git a/src/swrenderer/r_renderthread.cpp b/src/swrenderer/r_renderthread.cpp index 057c096328..096f64bf6e 100644 --- a/src/swrenderer/r_renderthread.cpp +++ b/src/swrenderer/r_renderthread.cpp @@ -47,11 +47,13 @@ #include "swrenderer/plane/r_visibleplanelist.h" #include "swrenderer/segments/r_drawsegment.h" #include "swrenderer/segments/r_clipsegment.h" +#include "r_memory.h" namespace swrenderer { RenderThread::RenderThread() { + FrameMemory = std::make_unique<RenderMemory>(); OpaquePass = std::make_unique<RenderOpaquePass>(this); TranslucentPass = std::make_unique<RenderTranslucentPass>(this); SpriteList = std::make_unique<VisibleSpriteList>(); diff --git a/src/swrenderer/r_renderthread.h b/src/swrenderer/r_renderthread.h index 21bb4b6faf..24b86e8613 100644 --- a/src/swrenderer/r_renderthread.h +++ b/src/swrenderer/r_renderthread.h @@ -36,6 +36,7 @@ namespace swrenderer class VisiblePlaneList; class DrawSegmentList; class RenderClipSegment; + class RenderMemory; class RenderThread { @@ -43,6 +44,7 @@ namespace swrenderer RenderThread(); ~RenderThread(); + std::unique_ptr<RenderMemory> FrameMemory; std::unique_ptr<RenderOpaquePass> OpaquePass; std::unique_ptr<RenderTranslucentPass> 
TranslucentPass; std::unique_ptr<VisibleSpriteList> SpriteList; diff --git a/src/swrenderer/scene/r_opaque_pass.cpp b/src/swrenderer/scene/r_opaque_pass.cpp index f2aca7a49f..73d2824881 100644 --- a/src/swrenderer/scene/r_opaque_pass.cpp +++ b/src/swrenderer/scene/r_opaque_pass.cpp @@ -519,7 +519,7 @@ namespace swrenderer ) : nullptr; if (ceilingplane) - ceilingplane->AddLights(frontsector->lighthead); + ceilingplane->AddLights(Thread, frontsector->lighthead); if (cameraLight->FixedLightLevel() < 0 && frontsector->e && frontsector->e->XFloor.lightlist.Size()) { @@ -560,7 +560,7 @@ namespace swrenderer ) : nullptr; if (floorplane) - floorplane->AddLights(frontsector->lighthead); + floorplane->AddLights(Thread, frontsector->lighthead); // kg3D - fake planes rendering if (r_3dfloors && frontsector->e && frontsector->e->XFloor.ffloors.Size()) @@ -625,7 +625,7 @@ namespace swrenderer basecolormap); if (floorplane) - floorplane->AddLights(frontsector->lighthead); + floorplane->AddLights(Thread, frontsector->lighthead); FakeDrawLoop(sub, floorplane, ceilingplane, foggy, basecolormap); clip3d->fake3D = 0; @@ -691,7 +691,7 @@ namespace swrenderer basecolormap); if (ceilingplane) - ceilingplane->AddLights(frontsector->lighthead); + ceilingplane->AddLights(Thread, frontsector->lighthead); FakeDrawLoop(sub, floorplane, ceilingplane, foggy, basecolormap); clip3d->fake3D = 0; diff --git a/src/swrenderer/scene/r_portal.cpp b/src/swrenderer/scene/r_portal.cpp index 38a9ffc1b1..bf804a4d13 100644 --- a/src/swrenderer/scene/r_portal.cpp +++ b/src/swrenderer/scene/r_portal.cpp @@ -186,7 +186,7 @@ namespace swrenderer } // Create a drawseg to clip sprites to the sky plane - DrawSegment *draw_segment = RenderMemory::NewObject<DrawSegment>(); + DrawSegment *draw_segment = Thread->FrameMemory->NewObject<DrawSegment>(); draw_segment->CurrentPortalUniq = CurrentPortalUniq; draw_segment->siz1 = INT_MAX; draw_segment->siz2 = INT_MAX; @@ -195,8 +195,8 @@ namespace swrenderer draw_segment->x1 = pl->left; draw_segment->x2 = pl->right; 
draw_segment->silhouette = SIL_BOTH; - draw_segment->sprbottomclip = RenderMemory::AllocMemory<short>(pl->right - pl->left); - draw_segment->sprtopclip = RenderMemory::AllocMemory<short>(pl->right - pl->left); + draw_segment->sprbottomclip = Thread->FrameMemory->AllocMemory<short>(pl->right - pl->left); + draw_segment->sprtopclip = Thread->FrameMemory->AllocMemory<short>(pl->right - pl->left); draw_segment->maskedtexturecol = nullptr; draw_segment->swall = nullptr; draw_segment->bFogBoundary = false; @@ -526,7 +526,7 @@ namespace swrenderer void RenderPortal::AddLinePortal(line_t *linedef, int x1, int x2, const short *topclip, const short *bottomclip) { - WallPortals.Push(RenderMemory::NewObject<PortalDrawseg>(linedef, x1, x2, topclip, bottomclip)); + WallPortals.Push(Thread->FrameMemory->NewObject<PortalDrawseg>(Thread, linedef, x1, x2, topclip, bottomclip)); } } /* diff --git a/src/swrenderer/scene/r_scene.cpp b/src/swrenderer/scene/r_scene.cpp index 7e8d20f783..37eff0b2b2 100644 --- a/src/swrenderer/scene/r_scene.cpp +++ b/src/swrenderer/scene/r_scene.cpp @@ -113,7 +113,7 @@ namespace swrenderer MaskedCycles.Reset(); WallScanCycles.Reset(); - RenderMemory::Clear(); + Thread->FrameMemory->Clear(); Clip3DFloors *clip3d = Thread->Clip3DFloors.get(); clip3d->Cleanup(); diff --git a/src/swrenderer/segments/r_portalsegment.cpp b/src/swrenderer/segments/r_portalsegment.cpp index dbc8df9768..f1591b6667 100644 --- a/src/swrenderer/segments/r_portalsegment.cpp +++ b/src/swrenderer/segments/r_portalsegment.cpp @@ -32,17 +32,18 @@ #include "r_data/colormaps.h" #include "swrenderer/segments/r_portalsegment.h" #include "swrenderer/r_memory.h" +#include "swrenderer/r_renderthread.h" namespace swrenderer { - PortalDrawseg::PortalDrawseg(line_t *linedef, int x1, int x2, const short *topclip, const short *bottomclip) : x1(x1), x2(x2) + PortalDrawseg::PortalDrawseg(RenderThread *thread, line_t *linedef, int x1, int x2, const short *topclip, const short *bottomclip) : x1(x1), x2(x2) { src = linedef; dst = linedef->special == Line_Mirror ? 
linedef : linedef->getPortalDestination(); len = x2 - x1; - ceilingclip = RenderMemory::AllocMemory<short>(len); - floorclip = RenderMemory::AllocMemory<short>(len); + ceilingclip = thread->FrameMemory->AllocMemory<short>(len); + floorclip = thread->FrameMemory->AllocMemory<short>(len); memcpy(ceilingclip, topclip, len * sizeof(short)); memcpy(floorclip, bottomclip, len * sizeof(short)); diff --git a/src/swrenderer/segments/r_portalsegment.h b/src/swrenderer/segments/r_portalsegment.h index 9c6f644dfb..74596bb4de 100644 --- a/src/swrenderer/segments/r_portalsegment.h +++ b/src/swrenderer/segments/r_portalsegment.h @@ -15,10 +15,12 @@ namespace swrenderer { + class RenderThread; + /* portal structure, this is used in r_ code in order to store drawsegs with portals (and mirrors) */ struct PortalDrawseg { - PortalDrawseg(line_t *linedef, int x1, int x2, const short *topclip, const short *bottomclip); + PortalDrawseg(RenderThread *thread, line_t *linedef, int x1, int x2, const short *topclip, const short *bottomclip); line_t* src = nullptr; // source line (the one drawn) this doesn't change over render loops line_t* dst = nullptr; // destination line (the one that the portal is linked with, equals 'src' for mirrors) diff --git a/src/swrenderer/things/r_particle.cpp b/src/swrenderer/things/r_particle.cpp index 7d26723687..529c57b41c 100644 --- a/src/swrenderer/things/r_particle.cpp +++ b/src/swrenderer/things/r_particle.cpp @@ -182,7 +182,7 @@ namespace swrenderer return; // store information in a vissprite - RenderParticle *vis = RenderMemory::NewObject<RenderParticle>(); + RenderParticle *vis = thread->FrameMemory->NewObject<RenderParticle>(); vis->CurrentPortalUniq = renderportal->CurrentPortalUniq; vis->heightsec = heightsec; diff --git a/src/swrenderer/things/r_sprite.cpp b/src/swrenderer/things/r_sprite.cpp index 6b02a0ec1e..5e4a8f908b 100644 --- a/src/swrenderer/things/r_sprite.cpp +++ b/src/swrenderer/things/r_sprite.cpp @@ -159,7 +159,7 @@ namespace swrenderer double yscale = spriteScale.Y / tex->Scale.Y; // store 
information in a vissprite - RenderSprite *vis = RenderMemory::NewObject<RenderSprite>(); + RenderSprite *vis = thread->FrameMemory->NewObject<RenderSprite>(); vis->CurrentPortalUniq = renderportal->CurrentPortalUniq; vis->xscale = FLOAT2FIXED(xscale); diff --git a/src/swrenderer/things/r_voxel.cpp b/src/swrenderer/things/r_voxel.cpp index 0a2d8e074e..4214cdd5f2 100644 --- a/src/swrenderer/things/r_voxel.cpp +++ b/src/swrenderer/things/r_voxel.cpp @@ -107,7 +107,7 @@ namespace swrenderer } } - RenderVoxel *vis = RenderMemory::NewObject<RenderVoxel>(); + RenderVoxel *vis = thread->FrameMemory->NewObject<RenderVoxel>(); vis->CurrentPortalUniq = renderportal->CurrentPortalUniq; vis->xscale = FLOAT2FIXED(xscale); diff --git a/src/swrenderer/things/r_wallsprite.cpp b/src/swrenderer/things/r_wallsprite.cpp index 8b0dc16cc7..699dcac524 100644 --- a/src/swrenderer/things/r_wallsprite.cpp +++ b/src/swrenderer/things/r_wallsprite.cpp @@ -105,7 +105,7 @@ namespace swrenderer gzt = pos.Z + scale.Y * scaled_to; gzb = pos.Z + scale.Y * scaled_bo; - RenderWallSprite *vis = RenderMemory::NewObject<RenderWallSprite>(); + RenderWallSprite *vis = thread->FrameMemory->NewObject<RenderWallSprite>(); vis->CurrentPortalUniq = renderportal->CurrentPortalUniq; vis->x1 = wallc.sx1 < renderportal->WindowLeft ? renderportal->WindowLeft : wallc.sx1; vis->x2 = wallc.sx2 >= renderportal->WindowRight ? 
renderportal->WindowRight : wallc.sx2; From 6f5e720576894729398f94037f97b556e69dd193 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 4 Feb 2017 12:38:05 +0100 Subject: [PATCH 810/912] Split drawer command queue from drawer threads --- src/polyrenderer/drawers/poly_triangle.cpp | 3 +- src/polyrenderer/poly_renderer.cpp | 17 ++- src/polyrenderer/poly_renderer.h | 7 ++ src/swrenderer/drawers/r_draw.h | 6 + src/swrenderer/drawers/r_draw_pal.h | 74 +++++------ src/swrenderer/drawers/r_draw_rgba.h | 72 +++++------ src/swrenderer/drawers/r_thread.cpp | 130 +++++++++----------- src/swrenderer/drawers/r_thread.h | 98 +++++++-------- src/swrenderer/line/r_fogboundary.cpp | 14 +-- src/swrenderer/line/r_fogboundary.h | 6 +- src/swrenderer/line/r_renderdrawsegment.cpp | 4 +- src/swrenderer/line/r_walldraw.cpp | 6 +- src/swrenderer/plane/r_flatplane.cpp | 9 +- src/swrenderer/plane/r_flatplane.h | 3 + src/swrenderer/plane/r_skyplane.cpp | 4 +- src/swrenderer/plane/r_slopeplane.cpp | 2 +- src/swrenderer/r_renderthread.cpp | 16 +++ src/swrenderer/r_renderthread.h | 13 ++ src/swrenderer/scene/r_scene.cpp | 14 +-- src/swrenderer/things/r_decal.cpp | 2 +- src/swrenderer/things/r_particle.cpp | 4 +- src/swrenderer/things/r_playersprite.cpp | 6 +- src/swrenderer/things/r_playersprite.h | 2 +- src/swrenderer/things/r_sprite.cpp | 2 +- src/swrenderer/things/r_voxel.cpp | 6 +- src/swrenderer/things/r_voxel.h | 2 +- src/swrenderer/things/r_wallsprite.cpp | 6 +- src/swrenderer/things/r_wallsprite.h | 2 +- src/swrenderer/viewport/r_skydrawer.cpp | 9 +- src/swrenderer/viewport/r_skydrawer.h | 6 +- src/swrenderer/viewport/r_spandrawer.cpp | 17 +-- src/swrenderer/viewport/r_spandrawer.h | 10 +- src/swrenderer/viewport/r_spritedrawer.cpp | 15 +-- src/swrenderer/viewport/r_spritedrawer.h | 8 +- src/swrenderer/viewport/r_viewport.cpp | 10 -- src/swrenderer/viewport/r_viewport.h | 9 -- src/swrenderer/viewport/r_walldrawer.cpp | 5 +- src/swrenderer/viewport/r_walldrawer.h | 4 +- 
src/v_draw.cpp | 10 +- 39 files changed, 333 insertions(+), 300 deletions(-) diff --git a/src/polyrenderer/drawers/poly_triangle.cpp b/src/polyrenderer/drawers/poly_triangle.cpp index a289457151..e4cd81dac5 100644 --- a/src/polyrenderer/drawers/poly_triangle.cpp +++ b/src/polyrenderer/drawers/poly_triangle.cpp @@ -34,6 +34,7 @@ #include "v_palette.h" #include "r_data/colormaps.h" #include "poly_triangle.h" +#include "polyrenderer/poly_renderer.h" #include "swrenderer/drawers/r_draw_rgba.h" #include "screen_triangle.h" @@ -81,7 +82,7 @@ void PolyTriangleDrawer::toggle_mirror() void PolyTriangleDrawer::draw(const PolyDrawArgs &args) { - DrawerCommandQueue::QueueCommand(args, mirror); + PolyRenderer::Instance()->Thread.DrawQueue->Push(args, mirror); } void PolyTriangleDrawer::draw_arrays(const PolyDrawArgs &drawargs, WorkerThreadData *thread) diff --git a/src/polyrenderer/poly_renderer.cpp b/src/polyrenderer/poly_renderer.cpp index 1de84530db..5c00ef4fbd 100644 --- a/src/polyrenderer/poly_renderer.cpp +++ b/src/polyrenderer/poly_renderer.cpp @@ -50,6 +50,10 @@ PolyRenderer *PolyRenderer::Instance() return &scene; } +PolyRenderer::PolyRenderer() +{ +} + void PolyRenderer::RenderView(player_t *player) { using namespace swrenderer; @@ -71,10 +75,10 @@ void PolyRenderer::RenderView(player_t *player) CameraLight *cameraLight = CameraLight::Instance(); if (cameraLight->ShaderColormap() && viewport->RenderTarget->IsBgra() && !(r_shadercolormaps && screen->Accel2D)) { - R_BeginDrawerCommands(); - DrawerCommandQueue::QueueCommand(cameraLight->ShaderColormap(), screen); - R_EndDrawerCommands(); + Thread.DrawQueue->Push(cameraLight->ShaderColormap(), screen); } + + DrawerThreads::Execute({ Thread.DrawQueue }); } void PolyRenderer::RenderViewToCanvas(AActor *actor, DCanvas *canvas, int x, int y, int width, int height, bool dontmaplines) @@ -94,6 +98,7 @@ void PolyRenderer::RenderViewToCanvas(AActor *actor, DCanvas *canvas, int x, int canvas->Lock(true); RenderActorView(actor, 
dontmaplines); + DrawerThreads::Execute({ Thread.DrawQueue }); canvas->Unlock(); @@ -122,8 +127,6 @@ void PolyRenderer::RenderActorView(AActor *actor, bool dontmaplines) if (!r_showviewer) camera->renderflags |= RF_INVISIBLE; - R_BeginDrawerCommands(); - ClearBuffers(); SetSceneViewport(); SetupPerspectiveMatrix(); @@ -137,10 +140,6 @@ void PolyRenderer::RenderActorView(AActor *actor, bool dontmaplines) interpolator.RestoreInterpolations (); NetUpdate(); - - R_EndDrawerCommands(); - - NetUpdate(); } void PolyRenderer::RenderRemainingPlayerSprites() diff --git a/src/polyrenderer/poly_renderer.h b/src/polyrenderer/poly_renderer.h index 75e192e9c0..565eec0045 100644 --- a/src/polyrenderer/poly_renderer.h +++ b/src/polyrenderer/poly_renderer.h @@ -31,13 +31,18 @@ #include "scene/poly_portal.h" #include "scene/poly_playersprite.h" #include "scene/poly_sky.h" +#include "swrenderer/r_renderthread.h" class AActor; class DCanvas; +class DrawerCommandQueue; +typedef std::shared_ptr<DrawerCommandQueue> DrawerCommandQueuePtr; class PolyRenderer { public: + PolyRenderer(); + void RenderView(player_t *player); void RenderViewToCanvas(AActor *actor, DCanvas *canvas, int x, int y, int width, int height, bool dontmaplines); void RenderRemainingPlayerSprites(); @@ -50,6 +55,8 @@ public: bool InsertSeenMirror(line_t *mirrorLine); bool DontMapLines = false; + + swrenderer::RenderThread Thread; private: void RenderActorView(AActor *actor, bool dontmaplines); diff --git a/src/swrenderer/drawers/r_draw.h b/src/swrenderer/drawers/r_draw.h index e813acaaee..48185728b3 100644 --- a/src/swrenderer/drawers/r_draw.h +++ b/src/swrenderer/drawers/r_draw.h @@ -17,6 +17,9 @@ EXTERN_CVAR(Bool, r_drawtrans); EXTERN_CVAR(Float, transsouls); EXTERN_CVAR(Bool, r_dynlights); +class DrawerCommandQueue; +typedef std::shared_ptr<DrawerCommandQueue> DrawerCommandQueuePtr; + namespace swrenderer { class DrawerArgs; @@ -46,6 +49,7 @@ namespace swrenderer class SWPixelFormatDrawers { public: + SWPixelFormatDrawers(DrawerCommandQueuePtr queue) : 
Queue(queue) { } virtual ~SWPixelFormatDrawers() { } virtual void DrawWallColumn(const WallDrawerArgs &args) = 0; virtual void DrawWallMaskedColumn(const WallDrawerArgs &args) = 0; @@ -82,6 +86,8 @@ namespace swrenderer virtual void DrawTiltedSpan(const SpanDrawerArgs &args, int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy, FDynamicColormap *basecolormap) = 0; virtual void DrawColoredSpan(const SpanDrawerArgs &args, int y, int x1, int x2) = 0; virtual void DrawFogBoundaryLine(const SpanDrawerArgs &args, int y, int x1, int x2) = 0; + + DrawerCommandQueuePtr Queue; }; void R_InitShadeMaps(); diff --git a/src/swrenderer/drawers/r_draw_pal.h b/src/swrenderer/drawers/r_draw_pal.h index cf955c607c..efc6d6a17c 100644 --- a/src/swrenderer/drawers/r_draw_pal.h +++ b/src/swrenderer/drawers/r_draw_pal.h @@ -244,53 +244,55 @@ namespace swrenderer class SWPalDrawers : public SWPixelFormatDrawers { public: - void DrawWallColumn(const WallDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawWallMaskedColumn(const WallDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + using SWPixelFormatDrawers::SWPixelFormatDrawers; + + void DrawWallColumn(const WallDrawerArgs &args) override { Queue->Push(args); } + void DrawWallMaskedColumn(const WallDrawerArgs &args) override { Queue->Push(args); } void DrawWallAddColumn(const WallDrawerArgs &args) override { if (args.dc_num_lights == 0) - DrawerCommandQueue::QueueCommand(args); + Queue->Push(args); else - DrawerCommandQueue::QueueCommand(args); + Queue->Push(args); } - void DrawWallAddClampColumn(const WallDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawWallSubClampColumn(const WallDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawWallRevSubClampColumn(const WallDrawerArgs &args) override { 
DrawerCommandQueue::QueueCommand(args); } - void DrawSingleSkyColumn(const SkyDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawDoubleSkyColumn(const SkyDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawColumn(const SpriteDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void FillColumn(const SpriteDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void FillAddColumn(const SpriteDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void FillAddClampColumn(const SpriteDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void FillSubClampColumn(const SpriteDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void FillRevSubClampColumn(const SpriteDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawFuzzColumn(const SpriteDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); R_UpdateFuzzPos(args); } - void DrawAddColumn(const SpriteDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawTranslatedColumn(const SpriteDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawTranslatedAddColumn(const SpriteDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawShadedColumn(const SpriteDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawAddClampColumn(const SpriteDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawAddClampTranslatedColumn(const SpriteDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawSubClampColumn(const SpriteDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawSubClampTranslatedColumn(const SpriteDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawRevSubClampColumn(const SpriteDrawerArgs &args) override { 
DrawerCommandQueue::QueueCommand(args); } - void DrawRevSubClampTranslatedColumn(const SpriteDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawSpan(const SpanDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawSpanMasked(const SpanDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawSpanTranslucent(const SpanDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawSpanMaskedTranslucent(const SpanDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawSpanAddClamp(const SpanDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawSpanMaskedAddClamp(const SpanDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void FillSpan(const SpanDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + void DrawWallAddClampColumn(const WallDrawerArgs &args) override { Queue->Push(args); } + void DrawWallSubClampColumn(const WallDrawerArgs &args) override { Queue->Push(args); } + void DrawWallRevSubClampColumn(const WallDrawerArgs &args) override { Queue->Push(args); } + void DrawSingleSkyColumn(const SkyDrawerArgs &args) override { Queue->Push(args); } + void DrawDoubleSkyColumn(const SkyDrawerArgs &args) override { Queue->Push(args); } + void DrawColumn(const SpriteDrawerArgs &args) override { Queue->Push(args); } + void FillColumn(const SpriteDrawerArgs &args) override { Queue->Push(args); } + void FillAddColumn(const SpriteDrawerArgs &args) override { Queue->Push(args); } + void FillAddClampColumn(const SpriteDrawerArgs &args) override { Queue->Push(args); } + void FillSubClampColumn(const SpriteDrawerArgs &args) override { Queue->Push(args); } + void FillRevSubClampColumn(const SpriteDrawerArgs &args) override { Queue->Push(args); } + void DrawFuzzColumn(const SpriteDrawerArgs &args) override { Queue->Push(args); R_UpdateFuzzPos(args); } + void DrawAddColumn(const 
SpriteDrawerArgs &args) override { Queue->Push(args); } + void DrawTranslatedColumn(const SpriteDrawerArgs &args) override { Queue->Push(args); } + void DrawTranslatedAddColumn(const SpriteDrawerArgs &args) override { Queue->Push(args); } + void DrawShadedColumn(const SpriteDrawerArgs &args) override { Queue->Push(args); } + void DrawAddClampColumn(const SpriteDrawerArgs &args) override { Queue->Push(args); } + void DrawAddClampTranslatedColumn(const SpriteDrawerArgs &args) override { Queue->Push(args); } + void DrawSubClampColumn(const SpriteDrawerArgs &args) override { Queue->Push(args); } + void DrawSubClampTranslatedColumn(const SpriteDrawerArgs &args) override { Queue->Push(args); } + void DrawRevSubClampColumn(const SpriteDrawerArgs &args) override { Queue->Push(args); } + void DrawRevSubClampTranslatedColumn(const SpriteDrawerArgs &args) override { Queue->Push(args); } + void DrawSpan(const SpanDrawerArgs &args) override { Queue->Push(args); } + void DrawSpanMasked(const SpanDrawerArgs &args) override { Queue->Push(args); } + void DrawSpanTranslucent(const SpanDrawerArgs &args) override { Queue->Push(args); } + void DrawSpanMaskedTranslucent(const SpanDrawerArgs &args) override { Queue->Push(args); } + void DrawSpanAddClamp(const SpanDrawerArgs &args) override { Queue->Push(args); } + void DrawSpanMaskedAddClamp(const SpanDrawerArgs &args) override { Queue->Push(args); } + void FillSpan(const SpanDrawerArgs &args) override { Queue->Push(args); } void DrawTiltedSpan(const SpanDrawerArgs &args, int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy, FDynamicColormap *basecolormap) override { - DrawerCommandQueue::QueueCommand(args, y, x1, x2, plane_sz, plane_su, plane_sv, plane_shade, planeshade, planelightfloat, pviewx, pviewy, basecolormap); + Queue->Push(args, y, x1, x2, plane_sz, plane_su, plane_sv, plane_shade, planeshade, 
planelightfloat, pviewx, pviewy, basecolormap); } - void DrawColoredSpan(const SpanDrawerArgs &args, int y, int x1, int x2) override { DrawerCommandQueue::QueueCommand(args, y, x1, x2); } - void DrawFogBoundaryLine(const SpanDrawerArgs &args, int y, int x1, int x2) override { DrawerCommandQueue::QueueCommand(args, y, x1, x2); } + void DrawColoredSpan(const SpanDrawerArgs &args, int y, int x1, int x2) override { Queue->Push(args, y, x1, x2); } + void DrawFogBoundaryLine(const SpanDrawerArgs &args, int y, int x1, int x2) override { Queue->Push(args, y, x1, x2); } }; } diff --git a/src/swrenderer/drawers/r_draw_rgba.h b/src/swrenderer/drawers/r_draw_rgba.h index 6ba67f5c1d..8f31066491 100644 --- a/src/swrenderer/drawers/r_draw_rgba.h +++ b/src/swrenderer/drawers/r_draw_rgba.h @@ -357,46 +357,48 @@ namespace swrenderer class SWTruecolorDrawers : public SWPixelFormatDrawers { public: - void DrawWallColumn(const WallDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawWallMaskedColumn(const WallDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawWallAddColumn(const WallDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawWallAddClampColumn(const WallDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawWallSubClampColumn(const WallDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawWallRevSubClampColumn(const WallDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawSingleSkyColumn(const SkyDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawDoubleSkyColumn(const SkyDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawColumn(const SpriteDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void FillColumn(const SpriteDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void FillAddColumn(const 
SpriteDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void FillAddClampColumn(const SpriteDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void FillSubClampColumn(const SpriteDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void FillRevSubClampColumn(const SpriteDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawFuzzColumn(const SpriteDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); R_UpdateFuzzPos(args); } - void DrawAddColumn(const SpriteDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawTranslatedColumn(const SpriteDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawTranslatedAddColumn(const SpriteDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawShadedColumn(const SpriteDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawAddClampColumn(const SpriteDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawAddClampTranslatedColumn(const SpriteDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawSubClampColumn(const SpriteDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawSubClampTranslatedColumn(const SpriteDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawRevSubClampColumn(const SpriteDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawRevSubClampTranslatedColumn(const SpriteDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawSpan(const SpanDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawSpanMasked(const SpanDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawSpanTranslucent(const SpanDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void 
DrawSpanMaskedTranslucent(const SpanDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawSpanAddClamp(const SpanDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void DrawSpanMaskedAddClamp(const SpanDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } - void FillSpan(const SpanDrawerArgs &args) override { DrawerCommandQueue::QueueCommand(args); } + using SWPixelFormatDrawers::SWPixelFormatDrawers; + + void DrawWallColumn(const WallDrawerArgs &args) override { Queue->Push(args); } + void DrawWallMaskedColumn(const WallDrawerArgs &args) override { Queue->Push(args); } + void DrawWallAddColumn(const WallDrawerArgs &args) override { Queue->Push(args); } + void DrawWallAddClampColumn(const WallDrawerArgs &args) override { Queue->Push(args); } + void DrawWallSubClampColumn(const WallDrawerArgs &args) override { Queue->Push(args); } + void DrawWallRevSubClampColumn(const WallDrawerArgs &args) override { Queue->Push(args); } + void DrawSingleSkyColumn(const SkyDrawerArgs &args) override { Queue->Push(args); } + void DrawDoubleSkyColumn(const SkyDrawerArgs &args) override { Queue->Push(args); } + void DrawColumn(const SpriteDrawerArgs &args) override { Queue->Push(args); } + void FillColumn(const SpriteDrawerArgs &args) override { Queue->Push(args); } + void FillAddColumn(const SpriteDrawerArgs &args) override { Queue->Push(args); } + void FillAddClampColumn(const SpriteDrawerArgs &args) override { Queue->Push(args); } + void FillSubClampColumn(const SpriteDrawerArgs &args) override { Queue->Push(args); } + void FillRevSubClampColumn(const SpriteDrawerArgs &args) override { Queue->Push(args); } + void DrawFuzzColumn(const SpriteDrawerArgs &args) override { Queue->Push(args); R_UpdateFuzzPos(args); } + void DrawAddColumn(const SpriteDrawerArgs &args) override { Queue->Push(args); } + void DrawTranslatedColumn(const SpriteDrawerArgs &args) override { Queue->Push(args); } + void 
DrawTranslatedAddColumn(const SpriteDrawerArgs &args) override { Queue->Push(args); } + void DrawShadedColumn(const SpriteDrawerArgs &args) override { Queue->Push(args); } + void DrawAddClampColumn(const SpriteDrawerArgs &args) override { Queue->Push(args); } + void DrawAddClampTranslatedColumn(const SpriteDrawerArgs &args) override { Queue->Push(args); } + void DrawSubClampColumn(const SpriteDrawerArgs &args) override { Queue->Push(args); } + void DrawSubClampTranslatedColumn(const SpriteDrawerArgs &args) override { Queue->Push(args); } + void DrawRevSubClampColumn(const SpriteDrawerArgs &args) override { Queue->Push(args); } + void DrawRevSubClampTranslatedColumn(const SpriteDrawerArgs &args) override { Queue->Push(args); } + void DrawSpan(const SpanDrawerArgs &args) override { Queue->Push(args); } + void DrawSpanMasked(const SpanDrawerArgs &args) override { Queue->Push(args); } + void DrawSpanTranslucent(const SpanDrawerArgs &args) override { Queue->Push(args); } + void DrawSpanMaskedTranslucent(const SpanDrawerArgs &args) override { Queue->Push(args); } + void DrawSpanAddClamp(const SpanDrawerArgs &args) override { Queue->Push(args); } + void DrawSpanMaskedAddClamp(const SpanDrawerArgs &args) override { Queue->Push(args); } + void FillSpan(const SpanDrawerArgs &args) override { Queue->Push(args); } void DrawTiltedSpan(const SpanDrawerArgs &args, int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy, FDynamicColormap *basecolormap) override { - DrawerCommandQueue::QueueCommand(args, y, x1, x2, plane_sz, plane_su, plane_sv, plane_shade, planeshade, planelightfloat, pviewx, pviewy); + Queue->Push(args, y, x1, x2, plane_sz, plane_su, plane_sv, plane_shade, planeshade, planelightfloat, pviewx, pviewy); } - void DrawColoredSpan(const SpanDrawerArgs &args, int y, int x1, int x2) override { DrawerCommandQueue::QueueCommand(args, y, x1, 
x2); } - void DrawFogBoundaryLine(const SpanDrawerArgs &args, int y, int x1, int x2) override { DrawerCommandQueue::QueueCommand(args, y, x1, x2); } + void DrawColoredSpan(const SpanDrawerArgs &args, int y, int x1, int x2) override { Queue->Push(args, y, x1, x2); } + void DrawFogBoundaryLine(const SpanDrawerArgs &args, int y, int x1, int x2) override { Queue->Push(args, y, x1, x2); } }; ///////////////////////////////////////////////////////////////////////////// diff --git a/src/swrenderer/drawers/r_thread.cpp b/src/swrenderer/drawers/r_thread.cpp index c35d597772..367783b565 100644 --- a/src/swrenderer/drawers/r_thread.cpp +++ b/src/swrenderer/drawers/r_thread.cpp @@ -31,80 +31,42 @@ #include "g_game.h" #include "g_level.h" #include "r_thread.h" +#include "swrenderer/r_memory.h" +#include "swrenderer/r_renderthread.h" CVAR(Bool, r_multithreaded, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); -void R_BeginDrawerCommands() -{ - DrawerCommandQueue::Begin(); -} - -void R_EndDrawerCommands() -{ - DrawerCommandQueue::End(); -} - ///////////////////////////////////////////////////////////////////////////// -DrawerCommandQueue *DrawerCommandQueue::Instance() +DrawerThreads *DrawerThreads::Instance() { - static DrawerCommandQueue queue; - return &queue; + static DrawerThreads threads; + return &threads; } -DrawerCommandQueue::DrawerCommandQueue() +DrawerThreads::DrawerThreads() { } -DrawerCommandQueue::~DrawerCommandQueue() +DrawerThreads::~DrawerThreads() { StopThreads(); } -void* DrawerCommandQueue::AllocMemory(size_t size) +void DrawerThreads::Execute(const std::vector &queues) { - // Make sure allocations remain 16-byte aligned - size = (size + 15) / 16 * 16; - - auto queue = Instance(); - if (queue->memorypool_pos + size > memorypool_size) - return nullptr; - - void *data = queue->memorypool + queue->memorypool_pos; - queue->memorypool_pos += size; - return data; -} - -void DrawerCommandQueue::Begin() -{ - auto queue = Instance(); - queue->Finish(); - 
queue->threaded_render++; -} - -void DrawerCommandQueue::End() -{ - auto queue = Instance(); - queue->Finish(); - if (queue->threaded_render > 0) - queue->threaded_render--; -} - -void DrawerCommandQueue::WaitForWorkers() -{ - Instance()->Finish(); -} - -void DrawerCommandQueue::Finish() -{ - auto queue = Instance(); - if (queue->commands.empty()) + bool hasWork = false; + for (const auto &queue : queues) + hasWork = hasWork || !queue->commands.empty(); + if (!hasWork) return; + + auto queue = Instance(); // Give worker threads something to do: std::unique_lock start_lock(queue->start_mutex); - queue->active_commands.swap(queue->commands); + queue->active_commands = queues; queue->run_id++; start_lock.unlock(); @@ -119,13 +81,15 @@ void DrawerCommandQueue::Finish() struct TryCatchData { - DrawerCommandQueue *queue; + DrawerThreads *queue; DrawerThread *thread; + size_t list_index; size_t command_index; } data; data.queue = queue; data.thread = &thread; + data.list_index = 0; data.command_index = 0; VectoredTryCatch(&data, [](void *data) @@ -139,18 +103,22 @@ void DrawerCommandQueue::Finish() if (pass + 1 == d->queue->num_passes) d->thread->pass_end_y = MAX(d->thread->pass_end_y, MAXHEIGHT); - size_t size = d->queue->active_commands.size(); - for (d->command_index = 0; d->command_index < size; d->command_index++) + for (auto &list : d->queue->active_commands) { - auto &command = d->queue->active_commands[d->command_index]; - command->Execute(d->thread); + size_t size = list->commands.size(); + for (d->command_index = 0; d->command_index < size; d->command_index++) + { + auto &command = list->commands[d->command_index]; + command->Execute(d->thread); + } + d->list_index++; } } }, [](void *data, const char *reason, bool fatal) { TryCatchData *d = (TryCatchData*)data; - ReportDrawerError(d->queue->active_commands[d->command_index], true, reason, fatal); + ReportDrawerError(d->queue->active_commands[d->list_index]->commands[d->command_index], true, reason, fatal); }); 
// Wait for everyone to finish: @@ -170,14 +138,17 @@ void DrawerCommandQueue::Finish() // Clean up batch: - for (auto &command : queue->active_commands) - command->~DrawerCommand(); + for (auto &list : queue->active_commands) + { + for (auto &command : list->commands) + command->~DrawerCommand(); + list->Clear(); + } queue->active_commands.clear(); - queue->memorypool_pos = 0; queue->finished_threads = 0; } -void DrawerCommandQueue::StartThreads() +void DrawerThreads::StartThreads() { if (!threads.empty()) return; @@ -190,7 +161,7 @@ void DrawerCommandQueue::StartThreads() for (int i = 0; i < num_threads - 1; i++) { - DrawerCommandQueue *queue = this; + DrawerThreads *queue = this; DrawerThread *thread = &threads[i]; thread->core = i + 1; thread->num_cores = num_threads; @@ -211,13 +182,15 @@ void DrawerCommandQueue::StartThreads() struct TryCatchData { - DrawerCommandQueue *queue; + DrawerThreads *queue; DrawerThread *thread; + size_t list_index; size_t command_index; } data; data.queue = queue; data.thread = thread; + data.list_index = 0; data.command_index = 0; VectoredTryCatch(&data, [](void *data) @@ -231,18 +204,22 @@ void DrawerCommandQueue::StartThreads() if (pass + 1 == d->queue->num_passes) d->thread->pass_end_y = MAX(d->thread->pass_end_y, MAXHEIGHT); - size_t size = d->queue->active_commands.size(); - for (d->command_index = 0; d->command_index < size; d->command_index++) + for (auto &list : d->queue->active_commands) { - auto &command = d->queue->active_commands[d->command_index]; - command->Execute(d->thread); + size_t size = list->commands.size(); + for (d->command_index = 0; d->command_index < size; d->command_index++) + { + auto &command = list->commands[d->command_index]; + command->Execute(d->thread); + } + d->list_index++; } } }, [](void *data, const char *reason, bool fatal) { TryCatchData *d = (TryCatchData*)data; - ReportDrawerError(d->queue->active_commands[d->command_index], true, reason, fatal); + 
ReportDrawerError(d->queue->active_commands[d->list_index]->commands[d->command_index], true, reason, fatal); }); // Notify main thread that we finished: @@ -255,7 +232,7 @@ void DrawerCommandQueue::StartThreads() } } -void DrawerCommandQueue::StopThreads() +void DrawerThreads::StopThreads() { std::unique_lock lock(start_mutex); shutdown_flag = true; @@ -268,7 +245,7 @@ void DrawerCommandQueue::StopThreads() shutdown_flag = false; } -void DrawerCommandQueue::ReportDrawerError(DrawerCommand *command, bool worker_thread, const char *reason, bool fatal) +void DrawerThreads::ReportDrawerError(DrawerCommand *command, bool worker_thread, const char *reason, bool fatal) { if (worker_thread) { @@ -298,3 +275,12 @@ void VectoredTryCatch(void *data, void(*tryBlock)(void *data), void(*catchBlock) } #endif + +DrawerCommandQueue::DrawerCommandQueue(swrenderer::RenderThread *renderthread) : renderthread(renderthread) +{ +} + +void *DrawerCommandQueue::AllocMemory(size_t size) +{ + return renderthread->FrameMemory->AllocMemory(size); +} diff --git a/src/swrenderer/drawers/r_thread.h b/src/swrenderer/drawers/r_thread.h index 8de57a32d1..da24854eb5 100644 --- a/src/swrenderer/drawers/r_thread.h +++ b/src/swrenderer/drawers/r_thread.h @@ -33,12 +33,6 @@ // Use multiple threads when drawing EXTERN_CVAR(Bool, r_multithreaded) -// Redirect drawer commands to worker threads -void R_BeginDrawerCommands(); - -// Wait until all drawers finished executing -void R_EndDrawerCommands(); - // Worker data for each thread executing drawer commands class DrawerThread { @@ -117,20 +111,30 @@ public: void VectoredTryCatch(void *data, void(*tryBlock)(void *data), void(*catchBlock)(void *data, const char *reason, bool fatal)); -// Manages queueing up commands and executing them on worker threads -class DrawerCommandQueue +class DrawerCommandQueue; +typedef std::shared_ptr DrawerCommandQueuePtr; + +class DrawerThreads { - enum { memorypool_size = 16 * 1024 * 1024 }; - char memorypool[memorypool_size]; 
- size_t memorypool_pos = 0; - - std::vector commands; +public: + // Runs the collected commands on worker threads + static void Execute(const std::vector &queues); + +private: + DrawerThreads(); + ~DrawerThreads(); + + void StartThreads(); + void StopThreads(); + static DrawerThreads *Instance(); + static void ReportDrawerError(DrawerCommand *command, bool worker_thread, const char *reason, bool fatal); + std::vector threads; std::mutex start_mutex; std::condition_variable start_condition; - std::vector active_commands; + std::vector active_commands; bool shutdown_flag = false; int run_id = 0; @@ -144,53 +148,45 @@ class DrawerCommandQueue DrawerThread single_core_thread; int num_passes = 1; int rows_in_pass = MAXHEIGHT; + + friend class DrawerCommandQueue; +}; - void StartThreads(); - void StopThreads(); - void Finish(); - - static DrawerCommandQueue *Instance(); - static void ReportDrawerError(DrawerCommand *command, bool worker_thread, const char *reason, bool fatal); - - DrawerCommandQueue(); - ~DrawerCommandQueue(); +namespace swrenderer { class RenderThread; } +class DrawerCommandQueue +{ public: - // Allocate memory valid for the duration of a command execution - static void* AllocMemory(size_t size); - + DrawerCommandQueue(swrenderer::RenderThread *renderthread); + + void Clear() { commands.clear(); } + // Queue command to be executed by drawer worker threads template - static void QueueCommand(Types &&... args) + void Push(Types &&... 
args) { - auto queue = Instance(); - if (queue->threaded_render == 0 || !r_multithreaded) + DrawerThreads *threads = DrawerThreads::Instance(); + if (ThreadedRender && r_multithreaded) { - T command(std::forward(args)...); - command.Execute(&Instance()->single_core_thread); + void *ptr = AllocMemory(sizeof(T)); + T *command = new (ptr)T(std::forward(args)...); + commands.push_back(command); } else { - void *ptr = AllocMemory(sizeof(T)); - if (!ptr) // Out of memory - render what we got - { - queue->Finish(); - ptr = AllocMemory(sizeof(T)); - if (!ptr) - return; - } - T *command = new (ptr)T(std::forward(args)...); - queue->commands.push_back(command); + T command(std::forward(args)...); + command.Execute(&threads->single_core_thread); } } - - // Redirects all drawing commands to worker threads until End is called - // Begin/End blocks can be nested. - static void Begin(); - - // End redirection and wait until all worker threads finished executing - static void End(); - - // Waits until all worker threads finished executing - static void WaitForWorkers(); + + bool ThreadedRender = true; + +private: + // Allocate memory valid for the duration of a command execution + void *AllocMemory(size_t size); + + std::vector commands; + swrenderer::RenderThread *renderthread; + + friend class DrawerThreads; }; diff --git a/src/swrenderer/line/r_fogboundary.cpp b/src/swrenderer/line/r_fogboundary.cpp index b75ce82ce9..8bd907c708 100644 --- a/src/swrenderer/line/r_fogboundary.cpp +++ b/src/swrenderer/line/r_fogboundary.cpp @@ -45,7 +45,7 @@ namespace swrenderer { - void RenderFogBoundary::Render(int x1, int x2, short *uclip, short *dclip, int wallshade, float lightleft, float lightstep, FDynamicColormap *basecolormap) + void RenderFogBoundary::Render(RenderThread *thread, int x1, int x2, short *uclip, short *dclip, int wallshade, float lightleft, float lightstep, FDynamicColormap *basecolormap) { // This is essentially the same as R_MapVisPlane but with an extra step // to create 
new horizontal spans whenever the light changes enough that @@ -82,7 +82,7 @@ namespace swrenderer if (t2 < b2 && rcolormap != 0) { // Colormap 0 is always the identity map, so rendering it is // just a waste of time. - RenderSection(t2, b2, xr); + RenderSection(thread, t2, b2, xr); } if (t1 < t2) t2 = t1; if (b1 > b2) b2 = b1; @@ -102,13 +102,13 @@ namespace swrenderer while (t2 < stop) { int y = t2++; - drawerargs.DrawFogBoundaryLine(y, xr, spanend[y]); + drawerargs.DrawFogBoundaryLine(thread, y, xr, spanend[y]); } stop = MAX(b1, t2); while (b2 > stop) { int y = --b2; - drawerargs.DrawFogBoundaryLine(y, xr, spanend[y]); + drawerargs.DrawFogBoundaryLine(thread, y, xr, spanend[y]); } } else @@ -134,15 +134,15 @@ namespace swrenderer } if (t2 < b2 && rcolormap != 0) { - RenderSection(t2, b2, x1); + RenderSection(thread, t2, b2, x1); } } - void RenderFogBoundary::RenderSection(int y, int y2, int x1) + void RenderFogBoundary::RenderSection(RenderThread *thread, int y, int y2, int x1) { for (; y < y2; ++y) { - drawerargs.DrawFogBoundaryLine(y, x1, spanend[y]); + drawerargs.DrawFogBoundaryLine(thread, y, x1, spanend[y]); } } } diff --git a/src/swrenderer/line/r_fogboundary.h b/src/swrenderer/line/r_fogboundary.h index 487e773851..c7df265a4b 100644 --- a/src/swrenderer/line/r_fogboundary.h +++ b/src/swrenderer/line/r_fogboundary.h @@ -17,13 +17,15 @@ namespace swrenderer { + class RenderThread; + class RenderFogBoundary { public: - void Render(int x1, int x2, short *uclip, short *dclip, int wallshade, float lightleft, float lightstep, FDynamicColormap *basecolormap); + void Render(RenderThread *thread, int x1, int x2, short *uclip, short *dclip, int wallshade, float lightleft, float lightstep, FDynamicColormap *basecolormap); private: - void RenderSection(int y, int y2, int x1); + void RenderSection(RenderThread *thread, int y, int y2, int x1); short spanend[MAXHEIGHT]; SpanDrawerArgs drawerargs; diff --git a/src/swrenderer/line/r_renderdrawsegment.cpp 
b/src/swrenderer/line/r_renderdrawsegment.cpp index 203e02c447..85a55dc9ea 100644 --- a/src/swrenderer/line/r_renderdrawsegment.cpp +++ b/src/swrenderer/line/r_renderdrawsegment.cpp @@ -137,7 +137,7 @@ namespace swrenderer // [RH] Draw fog partition if (ds->bFogBoundary) { - renderfog.Render(x1, x2, mceilingclip, mfloorclip, wallshade, rw_light, rw_lightstep, basecolormap); + renderfog.Render(Thread, x1, x2, mceilingclip, mfloorclip, wallshade, rw_light, rw_lightstep, basecolormap); if (ds->maskedtexturecol == nullptr) { goto clearfog; @@ -302,7 +302,7 @@ namespace swrenderer else sprtopscreen = viewport->CenterY - texturemid * spryscale; - columndrawerargs.DrawMaskedColumn(x, iscale, tex, maskedtexturecol[x], spryscale, sprtopscreen, sprflipvert, mfloorclip, mceilingclip); + columndrawerargs.DrawMaskedColumn(Thread, x, iscale, tex, maskedtexturecol[x], spryscale, sprtopscreen, sprflipvert, mfloorclip, mceilingclip); rw_light += rw_lightstep; spryscale += rw_scalestep; diff --git a/src/swrenderer/line/r_walldraw.cpp b/src/swrenderer/line/r_walldraw.cpp index ec7ea38f60..9a07198e51 100644 --- a/src/swrenderer/line/r_walldraw.cpp +++ b/src/swrenderer/line/r_walldraw.cpp @@ -251,7 +251,7 @@ namespace swrenderer drawerargs.SetCount(count); drawerargs.SetTextureVStep(sampler.uv_step); drawerargs.SetTextureVPos(sampler.uv_pos); - drawerargs.DrawColumn(); + drawerargs.DrawColumn(Thread); uint64_t step64 = sampler.uv_step; uint64_t pos64 = sampler.uv_pos; @@ -269,7 +269,7 @@ namespace swrenderer drawerargs.SetCount(count); drawerargs.SetTextureVStep(sampler.uv_step); drawerargs.SetTextureVPos(sampler.uv_pos); - drawerargs.DrawColumn(); + drawerargs.DrawColumn(Thread); uint64_t step64 = sampler.uv_step; uint64_t pos64 = sampler.uv_pos; @@ -294,7 +294,7 @@ namespace swrenderer drawerargs.SetCount(count); drawerargs.SetTextureVStep(sampler.uv_step); drawerargs.SetTextureVPos(uv_pos); - drawerargs.DrawColumn(); + drawerargs.DrawColumn(Thread); left -= count; uv_pos += 
sampler.uv_step * count; diff --git a/src/swrenderer/plane/r_flatplane.cpp b/src/swrenderer/plane/r_flatplane.cpp index 85e339e847..d7e4933847 100644 --- a/src/swrenderer/plane/r_flatplane.cpp +++ b/src/swrenderer/plane/r_flatplane.cpp @@ -265,7 +265,7 @@ namespace swrenderer drawerargs.SetDestX1(x1); drawerargs.SetDestX2(x2); - drawerargs.DrawSpan(); + drawerargs.DrawSpan(Thread); } void RenderFlatPlane::StepColumn() @@ -319,6 +319,11 @@ namespace swrenderer float RenderFlatPlane::yslope[MAXHEIGHT]; ///////////////////////////////////////////////////////////////////////// + + RenderColoredPlane::RenderColoredPlane(RenderThread *thread) + { + Thread = thread; + } void RenderColoredPlane::Render(VisiblePlane *pl) { @@ -327,6 +332,6 @@ namespace swrenderer void RenderColoredPlane::RenderLine(int y, int x1, int x2) { - drawerargs.DrawColoredSpan(y, x1, x2); + drawerargs.DrawColoredSpan(Thread, y, x1, x2); } } diff --git a/src/swrenderer/plane/r_flatplane.h b/src/swrenderer/plane/r_flatplane.h index 72a48d334c..95f24c93d3 100644 --- a/src/swrenderer/plane/r_flatplane.h +++ b/src/swrenderer/plane/r_flatplane.h @@ -54,7 +54,10 @@ namespace swrenderer class RenderColoredPlane : PlaneRenderer { public: + RenderColoredPlane(RenderThread *thread); void Render(VisiblePlane *pl); + + RenderThread *Thread = nullptr; private: void RenderLine(int y, int x1, int x2) override; diff --git a/src/swrenderer/plane/r_skyplane.cpp b/src/swrenderer/plane/r_skyplane.cpp index 8eefdb708d..dd1848f7a1 100644 --- a/src/swrenderer/plane/r_skyplane.cpp +++ b/src/swrenderer/plane/r_skyplane.cpp @@ -209,9 +209,9 @@ namespace swrenderer drawerargs.SetSolidBottom(frontskytex->GetSkyCapColor(true)); if (!backskytex) - drawerargs.DrawSingleSkyColumn(); + drawerargs.DrawSingleSkyColumn(Thread); else - drawerargs.DrawDoubleSkyColumn(); + drawerargs.DrawDoubleSkyColumn(Thread); } void RenderSkyPlane::DrawSkyColumn(int start_x, int y1, int y2) diff --git a/src/swrenderer/plane/r_slopeplane.cpp 
b/src/swrenderer/plane/r_slopeplane.cpp index ccb7e6107b..6be3b1a7b0 100644 --- a/src/swrenderer/plane/r_slopeplane.cpp +++ b/src/swrenderer/plane/r_slopeplane.cpp @@ -192,6 +192,6 @@ namespace swrenderer void RenderSlopePlane::RenderLine(int y, int x1, int x2) { - drawerargs.DrawTiltedSpan(y, x1, x2, plane_sz, plane_su, plane_sv, plane_shade, planeshade, planelightfloat, pviewx, pviewy, basecolormap); + drawerargs.DrawTiltedSpan(Thread, y, x1, x2, plane_sz, plane_su, plane_sv, plane_shade, planeshade, planelightfloat, pviewx, pviewy, basecolormap); } } diff --git a/src/swrenderer/r_renderthread.cpp b/src/swrenderer/r_renderthread.cpp index 096f64bf6e..dd14c8902f 100644 --- a/src/swrenderer/r_renderthread.cpp +++ b/src/swrenderer/r_renderthread.cpp @@ -47,6 +47,11 @@ #include "swrenderer/plane/r_visibleplanelist.h" #include "swrenderer/segments/r_drawsegment.h" #include "swrenderer/segments/r_clipsegment.h" +#include "swrenderer/drawers/r_thread.h" +#include "swrenderer/drawers/r_draw.h" +#include "swrenderer/drawers/r_draw_rgba.h" +#include "swrenderer/drawers/r_draw_pal.h" +#include "swrenderer/viewport/r_viewport.h" #include "r_memory.h" namespace swrenderer @@ -54,6 +59,7 @@ namespace swrenderer RenderThread::RenderThread() { FrameMemory = std::make_unique(); + DrawQueue = std::make_shared(this); OpaquePass = std::make_unique(this); TranslucentPass = std::make_unique(this); SpriteList = std::make_unique(); @@ -64,9 +70,19 @@ namespace swrenderer Scene = std::make_unique(this); DrawSegments = std::make_unique(this); ClipSegments = std::make_unique(); + tc_drawers = std::make_unique(DrawQueue); + pal_drawers = std::make_unique(DrawQueue); } RenderThread::~RenderThread() { } + + SWPixelFormatDrawers *RenderThread::Drawers() + { + if (RenderViewport::Instance()->RenderTarget->IsBgra()) + return tc_drawers.get(); + else + return pal_drawers.get(); + } } diff --git a/src/swrenderer/r_renderthread.h b/src/swrenderer/r_renderthread.h index 24b86e8613..809fe080d9 100644 
--- a/src/swrenderer/r_renderthread.h +++ b/src/swrenderer/r_renderthread.h @@ -24,6 +24,9 @@ #include +class DrawerCommandQueue; +typedef std::shared_ptr DrawerCommandQueuePtr; + namespace swrenderer { class VisibleSpriteList; @@ -37,6 +40,9 @@ namespace swrenderer class DrawSegmentList; class RenderClipSegment; class RenderMemory; + class SWPixelFormatDrawers; + class SWTruecolorDrawers; + class SWPalDrawers; class RenderThread { @@ -55,5 +61,12 @@ namespace swrenderer std::unique_ptr Scene; std::unique_ptr DrawSegments; std::unique_ptr ClipSegments; + DrawerCommandQueuePtr DrawQueue; + + SWPixelFormatDrawers *Drawers(); + + private: + std::unique_ptr tc_drawers; + std::unique_ptr pal_drawers; }; } diff --git a/src/swrenderer/scene/r_scene.cpp b/src/swrenderer/scene/r_scene.cpp index 37eff0b2b2..371936a699 100644 --- a/src/swrenderer/scene/r_scene.cpp +++ b/src/swrenderer/scene/r_scene.cpp @@ -93,17 +93,15 @@ namespace swrenderer } } - R_BeginDrawerCommands(); - RenderActorView(player->mo); // Apply special colormap if the target cannot do it if (CameraLight::Instance()->ShaderColormap() && viewport->RenderTarget->IsBgra() && !(r_shadercolormaps && screen->Accel2D)) { - DrawerCommandQueue::QueueCommand(CameraLight::Instance()->ShaderColormap(), screen); + Thread->DrawQueue->Push(CameraLight::Instance()->ShaderColormap(), screen); } - - R_EndDrawerCommands(); + + DrawerThreads::Execute({ Thread->DrawQueue }); } void RenderScene::RenderActorView(AActor *actor, bool dontmaplines) @@ -195,8 +193,6 @@ namespace swrenderer const bool savedviewactive = viewactive; - R_BeginDrawerCommands(); - viewwidth = width; viewport->RenderTarget = canvas; @@ -208,8 +204,8 @@ namespace swrenderer RenderActorView(actor, dontmaplines); - R_EndDrawerCommands(); - + DrawerThreads::Execute({ Thread->DrawQueue }); + viewport->RenderTarget = screen; R_ExecuteSetViewSize(); diff --git a/src/swrenderer/things/r_decal.cpp b/src/swrenderer/things/r_decal.cpp index 6e3fdb7618..47a89727be 100644 
--- a/src/swrenderer/things/r_decal.cpp +++ b/src/swrenderer/things/r_decal.cpp @@ -329,6 +329,6 @@ namespace swrenderer else sprtopscreen = viewport->CenterY - texturemid * spryscale; - drawerargs.DrawMaskedColumn(x, FLOAT2FIXED(iscale), WallSpriteTile, walltexcoords.UPos[x], spryscale, sprtopscreen, sprflipvert, mfloorclip, mceilingclip); + drawerargs.DrawMaskedColumn(thread, x, FLOAT2FIXED(iscale), WallSpriteTile, walltexcoords.UPos[x], spryscale, sprtopscreen, sprflipvert, mfloorclip, mceilingclip); } } diff --git a/src/swrenderer/things/r_particle.cpp b/src/swrenderer/things/r_particle.cpp index 529c57b41c..9b9fbb4848 100644 --- a/src/swrenderer/things/r_particle.cpp +++ b/src/swrenderer/things/r_particle.cpp @@ -247,7 +247,7 @@ namespace swrenderer if (translucentPass->ClipSpriteColumnWithPortals(x, vis)) continue; uint32_t *dest = (uint32_t*)viewport->GetDest(x, yl); - DrawerCommandQueue::QueueCommand(dest, yl, spacing, ycount, fg, alpha, fracposx); + thread->DrawQueue->Push(dest, yl, spacing, ycount, fg, alpha, fracposx); } } else @@ -257,7 +257,7 @@ namespace swrenderer if (translucentPass->ClipSpriteColumnWithPortals(x, vis)) continue; uint8_t *dest = viewport->GetDest(x, yl); - DrawerCommandQueue::QueueCommand(dest, yl, spacing, ycount, fg, alpha, fracposx); + thread->DrawQueue->Push(dest, yl, spacing, ycount, fg, alpha, fracposx); } } } diff --git a/src/swrenderer/things/r_playersprite.cpp b/src/swrenderer/things/r_playersprite.cpp index 3521f115fe..78e9975e69 100644 --- a/src/swrenderer/things/r_playersprite.cpp +++ b/src/swrenderer/things/r_playersprite.cpp @@ -550,7 +550,7 @@ namespace swrenderer } } - vis.Render(); + vis.Render(Thread); } void RenderPlayerSprites::RenderRemaining() @@ -584,7 +584,7 @@ namespace swrenderer ///////////////////////////////////////////////////////////////////////// - void NoAccelPlayerSprite::Render() + void NoAccelPlayerSprite::Render(RenderThread *thread) { if (xscale == 0 || fabs(yscale) < (1.0f / 32000.0f)) { // 
scaled to 0; can't see @@ -627,7 +627,7 @@ namespace swrenderer fixed_t frac = startfrac; for (int x = x1; x < x2; x++) { - drawerargs.DrawMaskedColumn(x, iscale, pic, frac, spryscale, sprtopscreen, sprflipvert, mfloorclip, mceilingclip, false); + drawerargs.DrawMaskedColumn(thread, x, iscale, pic, frac, spryscale, sprtopscreen, sprflipvert, mfloorclip, mceilingclip, false); frac += xiscale; } diff --git a/src/swrenderer/things/r_playersprite.h b/src/swrenderer/things/r_playersprite.h index 0a3d8903d1..09274702aa 100644 --- a/src/swrenderer/things/r_playersprite.h +++ b/src/swrenderer/things/r_playersprite.h @@ -45,7 +45,7 @@ namespace swrenderer short renderflags = 0; - void Render(); + void Render(RenderThread *thread); }; class HWAccelPlayerSprite diff --git a/src/swrenderer/things/r_sprite.cpp b/src/swrenderer/things/r_sprite.cpp index 5e4a8f908b..4fce83fc84 100644 --- a/src/swrenderer/things/r_sprite.cpp +++ b/src/swrenderer/things/r_sprite.cpp @@ -291,7 +291,7 @@ namespace swrenderer while (x < x2) { if (!translucentPass->ClipSpriteColumnWithPortals(x, vis)) - drawerargs.DrawMaskedColumn(x, iscale, tex, frac, spryscale, sprtopscreen, sprflipvert, mfloorclip, mceilingclip, false); + drawerargs.DrawMaskedColumn(thread, x, iscale, tex, frac, spryscale, sprtopscreen, sprflipvert, mfloorclip, mceilingclip, false); x++; frac += xiscale; } diff --git a/src/swrenderer/things/r_voxel.cpp b/src/swrenderer/things/r_voxel.cpp index 4214cdd5f2..e91ae4ea86 100644 --- a/src/swrenderer/things/r_voxel.cpp +++ b/src/swrenderer/things/r_voxel.cpp @@ -292,7 +292,7 @@ namespace swrenderer voxel_pos.Y += dirY.X * x + dirY.Y * y; voxel_pos.Z += dirZ * z; - FillBox(drawerargs, voxel_pos, sprite_xscale, sprite_yscale, color, cliptop, clipbottom, false, false); + FillBox(thread, drawerargs, voxel_pos, sprite_xscale, sprite_yscale, color, cliptop, clipbottom, false, false); } } } @@ -315,7 +315,7 @@ namespace swrenderer return (kvxslab_t*)(((uint8_t*)slab) + 3 + slab->zleng); } - void 
RenderVoxel::FillBox(SpriteDrawerArgs &drawerargs, DVector3 origin, double extentX, double extentY, int color, short *cliptop, short *clipbottom, bool viewspace, bool pixelstretch) + void RenderVoxel::FillBox(RenderThread *thread, SpriteDrawerArgs &drawerargs, DVector3 origin, double extentX, double extentY, int color, short *cliptop, short *clipbottom, bool viewspace, bool pixelstretch) { auto viewport = RenderViewport::Instance(); @@ -345,7 +345,7 @@ namespace swrenderer drawerargs.SetDest(x, columnY1); drawerargs.SetSolidColor(color); drawerargs.SetCount(columnY2 - columnY1); - drawerargs.FillColumn(); + drawerargs.FillColumn(thread); } } } diff --git a/src/swrenderer/things/r_voxel.h b/src/swrenderer/things/r_voxel.h index 0bca0fb87c..1d663d251a 100644 --- a/src/swrenderer/things/r_voxel.h +++ b/src/swrenderer/things/r_voxel.h @@ -83,7 +83,7 @@ namespace swrenderer enum { DVF_OFFSCREEN = 1, DVF_SPANSONLY = 2, DVF_MIRRORED = 4 }; - static void FillBox(SpriteDrawerArgs &drawerargs, DVector3 origin, double extentX, double extentY, int color, short *cliptop, short *clipbottom, bool viewspace, bool pixelstretch); + static void FillBox(RenderThread *thread, SpriteDrawerArgs &drawerargs, DVector3 origin, double extentX, double extentY, int color, short *cliptop, short *clipbottom, bool viewspace, bool pixelstretch); static kvxslab_t *GetSlabStart(const FVoxelMipLevel &mip, int x, int y); static kvxslab_t *GetSlabEnd(const FVoxelMipLevel &mip, int x, int y); diff --git a/src/swrenderer/things/r_wallsprite.cpp b/src/swrenderer/things/r_wallsprite.cpp index 699dcac524..38a26aab19 100644 --- a/src/swrenderer/things/r_wallsprite.cpp +++ b/src/swrenderer/things/r_wallsprite.cpp @@ -239,14 +239,14 @@ namespace swrenderer drawerargs.SetLight(usecolormap, light, shade); } if (!translucentPass->ClipSpriteColumnWithPortals(x, spr)) - DrawColumn(drawerargs, x, WallSpriteTile, walltexcoords, texturemid, maskedScaleY, sprflipvert, mfloorclip, mceilingclip); + DrawColumn(thread, 
drawerargs, x, WallSpriteTile, walltexcoords, texturemid, maskedScaleY, sprflipvert, mfloorclip, mceilingclip); light += lightstep; x++; } } } - void RenderWallSprite::DrawColumn(SpriteDrawerArgs &drawerargs, int x, FTexture *WallSpriteTile, const ProjectedWallTexcoords &walltexcoords, double texturemid, float maskedScaleY, bool sprflipvert, const short *mfloorclip, const short *mceilingclip) + void RenderWallSprite::DrawColumn(RenderThread *thread, SpriteDrawerArgs &drawerargs, int x, FTexture *WallSpriteTile, const ProjectedWallTexcoords &walltexcoords, double texturemid, float maskedScaleY, bool sprflipvert, const short *mfloorclip, const short *mceilingclip) { auto viewport = RenderViewport::Instance(); @@ -258,6 +258,6 @@ namespace swrenderer else sprtopscreen = viewport->CenterY - texturemid * spryscale; - drawerargs.DrawMaskedColumn(x, FLOAT2FIXED(iscale), WallSpriteTile, walltexcoords.UPos[x], spryscale, sprtopscreen, sprflipvert, mfloorclip, mceilingclip); + drawerargs.DrawMaskedColumn(thread, x, FLOAT2FIXED(iscale), WallSpriteTile, walltexcoords.UPos[x], spryscale, sprtopscreen, sprflipvert, mfloorclip, mceilingclip); } } diff --git a/src/swrenderer/things/r_wallsprite.h b/src/swrenderer/things/r_wallsprite.h index 796a9a2bdc..76bbdc6c03 100644 --- a/src/swrenderer/things/r_wallsprite.h +++ b/src/swrenderer/things/r_wallsprite.h @@ -30,7 +30,7 @@ namespace swrenderer void Render(RenderThread *thread, short *cliptop, short *clipbottom, int minZ, int maxZ) override; private: - static void DrawColumn(SpriteDrawerArgs &drawerargs, int x, FTexture *WallSpriteTile, const ProjectedWallTexcoords &walltexcoords, double texturemid, float maskedScaleY, bool sprflipvert, const short *mfloorclip, const short *mceilingclip); + static void DrawColumn(RenderThread *thread, SpriteDrawerArgs &drawerargs, int x, FTexture *WallSpriteTile, const ProjectedWallTexcoords &walltexcoords, double texturemid, float maskedScaleY, bool sprflipvert, const short *mfloorclip, const short 
*mceilingclip); FWallCoords wallc; uint32_t Translation = 0; diff --git a/src/swrenderer/viewport/r_skydrawer.cpp b/src/swrenderer/viewport/r_skydrawer.cpp index 70a64e651e..74462b2fa8 100644 --- a/src/swrenderer/viewport/r_skydrawer.cpp +++ b/src/swrenderer/viewport/r_skydrawer.cpp @@ -13,17 +13,18 @@ #include #include "r_skydrawer.h" +#include "swrenderer/r_renderthread.h" namespace swrenderer { - void SkyDrawerArgs::DrawSingleSkyColumn() + void SkyDrawerArgs::DrawSingleSkyColumn(RenderThread *thread) { - RenderViewport::Instance()->Drawers()->DrawSingleSkyColumn(*this); + thread->Drawers()->DrawSingleSkyColumn(*this); } - void SkyDrawerArgs::DrawDoubleSkyColumn() + void SkyDrawerArgs::DrawDoubleSkyColumn(RenderThread *thread) { - RenderViewport::Instance()->Drawers()->DrawDoubleSkyColumn(*this); + thread->Drawers()->DrawDoubleSkyColumn(*this); } void SkyDrawerArgs::SetDest(int x, int y) diff --git a/src/swrenderer/viewport/r_skydrawer.h b/src/swrenderer/viewport/r_skydrawer.h index 8321b4e040..9389509686 100644 --- a/src/swrenderer/viewport/r_skydrawer.h +++ b/src/swrenderer/viewport/r_skydrawer.h @@ -9,6 +9,8 @@ struct TriLight; namespace swrenderer { + class RenderThread; + class SkyDrawerArgs : public DrawerArgs { public: @@ -38,8 +40,8 @@ namespace swrenderer int FrontTextureHeight() const { return dc_sourceheight; } int BackTextureHeight() const { return dc_sourceheight2; } - void DrawSingleSkyColumn(); - void DrawDoubleSkyColumn(); + void DrawSingleSkyColumn(RenderThread *thread); + void DrawDoubleSkyColumn(RenderThread *thread); private: uint8_t *dc_dest = nullptr; diff --git a/src/swrenderer/viewport/r_spandrawer.cpp b/src/swrenderer/viewport/r_spandrawer.cpp index 3c0acaab09..ac28c52423 100644 --- a/src/swrenderer/viewport/r_spandrawer.cpp +++ b/src/swrenderer/viewport/r_spandrawer.cpp @@ -13,6 +13,7 @@ #include #include "r_spandrawer.h" +#include "swrenderer/r_renderthread.h" namespace swrenderer { @@ -96,23 +97,23 @@ namespace swrenderer } } - void 
SpanDrawerArgs::DrawSpan() + void SpanDrawerArgs::DrawSpan(RenderThread *thread) { - (RenderViewport::Instance()->Drawers()->*spanfunc)(*this); + (thread->Drawers()->*spanfunc)(*this); } - void SpanDrawerArgs::DrawTiltedSpan(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy, FDynamicColormap *basecolormap) + void SpanDrawerArgs::DrawTiltedSpan(RenderThread *thread, int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy, FDynamicColormap *basecolormap) { - RenderViewport::Instance()->Drawers()->DrawTiltedSpan(*this, y, x1, x2, plane_sz, plane_su, plane_sv, plane_shade, planeshade, planelightfloat, pviewx, pviewy, basecolormap); + thread->Drawers()->DrawTiltedSpan(*this, y, x1, x2, plane_sz, plane_su, plane_sv, plane_shade, planeshade, planelightfloat, pviewx, pviewy, basecolormap); } - void SpanDrawerArgs::DrawFogBoundaryLine(int y, int x1, int x2) + void SpanDrawerArgs::DrawFogBoundaryLine(RenderThread *thread, int y, int x1, int x2) { - RenderViewport::Instance()->Drawers()->DrawFogBoundaryLine(*this, y, x1, x2); + thread->Drawers()->DrawFogBoundaryLine(*this, y, x1, x2); } - void SpanDrawerArgs::DrawColoredSpan(int y, int x1, int x2) + void SpanDrawerArgs::DrawColoredSpan(RenderThread *thread, int y, int x1, int x2) { - RenderViewport::Instance()->Drawers()->DrawColoredSpan(*this, y, x1, x2); + thread->Drawers()->DrawColoredSpan(*this, y, x1, x2); } } diff --git a/src/swrenderer/viewport/r_spandrawer.h b/src/swrenderer/viewport/r_spandrawer.h index dcb348f201..8275cc6d5c 100644 --- a/src/swrenderer/viewport/r_spandrawer.h +++ b/src/swrenderer/viewport/r_spandrawer.h @@ -9,6 +9,8 @@ struct TriLight; namespace swrenderer { + class RenderThread; + class SpanDrawerArgs : public DrawerArgs { public: 
@@ -26,10 +28,10 @@ namespace swrenderer void SetTextureVStep(dsfixed_t vstep) { ds_ystep = vstep; } void SetSolidColor(int colorIndex) { ds_color = colorIndex; } - void DrawSpan(); - void DrawTiltedSpan(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy, FDynamicColormap *basecolormap); - void DrawColoredSpan(int y, int x1, int x2); - void DrawFogBoundaryLine(int y, int x1, int x2); + void DrawSpan(RenderThread *thread); + void DrawTiltedSpan(RenderThread *thread, int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy, FDynamicColormap *basecolormap); + void DrawColoredSpan(RenderThread *thread, int y, int x1, int x2); + void DrawFogBoundaryLine(RenderThread *thread, int y, int x1, int x2); uint32_t *SrcBlend() const { return dc_srcblend; } uint32_t *DestBlend() const { return dc_destblend; } diff --git a/src/swrenderer/viewport/r_spritedrawer.cpp b/src/swrenderer/viewport/r_spritedrawer.cpp index e0f0ccb3f3..2796f26bf6 100644 --- a/src/swrenderer/viewport/r_spritedrawer.cpp +++ b/src/swrenderer/viewport/r_spritedrawer.cpp @@ -34,6 +34,7 @@ #include #include "r_spritedrawer.h" +#include "swrenderer/r_renderthread.h" namespace swrenderer { @@ -42,14 +43,14 @@ namespace swrenderer colfunc = &SWPixelFormatDrawers::DrawColumn; } - void SpriteDrawerArgs::DrawMaskedColumn(int x, fixed_t iscale, FTexture *tex, fixed_t col, double spryscale, double sprtopscreen, bool sprflipvert, const short *mfloorclip, const short *mceilingclip, bool unmasked) + void SpriteDrawerArgs::DrawMaskedColumn(RenderThread *thread, int x, fixed_t iscale, FTexture *tex, fixed_t col, double spryscale, double sprtopscreen, bool sprflipvert, const short *mfloorclip, const short *mceilingclip, bool unmasked) { auto viewport = 
RenderViewport::Instance(); // Handle the linear filtered version in a different function to reduce chances of merge conflicts from zdoom. if (viewport->RenderTarget->IsBgra() && !drawer_needs_pal_input) // To do: add support to R_DrawColumnHoriz_rgba { - DrawMaskedColumnBgra(x, iscale, tex, col, spryscale, sprtopscreen, sprflipvert, mfloorclip, mceilingclip, unmasked); + DrawMaskedColumnBgra(thread, x, iscale, tex, col, spryscale, sprtopscreen, sprflipvert, mfloorclip, mceilingclip, unmasked); return; } @@ -115,13 +116,13 @@ namespace swrenderer else if (dc_iscale < 0) dc_count = MIN(dc_count, (dc_texturefrac - dc_iscale) / (-dc_iscale)); - (RenderViewport::Instance()->Drawers()->*colfunc)(*this); + (thread->Drawers()->*colfunc)(*this); } span++; } } - void SpriteDrawerArgs::DrawMaskedColumnBgra(int x, fixed_t iscale, FTexture *tex, fixed_t col, double spryscale, double sprtopscreen, bool sprflipvert, const short *mfloorclip, const short *mceilingclip, bool unmasked) + void SpriteDrawerArgs::DrawMaskedColumnBgra(RenderThread *thread, int x, fixed_t iscale, FTexture *tex, fixed_t col, double spryscale, double sprtopscreen, bool sprflipvert, const short *mfloorclip, const short *mceilingclip, bool unmasked) { dc_x = x; dc_iscale = iscale; @@ -230,7 +231,7 @@ namespace swrenderer double v = ((dc_yl + 0.5 - sprtopscreen) / spryscale) / tex->GetHeight(); dc_texturefrac = (uint32_t)(v * (1 << 30)); - (RenderViewport::Instance()->Drawers()->*colfunc)(*this); + (thread->Drawers()->*colfunc)(*this); } span++; } @@ -488,9 +489,9 @@ namespace swrenderer return SetStyle(style, FLOAT2FIXED(alpha), translation, color, basecolormap, shadedlightshade); } - void SpriteDrawerArgs::FillColumn() + void SpriteDrawerArgs::FillColumn(RenderThread *thread) { - RenderViewport::Instance()->Drawers()->FillColumn(*this); + thread->Drawers()->FillColumn(*this); } void SpriteDrawerArgs::SetDest(int x, int y) diff --git a/src/swrenderer/viewport/r_spritedrawer.h 
b/src/swrenderer/viewport/r_spritedrawer.h index 48911a179d..50acc1c0ed 100644 --- a/src/swrenderer/viewport/r_spritedrawer.h +++ b/src/swrenderer/viewport/r_spritedrawer.h @@ -9,6 +9,8 @@ struct TriLight; namespace swrenderer { + class RenderThread; + class SpriteDrawerArgs : public DrawerArgs { public: @@ -20,8 +22,8 @@ namespace swrenderer void SetCount(int count) { dc_count = count; } void SetSolidColor(int color) { dc_color = color; } - void DrawMaskedColumn(int x, fixed_t iscale, FTexture *texture, fixed_t column, double spryscale, double sprtopscreen, bool sprflipvert, const short *mfloorclip, const short *mceilingclip, bool unmasked = false); - void FillColumn(); + void DrawMaskedColumn(RenderThread *thread, int x, fixed_t iscale, FTexture *texture, fixed_t column, double spryscale, double sprtopscreen, bool sprflipvert, const short *mfloorclip, const short *mceilingclip, bool unmasked = false); + void FillColumn(RenderThread *thread); uint8_t *Dest() const { return dc_dest; } int DestY() const { return dc_dest_y; } @@ -51,7 +53,7 @@ namespace swrenderer private: bool SetBlendFunc(int op, fixed_t fglevel, fixed_t bglevel, int flags); static fixed_t GetAlpha(int type, fixed_t alpha); - void DrawMaskedColumnBgra(int x, fixed_t iscale, FTexture *tex, fixed_t column, double spryscale, double sprtopscreen, bool sprflipvert, const short *mfloorclip, const short *mceilingclip, bool unmasked); + void DrawMaskedColumnBgra(RenderThread *thread, int x, fixed_t iscale, FTexture *tex, fixed_t column, double spryscale, double sprtopscreen, bool sprflipvert, const short *mfloorclip, const short *mceilingclip, bool unmasked); uint8_t *dc_dest = nullptr; int dc_dest_y = 0; diff --git a/src/swrenderer/viewport/r_viewport.cpp b/src/swrenderer/viewport/r_viewport.cpp index 5cafc6f99f..e0f0e396cf 100644 --- a/src/swrenderer/viewport/r_viewport.cpp +++ b/src/swrenderer/viewport/r_viewport.cpp @@ -48,22 +48,12 @@ namespace swrenderer RenderViewport::RenderViewport() { - 
tc_drawers = std::make_unique(); - pal_drawers = std::make_unique(); } RenderViewport::~RenderViewport() { } - SWPixelFormatDrawers *RenderViewport::Drawers() - { - if (RenderTarget->IsBgra()) - return tc_drawers.get(); - else - return pal_drawers.get(); - } - void RenderViewport::SetViewport(int fullWidth, int fullHeight, float trueratio) { int virtheight, virtwidth, virtwidth2, virtheight2; diff --git a/src/swrenderer/viewport/r_viewport.h b/src/swrenderer/viewport/r_viewport.h index a61709d611..34f00c2779 100644 --- a/src/swrenderer/viewport/r_viewport.h +++ b/src/swrenderer/viewport/r_viewport.h @@ -19,10 +19,6 @@ namespace swrenderer { - class SWPixelFormatDrawers; - class SWTruecolorDrawers; - class SWPalDrawers; - class RenderViewport { public: @@ -63,15 +59,10 @@ namespace swrenderer DVector2 PointWorldToView(const DVector2 &worldPos) const; DVector2 ScaleViewToScreen(const DVector2 &scale, double viewZ, bool pixelstretch = true) const; - SWPixelFormatDrawers *Drawers(); - private: void InitTextureMapping(); void SetupBuffer(); double BaseYaspectMul = 0.0; // yaspectmul without a forced aspect ratio - - std::unique_ptr tc_drawers; - std::unique_ptr pal_drawers; }; } diff --git a/src/swrenderer/viewport/r_walldrawer.cpp b/src/swrenderer/viewport/r_walldrawer.cpp index 8672c4cf5d..aaada06047 100644 --- a/src/swrenderer/viewport/r_walldrawer.cpp +++ b/src/swrenderer/viewport/r_walldrawer.cpp @@ -13,6 +13,7 @@ #include #include "r_walldrawer.h" +#include "swrenderer/r_renderthread.h" namespace swrenderer { @@ -23,9 +24,9 @@ namespace swrenderer dc_dest_y = y; } - void WallDrawerArgs::DrawColumn() + void WallDrawerArgs::DrawColumn(RenderThread *thread) { - (RenderViewport::Instance()->Drawers()->*wallfunc)(*this); + (thread->Drawers()->*wallfunc)(*this); } void WallDrawerArgs::SetStyle(bool masked, bool additive, fixed_t alpha) diff --git a/src/swrenderer/viewport/r_walldrawer.h b/src/swrenderer/viewport/r_walldrawer.h index 678463b131..68a3ea46f3 100644 --- 
a/src/swrenderer/viewport/r_walldrawer.h +++ b/src/swrenderer/viewport/r_walldrawer.h @@ -9,6 +9,8 @@ struct TriLight; namespace swrenderer { + class RenderThread; + class WallDrawerArgs : public DrawerArgs { public: @@ -28,7 +30,7 @@ namespace swrenderer bool IsMaskedDrawer() const; - void DrawColumn(); + void DrawColumn(RenderThread *thread); uint8_t *Dest() const { return dc_dest; } int DestY() const { return dc_dest_y; } diff --git a/src/v_draw.cpp b/src/v_draw.cpp index fe345a7ab8..9a1c841cee 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -47,6 +47,7 @@ #include "swrenderer/drawers/r_draw_rgba.h" #include "swrenderer/scene/r_light.h" #include "swrenderer/viewport/r_viewport.h" +#include "swrenderer/r_renderthread.h" #endif #include "r_data/r_translate.h" #include "doomstat.h" @@ -278,9 +279,11 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) int x2_i = int(x2); fixed_t xiscale_i = FLOAT2FIXED(xiscale); + static RenderThread thread; + thread.DrawQueue->ThreadedRender = false; while (x < x2_i) { - drawerargs.DrawMaskedColumn(x, iscale, img, frac, spryscale, sprtopscreen, sprflipvert, mfloorclip, mceilingclip, !parms.masked); + drawerargs.DrawMaskedColumn(&thread, x, iscale, img, frac, spryscale, sprtopscreen, sprflipvert, mfloorclip, mceilingclip, !parms.masked); x++; frac += xiscale_i; } @@ -1426,6 +1429,9 @@ void DCanvas::FillSimplePoly(FTexture *tex, FVector2 *points, int npoints, pt1 = pt2; pt2++; if (pt2 > npoints) pt2 = 0; } while (pt1 != botpt); + + static RenderThread thread; + thread.DrawQueue->ThreadedRender = false; // Travel down the left edge and fill it in. 
pt1 = toppt; @@ -1472,7 +1478,7 @@ void DCanvas::FillSimplePoly(FTexture *tex, FVector2 *points, int npoints, drawerargs.SetTextureUPos(xs_RoundToInt(tex.X * scalex)); drawerargs.SetTextureVPos(xs_RoundToInt(tex.Y * scaley)); - drawerargs.DrawSpan(); + drawerargs.DrawSpan(&thread); #endif } x += xinc; From 5105a5d25428424adaeab5229023ff98721542fd Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 4 Feb 2017 13:43:39 +0100 Subject: [PATCH 811/912] Fix msvc compile error --- src/swrenderer/drawers/r_draw.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/swrenderer/drawers/r_draw.h b/src/swrenderer/drawers/r_draw.h index 48185728b3..66843cc4ee 100644 --- a/src/swrenderer/drawers/r_draw.h +++ b/src/swrenderer/drawers/r_draw.h @@ -2,6 +2,7 @@ #pragma once #include "r_defs.h" +#include struct FSWColormap; struct FLightNode; From 5bae06a3c6503fdf969aba2b891d379b72f14886 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 4 Feb 2017 14:00:21 +0100 Subject: [PATCH 812/912] Make RenderScene the work dispatching class --- src/polyrenderer/poly_renderer.cpp | 2 +- src/swrenderer/drawers/r_thread.cpp | 2 +- src/swrenderer/r_renderthread.cpp | 4 +-- src/swrenderer/r_renderthread.h | 5 ++-- src/swrenderer/r_swrenderer.cpp | 16 +++++----- src/swrenderer/r_swrenderer.h | 4 +-- src/swrenderer/scene/r_scene.cpp | 46 ++++++++++++++--------------- src/swrenderer/scene/r_scene.h | 7 +++-- src/v_draw.cpp | 4 +-- 9 files changed, 47 insertions(+), 43 deletions(-) diff --git a/src/polyrenderer/poly_renderer.cpp b/src/polyrenderer/poly_renderer.cpp index 5c00ef4fbd..77b59ac4bc 100644 --- a/src/polyrenderer/poly_renderer.cpp +++ b/src/polyrenderer/poly_renderer.cpp @@ -50,7 +50,7 @@ PolyRenderer *PolyRenderer::Instance() return &scene; } -PolyRenderer::PolyRenderer() +PolyRenderer::PolyRenderer() : Thread(nullptr) { } diff --git a/src/swrenderer/drawers/r_thread.cpp b/src/swrenderer/drawers/r_thread.cpp index 367783b565..adcee27685 100644 --- 
a/src/swrenderer/drawers/r_thread.cpp +++ b/src/swrenderer/drawers/r_thread.cpp @@ -282,5 +282,5 @@ DrawerCommandQueue::DrawerCommandQueue(swrenderer::RenderThread *renderthread) : void *DrawerCommandQueue::AllocMemory(size_t size) { - return renderthread->FrameMemory->AllocMemory(size); + return renderthread->FrameMemory->AllocMemory((int)size); } diff --git a/src/swrenderer/r_renderthread.cpp b/src/swrenderer/r_renderthread.cpp index dd14c8902f..53b5f62f83 100644 --- a/src/swrenderer/r_renderthread.cpp +++ b/src/swrenderer/r_renderthread.cpp @@ -56,8 +56,9 @@ namespace swrenderer { - RenderThread::RenderThread() + RenderThread::RenderThread(RenderScene *scene) { + Scene = scene; FrameMemory = std::make_unique(); DrawQueue = std::make_shared(this); OpaquePass = std::make_unique(this); @@ -67,7 +68,6 @@ namespace swrenderer Clip3DFloors = std::make_unique(this); PlayerSprites = std::make_unique(this); PlaneList = std::make_unique(this); - Scene = std::make_unique(this); DrawSegments = std::make_unique(this); ClipSegments = std::make_unique(); tc_drawers = std::make_unique(DrawQueue); diff --git a/src/swrenderer/r_renderthread.h b/src/swrenderer/r_renderthread.h index 809fe080d9..1d3792fc1d 100644 --- a/src/swrenderer/r_renderthread.h +++ b/src/swrenderer/r_renderthread.h @@ -47,9 +47,11 @@ namespace swrenderer class RenderThread { public: - RenderThread(); + RenderThread(RenderScene *scene); ~RenderThread(); + RenderScene *Scene; + std::unique_ptr FrameMemory; std::unique_ptr OpaquePass; std::unique_ptr TranslucentPass; @@ -58,7 +60,6 @@ namespace swrenderer std::unique_ptr Clip3DFloors; std::unique_ptr PlayerSprites; std::unique_ptr PlaneList; - std::unique_ptr Scene; std::unique_ptr DrawSegments; std::unique_ptr ClipSegments; DrawerCommandQueuePtr DrawQueue; diff --git a/src/swrenderer/r_swrenderer.cpp b/src/swrenderer/r_swrenderer.cpp index a12a5ae003..561f0de430 100644 --- a/src/swrenderer/r_swrenderer.cpp +++ b/src/swrenderer/r_swrenderer.cpp @@ -88,7 +88,7 @@ 
void FSoftwareRenderer::Init() { gl_ParseDefs(); - mMainThread.Scene->Init(); + mScene.Init(); } bool FSoftwareRenderer::UsesColormap() const @@ -178,7 +178,7 @@ void FSoftwareRenderer::RenderView(player_t *player) if (r_polyrenderer) PolyRenderer::Instance()->RenderView(player); else - mMainThread.Scene->RenderView(player); + mScene.RenderView(player); FCanvasTextureInfo::UpdateAll(); } @@ -202,7 +202,7 @@ void FSoftwareRenderer::WriteSavePic (player_t *player, FileWriter *file, int wi if (r_polyrenderer) PolyRenderer::Instance()->RenderViewToCanvas(player->mo, pic, 0, 0, width, height, true); else - mMainThread.Scene->RenderViewToCanvas (player->mo, pic, 0, 0, width, height); + mScene.RenderViewToCanvas (player->mo, pic, 0, 0, width, height); screen->GetFlashedPalette (palette); M_CreatePNG (file, pic->GetBuffer(), palette, SS_PAL, width, height, pic->GetPitch()); pic->Unlock (); @@ -215,7 +215,7 @@ void FSoftwareRenderer::DrawRemainingPlayerSprites() { if (!r_polyrenderer) { - mMainThread.PlayerSprites->RenderRemaining(); + mScene.MainThread()->PlayerSprites->RenderRemaining(); } else { @@ -237,12 +237,12 @@ bool FSoftwareRenderer::RequireGLNodes() void FSoftwareRenderer::OnModeSet () { - mMainThread.Scene->ScreenResized(); + mScene.ScreenResized(); } void FSoftwareRenderer::SetClearColor(int color) { - mMainThread.Scene->SetClearColor(color); + mScene.SetClearColor(color); } void FSoftwareRenderer::RenderTextureView (FCanvasTexture *tex, AActor *viewpoint, int fov) @@ -262,7 +262,7 @@ void FSoftwareRenderer::RenderTextureView (FCanvasTexture *tex, AActor *viewpoin if (r_polyrenderer) PolyRenderer::Instance()->RenderViewToCanvas(viewpoint, Canvas, 0, 0, tex->GetWidth(), tex->GetHeight(), tex->bFirstUpdate); else - mMainThread.Scene->RenderViewToCanvas(viewpoint, Canvas, 0, 0, tex->GetWidth(), tex->GetHeight(), tex->bFirstUpdate); + mScene.RenderViewToCanvas(viewpoint, Canvas, 0, 0, tex->GetWidth(), tex->GetHeight(), tex->bFirstUpdate); R_SetFOV (savedfov); @@ 
-319,7 +319,7 @@ void FSoftwareRenderer::RenderTextureView (FCanvasTexture *tex, AActor *viewpoin sector_t *FSoftwareRenderer::FakeFlat(sector_t *sec, sector_t *tempsec, int *floorlightlevel, int *ceilinglightlevel) { - return mMainThread.OpaquePass->FakeFlat(sec, tempsec, floorlightlevel, ceilinglightlevel, nullptr, 0, 0, 0, 0); + return mScene.MainThread()->OpaquePass->FakeFlat(sec, tempsec, floorlightlevel, ceilinglightlevel, nullptr, 0, 0, 0, 0); } void FSoftwareRenderer::StateChanged(AActor *actor) diff --git a/src/swrenderer/r_swrenderer.h b/src/swrenderer/r_swrenderer.h index 9cdc0c9beb..6d93738793 100644 --- a/src/swrenderer/r_swrenderer.h +++ b/src/swrenderer/r_swrenderer.h @@ -2,7 +2,7 @@ #pragma once #include "r_renderer.h" -#include "r_renderthread.h" +#include "swrenderer/scene/r_scene.h" struct FSoftwareRenderer : public FRenderer { @@ -43,5 +43,5 @@ struct FSoftwareRenderer : public FRenderer private: void PrecacheTexture(FTexture *tex, int cache); - swrenderer::RenderThread mMainThread; + swrenderer::RenderScene mScene; }; diff --git a/src/swrenderer/scene/r_scene.cpp b/src/swrenderer/scene/r_scene.cpp index 371936a699..d784efb4ab 100644 --- a/src/swrenderer/scene/r_scene.cpp +++ b/src/swrenderer/scene/r_scene.cpp @@ -56,9 +56,9 @@ namespace swrenderer { cycle_t WallCycles, PlaneCycles, MaskedCycles, WallScanCycles; - RenderScene::RenderScene(RenderThread *thread) + RenderScene::RenderScene() { - Thread = thread; + Threads.push_back(std::make_unique(this)); } void RenderScene::SetClearColor(int color) @@ -98,10 +98,10 @@ namespace swrenderer // Apply special colormap if the target cannot do it if (CameraLight::Instance()->ShaderColormap() && viewport->RenderTarget->IsBgra() && !(r_shadercolormaps && screen->Accel2D)) { - Thread->DrawQueue->Push(CameraLight::Instance()->ShaderColormap(), screen); + MainThread()->DrawQueue->Push(CameraLight::Instance()->ShaderColormap(), screen); } - DrawerThreads::Execute({ Thread->DrawQueue }); + 
DrawerThreads::Execute({ MainThread()->DrawQueue }); } void RenderScene::RenderActorView(AActor *actor, bool dontmaplines) @@ -111,9 +111,9 @@ namespace swrenderer MaskedCycles.Reset(); WallScanCycles.Reset(); - Thread->FrameMemory->Clear(); + MainThread()->FrameMemory->Clear(); - Clip3DFloors *clip3d = Thread->Clip3DFloors.get(); + Clip3DFloors *clip3d = MainThread()->Clip3DFloors.get(); clip3d->Cleanup(); clip3d->ResetClip(); // reset clips (floor/ceiling) @@ -121,25 +121,25 @@ namespace swrenderer CameraLight::Instance()->SetCamera(actor); RenderViewport::Instance()->SetupFreelook(); - Thread->Portal->CopyStackedViewParameters(); + MainThread()->Portal->CopyStackedViewParameters(); // Clear buffers. - Thread->ClipSegments->Clear(0, viewwidth); - Thread->DrawSegments->Clear(); - Thread->PlaneList->Clear(); - Thread->TranslucentPass->Clear(); + MainThread()->ClipSegments->Clear(0, viewwidth); + MainThread()->DrawSegments->Clear(); + MainThread()->PlaneList->Clear(); + MainThread()->TranslucentPass->Clear(); // opening / clipping determination - Thread->OpaquePass->ClearClip(); + MainThread()->OpaquePass->ClearClip(); NetUpdate(); - Thread->Portal->SetMainPortal(); + MainThread()->Portal->SetMainPortal(); this->dontmaplines = dontmaplines; // [RH] Hack to make windows into underwater areas possible - Thread->OpaquePass->ResetFakingUnderwater(); + MainThread()->OpaquePass->ResetFakingUnderwater(); // [RH] Setup particles for this frame P_FindParticleSubsectors(); @@ -153,8 +153,8 @@ namespace swrenderer } // Link the polyobjects right before drawing the scene to reduce the amounts of calls to this function PO_LinkToSubsectors(); - Thread->OpaquePass->RenderScene(); - Thread->Clip3DFloors->ResetClip(); // reset clips (floor/ceiling) + MainThread()->OpaquePass->RenderScene(); + MainThread()->Clip3DFloors->ResetClip(); // reset clips (floor/ceiling) camera->renderflags = savedflags; WallCycles.Unclock(); @@ -163,16 +163,16 @@ namespace swrenderer if (viewactive) { 
PlaneCycles.Clock(); - Thread->PlaneList->Render(); - Thread->Portal->RenderPlanePortals(); + MainThread()->PlaneList->Render(); + MainThread()->Portal->RenderPlanePortals(); PlaneCycles.Unclock(); - Thread->Portal->RenderLinePortals(); + MainThread()->Portal->RenderLinePortals(); NetUpdate(); MaskedCycles.Clock(); - Thread->TranslucentPass->Render(); + MainThread()->TranslucentPass->Render(); MaskedCycles.Unclock(); NetUpdate(); @@ -204,7 +204,7 @@ namespace swrenderer RenderActorView(actor, dontmaplines); - DrawerThreads::Execute({ Thread->DrawQueue }); + DrawerThreads::Execute({ MainThread()->DrawQueue }); viewport->RenderTarget = screen; @@ -241,8 +241,8 @@ namespace swrenderer void RenderScene::Deinit() { - Thread->TranslucentPass->Deinit(); - Thread->Clip3DFloors->Cleanup(); + MainThread()->TranslucentPass->Deinit(); + MainThread()->Clip3DFloors->Cleanup(); } ///////////////////////////////////////////////////////////////////////// diff --git a/src/swrenderer/scene/r_scene.h b/src/swrenderer/scene/r_scene.h index 231fd7a4a1..a592ae1d3e 100644 --- a/src/swrenderer/scene/r_scene.h +++ b/src/swrenderer/scene/r_scene.h @@ -14,6 +14,7 @@ #pragma once #include +#include #include "r_defs.h" #include "d_player.h" @@ -28,7 +29,7 @@ namespace swrenderer class RenderScene { public: - RenderScene(RenderThread *thread); + RenderScene(); void Init(); void ScreenResized(); @@ -41,12 +42,14 @@ namespace swrenderer bool DontMapLines() const { return dontmaplines; } - RenderThread *Thread = nullptr; + RenderThread *MainThread() { return Threads.front().get(); } private: void RenderActorView(AActor *actor, bool dontmaplines = false); bool dontmaplines = false; int clearcolor = 0; + + std::vector> Threads; }; } diff --git a/src/v_draw.cpp b/src/v_draw.cpp index 9a1c841cee..fd00f82412 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -279,7 +279,7 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) int x2_i = int(x2); fixed_t xiscale_i = FLOAT2FIXED(xiscale); - 
static RenderThread thread; + static RenderThread thread(nullptr); thread.DrawQueue->ThreadedRender = false; while (x < x2_i) { @@ -1430,7 +1430,7 @@ void DCanvas::FillSimplePoly(FTexture *tex, FVector2 *points, int npoints, pt2++; if (pt2 > npoints) pt2 = 0; } while (pt1 != botpt); - static RenderThread thread; + static RenderThread thread(nullptr); thread.DrawQueue->ThreadedRender = false; // Travel down the left edge and fill it in. From f50532af8a4827811a83633846598a94b8d89327 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 4 Feb 2017 14:20:58 +0100 Subject: [PATCH 813/912] Reorder scene setup to do the stuff that can't be threaded first --- src/swrenderer/r_renderthread.h | 2 ++ src/swrenderer/scene/r_scene.cpp | 51 ++++++++++++++++---------------- 2 files changed, 27 insertions(+), 26 deletions(-) diff --git a/src/swrenderer/r_renderthread.h b/src/swrenderer/r_renderthread.h index 1d3792fc1d..8d60de5c7d 100644 --- a/src/swrenderer/r_renderthread.h +++ b/src/swrenderer/r_renderthread.h @@ -51,6 +51,8 @@ namespace swrenderer ~RenderThread(); RenderScene *Scene; + int X1 = 0; + int X2 = MAXWIDTH; std::unique_ptr FrameMemory; std::unique_ptr OpaquePass; diff --git a/src/swrenderer/scene/r_scene.cpp b/src/swrenderer/scene/r_scene.cpp index d784efb4ab..8c6ee71981 100644 --- a/src/swrenderer/scene/r_scene.cpp +++ b/src/swrenderer/scene/r_scene.cpp @@ -111,51 +111,48 @@ namespace swrenderer MaskedCycles.Reset(); WallScanCycles.Reset(); - MainThread()->FrameMemory->Clear(); - - Clip3DFloors *clip3d = MainThread()->Clip3DFloors.get(); - clip3d->Cleanup(); - clip3d->ResetClip(); // reset clips (floor/ceiling) - R_SetupFrame(actor); CameraLight::Instance()->SetCamera(actor); RenderViewport::Instance()->SetupFreelook(); - MainThread()->Portal->CopyStackedViewParameters(); - - // Clear buffers. 
- MainThread()->ClipSegments->Clear(0, viewwidth); - MainThread()->DrawSegments->Clear(); - MainThread()->PlaneList->Clear(); - MainThread()->TranslucentPass->Clear(); - - // opening / clipping determination - MainThread()->OpaquePass->ClearClip(); - NetUpdate(); - MainThread()->Portal->SetMainPortal(); - this->dontmaplines = dontmaplines; - // [RH] Hack to make windows into underwater areas possible - MainThread()->OpaquePass->ResetFakingUnderwater(); - // [RH] Setup particles for this frame P_FindParticleSubsectors(); - WallCycles.Clock(); + // Link the polyobjects right before drawing the scene to reduce the amounts of calls to this function + PO_LinkToSubsectors(); + ActorRenderFlags savedflags = camera->renderflags; // Never draw the player unless in chasecam mode if (!r_showviewer) { camera->renderflags |= RF_INVISIBLE; } - // Link the polyobjects right before drawing the scene to reduce the amounts of calls to this function - PO_LinkToSubsectors(); + + MainThread()->FrameMemory->Clear(); + MainThread()->Clip3DFloors->Cleanup(); + MainThread()->Clip3DFloors->ResetClip(); // reset clips (floor/ceiling) + MainThread()->Portal->CopyStackedViewParameters(); + MainThread()->ClipSegments->Clear(0, viewwidth); + MainThread()->DrawSegments->Clear(); + MainThread()->PlaneList->Clear(); + MainThread()->TranslucentPass->Clear(); + MainThread()->OpaquePass->ClearClip(); + MainThread()->OpaquePass->ResetFakingUnderwater(); // [RH] Hack to make windows into underwater areas possible + MainThread()->Portal->SetMainPortal(); + + // Cull things outside the range seen by this thread + if (MainThread()->X1 > 0) + MainThread()->ClipSegments->Clip(0, MainThread()->X1, true, [](int, int) { return true; }); + if (MainThread()->X2 < viewwidth) + MainThread()->ClipSegments->Clip(MainThread()->X2, viewwidth, true, [](int, int) { return true; }); + + WallCycles.Clock(); MainThread()->OpaquePass->RenderScene(); MainThread()->Clip3DFloors->ResetClip(); // reset clips (floor/ceiling) - 
camera->renderflags = savedflags; WallCycles.Unclock(); NetUpdate(); @@ -177,6 +174,8 @@ namespace swrenderer NetUpdate(); } + + camera->renderflags = savedflags; interpolator.RestoreInterpolations(); // If we don't want shadered colormaps, NULL it now so that the From d9e545a519840c33a0b55dd58027a9b838f48188 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 4 Feb 2017 15:51:54 +0100 Subject: [PATCH 814/912] Hook up thread slices and eliminate some statics hiding in misc functions --- src/swrenderer/line/r_line.cpp | 7 +- src/swrenderer/line/r_line.h | 7 +- src/swrenderer/line/r_walldraw.cpp | 25 ++-- src/swrenderer/r_renderthread.cpp | 3 +- src/swrenderer/r_renderthread.h | 9 +- src/swrenderer/scene/r_scene.cpp | 151 +++++++++++++++------- src/swrenderer/scene/r_scene.h | 3 + src/swrenderer/segments/r_clipsegment.cpp | 10 +- src/swrenderer/segments/r_clipsegment.h | 9 +- src/swrenderer/things/r_playersprite.cpp | 1 - src/swrenderer/things/r_playersprite.h | 1 + src/swrenderer/things/r_visiblesprite.cpp | 4 +- 12 files changed, 154 insertions(+), 76 deletions(-) diff --git a/src/swrenderer/line/r_line.cpp b/src/swrenderer/line/r_line.cpp index 17a4758621..ddecf5879a 100644 --- a/src/swrenderer/line/r_line.cpp +++ b/src/swrenderer/line/r_line.cpp @@ -64,7 +64,6 @@ namespace swrenderer void SWRenderLine::Render(seg_t *line, subsector_t *subsector, sector_t *sector, sector_t *fakebacksector, VisiblePlane *linefloorplane, VisiblePlane *lineceilingplane, bool infog, FDynamicColormap *colormap) { - static sector_t tempsec; // killough 3/8/98: ceiling/water hack bool solid; DVector2 pt1, pt2; @@ -284,11 +283,7 @@ namespace swrenderer rw_floorstat = wallbottom.Project(frontsector->floorplane, &WallC, curline, renderportal->MirrorFlags & RF_XFLIP); } - static SWRenderLine *self = this; - bool visible = Thread->ClipSegments->Clip(WallC.sx1, WallC.sx2, solid, [](int x1, int x2) -> bool - { - return self->RenderWallSegment(x1, x2); - }); + bool visible = 
Thread->ClipSegments->Clip(WallC.sx1, WallC.sx2, solid, this); if (visible) { diff --git a/src/swrenderer/line/r_line.h b/src/swrenderer/line/r_line.h index 75b6d69b34..ef38e9e65f 100644 --- a/src/swrenderer/line/r_line.h +++ b/src/swrenderer/line/r_line.h @@ -15,6 +15,7 @@ #include "vectors.h" #include "r_wallsetup.h" +#include "swrenderer/segments/r_clipsegment.h" struct seg_t; struct subsector_t; @@ -48,7 +49,7 @@ namespace swrenderer void InitFromLine(RenderThread *thread, const DVector2 &left, const DVector2 &right); }; - class SWRenderLine + class SWRenderLine : VisibleSegmentRenderer { public: SWRenderLine(RenderThread *thread); @@ -57,7 +58,7 @@ namespace swrenderer RenderThread *Thread = nullptr; private: - bool RenderWallSegment(int x1, int x2); + bool RenderWallSegment(int x1, int x2) override; void SetWallVariables(bool needlights); void RenderWallSegmentTextures(int x1, int x2); @@ -133,5 +134,7 @@ namespace swrenderer ProjectedWallLine wallupper; ProjectedWallLine walllower; ProjectedWallTexcoords walltexcoords; + + sector_t tempsec; // killough 3/8/98: ceiling/water hack }; } diff --git a/src/swrenderer/line/r_walldraw.cpp b/src/swrenderer/line/r_walldraw.cpp index 9a07198e51..ea165ab045 100644 --- a/src/swrenderer/line/r_walldraw.cpp +++ b/src/swrenderer/line/r_walldraw.cpp @@ -41,6 +41,7 @@ #include "swrenderer/line/r_walldraw.h" #include "swrenderer/line/r_wallsetup.h" #include "swrenderer/r_renderthread.h" +#include "swrenderer/r_memory.h" namespace swrenderer { @@ -188,14 +189,22 @@ namespace swrenderer drawerargs.dc_viewpos.Z = (float)((viewport->CenterY - y1 - 0.5) / viewport->InvZtoScale * zcol); drawerargs.dc_viewpos_step.Z = (float)(-zcol / viewport->InvZtoScale); - static TriLight lightbuffer[64 * 1024]; - static int nextlightindex = 0; + // Calculate max lights that can touch column so we can allocate memory for the list + int max_lights = 0; + FLightNode *cur_node = light_list; + while (cur_node) + { + if 
(!(cur_node->lightsource->flags2&MF2_DORMANT)) + max_lights++; + cur_node = cur_node->nextLight; + } + + drawerargs.dc_num_lights = 0; + drawerargs.dc_lights = Thread->FrameMemory->AllocMemory(max_lights); // Setup lights for column - drawerargs.dc_num_lights = 0; - drawerargs.dc_lights = lightbuffer + nextlightindex; - FLightNode *cur_node = light_list; - while (cur_node && nextlightindex < 64 * 1024) + cur_node = light_list; + while (cur_node) { if (!(cur_node->lightsource->flags2&MF2_DORMANT)) { @@ -220,7 +229,6 @@ namespace swrenderer uint32_t green = cur_node->lightsource->GetGreen(); uint32_t blue = cur_node->lightsource->GetBlue(); - nextlightindex++; auto &light = drawerargs.dc_lights[drawerargs.dc_num_lights++]; light.x = lconstant; light.y = nlconstant; @@ -232,9 +240,6 @@ namespace swrenderer cur_node = cur_node->nextLight; } - - if (nextlightindex == 64 * 1024) - nextlightindex = 0; } else { diff --git a/src/swrenderer/r_renderthread.cpp b/src/swrenderer/r_renderthread.cpp index 53b5f62f83..ba48993ca8 100644 --- a/src/swrenderer/r_renderthread.cpp +++ b/src/swrenderer/r_renderthread.cpp @@ -56,9 +56,10 @@ namespace swrenderer { - RenderThread::RenderThread(RenderScene *scene) + RenderThread::RenderThread(RenderScene *scene, bool mainThread) { Scene = scene; + MainThread = mainThread; FrameMemory = std::make_unique(); DrawQueue = std::make_shared(this); OpaquePass = std::make_unique(this); diff --git a/src/swrenderer/r_renderthread.h b/src/swrenderer/r_renderthread.h index 8d60de5c7d..8a4e8c54bf 100644 --- a/src/swrenderer/r_renderthread.h +++ b/src/swrenderer/r_renderthread.h @@ -47,12 +47,13 @@ namespace swrenderer class RenderThread { public: - RenderThread(RenderScene *scene); + RenderThread(RenderScene *scene, bool mainThread = true); ~RenderThread(); RenderScene *Scene; int X1 = 0; int X2 = MAXWIDTH; + bool MainThread = false; std::unique_ptr FrameMemory; std::unique_ptr OpaquePass; @@ -65,7 +66,11 @@ namespace swrenderer std::unique_ptr 
DrawSegments; std::unique_ptr ClipSegments; DrawerCommandQueuePtr DrawQueue; - + + // VisibleSprite working buffers + short clipbot[MAXWIDTH]; + short cliptop[MAXWIDTH]; + SWPixelFormatDrawers *Drawers(); private: diff --git a/src/swrenderer/scene/r_scene.cpp b/src/swrenderer/scene/r_scene.cpp index 8c6ee71981..16a73ec912 100644 --- a/src/swrenderer/scene/r_scene.cpp +++ b/src/swrenderer/scene/r_scene.cpp @@ -52,6 +52,8 @@ EXTERN_CVAR(Bool, r_shadercolormaps) EXTERN_CVAR(Int, r_clearbuffer) +CVAR(Bool, r_scene_multithreaded, false, 0); + namespace swrenderer { cycle_t WallCycles, PlaneCycles, MaskedCycles, WallScanCycles; @@ -100,8 +102,19 @@ namespace swrenderer { MainThread()->DrawQueue->Push(CameraLight::Instance()->ShaderColormap(), screen); } - - DrawerThreads::Execute({ MainThread()->DrawQueue }); + + RenderDrawQueues(); + } + + void RenderScene::RenderDrawQueues() + { + // Use reverse order so main thread is drawn last + std::vector queues; + for (auto it = Threads.rbegin(); it != Threads.rend(); ++it) + { + queues.push_back((*it)->DrawQueue); + } + DrawerThreads::Execute(queues); } void RenderScene::RenderActorView(AActor *actor, bool dontmaplines) @@ -132,48 +145,7 @@ namespace swrenderer camera->renderflags |= RF_INVISIBLE; } - MainThread()->FrameMemory->Clear(); - MainThread()->Clip3DFloors->Cleanup(); - MainThread()->Clip3DFloors->ResetClip(); // reset clips (floor/ceiling) - MainThread()->Portal->CopyStackedViewParameters(); - MainThread()->ClipSegments->Clear(0, viewwidth); - MainThread()->DrawSegments->Clear(); - MainThread()->PlaneList->Clear(); - MainThread()->TranslucentPass->Clear(); - MainThread()->OpaquePass->ClearClip(); - MainThread()->OpaquePass->ResetFakingUnderwater(); // [RH] Hack to make windows into underwater areas possible - MainThread()->Portal->SetMainPortal(); - - // Cull things outside the range seen by this thread - if (MainThread()->X1 > 0) - MainThread()->ClipSegments->Clip(0, MainThread()->X1, true, [](int, int) { return true; 
}); - if (MainThread()->X2 < viewwidth) - MainThread()->ClipSegments->Clip(MainThread()->X2, viewwidth, true, [](int, int) { return true; }); - - WallCycles.Clock(); - MainThread()->OpaquePass->RenderScene(); - MainThread()->Clip3DFloors->ResetClip(); // reset clips (floor/ceiling) - WallCycles.Unclock(); - - NetUpdate(); - - if (viewactive) - { - PlaneCycles.Clock(); - MainThread()->PlaneList->Render(); - MainThread()->Portal->RenderPlanePortals(); - PlaneCycles.Unclock(); - - MainThread()->Portal->RenderLinePortals(); - - NetUpdate(); - - MaskedCycles.Clock(); - MainThread()->TranslucentPass->Render(); - MaskedCycles.Unclock(); - - NetUpdate(); - } + RenderThreadSlices(); camera->renderflags = savedflags; interpolator.RestoreInterpolations(); @@ -186,6 +158,94 @@ namespace swrenderer } } + void RenderScene::RenderThreadSlices() + { + int numThreads = r_scene_multithreaded ? 8 : 1; + + while (Threads.size() > (size_t)numThreads) + { + Threads.pop_back(); + } + + while (Threads.size() < (size_t)numThreads) + { + Threads.push_back(std::make_unique(this)); + } + + for (int i = 0; i < numThreads; i++) + { + Threads[i]->X1 = viewwidth * i / numThreads; + Threads[i]->X2 = viewwidth * (i + 1) / numThreads; + } + + for (int i = 0; i < numThreads; i++) + { + RenderThreadSlice(Threads[i].get()); + } + } + + void RenderScene::RenderThreadSlice(RenderThread *thread) + { + thread->FrameMemory->Clear(); + thread->Clip3DFloors->Cleanup(); + thread->Clip3DFloors->ResetClip(); // reset clips (floor/ceiling) + thread->Portal->CopyStackedViewParameters(); + thread->ClipSegments->Clear(0, viewwidth); + thread->DrawSegments->Clear(); + thread->PlaneList->Clear(); + thread->TranslucentPass->Clear(); + thread->OpaquePass->ClearClip(); + thread->OpaquePass->ResetFakingUnderwater(); // [RH] Hack to make windows into underwater areas possible + thread->Portal->SetMainPortal(); + + // Cull things outside the range seen by this thread + VisibleSegmentRenderer visitor; + if (thread->X1 > 0) + 
thread->ClipSegments->Clip(0, thread->X1, true, &visitor); + if (thread->X2 < viewwidth) + thread->ClipSegments->Clip(thread->X2, viewwidth, true, &visitor); + + if (thread->MainThread) + WallCycles.Clock(); + + thread->OpaquePass->RenderScene(); + thread->Clip3DFloors->ResetClip(); // reset clips (floor/ceiling) + + if (thread == MainThread()) + WallCycles.Unclock(); + + if (thread->MainThread) + NetUpdate(); + + if (viewactive) + { + if (thread->MainThread) + PlaneCycles.Clock(); + + thread->PlaneList->Render(); + thread->Portal->RenderPlanePortals(); + + if (thread->MainThread) + PlaneCycles.Unclock(); + + thread->Portal->RenderLinePortals(); + + if (thread->MainThread) + NetUpdate(); + + if (thread->MainThread) + MaskedCycles.Clock(); + + thread->TranslucentPass->Render(); + + if (thread->MainThread) + MaskedCycles.Unclock(); + + if (thread->MainThread) + NetUpdate(); + } + } + void RenderScene::RenderViewToCanvas(AActor *actor, DCanvas *canvas, int x, int y, int width, int height, bool dontmaplines) { auto viewport = RenderViewport::Instance(); @@ -202,8 +262,7 @@ namespace swrenderer viewport->SetViewport(width, height, WidescreenRatio); RenderActorView(actor, dontmaplines); - - DrawerThreads::Execute({ MainThread()->DrawQueue }); + RenderDrawQueues(); viewport->RenderTarget = screen; diff --git a/src/swrenderer/scene/r_scene.h b/src/swrenderer/scene/r_scene.h index a592ae1d3e..d3053884c5 100644 --- a/src/swrenderer/scene/r_scene.h +++ b/src/swrenderer/scene/r_scene.h @@ -46,6 +46,9 @@ namespace swrenderer private: void RenderActorView(AActor *actor, bool dontmaplines = false); + void RenderDrawQueues(); + void RenderThreadSlices(); + void RenderThreadSlice(RenderThread *thread); bool dontmaplines = false; int clearcolor = 0; diff --git a/src/swrenderer/segments/r_clipsegment.cpp b/src/swrenderer/segments/r_clipsegment.cpp index 6cfaaa2a0e..4e25daa350 100644 --- a/src/swrenderer/segments/r_clipsegment.cpp +++ b/src/swrenderer/segments/r_clipsegment.cpp @@ 
-86,7 +86,7 @@ namespace swrenderer return true; } - bool RenderClipSegment::Clip(int first, int last, bool solid, VisibleSegmentCallback callback) + bool RenderClipSegment::Clip(int first, int last, bool solid, VisibleSegmentRenderer *visitor) { cliprange_t *next, *start; int i, j; @@ -104,7 +104,7 @@ namespace swrenderer if (last <= start->first) { // Post is entirely visible (above start). - if (!callback(first, last)) + if (!visitor->RenderWallSegment(first, last)) return true; // Insert a new clippost for solid walls. @@ -131,7 +131,7 @@ namespace swrenderer } // There is a fragment above *start. - if (callback(first, start->first) && solid) + if (visitor->RenderWallSegment(first, start->first) && solid) { start->first = first; // Adjust the clip size for solid walls } @@ -146,7 +146,7 @@ namespace swrenderer while (last >= (next + 1)->first) { // There is a fragment between two posts. - clipsegment = callback(next->last, (next + 1)->first); + clipsegment = visitor->RenderWallSegment(next->last, (next + 1)->first); next++; if (last <= next->last) @@ -158,7 +158,7 @@ namespace swrenderer } // There is a fragment after *next. 
- clipsegment = callback(next->last, last); + clipsegment = visitor->RenderWallSegment(next->last, last); crunch: if (!clipsegment) diff --git a/src/swrenderer/segments/r_clipsegment.h b/src/swrenderer/segments/r_clipsegment.h index b2fde462e4..48a083ba6e 100644 --- a/src/swrenderer/segments/r_clipsegment.h +++ b/src/swrenderer/segments/r_clipsegment.h @@ -17,11 +17,18 @@ namespace swrenderer { typedef bool(*VisibleSegmentCallback)(int x1, int x2); + class VisibleSegmentRenderer + { + public: + virtual ~VisibleSegmentRenderer() { } + virtual bool RenderWallSegment(int x1, int x2) { return true; } + }; + class RenderClipSegment { public: void Clear(short left, short right); - bool Clip(int x1, int x2, bool solid, VisibleSegmentCallback callback); + bool Clip(int x1, int x2, bool solid, VisibleSegmentRenderer *visitor); bool Check(int first, int last); bool IsVisible(int x1, int x2); diff --git a/src/swrenderer/things/r_playersprite.cpp b/src/swrenderer/things/r_playersprite.cpp index 78e9975e69..0fb0b27c0a 100644 --- a/src/swrenderer/things/r_playersprite.cpp +++ b/src/swrenderer/things/r_playersprite.cpp @@ -78,7 +78,6 @@ namespace swrenderer DPSprite* psp; DPSprite* weapon; sector_t* sec = NULL; - static sector_t tempsec; int floorlight, ceilinglight; F3DFloor *rover; diff --git a/src/swrenderer/things/r_playersprite.h b/src/swrenderer/things/r_playersprite.h index 09274702aa..588f9f8dc6 100644 --- a/src/swrenderer/things/r_playersprite.h +++ b/src/swrenderer/things/r_playersprite.h @@ -88,5 +88,6 @@ namespace swrenderer enum { BASEYCENTER = 100 }; TArray AcceleratedSprites; + sector_t tempsec; }; } diff --git a/src/swrenderer/things/r_visiblesprite.cpp b/src/swrenderer/things/r_visiblesprite.cpp index ac411db573..17996b5e98 100644 --- a/src/swrenderer/things/r_visiblesprite.cpp +++ b/src/swrenderer/things/r_visiblesprite.cpp @@ -43,8 +43,8 @@ namespace swrenderer { void VisibleSprite::Render(RenderThread *thread) { - static short clipbot[MAXWIDTH]; - static short 
cliptop[MAXWIDTH]; + short *clipbot = thread->clipbot; + short *cliptop = thread->cliptop; VisibleSprite *spr = this; From 8ad132b64f3b69538b6e410780ffcfc47b777227 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 4 Feb 2017 16:45:36 +0100 Subject: [PATCH 815/912] Execute scene slices on worker threads --- src/swrenderer/r_renderthread.h | 3 ++ src/swrenderer/scene/r_scene.cpp | 85 ++++++++++++++++++++++++++++---- src/swrenderer/scene/r_scene.h | 15 ++++++ 3 files changed, 93 insertions(+), 10 deletions(-) diff --git a/src/swrenderer/r_renderthread.h b/src/swrenderer/r_renderthread.h index 8a4e8c54bf..a0945cfee4 100644 --- a/src/swrenderer/r_renderthread.h +++ b/src/swrenderer/r_renderthread.h @@ -23,6 +23,7 @@ #pragma once #include +#include class DrawerCommandQueue; typedef std::shared_ptr DrawerCommandQueuePtr; @@ -67,6 +68,8 @@ namespace swrenderer std::unique_ptr ClipSegments; DrawerCommandQueuePtr DrawQueue; + std::thread thread; + // VisibleSprite working buffers short clipbot[MAXWIDTH]; short cliptop[MAXWIDTH]; diff --git a/src/swrenderer/scene/r_scene.cpp b/src/swrenderer/scene/r_scene.cpp index 16a73ec912..bc4b6c5aee 100644 --- a/src/swrenderer/scene/r_scene.cpp +++ b/src/swrenderer/scene/r_scene.cpp @@ -63,6 +63,11 @@ namespace swrenderer Threads.push_back(std::make_unique(this)); } + RenderScene::~RenderScene() + { + StopThreads(); + } + void RenderScene::SetClearColor(int color) { clearcolor = color; @@ -161,26 +166,38 @@ namespace swrenderer void RenderScene::RenderThreadSlices() { int numThreads = r_scene_multithreaded ? 
8 : 1; - - while (Threads.size() > (size_t)numThreads) + if (numThreads != Threads.size()) { - Threads.pop_back(); - } - - while (Threads.size() < (size_t)numThreads) - { - Threads.push_back(std::make_unique(this)); + StopThreads(); + StartThreads(numThreads); } + // Setup threads: + std::unique_lock start_lock(start_mutex); for (int i = 0; i < numThreads; i++) { Threads[i]->X1 = viewwidth * i / numThreads; Threads[i]->X2 = viewwidth * (i + 1) / numThreads; } + run_id++; + start_lock.unlock(); - for (int i = 0; i < numThreads; i++) + // Notify threads to run + if (Threads.size() > 1) { - RenderThreadSlice(Threads[i].get()); + start_condition.notify_all(); + } + + // Do the main thread ourselves: + RenderThreadSlice(MainThread()); + + // Wait for everyone to finish: + if (Threads.size() > 1) + { + std::unique_lock end_lock(end_mutex); + finished_threads++; + end_condition.wait(end_lock, [&]() { return finished_threads == Threads.size(); }); + finished_threads = 0; } } @@ -246,6 +263,54 @@ namespace swrenderer } } + void RenderScene::StartThreads(size_t numThreads) + { + while (Threads.size() < (size_t)numThreads) + { + auto thread = std::make_unique(this); + auto renderthread = thread.get(); + int start_run_id = run_id; + thread->thread = std::thread([=]() + { + int last_run_id = start_run_id; + while (true) + { + // Wait until we are signalled to run: + std::unique_lock start_lock(start_mutex); + start_condition.wait(start_lock, [&]() { return run_id != last_run_id || shutdown_flag; }); + if (shutdown_flag) + break; + last_run_id = run_id; + start_lock.unlock(); + + RenderThreadSlice(renderthread); + + // Notify main thread that we finished: + std::unique_lock end_lock(end_mutex); + finished_threads++; + end_lock.unlock(); + end_condition.notify_all(); + } + }); + Threads.push_back(std::move(thread)); + } + } + + void RenderScene::StopThreads() + { + std::unique_lock lock(start_mutex); + shutdown_flag = true; + lock.unlock(); + start_condition.notify_all(); + while 
(Threads.size() > 1) + { + Threads.back()->thread.join(); + Threads.pop_back(); + } + lock.lock(); + shutdown_flag = false; + } + void RenderScene::RenderViewToCanvas(AActor *actor, DCanvas *canvas, int x, int y, int width, int height, bool dontmaplines) { auto viewport = RenderViewport::Instance(); diff --git a/src/swrenderer/scene/r_scene.h b/src/swrenderer/scene/r_scene.h index d3053884c5..d3680d422b 100644 --- a/src/swrenderer/scene/r_scene.h +++ b/src/swrenderer/scene/r_scene.h @@ -14,7 +14,11 @@ #pragma once #include +#include #include +#include +#include +#include #include "r_defs.h" #include "d_player.h" @@ -30,6 +34,7 @@ namespace swrenderer { public: RenderScene(); + ~RenderScene(); void Init(); void ScreenResized(); @@ -49,10 +54,20 @@ namespace swrenderer void RenderDrawQueues(); void RenderThreadSlices(); void RenderThreadSlice(RenderThread *thread); + + void StartThreads(size_t numThreads); + void StopThreads(); bool dontmaplines = false; int clearcolor = 0; std::vector> Threads; + std::mutex start_mutex; + std::condition_variable start_condition; + bool shutdown_flag = false; + int run_id = 0; + std::mutex end_mutex; + std::condition_variable end_condition; + size_t finished_threads = 0; }; } From cfe4c55973ef3890f9c5d8f15fd2f7943f63e8a6 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 4 Feb 2017 19:13:56 +0100 Subject: [PATCH 816/912] Remove software renderer internals from F3DFloor --- src/p_3dfloors.cpp | 2 - src/p_3dfloors.h | 5 -- src/swrenderer/line/r_renderdrawsegment.cpp | 3 +- src/swrenderer/line/r_walldraw.cpp | 3 +- src/swrenderer/plane/r_visibleplane.cpp | 3 +- src/swrenderer/scene/r_3dfloors.cpp | 13 ++++ src/swrenderer/scene/r_3dfloors.h | 20 +++++- src/swrenderer/scene/r_opaque_pass.cpp | 80 ++++++++++----------- src/swrenderer/scene/r_portal.cpp | 6 +- src/swrenderer/scene/r_scene.cpp | 2 +- src/swrenderer/things/r_playersprite.cpp | 3 +- src/swrenderer/things/r_sprite.cpp | 3 +- 12 files changed, 86 insertions(+), 57 
deletions(-) diff --git a/src/p_3dfloors.cpp b/src/p_3dfloors.cpp index f367c46377..76d8df3fe1 100644 --- a/src/p_3dfloors.cpp +++ b/src/p_3dfloors.cpp @@ -129,8 +129,6 @@ static void P_Add3DFloor(sector_t* sec, sector_t* sec2, line_t* master, int flag ffloor->top.copied = ffloor->bottom.copied = false; ffloor->top.model = ffloor->bottom.model = ffloor->model = sec2; ffloor->target = sec; - ffloor->ceilingclip = ffloor->floorclip = NULL; - ffloor->validcount = 0; if (!(flags&FF_THINFLOOR)) { diff --git a/src/p_3dfloors.h b/src/p_3dfloors.h index 75b8656e41..2c0dbff643 100644 --- a/src/p_3dfloors.h +++ b/src/p_3dfloors.h @@ -95,11 +95,6 @@ struct F3DFloor int lastlight; int alpha; - // kg3D - for software - short *floorclip; - short *ceilingclip; - int validcount; - FDynamicColormap *GetColormap(); void UpdateColormap(FDynamicColormap *&map); PalEntry GetBlend(); diff --git a/src/swrenderer/line/r_renderdrawsegment.cpp b/src/swrenderer/line/r_renderdrawsegment.cpp index 85a55dc9ea..67953dcfa6 100644 --- a/src/swrenderer/line/r_renderdrawsegment.cpp +++ b/src/swrenderer/line/r_renderdrawsegment.cpp @@ -89,7 +89,8 @@ namespace swrenderer return; } - NetUpdate(); + if (Thread->MainThread) + NetUpdate(); frontsector = curline->frontsector; backsector = curline->backsector; diff --git a/src/swrenderer/line/r_walldraw.cpp b/src/swrenderer/line/r_walldraw.cpp index ea165ab045..a0e4641e43 100644 --- a/src/swrenderer/line/r_walldraw.cpp +++ b/src/swrenderer/line/r_walldraw.cpp @@ -362,7 +362,8 @@ namespace swrenderer Draw1Column(x, y1, y2, sampler); } - NetUpdate(); + if (Thread->MainThread) + NetUpdate(); } void RenderWallPart::ProcessNormalWall(const short *uwal, const short *dwal, double texturemid, float *swal, fixed_t *lwal) diff --git a/src/swrenderer/plane/r_visibleplane.cpp b/src/swrenderer/plane/r_visibleplane.cpp index a0a58c7e78..70cd4164f4 100644 --- a/src/swrenderer/plane/r_visibleplane.cpp +++ b/src/swrenderer/plane/r_visibleplane.cpp @@ -129,6 +129,7 @@ 
namespace swrenderer renderer.Render(this, xscale, yscale, alpha, additive, masked, colormap, tex); } } - NetUpdate(); + if (thread->MainThread) + NetUpdate(); } } diff --git a/src/swrenderer/scene/r_3dfloors.cpp b/src/swrenderer/scene/r_3dfloors.cpp index 69618f9f44..f49def6b38 100644 --- a/src/swrenderer/scene/r_3dfloors.cpp +++ b/src/swrenderer/scene/r_3dfloors.cpp @@ -14,6 +14,7 @@ #include "r_3dfloors.h" #include "r_utility.h" #include "swrenderer/r_renderthread.h" +#include "swrenderer/r_memory.h" CVAR(Int, r_3dfloors, true, 0); @@ -24,8 +25,20 @@ namespace swrenderer Thread = thread; } + void Clip3DFloors::SetFakeFloor(F3DFloor *newFakeFloor) + { + auto &clip = FakeFloors[newFakeFloor]; + if (clip == nullptr) + { + clip = Thread->FrameMemory->NewObject(newFakeFloor); + } + fakeFloor = clip; + } + void Clip3DFloors::Cleanup() { + FakeFloors.clear(); + fakeActive = false; fake3D = 0; while (CurrentSkybox) diff --git a/src/swrenderer/scene/r_3dfloors.h b/src/swrenderer/scene/r_3dfloors.h index f49d6c80a8..13168a80ba 100644 --- a/src/swrenderer/scene/r_3dfloors.h +++ b/src/swrenderer/scene/r_3dfloors.h @@ -2,12 +2,14 @@ #pragma once #include "p_3dfloors.h" +#include EXTERN_CVAR(Int, r_3dfloors); namespace swrenderer { class RenderThread; + class FakeFloorClip; struct HeightLevel { @@ -27,7 +29,7 @@ namespace swrenderer { short floorclip[MAXWIDTH]; short ceilingclip[MAXWIDTH]; - F3DFloor *ffloor; + FakeFloorClip *ffloor; ClipStack *next; }; @@ -51,6 +53,17 @@ namespace swrenderer FAKE3D_DOWN2UP = 8, // rendering from down to up (floors) }; + class FakeFloorClip + { + public: + FakeFloorClip(F3DFloor *fakeFloor) : fakeFloor(fakeFloor) { } + + F3DFloor *fakeFloor = nullptr; + short *floorclip = nullptr; + short *ceilingclip = nullptr; + int validcount = -1; + }; + class Clip3DFloors { public: @@ -64,12 +77,13 @@ namespace swrenderer void ResetClip(); void EnterSkybox(); void LeaveSkybox(); + void SetFakeFloor(F3DFloor *fakeFloor); RenderThread *Thread = nullptr; 
int fake3D = 0; - F3DFloor *fakeFloor = nullptr; + FakeFloorClip *fakeFloor = nullptr; fixed_t fakeAlpha = 0; bool fakeActive = false; double sclipBottom = 0; @@ -83,5 +97,7 @@ namespace swrenderer TArray toplist; ClipStack *clip_top = nullptr; ClipStack *clip_cur = nullptr; + + std::unordered_map FakeFloors; }; } diff --git a/src/swrenderer/scene/r_opaque_pass.cpp b/src/swrenderer/scene/r_opaque_pass.cpp index 73d2824881..1ee21449ad 100644 --- a/src/swrenderer/scene/r_opaque_pass.cpp +++ b/src/swrenderer/scene/r_opaque_pass.cpp @@ -573,32 +573,32 @@ namespace swrenderer // first check all floors for (int i = 0; i < (int)frontsector->e->XFloor.ffloors.Size(); i++) { - clip3d->fakeFloor = frontsector->e->XFloor.ffloors[i]; - if (!(clip3d->fakeFloor->flags & FF_EXISTS)) continue; - if (!clip3d->fakeFloor->model) continue; - if (clip3d->fakeFloor->bottom.plane->isSlope()) continue; - if (!(clip3d->fakeFloor->flags & FF_NOSHADE) || (clip3d->fakeFloor->flags & (FF_RENDERPLANES | FF_RENDERSIDES))) + clip3d->SetFakeFloor(frontsector->e->XFloor.ffloors[i]); + if (!(clip3d->fakeFloor->fakeFloor->flags & FF_EXISTS)) continue; + if (!clip3d->fakeFloor->fakeFloor->model) continue; + if (clip3d->fakeFloor->fakeFloor->bottom.plane->isSlope()) continue; + if (!(clip3d->fakeFloor->fakeFloor->flags & FF_NOSHADE) || (clip3d->fakeFloor->fakeFloor->flags & (FF_RENDERPLANES | FF_RENDERSIDES))) { - clip3d->AddHeight(clip3d->fakeFloor->top.plane, frontsector); + clip3d->AddHeight(clip3d->fakeFloor->fakeFloor->top.plane, frontsector); } - if (!(clip3d->fakeFloor->flags & FF_RENDERPLANES)) continue; - if (clip3d->fakeFloor->alpha == 0) continue; - if (clip3d->fakeFloor->flags & FF_THISINSIDE && clip3d->fakeFloor->flags & FF_INVERTSECTOR) continue; - clip3d->fakeAlpha = MIN(Scale(clip3d->fakeFloor->alpha, OPAQUE, 255), OPAQUE); + if (!(clip3d->fakeFloor->fakeFloor->flags & FF_RENDERPLANES)) continue; + if (clip3d->fakeFloor->fakeFloor->alpha == 0) continue; + if 
(clip3d->fakeFloor->fakeFloor->flags & FF_THISINSIDE && clip3d->fakeFloor->fakeFloor->flags & FF_INVERTSECTOR) continue; + clip3d->fakeAlpha = MIN(Scale(clip3d->fakeFloor->fakeFloor->alpha, OPAQUE, 255), OPAQUE); if (clip3d->fakeFloor->validcount != validcount) { clip3d->fakeFloor->validcount = validcount; clip3d->NewClip(); } - double fakeHeight = clip3d->fakeFloor->top.plane->ZatPoint(frontsector->centerspot); + double fakeHeight = clip3d->fakeFloor->fakeFloor->top.plane->ZatPoint(frontsector->centerspot); if (fakeHeight < ViewPos.Z && fakeHeight > frontsector->floorplane.ZatPoint(frontsector->centerspot)) { clip3d->fake3D = FAKE3D_FAKEFLOOR; - tempsec = *clip3d->fakeFloor->model; - tempsec.floorplane = *clip3d->fakeFloor->top.plane; - tempsec.ceilingplane = *clip3d->fakeFloor->bottom.plane; - if (!(clip3d->fakeFloor->flags & FF_THISINSIDE) && !(clip3d->fakeFloor->flags & FF_INVERTSECTOR)) + tempsec = *clip3d->fakeFloor->fakeFloor->model; + tempsec.floorplane = *clip3d->fakeFloor->fakeFloor->top.plane; + tempsec.ceilingplane = *clip3d->fakeFloor->fakeFloor->bottom.plane; + if (!(clip3d->fakeFloor->fakeFloor->flags & FF_THISINSIDE) && !(clip3d->fakeFloor->fakeFloor->flags & FF_INVERTSECTOR)) { tempsec.SetTexture(sector_t::floor, tempsec.GetTexture(sector_t::ceiling)); position = sector_t::ceiling; @@ -618,7 +618,7 @@ namespace swrenderer frontsector->GetTexture(sector_t::floor), floorlightlevel + R_ActualExtraLight(foggy), // killough 3/16/98 frontsector->GetAlpha(sector_t::floor), - !!(clip3d->fakeFloor->flags & FF_ADDITIVETRANS), + !!(clip3d->fakeFloor->fakeFloor->flags & FF_ADDITIVETRANS), frontsector->planes[position].xform, frontsector->sky, nullptr, @@ -635,34 +635,34 @@ namespace swrenderer // and now ceilings for (unsigned int i = 0; i < frontsector->e->XFloor.ffloors.Size(); i++) { - clip3d->fakeFloor = frontsector->e->XFloor.ffloors[i]; - if (!(clip3d->fakeFloor->flags & FF_EXISTS)) continue; - if (!clip3d->fakeFloor->model) continue; - if 
(clip3d->fakeFloor->top.plane->isSlope()) continue; - if (!(clip3d->fakeFloor->flags & FF_NOSHADE) || (clip3d->fakeFloor->flags & (FF_RENDERPLANES | FF_RENDERSIDES))) + clip3d->SetFakeFloor(frontsector->e->XFloor.ffloors[i]); + if (!(clip3d->fakeFloor->fakeFloor->flags & FF_EXISTS)) continue; + if (!clip3d->fakeFloor->fakeFloor->model) continue; + if (clip3d->fakeFloor->fakeFloor->top.plane->isSlope()) continue; + if (!(clip3d->fakeFloor->fakeFloor->flags & FF_NOSHADE) || (clip3d->fakeFloor->fakeFloor->flags & (FF_RENDERPLANES | FF_RENDERSIDES))) { - clip3d->AddHeight(clip3d->fakeFloor->bottom.plane, frontsector); + clip3d->AddHeight(clip3d->fakeFloor->fakeFloor->bottom.plane, frontsector); } - if (!(clip3d->fakeFloor->flags & FF_RENDERPLANES)) continue; - if (clip3d->fakeFloor->alpha == 0) continue; - if (!(clip3d->fakeFloor->flags & FF_THISINSIDE) && (clip3d->fakeFloor->flags & (FF_SWIMMABLE | FF_INVERTSECTOR)) == (FF_SWIMMABLE | FF_INVERTSECTOR)) continue; - clip3d->fakeAlpha = MIN(Scale(clip3d->fakeFloor->alpha, OPAQUE, 255), OPAQUE); + if (!(clip3d->fakeFloor->fakeFloor->flags & FF_RENDERPLANES)) continue; + if (clip3d->fakeFloor->fakeFloor->alpha == 0) continue; + if (!(clip3d->fakeFloor->fakeFloor->flags & FF_THISINSIDE) && (clip3d->fakeFloor->fakeFloor->flags & (FF_SWIMMABLE | FF_INVERTSECTOR)) == (FF_SWIMMABLE | FF_INVERTSECTOR)) continue; + clip3d->fakeAlpha = MIN(Scale(clip3d->fakeFloor->fakeFloor->alpha, OPAQUE, 255), OPAQUE); if (clip3d->fakeFloor->validcount != validcount) { clip3d->fakeFloor->validcount = validcount; clip3d->NewClip(); } - double fakeHeight = clip3d->fakeFloor->bottom.plane->ZatPoint(frontsector->centerspot); + double fakeHeight = clip3d->fakeFloor->fakeFloor->bottom.plane->ZatPoint(frontsector->centerspot); if (fakeHeight > ViewPos.Z && fakeHeight < frontsector->ceilingplane.ZatPoint(frontsector->centerspot)) { clip3d->fake3D = FAKE3D_FAKECEILING; - tempsec = *clip3d->fakeFloor->model; - tempsec.floorplane = 
*clip3d->fakeFloor->top.plane; - tempsec.ceilingplane = *clip3d->fakeFloor->bottom.plane; - if ((!(clip3d->fakeFloor->flags & FF_THISINSIDE) && !(clip3d->fakeFloor->flags & FF_INVERTSECTOR)) || - (clip3d->fakeFloor->flags & FF_THISINSIDE && clip3d->fakeFloor->flags & FF_INVERTSECTOR)) + tempsec = *clip3d->fakeFloor->fakeFloor->model; + tempsec.floorplane = *clip3d->fakeFloor->fakeFloor->top.plane; + tempsec.ceilingplane = *clip3d->fakeFloor->fakeFloor->bottom.plane; + if ((!(clip3d->fakeFloor->fakeFloor->flags & FF_THISINSIDE) && !(clip3d->fakeFloor->fakeFloor->flags & FF_INVERTSECTOR)) || + (clip3d->fakeFloor->fakeFloor->flags & FF_THISINSIDE && clip3d->fakeFloor->fakeFloor->flags & FF_INVERTSECTOR)) { tempsec.SetTexture(sector_t::ceiling, tempsec.GetTexture(sector_t::floor)); position = sector_t::floor; @@ -684,7 +684,7 @@ namespace swrenderer frontsector->GetTexture(sector_t::ceiling), ceilinglightlevel + R_ActualExtraLight(foggy), // killough 4/11/98 frontsector->GetAlpha(sector_t::ceiling), - !!(clip3d->fakeFloor->flags & FF_ADDITIVETRANS), + !!(clip3d->fakeFloor->fakeFloor->flags & FF_ADDITIVETRANS), frontsector->planes[position].xform, frontsector->sky, nullptr, @@ -741,14 +741,14 @@ namespace swrenderer Clip3DFloors *clip3d = Thread->Clip3DFloors.get(); for (unsigned int i = 0; i < line->backsector->e->XFloor.ffloors.Size(); i++) { - clip3d->fakeFloor = line->backsector->e->XFloor.ffloors[i]; - if (!(clip3d->fakeFloor->flags & FF_EXISTS)) continue; - if (!(clip3d->fakeFloor->flags & FF_RENDERPLANES)) continue; - if (!clip3d->fakeFloor->model) continue; + clip3d->SetFakeFloor(line->backsector->e->XFloor.ffloors[i]); + if (!(clip3d->fakeFloor->fakeFloor->flags & FF_EXISTS)) continue; + if (!(clip3d->fakeFloor->fakeFloor->flags & FF_RENDERPLANES)) continue; + if (!clip3d->fakeFloor->fakeFloor->model) continue; clip3d->fake3D = FAKE3D_FAKEBACK; - tempsec = *clip3d->fakeFloor->model; - tempsec.floorplane = *clip3d->fakeFloor->top.plane; - tempsec.ceilingplane = 
*clip3d->fakeFloor->bottom.plane; + tempsec = *clip3d->fakeFloor->fakeFloor->model; + tempsec.floorplane = *clip3d->fakeFloor->fakeFloor->top.plane; + tempsec.ceilingplane = *clip3d->fakeFloor->fakeFloor->bottom.plane; if (clip3d->fakeFloor->validcount != validcount) { clip3d->fakeFloor->validcount = validcount; diff --git a/src/swrenderer/scene/r_portal.cpp b/src/swrenderer/scene/r_portal.cpp index bf804a4d13..88ccb96c0a 100644 --- a/src/swrenderer/scene/r_portal.cpp +++ b/src/swrenderer/scene/r_portal.cpp @@ -441,13 +441,15 @@ namespace swrenderer int prevuniq2 = CurrentPortalUniq; CurrentPortalUniq = prevuniq; - NetUpdate(); + if (Thread->MainThread) + NetUpdate(); MaskedCycles.Clock(); // [ZZ] count sprites in portals/mirrors along with normal ones. Thread->TranslucentPass->Render(); // this is required since with portals there often will be cases when more than 80% of the view is inside a portal. MaskedCycles.Unclock(); - NetUpdate(); + if (Thread->MainThread) + NetUpdate(); Thread->Clip3DFloors->LeaveSkybox(); // pop 3D floor height map CurrentPortalUniq = prevuniq2; diff --git a/src/swrenderer/scene/r_scene.cpp b/src/swrenderer/scene/r_scene.cpp index bc4b6c5aee..eeab47a7d5 100644 --- a/src/swrenderer/scene/r_scene.cpp +++ b/src/swrenderer/scene/r_scene.cpp @@ -267,7 +267,7 @@ namespace swrenderer { while (Threads.size() < (size_t)numThreads) { - auto thread = std::make_unique(this); + auto thread = std::make_unique(this, false); auto renderthread = thread.get(); int start_run_id = run_id; thread->thread = std::thread([=]() diff --git a/src/swrenderer/things/r_playersprite.cpp b/src/swrenderer/things/r_playersprite.cpp index 0fb0b27c0a..a2e8acec30 100644 --- a/src/swrenderer/things/r_playersprite.cpp +++ b/src/swrenderer/things/r_playersprite.cpp @@ -630,6 +630,7 @@ namespace swrenderer frac += xiscale; } - NetUpdate(); + if (thread->MainThread) + NetUpdate(); } } diff --git a/src/swrenderer/things/r_sprite.cpp b/src/swrenderer/things/r_sprite.cpp index 
4fce83fc84..1da377c7df 100644 --- a/src/swrenderer/things/r_sprite.cpp +++ b/src/swrenderer/things/r_sprite.cpp @@ -298,6 +298,7 @@ namespace swrenderer } } - NetUpdate(); + if (thread->MainThread) + NetUpdate(); } } From beae9a3dc7ebeea0bce6a967ccedc7e5f2b33706 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 4 Feb 2017 20:32:06 +0100 Subject: [PATCH 817/912] Stop using validcount for sprites --- src/swrenderer/scene/r_opaque_pass.cpp | 14 ++++++++++---- src/swrenderer/scene/r_opaque_pass.h | 3 +++ src/swrenderer/scene/r_scene.cpp | 16 ++++++++++++---- src/swrenderer/things/r_visiblesprite.cpp | 4 ++++ 4 files changed, 29 insertions(+), 8 deletions(-) diff --git a/src/swrenderer/scene/r_opaque_pass.cpp b/src/swrenderer/scene/r_opaque_pass.cpp index 1ee21449ad..cf4155c4f3 100644 --- a/src/swrenderer/scene/r_opaque_pass.cpp +++ b/src/swrenderer/scene/r_opaque_pass.cpp @@ -773,6 +773,9 @@ namespace swrenderer void RenderOpaquePass::RenderScene() { + SeenSpriteSectors.clear(); + SeenActors.clear(); + InSubsector = nullptr; RenderBSPNode(nodes + numnodes - 1); // The head node is the last node output. } @@ -824,11 +827,12 @@ namespace swrenderer // A sector might have been split into several // subsectors during BSP building. // Thus we check whether it was already added. - if (sec->touching_renderthings == nullptr || sec->validcount == validcount) + if (sec->touching_renderthings == nullptr || SeenSpriteSectors.find(sec) != SeenSpriteSectors.end()/*|| sec->validcount == validcount*/) return; // Well, now it will be done. 
- sec->validcount = validcount; + //sec->validcount = validcount; + SeenSpriteSectors.insert(sec); int spriteshade = LIGHT2SHADE(lightlevel + R_ActualExtraLight(foggy)); @@ -836,8 +840,10 @@ namespace swrenderer for (auto p = sec->touching_renderthings; p != nullptr; p = p->m_snext) { auto thing = p->m_thing; - if (thing->validcount == validcount) continue; - thing->validcount = validcount; + if (SeenActors.find(thing) != SeenActors.end()) continue; + SeenActors.insert(thing); + //if (thing->validcount == validcount) continue; + //thing->validcount = validcount; FIntCVar *cvar = thing->GetClass()->distancecheck; if (cvar != nullptr && *cvar >= 0) diff --git a/src/swrenderer/scene/r_opaque_pass.h b/src/swrenderer/scene/r_opaque_pass.h index fcb6a58595..6f0f065812 100644 --- a/src/swrenderer/scene/r_opaque_pass.h +++ b/src/swrenderer/scene/r_opaque_pass.h @@ -18,6 +18,7 @@ #include "r_defs.h" #include "swrenderer/line/r_line.h" #include "swrenderer/scene/r_3dfloors.h" +#include struct FVoxelDef; @@ -84,5 +85,7 @@ namespace swrenderer bool r_fakingunderwater = false; SWRenderLine renderline; + std::set SeenSpriteSectors; + std::set SeenActors; }; } diff --git a/src/swrenderer/scene/r_scene.cpp b/src/swrenderer/scene/r_scene.cpp index eeab47a7d5..cdfcc8d82f 100644 --- a/src/swrenderer/scene/r_scene.cpp +++ b/src/swrenderer/scene/r_scene.cpp @@ -106,9 +106,8 @@ namespace swrenderer if (CameraLight::Instance()->ShaderColormap() && viewport->RenderTarget->IsBgra() && !(r_shadercolormaps && screen->Accel2D)) { MainThread()->DrawQueue->Push(CameraLight::Instance()->ShaderColormap(), screen); + RenderDrawQueues(); } - - RenderDrawQueues(); } void RenderScene::RenderDrawQueues() @@ -120,6 +119,9 @@ namespace swrenderer queues.push_back((*it)->DrawQueue); } DrawerThreads::Execute(queues); + + //using namespace std::chrono_literals; + //std::this_thread::sleep_for(0.5s); } void RenderScene::RenderActorView(AActor *actor, bool dontmaplines) @@ -151,6 +153,7 @@ namespace 
swrenderer } RenderThreadSlices(); + RenderDrawQueues(); camera->renderflags = savedflags; interpolator.RestoreInterpolations(); @@ -165,7 +168,13 @@ namespace swrenderer void RenderScene::RenderThreadSlices() { - int numThreads = r_scene_multithreaded ? 8 : 1; + int numThreads = std::thread::hardware_concurrency(); + if (numThreads == 0) + numThreads = 4; + + if (!r_scene_multithreaded) + numThreads = 1; + if (numThreads != Threads.size()) { StopThreads(); @@ -327,7 +336,6 @@ namespace swrenderer viewport->SetViewport(width, height, WidescreenRatio); RenderActorView(actor, dontmaplines); - RenderDrawQueues(); viewport->RenderTarget = screen; diff --git a/src/swrenderer/things/r_visiblesprite.cpp b/src/swrenderer/things/r_visiblesprite.cpp index 17996b5e98..1d9111e74c 100644 --- a/src/swrenderer/things/r_visiblesprite.cpp +++ b/src/swrenderer/things/r_visiblesprite.cpp @@ -77,6 +77,10 @@ namespace swrenderer if (x1 >= x2) return; + // Reject sprites outside the slice rendered by the thread + if (x2 < thread->X1 || x1 > thread->X2) + return; + // [RH] Sprites split behind a one-sided line can also be discarded. 
if (spr->sector == nullptr) return; From d1cbb76b6310251df2a76bf0d0d7bf4c0ace599b Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 4 Feb 2017 21:00:05 +0100 Subject: [PATCH 818/912] Only render player sprites on main thread --- src/swrenderer/scene/r_scene.cpp | 2 ++ src/swrenderer/scene/r_translucent_pass.cpp | 3 --- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/swrenderer/scene/r_scene.cpp b/src/swrenderer/scene/r_scene.cpp index cdfcc8d82f..18c5137df1 100644 --- a/src/swrenderer/scene/r_scene.cpp +++ b/src/swrenderer/scene/r_scene.cpp @@ -48,6 +48,7 @@ #include "swrenderer/drawers/r_thread.h" #include "swrenderer/r_memory.h" #include "swrenderer/r_renderthread.h" +#include "swrenderer/things/r_playersprite.h" EXTERN_CVAR(Bool, r_shadercolormaps) EXTERN_CVAR(Int, r_clearbuffer) @@ -153,6 +154,7 @@ namespace swrenderer } RenderThreadSlices(); + MainThread()->PlayerSprites->Render(); RenderDrawQueues(); camera->renderflags = savedflags; diff --git a/src/swrenderer/scene/r_translucent_pass.cpp b/src/swrenderer/scene/r_translucent_pass.cpp index d102d6c187..d4bbb16820 100644 --- a/src/swrenderer/scene/r_translucent_pass.cpp +++ b/src/swrenderer/scene/r_translucent_pass.cpp @@ -28,7 +28,6 @@ #include "swrenderer/things/r_particle.h" #include "swrenderer/things/r_sprite.h" #include "swrenderer/things/r_wallsprite.h" -#include "swrenderer/things/r_playersprite.h" #include "swrenderer/segments/r_drawsegment.h" #include "swrenderer/scene/r_portal.h" #include "swrenderer/scene/r_translucent_pass.h" @@ -213,7 +212,5 @@ namespace swrenderer clip3d->DeleteHeights(); clip3d->fake3D = 0; } - - Thread->PlayerSprites->Render(); } } From bf6ab1efc8cdc5cb85a268726b289dd8bb3ab167 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 4 Feb 2017 21:55:56 +0100 Subject: [PATCH 819/912] Clip sprites by thread slice --- src/swrenderer/viewport/r_spritedrawer.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git 
a/src/swrenderer/viewport/r_spritedrawer.cpp b/src/swrenderer/viewport/r_spritedrawer.cpp index 2796f26bf6..441eedc0a6 100644 --- a/src/swrenderer/viewport/r_spritedrawer.cpp +++ b/src/swrenderer/viewport/r_spritedrawer.cpp @@ -45,6 +45,9 @@ namespace swrenderer void SpriteDrawerArgs::DrawMaskedColumn(RenderThread *thread, int x, fixed_t iscale, FTexture *tex, fixed_t col, double spryscale, double sprtopscreen, bool sprflipvert, const short *mfloorclip, const short *mceilingclip, bool unmasked) { + if (x < thread->X1 || x >= thread->X2) + return; + auto viewport = RenderViewport::Instance(); // Handle the linear filtered version in a different function to reduce chances of merge conflicts from zdoom. From 4172d70d9564bbdf1a8ed0bfefddd3cd5e790f27 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 5 Feb 2017 00:42:42 +0100 Subject: [PATCH 820/912] Make Frozen Time rendering 4 times faster by grouping draw segments in batches of 100 (old algorithm processed 32000 draw segs per sprite!) 
--- src/swrenderer/scene/r_translucent_pass.cpp | 1 + src/swrenderer/segments/r_drawsegment.cpp | 80 ++++++++++ src/swrenderer/segments/r_drawsegment.h | 18 +++ src/swrenderer/things/r_visiblesprite.cpp | 163 ++++++++++++-------- 4 files changed, 200 insertions(+), 62 deletions(-) diff --git a/src/swrenderer/scene/r_translucent_pass.cpp b/src/swrenderer/scene/r_translucent_pass.cpp index d4bbb16820..06b61007d7 100644 --- a/src/swrenderer/scene/r_translucent_pass.cpp +++ b/src/swrenderer/scene/r_translucent_pass.cpp @@ -165,6 +165,7 @@ namespace swrenderer { CollectPortals(); Thread->SpriteList->Sort(); + Thread->DrawSegments->BuildSegmentGroups(); Clip3DFloors *clip3d = Thread->Clip3DFloors.get(); if (clip3d->height_top == nullptr) diff --git a/src/swrenderer/segments/r_drawsegment.cpp b/src/swrenderer/segments/r_drawsegment.cpp index f84d48a7a1..84753cd4c7 100644 --- a/src/swrenderer/segments/r_drawsegment.cpp +++ b/src/swrenderer/segments/r_drawsegment.cpp @@ -40,6 +40,7 @@ #include "swrenderer/things/r_visiblesprite.h" #include "swrenderer/scene/r_light.h" #include "swrenderer/viewport/r_viewport.h" +#include "swrenderer/r_renderthread.h" namespace swrenderer { @@ -83,4 +84,83 @@ namespace swrenderer { InterestingSegments.Push(segment); } + + void DrawSegmentList::BuildSegmentGroups() + { + SegmentGroups.Clear(); + + unsigned int groupSize = 100; + for (unsigned int index = BeginIndex(); index < EndIndex(); index += groupSize) + { + auto ds = Segment(index); + + DrawSegmentGroup group; + group.BeginIndex = index; + group.EndIndex = MIN(index + groupSize, EndIndex()); + group.x1 = ds->x1; + group.x2 = ds->x2; + group.neardepth = MIN(ds->sz1, ds->sz2); + group.fardepth = MAX(ds->sz1, ds->sz2); + + for (unsigned int groupIndex = group.BeginIndex + 1; groupIndex < group.EndIndex; groupIndex++) + { + ds = Segment(groupIndex); + group.x1 = MIN(group.x1, ds->x1); + group.x2 = MAX(group.x2, ds->x2); + group.neardepth = MIN(group.neardepth, ds->sz1); + group.neardepth = 
MIN(group.neardepth, ds->sz2); + group.fardepth = MAX(ds->sz1, group.fardepth); + group.fardepth = MAX(ds->sz2, group.fardepth); + } + + for (int x = group.x1; x < group.x2; x++) + { + cliptop[x] = 0; + clipbottom[x] = viewheight; + } + + for (unsigned int groupIndex = group.BeginIndex; groupIndex < group.EndIndex; groupIndex++) + { + ds = Segment(groupIndex); + + // kg3D - no clipping on fake segs + if (ds->fake) continue; + + if (ds->silhouette & SIL_BOTTOM) + { + short *clip1 = clipbottom + ds->x1; + const short *clip2 = ds->sprbottomclip; + int i = ds->x2 - ds->x1; + do + { + if (*clip1 > *clip2) + *clip1 = *clip2; + clip1++; + clip2++; + } while (--i); + } + + if (ds->silhouette & SIL_TOP) + { + short *clip1 = cliptop + ds->x1; + const short *clip2 = ds->sprtopclip; + int i = ds->x2 - ds->x1; + do + { + if (*clip1 < *clip2) + *clip1 = *clip2; + clip1++; + clip2++; + } while (--i); + } + } + + group.sprtopclip = Thread->FrameMemory->AllocMemory(group.x2 - group.x1); + group.sprbottomclip = Thread->FrameMemory->AllocMemory(group.x2 - group.x1); + memcpy(group.sprtopclip, cliptop + group.x1, (group.x2 - group.x1) * sizeof(short)); + memcpy(group.sprbottomclip, clipbottom + group.x1, (group.x2 - group.x1) * sizeof(short)); + + SegmentGroups.Push(group); + } + } } diff --git a/src/swrenderer/segments/r_drawsegment.h b/src/swrenderer/segments/r_drawsegment.h index 2fa41cd940..a82b2333b5 100644 --- a/src/swrenderer/segments/r_drawsegment.h +++ b/src/swrenderer/segments/r_drawsegment.h @@ -47,11 +47,23 @@ namespace swrenderer int CurrentPortalUniq; // [ZZ] to identify the portal that this drawseg is in. used for sprite clipping. 
}; + struct DrawSegmentGroup + { + short x1, x2; + float neardepth, fardepth; + short *sprtopclip; + short *sprbottomclip; + unsigned int BeginIndex; + unsigned int EndIndex; + }; + class DrawSegmentList { public: DrawSegmentList(RenderThread *thread); + TArray SegmentGroups; + unsigned int BeginIndex() const { return StartIndices.Last(); } unsigned int EndIndex() const { return Segments.Size(); } DrawSegment *Segment(unsigned int index) const { return Segments[Segments.Size() - 1 - index]; } @@ -66,6 +78,8 @@ namespace swrenderer void Push(DrawSegment *segment); void PushInteresting(DrawSegment *segment); + void BuildSegmentGroups(); + RenderThread *Thread = nullptr; private: @@ -74,5 +88,9 @@ namespace swrenderer TArray InterestingSegments; // drawsegs that have something drawn on them TArray StartInterestingIndices; + + // For building segment groups + short cliptop[MAXWIDTH]; + short clipbottom[MAXWIDTH]; }; } diff --git a/src/swrenderer/things/r_visiblesprite.cpp b/src/swrenderer/things/r_visiblesprite.cpp index 1d9111e74c..2bfd31f9bd 100644 --- a/src/swrenderer/things/r_visiblesprite.cpp +++ b/src/swrenderer/things/r_visiblesprite.cpp @@ -285,70 +285,20 @@ namespace swrenderer // The first drawseg that is closer than the sprite is the clip seg. 
DrawSegmentList *segmentlist = thread->DrawSegments.get(); - for (unsigned int index = segmentlist->BeginIndex(); index != segmentlist->EndIndex(); index++) + for (unsigned int groupIndex = 0; groupIndex < segmentlist->SegmentGroups.Size(); groupIndex++) { - DrawSegment *ds = segmentlist->Segment(index); - - // [ZZ] portal handling here - //if (ds->CurrentPortalUniq != spr->CurrentPortalUniq) - // continue; - // [ZZ] WARNING: uncommenting the two above lines, totally breaks sprite clipping - - // kg3D - no clipping on fake segs - if (ds->fake) continue; - // determine if the drawseg obscures the sprite - if (ds->x1 >= x2 || ds->x2 <= x1 || - (!(ds->silhouette & SIL_BOTH) && ds->maskedtexturecol == nullptr && - !ds->bFogBoundary)) - { - // does not cover sprite + auto &group = segmentlist->SegmentGroups[groupIndex]; + if (group.x1 >= x2 || group.x2 <= x1) continue; - } - r1 = MAX(ds->x1, x1); - r2 = MIN(ds->x2, x2); - - float neardepth, fardepth; - if (!spr->IsWallSprite()) + if (group.fardepth < spr->depth) { - if (ds->sz1 < ds->sz2) - { - neardepth = ds->sz1, fardepth = ds->sz2; - } - else - { - neardepth = ds->sz2, fardepth = ds->sz1; - } - } + r1 = MAX(group.x1, x1); + r2 = MIN(group.x2, x2); - - // Check if sprite is in front of draw seg: - if ((!spr->IsWallSprite() && neardepth > spr->depth) || ((spr->IsWallSprite() || fardepth > spr->depth) && - (spr->gpos.Y - ds->curline->v1->fY()) * (ds->curline->v2->fX() - ds->curline->v1->fX()) - - (spr->gpos.X - ds->curline->v1->fX()) * (ds->curline->v2->fY() - ds->curline->v1->fY()) <= 0)) - { - RenderPortal *renderportal = thread->Portal.get(); - - // seg is behind sprite, so draw the mid texture if it has one - if (ds->CurrentPortalUniq == renderportal->CurrentPortalUniq && // [ZZ] instead, portal uniq check is made here - (ds->maskedtexturecol != nullptr || ds->bFogBoundary)) - { - RenderDrawSegment renderer(thread); - renderer.Render(ds, r1, r2); - } - - continue; - } - - // clip this piece of the sprite - // 
killough 3/27/98: optimized and made much shorter - // [RH] Optimized further (at least for VC++; - // other compilers should be at least as good as before) - - if (ds->silhouette & SIL_BOTTOM) //bottom sil - { + // Clip bottom clip1 = clipbot + r1; - clip2 = ds->sprbottomclip + r1 - ds->x1; + clip2 = group.sprbottomclip + r1 - group.x1; i = r2 - r1; do { @@ -357,12 +307,10 @@ namespace swrenderer clip1++; clip2++; } while (--i); - } - if (ds->silhouette & SIL_TOP) // top sil - { + // Clip top clip1 = cliptop + r1; - clip2 = ds->sprtopclip + r1 - ds->x1; + clip2 = group.sprtopclip + r1 - group.x1; i = r2 - r1; do { @@ -372,6 +320,97 @@ namespace swrenderer clip2++; } while (--i); } + else + { + //for (unsigned int index = segmentlist->BeginIndex(); index != segmentlist->EndIndex(); index++) + for (unsigned int index = group.BeginIndex; index != group.EndIndex; index++) + { + DrawSegment *ds = segmentlist->Segment(index); + + // [ZZ] portal handling here + //if (ds->CurrentPortalUniq != spr->CurrentPortalUniq) + // continue; + // [ZZ] WARNING: uncommenting the two above lines, totally breaks sprite clipping + + // kg3D - no clipping on fake segs + if (ds->fake) continue; + // determine if the drawseg obscures the sprite + if (ds->x1 >= x2 || ds->x2 <= x1 || + (!(ds->silhouette & SIL_BOTH) && ds->maskedtexturecol == nullptr && + !ds->bFogBoundary)) + { + // does not cover sprite + continue; + } + + r1 = MAX(ds->x1, x1); + r2 = MIN(ds->x2, x2); + + float neardepth, fardepth; + if (!spr->IsWallSprite()) + { + if (ds->sz1 < ds->sz2) + { + neardepth = ds->sz1, fardepth = ds->sz2; + } + else + { + neardepth = ds->sz2, fardepth = ds->sz1; + } + } + + // Check if sprite is in front of draw seg: + if ((!spr->IsWallSprite() && neardepth > spr->depth) || ((spr->IsWallSprite() || fardepth > spr->depth) && + (spr->gpos.Y - ds->curline->v1->fY()) * (ds->curline->v2->fX() - ds->curline->v1->fX()) - + (spr->gpos.X - ds->curline->v1->fX()) * (ds->curline->v2->fY() - 
ds->curline->v1->fY()) <= 0)) + { + RenderPortal *renderportal = thread->Portal.get(); + + // seg is behind sprite, so draw the mid texture if it has one + if (ds->CurrentPortalUniq == renderportal->CurrentPortalUniq && // [ZZ] instead, portal uniq check is made here + (ds->maskedtexturecol != nullptr || ds->bFogBoundary)) + { + RenderDrawSegment renderer(thread); + renderer.Render(ds, r1, r2); + } + + continue; + } + + // clip this piece of the sprite + // killough 3/27/98: optimized and made much shorter + // [RH] Optimized further (at least for VC++; + // other compilers should be at least as good as before) + + if (ds->silhouette & SIL_BOTTOM) //bottom sil + { + clip1 = clipbot + r1; + clip2 = ds->sprbottomclip + r1 - ds->x1; + i = r2 - r1; + do + { + if (*clip1 > *clip2) + *clip1 = *clip2; + clip1++; + clip2++; + } while (--i); + } + + if (ds->silhouette & SIL_TOP) // top sil + { + clip1 = cliptop + r1; + clip2 = ds->sprtopclip + r1 - ds->x1; + i = r2 - r1; + do + { + if (*clip1 < *clip2) + *clip1 = *clip2; + clip1++; + clip2++; + } while (--i); + } + } + } } // all clipping has been performed, so draw the sprite From 9c50600286b643c9be17ce2521f61d5b3b7c9cf1 Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Sun, 5 Feb 2017 00:53:36 -0500 Subject: [PATCH 821/912] - fix compile errors --- src/polyrenderer/scene/poly_playersprite.cpp | 5 +++-- src/swrenderer/things/r_playersprite.cpp | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/polyrenderer/scene/poly_playersprite.cpp b/src/polyrenderer/scene/poly_playersprite.cpp index 2bd8f2f65a..339d3051a4 100644 --- a/src/polyrenderer/scene/poly_playersprite.cpp +++ b/src/polyrenderer/scene/poly_playersprite.cpp @@ -358,6 +358,7 @@ void RenderPolyPlayerSprites::RenderSprite(DPSprite *sprite, AActor *owner, floa screenSprite.Width = tex->GetWidth() * xscale; screenSprite.Height = tex->GetHeight() * yscale; screenSprite.Translation = TranslationToTable(translation); + 
//screenSprite.Translation = translation; screenSprite.Flip = xiscale < 0; screenSprite.Alpha = Alpha; screenSprite.RenderStyle = RenderStyle; @@ -426,7 +427,7 @@ void PolyScreenSprite::Render() Y1, DTA_DestWidthF, Width, DTA_DestHeightF, Height, - DTA_Translation, Translation, + DTA_TranslationIndex, Translation, DTA_FlipX, Flip, DTA_TopOffset, 0, DTA_LeftOffset, 0, @@ -434,7 +435,7 @@ void PolyScreenSprite::Render() DTA_ClipTop, viewwindowy, DTA_ClipRight, viewwindowx + viewwidth, DTA_ClipBottom, viewwindowy + viewheight, - DTA_AlphaF, Alpha, + DTA_Alpha, Alpha, DTA_RenderStyle, RenderStyle, DTA_FillColor, FillColor, DTA_SpecialColormap, special, diff --git a/src/swrenderer/things/r_playersprite.cpp b/src/swrenderer/things/r_playersprite.cpp index a2e8acec30..468355095f 100644 --- a/src/swrenderer/things/r_playersprite.cpp +++ b/src/swrenderer/things/r_playersprite.cpp @@ -561,7 +561,7 @@ namespace swrenderer viewwindowy + viewheight / 2 - sprite.texturemid * sprite.yscale - 0.5, DTA_DestWidthF, FIXED2DBL(sprite.pic->GetWidth() * sprite.xscale), DTA_DestHeightF, sprite.pic->GetHeight() * sprite.yscale, - DTA_Translation, TranslationToTable(sprite.Translation), + DTA_TranslationIndex, sprite.Translation, DTA_FlipX, sprite.flip, DTA_TopOffset, 0, DTA_LeftOffset, 0, @@ -569,7 +569,7 @@ namespace swrenderer DTA_ClipTop, viewwindowy, DTA_ClipRight, viewwindowx + viewwidth, DTA_ClipBottom, viewwindowy + viewheight, - DTA_AlphaF, sprite.Alpha, + DTA_Alpha, sprite.Alpha, DTA_RenderStyle, sprite.RenderStyle, DTA_FillColor, sprite.FillColor, DTA_SpecialColormap, sprite.special, From 7e6c91d73c4a14d8708134ccf64f7a396106bbfa Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 6 Feb 2017 15:15:09 +0100 Subject: [PATCH 822/912] Remove yslope loopup table and StepColumn --- src/swrenderer/plane/r_flatplane.cpp | 96 ++++++------------------ src/swrenderer/plane/r_flatplane.h | 9 +-- src/swrenderer/plane/r_planerenderer.cpp | 2 - src/swrenderer/plane/r_planerenderer.h | 1 - 
src/swrenderer/viewport/r_viewport.cpp | 2 - src/swrenderer/viewport/r_viewport.h | 8 ++ 6 files changed, 32 insertions(+), 86 deletions(-) diff --git a/src/swrenderer/plane/r_flatplane.cpp b/src/swrenderer/plane/r_flatplane.cpp index d7e4933847..2d1b6ae943 100644 --- a/src/swrenderer/plane/r_flatplane.cpp +++ b/src/swrenderer/plane/r_flatplane.cpp @@ -64,22 +64,20 @@ namespace swrenderer double xstep, ystep, leftxfrac, leftyfrac, rightxfrac, rightyfrac; double x; - xscale = xs_ToFixed(32 - drawerargs.TextureWidthBits(), _xscale); - yscale = xs_ToFixed(32 - drawerargs.TextureHeightBits(), _yscale); if (planeang != 0) { double cosine = cos(planeang), sine = sin(planeang); - pviewx = FLOAT2FIXED(pl->xform.xOffs + ViewPos.X * cosine - ViewPos.Y * sine); - pviewy = FLOAT2FIXED(pl->xform.yOffs - ViewPos.X * sine - ViewPos.Y * cosine); + pviewx = pl->xform.xOffs + ViewPos.X * cosine - ViewPos.Y * sine; + pviewy = pl->xform.yOffs - ViewPos.X * sine - ViewPos.Y * cosine; } else { - pviewx = FLOAT2FIXED(pl->xform.xOffs + ViewPos.X); - pviewy = FLOAT2FIXED(pl->xform.yOffs - ViewPos.Y); + pviewx = pl->xform.xOffs + ViewPos.X; + pviewy = pl->xform.yOffs - ViewPos.Y; } - pviewx = FixedMul(xscale, pviewx); - pviewy = FixedMul(yscale, pviewy); + pviewx = _xscale * pviewx; + pviewy = _yscale * pviewy; // left to right mapping planeang += (ViewAngle - 90).Radians(); @@ -100,17 +98,19 @@ namespace swrenderer planeang += M_PI / 2; double cosine = cos(planeang), sine = -sin(planeang); - x = pl->right - centerx - 0.5; + x = pl->right - viewport->CenterX - 0.5; rightxfrac = _xscale * (cosine + x * xstep); rightyfrac = _yscale * (sine + x * ystep); - x = pl->left - centerx - 0.5; + x = pl->left - viewport->CenterX + 0.5; leftxfrac = _xscale * (cosine + x * xstep); leftyfrac = _yscale * (sine + x * ystep); - basexfrac = rightxfrac; - baseyfrac = rightyfrac; - xstepscale = (rightxfrac - leftxfrac) / (pl->right - pl->left); - ystepscale = (rightyfrac - leftyfrac) / (pl->right - pl->left); + 
basexfrac = leftxfrac; + baseyfrac = leftyfrac; + xstepscale = (rightxfrac - leftxfrac) / (pl->right - pl->left + 1); + ystepscale = (rightyfrac - leftyfrac) / (pl->right - pl->left + 1); + + minx = pl->left; planeheight = fabs(pl->height.Zat0() - ViewPos.Z); @@ -143,8 +143,6 @@ namespace swrenderer void RenderFlatPlane::RenderLine(int y, int x1, int x2) { - double distance; - #ifdef RANGECHECK if (x2 < x1 || x1<0 || x2 >= viewwidth || (unsigned)y >= (unsigned)viewheight) { @@ -152,15 +150,17 @@ namespace swrenderer } #endif - // [RH] Notice that I dumped the caching scheme used by Doom. - // It did not offer any appreciable speedup. + auto viewport = RenderViewport::Instance(); - distance = planeheight * yslope[y]; + double curxfrac = basexfrac + xstepscale * (x1 + 0.5 - minx); + double curyfrac = baseyfrac + ystepscale * (x1 + 0.5 - minx); + + double distance = viewport->PlaneDepth(y, planeheight); if (drawerargs.TextureWidthBits() != 0) { drawerargs.SetTextureUStep(xs_ToFixed(32 - drawerargs.TextureWidthBits(), distance * xstepscale)); - drawerargs.SetTextureUPos(xs_ToFixed(32 - drawerargs.TextureWidthBits(), distance * basexfrac) + pviewx); + drawerargs.SetTextureUPos(xs_ToFixed(32 - drawerargs.TextureWidthBits(), distance * curxfrac + pviewx)); } else { @@ -171,7 +171,7 @@ namespace swrenderer if (drawerargs.TextureHeightBits() != 0) { drawerargs.SetTextureVStep(xs_ToFixed(32 - drawerargs.TextureHeightBits(), distance * ystepscale)); - drawerargs.SetTextureVPos(xs_ToFixed(32 - drawerargs.TextureHeightBits(), distance * baseyfrac) + pviewy); + drawerargs.SetTextureVPos(xs_ToFixed(32 - drawerargs.TextureHeightBits(), distance * curyfrac + pviewy)); } else { @@ -179,11 +179,9 @@ namespace swrenderer drawerargs.SetTextureVPos(0); } - auto viewport = RenderViewport::Instance(); - if (viewport->RenderTarget->IsBgra()) { - double distance2 = planeheight * yslope[(y + 1 < viewheight) ? 
y + 1 : y - 1]; + double distance2 = viewport->PlaneDepth(y + 1, planeheight); double xmagnitude = fabs(ystepscale * (distance2 - distance) * viewport->FocalLengthX); double ymagnitude = fabs(xstepscale * (distance2 - distance) * viewport->FocalLengthX); double magnitude = MAX(ymagnitude, xmagnitude); @@ -268,56 +266,6 @@ namespace swrenderer drawerargs.DrawSpan(Thread); } - void RenderFlatPlane::StepColumn() - { - basexfrac -= xstepscale; - baseyfrac -= ystepscale; - } - - void RenderFlatPlane::SetupSlope() - { - auto viewport = RenderViewport::Instance(); - - int i = 0; - int e = viewheight; - float focus = float(viewport->FocalLengthY); - float den; - float cy = float(viewport->CenterY); - if (i < centery) - { - den = cy - i - 0.5f; - if (e <= centery) - { - do { - yslope[i] = focus / den; - den -= 1; - } while (++i < e); - } - else - { - do { - yslope[i] = focus / den; - den -= 1; - } while (++i < centery); - den = i - cy + 0.5f; - do { - yslope[i] = focus / den; - den += 1; - } while (++i < e); - } - } - else - { - den = i - cy + 0.5f; - do { - yslope[i] = focus / den; - den += 1; - } while (++i < e); - } - } - - float RenderFlatPlane::yslope[MAXHEIGHT]; - ///////////////////////////////////////////////////////////////////////// RenderColoredPlane::RenderColoredPlane(RenderThread *thread) diff --git a/src/swrenderer/plane/r_flatplane.h b/src/swrenderer/plane/r_flatplane.h index 95f24c93d3..1d62c09f78 100644 --- a/src/swrenderer/plane/r_flatplane.h +++ b/src/swrenderer/plane/r_flatplane.h @@ -27,28 +27,23 @@ namespace swrenderer RenderFlatPlane(RenderThread *thread); void Render(VisiblePlane *pl, double _xscale, double _yscale, fixed_t alpha, bool additive, bool masked, FDynamicColormap *basecolormap, FTexture *texture); - static void SetupSlope(); - RenderThread *Thread = nullptr; private: void RenderLine(int y, int x1, int x2) override; - void StepColumn() override; + int minx; double planeheight; bool plane_shade; int planeshade; double GlobVis; 
FDynamicColormap *basecolormap; - fixed_t pviewx, pviewy; - fixed_t xscale, yscale; + double pviewx, pviewy; double xstepscale, ystepscale; double basexfrac, baseyfrac; VisiblePlaneLight *light_list; SpanDrawerArgs drawerargs; - - static float yslope[MAXHEIGHT]; }; class RenderColoredPlane : PlaneRenderer diff --git a/src/swrenderer/plane/r_planerenderer.cpp b/src/swrenderer/plane/r_planerenderer.cpp index a3f47c5eac..6bc023738c 100644 --- a/src/swrenderer/plane/r_planerenderer.cpp +++ b/src/swrenderer/plane/r_planerenderer.cpp @@ -85,8 +85,6 @@ namespace swrenderer t2 = pl->top[x]; b2 = pl->bottom[x]; - - StepColumn(); } // Draw any spans that are still open while (t2 < b2) diff --git a/src/swrenderer/plane/r_planerenderer.h b/src/swrenderer/plane/r_planerenderer.h index 97bc17f2b9..f7992cd66b 100644 --- a/src/swrenderer/plane/r_planerenderer.h +++ b/src/swrenderer/plane/r_planerenderer.h @@ -26,7 +26,6 @@ namespace swrenderer void RenderLines(VisiblePlane *pl); virtual void RenderLine(int y, int x1, int x2) = 0; - virtual void StepColumn() { } private: short spanend[MAXHEIGHT]; diff --git a/src/swrenderer/viewport/r_viewport.cpp b/src/swrenderer/viewport/r_viewport.cpp index e0f0e396cf..79f61ecd21 100644 --- a/src/swrenderer/viewport/r_viewport.cpp +++ b/src/swrenderer/viewport/r_viewport.cpp @@ -130,8 +130,6 @@ namespace swrenderer centery = xs_ToInt(CenterY); globaluclip = -CenterY / InvZtoScale; globaldclip = (viewheight - CenterY) / InvZtoScale; - - RenderFlatPlane::SetupSlope(); } void RenderViewport::SetupBuffer() diff --git a/src/swrenderer/viewport/r_viewport.h b/src/swrenderer/viewport/r_viewport.h index 34f00c2779..7561ff83b6 100644 --- a/src/swrenderer/viewport/r_viewport.h +++ b/src/swrenderer/viewport/r_viewport.h @@ -58,6 +58,14 @@ namespace swrenderer DVector2 PointWorldToView(const DVector2 &worldPos) const; DVector2 ScaleViewToScreen(const DVector2 &scale, double viewZ, bool pixelstretch = true) const; + + double PlaneDepth(int screenY, double 
planeHeight) const + { + if (screenY + 0.5 < CenterY) + return FocalLengthY / (CenterY - screenY - 0.5) * planeHeight; + else + return FocalLengthY / (screenY + 0.5 - CenterY) * planeHeight; + } private: void InitTextureMapping(); From 58495ea7149d733a1c22cb6feedc21b4489f6a47 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 6 Feb 2017 16:04:27 +0100 Subject: [PATCH 823/912] Renamed Clip3DFloors to Clip3D to fix gcc compile error --- src/swrenderer/line/r_line.cpp | 6 +++--- src/swrenderer/line/r_renderdrawsegment.cpp | 8 ++++---- src/swrenderer/plane/r_visibleplanelist.cpp | 10 +++++----- src/swrenderer/r_renderthread.cpp | 2 +- src/swrenderer/r_renderthread.h | 2 +- src/swrenderer/scene/r_opaque_pass.cpp | 4 ++-- src/swrenderer/scene/r_portal.cpp | 14 +++++++------- src/swrenderer/scene/r_scene.cpp | 8 ++++---- src/swrenderer/scene/r_translucent_pass.cpp | 4 ++-- src/swrenderer/things/r_visiblesprite.cpp | 2 +- 10 files changed, 30 insertions(+), 30 deletions(-) diff --git a/src/swrenderer/line/r_line.cpp b/src/swrenderer/line/r_line.cpp index ddecf5879a..5f5a578831 100644 --- a/src/swrenderer/line/r_line.cpp +++ b/src/swrenderer/line/r_line.cpp @@ -123,7 +123,7 @@ namespace swrenderer WallT.InitFromLine(Thread, v1->fPos() - ViewPos, v2->fPos() - ViewPos); } - Clip3DFloors *clip3d = Thread->Clip3DFloors.get(); + Clip3DFloors *clip3d = Thread->Clip3D.get(); if (!(clip3d->fake3D & FAKE3D_FAKEBACK)) { @@ -352,7 +352,7 @@ namespace swrenderer draw_segment->bFakeBoundary = false; draw_segment->foggy = foggy; - Clip3DFloors *clip3d = Thread->Clip3DFloors.get(); + Clip3DFloors *clip3d = Thread->Clip3D.get(); if (clip3d->fake3D & FAKE3D_FAKEMASK) draw_segment->fake = 1; else draw_segment->fake = 0; @@ -955,7 +955,7 @@ namespace swrenderer } } - Clip3DFloors *clip3d = Thread->Clip3DFloors.get(); + Clip3DFloors *clip3d = Thread->Clip3D.get(); // mark ceiling areas if (markceiling) diff --git a/src/swrenderer/line/r_renderdrawsegment.cpp 
b/src/swrenderer/line/r_renderdrawsegment.cpp index 67953dcfa6..f4b6f97dea 100644 --- a/src/swrenderer/line/r_renderdrawsegment.cpp +++ b/src/swrenderer/line/r_renderdrawsegment.cpp @@ -110,7 +110,7 @@ namespace swrenderer rw_lightstep = ds->lightstep; rw_light = ds->light + (x1 - ds->x1) * rw_lightstep; - Clip3DFloors *clip3d = Thread->Clip3DFloors.get(); + Clip3DFloors *clip3d = Thread->Clip3D.get(); CameraLight *cameraLight = CameraLight::Instance(); if (cameraLight->FixedLightLevel() < 0) @@ -478,7 +478,7 @@ namespace swrenderer WallC.tright.Y = ds->cy + ds->cdy; WallT = ds->tmapvals; - Clip3DFloors *clip3d = Thread->Clip3DFloors.get(); + Clip3DFloors *clip3d = Thread->Clip3D.get(); wallupper.Project(clip3d->sclipTop - ViewPos.Z, &WallC); walllower.Project(clip3d->sclipBottom - ViewPos.Z, &WallC); @@ -532,7 +532,7 @@ namespace swrenderer floorHeight = backsector->CenterFloor(); ceilingHeight = backsector->CenterCeiling(); - Clip3DFloors *clip3d = Thread->Clip3DFloors.get(); + Clip3DFloors *clip3d = Thread->Clip3D.get(); // maybe fix clipheights if (!(clip3d->fake3D & FAKE3D_CLIPBOTTOM)) clip3d->sclipBottom = floorHeight; @@ -938,7 +938,7 @@ namespace swrenderer top = MAX(frontcz1, frontcz2); bot = MIN(frontfz1, frontfz2); - Clip3DFloors *clip3d = Thread->Clip3DFloors.get(); + Clip3DFloors *clip3d = Thread->Clip3D.get(); if (clip3d->fake3D & FAKE3D_CLIPTOP) { top = MIN(top, clip3d->sclipTop); diff --git a/src/swrenderer/plane/r_visibleplanelist.cpp b/src/swrenderer/plane/r_visibleplanelist.cpp index 18c1f57e45..4467410575 100644 --- a/src/swrenderer/plane/r_visibleplanelist.cpp +++ b/src/swrenderer/plane/r_visibleplanelist.cpp @@ -129,7 +129,7 @@ namespace swrenderer // kg3D - hack, store alpha in sky // i know there is ->alpha, but this also allows to identify fake plane // and ->alpha is for stacked sectors - Clip3DFloors *clip3d = Thread->Clip3DFloors.get(); + Clip3DFloors *clip3d = Thread->Clip3D.get(); if (clip3d->fake3D & (FAKE3D_FAKEFLOOR | 
FAKE3D_FAKECEILING)) sky = 0x80000000 | clip3d->fakeAlpha; else sky = 0; // not skyflatnum so it can't be a sky portal = nullptr; @@ -190,7 +190,7 @@ namespace swrenderer sky == check->sky && renderportal->CurrentPortalUniq == check->CurrentPortalUniq && renderportal->MirrorFlags == check->MirrorFlags && - Thread->Clip3DFloors->CurrentSkybox == check->CurrentSkybox && + Thread->Clip3D->CurrentSkybox == check->CurrentSkybox && ViewPos == check->viewpos ) { @@ -215,7 +215,7 @@ namespace swrenderer check->Additive = additive; check->CurrentPortalUniq = renderportal->CurrentPortalUniq; check->MirrorFlags = renderportal->MirrorFlags; - check->CurrentSkybox = Thread->Clip3DFloors->CurrentSkybox; + check->CurrentSkybox = Thread->Clip3D->CurrentSkybox; return check; } @@ -333,7 +333,7 @@ namespace swrenderer for (pl = visplanes[i]; pl; pl = pl->next) { // kg3D - draw only correct planes - if (pl->CurrentPortalUniq != renderportal->CurrentPortalUniq || pl->CurrentSkybox != Thread->Clip3DFloors->CurrentSkybox) + if (pl->CurrentPortalUniq != renderportal->CurrentPortalUniq || pl->CurrentSkybox != Thread->Clip3D->CurrentSkybox) continue; // kg3D - draw only real planes now if (pl->sky >= 0) { @@ -359,7 +359,7 @@ namespace swrenderer { for (pl = visplanes[i]; pl; pl = pl->next) { - if (pl->CurrentSkybox != Thread->Clip3DFloors->CurrentSkybox || pl->CurrentPortalUniq != renderportal->CurrentPortalUniq) + if (pl->CurrentSkybox != Thread->Clip3D->CurrentSkybox || pl->CurrentPortalUniq != renderportal->CurrentPortalUniq) continue; if (pl->sky < 0 && pl->height.Zat0() == height) diff --git a/src/swrenderer/r_renderthread.cpp b/src/swrenderer/r_renderthread.cpp index ba48993ca8..41bb2fc5af 100644 --- a/src/swrenderer/r_renderthread.cpp +++ b/src/swrenderer/r_renderthread.cpp @@ -66,7 +66,7 @@ namespace swrenderer TranslucentPass = std::make_unique(this); SpriteList = std::make_unique(); Portal = std::make_unique(this); - Clip3DFloors = std::make_unique(this); + Clip3D = 
std::make_unique(this); PlayerSprites = std::make_unique(this); PlaneList = std::make_unique(this); DrawSegments = std::make_unique(this); diff --git a/src/swrenderer/r_renderthread.h b/src/swrenderer/r_renderthread.h index a0945cfee4..e874dbe4e7 100644 --- a/src/swrenderer/r_renderthread.h +++ b/src/swrenderer/r_renderthread.h @@ -61,7 +61,7 @@ namespace swrenderer std::unique_ptr TranslucentPass; std::unique_ptr SpriteList; std::unique_ptr Portal; - std::unique_ptr Clip3DFloors; + std::unique_ptr Clip3D; std::unique_ptr PlayerSprites; std::unique_ptr PlaneList; std::unique_ptr DrawSegments; diff --git a/src/swrenderer/scene/r_opaque_pass.cpp b/src/swrenderer/scene/r_opaque_pass.cpp index cf4155c4f3..76528ae141 100644 --- a/src/swrenderer/scene/r_opaque_pass.cpp +++ b/src/swrenderer/scene/r_opaque_pass.cpp @@ -568,7 +568,7 @@ namespace swrenderer backupfp = floorplane; backupcp = ceilingplane; - Clip3DFloors *clip3d = Thread->Clip3DFloors.get(); + Clip3DFloors *clip3d = Thread->Clip3D.get(); // first check all floors for (int i = 0; i < (int)frontsector->e->XFloor.ffloors.Size(); i++) @@ -738,7 +738,7 @@ namespace swrenderer backupcp = ceilingplane; floorplane = nullptr; ceilingplane = nullptr; - Clip3DFloors *clip3d = Thread->Clip3DFloors.get(); + Clip3DFloors *clip3d = Thread->Clip3D.get(); for (unsigned int i = 0; i < line->backsector->e->XFloor.ffloors.Size(); i++) { clip3d->SetFakeFloor(line->backsector->e->XFloor.ffloors[i]); diff --git a/src/swrenderer/scene/r_portal.cpp b/src/swrenderer/scene/r_portal.cpp index 88ccb96c0a..9053ebf150 100644 --- a/src/swrenderer/scene/r_portal.cpp +++ b/src/swrenderer/scene/r_portal.cpp @@ -98,7 +98,7 @@ namespace swrenderer if (!planes->HasPortalPlanes()) return; - Thread->Clip3DFloors->EnterSkybox(); + Thread->Clip3D->EnterSkybox(); CurrentPortalInSkybox = true; int savedextralight = extralight; @@ -213,7 +213,7 @@ namespace swrenderer visplaneStack.Push(pl); Thread->OpaquePass->RenderScene(); - 
Thread->Clip3DFloors->ResetClip(); // reset clips (floor/ceiling) + Thread->Clip3D->ResetClip(); // reset clips (floor/ceiling) planes->Render(); port->mFlags &= ~PORTSF_INSKYBOX; @@ -250,9 +250,9 @@ namespace swrenderer R_SetViewAngle(); CurrentPortalInSkybox = false; - Thread->Clip3DFloors->LeaveSkybox(); + Thread->Clip3D->LeaveSkybox(); - if (Thread->Clip3DFloors->fakeActive) return; + if (Thread->Clip3D->fakeActive) return; planes->ClearPortalPlanes(); } @@ -411,7 +411,7 @@ namespace swrenderer } // some portals have height differences, account for this here - Thread->Clip3DFloors->EnterSkybox(); // push 3D floor height map + Thread->Clip3D->EnterSkybox(); // push 3D floor height map CurrentPortalInSkybox = false; // first portal in a skybox should set this variable to false for proper clipping in skyboxes. // first pass, set clipping @@ -421,7 +421,7 @@ namespace swrenderer memcpy(floorclip + pds->x1, &pds->floorclip[0], pds->len * sizeof(*floorclip)); Thread->OpaquePass->RenderScene(); - Thread->Clip3DFloors->ResetClip(); // reset clips (floor/ceiling) + Thread->Clip3D->ResetClip(); // reset clips (floor/ceiling) if (!savedvisibility && camera) camera->renderflags &= ~RF_INVISIBLE; PlaneCycles.Clock(); @@ -451,7 +451,7 @@ namespace swrenderer if (Thread->MainThread) NetUpdate(); - Thread->Clip3DFloors->LeaveSkybox(); // pop 3D floor height map + Thread->Clip3D->LeaveSkybox(); // pop 3D floor height map CurrentPortalUniq = prevuniq2; // draw a red line around a portal if it's being highlighted diff --git a/src/swrenderer/scene/r_scene.cpp b/src/swrenderer/scene/r_scene.cpp index 18c5137df1..905c0a097a 100644 --- a/src/swrenderer/scene/r_scene.cpp +++ b/src/swrenderer/scene/r_scene.cpp @@ -215,8 +215,8 @@ namespace swrenderer void RenderScene::RenderThreadSlice(RenderThread *thread) { thread->FrameMemory->Clear(); - thread->Clip3DFloors->Cleanup(); - thread->Clip3DFloors->ResetClip(); // reset clips (floor/ceiling) + thread->Clip3D->Cleanup(); + 
thread->Clip3D->ResetClip(); // reset clips (floor/ceiling) thread->Portal->CopyStackedViewParameters(); thread->ClipSegments->Clear(0, viewwidth); thread->DrawSegments->Clear(); @@ -237,7 +237,7 @@ namespace swrenderer WallCycles.Clock(); thread->OpaquePass->RenderScene(); - thread->Clip3DFloors->ResetClip(); // reset clips (floor/ceiling) + thread->Clip3D->ResetClip(); // reset clips (floor/ceiling) if (thread == MainThread()) WallCycles.Unclock(); @@ -375,7 +375,7 @@ namespace swrenderer void RenderScene::Deinit() { MainThread()->TranslucentPass->Deinit(); - MainThread()->Clip3DFloors->Cleanup(); + MainThread()->Clip3D->Cleanup(); } ///////////////////////////////////////////////////////////////////////// diff --git a/src/swrenderer/scene/r_translucent_pass.cpp b/src/swrenderer/scene/r_translucent_pass.cpp index 06b61007d7..460fb62280 100644 --- a/src/swrenderer/scene/r_translucent_pass.cpp +++ b/src/swrenderer/scene/r_translucent_pass.cpp @@ -140,7 +140,7 @@ namespace swrenderer if (renew) { - Thread->Clip3DFloors->fake3D |= FAKE3D_REFRESHCLIP; + Thread->Clip3D->fake3D |= FAKE3D_REFRESHCLIP; } DrawSegmentList *drawseglist = Thread->DrawSegments.get(); @@ -167,7 +167,7 @@ namespace swrenderer Thread->SpriteList->Sort(); Thread->DrawSegments->BuildSegmentGroups(); - Clip3DFloors *clip3d = Thread->Clip3DFloors.get(); + Clip3DFloors *clip3d = Thread->Clip3D.get(); if (clip3d->height_top == nullptr) { // kg3D - no visible 3D floors, normal rendering DrawMaskedSingle(false); diff --git a/src/swrenderer/things/r_visiblesprite.cpp b/src/swrenderer/things/r_visiblesprite.cpp index 2bfd31f9bd..809a213977 100644 --- a/src/swrenderer/things/r_visiblesprite.cpp +++ b/src/swrenderer/things/r_visiblesprite.cpp @@ -57,7 +57,7 @@ namespace swrenderer int colormapnum = spr->Light.ColormapNum; F3DFloor *rover; - Clip3DFloors *clip3d = thread->Clip3DFloors.get(); + Clip3DFloors *clip3d = thread->Clip3D.get(); // [RH] Check for particles if (spr->IsParticle()) From 
b36444ca1c8ce895982f2d8ece49c53965c411bb Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 7 Feb 2017 20:43:41 +0100 Subject: [PATCH 824/912] Fix camera shader colormap (invulnerability effect) not getting applied to hardware accelerated player sprites --- src/swrenderer/things/r_playersprite.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/swrenderer/things/r_playersprite.cpp b/src/swrenderer/things/r_playersprite.cpp index 468355095f..929acab532 100644 --- a/src/swrenderer/things/r_playersprite.cpp +++ b/src/swrenderer/things/r_playersprite.cpp @@ -529,6 +529,10 @@ namespace swrenderer { accelSprite.special = static_cast(vis.Light.BaseColormap); } + else if (CameraLight::Instance()->ShaderColormap()) + { + accelSprite.special = CameraLight::Instance()->ShaderColormap(); + } else if (colormap_to_use->Color == PalEntry(255, 255, 255) && colormap_to_use->Desaturate == 0) { From 2d25002e2a9be9bcb9cbfaecb8e11f94f2117b06 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 8 Feb 2017 02:59:25 +0100 Subject: [PATCH 825/912] Fix InterestingSegments not being properly resized in PopPortal --- src/swrenderer/segments/r_drawsegment.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/swrenderer/segments/r_drawsegment.cpp b/src/swrenderer/segments/r_drawsegment.cpp index 84753cd4c7..c3b0e791c3 100644 --- a/src/swrenderer/segments/r_drawsegment.cpp +++ b/src/swrenderer/segments/r_drawsegment.cpp @@ -71,7 +71,7 @@ namespace swrenderer Segments.Resize(StartIndices.Last()); StartIndices.Pop(); - StartInterestingIndices.Resize(StartInterestingIndices.Last()); + InterestingSegments.Resize(StartInterestingIndices.Last()); StartInterestingIndices.Pop(); } From 388c511e1153fc91456fe5fd3cc31032c22d5a41 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 9 Feb 2017 05:42:15 +0100 Subject: [PATCH 826/912] Fix portal sprite clipping bugs --- src/swrenderer/scene/r_3dfloors.h | 1 + src/swrenderer/scene/r_opaque_pass.h | 2 ++ 
src/swrenderer/scene/r_portal.cpp | 17 +++++++++-------- src/swrenderer/scene/r_scene.cpp | 2 +- 4 files changed, 13 insertions(+), 9 deletions(-) diff --git a/src/swrenderer/scene/r_3dfloors.h b/src/swrenderer/scene/r_3dfloors.h index 13168a80ba..d697755b23 100644 --- a/src/swrenderer/scene/r_3dfloors.h +++ b/src/swrenderer/scene/r_3dfloors.h @@ -78,6 +78,7 @@ namespace swrenderer void EnterSkybox(); void LeaveSkybox(); void SetFakeFloor(F3DFloor *fakeFloor); + void ClearFakeFloors() { FakeFloors.clear(); } RenderThread *Thread = nullptr; diff --git a/src/swrenderer/scene/r_opaque_pass.h b/src/swrenderer/scene/r_opaque_pass.h index 6f0f065812..6a14703c58 100644 --- a/src/swrenderer/scene/r_opaque_pass.h +++ b/src/swrenderer/scene/r_opaque_pass.h @@ -60,6 +60,8 @@ namespace swrenderer void ResetFakingUnderwater() { r_fakingunderwater = false; } sector_t *FakeFlat(sector_t *sec, sector_t *tempsec, int *floorlightlevel, int *ceilinglightlevel, seg_t *backline, int backx1, int backx2, double frontcz1, double frontcz2); + + void ClearSeenSprites() { SeenSpriteSectors.clear(); SeenActors.clear(); } short floorclip[MAXWIDTH]; short ceilingclip[MAXWIDTH]; diff --git a/src/swrenderer/scene/r_portal.cpp b/src/swrenderer/scene/r_portal.cpp index 9053ebf150..af120103c9 100644 --- a/src/swrenderer/scene/r_portal.cpp +++ b/src/swrenderer/scene/r_portal.cpp @@ -162,7 +162,8 @@ namespace swrenderer viewsector = port->mDestination; assert(viewsector != nullptr); R_SetViewAngle(); - validcount++; // Make sure we see all sprites + Thread->OpaquePass->ClearSeenSprites(); + Thread->Clip3D->ClearFakeFloors(); planes->ClearKeepFakePlanes(); Thread->ClipSegments->Clear(pl->left, pl->right); @@ -185,6 +186,11 @@ namespace swrenderer } } + drawseglist->PushPortal(); + Thread->SpriteList->PushPortal(); + viewposStack.Push(ViewPos); + visplaneStack.Push(pl); + // Create a drawseg to clip sprites to the sky plane DrawSegment *draw_segment = Thread->FrameMemory->NewObject(); 
draw_segment->CurrentPortalUniq = CurrentPortalUniq; @@ -207,11 +213,6 @@ namespace swrenderer memcpy(draw_segment->sprtopclip, ceilingclip + pl->left, (pl->right - pl->left) * sizeof(short)); drawseglist->Push(draw_segment); - drawseglist->PushPortal(); - Thread->SpriteList->PushPortal(); - viewposStack.Push(ViewPos); - visplaneStack.Push(pl); - Thread->OpaquePass->RenderScene(); Thread->Clip3D->ResetClip(); // reset clips (floor/ceiling) planes->Render(); @@ -227,7 +228,7 @@ namespace swrenderer { // Masked textures and planes need the view coordinates restored for proper positioning. viewposStack.Pop(ViewPos); - + Thread->TranslucentPass->Render(); VisiblePlane *pl; @@ -236,7 +237,7 @@ namespace swrenderer { pl->Render(Thread, pl->Alpha, pl->Additive, true); } - + Thread->SpriteList->PopPortal(); drawseglist->PopPortal(); } diff --git a/src/swrenderer/scene/r_scene.cpp b/src/swrenderer/scene/r_scene.cpp index 905c0a097a..f7b8c9c88a 100644 --- a/src/swrenderer/scene/r_scene.cpp +++ b/src/swrenderer/scene/r_scene.cpp @@ -177,7 +177,7 @@ namespace swrenderer if (!r_scene_multithreaded) numThreads = 1; - if (numThreads != Threads.size()) + if (numThreads != (int)Threads.size()) { StopThreads(); StartThreads(numThreads); From 371dd981027175000f4225a553139e3dcc8bb512 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 9 Feb 2017 05:57:37 +0100 Subject: [PATCH 827/912] Show player in mirrors and portals --- src/swrenderer/scene/r_portal.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/swrenderer/scene/r_portal.cpp b/src/swrenderer/scene/r_portal.cpp index af120103c9..4139e7fe36 100644 --- a/src/swrenderer/scene/r_portal.cpp +++ b/src/swrenderer/scene/r_portal.cpp @@ -322,6 +322,8 @@ namespace swrenderer DVector3 startpos = ViewPos; DVector3 savedpath[2] = { ViewPath[0], ViewPath[1] }; ActorRenderFlags savedvisibility = camera ? 
camera->renderflags & RF_INVISIBLE : ActorRenderFlags::FromInt(0); + + camera->renderflags &= ~RF_INVISIBLE; CurrentPortalUniq++; From c16aa3d3612c5a8217e0b5e2f0a487097bc436cc Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 9 Feb 2017 12:18:40 +0100 Subject: [PATCH 828/912] Fix crouch player sprite --- src/swrenderer/scene/r_opaque_pass.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/swrenderer/scene/r_opaque_pass.cpp b/src/swrenderer/scene/r_opaque_pass.cpp index 76528ae141..ac8505dd26 100644 --- a/src/swrenderer/scene/r_opaque_pass.cpp +++ b/src/swrenderer/scene/r_opaque_pass.cpp @@ -939,6 +939,11 @@ namespace swrenderer sprite.spriteScale = thing->Scale; sprite.renderflags = thing->renderflags; + if (thing->player != nullptr) + { + P_CheckPlayerSprite(thing, sprite.spritenum, sprite.spriteScale); + } + if (thing->picnum.isValid()) { sprite.picnum = thing->picnum; @@ -1034,10 +1039,6 @@ namespace swrenderer sprite.spriteScale.Y = -sprite.spriteScale.Y; sprite.renderflags ^= RF_YFLIP; } - if (thing->player != nullptr) - { - P_CheckPlayerSprite(thing, sprite.spritenum, sprite.spriteScale); - } if (sprite.spriteScale.X < 0) { sprite.spriteScale.X = -sprite.spriteScale.X; From 3c0d7694994a613b780bf340ac7681c5a6fc5b9b Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 9 Feb 2017 22:58:28 +0100 Subject: [PATCH 829/912] Fix portals draw segment indexing bug --- src/swrenderer/scene/r_translucent_pass.cpp | 4 ++-- src/swrenderer/segments/r_drawsegment.cpp | 4 ++-- src/swrenderer/segments/r_drawsegment.h | 6 ++---- src/swrenderer/things/r_particle.cpp | 2 +- 4 files changed, 7 insertions(+), 9 deletions(-) diff --git a/src/swrenderer/scene/r_translucent_pass.cpp b/src/swrenderer/scene/r_translucent_pass.cpp index 460fb62280..419bcf8ee9 100644 --- a/src/swrenderer/scene/r_translucent_pass.cpp +++ b/src/swrenderer/scene/r_translucent_pass.cpp @@ -71,7 +71,7 @@ namespace swrenderer // b) skip most of the collected drawsegs 
which have no portal attached. portaldrawsegs.Clear(); DrawSegmentList *drawseglist = Thread->DrawSegments.get(); - for (auto index = drawseglist->BeginIndex(); index != drawseglist->EndIndex(); index++) + for (auto index = 0; index != drawseglist->SegmentsCount(); index++) { DrawSegment *seg = drawseglist->Segment(index); @@ -144,7 +144,7 @@ namespace swrenderer } DrawSegmentList *drawseglist = Thread->DrawSegments.get(); - for (auto index = drawseglist->BeginIndex(); index != drawseglist->EndIndex(); index++) + for (auto index = 0; index != drawseglist->SegmentsCount(); index++) { DrawSegment *ds = drawseglist->Segment(index); diff --git a/src/swrenderer/segments/r_drawsegment.cpp b/src/swrenderer/segments/r_drawsegment.cpp index c3b0e791c3..e4bc3d26b4 100644 --- a/src/swrenderer/segments/r_drawsegment.cpp +++ b/src/swrenderer/segments/r_drawsegment.cpp @@ -90,13 +90,13 @@ namespace swrenderer SegmentGroups.Clear(); unsigned int groupSize = 100; - for (unsigned int index = BeginIndex(); index < EndIndex(); index += groupSize) + for (unsigned int index = 0; index < SegmentsCount(); index += groupSize) { auto ds = Segment(index); DrawSegmentGroup group; group.BeginIndex = index; - group.EndIndex = MIN(index + groupSize, EndIndex()); + group.EndIndex = MIN(index + groupSize, SegmentsCount()); group.x1 = ds->x1; group.x2 = ds->x2; group.neardepth = MIN(ds->sz1, ds->sz2); diff --git a/src/swrenderer/segments/r_drawsegment.h b/src/swrenderer/segments/r_drawsegment.h index a82b2333b5..9bfd5dc65f 100644 --- a/src/swrenderer/segments/r_drawsegment.h +++ b/src/swrenderer/segments/r_drawsegment.h @@ -64,12 +64,10 @@ namespace swrenderer TArray SegmentGroups; - unsigned int BeginIndex() const { return StartIndices.Last(); } - unsigned int EndIndex() const { return Segments.Size(); } + unsigned int SegmentsCount() const { return Segments.Size() - StartIndices.Last(); } DrawSegment *Segment(unsigned int index) const { return Segments[Segments.Size() - 1 - index]; } - unsigned 
int BeginInterestingIndex() const { return StartInterestingIndices.Last(); } - unsigned int EndInterestingIndex() const { return InterestingSegments.Size(); } + unsigned int InterestingSegmentsCount() const { return InterestingSegments.Size() - StartInterestingIndices.Last(); } DrawSegment *InterestingSegment(unsigned int index) const { return InterestingSegments[InterestingSegments.Size() - 1 - index]; } void Clear(); diff --git a/src/swrenderer/things/r_particle.cpp b/src/swrenderer/things/r_particle.cpp index 9b9fbb4848..9212004e6a 100644 --- a/src/swrenderer/things/r_particle.cpp +++ b/src/swrenderer/things/r_particle.cpp @@ -267,7 +267,7 @@ namespace swrenderer // Draw any masked textures behind this particle so that when the // particle is drawn, it will be in front of them. DrawSegmentList *segmentlist = thread->DrawSegments.get(); - for (unsigned int index = segmentlist->BeginInterestingIndex(); index != segmentlist->EndInterestingIndex(); index++) + for (unsigned int index = 0; index != segmentlist->InterestingSegmentsCount(); index++) { DrawSegment *ds = segmentlist->InterestingSegment(index); From c17317de24c017dfa8c2520085ed20b918dc4ff3 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 11 Feb 2017 18:00:02 +0100 Subject: [PATCH 830/912] Rename variables and group them a little more how they are used --- src/swrenderer/line/r_line.cpp | 302 ++++++++++++++++----------------- src/swrenderer/line/r_line.h | 53 +++--- 2 files changed, 177 insertions(+), 178 deletions(-) diff --git a/src/swrenderer/line/r_line.cpp b/src/swrenderer/line/r_line.cpp index 5f5a578831..c01d1431cf 100644 --- a/src/swrenderer/line/r_line.cpp +++ b/src/swrenderer/line/r_line.cpp @@ -67,15 +67,14 @@ namespace swrenderer bool solid; DVector2 pt1, pt2; - InSubsector = subsector; - frontsector = sector; - backsector = fakebacksector; - floorplane = linefloorplane; - ceilingplane = lineceilingplane; + mSubsector = subsector; + mFrontSector = sector; + mBackSector = fakebacksector; 
+ mFloorPlane = linefloorplane; + mCeilingPlane = lineceilingplane; foggy = infog; basecolormap = colormap; - - curline = line; + mLineSegment = line; pt1 = line->v1->fPos() - ViewPos; pt2 = line->v2->fPos() - ViewPos; @@ -96,7 +95,7 @@ namespace swrenderer { if (Thread->ClipSegments->Check(WallC.sx1, WallC.sx2)) { - InSubsector->flags |= SSECF_DRAWN; + mSubsector->flags |= SSECF_DRAWN; } return; } @@ -127,18 +126,17 @@ namespace swrenderer if (!(clip3d->fake3D & FAKE3D_FAKEBACK)) { - backsector = line->backsector; + mBackSector = line->backsector; } - rw_frontcz1 = frontsector->ceilingplane.ZatPoint(line->v1); - rw_frontfz1 = frontsector->floorplane.ZatPoint(line->v1); - rw_frontcz2 = frontsector->ceilingplane.ZatPoint(line->v2); - rw_frontfz2 = frontsector->floorplane.ZatPoint(line->v2); + mFrontCeilingZ1 = mFrontSector->ceilingplane.ZatPoint(line->v1); + mFrontFloorZ1 = mFrontSector->floorplane.ZatPoint(line->v1); + mFrontCeilingZ2 = mFrontSector->ceilingplane.ZatPoint(line->v2); + mFrontFloorZ2 = mFrontSector->floorplane.ZatPoint(line->v2); - rw_mustmarkfloor = rw_mustmarkceiling = false; rw_havehigh = rw_havelow = false; // Single sided line? 
- if (backsector == NULL) + if (mBackSector == NULL) { solid = true; } @@ -147,22 +145,22 @@ namespace swrenderer // kg3D - its fake, no transfer_heights if (!(clip3d->fake3D & FAKE3D_FAKEBACK)) { // killough 3/8/98, 4/4/98: hack for invisible ceilings / deep water - backsector = Thread->OpaquePass->FakeFlat(backsector, &tempsec, nullptr, nullptr, curline, WallC.sx1, WallC.sx2, rw_frontcz1, rw_frontcz2); + mBackSector = Thread->OpaquePass->FakeFlat(mBackSector, &tempsec, nullptr, nullptr, mLineSegment, WallC.sx1, WallC.sx2, mFrontCeilingZ1, mFrontCeilingZ2); } - doorclosed = false; // killough 4/16/98 + mDoorClosed = false; // killough 4/16/98 - rw_backcz1 = backsector->ceilingplane.ZatPoint(line->v1); - rw_backfz1 = backsector->floorplane.ZatPoint(line->v1); - rw_backcz2 = backsector->ceilingplane.ZatPoint(line->v2); - rw_backfz2 = backsector->floorplane.ZatPoint(line->v2); + mBackCeilingZ1 = mBackSector->ceilingplane.ZatPoint(line->v1); + mBackFloorZ1 = mBackSector->floorplane.ZatPoint(line->v1); + mBackCeilingZ2 = mBackSector->ceilingplane.ZatPoint(line->v2); + mBackFloorZ2 = mBackSector->floorplane.ZatPoint(line->v2); if (clip3d->fake3D & FAKE3D_FAKEBACK) { - if (rw_frontfz1 >= rw_backfz1 && rw_frontfz2 >= rw_backfz2) + if (mFrontFloorZ1 >= mBackFloorZ1 && mFrontFloorZ2 >= mBackFloorZ2) { clip3d->fake3D |= FAKE3D_CLIPBOTFRONT; } - if (rw_frontcz1 <= rw_backcz1 && rw_frontcz2 <= rw_backcz2) + if (mFrontCeilingZ1 <= mBackCeilingZ1 && mFrontCeilingZ2 <= mBackCeilingZ2) { clip3d->fake3D |= FAKE3D_CLIPTOPFRONT; } @@ -170,15 +168,15 @@ namespace swrenderer // Cannot make these walls solid, because it can result in // sprite clipping problems for sprites near the wall - if (rw_frontcz1 > rw_backcz1 || rw_frontcz2 > rw_backcz2) + if (mFrontCeilingZ1 > mBackCeilingZ1 || mFrontCeilingZ2 > mBackCeilingZ2) { rw_havehigh = true; - wallupper.Project(backsector->ceilingplane, &WallC, curline, renderportal->MirrorFlags & RF_XFLIP); + 
wallupper.Project(mBackSector->ceilingplane, &WallC, mLineSegment, renderportal->MirrorFlags & RF_XFLIP); } - if (rw_frontfz1 < rw_backfz1 || rw_frontfz2 < rw_backfz2) + if (mFrontFloorZ1 < mBackFloorZ1 || mFrontFloorZ2 < mBackFloorZ2) { rw_havelow = true; - walllower.Project(backsector->floorplane, &WallC, curline, renderportal->MirrorFlags & RF_XFLIP); + walllower.Project(mBackSector->floorplane, &WallC, mLineSegment, renderportal->MirrorFlags & RF_XFLIP); } // Portal @@ -187,21 +185,21 @@ namespace swrenderer solid = true; } // Closed door. - else if ((rw_backcz1 <= rw_frontfz1 && rw_backcz2 <= rw_frontfz2) || - (rw_backfz1 >= rw_frontcz1 && rw_backfz2 >= rw_frontcz2)) + else if ((mBackCeilingZ1 <= mFrontFloorZ1 && mBackCeilingZ2 <= mFrontFloorZ2) || + (mBackFloorZ1 >= mFrontCeilingZ1 && mBackFloorZ2 >= mFrontCeilingZ2)) { solid = true; } else if ( // properly render skies (consider door "open" if both ceilings are sky): - (backsector->GetTexture(sector_t::ceiling) != skyflatnum || frontsector->GetTexture(sector_t::ceiling) != skyflatnum) + (mBackSector->GetTexture(sector_t::ceiling) != skyflatnum || mFrontSector->GetTexture(sector_t::ceiling) != skyflatnum) // if door is closed because back is shut: - && rw_backcz1 <= rw_backfz1 && rw_backcz2 <= rw_backfz2 + && mBackCeilingZ1 <= mBackFloorZ1 && mBackCeilingZ2 <= mBackFloorZ2 // preserve a kind of transparent door/lift special effect: - && ((rw_backcz1 >= rw_frontcz1 && rw_backcz2 >= rw_frontcz2) || line->sidedef->GetTexture(side_t::top).isValid()) - && ((rw_backfz1 <= rw_frontfz1 && rw_backfz2 <= rw_frontfz2) || line->sidedef->GetTexture(side_t::bottom).isValid())) + && ((mBackCeilingZ1 >= mFrontCeilingZ1 && mBackCeilingZ2 >= mFrontCeilingZ2) || line->sidedef->GetTexture(side_t::top).isValid()) + && ((mBackFloorZ1 <= mFrontFloorZ1 && mBackFloorZ2 <= mFrontFloorZ2) || line->sidedef->GetTexture(side_t::bottom).isValid())) { // killough 1/18/98 -- This function is used to fix the automap bug which // showed lines 
behind closed doors simply because the door had a dropoff. @@ -211,41 +209,41 @@ namespace swrenderer // This fixes the automap floor height bug -- killough 1/18/98: // killough 4/7/98: optimize: save result in doorclosed for use in r_segs.c - doorclosed = true; + mDoorClosed = true; solid = true; } - else if (frontsector->ceilingplane != backsector->ceilingplane || - frontsector->floorplane != backsector->floorplane) + else if (mFrontSector->ceilingplane != mBackSector->ceilingplane || + mFrontSector->floorplane != mBackSector->floorplane) { // Window. solid = false; } - else if (SkyboxCompare(frontsector, backsector)) + else if (SkyboxCompare(mFrontSector, mBackSector)) { solid = false; } - else if (backsector->lightlevel != frontsector->lightlevel - || backsector->GetTexture(sector_t::floor) != frontsector->GetTexture(sector_t::floor) - || backsector->GetTexture(sector_t::ceiling) != frontsector->GetTexture(sector_t::ceiling) - || curline->sidedef->GetTexture(side_t::mid).isValid() + else if (mBackSector->lightlevel != mFrontSector->lightlevel + || mBackSector->GetTexture(sector_t::floor) != mFrontSector->GetTexture(sector_t::floor) + || mBackSector->GetTexture(sector_t::ceiling) != mFrontSector->GetTexture(sector_t::ceiling) + || mLineSegment->sidedef->GetTexture(side_t::mid).isValid() // killough 3/7/98: Take flats offsets into account: - || backsector->planes[sector_t::floor].xform != frontsector->planes[sector_t::floor].xform - || backsector->planes[sector_t::ceiling].xform != frontsector->planes[sector_t::ceiling].xform + || mBackSector->planes[sector_t::floor].xform != mFrontSector->planes[sector_t::floor].xform + || mBackSector->planes[sector_t::ceiling].xform != mFrontSector->planes[sector_t::ceiling].xform - || backsector->GetPlaneLight(sector_t::floor) != frontsector->GetPlaneLight(sector_t::floor) - || backsector->GetPlaneLight(sector_t::ceiling) != frontsector->GetPlaneLight(sector_t::ceiling) - || backsector->GetVisFlags(sector_t::floor) != 
frontsector->GetVisFlags(sector_t::floor) - || backsector->GetVisFlags(sector_t::ceiling) != frontsector->GetVisFlags(sector_t::ceiling) + || mBackSector->GetPlaneLight(sector_t::floor) != mFrontSector->GetPlaneLight(sector_t::floor) + || mBackSector->GetPlaneLight(sector_t::ceiling) != mFrontSector->GetPlaneLight(sector_t::ceiling) + || mBackSector->GetVisFlags(sector_t::floor) != mFrontSector->GetVisFlags(sector_t::floor) + || mBackSector->GetVisFlags(sector_t::ceiling) != mFrontSector->GetVisFlags(sector_t::ceiling) // [RH] Also consider colormaps - || backsector->ColorMap != frontsector->ColorMap + || mBackSector->ColorMap != mFrontSector->ColorMap // kg3D - and fake lights - || (frontsector->e && frontsector->e->XFloor.lightlist.Size()) - || (backsector->e && backsector->e->XFloor.lightlist.Size()) + || (mFrontSector->e && mFrontSector->e->XFloor.lightlist.Size()) + || (mBackSector->e && mBackSector->e->XFloor.lightlist.Size()) ) { solid = false; @@ -258,11 +256,11 @@ namespace swrenderer // When using GL nodes, do a clipping test for these lines so we can // mark their subsectors as visible for automap texturing. 
- if (hasglnodes && !(InSubsector->flags & SSECF_DRAWN)) + if (hasglnodes && !(mSubsector->flags & SSECF_DRAWN)) { if (Thread->ClipSegments->Check(WallC.sx1, WallC.sx2)) { - InSubsector->flags |= SSECF_DRAWN; + mSubsector->flags |= SSECF_DRAWN; } } return; @@ -279,15 +277,15 @@ namespace swrenderer } else { - rw_ceilstat = walltop.Project(frontsector->ceilingplane, &WallC, curline, renderportal->MirrorFlags & RF_XFLIP); - rw_floorstat = wallbottom.Project(frontsector->floorplane, &WallC, curline, renderportal->MirrorFlags & RF_XFLIP); + mCeilingClipped = walltop.Project(mFrontSector->ceilingplane, &WallC, mLineSegment, renderportal->MirrorFlags & RF_XFLIP); + mFloorClipped = wallbottom.Project(mFrontSector->floorplane, &WallC, mLineSegment, renderportal->MirrorFlags & RF_XFLIP); } bool visible = Thread->ClipSegments->Clip(WallC.sx1, WallC.sx2, solid, this); if (visible) { - InSubsector->flags |= SSECF_DRAWN; + mSubsector->flags |= SSECF_DRAWN; } } @@ -325,6 +323,8 @@ namespace swrenderer SetWallVariables(true); } + side_t *sidedef = mLineSegment->sidedef; + rw_offset = FLOAT2FIXED(sidedef->GetTextureXOffset(side_t::mid)); rw_light = rw_lightleft + rw_lightstep * (start - WallC.sx1); @@ -347,7 +347,7 @@ namespace swrenderer draw_segment->siz2 = 1 / WallC.sz2; draw_segment->x1 = start; draw_segment->x2 = stop; - draw_segment->curline = curline; + draw_segment->curline = mLineSegment; draw_segment->bFogBoundary = false; draw_segment->bFakeBoundary = false; draw_segment->foggy = foggy; @@ -366,7 +366,7 @@ namespace swrenderer { draw_segment->silhouette = SIL_BOTH; } - else if (backsector == NULL) + else if (mBackSector == NULL) { draw_segment->sprtopclip = Thread->FrameMemory->AllocMemory(stop - start); draw_segment->sprbottomclip = Thread->FrameMemory->AllocMemory(stop - start); @@ -379,14 +379,14 @@ namespace swrenderer // two sided line draw_segment->silhouette = 0; - if (rw_frontfz1 > rw_backfz1 || rw_frontfz2 > rw_backfz2 || - 
backsector->floorplane.PointOnSide(ViewPos) < 0) + if (mFrontFloorZ1 > mBackFloorZ1 || mFrontFloorZ2 > mBackFloorZ2 || + mBackSector->floorplane.PointOnSide(ViewPos) < 0) { draw_segment->silhouette = SIL_BOTTOM; } - if (rw_frontcz1 < rw_backcz1 || rw_frontcz2 < rw_backcz2 || - backsector->ceilingplane.PointOnSide(ViewPos) < 0) + if (mFrontCeilingZ1 < mBackCeilingZ1 || mFrontCeilingZ2 < mBackCeilingZ2 || + mBackSector->ceilingplane.PointOnSide(ViewPos) < 0) { draw_segment->silhouette |= SIL_TOP; } @@ -400,13 +400,13 @@ namespace swrenderer // killough 4/7/98: make doorclosed external variable { - if (doorclosed || (rw_backcz1 <= rw_frontfz1 && rw_backcz2 <= rw_frontfz2)) + if (mDoorClosed || (mBackCeilingZ1 <= mFrontFloorZ1 && mBackCeilingZ2 <= mFrontFloorZ2)) { draw_segment->sprbottomclip = Thread->FrameMemory->AllocMemory(stop - start); memset(draw_segment->sprbottomclip, -1, (stop - start) * sizeof(short)); draw_segment->silhouette |= SIL_BOTTOM; } - if (doorclosed || (rw_backfz1 >= rw_frontcz1 && rw_backfz2 >= rw_frontcz2)) + if (mDoorClosed || (mBackFloorZ1 >= mFrontCeilingZ1 && mBackFloorZ2 >= mFrontCeilingZ2)) { // killough 1/17/98, 2/8/98 draw_segment->sprtopclip = Thread->FrameMemory->AllocMemory(stop - start); fillshort(draw_segment->sprtopclip, stop - start, viewheight); @@ -414,18 +414,18 @@ namespace swrenderer } } - if (!draw_segment->fake && r_3dfloors && backsector->e && backsector->e->XFloor.ffloors.Size()) { - for (i = 0; i < (int)backsector->e->XFloor.ffloors.Size(); i++) { - F3DFloor *rover = backsector->e->XFloor.ffloors[i]; + if (!draw_segment->fake && r_3dfloors && mBackSector->e && mBackSector->e->XFloor.ffloors.Size()) { + for (i = 0; i < (int)mBackSector->e->XFloor.ffloors.Size(); i++) { + F3DFloor *rover = mBackSector->e->XFloor.ffloors[i]; if (rover->flags & FF_RENDERSIDES && (!(rover->flags & FF_INVERTSIDES) || rover->flags & FF_ALLSIDES)) { draw_segment->bFakeBoundary |= 1; break; } } } - if (!draw_segment->fake && r_3dfloors && 
frontsector->e && frontsector->e->XFloor.ffloors.Size()) { - for (i = 0; i < (int)frontsector->e->XFloor.ffloors.Size(); i++) { - F3DFloor *rover = frontsector->e->XFloor.ffloors[i]; + if (!draw_segment->fake && r_3dfloors && mFrontSector->e && mFrontSector->e->XFloor.ffloors.Size()) { + for (i = 0; i < (int)mFrontSector->e->XFloor.ffloors.Size(); i++) { + F3DFloor *rover = mFrontSector->e->XFloor.ffloors[i]; if (rover->flags & FF_RENDERSIDES && (rover->flags & FF_ALLSIDES || rover->flags & FF_INVERTSIDES)) { draw_segment->bFakeBoundary |= 2; break; @@ -436,9 +436,9 @@ namespace swrenderer if (!draw_segment->fake) // allocate space for masked texture tables, if needed // [RH] Don't just allocate the space; fill it in too. - if ((TexMan(sidedef->GetTexture(side_t::mid), true)->UseType != FTexture::TEX_Null || draw_segment->bFakeBoundary || IsFogBoundary(frontsector, backsector)) && - (rw_ceilstat != ProjectedWallCull::OutsideBelow || !sidedef->GetTexture(side_t::top).isValid()) && - (rw_floorstat != ProjectedWallCull::OutsideAbove || !sidedef->GetTexture(side_t::bottom).isValid()) && + if ((TexMan(sidedef->GetTexture(side_t::mid), true)->UseType != FTexture::TEX_Null || draw_segment->bFakeBoundary || IsFogBoundary(mFrontSector, mBackSector)) && + (mCeilingClipped != ProjectedWallCull::OutsideBelow || !sidedef->GetTexture(side_t::top).isValid()) && + (mFloorClipped != ProjectedWallCull::OutsideAbove || !sidedef->GetTexture(side_t::bottom).isValid()) && (WallC.sz1 >= TOO_CLOSE_Z && WallC.sz2 >= TOO_CLOSE_Z)) { float *swal; @@ -451,7 +451,7 @@ namespace swrenderer draw_segment->bkup = Thread->FrameMemory->AllocMemory(stop - start); memcpy(draw_segment->bkup, &Thread->OpaquePass->ceilingclip[start], sizeof(short)*(stop - start)); - draw_segment->bFogBoundary = IsFogBoundary(frontsector, backsector); + draw_segment->bFogBoundary = IsFogBoundary(mFrontSector, mBackSector); if (sidedef->GetTexture(side_t::mid).isValid() || draw_segment->bFakeBoundary) { if 
(sidedef->GetTexture(side_t::mid).isValid()) @@ -507,13 +507,13 @@ namespace swrenderer // not from the current subsector, which is what the current wallshade value // comes from. We make an exeption for polyobjects, however, since their "home" // sector should be whichever one they move into. - if (curline->sidedef->Flags & WALLF_POLYOBJ) + if (mLineSegment->sidedef->Flags & WALLF_POLYOBJ) { draw_segment->shade = wallshade; } else { - draw_segment->shade = LIGHT2SHADE(curline->sidedef->GetLightLevel(foggy, curline->frontsector->lightlevel) + R_ActualExtraLight(foggy)); + draw_segment->shade = LIGHT2SHADE(mLineSegment->sidedef->GetLightLevel(foggy, mLineSegment->frontsector->lightlevel) + R_ActualExtraLight(foggy)); } if (draw_segment->bFogBoundary || draw_segment->maskedtexturecol != nullptr) @@ -526,9 +526,9 @@ namespace swrenderer // render it if (markceiling) { - if (ceilingplane) + if (mCeilingPlane) { // killough 4/11/98: add NULL ptr checks - ceilingplane = Thread->PlaneList->GetRange(ceilingplane, start, stop); + mCeilingPlane = Thread->PlaneList->GetRange(mCeilingPlane, start, stop); } else { @@ -538,9 +538,9 @@ namespace swrenderer if (markfloor) { - if (floorplane) + if (mFloorPlane) { // killough 4/11/98: add NULL ptr checks - floorplane = Thread->PlaneList->GetRange(floorplane, start, stop); + mFloorPlane = Thread->PlaneList->GetRange(mFloorPlane, start, stop); } else { @@ -568,7 +568,7 @@ namespace swrenderer memcpy(draw_segment->sprbottomclip, &Thread->OpaquePass->floorclip[start], sizeof(short)*(stop - start)); } - if (maskedtexture && curline->sidedef->GetTexture(side_t::mid).isValid()) + if (maskedtexture && mLineSegment->sidedef->GetTexture(side_t::mid).isValid()) { draw_segment->silhouette |= SIL_TOP | SIL_BOTTOM; } @@ -577,12 +577,12 @@ namespace swrenderer // [ZZ] Only if not an active mirror if (!rw_markportal) { - RenderDecal::RenderDecals(Thread, curline->sidedef, draw_segment, wallshade, rw_lightleft, rw_lightstep, curline, WallC, foggy, 
basecolormap, walltop.ScreenY, wallbottom.ScreenY); + RenderDecal::RenderDecals(Thread, mLineSegment->sidedef, draw_segment, wallshade, rw_lightleft, rw_lightstep, mLineSegment, WallC, foggy, basecolormap, walltop.ScreenY, wallbottom.ScreenY); } if (rw_markportal) { - Thread->Portal->AddLinePortal(curline->linedef, draw_segment->x1, draw_segment->x2, draw_segment->sprtopclip, draw_segment->sprbottomclip); + Thread->Portal->AddLinePortal(mLineSegment->linedef, draw_segment->x1, draw_segment->x2, draw_segment->sprtopclip, draw_segment->sprbottomclip); } return (clip3d->fake3D & FAKE3D_FAKEMASK) == 0; @@ -595,8 +595,8 @@ namespace swrenderer rw_markportal = false; - sidedef = curline->sidedef; - linedef = curline->linedef; + side_t *sidedef = mLineSegment->sidedef; + line_t *linedef = mLineSegment->linedef; // mark the segment as visible for auto map if (!Thread->Scene->DontMapLines()) linedef->flags |= ML_MAPPED; @@ -609,7 +609,7 @@ namespace swrenderer markfloor = markceiling = true; // act like a one-sided wall here (todo: check how does this work with transparency) rw_markportal = true; } - else if (backsector == NULL) + else if (mBackSector == NULL) { // single sided line // a single sided line is terminal, so it must mark ends @@ -631,11 +631,11 @@ namespace swrenderer { // normal orientation if (linedef->flags & ML_DONTPEGBOTTOM) { // bottom of texture at bottom - rw_midtexturemid = (frontsector->GetPlaneTexZ(sector_t::floor) - ViewPos.Z) * yrepeat + midtexture->GetHeight(); + rw_midtexturemid = (mFrontSector->GetPlaneTexZ(sector_t::floor) - ViewPos.Z) * yrepeat + midtexture->GetHeight(); } else { // top of texture at top - rw_midtexturemid = (frontsector->GetPlaneTexZ(sector_t::ceiling) - ViewPos.Z) * yrepeat; + rw_midtexturemid = (mFrontSector->GetPlaneTexZ(sector_t::ceiling) - ViewPos.Z) * yrepeat; if (rowoffset < 0 && midtexture != NULL) { rowoffset += midtexture->GetHeight(); @@ -647,11 +647,11 @@ namespace swrenderer rowoffset = -rowoffset; if 
(linedef->flags & ML_DONTPEGBOTTOM) { // top of texture at bottom - rw_midtexturemid = (frontsector->GetPlaneTexZ(sector_t::floor) - ViewPos.Z) * yrepeat; + rw_midtexturemid = (mFrontSector->GetPlaneTexZ(sector_t::floor) - ViewPos.Z) * yrepeat; } else { // bottom of texture at top - rw_midtexturemid = (frontsector->GetPlaneTexZ(sector_t::ceiling) - ViewPos.Z) * yrepeat + midtexture->GetHeight(); + rw_midtexturemid = (mFrontSector->GetPlaneTexZ(sector_t::ceiling) - ViewPos.Z) * yrepeat + midtexture->GetHeight(); } } if (midtexture->bWorldPanning) @@ -670,95 +670,95 @@ namespace swrenderer { // two-sided line // hack to allow height changes in outdoor areas - double rw_frontlowertop = frontsector->GetPlaneTexZ(sector_t::ceiling); + double rw_frontlowertop = mFrontSector->GetPlaneTexZ(sector_t::ceiling); - if (frontsector->GetTexture(sector_t::ceiling) == skyflatnum && - backsector->GetTexture(sector_t::ceiling) == skyflatnum) + if (mFrontSector->GetTexture(sector_t::ceiling) == skyflatnum && + mBackSector->GetTexture(sector_t::ceiling) == skyflatnum) { if (rw_havehigh) { // front ceiling is above back ceiling memcpy(&walltop.ScreenY[WallC.sx1], &wallupper.ScreenY[WallC.sx1], (WallC.sx2 - WallC.sx1) * sizeof(walltop.ScreenY[0])); rw_havehigh = false; } - else if (rw_havelow && frontsector->ceilingplane != backsector->ceilingplane) + else if (rw_havelow && mFrontSector->ceilingplane != mBackSector->ceilingplane) { // back ceiling is above front ceiling // The check for rw_havelow is not Doom-compliant, but it avoids HoM that // would otherwise occur because there is space made available for this // wall but nothing to draw for it. // Recalculate walltop so that the wall is clipped by the back sector's // ceiling instead of the front sector's ceiling. 
- walltop.Project(backsector->ceilingplane, &WallC, curline, Thread->Portal->MirrorFlags & RF_XFLIP); + walltop.Project(mBackSector->ceilingplane, &WallC, mLineSegment, Thread->Portal->MirrorFlags & RF_XFLIP); } // Putting sky ceilings on the front and back of a line alters the way unpegged // positioning works. - rw_frontlowertop = backsector->GetPlaneTexZ(sector_t::ceiling); + rw_frontlowertop = mBackSector->GetPlaneTexZ(sector_t::ceiling); } if (linedef->isVisualPortal()) { markceiling = markfloor = true; } - else if ((rw_backcz1 <= rw_frontfz1 && rw_backcz2 <= rw_frontfz2) || - (rw_backfz1 >= rw_frontcz1 && rw_backfz2 >= rw_frontcz2)) + else if ((mBackCeilingZ1 <= mFrontFloorZ1 && mBackCeilingZ2 <= mFrontFloorZ2) || + (mBackFloorZ1 >= mFrontCeilingZ1 && mBackFloorZ2 >= mFrontCeilingZ2)) { // closed door markceiling = markfloor = true; } else { - markfloor = rw_mustmarkfloor - || backsector->floorplane != frontsector->floorplane - || backsector->lightlevel != frontsector->lightlevel - || backsector->GetTexture(sector_t::floor) != frontsector->GetTexture(sector_t::floor) - || backsector->GetPlaneLight(sector_t::floor) != frontsector->GetPlaneLight(sector_t::floor) + markfloor = + mBackSector->floorplane != mFrontSector->floorplane + || mBackSector->lightlevel != mFrontSector->lightlevel + || mBackSector->GetTexture(sector_t::floor) != mFrontSector->GetTexture(sector_t::floor) + || mBackSector->GetPlaneLight(sector_t::floor) != mFrontSector->GetPlaneLight(sector_t::floor) // killough 3/7/98: Add checks for (x,y) offsets - || backsector->planes[sector_t::floor].xform != frontsector->planes[sector_t::floor].xform - || backsector->GetAlpha(sector_t::floor) != frontsector->GetAlpha(sector_t::floor) + || mBackSector->planes[sector_t::floor].xform != mFrontSector->planes[sector_t::floor].xform + || mBackSector->GetAlpha(sector_t::floor) != mFrontSector->GetAlpha(sector_t::floor) // killough 4/15/98: prevent 2s normals // from bleeding through deep water - || 
frontsector->heightsec + || mFrontSector->heightsec - || backsector->GetVisFlags(sector_t::floor) != frontsector->GetVisFlags(sector_t::floor) + || mBackSector->GetVisFlags(sector_t::floor) != mFrontSector->GetVisFlags(sector_t::floor) // [RH] Add checks for colormaps - || backsector->ColorMap != frontsector->ColorMap + || mBackSector->ColorMap != mFrontSector->ColorMap // kg3D - add fake lights - || (frontsector->e && frontsector->e->XFloor.lightlist.Size()) - || (backsector->e && backsector->e->XFloor.lightlist.Size()) + || (mFrontSector->e && mFrontSector->e->XFloor.lightlist.Size()) + || (mBackSector->e && mBackSector->e->XFloor.lightlist.Size()) || (sidedef->GetTexture(side_t::mid).isValid() && ((linedef->flags & (ML_CLIP_MIDTEX | ML_WRAP_MIDTEX)) || (sidedef->Flags & (WALLF_CLIP_MIDTEX | WALLF_WRAP_MIDTEX)))) ; - markceiling = (frontsector->GetTexture(sector_t::ceiling) != skyflatnum || - backsector->GetTexture(sector_t::ceiling) != skyflatnum) && - (rw_mustmarkceiling - || backsector->ceilingplane != frontsector->ceilingplane - || backsector->lightlevel != frontsector->lightlevel - || backsector->GetTexture(sector_t::ceiling) != frontsector->GetTexture(sector_t::ceiling) + markceiling = (mFrontSector->GetTexture(sector_t::ceiling) != skyflatnum || + mBackSector->GetTexture(sector_t::ceiling) != skyflatnum) && + ( + mBackSector->ceilingplane != mFrontSector->ceilingplane + || mBackSector->lightlevel != mFrontSector->lightlevel + || mBackSector->GetTexture(sector_t::ceiling) != mFrontSector->GetTexture(sector_t::ceiling) // killough 3/7/98: Add checks for (x,y) offsets - || backsector->planes[sector_t::ceiling].xform != frontsector->planes[sector_t::ceiling].xform - || backsector->GetAlpha(sector_t::ceiling) != frontsector->GetAlpha(sector_t::ceiling) + || mBackSector->planes[sector_t::ceiling].xform != mFrontSector->planes[sector_t::ceiling].xform + || mBackSector->GetAlpha(sector_t::ceiling) != mFrontSector->GetAlpha(sector_t::ceiling) // killough 4/15/98: 
prevent 2s normals // from bleeding through fake ceilings - || (frontsector->heightsec && frontsector->GetTexture(sector_t::ceiling) != skyflatnum) + || (mFrontSector->heightsec && mFrontSector->GetTexture(sector_t::ceiling) != skyflatnum) - || backsector->GetPlaneLight(sector_t::ceiling) != frontsector->GetPlaneLight(sector_t::ceiling) - || backsector->GetFlags(sector_t::ceiling) != frontsector->GetFlags(sector_t::ceiling) + || mBackSector->GetPlaneLight(sector_t::ceiling) != mFrontSector->GetPlaneLight(sector_t::ceiling) + || mBackSector->GetFlags(sector_t::ceiling) != mFrontSector->GetFlags(sector_t::ceiling) // [RH] Add check for colormaps - || backsector->ColorMap != frontsector->ColorMap + || mBackSector->ColorMap != mFrontSector->ColorMap // kg3D - add fake lights - || (frontsector->e && frontsector->e->XFloor.lightlist.Size()) - || (backsector->e && backsector->e->XFloor.lightlist.Size()) + || (mFrontSector->e && mFrontSector->e->XFloor.lightlist.Size()) + || (mBackSector->e && mBackSector->e->XFloor.lightlist.Size()) || (sidedef->GetTexture(side_t::mid).isValid() && ((linedef->flags & (ML_CLIP_MIDTEX | ML_WRAP_MIDTEX)) || @@ -779,7 +779,7 @@ namespace swrenderer { // normal orientation if (linedef->flags & ML_DONTPEGTOP) { // top of texture at top - rw_toptexturemid = (frontsector->GetPlaneTexZ(sector_t::ceiling) - ViewPos.Z) * yrepeat; + rw_toptexturemid = (mFrontSector->GetPlaneTexZ(sector_t::ceiling) - ViewPos.Z) * yrepeat; if (rowoffset < 0 && toptexture != NULL) { rowoffset += toptexture->GetHeight(); @@ -787,7 +787,7 @@ namespace swrenderer } else { // bottom of texture at bottom - rw_toptexturemid = (backsector->GetPlaneTexZ(sector_t::ceiling) - ViewPos.Z) * yrepeat + toptexture->GetHeight(); + rw_toptexturemid = (mBackSector->GetPlaneTexZ(sector_t::ceiling) - ViewPos.Z) * yrepeat + toptexture->GetHeight(); } } else @@ -795,11 +795,11 @@ namespace swrenderer rowoffset = -rowoffset; if (linedef->flags & ML_DONTPEGTOP) { // bottom of texture at top - 
rw_toptexturemid = (frontsector->GetPlaneTexZ(sector_t::ceiling) - ViewPos.Z) * yrepeat + toptexture->GetHeight(); + rw_toptexturemid = (mFrontSector->GetPlaneTexZ(sector_t::ceiling) - ViewPos.Z) * yrepeat + toptexture->GetHeight(); } else { // top of texture at bottom - rw_toptexturemid = (backsector->GetPlaneTexZ(sector_t::ceiling) - ViewPos.Z) * yrepeat; + rw_toptexturemid = (mBackSector->GetPlaneTexZ(sector_t::ceiling) - ViewPos.Z) * yrepeat; } } if (toptexture->bWorldPanning) @@ -828,7 +828,7 @@ namespace swrenderer } else { // top of texture at top - rw_bottomtexturemid = (backsector->GetPlaneTexZ(sector_t::floor) - ViewPos.Z) * yrepeat; + rw_bottomtexturemid = (mBackSector->GetPlaneTexZ(sector_t::floor) - ViewPos.Z) * yrepeat; if (rowoffset < 0 && bottomtexture != NULL) { rowoffset += bottomtexture->GetHeight(); @@ -844,7 +844,7 @@ namespace swrenderer } else { // bottom of texture at top - rw_bottomtexturemid = (backsector->GetPlaneTexZ(sector_t::floor) - ViewPos.Z) * yrepeat + bottomtexture->GetHeight(); + rw_bottomtexturemid = (mBackSector->GetPlaneTexZ(sector_t::floor) - ViewPos.Z) * yrepeat + bottomtexture->GetHeight(); } } if (bottomtexture->bWorldPanning) @@ -863,20 +863,20 @@ namespace swrenderer // it is definitely invisible and doesn't need to be marked. 
// killough 3/7/98: add deep water check - if (frontsector->GetHeightSec() == NULL) + if (mFrontSector->GetHeightSec() == NULL) { int planeside; - planeside = frontsector->floorplane.PointOnSide(ViewPos); - if (frontsector->floorplane.fC() < 0) // 3D floors have the floor backwards + planeside = mFrontSector->floorplane.PointOnSide(ViewPos); + if (mFrontSector->floorplane.fC() < 0) // 3D floors have the floor backwards planeside = -planeside; if (planeside <= 0) // above view plane markfloor = false; - if (frontsector->GetTexture(sector_t::ceiling) != skyflatnum) + if (mFrontSector->GetTexture(sector_t::ceiling) != skyflatnum) { - planeside = frontsector->ceilingplane.PointOnSide(ViewPos); - if (frontsector->ceilingplane.fC() > 0) // 3D floors have the ceiling backwards + planeside = mFrontSector->ceilingplane.PointOnSide(ViewPos); + if (mFrontSector->ceilingplane.fC() > 0) // 3D floors have the ceiling backwards planeside = -planeside; if (planeside <= 0) // below view plane markceiling = false; @@ -888,7 +888,7 @@ namespace swrenderer bool segtextured = midtex != NULL || toptexture != NULL || bottomtexture != NULL; // calculate light table - if (needlights && (segtextured || (backsector && IsFogBoundary(frontsector, backsector)))) + if (needlights && (segtextured || (mBackSector && IsFogBoundary(mFrontSector, mBackSector)))) { lwallscale = midtex ? 
(midtex->Scale.X * sidedef->GetTextureXScale(side_t::mid)) : @@ -901,7 +901,7 @@ namespace swrenderer CameraLight *cameraLight = CameraLight::Instance(); if (cameraLight->FixedColormap() == nullptr && cameraLight->FixedLightLevel() < 0) { - wallshade = LIGHT2SHADE(curline->sidedef->GetLightLevel(foggy, frontsector->lightlevel) + R_ActualExtraLight(foggy)); + wallshade = LIGHT2SHADE(mLineSegment->sidedef->GetLightLevel(foggy, mFrontSector->lightlevel) + R_ActualExtraLight(foggy)); double GlobVis = LightVisibility::Instance()->WallGlobVis(); rw_lightleft = float(GlobVis / WallC.sz1); rw_lightstep = float((GlobVis / WallC.sz2 - rw_lightleft) / (WallC.sx2 - WallC.sx1)); @@ -966,8 +966,8 @@ namespace swrenderer short bottom = MIN(walltop.ScreenY[x], floorclip[x]); if (top < bottom) { - ceilingplane->top[x] = top; - ceilingplane->bottom[x] = bottom; + mCeilingPlane->top[x] = top; + mCeilingPlane->bottom[x] = bottom; } } } @@ -982,8 +982,8 @@ namespace swrenderer if (top < bottom) { assert(bottom <= viewheight); - floorplane->top[x] = top; - floorplane->bottom[x] = bottom; + mFloorPlane->top[x] = top; + mFloorPlane->bottom[x] = bottom; } } } @@ -1018,19 +1018,19 @@ namespace swrenderer } if (clip3d->fake3D & FAKE3D_FAKEMASK) return; - FLightNode *light_list = (curline && curline->sidedef) ? curline->sidedef->lighthead : nullptr; + FLightNode *light_list = (mLineSegment && mLineSegment->sidedef) ? 
mLineSegment->sidedef->lighthead : nullptr; // draw the wall tiers if (midtexture) { // one sided line if (midtexture->UseType != FTexture::TEX_Null && viewactive) { - rw_pic = midtexture; + FTexture *rw_pic = midtexture; xscale = rw_pic->Scale.X * rw_midtexturescalex; yscale = rw_pic->Scale.Y * rw_midtexturescaley; if (xscale != lwallscale) { - walltexcoords.ProjectPos(curline->sidedef->TexelLength*xscale, WallC.sx1, WallC.sx2, WallT); + walltexcoords.ProjectPos(mLineSegment->sidedef->TexelLength*xscale, WallC.sx1, WallC.sx2, WallT); lwallscale = xscale; } if (midtexture->bWorldPanning) @@ -1047,7 +1047,7 @@ namespace swrenderer } RenderWallPart renderWallpart(Thread); - renderWallpart.Render(drawerargs, frontsector, curline, WallC, rw_pic, x1, x2, walltop.ScreenY, wallbottom.ScreenY, rw_midtexturemid, walltexcoords.VStep, walltexcoords.UPos, yscale, MAX(rw_frontcz1, rw_frontcz2), MIN(rw_frontfz1, rw_frontfz2), false, wallshade, rw_offset, rw_light, rw_lightstep, light_list, foggy, basecolormap); + renderWallpart.Render(drawerargs, mFrontSector, mLineSegment, WallC, rw_pic, x1, x2, walltop.ScreenY, wallbottom.ScreenY, rw_midtexturemid, walltexcoords.VStep, walltexcoords.UPos, yscale, MAX(mFrontCeilingZ1, mFrontCeilingZ2), MIN(mFrontFloorZ1, mFrontFloorZ2), false, wallshade, rw_offset, rw_light, rw_lightstep, light_list, foggy, basecolormap); } fillshort(ceilingclip + x1, x2 - x1, viewheight); fillshort(floorclip + x1, x2 - x1, 0xffff); @@ -1062,12 +1062,12 @@ namespace swrenderer } if (viewactive) { - rw_pic = toptexture; + FTexture *rw_pic = toptexture; xscale = rw_pic->Scale.X * rw_toptexturescalex; yscale = rw_pic->Scale.Y * rw_toptexturescaley; if (xscale != lwallscale) { - walltexcoords.ProjectPos(curline->sidedef->TexelLength*xscale, WallC.sx1, WallC.sx2, WallT); + walltexcoords.ProjectPos(mLineSegment->sidedef->TexelLength*xscale, WallC.sx1, WallC.sx2, WallT); lwallscale = xscale; } if (toptexture->bWorldPanning) @@ -1084,7 +1084,7 @@ namespace swrenderer } 
RenderWallPart renderWallpart(Thread); - renderWallpart.Render(drawerargs, frontsector, curline, WallC, rw_pic, x1, x2, walltop.ScreenY, wallupper.ScreenY, rw_toptexturemid, walltexcoords.VStep, walltexcoords.UPos, yscale, MAX(rw_frontcz1, rw_frontcz2), MIN(rw_backcz1, rw_backcz2), false, wallshade, rw_offset, rw_light, rw_lightstep, light_list, foggy, basecolormap); + renderWallpart.Render(drawerargs, mFrontSector, mLineSegment, WallC, rw_pic, x1, x2, walltop.ScreenY, wallupper.ScreenY, rw_toptexturemid, walltexcoords.VStep, walltexcoords.UPos, yscale, MAX(mFrontCeilingZ1, mFrontCeilingZ2), MIN(mBackCeilingZ1, mBackCeilingZ2), false, wallshade, rw_offset, rw_light, rw_lightstep, light_list, foggy, basecolormap); } memcpy(ceilingclip + x1, wallupper.ScreenY + x1, (x2 - x1) * sizeof(short)); } @@ -1102,12 +1102,12 @@ namespace swrenderer } if (viewactive) { - rw_pic = bottomtexture; + FTexture *rw_pic = bottomtexture; xscale = rw_pic->Scale.X * rw_bottomtexturescalex; yscale = rw_pic->Scale.Y * rw_bottomtexturescaley; if (xscale != lwallscale) { - walltexcoords.ProjectPos(curline->sidedef->TexelLength*xscale, WallC.sx1, WallC.sx2, WallT); + walltexcoords.ProjectPos(mLineSegment->sidedef->TexelLength*xscale, WallC.sx1, WallC.sx2, WallT); lwallscale = xscale; } if (bottomtexture->bWorldPanning) @@ -1124,7 +1124,7 @@ namespace swrenderer } RenderWallPart renderWallpart(Thread); - renderWallpart.Render(drawerargs, frontsector, curline, WallC, rw_pic, x1, x2, walllower.ScreenY, wallbottom.ScreenY, rw_bottomtexturemid, walltexcoords.VStep, walltexcoords.UPos, yscale, MAX(rw_backfz1, rw_backfz2), MIN(rw_frontfz1, rw_frontfz2), false, wallshade, rw_offset, rw_light, rw_lightstep, light_list, foggy, basecolormap); + renderWallpart.Render(drawerargs, mFrontSector, mLineSegment, WallC, rw_pic, x1, x2, walllower.ScreenY, wallbottom.ScreenY, rw_bottomtexturemid, walltexcoords.VStep, walltexcoords.UPos, yscale, MAX(mBackFloorZ1, mBackFloorZ2), MIN(mFrontFloorZ1, mFrontFloorZ2), 
false, wallshade, rw_offset, rw_light, rw_lightstep, light_list, foggy, basecolormap); } memcpy(floorclip + x1, walllower.ScreenY + x1, (x2 - x1) * sizeof(short)); } diff --git a/src/swrenderer/line/r_line.h b/src/swrenderer/line/r_line.h index ef38e9e65f..272c70d488 100644 --- a/src/swrenderer/line/r_line.h +++ b/src/swrenderer/line/r_line.h @@ -65,43 +65,49 @@ namespace swrenderer bool IsFogBoundary(sector_t *front, sector_t *back) const; bool SkyboxCompare(sector_t *frontsector, sector_t *backsector) const; - subsector_t *InSubsector; - sector_t *frontsector; - sector_t *backsector; - VisiblePlane *floorplane; - VisiblePlane *ceilingplane; + // Line variables: - seg_t *curline; - side_t *sidedef; - line_t *linedef; + subsector_t *mSubsector; + sector_t *mFrontSector; + sector_t *mBackSector; + VisiblePlane *mFloorPlane; + VisiblePlane *mCeilingPlane; + seg_t *mLineSegment; + + double mBackCeilingZ1; + double mBackCeilingZ2; + double mBackFloorZ1; + double mBackFloorZ2; + double mFrontCeilingZ1; + double mFrontCeilingZ2; + double mFrontFloorZ1; + double mFrontFloorZ2; + + bool mDoorClosed; FWallCoords WallC; FWallTmapVals WallT; - double rw_backcz1; - double rw_backcz2; - double rw_backfz1; - double rw_backfz2; - double rw_frontcz1; - double rw_frontcz2; - double rw_frontfz1; - double rw_frontfz2; + bool foggy; + FDynamicColormap *basecolormap; + + // Wall segment variables: fixed_t rw_offset_top; fixed_t rw_offset_mid; fixed_t rw_offset_bottom; - ProjectedWallCull rw_ceilstat, rw_floorstat; - bool rw_mustmarkfloor, rw_mustmarkceiling; bool rw_prepped; bool rw_markportal; bool rw_havehigh; bool rw_havelow; + int wallshade; float rw_light; float rw_lightstep; float rw_lightleft; + double lwallscale; fixed_t rw_offset; double rw_midtexturemid; double rw_toptexturemid; @@ -113,21 +119,14 @@ namespace swrenderer double rw_bottomtexturescalex; double rw_bottomtexturescaley; - FTexture *rw_pic; - - bool doorclosed; - int wallshade; - bool markfloor; // False if the 
back side is the same plane. bool markceiling; FTexture *toptexture; FTexture *bottomtexture; FTexture *midtexture; - bool foggy; - FDynamicColormap *basecolormap; - - double lwallscale; + ProjectedWallCull mCeilingClipped; + ProjectedWallCull mFloorClipped; ProjectedWallLine walltop; ProjectedWallLine wallbottom; From 1141e05fdf29dac8564b3cf586855b7a90d8591f Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 11 Feb 2017 18:52:49 +0100 Subject: [PATCH 831/912] Decipher some line setup code into ShouldMarkFloor, ShouldMarkCeiling and ShouldMarkPortal --- src/swrenderer/line/r_line.cpp | 246 +++++++++++++++++++-------------- src/swrenderer/line/r_line.h | 5 +- 2 files changed, 143 insertions(+), 108 deletions(-) diff --git a/src/swrenderer/line/r_line.cpp b/src/swrenderer/line/r_line.cpp index c01d1431cf..43c06efb2c 100644 --- a/src/swrenderer/line/r_line.cpp +++ b/src/swrenderer/line/r_line.cpp @@ -362,7 +362,9 @@ namespace swrenderer draw_segment->bkup = nullptr; draw_segment->swall = nullptr; - if (rw_markportal) + bool markportal = ShouldMarkPortal(); + + if (markportal) { draw_segment->silhouette = SIL_BOTH; } @@ -575,12 +577,12 @@ namespace swrenderer // [RH] Draw any decals bound to the seg // [ZZ] Only if not an active mirror - if (!rw_markportal) + if (!markportal) { RenderDecal::RenderDecals(Thread, mLineSegment->sidedef, draw_segment, wallshade, rw_lightleft, rw_lightstep, mLineSegment, WallC, foggy, basecolormap, walltop.ScreenY, wallbottom.ScreenY); } - if (rw_markportal) + if (markportal) { Thread->Portal->AddLinePortal(mLineSegment->linedef, draw_segment->x1, draw_segment->x2, draw_segment->sprtopclip, draw_segment->sprbottomclip); } @@ -588,36 +590,163 @@ namespace swrenderer return (clip3d->fake3D & FAKE3D_FAKEMASK) == 0; } + bool SWRenderLine::ShouldMarkFloor() const + { + // deep water check + if (mFrontSector->GetHeightSec() == nullptr) + { + int planeside = mFrontSector->floorplane.PointOnSide(ViewPos); + if (mFrontSector->floorplane.fC() 
< 0) // 3D floors have the floor backwards + planeside = -planeside; + if (planeside <= 0) // above view plane + return false; + } + + side_t *sidedef = mLineSegment->sidedef; + line_t *linedef = mLineSegment->linedef; + + if (sidedef == linedef->sidedef[0] && (linedef->special == Line_Mirror && r_drawmirrors)) + { + return true; + } + else if (mBackSector == nullptr) // single sided line + { + return true; + } + else // two-sided line + { + if (linedef->isVisualPortal()) return true; + + // closed door + if (mBackCeilingZ1 <= mFrontFloorZ1 && mBackCeilingZ2 <= mFrontFloorZ2) return true; + if (mBackFloorZ1 >= mFrontCeilingZ1 && mBackFloorZ2 >= mFrontCeilingZ2) return true; + + if (mBackSector->floorplane != mFrontSector->floorplane) return true; + if (mBackSector->lightlevel != mFrontSector->lightlevel) return true; + if (mBackSector->GetTexture(sector_t::floor) != mFrontSector->GetTexture(sector_t::floor)) return true; + if (mBackSector->GetPlaneLight(sector_t::floor) != mFrontSector->GetPlaneLight(sector_t::floor)) return true; + + // Add checks for (x,y) offsets + if (mBackSector->planes[sector_t::floor].xform != mFrontSector->planes[sector_t::floor].xform) return true; + if (mBackSector->GetAlpha(sector_t::floor) != mFrontSector->GetAlpha(sector_t::floor)) return true; + + // prevent 2s normals from bleeding through deep water + if (mFrontSector->heightsec) return true; + + if (mBackSector->GetVisFlags(sector_t::floor) != mFrontSector->GetVisFlags(sector_t::floor)) return true; + if (mBackSector->ColorMap != mFrontSector->ColorMap) return true; + if (mFrontSector->e && mFrontSector->e->XFloor.lightlist.Size()) return true; + if (mBackSector->e && mBackSector->e->XFloor.lightlist.Size()) return true; + + if (sidedef->GetTexture(side_t::mid).isValid() && ((linedef->flags & (ML_CLIP_MIDTEX | ML_WRAP_MIDTEX)) || sidedef->Flags & (WALLF_CLIP_MIDTEX | WALLF_WRAP_MIDTEX))) return true; + + return false; + } + } + + bool SWRenderLine::ShouldMarkCeiling() const + { + 
// deep water check + if (mFrontSector->GetHeightSec() == nullptr && mFrontSector->GetTexture(sector_t::ceiling) != skyflatnum) + { + int planeside = mFrontSector->ceilingplane.PointOnSide(ViewPos); + if (mFrontSector->ceilingplane.fC() > 0) // 3D floors have the ceiling backwards + planeside = -planeside; + if (planeside <= 0) // below view plane + return false; + } + + side_t *sidedef = mLineSegment->sidedef; + line_t *linedef = mLineSegment->linedef; + + if (sidedef == linedef->sidedef[0] && (linedef->special == Line_Mirror && r_drawmirrors)) + { + return true; + } + else if (mBackSector == nullptr) // single sided line + { + return true; + } + else // two-sided line + { + if (linedef->isVisualPortal()) return true; + + // closed door + if (mBackCeilingZ1 <= mFrontFloorZ1 && mBackCeilingZ2 <= mFrontFloorZ2) return true; + if (mBackFloorZ1 >= mFrontCeilingZ1 && mBackFloorZ2 >= mFrontCeilingZ2) return true; + + if (mFrontSector->GetTexture(sector_t::ceiling) != skyflatnum || mBackSector->GetTexture(sector_t::ceiling) != skyflatnum) + { + if (mBackSector->ceilingplane != mFrontSector->ceilingplane) return true; + if (mBackSector->lightlevel != mFrontSector->lightlevel) return true; + if (mBackSector->GetTexture(sector_t::ceiling) != mFrontSector->GetTexture(sector_t::ceiling)) return true; + + // Add checks for (x,y) offsets + if (mBackSector->planes[sector_t::ceiling].xform != mFrontSector->planes[sector_t::ceiling].xform) return true; + if (mBackSector->GetAlpha(sector_t::ceiling) != mFrontSector->GetAlpha(sector_t::ceiling)) return true; + + // prevent 2s normals from bleeding through fake ceilings + if (mFrontSector->heightsec && mFrontSector->GetTexture(sector_t::ceiling) != skyflatnum) return true; + + if (mBackSector->GetPlaneLight(sector_t::ceiling) != mFrontSector->GetPlaneLight(sector_t::ceiling)) return true; + if (mBackSector->GetFlags(sector_t::ceiling) != mFrontSector->GetFlags(sector_t::ceiling)) return true; + + if (mBackSector->ColorMap != 
mFrontSector->ColorMap) return true; + if (mFrontSector->e && mFrontSector->e->XFloor.lightlist.Size()) return true; + if (mBackSector->e && mBackSector->e->XFloor.lightlist.Size()) return true; + + if (sidedef->GetTexture(side_t::mid).isValid()) + { + if (linedef->flags & (ML_CLIP_MIDTEX | ML_WRAP_MIDTEX)) return true; + if (sidedef->Flags & (WALLF_CLIP_MIDTEX | WALLF_WRAP_MIDTEX)) return true; + } + } + return false; + } + } + + bool SWRenderLine::ShouldMarkPortal() const + { + side_t *sidedef = mLineSegment->sidedef; + line_t *linedef = mLineSegment->linedef; + + if (sidedef == linedef->sidedef[0] && (linedef->special == Line_Mirror && r_drawmirrors)) + { + return true; + } + else + { + return linedef->isVisualPortal(); + } + } + void SWRenderLine::SetWallVariables(bool needlights) { double rowoffset; double yrepeat; - rw_markportal = false; - side_t *sidedef = mLineSegment->sidedef; line_t *linedef = mLineSegment->linedef; // mark the segment as visible for auto map if (!Thread->Scene->DontMapLines()) linedef->flags |= ML_MAPPED; + markfloor = ShouldMarkFloor(); + markceiling = ShouldMarkCeiling(); + midtexture = toptexture = bottomtexture = 0; if (sidedef == linedef->sidedef[0] && (linedef->special == Line_Mirror && r_drawmirrors)) // [ZZ] compatibility with r_drawmirrors cvar that existed way before portals { - markfloor = markceiling = true; // act like a one-sided wall here (todo: check how does this work with transparency) - rw_markportal = true; } else if (mBackSector == NULL) { // single sided line - // a single sided line is terminal, so it must mark ends - markfloor = markceiling = true; + // [RH] Horizon lines do not need to be textured if (linedef->isVisualPortal()) { - rw_markportal = true; } else if (linedef->special != Line_Horizon) { @@ -694,78 +823,6 @@ namespace swrenderer rw_frontlowertop = mBackSector->GetPlaneTexZ(sector_t::ceiling); } - if (linedef->isVisualPortal()) - { - markceiling = markfloor = true; - } - else if ((mBackCeilingZ1 <= 
mFrontFloorZ1 && mBackCeilingZ2 <= mFrontFloorZ2) || - (mBackFloorZ1 >= mFrontCeilingZ1 && mBackFloorZ2 >= mFrontCeilingZ2)) - { - // closed door - markceiling = markfloor = true; - } - else - { - markfloor = - mBackSector->floorplane != mFrontSector->floorplane - || mBackSector->lightlevel != mFrontSector->lightlevel - || mBackSector->GetTexture(sector_t::floor) != mFrontSector->GetTexture(sector_t::floor) - || mBackSector->GetPlaneLight(sector_t::floor) != mFrontSector->GetPlaneLight(sector_t::floor) - - // killough 3/7/98: Add checks for (x,y) offsets - || mBackSector->planes[sector_t::floor].xform != mFrontSector->planes[sector_t::floor].xform - || mBackSector->GetAlpha(sector_t::floor) != mFrontSector->GetAlpha(sector_t::floor) - - // killough 4/15/98: prevent 2s normals - // from bleeding through deep water - || mFrontSector->heightsec - - || mBackSector->GetVisFlags(sector_t::floor) != mFrontSector->GetVisFlags(sector_t::floor) - - // [RH] Add checks for colormaps - || mBackSector->ColorMap != mFrontSector->ColorMap - - - // kg3D - add fake lights - || (mFrontSector->e && mFrontSector->e->XFloor.lightlist.Size()) - || (mBackSector->e && mBackSector->e->XFloor.lightlist.Size()) - - || (sidedef->GetTexture(side_t::mid).isValid() && - ((linedef->flags & (ML_CLIP_MIDTEX | ML_WRAP_MIDTEX)) || - (sidedef->Flags & (WALLF_CLIP_MIDTEX | WALLF_WRAP_MIDTEX)))) - ; - - markceiling = (mFrontSector->GetTexture(sector_t::ceiling) != skyflatnum || - mBackSector->GetTexture(sector_t::ceiling) != skyflatnum) && - ( - mBackSector->ceilingplane != mFrontSector->ceilingplane - || mBackSector->lightlevel != mFrontSector->lightlevel - || mBackSector->GetTexture(sector_t::ceiling) != mFrontSector->GetTexture(sector_t::ceiling) - - // killough 3/7/98: Add checks for (x,y) offsets - || mBackSector->planes[sector_t::ceiling].xform != mFrontSector->planes[sector_t::ceiling].xform - || mBackSector->GetAlpha(sector_t::ceiling) != mFrontSector->GetAlpha(sector_t::ceiling) - - // killough 
4/15/98: prevent 2s normals - // from bleeding through fake ceilings - || (mFrontSector->heightsec && mFrontSector->GetTexture(sector_t::ceiling) != skyflatnum) - - || mBackSector->GetPlaneLight(sector_t::ceiling) != mFrontSector->GetPlaneLight(sector_t::ceiling) - || mBackSector->GetFlags(sector_t::ceiling) != mFrontSector->GetFlags(sector_t::ceiling) - - // [RH] Add check for colormaps - || mBackSector->ColorMap != mFrontSector->ColorMap - - // kg3D - add fake lights - || (mFrontSector->e && mFrontSector->e->XFloor.lightlist.Size()) - || (mBackSector->e && mBackSector->e->XFloor.lightlist.Size()) - - || (sidedef->GetTexture(side_t::mid).isValid() && - ((linedef->flags & (ML_CLIP_MIDTEX | ML_WRAP_MIDTEX)) || - (sidedef->Flags & (WALLF_CLIP_MIDTEX | WALLF_WRAP_MIDTEX)))) - ); - } - if (rw_havehigh) { // top texture toptexture = TexMan(sidedef->GetTexture(side_t::top), true); @@ -856,31 +913,6 @@ namespace swrenderer rw_bottomtexturemid += rowoffset; } } - rw_markportal = linedef->isVisualPortal(); - } - - // if a floor / ceiling plane is on the wrong side of the view plane, - // it is definitely invisible and doesn't need to be marked. 
- - // killough 3/7/98: add deep water check - if (mFrontSector->GetHeightSec() == NULL) - { - int planeside; - - planeside = mFrontSector->floorplane.PointOnSide(ViewPos); - if (mFrontSector->floorplane.fC() < 0) // 3D floors have the floor backwards - planeside = -planeside; - if (planeside <= 0) // above view plane - markfloor = false; - - if (mFrontSector->GetTexture(sector_t::ceiling) != skyflatnum) - { - planeside = mFrontSector->ceilingplane.PointOnSide(ViewPos); - if (mFrontSector->ceilingplane.fC() > 0) // 3D floors have the ceiling backwards - planeside = -planeside; - if (planeside <= 0) // below view plane - markceiling = false; - } } FTexture *midtex = TexMan(sidedef->GetTexture(side_t::mid), true); diff --git a/src/swrenderer/line/r_line.h b/src/swrenderer/line/r_line.h index 272c70d488..071eeb449c 100644 --- a/src/swrenderer/line/r_line.h +++ b/src/swrenderer/line/r_line.h @@ -65,6 +65,10 @@ namespace swrenderer bool IsFogBoundary(sector_t *front, sector_t *back) const; bool SkyboxCompare(sector_t *frontsector, sector_t *backsector) const; + bool ShouldMarkFloor() const; + bool ShouldMarkCeiling() const; + bool ShouldMarkPortal() const; + // Line variables: subsector_t *mSubsector; @@ -98,7 +102,6 @@ namespace swrenderer fixed_t rw_offset_bottom; bool rw_prepped; - bool rw_markportal; bool rw_havehigh; bool rw_havelow; From 1983dd24836cfff475dfe5488ad4a1d906e12ee1 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 11 Feb 2017 19:13:26 +0100 Subject: [PATCH 832/912] Move code closer to where it is used --- src/swrenderer/line/r_line.cpp | 55 ++++++++++++++++------------------ 1 file changed, 25 insertions(+), 30 deletions(-) diff --git a/src/swrenderer/line/r_line.cpp b/src/swrenderer/line/r_line.cpp index 43c06efb2c..4027548408 100644 --- a/src/swrenderer/line/r_line.cpp +++ b/src/swrenderer/line/r_line.cpp @@ -64,9 +64,6 @@ namespace swrenderer void SWRenderLine::Render(seg_t *line, subsector_t *subsector, sector_t *sector, sector_t 
*fakebacksector, VisiblePlane *linefloorplane, VisiblePlane *lineceilingplane, bool infog, FDynamicColormap *colormap) { - bool solid; - DVector2 pt1, pt2; - mSubsector = subsector; mFrontSector = sector; mBackSector = fakebacksector; @@ -76,8 +73,8 @@ namespace swrenderer basecolormap = colormap; mLineSegment = line; - pt1 = line->v1->fPos() - ViewPos; - pt2 = line->v2->fPos() - ViewPos; + DVector2 pt1 = line->v1->fPos() - ViewPos; + DVector2 pt2 = line->v2->fPos() - ViewPos; // Reject lines not facing viewer if (pt1.Y * (pt1.X - pt2.X) + pt1.X * (pt2.Y - pt1.Y) >= 0) @@ -87,11 +84,10 @@ namespace swrenderer return; RenderPortal *renderportal = Thread->Portal.get(); - if (WallC.sx1 >= renderportal->WindowRight || WallC.sx2 <= renderportal->WindowLeft) return; - if (line->linedef == NULL) + if (line->linedef == nullptr) { if (Thread->ClipSegments->Check(WallC.sx1, WallC.sx2)) { @@ -105,9 +101,8 @@ namespace swrenderer if (!renderportal->CurrentPortalInSkybox && renderportal->CurrentPortal && P_ClipLineToPortal(line->linedef, renderportal->CurrentPortal->dst, ViewPos)) return; - vertex_t *v1, *v2; - v1 = line->linedef->v1; - v2 = line->linedef->v2; + vertex_t *v1 = line->linedef->v1; + vertex_t *v2 = line->linedef->v2; if ((v1 == line->v1 && v2 == line->v2) || (v2 == line->v1 && v1 == line->v2)) { // The seg is the entire wall. @@ -135,6 +130,8 @@ namespace swrenderer rw_havehigh = rw_havelow = false; + bool solid; + // Single sided line? 
if (mBackSector == NULL) { @@ -269,18 +266,6 @@ namespace swrenderer rw_prepped = false; - if (line->linedef->special == Line_Horizon) - { - // Be aware: Line_Horizon does not work properly with sloped planes - fillshort(walltop.ScreenY + WallC.sx1, WallC.sx2 - WallC.sx1, centery); - fillshort(wallbottom.ScreenY + WallC.sx1, WallC.sx2 - WallC.sx1, centery); - } - else - { - mCeilingClipped = walltop.Project(mFrontSector->ceilingplane, &WallC, mLineSegment, renderportal->MirrorFlags & RF_XFLIP); - mFloorClipped = wallbottom.Project(mFrontSector->floorplane, &WallC, mLineSegment, renderportal->MirrorFlags & RF_XFLIP); - } - bool visible = Thread->ClipSegments->Clip(WallC.sx1, WallC.sx2, solid, this); if (visible) @@ -722,8 +707,18 @@ namespace swrenderer void SWRenderLine::SetWallVariables(bool needlights) { - double rowoffset; - double yrepeat; + if (mLineSegment->linedef->special == Line_Horizon) + { + // Be aware: Line_Horizon does not work properly with sloped planes + fillshort(walltop.ScreenY + WallC.sx1, WallC.sx2 - WallC.sx1, centery); + fillshort(wallbottom.ScreenY + WallC.sx1, WallC.sx2 - WallC.sx1, centery); + } + else + { + RenderPortal *renderportal = Thread->Portal.get(); + mCeilingClipped = walltop.Project(mFrontSector->ceilingplane, &WallC, mLineSegment, renderportal->MirrorFlags & RF_XFLIP); + mFloorClipped = wallbottom.Project(mFrontSector->floorplane, &WallC, mLineSegment, renderportal->MirrorFlags & RF_XFLIP); + } side_t *sidedef = mLineSegment->sidedef; line_t *linedef = mLineSegment->linedef; @@ -752,10 +747,10 @@ namespace swrenderer { midtexture = TexMan(sidedef->GetTexture(side_t::mid), true); rw_offset_mid = FLOAT2FIXED(sidedef->GetTextureXOffset(side_t::mid)); - rowoffset = sidedef->GetTextureYOffset(side_t::mid); + double rowoffset = sidedef->GetTextureYOffset(side_t::mid); rw_midtexturescalex = sidedef->GetTextureXScale(side_t::mid); rw_midtexturescaley = sidedef->GetTextureYScale(side_t::mid); - yrepeat = midtexture->Scale.Y * 
rw_midtexturescaley; + double yrepeat = midtexture->Scale.Y * rw_midtexturescaley; if (yrepeat >= 0) { // normal orientation if (linedef->flags & ML_DONTPEGBOTTOM) @@ -828,10 +823,10 @@ namespace swrenderer toptexture = TexMan(sidedef->GetTexture(side_t::top), true); rw_offset_top = FLOAT2FIXED(sidedef->GetTextureXOffset(side_t::top)); - rowoffset = sidedef->GetTextureYOffset(side_t::top); + double rowoffset = sidedef->GetTextureYOffset(side_t::top); rw_toptexturescalex = sidedef->GetTextureXScale(side_t::top); rw_toptexturescaley = sidedef->GetTextureYScale(side_t::top); - yrepeat = toptexture->Scale.Y * rw_toptexturescaley; + double yrepeat = toptexture->Scale.Y * rw_toptexturescaley; if (yrepeat >= 0) { // normal orientation if (linedef->flags & ML_DONTPEGTOP) @@ -873,10 +868,10 @@ namespace swrenderer bottomtexture = TexMan(sidedef->GetTexture(side_t::bottom), true); rw_offset_bottom = FLOAT2FIXED(sidedef->GetTextureXOffset(side_t::bottom)); - rowoffset = sidedef->GetTextureYOffset(side_t::bottom); + double rowoffset = sidedef->GetTextureYOffset(side_t::bottom); rw_bottomtexturescalex = sidedef->GetTextureXScale(side_t::bottom); rw_bottomtexturescaley = sidedef->GetTextureYScale(side_t::bottom); - yrepeat = bottomtexture->Scale.Y * rw_bottomtexturescaley; + double yrepeat = bottomtexture->Scale.Y * rw_bottomtexturescaley; if (yrepeat >= 0) { // normal orientation if (linedef->flags & ML_DONTPEGBOTTOM) From 9e2702d8853f41c5510d60842f268d98c4d38322 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 11 Feb 2017 20:01:23 +0100 Subject: [PATCH 833/912] Move more encrypted code into IsInvisibleLine, IsDoorClosed and IsSolid --- src/swrenderer/line/r_line.cpp | 195 +++++++++++++++++---------------- src/swrenderer/line/r_line.h | 4 + 2 files changed, 107 insertions(+), 92 deletions(-) diff --git a/src/swrenderer/line/r_line.cpp b/src/swrenderer/line/r_line.cpp index 4027548408..6f312664cf 100644 --- a/src/swrenderer/line/r_line.cpp +++ 
b/src/swrenderer/line/r_line.cpp @@ -130,21 +130,13 @@ namespace swrenderer rw_havehigh = rw_havelow = false; - bool solid; - - // Single sided line? - if (mBackSector == NULL) - { - solid = true; - } - else + if (mBackSector) { // kg3D - its fake, no transfer_heights if (!(clip3d->fake3D & FAKE3D_FAKEBACK)) { // killough 3/8/98, 4/4/98: hack for invisible ceilings / deep water mBackSector = Thread->OpaquePass->FakeFlat(mBackSector, &tempsec, nullptr, nullptr, mLineSegment, WallC.sx1, WallC.sx2, mFrontCeilingZ1, mFrontCeilingZ2); } - mDoorClosed = false; // killough 4/16/98 mBackCeilingZ1 = mBackSector->ceilingplane.ZatPoint(line->v1); mBackFloorZ1 = mBackSector->floorplane.ZatPoint(line->v1); @@ -175,98 +167,27 @@ namespace swrenderer rw_havelow = true; walllower.Project(mBackSector->floorplane, &WallC, mLineSegment, renderportal->MirrorFlags & RF_XFLIP); } + } - // Portal - if (line->linedef->isVisualPortal() && line->sidedef == line->linedef->sidedef[0]) + mDoorClosed = IsDoorClosed(); + + if (IsInvisibleLine()) + { + // When using GL nodes, do a clipping test for these lines so we can + // mark their subsectors as visible for automap texturing. + if (hasglnodes && !(mSubsector->flags & SSECF_DRAWN)) { - solid = true; - } - // Closed door. 
- else if ((mBackCeilingZ1 <= mFrontFloorZ1 && mBackCeilingZ2 <= mFrontFloorZ2) || - (mBackFloorZ1 >= mFrontCeilingZ1 && mBackFloorZ2 >= mFrontCeilingZ2)) - { - solid = true; - } - else if ( - // properly render skies (consider door "open" if both ceilings are sky): - (mBackSector->GetTexture(sector_t::ceiling) != skyflatnum || mFrontSector->GetTexture(sector_t::ceiling) != skyflatnum) - - // if door is closed because back is shut: - && mBackCeilingZ1 <= mBackFloorZ1 && mBackCeilingZ2 <= mBackFloorZ2 - - // preserve a kind of transparent door/lift special effect: - && ((mBackCeilingZ1 >= mFrontCeilingZ1 && mBackCeilingZ2 >= mFrontCeilingZ2) || line->sidedef->GetTexture(side_t::top).isValid()) - && ((mBackFloorZ1 <= mFrontFloorZ1 && mBackFloorZ2 <= mFrontFloorZ2) || line->sidedef->GetTexture(side_t::bottom).isValid())) - { - // killough 1/18/98 -- This function is used to fix the automap bug which - // showed lines behind closed doors simply because the door had a dropoff. - // - // It assumes that Doom has already ruled out a door being closed because - // of front-back closure (e.g. front floor is taller than back ceiling). - - // This fixes the automap floor height bug -- killough 1/18/98: - // killough 4/7/98: optimize: save result in doorclosed for use in r_segs.c - mDoorClosed = true; - solid = true; - } - else if (mFrontSector->ceilingplane != mBackSector->ceilingplane || - mFrontSector->floorplane != mBackSector->floorplane) - { - // Window. 
- solid = false; - } - else if (SkyboxCompare(mFrontSector, mBackSector)) - { - solid = false; - } - else if (mBackSector->lightlevel != mFrontSector->lightlevel - || mBackSector->GetTexture(sector_t::floor) != mFrontSector->GetTexture(sector_t::floor) - || mBackSector->GetTexture(sector_t::ceiling) != mFrontSector->GetTexture(sector_t::ceiling) - || mLineSegment->sidedef->GetTexture(side_t::mid).isValid() - - // killough 3/7/98: Take flats offsets into account: - || mBackSector->planes[sector_t::floor].xform != mFrontSector->planes[sector_t::floor].xform - || mBackSector->planes[sector_t::ceiling].xform != mFrontSector->planes[sector_t::ceiling].xform - - || mBackSector->GetPlaneLight(sector_t::floor) != mFrontSector->GetPlaneLight(sector_t::floor) - || mBackSector->GetPlaneLight(sector_t::ceiling) != mFrontSector->GetPlaneLight(sector_t::ceiling) - || mBackSector->GetVisFlags(sector_t::floor) != mFrontSector->GetVisFlags(sector_t::floor) - || mBackSector->GetVisFlags(sector_t::ceiling) != mFrontSector->GetVisFlags(sector_t::ceiling) - - // [RH] Also consider colormaps - || mBackSector->ColorMap != mFrontSector->ColorMap - - - - // kg3D - and fake lights - || (mFrontSector->e && mFrontSector->e->XFloor.lightlist.Size()) - || (mBackSector->e && mBackSector->e->XFloor.lightlist.Size()) - ) - { - solid = false; - } - else - { - // Reject empty lines used for triggers and special events. - // Identical floor and ceiling on both sides, identical light levels - // on both sides, and no middle texture. - - // When using GL nodes, do a clipping test for these lines so we can - // mark their subsectors as visible for automap texturing. 
- if (hasglnodes && !(mSubsector->flags & SSECF_DRAWN)) + if (Thread->ClipSegments->Check(WallC.sx1, WallC.sx2)) { - if (Thread->ClipSegments->Check(WallC.sx1, WallC.sx2)) - { - mSubsector->flags |= SSECF_DRAWN; - } + mSubsector->flags |= SSECF_DRAWN; } - return; } + return; } rw_prepped = false; - bool visible = Thread->ClipSegments->Clip(WallC.sx1, WallC.sx2, solid, this); + bool visible = Thread->ClipSegments->Clip(WallC.sx1, WallC.sx2, IsSolid(), this); if (visible) { @@ -274,6 +195,96 @@ namespace swrenderer } } + bool SWRenderLine::IsInvisibleLine() const + { + // Reject empty lines used for triggers and special events. + // Identical floor and ceiling on both sides, identical light levels + // on both sides, and no middle texture. + + if (!mBackSector) return false; + + // Portal + if (mLineSegment->linedef->isVisualPortal() && mLineSegment->sidedef == mLineSegment->linedef->sidedef[0]) return false; + + // Closed door. + if (mBackCeilingZ1 <= mFrontFloorZ1 && mBackCeilingZ2 <= mFrontFloorZ2) return false; + if (mBackFloorZ1 >= mFrontCeilingZ1 && mBackFloorZ2 >= mFrontCeilingZ2) return false; + if (IsDoorClosed()) return false; + + // Window. 
+ if (mFrontSector->ceilingplane != mBackSector->ceilingplane || mFrontSector->floorplane != mBackSector->floorplane) return false; + if (SkyboxCompare(mFrontSector, mBackSector)) return false; + + if (mBackSector->lightlevel != mFrontSector->lightlevel) return false; + if (mBackSector->GetTexture(sector_t::floor) != mFrontSector->GetTexture(sector_t::floor)) return false; + if (mBackSector->GetTexture(sector_t::ceiling) != mFrontSector->GetTexture(sector_t::ceiling)) return false; + if (mLineSegment->sidedef->GetTexture(side_t::mid).isValid()) return false; + + if (mBackSector->planes[sector_t::floor].xform != mFrontSector->planes[sector_t::floor].xform) return false; + if (mBackSector->planes[sector_t::ceiling].xform != mFrontSector->planes[sector_t::ceiling].xform) return false; + + if (mBackSector->GetPlaneLight(sector_t::floor) != mFrontSector->GetPlaneLight(sector_t::floor)) return false; + if (mBackSector->GetPlaneLight(sector_t::ceiling) != mFrontSector->GetPlaneLight(sector_t::ceiling)) return false; + if (mBackSector->GetVisFlags(sector_t::floor) != mFrontSector->GetVisFlags(sector_t::floor)) return false; + if (mBackSector->GetVisFlags(sector_t::ceiling) != mFrontSector->GetVisFlags(sector_t::ceiling)) return false; + + if (mBackSector->ColorMap != mFrontSector->ColorMap) return false; + + if (mFrontSector->e && mFrontSector->e->XFloor.lightlist.Size()) return false; + if (mBackSector->e && mBackSector->e->XFloor.lightlist.Size()) return false; + + return true; + } + + bool SWRenderLine::IsSolid() const + { + // One sided + if (mBackSector == nullptr) return true; + + // Portal + if (mLineSegment->linedef->isVisualPortal() && mLineSegment->sidedef == mLineSegment->linedef->sidedef[0]) return true; + + // Closed door + if (mBackCeilingZ1 <= mFrontFloorZ1 && mBackCeilingZ2 <= mFrontFloorZ2) return true; + if (mBackFloorZ1 >= mFrontCeilingZ1 && mBackFloorZ2 >= mFrontCeilingZ2) return true; + if (IsDoorClosed()) return true; + + return false; + } + + bool 
SWRenderLine::IsDoorClosed() const + { + // Portal + if (mLineSegment->linedef->isVisualPortal() && mLineSegment->sidedef == mLineSegment->linedef->sidedef[0]) return false; + + // Closed door. + if (mBackCeilingZ1 <= mFrontFloorZ1 && mBackCeilingZ2 <= mFrontFloorZ2) return false; + if (mBackFloorZ1 >= mFrontCeilingZ1 && mBackFloorZ2 >= mFrontCeilingZ2) return false; + + // properly render skies (consider door "open" if both ceilings are sky) + if (mBackSector->GetTexture(sector_t::ceiling) == skyflatnum && mFrontSector->GetTexture(sector_t::ceiling) == skyflatnum) return false; + + // if door is closed because back is shut: + if (!(mBackCeilingZ1 <= mBackFloorZ1 && mBackCeilingZ2 <= mBackFloorZ2)) return false; + + // preserve a kind of transparent door/lift special effect: + if (((mBackCeilingZ1 >= mFrontCeilingZ1 && mBackCeilingZ2 >= mFrontCeilingZ2) || mLineSegment->sidedef->GetTexture(side_t::top).isValid()) + && ((mBackFloorZ1 <= mFrontFloorZ1 && mBackFloorZ2 <= mFrontFloorZ2) || mLineSegment->sidedef->GetTexture(side_t::bottom).isValid())) + { + // killough 1/18/98 -- This function is used to fix the automap bug which + // showed lines behind closed doors simply because the door had a dropoff. + // + // It assumes that Doom has already ruled out a door being closed because + // of front-back closure (e.g. front floor is taller than back ceiling). 
+ + // This fixes the automap floor height bug -- killough 1/18/98: + // killough 4/7/98: optimize: save result in doorclosed for use in r_segs.c + return true; + } + + return false; + } + bool SWRenderLine::SkyboxCompare(sector_t *frontsector, sector_t *backsector) const { FSectorPortal *frontc = frontsector->GetPortal(sector_t::ceiling); diff --git a/src/swrenderer/line/r_line.h b/src/swrenderer/line/r_line.h index 071eeb449c..603812f500 100644 --- a/src/swrenderer/line/r_line.h +++ b/src/swrenderer/line/r_line.h @@ -65,6 +65,10 @@ namespace swrenderer bool IsFogBoundary(sector_t *front, sector_t *back) const; bool SkyboxCompare(sector_t *frontsector, sector_t *backsector) const; + bool IsInvisibleLine() const; + bool IsDoorClosed() const; + bool IsSolid() const; + bool ShouldMarkFloor() const; bool ShouldMarkCeiling() const; bool ShouldMarkPortal() const; From 3e28d53308d34994efacc9eda146a755757fcf32 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 11 Feb 2017 20:27:11 +0100 Subject: [PATCH 834/912] Move rw_havelow and rw_havehigh to local function variables --- src/swrenderer/line/r_line.cpp | 47 +++++++++++++++++++--------------- src/swrenderer/line/r_line.h | 2 -- 2 files changed, 27 insertions(+), 22 deletions(-) diff --git a/src/swrenderer/line/r_line.cpp b/src/swrenderer/line/r_line.cpp index 6f312664cf..d3e2927294 100644 --- a/src/swrenderer/line/r_line.cpp +++ b/src/swrenderer/line/r_line.cpp @@ -117,18 +117,17 @@ namespace swrenderer WallT.InitFromLine(Thread, v1->fPos() - ViewPos, v2->fPos() - ViewPos); } + mFrontCeilingZ1 = mFrontSector->ceilingplane.ZatPoint(line->v1); + mFrontFloorZ1 = mFrontSector->floorplane.ZatPoint(line->v1); + mFrontCeilingZ2 = mFrontSector->ceilingplane.ZatPoint(line->v2); + mFrontFloorZ2 = mFrontSector->floorplane.ZatPoint(line->v2); + Clip3DFloors *clip3d = Thread->Clip3D.get(); if (!(clip3d->fake3D & FAKE3D_FAKEBACK)) { mBackSector = line->backsector; } - mFrontCeilingZ1 = 
mFrontSector->ceilingplane.ZatPoint(line->v1); - mFrontFloorZ1 = mFrontSector->floorplane.ZatPoint(line->v1); - mFrontCeilingZ2 = mFrontSector->ceilingplane.ZatPoint(line->v2); - mFrontFloorZ2 = mFrontSector->floorplane.ZatPoint(line->v2); - - rw_havehigh = rw_havelow = false; if (mBackSector) { @@ -154,19 +153,6 @@ namespace swrenderer clip3d->fake3D |= FAKE3D_CLIPTOPFRONT; } } - - // Cannot make these walls solid, because it can result in - // sprite clipping problems for sprites near the wall - if (mFrontCeilingZ1 > mBackCeilingZ1 || mFrontCeilingZ2 > mBackCeilingZ2) - { - rw_havehigh = true; - wallupper.Project(mBackSector->ceilingplane, &WallC, mLineSegment, renderportal->MirrorFlags & RF_XFLIP); - } - if (mFrontFloorZ1 < mBackFloorZ1 || mFrontFloorZ2 < mBackFloorZ2) - { - rw_havelow = true; - walllower.Project(mBackSector->floorplane, &WallC, mLineSegment, renderportal->MirrorFlags & RF_XFLIP); - } } mDoorClosed = IsDoorClosed(); @@ -254,6 +240,8 @@ namespace swrenderer bool SWRenderLine::IsDoorClosed() const { + if (!mBackSector) return false; + // Portal if (mLineSegment->linedef->isVisualPortal() && mLineSegment->sidedef == mLineSegment->linedef->sidedef[0]) return false; @@ -718,6 +706,26 @@ namespace swrenderer void SWRenderLine::SetWallVariables(bool needlights) { + RenderPortal *renderportal = Thread->Portal.get(); + + bool rw_havehigh = false; + bool rw_havelow = false; + if (mBackSector) + { + // Cannot make these walls solid, because it can result in + // sprite clipping problems for sprites near the wall + if (mFrontCeilingZ1 > mBackCeilingZ1 || mFrontCeilingZ2 > mBackCeilingZ2) + { + rw_havehigh = true; + wallupper.Project(mBackSector->ceilingplane, &WallC, mLineSegment, renderportal->MirrorFlags & RF_XFLIP); + } + if (mFrontFloorZ1 < mBackFloorZ1 || mFrontFloorZ2 < mBackFloorZ2) + { + rw_havelow = true; + walllower.Project(mBackSector->floorplane, &WallC, mLineSegment, renderportal->MirrorFlags & RF_XFLIP); + } + } + if 
(mLineSegment->linedef->special == Line_Horizon) { // Be aware: Line_Horizon does not work properly with sloped planes @@ -726,7 +734,6 @@ namespace swrenderer } else { - RenderPortal *renderportal = Thread->Portal.get(); mCeilingClipped = walltop.Project(mFrontSector->ceilingplane, &WallC, mLineSegment, renderportal->MirrorFlags & RF_XFLIP); mFloorClipped = wallbottom.Project(mFrontSector->floorplane, &WallC, mLineSegment, renderportal->MirrorFlags & RF_XFLIP); } diff --git a/src/swrenderer/line/r_line.h b/src/swrenderer/line/r_line.h index 603812f500..03dc0c8ad6 100644 --- a/src/swrenderer/line/r_line.h +++ b/src/swrenderer/line/r_line.h @@ -106,8 +106,6 @@ namespace swrenderer fixed_t rw_offset_bottom; bool rw_prepped; - bool rw_havehigh; - bool rw_havelow; int wallshade; float rw_light; From 8c5360e54755d987c5a523f77ffc278d36b6bfaf Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 11 Feb 2017 22:10:52 +0100 Subject: [PATCH 835/912] Enable Ken Silverman's voxel drawing code now that he has given us permission to license it as GPL --- src/swrenderer/things/r_voxel.cpp | 1079 ++++++++++---------- src/swrenderer/things/r_voxel.h | 58 +- src/swrenderer/viewport/r_spritedrawer.cpp | 11 + src/swrenderer/viewport/r_spritedrawer.h | 1 + src/swrenderer/viewport/r_viewport.cpp | 3 + src/swrenderer/viewport/r_viewport.h | 7 +- 6 files changed, 595 insertions(+), 564 deletions(-) diff --git a/src/swrenderer/things/r_voxel.cpp b/src/swrenderer/things/r_voxel.cpp index e91ae4ea86..62eb4b62f8 100644 --- a/src/swrenderer/things/r_voxel.cpp +++ b/src/swrenderer/things/r_voxel.cpp @@ -1,25 +1,26 @@ -/* -** Voxel rendering -** Copyright (c) 1998-2016 Randy Heit -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 'as-is', without any express or implied -** warranty. In no event will the authors be held liable for any damages -** arising from the use of this software. 
-** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. -** 3. This notice may not be removed or altered from any source distribution. -** -*/ +// +//--------------------------------------------------------------------------- +// +// Voxel rendering +// Copyright(c) 1993 - 1997 Ken Silverman +// Copyright(c) 1998 - 2016 Randy Heit +// All rights reserved. +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with this program. 
If not, see http://www.gnu.org/licenses/ +// +//-------------------------------------------------------------------------- +// #include #include "templates.h" @@ -152,7 +153,7 @@ namespace swrenderer vis->fakefloor = fakefloor; vis->fakeceiling = fakeceiling; vis->Light.ColormapNum = 0; - //vis->bInMirror = renderportal->MirrorFlags & RF_XFLIP; + vis->bInMirror = renderportal->MirrorFlags & RF_XFLIP; //vis->bSplitSprite = false; vis->voxel = voxel->Voxel; @@ -184,122 +185,422 @@ namespace swrenderer void RenderVoxel::Render(RenderThread *thread, short *cliptop, short *clipbottom, int minZ, int maxZ) { - auto sprite = this; + auto spr = this; auto viewport = RenderViewport::Instance(); - FDynamicColormap *basecolormap = static_cast(sprite->Light.BaseColormap); - SpriteDrawerArgs drawerargs; - drawerargs.SetLight(sprite->Light.BaseColormap, 0, sprite->Light.ColormapNum << FRACBITS); + drawerargs.SetLight(spr->Light.BaseColormap, 0, spr->Light.ColormapNum << FRACBITS); - bool visible = drawerargs.SetStyle(sprite->RenderStyle, sprite->Alpha, sprite->Translation, sprite->FillColor, basecolormap); + FDynamicColormap *basecolormap = (FDynamicColormap*)spr->Light.BaseColormap; + bool visible = drawerargs.SetStyle(spr->RenderStyle, spr->Alpha, spr->Translation, spr->FillColor, basecolormap); if (!visible) return; - DVector3 view_origin = { sprite->pa.vpos.X, sprite->pa.vpos.Y, sprite->pa.vpos.Z }; - FAngle view_angle = sprite->pa.vang; - DVector3 sprite_origin = { sprite->gpos.X, sprite->gpos.Y, sprite->gpos.Z }; - DAngle sprite_angle = sprite->Angle; - double sprite_xscale = FIXED2DBL(sprite->xscale); - double sprite_yscale = sprite->yscale; - FVoxel *voxel = sprite->voxel; - - // Select mipmap level: - - double viewSin = view_angle.Cos(); - double viewCos = view_angle.Sin(); - double logmip = fabs((view_origin.X - sprite_origin.X) * viewCos - (view_origin.Y - sprite_origin.Y) * viewSin); - int miplevel = 0; - while (miplevel < voxel->NumMips - 1 && logmip >= 
viewport->FocalLengthX) + int flags = 0; + + /* + if (colfunc == fuzzcolfunc || colfunc == R_FillColumn) { - logmip *= 0.5; - miplevel++; + flags = DVF_OFFSCREEN | DVF_SPANSONLY; } - - const FVoxelMipLevel &mip = voxel->Mips[miplevel]; - if (mip.SlabData == nullptr) - return; - - minZ >>= miplevel; - maxZ >>= miplevel; - sprite_xscale *= (1 << miplevel); - sprite_yscale *= (1 << miplevel); - - // Find voxel cube eigenvectors and origin in world space: - - double spriteSin = sprite_angle.Sin(); - double spriteCos = sprite_angle.Cos(); - - DVector2 dirX(spriteSin * sprite_xscale, -spriteCos * sprite_xscale); - DVector2 dirY(spriteCos * sprite_xscale, spriteSin * sprite_xscale); - double dirZ = -sprite_yscale; - - DVector3 voxel_origin = sprite_origin; - voxel_origin.X -= dirX.X * mip.Pivot.X + dirX.Y * mip.Pivot.Y; - voxel_origin.Y -= dirY.X * mip.Pivot.X + dirY.Y * mip.Pivot.Y; - voxel_origin.Z -= dirZ * mip.Pivot.Z; - - // Voxel cube walking directions: - - int startX[4] = { 0, mip.SizeX - 1, 0, mip.SizeX - 1 }; - int startY[4] = { 0, 0, mip.SizeY - 1, mip.SizeY - 1 }; - int stepX[4] = { 1, -1, 1, -1 }; - int stepY[4] = { 1, 1, -1, -1 }; - - // The point in cube mipmap local space where voxel sides change from front to backfacing: - - double dx = (view_origin.X - sprite_origin.X) / sprite_xscale; - double dy = (view_origin.Y - sprite_origin.Y) / sprite_xscale; - int backX = (int)(dx * spriteCos - dy * spriteSin + mip.Pivot.X); - int backY = (int)(dy * spriteCos + dx * spriteSin + mip.Pivot.Y); - //int endX = clamp(backX, 0, mip.SizeX - 1); - //int endY = clamp(backY, 0, mip.SizeY - 1); - int endX = mip.SizeX - 1;// clamp(backX, 0, mip.SizeX - 1); - int endY = mip.SizeY - 1;// clamp(backY, 0, mip.SizeY - 1); - - // Draw the voxel cube: - - for (int index = 0; index < 1; index++) + else if (colfunc != basecolfunc) { - /*if ((stepX[index] < 0 && endX >= startX[index]) || - (stepX[index] > 0 && endX <= startX[index]) || - (stepY[index] < 0 && endY >= startY[index]) || 
- (stepY[index] > 0 && endY <= startY[index])) continue;*/ - - for (int x = startX[index]; x != endX; x += stepX[index]) + flags = DVF_OFFSCREEN; + } + if (flags != 0) + { + CheckOffscreenBuffer(viewport->RenderTarget->GetWidth(), viewport->RenderTarget->GetHeight(), !!(flags & DVF_SPANSONLY)); + } + if (spr->bInMirror) + { + flags |= DVF_MIRRORED; + } + */ + + // Render the voxel, either directly to the screen or offscreen. + DrawVoxel(thread, drawerargs, spr->pa.vpos, spr->pa.vang, spr->gpos, spr->Angle, + spr->xscale, FLOAT2FIXED(spr->yscale), spr->voxel, cliptop, clipbottom, + minZ, maxZ, flags); + + /* + // Blend the voxel, if that's what we need to do. + if ((flags & ~DVF_MIRRORED) != 0) + { + int pixelsize = viewport->RenderTarget->IsBgra() ? 4 : 1; + for (int x = 0; x < viewwidth; ++x) { - for (int y = startY[index]; y != endY; y += stepY[index]) + if (!(flags & DVF_SPANSONLY) && (x & 3) == 0) { - kvxslab_t *slab_start = GetSlabStart(mip, x, y); - kvxslab_t *slab_end = GetSlabEnd(mip, x, y); - - for (kvxslab_t *slab = slab_start; slab != slab_end; slab = NextSlab(slab)) + rt_initcols(OffscreenColorBuffer + x * OffscreenBufferHeight); + } + for (FCoverageBuffer::Span *span = OffscreenCoverageBuffer->Spans[x]; span != NULL; span = span->NextSpan) + { + if (flags & DVF_SPANSONLY) { - // To do: check slab->backfacecull - - int ztop = slab->ztop; - int zbottom = ztop + slab->zleng; - - //ztop = MAX(ztop, minZ); - //zbottom = MIN(zbottom, maxZ); - - for (int z = ztop; z < zbottom; z++) + dc_x = x; + dc_yl = span->Start; + dc_yh = span->Stop - 1; + dc_count = span->Stop - span->Start; + dc_dest = (ylookup[span->Start] + x) * pixelsize + dc_destorg; + colfunc(); + } + else + { + rt_span_coverage(x, span->Start, span->Stop - 1); + } + } + if (!(flags & DVF_SPANSONLY) && (x & 3) == 3) + { + rt_draw4cols(x - 3); + } + } + } + */ + } + + void RenderVoxel::DrawVoxel( + RenderThread *thread, SpriteDrawerArgs &drawerargs, + const FVector3 &globalpos, FAngle viewangle, 
const FVector3 &dasprpos, DAngle dasprang, fixed_t daxscale, fixed_t dayscale, FVoxel *voxobj, + short *daumost, short *dadmost, int minslabz, int maxslabz, int flags) + { + int i, j, k, x, y, syoff, ggxstart, ggystart, nxoff; + fixed_t cosang, sinang, sprcosang, sprsinang; + int backx, backy, gxinc, gyinc; + int daxscalerecip, dayscalerecip, cnt, gxstart, gystart, dazscale; + int lx, rx, nx, ny, x1=0, y1=0, x2=0, y2=0, yinc=0; + int yoff, xs=0, ys=0, xe, ye, xi=0, yi=0, cbackx, cbacky, dagxinc, dagyinc; + kvxslab_t *voxptr, *voxend; + FVoxelMipLevel *mip; + int z1a[64], z2a[64], yplc[64]; + + auto viewport = RenderViewport::Instance(); + + const int nytooclose = centerxwide * 2100, nytoofar = 32768*32768 - 1048576; + const int xdimenscale = FLOAT2FIXED(centerxwide * viewport->YaspectMul / 160); + const double centerxwide_f = centerxwide; + const double centerxwidebig_f = centerxwide_f * 65536*65536*8; + + // Convert to Build's coordinate system. + fixed_t globalposx = xs_Fix<4>::ToFix(globalpos.X); + fixed_t globalposy = xs_Fix<4>::ToFix(-globalpos.Y); + fixed_t globalposz = xs_Fix<8>::ToFix(-globalpos.Z); + + fixed_t dasprx = xs_Fix<4>::ToFix(dasprpos.X); + fixed_t daspry = xs_Fix<4>::ToFix(-dasprpos.Y); + fixed_t dasprz = xs_Fix<8>::ToFix(-dasprpos.Z); + + // Shift the scales from 16 bits of fractional precision to 6. + // Also do some magic voodoo scaling to make them the right size. + daxscale = daxscale / (0xC000 >> 6); + dayscale = dayscale / (0xC000 >> 6); + if (daxscale <= 0 || dayscale <= 0) + { + // won't be visible. 
+ return; + } + + angle_t viewang = viewangle.BAMs(); + cosang = FLOAT2FIXED(viewangle.Cos()) >> 2; + sinang = FLOAT2FIXED(-viewangle.Sin()) >> 2; + sprcosang = FLOAT2FIXED(dasprang.Cos()) >> 2; + sprsinang = FLOAT2FIXED(-dasprang.Sin()) >> 2; + + // Select mip level + i = abs(DMulScale6(dasprx - globalposx, cosang, daspry - globalposy, sinang)); + i = DivScale6(i, MIN(daxscale, dayscale)); + j = xs_Fix<13>::ToFix(viewport->FocalLengthX); + for (k = 0; i >= j && k < voxobj->NumMips; ++k) + { + i >>= 1; + } + if (k >= voxobj->NumMips) k = voxobj->NumMips - 1; + + mip = &voxobj->Mips[k]; if (mip->SlabData == NULL) return; + + minslabz >>= k; + maxslabz >>= k; + + daxscale <<= (k+8); dayscale <<= (k+8); + dazscale = FixedDiv(dayscale, FLOAT2FIXED(viewport->BaseYaspectMul)); + daxscale = fixed_t(daxscale / viewport->YaspectMul); + daxscale = Scale(daxscale, xdimenscale, centerxwide << 9); + dayscale = Scale(dayscale, FixedMul(xdimenscale, viewport->viewingrangerecip), centerxwide << 9); + + daxscalerecip = (1<<30) / daxscale; + dayscalerecip = (1<<30) / dayscale; + + fixed_t piv_x = fixed_t(mip->Pivot.X*256.); + fixed_t piv_y = fixed_t(mip->Pivot.Y*256.); + fixed_t piv_z = fixed_t(mip->Pivot.Z*256.); + + x = FixedMul(globalposx - dasprx, daxscalerecip); + y = FixedMul(globalposy - daspry, daxscalerecip); + backx = (DMulScale10(x, sprcosang, y, sprsinang) + piv_x) >> 8; + backy = (DMulScale10(y, sprcosang, x, -sprsinang) + piv_y) >> 8; + cbackx = clamp(backx, 0, mip->SizeX - 1); + cbacky = clamp(backy, 0, mip->SizeY - 1); + + sprcosang = MulScale14(daxscale, sprcosang); + sprsinang = MulScale14(daxscale, sprsinang); + + x = (dasprx - globalposx) - DMulScale18(piv_x, sprcosang, piv_y, -sprsinang); + y = (daspry - globalposy) - DMulScale18(piv_y, sprcosang, piv_x, sprsinang); + + cosang = FixedMul(cosang, dayscalerecip); + sinang = FixedMul(sinang, dayscalerecip); + + gxstart = y*cosang - x*sinang; + gystart = x*cosang + y*sinang; + gxinc = DMulScale10(sprsinang, cosang, 
sprcosang, -sinang); + gyinc = DMulScale10(sprcosang, cosang, sprsinang, sinang); + if ((abs(globalposz - dasprz) >> 10) >= abs(dazscale)) return; + + x = 0; y = 0; j = MAX(mip->SizeX, mip->SizeY); + fixed_t *ggxinc = (fixed_t *)alloca((j + 1) * sizeof(fixed_t) * 2); + fixed_t *ggyinc = ggxinc + (j + 1); + for (i = 0; i <= j; i++) + { + ggxinc[i] = x; x += gxinc; + ggyinc[i] = y; y += gyinc; + } + + syoff = DivScale21(globalposz - dasprz, FixedMul(dazscale, 0xE800)) + (piv_z << 7); + yoff = (abs(gxinc) + abs(gyinc)) >> 1; + + for (cnt = 0; cnt < 8; cnt++) + { + switch (cnt) + { + case 0: xs = 0; ys = 0; xi = 1; yi = 1; break; + case 1: xs = mip->SizeX-1; ys = 0; xi = -1; yi = 1; break; + case 2: xs = 0; ys = mip->SizeY-1; xi = 1; yi = -1; break; + case 3: xs = mip->SizeX-1; ys = mip->SizeY-1; xi = -1; yi = -1; break; + case 4: xs = 0; ys = cbacky; xi = 1; yi = 2; break; + case 5: xs = mip->SizeX-1; ys = cbacky; xi = -1; yi = 2; break; + case 6: xs = cbackx; ys = 0; xi = 2; yi = 1; break; + case 7: xs = cbackx; ys = mip->SizeY-1; xi = 2; yi = -1; break; + } + xe = cbackx; ye = cbacky; + if (cnt < 4) + { + if ((xi < 0) && (xe >= xs)) continue; + if ((xi > 0) && (xe <= xs)) continue; + if ((yi < 0) && (ye >= ys)) continue; + if ((yi > 0) && (ye <= ys)) continue; + } + else + { + if ((xi < 0) && (xe > xs)) continue; + if ((xi > 0) && (xe < xs)) continue; + if ((yi < 0) && (ye > ys)) continue; + if ((yi > 0) && (ye < ys)) continue; + xe += xi; ye += yi; + } + + i = sgn(ys - backy) + sgn(xs - backx) * 3 + 4; + switch(i) + { + case 6: case 7: x1 = 0; y1 = 0; break; + case 8: case 5: x1 = gxinc; y1 = gyinc; break; + case 0: case 3: x1 = gyinc; y1 = -gxinc; break; + case 2: case 1: x1 = gxinc+gyinc; y1 = gyinc-gxinc; break; + } + switch(i) + { + case 2: case 5: x2 = 0; y2 = 0; break; + case 0: case 1: x2 = gxinc; y2 = gyinc; break; + case 8: case 7: x2 = gyinc; y2 = -gxinc; break; + case 6: case 3: x2 = gxinc+gyinc; y2 = gyinc-gxinc; break; + } + BYTE oand = (1 << int(xs 0) 
{ dagxinc = gxinc; dagyinc = FixedMul(gyinc, viewport->viewingrangerecip); } + else { dagxinc = -gxinc; dagyinc = -FixedMul(gyinc, viewport->viewingrangerecip); } + + /* Fix for non 90 degree viewing ranges */ + nxoff = FixedMul(x2 - x1, viewport->viewingrangerecip); + x1 = FixedMul(x1, viewport->viewingrangerecip); + + ggxstart = gxstart + ggyinc[ys]; + ggystart = gystart - ggxinc[ys]; + + for (x = xs; x != xe; x += xi) + { + BYTE *slabxoffs = &mip->SlabData[mip->OffsetX[x]]; + short *xyoffs = &mip->OffsetXY[x * (mip->SizeY + 1)]; + + nx = FixedMul(ggxstart + ggxinc[x], viewport->viewingrangerecip) + x1; + ny = ggystart + ggyinc[x]; + for (y = ys; y != ye; y += yi, nx += dagyinc, ny -= dagxinc) + { + if ((ny <= nytooclose) || (ny >= nytoofar)) continue; + voxptr = (kvxslab_t *)(slabxoffs + xyoffs[y]); + voxend = (kvxslab_t *)(slabxoffs + xyoffs[y+1]); + if (voxptr >= voxend) continue; + + lx = xs_RoundToInt(nx * centerxwide_f / (ny + y1)) + centerx; + if (lx < 0) lx = 0; + rx = xs_RoundToInt((nx + nxoff) * centerxwide_f / (ny + y2)) + centerx; + if (rx > viewwidth) rx = viewwidth; + if (rx <= lx) continue; + + if (flags & DVF_MIRRORED) + { + int t = viewwidth - lx; + lx = viewwidth - rx; + rx = t; + } + + fixed_t l1 = xs_RoundToInt(centerxwidebig_f / (ny - yoff)); + fixed_t l2 = xs_RoundToInt(centerxwidebig_f / (ny + yoff)); + for (; voxptr < voxend; voxptr = (kvxslab_t *)((BYTE *)voxptr + voxptr->zleng + 3)) + { + const BYTE *col = voxptr->col; + int zleng = voxptr->zleng; + int ztop = voxptr->ztop; + fixed_t z1, z2; + + if (ztop < minslabz) { - uint8_t color = slab->col[z - slab->ztop]; - - DVector3 voxel_pos = voxel_origin; - voxel_pos.X += dirX.X * x + dirX.Y * y; - voxel_pos.Y += dirY.X * x + dirY.Y * y; - voxel_pos.Z += dirZ * z; - - FillBox(thread, drawerargs, voxel_pos, sprite_xscale, sprite_yscale, color, cliptop, clipbottom, false, false); + int diff = minslabz - ztop; + ztop = minslabz; + col += diff; + zleng -= diff; + } + if (ztop + zleng > maxslabz) 
+ { + int diff = ztop + zleng - maxslabz; + zleng -= diff; + } + if (zleng <= 0) continue; + + j = (ztop << 15) - syoff; + if (j < 0) + { + k = j + (zleng << 15); + if (k < 0) + { + if ((voxptr->backfacecull & oand32) == 0) continue; + z2 = MulScale32(l2, k) + centery; /* Below slab */ + } + else + { + if ((voxptr->backfacecull & oand) == 0) continue; /* Middle of slab */ + z2 = MulScale32(l1, k) + centery; + } + z1 = MulScale32(l1, j) + centery; + } + else + { + if ((voxptr->backfacecull & oand16) == 0) continue; + z1 = MulScale32(l2, j) + centery; /* Above slab */ + z2 = MulScale32(l1, j + (zleng << 15)) + centery; + } + + if (z2 <= z1) continue; + + if (zleng == 1) + { + yinc = 0; + } + else + { + if (z2-z1 >= 1024) yinc = FixedDiv(zleng, z2 - z1); + else yinc = (((1 << 24) - 1) / (z2 - z1)) * zleng >> 8; + } + // [RH] Clip each column separately, not just by the first one. + for (int stripwidth = MIN(countof(z1a), rx - lx), lxt = lx; + lxt < rx; + (lxt += countof(z1a)), stripwidth = MIN(countof(z1a), rx - lxt)) + { + // Calculate top and bottom pixels locations + for (int xxx = 0; xxx < stripwidth; ++xxx) + { + if (zleng == 1) + { + yplc[xxx] = 0; + z1a[xxx] = MAX(z1, daumost[lxt + xxx]); + } + else + { + if (z1 < daumost[lxt + xxx]) + { + yplc[xxx] = yinc * (daumost[lxt + xxx] - z1); + z1a[xxx] = daumost[lxt + xxx]; + } + else + { + yplc[xxx] = 0; + z1a[xxx] = z1; + } + } + z2a[xxx] = MIN(z2, dadmost[lxt + xxx]); + } + // Find top and bottom pixels that match and draw them as one strip + for (int xxl = 0, xxr; xxl < stripwidth; ) + { + if (z1a[xxl] >= z2a[xxl]) + { // No column here + xxl++; + continue; + } + int z1 = z1a[xxl]; + int z2 = z2a[xxl]; + // How many columns share the same extents? 
+ for (xxr = xxl + 1; xxr < stripwidth; ++xxr) + { + if (z1a[xxr] != z1 || z2a[xxr] != z2) + break; + } + + for (int x = xxl; x < xxr; ++x) + { + drawerargs.SetDest(lxt + x, z1); + drawerargs.SetSolidColor(100); + drawerargs.SetCount(z2 - z1); + drawerargs.DrawVoxelColumn(thread, yplc[xxl], yinc, col, zleng); + } + + /* + if (!(flags & DVF_OFFSCREEN)) + { + // Draw directly to the screen. + R_DrawSlab(xxr - xxl, yplc[xxl], z2 - z1, yinc, col, (ylookup[z1] + lxt + xxl) * pixelsize + dc_destorg); + } + else + { + // Record the area covered and possibly draw to an offscreen buffer. + dc_yl = z1; + dc_yh = z2 - 1; + dc_count = z2 - z1; + dc_iscale = yinc; + for (int x = xxl; x < xxr; ++x) + { + OffscreenCoverageBuffer->InsertSpan(lxt + x, z1, z2); + if (!(flags & DVF_SPANSONLY)) + { + dc_x = lxt + x; + rt_initcols(OffscreenColorBuffer + (dc_x & ~3) * OffscreenBufferHeight); + dc_source = col; + dc_source2 = nullptr; + dc_texturefrac = yplc[xxl]; + hcolfunc_pre(); + } + } + } + */ + + xxl = xxr; + } } } } } } } - + kvxslab_t *RenderVoxel::GetSlabStart(const FVoxelMipLevel &mip, int x, int y) { return (kvxslab_t *)&mip.SlabData[mip.OffsetX[x] + (int)mip.OffsetXY[x * (mip.SizeY + 1) + y]]; @@ -315,42 +616,6 @@ namespace swrenderer return (kvxslab_t*)(((uint8_t*)slab) + 3 + slab->zleng); } - void RenderVoxel::FillBox(RenderThread *thread, SpriteDrawerArgs &drawerargs, DVector3 origin, double extentX, double extentY, int color, short *cliptop, short *clipbottom, bool viewspace, bool pixelstretch) - { - auto viewport = RenderViewport::Instance(); - - DVector3 viewPos = viewport->PointWorldToView(origin); - - if (viewPos.Z < 0.01f) - return; - - DVector3 screenPos = viewport->PointViewToScreen(viewPos); - DVector2 screenExtent = viewport->ScaleViewToScreen({ extentX, extentY }, viewPos.Z, pixelstretch); - - int x1 = MAX((int)(screenPos.X - screenExtent.X), 0); - int x2 = MIN((int)(screenPos.X + screenExtent.X + 0.5f), viewwidth - 1); - int y1 = MAX((int)(screenPos.Y - 
screenExtent.Y), 0); - int y2 = MIN((int)(screenPos.Y + screenExtent.Y + 0.5f), viewheight - 1); - - int pixelsize = viewport->RenderTarget->IsBgra() ? 4 : 1; - - if (y1 < y2) - { - for (int x = x1; x < x2; x++) - { - int columnY1 = MAX(y1, (int)cliptop[x]); - int columnY2 = MIN(y2, (int)clipbottom[x]); - if (columnY1 < columnY2) - { - drawerargs.SetDest(x, columnY1); - drawerargs.SetSolidColor(color); - drawerargs.SetCount(columnY2 - columnY1); - drawerargs.FillColumn(thread); - } - } - } - } - void RenderVoxel::Deinit() { // Free offscreen buffer @@ -550,416 +815,156 @@ namespace swrenderer return span; } - ///////////////////////////////////////////////////////////////////////// - // Old BUILD implementation follows: - // - // This file contains some code from the Build Engine. - // - // "Build Engine & Tools" Copyright (c) 1993-1997 Ken Silverman - // Ken Silverman's official web site: "http://www.advsys.net/ken" - // See the included license file "BUILDLIC.TXT" for license info. - #if 0 - void R_DrawVisVoxel(vissprite_t *spr, int minslabz, int maxslabz, short *cliptop, short *clipbot) + void RenderVoxel::Render(RenderThread *thread, short *cliptop, short *clipbottom, int minZ, int maxZ) { - int flags = 0; + auto sprite = this; + auto viewport = RenderViewport::Instance(); - // Do setup for blending. 
- R_SetColorMapLight(spr->BaseColormap, 0, spr->ColormapNum << FRACBITS); - bool visible = R_SetPatchStyle(spr->RenderStyle, spr->Alpha, spr->Translation, spr->FillColor); + FDynamicColormap *basecolormap = static_cast(sprite->Light.BaseColormap); + SpriteDrawerArgs drawerargs; + drawerargs.SetLight(sprite->Light.BaseColormap, 0, sprite->Light.ColormapNum << FRACBITS); + + bool visible = drawerargs.SetStyle(sprite->RenderStyle, sprite->Alpha, sprite->Translation, sprite->FillColor, basecolormap); if (!visible) - { return; - } - if (colfunc == fuzzcolfunc || colfunc == R_FillColumn) - { - flags = DVF_OFFSCREEN | DVF_SPANSONLY; - } - else if (colfunc != basecolfunc) - { - flags = DVF_OFFSCREEN; - } - if (flags != 0) - { - R_CheckOffscreenBuffer(RenderTarget->GetWidth(), RenderTarget->GetHeight(), !!(flags & DVF_SPANSONLY)); - } - if (spr->bInMirror) - { - flags |= DVF_MIRRORED; - } - // Render the voxel, either directly to the screen or offscreen. - R_DrawVoxel(spr->pa.vpos, spr->pa.vang, spr->gpos, spr->Angle, - spr->xscale, FLOAT2FIXED(spr->yscale), spr->voxel, spr->BaseColormap, spr->ColormapNum, cliptop, clipbot, - minslabz, maxslabz, flags); - - // Blend the voxel, if that's what we need to do. - if ((flags & ~DVF_MIRRORED) != 0) + DVector3 view_origin = { sprite->pa.vpos.X, sprite->pa.vpos.Y, sprite->pa.vpos.Z }; + FAngle view_angle = sprite->pa.vang; + DVector3 sprite_origin = { sprite->gpos.X, sprite->gpos.Y, sprite->gpos.Z }; + DAngle sprite_angle = sprite->Angle; + double sprite_xscale = FIXED2DBL(sprite->xscale); + double sprite_yscale = sprite->yscale; + FVoxel *voxel = sprite->voxel; + + // Select mipmap level: + + double viewSin = view_angle.Cos(); + double viewCos = view_angle.Sin(); + double logmip = fabs((view_origin.X - sprite_origin.X) * viewCos - (view_origin.Y - sprite_origin.Y) * viewSin); + int miplevel = 0; + while (miplevel < voxel->NumMips - 1 && logmip >= viewport->FocalLengthX) { - int pixelsize = r_swtruecolor ? 
4 : 1; - for (int x = 0; x < viewwidth; ++x) + logmip *= 0.5; + miplevel++; + } + + const FVoxelMipLevel &mip = voxel->Mips[miplevel]; + if (mip.SlabData == nullptr) + return; + + minZ >>= miplevel; + maxZ >>= miplevel; + sprite_xscale *= (1 << miplevel); + sprite_yscale *= (1 << miplevel); + + // Find voxel cube eigenvectors and origin in world space: + + double spriteSin = sprite_angle.Sin(); + double spriteCos = sprite_angle.Cos(); + + DVector2 dirX(spriteSin * sprite_xscale, -spriteCos * sprite_xscale); + DVector2 dirY(spriteCos * sprite_xscale, spriteSin * sprite_xscale); + double dirZ = -sprite_yscale; + + DVector3 voxel_origin = sprite_origin; + voxel_origin.X -= dirX.X * mip.Pivot.X + dirX.Y * mip.Pivot.Y; + voxel_origin.Y -= dirY.X * mip.Pivot.X + dirY.Y * mip.Pivot.Y; + voxel_origin.Z -= dirZ * mip.Pivot.Z; + + // Voxel cube walking directions: + + int startX[4] = { 0, mip.SizeX - 1, 0, mip.SizeX - 1 }; + int startY[4] = { 0, 0, mip.SizeY - 1, mip.SizeY - 1 }; + int stepX[4] = { 1, -1, 1, -1 }; + int stepY[4] = { 1, 1, -1, -1 }; + + // The point in cube mipmap local space where voxel sides change from front to backfacing: + + double dx = (view_origin.X - sprite_origin.X) / sprite_xscale; + double dy = (view_origin.Y - sprite_origin.Y) / sprite_xscale; + int backX = (int)(dx * spriteCos - dy * spriteSin + mip.Pivot.X); + int backY = (int)(dy * spriteCos + dx * spriteSin + mip.Pivot.Y); + //int endX = clamp(backX, 0, mip.SizeX - 1); + //int endY = clamp(backY, 0, mip.SizeY - 1); + int endX = mip.SizeX - 1;// clamp(backX, 0, mip.SizeX - 1); + int endY = mip.SizeY - 1;// clamp(backY, 0, mip.SizeY - 1); + + // Draw the voxel cube: + + for (int index = 0; index < 1; index++) + { + /*if ((stepX[index] < 0 && endX >= startX[index]) || + (stepX[index] > 0 && endX <= startX[index]) || + (stepY[index] < 0 && endY >= startY[index]) || + (stepY[index] > 0 && endY <= startY[index])) continue;*/ + + for (int x = startX[index]; x != endX; x += stepX[index]) { - if 
(!(flags & DVF_SPANSONLY) && (x & 3) == 0) + for (int y = startY[index]; y != endY; y += stepY[index]) { - rt_initcols(OffscreenColorBuffer + x * OffscreenBufferHeight); - } - for (FCoverageBuffer::Span *span = OffscreenCoverageBuffer->Spans[x]; span != NULL; span = span->NextSpan) - { - if (flags & DVF_SPANSONLY) + kvxslab_t *slab_start = GetSlabStart(mip, x, y); + kvxslab_t *slab_end = GetSlabEnd(mip, x, y); + + for (kvxslab_t *slab = slab_start; slab != slab_end; slab = NextSlab(slab)) { - dc_x = x; - dc_yl = span->Start; - dc_yh = span->Stop - 1; - dc_count = span->Stop - span->Start; - dc_dest = (ylookup[span->Start] + x) * pixelsize + dc_destorg; - colfunc(); + // To do: check slab->backfacecull + + int ztop = slab->ztop; + int zbottom = ztop + slab->zleng; + + //ztop = MAX(ztop, minZ); + //zbottom = MIN(zbottom, maxZ); + + for (int z = ztop; z < zbottom; z++) + { + uint8_t color = slab->col[z - slab->ztop]; + + DVector3 voxel_pos = voxel_origin; + voxel_pos.X += dirX.X * x + dirX.Y * y; + voxel_pos.Y += dirY.X * x + dirY.Y * y; + voxel_pos.Z += dirZ * z; + + FillBox(thread, drawerargs, voxel_pos, sprite_xscale, sprite_yscale, color, cliptop, clipbottom, false, false); + } } - else - { - rt_span_coverage(x, span->Start, span->Stop - 1); - } - } - if (!(flags & DVF_SPANSONLY) && (x & 3) == 3) - { - rt_draw4cols(x - 3); } } } - - R_FinishSetPatchStyle(); - NetUpdate(); } - void R_DrawVoxel(const FVector3 &globalpos, FAngle viewangle, - const FVector3 &dasprpos, DAngle dasprang, - fixed_t daxscale, fixed_t dayscale, FVoxel *voxobj, - FSWColormap *colormap, int colormapnum, short *daumost, short *dadmost, int minslabz, int maxslabz, int flags) + void RenderVoxel::FillBox(RenderThread *thread, SpriteDrawerArgs &drawerargs, DVector3 origin, double extentX, double extentY, int color, short *cliptop, short *clipbottom, bool viewspace, bool pixelstretch) { - int i, j, k, x, y, syoff, ggxstart, ggystart, nxoff; - fixed_t cosang, sinang, sprcosang, sprsinang; - int 
backx, backy, gxinc, gyinc; - int daxscalerecip, dayscalerecip, cnt, gxstart, gystart, dazscale; - int lx, rx, nx, ny, x1=0, y1=0, x2=0, y2=0, yinc=0; - int yoff, xs=0, ys=0, xe, ye, xi=0, yi=0, cbackx, cbacky, dagxinc, dagyinc; - kvxslab_t *voxptr, *voxend; - FVoxelMipLevel *mip; - int z1a[64], z2a[64], yplc[64]; + auto viewport = RenderViewport::Instance(); - const int nytooclose = centerxwide * 2100, nytoofar = 32768*32768 - 1048576; - const int xdimenscale = FLOAT2FIXED(centerxwide * YaspectMul / 160); - const double centerxwide_f = centerxwide; - const double centerxwidebig_f = centerxwide_f * 65536*65536*8; + DVector3 viewPos = viewport->PointWorldToView(origin); - // Convert to Build's coordinate system. - fixed_t globalposx = xs_Fix<4>::ToFix(globalpos.X); - fixed_t globalposy = xs_Fix<4>::ToFix(-globalpos.Y); - fixed_t globalposz = xs_Fix<8>::ToFix(-globalpos.Z); - - fixed_t dasprx = xs_Fix<4>::ToFix(dasprpos.X); - fixed_t daspry = xs_Fix<4>::ToFix(-dasprpos.Y); - fixed_t dasprz = xs_Fix<8>::ToFix(-dasprpos.Z); - - // Shift the scales from 16 bits of fractional precision to 6. - // Also do some magic voodoo scaling to make them the right size. - daxscale = daxscale / (0xC000 >> 6); - dayscale = dayscale / (0xC000 >> 6); - if (daxscale <= 0 || dayscale <= 0) - { - // won't be visible. 
+ if (viewPos.Z < 0.01f) return; - } - angle_t viewang = viewangle.BAMs(); - cosang = FLOAT2FIXED(viewangle.Cos()) >> 2; - sinang = FLOAT2FIXED(-viewangle.Sin()) >> 2; - sprcosang = FLOAT2FIXED(dasprang.Cos()) >> 2; - sprsinang = FLOAT2FIXED(-dasprang.Sin()) >> 2; + DVector3 screenPos = viewport->PointViewToScreen(viewPos); + DVector2 screenExtent = viewport->ScaleViewToScreen({ extentX, extentY }, viewPos.Z, pixelstretch); - R_SetupDrawSlab(colormap, 0.0f, colormapnum << FRACBITS); + int x1 = MAX((int)(screenPos.X - screenExtent.X), 0); + int x2 = MIN((int)(screenPos.X + screenExtent.X + 0.5f), viewwidth - 1); + int y1 = MAX((int)(screenPos.Y - screenExtent.Y), 0); + int y2 = MIN((int)(screenPos.Y + screenExtent.Y + 0.5f), viewheight - 1); - int pixelsize = r_swtruecolor ? 4 : 1; + int pixelsize = viewport->RenderTarget->IsBgra() ? 4 : 1; - // Select mip level - i = abs(DMulScale6(dasprx - globalposx, cosang, daspry - globalposy, sinang)); - i = DivScale6(i, MIN(daxscale, dayscale)); - j = xs_Fix<13>::ToFix(FocalLengthX); - for (k = 0; i >= j && k < voxobj->NumMips; ++k) + if (y1 < y2) { - i >>= 1; - } - if (k >= voxobj->NumMips) k = voxobj->NumMips - 1; - - mip = &voxobj->Mips[k]; if (mip->SlabData == NULL) return; - - minslabz >>= k; - maxslabz >>= k; - - daxscale <<= (k+8); dayscale <<= (k+8); - dazscale = FixedDiv(dayscale, FLOAT2FIXED(BaseYaspectMul)); - daxscale = fixed_t(daxscale / YaspectMul); - daxscale = Scale(daxscale, xdimenscale, centerxwide << 9); - dayscale = Scale(dayscale, FixedMul(xdimenscale, viewingrangerecip), centerxwide << 9); - - daxscalerecip = (1<<30) / daxscale; - dayscalerecip = (1<<30) / dayscale; - - fixed_t piv_x = fixed_t(mip->Pivot.X*256.); - fixed_t piv_y = fixed_t(mip->Pivot.Y*256.); - fixed_t piv_z = fixed_t(mip->Pivot.Z*256.); - - x = FixedMul(globalposx - dasprx, daxscalerecip); - y = FixedMul(globalposy - daspry, daxscalerecip); - backx = (DMulScale10(x, sprcosang, y, sprsinang) + piv_x) >> 8; - backy = (DMulScale10(y, 
sprcosang, x, -sprsinang) + piv_y) >> 8; - cbackx = clamp(backx, 0, mip->SizeX - 1); - cbacky = clamp(backy, 0, mip->SizeY - 1); - - sprcosang = MulScale14(daxscale, sprcosang); - sprsinang = MulScale14(daxscale, sprsinang); - - x = (dasprx - globalposx) - DMulScale18(piv_x, sprcosang, piv_y, -sprsinang); - y = (daspry - globalposy) - DMulScale18(piv_y, sprcosang, piv_x, sprsinang); - - cosang = FixedMul(cosang, dayscalerecip); - sinang = FixedMul(sinang, dayscalerecip); - - gxstart = y*cosang - x*sinang; - gystart = x*cosang + y*sinang; - gxinc = DMulScale10(sprsinang, cosang, sprcosang, -sinang); - gyinc = DMulScale10(sprcosang, cosang, sprsinang, sinang); - if ((abs(globalposz - dasprz) >> 10) >= abs(dazscale)) return; - - x = 0; y = 0; j = MAX(mip->SizeX, mip->SizeY); - fixed_t *ggxinc = (fixed_t *)alloca((j + 1) * sizeof(fixed_t) * 2); - fixed_t *ggyinc = ggxinc + (j + 1); - for (i = 0; i <= j; i++) - { - ggxinc[i] = x; x += gxinc; - ggyinc[i] = y; y += gyinc; - } - - syoff = DivScale21(globalposz - dasprz, FixedMul(dazscale, 0xE800)) + (piv_z << 7); - yoff = (abs(gxinc) + abs(gyinc)) >> 1; - - for (cnt = 0; cnt < 8; cnt++) - { - switch (cnt) + for (int x = x1; x < x2; x++) { - case 0: xs = 0; ys = 0; xi = 1; yi = 1; break; - case 1: xs = mip->SizeX-1; ys = 0; xi = -1; yi = 1; break; - case 2: xs = 0; ys = mip->SizeY-1; xi = 1; yi = -1; break; - case 3: xs = mip->SizeX-1; ys = mip->SizeY-1; xi = -1; yi = -1; break; - case 4: xs = 0; ys = cbacky; xi = 1; yi = 2; break; - case 5: xs = mip->SizeX-1; ys = cbacky; xi = -1; yi = 2; break; - case 6: xs = cbackx; ys = 0; xi = 2; yi = 1; break; - case 7: xs = cbackx; ys = mip->SizeY-1; xi = 2; yi = -1; break; - } - xe = cbackx; ye = cbacky; - if (cnt < 4) - { - if ((xi < 0) && (xe >= xs)) continue; - if ((xi > 0) && (xe <= xs)) continue; - if ((yi < 0) && (ye >= ys)) continue; - if ((yi > 0) && (ye <= ys)) continue; - } - else - { - if ((xi < 0) && (xe > xs)) continue; - if ((xi > 0) && (xe < xs)) continue; - if ((yi < 
0) && (ye > ys)) continue; - if ((yi > 0) && (ye < ys)) continue; - xe += xi; ye += yi; - } - - i = sgn(ys - backy) + sgn(xs - backx) * 3 + 4; - switch(i) - { - case 6: case 7: x1 = 0; y1 = 0; break; - case 8: case 5: x1 = gxinc; y1 = gyinc; break; - case 0: case 3: x1 = gyinc; y1 = -gxinc; break; - case 2: case 1: x1 = gxinc+gyinc; y1 = gyinc-gxinc; break; - } - switch(i) - { - case 2: case 5: x2 = 0; y2 = 0; break; - case 0: case 1: x2 = gxinc; y2 = gyinc; break; - case 8: case 7: x2 = gyinc; y2 = -gxinc; break; - case 6: case 3: x2 = gxinc+gyinc; y2 = gyinc-gxinc; break; - } - BYTE oand = (1 << int(xs<backx)) + (1 << (int(ys<backy)+2)); - BYTE oand16 = oand + 16; - BYTE oand32 = oand + 32; - - if (yi > 0) { dagxinc = gxinc; dagyinc = FixedMul(gyinc, viewingrangerecip); } - else { dagxinc = -gxinc; dagyinc = -FixedMul(gyinc, viewingrangerecip); } - - /* Fix for non 90 degree viewing ranges */ - nxoff = FixedMul(x2 - x1, viewingrangerecip); - x1 = FixedMul(x1, viewingrangerecip); - - ggxstart = gxstart + ggyinc[ys]; - ggystart = gystart - ggxinc[ys]; - - for (x = xs; x != xe; x += xi) - { - BYTE *slabxoffs = &mip->SlabData[mip->OffsetX[x]]; - short *xyoffs = &mip->OffsetXY[x * (mip->SizeY + 1)]; - - nx = FixedMul(ggxstart + ggxinc[x], viewingrangerecip) + x1; - ny = ggystart + ggyinc[x]; - for (y = ys; y != ye; y += yi, nx += dagyinc, ny -= dagxinc) + int columnY1 = MAX(y1, (int)cliptop[x]); + int columnY2 = MIN(y2, (int)clipbottom[x]); + if (columnY1 < columnY2) { - if ((ny <= nytooclose) || (ny >= nytoofar)) continue; - voxptr = (kvxslab_t *)(slabxoffs + xyoffs[y]); - voxend = (kvxslab_t *)(slabxoffs + xyoffs[y+1]); - if (voxptr >= voxend) continue; - - lx = xs_RoundToInt(nx * centerxwide_f / (ny + y1)) + centerx; - if (lx < 0) lx = 0; - rx = xs_RoundToInt((nx + nxoff) * centerxwide_f / (ny + y2)) + centerx; - if (rx > viewwidth) rx = viewwidth; - if (rx <= lx) continue; - - if (flags & DVF_MIRRORED) - { - int t = viewwidth - lx; - lx = viewwidth - rx; - rx = t; - } - - fixed_t l1 = xs_RoundToInt(centerxwidebig_f / (ny - yoff)); - fixed_t l2 = 
xs_RoundToInt(centerxwidebig_f / (ny + yoff)); - for (; voxptr < voxend; voxptr = (kvxslab_t *)((BYTE *)voxptr + voxptr->zleng + 3)) - { - const BYTE *col = voxptr->col; - int zleng = voxptr->zleng; - int ztop = voxptr->ztop; - fixed_t z1, z2; - - if (ztop < minslabz) - { - int diff = minslabz - ztop; - ztop = minslabz; - col += diff; - zleng -= diff; - } - if (ztop + zleng > maxslabz) - { - int diff = ztop + zleng - maxslabz; - zleng -= diff; - } - if (zleng <= 0) continue; - - j = (ztop << 15) - syoff; - if (j < 0) - { - k = j + (zleng << 15); - if (k < 0) - { - if ((voxptr->backfacecull & oand32) == 0) continue; - z2 = MulScale32(l2, k) + centery; /* Below slab */ - } - else - { - if ((voxptr->backfacecull & oand) == 0) continue; /* Middle of slab */ - z2 = MulScale32(l1, k) + centery; - } - z1 = MulScale32(l1, j) + centery; - } - else - { - if ((voxptr->backfacecull & oand16) == 0) continue; - z1 = MulScale32(l2, j) + centery; /* Above slab */ - z2 = MulScale32(l1, j + (zleng << 15)) + centery; - } - - if (z2 <= z1) continue; - - if (zleng == 1) - { - yinc = 0; - } - else - { - if (z2-z1 >= 1024) yinc = FixedDiv(zleng, z2 - z1); - else yinc = (((1 << 24) - 1) / (z2 - z1)) * zleng >> 8; - } - // [RH] Clip each column separately, not just by the first one. 
- for (int stripwidth = MIN(countof(z1a), rx - lx), lxt = lx; - lxt < rx; - (lxt += countof(z1a)), stripwidth = MIN(countof(z1a), rx - lxt)) - { - // Calculate top and bottom pixels locations - for (int xxx = 0; xxx < stripwidth; ++xxx) - { - if (zleng == 1) - { - yplc[xxx] = 0; - z1a[xxx] = MAX(z1, daumost[lxt + xxx]); - } - else - { - if (z1 < daumost[lxt + xxx]) - { - yplc[xxx] = yinc * (daumost[lxt + xxx] - z1); - z1a[xxx] = daumost[lxt + xxx]; - } - else - { - yplc[xxx] = 0; - z1a[xxx] = z1; - } - } - z2a[xxx] = MIN(z2, dadmost[lxt + xxx]); - } - // Find top and bottom pixels that match and draw them as one strip - for (int xxl = 0, xxr; xxl < stripwidth; ) - { - if (z1a[xxl] >= z2a[xxl]) - { // No column here - xxl++; - continue; - } - int z1 = z1a[xxl]; - int z2 = z2a[xxl]; - // How many columns share the same extents? - for (xxr = xxl + 1; xxr < stripwidth; ++xxr) - { - if (z1a[xxr] != z1 || z2a[xxr] != z2) - break; - } - - if (!(flags & DVF_OFFSCREEN)) - { - // Draw directly to the screen. - R_DrawSlab(xxr - xxl, yplc[xxl], z2 - z1, yinc, col, (ylookup[z1] + lxt + xxl) * pixelsize + dc_destorg); - } - else - { - // Record the area covered and possibly draw to an offscreen buffer. 
- dc_yl = z1; - dc_yh = z2 - 1; - dc_count = z2 - z1; - dc_iscale = yinc; - for (int x = xxl; x < xxr; ++x) - { - OffscreenCoverageBuffer->InsertSpan(lxt + x, z1, z2); - if (!(flags & DVF_SPANSONLY)) - { - dc_x = lxt + x; - rt_initcols(OffscreenColorBuffer + (dc_x & ~3) * OffscreenBufferHeight); - dc_source = col; - dc_source2 = nullptr; - dc_texturefrac = yplc[xxl]; - hcolfunc_pre(); - } - } - } - xxl = xxr; - } - } - } + drawerargs.SetDest(x, columnY1); + drawerargs.SetSolidColor(color); + drawerargs.SetCount(columnY2 - columnY1); + drawerargs.FillColumn(thread); } } } diff --git a/src/swrenderer/things/r_voxel.h b/src/swrenderer/things/r_voxel.h index 1d663d251a..e8bbe64cd3 100644 --- a/src/swrenderer/things/r_voxel.h +++ b/src/swrenderer/things/r_voxel.h @@ -1,25 +1,26 @@ -/* -** Voxel rendering -** Copyright (c) 1998-2016 Randy Heit -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 'as-is', without any express or implied -** warranty. In no event will the authors be held liable for any damages -** arising from the use of this software. -** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. -** 3. This notice may not be removed or altered from any source distribution. -** -*/ +// +//--------------------------------------------------------------------------- +// +// Voxel rendering +// Copyright(c) 1993 - 1997 Ken Silverman +// Copyright(c) 1998 - 2016 Randy Heit +// All rights reserved. 
+// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with this program. If not, see http://www.gnu.org/licenses/ +// +//-------------------------------------------------------------------------- +// #pragma once @@ -77,14 +78,13 @@ namespace swrenderer DAngle Angle = { 0.0 }; fixed_t xscale = 0; FVoxel *voxel = nullptr; + bool bInMirror = false; uint32_t Translation = 0; uint32_t FillColor = 0; enum { DVF_OFFSCREEN = 1, DVF_SPANSONLY = 2, DVF_MIRRORED = 4 }; - static void FillBox(RenderThread *thread, SpriteDrawerArgs &drawerargs, DVector3 origin, double extentX, double extentY, int color, short *cliptop, short *clipbottom, bool viewspace, bool pixelstretch); - static kvxslab_t *GetSlabStart(const FVoxelMipLevel &mip, int x, int y); static kvxslab_t *GetSlabEnd(const FVoxelMipLevel &mip, int x, int y); static kvxslab_t *NextSlab(kvxslab_t *slab); @@ -95,5 +95,15 @@ namespace swrenderer static int OffscreenBufferWidth; static int OffscreenBufferHeight; static uint8_t *OffscreenColorBuffer; + + void DrawVoxel( + RenderThread *thread, SpriteDrawerArgs &drawerargs, + const FVector3 &globalpos, FAngle viewangle, const FVector3 &dasprpos, DAngle dasprang, fixed_t daxscale, fixed_t dayscale, + FVoxel *voxobj, short *daumost, short *dadmost, int minslabz, int maxslabz, int flags); + + int sgn(int v) + { + return v < 0 ? -1 : v > 0 ? 
1 : 0; + } }; } diff --git a/src/swrenderer/viewport/r_spritedrawer.cpp b/src/swrenderer/viewport/r_spritedrawer.cpp index 46c209f99a..0634bff709 100644 --- a/src/swrenderer/viewport/r_spritedrawer.cpp +++ b/src/swrenderer/viewport/r_spritedrawer.cpp @@ -497,6 +497,17 @@ namespace swrenderer thread->Drawers()->FillColumn(*this); } + void SpriteDrawerArgs::DrawVoxelColumn(RenderThread *thread, fixed_t vPos, fixed_t vStep, const uint8_t *voxels, int voxelsCount) + { + dc_iscale = vStep; + dc_texturefrac = vPos; + dc_texturefracx = 0; + dc_source = voxels; + dc_source2 = 0; + dc_textureheight = voxelsCount; + (thread->Drawers()->*colfunc)(*this); + } + void SpriteDrawerArgs::SetDest(int x, int y) { auto viewport = RenderViewport::Instance(); diff --git a/src/swrenderer/viewport/r_spritedrawer.h b/src/swrenderer/viewport/r_spritedrawer.h index 50acc1c0ed..fc5053e6e0 100644 --- a/src/swrenderer/viewport/r_spritedrawer.h +++ b/src/swrenderer/viewport/r_spritedrawer.h @@ -24,6 +24,7 @@ namespace swrenderer void DrawMaskedColumn(RenderThread *thread, int x, fixed_t iscale, FTexture *texture, fixed_t column, double spryscale, double sprtopscreen, bool sprflipvert, const short *mfloorclip, const short *mceilingclip, bool unmasked = false); void FillColumn(RenderThread *thread); + void DrawVoxelColumn(RenderThread *thread, fixed_t vPos, fixed_t vStep, const uint8_t *voxels, int voxelsCount); uint8_t *Dest() const { return dc_dest; } int DestY() const { return dc_dest_y; } diff --git a/src/swrenderer/viewport/r_viewport.cpp b/src/swrenderer/viewport/r_viewport.cpp index 79f61ecd21..6182976d70 100644 --- a/src/swrenderer/viewport/r_viewport.cpp +++ b/src/swrenderer/viewport/r_viewport.cpp @@ -156,6 +156,9 @@ namespace swrenderer FocalLengthX = CenterX / FocalTangent; FocalLengthY = FocalLengthX * YaspectMul; + // This is 1/FocalTangent before the widescreen extension of FOV. + viewingrangerecip = FLOAT2FIXED(1. 
/ tan(FieldOfView.Radians() / 2)); + // Now generate xtoviewangle for sky texture mapping. // [RH] Do not generate viewangletox, because texture mapping is no // longer done with trig, so it's not needed. diff --git a/src/swrenderer/viewport/r_viewport.h b/src/swrenderer/viewport/r_viewport.h index 7561ff83b6..a16c8fde22 100644 --- a/src/swrenderer/viewport/r_viewport.h +++ b/src/swrenderer/viewport/r_viewport.h @@ -42,7 +42,10 @@ namespace swrenderer double IYaspectMul = 0.0; double globaluclip = 0.0; double globaldclip = 0.0; - + + fixed_t viewingrangerecip = 0; + double BaseYaspectMul = 0.0; // yaspectmul without a forced aspect ratio + // The xtoviewangleangle[] table maps a screen pixel // to the lowest viewangle that maps back to x ranges // from clipangle to -clipangle. @@ -70,7 +73,5 @@ namespace swrenderer private: void InitTextureMapping(); void SetupBuffer(); - - double BaseYaspectMul = 0.0; // yaspectmul without a forced aspect ratio }; } From 0cea344dcef767f0133141e45a0e9115909a1a44 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 11 Feb 2017 23:23:34 +0100 Subject: [PATCH 836/912] Fix voxel clipping bug --- src/swrenderer/things/r_visiblesprite.cpp | 2 +- src/swrenderer/things/r_voxel.cpp | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/src/swrenderer/things/r_visiblesprite.cpp b/src/swrenderer/things/r_visiblesprite.cpp index 809a213977..1f402de818 100644 --- a/src/swrenderer/things/r_visiblesprite.cpp +++ b/src/swrenderer/things/r_visiblesprite.cpp @@ -157,7 +157,7 @@ namespace swrenderer auto viewport = RenderViewport::Instance(); double scale = viewport->InvZtoScale * spr->idepth; - double hzb = DBL_MIN, hzt = DBL_MAX; + double hzb = -DBL_MAX, hzt = DBL_MAX; if (spr->IsVoxel() && spr->floorclip != 0) { diff --git a/src/swrenderer/things/r_voxel.cpp b/src/swrenderer/things/r_voxel.cpp index 62eb4b62f8..ddb8d52c7e 100644 --- a/src/swrenderer/things/r_voxel.cpp +++ b/src/swrenderer/things/r_voxel.cpp @@ -558,7 +558,6 @@ 
namespace swrenderer for (int x = xxl; x < xxr; ++x) { drawerargs.SetDest(lxt + x, z1); - drawerargs.SetSolidColor(100); drawerargs.SetCount(z2 - z1); drawerargs.DrawVoxelColumn(thread, yplc[xxl], yinc, col, zleng); } From 5a85fabfa618168a809d306ce67d43ec85efcdc4 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 12 Feb 2017 01:27:26 +0100 Subject: [PATCH 837/912] Add true color support to voxel renderer --- src/r_data/voxels.cpp | 46 ++++++++++++++++++++++ src/r_data/voxels.h | 10 +++++ src/swrenderer/r_swrenderer.cpp | 1 + src/swrenderer/things/r_voxel.cpp | 14 ++++++- src/swrenderer/viewport/r_spritedrawer.cpp | 15 ++++++- 5 files changed, 83 insertions(+), 3 deletions(-) diff --git a/src/r_data/voxels.cpp b/src/r_data/voxels.cpp index d81220ea8f..4b80812882 100644 --- a/src/r_data/voxels.cpp +++ b/src/r_data/voxels.cpp @@ -392,6 +392,52 @@ FVoxel::~FVoxel() if (Palette != NULL) delete [] Palette; } +//========================================================================== +// +// Create true color version of the slab data +// +//========================================================================== + +void FVoxel::CreateBgraSlabData() +{ + assert(Palette != NULL); + + for (int i = 0; i < NumMips; ++i) + { + int size = Mips[i].OffsetX[Mips[i].SizeX]; + if (size <= 0) continue; + + Mips[i].SlabDataBgra.Resize(size); + + kvxslab_t *src = (kvxslab_t*)Mips[i].SlabData; + kvxslab_bgra_t *dest = (kvxslab_bgra_t*)&Mips[i].SlabDataBgra[0]; + + while (size >= 3) + { + dest->backfacecull = src->backfacecull; + dest->ztop = src->ztop; + dest->zleng = src->zleng; + + int slabzleng = src->zleng; + for (int j = 0; j < slabzleng; ++j) + { + int colorIndex = src->col[j]; + + uint32_t red = (Palette[colorIndex * 3 + 0] << 2) | (Palette[colorIndex * 3 + 0] >> 4); + uint32_t green = (Palette[colorIndex * 3 + 1] << 2) | (Palette[colorIndex * 3 + 1] >> 4); + uint32_t blue = (Palette[colorIndex * 3 + 2] << 2) | (Palette[colorIndex * 3 + 2] >> 4); + + dest->col[j] = 
0xff000000 | (red << 16) | (green << 8) | blue; + } + slabzleng += 3; + + dest = (kvxslab_bgra_t *)((uint32_t *)dest + slabzleng); + src = (kvxslab_t *)((BYTE *)src + slabzleng); + size -= slabzleng; + } + } +} + //========================================================================== // // Remap the voxel to the game palette diff --git a/src/r_data/voxels.h b/src/r_data/voxels.h index 85095c52f5..df5839836a 100644 --- a/src/r_data/voxels.h +++ b/src/r_data/voxels.h @@ -15,6 +15,14 @@ struct kvxslab_t BYTE col[1/*zleng*/];// color data from top to bottom }; +struct kvxslab_bgra_t +{ + uint32_t ztop; // starting z coordinate of top of slab + uint32_t zleng; // # of bytes in the color array - slab height + uint32_t backfacecull; // low 6 bits tell which of 6 faces are exposed + uint32_t col[1/*zleng*/];// color data from top to bottom +}; + struct FVoxelMipLevel { FVoxelMipLevel(); @@ -27,6 +35,7 @@ struct FVoxelMipLevel int *OffsetX; short *OffsetXY; BYTE *SlabData; + TArray<uint32_t> SlabDataBgra; }; struct FVoxel @@ -39,6 +48,7 @@ struct FVoxel FVoxel(); ~FVoxel(); + void CreateBgraSlabData(); void Remap(); void RemovePalette(); }; diff --git a/src/swrenderer/r_swrenderer.cpp b/src/swrenderer/r_swrenderer.cpp index 561f0de430..0366886cec 100644 --- a/src/swrenderer/r_swrenderer.cpp +++ b/src/swrenderer/r_swrenderer.cpp @@ -187,6 +187,7 @@ void FSoftwareRenderer::RemapVoxels() { for (unsigned i=0; i<Voxels.Size(); i++) { + Voxels[i]->CreateBgraSlabData(); Voxels[i]->Remap(); } } diff --git a/src/swrenderer/things/r_voxel.cpp b/src/swrenderer/things/r_voxel.cpp index ddb8d52c7e..33e889c3f6 100644 --- a/src/swrenderer/things/r_voxel.cpp +++ b/src/swrenderer/things/r_voxel.cpp @@ -538,6 +538,18 @@ namespace swrenderer } z2a[xxx] = MIN<int>(z2, dadmost[lxt + xxx]); } + + const uint8_t *columnColors = col; + bool bgra = viewport->RenderTarget->IsBgra(); + if (bgra) + { + // The true color slab data array is identical, except its using uint32 instead of uint8. 
+ // + // We can find the same slab column by calculating the offset from the start of SlabData + // and use that to offset into the BGRA version of the same data. + columnColors = (const uint8_t *)(&mip->SlabDataBgra[0] + (ptrdiff_t)(col - mip->SlabData)); + } + // Find top and bottom pixels that match and draw them as one strip for (int xxl = 0, xxr; xxl < stripwidth; ) { @@ -559,7 +571,7 @@ namespace swrenderer { drawerargs.SetDest(lxt + x, z1); drawerargs.SetCount(z2 - z1); - drawerargs.DrawVoxelColumn(thread, yplc[xxl], yinc, col, zleng); + drawerargs.DrawVoxelColumn(thread, yplc[xxl], yinc, columnColors, zleng); } /* diff --git a/src/swrenderer/viewport/r_spritedrawer.cpp b/src/swrenderer/viewport/r_spritedrawer.cpp index 0634bff709..cfcd3f2850 100644 --- a/src/swrenderer/viewport/r_spritedrawer.cpp +++ b/src/swrenderer/viewport/r_spritedrawer.cpp @@ -499,8 +499,19 @@ namespace swrenderer void SpriteDrawerArgs::DrawVoxelColumn(RenderThread *thread, fixed_t vPos, fixed_t vStep, const uint8_t *voxels, int voxelsCount) { - dc_iscale = vStep; - dc_texturefrac = vPos; + if (RenderViewport::Instance()->RenderTarget->IsBgra()) + { + double v = vPos / (double)voxelsCount / FRACUNIT; + double vstep = vStep / (double)voxelsCount / FRACUNIT; + dc_texturefrac = (int)(v * (1 << 30)); + dc_iscale = (int)(vstep * (1 << 30)); + } + else + { + dc_texturefrac = vPos; + dc_iscale = vStep; + } + dc_texturefracx = 0; dc_source = voxels; dc_source2 = 0; From 9123c71bb692dda2adaa66dd247b60a1d71f8e77 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 12 Feb 2017 03:25:17 +0100 Subject: [PATCH 838/912] Fix clang warning --- src/swrenderer/scene/r_translucent_pass.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/swrenderer/scene/r_translucent_pass.cpp b/src/swrenderer/scene/r_translucent_pass.cpp index 419bcf8ee9..2e360a5e6a 100644 --- a/src/swrenderer/scene/r_translucent_pass.cpp +++ b/src/swrenderer/scene/r_translucent_pass.cpp @@ -71,7 +71,7 @@ 
namespace swrenderer // b) skip most of the collected drawsegs which have no portal attached. portaldrawsegs.Clear(); DrawSegmentList *drawseglist = Thread->DrawSegments.get(); - for (auto index = 0; index != drawseglist->SegmentsCount(); index++) + for (unsigned int index = 0; index != drawseglist->SegmentsCount(); index++) { DrawSegment *seg = drawseglist->Segment(index); @@ -144,7 +144,7 @@ namespace swrenderer } DrawSegmentList *drawseglist = Thread->DrawSegments.get(); - for (auto index = 0; index != drawseglist->SegmentsCount(); index++) + for (unsigned int index = 0; index != drawseglist->SegmentsCount(); index++) { DrawSegment *ds = drawseglist->Segment(index); From 0ba8448782f864e6004a2e61b301b819b639e184 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 12 Feb 2017 03:25:27 +0100 Subject: [PATCH 839/912] Fix colormap null pointer crash --- src/v_draw.cpp | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/v_draw.cpp b/src/v_draw.cpp index 7b09674a76..7cb8fde337 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -243,12 +243,8 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) drawerargs.SetTranslationMap(identitymap); } - bool visible; FDynamicColormap *basecolormap = nullptr; - if (viewport->RenderTarget->IsBgra()) - visible = drawerargs.SetStyle(parms.style, parms.Alpha, -1, parms.fillcolor, basecolormap); - else - visible = drawerargs.SetStyle(parms.style, parms.Alpha, 0, parms.fillcolor, basecolormap); + bool visible = drawerargs.SetStyle(parms.style, parms.Alpha, -1, parms.fillcolor, basecolormap); double x0 = parms.x - parms.left * parms.destwidth / parms.texwidth; double y0 = parms.y - parms.top * parms.destheight / parms.texheight; From 0dadf38bbe3b137f863340e6ca7320460a03ebdb Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 12 Feb 2017 06:17:38 +0100 Subject: [PATCH 840/912] Create WallPartTexture class --- src/swrenderer/line/r_line.cpp | 170 ++++++++++++++++----------------- 
src/swrenderer/line/r_line.h | 29 +++--- 2 files changed, 95 insertions(+), 104 deletions(-) diff --git a/src/swrenderer/line/r_line.cpp b/src/swrenderer/line/r_line.cpp index d3e2927294..2e48812054 100644 --- a/src/swrenderer/line/r_line.cpp +++ b/src/swrenderer/line/r_line.cpp @@ -489,7 +489,7 @@ namespace swrenderer draw_segment->light = rw_light; draw_segment->lightstep = rw_lightstep; - // Masked midtextures should get the light level from the sector they reference, + // Masked mMiddlePart.Textures should get the light level from the sector they reference, // not from the current subsector, which is what the current wallshade value // comes from. We make an exeption for polyobjects, however, since their "home" // sector should be whichever one they move into. @@ -512,26 +512,12 @@ namespace swrenderer // render it if (markceiling) { - if (mCeilingPlane) - { // killough 4/11/98: add NULL ptr checks - mCeilingPlane = Thread->PlaneList->GetRange(mCeilingPlane, start, stop); - } - else - { - markceiling = false; - } + mCeilingPlane = Thread->PlaneList->GetRange(mCeilingPlane, start, stop); } if (markfloor) { - if (mFloorPlane) - { // killough 4/11/98: add NULL ptr checks - mFloorPlane = Thread->PlaneList->GetRange(mFloorPlane, start, stop); - } - else - { - markfloor = false; - } + mFloorPlane = Thread->PlaneList->GetRange(mFloorPlane, start, stop); } RenderWallSegmentTextures(start, stop); @@ -576,6 +562,9 @@ namespace swrenderer bool SWRenderLine::ShouldMarkFloor() const { + if (!mFloorPlane) + return false; + // deep water check if (mFrontSector->GetHeightSec() == nullptr) { @@ -630,6 +619,9 @@ namespace swrenderer bool SWRenderLine::ShouldMarkCeiling() const { + if (!mCeilingPlane) + return false; + // deep water check if (mFrontSector->GetHeightSec() == nullptr && mFrontSector->GetTexture(sector_t::ceiling) != skyflatnum) { @@ -747,7 +739,9 @@ namespace swrenderer markfloor = ShouldMarkFloor(); markceiling = ShouldMarkCeiling(); - midtexture = toptexture = 
bottomtexture = 0; + mTopPart.Texture = nullptr; + mMiddlePart.Texture = nullptr; + mBottomPart.Texture = nullptr; if (sidedef == linedef->sidedef[0] && (linedef->special == Line_Mirror && r_drawmirrors)) // [ZZ] compatibility with r_drawmirrors cvar that existed way before portals @@ -763,24 +757,24 @@ namespace swrenderer } else if (linedef->special != Line_Horizon) { - midtexture = TexMan(sidedef->GetTexture(side_t::mid), true); - rw_offset_mid = FLOAT2FIXED(sidedef->GetTextureXOffset(side_t::mid)); + mMiddlePart.Texture = TexMan(sidedef->GetTexture(side_t::mid), true); + mMiddlePart.TextureOffsetU = FLOAT2FIXED(sidedef->GetTextureXOffset(side_t::mid)); double rowoffset = sidedef->GetTextureYOffset(side_t::mid); - rw_midtexturescalex = sidedef->GetTextureXScale(side_t::mid); - rw_midtexturescaley = sidedef->GetTextureYScale(side_t::mid); - double yrepeat = midtexture->Scale.Y * rw_midtexturescaley; + mMiddlePart.TextureScaleU = sidedef->GetTextureXScale(side_t::mid); + mMiddlePart.TextureScaleV = sidedef->GetTextureYScale(side_t::mid); + double yrepeat = mMiddlePart.Texture->Scale.Y * mMiddlePart.TextureScaleV; if (yrepeat >= 0) { // normal orientation if (linedef->flags & ML_DONTPEGBOTTOM) { // bottom of texture at bottom - rw_midtexturemid = (mFrontSector->GetPlaneTexZ(sector_t::floor) - ViewPos.Z) * yrepeat + midtexture->GetHeight(); + mMiddlePart.TextureMid = (mFrontSector->GetPlaneTexZ(sector_t::floor) - ViewPos.Z) * yrepeat + mMiddlePart.Texture->GetHeight(); } else { // top of texture at top - rw_midtexturemid = (mFrontSector->GetPlaneTexZ(sector_t::ceiling) - ViewPos.Z) * yrepeat; - if (rowoffset < 0 && midtexture != NULL) + mMiddlePart.TextureMid = (mFrontSector->GetPlaneTexZ(sector_t::ceiling) - ViewPos.Z) * yrepeat; + if (rowoffset < 0 && mMiddlePart.Texture != NULL) { - rowoffset += midtexture->GetHeight(); + rowoffset += mMiddlePart.Texture->GetHeight(); } } } @@ -789,22 +783,22 @@ namespace swrenderer rowoffset = -rowoffset; if (linedef->flags & 
ML_DONTPEGBOTTOM) { // top of texture at bottom - rw_midtexturemid = (mFrontSector->GetPlaneTexZ(sector_t::floor) - ViewPos.Z) * yrepeat; + mMiddlePart.TextureMid = (mFrontSector->GetPlaneTexZ(sector_t::floor) - ViewPos.Z) * yrepeat; } else { // bottom of texture at top - rw_midtexturemid = (mFrontSector->GetPlaneTexZ(sector_t::ceiling) - ViewPos.Z) * yrepeat + midtexture->GetHeight(); + mMiddlePart.TextureMid = (mFrontSector->GetPlaneTexZ(sector_t::ceiling) - ViewPos.Z) * yrepeat + mMiddlePart.Texture->GetHeight(); } } - if (midtexture->bWorldPanning) + if (mMiddlePart.Texture->bWorldPanning) { - rw_midtexturemid += rowoffset * yrepeat; + mMiddlePart.TextureMid += rowoffset * yrepeat; } else { // rowoffset is added outside the multiply so that it positions the texture // by texels instead of world units. - rw_midtexturemid += rowoffset; + mMiddlePart.TextureMid += rowoffset; } } } @@ -838,26 +832,26 @@ namespace swrenderer if (rw_havehigh) { // top texture - toptexture = TexMan(sidedef->GetTexture(side_t::top), true); + mTopPart.Texture = TexMan(sidedef->GetTexture(side_t::top), true); - rw_offset_top = FLOAT2FIXED(sidedef->GetTextureXOffset(side_t::top)); + mTopPart.TextureOffsetU = FLOAT2FIXED(sidedef->GetTextureXOffset(side_t::top)); double rowoffset = sidedef->GetTextureYOffset(side_t::top); - rw_toptexturescalex = sidedef->GetTextureXScale(side_t::top); - rw_toptexturescaley = sidedef->GetTextureYScale(side_t::top); - double yrepeat = toptexture->Scale.Y * rw_toptexturescaley; + mTopPart.TextureScaleU = sidedef->GetTextureXScale(side_t::top); + mTopPart.TextureScaleV = sidedef->GetTextureYScale(side_t::top); + double yrepeat = mTopPart.Texture->Scale.Y * mTopPart.TextureScaleV; if (yrepeat >= 0) { // normal orientation if (linedef->flags & ML_DONTPEGTOP) { // top of texture at top - rw_toptexturemid = (mFrontSector->GetPlaneTexZ(sector_t::ceiling) - ViewPos.Z) * yrepeat; - if (rowoffset < 0 && toptexture != NULL) + mTopPart.TextureMid = 
(mFrontSector->GetPlaneTexZ(sector_t::ceiling) - ViewPos.Z) * yrepeat; + if (rowoffset < 0 && mTopPart.Texture != NULL) { - rowoffset += toptexture->GetHeight(); + rowoffset += mTopPart.Texture->GetHeight(); } } else { // bottom of texture at bottom - rw_toptexturemid = (mBackSector->GetPlaneTexZ(sector_t::ceiling) - ViewPos.Z) * yrepeat + toptexture->GetHeight(); + mTopPart.TextureMid = (mBackSector->GetPlaneTexZ(sector_t::ceiling) - ViewPos.Z) * yrepeat + mTopPart.Texture->GetHeight(); } } else @@ -865,43 +859,43 @@ namespace swrenderer rowoffset = -rowoffset; if (linedef->flags & ML_DONTPEGTOP) { // bottom of texture at top - rw_toptexturemid = (mFrontSector->GetPlaneTexZ(sector_t::ceiling) - ViewPos.Z) * yrepeat + toptexture->GetHeight(); + mTopPart.TextureMid = (mFrontSector->GetPlaneTexZ(sector_t::ceiling) - ViewPos.Z) * yrepeat + mTopPart.Texture->GetHeight(); } else { // top of texture at bottom - rw_toptexturemid = (mBackSector->GetPlaneTexZ(sector_t::ceiling) - ViewPos.Z) * yrepeat; + mTopPart.TextureMid = (mBackSector->GetPlaneTexZ(sector_t::ceiling) - ViewPos.Z) * yrepeat; } } - if (toptexture->bWorldPanning) + if (mTopPart.Texture->bWorldPanning) { - rw_toptexturemid += rowoffset * yrepeat; + mTopPart.TextureMid += rowoffset * yrepeat; } else { - rw_toptexturemid += rowoffset; + mTopPart.TextureMid += rowoffset; } } if (rw_havelow) { // bottom texture - bottomtexture = TexMan(sidedef->GetTexture(side_t::bottom), true); + mBottomPart.Texture = TexMan(sidedef->GetTexture(side_t::bottom), true); - rw_offset_bottom = FLOAT2FIXED(sidedef->GetTextureXOffset(side_t::bottom)); + mBottomPart.TextureOffsetU = FLOAT2FIXED(sidedef->GetTextureXOffset(side_t::bottom)); double rowoffset = sidedef->GetTextureYOffset(side_t::bottom); - rw_bottomtexturescalex = sidedef->GetTextureXScale(side_t::bottom); - rw_bottomtexturescaley = sidedef->GetTextureYScale(side_t::bottom); - double yrepeat = bottomtexture->Scale.Y * rw_bottomtexturescaley; + mBottomPart.TextureScaleU = 
sidedef->GetTextureXScale(side_t::bottom); + mBottomPart.TextureScaleV = sidedef->GetTextureYScale(side_t::bottom); + double yrepeat = mBottomPart.Texture->Scale.Y * mBottomPart.TextureScaleV; if (yrepeat >= 0) { // normal orientation if (linedef->flags & ML_DONTPEGBOTTOM) { // bottom of texture at bottom - rw_bottomtexturemid = (rw_frontlowertop - ViewPos.Z) * yrepeat; + mBottomPart.TextureMid = (rw_frontlowertop - ViewPos.Z) * yrepeat; } else { // top of texture at top - rw_bottomtexturemid = (mBackSector->GetPlaneTexZ(sector_t::floor) - ViewPos.Z) * yrepeat; - if (rowoffset < 0 && bottomtexture != NULL) + mBottomPart.TextureMid = (mBackSector->GetPlaneTexZ(sector_t::floor) - ViewPos.Z) * yrepeat; + if (rowoffset < 0 && mBottomPart.Texture != NULL) { - rowoffset += bottomtexture->GetHeight(); + rowoffset += mBottomPart.Texture->GetHeight(); } } } @@ -910,35 +904,35 @@ namespace swrenderer rowoffset = -rowoffset; if (linedef->flags & ML_DONTPEGBOTTOM) { // top of texture at bottom - rw_bottomtexturemid = (rw_frontlowertop - ViewPos.Z) * yrepeat; + mBottomPart.TextureMid = (rw_frontlowertop - ViewPos.Z) * yrepeat; } else { // bottom of texture at top - rw_bottomtexturemid = (mBackSector->GetPlaneTexZ(sector_t::floor) - ViewPos.Z) * yrepeat + bottomtexture->GetHeight(); + mBottomPart.TextureMid = (mBackSector->GetPlaneTexZ(sector_t::floor) - ViewPos.Z) * yrepeat + mBottomPart.Texture->GetHeight(); } } - if (bottomtexture->bWorldPanning) + if (mBottomPart.Texture->bWorldPanning) { - rw_bottomtexturemid += rowoffset * yrepeat; + mBottomPart.TextureMid += rowoffset * yrepeat; } else { - rw_bottomtexturemid += rowoffset; + mBottomPart.TextureMid += rowoffset; } } } FTexture *midtex = TexMan(sidedef->GetTexture(side_t::mid), true); - bool segtextured = midtex != NULL || toptexture != NULL || bottomtexture != NULL; + bool segtextured = midtex != NULL || mTopPart.Texture != NULL || mBottomPart.Texture != NULL; // calculate light table if (needlights && (segtextured || 
(mBackSector && IsFogBoundary(mFrontSector, mBackSector)))) { lwallscale = midtex ? (midtex->Scale.X * sidedef->GetTextureXScale(side_t::mid)) : - toptexture ? (toptexture->Scale.X * sidedef->GetTextureXScale(side_t::top)) : - bottomtexture ? (bottomtexture->Scale.X * sidedef->GetTextureXScale(side_t::bottom)) : + mTopPart.Texture ? (mTopPart.Texture->Scale.X * sidedef->GetTextureXScale(side_t::top)) : + mBottomPart.Texture ? (mBottomPart.Texture->Scale.X * sidedef->GetTextureXScale(side_t::bottom)) : 1.; walltexcoords.Project(sidedef->TexelLength * lwallscale, WallC.sx1, WallC.sx2, WallT); @@ -1066,25 +1060,25 @@ namespace swrenderer FLightNode *light_list = (mLineSegment && mLineSegment->sidedef) ? mLineSegment->sidedef->lighthead : nullptr; // draw the wall tiers - if (midtexture) + if (mMiddlePart.Texture) { // one sided line - if (midtexture->UseType != FTexture::TEX_Null && viewactive) + if (mMiddlePart.Texture->UseType != FTexture::TEX_Null && viewactive) { - FTexture *rw_pic = midtexture; - xscale = rw_pic->Scale.X * rw_midtexturescalex; - yscale = rw_pic->Scale.Y * rw_midtexturescaley; + FTexture *rw_pic = mMiddlePart.Texture; + xscale = rw_pic->Scale.X * mMiddlePart.TextureScaleU; + yscale = rw_pic->Scale.Y * mMiddlePart.TextureScaleV; if (xscale != lwallscale) { walltexcoords.ProjectPos(mLineSegment->sidedef->TexelLength*xscale, WallC.sx1, WallC.sx2, WallT); lwallscale = xscale; } - if (midtexture->bWorldPanning) + if (mMiddlePart.Texture->bWorldPanning) { - rw_offset = xs_RoundToInt(rw_offset_mid * xscale); + rw_offset = xs_RoundToInt(mMiddlePart.TextureOffsetU * xscale); } else { - rw_offset = rw_offset_mid; + rw_offset = mMiddlePart.TextureOffsetU; } if (xscale < 0) { @@ -1092,14 +1086,14 @@ namespace swrenderer } RenderWallPart renderWallpart(Thread); - renderWallpart.Render(drawerargs, mFrontSector, mLineSegment, WallC, rw_pic, x1, x2, walltop.ScreenY, wallbottom.ScreenY, rw_midtexturemid, walltexcoords.VStep, walltexcoords.UPos, yscale, 
MAX(mFrontCeilingZ1, mFrontCeilingZ2), MIN(mFrontFloorZ1, mFrontFloorZ2), false, wallshade, rw_offset, rw_light, rw_lightstep, light_list, foggy, basecolormap); + renderWallpart.Render(drawerargs, mFrontSector, mLineSegment, WallC, rw_pic, x1, x2, walltop.ScreenY, wallbottom.ScreenY, mMiddlePart.TextureMid, walltexcoords.VStep, walltexcoords.UPos, yscale, MAX(mFrontCeilingZ1, mFrontCeilingZ2), MIN(mFrontFloorZ1, mFrontFloorZ2), false, wallshade, rw_offset, rw_light, rw_lightstep, light_list, foggy, basecolormap); } fillshort(ceilingclip + x1, x2 - x1, viewheight); fillshort(floorclip + x1, x2 - x1, 0xffff); } else { // two sided line - if (toptexture != NULL && toptexture->UseType != FTexture::TEX_Null) + if (mTopPart.Texture != NULL && mTopPart.Texture->UseType != FTexture::TEX_Null) { // top wall for (x = x1; x < x2; ++x) { @@ -1107,21 +1101,21 @@ namespace swrenderer } if (viewactive) { - FTexture *rw_pic = toptexture; - xscale = rw_pic->Scale.X * rw_toptexturescalex; - yscale = rw_pic->Scale.Y * rw_toptexturescaley; + FTexture *rw_pic = mTopPart.Texture; + xscale = rw_pic->Scale.X * mTopPart.TextureScaleU; + yscale = rw_pic->Scale.Y * mTopPart.TextureScaleV; if (xscale != lwallscale) { walltexcoords.ProjectPos(mLineSegment->sidedef->TexelLength*xscale, WallC.sx1, WallC.sx2, WallT); lwallscale = xscale; } - if (toptexture->bWorldPanning) + if (mTopPart.Texture->bWorldPanning) { - rw_offset = xs_RoundToInt(rw_offset_top * xscale); + rw_offset = xs_RoundToInt(mTopPart.TextureOffsetU * xscale); } else { - rw_offset = rw_offset_top; + rw_offset = mTopPart.TextureOffsetU; } if (xscale < 0) { @@ -1129,7 +1123,7 @@ namespace swrenderer } RenderWallPart renderWallpart(Thread); - renderWallpart.Render(drawerargs, mFrontSector, mLineSegment, WallC, rw_pic, x1, x2, walltop.ScreenY, wallupper.ScreenY, rw_toptexturemid, walltexcoords.VStep, walltexcoords.UPos, yscale, MAX(mFrontCeilingZ1, mFrontCeilingZ2), MIN(mBackCeilingZ1, mBackCeilingZ2), false, wallshade, rw_offset, 
rw_light, rw_lightstep, light_list, foggy, basecolormap); + renderWallpart.Render(drawerargs, mFrontSector, mLineSegment, WallC, rw_pic, x1, x2, walltop.ScreenY, wallupper.ScreenY, mTopPart.TextureMid, walltexcoords.VStep, walltexcoords.UPos, yscale, MAX(mFrontCeilingZ1, mFrontCeilingZ2), MIN(mBackCeilingZ1, mBackCeilingZ2), false, wallshade, rw_offset, rw_light, rw_lightstep, light_list, foggy, basecolormap); } memcpy(ceilingclip + x1, wallupper.ScreenY + x1, (x2 - x1) * sizeof(short)); } @@ -1139,7 +1133,7 @@ namespace swrenderer } - if (bottomtexture != NULL && bottomtexture->UseType != FTexture::TEX_Null) + if (mBottomPart.Texture != NULL && mBottomPart.Texture->UseType != FTexture::TEX_Null) { // bottom wall for (x = x1; x < x2; ++x) { @@ -1147,21 +1141,21 @@ namespace swrenderer } if (viewactive) { - FTexture *rw_pic = bottomtexture; - xscale = rw_pic->Scale.X * rw_bottomtexturescalex; - yscale = rw_pic->Scale.Y * rw_bottomtexturescaley; + FTexture *rw_pic = mBottomPart.Texture; + xscale = rw_pic->Scale.X * mBottomPart.TextureScaleU; + yscale = rw_pic->Scale.Y * mBottomPart.TextureScaleV; if (xscale != lwallscale) { walltexcoords.ProjectPos(mLineSegment->sidedef->TexelLength*xscale, WallC.sx1, WallC.sx2, WallT); lwallscale = xscale; } - if (bottomtexture->bWorldPanning) + if (mBottomPart.Texture->bWorldPanning) { - rw_offset = xs_RoundToInt(rw_offset_bottom * xscale); + rw_offset = xs_RoundToInt(mBottomPart.TextureOffsetU * xscale); } else { - rw_offset = rw_offset_bottom; + rw_offset = mBottomPart.TextureOffsetU; } if (xscale < 0) { @@ -1169,7 +1163,7 @@ namespace swrenderer } RenderWallPart renderWallpart(Thread); - renderWallpart.Render(drawerargs, mFrontSector, mLineSegment, WallC, rw_pic, x1, x2, walllower.ScreenY, wallbottom.ScreenY, rw_bottomtexturemid, walltexcoords.VStep, walltexcoords.UPos, yscale, MAX(mBackFloorZ1, mBackFloorZ2), MIN(mFrontFloorZ1, mFrontFloorZ2), false, wallshade, rw_offset, rw_light, rw_lightstep, light_list, foggy, 
basecolormap); + renderWallpart.Render(drawerargs, mFrontSector, mLineSegment, WallC, rw_pic, x1, x2, walllower.ScreenY, wallbottom.ScreenY, mBottomPart.TextureMid, walltexcoords.VStep, walltexcoords.UPos, yscale, MAX(mBackFloorZ1, mBackFloorZ2), MIN(mFrontFloorZ1, mFrontFloorZ2), false, wallshade, rw_offset, rw_light, rw_lightstep, light_list, foggy, basecolormap); } memcpy(floorclip + x1, walllower.ScreenY + x1, (x2 - x1) * sizeof(short)); } diff --git a/src/swrenderer/line/r_line.h b/src/swrenderer/line/r_line.h index 03dc0c8ad6..355b491caa 100644 --- a/src/swrenderer/line/r_line.h +++ b/src/swrenderer/line/r_line.h @@ -49,6 +49,15 @@ namespace swrenderer void InitFromLine(RenderThread *thread, const DVector2 &left, const DVector2 &right); }; + struct WallPartTexture + { + fixed_t TextureOffsetU; + double TextureMid; + double TextureScaleU; + double TextureScaleV; + FTexture *Texture; + }; + class SWRenderLine : VisibleSegmentRenderer { public: @@ -101,10 +110,6 @@ namespace swrenderer // Wall segment variables: - fixed_t rw_offset_top; - fixed_t rw_offset_mid; - fixed_t rw_offset_bottom; - bool rw_prepped; int wallshade; @@ -114,21 +119,13 @@ namespace swrenderer double lwallscale; fixed_t rw_offset; - double rw_midtexturemid; - double rw_toptexturemid; - double rw_bottomtexturemid; - double rw_midtexturescalex; - double rw_midtexturescaley; - double rw_toptexturescalex; - double rw_toptexturescaley; - double rw_bottomtexturescalex; - double rw_bottomtexturescaley; bool markfloor; // False if the back side is the same plane. 
bool markceiling; - FTexture *toptexture; - FTexture *bottomtexture; - FTexture *midtexture; + + WallPartTexture mTopPart; + WallPartTexture mMiddlePart; + WallPartTexture mBottomPart; ProjectedWallCull mCeilingClipped; ProjectedWallCull mFloorClipped; From 8d25a6d8d46cb1b5368eedf0e68fce6051ac0a69 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 12 Feb 2017 06:23:57 +0100 Subject: [PATCH 841/912] rw_offset is not a member variable --- src/swrenderer/line/r_line.cpp | 30 +++++++++++++++--------------- src/swrenderer/line/r_line.h | 1 - 2 files changed, 15 insertions(+), 16 deletions(-) diff --git a/src/swrenderer/line/r_line.cpp b/src/swrenderer/line/r_line.cpp index 2e48812054..e9c55f9bbb 100644 --- a/src/swrenderer/line/r_line.cpp +++ b/src/swrenderer/line/r_line.cpp @@ -309,7 +309,6 @@ namespace swrenderer side_t *sidedef = mLineSegment->sidedef; - rw_offset = FLOAT2FIXED(sidedef->GetTextureXOffset(side_t::mid)); rw_light = rw_lightleft + rw_lightstep * (start - WallC.sx1); RenderPortal *renderportal = Thread->Portal.get(); @@ -967,7 +966,6 @@ namespace swrenderer int x; double xscale; double yscale; - fixed_t xoffset = rw_offset; WallDrawerArgs drawerargs; @@ -1072,21 +1070,22 @@ namespace swrenderer walltexcoords.ProjectPos(mLineSegment->sidedef->TexelLength*xscale, WallC.sx1, WallC.sx2, WallT); lwallscale = xscale; } + fixed_t offset; if (mMiddlePart.Texture->bWorldPanning) { - rw_offset = xs_RoundToInt(mMiddlePart.TextureOffsetU * xscale); + offset = xs_RoundToInt(mMiddlePart.TextureOffsetU * xscale); } else { - rw_offset = mMiddlePart.TextureOffsetU; + offset = mMiddlePart.TextureOffsetU; } if (xscale < 0) { - rw_offset = -rw_offset; + offset = -offset; } RenderWallPart renderWallpart(Thread); - renderWallpart.Render(drawerargs, mFrontSector, mLineSegment, WallC, rw_pic, x1, x2, walltop.ScreenY, wallbottom.ScreenY, mMiddlePart.TextureMid, walltexcoords.VStep, walltexcoords.UPos, yscale, MAX(mFrontCeilingZ1, mFrontCeilingZ2), MIN(mFrontFloorZ1, 
mFrontFloorZ2), false, wallshade, rw_offset, rw_light, rw_lightstep, light_list, foggy, basecolormap); + renderWallpart.Render(drawerargs, mFrontSector, mLineSegment, WallC, rw_pic, x1, x2, walltop.ScreenY, wallbottom.ScreenY, mMiddlePart.TextureMid, walltexcoords.VStep, walltexcoords.UPos, yscale, MAX(mFrontCeilingZ1, mFrontCeilingZ2), MIN(mFrontFloorZ1, mFrontFloorZ2), false, wallshade, offset, rw_light, rw_lightstep, light_list, foggy, basecolormap); } fillshort(ceilingclip + x1, x2 - x1, viewheight); fillshort(floorclip + x1, x2 - x1, 0xffff); @@ -1109,21 +1108,22 @@ namespace swrenderer walltexcoords.ProjectPos(mLineSegment->sidedef->TexelLength*xscale, WallC.sx1, WallC.sx2, WallT); lwallscale = xscale; } + fixed_t offset; if (mTopPart.Texture->bWorldPanning) { - rw_offset = xs_RoundToInt(mTopPart.TextureOffsetU * xscale); + offset = xs_RoundToInt(mTopPart.TextureOffsetU * xscale); } else { - rw_offset = mTopPart.TextureOffsetU; + offset = mTopPart.TextureOffsetU; } if (xscale < 0) { - rw_offset = -rw_offset; + offset = -offset; } RenderWallPart renderWallpart(Thread); - renderWallpart.Render(drawerargs, mFrontSector, mLineSegment, WallC, rw_pic, x1, x2, walltop.ScreenY, wallupper.ScreenY, mTopPart.TextureMid, walltexcoords.VStep, walltexcoords.UPos, yscale, MAX(mFrontCeilingZ1, mFrontCeilingZ2), MIN(mBackCeilingZ1, mBackCeilingZ2), false, wallshade, rw_offset, rw_light, rw_lightstep, light_list, foggy, basecolormap); + renderWallpart.Render(drawerargs, mFrontSector, mLineSegment, WallC, rw_pic, x1, x2, walltop.ScreenY, wallupper.ScreenY, mTopPart.TextureMid, walltexcoords.VStep, walltexcoords.UPos, yscale, MAX(mFrontCeilingZ1, mFrontCeilingZ2), MIN(mBackCeilingZ1, mBackCeilingZ2), false, wallshade, offset, rw_light, rw_lightstep, light_list, foggy, basecolormap); } memcpy(ceilingclip + x1, wallupper.ScreenY + x1, (x2 - x1) * sizeof(short)); } @@ -1149,21 +1149,22 @@ namespace swrenderer walltexcoords.ProjectPos(mLineSegment->sidedef->TexelLength*xscale, 
WallC.sx1, WallC.sx2, WallT); lwallscale = xscale; } + fixed_t offset; if (mBottomPart.Texture->bWorldPanning) { - rw_offset = xs_RoundToInt(mBottomPart.TextureOffsetU * xscale); + offset = xs_RoundToInt(mBottomPart.TextureOffsetU * xscale); } else { - rw_offset = mBottomPart.TextureOffsetU; + offset = mBottomPart.TextureOffsetU; } if (xscale < 0) { - rw_offset = -rw_offset; + offset = -offset; } RenderWallPart renderWallpart(Thread); - renderWallpart.Render(drawerargs, mFrontSector, mLineSegment, WallC, rw_pic, x1, x2, walllower.ScreenY, wallbottom.ScreenY, mBottomPart.TextureMid, walltexcoords.VStep, walltexcoords.UPos, yscale, MAX(mBackFloorZ1, mBackFloorZ2), MIN(mFrontFloorZ1, mFrontFloorZ2), false, wallshade, rw_offset, rw_light, rw_lightstep, light_list, foggy, basecolormap); + renderWallpart.Render(drawerargs, mFrontSector, mLineSegment, WallC, rw_pic, x1, x2, walllower.ScreenY, wallbottom.ScreenY, mBottomPart.TextureMid, walltexcoords.VStep, walltexcoords.UPos, yscale, MAX(mBackFloorZ1, mBackFloorZ2), MIN(mFrontFloorZ1, mFrontFloorZ2), false, wallshade, offset, rw_light, rw_lightstep, light_list, foggy, basecolormap); } memcpy(floorclip + x1, walllower.ScreenY + x1, (x2 - x1) * sizeof(short)); } @@ -1172,7 +1173,6 @@ namespace swrenderer memcpy(floorclip + x1, wallbottom.ScreenY + x1, (x2 - x1) * sizeof(short)); } } - rw_offset = xoffset; } //////////////////////////////////////////////////////////////////////////// diff --git a/src/swrenderer/line/r_line.h b/src/swrenderer/line/r_line.h index 355b491caa..ab205b8ca8 100644 --- a/src/swrenderer/line/r_line.h +++ b/src/swrenderer/line/r_line.h @@ -118,7 +118,6 @@ namespace swrenderer float rw_lightleft; double lwallscale; - fixed_t rw_offset; bool markfloor; // False if the back side is the same plane. 
bool markceiling; From b5c5bd9a1c7f56ada088deb8d53b5b1b28df9bbc Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 12 Feb 2017 06:44:03 +0100 Subject: [PATCH 842/912] Create ClipSegmentTopBottom function --- src/swrenderer/line/r_line.cpp | 43 ++++++++++++++++++++-------------- src/swrenderer/line/r_line.h | 2 +- 2 files changed, 26 insertions(+), 19 deletions(-) diff --git a/src/swrenderer/line/r_line.cpp b/src/swrenderer/line/r_line.cpp index e9c55f9bbb..3bb4c59798 100644 --- a/src/swrenderer/line/r_line.cpp +++ b/src/swrenderer/line/r_line.cpp @@ -309,8 +309,6 @@ namespace swrenderer side_t *sidedef = mLineSegment->sidedef; - rw_light = rw_lightleft + rw_lightstep * (start - WallC.sx1); - RenderPortal *renderportal = Thread->Portal.get(); DrawSegment *draw_segment = Thread->FrameMemory->NewObject(); @@ -485,7 +483,7 @@ namespace swrenderer draw_segment->iscalestep = 0; } } - draw_segment->light = rw_light; + draw_segment->light = rw_lightleft + rw_lightstep * (start - WallC.sx1); draw_segment->lightstep = rw_lightstep; // Masked mMiddlePart.Textures should get the light level from the sector they reference, @@ -519,6 +517,7 @@ namespace swrenderer mFloorPlane = Thread->PlaneList->GetRange(mFloorPlane, start, stop); } + ClipSegmentTopBottom(start, stop); RenderWallSegmentTextures(start, stop); if (clip3d->fake3D & FAKE3D_FAKEMASK) @@ -958,6 +957,24 @@ namespace swrenderer front->ColorMap->Fade != back->ColorMap->Fade && (front->GetTexture(sector_t::ceiling) != skyflatnum || back->GetTexture(sector_t::ceiling) != skyflatnum); } + + void SWRenderLine::ClipSegmentTopBottom(int x1, int x2) + { + // clip wall to the floor and ceiling + auto ceilingclip = Thread->OpaquePass->ceilingclip; + auto floorclip = Thread->OpaquePass->floorclip; + for (int x = x1; x < x2; ++x) + { + if (walltop.ScreenY[x] < ceilingclip[x]) + { + walltop.ScreenY[x] = ceilingclip[x]; + } + if (wallbottom.ScreenY[x] > floorclip[x]) + { + wallbottom.ScreenY[x] = floorclip[x]; + } + } + } // 
Draws zero, one, or two textures for walls. // Can draw or mark the starting pixel of floor and ceiling textures. @@ -966,6 +983,9 @@ namespace swrenderer int x; double xscale; double yscale; + + auto ceilingclip = Thread->OpaquePass->ceilingclip; + auto floorclip = Thread->OpaquePass->floorclip; WallDrawerArgs drawerargs; @@ -977,21 +997,8 @@ namespace swrenderer else if (cameraLight->FixedColormap() != nullptr) drawerargs.SetLight(cameraLight->FixedColormap(), 0, 0); - // clip wall to the floor and ceiling - auto ceilingclip = Thread->OpaquePass->ceilingclip; - auto floorclip = Thread->OpaquePass->floorclip; - for (x = x1; x < x2; ++x) - { - if (walltop.ScreenY[x] < ceilingclip[x]) - { - walltop.ScreenY[x] = ceilingclip[x]; - } - if (wallbottom.ScreenY[x] > floorclip[x]) - { - wallbottom.ScreenY[x] = floorclip[x]; - } - } - + float rw_light = rw_lightleft + rw_lightstep * (x1 - WallC.sx1); + Clip3DFloors *clip3d = Thread->Clip3D.get(); // mark ceiling areas diff --git a/src/swrenderer/line/r_line.h b/src/swrenderer/line/r_line.h index ab205b8ca8..a0a3790292 100644 --- a/src/swrenderer/line/r_line.h +++ b/src/swrenderer/line/r_line.h @@ -69,6 +69,7 @@ namespace swrenderer private: bool RenderWallSegment(int x1, int x2) override; void SetWallVariables(bool needlights); + void ClipSegmentTopBottom(int x1, int x2); void RenderWallSegmentTextures(int x1, int x2); bool IsFogBoundary(sector_t *front, sector_t *back) const; @@ -113,7 +114,6 @@ namespace swrenderer bool rw_prepped; int wallshade; - float rw_light; float rw_lightstep; float rw_lightleft; From bb749c032ad028e935e3e4919d5c6b0b940694ea Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 12 Feb 2017 07:24:35 +0100 Subject: [PATCH 843/912] Create SetTopTexture, SetMiddleTexture and SetBottomTexture --- src/swrenderer/line/r_line.cpp | 344 +++++++++++++++++---------------- src/swrenderer/line/r_line.h | 3 + 2 files changed, 185 insertions(+), 162 deletions(-) diff --git a/src/swrenderer/line/r_line.cpp 
b/src/swrenderer/line/r_line.cpp index 3bb4c59798..4f952e4a44 100644 --- a/src/swrenderer/line/r_line.cpp +++ b/src/swrenderer/line/r_line.cpp @@ -693,7 +693,7 @@ namespace swrenderer return linedef->isVisualPortal(); } } - + void SWRenderLine::SetWallVariables(bool needlights) { RenderPortal *renderportal = Thread->Portal.get(); @@ -737,75 +737,13 @@ namespace swrenderer markfloor = ShouldMarkFloor(); markceiling = ShouldMarkCeiling(); - mTopPart.Texture = nullptr; - mMiddlePart.Texture = nullptr; - mBottomPart.Texture = nullptr; + SetTopTexture(); + SetMiddleTexture(); + SetBottomTexture(); - if (sidedef == linedef->sidedef[0] && - (linedef->special == Line_Mirror && r_drawmirrors)) // [ZZ] compatibility with r_drawmirrors cvar that existed way before portals + if (mBackSector && !(sidedef == linedef->sidedef[0] && (linedef->special == Line_Mirror && r_drawmirrors))) { - } - else if (mBackSector == NULL) - { - // single sided line - - // [RH] Horizon lines do not need to be textured - if (linedef->isVisualPortal()) - { - } - else if (linedef->special != Line_Horizon) - { - mMiddlePart.Texture = TexMan(sidedef->GetTexture(side_t::mid), true); - mMiddlePart.TextureOffsetU = FLOAT2FIXED(sidedef->GetTextureXOffset(side_t::mid)); - double rowoffset = sidedef->GetTextureYOffset(side_t::mid); - mMiddlePart.TextureScaleU = sidedef->GetTextureXScale(side_t::mid); - mMiddlePart.TextureScaleV = sidedef->GetTextureYScale(side_t::mid); - double yrepeat = mMiddlePart.Texture->Scale.Y * mMiddlePart.TextureScaleV; - if (yrepeat >= 0) - { // normal orientation - if (linedef->flags & ML_DONTPEGBOTTOM) - { // bottom of texture at bottom - mMiddlePart.TextureMid = (mFrontSector->GetPlaneTexZ(sector_t::floor) - ViewPos.Z) * yrepeat + mMiddlePart.Texture->GetHeight(); - } - else - { // top of texture at top - mMiddlePart.TextureMid = (mFrontSector->GetPlaneTexZ(sector_t::ceiling) - ViewPos.Z) * yrepeat; - if (rowoffset < 0 && mMiddlePart.Texture != NULL) - { - rowoffset += 
mMiddlePart.Texture->GetHeight(); - } - } - } - else - { // upside down - rowoffset = -rowoffset; - if (linedef->flags & ML_DONTPEGBOTTOM) - { // top of texture at bottom - mMiddlePart.TextureMid = (mFrontSector->GetPlaneTexZ(sector_t::floor) - ViewPos.Z) * yrepeat; - } - else - { // bottom of texture at top - mMiddlePart.TextureMid = (mFrontSector->GetPlaneTexZ(sector_t::ceiling) - ViewPos.Z) * yrepeat + mMiddlePart.Texture->GetHeight(); - } - } - if (mMiddlePart.Texture->bWorldPanning) - { - mMiddlePart.TextureMid += rowoffset * yrepeat; - } - else - { - // rowoffset is added outside the multiply so that it positions the texture - // by texels instead of world units. - mMiddlePart.TextureMid += rowoffset; - } - } - } - else - { // two-sided line - // hack to allow height changes in outdoor areas - - double rw_frontlowertop = mFrontSector->GetPlaneTexZ(sector_t::ceiling); - + // skyhack to allow height changes in outdoor areas if (mFrontSector->GetTexture(sector_t::ceiling) == skyflatnum && mBackSector->GetTexture(sector_t::ceiling) == skyflatnum) { @@ -823,100 +761,6 @@ namespace swrenderer // ceiling instead of the front sector's ceiling. walltop.Project(mBackSector->ceilingplane, &WallC, mLineSegment, Thread->Portal->MirrorFlags & RF_XFLIP); } - // Putting sky ceilings on the front and back of a line alters the way unpegged - // positioning works. 
- rw_frontlowertop = mBackSector->GetPlaneTexZ(sector_t::ceiling); - } - - if (rw_havehigh) - { // top texture - mTopPart.Texture = TexMan(sidedef->GetTexture(side_t::top), true); - - mTopPart.TextureOffsetU = FLOAT2FIXED(sidedef->GetTextureXOffset(side_t::top)); - double rowoffset = sidedef->GetTextureYOffset(side_t::top); - mTopPart.TextureScaleU = sidedef->GetTextureXScale(side_t::top); - mTopPart.TextureScaleV = sidedef->GetTextureYScale(side_t::top); - double yrepeat = mTopPart.Texture->Scale.Y * mTopPart.TextureScaleV; - if (yrepeat >= 0) - { // normal orientation - if (linedef->flags & ML_DONTPEGTOP) - { // top of texture at top - mTopPart.TextureMid = (mFrontSector->GetPlaneTexZ(sector_t::ceiling) - ViewPos.Z) * yrepeat; - if (rowoffset < 0 && mTopPart.Texture != NULL) - { - rowoffset += mTopPart.Texture->GetHeight(); - } - } - else - { // bottom of texture at bottom - mTopPart.TextureMid = (mBackSector->GetPlaneTexZ(sector_t::ceiling) - ViewPos.Z) * yrepeat + mTopPart.Texture->GetHeight(); - } - } - else - { // upside down - rowoffset = -rowoffset; - if (linedef->flags & ML_DONTPEGTOP) - { // bottom of texture at top - mTopPart.TextureMid = (mFrontSector->GetPlaneTexZ(sector_t::ceiling) - ViewPos.Z) * yrepeat + mTopPart.Texture->GetHeight(); - } - else - { // top of texture at bottom - mTopPart.TextureMid = (mBackSector->GetPlaneTexZ(sector_t::ceiling) - ViewPos.Z) * yrepeat; - } - } - if (mTopPart.Texture->bWorldPanning) - { - mTopPart.TextureMid += rowoffset * yrepeat; - } - else - { - mTopPart.TextureMid += rowoffset; - } - } - if (rw_havelow) - { // bottom texture - mBottomPart.Texture = TexMan(sidedef->GetTexture(side_t::bottom), true); - - mBottomPart.TextureOffsetU = FLOAT2FIXED(sidedef->GetTextureXOffset(side_t::bottom)); - double rowoffset = sidedef->GetTextureYOffset(side_t::bottom); - mBottomPart.TextureScaleU = sidedef->GetTextureXScale(side_t::bottom); - mBottomPart.TextureScaleV = sidedef->GetTextureYScale(side_t::bottom); - double yrepeat = 
mBottomPart.Texture->Scale.Y * mBottomPart.TextureScaleV; - if (yrepeat >= 0) - { // normal orientation - if (linedef->flags & ML_DONTPEGBOTTOM) - { // bottom of texture at bottom - mBottomPart.TextureMid = (rw_frontlowertop - ViewPos.Z) * yrepeat; - } - else - { // top of texture at top - mBottomPart.TextureMid = (mBackSector->GetPlaneTexZ(sector_t::floor) - ViewPos.Z) * yrepeat; - if (rowoffset < 0 && mBottomPart.Texture != NULL) - { - rowoffset += mBottomPart.Texture->GetHeight(); - } - } - } - else - { // upside down - rowoffset = -rowoffset; - if (linedef->flags & ML_DONTPEGBOTTOM) - { // top of texture at bottom - mBottomPart.TextureMid = (rw_frontlowertop - ViewPos.Z) * yrepeat; - } - else - { // bottom of texture at top - mBottomPart.TextureMid = (mBackSector->GetPlaneTexZ(sector_t::floor) - ViewPos.Z) * yrepeat + mBottomPart.Texture->GetHeight(); - } - } - if (mBottomPart.Texture->bWorldPanning) - { - mBottomPart.TextureMid += rowoffset * yrepeat; - } - else - { - mBottomPart.TextureMid += rowoffset; - } } } @@ -951,6 +795,182 @@ namespace swrenderer } } + void SWRenderLine::SetTopTexture() + { + mTopPart.Texture = nullptr; + + if (!(mFrontCeilingZ1 > mBackCeilingZ1 || mFrontCeilingZ2 > mBackCeilingZ2)) return; + + side_t *sidedef = mLineSegment->sidedef; + line_t *linedef = mLineSegment->linedef; + if (sidedef == linedef->sidedef[0] && (linedef->special == Line_Mirror && r_drawmirrors)) return; + if (!mBackSector) return; + + mTopPart.Texture = TexMan(sidedef->GetTexture(side_t::top), true); + + mTopPart.TextureOffsetU = FLOAT2FIXED(sidedef->GetTextureXOffset(side_t::top)); + double rowoffset = sidedef->GetTextureYOffset(side_t::top); + mTopPart.TextureScaleU = sidedef->GetTextureXScale(side_t::top); + mTopPart.TextureScaleV = sidedef->GetTextureYScale(side_t::top); + double yrepeat = mTopPart.Texture->Scale.Y * mTopPart.TextureScaleV; + if (yrepeat >= 0) + { // normal orientation + if (linedef->flags & ML_DONTPEGTOP) + { // top of texture at top + 
mTopPart.TextureMid = (mFrontSector->GetPlaneTexZ(sector_t::ceiling) - ViewPos.Z) * yrepeat; + if (rowoffset < 0 && mTopPart.Texture != NULL) + { + rowoffset += mTopPart.Texture->GetHeight(); + } + } + else + { // bottom of texture at bottom + mTopPart.TextureMid = (mBackSector->GetPlaneTexZ(sector_t::ceiling) - ViewPos.Z) * yrepeat + mTopPart.Texture->GetHeight(); + } + } + else + { // upside down + rowoffset = -rowoffset; + if (linedef->flags & ML_DONTPEGTOP) + { // bottom of texture at top + mTopPart.TextureMid = (mFrontSector->GetPlaneTexZ(sector_t::ceiling) - ViewPos.Z) * yrepeat + mTopPart.Texture->GetHeight(); + } + else + { // top of texture at bottom + mTopPart.TextureMid = (mBackSector->GetPlaneTexZ(sector_t::ceiling) - ViewPos.Z) * yrepeat; + } + } + if (mTopPart.Texture->bWorldPanning) + { + mTopPart.TextureMid += rowoffset * yrepeat; + } + else + { + mTopPart.TextureMid += rowoffset; + } + } + + void SWRenderLine::SetMiddleTexture() + { + mMiddlePart.Texture = nullptr; + + side_t *sidedef = mLineSegment->sidedef; + line_t *linedef = mLineSegment->linedef; + if (sidedef == linedef->sidedef[0] && (linedef->special == Line_Mirror && r_drawmirrors)) return; + if (mBackSector) return; + + // [RH] Horizon lines do not need to be textured + if (linedef->isVisualPortal()) return; + if (linedef->special == Line_Horizon) return; + + mMiddlePart.Texture = TexMan(sidedef->GetTexture(side_t::mid), true); + mMiddlePart.TextureOffsetU = FLOAT2FIXED(sidedef->GetTextureXOffset(side_t::mid)); + double rowoffset = sidedef->GetTextureYOffset(side_t::mid); + mMiddlePart.TextureScaleU = sidedef->GetTextureXScale(side_t::mid); + mMiddlePart.TextureScaleV = sidedef->GetTextureYScale(side_t::mid); + double yrepeat = mMiddlePart.Texture->Scale.Y * mMiddlePart.TextureScaleV; + if (yrepeat >= 0) + { // normal orientation + if (linedef->flags & ML_DONTPEGBOTTOM) + { // bottom of texture at bottom + mMiddlePart.TextureMid = (mFrontSector->GetPlaneTexZ(sector_t::floor) - ViewPos.Z) 
* yrepeat + mMiddlePart.Texture->GetHeight(); + } + else + { // top of texture at top + mMiddlePart.TextureMid = (mFrontSector->GetPlaneTexZ(sector_t::ceiling) - ViewPos.Z) * yrepeat; + if (rowoffset < 0 && mMiddlePart.Texture != NULL) + { + rowoffset += mMiddlePart.Texture->GetHeight(); + } + } + } + else + { // upside down + rowoffset = -rowoffset; + if (linedef->flags & ML_DONTPEGBOTTOM) + { // top of texture at bottom + mMiddlePart.TextureMid = (mFrontSector->GetPlaneTexZ(sector_t::floor) - ViewPos.Z) * yrepeat; + } + else + { // bottom of texture at top + mMiddlePart.TextureMid = (mFrontSector->GetPlaneTexZ(sector_t::ceiling) - ViewPos.Z) * yrepeat + mMiddlePart.Texture->GetHeight(); + } + } + if (mMiddlePart.Texture->bWorldPanning) + { + mMiddlePart.TextureMid += rowoffset * yrepeat; + } + else + { + // rowoffset is added outside the multiply so that it positions the texture + // by texels instead of world units. + mMiddlePart.TextureMid += rowoffset; + } + } + + void SWRenderLine::SetBottomTexture() + { + mBottomPart.Texture = nullptr; + + if (!(mFrontFloorZ1 < mBackFloorZ1 || mFrontFloorZ2 < mBackFloorZ2)) return; + + side_t *sidedef = mLineSegment->sidedef; + line_t *linedef = mLineSegment->linedef; + if (sidedef == linedef->sidedef[0] && (linedef->special == Line_Mirror && r_drawmirrors)) return; + if (!mBackSector) return; + + double frontlowertop = mFrontSector->GetPlaneTexZ(sector_t::ceiling); + if (mFrontSector->GetTexture(sector_t::ceiling) == skyflatnum && mBackSector->GetTexture(sector_t::ceiling) == skyflatnum) + { + // Putting sky ceilings on the front and back of a line alters the way unpegged + // positioning works. 
+ frontlowertop = mBackSector->GetPlaneTexZ(sector_t::ceiling); + } + + mBottomPart.Texture = TexMan(sidedef->GetTexture(side_t::bottom), true); + + mBottomPart.TextureOffsetU = FLOAT2FIXED(sidedef->GetTextureXOffset(side_t::bottom)); + double rowoffset = sidedef->GetTextureYOffset(side_t::bottom); + mBottomPart.TextureScaleU = sidedef->GetTextureXScale(side_t::bottom); + mBottomPart.TextureScaleV = sidedef->GetTextureYScale(side_t::bottom); + double yrepeat = mBottomPart.Texture->Scale.Y * mBottomPart.TextureScaleV; + if (yrepeat >= 0) + { // normal orientation + if (linedef->flags & ML_DONTPEGBOTTOM) + { // bottom of texture at bottom + mBottomPart.TextureMid = (frontlowertop - ViewPos.Z) * yrepeat; + } + else + { // top of texture at top + mBottomPart.TextureMid = (mBackSector->GetPlaneTexZ(sector_t::floor) - ViewPos.Z) * yrepeat; + if (rowoffset < 0 && mBottomPart.Texture != NULL) + { + rowoffset += mBottomPart.Texture->GetHeight(); + } + } + } + else + { // upside down + rowoffset = -rowoffset; + if (linedef->flags & ML_DONTPEGBOTTOM) + { // top of texture at bottom + mBottomPart.TextureMid = (frontlowertop - ViewPos.Z) * yrepeat; + } + else + { // bottom of texture at top + mBottomPart.TextureMid = (mBackSector->GetPlaneTexZ(sector_t::floor) - ViewPos.Z) * yrepeat + mBottomPart.Texture->GetHeight(); + } + } + if (mBottomPart.Texture->bWorldPanning) + { + mBottomPart.TextureMid += rowoffset * yrepeat; + } + else + { + mBottomPart.TextureMid += rowoffset; + } + } + bool SWRenderLine::IsFogBoundary(sector_t *front, sector_t *back) const { return r_fogboundary && CameraLight::Instance()->FixedColormap() == nullptr && front->ColorMap->Fade && diff --git a/src/swrenderer/line/r_line.h b/src/swrenderer/line/r_line.h index a0a3790292..c57134efaa 100644 --- a/src/swrenderer/line/r_line.h +++ b/src/swrenderer/line/r_line.h @@ -69,6 +69,9 @@ namespace swrenderer private: bool RenderWallSegment(int x1, int x2) override; void SetWallVariables(bool needlights); + void 
SetTopTexture(); + void SetMiddleTexture(); + void SetBottomTexture(); void ClipSegmentTopBottom(int x1, int x2); void RenderWallSegmentTextures(int x1, int x2); From 83332562f131cb3d8fd9c6726a6ddc4ea7119f7f Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 12 Feb 2017 07:37:18 +0100 Subject: [PATCH 844/912] Fix skyhack --- src/swrenderer/line/r_line.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/swrenderer/line/r_line.cpp b/src/swrenderer/line/r_line.cpp index 4f952e4a44..0dfebc6b0a 100644 --- a/src/swrenderer/line/r_line.cpp +++ b/src/swrenderer/line/r_line.cpp @@ -806,6 +806,9 @@ namespace swrenderer if (sidedef == linedef->sidedef[0] && (linedef->special == Line_Mirror && r_drawmirrors)) return; if (!mBackSector) return; + // No top texture for skyhack lines + if (mFrontSector->GetTexture(sector_t::ceiling) == skyflatnum && mBackSector->GetTexture(sector_t::ceiling) == skyflatnum) return; + mTopPart.Texture = TexMan(sidedef->GetTexture(side_t::top), true); mTopPart.TextureOffsetU = FLOAT2FIXED(sidedef->GetTextureXOffset(side_t::top)); From 22be201fc7ce303edf8170fe4d21bf0231ac4599 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 13 Feb 2017 00:00:25 +0100 Subject: [PATCH 845/912] Split line drawing into more functions --- src/swrenderer/line/r_line.cpp | 311 +++++++++++++++++++-------------- src/swrenderer/line/r_line.h | 8 +- 2 files changed, 188 insertions(+), 131 deletions(-) diff --git a/src/swrenderer/line/r_line.cpp b/src/swrenderer/line/r_line.cpp index 0dfebc6b0a..544055b2c8 100644 --- a/src/swrenderer/line/r_line.cpp +++ b/src/swrenderer/line/r_line.cpp @@ -506,25 +506,19 @@ namespace swrenderer } } - // render it - if (markceiling) - { - mCeilingPlane = Thread->PlaneList->GetRange(mCeilingPlane, start, stop); - } - - if (markfloor) - { - mFloorPlane = Thread->PlaneList->GetRange(mFloorPlane, start, stop); - } - ClipSegmentTopBottom(start, stop); - RenderWallSegmentTextures(start, stop); + + MarkCeilingPlane(start, stop); 
+ MarkFloorPlane(start, stop); + Mark3DFloors(start, stop); if (clip3d->fake3D & FAKE3D_FAKEMASK) { return (clip3d->fake3D & FAKE3D_FAKEMASK) == 0; } + MarkOpaquePassClip(start, stop); + // save sprite clipping info if (((draw_segment->silhouette & SIL_TOP) || maskedtexture) && draw_segment->sprtopclip == nullptr) { @@ -543,6 +537,10 @@ namespace swrenderer draw_segment->silhouette |= SIL_TOP | SIL_BOTTOM; } + RenderMiddleTexture(start, stop); + RenderTopTexture(start, stop); + RenderBottomTexture(start, stop); + // [RH] Draw any decals bound to the seg // [ZZ] Only if not an active mirror if (!markportal) @@ -999,35 +997,18 @@ namespace swrenderer } } - // Draws zero, one, or two textures for walls. - // Can draw or mark the starting pixel of floor and ceiling textures. - void SWRenderLine::RenderWallSegmentTextures(int x1, int x2) + void SWRenderLine::MarkCeilingPlane(int x1, int x2) { - int x; - double xscale; - double yscale; - - auto ceilingclip = Thread->OpaquePass->ceilingclip; - auto floorclip = Thread->OpaquePass->floorclip; - - WallDrawerArgs drawerargs; - - drawerargs.SetStyle(false, false, OPAQUE); - - CameraLight *cameraLight = CameraLight::Instance(); - if (cameraLight->FixedLightLevel() >= 0) - drawerargs.SetLight((r_fullbrightignoresectorcolor) ? 
&FullNormalLight : basecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->FixedLightLevel())); - else if (cameraLight->FixedColormap() != nullptr) - drawerargs.SetLight(cameraLight->FixedColormap(), 0, 0); - - float rw_light = rw_lightleft + rw_lightstep * (x1 - WallC.sx1); - - Clip3DFloors *clip3d = Thread->Clip3D.get(); - // mark ceiling areas if (markceiling) { - for (x = x1; x < x2; ++x) + mCeilingPlane = Thread->PlaneList->GetRange(mCeilingPlane, x1, x2); + + auto ceilingclip = Thread->OpaquePass->ceilingclip; + auto floorclip = Thread->OpaquePass->floorclip; + Clip3DFloors *clip3d = Thread->Clip3D.get(); + + for (int x = x1; x < x2; ++x) { short top = (clip3d->fakeFloor && clip3d->fake3D & FAKE3D_FAKECEILING) ? clip3d->fakeFloor->ceilingclip[x] : ceilingclip[x]; short bottom = MIN(walltop.ScreenY[x], floorclip[x]); @@ -1038,11 +1019,19 @@ namespace swrenderer } } } + } - // mark floor areas + void SWRenderLine::MarkFloorPlane(int x1, int x2) + { if (markfloor) { - for (x = x1; x < x2; ++x) + mFloorPlane = Thread->PlaneList->GetRange(mFloorPlane, x1, x2); + + auto ceilingclip = Thread->OpaquePass->ceilingclip; + auto floorclip = Thread->OpaquePass->floorclip; + Clip3DFloors *clip3d = Thread->Clip3D.get(); + + for (int x = x1; x < x2; ++x) { short top = MAX(wallbottom.ScreenY[x], ceilingclip[x]); short bottom = (clip3d->fakeFloor && clip3d->fake3D & FAKE3D_FAKEFLOOR) ? 
clip3d->fakeFloor->floorclip[x] : floorclip[x]; @@ -1054,17 +1043,25 @@ namespace swrenderer } } } + } + + void SWRenderLine::Mark3DFloors(int x1, int x2) + { + Clip3DFloors *clip3d = Thread->Clip3D.get(); // kg3D - fake planes clipping if (clip3d->fake3D & FAKE3D_REFRESHCLIP) { + auto ceilingclip = Thread->OpaquePass->ceilingclip; + auto floorclip = Thread->OpaquePass->floorclip; + if (clip3d->fake3D & FAKE3D_CLIPBOTFRONT) { memcpy(clip3d->fakeFloor->floorclip + x1, wallbottom.ScreenY + x1, (x2 - x1) * sizeof(short)); } else { - for (x = x1; x < x2; ++x) + for (int x = x1; x < x2; ++x) { walllower.ScreenY[x] = MIN(MAX(walllower.ScreenY[x], ceilingclip[x]), wallbottom.ScreenY[x]); } @@ -1076,47 +1073,22 @@ namespace swrenderer } else { - for (x = x1; x < x2; ++x) + for (int x = x1; x < x2; ++x) { wallupper.ScreenY[x] = MAX(MIN(wallupper.ScreenY[x], floorclip[x]), walltop.ScreenY[x]); } memcpy(clip3d->fakeFloor->ceilingclip + x1, wallupper.ScreenY + x1, (x2 - x1) * sizeof(short)); } } - if (clip3d->fake3D & FAKE3D_FAKEMASK) return; + } - FLightNode *light_list = (mLineSegment && mLineSegment->sidedef) ? 
mLineSegment->sidedef->lighthead : nullptr; + void SWRenderLine::MarkOpaquePassClip(int x1, int x2) + { + auto ceilingclip = Thread->OpaquePass->ceilingclip; + auto floorclip = Thread->OpaquePass->floorclip; - // draw the wall tiers - if (mMiddlePart.Texture) - { // one sided line - if (mMiddlePart.Texture->UseType != FTexture::TEX_Null && viewactive) - { - FTexture *rw_pic = mMiddlePart.Texture; - xscale = rw_pic->Scale.X * mMiddlePart.TextureScaleU; - yscale = rw_pic->Scale.Y * mMiddlePart.TextureScaleV; - if (xscale != lwallscale) - { - walltexcoords.ProjectPos(mLineSegment->sidedef->TexelLength*xscale, WallC.sx1, WallC.sx2, WallT); - lwallscale = xscale; - } - fixed_t offset; - if (mMiddlePart.Texture->bWorldPanning) - { - offset = xs_RoundToInt(mMiddlePart.TextureOffsetU * xscale); - } - else - { - offset = mMiddlePart.TextureOffsetU; - } - if (xscale < 0) - { - offset = -offset; - } - - RenderWallPart renderWallpart(Thread); - renderWallpart.Render(drawerargs, mFrontSector, mLineSegment, WallC, rw_pic, x1, x2, walltop.ScreenY, wallbottom.ScreenY, mMiddlePart.TextureMid, walltexcoords.VStep, walltexcoords.UPos, yscale, MAX(mFrontCeilingZ1, mFrontCeilingZ2), MIN(mFrontFloorZ1, mFrontFloorZ2), false, wallshade, offset, rw_light, rw_lightstep, light_list, foggy, basecolormap); - } + if (mMiddlePart.Texture) // one sided line + { fillshort(ceilingclip + x1, x2 - x1, viewheight); fillshort(floorclip + x1, x2 - x1, 0xffff); } @@ -1124,37 +1096,10 @@ namespace swrenderer { // two sided line if (mTopPart.Texture != NULL && mTopPart.Texture->UseType != FTexture::TEX_Null) { // top wall - for (x = x1; x < x2; ++x) + for (int x = x1; x < x2; ++x) { wallupper.ScreenY[x] = MAX(MIN(wallupper.ScreenY[x], floorclip[x]), walltop.ScreenY[x]); } - if (viewactive) - { - FTexture *rw_pic = mTopPart.Texture; - xscale = rw_pic->Scale.X * mTopPart.TextureScaleU; - yscale = rw_pic->Scale.Y * mTopPart.TextureScaleV; - if (xscale != lwallscale) - { - 
walltexcoords.ProjectPos(mLineSegment->sidedef->TexelLength*xscale, WallC.sx1, WallC.sx2, WallT); - lwallscale = xscale; - } - fixed_t offset; - if (mTopPart.Texture->bWorldPanning) - { - offset = xs_RoundToInt(mTopPart.TextureOffsetU * xscale); - } - else - { - offset = mTopPart.TextureOffsetU; - } - if (xscale < 0) - { - offset = -offset; - } - - RenderWallPart renderWallpart(Thread); - renderWallpart.Render(drawerargs, mFrontSector, mLineSegment, WallC, rw_pic, x1, x2, walltop.ScreenY, wallupper.ScreenY, mTopPart.TextureMid, walltexcoords.VStep, walltexcoords.UPos, yscale, MAX(mFrontCeilingZ1, mFrontCeilingZ2), MIN(mBackCeilingZ1, mBackCeilingZ2), false, wallshade, offset, rw_light, rw_lightstep, light_list, foggy, basecolormap); - } memcpy(ceilingclip + x1, wallupper.ScreenY + x1, (x2 - x1) * sizeof(short)); } else if (markceiling) @@ -1162,40 +1107,12 @@ namespace swrenderer memcpy(ceilingclip + x1, walltop.ScreenY + x1, (x2 - x1) * sizeof(short)); } - if (mBottomPart.Texture != NULL && mBottomPart.Texture->UseType != FTexture::TEX_Null) { // bottom wall - for (x = x1; x < x2; ++x) + for (int x = x1; x < x2; ++x) { walllower.ScreenY[x] = MIN(MAX(walllower.ScreenY[x], ceilingclip[x]), wallbottom.ScreenY[x]); } - if (viewactive) - { - FTexture *rw_pic = mBottomPart.Texture; - xscale = rw_pic->Scale.X * mBottomPart.TextureScaleU; - yscale = rw_pic->Scale.Y * mBottomPart.TextureScaleV; - if (xscale != lwallscale) - { - walltexcoords.ProjectPos(mLineSegment->sidedef->TexelLength*xscale, WallC.sx1, WallC.sx2, WallT); - lwallscale = xscale; - } - fixed_t offset; - if (mBottomPart.Texture->bWorldPanning) - { - offset = xs_RoundToInt(mBottomPart.TextureOffsetU * xscale); - } - else - { - offset = mBottomPart.TextureOffsetU; - } - if (xscale < 0) - { - offset = -offset; - } - - RenderWallPart renderWallpart(Thread); - renderWallpart.Render(drawerargs, mFrontSector, mLineSegment, WallC, rw_pic, x1, x2, walllower.ScreenY, wallbottom.ScreenY, mBottomPart.TextureMid, 
walltexcoords.VStep, walltexcoords.UPos, yscale, MAX(mBackFloorZ1, mBackFloorZ2), MIN(mFrontFloorZ1, mFrontFloorZ2), false, wallshade, offset, rw_light, rw_lightstep, light_list, foggy, basecolormap); - } memcpy(floorclip + x1, walllower.ScreenY + x1, (x2 - x1) * sizeof(short)); } else if (markfloor) @@ -1205,6 +1122,140 @@ namespace swrenderer } } + void SWRenderLine::RenderTopTexture(int x1, int x2) + { + if (mMiddlePart.Texture) return; + if (!mTopPart.Texture || mTopPart.Texture->UseType == FTexture::TEX_Null) return; + if (!viewactive) return; + + FTexture *rw_pic = mTopPart.Texture; + double xscale = rw_pic->Scale.X * mTopPart.TextureScaleU; + double yscale = rw_pic->Scale.Y * mTopPart.TextureScaleV; + if (xscale != lwallscale) + { + walltexcoords.ProjectPos(mLineSegment->sidedef->TexelLength*xscale, WallC.sx1, WallC.sx2, WallT); + lwallscale = xscale; + } + fixed_t offset; + if (mTopPart.Texture->bWorldPanning) + { + offset = xs_RoundToInt(mTopPart.TextureOffsetU * xscale); + } + else + { + offset = mTopPart.TextureOffsetU; + } + if (xscale < 0) + { + offset = -offset; + } + + WallDrawerArgs drawerargs; + drawerargs.SetStyle(false, false, OPAQUE); + + CameraLight *cameraLight = CameraLight::Instance(); + if (cameraLight->FixedLightLevel() >= 0) + drawerargs.SetLight((r_fullbrightignoresectorcolor) ? &FullNormalLight : basecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->FixedLightLevel())); + else if (cameraLight->FixedColormap() != nullptr) + drawerargs.SetLight(cameraLight->FixedColormap(), 0, 0); + + float rw_light = rw_lightleft + rw_lightstep * (x1 - WallC.sx1); + + FLightNode *light_list = (mLineSegment && mLineSegment->sidedef) ? 
mLineSegment->sidedef->lighthead : nullptr; + + RenderWallPart renderWallpart(Thread); + renderWallpart.Render(drawerargs, mFrontSector, mLineSegment, WallC, rw_pic, x1, x2, walltop.ScreenY, wallupper.ScreenY, mTopPart.TextureMid, walltexcoords.VStep, walltexcoords.UPos, yscale, MAX(mFrontCeilingZ1, mFrontCeilingZ2), MIN(mBackCeilingZ1, mBackCeilingZ2), false, wallshade, offset, rw_light, rw_lightstep, light_list, foggy, basecolormap); + } + + void SWRenderLine::RenderMiddleTexture(int x1, int x2) + { + if (!mMiddlePart.Texture || mMiddlePart.Texture->UseType == FTexture::TEX_Null) return; + if (!viewactive) return; + + FTexture *rw_pic = mMiddlePart.Texture; + double xscale = rw_pic->Scale.X * mMiddlePart.TextureScaleU; + double yscale = rw_pic->Scale.Y * mMiddlePart.TextureScaleV; + if (xscale != lwallscale) + { + walltexcoords.ProjectPos(mLineSegment->sidedef->TexelLength*xscale, WallC.sx1, WallC.sx2, WallT); + lwallscale = xscale; + } + fixed_t offset; + if (mMiddlePart.Texture->bWorldPanning) + { + offset = xs_RoundToInt(mMiddlePart.TextureOffsetU * xscale); + } + else + { + offset = mMiddlePart.TextureOffsetU; + } + if (xscale < 0) + { + offset = -offset; + } + + WallDrawerArgs drawerargs; + drawerargs.SetStyle(false, false, OPAQUE); + + CameraLight *cameraLight = CameraLight::Instance(); + if (cameraLight->FixedLightLevel() >= 0) + drawerargs.SetLight((r_fullbrightignoresectorcolor) ? &FullNormalLight : basecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->FixedLightLevel())); + else if (cameraLight->FixedColormap() != nullptr) + drawerargs.SetLight(cameraLight->FixedColormap(), 0, 0); + + float rw_light = rw_lightleft + rw_lightstep * (x1 - WallC.sx1); + + FLightNode *light_list = (mLineSegment && mLineSegment->sidedef) ? 
mLineSegment->sidedef->lighthead : nullptr; + + RenderWallPart renderWallpart(Thread); + renderWallpart.Render(drawerargs, mFrontSector, mLineSegment, WallC, rw_pic, x1, x2, walltop.ScreenY, wallbottom.ScreenY, mMiddlePart.TextureMid, walltexcoords.VStep, walltexcoords.UPos, yscale, MAX(mFrontCeilingZ1, mFrontCeilingZ2), MIN(mFrontFloorZ1, mFrontFloorZ2), false, wallshade, offset, rw_light, rw_lightstep, light_list, foggy, basecolormap); + } + + void SWRenderLine::RenderBottomTexture(int x1, int x2) + { + if (mMiddlePart.Texture) return; + if (!mBottomPart.Texture || mBottomPart.Texture->UseType == FTexture::TEX_Null) return; + if (!viewactive) return; + + FTexture *rw_pic = mBottomPart.Texture; + double xscale = rw_pic->Scale.X * mBottomPart.TextureScaleU; + double yscale = rw_pic->Scale.Y * mBottomPart.TextureScaleV; + if (xscale != lwallscale) + { + walltexcoords.ProjectPos(mLineSegment->sidedef->TexelLength*xscale, WallC.sx1, WallC.sx2, WallT); + lwallscale = xscale; + } + fixed_t offset; + if (mBottomPart.Texture->bWorldPanning) + { + offset = xs_RoundToInt(mBottomPart.TextureOffsetU * xscale); + } + else + { + offset = mBottomPart.TextureOffsetU; + } + if (xscale < 0) + { + offset = -offset; + } + + WallDrawerArgs drawerargs; + drawerargs.SetStyle(false, false, OPAQUE); + + CameraLight *cameraLight = CameraLight::Instance(); + if (cameraLight->FixedLightLevel() >= 0) + drawerargs.SetLight((r_fullbrightignoresectorcolor) ? &FullNormalLight : basecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->FixedLightLevel())); + else if (cameraLight->FixedColormap() != nullptr) + drawerargs.SetLight(cameraLight->FixedColormap(), 0, 0); + + float rw_light = rw_lightleft + rw_lightstep * (x1 - WallC.sx1); + + FLightNode *light_list = (mLineSegment && mLineSegment->sidedef) ? 
mLineSegment->sidedef->lighthead : nullptr; + + RenderWallPart renderWallpart(Thread); + renderWallpart.Render(drawerargs, mFrontSector, mLineSegment, WallC, rw_pic, x1, x2, walllower.ScreenY, wallbottom.ScreenY, mBottomPart.TextureMid, walltexcoords.VStep, walltexcoords.UPos, yscale, MAX(mBackFloorZ1, mBackFloorZ2), MIN(mFrontFloorZ1, mFrontFloorZ2), false, wallshade, offset, rw_light, rw_lightstep, light_list, foggy, basecolormap); + } + //////////////////////////////////////////////////////////////////////////// // Transform and clip coordinates. Returns true if it was clipped away diff --git a/src/swrenderer/line/r_line.h b/src/swrenderer/line/r_line.h index c57134efaa..b0e3c416d9 100644 --- a/src/swrenderer/line/r_line.h +++ b/src/swrenderer/line/r_line.h @@ -73,7 +73,13 @@ namespace swrenderer void SetMiddleTexture(); void SetBottomTexture(); void ClipSegmentTopBottom(int x1, int x2); - void RenderWallSegmentTextures(int x1, int x2); + void MarkCeilingPlane(int x1, int x2); + void MarkFloorPlane(int x1, int x2); + void Mark3DFloors(int x1, int x2); + void MarkOpaquePassClip(int x1, int x2); + void RenderTopTexture(int x1, int x2); + void RenderMiddleTexture(int x1, int x2); + void RenderBottomTexture(int x1, int x2); bool IsFogBoundary(sector_t *front, sector_t *back) const; bool SkyboxCompare(sector_t *frontsector, sector_t *backsector) const; From a2edca6032b1f3cf85609fba489ea3db82766cca Mon Sep 17 00:00:00 2001 From: raa-eruanna Date: Mon, 13 Feb 2017 07:01:44 -0500 Subject: [PATCH 846/912] - fix at least one of the GCC errors --- src/swrenderer/line/r_walldraw.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/swrenderer/line/r_walldraw.cpp b/src/swrenderer/line/r_walldraw.cpp index a0e4641e43..1d2760e769 100644 --- a/src/swrenderer/line/r_walldraw.cpp +++ b/src/swrenderer/line/r_walldraw.cpp @@ -106,7 +106,7 @@ namespace swrenderer v = v - floor(v); double v_step = uv_stepd / texture->GetHeight(); - if (isnan(v) || isnan(v_step)) 
// this should never happen, but it apparently does.. + if (std::isnan(v) || std::isnan(v_step)) // this should never happen, but it apparently does.. { uv_stepd = 0.0; v = 0.0; From 69787fac727e7b94274eb73729e9d25107cb0d27 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 13 Feb 2017 15:08:11 +0100 Subject: [PATCH 847/912] Fix translation issue --- src/swrenderer/viewport/r_spritedrawer.cpp | 14 ++++++++++++++ src/swrenderer/viewport/r_spritedrawer.h | 1 + src/v_draw.cpp | 19 +++---------------- 3 files changed, 18 insertions(+), 16 deletions(-) diff --git a/src/swrenderer/viewport/r_spritedrawer.cpp b/src/swrenderer/viewport/r_spritedrawer.cpp index cfcd3f2850..8d5fa07517 100644 --- a/src/swrenderer/viewport/r_spritedrawer.cpp +++ b/src/swrenderer/viewport/r_spritedrawer.cpp @@ -492,6 +492,20 @@ namespace swrenderer return SetStyle(style, FLOAT2FIXED(alpha), translation, color, basecolormap, shadedlightshade); } + bool SpriteDrawerArgs::SetStyle(FRenderStyle style, float alpha, lighttable_t *translation, uint32_t color) + { + SetTranslationMap(translation); + SetLight(nullptr, 0.0f, 0); + + FDynamicColormap *basecolormap = &identitycolormap; + if (!SetStyle(style, alpha, -1, color, basecolormap)) + return false; + + if (!Colormap()) + SetTranslationMap(identitymap); + return true; + } + void SpriteDrawerArgs::FillColumn(RenderThread *thread) { thread->Drawers()->FillColumn(*this); diff --git a/src/swrenderer/viewport/r_spritedrawer.h b/src/swrenderer/viewport/r_spritedrawer.h index fc5053e6e0..8f4b93115d 100644 --- a/src/swrenderer/viewport/r_spritedrawer.h +++ b/src/swrenderer/viewport/r_spritedrawer.h @@ -18,6 +18,7 @@ namespace swrenderer bool SetStyle(FRenderStyle style, fixed_t alpha, int translation, uint32_t color, FDynamicColormap *&basecolormap, fixed_t shadedlightshade = 0); bool SetStyle(FRenderStyle style, float alpha, int translation, uint32_t color, FDynamicColormap *&basecolormap, fixed_t shadedlightshade = 0); + bool SetStyle(FRenderStyle 
style, float alpha, lighttable_t *translation, uint32_t color); void SetDest(int x, int y); void SetCount(int count) { dc_count = count; } void SetSolidColor(int color) { dc_color = color; } diff --git a/src/v_draw.cpp b/src/v_draw.cpp index a1af3ad2c0..6d86e0a0eb 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -192,7 +192,7 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) using namespace swrenderer; static short bottomclipper[MAXWIDTH], topclipper[MAXWIDTH]; - const BYTE *translation = NULL; + lighttable_t *translation = NULL; auto viewport = RenderViewport::Instance(); @@ -224,27 +224,14 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) else if (parms.remap != NULL) { if (viewport->RenderTarget->IsBgra()) - translation = (const BYTE*)parms.remap->Palette; + translation = (lighttable_t *)parms.remap->Palette; else translation = parms.remap->Remap; } SpriteDrawerArgs drawerargs; - if (translation != NULL) - { - drawerargs.SetTranslationMap((lighttable_t *)translation); - } - else - { - if (viewport->RenderTarget->IsBgra()) - drawerargs.SetTranslationMap(nullptr); - else - drawerargs.SetTranslationMap(identitymap); - } - - FDynamicColormap *basecolormap = nullptr; - bool visible = drawerargs.SetStyle(parms.style, parms.Alpha, -1, parms.fillcolor, basecolormap); + bool visible = drawerargs.SetStyle(parms.style, parms.Alpha, translation, parms.fillcolor); double x0 = parms.x - parms.left * parms.destwidth / parms.texwidth; double y0 = parms.y - parms.top * parms.destheight / parms.texheight; From 93f43e875115e35dac96322663825a2a1e53e398 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 13 Feb 2017 15:42:35 +0100 Subject: [PATCH 848/912] Menu code accessed the screen buffer without a lock --- src/v_draw.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/v_draw.cpp b/src/v_draw.cpp index 6d86e0a0eb..6430b9448f 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -198,6 +198,8 @@ void 
DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) viewport->RenderTarget = screen; + viewport->RenderTarget->Lock(true); + if (APART(parms.colorOverlay) != 0) { // The software renderer cannot invert the source without inverting the overlay @@ -324,6 +326,8 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) { NetUpdate(); } + + viewport->RenderTarget->Unlock(); #endif } From 10b36934c2187dadefffa53b92683e97047bace0 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 13 Feb 2017 16:13:24 +0100 Subject: [PATCH 849/912] Fix vid_hw2d 0 translation bug --- src/swrenderer/viewport/r_drawerargs.cpp | 1 - src/swrenderer/viewport/r_spritedrawer.cpp | 20 +----------------- src/swrenderer/viewport/r_spritedrawer.h | 1 - src/v_draw.cpp | 24 ++++++++++++++-------- 4 files changed, 17 insertions(+), 29 deletions(-) diff --git a/src/swrenderer/viewport/r_drawerargs.cpp b/src/swrenderer/viewport/r_drawerargs.cpp index 65477cd55f..60a0edf618 100644 --- a/src/swrenderer/viewport/r_drawerargs.cpp +++ b/src/swrenderer/viewport/r_drawerargs.cpp @@ -19,7 +19,6 @@ namespace swrenderer void DrawerArgs::SetLight(FSWColormap *base_colormap, float light, int shade) { mBaseColormap = base_colormap; - mTranslation = nullptr; mLight = light; mShade = shade; } diff --git a/src/swrenderer/viewport/r_spritedrawer.cpp b/src/swrenderer/viewport/r_spritedrawer.cpp index 8d5fa07517..118fd18b09 100644 --- a/src/swrenderer/viewport/r_spritedrawer.cpp +++ b/src/swrenderer/viewport/r_spritedrawer.cpp @@ -480,11 +480,7 @@ namespace swrenderer SetLight(&identitycolormap, 0, 0); } - if (!SpriteDrawerArgs::SetBlendFunc(style.BlendOp, fglevel, bglevel, style.Flags)) - { - return false; - } - return true; + return SpriteDrawerArgs::SetBlendFunc(style.BlendOp, fglevel, bglevel, style.Flags); } bool SpriteDrawerArgs::SetStyle(FRenderStyle style, float alpha, int translation, uint32_t color, FDynamicColormap *&basecolormap, fixed_t shadedlightshade) @@ -492,20 +488,6 @@ namespace 
swrenderer return SetStyle(style, FLOAT2FIXED(alpha), translation, color, basecolormap, shadedlightshade); } - bool SpriteDrawerArgs::SetStyle(FRenderStyle style, float alpha, lighttable_t *translation, uint32_t color) - { - SetTranslationMap(translation); - SetLight(nullptr, 0.0f, 0); - - FDynamicColormap *basecolormap = &identitycolormap; - if (!SetStyle(style, alpha, -1, color, basecolormap)) - return false; - - if (!Colormap()) - SetTranslationMap(identitymap); - return true; - } - void SpriteDrawerArgs::FillColumn(RenderThread *thread) { thread->Drawers()->FillColumn(*this); diff --git a/src/swrenderer/viewport/r_spritedrawer.h b/src/swrenderer/viewport/r_spritedrawer.h index 8f4b93115d..fc5053e6e0 100644 --- a/src/swrenderer/viewport/r_spritedrawer.h +++ b/src/swrenderer/viewport/r_spritedrawer.h @@ -18,7 +18,6 @@ namespace swrenderer bool SetStyle(FRenderStyle style, fixed_t alpha, int translation, uint32_t color, FDynamicColormap *&basecolormap, fixed_t shadedlightshade = 0); bool SetStyle(FRenderStyle style, float alpha, int translation, uint32_t color, FDynamicColormap *&basecolormap, fixed_t shadedlightshade = 0); - bool SetStyle(FRenderStyle style, float alpha, lighttable_t *translation, uint32_t color); void SetDest(int x, int y); void SetCount(int count) { dc_count = count; } void SetSolidColor(int color) { dc_color = color; } diff --git a/src/v_draw.cpp b/src/v_draw.cpp index 6430b9448f..ad9d1aa5bb 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -192,14 +192,15 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) using namespace swrenderer; static short bottomclipper[MAXWIDTH], topclipper[MAXWIDTH]; - lighttable_t *translation = NULL; auto viewport = RenderViewport::Instance(); - viewport->RenderTarget = screen; - viewport->RenderTarget->Lock(true); + lighttable_t *translation = nullptr; + FDynamicColormap *basecolormap = &identitycolormap; + int shade = 0; + if (APART(parms.colorOverlay) != 0) { // The software renderer cannot 
invert the source without inverting the overlay @@ -216,10 +217,15 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) parms.colorOverlay = PalEntry(parms.colorOverlay).InverseColor(); } // Note that this overrides the translation in software, but not in hardware. - if (!viewport->RenderTarget->IsBgra()) + FDynamicColormap *colormap = GetSpecialLights(MAKERGB(255, 255, 255), parms.colorOverlay & MAKEARGB(0, 255, 255, 255), 0); + + if (viewport->RenderTarget->IsBgra()) + { + basecolormap = colormap; + shade = (APART(parms.colorOverlay)*NUMCOLORMAPS / 255) << FRACBITS; + } + else { - FDynamicColormap *colormap = GetSpecialLights(MAKERGB(255, 255, 255), - parms.colorOverlay & MAKEARGB(0, 255, 255, 255), 0); translation = &colormap->Maps[(APART(parms.colorOverlay)*NUMCOLORMAPS / 255) * 256]; } } @@ -233,8 +239,10 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) SpriteDrawerArgs drawerargs; - bool visible = drawerargs.SetStyle(parms.style, parms.Alpha, translation, parms.fillcolor); - + drawerargs.SetTranslationMap(translation); + drawerargs.SetLight(basecolormap, 0.0f, shade); + bool visible = drawerargs.SetStyle(parms.style, parms.Alpha, -1, parms.fillcolor, basecolormap); + double x0 = parms.x - parms.left * parms.destwidth / parms.texwidth; double y0 = parms.y - parms.top * parms.destheight / parms.texheight; From 9159e3b1f0f3cd5ceac2bcfe7e6568a5e7605dcd Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 13 Feb 2017 17:32:52 +0100 Subject: [PATCH 850/912] Move software canvas drawing to its own file in the software renderer --- src/CMakeLists.txt | 1 + src/swrenderer/r_swcanvas.cpp | 858 ++++++++++++++++++++++++++++++++++ src/swrenderer/r_swcanvas.h | 22 + src/v_draw.cpp | 700 ++------------------------- src/v_video.cpp | 130 ------ src/v_video.h | 2 - 6 files changed, 913 insertions(+), 800 deletions(-) create mode 100644 src/swrenderer/r_swcanvas.cpp create mode 100644 src/swrenderer/r_swcanvas.h diff --git 
a/src/CMakeLists.txt b/src/CMakeLists.txt index 3768d55dd9..c671c0f3f0 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -844,6 +844,7 @@ set( NOT_COMPILED_SOURCE_FILES ) set( FASTMATH_PCH_SOURCES + swrenderer/r_swcanvas.cpp swrenderer/r_swrenderer.cpp swrenderer/r_memory.cpp swrenderer/r_renderthread.cpp diff --git a/src/swrenderer/r_swcanvas.cpp b/src/swrenderer/r_swcanvas.cpp new file mode 100644 index 0000000000..ce089272bd --- /dev/null +++ b/src/swrenderer/r_swcanvas.cpp @@ -0,0 +1,858 @@ +/* +** r_swcanvas.cpp +** Draw to a canvas using software rendering +** +**--------------------------------------------------------------------------- +** Copyright 1998-2008 Randy Heit +** All rights reserved. +** +** Redistribution and use in source and binary forms, with or without +** modification, are permitted provided that the following conditions +** are met: +** +** 1. Redistributions of source code must retain the above copyright +** notice, this list of conditions and the following disclaimer. +** 2. Redistributions in binary form must reproduce the above copyright +** notice, this list of conditions and the following disclaimer in the +** documentation and/or other materials provided with the distribution. +** 3. The name of the author may not be used to endorse or promote products +** derived from this software without specific prior written permission. +** +** THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +** IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +** OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+** IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+** INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+** NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+** DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+** THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+** (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+** THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**---------------------------------------------------------------------------
+**
+*/
+
+#include "swrenderer/viewport/r_viewport.h"
+#include "swrenderer/scene/r_light.h"
+#include "swrenderer/r_renderthread.h"
+#include "v_palette.h"
+#include "v_video.h"
+#include "m_png.h"
+#include "colormatcher.h"
+#include "r_swcanvas.h"
+#include "textures/textures.h"
+#include "r_data/voxels.h"
+#include "drawers/r_draw_rgba.h"
+#include "drawers/r_drawers.h"
+
+EXTERN_CVAR(Bool, r_blendmethod)
+
+using namespace swrenderer;
+
+// Draws 'img' onto 'canvas' one destination column at a time using the sprite
+// column drawer.
+//
+//   canvas - destination; it becomes the viewport's RenderTarget and is locked
+//            for the duration of the call
+//   img    - source texture
+//   parms  - placement, clipping, style and colour parameters. Note that
+//            parms.colorOverlay is modified in place when the overlay has to
+//            be inverted.
+void SWCanvas::DrawTexture(DCanvas *canvas, FTexture *img, DrawParms &parms)
+{
+	// Per-column bottom/top clip values handed to the column drawer. They are
+	// static so that they only need refilling when the clip value changes.
+	static short bottomclipper[MAXWIDTH], topclipper[MAXWIDTH];
+	// Number of leading entries of each array that currently hold the value
+	// stored in element 0.
+	static int bottomclipwidth = 0, topclipwidth = 0;
+
+	auto viewport = RenderViewport::Instance();
+	viewport->RenderTarget = canvas;
+	viewport->RenderTarget->Lock(true);
+
+	lighttable_t *translation = nullptr;
+	FDynamicColormap *basecolormap = &identitycolormap;
+	int shade = 0;
+
+	if (APART(parms.colorOverlay) != 0)
+	{
+		// The software renderer cannot invert the source without inverting the overlay
+		// too. That means if the source is inverted, we need to do the reverse of what
+		// the invert overlay flag says to do.
+		INTBOOL invertoverlay = (parms.style.Flags & STYLEF_InvertOverlay);
+
+		if (parms.style.Flags & STYLEF_InvertSource)
+		{
+			invertoverlay = !invertoverlay;
+		}
+		if (invertoverlay)
+		{
+			parms.colorOverlay = PalEntry(parms.colorOverlay).InverseColor();
+		}
+		// Note that this overrides the translation in software, but not in hardware.
+		FDynamicColormap *colormap = GetSpecialLights(MAKERGB(255, 255, 255), parms.colorOverlay & MAKEARGB(0, 255, 255, 255), 0);
+
+		if (viewport->RenderTarget->IsBgra())
+		{
+			// True colour: the overlay is applied as a light level on the colormap.
+			basecolormap = colormap;
+			shade = (APART(parms.colorOverlay)*NUMCOLORMAPS / 255) << FRACBITS;
+		}
+		else
+		{
+			// Paletted: the overlay is applied as a translation table picked by alpha.
+			translation = &colormap->Maps[(APART(parms.colorOverlay)*NUMCOLORMAPS / 255) * 256];
+		}
+	}
+	else if (parms.remap != NULL)
+	{
+		if (viewport->RenderTarget->IsBgra())
+			translation = (lighttable_t *)parms.remap->Palette;
+		else
+			translation = parms.remap->Remap;
+	}
+
+	SpriteDrawerArgs drawerargs;
+
+	drawerargs.SetTranslationMap(translation);
+	drawerargs.SetLight(basecolormap, 0.0f, shade);
+	bool visible = drawerargs.SetStyle(parms.style, parms.Alpha, -1, parms.fillcolor, basecolormap);
+
+	double x0 = parms.x - parms.left * parms.destwidth / parms.texwidth;
+	double y0 = parms.y - parms.top * parms.destheight / parms.texheight;
+
+	if (visible)
+	{
+		// The column drawer positions sprites relative to CenterY; force it to 0
+		// for 2D drawing and restore it afterwards.
+		double centeryback = viewport->CenterY;
+		viewport->CenterY = 0;
+
+		// There is not enough precision in the drawing routines to keep the full
+		// precision for y0. :(
+		double sprtopscreen;
+		modf(y0, &sprtopscreen);
+
+		double yscale = parms.destheight / img->GetHeight();
+
+		double spryscale = yscale;
+		assert(spryscale > 0);
+
+		bool sprflipvert = false;
+		fixed_t iscale = FLOAT2FIXED(1 / spryscale);
+		//dc_texturemid = (CenterY - 1 - sprtopscreen) * iscale / 65536;
+		fixed_t frac = 0;
+		double xiscale = img->GetWidth() / parms.destwidth;
+		double x2 = x0 + parms.destwidth;
+
+		short *mfloorclip;
+		short *mceilingclip;
+
+		// The clip arrays are indexed by destination column, so they must be valid
+		// across the width of the canvas being drawn to, not the global screen.
+		// Refill when the clip value changed or when this canvas is wider than
+		// the range that was filled last time.
+		const int clipwidth = canvas->GetWidth();
+
+		if (bottomclipwidth < clipwidth || bottomclipper[0] != parms.dclip)
+		{
+			fillshort(bottomclipper, clipwidth, (short)parms.dclip);
+			bottomclipwidth = clipwidth;
+		}
+		if (parms.uclip != 0)
+		{
+			if (topclipwidth < clipwidth || topclipper[0] != parms.uclip)
+			{
+				fillshort(topclipper, clipwidth, (short)parms.uclip);
+				topclipwidth = clipwidth;
+			}
+			mceilingclip = topclipper;
+		}
+		else
+		{
+			mceilingclip = zeroarray;
+		}
+		mfloorclip = bottomclipper;
+
+		if (parms.flipX)
+		{
+			// Start at the last texel column and step backwards.
+			frac = (img->GetWidth() << FRACBITS) - 1;
+			xiscale = -xiscale;
+		}
+
+		if (parms.windowleft > 0 || parms.windowright < parms.texwidth)
+		{
+			double wi = MIN(parms.windowright, parms.texwidth);
+			double xscale = parms.destwidth / parms.texwidth;
+			x0 += parms.windowleft * xscale;
+			frac += FLOAT2FIXED(parms.windowleft);
+			x2 -= (parms.texwidth - wi) * xscale;
+		}
+		if (x0 < parms.lclip)
+		{
+			// Skip the texel columns hidden by the left clip edge.
+			frac += FLOAT2FIXED((parms.lclip - x0) * xiscale);
+			x0 = parms.lclip;
+		}
+		if (x2 > parms.rclip)
+		{
+			x2 = parms.rclip;
+		}
+
+		int x = int(x0);
+		int x2_i = int(x2);
+		fixed_t xiscale_i = FLOAT2FIXED(xiscale);
+
+		// Draw immediately on the calling thread instead of queueing the columns.
+		static RenderThread thread(nullptr);
+		thread.DrawQueue->ThreadedRender = false;
+		while (x < x2_i)
+		{
+			drawerargs.DrawMaskedColumn(&thread, x, iscale, img, frac, spryscale, sprtopscreen, sprflipvert, mfloorclip, mceilingclip, !parms.masked);
+			x++;
+			frac += xiscale_i;
+		}
+
+		viewport->CenterY = centeryback;
+	}
+
+	viewport->RenderTarget->Unlock();
+	// NOTE(review): FillSimplePoly resets RenderTarget to 'screen' on exit while
+	// this function leaves it pointing at 'canvas' - confirm which is intended.
+	viewport->RenderTarget = canvas;
+}
+
+void SWCanvas::FillSimplePoly(DCanvas *canvas, FTexture *tex, FVector2 *points, int npoints,
+	double
originx, double originy, double scalex, double scaley, DAngle rotation, + FDynamicColormap *colormap, PalEntry flatcolor, int lightlevel, int bottomclip) +{ + // Use an equation similar to player sprites to determine shade + fixed_t shade = LIGHT2SHADE(lightlevel) - 12 * FRACUNIT; + float topy, boty, leftx, rightx; + int toppt, botpt, pt1, pt2; + int i; + int y1, y2, y; + fixed_t x; + bool dorotate = rotation != 0.; + double cosrot, sinrot; + + if (--npoints < 2) + { // not a polygon or we're not locked + return; + } + + if (bottomclip <= 0) + { + bottomclip = canvas->GetHeight(); + } + + // Find the extents of the polygon, in particular the highest and lowest points. + for (botpt = toppt = 0, boty = topy = points[0].Y, leftx = rightx = points[0].X, i = 1; i <= npoints; ++i) + { + if (points[i].Y < topy) + { + topy = points[i].Y; + toppt = i; + } + if (points[i].Y > boty) + { + boty = points[i].Y; + botpt = i; + } + if (points[i].X < leftx) + { + leftx = points[i].X; + } + if (points[i].X > rightx) + { + rightx = points[i].X; + } + } + if (topy >= bottomclip || // off the bottom of the screen + boty <= 0 || // off the top of the screen + leftx >= canvas->GetWidth() || // off the right of the screen + rightx <= 0) // off the left of the screen + { + return; + } + + auto viewport = RenderViewport::Instance(); + viewport->RenderTarget = canvas; + + viewport->RenderTarget->Lock(true); + + scalex /= tex->Scale.X; + scaley /= tex->Scale.Y; + + // Use the CRT's functions here. + cosrot = cos(rotation.Radians()); + sinrot = sin(rotation.Radians()); + + // Setup constant texture mapping parameters. 
+ SpanDrawerArgs drawerargs; + drawerargs.SetTexture(tex); + if (colormap) + drawerargs.SetLight(colormap, 0, clamp(shade >> FRACBITS, 0, NUMCOLORMAPS - 1)); + else + drawerargs.SetLight(&identitycolormap, 0, 0); + if (drawerargs.TextureWidthBits() != 0) + { + scalex = double(1u << (32 - drawerargs.TextureWidthBits())) / scalex; + drawerargs.SetTextureUStep(xs_RoundToInt(cosrot * scalex)); + } + else + { // Texture is one pixel wide. + scalex = 0; + drawerargs.SetTextureUStep(0); + } + if (drawerargs.TextureHeightBits() != 0) + { + scaley = double(1u << (32 - drawerargs.TextureHeightBits())) / scaley; + drawerargs.SetTextureVStep(xs_RoundToInt(sinrot * scaley)); + } + else + { // Texture is one pixel tall. + scaley = 0; + drawerargs.SetTextureVStep(0); + } + + int width = canvas->GetWidth(); + + // Travel down the right edge and create an outline of that edge. + static short spanend[MAXHEIGHT]; + pt1 = toppt; + pt2 = toppt + 1; if (pt2 > npoints) pt2 = 0; + y1 = xs_RoundToInt(points[pt1].Y + 0.5f); + do + { + x = FLOAT2FIXED(points[pt1].X + 0.5f); + y2 = xs_RoundToInt(points[pt2].Y + 0.5f); + if (y1 >= y2 || (y1 < 0 && y2 < 0) || (y1 >= bottomclip && y2 >= bottomclip)) + { + } + else + { + fixed_t xinc = FLOAT2FIXED((points[pt2].X - points[pt1].X) / (points[pt2].Y - points[pt1].Y)); + int y3 = MIN(y2, bottomclip); + if (y1 < 0) + { + x += xinc * -y1; + y1 = 0; + } + for (y = y1; y < y3; ++y) + { + spanend[y] = clamp(x >> FRACBITS, -1, width); + x += xinc; + } + } + y1 = y2; + pt1 = pt2; + pt2++; if (pt2 > npoints) pt2 = 0; + } while (pt1 != botpt); + + static RenderThread thread(nullptr); + thread.DrawQueue->ThreadedRender = false; + + // Travel down the left edge and fill it in. 
+ pt1 = toppt; + pt2 = toppt - 1; if (pt2 < 0) pt2 = npoints; + y1 = xs_RoundToInt(points[pt1].Y + 0.5f); + do + { + x = FLOAT2FIXED(points[pt1].X + 0.5f); + y2 = xs_RoundToInt(points[pt2].Y + 0.5f); + if (y1 >= y2 || (y1 < 0 && y2 < 0) || (y1 >= bottomclip && y2 >= bottomclip)) + { + } + else + { + fixed_t xinc = FLOAT2FIXED((points[pt2].X - points[pt1].X) / (points[pt2].Y - points[pt1].Y)); + int y3 = MIN(y2, bottomclip); + if (y1 < 0) + { + x += xinc * -y1; + y1 = 0; + } + for (y = y1; y < y3; ++y) + { + int x1 = x >> FRACBITS; + int x2 = spanend[y]; + if (x2 > x1 && x2 > 0 && x1 < width) + { + x1 = MAX(x1, 0); + x2 = MIN(x2, width); +#if 0 + memset(this->Buffer + y * this->Pitch + x1, (int)tex, x2 - x1); +#else + drawerargs.SetDestY(y); + drawerargs.SetDestX1(x1); + drawerargs.SetDestX2(x2 - 1); + + DVector2 tex(x1 - originx, y - originy); + if (dorotate) + { + double t = tex.X; + tex.X = t * cosrot - tex.Y * sinrot; + tex.Y = tex.Y * cosrot + t * sinrot; + } + drawerargs.SetTextureUPos(xs_RoundToInt(tex.X * scalex)); + drawerargs.SetTextureVPos(xs_RoundToInt(tex.Y * scaley)); + + drawerargs.DrawSpan(&thread); +#endif + } + x += xinc; + } + } + y1 = y2; + pt1 = pt2; + pt2--; if (pt2 < 0) pt2 = npoints; + } while (pt1 != botpt); + + viewport->RenderTarget->Unlock(); + viewport->RenderTarget = screen; +} + +void SWCanvas::DrawLine(DCanvas *canvas, int x0, int y0, int x1, int y1, int palColor, uint32 realcolor) +{ + const int WeightingScale = 0; + const int WEIGHTBITS = 6; + const int WEIGHTSHIFT = 16 - WEIGHTBITS; + const int NUMWEIGHTS = (1 << WEIGHTBITS); + const int WEIGHTMASK = (NUMWEIGHTS - 1); + + if (palColor < 0) + { + palColor = PalFromRGB(realcolor); + } + + canvas->Lock(); + int deltaX, deltaY, xDir; + + if (y0 > y1) + { + int temp = y0; y0 = y1; y1 = temp; + temp = x0; x0 = x1; x1 = temp; + } + + PUTTRANSDOT(canvas, x0, y0, palColor, 0); + + if ((deltaX = x1 - x0) >= 0) + { + xDir = 1; + } + else + { + xDir = -1; + deltaX = -deltaX; + } + + if 
((deltaY = y1 - y0) == 0) + { // horizontal line + if (x0 > x1) + { + swapvalues(x0, x1); + } + if (canvas->IsBgra()) + { + uint32_t fillColor = GPalette.BaseColors[palColor].d; + uint32_t *spot = (uint32_t*)canvas->GetBuffer() + y0*canvas->GetPitch() + x0; + for (int i = 0; i <= deltaX; i++) + spot[i] = fillColor; + } + else + { + memset(canvas->GetBuffer() + y0*canvas->GetPitch() + x0, palColor, deltaX + 1); + } + } + else if (deltaX == 0) + { // vertical line + if (canvas->IsBgra()) + { + uint32_t fillColor = GPalette.BaseColors[palColor].d; + uint32_t *spot = (uint32_t*)canvas->GetBuffer() + y0*canvas->GetPitch() + x0; + int pitch = canvas->GetPitch(); + do + { + *spot = fillColor; + spot += pitch; + } while (--deltaY != 0); + } + else + { + BYTE *spot = canvas->GetBuffer() + y0*canvas->GetPitch() + x0; + int pitch = canvas->GetPitch(); + do + { + *spot = palColor; + spot += pitch; + } while (--deltaY != 0); + } + } + else if (deltaX == deltaY) + { // diagonal line. + if (canvas->IsBgra()) + { + uint32_t fillColor = GPalette.BaseColors[palColor].d; + uint32_t *spot = (uint32_t*)canvas->GetBuffer() + y0*canvas->GetPitch() + x0; + int advance = canvas->GetPitch() + xDir; + do + { + *spot = fillColor; + spot += advance; + } while (--deltaY != 0); + } + else + { + BYTE *spot = canvas->GetBuffer() + y0*canvas->GetPitch() + x0; + int advance = canvas->GetPitch() + xDir; + do + { + *spot = palColor; + spot += advance; + } while (--deltaY != 0); + } + } + else + { + // line is not horizontal, diagonal, or vertical + fixed_t errorAcc = 0; + + if (deltaY > deltaX) + { // y-major line + fixed_t errorAdj = (((unsigned)deltaX << 16) / (unsigned)deltaY) & 0xffff; + if (xDir < 0) + { + if (WeightingScale == 0) + { + while (--deltaY) + { + errorAcc += errorAdj; + y0++; + int weighting = (errorAcc >> WEIGHTSHIFT) & WEIGHTMASK; + PUTTRANSDOT(canvas, x0 - (errorAcc >> 16), y0, palColor, weighting); + PUTTRANSDOT(canvas, x0 - (errorAcc >> 16) - 1, y0, + palColor, WEIGHTMASK - 
weighting); + } + } + else + { + while (--deltaY) + { + errorAcc += errorAdj; + y0++; + int weighting = ((errorAcc * WeightingScale) >> (WEIGHTSHIFT + 8)) & WEIGHTMASK; + PUTTRANSDOT(canvas, x0 - (errorAcc >> 16), y0, palColor, weighting); + PUTTRANSDOT(canvas, x0 - (errorAcc >> 16) - 1, y0, + palColor, WEIGHTMASK - weighting); + } + } + } + else + { + if (WeightingScale == 0) + { + while (--deltaY) + { + errorAcc += errorAdj; + y0++; + int weighting = (errorAcc >> WEIGHTSHIFT) & WEIGHTMASK; + PUTTRANSDOT(canvas, x0 + (errorAcc >> 16), y0, palColor, weighting); + PUTTRANSDOT(canvas, x0 + (errorAcc >> 16) + xDir, y0, + palColor, WEIGHTMASK - weighting); + } + } + else + { + while (--deltaY) + { + errorAcc += errorAdj; + y0++; + int weighting = ((errorAcc * WeightingScale) >> (WEIGHTSHIFT + 8)) & WEIGHTMASK; + PUTTRANSDOT(canvas, x0 + (errorAcc >> 16), y0, palColor, weighting); + PUTTRANSDOT(canvas, x0 + (errorAcc >> 16) + xDir, y0, + palColor, WEIGHTMASK - weighting); + } + } + } + } + else + { // x-major line + fixed_t errorAdj = (((DWORD)deltaY << 16) / (DWORD)deltaX) & 0xffff; + + if (WeightingScale == 0) + { + while (--deltaX) + { + errorAcc += errorAdj; + x0 += xDir; + int weighting = (errorAcc >> WEIGHTSHIFT) & WEIGHTMASK; + PUTTRANSDOT(canvas, x0, y0 + (errorAcc >> 16), palColor, weighting); + PUTTRANSDOT(canvas, x0, y0 + (errorAcc >> 16) + 1, + palColor, WEIGHTMASK - weighting); + } + } + else + { + while (--deltaX) + { + errorAcc += errorAdj; + x0 += xDir; + int weighting = ((errorAcc * WeightingScale) >> (WEIGHTSHIFT + 8)) & WEIGHTMASK; + PUTTRANSDOT(canvas, x0, y0 + (errorAcc >> 16), palColor, weighting); + PUTTRANSDOT(canvas, x0, y0 + (errorAcc >> 16) + 1, + palColor, WEIGHTMASK - weighting); + } + } + } + PUTTRANSDOT(canvas, x1, y1, palColor, 0); + } + canvas->Unlock(); +} + +void SWCanvas::DrawPixel(DCanvas *canvas, int x, int y, int palColor, uint32 realcolor) +{ + if (palColor < 0) + { + palColor = PalFromRGB(realcolor); + } + + 
if (canvas->IsBgra())
+	{
+		// True-colour canvases hold one 32-bit pixel per element, and the pitch is
+		// in pixels (same addressing as DrawLine and Clear use).
+		((uint32_t*)canvas->GetBuffer())[canvas->GetPitch() * y + x] = GPalette.BaseColors[palColor].d;
+	}
+	else
+	{
+		canvas->GetBuffer()[canvas->GetPitch() * y + x] = (BYTE)palColor;
+	}
+}
+
+// Blends a single dot of palette colour 'basecolor' into the canvas at (xx, yy).
+//
+//   level - weight of the existing background pixel, 0..63. 0 draws the dot
+//           (almost) opaque, 63 leaves the background (almost) untouched.
+//
+// No clipping is done here; the coordinates must lie inside the canvas.
+void SWCanvas::PUTTRANSDOT(DCanvas *canvas, int xx, int yy, int basecolor, int level)
+{
+	// Compute the pixel offset from this canvas' own pitch on every call. A row
+	// offset cached in function statics would be wrong as soon as a canvas with
+	// a different pitch is drawn on the same or an adjacent row.
+	const int offset = yy * canvas->GetPitch() + xx;
+
+	if (canvas->IsBgra())
+	{
+		uint32_t *spot = (uint32_t*)canvas->GetBuffer() + offset;
+
+		uint32_t fg = swrenderer::LightBgra::shade_pal_index_simple(basecolor, swrenderer::LightBgra::calc_light_multiplier(0));
+		uint32_t fg_red = (((fg >> 16) & 0xff) * (63 - level)) >> 6;
+		uint32_t fg_green = (((fg >> 8) & 0xff) * (63 - level)) >> 6;
+		uint32_t fg_blue = ((fg & 0xff) * (63 - level)) >> 6;
+
+		uint32_t bg_red = (((*spot >> 16) & 0xff) * level) >> 6;
+		uint32_t bg_green = (((*spot >> 8) & 0xff) * level) >> 6;
+		uint32_t bg_blue = (((*spot) & 0xff) * level) >> 6;
+
+		uint32_t red = fg_red + bg_red;
+		uint32_t green = fg_green + bg_green;
+		uint32_t blue = fg_blue + bg_blue;
+
+		*spot = 0xff000000 | (red << 16) | (green << 8) | blue;
+	}
+	else if (!r_blendmethod)
+	{
+		// Table-driven blend: background weighted by 1+level, foreground by 63-level.
+		BYTE *spot = canvas->GetBuffer() + offset;
+		DWORD *bg2rgb = Col2RGB8[1 + level];
+		DWORD *fg2rgb = Col2RGB8[63 - level];
+		DWORD fg = fg2rgb[basecolor];
+		DWORD bg = bg2rgb[*spot];
+		bg = (fg + bg) | 0x1f07c1f;
+		*spot = RGB32k.All[bg&(bg >> 15)];
+	}
+	else
+	{
+		// Direct RGB blend with the same weights as the table-driven path above
+		// (they add up to 64). Shifting the 8-bit * 64 sum right by 8 yields the
+		// 0..63 per-channel indices, the same range Dim uses for RGB256k.RGB.
+		BYTE *spot = canvas->GetBuffer() + offset;
+		const PalEntry bgcolor = GPalette.BaseColors[*spot];
+		const PalEntry fgcolor = GPalette.BaseColors[basecolor];
+		const int bgweight = 1 + level;
+		const int fgweight = 63 - level;
+
+		uint32_t r = (bgcolor.r * bgweight + fgcolor.r * fgweight) >> 8;
+		uint32_t g = (bgcolor.g * bgweight + fgcolor.g * fgweight) >> 8;
+		uint32_t b = (bgcolor.b * bgweight + fgcolor.b * fgweight) >> 8;
+
+		*spot = (BYTE)RGB256k.RGB[r][g][b];
+	}
+}
+
+void SWCanvas::Clear(DCanvas
*canvas, int left, int top, int right, int bottom, int palcolor, uint32 color)
+{
+	// Fills the rectangle [left,right) x [top,bottom) with a solid colour, clipped
+	// to the canvas. A negative palcolor means "look up 'color' in the palette".
+	int x, y;
+
+	if (left == right || top == bottom)
+	{
+		return;
+	}
+
+	assert(left < right);
+	assert(top < bottom);
+
+	int Width = canvas->GetWidth();
+	int Height = canvas->GetHeight();
+	int Pitch = canvas->GetPitch();
+
+	if (left >= Width || right <= 0 || top >= Height || bottom <= 0)
+	{
+		return;
+	}
+	left = MAX(0, left);
+	right = MIN(Width, right);
+	top = MAX(0, top);
+	bottom = MIN(Height, bottom);
+
+	if (palcolor < 0)
+	{
+		palcolor = PalFromRGB(color);
+	}
+
+	if (canvas->IsBgra())
+	{
+		uint32_t fill_color = GPalette.BaseColors[palcolor];
+
+		uint32_t *dest = (uint32_t*)canvas->GetBuffer() + top * Pitch + left;
+		x = right - left;
+		for (y = top; y < bottom; y++)
+		{
+			for (int i = 0; i < x; i++)
+				dest[i] = fill_color;
+			dest += Pitch;
+		}
+	}
+	else
+	{
+		BYTE *dest = canvas->GetBuffer() + top * Pitch + left;
+		x = right - left;
+		for (y = top; y < bottom; y++)
+		{
+			memset(dest, palcolor, x);
+			dest += Pitch;
+		}
+	}
+}
+
+// Blends 'color' over the rectangle at (x1, y1) with size w x h.
+//
+//   damount - blend strength; 0 leaves the canvas untouched (early out) and
+//             1 replaces it with 'color'. Values outside 0..1 are clamped.
+//
+// The rectangle is clipped against all four canvas edges before drawing.
+void SWCanvas::Dim(DCanvas *canvas, PalEntry color, float damount, int x1, int y1, int w, int h)
+{
+	if (damount == 0.f)
+		return;
+
+	int Width = canvas->GetWidth();
+	int Height = canvas->GetHeight();
+	int Pitch = canvas->GetPitch();
+
+	if (x1 >= Width || y1 >= Height)
+	{
+		return;
+	}
+	// Clip against the left/top edges: a negative origin would otherwise make
+	// the loops below start writing before the first pixel of the buffer.
+	if (x1 < 0)
+	{
+		w += x1;
+		x1 = 0;
+	}
+	if (y1 < 0)
+	{
+		h += y1;
+		y1 = 0;
+	}
+	if (x1 + w > Width)
+	{
+		w = Width - x1;
+	}
+	if (y1 + h > Height)
+	{
+		h = Height - y1;
+	}
+	if (w <= 0 || h <= 0)
+	{
+		return;
+	}
+
+	if (canvas->IsBgra())
+	{
+		uint32_t *spot = (uint32_t*)canvas->GetBuffer() + x1 + y1*Pitch;
+		int gap = Pitch - w;	// pixels to skip from the end of one row to the start of the next
+
+		uint32_t fg = color.d;
+		uint32_t fg_red = (fg >> 16) & 0xff;
+		uint32_t fg_green = (fg >> 8) & 0xff;
+		uint32_t fg_blue = fg & 0xff;
+
+		uint32_t alpha = (uint32_t)clamp(damount * 256 + 0.5f, 0.0f, 256.0f);
+		uint32_t inv_alpha = 256 - alpha;
+
+		// Pre-multiply the dim colour once; only the background varies per pixel.
+		fg_red *= alpha;
+		fg_green *= alpha;
+		fg_blue *= alpha;
+
+		for (int y = h; y != 0; y--)
+		{
+			for (int x = w; x != 0; x--)
+			{
+				uint32_t bg_red = (*spot >> 16) & 0xff;
+				uint32_t bg_green = (*spot >> 8) & 0xff;
+				uint32_t bg_blue = (*spot) & 0xff;
+
+				uint32_t red = (fg_red + bg_red * inv_alpha) / 256;
+				uint32_t green = (fg_green + bg_green * inv_alpha) / 256;
+				uint32_t blue = (fg_blue + bg_blue * inv_alpha) / 256;
+
+				*spot = 0xff000000 | (red << 16) | (green << 8) | blue;
+				spot++;
+			}
+			spot += gap;
+		}
+	}
+	else
+	{
+		DWORD *bg2rgb;
+		DWORD fg;
+
+		BYTE *spot = canvas->GetBuffer() + x1 + y1*Pitch;
+		int gap = Pitch - w;
+
+		// Blend weight on a 0..64 scale. Clamped like the true-colour path so
+		// that an out-of-range damount cannot index outside Col2RGB8 or produce
+		// RGB256k indices above 63.
+		int alpha = clamp((int)((float)64 * damount), 0, 64);
+		int ialpha = 64 - alpha;
+		int dimmedcolor_r = color.r * alpha;
+		int dimmedcolor_g = color.g * alpha;
+		int dimmedcolor_b = color.b * alpha;
+
+		if (!r_blendmethod)
+		{
+			// Table-driven blend through Col2RGB8 / RGB32k.
+			bg2rgb = Col2RGB8[ialpha];
+
+			fg = (((color.r * alpha) >> 4) << 20) |
+				((color.g * alpha) >> 4) |
+				(((color.b * alpha) >> 4) << 10);
+
+			for (int y = h; y != 0; y--)
+			{
+				for (int x = w; x != 0; x--)
+				{
+					DWORD bg;
+
+					bg = bg2rgb[(*spot) & 0xff];
+					bg = (fg + bg) | 0x1f07c1f;
+					*spot = RGB32k.All[bg&(bg >> 15)];
+					spot++;
+				}
+				spot += gap;
+			}
+		}
+		else
+		{
+			// Direct RGB blend; >> 8 turns the 8-bit * 64 sums into the 0..63
+			// indices used to look up RGB256k.RGB.
+			for (int y = h; y != 0; y--)
+			{
+				for (int x = w; x != 0; x--)
+				{
+					uint32_t r = (dimmedcolor_r + GPalette.BaseColors[*spot].r * ialpha) >> 8;
+					uint32_t g = (dimmedcolor_g + GPalette.BaseColors[*spot].g * ialpha) >> 8;
+					uint32_t b = (dimmedcolor_b + GPalette.BaseColors[*spot].b * ialpha) >> 8;
+					*spot = (BYTE)RGB256k.RGB[r][g][b];
+					spot++;
+				}
+				spot += gap;
+			}
+		}
+	}
+}
+
+int SWCanvas::PalFromRGB(uint32 rgb)
+{
+	// For routines that take RGB colors, cache the previous lookup in case there
+	// are several repetitions with the same color.
+	static int LastPal = -1;
+	static uint32 LastRGB;
+
+	if (LastPal >= 0 && LastRGB == rgb)
+	{
+		return LastPal;
+	}
+	// Quick check for black and white.
+ if (rgb == MAKEARGB(255, 0, 0, 0)) + { + LastPal = GPalette.BlackIndex; + } + else if (rgb == MAKEARGB(255, 255, 255, 255)) + { + LastPal = GPalette.WhiteIndex; + } + else + { + LastPal = ColorMatcher.Pick(RPART(rgb), GPART(rgb), BPART(rgb)); + } + LastRGB = rgb; + return LastPal; +} diff --git a/src/swrenderer/r_swcanvas.h b/src/swrenderer/r_swcanvas.h new file mode 100644 index 0000000000..a3f80113b9 --- /dev/null +++ b/src/swrenderer/r_swcanvas.h @@ -0,0 +1,22 @@ + +#pragma once + +#include "v_video.h" +#include "r_data/colormaps.h" + +class SWCanvas +{ +public: + static void DrawTexture(DCanvas *canvas, FTexture *img, DrawParms &parms); + static void FillSimplePoly(DCanvas *canvas, FTexture *tex, FVector2 *points, int npoints, + double originx, double originy, double scalex, double scaley, DAngle rotation, + FDynamicColormap *colormap, PalEntry flatcolor, int lightlevel, int bottomclip); + static void DrawLine(DCanvas *canvas, int x0, int y0, int x1, int y1, int palColor, uint32 realcolor); + static void DrawPixel(DCanvas *canvas, int x, int y, int palColor, uint32 realcolor); + static void Clear(DCanvas *canvas, int left, int top, int right, int bottom, int palcolor, uint32 color); + static void Dim(DCanvas *canvas, PalEntry color, float damount, int x1, int y1, int w, int h); + +private: + static void PUTTRANSDOT(DCanvas *canvas, int xx, int yy, int basecolor, int level); + static int PalFromRGB(uint32 rgb); +}; diff --git a/src/v_draw.cpp b/src/v_draw.cpp index ad9d1aa5bb..a58638e8b6 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -32,7 +32,7 @@ ** */ -// #define NO_SWRENDER // set this if you want to exclude the software renderer. Without software renderer the base implementations of DrawTextureV and FillSimplePoly need to be disabled because they depend on it. +// #define NO_SWRENDER // set this if you want to exclude the software renderer. Without the software renderer software canvas drawing does nothing. 
#include #include @@ -42,12 +42,9 @@ #include "m_swap.h" #include "r_defs.h" #include "r_utility.h" +#include "r_renderer.h" #ifndef NO_SWRENDER -#include "swrenderer/drawers/r_draw.h" -#include "swrenderer/drawers/r_draw_rgba.h" -#include "swrenderer/scene/r_light.h" -#include "swrenderer/viewport/r_viewport.h" -#include "swrenderer/r_renderthread.h" +#include "swrenderer/r_swcanvas.h" #endif #include "r_data/r_translate.h" #include "doomstat.h" @@ -72,7 +69,6 @@ CUSTOM_CVAR(Int, uiscale, 2, CVAR_ARCHIVE | CVAR_NOINITCALL) StatusBar->ScreenSizeChanged(); } } -EXTERN_CVAR(Bool, r_blendmethod) // [RH] Stretch values to make a 320x200 image best fit the screen // without using fractional steppings @@ -86,11 +82,6 @@ int CleanXfac_1, CleanYfac_1, CleanWidth_1, CleanHeight_1; CVAR (Bool, hud_scale, true, CVAR_ARCHIVE); -// For routines that take RGB colors, cache the previous lookup in case there -// are several repetitions with the same color. -static int LastPal = -1; -static uint32 LastRGB; - DEFINE_ACTION_FUNCTION(_Screen, GetWidth) { PARAM_PROLOGUE; @@ -112,30 +103,6 @@ DEFINE_ACTION_FUNCTION(_Screen, PaletteColor) ACTION_RETURN_INT(index); } -static int PalFromRGB(uint32 rgb) -{ - if (LastPal >= 0 && LastRGB == rgb) - { - return LastPal; - } - // Quick check for black and white. - if (rgb == MAKEARGB(255,0,0,0)) - { - LastPal = GPalette.BlackIndex; - } - else if (rgb == MAKEARGB(255,255,255,255)) - { - LastPal = GPalette.WhiteIndex; - } - else - { - LastPal = ColorMatcher.Pick(RPART(rgb), GPART(rgb), BPART(rgb)); - } - LastRGB = rgb; - return LastPal; -} - - void DCanvas::DrawTexture (FTexture *img, double x, double y, int tags_first, ...) 
{ Va_List tags; @@ -189,154 +156,13 @@ DEFINE_ACTION_FUNCTION(_Screen, DrawHUDTexture) void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) { #ifndef NO_SWRENDER - using namespace swrenderer; - - static short bottomclipper[MAXWIDTH], topclipper[MAXWIDTH]; - - auto viewport = RenderViewport::Instance(); - viewport->RenderTarget = screen; - viewport->RenderTarget->Lock(true); - - lighttable_t *translation = nullptr; - FDynamicColormap *basecolormap = &identitycolormap; - int shade = 0; - - if (APART(parms.colorOverlay) != 0) - { - // The software renderer cannot invert the source without inverting the overlay - // too. That means if the source is inverted, we need to do the reverse of what - // the invert overlay flag says to do. - INTBOOL invertoverlay = (parms.style.Flags & STYLEF_InvertOverlay); - - if (parms.style.Flags & STYLEF_InvertSource) - { - invertoverlay = !invertoverlay; - } - if (invertoverlay) - { - parms.colorOverlay = PalEntry(parms.colorOverlay).InverseColor(); - } - // Note that this overrides the translation in software, but not in hardware. 
- FDynamicColormap *colormap = GetSpecialLights(MAKERGB(255, 255, 255), parms.colorOverlay & MAKEARGB(0, 255, 255, 255), 0); - - if (viewport->RenderTarget->IsBgra()) - { - basecolormap = colormap; - shade = (APART(parms.colorOverlay)*NUMCOLORMAPS / 255) << FRACBITS; - } - else - { - translation = &colormap->Maps[(APART(parms.colorOverlay)*NUMCOLORMAPS / 255) * 256]; - } - } - else if (parms.remap != NULL) - { - if (viewport->RenderTarget->IsBgra()) - translation = (lighttable_t *)parms.remap->Palette; - else - translation = parms.remap->Remap; - } - - SpriteDrawerArgs drawerargs; - - drawerargs.SetTranslationMap(translation); - drawerargs.SetLight(basecolormap, 0.0f, shade); - bool visible = drawerargs.SetStyle(parms.style, parms.Alpha, -1, parms.fillcolor, basecolormap); - - double x0 = parms.x - parms.left * parms.destwidth / parms.texwidth; - double y0 = parms.y - parms.top * parms.destheight / parms.texheight; - - if (visible) - { - double centeryback = viewport->CenterY; - viewport->CenterY = 0; - - // There is not enough precision in the drawing routines to keep the full - // precision for y0. 
:( - double sprtopscreen; - modf(y0, &sprtopscreen); - - double yscale = parms.destheight / img->GetHeight(); - double iyscale = 1 / yscale; - - double spryscale = yscale; - assert(spryscale > 0); - - bool sprflipvert = false; - fixed_t iscale = FLOAT2FIXED(1 / spryscale); - //dc_texturemid = (CenterY - 1 - sprtopscreen) * iscale / 65536; - fixed_t frac = 0; - double xiscale = img->GetWidth() / parms.destwidth; - double x2 = x0 + parms.destwidth; - - short *mfloorclip; - short *mceilingclip; - - if (bottomclipper[0] != parms.dclip) - { - fillshort(bottomclipper, screen->GetWidth(), (short)parms.dclip); - } - if (parms.uclip != 0) - { - if (topclipper[0] != parms.uclip) - { - fillshort(topclipper, screen->GetWidth(), (short)parms.uclip); - } - mceilingclip = topclipper; - } - else - { - mceilingclip = zeroarray; - } - mfloorclip = bottomclipper; - - if (parms.flipX) - { - frac = (img->GetWidth() << FRACBITS) - 1; - xiscale = -xiscale; - } - - if (parms.windowleft > 0 || parms.windowright < parms.texwidth) - { - double wi = MIN(parms.windowright, parms.texwidth); - double xscale = parms.destwidth / parms.texwidth; - x0 += parms.windowleft * xscale; - frac += FLOAT2FIXED(parms.windowleft); - x2 -= (parms.texwidth - wi) * xscale; - } - if (x0 < parms.lclip) - { - frac += FLOAT2FIXED((parms.lclip - x0) * xiscale); - x0 = parms.lclip; - } - if (x2 > parms.rclip) - { - x2 = parms.rclip; - } - - int x = int(x0); - int x2_i = int(x2); - fixed_t xiscale_i = FLOAT2FIXED(xiscale); - - static RenderThread thread(nullptr); - thread.DrawQueue->ThreadedRender = false; - while (x < x2_i) - { - drawerargs.DrawMaskedColumn(&thread, x, iscale, img, frac, spryscale, sprtopscreen, sprflipvert, mfloorclip, mceilingclip, !parms.masked); - x++; - frac += xiscale_i; - } - - viewport->CenterY = centeryback; - } + SWCanvas::DrawTexture(this, img, parms); +#endif if (ticdup != 0 && menuactive == MENU_Off) { NetUpdate(); } - - viewport->RenderTarget->Unlock(); -#endif } bool 
DCanvas::SetTextureParms(DrawParms *parms, FTexture *img, double xx, double yy) const @@ -1047,274 +873,18 @@ void DCanvas::FillBorder (FTexture *img) } } -void DCanvas::PUTTRANSDOT (int xx, int yy, int basecolor, int level) -{ - static int oldyy; - static int oldyyshifted; - - if (yy == oldyy+1) - { - oldyy++; - oldyyshifted += GetPitch(); - } - else if (yy == oldyy-1) - { - oldyy--; - oldyyshifted -= GetPitch(); - } - else if (yy != oldyy) - { - oldyy = yy; - oldyyshifted = yy * GetPitch(); - } - - if (IsBgra()) - { - uint32_t *spot = (uint32_t*)GetBuffer() + oldyyshifted + xx; - - uint32_t fg = swrenderer::LightBgra::shade_pal_index_simple(basecolor, swrenderer::LightBgra::calc_light_multiplier(0)); - uint32_t fg_red = (((fg >> 16) & 0xff) * (63 - level)) >> 6; - uint32_t fg_green = (((fg >> 8) & 0xff) * (63 - level)) >> 6; - uint32_t fg_blue = ((fg & 0xff) * (63 - level)) >> 6; - - uint32_t bg_red = (((*spot >> 16) & 0xff) * level) >> 6; - uint32_t bg_green = (((*spot >> 8) & 0xff) * level) >> 6; - uint32_t bg_blue = (((*spot) & 0xff) * level) >> 6; - - uint32_t red = fg_red + bg_red; - uint32_t green = fg_green + bg_green; - uint32_t blue = fg_blue + bg_blue; - - *spot = 0xff000000 | (red << 16) | (green << 8) | blue; - } - else if (!r_blendmethod) - { - BYTE *spot = GetBuffer() + oldyyshifted + xx; - DWORD *bg2rgb = Col2RGB8[1+level]; - DWORD *fg2rgb = Col2RGB8[63-level]; - DWORD fg = fg2rgb[basecolor]; - DWORD bg = bg2rgb[*spot]; - bg = (fg+bg) | 0x1f07c1f; - *spot = RGB32k.All[bg&(bg>>15)]; - } - else - { - BYTE *spot = GetBuffer() + oldyyshifted + xx; - - uint32_t r = (GPalette.BaseColors[*spot].r * (64 - level) + GPalette.BaseColors[basecolor].r * level) / 64; - uint32_t g = (GPalette.BaseColors[*spot].g * (64 - level) + GPalette.BaseColors[basecolor].g * level) / 64; - uint32_t b = (GPalette.BaseColors[*spot].b * (64 - level) + GPalette.BaseColors[basecolor].b * level) / 64; - - *spot = (BYTE)RGB256k.RGB[r][g][b]; - } -} - void DCanvas::DrawLine(int x0, 
int y0, int x1, int y1, int palColor, uint32 realcolor) -//void DrawTransWuLine (int x0, int y0, int x1, int y1, BYTE palColor) { - const int WeightingScale = 0; - const int WEIGHTBITS = 6; - const int WEIGHTSHIFT = 16-WEIGHTBITS; - const int NUMWEIGHTS = (1< y1) - { - int temp = y0; y0 = y1; y1 = temp; - temp = x0; x0 = x1; x1 = temp; - } - - PUTTRANSDOT (x0, y0, palColor, 0); - - if ((deltaX = x1 - x0) >= 0) - { - xDir = 1; - } - else - { - xDir = -1; - deltaX = -deltaX; - } - - if ((deltaY = y1 - y0) == 0) - { // horizontal line - if (x0 > x1) - { - swapvalues (x0, x1); - } - if (IsBgra()) - { - uint32_t fillColor = GPalette.BaseColors[palColor].d; - uint32_t *spot = (uint32_t*)GetBuffer() + y0*GetPitch() + x0; - for (int i = 0; i <= deltaX; i++) - spot[i] = fillColor; - } - else - { - memset (GetBuffer() + y0*GetPitch() + x0, palColor, deltaX+1); - } - } - else if (deltaX == 0) - { // vertical line - if (IsBgra()) - { - uint32_t fillColor = GPalette.BaseColors[palColor].d; - uint32_t *spot = (uint32_t*)GetBuffer() + y0*GetPitch() + x0; - int pitch = GetPitch(); - do - { - *spot = fillColor; - spot += pitch; - } while (--deltaY != 0); - } - else - { - BYTE *spot = GetBuffer() + y0*GetPitch() + x0; - int pitch = GetPitch(); - do - { - *spot = palColor; - spot += pitch; - } while (--deltaY != 0); - } - } - else if (deltaX == deltaY) - { // diagonal line. 
- if (IsBgra()) - { - uint32_t fillColor = GPalette.BaseColors[palColor].d; - uint32_t *spot = (uint32_t*)GetBuffer() + y0*GetPitch() + x0; - int advance = GetPitch() + xDir; - do - { - *spot = fillColor; - spot += advance; - } while (--deltaY != 0); - } - else - { - BYTE *spot = GetBuffer() + y0*GetPitch() + x0; - int advance = GetPitch() + xDir; - do - { - *spot = palColor; - spot += advance; - } while (--deltaY != 0); - } - } - else - { - // line is not horizontal, diagonal, or vertical - fixed_t errorAcc = 0; - - if (deltaY > deltaX) - { // y-major line - fixed_t errorAdj = (((unsigned)deltaX << 16) / (unsigned)deltaY) & 0xffff; - if (xDir < 0) - { - if (WeightingScale == 0) - { - while (--deltaY) - { - errorAcc += errorAdj; - y0++; - int weighting = (errorAcc >> WEIGHTSHIFT) & WEIGHTMASK; - PUTTRANSDOT (x0 - (errorAcc >> 16), y0, palColor, weighting); - PUTTRANSDOT (x0 - (errorAcc >> 16) - 1, y0, - palColor, WEIGHTMASK - weighting); - } - } - else - { - while (--deltaY) - { - errorAcc += errorAdj; - y0++; - int weighting = ((errorAcc * WeightingScale) >> (WEIGHTSHIFT+8)) & WEIGHTMASK; - PUTTRANSDOT (x0 - (errorAcc >> 16), y0, palColor, weighting); - PUTTRANSDOT (x0 - (errorAcc >> 16) - 1, y0, - palColor, WEIGHTMASK - weighting); - } - } - } - else - { - if (WeightingScale == 0) - { - while (--deltaY) - { - errorAcc += errorAdj; - y0++; - int weighting = (errorAcc >> WEIGHTSHIFT) & WEIGHTMASK; - PUTTRANSDOT (x0 + (errorAcc >> 16), y0, palColor, weighting); - PUTTRANSDOT (x0 + (errorAcc >> 16) + xDir, y0, - palColor, WEIGHTMASK - weighting); - } - } - else - { - while (--deltaY) - { - errorAcc += errorAdj; - y0++; - int weighting = ((errorAcc * WeightingScale) >> (WEIGHTSHIFT+8)) & WEIGHTMASK; - PUTTRANSDOT (x0 + (errorAcc >> 16), y0, palColor, weighting); - PUTTRANSDOT (x0 + (errorAcc >> 16) + xDir, y0, - palColor, WEIGHTMASK - weighting); - } - } - } - } - else - { // x-major line - fixed_t errorAdj = (((DWORD) deltaY << 16) / (DWORD) deltaX) & 0xffff; - - if 
(WeightingScale == 0) - { - while (--deltaX) - { - errorAcc += errorAdj; - x0 += xDir; - int weighting = (errorAcc >> WEIGHTSHIFT) & WEIGHTMASK; - PUTTRANSDOT (x0, y0 + (errorAcc >> 16), palColor, weighting); - PUTTRANSDOT (x0, y0 + (errorAcc >> 16) + 1, - palColor, WEIGHTMASK - weighting); - } - } - else - { - while (--deltaX) - { - errorAcc += errorAdj; - x0 += xDir; - int weighting = ((errorAcc * WeightingScale) >> (WEIGHTSHIFT+8)) & WEIGHTMASK; - PUTTRANSDOT (x0, y0 + (errorAcc >> 16), palColor, weighting); - PUTTRANSDOT (x0, y0 + (errorAcc >> 16) + 1, - palColor, WEIGHTMASK - weighting); - } - } - } - PUTTRANSDOT (x1, y1, palColor, 0); - } - Unlock(); +#ifndef NO_SWRENDER + SWCanvas::DrawLine(this, x0, y0, x1, y1, palColor, realcolor); +#endif } void DCanvas::DrawPixel(int x, int y, int palColor, uint32 realcolor) { - if (palColor < 0) - { - palColor = PalFromRGB(realcolor); - } - - Buffer[Pitch * y + x] = (BYTE)palColor; +#ifndef NO_SWRENDER + SWCanvas::DrawPixel(this, x, y, palColor, realcolor); +#endif } //========================================================================== @@ -1327,59 +897,16 @@ void DCanvas::DrawPixel(int x, int y, int palColor, uint32 realcolor) void DCanvas::Clear (int left, int top, int right, int bottom, int palcolor, uint32 color) { - int x, y; - - if (left == right || top == bottom) +#ifndef NO_SWRENDER + if (palcolor < 0 && APART(color) != 255) { - return; - } - - assert(left < right); - assert(top < bottom); - - if (left >= Width || right <= 0 || top >= Height || bottom <= 0) - { - return; - } - left = MAX(0,left); - right = MIN(Width,right); - top = MAX(0,top); - bottom = MIN(Height,bottom); - - if (palcolor < 0) - { - if (APART(color) != 255) - { - Dim(color, APART(color)/255.f, left, top, right - left, bottom - top); - return; - } - - palcolor = PalFromRGB(color); - } - - if (IsBgra()) - { - uint32_t fill_color = GPalette.BaseColors[palcolor]; - - uint32_t *dest = (uint32_t*)Buffer + top * Pitch + left; - x = right - 
left; - for (y = top; y < bottom; y++) - { - for (int i = 0; i < x; i++) - dest[i] = fill_color; - dest += Pitch; - } + Dim(color, APART(color) / 255.f, left, top, right - left, bottom - top); } else { - BYTE *dest = Buffer + top * Pitch + left; - x = right - left; - for (y = top; y < bottom; y++) - { - memset(dest, palcolor, x); - dest += Pitch; - } + SWCanvas::Clear(this, left, top, right, bottom, palcolor, color); } +#endif } DEFINE_ACTION_FUNCTION(_Screen, Clear) @@ -1395,6 +922,21 @@ DEFINE_ACTION_FUNCTION(_Screen, Clear) return 0; } +//========================================================================== +// +// DCanvas :: Dim +// +// Applies a colored overlay to an area of the screen. +// +//========================================================================== + +void DCanvas::Dim(PalEntry color, float damount, int x1, int y1, int w, int h) +{ +#ifndef NO_SWRENDER + SWCanvas::Dim(this, color, damount, x1, y1, w, h); +#endif +} + //========================================================================== // // DCanvas :: FillSimplePoly @@ -1415,185 +957,7 @@ void DCanvas::FillSimplePoly(FTexture *tex, FVector2 *points, int npoints, FDynamicColormap *colormap, PalEntry flatcolor, int lightlevel, int bottomclip) { #ifndef NO_SWRENDER - using namespace swrenderer; - - // Use an equation similar to player sprites to determine shade - fixed_t shade = LIGHT2SHADE(lightlevel) - 12*FRACUNIT; - float topy, boty, leftx, rightx; - int toppt, botpt, pt1, pt2; - int i; - int y1, y2, y; - fixed_t x; - bool dorotate = rotation != 0.; - double cosrot, sinrot; - - if (--npoints < 2 || Buffer == NULL) - { // not a polygon or we're not locked - return; - } - - if (bottomclip <= 0) - { - bottomclip = Height; - } - - // Find the extents of the polygon, in particular the highest and lowest points. 
- for (botpt = toppt = 0, boty = topy = points[0].Y, leftx = rightx = points[0].X, i = 1; i <= npoints; ++i) - { - if (points[i].Y < topy) - { - topy = points[i].Y; - toppt = i; - } - if (points[i].Y > boty) - { - boty = points[i].Y; - botpt = i; - } - if (points[i].X < leftx) - { - leftx = points[i].X; - } - if (points[i].X > rightx) - { - rightx = points[i].X; - } - } - if (topy >= bottomclip || // off the bottom of the screen - boty <= 0 || // off the top of the screen - leftx >= Width || // off the right of the screen - rightx <= 0) // off the left of the screen - { - return; - } - - auto viewport = RenderViewport::Instance(); - - scalex /= tex->Scale.X; - scaley /= tex->Scale.Y; - - // Use the CRT's functions here. - cosrot = cos(rotation.Radians()); - sinrot = sin(rotation.Radians()); - - // Setup constant texture mapping parameters. - SpanDrawerArgs drawerargs; - drawerargs.SetTexture(tex); - if (colormap) - drawerargs.SetLight(colormap, 0, clamp(shade >> FRACBITS, 0, NUMCOLORMAPS - 1)); - else - drawerargs.SetLight(&identitycolormap, 0, 0); - if (drawerargs.TextureWidthBits() != 0) - { - scalex = double(1u << (32 - drawerargs.TextureWidthBits())) / scalex; - drawerargs.SetTextureUStep(xs_RoundToInt(cosrot * scalex)); - } - else - { // Texture is one pixel wide. - scalex = 0; - drawerargs.SetTextureUStep(0); - } - if (drawerargs.TextureHeightBits() != 0) - { - scaley = double(1u << (32 - drawerargs.TextureHeightBits())) / scaley; - drawerargs.SetTextureVStep(xs_RoundToInt(sinrot * scaley)); - } - else - { // Texture is one pixel tall. - scaley = 0; - drawerargs.SetTextureVStep(0); - } - - // Travel down the right edge and create an outline of that edge. 
- static short spanend[MAXHEIGHT]; - pt1 = toppt; - pt2 = toppt + 1; if (pt2 > npoints) pt2 = 0; - y1 = xs_RoundToInt(points[pt1].Y + 0.5f); - do - { - x = FLOAT2FIXED(points[pt1].X + 0.5f); - y2 = xs_RoundToInt(points[pt2].Y + 0.5f); - if (y1 >= y2 || (y1 < 0 && y2 < 0) || (y1 >= bottomclip && y2 >= bottomclip)) - { - } - else - { - fixed_t xinc = FLOAT2FIXED((points[pt2].X - points[pt1].X) / (points[pt2].Y - points[pt1].Y)); - int y3 = MIN(y2, bottomclip); - if (y1 < 0) - { - x += xinc * -y1; - y1 = 0; - } - for (y = y1; y < y3; ++y) - { - spanend[y] = clamp(x >> FRACBITS, -1, Width); - x += xinc; - } - } - y1 = y2; - pt1 = pt2; - pt2++; if (pt2 > npoints) pt2 = 0; - } while (pt1 != botpt); - - static RenderThread thread(nullptr); - thread.DrawQueue->ThreadedRender = false; - - // Travel down the left edge and fill it in. - pt1 = toppt; - pt2 = toppt - 1; if (pt2 < 0) pt2 = npoints; - y1 = xs_RoundToInt(points[pt1].Y + 0.5f); - do - { - x = FLOAT2FIXED(points[pt1].X + 0.5f); - y2 = xs_RoundToInt(points[pt2].Y + 0.5f); - if (y1 >= y2 || (y1 < 0 && y2 < 0) || (y1 >= bottomclip && y2 >= bottomclip)) - { - } - else - { - fixed_t xinc = FLOAT2FIXED((points[pt2].X - points[pt1].X) / (points[pt2].Y - points[pt1].Y)); - int y3 = MIN(y2, bottomclip); - if (y1 < 0) - { - x += xinc * -y1; - y1 = 0; - } - for (y = y1; y < y3; ++y) - { - int x1 = x >> FRACBITS; - int x2 = spanend[y]; - if (x2 > x1 && x2 > 0 && x1 < Width) - { - x1 = MAX(x1, 0); - x2 = MIN(x2, Width); -#if 0 - memset(this->Buffer + y * this->Pitch + x1, (int)tex, x2 - x1); -#else - drawerargs.SetDestY(y); - drawerargs.SetDestX1(x1); - drawerargs.SetDestX2(x2 - 1); - - DVector2 tex(x1 - originx, y - originy); - if (dorotate) - { - double t = tex.X; - tex.X = t * cosrot - tex.Y * sinrot; - tex.Y = tex.Y * cosrot + t * sinrot; - } - drawerargs.SetTextureUPos(xs_RoundToInt(tex.X * scalex)); - drawerargs.SetTextureVPos(xs_RoundToInt(tex.Y * scaley)); - - drawerargs.DrawSpan(&thread); -#endif - } - x += xinc; - } - 
} - y1 = y2; - pt1 = pt2; - pt2--; if (pt2 < 0) pt2 = npoints; - } while (pt1 != botpt); + SWCanvas::FillSimplePoly(this, tex, points, npoints, originx, originy, scalex, scaley, rotation, colormap, flatcolor, lightlevel, bottomclip); #endif } diff --git a/src/v_video.cpp b/src/v_video.cpp index 0f9086bc20..56f45c3160 100644 --- a/src/v_video.cpp +++ b/src/v_video.cpp @@ -337,136 +337,6 @@ void DCanvas::Dim (PalEntry color) Dim (dimmer, amount, 0, 0, Width, Height); } -//========================================================================== -// -// DCanvas :: Dim -// -// Applies a colored overlay to an area of the screen. -// -//========================================================================== - -void DCanvas::Dim (PalEntry color, float damount, int x1, int y1, int w, int h) -{ - if (damount == 0.f) - return; - - int gap; - int x, y; - - if (x1 >= Width || y1 >= Height) - { - return; - } - if (x1 + w > Width) - { - w = Width - x1; - } - if (y1 + h > Height) - { - h = Height - y1; - } - if (w <= 0 || h <= 0) - { - return; - } - - gap = Pitch - w; - - if (IsBgra()) - { - uint32_t *spot = (uint32_t*)Buffer + x1 + y1*Pitch; - - uint32_t fg = color.d; - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t alpha = (uint32_t)clamp(damount * 256 + 0.5f, 0.0f, 256.0f); - uint32_t inv_alpha = 256 - alpha; - - fg_red *= alpha; - fg_green *= alpha; - fg_blue *= alpha; - - for (y = h; y != 0; y--) - { - for (x = w; x != 0; x--) - { - uint32_t bg_red = (*spot >> 16) & 0xff; - uint32_t bg_green = (*spot >> 8) & 0xff; - uint32_t bg_blue = (*spot) & 0xff; - - uint32_t red = (fg_red + bg_red * inv_alpha) / 256; - uint32_t green = (fg_green + bg_green * inv_alpha) / 256; - uint32_t blue = (fg_blue + bg_blue * inv_alpha) / 256; - - *spot = 0xff000000 | (red << 16) | (green << 8) | blue; - spot++; - } - spot += gap; - } - } - else - { - BYTE *spot = Buffer + x1 + y1*Pitch; - - DWORD *bg2rgb; - DWORD fg; - 
- spot = Buffer + x1 + y1*Pitch; - gap = Pitch - w; - - int alpha = (int)((float)64 * damount); - int ialpha = 64 - alpha; - int dimmedcolor_r = color.r * alpha; - int dimmedcolor_g = color.g * alpha; - int dimmedcolor_b = color.b * alpha; - - if (!r_blendmethod) - { - { - int amount; - - amount = (int)(damount * 64); - bg2rgb = Col2RGB8[64 - amount]; - - fg = (((color.r * amount) >> 4) << 20) | - ((color.g * amount) >> 4) | - (((color.b * amount) >> 4) << 10); - } - - for (y = h; y != 0; y--) - { - for (x = w; x != 0; x--) - { - DWORD bg; - - bg = bg2rgb[(*spot)&0xff]; - bg = (fg+bg) | 0x1f07c1f; - *spot = RGB32k.All[bg&(bg>>15)]; - spot++; - } - spot += gap; - } - } - else - { - for (y = h; y != 0; y--) - { - for (x = w; x != 0; x--) - { - uint32_t r = (dimmedcolor_r + GPalette.BaseColors[*spot].r * ialpha) >> 8; - uint32_t g = (dimmedcolor_g + GPalette.BaseColors[*spot].g * ialpha) >> 8; - uint32_t b = (dimmedcolor_b + GPalette.BaseColors[*spot].b * ialpha) >> 8; - *spot = (BYTE)RGB256k.RGB[r][g][b]; - spot++; - } - spot += gap; - } - } - } -} - //========================================================================== // // DCanvas :: GetScreenshotBuffer diff --git a/src/v_video.h b/src/v_video.h index c06b6dd927..8cd7be4f87 100644 --- a/src/v_video.h +++ b/src/v_video.h @@ -300,8 +300,6 @@ private: // Keep track of canvases, for automatic destruction at exit DCanvas *Next; static DCanvas *CanvasChain; - - void PUTTRANSDOT (int xx, int yy, int basecolor, int level); }; // A canvas in system memory. 
From 5ef8ecce2a79fffd424dbb5cfea09483f35b66aa Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 14 Feb 2017 02:52:41 +0100 Subject: [PATCH 851/912] Fix wall scroller direction --- src/swrenderer/line/r_wallsetup.cpp | 53 ++++++----------------------- 1 file changed, 11 insertions(+), 42 deletions(-) diff --git a/src/swrenderer/line/r_wallsetup.cpp b/src/swrenderer/line/r_wallsetup.cpp index 94c3a7aa44..071ae799d2 100644 --- a/src/swrenderer/line/r_wallsetup.cpp +++ b/src/swrenderer/line/r_wallsetup.cpp @@ -163,31 +163,15 @@ namespace swrenderer float depthScale = (float)(WallT.InvZstep * viewport->WallTMapScale2); float depthOrg = (float)(-WallT.UoverZstep * viewport->WallTMapScale2); - if (xrepeat < 0.0f) + for (int x = x1; x < x2; x++) { - for (int x = x1; x < x2; x++) - { - float u = uOverZ / invZ; + float u = uOverZ / invZ; - UPos[x] = (fixed_t)((xrepeat - u * xrepeat) * FRACUNIT); - VStep[x] = depthOrg + u * depthScale; + UPos[x] = (fixed_t)(u * xrepeat * FRACUNIT); + VStep[x] = depthOrg + u * depthScale; - uOverZ += uGradient; - invZ += zGradient; - } - } - else - { - for (int x = x1; x < x2; x++) - { - float u = uOverZ / invZ; - - UPos[x] = (fixed_t)(u * xrepeat * FRACUNIT); - VStep[x] = depthOrg + u * depthScale; - - uOverZ += uGradient; - invZ += zGradient; - } + uOverZ += uGradient; + invZ += zGradient; } } @@ -201,29 +185,14 @@ namespace swrenderer float zGradient = WallT.InvZstep; float xrepeat = (float)walxrepeat; - if (xrepeat < 0.0f) + for (int x = x1; x < x2; x++) { - for (int x = x1; x < x2; x++) - { - float u = uOverZ / invZ * xrepeat - xrepeat; + float u = uOverZ / invZ * xrepeat; - UPos[x] = (fixed_t)(u * FRACUNIT); + UPos[x] = (fixed_t)(u * FRACUNIT); - uOverZ += uGradient; - invZ += zGradient; - } - } - else - { - for (int x = x1; x < x2; x++) - { - float u = uOverZ / invZ * xrepeat; - - UPos[x] = (fixed_t)(u * FRACUNIT); - - uOverZ += uGradient; - invZ += zGradient; - } + uOverZ += uGradient; + invZ += zGradient; } } } From 
eac98ac2263b2625f43614c3f3454b24907d3748 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 14 Feb 2017 06:37:06 +0100 Subject: [PATCH 852/912] Calculate sprite dynamic light contribution and pass it along to the sprite drawer --- src/swrenderer/things/r_sprite.cpp | 50 ++++++++++++++++++++++++ src/swrenderer/things/r_sprite.h | 2 + src/swrenderer/viewport/r_spritedrawer.h | 5 +++ 3 files changed, 57 insertions(+) diff --git a/src/swrenderer/things/r_sprite.cpp b/src/swrenderer/things/r_sprite.cpp index 1da377c7df..652bfcef27 100644 --- a/src/swrenderer/things/r_sprite.cpp +++ b/src/swrenderer/things/r_sprite.cpp @@ -56,6 +56,7 @@ #include "swrenderer/viewport/r_viewport.h" #include "swrenderer/r_memory.h" #include "swrenderer/r_renderthread.h" +#include "gl/dynlights/gl_dynlight.h" EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor) @@ -225,6 +226,54 @@ namespace swrenderer bool fullbright = !vis->foggy && ((renderflags & RF_FULLBRIGHT) || (thing->flags5 & MF5_BRIGHT)); bool fadeToBlack = (vis->RenderStyle.Flags & STYLEF_FadeToBlack) != 0; + + if (r_dynlights) + { + float lit_red = 0; + float lit_green = 0; + float lit_blue = 0; + auto node = vis->sector->lighthead; + while (node != nullptr) + { + ADynamicLight *light = node->lightsource; + if (light->visibletoplayer && !(light->flags2&MF2_DORMANT) && (!(light->flags4&MF4_DONTLIGHTSELF) || light->target != thing)) + { + double lightX = light->X() - ViewPos.X; + double lightY = light->Y() - ViewPos.Y; + double lightZ = light->Z() - ViewPos.Z; + + float lx = (float)(lightX * ViewSin - lightY * ViewCos) - pos.X; + float ly = (float)(lightX * ViewTanCos + lightY * ViewTanSin) - pos.Y; + float lz = (float)lightZ - pos.Z; + + bool is_point_light = (node->lightsource->flags4 & MF4_ATTENUATE) != 0; + float LdotL = lx * lx + ly * ly + lz * lz; + float NdotL = is_point_light ? 
-ly : 0.0f; + + float radius = node->lightsource->GetRadius(); + if (radius * radius >= LdotL && NdotL > 0.0f) + { + uint32_t red = light->GetRed(); + uint32_t green = light->GetGreen(); + uint32_t blue = light->GetBlue(); + float distance = sqrt(LdotL); + float attenuation = distance / radius * NdotL; + lit_red += red * attenuation; + lit_red += green * attenuation; + lit_red += blue * attenuation; + } + } + node = node->nextLight; + } + lit_red = MIN(lit_red, 255.0f); + lit_green = MIN(lit_green, 255.0f); + lit_blue = MIN(lit_blue, 255.0f); + vis->dynlightcolor = (((uint32_t)lit_red) << 16) | (((uint32_t)lit_green) << 8) | ((uint32_t)lit_blue); + } + else + { + vis->dynlightcolor = 0; + } vis->Light.SetColormap(LightVisibility::Instance()->SpriteGlobVis() / MAX(tz, MINZ), spriteshade, basecolormap, fullbright, invertcolormap, fadeToBlack); @@ -250,6 +299,7 @@ namespace swrenderer SpriteDrawerArgs drawerargs; drawerargs.SetLight(vis->Light.BaseColormap, 0, vis->Light.ColormapNum << FRACBITS); + drawerargs.SetDynamicLight(dynlightcolor); FDynamicColormap *basecolormap = static_cast(vis->Light.BaseColormap); diff --git a/src/swrenderer/things/r_sprite.h b/src/swrenderer/things/r_sprite.h index 776af54fa8..678c909090 100644 --- a/src/swrenderer/things/r_sprite.h +++ b/src/swrenderer/things/r_sprite.h @@ -32,5 +32,7 @@ namespace swrenderer uint32_t Translation = 0; uint32_t FillColor = 0; + + uint32_t dynlightcolor = 0; }; } diff --git a/src/swrenderer/viewport/r_spritedrawer.h b/src/swrenderer/viewport/r_spritedrawer.h index fc5053e6e0..bca4664aa1 100644 --- a/src/swrenderer/viewport/r_spritedrawer.h +++ b/src/swrenderer/viewport/r_spritedrawer.h @@ -21,6 +21,7 @@ namespace swrenderer void SetDest(int x, int y); void SetCount(int count) { dc_count = count; } void SetSolidColor(int color) { dc_color = color; } + void SetDynamicLight(uint32_t color) { dynlightcolor = color; } void DrawMaskedColumn(RenderThread *thread, int x, fixed_t iscale, FTexture *texture, fixed_t 
column, double spryscale, double sprtopscreen, bool sprflipvert, const short *mfloorclip, const short *mceilingclip, bool unmasked = false); void FillColumn(RenderThread *thread); @@ -50,6 +51,8 @@ namespace swrenderer uint32_t *DestBlend() const { return dc_destblend; } fixed_t SrcAlpha() const { return dc_srcalpha; } fixed_t DestAlpha() const { return dc_destalpha; } + + uint32_t DynamicLight() const { return dynlightcolor; } private: bool SetBlendFunc(int op, fixed_t fglevel, fixed_t bglevel, int flags); @@ -81,6 +84,8 @@ namespace swrenderer int dc_color = 0; uint32_t dc_srccolor = 0; uint32_t dc_srccolor_bgra = 0; + + uint32_t dynlightcolor = 0; typedef void(SWPixelFormatDrawers::*SpriteDrawerFunc)(const SpriteDrawerArgs &args); SpriteDrawerFunc colfunc; From fc97ef09137dc5c57fd75e2567b098ca9dfaf67f Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 15 Feb 2017 13:01:00 +0100 Subject: [PATCH 853/912] Fix palette fog boundary rendering error --- src/swrenderer/drawers/r_draw_pal.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/swrenderer/drawers/r_draw_pal.cpp b/src/swrenderer/drawers/r_draw_pal.cpp index fad7c24362..fcd14a27bc 100644 --- a/src/swrenderer/drawers/r_draw_pal.cpp +++ b/src/swrenderer/drawers/r_draw_pal.cpp @@ -2889,7 +2889,7 @@ namespace swrenderer DrawFogBoundaryLinePalCommand::DrawFogBoundaryLinePalCommand(const SpanDrawerArgs &args, int y, int x1, int x2) : PalSpanCommand(args), y(y), x1(x1), x2(x2) { _colormap = args.Colormap(); - _dest = RenderViewport::Instance()->GetDest(x1, y); + _dest = RenderViewport::Instance()->GetDest(0, y); } void DrawFogBoundaryLinePalCommand::Execute(DrawerThread *thread) From 01cbb14f14a7e454532c942ad8aedc6ee633eee2 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 15 Feb 2017 13:26:43 +0100 Subject: [PATCH 854/912] Change wall project to use the fixed version from GZDoom as it was most likely more tested --- src/swrenderer/line/r_wallsetup.cpp | 57 
++++++++++++++++++++++------- 1 file changed, 44 insertions(+), 13 deletions(-) diff --git a/src/swrenderer/line/r_wallsetup.cpp b/src/swrenderer/line/r_wallsetup.cpp index 071ae799d2..579ff21f21 100644 --- a/src/swrenderer/line/r_wallsetup.cpp +++ b/src/swrenderer/line/r_wallsetup.cpp @@ -159,19 +159,35 @@ namespace swrenderer float invZ = WallT.InvZorg + WallT.InvZstep * (float)(x1 + 0.5 - viewport->CenterX); float uGradient = WallT.UoverZstep; float zGradient = WallT.InvZstep; - float xrepeat = (float)walxrepeat; + float xrepeat = (float)fabs(walxrepeat); float depthScale = (float)(WallT.InvZstep * viewport->WallTMapScale2); float depthOrg = (float)(-WallT.UoverZstep * viewport->WallTMapScale2); - for (int x = x1; x < x2; x++) + if (walxrepeat < 0.0) { - float u = uOverZ / invZ; + for (int x = x1; x < x2; x++) + { + float u = uOverZ / invZ; - UPos[x] = (fixed_t)(u * xrepeat * FRACUNIT); - VStep[x] = depthOrg + u * depthScale; + UPos[x] = (fixed_t)((xrepeat - u * xrepeat) * FRACUNIT); + VStep[x] = depthOrg + u * depthScale; - uOverZ += uGradient; - invZ += zGradient; + uOverZ += uGradient; + invZ += zGradient; + } + } + else + { + for (int x = x1; x < x2; x++) + { + float u = uOverZ / invZ; + + UPos[x] = (fixed_t)(u * xrepeat * FRACUNIT); + VStep[x] = depthOrg + u * depthScale; + + uOverZ += uGradient; + invZ += zGradient; + } } } @@ -183,16 +199,31 @@ namespace swrenderer float invZ = WallT.InvZorg + WallT.InvZstep * (float)(x1 + 0.5 - viewport->CenterX); float uGradient = WallT.UoverZstep; float zGradient = WallT.InvZstep; - float xrepeat = (float)walxrepeat; + float xrepeat = (float)fabs(walxrepeat); - for (int x = x1; x < x2; x++) + if (walxrepeat < 0.0f) { - float u = uOverZ / invZ * xrepeat; + for (int x = x1; x < x2; x++) + { + float u = uOverZ / invZ * xrepeat - xrepeat; - UPos[x] = (fixed_t)(u * FRACUNIT); + UPos[x] = (fixed_t)(u * FRACUNIT); - uOverZ += uGradient; - invZ += zGradient; + uOverZ += uGradient; + invZ += zGradient; + } + } + else + { + for 
(int x = x1; x < x2; x++) + { + float u = uOverZ / invZ * xrepeat; + + UPos[x] = (fixed_t)(u * FRACUNIT); + + uOverZ += uGradient; + invZ += zGradient; + } } } } From e42f914efac3c3a8939ee891796fb9702e27f46b Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 17 Feb 2017 05:34:33 +0100 Subject: [PATCH 855/912] Added php script generating the 32 bit wall drawers --- src/swrenderer/drawers/r_draw_wall32.php | 250 +++++++++++++++++++++++ 1 file changed, 250 insertions(+) create mode 100644 src/swrenderer/drawers/r_draw_wall32.php diff --git a/src/swrenderer/drawers/r_draw_wall32.php b/src/swrenderer/drawers/r_draw_wall32.php new file mode 100644 index 0000000000..0d243ace7c --- /dev/null +++ b/src/swrenderer/drawers/r_draw_wall32.php @@ -0,0 +1,250 @@ +#!/usr/bin/php +/* +** Drawer commands for walls +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. +** +*/ + +/* + Warning: this C++ source file has been auto-generated. Please modify the original php script that generated it. 
+*/ + +#pragma once + +#include "swrenderer/drawers/r_draw_rgba.h" +#include "swrenderer/viewport/r_walldrawer.h" + +namespace swrenderer +{ + + class : public DrawerCommand + { + protected: + WallDrawerArgs args; + bool is_nearest_filter = false; + + public: + (const WallDrawerArgs &drawerargs) : args(drawerargs) { } + + void Execute(DrawerThread *thread) override + { + auto shade_constants = args.ColormapConstants(); + if (shade_constants.simple_shade) + { + + } + else + { + + } + } + }; + + + if (is_nearest_filter) + { + + } + else + { + + } + + int textureheight = args.TextureHeight(); + + // Shade constants + int light = args.Light(); + __m128i mlight = _mm_set_epi16(light, light, light, 256, light, light, light, 256); + __m128i inv_light = _mm_set1_epi16(256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light, 0); + + __m128i inv_desaturate = _mm_set1_epi16(256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 0, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 0); + __m128i shade_fade = _mm_set_epi16(shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha); + __m128i shade_light = _mm_set_epi16(shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha); + + + int count = args.Count(); + for (int index = 0; index < count; index++) + { + int offset = index * pitch * 4; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32(dest[offset]), _mm_setzero_si128()); + + // Sample + + + // Shade + + + // Blend + + + dest[offset] = _mm_cvtsi32(outcolor); + frac += fracstep; + } + + int sample_index = ((frac >> FRACBITS) * 
textureheight) >> FRACBITS; + unsigned int ifgcolor = source[sample_index * 4]; + __m128i fgcolor = _mm_unpacklo_epi16(_mm_unpacklo_epi8(_mm_cvtsi32(ifgcolor), _mm_setzero_si128()), _mm_setzero_si128()); + + unsigned int frac_y0 = (texturefracy >> FRACBITS) * textureheight; + unsigned int frac_y1 = ((texturefracy + one) >> FRACBITS) * textureheight; + unsigned int y0 = frac_y0 >> FRACBITS; + unsigned int y1 = frac_y1 >> FRACBITS; + + unsigned int p00 = source[y0 * 4]; + unsigned int p01 = source[y1 * 4]; + unsigned int p10 = source2[y0 * 4]; + unsigned int p11 = source2[y1 * 4]; + + unsigned int inv_b = texturefracx; + unsigned int a = (frac_y1 >> (FRACBITS - 4)) & 15; + unsigned int inv_a = 16 - a; + unsigned int b = 16 - inv_b; + + unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int ifgcolor = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + __m128i fgcolor = _mm_unpacklo_epi16(_mm_unpacklo_epi8(_mm_cvtsi32(ifgcolor), _mm_setzero_si128()), _mm_setzero_si128()); + + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + int blue = BPART(ifgcolor); + int green = GPART(ifgcolor); + int red = RPART(ifgcolor); + + __m128i intensity = _mm_set1_epi16(((red * 77 + green * 143 + blue * 37) >> 8) * desaturate); + + fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); + fgcolor = _mm_mullo_epi16(fgcolor, _mm_set1_epi16(light)); + fgcolor = 
_mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(shade_fade, inv_light), fgcolor), 8); + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + __m128 outcolor = fgcolor; + outcolor = _mm_packus_epi16(outcolor, _mm_setzero128()); + + fgcolor = _mm_mullo_epi16(fgcolor, alpha); + bgcolor = _mm_mullo_epi16(bgcolor, inv_alpha); + __m128i outcolor = _mm_srli_epi16(_mm_add_epi16(fgcolor, bgcolor), 8); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero128()); + outcolor = _mm_or_si128(outcolor, _mm_set_epi32(0xff000000)); + + + __m128i out_lo = _mm_srai_epi16(_mm_add_epi32(fg_lo, bg_lo), 8); + __m128i out_hi = _mm_srai_epi16(_mm_add_epi32(fg_hi, bg_hi), 8); + __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero128()); + outcolor = _mm_or_si128(outcolor, _mm_set_epi32(0xff000000)); + + + __m128i out_lo = _mm_srai_epi16(_mm_sub_epi32(fg_lo, bg_lo), 8); + __m128i out_hi = _mm_srai_epi16(_mm_sub_epi32(fg_hi, bg_hi), 8); + __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero128()); + outcolor = _mm_or_si128(outcolor, _mm_set_epi32(0xff000000)); + + + __m128i out_lo = _mm_srai_epi16(_mm_sub_epi32(bg_lo, fg_lo), 8); + __m128i out_hi = _mm_srai_epi16(_mm_sub_epi32(bg_hi, fg_hi), 8); + __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero128()); + outcolor = _mm_or_si128(outcolor, _mm_set_epi32(0xff000000)); + + __m128 alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128 inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + + + __m128 bgalpha = _mm_mullo_epi16(destalpha, alpha); + bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + + __m128 fgalpha = _mm_mullo_epi16(srcalpha, alpha); + fgalpha = 
_mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + +} From 4240a15f3a5924ff3cf8890112a38f15a45b341f Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 17 Feb 2017 22:58:55 +0100 Subject: [PATCH 856/912] Hook up php generated wall drawer --- src/swrenderer/drawers/r_draw_rgba.cpp | 11 + src/swrenderer/drawers/r_draw_rgba.h | 2 +- src/swrenderer/drawers/r_draw_wall32.h | 1766 ++++++++++++++++++++++ src/swrenderer/drawers/r_draw_wall32.php | 79 +- 4 files changed, 1829 insertions(+), 29 deletions(-) create mode 100644 src/swrenderer/drawers/r_draw_wall32.h diff --git a/src/swrenderer/drawers/r_draw_rgba.cpp b/src/swrenderer/drawers/r_draw_rgba.cpp index 0b9af6b5e7..91229d1aaf 100644 --- a/src/swrenderer/drawers/r_draw_rgba.cpp +++ b/src/swrenderer/drawers/r_draw_rgba.cpp @@ -40,6 +40,7 @@ #include "gl/data/gl_matrix.h" #include "swrenderer/viewport/r_viewport.h" #include "swrenderer/scene/r_light.h" +#include "r_draw_wall32.h" #include "gi.h" #include "stats.h" @@ -58,8 +59,18 @@ CVAR(Bool, r_mipmap, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); // Level of detail texture bias CVAR(Float, r_lod_bias, -1.5, 0); // To do: add CVAR_ARCHIVE | CVAR_GLOBALCONFIG when a good default has been decided +CVAR(Bool, r_phpdrawers, false, 0); + namespace swrenderer { + void SWTruecolorDrawers::DrawWallColumn(const WallDrawerArgs &args) + { + if (r_phpdrawers) + Queue->Push(args); + else + Queue->Push(args); + } + DrawSpanLLVMCommand::DrawSpanLLVMCommand(const SpanDrawerArgs &drawerargs) { auto shade_constants = drawerargs.ColormapConstants(); diff --git a/src/swrenderer/drawers/r_draw_rgba.h 
b/src/swrenderer/drawers/r_draw_rgba.h index 8f31066491..8e3d43fee6 100644 --- a/src/swrenderer/drawers/r_draw_rgba.h +++ b/src/swrenderer/drawers/r_draw_rgba.h @@ -359,7 +359,7 @@ namespace swrenderer public: using SWPixelFormatDrawers::SWPixelFormatDrawers; - void DrawWallColumn(const WallDrawerArgs &args) override { Queue->Push(args); } + void DrawWallColumn(const WallDrawerArgs &args) override; void DrawWallMaskedColumn(const WallDrawerArgs &args) override { Queue->Push(args); } void DrawWallAddColumn(const WallDrawerArgs &args) override { Queue->Push(args); } void DrawWallAddClampColumn(const WallDrawerArgs &args) override { Queue->Push(args); } diff --git a/src/swrenderer/drawers/r_draw_wall32.h b/src/swrenderer/drawers/r_draw_wall32.h new file mode 100644 index 0000000000..97097d3afd --- /dev/null +++ b/src/swrenderer/drawers/r_draw_wall32.h @@ -0,0 +1,1766 @@ +/* +** Drawer commands for walls +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. +** +*/ + +/* + Warning: this C++ source file has been auto-generated. Please modify the original php script that generated it. 
+*/ + +#pragma once + +#include "swrenderer/drawers/r_draw_rgba.h" +#include "swrenderer/viewport/r_walldrawer.h" + +namespace swrenderer +{ + class DrawWall32Command : public DrawerCommand + { + protected: + WallDrawerArgs args; + + public: + DrawWall32Command(const WallDrawerArgs &drawerargs) : args(drawerargs) { } + + void Execute(DrawerThread *thread) override + { + auto shade_constants = args.ColormapConstants(); + if (shade_constants.simple_shade) + { + const uint32_t *source = (const uint32_t*)args.TexturePixels(); + const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); + bool is_nearest_filter = (source2 == nullptr); + if (is_nearest_filter) + { + int textureheight = args.TextureHeight(); + uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; + + // Shade constants + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + + int count = args.Count(); + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); + uint32_t fracstep = args.TextureVStep(); + uint32_t frac = args.TextureVPos(); + uint32_t texturefracx = args.TextureUPos(); + uint32_t *dest = (uint32_t*)args.Dest(); + int dest_y = args.DestY(); + + count = thread->count_for_thread(dest_y, count); + if (count <= 0) return; + frac += thread->skipped_by_thread(dest_y) * fracstep; + dest = thread->dest_for_thread(dest_y, pitch, dest); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + + __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); + __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + + for (int index = 0; index < count; index++) + { + int offset = index * pitch; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); + + // Sample + int sample_index = ((frac >> FRACBITS) * textureheight) >> 
FRACBITS; + unsigned int ifgcolor = source[sample_index]; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + __m128 outcolor = fgcolor; + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + + dest[offset] = _mm_cvtsi128_si32(outcolor); + frac += fracstep; + } + } + else + { + int textureheight = args.TextureHeight(); + uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; + + // Shade constants + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + + int count = args.Count(); + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); + uint32_t fracstep = args.TextureVStep(); + uint32_t frac = args.TextureVPos(); + uint32_t texturefracx = args.TextureUPos(); + uint32_t *dest = (uint32_t*)args.Dest(); + int dest_y = args.DestY(); + + count = thread->count_for_thread(dest_y, count); + if (count <= 0) return; + frac += thread->skipped_by_thread(dest_y) * fracstep; + dest = thread->dest_for_thread(dest_y, pitch, dest); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + + __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); + __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + + for (int index = 0; index < count; index++) + { + int offset = index * pitch; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); + + // Sample + unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; + unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; + unsigned int y0 = frac_y0 >> FRACBITS; + unsigned int y1 = frac_y1 >> FRACBITS; + + unsigned int p00 = source[y0]; + unsigned int p01 = source[y1]; + unsigned int p10 = source2[y0]; + unsigned 
int p11 = source2[y1]; + + unsigned int inv_b = texturefracx; + unsigned int a = (frac_y1 >> (FRACBITS - 4)) & 15; + unsigned int inv_a = 16 - a; + unsigned int b = 16 - inv_b; + + unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int ifgcolor = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + __m128 outcolor = fgcolor; + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + + dest[offset] = _mm_cvtsi128_si32(outcolor); + frac += fracstep; + } + } + } + else + { + const uint32_t *source = (const uint32_t*)args.TexturePixels(); + const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); + bool is_nearest_filter = (source2 == nullptr); + if (is_nearest_filter) + { + int textureheight = args.TextureHeight(); + uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; + + // Shade constants + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + __m128i inv_desaturate = _mm_setr_epi16(0, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 0, 256 - shade_constants.desaturate, 
256 - shade_constants.desaturate, 256 - shade_constants.desaturate); + __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); + __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); + int desaturate = shade_constants.desaturate; + + int count = args.Count(); + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); + uint32_t fracstep = args.TextureVStep(); + uint32_t frac = args.TextureVPos(); + uint32_t texturefracx = args.TextureUPos(); + uint32_t *dest = (uint32_t*)args.Dest(); + int dest_y = args.DestY(); + + count = thread->count_for_thread(dest_y, count); + if (count <= 0) return; + frac += thread->skipped_by_thread(dest_y) * fracstep; + dest = thread->dest_for_thread(dest_y, pitch, dest); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + + __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); + __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + + for (int index = 0; index < count; index++) + { + int offset = index * pitch; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); + + // Sample + int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; + unsigned int ifgcolor = source[sample_index]; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(ifgcolor), _mm_setzero_si128()); + + // Shade + int blue = BPART(ifgcolor); + int green = GPART(ifgcolor); + int red = RPART(ifgcolor); + + __m128i intensity = _mm_set1_epi16(((red * 77 + green * 143 + blue * 37) >> 8) * desaturate); + + fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); + 
fgcolor = _mm_mullo_epi16(fgcolor, _mm_set1_epi16(light)); + fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(shade_fade, inv_light), fgcolor), 8); + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + // Blend + __m128 outcolor = fgcolor; + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + + dest[offset] = _mm_cvtsi128_si32(outcolor); + frac += fracstep; + } + } + else + { + int textureheight = args.TextureHeight(); + uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; + + // Shade constants + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + __m128i inv_desaturate = _mm_setr_epi16(0, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 0, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); + __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); + __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); + int desaturate = shade_constants.desaturate; + + int count = args.Count(); + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); + uint32_t fracstep = args.TextureVStep(); + uint32_t frac = args.TextureVPos(); + uint32_t texturefracx = args.TextureUPos(); + uint32_t *dest = (uint32_t*)args.Dest(); + int dest_y = args.DestY(); + + count = thread->count_for_thread(dest_y, count); + if 
(count <= 0) return; + frac += thread->skipped_by_thread(dest_y) * fracstep; + dest = thread->dest_for_thread(dest_y, pitch, dest); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + + __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); + __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + + for (int index = 0; index < count; index++) + { + int offset = index * pitch; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); + + // Sample + unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; + unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; + unsigned int y0 = frac_y0 >> FRACBITS; + unsigned int y1 = frac_y1 >> FRACBITS; + + unsigned int p00 = source[y0]; + unsigned int p01 = source[y1]; + unsigned int p10 = source2[y0]; + unsigned int p11 = source2[y1]; + + unsigned int inv_b = texturefracx; + unsigned int a = (frac_y1 >> (FRACBITS - 4)) & 15; + unsigned int inv_a = 16 - a; + unsigned int b = 16 - inv_b; + + unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int ifgcolor = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(ifgcolor), _mm_setzero_si128()); + + // Shade + int blue = BPART(ifgcolor); + int green = GPART(ifgcolor); + int red = RPART(ifgcolor); + + __m128i intensity = _mm_set1_epi16(((red * 77 + green * 143 + blue * 37) >> 8) * desaturate); + + fgcolor = 
_mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); + fgcolor = _mm_mullo_epi16(fgcolor, _mm_set1_epi16(light)); + fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(shade_fade, inv_light), fgcolor), 8); + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + // Blend + __m128 outcolor = fgcolor; + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + + dest[offset] = _mm_cvtsi128_si32(outcolor); + frac += fracstep; + } + } + } + } + + FString DebugInfo() override { return "DrawWall32Command"; } + }; + + class DrawWallMasked32Command : public DrawerCommand + { + protected: + WallDrawerArgs args; + + public: + DrawWallMasked32Command(const WallDrawerArgs &drawerargs) : args(drawerargs) { } + + void Execute(DrawerThread *thread) override + { + auto shade_constants = args.ColormapConstants(); + if (shade_constants.simple_shade) + { + const uint32_t *source = (const uint32_t*)args.TexturePixels(); + const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); + bool is_nearest_filter = (source2 == nullptr); + if (is_nearest_filter) + { + int textureheight = args.TextureHeight(); + uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; + + // Shade constants + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + + int count = args.Count(); + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); + uint32_t fracstep = args.TextureVStep(); + uint32_t frac = args.TextureVPos(); + uint32_t texturefracx = args.TextureUPos(); + uint32_t *dest = (uint32_t*)args.Dest(); + int dest_y = args.DestY(); + + count = thread->count_for_thread(dest_y, count); + if (count <= 0) return; + frac += thread->skipped_by_thread(dest_y) * fracstep; + dest = thread->dest_for_thread(dest_y, 
pitch, dest); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + + __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); + __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + + for (int index = 0; index < count; index++) + { + int offset = index * pitch; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); + + // Sample + int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; + unsigned int ifgcolor = source[sample_index]; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + __m128 alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128 inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + fgcolor = _mm_mullo_epi16(fgcolor, alpha); + bgcolor = _mm_mullo_epi16(bgcolor, inv_alpha); + __m128i outcolor = _mm_srli_epi16(_mm_add_epi16(fgcolor, bgcolor), 8); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + dest[offset] = _mm_cvtsi128_si32(outcolor); + frac += fracstep; + } + } + else + { + int textureheight = args.TextureHeight(); + uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; + + // Shade constants + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + + int count = args.Count(); + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); + uint32_t fracstep = args.TextureVStep(); + uint32_t frac = args.TextureVPos(); + uint32_t texturefracx = args.TextureUPos(); + uint32_t *dest = (uint32_t*)args.Dest(); + 
int dest_y = args.DestY(); + + count = thread->count_for_thread(dest_y, count); + if (count <= 0) return; + frac += thread->skipped_by_thread(dest_y) * fracstep; + dest = thread->dest_for_thread(dest_y, pitch, dest); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + + __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); + __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + + for (int index = 0; index < count; index++) + { + int offset = index * pitch; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); + + // Sample + unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; + unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; + unsigned int y0 = frac_y0 >> FRACBITS; + unsigned int y1 = frac_y1 >> FRACBITS; + + unsigned int p00 = source[y0]; + unsigned int p01 = source[y1]; + unsigned int p10 = source2[y0]; + unsigned int p11 = source2[y1]; + + unsigned int inv_b = texturefracx; + unsigned int a = (frac_y1 >> (FRACBITS - 4)) & 15; + unsigned int inv_a = 16 - a; + unsigned int b = 16 - inv_b; + + unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int ifgcolor = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + __m128 alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = 
_mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128 inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + fgcolor = _mm_mullo_epi16(fgcolor, alpha); + bgcolor = _mm_mullo_epi16(bgcolor, inv_alpha); + __m128i outcolor = _mm_srli_epi16(_mm_add_epi16(fgcolor, bgcolor), 8); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + dest[offset] = _mm_cvtsi128_si32(outcolor); + frac += fracstep; + } + } + } + else + { + const uint32_t *source = (const uint32_t*)args.TexturePixels(); + const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); + bool is_nearest_filter = (source2 == nullptr); + if (is_nearest_filter) + { + int textureheight = args.TextureHeight(); + uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; + + // Shade constants + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + __m128i inv_desaturate = _mm_setr_epi16(0, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 0, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); + __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); + __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); + int desaturate = 
shade_constants.desaturate; + + int count = args.Count(); + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); + uint32_t fracstep = args.TextureVStep(); + uint32_t frac = args.TextureVPos(); + uint32_t texturefracx = args.TextureUPos(); + uint32_t *dest = (uint32_t*)args.Dest(); + int dest_y = args.DestY(); + + count = thread->count_for_thread(dest_y, count); + if (count <= 0) return; + frac += thread->skipped_by_thread(dest_y) * fracstep; + dest = thread->dest_for_thread(dest_y, pitch, dest); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + + __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); + __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + + for (int index = 0; index < count; index++) + { + int offset = index * pitch; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); + + // Sample + int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; + unsigned int ifgcolor = source[sample_index]; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(ifgcolor), _mm_setzero_si128()); + + // Shade + int blue = BPART(ifgcolor); + int green = GPART(ifgcolor); + int red = RPART(ifgcolor); + + __m128i intensity = _mm_set1_epi16(((red * 77 + green * 143 + blue * 37) >> 8) * desaturate); + + fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); + fgcolor = _mm_mullo_epi16(fgcolor, _mm_set1_epi16(light)); + fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(shade_fade, inv_light), fgcolor), 8); + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + // Blend + __m128 alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128 inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + fgcolor = _mm_mullo_epi16(fgcolor, alpha); + bgcolor = _mm_mullo_epi16(bgcolor, inv_alpha); + __m128i 
outcolor = _mm_srli_epi16(_mm_add_epi16(fgcolor, bgcolor), 8); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + dest[offset] = _mm_cvtsi128_si32(outcolor); + frac += fracstep; + } + } + else + { + int textureheight = args.TextureHeight(); + uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; + + // Shade constants + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + __m128i inv_desaturate = _mm_setr_epi16(0, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 0, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); + __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); + __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); + int desaturate = shade_constants.desaturate; + + int count = args.Count(); + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); + uint32_t fracstep = args.TextureVStep(); + uint32_t frac = args.TextureVPos(); + uint32_t texturefracx = args.TextureUPos(); + uint32_t *dest = (uint32_t*)args.Dest(); + int dest_y = args.DestY(); + + count = thread->count_for_thread(dest_y, count); + if (count <= 0) return; + frac += thread->skipped_by_thread(dest_y) * fracstep; + dest = thread->dest_for_thread(dest_y, pitch, dest); + 
fracstep *= thread->num_cores; + pitch *= thread->num_cores; + + __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); + __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + + for (int index = 0; index < count; index++) + { + int offset = index * pitch; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); + + // Sample + unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; + unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; + unsigned int y0 = frac_y0 >> FRACBITS; + unsigned int y1 = frac_y1 >> FRACBITS; + + unsigned int p00 = source[y0]; + unsigned int p01 = source[y1]; + unsigned int p10 = source2[y0]; + unsigned int p11 = source2[y1]; + + unsigned int inv_b = texturefracx; + unsigned int a = (frac_y1 >> (FRACBITS - 4)) & 15; + unsigned int inv_a = 16 - a; + unsigned int b = 16 - inv_b; + + unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int ifgcolor = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(ifgcolor), _mm_setzero_si128()); + + // Shade + int blue = BPART(ifgcolor); + int green = GPART(ifgcolor); + int red = RPART(ifgcolor); + + __m128i intensity = _mm_set1_epi16(((red * 77 + green * 143 + blue * 37) >> 8) * desaturate); + + fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); + fgcolor = _mm_mullo_epi16(fgcolor, _mm_set1_epi16(light)); + fgcolor = 
_mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(shade_fade, inv_light), fgcolor), 8); + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + // Blend + __m128 alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128 inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + fgcolor = _mm_mullo_epi16(fgcolor, alpha); + bgcolor = _mm_mullo_epi16(bgcolor, inv_alpha); + __m128i outcolor = _mm_srli_epi16(_mm_add_epi16(fgcolor, bgcolor), 8); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + dest[offset] = _mm_cvtsi128_si32(outcolor); + frac += fracstep; + } + } + } + } + + FString DebugInfo() override { return "DrawWallMasked32Command"; } + }; + + class DrawWallAddClamp32Command : public DrawerCommand + { + protected: + WallDrawerArgs args; + + public: + DrawWallAddClamp32Command(const WallDrawerArgs &drawerargs) : args(drawerargs) { } + + void Execute(DrawerThread *thread) override + { + auto shade_constants = args.ColormapConstants(); + if (shade_constants.simple_shade) + { + const uint32_t *source = (const uint32_t*)args.TexturePixels(); + const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); + bool is_nearest_filter = (source2 == nullptr); + if (is_nearest_filter) + { + int textureheight = args.TextureHeight(); + uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; + + // Shade constants + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + + int count = args.Count(); + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); + uint32_t fracstep = args.TextureVStep(); + uint32_t frac 
= args.TextureVPos(); + uint32_t texturefracx = args.TextureUPos(); + uint32_t *dest = (uint32_t*)args.Dest(); + int dest_y = args.DestY(); + + count = thread->count_for_thread(dest_y, count); + if (count <= 0) return; + frac += thread->skipped_by_thread(dest_y) * fracstep; + dest = thread->dest_for_thread(dest_y, pitch, dest); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + + __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); + __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + + for (int index = 0; index < count; index++) + { + int offset = index * pitch; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); + + // Sample + int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; + unsigned int ifgcolor = source[sample_index]; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + __m128 alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128 inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + + __m128 bgalpha = _mm_mullo_epi16(destalpha, alpha); + bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + + __m128 fgalpha = _mm_mullo_epi16(srcalpha, alpha); + fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_srai_epi16(_mm_add_epi32(fg_lo, 
bg_lo), 8); + __m128i out_hi = _mm_srai_epi16(_mm_add_epi32(fg_hi, bg_hi), 8); + __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + dest[offset] = _mm_cvtsi128_si32(outcolor); + frac += fracstep; + } + } + else + { + int textureheight = args.TextureHeight(); + uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; + + // Shade constants + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + + int count = args.Count(); + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); + uint32_t fracstep = args.TextureVStep(); + uint32_t frac = args.TextureVPos(); + uint32_t texturefracx = args.TextureUPos(); + uint32_t *dest = (uint32_t*)args.Dest(); + int dest_y = args.DestY(); + + count = thread->count_for_thread(dest_y, count); + if (count <= 0) return; + frac += thread->skipped_by_thread(dest_y) * fracstep; + dest = thread->dest_for_thread(dest_y, pitch, dest); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + + __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); + __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + + for (int index = 0; index < count; index++) + { + int offset = index * pitch; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); + + // Sample + unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; + unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; + unsigned int y0 = frac_y0 >> FRACBITS; + unsigned int y1 = frac_y1 >> FRACBITS; + + unsigned int p00 = source[y0]; + unsigned int p01 = source[y1]; + unsigned int p10 = source2[y0]; + unsigned int p11 = source2[y1]; + + unsigned int inv_b = texturefracx; + unsigned 
int a = (frac_y1 >> (FRACBITS - 4)) & 15; + unsigned int inv_a = 16 - a; + unsigned int b = 16 - inv_b; + + unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int ifgcolor = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + __m128 alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128 inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + + __m128 bgalpha = _mm_mullo_epi16(destalpha, alpha); + bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + + __m128 fgalpha = _mm_mullo_epi16(srcalpha, alpha); + fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_srai_epi16(_mm_add_epi32(fg_lo, bg_lo), 8); + __m128i out_hi = 
_mm_srai_epi16(_mm_add_epi32(fg_hi, bg_hi), 8); + __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + dest[offset] = _mm_cvtsi128_si32(outcolor); + frac += fracstep; + } + } + } + else + { + const uint32_t *source = (const uint32_t*)args.TexturePixels(); + const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); + bool is_nearest_filter = (source2 == nullptr); + if (is_nearest_filter) + { + int textureheight = args.TextureHeight(); + uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; + + // Shade constants + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + __m128i inv_desaturate = _mm_setr_epi16(0, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 0, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); + __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); + __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); + int desaturate = shade_constants.desaturate; + + int count = args.Count(); + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); + uint32_t fracstep = args.TextureVStep(); + uint32_t frac = args.TextureVPos(); + uint32_t texturefracx = args.TextureUPos(); + uint32_t *dest = 
(uint32_t*)args.Dest(); + int dest_y = args.DestY(); + + count = thread->count_for_thread(dest_y, count); + if (count <= 0) return; + frac += thread->skipped_by_thread(dest_y) * fracstep; + dest = thread->dest_for_thread(dest_y, pitch, dest); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + + __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); + __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + + for (int index = 0; index < count; index++) + { + int offset = index * pitch; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); + + // Sample + int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; + unsigned int ifgcolor = source[sample_index]; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(ifgcolor), _mm_setzero_si128()); + + // Shade + int blue = BPART(ifgcolor); + int green = GPART(ifgcolor); + int red = RPART(ifgcolor); + + __m128i intensity = _mm_set1_epi16(((red * 77 + green * 143 + blue * 37) >> 8) * desaturate); + + fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); + fgcolor = _mm_mullo_epi16(fgcolor, _mm_set1_epi16(light)); + fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(shade_fade, inv_light), fgcolor), 8); + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + // Blend + __m128 alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128 inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + + __m128 bgalpha = _mm_mullo_epi16(destalpha, alpha); + bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + + __m128 fgalpha = _mm_mullo_epi16(srcalpha, alpha); + fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = 
_mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_srai_epi16(_mm_add_epi32(fg_lo, bg_lo), 8); + __m128i out_hi = _mm_srai_epi16(_mm_add_epi32(fg_hi, bg_hi), 8); + __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + dest[offset] = _mm_cvtsi128_si32(outcolor); + frac += fracstep; + } + } + else + { + int textureheight = args.TextureHeight(); + uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; + + // Shade constants + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + __m128i inv_desaturate = _mm_setr_epi16(0, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 0, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); + __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); + __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); + int desaturate = shade_constants.desaturate; + + int count = args.Count(); + int pitch = 
RenderViewport::Instance()->RenderTarget->GetPitch(); + uint32_t fracstep = args.TextureVStep(); + uint32_t frac = args.TextureVPos(); + uint32_t texturefracx = args.TextureUPos(); + uint32_t *dest = (uint32_t*)args.Dest(); + int dest_y = args.DestY(); + + count = thread->count_for_thread(dest_y, count); + if (count <= 0) return; + frac += thread->skipped_by_thread(dest_y) * fracstep; + dest = thread->dest_for_thread(dest_y, pitch, dest); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + + __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); + __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + + for (int index = 0; index < count; index++) + { + int offset = index * pitch; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); + + // Sample + unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; + unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; + unsigned int y0 = frac_y0 >> FRACBITS; + unsigned int y1 = frac_y1 >> FRACBITS; + + unsigned int p00 = source[y0]; + unsigned int p01 = source[y1]; + unsigned int p10 = source2[y0]; + unsigned int p11 = source2[y1]; + + unsigned int inv_b = texturefracx; + unsigned int a = (frac_y1 >> (FRACBITS - 4)) & 15; + unsigned int inv_a = 16 - a; + unsigned int b = 16 - inv_b; + + unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int ifgcolor = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + __m128i fgcolor = 
_mm_unpacklo_epi8(_mm_cvtsi32_si128(ifgcolor), _mm_setzero_si128()); + + // Shade + int blue = BPART(ifgcolor); + int green = GPART(ifgcolor); + int red = RPART(ifgcolor); + + __m128i intensity = _mm_set1_epi16(((red * 77 + green * 143 + blue * 37) >> 8) * desaturate); + + fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); + fgcolor = _mm_mullo_epi16(fgcolor, _mm_set1_epi16(light)); + fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(shade_fade, inv_light), fgcolor), 8); + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + // Blend + __m128 alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128 inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + + __m128 bgalpha = _mm_mullo_epi16(destalpha, alpha); + bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + + __m128 fgalpha = _mm_mullo_epi16(srcalpha, alpha); + fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_srai_epi16(_mm_add_epi32(fg_lo, bg_lo), 8); + __m128i out_hi = _mm_srai_epi16(_mm_add_epi32(fg_hi, bg_hi), 8); + __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + dest[offset] = _mm_cvtsi128_si32(outcolor); + frac += fracstep; + } + } + } + } + + FString DebugInfo() override { return 
"DrawWallAddClamp32Command"; } + }; +
+	// Draws one vertical wall column into a 32-bit BGRA render target using the
+	// clamped subtractive blend:
+	//
+	//     out = clamp((fg * fgalpha - bg * bgalpha) >> 8, 0, 255)
+	//
+	// where fg is the sampled and shaded texel, bg is the pixel already in the
+	// render target, and the two weights are derived from the texel alpha and
+	// args.SrcAlpha() / args.DestAlpha(). The output alpha byte is always 0xff.
+	class DrawWallSubClamp32Command : public DrawerCommand
+	{
+	protected:
+		WallDrawerArgs args;
+
+	public:
+		DrawWallSubClamp32Command(const WallDrawerArgs &drawerargs) : args(drawerargs) { }
+
+		// Selects the loop specialisation for the current shade mode
+		// (simple / full colormap shading) and texture filter (nearest when
+		// no second texture column is supplied, bilinear otherwise).
+		void Execute(DrawerThread *thread) override
+		{
+			const bool simple_shade = args.ColormapConstants().simple_shade;
+			const bool is_nearest_filter = (args.TexturePixels2() == nullptr);
+
+			if (simple_shade)
+			{
+				if (is_nearest_filter)
+					DrawColumn<true, true>(thread);
+				else
+					DrawColumn<true, false>(thread);
+			}
+			else
+			{
+				if (is_nearest_filter)
+					DrawColumn<false, true>(thread);
+				else
+					DrawColumn<false, false>(thread);
+			}
+		}
+
+		FString DebugInfo() override { return "DrawWallSubClamp32Command"; }
+
+	private:
+		// Bilinear sample between two adjacent texture columns (source, source2)
+		// and two vertically adjacent texels. Weights are 4-bit (0..16) so the
+		// four products sum to 256 and the result is renormalised with >> 8.
+		// Returns the filtered texel packed as 0xAARRGGBB.
+		static unsigned int SampleLinear(const uint32_t *source, const uint32_t *source2, uint32_t frac, uint32_t one, int textureheight, uint32_t texturefracx)
+		{
+			unsigned int frac_y0 = (frac >> FRACBITS) * textureheight;
+			unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight;
+			unsigned int y0 = frac_y0 >> FRACBITS;
+			unsigned int y1 = frac_y1 >> FRACBITS;
+
+			unsigned int p00 = source[y0];
+			unsigned int p01 = source[y1];
+			unsigned int p10 = source2[y0];
+			unsigned int p11 = source2[y1];
+
+			unsigned int inv_b = texturefracx;
+			unsigned int a = (frac_y1 >> (FRACBITS - 4)) & 15;
+			unsigned int inv_a = 16 - a;
+			unsigned int b = 16 - inv_b;
+
+			unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8;
+			unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8;
+			unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8;
+			unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8;
+
+			return (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue;
+		}
+
+		// Blends one shaded foreground pixel against one background pixel.
+		//
+		// fgcolor / bgcolor hold the pixel's four channels widened to 16-bit
+		// lanes (B, G, R, A in lanes 0..3). alpha is the texel alpha (0..255).
+		// Returns the packed result with the alpha byte forced to 0xff.
+		//
+		// The weights are computed in 32-bit scalar arithmetic: doing it in
+		// 16-bit lanes wraps to zero when a factor reaches 256 (e.g.
+		// inv_alpha << 8 for a fully transparent texel), which wiped out the
+		// background instead of preserving it.
+		// NOTE(review): assumes srcalpha and destalpha are 0..256 weights so
+		// the derived weights stay <= 256 and the 16-bit products cannot wrap;
+		// confirm against WallDrawerArgs.
+		static uint32_t BlendSubClamp(__m128i fgcolor, __m128i bgcolor, uint32_t alpha, uint32_t srcalpha, uint32_t destalpha)
+		{
+			alpha += alpha >> 7; // 255 -> 256
+			uint32_t inv_alpha = 256 - alpha;
+
+			uint32_t bgalpha = (destalpha * alpha + (inv_alpha << 8) + 128) >> 8;
+			uint32_t fgalpha = (srcalpha * alpha + 128) >> 8;
+
+			fgcolor = _mm_mullo_epi16(fgcolor, _mm_set1_epi16((short)fgalpha));
+			bgcolor = _mm_mullo_epi16(bgcolor, _mm_set1_epi16((short)bgalpha));
+
+			// Widen to 32-bit lanes so the difference can go negative.
+			__m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128());
+			__m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128());
+			__m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128());
+			__m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128());
+
+			// The lanes are 32 bits wide here, so the arithmetic shift must be
+			// the 32-bit one; the two saturating packs below then clamp the
+			// signed result to 0..255.
+			__m128i out_lo = _mm_srai_epi32(_mm_sub_epi32(fg_lo, bg_lo), 8);
+			__m128i out_hi = _mm_srai_epi32(_mm_sub_epi32(fg_hi, bg_hi), 8);
+			__m128i outcolor = _mm_packs_epi32(out_lo, out_hi);
+			outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128());
+			outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000));
+
+			return (uint32_t)_mm_cvtsi128_si32(outcolor);
+		}
+
+		// Renders this thread's share of the column. SimpleShade and
+		// NearestFilter are compile-time constants so each of the four
+		// combinations gets its own loop without per-pixel mode checks.
+		template<bool SimpleShade, bool NearestFilter>
+		void DrawColumn(DrawerThread *thread)
+		{
+			auto shade_constants = args.ColormapConstants();
+			const uint32_t *source = (const uint32_t*)args.TexturePixels();
+			const uint32_t *source2 = (const uint32_t*)args.TexturePixels2();
+
+			int textureheight = args.TextureHeight();
+			uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1;
+
+			// Shade constants. In every vector below the first argument of
+			// _mm_set_epi16 lands on the alpha lane of each pixel.
+			int light = 256 - (args.Light() >> (FRACBITS - 8));
+			int desaturate = shade_constants.desaturate;
+			__m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light);
+			__m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light);
+			// Uses _mm_set_epi16 like the other constants: with _mm_setr_epi16
+			// the zero fell on the blue lane and blue was multiplied by 0.
+			__m128i inv_desaturate = _mm_set_epi16(0, 256 - desaturate, 256 - desaturate, 256 - desaturate, 0, 256 - desaturate, 256 - desaturate, 256 - desaturate);
+			__m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue);
+			__m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue);
+			// Loop-invariant pieces of the full shade formula.
+			__m128i light_all = _mm_set1_epi16(light);
+			__m128i fade_term = _mm_mullo_epi16(shade_fade, inv_light);
+
+			int count = args.Count();
+			int pitch = RenderViewport::Instance()->RenderTarget->GetPitch();
+			uint32_t fracstep = args.TextureVStep();
+			uint32_t frac = args.TextureVPos();
+			uint32_t texturefracx = args.TextureUPos();
+			uint32_t *dest = (uint32_t*)args.Dest();
+			int dest_y = args.DestY();
+
+			// Restrict the column to the rows handled by this thread and
+			// stride over the rows that belong to the other threads.
+			count = thread->count_for_thread(dest_y, count);
+			if (count <= 0) return;
+			frac += thread->skipped_by_thread(dest_y) * fracstep;
+			dest = thread->dest_for_thread(dest_y, pitch, dest);
+			fracstep *= thread->num_cores;
+			pitch *= thread->num_cores;
+
+			uint32_t srcalpha = args.SrcAlpha();
+			uint32_t destalpha = args.DestAlpha();
+
+			for (int index = 0; index < count; index++)
+			{
+				int offset = index * pitch;
+				__m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128());
+
+				// Sample
+				unsigned int ifgcolor;
+				if (NearestFilter)
+				{
+					int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS;
+					ifgcolor = source[sample_index];
+				}
+				else
+				{
+					ifgcolor = SampleLinear(source, source2, frac, one, textureheight, texturefracx);
+				}
+				__m128i fgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(ifgcolor), _mm_setzero_si128());
+
+				// Shade
+				if (SimpleShade)
+				{
+					fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8);
+				}
+				else
+				{
+					int blue = BPART(ifgcolor);
+					int green = GPART(ifgcolor);
+					int red = RPART(ifgcolor);
+
+					__m128i intensity = _mm_set1_epi16(((red * 77 + green * 143 + blue * 37) >> 8) * desaturate);
+
+					fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8);
+					fgcolor = _mm_mullo_epi16(fgcolor, light_all);
+					fgcolor = _mm_srli_epi16(_mm_add_epi16(fade_term, fgcolor), 8);
+					fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8);
+				}
+
+				// Blend. The weight comes from the sampled texel's own alpha;
+				// shading is not meant to alter it (the simple shade keeps the
+				// alpha lane at x256/256).
+				dest[offset] = BlendSubClamp(fgcolor, bgcolor, APART(ifgcolor), srcalpha, destalpha);
+				frac += fracstep;
+			}
+		}
+	};
+
+ class DrawWallRevSubClamp32Command : public DrawerCommand + { + protected: + WallDrawerArgs args; + + public: + DrawWallRevSubClamp32Command(const WallDrawerArgs &drawerargs) : args(drawerargs) { } + + void Execute(DrawerThread *thread) override + { + auto shade_constants = args.ColormapConstants(); + if (shade_constants.simple_shade) + { + const uint32_t *source = (const uint32_t*)args.TexturePixels(); + const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); + bool is_nearest_filter = (source2 == nullptr); + if (is_nearest_filter) + { + int textureheight = args.TextureHeight(); + uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; + + // Shade constants + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = 
_mm_set_epi16(256, light, light, light, 256, light, light, light); + __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + + int count = args.Count(); + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); + uint32_t fracstep = args.TextureVStep(); + uint32_t frac = args.TextureVPos(); + uint32_t texturefracx = args.TextureUPos(); + uint32_t *dest = (uint32_t*)args.Dest(); + int dest_y = args.DestY(); + + count = thread->count_for_thread(dest_y, count); + if (count <= 0) return; + frac += thread->skipped_by_thread(dest_y) * fracstep; + dest = thread->dest_for_thread(dest_y, pitch, dest); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + + __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); + __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + + for (int index = 0; index < count; index++) + { + int offset = index * pitch; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); + + // Sample + int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; + unsigned int ifgcolor = source[sample_index]; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + __m128 alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128 inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + + __m128 bgalpha = _mm_mullo_epi16(destalpha, alpha); + bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + + __m128 fgalpha = _mm_mullo_epi16(srcalpha, alpha); + fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, 
bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_srai_epi16(_mm_sub_epi32(bg_lo, fg_lo), 8); + __m128i out_hi = _mm_srai_epi16(_mm_sub_epi32(bg_hi, fg_hi), 8); + __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + dest[offset] = _mm_cvtsi128_si32(outcolor); + frac += fracstep; + } + } + else + { + int textureheight = args.TextureHeight(); + uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; + + // Shade constants + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + + int count = args.Count(); + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); + uint32_t fracstep = args.TextureVStep(); + uint32_t frac = args.TextureVPos(); + uint32_t texturefracx = args.TextureUPos(); + uint32_t *dest = (uint32_t*)args.Dest(); + int dest_y = args.DestY(); + + count = thread->count_for_thread(dest_y, count); + if (count <= 0) return; + frac += thread->skipped_by_thread(dest_y) * fracstep; + dest = thread->dest_for_thread(dest_y, pitch, dest); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + + __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); + __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + + for (int index = 0; index < count; index++) + { + int offset = index * pitch; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); + + // Sample + unsigned int frac_y0 = (frac >> FRACBITS) * 
textureheight; + unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; + unsigned int y0 = frac_y0 >> FRACBITS; + unsigned int y1 = frac_y1 >> FRACBITS; + + unsigned int p00 = source[y0]; + unsigned int p01 = source[y1]; + unsigned int p10 = source2[y0]; + unsigned int p11 = source2[y1]; + + unsigned int inv_b = texturefracx; + unsigned int a = (frac_y1 >> (FRACBITS - 4)) & 15; + unsigned int inv_a = 16 - a; + unsigned int b = 16 - inv_b; + + unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int ifgcolor = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + __m128 alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128 inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + + __m128 bgalpha = _mm_mullo_epi16(destalpha, alpha); + bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + + __m128 fgalpha = _mm_mullo_epi16(srcalpha, alpha); + fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = 
_mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_srai_epi16(_mm_sub_epi32(bg_lo, fg_lo), 8); + __m128i out_hi = _mm_srai_epi16(_mm_sub_epi32(bg_hi, fg_hi), 8); + __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + dest[offset] = _mm_cvtsi128_si32(outcolor); + frac += fracstep; + } + } + } + else + { + const uint32_t *source = (const uint32_t*)args.TexturePixels(); + const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); + bool is_nearest_filter = (source2 == nullptr); + if (is_nearest_filter) + { + int textureheight = args.TextureHeight(); + uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; + + // Shade constants + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + __m128i inv_desaturate = _mm_setr_epi16(0, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 0, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); + __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); + __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, 
shade_constants.light_green, shade_constants.light_blue); + int desaturate = shade_constants.desaturate; + + int count = args.Count(); + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); + uint32_t fracstep = args.TextureVStep(); + uint32_t frac = args.TextureVPos(); + uint32_t texturefracx = args.TextureUPos(); + uint32_t *dest = (uint32_t*)args.Dest(); + int dest_y = args.DestY(); + + count = thread->count_for_thread(dest_y, count); + if (count <= 0) return; + frac += thread->skipped_by_thread(dest_y) * fracstep; + dest = thread->dest_for_thread(dest_y, pitch, dest); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + + __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); + __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + + for (int index = 0; index < count; index++) + { + int offset = index * pitch; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); + + // Sample + int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; + unsigned int ifgcolor = source[sample_index]; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(ifgcolor), _mm_setzero_si128()); + + // Shade + int blue = BPART(ifgcolor); + int green = GPART(ifgcolor); + int red = RPART(ifgcolor); + + __m128i intensity = _mm_set1_epi16(((red * 77 + green * 143 + blue * 37) >> 8) * desaturate); + + fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); + fgcolor = _mm_mullo_epi16(fgcolor, _mm_set1_epi16(light)); + fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(shade_fade, inv_light), fgcolor), 8); + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + // Blend + __m128 alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128 inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + + __m128 bgalpha = 
_mm_mullo_epi16(destalpha, alpha); + bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + + __m128 fgalpha = _mm_mullo_epi16(srcalpha, alpha); + fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_srai_epi16(_mm_sub_epi32(bg_lo, fg_lo), 8); + __m128i out_hi = _mm_srai_epi16(_mm_sub_epi32(bg_hi, fg_hi), 8); + __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + dest[offset] = _mm_cvtsi128_si32(outcolor); + frac += fracstep; + } + } + else + { + int textureheight = args.TextureHeight(); + uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; + + // Shade constants + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + __m128i inv_desaturate = _mm_setr_epi16(0, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 0, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); + __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); + __m128i shade_light = 
_mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); + int desaturate = shade_constants.desaturate; + + int count = args.Count(); + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); + uint32_t fracstep = args.TextureVStep(); + uint32_t frac = args.TextureVPos(); + uint32_t texturefracx = args.TextureUPos(); + uint32_t *dest = (uint32_t*)args.Dest(); + int dest_y = args.DestY(); + + count = thread->count_for_thread(dest_y, count); + if (count <= 0) return; + frac += thread->skipped_by_thread(dest_y) * fracstep; + dest = thread->dest_for_thread(dest_y, pitch, dest); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + + __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); + __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + + for (int index = 0; index < count; index++) + { + int offset = index * pitch; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); + + // Sample + unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; + unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; + unsigned int y0 = frac_y0 >> FRACBITS; + unsigned int y1 = frac_y1 >> FRACBITS; + + unsigned int p00 = source[y0]; + unsigned int p01 = source[y1]; + unsigned int p10 = source2[y0]; + unsigned int p11 = source2[y1]; + + unsigned int inv_b = texturefracx; + unsigned int a = (frac_y1 >> (FRACBITS - 4)) & 15; + unsigned int inv_a = 16 - a; + unsigned int b = 16 - inv_b; + + unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * 
b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int ifgcolor = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(ifgcolor), _mm_setzero_si128()); + + // Shade + int blue = BPART(ifgcolor); + int green = GPART(ifgcolor); + int red = RPART(ifgcolor); + + __m128i intensity = _mm_set1_epi16(((red * 77 + green * 143 + blue * 37) >> 8) * desaturate); + + fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); + fgcolor = _mm_mullo_epi16(fgcolor, _mm_set1_epi16(light)); + fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(shade_fade, inv_light), fgcolor), 8); + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + // Blend + __m128 alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128 inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + + __m128 bgalpha = _mm_mullo_epi16(destalpha, alpha); + bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + + __m128 fgalpha = _mm_mullo_epi16(srcalpha, alpha); + fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_srai_epi16(_mm_sub_epi32(bg_lo, fg_lo), 8); + __m128i out_hi = _mm_srai_epi16(_mm_sub_epi32(bg_hi, 
fg_hi), 8); + __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + dest[offset] = _mm_cvtsi128_si32(outcolor); + frac += fracstep; + } + } + } + } + + FString DebugInfo() override { return "DrawWallRevSubClamp32Command"; } + }; + +} diff --git a/src/swrenderer/drawers/r_draw_wall32.php b/src/swrenderer/drawers/r_draw_wall32.php index 0d243ace7c..71d1da7d22 100644 --- a/src/swrenderer/drawers/r_draw_wall32.php +++ b/src/swrenderer/drawers/r_draw_wall32.php @@ -46,7 +46,6 @@ namespace swrenderer { protected: WallDrawerArgs args; - bool is_nearest_filter = false; public: (const WallDrawerArgs &drawerargs) : args(drawerargs) { } @@ -63,6 +62,8 @@ namespace swrenderer } } + + FString DebugInfo() override { return ""; } }; + const uint32_t *source = (const uint32_t*)args.TexturePixels(); + const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); + bool is_nearest_filter = (source2 == nullptr); if (is_nearest_filter) { @@ -83,23 +87,42 @@ namespace swrenderer function Loop($blendVariant, $isSimpleShade, $isNearestFilter) { ?> int textureheight = args.TextureHeight(); + uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; // Shade constants - int light = args.Light(); - __m128i mlight = _mm_set_epi16(light, light, light, 256, light, light, light, 256); - __m128i inv_light = _mm_set1_epi16(256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light, 0); + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - __m128i inv_desaturate = _mm_set1_epi16(256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 0, 256 - shade_constants.desaturate, 256 - 
shade_constants.desaturate, 256 - shade_constants.desaturate, 0); - __m128i shade_fade = _mm_set_epi16(shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha); - __m128i shade_light = _mm_set_epi16(shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha); + __m128i inv_desaturate = _mm_setr_epi16(0, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 0, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); + __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); + __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); + int desaturate = shade_constants.desaturate; int count = args.Count(); + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); + uint32_t fracstep = args.TextureVStep(); + uint32_t frac = args.TextureVPos(); + uint32_t texturefracx = args.TextureUPos(); + uint32_t *dest = (uint32_t*)args.Dest(); + int dest_y = args.DestY(); + + count = thread->count_for_thread(dest_y, count); + if (count <= 0) return; + frac += thread->skipped_by_thread(dest_y) * fracstep; + dest = thread->dest_for_thread(dest_y, pitch, dest); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + + __m128i srcalpha = 
_mm_set1_epi16(args.SrcAlpha()); + __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + for (int index = 0; index < count; index++) { - int offset = index * pitch * 4; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32(dest[offset]), _mm_setzero_si128()); + int offset = index * pitch; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); // Sample @@ -110,7 +133,7 @@ namespace swrenderer // Blend - dest[offset] = _mm_cvtsi32(outcolor); + dest[offset] = _mm_cvtsi128_si32(outcolor); frac += fracstep; } int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; - unsigned int ifgcolor = source[sample_index * 4]; - __m128i fgcolor = _mm_unpacklo_epi16(_mm_unpacklo_epi8(_mm_cvtsi32(ifgcolor), _mm_setzero_si128()), _mm_setzero_si128()); + unsigned int ifgcolor = source[sample_index]; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(ifgcolor), _mm_setzero_si128()); - unsigned int frac_y0 = (texturefracy >> FRACBITS) * textureheight; - unsigned int frac_y1 = ((texturefracy + one) >> FRACBITS) * textureheight; + unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; + unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; unsigned int y0 = frac_y0 >> FRACBITS; unsigned int y1 = frac_y1 >> FRACBITS; - unsigned int p00 = source[y0 * 4]; - unsigned int p01 = source[y1 * 4]; - unsigned int p10 = source2[y0 * 4]; - unsigned int p11 = source2[y1 * 4]; + unsigned int p00 = source[y0]; + unsigned int p01 = source[y1]; + unsigned int p10 = source2[y0]; + unsigned int p11 = source2[y1]; unsigned int inv_b = texturefracx; unsigned int a = (frac_y1 >> (FRACBITS - 4)) & 15; @@ -146,7 +169,7 @@ namespace swrenderer unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; unsigned int ifgcolor = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - __m128i fgcolor = _mm_unpacklo_epi16(_mm_unpacklo_epi8(_mm_cvtsi32(ifgcolor), 
_mm_setzero_si128()), _mm_setzero_si128()); + __m128i fgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(ifgcolor), _mm_setzero_si128()); __m128 outcolor = fgcolor; - outcolor = _mm_packus_epi16(outcolor, _mm_setzero128()); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); Date: Fri, 17 Feb 2017 23:03:48 +0100 Subject: [PATCH 857/912] Fix typo where __m128 should have been __m128i --- src/swrenderer/drawers/r_draw_wall32.h | 120 +++++++++++------------ src/swrenderer/drawers/r_draw_wall32.php | 10 +- 2 files changed, 65 insertions(+), 65 deletions(-) diff --git a/src/swrenderer/drawers/r_draw_wall32.h b/src/swrenderer/drawers/r_draw_wall32.h index 97097d3afd..f4e117588c 100644 --- a/src/swrenderer/drawers/r_draw_wall32.h +++ b/src/swrenderer/drawers/r_draw_wall32.h @@ -89,7 +89,7 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); // Blend - __m128 outcolor = fgcolor; + __m128i outcolor = fgcolor; outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); dest[offset] = _mm_cvtsi128_si32(outcolor); @@ -157,7 +157,7 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); // Blend - __m128 outcolor = fgcolor; + __m128i outcolor = fgcolor; outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); dest[offset] = _mm_cvtsi128_si32(outcolor); @@ -225,7 +225,7 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); // Blend - __m128 outcolor = fgcolor; + __m128i outcolor = fgcolor; outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); dest[offset] = _mm_cvtsi128_si32(outcolor); @@ -306,7 +306,7 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); // Blend - __m128 outcolor = fgcolor; + __m128i outcolor = fgcolor; outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); dest[offset] = _mm_cvtsi128_si32(outcolor); @@ -377,10 +377,10 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 
8); // Blend - __m128 alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128 inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); fgcolor = _mm_mullo_epi16(fgcolor, alpha); bgcolor = _mm_mullo_epi16(bgcolor, inv_alpha); __m128i outcolor = _mm_srli_epi16(_mm_add_epi16(fgcolor, bgcolor), 8); @@ -452,10 +452,10 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); // Blend - __m128 alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128 inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); fgcolor = _mm_mullo_epi16(fgcolor, alpha); bgcolor = _mm_mullo_epi16(bgcolor, inv_alpha); __m128i outcolor = _mm_srli_epi16(_mm_add_epi16(fgcolor, bgcolor), 8); @@ -527,10 +527,10 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); // Blend - __m128 alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128 inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); fgcolor = _mm_mullo_epi16(fgcolor, alpha); bgcolor = _mm_mullo_epi16(bgcolor, inv_alpha); __m128i outcolor = _mm_srli_epi16(_mm_add_epi16(fgcolor, bgcolor), 8); @@ -615,10 +615,10 @@ namespace swrenderer fgcolor = 
_mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); // Blend - __m128 alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128 inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); fgcolor = _mm_mullo_epi16(fgcolor, alpha); bgcolor = _mm_mullo_epi16(bgcolor, inv_alpha); __m128i outcolor = _mm_srli_epi16(_mm_add_epi16(fgcolor, bgcolor), 8); @@ -693,15 +693,15 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); // Blend - __m128 alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128 inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - __m128 bgalpha = _mm_mullo_epi16(destalpha, alpha); + __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); - __m128 fgalpha = _mm_mullo_epi16(srcalpha, alpha); + __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); @@ -783,15 +783,15 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); // Blend - __m128 alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128 inv_alpha = 
_mm_sub_epi16(_mm_set1_epi16(256), alpha); + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - __m128 bgalpha = _mm_mullo_epi16(destalpha, alpha); + __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); - __m128 fgalpha = _mm_mullo_epi16(srcalpha, alpha); + __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); @@ -873,15 +873,15 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); // Blend - __m128 alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128 inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - __m128 bgalpha = _mm_mullo_epi16(destalpha, alpha); + __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); - __m128 fgalpha = _mm_mullo_epi16(srcalpha, alpha); + __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); @@ -976,15 +976,15 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); // Blend - __m128 alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128 inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + __m128i 
inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - __m128 bgalpha = _mm_mullo_epi16(destalpha, alpha); + __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); - __m128 fgalpha = _mm_mullo_epi16(srcalpha, alpha); + __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); @@ -1069,15 +1069,15 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); // Blend - __m128 alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128 inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - __m128 bgalpha = _mm_mullo_epi16(destalpha, alpha); + __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); - __m128 fgalpha = _mm_mullo_epi16(srcalpha, alpha); + __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); @@ -1159,15 +1159,15 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); // Blend - __m128 alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128 inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - 
__m128 bgalpha = _mm_mullo_epi16(destalpha, alpha); + __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); - __m128 fgalpha = _mm_mullo_epi16(srcalpha, alpha); + __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); @@ -1249,15 +1249,15 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); // Blend - __m128 alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128 inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - __m128 bgalpha = _mm_mullo_epi16(destalpha, alpha); + __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); - __m128 fgalpha = _mm_mullo_epi16(srcalpha, alpha); + __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); @@ -1352,15 +1352,15 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); // Blend - __m128 alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128 inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - __m128 bgalpha = _mm_mullo_epi16(destalpha, alpha); 
+ __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); - __m128 fgalpha = _mm_mullo_epi16(srcalpha, alpha); + __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); @@ -1445,15 +1445,15 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); // Blend - __m128 alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128 inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - __m128 bgalpha = _mm_mullo_epi16(destalpha, alpha); + __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); - __m128 fgalpha = _mm_mullo_epi16(srcalpha, alpha); + __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); @@ -1535,15 +1535,15 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); // Blend - __m128 alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128 inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - __m128 bgalpha = _mm_mullo_epi16(destalpha, alpha); + __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); 
bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); - __m128 fgalpha = _mm_mullo_epi16(srcalpha, alpha); + __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); @@ -1625,15 +1625,15 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); // Blend - __m128 alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128 inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - __m128 bgalpha = _mm_mullo_epi16(destalpha, alpha); + __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); - __m128 fgalpha = _mm_mullo_epi16(srcalpha, alpha); + __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); @@ -1728,15 +1728,15 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); // Blend - __m128 alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128 inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - __m128 bgalpha = _mm_mullo_epi16(destalpha, alpha); + __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); bgalpha = 
_mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); - __m128 fgalpha = _mm_mullo_epi16(srcalpha, alpha); + __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); diff --git a/src/swrenderer/drawers/r_draw_wall32.php b/src/swrenderer/drawers/r_draw_wall32.php index 71d1da7d22..cfac631e1c 100644 --- a/src/swrenderer/drawers/r_draw_wall32.php +++ b/src/swrenderer/drawers/r_draw_wall32.php @@ -198,7 +198,7 @@ namespace swrenderer { if ($blendVariant == "opaque") { ?> - __m128 outcolor = fgcolor; + __m128i outcolor = fgcolor; outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - __m128 alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128 inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - __m128 bgalpha = _mm_mullo_epi16(destalpha, alpha); + __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); - __m128 fgalpha = _mm_mullo_epi16(srcalpha, alpha); + __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); From 097dda38a94111ab7419fa362cdef3538edcac58 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 18 Feb 2017 05:34:26 +0100 Subject: [PATCH 858/912] Hook up all php generated wall drawers and enable them per default --- src/swrenderer/drawers/r_draw_rgba.cpp | 42 ++++- src/swrenderer/drawers/r_draw_rgba.h | 10 +- src/swrenderer/drawers/r_draw_wall32.h | 230 
++++++++++++----------- src/swrenderer/drawers/r_draw_wall32.php | 25 ++- 4 files changed, 181 insertions(+), 126 deletions(-) diff --git a/src/swrenderer/drawers/r_draw_rgba.cpp b/src/swrenderer/drawers/r_draw_rgba.cpp index 91229d1aaf..b022636921 100644 --- a/src/swrenderer/drawers/r_draw_rgba.cpp +++ b/src/swrenderer/drawers/r_draw_rgba.cpp @@ -59,7 +59,7 @@ CVAR(Bool, r_mipmap, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); // Level of detail texture bias CVAR(Float, r_lod_bias, -1.5, 0); // To do: add CVAR_ARCHIVE | CVAR_GLOBALCONFIG when a good default has been decided -CVAR(Bool, r_phpdrawers, false, 0); +CVAR(Bool, r_phpdrawers, true, 0); namespace swrenderer { @@ -71,6 +71,46 @@ namespace swrenderer Queue->Push(args); } + void SWTruecolorDrawers::DrawWallMaskedColumn(const WallDrawerArgs &args) + { + if (r_phpdrawers) + Queue->Push(args); + else + Queue->Push(args); + } + + void SWTruecolorDrawers::DrawWallAddColumn(const WallDrawerArgs &args) + { + if (r_phpdrawers) + Queue->Push(args); + else + Queue->Push(args); + } + + void SWTruecolorDrawers::DrawWallAddClampColumn(const WallDrawerArgs &args) + { + if (r_phpdrawers) + Queue->Push(args); + else + Queue->Push(args); + } + + void SWTruecolorDrawers::DrawWallSubClampColumn(const WallDrawerArgs &args) + { + if (r_phpdrawers) + Queue->Push(args); + else + Queue->Push(args); + } + + void SWTruecolorDrawers::DrawWallRevSubClampColumn(const WallDrawerArgs &args) + { + if (r_phpdrawers) + Queue->Push(args); + else + Queue->Push(args); + } + DrawSpanLLVMCommand::DrawSpanLLVMCommand(const SpanDrawerArgs &drawerargs) { auto shade_constants = drawerargs.ColormapConstants(); diff --git a/src/swrenderer/drawers/r_draw_rgba.h b/src/swrenderer/drawers/r_draw_rgba.h index 8e3d43fee6..7bff1cffaf 100644 --- a/src/swrenderer/drawers/r_draw_rgba.h +++ b/src/swrenderer/drawers/r_draw_rgba.h @@ -360,11 +360,11 @@ namespace swrenderer using SWPixelFormatDrawers::SWPixelFormatDrawers; void DrawWallColumn(const WallDrawerArgs &args) 
override; - void DrawWallMaskedColumn(const WallDrawerArgs &args) override { Queue->Push(args); } - void DrawWallAddColumn(const WallDrawerArgs &args) override { Queue->Push(args); } - void DrawWallAddClampColumn(const WallDrawerArgs &args) override { Queue->Push(args); } - void DrawWallSubClampColumn(const WallDrawerArgs &args) override { Queue->Push(args); } - void DrawWallRevSubClampColumn(const WallDrawerArgs &args) override { Queue->Push(args); } + void DrawWallMaskedColumn(const WallDrawerArgs &args) override; + void DrawWallAddColumn(const WallDrawerArgs &args) override; + void DrawWallAddClampColumn(const WallDrawerArgs &args) override; + void DrawWallSubClampColumn(const WallDrawerArgs &args) override; + void DrawWallRevSubClampColumn(const WallDrawerArgs &args) override; void DrawSingleSkyColumn(const SkyDrawerArgs &args) override { Queue->Push(args); } void DrawDoubleSkyColumn(const SkyDrawerArgs &args) override { Queue->Push(args); } void DrawColumn(const SpriteDrawerArgs &args) override { Queue->Push(args); } diff --git a/src/swrenderer/drawers/r_draw_wall32.h b/src/swrenderer/drawers/r_draw_wall32.h index f4e117588c..a85b96805b 100644 --- a/src/swrenderer/drawers/r_draw_wall32.h +++ b/src/swrenderer/drawers/r_draw_wall32.h @@ -71,7 +71,6 @@ namespace swrenderer dest = thread->dest_for_thread(dest_y, pitch, dest); fracstep *= thread->num_cores; pitch *= thread->num_cores; - __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); @@ -120,7 +119,7 @@ namespace swrenderer dest = thread->dest_for_thread(dest_y, pitch, dest); fracstep *= thread->num_cores; pitch *= thread->num_cores; - + frac -= one / 2; __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); @@ -141,8 +140,8 @@ namespace swrenderer unsigned int p11 = source2[y1]; unsigned int inv_b = texturefracx; - unsigned int a = (frac_y1 >> (FRACBITS - 4)) & 15; - unsigned int inv_a = 16 - a; + unsigned 
int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; + unsigned int a = 16 - inv_a; unsigned int b = 16 - inv_b; unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; @@ -179,8 +178,9 @@ namespace swrenderer int light = 256 - (args.Light() >> (FRACBITS - 8)); __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - __m128i inv_desaturate = _mm_setr_epi16(0, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 0, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); + __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); + shade_fade = _mm_mullo_epi16(shade_fade, inv_light); __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); int desaturate = shade_constants.desaturate; @@ -198,7 +198,6 @@ namespace swrenderer dest = thread->dest_for_thread(dest_y, pitch, dest); fracstep *= thread->num_cores; pitch *= thread->num_cores; - __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); @@ -213,15 +212,16 @@ namespace swrenderer __m128i fgcolor = 
_mm_unpacklo_epi8(_mm_cvtsi32_si128(ifgcolor), _mm_setzero_si128()); // Shade - int blue = BPART(ifgcolor); - int green = GPART(ifgcolor); - int red = RPART(ifgcolor); + int blue0 = BPART(ifgcolor); + int green0 = GPART(ifgcolor); + int red0 = RPART(ifgcolor); - __m128i intensity = _mm_set1_epi16(((red * 77 + green * 143 + blue * 37) >> 8) * desaturate); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + __m128i intensity = _mm_set_epi16(0, intensity0, intensity0, intensity0, 0, intensity0, intensity0, intensity0); fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, _mm_set1_epi16(light)); - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(shade_fade, inv_light), fgcolor), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); // Blend @@ -241,8 +241,9 @@ namespace swrenderer int light = 256 - (args.Light() >> (FRACBITS - 8)); __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - __m128i inv_desaturate = _mm_setr_epi16(0, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 0, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); + __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, 
shade_constants.fade_green, shade_constants.fade_blue); + shade_fade = _mm_mullo_epi16(shade_fade, inv_light); __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); int desaturate = shade_constants.desaturate; @@ -260,7 +261,7 @@ namespace swrenderer dest = thread->dest_for_thread(dest_y, pitch, dest); fracstep *= thread->num_cores; pitch *= thread->num_cores; - + frac -= one / 2; __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); @@ -281,8 +282,8 @@ namespace swrenderer unsigned int p11 = source2[y1]; unsigned int inv_b = texturefracx; - unsigned int a = (frac_y1 >> (FRACBITS - 4)) & 15; - unsigned int inv_a = 16 - a; + unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; + unsigned int a = 16 - inv_a; unsigned int b = 16 - inv_b; unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; @@ -294,15 +295,16 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(ifgcolor), _mm_setzero_si128()); // Shade - int blue = BPART(ifgcolor); - int green = GPART(ifgcolor); - int red = RPART(ifgcolor); + int blue0 = BPART(ifgcolor); + int green0 = GPART(ifgcolor); + int red0 = RPART(ifgcolor); - __m128i intensity = _mm_set1_epi16(((red * 77 + green * 143 + blue * 37) >> 8) * desaturate); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + __m128i intensity = _mm_set_epi16(0, intensity0, intensity0, intensity0, 0, intensity0, intensity0, intensity0); fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, _mm_set1_epi16(light)); - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(shade_fade, inv_light), 
fgcolor), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); // Blend @@ -359,7 +361,6 @@ namespace swrenderer dest = thread->dest_for_thread(dest_y, pitch, dest); fracstep *= thread->num_cores; pitch *= thread->num_cores; - __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); @@ -415,7 +416,7 @@ namespace swrenderer dest = thread->dest_for_thread(dest_y, pitch, dest); fracstep *= thread->num_cores; pitch *= thread->num_cores; - + frac -= one / 2; __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); @@ -436,8 +437,8 @@ namespace swrenderer unsigned int p11 = source2[y1]; unsigned int inv_b = texturefracx; - unsigned int a = (frac_y1 >> (FRACBITS - 4)) & 15; - unsigned int inv_a = 16 - a; + unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; + unsigned int a = 16 - inv_a; unsigned int b = 16 - inv_b; unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; @@ -481,8 +482,9 @@ namespace swrenderer int light = 256 - (args.Light() >> (FRACBITS - 8)); __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - __m128i inv_desaturate = _mm_setr_epi16(0, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 0, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); + __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); 
__m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); + shade_fade = _mm_mullo_epi16(shade_fade, inv_light); __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); int desaturate = shade_constants.desaturate; @@ -500,7 +502,6 @@ namespace swrenderer dest = thread->dest_for_thread(dest_y, pitch, dest); fracstep *= thread->num_cores; pitch *= thread->num_cores; - __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); @@ -515,15 +516,16 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(ifgcolor), _mm_setzero_si128()); // Shade - int blue = BPART(ifgcolor); - int green = GPART(ifgcolor); - int red = RPART(ifgcolor); + int blue0 = BPART(ifgcolor); + int green0 = GPART(ifgcolor); + int red0 = RPART(ifgcolor); - __m128i intensity = _mm_set1_epi16(((red * 77 + green * 143 + blue * 37) >> 8) * desaturate); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + __m128i intensity = _mm_set_epi16(0, intensity0, intensity0, intensity0, 0, intensity0, intensity0, intensity0); fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, _mm_set1_epi16(light)); - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(shade_fade, inv_light), fgcolor), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); // Blend @@ -550,8 +552,9 @@ namespace swrenderer int light = 256 - 
(args.Light() >> (FRACBITS - 8)); __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - __m128i inv_desaturate = _mm_setr_epi16(0, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 0, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); + __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); + shade_fade = _mm_mullo_epi16(shade_fade, inv_light); __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); int desaturate = shade_constants.desaturate; @@ -569,7 +572,7 @@ namespace swrenderer dest = thread->dest_for_thread(dest_y, pitch, dest); fracstep *= thread->num_cores; pitch *= thread->num_cores; - + frac -= one / 2; __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); @@ -590,8 +593,8 @@ namespace swrenderer unsigned int p11 = source2[y1]; unsigned int inv_b = texturefracx; - unsigned int a = (frac_y1 >> (FRACBITS - 4)) & 15; - unsigned int inv_a = 16 - a; + unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; + unsigned int a = 16 - inv_a; unsigned int b = 16 - inv_b; unsigned int sred = (RPART(p00) * (a * b) + 
RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; @@ -603,15 +606,16 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(ifgcolor), _mm_setzero_si128()); // Shade - int blue = BPART(ifgcolor); - int green = GPART(ifgcolor); - int red = RPART(ifgcolor); + int blue0 = BPART(ifgcolor); + int green0 = GPART(ifgcolor); + int red0 = RPART(ifgcolor); - __m128i intensity = _mm_set1_epi16(((red * 77 + green * 143 + blue * 37) >> 8) * desaturate); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + __m128i intensity = _mm_set_epi16(0, intensity0, intensity0, intensity0, 0, intensity0, intensity0, intensity0); fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, _mm_set1_epi16(light)); - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(shade_fade, inv_light), fgcolor), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); // Blend @@ -675,7 +679,6 @@ namespace swrenderer dest = thread->dest_for_thread(dest_y, pitch, dest); fracstep *= thread->num_cores; pitch *= thread->num_cores; - __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); @@ -746,7 +749,7 @@ namespace swrenderer dest = thread->dest_for_thread(dest_y, pitch, dest); fracstep *= thread->num_cores; pitch *= thread->num_cores; - + frac -= one / 2; __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); @@ -767,8 +770,8 @@ namespace swrenderer unsigned int p11 = source2[y1]; unsigned int inv_b = texturefracx; - unsigned int a = (frac_y1 >> (FRACBITS - 4)) & 15; - unsigned int inv_a = 16 - a; + unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; + unsigned int a = 16 - inv_a; unsigned int b = 16 - 
inv_b; unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; @@ -827,8 +830,9 @@ namespace swrenderer int light = 256 - (args.Light() >> (FRACBITS - 8)); __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - __m128i inv_desaturate = _mm_setr_epi16(0, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 0, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); + __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); + shade_fade = _mm_mullo_epi16(shade_fade, inv_light); __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); int desaturate = shade_constants.desaturate; @@ -846,7 +850,6 @@ namespace swrenderer dest = thread->dest_for_thread(dest_y, pitch, dest); fracstep *= thread->num_cores; pitch *= thread->num_cores; - __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); @@ -861,15 +864,16 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(ifgcolor), _mm_setzero_si128()); // Shade - int blue = BPART(ifgcolor); 
- int green = GPART(ifgcolor); - int red = RPART(ifgcolor); + int blue0 = BPART(ifgcolor); + int green0 = GPART(ifgcolor); + int red0 = RPART(ifgcolor); - __m128i intensity = _mm_set1_epi16(((red * 77 + green * 143 + blue * 37) >> 8) * desaturate); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + __m128i intensity = _mm_set_epi16(0, intensity0, intensity0, intensity0, 0, intensity0, intensity0, intensity0); fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, _mm_set1_epi16(light)); - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(shade_fade, inv_light), fgcolor), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); // Blend @@ -911,8 +915,9 @@ namespace swrenderer int light = 256 - (args.Light() >> (FRACBITS - 8)); __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - __m128i inv_desaturate = _mm_setr_epi16(0, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 0, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); + __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); + shade_fade = _mm_mullo_epi16(shade_fade, 
inv_light); __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); int desaturate = shade_constants.desaturate; @@ -930,7 +935,7 @@ namespace swrenderer dest = thread->dest_for_thread(dest_y, pitch, dest); fracstep *= thread->num_cores; pitch *= thread->num_cores; - + frac -= one / 2; __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); @@ -951,8 +956,8 @@ namespace swrenderer unsigned int p11 = source2[y1]; unsigned int inv_b = texturefracx; - unsigned int a = (frac_y1 >> (FRACBITS - 4)) & 15; - unsigned int inv_a = 16 - a; + unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; + unsigned int a = 16 - inv_a; unsigned int b = 16 - inv_b; unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; @@ -964,15 +969,16 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(ifgcolor), _mm_setzero_si128()); // Shade - int blue = BPART(ifgcolor); - int green = GPART(ifgcolor); - int red = RPART(ifgcolor); + int blue0 = BPART(ifgcolor); + int green0 = GPART(ifgcolor); + int red0 = RPART(ifgcolor); - __m128i intensity = _mm_set1_epi16(((red * 77 + green * 143 + blue * 37) >> 8) * desaturate); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + __m128i intensity = _mm_set_epi16(0, intensity0, intensity0, intensity0, 0, intensity0, intensity0, intensity0); fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, _mm_set1_epi16(light)); - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(shade_fade, inv_light), fgcolor), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = 
_mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); // Blend @@ -1051,7 +1057,6 @@ namespace swrenderer dest = thread->dest_for_thread(dest_y, pitch, dest); fracstep *= thread->num_cores; pitch *= thread->num_cores; - __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); @@ -1122,7 +1127,7 @@ namespace swrenderer dest = thread->dest_for_thread(dest_y, pitch, dest); fracstep *= thread->num_cores; pitch *= thread->num_cores; - + frac -= one / 2; __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); @@ -1143,8 +1148,8 @@ namespace swrenderer unsigned int p11 = source2[y1]; unsigned int inv_b = texturefracx; - unsigned int a = (frac_y1 >> (FRACBITS - 4)) & 15; - unsigned int inv_a = 16 - a; + unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; + unsigned int a = 16 - inv_a; unsigned int b = 16 - inv_b; unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; @@ -1203,8 +1208,9 @@ namespace swrenderer int light = 256 - (args.Light() >> (FRACBITS - 8)); __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - __m128i inv_desaturate = _mm_setr_epi16(0, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 0, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); + __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, 
shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); + shade_fade = _mm_mullo_epi16(shade_fade, inv_light); __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); int desaturate = shade_constants.desaturate; @@ -1222,7 +1228,6 @@ namespace swrenderer dest = thread->dest_for_thread(dest_y, pitch, dest); fracstep *= thread->num_cores; pitch *= thread->num_cores; - __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); @@ -1237,15 +1242,16 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(ifgcolor), _mm_setzero_si128()); // Shade - int blue = BPART(ifgcolor); - int green = GPART(ifgcolor); - int red = RPART(ifgcolor); + int blue0 = BPART(ifgcolor); + int green0 = GPART(ifgcolor); + int red0 = RPART(ifgcolor); - __m128i intensity = _mm_set1_epi16(((red * 77 + green * 143 + blue * 37) >> 8) * desaturate); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + __m128i intensity = _mm_set_epi16(0, intensity0, intensity0, intensity0, 0, intensity0, intensity0, intensity0); fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, _mm_set1_epi16(light)); - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(shade_fade, inv_light), fgcolor), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); // Blend @@ -1287,8 +1293,9 @@ namespace swrenderer int light = 256 - (args.Light() >> (FRACBITS - 8)); __m128i mlight = 
_mm_set_epi16(256, light, light, light, 256, light, light, light); __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - __m128i inv_desaturate = _mm_setr_epi16(0, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 0, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); + __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); + shade_fade = _mm_mullo_epi16(shade_fade, inv_light); __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); int desaturate = shade_constants.desaturate; @@ -1306,7 +1313,7 @@ namespace swrenderer dest = thread->dest_for_thread(dest_y, pitch, dest); fracstep *= thread->num_cores; pitch *= thread->num_cores; - + frac -= one / 2; __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); @@ -1327,8 +1334,8 @@ namespace swrenderer unsigned int p11 = source2[y1]; unsigned int inv_b = texturefracx; - unsigned int a = (frac_y1 >> (FRACBITS - 4)) & 15; - unsigned int inv_a = 16 - a; + unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; + unsigned int a = 16 - inv_a; unsigned int b = 16 - inv_b; unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * 
inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; @@ -1340,15 +1347,16 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(ifgcolor), _mm_setzero_si128()); // Shade - int blue = BPART(ifgcolor); - int green = GPART(ifgcolor); - int red = RPART(ifgcolor); + int blue0 = BPART(ifgcolor); + int green0 = GPART(ifgcolor); + int red0 = RPART(ifgcolor); - __m128i intensity = _mm_set1_epi16(((red * 77 + green * 143 + blue * 37) >> 8) * desaturate); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + __m128i intensity = _mm_set_epi16(0, intensity0, intensity0, intensity0, 0, intensity0, intensity0, intensity0); fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, _mm_set1_epi16(light)); - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(shade_fade, inv_light), fgcolor), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); // Blend @@ -1427,7 +1435,6 @@ namespace swrenderer dest = thread->dest_for_thread(dest_y, pitch, dest); fracstep *= thread->num_cores; pitch *= thread->num_cores; - __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); @@ -1498,7 +1505,7 @@ namespace swrenderer dest = thread->dest_for_thread(dest_y, pitch, dest); fracstep *= thread->num_cores; pitch *= thread->num_cores; - + frac -= one / 2; __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); @@ -1519,8 +1526,8 @@ namespace swrenderer unsigned int p11 = source2[y1]; unsigned int inv_b = texturefracx; - unsigned int a = (frac_y1 >> (FRACBITS - 4)) & 15; - unsigned int inv_a = 16 - a; + unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; + unsigned int a = 16 - inv_a; unsigned int b = 16 - inv_b; unsigned int sred = (RPART(p00) * 
(a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; @@ -1579,8 +1586,9 @@ namespace swrenderer int light = 256 - (args.Light() >> (FRACBITS - 8)); __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - __m128i inv_desaturate = _mm_setr_epi16(0, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 0, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); + __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); + shade_fade = _mm_mullo_epi16(shade_fade, inv_light); __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); int desaturate = shade_constants.desaturate; @@ -1598,7 +1606,6 @@ namespace swrenderer dest = thread->dest_for_thread(dest_y, pitch, dest); fracstep *= thread->num_cores; pitch *= thread->num_cores; - __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); @@ -1613,15 +1620,16 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(ifgcolor), _mm_setzero_si128()); // Shade - int blue = BPART(ifgcolor); - int green = GPART(ifgcolor); - 
int red = RPART(ifgcolor); + int blue0 = BPART(ifgcolor); + int green0 = GPART(ifgcolor); + int red0 = RPART(ifgcolor); - __m128i intensity = _mm_set1_epi16(((red * 77 + green * 143 + blue * 37) >> 8) * desaturate); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + __m128i intensity = _mm_set_epi16(0, intensity0, intensity0, intensity0, 0, intensity0, intensity0, intensity0); fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, _mm_set1_epi16(light)); - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(shade_fade, inv_light), fgcolor), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); // Blend @@ -1663,8 +1671,9 @@ namespace swrenderer int light = 256 - (args.Light() >> (FRACBITS - 8)); __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - __m128i inv_desaturate = _mm_setr_epi16(0, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 0, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); + __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); + shade_fade = _mm_mullo_epi16(shade_fade, inv_light); __m128i shade_light = 
_mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); int desaturate = shade_constants.desaturate; @@ -1682,7 +1691,7 @@ namespace swrenderer dest = thread->dest_for_thread(dest_y, pitch, dest); fracstep *= thread->num_cores; pitch *= thread->num_cores; - + frac -= one / 2; __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); @@ -1703,8 +1712,8 @@ namespace swrenderer unsigned int p11 = source2[y1]; unsigned int inv_b = texturefracx; - unsigned int a = (frac_y1 >> (FRACBITS - 4)) & 15; - unsigned int inv_a = 16 - a; + unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; + unsigned int a = 16 - inv_a; unsigned int b = 16 - inv_b; unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; @@ -1716,15 +1725,16 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(ifgcolor), _mm_setzero_si128()); // Shade - int blue = BPART(ifgcolor); - int green = GPART(ifgcolor); - int red = RPART(ifgcolor); + int blue0 = BPART(ifgcolor); + int green0 = GPART(ifgcolor); + int red0 = RPART(ifgcolor); - __m128i intensity = _mm_set1_epi16(((red * 77 + green * 143 + blue * 37) >> 8) * desaturate); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + __m128i intensity = _mm_set_epi16(0, intensity0, intensity0, intensity0, 0, intensity0, intensity0, intensity0); fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, _mm_set1_epi16(light)); - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(shade_fade, inv_light), fgcolor), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); 
fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); // Blend diff --git a/src/swrenderer/drawers/r_draw_wall32.php b/src/swrenderer/drawers/r_draw_wall32.php index cfac631e1c..d51c52455d 100644 --- a/src/swrenderer/drawers/r_draw_wall32.php +++ b/src/swrenderer/drawers/r_draw_wall32.php @@ -95,8 +95,9 @@ namespace swrenderer __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - __m128i inv_desaturate = _mm_setr_epi16(0, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 0, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); + __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); + shade_fade = _mm_mullo_epi16(shade_fade, inv_light); __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); int desaturate = shade_constants.desaturate; @@ -115,7 +116,10 @@ namespace swrenderer dest = thread->dest_for_thread(dest_y, pitch, dest); fracstep *= thread->num_cores; pitch *= thread->num_cores; - + + frac -= one / 2; + __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); @@ -159,8 +163,8 @@ namespace swrenderer unsigned int p11 = source2[y1]; unsigned int inv_b = texturefracx; - unsigned int a 
= (frac_y1 >> (FRACBITS - 4)) & 15; - unsigned int inv_a = 16 - a; + unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; + unsigned int a = 16 - inv_a; unsigned int b = 16 - inv_b; unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; @@ -181,15 +185,16 @@ namespace swrenderer - int blue = BPART(ifgcolor); - int green = GPART(ifgcolor); - int red = RPART(ifgcolor); + int blue0 = BPART(ifgcolor); + int green0 = GPART(ifgcolor); + int red0 = RPART(ifgcolor); - __m128i intensity = _mm_set1_epi16(((red * 77 + green * 143 + blue * 37) >> 8) * desaturate); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + __m128i intensity = _mm_set_epi16(0, intensity0, intensity0, intensity0, 0, intensity0, intensity0, intensity0); fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, _mm_set1_epi16(light)); - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(shade_fade, inv_light), fgcolor), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); Date: Sat, 18 Feb 2017 02:43:35 -0500 Subject: [PATCH 859/912] - added "vid_glswfb" CVAR for linux, defaults to false, to allow init without GL framebuffer. This will need to be ultimately finished at a later date. 
--- src/posix/sdl/sdlglvideo.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/posix/sdl/sdlglvideo.cpp b/src/posix/sdl/sdlglvideo.cpp index 51a15b8c4a..226600c648 100644 --- a/src/posix/sdl/sdlglvideo.cpp +++ b/src/posix/sdl/sdlglvideo.cpp @@ -62,6 +62,10 @@ CUSTOM_CVAR(Bool, gl_debug, false, CVAR_ARCHIVE | CVAR_GLOBALCONFIG | CVAR_NOINI { Printf("This won't take effect until " GAMENAME " is restarted.\n"); } +CUSTOM_CVAR(Bool, vid_glswfb, false, CVAR_ARCHIVE | CVAR_GLOBALCONFIG | CVAR_NOINITCALL) +{ + Printf("This won't take effect until " GAMENAME " is restarted.\n"); +} #ifdef __arm__ CUSTOM_CVAR(Bool, gl_es, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG | CVAR_NOINITCALL) @@ -211,6 +215,10 @@ DFrameBuffer *SDLGLVideo::CreateFrameBuffer (int width, int height, bool bgra, b { fb = new OpenGLFrameBuffer(0, width, height, 32, 60, fullscreen); } + else if (vid_glswfb == 0) + { + fb = new SDLFB(width, height, bgra, fullscreen, nullptr); + } else { fb = (SDLBaseFB*)CreateGLSWFrameBuffer(width, height, bgra, fullscreen); From 8f06b5f9a17afd67a07bbb510f45d7f11059b43a Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 18 Feb 2017 09:17:47 +0100 Subject: [PATCH 860/912] Updated wall drawers to process two lines at a time --- src/swrenderer/drawers/r_draw_wall32.h | 1972 ++++++++++++++++++++-- src/swrenderer/drawers/r_draw_wall32.php | 71 +- 2 files changed, 1876 insertions(+), 167 deletions(-) diff --git a/src/swrenderer/drawers/r_draw_wall32.h b/src/swrenderer/drawers/r_draw_wall32.h index a85b96805b..1e4da006cc 100644 --- a/src/swrenderer/drawers/r_draw_wall32.h +++ b/src/swrenderer/drawers/r_draw_wall32.h @@ -74,15 +74,54 @@ namespace swrenderer __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); - for (int index = 0; index < count; index++) + int ssecount = count / 2; + for (int index = 0; index < ssecount; index++) { - int offset = index * pitch; - __m128i bgcolor = 
_mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); + int offset = index * pitch * 2; + uint32_t desttmp[2]; + desttmp[0] = dest[offset]; + desttmp[1] = dest[offset + pitch]; // Sample + unsigned int ifgcolor[2]; + { int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; - unsigned int ifgcolor = source[sample_index]; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(ifgcolor), _mm_setzero_si128()); + unsigned int sampleout = source[sample_index]; + ifgcolor[0] = sampleout; + frac += fracstep; + } + { + int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; + unsigned int sampleout = source[sample_index]; + ifgcolor[1] = sampleout; + frac += fracstep; + } + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + __m128i outcolor = fgcolor; + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + + _mm_storel_epi64((__m128i*)desttmp, outcolor); + dest[offset] = desttmp[0]; + dest[offset + pitch] = desttmp[1]; + } + + if (ssecount * 2 != count) + { + int index = ssecount * 2; + int offset = index * pitch; + + // Sample + unsigned int ifgcolor[2]; + int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; + unsigned int sampleout = source[sample_index]; + ifgcolor[0] = sampleout; + ifgcolor[1] = 0; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); @@ -92,7 +131,6 @@ namespace swrenderer outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); dest[offset] = _mm_cvtsi128_si32(outcolor); - frac += fracstep; } } else @@ -123,12 +161,17 @@ namespace swrenderer __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); - for (int index = 0; index < count; index++) + int ssecount = count / 2; + for (int 
index = 0; index < ssecount; index++) { - int offset = index * pitch; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); + int offset = index * pitch * 2; + uint32_t desttmp[2]; + desttmp[0] = dest[offset]; + desttmp[1] = dest[offset + pitch]; // Sample + unsigned int ifgcolor[2]; + { unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; unsigned int y0 = frac_y0 >> FRACBITS; @@ -149,8 +192,80 @@ namespace swrenderer unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int ifgcolor = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(ifgcolor), _mm_setzero_si128()); + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[0] = sampleout; + frac += fracstep; + } + { + unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; + unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; + unsigned int y0 = frac_y0 >> FRACBITS; + unsigned int y1 = frac_y1 >> FRACBITS; + + unsigned int p00 = source[y0]; + unsigned int p01 = source[y1]; + unsigned int p10 = source2[y0]; + unsigned int p11 = source2[y1]; + + unsigned int inv_b = texturefracx; + unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; + unsigned int a = 16 - inv_a; + unsigned int b = 16 - inv_b; + + unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + 
BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[1] = sampleout; + frac += fracstep; + } + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + __m128i outcolor = fgcolor; + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + + _mm_storel_epi64((__m128i*)desttmp, outcolor); + dest[offset] = desttmp[0]; + dest[offset + pitch] = desttmp[1]; + } + + if (ssecount * 2 != count) + { + int index = ssecount * 2; + int offset = index * pitch; + + // Sample + unsigned int ifgcolor[2]; + unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; + unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; + unsigned int y0 = frac_y0 >> FRACBITS; + unsigned int y1 = frac_y1 >> FRACBITS; + + unsigned int p00 = source[y0]; + unsigned int p01 = source[y1]; + unsigned int p10 = source2[y0]; + unsigned int p11 = source2[y1]; + + unsigned int inv_b = texturefracx; + unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; + unsigned int a = 16 - inv_a; + unsigned int b = 16 - inv_b; + + unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = 
(salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[0] = sampleout; + ifgcolor[1] = 0; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); @@ -160,7 +275,6 @@ namespace swrenderer outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); dest[offset] = _mm_cvtsi128_si32(outcolor); - frac += fracstep; } } } @@ -201,23 +315,82 @@ namespace swrenderer __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); - for (int index = 0; index < count; index++) + int ssecount = count / 2; + for (int index = 0; index < ssecount; index++) { - int offset = index * pitch; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); + int offset = index * pitch * 2; + uint32_t desttmp[2]; + desttmp[0] = dest[offset]; + desttmp[1] = dest[offset + pitch]; // Sample + unsigned int ifgcolor[2]; + { int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; - unsigned int ifgcolor = source[sample_index]; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(ifgcolor), _mm_setzero_si128()); + unsigned int sampleout = source[sample_index]; + ifgcolor[0] = sampleout; + frac += fracstep; + } + { + int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; + unsigned int sampleout = source[sample_index]; + ifgcolor[1] = sampleout; + frac += fracstep; + } + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - int blue0 = BPART(ifgcolor); - int green0 = GPART(ifgcolor); - int red0 = RPART(ifgcolor); - + int blue0 = BPART(ifgcolor[0]); + int green0 = GPART(ifgcolor[0]); + int red0 = RPART(ifgcolor[0]); int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - __m128i intensity = _mm_set_epi16(0, intensity0, intensity0, intensity0, 0, intensity0, intensity0, intensity0); + + int blue1 = 
BPART(ifgcolor[1]); + int green1 = GPART(ifgcolor[1]); + int red1 = RPART(ifgcolor[1]); + int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); + + fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + // Blend + __m128i outcolor = fgcolor; + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + + _mm_storel_epi64((__m128i*)desttmp, outcolor); + dest[offset] = desttmp[0]; + dest[offset + pitch] = desttmp[1]; + } + + if (ssecount * 2 != count) + { + int index = ssecount * 2; + int offset = index * pitch; + + // Sample + unsigned int ifgcolor[2]; + int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; + unsigned int sampleout = source[sample_index]; + ifgcolor[0] = sampleout; + ifgcolor[1] = 0; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + int blue0 = BPART(ifgcolor[0]); + int green0 = GPART(ifgcolor[0]); + int red0 = RPART(ifgcolor[0]); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + + int blue1 = BPART(ifgcolor[1]); + int green1 = GPART(ifgcolor[1]); + int red1 = RPART(ifgcolor[1]); + int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); fgcolor = _mm_mullo_epi16(fgcolor, mlight); @@ -229,7 +402,6 @@ namespace swrenderer outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); dest[offset] = _mm_cvtsi128_si32(outcolor); - frac += fracstep; } } 
else @@ -265,12 +437,17 @@ namespace swrenderer __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); - for (int index = 0; index < count; index++) + int ssecount = count / 2; + for (int index = 0; index < ssecount; index++) { - int offset = index * pitch; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); + int offset = index * pitch * 2; + uint32_t desttmp[2]; + desttmp[0] = dest[offset]; + desttmp[1] = dest[offset + pitch]; // Sample + unsigned int ifgcolor[2]; + { unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; unsigned int y0 = frac_y0 >> FRACBITS; @@ -291,16 +468,108 @@ namespace swrenderer unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int ifgcolor = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(ifgcolor), _mm_setzero_si128()); + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[0] = sampleout; + frac += fracstep; + } + { + unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; + unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; + unsigned int y0 = frac_y0 >> FRACBITS; + unsigned int y1 = frac_y1 >> FRACBITS; + + unsigned int p00 = source[y0]; + unsigned int p01 = source[y1]; + unsigned int p10 = source2[y0]; + unsigned int p11 = source2[y1]; + + unsigned int inv_b = texturefracx; + unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; + unsigned int a = 16 - inv_a; + unsigned int b = 16 - inv_b; + + unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * 
inv_b) + 127) >> 8; + unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[1] = sampleout; + frac += fracstep; + } + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - int blue0 = BPART(ifgcolor); - int green0 = GPART(ifgcolor); - int red0 = RPART(ifgcolor); - + int blue0 = BPART(ifgcolor[0]); + int green0 = GPART(ifgcolor[0]); + int red0 = RPART(ifgcolor[0]); int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - __m128i intensity = _mm_set_epi16(0, intensity0, intensity0, intensity0, 0, intensity0, intensity0, intensity0); + + int blue1 = BPART(ifgcolor[1]); + int green1 = GPART(ifgcolor[1]); + int red1 = RPART(ifgcolor[1]); + int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); + + fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + // Blend + __m128i outcolor = fgcolor; + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + + _mm_storel_epi64((__m128i*)desttmp, outcolor); + dest[offset] = desttmp[0]; + dest[offset + pitch] = desttmp[1]; + } + + if (ssecount * 2 != count) + { + int index = ssecount * 2; + int offset = index * pitch; + + // Sample 
+ unsigned int ifgcolor[2]; + unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; + unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; + unsigned int y0 = frac_y0 >> FRACBITS; + unsigned int y1 = frac_y1 >> FRACBITS; + + unsigned int p00 = source[y0]; + unsigned int p01 = source[y1]; + unsigned int p10 = source2[y0]; + unsigned int p11 = source2[y1]; + + unsigned int inv_b = texturefracx; + unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; + unsigned int a = 16 - inv_a; + unsigned int b = 16 - inv_b; + + unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[0] = sampleout; + ifgcolor[1] = 0; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + int blue0 = BPART(ifgcolor[0]); + int green0 = GPART(ifgcolor[0]); + int red0 = RPART(ifgcolor[0]); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + + int blue1 = BPART(ifgcolor[1]); + int green1 = GPART(ifgcolor[1]); + int red1 = RPART(ifgcolor[1]); + int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); fgcolor = _mm_mullo_epi16(fgcolor, mlight); @@ -312,7 +581,6 @@ 
namespace swrenderer outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); dest[offset] = _mm_cvtsi128_si32(outcolor); - frac += fracstep; } } } @@ -364,15 +632,63 @@ namespace swrenderer __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); - for (int index = 0; index < count; index++) + int ssecount = count / 2; + for (int index = 0; index < ssecount; index++) { + int offset = index * pitch * 2; + uint32_t desttmp[2]; + desttmp[0] = dest[offset]; + desttmp[1] = dest[offset + pitch]; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + { + int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; + unsigned int sampleout = source[sample_index]; + ifgcolor[0] = sampleout; + frac += fracstep; + } + { + int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; + unsigned int sampleout = source[sample_index]; + ifgcolor[1] = sampleout; + frac += fracstep; + } + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + fgcolor = _mm_mullo_epi16(fgcolor, alpha); + bgcolor = _mm_mullo_epi16(bgcolor, inv_alpha); + __m128i outcolor = _mm_srli_epi16(_mm_add_epi16(fgcolor, bgcolor), 8); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + _mm_storel_epi64((__m128i*)desttmp, outcolor); + dest[offset] = desttmp[0]; + dest[offset + pitch] = desttmp[1]; + } + + if (ssecount * 2 != count) + { + int index = ssecount * 2; int offset = index * pitch; 
__m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); // Sample + unsigned int ifgcolor[2]; int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; - unsigned int ifgcolor = source[sample_index]; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(ifgcolor), _mm_setzero_si128()); + unsigned int sampleout = source[sample_index]; + ifgcolor[0] = sampleout; + ifgcolor[1] = 0; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); @@ -389,7 +705,6 @@ namespace swrenderer outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); dest[offset] = _mm_cvtsi128_si32(outcolor); - frac += fracstep; } } else @@ -420,12 +735,18 @@ namespace swrenderer __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); - for (int index = 0; index < count; index++) + int ssecount = count / 2; + for (int index = 0; index < ssecount; index++) { - int offset = index * pitch; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); + int offset = index * pitch * 2; + uint32_t desttmp[2]; + desttmp[0] = dest[offset]; + desttmp[1] = dest[offset + pitch]; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); // Sample + unsigned int ifgcolor[2]; + { unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; unsigned int y0 = frac_y0 >> FRACBITS; @@ -446,8 +767,88 @@ namespace swrenderer unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int ifgcolor = (salpha << 24) | (sred << 16) | (sgreen << 8) | 
sblue; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(ifgcolor), _mm_setzero_si128()); + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[0] = sampleout; + frac += fracstep; + } + { + unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; + unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; + unsigned int y0 = frac_y0 >> FRACBITS; + unsigned int y1 = frac_y1 >> FRACBITS; + + unsigned int p00 = source[y0]; + unsigned int p01 = source[y1]; + unsigned int p10 = source2[y0]; + unsigned int p11 = source2[y1]; + + unsigned int inv_b = texturefracx; + unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; + unsigned int a = 16 - inv_a; + unsigned int b = 16 - inv_b; + + unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[1] = sampleout; + frac += fracstep; + } + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + fgcolor = _mm_mullo_epi16(fgcolor, alpha); + bgcolor = _mm_mullo_epi16(bgcolor, inv_alpha); 
+ __m128i outcolor = _mm_srli_epi16(_mm_add_epi16(fgcolor, bgcolor), 8); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + _mm_storel_epi64((__m128i*)desttmp, outcolor); + dest[offset] = desttmp[0]; + dest[offset + pitch] = desttmp[1]; + } + + if (ssecount * 2 != count) + { + int index = ssecount * 2; + int offset = index * pitch; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; + unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; + unsigned int y0 = frac_y0 >> FRACBITS; + unsigned int y1 = frac_y1 >> FRACBITS; + + unsigned int p00 = source[y0]; + unsigned int p01 = source[y1]; + unsigned int p10 = source2[y0]; + unsigned int p11 = source2[y1]; + + unsigned int inv_b = texturefracx; + unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; + unsigned int a = 16 - inv_a; + unsigned int b = 16 - inv_b; + + unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[0] = sampleout; + ifgcolor[1] = 0; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); @@ -464,7 +865,6 @@ namespace swrenderer outcolor = 
_mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); dest[offset] = _mm_cvtsi128_si32(outcolor); - frac += fracstep; } } } @@ -505,23 +905,91 @@ namespace swrenderer __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); - for (int index = 0; index < count; index++) + int ssecount = count / 2; + for (int index = 0; index < ssecount; index++) { + int offset = index * pitch * 2; + uint32_t desttmp[2]; + desttmp[0] = dest[offset]; + desttmp[1] = dest[offset + pitch]; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + { + int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; + unsigned int sampleout = source[sample_index]; + ifgcolor[0] = sampleout; + frac += fracstep; + } + { + int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; + unsigned int sampleout = source[sample_index]; + ifgcolor[1] = sampleout; + frac += fracstep; + } + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + int blue0 = BPART(ifgcolor[0]); + int green0 = GPART(ifgcolor[0]); + int red0 = RPART(ifgcolor[0]); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + + int blue1 = BPART(ifgcolor[1]); + int green1 = GPART(ifgcolor[1]); + int red1 = RPART(ifgcolor[1]); + int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); + + fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + // Blend + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = 
_mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + fgcolor = _mm_mullo_epi16(fgcolor, alpha); + bgcolor = _mm_mullo_epi16(bgcolor, inv_alpha); + __m128i outcolor = _mm_srli_epi16(_mm_add_epi16(fgcolor, bgcolor), 8); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + _mm_storel_epi64((__m128i*)desttmp, outcolor); + dest[offset] = desttmp[0]; + dest[offset + pitch] = desttmp[1]; + } + + if (ssecount * 2 != count) + { + int index = ssecount * 2; int offset = index * pitch; __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); // Sample + unsigned int ifgcolor[2]; int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; - unsigned int ifgcolor = source[sample_index]; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(ifgcolor), _mm_setzero_si128()); + unsigned int sampleout = source[sample_index]; + ifgcolor[0] = sampleout; + ifgcolor[1] = 0; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - int blue0 = BPART(ifgcolor); - int green0 = GPART(ifgcolor); - int red0 = RPART(ifgcolor); - + int blue0 = BPART(ifgcolor[0]); + int green0 = GPART(ifgcolor[0]); + int red0 = RPART(ifgcolor[0]); int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - __m128i intensity = _mm_set_epi16(0, intensity0, intensity0, intensity0, 0, intensity0, intensity0, intensity0); + + int blue1 = BPART(ifgcolor[1]); + int green1 = GPART(ifgcolor[1]); + int red1 = RPART(ifgcolor[1]); + int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, 
inv_desaturate), intensity), 8); fgcolor = _mm_mullo_epi16(fgcolor, mlight); @@ -540,7 +1008,6 @@ namespace swrenderer outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); dest[offset] = _mm_cvtsi128_si32(outcolor); - frac += fracstep; } } else @@ -576,12 +1043,18 @@ namespace swrenderer __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); - for (int index = 0; index < count; index++) + int ssecount = count / 2; + for (int index = 0; index < ssecount; index++) { - int offset = index * pitch; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); + int offset = index * pitch * 2; + uint32_t desttmp[2]; + desttmp[0] = dest[offset]; + desttmp[1] = dest[offset + pitch]; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); // Sample + unsigned int ifgcolor[2]; + { unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; unsigned int y0 = frac_y0 >> FRACBITS; @@ -602,16 +1075,116 @@ namespace swrenderer unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int ifgcolor = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(ifgcolor), _mm_setzero_si128()); + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[0] = sampleout; + frac += fracstep; + } + { + unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; + unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; + unsigned int y0 = frac_y0 >> FRACBITS; + unsigned int y1 = frac_y1 >> FRACBITS; + + unsigned int p00 = source[y0]; + unsigned int p01 = source[y1]; + 
unsigned int p10 = source2[y0]; + unsigned int p11 = source2[y1]; + + unsigned int inv_b = texturefracx; + unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; + unsigned int a = 16 - inv_a; + unsigned int b = 16 - inv_b; + + unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[1] = sampleout; + frac += fracstep; + } + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - int blue0 = BPART(ifgcolor); - int green0 = GPART(ifgcolor); - int red0 = RPART(ifgcolor); - + int blue0 = BPART(ifgcolor[0]); + int green0 = GPART(ifgcolor[0]); + int red0 = RPART(ifgcolor[0]); int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - __m128i intensity = _mm_set_epi16(0, intensity0, intensity0, intensity0, 0, intensity0, intensity0, intensity0); + + int blue1 = BPART(ifgcolor[1]); + int green1 = GPART(ifgcolor[1]); + int red1 = RPART(ifgcolor[1]); + int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); + + fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, 
shade_light), 8); + + // Blend + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + fgcolor = _mm_mullo_epi16(fgcolor, alpha); + bgcolor = _mm_mullo_epi16(bgcolor, inv_alpha); + __m128i outcolor = _mm_srli_epi16(_mm_add_epi16(fgcolor, bgcolor), 8); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + _mm_storel_epi64((__m128i*)desttmp, outcolor); + dest[offset] = desttmp[0]; + dest[offset + pitch] = desttmp[1]; + } + + if (ssecount * 2 != count) + { + int index = ssecount * 2; + int offset = index * pitch; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; + unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; + unsigned int y0 = frac_y0 >> FRACBITS; + unsigned int y1 = frac_y1 >> FRACBITS; + + unsigned int p00 = source[y0]; + unsigned int p01 = source[y1]; + unsigned int p10 = source2[y0]; + unsigned int p11 = source2[y1]; + + unsigned int inv_b = texturefracx; + unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; + unsigned int a = 16 - inv_a; + unsigned int b = 16 - inv_b; + + unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * 
(inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[0] = sampleout; + ifgcolor[1] = 0; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + int blue0 = BPART(ifgcolor[0]); + int green0 = GPART(ifgcolor[0]); + int red0 = RPART(ifgcolor[0]); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + + int blue1 = BPART(ifgcolor[1]); + int green1 = GPART(ifgcolor[1]); + int red1 = RPART(ifgcolor[1]); + int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); fgcolor = _mm_mullo_epi16(fgcolor, mlight); @@ -630,7 +1203,6 @@ namespace swrenderer outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); dest[offset] = _mm_cvtsi128_si32(outcolor); - frac += fracstep; } } } @@ -682,15 +1254,78 @@ namespace swrenderer __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); - for (int index = 0; index < count; index++) + int ssecount = count / 2; + for (int index = 0; index < ssecount; index++) { + int offset = index * pitch * 2; + uint32_t desttmp[2]; + desttmp[0] = dest[offset]; + desttmp[1] = dest[offset + pitch]; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + { + int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; + unsigned int sampleout = source[sample_index]; + ifgcolor[0] = sampleout; + frac += fracstep; + } + { + int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; + unsigned int sampleout = source[sample_index]; + ifgcolor[1] = sampleout; + frac += fracstep; + } + __m128i fgcolor = 
_mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + + __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); + bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + + __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); + fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_srai_epi16(_mm_add_epi32(fg_lo, bg_lo), 8); + __m128i out_hi = _mm_srai_epi16(_mm_add_epi32(fg_hi, bg_hi), 8); + __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + _mm_storel_epi64((__m128i*)desttmp, outcolor); + dest[offset] = desttmp[0]; + dest[offset + pitch] = desttmp[1]; + } + + if (ssecount * 2 != count) + { + int index = ssecount * 2; int offset = index * pitch; __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); // Sample + unsigned int ifgcolor[2]; int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; - unsigned int ifgcolor = source[sample_index]; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(ifgcolor), _mm_setzero_si128()); + unsigned int 
sampleout = source[sample_index]; + ifgcolor[0] = sampleout; + ifgcolor[1] = 0; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); @@ -722,7 +1357,6 @@ namespace swrenderer outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); dest[offset] = _mm_cvtsi128_si32(outcolor); - frac += fracstep; } } else @@ -753,12 +1387,18 @@ namespace swrenderer __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); - for (int index = 0; index < count; index++) + int ssecount = count / 2; + for (int index = 0; index < ssecount; index++) { - int offset = index * pitch; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); + int offset = index * pitch * 2; + uint32_t desttmp[2]; + desttmp[0] = dest[offset]; + desttmp[1] = dest[offset + pitch]; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); // Sample + unsigned int ifgcolor[2]; + { unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; unsigned int y0 = frac_y0 >> FRACBITS; @@ -779,8 +1419,103 @@ namespace swrenderer unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int ifgcolor = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(ifgcolor), _mm_setzero_si128()); + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[0] = sampleout; + frac += fracstep; + } + { + unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; + unsigned int frac_y1 = ((frac + one) >> FRACBITS) * 
textureheight; + unsigned int y0 = frac_y0 >> FRACBITS; + unsigned int y1 = frac_y1 >> FRACBITS; + + unsigned int p00 = source[y0]; + unsigned int p01 = source[y1]; + unsigned int p10 = source2[y0]; + unsigned int p11 = source2[y1]; + + unsigned int inv_b = texturefracx; + unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; + unsigned int a = 16 - inv_a; + unsigned int b = 16 - inv_b; + + unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[1] = sampleout; + frac += fracstep; + } + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + + __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); + bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + + __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); + fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = 
_mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_srai_epi16(_mm_add_epi32(fg_lo, bg_lo), 8); + __m128i out_hi = _mm_srai_epi16(_mm_add_epi32(fg_hi, bg_hi), 8); + __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + _mm_storel_epi64((__m128i*)desttmp, outcolor); + dest[offset] = desttmp[0]; + dest[offset + pitch] = desttmp[1]; + } + + if (ssecount * 2 != count) + { + int index = ssecount * 2; + int offset = index * pitch; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; + unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; + unsigned int y0 = frac_y0 >> FRACBITS; + unsigned int y1 = frac_y1 >> FRACBITS; + + unsigned int p00 = source[y0]; + unsigned int p01 = source[y1]; + unsigned int p10 = source2[y0]; + unsigned int p11 = source2[y1]; + + unsigned int inv_b = texturefracx; + unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; + unsigned int a = 16 - inv_a; + unsigned int b = 16 - inv_b; + + unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; 
+ + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[0] = sampleout; + ifgcolor[1] = 0; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); @@ -812,7 +1547,6 @@ namespace swrenderer outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); dest[offset] = _mm_cvtsi128_si32(outcolor); - frac += fracstep; } } } @@ -853,23 +1587,106 @@ namespace swrenderer __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); - for (int index = 0; index < count; index++) + int ssecount = count / 2; + for (int index = 0; index < ssecount; index++) { + int offset = index * pitch * 2; + uint32_t desttmp[2]; + desttmp[0] = dest[offset]; + desttmp[1] = dest[offset + pitch]; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + { + int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; + unsigned int sampleout = source[sample_index]; + ifgcolor[0] = sampleout; + frac += fracstep; + } + { + int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; + unsigned int sampleout = source[sample_index]; + ifgcolor[1] = sampleout; + frac += fracstep; + } + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + int blue0 = BPART(ifgcolor[0]); + int green0 = GPART(ifgcolor[0]); + int red0 = RPART(ifgcolor[0]); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + + int blue1 = BPART(ifgcolor[1]); + int green1 = GPART(ifgcolor[1]); + int red1 = RPART(ifgcolor[1]); + int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); + + fgcolor = 
_mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + // Blend + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + + __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); + bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + + __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); + fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_srai_epi16(_mm_add_epi32(fg_lo, bg_lo), 8); + __m128i out_hi = _mm_srai_epi16(_mm_add_epi32(fg_hi, bg_hi), 8); + __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + _mm_storel_epi64((__m128i*)desttmp, outcolor); + dest[offset] = desttmp[0]; + dest[offset + pitch] = desttmp[1]; + } + + if (ssecount * 2 != count) + { + int index = ssecount * 2; int offset = index * pitch; __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); // Sample + unsigned int ifgcolor[2]; int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; - unsigned int ifgcolor = 
source[sample_index]; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(ifgcolor), _mm_setzero_si128()); + unsigned int sampleout = source[sample_index]; + ifgcolor[0] = sampleout; + ifgcolor[1] = 0; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - int blue0 = BPART(ifgcolor); - int green0 = GPART(ifgcolor); - int red0 = RPART(ifgcolor); - + int blue0 = BPART(ifgcolor[0]); + int green0 = GPART(ifgcolor[0]); + int red0 = RPART(ifgcolor[0]); int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - __m128i intensity = _mm_set_epi16(0, intensity0, intensity0, intensity0, 0, intensity0, intensity0, intensity0); + + int blue1 = BPART(ifgcolor[1]); + int green1 = GPART(ifgcolor[1]); + int red1 = RPART(ifgcolor[1]); + int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); fgcolor = _mm_mullo_epi16(fgcolor, mlight); @@ -903,7 +1720,6 @@ namespace swrenderer outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); dest[offset] = _mm_cvtsi128_si32(outcolor); - frac += fracstep; } } else @@ -939,12 +1755,18 @@ namespace swrenderer __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); - for (int index = 0; index < count; index++) + int ssecount = count / 2; + for (int index = 0; index < ssecount; index++) { - int offset = index * pitch; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); + int offset = index * pitch * 2; + uint32_t desttmp[2]; + desttmp[0] = dest[offset]; + desttmp[1] = dest[offset + pitch]; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); // Sample + unsigned int ifgcolor[2]; + { unsigned int 
frac_y0 = (frac >> FRACBITS) * textureheight; unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; unsigned int y0 = frac_y0 >> FRACBITS; @@ -965,16 +1787,131 @@ namespace swrenderer unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int ifgcolor = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(ifgcolor), _mm_setzero_si128()); + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[0] = sampleout; + frac += fracstep; + } + { + unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; + unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; + unsigned int y0 = frac_y0 >> FRACBITS; + unsigned int y1 = frac_y1 >> FRACBITS; + + unsigned int p00 = source[y0]; + unsigned int p01 = source[y1]; + unsigned int p10 = source2[y0]; + unsigned int p11 = source2[y1]; + + unsigned int inv_b = texturefracx; + unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; + unsigned int a = 16 - inv_a; + unsigned int b = 16 - inv_b; + + unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[1] = sampleout; + frac += fracstep; 
+ } + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - int blue0 = BPART(ifgcolor); - int green0 = GPART(ifgcolor); - int red0 = RPART(ifgcolor); - + int blue0 = BPART(ifgcolor[0]); + int green0 = GPART(ifgcolor[0]); + int red0 = RPART(ifgcolor[0]); int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - __m128i intensity = _mm_set_epi16(0, intensity0, intensity0, intensity0, 0, intensity0, intensity0, intensity0); + + int blue1 = BPART(ifgcolor[1]); + int green1 = GPART(ifgcolor[1]); + int red1 = RPART(ifgcolor[1]); + int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); + + fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + // Blend + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + + __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); + bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + + __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); + fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = 
_mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_srai_epi16(_mm_add_epi32(fg_lo, bg_lo), 8); + __m128i out_hi = _mm_srai_epi16(_mm_add_epi32(fg_hi, bg_hi), 8); + __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + _mm_storel_epi64((__m128i*)desttmp, outcolor); + dest[offset] = desttmp[0]; + dest[offset + pitch] = desttmp[1]; + } + + if (ssecount * 2 != count) + { + int index = ssecount * 2; + int offset = index * pitch; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; + unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; + unsigned int y0 = frac_y0 >> FRACBITS; + unsigned int y1 = frac_y1 >> FRACBITS; + + unsigned int p00 = source[y0]; + unsigned int p01 = source[y1]; + unsigned int p10 = source2[y0]; + unsigned int p11 = source2[y1]; + + unsigned int inv_b = texturefracx; + unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; + unsigned int a = 16 - inv_a; + unsigned int b = 16 - inv_b; + + unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[0] = sampleout; + ifgcolor[1] = 0; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), 
_mm_setzero_si128()); + + // Shade + int blue0 = BPART(ifgcolor[0]); + int green0 = GPART(ifgcolor[0]); + int red0 = RPART(ifgcolor[0]); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + + int blue1 = BPART(ifgcolor[1]); + int green1 = GPART(ifgcolor[1]); + int red1 = RPART(ifgcolor[1]); + int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); fgcolor = _mm_mullo_epi16(fgcolor, mlight); @@ -1008,7 +1945,6 @@ namespace swrenderer outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); dest[offset] = _mm_cvtsi128_si32(outcolor); - frac += fracstep; } } } @@ -1060,15 +1996,78 @@ namespace swrenderer __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); - for (int index = 0; index < count; index++) + int ssecount = count / 2; + for (int index = 0; index < ssecount; index++) { + int offset = index * pitch * 2; + uint32_t desttmp[2]; + desttmp[0] = dest[offset]; + desttmp[1] = dest[offset + pitch]; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + { + int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; + unsigned int sampleout = source[sample_index]; + ifgcolor[0] = sampleout; + frac += fracstep; + } + { + int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; + unsigned int sampleout = source[sample_index]; + ifgcolor[1] = sampleout; + frac += fracstep; + } + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = 
_mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + + __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); + bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + + __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); + fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_srai_epi16(_mm_sub_epi32(fg_lo, bg_lo), 8); + __m128i out_hi = _mm_srai_epi16(_mm_sub_epi32(fg_hi, bg_hi), 8); + __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + _mm_storel_epi64((__m128i*)desttmp, outcolor); + dest[offset] = desttmp[0]; + dest[offset + pitch] = desttmp[1]; + } + + if (ssecount * 2 != count) + { + int index = ssecount * 2; int offset = index * pitch; __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); // Sample + unsigned int ifgcolor[2]; int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; - unsigned int ifgcolor = source[sample_index]; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(ifgcolor), _mm_setzero_si128()); + unsigned int sampleout = source[sample_index]; + ifgcolor[0] = sampleout; + ifgcolor[1] = 0; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); @@ 
-1100,7 +2099,6 @@ namespace swrenderer outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); dest[offset] = _mm_cvtsi128_si32(outcolor); - frac += fracstep; } } else @@ -1131,12 +2129,18 @@ namespace swrenderer __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); - for (int index = 0; index < count; index++) + int ssecount = count / 2; + for (int index = 0; index < ssecount; index++) { - int offset = index * pitch; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); + int offset = index * pitch * 2; + uint32_t desttmp[2]; + desttmp[0] = dest[offset]; + desttmp[1] = dest[offset + pitch]; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); // Sample + unsigned int ifgcolor[2]; + { unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; unsigned int y0 = frac_y0 >> FRACBITS; @@ -1157,8 +2161,103 @@ namespace swrenderer unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int ifgcolor = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(ifgcolor), _mm_setzero_si128()); + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[0] = sampleout; + frac += fracstep; + } + { + unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; + unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; + unsigned int y0 = frac_y0 >> FRACBITS; + unsigned int y1 = frac_y1 >> FRACBITS; + + unsigned int p00 = source[y0]; + unsigned int p01 = source[y1]; + unsigned int p10 = source2[y0]; + unsigned int p11 = source2[y1]; + + unsigned 
int inv_b = texturefracx; + unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; + unsigned int a = 16 - inv_a; + unsigned int b = 16 - inv_b; + + unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[1] = sampleout; + frac += fracstep; + } + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + + __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); + bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + + __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); + fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + 
__m128i out_lo = _mm_srai_epi16(_mm_sub_epi32(fg_lo, bg_lo), 8); + __m128i out_hi = _mm_srai_epi16(_mm_sub_epi32(fg_hi, bg_hi), 8); + __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + _mm_storel_epi64((__m128i*)desttmp, outcolor); + dest[offset] = desttmp[0]; + dest[offset + pitch] = desttmp[1]; + } + + if (ssecount * 2 != count) + { + int index = ssecount * 2; + int offset = index * pitch; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; + unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; + unsigned int y0 = frac_y0 >> FRACBITS; + unsigned int y1 = frac_y1 >> FRACBITS; + + unsigned int p00 = source[y0]; + unsigned int p01 = source[y1]; + unsigned int p10 = source2[y0]; + unsigned int p11 = source2[y1]; + + unsigned int inv_b = texturefracx; + unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; + unsigned int a = 16 - inv_a; + unsigned int b = 16 - inv_b; + + unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[0] = sampleout; + ifgcolor[1] = 0; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade fgcolor = 
_mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); @@ -1190,7 +2289,6 @@ namespace swrenderer outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); dest[offset] = _mm_cvtsi128_si32(outcolor); - frac += fracstep; } } } @@ -1231,23 +2329,106 @@ namespace swrenderer __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); - for (int index = 0; index < count; index++) + int ssecount = count / 2; + for (int index = 0; index < ssecount; index++) { + int offset = index * pitch * 2; + uint32_t desttmp[2]; + desttmp[0] = dest[offset]; + desttmp[1] = dest[offset + pitch]; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + { + int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; + unsigned int sampleout = source[sample_index]; + ifgcolor[0] = sampleout; + frac += fracstep; + } + { + int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; + unsigned int sampleout = source[sample_index]; + ifgcolor[1] = sampleout; + frac += fracstep; + } + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + int blue0 = BPART(ifgcolor[0]); + int green0 = GPART(ifgcolor[0]); + int red0 = RPART(ifgcolor[0]); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + + int blue1 = BPART(ifgcolor[1]); + int green1 = GPART(ifgcolor[1]); + int red1 = RPART(ifgcolor[1]); + int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); + + fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + 
// Blend + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + + __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); + bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + + __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); + fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_srai_epi16(_mm_sub_epi32(fg_lo, bg_lo), 8); + __m128i out_hi = _mm_srai_epi16(_mm_sub_epi32(fg_hi, bg_hi), 8); + __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + _mm_storel_epi64((__m128i*)desttmp, outcolor); + dest[offset] = desttmp[0]; + dest[offset + pitch] = desttmp[1]; + } + + if (ssecount * 2 != count) + { + int index = ssecount * 2; int offset = index * pitch; __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); // Sample + unsigned int ifgcolor[2]; int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; - unsigned int ifgcolor = source[sample_index]; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(ifgcolor), _mm_setzero_si128()); + unsigned int sampleout = source[sample_index]; + ifgcolor[0] = sampleout; + ifgcolor[1] = 0; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), 
_mm_setzero_si128()); // Shade - int blue0 = BPART(ifgcolor); - int green0 = GPART(ifgcolor); - int red0 = RPART(ifgcolor); - + int blue0 = BPART(ifgcolor[0]); + int green0 = GPART(ifgcolor[0]); + int red0 = RPART(ifgcolor[0]); int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - __m128i intensity = _mm_set_epi16(0, intensity0, intensity0, intensity0, 0, intensity0, intensity0, intensity0); + + int blue1 = BPART(ifgcolor[1]); + int green1 = GPART(ifgcolor[1]); + int red1 = RPART(ifgcolor[1]); + int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); fgcolor = _mm_mullo_epi16(fgcolor, mlight); @@ -1281,7 +2462,6 @@ namespace swrenderer outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); dest[offset] = _mm_cvtsi128_si32(outcolor); - frac += fracstep; } } else @@ -1317,12 +2497,18 @@ namespace swrenderer __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); - for (int index = 0; index < count; index++) + int ssecount = count / 2; + for (int index = 0; index < ssecount; index++) { - int offset = index * pitch; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); + int offset = index * pitch * 2; + uint32_t desttmp[2]; + desttmp[0] = dest[offset]; + desttmp[1] = dest[offset + pitch]; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); // Sample + unsigned int ifgcolor[2]; + { unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; unsigned int y0 = frac_y0 >> FRACBITS; @@ -1343,16 +2529,131 @@ namespace swrenderer unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * 
(a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int ifgcolor = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(ifgcolor), _mm_setzero_si128()); + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[0] = sampleout; + frac += fracstep; + } + { + unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; + unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; + unsigned int y0 = frac_y0 >> FRACBITS; + unsigned int y1 = frac_y1 >> FRACBITS; + + unsigned int p00 = source[y0]; + unsigned int p01 = source[y1]; + unsigned int p10 = source2[y0]; + unsigned int p11 = source2[y1]; + + unsigned int inv_b = texturefracx; + unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; + unsigned int a = 16 - inv_a; + unsigned int b = 16 - inv_b; + + unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[1] = sampleout; + frac += fracstep; + } + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - int blue0 = BPART(ifgcolor); - int green0 = GPART(ifgcolor); - int red0 = RPART(ifgcolor); - + int blue0 = BPART(ifgcolor[0]); + int green0 = GPART(ifgcolor[0]); + int 
red0 = RPART(ifgcolor[0]); int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - __m128i intensity = _mm_set_epi16(0, intensity0, intensity0, intensity0, 0, intensity0, intensity0, intensity0); + + int blue1 = BPART(ifgcolor[1]); + int green1 = GPART(ifgcolor[1]); + int red1 = RPART(ifgcolor[1]); + int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); + + fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + // Blend + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + + __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); + bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + + __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); + fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_srai_epi16(_mm_sub_epi32(fg_lo, bg_lo), 8); + __m128i out_hi = _mm_srai_epi16(_mm_sub_epi32(fg_hi, bg_hi), 8); + __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + outcolor = _mm_packus_epi16(outcolor, 
_mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + _mm_storel_epi64((__m128i*)desttmp, outcolor); + dest[offset] = desttmp[0]; + dest[offset + pitch] = desttmp[1]; + } + + if (ssecount * 2 != count) + { + int index = ssecount * 2; + int offset = index * pitch; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; + unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; + unsigned int y0 = frac_y0 >> FRACBITS; + unsigned int y1 = frac_y1 >> FRACBITS; + + unsigned int p00 = source[y0]; + unsigned int p01 = source[y1]; + unsigned int p10 = source2[y0]; + unsigned int p11 = source2[y1]; + + unsigned int inv_b = texturefracx; + unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; + unsigned int a = 16 - inv_a; + unsigned int b = 16 - inv_b; + + unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[0] = sampleout; + ifgcolor[1] = 0; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + int blue0 = BPART(ifgcolor[0]); + int green0 = GPART(ifgcolor[0]); + int red0 = RPART(ifgcolor[0]); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + + int blue1 = BPART(ifgcolor[1]); + int green1 = 
GPART(ifgcolor[1]); + int red1 = RPART(ifgcolor[1]); + int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); fgcolor = _mm_mullo_epi16(fgcolor, mlight); @@ -1386,7 +2687,6 @@ namespace swrenderer outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); dest[offset] = _mm_cvtsi128_si32(outcolor); - frac += fracstep; } } } @@ -1438,15 +2738,78 @@ namespace swrenderer __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); - for (int index = 0; index < count; index++) + int ssecount = count / 2; + for (int index = 0; index < ssecount; index++) { + int offset = index * pitch * 2; + uint32_t desttmp[2]; + desttmp[0] = dest[offset]; + desttmp[1] = dest[offset + pitch]; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + { + int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; + unsigned int sampleout = source[sample_index]; + ifgcolor[0] = sampleout; + frac += fracstep; + } + { + int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; + unsigned int sampleout = source[sample_index]; + ifgcolor[1] = sampleout; + frac += fracstep; + } + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + + __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); + bgalpha = 
_mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + + __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); + fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_srai_epi16(_mm_sub_epi32(bg_lo, fg_lo), 8); + __m128i out_hi = _mm_srai_epi16(_mm_sub_epi32(bg_hi, fg_hi), 8); + __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + _mm_storel_epi64((__m128i*)desttmp, outcolor); + dest[offset] = desttmp[0]; + dest[offset + pitch] = desttmp[1]; + } + + if (ssecount * 2 != count) + { + int index = ssecount * 2; int offset = index * pitch; __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); // Sample + unsigned int ifgcolor[2]; int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; - unsigned int ifgcolor = source[sample_index]; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(ifgcolor), _mm_setzero_si128()); + unsigned int sampleout = source[sample_index]; + ifgcolor[0] = sampleout; + ifgcolor[1] = 0; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); @@ -1478,7 +2841,6 @@ namespace swrenderer outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); dest[offset] = _mm_cvtsi128_si32(outcolor); - frac += fracstep; } } else @@ -1509,12 +2871,18 @@ namespace swrenderer __m128i srcalpha = 
_mm_set1_epi16(args.SrcAlpha()); __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); - for (int index = 0; index < count; index++) + int ssecount = count / 2; + for (int index = 0; index < ssecount; index++) { - int offset = index * pitch; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); + int offset = index * pitch * 2; + uint32_t desttmp[2]; + desttmp[0] = dest[offset]; + desttmp[1] = dest[offset + pitch]; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); // Sample + unsigned int ifgcolor[2]; + { unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; unsigned int y0 = frac_y0 >> FRACBITS; @@ -1535,8 +2903,103 @@ namespace swrenderer unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int ifgcolor = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(ifgcolor), _mm_setzero_si128()); + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[0] = sampleout; + frac += fracstep; + } + { + unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; + unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; + unsigned int y0 = frac_y0 >> FRACBITS; + unsigned int y1 = frac_y1 >> FRACBITS; + + unsigned int p00 = source[y0]; + unsigned int p01 = source[y1]; + unsigned int p10 = source2[y0]; + unsigned int p11 = source2[y1]; + + unsigned int inv_b = texturefracx; + unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; + unsigned int a = 16 - inv_a; + unsigned int b = 16 - inv_b; + + unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * 
inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[1] = sampleout; + frac += fracstep; + } + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + + __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); + bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + + __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); + fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_srai_epi16(_mm_sub_epi32(bg_lo, fg_lo), 8); + __m128i out_hi = _mm_srai_epi16(_mm_sub_epi32(bg_hi, fg_hi), 8); + __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + outcolor = _mm_packus_epi16(outcolor, 
_mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + _mm_storel_epi64((__m128i*)desttmp, outcolor); + dest[offset] = desttmp[0]; + dest[offset + pitch] = desttmp[1]; + } + + if (ssecount * 2 != count) + { + int index = ssecount * 2; + int offset = index * pitch; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; + unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; + unsigned int y0 = frac_y0 >> FRACBITS; + unsigned int y1 = frac_y1 >> FRACBITS; + + unsigned int p00 = source[y0]; + unsigned int p01 = source[y1]; + unsigned int p10 = source2[y0]; + unsigned int p11 = source2[y1]; + + unsigned int inv_b = texturefracx; + unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; + unsigned int a = 16 - inv_a; + unsigned int b = 16 - inv_b; + + unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[0] = sampleout; + ifgcolor[1] = 0; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); @@ -1568,7 +3031,6 @@ namespace swrenderer outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); dest[offset] = _mm_cvtsi128_si32(outcolor); - frac += fracstep; } } } @@ 
-1609,23 +3071,106 @@ namespace swrenderer __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); - for (int index = 0; index < count; index++) + int ssecount = count / 2; + for (int index = 0; index < ssecount; index++) { + int offset = index * pitch * 2; + uint32_t desttmp[2]; + desttmp[0] = dest[offset]; + desttmp[1] = dest[offset + pitch]; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + { + int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; + unsigned int sampleout = source[sample_index]; + ifgcolor[0] = sampleout; + frac += fracstep; + } + { + int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; + unsigned int sampleout = source[sample_index]; + ifgcolor[1] = sampleout; + frac += fracstep; + } + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + int blue0 = BPART(ifgcolor[0]); + int green0 = GPART(ifgcolor[0]); + int red0 = RPART(ifgcolor[0]); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + + int blue1 = BPART(ifgcolor[1]); + int green1 = GPART(ifgcolor[1]); + int red1 = RPART(ifgcolor[1]); + int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); + + fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + // Blend + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128i inv_alpha 
= _mm_sub_epi16(_mm_set1_epi16(256), alpha); + + __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); + bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + + __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); + fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_srai_epi16(_mm_sub_epi32(bg_lo, fg_lo), 8); + __m128i out_hi = _mm_srai_epi16(_mm_sub_epi32(bg_hi, fg_hi), 8); + __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + _mm_storel_epi64((__m128i*)desttmp, outcolor); + dest[offset] = desttmp[0]; + dest[offset + pitch] = desttmp[1]; + } + + if (ssecount * 2 != count) + { + int index = ssecount * 2; int offset = index * pitch; __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); // Sample + unsigned int ifgcolor[2]; int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; - unsigned int ifgcolor = source[sample_index]; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(ifgcolor), _mm_setzero_si128()); + unsigned int sampleout = source[sample_index]; + ifgcolor[0] = sampleout; + ifgcolor[1] = 0; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - int blue0 = BPART(ifgcolor); - int green0 = GPART(ifgcolor); - int red0 = RPART(ifgcolor); - + int blue0 = BPART(ifgcolor[0]); + int green0 = GPART(ifgcolor[0]); + int red0 = RPART(ifgcolor[0]); int intensity0 
= ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - __m128i intensity = _mm_set_epi16(0, intensity0, intensity0, intensity0, 0, intensity0, intensity0, intensity0); + + int blue1 = BPART(ifgcolor[1]); + int green1 = GPART(ifgcolor[1]); + int red1 = RPART(ifgcolor[1]); + int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); fgcolor = _mm_mullo_epi16(fgcolor, mlight); @@ -1659,7 +3204,6 @@ namespace swrenderer outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); dest[offset] = _mm_cvtsi128_si32(outcolor); - frac += fracstep; } } else @@ -1695,12 +3239,18 @@ namespace swrenderer __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); - for (int index = 0; index < count; index++) + int ssecount = count / 2; + for (int index = 0; index < ssecount; index++) { - int offset = index * pitch; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); + int offset = index * pitch * 2; + uint32_t desttmp[2]; + desttmp[0] = dest[offset]; + desttmp[1] = dest[offset + pitch]; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); // Sample + unsigned int ifgcolor[2]; + { unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; unsigned int y0 = frac_y0 >> FRACBITS; @@ -1721,16 +3271,131 @@ namespace swrenderer unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int ifgcolor = (salpha << 24) | 
(sred << 16) | (sgreen << 8) | sblue; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(ifgcolor), _mm_setzero_si128()); + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[0] = sampleout; + frac += fracstep; + } + { + unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; + unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; + unsigned int y0 = frac_y0 >> FRACBITS; + unsigned int y1 = frac_y1 >> FRACBITS; + + unsigned int p00 = source[y0]; + unsigned int p01 = source[y1]; + unsigned int p10 = source2[y0]; + unsigned int p11 = source2[y1]; + + unsigned int inv_b = texturefracx; + unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; + unsigned int a = 16 - inv_a; + unsigned int b = 16 - inv_b; + + unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[1] = sampleout; + frac += fracstep; + } + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - int blue0 = BPART(ifgcolor); - int green0 = GPART(ifgcolor); - int red0 = RPART(ifgcolor); - + int blue0 = BPART(ifgcolor[0]); + int green0 = GPART(ifgcolor[0]); + int red0 = RPART(ifgcolor[0]); int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - __m128i intensity = _mm_set_epi16(0, intensity0, intensity0, intensity0, 0, intensity0, intensity0, intensity0); + + int blue1 = 
BPART(ifgcolor[1]); + int green1 = GPART(ifgcolor[1]); + int red1 = RPART(ifgcolor[1]); + int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); + + fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + // Blend + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + + __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); + bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + + __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); + fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_srai_epi16(_mm_sub_epi32(bg_lo, fg_lo), 8); + __m128i out_hi = _mm_srai_epi16(_mm_sub_epi32(bg_hi, fg_hi), 8); + __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + _mm_storel_epi64((__m128i*)desttmp, outcolor); + dest[offset] = desttmp[0]; + dest[offset + pitch] = desttmp[1]; + } + + if (ssecount * 2 != count) + 
{ + int index = ssecount * 2; + int offset = index * pitch; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; + unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; + unsigned int y0 = frac_y0 >> FRACBITS; + unsigned int y1 = frac_y1 >> FRACBITS; + + unsigned int p00 = source[y0]; + unsigned int p01 = source[y1]; + unsigned int p10 = source2[y0]; + unsigned int p11 = source2[y1]; + + unsigned int inv_b = texturefracx; + unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; + unsigned int a = 16 - inv_a; + unsigned int b = 16 - inv_b; + + unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[0] = sampleout; + ifgcolor[1] = 0; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + int blue0 = BPART(ifgcolor[0]); + int green0 = GPART(ifgcolor[0]); + int red0 = RPART(ifgcolor[0]); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + + int blue1 = BPART(ifgcolor[1]); + int green1 = GPART(ifgcolor[1]); + int red1 = RPART(ifgcolor[1]); + int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); 
fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); fgcolor = _mm_mullo_epi16(fgcolor, mlight); @@ -1764,7 +3429,6 @@ namespace swrenderer outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); dest[offset] = _mm_cvtsi128_si32(outcolor); - frac += fracstep; } } } diff --git a/src/swrenderer/drawers/r_draw_wall32.php b/src/swrenderer/drawers/r_draw_wall32.php index d51c52455d..81abbfe693 100644 --- a/src/swrenderer/drawers/r_draw_wall32.php +++ b/src/swrenderer/drawers/r_draw_wall32.php @@ -123,13 +123,56 @@ namespace swrenderer __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); - for (int index = 0; index < count; index++) + int ssecount = count / 2; + for (int index = 0; index < ssecount; index++) { - int offset = index * pitch; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); + int offset = index * pitch * 2; + uint32_t desttmp[2]; + desttmp[0] = dest[offset]; + desttmp[1] = dest[offset + pitch]; + + __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); + // Sample + unsigned int ifgcolor[2]; + { + ifgcolor[0] = sampleout; + frac += fracstep; + } + { + + ifgcolor[1] = sampleout; + frac += fracstep; + } + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + + + // Blend + + + _mm_storel_epi64((__m128i*)desttmp, outcolor); + dest[offset] = desttmp[0]; + dest[offset + pitch] = desttmp[1]; + } + + if (ssecount * 2 != count) + { + int index = ssecount * 2; + int offset = index * pitch; + + __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); + + + // Sample + unsigned int ifgcolor[2]; + + ifgcolor[0] = sampleout; + ifgcolor[1] = 0; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade @@ -138,7 +181,6 @@ namespace swrenderer 
dest[offset] = _mm_cvtsi128_si32(outcolor); - frac += fracstep; } int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; - unsigned int ifgcolor = source[sample_index]; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(ifgcolor), _mm_setzero_si128()); + unsigned int sampleout = source[sample_index]; @@ -172,8 +213,7 @@ namespace swrenderer unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int ifgcolor = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(ifgcolor), _mm_setzero_si128()); + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - int blue0 = BPART(ifgcolor); - int green0 = GPART(ifgcolor); - int red0 = RPART(ifgcolor); - + int blue0 = BPART(ifgcolor[0]); + int green0 = GPART(ifgcolor[0]); + int red0 = RPART(ifgcolor[0]); int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - __m128i intensity = _mm_set_epi16(0, intensity0, intensity0, intensity0, 0, intensity0, intensity0, intensity0); + + int blue1 = BPART(ifgcolor[1]); + int green1 = GPART(ifgcolor[1]); + int red1 = RPART(ifgcolor[1]); + int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); fgcolor = _mm_mullo_epi16(fgcolor, mlight); From 2bedfca071341b2b303c9b93438db0981d2fc61d Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 19 Feb 2017 00:49:46 +0100 Subject: [PATCH 861/912] Add php script for sprite drawers --- src/swrenderer/drawers/r_draw_rgba.cpp | 1 + 
src/swrenderer/drawers/r_draw_sprite32.h | 5515 ++++++++++++++++++++ src/swrenderer/drawers/r_draw_sprite32.php | 363 ++ 3 files changed, 5879 insertions(+) create mode 100644 src/swrenderer/drawers/r_draw_sprite32.h create mode 100644 src/swrenderer/drawers/r_draw_sprite32.php diff --git a/src/swrenderer/drawers/r_draw_rgba.cpp b/src/swrenderer/drawers/r_draw_rgba.cpp index b022636921..b61cdb7303 100644 --- a/src/swrenderer/drawers/r_draw_rgba.cpp +++ b/src/swrenderer/drawers/r_draw_rgba.cpp @@ -41,6 +41,7 @@ #include "swrenderer/viewport/r_viewport.h" #include "swrenderer/scene/r_light.h" #include "r_draw_wall32.h" +#include "r_draw_sprite32.h" #include "gi.h" #include "stats.h" diff --git a/src/swrenderer/drawers/r_draw_sprite32.h b/src/swrenderer/drawers/r_draw_sprite32.h new file mode 100644 index 0000000000..bd23b550f2 --- /dev/null +++ b/src/swrenderer/drawers/r_draw_sprite32.h @@ -0,0 +1,5515 @@ +/* +** Drawer commands for sprites +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. +** +*/ + +/* + Warning: this C++ source file has been auto-generated. Please modify the original php script that generated it. 
+*/ + +#pragma once + +#include "swrenderer/drawers/r_draw_rgba.h" +#include "swrenderer/viewport/r_walldrawer.h" + +namespace swrenderer +{ + class DrawSpriteCopy32Command : public DrawerCommand + { + protected: + SpriteDrawerArgs args; + + public: + DrawSpriteCopy32Command(const SpriteDrawerArgs &drawerargs) : args(drawerargs) { } + + void Execute(DrawerThread *thread) override + { + auto shade_constants = args.ColormapConstants(); + if (shade_constants.simple_shade) + { + const uint32_t *source = (const uint32_t*)args.TexturePixels(); + const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); + const uint8_t *colormap = args.Colormap(); + const uint32_t *translation = (const uint32_t*)args.TranslationMap(); + bool is_nearest_filter = (source2 == nullptr); + if (is_nearest_filter) + { + int textureheight = args.TextureHeight(); + uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; + + // Shade constants + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + + int count = args.Count(); + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); + uint32_t fracstep = args.TextureVStep(); + uint32_t frac = args.TextureVPos(); + uint32_t texturefracx = args.TextureUPos(); + uint32_t *dest = (uint32_t*)args.Dest(); + int dest_y = args.DestY(); + + count = thread->count_for_thread(dest_y, count); + if (count <= 0) return; + frac += thread->skipped_by_thread(dest_y) * fracstep; + dest = thread->dest_for_thread(dest_y, pitch, dest); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); + __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + uint32_t srccolor = args.SrcColorBgra(); + uint32_t color = args.SolidColor(); + + int ssecount = count / 2; + for (int index 
= 0; index < ssecount; index++) + { + int offset = index * pitch * 2; + uint32_t desttmp[2]; + desttmp[0] = dest[offset]; + desttmp[1] = dest[offset + pitch]; + + // Sample + unsigned int ifgcolor[2]; + { + int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; + unsigned int sampleout = source[sample_index]; + ifgcolor[0] = sampleout; + frac += fracstep; + } + { + int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; + unsigned int sampleout = source[sample_index]; + ifgcolor[1] = sampleout; + frac += fracstep; + } + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + + // Blend + __m128i outcolor = fgcolor; + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + + _mm_storel_epi64((__m128i*)desttmp, outcolor); + dest[offset] = desttmp[0]; + dest[offset + pitch] = desttmp[1]; + } + + if (ssecount * 2 != count) + { + int index = ssecount * 2; + int offset = index * pitch; + + // Sample + unsigned int ifgcolor[2]; + int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; + unsigned int sampleout = source[sample_index]; + ifgcolor[0] = sampleout; + ifgcolor[1] = 0; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + + // Blend + __m128i outcolor = fgcolor; + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + + dest[offset] = _mm_cvtsi128_si32(outcolor); + } + } + else + { + int textureheight = args.TextureHeight(); + uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; + + // Shade constants + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + + int count = args.Count(); + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); + uint32_t fracstep = 
args.TextureVStep(); + uint32_t frac = args.TextureVPos(); + uint32_t texturefracx = args.TextureUPos(); + uint32_t *dest = (uint32_t*)args.Dest(); + int dest_y = args.DestY(); + + count = thread->count_for_thread(dest_y, count); + if (count <= 0) return; + frac += thread->skipped_by_thread(dest_y) * fracstep; + dest = thread->dest_for_thread(dest_y, pitch, dest); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + frac -= one / 2; + __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); + __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + uint32_t srccolor = args.SrcColorBgra(); + uint32_t color = args.SolidColor(); + + int ssecount = count / 2; + for (int index = 0; index < ssecount; index++) + { + int offset = index * pitch * 2; + uint32_t desttmp[2]; + desttmp[0] = dest[offset]; + desttmp[1] = dest[offset + pitch]; + + // Sample + unsigned int ifgcolor[2]; + { + unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; + unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; + unsigned int y0 = frac_y0 >> FRACBITS; + unsigned int y1 = frac_y1 >> FRACBITS; + + unsigned int p00 = source[y0]; + unsigned int p01 = source[y1]; + unsigned int p10 = source2[y0]; + unsigned int p11 = source2[y1]; + + unsigned int inv_b = texturefracx; + unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; + unsigned int a = 16 - inv_a; + unsigned int b = 16 - inv_b; + + unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout 
= (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[0] = sampleout; + frac += fracstep; + } + { + unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; + unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; + unsigned int y0 = frac_y0 >> FRACBITS; + unsigned int y1 = frac_y1 >> FRACBITS; + + unsigned int p00 = source[y0]; + unsigned int p01 = source[y1]; + unsigned int p10 = source2[y0]; + unsigned int p11 = source2[y1]; + + unsigned int inv_b = texturefracx; + unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; + unsigned int a = 16 - inv_a; + unsigned int b = 16 - inv_b; + + unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[1] = sampleout; + frac += fracstep; + } + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + + // Blend + __m128i outcolor = fgcolor; + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + + _mm_storel_epi64((__m128i*)desttmp, outcolor); + dest[offset] = desttmp[0]; + dest[offset + pitch] = desttmp[1]; + } + + if (ssecount * 2 != count) + { + int index = ssecount * 2; + int offset = index * pitch; + + // Sample + unsigned int ifgcolor[2]; + unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; + unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; + unsigned int y0 = frac_y0 >> FRACBITS; + unsigned int y1 = frac_y1 
>> FRACBITS; + + unsigned int p00 = source[y0]; + unsigned int p01 = source[y1]; + unsigned int p10 = source2[y0]; + unsigned int p11 = source2[y1]; + + unsigned int inv_b = texturefracx; + unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; + unsigned int a = 16 - inv_a; + unsigned int b = 16 - inv_b; + + unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[0] = sampleout; + ifgcolor[1] = 0; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + + // Blend + __m128i outcolor = fgcolor; + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + + dest[offset] = _mm_cvtsi128_si32(outcolor); + } + } + } + else + { + const uint32_t *source = (const uint32_t*)args.TexturePixels(); + const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); + const uint8_t *colormap = args.Colormap(); + const uint32_t *translation = (const uint32_t*)args.TranslationMap(); + bool is_nearest_filter = (source2 == nullptr); + if (is_nearest_filter) + { + int textureheight = args.TextureHeight(); + uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; + + // Shade constants + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - 
light, 256 - light, 256 - light); + __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); + __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); + shade_fade = _mm_mullo_epi16(shade_fade, inv_light); + __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); + int desaturate = shade_constants.desaturate; + + int count = args.Count(); + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); + uint32_t fracstep = args.TextureVStep(); + uint32_t frac = args.TextureVPos(); + uint32_t texturefracx = args.TextureUPos(); + uint32_t *dest = (uint32_t*)args.Dest(); + int dest_y = args.DestY(); + + count = thread->count_for_thread(dest_y, count); + if (count <= 0) return; + frac += thread->skipped_by_thread(dest_y) * fracstep; + dest = thread->dest_for_thread(dest_y, pitch, dest); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); + __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + uint32_t srccolor = args.SrcColorBgra(); + uint32_t color = args.SolidColor(); + + int ssecount = count / 2; + for (int index = 0; index < ssecount; index++) + { + int offset = index * pitch * 2; + uint32_t desttmp[2]; + desttmp[0] = dest[offset]; + desttmp[1] = dest[offset + pitch]; + + // Sample + unsigned int ifgcolor[2]; + { + int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; + unsigned int 
sampleout = source[sample_index]; + ifgcolor[0] = sampleout; + frac += fracstep; + } + { + int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; + unsigned int sampleout = source[sample_index]; + ifgcolor[1] = sampleout; + frac += fracstep; + } + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + + // Blend + __m128i outcolor = fgcolor; + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + + _mm_storel_epi64((__m128i*)desttmp, outcolor); + dest[offset] = desttmp[0]; + dest[offset + pitch] = desttmp[1]; + } + + if (ssecount * 2 != count) + { + int index = ssecount * 2; + int offset = index * pitch; + + // Sample + unsigned int ifgcolor[2]; + int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; + unsigned int sampleout = source[sample_index]; + ifgcolor[0] = sampleout; + ifgcolor[1] = 0; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + + // Blend + __m128i outcolor = fgcolor; + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + + dest[offset] = _mm_cvtsi128_si32(outcolor); + } + } + else + { + int textureheight = args.TextureHeight(); + uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; + + // Shade constants + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); + __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, 
shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); + shade_fade = _mm_mullo_epi16(shade_fade, inv_light); + __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); + int desaturate = shade_constants.desaturate; + + int count = args.Count(); + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); + uint32_t fracstep = args.TextureVStep(); + uint32_t frac = args.TextureVPos(); + uint32_t texturefracx = args.TextureUPos(); + uint32_t *dest = (uint32_t*)args.Dest(); + int dest_y = args.DestY(); + + count = thread->count_for_thread(dest_y, count); + if (count <= 0) return; + frac += thread->skipped_by_thread(dest_y) * fracstep; + dest = thread->dest_for_thread(dest_y, pitch, dest); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + frac -= one / 2; + __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); + __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + uint32_t srccolor = args.SrcColorBgra(); + uint32_t color = args.SolidColor(); + + int ssecount = count / 2; + for (int index = 0; index < ssecount; index++) + { + int offset = index * pitch * 2; + uint32_t desttmp[2]; + desttmp[0] = dest[offset]; + desttmp[1] = dest[offset + pitch]; + + // Sample + unsigned int ifgcolor[2]; + { + unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; + unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; + unsigned int y0 = frac_y0 >> FRACBITS; + unsigned int y1 = frac_y1 >> FRACBITS; + + unsigned int p00 = source[y0]; + unsigned int p01 = source[y1]; + unsigned int p10 = source2[y0]; + unsigned int p11 = source2[y1]; + + unsigned int inv_b = texturefracx; + unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; + unsigned int a = 16 - inv_a; + unsigned int b = 16 - inv_b; + + 
unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[0] = sampleout; + frac += fracstep; + } + { + unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; + unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; + unsigned int y0 = frac_y0 >> FRACBITS; + unsigned int y1 = frac_y1 >> FRACBITS; + + unsigned int p00 = source[y0]; + unsigned int p01 = source[y1]; + unsigned int p10 = source2[y0]; + unsigned int p11 = source2[y1]; + + unsigned int inv_b = texturefracx; + unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; + unsigned int a = 16 - inv_a; + unsigned int b = 16 - inv_b; + + unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[1] = sampleout; + frac += fracstep; + } + __m128i fgcolor = 
_mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + + // Blend + __m128i outcolor = fgcolor; + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + + _mm_storel_epi64((__m128i*)desttmp, outcolor); + dest[offset] = desttmp[0]; + dest[offset + pitch] = desttmp[1]; + } + + if (ssecount * 2 != count) + { + int index = ssecount * 2; + int offset = index * pitch; + + // Sample + unsigned int ifgcolor[2]; + unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; + unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; + unsigned int y0 = frac_y0 >> FRACBITS; + unsigned int y1 = frac_y1 >> FRACBITS; + + unsigned int p00 = source[y0]; + unsigned int p01 = source[y1]; + unsigned int p10 = source2[y0]; + unsigned int p11 = source2[y1]; + + unsigned int inv_b = texturefracx; + unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; + unsigned int a = 16 - inv_a; + unsigned int b = 16 - inv_b; + + unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[0] = sampleout; + ifgcolor[1] = 0; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + + // Blend + __m128i outcolor = fgcolor; + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + + dest[offset] = _mm_cvtsi128_si32(outcolor); + } + } + } + } + + FString DebugInfo() override { return 
"DrawSpriteCopy32Command"; } + }; + + class DrawSprite32Command : public DrawerCommand + { + protected: + SpriteDrawerArgs args; + + public: + DrawSprite32Command(const SpriteDrawerArgs &drawerargs) : args(drawerargs) { } + + void Execute(DrawerThread *thread) override + { + auto shade_constants = args.ColormapConstants(); + if (shade_constants.simple_shade) + { + const uint32_t *source = (const uint32_t*)args.TexturePixels(); + const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); + const uint8_t *colormap = args.Colormap(); + const uint32_t *translation = (const uint32_t*)args.TranslationMap(); + bool is_nearest_filter = (source2 == nullptr); + if (is_nearest_filter) + { + int textureheight = args.TextureHeight(); + uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; + + // Shade constants + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + + int count = args.Count(); + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); + uint32_t fracstep = args.TextureVStep(); + uint32_t frac = args.TextureVPos(); + uint32_t texturefracx = args.TextureUPos(); + uint32_t *dest = (uint32_t*)args.Dest(); + int dest_y = args.DestY(); + + count = thread->count_for_thread(dest_y, count); + if (count <= 0) return; + frac += thread->skipped_by_thread(dest_y) * fracstep; + dest = thread->dest_for_thread(dest_y, pitch, dest); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); + __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + uint32_t srccolor = args.SrcColorBgra(); + uint32_t color = args.SolidColor(); + + int ssecount = count / 2; + for (int index = 0; index < ssecount; index++) + { + int offset = index * pitch * 2; + uint32_t desttmp[2]; + desttmp[0] = 
dest[offset]; + desttmp[1] = dest[offset + pitch]; + + // Sample + unsigned int ifgcolor[2]; + { + int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; + unsigned int sampleout = source[sample_index]; + ifgcolor[0] = sampleout; + frac += fracstep; + } + { + int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; + unsigned int sampleout = source[sample_index]; + ifgcolor[1] = sampleout; + frac += fracstep; + } + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + __m128i outcolor = fgcolor; + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + + _mm_storel_epi64((__m128i*)desttmp, outcolor); + dest[offset] = desttmp[0]; + dest[offset + pitch] = desttmp[1]; + } + + if (ssecount * 2 != count) + { + int index = ssecount * 2; + int offset = index * pitch; + + // Sample + unsigned int ifgcolor[2]; + int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; + unsigned int sampleout = source[sample_index]; + ifgcolor[0] = sampleout; + ifgcolor[1] = 0; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + __m128i outcolor = fgcolor; + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + + dest[offset] = _mm_cvtsi128_si32(outcolor); + } + } + else + { + int textureheight = args.TextureHeight(); + uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; + + // Shade constants + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + + int count = args.Count(); + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); + 
uint32_t fracstep = args.TextureVStep(); + uint32_t frac = args.TextureVPos(); + uint32_t texturefracx = args.TextureUPos(); + uint32_t *dest = (uint32_t*)args.Dest(); + int dest_y = args.DestY(); + + count = thread->count_for_thread(dest_y, count); + if (count <= 0) return; + frac += thread->skipped_by_thread(dest_y) * fracstep; + dest = thread->dest_for_thread(dest_y, pitch, dest); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + frac -= one / 2; + __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); + __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + uint32_t srccolor = args.SrcColorBgra(); + uint32_t color = args.SolidColor(); + + int ssecount = count / 2; + for (int index = 0; index < ssecount; index++) + { + int offset = index * pitch * 2; + uint32_t desttmp[2]; + desttmp[0] = dest[offset]; + desttmp[1] = dest[offset + pitch]; + + // Sample + unsigned int ifgcolor[2]; + { + unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; + unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; + unsigned int y0 = frac_y0 >> FRACBITS; + unsigned int y1 = frac_y1 >> FRACBITS; + + unsigned int p00 = source[y0]; + unsigned int p01 = source[y1]; + unsigned int p10 = source2[y0]; + unsigned int p11 = source2[y1]; + + unsigned int inv_b = texturefracx; + unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; + unsigned int a = 16 - inv_a; + unsigned int b = 16 - inv_b; + + unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + 
unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[0] = sampleout; + frac += fracstep; + } + { + unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; + unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; + unsigned int y0 = frac_y0 >> FRACBITS; + unsigned int y1 = frac_y1 >> FRACBITS; + + unsigned int p00 = source[y0]; + unsigned int p01 = source[y1]; + unsigned int p10 = source2[y0]; + unsigned int p11 = source2[y1]; + + unsigned int inv_b = texturefracx; + unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; + unsigned int a = 16 - inv_a; + unsigned int b = 16 - inv_b; + + unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[1] = sampleout; + frac += fracstep; + } + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + __m128i outcolor = fgcolor; + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + + _mm_storel_epi64((__m128i*)desttmp, outcolor); + dest[offset] = desttmp[0]; + dest[offset + pitch] = desttmp[1]; + } + + if (ssecount * 2 != count) + { + int index = ssecount * 2; + int offset = index * pitch; + + // Sample + unsigned int ifgcolor[2]; + unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; + unsigned int frac_y1 = ((frac + one) >> FRACBITS) 
* textureheight; + unsigned int y0 = frac_y0 >> FRACBITS; + unsigned int y1 = frac_y1 >> FRACBITS; + + unsigned int p00 = source[y0]; + unsigned int p01 = source[y1]; + unsigned int p10 = source2[y0]; + unsigned int p11 = source2[y1]; + + unsigned int inv_b = texturefracx; + unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; + unsigned int a = 16 - inv_a; + unsigned int b = 16 - inv_b; + + unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[0] = sampleout; + ifgcolor[1] = 0; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + __m128i outcolor = fgcolor; + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + + dest[offset] = _mm_cvtsi128_si32(outcolor); + } + } + } + else + { + const uint32_t *source = (const uint32_t*)args.TexturePixels(); + const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); + const uint8_t *colormap = args.Colormap(); + const uint32_t *translation = (const uint32_t*)args.TranslationMap(); + bool is_nearest_filter = (source2 == nullptr); + if (is_nearest_filter) + { + int textureheight = args.TextureHeight(); + uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; + + // Shade constants + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = 
_mm_set_epi16(256, light, light, light, 256, light, light, light); + __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); + __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); + shade_fade = _mm_mullo_epi16(shade_fade, inv_light); + __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); + int desaturate = shade_constants.desaturate; + + int count = args.Count(); + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); + uint32_t fracstep = args.TextureVStep(); + uint32_t frac = args.TextureVPos(); + uint32_t texturefracx = args.TextureUPos(); + uint32_t *dest = (uint32_t*)args.Dest(); + int dest_y = args.DestY(); + + count = thread->count_for_thread(dest_y, count); + if (count <= 0) return; + frac += thread->skipped_by_thread(dest_y) * fracstep; + dest = thread->dest_for_thread(dest_y, pitch, dest); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); + __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + uint32_t srccolor = args.SrcColorBgra(); + uint32_t color = args.SolidColor(); + + int ssecount = count / 2; + for (int index = 0; index < ssecount; index++) + { + int offset = index * pitch * 2; + uint32_t desttmp[2]; + desttmp[0] = dest[offset]; + desttmp[1] = 
dest[offset + pitch]; + + // Sample + unsigned int ifgcolor[2]; + { + int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; + unsigned int sampleout = source[sample_index]; + ifgcolor[0] = sampleout; + frac += fracstep; + } + { + int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; + unsigned int sampleout = source[sample_index]; + ifgcolor[1] = sampleout; + frac += fracstep; + } + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + __m128i outcolor = fgcolor; + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + + _mm_storel_epi64((__m128i*)desttmp, outcolor); + dest[offset] = desttmp[0]; + dest[offset + pitch] = desttmp[1]; + } + + if (ssecount * 2 != count) + { + int index = ssecount * 2; + int offset = index * pitch; + + // Sample + unsigned int ifgcolor[2]; + int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; + unsigned int sampleout = source[sample_index]; + ifgcolor[0] = sampleout; + ifgcolor[1] = 0; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + __m128i outcolor = fgcolor; + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + + dest[offset] = _mm_cvtsi128_si32(outcolor); + } + } + else + { + int textureheight = args.TextureHeight(); + uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; + + // Shade constants + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - 
shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); + __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); + shade_fade = _mm_mullo_epi16(shade_fade, inv_light); + __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); + int desaturate = shade_constants.desaturate; + + int count = args.Count(); + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); + uint32_t fracstep = args.TextureVStep(); + uint32_t frac = args.TextureVPos(); + uint32_t texturefracx = args.TextureUPos(); + uint32_t *dest = (uint32_t*)args.Dest(); + int dest_y = args.DestY(); + + count = thread->count_for_thread(dest_y, count); + if (count <= 0) return; + frac += thread->skipped_by_thread(dest_y) * fracstep; + dest = thread->dest_for_thread(dest_y, pitch, dest); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + frac -= one / 2; + __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); + __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + uint32_t srccolor = args.SrcColorBgra(); + uint32_t color = args.SolidColor(); + + int ssecount = count / 2; + for (int index = 0; index < ssecount; index++) + { + int offset = index * pitch * 2; + uint32_t desttmp[2]; + desttmp[0] = dest[offset]; + desttmp[1] = dest[offset + pitch]; + + // Sample + unsigned int ifgcolor[2]; + { + unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; + unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; + unsigned int y0 = frac_y0 >> FRACBITS; + unsigned int y1 = frac_y1 >> FRACBITS; + + unsigned int 
p00 = source[y0]; + unsigned int p01 = source[y1]; + unsigned int p10 = source2[y0]; + unsigned int p11 = source2[y1]; + + unsigned int inv_b = texturefracx; + unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; + unsigned int a = 16 - inv_a; + unsigned int b = 16 - inv_b; + + unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[0] = sampleout; + frac += fracstep; + } + { + unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; + unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; + unsigned int y0 = frac_y0 >> FRACBITS; + unsigned int y1 = frac_y1 >> FRACBITS; + + unsigned int p00 = source[y0]; + unsigned int p01 = source[y1]; + unsigned int p10 = source2[y0]; + unsigned int p11 = source2[y1]; + + unsigned int inv_b = texturefracx; + unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; + unsigned int a = 16 - inv_a; + unsigned int b = 16 - inv_b; + + unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * 
b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[1] = sampleout; + frac += fracstep; + } + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + __m128i outcolor = fgcolor; + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + + _mm_storel_epi64((__m128i*)desttmp, outcolor); + dest[offset] = desttmp[0]; + dest[offset + pitch] = desttmp[1]; + } + + if (ssecount * 2 != count) + { + int index = ssecount * 2; + int offset = index * pitch; + + // Sample + unsigned int ifgcolor[2]; + unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; + unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; + unsigned int y0 = frac_y0 >> FRACBITS; + unsigned int y1 = frac_y1 >> FRACBITS; + + unsigned int p00 = source[y0]; + unsigned int p01 = source[y1]; + unsigned int p10 = source2[y0]; + unsigned int p11 = source2[y1]; + + unsigned int inv_b = texturefracx; + unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; + unsigned int a = 16 - inv_a; + unsigned int b = 16 - inv_b; + + unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[0] = sampleout; + ifgcolor[1] = 0; + __m128i fgcolor = 
_mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + __m128i outcolor = fgcolor; + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + + dest[offset] = _mm_cvtsi128_si32(outcolor); + } + } + } + } + + FString DebugInfo() override { return "DrawSprite32Command"; } + }; + + class DrawSpriteAddClamp32Command : public DrawerCommand + { + protected: + SpriteDrawerArgs args; + + public: + DrawSpriteAddClamp32Command(const SpriteDrawerArgs &drawerargs) : args(drawerargs) { } + + void Execute(DrawerThread *thread) override + { + auto shade_constants = args.ColormapConstants(); + if (shade_constants.simple_shade) + { + const uint32_t *source = (const uint32_t*)args.TexturePixels(); + const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); + const uint8_t *colormap = args.Colormap(); + const uint32_t *translation = (const uint32_t*)args.TranslationMap(); + bool is_nearest_filter = (source2 == nullptr); + if (is_nearest_filter) + { + int textureheight = args.TextureHeight(); + uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; + + // Shade constants + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + + int count = args.Count(); + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); + uint32_t fracstep = args.TextureVStep(); + uint32_t frac = args.TextureVPos(); + uint32_t texturefracx = args.TextureUPos(); + uint32_t *dest = (uint32_t*)args.Dest(); + int dest_y = args.DestY(); + + count = thread->count_for_thread(dest_y, count); + if (count <= 0) return; + frac += thread->skipped_by_thread(dest_y) * fracstep; + dest = thread->dest_for_thread(dest_y, pitch, dest); + fracstep *= thread->num_cores; + 
pitch *= thread->num_cores; + __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); + __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + uint32_t srccolor = args.SrcColorBgra(); + uint32_t color = args.SolidColor(); + + int ssecount = count / 2; + for (int index = 0; index < ssecount; index++) + { + int offset = index * pitch * 2; + uint32_t desttmp[2]; + desttmp[0] = dest[offset]; + desttmp[1] = dest[offset + pitch]; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + { + int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; + unsigned int sampleout = source[sample_index]; + ifgcolor[0] = sampleout; + frac += fracstep; + } + { + int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; + unsigned int sampleout = source[sample_index]; + ifgcolor[1] = sampleout; + frac += fracstep; + } + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + + __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); + bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + + __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); + fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = 
_mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_srai_epi16(_mm_add_epi32(fg_lo, bg_lo), 8); + __m128i out_hi = _mm_srai_epi16(_mm_add_epi32(fg_hi, bg_hi), 8); + __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + _mm_storel_epi64((__m128i*)desttmp, outcolor); + dest[offset] = desttmp[0]; + dest[offset + pitch] = desttmp[1]; + } + + if (ssecount * 2 != count) + { + int index = ssecount * 2; + int offset = index * pitch; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; + unsigned int sampleout = source[sample_index]; + ifgcolor[0] = sampleout; + ifgcolor[1] = 0; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + + __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); + bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + + __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); + fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, 
_mm_setzero_si128()); + + __m128i out_lo = _mm_srai_epi16(_mm_add_epi32(fg_lo, bg_lo), 8); + __m128i out_hi = _mm_srai_epi16(_mm_add_epi32(fg_hi, bg_hi), 8); + __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + dest[offset] = _mm_cvtsi128_si32(outcolor); + } + } + else + { + int textureheight = args.TextureHeight(); + uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; + + // Shade constants + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + + int count = args.Count(); + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); + uint32_t fracstep = args.TextureVStep(); + uint32_t frac = args.TextureVPos(); + uint32_t texturefracx = args.TextureUPos(); + uint32_t *dest = (uint32_t*)args.Dest(); + int dest_y = args.DestY(); + + count = thread->count_for_thread(dest_y, count); + if (count <= 0) return; + frac += thread->skipped_by_thread(dest_y) * fracstep; + dest = thread->dest_for_thread(dest_y, pitch, dest); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + frac -= one / 2; + __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); + __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + uint32_t srccolor = args.SrcColorBgra(); + uint32_t color = args.SolidColor(); + + int ssecount = count / 2; + for (int index = 0; index < ssecount; index++) + { + int offset = index * pitch * 2; + uint32_t desttmp[2]; + desttmp[0] = dest[offset]; + desttmp[1] = dest[offset + pitch]; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + { + unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; + unsigned int 
frac_y1 = ((frac + one) >> FRACBITS) * textureheight; + unsigned int y0 = frac_y0 >> FRACBITS; + unsigned int y1 = frac_y1 >> FRACBITS; + + unsigned int p00 = source[y0]; + unsigned int p01 = source[y1]; + unsigned int p10 = source2[y0]; + unsigned int p11 = source2[y1]; + + unsigned int inv_b = texturefracx; + unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; + unsigned int a = 16 - inv_a; + unsigned int b = 16 - inv_b; + + unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[0] = sampleout; + frac += fracstep; + } + { + unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; + unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; + unsigned int y0 = frac_y0 >> FRACBITS; + unsigned int y1 = frac_y1 >> FRACBITS; + + unsigned int p00 = source[y0]; + unsigned int p01 = source[y1]; + unsigned int p10 = source2[y0]; + unsigned int p11 = source2[y1]; + + unsigned int inv_b = texturefracx; + unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; + unsigned int a = 16 - inv_a; + unsigned int b = 16 - inv_b; + + unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * 
(inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[1] = sampleout; + frac += fracstep; + } + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + + __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); + bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + + __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); + fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_srai_epi16(_mm_add_epi32(fg_lo, bg_lo), 8); + __m128i out_hi = _mm_srai_epi16(_mm_add_epi32(fg_hi, bg_hi), 8); + __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + _mm_storel_epi64((__m128i*)desttmp, outcolor); + dest[offset] = desttmp[0]; + dest[offset + pitch] = desttmp[1]; + } + + if (ssecount * 2 != count) + { + int index = ssecount * 2; + 
int offset = index * pitch; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; + unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; + unsigned int y0 = frac_y0 >> FRACBITS; + unsigned int y1 = frac_y1 >> FRACBITS; + + unsigned int p00 = source[y0]; + unsigned int p01 = source[y1]; + unsigned int p10 = source2[y0]; + unsigned int p11 = source2[y1]; + + unsigned int inv_b = texturefracx; + unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; + unsigned int a = 16 - inv_a; + unsigned int b = 16 - inv_b; + + unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[0] = sampleout; + ifgcolor[1] = 0; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + + __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); + bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), 
_mm_set1_epi16(128)), 8); + + __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); + fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_srai_epi16(_mm_add_epi32(fg_lo, bg_lo), 8); + __m128i out_hi = _mm_srai_epi16(_mm_add_epi32(fg_hi, bg_hi), 8); + __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + dest[offset] = _mm_cvtsi128_si32(outcolor); + } + } + } + else + { + const uint32_t *source = (const uint32_t*)args.TexturePixels(); + const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); + const uint8_t *colormap = args.Colormap(); + const uint32_t *translation = (const uint32_t*)args.TranslationMap(); + bool is_nearest_filter = (source2 == nullptr); + if (is_nearest_filter) + { + int textureheight = args.TextureHeight(); + uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; + + // Shade constants + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); + __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, 
shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); + shade_fade = _mm_mullo_epi16(shade_fade, inv_light); + __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); + int desaturate = shade_constants.desaturate; + + int count = args.Count(); + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); + uint32_t fracstep = args.TextureVStep(); + uint32_t frac = args.TextureVPos(); + uint32_t texturefracx = args.TextureUPos(); + uint32_t *dest = (uint32_t*)args.Dest(); + int dest_y = args.DestY(); + + count = thread->count_for_thread(dest_y, count); + if (count <= 0) return; + frac += thread->skipped_by_thread(dest_y) * fracstep; + dest = thread->dest_for_thread(dest_y, pitch, dest); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); + __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + uint32_t srccolor = args.SrcColorBgra(); + uint32_t color = args.SolidColor(); + + int ssecount = count / 2; + for (int index = 0; index < ssecount; index++) + { + int offset = index * pitch * 2; + uint32_t desttmp[2]; + desttmp[0] = dest[offset]; + desttmp[1] = dest[offset + pitch]; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + { + int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; + unsigned int sampleout = source[sample_index]; + ifgcolor[0] = sampleout; + frac += fracstep; + } + { + int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; + unsigned int sampleout = source[sample_index]; + ifgcolor[1] = sampleout; + frac += fracstep; + } + __m128i fgcolor = 
_mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + + __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); + bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + + __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); + fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_srai_epi16(_mm_add_epi32(fg_lo, bg_lo), 8); + __m128i out_hi = _mm_srai_epi16(_mm_add_epi32(fg_hi, bg_hi), 8); + __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + _mm_storel_epi64((__m128i*)desttmp, outcolor); + dest[offset] = desttmp[0]; + dest[offset + pitch] = desttmp[1]; + } + + if (ssecount * 2 != count) + { + int index = ssecount * 2; + int offset = index * pitch; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; + unsigned int sampleout = source[sample_index]; + ifgcolor[0] = sampleout; + ifgcolor[1] = 0; + __m128i fgcolor = 
_mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + + __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); + bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + + __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); + fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_srai_epi16(_mm_add_epi32(fg_lo, bg_lo), 8); + __m128i out_hi = _mm_srai_epi16(_mm_add_epi32(fg_hi, bg_hi), 8); + __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + dest[offset] = _mm_cvtsi128_si32(outcolor); + } + } + else + { + int textureheight = args.TextureHeight(); + uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; + + // Shade constants + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - 
shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); + __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); + shade_fade = _mm_mullo_epi16(shade_fade, inv_light); + __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); + int desaturate = shade_constants.desaturate; + + int count = args.Count(); + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); + uint32_t fracstep = args.TextureVStep(); + uint32_t frac = args.TextureVPos(); + uint32_t texturefracx = args.TextureUPos(); + uint32_t *dest = (uint32_t*)args.Dest(); + int dest_y = args.DestY(); + + count = thread->count_for_thread(dest_y, count); + if (count <= 0) return; + frac += thread->skipped_by_thread(dest_y) * fracstep; + dest = thread->dest_for_thread(dest_y, pitch, dest); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + frac -= one / 2; + __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); + __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + uint32_t srccolor = args.SrcColorBgra(); + uint32_t color = args.SolidColor(); + + int ssecount = count / 2; + for (int index = 0; index < ssecount; index++) + { + int offset = index * pitch * 2; + uint32_t desttmp[2]; + desttmp[0] = dest[offset]; + desttmp[1] = dest[offset + pitch]; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + { + unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; + unsigned int frac_y1 = ((frac + 
one) >> FRACBITS) * textureheight; + unsigned int y0 = frac_y0 >> FRACBITS; + unsigned int y1 = frac_y1 >> FRACBITS; + + unsigned int p00 = source[y0]; + unsigned int p01 = source[y1]; + unsigned int p10 = source2[y0]; + unsigned int p11 = source2[y1]; + + unsigned int inv_b = texturefracx; + unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; + unsigned int a = 16 - inv_a; + unsigned int b = 16 - inv_b; + + unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[0] = sampleout; + frac += fracstep; + } + { + unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; + unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; + unsigned int y0 = frac_y0 >> FRACBITS; + unsigned int y1 = frac_y1 >> FRACBITS; + + unsigned int p00 = source[y0]; + unsigned int p01 = source[y1]; + unsigned int p10 = source2[y0]; + unsigned int p11 = source2[y1]; + + unsigned int inv_b = texturefracx; + unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; + unsigned int a = 16 - inv_a; + unsigned int b = 16 - inv_b; + + unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + 
BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[1] = sampleout; + frac += fracstep; + } + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + + __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); + bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + + __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); + fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_srai_epi16(_mm_add_epi32(fg_lo, bg_lo), 8); + __m128i out_hi = _mm_srai_epi16(_mm_add_epi32(fg_hi, bg_hi), 8); + __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + _mm_storel_epi64((__m128i*)desttmp, outcolor); + dest[offset] = desttmp[0]; + dest[offset + pitch] = desttmp[1]; + } + + if (ssecount * 2 != count) + { + int index = ssecount * 2; + int offset = 
index * pitch; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; + unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; + unsigned int y0 = frac_y0 >> FRACBITS; + unsigned int y1 = frac_y1 >> FRACBITS; + + unsigned int p00 = source[y0]; + unsigned int p01 = source[y1]; + unsigned int p10 = source2[y0]; + unsigned int p11 = source2[y1]; + + unsigned int inv_b = texturefracx; + unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; + unsigned int a = 16 - inv_a; + unsigned int b = 16 - inv_b; + + unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[0] = sampleout; + ifgcolor[1] = 0; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + + __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); + bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + + 
__m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); + fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_srai_epi16(_mm_add_epi32(fg_lo, bg_lo), 8); + __m128i out_hi = _mm_srai_epi16(_mm_add_epi32(fg_hi, bg_hi), 8); + __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + dest[offset] = _mm_cvtsi128_si32(outcolor); + } + } + } + } + + FString DebugInfo() override { return "DrawSpriteAddClamp32Command"; } + }; + + class DrawSpriteSubClamp32Command : public DrawerCommand + { + protected: + SpriteDrawerArgs args; + + public: + DrawSpriteSubClamp32Command(const SpriteDrawerArgs &drawerargs) : args(drawerargs) { } + + void Execute(DrawerThread *thread) override + { + auto shade_constants = args.ColormapConstants(); + if (shade_constants.simple_shade) + { + const uint32_t *source = (const uint32_t*)args.TexturePixels(); + const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); + const uint8_t *colormap = args.Colormap(); + const uint32_t *translation = (const uint32_t*)args.TranslationMap(); + bool is_nearest_filter = (source2 == nullptr); + if (is_nearest_filter) + { + int textureheight = args.TextureHeight(); + uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; + + // Shade constants + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 
256 - light, 256 - light, 256 - light); + + int count = args.Count(); + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); + uint32_t fracstep = args.TextureVStep(); + uint32_t frac = args.TextureVPos(); + uint32_t texturefracx = args.TextureUPos(); + uint32_t *dest = (uint32_t*)args.Dest(); + int dest_y = args.DestY(); + + count = thread->count_for_thread(dest_y, count); + if (count <= 0) return; + frac += thread->skipped_by_thread(dest_y) * fracstep; + dest = thread->dest_for_thread(dest_y, pitch, dest); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); + __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + uint32_t srccolor = args.SrcColorBgra(); + uint32_t color = args.SolidColor(); + + int ssecount = count / 2; + for (int index = 0; index < ssecount; index++) + { + int offset = index * pitch * 2; + uint32_t desttmp[2]; + desttmp[0] = dest[offset]; + desttmp[1] = dest[offset + pitch]; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + { + int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; + unsigned int sampleout = source[sample_index]; + ifgcolor[0] = sampleout; + frac += fracstep; + } + { + int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; + unsigned int sampleout = source[sample_index]; + ifgcolor[1] = sampleout; + frac += fracstep; + } + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + + __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); + bgalpha 
= _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + + __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); + fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_srai_epi16(_mm_sub_epi32(fg_lo, bg_lo), 8); + __m128i out_hi = _mm_srai_epi16(_mm_sub_epi32(fg_hi, bg_hi), 8); + __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + _mm_storel_epi64((__m128i*)desttmp, outcolor); + dest[offset] = desttmp[0]; + dest[offset + pitch] = desttmp[1]; + } + + if (ssecount * 2 != count) + { + int index = ssecount * 2; + int offset = index * pitch; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; + unsigned int sampleout = source[sample_index]; + ifgcolor[0] = sampleout; + ifgcolor[1] = 0; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + + __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); + bgalpha = 
_mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + + __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); + fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_srai_epi16(_mm_sub_epi32(fg_lo, bg_lo), 8); + __m128i out_hi = _mm_srai_epi16(_mm_sub_epi32(fg_hi, bg_hi), 8); + __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + dest[offset] = _mm_cvtsi128_si32(outcolor); + } + } + else + { + int textureheight = args.TextureHeight(); + uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; + + // Shade constants + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + + int count = args.Count(); + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); + uint32_t fracstep = args.TextureVStep(); + uint32_t frac = args.TextureVPos(); + uint32_t texturefracx = args.TextureUPos(); + uint32_t *dest = (uint32_t*)args.Dest(); + int dest_y = args.DestY(); + + count = thread->count_for_thread(dest_y, count); + if (count <= 0) return; + frac += thread->skipped_by_thread(dest_y) * fracstep; + dest = thread->dest_for_thread(dest_y, pitch, dest); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + frac -= one / 2; + __m128i srcalpha = 
_mm_set1_epi16(args.SrcAlpha()); + __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + uint32_t srccolor = args.SrcColorBgra(); + uint32_t color = args.SolidColor(); + + int ssecount = count / 2; + for (int index = 0; index < ssecount; index++) + { + int offset = index * pitch * 2; + uint32_t desttmp[2]; + desttmp[0] = dest[offset]; + desttmp[1] = dest[offset + pitch]; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + { + unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; + unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; + unsigned int y0 = frac_y0 >> FRACBITS; + unsigned int y1 = frac_y1 >> FRACBITS; + + unsigned int p00 = source[y0]; + unsigned int p01 = source[y1]; + unsigned int p10 = source2[y0]; + unsigned int p11 = source2[y1]; + + unsigned int inv_b = texturefracx; + unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; + unsigned int a = 16 - inv_a; + unsigned int b = 16 - inv_b; + + unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[0] = sampleout; + frac += fracstep; + } + { + unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; + unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; + unsigned int y0 = frac_y0 >> FRACBITS; + unsigned int y1 = frac_y1 >> FRACBITS; + + unsigned int p00 = source[y0]; + unsigned int 
p01 = source[y1]; + unsigned int p10 = source2[y0]; + unsigned int p11 = source2[y1]; + + unsigned int inv_b = texturefracx; + unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; + unsigned int a = 16 - inv_a; + unsigned int b = 16 - inv_b; + + unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[1] = sampleout; + frac += fracstep; + } + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + + __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); + bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + + __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); + fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = 
_mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_srai_epi16(_mm_sub_epi32(fg_lo, bg_lo), 8); + __m128i out_hi = _mm_srai_epi16(_mm_sub_epi32(fg_hi, bg_hi), 8); + __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + _mm_storel_epi64((__m128i*)desttmp, outcolor); + dest[offset] = desttmp[0]; + dest[offset + pitch] = desttmp[1]; + } + + if (ssecount * 2 != count) + { + int index = ssecount * 2; + int offset = index * pitch; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; + unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; + unsigned int y0 = frac_y0 >> FRACBITS; + unsigned int y1 = frac_y1 >> FRACBITS; + + unsigned int p00 = source[y0]; + unsigned int p01 = source[y1]; + unsigned int p10 = source2[y0]; + unsigned int p11 = source2[y1]; + + unsigned int inv_b = texturefracx; + unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; + unsigned int a = 16 - inv_a; + unsigned int b = 16 - inv_b; + + unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[0] = sampleout; + ifgcolor[1] = 0; + 
__m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + + __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); + bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + + __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); + fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_srai_epi16(_mm_sub_epi32(fg_lo, bg_lo), 8); + __m128i out_hi = _mm_srai_epi16(_mm_sub_epi32(fg_hi, bg_hi), 8); + __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + dest[offset] = _mm_cvtsi128_si32(outcolor); + } + } + } + else + { + const uint32_t *source = (const uint32_t*)args.TexturePixels(); + const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); + const uint8_t *colormap = args.Colormap(); + const uint32_t *translation = (const uint32_t*)args.TranslationMap(); + bool is_nearest_filter = (source2 == nullptr); + if (is_nearest_filter) + { + int textureheight = args.TextureHeight(); + uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; + + // Shade constants + int 
light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); + __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); + shade_fade = _mm_mullo_epi16(shade_fade, inv_light); + __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); + int desaturate = shade_constants.desaturate; + + int count = args.Count(); + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); + uint32_t fracstep = args.TextureVStep(); + uint32_t frac = args.TextureVPos(); + uint32_t texturefracx = args.TextureUPos(); + uint32_t *dest = (uint32_t*)args.Dest(); + int dest_y = args.DestY(); + + count = thread->count_for_thread(dest_y, count); + if (count <= 0) return; + frac += thread->skipped_by_thread(dest_y) * fracstep; + dest = thread->dest_for_thread(dest_y, pitch, dest); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); + __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + uint32_t srccolor = args.SrcColorBgra(); + uint32_t color = args.SolidColor(); + + int ssecount = count / 2; + for (int index = 0; index < ssecount; index++) + { + int offset = index * pitch * 2; + 
uint32_t desttmp[2]; + desttmp[0] = dest[offset]; + desttmp[1] = dest[offset + pitch]; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + { + int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; + unsigned int sampleout = source[sample_index]; + ifgcolor[0] = sampleout; + frac += fracstep; + } + { + int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; + unsigned int sampleout = source[sample_index]; + ifgcolor[1] = sampleout; + frac += fracstep; + } + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + + __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); + bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + + __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); + fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_srai_epi16(_mm_sub_epi32(fg_lo, bg_lo), 8); + __m128i out_hi = _mm_srai_epi16(_mm_sub_epi32(fg_hi, bg_hi), 8); + __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, 
_mm_set1_epi32(0xff000000)); + + _mm_storel_epi64((__m128i*)desttmp, outcolor); + dest[offset] = desttmp[0]; + dest[offset + pitch] = desttmp[1]; + } + + if (ssecount * 2 != count) + { + int index = ssecount * 2; + int offset = index * pitch; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; + unsigned int sampleout = source[sample_index]; + ifgcolor[0] = sampleout; + ifgcolor[1] = 0; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + + __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); + bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + + __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); + fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_srai_epi16(_mm_sub_epi32(fg_lo, bg_lo), 8); + __m128i out_hi = _mm_srai_epi16(_mm_sub_epi32(fg_hi, bg_hi), 8); + __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + 
dest[offset] = _mm_cvtsi128_si32(outcolor); + } + } + else + { + int textureheight = args.TextureHeight(); + uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; + + // Shade constants + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); + __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); + shade_fade = _mm_mullo_epi16(shade_fade, inv_light); + __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); + int desaturate = shade_constants.desaturate; + + int count = args.Count(); + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); + uint32_t fracstep = args.TextureVStep(); + uint32_t frac = args.TextureVPos(); + uint32_t texturefracx = args.TextureUPos(); + uint32_t *dest = (uint32_t*)args.Dest(); + int dest_y = args.DestY(); + + count = thread->count_for_thread(dest_y, count); + if (count <= 0) return; + frac += thread->skipped_by_thread(dest_y) * fracstep; + dest = thread->dest_for_thread(dest_y, pitch, dest); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + frac -= one / 2; + __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); + __m128i destalpha = 
_mm_set1_epi16(args.DestAlpha()); + uint32_t srccolor = args.SrcColorBgra(); + uint32_t color = args.SolidColor(); + + int ssecount = count / 2; + for (int index = 0; index < ssecount; index++) + { + int offset = index * pitch * 2; + uint32_t desttmp[2]; + desttmp[0] = dest[offset]; + desttmp[1] = dest[offset + pitch]; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + { + unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; + unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; + unsigned int y0 = frac_y0 >> FRACBITS; + unsigned int y1 = frac_y1 >> FRACBITS; + + unsigned int p00 = source[y0]; + unsigned int p01 = source[y1]; + unsigned int p10 = source2[y0]; + unsigned int p11 = source2[y1]; + + unsigned int inv_b = texturefracx; + unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; + unsigned int a = 16 - inv_a; + unsigned int b = 16 - inv_b; + + unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[0] = sampleout; + frac += fracstep; + } + { + unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; + unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; + unsigned int y0 = frac_y0 >> FRACBITS; + unsigned int y1 = frac_y1 >> FRACBITS; + + unsigned int p00 = source[y0]; + unsigned int p01 = source[y1]; + unsigned int p10 = source2[y0]; + 
unsigned int p11 = source2[y1]; + + unsigned int inv_b = texturefracx; + unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; + unsigned int a = 16 - inv_a; + unsigned int b = 16 - inv_b; + + unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[1] = sampleout; + frac += fracstep; + } + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + + __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); + bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + + __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); + fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = 
_mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_srai_epi16(_mm_sub_epi32(fg_lo, bg_lo), 8); + __m128i out_hi = _mm_srai_epi16(_mm_sub_epi32(fg_hi, bg_hi), 8); + __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + _mm_storel_epi64((__m128i*)desttmp, outcolor); + dest[offset] = desttmp[0]; + dest[offset + pitch] = desttmp[1]; + } + + if (ssecount * 2 != count) + { + int index = ssecount * 2; + int offset = index * pitch; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; + unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; + unsigned int y0 = frac_y0 >> FRACBITS; + unsigned int y1 = frac_y1 >> FRACBITS; + + unsigned int p00 = source[y0]; + unsigned int p01 = source[y1]; + unsigned int p10 = source2[y0]; + unsigned int p11 = source2[y1]; + + unsigned int inv_b = texturefracx; + unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; + unsigned int a = 16 - inv_a; + unsigned int b = 16 - inv_b; + + unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[0] = sampleout; + ifgcolor[1] = 0; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), 
_mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + + __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); + bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + + __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); + fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_srai_epi16(_mm_sub_epi32(fg_lo, bg_lo), 8); + __m128i out_hi = _mm_srai_epi16(_mm_sub_epi32(fg_hi, bg_hi), 8); + __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + dest[offset] = _mm_cvtsi128_si32(outcolor); + } + } + } + } + + FString DebugInfo() override { return "DrawSpriteSubClamp32Command"; } + }; + + class DrawSpriteRevSubClamp32Command : public DrawerCommand + { + protected: + SpriteDrawerArgs args; + + public: + DrawSpriteRevSubClamp32Command(const SpriteDrawerArgs &drawerargs) : args(drawerargs) { } + + void Execute(DrawerThread *thread) override + { + auto shade_constants = args.ColormapConstants(); + if (shade_constants.simple_shade) + { + const uint32_t *source = (const uint32_t*)args.TexturePixels(); + const uint32_t *source2 = (const 
uint32_t*)args.TexturePixels2(); + const uint8_t *colormap = args.Colormap(); + const uint32_t *translation = (const uint32_t*)args.TranslationMap(); + bool is_nearest_filter = (source2 == nullptr); + if (is_nearest_filter) + { + int textureheight = args.TextureHeight(); + uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; + + // Shade constants + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + + int count = args.Count(); + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); + uint32_t fracstep = args.TextureVStep(); + uint32_t frac = args.TextureVPos(); + uint32_t texturefracx = args.TextureUPos(); + uint32_t *dest = (uint32_t*)args.Dest(); + int dest_y = args.DestY(); + + count = thread->count_for_thread(dest_y, count); + if (count <= 0) return; + frac += thread->skipped_by_thread(dest_y) * fracstep; + dest = thread->dest_for_thread(dest_y, pitch, dest); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); + __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + uint32_t srccolor = args.SrcColorBgra(); + uint32_t color = args.SolidColor(); + + int ssecount = count / 2; + for (int index = 0; index < ssecount; index++) + { + int offset = index * pitch * 2; + uint32_t desttmp[2]; + desttmp[0] = dest[offset]; + desttmp[1] = dest[offset + pitch]; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + { + int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; + unsigned int sampleout = source[sample_index]; + ifgcolor[0] = sampleout; + frac += fracstep; + } + { + int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; + unsigned int sampleout = 
source[sample_index]; + ifgcolor[1] = sampleout; + frac += fracstep; + } + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + + __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); + bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + + __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); + fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_srai_epi16(_mm_sub_epi32(bg_lo, fg_lo), 8); + __m128i out_hi = _mm_srai_epi16(_mm_sub_epi32(bg_hi, fg_hi), 8); + __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + _mm_storel_epi64((__m128i*)desttmp, outcolor); + dest[offset] = desttmp[0]; + dest[offset + pitch] = desttmp[1]; + } + + if (ssecount * 2 != count) + { + int index = ssecount * 2; + int offset = index * pitch; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; + unsigned int sampleout = source[sample_index]; + 
ifgcolor[0] = sampleout; + ifgcolor[1] = 0; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + + __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); + bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + + __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); + fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_srai_epi16(_mm_sub_epi32(bg_lo, fg_lo), 8); + __m128i out_hi = _mm_srai_epi16(_mm_sub_epi32(bg_hi, fg_hi), 8); + __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + dest[offset] = _mm_cvtsi128_si32(outcolor); + } + } + else + { + int textureheight = args.TextureHeight(); + uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; + + // Shade constants + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + + int count = args.Count(); + 
int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); + uint32_t fracstep = args.TextureVStep(); + uint32_t frac = args.TextureVPos(); + uint32_t texturefracx = args.TextureUPos(); + uint32_t *dest = (uint32_t*)args.Dest(); + int dest_y = args.DestY(); + + count = thread->count_for_thread(dest_y, count); + if (count <= 0) return; + frac += thread->skipped_by_thread(dest_y) * fracstep; + dest = thread->dest_for_thread(dest_y, pitch, dest); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + frac -= one / 2; + __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); + __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + uint32_t srccolor = args.SrcColorBgra(); + uint32_t color = args.SolidColor(); + + int ssecount = count / 2; + for (int index = 0; index < ssecount; index++) + { + int offset = index * pitch * 2; + uint32_t desttmp[2]; + desttmp[0] = dest[offset]; + desttmp[1] = dest[offset + pitch]; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + { + unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; + unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; + unsigned int y0 = frac_y0 >> FRACBITS; + unsigned int y1 = frac_y1 >> FRACBITS; + + unsigned int p00 = source[y0]; + unsigned int p01 = source[y1]; + unsigned int p10 = source2[y0]; + unsigned int p11 = source2[y1]; + + unsigned int inv_b = texturefracx; + unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; + unsigned int a = 16 - inv_a; + unsigned int b = 16 - inv_b; + + unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) 
+ 127) >> 8; + unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[0] = sampleout; + frac += fracstep; + } + { + unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; + unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; + unsigned int y0 = frac_y0 >> FRACBITS; + unsigned int y1 = frac_y1 >> FRACBITS; + + unsigned int p00 = source[y0]; + unsigned int p01 = source[y1]; + unsigned int p10 = source2[y0]; + unsigned int p11 = source2[y1]; + + unsigned int inv_b = texturefracx; + unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; + unsigned int a = 16 - inv_a; + unsigned int b = 16 - inv_b; + + unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[1] = sampleout; + frac += fracstep; + } + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + + __m128i bgalpha = 
_mm_mullo_epi16(destalpha, alpha); + bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + + __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); + fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_srai_epi16(_mm_sub_epi32(bg_lo, fg_lo), 8); + __m128i out_hi = _mm_srai_epi16(_mm_sub_epi32(bg_hi, fg_hi), 8); + __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + _mm_storel_epi64((__m128i*)desttmp, outcolor); + dest[offset] = desttmp[0]; + dest[offset + pitch] = desttmp[1]; + } + + if (ssecount * 2 != count) + { + int index = ssecount * 2; + int offset = index * pitch; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; + unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; + unsigned int y0 = frac_y0 >> FRACBITS; + unsigned int y1 = frac_y1 >> FRACBITS; + + unsigned int p00 = source[y0]; + unsigned int p01 = source[y1]; + unsigned int p10 = source2[y0]; + unsigned int p11 = source2[y1]; + + unsigned int inv_b = texturefracx; + unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; + unsigned int a = 16 - inv_a; + unsigned int b = 16 - inv_b; + + unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sgreen = 
(GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[0] = sampleout; + ifgcolor[1] = 0; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + + __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); + bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + + __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); + fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_srai_epi16(_mm_sub_epi32(bg_lo, fg_lo), 8); + __m128i out_hi = _mm_srai_epi16(_mm_sub_epi32(bg_hi, fg_hi), 8); + __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + 
dest[offset] = _mm_cvtsi128_si32(outcolor); + } + } + } + else + { + const uint32_t *source = (const uint32_t*)args.TexturePixels(); + const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); + const uint8_t *colormap = args.Colormap(); + const uint32_t *translation = (const uint32_t*)args.TranslationMap(); + bool is_nearest_filter = (source2 == nullptr); + if (is_nearest_filter) + { + int textureheight = args.TextureHeight(); + uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; + + // Shade constants + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); + __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); + shade_fade = _mm_mullo_epi16(shade_fade, inv_light); + __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); + int desaturate = shade_constants.desaturate; + + int count = args.Count(); + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); + uint32_t fracstep = args.TextureVStep(); + uint32_t frac = args.TextureVPos(); + uint32_t texturefracx = args.TextureUPos(); + uint32_t *dest = (uint32_t*)args.Dest(); + int dest_y = args.DestY(); + + count = 
thread->count_for_thread(dest_y, count); + if (count <= 0) return; + frac += thread->skipped_by_thread(dest_y) * fracstep; + dest = thread->dest_for_thread(dest_y, pitch, dest); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); + __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + uint32_t srccolor = args.SrcColorBgra(); + uint32_t color = args.SolidColor(); + + int ssecount = count / 2; + for (int index = 0; index < ssecount; index++) + { + int offset = index * pitch * 2; + uint32_t desttmp[2]; + desttmp[0] = dest[offset]; + desttmp[1] = dest[offset + pitch]; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + { + int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; + unsigned int sampleout = source[sample_index]; + ifgcolor[0] = sampleout; + frac += fracstep; + } + { + int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; + unsigned int sampleout = source[sample_index]; + ifgcolor[1] = sampleout; + frac += fracstep; + } + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + + __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); + bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + + __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); + fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + 
__m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_srai_epi16(_mm_sub_epi32(bg_lo, fg_lo), 8); + __m128i out_hi = _mm_srai_epi16(_mm_sub_epi32(bg_hi, fg_hi), 8); + __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + _mm_storel_epi64((__m128i*)desttmp, outcolor); + dest[offset] = desttmp[0]; + dest[offset + pitch] = desttmp[1]; + } + + if (ssecount * 2 != count) + { + int index = ssecount * 2; + int offset = index * pitch; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; + unsigned int sampleout = source[sample_index]; + ifgcolor[0] = sampleout; + ifgcolor[1] = 0; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + + __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); + bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + + __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); + fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = 
_mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_srai_epi16(_mm_sub_epi32(bg_lo, fg_lo), 8); + __m128i out_hi = _mm_srai_epi16(_mm_sub_epi32(bg_hi, fg_hi), 8); + __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + dest[offset] = _mm_cvtsi128_si32(outcolor); + } + } + else + { + int textureheight = args.TextureHeight(); + uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; + + // Shade constants + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); + __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); + shade_fade = _mm_mullo_epi16(shade_fade, inv_light); + __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); + int desaturate = shade_constants.desaturate; + + int count = args.Count(); + int pitch = 
RenderViewport::Instance()->RenderTarget->GetPitch(); + uint32_t fracstep = args.TextureVStep(); + uint32_t frac = args.TextureVPos(); + uint32_t texturefracx = args.TextureUPos(); + uint32_t *dest = (uint32_t*)args.Dest(); + int dest_y = args.DestY(); + + count = thread->count_for_thread(dest_y, count); + if (count <= 0) return; + frac += thread->skipped_by_thread(dest_y) * fracstep; + dest = thread->dest_for_thread(dest_y, pitch, dest); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + frac -= one / 2; + __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); + __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + uint32_t srccolor = args.SrcColorBgra(); + uint32_t color = args.SolidColor(); + + int ssecount = count / 2; + for (int index = 0; index < ssecount; index++) + { + int offset = index * pitch * 2; + uint32_t desttmp[2]; + desttmp[0] = dest[offset]; + desttmp[1] = dest[offset + pitch]; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + { + unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; + unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; + unsigned int y0 = frac_y0 >> FRACBITS; + unsigned int y1 = frac_y1 >> FRACBITS; + + unsigned int p00 = source[y0]; + unsigned int p01 = source[y1]; + unsigned int p10 = source2[y0]; + unsigned int p11 = source2[y1]; + + unsigned int inv_b = texturefracx; + unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; + unsigned int a = 16 - inv_a; + unsigned int b = 16 - inv_b; + + unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 
8; + unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[0] = sampleout; + frac += fracstep; + } + { + unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; + unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; + unsigned int y0 = frac_y0 >> FRACBITS; + unsigned int y1 = frac_y1 >> FRACBITS; + + unsigned int p00 = source[y0]; + unsigned int p01 = source[y1]; + unsigned int p10 = source2[y0]; + unsigned int p11 = source2[y1]; + + unsigned int inv_b = texturefracx; + unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; + unsigned int a = 16 - inv_a; + unsigned int b = 16 - inv_b; + + unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[1] = sampleout; + frac += fracstep; + } + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + + __m128i bgalpha = 
_mm_mullo_epi16(destalpha, alpha); + bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + + __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); + fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_srai_epi16(_mm_sub_epi32(bg_lo, fg_lo), 8); + __m128i out_hi = _mm_srai_epi16(_mm_sub_epi32(bg_hi, fg_hi), 8); + __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + _mm_storel_epi64((__m128i*)desttmp, outcolor); + dest[offset] = desttmp[0]; + dest[offset + pitch] = desttmp[1]; + } + + if (ssecount * 2 != count) + { + int index = ssecount * 2; + int offset = index * pitch; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; + unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; + unsigned int y0 = frac_y0 >> FRACBITS; + unsigned int y1 = frac_y1 >> FRACBITS; + + unsigned int p00 = source[y0]; + unsigned int p01 = source[y1]; + unsigned int p10 = source2[y0]; + unsigned int p11 = source2[y1]; + + unsigned int inv_b = texturefracx; + unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; + unsigned int a = 16 - inv_a; + unsigned int b = 16 - inv_b; + + unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sgreen = 
(GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[0] = sampleout; + ifgcolor[1] = 0; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + + __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); + bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + + __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); + fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_srai_epi16(_mm_sub_epi32(bg_lo, fg_lo), 8); + __m128i out_hi = _mm_srai_epi16(_mm_sub_epi32(bg_hi, fg_hi), 8); + __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + 
dest[offset] = _mm_cvtsi128_si32(outcolor); + } + } + } + } + + FString DebugInfo() override { return "DrawSpriteRevSubClamp32Command"; } + }; + + class FillSprite32Command : public DrawerCommand + { + protected: + SpriteDrawerArgs args; + + public: + FillSprite32Command(const SpriteDrawerArgs &drawerargs) : args(drawerargs) { } + + void Execute(DrawerThread *thread) override + { + auto shade_constants = args.ColormapConstants(); + if (shade_constants.simple_shade) + { + const uint32_t *source = (const uint32_t*)args.TexturePixels(); + const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); + const uint8_t *colormap = args.Colormap(); + const uint32_t *translation = (const uint32_t*)args.TranslationMap(); + int textureheight = args.TextureHeight(); + uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; + + // Shade constants + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + + int count = args.Count(); + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); + uint32_t fracstep = args.TextureVStep(); + uint32_t frac = args.TextureVPos(); + uint32_t texturefracx = args.TextureUPos(); + uint32_t *dest = (uint32_t*)args.Dest(); + int dest_y = args.DestY(); + + count = thread->count_for_thread(dest_y, count); + if (count <= 0) return; + frac += thread->skipped_by_thread(dest_y) * fracstep; + dest = thread->dest_for_thread(dest_y, pitch, dest); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); + __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + uint32_t srccolor = args.SrcColorBgra(); + uint32_t color = args.SolidColor(); + + int ssecount = count / 2; + for (int index = 0; index < ssecount; index++) + { + int offset = index * pitch * 2; + uint32_t 
desttmp[2]; + desttmp[0] = dest[offset]; + desttmp[1] = dest[offset + pitch]; + + // Sample + unsigned int ifgcolor[2]; + { + unsigned int sampleout = srccolor; + ifgcolor[0] = sampleout; + frac += fracstep; + } + { + unsigned int sampleout = srccolor; + ifgcolor[1] = sampleout; + frac += fracstep; + } + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + __m128i outcolor = fgcolor; + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + + _mm_storel_epi64((__m128i*)desttmp, outcolor); + dest[offset] = desttmp[0]; + dest[offset + pitch] = desttmp[1]; + } + + if (ssecount * 2 != count) + { + int index = ssecount * 2; + int offset = index * pitch; + + // Sample + unsigned int ifgcolor[2]; + unsigned int sampleout = srccolor; + ifgcolor[0] = sampleout; + ifgcolor[1] = 0; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + __m128i outcolor = fgcolor; + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + + dest[offset] = _mm_cvtsi128_si32(outcolor); + } + } + else + { + const uint32_t *source = (const uint32_t*)args.TexturePixels(); + const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); + const uint8_t *colormap = args.Colormap(); + const uint32_t *translation = (const uint32_t*)args.TranslationMap(); + int textureheight = args.TextureHeight(); + uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; + + // Shade constants + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + __m128i inv_desaturate = _mm_setr_epi16(256, 256 - 
shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); + __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); + shade_fade = _mm_mullo_epi16(shade_fade, inv_light); + __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); + int desaturate = shade_constants.desaturate; + + int count = args.Count(); + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); + uint32_t fracstep = args.TextureVStep(); + uint32_t frac = args.TextureVPos(); + uint32_t texturefracx = args.TextureUPos(); + uint32_t *dest = (uint32_t*)args.Dest(); + int dest_y = args.DestY(); + + count = thread->count_for_thread(dest_y, count); + if (count <= 0) return; + frac += thread->skipped_by_thread(dest_y) * fracstep; + dest = thread->dest_for_thread(dest_y, pitch, dest); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); + __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + uint32_t srccolor = args.SrcColorBgra(); + uint32_t color = args.SolidColor(); + + int ssecount = count / 2; + for (int index = 0; index < ssecount; index++) + { + int offset = index * pitch * 2; + uint32_t desttmp[2]; + desttmp[0] = dest[offset]; + desttmp[1] = dest[offset + pitch]; + + // Sample + unsigned int ifgcolor[2]; + { + unsigned int sampleout = srccolor; + ifgcolor[0] = sampleout; + frac += fracstep; + } + { + unsigned int sampleout = srccolor; + ifgcolor[1] = sampleout; + frac += fracstep; + } 
+ __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + __m128i outcolor = fgcolor; + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + + _mm_storel_epi64((__m128i*)desttmp, outcolor); + dest[offset] = desttmp[0]; + dest[offset + pitch] = desttmp[1]; + } + + if (ssecount * 2 != count) + { + int index = ssecount * 2; + int offset = index * pitch; + + // Sample + unsigned int ifgcolor[2]; + unsigned int sampleout = srccolor; + ifgcolor[0] = sampleout; + ifgcolor[1] = 0; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + __m128i outcolor = fgcolor; + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + + dest[offset] = _mm_cvtsi128_si32(outcolor); + } + } + } + + FString DebugInfo() override { return "FillSprite32Command"; } + }; + + class FillSpriteAddClamp32Command : public DrawerCommand + { + protected: + SpriteDrawerArgs args; + + public: + FillSpriteAddClamp32Command(const SpriteDrawerArgs &drawerargs) : args(drawerargs) { } + + void Execute(DrawerThread *thread) override + { + auto shade_constants = args.ColormapConstants(); + if (shade_constants.simple_shade) + { + const uint32_t *source = (const uint32_t*)args.TexturePixels(); + const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); + const uint8_t *colormap = args.Colormap(); + const uint32_t *translation = (const uint32_t*)args.TranslationMap(); + int textureheight = args.TextureHeight(); + uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; + + // Shade constants + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - 
light, 256 - light, 256 - light); + + int count = args.Count(); + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); + uint32_t fracstep = args.TextureVStep(); + uint32_t frac = args.TextureVPos(); + uint32_t texturefracx = args.TextureUPos(); + uint32_t *dest = (uint32_t*)args.Dest(); + int dest_y = args.DestY(); + + count = thread->count_for_thread(dest_y, count); + if (count <= 0) return; + frac += thread->skipped_by_thread(dest_y) * fracstep; + dest = thread->dest_for_thread(dest_y, pitch, dest); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); + __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + uint32_t srccolor = args.SrcColorBgra(); + uint32_t color = args.SolidColor(); + + int ssecount = count / 2; + for (int index = 0; index < ssecount; index++) + { + int offset = index * pitch * 2; + uint32_t desttmp[2]; + desttmp[0] = dest[offset]; + desttmp[1] = dest[offset + pitch]; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + { + unsigned int sampleout = srccolor; + ifgcolor[0] = sampleout; + frac += fracstep; + } + { + unsigned int sampleout = srccolor; + ifgcolor[1] = sampleout; + frac += fracstep; + } + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + + __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); + bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + + __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); + 
fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_srai_epi16(_mm_add_epi32(fg_lo, bg_lo), 8); + __m128i out_hi = _mm_srai_epi16(_mm_add_epi32(fg_hi, bg_hi), 8); + __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + _mm_storel_epi64((__m128i*)desttmp, outcolor); + dest[offset] = desttmp[0]; + dest[offset + pitch] = desttmp[1]; + } + + if (ssecount * 2 != count) + { + int index = ssecount * 2; + int offset = index * pitch; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + unsigned int sampleout = srccolor; + ifgcolor[0] = sampleout; + ifgcolor[1] = 0; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + + __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); + bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + + __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); + fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + + fgcolor = _mm_mullo_epi16(fgcolor, 
fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_srai_epi16(_mm_add_epi32(fg_lo, bg_lo), 8); + __m128i out_hi = _mm_srai_epi16(_mm_add_epi32(fg_hi, bg_hi), 8); + __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + dest[offset] = _mm_cvtsi128_si32(outcolor); + } + } + else + { + const uint32_t *source = (const uint32_t*)args.TexturePixels(); + const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); + const uint8_t *colormap = args.Colormap(); + const uint32_t *translation = (const uint32_t*)args.TranslationMap(); + int textureheight = args.TextureHeight(); + uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; + + // Shade constants + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); + __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); + shade_fade = _mm_mullo_epi16(shade_fade, inv_light); + __m128i shade_light = 
_mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); + int desaturate = shade_constants.desaturate; + + int count = args.Count(); + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); + uint32_t fracstep = args.TextureVStep(); + uint32_t frac = args.TextureVPos(); + uint32_t texturefracx = args.TextureUPos(); + uint32_t *dest = (uint32_t*)args.Dest(); + int dest_y = args.DestY(); + + count = thread->count_for_thread(dest_y, count); + if (count <= 0) return; + frac += thread->skipped_by_thread(dest_y) * fracstep; + dest = thread->dest_for_thread(dest_y, pitch, dest); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); + __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + uint32_t srccolor = args.SrcColorBgra(); + uint32_t color = args.SolidColor(); + + int ssecount = count / 2; + for (int index = 0; index < ssecount; index++) + { + int offset = index * pitch * 2; + uint32_t desttmp[2]; + desttmp[0] = dest[offset]; + desttmp[1] = dest[offset + pitch]; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + { + unsigned int sampleout = srccolor; + ifgcolor[0] = sampleout; + frac += fracstep; + } + { + unsigned int sampleout = srccolor; + ifgcolor[1] = sampleout; + frac += fracstep; + } + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128i inv_alpha = 
_mm_sub_epi16(_mm_set1_epi16(256), alpha); + + __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); + bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + + __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); + fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_srai_epi16(_mm_add_epi32(fg_lo, bg_lo), 8); + __m128i out_hi = _mm_srai_epi16(_mm_add_epi32(fg_hi, bg_hi), 8); + __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + _mm_storel_epi64((__m128i*)desttmp, outcolor); + dest[offset] = desttmp[0]; + dest[offset + pitch] = desttmp[1]; + } + + if (ssecount * 2 != count) + { + int index = ssecount * 2; + int offset = index * pitch; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + unsigned int sampleout = srccolor; + ifgcolor[0] = sampleout; + ifgcolor[1] = 0; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + + __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); + bgalpha = 
_mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + + __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); + fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_srai_epi16(_mm_add_epi32(fg_lo, bg_lo), 8); + __m128i out_hi = _mm_srai_epi16(_mm_add_epi32(fg_hi, bg_hi), 8); + __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + dest[offset] = _mm_cvtsi128_si32(outcolor); + } + } + } + + FString DebugInfo() override { return "FillSpriteAddClamp32Command"; } + };
+
+	// Column drawer that fills with args.SrcColorBgra() and writes, per colour channel,
+	// clamp((source * fgalpha - destination * bgalpha) >> 8, 0, 255); output alpha is forced to 0xff.
+	//
+	// The fill colour is first scaled by the light level derived from args.Light(). Its own alpha
+	// channel (rescaled from 0..255 to 0..256) is then combined with args.SrcAlpha() and
+	// args.DestAlpha() to form the source and destination weights.
+	//
+	// NOTE(review): the generated code this replaces had a second branch for
+	// !shade_constants.simple_shade that computed desaturate/fade/light constants without ever
+	// applying them, so both branches ran the same pixel code. A single path is kept here, which
+	// drops the args.ColormapConstants() call on the assumption that it is a plain accessor - confirm.
+	class FillSpriteSubClamp32Command : public DrawerCommand
+	{
+	protected:
+		SpriteDrawerArgs args;
+
+	public:
+		FillSpriteSubClamp32Command(const SpriteDrawerArgs &drawerargs) : args(drawerargs) { }
+
+		void Execute(DrawerThread *thread) override
+		{
+			// Shade constants: colour lanes are multiplied by light, the alpha lane by 256 (left unchanged after >> 8).
+			int light = 256 - (args.Light() >> (FRACBITS - 8));
+			__m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light);
+
+			int count = args.Count();
+			int pitch = RenderViewport::Instance()->RenderTarget->GetPitch();
+			uint32_t *dest = (uint32_t*)args.Dest();
+			int dest_y = args.DestY();
+
+			// The thread object rewrites count/dest/pitch for the rows it handles.
+			count = thread->count_for_thread(dest_y, count);
+			if (count <= 0) return;
+			dest = thread->dest_for_thread(dest_y, pitch, dest);
+			pitch *= thread->num_cores;
+
+			__m128i srcalpha = _mm_set1_epi16(args.SrcAlpha());
+			__m128i destalpha = _mm_set1_epi16(args.DestAlpha());
+			uint32_t srccolor = args.SrcColorBgra();
+
+			// The fill colour does not change along the column, so shade and weight it once.
+			__m128i fgcolor = _mm_unpacklo_epi8(_mm_set1_epi32(srccolor), _mm_setzero_si128());
+			fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8);
+
+			// Broadcast each pixel's alpha lane to its four channels. The second shuffle must take
+			// the result of the first one, otherwise the low pixel's broadcast is thrown away.
+			__m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3));
+			alpha = _mm_shufflehi_epi16(alpha, _MM_SHUFFLE(3,3,3,3));
+			alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256
+			__m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha);
+
+			// bgalpha = (destalpha * alpha + inv_alpha * 256 + 128) >> 8. inv_alpha * 256 is a multiple
+			// of 256, so it is added after the shift; shifting it left inside a 16-bit lane would
+			// wrap to 0 for inv_alpha == 256.
+			__m128i bgalpha = _mm_add_epi16(inv_alpha, ScaleAlpha(destalpha, alpha));
+			__m128i fgalpha = ScaleAlpha(srcalpha, alpha);
+
+			fgcolor = _mm_mullo_epi16(fgcolor, fgalpha);
+			__m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128());
+			__m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128());
+
+			// Two rows per iteration.
+			int ssecount = count / 2;
+			for (int index = 0; index < ssecount; index++)
+			{
+				int offset = index * pitch * 2;
+				uint32_t desttmp[2];
+				desttmp[0] = dest[offset];
+				desttmp[1] = dest[offset + pitch];
+				__m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128());
+
+				__m128i outcolor = Blend(fg_lo, fg_hi, bgcolor, bgalpha);
+
+				_mm_storel_epi64((__m128i*)desttmp, outcolor);
+				dest[offset] = desttmp[0];
+				dest[offset + pitch] = desttmp[1];
+			}
+
+			// Odd row count: blend the last row on its own (only the low pixel of the result is used).
+			if (ssecount * 2 != count)
+			{
+				int offset = ssecount * 2 * pitch;
+				__m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128());
+				dest[offset] = _mm_cvtsi128_si32(Blend(fg_lo, fg_hi, bgcolor, bgalpha));
+			}
+		}
+
+		FString DebugInfo() override { return "FillSpriteSubClamp32Command"; }
+
+	private:
+		// Per 16-bit lane: (a * b + 128) >> 8, taken from the full 32-bit product so that a product
+		// of 65536 or more (e.g. 256 * 256) does not wrap before the shift.
+		static __m128i ScaleAlpha(__m128i a, __m128i b)
+		{
+			__m128i lo = _mm_mullo_epi16(a, b);
+			__m128i hi = _mm_mulhi_epu16(a, b);
+			// (hi * 65536 + lo + 128) >> 8 == hi * 256 + (lo >> 8) + bit 7 of lo
+			__m128i rounding = _mm_and_si128(_mm_srli_epi16(lo, 7), _mm_set1_epi16(1));
+			return _mm_add_epi16(_mm_add_epi16(_mm_slli_epi16(hi, 8), _mm_srli_epi16(lo, 8)), rounding);
+		}
+
+		// Weights two background pixels (8 x 16-bit channels) and subtracts them from the already
+		// weighted foreground (fg_lo/fg_hi, 4 x 32-bit channels each). Returns two packed BGRA
+		// pixels in the low 64 bits.
+		static __m128i Blend(__m128i fg_lo, __m128i fg_hi, __m128i bgcolor, __m128i bgalpha)
+		{
+			bgcolor = _mm_mullo_epi16(bgcolor, bgalpha);
+			__m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128());
+			__m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128());
+
+			// The differences are signed 32-bit values, so they need the 32-bit arithmetic shift;
+			// the saturating packs then clamp negative results to 0 and large ones to 255.
+			__m128i out_lo = _mm_srai_epi32(_mm_sub_epi32(fg_lo, bg_lo), 8);
+			__m128i out_hi = _mm_srai_epi32(_mm_sub_epi32(fg_hi, bg_hi), 8);
+			__m128i outcolor = _mm_packs_epi32(out_lo, out_hi);
+			outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128());
+			return _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000));
+		}
+	};
+
+	// Column drawer that fills with args.SrcColorBgra() and writes, per colour channel,
+	// clamp((destination * bgalpha - source * fgalpha) >> 8, 0, 255); output alpha is forced to 0xff.
+	//
+	// The fill colour is first scaled by the light level derived from args.Light(). Its own alpha
+	// channel (rescaled from 0..255 to 0..256) is then combined with args.SrcAlpha() and
+	// args.DestAlpha() to form the source and destination weights.
+	//
+	// NOTE(review): the generated code this replaces had a second branch for
+	// !shade_constants.simple_shade that computed desaturate/fade/light constants without ever
+	// applying them, so both branches ran the same pixel code. A single path is kept here, which
+	// drops the args.ColormapConstants() call on the assumption that it is a plain accessor - confirm.
+	class FillSpriteRevSubClamp32Command : public DrawerCommand
+	{
+	protected:
+		SpriteDrawerArgs args;
+
+	public:
+		FillSpriteRevSubClamp32Command(const SpriteDrawerArgs &drawerargs) : args(drawerargs) { }
+
+		void Execute(DrawerThread *thread) override
+		{
+			// Shade constants: colour lanes are multiplied by light, the alpha lane by 256 (left unchanged after >> 8).
+			int light = 256 - (args.Light() >> (FRACBITS - 8));
+			__m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light);
+
+			int count = args.Count();
+			int pitch = RenderViewport::Instance()->RenderTarget->GetPitch();
+			uint32_t *dest = (uint32_t*)args.Dest();
+			int dest_y = args.DestY();
+
+			// The thread object rewrites count/dest/pitch for the rows it handles.
+			count = thread->count_for_thread(dest_y, count);
+			if (count <= 0) return;
+			dest = thread->dest_for_thread(dest_y, pitch, dest);
+			pitch *= thread->num_cores;
+
+			__m128i srcalpha = _mm_set1_epi16(args.SrcAlpha());
+			__m128i destalpha = _mm_set1_epi16(args.DestAlpha());
+			uint32_t srccolor = args.SrcColorBgra();
+
+			// The fill colour does not change along the column, so shade and weight it once.
+			__m128i fgcolor = _mm_unpacklo_epi8(_mm_set1_epi32(srccolor), _mm_setzero_si128());
+			fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8);
+
+			// Broadcast each pixel's alpha lane to its four channels. The second shuffle must take
+			// the result of the first one, otherwise the low pixel's broadcast is thrown away.
+			__m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3));
+			alpha = _mm_shufflehi_epi16(alpha, _MM_SHUFFLE(3,3,3,3));
+			alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256
+			__m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha);
+
+			// bgalpha = (destalpha * alpha + inv_alpha * 256 + 128) >> 8. inv_alpha * 256 is a multiple
+			// of 256, so it is added after the shift; shifting it left inside a 16-bit lane would
+			// wrap to 0 for inv_alpha == 256.
+			__m128i bgalpha = _mm_add_epi16(inv_alpha, ScaleAlpha(destalpha, alpha));
+			__m128i fgalpha = ScaleAlpha(srcalpha, alpha);
+
+			fgcolor = _mm_mullo_epi16(fgcolor, fgalpha);
+			__m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128());
+			__m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128());
+
+			// Two rows per iteration.
+			int ssecount = count / 2;
+			for (int index = 0; index < ssecount; index++)
+			{
+				int offset = index * pitch * 2;
+				uint32_t desttmp[2];
+				desttmp[0] = dest[offset];
+				desttmp[1] = dest[offset + pitch];
+				__m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128());
+
+				__m128i outcolor = Blend(fg_lo, fg_hi, bgcolor, bgalpha);
+
+				_mm_storel_epi64((__m128i*)desttmp, outcolor);
+				dest[offset] = desttmp[0];
+				dest[offset + pitch] = desttmp[1];
+			}
+
+			// Odd row count: blend the last row on its own (only the low pixel of the result is used).
+			if (ssecount * 2 != count)
+			{
+				int offset = ssecount * 2 * pitch;
+				__m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128());
+				dest[offset] = _mm_cvtsi128_si32(Blend(fg_lo, fg_hi, bgcolor, bgalpha));
+			}
+		}
+
+		FString DebugInfo() override { return "FillSpriteRevSubClamp32Command"; }
+
+	private:
+		// Per 16-bit lane: (a * b + 128) >> 8, taken from the full 32-bit product so that a product
+		// of 65536 or more (e.g. 256 * 256) does not wrap before the shift.
+		static __m128i ScaleAlpha(__m128i a, __m128i b)
+		{
+			__m128i lo = _mm_mullo_epi16(a, b);
+			__m128i hi = _mm_mulhi_epu16(a, b);
+			// (hi * 65536 + lo + 128) >> 8 == hi * 256 + (lo >> 8) + bit 7 of lo
+			__m128i rounding = _mm_and_si128(_mm_srli_epi16(lo, 7), _mm_set1_epi16(1));
+			return _mm_add_epi16(_mm_add_epi16(_mm_slli_epi16(hi, 8), _mm_srli_epi16(lo, 8)), rounding);
+		}
+
+		// Weights two background pixels (8 x 16-bit channels) and subtracts the already weighted
+		// foreground (fg_lo/fg_hi, 4 x 32-bit channels each) from them. Returns two packed BGRA
+		// pixels in the low 64 bits.
+		static __m128i Blend(__m128i fg_lo, __m128i fg_hi, __m128i bgcolor, __m128i bgalpha)
+		{
+			bgcolor = _mm_mullo_epi16(bgcolor, bgalpha);
+			__m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128());
+			__m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128());
+
+			// The differences are signed 32-bit values, so they need the 32-bit arithmetic shift;
+			// the saturating packs then clamp negative results to 0 and large ones to 255.
+			__m128i out_lo = _mm_srai_epi32(_mm_sub_epi32(bg_lo, fg_lo), 8);
+			__m128i out_hi = _mm_srai_epi32(_mm_sub_epi32(bg_hi, fg_hi), 8);
+			__m128i outcolor = _mm_packs_epi32(out_lo, out_hi);
+			outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128());
+			return _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000));
+		}
+	};
+
+	// Column drawer for the "shaded" sprite style: writes args.SolidColor(), scaled by the light
+	// level derived from args.Light(), with the output alpha forced to 0xff.
+	//
+	// NOTE(review): the blend weight is still the placeholder 256, so the destination contributes
+	// nothing and the value sampled from the texture is fetched but not consumed (see the "To do"
+	// in ShadeAndBlend).
+	// NOTE(review): source is read as 32-bit values and used directly as an index into the 8-bit
+	// colormap table; whether those values stay within the table is not visible here - confirm the
+	// pixel format before wiring the sampled alpha into the blend.
+	// NOTE(review): the generated code this replaces had a second branch for
+	// !shade_constants.simple_shade that computed desaturate/fade/light constants without ever
+	// applying them, so both branches ran the same pixel code. A single path is kept here, which
+	// drops the args.ColormapConstants() call on the assumption that it is a plain accessor - confirm.
+	class DrawSpriteShaded32Command : public DrawerCommand
+	{
+	protected:
+		SpriteDrawerArgs args;
+
+	public:
+		DrawSpriteShaded32Command(const SpriteDrawerArgs &drawerargs) : args(drawerargs) { }
+
+		void Execute(DrawerThread *thread) override
+		{
+			const uint32_t *source = (const uint32_t*)args.TexturePixels();
+			const uint8_t *colormap = args.Colormap();
+
+			// Shade constants: colour lanes are multiplied by light, the alpha lane by 256 (left unchanged after >> 8).
+			int light = 256 - (args.Light() >> (FRACBITS - 8));
+			__m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light);
+
+			int count = args.Count();
+			int pitch = RenderViewport::Instance()->RenderTarget->GetPitch();
+			uint32_t fracstep = args.TextureVStep();
+			uint32_t frac = args.TextureVPos();
+			uint32_t *dest = (uint32_t*)args.Dest();
+			int dest_y = args.DestY();
+
+			// The thread object rewrites count/frac/dest and the step sizes for the rows it handles.
+			count = thread->count_for_thread(dest_y, count);
+			if (count <= 0) return;
+			frac += thread->skipped_by_thread(dest_y) * fracstep;
+			dest = thread->dest_for_thread(dest_y, pitch, dest);
+			fracstep *= thread->num_cores;
+			pitch *= thread->num_cores;
+			uint32_t color = args.SolidColor();
+
+			// Every pixel uses the same solid colour; keep it expanded to 16 bits per channel.
+			__m128i fgpair = _mm_unpacklo_epi8(_mm_set1_epi32(color), _mm_setzero_si128());
+
+			// Two rows per iteration.
+			int ssecount = count / 2;
+			for (int index = 0; index < ssecount; index++)
+			{
+				int offset = index * pitch * 2;
+				uint32_t desttmp[2];
+				desttmp[0] = dest[offset];
+				desttmp[1] = dest[offset + pitch];
+				__m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128());
+
+				// Sample
+				unsigned int samplealpha0 = SampleShadeAlpha(source, colormap, frac);
+				frac += fracstep;
+				unsigned int samplealpha1 = SampleShadeAlpha(source, colormap, frac);
+				frac += fracstep;
+
+				__m128i outcolor = ShadeAndBlend(fgpair, bgcolor, mlight, samplealpha0, samplealpha1);
+
+				_mm_storel_epi64((__m128i*)desttmp, outcolor);
+				dest[offset] = desttmp[0];
+				dest[offset + pitch] = desttmp[1];
+			}
+
+			// Odd row count: handle the last row on its own (only the low pixel of the result is used).
+			if (ssecount * 2 != count)
+			{
+				int offset = ssecount * 2 * pitch;
+				__m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128());
+
+				// Sample
+				unsigned int samplealpha0 = SampleShadeAlpha(source, colormap, frac);
+
+				dest[offset] = _mm_cvtsi128_si32(ShadeAndBlend(fgpair, bgcolor, mlight, samplealpha0, 0));
+			}
+		}
+
+		FString DebugInfo() override { return "DrawSpriteShaded32Command"; }
+
+	private:
+		// Looks up colormap[source[frac >> FRACBITS]], clamps it to 0..64 and scales it by 4.
+		static unsigned int SampleShadeAlpha(const uint32_t *source, const uint8_t *colormap, uint32_t frac)
+		{
+			unsigned int samplealphaout = colormap[source[frac >> FRACBITS]];
+			return clamp(samplealphaout, 0, 64) * 4;
+		}
+
+		// Applies the light level to two foreground pixels and blends them over bgcolor.
+		// Returns two packed BGRA pixels in the low 64 bits.
+		static __m128i ShadeAndBlend(__m128i fgcolor, __m128i bgcolor, __m128i mlight, unsigned int samplealpha0, unsigned int samplealpha1)
+		{
+			// Shade
+			fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8);
+
+			// Blend
+			(void)samplealpha0; // not consumed yet, see the "To do" below
+			(void)samplealpha1;
+			int shadealpha = 256; // To do: this comes from a sampled source (samplealphaout)
+			__m128i alpha = _mm_set1_epi16(shadealpha);
+			__m128i inv_alpha = _mm_set1_epi16(256 - shadealpha);
+
+			fgcolor = _mm_mullo_epi16(fgcolor, alpha);
+			bgcolor = _mm_mullo_epi16(bgcolor, inv_alpha);
+			__m128i outcolor = _mm_srli_epi16(_mm_add_epi16(fgcolor, bgcolor), 8);
+			outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128());
+			return _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000));
+		}
+	};
+
+ class DrawSpriteTranslated32Command : public DrawerCommand + { + protected: + SpriteDrawerArgs args; + + public: + DrawSpriteTranslated32Command(const SpriteDrawerArgs &drawerargs) : args(drawerargs) { } + + void Execute(DrawerThread *thread) override + { + auto shade_constants = args.ColormapConstants(); + if (shade_constants.simple_shade) + { + const uint32_t *source = (const uint32_t*)args.TexturePixels(); + const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); + const uint8_t *colormap = args.Colormap(); + const uint32_t *translation = (const uint32_t*)args.TranslationMap(); + int textureheight = args.TextureHeight(); + uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; + + // Shade constants + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + + int count = args.Count(); + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); + uint32_t fracstep = args.TextureVStep(); + uint32_t frac = args.TextureVPos(); + uint32_t texturefracx = args.TextureUPos(); + uint32_t *dest = (uint32_t*)args.Dest(); + int dest_y = args.DestY(); + + count = thread->count_for_thread(dest_y, count); + if (count <= 0) return; +
frac += thread->skipped_by_thread(dest_y) * fracstep; + dest = thread->dest_for_thread(dest_y, pitch, dest); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); + __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + uint32_t srccolor = args.SrcColorBgra(); + uint32_t color = args.SolidColor(); + + int ssecount = count / 2; + for (int index = 0; index < ssecount; index++) + { + int offset = index * pitch * 2; + uint32_t desttmp[2]; + desttmp[0] = dest[offset]; + desttmp[1] = dest[offset + pitch]; + + // Sample + unsigned int ifgcolor[2]; + { + unsigned int sampleout = translation[source[frac >> FRACBITS]]; + ifgcolor[0] = sampleout; + frac += fracstep; + } + { + unsigned int sampleout = translation[source[frac >> FRACBITS]]; + ifgcolor[1] = sampleout; + frac += fracstep; + } + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + __m128i outcolor = fgcolor; + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + + _mm_storel_epi64((__m128i*)desttmp, outcolor); + dest[offset] = desttmp[0]; + dest[offset + pitch] = desttmp[1]; + } + + if (ssecount * 2 != count) + { + int index = ssecount * 2; + int offset = index * pitch; + + // Sample + unsigned int ifgcolor[2]; + unsigned int sampleout = translation[source[frac >> FRACBITS]]; + ifgcolor[0] = sampleout; + ifgcolor[1] = 0; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + __m128i outcolor = fgcolor; + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + + dest[offset] = _mm_cvtsi128_si32(outcolor); + } + } + else + { + const uint32_t *source = (const uint32_t*)args.TexturePixels(); + const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); + const uint8_t 
*colormap = args.Colormap(); + const uint32_t *translation = (const uint32_t*)args.TranslationMap(); + int textureheight = args.TextureHeight(); + uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; + + // Shade constants + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); + __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); + shade_fade = _mm_mullo_epi16(shade_fade, inv_light); + __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); + int desaturate = shade_constants.desaturate; + + int count = args.Count(); + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); + uint32_t fracstep = args.TextureVStep(); + uint32_t frac = args.TextureVPos(); + uint32_t texturefracx = args.TextureUPos(); + uint32_t *dest = (uint32_t*)args.Dest(); + int dest_y = args.DestY(); + + count = thread->count_for_thread(dest_y, count); + if (count <= 0) return; + frac += thread->skipped_by_thread(dest_y) * fracstep; + dest = thread->dest_for_thread(dest_y, pitch, dest); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); + __m128i 
destalpha = _mm_set1_epi16(args.DestAlpha()); + uint32_t srccolor = args.SrcColorBgra(); + uint32_t color = args.SolidColor(); + + int ssecount = count / 2; + for (int index = 0; index < ssecount; index++) + { + int offset = index * pitch * 2; + uint32_t desttmp[2]; + desttmp[0] = dest[offset]; + desttmp[1] = dest[offset + pitch]; + + // Sample + unsigned int ifgcolor[2]; + { + unsigned int sampleout = translation[source[frac >> FRACBITS]]; + ifgcolor[0] = sampleout; + frac += fracstep; + } + { + unsigned int sampleout = translation[source[frac >> FRACBITS]]; + ifgcolor[1] = sampleout; + frac += fracstep; + } + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + __m128i outcolor = fgcolor; + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + + _mm_storel_epi64((__m128i*)desttmp, outcolor); + dest[offset] = desttmp[0]; + dest[offset + pitch] = desttmp[1]; + } + + if (ssecount * 2 != count) + { + int index = ssecount * 2; + int offset = index * pitch; + + // Sample + unsigned int ifgcolor[2]; + unsigned int sampleout = translation[source[frac >> FRACBITS]]; + ifgcolor[0] = sampleout; + ifgcolor[1] = 0; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + __m128i outcolor = fgcolor; + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + + dest[offset] = _mm_cvtsi128_si32(outcolor); + } + } + } + + FString DebugInfo() override { return "DrawSpriteTranslated32Command"; } + }; + + class DrawSpriteTranslatedAddClamp32Command : public DrawerCommand + { + protected: + SpriteDrawerArgs args; + + public: + DrawSpriteTranslatedAddClamp32Command(const SpriteDrawerArgs &drawerargs) : args(drawerargs) { } + + void Execute(DrawerThread *thread) override + { + auto shade_constants = 
args.ColormapConstants(); + if (shade_constants.simple_shade) // NOTE(review): both branches run the same additive blend; the extra shade constants of the else branch are unused + { + const uint32_t *source = (const uint32_t*)args.TexturePixels(); + const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); + const uint8_t *colormap = args.Colormap(); + const uint32_t *translation = (const uint32_t*)args.TranslationMap(); + int textureheight = args.TextureHeight(); + uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; + + // Shade constants + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + + int count = args.Count(); + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); + uint32_t fracstep = args.TextureVStep(); + uint32_t frac = args.TextureVPos(); + uint32_t texturefracx = args.TextureUPos(); + uint32_t *dest = (uint32_t*)args.Dest(); + int dest_y = args.DestY(); + + count = thread->count_for_thread(dest_y, count); + if (count <= 0) return; + frac += thread->skipped_by_thread(dest_y) * fracstep; + dest = thread->dest_for_thread(dest_y, pitch, dest); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); + __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + uint32_t srccolor = args.SrcColorBgra(); + uint32_t color = args.SolidColor(); + + int ssecount = count / 2; // two destination rows per iteration + for (int index = 0; index < ssecount; index++) + { + int offset = index * pitch * 2; + uint32_t desttmp[2]; + desttmp[0] = dest[offset]; + desttmp[1] = dest[offset + pitch]; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + { + unsigned int sampleout = translation[source[frac >> FRACBITS]]; + ifgcolor[0] = sampleout; + frac += fracstep; + } + { + unsigned int sampleout = translation[source[frac >> FRACBITS]]; + 
ifgcolor[1] = sampleout; + frac += fracstep; + } + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend: clamp((fg * fgalpha + bg * bgalpha) >> 8) per channel, alpha forced to 0xff + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_shufflehi_epi16(alpha, _MM_SHUFFLE(3,3,3,3)); // shuffle alpha (not fgcolor) so the low pixel keeps its broadcast + alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + + __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); + bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + + __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); + fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_srai_epi32(_mm_add_epi32(fg_lo, bg_lo), 8); // 32-bit lanes: shift with epi32 + __m128i out_hi = _mm_srai_epi32(_mm_add_epi32(fg_hi, bg_hi), 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); // pack the blended result, not the raw foreground + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + _mm_storel_epi64((__m128i*)desttmp, outcolor); + dest[offset] = desttmp[0]; + dest[offset + pitch] = desttmp[1]; + } + + if (ssecount * 2 != count) // odd count: one row left over + { + int index = ssecount * 2; + int offset = index * pitch; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + unsigned int sampleout = translation[source[frac >> FRACBITS]]; + ifgcolor[0] = sampleout; + ifgcolor[1] = 0; + __m128i fgcolor = 
_mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend: clamp((fg * fgalpha + bg * bgalpha) >> 8) per channel, alpha forced to 0xff + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_shufflehi_epi16(alpha, _MM_SHUFFLE(3,3,3,3)); // shuffle alpha (not fgcolor) so the low pixel keeps its broadcast + alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + + __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); + bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + + __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); + fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_srai_epi32(_mm_add_epi32(fg_lo, bg_lo), 8); // 32-bit lanes: shift with epi32 + __m128i out_hi = _mm_srai_epi32(_mm_add_epi32(fg_hi, bg_hi), 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); // pack the blended result, not the raw foreground + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + dest[offset] = _mm_cvtsi128_si32(outcolor); + } + } + else + { + const uint32_t *source = (const uint32_t*)args.TexturePixels(); + const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); + const uint8_t *colormap = args.Colormap(); + const uint32_t *translation = (const uint32_t*)args.TranslationMap(); + int textureheight = args.TextureHeight(); + uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; + + // Shade constants + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, 
light, 256, light, light, light); + __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); + __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); + shade_fade = _mm_mullo_epi16(shade_fade, inv_light); + __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); + int desaturate = shade_constants.desaturate; + + int count = args.Count(); + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); + uint32_t fracstep = args.TextureVStep(); + uint32_t frac = args.TextureVPos(); + uint32_t texturefracx = args.TextureUPos(); + uint32_t *dest = (uint32_t*)args.Dest(); + int dest_y = args.DestY(); + + count = thread->count_for_thread(dest_y, count); + if (count <= 0) return; + frac += thread->skipped_by_thread(dest_y) * fracstep; + dest = thread->dest_for_thread(dest_y, pitch, dest); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); + __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + uint32_t srccolor = args.SrcColorBgra(); + uint32_t color = args.SolidColor(); + + int ssecount = count / 2; + for (int index = 0; index < ssecount; index++) + { + int offset = index * pitch * 2; + uint32_t desttmp[2]; + desttmp[0] = dest[offset]; + desttmp[1] = dest[offset + pitch]; + __m128i bgcolor 
= _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + { + unsigned int sampleout = translation[source[frac >> FRACBITS]]; + ifgcolor[0] = sampleout; + frac += fracstep; + } + { + unsigned int sampleout = translation[source[frac >> FRACBITS]]; + ifgcolor[1] = sampleout; + frac += fracstep; + } + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend: clamp((fg * fgalpha + bg * bgalpha) >> 8) per channel, alpha forced to 0xff + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_shufflehi_epi16(alpha, _MM_SHUFFLE(3,3,3,3)); // shuffle alpha (not fgcolor) so the low pixel keeps its broadcast + alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + + __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); + bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + + __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); + fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_srai_epi32(_mm_add_epi32(fg_lo, bg_lo), 8); // 32-bit lanes: shift with epi32 + __m128i out_hi = _mm_srai_epi32(_mm_add_epi32(fg_hi, bg_hi), 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); // pack the blended result, not the raw foreground + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + _mm_storel_epi64((__m128i*)desttmp, outcolor); + dest[offset] = desttmp[0]; + dest[offset + pitch] = desttmp[1]; + } + + if (ssecount * 2 != count) + { + int index = ssecount * 2; + 
int offset = index * pitch; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + unsigned int sampleout = translation[source[frac >> FRACBITS]]; + ifgcolor[0] = sampleout; + ifgcolor[1] = 0; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend: clamp((fg * fgalpha + bg * bgalpha) >> 8) per channel, alpha forced to 0xff + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_shufflehi_epi16(alpha, _MM_SHUFFLE(3,3,3,3)); // shuffle alpha (not fgcolor) so the low pixel keeps its broadcast + alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + + __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); + bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + + __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); + fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_srai_epi32(_mm_add_epi32(fg_lo, bg_lo), 8); // 32-bit lanes: shift with epi32 + __m128i out_hi = _mm_srai_epi32(_mm_add_epi32(fg_hi, bg_hi), 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); // pack the blended result, not the raw foreground + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + dest[offset] = _mm_cvtsi128_si32(outcolor); + } + } + } + + FString DebugInfo() override { return "DrawSpriteTranslatedAddClamp32Command"; } + }; + + class DrawSpriteTranslatedSubClamp32Command : public DrawerCommand + { + protected: + SpriteDrawerArgs args; + + 
public: + DrawSpriteTranslatedSubClamp32Command(const SpriteDrawerArgs &drawerargs) : args(drawerargs) { } + + void Execute(DrawerThread *thread) override // Subtractive blend (foreground minus background) for the rows the DrawerThread helpers assign to this thread + { + auto shade_constants = args.ColormapConstants(); + if (shade_constants.simple_shade) // NOTE(review): both branches run the same blend; the extra shade constants of the else branch are unused + { + const uint32_t *source = (const uint32_t*)args.TexturePixels(); + const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); + const uint8_t *colormap = args.Colormap(); + const uint32_t *translation = (const uint32_t*)args.TranslationMap(); + int textureheight = args.TextureHeight(); + uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; + + // Shade constants + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + + int count = args.Count(); + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); + uint32_t fracstep = args.TextureVStep(); + uint32_t frac = args.TextureVPos(); + uint32_t texturefracx = args.TextureUPos(); + uint32_t *dest = (uint32_t*)args.Dest(); + int dest_y = args.DestY(); + + count = thread->count_for_thread(dest_y, count); + if (count <= 0) return; + frac += thread->skipped_by_thread(dest_y) * fracstep; + dest = thread->dest_for_thread(dest_y, pitch, dest); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); + __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + uint32_t srccolor = args.SrcColorBgra(); + uint32_t color = args.SolidColor(); + + int ssecount = count / 2; // two destination rows per iteration + for (int index = 0; index < ssecount; index++) + { + int offset = index * pitch * 2; + uint32_t desttmp[2]; + desttmp[0] = dest[offset]; + desttmp[1] = dest[offset + pitch]; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + { + 
unsigned int sampleout = translation[source[frac >> FRACBITS]]; + ifgcolor[0] = sampleout; + frac += fracstep; + } + { + unsigned int sampleout = translation[source[frac >> FRACBITS]]; + ifgcolor[1] = sampleout; + frac += fracstep; + } + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend: clamp((fg * fgalpha - bg * bgalpha) >> 8) per channel, alpha forced to 0xff + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_shufflehi_epi16(alpha, _MM_SHUFFLE(3,3,3,3)); // shuffle alpha (not fgcolor) so the low pixel keeps its broadcast + alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + + __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); + bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + + __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); + fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_srai_epi32(_mm_sub_epi32(fg_lo, bg_lo), 8); // 32-bit lanes: arithmetic epi32 shift keeps the sign for clamping + __m128i out_hi = _mm_srai_epi32(_mm_sub_epi32(fg_hi, bg_hi), 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); // pack the blended result, not the raw foreground + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + _mm_storel_epi64((__m128i*)desttmp, outcolor); + dest[offset] = desttmp[0]; + dest[offset + pitch] = desttmp[1]; + } + + if (ssecount * 2 != count) // odd count: one row left over + { + int index = ssecount * 2; + int offset = index * pitch; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); + + 
// Sample + unsigned int ifgcolor[2]; + unsigned int sampleout = translation[source[frac >> FRACBITS]]; + ifgcolor[0] = sampleout; + ifgcolor[1] = 0; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend: clamp((fg * fgalpha - bg * bgalpha) >> 8) per channel, alpha forced to 0xff + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_shufflehi_epi16(alpha, _MM_SHUFFLE(3,3,3,3)); // shuffle alpha (not fgcolor) so the low pixel keeps its broadcast + alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + + __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); + bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + + __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); + fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_srai_epi32(_mm_sub_epi32(fg_lo, bg_lo), 8); // 32-bit lanes: arithmetic epi32 shift keeps the sign for clamping + __m128i out_hi = _mm_srai_epi32(_mm_sub_epi32(fg_hi, bg_hi), 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); // pack the blended result, not the raw foreground + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + dest[offset] = _mm_cvtsi128_si32(outcolor); + } + } + else + { + const uint32_t *source = (const uint32_t*)args.TexturePixels(); + const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); + const uint8_t *colormap = args.Colormap(); + const uint32_t *translation = (const uint32_t*)args.TranslationMap(); + int textureheight = args.TextureHeight(); + uint32_t one = ((0x80000000 + 
textureheight - 1) / textureheight) * 2 + 1; + + // Shade constants + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); + __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); + shade_fade = _mm_mullo_epi16(shade_fade, inv_light); + __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); + int desaturate = shade_constants.desaturate; + + int count = args.Count(); + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); + uint32_t fracstep = args.TextureVStep(); + uint32_t frac = args.TextureVPos(); + uint32_t texturefracx = args.TextureUPos(); + uint32_t *dest = (uint32_t*)args.Dest(); + int dest_y = args.DestY(); + + count = thread->count_for_thread(dest_y, count); + if (count <= 0) return; + frac += thread->skipped_by_thread(dest_y) * fracstep; + dest = thread->dest_for_thread(dest_y, pitch, dest); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); + __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + uint32_t srccolor = args.SrcColorBgra(); + uint32_t color = args.SolidColor(); + + int ssecount = count / 2; + for (int index = 
0; index < ssecount; index++) + { + int offset = index * pitch * 2; + uint32_t desttmp[2]; + desttmp[0] = dest[offset]; + desttmp[1] = dest[offset + pitch]; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + { + unsigned int sampleout = translation[source[frac >> FRACBITS]]; + ifgcolor[0] = sampleout; + frac += fracstep; + } + { + unsigned int sampleout = translation[source[frac >> FRACBITS]]; + ifgcolor[1] = sampleout; + frac += fracstep; + } + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend: clamp((fg * fgalpha - bg * bgalpha) >> 8) per channel, alpha forced to 0xff + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_shufflehi_epi16(alpha, _MM_SHUFFLE(3,3,3,3)); // shuffle alpha (not fgcolor) so the low pixel keeps its broadcast + alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + + __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); + bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + + __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); + fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_srai_epi32(_mm_sub_epi32(fg_lo, bg_lo), 8); // 32-bit lanes: arithmetic epi32 shift keeps the sign for clamping + __m128i out_hi = _mm_srai_epi32(_mm_sub_epi32(fg_hi, bg_hi), 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); // pack the blended result, not the raw foreground + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + 
_mm_storel_epi64((__m128i*)desttmp, outcolor); + dest[offset] = desttmp[0]; + dest[offset + pitch] = desttmp[1]; + } + + if (ssecount * 2 != count) + { + int index = ssecount * 2; + int offset = index * pitch; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + unsigned int sampleout = translation[source[frac >> FRACBITS]]; + ifgcolor[0] = sampleout; + ifgcolor[1] = 0; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend: clamp((fg * fgalpha - bg * bgalpha) >> 8) per channel, alpha forced to 0xff + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_shufflehi_epi16(alpha, _MM_SHUFFLE(3,3,3,3)); // shuffle alpha (not fgcolor) so the low pixel keeps its broadcast + alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + + __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); + bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + + __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); + fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_srai_epi32(_mm_sub_epi32(fg_lo, bg_lo), 8); // 32-bit lanes: arithmetic epi32 shift keeps the sign for clamping + __m128i out_hi = _mm_srai_epi32(_mm_sub_epi32(fg_hi, bg_hi), 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); // pack the blended result, not the raw foreground + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + dest[offset] = _mm_cvtsi128_si32(outcolor); + } + } + } + + FString DebugInfo() 
override { return "DrawSpriteTranslatedSubClamp32Command"; } + }; + + class DrawSpriteTranslatedRevSubClamp32Command : public DrawerCommand + { + protected: + SpriteDrawerArgs args; + + public: + DrawSpriteTranslatedRevSubClamp32Command(const SpriteDrawerArgs &drawerargs) : args(drawerargs) { } + + void Execute(DrawerThread *thread) override + { + auto shade_constants = args.ColormapConstants(); + if (shade_constants.simple_shade) + { + const uint32_t *source = (const uint32_t*)args.TexturePixels(); + const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); + const uint8_t *colormap = args.Colormap(); + const uint32_t *translation = (const uint32_t*)args.TranslationMap(); + int textureheight = args.TextureHeight(); + uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; + + // Shade constants + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + + int count = args.Count(); + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); + uint32_t fracstep = args.TextureVStep(); + uint32_t frac = args.TextureVPos(); + uint32_t texturefracx = args.TextureUPos(); + uint32_t *dest = (uint32_t*)args.Dest(); + int dest_y = args.DestY(); + + count = thread->count_for_thread(dest_y, count); + if (count <= 0) return; + frac += thread->skipped_by_thread(dest_y) * fracstep; + dest = thread->dest_for_thread(dest_y, pitch, dest); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); + __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + uint32_t srccolor = args.SrcColorBgra(); + uint32_t color = args.SolidColor(); + + int ssecount = count / 2; + for (int index = 0; index < ssecount; index++) + { + int offset = index * pitch * 2; + uint32_t desttmp[2]; + desttmp[0] = 
dest[offset]; + desttmp[1] = dest[offset + pitch]; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + { + unsigned int sampleout = translation[source[frac >> FRACBITS]]; + ifgcolor[0] = sampleout; + frac += fracstep; + } + { + unsigned int sampleout = translation[source[frac >> FRACBITS]]; + ifgcolor[1] = sampleout; + frac += fracstep; + } + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + + __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); + bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + + __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); + fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_srai_epi16(_mm_sub_epi32(bg_lo, fg_lo), 8); + __m128i out_hi = _mm_srai_epi16(_mm_sub_epi32(bg_hi, fg_hi), 8); + __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + _mm_storel_epi64((__m128i*)desttmp, outcolor); + dest[offset] = desttmp[0]; + dest[offset + pitch] = desttmp[1]; 
+ } + + if (ssecount * 2 != count) + { + int index = ssecount * 2; + int offset = index * pitch; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + unsigned int sampleout = translation[source[frac >> FRACBITS]]; + ifgcolor[0] = sampleout; + ifgcolor[1] = 0; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + + __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); + bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + + __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); + fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_srai_epi16(_mm_sub_epi32(bg_lo, fg_lo), 8); + __m128i out_hi = _mm_srai_epi16(_mm_sub_epi32(bg_hi, fg_hi), 8); + __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + dest[offset] = _mm_cvtsi128_si32(outcolor); + } + } + else + { + const uint32_t *source = (const uint32_t*)args.TexturePixels(); + const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); + 
const uint8_t *colormap = args.Colormap(); + const uint32_t *translation = (const uint32_t*)args.TranslationMap(); + int textureheight = args.TextureHeight(); + uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; + + // Shade constants + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); + __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); + shade_fade = _mm_mullo_epi16(shade_fade, inv_light); + __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); + int desaturate = shade_constants.desaturate; + + int count = args.Count(); + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); + uint32_t fracstep = args.TextureVStep(); + uint32_t frac = args.TextureVPos(); + uint32_t texturefracx = args.TextureUPos(); + uint32_t *dest = (uint32_t*)args.Dest(); + int dest_y = args.DestY(); + + count = thread->count_for_thread(dest_y, count); + if (count <= 0) return; + frac += thread->skipped_by_thread(dest_y) * fracstep; + dest = thread->dest_for_thread(dest_y, pitch, dest); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + __m128i srcalpha = 
_mm_set1_epi16(args.SrcAlpha()); + __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + uint32_t srccolor = args.SrcColorBgra(); + uint32_t color = args.SolidColor(); + + int ssecount = count / 2; + for (int index = 0; index < ssecount; index++) + { + int offset = index * pitch * 2; + uint32_t desttmp[2]; + desttmp[0] = dest[offset]; + desttmp[1] = dest[offset + pitch]; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + { + unsigned int sampleout = translation[source[frac >> FRACBITS]]; + ifgcolor[0] = sampleout; + frac += fracstep; + } + { + unsigned int sampleout = translation[source[frac >> FRACBITS]]; + ifgcolor[1] = sampleout; + frac += fracstep; + } + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + + __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); + bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + + __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); + fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_srai_epi16(_mm_sub_epi32(bg_lo, fg_lo), 8); + __m128i out_hi = 
_mm_srai_epi16(_mm_sub_epi32(bg_hi, fg_hi), 8); + __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + _mm_storel_epi64((__m128i*)desttmp, outcolor); + dest[offset] = desttmp[0]; + dest[offset + pitch] = desttmp[1]; + } + + if (ssecount * 2 != count) + { + int index = ssecount * 2; + int offset = index * pitch; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + unsigned int sampleout = translation[source[frac >> FRACBITS]]; + ifgcolor[0] = sampleout; + ifgcolor[1] = 0; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + + __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); + bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + + __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); + fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_srai_epi16(_mm_sub_epi32(bg_lo, fg_lo), 8); + __m128i out_hi = _mm_srai_epi16(_mm_sub_epi32(bg_hi, fg_hi), 8); + __m128i outcolor = 
_mm_packs_epi32(fg_lo, fg_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + dest[offset] = _mm_cvtsi128_si32(outcolor); + } + } + } + + FString DebugInfo() override { return "DrawSpriteTranslatedRevSubClamp32Command"; } + }; + +} diff --git a/src/swrenderer/drawers/r_draw_sprite32.php b/src/swrenderer/drawers/r_draw_sprite32.php new file mode 100644 index 0000000000..9d4721de67 --- /dev/null +++ b/src/swrenderer/drawers/r_draw_sprite32.php @@ -0,0 +1,363 @@ +#!/usr/bin/php +/* +** Drawer commands for sprites +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. +** +*/ + +/* + Warning: this C++ source file has been auto-generated. Please modify the original php script that generated it. 
+*/ + +#pragma once + +#include "swrenderer/drawers/r_draw_rgba.h" +#include "swrenderer/viewport/r_walldrawer.h" + +namespace swrenderer +{ + + class : public DrawerCommand + { + protected: + SpriteDrawerArgs args; + + public: + (const SpriteDrawerArgs &drawerargs) : args(drawerargs) { } + + void Execute(DrawerThread *thread) override + { + auto shade_constants = args.ColormapConstants(); + if (shade_constants.simple_shade) + { + + } + else + { + + } + } + + FString DebugInfo() override { return ""; } + }; + + + const uint32_t *source = (const uint32_t*)args.TexturePixels(); + const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); + const uint8_t *colormap = args.Colormap(); + const uint32_t *translation = (const uint32_t*)args.TranslationMap(); + + bool is_nearest_filter = (source2 == nullptr); + if (is_nearest_filter) + { + + } + else + { + + } + + int textureheight = args.TextureHeight(); + uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; + + // Shade constants + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + + __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); + __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); + shade_fade = _mm_mullo_epi16(shade_fade, inv_light); + __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, 
shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); + int desaturate = shade_constants.desaturate; + + + int count = args.Count(); + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); + uint32_t fracstep = args.TextureVStep(); + uint32_t frac = args.TextureVPos(); + uint32_t texturefracx = args.TextureUPos(); + uint32_t *dest = (uint32_t*)args.Dest(); + int dest_y = args.DestY(); + + count = thread->count_for_thread(dest_y, count); + if (count <= 0) return; + frac += thread->skipped_by_thread(dest_y) * fracstep; + dest = thread->dest_for_thread(dest_y, pitch, dest); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + + frac -= one / 2; + + __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); + __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + uint32_t srccolor = args.SrcColorBgra(); + uint32_t color = args.SolidColor(); + + int ssecount = count / 2; + for (int index = 0; index < ssecount; index++) + { + int offset = index * pitch * 2; + uint32_t desttmp[2]; + desttmp[0] = dest[offset]; + desttmp[1] = dest[offset + pitch]; + + __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); + + + // Sample + unsigned int ifgcolor[2]; + { + + ifgcolor[0] = sampleout; + frac += fracstep; + } + { + + ifgcolor[1] = sampleout; + frac += fracstep; + } + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + + + // Blend + + + _mm_storel_epi64((__m128i*)desttmp, outcolor); + dest[offset] = desttmp[0]; + dest[offset + pitch] = desttmp[1]; + } + + if (ssecount * 2 != count) + { + int index = ssecount * 2; + int offset = index * pitch; + + __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); + + + // Sample + unsigned int ifgcolor[2]; + + ifgcolor[0] = sampleout; + ifgcolor[1] = 0; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), 
_mm_setzero_si128()); + + // Shade + + + // Blend + + + dest[offset] = _mm_cvtsi128_si32(outcolor); + } + + unsigned int sampleout = color; + unsigned int samplealphaout = colormap[source[frac >> FRACBITS]]; + samplealphaout = clamp(samplealphaout, 0, 64) * 4; + + unsigned int sampleout = translation[source[frac >> FRACBITS]]; + + unsigned int sampleout = srccolor; + + int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; + unsigned int sampleout = source[sample_index]; + + unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; + unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; + unsigned int y0 = frac_y0 >> FRACBITS; + unsigned int y1 = frac_y1 >> FRACBITS; + + unsigned int p00 = source[y0]; + unsigned int p01 = source[y1]; + unsigned int p10 = source2[y0]; + unsigned int p11 = source2[y1]; + + unsigned int inv_b = texturefracx; + unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; + unsigned int a = 16 - inv_a; + unsigned int b = 16 - inv_b; + + unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + int blue0 = BPART(ifgcolor[0]); + int green0 = GPART(ifgcolor[0]); + int red0 = RPART(ifgcolor[0]); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + + int blue1 = BPART(ifgcolor[1]); + int green1 = GPART(ifgcolor[1]); + int red1 = 
RPART(ifgcolor[1]); + int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); + + fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + __m128i outcolor = fgcolor; + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + + int shadealpha = 256; // To do: this comes from a sampled source (samplealphaout) + __m128i alpha = _mm_set1_epi16(shadealpha); + __m128i inv_alpha = _mm_set1_epi16(256 - shadealpha); + + fgcolor = _mm_mullo_epi16(fgcolor, alpha); + bgcolor = _mm_mullo_epi16(bgcolor, inv_alpha); + __m128i outcolor = _mm_srli_epi16(_mm_add_epi16(fgcolor, bgcolor), 8); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + + __m128i out_lo = _mm_srai_epi16(_mm_add_epi32(fg_lo, bg_lo), 8); + __m128i out_hi = _mm_srai_epi16(_mm_add_epi32(fg_hi, bg_hi), 8); + __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + + __m128i out_lo = _mm_srai_epi16(_mm_sub_epi32(fg_lo, bg_lo), 8); + __m128i out_hi = _mm_srai_epi16(_mm_sub_epi32(fg_hi, bg_hi), 8); + __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + + __m128i out_lo = _mm_srai_epi16(_mm_sub_epi32(bg_lo, fg_lo), 8); + __m128i out_hi = _mm_srai_epi16(_mm_sub_epi32(bg_hi, fg_hi), 8); + __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = 
_mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + + + __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); + bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + + __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); + fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + +} From dfcfd0462fd0ad33ab6a0c60c8d3514681d2b365 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 19 Feb 2017 01:23:16 +0100 Subject: [PATCH 862/912] Hook up sprite php drawers (but disable them for now as they don't fully work yet) --- src/CMakeLists.txt | 1 + src/swrenderer/drawers/r_draw_rgba.cpp | 136 ++++++++++++++++++++++++- src/swrenderer/drawers/r_draw_rgba.h | 34 +++---- src/swrenderer/things/r_sprite.cpp | 6 +- 4 files changed, 156 insertions(+), 21 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 6d656086b5..c7a9eafd80 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -805,6 +805,7 @@ file( GLOB HEADER_FILES xlat/*.h swrenderer/*.h swrenderer/drawers/*.h + swrenderer/drawers/*.php swrenderer/scene/*.h swrenderer/segments/*.h swrenderer/line/*.h diff --git a/src/swrenderer/drawers/r_draw_rgba.cpp b/src/swrenderer/drawers/r_draw_rgba.cpp index b61cdb7303..d2baacaaea 100644 --- a/src/swrenderer/drawers/r_draw_rgba.cpp +++ 
b/src/swrenderer/drawers/r_draw_rgba.cpp @@ -60,7 +60,7 @@ CVAR(Bool, r_mipmap, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); // Level of detail texture bias CVAR(Float, r_lod_bias, -1.5, 0); // To do: add CVAR_ARCHIVE | CVAR_GLOBALCONFIG when a good default has been decided -CVAR(Bool, r_phpdrawers, true, 0); +CVAR(Bool, r_phpdrawers, false, 0); namespace swrenderer { @@ -112,6 +112,140 @@ namespace swrenderer Queue->Push(args); } + void SWTruecolorDrawers::DrawColumn(const SpriteDrawerArgs &args) + { + if (r_phpdrawers) + Queue->Push(args); + else + Queue->Push(args); + } + + void SWTruecolorDrawers::FillColumn(const SpriteDrawerArgs &args) + { + if (r_phpdrawers) + Queue->Push(args); + else + Queue->Push(args); + } + + void SWTruecolorDrawers::FillAddColumn(const SpriteDrawerArgs &args) + { + if (r_phpdrawers) + Queue->Push(args); + else + Queue->Push(args); + } + + void SWTruecolorDrawers::FillAddClampColumn(const SpriteDrawerArgs &args) + { + if (r_phpdrawers) + Queue->Push(args); + else + Queue->Push(args); + } + + void SWTruecolorDrawers::FillSubClampColumn(const SpriteDrawerArgs &args) + { + if (r_phpdrawers) + Queue->Push(args); + else + Queue->Push(args); + } + + void SWTruecolorDrawers::FillRevSubClampColumn(const SpriteDrawerArgs &args) + { + if (r_phpdrawers) + Queue->Push(args); + else + Queue->Push(args); + } + + void SWTruecolorDrawers::DrawFuzzColumn(const SpriteDrawerArgs &args) + { + Queue->Push(args); + R_UpdateFuzzPos(args); + } + + void SWTruecolorDrawers::DrawAddColumn(const SpriteDrawerArgs &args) + { + if (r_phpdrawers) + Queue->Push(args); + else + Queue->Push(args); + } + + void SWTruecolorDrawers::DrawTranslatedColumn(const SpriteDrawerArgs &args) + { + if (r_phpdrawers) + Queue->Push(args); + else + Queue->Push(args); + } + + void SWTruecolorDrawers::DrawTranslatedAddColumn(const SpriteDrawerArgs &args) + { + if (r_phpdrawers) + Queue->Push(args); + else + Queue->Push(args); + } + + void SWTruecolorDrawers::DrawShadedColumn(const 
SpriteDrawerArgs &args) + { + if (r_phpdrawers) + Queue->Push(args); + else + Queue->Push(args); + } + + void SWTruecolorDrawers::DrawAddClampColumn(const SpriteDrawerArgs &args) + { + if (r_phpdrawers) + Queue->Push(args); + else + Queue->Push(args); + } + + void SWTruecolorDrawers::DrawAddClampTranslatedColumn(const SpriteDrawerArgs &args) + { + if (r_phpdrawers) + Queue->Push(args); + else + Queue->Push(args); + } + + void SWTruecolorDrawers::DrawSubClampColumn(const SpriteDrawerArgs &args) + { + if (r_phpdrawers) + Queue->Push(args); + else + Queue->Push(args); + } + + void SWTruecolorDrawers::DrawSubClampTranslatedColumn(const SpriteDrawerArgs &args) + { + if (r_phpdrawers) + Queue->Push(args); + else + Queue->Push(args); + } + + void SWTruecolorDrawers::DrawRevSubClampColumn(const SpriteDrawerArgs &args) + { + if (r_phpdrawers) + Queue->Push(args); + else + Queue->Push(args); + } + + void SWTruecolorDrawers::DrawRevSubClampTranslatedColumn(const SpriteDrawerArgs &args) + { + if (r_phpdrawers) + Queue->Push(args); + else + Queue->Push(args); + } + DrawSpanLLVMCommand::DrawSpanLLVMCommand(const SpanDrawerArgs &drawerargs) { auto shade_constants = drawerargs.ColormapConstants(); diff --git a/src/swrenderer/drawers/r_draw_rgba.h b/src/swrenderer/drawers/r_draw_rgba.h index 7bff1cffaf..383ba13a56 100644 --- a/src/swrenderer/drawers/r_draw_rgba.h +++ b/src/swrenderer/drawers/r_draw_rgba.h @@ -367,23 +367,23 @@ namespace swrenderer void DrawWallRevSubClampColumn(const WallDrawerArgs &args) override; void DrawSingleSkyColumn(const SkyDrawerArgs &args) override { Queue->Push(args); } void DrawDoubleSkyColumn(const SkyDrawerArgs &args) override { Queue->Push(args); } - void DrawColumn(const SpriteDrawerArgs &args) override { Queue->Push(args); } - void FillColumn(const SpriteDrawerArgs &args) override { Queue->Push(args); } - void FillAddColumn(const SpriteDrawerArgs &args) override { Queue->Push(args); } - void FillAddClampColumn(const SpriteDrawerArgs &args) override 
{ Queue->Push(args); } - void FillSubClampColumn(const SpriteDrawerArgs &args) override { Queue->Push(args); } - void FillRevSubClampColumn(const SpriteDrawerArgs &args) override { Queue->Push(args); } - void DrawFuzzColumn(const SpriteDrawerArgs &args) override { Queue->Push(args); R_UpdateFuzzPos(args); } - void DrawAddColumn(const SpriteDrawerArgs &args) override { Queue->Push(args); } - void DrawTranslatedColumn(const SpriteDrawerArgs &args) override { Queue->Push(args); } - void DrawTranslatedAddColumn(const SpriteDrawerArgs &args) override { Queue->Push(args); } - void DrawShadedColumn(const SpriteDrawerArgs &args) override { Queue->Push(args); } - void DrawAddClampColumn(const SpriteDrawerArgs &args) override { Queue->Push(args); } - void DrawAddClampTranslatedColumn(const SpriteDrawerArgs &args) override { Queue->Push(args); } - void DrawSubClampColumn(const SpriteDrawerArgs &args) override { Queue->Push(args); } - void DrawSubClampTranslatedColumn(const SpriteDrawerArgs &args) override { Queue->Push(args); } - void DrawRevSubClampColumn(const SpriteDrawerArgs &args) override { Queue->Push(args); } - void DrawRevSubClampTranslatedColumn(const SpriteDrawerArgs &args) override { Queue->Push(args); } + void DrawColumn(const SpriteDrawerArgs &args) override; + void FillColumn(const SpriteDrawerArgs &args) override; + void FillAddColumn(const SpriteDrawerArgs &args) override; + void FillAddClampColumn(const SpriteDrawerArgs &args) override; + void FillSubClampColumn(const SpriteDrawerArgs &args) override; + void FillRevSubClampColumn(const SpriteDrawerArgs &args) override; + void DrawFuzzColumn(const SpriteDrawerArgs &args) override; + void DrawAddColumn(const SpriteDrawerArgs &args) override; + void DrawTranslatedColumn(const SpriteDrawerArgs &args) override; + void DrawTranslatedAddColumn(const SpriteDrawerArgs &args) override; + void DrawShadedColumn(const SpriteDrawerArgs &args) override; + void DrawAddClampColumn(const SpriteDrawerArgs &args) override; + 
void DrawAddClampTranslatedColumn(const SpriteDrawerArgs &args) override; + void DrawSubClampColumn(const SpriteDrawerArgs &args) override; + void DrawSubClampTranslatedColumn(const SpriteDrawerArgs &args) override; + void DrawRevSubClampColumn(const SpriteDrawerArgs &args) override; + void DrawRevSubClampTranslatedColumn(const SpriteDrawerArgs &args) override; void DrawSpan(const SpanDrawerArgs &args) override { Queue->Push(args); } void DrawSpanMasked(const SpanDrawerArgs &args) override { Queue->Push(args); } void DrawSpanTranslucent(const SpanDrawerArgs &args) override { Queue->Push(args); } diff --git a/src/swrenderer/things/r_sprite.cpp b/src/swrenderer/things/r_sprite.cpp index 652bfcef27..8e1d85f585 100644 --- a/src/swrenderer/things/r_sprite.cpp +++ b/src/swrenderer/things/r_sprite.cpp @@ -242,9 +242,9 @@ namespace swrenderer double lightY = light->Y() - ViewPos.Y; double lightZ = light->Z() - ViewPos.Z; - float lx = (float)(lightX * ViewSin - lightY * ViewCos) - pos.X; - float ly = (float)(lightX * ViewTanCos + lightY * ViewTanSin) - pos.Y; - float lz = (float)lightZ - pos.Z; + float lx = (float)(lightX * ViewSin - lightY * ViewCos - pos.X); + float ly = (float)(lightX * ViewTanCos + lightY * ViewTanSin - pos.Y); + float lz = (float)(lightZ - pos.Z); bool is_point_light = (node->lightsource->flags4 & MF4_ATTENUATE) != 0; float LdotL = lx * lx + ly * ly + lz * lz; From ee3bcb6f78df168ef2be24a7894f6aeec11d6746 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 19 Feb 2017 03:51:49 +0100 Subject: [PATCH 863/912] Fix sampling --- src/swrenderer/drawers/r_draw_sprite32.h | 210 ++++++++++++--------- src/swrenderer/drawers/r_draw_sprite32.php | 7 +- 2 files changed, 124 insertions(+), 93 deletions(-) diff --git a/src/swrenderer/drawers/r_draw_sprite32.h b/src/swrenderer/drawers/r_draw_sprite32.h index bd23b550f2..43802f795c 100644 --- a/src/swrenderer/drawers/r_draw_sprite32.h +++ b/src/swrenderer/drawers/r_draw_sprite32.h @@ -89,13 +89,13 @@ namespace 
swrenderer // Sample unsigned int ifgcolor[2]; { - int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; + int sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; unsigned int sampleout = source[sample_index]; ifgcolor[0] = sampleout; frac += fracstep; } { - int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; + int sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; unsigned int sampleout = source[sample_index]; ifgcolor[1] = sampleout; frac += fracstep; @@ -120,7 +120,7 @@ namespace swrenderer // Sample unsigned int ifgcolor[2]; - int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; + int sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; unsigned int sampleout = source[sample_index]; ifgcolor[0] = sampleout; ifgcolor[1] = 0; @@ -176,8 +176,9 @@ namespace swrenderer // Sample unsigned int ifgcolor[2]; { - unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; - unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; + // Clamp to edge + unsigned int frac_y0 = (clamp(frac, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; + unsigned int frac_y1 = (clamp(frac + one, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; unsigned int y0 = frac_y0 >> FRACBITS; unsigned int y1 = frac_y1 >> FRACBITS; @@ -201,8 +202,9 @@ namespace swrenderer frac += fracstep; } { - unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; - unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; + // Clamp to edge + unsigned int frac_y0 = (clamp(frac, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; + unsigned int frac_y1 = (clamp(frac + one, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; unsigned int y0 = frac_y0 >> FRACBITS; unsigned int y1 = frac_y1 >> FRACBITS; @@ -245,8 +247,9 @@ namespace swrenderer // Sample unsigned int ifgcolor[2]; - unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; - unsigned int frac_y1 = ((frac + one) >> FRACBITS) * 
textureheight; + // Clamp to edge + unsigned int frac_y0 = (clamp(frac, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; + unsigned int frac_y1 = (clamp(frac + one, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; unsigned int y0 = frac_y0 >> FRACBITS; unsigned int y1 = frac_y1 >> FRACBITS; @@ -332,13 +335,13 @@ namespace swrenderer // Sample unsigned int ifgcolor[2]; { - int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; + int sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; unsigned int sampleout = source[sample_index]; ifgcolor[0] = sampleout; frac += fracstep; } { - int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; + int sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; unsigned int sampleout = source[sample_index]; ifgcolor[1] = sampleout; frac += fracstep; @@ -363,7 +366,7 @@ namespace swrenderer // Sample unsigned int ifgcolor[2]; - int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; + int sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; unsigned int sampleout = source[sample_index]; ifgcolor[0] = sampleout; ifgcolor[1] = 0; @@ -424,8 +427,9 @@ namespace swrenderer // Sample unsigned int ifgcolor[2]; { - unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; - unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; + // Clamp to edge + unsigned int frac_y0 = (clamp(frac, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; + unsigned int frac_y1 = (clamp(frac + one, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; unsigned int y0 = frac_y0 >> FRACBITS; unsigned int y1 = frac_y1 >> FRACBITS; @@ -449,8 +453,9 @@ namespace swrenderer frac += fracstep; } { - unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; - unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; + // Clamp to edge + unsigned int frac_y0 = (clamp(frac, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; + unsigned int frac_y1 = (clamp(frac + one, 0, 1 
<< 30) >> (FRACBITS - 2)) * textureheight; unsigned int y0 = frac_y0 >> FRACBITS; unsigned int y1 = frac_y1 >> FRACBITS; @@ -493,8 +498,9 @@ namespace swrenderer // Sample unsigned int ifgcolor[2]; - unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; - unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; + // Clamp to edge + unsigned int frac_y0 = (clamp(frac, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; + unsigned int frac_y1 = (clamp(frac + one, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; unsigned int y0 = frac_y0 >> FRACBITS; unsigned int y1 = frac_y1 >> FRACBITS; @@ -591,13 +597,13 @@ namespace swrenderer // Sample unsigned int ifgcolor[2]; { - int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; + int sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; unsigned int sampleout = source[sample_index]; ifgcolor[0] = sampleout; frac += fracstep; } { - int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; + int sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; unsigned int sampleout = source[sample_index]; ifgcolor[1] = sampleout; frac += fracstep; @@ -623,7 +629,7 @@ namespace swrenderer // Sample unsigned int ifgcolor[2]; - int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; + int sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; unsigned int sampleout = source[sample_index]; ifgcolor[0] = sampleout; ifgcolor[1] = 0; @@ -680,8 +686,9 @@ namespace swrenderer // Sample unsigned int ifgcolor[2]; { - unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; - unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; + // Clamp to edge + unsigned int frac_y0 = (clamp(frac, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; + unsigned int frac_y1 = (clamp(frac + one, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; unsigned int y0 = frac_y0 >> FRACBITS; unsigned int y1 = frac_y1 >> FRACBITS; @@ -705,8 +712,9 @@ namespace 
swrenderer frac += fracstep; } { - unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; - unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; + // Clamp to edge + unsigned int frac_y0 = (clamp(frac, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; + unsigned int frac_y1 = (clamp(frac + one, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; unsigned int y0 = frac_y0 >> FRACBITS; unsigned int y1 = frac_y1 >> FRACBITS; @@ -750,8 +758,9 @@ namespace swrenderer // Sample unsigned int ifgcolor[2]; - unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; - unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; + // Clamp to edge + unsigned int frac_y0 = (clamp(frac, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; + unsigned int frac_y1 = (clamp(frac + one, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; unsigned int y0 = frac_y0 >> FRACBITS; unsigned int y1 = frac_y1 >> FRACBITS; @@ -838,13 +847,13 @@ namespace swrenderer // Sample unsigned int ifgcolor[2]; { - int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; + int sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; unsigned int sampleout = source[sample_index]; ifgcolor[0] = sampleout; frac += fracstep; } { - int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; + int sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; unsigned int sampleout = source[sample_index]; ifgcolor[1] = sampleout; frac += fracstep; @@ -870,7 +879,7 @@ namespace swrenderer // Sample unsigned int ifgcolor[2]; - int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; + int sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; unsigned int sampleout = source[sample_index]; ifgcolor[0] = sampleout; ifgcolor[1] = 0; @@ -932,8 +941,9 @@ namespace swrenderer // Sample unsigned int ifgcolor[2]; { - unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; - unsigned int frac_y1 = ((frac + one) >> FRACBITS) * 
textureheight; + // Clamp to edge + unsigned int frac_y0 = (clamp(frac, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; + unsigned int frac_y1 = (clamp(frac + one, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; unsigned int y0 = frac_y0 >> FRACBITS; unsigned int y1 = frac_y1 >> FRACBITS; @@ -957,8 +967,9 @@ namespace swrenderer frac += fracstep; } { - unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; - unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; + // Clamp to edge + unsigned int frac_y0 = (clamp(frac, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; + unsigned int frac_y1 = (clamp(frac + one, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; unsigned int y0 = frac_y0 >> FRACBITS; unsigned int y1 = frac_y1 >> FRACBITS; @@ -1002,8 +1013,9 @@ namespace swrenderer // Sample unsigned int ifgcolor[2]; - unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; - unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; + // Clamp to edge + unsigned int frac_y0 = (clamp(frac, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; + unsigned int frac_y1 = (clamp(frac + one, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; unsigned int y0 = frac_y0 >> FRACBITS; unsigned int y1 = frac_y1 >> FRACBITS; @@ -1102,13 +1114,13 @@ namespace swrenderer // Sample unsigned int ifgcolor[2]; { - int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; + int sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; unsigned int sampleout = source[sample_index]; ifgcolor[0] = sampleout; frac += fracstep; } { - int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; + int sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; unsigned int sampleout = source[sample_index]; ifgcolor[1] = sampleout; frac += fracstep; @@ -1157,7 +1169,7 @@ namespace swrenderer // Sample unsigned int ifgcolor[2]; - int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; + int sample_index = (((frac << 2) >> 
FRACBITS) * textureheight) >> FRACBITS; unsigned int sampleout = source[sample_index]; ifgcolor[0] = sampleout; ifgcolor[1] = 0; @@ -1237,8 +1249,9 @@ namespace swrenderer // Sample unsigned int ifgcolor[2]; { - unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; - unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; + // Clamp to edge + unsigned int frac_y0 = (clamp(frac, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; + unsigned int frac_y1 = (clamp(frac + one, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; unsigned int y0 = frac_y0 >> FRACBITS; unsigned int y1 = frac_y1 >> FRACBITS; @@ -1262,8 +1275,9 @@ namespace swrenderer frac += fracstep; } { - unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; - unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; + // Clamp to edge + unsigned int frac_y0 = (clamp(frac, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; + unsigned int frac_y1 = (clamp(frac + one, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; unsigned int y0 = frac_y0 >> FRACBITS; unsigned int y1 = frac_y1 >> FRACBITS; @@ -1330,8 +1344,9 @@ namespace swrenderer // Sample unsigned int ifgcolor[2]; - unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; - unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; + // Clamp to edge + unsigned int frac_y0 = (clamp(frac, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; + unsigned int frac_y1 = (clamp(frac + one, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; unsigned int y0 = frac_y0 >> FRACBITS; unsigned int y1 = frac_y1 >> FRACBITS; @@ -1441,13 +1456,13 @@ namespace swrenderer // Sample unsigned int ifgcolor[2]; { - int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; + int sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; unsigned int sampleout = source[sample_index]; ifgcolor[0] = sampleout; frac += fracstep; } { - int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; + int sample_index = (((frac << 
2) >> FRACBITS) * textureheight) >> FRACBITS; unsigned int sampleout = source[sample_index]; ifgcolor[1] = sampleout; frac += fracstep; @@ -1496,7 +1511,7 @@ namespace swrenderer // Sample unsigned int ifgcolor[2]; - int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; + int sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; unsigned int sampleout = source[sample_index]; ifgcolor[0] = sampleout; ifgcolor[1] = 0; @@ -1581,8 +1596,9 @@ namespace swrenderer // Sample unsigned int ifgcolor[2]; { - unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; - unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; + // Clamp to edge + unsigned int frac_y0 = (clamp(frac, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; + unsigned int frac_y1 = (clamp(frac + one, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; unsigned int y0 = frac_y0 >> FRACBITS; unsigned int y1 = frac_y1 >> FRACBITS; @@ -1606,8 +1622,9 @@ namespace swrenderer frac += fracstep; } { - unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; - unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; + // Clamp to edge + unsigned int frac_y0 = (clamp(frac, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; + unsigned int frac_y1 = (clamp(frac + one, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; unsigned int y0 = frac_y0 >> FRACBITS; unsigned int y1 = frac_y1 >> FRACBITS; @@ -1674,8 +1691,9 @@ namespace swrenderer // Sample unsigned int ifgcolor[2]; - unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; - unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; + // Clamp to edge + unsigned int frac_y0 = (clamp(frac, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; + unsigned int frac_y1 = (clamp(frac + one, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; unsigned int y0 = frac_y0 >> FRACBITS; unsigned int y1 = frac_y1 >> FRACBITS; @@ -1796,13 +1814,13 @@ namespace swrenderer // Sample unsigned int ifgcolor[2]; { - int sample_index = 
((frac >> FRACBITS) * textureheight) >> FRACBITS; + int sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; unsigned int sampleout = source[sample_index]; ifgcolor[0] = sampleout; frac += fracstep; } { - int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; + int sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; unsigned int sampleout = source[sample_index]; ifgcolor[1] = sampleout; frac += fracstep; @@ -1851,7 +1869,7 @@ namespace swrenderer // Sample unsigned int ifgcolor[2]; - int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; + int sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; unsigned int sampleout = source[sample_index]; ifgcolor[0] = sampleout; ifgcolor[1] = 0; @@ -1931,8 +1949,9 @@ namespace swrenderer // Sample unsigned int ifgcolor[2]; { - unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; - unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; + // Clamp to edge + unsigned int frac_y0 = (clamp(frac, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; + unsigned int frac_y1 = (clamp(frac + one, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; unsigned int y0 = frac_y0 >> FRACBITS; unsigned int y1 = frac_y1 >> FRACBITS; @@ -1956,8 +1975,9 @@ namespace swrenderer frac += fracstep; } { - unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; - unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; + // Clamp to edge + unsigned int frac_y0 = (clamp(frac, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; + unsigned int frac_y1 = (clamp(frac + one, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; unsigned int y0 = frac_y0 >> FRACBITS; unsigned int y1 = frac_y1 >> FRACBITS; @@ -2024,8 +2044,9 @@ namespace swrenderer // Sample unsigned int ifgcolor[2]; - unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; - unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; + // Clamp to edge + unsigned int frac_y0 = (clamp(frac, 
0, 1 << 30) >> (FRACBITS - 2)) * textureheight; + unsigned int frac_y1 = (clamp(frac + one, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; unsigned int y0 = frac_y0 >> FRACBITS; unsigned int y1 = frac_y1 >> FRACBITS; @@ -2135,13 +2156,13 @@ namespace swrenderer // Sample unsigned int ifgcolor[2]; { - int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; + int sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; unsigned int sampleout = source[sample_index]; ifgcolor[0] = sampleout; frac += fracstep; } { - int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; + int sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; unsigned int sampleout = source[sample_index]; ifgcolor[1] = sampleout; frac += fracstep; @@ -2190,7 +2211,7 @@ namespace swrenderer // Sample unsigned int ifgcolor[2]; - int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; + int sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; unsigned int sampleout = source[sample_index]; ifgcolor[0] = sampleout; ifgcolor[1] = 0; @@ -2275,8 +2296,9 @@ namespace swrenderer // Sample unsigned int ifgcolor[2]; { - unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; - unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; + // Clamp to edge + unsigned int frac_y0 = (clamp(frac, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; + unsigned int frac_y1 = (clamp(frac + one, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; unsigned int y0 = frac_y0 >> FRACBITS; unsigned int y1 = frac_y1 >> FRACBITS; @@ -2300,8 +2322,9 @@ namespace swrenderer frac += fracstep; } { - unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; - unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; + // Clamp to edge + unsigned int frac_y0 = (clamp(frac, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; + unsigned int frac_y1 = (clamp(frac + one, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; unsigned int y0 = 
frac_y0 >> FRACBITS; unsigned int y1 = frac_y1 >> FRACBITS; @@ -2368,8 +2391,9 @@ namespace swrenderer // Sample unsigned int ifgcolor[2]; - unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; - unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; + // Clamp to edge + unsigned int frac_y0 = (clamp(frac, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; + unsigned int frac_y1 = (clamp(frac + one, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; unsigned int y0 = frac_y0 >> FRACBITS; unsigned int y1 = frac_y1 >> FRACBITS; @@ -2490,13 +2514,13 @@ namespace swrenderer // Sample unsigned int ifgcolor[2]; { - int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; + int sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; unsigned int sampleout = source[sample_index]; ifgcolor[0] = sampleout; frac += fracstep; } { - int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; + int sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; unsigned int sampleout = source[sample_index]; ifgcolor[1] = sampleout; frac += fracstep; @@ -2545,7 +2569,7 @@ namespace swrenderer // Sample unsigned int ifgcolor[2]; - int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; + int sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; unsigned int sampleout = source[sample_index]; ifgcolor[0] = sampleout; ifgcolor[1] = 0; @@ -2625,8 +2649,9 @@ namespace swrenderer // Sample unsigned int ifgcolor[2]; { - unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; - unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; + // Clamp to edge + unsigned int frac_y0 = (clamp(frac, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; + unsigned int frac_y1 = (clamp(frac + one, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; unsigned int y0 = frac_y0 >> FRACBITS; unsigned int y1 = frac_y1 >> FRACBITS; @@ -2650,8 +2675,9 @@ namespace swrenderer frac += fracstep; } { - unsigned int frac_y0 
= (frac >> FRACBITS) * textureheight; - unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; + // Clamp to edge + unsigned int frac_y0 = (clamp(frac, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; + unsigned int frac_y1 = (clamp(frac + one, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; unsigned int y0 = frac_y0 >> FRACBITS; unsigned int y1 = frac_y1 >> FRACBITS; @@ -2718,8 +2744,9 @@ namespace swrenderer // Sample unsigned int ifgcolor[2]; - unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; - unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; + // Clamp to edge + unsigned int frac_y0 = (clamp(frac, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; + unsigned int frac_y1 = (clamp(frac + one, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; unsigned int y0 = frac_y0 >> FRACBITS; unsigned int y1 = frac_y1 >> FRACBITS; @@ -2829,13 +2856,13 @@ namespace swrenderer // Sample unsigned int ifgcolor[2]; { - int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; + int sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; unsigned int sampleout = source[sample_index]; ifgcolor[0] = sampleout; frac += fracstep; } { - int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; + int sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; unsigned int sampleout = source[sample_index]; ifgcolor[1] = sampleout; frac += fracstep; @@ -2884,7 +2911,7 @@ namespace swrenderer // Sample unsigned int ifgcolor[2]; - int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; + int sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; unsigned int sampleout = source[sample_index]; ifgcolor[0] = sampleout; ifgcolor[1] = 0; @@ -2969,8 +2996,9 @@ namespace swrenderer // Sample unsigned int ifgcolor[2]; { - unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; - unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; + // Clamp to edge + unsigned int 
frac_y0 = (clamp(frac, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; + unsigned int frac_y1 = (clamp(frac + one, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; unsigned int y0 = frac_y0 >> FRACBITS; unsigned int y1 = frac_y1 >> FRACBITS; @@ -2994,8 +3022,9 @@ namespace swrenderer frac += fracstep; } { - unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; - unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; + // Clamp to edge + unsigned int frac_y0 = (clamp(frac, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; + unsigned int frac_y1 = (clamp(frac + one, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; unsigned int y0 = frac_y0 >> FRACBITS; unsigned int y1 = frac_y1 >> FRACBITS; @@ -3062,8 +3091,9 @@ namespace swrenderer // Sample unsigned int ifgcolor[2]; - unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; - unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; + // Clamp to edge + unsigned int frac_y0 = (clamp(frac, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; + unsigned int frac_y1 = (clamp(frac + one, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; unsigned int y0 = frac_y0 >> FRACBITS; unsigned int y1 = frac_y1 >> FRACBITS; diff --git a/src/swrenderer/drawers/r_draw_sprite32.php b/src/swrenderer/drawers/r_draw_sprite32.php index 9d4721de67..376b9ea4da 100644 --- a/src/swrenderer/drawers/r_draw_sprite32.php +++ b/src/swrenderer/drawers/r_draw_sprite32.php @@ -225,13 +225,14 @@ namespace swrenderer - int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; + int sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; unsigned int sampleout = source[sample_index]; - unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; - unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; + // Clamp to edge + unsigned int frac_y0 = (clamp(frac, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; + unsigned int frac_y1 = (clamp(frac + one, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; 
unsigned int y0 = frac_y0 >> FRACBITS; unsigned int y1 = frac_y1 >> FRACBITS; From c2eed19b5107751e3c324875e7163b62dad0330e Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Wed, 15 Feb 2017 10:10:54 -0500 Subject: [PATCH 864/912] - fixed: Change compiled exe version strings to match git repo numbers. This is viewable in Windows by right-clicking on the executable and selecting "Properties". --- src/version.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/version.h b/src/version.h index ca7de97fef..b9f9a1327c 100644 --- a/src/version.h +++ b/src/version.h @@ -41,12 +41,16 @@ const char *GetVersionString(); /** Lots of different version numbers **/ +#ifdef GIT_DESCRIPTION +#define VERSIONSTR GIT_DESCRIPTION +#else #define VERSIONSTR "2.3pre" +#endif // The version as seen in the Windows resource #define RC_FILEVERSION 2,3,9999,0 #define RC_PRODUCTVERSION 2,3,9999,0 -#define RC_PRODUCTVERSION2 "2.3pre" +#define RC_PRODUCTVERSION2 VERSIONSTR // Version identifier for network games. 
// Bump it every time you do a release unless you're certain you From fe854fb71f4d146509a6a2ca4ec8ff3814993ce1 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 20 Feb 2017 07:09:30 +0100 Subject: [PATCH 865/912] Fix php drawer blend modes --- src/swrenderer/drawers/r_draw_sprite32.h | 2786 +++++++++++++------- src/swrenderer/drawers/r_draw_sprite32.php | 140 +- src/swrenderer/drawers/r_draw_wall32.h | 832 +++--- src/swrenderer/drawers/r_draw_wall32.php | 101 +- 4 files changed, 2528 insertions(+), 1331 deletions(-) diff --git a/src/swrenderer/drawers/r_draw_sprite32.h b/src/swrenderer/drawers/r_draw_sprite32.h index 43802f795c..e3baa6c08e 100644 --- a/src/swrenderer/drawers/r_draw_sprite32.h +++ b/src/swrenderer/drawers/r_draw_sprite32.h @@ -46,8 +46,6 @@ namespace swrenderer { const uint32_t *source = (const uint32_t*)args.TexturePixels(); const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); - const uint8_t *colormap = args.Colormap(); - const uint32_t *translation = (const uint32_t*)args.TranslationMap(); bool is_nearest_filter = (source2 == nullptr); if (is_nearest_filter) { @@ -73,10 +71,10 @@ namespace swrenderer dest = thread->dest_for_thread(dest_y, pitch, dest); fracstep *= thread->num_cores; pitch *= thread->num_cores; - __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); - __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); uint32_t srccolor = args.SrcColorBgra(); - uint32_t color = args.SolidColor(); + uint32_t color = LightBgra::shade_pal_index_simple(args.SolidColor(), light); int ssecount = count / 2; for (int index = 0; index < ssecount; index++) @@ -87,17 +85,21 @@ namespace swrenderer desttmp[1] = dest[offset + pitch]; // Sample - unsigned int ifgcolor[2]; + unsigned int ifgcolor[2], ifgshade[2]; { int sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; unsigned int sampleout = 
source[sample_index]; + unsigned int sampleshadeout = 0; ifgcolor[0] = sampleout; + ifgshade[0] = sampleshadeout; frac += fracstep; } { int sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; unsigned int sampleout = source[sample_index]; + unsigned int sampleshadeout = 0; ifgcolor[1] = sampleout; + ifgshade[1] = sampleshadeout; frac += fracstep; } __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); @@ -119,11 +121,14 @@ namespace swrenderer int offset = index * pitch; // Sample - unsigned int ifgcolor[2]; + unsigned int ifgcolor[2], ifgshade[2]; int sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; unsigned int sampleout = source[sample_index]; + unsigned int sampleshadeout = 0; ifgcolor[0] = sampleout; ifgcolor[1] = 0; + ifgshade[0] = sampleshadeout; + ifgshade[1] = 0; __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade @@ -160,10 +165,10 @@ namespace swrenderer fracstep *= thread->num_cores; pitch *= thread->num_cores; frac -= one / 2; - __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); - __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); uint32_t srccolor = args.SrcColorBgra(); - uint32_t color = args.SolidColor(); + uint32_t color = LightBgra::shade_pal_index_simple(args.SolidColor(), light); int ssecount = count / 2; for (int index = 0; index < ssecount; index++) @@ -174,7 +179,7 @@ namespace swrenderer desttmp[1] = dest[offset + pitch]; // Sample - unsigned int ifgcolor[2]; + unsigned int ifgcolor[2], ifgshade[2]; { // Clamp to edge unsigned int frac_y0 = (clamp(frac, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; @@ -198,7 +203,9 @@ namespace swrenderer unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; unsigned 
int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + unsigned int sampleshadeout = 0; ifgcolor[0] = sampleout; + ifgshade[0] = sampleshadeout; frac += fracstep; } { @@ -224,7 +231,9 @@ namespace swrenderer unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + unsigned int sampleshadeout = 0; ifgcolor[1] = sampleout; + ifgshade[1] = sampleshadeout; frac += fracstep; } __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); @@ -246,7 +255,7 @@ namespace swrenderer int offset = index * pitch; // Sample - unsigned int ifgcolor[2]; + unsigned int ifgcolor[2], ifgshade[2]; // Clamp to edge unsigned int frac_y0 = (clamp(frac, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; unsigned int frac_y1 = (clamp(frac + one, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; @@ -269,8 +278,11 @@ namespace swrenderer unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + unsigned int sampleshadeout = 0; ifgcolor[0] = sampleout; ifgcolor[1] = 0; + ifgshade[0] = sampleshadeout; + ifgshade[1] = 0; __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade @@ -287,8 +299,6 @@ namespace swrenderer { const uint32_t *source = (const uint32_t*)args.TexturePixels(); const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); - const uint8_t *colormap = args.Colormap(); - const uint32_t *translation = (const uint32_t*)args.TranslationMap(); bool is_nearest_filter = (source2 == nullptr); if (is_nearest_filter) { @@ -319,10 +329,10 @@ namespace swrenderer dest = thread->dest_for_thread(dest_y, pitch, dest); fracstep *= thread->num_cores; pitch *= 
thread->num_cores; - __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); - __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); uint32_t srccolor = args.SrcColorBgra(); - uint32_t color = args.SolidColor(); + uint32_t color = LightBgra::shade_pal_index_simple(args.SolidColor(), light); int ssecount = count / 2; for (int index = 0; index < ssecount; index++) @@ -333,17 +343,21 @@ namespace swrenderer desttmp[1] = dest[offset + pitch]; // Sample - unsigned int ifgcolor[2]; + unsigned int ifgcolor[2], ifgshade[2]; { int sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; unsigned int sampleout = source[sample_index]; + unsigned int sampleshadeout = 0; ifgcolor[0] = sampleout; + ifgshade[0] = sampleshadeout; frac += fracstep; } { int sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; unsigned int sampleout = source[sample_index]; + unsigned int sampleshadeout = 0; ifgcolor[1] = sampleout; + ifgshade[1] = sampleshadeout; frac += fracstep; } __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); @@ -365,11 +379,14 @@ namespace swrenderer int offset = index * pitch; // Sample - unsigned int ifgcolor[2]; + unsigned int ifgcolor[2], ifgshade[2]; int sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; unsigned int sampleout = source[sample_index]; + unsigned int sampleshadeout = 0; ifgcolor[0] = sampleout; ifgcolor[1] = 0; + ifgshade[0] = sampleshadeout; + ifgshade[1] = 0; __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade @@ -411,10 +428,10 @@ namespace swrenderer fracstep *= thread->num_cores; pitch *= thread->num_cores; frac -= one / 2; - __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); - __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + 
uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); uint32_t srccolor = args.SrcColorBgra(); - uint32_t color = args.SolidColor(); + uint32_t color = LightBgra::shade_pal_index_simple(args.SolidColor(), light); int ssecount = count / 2; for (int index = 0; index < ssecount; index++) @@ -425,7 +442,7 @@ namespace swrenderer desttmp[1] = dest[offset + pitch]; // Sample - unsigned int ifgcolor[2]; + unsigned int ifgcolor[2], ifgshade[2]; { // Clamp to edge unsigned int frac_y0 = (clamp(frac, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; @@ -449,7 +466,9 @@ namespace swrenderer unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + unsigned int sampleshadeout = 0; ifgcolor[0] = sampleout; + ifgshade[0] = sampleshadeout; frac += fracstep; } { @@ -475,7 +494,9 @@ namespace swrenderer unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + unsigned int sampleshadeout = 0; ifgcolor[1] = sampleout; + ifgshade[1] = sampleshadeout; frac += fracstep; } __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); @@ -497,7 +518,7 @@ namespace swrenderer int offset = index * pitch; // Sample - unsigned int ifgcolor[2]; + unsigned int ifgcolor[2], ifgshade[2]; // Clamp to edge unsigned int frac_y0 = (clamp(frac, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; unsigned int frac_y1 = (clamp(frac + one, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; @@ -520,8 +541,11 @@ namespace swrenderer unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + 
unsigned int sampleshadeout = 0; ifgcolor[0] = sampleout; ifgcolor[1] = 0; + ifgshade[0] = sampleshadeout; + ifgshade[1] = 0; __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade @@ -554,8 +578,6 @@ namespace swrenderer { const uint32_t *source = (const uint32_t*)args.TexturePixels(); const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); - const uint8_t *colormap = args.Colormap(); - const uint32_t *translation = (const uint32_t*)args.TranslationMap(); bool is_nearest_filter = (source2 == nullptr); if (is_nearest_filter) { @@ -581,10 +603,10 @@ namespace swrenderer dest = thread->dest_for_thread(dest_y, pitch, dest); fracstep *= thread->num_cores; pitch *= thread->num_cores; - __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); - __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); uint32_t srccolor = args.SrcColorBgra(); - uint32_t color = args.SolidColor(); + uint32_t color = LightBgra::shade_pal_index_simple(args.SolidColor(), light); int ssecount = count / 2; for (int index = 0; index < ssecount; index++) @@ -595,17 +617,21 @@ namespace swrenderer desttmp[1] = dest[offset + pitch]; // Sample - unsigned int ifgcolor[2]; + unsigned int ifgcolor[2], ifgshade[2]; { int sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; unsigned int sampleout = source[sample_index]; + unsigned int sampleshadeout = 0; ifgcolor[0] = sampleout; + ifgshade[0] = sampleshadeout; frac += fracstep; } { int sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; unsigned int sampleout = source[sample_index]; + unsigned int sampleshadeout = 0; ifgcolor[1] = sampleout; + ifgshade[1] = sampleshadeout; frac += fracstep; } __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); @@ -628,11 +654,14 @@ namespace swrenderer int offset = index * 
pitch; // Sample - unsigned int ifgcolor[2]; + unsigned int ifgcolor[2], ifgshade[2]; int sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; unsigned int sampleout = source[sample_index]; + unsigned int sampleshadeout = 0; ifgcolor[0] = sampleout; ifgcolor[1] = 0; + ifgshade[0] = sampleshadeout; + ifgshade[1] = 0; __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade @@ -670,10 +699,10 @@ namespace swrenderer fracstep *= thread->num_cores; pitch *= thread->num_cores; frac -= one / 2; - __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); - __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); uint32_t srccolor = args.SrcColorBgra(); - uint32_t color = args.SolidColor(); + uint32_t color = LightBgra::shade_pal_index_simple(args.SolidColor(), light); int ssecount = count / 2; for (int index = 0; index < ssecount; index++) @@ -684,7 +713,7 @@ namespace swrenderer desttmp[1] = dest[offset + pitch]; // Sample - unsigned int ifgcolor[2]; + unsigned int ifgcolor[2], ifgshade[2]; { // Clamp to edge unsigned int frac_y0 = (clamp(frac, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; @@ -708,7 +737,9 @@ namespace swrenderer unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + unsigned int sampleshadeout = 0; ifgcolor[0] = sampleout; + ifgshade[0] = sampleshadeout; frac += fracstep; } { @@ -734,7 +765,9 @@ namespace swrenderer unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + unsigned int sampleshadeout = 0; ifgcolor[1] = sampleout; + ifgshade[1] = 
sampleshadeout; frac += fracstep; } __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); @@ -757,7 +790,7 @@ namespace swrenderer int offset = index * pitch; // Sample - unsigned int ifgcolor[2]; + unsigned int ifgcolor[2], ifgshade[2]; // Clamp to edge unsigned int frac_y0 = (clamp(frac, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; unsigned int frac_y1 = (clamp(frac + one, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; @@ -780,8 +813,11 @@ namespace swrenderer unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + unsigned int sampleshadeout = 0; ifgcolor[0] = sampleout; ifgcolor[1] = 0; + ifgshade[0] = sampleshadeout; + ifgshade[1] = 0; __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade @@ -799,8 +835,6 @@ namespace swrenderer { const uint32_t *source = (const uint32_t*)args.TexturePixels(); const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); - const uint8_t *colormap = args.Colormap(); - const uint32_t *translation = (const uint32_t*)args.TranslationMap(); bool is_nearest_filter = (source2 == nullptr); if (is_nearest_filter) { @@ -831,10 +865,10 @@ namespace swrenderer dest = thread->dest_for_thread(dest_y, pitch, dest); fracstep *= thread->num_cores; pitch *= thread->num_cores; - __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); - __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); uint32_t srccolor = args.SrcColorBgra(); - uint32_t color = args.SolidColor(); + uint32_t color = LightBgra::shade_pal_index_simple(args.SolidColor(), light); int ssecount = count / 2; for (int index = 0; index < ssecount; index++) @@ -845,23 +879,42 @@ namespace swrenderer desttmp[1] = 
dest[offset + pitch]; // Sample - unsigned int ifgcolor[2]; + unsigned int ifgcolor[2], ifgshade[2]; { int sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; unsigned int sampleout = source[sample_index]; + unsigned int sampleshadeout = 0; ifgcolor[0] = sampleout; + ifgshade[0] = sampleshadeout; frac += fracstep; } { int sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; unsigned int sampleout = source[sample_index]; + unsigned int sampleshadeout = 0; ifgcolor[1] = sampleout; + ifgshade[1] = sampleshadeout; frac += fracstep; } __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + int blue0 = BPART(ifgcolor[0]); + int green0 = GPART(ifgcolor[0]); + int red0 = RPART(ifgcolor[0]); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + + int blue1 = BPART(ifgcolor[1]); + int green1 = GPART(ifgcolor[1]); + int red1 = RPART(ifgcolor[1]); + int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); + + fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); // Blend __m128i outcolor = fgcolor; @@ -878,15 +931,33 @@ namespace swrenderer int offset = index * pitch; // Sample - unsigned int ifgcolor[2]; + unsigned int ifgcolor[2], ifgshade[2]; int sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; unsigned int sampleout = source[sample_index]; + unsigned int sampleshadeout = 0; ifgcolor[0] = sampleout; ifgcolor[1] = 0; + ifgshade[0] = sampleshadeout; + ifgshade[1] = 0; __m128i fgcolor = 
_mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + int blue0 = BPART(ifgcolor[0]); + int green0 = GPART(ifgcolor[0]); + int red0 = RPART(ifgcolor[0]); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + + int blue1 = BPART(ifgcolor[1]); + int green1 = GPART(ifgcolor[1]); + int red1 = RPART(ifgcolor[1]); + int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); + + fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); // Blend __m128i outcolor = fgcolor; @@ -925,10 +996,10 @@ namespace swrenderer fracstep *= thread->num_cores; pitch *= thread->num_cores; frac -= one / 2; - __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); - __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); uint32_t srccolor = args.SrcColorBgra(); - uint32_t color = args.SolidColor(); + uint32_t color = LightBgra::shade_pal_index_simple(args.SolidColor(), light); int ssecount = count / 2; for (int index = 0; index < ssecount; index++) @@ -939,7 +1010,7 @@ namespace swrenderer desttmp[1] = dest[offset + pitch]; // Sample - unsigned int ifgcolor[2]; + unsigned int ifgcolor[2], ifgshade[2]; { // Clamp to edge unsigned int frac_y0 = (clamp(frac, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; @@ -963,7 +1034,9 @@ namespace swrenderer unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; unsigned int sampleout = 
(salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + unsigned int sampleshadeout = 0; ifgcolor[0] = sampleout; + ifgshade[0] = sampleshadeout; frac += fracstep; } { @@ -989,13 +1062,30 @@ namespace swrenderer unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + unsigned int sampleshadeout = 0; ifgcolor[1] = sampleout; + ifgshade[1] = sampleshadeout; frac += fracstep; } __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + int blue0 = BPART(ifgcolor[0]); + int green0 = GPART(ifgcolor[0]); + int red0 = RPART(ifgcolor[0]); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + + int blue1 = BPART(ifgcolor[1]); + int green1 = GPART(ifgcolor[1]); + int red1 = RPART(ifgcolor[1]); + int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); + + fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); // Blend __m128i outcolor = fgcolor; @@ -1012,7 +1102,7 @@ namespace swrenderer int offset = index * pitch; // Sample - unsigned int ifgcolor[2]; + unsigned int ifgcolor[2], ifgshade[2]; // Clamp to edge unsigned int frac_y0 = (clamp(frac, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; unsigned int frac_y1 = (clamp(frac + one, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; @@ -1035,12 +1125,30 @@ namespace swrenderer unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * 
inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + unsigned int sampleshadeout = 0; ifgcolor[0] = sampleout; ifgcolor[1] = 0; + ifgshade[0] = sampleshadeout; + ifgshade[1] = 0; __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + int blue0 = BPART(ifgcolor[0]); + int green0 = GPART(ifgcolor[0]); + int red0 = RPART(ifgcolor[0]); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + + int blue1 = BPART(ifgcolor[1]); + int green1 = GPART(ifgcolor[1]); + int red1 = RPART(ifgcolor[1]); + int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); + + fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); // Blend __m128i outcolor = fgcolor; @@ -1070,8 +1178,6 @@ namespace swrenderer { const uint32_t *source = (const uint32_t*)args.TexturePixels(); const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); - const uint8_t *colormap = args.Colormap(); - const uint32_t *translation = (const uint32_t*)args.TranslationMap(); bool is_nearest_filter = (source2 == nullptr); if (is_nearest_filter) { @@ -1097,10 +1203,10 @@ namespace swrenderer dest = thread->dest_for_thread(dest_y, pitch, dest); fracstep *= thread->num_cores; pitch *= thread->num_cores; - __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); - __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); uint32_t srccolor = 
args.SrcColorBgra(); - uint32_t color = args.SolidColor(); + uint32_t color = LightBgra::shade_pal_index_simple(args.SolidColor(), light); int ssecount = count / 2; for (int index = 0; index < ssecount; index++) @@ -1112,17 +1218,21 @@ namespace swrenderer __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); // Sample - unsigned int ifgcolor[2]; + unsigned int ifgcolor[2], ifgshade[2]; { int sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; unsigned int sampleout = source[sample_index]; + unsigned int sampleshadeout = 0; ifgcolor[0] = sampleout; + ifgshade[0] = sampleshadeout; frac += fracstep; } { int sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; unsigned int sampleout = source[sample_index]; + unsigned int sampleshadeout = 0; ifgcolor[1] = sampleout; + ifgshade[1] = sampleshadeout; frac += fracstep; } __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); @@ -1131,16 +1241,20 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); - bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + uint32_t alpha0 = APART(ifgcolor[0]); + uint32_t alpha1 = APART(ifgcolor[1]); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; - __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); - fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + 
uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); @@ -1149,10 +1263,13 @@ namespace swrenderer __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); + __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); - __m128i out_lo = _mm_srai_epi16(_mm_add_epi32(fg_lo, bg_lo), 8); - __m128i out_hi = _mm_srai_epi16(_mm_add_epi32(fg_hi, bg_hi), 8); - __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); @@ -1168,27 +1285,34 @@ namespace swrenderer __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); // Sample - unsigned int ifgcolor[2]; + unsigned int ifgcolor[2], ifgshade[2]; int sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; unsigned int sampleout = source[sample_index]; + unsigned int sampleshadeout = 0; ifgcolor[0] = sampleout; ifgcolor[1] = 0; + ifgshade[0] = sampleshadeout; + ifgshade[1] = 0; __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - 
alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); - bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + uint32_t alpha0 = APART(ifgcolor[0]); + uint32_t alpha1 = APART(ifgcolor[1]); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; - __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); - fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); @@ -1197,10 +1321,13 @@ namespace swrenderer __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); + __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); - __m128i out_lo = _mm_srai_epi16(_mm_add_epi32(fg_lo, bg_lo), 8); - __m128i out_hi = _mm_srai_epi16(_mm_add_epi32(fg_hi, bg_hi), 8); - __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); outcolor = 
_mm_packus_epi16(outcolor, _mm_setzero_si128()); outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); @@ -1232,10 +1359,10 @@ namespace swrenderer fracstep *= thread->num_cores; pitch *= thread->num_cores; frac -= one / 2; - __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); - __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); uint32_t srccolor = args.SrcColorBgra(); - uint32_t color = args.SolidColor(); + uint32_t color = LightBgra::shade_pal_index_simple(args.SolidColor(), light); int ssecount = count / 2; for (int index = 0; index < ssecount; index++) @@ -1247,7 +1374,7 @@ namespace swrenderer __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); // Sample - unsigned int ifgcolor[2]; + unsigned int ifgcolor[2], ifgshade[2]; { // Clamp to edge unsigned int frac_y0 = (clamp(frac, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; @@ -1271,7 +1398,9 @@ namespace swrenderer unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + unsigned int sampleshadeout = 0; ifgcolor[0] = sampleout; + ifgshade[0] = sampleshadeout; frac += fracstep; } { @@ -1297,7 +1426,9 @@ namespace swrenderer unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + unsigned int sampleshadeout = 0; ifgcolor[1] = sampleout; + ifgshade[1] = sampleshadeout; frac += fracstep; } __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); @@ -1306,16 +1437,20 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); // Blend - __m128i alpha = 
_mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); - bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + uint32_t alpha0 = APART(ifgcolor[0]); + uint32_t alpha1 = APART(ifgcolor[1]); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; - __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); - fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); @@ -1324,10 +1459,13 @@ namespace swrenderer __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); + __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); - __m128i out_lo = _mm_srai_epi16(_mm_add_epi32(fg_lo, bg_lo), 8); - __m128i out_hi = _mm_srai_epi16(_mm_add_epi32(fg_hi, bg_hi), 8); - __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = 
_mm_packs_epi32(out_lo, out_hi); outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); @@ -1343,7 +1481,7 @@ namespace swrenderer __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); // Sample - unsigned int ifgcolor[2]; + unsigned int ifgcolor[2], ifgshade[2]; // Clamp to edge unsigned int frac_y0 = (clamp(frac, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; unsigned int frac_y1 = (clamp(frac + one, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; @@ -1366,24 +1504,31 @@ namespace swrenderer unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + unsigned int sampleshadeout = 0; ifgcolor[0] = sampleout; ifgcolor[1] = 0; + ifgshade[0] = sampleshadeout; + ifgshade[1] = 0; __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); - bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + uint32_t alpha0 = APART(ifgcolor[0]); + uint32_t alpha1 = APART(ifgcolor[1]); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; - __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); - fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) 
+ 128) >> 8; + uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); @@ -1392,10 +1537,13 @@ namespace swrenderer __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); + __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); - __m128i out_lo = _mm_srai_epi16(_mm_add_epi32(fg_lo, bg_lo), 8); - __m128i out_hi = _mm_srai_epi16(_mm_add_epi32(fg_hi, bg_hi), 8); - __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); @@ -1407,8 +1555,6 @@ namespace swrenderer { const uint32_t *source = (const uint32_t*)args.TexturePixels(); const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); - const uint8_t *colormap = args.Colormap(); - const uint32_t *translation = (const uint32_t*)args.TranslationMap(); bool is_nearest_filter = (source2 == nullptr); if (is_nearest_filter) { @@ -1439,10 +1585,10 @@ namespace swrenderer dest = thread->dest_for_thread(dest_y, pitch, dest); fracstep *= thread->num_cores; pitch *= thread->num_cores; - __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); - __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t 
destalpha = args.DestAlpha() >> (FRACBITS - 8); uint32_t srccolor = args.SrcColorBgra(); - uint32_t color = args.SolidColor(); + uint32_t color = LightBgra::shade_pal_index_simple(args.SolidColor(), light); int ssecount = count / 2; for (int index = 0; index < ssecount; index++) @@ -1454,35 +1600,58 @@ namespace swrenderer __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); // Sample - unsigned int ifgcolor[2]; + unsigned int ifgcolor[2], ifgshade[2]; { int sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; unsigned int sampleout = source[sample_index]; + unsigned int sampleshadeout = 0; ifgcolor[0] = sampleout; + ifgshade[0] = sampleshadeout; frac += fracstep; } { int sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; unsigned int sampleout = source[sample_index]; + unsigned int sampleshadeout = 0; ifgcolor[1] = sampleout; + ifgshade[1] = sampleshadeout; frac += fracstep; } __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + int blue0 = BPART(ifgcolor[0]); + int green0 = GPART(ifgcolor[0]); + int red0 = RPART(ifgcolor[0]); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + + int blue1 = BPART(ifgcolor[1]); + int green1 = GPART(ifgcolor[1]); + int red1 = RPART(ifgcolor[1]); + int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); + + fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = 
_mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); - bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + uint32_t alpha0 = APART(ifgcolor[0]); + uint32_t alpha1 = APART(ifgcolor[1]); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; - __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); - fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); @@ -1491,10 +1660,13 @@ namespace swrenderer __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); + __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); - __m128i out_lo = _mm_srai_epi16(_mm_add_epi32(fg_lo, bg_lo), 8); - __m128i out_hi = _mm_srai_epi16(_mm_add_epi32(fg_hi, bg_hi), 8); - __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); outcolor = 
_mm_packus_epi16(outcolor, _mm_setzero_si128()); outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); @@ -1510,27 +1682,49 @@ namespace swrenderer __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); // Sample - unsigned int ifgcolor[2]; + unsigned int ifgcolor[2], ifgshade[2]; int sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; unsigned int sampleout = source[sample_index]; + unsigned int sampleshadeout = 0; ifgcolor[0] = sampleout; ifgcolor[1] = 0; + ifgshade[0] = sampleshadeout; + ifgshade[1] = 0; __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + int blue0 = BPART(ifgcolor[0]); + int green0 = GPART(ifgcolor[0]); + int red0 = RPART(ifgcolor[0]); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + + int blue1 = BPART(ifgcolor[1]); + int green1 = GPART(ifgcolor[1]); + int red1 = RPART(ifgcolor[1]); + int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); + + fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); - bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + uint32_t alpha0 = 
APART(ifgcolor[0]); + uint32_t alpha1 = APART(ifgcolor[1]); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; - __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); - fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); @@ -1539,10 +1733,13 @@ namespace swrenderer __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); + __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); - __m128i out_lo = _mm_srai_epi16(_mm_add_epi32(fg_lo, bg_lo), 8); - __m128i out_hi = _mm_srai_epi16(_mm_add_epi32(fg_hi, bg_hi), 8); - __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); @@ -1579,10 +1776,10 @@ namespace swrenderer fracstep *= thread->num_cores; pitch *= thread->num_cores; frac -= one / 2; - __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); - __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t 
destalpha = args.DestAlpha() >> (FRACBITS - 8); uint32_t srccolor = args.SrcColorBgra(); - uint32_t color = args.SolidColor(); + uint32_t color = LightBgra::shade_pal_index_simple(args.SolidColor(), light); int ssecount = count / 2; for (int index = 0; index < ssecount; index++) @@ -1594,7 +1791,7 @@ namespace swrenderer __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); // Sample - unsigned int ifgcolor[2]; + unsigned int ifgcolor[2], ifgshade[2]; { // Clamp to edge unsigned int frac_y0 = (clamp(frac, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; @@ -1618,7 +1815,9 @@ namespace swrenderer unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + unsigned int sampleshadeout = 0; ifgcolor[0] = sampleout; + ifgshade[0] = sampleshadeout; frac += fracstep; } { @@ -1644,25 +1843,46 @@ namespace swrenderer unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + unsigned int sampleshadeout = 0; ifgcolor[1] = sampleout; + ifgshade[1] = sampleshadeout; frac += fracstep; } __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + int blue0 = BPART(ifgcolor[0]); + int green0 = GPART(ifgcolor[0]); + int red0 = RPART(ifgcolor[0]); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + + int blue1 = BPART(ifgcolor[1]); + int green1 = GPART(ifgcolor[1]); + int red1 = RPART(ifgcolor[1]); + int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, 
intensity0); + + fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); - bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + uint32_t alpha0 = APART(ifgcolor[0]); + uint32_t alpha1 = APART(ifgcolor[1]); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; - __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); - fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); @@ -1671,10 +1891,13 @@ namespace swrenderer __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); + __m128i 
out_hi = _mm_add_epi32(fg_hi, bg_hi); - __m128i out_lo = _mm_srai_epi16(_mm_add_epi32(fg_lo, bg_lo), 8); - __m128i out_hi = _mm_srai_epi16(_mm_add_epi32(fg_hi, bg_hi), 8); - __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); @@ -1690,7 +1913,7 @@ namespace swrenderer __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); // Sample - unsigned int ifgcolor[2]; + unsigned int ifgcolor[2], ifgshade[2]; // Clamp to edge unsigned int frac_y0 = (clamp(frac, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; unsigned int frac_y1 = (clamp(frac + one, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; @@ -1713,24 +1936,46 @@ namespace swrenderer unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + unsigned int sampleshadeout = 0; ifgcolor[0] = sampleout; ifgcolor[1] = 0; + ifgshade[0] = sampleshadeout; + ifgshade[1] = 0; __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + int blue0 = BPART(ifgcolor[0]); + int green0 = GPART(ifgcolor[0]); + int red0 = RPART(ifgcolor[0]); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + + int blue1 = BPART(ifgcolor[1]); + int green1 = GPART(ifgcolor[1]); + int red1 = RPART(ifgcolor[1]); + int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); + + fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, 
inv_desaturate), intensity), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); - bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + uint32_t alpha0 = APART(ifgcolor[0]); + uint32_t alpha1 = APART(ifgcolor[1]); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; - __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); - fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); @@ -1739,10 +1984,13 @@ namespace swrenderer __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); + __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); - __m128i out_lo = 
_mm_srai_epi16(_mm_add_epi32(fg_lo, bg_lo), 8); - __m128i out_hi = _mm_srai_epi16(_mm_add_epi32(fg_hi, bg_hi), 8); - __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); @@ -1770,8 +2018,6 @@ namespace swrenderer { const uint32_t *source = (const uint32_t*)args.TexturePixels(); const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); - const uint8_t *colormap = args.Colormap(); - const uint32_t *translation = (const uint32_t*)args.TranslationMap(); bool is_nearest_filter = (source2 == nullptr); if (is_nearest_filter) { @@ -1797,10 +2043,10 @@ namespace swrenderer dest = thread->dest_for_thread(dest_y, pitch, dest); fracstep *= thread->num_cores; pitch *= thread->num_cores; - __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); - __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); uint32_t srccolor = args.SrcColorBgra(); - uint32_t color = args.SolidColor(); + uint32_t color = LightBgra::shade_pal_index_simple(args.SolidColor(), light); int ssecount = count / 2; for (int index = 0; index < ssecount; index++) @@ -1812,17 +2058,21 @@ namespace swrenderer __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); // Sample - unsigned int ifgcolor[2]; + unsigned int ifgcolor[2], ifgshade[2]; { int sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; unsigned int sampleout = source[sample_index]; + unsigned int sampleshadeout = 0; ifgcolor[0] = sampleout; + ifgshade[0] = sampleshadeout; frac += fracstep; } { int sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; unsigned int sampleout = source[sample_index]; + unsigned int 
sampleshadeout = 0; ifgcolor[1] = sampleout; + ifgshade[1] = sampleshadeout; frac += fracstep; } __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); @@ -1831,16 +2081,20 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); - bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + uint32_t alpha0 = APART(ifgcolor[0]); + uint32_t alpha1 = APART(ifgcolor[1]); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; - __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); - fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); @@ -1849,10 +2103,13 @@ namespace swrenderer __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_sub_epi32(fg_lo, bg_lo); + __m128i 
out_hi = _mm_sub_epi32(fg_hi, bg_hi); - __m128i out_lo = _mm_srai_epi16(_mm_sub_epi32(fg_lo, bg_lo), 8); - __m128i out_hi = _mm_srai_epi16(_mm_sub_epi32(fg_hi, bg_hi), 8); - __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); @@ -1868,27 +2125,34 @@ namespace swrenderer __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); // Sample - unsigned int ifgcolor[2]; + unsigned int ifgcolor[2], ifgshade[2]; int sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; unsigned int sampleout = source[sample_index]; + unsigned int sampleshadeout = 0; ifgcolor[0] = sampleout; ifgcolor[1] = 0; + ifgshade[0] = sampleshadeout; + ifgshade[1] = 0; __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); - bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + uint32_t alpha0 = APART(ifgcolor[0]); + uint32_t alpha1 = APART(ifgcolor[1]); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; - __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); - fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 
8; + uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); @@ -1897,10 +2161,13 @@ namespace swrenderer __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_sub_epi32(fg_lo, bg_lo); + __m128i out_hi = _mm_sub_epi32(fg_hi, bg_hi); - __m128i out_lo = _mm_srai_epi16(_mm_sub_epi32(fg_lo, bg_lo), 8); - __m128i out_hi = _mm_srai_epi16(_mm_sub_epi32(fg_hi, bg_hi), 8); - __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); @@ -1932,10 +2199,10 @@ namespace swrenderer fracstep *= thread->num_cores; pitch *= thread->num_cores; frac -= one / 2; - __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); - __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); uint32_t srccolor = args.SrcColorBgra(); - uint32_t color = args.SolidColor(); + uint32_t color = LightBgra::shade_pal_index_simple(args.SolidColor(), light); int ssecount = count / 2; for (int index = 0; index < ssecount; index++) @@ -1947,7 +2214,7 @@ namespace swrenderer __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), 
_mm_setzero_si128()); // Sample - unsigned int ifgcolor[2]; + unsigned int ifgcolor[2], ifgshade[2]; { // Clamp to edge unsigned int frac_y0 = (clamp(frac, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; @@ -1971,7 +2238,9 @@ namespace swrenderer unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + unsigned int sampleshadeout = 0; ifgcolor[0] = sampleout; + ifgshade[0] = sampleshadeout; frac += fracstep; } { @@ -1997,7 +2266,9 @@ namespace swrenderer unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + unsigned int sampleshadeout = 0; ifgcolor[1] = sampleout; + ifgshade[1] = sampleshadeout; frac += fracstep; } __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); @@ -2006,16 +2277,20 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); - bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + uint32_t alpha0 = APART(ifgcolor[0]); + uint32_t alpha1 = APART(ifgcolor[1]); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; - __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); - fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + uint32_t bgalpha0 
= (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); @@ -2024,10 +2299,13 @@ namespace swrenderer __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_sub_epi32(fg_lo, bg_lo); + __m128i out_hi = _mm_sub_epi32(fg_hi, bg_hi); - __m128i out_lo = _mm_srai_epi16(_mm_sub_epi32(fg_lo, bg_lo), 8); - __m128i out_hi = _mm_srai_epi16(_mm_sub_epi32(fg_hi, bg_hi), 8); - __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); @@ -2043,7 +2321,7 @@ namespace swrenderer __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); // Sample - unsigned int ifgcolor[2]; + unsigned int ifgcolor[2], ifgshade[2]; // Clamp to edge unsigned int frac_y0 = (clamp(frac, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; unsigned int frac_y1 = (clamp(frac + one, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; @@ -2066,24 +2344,31 @@ namespace swrenderer unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 
8) | sblue; + unsigned int sampleshadeout = 0; ifgcolor[0] = sampleout; ifgcolor[1] = 0; + ifgshade[0] = sampleshadeout; + ifgshade[1] = 0; __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); - bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + uint32_t alpha0 = APART(ifgcolor[0]); + uint32_t alpha1 = APART(ifgcolor[1]); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; - __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); - fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); @@ -2092,10 +2377,13 @@ namespace swrenderer __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_sub_epi32(fg_lo, bg_lo); + 
__m128i out_hi = _mm_sub_epi32(fg_hi, bg_hi); - __m128i out_lo = _mm_srai_epi16(_mm_sub_epi32(fg_lo, bg_lo), 8); - __m128i out_hi = _mm_srai_epi16(_mm_sub_epi32(fg_hi, bg_hi), 8); - __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); @@ -2107,8 +2395,6 @@ namespace swrenderer { const uint32_t *source = (const uint32_t*)args.TexturePixels(); const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); - const uint8_t *colormap = args.Colormap(); - const uint32_t *translation = (const uint32_t*)args.TranslationMap(); bool is_nearest_filter = (source2 == nullptr); if (is_nearest_filter) { @@ -2139,10 +2425,10 @@ namespace swrenderer dest = thread->dest_for_thread(dest_y, pitch, dest); fracstep *= thread->num_cores; pitch *= thread->num_cores; - __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); - __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); uint32_t srccolor = args.SrcColorBgra(); - uint32_t color = args.SolidColor(); + uint32_t color = LightBgra::shade_pal_index_simple(args.SolidColor(), light); int ssecount = count / 2; for (int index = 0; index < ssecount; index++) @@ -2154,35 +2440,58 @@ namespace swrenderer __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); // Sample - unsigned int ifgcolor[2]; + unsigned int ifgcolor[2], ifgshade[2]; { int sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; unsigned int sampleout = source[sample_index]; + unsigned int sampleshadeout = 0; ifgcolor[0] = sampleout; + ifgshade[0] = sampleshadeout; frac += fracstep; } { int sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; 
unsigned int sampleout = source[sample_index]; + unsigned int sampleshadeout = 0; ifgcolor[1] = sampleout; + ifgshade[1] = sampleshadeout; frac += fracstep; } __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + int blue0 = BPART(ifgcolor[0]); + int green0 = GPART(ifgcolor[0]); + int red0 = RPART(ifgcolor[0]); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + + int blue1 = BPART(ifgcolor[1]); + int green1 = GPART(ifgcolor[1]); + int red1 = RPART(ifgcolor[1]); + int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); + + fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); - bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + uint32_t alpha0 = APART(ifgcolor[0]); + uint32_t alpha1 = APART(ifgcolor[1]); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; - __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); - fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + uint32_t 
bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); @@ -2191,10 +2500,13 @@ namespace swrenderer __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_sub_epi32(fg_lo, bg_lo); + __m128i out_hi = _mm_sub_epi32(fg_hi, bg_hi); - __m128i out_lo = _mm_srai_epi16(_mm_sub_epi32(fg_lo, bg_lo), 8); - __m128i out_hi = _mm_srai_epi16(_mm_sub_epi32(fg_hi, bg_hi), 8); - __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); @@ -2210,27 +2522,49 @@ namespace swrenderer __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); // Sample - unsigned int ifgcolor[2]; + unsigned int ifgcolor[2], ifgshade[2]; int sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; unsigned int sampleout = source[sample_index]; + unsigned int sampleshadeout = 0; ifgcolor[0] = sampleout; ifgcolor[1] = 0; + ifgshade[0] = sampleshadeout; + ifgshade[1] = 0; __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + int blue0 = BPART(ifgcolor[0]); + int green0 = GPART(ifgcolor[0]); + int red0 = 
RPART(ifgcolor[0]); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + + int blue1 = BPART(ifgcolor[1]); + int green1 = GPART(ifgcolor[1]); + int red1 = RPART(ifgcolor[1]); + int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); + + fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); - bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + uint32_t alpha0 = APART(ifgcolor[0]); + uint32_t alpha1 = APART(ifgcolor[1]); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; - __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); - fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); fgcolor 
= _mm_mullo_epi16(fgcolor, fgalpha); bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); @@ -2239,10 +2573,13 @@ namespace swrenderer __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_sub_epi32(fg_lo, bg_lo); + __m128i out_hi = _mm_sub_epi32(fg_hi, bg_hi); - __m128i out_lo = _mm_srai_epi16(_mm_sub_epi32(fg_lo, bg_lo), 8); - __m128i out_hi = _mm_srai_epi16(_mm_sub_epi32(fg_hi, bg_hi), 8); - __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); @@ -2279,10 +2616,10 @@ namespace swrenderer fracstep *= thread->num_cores; pitch *= thread->num_cores; frac -= one / 2; - __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); - __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); uint32_t srccolor = args.SrcColorBgra(); - uint32_t color = args.SolidColor(); + uint32_t color = LightBgra::shade_pal_index_simple(args.SolidColor(), light); int ssecount = count / 2; for (int index = 0; index < ssecount; index++) @@ -2294,7 +2631,7 @@ namespace swrenderer __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); // Sample - unsigned int ifgcolor[2]; + unsigned int ifgcolor[2], ifgshade[2]; { // Clamp to edge unsigned int frac_y0 = (clamp(frac, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; @@ -2318,7 +2655,9 @@ namespace swrenderer unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; unsigned int sampleout = (salpha << 24) | (sred << 16) 
| (sgreen << 8) | sblue; + unsigned int sampleshadeout = 0; ifgcolor[0] = sampleout; + ifgshade[0] = sampleshadeout; frac += fracstep; } { @@ -2344,25 +2683,46 @@ namespace swrenderer unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + unsigned int sampleshadeout = 0; ifgcolor[1] = sampleout; + ifgshade[1] = sampleshadeout; frac += fracstep; } __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + int blue0 = BPART(ifgcolor[0]); + int green0 = GPART(ifgcolor[0]); + int red0 = RPART(ifgcolor[0]); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + + int blue1 = BPART(ifgcolor[1]); + int green1 = GPART(ifgcolor[1]); + int red1 = RPART(ifgcolor[1]); + int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); + + fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); - bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + uint32_t alpha0 = APART(ifgcolor[0]); + uint32_t alpha1 = APART(ifgcolor[1]); + alpha0 
+= alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; - __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); - fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); @@ -2371,10 +2731,13 @@ namespace swrenderer __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_sub_epi32(fg_lo, bg_lo); + __m128i out_hi = _mm_sub_epi32(fg_hi, bg_hi); - __m128i out_lo = _mm_srai_epi16(_mm_sub_epi32(fg_lo, bg_lo), 8); - __m128i out_hi = _mm_srai_epi16(_mm_sub_epi32(fg_hi, bg_hi), 8); - __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); @@ -2390,7 +2753,7 @@ namespace swrenderer __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); // Sample - unsigned int ifgcolor[2]; + unsigned int ifgcolor[2], ifgshade[2]; // Clamp to edge unsigned int frac_y0 = (clamp(frac, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; unsigned int frac_y1 = (clamp(frac + one, 0, 1 << 30) 
>> (FRACBITS - 2)) * textureheight; @@ -2413,24 +2776,46 @@ namespace swrenderer unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + unsigned int sampleshadeout = 0; ifgcolor[0] = sampleout; ifgcolor[1] = 0; + ifgshade[0] = sampleshadeout; + ifgshade[1] = 0; __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + int blue0 = BPART(ifgcolor[0]); + int green0 = GPART(ifgcolor[0]); + int red0 = RPART(ifgcolor[0]); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + + int blue1 = BPART(ifgcolor[1]); + int green1 = GPART(ifgcolor[1]); + int red1 = RPART(ifgcolor[1]); + int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); + + fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); - bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + uint32_t alpha0 = APART(ifgcolor[0]); + uint32_t alpha1 = APART(ifgcolor[1]); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 
= 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; - __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); - fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); @@ -2439,10 +2824,13 @@ namespace swrenderer __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_sub_epi32(fg_lo, bg_lo); + __m128i out_hi = _mm_sub_epi32(fg_hi, bg_hi); - __m128i out_lo = _mm_srai_epi16(_mm_sub_epi32(fg_lo, bg_lo), 8); - __m128i out_hi = _mm_srai_epi16(_mm_sub_epi32(fg_hi, bg_hi), 8); - __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); @@ -2470,8 +2858,6 @@ namespace swrenderer { const uint32_t *source = (const uint32_t*)args.TexturePixels(); const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); - const uint8_t *colormap = args.Colormap(); - const uint32_t *translation = (const uint32_t*)args.TranslationMap(); bool is_nearest_filter = (source2 == nullptr); if (is_nearest_filter) { @@ -2497,10 +2883,10 @@ namespace swrenderer dest = thread->dest_for_thread(dest_y, pitch, 
dest); fracstep *= thread->num_cores; pitch *= thread->num_cores; - __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); - __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); uint32_t srccolor = args.SrcColorBgra(); - uint32_t color = args.SolidColor(); + uint32_t color = LightBgra::shade_pal_index_simple(args.SolidColor(), light); int ssecount = count / 2; for (int index = 0; index < ssecount; index++) @@ -2512,17 +2898,21 @@ namespace swrenderer __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); // Sample - unsigned int ifgcolor[2]; + unsigned int ifgcolor[2], ifgshade[2]; { int sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; unsigned int sampleout = source[sample_index]; + unsigned int sampleshadeout = 0; ifgcolor[0] = sampleout; + ifgshade[0] = sampleshadeout; frac += fracstep; } { int sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; unsigned int sampleout = source[sample_index]; + unsigned int sampleshadeout = 0; ifgcolor[1] = sampleout; + ifgshade[1] = sampleshadeout; frac += fracstep; } __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); @@ -2531,16 +2921,20 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); - bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + uint32_t alpha0 = APART(ifgcolor[0]); + uint32_t alpha1 = APART(ifgcolor[1]); + alpha0 += alpha0 >> 7; // 255->256 + 
alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; - __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); - fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); @@ -2549,10 +2943,13 @@ namespace swrenderer __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_sub_epi32(bg_lo, fg_lo); + __m128i out_hi = _mm_sub_epi32(bg_hi, fg_hi); - __m128i out_lo = _mm_srai_epi16(_mm_sub_epi32(bg_lo, fg_lo), 8); - __m128i out_hi = _mm_srai_epi16(_mm_sub_epi32(bg_hi, fg_hi), 8); - __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); @@ -2568,27 +2965,34 @@ namespace swrenderer __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); // Sample - unsigned int ifgcolor[2]; + unsigned int ifgcolor[2], ifgshade[2]; int sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; unsigned int sampleout = source[sample_index]; + unsigned int sampleshadeout = 0; ifgcolor[0] = sampleout; 
ifgcolor[1] = 0; + ifgshade[0] = sampleshadeout; + ifgshade[1] = 0; __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); - bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + uint32_t alpha0 = APART(ifgcolor[0]); + uint32_t alpha1 = APART(ifgcolor[1]); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; - __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); - fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); @@ -2597,10 +3001,13 @@ namespace swrenderer __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_sub_epi32(bg_lo, fg_lo); + __m128i out_hi = _mm_sub_epi32(bg_hi, fg_hi); - __m128i out_lo = 
_mm_srai_epi16(_mm_sub_epi32(bg_lo, fg_lo), 8); - __m128i out_hi = _mm_srai_epi16(_mm_sub_epi32(bg_hi, fg_hi), 8); - __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); @@ -2632,10 +3039,10 @@ namespace swrenderer fracstep *= thread->num_cores; pitch *= thread->num_cores; frac -= one / 2; - __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); - __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); uint32_t srccolor = args.SrcColorBgra(); - uint32_t color = args.SolidColor(); + uint32_t color = LightBgra::shade_pal_index_simple(args.SolidColor(), light); int ssecount = count / 2; for (int index = 0; index < ssecount; index++) @@ -2647,7 +3054,7 @@ namespace swrenderer __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); // Sample - unsigned int ifgcolor[2]; + unsigned int ifgcolor[2], ifgshade[2]; { // Clamp to edge unsigned int frac_y0 = (clamp(frac, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; @@ -2671,7 +3078,9 @@ namespace swrenderer unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + unsigned int sampleshadeout = 0; ifgcolor[0] = sampleout; + ifgshade[0] = sampleshadeout; frac += fracstep; } { @@ -2697,7 +3106,9 @@ namespace swrenderer unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + unsigned int sampleshadeout = 0; 
ifgcolor[1] = sampleout; + ifgshade[1] = sampleshadeout; frac += fracstep; } __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); @@ -2706,16 +3117,20 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); - bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + uint32_t alpha0 = APART(ifgcolor[0]); + uint32_t alpha1 = APART(ifgcolor[1]); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; - __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); - fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); @@ -2724,10 +3139,13 @@ namespace swrenderer __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_sub_epi32(bg_lo, fg_lo); + __m128i out_hi = 
_mm_sub_epi32(bg_hi, fg_hi); - __m128i out_lo = _mm_srai_epi16(_mm_sub_epi32(bg_lo, fg_lo), 8); - __m128i out_hi = _mm_srai_epi16(_mm_sub_epi32(bg_hi, fg_hi), 8); - __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); @@ -2743,7 +3161,7 @@ namespace swrenderer __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); // Sample - unsigned int ifgcolor[2]; + unsigned int ifgcolor[2], ifgshade[2]; // Clamp to edge unsigned int frac_y0 = (clamp(frac, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; unsigned int frac_y1 = (clamp(frac + one, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; @@ -2766,24 +3184,31 @@ namespace swrenderer unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + unsigned int sampleshadeout = 0; ifgcolor[0] = sampleout; ifgcolor[1] = 0; + ifgshade[0] = sampleshadeout; + ifgshade[1] = 0; __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); - bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + uint32_t alpha0 = APART(ifgcolor[0]); + uint32_t alpha1 = APART(ifgcolor[1]); + alpha0 += alpha0 >> 7; // 
255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; - __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); - fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); @@ -2792,10 +3217,13 @@ namespace swrenderer __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_sub_epi32(bg_lo, fg_lo); + __m128i out_hi = _mm_sub_epi32(bg_hi, fg_hi); - __m128i out_lo = _mm_srai_epi16(_mm_sub_epi32(bg_lo, fg_lo), 8); - __m128i out_hi = _mm_srai_epi16(_mm_sub_epi32(bg_hi, fg_hi), 8); - __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); @@ -2807,8 +3235,6 @@ namespace swrenderer { const uint32_t *source = (const uint32_t*)args.TexturePixels(); const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); - const uint8_t *colormap = args.Colormap(); - const uint32_t *translation = (const uint32_t*)args.TranslationMap(); bool is_nearest_filter = (source2 == nullptr); if (is_nearest_filter) { @@ -2839,10 +3265,10 @@ 
namespace swrenderer dest = thread->dest_for_thread(dest_y, pitch, dest); fracstep *= thread->num_cores; pitch *= thread->num_cores; - __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); - __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); uint32_t srccolor = args.SrcColorBgra(); - uint32_t color = args.SolidColor(); + uint32_t color = LightBgra::shade_pal_index_simple(args.SolidColor(), light); int ssecount = count / 2; for (int index = 0; index < ssecount; index++) @@ -2854,35 +3280,58 @@ namespace swrenderer __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); // Sample - unsigned int ifgcolor[2]; + unsigned int ifgcolor[2], ifgshade[2]; { int sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; unsigned int sampleout = source[sample_index]; + unsigned int sampleshadeout = 0; ifgcolor[0] = sampleout; + ifgshade[0] = sampleshadeout; frac += fracstep; } { int sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; unsigned int sampleout = source[sample_index]; + unsigned int sampleshadeout = 0; ifgcolor[1] = sampleout; + ifgshade[1] = sampleshadeout; frac += fracstep; } __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + int blue0 = BPART(ifgcolor[0]); + int green0 = GPART(ifgcolor[0]); + int red0 = RPART(ifgcolor[0]); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + + int blue1 = BPART(ifgcolor[1]); + int green1 = GPART(ifgcolor[1]); + int red1 = RPART(ifgcolor[1]); + int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); + + fgcolor = 
_mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); - bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + uint32_t alpha0 = APART(ifgcolor[0]); + uint32_t alpha1 = APART(ifgcolor[1]); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; - __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); - fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); @@ -2891,10 +3340,13 @@ namespace swrenderer __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_sub_epi32(bg_lo, fg_lo); + __m128i out_hi = _mm_sub_epi32(bg_hi, 
fg_hi); - __m128i out_lo = _mm_srai_epi16(_mm_sub_epi32(bg_lo, fg_lo), 8); - __m128i out_hi = _mm_srai_epi16(_mm_sub_epi32(bg_hi, fg_hi), 8); - __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); @@ -2910,27 +3362,49 @@ namespace swrenderer __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); // Sample - unsigned int ifgcolor[2]; + unsigned int ifgcolor[2], ifgshade[2]; int sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; unsigned int sampleout = source[sample_index]; + unsigned int sampleshadeout = 0; ifgcolor[0] = sampleout; ifgcolor[1] = 0; + ifgshade[0] = sampleshadeout; + ifgshade[1] = 0; __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + int blue0 = BPART(ifgcolor[0]); + int green0 = GPART(ifgcolor[0]); + int red0 = RPART(ifgcolor[0]); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + + int blue1 = BPART(ifgcolor[1]); + int green1 = GPART(ifgcolor[1]); + int red1 = RPART(ifgcolor[1]); + int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); + + fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = 
_mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); - bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + uint32_t alpha0 = APART(ifgcolor[0]); + uint32_t alpha1 = APART(ifgcolor[1]); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; - __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); - fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); @@ -2939,10 +3413,13 @@ namespace swrenderer __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_sub_epi32(bg_lo, fg_lo); + __m128i out_hi = _mm_sub_epi32(bg_hi, fg_hi); - __m128i out_lo = _mm_srai_epi16(_mm_sub_epi32(bg_lo, fg_lo), 8); - __m128i out_hi = _mm_srai_epi16(_mm_sub_epi32(bg_hi, fg_hi), 8); - __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); outcolor = _mm_or_si128(outcolor, 
_mm_set1_epi32(0xff000000)); @@ -2979,10 +3456,10 @@ namespace swrenderer fracstep *= thread->num_cores; pitch *= thread->num_cores; frac -= one / 2; - __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); - __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); uint32_t srccolor = args.SrcColorBgra(); - uint32_t color = args.SolidColor(); + uint32_t color = LightBgra::shade_pal_index_simple(args.SolidColor(), light); int ssecount = count / 2; for (int index = 0; index < ssecount; index++) @@ -2994,7 +3471,7 @@ namespace swrenderer __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); // Sample - unsigned int ifgcolor[2]; + unsigned int ifgcolor[2], ifgshade[2]; { // Clamp to edge unsigned int frac_y0 = (clamp(frac, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; @@ -3018,7 +3495,9 @@ namespace swrenderer unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + unsigned int sampleshadeout = 0; ifgcolor[0] = sampleout; + ifgshade[0] = sampleshadeout; frac += fracstep; } { @@ -3044,25 +3523,46 @@ namespace swrenderer unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + unsigned int sampleshadeout = 0; ifgcolor[1] = sampleout; + ifgshade[1] = sampleshadeout; frac += fracstep; } __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + int blue0 = BPART(ifgcolor[0]); + int green0 = GPART(ifgcolor[0]); + int red0 = RPART(ifgcolor[0]); + int intensity0 = ((red0 * 77 + green0 * 
143 + blue0 * 37) >> 8) * desaturate; + + int blue1 = BPART(ifgcolor[1]); + int green1 = GPART(ifgcolor[1]); + int red1 = RPART(ifgcolor[1]); + int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); + + fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); - bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + uint32_t alpha0 = APART(ifgcolor[0]); + uint32_t alpha1 = APART(ifgcolor[1]); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; - __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); - fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); bgcolor = 
_mm_mullo_epi16(bgcolor, bgalpha); @@ -3071,10 +3571,13 @@ namespace swrenderer __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_sub_epi32(bg_lo, fg_lo); + __m128i out_hi = _mm_sub_epi32(bg_hi, fg_hi); - __m128i out_lo = _mm_srai_epi16(_mm_sub_epi32(bg_lo, fg_lo), 8); - __m128i out_hi = _mm_srai_epi16(_mm_sub_epi32(bg_hi, fg_hi), 8); - __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); @@ -3090,7 +3593,7 @@ namespace swrenderer __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); // Sample - unsigned int ifgcolor[2]; + unsigned int ifgcolor[2], ifgshade[2]; // Clamp to edge unsigned int frac_y0 = (clamp(frac, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; unsigned int frac_y1 = (clamp(frac + one, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; @@ -3113,24 +3616,46 @@ namespace swrenderer unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + unsigned int sampleshadeout = 0; ifgcolor[0] = sampleout; ifgcolor[1] = 0; + ifgshade[0] = sampleshadeout; + ifgshade[1] = 0; __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + int blue0 = BPART(ifgcolor[0]); + int green0 = GPART(ifgcolor[0]); + int red0 = RPART(ifgcolor[0]); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + + int blue1 = 
BPART(ifgcolor[1]); + int green1 = GPART(ifgcolor[1]); + int red1 = RPART(ifgcolor[1]); + int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); + + fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); - bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + uint32_t alpha0 = APART(ifgcolor[0]); + uint32_t alpha1 = APART(ifgcolor[1]); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; - __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); - fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); @@ -3139,10 +3664,13 @@ namespace 
swrenderer __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_sub_epi32(bg_lo, fg_lo); + __m128i out_hi = _mm_sub_epi32(bg_hi, fg_hi); - __m128i out_lo = _mm_srai_epi16(_mm_sub_epi32(bg_lo, fg_lo), 8); - __m128i out_hi = _mm_srai_epi16(_mm_sub_epi32(bg_hi, fg_hi), 8); - __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); @@ -3170,8 +3698,6 @@ namespace swrenderer { const uint32_t *source = (const uint32_t*)args.TexturePixels(); const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); - const uint8_t *colormap = args.Colormap(); - const uint32_t *translation = (const uint32_t*)args.TranslationMap(); int textureheight = args.TextureHeight(); uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; @@ -3194,10 +3720,10 @@ namespace swrenderer dest = thread->dest_for_thread(dest_y, pitch, dest); fracstep *= thread->num_cores; pitch *= thread->num_cores; - __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); - __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); uint32_t srccolor = args.SrcColorBgra(); - uint32_t color = args.SolidColor(); + uint32_t color = LightBgra::shade_pal_index_simple(args.SolidColor(), light); int ssecount = count / 2; for (int index = 0; index < ssecount; index++) @@ -3208,15 +3734,19 @@ namespace swrenderer desttmp[1] = dest[offset + pitch]; // Sample - unsigned int ifgcolor[2]; + unsigned int ifgcolor[2], ifgshade[2]; { unsigned int sampleout = srccolor; + unsigned int sampleshadeout = 
0; ifgcolor[0] = sampleout; + ifgshade[0] = sampleshadeout; frac += fracstep; } { unsigned int sampleout = srccolor; + unsigned int sampleshadeout = 0; ifgcolor[1] = sampleout; + ifgshade[1] = sampleshadeout; frac += fracstep; } __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); @@ -3239,10 +3769,13 @@ namespace swrenderer int offset = index * pitch; // Sample - unsigned int ifgcolor[2]; + unsigned int ifgcolor[2], ifgshade[2]; unsigned int sampleout = srccolor; + unsigned int sampleshadeout = 0; ifgcolor[0] = sampleout; ifgcolor[1] = 0; + ifgshade[0] = sampleshadeout; + ifgshade[1] = 0; __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade @@ -3259,8 +3792,6 @@ namespace swrenderer { const uint32_t *source = (const uint32_t*)args.TexturePixels(); const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); - const uint8_t *colormap = args.Colormap(); - const uint32_t *translation = (const uint32_t*)args.TranslationMap(); int textureheight = args.TextureHeight(); uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; @@ -3288,10 +3819,10 @@ namespace swrenderer dest = thread->dest_for_thread(dest_y, pitch, dest); fracstep *= thread->num_cores; pitch *= thread->num_cores; - __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); - __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); uint32_t srccolor = args.SrcColorBgra(); - uint32_t color = args.SolidColor(); + uint32_t color = LightBgra::shade_pal_index_simple(args.SolidColor(), light); int ssecount = count / 2; for (int index = 0; index < ssecount; index++) @@ -3302,21 +3833,40 @@ namespace swrenderer desttmp[1] = dest[offset + pitch]; // Sample - unsigned int ifgcolor[2]; + unsigned int ifgcolor[2], ifgshade[2]; { unsigned int sampleout = srccolor; + unsigned int sampleshadeout = 0; 
ifgcolor[0] = sampleout; + ifgshade[0] = sampleshadeout; frac += fracstep; } { unsigned int sampleout = srccolor; + unsigned int sampleshadeout = 0; ifgcolor[1] = sampleout; + ifgshade[1] = sampleshadeout; frac += fracstep; } __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + int blue0 = BPART(ifgcolor[0]); + int green0 = GPART(ifgcolor[0]); + int red0 = RPART(ifgcolor[0]); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + + int blue1 = BPART(ifgcolor[1]); + int green1 = GPART(ifgcolor[1]); + int red1 = RPART(ifgcolor[1]); + int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); + + fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); // Blend __m128i outcolor = fgcolor; @@ -3333,14 +3883,32 @@ namespace swrenderer int offset = index * pitch; // Sample - unsigned int ifgcolor[2]; + unsigned int ifgcolor[2], ifgshade[2]; unsigned int sampleout = srccolor; + unsigned int sampleshadeout = 0; ifgcolor[0] = sampleout; ifgcolor[1] = 0; + ifgshade[0] = sampleshadeout; + ifgshade[1] = 0; __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + int blue0 = BPART(ifgcolor[0]); + int green0 = GPART(ifgcolor[0]); + int red0 = RPART(ifgcolor[0]); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + + int blue1 = BPART(ifgcolor[1]); + int green1 = GPART(ifgcolor[1]); + int red1 = RPART(ifgcolor[1]); + int intensity1 = ((red1 * 77 
+ green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); + + fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); // Blend __m128i outcolor = fgcolor; @@ -3369,8 +3937,6 @@ namespace swrenderer { const uint32_t *source = (const uint32_t*)args.TexturePixels(); const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); - const uint8_t *colormap = args.Colormap(); - const uint32_t *translation = (const uint32_t*)args.TranslationMap(); int textureheight = args.TextureHeight(); uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; @@ -3393,10 +3959,10 @@ namespace swrenderer dest = thread->dest_for_thread(dest_y, pitch, dest); fracstep *= thread->num_cores; pitch *= thread->num_cores; - __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); - __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); uint32_t srccolor = args.SrcColorBgra(); - uint32_t color = args.SolidColor(); + uint32_t color = LightBgra::shade_pal_index_simple(args.SolidColor(), light); int ssecount = count / 2; for (int index = 0; index < ssecount; index++) @@ -3408,15 +3974,19 @@ namespace swrenderer __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); // Sample - unsigned int ifgcolor[2]; + unsigned int ifgcolor[2], ifgshade[2]; { unsigned int sampleout = srccolor; + unsigned int sampleshadeout = 0; ifgcolor[0] = sampleout; + ifgshade[0] = sampleshadeout; frac += fracstep; } { unsigned int sampleout = srccolor; + unsigned int sampleshadeout = 0; ifgcolor[1] = sampleout; + ifgshade[1] = 
sampleshadeout; frac += fracstep; } __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); @@ -3425,16 +3995,20 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); - bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + uint32_t alpha0 = APART(ifgcolor[0]); + uint32_t alpha1 = APART(ifgcolor[1]); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; - __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); - fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); @@ -3443,10 +4017,13 @@ namespace swrenderer __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); + __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); - __m128i out_lo = 
_mm_srai_epi16(_mm_add_epi32(fg_lo, bg_lo), 8); - __m128i out_hi = _mm_srai_epi16(_mm_add_epi32(fg_hi, bg_hi), 8); - __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); @@ -3462,26 +4039,33 @@ namespace swrenderer __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); // Sample - unsigned int ifgcolor[2]; + unsigned int ifgcolor[2], ifgshade[2]; unsigned int sampleout = srccolor; + unsigned int sampleshadeout = 0; ifgcolor[0] = sampleout; ifgcolor[1] = 0; + ifgshade[0] = sampleshadeout; + ifgshade[1] = 0; __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); - bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + uint32_t alpha0 = APART(ifgcolor[0]); + uint32_t alpha1 = APART(ifgcolor[1]); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; - __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); - fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t 
fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); @@ -3490,10 +4074,13 @@ namespace swrenderer __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); + __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); - __m128i out_lo = _mm_srai_epi16(_mm_add_epi32(fg_lo, bg_lo), 8); - __m128i out_hi = _mm_srai_epi16(_mm_add_epi32(fg_hi, bg_hi), 8); - __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); @@ -3504,8 +4091,6 @@ namespace swrenderer { const uint32_t *source = (const uint32_t*)args.TexturePixels(); const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); - const uint8_t *colormap = args.Colormap(); - const uint32_t *translation = (const uint32_t*)args.TranslationMap(); int textureheight = args.TextureHeight(); uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; @@ -3533,10 +4118,10 @@ namespace swrenderer dest = thread->dest_for_thread(dest_y, pitch, dest); fracstep *= thread->num_cores; pitch *= thread->num_cores; - __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); - __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); uint32_t srccolor = args.SrcColorBgra(); - uint32_t color = 
args.SolidColor(); + uint32_t color = LightBgra::shade_pal_index_simple(args.SolidColor(), light); int ssecount = count / 2; for (int index = 0; index < ssecount; index++) @@ -3548,33 +4133,56 @@ namespace swrenderer __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); // Sample - unsigned int ifgcolor[2]; + unsigned int ifgcolor[2], ifgshade[2]; { unsigned int sampleout = srccolor; + unsigned int sampleshadeout = 0; ifgcolor[0] = sampleout; + ifgshade[0] = sampleshadeout; frac += fracstep; } { unsigned int sampleout = srccolor; + unsigned int sampleshadeout = 0; ifgcolor[1] = sampleout; + ifgshade[1] = sampleshadeout; frac += fracstep; } __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + int blue0 = BPART(ifgcolor[0]); + int green0 = GPART(ifgcolor[0]); + int red0 = RPART(ifgcolor[0]); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + + int blue1 = BPART(ifgcolor[1]); + int green1 = GPART(ifgcolor[1]); + int red1 = RPART(ifgcolor[1]); + int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); + + fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); - bgalpha = 
_mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + uint32_t alpha0 = APART(ifgcolor[0]); + uint32_t alpha1 = APART(ifgcolor[1]); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; - __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); - fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); @@ -3583,10 +4191,13 @@ namespace swrenderer __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); + __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); - __m128i out_lo = _mm_srai_epi16(_mm_add_epi32(fg_lo, bg_lo), 8); - __m128i out_hi = _mm_srai_epi16(_mm_add_epi32(fg_hi, bg_hi), 8); - __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); @@ -3602,26 +4213,48 @@ namespace swrenderer __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); // Sample - unsigned int 
ifgcolor[2]; + unsigned int ifgcolor[2], ifgshade[2]; unsigned int sampleout = srccolor; + unsigned int sampleshadeout = 0; ifgcolor[0] = sampleout; ifgcolor[1] = 0; + ifgshade[0] = sampleshadeout; + ifgshade[1] = 0; __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + int blue0 = BPART(ifgcolor[0]); + int green0 = GPART(ifgcolor[0]); + int red0 = RPART(ifgcolor[0]); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + + int blue1 = BPART(ifgcolor[1]); + int green1 = GPART(ifgcolor[1]); + int red1 = RPART(ifgcolor[1]); + int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); + + fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); - bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + uint32_t alpha0 = APART(ifgcolor[0]); + uint32_t alpha1 = APART(ifgcolor[1]); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; - __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); - fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + uint32_t bgalpha0 = 
(destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); @@ -3630,10 +4263,13 @@ namespace swrenderer __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); + __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); - __m128i out_lo = _mm_srai_epi16(_mm_add_epi32(fg_lo, bg_lo), 8); - __m128i out_hi = _mm_srai_epi16(_mm_add_epi32(fg_hi, bg_hi), 8); - __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); @@ -3660,8 +4296,6 @@ namespace swrenderer { const uint32_t *source = (const uint32_t*)args.TexturePixels(); const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); - const uint8_t *colormap = args.Colormap(); - const uint32_t *translation = (const uint32_t*)args.TranslationMap(); int textureheight = args.TextureHeight(); uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; @@ -3684,10 +4318,10 @@ namespace swrenderer dest = thread->dest_for_thread(dest_y, pitch, dest); fracstep *= thread->num_cores; pitch *= thread->num_cores; - __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); - __m128i destalpha = 
_mm_set1_epi16(args.DestAlpha()); + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); uint32_t srccolor = args.SrcColorBgra(); - uint32_t color = args.SolidColor(); + uint32_t color = LightBgra::shade_pal_index_simple(args.SolidColor(), light); int ssecount = count / 2; for (int index = 0; index < ssecount; index++) @@ -3699,15 +4333,19 @@ namespace swrenderer __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); // Sample - unsigned int ifgcolor[2]; + unsigned int ifgcolor[2], ifgshade[2]; { unsigned int sampleout = srccolor; + unsigned int sampleshadeout = 0; ifgcolor[0] = sampleout; + ifgshade[0] = sampleshadeout; frac += fracstep; } { unsigned int sampleout = srccolor; + unsigned int sampleshadeout = 0; ifgcolor[1] = sampleout; + ifgshade[1] = sampleshadeout; frac += fracstep; } __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); @@ -3716,16 +4354,20 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); - bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + uint32_t alpha0 = APART(ifgcolor[0]); + uint32_t alpha1 = APART(ifgcolor[1]); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; - __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); - fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + 
uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); @@ -3734,10 +4376,13 @@ namespace swrenderer __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_sub_epi32(fg_lo, bg_lo); + __m128i out_hi = _mm_sub_epi32(fg_hi, bg_hi); - __m128i out_lo = _mm_srai_epi16(_mm_sub_epi32(fg_lo, bg_lo), 8); - __m128i out_hi = _mm_srai_epi16(_mm_sub_epi32(fg_hi, bg_hi), 8); - __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); @@ -3753,26 +4398,33 @@ namespace swrenderer __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); // Sample - unsigned int ifgcolor[2]; + unsigned int ifgcolor[2], ifgshade[2]; unsigned int sampleout = srccolor; + unsigned int sampleshadeout = 0; ifgcolor[0] = sampleout; ifgcolor[1] = 0; + ifgshade[0] = sampleshadeout; + ifgshade[1] = 0; __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, 
_mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); - bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + uint32_t alpha0 = APART(ifgcolor[0]); + uint32_t alpha1 = APART(ifgcolor[1]); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; - __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); - fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); @@ -3781,10 +4433,13 @@ namespace swrenderer __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_sub_epi32(fg_lo, bg_lo); + __m128i out_hi = _mm_sub_epi32(fg_hi, bg_hi); - __m128i out_lo = _mm_srai_epi16(_mm_sub_epi32(fg_lo, bg_lo), 8); - __m128i out_hi = _mm_srai_epi16(_mm_sub_epi32(fg_hi, bg_hi), 8); - __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); outcolor = _mm_or_si128(outcolor, 
_mm_set1_epi32(0xff000000)); @@ -3795,8 +4450,6 @@ namespace swrenderer { const uint32_t *source = (const uint32_t*)args.TexturePixels(); const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); - const uint8_t *colormap = args.Colormap(); - const uint32_t *translation = (const uint32_t*)args.TranslationMap(); int textureheight = args.TextureHeight(); uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; @@ -3824,10 +4477,10 @@ namespace swrenderer dest = thread->dest_for_thread(dest_y, pitch, dest); fracstep *= thread->num_cores; pitch *= thread->num_cores; - __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); - __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); uint32_t srccolor = args.SrcColorBgra(); - uint32_t color = args.SolidColor(); + uint32_t color = LightBgra::shade_pal_index_simple(args.SolidColor(), light); int ssecount = count / 2; for (int index = 0; index < ssecount; index++) @@ -3839,33 +4492,56 @@ namespace swrenderer __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); // Sample - unsigned int ifgcolor[2]; + unsigned int ifgcolor[2], ifgshade[2]; { unsigned int sampleout = srccolor; + unsigned int sampleshadeout = 0; ifgcolor[0] = sampleout; + ifgshade[0] = sampleshadeout; frac += fracstep; } { unsigned int sampleout = srccolor; + unsigned int sampleshadeout = 0; ifgcolor[1] = sampleout; + ifgshade[1] = sampleshadeout; frac += fracstep; } __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + int blue0 = BPART(ifgcolor[0]); + int green0 = GPART(ifgcolor[0]); + int red0 = RPART(ifgcolor[0]); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + + int blue1 = BPART(ifgcolor[1]); + int green1 = GPART(ifgcolor[1]); + int 
red1 = RPART(ifgcolor[1]); + int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); + + fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); - bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + uint32_t alpha0 = APART(ifgcolor[0]); + uint32_t alpha1 = APART(ifgcolor[1]); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; - __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); - fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); @@ -3874,10 +4550,13 @@ namespace swrenderer __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, 
_mm_setzero_si128()); __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_sub_epi32(fg_lo, bg_lo); + __m128i out_hi = _mm_sub_epi32(fg_hi, bg_hi); - __m128i out_lo = _mm_srai_epi16(_mm_sub_epi32(fg_lo, bg_lo), 8); - __m128i out_hi = _mm_srai_epi16(_mm_sub_epi32(fg_hi, bg_hi), 8); - __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); @@ -3893,26 +4572,48 @@ namespace swrenderer __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); // Sample - unsigned int ifgcolor[2]; + unsigned int ifgcolor[2], ifgshade[2]; unsigned int sampleout = srccolor; + unsigned int sampleshadeout = 0; ifgcolor[0] = sampleout; ifgcolor[1] = 0; + ifgshade[0] = sampleshadeout; + ifgshade[1] = 0; __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + int blue0 = BPART(ifgcolor[0]); + int green0 = GPART(ifgcolor[0]); + int red0 = RPART(ifgcolor[0]); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + + int blue1 = BPART(ifgcolor[1]); + int green1 = GPART(ifgcolor[1]); + int red1 = RPART(ifgcolor[1]); + int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); + + fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, 
shade_light), 8); // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); - bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + uint32_t alpha0 = APART(ifgcolor[0]); + uint32_t alpha1 = APART(ifgcolor[1]); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; - __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); - fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); @@ -3921,10 +4622,13 @@ namespace swrenderer __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_sub_epi32(fg_lo, bg_lo); + __m128i out_hi = _mm_sub_epi32(fg_hi, bg_hi); - __m128i out_lo = _mm_srai_epi16(_mm_sub_epi32(fg_lo, bg_lo), 8); - __m128i out_hi = _mm_srai_epi16(_mm_sub_epi32(fg_hi, bg_hi), 8); - __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = 
_mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); @@ -3951,8 +4655,6 @@ namespace swrenderer { const uint32_t *source = (const uint32_t*)args.TexturePixels(); const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); - const uint8_t *colormap = args.Colormap(); - const uint32_t *translation = (const uint32_t*)args.TranslationMap(); int textureheight = args.TextureHeight(); uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; @@ -3975,10 +4677,10 @@ namespace swrenderer dest = thread->dest_for_thread(dest_y, pitch, dest); fracstep *= thread->num_cores; pitch *= thread->num_cores; - __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); - __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); uint32_t srccolor = args.SrcColorBgra(); - uint32_t color = args.SolidColor(); + uint32_t color = LightBgra::shade_pal_index_simple(args.SolidColor(), light); int ssecount = count / 2; for (int index = 0; index < ssecount; index++) @@ -3990,15 +4692,19 @@ namespace swrenderer __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); // Sample - unsigned int ifgcolor[2]; + unsigned int ifgcolor[2], ifgshade[2]; { unsigned int sampleout = srccolor; + unsigned int sampleshadeout = 0; ifgcolor[0] = sampleout; + ifgshade[0] = sampleshadeout; frac += fracstep; } { unsigned int sampleout = srccolor; + unsigned int sampleshadeout = 0; ifgcolor[1] = sampleout; + ifgshade[1] = sampleshadeout; frac += fracstep; } __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); @@ -4007,16 +4713,20 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); // Blend - __m128i alpha = 
_mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); - bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + uint32_t alpha0 = APART(ifgcolor[0]); + uint32_t alpha1 = APART(ifgcolor[1]); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; - __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); - fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); @@ -4025,10 +4735,13 @@ namespace swrenderer __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_sub_epi32(bg_lo, fg_lo); + __m128i out_hi = _mm_sub_epi32(bg_hi, fg_hi); - __m128i out_lo = _mm_srai_epi16(_mm_sub_epi32(bg_lo, fg_lo), 8); - __m128i out_hi = _mm_srai_epi16(_mm_sub_epi32(bg_hi, fg_hi), 8); - __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = 
_mm_packs_epi32(out_lo, out_hi); outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); @@ -4044,26 +4757,33 @@ namespace swrenderer __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); // Sample - unsigned int ifgcolor[2]; + unsigned int ifgcolor[2], ifgshade[2]; unsigned int sampleout = srccolor; + unsigned int sampleshadeout = 0; ifgcolor[0] = sampleout; ifgcolor[1] = 0; + ifgshade[0] = sampleshadeout; + ifgshade[1] = 0; __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); - bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + uint32_t alpha0 = APART(ifgcolor[0]); + uint32_t alpha1 = APART(ifgcolor[1]); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; - __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); - fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, 
fgalpha0); fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); @@ -4072,10 +4792,13 @@ namespace swrenderer __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_sub_epi32(bg_lo, fg_lo); + __m128i out_hi = _mm_sub_epi32(bg_hi, fg_hi); - __m128i out_lo = _mm_srai_epi16(_mm_sub_epi32(bg_lo, fg_lo), 8); - __m128i out_hi = _mm_srai_epi16(_mm_sub_epi32(bg_hi, fg_hi), 8); - __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); @@ -4086,8 +4809,6 @@ namespace swrenderer { const uint32_t *source = (const uint32_t*)args.TexturePixels(); const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); - const uint8_t *colormap = args.Colormap(); - const uint32_t *translation = (const uint32_t*)args.TranslationMap(); int textureheight = args.TextureHeight(); uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; @@ -4115,10 +4836,10 @@ namespace swrenderer dest = thread->dest_for_thread(dest_y, pitch, dest); fracstep *= thread->num_cores; pitch *= thread->num_cores; - __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); - __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); uint32_t srccolor = args.SrcColorBgra(); - uint32_t color = args.SolidColor(); + uint32_t color = LightBgra::shade_pal_index_simple(args.SolidColor(), light); int ssecount = count / 2; for (int index = 0; index < ssecount; index++) @@ -4130,33 +4851,56 @@ namespace swrenderer __m128i bgcolor = 
_mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); // Sample - unsigned int ifgcolor[2]; + unsigned int ifgcolor[2], ifgshade[2]; { unsigned int sampleout = srccolor; + unsigned int sampleshadeout = 0; ifgcolor[0] = sampleout; + ifgshade[0] = sampleshadeout; frac += fracstep; } { unsigned int sampleout = srccolor; + unsigned int sampleshadeout = 0; ifgcolor[1] = sampleout; + ifgshade[1] = sampleshadeout; frac += fracstep; } __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + int blue0 = BPART(ifgcolor[0]); + int green0 = GPART(ifgcolor[0]); + int red0 = RPART(ifgcolor[0]); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + + int blue1 = BPART(ifgcolor[1]); + int green1 = GPART(ifgcolor[1]); + int red1 = RPART(ifgcolor[1]); + int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); + + fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); - bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + uint32_t alpha0 = APART(ifgcolor[0]); + uint32_t alpha1 = APART(ifgcolor[1]); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 
+ uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; - __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); - fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); @@ -4165,10 +4909,13 @@ namespace swrenderer __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_sub_epi32(bg_lo, fg_lo); + __m128i out_hi = _mm_sub_epi32(bg_hi, fg_hi); - __m128i out_lo = _mm_srai_epi16(_mm_sub_epi32(bg_lo, fg_lo), 8); - __m128i out_hi = _mm_srai_epi16(_mm_sub_epi32(bg_hi, fg_hi), 8); - __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); @@ -4184,26 +4931,48 @@ namespace swrenderer __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); // Sample - unsigned int ifgcolor[2]; + unsigned int ifgcolor[2], ifgshade[2]; unsigned int sampleout = srccolor; + unsigned int sampleshadeout = 0; ifgcolor[0] = sampleout; ifgcolor[1] = 0; + ifgshade[0] = sampleshadeout; + ifgshade[1] = 0; __m128i fgcolor = 
_mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + int blue0 = BPART(ifgcolor[0]); + int green0 = GPART(ifgcolor[0]); + int red0 = RPART(ifgcolor[0]); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + + int blue1 = BPART(ifgcolor[1]); + int green1 = GPART(ifgcolor[1]); + int red1 = RPART(ifgcolor[1]); + int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); + + fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); - bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + uint32_t alpha0 = APART(ifgcolor[0]); + uint32_t alpha1 = APART(ifgcolor[1]); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; - __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); - fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + 
__m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); @@ -4212,10 +4981,13 @@ namespace swrenderer __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_sub_epi32(bg_lo, fg_lo); + __m128i out_hi = _mm_sub_epi32(bg_hi, fg_hi); - __m128i out_lo = _mm_srai_epi16(_mm_sub_epi32(bg_lo, fg_lo), 8); - __m128i out_hi = _mm_srai_epi16(_mm_sub_epi32(bg_hi, fg_hi), 8); - __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); @@ -4240,8 +5012,7 @@ namespace swrenderer auto shade_constants = args.ColormapConstants(); if (shade_constants.simple_shade) { - const uint32_t *source = (const uint32_t*)args.TexturePixels(); - const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); + const uint8_t *source = args.TexturePixels(); const uint8_t *colormap = args.Colormap(); const uint32_t *translation = (const uint32_t*)args.TranslationMap(); int textureheight = args.TextureHeight(); @@ -4266,10 +5037,10 @@ namespace swrenderer dest = thread->dest_for_thread(dest_y, pitch, dest); fracstep *= thread->num_cores; pitch *= thread->num_cores; - __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); - __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); uint32_t srccolor = args.SrcColorBgra(); - uint32_t 
color = args.SolidColor(); + uint32_t color = LightBgra::shade_pal_index_simple(args.SolidColor(), light); int ssecount = count / 2; for (int index = 0; index < ssecount; index++) @@ -4281,30 +5052,30 @@ namespace swrenderer __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); // Sample - unsigned int ifgcolor[2]; + unsigned int ifgcolor[2], ifgshade[2]; { unsigned int sampleout = color; - unsigned int samplealphaout = colormap[source[frac >> FRACBITS]]; - samplealphaout = clamp(samplealphaout, 0, 64) * 4; + unsigned int sampleshadeout = colormap[source[frac >> FRACBITS]]; + sampleshadeout = clamp(sampleshadeout, 0, 64) * 4; ifgcolor[0] = sampleout; + ifgshade[0] = sampleshadeout; frac += fracstep; } { unsigned int sampleout = color; - unsigned int samplealphaout = colormap[source[frac >> FRACBITS]]; - samplealphaout = clamp(samplealphaout, 0, 64) * 4; + unsigned int sampleshadeout = colormap[source[frac >> FRACBITS]]; + sampleshadeout = clamp(sampleshadeout, 0, 64) * 4; ifgcolor[1] = sampleout; + ifgshade[1] = sampleshadeout; frac += fracstep; } __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); // Blend - int shadealpha = 256; // To do: this comes from a sampled source (samplealphaout) - __m128i alpha = _mm_set1_epi16(shadealpha); - __m128i inv_alpha = _mm_set1_epi16(256 - shadealpha); + __m128i alpha = _mm_set_epi16(ifgshade[1], ifgshade[1], ifgshade[1], ifgshade[1], ifgshade[0], ifgshade[0], ifgshade[0], ifgshade[0]); + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); fgcolor = _mm_mullo_epi16(fgcolor, alpha); bgcolor = _mm_mullo_epi16(bgcolor, inv_alpha); @@ -4324,21 +5095,21 @@ namespace swrenderer __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); // Sample - unsigned int ifgcolor[2]; + unsigned int ifgcolor[2], ifgshade[2]; unsigned int sampleout = 
color; - unsigned int samplealphaout = colormap[source[frac >> FRACBITS]]; - samplealphaout = clamp(samplealphaout, 0, 64) * 4; + unsigned int sampleshadeout = colormap[source[frac >> FRACBITS]]; + sampleshadeout = clamp(sampleshadeout, 0, 64) * 4; ifgcolor[0] = sampleout; ifgcolor[1] = 0; + ifgshade[0] = sampleshadeout; + ifgshade[1] = 0; __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); // Blend - int shadealpha = 256; // To do: this comes from a sampled source (samplealphaout) - __m128i alpha = _mm_set1_epi16(shadealpha); - __m128i inv_alpha = _mm_set1_epi16(256 - shadealpha); + __m128i alpha = _mm_set_epi16(ifgshade[1], ifgshade[1], ifgshade[1], ifgshade[1], ifgshade[0], ifgshade[0], ifgshade[0], ifgshade[0]); + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); fgcolor = _mm_mullo_epi16(fgcolor, alpha); bgcolor = _mm_mullo_epi16(bgcolor, inv_alpha); @@ -4351,8 +5122,7 @@ namespace swrenderer } else { - const uint32_t *source = (const uint32_t*)args.TexturePixels(); - const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); + const uint8_t *source = args.TexturePixels(); const uint8_t *colormap = args.Colormap(); const uint32_t *translation = (const uint32_t*)args.TranslationMap(); int textureheight = args.TextureHeight(); @@ -4382,10 +5152,10 @@ namespace swrenderer dest = thread->dest_for_thread(dest_y, pitch, dest); fracstep *= thread->num_cores; pitch *= thread->num_cores; - __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); - __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); uint32_t srccolor = args.SrcColorBgra(); - uint32_t color = args.SolidColor(); + uint32_t color = LightBgra::shade_pal_index_simple(args.SolidColor(), light); int ssecount = count / 2; for (int index = 0; index < ssecount; 
index++) @@ -4397,30 +5167,30 @@ namespace swrenderer __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); // Sample - unsigned int ifgcolor[2]; + unsigned int ifgcolor[2], ifgshade[2]; { unsigned int sampleout = color; - unsigned int samplealphaout = colormap[source[frac >> FRACBITS]]; - samplealphaout = clamp(samplealphaout, 0, 64) * 4; + unsigned int sampleshadeout = colormap[source[frac >> FRACBITS]]; + sampleshadeout = clamp(sampleshadeout, 0, 64) * 4; ifgcolor[0] = sampleout; + ifgshade[0] = sampleshadeout; frac += fracstep; } { unsigned int sampleout = color; - unsigned int samplealphaout = colormap[source[frac >> FRACBITS]]; - samplealphaout = clamp(samplealphaout, 0, 64) * 4; + unsigned int sampleshadeout = colormap[source[frac >> FRACBITS]]; + sampleshadeout = clamp(sampleshadeout, 0, 64) * 4; ifgcolor[1] = sampleout; + ifgshade[1] = sampleshadeout; frac += fracstep; } __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); // Blend - int shadealpha = 256; // To do: this comes from a sampled source (samplealphaout) - __m128i alpha = _mm_set1_epi16(shadealpha); - __m128i inv_alpha = _mm_set1_epi16(256 - shadealpha); + __m128i alpha = _mm_set_epi16(ifgshade[1], ifgshade[1], ifgshade[1], ifgshade[1], ifgshade[0], ifgshade[0], ifgshade[0], ifgshade[0]); + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); fgcolor = _mm_mullo_epi16(fgcolor, alpha); bgcolor = _mm_mullo_epi16(bgcolor, inv_alpha); @@ -4440,21 +5210,21 @@ namespace swrenderer __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); // Sample - unsigned int ifgcolor[2]; + unsigned int ifgcolor[2], ifgshade[2]; unsigned int sampleout = color; - unsigned int samplealphaout = colormap[source[frac >> FRACBITS]]; - samplealphaout = clamp(samplealphaout, 0, 64) * 4; + unsigned int sampleshadeout = 
colormap[source[frac >> FRACBITS]]; + sampleshadeout = clamp(sampleshadeout, 0, 64) * 4; ifgcolor[0] = sampleout; ifgcolor[1] = 0; + ifgshade[0] = sampleshadeout; + ifgshade[1] = 0; __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); // Blend - int shadealpha = 256; // To do: this comes from a sampled source (samplealphaout) - __m128i alpha = _mm_set1_epi16(shadealpha); - __m128i inv_alpha = _mm_set1_epi16(256 - shadealpha); + __m128i alpha = _mm_set_epi16(ifgshade[1], ifgshade[1], ifgshade[1], ifgshade[1], ifgshade[0], ifgshade[0], ifgshade[0], ifgshade[0]); + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); fgcolor = _mm_mullo_epi16(fgcolor, alpha); bgcolor = _mm_mullo_epi16(bgcolor, inv_alpha); @@ -4483,8 +5253,7 @@ namespace swrenderer auto shade_constants = args.ColormapConstants(); if (shade_constants.simple_shade) { - const uint32_t *source = (const uint32_t*)args.TexturePixels(); - const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); + const uint8_t *source = args.TexturePixels(); const uint8_t *colormap = args.Colormap(); const uint32_t *translation = (const uint32_t*)args.TranslationMap(); int textureheight = args.TextureHeight(); @@ -4509,10 +5278,10 @@ namespace swrenderer dest = thread->dest_for_thread(dest_y, pitch, dest); fracstep *= thread->num_cores; pitch *= thread->num_cores; - __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); - __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); uint32_t srccolor = args.SrcColorBgra(); - uint32_t color = args.SolidColor(); + uint32_t color = LightBgra::shade_pal_index_simple(args.SolidColor(), light); int ssecount = count / 2; for (int index = 0; index < ssecount; index++) @@ -4523,15 +5292,19 @@ namespace swrenderer desttmp[1] = dest[offset + pitch]; 
// Sample - unsigned int ifgcolor[2]; + unsigned int ifgcolor[2], ifgshade[2]; { unsigned int sampleout = translation[source[frac >> FRACBITS]]; + unsigned int sampleshadeout = 0; ifgcolor[0] = sampleout; + ifgshade[0] = sampleshadeout; frac += fracstep; } { unsigned int sampleout = translation[source[frac >> FRACBITS]]; + unsigned int sampleshadeout = 0; ifgcolor[1] = sampleout; + ifgshade[1] = sampleshadeout; frac += fracstep; } __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); @@ -4554,10 +5327,13 @@ namespace swrenderer int offset = index * pitch; // Sample - unsigned int ifgcolor[2]; + unsigned int ifgcolor[2], ifgshade[2]; unsigned int sampleout = translation[source[frac >> FRACBITS]]; + unsigned int sampleshadeout = 0; ifgcolor[0] = sampleout; ifgcolor[1] = 0; + ifgshade[0] = sampleshadeout; + ifgshade[1] = 0; __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade @@ -4572,8 +5348,7 @@ namespace swrenderer } else { - const uint32_t *source = (const uint32_t*)args.TexturePixels(); - const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); + const uint8_t *source = args.TexturePixels(); const uint8_t *colormap = args.Colormap(); const uint32_t *translation = (const uint32_t*)args.TranslationMap(); int textureheight = args.TextureHeight(); @@ -4603,10 +5378,10 @@ namespace swrenderer dest = thread->dest_for_thread(dest_y, pitch, dest); fracstep *= thread->num_cores; pitch *= thread->num_cores; - __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); - __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); uint32_t srccolor = args.SrcColorBgra(); - uint32_t color = args.SolidColor(); + uint32_t color = LightBgra::shade_pal_index_simple(args.SolidColor(), light); int ssecount = count / 2; for (int index = 0; index < ssecount; index++) @@ -4617,21 
+5392,40 @@ namespace swrenderer desttmp[1] = dest[offset + pitch]; // Sample - unsigned int ifgcolor[2]; + unsigned int ifgcolor[2], ifgshade[2]; { unsigned int sampleout = translation[source[frac >> FRACBITS]]; + unsigned int sampleshadeout = 0; ifgcolor[0] = sampleout; + ifgshade[0] = sampleshadeout; frac += fracstep; } { unsigned int sampleout = translation[source[frac >> FRACBITS]]; + unsigned int sampleshadeout = 0; ifgcolor[1] = sampleout; + ifgshade[1] = sampleshadeout; frac += fracstep; } __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + int blue0 = BPART(ifgcolor[0]); + int green0 = GPART(ifgcolor[0]); + int red0 = RPART(ifgcolor[0]); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + + int blue1 = BPART(ifgcolor[1]); + int green1 = GPART(ifgcolor[1]); + int red1 = RPART(ifgcolor[1]); + int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); + + fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); // Blend __m128i outcolor = fgcolor; @@ -4648,14 +5442,32 @@ namespace swrenderer int offset = index * pitch; // Sample - unsigned int ifgcolor[2]; + unsigned int ifgcolor[2], ifgshade[2]; unsigned int sampleout = translation[source[frac >> FRACBITS]]; + unsigned int sampleshadeout = 0; ifgcolor[0] = sampleout; ifgcolor[1] = 0; + ifgshade[0] = sampleshadeout; + ifgshade[1] = 0; __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + int 
blue0 = BPART(ifgcolor[0]); + int green0 = GPART(ifgcolor[0]); + int red0 = RPART(ifgcolor[0]); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + + int blue1 = BPART(ifgcolor[1]); + int green1 = GPART(ifgcolor[1]); + int red1 = RPART(ifgcolor[1]); + int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); + + fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); // Blend __m128i outcolor = fgcolor; @@ -4682,8 +5494,7 @@ namespace swrenderer auto shade_constants = args.ColormapConstants(); if (shade_constants.simple_shade) { - const uint32_t *source = (const uint32_t*)args.TexturePixels(); - const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); + const uint8_t *source = args.TexturePixels(); const uint8_t *colormap = args.Colormap(); const uint32_t *translation = (const uint32_t*)args.TranslationMap(); int textureheight = args.TextureHeight(); @@ -4708,10 +5519,10 @@ namespace swrenderer dest = thread->dest_for_thread(dest_y, pitch, dest); fracstep *= thread->num_cores; pitch *= thread->num_cores; - __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); - __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); uint32_t srccolor = args.SrcColorBgra(); - uint32_t color = args.SolidColor(); + uint32_t color = LightBgra::shade_pal_index_simple(args.SolidColor(), light); int ssecount = count / 2; for (int index = 0; index < ssecount; index++) @@ -4723,15 +5534,19 @@ namespace swrenderer __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), 
_mm_setzero_si128()); // Sample - unsigned int ifgcolor[2]; + unsigned int ifgcolor[2], ifgshade[2]; { unsigned int sampleout = translation[source[frac >> FRACBITS]]; + unsigned int sampleshadeout = 0; ifgcolor[0] = sampleout; + ifgshade[0] = sampleshadeout; frac += fracstep; } { unsigned int sampleout = translation[source[frac >> FRACBITS]]; + unsigned int sampleshadeout = 0; ifgcolor[1] = sampleout; + ifgshade[1] = sampleshadeout; frac += fracstep; } __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); @@ -4740,16 +5555,20 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); - bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + uint32_t alpha0 = APART(ifgcolor[0]); + uint32_t alpha1 = APART(ifgcolor[1]); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; - __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); - fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); fgcolor = 
_mm_mullo_epi16(fgcolor, fgalpha); bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); @@ -4758,10 +5577,13 @@ namespace swrenderer __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); + __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); - __m128i out_lo = _mm_srai_epi16(_mm_add_epi32(fg_lo, bg_lo), 8); - __m128i out_hi = _mm_srai_epi16(_mm_add_epi32(fg_hi, bg_hi), 8); - __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); @@ -4777,26 +5599,33 @@ namespace swrenderer __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); // Sample - unsigned int ifgcolor[2]; + unsigned int ifgcolor[2], ifgshade[2]; unsigned int sampleout = translation[source[frac >> FRACBITS]]; + unsigned int sampleshadeout = 0; ifgcolor[0] = sampleout; ifgcolor[1] = 0; + ifgshade[0] = sampleshadeout; + ifgshade[1] = 0; __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); - bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + uint32_t alpha0 = APART(ifgcolor[0]); + uint32_t alpha1 = APART(ifgcolor[1]); + alpha0 += alpha0 >> 7; // 
255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; - __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); - fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); @@ -4805,10 +5634,13 @@ namespace swrenderer __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); + __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); - __m128i out_lo = _mm_srai_epi16(_mm_add_epi32(fg_lo, bg_lo), 8); - __m128i out_hi = _mm_srai_epi16(_mm_add_epi32(fg_hi, bg_hi), 8); - __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); @@ -4817,8 +5649,7 @@ namespace swrenderer } else { - const uint32_t *source = (const uint32_t*)args.TexturePixels(); - const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); + const uint8_t *source = args.TexturePixels(); const uint8_t *colormap = args.Colormap(); const uint32_t *translation = (const uint32_t*)args.TranslationMap(); int textureheight = 
args.TextureHeight(); @@ -4848,10 +5679,10 @@ namespace swrenderer dest = thread->dest_for_thread(dest_y, pitch, dest); fracstep *= thread->num_cores; pitch *= thread->num_cores; - __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); - __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); uint32_t srccolor = args.SrcColorBgra(); - uint32_t color = args.SolidColor(); + uint32_t color = LightBgra::shade_pal_index_simple(args.SolidColor(), light); int ssecount = count / 2; for (int index = 0; index < ssecount; index++) @@ -4863,33 +5694,56 @@ namespace swrenderer __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); // Sample - unsigned int ifgcolor[2]; + unsigned int ifgcolor[2], ifgshade[2]; { unsigned int sampleout = translation[source[frac >> FRACBITS]]; + unsigned int sampleshadeout = 0; ifgcolor[0] = sampleout; + ifgshade[0] = sampleshadeout; frac += fracstep; } { unsigned int sampleout = translation[source[frac >> FRACBITS]]; + unsigned int sampleshadeout = 0; ifgcolor[1] = sampleout; + ifgshade[1] = sampleshadeout; frac += fracstep; } __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + int blue0 = BPART(ifgcolor[0]); + int green0 = GPART(ifgcolor[0]); + int red0 = RPART(ifgcolor[0]); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + + int blue1 = BPART(ifgcolor[1]); + int green1 = GPART(ifgcolor[1]); + int red1 = RPART(ifgcolor[1]); + int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); + + fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); + fgcolor = 
_mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); - bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + uint32_t alpha0 = APART(ifgcolor[0]); + uint32_t alpha1 = APART(ifgcolor[1]); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; - __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); - fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); @@ -4898,10 +5752,13 @@ namespace swrenderer __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); + __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); - __m128i out_lo = _mm_srai_epi16(_mm_add_epi32(fg_lo, bg_lo), 8); - __m128i out_hi = 
_mm_srai_epi16(_mm_add_epi32(fg_hi, bg_hi), 8); - __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); @@ -4917,26 +5774,48 @@ namespace swrenderer __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); // Sample - unsigned int ifgcolor[2]; + unsigned int ifgcolor[2], ifgshade[2]; unsigned int sampleout = translation[source[frac >> FRACBITS]]; + unsigned int sampleshadeout = 0; ifgcolor[0] = sampleout; ifgcolor[1] = 0; + ifgshade[0] = sampleshadeout; + ifgshade[1] = 0; __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + int blue0 = BPART(ifgcolor[0]); + int green0 = GPART(ifgcolor[0]); + int red0 = RPART(ifgcolor[0]); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + + int blue1 = BPART(ifgcolor[1]); + int green1 = GPART(ifgcolor[1]); + int red1 = RPART(ifgcolor[1]); + int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); + + fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - __m128i bgalpha = 
_mm_mullo_epi16(destalpha, alpha); - bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + uint32_t alpha0 = APART(ifgcolor[0]); + uint32_t alpha1 = APART(ifgcolor[1]); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; - __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); - fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); @@ -4945,10 +5824,13 @@ namespace swrenderer __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); + __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); - __m128i out_lo = _mm_srai_epi16(_mm_add_epi32(fg_lo, bg_lo), 8); - __m128i out_hi = _mm_srai_epi16(_mm_add_epi32(fg_hi, bg_hi), 8); - __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); @@ -4973,8 +5855,7 @@ namespace swrenderer auto shade_constants = args.ColormapConstants(); if 
(shade_constants.simple_shade) { - const uint32_t *source = (const uint32_t*)args.TexturePixels(); - const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); + const uint8_t *source = args.TexturePixels(); const uint8_t *colormap = args.Colormap(); const uint32_t *translation = (const uint32_t*)args.TranslationMap(); int textureheight = args.TextureHeight(); @@ -4999,10 +5880,10 @@ namespace swrenderer dest = thread->dest_for_thread(dest_y, pitch, dest); fracstep *= thread->num_cores; pitch *= thread->num_cores; - __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); - __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); uint32_t srccolor = args.SrcColorBgra(); - uint32_t color = args.SolidColor(); + uint32_t color = LightBgra::shade_pal_index_simple(args.SolidColor(), light); int ssecount = count / 2; for (int index = 0; index < ssecount; index++) @@ -5014,15 +5895,19 @@ namespace swrenderer __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); // Sample - unsigned int ifgcolor[2]; + unsigned int ifgcolor[2], ifgshade[2]; { unsigned int sampleout = translation[source[frac >> FRACBITS]]; + unsigned int sampleshadeout = 0; ifgcolor[0] = sampleout; + ifgshade[0] = sampleshadeout; frac += fracstep; } { unsigned int sampleout = translation[source[frac >> FRACBITS]]; + unsigned int sampleshadeout = 0; ifgcolor[1] = sampleout; + ifgshade[1] = sampleshadeout; frac += fracstep; } __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); @@ -5031,16 +5916,20 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = 
_mm_sub_epi16(_mm_set1_epi16(256), alpha); - - __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); - bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + uint32_t alpha0 = APART(ifgcolor[0]); + uint32_t alpha1 = APART(ifgcolor[1]); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; - __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); - fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); @@ -5049,10 +5938,13 @@ namespace swrenderer __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_sub_epi32(fg_lo, bg_lo); + __m128i out_hi = _mm_sub_epi32(fg_hi, bg_hi); - __m128i out_lo = _mm_srai_epi16(_mm_sub_epi32(fg_lo, bg_lo), 8); - __m128i out_hi = _mm_srai_epi16(_mm_sub_epi32(fg_hi, bg_hi), 8); - __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); @@ -5068,26 +5960,33 @@ namespace swrenderer __m128i 
bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); // Sample - unsigned int ifgcolor[2]; + unsigned int ifgcolor[2], ifgshade[2]; unsigned int sampleout = translation[source[frac >> FRACBITS]]; + unsigned int sampleshadeout = 0; ifgcolor[0] = sampleout; ifgcolor[1] = 0; + ifgshade[0] = sampleshadeout; + ifgshade[1] = 0; __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); - bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + uint32_t alpha0 = APART(ifgcolor[0]); + uint32_t alpha1 = APART(ifgcolor[1]); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; - __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); - fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); @@ -5096,10 +5995,13 @@ namespace swrenderer __m128i bg_lo = 
_mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_sub_epi32(fg_lo, bg_lo); + __m128i out_hi = _mm_sub_epi32(fg_hi, bg_hi); - __m128i out_lo = _mm_srai_epi16(_mm_sub_epi32(fg_lo, bg_lo), 8); - __m128i out_hi = _mm_srai_epi16(_mm_sub_epi32(fg_hi, bg_hi), 8); - __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); @@ -5108,8 +6010,7 @@ namespace swrenderer } else { - const uint32_t *source = (const uint32_t*)args.TexturePixels(); - const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); + const uint8_t *source = args.TexturePixels(); const uint8_t *colormap = args.Colormap(); const uint32_t *translation = (const uint32_t*)args.TranslationMap(); int textureheight = args.TextureHeight(); @@ -5139,10 +6040,10 @@ namespace swrenderer dest = thread->dest_for_thread(dest_y, pitch, dest); fracstep *= thread->num_cores; pitch *= thread->num_cores; - __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); - __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); uint32_t srccolor = args.SrcColorBgra(); - uint32_t color = args.SolidColor(); + uint32_t color = LightBgra::shade_pal_index_simple(args.SolidColor(), light); int ssecount = count / 2; for (int index = 0; index < ssecount; index++) @@ -5154,33 +6055,56 @@ namespace swrenderer __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); // Sample - unsigned int ifgcolor[2]; + unsigned int ifgcolor[2], ifgshade[2]; { unsigned int sampleout = translation[source[frac >> 
FRACBITS]]; + unsigned int sampleshadeout = 0; ifgcolor[0] = sampleout; + ifgshade[0] = sampleshadeout; frac += fracstep; } { unsigned int sampleout = translation[source[frac >> FRACBITS]]; + unsigned int sampleshadeout = 0; ifgcolor[1] = sampleout; + ifgshade[1] = sampleshadeout; frac += fracstep; } __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + int blue0 = BPART(ifgcolor[0]); + int green0 = GPART(ifgcolor[0]); + int red0 = RPART(ifgcolor[0]); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + + int blue1 = BPART(ifgcolor[1]); + int green1 = GPART(ifgcolor[1]); + int red1 = RPART(ifgcolor[1]); + int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); + + fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); - bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + uint32_t alpha0 = APART(ifgcolor[0]); + uint32_t alpha1 = APART(ifgcolor[1]); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; - __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); - fgalpha = 
_mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); @@ -5189,10 +6113,13 @@ namespace swrenderer __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_sub_epi32(fg_lo, bg_lo); + __m128i out_hi = _mm_sub_epi32(fg_hi, bg_hi); - __m128i out_lo = _mm_srai_epi16(_mm_sub_epi32(fg_lo, bg_lo), 8); - __m128i out_hi = _mm_srai_epi16(_mm_sub_epi32(fg_hi, bg_hi), 8); - __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); @@ -5208,26 +6135,48 @@ namespace swrenderer __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); // Sample - unsigned int ifgcolor[2]; + unsigned int ifgcolor[2], ifgshade[2]; unsigned int sampleout = translation[source[frac >> FRACBITS]]; + unsigned int sampleshadeout = 0; ifgcolor[0] = sampleout; ifgcolor[1] = 0; + ifgshade[0] = sampleshadeout; + ifgshade[1] = 0; __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); 
+ int blue0 = BPART(ifgcolor[0]); + int green0 = GPART(ifgcolor[0]); + int red0 = RPART(ifgcolor[0]); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + + int blue1 = BPART(ifgcolor[1]); + int green1 = GPART(ifgcolor[1]); + int red1 = RPART(ifgcolor[1]); + int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); + + fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); - bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + uint32_t alpha0 = APART(ifgcolor[0]); + uint32_t alpha1 = APART(ifgcolor[1]); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; - __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); - fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = 
_mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); @@ -5236,10 +6185,13 @@ namespace swrenderer __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_sub_epi32(fg_lo, bg_lo); + __m128i out_hi = _mm_sub_epi32(fg_hi, bg_hi); - __m128i out_lo = _mm_srai_epi16(_mm_sub_epi32(fg_lo, bg_lo), 8); - __m128i out_hi = _mm_srai_epi16(_mm_sub_epi32(fg_hi, bg_hi), 8); - __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); @@ -5264,8 +6216,7 @@ namespace swrenderer auto shade_constants = args.ColormapConstants(); if (shade_constants.simple_shade) { - const uint32_t *source = (const uint32_t*)args.TexturePixels(); - const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); + const uint8_t *source = args.TexturePixels(); const uint8_t *colormap = args.Colormap(); const uint32_t *translation = (const uint32_t*)args.TranslationMap(); int textureheight = args.TextureHeight(); @@ -5290,10 +6241,10 @@ namespace swrenderer dest = thread->dest_for_thread(dest_y, pitch, dest); fracstep *= thread->num_cores; pitch *= thread->num_cores; - __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); - __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); uint32_t srccolor = args.SrcColorBgra(); - uint32_t color = args.SolidColor(); + uint32_t color = LightBgra::shade_pal_index_simple(args.SolidColor(), light); int ssecount = count / 2; 
for (int index = 0; index < ssecount; index++) @@ -5305,15 +6256,19 @@ namespace swrenderer __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); // Sample - unsigned int ifgcolor[2]; + unsigned int ifgcolor[2], ifgshade[2]; { unsigned int sampleout = translation[source[frac >> FRACBITS]]; + unsigned int sampleshadeout = 0; ifgcolor[0] = sampleout; + ifgshade[0] = sampleshadeout; frac += fracstep; } { unsigned int sampleout = translation[source[frac >> FRACBITS]]; + unsigned int sampleshadeout = 0; ifgcolor[1] = sampleout; + ifgshade[1] = sampleshadeout; frac += fracstep; } __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); @@ -5322,16 +6277,20 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); - bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + uint32_t alpha0 = APART(ifgcolor[0]); + uint32_t alpha1 = APART(ifgcolor[1]); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; - __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); - fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, 
bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); @@ -5340,10 +6299,13 @@ namespace swrenderer __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_sub_epi32(bg_lo, fg_lo); + __m128i out_hi = _mm_sub_epi32(bg_hi, fg_hi); - __m128i out_lo = _mm_srai_epi16(_mm_sub_epi32(bg_lo, fg_lo), 8); - __m128i out_hi = _mm_srai_epi16(_mm_sub_epi32(bg_hi, fg_hi), 8); - __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); @@ -5359,26 +6321,33 @@ namespace swrenderer __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); // Sample - unsigned int ifgcolor[2]; + unsigned int ifgcolor[2], ifgshade[2]; unsigned int sampleout = translation[source[frac >> FRACBITS]]; + unsigned int sampleshadeout = 0; ifgcolor[0] = sampleout; ifgcolor[1] = 0; + ifgshade[0] = sampleshadeout; + ifgshade[1] = 0; __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); - bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, 
_mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + uint32_t alpha0 = APART(ifgcolor[0]); + uint32_t alpha1 = APART(ifgcolor[1]); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; - __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); - fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); @@ -5387,10 +6356,13 @@ namespace swrenderer __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_sub_epi32(bg_lo, fg_lo); + __m128i out_hi = _mm_sub_epi32(bg_hi, fg_hi); - __m128i out_lo = _mm_srai_epi16(_mm_sub_epi32(bg_lo, fg_lo), 8); - __m128i out_hi = _mm_srai_epi16(_mm_sub_epi32(bg_hi, fg_hi), 8); - __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); @@ -5399,8 +6371,7 @@ namespace swrenderer } else { - const uint32_t *source = (const uint32_t*)args.TexturePixels(); - const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); + const uint8_t *source = 
args.TexturePixels(); const uint8_t *colormap = args.Colormap(); const uint32_t *translation = (const uint32_t*)args.TranslationMap(); int textureheight = args.TextureHeight(); @@ -5430,10 +6401,10 @@ namespace swrenderer dest = thread->dest_for_thread(dest_y, pitch, dest); fracstep *= thread->num_cores; pitch *= thread->num_cores; - __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); - __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); uint32_t srccolor = args.SrcColorBgra(); - uint32_t color = args.SolidColor(); + uint32_t color = LightBgra::shade_pal_index_simple(args.SolidColor(), light); int ssecount = count / 2; for (int index = 0; index < ssecount; index++) @@ -5445,33 +6416,56 @@ namespace swrenderer __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); // Sample - unsigned int ifgcolor[2]; + unsigned int ifgcolor[2], ifgshade[2]; { unsigned int sampleout = translation[source[frac >> FRACBITS]]; + unsigned int sampleshadeout = 0; ifgcolor[0] = sampleout; + ifgshade[0] = sampleshadeout; frac += fracstep; } { unsigned int sampleout = translation[source[frac >> FRACBITS]]; + unsigned int sampleshadeout = 0; ifgcolor[1] = sampleout; + ifgshade[1] = sampleshadeout; frac += fracstep; } __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + int blue0 = BPART(ifgcolor[0]); + int green0 = GPART(ifgcolor[0]); + int red0 = RPART(ifgcolor[0]); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + + int blue1 = BPART(ifgcolor[1]); + int green1 = GPART(ifgcolor[1]); + int red1 = RPART(ifgcolor[1]); + int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, 
intensity0, intensity0); + + fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); - bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + uint32_t alpha0 = APART(ifgcolor[0]); + uint32_t alpha1 = APART(ifgcolor[1]); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; - __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); - fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); @@ -5480,10 +6474,13 @@ namespace swrenderer __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_sub_epi32(bg_lo, fg_lo); + 
__m128i out_hi = _mm_sub_epi32(bg_hi, fg_hi); - __m128i out_lo = _mm_srai_epi16(_mm_sub_epi32(bg_lo, fg_lo), 8); - __m128i out_hi = _mm_srai_epi16(_mm_sub_epi32(bg_hi, fg_hi), 8); - __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); @@ -5499,26 +6496,48 @@ namespace swrenderer __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); // Sample - unsigned int ifgcolor[2]; + unsigned int ifgcolor[2], ifgshade[2]; unsigned int sampleout = translation[source[frac >> FRACBITS]]; + unsigned int sampleshadeout = 0; ifgcolor[0] = sampleout; ifgcolor[1] = 0; + ifgshade[0] = sampleshadeout; + ifgshade[1] = 0; __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + int blue0 = BPART(ifgcolor[0]); + int green0 = GPART(ifgcolor[0]); + int red0 = RPART(ifgcolor[0]); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + + int blue1 = BPART(ifgcolor[1]); + int green1 = GPART(ifgcolor[1]); + int red1 = RPART(ifgcolor[1]); + int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); + + fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, 
_mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); - bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + uint32_t alpha0 = APART(ifgcolor[0]); + uint32_t alpha1 = APART(ifgcolor[1]); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; - __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); - fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); @@ -5527,10 +6546,13 @@ namespace swrenderer __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_sub_epi32(bg_lo, fg_lo); + __m128i out_hi = _mm_sub_epi32(bg_hi, fg_hi); - __m128i out_lo = _mm_srai_epi16(_mm_sub_epi32(bg_lo, fg_lo), 8); - __m128i out_hi = _mm_srai_epi16(_mm_sub_epi32(bg_hi, fg_hi), 8); - __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); outcolor = _mm_or_si128(outcolor, 
_mm_set1_epi32(0xff000000)); diff --git a/src/swrenderer/drawers/r_draw_sprite32.php b/src/swrenderer/drawers/r_draw_sprite32.php index 376b9ea4da..483c867627 100644 --- a/src/swrenderer/drawers/r_draw_sprite32.php +++ b/src/swrenderer/drawers/r_draw_sprite32.php @@ -82,12 +82,20 @@ namespace swrenderer } function LoopShade($blendVariant, $sampleVariant, $isSimpleShade) - { ?> - const uint32_t *source = (const uint32_t*)args.TexturePixels(); - const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); + { + if ($sampleVariant == "shaded" || $sampleVariant == "translated") + { ?> + const uint8_t *source = args.TexturePixels(); const uint8_t *colormap = args.Colormap(); const uint32_t *translation = (const uint32_t*)args.TranslationMap(); - + const uint32_t *source = (const uint32_t*)args.TexturePixels(); + const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); + bool is_nearest_filter = (source2 == nullptr); if (is_nearest_filter) @@ -141,10 +149,10 @@ namespace swrenderer { ?> frac -= one / 2; - __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); - __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); uint32_t srccolor = args.SrcColorBgra(); - uint32_t color = args.SolidColor(); + uint32_t color = LightBgra::shade_pal_index_simple(args.SolidColor(), light); int ssecount = count / 2; for (int index = 0; index < ssecount; index++) @@ -158,21 +166,23 @@ namespace swrenderer // Sample - unsigned int ifgcolor[2]; + unsigned int ifgcolor[2], ifgshade[2]; { ifgcolor[0] = sampleout; + ifgshade[0] = sampleshadeout; frac += fracstep; } { ifgcolor[1] = sampleout; + ifgshade[1] = sampleshadeout; frac += fracstep; } __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - + // Blend @@ -191,14 +201,16 @@ namespace swrenderer // Sample - unsigned int ifgcolor[2]; + unsigned int ifgcolor[2], 
ifgshade[2]; ifgcolor[0] = sampleout; ifgcolor[1] = 0; + ifgshade[0] = sampleshadeout; + ifgshade[1] = 0; __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - + // Blend @@ -212,21 +224,24 @@ namespace swrenderer if ($sampleVariant == "shaded") { ?> unsigned int sampleout = color; - unsigned int samplealphaout = colormap[source[frac >> FRACBITS]]; - samplealphaout = clamp(samplealphaout, 0, 64) * 4; + unsigned int sampleshadeout = colormap[source[frac >> FRACBITS]]; + sampleshadeout = clamp(sampleshadeout, 0, 64) * 4; unsigned int sampleout = translation[source[frac >> FRACBITS]]; + unsigned int sampleshadeout = 0; unsigned int sampleout = srccolor; + unsigned int sampleshadeout = 0; int sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; unsigned int sampleout = source[sample_index]; + unsigned int sampleshadeout = 0; @@ -252,11 +267,14 @@ namespace swrenderer unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + unsigned int sampleshadeout = 0; fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); @@ -291,9 +309,8 @@ namespace swrenderer - int shadealpha = 256; // To do: this comes from a sampled source (samplealphaout) - __m128i alpha = _mm_set1_epi16(shadealpha); - __m128i inv_alpha = _mm_set1_epi16(256 - shadealpha); + __m128i alpha = _mm_set_epi16(ifgshade[1], ifgshade[1], ifgshade[1], ifgshade[1], ifgshade[0], ifgshade[0], ifgshade[0], ifgshade[0]); + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); fgcolor = _mm_mullo_epi16(fgcolor, alpha); bgcolor = _mm_mullo_epi16(bgcolor, inv_alpha); @@ -301,55 +318,22 @@ namespace swrenderer outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - __m128i out_lo = 
_mm_srai_epi16(_mm_add_epi32(fg_lo, bg_lo), 8); - __m128i out_hi = _mm_srai_epi16(_mm_add_epi32(fg_hi, bg_hi), 8); - __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - - __m128i out_lo = _mm_srai_epi16(_mm_sub_epi32(fg_lo, bg_lo), 8); - __m128i out_hi = _mm_srai_epi16(_mm_sub_epi32(fg_hi, bg_hi), 8); - __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - - __m128i out_lo = _mm_srai_epi16(_mm_sub_epi32(bg_lo, fg_lo), 8); - __m128i out_hi = _mm_srai_epi16(_mm_sub_epi32(bg_hi, fg_hi), 8); - __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - - __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); - bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + else + { ?> + uint32_t alpha0 = APART(ifgcolor[0]); + uint32_t alpha1 = APART(ifgcolor[1]); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; - __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); - fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 
128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); @@ -358,7 +342,29 @@ namespace swrenderer __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - + __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); + __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); + + __m128i out_lo = _mm_sub_epi32(fg_lo, bg_lo); + __m128i out_hi = _mm_sub_epi32(fg_hi, bg_hi); + + __m128i out_lo = _mm_sub_epi32(bg_lo, fg_lo); + __m128i out_hi = _mm_sub_epi32(bg_hi, fg_hi); + + + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + } diff --git a/src/swrenderer/drawers/r_draw_wall32.h b/src/swrenderer/drawers/r_draw_wall32.h index 1e4da006cc..b6c446a531 100644 --- a/src/swrenderer/drawers/r_draw_wall32.h +++ b/src/swrenderer/drawers/r_draw_wall32.h @@ -71,8 +71,8 @@ namespace swrenderer dest = thread->dest_for_thread(dest_y, pitch, dest); fracstep *= thread->num_cores; pitch *= thread->num_cores; - __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); - __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); int ssecount = count / 2; for (int index = 0; index < ssecount; index++) @@ -158,8 +158,8 @@ namespace swrenderer fracstep *= thread->num_cores; pitch *= thread->num_cores; frac -= one / 2; - __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); - __m128i destalpha = 
_mm_set1_epi16(args.DestAlpha()); + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); int ssecount = count / 2; for (int index = 0; index < ssecount; index++) @@ -312,8 +312,8 @@ namespace swrenderer dest = thread->dest_for_thread(dest_y, pitch, dest); fracstep *= thread->num_cores; pitch *= thread->num_cores; - __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); - __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); int ssecount = count / 2; for (int index = 0; index < ssecount; index++) @@ -434,8 +434,8 @@ namespace swrenderer fracstep *= thread->num_cores; pitch *= thread->num_cores; frac -= one / 2; - __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); - __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); int ssecount = count / 2; for (int index = 0; index < ssecount; index++) @@ -629,8 +629,8 @@ namespace swrenderer dest = thread->dest_for_thread(dest_y, pitch, dest); fracstep *= thread->num_cores; pitch *= thread->num_cores; - __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); - __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); int ssecount = count / 2; for (int index = 0; index < ssecount; index++) @@ -665,6 +665,7 @@ namespace swrenderer alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + fgcolor = _mm_mullo_epi16(fgcolor, alpha); bgcolor = _mm_mullo_epi16(bgcolor, inv_alpha); __m128i outcolor = _mm_srli_epi16(_mm_add_epi16(fgcolor, bgcolor), 8); @@ -698,6 +699,7 @@ namespace swrenderer alpha = 
_mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + fgcolor = _mm_mullo_epi16(fgcolor, alpha); bgcolor = _mm_mullo_epi16(bgcolor, inv_alpha); __m128i outcolor = _mm_srli_epi16(_mm_add_epi16(fgcolor, bgcolor), 8); @@ -732,8 +734,8 @@ namespace swrenderer fracstep *= thread->num_cores; pitch *= thread->num_cores; frac -= one / 2; - __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); - __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); int ssecount = count / 2; for (int index = 0; index < ssecount; index++) @@ -806,6 +808,7 @@ namespace swrenderer alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + fgcolor = _mm_mullo_epi16(fgcolor, alpha); bgcolor = _mm_mullo_epi16(bgcolor, inv_alpha); __m128i outcolor = _mm_srli_epi16(_mm_add_epi16(fgcolor, bgcolor), 8); @@ -858,6 +861,7 @@ namespace swrenderer alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + fgcolor = _mm_mullo_epi16(fgcolor, alpha); bgcolor = _mm_mullo_epi16(bgcolor, inv_alpha); __m128i outcolor = _mm_srli_epi16(_mm_add_epi16(fgcolor, bgcolor), 8); @@ -902,8 +906,8 @@ namespace swrenderer dest = thread->dest_for_thread(dest_y, pitch, dest); fracstep *= thread->num_cores; pitch *= thread->num_cores; - __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); - __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); int ssecount = count / 2; for (int index = 0; index < 
ssecount; index++) @@ -953,6 +957,7 @@ namespace swrenderer alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + fgcolor = _mm_mullo_epi16(fgcolor, alpha); bgcolor = _mm_mullo_epi16(bgcolor, inv_alpha); __m128i outcolor = _mm_srli_epi16(_mm_add_epi16(fgcolor, bgcolor), 8); @@ -1001,6 +1006,7 @@ namespace swrenderer alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + fgcolor = _mm_mullo_epi16(fgcolor, alpha); bgcolor = _mm_mullo_epi16(bgcolor, inv_alpha); __m128i outcolor = _mm_srli_epi16(_mm_add_epi16(fgcolor, bgcolor), 8); @@ -1040,8 +1046,8 @@ namespace swrenderer fracstep *= thread->num_cores; pitch *= thread->num_cores; frac -= one / 2; - __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); - __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); int ssecount = count / 2; for (int index = 0; index < ssecount; index++) @@ -1129,6 +1135,7 @@ namespace swrenderer alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + fgcolor = _mm_mullo_epi16(fgcolor, alpha); bgcolor = _mm_mullo_epi16(bgcolor, inv_alpha); __m128i outcolor = _mm_srli_epi16(_mm_add_epi16(fgcolor, bgcolor), 8); @@ -1196,6 +1203,7 @@ namespace swrenderer alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + fgcolor = _mm_mullo_epi16(fgcolor, alpha); bgcolor = _mm_mullo_epi16(bgcolor, inv_alpha); __m128i outcolor = 
_mm_srli_epi16(_mm_add_epi16(fgcolor, bgcolor), 8); @@ -1251,8 +1259,8 @@ namespace swrenderer dest = thread->dest_for_thread(dest_y, pitch, dest); fracstep *= thread->num_cores; pitch *= thread->num_cores; - __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); - __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); int ssecount = count / 2; for (int index = 0; index < ssecount; index++) @@ -1283,16 +1291,20 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); - bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + uint32_t alpha0 = APART(ifgcolor[0]); + uint32_t alpha1 = APART(ifgcolor[1]); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; - __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); - fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); bgcolor = _mm_mullo_epi16(bgcolor, 
bgalpha); @@ -1301,10 +1313,13 @@ namespace swrenderer __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); + __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); - __m128i out_lo = _mm_srai_epi16(_mm_add_epi32(fg_lo, bg_lo), 8); - __m128i out_hi = _mm_srai_epi16(_mm_add_epi32(fg_hi, bg_hi), 8); - __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); @@ -1331,16 +1346,20 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); - bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + uint32_t alpha0 = APART(ifgcolor[0]); + uint32_t alpha1 = APART(ifgcolor[1]); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; - __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); - fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = 
_mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); @@ -1349,10 +1368,13 @@ namespace swrenderer __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); + __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); - __m128i out_lo = _mm_srai_epi16(_mm_add_epi32(fg_lo, bg_lo), 8); - __m128i out_hi = _mm_srai_epi16(_mm_add_epi32(fg_hi, bg_hi), 8); - __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); @@ -1384,8 +1406,8 @@ namespace swrenderer fracstep *= thread->num_cores; pitch *= thread->num_cores; frac -= one / 2; - __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); - __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); int ssecount = count / 2; for (int index = 0; index < ssecount; index++) @@ -1454,16 +1476,20 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); - bgalpha = 
_mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + uint32_t alpha0 = APART(ifgcolor[0]); + uint32_t alpha1 = APART(ifgcolor[1]); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; - __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); - fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); @@ -1472,10 +1498,13 @@ namespace swrenderer __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); + __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); - __m128i out_lo = _mm_srai_epi16(_mm_add_epi32(fg_lo, bg_lo), 8); - __m128i out_hi = _mm_srai_epi16(_mm_add_epi32(fg_hi, bg_hi), 8); - __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); @@ -1521,16 +1550,20 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, 
_MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); - bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + uint32_t alpha0 = APART(ifgcolor[0]); + uint32_t alpha1 = APART(ifgcolor[1]); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; - __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); - fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); @@ -1539,10 +1572,13 @@ namespace swrenderer __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); + __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); - __m128i out_lo = _mm_srai_epi16(_mm_add_epi32(fg_lo, bg_lo), 8); - __m128i out_hi = _mm_srai_epi16(_mm_add_epi32(fg_hi, bg_hi), 8); - __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); 
outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); @@ -1584,8 +1620,8 @@ namespace swrenderer dest = thread->dest_for_thread(dest_y, pitch, dest); fracstep *= thread->num_cores; pitch *= thread->num_cores; - __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); - __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); int ssecount = count / 2; for (int index = 0; index < ssecount; index++) @@ -1631,16 +1667,20 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); - bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + uint32_t alpha0 = APART(ifgcolor[0]); + uint32_t alpha1 = APART(ifgcolor[1]); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; - __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); - fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); 
fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); @@ -1649,10 +1689,13 @@ namespace swrenderer __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); + __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); - __m128i out_lo = _mm_srai_epi16(_mm_add_epi32(fg_lo, bg_lo), 8); - __m128i out_hi = _mm_srai_epi16(_mm_add_epi32(fg_hi, bg_hi), 8); - __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); @@ -1694,16 +1737,20 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); - bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + uint32_t alpha0 = APART(ifgcolor[0]); + uint32_t alpha1 = APART(ifgcolor[1]); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; - __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); - fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 
8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); @@ -1712,10 +1759,13 @@ namespace swrenderer __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); + __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); - __m128i out_lo = _mm_srai_epi16(_mm_add_epi32(fg_lo, bg_lo), 8); - __m128i out_hi = _mm_srai_epi16(_mm_add_epi32(fg_hi, bg_hi), 8); - __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); @@ -1752,8 +1802,8 @@ namespace swrenderer fracstep *= thread->num_cores; pitch *= thread->num_cores; frac -= one / 2; - __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); - __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); int ssecount = count / 2; for (int index = 0; index < ssecount; index++) @@ -1837,16 +1887,20 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - __m128i bgalpha = 
_mm_mullo_epi16(destalpha, alpha); - bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + uint32_t alpha0 = APART(ifgcolor[0]); + uint32_t alpha1 = APART(ifgcolor[1]); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; - __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); - fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); @@ -1855,10 +1909,13 @@ namespace swrenderer __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); + __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); - __m128i out_lo = _mm_srai_epi16(_mm_add_epi32(fg_lo, bg_lo), 8); - __m128i out_hi = _mm_srai_epi16(_mm_add_epi32(fg_hi, bg_hi), 8); - __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); @@ -1919,16 +1976,20 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); // Blend - 
__m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); - bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + uint32_t alpha0 = APART(ifgcolor[0]); + uint32_t alpha1 = APART(ifgcolor[1]); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; - __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); - fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); @@ -1937,10 +1998,13 @@ namespace swrenderer __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); + __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); - __m128i out_lo = _mm_srai_epi16(_mm_add_epi32(fg_lo, bg_lo), 8); - __m128i out_hi = _mm_srai_epi16(_mm_add_epi32(fg_hi, bg_hi), 8); - __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i 
outcolor = _mm_packs_epi32(out_lo, out_hi); outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); @@ -1993,8 +2057,8 @@ namespace swrenderer dest = thread->dest_for_thread(dest_y, pitch, dest); fracstep *= thread->num_cores; pitch *= thread->num_cores; - __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); - __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); int ssecount = count / 2; for (int index = 0; index < ssecount; index++) @@ -2025,16 +2089,20 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); - bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + uint32_t alpha0 = APART(ifgcolor[0]); + uint32_t alpha1 = APART(ifgcolor[1]); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; - __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); - fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, 
fgalpha0, fgalpha0, fgalpha0, fgalpha0); fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); @@ -2043,10 +2111,13 @@ namespace swrenderer __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_sub_epi32(fg_lo, bg_lo); + __m128i out_hi = _mm_sub_epi32(fg_hi, bg_hi); - __m128i out_lo = _mm_srai_epi16(_mm_sub_epi32(fg_lo, bg_lo), 8); - __m128i out_hi = _mm_srai_epi16(_mm_sub_epi32(fg_hi, bg_hi), 8); - __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); @@ -2073,16 +2144,20 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); - bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + uint32_t alpha0 = APART(ifgcolor[0]); + uint32_t alpha1 = APART(ifgcolor[1]); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; - __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); - fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t 
fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); @@ -2091,10 +2166,13 @@ namespace swrenderer __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_sub_epi32(fg_lo, bg_lo); + __m128i out_hi = _mm_sub_epi32(fg_hi, bg_hi); - __m128i out_lo = _mm_srai_epi16(_mm_sub_epi32(fg_lo, bg_lo), 8); - __m128i out_hi = _mm_srai_epi16(_mm_sub_epi32(fg_hi, bg_hi), 8); - __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); @@ -2126,8 +2204,8 @@ namespace swrenderer fracstep *= thread->num_cores; pitch *= thread->num_cores; frac -= one / 2; - __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); - __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); int ssecount = count / 2; for (int index = 0; index < ssecount; index++) @@ -2196,16 +2274,20 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), 
alpha); - - __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); - bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + uint32_t alpha0 = APART(ifgcolor[0]); + uint32_t alpha1 = APART(ifgcolor[1]); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; - __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); - fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); @@ -2214,10 +2296,13 @@ namespace swrenderer __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_sub_epi32(fg_lo, bg_lo); + __m128i out_hi = _mm_sub_epi32(fg_hi, bg_hi); - __m128i out_lo = _mm_srai_epi16(_mm_sub_epi32(fg_lo, bg_lo), 8); - __m128i out_hi = _mm_srai_epi16(_mm_sub_epi32(fg_hi, bg_hi), 8); - __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); @@ -2263,16 +2348,20 @@ namespace swrenderer fgcolor = 
_mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); - bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + uint32_t alpha0 = APART(ifgcolor[0]); + uint32_t alpha1 = APART(ifgcolor[1]); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; - __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); - fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); @@ -2281,10 +2370,13 @@ namespace swrenderer __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_sub_epi32(fg_lo, bg_lo); + __m128i out_hi = _mm_sub_epi32(fg_hi, bg_hi); - __m128i out_lo = _mm_srai_epi16(_mm_sub_epi32(fg_lo, bg_lo), 8); - __m128i out_hi = _mm_srai_epi16(_mm_sub_epi32(fg_hi, bg_hi), 8); - __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + out_lo = 
_mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); @@ -2326,8 +2418,8 @@ namespace swrenderer dest = thread->dest_for_thread(dest_y, pitch, dest); fracstep *= thread->num_cores; pitch *= thread->num_cores; - __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); - __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); int ssecount = count / 2; for (int index = 0; index < ssecount; index++) @@ -2373,16 +2465,20 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); - bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + uint32_t alpha0 = APART(ifgcolor[0]); + uint32_t alpha1 = APART(ifgcolor[1]); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; - __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); - fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); 
+ __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); @@ -2391,10 +2487,13 @@ namespace swrenderer __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_sub_epi32(fg_lo, bg_lo); + __m128i out_hi = _mm_sub_epi32(fg_hi, bg_hi); - __m128i out_lo = _mm_srai_epi16(_mm_sub_epi32(fg_lo, bg_lo), 8); - __m128i out_hi = _mm_srai_epi16(_mm_sub_epi32(fg_hi, bg_hi), 8); - __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); @@ -2436,16 +2535,20 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); - bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + uint32_t alpha0 = APART(ifgcolor[0]); + uint32_t alpha1 = APART(ifgcolor[1]); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; - __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); - fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + uint32_t 
bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); @@ -2454,10 +2557,13 @@ namespace swrenderer __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_sub_epi32(fg_lo, bg_lo); + __m128i out_hi = _mm_sub_epi32(fg_hi, bg_hi); - __m128i out_lo = _mm_srai_epi16(_mm_sub_epi32(fg_lo, bg_lo), 8); - __m128i out_hi = _mm_srai_epi16(_mm_sub_epi32(fg_hi, bg_hi), 8); - __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); @@ -2494,8 +2600,8 @@ namespace swrenderer fracstep *= thread->num_cores; pitch *= thread->num_cores; frac -= one / 2; - __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); - __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); int ssecount = count / 2; for (int index = 0; index < ssecount; index++) @@ -2579,16 +2685,20 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 
7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); - bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + uint32_t alpha0 = APART(ifgcolor[0]); + uint32_t alpha1 = APART(ifgcolor[1]); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; - __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); - fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); @@ -2597,10 +2707,13 @@ namespace swrenderer __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_sub_epi32(fg_lo, bg_lo); + __m128i out_hi = _mm_sub_epi32(fg_hi, bg_hi); - __m128i out_lo = _mm_srai_epi16(_mm_sub_epi32(fg_lo, bg_lo), 8); - __m128i out_hi = _mm_srai_epi16(_mm_sub_epi32(fg_hi, bg_hi), 8); - __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); @@ -2661,16 
+2774,20 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); - bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + uint32_t alpha0 = APART(ifgcolor[0]); + uint32_t alpha1 = APART(ifgcolor[1]); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; - __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); - fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); @@ -2679,10 +2796,13 @@ namespace swrenderer __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_sub_epi32(fg_lo, bg_lo); + __m128i out_hi = _mm_sub_epi32(fg_hi, bg_hi); - __m128i out_lo = _mm_srai_epi16(_mm_sub_epi32(fg_lo, bg_lo), 8); - __m128i out_hi = _mm_srai_epi16(_mm_sub_epi32(fg_hi, bg_hi), 8); - __m128i outcolor = 
_mm_packs_epi32(fg_lo, fg_hi); + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); @@ -2735,8 +2855,8 @@ namespace swrenderer dest = thread->dest_for_thread(dest_y, pitch, dest); fracstep *= thread->num_cores; pitch *= thread->num_cores; - __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); - __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); int ssecount = count / 2; for (int index = 0; index < ssecount; index++) @@ -2767,16 +2887,20 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); - bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + uint32_t alpha0 = APART(ifgcolor[0]); + uint32_t alpha1 = APART(ifgcolor[1]); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; - __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); - fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, 
bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); @@ -2785,10 +2909,13 @@ namespace swrenderer __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_sub_epi32(bg_lo, fg_lo); + __m128i out_hi = _mm_sub_epi32(bg_hi, fg_hi); - __m128i out_lo = _mm_srai_epi16(_mm_sub_epi32(bg_lo, fg_lo), 8); - __m128i out_hi = _mm_srai_epi16(_mm_sub_epi32(bg_hi, fg_hi), 8); - __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); @@ -2815,16 +2942,20 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); - bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + uint32_t alpha0 = APART(ifgcolor[0]); + uint32_t alpha1 = APART(ifgcolor[1]); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; - __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); - fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + uint32_t bgalpha0 = (destalpha * alpha0 + 
(inv_alpha0 << 8) + 128) >> 8; + uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); @@ -2833,10 +2964,13 @@ namespace swrenderer __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_sub_epi32(bg_lo, fg_lo); + __m128i out_hi = _mm_sub_epi32(bg_hi, fg_hi); - __m128i out_lo = _mm_srai_epi16(_mm_sub_epi32(bg_lo, fg_lo), 8); - __m128i out_hi = _mm_srai_epi16(_mm_sub_epi32(bg_hi, fg_hi), 8); - __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); @@ -2868,8 +3002,8 @@ namespace swrenderer fracstep *= thread->num_cores; pitch *= thread->num_cores; frac -= one / 2; - __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); - __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); int ssecount = count / 2; for (int index = 0; index < ssecount; index++) @@ -2938,16 +3072,20 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = 
_mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); - bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + uint32_t alpha0 = APART(ifgcolor[0]); + uint32_t alpha1 = APART(ifgcolor[1]); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; - __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); - fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); @@ -2956,10 +3094,13 @@ namespace swrenderer __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_sub_epi32(bg_lo, fg_lo); + __m128i out_hi = _mm_sub_epi32(bg_hi, fg_hi); - __m128i out_lo = _mm_srai_epi16(_mm_sub_epi32(bg_lo, fg_lo), 8); - __m128i out_hi = _mm_srai_epi16(_mm_sub_epi32(bg_hi, fg_hi), 8); - __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); outcolor = _mm_or_si128(outcolor, 
_mm_set1_epi32(0xff000000)); @@ -3005,16 +3146,20 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); - bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + uint32_t alpha0 = APART(ifgcolor[0]); + uint32_t alpha1 = APART(ifgcolor[1]); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; - __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); - fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); @@ -3023,10 +3168,13 @@ namespace swrenderer __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_sub_epi32(bg_lo, fg_lo); + __m128i out_hi = _mm_sub_epi32(bg_hi, fg_hi); - __m128i out_lo = _mm_srai_epi16(_mm_sub_epi32(bg_lo, fg_lo), 8); - __m128i out_hi = _mm_srai_epi16(_mm_sub_epi32(bg_hi, 
fg_hi), 8); - __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); @@ -3068,8 +3216,8 @@ namespace swrenderer dest = thread->dest_for_thread(dest_y, pitch, dest); fracstep *= thread->num_cores; pitch *= thread->num_cores; - __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); - __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); int ssecount = count / 2; for (int index = 0; index < ssecount; index++) @@ -3115,16 +3263,20 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); - bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + uint32_t alpha0 = APART(ifgcolor[0]); + uint32_t alpha1 = APART(ifgcolor[1]); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; - __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); - fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = 
_mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); @@ -3133,10 +3285,13 @@ namespace swrenderer __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_sub_epi32(bg_lo, fg_lo); + __m128i out_hi = _mm_sub_epi32(bg_hi, fg_hi); - __m128i out_lo = _mm_srai_epi16(_mm_sub_epi32(bg_lo, fg_lo), 8); - __m128i out_hi = _mm_srai_epi16(_mm_sub_epi32(bg_hi, fg_hi), 8); - __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); @@ -3178,16 +3333,20 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); - bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + uint32_t alpha0 = APART(ifgcolor[0]); + uint32_t alpha1 = APART(ifgcolor[1]); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; - __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); - fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, 
_mm_set1_epi16(128)), 8); + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); @@ -3196,10 +3355,13 @@ namespace swrenderer __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_sub_epi32(bg_lo, fg_lo); + __m128i out_hi = _mm_sub_epi32(bg_hi, fg_hi); - __m128i out_lo = _mm_srai_epi16(_mm_sub_epi32(bg_lo, fg_lo), 8); - __m128i out_hi = _mm_srai_epi16(_mm_sub_epi32(bg_hi, fg_hi), 8); - __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); @@ -3236,8 +3398,8 @@ namespace swrenderer fracstep *= thread->num_cores; pitch *= thread->num_cores; frac -= one / 2; - __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); - __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); int ssecount = count / 2; for (int index = 0; index < ssecount; index++) @@ -3321,16 +3483,20 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - 
alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); - bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + uint32_t alpha0 = APART(ifgcolor[0]); + uint32_t alpha1 = APART(ifgcolor[1]); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; - __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); - fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); @@ -3339,10 +3505,13 @@ namespace swrenderer __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_sub_epi32(bg_lo, fg_lo); + __m128i out_hi = _mm_sub_epi32(bg_hi, fg_hi); - __m128i out_lo = _mm_srai_epi16(_mm_sub_epi32(bg_lo, fg_lo), 8); - __m128i out_hi = _mm_srai_epi16(_mm_sub_epi32(bg_hi, fg_hi), 8); - __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); outcolor = 
_mm_packus_epi16(outcolor, _mm_setzero_si128()); outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); @@ -3403,16 +3572,20 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); - bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + uint32_t alpha0 = APART(ifgcolor[0]); + uint32_t alpha1 = APART(ifgcolor[1]); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; - __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); - fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); @@ -3421,10 +3594,13 @@ namespace swrenderer __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_sub_epi32(bg_lo, fg_lo); + __m128i out_hi = _mm_sub_epi32(bg_hi, fg_hi); - __m128i out_lo = 
_mm_srai_epi16(_mm_sub_epi32(bg_lo, fg_lo), 8); - __m128i out_hi = _mm_srai_epi16(_mm_sub_epi32(bg_hi, fg_hi), 8); - __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); diff --git a/src/swrenderer/drawers/r_draw_wall32.php b/src/swrenderer/drawers/r_draw_wall32.php index 81abbfe693..a414ee7d79 100644 --- a/src/swrenderer/drawers/r_draw_wall32.php +++ b/src/swrenderer/drawers/r_draw_wall32.php @@ -120,8 +120,8 @@ namespace swrenderer { ?> frac -= one / 2; - __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); - __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); int ssecount = count / 2; for (int index = 0; index < ssecount; index++) @@ -252,63 +252,34 @@ namespace swrenderer outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + { ?> + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + fgcolor = _mm_mullo_epi16(fgcolor, alpha); bgcolor = _mm_mullo_epi16(bgcolor, inv_alpha); __m128i outcolor = _mm_srli_epi16(_mm_add_epi16(fgcolor, bgcolor), 8); outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - __m128i out_lo = _mm_srai_epi16(_mm_add_epi32(fg_lo, bg_lo), 8); - __m128i out_hi = _mm_srai_epi16(_mm_add_epi32(fg_hi, bg_hi), 8); - __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - - 
__m128i out_lo = _mm_srai_epi16(_mm_sub_epi32(fg_lo, bg_lo), 8); - __m128i out_hi = _mm_srai_epi16(_mm_sub_epi32(fg_hi, bg_hi), 8); - __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - - __m128i out_lo = _mm_srai_epi16(_mm_sub_epi32(bg_lo, fg_lo), 8); - __m128i out_hi = _mm_srai_epi16(_mm_sub_epi32(bg_hi, fg_hi), 8); - __m128i outcolor = _mm_packs_epi32(fg_lo, fg_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - - __m128i bgalpha = _mm_mullo_epi16(destalpha, alpha); - bgalpha = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(bgalpha, _mm_slli_epi16(inv_alpha, 8)), _mm_set1_epi16(128)), 8); + else + { ?> + uint32_t alpha0 = APART(ifgcolor[0]); + uint32_t alpha1 = APART(ifgcolor[1]); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; - __m128i fgalpha = _mm_mullo_epi16(srcalpha, alpha); - fgalpha = _mm_srli_epi16(_mm_add_epi16(fgalpha, _mm_set1_epi16(128)), 8); + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); bgcolor = 
_mm_mullo_epi16(bgcolor, bgalpha); @@ -317,7 +288,29 @@ namespace swrenderer __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - + __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); + __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); + + __m128i out_lo = _mm_sub_epi32(fg_lo, bg_lo); + __m128i out_hi = _mm_sub_epi32(fg_hi, bg_hi); + + __m128i out_lo = _mm_sub_epi32(bg_lo, fg_lo); + __m128i out_hi = _mm_sub_epi32(bg_hi, fg_hi); + + + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + } From 5fa5b062d6b5b4edb1784fc388e41da418435e5f Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 20 Feb 2017 22:49:52 +0100 Subject: [PATCH 866/912] Added php script for the span drawers --- src/swrenderer/drawers/r_draw_rgba.cpp | 49 + src/swrenderer/drawers/r_draw_rgba.h | 12 +- src/swrenderer/drawers/r_draw_span32.h | 5174 ++++++++++++++++++++++ src/swrenderer/drawers/r_draw_span32.php | 388 ++ 4 files changed, 5617 insertions(+), 6 deletions(-) create mode 100644 src/swrenderer/drawers/r_draw_span32.h create mode 100644 src/swrenderer/drawers/r_draw_span32.php diff --git a/src/swrenderer/drawers/r_draw_rgba.cpp b/src/swrenderer/drawers/r_draw_rgba.cpp index d2baacaaea..532aa09400 100644 --- a/src/swrenderer/drawers/r_draw_rgba.cpp +++ b/src/swrenderer/drawers/r_draw_rgba.cpp @@ -42,6 +42,7 @@ #include "swrenderer/scene/r_light.h" #include "r_draw_wall32.h" #include "r_draw_sprite32.h" +#include "r_draw_span32.h" #include "gi.h" #include "stats.h" @@ -246,6 +247,54 @@ namespace swrenderer Queue->Push(args); } + void SWTruecolorDrawers::DrawSpan(const SpanDrawerArgs &args) + { + if (r_phpdrawers) + Queue->Push(args); + else + 
Queue->Push(args); + } + + void SWTruecolorDrawers::DrawSpanMasked(const SpanDrawerArgs &args) + { + if (r_phpdrawers) + Queue->Push(args); + else + Queue->Push(args); + } + + void SWTruecolorDrawers::DrawSpanTranslucent(const SpanDrawerArgs &args) + { + if (r_phpdrawers) + Queue->Push(args); + else + Queue->Push(args); + } + + void SWTruecolorDrawers::DrawSpanMaskedTranslucent(const SpanDrawerArgs &args) + { + if (r_phpdrawers) + Queue->Push(args); + else + Queue->Push(args); + } + + void SWTruecolorDrawers::DrawSpanAddClamp(const SpanDrawerArgs &args) + { + if (r_phpdrawers) + Queue->Push(args); + else + Queue->Push(args); + } + + void SWTruecolorDrawers::DrawSpanMaskedAddClamp(const SpanDrawerArgs &args) + { + if (r_phpdrawers) + Queue->Push(args); + else + Queue->Push(args); + } + DrawSpanLLVMCommand::DrawSpanLLVMCommand(const SpanDrawerArgs &drawerargs) { auto shade_constants = drawerargs.ColormapConstants(); diff --git a/src/swrenderer/drawers/r_draw_rgba.h b/src/swrenderer/drawers/r_draw_rgba.h index 383ba13a56..52452fab05 100644 --- a/src/swrenderer/drawers/r_draw_rgba.h +++ b/src/swrenderer/drawers/r_draw_rgba.h @@ -384,12 +384,12 @@ namespace swrenderer void DrawSubClampTranslatedColumn(const SpriteDrawerArgs &args) override; void DrawRevSubClampColumn(const SpriteDrawerArgs &args) override; void DrawRevSubClampTranslatedColumn(const SpriteDrawerArgs &args) override; - void DrawSpan(const SpanDrawerArgs &args) override { Queue->Push(args); } - void DrawSpanMasked(const SpanDrawerArgs &args) override { Queue->Push(args); } - void DrawSpanTranslucent(const SpanDrawerArgs &args) override { Queue->Push(args); } - void DrawSpanMaskedTranslucent(const SpanDrawerArgs &args) override { Queue->Push(args); } - void DrawSpanAddClamp(const SpanDrawerArgs &args) override { Queue->Push(args); } - void DrawSpanMaskedAddClamp(const SpanDrawerArgs &args) override { Queue->Push(args); } + void DrawSpan(const SpanDrawerArgs &args) override; + void DrawSpanMasked(const 
SpanDrawerArgs &args) override; + void DrawSpanTranslucent(const SpanDrawerArgs &args) override; + void DrawSpanMaskedTranslucent(const SpanDrawerArgs &args) override; + void DrawSpanAddClamp(const SpanDrawerArgs &args) override; + void DrawSpanMaskedAddClamp(const SpanDrawerArgs &args) override; void FillSpan(const SpanDrawerArgs &args) override { Queue->Push(args); } void DrawTiltedSpan(const SpanDrawerArgs &args, int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy, FDynamicColormap *basecolormap) override diff --git a/src/swrenderer/drawers/r_draw_span32.h b/src/swrenderer/drawers/r_draw_span32.h new file mode 100644 index 0000000000..6f282e7bd5 --- /dev/null +++ b/src/swrenderer/drawers/r_draw_span32.h @@ -0,0 +1,5174 @@ +/* +** Drawer commands for spans +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. +** +*/ + +/* + Warning: this C++ source file has been auto-generated. Please modify the original php script that generated it. 
+*/ + +#pragma once + +#include "swrenderer/drawers/r_draw_rgba.h" +#include "swrenderer/viewport/r_spandrawer.h" + +namespace swrenderer +{ + class DrawSpan32Command : public DrawerCommand + { + protected: + SpanDrawerArgs args; + + public: + DrawSpan32Command(const SpanDrawerArgs &drawerargs) : args(drawerargs) { } + + void Execute(DrawerThread *thread) override + { + if (thread->line_skipped_by_thread(args.DestY())) return; + + uint32_t xbits = args.TextureWidthBits(); + uint32_t ybits = args.TextureHeightBits(); + uint32_t xstep = args.TextureUStep(); + uint32_t ystep = args.TextureVStep(); + uint32_t xfrac = args.TextureUPos(); + uint32_t yfrac = args.TextureVPos(); + uint32_t yshift = 32 - ybits; + uint32_t xshift = yshift - xbits; + uint32_t xmask = ((1 << xbits) - 1) << ybits; + + const uint32_t *source = (const uint32_t*)args.TexturePixels(); + + double lod = args.TextureLOD(); + bool mipmapped = args.MipmappedTexture(); + + bool magnifying = lod < 0.0; + if (r_mipmap && mipmapped) + { + int level = (int)lod; + while (level > 0) + { + if (xbits <= 2 || ybits <= 2) + break; + + source += (1 << (xbits)) * (1 << (ybits)); + xbits -= 1; + ybits -= 1; + level--; + } + } + + bool is_nearest_filter = !((magnifying && r_magfilter) || (!magnifying && r_minfilter)); + + auto shade_constants = args.ColormapConstants(); + if (shade_constants.simple_shade) + { + if (is_nearest_filter) + { + bool is_64x64 = xbits == 6 && ybits == 6; + if (is_64x64) + { + // Shade constants + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + + int count = args.DestX2() - args.DestX1() + 1; + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); + uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); + + uint32_t srcalpha = 
args.SrcAlpha() >> (FRACBITS - 8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); + + int ssecount = count / 2; + for (int index = 0; index < ssecount; index++) + { + int offset = index * 2; + + // Sample + unsigned int ifgcolor[2]; + { + int sample_index = ((xfrac >> (32 - 6 - 6)) & (63 * 64)) + (yfrac >> (32 - 6)); + unsigned int sampleout = source[sample_index]; + ifgcolor[0] = sampleout; + xfrac += xstep; + yfrac += ystep; + } + { + int sample_index = ((xfrac >> (32 - 6 - 6)) & (63 * 64)) + (yfrac >> (32 - 6)); + unsigned int sampleout = source[sample_index]; + ifgcolor[1] = sampleout; + xfrac += xstep; + yfrac += ystep; + } + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + __m128i outcolor = fgcolor; + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + + _mm_storel_epi64((__m128i*)(dest + offset), outcolor); + } + + if (ssecount * 2 != count) + { + int index = ssecount * 2; + int offset = index; + + // Sample + unsigned int ifgcolor[2]; + int sample_index = ((xfrac >> (32 - 6 - 6)) & (63 * 64)) + (yfrac >> (32 - 6)); + unsigned int sampleout = source[sample_index]; + ifgcolor[0] = sampleout; + ifgcolor[1] = 0; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + __m128i outcolor = fgcolor; + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + + dest[offset] = _mm_cvtsi128_si32(outcolor); + } + } + else + { + // Shade constants + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + + int count = args.DestX2() - args.DestX1() + 1; + int pitch = 
RenderViewport::Instance()->RenderTarget->GetPitch(); + uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); + + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); + + int ssecount = count / 2; + for (int index = 0; index < ssecount; index++) + { + int offset = index * 2; + + // Sample + unsigned int ifgcolor[2]; + { + int sample_index = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + unsigned int sampleout = source[sample_index]; + ifgcolor[0] = sampleout; + xfrac += xstep; + yfrac += ystep; + } + { + int sample_index = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + unsigned int sampleout = source[sample_index]; + ifgcolor[1] = sampleout; + xfrac += xstep; + yfrac += ystep; + } + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + __m128i outcolor = fgcolor; + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + + _mm_storel_epi64((__m128i*)(dest + offset), outcolor); + } + + if (ssecount * 2 != count) + { + int index = ssecount * 2; + int offset = index; + + // Sample + unsigned int ifgcolor[2]; + int sample_index = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + unsigned int sampleout = source[sample_index]; + ifgcolor[0] = sampleout; + ifgcolor[1] = 0; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + __m128i outcolor = fgcolor; + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + + dest[offset] = _mm_cvtsi128_si32(outcolor); + } + } + } + else + { + bool is_64x64 = xbits == 6 && ybits == 6; + if (is_64x64) + { + // Shade constants + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, 
light, light); + __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + + int count = args.DestX2() - args.DestX1() + 1; + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); + uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); + + xfrac -= 1 << (31 - xbits); + yfrac -= 1 << (31 - ybits); + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); + + int ssecount = count / 2; + for (int index = 0; index < ssecount; index++) + { + int offset = index * 2; + + // Sample + unsigned int ifgcolor[2]; + { + uint32_t xxbits = 26; + uint32_t yybits = 26; + uint32_t xxshift = (32 - xxbits); + uint32_t yyshift = (32 - yybits); + uint32_t xxmask = (1 << xxshift) - 1; + uint32_t yymask = (1 << yyshift) - 1; + uint32_t x = xfrac >> xxbits; + uint32_t y = yfrac >> yybits; + + uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; + uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; + + uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; + uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; + uint32_t a = 16 - inv_a; + uint32_t b = 16 - inv_b; + + uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha 
<< 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[0] = sampleout; + xfrac += xstep; + yfrac += ystep; + } + { + uint32_t xxbits = 26; + uint32_t yybits = 26; + uint32_t xxshift = (32 - xxbits); + uint32_t yyshift = (32 - yybits); + uint32_t xxmask = (1 << xxshift) - 1; + uint32_t yymask = (1 << yyshift) - 1; + uint32_t x = xfrac >> xxbits; + uint32_t y = yfrac >> yybits; + + uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; + uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; + + uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; + uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; + uint32_t a = 16 - inv_a; + uint32_t b = 16 - inv_b; + + uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[1] = sampleout; + xfrac += xstep; + yfrac += ystep; + } + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + __m128i outcolor = fgcolor; + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + + _mm_storel_epi64((__m128i*)(dest + offset), outcolor); + } + + if (ssecount * 2 != count) + { + int index = ssecount * 2; + int offset = index; + + // Sample + 
unsigned int ifgcolor[2]; + uint32_t xxbits = 26; + uint32_t yybits = 26; + uint32_t xxshift = (32 - xxbits); + uint32_t yyshift = (32 - yybits); + uint32_t xxmask = (1 << xxshift) - 1; + uint32_t yymask = (1 << yyshift) - 1; + uint32_t x = xfrac >> xxbits; + uint32_t y = yfrac >> yybits; + + uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; + uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; + + uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; + uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; + uint32_t a = 16 - inv_a; + uint32_t b = 16 - inv_b; + + uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[0] = sampleout; + ifgcolor[1] = 0; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + __m128i outcolor = fgcolor; + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + + dest[offset] = _mm_cvtsi128_si32(outcolor); + } + } + else + { + // Shade constants + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 
- light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + + int count = args.DestX2() - args.DestX1() + 1; + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); + uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); + + xfrac -= 1 << (31 - xbits); + yfrac -= 1 << (31 - ybits); + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); + + int ssecount = count / 2; + for (int index = 0; index < ssecount; index++) + { + int offset = index * 2; + + // Sample + unsigned int ifgcolor[2]; + { + uint32_t xxbits = 32 - xbits; + uint32_t yybits = 32 - ybits; + uint32_t xxshift = (32 - xxbits); + uint32_t yyshift = (32 - yybits); + uint32_t xxmask = (1 << xxshift) - 1; + uint32_t yymask = (1 << yyshift) - 1; + uint32_t x = xfrac >> xxbits; + uint32_t y = yfrac >> yybits; + + uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; + uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; + + uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; + uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; + uint32_t a = 16 - inv_a; + uint32_t b = 16 - inv_b; + + uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + 
ifgcolor[0] = sampleout; + xfrac += xstep; + yfrac += ystep; + } + { + uint32_t xxbits = 32 - xbits; + uint32_t yybits = 32 - ybits; + uint32_t xxshift = (32 - xxbits); + uint32_t yyshift = (32 - yybits); + uint32_t xxmask = (1 << xxshift) - 1; + uint32_t yymask = (1 << yyshift) - 1; + uint32_t x = xfrac >> xxbits; + uint32_t y = yfrac >> yybits; + + uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; + uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; + + uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; + uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; + uint32_t a = 16 - inv_a; + uint32_t b = 16 - inv_b; + + uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[1] = sampleout; + xfrac += xstep; + yfrac += ystep; + } + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + __m128i outcolor = fgcolor; + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + + _mm_storel_epi64((__m128i*)(dest + offset), outcolor); + } + + if (ssecount * 2 != count) + { + int index = ssecount * 2; + int offset = index; + + // Sample + unsigned int ifgcolor[2]; + uint32_t 
xxbits = 32 - xbits; + uint32_t yybits = 32 - ybits; + uint32_t xxshift = (32 - xxbits); + uint32_t yyshift = (32 - yybits); + uint32_t xxmask = (1 << xxshift) - 1; + uint32_t yymask = (1 << yyshift) - 1; + uint32_t x = xfrac >> xxbits; + uint32_t y = yfrac >> yybits; + + uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; + uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; + + uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; + uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; + uint32_t a = 16 - inv_a; + uint32_t b = 16 - inv_b; + + uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[0] = sampleout; + ifgcolor[1] = 0; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + __m128i outcolor = fgcolor; + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + + dest[offset] = _mm_cvtsi128_si32(outcolor); + } + } + } + } + else + { + if (is_nearest_filter) + { + bool is_64x64 = xbits == 6 && ybits == 6; + if (is_64x64) + { + // Shade constants + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 
256, light, light, light); + __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); + __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); + shade_fade = _mm_mullo_epi16(shade_fade, inv_light); + __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); + int desaturate = shade_constants.desaturate; + + int count = args.DestX2() - args.DestX1() + 1; + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); + uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); + + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); + + int ssecount = count / 2; + for (int index = 0; index < ssecount; index++) + { + int offset = index * 2; + + // Sample + unsigned int ifgcolor[2]; + { + int sample_index = ((xfrac >> (32 - 6 - 6)) & (63 * 64)) + (yfrac >> (32 - 6)); + unsigned int sampleout = source[sample_index]; + ifgcolor[0] = sampleout; + xfrac += xstep; + yfrac += ystep; + } + { + int sample_index = ((xfrac >> (32 - 6 - 6)) & (63 * 64)) + (yfrac >> (32 - 6)); + unsigned int sampleout = source[sample_index]; + ifgcolor[1] = sampleout; + xfrac += xstep; + yfrac += ystep; + } + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), 
_mm_setzero_si128()); + + // Shade + int blue0 = BPART(ifgcolor[0]); + int green0 = GPART(ifgcolor[0]); + int red0 = RPART(ifgcolor[0]); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + + int blue1 = BPART(ifgcolor[1]); + int green1 = GPART(ifgcolor[1]); + int red1 = RPART(ifgcolor[1]); + int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); + + fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + // Blend + __m128i outcolor = fgcolor; + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + + _mm_storel_epi64((__m128i*)(dest + offset), outcolor); + } + + if (ssecount * 2 != count) + { + int index = ssecount * 2; + int offset = index; + + // Sample + unsigned int ifgcolor[2]; + int sample_index = ((xfrac >> (32 - 6 - 6)) & (63 * 64)) + (yfrac >> (32 - 6)); + unsigned int sampleout = source[sample_index]; + ifgcolor[0] = sampleout; + ifgcolor[1] = 0; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + int blue0 = BPART(ifgcolor[0]); + int green0 = GPART(ifgcolor[0]); + int red0 = RPART(ifgcolor[0]); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + + int blue1 = BPART(ifgcolor[1]); + int green1 = GPART(ifgcolor[1]); + int red1 = RPART(ifgcolor[1]); + int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); + + fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); + fgcolor = 
_mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + // Blend + __m128i outcolor = fgcolor; + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + + dest[offset] = _mm_cvtsi128_si32(outcolor); + } + } + else + { + // Shade constants + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); + __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); + shade_fade = _mm_mullo_epi16(shade_fade, inv_light); + __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); + int desaturate = shade_constants.desaturate; + + int count = args.DestX2() - args.DestX1() + 1; + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); + uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); + + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); + + int ssecount = count / 2; + for (int index = 0; index < ssecount; index++) + { + int offset = index * 2; + + // Sample + unsigned int ifgcolor[2]; + { + int 
sample_index = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + unsigned int sampleout = source[sample_index]; + ifgcolor[0] = sampleout; + xfrac += xstep; + yfrac += ystep; + } + { + int sample_index = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + unsigned int sampleout = source[sample_index]; + ifgcolor[1] = sampleout; + xfrac += xstep; + yfrac += ystep; + } + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + int blue0 = BPART(ifgcolor[0]); + int green0 = GPART(ifgcolor[0]); + int red0 = RPART(ifgcolor[0]); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + + int blue1 = BPART(ifgcolor[1]); + int green1 = GPART(ifgcolor[1]); + int red1 = RPART(ifgcolor[1]); + int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); + + fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + // Blend + __m128i outcolor = fgcolor; + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + + _mm_storel_epi64((__m128i*)(dest + offset), outcolor); + } + + if (ssecount * 2 != count) + { + int index = ssecount * 2; + int offset = index; + + // Sample + unsigned int ifgcolor[2]; + int sample_index = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + unsigned int sampleout = source[sample_index]; + ifgcolor[0] = sampleout; + ifgcolor[1] = 0; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + int blue0 = BPART(ifgcolor[0]); + int green0 = GPART(ifgcolor[0]); + int red0 = RPART(ifgcolor[0]); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; 
+ + int blue1 = BPART(ifgcolor[1]); + int green1 = GPART(ifgcolor[1]); + int red1 = RPART(ifgcolor[1]); + int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); + + fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + // Blend + __m128i outcolor = fgcolor; + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + + dest[offset] = _mm_cvtsi128_si32(outcolor); + } + } + } + else + { + bool is_64x64 = xbits == 6 && ybits == 6; + if (is_64x64) + { + // Shade constants + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); + __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); + shade_fade = _mm_mullo_epi16(shade_fade, inv_light); + __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); + int desaturate = shade_constants.desaturate; + + int 
count = args.DestX2() - args.DestX1() + 1; + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); + uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); + + xfrac -= 1 << (31 - xbits); + yfrac -= 1 << (31 - ybits); + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); + + int ssecount = count / 2; + for (int index = 0; index < ssecount; index++) + { + int offset = index * 2; + + // Sample + unsigned int ifgcolor[2]; + { + uint32_t xxbits = 26; + uint32_t yybits = 26; + uint32_t xxshift = (32 - xxbits); + uint32_t yyshift = (32 - yybits); + uint32_t xxmask = (1 << xxshift) - 1; + uint32_t yymask = (1 << yyshift) - 1; + uint32_t x = xfrac >> xxbits; + uint32_t y = yfrac >> yybits; + + uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; + uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; + + uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; + uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; + uint32_t a = 16 - inv_a; + uint32_t b = 16 - inv_b; + + uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[0] = sampleout; + xfrac += xstep; + yfrac += ystep; + } + { + uint32_t xxbits = 26; + 
uint32_t yybits = 26; + uint32_t xxshift = (32 - xxbits); + uint32_t yyshift = (32 - yybits); + uint32_t xxmask = (1 << xxshift) - 1; + uint32_t yymask = (1 << yyshift) - 1; + uint32_t x = xfrac >> xxbits; + uint32_t y = yfrac >> yybits; + + uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; + uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; + + uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; + uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; + uint32_t a = 16 - inv_a; + uint32_t b = 16 - inv_b; + + uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[1] = sampleout; + xfrac += xstep; + yfrac += ystep; + } + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + int blue0 = BPART(ifgcolor[0]); + int green0 = GPART(ifgcolor[0]); + int red0 = RPART(ifgcolor[0]); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + + int blue1 = BPART(ifgcolor[1]); + int green1 = GPART(ifgcolor[1]); + int red1 = RPART(ifgcolor[1]); + int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); + 
+ fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + // Blend + __m128i outcolor = fgcolor; + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + + _mm_storel_epi64((__m128i*)(dest + offset), outcolor); + } + + if (ssecount * 2 != count) + { + int index = ssecount * 2; + int offset = index; + + // Sample + unsigned int ifgcolor[2]; + uint32_t xxbits = 26; + uint32_t yybits = 26; + uint32_t xxshift = (32 - xxbits); + uint32_t yyshift = (32 - yybits); + uint32_t xxmask = (1 << xxshift) - 1; + uint32_t yymask = (1 << yyshift) - 1; + uint32_t x = xfrac >> xxbits; + uint32_t y = yfrac >> yybits; + + uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; + uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; + + uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; + uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; + uint32_t a = 16 - inv_a; + uint32_t b = 16 - inv_b; + + uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[0] = sampleout; + ifgcolor[1] = 0; + __m128i 
fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + int blue0 = BPART(ifgcolor[0]); + int green0 = GPART(ifgcolor[0]); + int red0 = RPART(ifgcolor[0]); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + + int blue1 = BPART(ifgcolor[1]); + int green1 = GPART(ifgcolor[1]); + int red1 = RPART(ifgcolor[1]); + int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); + + fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + // Blend + __m128i outcolor = fgcolor; + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + + dest[offset] = _mm_cvtsi128_si32(outcolor); + } + } + else + { + // Shade constants + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); + __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); + shade_fade = _mm_mullo_epi16(shade_fade, inv_light); + __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, 
shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); + int desaturate = shade_constants.desaturate; + + int count = args.DestX2() - args.DestX1() + 1; + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); + uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); + + xfrac -= 1 << (31 - xbits); + yfrac -= 1 << (31 - ybits); + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); + + int ssecount = count / 2; + for (int index = 0; index < ssecount; index++) + { + int offset = index * 2; + + // Sample + unsigned int ifgcolor[2]; + { + uint32_t xxbits = 32 - xbits; + uint32_t yybits = 32 - ybits; + uint32_t xxshift = (32 - xxbits); + uint32_t yyshift = (32 - yybits); + uint32_t xxmask = (1 << xxshift) - 1; + uint32_t yymask = (1 << yyshift) - 1; + uint32_t x = xfrac >> xxbits; + uint32_t y = yfrac >> yybits; + + uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; + uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; + + uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; + uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; + uint32_t a = 16 - inv_a; + uint32_t b = 16 - inv_b; + + uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + 
APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[0] = sampleout; + xfrac += xstep; + yfrac += ystep; + } + { + uint32_t xxbits = 32 - xbits; + uint32_t yybits = 32 - ybits; + uint32_t xxshift = (32 - xxbits); + uint32_t yyshift = (32 - yybits); + uint32_t xxmask = (1 << xxshift) - 1; + uint32_t yymask = (1 << yyshift) - 1; + uint32_t x = xfrac >> xxbits; + uint32_t y = yfrac >> yybits; + + uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; + uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; + + uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; + uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; + uint32_t a = 16 - inv_a; + uint32_t b = 16 - inv_b; + + uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[1] = sampleout; + xfrac += xstep; + yfrac += ystep; + } + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + int blue0 = BPART(ifgcolor[0]); + int green0 = GPART(ifgcolor[0]); + int red0 = RPART(ifgcolor[0]); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + + int blue1 = BPART(ifgcolor[1]); + 
int green1 = GPART(ifgcolor[1]); + int red1 = RPART(ifgcolor[1]); + int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); + + fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + // Blend + __m128i outcolor = fgcolor; + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + + _mm_storel_epi64((__m128i*)(dest + offset), outcolor); + } + + if (ssecount * 2 != count) + { + int index = ssecount * 2; + int offset = index; + + // Sample + unsigned int ifgcolor[2]; + uint32_t xxbits = 32 - xbits; + uint32_t yybits = 32 - ybits; + uint32_t xxshift = (32 - xxbits); + uint32_t yyshift = (32 - yybits); + uint32_t xxmask = (1 << xxshift) - 1; + uint32_t yymask = (1 << yyshift) - 1; + uint32_t x = xfrac >> xxbits; + uint32_t y = yfrac >> yybits; + + uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; + uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; + + uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; + uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; + uint32_t a = 16 - inv_a; + uint32_t b = 16 - inv_b; + + uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + 
uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[0] = sampleout; + ifgcolor[1] = 0; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + int blue0 = BPART(ifgcolor[0]); + int green0 = GPART(ifgcolor[0]); + int red0 = RPART(ifgcolor[0]); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + + int blue1 = BPART(ifgcolor[1]); + int green1 = GPART(ifgcolor[1]); + int red1 = RPART(ifgcolor[1]); + int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); + + fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + // Blend + __m128i outcolor = fgcolor; + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + + dest[offset] = _mm_cvtsi128_si32(outcolor); + } + } + } + } + } + + FString DebugInfo() override { return "DrawSpan32Command"; } + }; + + class DrawSpanMasked32Command : public DrawerCommand + { + protected: + SpanDrawerArgs args; + + public: + DrawSpanMasked32Command(const SpanDrawerArgs &drawerargs) : args(drawerargs) { } + + void Execute(DrawerThread *thread) override + { + if (thread->line_skipped_by_thread(args.DestY())) return; + + uint32_t xbits = args.TextureWidthBits(); + uint32_t ybits = args.TextureHeightBits(); + uint32_t xstep = args.TextureUStep(); + uint32_t ystep = args.TextureVStep(); + uint32_t xfrac = args.TextureUPos(); + uint32_t yfrac = args.TextureVPos(); + uint32_t yshift = 32 - ybits; + uint32_t xshift = yshift 
- xbits; + uint32_t xmask = ((1 << xbits) - 1) << ybits; + + const uint32_t *source = (const uint32_t*)args.TexturePixels(); + + double lod = args.TextureLOD(); + bool mipmapped = args.MipmappedTexture(); + + bool magnifying = lod < 0.0; + if (r_mipmap && mipmapped) + { + int level = (int)lod; + while (level > 0) + { + if (xbits <= 2 || ybits <= 2) + break; + + source += (1 << (xbits)) * (1 << (ybits)); + xbits -= 1; + ybits -= 1; + level--; + } + } + + bool is_nearest_filter = !((magnifying && r_magfilter) || (!magnifying && r_minfilter)); + + auto shade_constants = args.ColormapConstants(); + if (shade_constants.simple_shade) + { + if (is_nearest_filter) + { + bool is_64x64 = xbits == 6 && ybits == 6; + if (is_64x64) + { + // Shade constants + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + + int count = args.DestX2() - args.DestX1() + 1; + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); + uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); + + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); + + int ssecount = count / 2; + for (int index = 0; index < ssecount; index++) + { + int offset = index * 2; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(dest + offset)), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + { + int sample_index = ((xfrac >> (32 - 6 - 6)) & (63 * 64)) + (yfrac >> (32 - 6)); + unsigned int sampleout = source[sample_index]; + ifgcolor[0] = sampleout; + xfrac += xstep; + yfrac += ystep; + } + { + int sample_index = ((xfrac >> (32 - 6 - 6)) & (63 * 64)) + (yfrac >> (32 - 6)); + unsigned int sampleout = source[sample_index]; + ifgcolor[1] = sampleout; + xfrac += xstep; + yfrac += 
ystep; + } + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + + fgcolor = _mm_mullo_epi16(fgcolor, alpha); + bgcolor = _mm_mullo_epi16(bgcolor, inv_alpha); + __m128i outcolor = _mm_srli_epi16(_mm_add_epi16(fgcolor, bgcolor), 8); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + _mm_storel_epi64((__m128i*)(dest + offset), outcolor); + } + + if (ssecount * 2 != count) + { + int index = ssecount * 2; + int offset = index; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + int sample_index = ((xfrac >> (32 - 6 - 6)) & (63 * 64)) + (yfrac >> (32 - 6)); + unsigned int sampleout = source[sample_index]; + ifgcolor[0] = sampleout; + ifgcolor[1] = 0; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + + fgcolor = _mm_mullo_epi16(fgcolor, alpha); + bgcolor = _mm_mullo_epi16(bgcolor, inv_alpha); + __m128i outcolor = _mm_srli_epi16(_mm_add_epi16(fgcolor, bgcolor), 8); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + dest[offset] = 
_mm_cvtsi128_si32(outcolor); + } + } + else + { + // Shade constants + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + + int count = args.DestX2() - args.DestX1() + 1; + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); + uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); + + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); + + int ssecount = count / 2; + for (int index = 0; index < ssecount; index++) + { + int offset = index * 2; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(dest + offset)), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + { + int sample_index = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + unsigned int sampleout = source[sample_index]; + ifgcolor[0] = sampleout; + xfrac += xstep; + yfrac += ystep; + } + { + int sample_index = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + unsigned int sampleout = source[sample_index]; + ifgcolor[1] = sampleout; + xfrac += xstep; + yfrac += ystep; + } + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + + fgcolor = _mm_mullo_epi16(fgcolor, alpha); + bgcolor = _mm_mullo_epi16(bgcolor, inv_alpha); + __m128i outcolor = _mm_srli_epi16(_mm_add_epi16(fgcolor, bgcolor), 8); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = 
_mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + _mm_storel_epi64((__m128i*)(dest + offset), outcolor); + } + + if (ssecount * 2 != count) + { + int index = ssecount * 2; + int offset = index; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + int sample_index = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + unsigned int sampleout = source[sample_index]; + ifgcolor[0] = sampleout; + ifgcolor[1] = 0; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + + fgcolor = _mm_mullo_epi16(fgcolor, alpha); + bgcolor = _mm_mullo_epi16(bgcolor, inv_alpha); + __m128i outcolor = _mm_srli_epi16(_mm_add_epi16(fgcolor, bgcolor), 8); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + dest[offset] = _mm_cvtsi128_si32(outcolor); + } + } + } + else + { + bool is_64x64 = xbits == 6 && ybits == 6; + if (is_64x64) + { + // Shade constants + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + + int count = args.DestX2() - args.DestX1() + 1; + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); + uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); + + xfrac -= 1 << (31 - xbits); + yfrac -= 1 << (31 - ybits); + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 
8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); + + int ssecount = count / 2; + for (int index = 0; index < ssecount; index++) + { + int offset = index * 2; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(dest + offset)), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + { + uint32_t xxbits = 26; + uint32_t yybits = 26; + uint32_t xxshift = (32 - xxbits); + uint32_t yyshift = (32 - yybits); + uint32_t xxmask = (1 << xxshift) - 1; + uint32_t yymask = (1 << yyshift) - 1; + uint32_t x = xfrac >> xxbits; + uint32_t y = yfrac >> yybits; + + uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; + uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; + + uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; + uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; + uint32_t a = 16 - inv_a; + uint32_t b = 16 - inv_b; + + uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[0] = sampleout; + xfrac += xstep; + yfrac += ystep; + } + { + uint32_t xxbits = 26; + uint32_t yybits = 26; + uint32_t xxshift = (32 - xxbits); + uint32_t yyshift = (32 - yybits); + uint32_t xxmask = (1 << xxshift) - 1; + uint32_t yymask = (1 << yyshift) - 1; + uint32_t x = xfrac >> xxbits; + uint32_t 
y = yfrac >> yybits; + + uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; + uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; + + uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; + uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; + uint32_t a = 16 - inv_a; + uint32_t b = 16 - inv_b; + + uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[1] = sampleout; + xfrac += xstep; + yfrac += ystep; + } + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + + fgcolor = _mm_mullo_epi16(fgcolor, alpha); + bgcolor = _mm_mullo_epi16(bgcolor, inv_alpha); + __m128i outcolor = _mm_srli_epi16(_mm_add_epi16(fgcolor, bgcolor), 8); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + _mm_storel_epi64((__m128i*)(dest + offset), outcolor); 
+ } + + if (ssecount * 2 != count) + { + int index = ssecount * 2; + int offset = index; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + uint32_t xxbits = 26; + uint32_t yybits = 26; + uint32_t xxshift = (32 - xxbits); + uint32_t yyshift = (32 - yybits); + uint32_t xxmask = (1 << xxshift) - 1; + uint32_t yymask = (1 << yyshift) - 1; + uint32_t x = xfrac >> xxbits; + uint32_t y = yfrac >> yybits; + + uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; + uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; + + uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; + uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; + uint32_t a = 16 - inv_a; + uint32_t b = 16 - inv_b; + + uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[0] = sampleout; + ifgcolor[1] = 0; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_add_epi16(alpha, 
_mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + + fgcolor = _mm_mullo_epi16(fgcolor, alpha); + bgcolor = _mm_mullo_epi16(bgcolor, inv_alpha); + __m128i outcolor = _mm_srli_epi16(_mm_add_epi16(fgcolor, bgcolor), 8); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + dest[offset] = _mm_cvtsi128_si32(outcolor); + } + } + else + { + // Shade constants + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + + int count = args.DestX2() - args.DestX1() + 1; + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); + uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); + + xfrac -= 1 << (31 - xbits); + yfrac -= 1 << (31 - ybits); + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); + + int ssecount = count / 2; + for (int index = 0; index < ssecount; index++) + { + int offset = index * 2; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(dest + offset)), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + { + uint32_t xxbits = 32 - xbits; + uint32_t yybits = 32 - ybits; + uint32_t xxshift = (32 - xxbits); + uint32_t yyshift = (32 - yybits); + uint32_t xxmask = (1 << xxshift) - 1; + uint32_t yymask = (1 << yyshift) - 1; + uint32_t x = xfrac >> xxbits; + uint32_t y = yfrac >> yybits; + + uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; + uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; + + uint32_t 
inv_b = (xfrac >> (xxbits - 4)) & 15; + uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; + uint32_t a = 16 - inv_a; + uint32_t b = 16 - inv_b; + + uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[0] = sampleout; + xfrac += xstep; + yfrac += ystep; + } + { + uint32_t xxbits = 32 - xbits; + uint32_t yybits = 32 - ybits; + uint32_t xxshift = (32 - xxbits); + uint32_t yyshift = (32 - yybits); + uint32_t xxmask = (1 << xxshift) - 1; + uint32_t yymask = (1 << yyshift) - 1; + uint32_t x = xfrac >> xxbits; + uint32_t y = yfrac >> yybits; + + uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; + uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; + + uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; + uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; + uint32_t a = 16 - inv_a; + uint32_t b = 16 - inv_b; + + uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 
127) >> 8; + uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[1] = sampleout; + xfrac += xstep; + yfrac += ystep; + } + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + + fgcolor = _mm_mullo_epi16(fgcolor, alpha); + bgcolor = _mm_mullo_epi16(bgcolor, inv_alpha); + __m128i outcolor = _mm_srli_epi16(_mm_add_epi16(fgcolor, bgcolor), 8); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + _mm_storel_epi64((__m128i*)(dest + offset), outcolor); + } + + if (ssecount * 2 != count) + { + int index = ssecount * 2; + int offset = index; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + uint32_t xxbits = 32 - xbits; + uint32_t yybits = 32 - ybits; + uint32_t xxshift = (32 - xxbits); + uint32_t yyshift = (32 - yybits); + uint32_t xxmask = (1 << xxshift) - 1; + uint32_t yymask = (1 << yyshift) - 1; + uint32_t x = xfrac >> xxbits; + uint32_t y = yfrac >> yybits; + + uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; + uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; + + uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; + uint32_t inv_a = (yfrac >> 
(yybits - 4)) & 15; + uint32_t a = 16 - inv_a; + uint32_t b = 16 - inv_b; + + uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[0] = sampleout; + ifgcolor[1] = 0; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + + fgcolor = _mm_mullo_epi16(fgcolor, alpha); + bgcolor = _mm_mullo_epi16(bgcolor, inv_alpha); + __m128i outcolor = _mm_srli_epi16(_mm_add_epi16(fgcolor, bgcolor), 8); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + dest[offset] = _mm_cvtsi128_si32(outcolor); + } + } + } + } + else + { + if (is_nearest_filter) + { + bool is_64x64 = xbits == 6 && ybits == 6; + if (is_64x64) + { + // Shade constants + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + __m128i inv_desaturate = 
_mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); + __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); + shade_fade = _mm_mullo_epi16(shade_fade, inv_light); + __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); + int desaturate = shade_constants.desaturate; + + int count = args.DestX2() - args.DestX1() + 1; + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); + uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); + + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); + + int ssecount = count / 2; + for (int index = 0; index < ssecount; index++) + { + int offset = index * 2; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(dest + offset)), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + { + int sample_index = ((xfrac >> (32 - 6 - 6)) & (63 * 64)) + (yfrac >> (32 - 6)); + unsigned int sampleout = source[sample_index]; + ifgcolor[0] = sampleout; + xfrac += xstep; + yfrac += ystep; + } + { + int sample_index = ((xfrac >> (32 - 6 - 6)) & (63 * 64)) + (yfrac >> (32 - 6)); + unsigned int sampleout = source[sample_index]; + ifgcolor[1] = sampleout; + xfrac += xstep; + yfrac += ystep; + } + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + int blue0 = BPART(ifgcolor[0]); + int 
green0 = GPART(ifgcolor[0]); + int red0 = RPART(ifgcolor[0]); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + + int blue1 = BPART(ifgcolor[1]); + int green1 = GPART(ifgcolor[1]); + int red1 = RPART(ifgcolor[1]); + int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); + + fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + // Blend + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + + fgcolor = _mm_mullo_epi16(fgcolor, alpha); + bgcolor = _mm_mullo_epi16(bgcolor, inv_alpha); + __m128i outcolor = _mm_srli_epi16(_mm_add_epi16(fgcolor, bgcolor), 8); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + _mm_storel_epi64((__m128i*)(dest + offset), outcolor); + } + + if (ssecount * 2 != count) + { + int index = ssecount * 2; + int offset = index; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + int sample_index = ((xfrac >> (32 - 6 - 6)) & (63 * 64)) + (yfrac >> (32 - 6)); + unsigned int sampleout = source[sample_index]; + ifgcolor[0] = sampleout; + ifgcolor[1] = 0; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + int blue0 = BPART(ifgcolor[0]); + int green0 = GPART(ifgcolor[0]); + int red0 = RPART(ifgcolor[0]); + int intensity0 = 
((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + + int blue1 = BPART(ifgcolor[1]); + int green1 = GPART(ifgcolor[1]); + int red1 = RPART(ifgcolor[1]); + int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); + + fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + // Blend + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + + fgcolor = _mm_mullo_epi16(fgcolor, alpha); + bgcolor = _mm_mullo_epi16(bgcolor, inv_alpha); + __m128i outcolor = _mm_srli_epi16(_mm_add_epi16(fgcolor, bgcolor), 8); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + dest[offset] = _mm_cvtsi128_si32(outcolor); + } + } + else + { + // Shade constants + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); + __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, 
shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); + shade_fade = _mm_mullo_epi16(shade_fade, inv_light); + __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); + int desaturate = shade_constants.desaturate; + + int count = args.DestX2() - args.DestX1() + 1; + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); + uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); + + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); + + int ssecount = count / 2; + for (int index = 0; index < ssecount; index++) + { + int offset = index * 2; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(dest + offset)), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + { + int sample_index = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + unsigned int sampleout = source[sample_index]; + ifgcolor[0] = sampleout; + xfrac += xstep; + yfrac += ystep; + } + { + int sample_index = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + unsigned int sampleout = source[sample_index]; + ifgcolor[1] = sampleout; + xfrac += xstep; + yfrac += ystep; + } + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + int blue0 = BPART(ifgcolor[0]); + int green0 = GPART(ifgcolor[0]); + int red0 = RPART(ifgcolor[0]); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + + int blue1 = BPART(ifgcolor[1]); + int green1 = GPART(ifgcolor[1]); + int red1 = RPART(ifgcolor[1]); + int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 
0, intensity0, intensity0, intensity0); + + fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + // Blend + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + + fgcolor = _mm_mullo_epi16(fgcolor, alpha); + bgcolor = _mm_mullo_epi16(bgcolor, inv_alpha); + __m128i outcolor = _mm_srli_epi16(_mm_add_epi16(fgcolor, bgcolor), 8); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + _mm_storel_epi64((__m128i*)(dest + offset), outcolor); + } + + if (ssecount * 2 != count) + { + int index = ssecount * 2; + int offset = index; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + int sample_index = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + unsigned int sampleout = source[sample_index]; + ifgcolor[0] = sampleout; + ifgcolor[1] = 0; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + int blue0 = BPART(ifgcolor[0]); + int green0 = GPART(ifgcolor[0]); + int red0 = RPART(ifgcolor[0]); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + + int blue1 = BPART(ifgcolor[1]); + int green1 = GPART(ifgcolor[1]); + int red1 = RPART(ifgcolor[1]); + int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); + + fgcolor = 
_mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + // Blend + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + + fgcolor = _mm_mullo_epi16(fgcolor, alpha); + bgcolor = _mm_mullo_epi16(bgcolor, inv_alpha); + __m128i outcolor = _mm_srli_epi16(_mm_add_epi16(fgcolor, bgcolor), 8); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + dest[offset] = _mm_cvtsi128_si32(outcolor); + } + } + } + else + { + bool is_64x64 = xbits == 6 && ybits == 6; + if (is_64x64) + { + // Shade constants + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); + __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); + shade_fade = _mm_mullo_epi16(shade_fade, inv_light); + __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, 
shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); + int desaturate = shade_constants.desaturate; + + int count = args.DestX2() - args.DestX1() + 1; + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); + uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); + + xfrac -= 1 << (31 - xbits); + yfrac -= 1 << (31 - ybits); + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); + + int ssecount = count / 2; + for (int index = 0; index < ssecount; index++) + { + int offset = index * 2; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(dest + offset)), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + { + uint32_t xxbits = 26; + uint32_t yybits = 26; + uint32_t xxshift = (32 - xxbits); + uint32_t yyshift = (32 - yybits); + uint32_t xxmask = (1 << xxshift) - 1; + uint32_t yymask = (1 << yyshift) - 1; + uint32_t x = xfrac >> xxbits; + uint32_t y = yfrac >> yybits; + + uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; + uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; + + uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; + uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; + uint32_t a = 16 - inv_a; + uint32_t b = 16 - inv_b; + + uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t salpha = (APART(p00) * (a * b) + 
APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[0] = sampleout; + xfrac += xstep; + yfrac += ystep; + } + { + uint32_t xxbits = 26; + uint32_t yybits = 26; + uint32_t xxshift = (32 - xxbits); + uint32_t yyshift = (32 - yybits); + uint32_t xxmask = (1 << xxshift) - 1; + uint32_t yymask = (1 << yyshift) - 1; + uint32_t x = xfrac >> xxbits; + uint32_t y = yfrac >> yybits; + + uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; + uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; + + uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; + uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; + uint32_t a = 16 - inv_a; + uint32_t b = 16 - inv_b; + + uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[1] = sampleout; + xfrac += xstep; + yfrac += ystep; + } + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + int blue0 = BPART(ifgcolor[0]); + int green0 = GPART(ifgcolor[0]); + int red0 = RPART(ifgcolor[0]); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + + int blue1 = 
BPART(ifgcolor[1]); + int green1 = GPART(ifgcolor[1]); + int red1 = RPART(ifgcolor[1]); + int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); + + fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + // Blend + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + + fgcolor = _mm_mullo_epi16(fgcolor, alpha); + bgcolor = _mm_mullo_epi16(bgcolor, inv_alpha); + __m128i outcolor = _mm_srli_epi16(_mm_add_epi16(fgcolor, bgcolor), 8); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + _mm_storel_epi64((__m128i*)(dest + offset), outcolor); + } + + if (ssecount * 2 != count) + { + int index = ssecount * 2; + int offset = index; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + uint32_t xxbits = 26; + uint32_t yybits = 26; + uint32_t xxshift = (32 - xxbits); + uint32_t yyshift = (32 - yybits); + uint32_t xxmask = (1 << xxshift) - 1; + uint32_t yymask = (1 << yyshift) - 1; + uint32_t x = xfrac >> xxbits; + uint32_t y = yfrac >> yybits; + + uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; + uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; + + 
uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; + uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; + uint32_t a = 16 - inv_a; + uint32_t b = 16 - inv_b; + + uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[0] = sampleout; + ifgcolor[1] = 0; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + int blue0 = BPART(ifgcolor[0]); + int green0 = GPART(ifgcolor[0]); + int red0 = RPART(ifgcolor[0]); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + + int blue1 = BPART(ifgcolor[1]); + int green1 = GPART(ifgcolor[1]); + int red1 = RPART(ifgcolor[1]); + int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); + + fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + // Blend + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + 
+ fgcolor = _mm_mullo_epi16(fgcolor, alpha); + bgcolor = _mm_mullo_epi16(bgcolor, inv_alpha); + __m128i outcolor = _mm_srli_epi16(_mm_add_epi16(fgcolor, bgcolor), 8); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + dest[offset] = _mm_cvtsi128_si32(outcolor); + } + } + else + { + // Shade constants + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); + __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); + shade_fade = _mm_mullo_epi16(shade_fade, inv_light); + __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); + int desaturate = shade_constants.desaturate; + + int count = args.DestX2() - args.DestX1() + 1; + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); + uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); + + xfrac -= 1 << (31 - xbits); + yfrac -= 1 << (31 - ybits); + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); + + int ssecount = count / 2; + for (int index = 0; index < ssecount; index++) + { 
+ int offset = index * 2; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(dest + offset)), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + { + uint32_t xxbits = 32 - xbits; + uint32_t yybits = 32 - ybits; + uint32_t xxshift = (32 - xxbits); + uint32_t yyshift = (32 - yybits); + uint32_t xxmask = (1 << xxshift) - 1; + uint32_t yymask = (1 << yyshift) - 1; + uint32_t x = xfrac >> xxbits; + uint32_t y = yfrac >> yybits; + + uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; + uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; + + uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; + uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; + uint32_t a = 16 - inv_a; + uint32_t b = 16 - inv_b; + + uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[0] = sampleout; + xfrac += xstep; + yfrac += ystep; + } + { + uint32_t xxbits = 32 - xbits; + uint32_t yybits = 32 - ybits; + uint32_t xxshift = (32 - xxbits); + uint32_t yyshift = (32 - yybits); + uint32_t xxmask = (1 << xxshift) - 1; + uint32_t yymask = (1 << yyshift) - 1; + uint32_t x = xfrac >> xxbits; + uint32_t y = yfrac >> yybits; + + uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p01 = 
source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; + uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; + + uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; + uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; + uint32_t a = 16 - inv_a; + uint32_t b = 16 - inv_b; + + uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[1] = sampleout; + xfrac += xstep; + yfrac += ystep; + } + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + int blue0 = BPART(ifgcolor[0]); + int green0 = GPART(ifgcolor[0]); + int red0 = RPART(ifgcolor[0]); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + + int blue1 = BPART(ifgcolor[1]); + int green1 = GPART(ifgcolor[1]); + int red1 = RPART(ifgcolor[1]); + int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); + + fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + // Blend + __m128i alpha = 
_mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + + fgcolor = _mm_mullo_epi16(fgcolor, alpha); + bgcolor = _mm_mullo_epi16(bgcolor, inv_alpha); + __m128i outcolor = _mm_srli_epi16(_mm_add_epi16(fgcolor, bgcolor), 8); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + _mm_storel_epi64((__m128i*)(dest + offset), outcolor); + } + + if (ssecount * 2 != count) + { + int index = ssecount * 2; + int offset = index; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + uint32_t xxbits = 32 - xbits; + uint32_t yybits = 32 - ybits; + uint32_t xxshift = (32 - xxbits); + uint32_t yyshift = (32 - yybits); + uint32_t xxmask = (1 << xxshift) - 1; + uint32_t yymask = (1 << yyshift) - 1; + uint32_t x = xfrac >> xxbits; + uint32_t y = yfrac >> yybits; + + uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; + uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; + + uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; + uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; + uint32_t a = 16 - inv_a; + uint32_t b = 16 - inv_b; + + uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t 
salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[0] = sampleout; + ifgcolor[1] = 0; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + int blue0 = BPART(ifgcolor[0]); + int green0 = GPART(ifgcolor[0]); + int red0 = RPART(ifgcolor[0]); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + + int blue1 = BPART(ifgcolor[1]); + int green1 = GPART(ifgcolor[1]); + int red1 = RPART(ifgcolor[1]); + int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); + + fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + // Blend + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + + fgcolor = _mm_mullo_epi16(fgcolor, alpha); + bgcolor = _mm_mullo_epi16(bgcolor, inv_alpha); + __m128i outcolor = _mm_srli_epi16(_mm_add_epi16(fgcolor, bgcolor), 8); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + dest[offset] = _mm_cvtsi128_si32(outcolor); + } + } + } + } + } + + FString DebugInfo() override { return "DrawSpanMasked32Command"; } + }; + + class DrawSpanTranslucent32Command : public DrawerCommand + { + protected: + SpanDrawerArgs args; + + public: + 
DrawSpanTranslucent32Command(const SpanDrawerArgs &drawerargs) : args(drawerargs) { } + + void Execute(DrawerThread *thread) override + { + if (thread->line_skipped_by_thread(args.DestY())) return; + + uint32_t xbits = args.TextureWidthBits(); + uint32_t ybits = args.TextureHeightBits(); + uint32_t xstep = args.TextureUStep(); + uint32_t ystep = args.TextureVStep(); + uint32_t xfrac = args.TextureUPos(); + uint32_t yfrac = args.TextureVPos(); + uint32_t yshift = 32 - ybits; + uint32_t xshift = yshift - xbits; + uint32_t xmask = ((1 << xbits) - 1) << ybits; + + const uint32_t *source = (const uint32_t*)args.TexturePixels(); + + double lod = args.TextureLOD(); + bool mipmapped = args.MipmappedTexture(); + + bool magnifying = lod < 0.0; + if (r_mipmap && mipmapped) + { + int level = (int)lod; + while (level > 0) + { + if (xbits <= 2 || ybits <= 2) + break; + + source += (1 << (xbits)) * (1 << (ybits)); + xbits -= 1; + ybits -= 1; + level--; + } + } + + bool is_nearest_filter = !((magnifying && r_magfilter) || (!magnifying && r_minfilter)); + + auto shade_constants = args.ColormapConstants(); + if (shade_constants.simple_shade) + { + if (is_nearest_filter) + { + bool is_64x64 = xbits == 6 && ybits == 6; + if (is_64x64) + { + // Shade constants + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + + int count = args.DestX2() - args.DestX1() + 1; + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); + uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); + + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); + + int ssecount = count / 2; + for (int index = 0; index < ssecount; index++) + { + int offset = index * 2; + __m128i bgcolor = 
_mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(dest + offset)), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + { + int sample_index = ((xfrac >> (32 - 6 - 6)) & (63 * 64)) + (yfrac >> (32 - 6)); + unsigned int sampleout = source[sample_index]; + ifgcolor[0] = sampleout; + xfrac += xstep; + yfrac += ystep; + } + { + int sample_index = ((xfrac >> (32 - 6 - 6)) & (63 * 64)) + (yfrac >> (32 - 6)); + unsigned int sampleout = source[sample_index]; + ifgcolor[1] = sampleout; + xfrac += xstep; + yfrac += ystep; + } + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + __m128i fgalpha = _mm_set1_epi16(srcalpha); + __m128i bgalpha = _mm_set1_epi16(destalpha); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); + __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); + + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + _mm_storel_epi64((__m128i*)(dest + offset), outcolor); + } + + if (ssecount * 2 != count) + { + int index = ssecount * 2; + int offset = index; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + int sample_index = ((xfrac >> (32 - 6 - 6)) & (63 * 64)) + (yfrac >> (32 - 6)); + unsigned int sampleout = source[sample_index]; + ifgcolor[0] = sampleout; + ifgcolor[1] = 0; + 
__m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + __m128i fgalpha = _mm_set1_epi16(srcalpha); + __m128i bgalpha = _mm_set1_epi16(destalpha); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); + __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); + + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + dest[offset] = _mm_cvtsi128_si32(outcolor); + } + } + else + { + // Shade constants + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + + int count = args.DestX2() - args.DestX1() + 1; + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); + uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); + + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); + + int ssecount = count / 2; + for (int index = 0; index < ssecount; index++) + { + int offset = index * 2; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(dest + offset)), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + { + int sample_index = ((xfrac >> xshift) & xmask) + 
(yfrac >> yshift); + unsigned int sampleout = source[sample_index]; + ifgcolor[0] = sampleout; + xfrac += xstep; + yfrac += ystep; + } + { + int sample_index = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + unsigned int sampleout = source[sample_index]; + ifgcolor[1] = sampleout; + xfrac += xstep; + yfrac += ystep; + } + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + __m128i fgalpha = _mm_set1_epi16(srcalpha); + __m128i bgalpha = _mm_set1_epi16(destalpha); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); + __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); + + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + _mm_storel_epi64((__m128i*)(dest + offset), outcolor); + } + + if (ssecount * 2 != count) + { + int index = ssecount * 2; + int offset = index; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + int sample_index = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + unsigned int sampleout = source[sample_index]; + ifgcolor[0] = sampleout; + ifgcolor[1] = 0; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + __m128i fgalpha = 
_mm_set1_epi16(srcalpha); + __m128i bgalpha = _mm_set1_epi16(destalpha); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); + __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); + + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + dest[offset] = _mm_cvtsi128_si32(outcolor); + } + } + } + else + { + bool is_64x64 = xbits == 6 && ybits == 6; + if (is_64x64) + { + // Shade constants + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + + int count = args.DestX2() - args.DestX1() + 1; + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); + uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); + + xfrac -= 1 << (31 - xbits); + yfrac -= 1 << (31 - ybits); + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); + + int ssecount = count / 2; + for (int index = 0; index < ssecount; index++) + { + int offset = index * 2; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(dest + offset)), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + { + uint32_t xxbits = 26; + uint32_t yybits = 26; + uint32_t xxshift = (32 - xxbits); + uint32_t yyshift = (32 - yybits); + uint32_t 
xxmask = (1 << xxshift) - 1; + uint32_t yymask = (1 << yyshift) - 1; + uint32_t x = xfrac >> xxbits; + uint32_t y = yfrac >> yybits; + + uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; + uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; + + uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; + uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; + uint32_t a = 16 - inv_a; + uint32_t b = 16 - inv_b; + + uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[0] = sampleout; + xfrac += xstep; + yfrac += ystep; + } + { + uint32_t xxbits = 26; + uint32_t yybits = 26; + uint32_t xxshift = (32 - xxbits); + uint32_t yyshift = (32 - yybits); + uint32_t xxmask = (1 << xxshift) - 1; + uint32_t yymask = (1 << yyshift) - 1; + uint32_t x = xfrac >> xxbits; + uint32_t y = yfrac >> yybits; + + uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; + uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; + + uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; + uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; + uint32_t a = 16 - inv_a; + 
uint32_t b = 16 - inv_b; + + uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[1] = sampleout; + xfrac += xstep; + yfrac += ystep; + } + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + __m128i fgalpha = _mm_set1_epi16(srcalpha); + __m128i bgalpha = _mm_set1_epi16(destalpha); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); + __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); + + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + _mm_storel_epi64((__m128i*)(dest + offset), outcolor); + } + + if (ssecount * 2 != count) + { + int index = ssecount * 2; + int offset = index; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); + + // Sample + 
unsigned int ifgcolor[2]; + uint32_t xxbits = 26; + uint32_t yybits = 26; + uint32_t xxshift = (32 - xxbits); + uint32_t yyshift = (32 - yybits); + uint32_t xxmask = (1 << xxshift) - 1; + uint32_t yymask = (1 << yyshift) - 1; + uint32_t x = xfrac >> xxbits; + uint32_t y = yfrac >> yybits; + + uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; + uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; + + uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; + uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; + uint32_t a = 16 - inv_a; + uint32_t b = 16 - inv_b; + + uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[0] = sampleout; + ifgcolor[1] = 0; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + __m128i fgalpha = _mm_set1_epi16(srcalpha); + __m128i bgalpha = _mm_set1_epi16(destalpha); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, 
_mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); + __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); + + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + dest[offset] = _mm_cvtsi128_si32(outcolor); + } + } + else + { + // Shade constants + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + + int count = args.DestX2() - args.DestX1() + 1; + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); + uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); + + xfrac -= 1 << (31 - xbits); + yfrac -= 1 << (31 - ybits); + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); + + int ssecount = count / 2; + for (int index = 0; index < ssecount; index++) + { + int offset = index * 2; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(dest + offset)), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + { + uint32_t xxbits = 32 - xbits; + uint32_t yybits = 32 - ybits; + uint32_t xxshift = (32 - xxbits); + uint32_t yyshift = (32 - yybits); + uint32_t xxmask = (1 << xxshift) - 1; + uint32_t yymask = (1 << yyshift) - 1; + uint32_t x = xfrac >> xxbits; + uint32_t y = yfrac >> yybits; + + uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; + uint32_t p11 = source[(((y + 1) & yymask) + (((x 
+ 1) & xxmask) << yyshift))]; + + uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; + uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; + uint32_t a = 16 - inv_a; + uint32_t b = 16 - inv_b; + + uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[0] = sampleout; + xfrac += xstep; + yfrac += ystep; + } + { + uint32_t xxbits = 32 - xbits; + uint32_t yybits = 32 - ybits; + uint32_t xxshift = (32 - xxbits); + uint32_t yyshift = (32 - yybits); + uint32_t xxmask = (1 << xxshift) - 1; + uint32_t yymask = (1 << yyshift) - 1; + uint32_t x = xfrac >> xxbits; + uint32_t y = yfrac >> yybits; + + uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; + uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; + + uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; + uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; + uint32_t a = 16 - inv_a; + uint32_t b = 16 - inv_b; + + uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a 
* inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[1] = sampleout; + xfrac += xstep; + yfrac += ystep; + } + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + __m128i fgalpha = _mm_set1_epi16(srcalpha); + __m128i bgalpha = _mm_set1_epi16(destalpha); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); + __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); + + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + _mm_storel_epi64((__m128i*)(dest + offset), outcolor); + } + + if (ssecount * 2 != count) + { + int index = ssecount * 2; + int offset = index; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + uint32_t xxbits = 32 - xbits; + uint32_t yybits = 32 - ybits; + uint32_t xxshift = (32 - xxbits); + uint32_t yyshift = (32 - yybits); + uint32_t xxmask = (1 << xxshift) - 1; + uint32_t yymask = (1 << yyshift) - 1; + uint32_t x = xfrac >> xxbits; + uint32_t y = yfrac >> yybits; + + uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; + uint32_t 
p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; + uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; + + uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; + uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; + uint32_t a = 16 - inv_a; + uint32_t b = 16 - inv_b; + + uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[0] = sampleout; + ifgcolor[1] = 0; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + __m128i fgalpha = _mm_set1_epi16(srcalpha); + __m128i bgalpha = _mm_set1_epi16(destalpha); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); + __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); + + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); + outcolor = _mm_packus_epi16(outcolor, 
_mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + dest[offset] = _mm_cvtsi128_si32(outcolor); + } + } + } + } + else + { + if (is_nearest_filter) + { + bool is_64x64 = xbits == 6 && ybits == 6; + if (is_64x64) + { + // Shade constants + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); + __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); + shade_fade = _mm_mullo_epi16(shade_fade, inv_light); + __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); + int desaturate = shade_constants.desaturate; + + int count = args.DestX2() - args.DestX1() + 1; + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); + uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); + + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); + + int ssecount = count / 2; + for (int index = 0; index < ssecount; index++) + { + int offset = index * 2; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(dest + offset)), _mm_setzero_si128()); + + // Sample + unsigned int 
ifgcolor[2]; + { + int sample_index = ((xfrac >> (32 - 6 - 6)) & (63 * 64)) + (yfrac >> (32 - 6)); + unsigned int sampleout = source[sample_index]; + ifgcolor[0] = sampleout; + xfrac += xstep; + yfrac += ystep; + } + { + int sample_index = ((xfrac >> (32 - 6 - 6)) & (63 * 64)) + (yfrac >> (32 - 6)); + unsigned int sampleout = source[sample_index]; + ifgcolor[1] = sampleout; + xfrac += xstep; + yfrac += ystep; + } + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + int blue0 = BPART(ifgcolor[0]); + int green0 = GPART(ifgcolor[0]); + int red0 = RPART(ifgcolor[0]); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + + int blue1 = BPART(ifgcolor[1]); + int green1 = GPART(ifgcolor[1]); + int red1 = RPART(ifgcolor[1]); + int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); + + fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + // Blend + __m128i fgalpha = _mm_set1_epi16(srcalpha); + __m128i bgalpha = _mm_set1_epi16(destalpha); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); + __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); + + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); 
+ outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + _mm_storel_epi64((__m128i*)(dest + offset), outcolor); + } + + if (ssecount * 2 != count) + { + int index = ssecount * 2; + int offset = index; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + int sample_index = ((xfrac >> (32 - 6 - 6)) & (63 * 64)) + (yfrac >> (32 - 6)); + unsigned int sampleout = source[sample_index]; + ifgcolor[0] = sampleout; + ifgcolor[1] = 0; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + int blue0 = BPART(ifgcolor[0]); + int green0 = GPART(ifgcolor[0]); + int red0 = RPART(ifgcolor[0]); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + + int blue1 = BPART(ifgcolor[1]); + int green1 = GPART(ifgcolor[1]); + int red1 = RPART(ifgcolor[1]); + int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); + + fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + // Blend + __m128i fgalpha = _mm_set1_epi16(srcalpha); + __m128i bgalpha = _mm_set1_epi16(destalpha); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); + __m128i 
out_hi = _mm_add_epi32(fg_hi, bg_hi); + + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + dest[offset] = _mm_cvtsi128_si32(outcolor); + } + } + else + { + // Shade constants + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); + __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); + shade_fade = _mm_mullo_epi16(shade_fade, inv_light); + __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); + int desaturate = shade_constants.desaturate; + + int count = args.DestX2() - args.DestX1() + 1; + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); + uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); + + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); + + int ssecount = count / 2; + for (int index = 0; index < ssecount; index++) + { + int offset = index * 2; + __m128i bgcolor = 
_mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(dest + offset)), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + { + int sample_index = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + unsigned int sampleout = source[sample_index]; + ifgcolor[0] = sampleout; + xfrac += xstep; + yfrac += ystep; + } + { + int sample_index = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + unsigned int sampleout = source[sample_index]; + ifgcolor[1] = sampleout; + xfrac += xstep; + yfrac += ystep; + } + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + int blue0 = BPART(ifgcolor[0]); + int green0 = GPART(ifgcolor[0]); + int red0 = RPART(ifgcolor[0]); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + + int blue1 = BPART(ifgcolor[1]); + int green1 = GPART(ifgcolor[1]); + int red1 = RPART(ifgcolor[1]); + int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); + + fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + // Blend + __m128i fgalpha = _mm_set1_epi16(srcalpha); + __m128i bgalpha = _mm_set1_epi16(destalpha); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); + __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); + + out_lo = _mm_srai_epi32(out_lo, 8); + 
out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + _mm_storel_epi64((__m128i*)(dest + offset), outcolor); + } + + if (ssecount * 2 != count) + { + int index = ssecount * 2; + int offset = index; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + int sample_index = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + unsigned int sampleout = source[sample_index]; + ifgcolor[0] = sampleout; + ifgcolor[1] = 0; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + int blue0 = BPART(ifgcolor[0]); + int green0 = GPART(ifgcolor[0]); + int red0 = RPART(ifgcolor[0]); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + + int blue1 = BPART(ifgcolor[1]); + int green1 = GPART(ifgcolor[1]); + int red1 = RPART(ifgcolor[1]); + int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); + + fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + // Blend + __m128i fgalpha = _mm_set1_epi16(srcalpha); + __m128i bgalpha = _mm_set1_epi16(destalpha); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, 
_mm_setzero_si128()); + + __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); + __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); + + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + dest[offset] = _mm_cvtsi128_si32(outcolor); + } + } + } + else + { + bool is_64x64 = xbits == 6 && ybits == 6; + if (is_64x64) + { + // Shade constants + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); + __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); + shade_fade = _mm_mullo_epi16(shade_fade, inv_light); + __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); + int desaturate = shade_constants.desaturate; + + int count = args.DestX2() - args.DestX1() + 1; + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); + uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); + + xfrac -= 1 << (31 - xbits); + yfrac -= 1 << (31 - ybits); + uint32_t srcalpha = args.SrcAlpha() >> 
(FRACBITS - 8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); + + int ssecount = count / 2; + for (int index = 0; index < ssecount; index++) + { + int offset = index * 2; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(dest + offset)), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + { + uint32_t xxbits = 26; + uint32_t yybits = 26; + uint32_t xxshift = (32 - xxbits); + uint32_t yyshift = (32 - yybits); + uint32_t xxmask = (1 << xxshift) - 1; + uint32_t yymask = (1 << yyshift) - 1; + uint32_t x = xfrac >> xxbits; + uint32_t y = yfrac >> yybits; + + uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; + uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; + + uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; + uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; + uint32_t a = 16 - inv_a; + uint32_t b = 16 - inv_b; + + uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[0] = sampleout; + xfrac += xstep; + yfrac += ystep; + } + { + uint32_t xxbits = 26; + uint32_t yybits = 26; + uint32_t xxshift = (32 - xxbits); + uint32_t yyshift = (32 - yybits); + uint32_t xxmask = (1 << xxshift) - 1; + uint32_t yymask = (1 << yyshift) - 1; + uint32_t x = xfrac >> xxbits; 
+ uint32_t y = yfrac >> yybits; + + uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; + uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; + + uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; + uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; + uint32_t a = 16 - inv_a; + uint32_t b = 16 - inv_b; + + uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[1] = sampleout; + xfrac += xstep; + yfrac += ystep; + } + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + int blue0 = BPART(ifgcolor[0]); + int green0 = GPART(ifgcolor[0]); + int red0 = RPART(ifgcolor[0]); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + + int blue1 = BPART(ifgcolor[1]); + int green1 = GPART(ifgcolor[1]); + int red1 = RPART(ifgcolor[1]); + int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); + + fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, 
fgcolor), 8); + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + // Blend + __m128i fgalpha = _mm_set1_epi16(srcalpha); + __m128i bgalpha = _mm_set1_epi16(destalpha); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); + __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); + + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + _mm_storel_epi64((__m128i*)(dest + offset), outcolor); + } + + if (ssecount * 2 != count) + { + int index = ssecount * 2; + int offset = index; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + uint32_t xxbits = 26; + uint32_t yybits = 26; + uint32_t xxshift = (32 - xxbits); + uint32_t yyshift = (32 - yybits); + uint32_t xxmask = (1 << xxshift) - 1; + uint32_t yymask = (1 << yyshift) - 1; + uint32_t x = xfrac >> xxbits; + uint32_t y = yfrac >> yybits; + + uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; + uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; + + uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; + uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; + uint32_t a = 16 - inv_a; + uint32_t b = 16 - inv_b; + + uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + 
RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[0] = sampleout; + ifgcolor[1] = 0; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + int blue0 = BPART(ifgcolor[0]); + int green0 = GPART(ifgcolor[0]); + int red0 = RPART(ifgcolor[0]); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + + int blue1 = BPART(ifgcolor[1]); + int green1 = GPART(ifgcolor[1]); + int red1 = RPART(ifgcolor[1]); + int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); + + fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + // Blend + __m128i fgalpha = _mm_set1_epi16(srcalpha); + __m128i bgalpha = _mm_set1_epi16(destalpha); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = 
_mm_add_epi32(fg_lo, bg_lo); + __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); + + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + dest[offset] = _mm_cvtsi128_si32(outcolor); + } + } + else + { + // Shade constants + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); + __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); + shade_fade = _mm_mullo_epi16(shade_fade, inv_light); + __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); + int desaturate = shade_constants.desaturate; + + int count = args.DestX2() - args.DestX1() + 1; + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); + uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); + + xfrac -= 1 << (31 - xbits); + yfrac -= 1 << (31 - ybits); + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); + + int ssecount = count / 2; + for (int 
index = 0; index < ssecount; index++) + { + int offset = index * 2; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(dest + offset)), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + { + uint32_t xxbits = 32 - xbits; + uint32_t yybits = 32 - ybits; + uint32_t xxshift = (32 - xxbits); + uint32_t yyshift = (32 - yybits); + uint32_t xxmask = (1 << xxshift) - 1; + uint32_t yymask = (1 << yyshift) - 1; + uint32_t x = xfrac >> xxbits; + uint32_t y = yfrac >> yybits; + + uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; + uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; + + uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; + uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; + uint32_t a = 16 - inv_a; + uint32_t b = 16 - inv_b; + + uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[0] = sampleout; + xfrac += xstep; + yfrac += ystep; + } + { + uint32_t xxbits = 32 - xbits; + uint32_t yybits = 32 - ybits; + uint32_t xxshift = (32 - xxbits); + uint32_t yyshift = (32 - yybits); + uint32_t xxmask = (1 << xxshift) - 1; + uint32_t yymask = (1 << yyshift) - 1; + uint32_t x = xfrac >> xxbits; + uint32_t y = yfrac >> yybits; + + uint32_t p00 = source[((y & yymask) + ((x & 
xxmask) << yyshift))]; + uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; + uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; + + uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; + uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; + uint32_t a = 16 - inv_a; + uint32_t b = 16 - inv_b; + + uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[1] = sampleout; + xfrac += xstep; + yfrac += ystep; + } + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + int blue0 = BPART(ifgcolor[0]); + int green0 = GPART(ifgcolor[0]); + int red0 = RPART(ifgcolor[0]); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + + int blue1 = BPART(ifgcolor[1]); + int green1 = GPART(ifgcolor[1]); + int red1 = RPART(ifgcolor[1]); + int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); + + fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + 
// Blend + __m128i fgalpha = _mm_set1_epi16(srcalpha); + __m128i bgalpha = _mm_set1_epi16(destalpha); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); + __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); + + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + _mm_storel_epi64((__m128i*)(dest + offset), outcolor); + } + + if (ssecount * 2 != count) + { + int index = ssecount * 2; + int offset = index; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + uint32_t xxbits = 32 - xbits; + uint32_t yybits = 32 - ybits; + uint32_t xxshift = (32 - xxbits); + uint32_t yyshift = (32 - yybits); + uint32_t xxmask = (1 << xxshift) - 1; + uint32_t yymask = (1 << yyshift) - 1; + uint32_t x = xfrac >> xxbits; + uint32_t y = yfrac >> yybits; + + uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; + uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; + + uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; + uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; + uint32_t a = 16 - inv_a; + uint32_t b = 16 - inv_b; + + uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t 
sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[0] = sampleout; + ifgcolor[1] = 0; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + int blue0 = BPART(ifgcolor[0]); + int green0 = GPART(ifgcolor[0]); + int red0 = RPART(ifgcolor[0]); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + + int blue1 = BPART(ifgcolor[1]); + int green1 = GPART(ifgcolor[1]); + int red1 = RPART(ifgcolor[1]); + int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); + + fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + // Blend + __m128i fgalpha = _mm_set1_epi16(srcalpha); + __m128i bgalpha = _mm_set1_epi16(destalpha); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); + __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); + + out_lo 
= _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + dest[offset] = _mm_cvtsi128_si32(outcolor); + } + } + } + } + } + + FString DebugInfo() override { return "DrawSpanTranslucent32Command"; } + }; + + class DrawSpanAddClamp32Command : public DrawerCommand + { + protected: + SpanDrawerArgs args; + + public: + DrawSpanAddClamp32Command(const SpanDrawerArgs &drawerargs) : args(drawerargs) { } + + void Execute(DrawerThread *thread) override + { + if (thread->line_skipped_by_thread(args.DestY())) return; + + uint32_t xbits = args.TextureWidthBits(); + uint32_t ybits = args.TextureHeightBits(); + uint32_t xstep = args.TextureUStep(); + uint32_t ystep = args.TextureVStep(); + uint32_t xfrac = args.TextureUPos(); + uint32_t yfrac = args.TextureVPos(); + uint32_t yshift = 32 - ybits; + uint32_t xshift = yshift - xbits; + uint32_t xmask = ((1 << xbits) - 1) << ybits; + + const uint32_t *source = (const uint32_t*)args.TexturePixels(); + + double lod = args.TextureLOD(); + bool mipmapped = args.MipmappedTexture(); + + bool magnifying = lod < 0.0; + if (r_mipmap && mipmapped) + { + int level = (int)lod; + while (level > 0) + { + if (xbits <= 2 || ybits <= 2) + break; + + source += (1 << (xbits)) * (1 << (ybits)); + xbits -= 1; + ybits -= 1; + level--; + } + } + + bool is_nearest_filter = !((magnifying && r_magfilter) || (!magnifying && r_minfilter)); + + auto shade_constants = args.ColormapConstants(); + if (shade_constants.simple_shade) + { + if (is_nearest_filter) + { + bool is_64x64 = xbits == 6 && ybits == 6; + if (is_64x64) + { + // Shade constants + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - 
light, 256 - light, 256 - light); + + int count = args.DestX2() - args.DestX1() + 1; + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); + uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); + + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); + + int ssecount = count / 2; + for (int index = 0; index < ssecount; index++) + { + int offset = index * 2; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(dest + offset)), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + { + int sample_index = ((xfrac >> (32 - 6 - 6)) & (63 * 64)) + (yfrac >> (32 - 6)); + unsigned int sampleout = source[sample_index]; + ifgcolor[0] = sampleout; + xfrac += xstep; + yfrac += ystep; + } + { + int sample_index = ((xfrac >> (32 - 6 - 6)) & (63 * 64)) + (yfrac >> (32 - 6)); + unsigned int sampleout = source[sample_index]; + ifgcolor[1] = sampleout; + xfrac += xstep; + yfrac += ystep; + } + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + uint32_t alpha0 = APART(ifgcolor[0]); + uint32_t alpha1 = APART(ifgcolor[1]); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; + + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = 
_mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); + __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); + + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + _mm_storel_epi64((__m128i*)(dest + offset), outcolor); + } + + if (ssecount * 2 != count) + { + int index = ssecount * 2; + int offset = index; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + int sample_index = ((xfrac >> (32 - 6 - 6)) & (63 * 64)) + (yfrac >> (32 - 6)); + unsigned int sampleout = source[sample_index]; + ifgcolor[0] = sampleout; + ifgcolor[1] = 0; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + uint32_t alpha0 = APART(ifgcolor[0]); + uint32_t alpha1 = APART(ifgcolor[1]); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; + + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, 
fgalpha0, fgalpha0, fgalpha0, fgalpha0); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); + __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); + + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + dest[offset] = _mm_cvtsi128_si32(outcolor); + } + } + else + { + // Shade constants + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + + int count = args.DestX2() - args.DestX1() + 1; + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); + uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); + + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); + + int ssecount = count / 2; + for (int index = 0; index < ssecount; index++) + { + int offset = index * 2; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(dest + offset)), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + { + int sample_index = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + unsigned int sampleout = source[sample_index]; + ifgcolor[0] = sampleout; + xfrac += xstep; + yfrac += ystep; + } + { + int sample_index = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + unsigned int sampleout = 
source[sample_index]; + ifgcolor[1] = sampleout; + xfrac += xstep; + yfrac += ystep; + } + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + uint32_t alpha0 = APART(ifgcolor[0]); + uint32_t alpha1 = APART(ifgcolor[1]); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; + + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); + __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); + + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + _mm_storel_epi64((__m128i*)(dest + offset), outcolor); + } + + if (ssecount * 2 != count) + { + int index = ssecount * 2; + int offset = index; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); + + // Sample + unsigned int 
ifgcolor[2]; + int sample_index = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + unsigned int sampleout = source[sample_index]; + ifgcolor[0] = sampleout; + ifgcolor[1] = 0; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + uint32_t alpha0 = APART(ifgcolor[0]); + uint32_t alpha1 = APART(ifgcolor[1]); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; + + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); + __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); + + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + dest[offset] = _mm_cvtsi128_si32(outcolor); + } + } + } + else + { + bool is_64x64 = xbits == 6 && ybits == 6; + if (is_64x64) + { + // Shade constants + int light = 256 - 
(args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + + int count = args.DestX2() - args.DestX1() + 1; + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); + uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); + + xfrac -= 1 << (31 - xbits); + yfrac -= 1 << (31 - ybits); + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); + + int ssecount = count / 2; + for (int index = 0; index < ssecount; index++) + { + int offset = index * 2; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(dest + offset)), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + { + uint32_t xxbits = 26; + uint32_t yybits = 26; + uint32_t xxshift = (32 - xxbits); + uint32_t yyshift = (32 - yybits); + uint32_t xxmask = (1 << xxshift) - 1; + uint32_t yymask = (1 << yyshift) - 1; + uint32_t x = xfrac >> xxbits; + uint32_t y = yfrac >> yybits; + + uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; + uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; + + uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; + uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; + uint32_t a = 16 - inv_a; + uint32_t b = 16 - inv_b; + + uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + 
BPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[0] = sampleout; + xfrac += xstep; + yfrac += ystep; + } + { + uint32_t xxbits = 26; + uint32_t yybits = 26; + uint32_t xxshift = (32 - xxbits); + uint32_t yyshift = (32 - yybits); + uint32_t xxmask = (1 << xxshift) - 1; + uint32_t yymask = (1 << yyshift) - 1; + uint32_t x = xfrac >> xxbits; + uint32_t y = yfrac >> yybits; + + uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; + uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; + + uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; + uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; + uint32_t a = 16 - inv_a; + uint32_t b = 16 - inv_b; + + uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[1] = sampleout; + xfrac += xstep; + yfrac += ystep; + } + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + uint32_t alpha0 = APART(ifgcolor[0]); + 
uint32_t alpha1 = APART(ifgcolor[1]); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; + + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); + __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); + + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + _mm_storel_epi64((__m128i*)(dest + offset), outcolor); + } + + if (ssecount * 2 != count) + { + int index = ssecount * 2; + int offset = index; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + uint32_t xxbits = 26; + uint32_t yybits = 26; + uint32_t xxshift = (32 - xxbits); + uint32_t yyshift = (32 - yybits); + uint32_t xxmask = (1 << xxshift) - 1; + uint32_t yymask = (1 << yyshift) - 1; + uint32_t x = xfrac >> xxbits; + uint32_t y = yfrac >> yybits; + + uint32_t p00 = source[((y & yymask) + ((x 
& xxmask) << yyshift))]; + uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; + uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; + + uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; + uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; + uint32_t a = 16 - inv_a; + uint32_t b = 16 - inv_b; + + uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[0] = sampleout; + ifgcolor[1] = 0; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + uint32_t alpha0 = APART(ifgcolor[0]); + uint32_t alpha1 = APART(ifgcolor[1]); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; + + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, 
fgalpha0); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); + __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); + + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + dest[offset] = _mm_cvtsi128_si32(outcolor); + } + } + else + { + // Shade constants + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + + int count = args.DestX2() - args.DestX1() + 1; + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); + uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); + + xfrac -= 1 << (31 - xbits); + yfrac -= 1 << (31 - ybits); + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); + + int ssecount = count / 2; + for (int index = 0; index < ssecount; index++) + { + int offset = index * 2; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(dest + offset)), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + { + uint32_t xxbits = 32 - xbits; + uint32_t yybits = 32 - ybits; + uint32_t xxshift = (32 - xxbits); + uint32_t yyshift = (32 - yybits); + uint32_t xxmask = (1 << xxshift) - 1; + uint32_t yymask = (1 << yyshift) - 1; + uint32_t x = xfrac >> xxbits; + uint32_t y 
= yfrac >> yybits; + + uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; + uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; + + uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; + uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; + uint32_t a = 16 - inv_a; + uint32_t b = 16 - inv_b; + + uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[0] = sampleout; + xfrac += xstep; + yfrac += ystep; + } + { + uint32_t xxbits = 32 - xbits; + uint32_t yybits = 32 - ybits; + uint32_t xxshift = (32 - xxbits); + uint32_t yyshift = (32 - yybits); + uint32_t xxmask = (1 << xxshift) - 1; + uint32_t yymask = (1 << yyshift) - 1; + uint32_t x = xfrac >> xxbits; + uint32_t y = yfrac >> yybits; + + uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; + uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; + + uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; + uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; + uint32_t a = 16 - inv_a; + uint32_t b = 16 - inv_b; + + uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + 
RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[1] = sampleout; + xfrac += xstep; + yfrac += ystep; + } + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + uint32_t alpha0 = APART(ifgcolor[0]); + uint32_t alpha1 = APART(ifgcolor[1]); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; + + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); + __m128i out_hi = 
_mm_add_epi32(fg_hi, bg_hi); + + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + _mm_storel_epi64((__m128i*)(dest + offset), outcolor); + } + + if (ssecount * 2 != count) + { + int index = ssecount * 2; + int offset = index; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + uint32_t xxbits = 32 - xbits; + uint32_t yybits = 32 - ybits; + uint32_t xxshift = (32 - xxbits); + uint32_t yyshift = (32 - yybits); + uint32_t xxmask = (1 << xxshift) - 1; + uint32_t yymask = (1 << yyshift) - 1; + uint32_t x = xfrac >> xxbits; + uint32_t y = yfrac >> yybits; + + uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; + uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; + + uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; + uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; + uint32_t a = 16 - inv_a; + uint32_t b = 16 - inv_b; + + uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[0] = sampleout; + ifgcolor[1] = 0; + 
__m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + // Blend + uint32_t alpha0 = APART(ifgcolor[0]); + uint32_t alpha1 = APART(ifgcolor[1]); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; + + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); + __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); + + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + dest[offset] = _mm_cvtsi128_si32(outcolor); + } + } + } + } + else + { + if (is_nearest_filter) + { + bool is_64x64 = xbits == 6 && ybits == 6; + if (is_64x64) + { + // Shade constants + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + __m128i inv_light = 
_mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); + __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); + shade_fade = _mm_mullo_epi16(shade_fade, inv_light); + __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); + int desaturate = shade_constants.desaturate; + + int count = args.DestX2() - args.DestX1() + 1; + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); + uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); + + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); + + int ssecount = count / 2; + for (int index = 0; index < ssecount; index++) + { + int offset = index * 2; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(dest + offset)), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + { + int sample_index = ((xfrac >> (32 - 6 - 6)) & (63 * 64)) + (yfrac >> (32 - 6)); + unsigned int sampleout = source[sample_index]; + ifgcolor[0] = sampleout; + xfrac += xstep; + yfrac += ystep; + } + { + int sample_index = ((xfrac >> (32 - 6 - 6)) & (63 * 64)) + (yfrac >> (32 - 6)); + unsigned int sampleout = source[sample_index]; + ifgcolor[1] = sampleout; + xfrac += xstep; + yfrac += ystep; + } + __m128i fgcolor = 
_mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + int blue0 = BPART(ifgcolor[0]); + int green0 = GPART(ifgcolor[0]); + int red0 = RPART(ifgcolor[0]); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + + int blue1 = BPART(ifgcolor[1]); + int green1 = GPART(ifgcolor[1]); + int red1 = RPART(ifgcolor[1]); + int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); + + fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + // Blend + uint32_t alpha0 = APART(ifgcolor[0]); + uint32_t alpha1 = APART(ifgcolor[1]); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; + + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = 
_mm_add_epi32(fg_lo, bg_lo); + __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); + + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + _mm_storel_epi64((__m128i*)(dest + offset), outcolor); + } + + if (ssecount * 2 != count) + { + int index = ssecount * 2; + int offset = index; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + int sample_index = ((xfrac >> (32 - 6 - 6)) & (63 * 64)) + (yfrac >> (32 - 6)); + unsigned int sampleout = source[sample_index]; + ifgcolor[0] = sampleout; + ifgcolor[1] = 0; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + int blue0 = BPART(ifgcolor[0]); + int green0 = GPART(ifgcolor[0]); + int red0 = RPART(ifgcolor[0]); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + + int blue1 = BPART(ifgcolor[1]); + int green1 = GPART(ifgcolor[1]); + int red1 = RPART(ifgcolor[1]); + int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); + + fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + // Blend + uint32_t alpha0 = APART(ifgcolor[0]); + uint32_t alpha1 = APART(ifgcolor[1]); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; + + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + 
uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); + __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); + + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + dest[offset] = _mm_cvtsi128_si32(outcolor); + } + } + else + { + // Shade constants + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); + __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, 
shade_constants.fade_blue); + shade_fade = _mm_mullo_epi16(shade_fade, inv_light); + __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); + int desaturate = shade_constants.desaturate; + + int count = args.DestX2() - args.DestX1() + 1; + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); + uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); + + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); + + int ssecount = count / 2; + for (int index = 0; index < ssecount; index++) + { + int offset = index * 2; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(dest + offset)), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + { + int sample_index = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + unsigned int sampleout = source[sample_index]; + ifgcolor[0] = sampleout; + xfrac += xstep; + yfrac += ystep; + } + { + int sample_index = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + unsigned int sampleout = source[sample_index]; + ifgcolor[1] = sampleout; + xfrac += xstep; + yfrac += ystep; + } + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + int blue0 = BPART(ifgcolor[0]); + int green0 = GPART(ifgcolor[0]); + int red0 = RPART(ifgcolor[0]); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + + int blue1 = BPART(ifgcolor[1]); + int green1 = GPART(ifgcolor[1]); + int red1 = RPART(ifgcolor[1]); + int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); + + fgcolor = 
_mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + // Blend + uint32_t alpha0 = APART(ifgcolor[0]); + uint32_t alpha1 = APART(ifgcolor[1]); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; + + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); + __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); + + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + _mm_storel_epi64((__m128i*)(dest + offset), outcolor); + } + + if (ssecount * 2 != count) + { + int index = ssecount * 2; + int offset = index; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); + + // Sample + unsigned 
int ifgcolor[2]; + int sample_index = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + unsigned int sampleout = source[sample_index]; + ifgcolor[0] = sampleout; + ifgcolor[1] = 0; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + int blue0 = BPART(ifgcolor[0]); + int green0 = GPART(ifgcolor[0]); + int red0 = RPART(ifgcolor[0]); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + + int blue1 = BPART(ifgcolor[1]); + int green1 = GPART(ifgcolor[1]); + int red1 = RPART(ifgcolor[1]); + int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); + + fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + // Blend + uint32_t alpha0 = APART(ifgcolor[0]); + uint32_t alpha1 = APART(ifgcolor[1]); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; + + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = 
_mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); + __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); + + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + dest[offset] = _mm_cvtsi128_si32(outcolor); + } + } + } + else + { + bool is_64x64 = xbits == 6 && ybits == 6; + if (is_64x64) + { + // Shade constants + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); + __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); + shade_fade = _mm_mullo_epi16(shade_fade, inv_light); + __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); + int desaturate = shade_constants.desaturate; + + int count = args.DestX2() - args.DestX1() + 1; + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); + uint32_t *dest = 
(uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); + + xfrac -= 1 << (31 - xbits); + yfrac -= 1 << (31 - ybits); + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); + + int ssecount = count / 2; + for (int index = 0; index < ssecount; index++) + { + int offset = index * 2; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(dest + offset)), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + { + uint32_t xxbits = 26; + uint32_t yybits = 26; + uint32_t xxshift = (32 - xxbits); + uint32_t yyshift = (32 - yybits); + uint32_t xxmask = (1 << xxshift) - 1; + uint32_t yymask = (1 << yyshift) - 1; + uint32_t x = xfrac >> xxbits; + uint32_t y = yfrac >> yybits; + + uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; + uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; + + uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; + uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; + uint32_t a = 16 - inv_a; + uint32_t b = 16 - inv_b; + + uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[0] = sampleout; + xfrac += xstep; + yfrac += ystep; + } + { + uint32_t xxbits = 26; + uint32_t yybits = 26; + 
uint32_t xxshift = (32 - xxbits); + uint32_t yyshift = (32 - yybits); + uint32_t xxmask = (1 << xxshift) - 1; + uint32_t yymask = (1 << yyshift) - 1; + uint32_t x = xfrac >> xxbits; + uint32_t y = yfrac >> yybits; + + uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; + uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; + + uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; + uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; + uint32_t a = 16 - inv_a; + uint32_t b = 16 - inv_b; + + uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[1] = sampleout; + xfrac += xstep; + yfrac += ystep; + } + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + int blue0 = BPART(ifgcolor[0]); + int green0 = GPART(ifgcolor[0]); + int red0 = RPART(ifgcolor[0]); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + + int blue1 = BPART(ifgcolor[1]); + int green1 = GPART(ifgcolor[1]); + int red1 = RPART(ifgcolor[1]); + int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); + + fgcolor = 
_mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + // Blend + uint32_t alpha0 = APART(ifgcolor[0]); + uint32_t alpha1 = APART(ifgcolor[1]); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; + + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); + __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); + + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + _mm_storel_epi64((__m128i*)(dest + offset), outcolor); + } + + if (ssecount * 2 != count) + { + int index = ssecount * 2; + int offset = index; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); + + // Sample + unsigned 
int ifgcolor[2]; + uint32_t xxbits = 26; + uint32_t yybits = 26; + uint32_t xxshift = (32 - xxbits); + uint32_t yyshift = (32 - yybits); + uint32_t xxmask = (1 << xxshift) - 1; + uint32_t yymask = (1 << yyshift) - 1; + uint32_t x = xfrac >> xxbits; + uint32_t y = yfrac >> yybits; + + uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; + uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; + + uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; + uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; + uint32_t a = 16 - inv_a; + uint32_t b = 16 - inv_b; + + uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[0] = sampleout; + ifgcolor[1] = 0; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + int blue0 = BPART(ifgcolor[0]); + int green0 = GPART(ifgcolor[0]); + int red0 = RPART(ifgcolor[0]); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + + int blue1 = BPART(ifgcolor[1]); + int green1 = GPART(ifgcolor[1]); + int red1 = RPART(ifgcolor[1]); + int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, 
intensity0, intensity0); + + fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + // Blend + uint32_t alpha0 = APART(ifgcolor[0]); + uint32_t alpha1 = APART(ifgcolor[1]); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; + + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); + __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); + + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + dest[offset] = _mm_cvtsi128_si32(outcolor); + } + } + else + { + // Shade constants + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); 
+ __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); + __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); + shade_fade = _mm_mullo_epi16(shade_fade, inv_light); + __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); + int desaturate = shade_constants.desaturate; + + int count = args.DestX2() - args.DestX1() + 1; + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); + uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); + + xfrac -= 1 << (31 - xbits); + yfrac -= 1 << (31 - ybits); + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); + + int ssecount = count / 2; + for (int index = 0; index < ssecount; index++) + { + int offset = index * 2; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(dest + offset)), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + { + uint32_t xxbits = 32 - xbits; + uint32_t yybits = 32 - ybits; + uint32_t xxshift = (32 - xxbits); + uint32_t yyshift = (32 - yybits); + uint32_t xxmask = (1 << xxshift) - 1; + uint32_t yymask = (1 << yyshift) - 1; + uint32_t x = xfrac >> xxbits; + uint32_t y = yfrac >> yybits; + + uint32_t p00 = source[((y & yymask) + ((x & xxmask) << 
yyshift))]; + uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; + uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; + + uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; + uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; + uint32_t a = 16 - inv_a; + uint32_t b = 16 - inv_b; + + uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[0] = sampleout; + xfrac += xstep; + yfrac += ystep; + } + { + uint32_t xxbits = 32 - xbits; + uint32_t yybits = 32 - ybits; + uint32_t xxshift = (32 - xxbits); + uint32_t yyshift = (32 - yybits); + uint32_t xxmask = (1 << xxshift) - 1; + uint32_t yymask = (1 << yyshift) - 1; + uint32_t x = xfrac >> xxbits; + uint32_t y = yfrac >> yybits; + + uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; + uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; + + uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; + uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; + uint32_t a = 16 - inv_a; + uint32_t b = 16 - inv_b; + + uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t 
sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[1] = sampleout; + xfrac += xstep; + yfrac += ystep; + } + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + int blue0 = BPART(ifgcolor[0]); + int green0 = GPART(ifgcolor[0]); + int red0 = RPART(ifgcolor[0]); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + + int blue1 = BPART(ifgcolor[1]); + int green1 = GPART(ifgcolor[1]); + int red1 = RPART(ifgcolor[1]); + int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); + + fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + // Blend + uint32_t alpha0 = APART(ifgcolor[0]); + uint32_t alpha1 = APART(ifgcolor[1]); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; + + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, 
bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); + __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); + + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + _mm_storel_epi64((__m128i*)(dest + offset), outcolor); + } + + if (ssecount * 2 != count) + { + int index = ssecount * 2; + int offset = index; + __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); + + // Sample + unsigned int ifgcolor[2]; + uint32_t xxbits = 32 - xbits; + uint32_t yybits = 32 - ybits; + uint32_t xxshift = (32 - xxbits); + uint32_t yyshift = (32 - yybits); + uint32_t xxmask = (1 << xxshift) - 1; + uint32_t yymask = (1 << yyshift) - 1; + uint32_t x = xfrac >> xxbits; + uint32_t y = yfrac >> yybits; + + uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; + uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; + + uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; + uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; + uint32_t a = 16 - inv_a; + uint32_t b = 16 - inv_b; + + uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + 
RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + ifgcolor[0] = sampleout; + ifgcolor[1] = 0; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + int blue0 = BPART(ifgcolor[0]); + int green0 = GPART(ifgcolor[0]); + int red0 = RPART(ifgcolor[0]); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + + int blue1 = BPART(ifgcolor[1]); + int green1 = GPART(ifgcolor[1]); + int red1 = RPART(ifgcolor[1]); + int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); + + fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + // Blend + uint32_t alpha0 = APART(ifgcolor[0]); + uint32_t alpha1 = APART(ifgcolor[1]); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; + + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + 
+ __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); + __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); + + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + dest[offset] = _mm_cvtsi128_si32(outcolor); + } + } + } + } + } + + FString DebugInfo() override { return "DrawSpanAddClamp32Command"; } + }; + +} diff --git a/src/swrenderer/drawers/r_draw_span32.php b/src/swrenderer/drawers/r_draw_span32.php new file mode 100644 index 0000000000..6a8a0b32aa --- /dev/null +++ b/src/swrenderer/drawers/r_draw_span32.php @@ -0,0 +1,388 @@ +#!/usr/bin/php +/* +** Drawer commands for spans +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. 
If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. +** +*/ + +/* + Warning: this C++ source file has been auto-generated. Please modify the original php script that generated it. +*/ + +#pragma once + +#include "swrenderer/drawers/r_draw_rgba.h" +#include "swrenderer/viewport/r_spandrawer.h" + +namespace swrenderer +{ + + class : public DrawerCommand + { + protected: + SpanDrawerArgs args; + + public: + (const SpanDrawerArgs &drawerargs) : args(drawerargs) { } + + void Execute(DrawerThread *thread) override + { + if (thread->line_skipped_by_thread(args.DestY())) return; + + uint32_t xbits = args.TextureWidthBits(); + uint32_t ybits = args.TextureHeightBits(); + uint32_t xstep = args.TextureUStep(); + uint32_t ystep = args.TextureVStep(); + uint32_t xfrac = args.TextureUPos(); + uint32_t yfrac = args.TextureVPos(); + uint32_t yshift = 32 - ybits; + uint32_t xshift = yshift - xbits; + uint32_t xmask = ((1 << xbits) - 1) << ybits; + + const uint32_t *source = (const uint32_t*)args.TexturePixels(); + + double lod = args.TextureLOD(); + bool mipmapped = args.MipmappedTexture(); + + bool magnifying = lod < 0.0; + if (r_mipmap && mipmapped) + { + int level = (int)lod; + while (level > 0) + { + if (xbits <= 2 || ybits <= 2) + break; + + source += (1 << (xbits)) * (1 << (ybits)); + xbits -= 1; + ybits -= 1; + level--; + } + } + + bool is_nearest_filter = !((magnifying && r_magfilter) || (!magnifying && r_minfilter)); + + auto shade_constants = args.ColormapConstants(); + if (shade_constants.simple_shade) + { + + } + else + { + + } + } + + FString DebugInfo() override { return ""; } + }; + + + if (is_nearest_filter) + { + + } + else + { + + } + + bool is_64x64 = xbits == 6 && ybits == 
6; + if (is_64x64) + { + + } + else + { + + } + + // Shade constants + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + + __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); + __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); + shade_fade = _mm_mullo_epi16(shade_fade, inv_light); + __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); + int desaturate = shade_constants.desaturate; + + + int count = args.DestX2() - args.DestX1() + 1; + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); + uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); + + + xfrac -= 1 << (31 - xbits); + yfrac -= 1 << (31 - ybits); + + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); + + int ssecount = count / 2; + for (int index = 0; index < ssecount; index++) + { + int offset = index * 2; + + __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(dest + offset)), _mm_setzero_si128()); + + + // Sample + unsigned int ifgcolor[2]; + { + + ifgcolor[0] = sampleout; + xfrac += xstep; + yfrac += ystep; + } + { + + ifgcolor[1] = sampleout; + xfrac += xstep; + 
yfrac += ystep; + } + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + + + // Blend + + + _mm_storel_epi64((__m128i*)(dest + offset), outcolor); + } + + if (ssecount * 2 != count) + { + int index = ssecount * 2; + int offset = index; + + __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); + + + // Sample + unsigned int ifgcolor[2]; + + ifgcolor[0] = sampleout; + ifgcolor[1] = 0; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + // Shade + + + // Blend + + + dest[offset] = _mm_cvtsi128_si32(outcolor); + } + + int sample_index = ((xfrac >> (32 - 6 - 6)) & (63 * 64)) + (yfrac >> (32 - 6)); + unsigned int sampleout = source[sample_index]; + + int sample_index = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + unsigned int sampleout = source[sample_index]; + + uint32_t xxbits = 26; + uint32_t yybits = 26; + + uint32_t xxbits = 32 - xbits; + uint32_t yybits = 32 - ybits; + + uint32_t xxshift = (32 - xxbits); + uint32_t yyshift = (32 - yybits); + uint32_t xxmask = (1 << xxshift) - 1; + uint32_t yymask = (1 << yyshift) - 1; + uint32_t x = xfrac >> xxbits; + uint32_t y = yfrac >> yybits; + + uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; + uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; + + uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; + uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; + uint32_t a = 16 - inv_a; + uint32_t b = 16 - inv_b; + + uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t 
sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + + int blue0 = BPART(ifgcolor[0]); + int green0 = GPART(ifgcolor[0]); + int red0 = RPART(ifgcolor[0]); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + + int blue1 = BPART(ifgcolor[1]); + int green1 = GPART(ifgcolor[1]); + int red1 = RPART(ifgcolor[1]); + int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); + + fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + __m128i outcolor = fgcolor; + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); + alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + + fgcolor = _mm_mullo_epi16(fgcolor, alpha); + bgcolor = _mm_mullo_epi16(bgcolor, inv_alpha); + __m128i outcolor = _mm_srli_epi16(_mm_add_epi16(fgcolor, bgcolor), 8); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + __m128i fgalpha = _mm_set1_epi16(srcalpha); + __m128i bgalpha = _mm_set1_epi16(destalpha); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + 
bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); + __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); + + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + + uint32_t alpha0 = APART(ifgcolor[0]); + uint32_t alpha1 = APART(ifgcolor[1]); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; + + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + + __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); + __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); + + __m128i out_lo = _mm_sub_epi32(fg_lo, bg_lo); + __m128i out_hi = _mm_sub_epi32(fg_hi, bg_hi); + + __m128i out_lo = 
_mm_sub_epi32(bg_lo, fg_lo); + __m128i out_hi = _mm_sub_epi32(bg_hi, fg_hi); + + + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + +} From e73031b3c9fc06e9398c444c55ac112bd2017240 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 21 Feb 2017 01:25:08 +0100 Subject: [PATCH 867/912] Added sky drawers --- src/swrenderer/drawers/r_draw_rgba.cpp | 17 ++ src/swrenderer/drawers/r_draw_rgba.h | 4 +- src/swrenderer/drawers/r_draw_sky32.h | 314 +++++++++++++++++++++++++ 3 files changed, 333 insertions(+), 2 deletions(-) create mode 100644 src/swrenderer/drawers/r_draw_sky32.h diff --git a/src/swrenderer/drawers/r_draw_rgba.cpp b/src/swrenderer/drawers/r_draw_rgba.cpp index 532aa09400..f60f1447c4 100644 --- a/src/swrenderer/drawers/r_draw_rgba.cpp +++ b/src/swrenderer/drawers/r_draw_rgba.cpp @@ -43,6 +43,7 @@ #include "r_draw_wall32.h" #include "r_draw_sprite32.h" #include "r_draw_span32.h" +#include "r_draw_sky32.h" #include "gi.h" #include "stats.h" @@ -294,6 +295,22 @@ namespace swrenderer else Queue->Push(args); } + + void SWTruecolorDrawers::DrawSingleSkyColumn(const SkyDrawerArgs &args) + { + if (r_phpdrawers) + Queue->Push(args); + else + Queue->Push(args); + } + + void SWTruecolorDrawers::DrawDoubleSkyColumn(const SkyDrawerArgs &args) + { + if (r_phpdrawers) + Queue->Push(args); + else + Queue->Push(args); + } DrawSpanLLVMCommand::DrawSpanLLVMCommand(const SpanDrawerArgs &drawerargs) { diff --git a/src/swrenderer/drawers/r_draw_rgba.h b/src/swrenderer/drawers/r_draw_rgba.h index 52452fab05..a3ec7e5f94 100644 --- a/src/swrenderer/drawers/r_draw_rgba.h +++ b/src/swrenderer/drawers/r_draw_rgba.h @@ -365,8 +365,8 @@ namespace swrenderer void DrawWallAddClampColumn(const WallDrawerArgs &args) override; void DrawWallSubClampColumn(const WallDrawerArgs &args) 
override; void DrawWallRevSubClampColumn(const WallDrawerArgs &args) override; - void DrawSingleSkyColumn(const SkyDrawerArgs &args) override { Queue->Push(args); } - void DrawDoubleSkyColumn(const SkyDrawerArgs &args) override { Queue->Push(args); } + void DrawSingleSkyColumn(const SkyDrawerArgs &args) override; + void DrawDoubleSkyColumn(const SkyDrawerArgs &args) override; void DrawColumn(const SpriteDrawerArgs &args) override; void FillColumn(const SpriteDrawerArgs &args) override; void FillAddColumn(const SpriteDrawerArgs &args) override; diff --git a/src/swrenderer/drawers/r_draw_sky32.h b/src/swrenderer/drawers/r_draw_sky32.h new file mode 100644 index 0000000000..f047c77957 --- /dev/null +++ b/src/swrenderer/drawers/r_draw_sky32.h @@ -0,0 +1,314 @@ +/* +** Drawer commands for spans +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. 
+**
+*/
+
+#pragma once
+
+#include "swrenderer/drawers/r_draw_rgba.h"
+#include "swrenderer/viewport/r_skydrawer.h"
+
+namespace swrenderer
+{
+	// Drawer command that fills one vertical run of 32-bit destination pixels
+	// from a single (front) sky texture column. When args.FadeSky() is set, the
+	// run is split into five bands: solid top color, fade from top color into
+	// the texture, plain texture, fade from texture into bottom color, and
+	// solid bottom color.
+	class DrawSkySingle32Command : public DrawerCommand
+	{
+	protected:
+		SkyDrawerArgs args;
+
+	public:
+		DrawSkySingle32Command(const SkyDrawerArgs &args) : args(args) { }
+
+		// Draws the rows of the column that belong to `thread`.
+		// Rows are visited starting at `skipped` and stepping by num_cores, so
+		// frac/fracstep/pitch are pre-scaled to match that stride.
+		void Execute(DrawerThread *thread) override
+		{
+			uint32_t *dest = (uint32_t *)args.Dest();
+			int count = args.Count();
+			int pitch = RenderViewport::Instance()->RenderTarget->GetPitch();
+			const uint32_t *source0 = (const uint32_t *)args.FrontTexturePixels();
+			int textureheight0 = args.FrontTextureHeight();
+
+			int32_t frac = args.TextureVPos();
+			int32_t fracstep = args.TextureVStep();
+
+			uint32_t solid_top = args.SolidTopColor();
+			uint32_t solid_bottom = args.SolidBottomColor();
+			bool fadeSky = args.FadeSky();
+
+			// Find bands for top solid color, top fade, center textured, bottom fade, bottom solid color:
+			// The band limits are row indices relative to the start of the column,
+			// computed with the per-row step (before it is scaled by num_cores below)
+			// and clamped to [0, count].
+			int start_fade = 2; // How fast it should fade out
+			int fade_length = (1 << (24 - start_fade));
+			int start_fadetop_y = (-frac) / fracstep;
+			int end_fadetop_y = (fade_length - frac) / fracstep;
+			int start_fadebottom_y = ((2 << 24) - fade_length - frac) / fracstep;
+			int end_fadebottom_y = ((2 << 24) - frac) / fracstep;
+			start_fadetop_y = clamp(start_fadetop_y, 0, count);
+			end_fadetop_y = clamp(end_fadetop_y, 0, count);
+			start_fadebottom_y = clamp(start_fadebottom_y, 0, count);
+			end_fadebottom_y = clamp(end_fadebottom_y, 0, count);
+
+			// Advance to the first row handled by this thread and make every
+			// subsequent step jump num_cores rows at a time.
+			int num_cores = thread->num_cores;
+			int skipped = thread->skipped_by_thread(args.DestY());
+			dest = thread->dest_for_thread(args.DestY(), pitch, dest);
+			frac += fracstep * skipped;
+			fracstep *= num_cores;
+			pitch *= num_cores;
+
+			if (!fadeSky)
+			{
+				// No fading: every row is a straight texture lookup.
+				count = thread->count_for_thread(args.DestY(), count);
+
+				for (int index = 0; index < count; index++)
+				{
+					uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS;
+					*dest = source0[sample_index];
+					dest += pitch;
+					frac += fracstep;
+				}
+
+				return;
+			}
+
+			// Cap colors widened to one 16-bit lane per channel for the blends below.
+			__m128i solid_top_fill = _mm_unpacklo_epi8(_mm_cvtsi32_si128(solid_top), _mm_setzero_si128());
+			__m128i solid_bottom_fill = _mm_unpacklo_epi8(_mm_cvtsi32_si128(solid_bottom), _mm_setzero_si128());
+
+			int index = skipped;
+
+			// Top solid color:
+			while (index < start_fadetop_y)
+			{
+				*dest = solid_top;
+				dest += pitch;
+				frac += fracstep;
+				index += num_cores;
+			}
+
+			// Top fade:
+			while (index < end_fadetop_y)
+			{
+				uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS;
+				uint32_t fg = source0[sample_index];
+
+				// alpha is the texture weight in [0, 256]; the top color gets the remainder.
+				__m128i alpha = _mm_set1_epi16(MAX(MIN(frac >> (16 - start_fade), 256), 0));
+				__m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha);
+
+				__m128i c = _mm_unpacklo_epi8(_mm_cvtsi32_si128(fg), _mm_setzero_si128());
+				c = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(c, alpha), _mm_mullo_epi16(solid_top_fill, inv_alpha)), 8);
+				*dest = _mm_cvtsi128_si32(_mm_packus_epi16(c, _mm_setzero_si128()));
+
+				frac += fracstep;
+				dest += pitch;
+				index += num_cores;
+			}
+
+			// Textured center:
+			while (index < start_fadebottom_y)
+			{
+				uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS;
+				*dest = source0[sample_index];
+
+				frac += fracstep;
+				dest += pitch;
+				index += num_cores;
+			}
+
+			// Fade bottom:
+			while (index < end_fadebottom_y)
+			{
+				uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS;
+				uint32_t fg = source0[sample_index];
+
+				// alpha is the texture weight in [0, 256], measured from the bottom edge (2 << 24).
+				__m128i alpha = _mm_set1_epi16(MAX(MIN(((2 << 24) - frac) >> (16 - start_fade), 256), 0));
+				__m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha);
+
+				// Blend toward the bottom cap color so this band meets the solid
+				// bottom band below without a color jump.
+				__m128i c = _mm_unpacklo_epi8(_mm_cvtsi32_si128(fg), _mm_setzero_si128());
+				c = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(c, alpha), _mm_mullo_epi16(solid_bottom_fill, inv_alpha)), 8);
+				*dest = _mm_cvtsi128_si32(_mm_packus_epi16(c, _mm_setzero_si128()));
+
+				frac += fracstep;
+				dest += pitch;
+				index += num_cores;
+			}
+
+			// Bottom solid color:
+			while (index < count)
+			{
+				*dest = solid_bottom;
+				dest += pitch;
+				index += num_cores;
+			}
+		}
+
+		FString DebugInfo() override { return "DrawSkySingle32Command"; }
+	};
+
+	class DrawSkyDouble32Command : public DrawerCommand
+	{
+	protected:
+		SkyDrawerArgs args;
+
+	public:
+		DrawSkyDouble32Command(const SkyDrawerArgs &args) : args(args) { }
+
+		void Execute(DrawerThread *thread) override
+		{
+			uint32_t *dest = (uint32_t *)args.Dest();
+			int count = args.Count();
+			int pitch = RenderViewport::Instance()->RenderTarget->GetPitch();
+			const uint32_t *source0 = (const uint32_t *)args.FrontTexturePixels();
+			const uint32_t *source1 = (const uint32_t *)args.BackTexturePixels();
+			int textureheight0 = args.FrontTextureHeight();
+			uint32_t maxtextureheight1 = args.BackTextureHeight() - 1;
+
+			int32_t frac = args.TextureVPos();
+			int32_t fracstep = args.TextureVStep();
+
+			uint32_t solid_top = args.SolidTopColor();
+			uint32_t solid_bottom = args.SolidBottomColor();
+			bool fadeSky = args.FadeSky();
+
+			// Find bands for top solid color, top fade, center textured, bottom fade, bottom solid color:
+			int start_fade = 2; // How fast it should fade out
+			int fade_length = (1 << (24 - start_fade));
+			int start_fadetop_y = (-frac) / fracstep;
+			int end_fadetop_y = (fade_length - frac) / fracstep;
+			int start_fadebottom_y = ((2 << 24) - fade_length - frac) / fracstep;
+			int end_fadebottom_y = ((2 << 24) - frac) / fracstep;
+			start_fadetop_y = clamp(start_fadetop_y, 0, count);
+			end_fadetop_y = clamp(end_fadetop_y, 0, count);
+			start_fadebottom_y = clamp(start_fadebottom_y, 0, count);
+			end_fadebottom_y = clamp(end_fadebottom_y, 0, count);
+
+			int num_cores = thread->num_cores;
+			int skipped = thread->skipped_by_thread(args.DestY());
+			dest = thread->dest_for_thread(args.DestY(), pitch, dest);
+			frac += fracstep * skipped;
+			fracstep *= num_cores;
+			pitch *= num_cores;
+
+			if (!fadeSky)
+			{
+				count =
thread->count_for_thread(args.DestY(), count); + + for (int index = 0; index < count; index++) + { + uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; + uint32_t fg = source0[sample_index]; + if (fg == 0) + { + uint32_t sample_index2 = MIN(sample_index, maxtextureheight1); + fg = source1[sample_index2]; + } + + *dest = fg; + dest += pitch; + frac += fracstep; + } + + return; + } + + __m128i solid_top_fill = _mm_unpacklo_epi8(_mm_cvtsi32_si128(solid_top), _mm_setzero_si128()); + __m128i solid_bottom_fill = _mm_unpacklo_epi8(_mm_cvtsi32_si128(solid_bottom), _mm_setzero_si128()); + + int index = skipped; + + // Top solid color: + while (index < start_fadetop_y) + { + *dest = solid_top; + dest += pitch; + frac += fracstep; + index += num_cores; + } + + // Top fade: + while (index < end_fadetop_y) + { + uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; + uint32_t fg = source0[sample_index]; + if (fg == 0) + { + uint32_t sample_index2 = MIN(sample_index, maxtextureheight1); + fg = source1[sample_index2]; + } + + __m128i alpha = _mm_set1_epi16(MAX(MIN(frac >> (16 - start_fade), 256), 0)); + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + + __m128i c = _mm_unpacklo_epi8(_mm_cvtsi32_si128(fg), _mm_setzero_si128()); + c = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(c, alpha), _mm_mullo_epi16(solid_top_fill, inv_alpha)), 8); + *dest = _mm_cvtsi128_si32(_mm_packus_epi16(c, _mm_setzero_si128())); + + frac += fracstep; + dest += pitch; + index += num_cores; + } + + // Textured center: + while (index < start_fadebottom_y) + { + uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; + uint32_t fg = source0[sample_index]; + if (fg == 0) + { + uint32_t sample_index2 = MIN(sample_index, maxtextureheight1); + fg = source1[sample_index2]; + } + *dest = fg; + + frac += fracstep; + dest += pitch; + index += num_cores; + } + + // Fade bottom: + 
while (index < end_fadebottom_y) + { + uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; + uint32_t fg = source0[sample_index]; + if (fg == 0) + { + uint32_t sample_index2 = MIN(sample_index, maxtextureheight1); + fg = source1[sample_index2]; + } + + __m128i alpha = _mm_set1_epi16(MAX(MIN(((2 << 24) - frac) >> (16 - start_fade), 256), 0)); + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + + __m128i c = _mm_unpacklo_epi8(_mm_cvtsi32_si128(fg), _mm_setzero_si128()); + c = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(c, alpha), _mm_mullo_epi16(solid_top_fill, inv_alpha)), 8); + *dest = _mm_cvtsi128_si32(_mm_packus_epi16(c, _mm_setzero_si128())); + + frac += fracstep; + dest += pitch; + index += num_cores; + } + + // Bottom solid color: + while (index < count) + { + *dest = solid_bottom; + dest += pitch; + index += num_cores; + } + } + + FString DebugInfo() override { return "DrawSkyDouble32Command"; } + }; +} From c918950ff614b31955b142a24be5908379437e34 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 21 Feb 2017 09:08:51 +0100 Subject: [PATCH 868/912] Add php drawer for the poly renderer --- src/polyrenderer/drawers/poly_drawers.h | 15180 +++++++++++++++++ src/polyrenderer/drawers/poly_drawers.php | 386 + src/polyrenderer/drawers/poly_triangle.cpp | 23 +- src/polyrenderer/drawers/screen_triangle.cpp | 1 + src/polyrenderer/drawers/screen_triangle.h | 5 + 5 files changed, 15589 insertions(+), 6 deletions(-) create mode 100644 src/polyrenderer/drawers/poly_drawers.h create mode 100644 src/polyrenderer/drawers/poly_drawers.php diff --git a/src/polyrenderer/drawers/poly_drawers.h b/src/polyrenderer/drawers/poly_drawers.h new file mode 100644 index 0000000000..37b5eb25ca --- /dev/null +++ b/src/polyrenderer/drawers/poly_drawers.h @@ -0,0 +1,15180 @@ +/* +** Projected triangle drawer +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** 
warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. +** +*/ + +/* + Warning: this C++ source file has been auto-generated. Please modify the original php script that generated it. +*/ + +#pragma once + +#include "screen_triangle.h" + +static float FindGradientX(float x0, float y0, float x1, float y1, float x2, float y2, float c0, float c1, float c2) +{ + float top = (c1 - c2) * (y0 - y2) - (c0 - c2) * (y1 - y2); + float bottom = (x1 - x2) * (y0 - y2) - (x0 - x2) * (y1 - y2); + return top / bottom; +} + +static float FindGradientY(float x0, float y0, float x1, float y1, float x2, float y2, float c0, float c1, float c2) +{ + float top = (c1 - c2) * (x0 - x2) - (c0 - c2) * (x1 - x2); + float bottom = (x0 - x2) * (y1 - y2) - (x1 - x2) * (y0 - y2); + return top / bottom; +} + +static void TriFill32Copy(const TriDrawTriangleArgs *args, WorkerThreadData *thread) +{ + int numSpans = thread->NumFullSpans; + auto fullSpans = thread->FullSpans; + int numBlocks = thread->NumPartialBlocks; + auto partialBlocks = thread->PartialBlocks; + int startX = thread->StartX; + int startY = thread->StartY; + + auto flags = args->uniforms->flags; + bool is_simple_shade = (flags & TriUniforms::simple_shade) == TriUniforms::simple_shade; + bool is_nearest_filter = 
(flags & TriUniforms::nearest_filter) == TriUniforms::nearest_filter; + bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; + uint32_t lightmask = is_fixed_light ? 0 : 0xffffffff; + auto colormaps = args->colormaps; + + // Calculate gradients + const TriVertex &v1 = *args->v1; + const TriVertex &v2 = *args->v2; + const TriVertex &v3 = *args->v3; + ScreenTriangleStepVariables gradientX; + ScreenTriangleStepVariables gradientY; + ScreenTriangleStepVariables start; + gradientX.W = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + gradientY.W = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + start.W = v1.w + gradientX.W * (startX - v1.x) + gradientY.W * (startY - v1.y); + for (int i = 0; i < TriVertex::NumVarying; i++) + { + gradientX.Varying[i] = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + gradientY.Varying[i] = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + start.Varying[i] = v1.varying[i] * v1.w + gradientX.Varying[i] * (startX - v1.x) + gradientY.Varying[i] * (startY - v1.y); + } + + const uint32_t * RESTRICT texPixels = (const uint32_t *)args->texturePixels; + uint32_t texWidth = args->textureWidth; + uint32_t texHeight = args->textureHeight; + + uint32_t * RESTRICT destOrg = (uint32_t*)args->dest; + int pitch = args->pitch; + + uint32_t light = args->uniforms->light; + float shade = (64.0f - (light * 255 / 256 + 12.0f) * 32.0f / 128.0f) / 32.0f; + float globVis = args->uniforms->globvis * (1.0f / 32.0f); + + uint32_t color = args->uniforms->color; + + for (int i = 0; i < numSpans; i++) + { + const auto &span = fullSpans[i]; + + uint32_t *dest = destOrg + span.X + span.Y * pitch; + int width = span.Length; + int height = 8; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (span.X - startX) + gradientY.W * 
(span.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (span.X - startX) + gradientY.Varying[j] * (span.Y - startY); + + for (int y = 0; y < height; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + for (int x = 0; x < width; x++) + { + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int ix = 0; ix < 8; ix++) + { + uint32_t fg = color; + uint32_t r = RPART(fg); + uint32_t g = GPART(fg); + uint32_t b = BPART(fg); + r = (r * lightpos) >> 16; + g = (g * lightpos) >> 16; + b = (b * lightpos) >> 16; + + fg = 0xff000000 | (r << 16) | (g << 8) | b; + dest[x * 8 + ix] = fg; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } + + for (int i = 0; i < numBlocks; i++) + { + const auto &block = partialBlocks[i]; + + 
ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (block.X - startX) + gradientY.W * (block.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (block.X - startX) + gradientY.Varying[j] * (block.Y - startY); + + uint32_t *dest = destOrg + block.X + block.Y * pitch; + uint32_t mask0 = block.Mask0; + uint32_t mask1 = block.Mask1; + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask0 & (1 << 31)) + { + uint32_t fg = color; + uint32_t r = RPART(fg); + uint32_t g = GPART(fg); + uint32_t b = BPART(fg); + r = (r * lightpos) >> 16; + g = (g * lightpos) >> 16; + b = (b * lightpos) >> 16; + + fg = 0xff000000 | (r << 16) | (g << 8) | b; + dest[x] = fg; + } + mask0 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += 
gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask1 & (1 << 31)) + { + uint32_t fg = color; + uint32_t r = RPART(fg); + uint32_t g = GPART(fg); + uint32_t b = BPART(fg); + r = (r * lightpos) >> 16; + g = (g * lightpos) >> 16; + b = (b * lightpos) >> 16; + + fg = 0xff000000 | (r << 16) | (g << 8) | b; + dest[x] = fg; + } + mask1 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } +} + +static void TriFill32AlphaBlend(const TriDrawTriangleArgs *args, WorkerThreadData *thread) +{ + int numSpans = thread->NumFullSpans; + auto fullSpans = 
thread->FullSpans; + int numBlocks = thread->NumPartialBlocks; + auto partialBlocks = thread->PartialBlocks; + int startX = thread->StartX; + int startY = thread->StartY; + + auto flags = args->uniforms->flags; + bool is_simple_shade = (flags & TriUniforms::simple_shade) == TriUniforms::simple_shade; + bool is_nearest_filter = (flags & TriUniforms::nearest_filter) == TriUniforms::nearest_filter; + bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; + uint32_t lightmask = is_fixed_light ? 0 : 0xffffffff; + auto colormaps = args->colormaps; + + // Calculate gradients + const TriVertex &v1 = *args->v1; + const TriVertex &v2 = *args->v2; + const TriVertex &v3 = *args->v3; + ScreenTriangleStepVariables gradientX; + ScreenTriangleStepVariables gradientY; + ScreenTriangleStepVariables start; + gradientX.W = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + gradientY.W = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + start.W = v1.w + gradientX.W * (startX - v1.x) + gradientY.W * (startY - v1.y); + for (int i = 0; i < TriVertex::NumVarying; i++) + { + gradientX.Varying[i] = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + gradientY.Varying[i] = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + start.Varying[i] = v1.varying[i] * v1.w + gradientX.Varying[i] * (startX - v1.x) + gradientY.Varying[i] * (startY - v1.y); + } + + const uint32_t * RESTRICT texPixels = (const uint32_t *)args->texturePixels; + uint32_t texWidth = args->textureWidth; + uint32_t texHeight = args->textureHeight; + + uint32_t * RESTRICT destOrg = (uint32_t*)args->dest; + int pitch = args->pitch; + + uint32_t light = args->uniforms->light; + float shade = (64.0f - (light * 255 / 256 + 12.0f) * 32.0f / 128.0f) / 32.0f; + float globVis = args->uniforms->globvis * (1.0f / 32.0f); + + uint32_t 
color = args->uniforms->color; + + for (int i = 0; i < numSpans; i++) + { + const auto &span = fullSpans[i]; + + uint32_t *dest = destOrg + span.X + span.Y * pitch; + int width = span.Length; + int height = 8; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (span.X - startX) + gradientY.W * (span.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (span.X - startX) + gradientY.Varying[j] * (span.Y - startY); + + for (int y = 0; y < height; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + for (int x = 0; x < width; x++) + { + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int ix = 0; ix < 8; ix++) + { + uint32_t fg = color; + uint32_t r = RPART(fg); + uint32_t g = GPART(fg); + uint32_t b = BPART(fg); + r = (r * lightpos) >> 16; + g = (g * lightpos) >> 16; + b = (b * lightpos) >> 16; + + uint32_t a = APART(fg); + a += a >> 7; + uint32_t inv_a = 256 - a; + uint32_t bg = dest[x * 8 + ix]; + 
uint32_t bg_red = RPART(bg); + uint32_t bg_green = GPART(bg); + uint32_t bg_blue = BPART(bg); + r = (r * a + bg_red * inv_a + 127) >> 8; + g = (g * a + bg_green * inv_a + 127) >> 8; + b = (b * a + bg_blue * inv_a + 127) >> 8; + fg = 0xff000000 | (r << 16) | (g << 8) | b; + dest[x * 8 + ix] = fg; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } + + for (int i = 0; i < numBlocks; i++) + { + const auto &block = partialBlocks[i]; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (block.X - startX) + gradientY.W * (block.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (block.X - startX) + gradientY.Varying[j] * (block.Y - startY); + + uint32_t *dest = destOrg + block.X + block.Y * pitch; + uint32_t mask0 = block.Mask0; + uint32_t mask1 = block.Mask1; + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = 
FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask0 & (1 << 31)) + { + uint32_t fg = color; + uint32_t r = RPART(fg); + uint32_t g = GPART(fg); + uint32_t b = BPART(fg); + r = (r * lightpos) >> 16; + g = (g * lightpos) >> 16; + b = (b * lightpos) >> 16; + + uint32_t a = APART(fg); + a += a >> 7; + uint32_t inv_a = 256 - a; + uint32_t bg = dest[x]; + uint32_t bg_red = RPART(bg); + uint32_t bg_green = GPART(bg); + uint32_t bg_blue = BPART(bg); + r = (r * a + bg_red * inv_a + 127) >> 8; + g = (g * a + bg_green * inv_a + 127) >> 8; + b = (b * a + bg_blue * inv_a + 127) >> 8; + fg = 0xff000000 | (r << 16) | (g << 8) | b; + dest[x] = fg; + } + mask0 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + 
int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask1 & (1 << 31)) + { + uint32_t fg = color; + uint32_t r = RPART(fg); + uint32_t g = GPART(fg); + uint32_t b = BPART(fg); + r = (r * lightpos) >> 16; + g = (g * lightpos) >> 16; + b = (b * lightpos) >> 16; + + uint32_t a = APART(fg); + a += a >> 7; + uint32_t inv_a = 256 - a; + uint32_t bg = dest[x]; + uint32_t bg_red = RPART(bg); + uint32_t bg_green = GPART(bg); + uint32_t bg_blue = BPART(bg); + r = (r * a + bg_red * inv_a + 127) >> 8; + g = (g * a + bg_green * inv_a + 127) >> 8; + b = (b * a + bg_blue * inv_a + 127) >> 8; + fg = 0xff000000 | (r << 16) | (g << 8) | b; + dest[x] = fg; + } + mask1 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } +} + +static void TriFill32AddSolid(const TriDrawTriangleArgs *args, WorkerThreadData *thread) +{ + int numSpans = thread->NumFullSpans; + auto fullSpans = thread->FullSpans; + int numBlocks = thread->NumPartialBlocks; + auto partialBlocks = thread->PartialBlocks; + int startX = thread->StartX; + int startY = thread->StartY; + + auto flags = args->uniforms->flags; + bool is_simple_shade = (flags & TriUniforms::simple_shade) == TriUniforms::simple_shade; + bool is_nearest_filter = (flags & TriUniforms::nearest_filter) == TriUniforms::nearest_filter; + bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; + uint32_t lightmask = is_fixed_light ? 
0 : 0xffffffff; + auto colormaps = args->colormaps; + + // Calculate gradients + const TriVertex &v1 = *args->v1; + const TriVertex &v2 = *args->v2; + const TriVertex &v3 = *args->v3; + ScreenTriangleStepVariables gradientX; + ScreenTriangleStepVariables gradientY; + ScreenTriangleStepVariables start; + gradientX.W = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + gradientY.W = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + start.W = v1.w + gradientX.W * (startX - v1.x) + gradientY.W * (startY - v1.y); + for (int i = 0; i < TriVertex::NumVarying; i++) + { + gradientX.Varying[i] = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + gradientY.Varying[i] = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + start.Varying[i] = v1.varying[i] * v1.w + gradientX.Varying[i] * (startX - v1.x) + gradientY.Varying[i] * (startY - v1.y); + } + + const uint32_t * RESTRICT texPixels = (const uint32_t *)args->texturePixels; + uint32_t texWidth = args->textureWidth; + uint32_t texHeight = args->textureHeight; + + uint32_t * RESTRICT destOrg = (uint32_t*)args->dest; + int pitch = args->pitch; + + uint32_t light = args->uniforms->light; + float shade = (64.0f - (light * 255 / 256 + 12.0f) * 32.0f / 128.0f) / 32.0f; + float globVis = args->uniforms->globvis * (1.0f / 32.0f); + + uint32_t color = args->uniforms->color; + + for (int i = 0; i < numSpans; i++) + { + const auto &span = fullSpans[i]; + + uint32_t *dest = destOrg + span.X + span.Y * pitch; + int width = span.Length; + int height = 8; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (span.X - startX) + gradientY.W * (span.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (span.X - startX) + gradientY.Varying[j] * (span.Y - startY); + + 
for (int y = 0; y < height; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + for (int x = 0; x < width; x++) + { + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int ix = 0; ix < 8; ix++) + { + uint32_t fg = color; + uint32_t r = RPART(fg); + uint32_t g = GPART(fg); + uint32_t b = BPART(fg); + r = (r * lightpos) >> 16; + g = (g * lightpos) >> 16; + b = (b * lightpos) >> 16; + + uint32_t a = APART(fg); + a += a >> 7; + uint32_t inv_a = 256 - a; + uint32_t bg = dest[x * 8 + ix]; + uint32_t bg_red = RPART(bg); + uint32_t bg_green = GPART(bg); + uint32_t bg_blue = BPART(bg); + r = (r * a + bg_red * inv_a + 127) >> 8; + g = (g * a + bg_green * inv_a + 127) >> 8; + b = (b * a + bg_blue * inv_a + 127) >> 8; + fg = 0xff000000 | (r << 16) | (g << 8) | b; + dest[x * 8 + ix] = fg; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += 
gradientY.Varying[j]; + + dest += pitch; + } + } + + for (int i = 0; i < numBlocks; i++) + { + const auto &block = partialBlocks[i]; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (block.X - startX) + gradientY.W * (block.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (block.X - startX) + gradientY.Varying[j] * (block.Y - startY); + + uint32_t *dest = destOrg + block.X + block.Y * pitch; + uint32_t mask0 = block.Mask0; + uint32_t mask1 = block.Mask1; + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask0 & (1 << 31)) + { + uint32_t fg = color; + uint32_t r = RPART(fg); + uint32_t g = GPART(fg); + uint32_t b = BPART(fg); + r = (r * lightpos) >> 16; + g = (g * lightpos) >> 16; + b = (b * lightpos) >> 16; + + uint32_t a = APART(fg); + a += a >> 7; + uint32_t inv_a = 256 - a; + uint32_t bg 
= dest[x]; + uint32_t bg_red = RPART(bg); + uint32_t bg_green = GPART(bg); + uint32_t bg_blue = BPART(bg); + r = (r * a + bg_red * inv_a + 127) >> 8; + g = (g * a + bg_green * inv_a + 127) >> 8; + b = (b * a + bg_blue * inv_a + 127) >> 8; + fg = 0xff000000 | (r << 16) | (g << 8) | b; + dest[x] = fg; + } + mask0 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask1 & (1 << 31)) + { + uint32_t fg = color; + uint32_t r = RPART(fg); + uint32_t g = GPART(fg); + uint32_t b = BPART(fg); + r = (r * lightpos) >> 16; + g = (g * lightpos) >> 16; + b = (b * lightpos) >> 16; + + uint32_t a = APART(fg); + a += a >> 7; + uint32_t inv_a = 256 - 
a; + uint32_t bg = dest[x]; + uint32_t bg_red = RPART(bg); + uint32_t bg_green = GPART(bg); + uint32_t bg_blue = BPART(bg); + r = (r * a + bg_red * inv_a + 127) >> 8; + g = (g * a + bg_green * inv_a + 127) >> 8; + b = (b * a + bg_blue * inv_a + 127) >> 8; + fg = 0xff000000 | (r << 16) | (g << 8) | b; + dest[x] = fg; + } + mask1 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } +} + +static void TriFill32Add(const TriDrawTriangleArgs *args, WorkerThreadData *thread) +{ + int numSpans = thread->NumFullSpans; + auto fullSpans = thread->FullSpans; + int numBlocks = thread->NumPartialBlocks; + auto partialBlocks = thread->PartialBlocks; + int startX = thread->StartX; + int startY = thread->StartY; + + auto flags = args->uniforms->flags; + bool is_simple_shade = (flags & TriUniforms::simple_shade) == TriUniforms::simple_shade; + bool is_nearest_filter = (flags & TriUniforms::nearest_filter) == TriUniforms::nearest_filter; + bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; + uint32_t lightmask = is_fixed_light ? 
0 : 0xffffffff; + auto colormaps = args->colormaps; + + // Calculate gradients + const TriVertex &v1 = *args->v1; + const TriVertex &v2 = *args->v2; + const TriVertex &v3 = *args->v3; + ScreenTriangleStepVariables gradientX; + ScreenTriangleStepVariables gradientY; + ScreenTriangleStepVariables start; + gradientX.W = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + gradientY.W = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + start.W = v1.w + gradientX.W * (startX - v1.x) + gradientY.W * (startY - v1.y); + for (int i = 0; i < TriVertex::NumVarying; i++) + { + gradientX.Varying[i] = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + gradientY.Varying[i] = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + start.Varying[i] = v1.varying[i] * v1.w + gradientX.Varying[i] * (startX - v1.x) + gradientY.Varying[i] * (startY - v1.y); + } + + const uint32_t * RESTRICT texPixels = (const uint32_t *)args->texturePixels; + uint32_t texWidth = args->textureWidth; + uint32_t texHeight = args->textureHeight; + + uint32_t * RESTRICT destOrg = (uint32_t*)args->dest; + int pitch = args->pitch; + + uint32_t light = args->uniforms->light; + float shade = (64.0f - (light * 255 / 256 + 12.0f) * 32.0f / 128.0f) / 32.0f; + float globVis = args->uniforms->globvis * (1.0f / 32.0f); + + uint32_t color = args->uniforms->color; + + for (int i = 0; i < numSpans; i++) + { + const auto &span = fullSpans[i]; + + uint32_t *dest = destOrg + span.X + span.Y * pitch; + int width = span.Length; + int height = 8; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (span.X - startX) + gradientY.W * (span.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (span.X - startX) + gradientY.Varying[j] * (span.Y - startY); + + 
for (int y = 0; y < height; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + for (int x = 0; x < width; x++) + { + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int ix = 0; ix < 8; ix++) + { + uint32_t fg = color; + uint32_t r = RPART(fg); + uint32_t g = GPART(fg); + uint32_t b = BPART(fg); + r = (r * lightpos) >> 16; + g = (g * lightpos) >> 16; + b = (b * lightpos) >> 16; + + uint32_t a = APART(fg); + a += a >> 7; + uint32_t inv_a = 256 - a; + uint32_t bg = dest[x * 8 + ix]; + uint32_t bg_red = RPART(bg); + uint32_t bg_green = GPART(bg); + uint32_t bg_blue = BPART(bg); + r = (r * a + bg_red * inv_a + 127) >> 8; + g = (g * a + bg_green * inv_a + 127) >> 8; + b = (b * a + bg_blue * inv_a + 127) >> 8; + fg = 0xff000000 | (r << 16) | (g << 8) | b; + dest[x * 8 + ix] = fg; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += 
gradientY.Varying[j]; + + dest += pitch; + } + } + + for (int i = 0; i < numBlocks; i++) + { + const auto &block = partialBlocks[i]; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (block.X - startX) + gradientY.W * (block.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (block.X - startX) + gradientY.Varying[j] * (block.Y - startY); + + uint32_t *dest = destOrg + block.X + block.Y * pitch; + uint32_t mask0 = block.Mask0; + uint32_t mask1 = block.Mask1; + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask0 & (1 << 31)) + { + uint32_t fg = color; + uint32_t r = RPART(fg); + uint32_t g = GPART(fg); + uint32_t b = BPART(fg); + r = (r * lightpos) >> 16; + g = (g * lightpos) >> 16; + b = (b * lightpos) >> 16; + + uint32_t a = APART(fg); + a += a >> 7; + uint32_t inv_a = 256 - a; + uint32_t bg 
= dest[x]; + uint32_t bg_red = RPART(bg); + uint32_t bg_green = GPART(bg); + uint32_t bg_blue = BPART(bg); + r = (r * a + bg_red * inv_a + 127) >> 8; + g = (g * a + bg_green * inv_a + 127) >> 8; + b = (b * a + bg_blue * inv_a + 127) >> 8; + fg = 0xff000000 | (r << 16) | (g << 8) | b; + dest[x] = fg; + } + mask0 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask1 & (1 << 31)) + { + uint32_t fg = color; + uint32_t r = RPART(fg); + uint32_t g = GPART(fg); + uint32_t b = BPART(fg); + r = (r * lightpos) >> 16; + g = (g * lightpos) >> 16; + b = (b * lightpos) >> 16; + + uint32_t a = APART(fg); + a += a >> 7; + uint32_t inv_a = 256 - 
a; + uint32_t bg = dest[x]; + uint32_t bg_red = RPART(bg); + uint32_t bg_green = GPART(bg); + uint32_t bg_blue = BPART(bg); + r = (r * a + bg_red * inv_a + 127) >> 8; + g = (g * a + bg_green * inv_a + 127) >> 8; + b = (b * a + bg_blue * inv_a + 127) >> 8; + fg = 0xff000000 | (r << 16) | (g << 8) | b; + dest[x] = fg; + } + mask1 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } +} + +static void TriFill32Sub(const TriDrawTriangleArgs *args, WorkerThreadData *thread) +{ + int numSpans = thread->NumFullSpans; + auto fullSpans = thread->FullSpans; + int numBlocks = thread->NumPartialBlocks; + auto partialBlocks = thread->PartialBlocks; + int startX = thread->StartX; + int startY = thread->StartY; + + auto flags = args->uniforms->flags; + bool is_simple_shade = (flags & TriUniforms::simple_shade) == TriUniforms::simple_shade; + bool is_nearest_filter = (flags & TriUniforms::nearest_filter) == TriUniforms::nearest_filter; + bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; + uint32_t lightmask = is_fixed_light ? 
0 : 0xffffffff; + auto colormaps = args->colormaps; + + // Calculate gradients + const TriVertex &v1 = *args->v1; + const TriVertex &v2 = *args->v2; + const TriVertex &v3 = *args->v3; + ScreenTriangleStepVariables gradientX; + ScreenTriangleStepVariables gradientY; + ScreenTriangleStepVariables start; + gradientX.W = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + gradientY.W = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + start.W = v1.w + gradientX.W * (startX - v1.x) + gradientY.W * (startY - v1.y); + for (int i = 0; i < TriVertex::NumVarying; i++) + { + gradientX.Varying[i] = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + gradientY.Varying[i] = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + start.Varying[i] = v1.varying[i] * v1.w + gradientX.Varying[i] * (startX - v1.x) + gradientY.Varying[i] * (startY - v1.y); + } + + const uint32_t * RESTRICT texPixels = (const uint32_t *)args->texturePixels; + uint32_t texWidth = args->textureWidth; + uint32_t texHeight = args->textureHeight; + + uint32_t * RESTRICT destOrg = (uint32_t*)args->dest; + int pitch = args->pitch; + + uint32_t light = args->uniforms->light; + float shade = (64.0f - (light * 255 / 256 + 12.0f) * 32.0f / 128.0f) / 32.0f; + float globVis = args->uniforms->globvis * (1.0f / 32.0f); + + uint32_t color = args->uniforms->color; + + for (int i = 0; i < numSpans; i++) + { + const auto &span = fullSpans[i]; + + uint32_t *dest = destOrg + span.X + span.Y * pitch; + int width = span.Length; + int height = 8; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (span.X - startX) + gradientY.W * (span.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (span.X - startX) + gradientY.Varying[j] * (span.Y - startY); + + 
for (int y = 0; y < height; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + for (int x = 0; x < width; x++) + { + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int ix = 0; ix < 8; ix++) + { + uint32_t fg = color; + uint32_t r = RPART(fg); + uint32_t g = GPART(fg); + uint32_t b = BPART(fg); + r = (r * lightpos) >> 16; + g = (g * lightpos) >> 16; + b = (b * lightpos) >> 16; + + uint32_t a = APART(fg); + a += a >> 7; + uint32_t inv_a = 256 - a; + uint32_t bg = dest[x * 8 + ix]; + uint32_t bg_red = RPART(bg); + uint32_t bg_green = GPART(bg); + uint32_t bg_blue = BPART(bg); + r = (r * a + bg_red * inv_a + 127) >> 8; + g = (g * a + bg_green * inv_a + 127) >> 8; + b = (b * a + bg_blue * inv_a + 127) >> 8; + fg = 0xff000000 | (r << 16) | (g << 8) | b; + dest[x * 8 + ix] = fg; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += 
gradientY.Varying[j]; + + dest += pitch; + } + } + + for (int i = 0; i < numBlocks; i++) + { + const auto &block = partialBlocks[i]; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (block.X - startX) + gradientY.W * (block.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (block.X - startX) + gradientY.Varying[j] * (block.Y - startY); + + uint32_t *dest = destOrg + block.X + block.Y * pitch; + uint32_t mask0 = block.Mask0; + uint32_t mask1 = block.Mask1; + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask0 & (1 << 31)) + { + uint32_t fg = color; + uint32_t r = RPART(fg); + uint32_t g = GPART(fg); + uint32_t b = BPART(fg); + r = (r * lightpos) >> 16; + g = (g * lightpos) >> 16; + b = (b * lightpos) >> 16; + + uint32_t a = APART(fg); + a += a >> 7; + uint32_t inv_a = 256 - a; + uint32_t bg 
= dest[x]; + uint32_t bg_red = RPART(bg); + uint32_t bg_green = GPART(bg); + uint32_t bg_blue = BPART(bg); + r = (r * a + bg_red * inv_a + 127) >> 8; + g = (g * a + bg_green * inv_a + 127) >> 8; + b = (b * a + bg_blue * inv_a + 127) >> 8; + fg = 0xff000000 | (r << 16) | (g << 8) | b; + dest[x] = fg; + } + mask0 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask1 & (1 << 31)) + { + uint32_t fg = color; + uint32_t r = RPART(fg); + uint32_t g = GPART(fg); + uint32_t b = BPART(fg); + r = (r * lightpos) >> 16; + g = (g * lightpos) >> 16; + b = (b * lightpos) >> 16; + + uint32_t a = APART(fg); + a += a >> 7; + uint32_t inv_a = 256 - 
a; + uint32_t bg = dest[x]; + uint32_t bg_red = RPART(bg); + uint32_t bg_green = GPART(bg); + uint32_t bg_blue = BPART(bg); + r = (r * a + bg_red * inv_a + 127) >> 8; + g = (g * a + bg_green * inv_a + 127) >> 8; + b = (b * a + bg_blue * inv_a + 127) >> 8; + fg = 0xff000000 | (r << 16) | (g << 8) | b; + dest[x] = fg; + } + mask1 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } +} + +static void TriFill32RevSub(const TriDrawTriangleArgs *args, WorkerThreadData *thread) +{ + int numSpans = thread->NumFullSpans; + auto fullSpans = thread->FullSpans; + int numBlocks = thread->NumPartialBlocks; + auto partialBlocks = thread->PartialBlocks; + int startX = thread->StartX; + int startY = thread->StartY; + + auto flags = args->uniforms->flags; + bool is_simple_shade = (flags & TriUniforms::simple_shade) == TriUniforms::simple_shade; + bool is_nearest_filter = (flags & TriUniforms::nearest_filter) == TriUniforms::nearest_filter; + bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; + uint32_t lightmask = is_fixed_light ? 
0 : 0xffffffff; + auto colormaps = args->colormaps; + + // Calculate gradients + const TriVertex &v1 = *args->v1; + const TriVertex &v2 = *args->v2; + const TriVertex &v3 = *args->v3; + ScreenTriangleStepVariables gradientX; + ScreenTriangleStepVariables gradientY; + ScreenTriangleStepVariables start; + gradientX.W = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + gradientY.W = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + start.W = v1.w + gradientX.W * (startX - v1.x) + gradientY.W * (startY - v1.y); + for (int i = 0; i < TriVertex::NumVarying; i++) + { + gradientX.Varying[i] = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + gradientY.Varying[i] = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + start.Varying[i] = v1.varying[i] * v1.w + gradientX.Varying[i] * (startX - v1.x) + gradientY.Varying[i] * (startY - v1.y); + } + + const uint32_t * RESTRICT texPixels = (const uint32_t *)args->texturePixels; + uint32_t texWidth = args->textureWidth; + uint32_t texHeight = args->textureHeight; + + uint32_t * RESTRICT destOrg = (uint32_t*)args->dest; + int pitch = args->pitch; + + uint32_t light = args->uniforms->light; + float shade = (64.0f - (light * 255 / 256 + 12.0f) * 32.0f / 128.0f) / 32.0f; + float globVis = args->uniforms->globvis * (1.0f / 32.0f); + + uint32_t color = args->uniforms->color; + + for (int i = 0; i < numSpans; i++) + { + const auto &span = fullSpans[i]; + + uint32_t *dest = destOrg + span.X + span.Y * pitch; + int width = span.Length; + int height = 8; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (span.X - startX) + gradientY.W * (span.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (span.X - startX) + gradientY.Varying[j] * (span.Y - startY); + + 
for (int y = 0; y < height; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + for (int x = 0; x < width; x++) + { + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int ix = 0; ix < 8; ix++) + { + uint32_t fg = color; + uint32_t r = RPART(fg); + uint32_t g = GPART(fg); + uint32_t b = BPART(fg); + r = (r * lightpos) >> 16; + g = (g * lightpos) >> 16; + b = (b * lightpos) >> 16; + + uint32_t a = APART(fg); + a += a >> 7; + uint32_t inv_a = 256 - a; + uint32_t bg = dest[x * 8 + ix]; + uint32_t bg_red = RPART(bg); + uint32_t bg_green = GPART(bg); + uint32_t bg_blue = BPART(bg); + r = (r * a + bg_red * inv_a + 127) >> 8; + g = (g * a + bg_green * inv_a + 127) >> 8; + b = (b * a + bg_blue * inv_a + 127) >> 8; + fg = 0xff000000 | (r << 16) | (g << 8) | b; + dest[x * 8 + ix] = fg; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += 
gradientY.Varying[j]; + + dest += pitch; + } + } + + for (int i = 0; i < numBlocks; i++) + { + const auto &block = partialBlocks[i]; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (block.X - startX) + gradientY.W * (block.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (block.X - startX) + gradientY.Varying[j] * (block.Y - startY); + + uint32_t *dest = destOrg + block.X + block.Y * pitch; + uint32_t mask0 = block.Mask0; + uint32_t mask1 = block.Mask1; + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask0 & (1 << 31)) + { + uint32_t fg = color; + uint32_t r = RPART(fg); + uint32_t g = GPART(fg); + uint32_t b = BPART(fg); + r = (r * lightpos) >> 16; + g = (g * lightpos) >> 16; + b = (b * lightpos) >> 16; + + uint32_t a = APART(fg); + a += a >> 7; + uint32_t inv_a = 256 - a; + uint32_t bg 
= dest[x]; + uint32_t bg_red = RPART(bg); + uint32_t bg_green = GPART(bg); + uint32_t bg_blue = BPART(bg); + r = (r * a + bg_red * inv_a + 127) >> 8; + g = (g * a + bg_green * inv_a + 127) >> 8; + b = (b * a + bg_blue * inv_a + 127) >> 8; + fg = 0xff000000 | (r << 16) | (g << 8) | b; + dest[x] = fg; + } + mask0 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask1 & (1 << 31)) + { + uint32_t fg = color; + uint32_t r = RPART(fg); + uint32_t g = GPART(fg); + uint32_t b = BPART(fg); + r = (r * lightpos) >> 16; + g = (g * lightpos) >> 16; + b = (b * lightpos) >> 16; + + uint32_t a = APART(fg); + a += a >> 7; + uint32_t inv_a = 256 - 
a; + uint32_t bg = dest[x]; + uint32_t bg_red = RPART(bg); + uint32_t bg_green = GPART(bg); + uint32_t bg_blue = BPART(bg); + r = (r * a + bg_red * inv_a + 127) >> 8; + g = (g * a + bg_green * inv_a + 127) >> 8; + b = (b * a + bg_blue * inv_a + 127) >> 8; + fg = 0xff000000 | (r << 16) | (g << 8) | b; + dest[x] = fg; + } + mask1 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } +} + +static void TriFill32Stencil(const TriDrawTriangleArgs *args, WorkerThreadData *thread) +{ + int numSpans = thread->NumFullSpans; + auto fullSpans = thread->FullSpans; + int numBlocks = thread->NumPartialBlocks; + auto partialBlocks = thread->PartialBlocks; + int startX = thread->StartX; + int startY = thread->StartY; + + auto flags = args->uniforms->flags; + bool is_simple_shade = (flags & TriUniforms::simple_shade) == TriUniforms::simple_shade; + bool is_nearest_filter = (flags & TriUniforms::nearest_filter) == TriUniforms::nearest_filter; + bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; + uint32_t lightmask = is_fixed_light ? 
0 : 0xffffffff; + auto colormaps = args->colormaps; + + // Calculate gradients + const TriVertex &v1 = *args->v1; + const TriVertex &v2 = *args->v2; + const TriVertex &v3 = *args->v3; + ScreenTriangleStepVariables gradientX; + ScreenTriangleStepVariables gradientY; + ScreenTriangleStepVariables start; + gradientX.W = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + gradientY.W = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + start.W = v1.w + gradientX.W * (startX - v1.x) + gradientY.W * (startY - v1.y); + for (int i = 0; i < TriVertex::NumVarying; i++) + { + gradientX.Varying[i] = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + gradientY.Varying[i] = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + start.Varying[i] = v1.varying[i] * v1.w + gradientX.Varying[i] * (startX - v1.x) + gradientY.Varying[i] * (startY - v1.y); + } + + const uint32_t * RESTRICT texPixels = (const uint32_t *)args->texturePixels; + uint32_t texWidth = args->textureWidth; + uint32_t texHeight = args->textureHeight; + + uint32_t * RESTRICT destOrg = (uint32_t*)args->dest; + int pitch = args->pitch; + + uint32_t light = args->uniforms->light; + float shade = (64.0f - (light * 255 / 256 + 12.0f) * 32.0f / 128.0f) / 32.0f; + float globVis = args->uniforms->globvis * (1.0f / 32.0f); + + uint32_t color = args->uniforms->color; + + for (int i = 0; i < numSpans; i++) + { + const auto &span = fullSpans[i]; + + uint32_t *dest = destOrg + span.X + span.Y * pitch; + int width = span.Length; + int height = 8; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (span.X - startX) + gradientY.W * (span.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (span.X - startX) + gradientY.Varying[j] * (span.Y - startY); + + 
for (int y = 0; y < height; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + for (int x = 0; x < width; x++) + { + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int ix = 0; ix < 8; ix++) + { + uint32_t fg = color; + uint32_t r = RPART(fg); + uint32_t g = GPART(fg); + uint32_t b = BPART(fg); + r = (r * lightpos) >> 16; + g = (g * lightpos) >> 16; + b = (b * lightpos) >> 16; + + uint32_t a = APART(fg); + a += a >> 7; + uint32_t inv_a = 256 - a; + uint32_t bg = dest[x * 8 + ix]; + uint32_t bg_red = RPART(bg); + uint32_t bg_green = GPART(bg); + uint32_t bg_blue = BPART(bg); + r = (r * a + bg_red * inv_a + 127) >> 8; + g = (g * a + bg_green * inv_a + 127) >> 8; + b = (b * a + bg_blue * inv_a + 127) >> 8; + fg = 0xff000000 | (r << 16) | (g << 8) | b; + dest[x * 8 + ix] = fg; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += 
gradientY.Varying[j]; + + dest += pitch; + } + } + + for (int i = 0; i < numBlocks; i++) + { + const auto &block = partialBlocks[i]; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (block.X - startX) + gradientY.W * (block.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (block.X - startX) + gradientY.Varying[j] * (block.Y - startY); + + uint32_t *dest = destOrg + block.X + block.Y * pitch; + uint32_t mask0 = block.Mask0; + uint32_t mask1 = block.Mask1; + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask0 & (1 << 31)) + { + uint32_t fg = color; + uint32_t r = RPART(fg); + uint32_t g = GPART(fg); + uint32_t b = BPART(fg); + r = (r * lightpos) >> 16; + g = (g * lightpos) >> 16; + b = (b * lightpos) >> 16; + + uint32_t a = APART(fg); + a += a >> 7; + uint32_t inv_a = 256 - a; + uint32_t bg 
= dest[x]; + uint32_t bg_red = RPART(bg); + uint32_t bg_green = GPART(bg); + uint32_t bg_blue = BPART(bg); + r = (r * a + bg_red * inv_a + 127) >> 8; + g = (g * a + bg_green * inv_a + 127) >> 8; + b = (b * a + bg_blue * inv_a + 127) >> 8; + fg = 0xff000000 | (r << 16) | (g << 8) | b; + dest[x] = fg; + } + mask0 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask1 & (1 << 31)) + { + uint32_t fg = color; + uint32_t r = RPART(fg); + uint32_t g = GPART(fg); + uint32_t b = BPART(fg); + r = (r * lightpos) >> 16; + g = (g * lightpos) >> 16; + b = (b * lightpos) >> 16; + + uint32_t a = APART(fg); + a += a >> 7; + uint32_t inv_a = 256 - 
a; + uint32_t bg = dest[x]; + uint32_t bg_red = RPART(bg); + uint32_t bg_green = GPART(bg); + uint32_t bg_blue = BPART(bg); + r = (r * a + bg_red * inv_a + 127) >> 8; + g = (g * a + bg_green * inv_a + 127) >> 8; + b = (b * a + bg_blue * inv_a + 127) >> 8; + fg = 0xff000000 | (r << 16) | (g << 8) | b; + dest[x] = fg; + } + mask1 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } +} + +static void TriFill32Shaded(const TriDrawTriangleArgs *args, WorkerThreadData *thread) +{ + int numSpans = thread->NumFullSpans; + auto fullSpans = thread->FullSpans; + int numBlocks = thread->NumPartialBlocks; + auto partialBlocks = thread->PartialBlocks; + int startX = thread->StartX; + int startY = thread->StartY; + + auto flags = args->uniforms->flags; + bool is_simple_shade = (flags & TriUniforms::simple_shade) == TriUniforms::simple_shade; + bool is_nearest_filter = (flags & TriUniforms::nearest_filter) == TriUniforms::nearest_filter; + bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; + uint32_t lightmask = is_fixed_light ? 
0 : 0xffffffff; + auto colormaps = args->colormaps; + + // Calculate gradients + const TriVertex &v1 = *args->v1; + const TriVertex &v2 = *args->v2; + const TriVertex &v3 = *args->v3; + ScreenTriangleStepVariables gradientX; + ScreenTriangleStepVariables gradientY; + ScreenTriangleStepVariables start; + gradientX.W = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + gradientY.W = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + start.W = v1.w + gradientX.W * (startX - v1.x) + gradientY.W * (startY - v1.y); + for (int i = 0; i < TriVertex::NumVarying; i++) + { + gradientX.Varying[i] = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + gradientY.Varying[i] = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + start.Varying[i] = v1.varying[i] * v1.w + gradientX.Varying[i] * (startX - v1.x) + gradientY.Varying[i] * (startY - v1.y); + } + + const uint32_t * RESTRICT texPixels = (const uint32_t *)args->texturePixels; + uint32_t texWidth = args->textureWidth; + uint32_t texHeight = args->textureHeight; + + uint32_t * RESTRICT destOrg = (uint32_t*)args->dest; + int pitch = args->pitch; + + uint32_t light = args->uniforms->light; + float shade = (64.0f - (light * 255 / 256 + 12.0f) * 32.0f / 128.0f) / 32.0f; + float globVis = args->uniforms->globvis * (1.0f / 32.0f); + + uint32_t color = args->uniforms->color; + + for (int i = 0; i < numSpans; i++) + { + const auto &span = fullSpans[i]; + + uint32_t *dest = destOrg + span.X + span.Y * pitch; + int width = span.Length; + int height = 8; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (span.X - startX) + gradientY.W * (span.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (span.X - startX) + gradientY.Varying[j] * (span.Y - startY); + + 
for (int y = 0; y < height; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + for (int x = 0; x < width; x++) + { + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int ix = 0; ix < 8; ix++) + { + uint32_t fg = color; + uint32_t r = RPART(fg); + uint32_t g = GPART(fg); + uint32_t b = BPART(fg); + r = (r * lightpos) >> 16; + g = (g * lightpos) >> 16; + b = (b * lightpos) >> 16; + + uint32_t a = APART(fg); + a += a >> 7; + uint32_t inv_a = 256 - a; + uint32_t bg = dest[x * 8 + ix]; + uint32_t bg_red = RPART(bg); + uint32_t bg_green = GPART(bg); + uint32_t bg_blue = BPART(bg); + r = (r * a + bg_red * inv_a + 127) >> 8; + g = (g * a + bg_green * inv_a + 127) >> 8; + b = (b * a + bg_blue * inv_a + 127) >> 8; + fg = 0xff000000 | (r << 16) | (g << 8) | b; + dest[x * 8 + ix] = fg; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += 
gradientY.Varying[j]; + + dest += pitch; + } + } + + for (int i = 0; i < numBlocks; i++) + { + const auto &block = partialBlocks[i]; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (block.X - startX) + gradientY.W * (block.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (block.X - startX) + gradientY.Varying[j] * (block.Y - startY); + + uint32_t *dest = destOrg + block.X + block.Y * pitch; + uint32_t mask0 = block.Mask0; + uint32_t mask1 = block.Mask1; + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask0 & (1 << 31)) + { + uint32_t fg = color; + uint32_t r = RPART(fg); + uint32_t g = GPART(fg); + uint32_t b = BPART(fg); + r = (r * lightpos) >> 16; + g = (g * lightpos) >> 16; + b = (b * lightpos) >> 16; + + uint32_t a = APART(fg); + a += a >> 7; + uint32_t inv_a = 256 - a; + uint32_t bg 
= dest[x]; + uint32_t bg_red = RPART(bg); + uint32_t bg_green = GPART(bg); + uint32_t bg_blue = BPART(bg); + r = (r * a + bg_red * inv_a + 127) >> 8; + g = (g * a + bg_green * inv_a + 127) >> 8; + b = (b * a + bg_blue * inv_a + 127) >> 8; + fg = 0xff000000 | (r << 16) | (g << 8) | b; + dest[x] = fg; + } + mask0 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask1 & (1 << 31)) + { + uint32_t fg = color; + uint32_t r = RPART(fg); + uint32_t g = GPART(fg); + uint32_t b = BPART(fg); + r = (r * lightpos) >> 16; + g = (g * lightpos) >> 16; + b = (b * lightpos) >> 16; + + uint32_t a = APART(fg); + a += a >> 7; + uint32_t inv_a = 256 - 
a; + uint32_t bg = dest[x]; + uint32_t bg_red = RPART(bg); + uint32_t bg_green = GPART(bg); + uint32_t bg_blue = BPART(bg); + r = (r * a + bg_red * inv_a + 127) >> 8; + g = (g * a + bg_green * inv_a + 127) >> 8; + b = (b * a + bg_blue * inv_a + 127) >> 8; + fg = 0xff000000 | (r << 16) | (g << 8) | b; + dest[x] = fg; + } + mask1 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } +} + +static void TriFill32TranslateCopy(const TriDrawTriangleArgs *args, WorkerThreadData *thread) +{ + int numSpans = thread->NumFullSpans; + auto fullSpans = thread->FullSpans; + int numBlocks = thread->NumPartialBlocks; + auto partialBlocks = thread->PartialBlocks; + int startX = thread->StartX; + int startY = thread->StartY; + + auto flags = args->uniforms->flags; + bool is_simple_shade = (flags & TriUniforms::simple_shade) == TriUniforms::simple_shade; + bool is_nearest_filter = (flags & TriUniforms::nearest_filter) == TriUniforms::nearest_filter; + bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; + uint32_t lightmask = is_fixed_light ? 
0 : 0xffffffff; + auto colormaps = args->colormaps; + + // Calculate gradients + const TriVertex &v1 = *args->v1; + const TriVertex &v2 = *args->v2; + const TriVertex &v3 = *args->v3; + ScreenTriangleStepVariables gradientX; + ScreenTriangleStepVariables gradientY; + ScreenTriangleStepVariables start; + gradientX.W = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + gradientY.W = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + start.W = v1.w + gradientX.W * (startX - v1.x) + gradientY.W * (startY - v1.y); + for (int i = 0; i < TriVertex::NumVarying; i++) + { + gradientX.Varying[i] = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + gradientY.Varying[i] = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + start.Varying[i] = v1.varying[i] * v1.w + gradientX.Varying[i] * (startX - v1.x) + gradientY.Varying[i] * (startY - v1.y); + } + + const uint32_t * RESTRICT texPixels = (const uint32_t *)args->texturePixels; + uint32_t texWidth = args->textureWidth; + uint32_t texHeight = args->textureHeight; + + uint32_t * RESTRICT destOrg = (uint32_t*)args->dest; + int pitch = args->pitch; + + uint32_t light = args->uniforms->light; + float shade = (64.0f - (light * 255 / 256 + 12.0f) * 32.0f / 128.0f) / 32.0f; + float globVis = args->uniforms->globvis * (1.0f / 32.0f); + + uint32_t color = args->uniforms->color; + + for (int i = 0; i < numSpans; i++) + { + const auto &span = fullSpans[i]; + + uint32_t *dest = destOrg + span.X + span.Y * pitch; + int width = span.Length; + int height = 8; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (span.X - startX) + gradientY.W * (span.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (span.X - startX) + gradientY.Varying[j] * (span.Y - startY); + + 
for (int y = 0; y < height; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + for (int x = 0; x < width; x++) + { + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int ix = 0; ix < 8; ix++) + { + uint32_t fg = color; + uint32_t r = RPART(fg); + uint32_t g = GPART(fg); + uint32_t b = BPART(fg); + r = (r * lightpos) >> 16; + g = (g * lightpos) >> 16; + b = (b * lightpos) >> 16; + + uint32_t a = APART(fg); + a += a >> 7; + uint32_t inv_a = 256 - a; + uint32_t bg = dest[x * 8 + ix]; + uint32_t bg_red = RPART(bg); + uint32_t bg_green = GPART(bg); + uint32_t bg_blue = BPART(bg); + r = (r * a + bg_red * inv_a + 127) >> 8; + g = (g * a + bg_green * inv_a + 127) >> 8; + b = (b * a + bg_blue * inv_a + 127) >> 8; + fg = 0xff000000 | (r << 16) | (g << 8) | b; + dest[x * 8 + ix] = fg; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += 
gradientY.Varying[j]; + + dest += pitch; + } + } + + for (int i = 0; i < numBlocks; i++) + { + const auto &block = partialBlocks[i]; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (block.X - startX) + gradientY.W * (block.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (block.X - startX) + gradientY.Varying[j] * (block.Y - startY); + + uint32_t *dest = destOrg + block.X + block.Y * pitch; + uint32_t mask0 = block.Mask0; + uint32_t mask1 = block.Mask1; + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask0 & (1 << 31)) + { + uint32_t fg = color; + uint32_t r = RPART(fg); + uint32_t g = GPART(fg); + uint32_t b = BPART(fg); + r = (r * lightpos) >> 16; + g = (g * lightpos) >> 16; + b = (b * lightpos) >> 16; + + uint32_t a = APART(fg); + a += a >> 7; + uint32_t inv_a = 256 - a; + uint32_t bg 
= dest[x]; + uint32_t bg_red = RPART(bg); + uint32_t bg_green = GPART(bg); + uint32_t bg_blue = BPART(bg); + r = (r * a + bg_red * inv_a + 127) >> 8; + g = (g * a + bg_green * inv_a + 127) >> 8; + b = (b * a + bg_blue * inv_a + 127) >> 8; + fg = 0xff000000 | (r << 16) | (g << 8) | b; + dest[x] = fg; + } + mask0 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask1 & (1 << 31)) + { + uint32_t fg = color; + uint32_t r = RPART(fg); + uint32_t g = GPART(fg); + uint32_t b = BPART(fg); + r = (r * lightpos) >> 16; + g = (g * lightpos) >> 16; + b = (b * lightpos) >> 16; + + uint32_t a = APART(fg); + a += a >> 7; + uint32_t inv_a = 256 - 
a; + uint32_t bg = dest[x]; + uint32_t bg_red = RPART(bg); + uint32_t bg_green = GPART(bg); + uint32_t bg_blue = BPART(bg); + r = (r * a + bg_red * inv_a + 127) >> 8; + g = (g * a + bg_green * inv_a + 127) >> 8; + b = (b * a + bg_blue * inv_a + 127) >> 8; + fg = 0xff000000 | (r << 16) | (g << 8) | b; + dest[x] = fg; + } + mask1 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } +} + +static void TriFill32TranslateAlphaBlend(const TriDrawTriangleArgs *args, WorkerThreadData *thread) +{ + int numSpans = thread->NumFullSpans; + auto fullSpans = thread->FullSpans; + int numBlocks = thread->NumPartialBlocks; + auto partialBlocks = thread->PartialBlocks; + int startX = thread->StartX; + int startY = thread->StartY; + + auto flags = args->uniforms->flags; + bool is_simple_shade = (flags & TriUniforms::simple_shade) == TriUniforms::simple_shade; + bool is_nearest_filter = (flags & TriUniforms::nearest_filter) == TriUniforms::nearest_filter; + bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; + uint32_t lightmask = is_fixed_light ? 
0 : 0xffffffff; + auto colormaps = args->colormaps; + + // Calculate gradients + const TriVertex &v1 = *args->v1; + const TriVertex &v2 = *args->v2; + const TriVertex &v3 = *args->v3; + ScreenTriangleStepVariables gradientX; + ScreenTriangleStepVariables gradientY; + ScreenTriangleStepVariables start; + gradientX.W = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + gradientY.W = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + start.W = v1.w + gradientX.W * (startX - v1.x) + gradientY.W * (startY - v1.y); + for (int i = 0; i < TriVertex::NumVarying; i++) + { + gradientX.Varying[i] = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + gradientY.Varying[i] = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + start.Varying[i] = v1.varying[i] * v1.w + gradientX.Varying[i] * (startX - v1.x) + gradientY.Varying[i] * (startY - v1.y); + } + + const uint32_t * RESTRICT texPixels = (const uint32_t *)args->texturePixels; + uint32_t texWidth = args->textureWidth; + uint32_t texHeight = args->textureHeight; + + uint32_t * RESTRICT destOrg = (uint32_t*)args->dest; + int pitch = args->pitch; + + uint32_t light = args->uniforms->light; + float shade = (64.0f - (light * 255 / 256 + 12.0f) * 32.0f / 128.0f) / 32.0f; + float globVis = args->uniforms->globvis * (1.0f / 32.0f); + + uint32_t color = args->uniforms->color; + + for (int i = 0; i < numSpans; i++) + { + const auto &span = fullSpans[i]; + + uint32_t *dest = destOrg + span.X + span.Y * pitch; + int width = span.Length; + int height = 8; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (span.X - startX) + gradientY.W * (span.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (span.X - startX) + gradientY.Varying[j] * (span.Y - startY); + + 
for (int y = 0; y < height; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + for (int x = 0; x < width; x++) + { + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int ix = 0; ix < 8; ix++) + { + uint32_t fg = color; + uint32_t r = RPART(fg); + uint32_t g = GPART(fg); + uint32_t b = BPART(fg); + r = (r * lightpos) >> 16; + g = (g * lightpos) >> 16; + b = (b * lightpos) >> 16; + + uint32_t a = APART(fg); + a += a >> 7; + uint32_t inv_a = 256 - a; + uint32_t bg = dest[x * 8 + ix]; + uint32_t bg_red = RPART(bg); + uint32_t bg_green = GPART(bg); + uint32_t bg_blue = BPART(bg); + r = (r * a + bg_red * inv_a + 127) >> 8; + g = (g * a + bg_green * inv_a + 127) >> 8; + b = (b * a + bg_blue * inv_a + 127) >> 8; + fg = 0xff000000 | (r << 16) | (g << 8) | b; + dest[x * 8 + ix] = fg; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += 
gradientY.Varying[j]; + + dest += pitch; + } + } + + for (int i = 0; i < numBlocks; i++) + { + const auto &block = partialBlocks[i]; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (block.X - startX) + gradientY.W * (block.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (block.X - startX) + gradientY.Varying[j] * (block.Y - startY); + + uint32_t *dest = destOrg + block.X + block.Y * pitch; + uint32_t mask0 = block.Mask0; + uint32_t mask1 = block.Mask1; + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask0 & (1 << 31)) + { + uint32_t fg = color; + uint32_t r = RPART(fg); + uint32_t g = GPART(fg); + uint32_t b = BPART(fg); + r = (r * lightpos) >> 16; + g = (g * lightpos) >> 16; + b = (b * lightpos) >> 16; + + uint32_t a = APART(fg); + a += a >> 7; + uint32_t inv_a = 256 - a; + uint32_t bg 
= dest[x]; + uint32_t bg_red = RPART(bg); + uint32_t bg_green = GPART(bg); + uint32_t bg_blue = BPART(bg); + r = (r * a + bg_red * inv_a + 127) >> 8; + g = (g * a + bg_green * inv_a + 127) >> 8; + b = (b * a + bg_blue * inv_a + 127) >> 8; + fg = 0xff000000 | (r << 16) | (g << 8) | b; + dest[x] = fg; + } + mask0 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask1 & (1 << 31)) + { + uint32_t fg = color; + uint32_t r = RPART(fg); + uint32_t g = GPART(fg); + uint32_t b = BPART(fg); + r = (r * lightpos) >> 16; + g = (g * lightpos) >> 16; + b = (b * lightpos) >> 16; + + uint32_t a = APART(fg); + a += a >> 7; + uint32_t inv_a = 256 - 
a; + uint32_t bg = dest[x]; + uint32_t bg_red = RPART(bg); + uint32_t bg_green = GPART(bg); + uint32_t bg_blue = BPART(bg); + r = (r * a + bg_red * inv_a + 127) >> 8; + g = (g * a + bg_green * inv_a + 127) >> 8; + b = (b * a + bg_blue * inv_a + 127) >> 8; + fg = 0xff000000 | (r << 16) | (g << 8) | b; + dest[x] = fg; + } + mask1 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } +} +
+
+// Shared implementation of the solid-colour "fill" triangle drawers below.
+//
+// Every covered pixel is written as
+//     dest = 0xff000000 | blend(lit(uniforms->color), dest)
+// where lit() scales the colour's R/G/B by the current light level
+// ((channel * lightpos) >> 16) and blend() is a source-alpha blend using the
+// alpha byte of uniforms->color widened from 0..255 to 0..256.
+//
+// args   - triangle vertices (v1..v3), destination buffer/pitch and uniforms
+//          (light, globvis, flags, color).
+// thread - coverage produced for this worker: FullSpans (fully covered, 8 rows
+//          tall, Length groups of 8 pixels wide) and PartialBlocks (8x8 blocks
+//          with per-pixel coverage masks), plus the StartX/StartY origin the
+//          W gradient is anchored to.
+//
+// The light level is evaluated from the interpolated W once per 8 pixels and
+// stepped linearly in between. With TriUniforms::fixed_light set the level is
+// pinned to (light << 8) and the step is forced to zero.
+//
+// The texture pointers, colormaps, shade/filter flags and the per-pixel
+// varying interpolation that the previous copies of this code carried were
+// never read when producing a pixel, so they are not computed here.
+static void TriFill32ColorBlendCommon(const TriDrawTriangleArgs *args, WorkerThreadData *thread)
+{
+	// Use the exact type of the interpolated W so rounding matches the
+	// ScreenTriangleStepVariables-based code this replaces.
+	typedef decltype(ScreenTriangleStepVariables::W) WType;
+
+	const int numSpans = thread->NumFullSpans;
+	const auto fullSpans = thread->FullSpans;
+	const int numBlocks = thread->NumPartialBlocks;
+	const auto partialBlocks = thread->PartialBlocks;
+	const int startX = thread->StartX;
+	const int startY = thread->StartY;
+
+	const auto flags = args->uniforms->flags;
+	const bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light;
+	// All ones: keep the computed light/step. Zero: replace with the fixed level.
+	const uint32_t lightmask = is_fixed_light ? 0 : 0xffffffff;
+
+	// Screen-space gradient of W and its value at the (startX, startY) origin.
+	const TriVertex &v1 = *args->v1;
+	const TriVertex &v2 = *args->v2;
+	const TriVertex &v3 = *args->v3;
+	const WType gradXW = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w);
+	const WType gradYW = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w);
+	const WType startW = v1.w + gradXW * (startX - v1.x) + gradYW * (startY - v1.y);
+
+	uint32_t * RESTRICT destOrg = (uint32_t*)args->dest;
+	const int pitch = args->pitch;
+
+	const uint32_t light = args->uniforms->light;
+	const float shade = (64.0f - (light * 255 / 256 + 12.0f) * 32.0f / 128.0f) / 32.0f;
+	const float globVis = args->uniforms->globvis * (1.0f / 32.0f);
+
+	// The fill colour is constant for the whole triangle: split it once.
+	const uint32_t color = args->uniforms->color;
+	const uint32_t srcR = RPART(color);
+	const uint32_t srcG = GPART(color);
+	const uint32_t srcB = BPART(color);
+	uint32_t a = APART(color);
+	a += a >> 7; // 255 -> 256 so a fully opaque colour replaces the background
+	const uint32_t inv_a = 256 - a;
+
+	// Light level (scaled by FRACUNIT) for a given interpolated W.
+	auto lightAt = [shade, globVis](WType w) -> int
+	{
+		return FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * w), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT);
+	};
+
+	// Light the fill colour and blend it over one background pixel.
+	auto blend = [srcR, srcG, srcB, a, inv_a](uint32_t bg, int lightpos) -> uint32_t
+	{
+		uint32_t r = (srcR * lightpos) >> 16;
+		uint32_t g = (srcG * lightpos) >> 16;
+		uint32_t b = (srcB * lightpos) >> 16;
+
+		const uint32_t bg_red = RPART(bg);
+		const uint32_t bg_green = GPART(bg);
+		const uint32_t bg_blue = BPART(bg);
+		r = (r * a + bg_red * inv_a + 127) >> 8;
+		g = (g * a + bg_green * inv_a + 127) >> 8;
+		b = (b * a + bg_blue * inv_a + 127) >> 8;
+		return 0xff000000 | (r << 16) | (g << 8) | b;
+	};
+
+	// Fully covered spans: 8 rows, span.Length groups of 8 pixels per row.
+	for (int i = 0; i < numSpans; i++)
+	{
+		const auto &span = fullSpans[i];
+
+		uint32_t *dest = destOrg + span.X + span.Y * pitch;
+		const int width = span.Length;
+
+		WType rowW = startW + gradXW * (span.X - startX) + gradYW * (span.Y - startY);
+
+		for (int y = 0; y < 8; y++)
+		{
+			WType posW = rowW;
+
+			int lightpos = lightAt(rowW);
+			lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask);
+
+			for (int x = 0; x < width; x++)
+			{
+				// Light at the far edge of this 8 pixel group; step towards it.
+				posW += gradXW * 8;
+				const int lightnext = lightAt(posW);
+				int lightstep = (lightnext - lightpos) / 8;
+				lightstep = lightstep & lightmask;
+
+				for (int ix = 0; ix < 8; ix++)
+				{
+					dest[x * 8 + ix] = blend(dest[x * 8 + ix], lightpos);
+					lightpos += lightstep;
+				}
+			}
+
+			rowW += gradYW;
+			dest += pitch;
+		}
+	}
+
+	// Partially covered 8x8 blocks: Mask0 holds rows 0-3, Mask1 rows 4-7,
+	// one bit per pixel, most significant bit first.
+	for (int i = 0; i < numBlocks; i++)
+	{
+		const auto &block = partialBlocks[i];
+
+		WType rowW = startW + gradXW * (block.X - startX) + gradYW * (block.Y - startY);
+
+		uint32_t *dest = destOrg + block.X + block.Y * pitch;
+		const uint32_t masks[2] = { block.Mask0, block.Mask1 };
+
+		for (int half = 0; half < 2; half++)
+		{
+			uint32_t mask = masks[half];
+
+			for (int y = 0; y < 4; y++)
+			{
+				int lightpos = lightAt(rowW);
+				lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask);
+
+				WType nextW = rowW;
+				nextW += gradXW * 8;
+				const int lightnext = lightAt(nextW);
+				int lightstep = (lightnext - lightpos) / 8;
+				lightstep = lightstep & lightmask;
+
+				for (int x = 0; x < 8; x++)
+				{
+					// Unsigned constant: avoids shifting a 1 into the sign bit of an int.
+					if (mask & 0x80000000u)
+						dest[x] = blend(dest[x], lightpos);
+					mask <<= 1;
+					lightpos += lightstep;
+				}
+
+				rowW += gradYW;
+				dest += pitch;
+			}
+		}
+	}
+}
+
+// Solid-colour fill, "TranslateAdd" variant.
+// NOTE(review): this variant used the same source-alpha blend as the other
+// three wrappers below (the bodies were identical); confirm whether a true
+// additive blend was intended before changing it.
+static void TriFill32TranslateAdd(const TriDrawTriangleArgs *args, WorkerThreadData *thread)
+{
+	TriFill32ColorBlendCommon(args, thread);
+}
+
+// Solid-colour fill, "TranslateSub" variant.
+// NOTE(review): same source-alpha blend as TriFill32TranslateAdd; confirm
+// whether a subtractive blend was intended.
+static void TriFill32TranslateSub(const TriDrawTriangleArgs *args, WorkerThreadData *thread)
+{
+	TriFill32ColorBlendCommon(args, thread);
+}
+
+// Solid-colour fill, "TranslateRevSub" variant.
+// NOTE(review): same source-alpha blend as TriFill32TranslateAdd; confirm
+// whether a reverse-subtractive blend was intended.
+static void TriFill32TranslateRevSub(const TriDrawTriangleArgs *args, WorkerThreadData *thread)
+{
+	TriFill32ColorBlendCommon(args, thread);
+}
+
+// Solid-colour fill, "AddSrcColorOneMinusSrcColor" variant.
+// NOTE(review): same source-alpha blend as TriFill32TranslateAdd; confirm
+// whether a src-colour weighted blend was intended.
+static void TriFill32AddSrcColorOneMinusSrcColor(const TriDrawTriangleArgs *args, WorkerThreadData *thread)
+{
+	TriFill32ColorBlendCommon(args, thread);
+}
+
+static void TriFill32Skycap(const TriDrawTriangleArgs *args, WorkerThreadData *thread) +{ + int numSpans = thread->NumFullSpans; + auto fullSpans = thread->FullSpans; + int numBlocks = thread->NumPartialBlocks; + auto partialBlocks = thread->PartialBlocks; + int startX = thread->StartX; + int startY = thread->StartY; + + auto flags = args->uniforms->flags; + bool is_simple_shade = (flags & TriUniforms::simple_shade) == TriUniforms::simple_shade; + bool is_nearest_filter = (flags & TriUniforms::nearest_filter) == TriUniforms::nearest_filter; + bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; + uint32_t lightmask = is_fixed_light ? 
0 : 0xffffffff; + auto colormaps = args->colormaps; + + // Calculate gradients + const TriVertex &v1 = *args->v1; + const TriVertex &v2 = *args->v2; + const TriVertex &v3 = *args->v3; + ScreenTriangleStepVariables gradientX; + ScreenTriangleStepVariables gradientY; + ScreenTriangleStepVariables start; + gradientX.W = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + gradientY.W = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + start.W = v1.w + gradientX.W * (startX - v1.x) + gradientY.W * (startY - v1.y); + for (int i = 0; i < TriVertex::NumVarying; i++) + { + gradientX.Varying[i] = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + gradientY.Varying[i] = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + start.Varying[i] = v1.varying[i] * v1.w + gradientX.Varying[i] * (startX - v1.x) + gradientY.Varying[i] * (startY - v1.y); + } + + const uint32_t * RESTRICT texPixels = (const uint32_t *)args->texturePixels; + uint32_t texWidth = args->textureWidth; + uint32_t texHeight = args->textureHeight; + + uint32_t * RESTRICT destOrg = (uint32_t*)args->dest; + int pitch = args->pitch; + + uint32_t light = args->uniforms->light; + float shade = (64.0f - (light * 255 / 256 + 12.0f) * 32.0f / 128.0f) / 32.0f; + float globVis = args->uniforms->globvis * (1.0f / 32.0f); + + uint32_t color = args->uniforms->color; + + for (int i = 0; i < numSpans; i++) + { + const auto &span = fullSpans[i]; + + uint32_t *dest = destOrg + span.X + span.Y * pitch; + int width = span.Length; + int height = 8; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (span.X - startX) + gradientY.W * (span.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (span.X - startX) + gradientY.Varying[j] * (span.Y - startY); + + 
for (int y = 0; y < height; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + for (int x = 0; x < width; x++) + { + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int ix = 0; ix < 8; ix++) + { + uint32_t fg = color; + uint32_t r = RPART(fg); + uint32_t g = GPART(fg); + uint32_t b = BPART(fg); + r = (r * lightpos) >> 16; + g = (g * lightpos) >> 16; + b = (b * lightpos) >> 16; + + uint32_t a = APART(fg); + a += a >> 7; + uint32_t inv_a = 256 - a; + uint32_t bg = dest[x * 8 + ix]; + uint32_t bg_red = RPART(bg); + uint32_t bg_green = GPART(bg); + uint32_t bg_blue = BPART(bg); + r = (r * a + bg_red * inv_a + 127) >> 8; + g = (g * a + bg_green * inv_a + 127) >> 8; + b = (b * a + bg_blue * inv_a + 127) >> 8; + fg = 0xff000000 | (r << 16) | (g << 8) | b; + dest[x * 8 + ix] = fg; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += 
gradientY.Varying[j]; + + dest += pitch; + } + } + + for (int i = 0; i < numBlocks; i++) + { + const auto &block = partialBlocks[i]; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (block.X - startX) + gradientY.W * (block.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (block.X - startX) + gradientY.Varying[j] * (block.Y - startY); + + uint32_t *dest = destOrg + block.X + block.Y * pitch; + uint32_t mask0 = block.Mask0; + uint32_t mask1 = block.Mask1; + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask0 & (1 << 31)) + { + uint32_t fg = color; + uint32_t r = RPART(fg); + uint32_t g = GPART(fg); + uint32_t b = BPART(fg); + r = (r * lightpos) >> 16; + g = (g * lightpos) >> 16; + b = (b * lightpos) >> 16; + + uint32_t a = APART(fg); + a += a >> 7; + uint32_t inv_a = 256 - a; + uint32_t bg 
= dest[x]; + uint32_t bg_red = RPART(bg); + uint32_t bg_green = GPART(bg); + uint32_t bg_blue = BPART(bg); + r = (r * a + bg_red * inv_a + 127) >> 8; + g = (g * a + bg_green * inv_a + 127) >> 8; + b = (b * a + bg_blue * inv_a + 127) >> 8; + fg = 0xff000000 | (r << 16) | (g << 8) | b; + dest[x] = fg; + } + mask0 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask1 & (1 << 31)) + { + uint32_t fg = color; + uint32_t r = RPART(fg); + uint32_t g = GPART(fg); + uint32_t b = BPART(fg); + r = (r * lightpos) >> 16; + g = (g * lightpos) >> 16; + b = (b * lightpos) >> 16; + + uint32_t a = APART(fg); + a += a >> 7; + uint32_t inv_a = 256 - 
a; + uint32_t bg = dest[x]; + uint32_t bg_red = RPART(bg); + uint32_t bg_green = GPART(bg); + uint32_t bg_blue = BPART(bg); + r = (r * a + bg_red * inv_a + 127) >> 8; + g = (g * a + bg_green * inv_a + 127) >> 8; + b = (b * a + bg_blue * inv_a + 127) >> 8; + fg = 0xff000000 | (r << 16) | (g << 8) | b; + dest[x] = fg; + } + mask1 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } +} + +static void TriDraw32Copy(const TriDrawTriangleArgs *args, WorkerThreadData *thread) +{ + int numSpans = thread->NumFullSpans; + auto fullSpans = thread->FullSpans; + int numBlocks = thread->NumPartialBlocks; + auto partialBlocks = thread->PartialBlocks; + int startX = thread->StartX; + int startY = thread->StartY; + + auto flags = args->uniforms->flags; + bool is_simple_shade = (flags & TriUniforms::simple_shade) == TriUniforms::simple_shade; + bool is_nearest_filter = (flags & TriUniforms::nearest_filter) == TriUniforms::nearest_filter; + bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; + uint32_t lightmask = is_fixed_light ? 
0 : 0xffffffff; + auto colormaps = args->colormaps; + + // Calculate gradients + const TriVertex &v1 = *args->v1; + const TriVertex &v2 = *args->v2; + const TriVertex &v3 = *args->v3; + ScreenTriangleStepVariables gradientX; + ScreenTriangleStepVariables gradientY; + ScreenTriangleStepVariables start; + gradientX.W = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + gradientY.W = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + start.W = v1.w + gradientX.W * (startX - v1.x) + gradientY.W * (startY - v1.y); + for (int i = 0; i < TriVertex::NumVarying; i++) + { + gradientX.Varying[i] = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + gradientY.Varying[i] = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + start.Varying[i] = v1.varying[i] * v1.w + gradientX.Varying[i] * (startX - v1.x) + gradientY.Varying[i] * (startY - v1.y); + } + + const uint32_t * RESTRICT texPixels = (const uint32_t *)args->texturePixels; + uint32_t texWidth = args->textureWidth; + uint32_t texHeight = args->textureHeight; + + uint32_t * RESTRICT destOrg = (uint32_t*)args->dest; + int pitch = args->pitch; + + uint32_t light = args->uniforms->light; + float shade = (64.0f - (light * 255 / 256 + 12.0f) * 32.0f / 128.0f) / 32.0f; + float globVis = args->uniforms->globvis * (1.0f / 32.0f); + + uint32_t color = args->uniforms->color; + + for (int i = 0; i < numSpans; i++) + { + const auto &span = fullSpans[i]; + + uint32_t *dest = destOrg + span.X + span.Y * pitch; + int width = span.Length; + int height = 8; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (span.X - startX) + gradientY.W * (span.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (span.X - startX) + gradientY.Varying[j] * (span.Y - startY); + + 
for (int y = 0; y < height; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + for (int x = 0; x < width; x++) + { + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int ix = 0; ix < 8; ix++) + { + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + uint32_t fg = texPixels[texelX * texHeight + texelY]; + uint32_t r = RPART(fg); + uint32_t g = GPART(fg); + uint32_t b = BPART(fg); + r = (r * lightpos) >> 16; + g = (g * lightpos) >> 16; + b = (b * lightpos) >> 16; + + fg = 0xff000000 | (r << 16) | (g << 8) | b; + dest[x * 8 + ix] = fg; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } + + for (int i = 0; i < numBlocks; i++) + { + const auto &block = partialBlocks[i]; + + 
ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (block.X - startX) + gradientY.W * (block.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (block.X - startX) + gradientY.Varying[j] * (block.Y - startY); + + uint32_t *dest = destOrg + block.X + block.Y * pitch; + uint32_t mask0 = block.Mask0; + uint32_t mask1 = block.Mask1; + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask0 & (1 << 31)) + { + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + uint32_t fg = texPixels[texelX * texHeight + texelY]; + uint32_t r = RPART(fg); + uint32_t g = GPART(fg); + uint32_t b = BPART(fg); + r = (r * lightpos) >> 16; + g = (g * lightpos) >> 16; + b = (b * lightpos) >> 16; + + fg = 0xff000000 | (r << 16) | (g << 
8) | b; + dest[x] = fg; + } + mask0 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask1 & (1 << 31)) + { + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + uint32_t fg = texPixels[texelX * texHeight + texelY]; + uint32_t r = RPART(fg); + uint32_t g = GPART(fg); + uint32_t b = BPART(fg); + r = (r * lightpos) >> 16; + g = (g * lightpos) >> 16; + b = (b * lightpos) >> 16; + + fg = 0xff000000 | (r << 16) | (g << 8) | b; + dest[x] = fg; + } + mask1 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += 
varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } +} + +static void TriDraw32AlphaBlend(const TriDrawTriangleArgs *args, WorkerThreadData *thread) +{ + int numSpans = thread->NumFullSpans; + auto fullSpans = thread->FullSpans; + int numBlocks = thread->NumPartialBlocks; + auto partialBlocks = thread->PartialBlocks; + int startX = thread->StartX; + int startY = thread->StartY; + + auto flags = args->uniforms->flags; + bool is_simple_shade = (flags & TriUniforms::simple_shade) == TriUniforms::simple_shade; + bool is_nearest_filter = (flags & TriUniforms::nearest_filter) == TriUniforms::nearest_filter; + bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; + uint32_t lightmask = is_fixed_light ? 0 : 0xffffffff; + auto colormaps = args->colormaps; + + // Calculate gradients + const TriVertex &v1 = *args->v1; + const TriVertex &v2 = *args->v2; + const TriVertex &v3 = *args->v3; + ScreenTriangleStepVariables gradientX; + ScreenTriangleStepVariables gradientY; + ScreenTriangleStepVariables start; + gradientX.W = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + gradientY.W = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + start.W = v1.w + gradientX.W * (startX - v1.x) + gradientY.W * (startY - v1.y); + for (int i = 0; i < TriVertex::NumVarying; i++) + { + gradientX.Varying[i] = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + gradientY.Varying[i] = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + start.Varying[i] = v1.varying[i] * v1.w + gradientX.Varying[i] * (startX - v1.x) + gradientY.Varying[i] * (startY - v1.y); + } + + const uint32_t * RESTRICT texPixels = (const uint32_t *)args->texturePixels; + 
uint32_t texWidth = args->textureWidth; + uint32_t texHeight = args->textureHeight; + + uint32_t * RESTRICT destOrg = (uint32_t*)args->dest; + int pitch = args->pitch; + + uint32_t light = args->uniforms->light; + float shade = (64.0f - (light * 255 / 256 + 12.0f) * 32.0f / 128.0f) / 32.0f; + float globVis = args->uniforms->globvis * (1.0f / 32.0f); + + uint32_t color = args->uniforms->color; + + for (int i = 0; i < numSpans; i++) + { + const auto &span = fullSpans[i]; + + uint32_t *dest = destOrg + span.X + span.Y * pitch; + int width = span.Length; + int height = 8; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (span.X - startX) + gradientY.W * (span.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (span.X - startX) + gradientY.Varying[j] * (span.Y - startY); + + for (int y = 0; y < height; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + for (int x = 0; x < width; x++) + { + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + 
lightstep = lightstep & lightmask; + + for (int ix = 0; ix < 8; ix++) + { + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + uint32_t fg = texPixels[texelX * texHeight + texelY]; + uint32_t r = RPART(fg); + uint32_t g = GPART(fg); + uint32_t b = BPART(fg); + r = (r * lightpos) >> 16; + g = (g * lightpos) >> 16; + b = (b * lightpos) >> 16; + + uint32_t a = APART(fg); + a += a >> 7; + uint32_t inv_a = 256 - a; + uint32_t bg = dest[x * 8 + ix]; + uint32_t bg_red = RPART(bg); + uint32_t bg_green = GPART(bg); + uint32_t bg_blue = BPART(bg); + r = (r * a + bg_red * inv_a + 127) >> 8; + g = (g * a + bg_green * inv_a + 127) >> 8; + b = (b * a + bg_blue * inv_a + 127) >> 8; + fg = 0xff000000 | (r << 16) | (g << 8) | b; + dest[x * 8 + ix] = fg; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } + + for (int i = 0; i < numBlocks; i++) + { + const auto &block = partialBlocks[i]; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (block.X - startX) + gradientY.W * (block.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (block.X - startX) + gradientY.Varying[j] * (block.Y - startY); + + uint32_t *dest = destOrg + block.X + block.Y * pitch; + uint32_t mask0 = block.Mask0; + uint32_t mask1 = block.Mask1; + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, 
globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask0 & (1 << 31)) + { + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + uint32_t fg = texPixels[texelX * texHeight + texelY]; + uint32_t r = RPART(fg); + uint32_t g = GPART(fg); + uint32_t b = BPART(fg); + r = (r * lightpos) >> 16; + g = (g * lightpos) >> 16; + b = (b * lightpos) >> 16; + + uint32_t a = APART(fg); + a += a >> 7; + uint32_t inv_a = 256 - a; + uint32_t bg = dest[x]; + uint32_t bg_red = RPART(bg); + uint32_t bg_green = GPART(bg); + uint32_t bg_blue = BPART(bg); + r = (r * a + bg_red * inv_a + 127) >> 8; + g = (g * a + bg_green * inv_a + 127) >> 8; + b = (b * a + bg_blue * inv_a + 127) >> 8; + fg = 0xff000000 | (r << 16) | (g << 8) | b; + dest[x] = fg; + } + mask0 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t 
varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask1 & (1 << 31)) + { + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + uint32_t fg = texPixels[texelX * texHeight + texelY]; + uint32_t r = RPART(fg); + uint32_t g = GPART(fg); + uint32_t b = BPART(fg); + r = (r * lightpos) >> 16; + g = (g * lightpos) >> 16; + b = (b * lightpos) >> 16; + + uint32_t a = APART(fg); + a += a >> 7; + uint32_t inv_a = 256 - a; + uint32_t bg = dest[x]; + uint32_t bg_red = RPART(bg); + uint32_t bg_green = GPART(bg); + uint32_t bg_blue = BPART(bg); + r = (r * a + bg_red * inv_a + 127) >> 8; + g = (g * a + bg_green * inv_a + 127) >> 8; + b = (b * a + bg_blue * inv_a + 127) >> 8; + fg = 0xff000000 | (r << 16) | (g << 8) | b; + dest[x] = fg; + } + mask1 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + 
blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } +} + +static void TriDraw32AddSolid(const TriDrawTriangleArgs *args, WorkerThreadData *thread) +{ + int numSpans = thread->NumFullSpans; + auto fullSpans = thread->FullSpans; + int numBlocks = thread->NumPartialBlocks; + auto partialBlocks = thread->PartialBlocks; + int startX = thread->StartX; + int startY = thread->StartY; + + auto flags = args->uniforms->flags; + bool is_simple_shade = (flags & TriUniforms::simple_shade) == TriUniforms::simple_shade; + bool is_nearest_filter = (flags & TriUniforms::nearest_filter) == TriUniforms::nearest_filter; + bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; + uint32_t lightmask = is_fixed_light ? 0 : 0xffffffff; + auto colormaps = args->colormaps; + + // Calculate gradients + const TriVertex &v1 = *args->v1; + const TriVertex &v2 = *args->v2; + const TriVertex &v3 = *args->v3; + ScreenTriangleStepVariables gradientX; + ScreenTriangleStepVariables gradientY; + ScreenTriangleStepVariables start; + gradientX.W = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + gradientY.W = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + start.W = v1.w + gradientX.W * (startX - v1.x) + gradientY.W * (startY - v1.y); + for (int i = 0; i < TriVertex::NumVarying; i++) + { + gradientX.Varying[i] = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + gradientY.Varying[i] = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + start.Varying[i] = v1.varying[i] * v1.w + gradientX.Varying[i] * (startX - v1.x) + gradientY.Varying[i] * (startY - v1.y); + } + + const uint32_t * RESTRICT texPixels = (const uint32_t *)args->texturePixels; + uint32_t texWidth = args->textureWidth; + uint32_t texHeight = args->textureHeight; + + uint32_t * RESTRICT destOrg = 
(uint32_t*)args->dest; + int pitch = args->pitch; + + uint32_t light = args->uniforms->light; + float shade = (64.0f - (light * 255 / 256 + 12.0f) * 32.0f / 128.0f) / 32.0f; + float globVis = args->uniforms->globvis * (1.0f / 32.0f); + + uint32_t color = args->uniforms->color; + + for (int i = 0; i < numSpans; i++) + { + const auto &span = fullSpans[i]; + + uint32_t *dest = destOrg + span.X + span.Y * pitch; + int width = span.Length; + int height = 8; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (span.X - startX) + gradientY.W * (span.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (span.X - startX) + gradientY.Varying[j] * (span.Y - startY); + + for (int y = 0; y < height; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + for (int x = 0; x < width; x++) + { + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int ix = 0; ix < 8; ix++) + { + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 
16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + uint32_t fg = texPixels[texelX * texHeight + texelY]; + uint32_t r = RPART(fg); + uint32_t g = GPART(fg); + uint32_t b = BPART(fg); + r = (r * lightpos) >> 16; + g = (g * lightpos) >> 16; + b = (b * lightpos) >> 16; + + uint32_t a = APART(fg); + a += a >> 7; + uint32_t inv_a = 256 - a; + uint32_t bg = dest[x * 8 + ix]; + uint32_t bg_red = RPART(bg); + uint32_t bg_green = GPART(bg); + uint32_t bg_blue = BPART(bg); + r = (r * a + bg_red * inv_a + 127) >> 8; + g = (g * a + bg_green * inv_a + 127) >> 8; + b = (b * a + bg_blue * inv_a + 127) >> 8; + fg = 0xff000000 | (r << 16) | (g << 8) | b; + dest[x * 8 + ix] = fg; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } + + for (int i = 0; i < numBlocks; i++) + { + const auto &block = partialBlocks[i]; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (block.X - startX) + gradientY.W * (block.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (block.X - startX) + gradientY.Varying[j] * (block.Y - startY); + + uint32_t *dest = destOrg + block.X + block.Y * pitch; + uint32_t mask0 = block.Mask0; + uint32_t mask1 = block.Mask1; + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & 
~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask0 & (1 << 31)) + { + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + uint32_t fg = texPixels[texelX * texHeight + texelY]; + uint32_t r = RPART(fg); + uint32_t g = GPART(fg); + uint32_t b = BPART(fg); + r = (r * lightpos) >> 16; + g = (g * lightpos) >> 16; + b = (b * lightpos) >> 16; + + uint32_t a = APART(fg); + a += a >> 7; + uint32_t inv_a = 256 - a; + uint32_t bg = dest[x]; + uint32_t bg_red = RPART(bg); + uint32_t bg_green = GPART(bg); + uint32_t bg_blue = BPART(bg); + r = (r * a + bg_red * inv_a + 127) >> 8; + g = (g * a + bg_green * inv_a + 127) >> 8; + b = (b * a + bg_blue * inv_a + 127) >> 8; + fg = 0xff000000 | (r << 16) | (g << 8) | b; + dest[x] = fg; + } + mask0 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] 
* rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask1 & (1 << 31)) + { + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + uint32_t fg = texPixels[texelX * texHeight + texelY]; + uint32_t r = RPART(fg); + uint32_t g = GPART(fg); + uint32_t b = BPART(fg); + r = (r * lightpos) >> 16; + g = (g * lightpos) >> 16; + b = (b * lightpos) >> 16; + + uint32_t a = APART(fg); + a += a >> 7; + uint32_t inv_a = 256 - a; + uint32_t bg = dest[x]; + uint32_t bg_red = RPART(bg); + uint32_t bg_green = GPART(bg); + uint32_t bg_blue = BPART(bg); + r = (r * a + bg_red * inv_a + 127) >> 8; + g = (g * a + bg_green * inv_a + 127) >> 8; + b = (b * a + bg_blue * inv_a + 127) >> 8; + fg = 0xff000000 | (r << 16) | (g << 8) | b; + dest[x] = fg; + } + mask1 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } +} + +static void TriDraw32Add(const TriDrawTriangleArgs *args, 
WorkerThreadData *thread) +{ + int numSpans = thread->NumFullSpans; + auto fullSpans = thread->FullSpans; + int numBlocks = thread->NumPartialBlocks; + auto partialBlocks = thread->PartialBlocks; + int startX = thread->StartX; + int startY = thread->StartY; + + auto flags = args->uniforms->flags; + bool is_simple_shade = (flags & TriUniforms::simple_shade) == TriUniforms::simple_shade; + bool is_nearest_filter = (flags & TriUniforms::nearest_filter) == TriUniforms::nearest_filter; + bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; + uint32_t lightmask = is_fixed_light ? 0 : 0xffffffff; + auto colormaps = args->colormaps; + + // Calculate gradients + const TriVertex &v1 = *args->v1; + const TriVertex &v2 = *args->v2; + const TriVertex &v3 = *args->v3; + ScreenTriangleStepVariables gradientX; + ScreenTriangleStepVariables gradientY; + ScreenTriangleStepVariables start; + gradientX.W = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + gradientY.W = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + start.W = v1.w + gradientX.W * (startX - v1.x) + gradientY.W * (startY - v1.y); + for (int i = 0; i < TriVertex::NumVarying; i++) + { + gradientX.Varying[i] = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + gradientY.Varying[i] = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + start.Varying[i] = v1.varying[i] * v1.w + gradientX.Varying[i] * (startX - v1.x) + gradientY.Varying[i] * (startY - v1.y); + } + + const uint32_t * RESTRICT texPixels = (const uint32_t *)args->texturePixels; + uint32_t texWidth = args->textureWidth; + uint32_t texHeight = args->textureHeight; + + uint32_t * RESTRICT destOrg = (uint32_t*)args->dest; + int pitch = args->pitch; + + uint32_t light = args->uniforms->light; + float shade = (64.0f - (light * 255 / 256 + 12.0f) * 32.0f / 
128.0f) / 32.0f; + float globVis = args->uniforms->globvis * (1.0f / 32.0f); + + uint32_t color = args->uniforms->color; + + for (int i = 0; i < numSpans; i++) + { + const auto &span = fullSpans[i]; + + uint32_t *dest = destOrg + span.X + span.Y * pitch; + int width = span.Length; + int height = 8; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (span.X - startX) + gradientY.W * (span.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (span.X - startX) + gradientY.Varying[j] * (span.Y - startY); + + for (int y = 0; y < height; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + for (int x = 0; x < width; x++) + { + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int ix = 0; ix < 8; ix++) + { + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + uint32_t fg = texPixels[texelX * texHeight + texelY]; + 
uint32_t r = RPART(fg); + uint32_t g = GPART(fg); + uint32_t b = BPART(fg); + r = (r * lightpos) >> 16; + g = (g * lightpos) >> 16; + b = (b * lightpos) >> 16; + + uint32_t a = APART(fg); + a += a >> 7; + uint32_t inv_a = 256 - a; + uint32_t bg = dest[x * 8 + ix]; + uint32_t bg_red = RPART(bg); + uint32_t bg_green = GPART(bg); + uint32_t bg_blue = BPART(bg); + r = (r * a + bg_red * inv_a + 127) >> 8; + g = (g * a + bg_green * inv_a + 127) >> 8; + b = (b * a + bg_blue * inv_a + 127) >> 8; + fg = 0xff000000 | (r << 16) | (g << 8) | b; + dest[x * 8 + ix] = fg; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } + + for (int i = 0; i < numBlocks; i++) + { + const auto &block = partialBlocks[i]; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (block.X - startX) + gradientY.W * (block.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (block.X - startX) + gradientY.Varying[j] * (block.Y - startY); + + uint32_t *dest = destOrg + block.X + block.Y * pitch; + uint32_t mask0 = block.Mask0; + uint32_t mask1 = block.Mask1; + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 
0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask0 & (1 << 31)) + { + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + uint32_t fg = texPixels[texelX * texHeight + texelY]; + uint32_t r = RPART(fg); + uint32_t g = GPART(fg); + uint32_t b = BPART(fg); + r = (r * lightpos) >> 16; + g = (g * lightpos) >> 16; + b = (b * lightpos) >> 16; + + uint32_t a = APART(fg); + a += a >> 7; + uint32_t inv_a = 256 - a; + uint32_t bg = dest[x]; + uint32_t bg_red = RPART(bg); + uint32_t bg_green = GPART(bg); + uint32_t bg_blue = BPART(bg); + r = (r * a + bg_red * inv_a + 127) >> 8; + g = (g * a + bg_green * inv_a + 127) >> 8; + b = (b * a + bg_blue * inv_a + 127) >> 8; + fg = 0xff000000 | (r << 16) | (g << 8) | b; + dest[x] = fg; + } + mask0 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = 
(lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask1 & (1 << 31)) + { + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + uint32_t fg = texPixels[texelX * texHeight + texelY]; + uint32_t r = RPART(fg); + uint32_t g = GPART(fg); + uint32_t b = BPART(fg); + r = (r * lightpos) >> 16; + g = (g * lightpos) >> 16; + b = (b * lightpos) >> 16; + + uint32_t a = APART(fg); + a += a >> 7; + uint32_t inv_a = 256 - a; + uint32_t bg = dest[x]; + uint32_t bg_red = RPART(bg); + uint32_t bg_green = GPART(bg); + uint32_t bg_blue = BPART(bg); + r = (r * a + bg_red * inv_a + 127) >> 8; + g = (g * a + bg_green * inv_a + 127) >> 8; + b = (b * a + bg_blue * inv_a + 127) >> 8; + fg = 0xff000000 | (r << 16) | (g << 8) | b; + dest[x] = fg; + } + mask1 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } +} + +static void TriDraw32Sub(const TriDrawTriangleArgs *args, WorkerThreadData *thread) +{ + int numSpans = thread->NumFullSpans; + auto fullSpans = thread->FullSpans; + int numBlocks = thread->NumPartialBlocks; + auto 
partialBlocks = thread->PartialBlocks; + int startX = thread->StartX; + int startY = thread->StartY; + + auto flags = args->uniforms->flags; + bool is_simple_shade = (flags & TriUniforms::simple_shade) == TriUniforms::simple_shade; + bool is_nearest_filter = (flags & TriUniforms::nearest_filter) == TriUniforms::nearest_filter; + bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; + uint32_t lightmask = is_fixed_light ? 0 : 0xffffffff; + auto colormaps = args->colormaps; + + // Calculate gradients + const TriVertex &v1 = *args->v1; + const TriVertex &v2 = *args->v2; + const TriVertex &v3 = *args->v3; + ScreenTriangleStepVariables gradientX; + ScreenTriangleStepVariables gradientY; + ScreenTriangleStepVariables start; + gradientX.W = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + gradientY.W = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + start.W = v1.w + gradientX.W * (startX - v1.x) + gradientY.W * (startY - v1.y); + for (int i = 0; i < TriVertex::NumVarying; i++) + { + gradientX.Varying[i] = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + gradientY.Varying[i] = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + start.Varying[i] = v1.varying[i] * v1.w + gradientX.Varying[i] * (startX - v1.x) + gradientY.Varying[i] * (startY - v1.y); + } + + const uint32_t * RESTRICT texPixels = (const uint32_t *)args->texturePixels; + uint32_t texWidth = args->textureWidth; + uint32_t texHeight = args->textureHeight; + + uint32_t * RESTRICT destOrg = (uint32_t*)args->dest; + int pitch = args->pitch; + + uint32_t light = args->uniforms->light; + float shade = (64.0f - (light * 255 / 256 + 12.0f) * 32.0f / 128.0f) / 32.0f; + float globVis = args->uniforms->globvis * (1.0f / 32.0f); + + uint32_t color = args->uniforms->color; + + for (int i = 0; i < numSpans; i++) + 
{ + const auto &span = fullSpans[i]; + + uint32_t *dest = destOrg + span.X + span.Y * pitch; + int width = span.Length; + int height = 8; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (span.X - startX) + gradientY.W * (span.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (span.X - startX) + gradientY.Varying[j] * (span.Y - startY); + + for (int y = 0; y < height; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + for (int x = 0; x < width; x++) + { + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int ix = 0; ix < 8; ix++) + { + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + uint32_t fg = texPixels[texelX * texHeight + texelY]; + uint32_t r = RPART(fg); + uint32_t g = GPART(fg); + uint32_t b = BPART(fg); + r = (r * lightpos) >> 16; + g = (g * lightpos) >> 16; + b = (b * lightpos) >> 16; + + 
uint32_t a = APART(fg); + a += a >> 7; + uint32_t inv_a = 256 - a; + uint32_t bg = dest[x * 8 + ix]; + uint32_t bg_red = RPART(bg); + uint32_t bg_green = GPART(bg); + uint32_t bg_blue = BPART(bg); + r = (r * a + bg_red * inv_a + 127) >> 8; + g = (g * a + bg_green * inv_a + 127) >> 8; + b = (b * a + bg_blue * inv_a + 127) >> 8; + fg = 0xff000000 | (r << 16) | (g << 8) | b; + dest[x * 8 + ix] = fg; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } + + for (int i = 0; i < numBlocks; i++) + { + const auto &block = partialBlocks[i]; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (block.X - startX) + gradientY.W * (block.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (block.X - startX) + gradientY.Varying[j] * (block.Y - startY); + + uint32_t *dest = destOrg + block.X + block.Y * pitch; + uint32_t mask0 = block.Mask0; + uint32_t mask1 = block.Mask1; + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = 
(int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask0 & (1 << 31)) + { + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + uint32_t fg = texPixels[texelX * texHeight + texelY]; + uint32_t r = RPART(fg); + uint32_t g = GPART(fg); + uint32_t b = BPART(fg); + r = (r * lightpos) >> 16; + g = (g * lightpos) >> 16; + b = (b * lightpos) >> 16; + + uint32_t a = APART(fg); + a += a >> 7; + uint32_t inv_a = 256 - a; + uint32_t bg = dest[x]; + uint32_t bg_red = RPART(bg); + uint32_t bg_green = GPART(bg); + uint32_t bg_blue = BPART(bg); + r = (r * a + bg_red * inv_a + 127) >> 8; + g = (g * a + bg_green * inv_a + 127) >> 8; + b = (b * a + bg_blue * inv_a + 127) >> 8; + fg = 0xff000000 | (r << 16) | (g << 8) | b; + dest[x] = fg; + } + mask0 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + 
blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask1 & (1 << 31)) + { + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + uint32_t fg = texPixels[texelX * texHeight + texelY]; + uint32_t r = RPART(fg); + uint32_t g = GPART(fg); + uint32_t b = BPART(fg); + r = (r * lightpos) >> 16; + g = (g * lightpos) >> 16; + b = (b * lightpos) >> 16; + + uint32_t a = APART(fg); + a += a >> 7; + uint32_t inv_a = 256 - a; + uint32_t bg = dest[x]; + uint32_t bg_red = RPART(bg); + uint32_t bg_green = GPART(bg); + uint32_t bg_blue = BPART(bg); + r = (r * a + bg_red * inv_a + 127) >> 8; + g = (g * a + bg_green * inv_a + 127) >> 8; + b = (b * a + bg_blue * inv_a + 127) >> 8; + fg = 0xff000000 | (r << 16) | (g << 8) | b; + dest[x] = fg; + } + mask1 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } +} + +static void TriDraw32RevSub(const TriDrawTriangleArgs *args, WorkerThreadData *thread) +{ + int numSpans = thread->NumFullSpans; + auto fullSpans = thread->FullSpans; + int numBlocks = thread->NumPartialBlocks; + auto partialBlocks = thread->PartialBlocks; + int startX = thread->StartX; + int startY = thread->StartY; + + auto flags = args->uniforms->flags; + 
bool is_simple_shade = (flags & TriUniforms::simple_shade) == TriUniforms::simple_shade; + bool is_nearest_filter = (flags & TriUniforms::nearest_filter) == TriUniforms::nearest_filter; + bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; + uint32_t lightmask = is_fixed_light ? 0 : 0xffffffff; + auto colormaps = args->colormaps; + + // Calculate gradients + const TriVertex &v1 = *args->v1; + const TriVertex &v2 = *args->v2; + const TriVertex &v3 = *args->v3; + ScreenTriangleStepVariables gradientX; + ScreenTriangleStepVariables gradientY; + ScreenTriangleStepVariables start; + gradientX.W = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + gradientY.W = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + start.W = v1.w + gradientX.W * (startX - v1.x) + gradientY.W * (startY - v1.y); + for (int i = 0; i < TriVertex::NumVarying; i++) + { + gradientX.Varying[i] = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + gradientY.Varying[i] = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + start.Varying[i] = v1.varying[i] * v1.w + gradientX.Varying[i] * (startX - v1.x) + gradientY.Varying[i] * (startY - v1.y); + } + + const uint32_t * RESTRICT texPixels = (const uint32_t *)args->texturePixels; + uint32_t texWidth = args->textureWidth; + uint32_t texHeight = args->textureHeight; + + uint32_t * RESTRICT destOrg = (uint32_t*)args->dest; + int pitch = args->pitch; + + uint32_t light = args->uniforms->light; + float shade = (64.0f - (light * 255 / 256 + 12.0f) * 32.0f / 128.0f) / 32.0f; + float globVis = args->uniforms->globvis * (1.0f / 32.0f); + + uint32_t color = args->uniforms->color; + + for (int i = 0; i < numSpans; i++) + { + const auto &span = fullSpans[i]; + + uint32_t *dest = destOrg + span.X + span.Y * pitch; + int width = span.Length; + int height = 8; + + 
ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (span.X - startX) + gradientY.W * (span.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (span.X - startX) + gradientY.Varying[j] * (span.Y - startY); + + for (int y = 0; y < height; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + for (int x = 0; x < width; x++) + { + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int ix = 0; ix < 8; ix++) + { + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + uint32_t fg = texPixels[texelX * texHeight + texelY]; + uint32_t r = RPART(fg); + uint32_t g = GPART(fg); + uint32_t b = BPART(fg); + r = (r * lightpos) >> 16; + g = (g * lightpos) >> 16; + b = (b * lightpos) >> 16; + + uint32_t a = APART(fg); + a += a >> 7; + uint32_t inv_a = 256 - a; + uint32_t bg = dest[x * 8 + ix]; + uint32_t bg_red = RPART(bg); + uint32_t 
bg_green = GPART(bg); + uint32_t bg_blue = BPART(bg); + r = (r * a + bg_red * inv_a + 127) >> 8; + g = (g * a + bg_green * inv_a + 127) >> 8; + b = (b * a + bg_blue * inv_a + 127) >> 8; + fg = 0xff000000 | (r << 16) | (g << 8) | b; + dest[x * 8 + ix] = fg; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } + + for (int i = 0; i < numBlocks; i++) + { + const auto &block = partialBlocks[i]; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (block.X - startX) + gradientY.W * (block.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (block.X - startX) + gradientY.Varying[j] * (block.Y - startY); + + uint32_t *dest = destOrg + block.X + block.Y * pitch; + uint32_t mask0 = block.Mask0; + uint32_t mask1 = block.Mask1; + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 
32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask0 & (1 << 31)) + { + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + uint32_t fg = texPixels[texelX * texHeight + texelY]; + uint32_t r = RPART(fg); + uint32_t g = GPART(fg); + uint32_t b = BPART(fg); + r = (r * lightpos) >> 16; + g = (g * lightpos) >> 16; + b = (b * lightpos) >> 16; + + uint32_t a = APART(fg); + a += a >> 7; + uint32_t inv_a = 256 - a; + uint32_t bg = dest[x]; + uint32_t bg_red = RPART(bg); + uint32_t bg_green = GPART(bg); + uint32_t bg_blue = BPART(bg); + r = (r * a + bg_red * inv_a + 127) >> 8; + g = (g * a + bg_green * inv_a + 127) >> 8; + b = (b * a + bg_blue * inv_a + 127) >> 8; + fg = 0xff000000 | (r << 16) | (g << 8) | b; + dest[x] = fg; + } + mask0 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < 
TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask1 & (1 << 31)) + { + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + uint32_t fg = texPixels[texelX * texHeight + texelY]; + uint32_t r = RPART(fg); + uint32_t g = GPART(fg); + uint32_t b = BPART(fg); + r = (r * lightpos) >> 16; + g = (g * lightpos) >> 16; + b = (b * lightpos) >> 16; + + uint32_t a = APART(fg); + a += a >> 7; + uint32_t inv_a = 256 - a; + uint32_t bg = dest[x]; + uint32_t bg_red = RPART(bg); + uint32_t bg_green = GPART(bg); + uint32_t bg_blue = BPART(bg); + r = (r * a + bg_red * inv_a + 127) >> 8; + g = (g * a + bg_green * inv_a + 127) >> 8; + b = (b * a + bg_blue * inv_a + 127) >> 8; + fg = 0xff000000 | (r << 16) | (g << 8) | b; + dest[x] = fg; + } + mask1 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } +} + +static void TriDraw32Stencil(const TriDrawTriangleArgs *args, WorkerThreadData *thread) +{ + int numSpans = thread->NumFullSpans; + auto fullSpans = thread->FullSpans; + int numBlocks = thread->NumPartialBlocks; + auto partialBlocks = thread->PartialBlocks; + int startX = thread->StartX; + int startY = thread->StartY; + + auto flags = args->uniforms->flags; + bool is_simple_shade = (flags & TriUniforms::simple_shade) == TriUniforms::simple_shade; + bool is_nearest_filter = (flags & TriUniforms::nearest_filter) == 
TriUniforms::nearest_filter; + bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; + uint32_t lightmask = is_fixed_light ? 0 : 0xffffffff; + auto colormaps = args->colormaps; + + // Calculate gradients + const TriVertex &v1 = *args->v1; + const TriVertex &v2 = *args->v2; + const TriVertex &v3 = *args->v3; + ScreenTriangleStepVariables gradientX; + ScreenTriangleStepVariables gradientY; + ScreenTriangleStepVariables start; + gradientX.W = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + gradientY.W = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + start.W = v1.w + gradientX.W * (startX - v1.x) + gradientY.W * (startY - v1.y); + for (int i = 0; i < TriVertex::NumVarying; i++) + { + gradientX.Varying[i] = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + gradientY.Varying[i] = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + start.Varying[i] = v1.varying[i] * v1.w + gradientX.Varying[i] * (startX - v1.x) + gradientY.Varying[i] * (startY - v1.y); + } + + const uint32_t * RESTRICT texPixels = (const uint32_t *)args->texturePixels; + uint32_t texWidth = args->textureWidth; + uint32_t texHeight = args->textureHeight; + + uint32_t * RESTRICT destOrg = (uint32_t*)args->dest; + int pitch = args->pitch; + + uint32_t light = args->uniforms->light; + float shade = (64.0f - (light * 255 / 256 + 12.0f) * 32.0f / 128.0f) / 32.0f; + float globVis = args->uniforms->globvis * (1.0f / 32.0f); + + uint32_t color = args->uniforms->color; + + for (int i = 0; i < numSpans; i++) + { + const auto &span = fullSpans[i]; + + uint32_t *dest = destOrg + span.X + span.Y * pitch; + int width = span.Length; + int height = 8; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (span.X - startX) + gradientY.W * (span.Y - startY); + for (int j = 0; j < 
TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (span.X - startX) + gradientY.Varying[j] * (span.Y - startY); + + for (int y = 0; y < height; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + for (int x = 0; x < width; x++) + { + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int ix = 0; ix < 8; ix++) + { + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + uint32_t fg = texPixels[texelX * texHeight + texelY]; + uint32_t r = RPART(fg); + uint32_t g = GPART(fg); + uint32_t b = BPART(fg); + r = (r * lightpos) >> 16; + g = (g * lightpos) >> 16; + b = (b * lightpos) >> 16; + + uint32_t a = APART(fg); + a += a >> 7; + uint32_t inv_a = 256 - a; + uint32_t bg = dest[x * 8 + ix]; + uint32_t bg_red = RPART(bg); + uint32_t bg_green = GPART(bg); + uint32_t bg_blue = BPART(bg); + r = (r * a + bg_red * inv_a + 127) >> 8; + g = (g * a + bg_green * inv_a + 127) >> 8; + b = (b * 
a + bg_blue * inv_a + 127) >> 8; + fg = 0xff000000 | (r << 16) | (g << 8) | b; + dest[x * 8 + ix] = fg; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } + + for (int i = 0; i < numBlocks; i++) + { + const auto &block = partialBlocks[i]; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (block.X - startX) + gradientY.W * (block.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (block.X - startX) + gradientY.Varying[j] * (block.Y - startY); + + uint32_t *dest = destOrg + block.X + block.Y * pitch; + uint32_t mask0 = block.Mask0; + uint32_t mask1 = block.Mask1; + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + 
+ for (int x = 0; x < 8; x++) + { + if (mask0 & (1 << 31)) + { + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + uint32_t fg = texPixels[texelX * texHeight + texelY]; + uint32_t r = RPART(fg); + uint32_t g = GPART(fg); + uint32_t b = BPART(fg); + r = (r * lightpos) >> 16; + g = (g * lightpos) >> 16; + b = (b * lightpos) >> 16; + + uint32_t a = APART(fg); + a += a >> 7; + uint32_t inv_a = 256 - a; + uint32_t bg = dest[x]; + uint32_t bg_red = RPART(bg); + uint32_t bg_green = GPART(bg); + uint32_t bg_blue = BPART(bg); + r = (r * a + bg_red * inv_a + 127) >> 8; + g = (g * a + bg_green * inv_a + 127) >> 8; + b = (b * a + bg_blue * inv_a + 127) >> 8; + fg = 0xff000000 | (r << 16) | (g << 8) | b; + dest[x] = fg; + } + mask0 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = 
FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask1 & (1 << 31)) + { + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + uint32_t fg = texPixels[texelX * texHeight + texelY]; + uint32_t r = RPART(fg); + uint32_t g = GPART(fg); + uint32_t b = BPART(fg); + r = (r * lightpos) >> 16; + g = (g * lightpos) >> 16; + b = (b * lightpos) >> 16; + + uint32_t a = APART(fg); + a += a >> 7; + uint32_t inv_a = 256 - a; + uint32_t bg = dest[x]; + uint32_t bg_red = RPART(bg); + uint32_t bg_green = GPART(bg); + uint32_t bg_blue = BPART(bg); + r = (r * a + bg_red * inv_a + 127) >> 8; + g = (g * a + bg_green * inv_a + 127) >> 8; + b = (b * a + bg_blue * inv_a + 127) >> 8; + fg = 0xff000000 | (r << 16) | (g << 8) | b; + dest[x] = fg; + } + mask1 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } +} + +static void TriDraw32Shaded(const TriDrawTriangleArgs *args, WorkerThreadData *thread) +{ + int numSpans = thread->NumFullSpans; + auto fullSpans = thread->FullSpans; + int numBlocks = thread->NumPartialBlocks; + auto partialBlocks = thread->PartialBlocks; + int startX = thread->StartX; + int startY = thread->StartY; + + auto flags = args->uniforms->flags; + bool is_simple_shade = (flags & TriUniforms::simple_shade) == TriUniforms::simple_shade; + bool is_nearest_filter = (flags & TriUniforms::nearest_filter) == TriUniforms::nearest_filter; + bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; + uint32_t lightmask = is_fixed_light ? 
0 : 0xffffffff; + auto colormaps = args->colormaps; + + // Calculate gradients + const TriVertex &v1 = *args->v1; + const TriVertex &v2 = *args->v2; + const TriVertex &v3 = *args->v3; + ScreenTriangleStepVariables gradientX; + ScreenTriangleStepVariables gradientY; + ScreenTriangleStepVariables start; + gradientX.W = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + gradientY.W = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + start.W = v1.w + gradientX.W * (startX - v1.x) + gradientY.W * (startY - v1.y); + for (int i = 0; i < TriVertex::NumVarying; i++) + { + gradientX.Varying[i] = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + gradientY.Varying[i] = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + start.Varying[i] = v1.varying[i] * v1.w + gradientX.Varying[i] * (startX - v1.x) + gradientY.Varying[i] * (startY - v1.y); + } + + const uint32_t * RESTRICT texPixels = (const uint32_t *)args->texturePixels; + uint32_t texWidth = args->textureWidth; + uint32_t texHeight = args->textureHeight; + + uint32_t * RESTRICT destOrg = (uint32_t*)args->dest; + int pitch = args->pitch; + + uint32_t light = args->uniforms->light; + float shade = (64.0f - (light * 255 / 256 + 12.0f) * 32.0f / 128.0f) / 32.0f; + float globVis = args->uniforms->globvis * (1.0f / 32.0f); + + uint32_t color = args->uniforms->color; + + for (int i = 0; i < numSpans; i++) + { + const auto &span = fullSpans[i]; + + uint32_t *dest = destOrg + span.X + span.Y * pitch; + int width = span.Length; + int height = 8; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (span.X - startX) + gradientY.W * (span.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (span.X - startX) + gradientY.Varying[j] * (span.Y - startY); + + 
for (int y = 0; y < height; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + for (int x = 0; x < width; x++) + { + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int ix = 0; ix < 8; ix++) + { + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + uint32_t fg = texPixels[texelX * texHeight + texelY]; + uint32_t r = RPART(fg); + uint32_t g = GPART(fg); + uint32_t b = BPART(fg); + r = (r * lightpos) >> 16; + g = (g * lightpos) >> 16; + b = (b * lightpos) >> 16; + + uint32_t a = APART(fg); + a += a >> 7; + uint32_t inv_a = 256 - a; + uint32_t bg = dest[x * 8 + ix]; + uint32_t bg_red = RPART(bg); + uint32_t bg_green = GPART(bg); + uint32_t bg_blue = BPART(bg); + r = (r * a + bg_red * inv_a + 127) >> 8; + g = (g * a + bg_green * inv_a + 127) >> 8; + b = (b * a + bg_blue * inv_a + 127) >> 8; + fg = 0xff000000 | (r << 16) | (g << 8) | b; + dest[x * 8 + ix] = fg; + + for (int j = 0; j < TriVertex::NumVarying; j++) + 
varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } + + for (int i = 0; i < numBlocks; i++) + { + const auto &block = partialBlocks[i]; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (block.X - startX) + gradientY.W * (block.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (block.X - startX) + gradientY.Varying[j] * (block.Y - startY); + + uint32_t *dest = destOrg + block.X + block.Y * pitch; + uint32_t mask0 = block.Mask0; + uint32_t mask1 = block.Mask1; + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask0 & (1 << 31)) + { + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = 
((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + uint32_t fg = texPixels[texelX * texHeight + texelY]; + uint32_t r = RPART(fg); + uint32_t g = GPART(fg); + uint32_t b = BPART(fg); + r = (r * lightpos) >> 16; + g = (g * lightpos) >> 16; + b = (b * lightpos) >> 16; + + uint32_t a = APART(fg); + a += a >> 7; + uint32_t inv_a = 256 - a; + uint32_t bg = dest[x]; + uint32_t bg_red = RPART(bg); + uint32_t bg_green = GPART(bg); + uint32_t bg_blue = BPART(bg); + r = (r * a + bg_red * inv_a + 127) >> 8; + g = (g * a + bg_green * inv_a + 127) >> 8; + b = (b * a + bg_blue * inv_a + 127) >> 8; + fg = 0xff000000 | (r << 16) | (g << 8) | b; + dest[x] = fg; + } + mask0 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - 
lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask1 & (1 << 31)) + { + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + uint32_t fg = texPixels[texelX * texHeight + texelY]; + uint32_t r = RPART(fg); + uint32_t g = GPART(fg); + uint32_t b = BPART(fg); + r = (r * lightpos) >> 16; + g = (g * lightpos) >> 16; + b = (b * lightpos) >> 16; + + uint32_t a = APART(fg); + a += a >> 7; + uint32_t inv_a = 256 - a; + uint32_t bg = dest[x]; + uint32_t bg_red = RPART(bg); + uint32_t bg_green = GPART(bg); + uint32_t bg_blue = BPART(bg); + r = (r * a + bg_red * inv_a + 127) >> 8; + g = (g * a + bg_green * inv_a + 127) >> 8; + b = (b * a + bg_blue * inv_a + 127) >> 8; + fg = 0xff000000 | (r << 16) | (g << 8) | b; + dest[x] = fg; + } + mask1 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } +} + +static void TriDraw32TranslateCopy(const TriDrawTriangleArgs *args, WorkerThreadData *thread) +{ + int numSpans = thread->NumFullSpans; + auto fullSpans = thread->FullSpans; + int numBlocks = thread->NumPartialBlocks; + auto partialBlocks = thread->PartialBlocks; + int startX = thread->StartX; + int startY = thread->StartY; + + auto flags = args->uniforms->flags; + bool is_simple_shade = (flags & TriUniforms::simple_shade) == TriUniforms::simple_shade; + bool is_nearest_filter = (flags & TriUniforms::nearest_filter) == TriUniforms::nearest_filter; + bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; + uint32_t lightmask = is_fixed_light ? 
0 : 0xffffffff; + auto colormaps = args->colormaps; + + // Calculate gradients + const TriVertex &v1 = *args->v1; + const TriVertex &v2 = *args->v2; + const TriVertex &v3 = *args->v3; + ScreenTriangleStepVariables gradientX; + ScreenTriangleStepVariables gradientY; + ScreenTriangleStepVariables start; + gradientX.W = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + gradientY.W = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + start.W = v1.w + gradientX.W * (startX - v1.x) + gradientY.W * (startY - v1.y); + for (int i = 0; i < TriVertex::NumVarying; i++) + { + gradientX.Varying[i] = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + gradientY.Varying[i] = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + start.Varying[i] = v1.varying[i] * v1.w + gradientX.Varying[i] * (startX - v1.x) + gradientY.Varying[i] * (startY - v1.y); + } + + const uint32_t * RESTRICT texPixels = (const uint32_t *)args->texturePixels; + uint32_t texWidth = args->textureWidth; + uint32_t texHeight = args->textureHeight; + + uint32_t * RESTRICT destOrg = (uint32_t*)args->dest; + int pitch = args->pitch; + + uint32_t light = args->uniforms->light; + float shade = (64.0f - (light * 255 / 256 + 12.0f) * 32.0f / 128.0f) / 32.0f; + float globVis = args->uniforms->globvis * (1.0f / 32.0f); + + uint32_t color = args->uniforms->color; + + for (int i = 0; i < numSpans; i++) + { + const auto &span = fullSpans[i]; + + uint32_t *dest = destOrg + span.X + span.Y * pitch; + int width = span.Length; + int height = 8; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (span.X - startX) + gradientY.W * (span.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (span.X - startX) + gradientY.Varying[j] * (span.Y - startY); + + 
for (int y = 0; y < height; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + for (int x = 0; x < width; x++) + { + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int ix = 0; ix < 8; ix++) + { + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + uint32_t fg = texPixels[texelX * texHeight + texelY]; + uint32_t r = RPART(fg); + uint32_t g = GPART(fg); + uint32_t b = BPART(fg); + r = (r * lightpos) >> 16; + g = (g * lightpos) >> 16; + b = (b * lightpos) >> 16; + + uint32_t a = APART(fg); + a += a >> 7; + uint32_t inv_a = 256 - a; + uint32_t bg = dest[x * 8 + ix]; + uint32_t bg_red = RPART(bg); + uint32_t bg_green = GPART(bg); + uint32_t bg_blue = BPART(bg); + r = (r * a + bg_red * inv_a + 127) >> 8; + g = (g * a + bg_green * inv_a + 127) >> 8; + b = (b * a + bg_blue * inv_a + 127) >> 8; + fg = 0xff000000 | (r << 16) | (g << 8) | b; + dest[x * 8 + ix] = fg; + + for (int j = 0; j < TriVertex::NumVarying; j++) + 
varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } + + for (int i = 0; i < numBlocks; i++) + { + const auto &block = partialBlocks[i]; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (block.X - startX) + gradientY.W * (block.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (block.X - startX) + gradientY.Varying[j] * (block.Y - startY); + + uint32_t *dest = destOrg + block.X + block.Y * pitch; + uint32_t mask0 = block.Mask0; + uint32_t mask1 = block.Mask1; + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask0 & (1 << 31)) + { + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = 
((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + uint32_t fg = texPixels[texelX * texHeight + texelY]; + uint32_t r = RPART(fg); + uint32_t g = GPART(fg); + uint32_t b = BPART(fg); + r = (r * lightpos) >> 16; + g = (g * lightpos) >> 16; + b = (b * lightpos) >> 16; + + uint32_t a = APART(fg); + a += a >> 7; + uint32_t inv_a = 256 - a; + uint32_t bg = dest[x]; + uint32_t bg_red = RPART(bg); + uint32_t bg_green = GPART(bg); + uint32_t bg_blue = BPART(bg); + r = (r * a + bg_red * inv_a + 127) >> 8; + g = (g * a + bg_green * inv_a + 127) >> 8; + b = (b * a + bg_blue * inv_a + 127) >> 8; + fg = 0xff000000 | (r << 16) | (g << 8) | b; + dest[x] = fg; + } + mask0 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - 
lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask1 & (1 << 31)) + { + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + uint32_t fg = texPixels[texelX * texHeight + texelY]; + uint32_t r = RPART(fg); + uint32_t g = GPART(fg); + uint32_t b = BPART(fg); + r = (r * lightpos) >> 16; + g = (g * lightpos) >> 16; + b = (b * lightpos) >> 16; + + uint32_t a = APART(fg); + a += a >> 7; + uint32_t inv_a = 256 - a; + uint32_t bg = dest[x]; + uint32_t bg_red = RPART(bg); + uint32_t bg_green = GPART(bg); + uint32_t bg_blue = BPART(bg); + r = (r * a + bg_red * inv_a + 127) >> 8; + g = (g * a + bg_green * inv_a + 127) >> 8; + b = (b * a + bg_blue * inv_a + 127) >> 8; + fg = 0xff000000 | (r << 16) | (g << 8) | b; + dest[x] = fg; + } + mask1 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } +} + +static void TriDraw32TranslateAlphaBlend(const TriDrawTriangleArgs *args, WorkerThreadData *thread) +{ + int numSpans = thread->NumFullSpans; + auto fullSpans = thread->FullSpans; + int numBlocks = thread->NumPartialBlocks; + auto partialBlocks = thread->PartialBlocks; + int startX = thread->StartX; + int startY = thread->StartY; + + auto flags = args->uniforms->flags; + bool is_simple_shade = (flags & TriUniforms::simple_shade) == TriUniforms::simple_shade; + bool is_nearest_filter = (flags & TriUniforms::nearest_filter) == TriUniforms::nearest_filter; + bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; + uint32_t lightmask = is_fixed_light ? 
0 : 0xffffffff; + auto colormaps = args->colormaps; + + // Calculate gradients + const TriVertex &v1 = *args->v1; + const TriVertex &v2 = *args->v2; + const TriVertex &v3 = *args->v3; + ScreenTriangleStepVariables gradientX; + ScreenTriangleStepVariables gradientY; + ScreenTriangleStepVariables start; + gradientX.W = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + gradientY.W = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + start.W = v1.w + gradientX.W * (startX - v1.x) + gradientY.W * (startY - v1.y); + for (int i = 0; i < TriVertex::NumVarying; i++) + { + gradientX.Varying[i] = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + gradientY.Varying[i] = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + start.Varying[i] = v1.varying[i] * v1.w + gradientX.Varying[i] * (startX - v1.x) + gradientY.Varying[i] * (startY - v1.y); + } + + const uint32_t * RESTRICT texPixels = (const uint32_t *)args->texturePixels; + uint32_t texWidth = args->textureWidth; + uint32_t texHeight = args->textureHeight; + + uint32_t * RESTRICT destOrg = (uint32_t*)args->dest; + int pitch = args->pitch; + + uint32_t light = args->uniforms->light; + float shade = (64.0f - (light * 255 / 256 + 12.0f) * 32.0f / 128.0f) / 32.0f; + float globVis = args->uniforms->globvis * (1.0f / 32.0f); + + uint32_t color = args->uniforms->color; + + for (int i = 0; i < numSpans; i++) + { + const auto &span = fullSpans[i]; + + uint32_t *dest = destOrg + span.X + span.Y * pitch; + int width = span.Length; + int height = 8; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (span.X - startX) + gradientY.W * (span.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (span.X - startX) + gradientY.Varying[j] * (span.Y - startY); + + 
for (int y = 0; y < height; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + for (int x = 0; x < width; x++) + { + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int ix = 0; ix < 8; ix++) + { + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + uint32_t fg = texPixels[texelX * texHeight + texelY]; + uint32_t r = RPART(fg); + uint32_t g = GPART(fg); + uint32_t b = BPART(fg); + r = (r * lightpos) >> 16; + g = (g * lightpos) >> 16; + b = (b * lightpos) >> 16; + + uint32_t a = APART(fg); + a += a >> 7; + uint32_t inv_a = 256 - a; + uint32_t bg = dest[x * 8 + ix]; + uint32_t bg_red = RPART(bg); + uint32_t bg_green = GPART(bg); + uint32_t bg_blue = BPART(bg); + r = (r * a + bg_red * inv_a + 127) >> 8; + g = (g * a + bg_green * inv_a + 127) >> 8; + b = (b * a + bg_blue * inv_a + 127) >> 8; + fg = 0xff000000 | (r << 16) | (g << 8) | b; + dest[x * 8 + ix] = fg; + + for (int j = 0; j < TriVertex::NumVarying; j++) + 
varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } + + for (int i = 0; i < numBlocks; i++) + { + const auto &block = partialBlocks[i]; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (block.X - startX) + gradientY.W * (block.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (block.X - startX) + gradientY.Varying[j] * (block.Y - startY); + + uint32_t *dest = destOrg + block.X + block.Y * pitch; + uint32_t mask0 = block.Mask0; + uint32_t mask1 = block.Mask1; + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask0 & (1 << 31)) + { + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = 
((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + uint32_t fg = texPixels[texelX * texHeight + texelY]; + uint32_t r = RPART(fg); + uint32_t g = GPART(fg); + uint32_t b = BPART(fg); + r = (r * lightpos) >> 16; + g = (g * lightpos) >> 16; + b = (b * lightpos) >> 16; + + uint32_t a = APART(fg); + a += a >> 7; + uint32_t inv_a = 256 - a; + uint32_t bg = dest[x]; + uint32_t bg_red = RPART(bg); + uint32_t bg_green = GPART(bg); + uint32_t bg_blue = BPART(bg); + r = (r * a + bg_red * inv_a + 127) >> 8; + g = (g * a + bg_green * inv_a + 127) >> 8; + b = (b * a + bg_blue * inv_a + 127) >> 8; + fg = 0xff000000 | (r << 16) | (g << 8) | b; + dest[x] = fg; + } + mask0 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - 
lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask1 & (1 << 31)) + { + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + uint32_t fg = texPixels[texelX * texHeight + texelY]; + uint32_t r = RPART(fg); + uint32_t g = GPART(fg); + uint32_t b = BPART(fg); + r = (r * lightpos) >> 16; + g = (g * lightpos) >> 16; + b = (b * lightpos) >> 16; + + uint32_t a = APART(fg); + a += a >> 7; + uint32_t inv_a = 256 - a; + uint32_t bg = dest[x]; + uint32_t bg_red = RPART(bg); + uint32_t bg_green = GPART(bg); + uint32_t bg_blue = BPART(bg); + r = (r * a + bg_red * inv_a + 127) >> 8; + g = (g * a + bg_green * inv_a + 127) >> 8; + b = (b * a + bg_blue * inv_a + 127) >> 8; + fg = 0xff000000 | (r << 16) | (g << 8) | b; + dest[x] = fg; + } + mask1 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } +} + +static void TriDraw32TranslateAdd(const TriDrawTriangleArgs *args, WorkerThreadData *thread) +{ + int numSpans = thread->NumFullSpans; + auto fullSpans = thread->FullSpans; + int numBlocks = thread->NumPartialBlocks; + auto partialBlocks = thread->PartialBlocks; + int startX = thread->StartX; + int startY = thread->StartY; + + auto flags = args->uniforms->flags; + bool is_simple_shade = (flags & TriUniforms::simple_shade) == TriUniforms::simple_shade; + bool is_nearest_filter = (flags & TriUniforms::nearest_filter) == TriUniforms::nearest_filter; + bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; + uint32_t lightmask = is_fixed_light ? 
0 : 0xffffffff; + auto colormaps = args->colormaps; + + // Calculate gradients + const TriVertex &v1 = *args->v1; + const TriVertex &v2 = *args->v2; + const TriVertex &v3 = *args->v3; + ScreenTriangleStepVariables gradientX; + ScreenTriangleStepVariables gradientY; + ScreenTriangleStepVariables start; + gradientX.W = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + gradientY.W = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + start.W = v1.w + gradientX.W * (startX - v1.x) + gradientY.W * (startY - v1.y); + for (int i = 0; i < TriVertex::NumVarying; i++) + { + gradientX.Varying[i] = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + gradientY.Varying[i] = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + start.Varying[i] = v1.varying[i] * v1.w + gradientX.Varying[i] * (startX - v1.x) + gradientY.Varying[i] * (startY - v1.y); + } + + const uint32_t * RESTRICT texPixels = (const uint32_t *)args->texturePixels; + uint32_t texWidth = args->textureWidth; + uint32_t texHeight = args->textureHeight; + + uint32_t * RESTRICT destOrg = (uint32_t*)args->dest; + int pitch = args->pitch; + + uint32_t light = args->uniforms->light; + float shade = (64.0f - (light * 255 / 256 + 12.0f) * 32.0f / 128.0f) / 32.0f; + float globVis = args->uniforms->globvis * (1.0f / 32.0f); + + uint32_t color = args->uniforms->color; + + for (int i = 0; i < numSpans; i++) + { + const auto &span = fullSpans[i]; + + uint32_t *dest = destOrg + span.X + span.Y * pitch; + int width = span.Length; + int height = 8; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (span.X - startX) + gradientY.W * (span.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (span.X - startX) + gradientY.Varying[j] * (span.Y - startY); + + 
for (int y = 0; y < height; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + for (int x = 0; x < width; x++) + { + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int ix = 0; ix < 8; ix++) + { + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + uint32_t fg = texPixels[texelX * texHeight + texelY]; + uint32_t r = RPART(fg); + uint32_t g = GPART(fg); + uint32_t b = BPART(fg); + r = (r * lightpos) >> 16; + g = (g * lightpos) >> 16; + b = (b * lightpos) >> 16; + + uint32_t a = APART(fg); + a += a >> 7; + uint32_t inv_a = 256 - a; + uint32_t bg = dest[x * 8 + ix]; + uint32_t bg_red = RPART(bg); + uint32_t bg_green = GPART(bg); + uint32_t bg_blue = BPART(bg); + r = (r * a + bg_red * inv_a + 127) >> 8; + g = (g * a + bg_green * inv_a + 127) >> 8; + b = (b * a + bg_blue * inv_a + 127) >> 8; + fg = 0xff000000 | (r << 16) | (g << 8) | b; + dest[x * 8 + ix] = fg; + + for (int j = 0; j < TriVertex::NumVarying; j++) + 
varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } + + for (int i = 0; i < numBlocks; i++) + { + const auto &block = partialBlocks[i]; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (block.X - startX) + gradientY.W * (block.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (block.X - startX) + gradientY.Varying[j] * (block.Y - startY); + + uint32_t *dest = destOrg + block.X + block.Y * pitch; + uint32_t mask0 = block.Mask0; + uint32_t mask1 = block.Mask1; + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask0 & (1 << 31)) + { + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = 
((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + uint32_t fg = texPixels[texelX * texHeight + texelY]; + uint32_t r = RPART(fg); + uint32_t g = GPART(fg); + uint32_t b = BPART(fg); + r = (r * lightpos) >> 16; + g = (g * lightpos) >> 16; + b = (b * lightpos) >> 16; + + uint32_t a = APART(fg); + a += a >> 7; + uint32_t inv_a = 256 - a; + uint32_t bg = dest[x]; + uint32_t bg_red = RPART(bg); + uint32_t bg_green = GPART(bg); + uint32_t bg_blue = BPART(bg); + r = (r * a + bg_red * inv_a + 127) >> 8; + g = (g * a + bg_green * inv_a + 127) >> 8; + b = (b * a + bg_blue * inv_a + 127) >> 8; + fg = 0xff000000 | (r << 16) | (g << 8) | b; + dest[x] = fg; + } + mask0 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - 
lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask1 & (1 << 31)) + { + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + uint32_t fg = texPixels[texelX * texHeight + texelY]; + uint32_t r = RPART(fg); + uint32_t g = GPART(fg); + uint32_t b = BPART(fg); + r = (r * lightpos) >> 16; + g = (g * lightpos) >> 16; + b = (b * lightpos) >> 16; + + uint32_t a = APART(fg); + a += a >> 7; + uint32_t inv_a = 256 - a; + uint32_t bg = dest[x]; + uint32_t bg_red = RPART(bg); + uint32_t bg_green = GPART(bg); + uint32_t bg_blue = BPART(bg); + r = (r * a + bg_red * inv_a + 127) >> 8; + g = (g * a + bg_green * inv_a + 127) >> 8; + b = (b * a + bg_blue * inv_a + 127) >> 8; + fg = 0xff000000 | (r << 16) | (g << 8) | b; + dest[x] = fg; + } + mask1 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } +} + +static void TriDraw32TranslateSub(const TriDrawTriangleArgs *args, WorkerThreadData *thread) +{ + int numSpans = thread->NumFullSpans; + auto fullSpans = thread->FullSpans; + int numBlocks = thread->NumPartialBlocks; + auto partialBlocks = thread->PartialBlocks; + int startX = thread->StartX; + int startY = thread->StartY; + + auto flags = args->uniforms->flags; + bool is_simple_shade = (flags & TriUniforms::simple_shade) == TriUniforms::simple_shade; + bool is_nearest_filter = (flags & TriUniforms::nearest_filter) == TriUniforms::nearest_filter; + bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; + uint32_t lightmask = is_fixed_light ? 
0 : 0xffffffff; + auto colormaps = args->colormaps; + + // Calculate gradients + const TriVertex &v1 = *args->v1; + const TriVertex &v2 = *args->v2; + const TriVertex &v3 = *args->v3; + ScreenTriangleStepVariables gradientX; + ScreenTriangleStepVariables gradientY; + ScreenTriangleStepVariables start; + gradientX.W = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + gradientY.W = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + start.W = v1.w + gradientX.W * (startX - v1.x) + gradientY.W * (startY - v1.y); + for (int i = 0; i < TriVertex::NumVarying; i++) + { + gradientX.Varying[i] = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + gradientY.Varying[i] = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + start.Varying[i] = v1.varying[i] * v1.w + gradientX.Varying[i] * (startX - v1.x) + gradientY.Varying[i] * (startY - v1.y); + } + + const uint32_t * RESTRICT texPixels = (const uint32_t *)args->texturePixels; + uint32_t texWidth = args->textureWidth; + uint32_t texHeight = args->textureHeight; + + uint32_t * RESTRICT destOrg = (uint32_t*)args->dest; + int pitch = args->pitch; + + uint32_t light = args->uniforms->light; + float shade = (64.0f - (light * 255 / 256 + 12.0f) * 32.0f / 128.0f) / 32.0f; + float globVis = args->uniforms->globvis * (1.0f / 32.0f); + + uint32_t color = args->uniforms->color; + + for (int i = 0; i < numSpans; i++) + { + const auto &span = fullSpans[i]; + + uint32_t *dest = destOrg + span.X + span.Y * pitch; + int width = span.Length; + int height = 8; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (span.X - startX) + gradientY.W * (span.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (span.X - startX) + gradientY.Varying[j] * (span.Y - startY); + + 
for (int y = 0; y < height; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + for (int x = 0; x < width; x++) + { + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int ix = 0; ix < 8; ix++) + { + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + uint32_t fg = texPixels[texelX * texHeight + texelY]; + uint32_t r = RPART(fg); + uint32_t g = GPART(fg); + uint32_t b = BPART(fg); + r = (r * lightpos) >> 16; + g = (g * lightpos) >> 16; + b = (b * lightpos) >> 16; + + uint32_t a = APART(fg); + a += a >> 7; + uint32_t inv_a = 256 - a; + uint32_t bg = dest[x * 8 + ix]; + uint32_t bg_red = RPART(bg); + uint32_t bg_green = GPART(bg); + uint32_t bg_blue = BPART(bg); + r = (r * a + bg_red * inv_a + 127) >> 8; + g = (g * a + bg_green * inv_a + 127) >> 8; + b = (b * a + bg_blue * inv_a + 127) >> 8; + fg = 0xff000000 | (r << 16) | (g << 8) | b; + dest[x * 8 + ix] = fg; + + for (int j = 0; j < TriVertex::NumVarying; j++) + 
varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } + + for (int i = 0; i < numBlocks; i++) + { + const auto &block = partialBlocks[i]; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (block.X - startX) + gradientY.W * (block.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (block.X - startX) + gradientY.Varying[j] * (block.Y - startY); + + uint32_t *dest = destOrg + block.X + block.Y * pitch; + uint32_t mask0 = block.Mask0; + uint32_t mask1 = block.Mask1; + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask0 & (1 << 31)) + { + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = 
((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + uint32_t fg = texPixels[texelX * texHeight + texelY]; + uint32_t r = RPART(fg); + uint32_t g = GPART(fg); + uint32_t b = BPART(fg); + r = (r * lightpos) >> 16; + g = (g * lightpos) >> 16; + b = (b * lightpos) >> 16; + + uint32_t a = APART(fg); + a += a >> 7; + uint32_t inv_a = 256 - a; + uint32_t bg = dest[x]; + uint32_t bg_red = RPART(bg); + uint32_t bg_green = GPART(bg); + uint32_t bg_blue = BPART(bg); + r = (r * a + bg_red * inv_a + 127) >> 8; + g = (g * a + bg_green * inv_a + 127) >> 8; + b = (b * a + bg_blue * inv_a + 127) >> 8; + fg = 0xff000000 | (r << 16) | (g << 8) | b; + dest[x] = fg; + } + mask0 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - 
lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask1 & (1 << 31)) + { + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + uint32_t fg = texPixels[texelX * texHeight + texelY]; + uint32_t r = RPART(fg); + uint32_t g = GPART(fg); + uint32_t b = BPART(fg); + r = (r * lightpos) >> 16; + g = (g * lightpos) >> 16; + b = (b * lightpos) >> 16; + + uint32_t a = APART(fg); + a += a >> 7; + uint32_t inv_a = 256 - a; + uint32_t bg = dest[x]; + uint32_t bg_red = RPART(bg); + uint32_t bg_green = GPART(bg); + uint32_t bg_blue = BPART(bg); + r = (r * a + bg_red * inv_a + 127) >> 8; + g = (g * a + bg_green * inv_a + 127) >> 8; + b = (b * a + bg_blue * inv_a + 127) >> 8; + fg = 0xff000000 | (r << 16) | (g << 8) | b; + dest[x] = fg; + } + mask1 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } +} + +static void TriDraw32TranslateRevSub(const TriDrawTriangleArgs *args, WorkerThreadData *thread) +{ + int numSpans = thread->NumFullSpans; + auto fullSpans = thread->FullSpans; + int numBlocks = thread->NumPartialBlocks; + auto partialBlocks = thread->PartialBlocks; + int startX = thread->StartX; + int startY = thread->StartY; + + auto flags = args->uniforms->flags; + bool is_simple_shade = (flags & TriUniforms::simple_shade) == TriUniforms::simple_shade; + bool is_nearest_filter = (flags & TriUniforms::nearest_filter) == TriUniforms::nearest_filter; + bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; + uint32_t lightmask = is_fixed_light ? 
0 : 0xffffffff; + auto colormaps = args->colormaps; + + // Calculate gradients + const TriVertex &v1 = *args->v1; + const TriVertex &v2 = *args->v2; + const TriVertex &v3 = *args->v3; + ScreenTriangleStepVariables gradientX; + ScreenTriangleStepVariables gradientY; + ScreenTriangleStepVariables start; + gradientX.W = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + gradientY.W = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + start.W = v1.w + gradientX.W * (startX - v1.x) + gradientY.W * (startY - v1.y); + for (int i = 0; i < TriVertex::NumVarying; i++) + { + gradientX.Varying[i] = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + gradientY.Varying[i] = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + start.Varying[i] = v1.varying[i] * v1.w + gradientX.Varying[i] * (startX - v1.x) + gradientY.Varying[i] * (startY - v1.y); + } + + const uint32_t * RESTRICT texPixels = (const uint32_t *)args->texturePixels; + uint32_t texWidth = args->textureWidth; + uint32_t texHeight = args->textureHeight; + + uint32_t * RESTRICT destOrg = (uint32_t*)args->dest; + int pitch = args->pitch; + + uint32_t light = args->uniforms->light; + float shade = (64.0f - (light * 255 / 256 + 12.0f) * 32.0f / 128.0f) / 32.0f; + float globVis = args->uniforms->globvis * (1.0f / 32.0f); + + uint32_t color = args->uniforms->color; + + for (int i = 0; i < numSpans; i++) + { + const auto &span = fullSpans[i]; + + uint32_t *dest = destOrg + span.X + span.Y * pitch; + int width = span.Length; + int height = 8; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (span.X - startX) + gradientY.W * (span.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (span.X - startX) + gradientY.Varying[j] * (span.Y - startY); + + 
for (int y = 0; y < height; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + for (int x = 0; x < width; x++) + { + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int ix = 0; ix < 8; ix++) + { + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + uint32_t fg = texPixels[texelX * texHeight + texelY]; + uint32_t r = RPART(fg); + uint32_t g = GPART(fg); + uint32_t b = BPART(fg); + r = (r * lightpos) >> 16; + g = (g * lightpos) >> 16; + b = (b * lightpos) >> 16; + + uint32_t a = APART(fg); + a += a >> 7; + uint32_t inv_a = 256 - a; + uint32_t bg = dest[x * 8 + ix]; + uint32_t bg_red = RPART(bg); + uint32_t bg_green = GPART(bg); + uint32_t bg_blue = BPART(bg); + r = (r * a + bg_red * inv_a + 127) >> 8; + g = (g * a + bg_green * inv_a + 127) >> 8; + b = (b * a + bg_blue * inv_a + 127) >> 8; + fg = 0xff000000 | (r << 16) | (g << 8) | b; + dest[x * 8 + ix] = fg; + + for (int j = 0; j < TriVertex::NumVarying; j++) + 
varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } + + for (int i = 0; i < numBlocks; i++) + { + const auto &block = partialBlocks[i]; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (block.X - startX) + gradientY.W * (block.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (block.X - startX) + gradientY.Varying[j] * (block.Y - startY); + + uint32_t *dest = destOrg + block.X + block.Y * pitch; + uint32_t mask0 = block.Mask0; + uint32_t mask1 = block.Mask1; + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask0 & (1 << 31)) + { + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = 
((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + uint32_t fg = texPixels[texelX * texHeight + texelY]; + uint32_t r = RPART(fg); + uint32_t g = GPART(fg); + uint32_t b = BPART(fg); + r = (r * lightpos) >> 16; + g = (g * lightpos) >> 16; + b = (b * lightpos) >> 16; + + uint32_t a = APART(fg); + a += a >> 7; + uint32_t inv_a = 256 - a; + uint32_t bg = dest[x]; + uint32_t bg_red = RPART(bg); + uint32_t bg_green = GPART(bg); + uint32_t bg_blue = BPART(bg); + r = (r * a + bg_red * inv_a + 127) >> 8; + g = (g * a + bg_green * inv_a + 127) >> 8; + b = (b * a + bg_blue * inv_a + 127) >> 8; + fg = 0xff000000 | (r << 16) | (g << 8) | b; + dest[x] = fg; + } + mask0 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - 
lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask1 & (1 << 31)) + { + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + uint32_t fg = texPixels[texelX * texHeight + texelY]; + uint32_t r = RPART(fg); + uint32_t g = GPART(fg); + uint32_t b = BPART(fg); + r = (r * lightpos) >> 16; + g = (g * lightpos) >> 16; + b = (b * lightpos) >> 16; + + uint32_t a = APART(fg); + a += a >> 7; + uint32_t inv_a = 256 - a; + uint32_t bg = dest[x]; + uint32_t bg_red = RPART(bg); + uint32_t bg_green = GPART(bg); + uint32_t bg_blue = BPART(bg); + r = (r * a + bg_red * inv_a + 127) >> 8; + g = (g * a + bg_green * inv_a + 127) >> 8; + b = (b * a + bg_blue * inv_a + 127) >> 8; + fg = 0xff000000 | (r << 16) | (g << 8) | b; + dest[x] = fg; + } + mask1 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } +} + +static void TriDraw32AddSrcColorOneMinusSrcColor(const TriDrawTriangleArgs *args, WorkerThreadData *thread) +{ + int numSpans = thread->NumFullSpans; + auto fullSpans = thread->FullSpans; + int numBlocks = thread->NumPartialBlocks; + auto partialBlocks = thread->PartialBlocks; + int startX = thread->StartX; + int startY = thread->StartY; + + auto flags = args->uniforms->flags; + bool is_simple_shade = (flags & TriUniforms::simple_shade) == TriUniforms::simple_shade; + bool is_nearest_filter = (flags & TriUniforms::nearest_filter) == TriUniforms::nearest_filter; + bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; + uint32_t lightmask = is_fixed_light ? 
0 : 0xffffffff; + auto colormaps = args->colormaps; + + // Calculate gradients + const TriVertex &v1 = *args->v1; + const TriVertex &v2 = *args->v2; + const TriVertex &v3 = *args->v3; + ScreenTriangleStepVariables gradientX; + ScreenTriangleStepVariables gradientY; + ScreenTriangleStepVariables start; + gradientX.W = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + gradientY.W = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + start.W = v1.w + gradientX.W * (startX - v1.x) + gradientY.W * (startY - v1.y); + for (int i = 0; i < TriVertex::NumVarying; i++) + { + gradientX.Varying[i] = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + gradientY.Varying[i] = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + start.Varying[i] = v1.varying[i] * v1.w + gradientX.Varying[i] * (startX - v1.x) + gradientY.Varying[i] * (startY - v1.y); + } + + const uint32_t * RESTRICT texPixels = (const uint32_t *)args->texturePixels; + uint32_t texWidth = args->textureWidth; + uint32_t texHeight = args->textureHeight; + + uint32_t * RESTRICT destOrg = (uint32_t*)args->dest; + int pitch = args->pitch; + + uint32_t light = args->uniforms->light; + float shade = (64.0f - (light * 255 / 256 + 12.0f) * 32.0f / 128.0f) / 32.0f; + float globVis = args->uniforms->globvis * (1.0f / 32.0f); + + uint32_t color = args->uniforms->color; + + for (int i = 0; i < numSpans; i++) + { + const auto &span = fullSpans[i]; + + uint32_t *dest = destOrg + span.X + span.Y * pitch; + int width = span.Length; + int height = 8; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (span.X - startX) + gradientY.W * (span.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (span.X - startX) + gradientY.Varying[j] * (span.Y - startY); + + 
for (int y = 0; y < height; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + for (int x = 0; x < width; x++) + { + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int ix = 0; ix < 8; ix++) + { + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + uint32_t fg = texPixels[texelX * texHeight + texelY]; + uint32_t r = RPART(fg); + uint32_t g = GPART(fg); + uint32_t b = BPART(fg); + r = (r * lightpos) >> 16; + g = (g * lightpos) >> 16; + b = (b * lightpos) >> 16; + + uint32_t a = APART(fg); + a += a >> 7; + uint32_t inv_a = 256 - a; + uint32_t bg = dest[x * 8 + ix]; + uint32_t bg_red = RPART(bg); + uint32_t bg_green = GPART(bg); + uint32_t bg_blue = BPART(bg); + r = (r * a + bg_red * inv_a + 127) >> 8; + g = (g * a + bg_green * inv_a + 127) >> 8; + b = (b * a + bg_blue * inv_a + 127) >> 8; + fg = 0xff000000 | (r << 16) | (g << 8) | b; + dest[x * 8 + ix] = fg; + + for (int j = 0; j < TriVertex::NumVarying; j++) + 
varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } + + for (int i = 0; i < numBlocks; i++) + { + const auto &block = partialBlocks[i]; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (block.X - startX) + gradientY.W * (block.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (block.X - startX) + gradientY.Varying[j] * (block.Y - startY); + + uint32_t *dest = destOrg + block.X + block.Y * pitch; + uint32_t mask0 = block.Mask0; + uint32_t mask1 = block.Mask1; + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask0 & (1 << 31)) + { + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = 
((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + uint32_t fg = texPixels[texelX * texHeight + texelY]; + uint32_t r = RPART(fg); + uint32_t g = GPART(fg); + uint32_t b = BPART(fg); + r = (r * lightpos) >> 16; + g = (g * lightpos) >> 16; + b = (b * lightpos) >> 16; + + uint32_t a = APART(fg); + a += a >> 7; + uint32_t inv_a = 256 - a; + uint32_t bg = dest[x]; + uint32_t bg_red = RPART(bg); + uint32_t bg_green = GPART(bg); + uint32_t bg_blue = BPART(bg); + r = (r * a + bg_red * inv_a + 127) >> 8; + g = (g * a + bg_green * inv_a + 127) >> 8; + b = (b * a + bg_blue * inv_a + 127) >> 8; + fg = 0xff000000 | (r << 16) | (g << 8) | b; + dest[x] = fg; + } + mask0 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - 
lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask1 & (1 << 31)) + { + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + uint32_t fg = texPixels[texelX * texHeight + texelY]; + uint32_t r = RPART(fg); + uint32_t g = GPART(fg); + uint32_t b = BPART(fg); + r = (r * lightpos) >> 16; + g = (g * lightpos) >> 16; + b = (b * lightpos) >> 16; + + uint32_t a = APART(fg); + a += a >> 7; + uint32_t inv_a = 256 - a; + uint32_t bg = dest[x]; + uint32_t bg_red = RPART(bg); + uint32_t bg_green = GPART(bg); + uint32_t bg_blue = BPART(bg); + r = (r * a + bg_red * inv_a + 127) >> 8; + g = (g * a + bg_green * inv_a + 127) >> 8; + b = (b * a + bg_blue * inv_a + 127) >> 8; + fg = 0xff000000 | (r << 16) | (g << 8) | b; + dest[x] = fg; + } + mask1 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } +} + +static void TriDraw32Skycap(const TriDrawTriangleArgs *args, WorkerThreadData *thread) +{ + int numSpans = thread->NumFullSpans; + auto fullSpans = thread->FullSpans; + int numBlocks = thread->NumPartialBlocks; + auto partialBlocks = thread->PartialBlocks; + int startX = thread->StartX; + int startY = thread->StartY; + + auto flags = args->uniforms->flags; + bool is_simple_shade = (flags & TriUniforms::simple_shade) == TriUniforms::simple_shade; + bool is_nearest_filter = (flags & TriUniforms::nearest_filter) == TriUniforms::nearest_filter; + bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; + uint32_t lightmask = is_fixed_light ? 
0 : 0xffffffff; + auto colormaps = args->colormaps; + + // Calculate gradients + const TriVertex &v1 = *args->v1; + const TriVertex &v2 = *args->v2; + const TriVertex &v3 = *args->v3; + ScreenTriangleStepVariables gradientX; + ScreenTriangleStepVariables gradientY; + ScreenTriangleStepVariables start; + gradientX.W = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + gradientY.W = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + start.W = v1.w + gradientX.W * (startX - v1.x) + gradientY.W * (startY - v1.y); + for (int i = 0; i < TriVertex::NumVarying; i++) + { + gradientX.Varying[i] = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + gradientY.Varying[i] = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + start.Varying[i] = v1.varying[i] * v1.w + gradientX.Varying[i] * (startX - v1.x) + gradientY.Varying[i] * (startY - v1.y); + } + + const uint32_t * RESTRICT texPixels = (const uint32_t *)args->texturePixels; + uint32_t texWidth = args->textureWidth; + uint32_t texHeight = args->textureHeight; + + uint32_t * RESTRICT destOrg = (uint32_t*)args->dest; + int pitch = args->pitch; + + uint32_t light = args->uniforms->light; + float shade = (64.0f - (light * 255 / 256 + 12.0f) * 32.0f / 128.0f) / 32.0f; + float globVis = args->uniforms->globvis * (1.0f / 32.0f); + + uint32_t color = args->uniforms->color; + + for (int i = 0; i < numSpans; i++) + { + const auto &span = fullSpans[i]; + + uint32_t *dest = destOrg + span.X + span.Y * pitch; + int width = span.Length; + int height = 8; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (span.X - startX) + gradientY.W * (span.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (span.X - startX) + gradientY.Varying[j] * (span.Y - startY); + + 
for (int y = 0; y < height; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + for (int x = 0; x < width; x++) + { + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int ix = 0; ix < 8; ix++) + { + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + uint32_t fg = texPixels[texelX * texHeight + texelY]; + uint32_t r = RPART(fg); + uint32_t g = GPART(fg); + uint32_t b = BPART(fg); + r = (r * lightpos) >> 16; + g = (g * lightpos) >> 16; + b = (b * lightpos) >> 16; + + uint32_t a = APART(fg); + a += a >> 7; + uint32_t inv_a = 256 - a; + uint32_t bg = dest[x * 8 + ix]; + uint32_t bg_red = RPART(bg); + uint32_t bg_green = GPART(bg); + uint32_t bg_blue = BPART(bg); + r = (r * a + bg_red * inv_a + 127) >> 8; + g = (g * a + bg_green * inv_a + 127) >> 8; + b = (b * a + bg_blue * inv_a + 127) >> 8; + fg = 0xff000000 | (r << 16) | (g << 8) | b; + dest[x * 8 + ix] = fg; + + for (int j = 0; j < TriVertex::NumVarying; j++) + 
varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } + + for (int i = 0; i < numBlocks; i++) + { + const auto &block = partialBlocks[i]; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (block.X - startX) + gradientY.W * (block.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (block.X - startX) + gradientY.Varying[j] * (block.Y - startY); + + uint32_t *dest = destOrg + block.X + block.Y * pitch; + uint32_t mask0 = block.Mask0; + uint32_t mask1 = block.Mask1; + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask0 & (1 << 31)) + { + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = 
((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + uint32_t fg = texPixels[texelX * texHeight + texelY]; + uint32_t r = RPART(fg); + uint32_t g = GPART(fg); + uint32_t b = BPART(fg); + r = (r * lightpos) >> 16; + g = (g * lightpos) >> 16; + b = (b * lightpos) >> 16; + + uint32_t a = APART(fg); + a += a >> 7; + uint32_t inv_a = 256 - a; + uint32_t bg = dest[x]; + uint32_t bg_red = RPART(bg); + uint32_t bg_green = GPART(bg); + uint32_t bg_blue = BPART(bg); + r = (r * a + bg_red * inv_a + 127) >> 8; + g = (g * a + bg_green * inv_a + 127) >> 8; + b = (b * a + bg_blue * inv_a + 127) >> 8; + fg = 0xff000000 | (r << 16) | (g << 8) | b; + dest[x] = fg; + } + mask0 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - 
lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask1 & (1 << 31)) + { + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + uint32_t fg = texPixels[texelX * texHeight + texelY]; + uint32_t r = RPART(fg); + uint32_t g = GPART(fg); + uint32_t b = BPART(fg); + r = (r * lightpos) >> 16; + g = (g * lightpos) >> 16; + b = (b * lightpos) >> 16; + + uint32_t a = APART(fg); + a += a >> 7; + uint32_t inv_a = 256 - a; + uint32_t bg = dest[x]; + uint32_t bg_red = RPART(bg); + uint32_t bg_green = GPART(bg); + uint32_t bg_blue = BPART(bg); + r = (r * a + bg_red * inv_a + 127) >> 8; + g = (g * a + bg_green * inv_a + 127) >> 8; + b = (b * a + bg_blue * inv_a + 127) >> 8; + fg = 0xff000000 | (r << 16) | (g << 8) | b; + dest[x] = fg; + } + mask1 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } +} + +static void TriFill8Copy(const TriDrawTriangleArgs *args, WorkerThreadData *thread) +{ + int numSpans = thread->NumFullSpans; + auto fullSpans = thread->FullSpans; + int numBlocks = thread->NumPartialBlocks; + auto partialBlocks = thread->PartialBlocks; + int startX = thread->StartX; + int startY = thread->StartY; + + auto flags = args->uniforms->flags; + bool is_simple_shade = (flags & TriUniforms::simple_shade) == TriUniforms::simple_shade; + bool is_nearest_filter = (flags & TriUniforms::nearest_filter) == TriUniforms::nearest_filter; + bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; + uint32_t lightmask = is_fixed_light ? 
0 : 0xffffffff; + auto colormaps = args->colormaps; + + // Calculate gradients + const TriVertex &v1 = *args->v1; + const TriVertex &v2 = *args->v2; + const TriVertex &v3 = *args->v3; + ScreenTriangleStepVariables gradientX; + ScreenTriangleStepVariables gradientY; + ScreenTriangleStepVariables start; + gradientX.W = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + gradientY.W = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + start.W = v1.w + gradientX.W * (startX - v1.x) + gradientY.W * (startY - v1.y); + for (int i = 0; i < TriVertex::NumVarying; i++) + { + gradientX.Varying[i] = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + gradientY.Varying[i] = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + start.Varying[i] = v1.varying[i] * v1.w + gradientX.Varying[i] * (startX - v1.x) + gradientY.Varying[i] * (startY - v1.y); + } + + const uint8_t * RESTRICT texPixels = (const uint8_t *)args->texturePixels; + uint32_t texWidth = args->textureWidth; + uint32_t texHeight = args->textureHeight; + + uint8_t * RESTRICT destOrg = (uint8_t*)args->dest; + int pitch = args->pitch; + + uint32_t light = args->uniforms->light; + float shade = (64.0f - (light * 255 / 256 + 12.0f) * 32.0f / 128.0f) / 32.0f; + float globVis = args->uniforms->globvis * (1.0f / 32.0f); + + uint8_t color = args->uniforms->color; + + for (int i = 0; i < numSpans; i++) + { + const auto &span = fullSpans[i]; + + uint8_t *dest = destOrg + span.X + span.Y * pitch; + int width = span.Length; + int height = 8; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (span.X - startX) + gradientY.W * (span.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (span.X - startX) + gradientY.Varying[j] * (span.Y - startY); + + for 
(int y = 0; y < height; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + for (int x = 0; x < width; x++) + { + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int ix = 0; ix < 8; ix++) + { + uint8_t fg = color; + int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + dest[x * 8 + ix] = fg; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } + + for (int i = 0; i < numBlocks; i++) + { + const auto &block = partialBlocks[i]; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (block.X - startX) + gradientY.W * (block.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (block.X - startX) + gradientY.Varying[j] * (block.Y - 
startY); + + uint8_t *dest = destOrg + block.X + block.Y * pitch; + uint32_t mask0 = block.Mask0; + uint32_t mask1 = block.Mask1; + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask0 & (1 << 31)) + { + uint8_t fg = color; + int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + dest[x] = fg; + } + mask0 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * 
rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask1 & (1 << 31)) + { + uint8_t fg = color; + int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + dest[x] = fg; + } + mask1 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } +} + +static void TriFill8AlphaBlend(const TriDrawTriangleArgs *args, WorkerThreadData *thread) +{ + int numSpans = thread->NumFullSpans; + auto fullSpans = thread->FullSpans; + int numBlocks = thread->NumPartialBlocks; + auto partialBlocks = thread->PartialBlocks; + int startX = thread->StartX; + int startY = thread->StartY; + + auto flags = args->uniforms->flags; + bool is_simple_shade = (flags & TriUniforms::simple_shade) == TriUniforms::simple_shade; + bool is_nearest_filter = (flags & TriUniforms::nearest_filter) == TriUniforms::nearest_filter; + bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; + uint32_t lightmask = 
is_fixed_light ? 0 : 0xffffffff; + auto colormaps = args->colormaps; + + // Calculate gradients + const TriVertex &v1 = *args->v1; + const TriVertex &v2 = *args->v2; + const TriVertex &v3 = *args->v3; + ScreenTriangleStepVariables gradientX; + ScreenTriangleStepVariables gradientY; + ScreenTriangleStepVariables start; + gradientX.W = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + gradientY.W = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + start.W = v1.w + gradientX.W * (startX - v1.x) + gradientY.W * (startY - v1.y); + for (int i = 0; i < TriVertex::NumVarying; i++) + { + gradientX.Varying[i] = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + gradientY.Varying[i] = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + start.Varying[i] = v1.varying[i] * v1.w + gradientX.Varying[i] * (startX - v1.x) + gradientY.Varying[i] * (startY - v1.y); + } + + const uint8_t * RESTRICT texPixels = (const uint8_t *)args->texturePixels; + uint32_t texWidth = args->textureWidth; + uint32_t texHeight = args->textureHeight; + + uint8_t * RESTRICT destOrg = (uint8_t*)args->dest; + int pitch = args->pitch; + + uint32_t light = args->uniforms->light; + float shade = (64.0f - (light * 255 / 256 + 12.0f) * 32.0f / 128.0f) / 32.0f; + float globVis = args->uniforms->globvis * (1.0f / 32.0f); + + uint8_t color = args->uniforms->color; + + for (int i = 0; i < numSpans; i++) + { + const auto &span = fullSpans[i]; + + uint8_t *dest = destOrg + span.X + span.Y * pitch; + int width = span.Length; + int height = 8; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (span.X - startX) + gradientY.W * (span.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (span.X - startX) + gradientY.Varying[j] * (span.Y - 
startY); + + for (int y = 0; y < height; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + for (int x = 0; x < width; x++) + { + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int ix = 0; ix < 8; ix++) + { + uint8_t fg = color; + int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + dest[x * 8 + ix] = fg; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } + + for (int i = 0; i < numBlocks; i++) + { + const auto &block = partialBlocks[i]; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (block.X - startX) + gradientY.W * (block.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (block.X - startX) + gradientY.Varying[j] 
* (block.Y - startY); + + uint8_t *dest = destOrg + block.X + block.Y * pitch; + uint32_t mask0 = block.Mask0; + uint32_t mask1 = block.Mask1; + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask0 & (1 << 31)) + { + uint8_t fg = color; + int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + dest[x] = fg; + } + mask0 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = 
(int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask1 & (1 << 31)) + { + uint8_t fg = color; + int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + dest[x] = fg; + } + mask1 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } +} + +static void TriFill8AddSolid(const TriDrawTriangleArgs *args, WorkerThreadData *thread) +{ + int numSpans = thread->NumFullSpans; + auto fullSpans = thread->FullSpans; + int numBlocks = thread->NumPartialBlocks; + auto partialBlocks = thread->PartialBlocks; + int startX = thread->StartX; + int startY = thread->StartY; + + auto flags = args->uniforms->flags; + bool is_simple_shade = (flags & TriUniforms::simple_shade) == TriUniforms::simple_shade; + bool is_nearest_filter = (flags & TriUniforms::nearest_filter) == TriUniforms::nearest_filter; + bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; + 
uint32_t lightmask = is_fixed_light ? 0 : 0xffffffff; + auto colormaps = args->colormaps; + + // Calculate gradients + const TriVertex &v1 = *args->v1; + const TriVertex &v2 = *args->v2; + const TriVertex &v3 = *args->v3; + ScreenTriangleStepVariables gradientX; + ScreenTriangleStepVariables gradientY; + ScreenTriangleStepVariables start; + gradientX.W = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + gradientY.W = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + start.W = v1.w + gradientX.W * (startX - v1.x) + gradientY.W * (startY - v1.y); + for (int i = 0; i < TriVertex::NumVarying; i++) + { + gradientX.Varying[i] = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + gradientY.Varying[i] = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + start.Varying[i] = v1.varying[i] * v1.w + gradientX.Varying[i] * (startX - v1.x) + gradientY.Varying[i] * (startY - v1.y); + } + + const uint8_t * RESTRICT texPixels = (const uint8_t *)args->texturePixels; + uint32_t texWidth = args->textureWidth; + uint32_t texHeight = args->textureHeight; + + uint8_t * RESTRICT destOrg = (uint8_t*)args->dest; + int pitch = args->pitch; + + uint32_t light = args->uniforms->light; + float shade = (64.0f - (light * 255 / 256 + 12.0f) * 32.0f / 128.0f) / 32.0f; + float globVis = args->uniforms->globvis * (1.0f / 32.0f); + + uint8_t color = args->uniforms->color; + + for (int i = 0; i < numSpans; i++) + { + const auto &span = fullSpans[i]; + + uint8_t *dest = destOrg + span.X + span.Y * pitch; + int width = span.Length; + int height = 8; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (span.X - startX) + gradientY.W * (span.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (span.X - startX) + 
gradientY.Varying[j] * (span.Y - startY); + + for (int y = 0; y < height; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + for (int x = 0; x < width; x++) + { + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int ix = 0; ix < 8; ix++) + { + uint8_t fg = color; + int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + dest[x * 8 + ix] = fg; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } + + for (int i = 0; i < numBlocks; i++) + { + const auto &block = partialBlocks[i]; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (block.X - startX) + gradientY.W * (block.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (block.X 
- startX) + gradientY.Varying[j] * (block.Y - startY); + + uint8_t *dest = destOrg + block.X + block.Y * pitch; + uint32_t mask0 = block.Mask0; + uint32_t mask1 = block.Mask1; + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask0 & (1 << 31)) + { + uint8_t fg = color; + int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + dest[x] = fg; + } + mask0 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + 
varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask1 & (1 << 31)) + { + uint8_t fg = color; + int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + dest[x] = fg; + } + mask1 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } +} + +static void TriFill8Add(const TriDrawTriangleArgs *args, WorkerThreadData *thread) +{ + int numSpans = thread->NumFullSpans; + auto fullSpans = thread->FullSpans; + int numBlocks = thread->NumPartialBlocks; + auto partialBlocks = thread->PartialBlocks; + int startX = thread->StartX; + int startY = thread->StartY; + + auto flags = args->uniforms->flags; + bool is_simple_shade = (flags & TriUniforms::simple_shade) == TriUniforms::simple_shade; + bool is_nearest_filter = (flags & TriUniforms::nearest_filter) == TriUniforms::nearest_filter; + bool is_fixed_light = (flags & TriUniforms::fixed_light) == 
TriUniforms::fixed_light; + uint32_t lightmask = is_fixed_light ? 0 : 0xffffffff; + auto colormaps = args->colormaps; + + // Calculate gradients + const TriVertex &v1 = *args->v1; + const TriVertex &v2 = *args->v2; + const TriVertex &v3 = *args->v3; + ScreenTriangleStepVariables gradientX; + ScreenTriangleStepVariables gradientY; + ScreenTriangleStepVariables start; + gradientX.W = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + gradientY.W = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + start.W = v1.w + gradientX.W * (startX - v1.x) + gradientY.W * (startY - v1.y); + for (int i = 0; i < TriVertex::NumVarying; i++) + { + gradientX.Varying[i] = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + gradientY.Varying[i] = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + start.Varying[i] = v1.varying[i] * v1.w + gradientX.Varying[i] * (startX - v1.x) + gradientY.Varying[i] * (startY - v1.y); + } + + const uint8_t * RESTRICT texPixels = (const uint8_t *)args->texturePixels; + uint32_t texWidth = args->textureWidth; + uint32_t texHeight = args->textureHeight; + + uint8_t * RESTRICT destOrg = (uint8_t*)args->dest; + int pitch = args->pitch; + + uint32_t light = args->uniforms->light; + float shade = (64.0f - (light * 255 / 256 + 12.0f) * 32.0f / 128.0f) / 32.0f; + float globVis = args->uniforms->globvis * (1.0f / 32.0f); + + uint8_t color = args->uniforms->color; + + for (int i = 0; i < numSpans; i++) + { + const auto &span = fullSpans[i]; + + uint8_t *dest = destOrg + span.X + span.Y * pitch; + int width = span.Length; + int height = 8; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (span.X - startX) + gradientY.W * (span.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * 
(span.X - startX) + gradientY.Varying[j] * (span.Y - startY); + + for (int y = 0; y < height; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + for (int x = 0; x < width; x++) + { + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int ix = 0; ix < 8; ix++) + { + uint8_t fg = color; + int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + dest[x * 8 + ix] = fg; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } + + for (int i = 0; i < numBlocks; i++) + { + const auto &block = partialBlocks[i]; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (block.X - startX) + gradientY.W * (block.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + 
gradientX.Varying[j] * (block.X - startX) + gradientY.Varying[j] * (block.Y - startY); + + uint8_t *dest = destOrg + block.X + block.Y * pitch; + uint32_t mask0 = block.Mask0; + uint32_t mask1 = block.Mask1; + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask0 & (1 << 31)) + { + uint8_t fg = color; + int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + dest[x] = fg; + } + mask0 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < 
TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask1 & (1 << 31)) + { + uint8_t fg = color; + int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + dest[x] = fg; + } + mask1 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } +} + +static void TriFill8Sub(const TriDrawTriangleArgs *args, WorkerThreadData *thread) +{ + int numSpans = thread->NumFullSpans; + auto fullSpans = thread->FullSpans; + int numBlocks = thread->NumPartialBlocks; + auto partialBlocks = thread->PartialBlocks; + int startX = thread->StartX; + int startY = thread->StartY; + + auto flags = args->uniforms->flags; + bool is_simple_shade = (flags & TriUniforms::simple_shade) == TriUniforms::simple_shade; + bool is_nearest_filter = (flags & TriUniforms::nearest_filter) == TriUniforms::nearest_filter; + bool is_fixed_light = (flags & 
TriUniforms::fixed_light) == TriUniforms::fixed_light; + uint32_t lightmask = is_fixed_light ? 0 : 0xffffffff; + auto colormaps = args->colormaps; + + // Calculate gradients + const TriVertex &v1 = *args->v1; + const TriVertex &v2 = *args->v2; + const TriVertex &v3 = *args->v3; + ScreenTriangleStepVariables gradientX; + ScreenTriangleStepVariables gradientY; + ScreenTriangleStepVariables start; + gradientX.W = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + gradientY.W = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + start.W = v1.w + gradientX.W * (startX - v1.x) + gradientY.W * (startY - v1.y); + for (int i = 0; i < TriVertex::NumVarying; i++) + { + gradientX.Varying[i] = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + gradientY.Varying[i] = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + start.Varying[i] = v1.varying[i] * v1.w + gradientX.Varying[i] * (startX - v1.x) + gradientY.Varying[i] * (startY - v1.y); + } + + const uint8_t * RESTRICT texPixels = (const uint8_t *)args->texturePixels; + uint32_t texWidth = args->textureWidth; + uint32_t texHeight = args->textureHeight; + + uint8_t * RESTRICT destOrg = (uint8_t*)args->dest; + int pitch = args->pitch; + + uint32_t light = args->uniforms->light; + float shade = (64.0f - (light * 255 / 256 + 12.0f) * 32.0f / 128.0f) / 32.0f; + float globVis = args->uniforms->globvis * (1.0f / 32.0f); + + uint8_t color = args->uniforms->color; + + for (int i = 0; i < numSpans; i++) + { + const auto &span = fullSpans[i]; + + uint8_t *dest = destOrg + span.X + span.Y * pitch; + int width = span.Length; + int height = 8; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (span.X - startX) + gradientY.W * (span.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + 
gradientX.Varying[j] * (span.X - startX) + gradientY.Varying[j] * (span.Y - startY); + + for (int y = 0; y < height; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + for (int x = 0; x < width; x++) + { + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int ix = 0; ix < 8; ix++) + { + uint8_t fg = color; + int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + dest[x * 8 + ix] = fg; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } + + for (int i = 0; i < numBlocks; i++) + { + const auto &block = partialBlocks[i]; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (block.X - startX) + gradientY.W * (block.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = 
start.Varying[j] + gradientX.Varying[j] * (block.X - startX) + gradientY.Varying[j] * (block.Y - startY); + + uint8_t *dest = destOrg + block.X + block.Y * pitch; + uint32_t mask0 = block.Mask0; + uint32_t mask1 = block.Mask1; + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask0 & (1 << 31)) + { + uint8_t fg = color; + int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + dest[x] = fg; + } + mask0 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + 
for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask1 & (1 << 31)) + { + uint8_t fg = color; + int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + dest[x] = fg; + } + mask1 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } +} + +static void TriFill8RevSub(const TriDrawTriangleArgs *args, WorkerThreadData *thread) +{ + int numSpans = thread->NumFullSpans; + auto fullSpans = thread->FullSpans; + int numBlocks = thread->NumPartialBlocks; + auto partialBlocks = thread->PartialBlocks; + int startX = thread->StartX; + int startY = thread->StartY; + + auto flags = args->uniforms->flags; + bool is_simple_shade = (flags & TriUniforms::simple_shade) == TriUniforms::simple_shade; + bool is_nearest_filter = (flags & TriUniforms::nearest_filter) == TriUniforms::nearest_filter; + bool is_fixed_light = 
(flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; + uint32_t lightmask = is_fixed_light ? 0 : 0xffffffff; + auto colormaps = args->colormaps; + + // Calculate gradients + const TriVertex &v1 = *args->v1; + const TriVertex &v2 = *args->v2; + const TriVertex &v3 = *args->v3; + ScreenTriangleStepVariables gradientX; + ScreenTriangleStepVariables gradientY; + ScreenTriangleStepVariables start; + gradientX.W = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + gradientY.W = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + start.W = v1.w + gradientX.W * (startX - v1.x) + gradientY.W * (startY - v1.y); + for (int i = 0; i < TriVertex::NumVarying; i++) + { + gradientX.Varying[i] = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + gradientY.Varying[i] = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + start.Varying[i] = v1.varying[i] * v1.w + gradientX.Varying[i] * (startX - v1.x) + gradientY.Varying[i] * (startY - v1.y); + } + + const uint8_t * RESTRICT texPixels = (const uint8_t *)args->texturePixels; + uint32_t texWidth = args->textureWidth; + uint32_t texHeight = args->textureHeight; + + uint8_t * RESTRICT destOrg = (uint8_t*)args->dest; + int pitch = args->pitch; + + uint32_t light = args->uniforms->light; + float shade = (64.0f - (light * 255 / 256 + 12.0f) * 32.0f / 128.0f) / 32.0f; + float globVis = args->uniforms->globvis * (1.0f / 32.0f); + + uint8_t color = args->uniforms->color; + + for (int i = 0; i < numSpans; i++) + { + const auto &span = fullSpans[i]; + + uint8_t *dest = destOrg + span.X + span.Y * pitch; + int width = span.Length; + int height = 8; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (span.X - startX) + gradientY.W * (span.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = 
start.Varying[j] + gradientX.Varying[j] * (span.X - startX) + gradientY.Varying[j] * (span.Y - startY); + + for (int y = 0; y < height; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + for (int x = 0; x < width; x++) + { + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int ix = 0; ix < 8; ix++) + { + uint8_t fg = color; + int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + dest[x * 8 + ix] = fg; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } + + for (int i = 0; i < numBlocks; i++) + { + const auto &block = partialBlocks[i]; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (block.X - startX) + gradientY.W * (block.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + 
blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (block.X - startX) + gradientY.Varying[j] * (block.Y - startY); + + uint8_t *dest = destOrg + block.X + block.Y * pitch; + uint32_t mask0 = block.Mask0; + uint32_t mask1 = block.Mask1; + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask0 & (1 << 31)) + { + uint8_t fg = color; + int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + dest[x] = fg; + } + mask0 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t 
varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask1 & (1 << 31)) + { + uint8_t fg = color; + int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + dest[x] = fg; + } + mask1 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } +} + +static void TriFill8Stencil(const TriDrawTriangleArgs *args, WorkerThreadData *thread) +{ + int numSpans = thread->NumFullSpans; + auto fullSpans = thread->FullSpans; + int numBlocks = thread->NumPartialBlocks; + auto partialBlocks = thread->PartialBlocks; + int startX = thread->StartX; + int startY = thread->StartY; + + auto flags = args->uniforms->flags; + bool is_simple_shade = (flags & TriUniforms::simple_shade) == TriUniforms::simple_shade; + bool is_nearest_filter = (flags & TriUniforms::nearest_filter) == 
TriUniforms::nearest_filter; + bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; + uint32_t lightmask = is_fixed_light ? 0 : 0xffffffff; + auto colormaps = args->colormaps; + + // Calculate gradients + const TriVertex &v1 = *args->v1; + const TriVertex &v2 = *args->v2; + const TriVertex &v3 = *args->v3; + ScreenTriangleStepVariables gradientX; + ScreenTriangleStepVariables gradientY; + ScreenTriangleStepVariables start; + gradientX.W = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + gradientY.W = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + start.W = v1.w + gradientX.W * (startX - v1.x) + gradientY.W * (startY - v1.y); + for (int i = 0; i < TriVertex::NumVarying; i++) + { + gradientX.Varying[i] = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + gradientY.Varying[i] = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + start.Varying[i] = v1.varying[i] * v1.w + gradientX.Varying[i] * (startX - v1.x) + gradientY.Varying[i] * (startY - v1.y); + } + + const uint8_t * RESTRICT texPixels = (const uint8_t *)args->texturePixels; + uint32_t texWidth = args->textureWidth; + uint32_t texHeight = args->textureHeight; + + uint8_t * RESTRICT destOrg = (uint8_t*)args->dest; + int pitch = args->pitch; + + uint32_t light = args->uniforms->light; + float shade = (64.0f - (light * 255 / 256 + 12.0f) * 32.0f / 128.0f) / 32.0f; + float globVis = args->uniforms->globvis * (1.0f / 32.0f); + + uint8_t color = args->uniforms->color; + + for (int i = 0; i < numSpans; i++) + { + const auto &span = fullSpans[i]; + + uint8_t *dest = destOrg + span.X + span.Y * pitch; + int width = span.Length; + int height = 8; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (span.X - startX) + gradientY.W * (span.Y - startY); + for (int j = 0; j < 
TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (span.X - startX) + gradientY.Varying[j] * (span.Y - startY); + + for (int y = 0; y < height; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + for (int x = 0; x < width; x++) + { + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int ix = 0; ix < 8; ix++) + { + uint8_t fg = color; + int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + dest[x * 8 + ix] = fg; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } + + for (int i = 0; i < numBlocks; i++) + { + const auto &block = partialBlocks[i]; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (block.X - startX) + gradientY.W * (block.Y - startY); + for (int 
j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (block.X - startX) + gradientY.Varying[j] * (block.Y - startY); + + uint8_t *dest = destOrg + block.X + block.Y * pitch; + uint32_t mask0 = block.Mask0; + uint32_t mask1 = block.Mask1; + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask0 & (1 << 31)) + { + uint8_t fg = color; + int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + dest[x] = fg; + } + mask0 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 
/ blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask1 & (1 << 31)) + { + uint8_t fg = color; + int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + dest[x] = fg; + } + mask1 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } +} + +static void TriFill8Shaded(const TriDrawTriangleArgs *args, WorkerThreadData *thread) +{ + int numSpans = thread->NumFullSpans; + auto fullSpans = thread->FullSpans; + int numBlocks = thread->NumPartialBlocks; + auto partialBlocks = thread->PartialBlocks; + int startX = thread->StartX; + int startY = thread->StartY; + + auto flags = args->uniforms->flags; + bool is_simple_shade = (flags & TriUniforms::simple_shade) == TriUniforms::simple_shade; + bool is_nearest_filter = (flags & 
TriUniforms::nearest_filter) == TriUniforms::nearest_filter; + bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; + uint32_t lightmask = is_fixed_light ? 0 : 0xffffffff; + auto colormaps = args->colormaps; + + // Calculate gradients + const TriVertex &v1 = *args->v1; + const TriVertex &v2 = *args->v2; + const TriVertex &v3 = *args->v3; + ScreenTriangleStepVariables gradientX; + ScreenTriangleStepVariables gradientY; + ScreenTriangleStepVariables start; + gradientX.W = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + gradientY.W = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + start.W = v1.w + gradientX.W * (startX - v1.x) + gradientY.W * (startY - v1.y); + for (int i = 0; i < TriVertex::NumVarying; i++) + { + gradientX.Varying[i] = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + gradientY.Varying[i] = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + start.Varying[i] = v1.varying[i] * v1.w + gradientX.Varying[i] * (startX - v1.x) + gradientY.Varying[i] * (startY - v1.y); + } + + const uint8_t * RESTRICT texPixels = (const uint8_t *)args->texturePixels; + uint32_t texWidth = args->textureWidth; + uint32_t texHeight = args->textureHeight; + + uint8_t * RESTRICT destOrg = (uint8_t*)args->dest; + int pitch = args->pitch; + + uint32_t light = args->uniforms->light; + float shade = (64.0f - (light * 255 / 256 + 12.0f) * 32.0f / 128.0f) / 32.0f; + float globVis = args->uniforms->globvis * (1.0f / 32.0f); + + uint8_t color = args->uniforms->color; + + for (int i = 0; i < numSpans; i++) + { + const auto &span = fullSpans[i]; + + uint8_t *dest = destOrg + span.X + span.Y * pitch; + int width = span.Length; + int height = 8; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (span.X - startX) + gradientY.W * (span.Y - startY); 
+ for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (span.X - startX) + gradientY.Varying[j] * (span.Y - startY); + + for (int y = 0; y < height; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + for (int x = 0; x < width; x++) + { + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int ix = 0; ix < 8; ix++) + { + uint8_t fg = color; + int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + dest[x * 8 + ix] = fg; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } + + for (int i = 0; i < numBlocks; i++) + { + const auto &block = partialBlocks[i]; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (block.X - startX) + gradientY.W * (block.Y 
- startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (block.X - startX) + gradientY.Varying[j] * (block.Y - startY); + + uint8_t *dest = destOrg + block.X + block.Y * pitch; + uint32_t mask0 = block.Mask0; + uint32_t mask1 = block.Mask1; + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask0 & (1 << 31)) + { + uint8_t fg = color; + int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + dest[x] = fg; + } + mask0 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + 
float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask1 & (1 << 31)) + { + uint8_t fg = color; + int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + dest[x] = fg; + } + mask1 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } +} + +static void TriFill8TranslateCopy(const TriDrawTriangleArgs *args, WorkerThreadData *thread) +{ + int numSpans = thread->NumFullSpans; + auto fullSpans = thread->FullSpans; + int numBlocks = thread->NumPartialBlocks; + auto partialBlocks = thread->PartialBlocks; + int startX = thread->StartX; + int startY = thread->StartY; + + auto flags = args->uniforms->flags; + bool is_simple_shade = (flags & TriUniforms::simple_shade) == TriUniforms::simple_shade; + bool is_nearest_filter = 
(flags & TriUniforms::nearest_filter) == TriUniforms::nearest_filter; + bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; + uint32_t lightmask = is_fixed_light ? 0 : 0xffffffff; + auto colormaps = args->colormaps; + + // Calculate gradients + const TriVertex &v1 = *args->v1; + const TriVertex &v2 = *args->v2; + const TriVertex &v3 = *args->v3; + ScreenTriangleStepVariables gradientX; + ScreenTriangleStepVariables gradientY; + ScreenTriangleStepVariables start; + gradientX.W = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + gradientY.W = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + start.W = v1.w + gradientX.W * (startX - v1.x) + gradientY.W * (startY - v1.y); + for (int i = 0; i < TriVertex::NumVarying; i++) + { + gradientX.Varying[i] = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + gradientY.Varying[i] = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + start.Varying[i] = v1.varying[i] * v1.w + gradientX.Varying[i] * (startX - v1.x) + gradientY.Varying[i] * (startY - v1.y); + } + + const uint8_t * RESTRICT texPixels = (const uint8_t *)args->texturePixels; + uint32_t texWidth = args->textureWidth; + uint32_t texHeight = args->textureHeight; + + uint8_t * RESTRICT destOrg = (uint8_t*)args->dest; + int pitch = args->pitch; + + uint32_t light = args->uniforms->light; + float shade = (64.0f - (light * 255 / 256 + 12.0f) * 32.0f / 128.0f) / 32.0f; + float globVis = args->uniforms->globvis * (1.0f / 32.0f); + + uint8_t color = args->uniforms->color; + + for (int i = 0; i < numSpans; i++) + { + const auto &span = fullSpans[i]; + + uint8_t *dest = destOrg + span.X + span.Y * pitch; + int width = span.Length; + int height = 8; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (span.X - startX) + gradientY.W * (span.Y - 
startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (span.X - startX) + gradientY.Varying[j] * (span.Y - startY); + + for (int y = 0; y < height; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + for (int x = 0; x < width; x++) + { + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int ix = 0; ix < 8; ix++) + { + uint8_t fg = color; + int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + dest[x * 8 + ix] = fg; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } + + for (int i = 0; i < numBlocks; i++) + { + const auto &block = partialBlocks[i]; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (block.X - startX) + gradientY.W * 
(block.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (block.X - startX) + gradientY.Varying[j] * (block.Y - startY); + + uint8_t *dest = destOrg + block.X + block.Y * pitch; + uint32_t mask0 = block.Mask0; + uint32_t mask1 = block.Mask1; + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask0 & (1 << 31)) + { + uint8_t fg = color; + int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + dest[x] = fg; + } + mask0 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = 
blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask1 & (1 << 31)) + { + uint8_t fg = color; + int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + dest[x] = fg; + } + mask1 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } +} + +/* TriFill8TranslateAlphaBlend: solid-colour triangle fill into an 8-bit destination (args->dest, row stride args->pitch). Full spans are drawn as 8 rows of span.Length groups of 8 pixels; partial blocks are drawn as two 4-row halves whose pixels are gated by block.Mask0 and block.Mask1, tested from bit 31 downwards. Every written pixel is colormaps[colormapindex + color], with color taken from args->uniforms->color and colormapindex computed from the interpolated light value (clamped to at most 31, then shifted left by 8); when the fixed_light flag is set, lightmask is 0 so the light value stays at light << 8 and never steps. NOTE(review): the body never reads dest, so no alpha blending is visible here, and it appears identical to the other TriFill8Translate* variants in this chunk - confirm that is intended. NOTE(review): is_simple_shade, is_nearest_filter, texPixels, texWidth, texHeight and the varyingPos/varyingStep values are computed but never consumed. NOTE(review): (1 << 31) left-shifts a signed int into its sign bit; 1u << 31 would express the same mask test without relying on that. */static void TriFill8TranslateAlphaBlend(const TriDrawTriangleArgs *args, WorkerThreadData *thread) +{ + int numSpans = thread->NumFullSpans; + auto fullSpans = thread->FullSpans; + int numBlocks = thread->NumPartialBlocks; + auto partialBlocks = thread->PartialBlocks; + int startX = thread->StartX; + int startY = thread->StartY; + + auto flags = args->uniforms->flags; + bool is_simple_shade = (flags & TriUniforms::simple_shade) == TriUniforms::simple_shade; + bool 
is_nearest_filter = (flags & TriUniforms::nearest_filter) == TriUniforms::nearest_filter; + bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; + uint32_t lightmask = is_fixed_light ? 0 : 0xffffffff; + auto colormaps = args->colormaps; + + // Calculate gradients + const TriVertex &v1 = *args->v1; + const TriVertex &v2 = *args->v2; + const TriVertex &v3 = *args->v3; + ScreenTriangleStepVariables gradientX; + ScreenTriangleStepVariables gradientY; + ScreenTriangleStepVariables start; + gradientX.W = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + gradientY.W = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + start.W = v1.w + gradientX.W * (startX - v1.x) + gradientY.W * (startY - v1.y); + for (int i = 0; i < TriVertex::NumVarying; i++) + { + gradientX.Varying[i] = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + gradientY.Varying[i] = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + start.Varying[i] = v1.varying[i] * v1.w + gradientX.Varying[i] * (startX - v1.x) + gradientY.Varying[i] * (startY - v1.y); + } + + const uint8_t * RESTRICT texPixels = (const uint8_t *)args->texturePixels; + uint32_t texWidth = args->textureWidth; + uint32_t texHeight = args->textureHeight; + + uint8_t * RESTRICT destOrg = (uint8_t*)args->dest; + int pitch = args->pitch; + + uint32_t light = args->uniforms->light; + float shade = (64.0f - (light * 255 / 256 + 12.0f) * 32.0f / 128.0f) / 32.0f; + float globVis = args->uniforms->globvis * (1.0f / 32.0f); + + uint8_t color = args->uniforms->color; + + for (int i = 0; i < numSpans; i++) + { + const auto &span = fullSpans[i]; + + uint8_t *dest = destOrg + span.X + span.Y * pitch; + int width = span.Length; + int height = 8; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (span.X - startX) + 
gradientY.W * (span.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (span.X - startX) + gradientY.Varying[j] * (span.Y - startY); + + for (int y = 0; y < height; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + for (int x = 0; x < width; x++) + { + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int ix = 0; ix < 8; ix++) + { + uint8_t fg = color; + int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + dest[x * 8 + ix] = fg; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } + + for (int i = 0; i < numBlocks; i++) + { + const auto &block = partialBlocks[i]; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (block.X - 
startX) + gradientY.W * (block.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (block.X - startX) + gradientY.Varying[j] * (block.Y - startY); + + uint8_t *dest = destOrg + block.X + block.Y * pitch; + uint32_t mask0 = block.Mask0; + uint32_t mask1 = block.Mask1; + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask0 & (1 << 31)) + { + uint8_t fg = color; + int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + dest[x] = fg; + } + mask0 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + for (int y = 0; y < 4; y++) + { + 
ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask1 & (1 << 31)) + { + uint8_t fg = color; + int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + dest[x] = fg; + } + mask1 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } +} + +/* TriFill8TranslateAdd: solid-colour triangle fill into an 8-bit destination (args->dest, row stride args->pitch). Full spans are drawn as 8 rows of span.Length groups of 8 pixels; partial blocks are drawn as two 4-row halves whose pixels are gated by block.Mask0 and block.Mask1, tested from bit 31 downwards. Every written pixel is colormaps[colormapindex + color], with color taken from args->uniforms->color and colormapindex computed from the interpolated light value (clamped to at most 31, then shifted left by 8); when the fixed_light flag is set, lightmask is 0 so the light value stays at light << 8 and never steps. NOTE(review): the body never reads dest, so no additive blending is visible here, and it appears identical to the other TriFill8Translate* variants in this chunk - confirm that is intended. NOTE(review): is_simple_shade, is_nearest_filter, texPixels, texWidth, texHeight and the varyingPos/varyingStep values are computed but never consumed. NOTE(review): (1 << 31) left-shifts a signed int into its sign bit; 1u << 31 would express the same mask test without relying on that. */static void TriFill8TranslateAdd(const TriDrawTriangleArgs *args, WorkerThreadData *thread) +{ + int numSpans = thread->NumFullSpans; + auto fullSpans = thread->FullSpans; + int numBlocks = thread->NumPartialBlocks; + auto partialBlocks = thread->PartialBlocks; + int startX = thread->StartX; + int startY = thread->StartY; + + auto flags = args->uniforms->flags; + bool is_simple_shade = (flags & TriUniforms::simple_shade) == 
TriUniforms::simple_shade; + bool is_nearest_filter = (flags & TriUniforms::nearest_filter) == TriUniforms::nearest_filter; + bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; + uint32_t lightmask = is_fixed_light ? 0 : 0xffffffff; + auto colormaps = args->colormaps; + + // Calculate gradients + const TriVertex &v1 = *args->v1; + const TriVertex &v2 = *args->v2; + const TriVertex &v3 = *args->v3; + ScreenTriangleStepVariables gradientX; + ScreenTriangleStepVariables gradientY; + ScreenTriangleStepVariables start; + gradientX.W = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + gradientY.W = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + start.W = v1.w + gradientX.W * (startX - v1.x) + gradientY.W * (startY - v1.y); + for (int i = 0; i < TriVertex::NumVarying; i++) + { + gradientX.Varying[i] = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + gradientY.Varying[i] = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + start.Varying[i] = v1.varying[i] * v1.w + gradientX.Varying[i] * (startX - v1.x) + gradientY.Varying[i] * (startY - v1.y); + } + + const uint8_t * RESTRICT texPixels = (const uint8_t *)args->texturePixels; + uint32_t texWidth = args->textureWidth; + uint32_t texHeight = args->textureHeight; + + uint8_t * RESTRICT destOrg = (uint8_t*)args->dest; + int pitch = args->pitch; + + uint32_t light = args->uniforms->light; + float shade = (64.0f - (light * 255 / 256 + 12.0f) * 32.0f / 128.0f) / 32.0f; + float globVis = args->uniforms->globvis * (1.0f / 32.0f); + + uint8_t color = args->uniforms->color; + + for (int i = 0; i < numSpans; i++) + { + const auto &span = fullSpans[i]; + + uint8_t *dest = destOrg + span.X + span.Y * pitch; + int width = span.Length; + int height = 8; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + 
gradientX.W * (span.X - startX) + gradientY.W * (span.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (span.X - startX) + gradientY.Varying[j] * (span.Y - startY); + + for (int y = 0; y < height; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + for (int x = 0; x < width; x++) + { + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int ix = 0; ix < 8; ix++) + { + uint8_t fg = color; + int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + dest[x * 8 + ix] = fg; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } + + for (int i = 0; i < numBlocks; i++) + { + const auto &block = partialBlocks[i]; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = 
start.W + gradientX.W * (block.X - startX) + gradientY.W * (block.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (block.X - startX) + gradientY.Varying[j] * (block.Y - startY); + + uint8_t *dest = destOrg + block.X + block.Y * pitch; + uint32_t mask0 = block.Mask0; + uint32_t mask1 = block.Mask1; + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask0 & (1 << 31)) + { + uint8_t fg = color; + int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + dest[x] = fg; + } + mask0 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + for (int y = 0; y < 4; 
y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask1 & (1 << 31)) + { + uint8_t fg = color; + int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + dest[x] = fg; + } + mask1 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } +} + +/* TriFill8TranslateSub: solid-colour triangle fill into an 8-bit destination (args->dest, row stride args->pitch). Full spans are drawn as 8 rows of span.Length groups of 8 pixels; partial blocks are drawn as two 4-row halves whose pixels are gated by block.Mask0 and block.Mask1, tested from bit 31 downwards. Every written pixel is colormaps[colormapindex + color], with color taken from args->uniforms->color and colormapindex computed from the interpolated light value (clamped to at most 31, then shifted left by 8); when the fixed_light flag is set, lightmask is 0 so the light value stays at light << 8 and never steps. NOTE(review): the body never reads dest, so no subtractive blending is visible here, and it appears identical to the other TriFill8Translate* variants in this chunk - confirm that is intended. NOTE(review): is_simple_shade, is_nearest_filter, texPixels, texWidth, texHeight and the varyingPos/varyingStep values are computed but never consumed. NOTE(review): (1 << 31) left-shifts a signed int into its sign bit; 1u << 31 would express the same mask test without relying on that. */static void TriFill8TranslateSub(const TriDrawTriangleArgs *args, WorkerThreadData *thread) +{ + int numSpans = thread->NumFullSpans; + auto fullSpans = thread->FullSpans; + int numBlocks = thread->NumPartialBlocks; + auto partialBlocks = thread->PartialBlocks; + int startX = thread->StartX; + int startY = thread->StartY; + + auto flags = args->uniforms->flags; + bool is_simple_shade = (flags & 
TriUniforms::simple_shade) == TriUniforms::simple_shade; + bool is_nearest_filter = (flags & TriUniforms::nearest_filter) == TriUniforms::nearest_filter; + bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; + uint32_t lightmask = is_fixed_light ? 0 : 0xffffffff; + auto colormaps = args->colormaps; + + // Calculate gradients + const TriVertex &v1 = *args->v1; + const TriVertex &v2 = *args->v2; + const TriVertex &v3 = *args->v3; + ScreenTriangleStepVariables gradientX; + ScreenTriangleStepVariables gradientY; + ScreenTriangleStepVariables start; + gradientX.W = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + gradientY.W = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + start.W = v1.w + gradientX.W * (startX - v1.x) + gradientY.W * (startY - v1.y); + for (int i = 0; i < TriVertex::NumVarying; i++) + { + gradientX.Varying[i] = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + gradientY.Varying[i] = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + start.Varying[i] = v1.varying[i] * v1.w + gradientX.Varying[i] * (startX - v1.x) + gradientY.Varying[i] * (startY - v1.y); + } + + const uint8_t * RESTRICT texPixels = (const uint8_t *)args->texturePixels; + uint32_t texWidth = args->textureWidth; + uint32_t texHeight = args->textureHeight; + + uint8_t * RESTRICT destOrg = (uint8_t*)args->dest; + int pitch = args->pitch; + + uint32_t light = args->uniforms->light; + float shade = (64.0f - (light * 255 / 256 + 12.0f) * 32.0f / 128.0f) / 32.0f; + float globVis = args->uniforms->globvis * (1.0f / 32.0f); + + uint8_t color = args->uniforms->color; + + for (int i = 0; i < numSpans; i++) + { + const auto &span = fullSpans[i]; + + uint8_t *dest = destOrg + span.X + span.Y * pitch; + int width = span.Length; + int height = 8; + + ScreenTriangleStepVariables blockPosY; 
+ blockPosY.W = start.W + gradientX.W * (span.X - startX) + gradientY.W * (span.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (span.X - startX) + gradientY.Varying[j] * (span.Y - startY); + + for (int y = 0; y < height; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + for (int x = 0; x < width; x++) + { + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int ix = 0; ix < 8; ix++) + { + uint8_t fg = color; + int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + dest[x * 8 + ix] = fg; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } + + for (int i = 0; i < numBlocks; i++) + { + const auto &block = partialBlocks[i]; + + ScreenTriangleStepVariables 
blockPosY; + blockPosY.W = start.W + gradientX.W * (block.X - startX) + gradientY.W * (block.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (block.X - startX) + gradientY.Varying[j] * (block.Y - startY); + + uint8_t *dest = destOrg + block.X + block.Y * pitch; + uint32_t mask0 = block.Mask0; + uint32_t mask1 = block.Mask1; + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask0 & (1 << 31)) + { + uint8_t fg = color; + int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + dest[x] = fg; + } + mask0 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + 
} + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask1 & (1 << 31)) + { + uint8_t fg = color; + int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + dest[x] = fg; + } + mask1 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } +} + +/* TriFill8TranslateRevSub: solid-colour triangle fill into an 8-bit destination (args->dest, row stride args->pitch). Full spans are drawn as 8 rows of span.Length groups of 8 pixels; partial blocks are drawn as two 4-row halves whose pixels are gated by block.Mask0 and block.Mask1, tested from bit 31 downwards. Every written pixel is colormaps[colormapindex + color], with color taken from args->uniforms->color and colormapindex computed from the interpolated light value (clamped to at most 31, then shifted left by 8); when the fixed_light flag is set, lightmask is 0 so the light value stays at light << 8 and never steps. NOTE(review): the body never reads dest, so no reverse-subtractive blending is visible here, and it appears identical to the other TriFill8Translate* variants in this chunk - confirm that is intended. NOTE(review): is_simple_shade, is_nearest_filter, texPixels, texWidth, texHeight and the varyingPos/varyingStep values are computed but never consumed. NOTE(review): (1 << 31) left-shifts a signed int into its sign bit; 1u << 31 would express the same mask test without relying on that. */static void TriFill8TranslateRevSub(const TriDrawTriangleArgs *args, WorkerThreadData *thread) +{ + int numSpans = thread->NumFullSpans; + auto fullSpans = thread->FullSpans; + int numBlocks = thread->NumPartialBlocks; + auto partialBlocks = thread->PartialBlocks; + int startX = thread->StartX; + int startY = thread->StartY; + + auto flags = args->uniforms->flags; + bool is_simple_shade = 
(flags & TriUniforms::simple_shade) == TriUniforms::simple_shade; + bool is_nearest_filter = (flags & TriUniforms::nearest_filter) == TriUniforms::nearest_filter; + bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; + uint32_t lightmask = is_fixed_light ? 0 : 0xffffffff; + auto colormaps = args->colormaps; + + // Calculate gradients + const TriVertex &v1 = *args->v1; + const TriVertex &v2 = *args->v2; + const TriVertex &v3 = *args->v3; + ScreenTriangleStepVariables gradientX; + ScreenTriangleStepVariables gradientY; + ScreenTriangleStepVariables start; + gradientX.W = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + gradientY.W = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + start.W = v1.w + gradientX.W * (startX - v1.x) + gradientY.W * (startY - v1.y); + for (int i = 0; i < TriVertex::NumVarying; i++) + { + gradientX.Varying[i] = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + gradientY.Varying[i] = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + start.Varying[i] = v1.varying[i] * v1.w + gradientX.Varying[i] * (startX - v1.x) + gradientY.Varying[i] * (startY - v1.y); + } + + const uint8_t * RESTRICT texPixels = (const uint8_t *)args->texturePixels; + uint32_t texWidth = args->textureWidth; + uint32_t texHeight = args->textureHeight; + + uint8_t * RESTRICT destOrg = (uint8_t*)args->dest; + int pitch = args->pitch; + + uint32_t light = args->uniforms->light; + float shade = (64.0f - (light * 255 / 256 + 12.0f) * 32.0f / 128.0f) / 32.0f; + float globVis = args->uniforms->globvis * (1.0f / 32.0f); + + uint8_t color = args->uniforms->color; + + for (int i = 0; i < numSpans; i++) + { + const auto &span = fullSpans[i]; + + uint8_t *dest = destOrg + span.X + span.Y * pitch; + int width = span.Length; + int height = 8; + + ScreenTriangleStepVariables 
blockPosY; + blockPosY.W = start.W + gradientX.W * (span.X - startX) + gradientY.W * (span.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (span.X - startX) + gradientY.Varying[j] * (span.Y - startY); + + for (int y = 0; y < height; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + for (int x = 0; x < width; x++) + { + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int ix = 0; ix < 8; ix++) + { + uint8_t fg = color; + int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + dest[x * 8 + ix] = fg; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } + + for (int i = 0; i < numBlocks; i++) + { + const auto &block = partialBlocks[i]; + + 
ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (block.X - startX) + gradientY.W * (block.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (block.X - startX) + gradientY.Varying[j] * (block.Y - startY); + + uint8_t *dest = destOrg + block.X + block.Y * pitch; + uint32_t mask0 = block.Mask0; + uint32_t mask1 = block.Mask1; + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask0 & (1 << 31)) + { + uint8_t fg = color; + int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + dest[x] = fg; + } + mask0 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += 
gradientY.Varying[j]; + + dest += pitch; + } + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask1 & (1 << 31)) + { + uint8_t fg = color; + int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + dest[x] = fg; + } + mask1 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } +} + +static void TriFill8AddSrcColorOneMinusSrcColor(const TriDrawTriangleArgs *args, WorkerThreadData *thread) +{ + int numSpans = thread->NumFullSpans; + auto fullSpans = thread->FullSpans; + int numBlocks = thread->NumPartialBlocks; + auto partialBlocks = thread->PartialBlocks; + int startX = thread->StartX; + int startY = thread->StartY; + + auto 
flags = args->uniforms->flags; + bool is_simple_shade = (flags & TriUniforms::simple_shade) == TriUniforms::simple_shade; + bool is_nearest_filter = (flags & TriUniforms::nearest_filter) == TriUniforms::nearest_filter; + bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; + uint32_t lightmask = is_fixed_light ? 0 : 0xffffffff; + auto colormaps = args->colormaps; + + // Calculate gradients + const TriVertex &v1 = *args->v1; + const TriVertex &v2 = *args->v2; + const TriVertex &v3 = *args->v3; + ScreenTriangleStepVariables gradientX; + ScreenTriangleStepVariables gradientY; + ScreenTriangleStepVariables start; + gradientX.W = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + gradientY.W = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + start.W = v1.w + gradientX.W * (startX - v1.x) + gradientY.W * (startY - v1.y); + for (int i = 0; i < TriVertex::NumVarying; i++) + { + gradientX.Varying[i] = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + gradientY.Varying[i] = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + start.Varying[i] = v1.varying[i] * v1.w + gradientX.Varying[i] * (startX - v1.x) + gradientY.Varying[i] * (startY - v1.y); + } + + const uint8_t * RESTRICT texPixels = (const uint8_t *)args->texturePixels; + uint32_t texWidth = args->textureWidth; + uint32_t texHeight = args->textureHeight; + + uint8_t * RESTRICT destOrg = (uint8_t*)args->dest; + int pitch = args->pitch; + + uint32_t light = args->uniforms->light; + float shade = (64.0f - (light * 255 / 256 + 12.0f) * 32.0f / 128.0f) / 32.0f; + float globVis = args->uniforms->globvis * (1.0f / 32.0f); + + uint8_t color = args->uniforms->color; + + for (int i = 0; i < numSpans; i++) + { + const auto &span = fullSpans[i]; + + uint8_t *dest = destOrg + span.X + span.Y * pitch; + int width = 
span.Length; + int height = 8; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (span.X - startX) + gradientY.W * (span.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (span.X - startX) + gradientY.Varying[j] * (span.Y - startY); + + for (int y = 0; y < height; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + for (int x = 0; x < width; x++) + { + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int ix = 0; ix < 8; ix++) + { + uint8_t fg = color; + int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + dest[x * 8 + ix] = fg; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } + + for (int i = 0; i < numBlocks; i++) + { 
+ const auto &block = partialBlocks[i]; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (block.X - startX) + gradientY.W * (block.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (block.X - startX) + gradientY.Varying[j] * (block.Y - startY); + + uint8_t *dest = destOrg + block.X + block.Y * pitch; + uint32_t mask0 = block.Mask0; + uint32_t mask1 = block.Mask1; + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask0 & (1 << 31)) + { + uint8_t fg = color; + int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + dest[x] = fg; + } + mask0 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; 
j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask1 & (1 << 31)) + { + uint8_t fg = color; + int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + dest[x] = fg; + } + mask1 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } +} + +static void TriFill8Skycap(const TriDrawTriangleArgs *args, WorkerThreadData *thread) +{ + int numSpans = thread->NumFullSpans; + auto fullSpans = thread->FullSpans; + int numBlocks = thread->NumPartialBlocks; + auto partialBlocks = thread->PartialBlocks; + int startX = thread->StartX; + int startY = thread->StartY; + 
+ auto flags = args->uniforms->flags; + bool is_simple_shade = (flags & TriUniforms::simple_shade) == TriUniforms::simple_shade; + bool is_nearest_filter = (flags & TriUniforms::nearest_filter) == TriUniforms::nearest_filter; + bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; + uint32_t lightmask = is_fixed_light ? 0 : 0xffffffff; + auto colormaps = args->colormaps; + + // Calculate gradients + const TriVertex &v1 = *args->v1; + const TriVertex &v2 = *args->v2; + const TriVertex &v3 = *args->v3; + ScreenTriangleStepVariables gradientX; + ScreenTriangleStepVariables gradientY; + ScreenTriangleStepVariables start; + gradientX.W = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + gradientY.W = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + start.W = v1.w + gradientX.W * (startX - v1.x) + gradientY.W * (startY - v1.y); + for (int i = 0; i < TriVertex::NumVarying; i++) + { + gradientX.Varying[i] = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + gradientY.Varying[i] = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + start.Varying[i] = v1.varying[i] * v1.w + gradientX.Varying[i] * (startX - v1.x) + gradientY.Varying[i] * (startY - v1.y); + } + + const uint8_t * RESTRICT texPixels = (const uint8_t *)args->texturePixels; + uint32_t texWidth = args->textureWidth; + uint32_t texHeight = args->textureHeight; + + uint8_t * RESTRICT destOrg = (uint8_t*)args->dest; + int pitch = args->pitch; + + uint32_t light = args->uniforms->light; + float shade = (64.0f - (light * 255 / 256 + 12.0f) * 32.0f / 128.0f) / 32.0f; + float globVis = args->uniforms->globvis * (1.0f / 32.0f); + + uint8_t color = args->uniforms->color; + + for (int i = 0; i < numSpans; i++) + { + const auto &span = fullSpans[i]; + + uint8_t *dest = destOrg + span.X + span.Y * pitch; + int width = 
span.Length; + int height = 8; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (span.X - startX) + gradientY.W * (span.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (span.X - startX) + gradientY.Varying[j] * (span.Y - startY); + + for (int y = 0; y < height; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + for (int x = 0; x < width; x++) + { + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int ix = 0; ix < 8; ix++) + { + uint8_t fg = color; + int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + dest[x * 8 + ix] = fg; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } + + for (int i = 0; i < numBlocks; i++) + { 
+ const auto &block = partialBlocks[i]; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (block.X - startX) + gradientY.W * (block.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (block.X - startX) + gradientY.Varying[j] * (block.Y - startY); + + uint8_t *dest = destOrg + block.X + block.Y * pitch; + uint32_t mask0 = block.Mask0; + uint32_t mask1 = block.Mask1; + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask0 & (1 << 31)) + { + uint8_t fg = color; + int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + dest[x] = fg; + } + mask0 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; 
j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask1 & (1 << 31)) + { + uint8_t fg = color; + int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + dest[x] = fg; + } + mask1 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } +} + +static void TriDraw8Copy(const TriDrawTriangleArgs *args, WorkerThreadData *thread) +{ + int numSpans = thread->NumFullSpans; + auto fullSpans = thread->FullSpans; + int numBlocks = thread->NumPartialBlocks; + auto partialBlocks = thread->PartialBlocks; + int startX = thread->StartX; + int startY = thread->StartY; + + 
auto flags = args->uniforms->flags; + bool is_simple_shade = (flags & TriUniforms::simple_shade) == TriUniforms::simple_shade; + bool is_nearest_filter = (flags & TriUniforms::nearest_filter) == TriUniforms::nearest_filter; + bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; + uint32_t lightmask = is_fixed_light ? 0 : 0xffffffff; + auto colormaps = args->colormaps; + + // Calculate gradients + const TriVertex &v1 = *args->v1; + const TriVertex &v2 = *args->v2; + const TriVertex &v3 = *args->v3; + ScreenTriangleStepVariables gradientX; + ScreenTriangleStepVariables gradientY; + ScreenTriangleStepVariables start; + gradientX.W = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + gradientY.W = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + start.W = v1.w + gradientX.W * (startX - v1.x) + gradientY.W * (startY - v1.y); + for (int i = 0; i < TriVertex::NumVarying; i++) + { + gradientX.Varying[i] = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + gradientY.Varying[i] = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + start.Varying[i] = v1.varying[i] * v1.w + gradientX.Varying[i] * (startX - v1.x) + gradientY.Varying[i] * (startY - v1.y); + } + + const uint8_t * RESTRICT texPixels = (const uint8_t *)args->texturePixels; + uint32_t texWidth = args->textureWidth; + uint32_t texHeight = args->textureHeight; + + uint8_t * RESTRICT destOrg = (uint8_t*)args->dest; + int pitch = args->pitch; + + uint32_t light = args->uniforms->light; + float shade = (64.0f - (light * 255 / 256 + 12.0f) * 32.0f / 128.0f) / 32.0f; + float globVis = args->uniforms->globvis * (1.0f / 32.0f); + + uint8_t color = args->uniforms->color; + + for (int i = 0; i < numSpans; i++) + { + const auto &span = fullSpans[i]; + + uint8_t *dest = destOrg + span.X + span.Y * pitch; + int width = 
span.Length; + int height = 8; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (span.X - startX) + gradientY.W * (span.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (span.X - startX) + gradientY.Varying[j] * (span.Y - startY); + + for (int y = 0; y < height; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + for (int x = 0; x < width; x++) + { + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int ix = 0; ix < 8; ix++) + { + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + uint8_t fg = texPixels[texelX * texHeight + texelY]; + int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + dest[x * 8 + ix] = fg; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + } + + blockPosY.W += 
gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } + + for (int i = 0; i < numBlocks; i++) + { + const auto &block = partialBlocks[i]; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (block.X - startX) + gradientY.W * (block.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (block.X - startX) + gradientY.Varying[j] * (block.Y - startY); + + uint8_t *dest = destOrg + block.X + block.Y * pitch; + uint32_t mask0 = block.Mask0; + uint32_t mask1 = block.Mask1; + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask0 & (1 << 31)) + { + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + uint8_t fg = texPixels[texelX 
* texHeight + texelY]; + int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + dest[x] = fg; + } + mask0 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask1 & (1 << 31)) + { + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + uint8_t fg = texPixels[texelX * texHeight + texelY]; + int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + dest[x] = fg; + } + mask1 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + 
varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } +} + +static void TriDraw8AlphaBlend(const TriDrawTriangleArgs *args, WorkerThreadData *thread) +{ + int numSpans = thread->NumFullSpans; + auto fullSpans = thread->FullSpans; + int numBlocks = thread->NumPartialBlocks; + auto partialBlocks = thread->PartialBlocks; + int startX = thread->StartX; + int startY = thread->StartY; + + auto flags = args->uniforms->flags; + bool is_simple_shade = (flags & TriUniforms::simple_shade) == TriUniforms::simple_shade; + bool is_nearest_filter = (flags & TriUniforms::nearest_filter) == TriUniforms::nearest_filter; + bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; + uint32_t lightmask = is_fixed_light ? 0 : 0xffffffff; + auto colormaps = args->colormaps; + + // Calculate gradients + const TriVertex &v1 = *args->v1; + const TriVertex &v2 = *args->v2; + const TriVertex &v3 = *args->v3; + ScreenTriangleStepVariables gradientX; + ScreenTriangleStepVariables gradientY; + ScreenTriangleStepVariables start; + gradientX.W = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + gradientY.W = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + start.W = v1.w + gradientX.W * (startX - v1.x) + gradientY.W * (startY - v1.y); + for (int i = 0; i < TriVertex::NumVarying; i++) + { + gradientX.Varying[i] = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + gradientY.Varying[i] = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + start.Varying[i] = v1.varying[i] * v1.w + gradientX.Varying[i] * (startX - v1.x) + gradientY.Varying[i] * (startY - v1.y); + } + + const uint8_t * RESTRICT texPixels = (const uint8_t 
*)args->texturePixels; + uint32_t texWidth = args->textureWidth; + uint32_t texHeight = args->textureHeight; + + uint8_t * RESTRICT destOrg = (uint8_t*)args->dest; + int pitch = args->pitch; + + uint32_t light = args->uniforms->light; + float shade = (64.0f - (light * 255 / 256 + 12.0f) * 32.0f / 128.0f) / 32.0f; + float globVis = args->uniforms->globvis * (1.0f / 32.0f); + + uint8_t color = args->uniforms->color; + + for (int i = 0; i < numSpans; i++) + { + const auto &span = fullSpans[i]; + + uint8_t *dest = destOrg + span.X + span.Y * pitch; + int width = span.Length; + int height = 8; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (span.X - startX) + gradientY.W * (span.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (span.X - startX) + gradientY.Varying[j] * (span.Y - startY); + + for (int y = 0; y < height; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + for (int x = 0; x < width; x++) + { + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - 
lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int ix = 0; ix < 8; ix++) + { + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + uint8_t fg = texPixels[texelX * texHeight + texelY]; + int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + dest[x * 8 + ix] = fg; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } + + for (int i = 0; i < numBlocks; i++) + { + const auto &block = partialBlocks[i]; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (block.X - startX) + gradientY.W * (block.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (block.X - startX) + gradientY.Varying[j] * (block.Y - startY); + + uint8_t *dest = destOrg + block.X + block.Y * pitch; + uint32_t mask0 = block.Mask0; + uint32_t mask1 = block.Mask1; + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + 
int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask0 & (1 << 31)) + { + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + uint8_t fg = texPixels[texelX * texHeight + texelY]; + int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + dest[x] = fg; + } + mask0 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 
32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask1 & (1 << 31)) + { + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + uint8_t fg = texPixels[texelX * texHeight + texelY]; + int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + dest[x] = fg; + } + mask1 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } +} + /* TriDraw8AddSolid: draws the full spans (8 rows each) and the masked partial blocks (two 4-row halves of 8 pixels) recorded in thread into the 8-bit buffer args->dest. Each written pixel is a texel of args->texturePixels mapped through args->colormaps. NOTE(review): dest is never read back and color is never used, so nothing is actually added; the body appears to match TriDraw8Add, TriDraw8Sub and TriDraw8RevSub - confirm this is an intended placeholder. */+static void TriDraw8AddSolid(const TriDrawTriangleArgs *args, WorkerThreadData *thread) +{ + int numSpans = thread->NumFullSpans; + auto fullSpans = thread->FullSpans; + int numBlocks = thread->NumPartialBlocks; + auto partialBlocks = thread->PartialBlocks; + int startX = thread->StartX; + int startY = thread->StartY; + + auto flags = args->uniforms->flags; + /* NOTE(review): is_simple_shade and is_nearest_filter are not read again in this function */bool is_simple_shade = (flags & TriUniforms::simple_shade) == TriUniforms::simple_shade; + bool is_nearest_filter = (flags & TriUniforms::nearest_filter) == TriUniforms::nearest_filter; + bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; + /* all ones keeps the computed per-pixel light; zero (fixed_light) substitutes light << 8 and zeroes the light step */uint32_t lightmask = is_fixed_light ? 
0 : 0xffffffff; + auto colormaps = args->colormaps; + + // Calculate gradients + const TriVertex &v1 = *args->v1; + const TriVertex &v2 = *args->v2; + const TriVertex &v3 = *args->v3; + ScreenTriangleStepVariables gradientX; + ScreenTriangleStepVariables gradientY; + ScreenTriangleStepVariables start; + gradientX.W = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + gradientY.W = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + start.W = v1.w + gradientX.W * (startX - v1.x) + gradientY.W * (startY - v1.y); + for (int i = 0; i < TriVertex::NumVarying; i++) + { + /* varyings are interpolated as varying * w; the divide by the interpolated W is done later, once per 8-pixel step */gradientX.Varying[i] = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + gradientY.Varying[i] = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + start.Varying[i] = v1.varying[i] * v1.w + gradientX.Varying[i] * (startX - v1.x) + gradientY.Varying[i] * (startY - v1.y); + } + + const uint8_t * RESTRICT texPixels = (const uint8_t *)args->texturePixels; + uint32_t texWidth = args->textureWidth; + uint32_t texHeight = args->textureHeight; + + uint8_t * RESTRICT destOrg = (uint8_t*)args->dest; + int pitch = args->pitch; + + uint32_t light = args->uniforms->light; + float shade = (64.0f - (light * 255 / 256 + 12.0f) * 32.0f / 128.0f) / 32.0f; + float globVis = args->uniforms->globvis * (1.0f / 32.0f); + + /* NOTE(review): color is not referenced after this line */uint8_t color = args->uniforms->color; + + for (int i = 0; i < numSpans; i++) + { + const auto &span = fullSpans[i]; + + uint8_t *dest = destOrg + span.X + span.Y * pitch; + /* used below as a count of 8-pixel groups (dest[x * 8 + ix]) */int width = span.Length; + int height = 8; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (span.X - startX) + gradientY.W * (span.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (span.X - startX) + gradientY.Varying[j] * (span.Y - startY); + + for 
(int y = 0; y < height; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + /* Varying / W scaled by 0x01000000 (2^24) and truncated to an integer */float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + /* per 8-pixel group: evaluate coordinates and light at the far end of the group, then step linearly across its 8 pixels */for (int x = 0; x < width; x++) + { + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int ix = 0; ix < 8; ix++) + { + /* << 8 drops the bits above the 24-bit fraction, >> 16 keeps a 16-bit fraction, which is then scaled to a texel index */int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + /* texture is indexed as x * texHeight + y */uint8_t fg = texPixels[texelX * texHeight + texelY]; + /* light-derived row number capped at 31; << 8 turns it into an offset in steps of 256 entries */int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + /* plain overwrite of the destination byte */dest[x * 8 + ix] = fg; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } + + for (int i = 0; i < numBlocks; i++) + { + const auto &block = partialBlocks[i]; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (block.X - startX) + gradientY.W * (block.Y - 
startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (block.X - startX) + gradientY.Varying[j] * (block.Y - startY); + + uint8_t *dest = destOrg + block.X + block.Y * pitch; + /* Mask0 gates the first four rows and Mask1 the last four: the top bit is tested per pixel, then the mask shifts left by one */uint32_t mask0 = block.Mask0; + uint32_t mask1 = block.Mask1; + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + /* pixels with a clear mask bit are skipped, but the interpolators below still advance */if (mask0 & (1 << 31)) + { + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + uint8_t fg = texPixels[texelX * texHeight + texelY]; + int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + dest[x] = fg; + } + mask0 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < 
TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask1 & (1 << 31)) + { + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + uint8_t fg = texPixels[texelX * texHeight + texelY]; + int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + dest[x] = fg; + } + mask1 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } +} + /* TriDraw8Add: draws the full spans (8 rows each) and the masked partial blocks (two 4-row halves of 8 pixels) recorded in thread into the 8-bit buffer args->dest. Each written pixel is a texel of args->texturePixels mapped through args->colormaps. NOTE(review): dest is never read back and color is never used, so nothing is actually added; the body appears to match TriDraw8AddSolid, TriDraw8Sub and TriDraw8RevSub - confirm this is an intended placeholder. */+static void TriDraw8Add(const TriDrawTriangleArgs *args, WorkerThreadData *thread) +{ + int numSpans = 
thread->NumFullSpans; + auto fullSpans = thread->FullSpans; + int numBlocks = thread->NumPartialBlocks; + auto partialBlocks = thread->PartialBlocks; + int startX = thread->StartX; + int startY = thread->StartY; + + auto flags = args->uniforms->flags; + /* NOTE(review): is_simple_shade and is_nearest_filter are not read again in this function */bool is_simple_shade = (flags & TriUniforms::simple_shade) == TriUniforms::simple_shade; + bool is_nearest_filter = (flags & TriUniforms::nearest_filter) == TriUniforms::nearest_filter; + bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; + /* all ones keeps the computed per-pixel light; zero (fixed_light) substitutes light << 8 and zeroes the light step */uint32_t lightmask = is_fixed_light ? 0 : 0xffffffff; + auto colormaps = args->colormaps; + + // Calculate gradients + const TriVertex &v1 = *args->v1; + const TriVertex &v2 = *args->v2; + const TriVertex &v3 = *args->v3; + ScreenTriangleStepVariables gradientX; + ScreenTriangleStepVariables gradientY; + ScreenTriangleStepVariables start; + gradientX.W = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + gradientY.W = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + start.W = v1.w + gradientX.W * (startX - v1.x) + gradientY.W * (startY - v1.y); + for (int i = 0; i < TriVertex::NumVarying; i++) + { + /* varyings are interpolated as varying * w; the divide by the interpolated W is done later, once per 8-pixel step */gradientX.Varying[i] = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + gradientY.Varying[i] = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + start.Varying[i] = v1.varying[i] * v1.w + gradientX.Varying[i] * (startX - v1.x) + gradientY.Varying[i] * (startY - v1.y); + } + + const uint8_t * RESTRICT texPixels = (const uint8_t *)args->texturePixels; + uint32_t texWidth = args->textureWidth; + uint32_t texHeight = args->textureHeight; + + uint8_t * RESTRICT destOrg = (uint8_t*)args->dest; + int pitch = args->pitch; + + uint32_t light = args->uniforms->light; + float shade = (64.0f - (light * 255 / 256 + 12.0f) * 32.0f / 128.0f) / 32.0f; + float globVis = 
args->uniforms->globvis * (1.0f / 32.0f); + + /* NOTE(review): color is not referenced after this line */uint8_t color = args->uniforms->color; + + for (int i = 0; i < numSpans; i++) + { + const auto &span = fullSpans[i]; + + uint8_t *dest = destOrg + span.X + span.Y * pitch; + /* used below as a count of 8-pixel groups (dest[x * 8 + ix]) */int width = span.Length; + int height = 8; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (span.X - startX) + gradientY.W * (span.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (span.X - startX) + gradientY.Varying[j] * (span.Y - startY); + + for (int y = 0; y < height; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + /* Varying / W scaled by 0x01000000 (2^24) and truncated to an integer */float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + /* per 8-pixel group: evaluate coordinates and light at the far end of the group, then step linearly across its 8 pixels */for (int x = 0; x < width; x++) + { + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int ix = 0; ix < 8; ix++) + { + /* << 8 drops the bits above the 24-bit fraction, >> 16 keeps a 16-bit fraction, which is then scaled to a texel index */int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + /* texture is indexed as x * texHeight + y */uint8_t fg = texPixels[texelX * texHeight + texelY]; + /* light-derived row number capped at 31; << 8 turns it into an offset in steps of 256 entries */int colormapindex = MIN(((256 - 
(lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + /* plain overwrite of the destination byte */dest[x * 8 + ix] = fg; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } + + for (int i = 0; i < numBlocks; i++) + { + const auto &block = partialBlocks[i]; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (block.X - startX) + gradientY.W * (block.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (block.X - startX) + gradientY.Varying[j] * (block.Y - startY); + + uint8_t *dest = destOrg + block.X + block.Y * pitch; + /* Mask0 gates the first four rows and Mask1 the last four: the top bit is tested per pixel, then the mask shifts left by one */uint32_t mask0 = block.Mask0; + uint32_t mask1 = block.Mask1; + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + 
for (int x = 0; x < 8; x++) + { + /* pixels with a clear mask bit are skipped, but the interpolators below still advance */if (mask0 & (1 << 31)) + { + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + uint8_t fg = texPixels[texelX * texHeight + texelY]; + int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + dest[x] = fg; + } + mask0 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask1 & (1 << 31)) + { + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + uint8_t fg = 
texPixels[texelX * texHeight + texelY]; + int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + dest[x] = fg; + } + mask1 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } +} + /* TriDraw8Sub: draws the full spans (8 rows each) and the masked partial blocks (two 4-row halves of 8 pixels) recorded in thread into the 8-bit buffer args->dest. Each written pixel is a texel of args->texturePixels mapped through args->colormaps. NOTE(review): dest is never read back and color is never used, so nothing is actually subtracted; the body appears to match TriDraw8AddSolid, TriDraw8Add and TriDraw8RevSub - confirm this is an intended placeholder. */+static void TriDraw8Sub(const TriDrawTriangleArgs *args, WorkerThreadData *thread) +{ + int numSpans = thread->NumFullSpans; + auto fullSpans = thread->FullSpans; + int numBlocks = thread->NumPartialBlocks; + auto partialBlocks = thread->PartialBlocks; + int startX = thread->StartX; + int startY = thread->StartY; + + auto flags = args->uniforms->flags; + /* NOTE(review): is_simple_shade and is_nearest_filter are not read again in this function */bool is_simple_shade = (flags & TriUniforms::simple_shade) == TriUniforms::simple_shade; + bool is_nearest_filter = (flags & TriUniforms::nearest_filter) == TriUniforms::nearest_filter; + bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; + /* all ones keeps the computed per-pixel light; zero (fixed_light) substitutes light << 8 and zeroes the light step */uint32_t lightmask = is_fixed_light ? 
0 : 0xffffffff; + auto colormaps = args->colormaps; + + // Calculate gradients + const TriVertex &v1 = *args->v1; + const TriVertex &v2 = *args->v2; + const TriVertex &v3 = *args->v3; + ScreenTriangleStepVariables gradientX; + ScreenTriangleStepVariables gradientY; + ScreenTriangleStepVariables start; + gradientX.W = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + gradientY.W = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + start.W = v1.w + gradientX.W * (startX - v1.x) + gradientY.W * (startY - v1.y); + for (int i = 0; i < TriVertex::NumVarying; i++) + { + /* varyings are interpolated as varying * w; the divide by the interpolated W is done later, once per 8-pixel step */gradientX.Varying[i] = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + gradientY.Varying[i] = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + start.Varying[i] = v1.varying[i] * v1.w + gradientX.Varying[i] * (startX - v1.x) + gradientY.Varying[i] * (startY - v1.y); + } + + const uint8_t * RESTRICT texPixels = (const uint8_t *)args->texturePixels; + uint32_t texWidth = args->textureWidth; + uint32_t texHeight = args->textureHeight; + + uint8_t * RESTRICT destOrg = (uint8_t*)args->dest; + int pitch = args->pitch; + + uint32_t light = args->uniforms->light; + float shade = (64.0f - (light * 255 / 256 + 12.0f) * 32.0f / 128.0f) / 32.0f; + float globVis = args->uniforms->globvis * (1.0f / 32.0f); + + /* NOTE(review): color is not referenced after this line */uint8_t color = args->uniforms->color; + + for (int i = 0; i < numSpans; i++) + { + const auto &span = fullSpans[i]; + + uint8_t *dest = destOrg + span.X + span.Y * pitch; + /* used below as a count of 8-pixel groups (dest[x * 8 + ix]) */int width = span.Length; + int height = 8; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (span.X - startX) + gradientY.W * (span.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (span.X - startX) + gradientY.Varying[j] * (span.Y - startY); + + for 
(int y = 0; y < height; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + /* Varying / W scaled by 0x01000000 (2^24) and truncated to an integer */float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + /* per 8-pixel group: evaluate coordinates and light at the far end of the group, then step linearly across its 8 pixels */for (int x = 0; x < width; x++) + { + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int ix = 0; ix < 8; ix++) + { + /* << 8 drops the bits above the 24-bit fraction, >> 16 keeps a 16-bit fraction, which is then scaled to a texel index */int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + /* texture is indexed as x * texHeight + y */uint8_t fg = texPixels[texelX * texHeight + texelY]; + /* light-derived row number capped at 31; << 8 turns it into an offset in steps of 256 entries */int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + /* plain overwrite of the destination byte */dest[x * 8 + ix] = fg; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } + + for (int i = 0; i < numBlocks; i++) + { + const auto &block = partialBlocks[i]; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (block.X - startX) + gradientY.W * (block.Y - 
startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (block.X - startX) + gradientY.Varying[j] * (block.Y - startY); + + uint8_t *dest = destOrg + block.X + block.Y * pitch; + /* Mask0 gates the first four rows and Mask1 the last four: the top bit is tested per pixel, then the mask shifts left by one */uint32_t mask0 = block.Mask0; + uint32_t mask1 = block.Mask1; + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + /* pixels with a clear mask bit are skipped, but the interpolators below still advance */if (mask0 & (1 << 31)) + { + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + uint8_t fg = texPixels[texelX * texHeight + texelY]; + int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + dest[x] = fg; + } + mask0 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < 
TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask1 & (1 << 31)) + { + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + uint8_t fg = texPixels[texelX * texHeight + texelY]; + int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + dest[x] = fg; + } + mask1 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } +} + /* TriDraw8RevSub: draws the full spans (8 rows each) and the masked partial blocks (two 4-row halves of 8 pixels) recorded in thread into the 8-bit buffer args->dest. Each written pixel is a texel of args->texturePixels mapped through args->colormaps. NOTE(review): dest is never read back and color is never used, so nothing is actually subtracted; the body appears to match TriDraw8AddSolid, TriDraw8Add and TriDraw8Sub - confirm this is an intended placeholder. */+static void TriDraw8RevSub(const TriDrawTriangleArgs *args, WorkerThreadData *thread) +{ + int numSpans = 
thread->NumFullSpans; + auto fullSpans = thread->FullSpans; + int numBlocks = thread->NumPartialBlocks; + auto partialBlocks = thread->PartialBlocks; + int startX = thread->StartX; + int startY = thread->StartY; + + auto flags = args->uniforms->flags; + /* NOTE(review): is_simple_shade and is_nearest_filter are not read again in this function */bool is_simple_shade = (flags & TriUniforms::simple_shade) == TriUniforms::simple_shade; + bool is_nearest_filter = (flags & TriUniforms::nearest_filter) == TriUniforms::nearest_filter; + bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; + /* all ones keeps the computed per-pixel light; zero (fixed_light) substitutes light << 8 and zeroes the light step */uint32_t lightmask = is_fixed_light ? 0 : 0xffffffff; + auto colormaps = args->colormaps; + + // Calculate gradients + const TriVertex &v1 = *args->v1; + const TriVertex &v2 = *args->v2; + const TriVertex &v3 = *args->v3; + ScreenTriangleStepVariables gradientX; + ScreenTriangleStepVariables gradientY; + ScreenTriangleStepVariables start; + gradientX.W = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + gradientY.W = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + start.W = v1.w + gradientX.W * (startX - v1.x) + gradientY.W * (startY - v1.y); + for (int i = 0; i < TriVertex::NumVarying; i++) + { + /* varyings are interpolated as varying * w; the divide by the interpolated W is done later, once per 8-pixel step */gradientX.Varying[i] = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + gradientY.Varying[i] = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + start.Varying[i] = v1.varying[i] * v1.w + gradientX.Varying[i] * (startX - v1.x) + gradientY.Varying[i] * (startY - v1.y); + } + + const uint8_t * RESTRICT texPixels = (const uint8_t *)args->texturePixels; + uint32_t texWidth = args->textureWidth; + uint32_t texHeight = args->textureHeight; + + uint8_t * RESTRICT destOrg = (uint8_t*)args->dest; + int pitch = args->pitch; + + uint32_t light = args->uniforms->light; + float shade = (64.0f - (light * 255 / 256 + 12.0f) * 32.0f / 128.0f) / 32.0f; + float globVis = 
args->uniforms->globvis * (1.0f / 32.0f); + + /* NOTE(review): color is not referenced after this line */uint8_t color = args->uniforms->color; + + for (int i = 0; i < numSpans; i++) + { + const auto &span = fullSpans[i]; + + uint8_t *dest = destOrg + span.X + span.Y * pitch; + /* used below as a count of 8-pixel groups (dest[x * 8 + ix]) */int width = span.Length; + int height = 8; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (span.X - startX) + gradientY.W * (span.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (span.X - startX) + gradientY.Varying[j] * (span.Y - startY); + + for (int y = 0; y < height; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + /* Varying / W scaled by 0x01000000 (2^24) and truncated to an integer */float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + /* per 8-pixel group: evaluate coordinates and light at the far end of the group, then step linearly across its 8 pixels */for (int x = 0; x < width; x++) + { + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int ix = 0; ix < 8; ix++) + { + /* << 8 drops the bits above the 24-bit fraction, >> 16 keeps a 16-bit fraction, which is then scaled to a texel index */int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + /* texture is indexed as x * texHeight + y */uint8_t fg = texPixels[texelX * texHeight + texelY]; + /* light-derived row number capped at 31; << 8 turns it into an offset in steps of 256 entries */int colormapindex = MIN(((256 - 
(lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + /* plain overwrite of the destination byte */dest[x * 8 + ix] = fg; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } + + for (int i = 0; i < numBlocks; i++) + { + const auto &block = partialBlocks[i]; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (block.X - startX) + gradientY.W * (block.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (block.X - startX) + gradientY.Varying[j] * (block.Y - startY); + + uint8_t *dest = destOrg + block.X + block.Y * pitch; + /* Mask0 gates the first four rows and Mask1 the last four: the top bit is tested per pixel, then the mask shifts left by one */uint32_t mask0 = block.Mask0; + uint32_t mask1 = block.Mask1; + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + 
for (int x = 0; x < 8; x++) + { + /* pixels with a clear mask bit are skipped, but the interpolators below still advance */if (mask0 & (1 << 31)) + { + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + uint8_t fg = texPixels[texelX * texHeight + texelY]; + int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + dest[x] = fg; + } + mask0 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask1 & (1 << 31)) + { + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + uint8_t fg = 
texPixels[texelX * texHeight + texelY]; + int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + dest[x] = fg; + } + mask1 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } +} + +static void TriDraw8Stencil(const TriDrawTriangleArgs *args, WorkerThreadData *thread) +{ + int numSpans = thread->NumFullSpans; + auto fullSpans = thread->FullSpans; + int numBlocks = thread->NumPartialBlocks; + auto partialBlocks = thread->PartialBlocks; + int startX = thread->StartX; + int startY = thread->StartY; + + auto flags = args->uniforms->flags; + bool is_simple_shade = (flags & TriUniforms::simple_shade) == TriUniforms::simple_shade; + bool is_nearest_filter = (flags & TriUniforms::nearest_filter) == TriUniforms::nearest_filter; + bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; + uint32_t lightmask = is_fixed_light ? 
0 : 0xffffffff; + auto colormaps = args->colormaps; + + // Calculate gradients + const TriVertex &v1 = *args->v1; + const TriVertex &v2 = *args->v2; + const TriVertex &v3 = *args->v3; + ScreenTriangleStepVariables gradientX; + ScreenTriangleStepVariables gradientY; + ScreenTriangleStepVariables start; + gradientX.W = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + gradientY.W = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + start.W = v1.w + gradientX.W * (startX - v1.x) + gradientY.W * (startY - v1.y); + for (int i = 0; i < TriVertex::NumVarying; i++) + { + gradientX.Varying[i] = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + gradientY.Varying[i] = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + start.Varying[i] = v1.varying[i] * v1.w + gradientX.Varying[i] * (startX - v1.x) + gradientY.Varying[i] * (startY - v1.y); + } + + const uint8_t * RESTRICT texPixels = (const uint8_t *)args->texturePixels; + uint32_t texWidth = args->textureWidth; + uint32_t texHeight = args->textureHeight; + + uint8_t * RESTRICT destOrg = (uint8_t*)args->dest; + int pitch = args->pitch; + + uint32_t light = args->uniforms->light; + float shade = (64.0f - (light * 255 / 256 + 12.0f) * 32.0f / 128.0f) / 32.0f; + float globVis = args->uniforms->globvis * (1.0f / 32.0f); + + uint8_t color = args->uniforms->color; + + for (int i = 0; i < numSpans; i++) + { + const auto &span = fullSpans[i]; + + uint8_t *dest = destOrg + span.X + span.Y * pitch; + int width = span.Length; + int height = 8; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (span.X - startX) + gradientY.W * (span.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (span.X - startX) + gradientY.Varying[j] * (span.Y - startY); + + for 
(int y = 0; y < height; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + for (int x = 0; x < width; x++) + { + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int ix = 0; ix < 8; ix++) + { + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + uint8_t fg = texPixels[texelX * texHeight + texelY]; + int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + dest[x * 8 + ix] = fg; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } + + for (int i = 0; i < numBlocks; i++) + { + const auto &block = partialBlocks[i]; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (block.X - startX) + gradientY.W * (block.Y - 
startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (block.X - startX) + gradientY.Varying[j] * (block.Y - startY); + + uint8_t *dest = destOrg + block.X + block.Y * pitch; + uint32_t mask0 = block.Mask0; + uint32_t mask1 = block.Mask1; + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask0 & (1 << 31)) + { + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + uint8_t fg = texPixels[texelX * texHeight + texelY]; + int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + dest[x] = fg; + } + mask0 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < 
TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask1 & (1 << 31)) + { + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + uint8_t fg = texPixels[texelX * texHeight + texelY]; + int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + dest[x] = fg; + } + mask1 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } +} + +static void TriDraw8Shaded(const TriDrawTriangleArgs *args, WorkerThreadData *thread) +{ + int numSpans = 
thread->NumFullSpans; + auto fullSpans = thread->FullSpans; + int numBlocks = thread->NumPartialBlocks; + auto partialBlocks = thread->PartialBlocks; + int startX = thread->StartX; + int startY = thread->StartY; + + auto flags = args->uniforms->flags; + bool is_simple_shade = (flags & TriUniforms::simple_shade) == TriUniforms::simple_shade; + bool is_nearest_filter = (flags & TriUniforms::nearest_filter) == TriUniforms::nearest_filter; + bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; + uint32_t lightmask = is_fixed_light ? 0 : 0xffffffff; + auto colormaps = args->colormaps; + + // Calculate gradients + const TriVertex &v1 = *args->v1; + const TriVertex &v2 = *args->v2; + const TriVertex &v3 = *args->v3; + ScreenTriangleStepVariables gradientX; + ScreenTriangleStepVariables gradientY; + ScreenTriangleStepVariables start; + gradientX.W = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + gradientY.W = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + start.W = v1.w + gradientX.W * (startX - v1.x) + gradientY.W * (startY - v1.y); + for (int i = 0; i < TriVertex::NumVarying; i++) + { + gradientX.Varying[i] = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + gradientY.Varying[i] = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + start.Varying[i] = v1.varying[i] * v1.w + gradientX.Varying[i] * (startX - v1.x) + gradientY.Varying[i] * (startY - v1.y); + } + + const uint8_t * RESTRICT texPixels = (const uint8_t *)args->texturePixels; + uint32_t texWidth = args->textureWidth; + uint32_t texHeight = args->textureHeight; + + uint8_t * RESTRICT destOrg = (uint8_t*)args->dest; + int pitch = args->pitch; + + uint32_t light = args->uniforms->light; + float shade = (64.0f - (light * 255 / 256 + 12.0f) * 32.0f / 128.0f) / 32.0f; + float globVis = 
args->uniforms->globvis * (1.0f / 32.0f); + + uint8_t color = args->uniforms->color; + + for (int i = 0; i < numSpans; i++) + { + const auto &span = fullSpans[i]; + + uint8_t *dest = destOrg + span.X + span.Y * pitch; + int width = span.Length; + int height = 8; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (span.X - startX) + gradientY.W * (span.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (span.X - startX) + gradientY.Varying[j] * (span.Y - startY); + + for (int y = 0; y < height; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + for (int x = 0; x < width; x++) + { + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int ix = 0; ix < 8; ix++) + { + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + uint8_t fg = texPixels[texelX * texHeight + texelY]; + int colormapindex = MIN(((256 - 
(lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + dest[x * 8 + ix] = fg; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } + + for (int i = 0; i < numBlocks; i++) + { + const auto &block = partialBlocks[i]; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (block.X - startX) + gradientY.W * (block.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (block.X - startX) + gradientY.Varying[j] * (block.Y - startY); + + uint8_t *dest = destOrg + block.X + block.Y * pitch; + uint32_t mask0 = block.Mask0; + uint32_t mask1 = block.Mask1; + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + 
for (int x = 0; x < 8; x++) + { + if (mask0 & (1 << 31)) + { + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + uint8_t fg = texPixels[texelX * texHeight + texelY]; + int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + dest[x] = fg; + } + mask0 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask1 & (1 << 31)) + { + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + uint8_t fg = 
texPixels[texelX * texHeight + texelY]; + int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + dest[x] = fg; + } + mask1 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } +} + +static void TriDraw8TranslateCopy(const TriDrawTriangleArgs *args, WorkerThreadData *thread) +{ + int numSpans = thread->NumFullSpans; + auto fullSpans = thread->FullSpans; + int numBlocks = thread->NumPartialBlocks; + auto partialBlocks = thread->PartialBlocks; + int startX = thread->StartX; + int startY = thread->StartY; + + auto flags = args->uniforms->flags; + bool is_simple_shade = (flags & TriUniforms::simple_shade) == TriUniforms::simple_shade; + bool is_nearest_filter = (flags & TriUniforms::nearest_filter) == TriUniforms::nearest_filter; + bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; + uint32_t lightmask = is_fixed_light ? 
0 : 0xffffffff; + auto colormaps = args->colormaps; + + // Calculate gradients + const TriVertex &v1 = *args->v1; + const TriVertex &v2 = *args->v2; + const TriVertex &v3 = *args->v3; + ScreenTriangleStepVariables gradientX; + ScreenTriangleStepVariables gradientY; + ScreenTriangleStepVariables start; + gradientX.W = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + gradientY.W = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + start.W = v1.w + gradientX.W * (startX - v1.x) + gradientY.W * (startY - v1.y); + for (int i = 0; i < TriVertex::NumVarying; i++) + { + gradientX.Varying[i] = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + gradientY.Varying[i] = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + start.Varying[i] = v1.varying[i] * v1.w + gradientX.Varying[i] * (startX - v1.x) + gradientY.Varying[i] * (startY - v1.y); + } + + const uint8_t * RESTRICT texPixels = (const uint8_t *)args->texturePixels; + uint32_t texWidth = args->textureWidth; + uint32_t texHeight = args->textureHeight; + + uint8_t * RESTRICT destOrg = (uint8_t*)args->dest; + int pitch = args->pitch; + + uint32_t light = args->uniforms->light; + float shade = (64.0f - (light * 255 / 256 + 12.0f) * 32.0f / 128.0f) / 32.0f; + float globVis = args->uniforms->globvis * (1.0f / 32.0f); + + uint8_t color = args->uniforms->color; + + for (int i = 0; i < numSpans; i++) + { + const auto &span = fullSpans[i]; + + uint8_t *dest = destOrg + span.X + span.Y * pitch; + int width = span.Length; + int height = 8; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (span.X - startX) + gradientY.W * (span.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (span.X - startX) + gradientY.Varying[j] * (span.Y - startY); + + for 
(int y = 0; y < height; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + for (int x = 0; x < width; x++) + { + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int ix = 0; ix < 8; ix++) + { + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + uint8_t fg = texPixels[texelX * texHeight + texelY]; + int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + dest[x * 8 + ix] = fg; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } + + for (int i = 0; i < numBlocks; i++) + { + const auto &block = partialBlocks[i]; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (block.X - startX) + gradientY.W * (block.Y - 
startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (block.X - startX) + gradientY.Varying[j] * (block.Y - startY); + + uint8_t *dest = destOrg + block.X + block.Y * pitch; + uint32_t mask0 = block.Mask0; + uint32_t mask1 = block.Mask1; + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask0 & (1 << 31)) + { + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + uint8_t fg = texPixels[texelX * texHeight + texelY]; + int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + dest[x] = fg; + } + mask0 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < 
TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask1 & (1 << 31)) + { + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + uint8_t fg = texPixels[texelX * texHeight + texelY]; + int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + dest[x] = fg; + } + mask1 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } +} + +static void TriDraw8TranslateAlphaBlend(const TriDrawTriangleArgs *args, WorkerThreadData *thread) +{ + int 
numSpans = thread->NumFullSpans; + auto fullSpans = thread->FullSpans; + int numBlocks = thread->NumPartialBlocks; + auto partialBlocks = thread->PartialBlocks; + int startX = thread->StartX; + int startY = thread->StartY; + + auto flags = args->uniforms->flags; + bool is_simple_shade = (flags & TriUniforms::simple_shade) == TriUniforms::simple_shade; + bool is_nearest_filter = (flags & TriUniforms::nearest_filter) == TriUniforms::nearest_filter; + bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; + uint32_t lightmask = is_fixed_light ? 0 : 0xffffffff; + auto colormaps = args->colormaps; + + // Calculate gradients + const TriVertex &v1 = *args->v1; + const TriVertex &v2 = *args->v2; + const TriVertex &v3 = *args->v3; + ScreenTriangleStepVariables gradientX; + ScreenTriangleStepVariables gradientY; + ScreenTriangleStepVariables start; + gradientX.W = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + gradientY.W = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + start.W = v1.w + gradientX.W * (startX - v1.x) + gradientY.W * (startY - v1.y); + for (int i = 0; i < TriVertex::NumVarying; i++) + { + gradientX.Varying[i] = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + gradientY.Varying[i] = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + start.Varying[i] = v1.varying[i] * v1.w + gradientX.Varying[i] * (startX - v1.x) + gradientY.Varying[i] * (startY - v1.y); + } + + const uint8_t * RESTRICT texPixels = (const uint8_t *)args->texturePixels; + uint32_t texWidth = args->textureWidth; + uint32_t texHeight = args->textureHeight; + + uint8_t * RESTRICT destOrg = (uint8_t*)args->dest; + int pitch = args->pitch; + + uint32_t light = args->uniforms->light; + float shade = (64.0f - (light * 255 / 256 + 12.0f) * 32.0f / 128.0f) / 32.0f; + float globVis = 
args->uniforms->globvis * (1.0f / 32.0f); + + uint8_t color = args->uniforms->color; + + for (int i = 0; i < numSpans; i++) + { + const auto &span = fullSpans[i]; + + uint8_t *dest = destOrg + span.X + span.Y * pitch; + int width = span.Length; + int height = 8; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (span.X - startX) + gradientY.W * (span.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (span.X - startX) + gradientY.Varying[j] * (span.Y - startY); + + for (int y = 0; y < height; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + for (int x = 0; x < width; x++) + { + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int ix = 0; ix < 8; ix++) + { + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + uint8_t fg = texPixels[texelX * texHeight + texelY]; + int colormapindex = MIN(((256 - 
(lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + dest[x * 8 + ix] = fg; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } + + for (int i = 0; i < numBlocks; i++) + { + const auto &block = partialBlocks[i]; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (block.X - startX) + gradientY.W * (block.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (block.X - startX) + gradientY.Varying[j] * (block.Y - startY); + + uint8_t *dest = destOrg + block.X + block.Y * pitch; + uint32_t mask0 = block.Mask0; + uint32_t mask1 = block.Mask1; + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + 
for (int x = 0; x < 8; x++) + { + if (mask0 & (1 << 31)) + { + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + uint8_t fg = texPixels[texelX * texHeight + texelY]; + int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + dest[x] = fg; + } + mask0 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask1 & (1 << 31)) + { + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + uint8_t fg = 
texPixels[texelX * texHeight + texelY]; + int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + dest[x] = fg; + } + mask1 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } +} + +static void TriDraw8TranslateAdd(const TriDrawTriangleArgs *args, WorkerThreadData *thread) +{ + int numSpans = thread->NumFullSpans; + auto fullSpans = thread->FullSpans; + int numBlocks = thread->NumPartialBlocks; + auto partialBlocks = thread->PartialBlocks; + int startX = thread->StartX; + int startY = thread->StartY; + + auto flags = args->uniforms->flags; + bool is_simple_shade = (flags & TriUniforms::simple_shade) == TriUniforms::simple_shade; + bool is_nearest_filter = (flags & TriUniforms::nearest_filter) == TriUniforms::nearest_filter; + bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; + uint32_t lightmask = is_fixed_light ? 
0 : 0xffffffff; + auto colormaps = args->colormaps; + + // Calculate gradients + const TriVertex &v1 = *args->v1; + const TriVertex &v2 = *args->v2; + const TriVertex &v3 = *args->v3; + ScreenTriangleStepVariables gradientX; + ScreenTriangleStepVariables gradientY; + ScreenTriangleStepVariables start; + gradientX.W = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + gradientY.W = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + start.W = v1.w + gradientX.W * (startX - v1.x) + gradientY.W * (startY - v1.y); + for (int i = 0; i < TriVertex::NumVarying; i++) + { + gradientX.Varying[i] = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + gradientY.Varying[i] = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + start.Varying[i] = v1.varying[i] * v1.w + gradientX.Varying[i] * (startX - v1.x) + gradientY.Varying[i] * (startY - v1.y); + } + + const uint8_t * RESTRICT texPixels = (const uint8_t *)args->texturePixels; + uint32_t texWidth = args->textureWidth; + uint32_t texHeight = args->textureHeight; + + uint8_t * RESTRICT destOrg = (uint8_t*)args->dest; + int pitch = args->pitch; + + uint32_t light = args->uniforms->light; + float shade = (64.0f - (light * 255 / 256 + 12.0f) * 32.0f / 128.0f) / 32.0f; + float globVis = args->uniforms->globvis * (1.0f / 32.0f); + + uint8_t color = args->uniforms->color; + + for (int i = 0; i < numSpans; i++) + { + const auto &span = fullSpans[i]; + + uint8_t *dest = destOrg + span.X + span.Y * pitch; + int width = span.Length; + int height = 8; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (span.X - startX) + gradientY.W * (span.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (span.X - startX) + gradientY.Varying[j] * (span.Y - startY); + + for 
(int y = 0; y < height; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + for (int x = 0; x < width; x++) + { + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int ix = 0; ix < 8; ix++) + { + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + uint8_t fg = texPixels[texelX * texHeight + texelY]; + int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + dest[x * 8 + ix] = fg; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } + + for (int i = 0; i < numBlocks; i++) + { + const auto &block = partialBlocks[i]; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (block.X - startX) + gradientY.W * (block.Y - 
startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (block.X - startX) + gradientY.Varying[j] * (block.Y - startY); + + uint8_t *dest = destOrg + block.X + block.Y * pitch; + uint32_t mask0 = block.Mask0; + uint32_t mask1 = block.Mask1; + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask0 & (1 << 31)) + { + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + uint8_t fg = texPixels[texelX * texHeight + texelY]; + int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + dest[x] = fg; + } + mask0 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < 
TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask1 & (1 << 31)) + { + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + uint8_t fg = texPixels[texelX * texHeight + texelY]; + int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + dest[x] = fg; + } + mask1 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } +} + +static void TriDraw8TranslateSub(const TriDrawTriangleArgs *args, WorkerThreadData *thread) +{ + int numSpans = 
thread->NumFullSpans; + auto fullSpans = thread->FullSpans; + int numBlocks = thread->NumPartialBlocks; + auto partialBlocks = thread->PartialBlocks; + int startX = thread->StartX; + int startY = thread->StartY; + + auto flags = args->uniforms->flags; + bool is_simple_shade = (flags & TriUniforms::simple_shade) == TriUniforms::simple_shade; + bool is_nearest_filter = (flags & TriUniforms::nearest_filter) == TriUniforms::nearest_filter; + bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; + uint32_t lightmask = is_fixed_light ? 0 : 0xffffffff; + auto colormaps = args->colormaps; + + // Calculate gradients + const TriVertex &v1 = *args->v1; + const TriVertex &v2 = *args->v2; + const TriVertex &v3 = *args->v3; + ScreenTriangleStepVariables gradientX; + ScreenTriangleStepVariables gradientY; + ScreenTriangleStepVariables start; + gradientX.W = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + gradientY.W = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + start.W = v1.w + gradientX.W * (startX - v1.x) + gradientY.W * (startY - v1.y); + for (int i = 0; i < TriVertex::NumVarying; i++) + { + gradientX.Varying[i] = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + gradientY.Varying[i] = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + start.Varying[i] = v1.varying[i] * v1.w + gradientX.Varying[i] * (startX - v1.x) + gradientY.Varying[i] * (startY - v1.y); + } + + const uint8_t * RESTRICT texPixels = (const uint8_t *)args->texturePixels; + uint32_t texWidth = args->textureWidth; + uint32_t texHeight = args->textureHeight; + + uint8_t * RESTRICT destOrg = (uint8_t*)args->dest; + int pitch = args->pitch; + + uint32_t light = args->uniforms->light; + float shade = (64.0f - (light * 255 / 256 + 12.0f) * 32.0f / 128.0f) / 32.0f; + float globVis = 
args->uniforms->globvis * (1.0f / 32.0f); + + uint8_t color = args->uniforms->color; + + for (int i = 0; i < numSpans; i++) + { + const auto &span = fullSpans[i]; + + uint8_t *dest = destOrg + span.X + span.Y * pitch; + int width = span.Length; + int height = 8; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (span.X - startX) + gradientY.W * (span.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (span.X - startX) + gradientY.Varying[j] * (span.Y - startY); + + for (int y = 0; y < height; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + for (int x = 0; x < width; x++) + { + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int ix = 0; ix < 8; ix++) + { + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + uint8_t fg = texPixels[texelX * texHeight + texelY]; + int colormapindex = MIN(((256 - 
(lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + dest[x * 8 + ix] = fg; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } + + for (int i = 0; i < numBlocks; i++) + { + const auto &block = partialBlocks[i]; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (block.X - startX) + gradientY.W * (block.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (block.X - startX) + gradientY.Varying[j] * (block.Y - startY); + + uint8_t *dest = destOrg + block.X + block.Y * pitch; + uint32_t mask0 = block.Mask0; + uint32_t mask1 = block.Mask1; + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + 
for (int x = 0; x < 8; x++) + { + if (mask0 & (1 << 31)) + { + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + uint8_t fg = texPixels[texelX * texHeight + texelY]; + int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + dest[x] = fg; + } + mask0 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask1 & (1 << 31)) + { + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + uint8_t fg = 
texPixels[texelX * texHeight + texelY]; + int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + dest[x] = fg; + } + mask1 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } +} + +static void TriDraw8TranslateRevSub(const TriDrawTriangleArgs *args, WorkerThreadData *thread) +{ + int numSpans = thread->NumFullSpans; + auto fullSpans = thread->FullSpans; + int numBlocks = thread->NumPartialBlocks; + auto partialBlocks = thread->PartialBlocks; + int startX = thread->StartX; + int startY = thread->StartY; + + auto flags = args->uniforms->flags; + bool is_simple_shade = (flags & TriUniforms::simple_shade) == TriUniforms::simple_shade; + bool is_nearest_filter = (flags & TriUniforms::nearest_filter) == TriUniforms::nearest_filter; + bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; + uint32_t lightmask = is_fixed_light ? 
0 : 0xffffffff; + auto colormaps = args->colormaps; + + // Calculate gradients + const TriVertex &v1 = *args->v1; + const TriVertex &v2 = *args->v2; + const TriVertex &v3 = *args->v3; + ScreenTriangleStepVariables gradientX; + ScreenTriangleStepVariables gradientY; + ScreenTriangleStepVariables start; + gradientX.W = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + gradientY.W = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + start.W = v1.w + gradientX.W * (startX - v1.x) + gradientY.W * (startY - v1.y); + for (int i = 0; i < TriVertex::NumVarying; i++) + { + gradientX.Varying[i] = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + gradientY.Varying[i] = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + start.Varying[i] = v1.varying[i] * v1.w + gradientX.Varying[i] * (startX - v1.x) + gradientY.Varying[i] * (startY - v1.y); + } + + const uint8_t * RESTRICT texPixels = (const uint8_t *)args->texturePixels; + uint32_t texWidth = args->textureWidth; + uint32_t texHeight = args->textureHeight; + + uint8_t * RESTRICT destOrg = (uint8_t*)args->dest; + int pitch = args->pitch; + + uint32_t light = args->uniforms->light; + float shade = (64.0f - (light * 255 / 256 + 12.0f) * 32.0f / 128.0f) / 32.0f; + float globVis = args->uniforms->globvis * (1.0f / 32.0f); + + uint8_t color = args->uniforms->color; + + for (int i = 0; i < numSpans; i++) + { + const auto &span = fullSpans[i]; + + uint8_t *dest = destOrg + span.X + span.Y * pitch; + int width = span.Length; + int height = 8; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (span.X - startX) + gradientY.W * (span.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (span.X - startX) + gradientY.Varying[j] * (span.Y - startY); + + for 
(int y = 0; y < height; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + for (int x = 0; x < width; x++) + { + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int ix = 0; ix < 8; ix++) + { + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + uint8_t fg = texPixels[texelX * texHeight + texelY]; + int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + dest[x * 8 + ix] = fg; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } + + for (int i = 0; i < numBlocks; i++) + { + const auto &block = partialBlocks[i]; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (block.X - startX) + gradientY.W * (block.Y - 
startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (block.X - startX) + gradientY.Varying[j] * (block.Y - startY); + + uint8_t *dest = destOrg + block.X + block.Y * pitch; + uint32_t mask0 = block.Mask0; + uint32_t mask1 = block.Mask1; + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask0 & (1 << 31)) + { + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + uint8_t fg = texPixels[texelX * texHeight + texelY]; + int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + dest[x] = fg; + } + mask0 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < 
TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask1 & (1 << 31)) + { + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + uint8_t fg = texPixels[texelX * texHeight + texelY]; + int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + dest[x] = fg; + } + mask1 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } +} + +static void TriDraw8AddSrcColorOneMinusSrcColor(const TriDrawTriangleArgs *args, WorkerThreadData *thread) +{ + int 
numSpans = thread->NumFullSpans; + auto fullSpans = thread->FullSpans; + int numBlocks = thread->NumPartialBlocks; + auto partialBlocks = thread->PartialBlocks; + int startX = thread->StartX; + int startY = thread->StartY; + + auto flags = args->uniforms->flags; + bool is_simple_shade = (flags & TriUniforms::simple_shade) == TriUniforms::simple_shade; + bool is_nearest_filter = (flags & TriUniforms::nearest_filter) == TriUniforms::nearest_filter; + bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; + uint32_t lightmask = is_fixed_light ? 0 : 0xffffffff; + auto colormaps = args->colormaps; + + // Calculate gradients + const TriVertex &v1 = *args->v1; + const TriVertex &v2 = *args->v2; + const TriVertex &v3 = *args->v3; + ScreenTriangleStepVariables gradientX; + ScreenTriangleStepVariables gradientY; + ScreenTriangleStepVariables start; + gradientX.W = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + gradientY.W = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + start.W = v1.w + gradientX.W * (startX - v1.x) + gradientY.W * (startY - v1.y); + for (int i = 0; i < TriVertex::NumVarying; i++) + { + gradientX.Varying[i] = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + gradientY.Varying[i] = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + start.Varying[i] = v1.varying[i] * v1.w + gradientX.Varying[i] * (startX - v1.x) + gradientY.Varying[i] * (startY - v1.y); + } + + const uint8_t * RESTRICT texPixels = (const uint8_t *)args->texturePixels; + uint32_t texWidth = args->textureWidth; + uint32_t texHeight = args->textureHeight; + + uint8_t * RESTRICT destOrg = (uint8_t*)args->dest; + int pitch = args->pitch; + + uint32_t light = args->uniforms->light; + float shade = (64.0f - (light * 255 / 256 + 12.0f) * 32.0f / 128.0f) / 32.0f; + float globVis = 
args->uniforms->globvis * (1.0f / 32.0f); + + uint8_t color = args->uniforms->color; + + for (int i = 0; i < numSpans; i++) + { + const auto &span = fullSpans[i]; + + uint8_t *dest = destOrg + span.X + span.Y * pitch; + int width = span.Length; + int height = 8; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (span.X - startX) + gradientY.W * (span.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (span.X - startX) + gradientY.Varying[j] * (span.Y - startY); + + for (int y = 0; y < height; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + for (int x = 0; x < width; x++) + { + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int ix = 0; ix < 8; ix++) + { + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + uint8_t fg = texPixels[texelX * texHeight + texelY]; + int colormapindex = MIN(((256 - 
(lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + dest[x * 8 + ix] = fg; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } + + for (int i = 0; i < numBlocks; i++) + { + const auto &block = partialBlocks[i]; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (block.X - startX) + gradientY.W * (block.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (block.X - startX) + gradientY.Varying[j] * (block.Y - startY); + + uint8_t *dest = destOrg + block.X + block.Y * pitch; + uint32_t mask0 = block.Mask0; + uint32_t mask1 = block.Mask1; + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + 
for (int x = 0; x < 8; x++) + { + if (mask0 & (1 << 31)) + { + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + uint8_t fg = texPixels[texelX * texHeight + texelY]; + int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + dest[x] = fg; + } + mask0 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask1 & (1 << 31)) + { + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + uint8_t fg = 
texPixels[texelX * texHeight + texelY]; + int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + dest[x] = fg; + } + mask1 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } +} + +static void TriDraw8Skycap(const TriDrawTriangleArgs *args, WorkerThreadData *thread) +{ + int numSpans = thread->NumFullSpans; + auto fullSpans = thread->FullSpans; + int numBlocks = thread->NumPartialBlocks; + auto partialBlocks = thread->PartialBlocks; + int startX = thread->StartX; + int startY = thread->StartY; + + auto flags = args->uniforms->flags; + bool is_simple_shade = (flags & TriUniforms::simple_shade) == TriUniforms::simple_shade; + bool is_nearest_filter = (flags & TriUniforms::nearest_filter) == TriUniforms::nearest_filter; + bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; + uint32_t lightmask = is_fixed_light ? 
0 : 0xffffffff; + auto colormaps = args->colormaps; + + // Calculate gradients + const TriVertex &v1 = *args->v1; + const TriVertex &v2 = *args->v2; + const TriVertex &v3 = *args->v3; + ScreenTriangleStepVariables gradientX; + ScreenTriangleStepVariables gradientY; + ScreenTriangleStepVariables start; + gradientX.W = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + gradientY.W = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + start.W = v1.w + gradientX.W * (startX - v1.x) + gradientY.W * (startY - v1.y); + for (int i = 0; i < TriVertex::NumVarying; i++) + { + gradientX.Varying[i] = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + gradientY.Varying[i] = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + start.Varying[i] = v1.varying[i] * v1.w + gradientX.Varying[i] * (startX - v1.x) + gradientY.Varying[i] * (startY - v1.y); + } + + const uint8_t * RESTRICT texPixels = (const uint8_t *)args->texturePixels; + uint32_t texWidth = args->textureWidth; + uint32_t texHeight = args->textureHeight; + + uint8_t * RESTRICT destOrg = (uint8_t*)args->dest; + int pitch = args->pitch; + + uint32_t light = args->uniforms->light; + float shade = (64.0f - (light * 255 / 256 + 12.0f) * 32.0f / 128.0f) / 32.0f; + float globVis = args->uniforms->globvis * (1.0f / 32.0f); + + uint8_t color = args->uniforms->color; + + for (int i = 0; i < numSpans; i++) + { + const auto &span = fullSpans[i]; + + uint8_t *dest = destOrg + span.X + span.Y * pitch; + int width = span.Length; + int height = 8; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (span.X - startX) + gradientY.W * (span.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (span.X - startX) + gradientY.Varying[j] * (span.Y - startY); + + for 
(int y = 0; y < height; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + for (int x = 0; x < width; x++) + { + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int ix = 0; ix < 8; ix++) + { + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + uint8_t fg = texPixels[texelX * texHeight + texelY]; + int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + dest[x * 8 + ix] = fg; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } + + for (int i = 0; i < numBlocks; i++) + { + const auto &block = partialBlocks[i]; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (block.X - startX) + gradientY.W * (block.Y - 
startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (block.X - startX) + gradientY.Varying[j] * (block.Y - startY); + + uint8_t *dest = destOrg + block.X + block.Y * pitch; + uint32_t mask0 = block.Mask0; + uint32_t mask1 = block.Mask1; + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask0 & (1 << 31)) + { + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + uint8_t fg = texPixels[texelX * texHeight + texelY]; + int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + dest[x] = fg; + } + mask0 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < 
TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if (mask1 & (1 << 31)) + { + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + uint8_t fg = texPixels[texelX * texHeight + texelY]; + int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + dest[x] = fg; + } + mask1 <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } +} + +std::vector ScreenTriangle::TriFill32 = +{ + &TriFill32Copy, + &TriFill32AlphaBlend, + &TriFill32AddSolid, + 
&TriFill32Add, + &TriFill32Sub, + &TriFill32RevSub, + &TriFill32Stencil, + &TriFill32Shaded, + &TriFill32TranslateCopy, + &TriFill32TranslateAlphaBlend, + &TriFill32TranslateAdd, + &TriFill32TranslateSub, + &TriFill32TranslateRevSub, + &TriFill32AddSrcColorOneMinusSrcColor, + &TriFill32Skycap, +}; + +std::vector ScreenTriangle::TriDraw32 = +{ + &TriDraw32Copy, + &TriDraw32AlphaBlend, + &TriDraw32AddSolid, + &TriDraw32Add, + &TriDraw32Sub, + &TriDraw32RevSub, + &TriDraw32Stencil, + &TriDraw32Shaded, + &TriDraw32TranslateCopy, + &TriDraw32TranslateAlphaBlend, + &TriDraw32TranslateAdd, + &TriDraw32TranslateSub, + &TriDraw32TranslateRevSub, + &TriDraw32AddSrcColorOneMinusSrcColor, + &TriDraw32Skycap, +}; + +std::vector ScreenTriangle::TriFill8 = +{ + &TriFill8Copy, + &TriFill8AlphaBlend, + &TriFill8AddSolid, + &TriFill8Add, + &TriFill8Sub, + &TriFill8RevSub, + &TriFill8Stencil, + &TriFill8Shaded, + &TriFill8TranslateCopy, + &TriFill8TranslateAlphaBlend, + &TriFill8TranslateAdd, + &TriFill8TranslateSub, + &TriFill8TranslateRevSub, + &TriFill8AddSrcColorOneMinusSrcColor, + &TriFill8Skycap, +}; + +std::vector ScreenTriangle::TriDraw8 = +{ + &TriDraw8Copy, + &TriDraw8AlphaBlend, + &TriDraw8AddSolid, + &TriDraw8Add, + &TriDraw8Sub, + &TriDraw8RevSub, + &TriDraw8Stencil, + &TriDraw8Shaded, + &TriDraw8TranslateCopy, + &TriDraw8TranslateAlphaBlend, + &TriDraw8TranslateAdd, + &TriDraw8TranslateSub, + &TriDraw8TranslateRevSub, + &TriDraw8AddSrcColorOneMinusSrcColor, + &TriDraw8Skycap, +}; + diff --git a/src/polyrenderer/drawers/poly_drawers.php b/src/polyrenderer/drawers/poly_drawers.php new file mode 100644 index 0000000000..b451888108 --- /dev/null +++ b/src/polyrenderer/drawers/poly_drawers.php @@ -0,0 +1,386 @@ +/* +** Projected triangle drawer +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. 
+** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. +** +*/ + +/* + Warning: this C++ source file has been auto-generated. Please modify the original php script that generated it. +*/ + +#pragma once + +#include "screen_triangle.h" + +static float FindGradientX(float x0, float y0, float x1, float y1, float x2, float y2, float c0, float c1, float c2) +{ + float top = (c1 - c2) * (y0 - y2) - (c0 - c2) * (y1 - y2); + float bottom = (x1 - x2) * (y0 - y2) - (x0 - x2) * (y1 - y2); + return top / bottom; +} + +static float FindGradientY(float x0, float y0, float x1, float y1, float x2, float y2, float c0, float c1, float c2) +{ + float top = (c1 - c2) * (x0 - x2) - (c0 - c2) * (x1 - x2); + float bottom = (x0 - x2) * (y1 - y2) - (x1 - x2) * (y0 - y2); + return top / bottom; +} + + +std::vector ScreenTriangle:: = +{ + +}; + + + &, + +static void (const TriDrawTriangleArgs *args, WorkerThreadData *thread) +{ + int numSpans = thread->NumFullSpans; + auto fullSpans = thread->FullSpans; + int numBlocks = thread->NumPartialBlocks; + auto partialBlocks = thread->PartialBlocks; + int startX = thread->StartX; + int startY = thread->StartY; + + auto flags = args->uniforms->flags; + bool is_simple_shade = (flags & TriUniforms::simple_shade) == TriUniforms::simple_shade; + bool is_nearest_filter = (flags & TriUniforms::nearest_filter) == TriUniforms::nearest_filter; + 
bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; + uint32_t lightmask = is_fixed_light ? 0 : 0xffffffff; + auto colormaps = args->colormaps; + + // Calculate gradients + const TriVertex &v1 = *args->v1; + const TriVertex &v2 = *args->v2; + const TriVertex &v3 = *args->v3; + ScreenTriangleStepVariables gradientX; + ScreenTriangleStepVariables gradientY; + ScreenTriangleStepVariables start; + gradientX.W = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + gradientY.W = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + start.W = v1.w + gradientX.W * (startX - v1.x) + gradientY.W * (startY - v1.y); + for (int i = 0; i < TriVertex::NumVarying; i++) + { + gradientX.Varying[i] = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + gradientY.Varying[i] = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + start.Varying[i] = v1.varying[i] * v1.w + gradientX.Varying[i] * (startX - v1.x) + gradientY.Varying[i] * (startY - v1.y); + } + + const * RESTRICT texPixels = (const *)args->texturePixels; + uint32_t texWidth = args->textureWidth; + uint32_t texHeight = args->textureHeight; + + * RESTRICT destOrg = (*)args->dest; + int pitch = args->pitch; + + uint32_t light = args->uniforms->light; + float shade = (64.0f - (light * 255 / 256 + 12.0f) * 32.0f / 128.0f) / 32.0f; + float globVis = args->uniforms->globvis * (1.0f / 32.0f); + + color = args->uniforms->color; + + for (int i = 0; i < numSpans; i++) + { + const auto &span = fullSpans[i]; + + *dest = destOrg + span.X + span.Y * pitch; + int width = span.Length; + int height = 8; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (span.X - startX) + gradientY.W * (span.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + 
gradientX.Varying[j] * (span.X - startX) + gradientY.Varying[j] * (span.Y - startY); + + for (int y = 0; y < height; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + for (int x = 0; x < width; x++) + { + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int ix = 0; ix < 8; ix++) + { + + fg = color; + + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + fg = texPixels[texelX * texHeight + texelY]; + + uint32_t r = RPART(fg); + uint32_t g = GPART(fg); + uint32_t b = BPART(fg); + r = (r * lightpos) >> 16; + g = (g * lightpos) >> 16; + b = (b * lightpos) >> 16; + + + uint32_t a = APART(fg); + a += a >> 7; + uint32_t inv_a = 256 - a; + uint32_t bg = dest[x * 8 + ix]; + uint32_t bg_red = RPART(bg); + uint32_t bg_green = GPART(bg); + uint32_t bg_blue = BPART(bg); + r = (r * a + bg_red * inv_a + 127) >> 8; + g = (g * a + bg_green * inv_a + 127) >> 8; + b = (b * a + bg_blue * inv_a + 127) >> 8; + fg = 0xff000000 | (r << 
16) | (g << 8) | b; + + fg = 0xff000000 | (r << 16) | (g << 8) | b; + + int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + + dest[x * 8 + ix] = fg; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + } + + for (int i = 0; i < numBlocks; i++) + { + const auto &block = partialBlocks[i]; + + ScreenTriangleStepVariables blockPosY; + blockPosY.W = start.W + gradientX.W * (block.X - startX) + gradientY.W * (block.Y - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (block.X - startX) + gradientY.Varying[j] * (block.Y - startY); + + *dest = destOrg + block.X + block.Y * pitch; + uint32_t mask0 = block.Mask0; + uint32_t mask1 = block.Mask1; + + for (int y = 0; y < 4; y++) + { + ScreenTriangleStepVariables blockPosX = blockPosY; + + float rcpW = 0x01000000 / blockPosX.W; + int32_t varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + + int lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); + lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); + + blockPosX.W += gradientX.W * 8; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] += gradientX.Varying[j] * 8; + + rcpW = 0x01000000 / blockPosX.W; + int32_t varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + int32_t nextPos = (int32_t)(blockPosX.Varying[j] * rcpW); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + int lightnext = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * 
(float)FRACUNIT); + int lightstep = (lightnext - lightpos) / 8; + lightstep = lightstep & lightmask; + + for (int x = 0; x < 8; x++) + { + if ( & (1 << 31)) + { + + fg = color; + + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + fg = texPixels[texelX * texHeight + texelY]; + + uint32_t r = RPART(fg); + uint32_t g = GPART(fg); + uint32_t b = BPART(fg); + r = (r * lightpos) >> 16; + g = (g * lightpos) >> 16; + b = (b * lightpos) >> 16; + + + uint32_t a = APART(fg); + a += a >> 7; + uint32_t inv_a = 256 - a; + uint32_t bg = dest[x]; + uint32_t bg_red = RPART(bg); + uint32_t bg_green = GPART(bg); + uint32_t bg_blue = BPART(bg); + r = (r * a + bg_red * inv_a + 127) >> 8; + g = (g * a + bg_green * inv_a + 127) >> 8; + b = (b * a + bg_blue * inv_a + 127) >> 8; + fg = 0xff000000 | (r << 16) | (g << 8) | b; + + fg = 0xff000000 | (r << 16) | (g << 8) | b; + + int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + + dest[x] = fg; + } + <<= 1; + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] += varyingStep[j]; + lightpos += lightstep; + } + + blockPosY.W += gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] += gradientY.Varying[j]; + + dest += pitch; + } + + } +} + + \ No newline at end of file diff --git a/src/polyrenderer/drawers/poly_triangle.cpp b/src/polyrenderer/drawers/poly_triangle.cpp index e4cd81dac5..e51dbe81a1 100644 --- a/src/polyrenderer/drawers/poly_triangle.cpp +++ b/src/polyrenderer/drawers/poly_triangle.cpp @@ -85,13 +85,13 @@ void PolyTriangleDrawer::draw(const PolyDrawArgs &args) PolyRenderer::Instance()->Thread.DrawQueue->Push(args, mirror); } +EXTERN_CVAR(Bool, r_phpdrawers); + void PolyTriangleDrawer::draw_arrays(const PolyDrawArgs &drawargs, WorkerThreadData *thread) { if (drawargs.vcount < 3) return; - auto llvm = Drawers::Instance(); - 
PolyDrawFuncPtr drawfuncs[4]; int num_drawfuncs = 0; @@ -100,10 +100,21 @@ void PolyTriangleDrawer::draw_arrays(const PolyDrawArgs &drawargs, WorkerThreadD if (!r_debug_trisetup) // For profiling how much time is spent in setup vs drawal { int bmode = (int)drawargs.blendmode; - if (drawargs.writeColor && drawargs.texturePixels) - drawfuncs[num_drawfuncs++] = dest_bgra ? llvm->TriDraw32[bmode] : llvm->TriDraw8[bmode]; - else if (drawargs.writeColor) - drawfuncs[num_drawfuncs++] = dest_bgra ? llvm->TriFill32[bmode] : llvm->TriFill8[bmode]; + if (r_phpdrawers) + { + if (drawargs.writeColor && drawargs.texturePixels) + drawfuncs[num_drawfuncs++] = dest_bgra ? ScreenTriangle::TriDraw32[bmode] : ScreenTriangle::TriDraw8[bmode]; + else if (drawargs.writeColor) + drawfuncs[num_drawfuncs++] = dest_bgra ? ScreenTriangle::TriFill32[bmode] : ScreenTriangle::TriFill8[bmode]; + } + else + { + auto llvm = Drawers::Instance(); + if (drawargs.writeColor && drawargs.texturePixels) + drawfuncs[num_drawfuncs++] = dest_bgra ? llvm->TriDraw32[bmode] : llvm->TriDraw8[bmode]; + else if (drawargs.writeColor) + drawfuncs[num_drawfuncs++] = dest_bgra ? 
llvm->TriFill32[bmode] : llvm->TriFill8[bmode]; + } } if (drawargs.writeStencil) diff --git a/src/polyrenderer/drawers/screen_triangle.cpp b/src/polyrenderer/drawers/screen_triangle.cpp index d1858d5111..54af97f2c1 100644 --- a/src/polyrenderer/drawers/screen_triangle.cpp +++ b/src/polyrenderer/drawers/screen_triangle.cpp @@ -36,6 +36,7 @@ #include "poly_triangle.h" #include "swrenderer/drawers/r_draw_rgba.h" #include "screen_triangle.h" +#include "poly_drawers.h" void ScreenTriangle::SetupNormal(const TriDrawTriangleArgs *args, WorkerThreadData *thread) { diff --git a/src/polyrenderer/drawers/screen_triangle.h b/src/polyrenderer/drawers/screen_triangle.h index 0238088bfc..1f11aad545 100644 --- a/src/polyrenderer/drawers/screen_triangle.h +++ b/src/polyrenderer/drawers/screen_triangle.h @@ -31,6 +31,11 @@ public: static void SetupSubsector(const TriDrawTriangleArgs *args, WorkerThreadData *thread); static void StencilWrite(const TriDrawTriangleArgs *args, WorkerThreadData *thread); static void SubsectorWrite(const TriDrawTriangleArgs *args, WorkerThreadData *thread); + + static std::vector TriDraw8; + static std::vector TriDraw32; + static std::vector TriFill8; + static std::vector TriFill32; }; struct ScreenTriangleStepVariables From 38453d04358319c4be15e1306b7672031da9b513 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 22 Feb 2017 00:27:06 +0100 Subject: [PATCH 869/912] Added most of the blend modes --- src/polyrenderer/drawers/poly_drawers.h | 1422 ++++++++++++--------- src/polyrenderer/drawers/poly_drawers.php | 279 ++-- 2 files changed, 999 insertions(+), 702 deletions(-) diff --git a/src/polyrenderer/drawers/poly_drawers.h b/src/polyrenderer/drawers/poly_drawers.h index 37b5eb25ca..d17d570f2c 100644 --- a/src/polyrenderer/drawers/poly_drawers.h +++ b/src/polyrenderer/drawers/poly_drawers.h @@ -57,6 +57,8 @@ static void TriFill32Copy(const TriDrawTriangleArgs *args, WorkerThreadData *thr bool is_fixed_light = (flags & TriUniforms::fixed_light) == 
TriUniforms::fixed_light; uint32_t lightmask = is_fixed_light ? 0 : 0xffffffff; auto colormaps = args->colormaps; + uint32_t srcalpha = args->uniforms->srcalpha; + uint32_t destalpha = args->uniforms->destalpha; // Calculate gradients const TriVertex &v1 = *args->v1; @@ -133,6 +135,7 @@ static void TriFill32Copy(const TriDrawTriangleArgs *args, WorkerThreadData *thr for (int ix = 0; ix < 8; ix++) { + uint32_t *destptr = dest + x * 8 + ix; uint32_t fg = color; uint32_t r = RPART(fg); uint32_t g = GPART(fg); @@ -140,9 +143,8 @@ static void TriFill32Copy(const TriDrawTriangleArgs *args, WorkerThreadData *thr r = (r * lightpos) >> 16; g = (g * lightpos) >> 16; b = (b * lightpos) >> 16; - fg = 0xff000000 | (r << 16) | (g << 8) | b; - dest[x * 8 + ix] = fg; + *destptr = fg; for (int j = 0; j < TriVertex::NumVarying; j++) varyingPos[j] += varyingStep[j]; @@ -202,6 +204,7 @@ static void TriFill32Copy(const TriDrawTriangleArgs *args, WorkerThreadData *thr { if (mask0 & (1 << 31)) { + uint32_t *destptr = dest + x; uint32_t fg = color; uint32_t r = RPART(fg); uint32_t g = GPART(fg); @@ -209,9 +212,8 @@ static void TriFill32Copy(const TriDrawTriangleArgs *args, WorkerThreadData *thr r = (r * lightpos) >> 16; g = (g * lightpos) >> 16; b = (b * lightpos) >> 16; - fg = 0xff000000 | (r << 16) | (g << 8) | b; - dest[x] = fg; + *destptr = fg; } mask0 <<= 1; @@ -258,6 +260,7 @@ static void TriFill32Copy(const TriDrawTriangleArgs *args, WorkerThreadData *thr { if (mask1 & (1 << 31)) { + uint32_t *destptr = dest + x; uint32_t fg = color; uint32_t r = RPART(fg); uint32_t g = GPART(fg); @@ -265,9 +268,8 @@ static void TriFill32Copy(const TriDrawTriangleArgs *args, WorkerThreadData *thr r = (r * lightpos) >> 16; g = (g * lightpos) >> 16; b = (b * lightpos) >> 16; - fg = 0xff000000 | (r << 16) | (g << 8) | b; - dest[x] = fg; + *destptr = fg; } mask1 <<= 1; @@ -300,6 +302,8 @@ static void TriFill32AlphaBlend(const TriDrawTriangleArgs *args, WorkerThreadDat bool is_fixed_light = (flags & 
TriUniforms::fixed_light) == TriUniforms::fixed_light; uint32_t lightmask = is_fixed_light ? 0 : 0xffffffff; auto colormaps = args->colormaps; + uint32_t srcalpha = args->uniforms->srcalpha; + uint32_t destalpha = args->uniforms->destalpha; // Calculate gradients const TriVertex &v1 = *args->v1; @@ -376,6 +380,7 @@ static void TriFill32AlphaBlend(const TriDrawTriangleArgs *args, WorkerThreadDat for (int ix = 0; ix < 8; ix++) { + uint32_t *destptr = dest + x * 8 + ix; uint32_t fg = color; uint32_t r = RPART(fg); uint32_t g = GPART(fg); @@ -383,11 +388,10 @@ static void TriFill32AlphaBlend(const TriDrawTriangleArgs *args, WorkerThreadDat r = (r * lightpos) >> 16; g = (g * lightpos) >> 16; b = (b * lightpos) >> 16; - uint32_t a = APART(fg); a += a >> 7; uint32_t inv_a = 256 - a; - uint32_t bg = dest[x * 8 + ix]; + uint32_t bg = *destptr; uint32_t bg_red = RPART(bg); uint32_t bg_green = GPART(bg); uint32_t bg_blue = BPART(bg); @@ -395,7 +399,7 @@ static void TriFill32AlphaBlend(const TriDrawTriangleArgs *args, WorkerThreadDat g = (g * a + bg_green * inv_a + 127) >> 8; b = (b * a + bg_blue * inv_a + 127) >> 8; fg = 0xff000000 | (r << 16) | (g << 8) | b; - dest[x * 8 + ix] = fg; + *destptr = fg; for (int j = 0; j < TriVertex::NumVarying; j++) varyingPos[j] += varyingStep[j]; @@ -455,6 +459,7 @@ static void TriFill32AlphaBlend(const TriDrawTriangleArgs *args, WorkerThreadDat { if (mask0 & (1 << 31)) { + uint32_t *destptr = dest + x; uint32_t fg = color; uint32_t r = RPART(fg); uint32_t g = GPART(fg); @@ -462,11 +467,10 @@ static void TriFill32AlphaBlend(const TriDrawTriangleArgs *args, WorkerThreadDat r = (r * lightpos) >> 16; g = (g * lightpos) >> 16; b = (b * lightpos) >> 16; - uint32_t a = APART(fg); a += a >> 7; uint32_t inv_a = 256 - a; - uint32_t bg = dest[x]; + uint32_t bg = *destptr; uint32_t bg_red = RPART(bg); uint32_t bg_green = GPART(bg); uint32_t bg_blue = BPART(bg); @@ -474,7 +478,7 @@ static void TriFill32AlphaBlend(const TriDrawTriangleArgs *args, 
WorkerThreadDat g = (g * a + bg_green * inv_a + 127) >> 8; b = (b * a + bg_blue * inv_a + 127) >> 8; fg = 0xff000000 | (r << 16) | (g << 8) | b; - dest[x] = fg; + *destptr = fg; } mask0 <<= 1; @@ -521,6 +525,7 @@ static void TriFill32AlphaBlend(const TriDrawTriangleArgs *args, WorkerThreadDat { if (mask1 & (1 << 31)) { + uint32_t *destptr = dest + x; uint32_t fg = color; uint32_t r = RPART(fg); uint32_t g = GPART(fg); @@ -528,11 +533,10 @@ static void TriFill32AlphaBlend(const TriDrawTriangleArgs *args, WorkerThreadDat r = (r * lightpos) >> 16; g = (g * lightpos) >> 16; b = (b * lightpos) >> 16; - uint32_t a = APART(fg); a += a >> 7; uint32_t inv_a = 256 - a; - uint32_t bg = dest[x]; + uint32_t bg = *destptr; uint32_t bg_red = RPART(bg); uint32_t bg_green = GPART(bg); uint32_t bg_blue = BPART(bg); @@ -540,7 +544,7 @@ static void TriFill32AlphaBlend(const TriDrawTriangleArgs *args, WorkerThreadDat g = (g * a + bg_green * inv_a + 127) >> 8; b = (b * a + bg_blue * inv_a + 127) >> 8; fg = 0xff000000 | (r << 16) | (g << 8) | b; - dest[x] = fg; + *destptr = fg; } mask1 <<= 1; @@ -573,6 +577,8 @@ static void TriFill32AddSolid(const TriDrawTriangleArgs *args, WorkerThreadData bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; uint32_t lightmask = is_fixed_light ? 
0 : 0xffffffff; auto colormaps = args->colormaps; + uint32_t srcalpha = args->uniforms->srcalpha; + uint32_t destalpha = args->uniforms->destalpha; // Calculate gradients const TriVertex &v1 = *args->v1; @@ -649,6 +655,7 @@ static void TriFill32AddSolid(const TriDrawTriangleArgs *args, WorkerThreadData for (int ix = 0; ix < 8; ix++) { + uint32_t *destptr = dest + x * 8 + ix; uint32_t fg = color; uint32_t r = RPART(fg); uint32_t g = GPART(fg); @@ -656,19 +663,9 @@ static void TriFill32AddSolid(const TriDrawTriangleArgs *args, WorkerThreadData r = (r * lightpos) >> 16; g = (g * lightpos) >> 16; b = (b * lightpos) >> 16; - - uint32_t a = APART(fg); - a += a >> 7; - uint32_t inv_a = 256 - a; - uint32_t bg = dest[x * 8 + ix]; - uint32_t bg_red = RPART(bg); - uint32_t bg_green = GPART(bg); - uint32_t bg_blue = BPART(bg); - r = (r * a + bg_red * inv_a + 127) >> 8; - g = (g * a + bg_green * inv_a + 127) >> 8; - b = (b * a + bg_blue * inv_a + 127) >> 8; + fg = 0xff000000 | (r << 16) | (g << 8) | b; - dest[x * 8 + ix] = fg; + *destptr = fg; for (int j = 0; j < TriVertex::NumVarying; j++) varyingPos[j] += varyingStep[j]; @@ -728,6 +725,7 @@ static void TriFill32AddSolid(const TriDrawTriangleArgs *args, WorkerThreadData { if (mask0 & (1 << 31)) { + uint32_t *destptr = dest + x; uint32_t fg = color; uint32_t r = RPART(fg); uint32_t g = GPART(fg); @@ -735,19 +733,9 @@ static void TriFill32AddSolid(const TriDrawTriangleArgs *args, WorkerThreadData r = (r * lightpos) >> 16; g = (g * lightpos) >> 16; b = (b * lightpos) >> 16; - - uint32_t a = APART(fg); - a += a >> 7; - uint32_t inv_a = 256 - a; - uint32_t bg = dest[x]; - uint32_t bg_red = RPART(bg); - uint32_t bg_green = GPART(bg); - uint32_t bg_blue = BPART(bg); - r = (r * a + bg_red * inv_a + 127) >> 8; - g = (g * a + bg_green * inv_a + 127) >> 8; - b = (b * a + bg_blue * inv_a + 127) >> 8; + fg = 0xff000000 | (r << 16) | (g << 8) | b; - dest[x] = fg; + *destptr = fg; } mask0 <<= 1; @@ -794,6 +782,7 @@ static void 
TriFill32AddSolid(const TriDrawTriangleArgs *args, WorkerThreadData { if (mask1 & (1 << 31)) { + uint32_t *destptr = dest + x; uint32_t fg = color; uint32_t r = RPART(fg); uint32_t g = GPART(fg); @@ -801,19 +790,9 @@ static void TriFill32AddSolid(const TriDrawTriangleArgs *args, WorkerThreadData r = (r * lightpos) >> 16; g = (g * lightpos) >> 16; b = (b * lightpos) >> 16; - - uint32_t a = APART(fg); - a += a >> 7; - uint32_t inv_a = 256 - a; - uint32_t bg = dest[x]; - uint32_t bg_red = RPART(bg); - uint32_t bg_green = GPART(bg); - uint32_t bg_blue = BPART(bg); - r = (r * a + bg_red * inv_a + 127) >> 8; - g = (g * a + bg_green * inv_a + 127) >> 8; - b = (b * a + bg_blue * inv_a + 127) >> 8; + fg = 0xff000000 | (r << 16) | (g << 8) | b; - dest[x] = fg; + *destptr = fg; } mask1 <<= 1; @@ -846,6 +825,8 @@ static void TriFill32Add(const TriDrawTriangleArgs *args, WorkerThreadData *thre bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; uint32_t lightmask = is_fixed_light ? 
0 : 0xffffffff; auto colormaps = args->colormaps; + uint32_t srcalpha = args->uniforms->srcalpha; + uint32_t destalpha = args->uniforms->destalpha; // Calculate gradients const TriVertex &v1 = *args->v1; @@ -922,6 +903,7 @@ static void TriFill32Add(const TriDrawTriangleArgs *args, WorkerThreadData *thre for (int ix = 0; ix < 8; ix++) { + uint32_t *destptr = dest + x * 8 + ix; uint32_t fg = color; uint32_t r = RPART(fg); uint32_t g = GPART(fg); @@ -929,11 +911,10 @@ static void TriFill32Add(const TriDrawTriangleArgs *args, WorkerThreadData *thre r = (r * lightpos) >> 16; g = (g * lightpos) >> 16; b = (b * lightpos) >> 16; - uint32_t a = APART(fg); a += a >> 7; uint32_t inv_a = 256 - a; - uint32_t bg = dest[x * 8 + ix]; + uint32_t bg = *destptr; uint32_t bg_red = RPART(bg); uint32_t bg_green = GPART(bg); uint32_t bg_blue = BPART(bg); @@ -941,7 +922,7 @@ static void TriFill32Add(const TriDrawTriangleArgs *args, WorkerThreadData *thre g = (g * a + bg_green * inv_a + 127) >> 8; b = (b * a + bg_blue * inv_a + 127) >> 8; fg = 0xff000000 | (r << 16) | (g << 8) | b; - dest[x * 8 + ix] = fg; + *destptr = fg; for (int j = 0; j < TriVertex::NumVarying; j++) varyingPos[j] += varyingStep[j]; @@ -1001,6 +982,7 @@ static void TriFill32Add(const TriDrawTriangleArgs *args, WorkerThreadData *thre { if (mask0 & (1 << 31)) { + uint32_t *destptr = dest + x; uint32_t fg = color; uint32_t r = RPART(fg); uint32_t g = GPART(fg); @@ -1008,11 +990,10 @@ static void TriFill32Add(const TriDrawTriangleArgs *args, WorkerThreadData *thre r = (r * lightpos) >> 16; g = (g * lightpos) >> 16; b = (b * lightpos) >> 16; - uint32_t a = APART(fg); a += a >> 7; uint32_t inv_a = 256 - a; - uint32_t bg = dest[x]; + uint32_t bg = *destptr; uint32_t bg_red = RPART(bg); uint32_t bg_green = GPART(bg); uint32_t bg_blue = BPART(bg); @@ -1020,7 +1001,7 @@ static void TriFill32Add(const TriDrawTriangleArgs *args, WorkerThreadData *thre g = (g * a + bg_green * inv_a + 127) >> 8; b = (b * a + bg_blue * inv_a + 127) >> 
8; fg = 0xff000000 | (r << 16) | (g << 8) | b; - dest[x] = fg; + *destptr = fg; } mask0 <<= 1; @@ -1067,6 +1048,7 @@ static void TriFill32Add(const TriDrawTriangleArgs *args, WorkerThreadData *thre { if (mask1 & (1 << 31)) { + uint32_t *destptr = dest + x; uint32_t fg = color; uint32_t r = RPART(fg); uint32_t g = GPART(fg); @@ -1074,11 +1056,10 @@ static void TriFill32Add(const TriDrawTriangleArgs *args, WorkerThreadData *thre r = (r * lightpos) >> 16; g = (g * lightpos) >> 16; b = (b * lightpos) >> 16; - uint32_t a = APART(fg); a += a >> 7; uint32_t inv_a = 256 - a; - uint32_t bg = dest[x]; + uint32_t bg = *destptr; uint32_t bg_red = RPART(bg); uint32_t bg_green = GPART(bg); uint32_t bg_blue = BPART(bg); @@ -1086,7 +1067,7 @@ static void TriFill32Add(const TriDrawTriangleArgs *args, WorkerThreadData *thre g = (g * a + bg_green * inv_a + 127) >> 8; b = (b * a + bg_blue * inv_a + 127) >> 8; fg = 0xff000000 | (r << 16) | (g << 8) | b; - dest[x] = fg; + *destptr = fg; } mask1 <<= 1; @@ -1119,6 +1100,8 @@ static void TriFill32Sub(const TriDrawTriangleArgs *args, WorkerThreadData *thre bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; uint32_t lightmask = is_fixed_light ? 
0 : 0xffffffff; auto colormaps = args->colormaps; + uint32_t srcalpha = args->uniforms->srcalpha; + uint32_t destalpha = args->uniforms->destalpha; // Calculate gradients const TriVertex &v1 = *args->v1; @@ -1195,6 +1178,7 @@ static void TriFill32Sub(const TriDrawTriangleArgs *args, WorkerThreadData *thre for (int ix = 0; ix < 8; ix++) { + uint32_t *destptr = dest + x * 8 + ix; uint32_t fg = color; uint32_t r = RPART(fg); uint32_t g = GPART(fg); @@ -1202,11 +1186,10 @@ static void TriFill32Sub(const TriDrawTriangleArgs *args, WorkerThreadData *thre r = (r * lightpos) >> 16; g = (g * lightpos) >> 16; b = (b * lightpos) >> 16; - uint32_t a = APART(fg); a += a >> 7; uint32_t inv_a = 256 - a; - uint32_t bg = dest[x * 8 + ix]; + uint32_t bg = *destptr; uint32_t bg_red = RPART(bg); uint32_t bg_green = GPART(bg); uint32_t bg_blue = BPART(bg); @@ -1214,7 +1197,7 @@ static void TriFill32Sub(const TriDrawTriangleArgs *args, WorkerThreadData *thre g = (g * a + bg_green * inv_a + 127) >> 8; b = (b * a + bg_blue * inv_a + 127) >> 8; fg = 0xff000000 | (r << 16) | (g << 8) | b; - dest[x * 8 + ix] = fg; + *destptr = fg; for (int j = 0; j < TriVertex::NumVarying; j++) varyingPos[j] += varyingStep[j]; @@ -1274,6 +1257,7 @@ static void TriFill32Sub(const TriDrawTriangleArgs *args, WorkerThreadData *thre { if (mask0 & (1 << 31)) { + uint32_t *destptr = dest + x; uint32_t fg = color; uint32_t r = RPART(fg); uint32_t g = GPART(fg); @@ -1281,11 +1265,10 @@ static void TriFill32Sub(const TriDrawTriangleArgs *args, WorkerThreadData *thre r = (r * lightpos) >> 16; g = (g * lightpos) >> 16; b = (b * lightpos) >> 16; - uint32_t a = APART(fg); a += a >> 7; uint32_t inv_a = 256 - a; - uint32_t bg = dest[x]; + uint32_t bg = *destptr; uint32_t bg_red = RPART(bg); uint32_t bg_green = GPART(bg); uint32_t bg_blue = BPART(bg); @@ -1293,7 +1276,7 @@ static void TriFill32Sub(const TriDrawTriangleArgs *args, WorkerThreadData *thre g = (g * a + bg_green * inv_a + 127) >> 8; b = (b * a + bg_blue * inv_a + 
127) >> 8; fg = 0xff000000 | (r << 16) | (g << 8) | b; - dest[x] = fg; + *destptr = fg; } mask0 <<= 1; @@ -1340,6 +1323,7 @@ static void TriFill32Sub(const TriDrawTriangleArgs *args, WorkerThreadData *thre { if (mask1 & (1 << 31)) { + uint32_t *destptr = dest + x; uint32_t fg = color; uint32_t r = RPART(fg); uint32_t g = GPART(fg); @@ -1347,11 +1331,10 @@ static void TriFill32Sub(const TriDrawTriangleArgs *args, WorkerThreadData *thre r = (r * lightpos) >> 16; g = (g * lightpos) >> 16; b = (b * lightpos) >> 16; - uint32_t a = APART(fg); a += a >> 7; uint32_t inv_a = 256 - a; - uint32_t bg = dest[x]; + uint32_t bg = *destptr; uint32_t bg_red = RPART(bg); uint32_t bg_green = GPART(bg); uint32_t bg_blue = BPART(bg); @@ -1359,7 +1342,7 @@ static void TriFill32Sub(const TriDrawTriangleArgs *args, WorkerThreadData *thre g = (g * a + bg_green * inv_a + 127) >> 8; b = (b * a + bg_blue * inv_a + 127) >> 8; fg = 0xff000000 | (r << 16) | (g << 8) | b; - dest[x] = fg; + *destptr = fg; } mask1 <<= 1; @@ -1392,6 +1375,8 @@ static void TriFill32RevSub(const TriDrawTriangleArgs *args, WorkerThreadData *t bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; uint32_t lightmask = is_fixed_light ? 
0 : 0xffffffff; auto colormaps = args->colormaps; + uint32_t srcalpha = args->uniforms->srcalpha; + uint32_t destalpha = args->uniforms->destalpha; // Calculate gradients const TriVertex &v1 = *args->v1; @@ -1468,6 +1453,7 @@ static void TriFill32RevSub(const TriDrawTriangleArgs *args, WorkerThreadData *t for (int ix = 0; ix < 8; ix++) { + uint32_t *destptr = dest + x * 8 + ix; uint32_t fg = color; uint32_t r = RPART(fg); uint32_t g = GPART(fg); @@ -1475,11 +1461,10 @@ static void TriFill32RevSub(const TriDrawTriangleArgs *args, WorkerThreadData *t r = (r * lightpos) >> 16; g = (g * lightpos) >> 16; b = (b * lightpos) >> 16; - uint32_t a = APART(fg); a += a >> 7; uint32_t inv_a = 256 - a; - uint32_t bg = dest[x * 8 + ix]; + uint32_t bg = *destptr; uint32_t bg_red = RPART(bg); uint32_t bg_green = GPART(bg); uint32_t bg_blue = BPART(bg); @@ -1487,7 +1472,7 @@ static void TriFill32RevSub(const TriDrawTriangleArgs *args, WorkerThreadData *t g = (g * a + bg_green * inv_a + 127) >> 8; b = (b * a + bg_blue * inv_a + 127) >> 8; fg = 0xff000000 | (r << 16) | (g << 8) | b; - dest[x * 8 + ix] = fg; + *destptr = fg; for (int j = 0; j < TriVertex::NumVarying; j++) varyingPos[j] += varyingStep[j]; @@ -1547,6 +1532,7 @@ static void TriFill32RevSub(const TriDrawTriangleArgs *args, WorkerThreadData *t { if (mask0 & (1 << 31)) { + uint32_t *destptr = dest + x; uint32_t fg = color; uint32_t r = RPART(fg); uint32_t g = GPART(fg); @@ -1554,11 +1540,10 @@ static void TriFill32RevSub(const TriDrawTriangleArgs *args, WorkerThreadData *t r = (r * lightpos) >> 16; g = (g * lightpos) >> 16; b = (b * lightpos) >> 16; - uint32_t a = APART(fg); a += a >> 7; uint32_t inv_a = 256 - a; - uint32_t bg = dest[x]; + uint32_t bg = *destptr; uint32_t bg_red = RPART(bg); uint32_t bg_green = GPART(bg); uint32_t bg_blue = BPART(bg); @@ -1566,7 +1551,7 @@ static void TriFill32RevSub(const TriDrawTriangleArgs *args, WorkerThreadData *t g = (g * a + bg_green * inv_a + 127) >> 8; b = (b * a + bg_blue * inv_a + 
127) >> 8; fg = 0xff000000 | (r << 16) | (g << 8) | b; - dest[x] = fg; + *destptr = fg; } mask0 <<= 1; @@ -1613,6 +1598,7 @@ static void TriFill32RevSub(const TriDrawTriangleArgs *args, WorkerThreadData *t { if (mask1 & (1 << 31)) { + uint32_t *destptr = dest + x; uint32_t fg = color; uint32_t r = RPART(fg); uint32_t g = GPART(fg); @@ -1620,11 +1606,10 @@ static void TriFill32RevSub(const TriDrawTriangleArgs *args, WorkerThreadData *t r = (r * lightpos) >> 16; g = (g * lightpos) >> 16; b = (b * lightpos) >> 16; - uint32_t a = APART(fg); a += a >> 7; uint32_t inv_a = 256 - a; - uint32_t bg = dest[x]; + uint32_t bg = *destptr; uint32_t bg_red = RPART(bg); uint32_t bg_green = GPART(bg); uint32_t bg_blue = BPART(bg); @@ -1632,7 +1617,7 @@ static void TriFill32RevSub(const TriDrawTriangleArgs *args, WorkerThreadData *t g = (g * a + bg_green * inv_a + 127) >> 8; b = (b * a + bg_blue * inv_a + 127) >> 8; fg = 0xff000000 | (r << 16) | (g << 8) | b; - dest[x] = fg; + *destptr = fg; } mask1 <<= 1; @@ -1665,6 +1650,8 @@ static void TriFill32Stencil(const TriDrawTriangleArgs *args, WorkerThreadData * bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; uint32_t lightmask = is_fixed_light ? 
0 : 0xffffffff; auto colormaps = args->colormaps; + uint32_t srcalpha = args->uniforms->srcalpha; + uint32_t destalpha = args->uniforms->destalpha; // Calculate gradients const TriVertex &v1 = *args->v1; @@ -1741,6 +1728,7 @@ static void TriFill32Stencil(const TriDrawTriangleArgs *args, WorkerThreadData * for (int ix = 0; ix < 8; ix++) { + uint32_t *destptr = dest + x * 8 + ix; uint32_t fg = color; uint32_t r = RPART(fg); uint32_t g = GPART(fg); @@ -1748,11 +1736,12 @@ static void TriFill32Stencil(const TriDrawTriangleArgs *args, WorkerThreadData * r = (r * lightpos) >> 16; g = (g * lightpos) >> 16; b = (b * lightpos) >> 16; + uint32_t fgalpha = APART(fg); + uint32_t inv_fgalpha = 256 - fgalpha; + int a = (fgalpha * srcalpha + 128) >> 8; + int inv_a = (destalpha * fgalpha + 256 * inv_fgalpha + 128) >> 8; - uint32_t a = APART(fg); - a += a >> 7; - uint32_t inv_a = 256 - a; - uint32_t bg = dest[x * 8 + ix]; + uint32_t bg = *destptr; uint32_t bg_red = RPART(bg); uint32_t bg_green = GPART(bg); uint32_t bg_blue = BPART(bg); @@ -1760,7 +1749,7 @@ static void TriFill32Stencil(const TriDrawTriangleArgs *args, WorkerThreadData * g = (g * a + bg_green * inv_a + 127) >> 8; b = (b * a + bg_blue * inv_a + 127) >> 8; fg = 0xff000000 | (r << 16) | (g << 8) | b; - dest[x * 8 + ix] = fg; + *destptr = fg; for (int j = 0; j < TriVertex::NumVarying; j++) varyingPos[j] += varyingStep[j]; @@ -1820,6 +1809,7 @@ static void TriFill32Stencil(const TriDrawTriangleArgs *args, WorkerThreadData * { if (mask0 & (1 << 31)) { + uint32_t *destptr = dest + x; uint32_t fg = color; uint32_t r = RPART(fg); uint32_t g = GPART(fg); @@ -1827,11 +1817,12 @@ static void TriFill32Stencil(const TriDrawTriangleArgs *args, WorkerThreadData * r = (r * lightpos) >> 16; g = (g * lightpos) >> 16; b = (b * lightpos) >> 16; + uint32_t fgalpha = APART(fg); + uint32_t inv_fgalpha = 256 - fgalpha; + int a = (fgalpha * srcalpha + 128) >> 8; + int inv_a = (destalpha * fgalpha + 256 * inv_fgalpha + 128) >> 8; - uint32_t a 
= APART(fg); - a += a >> 7; - uint32_t inv_a = 256 - a; - uint32_t bg = dest[x]; + uint32_t bg = *destptr; uint32_t bg_red = RPART(bg); uint32_t bg_green = GPART(bg); uint32_t bg_blue = BPART(bg); @@ -1839,7 +1830,7 @@ static void TriFill32Stencil(const TriDrawTriangleArgs *args, WorkerThreadData * g = (g * a + bg_green * inv_a + 127) >> 8; b = (b * a + bg_blue * inv_a + 127) >> 8; fg = 0xff000000 | (r << 16) | (g << 8) | b; - dest[x] = fg; + *destptr = fg; } mask0 <<= 1; @@ -1886,6 +1877,7 @@ static void TriFill32Stencil(const TriDrawTriangleArgs *args, WorkerThreadData * { if (mask1 & (1 << 31)) { + uint32_t *destptr = dest + x; uint32_t fg = color; uint32_t r = RPART(fg); uint32_t g = GPART(fg); @@ -1893,11 +1885,12 @@ static void TriFill32Stencil(const TriDrawTriangleArgs *args, WorkerThreadData * r = (r * lightpos) >> 16; g = (g * lightpos) >> 16; b = (b * lightpos) >> 16; + uint32_t fgalpha = APART(fg); + uint32_t inv_fgalpha = 256 - fgalpha; + int a = (fgalpha * srcalpha + 128) >> 8; + int inv_a = (destalpha * fgalpha + 256 * inv_fgalpha + 128) >> 8; - uint32_t a = APART(fg); - a += a >> 7; - uint32_t inv_a = 256 - a; - uint32_t bg = dest[x]; + uint32_t bg = *destptr; uint32_t bg_red = RPART(bg); uint32_t bg_green = GPART(bg); uint32_t bg_blue = BPART(bg); @@ -1905,7 +1898,7 @@ static void TriFill32Stencil(const TriDrawTriangleArgs *args, WorkerThreadData * g = (g * a + bg_green * inv_a + 127) >> 8; b = (b * a + bg_blue * inv_a + 127) >> 8; fg = 0xff000000 | (r << 16) | (g << 8) | b; - dest[x] = fg; + *destptr = fg; } mask1 <<= 1; @@ -1938,6 +1931,8 @@ static void TriFill32Shaded(const TriDrawTriangleArgs *args, WorkerThreadData *t bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; uint32_t lightmask = is_fixed_light ? 
0 : 0xffffffff; auto colormaps = args->colormaps; + uint32_t srcalpha = args->uniforms->srcalpha; + uint32_t destalpha = args->uniforms->destalpha; // Calculate gradients const TriVertex &v1 = *args->v1; @@ -1956,7 +1951,8 @@ static void TriFill32Shaded(const TriDrawTriangleArgs *args, WorkerThreadData *t start.Varying[i] = v1.varying[i] * v1.w + gradientX.Varying[i] * (startX - v1.x) + gradientY.Varying[i] * (startY - v1.y); } - const uint32_t * RESTRICT texPixels = (const uint32_t *)args->texturePixels; + const uint8_t * RESTRICT texPixels = args->texturePixels; + const uint32_t * RESTRICT translation = (const uint32_t *)args->translation; uint32_t texWidth = args->textureWidth; uint32_t texHeight = args->textureHeight; @@ -2014,6 +2010,7 @@ static void TriFill32Shaded(const TriDrawTriangleArgs *args, WorkerThreadData *t for (int ix = 0; ix < 8; ix++) { + uint32_t *destptr = dest + x * 8 + ix; uint32_t fg = color; uint32_t r = RPART(fg); uint32_t g = GPART(fg); @@ -2021,11 +2018,16 @@ static void TriFill32Shaded(const TriDrawTriangleArgs *args, WorkerThreadData *t r = (r * lightpos) >> 16; g = (g * lightpos) >> 16; b = (b * lightpos) >> 16; + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + int sample = texPixels[texelX * texHeight + texelY]; - uint32_t a = APART(fg); - a += a >> 7; - uint32_t inv_a = 256 - a; - uint32_t bg = dest[x * 8 + ix]; + uint32_t fgalpha = sample;//clamp(sample, 0, 64) * 4; + uint32_t inv_fgalpha = 256 - fgalpha; + int a = (fgalpha * srcalpha + 128) >> 8; + int inv_a = (destalpha * fgalpha + 256 * inv_fgalpha + 128) >> 8; + + uint32_t bg = *destptr; uint32_t bg_red = RPART(bg); uint32_t bg_green = GPART(bg); uint32_t bg_blue = BPART(bg); @@ -2033,7 +2035,7 @@ static void TriFill32Shaded(const TriDrawTriangleArgs *args, WorkerThreadData *t g = (g * a + bg_green * inv_a + 127) >> 8; b = (b * a + bg_blue * inv_a + 127) >> 8; fg = 0xff000000 | 
(r << 16) | (g << 8) | b; - dest[x * 8 + ix] = fg; + *destptr = fg; for (int j = 0; j < TriVertex::NumVarying; j++) varyingPos[j] += varyingStep[j]; @@ -2093,6 +2095,7 @@ static void TriFill32Shaded(const TriDrawTriangleArgs *args, WorkerThreadData *t { if (mask0 & (1 << 31)) { + uint32_t *destptr = dest + x; uint32_t fg = color; uint32_t r = RPART(fg); uint32_t g = GPART(fg); @@ -2100,11 +2103,16 @@ static void TriFill32Shaded(const TriDrawTriangleArgs *args, WorkerThreadData *t r = (r * lightpos) >> 16; g = (g * lightpos) >> 16; b = (b * lightpos) >> 16; + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + int sample = texPixels[texelX * texHeight + texelY]; - uint32_t a = APART(fg); - a += a >> 7; - uint32_t inv_a = 256 - a; - uint32_t bg = dest[x]; + uint32_t fgalpha = sample;//clamp(sample, 0, 64) * 4; + uint32_t inv_fgalpha = 256 - fgalpha; + int a = (fgalpha * srcalpha + 128) >> 8; + int inv_a = (destalpha * fgalpha + 256 * inv_fgalpha + 128) >> 8; + + uint32_t bg = *destptr; uint32_t bg_red = RPART(bg); uint32_t bg_green = GPART(bg); uint32_t bg_blue = BPART(bg); @@ -2112,7 +2120,7 @@ static void TriFill32Shaded(const TriDrawTriangleArgs *args, WorkerThreadData *t g = (g * a + bg_green * inv_a + 127) >> 8; b = (b * a + bg_blue * inv_a + 127) >> 8; fg = 0xff000000 | (r << 16) | (g << 8) | b; - dest[x] = fg; + *destptr = fg; } mask0 <<= 1; @@ -2159,6 +2167,7 @@ static void TriFill32Shaded(const TriDrawTriangleArgs *args, WorkerThreadData *t { if (mask1 & (1 << 31)) { + uint32_t *destptr = dest + x; uint32_t fg = color; uint32_t r = RPART(fg); uint32_t g = GPART(fg); @@ -2166,11 +2175,16 @@ static void TriFill32Shaded(const TriDrawTriangleArgs *args, WorkerThreadData *t r = (r * lightpos) >> 16; g = (g * lightpos) >> 16; b = (b * lightpos) >> 16; + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 
8) >> 16) * texHeight) >> 16; + int sample = texPixels[texelX * texHeight + texelY]; - uint32_t a = APART(fg); - a += a >> 7; - uint32_t inv_a = 256 - a; - uint32_t bg = dest[x]; + uint32_t fgalpha = sample;//clamp(sample, 0, 64) * 4; + uint32_t inv_fgalpha = 256 - fgalpha; + int a = (fgalpha * srcalpha + 128) >> 8; + int inv_a = (destalpha * fgalpha + 256 * inv_fgalpha + 128) >> 8; + + uint32_t bg = *destptr; uint32_t bg_red = RPART(bg); uint32_t bg_green = GPART(bg); uint32_t bg_blue = BPART(bg); @@ -2178,7 +2192,7 @@ static void TriFill32Shaded(const TriDrawTriangleArgs *args, WorkerThreadData *t g = (g * a + bg_green * inv_a + 127) >> 8; b = (b * a + bg_blue * inv_a + 127) >> 8; fg = 0xff000000 | (r << 16) | (g << 8) | b; - dest[x] = fg; + *destptr = fg; } mask1 <<= 1; @@ -2211,6 +2225,8 @@ static void TriFill32TranslateCopy(const TriDrawTriangleArgs *args, WorkerThread bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; uint32_t lightmask = is_fixed_light ? 
0 : 0xffffffff; auto colormaps = args->colormaps; + uint32_t srcalpha = args->uniforms->srcalpha; + uint32_t destalpha = args->uniforms->destalpha; // Calculate gradients const TriVertex &v1 = *args->v1; @@ -2229,7 +2245,8 @@ static void TriFill32TranslateCopy(const TriDrawTriangleArgs *args, WorkerThread start.Varying[i] = v1.varying[i] * v1.w + gradientX.Varying[i] * (startX - v1.x) + gradientY.Varying[i] * (startY - v1.y); } - const uint32_t * RESTRICT texPixels = (const uint32_t *)args->texturePixels; + const uint8_t * RESTRICT texPixels = args->texturePixels; + const uint32_t * RESTRICT translation = (const uint32_t *)args->translation; uint32_t texWidth = args->textureWidth; uint32_t texHeight = args->textureHeight; @@ -2287,26 +2304,17 @@ static void TriFill32TranslateCopy(const TriDrawTriangleArgs *args, WorkerThread for (int ix = 0; ix < 8; ix++) { + uint32_t *destptr = dest + x * 8 + ix; uint32_t fg = color; + fg = translation[fg]; uint32_t r = RPART(fg); uint32_t g = GPART(fg); uint32_t b = BPART(fg); r = (r * lightpos) >> 16; g = (g * lightpos) >> 16; b = (b * lightpos) >> 16; - - uint32_t a = APART(fg); - a += a >> 7; - uint32_t inv_a = 256 - a; - uint32_t bg = dest[x * 8 + ix]; - uint32_t bg_red = RPART(bg); - uint32_t bg_green = GPART(bg); - uint32_t bg_blue = BPART(bg); - r = (r * a + bg_red * inv_a + 127) >> 8; - g = (g * a + bg_green * inv_a + 127) >> 8; - b = (b * a + bg_blue * inv_a + 127) >> 8; fg = 0xff000000 | (r << 16) | (g << 8) | b; - dest[x * 8 + ix] = fg; + *destptr = fg; for (int j = 0; j < TriVertex::NumVarying; j++) varyingPos[j] += varyingStep[j]; @@ -2366,26 +2374,17 @@ static void TriFill32TranslateCopy(const TriDrawTriangleArgs *args, WorkerThread { if (mask0 & (1 << 31)) { + uint32_t *destptr = dest + x; uint32_t fg = color; + fg = translation[fg]; uint32_t r = RPART(fg); uint32_t g = GPART(fg); uint32_t b = BPART(fg); r = (r * lightpos) >> 16; g = (g * lightpos) >> 16; b = (b * lightpos) >> 16; - - uint32_t a = APART(fg); - a += 
a >> 7; - uint32_t inv_a = 256 - a; - uint32_t bg = dest[x]; - uint32_t bg_red = RPART(bg); - uint32_t bg_green = GPART(bg); - uint32_t bg_blue = BPART(bg); - r = (r * a + bg_red * inv_a + 127) >> 8; - g = (g * a + bg_green * inv_a + 127) >> 8; - b = (b * a + bg_blue * inv_a + 127) >> 8; fg = 0xff000000 | (r << 16) | (g << 8) | b; - dest[x] = fg; + *destptr = fg; } mask0 <<= 1; @@ -2432,26 +2431,17 @@ static void TriFill32TranslateCopy(const TriDrawTriangleArgs *args, WorkerThread { if (mask1 & (1 << 31)) { + uint32_t *destptr = dest + x; uint32_t fg = color; + fg = translation[fg]; uint32_t r = RPART(fg); uint32_t g = GPART(fg); uint32_t b = BPART(fg); r = (r * lightpos) >> 16; g = (g * lightpos) >> 16; b = (b * lightpos) >> 16; - - uint32_t a = APART(fg); - a += a >> 7; - uint32_t inv_a = 256 - a; - uint32_t bg = dest[x]; - uint32_t bg_red = RPART(bg); - uint32_t bg_green = GPART(bg); - uint32_t bg_blue = BPART(bg); - r = (r * a + bg_red * inv_a + 127) >> 8; - g = (g * a + bg_green * inv_a + 127) >> 8; - b = (b * a + bg_blue * inv_a + 127) >> 8; fg = 0xff000000 | (r << 16) | (g << 8) | b; - dest[x] = fg; + *destptr = fg; } mask1 <<= 1; @@ -2484,6 +2474,8 @@ static void TriFill32TranslateAlphaBlend(const TriDrawTriangleArgs *args, Worker bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; uint32_t lightmask = is_fixed_light ? 
0 : 0xffffffff; auto colormaps = args->colormaps; + uint32_t srcalpha = args->uniforms->srcalpha; + uint32_t destalpha = args->uniforms->destalpha; // Calculate gradients const TriVertex &v1 = *args->v1; @@ -2502,7 +2494,8 @@ static void TriFill32TranslateAlphaBlend(const TriDrawTriangleArgs *args, Worker start.Varying[i] = v1.varying[i] * v1.w + gradientX.Varying[i] * (startX - v1.x) + gradientY.Varying[i] * (startY - v1.y); } - const uint32_t * RESTRICT texPixels = (const uint32_t *)args->texturePixels; + const uint8_t * RESTRICT texPixels = args->texturePixels; + const uint32_t * RESTRICT translation = (const uint32_t *)args->translation; uint32_t texWidth = args->textureWidth; uint32_t texHeight = args->textureHeight; @@ -2560,18 +2553,19 @@ static void TriFill32TranslateAlphaBlend(const TriDrawTriangleArgs *args, Worker for (int ix = 0; ix < 8; ix++) { + uint32_t *destptr = dest + x * 8 + ix; uint32_t fg = color; + fg = translation[fg]; uint32_t r = RPART(fg); uint32_t g = GPART(fg); uint32_t b = BPART(fg); r = (r * lightpos) >> 16; g = (g * lightpos) >> 16; b = (b * lightpos) >> 16; - uint32_t a = APART(fg); a += a >> 7; uint32_t inv_a = 256 - a; - uint32_t bg = dest[x * 8 + ix]; + uint32_t bg = *destptr; uint32_t bg_red = RPART(bg); uint32_t bg_green = GPART(bg); uint32_t bg_blue = BPART(bg); @@ -2579,7 +2573,7 @@ static void TriFill32TranslateAlphaBlend(const TriDrawTriangleArgs *args, Worker g = (g * a + bg_green * inv_a + 127) >> 8; b = (b * a + bg_blue * inv_a + 127) >> 8; fg = 0xff000000 | (r << 16) | (g << 8) | b; - dest[x * 8 + ix] = fg; + *destptr = fg; for (int j = 0; j < TriVertex::NumVarying; j++) varyingPos[j] += varyingStep[j]; @@ -2639,18 +2633,19 @@ static void TriFill32TranslateAlphaBlend(const TriDrawTriangleArgs *args, Worker { if (mask0 & (1 << 31)) { + uint32_t *destptr = dest + x; uint32_t fg = color; + fg = translation[fg]; uint32_t r = RPART(fg); uint32_t g = GPART(fg); uint32_t b = BPART(fg); r = (r * lightpos) >> 16; g = (g * 
lightpos) >> 16; b = (b * lightpos) >> 16; - uint32_t a = APART(fg); a += a >> 7; uint32_t inv_a = 256 - a; - uint32_t bg = dest[x]; + uint32_t bg = *destptr; uint32_t bg_red = RPART(bg); uint32_t bg_green = GPART(bg); uint32_t bg_blue = BPART(bg); @@ -2658,7 +2653,7 @@ static void TriFill32TranslateAlphaBlend(const TriDrawTriangleArgs *args, Worker g = (g * a + bg_green * inv_a + 127) >> 8; b = (b * a + bg_blue * inv_a + 127) >> 8; fg = 0xff000000 | (r << 16) | (g << 8) | b; - dest[x] = fg; + *destptr = fg; } mask0 <<= 1; @@ -2705,18 +2700,19 @@ static void TriFill32TranslateAlphaBlend(const TriDrawTriangleArgs *args, Worker { if (mask1 & (1 << 31)) { + uint32_t *destptr = dest + x; uint32_t fg = color; + fg = translation[fg]; uint32_t r = RPART(fg); uint32_t g = GPART(fg); uint32_t b = BPART(fg); r = (r * lightpos) >> 16; g = (g * lightpos) >> 16; b = (b * lightpos) >> 16; - uint32_t a = APART(fg); a += a >> 7; uint32_t inv_a = 256 - a; - uint32_t bg = dest[x]; + uint32_t bg = *destptr; uint32_t bg_red = RPART(bg); uint32_t bg_green = GPART(bg); uint32_t bg_blue = BPART(bg); @@ -2724,7 +2720,7 @@ static void TriFill32TranslateAlphaBlend(const TriDrawTriangleArgs *args, Worker g = (g * a + bg_green * inv_a + 127) >> 8; b = (b * a + bg_blue * inv_a + 127) >> 8; fg = 0xff000000 | (r << 16) | (g << 8) | b; - dest[x] = fg; + *destptr = fg; } mask1 <<= 1; @@ -2757,6 +2753,8 @@ static void TriFill32TranslateAdd(const TriDrawTriangleArgs *args, WorkerThreadD bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; uint32_t lightmask = is_fixed_light ? 
0 : 0xffffffff; auto colormaps = args->colormaps; + uint32_t srcalpha = args->uniforms->srcalpha; + uint32_t destalpha = args->uniforms->destalpha; // Calculate gradients const TriVertex &v1 = *args->v1; @@ -2775,7 +2773,8 @@ static void TriFill32TranslateAdd(const TriDrawTriangleArgs *args, WorkerThreadD start.Varying[i] = v1.varying[i] * v1.w + gradientX.Varying[i] * (startX - v1.x) + gradientY.Varying[i] * (startY - v1.y); } - const uint32_t * RESTRICT texPixels = (const uint32_t *)args->texturePixels; + const uint8_t * RESTRICT texPixels = args->texturePixels; + const uint32_t * RESTRICT translation = (const uint32_t *)args->translation; uint32_t texWidth = args->textureWidth; uint32_t texHeight = args->textureHeight; @@ -2833,18 +2832,19 @@ static void TriFill32TranslateAdd(const TriDrawTriangleArgs *args, WorkerThreadD for (int ix = 0; ix < 8; ix++) { + uint32_t *destptr = dest + x * 8 + ix; uint32_t fg = color; + fg = translation[fg]; uint32_t r = RPART(fg); uint32_t g = GPART(fg); uint32_t b = BPART(fg); r = (r * lightpos) >> 16; g = (g * lightpos) >> 16; b = (b * lightpos) >> 16; - uint32_t a = APART(fg); a += a >> 7; uint32_t inv_a = 256 - a; - uint32_t bg = dest[x * 8 + ix]; + uint32_t bg = *destptr; uint32_t bg_red = RPART(bg); uint32_t bg_green = GPART(bg); uint32_t bg_blue = BPART(bg); @@ -2852,7 +2852,7 @@ static void TriFill32TranslateAdd(const TriDrawTriangleArgs *args, WorkerThreadD g = (g * a + bg_green * inv_a + 127) >> 8; b = (b * a + bg_blue * inv_a + 127) >> 8; fg = 0xff000000 | (r << 16) | (g << 8) | b; - dest[x * 8 + ix] = fg; + *destptr = fg; for (int j = 0; j < TriVertex::NumVarying; j++) varyingPos[j] += varyingStep[j]; @@ -2912,18 +2912,19 @@ static void TriFill32TranslateAdd(const TriDrawTriangleArgs *args, WorkerThreadD { if (mask0 & (1 << 31)) { + uint32_t *destptr = dest + x; uint32_t fg = color; + fg = translation[fg]; uint32_t r = RPART(fg); uint32_t g = GPART(fg); uint32_t b = BPART(fg); r = (r * lightpos) >> 16; g = (g * 
lightpos) >> 16; b = (b * lightpos) >> 16; - uint32_t a = APART(fg); a += a >> 7; uint32_t inv_a = 256 - a; - uint32_t bg = dest[x]; + uint32_t bg = *destptr; uint32_t bg_red = RPART(bg); uint32_t bg_green = GPART(bg); uint32_t bg_blue = BPART(bg); @@ -2931,7 +2932,7 @@ static void TriFill32TranslateAdd(const TriDrawTriangleArgs *args, WorkerThreadD g = (g * a + bg_green * inv_a + 127) >> 8; b = (b * a + bg_blue * inv_a + 127) >> 8; fg = 0xff000000 | (r << 16) | (g << 8) | b; - dest[x] = fg; + *destptr = fg; } mask0 <<= 1; @@ -2978,18 +2979,19 @@ static void TriFill32TranslateAdd(const TriDrawTriangleArgs *args, WorkerThreadD { if (mask1 & (1 << 31)) { + uint32_t *destptr = dest + x; uint32_t fg = color; + fg = translation[fg]; uint32_t r = RPART(fg); uint32_t g = GPART(fg); uint32_t b = BPART(fg); r = (r * lightpos) >> 16; g = (g * lightpos) >> 16; b = (b * lightpos) >> 16; - uint32_t a = APART(fg); a += a >> 7; uint32_t inv_a = 256 - a; - uint32_t bg = dest[x]; + uint32_t bg = *destptr; uint32_t bg_red = RPART(bg); uint32_t bg_green = GPART(bg); uint32_t bg_blue = BPART(bg); @@ -2997,7 +2999,7 @@ static void TriFill32TranslateAdd(const TriDrawTriangleArgs *args, WorkerThreadD g = (g * a + bg_green * inv_a + 127) >> 8; b = (b * a + bg_blue * inv_a + 127) >> 8; fg = 0xff000000 | (r << 16) | (g << 8) | b; - dest[x] = fg; + *destptr = fg; } mask1 <<= 1; @@ -3030,6 +3032,8 @@ static void TriFill32TranslateSub(const TriDrawTriangleArgs *args, WorkerThreadD bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; uint32_t lightmask = is_fixed_light ? 
0 : 0xffffffff; auto colormaps = args->colormaps; + uint32_t srcalpha = args->uniforms->srcalpha; + uint32_t destalpha = args->uniforms->destalpha; // Calculate gradients const TriVertex &v1 = *args->v1; @@ -3048,7 +3052,8 @@ static void TriFill32TranslateSub(const TriDrawTriangleArgs *args, WorkerThreadD start.Varying[i] = v1.varying[i] * v1.w + gradientX.Varying[i] * (startX - v1.x) + gradientY.Varying[i] * (startY - v1.y); } - const uint32_t * RESTRICT texPixels = (const uint32_t *)args->texturePixels; + const uint8_t * RESTRICT texPixels = args->texturePixels; + const uint32_t * RESTRICT translation = (const uint32_t *)args->translation; uint32_t texWidth = args->textureWidth; uint32_t texHeight = args->textureHeight; @@ -3106,18 +3111,19 @@ static void TriFill32TranslateSub(const TriDrawTriangleArgs *args, WorkerThreadD for (int ix = 0; ix < 8; ix++) { + uint32_t *destptr = dest + x * 8 + ix; uint32_t fg = color; + fg = translation[fg]; uint32_t r = RPART(fg); uint32_t g = GPART(fg); uint32_t b = BPART(fg); r = (r * lightpos) >> 16; g = (g * lightpos) >> 16; b = (b * lightpos) >> 16; - uint32_t a = APART(fg); a += a >> 7; uint32_t inv_a = 256 - a; - uint32_t bg = dest[x * 8 + ix]; + uint32_t bg = *destptr; uint32_t bg_red = RPART(bg); uint32_t bg_green = GPART(bg); uint32_t bg_blue = BPART(bg); @@ -3125,7 +3131,7 @@ static void TriFill32TranslateSub(const TriDrawTriangleArgs *args, WorkerThreadD g = (g * a + bg_green * inv_a + 127) >> 8; b = (b * a + bg_blue * inv_a + 127) >> 8; fg = 0xff000000 | (r << 16) | (g << 8) | b; - dest[x * 8 + ix] = fg; + *destptr = fg; for (int j = 0; j < TriVertex::NumVarying; j++) varyingPos[j] += varyingStep[j]; @@ -3185,18 +3191,19 @@ static void TriFill32TranslateSub(const TriDrawTriangleArgs *args, WorkerThreadD { if (mask0 & (1 << 31)) { + uint32_t *destptr = dest + x; uint32_t fg = color; + fg = translation[fg]; uint32_t r = RPART(fg); uint32_t g = GPART(fg); uint32_t b = BPART(fg); r = (r * lightpos) >> 16; g = (g * 
lightpos) >> 16; b = (b * lightpos) >> 16; - uint32_t a = APART(fg); a += a >> 7; uint32_t inv_a = 256 - a; - uint32_t bg = dest[x]; + uint32_t bg = *destptr; uint32_t bg_red = RPART(bg); uint32_t bg_green = GPART(bg); uint32_t bg_blue = BPART(bg); @@ -3204,7 +3211,7 @@ static void TriFill32TranslateSub(const TriDrawTriangleArgs *args, WorkerThreadD g = (g * a + bg_green * inv_a + 127) >> 8; b = (b * a + bg_blue * inv_a + 127) >> 8; fg = 0xff000000 | (r << 16) | (g << 8) | b; - dest[x] = fg; + *destptr = fg; } mask0 <<= 1; @@ -3251,18 +3258,19 @@ static void TriFill32TranslateSub(const TriDrawTriangleArgs *args, WorkerThreadD { if (mask1 & (1 << 31)) { + uint32_t *destptr = dest + x; uint32_t fg = color; + fg = translation[fg]; uint32_t r = RPART(fg); uint32_t g = GPART(fg); uint32_t b = BPART(fg); r = (r * lightpos) >> 16; g = (g * lightpos) >> 16; b = (b * lightpos) >> 16; - uint32_t a = APART(fg); a += a >> 7; uint32_t inv_a = 256 - a; - uint32_t bg = dest[x]; + uint32_t bg = *destptr; uint32_t bg_red = RPART(bg); uint32_t bg_green = GPART(bg); uint32_t bg_blue = BPART(bg); @@ -3270,7 +3278,7 @@ static void TriFill32TranslateSub(const TriDrawTriangleArgs *args, WorkerThreadD g = (g * a + bg_green * inv_a + 127) >> 8; b = (b * a + bg_blue * inv_a + 127) >> 8; fg = 0xff000000 | (r << 16) | (g << 8) | b; - dest[x] = fg; + *destptr = fg; } mask1 <<= 1; @@ -3303,6 +3311,8 @@ static void TriFill32TranslateRevSub(const TriDrawTriangleArgs *args, WorkerThre bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; uint32_t lightmask = is_fixed_light ? 
0 : 0xffffffff; auto colormaps = args->colormaps; + uint32_t srcalpha = args->uniforms->srcalpha; + uint32_t destalpha = args->uniforms->destalpha; // Calculate gradients const TriVertex &v1 = *args->v1; @@ -3321,7 +3331,8 @@ static void TriFill32TranslateRevSub(const TriDrawTriangleArgs *args, WorkerThre start.Varying[i] = v1.varying[i] * v1.w + gradientX.Varying[i] * (startX - v1.x) + gradientY.Varying[i] * (startY - v1.y); } - const uint32_t * RESTRICT texPixels = (const uint32_t *)args->texturePixels; + const uint8_t * RESTRICT texPixels = args->texturePixels; + const uint32_t * RESTRICT translation = (const uint32_t *)args->translation; uint32_t texWidth = args->textureWidth; uint32_t texHeight = args->textureHeight; @@ -3379,18 +3390,19 @@ static void TriFill32TranslateRevSub(const TriDrawTriangleArgs *args, WorkerThre for (int ix = 0; ix < 8; ix++) { + uint32_t *destptr = dest + x * 8 + ix; uint32_t fg = color; + fg = translation[fg]; uint32_t r = RPART(fg); uint32_t g = GPART(fg); uint32_t b = BPART(fg); r = (r * lightpos) >> 16; g = (g * lightpos) >> 16; b = (b * lightpos) >> 16; - uint32_t a = APART(fg); a += a >> 7; uint32_t inv_a = 256 - a; - uint32_t bg = dest[x * 8 + ix]; + uint32_t bg = *destptr; uint32_t bg_red = RPART(bg); uint32_t bg_green = GPART(bg); uint32_t bg_blue = BPART(bg); @@ -3398,7 +3410,7 @@ static void TriFill32TranslateRevSub(const TriDrawTriangleArgs *args, WorkerThre g = (g * a + bg_green * inv_a + 127) >> 8; b = (b * a + bg_blue * inv_a + 127) >> 8; fg = 0xff000000 | (r << 16) | (g << 8) | b; - dest[x * 8 + ix] = fg; + *destptr = fg; for (int j = 0; j < TriVertex::NumVarying; j++) varyingPos[j] += varyingStep[j]; @@ -3458,18 +3470,19 @@ static void TriFill32TranslateRevSub(const TriDrawTriangleArgs *args, WorkerThre { if (mask0 & (1 << 31)) { + uint32_t *destptr = dest + x; uint32_t fg = color; + fg = translation[fg]; uint32_t r = RPART(fg); uint32_t g = GPART(fg); uint32_t b = BPART(fg); r = (r * lightpos) >> 16; g = (g * 
lightpos) >> 16; b = (b * lightpos) >> 16; - uint32_t a = APART(fg); a += a >> 7; uint32_t inv_a = 256 - a; - uint32_t bg = dest[x]; + uint32_t bg = *destptr; uint32_t bg_red = RPART(bg); uint32_t bg_green = GPART(bg); uint32_t bg_blue = BPART(bg); @@ -3477,7 +3490,7 @@ static void TriFill32TranslateRevSub(const TriDrawTriangleArgs *args, WorkerThre g = (g * a + bg_green * inv_a + 127) >> 8; b = (b * a + bg_blue * inv_a + 127) >> 8; fg = 0xff000000 | (r << 16) | (g << 8) | b; - dest[x] = fg; + *destptr = fg; } mask0 <<= 1; @@ -3524,18 +3537,19 @@ static void TriFill32TranslateRevSub(const TriDrawTriangleArgs *args, WorkerThre { if (mask1 & (1 << 31)) { + uint32_t *destptr = dest + x; uint32_t fg = color; + fg = translation[fg]; uint32_t r = RPART(fg); uint32_t g = GPART(fg); uint32_t b = BPART(fg); r = (r * lightpos) >> 16; g = (g * lightpos) >> 16; b = (b * lightpos) >> 16; - uint32_t a = APART(fg); a += a >> 7; uint32_t inv_a = 256 - a; - uint32_t bg = dest[x]; + uint32_t bg = *destptr; uint32_t bg_red = RPART(bg); uint32_t bg_green = GPART(bg); uint32_t bg_blue = BPART(bg); @@ -3543,7 +3557,7 @@ static void TriFill32TranslateRevSub(const TriDrawTriangleArgs *args, WorkerThre g = (g * a + bg_green * inv_a + 127) >> 8; b = (b * a + bg_blue * inv_a + 127) >> 8; fg = 0xff000000 | (r << 16) | (g << 8) | b; - dest[x] = fg; + *destptr = fg; } mask1 <<= 1; @@ -3576,6 +3590,8 @@ static void TriFill32AddSrcColorOneMinusSrcColor(const TriDrawTriangleArgs *args bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; uint32_t lightmask = is_fixed_light ? 
0 : 0xffffffff; auto colormaps = args->colormaps; + uint32_t srcalpha = args->uniforms->srcalpha; + uint32_t destalpha = args->uniforms->destalpha; // Calculate gradients const TriVertex &v1 = *args->v1; @@ -3652,6 +3668,7 @@ static void TriFill32AddSrcColorOneMinusSrcColor(const TriDrawTriangleArgs *args for (int ix = 0; ix < 8; ix++) { + uint32_t *destptr = dest + x * 8 + ix; uint32_t fg = color; uint32_t r = RPART(fg); uint32_t g = GPART(fg); @@ -3659,19 +3676,18 @@ static void TriFill32AddSrcColorOneMinusSrcColor(const TriDrawTriangleArgs *args r = (r * lightpos) >> 16; g = (g * lightpos) >> 16; b = (b * lightpos) >> 16; - - uint32_t a = APART(fg); - a += a >> 7; - uint32_t inv_a = 256 - a; - uint32_t bg = dest[x * 8 + ix]; + uint32_t inv_r = 256 - (r + (r >> 7)); + uint32_t inv_g = 256 - (g + (r >> 7)); + uint32_t inv_b = 256 - (b + (r >> 7)); + uint32_t bg = *destptr; uint32_t bg_red = RPART(bg); uint32_t bg_green = GPART(bg); uint32_t bg_blue = BPART(bg); - r = (r * a + bg_red * inv_a + 127) >> 8; - g = (g * a + bg_green * inv_a + 127) >> 8; - b = (b * a + bg_blue * inv_a + 127) >> 8; + r = r + ((bg_red * inv_r + 127) >> 8); + g = g + ((bg_green * inv_g + 127) >> 8); + b = b + ((bg_blue * inv_b + 127) >> 8); fg = 0xff000000 | (r << 16) | (g << 8) | b; - dest[x * 8 + ix] = fg; + *destptr = fg; for (int j = 0; j < TriVertex::NumVarying; j++) varyingPos[j] += varyingStep[j]; @@ -3731,6 +3747,7 @@ static void TriFill32AddSrcColorOneMinusSrcColor(const TriDrawTriangleArgs *args { if (mask0 & (1 << 31)) { + uint32_t *destptr = dest + x; uint32_t fg = color; uint32_t r = RPART(fg); uint32_t g = GPART(fg); @@ -3738,19 +3755,18 @@ static void TriFill32AddSrcColorOneMinusSrcColor(const TriDrawTriangleArgs *args r = (r * lightpos) >> 16; g = (g * lightpos) >> 16; b = (b * lightpos) >> 16; - - uint32_t a = APART(fg); - a += a >> 7; - uint32_t inv_a = 256 - a; - uint32_t bg = dest[x]; + uint32_t inv_r = 256 - (r + (r >> 7)); + uint32_t inv_g = 256 - (g + (r >> 7)); + 
uint32_t inv_b = 256 - (b + (r >> 7)); + uint32_t bg = *destptr; uint32_t bg_red = RPART(bg); uint32_t bg_green = GPART(bg); uint32_t bg_blue = BPART(bg); - r = (r * a + bg_red * inv_a + 127) >> 8; - g = (g * a + bg_green * inv_a + 127) >> 8; - b = (b * a + bg_blue * inv_a + 127) >> 8; + r = r + ((bg_red * inv_r + 127) >> 8); + g = g + ((bg_green * inv_g + 127) >> 8); + b = b + ((bg_blue * inv_b + 127) >> 8); fg = 0xff000000 | (r << 16) | (g << 8) | b; - dest[x] = fg; + *destptr = fg; } mask0 <<= 1; @@ -3797,6 +3813,7 @@ static void TriFill32AddSrcColorOneMinusSrcColor(const TriDrawTriangleArgs *args { if (mask1 & (1 << 31)) { + uint32_t *destptr = dest + x; uint32_t fg = color; uint32_t r = RPART(fg); uint32_t g = GPART(fg); @@ -3804,19 +3821,18 @@ static void TriFill32AddSrcColorOneMinusSrcColor(const TriDrawTriangleArgs *args r = (r * lightpos) >> 16; g = (g * lightpos) >> 16; b = (b * lightpos) >> 16; - - uint32_t a = APART(fg); - a += a >> 7; - uint32_t inv_a = 256 - a; - uint32_t bg = dest[x]; + uint32_t inv_r = 256 - (r + (r >> 7)); + uint32_t inv_g = 256 - (g + (r >> 7)); + uint32_t inv_b = 256 - (b + (r >> 7)); + uint32_t bg = *destptr; uint32_t bg_red = RPART(bg); uint32_t bg_green = GPART(bg); uint32_t bg_blue = BPART(bg); - r = (r * a + bg_red * inv_a + 127) >> 8; - g = (g * a + bg_green * inv_a + 127) >> 8; - b = (b * a + bg_blue * inv_a + 127) >> 8; + r = r + ((bg_red * inv_r + 127) >> 8); + g = g + ((bg_green * inv_g + 127) >> 8); + b = b + ((bg_blue * inv_b + 127) >> 8); fg = 0xff000000 | (r << 16) | (g << 8) | b; - dest[x] = fg; + *destptr = fg; } mask1 <<= 1; @@ -3849,6 +3865,8 @@ static void TriFill32Skycap(const TriDrawTriangleArgs *args, WorkerThreadData *t bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; uint32_t lightmask = is_fixed_light ? 
0 : 0xffffffff; auto colormaps = args->colormaps; + uint32_t srcalpha = args->uniforms->srcalpha; + uint32_t destalpha = args->uniforms->destalpha; // Calculate gradients const TriVertex &v1 = *args->v1; @@ -3925,6 +3943,7 @@ static void TriFill32Skycap(const TriDrawTriangleArgs *args, WorkerThreadData *t for (int ix = 0; ix < 8; ix++) { + uint32_t *destptr = dest + x * 8 + ix; uint32_t fg = color; uint32_t r = RPART(fg); uint32_t g = GPART(fg); @@ -3932,19 +3951,21 @@ static void TriFill32Skycap(const TriDrawTriangleArgs *args, WorkerThreadData *t r = (r * lightpos) >> 16; g = (g * lightpos) >> 16; b = (b * lightpos) >> 16; + int start_fade = 2; // How fast it should fade out + + int alpha_top = clamp(varyingPos[1] >> (16 - start_fade), 0, 256); + int alpha_bottom = clamp(((2 << 24) - varyingPos[1]) >> (16 - start_fade), 0, 256); + int a = MIN(alpha_top, alpha_bottom); + int inv_a = 256 - a; - uint32_t a = APART(fg); - a += a >> 7; - uint32_t inv_a = 256 - a; - uint32_t bg = dest[x * 8 + ix]; - uint32_t bg_red = RPART(bg); - uint32_t bg_green = GPART(bg); - uint32_t bg_blue = BPART(bg); + uint32_t bg_red = RPART(color); + uint32_t bg_green = GPART(color); + uint32_t bg_blue = BPART(color); r = (r * a + bg_red * inv_a + 127) >> 8; g = (g * a + bg_green * inv_a + 127) >> 8; b = (b * a + bg_blue * inv_a + 127) >> 8; fg = 0xff000000 | (r << 16) | (g << 8) | b; - dest[x * 8 + ix] = fg; + *destptr = fg; for (int j = 0; j < TriVertex::NumVarying; j++) varyingPos[j] += varyingStep[j]; @@ -4004,6 +4025,7 @@ static void TriFill32Skycap(const TriDrawTriangleArgs *args, WorkerThreadData *t { if (mask0 & (1 << 31)) { + uint32_t *destptr = dest + x; uint32_t fg = color; uint32_t r = RPART(fg); uint32_t g = GPART(fg); @@ -4011,19 +4033,21 @@ static void TriFill32Skycap(const TriDrawTriangleArgs *args, WorkerThreadData *t r = (r * lightpos) >> 16; g = (g * lightpos) >> 16; b = (b * lightpos) >> 16; + int start_fade = 2; // How fast it should fade out + + int alpha_top = 
clamp(varyingPos[1] >> (16 - start_fade), 0, 256); + int alpha_bottom = clamp(((2 << 24) - varyingPos[1]) >> (16 - start_fade), 0, 256); + int a = MIN(alpha_top, alpha_bottom); + int inv_a = 256 - a; - uint32_t a = APART(fg); - a += a >> 7; - uint32_t inv_a = 256 - a; - uint32_t bg = dest[x]; - uint32_t bg_red = RPART(bg); - uint32_t bg_green = GPART(bg); - uint32_t bg_blue = BPART(bg); + uint32_t bg_red = RPART(color); + uint32_t bg_green = GPART(color); + uint32_t bg_blue = BPART(color); r = (r * a + bg_red * inv_a + 127) >> 8; g = (g * a + bg_green * inv_a + 127) >> 8; b = (b * a + bg_blue * inv_a + 127) >> 8; fg = 0xff000000 | (r << 16) | (g << 8) | b; - dest[x] = fg; + *destptr = fg; } mask0 <<= 1; @@ -4070,6 +4094,7 @@ static void TriFill32Skycap(const TriDrawTriangleArgs *args, WorkerThreadData *t { if (mask1 & (1 << 31)) { + uint32_t *destptr = dest + x; uint32_t fg = color; uint32_t r = RPART(fg); uint32_t g = GPART(fg); @@ -4077,19 +4102,21 @@ static void TriFill32Skycap(const TriDrawTriangleArgs *args, WorkerThreadData *t r = (r * lightpos) >> 16; g = (g * lightpos) >> 16; b = (b * lightpos) >> 16; + int start_fade = 2; // How fast it should fade out + + int alpha_top = clamp(varyingPos[1] >> (16 - start_fade), 0, 256); + int alpha_bottom = clamp(((2 << 24) - varyingPos[1]) >> (16 - start_fade), 0, 256); + int a = MIN(alpha_top, alpha_bottom); + int inv_a = 256 - a; - uint32_t a = APART(fg); - a += a >> 7; - uint32_t inv_a = 256 - a; - uint32_t bg = dest[x]; - uint32_t bg_red = RPART(bg); - uint32_t bg_green = GPART(bg); - uint32_t bg_blue = BPART(bg); + uint32_t bg_red = RPART(color); + uint32_t bg_green = GPART(color); + uint32_t bg_blue = BPART(color); r = (r * a + bg_red * inv_a + 127) >> 8; g = (g * a + bg_green * inv_a + 127) >> 8; b = (b * a + bg_blue * inv_a + 127) >> 8; fg = 0xff000000 | (r << 16) | (g << 8) | b; - dest[x] = fg; + *destptr = fg; } mask1 <<= 1; @@ -4122,6 +4149,8 @@ static void TriDraw32Copy(const TriDrawTriangleArgs *args, 
WorkerThreadData *thr bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; uint32_t lightmask = is_fixed_light ? 0 : 0xffffffff; auto colormaps = args->colormaps; + uint32_t srcalpha = args->uniforms->srcalpha; + uint32_t destalpha = args->uniforms->destalpha; // Calculate gradients const TriVertex &v1 = *args->v1; @@ -4198,6 +4227,7 @@ static void TriDraw32Copy(const TriDrawTriangleArgs *args, WorkerThreadData *thr for (int ix = 0; ix < 8; ix++) { + uint32_t *destptr = dest + x * 8 + ix; int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; uint32_t fg = texPixels[texelX * texHeight + texelY]; @@ -4207,9 +4237,8 @@ static void TriDraw32Copy(const TriDrawTriangleArgs *args, WorkerThreadData *thr r = (r * lightpos) >> 16; g = (g * lightpos) >> 16; b = (b * lightpos) >> 16; - fg = 0xff000000 | (r << 16) | (g << 8) | b; - dest[x * 8 + ix] = fg; + *destptr = fg; for (int j = 0; j < TriVertex::NumVarying; j++) varyingPos[j] += varyingStep[j]; @@ -4269,6 +4298,7 @@ static void TriDraw32Copy(const TriDrawTriangleArgs *args, WorkerThreadData *thr { if (mask0 & (1 << 31)) { + uint32_t *destptr = dest + x; int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; uint32_t fg = texPixels[texelX * texHeight + texelY]; @@ -4278,9 +4308,8 @@ static void TriDraw32Copy(const TriDrawTriangleArgs *args, WorkerThreadData *thr r = (r * lightpos) >> 16; g = (g * lightpos) >> 16; b = (b * lightpos) >> 16; - fg = 0xff000000 | (r << 16) | (g << 8) | b; - dest[x] = fg; + *destptr = fg; } mask0 <<= 1; @@ -4327,6 +4356,7 @@ static void TriDraw32Copy(const TriDrawTriangleArgs *args, WorkerThreadData *thr { if (mask1 & (1 << 31)) { + uint32_t *destptr = dest + x; int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) 
* texHeight) >> 16; uint32_t fg = texPixels[texelX * texHeight + texelY]; @@ -4336,9 +4366,8 @@ static void TriDraw32Copy(const TriDrawTriangleArgs *args, WorkerThreadData *thr r = (r * lightpos) >> 16; g = (g * lightpos) >> 16; b = (b * lightpos) >> 16; - fg = 0xff000000 | (r << 16) | (g << 8) | b; - dest[x] = fg; + *destptr = fg; } mask1 <<= 1; @@ -4371,6 +4400,8 @@ static void TriDraw32AlphaBlend(const TriDrawTriangleArgs *args, WorkerThreadDat bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; uint32_t lightmask = is_fixed_light ? 0 : 0xffffffff; auto colormaps = args->colormaps; + uint32_t srcalpha = args->uniforms->srcalpha; + uint32_t destalpha = args->uniforms->destalpha; // Calculate gradients const TriVertex &v1 = *args->v1; @@ -4447,6 +4478,7 @@ static void TriDraw32AlphaBlend(const TriDrawTriangleArgs *args, WorkerThreadDat for (int ix = 0; ix < 8; ix++) { + uint32_t *destptr = dest + x * 8 + ix; int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; uint32_t fg = texPixels[texelX * texHeight + texelY]; @@ -4456,11 +4488,10 @@ static void TriDraw32AlphaBlend(const TriDrawTriangleArgs *args, WorkerThreadDat r = (r * lightpos) >> 16; g = (g * lightpos) >> 16; b = (b * lightpos) >> 16; - uint32_t a = APART(fg); a += a >> 7; uint32_t inv_a = 256 - a; - uint32_t bg = dest[x * 8 + ix]; + uint32_t bg = *destptr; uint32_t bg_red = RPART(bg); uint32_t bg_green = GPART(bg); uint32_t bg_blue = BPART(bg); @@ -4468,7 +4499,7 @@ static void TriDraw32AlphaBlend(const TriDrawTriangleArgs *args, WorkerThreadDat g = (g * a + bg_green * inv_a + 127) >> 8; b = (b * a + bg_blue * inv_a + 127) >> 8; fg = 0xff000000 | (r << 16) | (g << 8) | b; - dest[x * 8 + ix] = fg; + *destptr = fg; for (int j = 0; j < TriVertex::NumVarying; j++) varyingPos[j] += varyingStep[j]; @@ -4528,6 +4559,7 @@ static void TriDraw32AlphaBlend(const TriDrawTriangleArgs *args, 
WorkerThreadDat { if (mask0 & (1 << 31)) { + uint32_t *destptr = dest + x; int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; uint32_t fg = texPixels[texelX * texHeight + texelY]; @@ -4537,11 +4569,10 @@ static void TriDraw32AlphaBlend(const TriDrawTriangleArgs *args, WorkerThreadDat r = (r * lightpos) >> 16; g = (g * lightpos) >> 16; b = (b * lightpos) >> 16; - uint32_t a = APART(fg); a += a >> 7; uint32_t inv_a = 256 - a; - uint32_t bg = dest[x]; + uint32_t bg = *destptr; uint32_t bg_red = RPART(bg); uint32_t bg_green = GPART(bg); uint32_t bg_blue = BPART(bg); @@ -4549,7 +4580,7 @@ static void TriDraw32AlphaBlend(const TriDrawTriangleArgs *args, WorkerThreadDat g = (g * a + bg_green * inv_a + 127) >> 8; b = (b * a + bg_blue * inv_a + 127) >> 8; fg = 0xff000000 | (r << 16) | (g << 8) | b; - dest[x] = fg; + *destptr = fg; } mask0 <<= 1; @@ -4596,6 +4627,7 @@ static void TriDraw32AlphaBlend(const TriDrawTriangleArgs *args, WorkerThreadDat { if (mask1 & (1 << 31)) { + uint32_t *destptr = dest + x; int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; uint32_t fg = texPixels[texelX * texHeight + texelY]; @@ -4605,11 +4637,10 @@ static void TriDraw32AlphaBlend(const TriDrawTriangleArgs *args, WorkerThreadDat r = (r * lightpos) >> 16; g = (g * lightpos) >> 16; b = (b * lightpos) >> 16; - uint32_t a = APART(fg); a += a >> 7; uint32_t inv_a = 256 - a; - uint32_t bg = dest[x]; + uint32_t bg = *destptr; uint32_t bg_red = RPART(bg); uint32_t bg_green = GPART(bg); uint32_t bg_blue = BPART(bg); @@ -4617,7 +4648,7 @@ static void TriDraw32AlphaBlend(const TriDrawTriangleArgs *args, WorkerThreadDat g = (g * a + bg_green * inv_a + 127) >> 8; b = (b * a + bg_blue * inv_a + 127) >> 8; fg = 0xff000000 | (r << 16) | (g << 8) | b; - dest[x] = fg; + *destptr = fg; } mask1 <<= 1; @@ -4650,6 +4681,8 @@ 
static void TriDraw32AddSolid(const TriDrawTriangleArgs *args, WorkerThreadData bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; uint32_t lightmask = is_fixed_light ? 0 : 0xffffffff; auto colormaps = args->colormaps; + uint32_t srcalpha = args->uniforms->srcalpha; + uint32_t destalpha = args->uniforms->destalpha; // Calculate gradients const TriVertex &v1 = *args->v1; @@ -4726,6 +4759,7 @@ static void TriDraw32AddSolid(const TriDrawTriangleArgs *args, WorkerThreadData for (int ix = 0; ix < 8; ix++) { + uint32_t *destptr = dest + x * 8 + ix; int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; uint32_t fg = texPixels[texelX * texHeight + texelY]; @@ -4735,19 +4769,9 @@ static void TriDraw32AddSolid(const TriDrawTriangleArgs *args, WorkerThreadData r = (r * lightpos) >> 16; g = (g * lightpos) >> 16; b = (b * lightpos) >> 16; - - uint32_t a = APART(fg); - a += a >> 7; - uint32_t inv_a = 256 - a; - uint32_t bg = dest[x * 8 + ix]; - uint32_t bg_red = RPART(bg); - uint32_t bg_green = GPART(bg); - uint32_t bg_blue = BPART(bg); - r = (r * a + bg_red * inv_a + 127) >> 8; - g = (g * a + bg_green * inv_a + 127) >> 8; - b = (b * a + bg_blue * inv_a + 127) >> 8; + fg = 0xff000000 | (r << 16) | (g << 8) | b; - dest[x * 8 + ix] = fg; + *destptr = fg; for (int j = 0; j < TriVertex::NumVarying; j++) varyingPos[j] += varyingStep[j]; @@ -4807,6 +4831,7 @@ static void TriDraw32AddSolid(const TriDrawTriangleArgs *args, WorkerThreadData { if (mask0 & (1 << 31)) { + uint32_t *destptr = dest + x; int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; uint32_t fg = texPixels[texelX * texHeight + texelY]; @@ -4816,19 +4841,9 @@ static void TriDraw32AddSolid(const TriDrawTriangleArgs *args, WorkerThreadData r = (r * lightpos) >> 16; g = (g * lightpos) >> 16; b = (b * lightpos) 
>> 16; - - uint32_t a = APART(fg); - a += a >> 7; - uint32_t inv_a = 256 - a; - uint32_t bg = dest[x]; - uint32_t bg_red = RPART(bg); - uint32_t bg_green = GPART(bg); - uint32_t bg_blue = BPART(bg); - r = (r * a + bg_red * inv_a + 127) >> 8; - g = (g * a + bg_green * inv_a + 127) >> 8; - b = (b * a + bg_blue * inv_a + 127) >> 8; + fg = 0xff000000 | (r << 16) | (g << 8) | b; - dest[x] = fg; + *destptr = fg; } mask0 <<= 1; @@ -4875,6 +4890,7 @@ static void TriDraw32AddSolid(const TriDrawTriangleArgs *args, WorkerThreadData { if (mask1 & (1 << 31)) { + uint32_t *destptr = dest + x; int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; uint32_t fg = texPixels[texelX * texHeight + texelY]; @@ -4884,19 +4900,9 @@ static void TriDraw32AddSolid(const TriDrawTriangleArgs *args, WorkerThreadData r = (r * lightpos) >> 16; g = (g * lightpos) >> 16; b = (b * lightpos) >> 16; - - uint32_t a = APART(fg); - a += a >> 7; - uint32_t inv_a = 256 - a; - uint32_t bg = dest[x]; - uint32_t bg_red = RPART(bg); - uint32_t bg_green = GPART(bg); - uint32_t bg_blue = BPART(bg); - r = (r * a + bg_red * inv_a + 127) >> 8; - g = (g * a + bg_green * inv_a + 127) >> 8; - b = (b * a + bg_blue * inv_a + 127) >> 8; + fg = 0xff000000 | (r << 16) | (g << 8) | b; - dest[x] = fg; + *destptr = fg; } mask1 <<= 1; @@ -4929,6 +4935,8 @@ static void TriDraw32Add(const TriDrawTriangleArgs *args, WorkerThreadData *thre bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; uint32_t lightmask = is_fixed_light ? 
0 : 0xffffffff; auto colormaps = args->colormaps; + uint32_t srcalpha = args->uniforms->srcalpha; + uint32_t destalpha = args->uniforms->destalpha; // Calculate gradients const TriVertex &v1 = *args->v1; @@ -5005,6 +5013,7 @@ static void TriDraw32Add(const TriDrawTriangleArgs *args, WorkerThreadData *thre for (int ix = 0; ix < 8; ix++) { + uint32_t *destptr = dest + x * 8 + ix; int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; uint32_t fg = texPixels[texelX * texHeight + texelY]; @@ -5014,11 +5023,10 @@ static void TriDraw32Add(const TriDrawTriangleArgs *args, WorkerThreadData *thre r = (r * lightpos) >> 16; g = (g * lightpos) >> 16; b = (b * lightpos) >> 16; - uint32_t a = APART(fg); a += a >> 7; uint32_t inv_a = 256 - a; - uint32_t bg = dest[x * 8 + ix]; + uint32_t bg = *destptr; uint32_t bg_red = RPART(bg); uint32_t bg_green = GPART(bg); uint32_t bg_blue = BPART(bg); @@ -5026,7 +5034,7 @@ static void TriDraw32Add(const TriDrawTriangleArgs *args, WorkerThreadData *thre g = (g * a + bg_green * inv_a + 127) >> 8; b = (b * a + bg_blue * inv_a + 127) >> 8; fg = 0xff000000 | (r << 16) | (g << 8) | b; - dest[x * 8 + ix] = fg; + *destptr = fg; for (int j = 0; j < TriVertex::NumVarying; j++) varyingPos[j] += varyingStep[j]; @@ -5086,6 +5094,7 @@ static void TriDraw32Add(const TriDrawTriangleArgs *args, WorkerThreadData *thre { if (mask0 & (1 << 31)) { + uint32_t *destptr = dest + x; int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; uint32_t fg = texPixels[texelX * texHeight + texelY]; @@ -5095,11 +5104,10 @@ static void TriDraw32Add(const TriDrawTriangleArgs *args, WorkerThreadData *thre r = (r * lightpos) >> 16; g = (g * lightpos) >> 16; b = (b * lightpos) >> 16; - uint32_t a = APART(fg); a += a >> 7; uint32_t inv_a = 256 - a; - uint32_t bg = dest[x]; + uint32_t bg = *destptr; 
uint32_t bg_red = RPART(bg); uint32_t bg_green = GPART(bg); uint32_t bg_blue = BPART(bg); @@ -5107,7 +5115,7 @@ static void TriDraw32Add(const TriDrawTriangleArgs *args, WorkerThreadData *thre g = (g * a + bg_green * inv_a + 127) >> 8; b = (b * a + bg_blue * inv_a + 127) >> 8; fg = 0xff000000 | (r << 16) | (g << 8) | b; - dest[x] = fg; + *destptr = fg; } mask0 <<= 1; @@ -5154,6 +5162,7 @@ static void TriDraw32Add(const TriDrawTriangleArgs *args, WorkerThreadData *thre { if (mask1 & (1 << 31)) { + uint32_t *destptr = dest + x; int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; uint32_t fg = texPixels[texelX * texHeight + texelY]; @@ -5163,11 +5172,10 @@ static void TriDraw32Add(const TriDrawTriangleArgs *args, WorkerThreadData *thre r = (r * lightpos) >> 16; g = (g * lightpos) >> 16; b = (b * lightpos) >> 16; - uint32_t a = APART(fg); a += a >> 7; uint32_t inv_a = 256 - a; - uint32_t bg = dest[x]; + uint32_t bg = *destptr; uint32_t bg_red = RPART(bg); uint32_t bg_green = GPART(bg); uint32_t bg_blue = BPART(bg); @@ -5175,7 +5183,7 @@ static void TriDraw32Add(const TriDrawTriangleArgs *args, WorkerThreadData *thre g = (g * a + bg_green * inv_a + 127) >> 8; b = (b * a + bg_blue * inv_a + 127) >> 8; fg = 0xff000000 | (r << 16) | (g << 8) | b; - dest[x] = fg; + *destptr = fg; } mask1 <<= 1; @@ -5208,6 +5216,8 @@ static void TriDraw32Sub(const TriDrawTriangleArgs *args, WorkerThreadData *thre bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; uint32_t lightmask = is_fixed_light ? 
0 : 0xffffffff; auto colormaps = args->colormaps; + uint32_t srcalpha = args->uniforms->srcalpha; + uint32_t destalpha = args->uniforms->destalpha; // Calculate gradients const TriVertex &v1 = *args->v1; @@ -5284,6 +5294,7 @@ static void TriDraw32Sub(const TriDrawTriangleArgs *args, WorkerThreadData *thre for (int ix = 0; ix < 8; ix++) { + uint32_t *destptr = dest + x * 8 + ix; int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; uint32_t fg = texPixels[texelX * texHeight + texelY]; @@ -5293,11 +5304,10 @@ static void TriDraw32Sub(const TriDrawTriangleArgs *args, WorkerThreadData *thre r = (r * lightpos) >> 16; g = (g * lightpos) >> 16; b = (b * lightpos) >> 16; - uint32_t a = APART(fg); a += a >> 7; uint32_t inv_a = 256 - a; - uint32_t bg = dest[x * 8 + ix]; + uint32_t bg = *destptr; uint32_t bg_red = RPART(bg); uint32_t bg_green = GPART(bg); uint32_t bg_blue = BPART(bg); @@ -5305,7 +5315,7 @@ static void TriDraw32Sub(const TriDrawTriangleArgs *args, WorkerThreadData *thre g = (g * a + bg_green * inv_a + 127) >> 8; b = (b * a + bg_blue * inv_a + 127) >> 8; fg = 0xff000000 | (r << 16) | (g << 8) | b; - dest[x * 8 + ix] = fg; + *destptr = fg; for (int j = 0; j < TriVertex::NumVarying; j++) varyingPos[j] += varyingStep[j]; @@ -5365,6 +5375,7 @@ static void TriDraw32Sub(const TriDrawTriangleArgs *args, WorkerThreadData *thre { if (mask0 & (1 << 31)) { + uint32_t *destptr = dest + x; int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; uint32_t fg = texPixels[texelX * texHeight + texelY]; @@ -5374,11 +5385,10 @@ static void TriDraw32Sub(const TriDrawTriangleArgs *args, WorkerThreadData *thre r = (r * lightpos) >> 16; g = (g * lightpos) >> 16; b = (b * lightpos) >> 16; - uint32_t a = APART(fg); a += a >> 7; uint32_t inv_a = 256 - a; - uint32_t bg = dest[x]; + uint32_t bg = *destptr; 
uint32_t bg_red = RPART(bg); uint32_t bg_green = GPART(bg); uint32_t bg_blue = BPART(bg); @@ -5386,7 +5396,7 @@ static void TriDraw32Sub(const TriDrawTriangleArgs *args, WorkerThreadData *thre g = (g * a + bg_green * inv_a + 127) >> 8; b = (b * a + bg_blue * inv_a + 127) >> 8; fg = 0xff000000 | (r << 16) | (g << 8) | b; - dest[x] = fg; + *destptr = fg; } mask0 <<= 1; @@ -5433,6 +5443,7 @@ static void TriDraw32Sub(const TriDrawTriangleArgs *args, WorkerThreadData *thre { if (mask1 & (1 << 31)) { + uint32_t *destptr = dest + x; int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; uint32_t fg = texPixels[texelX * texHeight + texelY]; @@ -5442,11 +5453,10 @@ static void TriDraw32Sub(const TriDrawTriangleArgs *args, WorkerThreadData *thre r = (r * lightpos) >> 16; g = (g * lightpos) >> 16; b = (b * lightpos) >> 16; - uint32_t a = APART(fg); a += a >> 7; uint32_t inv_a = 256 - a; - uint32_t bg = dest[x]; + uint32_t bg = *destptr; uint32_t bg_red = RPART(bg); uint32_t bg_green = GPART(bg); uint32_t bg_blue = BPART(bg); @@ -5454,7 +5464,7 @@ static void TriDraw32Sub(const TriDrawTriangleArgs *args, WorkerThreadData *thre g = (g * a + bg_green * inv_a + 127) >> 8; b = (b * a + bg_blue * inv_a + 127) >> 8; fg = 0xff000000 | (r << 16) | (g << 8) | b; - dest[x] = fg; + *destptr = fg; } mask1 <<= 1; @@ -5487,6 +5497,8 @@ static void TriDraw32RevSub(const TriDrawTriangleArgs *args, WorkerThreadData *t bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; uint32_t lightmask = is_fixed_light ? 
0 : 0xffffffff; auto colormaps = args->colormaps; + uint32_t srcalpha = args->uniforms->srcalpha; + uint32_t destalpha = args->uniforms->destalpha; // Calculate gradients const TriVertex &v1 = *args->v1; @@ -5563,6 +5575,7 @@ static void TriDraw32RevSub(const TriDrawTriangleArgs *args, WorkerThreadData *t for (int ix = 0; ix < 8; ix++) { + uint32_t *destptr = dest + x * 8 + ix; int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; uint32_t fg = texPixels[texelX * texHeight + texelY]; @@ -5572,11 +5585,10 @@ static void TriDraw32RevSub(const TriDrawTriangleArgs *args, WorkerThreadData *t r = (r * lightpos) >> 16; g = (g * lightpos) >> 16; b = (b * lightpos) >> 16; - uint32_t a = APART(fg); a += a >> 7; uint32_t inv_a = 256 - a; - uint32_t bg = dest[x * 8 + ix]; + uint32_t bg = *destptr; uint32_t bg_red = RPART(bg); uint32_t bg_green = GPART(bg); uint32_t bg_blue = BPART(bg); @@ -5584,7 +5596,7 @@ static void TriDraw32RevSub(const TriDrawTriangleArgs *args, WorkerThreadData *t g = (g * a + bg_green * inv_a + 127) >> 8; b = (b * a + bg_blue * inv_a + 127) >> 8; fg = 0xff000000 | (r << 16) | (g << 8) | b; - dest[x * 8 + ix] = fg; + *destptr = fg; for (int j = 0; j < TriVertex::NumVarying; j++) varyingPos[j] += varyingStep[j]; @@ -5644,6 +5656,7 @@ static void TriDraw32RevSub(const TriDrawTriangleArgs *args, WorkerThreadData *t { if (mask0 & (1 << 31)) { + uint32_t *destptr = dest + x; int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; uint32_t fg = texPixels[texelX * texHeight + texelY]; @@ -5653,11 +5666,10 @@ static void TriDraw32RevSub(const TriDrawTriangleArgs *args, WorkerThreadData *t r = (r * lightpos) >> 16; g = (g * lightpos) >> 16; b = (b * lightpos) >> 16; - uint32_t a = APART(fg); a += a >> 7; uint32_t inv_a = 256 - a; - uint32_t bg = dest[x]; + uint32_t bg = *destptr; 
uint32_t bg_red = RPART(bg); uint32_t bg_green = GPART(bg); uint32_t bg_blue = BPART(bg); @@ -5665,7 +5677,7 @@ static void TriDraw32RevSub(const TriDrawTriangleArgs *args, WorkerThreadData *t g = (g * a + bg_green * inv_a + 127) >> 8; b = (b * a + bg_blue * inv_a + 127) >> 8; fg = 0xff000000 | (r << 16) | (g << 8) | b; - dest[x] = fg; + *destptr = fg; } mask0 <<= 1; @@ -5712,6 +5724,7 @@ static void TriDraw32RevSub(const TriDrawTriangleArgs *args, WorkerThreadData *t { if (mask1 & (1 << 31)) { + uint32_t *destptr = dest + x; int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; uint32_t fg = texPixels[texelX * texHeight + texelY]; @@ -5721,11 +5734,10 @@ static void TriDraw32RevSub(const TriDrawTriangleArgs *args, WorkerThreadData *t r = (r * lightpos) >> 16; g = (g * lightpos) >> 16; b = (b * lightpos) >> 16; - uint32_t a = APART(fg); a += a >> 7; uint32_t inv_a = 256 - a; - uint32_t bg = dest[x]; + uint32_t bg = *destptr; uint32_t bg_red = RPART(bg); uint32_t bg_green = GPART(bg); uint32_t bg_blue = BPART(bg); @@ -5733,7 +5745,7 @@ static void TriDraw32RevSub(const TriDrawTriangleArgs *args, WorkerThreadData *t g = (g * a + bg_green * inv_a + 127) >> 8; b = (b * a + bg_blue * inv_a + 127) >> 8; fg = 0xff000000 | (r << 16) | (g << 8) | b; - dest[x] = fg; + *destptr = fg; } mask1 <<= 1; @@ -5766,6 +5778,8 @@ static void TriDraw32Stencil(const TriDrawTriangleArgs *args, WorkerThreadData * bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; uint32_t lightmask = is_fixed_light ? 
0 : 0xffffffff; auto colormaps = args->colormaps; + uint32_t srcalpha = args->uniforms->srcalpha; + uint32_t destalpha = args->uniforms->destalpha; // Calculate gradients const TriVertex &v1 = *args->v1; @@ -5842,6 +5856,7 @@ static void TriDraw32Stencil(const TriDrawTriangleArgs *args, WorkerThreadData * for (int ix = 0; ix < 8; ix++) { + uint32_t *destptr = dest + x * 8 + ix; int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; uint32_t fg = texPixels[texelX * texHeight + texelY]; @@ -5851,11 +5866,12 @@ static void TriDraw32Stencil(const TriDrawTriangleArgs *args, WorkerThreadData * r = (r * lightpos) >> 16; g = (g * lightpos) >> 16; b = (b * lightpos) >> 16; + uint32_t fgalpha = APART(fg); + uint32_t inv_fgalpha = 256 - fgalpha; + int a = (fgalpha * srcalpha + 128) >> 8; + int inv_a = (destalpha * fgalpha + 256 * inv_fgalpha + 128) >> 8; - uint32_t a = APART(fg); - a += a >> 7; - uint32_t inv_a = 256 - a; - uint32_t bg = dest[x * 8 + ix]; + uint32_t bg = *destptr; uint32_t bg_red = RPART(bg); uint32_t bg_green = GPART(bg); uint32_t bg_blue = BPART(bg); @@ -5863,7 +5879,7 @@ static void TriDraw32Stencil(const TriDrawTriangleArgs *args, WorkerThreadData * g = (g * a + bg_green * inv_a + 127) >> 8; b = (b * a + bg_blue * inv_a + 127) >> 8; fg = 0xff000000 | (r << 16) | (g << 8) | b; - dest[x * 8 + ix] = fg; + *destptr = fg; for (int j = 0; j < TriVertex::NumVarying; j++) varyingPos[j] += varyingStep[j]; @@ -5923,6 +5939,7 @@ static void TriDraw32Stencil(const TriDrawTriangleArgs *args, WorkerThreadData * { if (mask0 & (1 << 31)) { + uint32_t *destptr = dest + x; int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; uint32_t fg = texPixels[texelX * texHeight + texelY]; @@ -5932,11 +5949,12 @@ static void TriDraw32Stencil(const TriDrawTriangleArgs *args, WorkerThreadData * r = (r * 
lightpos) >> 16; g = (g * lightpos) >> 16; b = (b * lightpos) >> 16; + uint32_t fgalpha = APART(fg); + uint32_t inv_fgalpha = 256 - fgalpha; + int a = (fgalpha * srcalpha + 128) >> 8; + int inv_a = (destalpha * fgalpha + 256 * inv_fgalpha + 128) >> 8; - uint32_t a = APART(fg); - a += a >> 7; - uint32_t inv_a = 256 - a; - uint32_t bg = dest[x]; + uint32_t bg = *destptr; uint32_t bg_red = RPART(bg); uint32_t bg_green = GPART(bg); uint32_t bg_blue = BPART(bg); @@ -5944,7 +5962,7 @@ static void TriDraw32Stencil(const TriDrawTriangleArgs *args, WorkerThreadData * g = (g * a + bg_green * inv_a + 127) >> 8; b = (b * a + bg_blue * inv_a + 127) >> 8; fg = 0xff000000 | (r << 16) | (g << 8) | b; - dest[x] = fg; + *destptr = fg; } mask0 <<= 1; @@ -5991,6 +6009,7 @@ static void TriDraw32Stencil(const TriDrawTriangleArgs *args, WorkerThreadData * { if (mask1 & (1 << 31)) { + uint32_t *destptr = dest + x; int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; uint32_t fg = texPixels[texelX * texHeight + texelY]; @@ -6000,11 +6019,12 @@ static void TriDraw32Stencil(const TriDrawTriangleArgs *args, WorkerThreadData * r = (r * lightpos) >> 16; g = (g * lightpos) >> 16; b = (b * lightpos) >> 16; + uint32_t fgalpha = APART(fg); + uint32_t inv_fgalpha = 256 - fgalpha; + int a = (fgalpha * srcalpha + 128) >> 8; + int inv_a = (destalpha * fgalpha + 256 * inv_fgalpha + 128) >> 8; - uint32_t a = APART(fg); - a += a >> 7; - uint32_t inv_a = 256 - a; - uint32_t bg = dest[x]; + uint32_t bg = *destptr; uint32_t bg_red = RPART(bg); uint32_t bg_green = GPART(bg); uint32_t bg_blue = BPART(bg); @@ -6012,7 +6032,7 @@ static void TriDraw32Stencil(const TriDrawTriangleArgs *args, WorkerThreadData * g = (g * a + bg_green * inv_a + 127) >> 8; b = (b * a + bg_blue * inv_a + 127) >> 8; fg = 0xff000000 | (r << 16) | (g << 8) | b; - dest[x] = fg; + *destptr = fg; } mask1 <<= 1; @@ -6045,6 +6065,8 @@ static void 
TriDraw32Shaded(const TriDrawTriangleArgs *args, WorkerThreadData *t bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; uint32_t lightmask = is_fixed_light ? 0 : 0xffffffff; auto colormaps = args->colormaps; + uint32_t srcalpha = args->uniforms->srcalpha; + uint32_t destalpha = args->uniforms->destalpha; // Calculate gradients const TriVertex &v1 = *args->v1; @@ -6063,7 +6085,8 @@ static void TriDraw32Shaded(const TriDrawTriangleArgs *args, WorkerThreadData *t start.Varying[i] = v1.varying[i] * v1.w + gradientX.Varying[i] * (startX - v1.x) + gradientY.Varying[i] * (startY - v1.y); } - const uint32_t * RESTRICT texPixels = (const uint32_t *)args->texturePixels; + const uint8_t * RESTRICT texPixels = args->texturePixels; + const uint32_t * RESTRICT translation = (const uint32_t *)args->translation; uint32_t texWidth = args->textureWidth; uint32_t texHeight = args->textureHeight; @@ -6121,20 +6144,24 @@ static void TriDraw32Shaded(const TriDrawTriangleArgs *args, WorkerThreadData *t for (int ix = 0; ix < 8; ix++) { - int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; - int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; - uint32_t fg = texPixels[texelX * texHeight + texelY]; + uint32_t *destptr = dest + x * 8 + ix; + uint32_t fg = color; uint32_t r = RPART(fg); uint32_t g = GPART(fg); uint32_t b = BPART(fg); r = (r * lightpos) >> 16; g = (g * lightpos) >> 16; b = (b * lightpos) >> 16; + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + int sample = texPixels[texelX * texHeight + texelY]; - uint32_t a = APART(fg); - a += a >> 7; - uint32_t inv_a = 256 - a; - uint32_t bg = dest[x * 8 + ix]; + uint32_t fgalpha = sample;//clamp(sample, 0, 64) * 4; + uint32_t inv_fgalpha = 256 - fgalpha; + int a = (fgalpha * srcalpha + 128) >> 8; + int inv_a = (destalpha * fgalpha + 256 * inv_fgalpha + 128) >> 8; + + 
uint32_t bg = *destptr; uint32_t bg_red = RPART(bg); uint32_t bg_green = GPART(bg); uint32_t bg_blue = BPART(bg); @@ -6142,7 +6169,7 @@ static void TriDraw32Shaded(const TriDrawTriangleArgs *args, WorkerThreadData *t g = (g * a + bg_green * inv_a + 127) >> 8; b = (b * a + bg_blue * inv_a + 127) >> 8; fg = 0xff000000 | (r << 16) | (g << 8) | b; - dest[x * 8 + ix] = fg; + *destptr = fg; for (int j = 0; j < TriVertex::NumVarying; j++) varyingPos[j] += varyingStep[j]; @@ -6202,20 +6229,24 @@ static void TriDraw32Shaded(const TriDrawTriangleArgs *args, WorkerThreadData *t { if (mask0 & (1 << 31)) { - int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; - int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; - uint32_t fg = texPixels[texelX * texHeight + texelY]; + uint32_t *destptr = dest + x; + uint32_t fg = color; uint32_t r = RPART(fg); uint32_t g = GPART(fg); uint32_t b = BPART(fg); r = (r * lightpos) >> 16; g = (g * lightpos) >> 16; b = (b * lightpos) >> 16; + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + int sample = texPixels[texelX * texHeight + texelY]; - uint32_t a = APART(fg); - a += a >> 7; - uint32_t inv_a = 256 - a; - uint32_t bg = dest[x]; + uint32_t fgalpha = sample;//clamp(sample, 0, 64) * 4; + uint32_t inv_fgalpha = 256 - fgalpha; + int a = (fgalpha * srcalpha + 128) >> 8; + int inv_a = (destalpha * fgalpha + 256 * inv_fgalpha + 128) >> 8; + + uint32_t bg = *destptr; uint32_t bg_red = RPART(bg); uint32_t bg_green = GPART(bg); uint32_t bg_blue = BPART(bg); @@ -6223,7 +6254,7 @@ static void TriDraw32Shaded(const TriDrawTriangleArgs *args, WorkerThreadData *t g = (g * a + bg_green * inv_a + 127) >> 8; b = (b * a + bg_blue * inv_a + 127) >> 8; fg = 0xff000000 | (r << 16) | (g << 8) | b; - dest[x] = fg; + *destptr = fg; } mask0 <<= 1; @@ -6270,20 +6301,24 @@ static void TriDraw32Shaded(const TriDrawTriangleArgs *args, 
WorkerThreadData *t { if (mask1 & (1 << 31)) { - int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; - int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; - uint32_t fg = texPixels[texelX * texHeight + texelY]; + uint32_t *destptr = dest + x; + uint32_t fg = color; uint32_t r = RPART(fg); uint32_t g = GPART(fg); uint32_t b = BPART(fg); r = (r * lightpos) >> 16; g = (g * lightpos) >> 16; b = (b * lightpos) >> 16; + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + int sample = texPixels[texelX * texHeight + texelY]; - uint32_t a = APART(fg); - a += a >> 7; - uint32_t inv_a = 256 - a; - uint32_t bg = dest[x]; + uint32_t fgalpha = sample;//clamp(sample, 0, 64) * 4; + uint32_t inv_fgalpha = 256 - fgalpha; + int a = (fgalpha * srcalpha + 128) >> 8; + int inv_a = (destalpha * fgalpha + 256 * inv_fgalpha + 128) >> 8; + + uint32_t bg = *destptr; uint32_t bg_red = RPART(bg); uint32_t bg_green = GPART(bg); uint32_t bg_blue = BPART(bg); @@ -6291,7 +6326,7 @@ static void TriDraw32Shaded(const TriDrawTriangleArgs *args, WorkerThreadData *t g = (g * a + bg_green * inv_a + 127) >> 8; b = (b * a + bg_blue * inv_a + 127) >> 8; fg = 0xff000000 | (r << 16) | (g << 8) | b; - dest[x] = fg; + *destptr = fg; } mask1 <<= 1; @@ -6324,6 +6359,8 @@ static void TriDraw32TranslateCopy(const TriDrawTriangleArgs *args, WorkerThread bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; uint32_t lightmask = is_fixed_light ? 
0 : 0xffffffff; auto colormaps = args->colormaps; + uint32_t srcalpha = args->uniforms->srcalpha; + uint32_t destalpha = args->uniforms->destalpha; // Calculate gradients const TriVertex &v1 = *args->v1; @@ -6342,7 +6379,8 @@ static void TriDraw32TranslateCopy(const TriDrawTriangleArgs *args, WorkerThread start.Varying[i] = v1.varying[i] * v1.w + gradientX.Varying[i] * (startX - v1.x) + gradientY.Varying[i] * (startY - v1.y); } - const uint32_t * RESTRICT texPixels = (const uint32_t *)args->texturePixels; + const uint8_t * RESTRICT texPixels = args->texturePixels; + const uint32_t * RESTRICT translation = (const uint32_t *)args->translation; uint32_t texWidth = args->textureWidth; uint32_t texHeight = args->textureHeight; @@ -6400,28 +6438,19 @@ static void TriDraw32TranslateCopy(const TriDrawTriangleArgs *args, WorkerThread for (int ix = 0; ix < 8; ix++) { + uint32_t *destptr = dest + x * 8 + ix; int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; uint32_t fg = texPixels[texelX * texHeight + texelY]; + fg = translation[fg]; uint32_t r = RPART(fg); uint32_t g = GPART(fg); uint32_t b = BPART(fg); r = (r * lightpos) >> 16; g = (g * lightpos) >> 16; b = (b * lightpos) >> 16; - - uint32_t a = APART(fg); - a += a >> 7; - uint32_t inv_a = 256 - a; - uint32_t bg = dest[x * 8 + ix]; - uint32_t bg_red = RPART(bg); - uint32_t bg_green = GPART(bg); - uint32_t bg_blue = BPART(bg); - r = (r * a + bg_red * inv_a + 127) >> 8; - g = (g * a + bg_green * inv_a + 127) >> 8; - b = (b * a + bg_blue * inv_a + 127) >> 8; fg = 0xff000000 | (r << 16) | (g << 8) | b; - dest[x * 8 + ix] = fg; + *destptr = fg; for (int j = 0; j < TriVertex::NumVarying; j++) varyingPos[j] += varyingStep[j]; @@ -6481,28 +6510,19 @@ static void TriDraw32TranslateCopy(const TriDrawTriangleArgs *args, WorkerThread { if (mask0 & (1 << 31)) { + uint32_t *destptr = dest + x; int texelX = ((((uint32_t)varyingPos[0] << 8) >> 
16) * texWidth) >> 16; int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; uint32_t fg = texPixels[texelX * texHeight + texelY]; + fg = translation[fg]; uint32_t r = RPART(fg); uint32_t g = GPART(fg); uint32_t b = BPART(fg); r = (r * lightpos) >> 16; g = (g * lightpos) >> 16; b = (b * lightpos) >> 16; - - uint32_t a = APART(fg); - a += a >> 7; - uint32_t inv_a = 256 - a; - uint32_t bg = dest[x]; - uint32_t bg_red = RPART(bg); - uint32_t bg_green = GPART(bg); - uint32_t bg_blue = BPART(bg); - r = (r * a + bg_red * inv_a + 127) >> 8; - g = (g * a + bg_green * inv_a + 127) >> 8; - b = (b * a + bg_blue * inv_a + 127) >> 8; fg = 0xff000000 | (r << 16) | (g << 8) | b; - dest[x] = fg; + *destptr = fg; } mask0 <<= 1; @@ -6549,28 +6569,19 @@ static void TriDraw32TranslateCopy(const TriDrawTriangleArgs *args, WorkerThread { if (mask1 & (1 << 31)) { + uint32_t *destptr = dest + x; int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; uint32_t fg = texPixels[texelX * texHeight + texelY]; + fg = translation[fg]; uint32_t r = RPART(fg); uint32_t g = GPART(fg); uint32_t b = BPART(fg); r = (r * lightpos) >> 16; g = (g * lightpos) >> 16; b = (b * lightpos) >> 16; - - uint32_t a = APART(fg); - a += a >> 7; - uint32_t inv_a = 256 - a; - uint32_t bg = dest[x]; - uint32_t bg_red = RPART(bg); - uint32_t bg_green = GPART(bg); - uint32_t bg_blue = BPART(bg); - r = (r * a + bg_red * inv_a + 127) >> 8; - g = (g * a + bg_green * inv_a + 127) >> 8; - b = (b * a + bg_blue * inv_a + 127) >> 8; fg = 0xff000000 | (r << 16) | (g << 8) | b; - dest[x] = fg; + *destptr = fg; } mask1 <<= 1; @@ -6603,6 +6614,8 @@ static void TriDraw32TranslateAlphaBlend(const TriDrawTriangleArgs *args, Worker bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; uint32_t lightmask = is_fixed_light ? 
0 : 0xffffffff; auto colormaps = args->colormaps; + uint32_t srcalpha = args->uniforms->srcalpha; + uint32_t destalpha = args->uniforms->destalpha; // Calculate gradients const TriVertex &v1 = *args->v1; @@ -6621,7 +6634,8 @@ static void TriDraw32TranslateAlphaBlend(const TriDrawTriangleArgs *args, Worker start.Varying[i] = v1.varying[i] * v1.w + gradientX.Varying[i] * (startX - v1.x) + gradientY.Varying[i] * (startY - v1.y); } - const uint32_t * RESTRICT texPixels = (const uint32_t *)args->texturePixels; + const uint8_t * RESTRICT texPixels = args->texturePixels; + const uint32_t * RESTRICT translation = (const uint32_t *)args->translation; uint32_t texWidth = args->textureWidth; uint32_t texHeight = args->textureHeight; @@ -6679,20 +6693,21 @@ static void TriDraw32TranslateAlphaBlend(const TriDrawTriangleArgs *args, Worker for (int ix = 0; ix < 8; ix++) { + uint32_t *destptr = dest + x * 8 + ix; int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; uint32_t fg = texPixels[texelX * texHeight + texelY]; + fg = translation[fg]; uint32_t r = RPART(fg); uint32_t g = GPART(fg); uint32_t b = BPART(fg); r = (r * lightpos) >> 16; g = (g * lightpos) >> 16; b = (b * lightpos) >> 16; - uint32_t a = APART(fg); a += a >> 7; uint32_t inv_a = 256 - a; - uint32_t bg = dest[x * 8 + ix]; + uint32_t bg = *destptr; uint32_t bg_red = RPART(bg); uint32_t bg_green = GPART(bg); uint32_t bg_blue = BPART(bg); @@ -6700,7 +6715,7 @@ static void TriDraw32TranslateAlphaBlend(const TriDrawTriangleArgs *args, Worker g = (g * a + bg_green * inv_a + 127) >> 8; b = (b * a + bg_blue * inv_a + 127) >> 8; fg = 0xff000000 | (r << 16) | (g << 8) | b; - dest[x * 8 + ix] = fg; + *destptr = fg; for (int j = 0; j < TriVertex::NumVarying; j++) varyingPos[j] += varyingStep[j]; @@ -6760,20 +6775,21 @@ static void TriDraw32TranslateAlphaBlend(const TriDrawTriangleArgs *args, Worker { if (mask0 & (1 << 31)) { + uint32_t 
*destptr = dest + x; int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; uint32_t fg = texPixels[texelX * texHeight + texelY]; + fg = translation[fg]; uint32_t r = RPART(fg); uint32_t g = GPART(fg); uint32_t b = BPART(fg); r = (r * lightpos) >> 16; g = (g * lightpos) >> 16; b = (b * lightpos) >> 16; - uint32_t a = APART(fg); a += a >> 7; uint32_t inv_a = 256 - a; - uint32_t bg = dest[x]; + uint32_t bg = *destptr; uint32_t bg_red = RPART(bg); uint32_t bg_green = GPART(bg); uint32_t bg_blue = BPART(bg); @@ -6781,7 +6797,7 @@ static void TriDraw32TranslateAlphaBlend(const TriDrawTriangleArgs *args, Worker g = (g * a + bg_green * inv_a + 127) >> 8; b = (b * a + bg_blue * inv_a + 127) >> 8; fg = 0xff000000 | (r << 16) | (g << 8) | b; - dest[x] = fg; + *destptr = fg; } mask0 <<= 1; @@ -6828,20 +6844,21 @@ static void TriDraw32TranslateAlphaBlend(const TriDrawTriangleArgs *args, Worker { if (mask1 & (1 << 31)) { + uint32_t *destptr = dest + x; int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; uint32_t fg = texPixels[texelX * texHeight + texelY]; + fg = translation[fg]; uint32_t r = RPART(fg); uint32_t g = GPART(fg); uint32_t b = BPART(fg); r = (r * lightpos) >> 16; g = (g * lightpos) >> 16; b = (b * lightpos) >> 16; - uint32_t a = APART(fg); a += a >> 7; uint32_t inv_a = 256 - a; - uint32_t bg = dest[x]; + uint32_t bg = *destptr; uint32_t bg_red = RPART(bg); uint32_t bg_green = GPART(bg); uint32_t bg_blue = BPART(bg); @@ -6849,7 +6866,7 @@ static void TriDraw32TranslateAlphaBlend(const TriDrawTriangleArgs *args, Worker g = (g * a + bg_green * inv_a + 127) >> 8; b = (b * a + bg_blue * inv_a + 127) >> 8; fg = 0xff000000 | (r << 16) | (g << 8) | b; - dest[x] = fg; + *destptr = fg; } mask1 <<= 1; @@ -6882,6 +6899,8 @@ static void TriDraw32TranslateAdd(const TriDrawTriangleArgs *args, 
WorkerThreadD bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; uint32_t lightmask = is_fixed_light ? 0 : 0xffffffff; auto colormaps = args->colormaps; + uint32_t srcalpha = args->uniforms->srcalpha; + uint32_t destalpha = args->uniforms->destalpha; // Calculate gradients const TriVertex &v1 = *args->v1; @@ -6900,7 +6919,8 @@ static void TriDraw32TranslateAdd(const TriDrawTriangleArgs *args, WorkerThreadD start.Varying[i] = v1.varying[i] * v1.w + gradientX.Varying[i] * (startX - v1.x) + gradientY.Varying[i] * (startY - v1.y); } - const uint32_t * RESTRICT texPixels = (const uint32_t *)args->texturePixels; + const uint8_t * RESTRICT texPixels = args->texturePixels; + const uint32_t * RESTRICT translation = (const uint32_t *)args->translation; uint32_t texWidth = args->textureWidth; uint32_t texHeight = args->textureHeight; @@ -6958,20 +6978,21 @@ static void TriDraw32TranslateAdd(const TriDrawTriangleArgs *args, WorkerThreadD for (int ix = 0; ix < 8; ix++) { + uint32_t *destptr = dest + x * 8 + ix; int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; uint32_t fg = texPixels[texelX * texHeight + texelY]; + fg = translation[fg]; uint32_t r = RPART(fg); uint32_t g = GPART(fg); uint32_t b = BPART(fg); r = (r * lightpos) >> 16; g = (g * lightpos) >> 16; b = (b * lightpos) >> 16; - uint32_t a = APART(fg); a += a >> 7; uint32_t inv_a = 256 - a; - uint32_t bg = dest[x * 8 + ix]; + uint32_t bg = *destptr; uint32_t bg_red = RPART(bg); uint32_t bg_green = GPART(bg); uint32_t bg_blue = BPART(bg); @@ -6979,7 +7000,7 @@ static void TriDraw32TranslateAdd(const TriDrawTriangleArgs *args, WorkerThreadD g = (g * a + bg_green * inv_a + 127) >> 8; b = (b * a + bg_blue * inv_a + 127) >> 8; fg = 0xff000000 | (r << 16) | (g << 8) | b; - dest[x * 8 + ix] = fg; + *destptr = fg; for (int j = 0; j < TriVertex::NumVarying; j++) varyingPos[j] += varyingStep[j]; @@ 
-7039,20 +7060,21 @@ static void TriDraw32TranslateAdd(const TriDrawTriangleArgs *args, WorkerThreadD { if (mask0 & (1 << 31)) { + uint32_t *destptr = dest + x; int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; uint32_t fg = texPixels[texelX * texHeight + texelY]; + fg = translation[fg]; uint32_t r = RPART(fg); uint32_t g = GPART(fg); uint32_t b = BPART(fg); r = (r * lightpos) >> 16; g = (g * lightpos) >> 16; b = (b * lightpos) >> 16; - uint32_t a = APART(fg); a += a >> 7; uint32_t inv_a = 256 - a; - uint32_t bg = dest[x]; + uint32_t bg = *destptr; uint32_t bg_red = RPART(bg); uint32_t bg_green = GPART(bg); uint32_t bg_blue = BPART(bg); @@ -7060,7 +7082,7 @@ static void TriDraw32TranslateAdd(const TriDrawTriangleArgs *args, WorkerThreadD g = (g * a + bg_green * inv_a + 127) >> 8; b = (b * a + bg_blue * inv_a + 127) >> 8; fg = 0xff000000 | (r << 16) | (g << 8) | b; - dest[x] = fg; + *destptr = fg; } mask0 <<= 1; @@ -7107,20 +7129,21 @@ static void TriDraw32TranslateAdd(const TriDrawTriangleArgs *args, WorkerThreadD { if (mask1 & (1 << 31)) { + uint32_t *destptr = dest + x; int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; uint32_t fg = texPixels[texelX * texHeight + texelY]; + fg = translation[fg]; uint32_t r = RPART(fg); uint32_t g = GPART(fg); uint32_t b = BPART(fg); r = (r * lightpos) >> 16; g = (g * lightpos) >> 16; b = (b * lightpos) >> 16; - uint32_t a = APART(fg); a += a >> 7; uint32_t inv_a = 256 - a; - uint32_t bg = dest[x]; + uint32_t bg = *destptr; uint32_t bg_red = RPART(bg); uint32_t bg_green = GPART(bg); uint32_t bg_blue = BPART(bg); @@ -7128,7 +7151,7 @@ static void TriDraw32TranslateAdd(const TriDrawTriangleArgs *args, WorkerThreadD g = (g * a + bg_green * inv_a + 127) >> 8; b = (b * a + bg_blue * inv_a + 127) >> 8; fg = 0xff000000 | (r << 16) | (g << 8) | b; - 
dest[x] = fg; + *destptr = fg; } mask1 <<= 1; @@ -7161,6 +7184,8 @@ static void TriDraw32TranslateSub(const TriDrawTriangleArgs *args, WorkerThreadD bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; uint32_t lightmask = is_fixed_light ? 0 : 0xffffffff; auto colormaps = args->colormaps; + uint32_t srcalpha = args->uniforms->srcalpha; + uint32_t destalpha = args->uniforms->destalpha; // Calculate gradients const TriVertex &v1 = *args->v1; @@ -7179,7 +7204,8 @@ static void TriDraw32TranslateSub(const TriDrawTriangleArgs *args, WorkerThreadD start.Varying[i] = v1.varying[i] * v1.w + gradientX.Varying[i] * (startX - v1.x) + gradientY.Varying[i] * (startY - v1.y); } - const uint32_t * RESTRICT texPixels = (const uint32_t *)args->texturePixels; + const uint8_t * RESTRICT texPixels = args->texturePixels; + const uint32_t * RESTRICT translation = (const uint32_t *)args->translation; uint32_t texWidth = args->textureWidth; uint32_t texHeight = args->textureHeight; @@ -7237,20 +7263,21 @@ static void TriDraw32TranslateSub(const TriDrawTriangleArgs *args, WorkerThreadD for (int ix = 0; ix < 8; ix++) { + uint32_t *destptr = dest + x * 8 + ix; int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; uint32_t fg = texPixels[texelX * texHeight + texelY]; + fg = translation[fg]; uint32_t r = RPART(fg); uint32_t g = GPART(fg); uint32_t b = BPART(fg); r = (r * lightpos) >> 16; g = (g * lightpos) >> 16; b = (b * lightpos) >> 16; - uint32_t a = APART(fg); a += a >> 7; uint32_t inv_a = 256 - a; - uint32_t bg = dest[x * 8 + ix]; + uint32_t bg = *destptr; uint32_t bg_red = RPART(bg); uint32_t bg_green = GPART(bg); uint32_t bg_blue = BPART(bg); @@ -7258,7 +7285,7 @@ static void TriDraw32TranslateSub(const TriDrawTriangleArgs *args, WorkerThreadD g = (g * a + bg_green * inv_a + 127) >> 8; b = (b * a + bg_blue * inv_a + 127) >> 8; fg = 0xff000000 | (r << 16) | (g << 8) 
| b; - dest[x * 8 + ix] = fg; + *destptr = fg; for (int j = 0; j < TriVertex::NumVarying; j++) varyingPos[j] += varyingStep[j]; @@ -7318,20 +7345,21 @@ static void TriDraw32TranslateSub(const TriDrawTriangleArgs *args, WorkerThreadD { if (mask0 & (1 << 31)) { + uint32_t *destptr = dest + x; int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; uint32_t fg = texPixels[texelX * texHeight + texelY]; + fg = translation[fg]; uint32_t r = RPART(fg); uint32_t g = GPART(fg); uint32_t b = BPART(fg); r = (r * lightpos) >> 16; g = (g * lightpos) >> 16; b = (b * lightpos) >> 16; - uint32_t a = APART(fg); a += a >> 7; uint32_t inv_a = 256 - a; - uint32_t bg = dest[x]; + uint32_t bg = *destptr; uint32_t bg_red = RPART(bg); uint32_t bg_green = GPART(bg); uint32_t bg_blue = BPART(bg); @@ -7339,7 +7367,7 @@ static void TriDraw32TranslateSub(const TriDrawTriangleArgs *args, WorkerThreadD g = (g * a + bg_green * inv_a + 127) >> 8; b = (b * a + bg_blue * inv_a + 127) >> 8; fg = 0xff000000 | (r << 16) | (g << 8) | b; - dest[x] = fg; + *destptr = fg; } mask0 <<= 1; @@ -7386,20 +7414,21 @@ static void TriDraw32TranslateSub(const TriDrawTriangleArgs *args, WorkerThreadD { if (mask1 & (1 << 31)) { + uint32_t *destptr = dest + x; int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; uint32_t fg = texPixels[texelX * texHeight + texelY]; + fg = translation[fg]; uint32_t r = RPART(fg); uint32_t g = GPART(fg); uint32_t b = BPART(fg); r = (r * lightpos) >> 16; g = (g * lightpos) >> 16; b = (b * lightpos) >> 16; - uint32_t a = APART(fg); a += a >> 7; uint32_t inv_a = 256 - a; - uint32_t bg = dest[x]; + uint32_t bg = *destptr; uint32_t bg_red = RPART(bg); uint32_t bg_green = GPART(bg); uint32_t bg_blue = BPART(bg); @@ -7407,7 +7436,7 @@ static void TriDraw32TranslateSub(const TriDrawTriangleArgs *args, WorkerThreadD g 
= (g * a + bg_green * inv_a + 127) >> 8; b = (b * a + bg_blue * inv_a + 127) >> 8; fg = 0xff000000 | (r << 16) | (g << 8) | b; - dest[x] = fg; + *destptr = fg; } mask1 <<= 1; @@ -7440,6 +7469,8 @@ static void TriDraw32TranslateRevSub(const TriDrawTriangleArgs *args, WorkerThre bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; uint32_t lightmask = is_fixed_light ? 0 : 0xffffffff; auto colormaps = args->colormaps; + uint32_t srcalpha = args->uniforms->srcalpha; + uint32_t destalpha = args->uniforms->destalpha; // Calculate gradients const TriVertex &v1 = *args->v1; @@ -7458,7 +7489,8 @@ static void TriDraw32TranslateRevSub(const TriDrawTriangleArgs *args, WorkerThre start.Varying[i] = v1.varying[i] * v1.w + gradientX.Varying[i] * (startX - v1.x) + gradientY.Varying[i] * (startY - v1.y); } - const uint32_t * RESTRICT texPixels = (const uint32_t *)args->texturePixels; + const uint8_t * RESTRICT texPixels = args->texturePixels; + const uint32_t * RESTRICT translation = (const uint32_t *)args->translation; uint32_t texWidth = args->textureWidth; uint32_t texHeight = args->textureHeight; @@ -7516,20 +7548,21 @@ static void TriDraw32TranslateRevSub(const TriDrawTriangleArgs *args, WorkerThre for (int ix = 0; ix < 8; ix++) { + uint32_t *destptr = dest + x * 8 + ix; int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; uint32_t fg = texPixels[texelX * texHeight + texelY]; + fg = translation[fg]; uint32_t r = RPART(fg); uint32_t g = GPART(fg); uint32_t b = BPART(fg); r = (r * lightpos) >> 16; g = (g * lightpos) >> 16; b = (b * lightpos) >> 16; - uint32_t a = APART(fg); a += a >> 7; uint32_t inv_a = 256 - a; - uint32_t bg = dest[x * 8 + ix]; + uint32_t bg = *destptr; uint32_t bg_red = RPART(bg); uint32_t bg_green = GPART(bg); uint32_t bg_blue = BPART(bg); @@ -7537,7 +7570,7 @@ static void TriDraw32TranslateRevSub(const TriDrawTriangleArgs *args, 
WorkerThre g = (g * a + bg_green * inv_a + 127) >> 8; b = (b * a + bg_blue * inv_a + 127) >> 8; fg = 0xff000000 | (r << 16) | (g << 8) | b; - dest[x * 8 + ix] = fg; + *destptr = fg; for (int j = 0; j < TriVertex::NumVarying; j++) varyingPos[j] += varyingStep[j]; @@ -7597,20 +7630,21 @@ static void TriDraw32TranslateRevSub(const TriDrawTriangleArgs *args, WorkerThre { if (mask0 & (1 << 31)) { + uint32_t *destptr = dest + x; int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; uint32_t fg = texPixels[texelX * texHeight + texelY]; + fg = translation[fg]; uint32_t r = RPART(fg); uint32_t g = GPART(fg); uint32_t b = BPART(fg); r = (r * lightpos) >> 16; g = (g * lightpos) >> 16; b = (b * lightpos) >> 16; - uint32_t a = APART(fg); a += a >> 7; uint32_t inv_a = 256 - a; - uint32_t bg = dest[x]; + uint32_t bg = *destptr; uint32_t bg_red = RPART(bg); uint32_t bg_green = GPART(bg); uint32_t bg_blue = BPART(bg); @@ -7618,7 +7652,7 @@ static void TriDraw32TranslateRevSub(const TriDrawTriangleArgs *args, WorkerThre g = (g * a + bg_green * inv_a + 127) >> 8; b = (b * a + bg_blue * inv_a + 127) >> 8; fg = 0xff000000 | (r << 16) | (g << 8) | b; - dest[x] = fg; + *destptr = fg; } mask0 <<= 1; @@ -7665,20 +7699,21 @@ static void TriDraw32TranslateRevSub(const TriDrawTriangleArgs *args, WorkerThre { if (mask1 & (1 << 31)) { + uint32_t *destptr = dest + x; int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; uint32_t fg = texPixels[texelX * texHeight + texelY]; + fg = translation[fg]; uint32_t r = RPART(fg); uint32_t g = GPART(fg); uint32_t b = BPART(fg); r = (r * lightpos) >> 16; g = (g * lightpos) >> 16; b = (b * lightpos) >> 16; - uint32_t a = APART(fg); a += a >> 7; uint32_t inv_a = 256 - a; - uint32_t bg = dest[x]; + uint32_t bg = *destptr; uint32_t bg_red = RPART(bg); uint32_t bg_green = GPART(bg); 
uint32_t bg_blue = BPART(bg); @@ -7686,7 +7721,7 @@ static void TriDraw32TranslateRevSub(const TriDrawTriangleArgs *args, WorkerThre g = (g * a + bg_green * inv_a + 127) >> 8; b = (b * a + bg_blue * inv_a + 127) >> 8; fg = 0xff000000 | (r << 16) | (g << 8) | b; - dest[x] = fg; + *destptr = fg; } mask1 <<= 1; @@ -7719,6 +7754,8 @@ static void TriDraw32AddSrcColorOneMinusSrcColor(const TriDrawTriangleArgs *args bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; uint32_t lightmask = is_fixed_light ? 0 : 0xffffffff; auto colormaps = args->colormaps; + uint32_t srcalpha = args->uniforms->srcalpha; + uint32_t destalpha = args->uniforms->destalpha; // Calculate gradients const TriVertex &v1 = *args->v1; @@ -7795,6 +7832,7 @@ static void TriDraw32AddSrcColorOneMinusSrcColor(const TriDrawTriangleArgs *args for (int ix = 0; ix < 8; ix++) { + uint32_t *destptr = dest + x * 8 + ix; int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; uint32_t fg = texPixels[texelX * texHeight + texelY]; @@ -7804,19 +7842,18 @@ static void TriDraw32AddSrcColorOneMinusSrcColor(const TriDrawTriangleArgs *args r = (r * lightpos) >> 16; g = (g * lightpos) >> 16; b = (b * lightpos) >> 16; - - uint32_t a = APART(fg); - a += a >> 7; - uint32_t inv_a = 256 - a; - uint32_t bg = dest[x * 8 + ix]; + uint32_t inv_r = 256 - (r + (r >> 7)); + uint32_t inv_g = 256 - (g + (g >> 7)); /* own channel's top bit, not red's */ + uint32_t inv_b = 256 - (b + (b >> 7)); + uint32_t bg = *destptr; uint32_t bg_red = RPART(bg); uint32_t bg_green = GPART(bg); uint32_t bg_blue = BPART(bg); - r = (r * a + bg_red * inv_a + 127) >> 8; - g = (g * a + bg_green * inv_a + 127) >> 8; - b = (b * a + bg_blue * inv_a + 127) >> 8; + r = r + ((bg_red * inv_r + 127) >> 8); + g = g + ((bg_green * inv_g + 127) >> 8); + b = b + ((bg_blue * inv_b + 127) >> 8); fg = 0xff000000 | (r << 16) | (g << 8) | b; - dest[x * 8 + ix] = fg; + *destptr = fg; for (int j
= 0; j < TriVertex::NumVarying; j++) varyingPos[j] += varyingStep[j]; @@ -7876,6 +7913,7 @@ static void TriDraw32AddSrcColorOneMinusSrcColor(const TriDrawTriangleArgs *args { if (mask0 & (1 << 31)) { + uint32_t *destptr = dest + x; int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; uint32_t fg = texPixels[texelX * texHeight + texelY]; @@ -7885,19 +7923,18 @@ static void TriDraw32AddSrcColorOneMinusSrcColor(const TriDrawTriangleArgs *args r = (r * lightpos) >> 16; g = (g * lightpos) >> 16; b = (b * lightpos) >> 16; - - uint32_t a = APART(fg); - a += a >> 7; - uint32_t inv_a = 256 - a; - uint32_t bg = dest[x]; + uint32_t inv_r = 256 - (r + (r >> 7)); + uint32_t inv_g = 256 - (g + (g >> 7)); /* own channel's top bit, not red's */ + uint32_t inv_b = 256 - (b + (b >> 7)); + uint32_t bg = *destptr; uint32_t bg_red = RPART(bg); uint32_t bg_green = GPART(bg); uint32_t bg_blue = BPART(bg); - r = (r * a + bg_red * inv_a + 127) >> 8; - g = (g * a + bg_green * inv_a + 127) >> 8; - b = (b * a + bg_blue * inv_a + 127) >> 8; + r = r + ((bg_red * inv_r + 127) >> 8); + g = g + ((bg_green * inv_g + 127) >> 8); + b = b + ((bg_blue * inv_b + 127) >> 8); fg = 0xff000000 | (r << 16) | (g << 8) | b; - dest[x] = fg; + *destptr = fg; } mask0 <<= 1; @@ -7944,6 +7981,7 @@ static void TriDraw32AddSrcColorOneMinusSrcColor(const TriDrawTriangleArgs *args { if (mask1 & (1 << 31)) { + uint32_t *destptr = dest + x; int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; uint32_t fg = texPixels[texelX * texHeight + texelY]; @@ -7953,19 +7991,18 @@ static void TriDraw32AddSrcColorOneMinusSrcColor(const TriDrawTriangleArgs *args r = (r * lightpos) >> 16; g = (g * lightpos) >> 16; b = (b * lightpos) >> 16; - - uint32_t a = APART(fg); - a += a >> 7; - uint32_t inv_a = 256 - a; - uint32_t bg = dest[x]; + uint32_t inv_r = 256 - (r + (r >> 7)); + uint32_t inv_g =
256 - (g + (g >> 7)); /* own channel's top bit, not red's */ + uint32_t inv_b = 256 - (b + (b >> 7)); + uint32_t bg = *destptr; uint32_t bg_red = RPART(bg); uint32_t bg_green = GPART(bg); uint32_t bg_blue = BPART(bg); - r = (r * a + bg_red * inv_a + 127) >> 8; - g = (g * a + bg_green * inv_a + 127) >> 8; - b = (b * a + bg_blue * inv_a + 127) >> 8; + r = r + ((bg_red * inv_r + 127) >> 8); + g = g + ((bg_green * inv_g + 127) >> 8); + b = b + ((bg_blue * inv_b + 127) >> 8); fg = 0xff000000 | (r << 16) | (g << 8) | b; - dest[x] = fg; + *destptr = fg; } mask1 <<= 1; @@ -7998,6 +8035,8 @@ static void TriDraw32Skycap(const TriDrawTriangleArgs *args, WorkerThreadData *t bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; uint32_t lightmask = is_fixed_light ? 0 : 0xffffffff; auto colormaps = args->colormaps; + uint32_t srcalpha = args->uniforms->srcalpha; + uint32_t destalpha = args->uniforms->destalpha; // Calculate gradients const TriVertex &v1 = *args->v1; @@ -8074,6 +8113,7 @@ static void TriDraw32Skycap(const TriDrawTriangleArgs *args, WorkerThreadData *t for (int ix = 0; ix < 8; ix++) { + uint32_t *destptr = dest + x * 8 + ix; int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; uint32_t fg = texPixels[texelX * texHeight + texelY]; @@ -8083,19 +8123,21 @@ static void TriDraw32Skycap(const TriDrawTriangleArgs *args, WorkerThreadData *t r = (r * lightpos) >> 16; g = (g * lightpos) >> 16; b = (b * lightpos) >> 16; + int start_fade = 2; // How fast it should fade out + + int alpha_top = clamp(varyingPos[1] >> (16 - start_fade), 0, 256); + int alpha_bottom = clamp(((2 << 24) - varyingPos[1]) >> (16 - start_fade), 0, 256); + int a = MIN(alpha_top, alpha_bottom); + int inv_a = 256 - a; - uint32_t a = APART(fg); - a += a >> 7; - uint32_t inv_a = 256 - a; - uint32_t bg = dest[x * 8 + ix]; - uint32_t bg_red = RPART(bg); - uint32_t bg_green = GPART(bg); - uint32_t bg_blue = BPART(bg); +
uint32_t bg_red = RPART(color); + uint32_t bg_green = GPART(color); + uint32_t bg_blue = BPART(color); r = (r * a + bg_red * inv_a + 127) >> 8; g = (g * a + bg_green * inv_a + 127) >> 8; b = (b * a + bg_blue * inv_a + 127) >> 8; fg = 0xff000000 | (r << 16) | (g << 8) | b; - dest[x * 8 + ix] = fg; + *destptr = fg; for (int j = 0; j < TriVertex::NumVarying; j++) varyingPos[j] += varyingStep[j]; @@ -8155,6 +8197,7 @@ static void TriDraw32Skycap(const TriDrawTriangleArgs *args, WorkerThreadData *t { if (mask0 & (1 << 31)) { + uint32_t *destptr = dest + x; int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; uint32_t fg = texPixels[texelX * texHeight + texelY]; @@ -8164,19 +8207,21 @@ static void TriDraw32Skycap(const TriDrawTriangleArgs *args, WorkerThreadData *t r = (r * lightpos) >> 16; g = (g * lightpos) >> 16; b = (b * lightpos) >> 16; + int start_fade = 2; // How fast it should fade out + + int alpha_top = clamp(varyingPos[1] >> (16 - start_fade), 0, 256); + int alpha_bottom = clamp(((2 << 24) - varyingPos[1]) >> (16 - start_fade), 0, 256); + int a = MIN(alpha_top, alpha_bottom); + int inv_a = 256 - a; - uint32_t a = APART(fg); - a += a >> 7; - uint32_t inv_a = 256 - a; - uint32_t bg = dest[x]; - uint32_t bg_red = RPART(bg); - uint32_t bg_green = GPART(bg); - uint32_t bg_blue = BPART(bg); + uint32_t bg_red = RPART(color); + uint32_t bg_green = GPART(color); + uint32_t bg_blue = BPART(color); r = (r * a + bg_red * inv_a + 127) >> 8; g = (g * a + bg_green * inv_a + 127) >> 8; b = (b * a + bg_blue * inv_a + 127) >> 8; fg = 0xff000000 | (r << 16) | (g << 8) | b; - dest[x] = fg; + *destptr = fg; } mask0 <<= 1; @@ -8223,6 +8268,7 @@ static void TriDraw32Skycap(const TriDrawTriangleArgs *args, WorkerThreadData *t { if (mask1 & (1 << 31)) { + uint32_t *destptr = dest + x; int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; int texelY = ((((uint32_t)varyingPos[1] 
<< 8) >> 16) * texHeight) >> 16; uint32_t fg = texPixels[texelX * texHeight + texelY]; @@ -8232,19 +8278,21 @@ static void TriDraw32Skycap(const TriDrawTriangleArgs *args, WorkerThreadData *t r = (r * lightpos) >> 16; g = (g * lightpos) >> 16; b = (b * lightpos) >> 16; + int start_fade = 2; // How fast it should fade out + + int alpha_top = clamp(varyingPos[1] >> (16 - start_fade), 0, 256); + int alpha_bottom = clamp(((2 << 24) - varyingPos[1]) >> (16 - start_fade), 0, 256); + int a = MIN(alpha_top, alpha_bottom); + int inv_a = 256 - a; - uint32_t a = APART(fg); - a += a >> 7; - uint32_t inv_a = 256 - a; - uint32_t bg = dest[x]; - uint32_t bg_red = RPART(bg); - uint32_t bg_green = GPART(bg); - uint32_t bg_blue = BPART(bg); + uint32_t bg_red = RPART(color); + uint32_t bg_green = GPART(color); + uint32_t bg_blue = BPART(color); r = (r * a + bg_red * inv_a + 127) >> 8; g = (g * a + bg_green * inv_a + 127) >> 8; b = (b * a + bg_blue * inv_a + 127) >> 8; fg = 0xff000000 | (r << 16) | (g << 8) | b; - dest[x] = fg; + *destptr = fg; } mask1 <<= 1; @@ -8277,6 +8325,8 @@ static void TriFill8Copy(const TriDrawTriangleArgs *args, WorkerThreadData *thre bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; uint32_t lightmask = is_fixed_light ? 
0 : 0xffffffff; auto colormaps = args->colormaps; + uint32_t srcalpha = args->uniforms->srcalpha; + uint32_t destalpha = args->uniforms->destalpha; // Calculate gradients const TriVertex &v1 = *args->v1; @@ -8353,10 +8403,11 @@ static void TriFill8Copy(const TriDrawTriangleArgs *args, WorkerThreadData *thre for (int ix = 0; ix < 8; ix++) { + uint8_t *destptr = dest + x * 8 + ix; uint8_t fg = color; int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; fg = colormaps[colormapindex + fg]; - dest[x * 8 + ix] = fg; + *destptr = fg; for (int j = 0; j < TriVertex::NumVarying; j++) varyingPos[j] += varyingStep[j]; @@ -8416,10 +8467,11 @@ static void TriFill8Copy(const TriDrawTriangleArgs *args, WorkerThreadData *thre { if (mask0 & (1 << 31)) { + uint8_t *destptr = dest + x; uint8_t fg = color; int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; fg = colormaps[colormapindex + fg]; - dest[x] = fg; + *destptr = fg; } mask0 <<= 1; @@ -8466,10 +8518,11 @@ static void TriFill8Copy(const TriDrawTriangleArgs *args, WorkerThreadData *thre { if (mask1 & (1 << 31)) { + uint8_t *destptr = dest + x; uint8_t fg = color; int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; fg = colormaps[colormapindex + fg]; - dest[x] = fg; + *destptr = fg; } mask1 <<= 1; @@ -8502,6 +8555,8 @@ static void TriFill8AlphaBlend(const TriDrawTriangleArgs *args, WorkerThreadData bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; uint32_t lightmask = is_fixed_light ? 
0 : 0xffffffff; auto colormaps = args->colormaps; + uint32_t srcalpha = args->uniforms->srcalpha; + uint32_t destalpha = args->uniforms->destalpha; // Calculate gradients const TriVertex &v1 = *args->v1; @@ -8578,10 +8633,11 @@ static void TriFill8AlphaBlend(const TriDrawTriangleArgs *args, WorkerThreadData for (int ix = 0; ix < 8; ix++) { + uint8_t *destptr = dest + x * 8 + ix; uint8_t fg = color; int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; fg = colormaps[colormapindex + fg]; - dest[x * 8 + ix] = fg; + *destptr = fg; for (int j = 0; j < TriVertex::NumVarying; j++) varyingPos[j] += varyingStep[j]; @@ -8641,10 +8697,11 @@ static void TriFill8AlphaBlend(const TriDrawTriangleArgs *args, WorkerThreadData { if (mask0 & (1 << 31)) { + uint8_t *destptr = dest + x; uint8_t fg = color; int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; fg = colormaps[colormapindex + fg]; - dest[x] = fg; + *destptr = fg; } mask0 <<= 1; @@ -8691,10 +8748,11 @@ static void TriFill8AlphaBlend(const TriDrawTriangleArgs *args, WorkerThreadData { if (mask1 & (1 << 31)) { + uint8_t *destptr = dest + x; uint8_t fg = color; int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; fg = colormaps[colormapindex + fg]; - dest[x] = fg; + *destptr = fg; } mask1 <<= 1; @@ -8727,6 +8785,8 @@ static void TriFill8AddSolid(const TriDrawTriangleArgs *args, WorkerThreadData * bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; uint32_t lightmask = is_fixed_light ? 
0 : 0xffffffff; auto colormaps = args->colormaps; + uint32_t srcalpha = args->uniforms->srcalpha; + uint32_t destalpha = args->uniforms->destalpha; // Calculate gradients const TriVertex &v1 = *args->v1; @@ -8803,10 +8863,11 @@ static void TriFill8AddSolid(const TriDrawTriangleArgs *args, WorkerThreadData * for (int ix = 0; ix < 8; ix++) { + uint8_t *destptr = dest + x * 8 + ix; uint8_t fg = color; int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; fg = colormaps[colormapindex + fg]; - dest[x * 8 + ix] = fg; + *destptr = fg; for (int j = 0; j < TriVertex::NumVarying; j++) varyingPos[j] += varyingStep[j]; @@ -8866,10 +8927,11 @@ static void TriFill8AddSolid(const TriDrawTriangleArgs *args, WorkerThreadData * { if (mask0 & (1 << 31)) { + uint8_t *destptr = dest + x; uint8_t fg = color; int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; fg = colormaps[colormapindex + fg]; - dest[x] = fg; + *destptr = fg; } mask0 <<= 1; @@ -8916,10 +8978,11 @@ static void TriFill8AddSolid(const TriDrawTriangleArgs *args, WorkerThreadData * { if (mask1 & (1 << 31)) { + uint8_t *destptr = dest + x; uint8_t fg = color; int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; fg = colormaps[colormapindex + fg]; - dest[x] = fg; + *destptr = fg; } mask1 <<= 1; @@ -8952,6 +9015,8 @@ static void TriFill8Add(const TriDrawTriangleArgs *args, WorkerThreadData *threa bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; uint32_t lightmask = is_fixed_light ? 
0 : 0xffffffff; auto colormaps = args->colormaps; + uint32_t srcalpha = args->uniforms->srcalpha; + uint32_t destalpha = args->uniforms->destalpha; // Calculate gradients const TriVertex &v1 = *args->v1; @@ -9028,10 +9093,11 @@ static void TriFill8Add(const TriDrawTriangleArgs *args, WorkerThreadData *threa for (int ix = 0; ix < 8; ix++) { + uint8_t *destptr = dest + x * 8 + ix; uint8_t fg = color; int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; fg = colormaps[colormapindex + fg]; - dest[x * 8 + ix] = fg; + *destptr = fg; for (int j = 0; j < TriVertex::NumVarying; j++) varyingPos[j] += varyingStep[j]; @@ -9091,10 +9157,11 @@ static void TriFill8Add(const TriDrawTriangleArgs *args, WorkerThreadData *threa { if (mask0 & (1 << 31)) { + uint8_t *destptr = dest + x; uint8_t fg = color; int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; fg = colormaps[colormapindex + fg]; - dest[x] = fg; + *destptr = fg; } mask0 <<= 1; @@ -9141,10 +9208,11 @@ static void TriFill8Add(const TriDrawTriangleArgs *args, WorkerThreadData *threa { if (mask1 & (1 << 31)) { + uint8_t *destptr = dest + x; uint8_t fg = color; int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; fg = colormaps[colormapindex + fg]; - dest[x] = fg; + *destptr = fg; } mask1 <<= 1; @@ -9177,6 +9245,8 @@ static void TriFill8Sub(const TriDrawTriangleArgs *args, WorkerThreadData *threa bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; uint32_t lightmask = is_fixed_light ? 
0 : 0xffffffff; auto colormaps = args->colormaps; + uint32_t srcalpha = args->uniforms->srcalpha; + uint32_t destalpha = args->uniforms->destalpha; // Calculate gradients const TriVertex &v1 = *args->v1; @@ -9253,10 +9323,11 @@ static void TriFill8Sub(const TriDrawTriangleArgs *args, WorkerThreadData *threa for (int ix = 0; ix < 8; ix++) { + uint8_t *destptr = dest + x * 8 + ix; uint8_t fg = color; int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; fg = colormaps[colormapindex + fg]; - dest[x * 8 + ix] = fg; + *destptr = fg; for (int j = 0; j < TriVertex::NumVarying; j++) varyingPos[j] += varyingStep[j]; @@ -9316,10 +9387,11 @@ static void TriFill8Sub(const TriDrawTriangleArgs *args, WorkerThreadData *threa { if (mask0 & (1 << 31)) { + uint8_t *destptr = dest + x; uint8_t fg = color; int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; fg = colormaps[colormapindex + fg]; - dest[x] = fg; + *destptr = fg; } mask0 <<= 1; @@ -9366,10 +9438,11 @@ static void TriFill8Sub(const TriDrawTriangleArgs *args, WorkerThreadData *threa { if (mask1 & (1 << 31)) { + uint8_t *destptr = dest + x; uint8_t fg = color; int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; fg = colormaps[colormapindex + fg]; - dest[x] = fg; + *destptr = fg; } mask1 <<= 1; @@ -9402,6 +9475,8 @@ static void TriFill8RevSub(const TriDrawTriangleArgs *args, WorkerThreadData *th bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; uint32_t lightmask = is_fixed_light ? 
0 : 0xffffffff; auto colormaps = args->colormaps; + uint32_t srcalpha = args->uniforms->srcalpha; + uint32_t destalpha = args->uniforms->destalpha; // Calculate gradients const TriVertex &v1 = *args->v1; @@ -9478,10 +9553,11 @@ static void TriFill8RevSub(const TriDrawTriangleArgs *args, WorkerThreadData *th for (int ix = 0; ix < 8; ix++) { + uint8_t *destptr = dest + x * 8 + ix; uint8_t fg = color; int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; fg = colormaps[colormapindex + fg]; - dest[x * 8 + ix] = fg; + *destptr = fg; for (int j = 0; j < TriVertex::NumVarying; j++) varyingPos[j] += varyingStep[j]; @@ -9541,10 +9617,11 @@ static void TriFill8RevSub(const TriDrawTriangleArgs *args, WorkerThreadData *th { if (mask0 & (1 << 31)) { + uint8_t *destptr = dest + x; uint8_t fg = color; int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; fg = colormaps[colormapindex + fg]; - dest[x] = fg; + *destptr = fg; } mask0 <<= 1; @@ -9591,10 +9668,11 @@ static void TriFill8RevSub(const TriDrawTriangleArgs *args, WorkerThreadData *th { if (mask1 & (1 << 31)) { + uint8_t *destptr = dest + x; uint8_t fg = color; int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; fg = colormaps[colormapindex + fg]; - dest[x] = fg; + *destptr = fg; } mask1 <<= 1; @@ -9627,6 +9705,8 @@ static void TriFill8Stencil(const TriDrawTriangleArgs *args, WorkerThreadData *t bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; uint32_t lightmask = is_fixed_light ? 
0 : 0xffffffff; auto colormaps = args->colormaps; + uint32_t srcalpha = args->uniforms->srcalpha; + uint32_t destalpha = args->uniforms->destalpha; // Calculate gradients const TriVertex &v1 = *args->v1; @@ -9703,10 +9783,11 @@ static void TriFill8Stencil(const TriDrawTriangleArgs *args, WorkerThreadData *t for (int ix = 0; ix < 8; ix++) { + uint8_t *destptr = dest + x * 8 + ix; uint8_t fg = color; int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; fg = colormaps[colormapindex + fg]; - dest[x * 8 + ix] = fg; + *destptr = fg; for (int j = 0; j < TriVertex::NumVarying; j++) varyingPos[j] += varyingStep[j]; @@ -9766,10 +9847,11 @@ static void TriFill8Stencil(const TriDrawTriangleArgs *args, WorkerThreadData *t { if (mask0 & (1 << 31)) { + uint8_t *destptr = dest + x; uint8_t fg = color; int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; fg = colormaps[colormapindex + fg]; - dest[x] = fg; + *destptr = fg; } mask0 <<= 1; @@ -9816,10 +9898,11 @@ static void TriFill8Stencil(const TriDrawTriangleArgs *args, WorkerThreadData *t { if (mask1 & (1 << 31)) { + uint8_t *destptr = dest + x; uint8_t fg = color; int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; fg = colormaps[colormapindex + fg]; - dest[x] = fg; + *destptr = fg; } mask1 <<= 1; @@ -9852,6 +9935,8 @@ static void TriFill8Shaded(const TriDrawTriangleArgs *args, WorkerThreadData *th bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; uint32_t lightmask = is_fixed_light ? 
0 : 0xffffffff; auto colormaps = args->colormaps; + uint32_t srcalpha = args->uniforms->srcalpha; + uint32_t destalpha = args->uniforms->destalpha; // Calculate gradients const TriVertex &v1 = *args->v1; @@ -9870,7 +9955,8 @@ static void TriFill8Shaded(const TriDrawTriangleArgs *args, WorkerThreadData *th start.Varying[i] = v1.varying[i] * v1.w + gradientX.Varying[i] * (startX - v1.x) + gradientY.Varying[i] * (startY - v1.y); } - const uint8_t * RESTRICT texPixels = (const uint8_t *)args->texturePixels; + const uint8_t * RESTRICT texPixels = args->texturePixels; + const uint8_t * RESTRICT translation = (const uint8_t *)args->translation; uint32_t texWidth = args->textureWidth; uint32_t texHeight = args->textureHeight; @@ -9928,10 +10014,11 @@ static void TriFill8Shaded(const TriDrawTriangleArgs *args, WorkerThreadData *th for (int ix = 0; ix < 8; ix++) { + uint8_t *destptr = dest + x * 8 + ix; uint8_t fg = color; int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; fg = colormaps[colormapindex + fg]; - dest[x * 8 + ix] = fg; + *destptr = fg; for (int j = 0; j < TriVertex::NumVarying; j++) varyingPos[j] += varyingStep[j]; @@ -9991,10 +10078,11 @@ static void TriFill8Shaded(const TriDrawTriangleArgs *args, WorkerThreadData *th { if (mask0 & (1 << 31)) { + uint8_t *destptr = dest + x; uint8_t fg = color; int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; fg = colormaps[colormapindex + fg]; - dest[x] = fg; + *destptr = fg; } mask0 <<= 1; @@ -10041,10 +10129,11 @@ static void TriFill8Shaded(const TriDrawTriangleArgs *args, WorkerThreadData *th { if (mask1 & (1 << 31)) { + uint8_t *destptr = dest + x; uint8_t fg = color; int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; fg = colormaps[colormapindex + fg]; - dest[x] = fg; + *destptr = fg; } mask1 <<= 1; @@ -10077,6 +10166,8 @@ static void TriFill8TranslateCopy(const TriDrawTriangleArgs *args, WorkerThreadD bool is_fixed_light = (flags & TriUniforms::fixed_light) 
== TriUniforms::fixed_light; uint32_t lightmask = is_fixed_light ? 0 : 0xffffffff; auto colormaps = args->colormaps; + uint32_t srcalpha = args->uniforms->srcalpha; + uint32_t destalpha = args->uniforms->destalpha; // Calculate gradients const TriVertex &v1 = *args->v1; @@ -10095,7 +10186,8 @@ static void TriFill8TranslateCopy(const TriDrawTriangleArgs *args, WorkerThreadD start.Varying[i] = v1.varying[i] * v1.w + gradientX.Varying[i] * (startX - v1.x) + gradientY.Varying[i] * (startY - v1.y); } - const uint8_t * RESTRICT texPixels = (const uint8_t *)args->texturePixels; + const uint8_t * RESTRICT texPixels = args->texturePixels; + const uint8_t * RESTRICT translation = (const uint8_t *)args->translation; uint32_t texWidth = args->textureWidth; uint32_t texHeight = args->textureHeight; @@ -10153,10 +10245,12 @@ static void TriFill8TranslateCopy(const TriDrawTriangleArgs *args, WorkerThreadD for (int ix = 0; ix < 8; ix++) { + uint8_t *destptr = dest + x * 8 + ix; uint8_t fg = color; + fg = translation[fg]; int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; fg = colormaps[colormapindex + fg]; - dest[x * 8 + ix] = fg; + *destptr = fg; for (int j = 0; j < TriVertex::NumVarying; j++) varyingPos[j] += varyingStep[j]; @@ -10216,10 +10310,12 @@ static void TriFill8TranslateCopy(const TriDrawTriangleArgs *args, WorkerThreadD { if (mask0 & (1 << 31)) { + uint8_t *destptr = dest + x; uint8_t fg = color; + fg = translation[fg]; int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; fg = colormaps[colormapindex + fg]; - dest[x] = fg; + *destptr = fg; } mask0 <<= 1; @@ -10266,10 +10362,12 @@ static void TriFill8TranslateCopy(const TriDrawTriangleArgs *args, WorkerThreadD { if (mask1 & (1 << 31)) { + uint8_t *destptr = dest + x; uint8_t fg = color; + fg = translation[fg]; int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; fg = colormaps[colormapindex + fg]; - dest[x] = fg; + *destptr = fg; } mask1 <<= 1; @@ -10302,6 +10400,8 
@@ static void TriFill8TranslateAlphaBlend(const TriDrawTriangleArgs *args, WorkerT bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; uint32_t lightmask = is_fixed_light ? 0 : 0xffffffff; auto colormaps = args->colormaps; + uint32_t srcalpha = args->uniforms->srcalpha; + uint32_t destalpha = args->uniforms->destalpha; // Calculate gradients const TriVertex &v1 = *args->v1; @@ -10320,7 +10420,8 @@ static void TriFill8TranslateAlphaBlend(const TriDrawTriangleArgs *args, WorkerT start.Varying[i] = v1.varying[i] * v1.w + gradientX.Varying[i] * (startX - v1.x) + gradientY.Varying[i] * (startY - v1.y); } - const uint8_t * RESTRICT texPixels = (const uint8_t *)args->texturePixels; + const uint8_t * RESTRICT texPixels = args->texturePixels; + const uint8_t * RESTRICT translation = (const uint8_t *)args->translation; uint32_t texWidth = args->textureWidth; uint32_t texHeight = args->textureHeight; @@ -10378,10 +10479,12 @@ static void TriFill8TranslateAlphaBlend(const TriDrawTriangleArgs *args, WorkerT for (int ix = 0; ix < 8; ix++) { + uint8_t *destptr = dest + x * 8 + ix; uint8_t fg = color; + fg = translation[fg]; int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; fg = colormaps[colormapindex + fg]; - dest[x * 8 + ix] = fg; + *destptr = fg; for (int j = 0; j < TriVertex::NumVarying; j++) varyingPos[j] += varyingStep[j]; @@ -10441,10 +10544,12 @@ static void TriFill8TranslateAlphaBlend(const TriDrawTriangleArgs *args, WorkerT { if (mask0 & (1 << 31)) { + uint8_t *destptr = dest + x; uint8_t fg = color; + fg = translation[fg]; int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; fg = colormaps[colormapindex + fg]; - dest[x] = fg; + *destptr = fg; } mask0 <<= 1; @@ -10491,10 +10596,12 @@ static void TriFill8TranslateAlphaBlend(const TriDrawTriangleArgs *args, WorkerT { if (mask1 & (1 << 31)) { + uint8_t *destptr = dest + x; uint8_t fg = color; + fg = translation[fg]; int colormapindex = MIN(((256 - 
(lightpos >> 8)) * 32) >> 8, 31) << 8; fg = colormaps[colormapindex + fg]; - dest[x] = fg; + *destptr = fg; } mask1 <<= 1; @@ -10527,6 +10634,8 @@ static void TriFill8TranslateAdd(const TriDrawTriangleArgs *args, WorkerThreadDa bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; uint32_t lightmask = is_fixed_light ? 0 : 0xffffffff; auto colormaps = args->colormaps; + uint32_t srcalpha = args->uniforms->srcalpha; + uint32_t destalpha = args->uniforms->destalpha; // Calculate gradients const TriVertex &v1 = *args->v1; @@ -10545,7 +10654,8 @@ static void TriFill8TranslateAdd(const TriDrawTriangleArgs *args, WorkerThreadDa start.Varying[i] = v1.varying[i] * v1.w + gradientX.Varying[i] * (startX - v1.x) + gradientY.Varying[i] * (startY - v1.y); } - const uint8_t * RESTRICT texPixels = (const uint8_t *)args->texturePixels; + const uint8_t * RESTRICT texPixels = args->texturePixels; + const uint8_t * RESTRICT translation = (const uint8_t *)args->translation; uint32_t texWidth = args->textureWidth; uint32_t texHeight = args->textureHeight; @@ -10603,10 +10713,12 @@ static void TriFill8TranslateAdd(const TriDrawTriangleArgs *args, WorkerThreadDa for (int ix = 0; ix < 8; ix++) { + uint8_t *destptr = dest + x * 8 + ix; uint8_t fg = color; + fg = translation[fg]; int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; fg = colormaps[colormapindex + fg]; - dest[x * 8 + ix] = fg; + *destptr = fg; for (int j = 0; j < TriVertex::NumVarying; j++) varyingPos[j] += varyingStep[j]; @@ -10666,10 +10778,12 @@ static void TriFill8TranslateAdd(const TriDrawTriangleArgs *args, WorkerThreadDa { if (mask0 & (1 << 31)) { + uint8_t *destptr = dest + x; uint8_t fg = color; + fg = translation[fg]; int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; fg = colormaps[colormapindex + fg]; - dest[x] = fg; + *destptr = fg; } mask0 <<= 1; @@ -10716,10 +10830,12 @@ static void TriFill8TranslateAdd(const TriDrawTriangleArgs *args, 
WorkerThreadDa { if (mask1 & (1 << 31)) { + uint8_t *destptr = dest + x; uint8_t fg = color; + fg = translation[fg]; int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; fg = colormaps[colormapindex + fg]; - dest[x] = fg; + *destptr = fg; } mask1 <<= 1; @@ -10752,6 +10868,8 @@ static void TriFill8TranslateSub(const TriDrawTriangleArgs *args, WorkerThreadDa bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; uint32_t lightmask = is_fixed_light ? 0 : 0xffffffff; auto colormaps = args->colormaps; + uint32_t srcalpha = args->uniforms->srcalpha; + uint32_t destalpha = args->uniforms->destalpha; // Calculate gradients const TriVertex &v1 = *args->v1; @@ -10770,7 +10888,8 @@ static void TriFill8TranslateSub(const TriDrawTriangleArgs *args, WorkerThreadDa start.Varying[i] = v1.varying[i] * v1.w + gradientX.Varying[i] * (startX - v1.x) + gradientY.Varying[i] * (startY - v1.y); } - const uint8_t * RESTRICT texPixels = (const uint8_t *)args->texturePixels; + const uint8_t * RESTRICT texPixels = args->texturePixels; + const uint8_t * RESTRICT translation = (const uint8_t *)args->translation; uint32_t texWidth = args->textureWidth; uint32_t texHeight = args->textureHeight; @@ -10828,10 +10947,12 @@ static void TriFill8TranslateSub(const TriDrawTriangleArgs *args, WorkerThreadDa for (int ix = 0; ix < 8; ix++) { + uint8_t *destptr = dest + x * 8 + ix; uint8_t fg = color; + fg = translation[fg]; int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; fg = colormaps[colormapindex + fg]; - dest[x * 8 + ix] = fg; + *destptr = fg; for (int j = 0; j < TriVertex::NumVarying; j++) varyingPos[j] += varyingStep[j]; @@ -10891,10 +11012,12 @@ static void TriFill8TranslateSub(const TriDrawTriangleArgs *args, WorkerThreadDa { if (mask0 & (1 << 31)) { + uint8_t *destptr = dest + x; uint8_t fg = color; + fg = translation[fg]; int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; fg = colormaps[colormapindex + 
fg]; - dest[x] = fg; + *destptr = fg; } mask0 <<= 1; @@ -10941,10 +11064,12 @@ static void TriFill8TranslateSub(const TriDrawTriangleArgs *args, WorkerThreadDa { if (mask1 & (1 << 31)) { + uint8_t *destptr = dest + x; uint8_t fg = color; + fg = translation[fg]; int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; fg = colormaps[colormapindex + fg]; - dest[x] = fg; + *destptr = fg; } mask1 <<= 1; @@ -10977,6 +11102,8 @@ static void TriFill8TranslateRevSub(const TriDrawTriangleArgs *args, WorkerThrea bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; uint32_t lightmask = is_fixed_light ? 0 : 0xffffffff; auto colormaps = args->colormaps; + uint32_t srcalpha = args->uniforms->srcalpha; + uint32_t destalpha = args->uniforms->destalpha; // Calculate gradients const TriVertex &v1 = *args->v1; @@ -10995,7 +11122,8 @@ static void TriFill8TranslateRevSub(const TriDrawTriangleArgs *args, WorkerThrea start.Varying[i] = v1.varying[i] * v1.w + gradientX.Varying[i] * (startX - v1.x) + gradientY.Varying[i] * (startY - v1.y); } - const uint8_t * RESTRICT texPixels = (const uint8_t *)args->texturePixels; + const uint8_t * RESTRICT texPixels = args->texturePixels; + const uint8_t * RESTRICT translation = (const uint8_t *)args->translation; uint32_t texWidth = args->textureWidth; uint32_t texHeight = args->textureHeight; @@ -11053,10 +11181,12 @@ static void TriFill8TranslateRevSub(const TriDrawTriangleArgs *args, WorkerThrea for (int ix = 0; ix < 8; ix++) { + uint8_t *destptr = dest + x * 8 + ix; uint8_t fg = color; + fg = translation[fg]; int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; fg = colormaps[colormapindex + fg]; - dest[x * 8 + ix] = fg; + *destptr = fg; for (int j = 0; j < TriVertex::NumVarying; j++) varyingPos[j] += varyingStep[j]; @@ -11116,10 +11246,12 @@ static void TriFill8TranslateRevSub(const TriDrawTriangleArgs *args, WorkerThrea { if (mask0 & (1 << 31)) { + uint8_t *destptr = dest + x; 
uint8_t fg = color; + fg = translation[fg]; int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; fg = colormaps[colormapindex + fg]; - dest[x] = fg; + *destptr = fg; } mask0 <<= 1; @@ -11166,10 +11298,12 @@ static void TriFill8TranslateRevSub(const TriDrawTriangleArgs *args, WorkerThrea { if (mask1 & (1 << 31)) { + uint8_t *destptr = dest + x; uint8_t fg = color; + fg = translation[fg]; int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; fg = colormaps[colormapindex + fg]; - dest[x] = fg; + *destptr = fg; } mask1 <<= 1; @@ -11202,6 +11336,8 @@ static void TriFill8AddSrcColorOneMinusSrcColor(const TriDrawTriangleArgs *args, bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; uint32_t lightmask = is_fixed_light ? 0 : 0xffffffff; auto colormaps = args->colormaps; + uint32_t srcalpha = args->uniforms->srcalpha; + uint32_t destalpha = args->uniforms->destalpha; // Calculate gradients const TriVertex &v1 = *args->v1; @@ -11278,10 +11414,11 @@ static void TriFill8AddSrcColorOneMinusSrcColor(const TriDrawTriangleArgs *args, for (int ix = 0; ix < 8; ix++) { + uint8_t *destptr = dest + x * 8 + ix; uint8_t fg = color; int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; fg = colormaps[colormapindex + fg]; - dest[x * 8 + ix] = fg; + *destptr = fg; for (int j = 0; j < TriVertex::NumVarying; j++) varyingPos[j] += varyingStep[j]; @@ -11341,10 +11478,11 @@ static void TriFill8AddSrcColorOneMinusSrcColor(const TriDrawTriangleArgs *args, { if (mask0 & (1 << 31)) { + uint8_t *destptr = dest + x; uint8_t fg = color; int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; fg = colormaps[colormapindex + fg]; - dest[x] = fg; + *destptr = fg; } mask0 <<= 1; @@ -11391,10 +11529,11 @@ static void TriFill8AddSrcColorOneMinusSrcColor(const TriDrawTriangleArgs *args, { if (mask1 & (1 << 31)) { + uint8_t *destptr = dest + x; uint8_t fg = color; int colormapindex = MIN(((256 - (lightpos >> 8)) 
* 32) >> 8, 31) << 8; fg = colormaps[colormapindex + fg]; - dest[x] = fg; + *destptr = fg; } mask1 <<= 1; @@ -11427,6 +11566,8 @@ static void TriFill8Skycap(const TriDrawTriangleArgs *args, WorkerThreadData *th bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; uint32_t lightmask = is_fixed_light ? 0 : 0xffffffff; auto colormaps = args->colormaps; + uint32_t srcalpha = args->uniforms->srcalpha; + uint32_t destalpha = args->uniforms->destalpha; // Calculate gradients const TriVertex &v1 = *args->v1; @@ -11503,10 +11644,11 @@ static void TriFill8Skycap(const TriDrawTriangleArgs *args, WorkerThreadData *th for (int ix = 0; ix < 8; ix++) { + uint8_t *destptr = dest + x * 8 + ix; uint8_t fg = color; int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; fg = colormaps[colormapindex + fg]; - dest[x * 8 + ix] = fg; + *destptr = fg; for (int j = 0; j < TriVertex::NumVarying; j++) varyingPos[j] += varyingStep[j]; @@ -11566,10 +11708,11 @@ static void TriFill8Skycap(const TriDrawTriangleArgs *args, WorkerThreadData *th { if (mask0 & (1 << 31)) { + uint8_t *destptr = dest + x; uint8_t fg = color; int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; fg = colormaps[colormapindex + fg]; - dest[x] = fg; + *destptr = fg; } mask0 <<= 1; @@ -11616,10 +11759,11 @@ static void TriFill8Skycap(const TriDrawTriangleArgs *args, WorkerThreadData *th { if (mask1 & (1 << 31)) { + uint8_t *destptr = dest + x; uint8_t fg = color; int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; fg = colormaps[colormapindex + fg]; - dest[x] = fg; + *destptr = fg; } mask1 <<= 1; @@ -11652,6 +11796,8 @@ static void TriDraw8Copy(const TriDrawTriangleArgs *args, WorkerThreadData *thre bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; uint32_t lightmask = is_fixed_light ? 
0 : 0xffffffff; auto colormaps = args->colormaps; + uint32_t srcalpha = args->uniforms->srcalpha; + uint32_t destalpha = args->uniforms->destalpha; // Calculate gradients const TriVertex &v1 = *args->v1; @@ -11728,12 +11874,13 @@ static void TriDraw8Copy(const TriDrawTriangleArgs *args, WorkerThreadData *thre for (int ix = 0; ix < 8; ix++) { + uint8_t *destptr = dest + x * 8 + ix; int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; uint8_t fg = texPixels[texelX * texHeight + texelY]; int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; fg = colormaps[colormapindex + fg]; - dest[x * 8 + ix] = fg; + *destptr = fg; for (int j = 0; j < TriVertex::NumVarying; j++) varyingPos[j] += varyingStep[j]; @@ -11793,12 +11940,13 @@ static void TriDraw8Copy(const TriDrawTriangleArgs *args, WorkerThreadData *thre { if (mask0 & (1 << 31)) { + uint8_t *destptr = dest + x; int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; uint8_t fg = texPixels[texelX * texHeight + texelY]; int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; fg = colormaps[colormapindex + fg]; - dest[x] = fg; + *destptr = fg; } mask0 <<= 1; @@ -11845,12 +11993,13 @@ static void TriDraw8Copy(const TriDrawTriangleArgs *args, WorkerThreadData *thre { if (mask1 & (1 << 31)) { + uint8_t *destptr = dest + x; int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; uint8_t fg = texPixels[texelX * texHeight + texelY]; int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; fg = colormaps[colormapindex + fg]; - dest[x] = fg; + *destptr = fg; } mask1 <<= 1; @@ -11883,6 +12032,8 @@ static void TriDraw8AlphaBlend(const TriDrawTriangleArgs *args, WorkerThreadData bool is_fixed_light = (flags & 
TriUniforms::fixed_light) == TriUniforms::fixed_light; uint32_t lightmask = is_fixed_light ? 0 : 0xffffffff; auto colormaps = args->colormaps; + uint32_t srcalpha = args->uniforms->srcalpha; + uint32_t destalpha = args->uniforms->destalpha; // Calculate gradients const TriVertex &v1 = *args->v1; @@ -11959,12 +12110,13 @@ static void TriDraw8AlphaBlend(const TriDrawTriangleArgs *args, WorkerThreadData for (int ix = 0; ix < 8; ix++) { + uint8_t *destptr = dest + x * 8 + ix; int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; uint8_t fg = texPixels[texelX * texHeight + texelY]; int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; fg = colormaps[colormapindex + fg]; - dest[x * 8 + ix] = fg; + *destptr = fg; for (int j = 0; j < TriVertex::NumVarying; j++) varyingPos[j] += varyingStep[j]; @@ -12024,12 +12176,13 @@ static void TriDraw8AlphaBlend(const TriDrawTriangleArgs *args, WorkerThreadData { if (mask0 & (1 << 31)) { + uint8_t *destptr = dest + x; int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; uint8_t fg = texPixels[texelX * texHeight + texelY]; int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; fg = colormaps[colormapindex + fg]; - dest[x] = fg; + *destptr = fg; } mask0 <<= 1; @@ -12076,12 +12229,13 @@ static void TriDraw8AlphaBlend(const TriDrawTriangleArgs *args, WorkerThreadData { if (mask1 & (1 << 31)) { + uint8_t *destptr = dest + x; int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; uint8_t fg = texPixels[texelX * texHeight + texelY]; int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; fg = colormaps[colormapindex + fg]; - dest[x] = fg; + *destptr = fg; } mask1 <<= 1; @@ -12114,6 +12268,8 @@ static void TriDraw8AddSolid(const 
TriDrawTriangleArgs *args, WorkerThreadData * bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; uint32_t lightmask = is_fixed_light ? 0 : 0xffffffff; auto colormaps = args->colormaps; + uint32_t srcalpha = args->uniforms->srcalpha; + uint32_t destalpha = args->uniforms->destalpha; // Calculate gradients const TriVertex &v1 = *args->v1; @@ -12190,12 +12346,13 @@ static void TriDraw8AddSolid(const TriDrawTriangleArgs *args, WorkerThreadData * for (int ix = 0; ix < 8; ix++) { + uint8_t *destptr = dest + x * 8 + ix; int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; uint8_t fg = texPixels[texelX * texHeight + texelY]; int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; fg = colormaps[colormapindex + fg]; - dest[x * 8 + ix] = fg; + *destptr = fg; for (int j = 0; j < TriVertex::NumVarying; j++) varyingPos[j] += varyingStep[j]; @@ -12255,12 +12412,13 @@ static void TriDraw8AddSolid(const TriDrawTriangleArgs *args, WorkerThreadData * { if (mask0 & (1 << 31)) { + uint8_t *destptr = dest + x; int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; uint8_t fg = texPixels[texelX * texHeight + texelY]; int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; fg = colormaps[colormapindex + fg]; - dest[x] = fg; + *destptr = fg; } mask0 <<= 1; @@ -12307,12 +12465,13 @@ static void TriDraw8AddSolid(const TriDrawTriangleArgs *args, WorkerThreadData * { if (mask1 & (1 << 31)) { + uint8_t *destptr = dest + x; int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; uint8_t fg = texPixels[texelX * texHeight + texelY]; int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; fg = colormaps[colormapindex + fg]; - dest[x] = fg; + *destptr = 
fg; } mask1 <<= 1; @@ -12345,6 +12504,8 @@ static void TriDraw8Add(const TriDrawTriangleArgs *args, WorkerThreadData *threa bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; uint32_t lightmask = is_fixed_light ? 0 : 0xffffffff; auto colormaps = args->colormaps; + uint32_t srcalpha = args->uniforms->srcalpha; + uint32_t destalpha = args->uniforms->destalpha; // Calculate gradients const TriVertex &v1 = *args->v1; @@ -12421,12 +12582,13 @@ static void TriDraw8Add(const TriDrawTriangleArgs *args, WorkerThreadData *threa for (int ix = 0; ix < 8; ix++) { + uint8_t *destptr = dest + x * 8 + ix; int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; uint8_t fg = texPixels[texelX * texHeight + texelY]; int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; fg = colormaps[colormapindex + fg]; - dest[x * 8 + ix] = fg; + *destptr = fg; for (int j = 0; j < TriVertex::NumVarying; j++) varyingPos[j] += varyingStep[j]; @@ -12486,12 +12648,13 @@ static void TriDraw8Add(const TriDrawTriangleArgs *args, WorkerThreadData *threa { if (mask0 & (1 << 31)) { + uint8_t *destptr = dest + x; int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; uint8_t fg = texPixels[texelX * texHeight + texelY]; int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; fg = colormaps[colormapindex + fg]; - dest[x] = fg; + *destptr = fg; } mask0 <<= 1; @@ -12538,12 +12701,13 @@ static void TriDraw8Add(const TriDrawTriangleArgs *args, WorkerThreadData *threa { if (mask1 & (1 << 31)) { + uint8_t *destptr = dest + x; int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; uint8_t fg = texPixels[texelX * texHeight + texelY]; int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 
8, 31) << 8; fg = colormaps[colormapindex + fg]; - dest[x] = fg; + *destptr = fg; } mask1 <<= 1; @@ -12576,6 +12740,8 @@ static void TriDraw8Sub(const TriDrawTriangleArgs *args, WorkerThreadData *threa bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; uint32_t lightmask = is_fixed_light ? 0 : 0xffffffff; auto colormaps = args->colormaps; + uint32_t srcalpha = args->uniforms->srcalpha; + uint32_t destalpha = args->uniforms->destalpha; // Calculate gradients const TriVertex &v1 = *args->v1; @@ -12652,12 +12818,13 @@ static void TriDraw8Sub(const TriDrawTriangleArgs *args, WorkerThreadData *threa for (int ix = 0; ix < 8; ix++) { + uint8_t *destptr = dest + x * 8 + ix; int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; uint8_t fg = texPixels[texelX * texHeight + texelY]; int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; fg = colormaps[colormapindex + fg]; - dest[x * 8 + ix] = fg; + *destptr = fg; for (int j = 0; j < TriVertex::NumVarying; j++) varyingPos[j] += varyingStep[j]; @@ -12717,12 +12884,13 @@ static void TriDraw8Sub(const TriDrawTriangleArgs *args, WorkerThreadData *threa { if (mask0 & (1 << 31)) { + uint8_t *destptr = dest + x; int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; uint8_t fg = texPixels[texelX * texHeight + texelY]; int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; fg = colormaps[colormapindex + fg]; - dest[x] = fg; + *destptr = fg; } mask0 <<= 1; @@ -12769,12 +12937,13 @@ static void TriDraw8Sub(const TriDrawTriangleArgs *args, WorkerThreadData *threa { if (mask1 & (1 << 31)) { + uint8_t *destptr = dest + x; int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; uint8_t fg = texPixels[texelX * 
texHeight + texelY]; int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; fg = colormaps[colormapindex + fg]; - dest[x] = fg; + *destptr = fg; } mask1 <<= 1; @@ -12807,6 +12976,8 @@ static void TriDraw8RevSub(const TriDrawTriangleArgs *args, WorkerThreadData *th bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; uint32_t lightmask = is_fixed_light ? 0 : 0xffffffff; auto colormaps = args->colormaps; + uint32_t srcalpha = args->uniforms->srcalpha; + uint32_t destalpha = args->uniforms->destalpha; // Calculate gradients const TriVertex &v1 = *args->v1; @@ -12883,12 +13054,13 @@ static void TriDraw8RevSub(const TriDrawTriangleArgs *args, WorkerThreadData *th for (int ix = 0; ix < 8; ix++) { + uint8_t *destptr = dest + x * 8 + ix; int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; uint8_t fg = texPixels[texelX * texHeight + texelY]; int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; fg = colormaps[colormapindex + fg]; - dest[x * 8 + ix] = fg; + *destptr = fg; for (int j = 0; j < TriVertex::NumVarying; j++) varyingPos[j] += varyingStep[j]; @@ -12948,12 +13120,13 @@ static void TriDraw8RevSub(const TriDrawTriangleArgs *args, WorkerThreadData *th { if (mask0 & (1 << 31)) { + uint8_t *destptr = dest + x; int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; uint8_t fg = texPixels[texelX * texHeight + texelY]; int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; fg = colormaps[colormapindex + fg]; - dest[x] = fg; + *destptr = fg; } mask0 <<= 1; @@ -13000,12 +13173,13 @@ static void TriDraw8RevSub(const TriDrawTriangleArgs *args, WorkerThreadData *th { if (mask1 & (1 << 31)) { + uint8_t *destptr = dest + x; int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; int texelY = 
((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; uint8_t fg = texPixels[texelX * texHeight + texelY]; int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; fg = colormaps[colormapindex + fg]; - dest[x] = fg; + *destptr = fg; } mask1 <<= 1; @@ -13038,6 +13212,8 @@ static void TriDraw8Stencil(const TriDrawTriangleArgs *args, WorkerThreadData *t bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; uint32_t lightmask = is_fixed_light ? 0 : 0xffffffff; auto colormaps = args->colormaps; + uint32_t srcalpha = args->uniforms->srcalpha; + uint32_t destalpha = args->uniforms->destalpha; // Calculate gradients const TriVertex &v1 = *args->v1; @@ -13114,12 +13290,13 @@ static void TriDraw8Stencil(const TriDrawTriangleArgs *args, WorkerThreadData *t for (int ix = 0; ix < 8; ix++) { + uint8_t *destptr = dest + x * 8 + ix; int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; uint8_t fg = texPixels[texelX * texHeight + texelY]; int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; fg = colormaps[colormapindex + fg]; - dest[x * 8 + ix] = fg; + *destptr = fg; for (int j = 0; j < TriVertex::NumVarying; j++) varyingPos[j] += varyingStep[j]; @@ -13179,12 +13356,13 @@ static void TriDraw8Stencil(const TriDrawTriangleArgs *args, WorkerThreadData *t { if (mask0 & (1 << 31)) { + uint8_t *destptr = dest + x; int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; uint8_t fg = texPixels[texelX * texHeight + texelY]; int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; fg = colormaps[colormapindex + fg]; - dest[x] = fg; + *destptr = fg; } mask0 <<= 1; @@ -13231,12 +13409,13 @@ static void TriDraw8Stencil(const TriDrawTriangleArgs *args, WorkerThreadData *t { if (mask1 & (1 << 31)) { + uint8_t *destptr = dest + x; int 
texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; uint8_t fg = texPixels[texelX * texHeight + texelY]; int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; fg = colormaps[colormapindex + fg]; - dest[x] = fg; + *destptr = fg; } mask1 <<= 1; @@ -13269,6 +13448,8 @@ static void TriDraw8Shaded(const TriDrawTriangleArgs *args, WorkerThreadData *th bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; uint32_t lightmask = is_fixed_light ? 0 : 0xffffffff; auto colormaps = args->colormaps; + uint32_t srcalpha = args->uniforms->srcalpha; + uint32_t destalpha = args->uniforms->destalpha; // Calculate gradients const TriVertex &v1 = *args->v1; @@ -13287,7 +13468,8 @@ static void TriDraw8Shaded(const TriDrawTriangleArgs *args, WorkerThreadData *th start.Varying[i] = v1.varying[i] * v1.w + gradientX.Varying[i] * (startX - v1.x) + gradientY.Varying[i] * (startY - v1.y); } - const uint8_t * RESTRICT texPixels = (const uint8_t *)args->texturePixels; + const uint8_t * RESTRICT texPixels = args->texturePixels; + const uint8_t * RESTRICT translation = (const uint8_t *)args->translation; uint32_t texWidth = args->textureWidth; uint32_t texHeight = args->textureHeight; @@ -13345,12 +13527,11 @@ static void TriDraw8Shaded(const TriDrawTriangleArgs *args, WorkerThreadData *th for (int ix = 0; ix < 8; ix++) { - int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; - int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; - uint8_t fg = texPixels[texelX * texHeight + texelY]; + uint8_t *destptr = dest + x * 8 + ix; + uint8_t fg = color; int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; fg = colormaps[colormapindex + fg]; - dest[x * 8 + ix] = fg; + *destptr = fg; for (int j = 0; j < TriVertex::NumVarying; j++) varyingPos[j] += varyingStep[j]; @@ -13410,12 +13591,11 @@ static void 
TriDraw8Shaded(const TriDrawTriangleArgs *args, WorkerThreadData *th { if (mask0 & (1 << 31)) { - int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; - int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; - uint8_t fg = texPixels[texelX * texHeight + texelY]; + uint8_t *destptr = dest + x; + uint8_t fg = color; int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; fg = colormaps[colormapindex + fg]; - dest[x] = fg; + *destptr = fg; } mask0 <<= 1; @@ -13462,12 +13642,11 @@ static void TriDraw8Shaded(const TriDrawTriangleArgs *args, WorkerThreadData *th { if (mask1 & (1 << 31)) { - int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; - int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; - uint8_t fg = texPixels[texelX * texHeight + texelY]; + uint8_t *destptr = dest + x; + uint8_t fg = color; int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; fg = colormaps[colormapindex + fg]; - dest[x] = fg; + *destptr = fg; } mask1 <<= 1; @@ -13500,6 +13679,8 @@ static void TriDraw8TranslateCopy(const TriDrawTriangleArgs *args, WorkerThreadD bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; uint32_t lightmask = is_fixed_light ? 
0 : 0xffffffff; auto colormaps = args->colormaps; + uint32_t srcalpha = args->uniforms->srcalpha; + uint32_t destalpha = args->uniforms->destalpha; // Calculate gradients const TriVertex &v1 = *args->v1; @@ -13518,7 +13699,8 @@ static void TriDraw8TranslateCopy(const TriDrawTriangleArgs *args, WorkerThreadD start.Varying[i] = v1.varying[i] * v1.w + gradientX.Varying[i] * (startX - v1.x) + gradientY.Varying[i] * (startY - v1.y); } - const uint8_t * RESTRICT texPixels = (const uint8_t *)args->texturePixels; + const uint8_t * RESTRICT texPixels = args->texturePixels; + const uint8_t * RESTRICT translation = (const uint8_t *)args->translation; uint32_t texWidth = args->textureWidth; uint32_t texHeight = args->textureHeight; @@ -13576,12 +13758,14 @@ static void TriDraw8TranslateCopy(const TriDrawTriangleArgs *args, WorkerThreadD for (int ix = 0; ix < 8; ix++) { + uint8_t *destptr = dest + x * 8 + ix; int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; uint8_t fg = texPixels[texelX * texHeight + texelY]; + fg = translation[fg]; int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; fg = colormaps[colormapindex + fg]; - dest[x * 8 + ix] = fg; + *destptr = fg; for (int j = 0; j < TriVertex::NumVarying; j++) varyingPos[j] += varyingStep[j]; @@ -13641,12 +13825,14 @@ static void TriDraw8TranslateCopy(const TriDrawTriangleArgs *args, WorkerThreadD { if (mask0 & (1 << 31)) { + uint8_t *destptr = dest + x; int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; uint8_t fg = texPixels[texelX * texHeight + texelY]; + fg = translation[fg]; int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; fg = colormaps[colormapindex + fg]; - dest[x] = fg; + *destptr = fg; } mask0 <<= 1; @@ -13693,12 +13879,14 @@ static void TriDraw8TranslateCopy(const TriDrawTriangleArgs *args, 
WorkerThreadD { if (mask1 & (1 << 31)) { + uint8_t *destptr = dest + x; int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; uint8_t fg = texPixels[texelX * texHeight + texelY]; + fg = translation[fg]; int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; fg = colormaps[colormapindex + fg]; - dest[x] = fg; + *destptr = fg; } mask1 <<= 1; @@ -13731,6 +13919,8 @@ static void TriDraw8TranslateAlphaBlend(const TriDrawTriangleArgs *args, WorkerT bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; uint32_t lightmask = is_fixed_light ? 0 : 0xffffffff; auto colormaps = args->colormaps; + uint32_t srcalpha = args->uniforms->srcalpha; + uint32_t destalpha = args->uniforms->destalpha; // Calculate gradients const TriVertex &v1 = *args->v1; @@ -13749,7 +13939,8 @@ static void TriDraw8TranslateAlphaBlend(const TriDrawTriangleArgs *args, WorkerT start.Varying[i] = v1.varying[i] * v1.w + gradientX.Varying[i] * (startX - v1.x) + gradientY.Varying[i] * (startY - v1.y); } - const uint8_t * RESTRICT texPixels = (const uint8_t *)args->texturePixels; + const uint8_t * RESTRICT texPixels = args->texturePixels; + const uint8_t * RESTRICT translation = (const uint8_t *)args->translation; uint32_t texWidth = args->textureWidth; uint32_t texHeight = args->textureHeight; @@ -13807,12 +13998,14 @@ static void TriDraw8TranslateAlphaBlend(const TriDrawTriangleArgs *args, WorkerT for (int ix = 0; ix < 8; ix++) { + uint8_t *destptr = dest + x * 8 + ix; int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; uint8_t fg = texPixels[texelX * texHeight + texelY]; + fg = translation[fg]; int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; fg = colormaps[colormapindex + fg]; - dest[x * 8 + ix] = fg; + *destptr = fg; for (int j = 0; j < 
TriVertex::NumVarying; j++) varyingPos[j] += varyingStep[j]; @@ -13872,12 +14065,14 @@ static void TriDraw8TranslateAlphaBlend(const TriDrawTriangleArgs *args, WorkerT { if (mask0 & (1 << 31)) { + uint8_t *destptr = dest + x; int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; uint8_t fg = texPixels[texelX * texHeight + texelY]; + fg = translation[fg]; int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; fg = colormaps[colormapindex + fg]; - dest[x] = fg; + *destptr = fg; } mask0 <<= 1; @@ -13924,12 +14119,14 @@ static void TriDraw8TranslateAlphaBlend(const TriDrawTriangleArgs *args, WorkerT { if (mask1 & (1 << 31)) { + uint8_t *destptr = dest + x; int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; uint8_t fg = texPixels[texelX * texHeight + texelY]; + fg = translation[fg]; int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; fg = colormaps[colormapindex + fg]; - dest[x] = fg; + *destptr = fg; } mask1 <<= 1; @@ -13962,6 +14159,8 @@ static void TriDraw8TranslateAdd(const TriDrawTriangleArgs *args, WorkerThreadDa bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; uint32_t lightmask = is_fixed_light ? 
0 : 0xffffffff; auto colormaps = args->colormaps; + uint32_t srcalpha = args->uniforms->srcalpha; + uint32_t destalpha = args->uniforms->destalpha; // Calculate gradients const TriVertex &v1 = *args->v1; @@ -13980,7 +14179,8 @@ static void TriDraw8TranslateAdd(const TriDrawTriangleArgs *args, WorkerThreadDa start.Varying[i] = v1.varying[i] * v1.w + gradientX.Varying[i] * (startX - v1.x) + gradientY.Varying[i] * (startY - v1.y); } - const uint8_t * RESTRICT texPixels = (const uint8_t *)args->texturePixels; + const uint8_t * RESTRICT texPixels = args->texturePixels; + const uint8_t * RESTRICT translation = (const uint8_t *)args->translation; uint32_t texWidth = args->textureWidth; uint32_t texHeight = args->textureHeight; @@ -14038,12 +14238,14 @@ static void TriDraw8TranslateAdd(const TriDrawTriangleArgs *args, WorkerThreadDa for (int ix = 0; ix < 8; ix++) { + uint8_t *destptr = dest + x * 8 + ix; int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; uint8_t fg = texPixels[texelX * texHeight + texelY]; + fg = translation[fg]; int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; fg = colormaps[colormapindex + fg]; - dest[x * 8 + ix] = fg; + *destptr = fg; for (int j = 0; j < TriVertex::NumVarying; j++) varyingPos[j] += varyingStep[j]; @@ -14103,12 +14305,14 @@ static void TriDraw8TranslateAdd(const TriDrawTriangleArgs *args, WorkerThreadDa { if (mask0 & (1 << 31)) { + uint8_t *destptr = dest + x; int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; uint8_t fg = texPixels[texelX * texHeight + texelY]; + fg = translation[fg]; int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; fg = colormaps[colormapindex + fg]; - dest[x] = fg; + *destptr = fg; } mask0 <<= 1; @@ -14155,12 +14359,14 @@ static void TriDraw8TranslateAdd(const TriDrawTriangleArgs *args, 
WorkerThreadDa { if (mask1 & (1 << 31)) { + uint8_t *destptr = dest + x; int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; uint8_t fg = texPixels[texelX * texHeight + texelY]; + fg = translation[fg]; int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; fg = colormaps[colormapindex + fg]; - dest[x] = fg; + *destptr = fg; } mask1 <<= 1; @@ -14193,6 +14399,8 @@ static void TriDraw8TranslateSub(const TriDrawTriangleArgs *args, WorkerThreadDa bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; uint32_t lightmask = is_fixed_light ? 0 : 0xffffffff; auto colormaps = args->colormaps; + uint32_t srcalpha = args->uniforms->srcalpha; + uint32_t destalpha = args->uniforms->destalpha; // Calculate gradients const TriVertex &v1 = *args->v1; @@ -14211,7 +14419,8 @@ static void TriDraw8TranslateSub(const TriDrawTriangleArgs *args, WorkerThreadDa start.Varying[i] = v1.varying[i] * v1.w + gradientX.Varying[i] * (startX - v1.x) + gradientY.Varying[i] * (startY - v1.y); } - const uint8_t * RESTRICT texPixels = (const uint8_t *)args->texturePixels; + const uint8_t * RESTRICT texPixels = args->texturePixels; + const uint8_t * RESTRICT translation = (const uint8_t *)args->translation; uint32_t texWidth = args->textureWidth; uint32_t texHeight = args->textureHeight; @@ -14269,12 +14478,14 @@ static void TriDraw8TranslateSub(const TriDrawTriangleArgs *args, WorkerThreadDa for (int ix = 0; ix < 8; ix++) { + uint8_t *destptr = dest + x * 8 + ix; int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; uint8_t fg = texPixels[texelX * texHeight + texelY]; + fg = translation[fg]; int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; fg = colormaps[colormapindex + fg]; - dest[x * 8 + ix] = fg; + *destptr = fg; for (int j = 0; j < 
TriVertex::NumVarying; j++) varyingPos[j] += varyingStep[j]; @@ -14334,12 +14545,14 @@ static void TriDraw8TranslateSub(const TriDrawTriangleArgs *args, WorkerThreadDa { if (mask0 & (1 << 31)) { + uint8_t *destptr = dest + x; int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; uint8_t fg = texPixels[texelX * texHeight + texelY]; + fg = translation[fg]; int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; fg = colormaps[colormapindex + fg]; - dest[x] = fg; + *destptr = fg; } mask0 <<= 1; @@ -14386,12 +14599,14 @@ static void TriDraw8TranslateSub(const TriDrawTriangleArgs *args, WorkerThreadDa { if (mask1 & (1 << 31)) { + uint8_t *destptr = dest + x; int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; uint8_t fg = texPixels[texelX * texHeight + texelY]; + fg = translation[fg]; int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; fg = colormaps[colormapindex + fg]; - dest[x] = fg; + *destptr = fg; } mask1 <<= 1; @@ -14424,6 +14639,8 @@ static void TriDraw8TranslateRevSub(const TriDrawTriangleArgs *args, WorkerThrea bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; uint32_t lightmask = is_fixed_light ? 
0 : 0xffffffff; auto colormaps = args->colormaps; + uint32_t srcalpha = args->uniforms->srcalpha; + uint32_t destalpha = args->uniforms->destalpha; // Calculate gradients const TriVertex &v1 = *args->v1; @@ -14442,7 +14659,8 @@ static void TriDraw8TranslateRevSub(const TriDrawTriangleArgs *args, WorkerThrea start.Varying[i] = v1.varying[i] * v1.w + gradientX.Varying[i] * (startX - v1.x) + gradientY.Varying[i] * (startY - v1.y); } - const uint8_t * RESTRICT texPixels = (const uint8_t *)args->texturePixels; + const uint8_t * RESTRICT texPixels = args->texturePixels; + const uint8_t * RESTRICT translation = (const uint8_t *)args->translation; uint32_t texWidth = args->textureWidth; uint32_t texHeight = args->textureHeight; @@ -14500,12 +14718,14 @@ static void TriDraw8TranslateRevSub(const TriDrawTriangleArgs *args, WorkerThrea for (int ix = 0; ix < 8; ix++) { + uint8_t *destptr = dest + x * 8 + ix; int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; uint8_t fg = texPixels[texelX * texHeight + texelY]; + fg = translation[fg]; int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; fg = colormaps[colormapindex + fg]; - dest[x * 8 + ix] = fg; + *destptr = fg; for (int j = 0; j < TriVertex::NumVarying; j++) varyingPos[j] += varyingStep[j]; @@ -14565,12 +14785,14 @@ static void TriDraw8TranslateRevSub(const TriDrawTriangleArgs *args, WorkerThrea { if (mask0 & (1 << 31)) { + uint8_t *destptr = dest + x; int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; uint8_t fg = texPixels[texelX * texHeight + texelY]; + fg = translation[fg]; int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; fg = colormaps[colormapindex + fg]; - dest[x] = fg; + *destptr = fg; } mask0 <<= 1; @@ -14617,12 +14839,14 @@ static void TriDraw8TranslateRevSub(const TriDrawTriangleArgs *args, 
WorkerThrea { if (mask1 & (1 << 31)) { + uint8_t *destptr = dest + x; int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; uint8_t fg = texPixels[texelX * texHeight + texelY]; + fg = translation[fg]; int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; fg = colormaps[colormapindex + fg]; - dest[x] = fg; + *destptr = fg; } mask1 <<= 1; @@ -14655,6 +14879,8 @@ static void TriDraw8AddSrcColorOneMinusSrcColor(const TriDrawTriangleArgs *args, bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; uint32_t lightmask = is_fixed_light ? 0 : 0xffffffff; auto colormaps = args->colormaps; + uint32_t srcalpha = args->uniforms->srcalpha; + uint32_t destalpha = args->uniforms->destalpha; // Calculate gradients const TriVertex &v1 = *args->v1; @@ -14731,12 +14957,13 @@ static void TriDraw8AddSrcColorOneMinusSrcColor(const TriDrawTriangleArgs *args, for (int ix = 0; ix < 8; ix++) { + uint8_t *destptr = dest + x * 8 + ix; int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; uint8_t fg = texPixels[texelX * texHeight + texelY]; int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; fg = colormaps[colormapindex + fg]; - dest[x * 8 + ix] = fg; + *destptr = fg; for (int j = 0; j < TriVertex::NumVarying; j++) varyingPos[j] += varyingStep[j]; @@ -14796,12 +15023,13 @@ static void TriDraw8AddSrcColorOneMinusSrcColor(const TriDrawTriangleArgs *args, { if (mask0 & (1 << 31)) { + uint8_t *destptr = dest + x; int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; uint8_t fg = texPixels[texelX * texHeight + texelY]; int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; fg = colormaps[colormapindex + fg]; - dest[x] = fg; + *destptr = fg; } mask0 
<<= 1; @@ -14848,12 +15076,13 @@ static void TriDraw8AddSrcColorOneMinusSrcColor(const TriDrawTriangleArgs *args, { if (mask1 & (1 << 31)) { + uint8_t *destptr = dest + x; int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; uint8_t fg = texPixels[texelX * texHeight + texelY]; int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; fg = colormaps[colormapindex + fg]; - dest[x] = fg; + *destptr = fg; } mask1 <<= 1; @@ -14886,6 +15115,8 @@ static void TriDraw8Skycap(const TriDrawTriangleArgs *args, WorkerThreadData *th bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; uint32_t lightmask = is_fixed_light ? 0 : 0xffffffff; auto colormaps = args->colormaps; + uint32_t srcalpha = args->uniforms->srcalpha; + uint32_t destalpha = args->uniforms->destalpha; // Calculate gradients const TriVertex &v1 = *args->v1; @@ -14962,12 +15193,13 @@ static void TriDraw8Skycap(const TriDrawTriangleArgs *args, WorkerThreadData *th for (int ix = 0; ix < 8; ix++) { + uint8_t *destptr = dest + x * 8 + ix; int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; uint8_t fg = texPixels[texelX * texHeight + texelY]; int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; fg = colormaps[colormapindex + fg]; - dest[x * 8 + ix] = fg; + *destptr = fg; for (int j = 0; j < TriVertex::NumVarying; j++) varyingPos[j] += varyingStep[j]; @@ -15027,12 +15259,13 @@ static void TriDraw8Skycap(const TriDrawTriangleArgs *args, WorkerThreadData *th { if (mask0 & (1 << 31)) { + uint8_t *destptr = dest + x; int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; uint8_t fg = texPixels[texelX * texHeight + texelY]; int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; 
fg = colormaps[colormapindex + fg]; - dest[x] = fg; + *destptr = fg; } mask0 <<= 1; @@ -15079,12 +15312,13 @@ static void TriDraw8Skycap(const TriDrawTriangleArgs *args, WorkerThreadData *th { if (mask1 & (1 << 31)) { + uint8_t *destptr = dest + x; int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; uint8_t fg = texPixels[texelX * texHeight + texelY]; int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; fg = colormaps[colormapindex + fg]; - dest[x] = fg; + *destptr = fg; } mask1 <<= 1; diff --git a/src/polyrenderer/drawers/poly_drawers.php b/src/polyrenderer/drawers/poly_drawers.php index b451888108..741fcf3c7b 100644 --- a/src/polyrenderer/drawers/poly_drawers.php +++ b/src/polyrenderer/drawers/poly_drawers.php @@ -78,21 +78,21 @@ std::vector ScreenTria { @@ -101,7 +101,7 @@ std::vector ScreenTria @@ -109,11 +109,11 @@ function OutputDrawer($drawerName, $blendmode, $isTruecolor, $isColorFill, $isLi @@ -132,6 +132,8 @@ static void (const TriDrawTriangleArgs *args, WorkerThreadData * bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; uint32_t lightmask = is_fixed_light ? 
0 : 0xffffffff; auto colormaps = args->colormaps; + uint32_t srcalpha = args->uniforms->srcalpha; + uint32_t destalpha = args->uniforms->destalpha; // Calculate gradients const TriVertex &v1 = *args->v1; @@ -150,7 +152,15 @@ static void (const TriDrawTriangleArgs *args, WorkerThreadData * start.Varying[i] = v1.varying[i] * v1.w + gradientX.Varying[i] * (startX - v1.x) + gradientY.Varying[i] * (startY - v1.y); } + + const uint8_t * RESTRICT texPixels = args->texturePixels; + const * RESTRICT translation = (const *)args->translation; + const * RESTRICT texPixels = (const *)args->texturePixels; + uint32_t texWidth = args->textureWidth; uint32_t texHeight = args->textureHeight; @@ -208,51 +218,9 @@ static void (const TriDrawTriangleArgs *args, WorkerThreadData * for (int ix = 0; ix < 8; ix++) { - - fg = color; - - int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; - int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; - fg = texPixels[texelX * texHeight + texelY]; - - uint32_t r = RPART(fg); - uint32_t g = GPART(fg); - uint32_t b = BPART(fg); - r = (r * lightpos) >> 16; - g = (g * lightpos) >> 16; - b = (b * lightpos) >> 16; - - - uint32_t a = APART(fg); - a += a >> 7; - uint32_t inv_a = 256 - a; - uint32_t bg = dest[x * 8 + ix]; - uint32_t bg_red = RPART(bg); - uint32_t bg_green = GPART(bg); - uint32_t bg_blue = BPART(bg); - r = (r * a + bg_red * inv_a + 127) >> 8; - g = (g * a + bg_green * inv_a + 127) >> 8; - b = (b * a + bg_blue * inv_a + 127) >> 8; - fg = 0xff000000 | (r << 16) | (g << 8) | b; - - fg = 0xff000000 | (r << 16) | (g << 8) | b; - - int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; - fg = colormaps[colormapindex + fg]; - - dest[x * 8 + ix] = fg; + *destptr = dest + x * 8 + ix; + + *destptr = fg; for (int j = 0; j < TriVertex::NumVarying; j++) varyingPos[j] += varyingStep[j]; @@ -317,51 +285,9 @@ static void (const TriDrawTriangleArgs *args, WorkerThreadData * { if ( & (1 << 31)) { - - fg = 
color; - - int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; - int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; - fg = texPixels[texelX * texHeight + texelY]; - - uint32_t r = RPART(fg); - uint32_t g = GPART(fg); - uint32_t b = BPART(fg); - r = (r * lightpos) >> 16; - g = (g * lightpos) >> 16; - b = (b * lightpos) >> 16; - - - uint32_t a = APART(fg); - a += a >> 7; - uint32_t inv_a = 256 - a; - uint32_t bg = dest[x]; - uint32_t bg_red = RPART(bg); - uint32_t bg_green = GPART(bg); - uint32_t bg_blue = BPART(bg); - r = (r * a + bg_red * inv_a + 127) >> 8; - g = (g * a + bg_green * inv_a + 127) >> 8; - b = (b * a + bg_blue * inv_a + 127) >> 8; - fg = 0xff000000 | (r << 16) | (g << 8) | b; - - fg = 0xff000000 | (r << 16) | (g << 8) | b; - - int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; - fg = colormaps[colormapindex + fg]; - - dest[x] = fg; + *destptr = dest + x; + + *destptr = fg; } <<= 1; @@ -383,4 +309,141 @@ static void (const TriDrawTriangleArgs *args, WorkerThreadData * + fg = color; + + int texelX = ((((uint32_t)varyingPos[0] << 8) >> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + fg = texPixels[texelX * texHeight + texelY]; + + fg = translation[fg]; + + uint32_t r = RPART(fg); + uint32_t g = GPART(fg); + uint32_t b = BPART(fg); + r = (r * lightpos) >> 16; + g = (g * lightpos) >> 16; + b = (b * lightpos) >> 16; + + fg = 0xff000000 | (r << 16) | (g << 8) | b; + + int colormapindex = MIN(((256 - (lightpos >> 8)) * 32) >> 8, 31) << 8; + fg = colormaps[colormapindex + fg]; + + uint32_t a = APART(fg); + a += a >> 7; + uint32_t inv_a = 256 - a; + uint32_t bg = *destptr; + uint32_t bg_red = RPART(bg); + uint32_t bg_green = GPART(bg); + uint32_t bg_blue = BPART(bg); + r = (r * a + bg_red * inv_a + 127) >> 8; + g = (g * a + bg_green * inv_a + 127) >> 8; + b = (b * a + bg_blue * inv_a + 127) >> 8; + + + + int texelX = ((((uint32_t)varyingPos[0] << 8) 
>> 16) * texWidth) >> 16; + int texelY = ((((uint32_t)varyingPos[1] << 8) >> 16) * texHeight) >> 16; + int sample = texPixels[texelX * texHeight + texelY]; + + uint32_t fgalpha = sample;//clamp(sample, 0, 64) * 4; + uint32_t inv_fgalpha = 256 - fgalpha; + int a = (fgalpha * srcalpha + 128) >> 8; + int inv_a = (destalpha * fgalpha + 256 * inv_fgalpha + 128) >> 8; + + uint32_t bg = *destptr; + uint32_t bg_red = RPART(bg); + uint32_t bg_green = GPART(bg); + uint32_t bg_blue = BPART(bg); + r = (r * a + bg_red * inv_a + 127) >> 8; + g = (g * a + bg_green * inv_a + 127) >> 8; + b = (b * a + bg_blue * inv_a + 127) >> 8; + + uint32_t fgalpha = APART(fg); + uint32_t inv_fgalpha = 256 - fgalpha; + int a = (fgalpha * srcalpha + 128) >> 8; + int inv_a = (destalpha * fgalpha + 256 * inv_fgalpha + 128) >> 8; + + uint32_t bg = *destptr; + uint32_t bg_red = RPART(bg); + uint32_t bg_green = GPART(bg); + uint32_t bg_blue = BPART(bg); + r = (r * a + bg_red * inv_a + 127) >> 8; + g = (g * a + bg_green * inv_a + 127) >> 8; + b = (b * a + bg_blue * inv_a + 127) >> 8; + + uint32_t inv_r = 256 - (r + (r >> 7)); + uint32_t inv_g = 256 - (g + (r >> 7)); + uint32_t inv_b = 256 - (b + (r >> 7)); + uint32_t bg = *destptr; + uint32_t bg_red = RPART(bg); + uint32_t bg_green = GPART(bg); + uint32_t bg_blue = BPART(bg); + r = r + ((bg_red * inv_r + 127) >> 8); + g = g + ((bg_green * inv_g + 127) >> 8); + b = b + ((bg_blue * inv_b + 127) >> 8); + + int start_fade = 2; // How fast it should fade out + + int alpha_top = clamp(varyingPos[1] >> (16 - start_fade), 0, 256); + int alpha_bottom = clamp(((2 << 24) - varyingPos[1]) >> (16 - start_fade), 0, 256); + int a = MIN(alpha_top, alpha_bottom); + int inv_a = 256 - a; + + uint32_t bg_red = RPART(color); + uint32_t bg_green = GPART(color); + uint32_t bg_blue = BPART(color); + r = (r * a + bg_red * inv_a + 127) >> 8; + g = (g * a + bg_green * inv_a + 127) >> 8; + b = (b * a + bg_blue * inv_a + 127) >> 8; + + uint32_t a = APART(fg); + a += a >> 7; + 
uint32_t inv_a = 256 - a; + uint32_t bg = *destptr; + uint32_t bg_red = RPART(bg); + uint32_t bg_green = GPART(bg); + uint32_t bg_blue = BPART(bg); + r = (r * a + bg_red * inv_a + 127) >> 8; + g = (g * a + bg_green * inv_a + 127) >> 8; + b = (b * a + bg_blue * inv_a + 127) >> 8; + \ No newline at end of file From 01a8df7eb3e51ac30997d9c69c0bb3a965cd7be8 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 22 Feb 2017 08:05:23 +0100 Subject: [PATCH 870/912] Add dynlights to wall drawer --- src/swrenderer/drawers/r_draw_wall32.h | 1880 +++++++++++++++++++++- src/swrenderer/drawers/r_draw_wall32.php | 62 +- 2 files changed, 1919 insertions(+), 23 deletions(-) diff --git a/src/swrenderer/drawers/r_draw_wall32.h b/src/swrenderer/drawers/r_draw_wall32.h index b6c446a531..d4ba5d51e2 100644 --- a/src/swrenderer/drawers/r_draw_wall32.h +++ b/src/swrenderer/drawers/r_draw_wall32.h @@ -56,7 +56,7 @@ namespace swrenderer int light = 256 - (args.Light() >> (FRACBITS - 8)); __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - + int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); uint32_t fracstep = args.TextureVStep(); @@ -64,6 +64,15 @@ namespace swrenderer uint32_t texturefracx = args.TextureUPos(); uint32_t *dest = (uint32_t*)args.Dest(); int dest_y = args.DestY(); + + auto lights = args.dc_lights; + auto num_lights = args.dc_num_lights; + float vpz = args.dc_viewpos.Z; + float stepvpz = args.dc_viewpos_step.Z; + vpz += thread->skipped_by_thread(dest_y) * stepvpz; + stepvpz *= thread->num_cores; + __m128 viewpos_z = _mm_set_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); + __m128 step_viewpos_z = _mm_set1_ps(stepvpz * 2.0f); count = thread->count_for_thread(dest_y, count); if (count <= 0) return; @@ -99,7 +108,48 @@ namespace swrenderer __m128i fgcolor = 
_mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lxy2 = light_x; // L.x*L.x + L.y*L.y + __m128 Lz = _mm_sub_ps(light_z, viewpos_z); + __m128 dist2 = _mm_add_ps(Lxy2, _mm_mul_ps(Lz, Lz)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_y, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_y, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend __m128i outcolor = 
fgcolor; @@ -108,6 +158,7 @@ namespace swrenderer _mm_storel_epi64((__m128i*)desttmp, outcolor); dest[offset] = desttmp[0]; dest[offset + pitch] = desttmp[1]; + viewpos_z = _mm_add_ps(viewpos_z, step_viewpos_z); } if (ssecount * 2 != count) @@ -124,7 +175,48 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lxy2 = light_x; // L.x*L.x + L.y*L.y + __m128 Lz = _mm_sub_ps(light_z, viewpos_z); + __m128 dist2 = _mm_add_ps(Lxy2, _mm_mul_ps(Lz, Lz)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_y, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_y, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = 
_mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend __m128i outcolor = fgcolor; @@ -142,7 +234,7 @@ namespace swrenderer int light = 256 - (args.Light() >> (FRACBITS - 8)); __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - + int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); uint32_t fracstep = args.TextureVStep(); @@ -150,6 +242,15 @@ namespace swrenderer uint32_t texturefracx = args.TextureUPos(); uint32_t *dest = (uint32_t*)args.Dest(); int dest_y = args.DestY(); + + auto lights = args.dc_lights; + auto num_lights = args.dc_num_lights; + float vpz = args.dc_viewpos.Z; + float stepvpz = args.dc_viewpos_step.Z; + vpz += thread->skipped_by_thread(dest_y) * stepvpz; + stepvpz *= thread->num_cores; + __m128 viewpos_z = _mm_set_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); + __m128 step_viewpos_z = _mm_set1_ps(stepvpz * 2.0f); count = thread->count_for_thread(dest_y, count); if (count <= 0) return; @@ -224,7 +325,48 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * 
(1/radius), 1) + __m128 Lxy2 = light_x; // L.x*L.x + L.y*L.y + __m128 Lz = _mm_sub_ps(light_z, viewpos_z); + __m128 dist2 = _mm_add_ps(Lxy2, _mm_mul_ps(Lz, Lz)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_y, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_y, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend __m128i outcolor = fgcolor; @@ -233,6 +375,7 @@ namespace swrenderer _mm_storel_epi64((__m128i*)desttmp, outcolor); dest[offset] = desttmp[0]; dest[offset + pitch] = desttmp[1]; + viewpos_z = _mm_add_ps(viewpos_z, step_viewpos_z); } if (ssecount * 2 != count) @@ -268,7 +411,48 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = 
_mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lxy2 = light_x; // L.x*L.x + L.y*L.y + __m128 Lz = _mm_sub_ps(light_z, viewpos_z); + __m128 dist2 = _mm_add_ps(Lxy2, _mm_mul_ps(Lz, Lz)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_y, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_y, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend __m128i outcolor = fgcolor; @@ -297,7 +481,7 @@ namespace swrenderer shade_fade = _mm_mullo_epi16(shade_fade, inv_light); __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, 
shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); int desaturate = shade_constants.desaturate; - + int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); uint32_t fracstep = args.TextureVStep(); @@ -305,6 +489,15 @@ namespace swrenderer uint32_t texturefracx = args.TextureUPos(); uint32_t *dest = (uint32_t*)args.Dest(); int dest_y = args.DestY(); + + auto lights = args.dc_lights; + auto num_lights = args.dc_num_lights; + float vpz = args.dc_viewpos.Z; + float stepvpz = args.dc_viewpos_step.Z; + vpz += thread->skipped_by_thread(dest_y) * stepvpz; + stepvpz *= thread->num_cores; + __m128 viewpos_z = _mm_set_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); + __m128 step_viewpos_z = _mm_set1_ps(stepvpz * 2.0f); count = thread->count_for_thread(dest_y, count); if (count <= 0) return; @@ -340,6 +533,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -356,6 +550,46 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lxy2 = light_x; // L.x*L.x + L.y*L.y + __m128 Lz = _mm_sub_ps(light_z, viewpos_z); + __m128 dist2 = _mm_add_ps(Lxy2, _mm_mul_ps(Lz, Lz)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 
distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_y, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_y, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend __m128i outcolor = fgcolor; @@ -364,6 +598,7 @@ namespace swrenderer _mm_storel_epi64((__m128i*)desttmp, outcolor); dest[offset] = desttmp[0]; dest[offset + pitch] = desttmp[1]; + viewpos_z = _mm_add_ps(viewpos_z, step_viewpos_z); } if (ssecount * 2 != count) @@ -380,6 +615,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -396,6 +632,46 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = 
_mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lxy2 = light_x; // L.x*L.x + L.y*L.y + __m128 Lz = _mm_sub_ps(light_z, viewpos_z); + __m128 dist2 = _mm_add_ps(Lxy2, _mm_mul_ps(Lz, Lz)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_y, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_y, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend __m128i outcolor = fgcolor; @@ -418,7 +694,7 @@ namespace swrenderer shade_fade = _mm_mullo_epi16(shade_fade, inv_light); __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, 
shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); int desaturate = shade_constants.desaturate; - + int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); uint32_t fracstep = args.TextureVStep(); @@ -426,6 +702,15 @@ namespace swrenderer uint32_t texturefracx = args.TextureUPos(); uint32_t *dest = (uint32_t*)args.Dest(); int dest_y = args.DestY(); + + auto lights = args.dc_lights; + auto num_lights = args.dc_num_lights; + float vpz = args.dc_viewpos.Z; + float stepvpz = args.dc_viewpos_step.Z; + vpz += thread->skipped_by_thread(dest_y) * stepvpz; + stepvpz *= thread->num_cores; + __m128 viewpos_z = _mm_set_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); + __m128 step_viewpos_z = _mm_set1_ps(stepvpz * 2.0f); count = thread->count_for_thread(dest_y, count); if (count <= 0) return; @@ -500,6 +785,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -516,6 +802,46 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lxy2 = light_x; // L.x*L.x + L.y*L.y + __m128 Lz = _mm_sub_ps(light_z, viewpos_z); + __m128 dist2 = _mm_add_ps(Lxy2, _mm_mul_ps(Lz, Lz)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 
distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_y, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_y, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend __m128i outcolor = fgcolor; @@ -524,6 +850,7 @@ namespace swrenderer _mm_storel_epi64((__m128i*)desttmp, outcolor); dest[offset] = desttmp[0]; dest[offset + pitch] = desttmp[1]; + viewpos_z = _mm_add_ps(viewpos_z, step_viewpos_z); } if (ssecount * 2 != count) @@ -559,6 +886,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -575,6 +903,46 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = 
_mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lxy2 = light_x; // L.x*L.x + L.y*L.y + __m128 Lz = _mm_sub_ps(light_z, viewpos_z); + __m128 dist2 = _mm_add_ps(Lxy2, _mm_mul_ps(Lz, Lz)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_y, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_y, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend __m128i outcolor = fgcolor; @@ -614,7 +982,7 @@ namespace swrenderer int light = 256 - (args.Light() >> (FRACBITS - 8)); __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 
- light, 256 - light); - + int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); uint32_t fracstep = args.TextureVStep(); @@ -622,6 +990,15 @@ namespace swrenderer uint32_t texturefracx = args.TextureUPos(); uint32_t *dest = (uint32_t*)args.Dest(); int dest_y = args.DestY(); + + auto lights = args.dc_lights; + auto num_lights = args.dc_num_lights; + float vpz = args.dc_viewpos.Z; + float stepvpz = args.dc_viewpos_step.Z; + vpz += thread->skipped_by_thread(dest_y) * stepvpz; + stepvpz *= thread->num_cores; + __m128 viewpos_z = _mm_set_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); + __m128 step_viewpos_z = _mm_set1_ps(stepvpz * 2.0f); count = thread->count_for_thread(dest_y, count); if (count <= 0) return; @@ -658,7 +1035,48 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lxy2 = light_x; // L.x*L.x + L.y*L.y + __m128 Lz = _mm_sub_ps(light_z, viewpos_z); + __m128 dist2 = _mm_add_ps(Lxy2, _mm_mul_ps(Lz, Lz)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_y, rcp_dist), distance_attenuation); + + __m128 is_attenuated = 
_mm_cmpeq_ps(light_y, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); @@ -675,6 +1093,7 @@ namespace swrenderer _mm_storel_epi64((__m128i*)desttmp, outcolor); dest[offset] = desttmp[0]; dest[offset + pitch] = desttmp[1]; + viewpos_z = _mm_add_ps(viewpos_z, step_viewpos_z); } if (ssecount * 2 != count) @@ -692,7 +1111,48 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lxy2 = light_x; // L.x*L.x + L.y*L.y + __m128 Lz = _mm_sub_ps(light_z, viewpos_z); + __m128 dist2 = _mm_add_ps(Lxy2, _mm_mul_ps(Lz, Lz)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 
distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_y, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_y, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); @@ -718,7 +1178,7 @@ namespace swrenderer int light = 256 - (args.Light() >> (FRACBITS - 8)); __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - + int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); uint32_t fracstep = args.TextureVStep(); @@ -726,6 +1186,15 @@ namespace swrenderer uint32_t texturefracx = args.TextureUPos(); uint32_t *dest = (uint32_t*)args.Dest(); int dest_y = args.DestY(); + + auto lights = args.dc_lights; + auto num_lights = args.dc_num_lights; + float vpz = args.dc_viewpos.Z; + float stepvpz = args.dc_viewpos_step.Z; + vpz += thread->skipped_by_thread(dest_y) * 
stepvpz; + stepvpz *= thread->num_cores; + __m128 viewpos_z = _mm_set_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); + __m128 step_viewpos_z = _mm_set1_ps(stepvpz * 2.0f); count = thread->count_for_thread(dest_y, count); if (count <= 0) return; @@ -801,7 +1270,48 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lxy2 = light_x; // L.x*L.x + L.y*L.y + __m128 Lz = _mm_sub_ps(light_z, viewpos_z); + __m128 dist2 = _mm_add_ps(Lxy2, _mm_mul_ps(Lz, Lz)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_y, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_y, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, 
_MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); @@ -818,6 +1328,7 @@ namespace swrenderer _mm_storel_epi64((__m128i*)desttmp, outcolor); dest[offset] = desttmp[0]; dest[offset + pitch] = desttmp[1]; + viewpos_z = _mm_add_ps(viewpos_z, step_viewpos_z); } if (ssecount * 2 != count) @@ -854,7 +1365,48 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lxy2 = light_x; // L.x*L.x + L.y*L.y + __m128 Lz = _mm_sub_ps(light_z, viewpos_z); + __m128 dist2 = _mm_add_ps(Lxy2, _mm_mul_ps(Lz, Lz)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_y, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_y, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, 
point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); @@ -891,7 +1443,7 @@ namespace swrenderer shade_fade = _mm_mullo_epi16(shade_fade, inv_light); __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); int desaturate = shade_constants.desaturate; - + int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); uint32_t fracstep = args.TextureVStep(); @@ -899,6 +1451,15 @@ namespace swrenderer uint32_t texturefracx = args.TextureUPos(); uint32_t *dest = (uint32_t*)args.Dest(); int dest_y = args.DestY(); + + auto lights = args.dc_lights; + auto num_lights = args.dc_num_lights; + float vpz = args.dc_viewpos.Z; + float stepvpz = args.dc_viewpos_step.Z; + vpz += thread->skipped_by_thread(dest_y) * stepvpz; + stepvpz *= thread->num_cores; + __m128 viewpos_z = _mm_set_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); + __m128 step_viewpos_z = _mm_set1_ps(stepvpz * 2.0f); count = thread->count_for_thread(dest_y, count); if (count <= 0) return; @@ -935,6 +1496,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; int blue0 = 
BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -951,6 +1513,46 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lxy2 = light_x; // L.x*L.x + L.y*L.y + __m128 Lz = _mm_sub_ps(light_z, viewpos_z); + __m128 dist2 = _mm_add_ps(Lxy2, _mm_mul_ps(Lz, Lz)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_y, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_y, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, 
_mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); @@ -967,6 +1569,7 @@ namespace swrenderer _mm_storel_epi64((__m128i*)desttmp, outcolor); dest[offset] = desttmp[0]; dest[offset + pitch] = desttmp[1]; + viewpos_z = _mm_add_ps(viewpos_z, step_viewpos_z); } if (ssecount * 2 != count) @@ -984,6 +1587,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -1000,6 +1604,46 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lxy2 = light_x; // L.x*L.x + L.y*L.y + __m128 Lz = _mm_sub_ps(light_z, viewpos_z); + __m128 dist2 = _mm_add_ps(Lxy2, _mm_mul_ps(Lz, Lz)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_y, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_y, _mm_setzero_ps()); + __m128i attenuation = 
_mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); @@ -1030,7 +1674,7 @@ namespace swrenderer shade_fade = _mm_mullo_epi16(shade_fade, inv_light); __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); int desaturate = shade_constants.desaturate; - + int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); uint32_t fracstep = args.TextureVStep(); @@ -1038,6 +1682,15 @@ namespace swrenderer uint32_t texturefracx = args.TextureUPos(); uint32_t *dest = (uint32_t*)args.Dest(); int dest_y = args.DestY(); + + auto lights = args.dc_lights; + auto num_lights = args.dc_num_lights; + float vpz = args.dc_viewpos.Z; + float stepvpz = args.dc_viewpos_step.Z; + vpz += thread->skipped_by_thread(dest_y) * stepvpz; + stepvpz *= thread->num_cores; + __m128 viewpos_z = _mm_set_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); + __m128 step_viewpos_z = _mm_set1_ps(stepvpz * 2.0f); count = thread->count_for_thread(dest_y, count); if (count <= 0) return; @@ -1113,6 +1766,7 @@ namespace swrenderer __m128i fgcolor = 
_mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -1129,6 +1783,46 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lxy2 = light_x; // L.x*L.x + L.y*L.y + __m128 Lz = _mm_sub_ps(light_z, viewpos_z); + __m128 dist2 = _mm_add_ps(Lxy2, _mm_mul_ps(Lz, Lz)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_y, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_y, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = 
_mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); @@ -1145,6 +1839,7 @@ namespace swrenderer _mm_storel_epi64((__m128i*)desttmp, outcolor); dest[offset] = desttmp[0]; dest[offset + pitch] = desttmp[1]; + viewpos_z = _mm_add_ps(viewpos_z, step_viewpos_z); } if (ssecount * 2 != count) @@ -1181,6 +1876,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -1197,6 +1893,46 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lxy2 = light_x; // L.x*L.x + L.y*L.y + __m128 Lz = _mm_sub_ps(light_z, viewpos_z); + __m128 dist2 = _mm_add_ps(Lxy2, _mm_mul_ps(Lz, Lz)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_y, 
rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_y, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); @@ -1244,7 +1980,7 @@ namespace swrenderer int light = 256 - (args.Light() >> (FRACBITS - 8)); __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - + int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); uint32_t fracstep = args.TextureVStep(); @@ -1252,6 +1988,15 @@ namespace swrenderer uint32_t texturefracx = args.TextureUPos(); uint32_t *dest = (uint32_t*)args.Dest(); int dest_y = args.DestY(); + + auto lights = args.dc_lights; + auto num_lights = args.dc_num_lights; + float vpz = args.dc_viewpos.Z; + float stepvpz = args.dc_viewpos_step.Z; + vpz += thread->skipped_by_thread(dest_y) * stepvpz; + stepvpz *= thread->num_cores; + __m128 viewpos_z = _mm_set_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); + __m128 step_viewpos_z = _mm_set1_ps(stepvpz * 2.0f); count = thread->count_for_thread(dest_y, count); if (count <= 0) return; @@ -1288,7 +2033,48 @@ namespace swrenderer __m128i fgcolor = 
_mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lxy2 = light_x; // L.x*L.x + L.y*L.y + __m128 Lz = _mm_sub_ps(light_z, viewpos_z); + __m128 dist2 = _mm_add_ps(Lxy2, _mm_mul_ps(Lz, Lz)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_y, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_y, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend uint32_t alpha0 = 
APART(ifgcolor[0]); @@ -1326,6 +2112,7 @@ namespace swrenderer _mm_storel_epi64((__m128i*)desttmp, outcolor); dest[offset] = desttmp[0]; dest[offset + pitch] = desttmp[1]; + viewpos_z = _mm_add_ps(viewpos_z, step_viewpos_z); } if (ssecount * 2 != count) @@ -1343,7 +2130,48 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lxy2 = light_x; // L.x*L.x + L.y*L.y + __m128 Lz = _mm_sub_ps(light_z, viewpos_z); + __m128 dist2 = _mm_add_ps(Lxy2, _mm_mul_ps(Lz, Lz)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_y, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_y, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = 
_mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend uint32_t alpha0 = APART(ifgcolor[0]); @@ -1390,7 +2218,7 @@ namespace swrenderer int light = 256 - (args.Light() >> (FRACBITS - 8)); __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - + int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); uint32_t fracstep = args.TextureVStep(); @@ -1398,6 +2226,15 @@ namespace swrenderer uint32_t texturefracx = args.TextureUPos(); uint32_t *dest = (uint32_t*)args.Dest(); int dest_y = args.DestY(); + + auto lights = args.dc_lights; + auto num_lights = args.dc_num_lights; + float vpz = args.dc_viewpos.Z; + float stepvpz = args.dc_viewpos_step.Z; + vpz += thread->skipped_by_thread(dest_y) * stepvpz; + stepvpz *= thread->num_cores; + __m128 viewpos_z = _mm_set_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); + __m128 step_viewpos_z = _mm_set1_ps(stepvpz * 2.0f); count = thread->count_for_thread(dest_y, count); if (count <= 0) return; @@ -1473,7 +2310,48 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - 
MIN(dist * (1/radius), 1) + __m128 Lxy2 = light_x; // L.x*L.x + L.y*L.y + __m128 Lz = _mm_sub_ps(light_z, viewpos_z); + __m128 dist2 = _mm_add_ps(Lxy2, _mm_mul_ps(Lz, Lz)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_y, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_y, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend uint32_t alpha0 = APART(ifgcolor[0]); @@ -1511,6 +2389,7 @@ namespace swrenderer _mm_storel_epi64((__m128i*)desttmp, outcolor); dest[offset] = desttmp[0]; dest[offset + pitch] = desttmp[1]; + viewpos_z = _mm_add_ps(viewpos_z, step_viewpos_z); } if (ssecount * 2 != count) @@ -1547,7 +2426,48 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; 
i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lxy2 = light_x; // L.x*L.x + L.y*L.y + __m128 Lz = _mm_sub_ps(light_z, viewpos_z); + __m128 dist2 = _mm_add_ps(Lxy2, _mm_mul_ps(Lz, Lz)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_y, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_y, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend uint32_t alpha0 = APART(ifgcolor[0]); @@ -1605,7 +2525,7 @@ namespace swrenderer shade_fade = _mm_mullo_epi16(shade_fade, inv_light); __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, 
shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); int desaturate = shade_constants.desaturate; - + int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); uint32_t fracstep = args.TextureVStep(); @@ -1613,6 +2533,15 @@ namespace swrenderer uint32_t texturefracx = args.TextureUPos(); uint32_t *dest = (uint32_t*)args.Dest(); int dest_y = args.DestY(); + + auto lights = args.dc_lights; + auto num_lights = args.dc_num_lights; + float vpz = args.dc_viewpos.Z; + float stepvpz = args.dc_viewpos_step.Z; + vpz += thread->skipped_by_thread(dest_y) * stepvpz; + stepvpz *= thread->num_cores; + __m128 viewpos_z = _mm_set_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); + __m128 step_viewpos_z = _mm_set1_ps(stepvpz * 2.0f); count = thread->count_for_thread(dest_y, count); if (count <= 0) return; @@ -1649,6 +2578,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -1665,6 +2595,46 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lxy2 = light_x; // L.x*L.x + L.y*L.y + __m128 Lz = _mm_sub_ps(light_z, viewpos_z); + __m128 dist2 = _mm_add_ps(Lxy2, _mm_mul_ps(Lz, Lz)); + __m128 rcp_dist = 
_mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_y, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_y, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend uint32_t alpha0 = APART(ifgcolor[0]); @@ -1702,6 +2672,7 @@ namespace swrenderer _mm_storel_epi64((__m128i*)desttmp, outcolor); dest[offset] = desttmp[0]; dest[offset + pitch] = desttmp[1]; + viewpos_z = _mm_add_ps(viewpos_z, step_viewpos_z); } if (ssecount * 2 != count) @@ -1719,6 +2690,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -1735,6 +2707,46 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + __m128i lit = 
_mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lxy2 = light_x; // L.x*L.x + L.y*L.y + __m128 Lz = _mm_sub_ps(light_z, viewpos_z); + __m128 dist2 = _mm_add_ps(Lxy2, _mm_mul_ps(Lz, Lz)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_y, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_y, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend uint32_t alpha0 = APART(ifgcolor[0]); @@ -1786,7 +2798,7 @@ namespace swrenderer shade_fade = _mm_mullo_epi16(shade_fade, inv_light); __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, 
shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); int desaturate = shade_constants.desaturate; - + int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); uint32_t fracstep = args.TextureVStep(); @@ -1794,6 +2806,15 @@ namespace swrenderer uint32_t texturefracx = args.TextureUPos(); uint32_t *dest = (uint32_t*)args.Dest(); int dest_y = args.DestY(); + + auto lights = args.dc_lights; + auto num_lights = args.dc_num_lights; + float vpz = args.dc_viewpos.Z; + float stepvpz = args.dc_viewpos_step.Z; + vpz += thread->skipped_by_thread(dest_y) * stepvpz; + stepvpz *= thread->num_cores; + __m128 viewpos_z = _mm_set_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); + __m128 step_viewpos_z = _mm_set1_ps(stepvpz * 2.0f); count = thread->count_for_thread(dest_y, count); if (count <= 0) return; @@ -1869,6 +2890,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -1885,6 +2907,46 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lxy2 = light_x; // L.x*L.x + L.y*L.y + __m128 Lz = _mm_sub_ps(light_z, viewpos_z); + __m128 dist2 = 
_mm_add_ps(Lxy2, _mm_mul_ps(Lz, Lz)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_y, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_y, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend uint32_t alpha0 = APART(ifgcolor[0]); @@ -1922,6 +2984,7 @@ namespace swrenderer _mm_storel_epi64((__m128i*)desttmp, outcolor); dest[offset] = desttmp[0]; dest[offset + pitch] = desttmp[1]; + viewpos_z = _mm_add_ps(viewpos_z, step_viewpos_z); } if (ssecount * 2 != count) @@ -1958,6 +3021,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -1974,6 +3038,46 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = 
_mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lxy2 = light_x; // L.x*L.x + L.y*L.y + __m128 Lz = _mm_sub_ps(light_z, viewpos_z); + __m128 dist2 = _mm_add_ps(Lxy2, _mm_mul_ps(Lz, Lz)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_y, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_y, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend uint32_t alpha0 = APART(ifgcolor[0]); @@ -2042,7 +3146,7 @@ namespace swrenderer int light = 256 - (args.Light() >> (FRACBITS - 8)); __m128i 
mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - + int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); uint32_t fracstep = args.TextureVStep(); @@ -2050,6 +3154,15 @@ namespace swrenderer uint32_t texturefracx = args.TextureUPos(); uint32_t *dest = (uint32_t*)args.Dest(); int dest_y = args.DestY(); + + auto lights = args.dc_lights; + auto num_lights = args.dc_num_lights; + float vpz = args.dc_viewpos.Z; + float stepvpz = args.dc_viewpos_step.Z; + vpz += thread->skipped_by_thread(dest_y) * stepvpz; + stepvpz *= thread->num_cores; + __m128 viewpos_z = _mm_set_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); + __m128 step_viewpos_z = _mm_set1_ps(stepvpz * 2.0f); count = thread->count_for_thread(dest_y, count); if (count <= 0) return; @@ -2086,7 +3199,48 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lxy2 = light_x; // L.x*L.x + L.y*L.y + __m128 Lz = _mm_sub_ps(light_z, viewpos_z); + __m128 dist2 = _mm_add_ps(Lxy2, _mm_mul_ps(Lz, Lz)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The 
point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_y, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_y, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend uint32_t alpha0 = APART(ifgcolor[0]); @@ -2124,6 +3278,7 @@ namespace swrenderer _mm_storel_epi64((__m128i*)desttmp, outcolor); dest[offset] = desttmp[0]; dest[offset + pitch] = desttmp[1]; + viewpos_z = _mm_add_ps(viewpos_z, step_viewpos_z); } if (ssecount * 2 != count) @@ -2141,7 +3296,48 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lxy2 = light_x; // L.x*L.x + L.y*L.y + __m128 Lz = _mm_sub_ps(light_z, viewpos_z); + __m128 dist2 = 
_mm_add_ps(Lxy2, _mm_mul_ps(Lz, Lz)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_y, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_y, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend uint32_t alpha0 = APART(ifgcolor[0]); @@ -2188,7 +3384,7 @@ namespace swrenderer int light = 256 - (args.Light() >> (FRACBITS - 8)); __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - + int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); uint32_t fracstep = args.TextureVStep(); @@ -2196,6 +3392,15 @@ namespace swrenderer uint32_t texturefracx = args.TextureUPos(); uint32_t *dest = (uint32_t*)args.Dest(); int dest_y = args.DestY(); + + auto lights = args.dc_lights; + auto num_lights = args.dc_num_lights; + float vpz = 
args.dc_viewpos.Z; + float stepvpz = args.dc_viewpos_step.Z; + vpz += thread->skipped_by_thread(dest_y) * stepvpz; + stepvpz *= thread->num_cores; + __m128 viewpos_z = _mm_set_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); + __m128 step_viewpos_z = _mm_set1_ps(stepvpz * 2.0f); count = thread->count_for_thread(dest_y, count); if (count <= 0) return; @@ -2271,7 +3476,48 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lxy2 = light_x; // L.x*L.x + L.y*L.y + __m128 Lz = _mm_sub_ps(light_z, viewpos_z); + __m128 dist2 = _mm_add_ps(Lxy2, _mm_mul_ps(Lz, Lz)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_y, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_y, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + 
light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend uint32_t alpha0 = APART(ifgcolor[0]); @@ -2309,6 +3555,7 @@ namespace swrenderer _mm_storel_epi64((__m128i*)desttmp, outcolor); dest[offset] = desttmp[0]; dest[offset + pitch] = desttmp[1]; + viewpos_z = _mm_add_ps(viewpos_z, step_viewpos_z); } if (ssecount * 2 != count) @@ -2345,7 +3592,48 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lxy2 = light_x; // L.x*L.x + L.y*L.y + __m128 Lz = _mm_sub_ps(light_z, viewpos_z); + __m128 dist2 = _mm_add_ps(Lxy2, _mm_mul_ps(Lz, Lz)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_y, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_y, _mm_setzero_ps()); + __m128i attenuation = 
_mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend uint32_t alpha0 = APART(ifgcolor[0]); @@ -2403,7 +3691,7 @@ namespace swrenderer shade_fade = _mm_mullo_epi16(shade_fade, inv_light); __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); int desaturate = shade_constants.desaturate; - + int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); uint32_t fracstep = args.TextureVStep(); @@ -2411,6 +3699,15 @@ namespace swrenderer uint32_t texturefracx = args.TextureUPos(); uint32_t *dest = (uint32_t*)args.Dest(); int dest_y = args.DestY(); + + auto lights = args.dc_lights; + auto num_lights = args.dc_num_lights; + float vpz = args.dc_viewpos.Z; + float stepvpz = args.dc_viewpos_step.Z; + vpz += thread->skipped_by_thread(dest_y) * stepvpz; + stepvpz *= thread->num_cores; + __m128 viewpos_z = _mm_set_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); + __m128 step_viewpos_z = _mm_set1_ps(stepvpz * 2.0f); count = thread->count_for_thread(dest_y, count); if (count <= 0) return; @@ -2447,6 +3744,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), 
_mm_setzero_si128()); // Shade + __m128i material = fgcolor; int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -2463,6 +3761,46 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lxy2 = light_x; // L.x*L.x + L.y*L.y + __m128 Lz = _mm_sub_ps(light_z, viewpos_z); + __m128 dist2 = _mm_add_ps(Lxy2, _mm_mul_ps(Lz, Lz)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_y, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_y, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, 
attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend uint32_t alpha0 = APART(ifgcolor[0]); @@ -2500,6 +3838,7 @@ namespace swrenderer _mm_storel_epi64((__m128i*)desttmp, outcolor); dest[offset] = desttmp[0]; dest[offset + pitch] = desttmp[1]; + viewpos_z = _mm_add_ps(viewpos_z, step_viewpos_z); } if (ssecount * 2 != count) @@ -2517,6 +3856,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -2533,6 +3873,46 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lxy2 = light_x; // L.x*L.x + L.y*L.y + __m128 Lz = _mm_sub_ps(light_z, viewpos_z); + __m128 dist2 = _mm_add_ps(Lxy2, _mm_mul_ps(Lz, Lz)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_y, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_y, 
_mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend uint32_t alpha0 = APART(ifgcolor[0]); @@ -2584,7 +3964,7 @@ namespace swrenderer shade_fade = _mm_mullo_epi16(shade_fade, inv_light); __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); int desaturate = shade_constants.desaturate; - + int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); uint32_t fracstep = args.TextureVStep(); @@ -2592,6 +3972,15 @@ namespace swrenderer uint32_t texturefracx = args.TextureUPos(); uint32_t *dest = (uint32_t*)args.Dest(); int dest_y = args.DestY(); + + auto lights = args.dc_lights; + auto num_lights = args.dc_num_lights; + float vpz = args.dc_viewpos.Z; + float stepvpz = args.dc_viewpos_step.Z; + vpz += thread->skipped_by_thread(dest_y) * stepvpz; + stepvpz *= thread->num_cores; + __m128 viewpos_z = _mm_set_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); + __m128 step_viewpos_z = _mm_set1_ps(stepvpz * 2.0f); count = thread->count_for_thread(dest_y, count); if (count <= 0) return; @@ -2667,6 +4056,7 @@ namespace swrenderer __m128i fgcolor = 
_mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -2683,6 +4073,46 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lxy2 = light_x; // L.x*L.x + L.y*L.y + __m128 Lz = _mm_sub_ps(light_z, viewpos_z); + __m128 dist2 = _mm_add_ps(Lxy2, _mm_mul_ps(Lz, Lz)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_y, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_y, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = 
_mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend uint32_t alpha0 = APART(ifgcolor[0]); @@ -2720,6 +4150,7 @@ namespace swrenderer _mm_storel_epi64((__m128i*)desttmp, outcolor); dest[offset] = desttmp[0]; dest[offset + pitch] = desttmp[1]; + viewpos_z = _mm_add_ps(viewpos_z, step_viewpos_z); } if (ssecount * 2 != count) @@ -2756,6 +4187,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -2772,6 +4204,46 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lxy2 = light_x; // L.x*L.x + L.y*L.y + __m128 Lz = _mm_sub_ps(light_z, viewpos_z); + __m128 dist2 = _mm_add_ps(Lxy2, _mm_mul_ps(Lz, Lz)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_y, rcp_dist), distance_attenuation); + 
+ __m128 is_attenuated = _mm_cmpeq_ps(light_y, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend uint32_t alpha0 = APART(ifgcolor[0]); @@ -2840,7 +4312,7 @@ namespace swrenderer int light = 256 - (args.Light() >> (FRACBITS - 8)); __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - + int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); uint32_t fracstep = args.TextureVStep(); @@ -2848,6 +4320,15 @@ namespace swrenderer uint32_t texturefracx = args.TextureUPos(); uint32_t *dest = (uint32_t*)args.Dest(); int dest_y = args.DestY(); + + auto lights = args.dc_lights; + auto num_lights = args.dc_num_lights; + float vpz = args.dc_viewpos.Z; + float stepvpz = args.dc_viewpos_step.Z; + vpz += thread->skipped_by_thread(dest_y) * stepvpz; + stepvpz *= thread->num_cores; + __m128 viewpos_z = _mm_set_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); + __m128 step_viewpos_z = _mm_set1_ps(stepvpz * 2.0f); count = thread->count_for_thread(dest_y, count); if (count <= 0) return; @@ -2884,7 +4365,48 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), 
_mm_setzero_si128()); // Shade + __m128i material = fgcolor; fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lxy2 = light_x; // L.x*L.x + L.y*L.y + __m128 Lz = _mm_sub_ps(light_z, viewpos_z); + __m128 dist2 = _mm_add_ps(Lxy2, _mm_mul_ps(Lz, Lz)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_y, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_y, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend uint32_t alpha0 = APART(ifgcolor[0]); @@ -2922,6 +4444,7 @@ namespace 
swrenderer _mm_storel_epi64((__m128i*)desttmp, outcolor); dest[offset] = desttmp[0]; dest[offset + pitch] = desttmp[1]; + viewpos_z = _mm_add_ps(viewpos_z, step_viewpos_z); } if (ssecount * 2 != count) @@ -2939,7 +4462,48 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lxy2 = light_x; // L.x*L.x + L.y*L.y + __m128 Lz = _mm_sub_ps(light_z, viewpos_z); + __m128 dist2 = _mm_add_ps(Lxy2, _mm_mul_ps(Lz, Lz)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_y, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_y, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit 
= _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend uint32_t alpha0 = APART(ifgcolor[0]); @@ -2986,7 +4550,7 @@ namespace swrenderer int light = 256 - (args.Light() >> (FRACBITS - 8)); __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - + int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); uint32_t fracstep = args.TextureVStep(); @@ -2994,6 +4558,15 @@ namespace swrenderer uint32_t texturefracx = args.TextureUPos(); uint32_t *dest = (uint32_t*)args.Dest(); int dest_y = args.DestY(); + + auto lights = args.dc_lights; + auto num_lights = args.dc_num_lights; + float vpz = args.dc_viewpos.Z; + float stepvpz = args.dc_viewpos_step.Z; + vpz += thread->skipped_by_thread(dest_y) * stepvpz; + stepvpz *= thread->num_cores; + __m128 viewpos_z = _mm_set_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); + __m128 step_viewpos_z = _mm_set1_ps(stepvpz * 2.0f); count = thread->count_for_thread(dest_y, count); if (count <= 0) return; @@ -3069,7 +4642,48 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lxy2 = light_x; // L.x*L.x + 
L.y*L.y + __m128 Lz = _mm_sub_ps(light_z, viewpos_z); + __m128 dist2 = _mm_add_ps(Lxy2, _mm_mul_ps(Lz, Lz)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_y, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_y, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend uint32_t alpha0 = APART(ifgcolor[0]); @@ -3107,6 +4721,7 @@ namespace swrenderer _mm_storel_epi64((__m128i*)desttmp, outcolor); dest[offset] = desttmp[0]; dest[offset + pitch] = desttmp[1]; + viewpos_z = _mm_add_ps(viewpos_z, step_viewpos_z); } if (ssecount * 2 != count) @@ -3143,7 +4758,48 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 
light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lxy2 = light_x; // L.x*L.x + L.y*L.y + __m128 Lz = _mm_sub_ps(light_z, viewpos_z); + __m128 dist2 = _mm_add_ps(Lxy2, _mm_mul_ps(Lz, Lz)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_y, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_y, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend uint32_t alpha0 = APART(ifgcolor[0]); @@ -3201,7 +4857,7 @@ namespace swrenderer shade_fade = _mm_mullo_epi16(shade_fade, inv_light); __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, 
shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); int desaturate = shade_constants.desaturate; - + int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); uint32_t fracstep = args.TextureVStep(); @@ -3209,6 +4865,15 @@ namespace swrenderer uint32_t texturefracx = args.TextureUPos(); uint32_t *dest = (uint32_t*)args.Dest(); int dest_y = args.DestY(); + + auto lights = args.dc_lights; + auto num_lights = args.dc_num_lights; + float vpz = args.dc_viewpos.Z; + float stepvpz = args.dc_viewpos_step.Z; + vpz += thread->skipped_by_thread(dest_y) * stepvpz; + stepvpz *= thread->num_cores; + __m128 viewpos_z = _mm_set_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); + __m128 step_viewpos_z = _mm_set1_ps(stepvpz * 2.0f); count = thread->count_for_thread(dest_y, count); if (count <= 0) return; @@ -3245,6 +4910,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -3261,6 +4927,46 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lxy2 = light_x; // L.x*L.x + L.y*L.y + __m128 Lz = _mm_sub_ps(light_z, viewpos_z); + __m128 dist2 = _mm_add_ps(Lxy2, _mm_mul_ps(Lz, Lz)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + 
__m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_y, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_y, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend uint32_t alpha0 = APART(ifgcolor[0]); @@ -3298,6 +5004,7 @@ namespace swrenderer _mm_storel_epi64((__m128i*)desttmp, outcolor); dest[offset] = desttmp[0]; dest[offset + pitch] = desttmp[1]; + viewpos_z = _mm_add_ps(viewpos_z, step_viewpos_z); } if (ssecount * 2 != count) @@ -3315,6 +5022,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -3331,6 +5039,46 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + 
__m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lxy2 = light_x; // L.x*L.x + L.y*L.y + __m128 Lz = _mm_sub_ps(light_z, viewpos_z); + __m128 dist2 = _mm_add_ps(Lxy2, _mm_mul_ps(Lz, Lz)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_y, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_y, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend uint32_t alpha0 = APART(ifgcolor[0]); @@ -3382,7 +5130,7 @@ namespace swrenderer shade_fade = _mm_mullo_epi16(shade_fade, inv_light); __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, 
shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); int desaturate = shade_constants.desaturate; - + int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); uint32_t fracstep = args.TextureVStep(); @@ -3390,6 +5138,15 @@ namespace swrenderer uint32_t texturefracx = args.TextureUPos(); uint32_t *dest = (uint32_t*)args.Dest(); int dest_y = args.DestY(); + + auto lights = args.dc_lights; + auto num_lights = args.dc_num_lights; + float vpz = args.dc_viewpos.Z; + float stepvpz = args.dc_viewpos_step.Z; + vpz += thread->skipped_by_thread(dest_y) * stepvpz; + stepvpz *= thread->num_cores; + __m128 viewpos_z = _mm_set_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); + __m128 step_viewpos_z = _mm_set1_ps(stepvpz * 2.0f); count = thread->count_for_thread(dest_y, count); if (count <= 0) return; @@ -3465,6 +5222,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -3481,6 +5239,46 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lxy2 = light_x; // L.x*L.x + L.y*L.y + __m128 Lz = _mm_sub_ps(light_z, viewpos_z); + __m128 dist2 = _mm_add_ps(Lxy2, _mm_mul_ps(Lz, Lz)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = 
_mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_y, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_y, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend uint32_t alpha0 = APART(ifgcolor[0]); @@ -3518,6 +5316,7 @@ namespace swrenderer _mm_storel_epi64((__m128i*)desttmp, outcolor); dest[offset] = desttmp[0]; dest[offset + pitch] = desttmp[1]; + viewpos_z = _mm_add_ps(viewpos_z, step_viewpos_z); } if (ssecount * 2 != count) @@ -3554,6 +5353,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -3570,6 +5370,46 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 
0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lxy2 = light_x; // L.x*L.x + L.y*L.y + __m128 Lz = _mm_sub_ps(light_z, viewpos_z); + __m128 dist2 = _mm_add_ps(Lxy2, _mm_mul_ps(Lz, Lz)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_y, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_y, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend uint32_t alpha0 = APART(ifgcolor[0]); diff --git a/src/swrenderer/drawers/r_draw_wall32.php b/src/swrenderer/drawers/r_draw_wall32.php index a414ee7d79..2944fa407a 100644 --- a/src/swrenderer/drawers/r_draw_wall32.php +++ 
b/src/swrenderer/drawers/r_draw_wall32.php @@ -101,7 +101,7 @@ namespace swrenderer __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); int desaturate = shade_constants.desaturate; - + int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); uint32_t fracstep = args.TextureVStep(); @@ -109,6 +109,15 @@ namespace swrenderer uint32_t texturefracx = args.TextureUPos(); uint32_t *dest = (uint32_t*)args.Dest(); int dest_y = args.DestY(); + + auto lights = args.dc_lights; + auto num_lights = args.dc_num_lights; + float vpz = args.dc_viewpos.Z; + float stepvpz = args.dc_viewpos_step.Z; + vpz += thread->skipped_by_thread(dest_y) * stepvpz; + stepvpz *= thread->num_cores; + __m128 viewpos_z = _mm_set_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); + __m128 step_viewpos_z = _mm_set1_ps(stepvpz * 2.0f); count = thread->count_for_thread(dest_y, count); if (count <= 0) return; @@ -157,6 +166,7 @@ namespace swrenderer _mm_storel_epi64((__m128i*)desttmp, outcolor); dest[offset] = desttmp[0]; dest[offset + pitch] = desttmp[1]; + viewpos_z = _mm_add_ps(viewpos_z, step_viewpos_z); } if (ssecount * 2 != count) @@ -218,8 +228,9 @@ namespace swrenderer } function Shade($isSimpleShade) - { - if ($isSimpleShade == true) + { ?> + __m128i material = fgcolor; + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lxy2 = light_x; // 
L.x*L.x + L.y*L.y + __m128 Lz = _mm_sub_ps(light_z, viewpos_z); + __m128 dist2 = _mm_add_ps(Lxy2, _mm_mul_ps(Lz, Lz)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_y, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_y, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); + } From e9efb64a0b9d637cf192b184798ac91d9fa0f289 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 23 Feb 2017 03:50:24 +0100 Subject: [PATCH 871/912] Fix light offset --- src/swrenderer/drawers/r_draw_wall32.h | 160 +++++++++-------------- src/swrenderer/drawers/r_draw_wall32.php | 8 +- 2 files changed, 63 insertions(+), 105 deletions(-) diff --git a/src/swrenderer/drawers/r_draw_wall32.h b/src/swrenderer/drawers/r_draw_wall32.h index d4ba5d51e2..7e6dd931dd 100644 --- a/src/swrenderer/drawers/r_draw_wall32.h +++ b/src/swrenderer/drawers/r_draw_wall32.h @@ -67,11 +67,9 @@ namespace swrenderer auto lights = args.dc_lights; auto 
num_lights = args.dc_num_lights; - float vpz = args.dc_viewpos.Z; - float stepvpz = args.dc_viewpos_step.Z; - vpz += thread->skipped_by_thread(dest_y) * stepvpz; - stepvpz *= thread->num_cores; - __m128 viewpos_z = _mm_set_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); + float vpz = args.dc_viewpos.Z + args.dc_viewpos_step.Z * thread->skipped_by_thread(dest_y); + float stepvpz = args.dc_viewpos_step.Z * thread->num_cores; + __m128 viewpos_z = _mm_setr_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); __m128 step_viewpos_z = _mm_set1_ps(stepvpz * 2.0f); count = thread->count_for_thread(dest_y, count); @@ -245,11 +243,9 @@ namespace swrenderer auto lights = args.dc_lights; auto num_lights = args.dc_num_lights; - float vpz = args.dc_viewpos.Z; - float stepvpz = args.dc_viewpos_step.Z; - vpz += thread->skipped_by_thread(dest_y) * stepvpz; - stepvpz *= thread->num_cores; - __m128 viewpos_z = _mm_set_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); + float vpz = args.dc_viewpos.Z + args.dc_viewpos_step.Z * thread->skipped_by_thread(dest_y); + float stepvpz = args.dc_viewpos_step.Z * thread->num_cores; + __m128 viewpos_z = _mm_setr_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); __m128 step_viewpos_z = _mm_set1_ps(stepvpz * 2.0f); count = thread->count_for_thread(dest_y, count); @@ -492,11 +488,9 @@ namespace swrenderer auto lights = args.dc_lights; auto num_lights = args.dc_num_lights; - float vpz = args.dc_viewpos.Z; - float stepvpz = args.dc_viewpos_step.Z; - vpz += thread->skipped_by_thread(dest_y) * stepvpz; - stepvpz *= thread->num_cores; - __m128 viewpos_z = _mm_set_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); + float vpz = args.dc_viewpos.Z + args.dc_viewpos_step.Z * thread->skipped_by_thread(dest_y); + float stepvpz = args.dc_viewpos_step.Z * thread->num_cores; + __m128 viewpos_z = _mm_setr_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); __m128 step_viewpos_z = _mm_set1_ps(stepvpz * 2.0f); count = thread->count_for_thread(dest_y, count); @@ -705,11 +699,9 @@ namespace swrenderer auto lights = args.dc_lights; auto num_lights = 
args.dc_num_lights; - float vpz = args.dc_viewpos.Z; - float stepvpz = args.dc_viewpos_step.Z; - vpz += thread->skipped_by_thread(dest_y) * stepvpz; - stepvpz *= thread->num_cores; - __m128 viewpos_z = _mm_set_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); + float vpz = args.dc_viewpos.Z + args.dc_viewpos_step.Z * thread->skipped_by_thread(dest_y); + float stepvpz = args.dc_viewpos_step.Z * thread->num_cores; + __m128 viewpos_z = _mm_setr_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); __m128 step_viewpos_z = _mm_set1_ps(stepvpz * 2.0f); count = thread->count_for_thread(dest_y, count); @@ -993,11 +985,9 @@ namespace swrenderer auto lights = args.dc_lights; auto num_lights = args.dc_num_lights; - float vpz = args.dc_viewpos.Z; - float stepvpz = args.dc_viewpos_step.Z; - vpz += thread->skipped_by_thread(dest_y) * stepvpz; - stepvpz *= thread->num_cores; - __m128 viewpos_z = _mm_set_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); + float vpz = args.dc_viewpos.Z + args.dc_viewpos_step.Z * thread->skipped_by_thread(dest_y); + float stepvpz = args.dc_viewpos_step.Z * thread->num_cores; + __m128 viewpos_z = _mm_setr_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); __m128 step_viewpos_z = _mm_set1_ps(stepvpz * 2.0f); count = thread->count_for_thread(dest_y, count); @@ -1189,11 +1179,9 @@ namespace swrenderer auto lights = args.dc_lights; auto num_lights = args.dc_num_lights; - float vpz = args.dc_viewpos.Z; - float stepvpz = args.dc_viewpos_step.Z; - vpz += thread->skipped_by_thread(dest_y) * stepvpz; - stepvpz *= thread->num_cores; - __m128 viewpos_z = _mm_set_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); + float vpz = args.dc_viewpos.Z + args.dc_viewpos_step.Z * thread->skipped_by_thread(dest_y); + float stepvpz = args.dc_viewpos_step.Z * thread->num_cores; + __m128 viewpos_z = _mm_setr_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); __m128 step_viewpos_z = _mm_set1_ps(stepvpz * 2.0f); count = thread->count_for_thread(dest_y, count); @@ -1454,11 +1442,9 @@ namespace swrenderer auto lights = args.dc_lights; auto num_lights = 
args.dc_num_lights; - float vpz = args.dc_viewpos.Z; - float stepvpz = args.dc_viewpos_step.Z; - vpz += thread->skipped_by_thread(dest_y) * stepvpz; - stepvpz *= thread->num_cores; - __m128 viewpos_z = _mm_set_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); + float vpz = args.dc_viewpos.Z + args.dc_viewpos_step.Z * thread->skipped_by_thread(dest_y); + float stepvpz = args.dc_viewpos_step.Z * thread->num_cores; + __m128 viewpos_z = _mm_setr_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); __m128 step_viewpos_z = _mm_set1_ps(stepvpz * 2.0f); count = thread->count_for_thread(dest_y, count); @@ -1685,11 +1671,9 @@ namespace swrenderer auto lights = args.dc_lights; auto num_lights = args.dc_num_lights; - float vpz = args.dc_viewpos.Z; - float stepvpz = args.dc_viewpos_step.Z; - vpz += thread->skipped_by_thread(dest_y) * stepvpz; - stepvpz *= thread->num_cores; - __m128 viewpos_z = _mm_set_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); + float vpz = args.dc_viewpos.Z + args.dc_viewpos_step.Z * thread->skipped_by_thread(dest_y); + float stepvpz = args.dc_viewpos_step.Z * thread->num_cores; + __m128 viewpos_z = _mm_setr_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); __m128 step_viewpos_z = _mm_set1_ps(stepvpz * 2.0f); count = thread->count_for_thread(dest_y, count); @@ -1991,11 +1975,9 @@ namespace swrenderer auto lights = args.dc_lights; auto num_lights = args.dc_num_lights; - float vpz = args.dc_viewpos.Z; - float stepvpz = args.dc_viewpos_step.Z; - vpz += thread->skipped_by_thread(dest_y) * stepvpz; - stepvpz *= thread->num_cores; - __m128 viewpos_z = _mm_set_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); + float vpz = args.dc_viewpos.Z + args.dc_viewpos_step.Z * thread->skipped_by_thread(dest_y); + float stepvpz = args.dc_viewpos_step.Z * thread->num_cores; + __m128 viewpos_z = _mm_setr_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); __m128 step_viewpos_z = _mm_set1_ps(stepvpz * 2.0f); count = thread->count_for_thread(dest_y, count); @@ -2229,11 +2211,9 @@ namespace swrenderer auto lights = args.dc_lights; auto num_lights = 
args.dc_num_lights; - float vpz = args.dc_viewpos.Z; - float stepvpz = args.dc_viewpos_step.Z; - vpz += thread->skipped_by_thread(dest_y) * stepvpz; - stepvpz *= thread->num_cores; - __m128 viewpos_z = _mm_set_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); + float vpz = args.dc_viewpos.Z + args.dc_viewpos_step.Z * thread->skipped_by_thread(dest_y); + float stepvpz = args.dc_viewpos_step.Z * thread->num_cores; + __m128 viewpos_z = _mm_setr_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); __m128 step_viewpos_z = _mm_set1_ps(stepvpz * 2.0f); count = thread->count_for_thread(dest_y, count); @@ -2536,11 +2516,9 @@ namespace swrenderer auto lights = args.dc_lights; auto num_lights = args.dc_num_lights; - float vpz = args.dc_viewpos.Z; - float stepvpz = args.dc_viewpos_step.Z; - vpz += thread->skipped_by_thread(dest_y) * stepvpz; - stepvpz *= thread->num_cores; - __m128 viewpos_z = _mm_set_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); + float vpz = args.dc_viewpos.Z + args.dc_viewpos_step.Z * thread->skipped_by_thread(dest_y); + float stepvpz = args.dc_viewpos_step.Z * thread->num_cores; + __m128 viewpos_z = _mm_setr_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); __m128 step_viewpos_z = _mm_set1_ps(stepvpz * 2.0f); count = thread->count_for_thread(dest_y, count); @@ -2809,11 +2787,9 @@ namespace swrenderer auto lights = args.dc_lights; auto num_lights = args.dc_num_lights; - float vpz = args.dc_viewpos.Z; - float stepvpz = args.dc_viewpos_step.Z; - vpz += thread->skipped_by_thread(dest_y) * stepvpz; - stepvpz *= thread->num_cores; - __m128 viewpos_z = _mm_set_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); + float vpz = args.dc_viewpos.Z + args.dc_viewpos_step.Z * thread->skipped_by_thread(dest_y); + float stepvpz = args.dc_viewpos_step.Z * thread->num_cores; + __m128 viewpos_z = _mm_setr_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); __m128 step_viewpos_z = _mm_set1_ps(stepvpz * 2.0f); count = thread->count_for_thread(dest_y, count); @@ -3157,11 +3133,9 @@ namespace swrenderer auto lights = args.dc_lights; auto num_lights = 
args.dc_num_lights; - float vpz = args.dc_viewpos.Z; - float stepvpz = args.dc_viewpos_step.Z; - vpz += thread->skipped_by_thread(dest_y) * stepvpz; - stepvpz *= thread->num_cores; - __m128 viewpos_z = _mm_set_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); + float vpz = args.dc_viewpos.Z + args.dc_viewpos_step.Z * thread->skipped_by_thread(dest_y); + float stepvpz = args.dc_viewpos_step.Z * thread->num_cores; + __m128 viewpos_z = _mm_setr_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); __m128 step_viewpos_z = _mm_set1_ps(stepvpz * 2.0f); count = thread->count_for_thread(dest_y, count); @@ -3395,11 +3369,9 @@ namespace swrenderer auto lights = args.dc_lights; auto num_lights = args.dc_num_lights; - float vpz = args.dc_viewpos.Z; - float stepvpz = args.dc_viewpos_step.Z; - vpz += thread->skipped_by_thread(dest_y) * stepvpz; - stepvpz *= thread->num_cores; - __m128 viewpos_z = _mm_set_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); + float vpz = args.dc_viewpos.Z + args.dc_viewpos_step.Z * thread->skipped_by_thread(dest_y); + float stepvpz = args.dc_viewpos_step.Z * thread->num_cores; + __m128 viewpos_z = _mm_setr_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); __m128 step_viewpos_z = _mm_set1_ps(stepvpz * 2.0f); count = thread->count_for_thread(dest_y, count); @@ -3702,11 +3674,9 @@ namespace swrenderer auto lights = args.dc_lights; auto num_lights = args.dc_num_lights; - float vpz = args.dc_viewpos.Z; - float stepvpz = args.dc_viewpos_step.Z; - vpz += thread->skipped_by_thread(dest_y) * stepvpz; - stepvpz *= thread->num_cores; - __m128 viewpos_z = _mm_set_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); + float vpz = args.dc_viewpos.Z + args.dc_viewpos_step.Z * thread->skipped_by_thread(dest_y); + float stepvpz = args.dc_viewpos_step.Z * thread->num_cores; + __m128 viewpos_z = _mm_setr_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); __m128 step_viewpos_z = _mm_set1_ps(stepvpz * 2.0f); count = thread->count_for_thread(dest_y, count); @@ -3975,11 +3945,9 @@ namespace swrenderer auto lights = args.dc_lights; auto num_lights = 
args.dc_num_lights; - float vpz = args.dc_viewpos.Z; - float stepvpz = args.dc_viewpos_step.Z; - vpz += thread->skipped_by_thread(dest_y) * stepvpz; - stepvpz *= thread->num_cores; - __m128 viewpos_z = _mm_set_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); + float vpz = args.dc_viewpos.Z + args.dc_viewpos_step.Z * thread->skipped_by_thread(dest_y); + float stepvpz = args.dc_viewpos_step.Z * thread->num_cores; + __m128 viewpos_z = _mm_setr_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); __m128 step_viewpos_z = _mm_set1_ps(stepvpz * 2.0f); count = thread->count_for_thread(dest_y, count); @@ -4323,11 +4291,9 @@ namespace swrenderer auto lights = args.dc_lights; auto num_lights = args.dc_num_lights; - float vpz = args.dc_viewpos.Z; - float stepvpz = args.dc_viewpos_step.Z; - vpz += thread->skipped_by_thread(dest_y) * stepvpz; - stepvpz *= thread->num_cores; - __m128 viewpos_z = _mm_set_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); + float vpz = args.dc_viewpos.Z + args.dc_viewpos_step.Z * thread->skipped_by_thread(dest_y); + float stepvpz = args.dc_viewpos_step.Z * thread->num_cores; + __m128 viewpos_z = _mm_setr_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); __m128 step_viewpos_z = _mm_set1_ps(stepvpz * 2.0f); count = thread->count_for_thread(dest_y, count); @@ -4561,11 +4527,9 @@ namespace swrenderer auto lights = args.dc_lights; auto num_lights = args.dc_num_lights; - float vpz = args.dc_viewpos.Z; - float stepvpz = args.dc_viewpos_step.Z; - vpz += thread->skipped_by_thread(dest_y) * stepvpz; - stepvpz *= thread->num_cores; - __m128 viewpos_z = _mm_set_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); + float vpz = args.dc_viewpos.Z + args.dc_viewpos_step.Z * thread->skipped_by_thread(dest_y); + float stepvpz = args.dc_viewpos_step.Z * thread->num_cores; + __m128 viewpos_z = _mm_setr_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); __m128 step_viewpos_z = _mm_set1_ps(stepvpz * 2.0f); count = thread->count_for_thread(dest_y, count); @@ -4868,11 +4832,9 @@ namespace swrenderer auto lights = args.dc_lights; auto num_lights = 
args.dc_num_lights; - float vpz = args.dc_viewpos.Z; - float stepvpz = args.dc_viewpos_step.Z; - vpz += thread->skipped_by_thread(dest_y) * stepvpz; - stepvpz *= thread->num_cores; - __m128 viewpos_z = _mm_set_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); + float vpz = args.dc_viewpos.Z + args.dc_viewpos_step.Z * thread->skipped_by_thread(dest_y); + float stepvpz = args.dc_viewpos_step.Z * thread->num_cores; + __m128 viewpos_z = _mm_setr_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); __m128 step_viewpos_z = _mm_set1_ps(stepvpz * 2.0f); count = thread->count_for_thread(dest_y, count); @@ -5141,11 +5103,9 @@ namespace swrenderer auto lights = args.dc_lights; auto num_lights = args.dc_num_lights; - float vpz = args.dc_viewpos.Z; - float stepvpz = args.dc_viewpos_step.Z; - vpz += thread->skipped_by_thread(dest_y) * stepvpz; - stepvpz *= thread->num_cores; - __m128 viewpos_z = _mm_set_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); + float vpz = args.dc_viewpos.Z + args.dc_viewpos_step.Z * thread->skipped_by_thread(dest_y); + float stepvpz = args.dc_viewpos_step.Z * thread->num_cores; + __m128 viewpos_z = _mm_setr_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); __m128 step_viewpos_z = _mm_set1_ps(stepvpz * 2.0f); count = thread->count_for_thread(dest_y, count); diff --git a/src/swrenderer/drawers/r_draw_wall32.php b/src/swrenderer/drawers/r_draw_wall32.php index 2944fa407a..bc003c9a5f 100644 --- a/src/swrenderer/drawers/r_draw_wall32.php +++ b/src/swrenderer/drawers/r_draw_wall32.php @@ -112,11 +112,9 @@ namespace swrenderer auto lights = args.dc_lights; auto num_lights = args.dc_num_lights; - float vpz = args.dc_viewpos.Z; - float stepvpz = args.dc_viewpos_step.Z; - vpz += thread->skipped_by_thread(dest_y) * stepvpz; - stepvpz *= thread->num_cores; - __m128 viewpos_z = _mm_set_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); + float vpz = args.dc_viewpos.Z + args.dc_viewpos_step.Z * thread->skipped_by_thread(dest_y); + float stepvpz = args.dc_viewpos_step.Z * thread->num_cores; + __m128 viewpos_z = _mm_setr_ps(vpz, vpz + 
stepvpz, 0.0f, 0.0f); __m128 step_viewpos_z = _mm_set1_ps(stepvpz * 2.0f); count = thread->count_for_thread(dest_y, count); From c6235fb674556b07b4cd3c17485ed1d61e4f1155 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 23 Feb 2017 04:26:37 +0100 Subject: [PATCH 872/912] Added light to span drawers --- src/swrenderer/drawers/r_draw_span32.h | 2880 ++++++++++++++++++++++ src/swrenderer/drawers/r_draw_span32.php | 57 +- 2 files changed, 2936 insertions(+), 1 deletion(-) diff --git a/src/swrenderer/drawers/r_draw_span32.h b/src/swrenderer/drawers/r_draw_span32.h index 6f282e7bd5..af2970f986 100644 --- a/src/swrenderer/drawers/r_draw_span32.h +++ b/src/swrenderer/drawers/r_draw_span32.h @@ -89,6 +89,13 @@ namespace swrenderer __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + auto lights = args.dc_lights; + auto num_lights = args.dc_num_lights; + float vpx = args.dc_viewpos.X; + float stepvpx = args.dc_viewpos_step.X; + __m128 viewpos_x = _mm_setr_ps(vpx, vpx + stepvpx, 0.0f, 0.0f); + __m128 step_viewpos_x = _mm_set1_ps(stepvpx * 2.0f); + int count = args.DestX2() - args.DestX1() + 1; int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); @@ -120,13 +127,55 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // 
dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z + __m128 Lx = _mm_sub_ps(light_x, viewpos_x); + __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend __m128i outcolor = fgcolor; outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); _mm_storel_epi64((__m128i*)(dest + offset), outcolor); + viewpos_x = _mm_add_ps(viewpos_x, step_viewpos_x); } if (ssecount * 2 != count) @@ -143,7 +192,48 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != 
num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z + __m128 Lx = _mm_sub_ps(light_x, viewpos_x); + __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend __m128i outcolor = fgcolor; @@ -159,6 +249,13 @@ namespace swrenderer __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 
256 - light); + auto lights = args.dc_lights; + auto num_lights = args.dc_num_lights; + float vpx = args.dc_viewpos.X; + float stepvpx = args.dc_viewpos_step.X; + __m128 viewpos_x = _mm_setr_ps(vpx, vpx + stepvpx, 0.0f, 0.0f); + __m128 step_viewpos_x = _mm_set1_ps(stepvpx * 2.0f); + int count = args.DestX2() - args.DestX1() + 1; int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); @@ -190,13 +287,55 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z + __m128 Lx = _mm_sub_ps(light_x, viewpos_x); + __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = 
_mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend __m128i outcolor = fgcolor; outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); _mm_storel_epi64((__m128i*)(dest + offset), outcolor); + viewpos_x = _mm_add_ps(viewpos_x, step_viewpos_x); } if (ssecount * 2 != count) @@ -213,7 +352,48 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z + __m128 Lx = _mm_sub_ps(light_x, viewpos_x); + __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, 
rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend __m128i outcolor = fgcolor; @@ -233,6 +413,13 @@ namespace swrenderer __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + auto lights = args.dc_lights; + auto num_lights = args.dc_num_lights; + float vpx = args.dc_viewpos.X; + float stepvpx = args.dc_viewpos_step.X; + __m128 viewpos_x = _mm_setr_ps(vpx, vpx + stepvpx, 0.0f, 0.0f); + __m128 step_viewpos_x = _mm_set1_ps(stepvpx * 2.0f); + int count = args.DestX2() - args.DestX1() + 1; int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); @@ -312,13 +499,55 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = 
_mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z + __m128 Lx = _mm_sub_ps(light_x, viewpos_x); + __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend __m128i outcolor = fgcolor; outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); _mm_storel_epi64((__m128i*)(dest + offset), outcolor); + viewpos_x = _mm_add_ps(viewpos_x, step_viewpos_x); } if (ssecount * 2 != count) @@ -358,7 +587,48 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), 
_mm_setzero_si128()); // Shade + __m128i material = fgcolor; fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z + __m128 Lx = _mm_sub_ps(light_x, viewpos_x); + __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend __m128i outcolor = fgcolor; @@ -374,6 +644,13 @@ namespace swrenderer 
__m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + auto lights = args.dc_lights; + auto num_lights = args.dc_num_lights; + float vpx = args.dc_viewpos.X; + float stepvpx = args.dc_viewpos_step.X; + __m128 viewpos_x = _mm_setr_ps(vpx, vpx + stepvpx, 0.0f, 0.0f); + __m128 step_viewpos_x = _mm_set1_ps(stepvpx * 2.0f); + int count = args.DestX2() - args.DestX1() + 1; int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); @@ -453,13 +730,55 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z + __m128 Lx = _mm_sub_ps(light_x, viewpos_x); + __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); + __m128i 
attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend __m128i outcolor = fgcolor; outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); _mm_storel_epi64((__m128i*)(dest + offset), outcolor); + viewpos_x = _mm_add_ps(viewpos_x, step_viewpos_x); } if (ssecount * 2 != count) @@ -499,7 +818,48 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z + __m128 Lx = _mm_sub_ps(light_x, viewpos_x); + __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 
simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend __m128i outcolor = fgcolor; @@ -527,6 +887,13 @@ namespace swrenderer __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); int desaturate = shade_constants.desaturate; + auto lights = args.dc_lights; + auto num_lights = args.dc_num_lights; + float vpx = args.dc_viewpos.X; + float stepvpx = args.dc_viewpos_step.X; + __m128 viewpos_x = _mm_setr_ps(vpx, vpx + stepvpx, 0.0f, 0.0f); + __m128 step_viewpos_x = _mm_set1_ps(stepvpx * 2.0f); + int count = args.DestX2() - args.DestX1() + 1; int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); @@ -558,6 +925,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), 
_mm_setzero_si128()); // Shade + __m128i material = fgcolor; int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -574,12 +942,53 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z + __m128 Lx = _mm_sub_ps(light_x, viewpos_x); + __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, 
attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend __m128i outcolor = fgcolor; outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); _mm_storel_epi64((__m128i*)(dest + offset), outcolor); + viewpos_x = _mm_add_ps(viewpos_x, step_viewpos_x); } if (ssecount * 2 != count) @@ -596,6 +1005,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -612,6 +1022,46 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z + __m128 Lx = _mm_sub_ps(light_x, viewpos_x); + __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); + __m128i attenuation = 
_mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend __m128i outcolor = fgcolor; @@ -632,6 +1082,13 @@ namespace swrenderer __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); int desaturate = shade_constants.desaturate; + auto lights = args.dc_lights; + auto num_lights = args.dc_num_lights; + float vpx = args.dc_viewpos.X; + float stepvpx = args.dc_viewpos_step.X; + __m128 viewpos_x = _mm_setr_ps(vpx, vpx + stepvpx, 0.0f, 0.0f); + __m128 step_viewpos_x = _mm_set1_ps(stepvpx * 2.0f); + int count = args.DestX2() - args.DestX1() + 1; int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); @@ -663,6 +1120,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -679,12 +1137,53 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); 
fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z + __m128 Lx = _mm_sub_ps(light_x, viewpos_x); + __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend __m128i outcolor = fgcolor; outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); _mm_storel_epi64((__m128i*)(dest + offset), 
outcolor); + viewpos_x = _mm_add_ps(viewpos_x, step_viewpos_x); } if (ssecount * 2 != count) @@ -701,6 +1200,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -717,6 +1217,46 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z + __m128 Lx = _mm_sub_ps(light_x, viewpos_x); + __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + 
light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend __m128i outcolor = fgcolor; @@ -741,6 +1281,13 @@ namespace swrenderer __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); int desaturate = shade_constants.desaturate; + auto lights = args.dc_lights; + auto num_lights = args.dc_num_lights; + float vpx = args.dc_viewpos.X; + float stepvpx = args.dc_viewpos_step.X; + __m128 viewpos_x = _mm_setr_ps(vpx, vpx + stepvpx, 0.0f, 0.0f); + __m128 step_viewpos_x = _mm_set1_ps(stepvpx * 2.0f); + int count = args.DestX2() - args.DestX1() + 1; int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); @@ -820,6 +1367,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -836,12 +1384,53 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = 
_mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z + __m128 Lx = _mm_sub_ps(light_x, viewpos_x); + __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend __m128i outcolor = fgcolor; outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); _mm_storel_epi64((__m128i*)(dest + offset), outcolor); + viewpos_x = _mm_add_ps(viewpos_x, step_viewpos_x); } if (ssecount * 2 != count) @@ -881,6 +1470,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; int blue0 = BPART(ifgcolor[0]); int 
green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -897,6 +1487,46 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z + __m128 Lx = _mm_sub_ps(light_x, viewpos_x); + __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, 
_mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend __m128i outcolor = fgcolor; @@ -917,6 +1547,13 @@ namespace swrenderer __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); int desaturate = shade_constants.desaturate; + auto lights = args.dc_lights; + auto num_lights = args.dc_num_lights; + float vpx = args.dc_viewpos.X; + float stepvpx = args.dc_viewpos_step.X; + __m128 viewpos_x = _mm_setr_ps(vpx, vpx + stepvpx, 0.0f, 0.0f); + __m128 step_viewpos_x = _mm_set1_ps(stepvpx * 2.0f); + int count = args.DestX2() - args.DestX1() + 1; int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); @@ -996,6 +1633,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -1012,12 +1650,53 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z + __m128 Lx = _mm_sub_ps(light_x, viewpos_x); + __m128 dist2 = 
_mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend __m128i outcolor = fgcolor; outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); _mm_storel_epi64((__m128i*)(dest + offset), outcolor); + viewpos_x = _mm_add_ps(viewpos_x, step_viewpos_x); } if (ssecount * 2 != count) @@ -1057,6 +1736,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -1073,6 +1753,46 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + 
__m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z + __m128 Lx = _mm_sub_ps(light_x, viewpos_x); + __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend __m128i outcolor = fgcolor; @@ -1146,6 +1866,13 @@ namespace swrenderer __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); __m128i inv_light = _mm_set_epi16(0, 256 - 
light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + auto lights = args.dc_lights; + auto num_lights = args.dc_num_lights; + float vpx = args.dc_viewpos.X; + float stepvpx = args.dc_viewpos_step.X; + __m128 viewpos_x = _mm_setr_ps(vpx, vpx + stepvpx, 0.0f, 0.0f); + __m128 step_viewpos_x = _mm_set1_ps(stepvpx * 2.0f); + int count = args.DestX2() - args.DestX1() + 1; int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); @@ -1178,7 +1905,48 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z + __m128 Lx = _mm_sub_ps(light_x, viewpos_x); + __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, 
point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); @@ -1193,6 +1961,7 @@ namespace swrenderer outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); _mm_storel_epi64((__m128i*)(dest + offset), outcolor); + viewpos_x = _mm_add_ps(viewpos_x, step_viewpos_x); } if (ssecount * 2 != count) @@ -1210,7 +1979,48 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z + __m128 Lx = _mm_sub_ps(light_x, viewpos_x); + __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + 
+ // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); @@ -1234,6 +2044,13 @@ namespace swrenderer __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + auto lights = args.dc_lights; + auto num_lights = args.dc_num_lights; + float vpx = args.dc_viewpos.X; + float stepvpx = args.dc_viewpos_step.X; + __m128 viewpos_x = _mm_setr_ps(vpx, vpx + stepvpx, 0.0f, 0.0f); + __m128 step_viewpos_x = _mm_set1_ps(stepvpx * 2.0f); + int count = args.DestX2() - args.DestX1() + 1; int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); @@ -1266,7 +2083,48 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); 
+ __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z + __m128 Lx = _mm_sub_ps(light_x, viewpos_x); + __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); @@ -1281,6 +2139,7 @@ namespace swrenderer outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); 
_mm_storel_epi64((__m128i*)(dest + offset), outcolor); + viewpos_x = _mm_add_ps(viewpos_x, step_viewpos_x); } if (ssecount * 2 != count) @@ -1298,7 +2157,48 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z + __m128 Lx = _mm_sub_ps(light_x, viewpos_x); + __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, 
attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); @@ -1326,6 +2226,13 @@ namespace swrenderer __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + auto lights = args.dc_lights; + auto num_lights = args.dc_num_lights; + float vpx = args.dc_viewpos.X; + float stepvpx = args.dc_viewpos_step.X; + __m128 viewpos_x = _mm_setr_ps(vpx, vpx + stepvpx, 0.0f, 0.0f); + __m128 step_viewpos_x = _mm_set1_ps(stepvpx * 2.0f); + int count = args.DestX2() - args.DestX1() + 1; int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); @@ -1406,7 +2313,48 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z + __m128 Lx = _mm_sub_ps(light_x, viewpos_x); + __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 
simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); @@ -1421,6 +2369,7 @@ namespace swrenderer outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); _mm_storel_epi64((__m128i*)(dest + offset), outcolor); + viewpos_x = _mm_add_ps(viewpos_x, step_viewpos_x); } if (ssecount * 2 != count) @@ -1461,7 +2410,48 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lyz2 = 
light_y; // L.y*L.y + L.z*L.z + __m128 Lx = _mm_sub_ps(light_x, viewpos_x); + __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); @@ -1485,6 +2475,13 @@ namespace swrenderer __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + auto lights = args.dc_lights; + auto num_lights = args.dc_num_lights; + float vpx = args.dc_viewpos.X; + float stepvpx = args.dc_viewpos_step.X; + __m128 viewpos_x = _mm_setr_ps(vpx, vpx + stepvpx, 0.0f, 0.0f); + __m128 step_viewpos_x = _mm_set1_ps(stepvpx * 2.0f); + int count = args.DestX2() - args.DestX1() + 1; 
int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); @@ -1565,7 +2562,48 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z + __m128 Lx = _mm_sub_ps(light_x, viewpos_x); + __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, 
_mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); @@ -1580,6 +2618,7 @@ namespace swrenderer outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); _mm_storel_epi64((__m128i*)(dest + offset), outcolor); + viewpos_x = _mm_add_ps(viewpos_x, step_viewpos_x); } if (ssecount * 2 != count) @@ -1620,7 +2659,48 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z + __m128 Lx = _mm_sub_ps(light_x, viewpos_x); + __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = 
_mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); @@ -1656,6 +2736,13 @@ namespace swrenderer __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); int desaturate = shade_constants.desaturate; + auto lights = args.dc_lights; + auto num_lights = args.dc_num_lights; + float vpx = args.dc_viewpos.X; + float stepvpx = args.dc_viewpos_step.X; + __m128 viewpos_x = _mm_setr_ps(vpx, vpx + stepvpx, 0.0f, 0.0f); + __m128 step_viewpos_x = _mm_set1_ps(stepvpx * 2.0f); + int count = args.DestX2() - args.DestX1() + 1; int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); @@ -1688,6 +2775,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -1704,6 +2792,46 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + __m128i lit = 
_mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z + __m128 Lx = _mm_sub_ps(light_x, viewpos_x); + __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); @@ -1718,6 +2846,7 @@ namespace swrenderer outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); _mm_storel_epi64((__m128i*)(dest + 
offset), outcolor); + viewpos_x = _mm_add_ps(viewpos_x, step_viewpos_x); } if (ssecount * 2 != count) @@ -1735,6 +2864,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -1751,6 +2881,46 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z + __m128 Lx = _mm_sub_ps(light_x, viewpos_x); + __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = 
_mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); @@ -1779,6 +2949,13 @@ namespace swrenderer __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); int desaturate = shade_constants.desaturate; + auto lights = args.dc_lights; + auto num_lights = args.dc_num_lights; + float vpx = args.dc_viewpos.X; + float stepvpx = args.dc_viewpos_step.X; + __m128 viewpos_x = _mm_setr_ps(vpx, vpx + stepvpx, 0.0f, 0.0f); + __m128 step_viewpos_x = _mm_set1_ps(stepvpx * 2.0f); + int count = args.DestX2() - args.DestX1() + 1; int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); @@ -1811,6 +2988,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -1827,6 +3005,46 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 
light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z + __m128 Lx = _mm_sub_ps(light_x, viewpos_x); + __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); @@ -1841,6 +3059,7 @@ namespace swrenderer outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); _mm_storel_epi64((__m128i*)(dest + offset), outcolor); + viewpos_x = _mm_add_ps(viewpos_x, step_viewpos_x); } if (ssecount * 2 != count) @@ -1858,6 +3077,7 @@ namespace swrenderer __m128i fgcolor = 
_mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -1874,6 +3094,46 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z + __m128 Lx = _mm_sub_ps(light_x, viewpos_x); + __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = 
_mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); @@ -1906,6 +3166,13 @@ namespace swrenderer __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); int desaturate = shade_constants.desaturate; + auto lights = args.dc_lights; + auto num_lights = args.dc_num_lights; + float vpx = args.dc_viewpos.X; + float stepvpx = args.dc_viewpos_step.X; + __m128 viewpos_x = _mm_setr_ps(vpx, vpx + stepvpx, 0.0f, 0.0f); + __m128 step_viewpos_x = _mm_set1_ps(stepvpx * 2.0f); + int count = args.DestX2() - args.DestX1() + 1; int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); @@ -1986,6 +3253,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -2002,6 +3270,46 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // 
distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z + __m128 Lx = _mm_sub_ps(light_x, viewpos_x); + __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); @@ -2016,6 +3324,7 @@ namespace swrenderer outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); _mm_storel_epi64((__m128i*)(dest + offset), outcolor); + viewpos_x = _mm_add_ps(viewpos_x, step_viewpos_x); } if (ssecount * 2 != count) @@ -2056,6 +3365,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = 
RPART(ifgcolor[0]); @@ -2072,6 +3382,46 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z + __m128 Lx = _mm_sub_ps(light_x, viewpos_x); + __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = 
_mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); @@ -2100,6 +3450,13 @@ namespace swrenderer __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); int desaturate = shade_constants.desaturate; + auto lights = args.dc_lights; + auto num_lights = args.dc_num_lights; + float vpx = args.dc_viewpos.X; + float stepvpx = args.dc_viewpos_step.X; + __m128 viewpos_x = _mm_setr_ps(vpx, vpx + stepvpx, 0.0f, 0.0f); + __m128 step_viewpos_x = _mm_set1_ps(stepvpx * 2.0f); + int count = args.DestX2() - args.DestX1() + 1; int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); @@ -2180,6 +3537,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -2196,6 +3554,46 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z + __m128 Lx = _mm_sub_ps(light_x, viewpos_x); + __m128 dist2 = _mm_add_ps(Lyz2, 
_mm_mul_ps(Lx, Lx)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); @@ -2210,6 +3608,7 @@ namespace swrenderer outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); _mm_storel_epi64((__m128i*)(dest + offset), outcolor); + viewpos_x = _mm_add_ps(viewpos_x, step_viewpos_x); } if (ssecount * 2 != count) @@ -2250,6 +3649,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -2266,6 +3666,46 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); 
fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z + __m128 Lx = _mm_sub_ps(light_x, viewpos_x); + __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); @@ -2347,6 +3787,13 @@ namespace swrenderer __m128i mlight = 
_mm_set_epi16(256, light, light, light, 256, light, light, light); __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + auto lights = args.dc_lights; + auto num_lights = args.dc_num_lights; + float vpx = args.dc_viewpos.X; + float stepvpx = args.dc_viewpos_step.X; + __m128 viewpos_x = _mm_setr_ps(vpx, vpx + stepvpx, 0.0f, 0.0f); + __m128 step_viewpos_x = _mm_set1_ps(stepvpx * 2.0f); + int count = args.DestX2() - args.DestX1() + 1; int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); @@ -2379,7 +3826,48 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z + __m128 Lx = _mm_sub_ps(light_x, viewpos_x); + __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); + __m128i attenuation = 
_mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend __m128i fgalpha = _mm_set1_epi16(srcalpha); @@ -2403,6 +3891,7 @@ namespace swrenderer outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); _mm_storel_epi64((__m128i*)(dest + offset), outcolor); + viewpos_x = _mm_add_ps(viewpos_x, step_viewpos_x); } if (ssecount * 2 != count) @@ -2420,7 +3909,48 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z + __m128 Lx = _mm_sub_ps(light_x, viewpos_x); + __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + 
// The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend __m128i fgalpha = _mm_set1_epi16(srcalpha); @@ -2453,6 +3983,13 @@ namespace swrenderer __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + auto lights = args.dc_lights; + auto num_lights = args.dc_num_lights; + float vpx = args.dc_viewpos.X; + float stepvpx = args.dc_viewpos_step.X; + __m128 viewpos_x = _mm_setr_ps(vpx, vpx + stepvpx, 0.0f, 0.0f); + __m128 step_viewpos_x = _mm_set1_ps(stepvpx * 2.0f); + int count = args.DestX2() - args.DestX1() + 1; int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); @@ -2485,7 +4022,48 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; fgcolor 
= _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z + __m128 Lx = _mm_sub_ps(light_x, viewpos_x); + __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend __m128i fgalpha = _mm_set1_epi16(srcalpha); @@ -2509,6 +4087,7 @@ namespace swrenderer outcolor = _mm_or_si128(outcolor, 
_mm_set1_epi32(0xff000000)); _mm_storel_epi64((__m128i*)(dest + offset), outcolor); + viewpos_x = _mm_add_ps(viewpos_x, step_viewpos_x); } if (ssecount * 2 != count) @@ -2526,7 +4105,48 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z + __m128 Lx = _mm_sub_ps(light_x, viewpos_x); + __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, 
_mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend __m128i fgalpha = _mm_set1_epi16(srcalpha); @@ -2563,6 +4183,13 @@ namespace swrenderer __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + auto lights = args.dc_lights; + auto num_lights = args.dc_num_lights; + float vpx = args.dc_viewpos.X; + float stepvpx = args.dc_viewpos_step.X; + __m128 viewpos_x = _mm_setr_ps(vpx, vpx + stepvpx, 0.0f, 0.0f); + __m128 step_viewpos_x = _mm_set1_ps(stepvpx * 2.0f); + int count = args.DestX2() - args.DestX1() + 1; int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); @@ -2643,7 +4270,48 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z + __m128 Lx = _mm_sub_ps(light_x, viewpos_x); + __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple 
light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend __m128i fgalpha = _mm_set1_epi16(srcalpha); @@ -2667,6 +4335,7 @@ namespace swrenderer outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); _mm_storel_epi64((__m128i*)(dest + offset), outcolor); + viewpos_x = _mm_add_ps(viewpos_x, step_viewpos_x); } if (ssecount * 2 != count) @@ -2707,7 +4376,48 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lyz2 = 
light_y; // L.y*L.y + L.z*L.z + __m128 Lx = _mm_sub_ps(light_x, viewpos_x); + __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend __m128i fgalpha = _mm_set1_epi16(srcalpha); @@ -2740,6 +4450,13 @@ namespace swrenderer __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + auto lights = args.dc_lights; + auto num_lights = args.dc_num_lights; + float vpx = args.dc_viewpos.X; + float stepvpx = args.dc_viewpos_step.X; + __m128 viewpos_x = _mm_setr_ps(vpx, vpx + stepvpx, 0.0f, 0.0f); + __m128 step_viewpos_x = _mm_set1_ps(stepvpx * 2.0f); + int count = args.DestX2() - args.DestX1() + 1; int pitch = 
RenderViewport::Instance()->RenderTarget->GetPitch(); uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); @@ -2820,7 +4537,48 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z + __m128 Lx = _mm_sub_ps(light_x, viewpos_x); + __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, 
_mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend __m128i fgalpha = _mm_set1_epi16(srcalpha); @@ -2844,6 +4602,7 @@ namespace swrenderer outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); _mm_storel_epi64((__m128i*)(dest + offset), outcolor); + viewpos_x = _mm_add_ps(viewpos_x, step_viewpos_x); } if (ssecount * 2 != count) @@ -2884,7 +4643,48 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z + __m128 Lx = _mm_sub_ps(light_x, viewpos_x); + __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = 
_mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend __m128i fgalpha = _mm_set1_epi16(srcalpha); @@ -2929,6 +4729,13 @@ namespace swrenderer __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); int desaturate = shade_constants.desaturate; + auto lights = args.dc_lights; + auto num_lights = args.dc_num_lights; + float vpx = args.dc_viewpos.X; + float stepvpx = args.dc_viewpos_step.X; + __m128 viewpos_x = _mm_setr_ps(vpx, vpx + stepvpx, 0.0f, 0.0f); + __m128 step_viewpos_x = _mm_set1_ps(stepvpx * 2.0f); + int count = args.DestX2() - args.DestX1() + 1; int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); @@ -2961,6 +4768,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -2977,6 +4785,46 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 
0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z + __m128 Lx = _mm_sub_ps(light_x, viewpos_x); + __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend __m128i fgalpha = _mm_set1_epi16(srcalpha); @@ -3000,6 +4848,7 @@ namespace swrenderer outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); _mm_storel_epi64((__m128i*)(dest + offset), outcolor); + viewpos_x = _mm_add_ps(viewpos_x, 
step_viewpos_x); } if (ssecount * 2 != count) @@ -3017,6 +4866,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -3033,6 +4883,46 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z + __m128 Lx = _mm_sub_ps(light_x, viewpos_x); + __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, 
_mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend __m128i fgalpha = _mm_set1_epi16(srcalpha); @@ -3070,6 +4960,13 @@ namespace swrenderer __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); int desaturate = shade_constants.desaturate; + auto lights = args.dc_lights; + auto num_lights = args.dc_num_lights; + float vpx = args.dc_viewpos.X; + float stepvpx = args.dc_viewpos_step.X; + __m128 viewpos_x = _mm_setr_ps(vpx, vpx + stepvpx, 0.0f, 0.0f); + __m128 step_viewpos_x = _mm_set1_ps(stepvpx * 2.0f); + int count = args.DestX2() - args.DestX1() + 1; int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); @@ -3102,6 +4999,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -3118,6 +5016,46 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = 
_mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z + __m128 Lx = _mm_sub_ps(light_x, viewpos_x); + __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend __m128i fgalpha = _mm_set1_epi16(srcalpha); @@ -3141,6 +5079,7 @@ namespace swrenderer outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); _mm_storel_epi64((__m128i*)(dest + offset), outcolor); + viewpos_x = _mm_add_ps(viewpos_x, step_viewpos_x); } if (ssecount * 2 != count) @@ -3158,6 +5097,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; int blue0 = 
BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -3174,6 +5114,46 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z + __m128 Lx = _mm_sub_ps(light_x, viewpos_x); + __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, 
_mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend __m128i fgalpha = _mm_set1_epi16(srcalpha); @@ -3215,6 +5195,13 @@ namespace swrenderer __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); int desaturate = shade_constants.desaturate; + auto lights = args.dc_lights; + auto num_lights = args.dc_num_lights; + float vpx = args.dc_viewpos.X; + float stepvpx = args.dc_viewpos_step.X; + __m128 viewpos_x = _mm_setr_ps(vpx, vpx + stepvpx, 0.0f, 0.0f); + __m128 step_viewpos_x = _mm_set1_ps(stepvpx * 2.0f); + int count = args.DestX2() - args.DestX1() + 1; int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); @@ -3295,6 +5282,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -3311,6 +5299,46 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z + __m128 Lx = _mm_sub_ps(light_x, viewpos_x); + 
__m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend __m128i fgalpha = _mm_set1_epi16(srcalpha); @@ -3334,6 +5362,7 @@ namespace swrenderer outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); _mm_storel_epi64((__m128i*)(dest + offset), outcolor); + viewpos_x = _mm_add_ps(viewpos_x, step_viewpos_x); } if (ssecount * 2 != count) @@ -3374,6 +5403,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -3390,6 +5420,46 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 
8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z + __m128 Lx = _mm_sub_ps(light_x, viewpos_x); + __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend __m128i fgalpha = _mm_set1_epi16(srcalpha); @@ -3427,6 +5497,13 @@ namespace swrenderer __m128i shade_light = 
_mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); int desaturate = shade_constants.desaturate; + auto lights = args.dc_lights; + auto num_lights = args.dc_num_lights; + float vpx = args.dc_viewpos.X; + float stepvpx = args.dc_viewpos_step.X; + __m128 viewpos_x = _mm_setr_ps(vpx, vpx + stepvpx, 0.0f, 0.0f); + __m128 step_viewpos_x = _mm_set1_ps(stepvpx * 2.0f); + int count = args.DestX2() - args.DestX1() + 1; int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); @@ -3507,6 +5584,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -3523,6 +5601,46 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z + __m128 Lx = _mm_sub_ps(light_x, viewpos_x); + __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), 
m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend __m128i fgalpha = _mm_set1_epi16(srcalpha); @@ -3546,6 +5664,7 @@ namespace swrenderer outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); _mm_storel_epi64((__m128i*)(dest + offset), outcolor); + viewpos_x = _mm_add_ps(viewpos_x, step_viewpos_x); } if (ssecount * 2 != count) @@ -3586,6 +5705,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -3602,6 +5722,46 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = 
_mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z + __m128 Lx = _mm_sub_ps(light_x, viewpos_x); + __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend __m128i fgalpha = _mm_set1_epi16(srcalpha); @@ -3692,6 +5852,13 @@ namespace swrenderer __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + auto lights = args.dc_lights; + auto num_lights = 
args.dc_num_lights; + float vpx = args.dc_viewpos.X; + float stepvpx = args.dc_viewpos_step.X; + __m128 viewpos_x = _mm_setr_ps(vpx, vpx + stepvpx, 0.0f, 0.0f); + __m128 step_viewpos_x = _mm_set1_ps(stepvpx * 2.0f); + int count = args.DestX2() - args.DestX1() + 1; int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); @@ -3724,7 +5891,48 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z + __m128 Lx = _mm_sub_ps(light_x, viewpos_x); + __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, 
_MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend uint32_t alpha0 = APART(ifgcolor[0]); @@ -3760,6 +5968,7 @@ namespace swrenderer outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); _mm_storel_epi64((__m128i*)(dest + offset), outcolor); + viewpos_x = _mm_add_ps(viewpos_x, step_viewpos_x); } if (ssecount * 2 != count) @@ -3777,7 +5986,48 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z + __m128 Lx = _mm_sub_ps(light_x, viewpos_x); + __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); + + __m128 
is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend uint32_t alpha0 = APART(ifgcolor[0]); @@ -3822,6 +6072,13 @@ namespace swrenderer __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + auto lights = args.dc_lights; + auto num_lights = args.dc_num_lights; + float vpx = args.dc_viewpos.X; + float stepvpx = args.dc_viewpos_step.X; + __m128 viewpos_x = _mm_setr_ps(vpx, vpx + stepvpx, 0.0f, 0.0f); + __m128 step_viewpos_x = _mm_set1_ps(stepvpx * 2.0f); + int count = args.DestX2() - args.DestX1() + 1; int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); @@ -3854,7 +6111,48 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = 
_mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z + __m128 Lx = _mm_sub_ps(light_x, viewpos_x); + __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend uint32_t alpha0 = APART(ifgcolor[0]); @@ -3890,6 +6188,7 @@ namespace swrenderer outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); _mm_storel_epi64((__m128i*)(dest + offset), outcolor); + viewpos_x = _mm_add_ps(viewpos_x, step_viewpos_x); } if (ssecount * 2 != count) @@ -3907,7 +6206,48 @@ namespace swrenderer __m128i fgcolor = 
_mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z + __m128 Lx = _mm_sub_ps(light_x, viewpos_x); + __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend uint32_t alpha0 = 
APART(ifgcolor[0]); @@ -3956,6 +6296,13 @@ namespace swrenderer __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + auto lights = args.dc_lights; + auto num_lights = args.dc_num_lights; + float vpx = args.dc_viewpos.X; + float stepvpx = args.dc_viewpos_step.X; + __m128 viewpos_x = _mm_setr_ps(vpx, vpx + stepvpx, 0.0f, 0.0f); + __m128 step_viewpos_x = _mm_set1_ps(stepvpx * 2.0f); + int count = args.DestX2() - args.DestX1() + 1; int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); @@ -4036,7 +6383,48 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z + __m128 Lx = _mm_sub_ps(light_x, viewpos_x); + __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); + + __m128 
is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend uint32_t alpha0 = APART(ifgcolor[0]); @@ -4072,6 +6460,7 @@ namespace swrenderer outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); _mm_storel_epi64((__m128i*)(dest + offset), outcolor); + viewpos_x = _mm_add_ps(viewpos_x, step_viewpos_x); } if (ssecount * 2 != count) @@ -4112,7 +6501,48 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z + __m128 Lx = _mm_sub_ps(light_x, viewpos_x); + __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation 
= _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend uint32_t alpha0 = APART(ifgcolor[0]); @@ -4157,6 +6587,13 @@ namespace swrenderer __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + auto lights = args.dc_lights; + auto num_lights = args.dc_num_lights; + float vpx = args.dc_viewpos.X; + float stepvpx = args.dc_viewpos_step.X; + __m128 viewpos_x = _mm_setr_ps(vpx, vpx + stepvpx, 0.0f, 0.0f); + __m128 step_viewpos_x = _mm_set1_ps(stepvpx * 2.0f); + int count = args.DestX2() - args.DestX1() + 1; int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); @@ -4237,7 +6674,48 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), 
_mm_setzero_si128()); // Shade + __m128i material = fgcolor; fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z + __m128 Lx = _mm_sub_ps(light_x, viewpos_x); + __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend uint32_t alpha0 = APART(ifgcolor[0]); @@ -4273,6 +6751,7 @@ namespace 
swrenderer outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); _mm_storel_epi64((__m128i*)(dest + offset), outcolor); + viewpos_x = _mm_add_ps(viewpos_x, step_viewpos_x); } if (ssecount * 2 != count) @@ -4313,7 +6792,48 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z + __m128 Lx = _mm_sub_ps(light_x, viewpos_x); + __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, 
_MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend uint32_t alpha0 = APART(ifgcolor[0]); @@ -4370,6 +6890,13 @@ namespace swrenderer __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); int desaturate = shade_constants.desaturate; + auto lights = args.dc_lights; + auto num_lights = args.dc_num_lights; + float vpx = args.dc_viewpos.X; + float stepvpx = args.dc_viewpos_step.X; + __m128 viewpos_x = _mm_setr_ps(vpx, vpx + stepvpx, 0.0f, 0.0f); + __m128 step_viewpos_x = _mm_set1_ps(stepvpx * 2.0f); + int count = args.DestX2() - args.DestX1() + 1; int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); @@ -4402,6 +6929,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -4418,6 +6946,46 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // 
distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z + __m128 Lx = _mm_sub_ps(light_x, viewpos_x); + __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend uint32_t alpha0 = APART(ifgcolor[0]); @@ -4453,6 +7021,7 @@ namespace swrenderer outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); _mm_storel_epi64((__m128i*)(dest + offset), outcolor); + viewpos_x = _mm_add_ps(viewpos_x, step_viewpos_x); } if (ssecount * 2 != count) @@ -4470,6 +7039,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ 
-4486,6 +7056,46 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z + __m128 Lx = _mm_sub_ps(light_x, viewpos_x); + __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, 
_mm_set1_epi16(255)); // Blend uint32_t alpha0 = APART(ifgcolor[0]); @@ -4535,6 +7145,13 @@ namespace swrenderer __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); int desaturate = shade_constants.desaturate; + auto lights = args.dc_lights; + auto num_lights = args.dc_num_lights; + float vpx = args.dc_viewpos.X; + float stepvpx = args.dc_viewpos_step.X; + __m128 viewpos_x = _mm_setr_ps(vpx, vpx + stepvpx, 0.0f, 0.0f); + __m128 step_viewpos_x = _mm_set1_ps(stepvpx * 2.0f); + int count = args.DestX2() - args.DestX1() + 1; int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); @@ -4567,6 +7184,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -4583,6 +7201,46 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z + __m128 Lx = _mm_sub_ps(light_x, viewpos_x); + __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + 
__m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend uint32_t alpha0 = APART(ifgcolor[0]); @@ -4618,6 +7276,7 @@ namespace swrenderer outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); _mm_storel_epi64((__m128i*)(dest + offset), outcolor); + viewpos_x = _mm_add_ps(viewpos_x, step_viewpos_x); } if (ssecount * 2 != count) @@ -4635,6 +7294,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -4651,6 +7311,46 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + __m128i lit = 
_mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z + __m128 Lx = _mm_sub_ps(light_x, viewpos_x); + __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend uint32_t alpha0 = APART(ifgcolor[0]); @@ -4704,6 +7404,13 @@ namespace swrenderer __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, 
shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); int desaturate = shade_constants.desaturate; + auto lights = args.dc_lights; + auto num_lights = args.dc_num_lights; + float vpx = args.dc_viewpos.X; + float stepvpx = args.dc_viewpos_step.X; + __m128 viewpos_x = _mm_setr_ps(vpx, vpx + stepvpx, 0.0f, 0.0f); + __m128 step_viewpos_x = _mm_set1_ps(stepvpx * 2.0f); + int count = args.DestX2() - args.DestX1() + 1; int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); @@ -4784,6 +7491,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -4800,6 +7508,46 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z + __m128 Lx = _mm_sub_ps(light_x, viewpos_x); + __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The 
point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend uint32_t alpha0 = APART(ifgcolor[0]); @@ -4835,6 +7583,7 @@ namespace swrenderer outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); _mm_storel_epi64((__m128i*)(dest + offset), outcolor); + viewpos_x = _mm_add_ps(viewpos_x, step_viewpos_x); } if (ssecount * 2 != count) @@ -4875,6 +7624,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -4891,6 +7641,46 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = 
_mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z + __m128 Lx = _mm_sub_ps(light_x, viewpos_x); + __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend uint32_t alpha0 = APART(ifgcolor[0]); @@ -4940,6 +7730,13 @@ namespace swrenderer __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); int desaturate = shade_constants.desaturate; + auto lights = args.dc_lights; + auto num_lights = 
args.dc_num_lights; + float vpx = args.dc_viewpos.X; + float stepvpx = args.dc_viewpos_step.X; + __m128 viewpos_x = _mm_setr_ps(vpx, vpx + stepvpx, 0.0f, 0.0f); + __m128 step_viewpos_x = _mm_set1_ps(stepvpx * 2.0f); + int count = args.DestX2() - args.DestX1() + 1; int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); @@ -5020,6 +7817,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -5036,6 +7834,46 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z + __m128 Lx = _mm_sub_ps(light_x, viewpos_x); + __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); + __m128i attenuation 
= _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend uint32_t alpha0 = APART(ifgcolor[0]); @@ -5071,6 +7909,7 @@ namespace swrenderer outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); _mm_storel_epi64((__m128i*)(dest + offset), outcolor); + viewpos_x = _mm_add_ps(viewpos_x, step_viewpos_x); } if (ssecount * 2 != count) @@ -5111,6 +7950,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -5127,6 +7967,46 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z + __m128 Lx = _mm_sub_ps(light_x, 
viewpos_x); + __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); // Blend uint32_t alpha0 = APART(ifgcolor[0]); diff --git a/src/swrenderer/drawers/r_draw_span32.php b/src/swrenderer/drawers/r_draw_span32.php index 6a8a0b32aa..400c981dc7 100644 --- a/src/swrenderer/drawers/r_draw_span32.php +++ b/src/swrenderer/drawers/r_draw_span32.php @@ -146,6 +146,13 @@ namespace swrenderer int desaturate = shade_constants.desaturate; + auto lights = args.dc_lights; + auto num_lights = args.dc_num_lights; + float vpx = args.dc_viewpos.X; + float stepvpx = args.dc_viewpos_step.X; + __m128 viewpos_x = _mm_setr_ps(vpx, vpx + stepvpx, 0.0f, 0.0f); + __m128 step_viewpos_x = _mm_set1_ps(stepvpx * 2.0f); + int count = args.DestX2() - args.DestX1() + 1; int pitch = 
RenderViewport::Instance()->RenderTarget->GetPitch(); uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); @@ -189,6 +196,7 @@ namespace swrenderer _mm_storel_epi64((__m128i*)(dest + offset), outcolor); + viewpos_x = _mm_add_ps(viewpos_x, step_viewpos_x); } if (ssecount * 2 != count) @@ -268,7 +276,9 @@ namespace swrenderer } function Shade($isSimpleShade) - { + { ?> + __m128i material = fgcolor; + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); @@ -292,6 +302,7 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z + __m128 Lx = _mm_sub_ps(light_x, viewpos_x); + __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), 
_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); + } From ef41e8e54eb61272a565be80dcd6ecc2ee630ebf Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 23 Feb 2017 06:01:01 +0100 Subject: [PATCH 873/912] Add dynamic light to sprites --- src/swrenderer/drawers/r_draw_sprite32.h | 408 +++++++++++++++++++++ src/swrenderer/drawers/r_draw_sprite32.php | 15 +- src/swrenderer/things/r_sprite.cpp | 37 +- 3 files changed, 445 insertions(+), 15 deletions(-) diff --git a/src/swrenderer/drawers/r_draw_sprite32.h b/src/swrenderer/drawers/r_draw_sprite32.h index e3baa6c08e..0bd83936d8 100644 --- a/src/swrenderer/drawers/r_draw_sprite32.h +++ b/src/swrenderer/drawers/r_draw_sprite32.h @@ -56,6 +56,10 @@ namespace swrenderer int light = 256 - (args.Light() >> (FRACBITS - 8)); __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + + __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); + dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); + dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); @@ -149,6 +153,10 @@ namespace swrenderer int light = 256 - (args.Light() >> (FRACBITS - 8)); __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - 
light); + + __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); + dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); + dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); @@ -314,6 +322,10 @@ namespace swrenderer shade_fade = _mm_mullo_epi16(shade_fade, inv_light); __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); int desaturate = shade_constants.desaturate; + + __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); + dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); + dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); @@ -412,6 +424,10 @@ namespace swrenderer shade_fade = _mm_mullo_epi16(shade_fade, inv_light); __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); int desaturate = shade_constants.desaturate; + + __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); + dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); + dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); @@ -588,6 +604,10 @@ namespace swrenderer int light = 256 - (args.Light() >> (FRACBITS - 8)); __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + + __m128i dynlight = 
_mm_cvtsi32_si128(args.DynamicLight()); + dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); + dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); @@ -637,8 +657,12 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); + // Blend __m128i outcolor = fgcolor; outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); @@ -665,8 +689,12 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); + // Blend __m128i outcolor = fgcolor; outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); @@ -683,6 +711,10 @@ namespace swrenderer int light = 256 - (args.Light() >> (FRACBITS - 8)); __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + + __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); + dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); + dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); @@ -773,8 +805,12 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; fgcolor = 
_mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); + // Blend __m128i outcolor = fgcolor; outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); @@ -821,8 +857,12 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); + // Blend __m128i outcolor = fgcolor; outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); @@ -850,6 +890,10 @@ namespace swrenderer shade_fade = _mm_mullo_epi16(shade_fade, inv_light); __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); int desaturate = shade_constants.desaturate; + + __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); + dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); + dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); @@ -899,6 +943,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -916,6 +961,9 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, 
dynlight), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); + // Blend __m128i outcolor = fgcolor; outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); @@ -942,6 +990,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -959,6 +1008,9 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); + // Blend __m128i outcolor = fgcolor; outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); @@ -980,6 +1032,10 @@ namespace swrenderer shade_fade = _mm_mullo_epi16(shade_fade, inv_light); __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); int desaturate = shade_constants.desaturate; + + __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); + dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); + dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); @@ -1070,6 +1126,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -1087,6 +1144,9 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 
8); + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); + // Blend __m128i outcolor = fgcolor; outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); @@ -1133,6 +1193,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -1150,6 +1211,9 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); + // Blend __m128i outcolor = fgcolor; outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); @@ -1188,6 +1252,10 @@ namespace swrenderer int light = 256 - (args.Light() >> (FRACBITS - 8)); __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + + __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); + dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); + dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); @@ -1238,8 +1306,12 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); + // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = 
APART(ifgcolor[1]); @@ -1296,8 +1368,12 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); + // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); @@ -1343,6 +1419,10 @@ namespace swrenderer int light = 256 - (args.Light() >> (FRACBITS - 8)); __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + + __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); + dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); + dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); @@ -1434,8 +1514,12 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); + // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); @@ -1512,8 +1596,12 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); + // Blend uint32_t alpha0 = APART(ifgcolor[0]); 
uint32_t alpha1 = APART(ifgcolor[1]); @@ -1570,6 +1658,10 @@ namespace swrenderer shade_fade = _mm_mullo_epi16(shade_fade, inv_light); __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); int desaturate = shade_constants.desaturate; + + __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); + dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); + dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); @@ -1620,6 +1712,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -1637,6 +1730,9 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); + // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); @@ -1693,6 +1789,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -1710,6 +1807,9 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); + fgcolor = _mm_min_epi16(fgcolor, 
_mm_set1_epi16(255)); + // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); @@ -1760,6 +1860,10 @@ namespace swrenderer shade_fade = _mm_mullo_epi16(shade_fade, inv_light); __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); int desaturate = shade_constants.desaturate; + + __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); + dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); + dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); @@ -1851,6 +1955,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -1868,6 +1973,9 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); + // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); @@ -1944,6 +2052,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -1961,6 +2070,9 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + fgcolor = _mm_add_epi16(fgcolor, 
_mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); + // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); @@ -2028,6 +2140,10 @@ namespace swrenderer int light = 256 - (args.Light() >> (FRACBITS - 8)); __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + + __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); + dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); + dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); @@ -2078,8 +2194,12 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); + // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); @@ -2136,8 +2256,12 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); + // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); @@ -2183,6 +2307,10 @@ namespace swrenderer int light = 256 - (args.Light() >> (FRACBITS - 8)); __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 
- light, 256 - light, 256 - light); + + __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); + dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); + dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); @@ -2274,8 +2402,12 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); + // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); @@ -2352,8 +2484,12 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); + // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); @@ -2410,6 +2546,10 @@ namespace swrenderer shade_fade = _mm_mullo_epi16(shade_fade, inv_light); __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); int desaturate = shade_constants.desaturate; + + __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); + dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); + dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); @@ -2460,6 +2600,7 @@ namespace 
swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -2477,6 +2618,9 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); + // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); @@ -2533,6 +2677,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -2550,6 +2695,9 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); + // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); @@ -2600,6 +2748,10 @@ namespace swrenderer shade_fade = _mm_mullo_epi16(shade_fade, inv_light); __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); int desaturate = shade_constants.desaturate; + + __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); + dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); + dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); int count = args.Count(); int pitch = 
RenderViewport::Instance()->RenderTarget->GetPitch(); @@ -2691,6 +2843,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -2708,6 +2861,9 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); + // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); @@ -2784,6 +2940,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -2801,6 +2958,9 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); + // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); @@ -2868,6 +3028,10 @@ namespace swrenderer int light = 256 - (args.Light() >> (FRACBITS - 8)); __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + + __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); + dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); + dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); int count = args.Count(); int pitch = 
RenderViewport::Instance()->RenderTarget->GetPitch(); @@ -2918,8 +3082,12 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); + // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); @@ -2976,8 +3144,12 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); + // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); @@ -3023,6 +3195,10 @@ namespace swrenderer int light = 256 - (args.Light() >> (FRACBITS - 8)); __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + + __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); + dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); + dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); @@ -3114,8 +3290,12 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); + // Blend uint32_t 
alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); @@ -3192,8 +3372,12 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); + // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); @@ -3250,6 +3434,10 @@ namespace swrenderer shade_fade = _mm_mullo_epi16(shade_fade, inv_light); __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); int desaturate = shade_constants.desaturate; + + __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); + dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); + dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); @@ -3300,6 +3488,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -3317,6 +3506,9 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); + // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); @@ -3373,6 +3565,7 @@ namespace swrenderer __m128i fgcolor = 
_mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -3390,6 +3583,9 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); + // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); @@ -3440,6 +3636,10 @@ namespace swrenderer shade_fade = _mm_mullo_epi16(shade_fade, inv_light); __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); int desaturate = shade_constants.desaturate; + + __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); + dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); + dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); @@ -3531,6 +3731,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -3548,6 +3749,9 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); + // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); @@ 
-3624,6 +3828,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -3641,6 +3846,9 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); + // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); @@ -3705,6 +3913,10 @@ namespace swrenderer int light = 256 - (args.Light() >> (FRACBITS - 8)); __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + + __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); + dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); + dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); @@ -3752,8 +3964,12 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); + // Blend __m128i outcolor = fgcolor; outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); @@ -3779,8 +3995,12 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + 
fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); + // Blend __m128i outcolor = fgcolor; outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); @@ -3804,6 +4024,10 @@ namespace swrenderer shade_fade = _mm_mullo_epi16(shade_fade, inv_light); __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); int desaturate = shade_constants.desaturate; + + __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); + dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); + dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); @@ -3851,6 +4075,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -3868,6 +4093,9 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); + // Blend __m128i outcolor = fgcolor; outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); @@ -3893,6 +4121,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -3910,6 +4139,9 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, 
fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); + // Blend __m128i outcolor = fgcolor; outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); @@ -3944,6 +4176,10 @@ namespace swrenderer int light = 256 - (args.Light() >> (FRACBITS - 8)); __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + + __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); + dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); + dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); @@ -3992,8 +4228,12 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); + // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); @@ -4049,8 +4289,12 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); + // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); @@ -4103,6 +4347,10 @@ namespace swrenderer shade_fade = _mm_mullo_epi16(shade_fade, inv_light); __m128i shade_light = 
_mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); int desaturate = shade_constants.desaturate; + + __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); + dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); + dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); @@ -4151,6 +4399,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -4168,6 +4417,9 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); + // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); @@ -4223,6 +4475,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -4240,6 +4493,9 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); + // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); @@ -4303,6 +4559,10 @@ namespace swrenderer int light = 256 - 
(args.Light() >> (FRACBITS - 8)); __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + + __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); + dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); + dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); @@ -4351,8 +4611,12 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); + // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); @@ -4408,8 +4672,12 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); + // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); @@ -4462,6 +4730,10 @@ namespace swrenderer shade_fade = _mm_mullo_epi16(shade_fade, inv_light); __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); int desaturate = shade_constants.desaturate; + + __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); + dynlight = _mm_unpacklo_epi8(dynlight, 
_mm_setzero_si128()); + dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); @@ -4510,6 +4782,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -4527,6 +4800,9 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); + // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); @@ -4582,6 +4858,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -4599,6 +4876,9 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); + // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); @@ -4662,6 +4942,10 @@ namespace swrenderer int light = 256 - (args.Light() >> (FRACBITS - 8)); __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + + __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); + dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); + dynlight = 
_mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); @@ -4710,8 +4994,12 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); + // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); @@ -4767,8 +5055,12 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); + // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); @@ -4821,6 +5113,10 @@ namespace swrenderer shade_fade = _mm_mullo_epi16(shade_fade, inv_light); __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); int desaturate = shade_constants.desaturate; + + __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); + dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); + dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); @@ -4869,6 +5165,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; int blue0 = BPART(ifgcolor[0]); int 
green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -4886,6 +5183,9 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); + // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); @@ -4941,6 +5241,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -4958,6 +5259,9 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); + // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); @@ -5022,6 +5326,10 @@ namespace swrenderer int light = 256 - (args.Light() >> (FRACBITS - 8)); __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + + __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); + dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); + dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); @@ -5137,6 +5445,10 @@ namespace swrenderer shade_fade = _mm_mullo_epi16(shade_fade, inv_light); __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, 
shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); int desaturate = shade_constants.desaturate; + + __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); + dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); + dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); @@ -5263,6 +5575,10 @@ namespace swrenderer int light = 256 - (args.Light() >> (FRACBITS - 8)); __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + + __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); + dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); + dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); @@ -5310,8 +5626,12 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); + // Blend __m128i outcolor = fgcolor; outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); @@ -5337,8 +5657,12 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); + // Blend __m128i outcolor = fgcolor; outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); 
@@ -5363,6 +5687,10 @@ namespace swrenderer shade_fade = _mm_mullo_epi16(shade_fade, inv_light); __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); int desaturate = shade_constants.desaturate; + + __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); + dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); + dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); @@ -5410,6 +5738,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -5427,6 +5756,9 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); + // Blend __m128i outcolor = fgcolor; outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); @@ -5452,6 +5784,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -5469,6 +5802,9 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); + // Blend 
__m128i outcolor = fgcolor; outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); @@ -5504,6 +5840,10 @@ namespace swrenderer int light = 256 - (args.Light() >> (FRACBITS - 8)); __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + + __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); + dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); + dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); @@ -5552,8 +5892,12 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); + // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); @@ -5609,8 +5953,12 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); + // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); @@ -5664,6 +6012,10 @@ namespace swrenderer shade_fade = _mm_mullo_epi16(shade_fade, inv_light); __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); int desaturate = 
shade_constants.desaturate; + + __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); + dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); + dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); @@ -5712,6 +6064,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -5729,6 +6082,9 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); + // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); @@ -5784,6 +6140,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -5801,6 +6158,9 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); + // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); @@ -5865,6 +6225,10 @@ namespace swrenderer int light = 256 - (args.Light() >> (FRACBITS - 8)); __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + + __m128i dynlight 
= _mm_cvtsi32_si128(args.DynamicLight()); + dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); + dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); @@ -5913,8 +6277,12 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); + // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); @@ -5970,8 +6338,12 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); + // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); @@ -6025,6 +6397,10 @@ namespace swrenderer shade_fade = _mm_mullo_epi16(shade_fade, inv_light); __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); int desaturate = shade_constants.desaturate; + + __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); + dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); + dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); @@ -6073,6 +6449,7 @@ namespace swrenderer __m128i fgcolor = 
_mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -6090,6 +6467,9 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); + // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); @@ -6145,6 +6525,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -6162,6 +6543,9 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); + // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); @@ -6226,6 +6610,10 @@ namespace swrenderer int light = 256 - (args.Light() >> (FRACBITS - 8)); __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + + __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); + dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); + dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); @@ -6274,8 +6662,12 @@ namespace swrenderer __m128i fgcolor = 
_mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); + // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); @@ -6331,8 +6723,12 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); + // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); @@ -6386,6 +6782,10 @@ namespace swrenderer shade_fade = _mm_mullo_epi16(shade_fade, inv_light); __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); int desaturate = shade_constants.desaturate; + + __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); + dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); + dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); @@ -6434,6 +6834,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -6451,6 +6852,9 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = 
_mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); + // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); @@ -6506,6 +6910,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade + __m128i material = fgcolor; int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -6523,6 +6928,9 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); + // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); diff --git a/src/swrenderer/drawers/r_draw_sprite32.php b/src/swrenderer/drawers/r_draw_sprite32.php index 483c867627..9fd67b0013 100644 --- a/src/swrenderer/drawers/r_draw_sprite32.php +++ b/src/swrenderer/drawers/r_draw_sprite32.php @@ -130,6 +130,10 @@ namespace swrenderer __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); int desaturate = shade_constants.desaturate; + + __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); + dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); + dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); @@ -274,7 +278,9 @@ namespace swrenderer function Shade($blendVariant, $isSimpleShade) { if ($blendVariant == "copy" || $blendVariant == "shaded") 
return; - +?> + __m128i material = fgcolor; + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); @@ -297,8 +303,11 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); +lightsource->flags4 & MF4_ATTENUATE) != 0; + // Attenuated lights disabled for sprites for now to keep consistency with the GL renderer + //bool is_point_light = (node->lightsource->flags4 & MF4_ATTENUATE) != 0; float LdotL = lx * lx + ly * ly + lz * lz; - float NdotL = is_point_light ? -ly : 0.0f; + float NdotL = 1.0f;//is_point_light ? -ly : 1.0f; float radius = node->lightsource->GetRadius(); if (radius * radius >= LdotL && NdotL > 0.0f) { - uint32_t red = light->GetRed(); - uint32_t green = light->GetGreen(); - uint32_t blue = light->GetBlue(); float distance = sqrt(LdotL); - float attenuation = distance / radius * NdotL; - lit_red += red * attenuation; - lit_red += green * attenuation; - lit_red += blue * attenuation; + float attenuation = (1.0f - distance / radius) * NdotL; + if (attenuation > 0.0f) + { + float red = light->GetRed() * (1.0f / 255.0f); + float green = light->GetGreen() * (1.0f / 255.0f); + float blue = light->GetBlue() * (1.0f / 255.0f); + /*if (light->IsSubtractive()) + { + float bright = FVector3(lr, lg, lb).Length(); + FVector3 lightColor(lr, lg, lb); + red = (bright - lr) * -1; + green = (bright - lg) * -1; + blue = (bright - lb) * -1; + }*/ + + lit_red += red * attenuation; + lit_green += green * attenuation; + lit_blue += blue * attenuation; + } } } node = node->nextLight; } - lit_red = MIN(lit_red, 255.0f); - lit_green = MIN(lit_green, 255.0f); - lit_blue = MIN(lit_blue, 255.0f); + lit_red = clamp(lit_red * 255.0f, 0.0f, 255.0f); + lit_green = clamp(lit_green * 
255.0f, 0.0f, 255.0f); + lit_blue = clamp(lit_blue * 255.0f, 0.0f, 255.0f); vis->dynlightcolor = (((uint32_t)lit_red) << 16) | (((uint32_t)lit_green) << 8) | ((uint32_t)lit_blue); } else From 92e6f070b415e2aed8ffc8731ff7ab1798fa65b0 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 23 Feb 2017 07:05:21 +0100 Subject: [PATCH 874/912] Fix sprite dynlight distance check --- src/swrenderer/things/r_sprite.cpp | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/src/swrenderer/things/r_sprite.cpp b/src/swrenderer/things/r_sprite.cpp index d8ea3c1671..49c823ba28 100644 --- a/src/swrenderer/things/r_sprite.cpp +++ b/src/swrenderer/things/r_sprite.cpp @@ -238,24 +238,15 @@ namespace swrenderer ADynamicLight *light = node->lightsource; if (light->visibletoplayer && !(light->flags2&MF2_DORMANT) && (!(light->flags4&MF4_DONTLIGHTSELF) || light->target != thing)) { - double lightX = light->X() - ViewPos.X; - double lightY = light->Y() - ViewPos.Y; - double lightZ = light->Z() - ViewPos.Z; - - float lx = (float)(lightX * ViewSin - lightY * ViewCos - pos.X); - float ly = (float)(lightX * ViewTanCos + lightY * ViewTanSin - pos.Y); - float lz = (float)(lightZ - pos.Z); - - // Attenuated lights disabled for sprites for now to keep consistency with the GL renderer - //bool is_point_light = (node->lightsource->flags4 & MF4_ATTENUATE) != 0; + float lx = (float)(light->X() - pos.X); + float ly = (float)(light->Y() - pos.Y); + float lz = (float)(light->Z() - pos.Z); float LdotL = lx * lx + ly * ly + lz * lz; - float NdotL = 1.0f;//is_point_light ? 
-ly : 1.0f; - float radius = node->lightsource->GetRadius(); - if (radius * radius >= LdotL && NdotL > 0.0f) + if (radius * radius >= LdotL) { float distance = sqrt(LdotL); - float attenuation = (1.0f - distance / radius) * NdotL; + float attenuation = 1.0f - distance / radius; if (attenuation > 0.0f) { float red = light->GetRed() * (1.0f / 255.0f); From c5683bbde57c4c7b473f7bc63b4f373be41ce6e2 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 23 Feb 2017 07:49:02 +0100 Subject: [PATCH 875/912] Clamp dynlights to 0-1 range on sprites --- src/swrenderer/drawers/r_draw_sprite32.h | 320 +++++---------------- src/swrenderer/drawers/r_draw_sprite32.php | 9 +- 2 files changed, 67 insertions(+), 262 deletions(-) diff --git a/src/swrenderer/drawers/r_draw_sprite32.h b/src/swrenderer/drawers/r_draw_sprite32.h index 0bd83936d8..f8d596642d 100644 --- a/src/swrenderer/drawers/r_draw_sprite32.h +++ b/src/swrenderer/drawers/r_draw_sprite32.h @@ -657,12 +657,9 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i material = fgcolor; + mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - // Blend __m128i outcolor = fgcolor; outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); @@ -689,12 +686,9 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i material = fgcolor; + mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - // Blend 
__m128i outcolor = fgcolor; outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); @@ -805,12 +799,9 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i material = fgcolor; + mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - // Blend __m128i outcolor = fgcolor; outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); @@ -857,12 +848,9 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i material = fgcolor; + mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - // Blend __m128i outcolor = fgcolor; outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); @@ -943,7 +931,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i material = fgcolor; + mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -961,9 +949,6 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - // Blend __m128i outcolor = fgcolor; outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); @@ 
-990,7 +975,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i material = fgcolor; + mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -1008,9 +993,6 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - // Blend __m128i outcolor = fgcolor; outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); @@ -1126,7 +1108,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i material = fgcolor; + mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -1144,9 +1126,6 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - // Blend __m128i outcolor = fgcolor; outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); @@ -1193,7 +1172,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i material = fgcolor; + mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -1211,9 +1190,6 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, 
fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - // Blend __m128i outcolor = fgcolor; outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); @@ -1306,12 +1282,9 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i material = fgcolor; + mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); @@ -1368,12 +1341,9 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i material = fgcolor; + mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); @@ -1514,12 +1484,9 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i material = fgcolor; + mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = 
APART(ifgcolor[1]); @@ -1596,12 +1563,9 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i material = fgcolor; + mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); @@ -1712,7 +1676,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i material = fgcolor; + mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -1730,9 +1694,6 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); @@ -1789,7 +1750,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i material = fgcolor; + mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -1807,9 +1768,6 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); - fgcolor = _mm_min_epi16(fgcolor, 
_mm_set1_epi16(255)); - // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); @@ -1955,7 +1913,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i material = fgcolor; + mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -1973,9 +1931,6 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); @@ -2052,7 +2007,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i material = fgcolor; + mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -2070,9 +2025,6 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); @@ -2194,12 +2146,9 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i material = fgcolor; + mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - fgcolor = _mm_add_epi16(fgcolor, 
_mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); @@ -2256,12 +2205,9 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i material = fgcolor; + mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); @@ -2402,12 +2348,9 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i material = fgcolor; + mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); @@ -2484,12 +2427,9 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i material = fgcolor; + mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); @@ -2600,7 +2540,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), 
_mm_setzero_si128()); // Shade - __m128i material = fgcolor; + mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -2618,9 +2558,6 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); @@ -2677,7 +2614,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i material = fgcolor; + mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -2695,9 +2632,6 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); @@ -2843,7 +2777,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i material = fgcolor; + mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -2861,9 +2795,6 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - fgcolor = _mm_add_epi16(fgcolor, 
_mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); @@ -2940,7 +2871,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i material = fgcolor; + mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -2958,9 +2889,6 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); @@ -3082,12 +3010,9 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i material = fgcolor; + mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); @@ -3144,12 +3069,9 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i material = fgcolor; + mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - // 
Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); @@ -3290,12 +3212,9 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i material = fgcolor; + mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); @@ -3372,12 +3291,9 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i material = fgcolor; + mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); @@ -3488,7 +3404,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i material = fgcolor; + mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -3506,9 +3422,6 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); @@ -3565,7 +3478,7 @@ namespace 
swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i material = fgcolor; + mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -3583,9 +3496,6 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); @@ -3731,7 +3641,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i material = fgcolor; + mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -3749,9 +3659,6 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); @@ -3828,7 +3735,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i material = fgcolor; + mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -3846,9 +3753,6 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = 
_mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); @@ -3964,12 +3868,9 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i material = fgcolor; + mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - // Blend __m128i outcolor = fgcolor; outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); @@ -3995,12 +3896,9 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i material = fgcolor; + mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - // Blend __m128i outcolor = fgcolor; outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); @@ -4075,7 +3973,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i material = fgcolor; + mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -4093,9 +3991,6 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - fgcolor = _mm_add_epi16(fgcolor, 
_mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - // Blend __m128i outcolor = fgcolor; outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); @@ -4121,7 +4016,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i material = fgcolor; + mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -4139,9 +4034,6 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - // Blend __m128i outcolor = fgcolor; outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); @@ -4228,12 +4120,9 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i material = fgcolor; + mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); @@ -4289,12 +4178,9 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i material = fgcolor; + mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); - fgcolor = _mm_min_epi16(fgcolor, 
_mm_set1_epi16(255)); - // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); @@ -4399,7 +4285,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i material = fgcolor; + mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -4417,9 +4303,6 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); @@ -4475,7 +4358,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i material = fgcolor; + mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -4493,9 +4376,6 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); @@ -4611,12 +4491,9 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i material = fgcolor; + mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - fgcolor = _mm_add_epi16(fgcolor, 
_mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); @@ -4672,12 +4549,9 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i material = fgcolor; + mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); @@ -4782,7 +4656,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i material = fgcolor; + mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -4800,9 +4674,6 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); @@ -4858,7 +4729,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i material = fgcolor; + mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -4876,9 +4747,6 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = 
_mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); @@ -4994,12 +4862,9 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i material = fgcolor; + mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); @@ -5055,12 +4920,9 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i material = fgcolor; + mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); @@ -5165,7 +5027,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i material = fgcolor; + mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -5183,9 +5045,6 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - fgcolor = _mm_add_epi16(fgcolor, 
_mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); @@ -5241,7 +5100,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i material = fgcolor; + mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -5259,9 +5118,6 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); @@ -5626,12 +5482,9 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i material = fgcolor; + mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - // Blend __m128i outcolor = fgcolor; outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); @@ -5657,12 +5510,9 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i material = fgcolor; + mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); - fgcolor = _mm_min_epi16(fgcolor, 
_mm_set1_epi16(255)); - // Blend __m128i outcolor = fgcolor; outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); @@ -5738,7 +5588,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i material = fgcolor; + mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -5756,9 +5606,6 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - // Blend __m128i outcolor = fgcolor; outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); @@ -5784,7 +5631,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i material = fgcolor; + mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -5802,9 +5649,6 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - // Blend __m128i outcolor = fgcolor; outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); @@ -5892,12 +5736,9 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i material = fgcolor; + mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - 
fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); @@ -5953,12 +5794,9 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i material = fgcolor; + mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); @@ -6064,7 +5902,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i material = fgcolor; + mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -6082,9 +5920,6 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); @@ -6140,7 +5975,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i material = fgcolor; + mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -6158,9 +5993,6 @@ namespace swrenderer fgcolor = 
_mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); @@ -6277,12 +6109,9 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i material = fgcolor; + mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); @@ -6338,12 +6167,9 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i material = fgcolor; + mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); @@ -6449,7 +6275,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i material = fgcolor; + mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -6467,9 +6293,6 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 
8); - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); @@ -6525,7 +6348,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i material = fgcolor; + mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -6543,9 +6366,6 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); @@ -6662,12 +6482,9 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i material = fgcolor; + mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); @@ -6723,12 +6540,9 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i material = fgcolor; + mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); - fgcolor = 
_mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); @@ -6834,7 +6648,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i material = fgcolor; + mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -6852,9 +6666,6 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); @@ -6910,7 +6721,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i material = fgcolor; + mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -6928,9 +6739,6 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - // Blend uint32_t alpha0 = APART(ifgcolor[0]); uint32_t alpha1 = APART(ifgcolor[1]); diff --git a/src/swrenderer/drawers/r_draw_sprite32.php b/src/swrenderer/drawers/r_draw_sprite32.php index 9fd67b0013..c66bba0c35 100644 --- a/src/swrenderer/drawers/r_draw_sprite32.php +++ b/src/swrenderer/drawers/r_draw_sprite32.php @@ -279,7 +279,7 @@ namespace swrenderer { if ($blendVariant == "copy" || $blendVariant == "shaded") 
return; ?> - __m128i material = fgcolor; + mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); @@ -303,11 +303,8 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - Date: Thu, 23 Feb 2017 08:28:18 +0100 Subject: [PATCH 876/912] Removed drawergen tool and all LLVM dependencies (don't let the door hit you on your way out, llvm!) --- src/CMakeLists.txt | 30 +- src/polyrenderer/drawers/poly_triangle.cpp | 22 +- src/swrenderer/drawers/r_draw_rgba.cpp | 416 +---------- src/swrenderer/drawers/r_draw_rgba.h | 125 ---- src/swrenderer/drawers/r_drawers.cpp | 287 -------- src/swrenderer/drawers/r_drawers.h | 191 ----- tools/CMakeLists.txt | 1 - tools/drawergen/CMakeLists.txt | 187 ----- tools/drawergen/drawergen.cpp | 142 ---- tools/drawergen/exception.h | 36 - .../fixedfunction/drawcolumncodegen.cpp | 322 -------- .../fixedfunction/drawcolumncodegen.h | 92 --- .../drawergen/fixedfunction/drawercodegen.cpp | 169 ----- tools/drawergen/fixedfunction/drawercodegen.h | 96 --- .../fixedfunction/drawskycodegen.cpp | 131 ---- .../drawergen/fixedfunction/drawskycodegen.h | 62 -- .../fixedfunction/drawspancodegen.cpp | 298 -------- .../drawergen/fixedfunction/drawspancodegen.h | 83 --- .../fixedfunction/drawtrianglecodegen.cpp | 697 ------------------ .../fixedfunction/drawtrianglecodegen.h | 112 --- .../fixedfunction/drawwallcodegen.cpp | 231 ------ .../drawergen/fixedfunction/drawwallcodegen.h | 80 -- .../fixedfunction/setuptrianglecodegen.cpp | 573 -------------- .../fixedfunction/setuptrianglecodegen.h | 98 --- tools/drawergen/llvm_include.h | 99 --- tools/drawergen/llvmdrawers.cpp | 428 ----------- tools/drawergen/llvmdrawers.h | 84 --- 
tools/drawergen/llvmprogram.cpp | 171 ----- tools/drawergen/llvmprogram.h | 41 -- tools/drawergen/precomp.h | 11 - tools/drawergen/ssa/ssa_barycentric_weight.h | 118 --- tools/drawergen/ssa/ssa_bool.cpp | 173 ----- tools/drawergen/ssa/ssa_bool.h | 75 -- tools/drawergen/ssa/ssa_float.cpp | 143 ---- tools/drawergen/ssa/ssa_float.h | 60 -- tools/drawergen/ssa/ssa_float_ptr.cpp | 91 --- tools/drawergen/ssa/ssa_float_ptr.h | 49 -- tools/drawergen/ssa/ssa_for_block.cpp | 62 -- tools/drawergen/ssa/ssa_for_block.h | 38 - tools/drawergen/ssa/ssa_function.cpp | 76 -- tools/drawergen/ssa/ssa_function.h | 51 -- tools/drawergen/ssa/ssa_if_block.cpp | 58 -- tools/drawergen/ssa/ssa_if_block.h | 67 -- tools/drawergen/ssa/ssa_int.cpp | 208 ------ tools/drawergen/ssa/ssa_int.h | 80 -- tools/drawergen/ssa/ssa_int_ptr.cpp | 111 --- tools/drawergen/ssa/ssa_int_ptr.h | 52 -- tools/drawergen/ssa/ssa_phi.h | 54 -- tools/drawergen/ssa/ssa_scope.cpp | 96 --- tools/drawergen/ssa/ssa_scope.h | 64 -- tools/drawergen/ssa/ssa_short.cpp | 174 ----- tools/drawergen/ssa/ssa_short.h | 72 -- tools/drawergen/ssa/ssa_stack.h | 48 -- tools/drawergen/ssa/ssa_struct_type.cpp | 40 - tools/drawergen/ssa/ssa_struct_type.h | 38 - tools/drawergen/ssa/ssa_ubyte.cpp | 122 --- tools/drawergen/ssa/ssa_ubyte.h | 60 -- tools/drawergen/ssa/ssa_ubyte_ptr.cpp | 173 ----- tools/drawergen/ssa/ssa_ubyte_ptr.h | 57 -- tools/drawergen/ssa/ssa_value.cpp | 81 -- tools/drawergen/ssa/ssa_value.h | 74 -- tools/drawergen/ssa/ssa_vec16ub.cpp | 188 ----- tools/drawergen/ssa/ssa_vec16ub.h | 63 -- tools/drawergen/ssa/ssa_vec4f.cpp | 209 ------ tools/drawergen/ssa/ssa_vec4f.h | 71 -- tools/drawergen/ssa/ssa_vec4f_ptr.cpp | 73 -- tools/drawergen/ssa/ssa_vec4f_ptr.h | 45 -- tools/drawergen/ssa/ssa_vec4i.cpp | 275 ------- tools/drawergen/ssa/ssa_vec4i.h | 80 -- tools/drawergen/ssa/ssa_vec4i_ptr.cpp | 65 -- tools/drawergen/ssa/ssa_vec4i_ptr.h | 46 -- tools/drawergen/ssa/ssa_vec8s.cpp | 189 ----- tools/drawergen/ssa/ssa_vec8s.h | 67 -- 
tools/drawergen/timestamp.h | 12 - tools/drawergen/trustinfo.rc | 6 - tools/drawergen/trustinfo.txt | 16 - 76 files changed, 36 insertions(+), 9349 deletions(-) delete mode 100644 src/swrenderer/drawers/r_drawers.cpp delete mode 100644 tools/drawergen/CMakeLists.txt delete mode 100644 tools/drawergen/drawergen.cpp delete mode 100644 tools/drawergen/exception.h delete mode 100644 tools/drawergen/fixedfunction/drawcolumncodegen.cpp delete mode 100644 tools/drawergen/fixedfunction/drawcolumncodegen.h delete mode 100644 tools/drawergen/fixedfunction/drawercodegen.cpp delete mode 100644 tools/drawergen/fixedfunction/drawercodegen.h delete mode 100644 tools/drawergen/fixedfunction/drawskycodegen.cpp delete mode 100644 tools/drawergen/fixedfunction/drawskycodegen.h delete mode 100644 tools/drawergen/fixedfunction/drawspancodegen.cpp delete mode 100644 tools/drawergen/fixedfunction/drawspancodegen.h delete mode 100644 tools/drawergen/fixedfunction/drawtrianglecodegen.cpp delete mode 100644 tools/drawergen/fixedfunction/drawtrianglecodegen.h delete mode 100644 tools/drawergen/fixedfunction/drawwallcodegen.cpp delete mode 100644 tools/drawergen/fixedfunction/drawwallcodegen.h delete mode 100644 tools/drawergen/fixedfunction/setuptrianglecodegen.cpp delete mode 100644 tools/drawergen/fixedfunction/setuptrianglecodegen.h delete mode 100644 tools/drawergen/llvm_include.h delete mode 100644 tools/drawergen/llvmdrawers.cpp delete mode 100644 tools/drawergen/llvmdrawers.h delete mode 100644 tools/drawergen/llvmprogram.cpp delete mode 100644 tools/drawergen/llvmprogram.h delete mode 100644 tools/drawergen/precomp.h delete mode 100644 tools/drawergen/ssa/ssa_barycentric_weight.h delete mode 100644 tools/drawergen/ssa/ssa_bool.cpp delete mode 100644 tools/drawergen/ssa/ssa_bool.h delete mode 100644 tools/drawergen/ssa/ssa_float.cpp delete mode 100644 tools/drawergen/ssa/ssa_float.h delete mode 100644 tools/drawergen/ssa/ssa_float_ptr.cpp delete mode 100644 
tools/drawergen/ssa/ssa_float_ptr.h delete mode 100644 tools/drawergen/ssa/ssa_for_block.cpp delete mode 100644 tools/drawergen/ssa/ssa_for_block.h delete mode 100644 tools/drawergen/ssa/ssa_function.cpp delete mode 100644 tools/drawergen/ssa/ssa_function.h delete mode 100644 tools/drawergen/ssa/ssa_if_block.cpp delete mode 100644 tools/drawergen/ssa/ssa_if_block.h delete mode 100644 tools/drawergen/ssa/ssa_int.cpp delete mode 100644 tools/drawergen/ssa/ssa_int.h delete mode 100644 tools/drawergen/ssa/ssa_int_ptr.cpp delete mode 100644 tools/drawergen/ssa/ssa_int_ptr.h delete mode 100644 tools/drawergen/ssa/ssa_phi.h delete mode 100644 tools/drawergen/ssa/ssa_scope.cpp delete mode 100644 tools/drawergen/ssa/ssa_scope.h delete mode 100644 tools/drawergen/ssa/ssa_short.cpp delete mode 100644 tools/drawergen/ssa/ssa_short.h delete mode 100644 tools/drawergen/ssa/ssa_stack.h delete mode 100644 tools/drawergen/ssa/ssa_struct_type.cpp delete mode 100644 tools/drawergen/ssa/ssa_struct_type.h delete mode 100644 tools/drawergen/ssa/ssa_ubyte.cpp delete mode 100644 tools/drawergen/ssa/ssa_ubyte.h delete mode 100644 tools/drawergen/ssa/ssa_ubyte_ptr.cpp delete mode 100644 tools/drawergen/ssa/ssa_ubyte_ptr.h delete mode 100644 tools/drawergen/ssa/ssa_value.cpp delete mode 100644 tools/drawergen/ssa/ssa_value.h delete mode 100644 tools/drawergen/ssa/ssa_vec16ub.cpp delete mode 100644 tools/drawergen/ssa/ssa_vec16ub.h delete mode 100644 tools/drawergen/ssa/ssa_vec4f.cpp delete mode 100644 tools/drawergen/ssa/ssa_vec4f.h delete mode 100644 tools/drawergen/ssa/ssa_vec4f_ptr.cpp delete mode 100644 tools/drawergen/ssa/ssa_vec4f_ptr.h delete mode 100644 tools/drawergen/ssa/ssa_vec4i.cpp delete mode 100644 tools/drawergen/ssa/ssa_vec4i.h delete mode 100644 tools/drawergen/ssa/ssa_vec4i_ptr.cpp delete mode 100644 tools/drawergen/ssa/ssa_vec4i_ptr.h delete mode 100644 tools/drawergen/ssa/ssa_vec8s.cpp delete mode 100644 tools/drawergen/ssa/ssa_vec8s.h delete mode 100644 
tools/drawergen/timestamp.h delete mode 100644 tools/drawergen/trustinfo.rc delete mode 100644 tools/drawergen/trustinfo.txt diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index c0029ca3f0..7a66ede47a 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -588,20 +588,6 @@ add_custom_target( revision_check ALL WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} DEPENDS updaterevision ) -# Run drawer codegen tool - -if ( WIN32 ) - add_custom_target( drawergen_target ALL - COMMAND drawergen src/r_drawersasm.obj - WORKING_DIRECTORY ${CMAKE_BINARY_DIR} - DEPENDS drawergen ) -else() - add_custom_target( drawergen_target ALL - COMMAND drawergen src/r_drawersasm.o - WORKING_DIRECTORY ${CMAKE_BINARY_DIR} - DEPENDS drawergen ) -endif() - # Libraries ZDoom needs message( STATUS "Fluid synth libs: ${FLUIDSYNTH_LIBRARIES}" ) @@ -857,7 +843,6 @@ set( FASTMATH_PCH_SOURCES swrenderer/drawers/r_draw.cpp swrenderer/drawers/r_draw_pal.cpp swrenderer/drawers/r_draw_rgba.cpp - swrenderer/drawers/r_drawers.cpp swrenderer/drawers/r_thread.cpp swrenderer/scene/r_3dfloors.cpp swrenderer/scene/r_light.cpp @@ -1296,16 +1281,6 @@ set (PCH_SOURCES ) enable_precompiled_headers( g_pch.h PCH_SOURCES ) -if ( WIN32 ) - set (CODEGENOBJ_SOURCES - r_drawersasm.obj - ) -else() - set (CODEGENOBJ_SOURCES - r_drawersasm.o - ) -endif() - add_executable( zdoom WIN32 MACOSX_BUNDLE ${HEADER_FILES} ${NOT_COMPILED_SOURCE_FILES} @@ -1336,11 +1311,8 @@ add_executable( zdoom WIN32 MACOSX_BUNDLE math/tanh.c math/fastsin.cpp zzautozend.cpp - ${CMAKE_BINARY_DIR}/src/${CODEGENOBJ_SOURCES} ) -set_source_files_properties( ${CODEGENOBJ_SOURCES} PROPERTIES EXTERNAL_OBJECT true GENERATED true) - set_source_files_properties( ${FASTMATH_SOURCES} PROPERTIES COMPILE_FLAGS ${ZD_FASTMATH_FLAG} ) set_source_files_properties( xlat/parse_xlat.cpp PROPERTIES OBJECT_DEPENDS "${CMAKE_CURRENT_BINARY_DIR}/xlat_parser.c" ) set_source_files_properties( sc_man.cpp PROPERTIES OBJECT_DEPENDS "${CMAKE_CURRENT_BINARY_DIR}/sc_man_scanner.h" ) @@ 
-1378,7 +1350,7 @@ include_directories( . ${CMAKE_BINARY_DIR}/gdtoa ${SYSTEM_SOURCES_DIR} ) -add_dependencies( zdoom revision_check drawergen_target ) +add_dependencies( zdoom revision_check ) # Due to some quirks, we need to do this in this order if( NOT ZDOOM_OUTPUT_OLDSTYLE ) diff --git a/src/polyrenderer/drawers/poly_triangle.cpp b/src/polyrenderer/drawers/poly_triangle.cpp index e51dbe81a1..0a58c6e60d 100644 --- a/src/polyrenderer/drawers/poly_triangle.cpp +++ b/src/polyrenderer/drawers/poly_triangle.cpp @@ -85,8 +85,6 @@ void PolyTriangleDrawer::draw(const PolyDrawArgs &args) PolyRenderer::Instance()->Thread.DrawQueue->Push(args, mirror); } -EXTERN_CVAR(Bool, r_phpdrawers); - void PolyTriangleDrawer::draw_arrays(const PolyDrawArgs &drawargs, WorkerThreadData *thread) { if (drawargs.vcount < 3) @@ -100,21 +98,11 @@ void PolyTriangleDrawer::draw_arrays(const PolyDrawArgs &drawargs, WorkerThreadD if (!r_debug_trisetup) // For profiling how much time is spent in setup vs drawal { int bmode = (int)drawargs.blendmode; - if (r_phpdrawers) - { - if (drawargs.writeColor && drawargs.texturePixels) - drawfuncs[num_drawfuncs++] = dest_bgra ? ScreenTriangle::TriDraw32[bmode] : ScreenTriangle::TriDraw8[bmode]; - else if (drawargs.writeColor) - drawfuncs[num_drawfuncs++] = dest_bgra ? ScreenTriangle::TriFill32[bmode] : ScreenTriangle::TriFill8[bmode]; - } - else - { - auto llvm = Drawers::Instance(); - if (drawargs.writeColor && drawargs.texturePixels) - drawfuncs[num_drawfuncs++] = dest_bgra ? llvm->TriDraw32[bmode] : llvm->TriDraw8[bmode]; - else if (drawargs.writeColor) - drawfuncs[num_drawfuncs++] = dest_bgra ? llvm->TriFill32[bmode] : llvm->TriFill8[bmode]; - } + + if (drawargs.writeColor && drawargs.texturePixels) + drawfuncs[num_drawfuncs++] = dest_bgra ? ScreenTriangle::TriDraw32[bmode] : ScreenTriangle::TriDraw8[bmode]; + else if (drawargs.writeColor) + drawfuncs[num_drawfuncs++] = dest_bgra ? 
ScreenTriangle::TriFill32[bmode] : ScreenTriangle::TriFill8[bmode]; } if (drawargs.writeStencil) diff --git a/src/swrenderer/drawers/r_draw_rgba.cpp b/src/swrenderer/drawers/r_draw_rgba.cpp index f60f1447c4..f9c120a594 100644 --- a/src/swrenderer/drawers/r_draw_rgba.cpp +++ b/src/swrenderer/drawers/r_draw_rgba.cpp @@ -62,104 +62,66 @@ CVAR(Bool, r_mipmap, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); // Level of detail texture bias CVAR(Float, r_lod_bias, -1.5, 0); // To do: add CVAR_ARCHIVE | CVAR_GLOBALCONFIG when a good default has been decided -CVAR(Bool, r_phpdrawers, false, 0); - namespace swrenderer { void SWTruecolorDrawers::DrawWallColumn(const WallDrawerArgs &args) { - if (r_phpdrawers) - Queue->Push(args); - else - Queue->Push(args); + Queue->Push(args); } void SWTruecolorDrawers::DrawWallMaskedColumn(const WallDrawerArgs &args) { - if (r_phpdrawers) - Queue->Push(args); - else - Queue->Push(args); + Queue->Push(args); } void SWTruecolorDrawers::DrawWallAddColumn(const WallDrawerArgs &args) { - if (r_phpdrawers) - Queue->Push(args); - else - Queue->Push(args); + Queue->Push(args); } void SWTruecolorDrawers::DrawWallAddClampColumn(const WallDrawerArgs &args) { - if (r_phpdrawers) - Queue->Push(args); - else - Queue->Push(args); + Queue->Push(args); } void SWTruecolorDrawers::DrawWallSubClampColumn(const WallDrawerArgs &args) { - if (r_phpdrawers) - Queue->Push(args); - else - Queue->Push(args); + Queue->Push(args); } void SWTruecolorDrawers::DrawWallRevSubClampColumn(const WallDrawerArgs &args) { - if (r_phpdrawers) - Queue->Push(args); - else - Queue->Push(args); + Queue->Push(args); } void SWTruecolorDrawers::DrawColumn(const SpriteDrawerArgs &args) { - if (r_phpdrawers) - Queue->Push(args); - else - Queue->Push(args); + Queue->Push(args); } void SWTruecolorDrawers::FillColumn(const SpriteDrawerArgs &args) { - if (r_phpdrawers) - Queue->Push(args); - else - Queue->Push(args); + Queue->Push(args); } void SWTruecolorDrawers::FillAddColumn(const 
SpriteDrawerArgs &args) { - if (r_phpdrawers) - Queue->Push(args); - else - Queue->Push(args); + Queue->Push(args); } void SWTruecolorDrawers::FillAddClampColumn(const SpriteDrawerArgs &args) { - if (r_phpdrawers) - Queue->Push(args); - else - Queue->Push(args); + Queue->Push(args); } void SWTruecolorDrawers::FillSubClampColumn(const SpriteDrawerArgs &args) { - if (r_phpdrawers) - Queue->Push(args); - else - Queue->Push(args); + Queue->Push(args); } void SWTruecolorDrawers::FillRevSubClampColumn(const SpriteDrawerArgs &args) { - if (r_phpdrawers) - Queue->Push(args); - else - Queue->Push(args); + Queue->Push(args); } void SWTruecolorDrawers::DrawFuzzColumn(const SpriteDrawerArgs &args) @@ -170,410 +132,92 @@ namespace swrenderer void SWTruecolorDrawers::DrawAddColumn(const SpriteDrawerArgs &args) { - if (r_phpdrawers) - Queue->Push(args); - else - Queue->Push(args); + Queue->Push(args); } void SWTruecolorDrawers::DrawTranslatedColumn(const SpriteDrawerArgs &args) { - if (r_phpdrawers) - Queue->Push(args); - else - Queue->Push(args); + Queue->Push(args); } void SWTruecolorDrawers::DrawTranslatedAddColumn(const SpriteDrawerArgs &args) { - if (r_phpdrawers) - Queue->Push(args); - else - Queue->Push(args); + Queue->Push(args); } void SWTruecolorDrawers::DrawShadedColumn(const SpriteDrawerArgs &args) { - if (r_phpdrawers) - Queue->Push(args); - else - Queue->Push(args); + Queue->Push(args); } void SWTruecolorDrawers::DrawAddClampColumn(const SpriteDrawerArgs &args) { - if (r_phpdrawers) - Queue->Push(args); - else - Queue->Push(args); + Queue->Push(args); } void SWTruecolorDrawers::DrawAddClampTranslatedColumn(const SpriteDrawerArgs &args) { - if (r_phpdrawers) - Queue->Push(args); - else - Queue->Push(args); + Queue->Push(args); } void SWTruecolorDrawers::DrawSubClampColumn(const SpriteDrawerArgs &args) { - if (r_phpdrawers) - Queue->Push(args); - else - Queue->Push(args); + Queue->Push(args); } void SWTruecolorDrawers::DrawSubClampTranslatedColumn(const 
SpriteDrawerArgs &args) { - if (r_phpdrawers) - Queue->Push(args); - else - Queue->Push(args); + Queue->Push(args); } void SWTruecolorDrawers::DrawRevSubClampColumn(const SpriteDrawerArgs &args) { - if (r_phpdrawers) - Queue->Push(args); - else - Queue->Push(args); + Queue->Push(args); } void SWTruecolorDrawers::DrawRevSubClampTranslatedColumn(const SpriteDrawerArgs &args) { - if (r_phpdrawers) - Queue->Push(args); - else - Queue->Push(args); + Queue->Push(args); } void SWTruecolorDrawers::DrawSpan(const SpanDrawerArgs &args) { - if (r_phpdrawers) - Queue->Push(args); - else - Queue->Push(args); + Queue->Push(args); } void SWTruecolorDrawers::DrawSpanMasked(const SpanDrawerArgs &args) { - if (r_phpdrawers) - Queue->Push(args); - else - Queue->Push(args); + Queue->Push(args); } void SWTruecolorDrawers::DrawSpanTranslucent(const SpanDrawerArgs &args) { - if (r_phpdrawers) - Queue->Push(args); - else - Queue->Push(args); + Queue->Push(args); } void SWTruecolorDrawers::DrawSpanMaskedTranslucent(const SpanDrawerArgs &args) { - if (r_phpdrawers) - Queue->Push(args); - else - Queue->Push(args); + Queue->Push(args); } void SWTruecolorDrawers::DrawSpanAddClamp(const SpanDrawerArgs &args) { - if (r_phpdrawers) - Queue->Push(args); - else - Queue->Push(args); + Queue->Push(args); } void SWTruecolorDrawers::DrawSpanMaskedAddClamp(const SpanDrawerArgs &args) { - if (r_phpdrawers) - Queue->Push(args); - else - Queue->Push(args); + Queue->Push(args); } void SWTruecolorDrawers::DrawSingleSkyColumn(const SkyDrawerArgs &args) { - if (r_phpdrawers) - Queue->Push(args); - else - Queue->Push(args); + Queue->Push(args); } void SWTruecolorDrawers::DrawDoubleSkyColumn(const SkyDrawerArgs &args) { - if (r_phpdrawers) - Queue->Push(args); - else - Queue->Push(args); - } - - DrawSpanLLVMCommand::DrawSpanLLVMCommand(const SpanDrawerArgs &drawerargs) - { - auto shade_constants = drawerargs.ColormapConstants(); - args.xfrac = drawerargs.TextureUPos(); - args.yfrac = drawerargs.TextureVPos(); - 
args.xstep = drawerargs.TextureUStep(); - args.ystep = drawerargs.TextureVStep(); - args.x1 = drawerargs.DestX1(); - args.x2 = drawerargs.DestX2(); - args.y = drawerargs.DestY(); - args.xbits = drawerargs.TextureWidthBits(); - args.ybits = drawerargs.TextureHeightBits(); - args.destorg = (uint32_t*)RenderViewport::Instance()->GetDest(0, 0); - args.destpitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - args.source = (const uint32_t*)drawerargs.TexturePixels(); - args.light = LightBgra::calc_light_multiplier(drawerargs.Light()); - args.light_red = shade_constants.light_red; - args.light_green = shade_constants.light_green; - args.light_blue = shade_constants.light_blue; - args.light_alpha = shade_constants.light_alpha; - args.fade_red = shade_constants.fade_red; - args.fade_green = shade_constants.fade_green; - args.fade_blue = shade_constants.fade_blue; - args.fade_alpha = shade_constants.fade_alpha; - args.desaturate = shade_constants.desaturate; - args.srcalpha = drawerargs.SrcAlpha() >> (FRACBITS - 8); - args.destalpha = drawerargs.DestAlpha() >> (FRACBITS - 8); - args.flags = 0; - if (shade_constants.simple_shade) - args.flags |= DrawSpanArgs::simple_shade; - if (!sampler_setup(drawerargs.TextureLOD(), args.source, args.xbits, args.ybits, drawerargs.MipmappedTexture())) - args.flags |= DrawSpanArgs::nearest_filter; - - args.viewpos_x = drawerargs.dc_viewpos.X; - args.step_viewpos_x = drawerargs.dc_viewpos_step.X; - args.dynlights = drawerargs.dc_lights; - args.num_dynlights = drawerargs.dc_num_lights; - } - - void DrawSpanLLVMCommand::Execute(DrawerThread *thread) - { - if (thread->skipped_by_thread(args.y)) - return; - Drawers::Instance()->DrawSpan(&args); - } - - FString DrawSpanLLVMCommand::DebugInfo() - { - return "DrawSpan\n" + args.ToString(); - } - - bool DrawSpanLLVMCommand::sampler_setup(double lod, const uint32_t * &source, int &xbits, int &ybits, bool mipmapped) - { - bool magnifying = lod < 0.0; - if (r_mipmap && mipmapped) - { - int 
level = (int)lod; - while (level > 0) - { - if (xbits <= 2 || ybits <= 2) - break; - - source += (1 << (xbits)) * (1 << (ybits)); - xbits -= 1; - ybits -= 1; - level--; - } - } - - return (magnifying && r_magfilter) || (!magnifying && r_minfilter); - } - - ///////////////////////////////////////////////////////////////////////////// - - void DrawSpanMaskedLLVMCommand::Execute(DrawerThread *thread) - { - if (thread->skipped_by_thread(args.y)) - return; - Drawers::Instance()->DrawSpanMasked(&args); - } - - void DrawSpanTranslucentLLVMCommand::Execute(DrawerThread *thread) - { - if (thread->skipped_by_thread(args.y)) - return; - Drawers::Instance()->DrawSpanTranslucent(&args); - } - - void DrawSpanMaskedTranslucentLLVMCommand::Execute(DrawerThread *thread) - { - if (thread->skipped_by_thread(args.y)) - return; - Drawers::Instance()->DrawSpanMaskedTranslucent(&args); - } - - void DrawSpanAddClampLLVMCommand::Execute(DrawerThread *thread) - { - if (thread->skipped_by_thread(args.y)) - return; - Drawers::Instance()->DrawSpanAddClamp(&args); - } - - void DrawSpanMaskedAddClampLLVMCommand::Execute(DrawerThread *thread) - { - if (thread->skipped_by_thread(args.y)) - return; - Drawers::Instance()->DrawSpanMaskedAddClamp(&args); - } - - ///////////////////////////////////////////////////////////////////////////// - - WorkerThreadData DrawWall1LLVMCommand::ThreadData(DrawerThread *thread) - { - WorkerThreadData d; - d.core = thread->core; - d.num_cores = thread->num_cores; - d.pass_start_y = thread->pass_start_y; - d.pass_end_y = thread->pass_end_y; - return d; - } - - DrawWall1LLVMCommand::DrawWall1LLVMCommand(const WallDrawerArgs &drawerargs) - { - auto shade_constants = drawerargs.ColormapConstants(); - args.dest = (uint32_t*)drawerargs.Dest(); - args.dest_y = drawerargs.DestY(); - args.pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - args.count = drawerargs.Count(); - args.texturefrac[0] = drawerargs.TextureVPos(); - args.texturefracx[0] = 
drawerargs.TextureUPos(); - args.iscale[0] = drawerargs.TextureVStep(); - args.textureheight[0] = drawerargs.TextureHeight(); - args.source[0] = (const uint32 *)drawerargs.TexturePixels(); - args.source2[0] = (const uint32 *)drawerargs.TexturePixels2(); - args.light[0] = LightBgra::calc_light_multiplier(drawerargs.Light()); - args.light_red = shade_constants.light_red; - args.light_green = shade_constants.light_green; - args.light_blue = shade_constants.light_blue; - args.light_alpha = shade_constants.light_alpha; - args.fade_red = shade_constants.fade_red; - args.fade_green = shade_constants.fade_green; - args.fade_blue = shade_constants.fade_blue; - args.fade_alpha = shade_constants.fade_alpha; - args.desaturate = shade_constants.desaturate; - args.srcalpha = drawerargs.SrcAlpha() >> (FRACBITS - 8); - args.destalpha = drawerargs.DestAlpha() >> (FRACBITS - 8); - args.flags = 0; - if (shade_constants.simple_shade) - args.flags |= DrawWallArgs::simple_shade; - if (args.source2[0] == nullptr) - args.flags |= DrawWallArgs::nearest_filter; - - args.z = drawerargs.dc_viewpos.Z; - args.step_z = drawerargs.dc_viewpos_step.Z; - args.dynlights = drawerargs.dc_lights; - args.num_dynlights = drawerargs.dc_num_lights; - } - - void DrawWall1LLVMCommand::Execute(DrawerThread *thread) - { - WorkerThreadData d = ThreadData(thread); - Drawers::Instance()->vlinec1(&args, &d); - } - - FString DrawWall1LLVMCommand::DebugInfo() - { - return "DrawWall1\n" + args.ToString(); - } - - ///////////////////////////////////////////////////////////////////////////// - - WorkerThreadData DrawColumnLLVMCommand::ThreadData(DrawerThread *thread) - { - WorkerThreadData d; - d.core = thread->core; - d.num_cores = thread->num_cores; - d.pass_start_y = thread->pass_start_y; - d.pass_end_y = thread->pass_end_y; - return d; - } - - FString DrawColumnLLVMCommand::DebugInfo() - { - return "DrawColumn\n" + args.ToString(); - } - - DrawColumnLLVMCommand::DrawColumnLLVMCommand(const SpriteDrawerArgs 
&drawerargs) - { - auto shade_constants = drawerargs.ColormapConstants(); - - args.dest = (uint32_t*)drawerargs.Dest(); - args.source = drawerargs.TexturePixels(); - args.source2 = drawerargs.TexturePixels2(); - args.colormap = drawerargs.Colormap(); - args.translation = drawerargs.TranslationMap(); - args.basecolors = (const uint32_t *)GPalette.BaseColors; - args.pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - args.count = drawerargs.Count(); - args.dest_y = drawerargs.DestY(); - args.iscale = drawerargs.TextureVStep(); - args.texturefracx = drawerargs.TextureUPos(); - args.textureheight = drawerargs.TextureHeight(); - args.texturefrac = drawerargs.TextureVPos(); - args.light = LightBgra::calc_light_multiplier(drawerargs.Light()); - args.color = LightBgra::shade_pal_index_simple(drawerargs.SolidColor(), args.light); - args.srccolor = drawerargs.SrcColorBgra(); - args.srcalpha = drawerargs.SrcAlpha() >> (FRACBITS - 8); - args.destalpha = drawerargs.DestAlpha() >> (FRACBITS - 8); - args.light_red = shade_constants.light_red; - args.light_green = shade_constants.light_green; - args.light_blue = shade_constants.light_blue; - args.light_alpha = shade_constants.light_alpha; - args.fade_red = shade_constants.fade_red; - args.fade_green = shade_constants.fade_green; - args.fade_blue = shade_constants.fade_blue; - args.fade_alpha = shade_constants.fade_alpha; - args.desaturate = shade_constants.desaturate; - args.flags = 0; - if (shade_constants.simple_shade) - args.flags |= DrawColumnArgs::simple_shade; - if (args.source2 == nullptr) - args.flags |= DrawColumnArgs::nearest_filter; - } - - void DrawColumnLLVMCommand::Execute(DrawerThread *thread) - { - WorkerThreadData d = ThreadData(thread); - Drawers::Instance()->DrawColumn(&args, &d); - } - - ///////////////////////////////////////////////////////////////////////////// - - WorkerThreadData DrawSkyLLVMCommand::ThreadData(DrawerThread *thread) - { - WorkerThreadData d; - d.core = thread->core; - 
d.num_cores = thread->num_cores; - d.pass_start_y = thread->pass_start_y; - d.pass_end_y = thread->pass_end_y; - return d; - } - - DrawSkyLLVMCommand::DrawSkyLLVMCommand(const SkyDrawerArgs &drawerargs) - { - args.dest = (uint32_t*)drawerargs.Dest(); - args.dest_y = drawerargs.DestY(); - args.count = drawerargs.Count(); - args.pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - args.texturefrac[0] = drawerargs.TextureVPos(); - args.iscale[0] = drawerargs.TextureVStep(); - args.source0[0] = (const uint32_t *)drawerargs.FrontTexturePixels(); - args.source1[0] = (const uint32_t *)drawerargs.BackTexturePixels(); - args.textureheight0 = drawerargs.FrontTextureHeight(); - args.textureheight1 = drawerargs.BackTextureHeight(); - args.top_color = drawerargs.SolidTopColor(); - args.bottom_color = drawerargs.SolidBottomColor(); - args.flags = drawerargs.FadeSky() ? DrawSkyArgs::fade_sky : 0; - } - - FString DrawSkyLLVMCommand::DebugInfo() - { - return "DrawSky\n" + args.ToString(); + Queue->Push(args); } ///////////////////////////////////////////////////////////////////////////// diff --git a/src/swrenderer/drawers/r_draw_rgba.h b/src/swrenderer/drawers/r_draw_rgba.h index a3ec7e5f94..1ed068f827 100644 --- a/src/swrenderer/drawers/r_draw_rgba.h +++ b/src/swrenderer/drawers/r_draw_rgba.h @@ -68,131 +68,6 @@ namespace swrenderer #endif #endif - #define DECLARE_DRAW_COMMAND(name, func, base) \ - class name##LLVMCommand : public base \ - { \ - public: \ - using base::base; \ - void Execute(DrawerThread *thread) override \ - { \ - WorkerThreadData d = ThreadData(thread); \ - Drawers::Instance()->func(&args, &d); \ - } \ - }; - - class DrawSpanLLVMCommand : public DrawerCommand - { - public: - DrawSpanLLVMCommand(const SpanDrawerArgs &drawerargs); - - void Execute(DrawerThread *thread) override; - FString DebugInfo() override; - - protected: - DrawSpanArgs args; - - private: - inline static bool sampler_setup(double lod, const uint32_t * &source, int &xbits, int 
&ybits, bool mipmapped); - }; - - class DrawSpanMaskedLLVMCommand : public DrawSpanLLVMCommand - { - public: - using DrawSpanLLVMCommand::DrawSpanLLVMCommand; - void Execute(DrawerThread *thread) override; - }; - - class DrawSpanTranslucentLLVMCommand : public DrawSpanLLVMCommand - { - public: - using DrawSpanLLVMCommand::DrawSpanLLVMCommand; - void Execute(DrawerThread *thread) override; - }; - - class DrawSpanMaskedTranslucentLLVMCommand : public DrawSpanLLVMCommand - { - public: - using DrawSpanLLVMCommand::DrawSpanLLVMCommand; - void Execute(DrawerThread *thread) override; - }; - - class DrawSpanAddClampLLVMCommand : public DrawSpanLLVMCommand - { - public: - using DrawSpanLLVMCommand::DrawSpanLLVMCommand; - void Execute(DrawerThread *thread) override; - }; - - class DrawSpanMaskedAddClampLLVMCommand : public DrawSpanLLVMCommand - { - public: - using DrawSpanLLVMCommand::DrawSpanLLVMCommand; - void Execute(DrawerThread *thread) override; - }; - - class DrawWall1LLVMCommand : public DrawerCommand - { - protected: - DrawWallArgs args; - - WorkerThreadData ThreadData(DrawerThread *thread); - - public: - DrawWall1LLVMCommand(const WallDrawerArgs &drawerargs); - - void Execute(DrawerThread *thread) override; - FString DebugInfo() override; - }; - - class DrawColumnLLVMCommand : public DrawerCommand - { - protected: - DrawColumnArgs args; - - WorkerThreadData ThreadData(DrawerThread *thread); - FString DebugInfo() override; - - public: - DrawColumnLLVMCommand(const SpriteDrawerArgs &drawerargs); - - void Execute(DrawerThread *thread) override; - }; - - class DrawSkyLLVMCommand : public DrawerCommand - { - protected: - DrawSkyArgs args; - - WorkerThreadData ThreadData(DrawerThread *thread); - - public: - DrawSkyLLVMCommand(const SkyDrawerArgs &drawerargs); - FString DebugInfo() override; - }; - - DECLARE_DRAW_COMMAND(DrawWallMasked1, mvlinec1, DrawWall1LLVMCommand); - DECLARE_DRAW_COMMAND(DrawWallAdd1, tmvline1_add, DrawWall1LLVMCommand); - 
DECLARE_DRAW_COMMAND(DrawWallAddClamp1, tmvline1_addclamp, DrawWall1LLVMCommand); - DECLARE_DRAW_COMMAND(DrawWallSubClamp1, tmvline1_subclamp, DrawWall1LLVMCommand); - DECLARE_DRAW_COMMAND(DrawWallRevSubClamp1, tmvline1_revsubclamp, DrawWall1LLVMCommand); - DECLARE_DRAW_COMMAND(DrawColumnAdd, DrawColumnAdd, DrawColumnLLVMCommand); - DECLARE_DRAW_COMMAND(DrawColumnTranslated, DrawColumnTranslated, DrawColumnLLVMCommand); - DECLARE_DRAW_COMMAND(DrawColumnTlatedAdd, DrawColumnTlatedAdd, DrawColumnLLVMCommand); - DECLARE_DRAW_COMMAND(DrawColumnShaded, DrawColumnShaded, DrawColumnLLVMCommand); - DECLARE_DRAW_COMMAND(DrawColumnAddClamp, DrawColumnAddClamp, DrawColumnLLVMCommand); - DECLARE_DRAW_COMMAND(DrawColumnAddClampTranslated, DrawColumnAddClampTranslated, DrawColumnLLVMCommand); - DECLARE_DRAW_COMMAND(DrawColumnSubClamp, DrawColumnSubClamp, DrawColumnLLVMCommand); - DECLARE_DRAW_COMMAND(DrawColumnSubClampTranslated, DrawColumnSubClampTranslated, DrawColumnLLVMCommand); - DECLARE_DRAW_COMMAND(DrawColumnRevSubClamp, DrawColumnRevSubClamp, DrawColumnLLVMCommand); - DECLARE_DRAW_COMMAND(DrawColumnRevSubClampTranslated, DrawColumnRevSubClampTranslated, DrawColumnLLVMCommand); - DECLARE_DRAW_COMMAND(FillColumn, FillColumn, DrawColumnLLVMCommand); - DECLARE_DRAW_COMMAND(FillColumnAdd, FillColumnAdd, DrawColumnLLVMCommand); - DECLARE_DRAW_COMMAND(FillColumnAddClamp, FillColumnAddClamp, DrawColumnLLVMCommand); - DECLARE_DRAW_COMMAND(FillColumnSubClamp, FillColumnSubClamp, DrawColumnLLVMCommand); - DECLARE_DRAW_COMMAND(FillColumnRevSubClamp, FillColumnRevSubClamp, DrawColumnLLVMCommand); - DECLARE_DRAW_COMMAND(DrawSingleSky1, DrawSky1, DrawSkyLLVMCommand); - DECLARE_DRAW_COMMAND(DrawDoubleSky1, DrawDoubleSky1, DrawSkyLLVMCommand); - class DrawFuzzColumnRGBACommand : public DrawerCommand { int _x; diff --git a/src/swrenderer/drawers/r_drawers.cpp b/src/swrenderer/drawers/r_drawers.cpp deleted file mode 100644 index 544b8e779a..0000000000 --- 
a/src/swrenderer/drawers/r_drawers.cpp +++ /dev/null @@ -1,287 +0,0 @@ -/* -** LLVM code generated drawers -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 'as-is', without any express or implied -** warranty. In no event will the authors be held liable for any damages -** arising from the use of this software. -** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. -** 3. This notice may not be removed or altered from any source distribution. 
-** -*/ - -#include "i_system.h" -#include "r_drawers.h" -#include "x86.h" -#include "c_cvars.h" -#include "version.h" -#include "m_misc.h" - -///////////////////////////////////////////////////////////////////////////// - -#if !defined(NO_DRAWERGEN) - -extern "C" -{ - void DrawColumn_SSE2(const DrawColumnArgs *, const WorkerThreadData *); - void DrawColumnAdd_SSE2(const DrawColumnArgs *, const WorkerThreadData *); - void DrawColumnShaded_SSE2(const DrawColumnArgs *, const WorkerThreadData *); - void DrawColumnAddClamp_SSE2(const DrawColumnArgs *, const WorkerThreadData *); - void DrawColumnSubClamp_SSE2(const DrawColumnArgs *, const WorkerThreadData *); - void DrawColumnRevSubClamp_SSE2(const DrawColumnArgs *, const WorkerThreadData *); - void DrawColumnTranslated_SSE2(const DrawColumnArgs *, const WorkerThreadData *); - void DrawColumnTlatedAdd_SSE2(const DrawColumnArgs *, const WorkerThreadData *); - void DrawColumnAddClampTranslated_SSE2(const DrawColumnArgs *, const WorkerThreadData *); - void DrawColumnSubClampTranslated_SSE2(const DrawColumnArgs *, const WorkerThreadData *); - void DrawColumnRevSubClampTranslated_SSE2(const DrawColumnArgs *, const WorkerThreadData *); - void FillColumn_SSE2(const DrawColumnArgs *, const WorkerThreadData *); - void FillColumnAdd_SSE2(const DrawColumnArgs *, const WorkerThreadData *); - void FillColumnAddClamp_SSE2(const DrawColumnArgs *, const WorkerThreadData *); - void FillColumnSubClamp_SSE2(const DrawColumnArgs *, const WorkerThreadData *); - void FillColumnRevSubClamp_SSE2(const DrawColumnArgs *, const WorkerThreadData *); - void DrawColumnRt1_SSE2(const DrawColumnArgs *, const WorkerThreadData *); - void DrawColumnRt1Copy_SSE2(const DrawColumnArgs *, const WorkerThreadData *); - void DrawColumnRt1Add_SSE2(const DrawColumnArgs *, const WorkerThreadData *); - void DrawColumnRt1Shaded_SSE2(const DrawColumnArgs *, const WorkerThreadData *); - void DrawColumnRt1AddClamp_SSE2(const DrawColumnArgs *, const WorkerThreadData *); 
- void DrawColumnRt1SubClamp_SSE2(const DrawColumnArgs *, const WorkerThreadData *); - void DrawColumnRt1RevSubClamp_SSE2(const DrawColumnArgs *, const WorkerThreadData *); - void DrawColumnRt1Translated_SSE2(const DrawColumnArgs *, const WorkerThreadData *); - void DrawColumnRt1TlatedAdd_SSE2(const DrawColumnArgs *, const WorkerThreadData *); - void DrawColumnRt1AddClampTranslated_SSE2(const DrawColumnArgs *, const WorkerThreadData *); - void DrawColumnRt1SubClampTranslated_SSE2(const DrawColumnArgs *, const WorkerThreadData *); - void DrawColumnRt1RevSubClampTranslated_SSE2(const DrawColumnArgs *, const WorkerThreadData *); - void DrawColumnRt4_SSE2(const DrawColumnArgs *, const WorkerThreadData *); - void DrawColumnRt4Copy_SSE2(const DrawColumnArgs *, const WorkerThreadData *); - void DrawColumnRt4Add_SSE2(const DrawColumnArgs *, const WorkerThreadData *); - void DrawColumnRt4Shaded_SSE2(const DrawColumnArgs *, const WorkerThreadData *); - void DrawColumnRt4AddClamp_SSE2(const DrawColumnArgs *, const WorkerThreadData *); - void DrawColumnRt4SubClamp_SSE2(const DrawColumnArgs *, const WorkerThreadData *); - void DrawColumnRt4RevSubClamp_SSE2(const DrawColumnArgs *, const WorkerThreadData *); - void DrawColumnRt4Translated_SSE2(const DrawColumnArgs *, const WorkerThreadData *); - void DrawColumnRt4TlatedAdd_SSE2(const DrawColumnArgs *, const WorkerThreadData *); - void DrawColumnRt4AddClampTranslated_SSE2(const DrawColumnArgs *, const WorkerThreadData *); - void DrawColumnRt4SubClampTranslated_SSE2(const DrawColumnArgs *, const WorkerThreadData *); - void DrawColumnRt4RevSubClampTranslated_SSE2(const DrawColumnArgs *, const WorkerThreadData *); - void DrawSpan_SSE2(const DrawSpanArgs *); - void DrawSpanMasked_SSE2(const DrawSpanArgs *); - void DrawSpanTranslucent_SSE2(const DrawSpanArgs *); - void DrawSpanMaskedTranslucent_SSE2(const DrawSpanArgs *); - void DrawSpanAddClamp_SSE2(const DrawSpanArgs *); - void DrawSpanMaskedAddClamp_SSE2(const DrawSpanArgs *); - 
void vlinec1_SSE2(const DrawWallArgs *, const WorkerThreadData *); - void mvlinec1_SSE2(const DrawWallArgs *, const WorkerThreadData *); - void tmvline1_add_SSE2(const DrawWallArgs *, const WorkerThreadData *); - void tmvline4_add_SSE2(const DrawWallArgs *, const WorkerThreadData *); - void tmvline1_addclamp_SSE2(const DrawWallArgs *, const WorkerThreadData *); - void tmvline4_addclamp_SSE2(const DrawWallArgs *, const WorkerThreadData *); - void tmvline1_subclamp_SSE2(const DrawWallArgs *, const WorkerThreadData *); - void tmvline4_subclamp_SSE2(const DrawWallArgs *, const WorkerThreadData *); - void tmvline1_revsubclamp_SSE2(const DrawWallArgs *, const WorkerThreadData *); - void tmvline4_revsubclamp_SSE2(const DrawWallArgs *, const WorkerThreadData *); - void DrawSky1_SSE2(const DrawSkyArgs *, const WorkerThreadData *); - void DrawDoubleSky1_SSE2(const DrawSkyArgs *, const WorkerThreadData *); - void TriDraw8_0_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriDraw8_1_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriDraw8_2_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriDraw8_3_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriDraw8_4_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriDraw8_5_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriDraw8_6_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriDraw8_7_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriDraw8_8_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriDraw8_9_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriDraw8_10_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriDraw8_11_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriDraw8_12_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriDraw8_13_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriDraw8_14_SSE2(const TriDrawTriangleArgs *, 
WorkerThreadData *); - void TriDraw32_0_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriDraw32_1_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriDraw32_2_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriDraw32_3_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriDraw32_4_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriDraw32_5_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriDraw32_6_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriDraw32_7_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriDraw32_8_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriDraw32_9_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriDraw32_10_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriDraw32_11_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriDraw32_12_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriDraw32_13_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriDraw32_14_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriFill8_0_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriFill8_1_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriFill8_2_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriFill8_3_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriFill8_4_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriFill8_5_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriFill8_6_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriFill8_7_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriFill8_8_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriFill8_9_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriFill8_10_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriFill8_11_SSE2(const TriDrawTriangleArgs *, 
WorkerThreadData *); - void TriFill8_12_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriFill8_13_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriFill8_14_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriFill32_0_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriFill32_1_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriFill32_2_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriFill32_3_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriFill32_4_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriFill32_5_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriFill32_6_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriFill32_7_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriFill32_8_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriFill32_9_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriFill32_10_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriFill32_11_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriFill32_12_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriFill32_13_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); - void TriFill32_14_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); -} -#endif - -///////////////////////////////////////////////////////////////////////////// - -Drawers::Drawers() -{ -#if !defined(NO_DRAWERGEN) - DrawColumn = DrawColumn_SSE2; - DrawColumnAdd = DrawColumnAdd_SSE2; - DrawColumnShaded = DrawColumnShaded_SSE2; - DrawColumnAddClamp = DrawColumnAddClamp_SSE2; - DrawColumnSubClamp = DrawColumnSubClamp_SSE2; - DrawColumnRevSubClamp = DrawColumnRevSubClamp_SSE2; - DrawColumnTranslated = DrawColumnTranslated_SSE2; - DrawColumnTlatedAdd = DrawColumnTlatedAdd_SSE2; - DrawColumnAddClampTranslated = DrawColumnAddClampTranslated_SSE2; - DrawColumnSubClampTranslated = 
DrawColumnSubClampTranslated_SSE2; - DrawColumnRevSubClampTranslated = DrawColumnRevSubClampTranslated_SSE2; - FillColumn = FillColumn_SSE2; - FillColumnAdd = FillColumnAdd_SSE2; - FillColumnAddClamp = FillColumnAddClamp_SSE2; - FillColumnSubClamp = FillColumnSubClamp_SSE2; - FillColumnRevSubClamp = FillColumnRevSubClamp_SSE2; - DrawSpan = DrawSpan_SSE2; - DrawSpanMasked = DrawSpanMasked_SSE2; - DrawSpanTranslucent = DrawSpanTranslucent_SSE2; - DrawSpanMaskedTranslucent = DrawSpanMaskedTranslucent_SSE2; - DrawSpanAddClamp = DrawSpanAddClamp_SSE2; - DrawSpanMaskedAddClamp = DrawSpanMaskedAddClamp_SSE2; - vlinec1 = vlinec1_SSE2; - mvlinec1 = mvlinec1_SSE2; - tmvline1_add = tmvline1_add_SSE2; - tmvline1_addclamp = tmvline1_addclamp_SSE2; - tmvline1_subclamp = tmvline1_subclamp_SSE2; - tmvline1_revsubclamp = tmvline1_revsubclamp_SSE2; - DrawSky1 = DrawSky1_SSE2; - DrawDoubleSky1 = DrawDoubleSky1_SSE2; - TriDraw8.push_back(TriDraw8_0_SSE2); - TriDraw8.push_back(TriDraw8_1_SSE2); - TriDraw8.push_back(TriDraw8_2_SSE2); - TriDraw8.push_back(TriDraw8_3_SSE2); - TriDraw8.push_back(TriDraw8_4_SSE2); - TriDraw8.push_back(TriDraw8_5_SSE2); - TriDraw8.push_back(TriDraw8_6_SSE2); - TriDraw8.push_back(TriDraw8_7_SSE2); - TriDraw8.push_back(TriDraw8_8_SSE2); - TriDraw8.push_back(TriDraw8_9_SSE2); - TriDraw8.push_back(TriDraw8_10_SSE2); - TriDraw8.push_back(TriDraw8_11_SSE2); - TriDraw8.push_back(TriDraw8_12_SSE2); - TriDraw8.push_back(TriDraw8_13_SSE2); - TriDraw8.push_back(TriDraw8_14_SSE2); - TriDraw32.push_back(TriDraw32_0_SSE2); - TriDraw32.push_back(TriDraw32_1_SSE2); - TriDraw32.push_back(TriDraw32_2_SSE2); - TriDraw32.push_back(TriDraw32_3_SSE2); - TriDraw32.push_back(TriDraw32_4_SSE2); - TriDraw32.push_back(TriDraw32_5_SSE2); - TriDraw32.push_back(TriDraw32_6_SSE2); - TriDraw32.push_back(TriDraw32_7_SSE2); - TriDraw32.push_back(TriDraw32_8_SSE2); - TriDraw32.push_back(TriDraw32_9_SSE2); - TriDraw32.push_back(TriDraw32_10_SSE2); - TriDraw32.push_back(TriDraw32_11_SSE2); - 
TriDraw32.push_back(TriDraw32_12_SSE2); - TriDraw32.push_back(TriDraw32_13_SSE2); - TriDraw32.push_back(TriDraw32_14_SSE2); - TriFill8.push_back(TriFill8_0_SSE2); - TriFill8.push_back(TriFill8_1_SSE2); - TriFill8.push_back(TriFill8_2_SSE2); - TriFill8.push_back(TriFill8_3_SSE2); - TriFill8.push_back(TriFill8_4_SSE2); - TriFill8.push_back(TriFill8_5_SSE2); - TriFill8.push_back(TriFill8_6_SSE2); - TriFill8.push_back(TriFill8_7_SSE2); - TriFill8.push_back(TriFill8_8_SSE2); - TriFill8.push_back(TriFill8_9_SSE2); - TriFill8.push_back(TriFill8_10_SSE2); - TriFill8.push_back(TriFill8_11_SSE2); - TriFill8.push_back(TriFill8_12_SSE2); - TriFill8.push_back(TriFill8_13_SSE2); - TriFill8.push_back(TriFill8_14_SSE2); - TriFill32.push_back(TriFill32_0_SSE2); - TriFill32.push_back(TriFill32_1_SSE2); - TriFill32.push_back(TriFill32_2_SSE2); - TriFill32.push_back(TriFill32_3_SSE2); - TriFill32.push_back(TriFill32_4_SSE2); - TriFill32.push_back(TriFill32_5_SSE2); - TriFill32.push_back(TriFill32_6_SSE2); - TriFill32.push_back(TriFill32_7_SSE2); - TriFill32.push_back(TriFill32_8_SSE2); - TriFill32.push_back(TriFill32_9_SSE2); - TriFill32.push_back(TriFill32_10_SSE2); - TriFill32.push_back(TriFill32_11_SSE2); - TriFill32.push_back(TriFill32_12_SSE2); - TriFill32.push_back(TriFill32_13_SSE2); - TriFill32.push_back(TriFill32_14_SSE2); -#endif -} - -Drawers *Drawers::Instance() -{ - static Drawers drawers; - return &drawers; -} - -FString DrawWallArgs::ToString() -{ - FString info; - info.Format("dest_y = %i, count = %i, flags = %i, texturefrac[0] = %u, textureheight[0] = %u", dest_y, count, flags, texturefrac[0], textureheight[0]); - return info; -} - -FString DrawSpanArgs::ToString() -{ - FString info; - info.Format("x1 = %i, x2 = %i, y = %i, flags = %i", x1, x2, y, flags); - return info; -} - -FString DrawColumnArgs::ToString() -{ - FString info; - info.Format("dest_y = %i, count = %i, flags = %i, iscale = %i (%f), texturefrac = %i (%f)", dest_y, count, flags, iscale, ((fixed_t)iscale) 
/ (float)FRACUNIT, texturefrac, ((fixed_t)texturefrac) / (float)FRACUNIT); - return info; -} - -FString DrawSkyArgs::ToString() -{ - FString info; - info.Format("dest_y = %i, count = %i", dest_y, count); - return info; -} diff --git a/src/swrenderer/drawers/r_drawers.h b/src/swrenderer/drawers/r_drawers.h index 2d251478c6..6205940bfd 100644 --- a/src/swrenderer/drawers/r_drawers.h +++ b/src/swrenderer/drawers/r_drawers.h @@ -66,149 +66,6 @@ struct TriLight float radius; }; -struct DrawWallArgs -{ - uint32_t *dest; - const uint32_t *source[4]; - const uint32_t *source2[4]; - int32_t pitch; - int32_t count; - int32_t dest_y; - uint32_t texturefrac[4]; - uint32_t texturefracx[4]; - uint32_t iscale[4]; - uint32_t textureheight[4]; - uint32_t light[4]; - uint32_t srcalpha; - uint32_t destalpha; - - uint16_t light_alpha; - uint16_t light_red; - uint16_t light_green; - uint16_t light_blue; - uint16_t fade_alpha; - uint16_t fade_red; - uint16_t fade_green; - uint16_t fade_blue; - uint16_t desaturate; - uint32_t flags; - enum Flags - { - simple_shade = 1, - nearest_filter = 2 - }; - - float z, step_z; - TriLight *dynlights; - uint32_t num_dynlights; - - FString ToString(); -}; - -struct DrawSpanArgs -{ - uint32_t *destorg; - const uint32_t *source; - int32_t destpitch; - int32_t xfrac; - int32_t yfrac; - int32_t xstep; - int32_t ystep; - int32_t x1; - int32_t x2; - int32_t y; - int32_t xbits; - int32_t ybits; - uint32_t light; - uint32_t srcalpha; - uint32_t destalpha; - - uint16_t light_alpha; - uint16_t light_red; - uint16_t light_green; - uint16_t light_blue; - uint16_t fade_alpha; - uint16_t fade_red; - uint16_t fade_green; - uint16_t fade_blue; - uint16_t desaturate; - uint32_t flags; - enum Flags - { - simple_shade = 1, - nearest_filter = 2 - }; - - float viewpos_x, step_viewpos_x; - TriLight *dynlights; - uint32_t num_dynlights; - - FString ToString(); -}; - -struct DrawColumnArgs -{ - uint32_t *dest; - const uint8_t *source; - const uint8_t *source2; - uint8_t 
*colormap; - uint8_t *translation; - const uint32_t *basecolors; - int32_t pitch; - int32_t count; - int32_t dest_y; - uint32_t iscale; - uint32_t texturefracx; - uint32_t textureheight; - uint32_t texturefrac; - uint32_t light; - uint32_t color; - uint32_t srccolor; - uint32_t srcalpha; - uint32_t destalpha; - - uint16_t light_alpha; - uint16_t light_red; - uint16_t light_green; - uint16_t light_blue; - uint16_t fade_alpha; - uint16_t fade_red; - uint16_t fade_green; - uint16_t fade_blue; - uint16_t desaturate; - uint32_t flags; - enum Flags - { - simple_shade = 1, - nearest_filter = 2 - }; - - FString ToString(); -}; - -struct DrawSkyArgs -{ - uint32_t *dest; - const uint32_t *source0[4]; - const uint32_t *source1[4]; - int32_t pitch; - int32_t count; - int32_t dest_y; - uint32_t texturefrac[4]; - uint32_t iscale[4]; - uint32_t textureheight0; - uint32_t textureheight1; - uint32_t top_color; - uint32_t bottom_color; - uint32_t flags; - enum Flags - { - fade_sky = 1 - }; - - FString ToString(); -}; - struct TriVertex { TriVertex() { } @@ -292,51 +149,3 @@ enum class TriBlendMode }; inline int NumTriBlendModes() { return (int)TriBlendMode::Skycap + 1; } - -class Drawers -{ -public: - static Drawers *Instance(); - - void(*DrawColumn)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; - void(*DrawColumnAdd)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; - void(*DrawColumnShaded)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; - void(*DrawColumnAddClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; - void(*DrawColumnSubClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; - void(*DrawColumnRevSubClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; - void(*DrawColumnTranslated)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; - void(*DrawColumnTlatedAdd)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; - void(*DrawColumnAddClampTranslated)(const DrawColumnArgs *, 
const WorkerThreadData *) = nullptr; - void(*DrawColumnSubClampTranslated)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; - void(*DrawColumnRevSubClampTranslated)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; - void(*FillColumn)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; - void(*FillColumnAdd)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; - void(*FillColumnAddClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; - void(*FillColumnSubClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; - void(*FillColumnRevSubClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; - - void(*DrawSpan)(const DrawSpanArgs *) = nullptr; - void(*DrawSpanMasked)(const DrawSpanArgs *) = nullptr; - void(*DrawSpanTranslucent)(const DrawSpanArgs *) = nullptr; - void(*DrawSpanMaskedTranslucent)(const DrawSpanArgs *) = nullptr; - void(*DrawSpanAddClamp)(const DrawSpanArgs *) = nullptr; - void(*DrawSpanMaskedAddClamp)(const DrawSpanArgs *) = nullptr; - - void(*vlinec1)(const DrawWallArgs *, const WorkerThreadData *) = nullptr; - void(*mvlinec1)(const DrawWallArgs *, const WorkerThreadData *) = nullptr; - void(*tmvline1_add)(const DrawWallArgs *, const WorkerThreadData *) = nullptr; - void(*tmvline1_addclamp)(const DrawWallArgs *, const WorkerThreadData *) = nullptr; - void(*tmvline1_subclamp)(const DrawWallArgs *, const WorkerThreadData *) = nullptr; - void(*tmvline1_revsubclamp)(const DrawWallArgs *, const WorkerThreadData *) = nullptr; - - void(*DrawSky1)(const DrawSkyArgs *, const WorkerThreadData *) = nullptr; - void(*DrawDoubleSky1)(const DrawSkyArgs *, const WorkerThreadData *) = nullptr; - - std::vector TriDraw8; - std::vector TriDraw32; - std::vector TriFill8; - std::vector TriFill32; - -private: - Drawers(); -}; diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt index f4fe251e1b..8a97243bb8 100644 --- a/tools/CMakeLists.txt +++ b/tools/CMakeLists.txt @@ -7,6 +7,5 @@ if( WIN32 AND 
NOT CMAKE_SIZEOF_VOID_P MATCHES "8" ) endif() add_subdirectory( updaterevision ) add_subdirectory( zipdir ) -add_subdirectory( drawergen ) set( CROSS_EXPORTS ${CROSS_EXPORTS} PARENT_SCOPE ) diff --git a/tools/drawergen/CMakeLists.txt b/tools/drawergen/CMakeLists.txt deleted file mode 100644 index 215125ec2c..0000000000 --- a/tools/drawergen/CMakeLists.txt +++ /dev/null @@ -1,187 +0,0 @@ -cmake_minimum_required( VERSION 2.8.7 ) - -include( CheckCXXCompilerFlag ) - -include(../../precompiled_headers.cmake) - -# Path where it looks for the LLVM compiled files on Windows -set( LLVM_PRECOMPILED_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../../llvm" ) - -if( CMAKE_SIZEOF_VOID_P MATCHES "8" ) - set( X64 64 ) -endif() - -if( NOT DRAWERGEN_LIBS ) - set( DRAWERGEN_LIBS "" ) -endif() - -include_directories( . ) - -file( GLOB HEADER_FILES - *.h - ssa/*.h - fixedfunction/*.h -) - -if( NOT WIN32 ) - set( LLVM_COMPONENTS core support asmparser asmprinter bitreader bitwriter codegen ipo - irreader transformutils instrumentation profiledata runtimedyld - object instcombine linker analysis selectiondag scalaropts vectorize executionengine - mc mcdisassembler mcparser mcjit target x86asmprinter x86info x86desc x86utils x86codegen - armasmprinter arminfo armdesc armcodegen ) - - # Example LLVM_DIR folder: C:/Development/Environment/Src/llvm-3.9.0/build/lib/cmake/llvm - find_package(LLVM REQUIRED CONFIG) - message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}") - message(STATUS "Using LLVMConfig.cmake in: ${LLVM_DIR}") - llvm_map_components_to_libnames( llvm_libs ${LLVM_COMPONENTS} ) - include_directories( ${LLVM_INCLUDE_DIRS} ) - set( DRAWERGEN_LIBS ${DRAWERGEN_LIBS} ${llvm_libs} ) - set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti" ) -else() - set( LLVM_COMPONENTS core support asmparser asmprinter bitreader bitwriter codegen passes ipo - irreader transformutils instrumentation profiledata debuginfocodeview runtimedyld - object instcombine linker analysis selectiondag scalaropts vectorize 
executionengine - mc mcdisassembler mcparser mcjit target x86asmprinter x86info x86desc x86utils x86codegen ) - - include_directories( "${LLVM_PRECOMPILED_DIR}/include" ) - if( X64 ) - include_directories( "${LLVM_PRECOMPILED_DIR}/64bit-include" ) - set( llvm_libs_base "${LLVM_PRECOMPILED_DIR}/64bit-" ) - else() - include_directories( "${LLVM_PRECOMPILED_DIR}/32bit-include" ) - set( llvm_libs_base "${LLVM_PRECOMPILED_DIR}/32bit-" ) - endif() - foreach(buildtype IN ITEMS RELEASE DEBUG) - set( llvm_libs_${buildtype} "${llvm_libs_base}${buildtype}" ) - set( LLVM_${buildtype}_LIBS "" ) - foreach( llvm_module ${LLVM_COMPONENTS} ) - find_library( LLVM_${llvm_module}_LIBRARY_${buildtype} LLVM${llvm_module} PATHS ${llvm_libs_${buildtype}} ) - set( LLVM_${buildtype}_LIBS ${LLVM_${buildtype}_LIBS} ${LLVM_${llvm_module}_LIBRARY_${buildtype}} ) - endforeach( llvm_module ) - endforeach(buildtype) -endif() - -if( ZD_CMAKE_COMPILER_IS_GNUCXX_COMPATIBLE ) - if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER "4.5") - set( CMAKE_C_FLAGS "-Wno-unused-result ${CMAKE_C_FLAGS}" ) - set( CMAKE_CXX_FLAGS "-Wno-unused-result ${CMAKE_CXX_FLAGS}" ) - endif() - if( CMAKE_CXX_COMPILER_ID STREQUAL "Clang" ) - if( APPLE OR CMAKE_CXX_COMPILER_VERSION VERSION_GREATER "3.6" ) - set( CMAKE_CXX_FLAGS "-Wno-inconsistent-missing-override ${CMAKE_CXX_FLAGS}" ) - endif() - endif() - set( CMAKE_C_FLAGS "-Wall -Wextra -Wno-unused -Wno-unused-parameter -Wno-missing-field-initializers -ffp-contract=off ${CMAKE_C_FLAGS}" ) - set( CMAKE_CXX_FLAGS "-Wall -Wextra -Wno-unused -Wno-unused-parameter -Wno-missing-field-initializers -ffp-contract=off ${CMAKE_CXX_FLAGS}" ) - - # Use the highest C++ standard available since VS2015 compiles with C++14 - # but we only require C++11. The recommended way to do this in CMake is to - # probably to use target_compile_features, but I don't feel like maintaining - # a list of features we use. 
- CHECK_CXX_COMPILER_FLAG( "-std=gnu++14" CAN_DO_CPP14 ) - if ( CAN_DO_CPP14 ) - set ( CMAKE_CXX_FLAGS "-std=gnu++14 ${CMAKE_CXX_FLAGS}" ) - else () - CHECK_CXX_COMPILER_FLAG( "-std=gnu++1y" CAN_DO_CPP1Y ) - if ( CAN_DO_CPP1Y ) - set ( CMAKE_CXX_FLAGS "-std=gnu++1y ${CMAKE_CXX_FLAGS}" ) - else () - CHECK_CXX_COMPILER_FLAG( "-std=gnu++11" CAN_DO_CPP11 ) - if ( CAN_DO_CPP11 ) - set ( CMAKE_CXX_FLAGS "-std=gnu++11 ${CMAKE_CXX_FLAGS}" ) - else () - CHECK_CXX_COMPILER_FLAG( "-std=gnu++0x" CAN_DO_CPP0X ) - if ( CAN_DO_CPP0X ) - set ( CMAKE_CXX_FLAGS "-std=gnu++0x ${CMAKE_CXX_FLAGS}" ) - endif () - endif () - endif () - endif () - - if ( APPLE AND CMAKE_CXX_COMPILER_ID STREQUAL "Clang" ) - set( CMAKE_CXX_FLAGS "-stdlib=libc++ ${CMAKE_CXX_FLAGS}" ) - set( CMAKE_EXE_LINKER_FLAGS "-stdlib=libc++ ${CMAKE_EXE_LINKER_FLAGS}" ) - endif () -endif() - -if( WIN32 ) - if( MSVC_VERSION GREATER 1399 ) - # VC 8+ adds a manifest automatically to the executable. We need to - # merge ours with it. - set( MT_MERGE ON ) - else() - set( TRUSTINFO trustinfo.rc ) - endif() -else( WIN32 ) - set( TRUSTINFO "" ) -endif() - -set (SOURCES - drawergen.cpp - llvmprogram.cpp - llvmdrawers.cpp - ssa/ssa_bool.cpp - ssa/ssa_float.cpp - ssa/ssa_float_ptr.cpp - ssa/ssa_for_block.cpp - ssa/ssa_function.cpp - ssa/ssa_if_block.cpp - ssa/ssa_int.cpp - ssa/ssa_int_ptr.cpp - ssa/ssa_short.cpp - ssa/ssa_scope.cpp - ssa/ssa_struct_type.cpp - ssa/ssa_ubyte.cpp - ssa/ssa_ubyte_ptr.cpp - ssa/ssa_value.cpp - ssa/ssa_vec4f.cpp - ssa/ssa_vec4f_ptr.cpp - ssa/ssa_vec4i.cpp - ssa/ssa_vec4i_ptr.cpp - ssa/ssa_vec8s.cpp - ssa/ssa_vec16ub.cpp - fixedfunction/drawercodegen.cpp - fixedfunction/drawspancodegen.cpp - fixedfunction/drawwallcodegen.cpp - fixedfunction/drawcolumncodegen.cpp - fixedfunction/drawskycodegen.cpp - fixedfunction/drawtrianglecodegen.cpp - fixedfunction/setuptrianglecodegen.cpp -) -enable_precompiled_headers( precomp.h SOURCES ) - -if( NOT CMAKE_CROSSCOMPILING ) - add_executable( drawergen ${SOURCES} 
${TRUSTINFO} ${HEADER_FILES} ) - set( CROSS_EXPORTS ${CROSS_EXPORTS} drawergen PARENT_SCOPE ) -endif() - -if( MT_MERGE ) - add_custom_command(TARGET drawergen POST_BUILD - COMMAND mt -inputresource:$ -manifest ${CMAKE_CURRENT_SOURCE_DIR}/trustinfo.txt -outputresource:$ -nologo - COMMENT "Embedding trustinfo into drawergen" ) -endif() - -# Linux - add these flags for LLVM compatibility to prevent crashing -#if ( UNIX AND NOT APPLE ) -# set( CMAKE_EXE_LINKER_FLAGS "-Wl,--exclude-libs,ALL ${CMAKE_EXE_LINKER_FLAGS}" ) -#endif() - -target_link_libraries( drawergen ${DRAWERGEN_LIBS} ) - -if( WIN32 ) - foreach(debuglib ${LLVM_DEBUG_LIBS}) - target_link_libraries( drawergen debug ${debuglib} ) - endforeach(debuglib) - foreach(releaselib ${LLVM_RELEASE_LIBS}) - target_link_libraries( drawergen optimized ${releaselib} ) - endforeach(releaselib) -endif() - -#source_group("Render Compiler" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/r_compiler/.+") -#source_group("Render Compiler\\SSA" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/r_compiler/ssa/.+") -#source_group("Render Compiler\\Fixed Function" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/r_compiler/fixedfunction/.+") - -source_group("Compiler" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/.+\\.(cpp|h)$") -source_group("Compiler\\SSA" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/ssa/.+") -source_group("Compiler\\Fixed Function" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/fixedfunction/.+") diff --git a/tools/drawergen/drawergen.cpp b/tools/drawergen/drawergen.cpp deleted file mode 100644 index dcb039a404..0000000000 --- a/tools/drawergen/drawergen.cpp +++ /dev/null @@ -1,142 +0,0 @@ -/* -** LLVM code generated drawers -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 'as-is', without any express or implied -** warranty. In no event will the authors be held liable for any damages -** arising from the use of this software. 
-** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. -** 3. This notice may not be removed or altered from any source distribution. -** -*/ - -#include "precomp.h" -#include "timestamp.h" -#include "exception.h" -#include "llvmdrawers.h" - -std::string &AllTimestamps() -{ - static std::string timestamps; - return timestamps; -} - -void AddSourceFileTimestamp(const char *timestamp) -{ - if (!AllTimestamps().empty()) AllTimestamps().push_back(' '); - AllTimestamps() += timestamp; -} - -int main(int argc, char **argv) -{ - try - { - if (argc != 2) - { - std::cerr << "Usage: " << argv[0] << "" << std::endl; - return 1; - } - - std::string timestamp_filename = argv[1] + std::string(".timestamp"); - - FILE *file = fopen(timestamp_filename.c_str(), "rb"); - if (file != nullptr) - { - char buffer[4096]; - int bytes_read = fread(buffer, 1, 4096, file); - fclose(file); - std::string last_timestamp; - if (bytes_read > 0) - last_timestamp = std::string(buffer, bytes_read); - - if (AllTimestamps() == last_timestamp) - { - std::cout << "Not recompiling drawers because the object file is already up to date." 
<< std::endl; - exit(0); - } - } - - llvm::install_fatal_error_handler([](void *user_data, const std::string& reason, bool gen_crash_diag) - { - std::cerr << "LLVM fatal error: " << reason; - exit(1); - }); - - llvm::InitializeNativeTarget(); - llvm::InitializeNativeTargetAsmPrinter(); - - std::string triple = llvm::sys::getDefaultTargetTriple(); - -#ifdef __APPLE__ - // Target triple is x86_64-apple-darwin15.6.0 - auto pos = triple.find("-apple-darwin"); - if (pos != std::string::npos) - { - triple = triple.substr(0, pos) + "-apple-darwin10.11.0"; - } -#endif - - std::cout << "Target triple is " << triple << std::endl; - -#ifdef __arm__ - std::string cpuName = llvm::sys::getHostCPUName(); // "armv8"; -#else - std::string cpuName = "pentium4"; -#endif - std::string features; - std::cout << "Compiling drawer code for " << cpuName << ".." << std::endl; - - LLVMDrawers drawersSSE2(triple, cpuName, features, "_SSE2"); - - file = fopen(argv[1], "wb"); - if (file == nullptr) - { - std::cerr << "Unable to open " << argv[1] << " for writing." 
<< std::endl; - return 1; - } - - int result = fwrite(drawersSSE2.ObjectFile.data(), drawersSSE2.ObjectFile.size(), 1, file); - fclose(file); - - if (result != 1) - { - std::cerr << "Could not write data to " << argv[1] << std::endl; - return 1; - } - - file = fopen(timestamp_filename.c_str(), "wb"); - if (file == nullptr) - { - std::cerr << "Could not create timestamp file" << std::endl; - return 1; - } - result = fwrite(AllTimestamps().data(), AllTimestamps().length(), 1, file); - fclose(file); - if (result != 1) - { - std::cerr << "Could not write timestamp file" << std::endl; - return 1; - } - - //LLVMDrawers drawersSSE4("core2"); - //LLVMDrawers drawersAVX("sandybridge"); - //LLVMDrawers drawersAVX2("haswell"); - - return 0; - } - catch (const std::exception &e) - { - std::cerr << e.what() << std::endl; - return 1; - } -} diff --git a/tools/drawergen/exception.h b/tools/drawergen/exception.h deleted file mode 100644 index 22d3c18fbb..0000000000 --- a/tools/drawergen/exception.h +++ /dev/null @@ -1,36 +0,0 @@ -/* -** LLVM code generated drawers -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 'as-is', without any express or implied -** warranty. In no event will the authors be held liable for any damages -** arising from the use of this software. -** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. -** 3. This notice may not be removed or altered from any source distribution. 
-** -*/ - -#pragma once - -#include -#include - -class Exception : public std::exception -{ -public: - Exception(const std::string &message) : message(message) { } - const char *what() const noexcept override { return message.c_str(); } - -private: - std::string message; -}; diff --git a/tools/drawergen/fixedfunction/drawcolumncodegen.cpp b/tools/drawergen/fixedfunction/drawcolumncodegen.cpp deleted file mode 100644 index 1a4f806bf2..0000000000 --- a/tools/drawergen/fixedfunction/drawcolumncodegen.cpp +++ /dev/null @@ -1,322 +0,0 @@ -/* -** DrawColumn code generation -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 'as-is', without any express or implied -** warranty. In no event will the authors be held liable for any damages -** arising from the use of this software. -** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. -** 3. This notice may not be removed or altered from any source distribution. 
-** -*/ - -#include "precomp.h" -#include "timestamp.h" -#include "fixedfunction/drawcolumncodegen.h" -#include "ssa/ssa_function.h" -#include "ssa/ssa_scope.h" -#include "ssa/ssa_for_block.h" -#include "ssa/ssa_if_block.h" -#include "ssa/ssa_stack.h" -#include "ssa/ssa_function.h" -#include "ssa/ssa_struct_type.h" -#include "ssa/ssa_value.h" - -void DrawColumnCodegen::Generate(DrawColumnVariant variant, SSAValue args, SSAValue thread_data) -{ - dest = args[0][0].load(true); - source = args[0][1].load(true); - source2 = args[0][2].load(true); - colormap = args[0][3].load(true); - translation = args[0][4].load(true); - basecolors = args[0][5].load(true); - pitch = args[0][6].load(true); - count = args[0][7].load(true); - dest_y = args[0][8].load(true); - iscale = args[0][9].load(true); - texturefracx = args[0][10].load(true); - textureheight = args[0][11].load(true); - texturefrac = args[0][12].load(true); - light = args[0][13].load(true); - color = SSAVec4i::unpack(args[0][14].load(true)); - srccolor = SSAVec4i::unpack(args[0][15].load(true)); - srcalpha = args[0][16].load(true); - destalpha = args[0][17].load(true); - SSAShort light_alpha = args[0][18].load(true); - SSAShort light_red = args[0][19].load(true); - SSAShort light_green = args[0][20].load(true); - SSAShort light_blue = args[0][21].load(true); - SSAShort fade_alpha = args[0][22].load(true); - SSAShort fade_red = args[0][23].load(true); - SSAShort fade_green = args[0][24].load(true); - SSAShort fade_blue = args[0][25].load(true); - SSAShort desaturate = args[0][26].load(true); - SSAInt flags = args[0][27].load(true); - shade_constants.light = SSAVec4i(light_blue.zext_int(), light_green.zext_int(), light_red.zext_int(), light_alpha.zext_int()); - shade_constants.fade = SSAVec4i(fade_blue.zext_int(), fade_green.zext_int(), fade_red.zext_int(), fade_alpha.zext_int()); - shade_constants.desaturate = desaturate.zext_int(); - - thread.core = thread_data[0][0].load(true); - thread.num_cores = 
thread_data[0][1].load(true); - thread.pass_start_y = thread_data[0][2].load(true); - thread.pass_end_y = thread_data[0][3].load(true); - thread.temp = thread_data[0][4].load(true); - - is_simple_shade = (flags & DrawColumnArgs::simple_shade) == SSAInt(DrawColumnArgs::simple_shade); - is_nearest_filter = (flags & DrawColumnArgs::nearest_filter) == SSAInt(DrawColumnArgs::nearest_filter); - - count = count_for_thread(dest_y, count, thread); - dest = dest_for_thread(dest_y, pitch, dest, thread); - pitch = pitch * thread.num_cores; - - stack_frac.store(texturefrac + iscale * skipped_by_thread(dest_y, thread)); - iscale = iscale * thread.num_cores; - - SSAIfBlock branch; - branch.if_block(is_simple_shade); - LoopShade(variant, true); - branch.else_block(); - LoopShade(variant, false); - branch.end_block(); -} - -void DrawColumnCodegen::LoopShade(DrawColumnVariant variant, bool isSimpleShade) -{ - SSAIfBlock branch; - branch.if_block(is_nearest_filter); - Loop(variant, isSimpleShade, true); - branch.else_block(); - one = (1 << 30) / textureheight; - stack_frac.store(stack_frac.load() - (one >> 1)); - Loop(variant, isSimpleShade, false); - branch.end_block(); -} - -void DrawColumnCodegen::Loop(DrawColumnVariant variant, bool isSimpleShade, bool isNearestFilter) -{ - stack_index.store(SSAInt(0)); - { - SSAForBlock loop; - SSAInt index = stack_index.load(); - loop.loop_block(index < count); - - SSAInt sample_index, frac; - frac = stack_frac.load(); - if (IsPaletteInput(variant)) - sample_index = frac >> FRACBITS; - else - sample_index = frac; - - SSAInt offset = index * pitch * 4; - SSAVec4i bgcolor = dest[offset].load_vec4ub(false); - - SSAVec4i outcolor = ProcessPixel(sample_index, bgcolor, variant, isSimpleShade, isNearestFilter); - - dest[offset].store_vec4ub(outcolor); - - stack_index.store(index.add(SSAInt(1), true, true)); - stack_frac.store(frac + iscale); - loop.end_block(); - } -} - -bool DrawColumnCodegen::IsPaletteInput(DrawColumnVariant variant) -{ - switch 
(variant) - { - default: - case DrawColumnVariant::DrawCopy: - case DrawColumnVariant::Draw: - case DrawColumnVariant::DrawAdd: - case DrawColumnVariant::DrawAddClamp: - case DrawColumnVariant::DrawSubClamp: - case DrawColumnVariant::DrawRevSubClamp: - case DrawColumnVariant::Fill: - case DrawColumnVariant::FillAdd: - case DrawColumnVariant::FillAddClamp: - case DrawColumnVariant::FillSubClamp: - case DrawColumnVariant::FillRevSubClamp: - return false; - case DrawColumnVariant::DrawShaded: - case DrawColumnVariant::DrawTranslated: - case DrawColumnVariant::DrawTlatedAdd: - case DrawColumnVariant::DrawAddClampTranslated: - case DrawColumnVariant::DrawSubClampTranslated: - case DrawColumnVariant::DrawRevSubClampTranslated: - return true; - } -} - -SSAVec4i DrawColumnCodegen::ProcessPixel(SSAInt sample_index, SSAVec4i bgcolor, DrawColumnVariant variant, bool isSimpleShade, bool isNearestFilter) -{ - SSAInt alpha, inv_alpha; - SSAVec4i fg; - switch (variant) - { - default: - case DrawColumnVariant::DrawCopy: - return blend_copy(Sample(sample_index, isNearestFilter)); - case DrawColumnVariant::Draw: - return blend_copy(Shade(Sample(sample_index, isNearestFilter), isSimpleShade)); - case DrawColumnVariant::DrawAdd: - case DrawColumnVariant::DrawAddClamp: - fg = Shade(Sample(sample_index, isNearestFilter), isSimpleShade); - return blend_add(fg, bgcolor, srcalpha, calc_blend_bgalpha(fg, destalpha)); - case DrawColumnVariant::DrawShaded: - alpha = SSAInt::MAX(SSAInt::MIN(ColormapSample(sample_index), SSAInt(64)), SSAInt(0)) * 4; - inv_alpha = 256 - alpha; - return blend_add(color, bgcolor, alpha, inv_alpha); - case DrawColumnVariant::DrawSubClamp: - fg = Shade(Sample(sample_index, isNearestFilter), isSimpleShade); - return blend_sub(fg, bgcolor, srcalpha, calc_blend_bgalpha(fg, destalpha)); - case DrawColumnVariant::DrawRevSubClamp: - fg = Shade(Sample(sample_index, isNearestFilter), isSimpleShade); - return blend_revsub(fg, bgcolor, srcalpha, calc_blend_bgalpha(fg, 
destalpha)); - case DrawColumnVariant::DrawTranslated: - return blend_copy(Shade(TranslateSample(sample_index), isSimpleShade)); - case DrawColumnVariant::DrawTlatedAdd: - case DrawColumnVariant::DrawAddClampTranslated: - fg = Shade(TranslateSample(sample_index), isSimpleShade); - return blend_add(fg, bgcolor, srcalpha, calc_blend_bgalpha(fg, destalpha)); - case DrawColumnVariant::DrawSubClampTranslated: - fg = Shade(TranslateSample(sample_index), isSimpleShade); - return blend_sub(fg, bgcolor, srcalpha, calc_blend_bgalpha(fg, destalpha)); - case DrawColumnVariant::DrawRevSubClampTranslated: - fg = Shade(TranslateSample(sample_index), isSimpleShade); - return blend_revsub(fg, bgcolor, srcalpha, calc_blend_bgalpha(fg, destalpha)); - case DrawColumnVariant::Fill: - return blend_copy(color); - case DrawColumnVariant::FillAdd: - alpha = srccolor[3]; - alpha = alpha + (alpha >> 7); - inv_alpha = 256 - alpha; - return blend_add(srccolor, bgcolor, alpha, inv_alpha); - case DrawColumnVariant::FillAddClamp: - return blend_add(srccolor, bgcolor, srcalpha, destalpha); - case DrawColumnVariant::FillSubClamp: - return blend_sub(srccolor, bgcolor, srcalpha, destalpha); - case DrawColumnVariant::FillRevSubClamp: - return blend_revsub(srccolor, bgcolor, srcalpha, destalpha); - } -} - -SSAVec4i DrawColumnCodegen::ProcessPixelPal(SSAInt sample_index, SSAVec4i bgcolor, DrawColumnVariant variant, bool isSimpleShade) -{ - SSAInt alpha, inv_alpha; - switch (variant) - { - default: - case DrawColumnVariant::DrawCopy: - return blend_copy(basecolors[ColormapSample(sample_index) * 4].load_vec4ub(true)); - case DrawColumnVariant::Draw: - return blend_copy(ShadePal(ColormapSample(sample_index), isSimpleShade)); - case DrawColumnVariant::DrawAdd: - case DrawColumnVariant::DrawAddClamp: - return blend_add(ShadePal(ColormapSample(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha); - case DrawColumnVariant::DrawShaded: - alpha = SSAInt::MAX(SSAInt::MIN(ColormapSample(sample_index), 
SSAInt(64)), SSAInt(0)) * 4; - inv_alpha = 256 - alpha; - return blend_add(color, bgcolor, alpha, inv_alpha); - case DrawColumnVariant::DrawSubClamp: - return blend_sub(ShadePal(ColormapSample(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha); - case DrawColumnVariant::DrawRevSubClamp: - return blend_revsub(ShadePal(ColormapSample(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha); - case DrawColumnVariant::DrawTranslated: - return blend_copy(ShadePal(TranslateSamplePal(sample_index), isSimpleShade)); - case DrawColumnVariant::DrawTlatedAdd: - case DrawColumnVariant::DrawAddClampTranslated: - return blend_add(ShadePal(TranslateSamplePal(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha); - case DrawColumnVariant::DrawSubClampTranslated: - return blend_sub(ShadePal(TranslateSamplePal(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha); - case DrawColumnVariant::DrawRevSubClampTranslated: - return blend_revsub(ShadePal(TranslateSamplePal(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha); - case DrawColumnVariant::Fill: - return blend_copy(color); - case DrawColumnVariant::FillAdd: - alpha = srccolor[3]; - alpha = alpha + (alpha >> 7); - inv_alpha = 256 - alpha; - return blend_add(srccolor, bgcolor, alpha, inv_alpha); - case DrawColumnVariant::FillAddClamp: - return blend_add(srccolor, bgcolor, srcalpha, destalpha); - case DrawColumnVariant::FillSubClamp: - return blend_sub(srccolor, bgcolor, srcalpha, destalpha); - case DrawColumnVariant::FillRevSubClamp: - return blend_revsub(srccolor, bgcolor, srcalpha, destalpha); - } -} - -SSAVec4i DrawColumnCodegen::Sample(SSAInt frac, bool isNearestFilter) -{ - if (isNearestFilter) - { - SSAInt sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; - return source[sample_index * 4].load_vec4ub(false); - } - else - { - return SampleLinear(source, source2, texturefracx, frac, one, textureheight); - } -} - -SSAVec4i DrawColumnCodegen::SampleLinear(SSAUBytePtr 
col0, SSAUBytePtr col1, SSAInt texturefracx, SSAInt texturefracy, SSAInt one, SSAInt height) -{ - // Clamp to edge - SSAInt frac_y0 = (SSAInt::MAX(SSAInt::MIN(texturefracy, SSAInt((1 << 30) - 1)), SSAInt(0)) >> (FRACBITS - 2)) * height; - SSAInt frac_y1 = (SSAInt::MAX(SSAInt::MIN(texturefracy + one, SSAInt((1 << 30) - 1)), SSAInt(0)) >> (FRACBITS - 2)) * height; - SSAInt y0 = frac_y0 >> FRACBITS; - SSAInt y1 = frac_y1 >> FRACBITS; - - SSAVec4i p00 = col0[y0 * 4].load_vec4ub(true); - SSAVec4i p01 = col0[y1 * 4].load_vec4ub(true); - SSAVec4i p10 = col1[y0 * 4].load_vec4ub(true); - SSAVec4i p11 = col1[y1 * 4].load_vec4ub(true); - - SSAInt inv_b = texturefracx; - SSAInt inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; - SSAInt a = 16 - inv_a; - SSAInt b = 16 - inv_b; - - return (p00 * (a * b) + p01 * (inv_a * b) + p10 * (a * inv_b) + p11 * (inv_a * inv_b) + 127) >> 8; -} - -SSAInt DrawColumnCodegen::ColormapSample(SSAInt sample_index) -{ - return colormap[source[sample_index].load(true).zext_int()].load(true).zext_int(); -} - -SSAVec4i DrawColumnCodegen::TranslateSample(SSAInt sample_index) -{ - return translation[source[sample_index].load(true).zext_int() * 4].load_vec4ub(true); -} - -SSAInt DrawColumnCodegen::TranslateSamplePal(SSAInt sample_index) -{ - return translation[source[sample_index].load(true).zext_int()].load(true).zext_int(); -} - -SSAVec4i DrawColumnCodegen::Shade(SSAVec4i fg, bool isSimpleShade) -{ - if (isSimpleShade) - return shade_bgra_simple(fg, light); - else - return shade_bgra_advanced(fg, light, shade_constants); -} - -SSAVec4i DrawColumnCodegen::ShadePal(SSAInt palIndex, bool isSimpleShade) -{ - if (isSimpleShade) - return shade_pal_index_simple(palIndex, light, basecolors); - else - return shade_pal_index_advanced(palIndex, light, shade_constants, basecolors); -} diff --git a/tools/drawergen/fixedfunction/drawcolumncodegen.h b/tools/drawergen/fixedfunction/drawcolumncodegen.h deleted file mode 100644 index 9056549111..0000000000 --- 
a/tools/drawergen/fixedfunction/drawcolumncodegen.h +++ /dev/null @@ -1,92 +0,0 @@ -/* -** DrawColumn code generation -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 'as-is', without any express or implied -** warranty. In no event will the authors be held liable for any damages -** arising from the use of this software. -** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. -** 3. This notice may not be removed or altered from any source distribution. 
-** -*/ - -#pragma once - -#include "drawercodegen.h" - -enum class DrawColumnVariant -{ - Fill, - FillAdd, - FillAddClamp, - FillSubClamp, - FillRevSubClamp, - DrawCopy, - Draw, - DrawAdd, - DrawTranslated, - DrawTlatedAdd, - DrawShaded, - DrawAddClamp, - DrawAddClampTranslated, - DrawSubClamp, - DrawSubClampTranslated, - DrawRevSubClamp, - DrawRevSubClampTranslated -}; - -class DrawColumnCodegen : public DrawerCodegen -{ -public: - void Generate(DrawColumnVariant variant, SSAValue args, SSAValue thread_data); - -private: - void LoopShade(DrawColumnVariant variant, bool isSimpleShade); - void Loop(DrawColumnVariant variant, bool isSimpleShade, bool isNearestFilter); - SSAVec4i ProcessPixel(SSAInt sample_index, SSAVec4i bgcolor, DrawColumnVariant variant, bool isSimpleShade, bool isNearestFilter); - SSAVec4i ProcessPixelPal(SSAInt sample_index, SSAVec4i bgcolor, DrawColumnVariant variant, bool isSimpleShade); - SSAVec4i Sample(SSAInt frac, bool isNearestFilter); - SSAVec4i SampleLinear(SSAUBytePtr col0, SSAUBytePtr col1, SSAInt texturefracx, SSAInt texturefracy, SSAInt one, SSAInt height); - SSAInt ColormapSample(SSAInt frac); - SSAVec4i TranslateSample(SSAInt frac); - SSAInt TranslateSamplePal(SSAInt frac); - SSAVec4i Shade(SSAVec4i fgcolor, bool isSimpleShade); - SSAVec4i ShadePal(SSAInt palIndex, bool isSimpleShade); - bool IsPaletteInput(DrawColumnVariant variant); - - SSAStack stack_index, stack_frac; - - SSAUBytePtr dest; - SSAUBytePtr source; - SSAUBytePtr source2; - SSAUBytePtr colormap; - SSAUBytePtr translation; - SSAUBytePtr basecolors; - SSAInt pitch; - SSAInt count; - SSAInt dest_y; - SSAInt iscale; - SSAInt texturefracx; - SSAInt textureheight; - SSAInt one; - SSAInt texturefrac; - SSAInt light; - SSAVec4i color; - SSAVec4i srccolor; - SSAInt srcalpha; - SSAInt destalpha; - SSABool is_simple_shade; - SSABool is_nearest_filter; - SSAShadeConstants shade_constants; - SSAWorkerThread thread; -}; diff --git 
a/tools/drawergen/fixedfunction/drawercodegen.cpp b/tools/drawergen/fixedfunction/drawercodegen.cpp deleted file mode 100644 index f7a67e5a62..0000000000 --- a/tools/drawergen/fixedfunction/drawercodegen.cpp +++ /dev/null @@ -1,169 +0,0 @@ -/* -** Drawer code generation -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 'as-is', without any express or implied -** warranty. In no event will the authors be held liable for any damages -** arising from the use of this software. -** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. -** 3. This notice may not be removed or altered from any source distribution. 
-** -*/ - -#include "precomp.h" -#include "timestamp.h" -#include "fixedfunction/drawercodegen.h" -#include "ssa/ssa_function.h" -#include "ssa/ssa_scope.h" -#include "ssa/ssa_for_block.h" -#include "ssa/ssa_if_block.h" -#include "ssa/ssa_stack.h" -#include "ssa/ssa_function.h" -#include "ssa/ssa_struct_type.h" -#include "ssa/ssa_value.h" - -SSABool DrawerCodegen::line_skipped_by_thread(SSAInt line, SSAWorkerThread thread) -{ - return line < thread.pass_start_y || line >= thread.pass_end_y || !(line % thread.num_cores == thread.core); -} - -SSAInt DrawerCodegen::skipped_by_thread(SSAInt first_line, SSAWorkerThread thread) -{ - SSAInt pass_skip = SSAInt::MAX(thread.pass_start_y - first_line, SSAInt(0)); - SSAInt core_skip = (thread.num_cores - (first_line + pass_skip - thread.core) % thread.num_cores) % thread.num_cores; - return pass_skip + core_skip; -} - -SSAInt DrawerCodegen::count_for_thread(SSAInt first_line, SSAInt count, SSAWorkerThread thread) -{ - SSAInt lines_until_pass_end = SSAInt::MAX(thread.pass_end_y - first_line, SSAInt(0)); - count = SSAInt::MIN(count, lines_until_pass_end); - SSAInt c = (count - skipped_by_thread(first_line, thread) + thread.num_cores - 1) / thread.num_cores; - return SSAInt::MAX(c, SSAInt(0)); -} - -SSAUBytePtr DrawerCodegen::dest_for_thread(SSAInt first_line, SSAInt pitch, SSAUBytePtr dest, SSAWorkerThread thread) -{ - return dest[skipped_by_thread(first_line, thread) * pitch * 4]; -} - -SSAInt DrawerCodegen::calc_light_multiplier(SSAInt light) -{ - return 256 - (light >> (FRACBITS - 8)); -} - -SSAVec4i DrawerCodegen::shade_pal_index_simple(SSAInt index, SSAInt light, SSAUBytePtr basecolors) -{ - SSAVec4i color = basecolors[index * 4].load_vec4ub(true); // = GPalette.BaseColors[index]; - return shade_bgra_simple(color, light); -} - -SSAVec4i DrawerCodegen::shade_pal_index_advanced(SSAInt index, SSAInt light, const SSAShadeConstants &constants, SSAUBytePtr basecolors) -{ - SSAVec4i color = basecolors[index * 4].load_vec4ub(true); 
// = GPalette.BaseColors[index]; - return shade_bgra_advanced(color, light, constants); -} - -SSAVec4i DrawerCodegen::shade_bgra_simple(SSAVec4i color, SSAInt light) -{ - SSAInt alpha = color[3]; - color = color * light / 256; - return color.insert(3, alpha); -} - -SSAVec4i DrawerCodegen::shade_bgra_advanced(SSAVec4i color, SSAInt light, const SSAShadeConstants &constants) -{ - SSAInt blue = color[0]; - SSAInt green = color[1]; - SSAInt red = color[2]; - SSAInt alpha = color[3]; - - SSAInt intensity = ((red * 77 + green * 143 + blue * 37) >> 8) * constants.desaturate; - - SSAVec4i inv_light = 256 - light; - SSAVec4i inv_desaturate = 256 - constants.desaturate; - - color = (color * inv_desaturate + intensity) / 256; - color = (constants.fade * inv_light + color * light) / 256; - color = (color * constants.light) / 256; - - return color.insert(3, alpha); -} - -SSAVec4i DrawerCodegen::blend_copy(SSAVec4i fg) -{ - return fg; -} - -SSAVec4i DrawerCodegen::blend_add(SSAVec4i fg, SSAVec4i bg, SSAInt srcalpha, SSAInt destalpha) -{ - SSAInt alpha = fg[3]; - alpha = alpha + (alpha >> 7); // 255 -> 256 - srcalpha = (alpha * srcalpha + 128) >> 8; - SSAVec4i color = (fg * srcalpha + bg * destalpha) / 256; - return color.insert(3, 255); -} - -SSAVec4i DrawerCodegen::blend_sub(SSAVec4i fg, SSAVec4i bg, SSAInt srcalpha, SSAInt destalpha) -{ - SSAInt alpha = fg[3]; - alpha = alpha + (alpha >> 7); // 255 -> 256 - srcalpha = (alpha * srcalpha + 128) >> 8; - SSAVec4i color = (fg * srcalpha - bg * destalpha) / 256; - return color.insert(3, 255); -} - -SSAVec4i DrawerCodegen::blend_revsub(SSAVec4i fg, SSAVec4i bg, SSAInt srcalpha, SSAInt destalpha) -{ - SSAInt alpha = fg[3]; - alpha = alpha + (alpha >> 7); // 255 -> 256 - srcalpha = (alpha * srcalpha + 128) >> 8; - SSAVec4i color = (bg * destalpha - fg * srcalpha) / 256; - return color.insert(3, 255); -} - -SSAVec4i DrawerCodegen::blend_alpha_blend(SSAVec4i fg, SSAVec4i bg) -{ - SSAInt alpha = fg[3]; - alpha = alpha + (alpha >> 7); // 
255 -> 256 - SSAInt inv_alpha = 256 - alpha; - SSAVec4i color = (fg * alpha + bg * inv_alpha) / 256; - return color.insert(3, 255); -} - -SSAInt DrawerCodegen::calc_blend_bgalpha(SSAVec4i fg, SSAInt destalpha) -{ - SSAInt alpha = fg[3]; - alpha = alpha + (alpha >> 7); - SSAInt inv_alpha = 256 - alpha; - return (destalpha * alpha + 256 * inv_alpha + 128) >> 8; -} - -SSAVec4i DrawerCodegen::blend_stencil(SSAVec4i stencilcolor, SSAInt fgalpha, SSAVec4i bg, SSAInt srcalpha, SSAInt destalpha) -{ - fgalpha = fgalpha + (fgalpha >> 7); // 255 -> 256 - SSAInt inv_fgalpha = 256 - fgalpha; - - srcalpha = (fgalpha * srcalpha + 128) >> 8; - destalpha = (destalpha * fgalpha + 256 * inv_fgalpha + 128) >> 8; - - SSAVec4i color = (stencilcolor * srcalpha + bg * destalpha) / 256; - return color.insert(3, 255); -} - -SSAVec4i DrawerCodegen::blend_add_srccolor_oneminussrccolor(SSAVec4i fg, SSAVec4i bg) -{ - SSAVec4i fgcolor = fg + (fg >> 7); // 255 -> 256 - SSAVec4i inv_fgcolor = SSAVec4i(256) - fgcolor; - return fg + ((bg * inv_fgcolor + 128) >> 8); -} diff --git a/tools/drawergen/fixedfunction/drawercodegen.h b/tools/drawergen/fixedfunction/drawercodegen.h deleted file mode 100644 index d1931e99cd..0000000000 --- a/tools/drawergen/fixedfunction/drawercodegen.h +++ /dev/null @@ -1,96 +0,0 @@ -/* -** Drawer code generation -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 'as-is', without any express or implied -** warranty. In no event will the authors be held liable for any damages -** arising from the use of this software. -** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. 
If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. -** 3. This notice may not be removed or altered from any source distribution. -** -*/ - -#pragma once - -#include "precomp.h" -#include "ssa/ssa_value.h" -#include "ssa/ssa_vec4f.h" -#include "ssa/ssa_vec4i.h" -#include "ssa/ssa_vec8s.h" -#include "ssa/ssa_vec16ub.h" -#include "ssa/ssa_int.h" -#include "ssa/ssa_int_ptr.h" -#include "ssa/ssa_short.h" -#include "ssa/ssa_ubyte_ptr.h" -#include "ssa/ssa_vec4f_ptr.h" -#include "ssa/ssa_vec4i_ptr.h" -#include "ssa/ssa_stack.h" -#include "ssa/ssa_bool.h" -#include "ssa/ssa_barycentric_weight.h" -#include "llvm_include.h" - -class SSAWorkerThread -{ -public: - SSAInt core; - SSAInt num_cores; - SSAInt pass_start_y; - SSAInt pass_end_y; - SSAUBytePtr temp; -}; - -class SSAShadeConstants -{ -public: - SSAVec4i light; - SSAVec4i fade; - SSAInt desaturate; -}; - -class DrawerCodegen -{ -public: - // Checks if a line is rendered by this thread - SSABool line_skipped_by_thread(SSAInt line, SSAWorkerThread thread); - - // The number of lines to skip to reach the first line to be rendered by this thread - SSAInt skipped_by_thread(SSAInt first_line, SSAWorkerThread thread); - - // The number of lines to be rendered by this thread - SSAInt count_for_thread(SSAInt first_line, SSAInt count, SSAWorkerThread thread); - - // Calculate the dest address for the first line to be rendered by this thread - SSAUBytePtr dest_for_thread(SSAInt first_line, SSAInt pitch, SSAUBytePtr dest, SSAWorkerThread thread); - - // LightBgra - SSAInt calc_light_multiplier(SSAInt light); - SSAVec4i shade_pal_index_simple(SSAInt index, SSAInt light, SSAUBytePtr basecolors); - SSAVec4i shade_pal_index_advanced(SSAInt index, SSAInt light, const SSAShadeConstants &constants, SSAUBytePtr basecolors); - 
SSAVec4i shade_bgra_simple(SSAVec4i color, SSAInt light); - SSAVec4i shade_bgra_advanced(SSAVec4i color, SSAInt light, const SSAShadeConstants &constants); - - // BlendBgra - SSAVec4i blend_copy(SSAVec4i fg); - SSAVec4i blend_add(SSAVec4i fg, SSAVec4i bg, SSAInt srcalpha, SSAInt destalpha); - SSAVec4i blend_sub(SSAVec4i fg, SSAVec4i bg, SSAInt srcalpha, SSAInt destalpha); - SSAVec4i blend_revsub(SSAVec4i fg, SSAVec4i bg, SSAInt srcalpha, SSAInt destalpha); - SSAVec4i blend_alpha_blend(SSAVec4i fg, SSAVec4i bg); - SSAVec4i blend_stencil(SSAVec4i color, SSAInt fgalpha, SSAVec4i bg, SSAInt srcalpha, SSAInt destalpha); - SSAVec4i blend_add_srccolor_oneminussrccolor(SSAVec4i fg, SSAVec4i bg); - - // Calculates the final alpha values to be used when combined with the source texture alpha channel - SSAInt calc_blend_bgalpha(SSAVec4i fg, SSAInt destalpha); -}; - -#define FRACBITS 16 -#define FRACUNIT (1<> FRACBITS) * textureheight0) >> FRACBITS; - if (variant == DrawSkyVariant::Single) - { - return source0[sample_index * 4].load_vec4ub(false); - } - else - { - SSAInt sample_index2 = SSAInt::MIN(sample_index, maxtextureheight1); - SSAVec4i color0 = source0[sample_index * 4].load_vec4ub(false); - SSAVec4i color1 = source1[sample_index2 * 4].load_vec4ub(false); - return blend_alpha_blend(color0, color1); - } -} - -SSAVec4i DrawSkyCodegen::FadeOut(SSAInt frac, SSAVec4i color) -{ - int start_fade = 2; // How fast it should fade out - - SSAInt alpha_top = SSAInt::MAX(SSAInt::MIN(frac.ashr(16 - start_fade), SSAInt(256)), SSAInt(0)); - SSAInt alpha_bottom = SSAInt::MAX(SSAInt::MIN(((2 << 24) - frac).ashr(16 - start_fade), SSAInt(256)), SSAInt(0)); - SSAInt inv_alpha_top = 256 - alpha_top; - SSAInt inv_alpha_bottom = 256 - alpha_bottom; - - color = (color * alpha_top + top_color * inv_alpha_top) / 256; - color = (color * alpha_bottom + bottom_color * inv_alpha_bottom) / 256; - return color.insert(3, 255); -} diff --git a/tools/drawergen/fixedfunction/drawskycodegen.h 
b/tools/drawergen/fixedfunction/drawskycodegen.h deleted file mode 100644 index 463c8ca232..0000000000 --- a/tools/drawergen/fixedfunction/drawskycodegen.h +++ /dev/null @@ -1,62 +0,0 @@ -/* -** DrawSky code generation -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 'as-is', without any express or implied -** warranty. In no event will the authors be held liable for any damages -** arising from the use of this software. -** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. -** 3. This notice may not be removed or altered from any source distribution. 
-** -*/ - -#pragma once - -#include "drawercodegen.h" - -enum class DrawSkyVariant -{ - Single, - Double -}; - -class DrawSkyCodegen : public DrawerCodegen -{ -public: - void Generate(DrawSkyVariant variant, SSAValue args, SSAValue thread_data); - -private: - void Loop(DrawSkyVariant variant, bool fade_sky); - SSAVec4i Sample(SSAInt frac, DrawSkyVariant variant); - SSAVec4i FadeOut(SSAInt frac, SSAVec4i color); - - SSAStack stack_index, stack_frac; - - SSAUBytePtr dest; - SSAUBytePtr source0; - SSAUBytePtr source1; - SSAInt pitch; - SSAInt count; - SSAInt dest_y; - SSAInt texturefrac; - SSAInt iscale; - SSAInt textureheight0; - SSAInt maxtextureheight1; - SSAVec4i top_color; - SSAVec4i bottom_color; - SSAWorkerThread thread; - - SSAInt fracstep; - - SSABool is_fade_sky; -}; diff --git a/tools/drawergen/fixedfunction/drawspancodegen.cpp b/tools/drawergen/fixedfunction/drawspancodegen.cpp deleted file mode 100644 index f5a3b46c7b..0000000000 --- a/tools/drawergen/fixedfunction/drawspancodegen.cpp +++ /dev/null @@ -1,298 +0,0 @@ -/* -** DrawSpan code generation -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 'as-is', without any express or implied -** warranty. In no event will the authors be held liable for any damages -** arising from the use of this software. -** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. -** 3. This notice may not be removed or altered from any source distribution. 
-** -*/ - -#include "precomp.h" -#include "timestamp.h" -#include "fixedfunction/drawspancodegen.h" -#include "ssa/ssa_function.h" -#include "ssa/ssa_scope.h" -#include "ssa/ssa_for_block.h" -#include "ssa/ssa_if_block.h" -#include "ssa/ssa_stack.h" -#include "ssa/ssa_function.h" -#include "ssa/ssa_struct_type.h" -#include "ssa/ssa_value.h" - -void DrawSpanCodegen::Generate(DrawSpanVariant variant, SSAValue args) -{ - destorg = args[0][0].load(true); - source = args[0][1].load(true); - destpitch = args[0][2].load(true); - stack_xfrac.store(args[0][3].load(true)); - stack_yfrac.store(args[0][4].load(true)); - xstep = args[0][5].load(true); - ystep = args[0][6].load(true); - x1 = args[0][7].load(true); - x2 = args[0][8].load(true); - y = args[0][9].load(true); - xbits = args[0][10].load(true); - ybits = args[0][11].load(true); - light = args[0][12].load(true); - srcalpha = args[0][13].load(true); - destalpha = args[0][14].load(true); - SSAShort light_alpha = args[0][15].load(true); - SSAShort light_red = args[0][16].load(true); - SSAShort light_green = args[0][17].load(true); - SSAShort light_blue = args[0][18].load(true); - SSAShort fade_alpha = args[0][19].load(true); - SSAShort fade_red = args[0][20].load(true); - SSAShort fade_green = args[0][21].load(true); - SSAShort fade_blue = args[0][22].load(true); - SSAShort desaturate = args[0][23].load(true); - SSAInt flags = args[0][24].load(true); - start_viewpos_x = args[0][25].load(true); - step_viewpos_x = args[0][26].load(true); - dynlights = args[0][27].load(true); - num_dynlights = args[0][28].load(true); - shade_constants.light = SSAVec4i(light_blue.zext_int(), light_green.zext_int(), light_red.zext_int(), light_alpha.zext_int()); - shade_constants.fade = SSAVec4i(fade_blue.zext_int(), fade_green.zext_int(), fade_red.zext_int(), fade_alpha.zext_int()); - shade_constants.desaturate = desaturate.zext_int(); - - count = x2 - x1 + 1; - data = destorg[(x1 + y * destpitch) * 4]; - - yshift = 32 - ybits; - xshift = 
yshift - xbits; - xmask = ((SSAInt(1) << xbits) - 1) << ybits; - - // 64x64 is the most common case by far, so special case it. - is_64x64 = xbits == SSAInt(6) && ybits == SSAInt(6); - is_simple_shade = (flags & DrawSpanArgs::simple_shade) == SSAInt(DrawSpanArgs::simple_shade); - is_nearest_filter = (flags & DrawSpanArgs::nearest_filter) == SSAInt(DrawSpanArgs::nearest_filter); - - SSAIfBlock branch; - branch.if_block(is_simple_shade); - LoopShade(variant, true); - branch.else_block(); - LoopShade(variant, false); - branch.end_block(); -} - -void DrawSpanCodegen::LoopShade(DrawSpanVariant variant, bool isSimpleShade) -{ - SSAIfBlock branch; - branch.if_block(is_nearest_filter); - LoopFilter(variant, isSimpleShade, true); - branch.else_block(); - stack_xfrac.store(stack_xfrac.load() - (SSAInt(1) << (31 - xbits))); - stack_yfrac.store(stack_yfrac.load() - (SSAInt(1) << (31 - ybits))); - LoopFilter(variant, isSimpleShade, false); - branch.end_block(); -} - -void DrawSpanCodegen::LoopFilter(DrawSpanVariant variant, bool isSimpleShade, bool isNearestFilter) -{ - SSAIfBlock branch; - branch.if_block(is_64x64); - { - SSAInt sseLength = Loop4x(variant, isSimpleShade, isNearestFilter, true); - Loop(sseLength * 4, variant, isSimpleShade, isNearestFilter, true); - } - branch.else_block(); - { - SSAInt sseLength = Loop4x(variant, isSimpleShade, isNearestFilter, false); - Loop(sseLength * 4, variant, isSimpleShade, isNearestFilter, false); - } - branch.end_block(); -} - -SSAInt DrawSpanCodegen::Loop4x(DrawSpanVariant variant, bool isSimpleShade, bool isNearestFilter, bool is64x64) -{ - SSAInt sseLength = count / 4; - stack_index.store(SSAInt(0)); - stack_viewpos_x.store(start_viewpos_x); - { - SSAForBlock loop; - SSAInt index = stack_index.load(); - loop.loop_block(index < sseLength); - - SSAVec16ub bg = data[index * 16].load_unaligned_vec16ub(false); - SSAVec8s bg0 = SSAVec8s::extendlo(bg); - SSAVec8s bg1 = SSAVec8s::extendhi(bg); - SSAVec4i bgcolors[4] = - { - 
SSAVec4i::extendlo(bg0), - SSAVec4i::extendhi(bg0), - SSAVec4i::extendlo(bg1), - SSAVec4i::extendhi(bg1) - }; - - SSAVec4i colors[4]; - for (int i = 0; i < 4; i++) - { - SSAInt xfrac = stack_xfrac.load(); - SSAInt yfrac = stack_yfrac.load(); - viewpos_x = stack_viewpos_x.load(); - - colors[i] = Blend(Shade(Sample(xfrac, yfrac, isNearestFilter, is64x64), isSimpleShade), bgcolors[i], variant); - - stack_viewpos_x.store(viewpos_x + step_viewpos_x); - stack_xfrac.store(xfrac + xstep); - stack_yfrac.store(yfrac + ystep); - } - - SSAVec16ub color(SSAVec8s(colors[0], colors[1]), SSAVec8s(colors[2], colors[3])); - data[index * 16].store_unaligned_vec16ub(color); - - stack_index.store(index.add(SSAInt(1), true, true)); - loop.end_block(); - } - return sseLength; -} - -void DrawSpanCodegen::Loop(SSAInt start, DrawSpanVariant variant, bool isSimpleShade, bool isNearestFilter, bool is64x64) -{ - stack_index.store(start); - { - SSAForBlock loop; - SSAInt index = stack_index.load(); - viewpos_x = stack_viewpos_x.load(); - loop.loop_block(index < count); - - SSAInt xfrac = stack_xfrac.load(); - SSAInt yfrac = stack_yfrac.load(); - - SSAVec4i bgcolor = data[index * 4].load_vec4ub(false); - SSAVec4i color = Blend(Shade(Sample(xfrac, yfrac, isNearestFilter, is64x64), isSimpleShade), bgcolor, variant); - data[index * 4].store_vec4ub(color); - - stack_viewpos_x.store(viewpos_x + step_viewpos_x); - stack_index.store(index.add(SSAInt(1), true, true)); - stack_xfrac.store(xfrac + xstep); - stack_yfrac.store(yfrac + ystep); - loop.end_block(); - } -} - -SSAVec4i DrawSpanCodegen::Sample(SSAInt xfrac, SSAInt yfrac, bool isNearestFilter, bool is64x64) -{ - if (isNearestFilter) - { - SSAInt spot; - if (is64x64) - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - else - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - return source[spot * 4].load_vec4ub(true); - } - else - { - if (is64x64) - { - return SampleLinear(source, xfrac, yfrac, SSAInt(26), SSAInt(26)); - } - 
else - { - return SampleLinear(source, xfrac, yfrac, 32 - xbits, 32 - ybits); - } - } -} - -SSAVec4i DrawSpanCodegen::SampleLinear(SSAUBytePtr texture, SSAInt xfrac, SSAInt yfrac, SSAInt xbits, SSAInt ybits) -{ - SSAInt xshift = (32 - xbits); - SSAInt yshift = (32 - ybits); - SSAInt xmask = (SSAInt(1) << xshift) - 1; - SSAInt ymask = (SSAInt(1) << yshift) - 1; - SSAInt x = xfrac >> xbits; - SSAInt y = yfrac >> ybits; - - SSAVec4i p00 = texture[((y & ymask) + ((x & xmask) << yshift)) * 4].load_vec4ub(true); - SSAVec4i p01 = texture[(((y + 1) & ymask) + ((x & xmask) << yshift)) * 4].load_vec4ub(true); - SSAVec4i p10 = texture[((y & ymask) + (((x + 1) & xmask) << yshift)) * 4].load_vec4ub(true); - SSAVec4i p11 = texture[(((y + 1) & ymask) + (((x + 1) & xmask) << yshift)) * 4].load_vec4ub(true); - - SSAInt inv_b = (xfrac >> (xbits - 4)) & 15; - SSAInt inv_a = (yfrac >> (ybits - 4)) & 15; - SSAInt a = 16 - inv_a; - SSAInt b = 16 - inv_b; - - return (p00 * (a * b) + p01 * (inv_a * b) + p10 * (a * inv_b) + p11 * (inv_a * inv_b) + 127) >> 8; -} - -SSAVec4i DrawSpanCodegen::Shade(SSAVec4i fg, bool isSimpleShade) -{ - SSAVec4i c; - if (isSimpleShade) - c = shade_bgra_simple(fg, light); - else - c = shade_bgra_advanced(fg, light, shade_constants); - - stack_lit_color.store(SSAVec4i(0)); - stack_light_index.store(SSAInt(0)); - - SSAForBlock block; - SSAInt light_index = stack_light_index.load(); - SSAVec4i lit_color = stack_lit_color.load(); - block.loop_block(light_index < num_dynlights); - { - SSAVec4i light_color = SSAUBytePtr(SSAValue(dynlights[light_index][0]).v).load_vec4ub(true); - SSAFloat light_x = dynlights[light_index][1].load(true); - SSAFloat light_y = dynlights[light_index][2].load(true); - SSAFloat light_z = dynlights[light_index][3].load(true); - SSAFloat light_rcp_radius = dynlights[light_index][4].load(true); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // attenuation = 1 - MIN(dist * (1/radius), 1) - SSAFloat Lyz2 = light_y; // L.y*L.y + L.z*L.z - 
SSAFloat Lx = light_x - viewpos_x; - SSAFloat dist2 = Lyz2 + Lx * Lx; - SSAFloat rcp_dist = SSAFloat::rsqrt(dist2); - SSAFloat dist = dist2 * rcp_dist; - SSAFloat distance_attenuation = SSAFloat(256.0f) - SSAFloat::MIN(dist * light_rcp_radius, SSAFloat(256.0f)); - - // The simple light type - SSAFloat simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - SSAFloat point_attenuation = light_z * rcp_dist * distance_attenuation; - - SSAInt attenuation = SSAInt((light_z == SSAFloat(0.0f)).select(simple_attenuation, point_attenuation), true); - SSAVec4i contribution = (light_color * attenuation) >> 8; - - stack_lit_color.store(lit_color + contribution); - stack_light_index.store(light_index + 1); - } - block.end_block(); - - return c + ((stack_lit_color.load() * fg) >> 8); -} - -SSAVec4i DrawSpanCodegen::Blend(SSAVec4i fg, SSAVec4i bg, DrawSpanVariant variant) -{ - switch (variant) - { - default: - case DrawSpanVariant::Opaque: - return blend_copy(fg); - case DrawSpanVariant::Masked: - return blend_alpha_blend(fg, bg); - case DrawSpanVariant::Translucent: - case DrawSpanVariant::AddClamp: - return blend_add(fg, bg, srcalpha, destalpha); - case DrawSpanVariant::MaskedTranslucent: - case DrawSpanVariant::MaskedAddClamp: - return blend_add(fg, bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); - } -} diff --git a/tools/drawergen/fixedfunction/drawspancodegen.h b/tools/drawergen/fixedfunction/drawspancodegen.h deleted file mode 100644 index 9e0c67c412..0000000000 --- a/tools/drawergen/fixedfunction/drawspancodegen.h +++ /dev/null @@ -1,83 +0,0 @@ -/* -** DrawSpan code generation -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 'as-is', without any express or implied -** warranty. In no event will the authors be held liable for any damages -** arising from the use of this software. 
-** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. -** 3. This notice may not be removed or altered from any source distribution. -** -*/ - -#pragma once - -#include "drawercodegen.h" - -enum class DrawSpanVariant -{ - Opaque, - Masked, - Translucent, - MaskedTranslucent, - AddClamp, - MaskedAddClamp -}; - -class DrawSpanCodegen : public DrawerCodegen -{ -public: - void Generate(DrawSpanVariant variant, SSAValue args); - -private: - void LoopShade(DrawSpanVariant variant, bool isSimpleShade); - void LoopFilter(DrawSpanVariant variant, bool isSimpleShade, bool isNearestFilter); - SSAInt Loop4x(DrawSpanVariant variant, bool isSimpleShade, bool isNearestFilter, bool is64x64); - void Loop(SSAInt start, DrawSpanVariant variant, bool isSimpleShade, bool isNearestFilter, bool is64x64); - SSAVec4i Sample(SSAInt xfrac, SSAInt yfrac, bool isNearestFilter, bool is64x64); - SSAVec4i SampleLinear(SSAUBytePtr texture, SSAInt xfrac, SSAInt yfrac, SSAInt xbits, SSAInt ybits); - SSAVec4i Shade(SSAVec4i fg, bool isSimpleShade); - SSAVec4i Blend(SSAVec4i fg, SSAVec4i bg, DrawSpanVariant variant); - - SSAStack stack_index, stack_xfrac, stack_yfrac, stack_light_index; - SSAStack stack_lit_color; - SSAStack stack_viewpos_x; - - SSAUBytePtr destorg; - SSAUBytePtr source; - SSAInt destpitch; - SSAInt xstep; - SSAInt ystep; - SSAInt x1; - SSAInt x2; - SSAInt y; - SSAInt xbits; - SSAInt ybits; - SSAInt light; - SSAInt srcalpha; - SSAInt destalpha; - 
SSAInt count; - SSAUBytePtr data; - SSAInt yshift; - SSAInt xshift; - SSAInt xmask; - SSABool is_64x64; - SSABool is_simple_shade; - SSABool is_nearest_filter; - SSAShadeConstants shade_constants; - - SSAFloat start_viewpos_x, step_viewpos_x; - SSAValue dynlights; // TriLight* - SSAInt num_dynlights; - SSAFloat viewpos_x; -}; diff --git a/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp b/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp deleted file mode 100644 index 6f3a725fab..0000000000 --- a/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp +++ /dev/null @@ -1,697 +0,0 @@ -/* -** DrawTriangle code generation -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 'as-is', without any express or implied -** warranty. In no event will the authors be held liable for any damages -** arising from the use of this software. -** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. -** 3. This notice may not be removed or altered from any source distribution. 
-** -*/ - -#include "precomp.h" -#include "timestamp.h" -#include "fixedfunction/drawtrianglecodegen.h" -#include "ssa/ssa_function.h" -#include "ssa/ssa_scope.h" -#include "ssa/ssa_for_block.h" -#include "ssa/ssa_if_block.h" -#include "ssa/ssa_stack.h" -#include "ssa/ssa_function.h" -#include "ssa/ssa_struct_type.h" -#include "ssa/ssa_value.h" - -void DrawTriangleCodegen::Generate(TriBlendMode blendmode, bool truecolor, bool colorfill, SSAValue args, SSAValue thread_data) -{ - this->blendmode = blendmode; - this->truecolor = truecolor; - this->colorfill = colorfill; - pixelsize = truecolor ? 4 : 1; - - LoadArgs(args, thread_data); - CalculateGradients(); - - if (truecolor) - { - SSAIfBlock branch; - branch.if_block(is_simple_shade); - { - DrawFullSpans(true); - DrawPartialBlocks(true); - } - branch.else_block(); - { - DrawFullSpans(false); - DrawPartialBlocks(false); - } - branch.end_block(); - } - else - { - DrawFullSpans(true); - DrawPartialBlocks(true); - } -} - -void DrawTriangleCodegen::DrawFullSpans(bool isSimpleShade) -{ - stack_i.store(SSAInt(0)); - SSAForBlock loop; - SSAInt i = stack_i.load(); - loop.loop_block(i < numSpans, 0); - { - SSAInt spanX = SSAShort(fullSpans[i][0].load(true).v).zext_int(); - SSAInt spanY = SSAShort(fullSpans[i][1].load(true).v).zext_int(); - SSAInt spanLength = fullSpans[i][2].load(true); - - SSAInt width = spanLength; - SSAInt height = SSAInt(8); - - stack_dest.store(destOrg[(spanX + spanY * pitch) * pixelsize]); - stack_posYW.store(start.W + gradientX.W * (spanX - startX) + gradientY.W * (spanY - startY)); - for (int j = 0; j < TriVertex::NumVarying; j++) - stack_posYVarying[j].store(start.Varying[j] + gradientX.Varying[j] * (spanX - startX) + gradientY.Varying[j] * (spanY - startY)); - stack_y.store(SSAInt(0)); - - SSAForBlock loop_y; - SSAInt y = stack_y.load(); - SSAUBytePtr dest = stack_dest.load(); - SSAStepVariables blockPosY; - blockPosY.W = stack_posYW.load(); - for (int j = 0; j < TriVertex::NumVarying; j++) - 
blockPosY.Varying[j] = stack_posYVarying[j].load(); - loop_y.loop_block(y < height, 0); - { - stack_posXW.store(blockPosY.W); - for (int j = 0; j < TriVertex::NumVarying; j++) - stack_posXVarying[j].store(blockPosY.Varying[j]); - - SSAFloat rcpW = SSAFloat((float)0x01000000) / blockPosY.W; - stack_lightpos.store(FRACUNIT - SSAInt(SSAFloat::clamp(shade - SSAFloat::MIN(SSAFloat(24.0f / 32.0f), globVis * blockPosY.W), SSAFloat(0.0f), SSAFloat(31.0f / 32.0f)) * (float)FRACUNIT, true)); - for (int j = 0; j < TriVertex::NumVarying; j++) - stack_varyingPos[j].store(SSAInt(blockPosY.Varying[j] * rcpW, false)); - stack_x.store(SSAInt(0)); - - SSAForBlock loop_x; - SSAInt x = stack_x.load(); - SSAStepVariables blockPosX; - blockPosX.W = stack_posXW.load(); - for (int j = 0; j < TriVertex::NumVarying; j++) - blockPosX.Varying[j] = stack_posXVarying[j].load(); - SSAInt lightpos = stack_lightpos.load(); - SSAInt varyingPos[TriVertex::NumVarying]; - for (int j = 0; j < TriVertex::NumVarying; j++) - varyingPos[j] = stack_varyingPos[j].load(); - loop_x.loop_block(x < width, 0); - { - blockPosX.W = blockPosX.W + gradientX.W * 8.0f; - for (int j = 0; j < TriVertex::NumVarying; j++) - blockPosX.Varying[j] = blockPosX.Varying[j] + gradientX.Varying[j] * 8.0f; - - rcpW = SSAFloat((float)0x01000000) / blockPosX.W; - SSAInt varyingStep[TriVertex::NumVarying]; - for (int j = 0; j < TriVertex::NumVarying; j++) - { - SSAInt nextPos = SSAInt(blockPosX.Varying[j] * rcpW, false); - varyingStep[j] = (nextPos - varyingPos[j]) / 8; - } - - SSAInt lightnext = FRACUNIT - SSAInt(SSAFloat::clamp(shade - SSAFloat::MIN(SSAFloat(24.0f / 32.0f), globVis * blockPosX.W), SSAFloat(0.0f), SSAFloat(31.0f / 32.0f)) * (float)FRACUNIT, true); - SSAInt lightstep = (lightnext - lightpos) / 8; - - if (truecolor) - { - for (int ix = 0; ix < 8; ix += 4) - { - SSAUBytePtr destptr = dest[(x * 8 + ix) * 4]; - SSAVec16ub pixels16 = destptr.load_unaligned_vec16ub(false); - SSAVec8s pixels8hi = 
SSAVec8s::extendhi(pixels16); - SSAVec8s pixels8lo = SSAVec8s::extendlo(pixels16); - SSAVec4i pixels[4] = - { - SSAVec4i::extendlo(pixels8lo), - SSAVec4i::extendhi(pixels8lo), - SSAVec4i::extendlo(pixels8hi), - SSAVec4i::extendhi(pixels8hi) - }; - - for (int sse = 0; sse < 4; sse++) - { - currentlight = is_fixed_light.select(light, lightpos >> 8); - pixels[sse] = ProcessPixel32(pixels[sse], varyingPos, isSimpleShade); - - for (int j = 0; j < TriVertex::NumVarying; j++) - varyingPos[j] = varyingPos[j] + varyingStep[j]; - lightpos = lightpos + lightstep; - } - - destptr.store_unaligned_vec16ub(SSAVec16ub(SSAVec8s(pixels[0], pixels[1]), SSAVec8s(pixels[2], pixels[3]))); - } - } - else - { - for (int ix = 0; ix < 8; ix++) - { - currentlight = is_fixed_light.select(light, lightpos >> 8); - SSAInt colormapindex = SSAInt::MIN((256 - currentlight) * 32 / 256, SSAInt(31)); - currentcolormap = Colormaps[colormapindex << 8]; - - SSAUBytePtr destptr = dest[(x * 8 + ix)]; - destptr.store(ProcessPixel8(destptr.load(false).zext_int(), varyingPos).trunc_ubyte()); - - for (int j = 0; j < TriVertex::NumVarying; j++) - varyingPos[j] = varyingPos[j] + varyingStep[j]; - lightpos = lightpos + lightstep; - } - } - - for (int j = 0; j < TriVertex::NumVarying; j++) - stack_varyingPos[j].store(varyingPos[j]); - stack_lightpos.store(lightpos); - stack_posXW.store(blockPosX.W); - for (int j = 0; j < TriVertex::NumVarying; j++) - stack_posXVarying[j].store(blockPosX.Varying[j]); - stack_x.store(x + 1); - } - loop_x.end_block(); - - stack_posYW.store(blockPosY.W + gradientY.W); - for (int j = 0; j < TriVertex::NumVarying; j++) - stack_posYVarying[j].store(blockPosY.Varying[j] + gradientY.Varying[j]); - stack_dest.store(dest[pitch * pixelsize]); - stack_y.store(y + 1); - } - loop_y.end_block(); - - stack_i.store(i + 1); - } - loop.end_block(); -} - -void DrawTriangleCodegen::DrawPartialBlocks(bool isSimpleShade) -{ - stack_i.store(SSAInt(0)); - SSAForBlock loop; - SSAInt i = stack_i.load(); - 
loop.loop_block(i < numBlocks, 0); - { - SSAInt blockX = SSAShort(partialBlocks[i][0].load(true).v).zext_int(); - SSAInt blockY = SSAShort(partialBlocks[i][1].load(true).v).zext_int(); - SSAInt mask0 = partialBlocks[i][2].load(true); - SSAInt mask1 = partialBlocks[i][3].load(true); - - SSAUBytePtr dest = destOrg[(blockX + blockY * pitch) * pixelsize]; - - SSAStepVariables blockPosY; - blockPosY.W = start.W + gradientX.W * (blockX - startX) + gradientY.W * (blockY - startY); - for (int j = 0; j < TriVertex::NumVarying; j++) - blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (blockX - startX) + gradientY.Varying[j] * (blockY - startY); - - for (int maskNum = 0; maskNum < 2; maskNum++) - { - SSAInt mask = (maskNum == 0) ? mask0 : mask1; - - for (int y = 0; y < 4; y++) - { - SSAStepVariables blockPosX = blockPosY; - - SSAFloat rcpW = SSAFloat((float)0x01000000) / blockPosX.W; - SSAInt varyingPos[TriVertex::NumVarying]; - for (int j = 0; j < TriVertex::NumVarying; j++) - varyingPos[j] = SSAInt(blockPosX.Varying[j] * rcpW, false); - - SSAInt lightpos = FRACUNIT - SSAInt(SSAFloat::clamp(shade - SSAFloat::MIN(SSAFloat(24.0f / 32.0f), globVis * blockPosX.W), SSAFloat(0.0f), SSAFloat(31.0f / 32.0f)) * (float)FRACUNIT, true); - - blockPosX.W = blockPosX.W + gradientX.W * 8.0f; - for (int j = 0; j < TriVertex::NumVarying; j++) - blockPosX.Varying[j] = blockPosX.Varying[j] + gradientX.Varying[j] * 8.0f; - - rcpW = SSAFloat((float)0x01000000) / blockPosX.W; - SSAInt varyingStep[TriVertex::NumVarying]; - for (int j = 0; j < TriVertex::NumVarying; j++) - { - SSAInt nextPos = SSAInt(blockPosX.Varying[j] * rcpW, false); - varyingStep[j] = (nextPos - varyingPos[j]) / 8; - } - - SSAInt lightnext = FRACUNIT - SSAInt(SSAFloat::clamp(shade - SSAFloat::MIN(SSAFloat(24.0f / 32.0f), globVis * blockPosX.W), SSAFloat(0.0f), SSAFloat(31.0f / 32.0f)) * (float)FRACUNIT, true); - SSAInt lightstep = (lightnext - lightpos) / 8; - - for (int x = 0; x < 8; x++) - { - SSABool covered = 
!((mask & (1 << (31 - y * 8 - x))) == SSAInt(0)); - SSAIfBlock branch; - branch.if_block(covered); - { - if (truecolor) - { - currentlight = is_fixed_light.select(light, lightpos >> 8); - - SSAUBytePtr destptr = dest[x * 4]; - destptr.store_vec4ub(ProcessPixel32(destptr.load_vec4ub(false), varyingPos, isSimpleShade)); - } - else - { - currentlight = is_fixed_light.select(light, lightpos >> 8); - SSAInt colormapindex = SSAInt::MIN((256 - currentlight) * 32 / 256, SSAInt(31)); - currentcolormap = Colormaps[colormapindex << 8]; - - SSAUBytePtr destptr = dest[x]; - destptr.store(ProcessPixel8(destptr.load(false).zext_int(), varyingPos).trunc_ubyte()); - } - } - branch.end_block(); - - for (int j = 0; j < TriVertex::NumVarying; j++) - varyingPos[j] = varyingPos[j] + varyingStep[j]; - lightpos = lightpos + lightstep; - } - - blockPosY.W = blockPosY.W + gradientY.W; - for (int j = 0; j < TriVertex::NumVarying; j++) - blockPosY.Varying[j] = blockPosY.Varying[j] + gradientY.Varying[j]; - - dest = dest[pitch * pixelsize]; - } - } - - stack_i.store(i + 1); - } - loop.end_block(); -} - -SSAVec4i DrawTriangleCodegen::TranslateSample32(SSAInt *varying) -{ - SSAInt ufrac = varying[0] << 8; - SSAInt vfrac = varying[1] << 8; - - SSAInt upos = ((ufrac >> 16) * textureWidth) >> 16; - SSAInt vpos = ((vfrac >> 16) * textureHeight) >> 16; - SSAInt uvoffset = upos * textureHeight + vpos; - - if (colorfill) - return translation[color * 4].load_vec4ub(true); - else - return translation[texturePixels[uvoffset].load(true).zext_int() * 4].load_vec4ub(true); -} - -SSAInt DrawTriangleCodegen::TranslateSample8(SSAInt *varying) -{ - SSAInt ufrac = varying[0] << 8; - SSAInt vfrac = varying[1] << 8; - - SSAInt upos = ((ufrac >> 16) * textureWidth) >> 16; - SSAInt vpos = ((vfrac >> 16) * textureHeight) >> 16; - SSAInt uvoffset = upos * textureHeight + vpos; - - if (colorfill) - return translation[color].load(true).zext_int(); - else - return 
translation[texturePixels[uvoffset].load(true).zext_int()].load(true).zext_int(); -} - -SSAVec4i DrawTriangleCodegen::Sample32(SSAInt *varying) -{ - if (colorfill) - return SSAVec4i::unpack(color); - - SSAInt ufrac = varying[0] << 8; - SSAInt vfrac = varying[1] << 8; - - SSAVec4i nearest; - SSAVec4i linear; - - { - SSAInt upos = ((ufrac >> 16) * textureWidth) >> 16; - SSAInt vpos = ((vfrac >> 16) * textureHeight) >> 16; - SSAInt uvoffset = upos * textureHeight + vpos; - - nearest = texturePixels[uvoffset * 4].load_vec4ub(true); - } - - return nearest; - - /* - { - SSAInt uone = (SSAInt(0x01000000) / textureWidth) << 8; - SSAInt vone = (SSAInt(0x01000000) / textureHeight) << 8; - - ufrac = ufrac - (uone >> 1); - vfrac = vfrac - (vone >> 1); - - SSAInt frac_x0 = (ufrac >> FRACBITS) * textureWidth; - SSAInt frac_x1 = ((ufrac + uone) >> FRACBITS) * textureWidth; - SSAInt frac_y0 = (vfrac >> FRACBITS) * textureHeight; - SSAInt frac_y1 = ((vfrac + vone) >> FRACBITS) * textureHeight; - - SSAInt x0 = frac_x0 >> FRACBITS; - SSAInt x1 = frac_x1 >> FRACBITS; - SSAInt y0 = frac_y0 >> FRACBITS; - SSAInt y1 = frac_y1 >> FRACBITS; - - SSAVec4i p00 = texturePixels[(x0 * textureHeight + y0) * 4].load_vec4ub(true); - SSAVec4i p01 = texturePixels[(x0 * textureHeight + y1) * 4].load_vec4ub(true); - SSAVec4i p10 = texturePixels[(x1 * textureHeight + y0) * 4].load_vec4ub(true); - SSAVec4i p11 = texturePixels[(x1 * textureHeight + y1) * 4].load_vec4ub(true); - - SSAInt inv_b = (frac_x1 >> (FRACBITS - 4)) & 15; - SSAInt inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; - SSAInt a = 16 - inv_a; - SSAInt b = 16 - inv_b; - - linear = (p00 * (a * b) + p01 * (inv_a * b) + p10 * (a * inv_b) + p11 * (inv_a * inv_b) + 127) >> 8; - } - - // // Min filter = linear, Mag filter = nearest: - // AffineLinear = (gradVaryingX[0] / AffineW) > SSAFloat(1.0f) || (gradVaryingX[0] / AffineW) < SSAFloat(-1.0f); - - return AffineLinear.select(linear, nearest); - */ -} - -SSAInt DrawTriangleCodegen::Sample8(SSAInt 
*varying) -{ - SSAInt ufrac = varying[0] << 8; - SSAInt vfrac = varying[1] << 8; - - SSAInt upos = ((ufrac >> 16) * textureWidth) >> 16; - SSAInt vpos = ((vfrac >> 16) * textureHeight) >> 16; - SSAInt uvoffset = upos * textureHeight + vpos; - - if (colorfill) - return color; - else - return texturePixels[uvoffset].load(true).zext_int(); -} - -SSAInt DrawTriangleCodegen::Shade8(SSAInt c) -{ - return currentcolormap[c].load(true).zext_int(); -} - -SSAVec4i DrawTriangleCodegen::Shade32(SSAVec4i fg, SSAInt light, bool isSimpleShade) -{ - if (isSimpleShade) - return shade_bgra_simple(fg, currentlight); - else - return shade_bgra_advanced(fg, currentlight, shade_constants); -} - -SSAVec4i DrawTriangleCodegen::ProcessPixel32(SSAVec4i bg, SSAInt *varying, bool isSimpleShade) -{ - SSAVec4i fg; - SSAVec4i output; - - switch (blendmode) - { - default: - case TriBlendMode::Copy: - fg = Sample32(varying); - output = blend_copy(Shade32(fg, currentlight, isSimpleShade)); - break; - case TriBlendMode::AlphaBlend: - fg = Sample32(varying); - output = blend_alpha_blend(Shade32(fg, currentlight, isSimpleShade), bg); - break; - case TriBlendMode::AddSolid: - fg = Sample32(varying); - output = blend_add(Shade32(fg, currentlight, isSimpleShade), bg, srcalpha, destalpha); - break; - case TriBlendMode::Add: - fg = Sample32(varying); - output = blend_add(Shade32(fg, currentlight, isSimpleShade), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); - break; - case TriBlendMode::Sub: - fg = Sample32(varying); - output = blend_sub(Shade32(fg, currentlight, isSimpleShade), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); - break; - case TriBlendMode::RevSub: - fg = Sample32(varying); - output = blend_revsub(Shade32(fg, currentlight, isSimpleShade), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); - break; - case TriBlendMode::Stencil: - fg = Sample32(varying); - output = blend_stencil(Shade32(SSAVec4i::unpack(color), currentlight, isSimpleShade), fg[3], bg, srcalpha, destalpha); - break; - case 
TriBlendMode::Shaded: - output = blend_stencil(Shade32(SSAVec4i::unpack(color), currentlight, isSimpleShade), Sample8(varying), bg, srcalpha, destalpha); - break; - case TriBlendMode::TranslateCopy: - fg = TranslateSample32(varying); - output = blend_copy(Shade32(fg, currentlight, isSimpleShade)); - break; - case TriBlendMode::TranslateAlphaBlend: - fg = TranslateSample32(varying); - output = blend_alpha_blend(Shade32(fg, currentlight, isSimpleShade), bg); - break; - case TriBlendMode::TranslateAdd: - fg = TranslateSample32(varying); - output = blend_add(Shade32(fg, currentlight, isSimpleShade), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); - break; - case TriBlendMode::TranslateSub: - fg = TranslateSample32(varying); - output = blend_sub(Shade32(fg, currentlight, isSimpleShade), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); - break; - case TriBlendMode::TranslateRevSub: - fg = TranslateSample32(varying); - output = blend_revsub(Shade32(fg, currentlight, isSimpleShade), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); - break; - case TriBlendMode::AddSrcColorOneMinusSrcColor: - fg = Sample32(varying); - output = blend_add_srccolor_oneminussrccolor(Shade32(fg, currentlight, isSimpleShade), bg); - break; - case TriBlendMode::Skycap: - fg = Sample32(varying); - output = FadeOut(varying[1], fg); - break; - } - - return output; -} - -SSAVec4i DrawTriangleCodegen::ToBgra(SSAInt index) -{ - SSAVec4i c = BaseColors[index * 4].load_vec4ub(true); - c = c.insert(3, 255); - return c; -} - -SSAInt DrawTriangleCodegen::ToPal8(SSAVec4i c) -{ - SSAInt red = SSAInt::clamp(c[0], SSAInt(0), SSAInt(255)); - SSAInt green = SSAInt::clamp(c[1], SSAInt(0), SSAInt(255)); - SSAInt blue = SSAInt::clamp(c[2], SSAInt(0), SSAInt(255)); - return RGB256k[((blue >> 2) * 64 + (green >> 2)) * 64 + (red >> 2)].load(true).zext_int(); -} - -SSAInt DrawTriangleCodegen::ProcessPixel8(SSAInt bg, SSAInt *varying) -{ - SSAVec4i fg; - SSAInt alpha, inv_alpha; - SSAInt output; - SSAInt palindex; - - 
switch (blendmode) - { - default: - case TriBlendMode::Copy: - output = Shade8(Sample8(varying)); - break; - case TriBlendMode::AlphaBlend: - palindex = Sample8(varying); - output = Shade8(palindex); - output = (palindex == SSAInt(0)).select(bg, output); - break; - case TriBlendMode::AddSolid: - palindex = Sample8(varying); - fg = ToBgra(Shade8(palindex)); - output = ToPal8(blend_add(fg, ToBgra(bg), srcalpha, destalpha)); - output = (palindex == SSAInt(0)).select(bg, output); - break; - case TriBlendMode::Add: - palindex = Sample8(varying); - fg = ToBgra(Shade8(palindex)); - output = ToPal8(blend_add(fg, ToBgra(bg), srcalpha, calc_blend_bgalpha(fg, destalpha))); - output = (palindex == SSAInt(0)).select(bg, output); - break; - case TriBlendMode::Sub: - palindex = Sample8(varying); - fg = ToBgra(Shade8(palindex)); - output = ToPal8(blend_sub(fg, ToBgra(bg), srcalpha, calc_blend_bgalpha(fg, destalpha))); - output = (palindex == SSAInt(0)).select(bg, output); - break; - case TriBlendMode::RevSub: - palindex = Sample8(varying); - fg = ToBgra(Shade8(palindex)); - output = ToPal8(blend_revsub(fg, ToBgra(bg), srcalpha, calc_blend_bgalpha(fg, destalpha))); - output = (palindex == SSAInt(0)).select(bg, output); - break; - case TriBlendMode::Stencil: - output = ToPal8(blend_stencil(ToBgra(Shade8(color)), (Sample8(varying) == SSAInt(0)).select(SSAInt(0), SSAInt(256)), ToBgra(bg), srcalpha, destalpha)); - break; - case TriBlendMode::Shaded: - palindex = Sample8(varying); - output = ToPal8(blend_stencil(ToBgra(Shade8(color)), palindex, ToBgra(bg), srcalpha, destalpha)); - break; - case TriBlendMode::TranslateCopy: - palindex = TranslateSample8(varying); - output = Shade8(palindex); - output = (palindex == SSAInt(0)).select(bg, output); - break; - case TriBlendMode::TranslateAlphaBlend: - palindex = TranslateSample8(varying); - output = Shade8(palindex); - output = (palindex == SSAInt(0)).select(bg, output); - break; - case TriBlendMode::TranslateAdd: - palindex = 
TranslateSample8(varying); - fg = ToBgra(Shade8(palindex)); - output = ToPal8(blend_add(fg, ToBgra(bg), srcalpha, calc_blend_bgalpha(fg, destalpha))); - output = (palindex == SSAInt(0)).select(bg, output); - break; - case TriBlendMode::TranslateSub: - palindex = TranslateSample8(varying); - fg = ToBgra(Shade8(palindex)); - output = ToPal8(blend_sub(fg, ToBgra(bg), srcalpha, calc_blend_bgalpha(fg, destalpha))); - output = (palindex == SSAInt(0)).select(bg, output); - break; - case TriBlendMode::TranslateRevSub: - palindex = TranslateSample8(varying); - fg = ToBgra(Shade8(palindex)); - output = ToPal8(blend_revsub(fg, ToBgra(bg), srcalpha, calc_blend_bgalpha(fg, destalpha))); - output = (palindex == SSAInt(0)).select(bg, output); - break; - case TriBlendMode::AddSrcColorOneMinusSrcColor: - palindex = Sample8(varying); - fg = ToBgra(Shade8(palindex)); - output = ToPal8(blend_add_srccolor_oneminussrccolor(fg, ToBgra(bg))); - output = (palindex == SSAInt(0)).select(bg, output); - break; - case TriBlendMode::Skycap: - fg = ToBgra(Sample8(varying)); - output = ToPal8(FadeOut(varying[1], fg)); - break; - } - - return output; -} - -SSAVec4i DrawTriangleCodegen::FadeOut(SSAInt frac, SSAVec4i fg) -{ - int start_fade = 2; // How fast it should fade out - - SSAInt alpha_top = SSAInt::MAX(SSAInt::MIN(frac.ashr(16 - start_fade), SSAInt(256)), SSAInt(0)); - SSAInt alpha_bottom = SSAInt::MAX(SSAInt::MIN(((2 << 24) - frac).ashr(16 - start_fade), SSAInt(256)), SSAInt(0)); - SSAInt alpha = SSAInt::MIN(alpha_top, alpha_bottom); - SSAInt inv_alpha = 256 - alpha; - - fg = (fg * alpha + SSAVec4i::unpack(color) * inv_alpha) / 256; - return fg.insert(3, 255); -} - -void DrawTriangleCodegen::CalculateGradients() -{ - gradientX.W = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); - gradientY.W = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); - start.W = v1.w + gradientX.W * (SSAFloat(startX) - v1.x) + gradientY.W * (SSAFloat(startY) - v1.y); - for 
(int i = 0; i < TriVertex::NumVarying; i++) - { - gradientX.Varying[i] = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); - gradientY.Varying[i] = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); - start.Varying[i] = v1.varying[i] * v1.w + gradientX.Varying[i] * (SSAFloat(startX) - v1.x) + gradientY.Varying[i] * (SSAFloat(startY) - v1.y); - } - - shade = (64.0f - (SSAFloat(light * 255 / 256) + 12.0f) * 32.0f / 128.0f) / 32.0f; -} - -void DrawTriangleCodegen::LoadArgs(SSAValue args, SSAValue thread_data) -{ - destOrg = args[0][0].load(true); - pitch = args[0][1].load(true); - v1 = LoadTriVertex(args[0][2].load(true)); - v2 = LoadTriVertex(args[0][3].load(true)); - v3 = LoadTriVertex(args[0][4].load(true)); - texturePixels = args[0][9].load(true); - textureWidth = args[0][10].load(true); - textureHeight = args[0][11].load(true); - translation = args[0][12].load(true); - LoadUniforms(args[0][13].load(true)); - if (!truecolor) - { - Colormaps = args[0][20].load(true); - RGB256k = args[0][21].load(true); - BaseColors = args[0][22].load(true); - } - - fullSpans = thread_data[0][5].load(true); - partialBlocks = thread_data[0][6].load(true); - numSpans = thread_data[0][7].load(true); - numBlocks = thread_data[0][8].load(true); - startX = thread_data[0][9].load(true); - startY = thread_data[0][10].load(true); -} - -SSATriVertex DrawTriangleCodegen::LoadTriVertex(SSAValue ptr) -{ - SSATriVertex v; - v.x = ptr[0][0].load(true); - v.y = ptr[0][1].load(true); - v.z = ptr[0][2].load(true); - v.w = ptr[0][3].load(true); - for (int i = 0; i < TriVertex::NumVarying; i++) - v.varying[i] = ptr[0][4 + i].load(true); - return v; -} - -void DrawTriangleCodegen::LoadUniforms(SSAValue uniforms) -{ - light = uniforms[0][0].load(true); - color = uniforms[0][2].load(true); - srcalpha = uniforms[0][3].load(true); - destalpha = uniforms[0][4].load(true); - - 
SSAShort light_alpha = uniforms[0][5].load(true); - SSAShort light_red = uniforms[0][6].load(true); - SSAShort light_green = uniforms[0][7].load(true); - SSAShort light_blue = uniforms[0][8].load(true); - SSAShort fade_alpha = uniforms[0][9].load(true); - SSAShort fade_red = uniforms[0][10].load(true); - SSAShort fade_green = uniforms[0][11].load(true); - SSAShort fade_blue = uniforms[0][12].load(true); - SSAShort desaturate = uniforms[0][13].load(true); - globVis = uniforms[0][14].load(true); - globVis = globVis * SSAFloat(1.0f / 32.0f); - SSAInt flags = uniforms[0][15].load(true); - shade_constants.light = SSAVec4i(light_blue.zext_int(), light_green.zext_int(), light_red.zext_int(), light_alpha.zext_int()); - shade_constants.fade = SSAVec4i(fade_blue.zext_int(), fade_green.zext_int(), fade_red.zext_int(), fade_alpha.zext_int()); - shade_constants.desaturate = desaturate.zext_int(); - - is_simple_shade = (flags & TriUniforms::simple_shade) == SSAInt(TriUniforms::simple_shade); - is_nearest_filter = (flags & TriUniforms::nearest_filter) == SSAInt(TriUniforms::nearest_filter); - is_fixed_light = (flags & TriUniforms::fixed_light) == SSAInt(TriUniforms::fixed_light); -} - -SSAFloat DrawTriangleCodegen::FindGradientX(SSAFloat x0, SSAFloat y0, SSAFloat x1, SSAFloat y1, SSAFloat x2, SSAFloat y2, SSAFloat c0, SSAFloat c1, SSAFloat c2) -{ - SSAFloat top = (c1 - c2) * (y0 - y2) - (c0 - c2) * (y1 - y2); - SSAFloat bottom = (x1 - x2) * (y0 - y2) - (x0 - x2) * (y1 - y2); - return top / bottom; -} - -SSAFloat DrawTriangleCodegen::FindGradientY(SSAFloat x0, SSAFloat y0, SSAFloat x1, SSAFloat y1, SSAFloat x2, SSAFloat y2, SSAFloat c0, SSAFloat c1, SSAFloat c2) -{ - SSAFloat top = (c1 - c2) * (x0 - x2) - (c0 - c2) * (x1 - x2); - SSAFloat bottom = (x0 - x2) * (y1 - y2) - (x1 - x2) * (y0 - y2); - return top / bottom; -} diff --git a/tools/drawergen/fixedfunction/drawtrianglecodegen.h b/tools/drawergen/fixedfunction/drawtrianglecodegen.h deleted file mode 100644 index 
71d6ebd58f..0000000000 --- a/tools/drawergen/fixedfunction/drawtrianglecodegen.h +++ /dev/null @@ -1,112 +0,0 @@ -/* -** DrawTriangle code generation -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 'as-is', without any express or implied -** warranty. In no event will the authors be held liable for any damages -** arising from the use of this software. -** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. -** 3. This notice may not be removed or altered from any source distribution. 
-** -*/ - -#pragma once - -#include "drawercodegen.h" - -struct SSATriVertex -{ - SSAFloat x, y, z, w; - SSAFloat varying[TriVertex::NumVarying]; -}; - -struct SSAStepVariables -{ - SSAFloat W; - SSAFloat Varying[TriVertex::NumVarying]; -}; - -class DrawTriangleCodegen : public DrawerCodegen -{ -public: - void Generate(TriBlendMode blendmode, bool truecolor, bool colorfill, SSAValue args, SSAValue thread_data); - -private: - void LoadArgs(SSAValue args, SSAValue thread_data); - SSATriVertex LoadTriVertex(SSAValue v); - void LoadUniforms(SSAValue uniforms); - void CalculateGradients(); - SSAFloat FindGradientX(SSAFloat x0, SSAFloat y0, SSAFloat x1, SSAFloat y1, SSAFloat x2, SSAFloat y2, SSAFloat c0, SSAFloat c1, SSAFloat c2); - SSAFloat FindGradientY(SSAFloat x0, SSAFloat y0, SSAFloat x1, SSAFloat y1, SSAFloat x2, SSAFloat y2, SSAFloat c0, SSAFloat c1, SSAFloat c2); - void DrawFullSpans(bool isSimpleShade); - void DrawPartialBlocks(bool isSimpleShade); - - SSAVec4i ProcessPixel32(SSAVec4i bg, SSAInt *varying, bool isSimpleShade); - SSAInt ProcessPixel8(SSAInt bg, SSAInt *varying); - SSAVec4i TranslateSample32(SSAInt *varying); - SSAInt TranslateSample8(SSAInt *varying); - SSAVec4i Sample32(SSAInt *varying); - SSAInt Sample8(SSAInt *varying); - SSAVec4i Shade32(SSAVec4i fg, SSAInt light, bool isSimpleShade); - SSAInt Shade8(SSAInt c); - SSAVec4i ToBgra(SSAInt index); - SSAInt ToPal8(SSAVec4i c); - SSAVec4i FadeOut(SSAInt frac, SSAVec4i color); - - SSAStack stack_i, stack_y, stack_x; - SSAStack stack_posYW, stack_posXW; - SSAStack stack_posYVarying[TriVertex::NumVarying]; - SSAStack stack_posXVarying[TriVertex::NumVarying]; - SSAStack stack_varyingPos[TriVertex::NumVarying]; - SSAStack stack_lightpos; - SSAStack stack_dest; - - SSAStepVariables gradientX, gradientY, start; - SSAFloat shade, globVis; - - SSAInt currentlight; - SSAUBytePtr currentcolormap; - - SSAUBytePtr destOrg; - SSAInt pitch; - SSATriVertex v1; - SSATriVertex v2; - SSATriVertex v3; - SSAUBytePtr 
texturePixels; - SSAInt textureWidth; - SSAInt textureHeight; - SSAUBytePtr translation; - SSAInt color, srcalpha, destalpha; - - SSAInt light; - SSAShadeConstants shade_constants; - SSABool is_simple_shade; - SSABool is_nearest_filter; - SSABool is_fixed_light; - - SSAUBytePtr Colormaps; - SSAUBytePtr RGB256k; - SSAUBytePtr BaseColors; - - SSAInt numSpans; - SSAInt numBlocks; - SSAInt startX; - SSAInt startY; - SSAValue fullSpans; // TriFullSpan[] - SSAValue partialBlocks; // TriPartialBlock[] - - TriBlendMode blendmode; - bool truecolor; - bool colorfill; - int pixelsize; -}; diff --git a/tools/drawergen/fixedfunction/drawwallcodegen.cpp b/tools/drawergen/fixedfunction/drawwallcodegen.cpp deleted file mode 100644 index df89432392..0000000000 --- a/tools/drawergen/fixedfunction/drawwallcodegen.cpp +++ /dev/null @@ -1,231 +0,0 @@ -/* -** DrawWall code generation -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 'as-is', without any express or implied -** warranty. In no event will the authors be held liable for any damages -** arising from the use of this software. -** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. -** 3. This notice may not be removed or altered from any source distribution. 
-** -*/ - -#include "precomp.h" -#include "timestamp.h" -#include "fixedfunction/drawwallcodegen.h" -#include "ssa/ssa_function.h" -#include "ssa/ssa_scope.h" -#include "ssa/ssa_for_block.h" -#include "ssa/ssa_if_block.h" -#include "ssa/ssa_stack.h" -#include "ssa/ssa_function.h" -#include "ssa/ssa_struct_type.h" -#include "ssa/ssa_value.h" - -void DrawWallCodegen::Generate(DrawWallVariant variant, SSAValue args, SSAValue thread_data) -{ - dest = args[0][0].load(true); - source = args[0][1].load(true); - source2 = args[0][5].load(true); - pitch = args[0][9].load(true); - count = args[0][10].load(true); - dest_y = args[0][11].load(true); - texturefrac = args[0][12].load(true); - texturefracx = args[0][16].load(true); - iscale = args[0][20].load(true); - textureheight = args[0][24].load(true); - light = args[0][28].load(true); - srcalpha = args[0][32].load(true); - destalpha = args[0][33].load(true); - SSAShort light_alpha = args[0][34].load(true); - SSAShort light_red = args[0][35].load(true); - SSAShort light_green = args[0][36].load(true); - SSAShort light_blue = args[0][37].load(true); - SSAShort fade_alpha = args[0][38].load(true); - SSAShort fade_red = args[0][39].load(true); - SSAShort fade_green = args[0][40].load(true); - SSAShort fade_blue = args[0][41].load(true); - SSAShort desaturate = args[0][42].load(true); - SSAInt flags = args[0][43].load(true); - start_z = args[0][44].load(true); - step_z = args[0][45].load(true); - dynlights = args[0][46].load(true); - num_dynlights = args[0][47].load(true); - shade_constants.light = SSAVec4i(light_blue.zext_int(), light_green.zext_int(), light_red.zext_int(), light_alpha.zext_int()); - shade_constants.fade = SSAVec4i(fade_blue.zext_int(), fade_green.zext_int(), fade_red.zext_int(), fade_alpha.zext_int()); - shade_constants.desaturate = desaturate.zext_int(); - - thread.core = thread_data[0][0].load(true); - thread.num_cores = thread_data[0][1].load(true); - thread.pass_start_y = thread_data[0][2].load(true); - 
thread.pass_end_y = thread_data[0][3].load(true); - - is_simple_shade = (flags & DrawWallArgs::simple_shade) == SSAInt(DrawWallArgs::simple_shade); - is_nearest_filter = (flags & DrawWallArgs::nearest_filter) == SSAInt(DrawWallArgs::nearest_filter); - - count = count_for_thread(dest_y, count, thread); - dest = dest_for_thread(dest_y, pitch, dest, thread); - - pitch = pitch * thread.num_cores; - - stack_frac.store(texturefrac + iscale * skipped_by_thread(dest_y, thread)); - fracstep = iscale * thread.num_cores; - one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; - - start_z = start_z + step_z * SSAFloat(skipped_by_thread(dest_y, thread)); - step_z = step_z * SSAFloat(thread.num_cores); - - SSAIfBlock branch; - branch.if_block(is_simple_shade); - LoopShade(variant, true); - branch.else_block(); - LoopShade(variant, false); - branch.end_block(); -} - -void DrawWallCodegen::LoopShade(DrawWallVariant variant, bool isSimpleShade) -{ - SSAIfBlock branch; - branch.if_block(is_nearest_filter); - Loop(variant, isSimpleShade, true); - branch.else_block(); - stack_frac.store(stack_frac.load() - (one / 2)); - Loop(variant, isSimpleShade, false); - branch.end_block(); -} - -void DrawWallCodegen::Loop(DrawWallVariant variant, bool isSimpleShade, bool isNearestFilter) -{ - stack_index.store(SSAInt(0)); - stack_z.store(start_z); - { - SSAForBlock loop; - SSAInt index = stack_index.load(); - z = stack_z.load(); - loop.loop_block(index < count); - - SSAInt frac = stack_frac.load(); - SSAInt offset = index * pitch * 4; - - SSAVec4i bgcolor = dest[offset].load_vec4ub(false); - SSAVec4i color = Blend(Shade(Sample(frac, isNearestFilter), isSimpleShade), bgcolor, variant); - dest[offset].store_vec4ub(color); - - stack_z.store(z + step_z); - stack_index.store(index.add(SSAInt(1), true, true)); - stack_frac.store(frac + fracstep); - loop.end_block(); - } -} - -SSAVec4i DrawWallCodegen::Sample(SSAInt frac, bool isNearestFilter) -{ - if (isNearestFilter) - { - SSAInt 
sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; - return source[sample_index * 4].load_vec4ub(false); - } - else - { - return SampleLinear(source, source2, texturefracx, frac, one, textureheight); - } -} - -SSAVec4i DrawWallCodegen::SampleLinear(SSAUBytePtr col0, SSAUBytePtr col1, SSAInt texturefracx, SSAInt texturefracy, SSAInt one, SSAInt height) -{ - SSAInt frac_y0 = (texturefracy >> FRACBITS) * height; - SSAInt frac_y1 = ((texturefracy + one) >> FRACBITS) * height; - SSAInt y0 = frac_y0 >> FRACBITS; - SSAInt y1 = frac_y1 >> FRACBITS; - - SSAVec4i p00 = col0[y0 * 4].load_vec4ub(true); - SSAVec4i p01 = col0[y1 * 4].load_vec4ub(true); - SSAVec4i p10 = col1[y0 * 4].load_vec4ub(true); - SSAVec4i p11 = col1[y1 * 4].load_vec4ub(true); - - SSAInt inv_b = texturefracx; - SSAInt a = (frac_y1 >> (FRACBITS - 4)) & 15; - SSAInt inv_a = 16 - a; - SSAInt b = 16 - inv_b; - - return (p00 * (a * b) + p01 * (inv_a * b) + p10 * (a * inv_b) + p11 * (inv_a * inv_b) + 127) >> 8; -} - -SSAVec4i DrawWallCodegen::Shade(SSAVec4i fg, bool isSimpleShade) -{ - SSAVec4i c; - if (isSimpleShade) - c = shade_bgra_simple(fg, light); - else - c = shade_bgra_advanced(fg, light, shade_constants); - - stack_lit_color.store(SSAVec4i(0)); - stack_light_index.store(SSAInt(0)); - - SSAForBlock block; - SSAInt light_index = stack_light_index.load(); - SSAVec4i lit_color = stack_lit_color.load(); - block.loop_block(light_index < num_dynlights); - { - SSAVec4i light_color = SSAUBytePtr(SSAValue(dynlights[light_index][0]).v).load_vec4ub(true); - SSAFloat light_x = dynlights[light_index][1].load(true); - SSAFloat light_y = dynlights[light_index][2].load(true); - SSAFloat light_z = dynlights[light_index][3].load(true); - SSAFloat light_rcp_radius = dynlights[light_index][4].load(true); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // attenuation = 1 - MIN(dist * (1/radius), 1) - SSAFloat Lxy2 = light_x; // L.x*L.x + L.y*L.y - SSAFloat Lz = light_z - z; - SSAFloat dist2 = Lxy2 + Lz * Lz; 
- SSAFloat rcp_dist = SSAFloat::rsqrt(dist2); - SSAFloat dist = dist2 * rcp_dist; - SSAFloat distance_attenuation = SSAFloat(256.0f) - SSAFloat::MIN(dist * light_rcp_radius, SSAFloat(256.0f)); - - // The simple light type - SSAFloat simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - SSAFloat point_attenuation = light_y * rcp_dist * distance_attenuation; - - SSAInt attenuation = SSAInt((light_y == SSAFloat(0.0f)).select(simple_attenuation, point_attenuation), true); - SSAVec4i contribution = (light_color * attenuation) >> 8; - - stack_lit_color.store(lit_color + contribution); - stack_light_index.store(light_index + 1); - } - block.end_block(); - - return c + ((stack_lit_color.load() * fg) >> 8); -} - -SSAVec4i DrawWallCodegen::Blend(SSAVec4i fg, SSAVec4i bg, DrawWallVariant variant) -{ - switch (variant) - { - default: - case DrawWallVariant::Opaque: - return blend_copy(fg); - case DrawWallVariant::Masked: - return blend_alpha_blend(fg, bg); - case DrawWallVariant::Add: - case DrawWallVariant::AddClamp: - return blend_add(fg, bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); - case DrawWallVariant::SubClamp: - return blend_sub(fg, bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); - case DrawWallVariant::RevSubClamp: - return blend_revsub(fg, bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); - } -} diff --git a/tools/drawergen/fixedfunction/drawwallcodegen.h b/tools/drawergen/fixedfunction/drawwallcodegen.h deleted file mode 100644 index 1afb5396ae..0000000000 --- a/tools/drawergen/fixedfunction/drawwallcodegen.h +++ /dev/null @@ -1,80 +0,0 @@ -/* -** DrawWall code generation -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 'as-is', without any express or implied -** warranty. In no event will the authors be held liable for any damages -** arising from the use of this software. 
-** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. -** 3. This notice may not be removed or altered from any source distribution. -** -*/ - -#pragma once - -#include "drawercodegen.h" - -enum class DrawWallVariant -{ - Opaque, - Masked, - Add, - AddClamp, - SubClamp, - RevSubClamp -}; - -class DrawWallCodegen : public DrawerCodegen -{ -public: - void Generate(DrawWallVariant variant, SSAValue args, SSAValue thread_data); - -private: - void LoopShade(DrawWallVariant variant, bool isSimpleShade); - void Loop(DrawWallVariant variant, bool isSimpleShade, bool isNearestFilter); - SSAVec4i Sample(SSAInt frac, bool isNearestFilter); - SSAVec4i SampleLinear(SSAUBytePtr col0, SSAUBytePtr col1, SSAInt texturefracx, SSAInt texturefracy, SSAInt one, SSAInt height); - SSAVec4i Shade(SSAVec4i fg, bool isSimpleShade); - SSAVec4i Blend(SSAVec4i fg, SSAVec4i bg, DrawWallVariant variant); - - SSAStack stack_index, stack_frac, stack_light_index; - SSAStack stack_lit_color; - SSAStack stack_z; - - SSAUBytePtr dest; - SSAUBytePtr source; - SSAUBytePtr source2; - SSAInt pitch; - SSAInt count; - SSAInt dest_y; - SSAInt texturefrac; - SSAInt texturefracx; - SSAInt iscale; - SSAInt textureheight; - SSAInt light; - SSAInt srcalpha; - SSAInt destalpha; - SSABool is_simple_shade; - SSABool is_nearest_filter; - SSAShadeConstants shade_constants; - SSAWorkerThread thread; - - SSAInt fracstep; - SSAInt one; - - SSAFloat start_z, step_z; - - SSAValue 
dynlights; // TriLight* - SSAInt num_dynlights; - SSAFloat z; -}; diff --git a/tools/drawergen/fixedfunction/setuptrianglecodegen.cpp b/tools/drawergen/fixedfunction/setuptrianglecodegen.cpp deleted file mode 100644 index f29764d9eb..0000000000 --- a/tools/drawergen/fixedfunction/setuptrianglecodegen.cpp +++ /dev/null @@ -1,573 +0,0 @@ -/* -** DrawTriangle code generation -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 'as-is', without any express or implied -** warranty. In no event will the authors be held liable for any damages -** arising from the use of this software. -** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. -** 3. This notice may not be removed or altered from any source distribution. 
-** -*/ - -#include "precomp.h" -#include "timestamp.h" -#include "fixedfunction/setuptrianglecodegen.h" -#include "ssa/ssa_function.h" -#include "ssa/ssa_scope.h" -#include "ssa/ssa_for_block.h" -#include "ssa/ssa_if_block.h" -#include "ssa/ssa_stack.h" -#include "ssa/ssa_function.h" -#include "ssa/ssa_struct_type.h" -#include "ssa/ssa_value.h" - -void SetupTriangleCodegen::Generate(bool subsectorTest, SSAValue args, SSAValue thread_data) -{ - this->subsectorTest = subsectorTest; - LoadArgs(args, thread_data); - Setup(); - LoopBlockY(); -} - -SSAInt SetupTriangleCodegen::FloatTo28_4(SSAFloat v) -{ - // SSAInt(SSAFloat::round(16.0f * v), false); - SSAInt a = SSAInt(v * 32.0f, false); - return (a + (a.ashr(31) | SSAInt(1))).ashr(1); -} - -void SetupTriangleCodegen::Setup() -{ - // 28.4 fixed-point coordinates - Y1 = FloatTo28_4(v1.y); - Y2 = FloatTo28_4(v2.y); - Y3 = FloatTo28_4(v3.y); - - X1 = FloatTo28_4(v1.x); - X2 = FloatTo28_4(v2.x); - X3 = FloatTo28_4(v3.x); - - // Deltas - DX12 = X1 - X2; - DX23 = X2 - X3; - DX31 = X3 - X1; - - DY12 = Y1 - Y2; - DY23 = Y2 - Y3; - DY31 = Y3 - Y1; - - // Fixed-point deltas - FDX12 = DX12 << 4; - FDX23 = DX23 << 4; - FDX31 = DX31 << 4; - - FDY12 = DY12 << 4; - FDY23 = DY23 << 4; - FDY31 = DY31 << 4; - - // Bounding rectangle - minx = SSAInt::MAX((SSAInt::MIN(SSAInt::MIN(X1, X2), X3) + 0xF).ashr(4), SSAInt(0)); - maxx = SSAInt::MIN((SSAInt::MAX(SSAInt::MAX(X1, X2), X3) + 0xF).ashr(4), clipright - 1); - miny = SSAInt::MAX((SSAInt::MIN(SSAInt::MIN(Y1, Y2), Y3) + 0xF).ashr(4), SSAInt(0)); - maxy = SSAInt::MIN((SSAInt::MAX(SSAInt::MAX(Y1, Y2), Y3) + 0xF).ashr(4), clipbottom - 1); - - SSAIfBlock if0; - if0.if_block(minx >= maxx || miny >= maxy); - if0.end_retvoid(); - - // Start in corner of 8x8 block - minx = minx & ~(q - 1); - miny = miny & ~(q - 1); - - // Half-edge constants - C1 = DY12 * X1 - DX12 * Y1; - C2 = DY23 * X2 - DX23 * Y2; - C3 = DY31 * X3 - DX31 * Y3; - - // Correct for fill convention - SSAIfBlock if1; - 
if1.if_block(DY12 < SSAInt(0) || (DY12 == SSAInt(0) && DX12 > SSAInt(0))); - stack_C1.store(C1 + 1); - if1.else_block(); - stack_C1.store(C1); - if1.end_block(); - C1 = stack_C1.load(); - SSAIfBlock if2; - if2.if_block(DY23 < SSAInt(0) || (DY23 == SSAInt(0) && DX23 > SSAInt(0))); - stack_C2.store(C2 + 1); - if2.else_block(); - stack_C2.store(C2); - if2.end_block(); - C2 = stack_C2.load(); - SSAIfBlock if3; - if3.if_block(DY31 < SSAInt(0) || (DY31 == SSAInt(0) && DX31 > SSAInt(0))); - stack_C3.store(C3 + 1); - if3.else_block(); - stack_C3.store(C3); - if3.end_block(); - C3 = stack_C3.load(); -} - -void SetupTriangleCodegen::LoopBlockY() -{ - SSAInt blocks_skipped = skipped_by_thread(miny / q, thread); - stack_y.store(miny + blocks_skipped * q); - stack_subsectorGBuffer.store(subsectorGBuffer[blocks_skipped * q * pitch]); - - SSAForBlock loop; - y = stack_y.load(); - subsectorGBuffer = stack_subsectorGBuffer.load(); - loop.loop_block(y < maxy, 0); - { - LoopBlockX(); - - stack_subsectorGBuffer.store(subsectorGBuffer[q * pitch * thread.num_cores]); - stack_y.store(y + thread.num_cores * q); - } - loop.end_block(); -} - -void SetupTriangleCodegen::LoopBlockX() -{ - stack_x.store(minx); - - SSAForBlock loop; - x = stack_x.load(); - loop.loop_block(x < maxx, 0); - { - // Corners of block - x0 = x << 4; - x1 = (x + q - 1) << 4; - y0 = y << 4; - y1 = (y + q - 1) << 4; - - // Evaluate half-space functions - SSABool a00 = C1 + DX12 * y0 - DY12 * x0 > SSAInt(0); - SSABool a10 = C1 + DX12 * y0 - DY12 * x1 > SSAInt(0); - SSABool a01 = C1 + DX12 * y1 - DY12 * x0 > SSAInt(0); - SSABool a11 = C1 + DX12 * y1 - DY12 * x1 > SSAInt(0); - - SSAInt a = (a00.zext_int() << 0) | (a10.zext_int() << 1) | (a01.zext_int() << 2) | (a11.zext_int() << 3); - - SSABool b00 = C2 + DX23 * y0 - DY23 * x0 > SSAInt(0); - SSABool b10 = C2 + DX23 * y0 - DY23 * x1 > SSAInt(0); - SSABool b01 = C2 + DX23 * y1 - DY23 * x0 > SSAInt(0); - SSABool b11 = C2 + DX23 * y1 - DY23 * x1 > SSAInt(0); - SSAInt b = 
(b00.zext_int() << 0) | (b10.zext_int() << 1) | (b01.zext_int() << 2) | (b11.zext_int() << 3); - - SSABool c00 = C3 + DX31 * y0 - DY31 * x0 > SSAInt(0); - SSABool c10 = C3 + DX31 * y0 - DY31 * x1 > SSAInt(0); - SSABool c01 = C3 + DX31 * y1 - DY31 * x0 > SSAInt(0); - SSABool c11 = C3 + DX31 * y1 - DY31 * x1 > SSAInt(0); - SSAInt c = (c00.zext_int() << 0) | (c10.zext_int() << 1) | (c01.zext_int() << 2) | (c11.zext_int() << 3); - - // Skip block when outside an edge - SSABool process_block = !(a == SSAInt(0) || b == SSAInt(0) || c == SSAInt(0)); - - SetStencilBlock(x / 8 + y / 8 * stencilPitch); - - // Stencil test the whole block, if possible - if (subsectorTest) - { - process_block = process_block && (!StencilIsSingleValue() || SSABool::compare_uge(StencilGetSingle(), stencilTestValue)); - } - else - { - process_block = process_block && (!StencilIsSingleValue() || StencilGetSingle() == stencilTestValue); - } - - SSAIfBlock branch; - branch.if_block(process_block); - - // Check if block needs clipping - SSABool clipneeded = (x + q) > clipright || (y + q) > clipbottom; - - SSABool covered = a == SSAInt(0xF) && b == SSAInt(0xF) && c == SSAInt(0xF) && !clipneeded && StencilIsSingleValue(); - - // Accept whole block when totally covered - SSAIfBlock branch_covered; - branch_covered.if_block(covered); - { - LoopFullBlock(); - } - branch_covered.else_block(); - { - SSAIfBlock branch_covered_stencil; - branch_covered_stencil.if_block(StencilIsSingleValue()); - { - SSABool stenciltestpass; - if (subsectorTest) - { - stenciltestpass = SSABool::compare_uge(StencilGetSingle(), stencilTestValue); - } - else - { - stenciltestpass = StencilGetSingle() == stencilTestValue; - } - - SSAIfBlock branch_stenciltestpass; - branch_stenciltestpass.if_block(stenciltestpass); - { - LoopPartialBlock(true); - } - branch_stenciltestpass.end_block(); - } - branch_covered_stencil.else_block(); - { - LoopPartialBlock(false); - } - branch_covered_stencil.end_block(); - } - 
branch_covered.end_block(); - - branch.end_block(); - - stack_x.store(x + q); - } - loop.end_block(); -} - -void SetupTriangleCodegen::LoopFullBlock() -{ - /* - if (variant == TriDrawVariant::Stencil) - { - StencilClear(stencilWriteValue); - } - else if (variant == TriDrawVariant::StencilClose) - { - StencilClear(stencilWriteValue); - for (int iy = 0; iy < q; iy++) - { - SSAIntPtr subsectorbuffer = subsectorGBuffer[x + iy * pitch]; - for (int ix = 0; ix < q; ix += 4) - { - subsectorbuffer[ix].store_unaligned_vec4i(SSAVec4i(subsectorDepth)); - } - } - } - else - { - int pixelsize = truecolor ? 4 : 1; - - AffineW = posx_w; - for (int i = 0; i < TriVertex::NumVarying; i++) - AffineVaryingPosY[i] = posx_varying[i]; - - for (int iy = 0; iy < q; iy++) - { - SSAUBytePtr buffer = dest[(x + iy * pitch) * pixelsize]; - SSAIntPtr subsectorbuffer = subsectorGBuffer[x + iy * pitch]; - - SetupAffineBlock(); - - for (int ix = 0; ix < q; ix += 4) - { - SSAUBytePtr buf = buffer[ix * pixelsize]; - if (truecolor) - { - SSAVec16ub pixels16 = buf.load_unaligned_vec16ub(false); - SSAVec8s pixels8hi = SSAVec8s::extendhi(pixels16); - SSAVec8s pixels8lo = SSAVec8s::extendlo(pixels16); - SSAVec4i pixels[4] = - { - SSAVec4i::extendlo(pixels8lo), - SSAVec4i::extendhi(pixels8lo), - SSAVec4i::extendlo(pixels8hi), - SSAVec4i::extendhi(pixels8hi) - }; - - for (int sse = 0; sse < 4; sse++) - { - if (variant == TriDrawVariant::DrawSubsector || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector) - { - SSABool subsectorTest = subsectorbuffer[ix].load(true) >= subsectorDepth; - pixels[sse] = subsectorTest.select(ProcessPixel32(pixels[sse], AffineVaryingPosX), pixels[sse]); - } - else - { - pixels[sse] = ProcessPixel32(pixels[sse], AffineVaryingPosX); - } - - for (int i = 0; i < TriVertex::NumVarying; i++) - AffineVaryingPosX[i] = AffineVaryingPosX[i] + AffineVaryingStepX[i]; - } - - buf.store_unaligned_vec16ub(SSAVec16ub(SSAVec8s(pixels[0], pixels[1]), 
SSAVec8s(pixels[2], pixels[3]))); - } - else - { - SSAVec4i pixelsvec = buf.load_vec4ub(false); - SSAInt pixels[4] = - { - pixelsvec[0], - pixelsvec[1], - pixelsvec[2], - pixelsvec[3] - }; - - for (int sse = 0; sse < 4; sse++) - { - if (variant == TriDrawVariant::DrawSubsector || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector) - { - SSABool subsectorTest = subsectorbuffer[ix].load(true) >= subsectorDepth; - pixels[sse] = subsectorTest.select(ProcessPixel8(pixels[sse], AffineVaryingPosX), pixels[sse]); - } - else - { - pixels[sse] = ProcessPixel8(pixels[sse], AffineVaryingPosX); - } - - for (int i = 0; i < TriVertex::NumVarying; i++) - AffineVaryingPosX[i] = AffineVaryingPosX[i] + AffineVaryingStepX[i]; - } - - buf.store_vec4ub(SSAVec4i(pixels[0], pixels[1], pixels[2], pixels[3])); - } - - if (variant != TriDrawVariant::DrawSubsector && variant != TriDrawVariant::FillSubsector && variant != TriDrawVariant::FuzzSubsector) - subsectorbuffer[ix].store_unaligned_vec4i(SSAVec4i(subsectorDepth)); - } - - AffineW = AffineW + gradWY; - for (int i = 0; i < TriVertex::NumVarying; i++) - AffineVaryingPosY[i] = AffineVaryingPosY[i] + gradVaryingY[i]; - } - } - */ -} - -void SetupTriangleCodegen::LoopPartialBlock(bool isSingleStencilValue) -{ - /* - int pixelsize = truecolor ? 
4 : 1; - - if (variant == TriDrawVariant::Stencil || variant == TriDrawVariant::StencilClose) - { - if (isSingleStencilValue) - { - SSAInt stencilMask = StencilBlockMask.load(false); - SSAUByte val0 = stencilMask.trunc_ubyte(); - for (int i = 0; i < 8 * 8; i++) - StencilBlock[i].store(val0); - StencilBlockMask.store(SSAInt(0)); - } - - SSAUByte lastStencilValue = StencilBlock[0].load(false); - stack_stencilblock_restored.store(SSABool(true)); - stack_stencilblock_lastval.store(lastStencilValue); - } - - stack_CY1.store(C1 + DX12 * y0 - DY12 * x0); - stack_CY2.store(C2 + DX23 * y0 - DY23 * x0); - stack_CY3.store(C3 + DX31 * y0 - DY31 * x0); - stack_iy.store(SSAInt(0)); - stack_buffer.store(dest[x * pixelsize]); - stack_subsectorbuffer.store(subsectorGBuffer[x]); - stack_AffineW.store(posx_w); - for (int i = 0; i < TriVertex::NumVarying; i++) - { - stack_AffineVaryingPosY[i].store(posx_varying[i]); - } - - SSAForBlock loopy; - SSAInt iy = stack_iy.load(); - SSAUBytePtr buffer = stack_buffer.load(); - SSAIntPtr subsectorbuffer = stack_subsectorbuffer.load(); - SSAInt CY1 = stack_CY1.load(); - SSAInt CY2 = stack_CY2.load(); - SSAInt CY3 = stack_CY3.load(); - AffineW = stack_AffineW.load(); - for (int i = 0; i < TriVertex::NumVarying; i++) - AffineVaryingPosY[i] = stack_AffineVaryingPosY[i].load(); - loopy.loop_block(iy < SSAInt(q), q); - { - SetupAffineBlock(); - - for (int i = 0; i < TriVertex::NumVarying; i++) - stack_AffineVaryingPosX[i].store(AffineVaryingPosX[i]); - - stack_CX1.store(CY1); - stack_CX2.store(CY2); - stack_CX3.store(CY3); - stack_ix.store(SSAInt(0)); - - SSAForBlock loopx; - SSABool stencilblock_restored; - SSAUByte lastStencilValue; - if (variant == TriDrawVariant::Stencil || variant == TriDrawVariant::StencilClose) - { - stencilblock_restored = stack_stencilblock_restored.load(); - lastStencilValue = stack_stencilblock_lastval.load(); - } - SSAInt ix = stack_ix.load(); - SSAInt CX1 = stack_CX1.load(); - SSAInt CX2 = stack_CX2.load(); - SSAInt CX3 
= stack_CX3.load(); - for (int i = 0; i < TriVertex::NumVarying; i++) - AffineVaryingPosX[i] = stack_AffineVaryingPosX[i].load(); - loopx.loop_block(ix < SSAInt(q), q); - { - SSABool visible = (ix + x < clipright) && (iy + y < clipbottom); - SSABool covered = CX1 > SSAInt(0) && CX2 > SSAInt(0) && CX3 > SSAInt(0) && visible; - - if (!isSingleStencilValue) - { - SSAUByte stencilValue = StencilBlock[ix + iy * 8].load(false); - - if (variant == TriDrawVariant::DrawSubsector || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector) - { - covered = covered && SSABool::compare_uge(stencilValue, stencilTestValue) && subsectorbuffer[ix].load(true) >= subsectorDepth; - } - else if (variant == TriDrawVariant::StencilClose) - { - covered = covered && SSABool::compare_uge(stencilValue, stencilTestValue); - } - else - { - covered = covered && stencilValue == stencilTestValue; - } - } - else if (variant == TriDrawVariant::DrawSubsector || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector) - { - covered = covered && subsectorbuffer[ix].load(true) >= subsectorDepth; - } - - SSAIfBlock branch; - branch.if_block(covered); - { - if (variant == TriDrawVariant::Stencil) - { - StencilBlock[ix + iy * 8].store(stencilWriteValue); - } - else if (variant == TriDrawVariant::StencilClose) - { - StencilBlock[ix + iy * 8].store(stencilWriteValue); - subsectorbuffer[ix].store(subsectorDepth); - } - else - { - SSAUBytePtr buf = buffer[ix * pixelsize]; - - if (truecolor) - { - SSAVec4i bg = buf.load_vec4ub(false); - buf.store_vec4ub(ProcessPixel32(bg, AffineVaryingPosX)); - } - else - { - SSAUByte bg = buf.load(false); - buf.store(ProcessPixel8(bg.zext_int(), AffineVaryingPosX).trunc_ubyte()); - } - - if (variant != TriDrawVariant::DrawSubsector && variant != TriDrawVariant::FillSubsector && variant != TriDrawVariant::FuzzSubsector) - subsectorbuffer[ix].store(subsectorDepth); - } - } - branch.end_block(); - - if (variant == 
TriDrawVariant::Stencil || variant == TriDrawVariant::StencilClose) - { - SSAUByte newStencilValue = StencilBlock[ix + iy * 8].load(false); - stack_stencilblock_restored.store(stencilblock_restored && newStencilValue == lastStencilValue); - stack_stencilblock_lastval.store(newStencilValue); - } - - for (int i = 0; i < TriVertex::NumVarying; i++) - stack_AffineVaryingPosX[i].store(AffineVaryingPosX[i] + AffineVaryingStepX[i]); - - stack_CX1.store(CX1 - FDY12); - stack_CX2.store(CX2 - FDY23); - stack_CX3.store(CX3 - FDY31); - stack_ix.store(ix + 1); - } - loopx.end_block(); - - stack_AffineW.store(AffineW + gradWY); - for (int i = 0; i < TriVertex::NumVarying; i++) - stack_AffineVaryingPosY[i].store(AffineVaryingPosY[i] + gradVaryingY[i]); - stack_CY1.store(CY1 + FDX12); - stack_CY2.store(CY2 + FDX23); - stack_CY3.store(CY3 + FDX31); - stack_buffer.store(buffer[pitch * pixelsize]); - stack_subsectorbuffer.store(subsectorbuffer[pitch]); - stack_iy.store(iy + 1); - } - loopy.end_block(); - - if (variant == TriDrawVariant::Stencil || variant == TriDrawVariant::StencilClose) - { - SSAIfBlock branch; - SSABool restored = stack_stencilblock_restored.load(); - branch.if_block(restored); - { - SSAUByte lastStencilValue = stack_stencilblock_lastval.load(); - StencilClear(lastStencilValue); - } - branch.end_block(); - } - */ -} - -void SetupTriangleCodegen::SetStencilBlock(SSAInt block) -{ - StencilBlock = stencilValues[block * 64]; - StencilBlockMask = stencilMasks[block]; -} - -SSAUByte SetupTriangleCodegen::StencilGetSingle() -{ - return StencilBlockMask.load(false).trunc_ubyte(); -} - -void SetupTriangleCodegen::StencilClear(SSAUByte value) -{ - StencilBlockMask.store(SSAInt(0xffffff00) | value.zext_int()); -} - -SSABool SetupTriangleCodegen::StencilIsSingleValue() -{ - return (StencilBlockMask.load(false) & SSAInt(0xffffff00)) == SSAInt(0xffffff00); -} - -void SetupTriangleCodegen::LoadArgs(SSAValue args, SSAValue thread_data) -{ - pitch = args[0][1].load(true); - v1 = 
LoadTriVertex(args[0][2].load(true)); - v2 = LoadTriVertex(args[0][3].load(true)); - v3 = LoadTriVertex(args[0][4].load(true)); - clipright = args[0][6].load(true); - clipbottom = args[0][8].load(true); - stencilValues = args[0][14].load(true); - stencilMasks = args[0][15].load(true); - stencilPitch = args[0][16].load(true); - stencilTestValue = args[0][17].load(true); - stencilWriteValue = args[0][18].load(true); - subsectorGBuffer = args[0][19].load(true); - - thread.core = thread_data[0][0].load(true); - thread.num_cores = thread_data[0][1].load(true); - thread.pass_start_y = SSAInt(0); - thread.pass_end_y = SSAInt(32000); -} - -SSASetupVertex SetupTriangleCodegen::LoadTriVertex(SSAValue ptr) -{ - SSASetupVertex v; - v.x = ptr[0][0].load(true); - v.y = ptr[0][1].load(true); - v.z = ptr[0][2].load(true); - v.w = ptr[0][3].load(true); - return v; -} diff --git a/tools/drawergen/fixedfunction/setuptrianglecodegen.h b/tools/drawergen/fixedfunction/setuptrianglecodegen.h deleted file mode 100644 index dda7b7667d..0000000000 --- a/tools/drawergen/fixedfunction/setuptrianglecodegen.h +++ /dev/null @@ -1,98 +0,0 @@ -/* -** SetupTriangle code generation -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 'as-is', without any express or implied -** warranty. In no event will the authors be held liable for any damages -** arising from the use of this software. -** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. 
-** 3. This notice may not be removed or altered from any source distribution. -** -*/ - -#pragma once - -#include "drawercodegen.h" - -struct SSASetupVertex -{ - SSAFloat x, y, z, w; -}; - -class SetupTriangleCodegen : public DrawerCodegen -{ -public: - void Generate(bool subsectorTest, SSAValue args, SSAValue thread_data); - -private: - void LoadArgs(SSAValue args, SSAValue thread_data); - SSASetupVertex LoadTriVertex(SSAValue v); - void Setup(); - SSAInt FloatTo28_4(SSAFloat v); - void LoopBlockY(); - void LoopBlockX(); - void LoopFullBlock(); - void LoopPartialBlock(bool isSingleStencilValue); - - void SetStencilBlock(SSAInt block); - void StencilClear(SSAUByte value); - SSAUByte StencilGetSingle(); - SSABool StencilIsSingleValue(); - - bool subsectorTest; - - SSAStack stack_C1, stack_C2, stack_C3; - SSAStack stack_y; - SSAStack stack_subsectorGBuffer; - SSAStack stack_x; - SSAStack stack_buffer; - SSAStack stack_iy, stack_ix; - SSAStack stack_CY1, stack_CY2, stack_CY3; - SSAStack stack_CX1, stack_CX2, stack_CX3; - //SSAStack stack_stencilblock_restored; - //SSAStack stack_stencilblock_lastval; - - SSAIntPtr subsectorGBuffer; - SSAInt pitch; - SSASetupVertex v1; - SSASetupVertex v2; - SSASetupVertex v3; - SSAInt clipright; - SSAInt clipbottom; - - SSAUBytePtr stencilValues; - SSAIntPtr stencilMasks; - SSAInt stencilPitch; - SSAUByte stencilTestValue; - SSAUByte stencilWriteValue; - - SSAWorkerThread thread; - - // Block size, standard 8x8 (must be power of two) - const int q = 8; - - SSAInt Y1, Y2, Y3; - SSAInt X1, X2, X3; - SSAInt DX12, DX23, DX31; - SSAInt DY12, DY23, DY31; - SSAInt FDX12, FDX23, FDX31; - SSAInt FDY12, FDY23, FDY31; - SSAInt minx, maxx, miny, maxy; - SSAInt C1, C2, C3; - - SSAInt x, y; - SSAInt x0, x1, y0, y1; - - SSAUBytePtr StencilBlock; - SSAIntPtr StencilBlockMask; -}; diff --git a/tools/drawergen/llvm_include.h b/tools/drawergen/llvm_include.h deleted file mode 100644 index 323eef0143..0000000000 --- a/tools/drawergen/llvm_include.h +++ 
/dev/null @@ -1,99 +0,0 @@ -/* -** LLVM includes -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 'as-is', without any express or implied -** warranty. In no event will the authors be held liable for any damages -** arising from the use of this software. -** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. -** 3. This notice may not be removed or altered from any source distribution. -** -*/ - -#pragma once - -#ifdef _MSC_VER - -#if defined(min) -#define llvm_min_bug min -#undef min -#endif -#if defined(max) -#define llvm_max_bug max -#undef max -#endif - -#pragma warning(disable: 4146) // warning C4146: unary minus operator applied to unsigned type, result still unsigned -#pragma warning(disable: 4624) // warning C4624: 'llvm::AugmentedUse' : destructor could not be generated because a base class destructor is inaccessible -#pragma warning(disable: 4355) // warning C4355: 'this' : used in base member initializer list -#pragma warning(disable: 4800) // warning C4800: 'const unsigned int' : forcing value to bool 'true' or 'false' (performance warning) -#pragma warning(disable: 4996) // warning C4996: 'std::_Copy_impl': Function call with parameters that may be unsafe - this call relies on the caller to check that the passed values are correct. To disable this warning, use -D_Sclan::SECURE_NO_WARNINGS. 
See documentation on how to use Visual C++ 'Checked Iterators' -#pragma warning(disable: 4244) // warning C4244: 'return' : conversion from 'uint64_t' to 'unsigned int', possible loss of data -#pragma warning(disable: 4141) // warning C4141: 'inline': used more than once -#pragma warning(disable: 4291) // warning C4291: 'void *llvm::User::operator new(std::size_t,unsigned int,unsigned int)': no matching operator delete found; memory will not be freed if initialization throws an exception -#pragma warning(disable: 4267) // warning C4267: 'return': conversion from 'size_t' to 'unsigned int', possible loss of data -#pragma warning(disable: 4244) // warning C4244: 'initializing': conversion from '__int64' to 'unsigned int', possible loss of data - -#endif - -#if defined(__APPLE__) || defined(__clang__) -#define __STDC_LIMIT_MACROS // DataTypes.h:57:3: error: "Must #define __STDC_LIMIT_MACROS before #including Support/DataTypes.h" -#define __STDC_CONSTANT_MACROS // DataTypes.h:61:3: error: "Must #define __STDC_CONSTANT_MACROS before " "#including Support/DataTypes.h" -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wredundant-move" -#endif - -#include -#include -#include -#include -#include -//#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 8) -#include -#endif - -#if defined(__APPLE__) || defined(__clang__) -#pragma clang diagnostic pop -#endif - -#ifdef _MSC_VER - -#if defined(llvm_min_bug) -#define min llvm_min_bug -#undef llvm_min_bug -#endif -#if defined(llvm_max_bug) -#define max llvm_max_bug -#undef llvm_max_bug -#endif - -#endif diff --git a/tools/drawergen/llvmdrawers.cpp b/tools/drawergen/llvmdrawers.cpp deleted file mode 100644 index 7822597020..0000000000 --- a/tools/drawergen/llvmdrawers.cpp +++ /dev/null @@ -1,428 +0,0 @@ -/* -** 
LLVM code generated drawers -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 'as-is', without any express or implied -** warranty. In no event will the authors be held liable for any damages -** arising from the use of this software. -** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. -** 3. This notice may not be removed or altered from any source distribution. -** -*/ - -#include "precomp.h" -#include "timestamp.h" -#include "llvmdrawers.h" -#include "exception.h" - -LLVMDrawers::LLVMDrawers(const std::string &triple, const std::string &cpuName, const std::string &features, const std::string namePostfix) : mNamePostfix(namePostfix) -{ - mProgram.CreateModule(); - - CodegenDrawColumn("FillColumn", DrawColumnVariant::Fill); - CodegenDrawColumn("FillColumnAdd", DrawColumnVariant::FillAdd); - CodegenDrawColumn("FillColumnAddClamp", DrawColumnVariant::FillAddClamp); - CodegenDrawColumn("FillColumnSubClamp", DrawColumnVariant::FillSubClamp); - CodegenDrawColumn("FillColumnRevSubClamp", DrawColumnVariant::FillRevSubClamp); - CodegenDrawColumn("DrawColumn", DrawColumnVariant::Draw); - CodegenDrawColumn("DrawColumnAdd", DrawColumnVariant::DrawAdd); - CodegenDrawColumn("DrawColumnShaded", DrawColumnVariant::DrawShaded); - CodegenDrawColumn("DrawColumnAddClamp", DrawColumnVariant::DrawAddClamp); - CodegenDrawColumn("DrawColumnSubClamp", DrawColumnVariant::DrawSubClamp); - CodegenDrawColumn("DrawColumnRevSubClamp", 
DrawColumnVariant::DrawRevSubClamp); - CodegenDrawColumn("DrawColumnTranslated", DrawColumnVariant::DrawTranslated); - CodegenDrawColumn("DrawColumnTlatedAdd", DrawColumnVariant::DrawTlatedAdd); - CodegenDrawColumn("DrawColumnAddClampTranslated", DrawColumnVariant::DrawAddClampTranslated); - CodegenDrawColumn("DrawColumnSubClampTranslated", DrawColumnVariant::DrawSubClampTranslated); - CodegenDrawColumn("DrawColumnRevSubClampTranslated", DrawColumnVariant::DrawRevSubClampTranslated); - CodegenDrawSpan("DrawSpan", DrawSpanVariant::Opaque); - CodegenDrawSpan("DrawSpanMasked", DrawSpanVariant::Masked); - CodegenDrawSpan("DrawSpanTranslucent", DrawSpanVariant::Translucent); - CodegenDrawSpan("DrawSpanMaskedTranslucent", DrawSpanVariant::MaskedTranslucent); - CodegenDrawSpan("DrawSpanAddClamp", DrawSpanVariant::AddClamp); - CodegenDrawSpan("DrawSpanMaskedAddClamp", DrawSpanVariant::MaskedAddClamp); - CodegenDrawWall("vlinec1", DrawWallVariant::Opaque); - CodegenDrawWall("mvlinec1", DrawWallVariant::Masked); - CodegenDrawWall("tmvline1_add", DrawWallVariant::Add); - CodegenDrawWall("tmvline1_addclamp", DrawWallVariant::AddClamp); - CodegenDrawWall("tmvline1_subclamp", DrawWallVariant::SubClamp); - CodegenDrawWall("tmvline1_revsubclamp", DrawWallVariant::RevSubClamp); - CodegenDrawSky("DrawSky1", DrawSkyVariant::Single); - CodegenDrawSky("DrawDoubleSky1", DrawSkyVariant::Double); - for (int i = 0; i < NumTriBlendModes(); i++) - { - CodegenDrawTriangle("TriDraw8_" + std::to_string(i), (TriBlendMode)i, false, false); - CodegenDrawTriangle("TriDraw32_" + std::to_string(i), (TriBlendMode)i, true, false); - CodegenDrawTriangle("TriFill8_" + std::to_string(i), (TriBlendMode)i, false, true); - CodegenDrawTriangle("TriFill32_" + std::to_string(i), (TriBlendMode)i, true, true); - } - - ObjectFile = mProgram.GenerateObjectFile(triple, cpuName, features); -} - -void LLVMDrawers::CodegenDrawColumn(const char *name, DrawColumnVariant variant) -{ - llvm::IRBuilder<> 
builder(mProgram.context()); - SSAScope ssa_scope(&mProgram.context(), mProgram.module(), &builder); - - SSAFunction function(name + mNamePostfix); - function.add_parameter(GetDrawColumnArgsStruct(mProgram.context())); - function.add_parameter(GetWorkerThreadDataStruct(mProgram.context())); - function.create_public(); - - DrawColumnCodegen codegen; - codegen.Generate(variant, function.parameter(0), function.parameter(1)); - - builder.CreateRetVoid(); - - if (llvm::verifyFunction(*function.func)) - throw Exception("verifyFunction failed for CodegenDrawColumn()"); -} - -void LLVMDrawers::CodegenDrawSpan(const char *name, DrawSpanVariant variant) -{ - llvm::IRBuilder<> builder(mProgram.context()); - SSAScope ssa_scope(&mProgram.context(), mProgram.module(), &builder); - - SSAFunction function(name + mNamePostfix); - function.add_parameter(GetDrawSpanArgsStruct(mProgram.context())); - function.create_public(); - - DrawSpanCodegen codegen; - codegen.Generate(variant, function.parameter(0)); - - builder.CreateRetVoid(); - - if (llvm::verifyFunction(*function.func)) - throw Exception("verifyFunction failed for CodegenDrawSpan()"); -} - -void LLVMDrawers::CodegenDrawWall(const char *name, DrawWallVariant variant) -{ - llvm::IRBuilder<> builder(mProgram.context()); - SSAScope ssa_scope(&mProgram.context(), mProgram.module(), &builder); - - SSAFunction function(name + mNamePostfix); - function.add_parameter(GetDrawWallArgsStruct(mProgram.context())); - function.add_parameter(GetWorkerThreadDataStruct(mProgram.context())); - function.create_public(); - - DrawWallCodegen codegen; - codegen.Generate(variant, function.parameter(0), function.parameter(1)); - - builder.CreateRetVoid(); - - if (llvm::verifyFunction(*function.func)) - throw Exception("verifyFunction failed for CodegenDrawWall()"); -} - -void LLVMDrawers::CodegenDrawSky(const char *name, DrawSkyVariant variant) -{ - llvm::IRBuilder<> builder(mProgram.context()); - SSAScope ssa_scope(&mProgram.context(), 
mProgram.module(), &builder); - - SSAFunction function(name + mNamePostfix); - function.add_parameter(GetDrawSkyArgsStruct(mProgram.context())); - function.add_parameter(GetWorkerThreadDataStruct(mProgram.context())); - function.create_public(); - - DrawSkyCodegen codegen; - codegen.Generate(variant, function.parameter(0), function.parameter(1)); - - builder.CreateRetVoid(); - - if (llvm::verifyFunction(*function.func)) - throw Exception("verifyFunction failed for CodegenDrawSky()"); -} - -void LLVMDrawers::CodegenDrawTriangle(const std::string &name, TriBlendMode blendmode, bool truecolor, bool colorfill) -{ - llvm::IRBuilder<> builder(mProgram.context()); - SSAScope ssa_scope(&mProgram.context(), mProgram.module(), &builder); - - SSAFunction function(name + mNamePostfix); - function.add_parameter(GetTriDrawTriangleArgs(mProgram.context())); - function.add_parameter(GetWorkerThreadDataStruct(mProgram.context())); - function.create_public(); - - DrawTriangleCodegen codegen; - codegen.Generate(blendmode, truecolor, colorfill, function.parameter(0), function.parameter(1)); - - builder.CreateRetVoid(); - - if (llvm::verifyFunction(*function.func)) - throw Exception("verifyFunction failed for CodegenDrawTriangle()"); -} - -llvm::Type *LLVMDrawers::GetDrawColumnArgsStruct(llvm::LLVMContext &context) -{ - if (DrawColumnArgsStruct) - return DrawColumnArgsStruct; - - std::vector elements; - elements.push_back(llvm::Type::getInt8PtrTy(context)); // uint32_t *dest; - elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint32_t *source; - elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint32_t *source2; - elements.push_back(llvm::Type::getInt8PtrTy(context)); // uint8_t *colormap; - elements.push_back(llvm::Type::getInt8PtrTy(context)); // uint8_t *translation; - elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint32_t *basecolors; - elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t pitch; - 
elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t count; - elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t dest_y; - elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t iscale; - elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t texturefracx; - elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t textureheight; - elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t texturefrac; - elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t light; - elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t color; - elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t srccolor; - elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t srcalpha; - elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t destalpha; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_alpha; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_red; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_green; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_blue; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_alpha; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_red; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_green; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_blue; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t desaturate; - elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t flags; - DrawColumnArgsStruct = llvm::StructType::create(context, elements, "DrawColumnArgs", false)->getPointerTo(); - return DrawColumnArgsStruct; -} - -llvm::Type *LLVMDrawers::GetDrawSpanArgsStruct(llvm::LLVMContext &context) -{ - if (DrawSpanArgsStruct) - return DrawSpanArgsStruct; - - std::vector elements; - elements.push_back(llvm::Type::getInt8PtrTy(context)); // 
uint8_t *destorg; - elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint32_t *source; - elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t destpitch; - elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t xfrac; - elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t yfrac; - elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t xstep; - elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t ystep; - elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t x1; - elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t x2; - elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t y; - elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t xbits; - elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t ybits; - elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t light; - elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t srcalpha; - elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t destalpha; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_alpha; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_red; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_green; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_blue; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_alpha; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_red; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_green; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_blue; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t desaturate; - elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t flags; - elements.push_back(llvm::Type::getFloatTy(context)); // float viewpos_x; - elements.push_back(llvm::Type::getFloatTy(context)); // float step_viewpos_x; - 
elements.push_back(GetTriLightStruct(context)); // TriLight *dynlights; - elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t num_dynlights; - DrawSpanArgsStruct = llvm::StructType::create(context, elements, "DrawSpanArgs", false)->getPointerTo(); - return DrawSpanArgsStruct; -} - -llvm::Type *LLVMDrawers::GetDrawWallArgsStruct(llvm::LLVMContext &context) -{ - if (DrawWallArgsStruct) - return DrawWallArgsStruct; - - std::vector elements; - elements.push_back(llvm::Type::getInt8PtrTy(context)); - for (int i = 0; i < 8; i++) - elements.push_back(llvm::Type::getInt8PtrTy(context)); - for (int i = 0; i < 25; i++) - elements.push_back(llvm::Type::getInt32Ty(context)); - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_alpha; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_red; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_green; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_blue; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_alpha; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_red; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_green; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_blue; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t desaturate; - elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t flags; - elements.push_back(llvm::Type::getFloatTy(context)); // float z; - elements.push_back(llvm::Type::getFloatTy(context)); // float step_z; - elements.push_back(GetTriLightStruct(context)); // TriLight *dynlights; - elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t num_dynlights; - - DrawWallArgsStruct = llvm::StructType::create(context, elements, "DrawWallArgs", false)->getPointerTo(); - return DrawWallArgsStruct; -} - -llvm::Type *LLVMDrawers::GetDrawSkyArgsStruct(llvm::LLVMContext &context) -{ - if 
(DrawSkyArgsStruct) - return DrawSkyArgsStruct; - - std::vector elements; - elements.push_back(llvm::Type::getInt8PtrTy(context)); - for (int i = 0; i < 8; i++) - elements.push_back(llvm::Type::getInt8PtrTy(context)); - for (int i = 0; i < 16; i++) - elements.push_back(llvm::Type::getInt32Ty(context)); - DrawSkyArgsStruct = llvm::StructType::create(context, elements, "DrawSkyArgs", false)->getPointerTo(); - return DrawSkyArgsStruct; -} - -llvm::Type *LLVMDrawers::GetWorkerThreadDataStruct(llvm::LLVMContext &context) -{ - if (WorkerThreadDataStruct) - return WorkerThreadDataStruct; - - std::vector elements; - for (int i = 0; i < 4; i++) - elements.push_back(llvm::Type::getInt32Ty(context)); - elements.push_back(llvm::Type::getInt8PtrTy(context)); - elements.push_back(GetTriFullSpanStruct(context)); - elements.push_back(GetTriPartialBlockStruct(context)); - for (int i = 0; i < 4; i++) - elements.push_back(llvm::Type::getInt32Ty(context)); - WorkerThreadDataStruct = llvm::StructType::create(context, elements, "ThreadData", false)->getPointerTo(); - return WorkerThreadDataStruct; -} - -llvm::Type *LLVMDrawers::GetTriLightStruct(llvm::LLVMContext &context) -{ - if (TriLightStruct) - return TriLightStruct; - - std::vector elements; - elements.push_back(llvm::Type::getInt32Ty(context)); - for (int i = 0; i < 4; i++) - elements.push_back(llvm::Type::getFloatTy(context)); - TriLightStruct = llvm::StructType::create(context, elements, "TriLight", false)->getPointerTo(); - return TriLightStruct; -} - -llvm::Type *LLVMDrawers::GetTriVertexStruct(llvm::LLVMContext &context) -{ - if (TriVertexStruct) - return TriVertexStruct; - - std::vector elements; - for (int i = 0; i < 4 + TriVertex::NumVarying; i++) - elements.push_back(llvm::Type::getFloatTy(context)); - TriVertexStruct = llvm::StructType::create(context, elements, "TriVertex", false)->getPointerTo(); - return TriVertexStruct; -} - -llvm::Type *LLVMDrawers::GetTriMatrixStruct(llvm::LLVMContext &context) -{ - if 
(TriMatrixStruct) - return TriMatrixStruct; - - std::vector elements; - for (int i = 0; i < 4 * 4; i++) - elements.push_back(llvm::Type::getFloatTy(context)); - TriMatrixStruct = llvm::StructType::create(context, elements, "TriMatrix", false)->getPointerTo(); - return TriMatrixStruct; -} - -llvm::Type *LLVMDrawers::GetTriUniformsStruct(llvm::LLVMContext &context) -{ - if (TriUniformsStruct) - return TriUniformsStruct; - - std::vector elements; - elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t light; - elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t subsectorDepth; - elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t color; - elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t srcalpha; - elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t destalpha; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_alpha; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_red; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_green; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_blue; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_alpha; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_red; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_green; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_blue; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t desaturate; - elements.push_back(llvm::Type::getFloatTy(context)); // float globvis; - elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t flags; - elements.push_back(GetTriMatrixStruct(context)); // TriMatrix objectToClip - TriUniformsStruct = llvm::StructType::create(context, elements, "TriUniforms", false)->getPointerTo(); - return TriUniformsStruct; -} - -llvm::Type *LLVMDrawers::GetTriFullSpanStruct(llvm::LLVMContext &context) -{ - if 
(TriFullSpanStruct) - return TriFullSpanStruct; - - std::vector elements; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint32_t X; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint32_t Y; - elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t Length; - TriFullSpanStruct = llvm::StructType::create(context, elements, "TriFullSpan", false)->getPointerTo(); - return TriFullSpanStruct; -} - -llvm::Type *LLVMDrawers::GetTriPartialBlockStruct(llvm::LLVMContext &context) -{ - if (TriPartialBlockStruct) - return TriPartialBlockStruct; - - std::vector elements; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint32_t X; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint32_t Y; - elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t Mask0; - elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t Mask1; - TriPartialBlockStruct = llvm::StructType::create(context, elements, "TriPartialBlock", false)->getPointerTo(); - return TriPartialBlockStruct; -} - -llvm::Type *LLVMDrawers::GetTriDrawTriangleArgs(llvm::LLVMContext &context) -{ - if (TriDrawTriangleArgs) - return TriDrawTriangleArgs; - - std::vector elements; - elements.push_back(llvm::Type::getInt8PtrTy(context)); // uint8_t *dest; - elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t pitch; - elements.push_back(GetTriVertexStruct(context)); // TriVertex *v1; - elements.push_back(GetTriVertexStruct(context)); // TriVertex *v2; - elements.push_back(GetTriVertexStruct(context)); // TriVertex *v3; - elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t clipleft; - elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t clipright; - elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t cliptop; - elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t clipbottom; - elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint8_t *texturePixels; - 
elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t textureWidth; - elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t textureHeight; - elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint8_t *translation; - elements.push_back(GetTriUniformsStruct(context)); // const TriUniforms *uniforms; - elements.push_back(llvm::Type::getInt8PtrTy(context)); // uint8_t *stencilValues; - elements.push_back(llvm::Type::getInt32PtrTy(context)); // uint32_t *stencilMasks; - elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t stencilPitch; - elements.push_back(llvm::Type::getInt8Ty(context)); // uint8_t stencilTestValue; - elements.push_back(llvm::Type::getInt8Ty(context)); // uint8_t stencilWriteValue; - elements.push_back(llvm::Type::getInt32PtrTy(context)); // uint32_t *subsectorGBuffer; - elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint8_t *colormaps; - elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint8_t *RGB256k; - elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint8_t *BaseColors; - TriDrawTriangleArgs = llvm::StructType::create(context, elements, "TriDrawTriangle", false)->getPointerTo(); - return TriDrawTriangleArgs; -} diff --git a/tools/drawergen/llvmdrawers.h b/tools/drawergen/llvmdrawers.h deleted file mode 100644 index dd66c2a86c..0000000000 --- a/tools/drawergen/llvmdrawers.h +++ /dev/null @@ -1,84 +0,0 @@ -/* -** LLVM code generated drawers -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 'as-is', without any express or implied -** warranty. In no event will the authors be held liable for any damages -** arising from the use of this software. -** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. 
The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. -** 3. This notice may not be removed or altered from any source distribution. -** -*/ - -#pragma once - -#include "fixedfunction/drawspancodegen.h" -#include "fixedfunction/drawwallcodegen.h" -#include "fixedfunction/drawcolumncodegen.h" -#include "fixedfunction/drawskycodegen.h" -#include "fixedfunction/drawtrianglecodegen.h" -#include "ssa/ssa_function.h" -#include "ssa/ssa_scope.h" -#include "ssa/ssa_for_block.h" -#include "ssa/ssa_if_block.h" -#include "ssa/ssa_stack.h" -#include "ssa/ssa_function.h" -#include "ssa/ssa_struct_type.h" -#include "ssa/ssa_value.h" -#include "ssa/ssa_barycentric_weight.h" -#include "llvmprogram.h" -#include - -class LLVMDrawers -{ -public: - LLVMDrawers(const std::string &triple, const std::string &cpuName, const std::string &features, const std::string namePostfix); - - std::vector ObjectFile; - -private: - void CodegenDrawColumn(const char *name, DrawColumnVariant variant); - void CodegenDrawSpan(const char *name, DrawSpanVariant variant); - void CodegenDrawWall(const char *name, DrawWallVariant variant); - void CodegenDrawSky(const char *name, DrawSkyVariant variant); - void CodegenDrawTriangle(const std::string &name, TriBlendMode blendmode, bool truecolor, bool colorfill); - - llvm::Type *GetDrawColumnArgsStruct(llvm::LLVMContext &context); - llvm::Type *GetDrawSpanArgsStruct(llvm::LLVMContext &context); - llvm::Type *GetDrawWallArgsStruct(llvm::LLVMContext &context); - llvm::Type *GetDrawSkyArgsStruct(llvm::LLVMContext &context); - llvm::Type *GetWorkerThreadDataStruct(llvm::LLVMContext &context); - llvm::Type *GetTriLightStruct(llvm::LLVMContext 
&context); - llvm::Type *GetTriVertexStruct(llvm::LLVMContext &context); - llvm::Type *GetTriMatrixStruct(llvm::LLVMContext &context); - llvm::Type *GetTriUniformsStruct(llvm::LLVMContext &context); - llvm::Type *GetTriFullSpanStruct(llvm::LLVMContext &context); - llvm::Type *GetTriPartialBlockStruct(llvm::LLVMContext &context); - llvm::Type *GetTriDrawTriangleArgs(llvm::LLVMContext &context); - - llvm::Type *DrawColumnArgsStruct = nullptr; - llvm::Type *DrawSpanArgsStruct = nullptr; - llvm::Type *DrawWallArgsStruct = nullptr; - llvm::Type *DrawSkyArgsStruct = nullptr; - llvm::Type *WorkerThreadDataStruct = nullptr; - llvm::Type *TriLightStruct = nullptr; - llvm::Type *TriVertexStruct = nullptr; - llvm::Type *TriMatrixStruct = nullptr; - llvm::Type *TriUniformsStruct = nullptr; - llvm::Type *TriFullSpanStruct = nullptr; - llvm::Type *TriPartialBlockStruct = nullptr; - llvm::Type *TriDrawTriangleArgs = nullptr; - - LLVMProgram mProgram; - std::string mNamePostfix; -}; diff --git a/tools/drawergen/llvmprogram.cpp b/tools/drawergen/llvmprogram.cpp deleted file mode 100644 index 84094eda63..0000000000 --- a/tools/drawergen/llvmprogram.cpp +++ /dev/null @@ -1,171 +0,0 @@ -/* -** LLVM code generated drawers -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 'as-is', without any express or implied -** warranty. In no event will the authors be held liable for any damages -** arising from the use of this software. -** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. 
Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. -** 3. This notice may not be removed or altered from any source distribution. -** -*/ - -#include "precomp.h" -#include "timestamp.h" -#include "llvmprogram.h" - -LLVMProgram::LLVMProgram() -{ - mContext = std::make_unique(); -} - -void LLVMProgram::CreateModule() -{ - mModule = std::make_unique("render", context()); -} - -std::vector LLVMProgram::GenerateObjectFile(const std::string &triple, const std::string &cpuName, const std::string &features) -{ - using namespace llvm; - - std::string errorstring; - - llvm::Module *module = mModule.get(); - - const Target *target = TargetRegistry::lookupTarget(triple, errorstring); - -#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 9) - Reloc::Model relocationModel = Reloc::PIC_; -#else - Optional relocationModel = Reloc::PIC_; -#endif - - CodeModel::Model codeModel = CodeModel::Model::Default; - - TargetOptions options; - options.LessPreciseFPMADOption = true; - options.AllowFPOpFusion = FPOpFusion::Fast; - options.UnsafeFPMath = true; - options.NoInfsFPMath = true; - options.NoNaNsFPMath = true; - options.HonorSignDependentRoundingFPMathOption = false; - options.NoZerosInBSS = false; - options.GuaranteedTailCallOpt = false; - options.StackAlignmentOverride = 0; - options.UseInitArray = true; - options.DataSections = false; - options.FunctionSections = false; - options.JTType = JumpTable::Single; // Create a single table for all jumptable functions - options.ThreadModel = ThreadModel::POSIX; - options.DisableIntegratedAS = false; - options.MCOptions.SanitizeAddress = false; - options.MCOptions.MCRelaxAll = false; // relax all fixups in the emitted object file - options.MCOptions.DwarfVersion = 0; - options.MCOptions.ShowMCInst = false; - options.MCOptions.ABIName = ""; - options.MCOptions.MCFatalWarnings = false; - options.MCOptions.ShowMCEncoding = false; // Show encoding 
in .s output - options.MCOptions.MCUseDwarfDirectory = false; - options.MCOptions.AsmVerbose = true; - -#if LLVM_VERSION_MAJOR > 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 9) - options.Reciprocals = TargetRecip({ "all" }); - options.StackSymbolOrdering = true; - options.UniqueSectionNames = true; - options.EmulatedTLS = false; - options.ExceptionModel = ExceptionHandling::None; - options.EABIVersion = EABI::Default; - options.DebuggerTuning = DebuggerKind::Default; - options.MCOptions.MCIncrementalLinkerCompatible = false; - options.MCOptions.MCNoWarn = false; - options.MCOptions.PreserveAsmComments = true; -#endif - - CodeGenOpt::Level optimizationLevel = CodeGenOpt::Aggressive; - machine = target->createTargetMachine(triple, cpuName, features, options, relocationModel, codeModel, optimizationLevel); - - -#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 8) - std::string targetTriple = machine->getTargetTriple(); -#else - std::string targetTriple = machine->getTargetTriple().getTriple(); -#endif - - module->setTargetTriple(targetTriple); -#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 8) - module->setDataLayout(new DataLayout(*machine->getSubtargetImpl()->getDataLayout())); -#else - module->setDataLayout(machine->createDataLayout()); -#endif - - legacy::FunctionPassManager PerFunctionPasses(module); - legacy::PassManager PerModulePasses; - -#if LLVM_VERSION_MAJOR > 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 8) - PerFunctionPasses.add(createTargetTransformInfoWrapperPass(machine->getTargetIRAnalysis())); - PerModulePasses.add(createTargetTransformInfoWrapperPass(machine->getTargetIRAnalysis())); -#endif - - SmallString<16 * 1024> str; -#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 8) - raw_svector_ostream vecstream(str); - formatted_raw_ostream stream(vecstream); -#else - raw_svector_ostream stream(str); -#endif - 
machine->addPassesToEmitFile(PerModulePasses, stream, TargetMachine::CGFT_ObjectFile); - - PassManagerBuilder passManagerBuilder; - passManagerBuilder.OptLevel = 3; - passManagerBuilder.SizeLevel = 0; - passManagerBuilder.Inliner = createFunctionInliningPass(); - passManagerBuilder.SLPVectorize = true; - passManagerBuilder.LoopVectorize = true; - passManagerBuilder.LoadCombine = true; - passManagerBuilder.populateModulePassManager(PerModulePasses); - passManagerBuilder.populateFunctionPassManager(PerFunctionPasses); - - // Run function passes: - PerFunctionPasses.doInitialization(); - for (llvm::Function &func : *module) - { - if (!func.isDeclaration()) - PerFunctionPasses.run(func); - } - PerFunctionPasses.doFinalization(); - - // Run module passes: - PerModulePasses.run(*module); - - // Return the resulting object file -#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 8) - stream.flush(); - vecstream.flush(); -#endif - std::vector data; - data.resize(str.size()); - memcpy(data.data(), str.data(), data.size()); - return data; -} - -std::string LLVMProgram::DumpModule() -{ - std::string str; - llvm::raw_string_ostream stream(str); -#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 8) - mModule->print(stream, nullptr); -#else - mModule->print(stream, nullptr, false, true); -#endif - return stream.str(); -} diff --git a/tools/drawergen/llvmprogram.h b/tools/drawergen/llvmprogram.h deleted file mode 100644 index 30cf33d5ac..0000000000 --- a/tools/drawergen/llvmprogram.h +++ /dev/null @@ -1,41 +0,0 @@ -/* -** LLVM code generated drawers -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 'as-is', without any express or implied -** warranty. In no event will the authors be held liable for any damages -** arising from the use of this software. 
-** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. -** 3. This notice may not be removed or altered from any source distribution. -** -*/ - -#pragma once - -class LLVMProgram -{ -public: - LLVMProgram(); - - void CreateModule(); - std::vector GenerateObjectFile(const std::string &triple, const std::string &cpuName, const std::string &features); - std::string DumpModule(); - - llvm::LLVMContext &context() { return *mContext; } - llvm::Module *module() { return mModule.get(); } - -private: - llvm::TargetMachine *machine = nullptr; - std::unique_ptr mContext; - std::unique_ptr mModule; -}; diff --git a/tools/drawergen/precomp.h b/tools/drawergen/precomp.h deleted file mode 100644 index bb4f818df8..0000000000 --- a/tools/drawergen/precomp.h +++ /dev/null @@ -1,11 +0,0 @@ - -#pragma once - -#include "llvm_include.h" -#include "../../src/swrenderer/drawers/r_drawers.h" - -#ifdef __arm__ -#define ARM_TARGET -#else -#define X86_TARGET -#endif diff --git a/tools/drawergen/ssa/ssa_barycentric_weight.h b/tools/drawergen/ssa/ssa_barycentric_weight.h deleted file mode 100644 index 1a07d6c75a..0000000000 --- a/tools/drawergen/ssa/ssa_barycentric_weight.h +++ /dev/null @@ -1,118 +0,0 @@ -/* -** SSA barycentric weight and viewport calculations -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 'as-is', without any express or implied -** warranty. 
In no event will the authors be held liable for any damages -** arising from the use of this software. -** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. -** 3. This notice may not be removed or altered from any source distribution. -** -*/ - -#pragma once - -#include "ssa_vec4f.h" -#include "ssa_float.h" -#include "ssa_int.h" - -class SSAViewport -{ -public: - SSAViewport(SSAInt x, SSAInt y, SSAInt width, SSAInt height) - : x(x), y(y), width(width), height(height), right(x + width), bottom(y + height), - half_width(SSAFloat(width) * 0.5f), half_height(SSAFloat(height) * 0.5f), - rcp_half_width(1.0f / (SSAFloat(width) * 0.5f)), - rcp_half_height(1.0f / (SSAFloat(height) * 0.5f)) - { - } - - SSAInt x, y; - SSAInt width, height; - SSAInt right, bottom; - SSAFloat half_width; - SSAFloat half_height; - SSAFloat rcp_half_width; - SSAFloat rcp_half_height; - - SSAVec4f clip_to_window(SSAVec4f clip) const - { - SSAFloat w = clip[3]; - SSAVec4f normalized = SSAVec4f::insert_element(clip / SSAVec4f::shuffle(clip, 3, 3, 3, 3), w, 3); - return normalized_to_window(normalized); - } - - SSAVec4f normalized_to_window(SSAVec4f normalized) const - { - return SSAVec4f( - SSAFloat(x) + (normalized[0] + 1.0f) * half_width, - SSAFloat(y) + (normalized[1] + 1.0f) * half_height, - 0.0f - normalized[2], - normalized[3]); - } -}; - -class SSABarycentricWeight -{ -public: - SSABarycentricWeight(SSAViewport vp, SSAVec4f v1, SSAVec4f v2); - SSAFloat 
from_window_x(SSAInt x) const; - SSAFloat from_window_y(SSAInt y) const; - - SSAViewport viewport; - SSAVec4f v1; - SSAVec4f v2; -}; - -inline SSABarycentricWeight::SSABarycentricWeight(SSAViewport viewport, SSAVec4f v1, SSAVec4f v2) -: viewport(viewport), v1(v1), v2(v2) -{ -} - -inline SSAFloat SSABarycentricWeight::from_window_x(SSAInt x) const -{ -/* SSAFloat xnormalized = (x + 0.5f - viewport.x) * viewport.rcp_half_width - 1.0f; - SSAFloat dx = v2.x-v1.x; - SSAFloat dw = v2.w-v1.w; - SSAFloat a = (v2.x - xnormalized * v2.w) / (dx - xnormalized * dw); - return a;*/ - - SSAFloat xnormalized = (SSAFloat(x) + 0.5f - SSAFloat(viewport.x)) * viewport.rcp_half_width - 1.0f; - SSAFloat dx = v2[0]-v1[0]; - SSAFloat dw = v2[3]-v1[3]; - SSAFloat t = (xnormalized * v1[3] - v1[0]) / (dx - xnormalized * dw); - return 1.0f - t; -} - -inline SSAFloat SSABarycentricWeight::from_window_y(SSAInt y) const -{ -/* SSAFloat ynormalized = (y + 0.5f - viewport.y) * viewport.rcp_half_height - 1.0f; - SSAFloat dy = v2.y-v1.y; - SSAFloat dw = v2.w-v1.w; - SSAFloat a = (v2.y - ynormalized * v2.w) / (dy - ynormalized * dw); - return a;*/ - - SSAFloat ynormalized = (SSAFloat(y) + 0.5f - SSAFloat(viewport.y)) * viewport.rcp_half_height - 1.0f; - SSAFloat dy = v2[1]-v1[1]; - SSAFloat dw = v2[3]-v1[3]; - SSAFloat t = (ynormalized * v1[3] - v1[1]) / (dy - ynormalized * dw); - return 1.0f - t; -} - -/* - y = (v1.y + t * dy) / (v1.w + t * dw) - - y * v1.w + y * t * dw = v1.y + t * dy - y * v1.w - v1.y = t * (dy - y * dw) - t = (y * v1.w - v1.y) / (dy - y * dw) -*/ diff --git a/tools/drawergen/ssa/ssa_bool.cpp b/tools/drawergen/ssa/ssa_bool.cpp deleted file mode 100644 index 6eac90afb7..0000000000 --- a/tools/drawergen/ssa/ssa_bool.cpp +++ /dev/null @@ -1,173 +0,0 @@ -/* -** SSA boolean -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 'as-is', without any express or implied -** warranty. 
In no event will the authors be held liable for any damages -** arising from the use of this software. -** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. -** 3. This notice may not be removed or altered from any source distribution. -** -*/ - -#include "precomp.h" -#include "ssa_bool.h" -#include "ssa_ubyte.h" -#include "ssa_vec4i.h" -#include "ssa_value.h" -#include "ssa_scope.h" - -SSABool::SSABool() -: v(0) -{ -} - -SSABool::SSABool(bool constant) -: v(0) -{ - v = llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(1, constant, false)); -} - -SSABool::SSABool(llvm::Value *v) -: v(v) -{ -} - -llvm::Type *SSABool::llvm_type() -{ - return llvm::Type::getInt1Ty(SSAScope::context()); -} - -SSAInt SSABool::zext_int() -{ - return SSAInt::from_llvm(SSAScope::builder().CreateZExt(v, SSAInt::llvm_type(), SSAScope::hint())); -} - -SSAInt SSABool::select(SSAInt a, SSAInt b) -{ - return SSAValue::from_llvm(SSAScope::builder().CreateSelect(v, a.v, b.v, SSAScope::hint())); -} - -SSAFloat SSABool::select(SSAFloat a, SSAFloat b) -{ - return SSAValue::from_llvm(SSAScope::builder().CreateSelect(v, a.v, b.v, SSAScope::hint())); -} - -SSAUByte SSABool::select(SSAUByte a, SSAUByte b) -{ - return SSAValue::from_llvm(SSAScope::builder().CreateSelect(v, a.v, b.v, SSAScope::hint())); -} - -SSAVec4i SSABool::select(SSAVec4i a, SSAVec4i b) -{ - return SSAValue::from_llvm(SSAScope::builder().CreateSelect(v, a.v, b.v, SSAScope::hint())); -} - -SSABool 
SSABool::compare_uge(const SSAUByte &a, const SSAUByte &b) -{ - return SSABool::from_llvm(SSAScope::builder().CreateICmpUGE(a.v, b.v, SSAScope::hint())); -} - -SSABool operator&&(const SSABool &a, const SSABool &b) -{ - return SSABool::from_llvm(SSAScope::builder().CreateAnd(a.v, b.v, SSAScope::hint())); -} - -SSABool operator||(const SSABool &a, const SSABool &b) -{ - return SSABool::from_llvm(SSAScope::builder().CreateOr(a.v, b.v, SSAScope::hint())); -} - -SSABool operator!(const SSABool &a) -{ - return SSABool::from_llvm(SSAScope::builder().CreateNot(a.v, SSAScope::hint())); -} - -SSABool operator<(const SSAInt &a, const SSAInt &b) -{ - return SSABool::from_llvm(SSAScope::builder().CreateICmpSLT(a.v, b.v, SSAScope::hint())); -} - -SSABool operator<=(const SSAInt &a, const SSAInt &b) -{ - return SSABool::from_llvm(SSAScope::builder().CreateICmpSLE(a.v, b.v, SSAScope::hint())); -} - -SSABool operator==(const SSAInt &a, const SSAInt &b) -{ - return SSABool::from_llvm(SSAScope::builder().CreateICmpEQ(a.v, b.v, SSAScope::hint())); -} - -SSABool operator>=(const SSAInt &a, const SSAInt &b) -{ - return SSABool::from_llvm(SSAScope::builder().CreateICmpSGE(a.v, b.v, SSAScope::hint())); -} - -SSABool operator>(const SSAInt &a, const SSAInt &b) -{ - return SSABool::from_llvm(SSAScope::builder().CreateICmpSGT(a.v, b.v, SSAScope::hint())); -} - -///////////////////////////////////////////////////////////////////////////// - -SSABool operator<(const SSAUByte &a, const SSAUByte &b) -{ - return SSABool::from_llvm(SSAScope::builder().CreateICmpSLT(a.v, b.v, SSAScope::hint())); -} - -SSABool operator<=(const SSAUByte &a, const SSAUByte &b) -{ - return SSABool::from_llvm(SSAScope::builder().CreateICmpSLE(a.v, b.v, SSAScope::hint())); -} - -SSABool operator==(const SSAUByte &a, const SSAUByte &b) -{ - return SSABool::from_llvm(SSAScope::builder().CreateICmpEQ(a.v, b.v, SSAScope::hint())); -} - -SSABool operator>=(const SSAUByte &a, const SSAUByte &b) -{ - return 
SSABool::from_llvm(SSAScope::builder().CreateICmpSGE(a.v, b.v, SSAScope::hint())); -} - -SSABool operator>(const SSAUByte &a, const SSAUByte &b) -{ - return SSABool::from_llvm(SSAScope::builder().CreateICmpSGT(a.v, b.v, SSAScope::hint())); -} - -///////////////////////////////////////////////////////////////////////////// - -SSABool operator<(const SSAFloat &a, const SSAFloat &b) -{ - return SSABool::from_llvm(SSAScope::builder().CreateFCmpOLT(a.v, b.v, SSAScope::hint())); -} - -SSABool operator<=(const SSAFloat &a, const SSAFloat &b) -{ - return SSABool::from_llvm(SSAScope::builder().CreateFCmpOLE(a.v, b.v, SSAScope::hint())); -} - -SSABool operator==(const SSAFloat &a, const SSAFloat &b) -{ - return SSABool::from_llvm(SSAScope::builder().CreateFCmpOEQ(a.v, b.v, SSAScope::hint())); -} - -SSABool operator>=(const SSAFloat &a, const SSAFloat &b) -{ - return SSABool::from_llvm(SSAScope::builder().CreateFCmpOGE(a.v, b.v, SSAScope::hint())); -} - -SSABool operator>(const SSAFloat &a, const SSAFloat &b) -{ - return SSABool::from_llvm(SSAScope::builder().CreateFCmpOGT(a.v, b.v, SSAScope::hint())); -} diff --git a/tools/drawergen/ssa/ssa_bool.h b/tools/drawergen/ssa/ssa_bool.h deleted file mode 100644 index fbf5192d36..0000000000 --- a/tools/drawergen/ssa/ssa_bool.h +++ /dev/null @@ -1,75 +0,0 @@ -/* -** SSA boolean -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 'as-is', without any express or implied -** warranty. In no event will the authors be held liable for any damages -** arising from the use of this software. -** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. 
If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. -** 3. This notice may not be removed or altered from any source distribution. -** -*/ - -#pragma once - -#include "ssa_int.h" -#include "ssa_ubyte.h" -#include "ssa_float.h" - -namespace llvm { class Value; } -namespace llvm { class Type; } - -class SSAVec4i; - -class SSABool -{ -public: - SSABool(); - explicit SSABool(bool constant); - explicit SSABool(llvm::Value *v); - static SSABool from_llvm(llvm::Value *v) { return SSABool(v); } - static llvm::Type *llvm_type(); - - SSAInt zext_int(); - SSAInt select(SSAInt a, SSAInt b); - SSAFloat select(SSAFloat a, SSAFloat b); - SSAUByte select(SSAUByte a, SSAUByte b); - SSAVec4i select(SSAVec4i a, SSAVec4i b); - - static SSABool compare_uge(const SSAUByte &a, const SSAUByte &b); - - llvm::Value *v; -}; - -SSABool operator&&(const SSABool &a, const SSABool &b); -SSABool operator||(const SSABool &a, const SSABool &b); - -SSABool operator!(const SSABool &a); - -SSABool operator<(const SSAInt &a, const SSAInt &b); -SSABool operator<=(const SSAInt &a, const SSAInt &b); -SSABool operator==(const SSAInt &a, const SSAInt &b); -SSABool operator>=(const SSAInt &a, const SSAInt &b); -SSABool operator>(const SSAInt &a, const SSAInt &b); - -SSABool operator<(const SSAUByte &a, const SSAUByte &b); -SSABool operator<=(const SSAUByte &a, const SSAUByte &b); -SSABool operator==(const SSAUByte &a, const SSAUByte &b); -SSABool operator>=(const SSAUByte &a, const SSAUByte &b); -SSABool operator>(const SSAUByte &a, const SSAUByte &b); - -SSABool operator<(const SSAFloat &a, const SSAFloat &b); -SSABool operator<=(const SSAFloat &a, const SSAFloat &b); -SSABool operator==(const SSAFloat &a, const SSAFloat &b); -SSABool operator>=(const SSAFloat &a, const SSAFloat &b); -SSABool 
operator>(const SSAFloat &a, const SSAFloat &b); diff --git a/tools/drawergen/ssa/ssa_float.cpp b/tools/drawergen/ssa/ssa_float.cpp deleted file mode 100644 index 6c597dc1c4..0000000000 --- a/tools/drawergen/ssa/ssa_float.cpp +++ /dev/null @@ -1,143 +0,0 @@ -/* -** SSA float32 -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 'as-is', without any express or implied -** warranty. In no event will the authors be held liable for any damages -** arising from the use of this software. -** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. -** 3. This notice may not be removed or altered from any source distribution. 
-** -*/ - -#include "precomp.h" -#include "ssa_float.h" -#include "ssa_int.h" -#include "ssa_scope.h" -#include "ssa_bool.h" -#include "ssa_vec4f.h" - -SSAFloat::SSAFloat() -: v(0) -{ -} - -SSAFloat::SSAFloat(float constant) -: v(0) -{ - v = llvm::ConstantFP::get(SSAScope::context(), llvm::APFloat(constant)); -} - -SSAFloat::SSAFloat(SSAInt i) -: v(0) -{ - v = SSAScope::builder().CreateSIToFP(i.v, llvm::Type::getFloatTy(SSAScope::context()), SSAScope::hint()); -} - -SSAFloat::SSAFloat(llvm::Value *v) -: v(v) -{ -} - -llvm::Type *SSAFloat::llvm_type() -{ - return llvm::Type::getFloatTy(SSAScope::context()); -} - -SSAFloat SSAFloat::rsqrt(SSAFloat f) -{ -#ifdef ARM_TARGET - //return SSAFloat::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::aarch64_neon_frsqrts), f.v, SSAScope::hint())); - return SSAFloat(1.0f) / (f * SSAFloat(0.01f)); -#else - llvm::Value *f_ss = SSAScope::builder().CreateInsertElement(llvm::UndefValue::get(SSAVec4f::llvm_type()), f.v, llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)0))); - f_ss = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse_rsqrt_ss), f_ss, SSAScope::hint()); - return SSAFloat::from_llvm(SSAScope::builder().CreateExtractElement(f_ss, SSAInt(0).v, SSAScope::hint())); -#endif -} - -SSAFloat SSAFloat::MIN(SSAFloat a, SSAFloat b) -{ - return SSAFloat::from_llvm(SSAScope::builder().CreateSelect((a < b).v, a.v, b.v, SSAScope::hint())); -} - -SSAFloat SSAFloat::MAX(SSAFloat a, SSAFloat b) -{ - return SSAFloat::from_llvm(SSAScope::builder().CreateSelect((a > b).v, a.v, b.v, SSAScope::hint())); -} - -SSAFloat SSAFloat::clamp(SSAFloat a, SSAFloat b, SSAFloat c) -{ - return SSAFloat::MAX(SSAFloat::MIN(a, c), b); -} - -SSAFloat operator+(const SSAFloat &a, const SSAFloat &b) -{ - return SSAFloat::from_llvm(SSAScope::builder().CreateFAdd(a.v, b.v, SSAScope::hint())); -} - -SSAFloat operator-(const SSAFloat &a, const SSAFloat &b) -{ - return 
SSAFloat::from_llvm(SSAScope::builder().CreateFSub(a.v, b.v, SSAScope::hint())); -} - -SSAFloat operator*(const SSAFloat &a, const SSAFloat &b) -{ - return SSAFloat::from_llvm(SSAScope::builder().CreateFMul(a.v, b.v, SSAScope::hint())); -} - -SSAFloat operator/(const SSAFloat &a, const SSAFloat &b) -{ - return SSAFloat::from_llvm(SSAScope::builder().CreateFDiv(a.v, b.v, SSAScope::hint())); -} - -SSAFloat operator+(float a, const SSAFloat &b) -{ - return SSAFloat(a) + b; -} - -SSAFloat operator-(float a, const SSAFloat &b) -{ - return SSAFloat(a) - b; -} - -SSAFloat operator*(float a, const SSAFloat &b) -{ - return SSAFloat(a) * b; -} - -SSAFloat operator/(float a, const SSAFloat &b) -{ - return SSAFloat(a) / b; -} - -SSAFloat operator+(const SSAFloat &a, float b) -{ - return a + SSAFloat(b); -} - -SSAFloat operator-(const SSAFloat &a, float b) -{ - return a - SSAFloat(b); -} - -SSAFloat operator*(const SSAFloat &a, float b) -{ - return a * SSAFloat(b); -} - -SSAFloat operator/(const SSAFloat &a, float b) -{ - return a / SSAFloat(b); -} - diff --git a/tools/drawergen/ssa/ssa_float.h b/tools/drawergen/ssa/ssa_float.h deleted file mode 100644 index b3a35486f3..0000000000 --- a/tools/drawergen/ssa/ssa_float.h +++ /dev/null @@ -1,60 +0,0 @@ -/* -** SSA float32 -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 'as-is', without any express or implied -** warranty. In no event will the authors be held liable for any damages -** arising from the use of this software. -** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. 
Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. -** 3. This notice may not be removed or altered from any source distribution. -** -*/ - -#pragma once - -namespace llvm { class Value; } -namespace llvm { class Type; } - -class SSAInt; - -class SSAFloat -{ -public: - SSAFloat(); - SSAFloat(SSAInt i); - explicit SSAFloat(float constant); - explicit SSAFloat(llvm::Value *v); - static SSAFloat from_llvm(llvm::Value *v) { return SSAFloat(v); } - static llvm::Type *llvm_type(); - static SSAFloat rsqrt(SSAFloat f); - static SSAFloat MIN(SSAFloat a, SSAFloat b); - static SSAFloat MAX(SSAFloat a, SSAFloat b); - static SSAFloat clamp(SSAFloat a, SSAFloat b, SSAFloat c); - - llvm::Value *v; -}; - -SSAFloat operator+(const SSAFloat &a, const SSAFloat &b); -SSAFloat operator-(const SSAFloat &a, const SSAFloat &b); -SSAFloat operator*(const SSAFloat &a, const SSAFloat &b); -SSAFloat operator/(const SSAFloat &a, const SSAFloat &b); - -SSAFloat operator+(float a, const SSAFloat &b); -SSAFloat operator-(float a, const SSAFloat &b); -SSAFloat operator*(float a, const SSAFloat &b); -SSAFloat operator/(float a, const SSAFloat &b); - -SSAFloat operator+(const SSAFloat &a, float b); -SSAFloat operator-(const SSAFloat &a, float b); -SSAFloat operator*(const SSAFloat &a, float b); -SSAFloat operator/(const SSAFloat &a, float b); diff --git a/tools/drawergen/ssa/ssa_float_ptr.cpp b/tools/drawergen/ssa/ssa_float_ptr.cpp deleted file mode 100644 index 731fbbef8e..0000000000 --- a/tools/drawergen/ssa/ssa_float_ptr.cpp +++ /dev/null @@ -1,91 +0,0 @@ -/* -** SSA float32 pointer -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 'as-is', without any express or implied -** warranty. In no event will the authors be held liable for any damages -** arising from the use of this software. 
-** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. -** 3. This notice may not be removed or altered from any source distribution. -** -*/ - -#include "precomp.h" -#include "ssa_float_ptr.h" -#include "ssa_scope.h" - -SSAFloatPtr::SSAFloatPtr() -: v(0) -{ -} - -SSAFloatPtr::SSAFloatPtr(llvm::Value *v) -: v(v) -{ -} - -llvm::Type *SSAFloatPtr::llvm_type() -{ - return llvm::Type::getFloatPtrTy(SSAScope::context()); -} - -SSAFloatPtr SSAFloatPtr::operator[](SSAInt index) const -{ - return SSAFloatPtr::from_llvm(SSAScope::builder().CreateGEP(v, index.v, SSAScope::hint())); -} - -SSAFloat SSAFloatPtr::load(bool constantScopeDomain) const -{ - auto loadInst = SSAScope::builder().CreateLoad(v, false, SSAScope::hint()); - if (constantScopeDomain) - loadInst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list()); - return SSAFloat::from_llvm(loadInst); -} - -SSAVec4f SSAFloatPtr::load_vec4f(bool constantScopeDomain) const -{ - llvm::PointerType *m4xfloattypeptr = llvm::VectorType::get(llvm::Type::getFloatTy(SSAScope::context()), 4)->getPointerTo(); - auto loadInst = SSAScope::builder().CreateAlignedLoad(SSAScope::builder().CreateBitCast(v, m4xfloattypeptr, SSAScope::hint()), 16, false, SSAScope::hint()); - if (constantScopeDomain) - loadInst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list()); - return SSAVec4f::from_llvm(loadInst); -} - -SSAVec4f 
SSAFloatPtr::load_unaligned_vec4f(bool constantScopeDomain) const -{ - llvm::PointerType *m4xfloattypeptr = llvm::VectorType::get(llvm::Type::getFloatTy(SSAScope::context()), 4)->getPointerTo(); - auto loadInst = SSAScope::builder().CreateAlignedLoad(SSAScope::builder().CreateBitCast(v, m4xfloattypeptr, SSAScope::hint()), 1, false, SSAScope::hint()); - if (constantScopeDomain) - loadInst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list()); - return SSAVec4f::from_llvm(loadInst); -} - -void SSAFloatPtr::store(const SSAFloat &new_value) -{ - auto inst = SSAScope::builder().CreateStore(new_value.v, v, false); - inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list()); -} - -void SSAFloatPtr::store_vec4f(const SSAVec4f &new_value) -{ - llvm::PointerType *m4xfloattypeptr = llvm::VectorType::get(llvm::Type::getFloatTy(SSAScope::context()), 4)->getPointerTo(); - auto inst = SSAScope::builder().CreateAlignedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xfloattypeptr, SSAScope::hint()), 16); - inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list()); -} - -void SSAFloatPtr::store_unaligned_vec4f(const SSAVec4f &new_value) -{ - llvm::PointerType *m4xfloattypeptr = llvm::VectorType::get(llvm::Type::getFloatTy(SSAScope::context()), 4)->getPointerTo(); - auto inst = SSAScope::builder().CreateAlignedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xfloattypeptr, SSAScope::hint()), 4); - inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list()); -} diff --git a/tools/drawergen/ssa/ssa_float_ptr.h b/tools/drawergen/ssa/ssa_float_ptr.h deleted file mode 100644 index 66e462539a..0000000000 --- a/tools/drawergen/ssa/ssa_float_ptr.h +++ /dev/null @@ -1,49 +0,0 @@ -/* -** SSA float32 pointer -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 'as-is', without any express or implied -** warranty. 
In no event will the authors be held liable for any damages -** arising from the use of this software. -** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. -** 3. This notice may not be removed or altered from any source distribution. -** -*/ - -#pragma once - -#include "ssa_float.h" -#include "ssa_int.h" -#include "ssa_vec4f.h" - -namespace llvm { class Value; } -namespace llvm { class Type; } - -class SSAFloatPtr -{ -public: - SSAFloatPtr(); - explicit SSAFloatPtr(llvm::Value *v); - static SSAFloatPtr from_llvm(llvm::Value *v) { return SSAFloatPtr(v); } - static llvm::Type *llvm_type(); - SSAFloatPtr operator[](SSAInt index) const; - SSAFloatPtr operator[](int index) const { return (*this)[SSAInt(index)]; } - SSAFloat load(bool constantScopeDomain) const; - SSAVec4f load_vec4f(bool constantScopeDomain) const; - SSAVec4f load_unaligned_vec4f(bool constantScopeDomain) const; - void store(const SSAFloat &new_value); - void store_vec4f(const SSAVec4f &new_value); - void store_unaligned_vec4f(const SSAVec4f &new_value); - - llvm::Value *v; -}; diff --git a/tools/drawergen/ssa/ssa_for_block.cpp b/tools/drawergen/ssa/ssa_for_block.cpp deleted file mode 100644 index 12b2f1fc5f..0000000000 --- a/tools/drawergen/ssa/ssa_for_block.cpp +++ /dev/null @@ -1,62 +0,0 @@ -/* -** LLVM for loop branching -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 'as-is', without any express or implied -** warranty. 
In no event will the authors be held liable for any damages -** arising from the use of this software. -** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. -** 3. This notice may not be removed or altered from any source distribution. -** -*/ - -#include "precomp.h" -#include "ssa_for_block.h" -#include "ssa_scope.h" - -SSAForBlock::SSAForBlock() -: if_basic_block(0), loop_basic_block(0), end_basic_block(0) -{ - if_basic_block = llvm::BasicBlock::Create(SSAScope::context(), "forbegin", SSAScope::builder().GetInsertBlock()->getParent()); - loop_basic_block = llvm::BasicBlock::Create(SSAScope::context(), "forloop", SSAScope::builder().GetInsertBlock()->getParent()); - end_basic_block = llvm::BasicBlock::Create(SSAScope::context(), "forend", SSAScope::builder().GetInsertBlock()->getParent()); - SSAScope::builder().CreateBr(if_basic_block); - SSAScope::builder().SetInsertPoint(if_basic_block); -} - -void SSAForBlock::loop_block(SSABool true_condition, int unroll_count) -{ - auto branch = SSAScope::builder().CreateCondBr(true_condition.v, loop_basic_block, end_basic_block); -#if LLVM_VERSION_MAJOR > 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 9) - if (unroll_count > 0) - { - using namespace llvm; - auto md_unroll_enable = MDNode::get(SSAScope::context(), { - MDString::get(SSAScope::context(), "llvm.loop.unroll.enable") - }); - auto md_unroll_count = MDNode::get(SSAScope::context(), { - MDString::get(SSAScope::context(), 
"llvm.loop.unroll.count"), - ConstantAsMetadata::get(ConstantInt::get(SSAScope::context(), APInt(32, unroll_count))) - }); - auto md_loop = MDNode::getDistinct(SSAScope::context(), { md_unroll_enable, md_unroll_count }); - branch->setMetadata(LLVMContext::MD_loop, md_loop); - } -#endif - SSAScope::builder().SetInsertPoint(loop_basic_block); -} - -void SSAForBlock::end_block() -{ - SSAScope::builder().CreateBr(if_basic_block); - SSAScope::builder().SetInsertPoint(end_basic_block); -} diff --git a/tools/drawergen/ssa/ssa_for_block.h b/tools/drawergen/ssa/ssa_for_block.h deleted file mode 100644 index b65fb1c8bd..0000000000 --- a/tools/drawergen/ssa/ssa_for_block.h +++ /dev/null @@ -1,38 +0,0 @@ -/* -** LLVM for loop branching -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 'as-is', without any express or implied -** warranty. In no event will the authors be held liable for any damages -** arising from the use of this software. -** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. -** 3. This notice may not be removed or altered from any source distribution. 
-** -*/ - -#pragma once - -#include "ssa_bool.h" - -class SSAForBlock -{ -public: - SSAForBlock(); - void loop_block(SSABool true_condition, int unroll_count = 8); - void end_block(); - -private: - llvm::BasicBlock *if_basic_block; - llvm::BasicBlock *loop_basic_block; - llvm::BasicBlock *end_basic_block; -}; diff --git a/tools/drawergen/ssa/ssa_function.cpp b/tools/drawergen/ssa/ssa_function.cpp deleted file mode 100644 index e21b0b2299..0000000000 --- a/tools/drawergen/ssa/ssa_function.cpp +++ /dev/null @@ -1,76 +0,0 @@ -/* -** LLVM function -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 'as-is', without any express or implied -** warranty. In no event will the authors be held liable for any damages -** arising from the use of this software. -** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. -** 3. This notice may not be removed or altered from any source distribution. 
-** -*/ - -#include "precomp.h" -#include "ssa_function.h" -#include "ssa_int.h" -#include "ssa_scope.h" -#include "ssa_value.h" - -SSAFunction::SSAFunction(const std::string name) -: func(), name(name), return_type(llvm::Type::getVoidTy(SSAScope::context())) -{ -} - -void SSAFunction::set_return_type(llvm::Type *type) -{ - return_type = type; -} - -void SSAFunction::add_parameter(llvm::Type *type) -{ - parameters.push_back(type); -} - -void SSAFunction::create_public() -{ - func = SSAScope::module()->getFunction(name.c_str()); - if (func == 0) - { - llvm::FunctionType *function_type = llvm::FunctionType::get(return_type, parameters, false); - func = llvm::Function::Create(function_type, llvm::Function::ExternalLinkage, name.c_str(), SSAScope::module()); - //func->setCallingConv(llvm::CallingConv::X86_StdCall); - } - llvm::BasicBlock *entry = llvm::BasicBlock::Create(SSAScope::context(), "entry", func); - SSAScope::builder().SetInsertPoint(entry); -} - -void SSAFunction::create_private() -{ - func = SSAScope::module()->getFunction(name.c_str()); - if (func == 0) - { - llvm::FunctionType *function_type = llvm::FunctionType::get(return_type, parameters, false); - func = llvm::Function::Create(function_type, llvm::Function::PrivateLinkage, name.c_str(), SSAScope::module()); - func->addFnAttr(llvm::Attribute::AlwaysInline); - } - llvm::BasicBlock *entry = llvm::BasicBlock::Create(SSAScope::context(), "entry", func); - SSAScope::builder().SetInsertPoint(entry); -} - -SSAValue SSAFunction::parameter(int index) -{ - llvm::Function::arg_iterator arg_it = func->arg_begin(); - for (int i = 0; i < index; i++) - ++arg_it; - return SSAValue::from_llvm(static_cast(arg_it)); -} diff --git a/tools/drawergen/ssa/ssa_function.h b/tools/drawergen/ssa/ssa_function.h deleted file mode 100644 index faa8b03fe8..0000000000 --- a/tools/drawergen/ssa/ssa_function.h +++ /dev/null @@ -1,51 +0,0 @@ -/* -** LLVM function -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 
'as-is', without any express or implied -** warranty. In no event will the authors be held liable for any damages -** arising from the use of this software. -** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. -** 3. This notice may not be removed or altered from any source distribution. -** -*/ - -#pragma once - -#include -#include - -namespace llvm { class Value; } -namespace llvm { class Type; } -namespace llvm { class Function; } - -class SSAInt; -class SSAValue; - -class SSAFunction -{ -public: - SSAFunction(const std::string name); - void set_return_type(llvm::Type *type); - void add_parameter(llvm::Type *type); - void create_public(); - void create_private(); - SSAValue parameter(int index); - - llvm::Function *func; - -private: - std::string name; - llvm::Type *return_type; - std::vector parameters; -}; diff --git a/tools/drawergen/ssa/ssa_if_block.cpp b/tools/drawergen/ssa/ssa_if_block.cpp deleted file mode 100644 index 068073f983..0000000000 --- a/tools/drawergen/ssa/ssa_if_block.cpp +++ /dev/null @@ -1,58 +0,0 @@ -/* -** LLVM if statement branching -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 'as-is', without any express or implied -** warranty. In no event will the authors be held liable for any damages -** arising from the use of this software. 
-** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. -** 3. This notice may not be removed or altered from any source distribution. -** -*/ - -#include "precomp.h" -#include "ssa_if_block.h" -#include "ssa_scope.h" - -SSAIfBlock::SSAIfBlock() -: if_basic_block(0), else_basic_block(0), end_basic_block(0) -{ -} - -void SSAIfBlock::if_block(SSABool true_condition) -{ - if_basic_block = llvm::BasicBlock::Create(SSAScope::context(), "if", SSAScope::builder().GetInsertBlock()->getParent()); - else_basic_block = llvm::BasicBlock::Create(SSAScope::context(), "else", SSAScope::builder().GetInsertBlock()->getParent()); - end_basic_block = else_basic_block; - SSAScope::builder().CreateCondBr(true_condition.v, if_basic_block, else_basic_block); - SSAScope::builder().SetInsertPoint(if_basic_block); -} - -void SSAIfBlock::else_block() -{ - end_basic_block = llvm::BasicBlock::Create(SSAScope::context(), "end", SSAScope::builder().GetInsertBlock()->getParent()); - SSAScope::builder().CreateBr(end_basic_block); - SSAScope::builder().SetInsertPoint(else_basic_block); -} - -void SSAIfBlock::end_block() -{ - SSAScope::builder().CreateBr(end_basic_block); - SSAScope::builder().SetInsertPoint(end_basic_block); -} - -void SSAIfBlock::end_retvoid() -{ - SSAScope::builder().CreateRetVoid(); - SSAScope::builder().SetInsertPoint(end_basic_block); -} diff --git a/tools/drawergen/ssa/ssa_if_block.h b/tools/drawergen/ssa/ssa_if_block.h deleted file mode 100644 
index b958883eb5..0000000000 --- a/tools/drawergen/ssa/ssa_if_block.h +++ /dev/null @@ -1,67 +0,0 @@ -/* -** LLVM if statement branching -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 'as-is', without any express or implied -** warranty. In no event will the authors be held liable for any damages -** arising from the use of this software. -** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. -** 3. This notice may not be removed or altered from any source distribution. 
-** -*/ - -#pragma once - -#include "ssa_bool.h" -#include "ssa_phi.h" - -class SSAIfBlock -{ -public: - SSAIfBlock(); - void if_block(SSABool true_condition); - void else_block(); - void end_block(); - void end_retvoid(); - -private: - llvm::BasicBlock *if_basic_block; - llvm::BasicBlock *else_basic_block; - llvm::BasicBlock *end_basic_block; -}; - -template -T ssa_min(T a, T b) -{ - SSAPhi phi; - SSAIfBlock if_block; - if_block.if_block(a <= b); - phi.add_incoming(a); - if_block.else_block(); - phi.add_incoming(b); - if_block.end_block(); - return phi.create(); -} - -template -T ssa_max(T a, T b) -{ - SSAPhi phi; - SSAIfBlock if_block; - if_block.if_block(a >= b); - phi.add_incoming(a); - if_block.else_block(); - phi.add_incoming(b); - if_block.end_block(); - return phi.create(); -} diff --git a/tools/drawergen/ssa/ssa_int.cpp b/tools/drawergen/ssa/ssa_int.cpp deleted file mode 100644 index 3bee48a7e7..0000000000 --- a/tools/drawergen/ssa/ssa_int.cpp +++ /dev/null @@ -1,208 +0,0 @@ -/* -** SSA int32 -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 'as-is', without any express or implied -** warranty. In no event will the authors be held liable for any damages -** arising from the use of this software. -** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. -** 3. This notice may not be removed or altered from any source distribution. 
-** -*/ - -#include "precomp.h" -#include "ssa_int.h" -#include "ssa_float.h" -#include "ssa_ubyte.h" -#include "ssa_bool.h" -#include "ssa_scope.h" - -SSAInt::SSAInt() -: v(0) -{ -} - -SSAInt::SSAInt(int constant) -: v(0) -{ - v = llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, constant, true)); -} - -SSAInt::SSAInt(SSAFloat f, bool uint) -: v(0) -{ - if (uint) - v = SSAScope::builder().CreateFPToUI(f.v, llvm::Type::getInt32Ty(SSAScope::context()), SSAScope::hint()); - else - v = SSAScope::builder().CreateFPToSI(f.v, llvm::Type::getInt32Ty(SSAScope::context()), SSAScope::hint()); -} - -SSAInt::SSAInt(llvm::Value *v) -: v(v) -{ -} - -llvm::Type *SSAInt::llvm_type() -{ - return llvm::Type::getInt32Ty(SSAScope::context()); -} - -SSAInt SSAInt::MIN(SSAInt a, SSAInt b) -{ - return SSAInt::from_llvm(SSAScope::builder().CreateSelect((a < b).v, a.v, b.v, SSAScope::hint())); -} - -SSAInt SSAInt::MAX(SSAInt a, SSAInt b) -{ - return SSAInt::from_llvm(SSAScope::builder().CreateSelect((a > b).v, a.v, b.v, SSAScope::hint())); -} - -SSAInt SSAInt::clamp(SSAInt a, SSAInt b, SSAInt c) -{ - return SSAInt::MAX(SSAInt::MIN(a, c), b); -} - -SSAInt SSAInt::add(SSAInt b, bool no_unsigned_wrap, bool no_signed_wrap) -{ - return SSAInt::from_llvm(SSAScope::builder().CreateAdd(v, b.v, SSAScope::hint(), no_unsigned_wrap, no_signed_wrap)); -} - -SSAInt SSAInt::ashr(int bits) -{ - return SSAInt::from_llvm(SSAScope::builder().CreateAShr(v, bits, SSAScope::hint())); -} - -SSAUByte SSAInt::trunc_ubyte() -{ - return SSAUByte::from_llvm(SSAScope::builder().CreateTrunc(v, SSAUByte::llvm_type(), SSAScope::hint())); -} - -SSAInt operator+(const SSAInt &a, const SSAInt &b) -{ - return SSAInt::from_llvm(SSAScope::builder().CreateAdd(a.v, b.v, SSAScope::hint())); -} - -SSAInt operator-(const SSAInt &a, const SSAInt &b) -{ - return SSAInt::from_llvm(SSAScope::builder().CreateSub(a.v, b.v, SSAScope::hint())); -} - -SSAInt operator*(const SSAInt &a, const SSAInt &b) -{ - return 
SSAInt::from_llvm(SSAScope::builder().CreateMul(a.v, b.v, SSAScope::hint())); -} - -SSAInt operator/(const SSAInt &a, const SSAInt &b) -{ - return SSAInt::from_llvm(SSAScope::builder().CreateSDiv(a.v, b.v, SSAScope::hint())); -} - -SSAInt operator%(const SSAInt &a, const SSAInt &b) -{ - return SSAInt::from_llvm(SSAScope::builder().CreateSRem(a.v, b.v, SSAScope::hint())); -} - -SSAInt operator+(int a, const SSAInt &b) -{ - return SSAInt(a) + b; -} - -SSAInt operator-(int a, const SSAInt &b) -{ - return SSAInt(a) - b; -} - -SSAInt operator*(int a, const SSAInt &b) -{ - return SSAInt(a) * b; -} - -SSAInt operator/(int a, const SSAInt &b) -{ - return SSAInt(a) / b; -} - -SSAInt operator%(int a, const SSAInt &b) -{ - return SSAInt(a) % b; -} - -SSAInt operator+(const SSAInt &a, int b) -{ - return a + SSAInt(b); -} - -SSAInt operator-(const SSAInt &a, int b) -{ - return a - SSAInt(b); -} - -SSAInt operator*(const SSAInt &a, int b) -{ - return a * SSAInt(b); -} - -SSAInt operator/(const SSAInt &a, int b) -{ - return a / SSAInt(b); -} - -SSAInt operator%(const SSAInt &a, int b) -{ - return a % SSAInt(b); -} - -SSAInt operator<<(const SSAInt &a, int bits) -{ - return SSAInt::from_llvm(SSAScope::builder().CreateShl(a.v, bits, SSAScope::hint())); -} - -SSAInt operator>>(const SSAInt &a, int bits) -{ - return SSAInt::from_llvm(SSAScope::builder().CreateLShr(a.v, bits, SSAScope::hint())); -} - -SSAInt operator<<(const SSAInt &a, const SSAInt &bits) -{ - return SSAInt::from_llvm(SSAScope::builder().CreateShl(a.v, bits.v, SSAScope::hint())); -} - -SSAInt operator>>(const SSAInt &a, const SSAInt &bits) -{ - return SSAInt::from_llvm(SSAScope::builder().CreateLShr(a.v, bits.v, SSAScope::hint())); -} - -SSAInt operator&(const SSAInt &a, int b) -{ - return SSAInt::from_llvm(SSAScope::builder().CreateAnd(a.v, b, SSAScope::hint())); -} - -SSAInt operator&(const SSAInt &a, const SSAInt &b) -{ - return SSAInt::from_llvm(SSAScope::builder().CreateAnd(a.v, b.v, SSAScope::hint())); -} - 
-SSAInt operator|(const SSAInt &a, int b) -{ - return SSAInt::from_llvm(SSAScope::builder().CreateOr(a.v, b, SSAScope::hint())); -} - -SSAInt operator|(const SSAInt &a, const SSAInt &b) -{ - return SSAInt::from_llvm(SSAScope::builder().CreateOr(a.v, b.v, SSAScope::hint())); -} - -SSAInt operator~(const SSAInt &a) -{ - return SSAInt::from_llvm(SSAScope::builder().CreateNot(a.v, SSAScope::hint())); -} diff --git a/tools/drawergen/ssa/ssa_int.h b/tools/drawergen/ssa/ssa_int.h deleted file mode 100644 index dab8adcb99..0000000000 --- a/tools/drawergen/ssa/ssa_int.h +++ /dev/null @@ -1,80 +0,0 @@ -/* -** SSA int32 -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 'as-is', without any express or implied -** warranty. In no event will the authors be held liable for any damages -** arising from the use of this software. -** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. -** 3. This notice may not be removed or altered from any source distribution. 
-** -*/ - -#pragma once - -namespace llvm { class Value; } -namespace llvm { class Type; } - -class SSAFloat; -class SSAUByte; - -class SSAInt -{ -public: - SSAInt(); - explicit SSAInt(int constant); - SSAInt(SSAFloat f, bool uint); - explicit SSAInt(llvm::Value *v); - static SSAInt from_llvm(llvm::Value *v) { return SSAInt(v); } - static llvm::Type *llvm_type(); - - static SSAInt MIN(SSAInt a, SSAInt b); - static SSAInt MAX(SSAInt a, SSAInt b); - static SSAInt clamp(SSAInt a, SSAInt b, SSAInt c); - - SSAInt add(SSAInt b, bool no_unsigned_wrap, bool no_signed_wrap); - SSAInt ashr(int bits); - - SSAUByte trunc_ubyte(); - - llvm::Value *v; -}; - -SSAInt operator+(const SSAInt &a, const SSAInt &b); -SSAInt operator-(const SSAInt &a, const SSAInt &b); -SSAInt operator*(const SSAInt &a, const SSAInt &b); -SSAInt operator/(const SSAInt &a, const SSAInt &b); -SSAInt operator%(const SSAInt &a, const SSAInt &b); - -SSAInt operator+(int a, const SSAInt &b); -SSAInt operator-(int a, const SSAInt &b); -SSAInt operator*(int a, const SSAInt &b); -SSAInt operator/(int a, const SSAInt &b); -SSAInt operator%(int a, const SSAInt &b); - -SSAInt operator+(const SSAInt &a, int b); -SSAInt operator-(const SSAInt &a, int b); -SSAInt operator*(const SSAInt &a, int b); -SSAInt operator/(const SSAInt &a, int b); -SSAInt operator%(const SSAInt &a, int b); - -SSAInt operator<<(const SSAInt &a, int bits); -SSAInt operator>>(const SSAInt &a, int bits); -SSAInt operator<<(const SSAInt &a, const SSAInt &bits); -SSAInt operator>>(const SSAInt &a, const SSAInt &bits); - -SSAInt operator&(const SSAInt &a, int b); -SSAInt operator&(const SSAInt &a, const SSAInt &b); -SSAInt operator|(const SSAInt &a, int b); -SSAInt operator|(const SSAInt &a, const SSAInt &b); -SSAInt operator~(const SSAInt &a); diff --git a/tools/drawergen/ssa/ssa_int_ptr.cpp b/tools/drawergen/ssa/ssa_int_ptr.cpp deleted file mode 100644 index 5f60f73589..0000000000 --- a/tools/drawergen/ssa/ssa_int_ptr.cpp +++ /dev/null @@ -1,111 
+0,0 @@ -/* -** SSA int32 pointer -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 'as-is', without any express or implied -** warranty. In no event will the authors be held liable for any damages -** arising from the use of this software. -** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. -** 3. This notice may not be removed or altered from any source distribution. -** -*/ - -#include "precomp.h" -#include "ssa_int_ptr.h" -#include "ssa_scope.h" -#include "ssa_bool.h" - -SSAIntPtr::SSAIntPtr() -: v(0) -{ -} - -SSAIntPtr::SSAIntPtr(llvm::Value *v) -: v(v) -{ -} - -llvm::Type *SSAIntPtr::llvm_type() -{ - return llvm::Type::getInt32PtrTy(SSAScope::context()); -} - -SSAIntPtr SSAIntPtr::operator[](SSAInt index) const -{ - return SSAIntPtr::from_llvm(SSAScope::builder().CreateGEP(v, index.v, SSAScope::hint())); -} - -SSAInt SSAIntPtr::load(bool constantScopeDomain) const -{ - auto loadInst = SSAScope::builder().CreateLoad(v, false, SSAScope::hint()); - if (constantScopeDomain) - loadInst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list()); - return SSAInt::from_llvm(loadInst); -} - -SSAVec4i SSAIntPtr::load_vec4i(bool constantScopeDomain) const -{ - llvm::PointerType *m4xint32typeptr = llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 4)->getPointerTo(); - auto loadInst = SSAScope::builder().CreateAlignedLoad(SSAScope::builder().CreateBitCast(v, m4xint32typeptr, 
SSAScope::hint()), 16, false, SSAScope::hint()); - if (constantScopeDomain) - loadInst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list()); - return SSAVec4i::from_llvm(loadInst); -} - -SSAVec4i SSAIntPtr::load_unaligned_vec4i(bool constantScopeDomain) const -{ - llvm::PointerType *m4xint32typeptr = llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 4)->getPointerTo(); - auto loadInst = SSAScope::builder().CreateAlignedLoad(SSAScope::builder().CreateBitCast(v, m4xint32typeptr, SSAScope::hint()), 1, false, SSAScope::hint()); - if (constantScopeDomain) - loadInst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list()); - return SSAVec4i::from_llvm(loadInst); -} - -void SSAIntPtr::store(const SSAInt &new_value) -{ - auto inst = SSAScope::builder().CreateStore(new_value.v, v, false); - inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list()); -} - -void SSAIntPtr::store_vec4i(const SSAVec4i &new_value) -{ - llvm::PointerType *m4xint32typeptr = llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 4)->getPointerTo(); - auto inst = SSAScope::builder().CreateAlignedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xint32typeptr, SSAScope::hint()), 16); - inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list()); -} - -void SSAIntPtr::store_unaligned_vec4i(const SSAVec4i &new_value) -{ - llvm::PointerType *m4xint32typeptr = llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 4)->getPointerTo(); - auto inst = SSAScope::builder().CreateAlignedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xint32typeptr, SSAScope::hint()), 4); - inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list()); -} - -void SSAIntPtr::store_masked_vec4i(const SSAVec4i &new_value, SSABool mask[4]) -{ - // Create mask vector - std::vector maskconstants; - maskconstants.resize(4, 
llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(1, 0, false))); - llvm::Value *maskValue = llvm::ConstantVector::get(maskconstants); -#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 9) - for (int i = 0; i < 4; i++) - maskValue = SSAScope::builder().CreateInsertElement(maskValue, mask[i].v, SSAInt(i).v, SSAScope::hint()); -#else - for (int i = 0; i < 4; i++) - maskValue = SSAScope::builder().CreateInsertElement(maskValue, mask[i].v, (uint64_t)i, SSAScope::hint()); -#endif - - llvm::PointerType *m4xint32typeptr = llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 4)->getPointerTo(); - auto inst = SSAScope::builder().CreateMaskedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xint32typeptr, SSAScope::hint()), 1, maskValue); - inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list()); -} diff --git a/tools/drawergen/ssa/ssa_int_ptr.h b/tools/drawergen/ssa/ssa_int_ptr.h deleted file mode 100644 index da6ecf168b..0000000000 --- a/tools/drawergen/ssa/ssa_int_ptr.h +++ /dev/null @@ -1,52 +0,0 @@ -/* -** SSA int32 pointer -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 'as-is', without any express or implied -** warranty. In no event will the authors be held liable for any damages -** arising from the use of this software. -** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. -** 3. 
This notice may not be removed or altered from any source distribution. -** -*/ - -#pragma once - -#include "ssa_float.h" -#include "ssa_int.h" -#include "ssa_vec4i.h" - -namespace llvm { class Value; } -namespace llvm { class Type; } - -class SSABool; - -class SSAIntPtr -{ -public: - SSAIntPtr(); - explicit SSAIntPtr(llvm::Value *v); - static SSAIntPtr from_llvm(llvm::Value *v) { return SSAIntPtr(v); } - static llvm::Type *llvm_type(); - SSAIntPtr operator[](SSAInt index) const; - SSAIntPtr operator[](int index) const { return (*this)[SSAInt(index)]; } - SSAInt load(bool constantScopeDomain) const; - SSAVec4i load_vec4i(bool constantScopeDomain) const; - SSAVec4i load_unaligned_vec4i(bool constantScopeDomain) const; - void store(const SSAInt &new_value); - void store_vec4i(const SSAVec4i &new_value); - void store_unaligned_vec4i(const SSAVec4i &new_value); - void store_masked_vec4i(const SSAVec4i &new_value, SSABool mask[4]); - - llvm::Value *v; -}; diff --git a/tools/drawergen/ssa/ssa_phi.h b/tools/drawergen/ssa/ssa_phi.h deleted file mode 100644 index 66f233b6df..0000000000 --- a/tools/drawergen/ssa/ssa_phi.h +++ /dev/null @@ -1,54 +0,0 @@ -/* -** SSA phi node -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 'as-is', without any express or implied -** warranty. In no event will the authors be held liable for any damages -** arising from the use of this software. -** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. 
Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. -** 3. This notice may not be removed or altered from any source distribution. -** -*/ - -#pragma once - -#include "ssa_scope.h" - -class SSAIfBlock; - -template -class SSAPhi -{ -public: - void add_incoming(SSAVariable var) - { - incoming.push_back(Incoming(var.v, SSAScope::builder().GetInsertBlock())); - } - - SSAVariable create() - { - llvm::PHINode *phi_node = SSAScope::builder().CreatePHI(SSAVariable::llvm_type(), (unsigned int)incoming.size(), SSAScope::hint()); - for (size_t i = 0; i < incoming.size(); i++) - phi_node->addIncoming(incoming[i].v, incoming[i].bb); - return SSAVariable::from_llvm(phi_node); - } - -private: - struct Incoming - { - Incoming(llvm::Value *v, llvm::BasicBlock *bb) : v(v), bb(bb) { } - llvm::Value *v; - llvm::BasicBlock *bb; - }; - std::vector incoming; -}; diff --git a/tools/drawergen/ssa/ssa_scope.cpp b/tools/drawergen/ssa/ssa_scope.cpp deleted file mode 100644 index 6f081a74ad..0000000000 --- a/tools/drawergen/ssa/ssa_scope.cpp +++ /dev/null @@ -1,96 +0,0 @@ -/* -** SSA scope data -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 'as-is', without any express or implied -** warranty. In no event will the authors be held liable for any damages -** arising from the use of this software. -** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. -** 3. 
This notice may not be removed or altered from any source distribution. -** -*/ - -#include "precomp.h" -#include "ssa_scope.h" -#include "ssa_int.h" - -SSAScope::SSAScope(llvm::LLVMContext *context, llvm::Module *module, llvm::IRBuilder<> *builder) -: _context(context), _module(module), _builder(builder) -{ - instance = this; - - _constant_scope_domain = llvm::MDNode::get(SSAScope::context(), { llvm::MDString::get(SSAScope::context(), "ConstantScopeDomain") }); - _constant_scope = llvm::MDNode::getDistinct(SSAScope::context(), { _constant_scope_domain }); - _constant_scope_list = llvm::MDNode::get(SSAScope::context(), { _constant_scope }); -} - -SSAScope::~SSAScope() -{ - instance = 0; -} - -llvm::LLVMContext &SSAScope::context() -{ - return *instance->_context; -} - -llvm::Module *SSAScope::module() -{ - return instance->_module; -} - -llvm::IRBuilder<> &SSAScope::builder() -{ - return *instance->_builder; -} - -llvm::Function *SSAScope::intrinsic(llvm::Intrinsic::ID id, llvm::ArrayRef parameter_types) -{ - llvm::Function *func = module()->getFunction(llvm::Intrinsic::getName(id, parameter_types)); - if (func == 0) - func = llvm::Function::Create(llvm::Intrinsic::getType(context(), id, parameter_types), llvm::Function::ExternalLinkage, llvm::Intrinsic::getName(id, parameter_types), module()); - return func; -} - -llvm::Value *SSAScope::alloc_stack(llvm::Type *type) -{ - return alloc_stack(type, SSAInt(1)); -} - -llvm::Value *SSAScope::alloc_stack(llvm::Type *type, SSAInt size) -{ - // Allocas must be created at top of entry block for the PromoteMemoryToRegisterPass to work - llvm::BasicBlock &entry = SSAScope::builder().GetInsertBlock()->getParent()->getEntryBlock(); - llvm::IRBuilder<> alloca_builder(&entry, entry.begin()); - return alloca_builder.CreateAlloca(type, size.v, hint()); -} - -llvm::MDNode *SSAScope::constant_scope_list() -{ - return instance->_constant_scope_list; -} - -const std::string &SSAScope::hint() -{ - return instance->_hint; -} - -void 
SSAScope::set_hint(const std::string &new_hint) -{ - if (new_hint.empty()) - instance->_hint = "tmp"; - else - instance->_hint = new_hint; -} - -SSAScope *SSAScope::instance = 0; diff --git a/tools/drawergen/ssa/ssa_scope.h b/tools/drawergen/ssa/ssa_scope.h deleted file mode 100644 index 0b0228558f..0000000000 --- a/tools/drawergen/ssa/ssa_scope.h +++ /dev/null @@ -1,64 +0,0 @@ -/* -** SSA scope data -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 'as-is', without any express or implied -** warranty. In no event will the authors be held liable for any damages -** arising from the use of this software. -** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. -** 3. This notice may not be removed or altered from any source distribution. 
-** -*/ - -#pragma once - -class SSAInt; - -class SSAScope -{ -public: - SSAScope(llvm::LLVMContext *context, llvm::Module *module, llvm::IRBuilder<> *builder); - ~SSAScope(); - static llvm::LLVMContext &context(); - static llvm::Module *module(); - static llvm::IRBuilder<> &builder(); - static llvm::Function *intrinsic(llvm::Intrinsic::ID id, llvm::ArrayRef parameter_types = llvm::ArrayRef()); - static llvm::Value *alloc_stack(llvm::Type *type); - static llvm::Value *alloc_stack(llvm::Type *type, SSAInt size); - static llvm::MDNode *constant_scope_list(); - static const std::string &hint(); - static void set_hint(const std::string &hint); - -private: - static SSAScope *instance; - llvm::LLVMContext *_context; - llvm::Module *_module; - llvm::IRBuilder<> *_builder; - llvm::MDNode *_constant_scope_domain; - llvm::MDNode *_constant_scope; - llvm::MDNode *_constant_scope_list; - std::string _hint; -}; - -class SSAScopeHint -{ -public: - SSAScopeHint() : old_hint(SSAScope::hint()) { } - SSAScopeHint(const std::string &hint) : old_hint(SSAScope::hint()) { SSAScope::set_hint(hint); } - ~SSAScopeHint() { SSAScope::set_hint(old_hint); } - void set(const std::string &hint) { SSAScope::set_hint(hint); } - void clear() { SSAScope::set_hint(old_hint); } - -private: - std::string old_hint; -}; diff --git a/tools/drawergen/ssa/ssa_short.cpp b/tools/drawergen/ssa/ssa_short.cpp deleted file mode 100644 index ed9d90dd01..0000000000 --- a/tools/drawergen/ssa/ssa_short.cpp +++ /dev/null @@ -1,174 +0,0 @@ -/* -** SSA int16 -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 'as-is', without any express or implied -** warranty. In no event will the authors be held liable for any damages -** arising from the use of this software. -** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. 
The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. -** 3. This notice may not be removed or altered from any source distribution. -** -*/ - -#include "precomp.h" -#include "ssa_short.h" -#include "ssa_float.h" -#include "ssa_int.h" -#include "ssa_scope.h" - -SSAShort::SSAShort() -: v(0) -{ -} - -SSAShort::SSAShort(int constant) -: v(0) -{ - v = llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(16, constant, true)); -} - -SSAShort::SSAShort(SSAFloat f) -: v(0) -{ - v = SSAScope::builder().CreateFPToSI(f.v, llvm::Type::getInt16Ty(SSAScope::context()), SSAScope::hint()); -} - -SSAShort::SSAShort(llvm::Value *v) -: v(v) -{ -} - -llvm::Type *SSAShort::llvm_type() -{ - return llvm::Type::getInt16Ty(SSAScope::context()); -} - -SSAInt SSAShort::zext_int() -{ - return SSAInt::from_llvm(SSAScope::builder().CreateZExt(v, SSAInt::llvm_type(), SSAScope::hint())); -} - -SSAShort operator+(const SSAShort &a, const SSAShort &b) -{ - return SSAShort::from_llvm(SSAScope::builder().CreateAdd(a.v, b.v, SSAScope::hint())); -} - -SSAShort operator-(const SSAShort &a, const SSAShort &b) -{ - return SSAShort::from_llvm(SSAScope::builder().CreateSub(a.v, b.v, SSAScope::hint())); -} - -SSAShort operator*(const SSAShort &a, const SSAShort &b) -{ - return SSAShort::from_llvm(SSAScope::builder().CreateMul(a.v, b.v, SSAScope::hint())); -} - -SSAShort operator/(const SSAShort &a, const SSAShort &b) -{ - return SSAShort::from_llvm(SSAScope::builder().CreateSDiv(a.v, b.v, SSAScope::hint())); -} - -SSAShort operator%(const SSAShort &a, const SSAShort &b) -{ - return SSAShort::from_llvm(SSAScope::builder().CreateSRem(a.v, b.v, SSAScope::hint())); -} - -SSAShort 
operator+(int a, const SSAShort &b) -{ - return SSAShort(a) + b; -} - -SSAShort operator-(int a, const SSAShort &b) -{ - return SSAShort(a) - b; -} - -SSAShort operator*(int a, const SSAShort &b) -{ - return SSAShort(a) * b; -} - -SSAShort operator/(int a, const SSAShort &b) -{ - return SSAShort(a) / b; -} - -SSAShort operator%(int a, const SSAShort &b) -{ - return SSAShort(a) % b; -} - -SSAShort operator+(const SSAShort &a, int b) -{ - return a + SSAShort(b); -} - -SSAShort operator-(const SSAShort &a, int b) -{ - return a - SSAShort(b); -} - -SSAShort operator*(const SSAShort &a, int b) -{ - return a * SSAShort(b); -} - -SSAShort operator/(const SSAShort &a, int b) -{ - return a / SSAShort(b); -} - -SSAShort operator%(const SSAShort &a, int b) -{ - return a % SSAShort(b); -} - -SSAShort operator<<(const SSAShort &a, int bits) -{ - return SSAShort::from_llvm(SSAScope::builder().CreateShl(a.v, bits, SSAScope::hint())); -} - -SSAShort operator>>(const SSAShort &a, int bits) -{ - return SSAShort::from_llvm(SSAScope::builder().CreateLShr(a.v, bits, SSAScope::hint())); -} - -SSAShort operator<<(const SSAShort &a, const SSAInt &bits) -{ - return SSAShort::from_llvm(SSAScope::builder().CreateShl(a.v, bits.v, SSAScope::hint())); -} - -SSAShort operator>>(const SSAShort &a, const SSAInt &bits) -{ - return SSAShort::from_llvm(SSAScope::builder().CreateLShr(a.v, bits.v, SSAScope::hint())); -} - -SSAShort operator&(const SSAShort &a, int b) -{ - return SSAShort::from_llvm(SSAScope::builder().CreateAnd(a.v, b, SSAScope::hint())); -} - -SSAShort operator&(const SSAShort &a, const SSAShort &b) -{ - return SSAShort::from_llvm(SSAScope::builder().CreateAnd(a.v, b.v, SSAScope::hint())); -} - -SSAShort operator|(const SSAShort &a, int b) -{ - return SSAShort::from_llvm(SSAScope::builder().CreateOr(a.v, b, SSAScope::hint())); -} - -SSAShort operator|(const SSAShort &a, const SSAShort &b) -{ - return SSAShort::from_llvm(SSAScope::builder().CreateOr(a.v, b.v, SSAScope::hint())); -} 
diff --git a/tools/drawergen/ssa/ssa_short.h b/tools/drawergen/ssa/ssa_short.h deleted file mode 100644 index efb782b85e..0000000000 --- a/tools/drawergen/ssa/ssa_short.h +++ /dev/null @@ -1,72 +0,0 @@ -/* -** SSA int16 -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 'as-is', without any express or implied -** warranty. In no event will the authors be held liable for any damages -** arising from the use of this software. -** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. -** 3. This notice may not be removed or altered from any source distribution. 
-** -*/ - -#pragma once - -namespace llvm { class Value; } -namespace llvm { class Type; } - -class SSAFloat; -class SSAInt; - -class SSAShort -{ -public: - SSAShort(); - explicit SSAShort(int constant); - SSAShort(SSAFloat f); - explicit SSAShort(llvm::Value *v); - static SSAShort from_llvm(llvm::Value *v) { return SSAShort(v); } - static llvm::Type *llvm_type(); - - SSAInt zext_int(); - - llvm::Value *v; -}; - -SSAShort operator+(const SSAShort &a, const SSAShort &b); -SSAShort operator-(const SSAShort &a, const SSAShort &b); -SSAShort operator*(const SSAShort &a, const SSAShort &b); -SSAShort operator/(const SSAShort &a, const SSAShort &b); -SSAShort operator%(const SSAShort &a, const SSAShort &b); - -SSAShort operator+(int a, const SSAShort &b); -SSAShort operator-(int a, const SSAShort &b); -SSAShort operator*(int a, const SSAShort &b); -SSAShort operator/(int a, const SSAShort &b); -SSAShort operator%(int a, const SSAShort &b); - -SSAShort operator+(const SSAShort &a, int b); -SSAShort operator-(const SSAShort &a, int b); -SSAShort operator*(const SSAShort &a, int b); -SSAShort operator/(const SSAShort &a, int b); -SSAShort operator%(const SSAShort &a, int b); - -SSAShort operator<<(const SSAShort &a, int bits); -SSAShort operator>>(const SSAShort &a, int bits); -SSAShort operator<<(const SSAShort &a, const SSAInt &bits); -SSAShort operator>>(const SSAShort &a, const SSAInt &bits); - -SSAShort operator&(const SSAShort &a, int b); -SSAShort operator&(const SSAShort &a, const SSAShort &b); -SSAShort operator|(const SSAShort &a, int b); -SSAShort operator|(const SSAShort &a, const SSAShort &b); diff --git a/tools/drawergen/ssa/ssa_stack.h b/tools/drawergen/ssa/ssa_stack.h deleted file mode 100644 index 24c807a8a5..0000000000 --- a/tools/drawergen/ssa/ssa_stack.h +++ /dev/null @@ -1,48 +0,0 @@ -/* -** LLVM stack variable -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 'as-is', without any express or implied -** warranty. 
In no event will the authors be held liable for any damages -** arising from the use of this software. -** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. -** 3. This notice may not be removed or altered from any source distribution. -** -*/ - -#pragma once - -#include "ssa_scope.h" - -template -class SSAStack -{ -public: - SSAStack() - : v(0) - { - v = SSAScope::alloc_stack(SSAVariable::llvm_type()); - } - - SSAVariable load() const - { - return SSAVariable::from_llvm(SSAScope::builder().CreateLoad(v, SSAScope::hint())); - } - - void store(const SSAVariable &new_value) - { - SSAScope::builder().CreateStore(new_value.v, v); - } - - llvm::Value *v; -}; diff --git a/tools/drawergen/ssa/ssa_struct_type.cpp b/tools/drawergen/ssa/ssa_struct_type.cpp deleted file mode 100644 index 9e11154edc..0000000000 --- a/tools/drawergen/ssa/ssa_struct_type.cpp +++ /dev/null @@ -1,40 +0,0 @@ -/* -** LLVM struct -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 'as-is', without any express or implied -** warranty. In no event will the authors be held liable for any damages -** arising from the use of this software. -** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. 
The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. -** 3. This notice may not be removed or altered from any source distribution. -** -*/ - -#include "precomp.h" -#include "ssa_struct_type.h" -#include "ssa_scope.h" - -void SSAStructType::add_parameter(llvm::Type *type) -{ - elements.push_back(type); -} - -llvm::Type *SSAStructType::llvm_type() -{ - return llvm::StructType::get(SSAScope::context(), elements, false); -} - -llvm::Type *SSAStructType::llvm_type_packed() -{ - return llvm::StructType::get(SSAScope::context(), elements, true); -} diff --git a/tools/drawergen/ssa/ssa_struct_type.h b/tools/drawergen/ssa/ssa_struct_type.h deleted file mode 100644 index f21dc92c30..0000000000 --- a/tools/drawergen/ssa/ssa_struct_type.h +++ /dev/null @@ -1,38 +0,0 @@ -/* -** LLVM struct -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 'as-is', without any express or implied -** warranty. In no event will the authors be held liable for any damages -** arising from the use of this software. -** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. -** 3. 
This notice may not be removed or altered from any source distribution. -** -*/ - -#pragma once - -#include - -namespace llvm { class Type; } - -class SSAStructType -{ -public: - void add_parameter(llvm::Type *type); - llvm::Type *llvm_type(); - llvm::Type *llvm_type_packed(); - -private: - std::vector elements; -}; diff --git a/tools/drawergen/ssa/ssa_ubyte.cpp b/tools/drawergen/ssa/ssa_ubyte.cpp deleted file mode 100644 index 2ca75d036c..0000000000 --- a/tools/drawergen/ssa/ssa_ubyte.cpp +++ /dev/null @@ -1,122 +0,0 @@ -/* -** SSA uint8 -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 'as-is', without any express or implied -** warranty. In no event will the authors be held liable for any damages -** arising from the use of this software. -** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. -** 3. This notice may not be removed or altered from any source distribution. 
-** -*/ - -#include "precomp.h" -#include "ssa_ubyte.h" -#include "ssa_int.h" -#include "ssa_scope.h" - -SSAUByte::SSAUByte() -: v(0) -{ -} - -SSAUByte::SSAUByte(unsigned char constant) -: v(0) -{ - v = llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant, false)); -} - -SSAUByte::SSAUByte(llvm::Value *v) -: v(v) -{ -} - -llvm::Type *SSAUByte::llvm_type() -{ - return llvm::Type::getInt8Ty(SSAScope::context()); -} - -SSAInt SSAUByte::zext_int() -{ - return SSAInt::from_llvm(SSAScope::builder().CreateZExt(v, SSAInt::llvm_type(), SSAScope::hint())); -} - -SSAUByte operator+(const SSAUByte &a, const SSAUByte &b) -{ - return SSAUByte::from_llvm(SSAScope::builder().CreateAdd(a.v, b.v, SSAScope::hint())); -} - -SSAUByte operator-(const SSAUByte &a, const SSAUByte &b) -{ - return SSAUByte::from_llvm(SSAScope::builder().CreateSub(a.v, b.v, SSAScope::hint())); -} - -SSAUByte operator*(const SSAUByte &a, const SSAUByte &b) -{ - return SSAUByte::from_llvm(SSAScope::builder().CreateMul(a.v, b.v, SSAScope::hint())); -} -/* -SSAUByte operator/(const SSAUByte &a, const SSAUByte &b) -{ - return SSAScope::builder().CreateDiv(a.v, b.v); -} -*/ -SSAUByte operator+(unsigned char a, const SSAUByte &b) -{ - return SSAUByte(a) + b; -} - -SSAUByte operator-(unsigned char a, const SSAUByte &b) -{ - return SSAUByte(a) - b; -} - -SSAUByte operator*(unsigned char a, const SSAUByte &b) -{ - return SSAUByte(a) * b; -} -/* -SSAUByte operator/(unsigned char a, const SSAUByte &b) -{ - return SSAUByte(a) / b; -} -*/ -SSAUByte operator+(const SSAUByte &a, unsigned char b) -{ - return a + SSAUByte(b); -} - -SSAUByte operator-(const SSAUByte &a, unsigned char b) -{ - return a - SSAUByte(b); -} - -SSAUByte operator*(const SSAUByte &a, unsigned char b) -{ - return a * SSAUByte(b); -} -/* -SSAUByte operator/(const SSAUByte &a, unsigned char b) -{ - return a / SSAUByte(b); -} -*/ -SSAUByte operator<<(const SSAUByte &a, unsigned char bits) -{ - return 
SSAUByte::from_llvm(SSAScope::builder().CreateShl(a.v, bits)); -} - -SSAUByte operator>>(const SSAUByte &a, unsigned char bits) -{ - return SSAUByte::from_llvm(SSAScope::builder().CreateLShr(a.v, bits)); -} diff --git a/tools/drawergen/ssa/ssa_ubyte.h b/tools/drawergen/ssa/ssa_ubyte.h deleted file mode 100644 index ef1390162d..0000000000 --- a/tools/drawergen/ssa/ssa_ubyte.h +++ /dev/null @@ -1,60 +0,0 @@ -/* -** SSA uint8 -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 'as-is', without any express or implied -** warranty. In no event will the authors be held liable for any damages -** arising from the use of this software. -** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. -** 3. This notice may not be removed or altered from any source distribution. 
-** -*/ - -#pragma once - -namespace llvm { class Value; } -namespace llvm { class Type; } - -class SSAInt; - -class SSAUByte -{ -public: - SSAUByte(); - explicit SSAUByte(unsigned char constant); - explicit SSAUByte(llvm::Value *v); - static SSAUByte from_llvm(llvm::Value *v) { return SSAUByte(v); } - static llvm::Type *llvm_type(); - - SSAInt zext_int(); - - llvm::Value *v; -}; - -SSAUByte operator+(const SSAUByte &a, const SSAUByte &b); -SSAUByte operator-(const SSAUByte &a, const SSAUByte &b); -SSAUByte operator*(const SSAUByte &a, const SSAUByte &b); -//SSAUByte operator/(const SSAUByte &a, const SSAUByte &b); - -SSAUByte operator+(unsigned char a, const SSAUByte &b); -SSAUByte operator-(unsigned char a, const SSAUByte &b); -SSAUByte operator*(unsigned char a, const SSAUByte &b); -//SSAUByte operator/(unsigned char a, const SSAUByte &b); - -SSAUByte operator+(const SSAUByte &a, unsigned char b); -SSAUByte operator-(const SSAUByte &a, unsigned char b); -SSAUByte operator*(const SSAUByte &a, unsigned char b); -//SSAUByte operator/(const SSAUByte &a, unsigned char b); - -SSAUByte operator<<(const SSAUByte &a, unsigned char bits); -SSAUByte operator>>(const SSAUByte &a, unsigned char bits); diff --git a/tools/drawergen/ssa/ssa_ubyte_ptr.cpp b/tools/drawergen/ssa/ssa_ubyte_ptr.cpp deleted file mode 100644 index bde0b5b643..0000000000 --- a/tools/drawergen/ssa/ssa_ubyte_ptr.cpp +++ /dev/null @@ -1,173 +0,0 @@ -/* -** SSA uint8 pointer -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 'as-is', without any express or implied -** warranty. In no event will the authors be held liable for any damages -** arising from the use of this software. -** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. 
The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. -** 3. This notice may not be removed or altered from any source distribution. -** -*/ - -#include "precomp.h" -#include "ssa_ubyte_ptr.h" -#include "ssa_scope.h" -#include "ssa_bool.h" - -SSAUBytePtr::SSAUBytePtr() -: v(0) -{ -} - -SSAUBytePtr::SSAUBytePtr(llvm::Value *v) -: v(v) -{ -} - -llvm::Type *SSAUBytePtr::llvm_type() -{ - return llvm::Type::getInt8PtrTy(SSAScope::context()); -} - -SSAUBytePtr SSAUBytePtr::operator[](SSAInt index) const -{ - return SSAUBytePtr::from_llvm(SSAScope::builder().CreateGEP(v, index.v, SSAScope::hint())); -} - -SSAUByte SSAUBytePtr::load(bool constantScopeDomain) const -{ - auto loadInst = SSAScope::builder().CreateLoad(v, false, SSAScope::hint()); - if (constantScopeDomain) - loadInst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list()); - return SSAUByte::from_llvm(loadInst); -} - -SSAVec4i SSAUBytePtr::load_vec4ub(bool constantScopeDomain) const -{ - auto loadInst = SSAScope::builder().CreateLoad(SSAScope::builder().CreateBitCast(v, llvm::Type::getInt32PtrTy(SSAScope::context()), SSAScope::hint()), false, SSAScope::hint()); - if (constantScopeDomain) - loadInst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list()); - SSAInt i32 = SSAInt::from_llvm(loadInst); - return SSAVec4i::unpack(i32); -} - -SSAVec16ub SSAUBytePtr::load_vec16ub(bool constantScopeDomain) const -{ - llvm::PointerType *m16xint8typeptr = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 16)->getPointerTo(); - auto loadInst = SSAScope::builder().CreateAlignedLoad(SSAScope::builder().CreateBitCast(v, m16xint8typeptr, 
SSAScope::hint()), 16, false, SSAScope::hint()); - if (constantScopeDomain) - loadInst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list()); - return SSAVec16ub::from_llvm(loadInst); -} - -SSAVec16ub SSAUBytePtr::load_unaligned_vec16ub(bool constantScopeDomain) const -{ - llvm::PointerType *m16xint8typeptr = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 16)->getPointerTo(); - auto loadInst = SSAScope::builder().CreateAlignedLoad(SSAScope::builder().CreateBitCast(v, m16xint8typeptr, SSAScope::hint()), 1, false, SSAScope::hint()); - if (constantScopeDomain) - loadInst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list()); - return SSAVec16ub::from_llvm(loadInst); -} - -void SSAUBytePtr::store(const SSAUByte &new_value) -{ - auto inst = SSAScope::builder().CreateStore(new_value.v, v, false); - inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list()); -} - -void SSAUBytePtr::store_vec4ub(const SSAVec4i &new_value) -{ - // Store using saturate: - SSAVec8s v8s(new_value, new_value); - SSAVec16ub v16ub(v8s, v8s); - - llvm::Type *m16xint8type = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 16); - llvm::PointerType *m4xint8typeptr = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 4)->getPointerTo(); - std::vector constants; - constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 0))); - constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 1))); - constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 2))); - constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 3))); - llvm::Value *mask = llvm::ConstantVector::get(constants); - llvm::Value *val_vector = SSAScope::builder().CreateShuffleVector(v16ub.v, llvm::UndefValue::get(m16xint8type), mask, SSAScope::hint()); - llvm::StoreInst *inst = SSAScope::builder().CreateStore(val_vector, 
SSAScope::builder().CreateBitCast(v, m4xint8typeptr, SSAScope::hint()), false); - inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list()); -} - -void SSAUBytePtr::store_masked_vec4ub(const SSAVec4i &new_value, SSABool mask[4]) -{ - // Store using saturate: - SSAVec8s v8s(new_value, new_value); - SSAVec16ub v16ub(v8s, v8s); - - // Create mask vector - std::vector maskconstants; - maskconstants.resize(4, llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(1, 0, false))); - llvm::Value *maskValue = llvm::ConstantVector::get(maskconstants); -#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 9) - for (int i = 0; i < 4; i++) - maskValue = SSAScope::builder().CreateInsertElement(maskValue, mask[i].v, SSAInt(i).v, SSAScope::hint()); -#else - for (int i = 0; i < 4; i++) - maskValue = SSAScope::builder().CreateInsertElement(maskValue, mask[i].v, (uint64_t)i, SSAScope::hint()); -#endif - - llvm::Type *m16xint8type = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 16); - llvm::PointerType *m4xint8typeptr = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 4)->getPointerTo(); - std::vector constants; - constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 0))); - constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 1))); - constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 2))); - constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 3))); - llvm::Value *shufflemask = llvm::ConstantVector::get(constants); - llvm::Value *val_vector = SSAScope::builder().CreateShuffleVector(v16ub.v, llvm::UndefValue::get(m16xint8type), shufflemask, SSAScope::hint()); - llvm::CallInst *inst = SSAScope::builder().CreateMaskedStore(val_vector, SSAScope::builder().CreateBitCast(v, m4xint8typeptr, SSAScope::hint()), 1, maskValue); - inst->setMetadata(llvm::LLVMContext::MD_noalias, 
SSAScope::constant_scope_list()); -} - -void SSAUBytePtr::store_vec16ub(const SSAVec16ub &new_value) -{ - llvm::PointerType *m16xint8typeptr = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 16)->getPointerTo(); - llvm::StoreInst *inst = SSAScope::builder().CreateAlignedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m16xint8typeptr, SSAScope::hint()), 16); - inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list()); - - // The following generates _mm_stream_si128, maybe! - // llvm::MDNode *node = llvm::MDNode::get(SSAScope::context(), SSAScope::builder().getInt32(1)); - // inst->setMetadata(SSAScope::module()->getMDKindID("nontemporal"), node); -} - -void SSAUBytePtr::store_unaligned_vec16ub(const SSAVec16ub &new_value) -{ - llvm::PointerType *m16xint8typeptr = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 16)->getPointerTo(); - llvm::StoreInst *inst = SSAScope::builder().CreateAlignedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m16xint8typeptr, SSAScope::hint()), 4); - inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list()); -} - -void SSAUBytePtr::store_masked_vec16ub(const SSAVec16ub &new_value, SSABool mask[4]) -{ - std::vector constants; - constants.resize(16, llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(1, 0, false))); - llvm::Value *maskValue = llvm::ConstantVector::get(constants); -#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 9) - for (int i = 0; i < 16; i++) - maskValue = SSAScope::builder().CreateInsertElement(maskValue, mask[i / 4].v, SSAInt(i).v, SSAScope::hint()); -#else - for (int i = 0; i < 16; i++) - maskValue = SSAScope::builder().CreateInsertElement(maskValue, mask[i / 4].v, (uint64_t)i, SSAScope::hint()); -#endif - - llvm::PointerType *m16xint8typeptr = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 16)->getPointerTo(); - llvm::CallInst *inst = 
SSAScope::builder().CreateMaskedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m16xint8typeptr, SSAScope::hint()), 1, maskValue); - inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list()); -} diff --git a/tools/drawergen/ssa/ssa_ubyte_ptr.h b/tools/drawergen/ssa/ssa_ubyte_ptr.h deleted file mode 100644 index ba4fb5397e..0000000000 --- a/tools/drawergen/ssa/ssa_ubyte_ptr.h +++ /dev/null @@ -1,57 +0,0 @@ -/* -** SSA uint8 pointer -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 'as-is', without any express or implied -** warranty. In no event will the authors be held liable for any damages -** arising from the use of this software. -** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. -** 3. This notice may not be removed or altered from any source distribution. 
-** -*/ - -#pragma once - -#include "ssa_ubyte.h" -#include "ssa_int.h" -#include "ssa_vec4i.h" -#include "ssa_vec8s.h" -#include "ssa_vec16ub.h" - -namespace llvm { class Value; } -namespace llvm { class Type; } - -class SSABool; - -class SSAUBytePtr -{ -public: - SSAUBytePtr(); - explicit SSAUBytePtr(llvm::Value *v); - static SSAUBytePtr from_llvm(llvm::Value *v) { return SSAUBytePtr(v); } - static llvm::Type *llvm_type(); - SSAUBytePtr operator[](SSAInt index) const; - SSAUBytePtr operator[](int index) const { return (*this)[SSAInt(index)]; } - SSAUByte load(bool constantScopeDomain) const; - SSAVec4i load_vec4ub(bool constantScopeDomain) const; - SSAVec16ub load_vec16ub(bool constantScopeDomain) const; - SSAVec16ub load_unaligned_vec16ub(bool constantScopeDomain) const; - void store(const SSAUByte &new_value); - void store_vec4ub(const SSAVec4i &new_value); - void store_masked_vec4ub(const SSAVec4i &new_value, SSABool mask[4]); - void store_vec16ub(const SSAVec16ub &new_value); - void store_unaligned_vec16ub(const SSAVec16ub &new_value); - void store_masked_vec16ub(const SSAVec16ub &new_value, SSABool mask[4]); - - llvm::Value *v; -}; diff --git a/tools/drawergen/ssa/ssa_value.cpp b/tools/drawergen/ssa/ssa_value.cpp deleted file mode 100644 index 881b85ad37..0000000000 --- a/tools/drawergen/ssa/ssa_value.cpp +++ /dev/null @@ -1,81 +0,0 @@ -/* -** SSA value -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 'as-is', without any express or implied -** warranty. In no event will the authors be held liable for any damages -** arising from the use of this software. -** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. 
If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. -** 3. This notice may not be removed or altered from any source distribution. -** -*/ - -#include "precomp.h" -#include "ssa_value.h" -#include "ssa_int.h" -#include "ssa_scope.h" - -SSAValue SSAValue::load(bool constantScopeDomain) -{ - auto loadInst = SSAScope::builder().CreateLoad(v, false); - if (constantScopeDomain) - loadInst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list()); - return SSAValue::from_llvm(loadInst); -} - -void SSAValue::store(llvm::Value *value) -{ - auto inst = SSAScope::builder().CreateStore(value, v, false); - inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list()); -} - -SSAIndexLookup SSAValue::operator[](int index) -{ - SSAIndexLookup result; - result.v = v; - result.indexes.push_back(SSAInt(index).v); - return result; -} - -SSAIndexLookup SSAValue::operator[](SSAInt index) -{ - SSAIndexLookup result; - result.v = v; - result.indexes.push_back(index.v); - return result; -} - -///////////////////////////////////////////////////////////////////////////// - -SSAIndexLookup::operator SSAValue() -{ - return SSAValue::from_llvm(SSAScope::builder().CreateGEP(v, indexes)); -} - -SSAIndexLookup SSAIndexLookup::operator[](int index) -{ - SSAIndexLookup result; - result.v = v; - result.indexes = indexes; - result.indexes.push_back(SSAInt(index).v); - return result; -} - -SSAIndexLookup SSAIndexLookup::operator[](SSAInt index) -{ - SSAIndexLookup result; - result.v = v; - result.indexes = indexes; - result.indexes.push_back(index.v); - return result; -} diff --git a/tools/drawergen/ssa/ssa_value.h b/tools/drawergen/ssa/ssa_value.h deleted file mode 100644 index 1df94b267e..0000000000 --- a/tools/drawergen/ssa/ssa_value.h +++ 
/dev/null @@ -1,74 +0,0 @@ -/* -** SSA value -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 'as-is', without any express or implied -** warranty. In no event will the authors be held liable for any damages -** arising from the use of this software. -** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. -** 3. This notice may not be removed or altered from any source distribution. -** -*/ - -#pragma once - -#include - -namespace llvm { class Value; } - -class SSAInt; -class SSAIndexLookup; - -class SSAValue -{ -public: - SSAValue() : v(0) { } - - static SSAValue from_llvm(llvm::Value *v) { SSAValue val; val.v = v; return val; } - - SSAValue load(bool constantScopeDomain); - void store(llvm::Value *v); - - template - operator Type() - { - return Type::from_llvm(v); - } - - SSAIndexLookup operator[](int index); - SSAIndexLookup operator[](SSAInt index); - - llvm::Value *v; -}; - -class SSAIndexLookup -{ -public: - SSAIndexLookup() : v(0) { } - - llvm::Value *v; - std::vector indexes; - - SSAValue load(bool constantScopeDomain) { SSAValue value = *this; return value.load(constantScopeDomain); } - void store(llvm::Value *v) { SSAValue value = *this; return value.store(v); } - - template - operator Type() - { - return Type::from_llvm(v); - } - - operator SSAValue(); - SSAIndexLookup operator[](int index); - SSAIndexLookup operator[](SSAInt index); -}; diff --git a/tools/drawergen/ssa/ssa_vec16ub.cpp 
b/tools/drawergen/ssa/ssa_vec16ub.cpp deleted file mode 100644 index e64fe8e851..0000000000 --- a/tools/drawergen/ssa/ssa_vec16ub.cpp +++ /dev/null @@ -1,188 +0,0 @@ -/* -** SSA vec16 uint8 -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 'as-is', without any express or implied -** warranty. In no event will the authors be held liable for any damages -** arising from the use of this software. -** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. -** 3. This notice may not be removed or altered from any source distribution. 
-** -*/ - -#include "precomp.h" -#include "ssa_vec16ub.h" -#include "ssa_vec8s.h" -#include "ssa_vec4i.h" -#include "ssa_scope.h" - -SSAVec16ub::SSAVec16ub() -: v(0) -{ -} - -SSAVec16ub::SSAVec16ub(unsigned char constant) -: v(0) -{ - std::vector constants; - constants.resize(16, llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant, false))); - v = llvm::ConstantVector::get(constants); -} - -SSAVec16ub::SSAVec16ub( - unsigned char constant0, unsigned char constant1, unsigned char constant2, unsigned char constant3, unsigned char constant4, unsigned char constant5, unsigned char constant6, unsigned char constant7, - unsigned char constant8, unsigned char constant9, unsigned char constant10, unsigned char constant11, unsigned char constant12, unsigned char constant13, unsigned char constant14, unsigned char constant15) -: v(0) -{ - std::vector constants; - constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant0, false))); - constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant1, false))); - constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant2, false))); - constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant3, false))); - constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant4, false))); - constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant5, false))); - constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant6, false))); - constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant7, false))); - constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant8, false))); - constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant9, false))); - constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant10, 
false))); - constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant11, false))); - constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant12, false))); - constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant13, false))); - constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant14, false))); - constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant15, false))); - v = llvm::ConstantVector::get(constants); -} - -SSAVec16ub::SSAVec16ub(llvm::Value *v) -: v(v) -{ -} - -SSAVec16ub::SSAVec16ub(SSAVec8s s0, SSAVec8s s1) -: v(0) -{ -#ifdef ARM_TARGET - /* - llvm::Value *int8x8_i0 = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::arm_neon_vqmovnsu), s0.v, SSAScope::hint()); - llvm::Value *int8x8_i1 = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::arm_neon_vqmovnsu), s1.v, SSAScope::hint()); - v = shuffle(from_llvm(int8x8_i0), from_llvm(int8x8_i1), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15).v; - */ - // To do: add some clamping here - llvm::Value *int8x8_i0 = SSAScope::builder().CreateTrunc(s0.v, llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 8)); - llvm::Value *int8x8_i1 = SSAScope::builder().CreateTrunc(s1.v, llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 8)); - v = shuffle(from_llvm(int8x8_i0), from_llvm(int8x8_i1), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15).v; -#else - llvm::Value *values[2] = { s0.v, s1.v }; - v = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_packuswb_128), values, SSAScope::hint()); -#endif -} - -llvm::Type *SSAVec16ub::llvm_type() -{ - return llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 16); -} - -SSAVec16ub SSAVec16ub::bitcast(SSAVec4i i32) -{ - return SSAVec16ub::from_llvm(SSAScope::builder().CreateBitCast(i32.v, llvm_type(), 
SSAScope::hint())); -} - -SSAVec16ub SSAVec16ub::shuffle(const SSAVec16ub &i0, int index0, int index1, int index2, int index3, int index4, int index5, int index6, int index7, int index8, int index9, int index10, int index11, int index12, int index13, int index14, int index15) -{ - return shuffle(i0, from_llvm(llvm::UndefValue::get(llvm_type())), index0, index1, index2, index3, index4, index5, index6, index7, index8, index9, index10, index11, index12, index13, index14, index15); -} - -SSAVec16ub SSAVec16ub::shuffle(const SSAVec16ub &i0, const SSAVec16ub &i1, int index0, int index1, int index2, int index3, int index4, int index5, int index6, int index7, int index8, int index9, int index10, int index11, int index12, int index13, int index14, int index15) -{ - std::vector constants; - constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index0))); - constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index1))); - constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index2))); - constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index3))); - constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index4))); - constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index5))); - constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index6))); - constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index7))); - constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index8))); - constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index9))); - constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index10))); - constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index11))); - constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 
index12))); - constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index13))); - constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index14))); - constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index15))); - llvm::Value *mask = llvm::ConstantVector::get(constants); - return SSAVec16ub::from_llvm(SSAScope::builder().CreateShuffleVector(i0.v, i1.v, mask, SSAScope::hint())); -} - -SSAVec16ub operator+(const SSAVec16ub &a, const SSAVec16ub &b) -{ - return SSAVec16ub::from_llvm(SSAScope::builder().CreateAdd(a.v, b.v, SSAScope::hint())); -} - -SSAVec16ub operator-(const SSAVec16ub &a, const SSAVec16ub &b) -{ - return SSAVec16ub::from_llvm(SSAScope::builder().CreateSub(a.v, b.v, SSAScope::hint())); -} - -SSAVec16ub operator*(const SSAVec16ub &a, const SSAVec16ub &b) -{ - return SSAVec16ub::from_llvm(SSAScope::builder().CreateMul(a.v, b.v, SSAScope::hint())); -} -/* -SSAVec16ub operator/(const SSAVec16ub &a, const SSAVec16ub &b) -{ - return SSAScope::builder().CreateDiv(a.v, b.v, SSAScope::hint()); -} -*/ -SSAVec16ub operator+(unsigned char a, const SSAVec16ub &b) -{ - return SSAVec16ub(a) + b; -} - -SSAVec16ub operator-(unsigned char a, const SSAVec16ub &b) -{ - return SSAVec16ub(a) - b; -} - -SSAVec16ub operator*(unsigned char a, const SSAVec16ub &b) -{ - return SSAVec16ub(a) * b; -} -/* -SSAVec16ub operator/(unsigned char a, const SSAVec16ub &b) -{ - return SSAVec16ub(a) / b; -} -*/ -SSAVec16ub operator+(const SSAVec16ub &a, unsigned char b) -{ - return a + SSAVec16ub(b); -} - -SSAVec16ub operator-(const SSAVec16ub &a, unsigned char b) -{ - return a - SSAVec16ub(b); -} - -SSAVec16ub operator*(const SSAVec16ub &a, unsigned char b) -{ - return a * SSAVec16ub(b); -} -/* -SSAVec16ub operator/(const SSAVec16ub &a, unsigned char b) -{ - return a / SSAVec16ub(b); -} -*/ \ No newline at end of file diff --git a/tools/drawergen/ssa/ssa_vec16ub.h b/tools/drawergen/ssa/ssa_vec16ub.h deleted 
file mode 100644 index 42f4ef3ee8..0000000000 --- a/tools/drawergen/ssa/ssa_vec16ub.h +++ /dev/null @@ -1,63 +0,0 @@ -/* -** SSA vec16 uint8 -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 'as-is', without any express or implied -** warranty. In no event will the authors be held liable for any damages -** arising from the use of this software. -** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. -** 3. This notice may not be removed or altered from any source distribution. 
-** -*/ - -#pragma once - -namespace llvm { class Value; } -namespace llvm { class Type; } - -class SSAVec8s; -class SSAVec4i; - -class SSAVec16ub -{ -public: - SSAVec16ub(); - explicit SSAVec16ub(unsigned char constant); - explicit SSAVec16ub( - unsigned char constant0, unsigned char constant1, unsigned char constant2, unsigned char constant3, unsigned char constant4, unsigned char constant5, unsigned char constant6, unsigned char constant7, - unsigned char constant8, unsigned char constant9, unsigned char constant10, unsigned char constant11, unsigned char constant12, unsigned char constant13, unsigned char constant14, unsigned char constant15); - explicit SSAVec16ub(llvm::Value *v); - SSAVec16ub(SSAVec8s s0, SSAVec8s s1); - static SSAVec16ub from_llvm(llvm::Value *v) { return SSAVec16ub(v); } - static llvm::Type *llvm_type(); - static SSAVec16ub bitcast(SSAVec4i i32); - static SSAVec16ub shuffle(const SSAVec16ub &i0, int index0, int index1, int index2, int index3, int index4, int index5, int index6, int index7, int index8, int index9, int index10, int index11, int index12, int index13, int index14, int index15); - static SSAVec16ub shuffle(const SSAVec16ub &i0, const SSAVec16ub &i1, int index0, int index1, int index2, int index3, int index4, int index5, int index6, int index7, int index8, int index9, int index10, int index11, int index12, int index13, int index14, int index15); - - llvm::Value *v; -}; - -SSAVec16ub operator+(const SSAVec16ub &a, const SSAVec16ub &b); -SSAVec16ub operator-(const SSAVec16ub &a, const SSAVec16ub &b); -SSAVec16ub operator*(const SSAVec16ub &a, const SSAVec16ub &b); -SSAVec16ub operator/(const SSAVec16ub &a, const SSAVec16ub &b); - -SSAVec16ub operator+(unsigned char a, const SSAVec16ub &b); -SSAVec16ub operator-(unsigned char a, const SSAVec16ub &b); -SSAVec16ub operator*(unsigned char a, const SSAVec16ub &b); -SSAVec16ub operator/(unsigned char a, const SSAVec16ub &b); - -SSAVec16ub operator+(const SSAVec16ub &a, unsigned char b); 
-SSAVec16ub operator-(const SSAVec16ub &a, unsigned char b); -SSAVec16ub operator*(const SSAVec16ub &a, unsigned char b); -SSAVec16ub operator/(const SSAVec16ub &a, unsigned char b); diff --git a/tools/drawergen/ssa/ssa_vec4f.cpp b/tools/drawergen/ssa/ssa_vec4f.cpp deleted file mode 100644 index d59400ea93..0000000000 --- a/tools/drawergen/ssa/ssa_vec4f.cpp +++ /dev/null @@ -1,209 +0,0 @@ -/* -** SSA vec4 float -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 'as-is', without any express or implied -** warranty. In no event will the authors be held liable for any damages -** arising from the use of this software. -** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. -** 3. This notice may not be removed or altered from any source distribution. 
-** -*/ - -#include "precomp.h" -#include "ssa_vec4f.h" -#include "ssa_vec4i.h" -#include "ssa_float.h" -#include "ssa_int.h" -#include "ssa_scope.h" - -SSAVec4f::SSAVec4f() -: v(0) -{ -} - -SSAVec4f::SSAVec4f(float constant) -: v(0) -{ - std::vector constants; - constants.resize(4, llvm::ConstantFP::get(SSAScope::context(), llvm::APFloat(constant))); - v = llvm::ConstantVector::get(constants); -} - -SSAVec4f::SSAVec4f(float constant0, float constant1, float constant2, float constant3) -: v(0) -{ - std::vector constants; - constants.push_back(llvm::ConstantFP::get(SSAScope::context(), llvm::APFloat(constant0))); - constants.push_back(llvm::ConstantFP::get(SSAScope::context(), llvm::APFloat(constant1))); - constants.push_back(llvm::ConstantFP::get(SSAScope::context(), llvm::APFloat(constant2))); - constants.push_back(llvm::ConstantFP::get(SSAScope::context(), llvm::APFloat(constant3))); - v = llvm::ConstantVector::get(constants); -} - -SSAVec4f::SSAVec4f(SSAFloat f) -: v(0) -{ - llvm::Type *m1xfloattype = llvm::VectorType::get(llvm::Type::getFloatTy(SSAScope::context()), 1); - std::vector constants; - constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 0))); - constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 0))); - constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 0))); - constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 0))); - llvm::Value *mask = llvm::ConstantVector::get(constants); - v = SSAScope::builder().CreateShuffleVector(SSAScope::builder().CreateBitCast(f.v, m1xfloattype, SSAScope::hint()), llvm::UndefValue::get(m1xfloattype), mask, SSAScope::hint()); -} - -SSAVec4f::SSAVec4f(SSAFloat f0, SSAFloat f1, SSAFloat f2, SSAFloat f3) -: v(0) -{ - v = SSAScope::builder().CreateInsertElement(llvm::UndefValue::get(llvm_type()), f0.v, llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)0))); - v = 
SSAScope::builder().CreateInsertElement(v, f1.v, llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)1))); - v = SSAScope::builder().CreateInsertElement(v, f2.v, llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)2))); - v = SSAScope::builder().CreateInsertElement(v, f3.v, llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)3))); -} - -SSAVec4f::SSAVec4f(llvm::Value *v) -: v(v) -{ -} - -SSAVec4f::SSAVec4f(SSAVec4i i32) -: v(0) -{ -#ifdef ARM_TARGET - v = SSAScope::builder().CreateSIToFP(i32.v, llvm_type(), SSAScope::hint()); -#else - //llvm::VectorType *m128type = llvm::VectorType::get(llvm::Type::getFloatTy(*context), 4); - //return builder->CreateSIToFP(i32.v, m128type); - v = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_cvtdq2ps), i32.v, SSAScope::hint()); -#endif -} - -llvm::Type *SSAVec4f::llvm_type() -{ - return llvm::VectorType::get(llvm::Type::getFloatTy(SSAScope::context()), 4); -} - -SSAFloat SSAVec4f::operator[](SSAInt index) const -{ - return SSAFloat::from_llvm(SSAScope::builder().CreateExtractElement(v, index.v, SSAScope::hint())); -} - -SSAFloat SSAVec4f::operator[](int index) const -{ - return (*this)[SSAInt(index)]; -} - -SSAVec4f SSAVec4f::insert_element(SSAVec4f vec4f, SSAFloat value, int index) -{ - return from_llvm(SSAScope::builder().CreateInsertElement(vec4f.v, value.v, llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)index)))); -} - -SSAVec4f SSAVec4f::bitcast(SSAVec4i i32) -{ - return SSAVec4i::from_llvm(SSAScope::builder().CreateBitCast(i32.v, llvm_type(), SSAScope::hint())); -} - -void SSAVec4f::transpose(SSAVec4f &row0, SSAVec4f &row1, SSAVec4f &row2, SSAVec4f &row3) -{ - SSAVec4f tmp0 = shuffle(row0, row1, 0x44);//_MM_SHUFFLE(1,0,1,0)); - SSAVec4f tmp2 = shuffle(row0, row1, 0xEE);//_MM_SHUFFLE(3,2,3,2)); - SSAVec4f tmp1 = shuffle(row2, row3, 0x44);//_MM_SHUFFLE(1,0,1,0)); - SSAVec4f tmp3 = shuffle(row2, row3, 
0xEE);//_MM_SHUFFLE(3,2,3,2)); - row0 = shuffle(tmp0, tmp1, 0x88);//_MM_SHUFFLE(2,0,2,0)); - row1 = shuffle(tmp0, tmp1, 0xDD);//_MM_SHUFFLE(3,1,3,1)); - row2 = shuffle(tmp2, tmp3, 0x88);//_MM_SHUFFLE(2,0,2,0)); - row3 = shuffle(tmp2, tmp3, 0xDD);//_MM_SHUFFLE(3,1,3,1)); -} - -SSAVec4f SSAVec4f::shuffle(const SSAVec4f &f0, int index0, int index1, int index2, int index3) -{ - return shuffle(f0, from_llvm(llvm::UndefValue::get(llvm_type())), index0, index1, index2, index3); -} - -SSAVec4f SSAVec4f::shuffle(const SSAVec4f &f0, const SSAVec4f &f1, int index0, int index1, int index2, int index3) -{ - std::vector constants; - constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index0))); - constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index1))); - constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index2))); - constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index3))); - llvm::Value *mask = llvm::ConstantVector::get(constants); - return SSAVec4f::from_llvm(SSAScope::builder().CreateShuffleVector(f0.v, f1.v, mask, SSAScope::hint())); -} - -SSAVec4f SSAVec4f::shuffle(const SSAVec4f &f0, const SSAVec4f &f1, int mask) -{ - return shuffle(f0, f1, mask & 3, (mask >> 2) & 3, ((mask >> 4) & 3) + 4, ((mask >> 6) & 3) + 4); -} - -SSAVec4f operator+(const SSAVec4f &a, const SSAVec4f &b) -{ - return SSAVec4f::from_llvm(SSAScope::builder().CreateFAdd(a.v, b.v, SSAScope::hint())); -} - -SSAVec4f operator-(const SSAVec4f &a, const SSAVec4f &b) -{ - return SSAVec4f::from_llvm(SSAScope::builder().CreateFSub(a.v, b.v, SSAScope::hint())); -} - -SSAVec4f operator*(const SSAVec4f &a, const SSAVec4f &b) -{ - return SSAVec4f::from_llvm(SSAScope::builder().CreateFMul(a.v, b.v, SSAScope::hint())); -} - -SSAVec4f operator/(const SSAVec4f &a, const SSAVec4f &b) -{ - return SSAVec4f::from_llvm(SSAScope::builder().CreateFDiv(a.v, b.v, SSAScope::hint())); -} - -SSAVec4f 
operator+(float a, const SSAVec4f &b) -{ - return SSAVec4f(a) + b; -} - -SSAVec4f operator-(float a, const SSAVec4f &b) -{ - return SSAVec4f(a) - b; -} - -SSAVec4f operator*(float a, const SSAVec4f &b) -{ - return SSAVec4f(a) * b; -} - -SSAVec4f operator/(float a, const SSAVec4f &b) -{ - return SSAVec4f(a) / b; -} - -SSAVec4f operator+(const SSAVec4f &a, float b) -{ - return a + SSAVec4f(b); -} - -SSAVec4f operator-(const SSAVec4f &a, float b) -{ - return a - SSAVec4f(b); -} - -SSAVec4f operator*(const SSAVec4f &a, float b) -{ - return a * SSAVec4f(b); -} - -SSAVec4f operator/(const SSAVec4f &a, float b) -{ - return a / SSAVec4f(b); -} diff --git a/tools/drawergen/ssa/ssa_vec4f.h b/tools/drawergen/ssa/ssa_vec4f.h deleted file mode 100644 index 3011da9d00..0000000000 --- a/tools/drawergen/ssa/ssa_vec4f.h +++ /dev/null @@ -1,71 +0,0 @@ -/* -** SSA vec4 float -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 'as-is', without any express or implied -** warranty. In no event will the authors be held liable for any damages -** arising from the use of this software. -** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. -** 3. This notice may not be removed or altered from any source distribution. 
-** -*/ - -#pragma once - -namespace llvm { class Value; } -namespace llvm { class Type; } - -class SSAVec4i; -class SSAFloat; -class SSAInt; - -class SSAVec4f -{ -public: - SSAVec4f(); - explicit SSAVec4f(float constant); - explicit SSAVec4f(float constant0, float constant1, float constant2, float constant3); - SSAVec4f(SSAFloat f); - SSAVec4f(SSAFloat f0, SSAFloat f1, SSAFloat f2, SSAFloat f3); - explicit SSAVec4f(llvm::Value *v); - SSAVec4f(SSAVec4i i32); - SSAFloat operator[](SSAInt index) const; - SSAFloat operator[](int index) const; - static SSAVec4f insert_element(SSAVec4f vec4f, SSAFloat value, int index); - static SSAVec4f bitcast(SSAVec4i i32); - static void transpose(SSAVec4f &row0, SSAVec4f &row1, SSAVec4f &row2, SSAVec4f &row3); - static SSAVec4f shuffle(const SSAVec4f &f0, int index0, int index1, int index2, int index3); - static SSAVec4f shuffle(const SSAVec4f &f0, const SSAVec4f &f1, int index0, int index1, int index2, int index3); - static SSAVec4f from_llvm(llvm::Value *v) { return SSAVec4f(v); } - static llvm::Type *llvm_type(); - - llvm::Value *v; - -private: - static SSAVec4f shuffle(const SSAVec4f &f0, const SSAVec4f &f1, int mask); -}; - -SSAVec4f operator+(const SSAVec4f &a, const SSAVec4f &b); -SSAVec4f operator-(const SSAVec4f &a, const SSAVec4f &b); -SSAVec4f operator*(const SSAVec4f &a, const SSAVec4f &b); -SSAVec4f operator/(const SSAVec4f &a, const SSAVec4f &b); - -SSAVec4f operator+(float a, const SSAVec4f &b); -SSAVec4f operator-(float a, const SSAVec4f &b); -SSAVec4f operator*(float a, const SSAVec4f &b); -SSAVec4f operator/(float a, const SSAVec4f &b); - -SSAVec4f operator+(const SSAVec4f &a, float b); -SSAVec4f operator-(const SSAVec4f &a, float b); -SSAVec4f operator*(const SSAVec4f &a, float b); -SSAVec4f operator/(const SSAVec4f &a, float b); diff --git a/tools/drawergen/ssa/ssa_vec4f_ptr.cpp b/tools/drawergen/ssa/ssa_vec4f_ptr.cpp deleted file mode 100644 index 866331f840..0000000000 --- 
a/tools/drawergen/ssa/ssa_vec4f_ptr.cpp +++ /dev/null @@ -1,73 +0,0 @@ -/* -** SSA vec4 float pointer -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 'as-is', without any express or implied -** warranty. In no event will the authors be held liable for any damages -** arising from the use of this software. -** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. -** 3. This notice may not be removed or altered from any source distribution. 
-** -*/ - -#include "precomp.h" -#include "ssa_vec4f_ptr.h" -#include "ssa_scope.h" - -SSAVec4fPtr::SSAVec4fPtr() -: v(0) -{ -} - -SSAVec4fPtr::SSAVec4fPtr(llvm::Value *v) -: v(v) -{ -} - -llvm::Type *SSAVec4fPtr::llvm_type() -{ - return llvm::VectorType::get(llvm::Type::getFloatTy(SSAScope::context()), 4)->getPointerTo(); -} - -SSAVec4fPtr SSAVec4fPtr::operator[](SSAInt index) const -{ - return SSAVec4fPtr::from_llvm(SSAScope::builder().CreateGEP(v, index.v, SSAScope::hint())); -} - -SSAVec4f SSAVec4fPtr::load(bool constantScopeDomain) const -{ - auto loadInst = SSAScope::builder().CreateAlignedLoad(v, 16, false, SSAScope::hint()); - if (constantScopeDomain) - loadInst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list()); - return SSAVec4f::from_llvm(loadInst); -} - -SSAVec4f SSAVec4fPtr::load_unaligned(bool constantScopeDomain) const -{ - auto loadInst = SSAScope::builder().CreateAlignedLoad(v, 1, false, SSAScope::hint()); - if (constantScopeDomain) - loadInst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list()); - return SSAVec4f::from_llvm(loadInst); -} - -void SSAVec4fPtr::store(const SSAVec4f &new_value) -{ - auto inst = SSAScope::builder().CreateAlignedStore(new_value.v, v, 16, false); - inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list()); -} - -void SSAVec4fPtr::store_unaligned(const SSAVec4f &new_value) -{ - auto inst = SSAScope::builder().CreateAlignedStore(new_value.v, v, 4, false); - inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list()); -} diff --git a/tools/drawergen/ssa/ssa_vec4f_ptr.h b/tools/drawergen/ssa/ssa_vec4f_ptr.h deleted file mode 100644 index 46f31ef6da..0000000000 --- a/tools/drawergen/ssa/ssa_vec4f_ptr.h +++ /dev/null @@ -1,45 +0,0 @@ -/* -** SSA vec4 float pointer -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 'as-is', without any express or implied -** warranty. 
In no event will the authors be held liable for any damages -** arising from the use of this software. -** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. -** 3. This notice may not be removed or altered from any source distribution. -** -*/ - -#pragma once - -#include "ssa_int.h" -#include "ssa_vec4f.h" - -namespace llvm { class Value; } -namespace llvm { class Type; } - -class SSAVec4fPtr -{ -public: - SSAVec4fPtr(); - explicit SSAVec4fPtr(llvm::Value *v); - static SSAVec4fPtr from_llvm(llvm::Value *v) { return SSAVec4fPtr(v); } - static llvm::Type *llvm_type(); - SSAVec4fPtr operator[](SSAInt index) const; - SSAVec4f load(bool constantScopeDomain) const; - SSAVec4f load_unaligned(bool constantScopeDomain) const; - void store(const SSAVec4f &new_value); - void store_unaligned(const SSAVec4f &new_value); - - llvm::Value *v; -}; diff --git a/tools/drawergen/ssa/ssa_vec4i.cpp b/tools/drawergen/ssa/ssa_vec4i.cpp deleted file mode 100644 index 4b0d7766fb..0000000000 --- a/tools/drawergen/ssa/ssa_vec4i.cpp +++ /dev/null @@ -1,275 +0,0 @@ -/* -** SSA vec4 int32 -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 'as-is', without any express or implied -** warranty. In no event will the authors be held liable for any damages -** arising from the use of this software. 
-** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. -** 3. This notice may not be removed or altered from any source distribution. -** -*/ - -#include "precomp.h" -#include "ssa_vec4i.h" -#include "ssa_vec4f.h" -#include "ssa_vec8s.h" -#include "ssa_vec16ub.h" -#include "ssa_int.h" -#include "ssa_scope.h" - -SSAVec4i::SSAVec4i() -: v(0) -{ -} - -SSAVec4i::SSAVec4i(int constant) -: v(0) -{ - std::vector constants; - constants.resize(4, llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, constant, true))); - v = llvm::ConstantVector::get(constants); -} - -SSAVec4i::SSAVec4i(int constant0, int constant1, int constant2, int constant3) -: v(0) -{ - std::vector constants; - constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, constant0, true))); - constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, constant1, true))); - constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, constant2, true))); - constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, constant3, true))); - v = llvm::ConstantVector::get(constants); -} - -SSAVec4i::SSAVec4i(llvm::Value *v) -: v(v) -{ -} - -SSAVec4i::SSAVec4i(SSAInt i) -: v(0) -{ - llvm::Type *m1xi32type = llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 1); - std::vector constants; - constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 0))); - 
constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 0))); - constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 0))); - constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 0))); - llvm::Value *mask = llvm::ConstantVector::get(constants); - v = SSAScope::builder().CreateShuffleVector(SSAScope::builder().CreateBitCast(i.v, m1xi32type, SSAScope::hint()), llvm::UndefValue::get(m1xi32type), mask, SSAScope::hint()); -} - -SSAVec4i::SSAVec4i(SSAInt i0, SSAInt i1, SSAInt i2, SSAInt i3) -: v(0) -{ - std::vector constants; - constants.resize(4, llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 0, true))); - v = llvm::ConstantVector::get(constants); -#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 9) - v = SSAScope::builder().CreateInsertElement(v, i0.v, SSAInt(0).v, SSAScope::hint()); - v = SSAScope::builder().CreateInsertElement(v, i1.v, SSAInt(1).v, SSAScope::hint()); - v = SSAScope::builder().CreateInsertElement(v, i2.v, SSAInt(2).v, SSAScope::hint()); - v = SSAScope::builder().CreateInsertElement(v, i3.v, SSAInt(3).v, SSAScope::hint()); -#else - v = SSAScope::builder().CreateInsertElement(v, i0.v, (uint64_t)0, SSAScope::hint()); - v = SSAScope::builder().CreateInsertElement(v, i1.v, (uint64_t)1, SSAScope::hint()); - v = SSAScope::builder().CreateInsertElement(v, i2.v, (uint64_t)2, SSAScope::hint()); - v = SSAScope::builder().CreateInsertElement(v, i3.v, (uint64_t)3, SSAScope::hint()); -#endif -} - -/* -SSAVec4i::SSAVec4i(SSAVec4f f32) -: v(0) -{ -#ifdef ARM_TARGET - v = SSAScope::builder().CreateFPToSI(f32.v, llvm_type(), SSAScope::hint()); -#else - v = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_cvttps2dq), f32.v, SSAScope::hint()); -#endif -} -*/ - -SSAInt SSAVec4i::operator[](SSAInt index) const -{ - return SSAInt::from_llvm(SSAScope::builder().CreateExtractElement(v, index.v, SSAScope::hint())); -} - -SSAInt 
SSAVec4i::operator[](int index) const -{ - return (*this)[SSAInt(index)]; -} - -SSAVec4i SSAVec4i::insert(SSAInt index, SSAInt value) -{ - return SSAVec4i::from_llvm(SSAScope::builder().CreateInsertElement(v, value.v, index.v, SSAScope::hint())); -} - -SSAVec4i SSAVec4i::insert(int index, SSAInt value) -{ -#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 9) - return SSAVec4i::from_llvm(SSAScope::builder().CreateInsertElement(v, value.v, SSAInt(index).v, SSAScope::hint())); -#else - return SSAVec4i::from_llvm(SSAScope::builder().CreateInsertElement(v, value.v, index, SSAScope::hint())); -#endif -} - -SSAVec4i SSAVec4i::insert(int index, int value) -{ - return insert(index, SSAInt(value)); -} - -llvm::Type *SSAVec4i::llvm_type() -{ - return llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 4); -} - -SSAVec4i SSAVec4i::unpack(SSAInt i32) -{ - // _mm_cvtsi32_si128 as implemented by clang: - llvm::Value *v = SSAScope::builder().CreateInsertElement(llvm::UndefValue::get(SSAVec4i::llvm_type()), i32.v, SSAInt(0).v, SSAScope::hint()); - v = SSAScope::builder().CreateInsertElement(v, SSAInt(0).v, SSAInt(1).v, SSAScope::hint()); - v = SSAScope::builder().CreateInsertElement(v, SSAInt(0).v, SSAInt(2).v, SSAScope::hint()); - v = SSAScope::builder().CreateInsertElement(v, SSAInt(0).v, SSAInt(3).v, SSAScope::hint()); - SSAVec4i v4i = SSAVec4i::from_llvm(v); - - SSAVec8s low = SSAVec8s::bitcast(SSAVec16ub::shuffle(SSAVec16ub::bitcast(v4i), SSAVec16ub((unsigned char)0), 0, 16 + 0, 1, 16 + 1, 2, 16 + 2, 3, 16 + 3, 4, 16 + 4, 5, 16 + 5, 6, 16 + 6, 7, 16 + 7)); // _mm_unpacklo_epi8 - return SSAVec4i::extendlo(low); // _mm_unpacklo_epi16 -} - -SSAVec4i SSAVec4i::bitcast(SSAVec4f f32) -{ - return SSAVec4i::from_llvm(SSAScope::builder().CreateBitCast(f32.v, llvm_type(), SSAScope::hint())); -} - -SSAVec4i SSAVec4i::bitcast(SSAVec8s i16) -{ - return SSAVec4i::from_llvm(SSAScope::builder().CreateBitCast(i16.v, llvm_type(), SSAScope::hint())); 
-} - -SSAVec4i SSAVec4i::shuffle(const SSAVec4i &i0, int index0, int index1, int index2, int index3) -{ - return shuffle(i0, from_llvm(llvm::UndefValue::get(llvm_type())), index0, index1, index2, index3); -} - -SSAVec4i SSAVec4i::shuffle(const SSAVec4i &i0, const SSAVec4i &i1, int index0, int index1, int index2, int index3) -{ - std::vector constants; - constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index0))); - constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index1))); - constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index2))); - constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index3))); - llvm::Value *mask = llvm::ConstantVector::get(constants); - return SSAVec4i::from_llvm(SSAScope::builder().CreateShuffleVector(i0.v, i1.v, mask, SSAScope::hint())); -} - -void SSAVec4i::extend(SSAVec16ub a, SSAVec4i &out0, SSAVec4i &out1, SSAVec4i &out2, SSAVec4i &out3) -{ - SSAVec8s low = SSAVec8s::extendlo(a); - SSAVec8s high = SSAVec8s::extendhi(a); - out0 = extendlo(low); - out1 = extendhi(low); - out2 = extendlo(high); - out3 = extendhi(high); -} - -SSAVec4i SSAVec4i::extendhi(SSAVec8s i16) -{ - return SSAVec4i::bitcast(SSAVec8s::shuffle(i16, SSAVec8s((short)0), 4, 8+4, 5, 8+5, 6, 8+6, 7, 8+7)); // _mm_unpackhi_epi16 -} - -SSAVec4i SSAVec4i::extendlo(SSAVec8s i16) -{ - return SSAVec4i::bitcast(SSAVec8s::shuffle(i16, SSAVec8s((short)0), 0, 8+0, 1, 8+1, 2, 8+2, 3, 8+3)); // _mm_unpacklo_epi16 -} - -SSAVec4i SSAVec4i::combinehi(SSAVec8s a, SSAVec8s b) -{ - return SSAVec4i::bitcast(SSAVec8s::shuffle(a, b, 4, 8+4, 5, 8+5, 6, 8+6, 7, 8+7)); // _mm_unpackhi_epi16 -} - -SSAVec4i SSAVec4i::combinelo(SSAVec8s a, SSAVec8s b) -{ - return SSAVec4i::bitcast(SSAVec8s::shuffle(a, b, 0, 8+0, 1, 8+1, 2, 8+2, 3, 8+3)); // _mm_unpacklo_epi16 -} - -SSAVec4i operator+(const SSAVec4i &a, const SSAVec4i &b) -{ - return 
SSAVec4i::from_llvm(SSAScope::builder().CreateAdd(a.v, b.v, SSAScope::hint())); -} - -SSAVec4i operator-(const SSAVec4i &a, const SSAVec4i &b) -{ - return SSAVec4i::from_llvm(SSAScope::builder().CreateSub(a.v, b.v, SSAScope::hint())); -} - -SSAVec4i operator*(const SSAVec4i &a, const SSAVec4i &b) -{ - return SSAVec4i::from_llvm(SSAScope::builder().CreateMul(a.v, b.v, SSAScope::hint())); -} - -SSAVec4i operator/(const SSAVec4i &a, const SSAVec4i &b) -{ - return SSAVec4i::from_llvm(SSAScope::builder().CreateSDiv(a.v, b.v, SSAScope::hint())); -} - -SSAVec4i operator+(int a, const SSAVec4i &b) -{ - return SSAVec4i(a) + b; -} - -SSAVec4i operator-(int a, const SSAVec4i &b) -{ - return SSAVec4i(a) - b; -} - -SSAVec4i operator*(int a, const SSAVec4i &b) -{ - return SSAVec4i(a) * b; -} - -SSAVec4i operator/(int a, const SSAVec4i &b) -{ - return SSAVec4i(a) / b; -} - -SSAVec4i operator+(const SSAVec4i &a, int b) -{ - return a + SSAVec4i(b); -} - -SSAVec4i operator-(const SSAVec4i &a, int b) -{ - return a - SSAVec4i(b); -} - -SSAVec4i operator*(const SSAVec4i &a, int b) -{ - return a * SSAVec4i(b); -} - -SSAVec4i operator/(const SSAVec4i &a, int b) -{ - return a / SSAVec4i(b); -} - -SSAVec4i operator<<(const SSAVec4i &a, int bits) -{ - return SSAInt::from_llvm(SSAScope::builder().CreateShl(a.v, bits, SSAScope::hint())); -} - -SSAVec4i operator>>(const SSAVec4i &a, int bits) -{ - return SSAVec4i::from_llvm(SSAScope::builder().CreateLShr(a.v, bits, SSAScope::hint())); -} diff --git a/tools/drawergen/ssa/ssa_vec4i.h b/tools/drawergen/ssa/ssa_vec4i.h deleted file mode 100644 index e8a3ec9a5a..0000000000 --- a/tools/drawergen/ssa/ssa_vec4i.h +++ /dev/null @@ -1,80 +0,0 @@ -/* -** SSA vec4 int32 -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 'as-is', without any express or implied -** warranty. In no event will the authors be held liable for any damages -** arising from the use of this software. 
-** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. -** 3. This notice may not be removed or altered from any source distribution. -** -*/ - -#pragma once - -namespace llvm { class Value; } -namespace llvm { class Type; } - -class SSAVec4f; -class SSAVec8s; -class SSAVec16ub; -class SSAInt; - -class SSAVec4i -{ -public: - SSAVec4i(); - explicit SSAVec4i(int constant); - explicit SSAVec4i(int constant0, int constant1, int constant2, int constant3); - SSAVec4i(SSAInt i); - SSAVec4i(SSAInt i0, SSAInt i1, SSAInt i2, SSAInt i3); - explicit SSAVec4i(llvm::Value *v); - SSAVec4i(SSAVec4f f32); - SSAInt operator[](SSAInt index) const; - SSAInt operator[](int index) const; - SSAVec4i insert(SSAInt index, SSAInt value); - SSAVec4i insert(int index, SSAInt value); - SSAVec4i insert(int index, int value); - static SSAVec4i unpack(SSAInt value); - static SSAVec4i bitcast(SSAVec4f f32); - static SSAVec4i bitcast(SSAVec8s i16); - static SSAVec4i shuffle(const SSAVec4i &f0, int index0, int index1, int index2, int index3); - static SSAVec4i shuffle(const SSAVec4i &f0, const SSAVec4i &f1, int index0, int index1, int index2, int index3); - static SSAVec4i extendhi(SSAVec8s i16); - static SSAVec4i extendlo(SSAVec8s i16); - static void extend(SSAVec16ub a, SSAVec4i &out0, SSAVec4i &out1, SSAVec4i &out2, SSAVec4i &out3); - static SSAVec4i combinehi(SSAVec8s v0, SSAVec8s v1); - static SSAVec4i combinelo(SSAVec8s v0, SSAVec8s v1); - static SSAVec4i 
from_llvm(llvm::Value *v) { return SSAVec4i(v); } - static llvm::Type *llvm_type(); - - llvm::Value *v; -}; - -SSAVec4i operator+(const SSAVec4i &a, const SSAVec4i &b); -SSAVec4i operator-(const SSAVec4i &a, const SSAVec4i &b); -SSAVec4i operator*(const SSAVec4i &a, const SSAVec4i &b); -SSAVec4i operator/(const SSAVec4i &a, const SSAVec4i &b); - -SSAVec4i operator+(int a, const SSAVec4i &b); -SSAVec4i operator-(int a, const SSAVec4i &b); -SSAVec4i operator*(int a, const SSAVec4i &b); -SSAVec4i operator/(int a, const SSAVec4i &b); - -SSAVec4i operator+(const SSAVec4i &a, int b); -SSAVec4i operator-(const SSAVec4i &a, int b); -SSAVec4i operator*(const SSAVec4i &a, int b); -SSAVec4i operator/(const SSAVec4i &a, int b); - -SSAVec4i operator<<(const SSAVec4i &a, int bits); -SSAVec4i operator>>(const SSAVec4i &a, int bits); diff --git a/tools/drawergen/ssa/ssa_vec4i_ptr.cpp b/tools/drawergen/ssa/ssa_vec4i_ptr.cpp deleted file mode 100644 index 9f1ad96845..0000000000 --- a/tools/drawergen/ssa/ssa_vec4i_ptr.cpp +++ /dev/null @@ -1,65 +0,0 @@ -/* -** SSA vec4 int32 pointer -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 'as-is', without any express or implied -** warranty. In no event will the authors be held liable for any damages -** arising from the use of this software. -** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. -** 3. This notice may not be removed or altered from any source distribution. 
-** -*/ - -#include "precomp.h" -#include "ssa_vec4i_ptr.h" -#include "ssa_scope.h" - -SSAVec4iPtr::SSAVec4iPtr() -: v(0) -{ -} - -SSAVec4iPtr::SSAVec4iPtr(llvm::Value *v) -: v(v) -{ -} - -llvm::Type *SSAVec4iPtr::llvm_type() -{ - return llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 4)->getPointerTo(); -} - -SSAVec4iPtr SSAVec4iPtr::operator[](SSAInt index) const -{ - return SSAVec4iPtr::from_llvm(SSAScope::builder().CreateGEP(v, index.v, SSAScope::hint())); -} - -SSAVec4i SSAVec4iPtr::load() const -{ - return SSAVec4i::from_llvm(SSAScope::builder().CreateLoad(v, false, SSAScope::hint())); -} - -SSAVec4i SSAVec4iPtr::load_unaligned() const -{ - return SSAVec4i::from_llvm(SSAScope::builder().Insert(new llvm::LoadInst(v, SSAScope::hint(), false, 4))); -} - -void SSAVec4iPtr::store(const SSAVec4i &new_value) -{ - SSAScope::builder().CreateStore(new_value.v, v, false); -} - -void SSAVec4iPtr::store_unaligned(const SSAVec4i &new_value) -{ - SSAScope::builder().CreateAlignedStore(new_value.v, v, 4, false); -} diff --git a/tools/drawergen/ssa/ssa_vec4i_ptr.h b/tools/drawergen/ssa/ssa_vec4i_ptr.h deleted file mode 100644 index e4d8134bb5..0000000000 --- a/tools/drawergen/ssa/ssa_vec4i_ptr.h +++ /dev/null @@ -1,46 +0,0 @@ -/* -** SSA vec4 int32 pointer -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 'as-is', without any express or implied -** warranty. In no event will the authors be held liable for any damages -** arising from the use of this software. -** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. 
Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. -** 3. This notice may not be removed or altered from any source distribution. -** -*/ - -#pragma once - -#include "ssa_int.h" -#include "ssa_vec4i.h" - -namespace llvm { class Value; } -namespace llvm { class Type; } - -class SSAVec4iPtr -{ -public: - SSAVec4iPtr(); - explicit SSAVec4iPtr(llvm::Value *v); - static SSAVec4iPtr from_llvm(llvm::Value *v) { return SSAVec4iPtr(v); } - static llvm::Type *llvm_type(); - SSAVec4iPtr operator[](SSAInt index) const; - SSAVec4iPtr operator[](int index) const { return (*this)[SSAInt(index)]; } - SSAVec4i load() const; - SSAVec4i load_unaligned() const; - void store(const SSAVec4i &new_value); - void store_unaligned(const SSAVec4i &new_value); - - llvm::Value *v; -}; diff --git a/tools/drawergen/ssa/ssa_vec8s.cpp b/tools/drawergen/ssa/ssa_vec8s.cpp deleted file mode 100644 index 795194ca5b..0000000000 --- a/tools/drawergen/ssa/ssa_vec8s.cpp +++ /dev/null @@ -1,189 +0,0 @@ -/* -** SSA vec8 int16 -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 'as-is', without any express or implied -** warranty. In no event will the authors be held liable for any damages -** arising from the use of this software. -** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. -** 3. This notice may not be removed or altered from any source distribution. 
-** -*/ - -#include "precomp.h" -#include "ssa_vec8s.h" -#include "ssa_vec4i.h" -#include "ssa_vec16ub.h" -#include "ssa_scope.h" - -SSAVec8s::SSAVec8s() -: v(0) -{ -} - -SSAVec8s::SSAVec8s(short constant) -: v(0) -{ - std::vector constants; - constants.resize(8, llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(16, constant, true))); - v = llvm::ConstantVector::get(constants); -} - -SSAVec8s::SSAVec8s(short constant0, short constant1, short constant2, short constant3, short constant4, short constant5, short constant6, short constant7) -: v(0) -{ - std::vector constants; - constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(16, constant0, true))); - constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(16, constant1, true))); - constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(16, constant2, true))); - constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(16, constant3, true))); - constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(16, constant4, true))); - constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(16, constant5, true))); - constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(16, constant6, true))); - constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(16, constant7, true))); - v = llvm::ConstantVector::get(constants); -} - -SSAVec8s::SSAVec8s(llvm::Value *v) -: v(v) -{ -} - -SSAVec8s::SSAVec8s(SSAVec4i i0, SSAVec4i i1) -: v(0) -{ -#ifdef ARM_TARGET - /* - llvm::Value *int16x4_i0 = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::arm_neon_vqmovns), i0.v, SSAScope::hint()); - llvm::Value *int16x4_i1 = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::arm_neon_vqmovns), i1.v, SSAScope::hint()); - v = shuffle(from_llvm(int16x4_i0), from_llvm(int16x4_i1), 0, 1, 2, 3, 4, 5, 6, 7).v; - */ - // To do: add some clamping here - 
llvm::Value *int16x4_i0 = SSAScope::builder().CreateTrunc(i0.v, llvm::VectorType::get(llvm::Type::getInt16Ty(SSAScope::context()), 4)); - llvm::Value *int16x4_i1 = SSAScope::builder().CreateTrunc(i1.v, llvm::VectorType::get(llvm::Type::getInt16Ty(SSAScope::context()), 4)); - v = shuffle(from_llvm(int16x4_i0), from_llvm(int16x4_i1), 0, 1, 2, 3, 4, 5, 6, 7).v; -#else - llvm::Value *values[2] = { i0.v, i1.v }; - v = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_packssdw_128), values, SSAScope::hint()); -#endif -} - -llvm::Type *SSAVec8s::llvm_type() -{ - return llvm::VectorType::get(llvm::Type::getInt16Ty(SSAScope::context()), 8); -} - -SSAVec8s SSAVec8s::bitcast(SSAVec16ub i8) -{ - return SSAVec8s::from_llvm(SSAScope::builder().CreateBitCast(i8.v, llvm_type(), SSAScope::hint())); -} - -SSAVec8s SSAVec8s::shuffle(const SSAVec8s &i0, int index0, int index1, int index2, int index3, int index4, int index5, int index6, int index7) -{ - return shuffle(i0, from_llvm(llvm::UndefValue::get(llvm_type())), index0, index1, index2, index3, index4, index5, index6, index7); -} - -SSAVec8s SSAVec8s::shuffle(const SSAVec8s &i0, const SSAVec8s &i1, int index0, int index1, int index2, int index3, int index4, int index5, int index6, int index7) -{ - std::vector constants; - constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index0))); - constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index1))); - constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index2))); - constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index3))); - constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index4))); - constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index5))); - constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index6))); - 
constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index7))); - llvm::Value *mask = llvm::ConstantVector::get(constants); - return SSAVec8s::from_llvm(SSAScope::builder().CreateShuffleVector(i0.v, i1.v, mask, SSAScope::hint())); -} - -SSAVec8s SSAVec8s::extendhi(SSAVec16ub a) -{ - return SSAVec8s::bitcast(SSAVec16ub::shuffle(a, SSAVec16ub((unsigned char)0), 8, 16+8, 9, 16+9, 10, 16+10, 11, 16+11, 12, 16+12, 13, 16+13, 14, 16+14, 15, 16+15)); // _mm_unpackhi_epi8 -} - -SSAVec8s SSAVec8s::extendlo(SSAVec16ub a) -{ - return SSAVec8s::bitcast(SSAVec16ub::shuffle(a, SSAVec16ub((unsigned char)0), 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7)); // _mm_unpacklo_epi8 -} - -SSAVec8s operator+(const SSAVec8s &a, const SSAVec8s &b) -{ - return SSAVec8s::from_llvm(SSAScope::builder().CreateAdd(a.v, b.v, SSAScope::hint())); -} - -SSAVec8s operator-(const SSAVec8s &a, const SSAVec8s &b) -{ - return SSAVec8s::from_llvm(SSAScope::builder().CreateSub(a.v, b.v, SSAScope::hint())); -} - -SSAVec8s operator*(const SSAVec8s &a, const SSAVec8s &b) -{ - return SSAVec8s::from_llvm(SSAScope::builder().CreateMul(a.v, b.v, SSAScope::hint())); -} - -SSAVec8s operator/(const SSAVec8s &a, const SSAVec8s &b) -{ - return SSAVec8s::from_llvm(SSAScope::builder().CreateSDiv(a.v, b.v, SSAScope::hint())); -} - -SSAVec8s operator+(short a, const SSAVec8s &b) -{ - return SSAVec8s(a) + b; -} - -SSAVec8s operator-(short a, const SSAVec8s &b) -{ - return SSAVec8s(a) - b; -} - -SSAVec8s operator*(short a, const SSAVec8s &b) -{ - return SSAVec8s(a) * b; -} - -SSAVec8s operator/(short a, const SSAVec8s &b) -{ - return SSAVec8s(a) / b; -} - -SSAVec8s operator+(const SSAVec8s &a, short b) -{ - return a + SSAVec8s(b); -} - -SSAVec8s operator-(const SSAVec8s &a, short b) -{ - return a - SSAVec8s(b); -} - -SSAVec8s operator*(const SSAVec8s &a, short b) -{ - return a * SSAVec8s(b); -} - -SSAVec8s operator/(const SSAVec8s &a, short b) -{ - return a / SSAVec8s(b); 
-} - -SSAVec8s operator<<(const SSAVec8s &a, int bits) -{ - return SSAVec8s::from_llvm(SSAScope::builder().CreateShl(a.v, bits, SSAScope::hint())); -} - -SSAVec8s operator>>(const SSAVec8s &a, int bits) -{ - return SSAVec8s::from_llvm(SSAScope::builder().CreateLShr(a.v, bits, SSAScope::hint())); -} diff --git a/tools/drawergen/ssa/ssa_vec8s.h b/tools/drawergen/ssa/ssa_vec8s.h deleted file mode 100644 index fdd56d972a..0000000000 --- a/tools/drawergen/ssa/ssa_vec8s.h +++ /dev/null @@ -1,67 +0,0 @@ -/* -** SSA vec8 int16 -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 'as-is', without any express or implied -** warranty. In no event will the authors be held liable for any damages -** arising from the use of this software. -** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. -** 3. This notice may not be removed or altered from any source distribution. 
-** -*/ - -#pragma once - -namespace llvm { class Value; } -namespace llvm { class Type; } - -class SSAVec4i; -class SSAVec16ub; - -class SSAVec8s -{ -public: - SSAVec8s(); - explicit SSAVec8s(short constant); - explicit SSAVec8s(short constant0, short constant1, short constant2, short constant3, short constant4, short constant5, short constant6, short constant7); - explicit SSAVec8s(llvm::Value *v); - SSAVec8s(SSAVec4i i0, SSAVec4i i1); - static SSAVec8s bitcast(SSAVec16ub i8); - static SSAVec8s shuffle(const SSAVec8s &i0, int index0, int index1, int index2, int index3, int index4, int index5, int index6, int index7); - static SSAVec8s shuffle(const SSAVec8s &i0, const SSAVec8s &i1, int index0, int index1, int index2, int index3, int index4, int index5, int index6, int index7); - static SSAVec8s extendhi(SSAVec16ub a); - static SSAVec8s extendlo(SSAVec16ub a); - static SSAVec8s mulhi(SSAVec8s a, SSAVec8s b); - static SSAVec8s from_llvm(llvm::Value *v) { return SSAVec8s(v); } - static llvm::Type *llvm_type(); - - llvm::Value *v; -}; - -SSAVec8s operator+(const SSAVec8s &a, const SSAVec8s &b); -SSAVec8s operator-(const SSAVec8s &a, const SSAVec8s &b); -SSAVec8s operator*(const SSAVec8s &a, const SSAVec8s &b); -SSAVec8s operator/(const SSAVec8s &a, const SSAVec8s &b); - -SSAVec8s operator+(short a, const SSAVec8s &b); -SSAVec8s operator-(short a, const SSAVec8s &b); -SSAVec8s operator*(short a, const SSAVec8s &b); -SSAVec8s operator/(short a, const SSAVec8s &b); - -SSAVec8s operator+(const SSAVec8s &a, short b); -SSAVec8s operator-(const SSAVec8s &a, short b); -SSAVec8s operator*(const SSAVec8s &a, short b); -SSAVec8s operator/(const SSAVec8s &a, short b); - -SSAVec8s operator<<(const SSAVec8s &a, int bits); -SSAVec8s operator>>(const SSAVec8s &a, int bits); diff --git a/tools/drawergen/timestamp.h b/tools/drawergen/timestamp.h deleted file mode 100644 index 6dd11bcffd..0000000000 --- a/tools/drawergen/timestamp.h +++ /dev/null @@ -1,12 +0,0 @@ - -#pragma once - 
-void AddSourceFileTimestamp(const char *timestamp); - -namespace -{ - struct TimestampSourceFile - { - TimestampSourceFile() { AddSourceFileTimestamp(__TIME__); } - } timestamp; -} diff --git a/tools/drawergen/trustinfo.rc b/tools/drawergen/trustinfo.rc deleted file mode 100644 index 366f9b2fd2..0000000000 --- a/tools/drawergen/trustinfo.rc +++ /dev/null @@ -1,6 +0,0 @@ -// This resource script is for compiling with MinGW only. Visual C++ -// compilations use the manifest tool to insert the manifest instead. - -#include - -1 RT_MANIFEST "trustinfo.txt" diff --git a/tools/drawergen/trustinfo.txt b/tools/drawergen/trustinfo.txt deleted file mode 100644 index 5216df6503..0000000000 --- a/tools/drawergen/trustinfo.txt +++ /dev/null @@ -1,16 +0,0 @@ - - - - - Drawergen for the ZDoom source build process. - - - - - - - - From 0e8d99971074f975c0c243822c2f0e5d1cf7ee98 Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Thu, 23 Feb 2017 02:40:28 -0500 Subject: [PATCH 877/912] Delete fix-llvm-3.8-ubuntu.sh This is no longer needed. --- tools/fix-llvm-3.8-ubuntu.sh | 17 ----------------- 1 file changed, 17 deletions(-) delete mode 100755 tools/fix-llvm-3.8-ubuntu.sh diff --git a/tools/fix-llvm-3.8-ubuntu.sh b/tools/fix-llvm-3.8-ubuntu.sh deleted file mode 100755 index 2dafc2c346..0000000000 --- a/tools/fix-llvm-3.8-ubuntu.sh +++ /dev/null @@ -1,17 +0,0 @@ -echo This tool adds in missing CMAKE stuff to your llvm-3.8 package. -echo This requires super-user access. This tool is only meant for -echo convenience and is possibly very dangerous. Rude things may tend -echo to occur when using this tool. -echo -read -p "Are you sure you want to use this tool? 
" -n 1 -r -echo -if [[ $REPLY =~ ^[Yy]$ ]] -then - sudo mkdir -p /usr/lib/llvm-3.8/share/llvm - sudo ln -s /usr/share/llvm-3.8/cmake /usr/lib/llvm-3.8/share/llvm/cmake - sudo sed -i -e '/get_filename_component(LLVM_INSTALL_PREFIX/ {s|^|#|}' -e '/^# Compute the installation prefix/i set(LLVM_INSTALL_PREFIX "/usr/lib/llvm-3.8")' /usr/lib/llvm-3.8/share/llvm/cmake/LLVMConfig.cmake - sudo sed -i '/_IMPORT_CHECK_TARGETS Polly/ {s|^|#|}' /usr/lib/llvm-3.8/share/llvm/cmake/LLVMExports-relwithdebinfo.cmake - sudo sed -i '/_IMPORT_CHECK_TARGETS sancov/ {s|^|#|}' /usr/lib/llvm-3.8/share/llvm/cmake/LLVMExports-relwithdebinfo.cmake - sudo ln -s /usr/lib/x86_64-linux-gnu/libLLVM-3.8.so.1 /usr/lib/llvm-3.8/lib/ -fi - From 56045c12935f3856a4a608f8c31f8b0cdbd67a18 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 23 Feb 2017 09:02:13 +0100 Subject: [PATCH 878/912] Remove r_drawers.h and move softpoly drawer related stuff out of the swrenderer --- src/CMakeLists.txt | 1 + src/polyrenderer/drawers/poly_buffer.cpp | 2 +- src/polyrenderer/drawers/poly_draw_args.h | 2 +- src/polyrenderer/drawers/poly_triangle.h | 2 +- src/polyrenderer/drawers/screen_triangle.h | 121 ++++++++++++++++- src/swrenderer/drawers/r_draw.h | 1 - src/swrenderer/drawers/r_draw_pal.cpp | 26 ++-- src/swrenderer/drawers/r_draw_pal.h | 8 +- src/swrenderer/drawers/r_draw_rgba.cpp | 1 - src/swrenderer/drawers/r_draw_rgba.h | 1 - src/swrenderer/drawers/r_drawers.h | 151 --------------------- src/swrenderer/drawers/r_thread.h | 2 +- src/swrenderer/line/r_walldraw.cpp | 3 +- src/swrenderer/plane/r_flatplane.cpp | 2 +- src/swrenderer/r_swcanvas.cpp | 1 - src/swrenderer/r_swrenderer.cpp | 1 - src/swrenderer/viewport/r_drawerargs.h | 8 +- src/swrenderer/viewport/r_skydrawer.h | 1 - src/swrenderer/viewport/r_spandrawer.h | 3 +- src/swrenderer/viewport/r_spritedrawer.h | 1 - src/swrenderer/viewport/r_walldrawer.h | 3 +- 21 files changed, 153 insertions(+), 188 deletions(-) delete mode 100644 
src/swrenderer/drawers/r_drawers.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 7a66ede47a..897d78721c 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -801,6 +801,7 @@ file( GLOB HEADER_FILES polyrenderer/*.h polyrenderer/math/*.h polyrenderer/drawers/*.h + polyrenderer/drawers/*.php polyrenderer/scene/*.h gl/*.h gl/api/*.h diff --git a/src/polyrenderer/drawers/poly_buffer.cpp b/src/polyrenderer/drawers/poly_buffer.cpp index 5ad9f4474e..291e05acb4 100644 --- a/src/polyrenderer/drawers/poly_buffer.cpp +++ b/src/polyrenderer/drawers/poly_buffer.cpp @@ -34,7 +34,7 @@ #include "v_palette.h" #include "r_data/colormaps.h" #include "poly_buffer.h" -#include "swrenderer/drawers/r_drawers.h" +#include "screen_triangle.h" ///////////////////////////////////////////////////////////////////////////// diff --git a/src/polyrenderer/drawers/poly_draw_args.h b/src/polyrenderer/drawers/poly_draw_args.h index 78e6d07a23..bf38ffab9e 100644 --- a/src/polyrenderer/drawers/poly_draw_args.h +++ b/src/polyrenderer/drawers/poly_draw_args.h @@ -24,7 +24,7 @@ #include "r_data/r_translate.h" #include "r_data/colormaps.h" -#include "swrenderer/drawers/r_drawers.h" +#include "screen_triangle.h" class FTexture; diff --git a/src/polyrenderer/drawers/poly_triangle.h b/src/polyrenderer/drawers/poly_triangle.h index bc2357b484..8debeec8ac 100644 --- a/src/polyrenderer/drawers/poly_triangle.h +++ b/src/polyrenderer/drawers/poly_triangle.h @@ -24,7 +24,7 @@ #include "swrenderer/drawers/r_draw.h" #include "swrenderer/drawers/r_thread.h" -#include "swrenderer/drawers/r_drawers.h" +#include "polyrenderer/drawers/screen_triangle.h" #include "polyrenderer/math/tri_matrix.h" #include "polyrenderer/drawers/poly_buffer.h" #include "polyrenderer/drawers/poly_draw_args.h" diff --git a/src/polyrenderer/drawers/screen_triangle.h b/src/polyrenderer/drawers/screen_triangle.h index 1f11aad545..cd2a6cbe08 100644 --- a/src/polyrenderer/drawers/screen_triangle.h +++ 
b/src/polyrenderer/drawers/screen_triangle.h @@ -22,7 +22,126 @@ #pragma once -#include "swrenderer/drawers/r_drawers.h" +#include +#include + +class FString; + +struct TriFullSpan +{ + uint16_t X; + uint16_t Y; + uint32_t Length; +}; + +struct TriPartialBlock +{ + uint16_t X; + uint16_t Y; + uint32_t Mask0; + uint32_t Mask1; +}; + +struct WorkerThreadData +{ + int32_t core; + int32_t num_cores; + int32_t pass_start_y; + int32_t pass_end_y; + uint32_t *temp; + + // Triangle working data: + TriFullSpan *FullSpans; + TriPartialBlock *PartialBlocks; + uint32_t NumFullSpans; + uint32_t NumPartialBlocks; + int32_t StartX; + int32_t StartY; +}; + +struct TriVertex +{ + TriVertex() { } + TriVertex(float x, float y, float z, float w, float u, float v) : x(x), y(y), z(z), w(w) { varying[0] = u; varying[1] = v; } + + enum { NumVarying = 2 }; + float x, y, z, w; + float varying[NumVarying]; +}; + +struct TriUniforms +{ + uint32_t light; + uint32_t subsectorDepth; + uint32_t color; + uint32_t srcalpha; + uint32_t destalpha; + uint16_t light_alpha; + uint16_t light_red; + uint16_t light_green; + uint16_t light_blue; + uint16_t fade_alpha; + uint16_t fade_red; + uint16_t fade_green; + uint16_t fade_blue; + uint16_t desaturate; + float globvis; + uint32_t flags; + enum Flags + { + simple_shade = 1, + nearest_filter = 2, + fixed_light = 4 + }; +}; + +struct TriDrawTriangleArgs +{ + uint8_t *dest; + int32_t pitch; + TriVertex *v1; + TriVertex *v2; + TriVertex *v3; + int32_t clipleft; + int32_t clipright; + int32_t cliptop; + int32_t clipbottom; + const uint8_t *texturePixels; + uint32_t textureWidth; + uint32_t textureHeight; + const uint8_t *translation; + const TriUniforms *uniforms; + uint8_t *stencilValues; + uint32_t *stencilMasks; + int32_t stencilPitch; + uint8_t stencilTestValue; + uint8_t stencilWriteValue; + uint32_t *subsectorGBuffer; + const uint8_t *colormaps; + const uint8_t *RGB256k; + const uint8_t *BaseColors; +}; + +enum class TriBlendMode +{ + Copy, // 
blend_copy(shade(fg)) + AlphaBlend, // blend_alpha_blend(shade(fg), bg) + AddSolid, // blend_add(shade(fg), bg, srcalpha, destalpha) + Add, // blend_add(shade(fg), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)) + Sub, // blend_sub(shade(fg), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)) + RevSub, // blend_revsub(shade(fg), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)) + Stencil, // blend_stencil(shade(color), fg.a, bg, srcalpha, calc_blend_bgalpha(fg, destalpha)) + Shaded, // blend_stencil(shade(color), fg.index, bg, srcalpha, calc_blend_bgalpha(fg, destalpha)) + TranslateCopy, // blend_copy(shade(translate(fg))) + TranslateAlphaBlend, // blend_alpha_blend(shade(translate(fg)), bg) + TranslateAdd, // blend_add(shade(translate(fg)), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)) + TranslateSub, // blend_sub(shade(translate(fg)), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)) + TranslateRevSub,// blend_revsub(shade(translate(fg)), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)) + AddSrcColorOneMinusSrcColor, // glBlendMode(GL_SRC_COLOR, GL_ONE_MINUS_SRC_COLOR) used by GZDoom's fullbright additive sprites + Skycap // Fade to sky color when the V texture coordinate go beyond the [-1, 1] range +}; + +inline int NumTriBlendModes() { return (int)TriBlendMode::Skycap + 1; } class ScreenTriangle { diff --git a/src/swrenderer/drawers/r_draw.h b/src/swrenderer/drawers/r_draw.h index 66843cc4ee..29b908398b 100644 --- a/src/swrenderer/drawers/r_draw.h +++ b/src/swrenderer/drawers/r_draw.h @@ -6,7 +6,6 @@ struct FSWColormap; struct FLightNode; -struct TriLight; EXTERN_CVAR(Bool, r_multithreaded); EXTERN_CVAR(Bool, r_magfilter); diff --git a/src/swrenderer/drawers/r_draw_pal.cpp b/src/swrenderer/drawers/r_draw_pal.cpp index fcd14a27bc..358bafbd3d 100644 --- a/src/swrenderer/drawers/r_draw_pal.cpp +++ b/src/swrenderer/drawers/r_draw_pal.cpp @@ -115,7 +115,7 @@ namespace swrenderer _step_viewpos_z = args.dc_viewpos_step.Z; } - uint8_t PalWall1Command::AddLights(const 
TriLight *lights, int num_lights, float viewpos_z, uint8_t fg, uint8_t material) + uint8_t PalWall1Command::AddLights(const DrawerLight *lights, int num_lights, float viewpos_z, uint8_t fg, uint8_t material) { uint32_t lit_r = 0; uint32_t lit_g = 0; @@ -175,7 +175,7 @@ namespace swrenderer uint8_t *dest = _dest; int bits = _fracbits; int pitch = _pitch; - TriLight *dynlights = _dynlights; + DrawerLight *dynlights = _dynlights; int num_dynlights = _num_dynlights; float viewpos_z = _viewpos_z; float step_viewpos_z = _step_viewpos_z; @@ -226,7 +226,7 @@ namespace swrenderer uint8_t *dest = _dest; int bits = _fracbits; int pitch = _pitch; - TriLight *dynlights = _dynlights; + DrawerLight *dynlights = _dynlights; int num_dynlights = _num_dynlights; float viewpos_z = _viewpos_z; float step_viewpos_z = _step_viewpos_z; @@ -347,7 +347,7 @@ namespace swrenderer uint8_t *dest = _dest; int bits = _fracbits; int pitch = _pitch; - TriLight *dynlights = _dynlights; + DrawerLight *dynlights = _dynlights; int num_dynlights = _num_dynlights; float viewpos_z = _viewpos_z; float step_viewpos_z = _step_viewpos_z; @@ -421,7 +421,7 @@ namespace swrenderer uint8_t *dest = _dest; int bits = _fracbits; int pitch = _pitch; - TriLight *dynlights = _dynlights; + DrawerLight *dynlights = _dynlights; int num_dynlights = _num_dynlights; float viewpos_z = _viewpos_z; float step_viewpos_z = _step_viewpos_z; @@ -494,7 +494,7 @@ namespace swrenderer uint8_t *dest = _dest; int bits = _fracbits; int pitch = _pitch; - TriLight *dynlights = _dynlights; + DrawerLight *dynlights = _dynlights; int num_dynlights = _num_dynlights; float viewpos_z = _viewpos_z; float step_viewpos_z = _step_viewpos_z; @@ -1874,7 +1874,7 @@ namespace swrenderer _step_viewpos_x = args.dc_viewpos_step.X; } - uint8_t PalSpanCommand::AddLights(const TriLight *lights, int num_lights, float viewpos_x, uint8_t fg, uint8_t material) + uint8_t PalSpanCommand::AddLights(const DrawerLight *lights, int num_lights, float viewpos_x, uint8_t 
fg, uint8_t material) { uint32_t lit_r = 0; uint32_t lit_g = 0; @@ -1949,7 +1949,7 @@ namespace swrenderer xstep = _xstep; ystep = _ystep; - const TriLight *dynlights = _dynlights; + const DrawerLight *dynlights = _dynlights; int num_dynlights = _num_dynlights; float viewpos_x = _viewpos_x; float step_viewpos_x = _step_viewpos_x; @@ -2037,7 +2037,7 @@ namespace swrenderer xstep = _xstep; ystep = _ystep; - const TriLight *dynlights = _dynlights; + const DrawerLight *dynlights = _dynlights; int num_dynlights = _num_dynlights; float viewpos_x = _viewpos_x; float step_viewpos_x = _step_viewpos_x; @@ -2113,7 +2113,7 @@ namespace swrenderer const PalEntry *palette = GPalette.BaseColors; - const TriLight *dynlights = _dynlights; + const DrawerLight *dynlights = _dynlights; int num_dynlights = _num_dynlights; float viewpos_x = _viewpos_x; float step_viewpos_x = _step_viewpos_x; @@ -2219,7 +2219,7 @@ namespace swrenderer const PalEntry *palette = GPalette.BaseColors; - const TriLight *dynlights = _dynlights; + const DrawerLight *dynlights = _dynlights; int num_dynlights = _num_dynlights; float viewpos_x = _viewpos_x; float step_viewpos_x = _step_viewpos_x; @@ -2360,7 +2360,7 @@ namespace swrenderer uint32_t *bg2rgb = _destblend; const PalEntry *palette = GPalette.BaseColors; - const TriLight *dynlights = _dynlights; + const DrawerLight *dynlights = _dynlights; int num_dynlights = _num_dynlights; float viewpos_x = _viewpos_x; float step_viewpos_x = _step_viewpos_x; @@ -2483,7 +2483,7 @@ namespace swrenderer uint32_t *bg2rgb = _destblend; const PalEntry *palette = GPalette.BaseColors; - const TriLight *dynlights = _dynlights; + const DrawerLight *dynlights = _dynlights; int num_dynlights = _num_dynlights; float viewpos_x = _viewpos_x; float step_viewpos_x = _step_viewpos_x; diff --git a/src/swrenderer/drawers/r_draw_pal.h b/src/swrenderer/drawers/r_draw_pal.h index efc6d6a17c..dc499d79ec 100644 --- a/src/swrenderer/drawers/r_draw_pal.h +++ 
b/src/swrenderer/drawers/r_draw_pal.h @@ -18,7 +18,7 @@ namespace swrenderer FString DebugInfo() override { return "PalWallCommand"; } protected: - inline static uint8_t AddLights(const TriLight *lights, int num_lights, float viewpos_z, uint8_t fg, uint8_t material); + inline static uint8_t AddLights(const DrawerLight *lights, int num_lights, float viewpos_z, uint8_t fg, uint8_t material); uint32_t _iscale; uint32_t _texturefrac; @@ -31,7 +31,7 @@ namespace swrenderer int _pitch; uint32_t *_srcblend; uint32_t *_destblend; - TriLight *_dynlights; + DrawerLight *_dynlights; int _num_dynlights; float _viewpos_z; float _step_viewpos_z; @@ -134,7 +134,7 @@ namespace swrenderer FString DebugInfo() override { return "PalSpanCommand"; } protected: - inline static uint8_t AddLights(const TriLight *lights, int num_lights, float viewpos_x, uint8_t fg, uint8_t material); + inline static uint8_t AddLights(const DrawerLight *lights, int num_lights, float viewpos_x, uint8_t fg, uint8_t material); const uint8_t *_source; const uint8_t *_colormap; @@ -153,7 +153,7 @@ namespace swrenderer int _color; fixed_t _srcalpha; fixed_t _destalpha; - TriLight *_dynlights; + DrawerLight *_dynlights; int _num_dynlights; float _viewpos_x; float _step_viewpos_x; diff --git a/src/swrenderer/drawers/r_draw_rgba.cpp b/src/swrenderer/drawers/r_draw_rgba.cpp index f9c120a594..36f85f6e49 100644 --- a/src/swrenderer/drawers/r_draw_rgba.cpp +++ b/src/swrenderer/drawers/r_draw_rgba.cpp @@ -36,7 +36,6 @@ #include "v_palette.h" #include "r_data/colormaps.h" #include "r_draw_rgba.h" -#include "r_drawers.h" #include "gl/data/gl_matrix.h" #include "swrenderer/viewport/r_viewport.h" #include "swrenderer/scene/r_light.h" diff --git a/src/swrenderer/drawers/r_draw_rgba.h b/src/swrenderer/drawers/r_draw_rgba.h index 1ed068f827..2eab9a1854 100644 --- a/src/swrenderer/drawers/r_draw_rgba.h +++ b/src/swrenderer/drawers/r_draw_rgba.h @@ -25,7 +25,6 @@ #include "r_draw.h" #include "v_palette.h" #include "r_thread.h" 
-#include "r_drawers.h" #include "swrenderer/viewport/r_skydrawer.h" #include "swrenderer/viewport/r_spandrawer.h" #include "swrenderer/viewport/r_walldrawer.h" diff --git a/src/swrenderer/drawers/r_drawers.h b/src/swrenderer/drawers/r_drawers.h deleted file mode 100644 index 6205940bfd..0000000000 --- a/src/swrenderer/drawers/r_drawers.h +++ /dev/null @@ -1,151 +0,0 @@ -/* -** LLVM code generated drawers -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 'as-is', without any express or implied -** warranty. In no event will the authors be held liable for any damages -** arising from the use of this software. -** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. -** 3. This notice may not be removed or altered from any source distribution. 
-** -*/ - -#pragma once - -#include -#include - -class FString; - -struct TriFullSpan -{ - uint16_t X; - uint16_t Y; - uint32_t Length; -}; - -struct TriPartialBlock -{ - uint16_t X; - uint16_t Y; - uint32_t Mask0; - uint32_t Mask1; -}; - -struct WorkerThreadData -{ - int32_t core; - int32_t num_cores; - int32_t pass_start_y; - int32_t pass_end_y; - uint32_t *temp; - - // Triangle working data: - TriFullSpan *FullSpans; - TriPartialBlock *PartialBlocks; - uint32_t NumFullSpans; - uint32_t NumPartialBlocks; - int32_t StartX; - int32_t StartY; -}; - -struct TriLight -{ - uint32_t color; - float x, y, z; - float radius; -}; - -struct TriVertex -{ - TriVertex() { } - TriVertex(float x, float y, float z, float w, float u, float v) : x(x), y(y), z(z), w(w) { varying[0] = u; varying[1] = v; } - - enum { NumVarying = 2 }; - float x, y, z, w; - float varying[NumVarying]; -}; - -struct TriUniforms -{ - uint32_t light; - uint32_t subsectorDepth; - uint32_t color; - uint32_t srcalpha; - uint32_t destalpha; - uint16_t light_alpha; - uint16_t light_red; - uint16_t light_green; - uint16_t light_blue; - uint16_t fade_alpha; - uint16_t fade_red; - uint16_t fade_green; - uint16_t fade_blue; - uint16_t desaturate; - float globvis; - uint32_t flags; - enum Flags - { - simple_shade = 1, - nearest_filter = 2, - fixed_light = 4 - }; -}; - -struct TriDrawTriangleArgs -{ - uint8_t *dest; - int32_t pitch; - TriVertex *v1; - TriVertex *v2; - TriVertex *v3; - int32_t clipleft; - int32_t clipright; - int32_t cliptop; - int32_t clipbottom; - const uint8_t *texturePixels; - uint32_t textureWidth; - uint32_t textureHeight; - const uint8_t *translation; - const TriUniforms *uniforms; - uint8_t *stencilValues; - uint32_t *stencilMasks; - int32_t stencilPitch; - uint8_t stencilTestValue; - uint8_t stencilWriteValue; - uint32_t *subsectorGBuffer; - const uint8_t *colormaps; - const uint8_t *RGB256k; - const uint8_t *BaseColors; -}; - -enum class TriBlendMode -{ - Copy, // blend_copy(shade(fg)) - 
AlphaBlend, // blend_alpha_blend(shade(fg), bg) - AddSolid, // blend_add(shade(fg), bg, srcalpha, destalpha) - Add, // blend_add(shade(fg), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)) - Sub, // blend_sub(shade(fg), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)) - RevSub, // blend_revsub(shade(fg), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)) - Stencil, // blend_stencil(shade(color), fg.a, bg, srcalpha, calc_blend_bgalpha(fg, destalpha)) - Shaded, // blend_stencil(shade(color), fg.index, bg, srcalpha, calc_blend_bgalpha(fg, destalpha)) - TranslateCopy, // blend_copy(shade(translate(fg))) - TranslateAlphaBlend, // blend_alpha_blend(shade(translate(fg)), bg) - TranslateAdd, // blend_add(shade(translate(fg)), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)) - TranslateSub, // blend_sub(shade(translate(fg)), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)) - TranslateRevSub,// blend_revsub(shade(translate(fg)), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)) - AddSrcColorOneMinusSrcColor, // glBlendMode(GL_SRC_COLOR, GL_ONE_MINUS_SRC_COLOR) used by GZDoom's fullbright additive sprites - Skycap // Fade to sky color when the V texture coordinate go beyond the [-1, 1] range -}; - -inline int NumTriBlendModes() { return (int)TriBlendMode::Skycap + 1; } diff --git a/src/swrenderer/drawers/r_thread.h b/src/swrenderer/drawers/r_thread.h index da24854eb5..cc8d33eabe 100644 --- a/src/swrenderer/drawers/r_thread.h +++ b/src/swrenderer/drawers/r_thread.h @@ -23,7 +23,7 @@ #pragma once #include "r_draw.h" -#include "r_drawers.h" +#include "polyrenderer/drawers/screen_triangle.h" #include #include #include diff --git a/src/swrenderer/line/r_walldraw.cpp b/src/swrenderer/line/r_walldraw.cpp index 1d2760e769..5f4cd8485a 100644 --- a/src/swrenderer/line/r_walldraw.cpp +++ b/src/swrenderer/line/r_walldraw.cpp @@ -29,7 +29,6 @@ #include "v_palette.h" #include "r_data/colormaps.h" #include "gl/dynlights/gl_dynlight.h" -#include "swrenderer/drawers/r_drawers.h" #include 
"swrenderer/drawers/r_draw.h" #include "swrenderer/segments/r_drawsegment.h" #include "swrenderer/scene/r_opaque_pass.h" @@ -200,7 +199,7 @@ namespace swrenderer } drawerargs.dc_num_lights = 0; - drawerargs.dc_lights = Thread->FrameMemory->AllocMemory(max_lights); + drawerargs.dc_lights = Thread->FrameMemory->AllocMemory(max_lights); // Setup lights for column cur_node = light_list; diff --git a/src/swrenderer/plane/r_flatplane.cpp b/src/swrenderer/plane/r_flatplane.cpp index 2d1b6ae943..28961baad2 100644 --- a/src/swrenderer/plane/r_flatplane.cpp +++ b/src/swrenderer/plane/r_flatplane.cpp @@ -204,7 +204,7 @@ namespace swrenderer drawerargs.dc_viewpos.Z = (float)((viewport->CenterY - y - 0.5) / viewport->InvZtoScale * zspan); drawerargs.dc_viewpos_step.X = (float)(zspan / viewport->CenterX); - static TriLight lightbuffer[64 * 1024]; + static DrawerLight lightbuffer[64 * 1024]; static int nextlightindex = 0; // Plane normal diff --git a/src/swrenderer/r_swcanvas.cpp b/src/swrenderer/r_swcanvas.cpp index ce089272bd..73d17ca3cf 100644 --- a/src/swrenderer/r_swcanvas.cpp +++ b/src/swrenderer/r_swcanvas.cpp @@ -43,7 +43,6 @@ #include "textures/textures.h" #include "r_data/voxels.h" #include "drawers/r_draw_rgba.h" -#include "drawers/r_drawers.h" EXTERN_CVAR(Bool, r_blendmethod) diff --git a/src/swrenderer/r_swrenderer.cpp b/src/swrenderer/r_swrenderer.cpp index 0366886cec..75a53102bb 100644 --- a/src/swrenderer/r_swrenderer.cpp +++ b/src/swrenderer/r_swrenderer.cpp @@ -47,7 +47,6 @@ #include "textures/textures.h" #include "r_data/voxels.h" #include "drawers/r_draw_rgba.h" -#include "drawers/r_drawers.h" #include "polyrenderer/poly_renderer.h" #include "p_setup.h" diff --git a/src/swrenderer/viewport/r_drawerargs.h b/src/swrenderer/viewport/r_drawerargs.h index 3172a54401..ad77dadd29 100644 --- a/src/swrenderer/viewport/r_drawerargs.h +++ b/src/swrenderer/viewport/r_drawerargs.h @@ -13,7 +13,6 @@ struct FSWColormap; struct FLightNode; -struct TriLight; namespace 
swrenderer { @@ -21,6 +20,13 @@ namespace swrenderer class DrawerArgs; struct ShadeConstants; + struct DrawerLight + { + uint32_t color; + float x, y, z; + float radius; + }; + class DrawerArgs { public: diff --git a/src/swrenderer/viewport/r_skydrawer.h b/src/swrenderer/viewport/r_skydrawer.h index 9389509686..b4d23a7ee5 100644 --- a/src/swrenderer/viewport/r_skydrawer.h +++ b/src/swrenderer/viewport/r_skydrawer.h @@ -5,7 +5,6 @@ struct FSWColormap; struct FLightNode; -struct TriLight; namespace swrenderer { diff --git a/src/swrenderer/viewport/r_spandrawer.h b/src/swrenderer/viewport/r_spandrawer.h index 8275cc6d5c..368ae8a327 100644 --- a/src/swrenderer/viewport/r_spandrawer.h +++ b/src/swrenderer/viewport/r_spandrawer.h @@ -5,7 +5,6 @@ struct FSWColormap; struct FLightNode; -struct TriLight; namespace swrenderer { @@ -54,7 +53,7 @@ namespace swrenderer FVector3 dc_normal; FVector3 dc_viewpos; FVector3 dc_viewpos_step; - TriLight *dc_lights = nullptr; + DrawerLight *dc_lights = nullptr; int dc_num_lights = 0; private: diff --git a/src/swrenderer/viewport/r_spritedrawer.h b/src/swrenderer/viewport/r_spritedrawer.h index bca4664aa1..31d3861d9c 100644 --- a/src/swrenderer/viewport/r_spritedrawer.h +++ b/src/swrenderer/viewport/r_spritedrawer.h @@ -5,7 +5,6 @@ struct FSWColormap; struct FLightNode; -struct TriLight; namespace swrenderer { diff --git a/src/swrenderer/viewport/r_walldrawer.h b/src/swrenderer/viewport/r_walldrawer.h index 68a3ea46f3..e6d5309328 100644 --- a/src/swrenderer/viewport/r_walldrawer.h +++ b/src/swrenderer/viewport/r_walldrawer.h @@ -5,7 +5,6 @@ struct FSWColormap; struct FLightNode; -struct TriLight; namespace swrenderer { @@ -54,7 +53,7 @@ namespace swrenderer FVector3 dc_normal; FVector3 dc_viewpos; FVector3 dc_viewpos_step; - TriLight *dc_lights = nullptr; + DrawerLight *dc_lights = nullptr; int dc_num_lights = 0; private: From 0702e4523da1fad519418af6fc084d60f64868f4 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 23 Feb 2017 
09:27:00 +0100 Subject: [PATCH 879/912] Fix that sprite light wasn't calculated using the center of the sprite --- src/swrenderer/things/r_sprite.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/swrenderer/things/r_sprite.cpp b/src/swrenderer/things/r_sprite.cpp index 49c823ba28..2ebf1fd579 100644 --- a/src/swrenderer/things/r_sprite.cpp +++ b/src/swrenderer/things/r_sprite.cpp @@ -59,6 +59,7 @@ #include "gl/dynlights/gl_dynlight.h" EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor) +EXTERN_CVAR(Bool, gl_light_sprites) namespace swrenderer { @@ -227,7 +228,7 @@ namespace swrenderer bool fullbright = !vis->foggy && ((renderflags & RF_FULLBRIGHT) || (thing->flags5 & MF5_BRIGHT)); bool fadeToBlack = (vis->RenderStyle.Flags & STYLEF_FadeToBlack) != 0; - if (r_dynlights) + if (r_dynlights && gl_light_sprites) { float lit_red = 0; float lit_green = 0; @@ -238,9 +239,9 @@ namespace swrenderer ADynamicLight *light = node->lightsource; if (light->visibletoplayer && !(light->flags2&MF2_DORMANT) && (!(light->flags4&MF4_DONTLIGHTSELF) || light->target != thing)) { - float lx = (float)(light->X() - pos.X); - float ly = (float)(light->Y() - pos.Y); - float lz = (float)(light->Z() - pos.Z); + float lx = (float)(light->X() - thing->X()); + float ly = (float)(light->Y() - thing->Y()); + float lz = (float)(light->Z() - thing->Center()); float LdotL = lx * lx + ly * ly + lz * lz; float radius = node->lightsource->GetRadius(); if (radius * radius >= LdotL) From 1a9b1de9a1f855102bd00250d8abb9d6a7ab6300 Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Thu, 23 Feb 2017 03:39:04 -0500 Subject: [PATCH 880/912] - Added "clipmidtex" compatflag and applied it to unloved.pk3 --- src/compatibility.cpp | 2 +- src/d_main.cpp | 1 + src/doomdef.h | 1 + src/g_mapinfo.cpp | 1 + src/p_setup.cpp | 6 ++++-- src/p_udmf.cpp | 3 ++- wadsrc/static/compatibility.txt | 25 +++++++++++++++++++++++++ 7 files changed, 35 insertions(+), 4 deletions(-) diff --git 
a/src/compatibility.cpp b/src/compatibility.cpp index 0b27a1efa0..a1fb2701a5 100644 --- a/src/compatibility.cpp +++ b/src/compatibility.cpp @@ -151,7 +151,7 @@ static FCompatOption Options[] = { "multiexit", COMPATF2_MULTIEXIT, SLOT_COMPAT2 }, { "teleport", COMPATF2_TELEPORT, SLOT_COMPAT2 }, { "disablepushwindowcheck", COMPATF2_PUSHWINDOW, SLOT_COMPAT2 }, - + { "clipmidtex", COMPATF2_CLIPMIDTEX, SLOT_COMPAT2 }, { NULL, 0, 0 } }; diff --git a/src/d_main.cpp b/src/d_main.cpp index a30641d60a..1ecc62fd07 100644 --- a/src/d_main.cpp +++ b/src/d_main.cpp @@ -641,6 +641,7 @@ CVAR (Flag, compat_pointonline, compatflags2, COMPATF2_POINTONLINE); CVAR (Flag, compat_multiexit, compatflags2, COMPATF2_MULTIEXIT); CVAR (Flag, compat_teleport, compatflags2, COMPATF2_TELEPORT); CVAR (Flag, compat_pushwindow, compatflags2, COMPATF2_PUSHWINDOW); +CVAR (Flag, compat_clipmidtex, compatflags2, COMPATF2_CLIPMIDTEX); //========================================================================== // diff --git a/src/doomdef.h b/src/doomdef.h index 5a1d1f95f4..d8116fb025 100644 --- a/src/doomdef.h +++ b/src/doomdef.h @@ -344,6 +344,7 @@ enum : unsigned int COMPATF2_MULTIEXIT = 1 << 4, // Level exit can be triggered multiple times (required by Daedalus's travel tubes, thanks to a faulty script) COMPATF2_TELEPORT = 1 << 5, // Don't let indirect teleports trigger sector actions COMPATF2_PUSHWINDOW = 1 << 6, // Disable the window check in CheckForPushSpecial() + COMPATF2_CLIPMIDTEX = 1 << 7, // Always Clip midtex's in the software renderer (required to run certain GZDoom maps) }; // Emulate old bugs for select maps. 
These are not exposed by a cvar diff --git a/src/g_mapinfo.cpp b/src/g_mapinfo.cpp index f83e18ece4..19eb692c76 100644 --- a/src/g_mapinfo.cpp +++ b/src/g_mapinfo.cpp @@ -1330,6 +1330,7 @@ MapFlagHandlers[] = { "compat_multiexit", MITYPE_COMPATFLAG, 0, COMPATF2_MULTIEXIT }, { "compat_teleport", MITYPE_COMPATFLAG, 0, COMPATF2_TELEPORT }, { "compat_pushwindow", MITYPE_COMPATFLAG, 0, COMPATF2_PUSHWINDOW }, + { "compat_clipmidtex", MITYPE_COMPATFLAG, 0, COMPATF2_CLIPMIDTEX }, { "cd_start_track", MITYPE_EATNEXT, 0, 0 }, { "cd_end1_track", MITYPE_EATNEXT, 0, 0 }, { "cd_end2_track", MITYPE_EATNEXT, 0, 0 }, diff --git a/src/p_setup.cpp b/src/p_setup.cpp index ac7faea765..59cbfdf36c 100644 --- a/src/p_setup.cpp +++ b/src/p_setup.cpp @@ -2176,7 +2176,8 @@ void P_LoadLineDefs (MapData * map) P_AdjustLine (ld); P_SaveLineSpecial (ld); - if (level.flags2 & LEVEL2_CLIPMIDTEX) ld->flags |= ML_CLIP_MIDTEX; + if ((level.flags2 & LEVEL2_CLIPMIDTEX) || (ii_compatflags2 & COMPATF2_CLIPMIDTEX)) + ld->flags |= ML_CLIP_MIDTEX; if (level.flags2 & LEVEL2_WRAPMIDTEX) ld->flags |= ML_WRAP_MIDTEX; if (level.flags2 & LEVEL2_CHECKSWITCHRANGE) ld->flags |= ML_CHECKSWITCHRANGE; } @@ -2262,7 +2263,8 @@ void P_LoadLineDefs2 (MapData * map) P_AdjustLine (ld); P_SetLineID(i, ld); P_SaveLineSpecial (ld); - if (level.flags2 & LEVEL2_CLIPMIDTEX) ld->flags |= ML_CLIP_MIDTEX; + if ((level.flags2 & LEVEL2_CLIPMIDTEX) || (ii_compatflags2 & COMPATF2_CLIPMIDTEX)) + ld->flags |= ML_CLIP_MIDTEX; if (level.flags2 & LEVEL2_WRAPMIDTEX) ld->flags |= ML_WRAP_MIDTEX; if (level.flags2 & LEVEL2_CHECKSWITCHRANGE) ld->flags |= ML_CHECKSWITCHRANGE; diff --git a/src/p_udmf.cpp b/src/p_udmf.cpp index fc71160ae9..4ae40d565c 100644 --- a/src/p_udmf.cpp +++ b/src/p_udmf.cpp @@ -838,7 +838,8 @@ public: ld->portalindex = UINT_MAX; ld->portaltransferred = UINT_MAX; ld->sidedef[0] = ld->sidedef[1] = NULL; - if (level.flags2 & LEVEL2_CLIPMIDTEX) ld->flags |= ML_CLIP_MIDTEX; + if ((level.flags2 & LEVEL2_CLIPMIDTEX) || 
(ii_compatflags2 & COMPATF2_CLIPMIDTEX)) + ld->flags |= ML_CLIP_MIDTEX; if (level.flags2 & LEVEL2_WRAPMIDTEX) ld->flags |= ML_WRAP_MIDTEX; if (level.flags2 & LEVEL2_CHECKSWITCHRANGE) ld->flags |= ML_CHECKSWITCHRANGE; diff --git a/wadsrc/static/compatibility.txt b/wadsrc/static/compatibility.txt index 020d8477d6..4750a2bdf4 100644 --- a/wadsrc/static/compatibility.txt +++ b/wadsrc/static/compatibility.txt @@ -510,3 +510,28 @@ ABC4EB5A1535ECCD0061AD14F3547908 // Plutonia Experiment, map26 setlinespecial 2410 Sector_Set3DFloor 32002 4 1 0 0 } +1ED329858AB154C55878DA1C11A4F100 // unloved.pk3:unlovedmaps.wad map01 +{ + clipmidtex +} + +FA23E72FA955E66EC68609F72C0BA71E // unloved.pk3:unlovedmaps.wad map02 +{ + clipmidtex +} + +41BEC1F643CFEEC997AF98276A05EC88 // unloved.pk3:unlovedmaps.wad map03 +{ + clipmidtex +} + +AF9A6370BE562584BC11165ECF364713 // unloved.pk3:unlovedmaps.wad map04 +{ + clipmidtex +} + +DC96228097DD004C40CCB1DB14A91EAA // unloved.pk3:unlovedmaps.wad map05 +{ + clipmidtex +} + From 3c3917f1f385876c481c316136a8c9983a4fc108 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 23 Feb 2017 10:55:44 +0100 Subject: [PATCH 881/912] Change dynlight sprite math to not overflow when used with dynamic colormaps --- src/swrenderer/drawers/r_draw_sprite32.h | 256 ++++++++++++++++++--- src/swrenderer/drawers/r_draw_sprite32.php | 12 +- 2 files changed, 233 insertions(+), 35 deletions(-) diff --git a/src/swrenderer/drawers/r_draw_sprite32.h b/src/swrenderer/drawers/r_draw_sprite32.h index f8d596642d..1264d5e278 100644 --- a/src/swrenderer/drawers/r_draw_sprite32.h +++ b/src/swrenderer/drawers/r_draw_sprite32.h @@ -931,7 +931,10 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + lightcontrib = 
_mm_sub_epi16(lightcontrib, mlight); + lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -948,6 +951,9 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend __m128i outcolor = fgcolor; @@ -975,7 +981,10 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + lightcontrib = _mm_sub_epi16(lightcontrib, mlight); + lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -992,6 +1001,9 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend __m128i outcolor = fgcolor; @@ -1108,7 +1120,10 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + lightcontrib = _mm_sub_epi16(lightcontrib, mlight); + lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + int blue0 = BPART(ifgcolor[0]); int green0 = 
GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -1125,6 +1140,9 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend __m128i outcolor = fgcolor; @@ -1172,7 +1190,10 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + lightcontrib = _mm_sub_epi16(lightcontrib, mlight); + lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -1189,6 +1210,9 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend __m128i outcolor = fgcolor; @@ -1676,7 +1700,10 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + lightcontrib = _mm_sub_epi16(lightcontrib, mlight); + lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -1693,6 +1720,9 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = 
_mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend uint32_t alpha0 = APART(ifgcolor[0]); @@ -1750,7 +1780,10 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + lightcontrib = _mm_sub_epi16(lightcontrib, mlight); + lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -1767,6 +1800,9 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend uint32_t alpha0 = APART(ifgcolor[0]); @@ -1913,7 +1949,10 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + lightcontrib = _mm_sub_epi16(lightcontrib, mlight); + lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -1930,6 +1969,9 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + fgcolor = 
_mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend uint32_t alpha0 = APART(ifgcolor[0]); @@ -2007,7 +2049,10 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + lightcontrib = _mm_sub_epi16(lightcontrib, mlight); + lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -2024,6 +2069,9 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend uint32_t alpha0 = APART(ifgcolor[0]); @@ -2540,7 +2588,10 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + lightcontrib = _mm_sub_epi16(lightcontrib, mlight); + lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -2557,6 +2608,9 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend uint32_t alpha0 = APART(ifgcolor[0]); @@ 
-2614,7 +2668,10 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + lightcontrib = _mm_sub_epi16(lightcontrib, mlight); + lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -2631,6 +2688,9 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend uint32_t alpha0 = APART(ifgcolor[0]); @@ -2777,7 +2837,10 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + lightcontrib = _mm_sub_epi16(lightcontrib, mlight); + lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -2794,6 +2857,9 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend uint32_t alpha0 = APART(ifgcolor[0]); @@ -2871,7 +2937,10 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - 
mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + lightcontrib = _mm_sub_epi16(lightcontrib, mlight); + lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -2888,6 +2957,9 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend uint32_t alpha0 = APART(ifgcolor[0]); @@ -3404,7 +3476,10 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + lightcontrib = _mm_sub_epi16(lightcontrib, mlight); + lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -3421,6 +3496,9 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend uint32_t alpha0 = APART(ifgcolor[0]); @@ -3478,7 +3556,10 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, 
dynlight), _mm_set1_epi16(256)); + lightcontrib = _mm_sub_epi16(lightcontrib, mlight); + lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -3495,6 +3576,9 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend uint32_t alpha0 = APART(ifgcolor[0]); @@ -3641,7 +3725,10 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + lightcontrib = _mm_sub_epi16(lightcontrib, mlight); + lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -3658,6 +3745,9 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend uint32_t alpha0 = APART(ifgcolor[0]); @@ -3735,7 +3825,10 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + lightcontrib = _mm_sub_epi16(lightcontrib, mlight); + lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, 
lightcontrib), 8); + int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -3752,6 +3845,9 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend uint32_t alpha0 = APART(ifgcolor[0]); @@ -3973,7 +4069,10 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + lightcontrib = _mm_sub_epi16(lightcontrib, mlight); + lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -3990,6 +4089,9 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend __m128i outcolor = fgcolor; @@ -4016,7 +4118,10 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + lightcontrib = _mm_sub_epi16(lightcontrib, mlight); + lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -4033,6 +4138,9 @@ namespace 
swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend __m128i outcolor = fgcolor; @@ -4285,7 +4393,10 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + lightcontrib = _mm_sub_epi16(lightcontrib, mlight); + lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -4302,6 +4413,9 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend uint32_t alpha0 = APART(ifgcolor[0]); @@ -4358,7 +4472,10 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + lightcontrib = _mm_sub_epi16(lightcontrib, mlight); + lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -4375,6 +4492,9 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = 
_mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend uint32_t alpha0 = APART(ifgcolor[0]); @@ -4656,7 +4776,10 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + lightcontrib = _mm_sub_epi16(lightcontrib, mlight); + lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -4673,6 +4796,9 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend uint32_t alpha0 = APART(ifgcolor[0]); @@ -4729,7 +4855,10 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + lightcontrib = _mm_sub_epi16(lightcontrib, mlight); + lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -4746,6 +4875,9 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_min_epi16(fgcolor, 
_mm_set1_epi16(256)); // Blend uint32_t alpha0 = APART(ifgcolor[0]); @@ -5027,7 +5159,10 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + lightcontrib = _mm_sub_epi16(lightcontrib, mlight); + lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -5044,6 +5179,9 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend uint32_t alpha0 = APART(ifgcolor[0]); @@ -5100,7 +5238,10 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + lightcontrib = _mm_sub_epi16(lightcontrib, mlight); + lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -5117,6 +5258,9 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend uint32_t alpha0 = APART(ifgcolor[0]); @@ -5588,7 +5732,10 @@ namespace swrenderer __m128i fgcolor = 
_mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + lightcontrib = _mm_sub_epi16(lightcontrib, mlight); + lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -5605,6 +5752,9 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend __m128i outcolor = fgcolor; @@ -5631,7 +5781,10 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + lightcontrib = _mm_sub_epi16(lightcontrib, mlight); + lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -5648,6 +5801,9 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend __m128i outcolor = fgcolor; @@ -5902,7 +6058,10 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + 
__m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + lightcontrib = _mm_sub_epi16(lightcontrib, mlight); + lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -5919,6 +6078,9 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend uint32_t alpha0 = APART(ifgcolor[0]); @@ -5975,7 +6137,10 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + lightcontrib = _mm_sub_epi16(lightcontrib, mlight); + lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -5992,6 +6157,9 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend uint32_t alpha0 = APART(ifgcolor[0]); @@ -6275,7 +6443,10 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + lightcontrib = _mm_sub_epi16(lightcontrib, mlight); + 
lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -6292,6 +6463,9 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend uint32_t alpha0 = APART(ifgcolor[0]); @@ -6348,7 +6522,10 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + lightcontrib = _mm_sub_epi16(lightcontrib, mlight); + lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -6365,6 +6542,9 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend uint32_t alpha0 = APART(ifgcolor[0]); @@ -6648,7 +6828,10 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + lightcontrib = _mm_sub_epi16(lightcontrib, mlight); + lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int 
red0 = RPART(ifgcolor[0]); @@ -6665,6 +6848,9 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend uint32_t alpha0 = APART(ifgcolor[0]); @@ -6721,7 +6907,10 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + lightcontrib = _mm_sub_epi16(lightcontrib, mlight); + lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -6738,6 +6927,9 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend uint32_t alpha0 = APART(ifgcolor[0]); diff --git a/src/swrenderer/drawers/r_draw_sprite32.php b/src/swrenderer/drawers/r_draw_sprite32.php index c66bba0c35..85b6e60037 100644 --- a/src/swrenderer/drawers/r_draw_sprite32.php +++ b/src/swrenderer/drawers/r_draw_sprite32.php @@ -278,15 +278,18 @@ namespace swrenderer function Shade($blendVariant, $isSimpleShade) { if ($blendVariant == "copy" || $blendVariant == "shaded") return; -?> - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); - + mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + __m128i lightcontrib = 
_mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + lightcontrib = _mm_sub_epi16(lightcontrib, mlight); + lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -303,6 +306,9 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); Date: Thu, 23 Feb 2017 03:49:14 -0500 Subject: [PATCH 882/912] - Added menu option for clipmidtex compatflag --- wadsrc/static/language.enu | 1 + wadsrc/static/menudef.txt | 1 + 2 files changed, 2 insertions(+) diff --git a/wadsrc/static/language.enu b/wadsrc/static/language.enu index 7bfb0e0175..9cde3d55d6 100644 --- a/wadsrc/static/language.enu +++ b/wadsrc/static/language.enu @@ -2088,6 +2088,7 @@ CMPTMNU_SOUNDCUTOFF = "Sounds stop when actor vanishes"; CMPTMNU_SOUNDTARGET = "Use original sound target handling"; CMPTMNU_TELEPORT = "Scripted teleports don't trigger sector actions"; CMPTMNU_PUSHWINDOW = "Non-blocking lines can be pushed"; +CMPTMNU_CLIPMIDTEX = "Clip Midtextures Like OpenGL Renderer"; // Sound Options SNDMNU_TITLE = "SOUND OPTIONS"; diff --git a/wadsrc/static/menudef.txt b/wadsrc/static/menudef.txt index 083f72627f..bcf080d3be 100644 --- a/wadsrc/static/menudef.txt +++ b/wadsrc/static/menudef.txt @@ -1409,6 +1409,7 @@ OptionMenu "CompatibilityOptions" Option "$CMPTMNU_MULTIEXIT", "compat_multiexit", "YesNo" Option "$CMPTMNU_TELEPORT", "compat_teleport", "YesNo" Option "$CMPTMNU_PUSHWINDOW", "compat_pushwindow", "YesNo" + Option "$CMPTMNU_CLIPMIDTEX", "compat_clipmidtex", "YesNo" StaticText " " StaticText "$CMPTMNU_PHYSICSBEHAVIOR",1 From d94150f3168a48d902f015d2e7a3509fb2c93a64 Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: 
Thu, 23 Feb 2017 05:57:34 -0500 Subject: [PATCH 883/912] Revert "- Added menu option for clipmidtex compatflag" This reverts commit 4a66621d9f7710561b76e7ab95dd333d2b2df719. --- wadsrc/static/language.enu | 1 - wadsrc/static/menudef.txt | 1 - 2 files changed, 2 deletions(-) diff --git a/wadsrc/static/language.enu b/wadsrc/static/language.enu index 9cde3d55d6..7bfb0e0175 100644 --- a/wadsrc/static/language.enu +++ b/wadsrc/static/language.enu @@ -2088,7 +2088,6 @@ CMPTMNU_SOUNDCUTOFF = "Sounds stop when actor vanishes"; CMPTMNU_SOUNDTARGET = "Use original sound target handling"; CMPTMNU_TELEPORT = "Scripted teleports don't trigger sector actions"; CMPTMNU_PUSHWINDOW = "Non-blocking lines can be pushed"; -CMPTMNU_CLIPMIDTEX = "Clip Midtextures Like OpenGL Renderer"; // Sound Options SNDMNU_TITLE = "SOUND OPTIONS"; diff --git a/wadsrc/static/menudef.txt b/wadsrc/static/menudef.txt index bcf080d3be..083f72627f 100644 --- a/wadsrc/static/menudef.txt +++ b/wadsrc/static/menudef.txt @@ -1409,7 +1409,6 @@ OptionMenu "CompatibilityOptions" Option "$CMPTMNU_MULTIEXIT", "compat_multiexit", "YesNo" Option "$CMPTMNU_TELEPORT", "compat_teleport", "YesNo" Option "$CMPTMNU_PUSHWINDOW", "compat_pushwindow", "YesNo" - Option "$CMPTMNU_CLIPMIDTEX", "compat_clipmidtex", "YesNo" StaticText " " StaticText "$CMPTMNU_PHYSICSBEHAVIOR",1 From 10e268ebc0a50281644d047063848c099846e9d3 Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Thu, 23 Feb 2017 06:32:53 -0500 Subject: [PATCH 884/912] - Change "clipmidtex" compatflag to be accessible in compatibility.txt only. 
--- src/compatibility.cpp | 2 +- src/d_main.cpp | 1 - src/doomdef.h | 2 +- src/g_mapinfo.cpp | 1 - src/p_setup.cpp | 6 ++---- src/p_udmf.cpp | 3 +-- src/swrenderer/line/r_line.cpp | 3 ++- src/swrenderer/line/r_renderdrawsegment.cpp | 6 ++++-- 8 files changed, 11 insertions(+), 13 deletions(-) diff --git a/src/compatibility.cpp b/src/compatibility.cpp index a1fb2701a5..6bc7e18cc3 100644 --- a/src/compatibility.cpp +++ b/src/compatibility.cpp @@ -113,6 +113,7 @@ static FCompatOption Options[] = { "linkfrozenprops", BCOMPATF_LINKFROZENPROPS, SLOT_BCOMPAT }, { "floatbob", BCOMPATF_FLOATBOB, SLOT_BCOMPAT }, { "noslopeid", BCOMPATF_NOSLOPEID, SLOT_BCOMPAT }, + { "clipmidtex", BCOMPATF_CLIPMIDTEX, SLOT_BCOMPAT }, // list copied from g_mapinfo.cpp { "shorttex", COMPATF_SHORTTEX, SLOT_COMPAT }, @@ -151,7 +152,6 @@ static FCompatOption Options[] = { "multiexit", COMPATF2_MULTIEXIT, SLOT_COMPAT2 }, { "teleport", COMPATF2_TELEPORT, SLOT_COMPAT2 }, { "disablepushwindowcheck", COMPATF2_PUSHWINDOW, SLOT_COMPAT2 }, - { "clipmidtex", COMPATF2_CLIPMIDTEX, SLOT_COMPAT2 }, { NULL, 0, 0 } }; diff --git a/src/d_main.cpp b/src/d_main.cpp index 1ecc62fd07..a30641d60a 100644 --- a/src/d_main.cpp +++ b/src/d_main.cpp @@ -641,7 +641,6 @@ CVAR (Flag, compat_pointonline, compatflags2, COMPATF2_POINTONLINE); CVAR (Flag, compat_multiexit, compatflags2, COMPATF2_MULTIEXIT); CVAR (Flag, compat_teleport, compatflags2, COMPATF2_TELEPORT); CVAR (Flag, compat_pushwindow, compatflags2, COMPATF2_PUSHWINDOW); -CVAR (Flag, compat_clipmidtex, compatflags2, COMPATF2_CLIPMIDTEX); //========================================================================== // diff --git a/src/doomdef.h b/src/doomdef.h index d8116fb025..48d8f479e4 100644 --- a/src/doomdef.h +++ b/src/doomdef.h @@ -344,7 +344,6 @@ enum : unsigned int COMPATF2_MULTIEXIT = 1 << 4, // Level exit can be triggered multiple times (required by Daedalus's travel tubes, thanks to a faulty script) COMPATF2_TELEPORT = 1 << 5, // Don't let indirect 
teleports trigger sector actions COMPATF2_PUSHWINDOW = 1 << 6, // Disable the window check in CheckForPushSpecial() - COMPATF2_CLIPMIDTEX = 1 << 7, // Always Clip midtex's in the software renderer (required to run certain GZDoom maps) }; // Emulate old bugs for select maps. These are not exposed by a cvar @@ -360,6 +359,7 @@ enum BCOMPATF_LINKFROZENPROPS = 1 << 6, // Clearing PROP_TOTALLYFROZEN or PROP_FROZEN also clears the other BCOMPATF_FLOATBOB = 1 << 8, // Use Hexen's original method of preventing floatbobbing items from falling down BCOMPATF_NOSLOPEID = 1 << 9, // disable line IDs on slopes. + BCOMPATF_CLIPMIDTEX = 1 << 10, // Always Clip midtex's in the software renderer (required to run certain GZDoom maps) }; // phares 3/20/98: diff --git a/src/g_mapinfo.cpp b/src/g_mapinfo.cpp index 19eb692c76..f83e18ece4 100644 --- a/src/g_mapinfo.cpp +++ b/src/g_mapinfo.cpp @@ -1330,7 +1330,6 @@ MapFlagHandlers[] = { "compat_multiexit", MITYPE_COMPATFLAG, 0, COMPATF2_MULTIEXIT }, { "compat_teleport", MITYPE_COMPATFLAG, 0, COMPATF2_TELEPORT }, { "compat_pushwindow", MITYPE_COMPATFLAG, 0, COMPATF2_PUSHWINDOW }, - { "compat_clipmidtex", MITYPE_COMPATFLAG, 0, COMPATF2_CLIPMIDTEX }, { "cd_start_track", MITYPE_EATNEXT, 0, 0 }, { "cd_end1_track", MITYPE_EATNEXT, 0, 0 }, { "cd_end2_track", MITYPE_EATNEXT, 0, 0 }, diff --git a/src/p_setup.cpp b/src/p_setup.cpp index 59cbfdf36c..ac7faea765 100644 --- a/src/p_setup.cpp +++ b/src/p_setup.cpp @@ -2176,8 +2176,7 @@ void P_LoadLineDefs (MapData * map) P_AdjustLine (ld); P_SaveLineSpecial (ld); - if ((level.flags2 & LEVEL2_CLIPMIDTEX) || (ii_compatflags2 & COMPATF2_CLIPMIDTEX)) - ld->flags |= ML_CLIP_MIDTEX; + if (level.flags2 & LEVEL2_CLIPMIDTEX) ld->flags |= ML_CLIP_MIDTEX; if (level.flags2 & LEVEL2_WRAPMIDTEX) ld->flags |= ML_WRAP_MIDTEX; if (level.flags2 & LEVEL2_CHECKSWITCHRANGE) ld->flags |= ML_CHECKSWITCHRANGE; } @@ -2263,8 +2262,7 @@ void P_LoadLineDefs2 (MapData * map) P_AdjustLine (ld); P_SetLineID(i, ld); P_SaveLineSpecial 
(ld); - if ((level.flags2 & LEVEL2_CLIPMIDTEX) || (ii_compatflags2 & COMPATF2_CLIPMIDTEX)) - ld->flags |= ML_CLIP_MIDTEX; + if (level.flags2 & LEVEL2_CLIPMIDTEX) ld->flags |= ML_CLIP_MIDTEX; if (level.flags2 & LEVEL2_WRAPMIDTEX) ld->flags |= ML_WRAP_MIDTEX; if (level.flags2 & LEVEL2_CHECKSWITCHRANGE) ld->flags |= ML_CHECKSWITCHRANGE; diff --git a/src/p_udmf.cpp b/src/p_udmf.cpp index 4ae40d565c..fc71160ae9 100644 --- a/src/p_udmf.cpp +++ b/src/p_udmf.cpp @@ -838,8 +838,7 @@ public: ld->portalindex = UINT_MAX; ld->portaltransferred = UINT_MAX; ld->sidedef[0] = ld->sidedef[1] = NULL; - if ((level.flags2 & LEVEL2_CLIPMIDTEX) || (ii_compatflags2 & COMPATF2_CLIPMIDTEX)) - ld->flags |= ML_CLIP_MIDTEX; + if (level.flags2 & LEVEL2_CLIPMIDTEX) ld->flags |= ML_CLIP_MIDTEX; if (level.flags2 & LEVEL2_WRAPMIDTEX) ld->flags |= ML_WRAP_MIDTEX; if (level.flags2 & LEVEL2_CHECKSWITCHRANGE) ld->flags |= ML_CHECKSWITCHRANGE; diff --git a/src/swrenderer/line/r_line.cpp b/src/swrenderer/line/r_line.cpp index 544055b2c8..2b2417ec22 100644 --- a/src/swrenderer/line/r_line.cpp +++ b/src/swrenderer/line/r_line.cpp @@ -607,7 +607,7 @@ namespace swrenderer if (mFrontSector->e && mFrontSector->e->XFloor.lightlist.Size()) return true; if (mBackSector->e && mBackSector->e->XFloor.lightlist.Size()) return true; - if (sidedef->GetTexture(side_t::mid).isValid() && ((linedef->flags & (ML_CLIP_MIDTEX | ML_WRAP_MIDTEX)) || sidedef->Flags & (WALLF_CLIP_MIDTEX | WALLF_WRAP_MIDTEX))) return true; + if (sidedef->GetTexture(side_t::mid).isValid() && ((ib_compatflags & BCOMPATF_CLIPMIDTEX) || (linedef->flags & (ML_CLIP_MIDTEX | ML_WRAP_MIDTEX)) || sidedef->Flags & (WALLF_CLIP_MIDTEX | WALLF_WRAP_MIDTEX))) return true; return false; } @@ -669,6 +669,7 @@ namespace swrenderer if (sidedef->GetTexture(side_t::mid).isValid()) { + if (ib_compatflags & BCOMPATF_CLIPMIDTEX) return true; if (linedef->flags & (ML_CLIP_MIDTEX | ML_WRAP_MIDTEX)) return true; if (sidedef->Flags & (WALLF_CLIP_MIDTEX | WALLF_WRAP_MIDTEX)) 
return true; } diff --git a/src/swrenderer/line/r_renderdrawsegment.cpp b/src/swrenderer/line/r_renderdrawsegment.cpp index f4b6f97dea..bb7823bebf 100644 --- a/src/swrenderer/line/r_renderdrawsegment.cpp +++ b/src/swrenderer/line/r_renderdrawsegment.cpp @@ -277,7 +277,8 @@ namespace swrenderer // or above the ceiling, so the appropriate end won't be clipped automatically when adding // this drawseg. if ((curline->linedef->flags & ML_CLIP_MIDTEX) || - (curline->sidedef->Flags & WALLF_CLIP_MIDTEX)) + (curline->sidedef->Flags & WALLF_CLIP_MIDTEX) || + (ib_compatflags & BCOMPATF_CLIPMIDTEX)) { ClipMidtex(x1, x2); } @@ -335,7 +336,8 @@ namespace swrenderer // or above the ceiling, so the appropriate end won't be clipped automatically when adding // this drawseg. if ((curline->linedef->flags & ML_CLIP_MIDTEX) || - (curline->sidedef->Flags & WALLF_CLIP_MIDTEX)) + (curline->sidedef->Flags & WALLF_CLIP_MIDTEX) || + (ib_compatflags & BCOMPATF_CLIPMIDTEX)) { ClipMidtex(x1, x2); } From 832790f5baae970922ae5433b687f966a3da61c7 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 23 Feb 2017 21:08:08 +0100 Subject: [PATCH 885/912] Fix sprite light bug --- src/swrenderer/drawers/r_draw_sprite32.h | 547 +++++++++------------ src/swrenderer/drawers/r_draw_sprite32.php | 25 +- 2 files changed, 249 insertions(+), 323 deletions(-) diff --git a/src/swrenderer/drawers/r_draw_sprite32.h b/src/swrenderer/drawers/r_draw_sprite32.h index 1264d5e278..d54aff4e9e 100644 --- a/src/swrenderer/drawers/r_draw_sprite32.h +++ b/src/swrenderer/drawers/r_draw_sprite32.h @@ -53,13 +53,12 @@ namespace swrenderer uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; // Shade constants - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - __m128i dynlight = 
_mm_cvtsi32_si128(args.DynamicLight()); dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); @@ -150,13 +149,12 @@ namespace swrenderer uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; // Shade constants - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); @@ -314,6 +312,9 @@ namespace swrenderer uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; // Shade constants + __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); + dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); + dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); int light = 256 - (args.Light() >> (FRACBITS - 8)); __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); @@ -322,10 +323,9 @@ namespace swrenderer shade_fade 
= _mm_mullo_epi16(shade_fade, inv_light); __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); int desaturate = shade_constants.desaturate; - - __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); - dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); - dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); + + __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + lightcontrib = _mm_sub_epi16(lightcontrib, mlight); int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); @@ -416,6 +416,9 @@ namespace swrenderer uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; // Shade constants + __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); + dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); + dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); int light = 256 - (args.Light() >> (FRACBITS - 8)); __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); @@ -424,10 +427,9 @@ namespace swrenderer shade_fade = _mm_mullo_epi16(shade_fade, inv_light); __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); int desaturate = shade_constants.desaturate; - - __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); - dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); - dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); + + __m128i lightcontrib = 
_mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + lightcontrib = _mm_sub_epi16(lightcontrib, mlight); int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); @@ -601,13 +603,12 @@ namespace swrenderer uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; // Shade constants - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); @@ -657,7 +658,6 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); // Blend @@ -686,7 +686,6 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); // Blend @@ -702,13 +701,12 @@ namespace swrenderer uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; // Shade constants - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i inv_light 
= _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); @@ -799,7 +797,6 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); // Blend @@ -848,7 +845,6 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); // Blend @@ -870,6 +866,9 @@ namespace swrenderer uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; // Shade constants + __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); + dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); + dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); int light = 256 - (args.Light() >> (FRACBITS - 8)); __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); @@ -878,10 +877,9 @@ namespace swrenderer shade_fade = _mm_mullo_epi16(shade_fade, inv_light); __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, 
shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); int desaturate = shade_constants.desaturate; - - __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); - dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); - dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); + + __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + lightcontrib = _mm_sub_epi16(lightcontrib, mlight); int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); @@ -931,9 +929,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); - lightcontrib = _mm_sub_epi16(lightcontrib, mlight); - lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + __m128i lit_dynlight = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); @@ -952,7 +948,7 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_add_epi16(fgcolor, lit_dynlight); fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend @@ -981,9 +977,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); - lightcontrib = _mm_sub_epi16(lightcontrib, mlight); - lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + __m128i lit_dynlight = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); @@ 
-1002,7 +996,7 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_add_epi16(fgcolor, lit_dynlight); fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend @@ -1018,6 +1012,9 @@ namespace swrenderer uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; // Shade constants + __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); + dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); + dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); int light = 256 - (args.Light() >> (FRACBITS - 8)); __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); @@ -1026,10 +1023,9 @@ namespace swrenderer shade_fade = _mm_mullo_epi16(shade_fade, inv_light); __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); int desaturate = shade_constants.desaturate; - - __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); - dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); - dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); + + __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + lightcontrib = _mm_sub_epi16(lightcontrib, mlight); int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); @@ -1120,9 +1116,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); - 
lightcontrib = _mm_sub_epi16(lightcontrib, mlight); - lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + __m128i lit_dynlight = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); @@ -1141,7 +1135,7 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_add_epi16(fgcolor, lit_dynlight); fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend @@ -1190,9 +1184,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); - lightcontrib = _mm_sub_epi16(lightcontrib, mlight); - lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + __m128i lit_dynlight = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); @@ -1211,7 +1203,7 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_add_epi16(fgcolor, lit_dynlight); fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend @@ -1249,13 +1241,12 @@ namespace swrenderer uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; // Shade constants - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); 
dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); @@ -1306,7 +1297,6 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); // Blend @@ -1365,7 +1355,6 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); // Blend @@ -1410,13 +1399,12 @@ namespace swrenderer uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; // Shade constants - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); @@ -1508,7 +1496,6 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), 
_mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); // Blend @@ -1587,7 +1574,6 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); // Blend @@ -1638,6 +1624,9 @@ namespace swrenderer uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; // Shade constants + __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); + dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); + dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); int light = 256 - (args.Light() >> (FRACBITS - 8)); __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); @@ -1646,10 +1635,9 @@ namespace swrenderer shade_fade = _mm_mullo_epi16(shade_fade, inv_light); __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); int desaturate = shade_constants.desaturate; - - __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); - dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); - dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); + + __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + lightcontrib = _mm_sub_epi16(lightcontrib, mlight); int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); @@ -1700,9 +1688,7 @@ namespace swrenderer __m128i fgcolor = 
_mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); - lightcontrib = _mm_sub_epi16(lightcontrib, mlight); - lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + __m128i lit_dynlight = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); @@ -1721,7 +1707,7 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_add_epi16(fgcolor, lit_dynlight); fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend @@ -1780,9 +1766,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); - lightcontrib = _mm_sub_epi16(lightcontrib, mlight); - lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + __m128i lit_dynlight = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); @@ -1801,7 +1785,7 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_add_epi16(fgcolor, lit_dynlight); fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend @@ -1846,6 +1830,9 @@ namespace swrenderer uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; // Shade constants + __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); + dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); + dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); int light = 256 
- (args.Light() >> (FRACBITS - 8)); __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); @@ -1854,10 +1841,9 @@ namespace swrenderer shade_fade = _mm_mullo_epi16(shade_fade, inv_light); __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); int desaturate = shade_constants.desaturate; - - __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); - dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); - dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); + + __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + lightcontrib = _mm_sub_epi16(lightcontrib, mlight); int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); @@ -1949,9 +1935,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); - lightcontrib = _mm_sub_epi16(lightcontrib, mlight); - lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + __m128i lit_dynlight = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); @@ -1970,7 +1954,7 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_add_epi16(fgcolor, lit_dynlight); fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend @@ -2049,9 +2033,7 @@ namespace swrenderer __m128i fgcolor = 
_mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); - lightcontrib = _mm_sub_epi16(lightcontrib, mlight); - lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + __m128i lit_dynlight = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); @@ -2070,7 +2052,7 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_add_epi16(fgcolor, lit_dynlight); fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend @@ -2137,13 +2119,12 @@ namespace swrenderer uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; // Shade constants - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); @@ -2194,7 +2175,6 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); // Blend @@ 
-2253,7 +2233,6 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); // Blend @@ -2298,13 +2277,12 @@ namespace swrenderer uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; // Shade constants - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); @@ -2396,7 +2374,6 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); // Blend @@ -2475,7 +2452,6 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); // Blend @@ -2526,6 +2502,9 @@ namespace swrenderer uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; // Shade constants + __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); + dynlight 
= _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); + dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); int light = 256 - (args.Light() >> (FRACBITS - 8)); __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); @@ -2534,10 +2513,9 @@ namespace swrenderer shade_fade = _mm_mullo_epi16(shade_fade, inv_light); __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); int desaturate = shade_constants.desaturate; - - __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); - dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); - dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); + + __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + lightcontrib = _mm_sub_epi16(lightcontrib, mlight); int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); @@ -2588,9 +2566,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); - lightcontrib = _mm_sub_epi16(lightcontrib, mlight); - lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + __m128i lit_dynlight = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); @@ -2609,7 +2585,7 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_add_epi16(fgcolor, 
lit_dynlight); fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend @@ -2668,9 +2644,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); - lightcontrib = _mm_sub_epi16(lightcontrib, mlight); - lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + __m128i lit_dynlight = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); @@ -2689,7 +2663,7 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_add_epi16(fgcolor, lit_dynlight); fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend @@ -2734,6 +2708,9 @@ namespace swrenderer uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; // Shade constants + __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); + dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); + dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); int light = 256 - (args.Light() >> (FRACBITS - 8)); __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); @@ -2742,10 +2719,9 @@ namespace swrenderer shade_fade = _mm_mullo_epi16(shade_fade, inv_light); __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); int desaturate = shade_constants.desaturate; - - __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); - dynlight = 
_mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); - dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); + + __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + lightcontrib = _mm_sub_epi16(lightcontrib, mlight); int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); @@ -2837,9 +2813,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); - lightcontrib = _mm_sub_epi16(lightcontrib, mlight); - lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + __m128i lit_dynlight = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); @@ -2858,7 +2832,7 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_add_epi16(fgcolor, lit_dynlight); fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend @@ -2937,9 +2911,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); - lightcontrib = _mm_sub_epi16(lightcontrib, mlight); - lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + __m128i lit_dynlight = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); @@ -2958,7 +2930,7 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_add_epi16(fgcolor, 
lit_dynlight); fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend @@ -3025,13 +2997,12 @@ namespace swrenderer uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; // Shade constants - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); @@ -3082,7 +3053,6 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); // Blend @@ -3141,7 +3111,6 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); // Blend @@ -3186,13 +3155,12 @@ namespace swrenderer uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; // Shade constants - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - __m128i dynlight = 
_mm_cvtsi32_si128(args.DynamicLight()); dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); @@ -3284,7 +3252,6 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); // Blend @@ -3363,7 +3330,6 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); // Blend @@ -3414,6 +3380,9 @@ namespace swrenderer uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; // Shade constants + __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); + dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); + dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); int light = 256 - (args.Light() >> (FRACBITS - 8)); __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); @@ -3422,10 +3391,9 @@ namespace swrenderer shade_fade = _mm_mullo_epi16(shade_fade, inv_light); __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, 
shade_constants.light_blue); int desaturate = shade_constants.desaturate; - - __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); - dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); - dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); + + __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + lightcontrib = _mm_sub_epi16(lightcontrib, mlight); int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); @@ -3476,9 +3444,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); - lightcontrib = _mm_sub_epi16(lightcontrib, mlight); - lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + __m128i lit_dynlight = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); @@ -3497,7 +3463,7 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_add_epi16(fgcolor, lit_dynlight); fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend @@ -3556,9 +3522,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); - lightcontrib = _mm_sub_epi16(lightcontrib, mlight); - lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + __m128i lit_dynlight = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); @@ -3577,7 +3541,7 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); 
fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_add_epi16(fgcolor, lit_dynlight); fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend @@ -3622,6 +3586,9 @@ namespace swrenderer uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; // Shade constants + __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); + dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); + dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); int light = 256 - (args.Light() >> (FRACBITS - 8)); __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); @@ -3630,10 +3597,9 @@ namespace swrenderer shade_fade = _mm_mullo_epi16(shade_fade, inv_light); __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); int desaturate = shade_constants.desaturate; - - __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); - dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); - dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); + + __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + lightcontrib = _mm_sub_epi16(lightcontrib, mlight); int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); @@ -3725,9 +3691,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); - lightcontrib = _mm_sub_epi16(lightcontrib, mlight); - lightcontrib = 
_mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + __m128i lit_dynlight = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); @@ -3746,7 +3710,7 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_add_epi16(fgcolor, lit_dynlight); fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend @@ -3825,9 +3789,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); - lightcontrib = _mm_sub_epi16(lightcontrib, mlight); - lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + __m128i lit_dynlight = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); @@ -3846,7 +3808,7 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_add_epi16(fgcolor, lit_dynlight); fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend @@ -3910,13 +3872,12 @@ namespace swrenderer uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; // Shade constants - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); + int 
light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); @@ -3964,7 +3925,6 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); // Blend @@ -3992,7 +3952,6 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); // Blend @@ -4010,6 +3969,9 @@ namespace swrenderer uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; // Shade constants + __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); + dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); + dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); int light = 256 - (args.Light() >> (FRACBITS - 8)); __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); @@ -4018,10 +3980,9 @@ namespace swrenderer shade_fade = _mm_mullo_epi16(shade_fade, inv_light); __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); int desaturate = shade_constants.desaturate; - - __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); - dynlight = _mm_unpacklo_epi8(dynlight, 
_mm_setzero_si128()); - dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); + + __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + lightcontrib = _mm_sub_epi16(lightcontrib, mlight); int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); @@ -4069,9 +4030,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); - lightcontrib = _mm_sub_epi16(lightcontrib, mlight); - lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + __m128i lit_dynlight = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); @@ -4090,7 +4049,7 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_add_epi16(fgcolor, lit_dynlight); fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend @@ -4118,9 +4077,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); - lightcontrib = _mm_sub_epi16(lightcontrib, mlight); - lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + __m128i lit_dynlight = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); @@ -4139,7 +4096,7 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_add_epi16(fgcolor, lit_dynlight); fgcolor = 
_mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend @@ -4173,13 +4130,12 @@ namespace swrenderer uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; // Shade constants - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); @@ -4228,7 +4184,6 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); // Blend @@ -4286,7 +4241,6 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); // Blend @@ -4333,6 +4287,9 @@ namespace swrenderer uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; // Shade constants + __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); + dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); + dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); int light = 256 - (args.Light() >> (FRACBITS - 8)); __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, 
light, light); __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); @@ -4341,10 +4298,9 @@ namespace swrenderer shade_fade = _mm_mullo_epi16(shade_fade, inv_light); __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); int desaturate = shade_constants.desaturate; - - __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); - dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); - dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); + + __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + lightcontrib = _mm_sub_epi16(lightcontrib, mlight); int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); @@ -4393,9 +4349,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); - lightcontrib = _mm_sub_epi16(lightcontrib, mlight); - lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + __m128i lit_dynlight = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); @@ -4414,7 +4368,7 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_add_epi16(fgcolor, lit_dynlight); fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend @@ -4472,9 +4426,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i 
lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); - lightcontrib = _mm_sub_epi16(lightcontrib, mlight); - lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + __m128i lit_dynlight = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); @@ -4493,7 +4445,7 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_add_epi16(fgcolor, lit_dynlight); fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend @@ -4556,13 +4508,12 @@ namespace swrenderer uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; // Shade constants - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); @@ -4611,7 +4562,6 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); // Blend @@ -4669,7 +4619,6 @@ namespace swrenderer __m128i fgcolor = 
_mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); // Blend @@ -4716,6 +4665,9 @@ namespace swrenderer uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; // Shade constants + __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); + dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); + dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); int light = 256 - (args.Light() >> (FRACBITS - 8)); __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); @@ -4724,10 +4676,9 @@ namespace swrenderer shade_fade = _mm_mullo_epi16(shade_fade, inv_light); __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); int desaturate = shade_constants.desaturate; - - __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); - dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); - dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); + + __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + lightcontrib = _mm_sub_epi16(lightcontrib, mlight); int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); @@ -4776,9 +4727,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); - lightcontrib = _mm_sub_epi16(lightcontrib, mlight); - lightcontrib = 
_mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + __m128i lit_dynlight = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); @@ -4797,7 +4746,7 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_add_epi16(fgcolor, lit_dynlight); fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend @@ -4855,9 +4804,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); - lightcontrib = _mm_sub_epi16(lightcontrib, mlight); - lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + __m128i lit_dynlight = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); @@ -4876,7 +4823,7 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_add_epi16(fgcolor, lit_dynlight); fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend @@ -4939,13 +4886,12 @@ namespace swrenderer uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; // Shade constants - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); + int 
light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); @@ -4994,7 +4940,6 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); // Blend @@ -5052,7 +4997,6 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); // Blend @@ -5099,6 +5043,9 @@ namespace swrenderer uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; // Shade constants + __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); + dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); + dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); int light = 256 - (args.Light() >> (FRACBITS - 8)); __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); @@ -5107,10 +5054,9 @@ namespace swrenderer shade_fade = _mm_mullo_epi16(shade_fade, inv_light); __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); int desaturate = shade_constants.desaturate; - - __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); - dynlight = _mm_unpacklo_epi8(dynlight, 
_mm_setzero_si128()); - dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); + + __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + lightcontrib = _mm_sub_epi16(lightcontrib, mlight); int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); @@ -5159,9 +5105,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); - lightcontrib = _mm_sub_epi16(lightcontrib, mlight); - lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + __m128i lit_dynlight = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); @@ -5180,7 +5124,7 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_add_epi16(fgcolor, lit_dynlight); fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend @@ -5238,9 +5182,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); - lightcontrib = _mm_sub_epi16(lightcontrib, mlight); - lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + __m128i lit_dynlight = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); @@ -5259,7 +5201,7 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_add_epi16(fgcolor, lit_dynlight); fgcolor = 
_mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend @@ -5323,13 +5265,12 @@ namespace swrenderer uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; // Shade constants - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); @@ -5437,6 +5378,9 @@ namespace swrenderer uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; // Shade constants + __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); + dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); + dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); int light = 256 - (args.Light() >> (FRACBITS - 8)); __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); @@ -5445,10 +5389,9 @@ namespace swrenderer shade_fade = _mm_mullo_epi16(shade_fade, inv_light); __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); int desaturate = shade_constants.desaturate; - - __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); - 
dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); - dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); + + __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + lightcontrib = _mm_sub_epi16(lightcontrib, mlight); int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); @@ -5572,13 +5515,12 @@ namespace swrenderer uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; // Shade constants - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); @@ -5626,7 +5568,6 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); // Blend @@ -5654,7 +5595,6 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); // Blend @@ -5673,6 +5613,9 @@ namespace swrenderer uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; // Shade constants + 
__m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); + dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); + dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); int light = 256 - (args.Light() >> (FRACBITS - 8)); __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); @@ -5681,10 +5624,9 @@ namespace swrenderer shade_fade = _mm_mullo_epi16(shade_fade, inv_light); __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); int desaturate = shade_constants.desaturate; - - __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); - dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); - dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); + + __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + lightcontrib = _mm_sub_epi16(lightcontrib, mlight); int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); @@ -5732,9 +5674,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); - lightcontrib = _mm_sub_epi16(lightcontrib, mlight); - lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + __m128i lit_dynlight = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); @@ -5753,7 +5693,7 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - fgcolor = 
_mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_add_epi16(fgcolor, lit_dynlight); fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend @@ -5781,9 +5721,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); - lightcontrib = _mm_sub_epi16(lightcontrib, mlight); - lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + __m128i lit_dynlight = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); @@ -5802,7 +5740,7 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_add_epi16(fgcolor, lit_dynlight); fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend @@ -5837,13 +5775,12 @@ namespace swrenderer uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; // Shade constants - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); @@ -5892,7 +5829,6 @@ namespace swrenderer __m128i fgcolor = 
_mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); // Blend @@ -5950,7 +5886,6 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); // Blend @@ -5998,6 +5933,9 @@ namespace swrenderer uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; // Shade constants + __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); + dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); + dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); int light = 256 - (args.Light() >> (FRACBITS - 8)); __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); @@ -6006,10 +5944,9 @@ namespace swrenderer shade_fade = _mm_mullo_epi16(shade_fade, inv_light); __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); int desaturate = shade_constants.desaturate; - - __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); - dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); - dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); + + __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + lightcontrib = _mm_sub_epi16(lightcontrib, mlight); int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); @@ -6058,9 +5995,7 @@ namespace swrenderer 
__m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); - lightcontrib = _mm_sub_epi16(lightcontrib, mlight); - lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + __m128i lit_dynlight = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); @@ -6079,7 +6014,7 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_add_epi16(fgcolor, lit_dynlight); fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend @@ -6137,9 +6072,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); - lightcontrib = _mm_sub_epi16(lightcontrib, mlight); - lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + __m128i lit_dynlight = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); @@ -6158,7 +6091,7 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_add_epi16(fgcolor, lit_dynlight); fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend @@ -6222,13 +6155,12 @@ namespace swrenderer uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; // Shade constants - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i inv_light = _mm_set_epi16(0, 256 
- light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); @@ -6277,7 +6209,6 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); // Blend @@ -6335,7 +6266,6 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); // Blend @@ -6383,6 +6313,9 @@ namespace swrenderer uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; // Shade constants + __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); + dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); + dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); int light = 256 - (args.Light() >> (FRACBITS - 8)); __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); @@ -6391,10 +6324,9 @@ namespace swrenderer shade_fade = _mm_mullo_epi16(shade_fade, inv_light); __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, 
shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); int desaturate = shade_constants.desaturate; - - __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); - dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); - dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); + + __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + lightcontrib = _mm_sub_epi16(lightcontrib, mlight); int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); @@ -6443,9 +6375,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); - lightcontrib = _mm_sub_epi16(lightcontrib, mlight); - lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + __m128i lit_dynlight = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); @@ -6464,7 +6394,7 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_add_epi16(fgcolor, lit_dynlight); fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend @@ -6522,9 +6452,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); - lightcontrib = _mm_sub_epi16(lightcontrib, mlight); - lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + __m128i lit_dynlight = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); @@ -6543,7 +6471,7 @@ 
namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_add_epi16(fgcolor, lit_dynlight); fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend @@ -6607,13 +6535,12 @@ namespace swrenderer uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; // Shade constants - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); @@ -6662,7 +6589,6 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); // Blend @@ -6720,7 +6646,6 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); // Blend @@ -6768,6 +6693,9 @@ namespace swrenderer uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; // Shade constants + __m128i dynlight = 
_mm_cvtsi32_si128(args.DynamicLight()); + dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); + dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); int light = 256 - (args.Light() >> (FRACBITS - 8)); __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); @@ -6776,10 +6704,9 @@ namespace swrenderer shade_fade = _mm_mullo_epi16(shade_fade, inv_light); __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); int desaturate = shade_constants.desaturate; - - __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); - dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); - dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); + + __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + lightcontrib = _mm_sub_epi16(lightcontrib, mlight); int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); @@ -6828,9 +6755,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); - lightcontrib = _mm_sub_epi16(lightcontrib, mlight); - lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + __m128i lit_dynlight = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); @@ -6849,7 +6774,7 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - fgcolor = _mm_add_epi16(fgcolor, 
lightcontrib); + fgcolor = _mm_add_epi16(fgcolor, lit_dynlight); fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend @@ -6907,9 +6832,7 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); - lightcontrib = _mm_sub_epi16(lightcontrib, mlight); - lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + __m128i lit_dynlight = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); @@ -6928,7 +6851,7 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_add_epi16(fgcolor, lit_dynlight); fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend diff --git a/src/swrenderer/drawers/r_draw_sprite32.php b/src/swrenderer/drawers/r_draw_sprite32.php index 85b6e60037..e08ba551fe 100644 --- a/src/swrenderer/drawers/r_draw_sprite32.php +++ b/src/swrenderer/drawers/r_draw_sprite32.php @@ -119,21 +119,27 @@ namespace swrenderer uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; // Shade constants + __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); + dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); + dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); int light = 256 - (args.Light() >> (FRACBITS - 8)); __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - + mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + + __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 
256 - light); __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); shade_fade = _mm_mullo_epi16(shade_fade, inv_light); __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); int desaturate = shade_constants.desaturate; + + __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + lightcontrib = _mm_sub_epi16(lightcontrib, mlight); - - __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); - dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); - dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); @@ -281,14 +287,11 @@ namespace swrenderer if ($isSimpleShade == true) { ?> - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); - lightcontrib = _mm_sub_epi16(lightcontrib, mlight); - lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + __m128i lit_dynlight = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); @@ -307,7 +310,7 @@ namespace swrenderer fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, 
fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_add_epi16(fgcolor, lit_dynlight); fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); Date: Thu, 23 Feb 2017 21:50:16 +0100 Subject: [PATCH 886/912] Add dynamic light to sprites in palette mode --- src/swrenderer/drawers/r_draw_pal.cpp | 62 ++++++++++++++++++++++----- src/swrenderer/drawers/r_draw_pal.h | 4 ++ 2 files changed, 55 insertions(+), 11 deletions(-) diff --git a/src/swrenderer/drawers/r_draw_pal.cpp b/src/swrenderer/drawers/r_draw_pal.cpp index 358bafbd3d..31aa65dd2a 100644 --- a/src/swrenderer/drawers/r_draw_pal.cpp +++ b/src/swrenderer/drawers/r_draw_pal.cpp @@ -882,6 +882,24 @@ namespace swrenderer _srccolor = args.SrcColorIndex(); _srcalpha = args.SrcAlpha(); _destalpha = args.DestAlpha(); + _dynlight = args.DynamicLight(); + _light = args.Light(); + } + + uint8_t PalColumnCommand::AddLights(uint8_t fg, uint8_t material, uint32_t lit_r, uint32_t lit_g, uint32_t lit_b) + { + if (lit_r == 0 && lit_g == 0 && lit_b == 0) + return fg; + + uint32_t material_r = GPalette.BaseColors[material].r; + uint32_t material_g = GPalette.BaseColors[material].g; + uint32_t material_b = GPalette.BaseColors[material].b; + + lit_r = MIN(GPalette.BaseColors[fg].r + ((lit_r * material_r) >> 8), 255); + lit_g = MIN(GPalette.BaseColors[fg].g + ((lit_g * material_g) >> 8), 255); + lit_b = MIN(GPalette.BaseColors[fg].b + ((lit_b * material_b) >> 8), 255); + + return RGB256k.All[((lit_r >> 2) << 12) | ((lit_g >> 2) << 6) | (lit_b >> 2)]; } void DrawColumnPalCommand::Execute(DrawerThread *thread) @@ -915,20 +933,42 @@ namespace swrenderer // has a better chance of optimizing this well. const uint8_t *colormap = _colormap; const uint8_t *source = _source; - - // Inner loop that does the actual texture mapping, - // e.g. a DDA-lile scaling. - // This is as fast as it gets. 
- do + + if (_dynlight == 0) { - // Re-map color indices from wall texture column - // using a lighting/special effects LUT. - *dest = colormap[source[frac >> FRACBITS]]; + do + { + *dest = colormap[source[frac >> FRACBITS]]; - dest += pitch; - frac += fracstep; + dest += pitch; + frac += fracstep; - } while (--count); + } while (--count); + } + else + { + uint32_t lit_r = RPART(_dynlight); + uint32_t lit_g = GPART(_dynlight); + uint32_t lit_b = BPART(_dynlight); + uint32_t light = 256 - (_light >> (FRACBITS - 8)); + lit_r = MIN(light + lit_r, 256); + lit_g = MIN(light + lit_g, 256); + lit_b = MIN(light + lit_b, 256); + lit_r = lit_r - light; + lit_g = lit_g - light; + lit_b = lit_b - light; + + do + { + auto material = source[frac >> FRACBITS]; + auto fg = colormap[material]; + *dest = AddLights(fg, material, lit_r, lit_g, lit_b); + + dest += pitch; + frac += fracstep; + + } while (--count); + } } void FillColumnPalCommand::Execute(DrawerThread *thread) diff --git a/src/swrenderer/drawers/r_draw_pal.h b/src/swrenderer/drawers/r_draw_pal.h index dc499d79ec..fabf26afa2 100644 --- a/src/swrenderer/drawers/r_draw_pal.h +++ b/src/swrenderer/drawers/r_draw_pal.h @@ -76,6 +76,8 @@ namespace swrenderer FString DebugInfo() override { return "PalColumnCommand"; } protected: + uint8_t AddLights(uint8_t fg, uint8_t material, uint32_t lit_r, uint32_t lit_g, uint32_t lit_b); + int _count; uint8_t *_dest; int _dest_y; @@ -91,6 +93,8 @@ namespace swrenderer uint32_t _srccolor; fixed_t _srcalpha; fixed_t _destalpha; + uint32_t _dynlight; + fixed_t _light; }; class DrawColumnPalCommand : public PalColumnCommand { public: using PalColumnCommand::PalColumnCommand; void Execute(DrawerThread *thread) override; }; From b4b391c51e4f929225308807eef8f3c4f5e5256b Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 23 Feb 2017 23:13:21 +0100 Subject: [PATCH 887/912] Fix palette light artifacts caused by dynamic lights in some rare cases --- src/swrenderer/drawers/r_draw_pal.cpp | 22 
++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/src/swrenderer/drawers/r_draw_pal.cpp b/src/swrenderer/drawers/r_draw_pal.cpp index 31aa65dd2a..3e5c33e1cf 100644 --- a/src/swrenderer/drawers/r_draw_pal.cpp +++ b/src/swrenderer/drawers/r_draw_pal.cpp @@ -121,10 +121,6 @@ namespace swrenderer uint32_t lit_g = 0; uint32_t lit_b = 0; - uint32_t material_r = GPalette.BaseColors[material].r; - uint32_t material_g = GPalette.BaseColors[material].g; - uint32_t material_b = GPalette.BaseColors[material].b; - for (int i = 0; i < num_lights; i++) { uint32_t light_color_r = RPART(lights[i].color); @@ -158,6 +154,13 @@ namespace swrenderer lit_b += (light_color_b * attenuation) >> 8; } + if (lit_r == 0 && lit_g == 0 && lit_b == 0) + return fg; + + uint32_t material_r = GPalette.BaseColors[material].r; + uint32_t material_g = GPalette.BaseColors[material].g; + uint32_t material_b = GPalette.BaseColors[material].b; + lit_r = MIN(GPalette.BaseColors[fg].r + ((lit_r * material_r) >> 8), 255); lit_g = MIN(GPalette.BaseColors[fg].g + ((lit_g * material_g) >> 8), 255); lit_b = MIN(GPalette.BaseColors[fg].b + ((lit_b * material_b) >> 8), 255); @@ -1920,10 +1923,6 @@ namespace swrenderer uint32_t lit_g = 0; uint32_t lit_b = 0; - uint32_t material_r = GPalette.BaseColors[material].r; - uint32_t material_g = GPalette.BaseColors[material].g; - uint32_t material_b = GPalette.BaseColors[material].b; - for (int i = 0; i < num_lights; i++) { uint32_t light_color_r = RPART(lights[i].color); @@ -1957,6 +1956,13 @@ namespace swrenderer lit_b += (light_color_b * attenuation) >> 8; } + if (lit_r == 0 && lit_g == 0 && lit_b == 0) + return fg; + + uint32_t material_r = GPalette.BaseColors[material].r; + uint32_t material_g = GPalette.BaseColors[material].g; + uint32_t material_b = GPalette.BaseColors[material].b; + lit_r = MIN(GPalette.BaseColors[fg].r + ((lit_r * material_r) >> 8), 255); lit_g = MIN(GPalette.BaseColors[fg].g + ((lit_g * material_g) >> 8), 255); 
lit_b = MIN(GPalette.BaseColors[fg].b + ((lit_b * material_b) >> 8), 255); From 8bd0df1aceb29dc6b17a83398a193cca604383b0 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 23 Feb 2017 23:53:38 +0100 Subject: [PATCH 888/912] Simplify CalcTiltedLighting in hope that this function is what made GCC break --- src/swrenderer/drawers/r_draw_pal.cpp | 98 ++++----------------------- 1 file changed, 14 insertions(+), 84 deletions(-) diff --git a/src/swrenderer/drawers/r_draw_pal.cpp b/src/swrenderer/drawers/r_draw_pal.cpp index 3e5c33e1cf..0210029e96 100644 --- a/src/swrenderer/drawers/r_draw_pal.cpp +++ b/src/swrenderer/drawers/r_draw_pal.cpp @@ -2818,100 +2818,30 @@ namespace swrenderer // Calculates the lighting for one row of a tilted plane. If the definition // of GETPALOOKUP changes, this needs to change, too. - void DrawTiltedSpanPalCommand::CalcTiltedLighting(double lval, double lend, int width, DrawerThread *thread) + void DrawTiltedSpanPalCommand::CalcTiltedLighting(double lstart, double lend, int width, DrawerThread *thread) { const uint8_t **tiltlighting = thread->tiltlighting; - double lstep; - uint8_t *lightfiller = nullptr; - int i = 0; + uint8_t *lightstart = basecolormapdata + (GETPALOOKUP(lstart, planeshade) << COLORMAPSHIFT); + uint8_t *lightend = basecolormapdata + (GETPALOOKUP(lend, planeshade) << COLORMAPSHIFT); - if (width == 0 || lval == lend) - { // Constant lighting - lightfiller = basecolormapdata + (GETPALOOKUP(lval, planeshade) << COLORMAPSHIFT); + if (width == 0 || lightstart == lightend) + { + for (int i = 0; i <= width; i++) + { + tiltlighting[i] = lightstart; + } } else { - lstep = (lend - lval) / width; - if (lval >= MAXLIGHTVIS) - { // lval starts "too bright". - lightfiller = basecolormapdata + (GETPALOOKUP(lval, planeshade) << COLORMAPSHIFT); - for (; i <= width && lval >= MAXLIGHTVIS; ++i) - { - tiltlighting[i] = lightfiller; - lval += lstep; - } - } - if (lend >= MAXLIGHTVIS) - { // lend ends "too bright". 
- lightfiller = basecolormapdata + (GETPALOOKUP(lend, planeshade) << COLORMAPSHIFT); - for (; width > i && lend >= MAXLIGHTVIS; --width) - { - tiltlighting[width] = lightfiller; - lend -= lstep; - } - } - if (width > 0) + double lstep = (lend - lstart) / width; + double lval = lstart; + for (int i = 0; i <= width; i++) { - lval = FIXED2DBL(planeshade) - lval; - lend = FIXED2DBL(planeshade) - lend; - lstep = (lend - lval) / width; - if (lstep < 0) - { // Going from dark to light - if (lval < 1.) - { // All bright - lightfiller = basecolormapdata; - } - else - { - if (lval >= NUMCOLORMAPS) - { // Starts beyond the dark end - uint8_t *clight = basecolormapdata + ((NUMCOLORMAPS - 1) << COLORMAPSHIFT); - while (lval >= NUMCOLORMAPS && i <= width) - { - tiltlighting[i++] = clight; - lval += lstep; - } - if (i > width) - return; - } - while (i <= width && lval >= 0) - { - tiltlighting[i++] = basecolormapdata + (xs_ToInt(lval) << COLORMAPSHIFT); - lval += lstep; - } - lightfiller = basecolormapdata; - } - } - else - { // Going from light to dark - if (lval >= (NUMCOLORMAPS - 1)) - { // All dark - lightfiller = basecolormapdata + ((NUMCOLORMAPS - 1) << COLORMAPSHIFT); - } - else - { - while (lval < 0 && i <= width) - { - tiltlighting[i++] = basecolormapdata; - lval += lstep; - } - if (i > width) - return; - while (i <= width && lval < (NUMCOLORMAPS - 1)) - { - tiltlighting[i++] = basecolormapdata + (xs_ToInt(lval) << COLORMAPSHIFT); - lval += lstep; - } - lightfiller = basecolormapdata + ((NUMCOLORMAPS - 1) << COLORMAPSHIFT); - } - } + tiltlighting[i] = basecolormapdata + (GETPALOOKUP(lval, planeshade) << COLORMAPSHIFT); + lval += lstep; } } - for (; i <= width; i++) - { - tiltlighting[i] = lightfiller; - } } ///////////////////////////////////////////////////////////////////////// From f76a039a768444840aa7ba02c7ce904d4699dcdd Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 24 Feb 2017 02:37:13 +0100 Subject: [PATCH 889/912] Don't copy the WallDrawerArgs members 
now that drawer args are grouped by drawer family --- src/swrenderer/drawers/r_draw_pal.cpp | 278 ++++++++++++-------------- src/swrenderer/drawers/r_draw_pal.h | 19 +- 2 files changed, 132 insertions(+), 165 deletions(-) diff --git a/src/swrenderer/drawers/r_draw_pal.cpp b/src/swrenderer/drawers/r_draw_pal.cpp index 0210029e96..18a87b5e66 100644 --- a/src/swrenderer/drawers/r_draw_pal.cpp +++ b/src/swrenderer/drawers/r_draw_pal.cpp @@ -96,23 +96,8 @@ CVAR(Bool, r_blendmethod, false, CVAR_GLOBALCONFIG | CVAR_ARCHIVE) namespace swrenderer { - PalWall1Command::PalWall1Command(const WallDrawerArgs &args) + PalWall1Command::PalWall1Command(const WallDrawerArgs &args) : args(args) { - _iscale = args.TextureVStep(); - _texturefrac = args.TextureVPos(); - _colormap = args.Colormap(); - _count = args.Count(); - _source = args.TexturePixels(); - _dest = args.Dest(); - _dest_y = args.DestY(); - _fracbits = args.TextureFracBits(); - _pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - _srcblend = args.SrcBlend(); - _destblend = args.DestBlend(); - _dynlights = args.dc_lights; - _num_dynlights = args.dc_num_lights; - _viewpos_z = args.dc_viewpos.Z; - _step_viewpos_z = args.dc_viewpos_step.Z; } uint8_t PalWall1Command::AddLights(const DrawerLight *lights, int num_lights, float viewpos_z, uint8_t fg, uint8_t material) @@ -170,25 +155,25 @@ namespace swrenderer void DrawWall1PalCommand::Execute(DrawerThread *thread) { - uint32_t fracstep = _iscale; - uint32_t frac = _texturefrac; - uint8_t *colormap = _colormap; - int count = _count; - const uint8_t *source = _source; - uint8_t *dest = _dest; - int bits = _fracbits; - int pitch = _pitch; - DrawerLight *dynlights = _dynlights; - int num_dynlights = _num_dynlights; - float viewpos_z = _viewpos_z; - float step_viewpos_z = _step_viewpos_z; + uint32_t fracstep = args.TextureVStep(); + uint32_t frac = args.TextureVPos(); + uint8_t *colormap = args.Colormap(); + int count = args.Count(); + const uint8_t *source = 
args.TexturePixels(); + uint8_t *dest = args.Dest(); + int bits = args.TextureFracBits(); + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); + DrawerLight *dynlights = args.dc_lights; + int num_dynlights = args.dc_num_lights; + float viewpos_z = args.dc_viewpos.Z; + float step_viewpos_z = args.dc_viewpos_step.Z; - count = thread->count_for_thread(_dest_y, count); + count = thread->count_for_thread(args.DestY(), count); if (count <= 0) return; - dest = thread->dest_for_thread(_dest_y, pitch, dest); - frac += fracstep * thread->skipped_by_thread(_dest_y); + dest = thread->dest_for_thread(args.DestY(), pitch, dest); + frac += fracstep * thread->skipped_by_thread(args.DestY()); fracstep *= thread->num_cores; pitch *= thread->num_cores; @@ -203,10 +188,10 @@ namespace swrenderer } else { - float viewpos_z = _viewpos_z; - float step_viewpos_z = _step_viewpos_z; + float viewpos_z = args.dc_viewpos.Z; + float step_viewpos_z = args.dc_viewpos_step.Z; - viewpos_z += step_viewpos_z * thread->skipped_by_thread(_dest_y); + viewpos_z += step_viewpos_z * thread->skipped_by_thread(args.DestY()); step_viewpos_z *= thread->num_cores; do @@ -221,25 +206,25 @@ namespace swrenderer void DrawWallMasked1PalCommand::Execute(DrawerThread *thread) { - uint32_t fracstep = _iscale; - uint32_t frac = _texturefrac; - uint8_t *colormap = _colormap; - int count = _count; - const uint8_t *source = _source; - uint8_t *dest = _dest; - int bits = _fracbits; - int pitch = _pitch; - DrawerLight *dynlights = _dynlights; - int num_dynlights = _num_dynlights; - float viewpos_z = _viewpos_z; - float step_viewpos_z = _step_viewpos_z; + uint32_t fracstep = args.TextureVStep(); + uint32_t frac = args.TextureVPos(); + uint8_t *colormap = args.Colormap(); + int count = args.Count(); + const uint8_t *source = args.TexturePixels(); + uint8_t *dest = args.Dest(); + int bits = args.TextureFracBits(); + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); + DrawerLight *dynlights = 
args.dc_lights; + int num_dynlights = args.dc_num_lights; + float viewpos_z = args.dc_viewpos.Z; + float step_viewpos_z = args.dc_viewpos_step.Z; - count = thread->count_for_thread(_dest_y, count); + count = thread->count_for_thread(args.DestY(), count); if (count <= 0) return; - dest = thread->dest_for_thread(_dest_y, pitch, dest); - frac += fracstep * thread->skipped_by_thread(_dest_y); + dest = thread->dest_for_thread(args.DestY(), pitch, dest); + frac += fracstep * thread->skipped_by_thread(args.DestY()); fracstep *= thread->num_cores; pitch *= thread->num_cores; @@ -258,10 +243,10 @@ namespace swrenderer } else { - float viewpos_z = _viewpos_z; - float step_viewpos_z = _step_viewpos_z; + float viewpos_z = args.dc_viewpos.Z; + float step_viewpos_z = args.dc_viewpos_step.Z; - viewpos_z += step_viewpos_z * thread->skipped_by_thread(_dest_y); + viewpos_z += step_viewpos_z * thread->skipped_by_thread(args.DestY()); step_viewpos_z *= thread->num_cores; do @@ -280,24 +265,24 @@ namespace swrenderer void DrawWallAdd1PalCommand::Execute(DrawerThread *thread) { - uint32_t fracstep = _iscale; - uint32_t frac = _texturefrac; - uint8_t *colormap = _colormap; - int count = _count; - const uint8_t *source = _source; - uint8_t *dest = _dest; - int bits = _fracbits; - int pitch = _pitch; + uint32_t fracstep = args.TextureVStep(); + uint32_t frac = args.TextureVPos(); + uint8_t *colormap = args.Colormap(); + int count = args.Count(); + const uint8_t *source = args.TexturePixels(); + uint8_t *dest = args.Dest(); + int bits = args.TextureFracBits(); + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t *fg2rgb = _srcblend; - uint32_t *bg2rgb = _destblend; + uint32_t *fg2rgb = args.SrcBlend(); + uint32_t *bg2rgb = args.DestBlend(); - count = thread->count_for_thread(_dest_y, count); + count = thread->count_for_thread(args.DestY(), count); if (count <= 0) return; - dest = thread->dest_for_thread(_dest_y, pitch, dest); - frac += fracstep * 
thread->skipped_by_thread(_dest_y); + dest = thread->dest_for_thread(args.DestY(), pitch, dest); + frac += fracstep * thread->skipped_by_thread(args.DestY()); fracstep *= thread->num_cores; pitch *= thread->num_cores; @@ -342,31 +327,31 @@ namespace swrenderer void DrawWallAddClamp1PalCommand::Execute(DrawerThread *thread) { - uint32_t fracstep = _iscale; - uint32_t frac = _texturefrac; - uint8_t *colormap = _colormap; - int count = _count; - const uint8_t *source = _source; - uint8_t *dest = _dest; - int bits = _fracbits; - int pitch = _pitch; - DrawerLight *dynlights = _dynlights; - int num_dynlights = _num_dynlights; - float viewpos_z = _viewpos_z; - float step_viewpos_z = _step_viewpos_z; + uint32_t fracstep = args.TextureVStep(); + uint32_t frac = args.TextureVPos(); + uint8_t *colormap = args.Colormap(); + int count = args.Count(); + const uint8_t *source = args.TexturePixels(); + uint8_t *dest = args.Dest(); + int bits = args.TextureFracBits(); + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); + DrawerLight *dynlights = args.dc_lights; + int num_dynlights = args.dc_num_lights; + float viewpos_z = args.dc_viewpos.Z; + float step_viewpos_z = args.dc_viewpos_step.Z; - uint32_t *fg2rgb = _srcblend; - uint32_t *bg2rgb = _destblend; + uint32_t *fg2rgb = args.SrcBlend(); + uint32_t *bg2rgb = args.DestBlend(); - count = thread->count_for_thread(_dest_y, count); + count = thread->count_for_thread(args.DestY(), count); if (count <= 0) return; - dest = thread->dest_for_thread(_dest_y, pitch, dest); - frac += fracstep * thread->skipped_by_thread(_dest_y); + dest = thread->dest_for_thread(args.DestY(), pitch, dest); + frac += fracstep * thread->skipped_by_thread(args.DestY()); fracstep *= thread->num_cores; pitch *= thread->num_cores; - viewpos_z += step_viewpos_z * thread->skipped_by_thread(_dest_y); + viewpos_z += step_viewpos_z * thread->skipped_by_thread(args.DestY()); step_viewpos_z *= thread->num_cores; if (!r_blendmethod) @@ -416,31 +401,31 @@ 
namespace swrenderer void DrawWallSubClamp1PalCommand::Execute(DrawerThread *thread) { - uint32_t fracstep = _iscale; - uint32_t frac = _texturefrac; - uint8_t *colormap = _colormap; - int count = _count; - const uint8_t *source = _source; - uint8_t *dest = _dest; - int bits = _fracbits; - int pitch = _pitch; - DrawerLight *dynlights = _dynlights; - int num_dynlights = _num_dynlights; - float viewpos_z = _viewpos_z; - float step_viewpos_z = _step_viewpos_z; + uint32_t fracstep = args.TextureVStep(); + uint32_t frac = args.TextureVPos(); + uint8_t *colormap = args.Colormap(); + int count = args.Count(); + const uint8_t *source = args.TexturePixels(); + uint8_t *dest = args.Dest(); + int bits = args.TextureFracBits(); + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); + DrawerLight *dynlights = args.dc_lights; + int num_dynlights = args.dc_num_lights; + float viewpos_z = args.dc_viewpos.Z; + float step_viewpos_z = args.dc_viewpos_step.Z; - uint32_t *fg2rgb = _srcblend; - uint32_t *bg2rgb = _destblend; + uint32_t *fg2rgb = args.SrcBlend(); + uint32_t *bg2rgb = args.DestBlend(); - count = thread->count_for_thread(_dest_y, count); + count = thread->count_for_thread(args.DestY(), count); if (count <= 0) return; - dest = thread->dest_for_thread(_dest_y, pitch, dest); - frac += fracstep * thread->skipped_by_thread(_dest_y); + dest = thread->dest_for_thread(args.DestY(), pitch, dest); + frac += fracstep * thread->skipped_by_thread(args.DestY()); fracstep *= thread->num_cores; pitch *= thread->num_cores; - viewpos_z += step_viewpos_z * thread->skipped_by_thread(_dest_y); + viewpos_z += step_viewpos_z * thread->skipped_by_thread(args.DestY()); step_viewpos_z *= thread->num_cores; if (!r_blendmethod) @@ -489,31 +474,31 @@ namespace swrenderer void DrawWallRevSubClamp1PalCommand::Execute(DrawerThread *thread) { - uint32_t fracstep = _iscale; - uint32_t frac = _texturefrac; - uint8_t *colormap = _colormap; - int count = _count; - const uint8_t *source = _source; 
- uint8_t *dest = _dest; - int bits = _fracbits; - int pitch = _pitch; - DrawerLight *dynlights = _dynlights; - int num_dynlights = _num_dynlights; - float viewpos_z = _viewpos_z; - float step_viewpos_z = _step_viewpos_z; + uint32_t fracstep = args.TextureVStep(); + uint32_t frac = args.TextureVPos(); + uint8_t *colormap = args.Colormap(); + int count = args.Count(); + const uint8_t *source = args.TexturePixels(); + uint8_t *dest = args.Dest(); + int bits = args.TextureFracBits(); + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); + DrawerLight *dynlights = args.dc_lights; + int num_dynlights = args.dc_num_lights; + float viewpos_z = args.dc_viewpos.Z; + float step_viewpos_z = args.dc_viewpos_step.Z; - uint32_t *fg2rgb = _srcblend; - uint32_t *bg2rgb = _destblend; + uint32_t *fg2rgb = args.SrcBlend(); + uint32_t *bg2rgb = args.DestBlend(); - count = thread->count_for_thread(_dest_y, count); + count = thread->count_for_thread(args.DestY(), count); if (count <= 0) return; - dest = thread->dest_for_thread(_dest_y, pitch, dest); - frac += fracstep * thread->skipped_by_thread(_dest_y); + dest = thread->dest_for_thread(args.DestY(), pitch, dest); + frac += fracstep * thread->skipped_by_thread(args.DestY()); fracstep *= thread->num_cores; pitch *= thread->num_cores; - viewpos_z += step_viewpos_z * thread->skipped_by_thread(_dest_y); + viewpos_z += step_viewpos_z * thread->skipped_by_thread(args.DestY()); step_viewpos_z *= thread->num_cores; if (!r_blendmethod) @@ -567,7 +552,6 @@ namespace swrenderer _dest = args.Dest(); _dest_y = args.DestY(); _count = args.Count(); - _pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); _source = args.FrontTexturePixels(); _source2 = args.BackTexturePixels(); _sourceheight[0] = args.FrontTextureHeight(); @@ -583,7 +567,7 @@ namespace swrenderer { uint8_t *dest = _dest; int count = _count; - int pitch = _pitch; + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); const uint8_t *source0 = _source; 
int textureheight0 = _sourceheight[0]; @@ -714,7 +698,7 @@ namespace swrenderer { uint8_t *dest = _dest; int count = _count; - int pitch = _pitch; + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); const uint8_t *source0 = _source; const uint8_t *source1 = _source2; int textureheight0 = _sourceheight[0]; @@ -873,7 +857,6 @@ namespace swrenderer _count = args.Count(); _dest = args.Dest(); _dest_y = args.DestY(); - _pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); _iscale = args.TextureVStep(); _texturefrac = args.TextureVPos(); _colormap = args.Colormap(); @@ -926,7 +909,7 @@ namespace swrenderer if (count <= 0) return; - int pitch = _pitch; + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); dest = thread->dest_for_thread(_dest_y, pitch, dest); frac += fracstep * thread->skipped_by_thread(_dest_y); fracstep *= thread->num_cores; @@ -986,7 +969,7 @@ namespace swrenderer if (count <= 0) return; - int pitch = _pitch; + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); dest = thread->dest_for_thread(_dest_y, pitch, dest); pitch *= thread->num_cores; @@ -1010,7 +993,7 @@ namespace swrenderer bg2rgb = _destblend; fg = _srccolor; - int pitch = _pitch; + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); count = thread->count_for_thread(_dest_y, count); if (count <= 0) @@ -1060,7 +1043,7 @@ namespace swrenderer bg2rgb = _destblend; fg = _srccolor; - int pitch = _pitch; + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); count = thread->count_for_thread(_dest_y, count); if (count <= 0) @@ -1114,7 +1097,7 @@ namespace swrenderer uint32_t *bg2rgb = _destblend; uint32_t fg = _srccolor; - int pitch = _pitch; + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); count = thread->count_for_thread(_dest_y, count); if (count <= 0) @@ -1171,7 +1154,7 @@ namespace swrenderer uint32_t *bg2rgb = _destblend; uint32_t fg = _srccolor; - int pitch = _pitch; + int pitch = 
RenderViewport::Instance()->RenderTarget->GetPitch(); count = thread->count_for_thread(_dest_y, count); if (count <= 0) @@ -1232,7 +1215,7 @@ namespace swrenderer if (count <= 0) return; - int pitch = _pitch; + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); dest = thread->dest_for_thread(_dest_y, pitch, dest); frac += fracstep * thread->skipped_by_thread(_dest_y); fracstep *= thread->num_cores; @@ -1293,7 +1276,7 @@ namespace swrenderer if (count <= 0) return; - int pitch = _pitch; + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); dest = thread->dest_for_thread(_dest_y, pitch, dest); frac += fracstep * thread->skipped_by_thread(_dest_y); fracstep *= thread->num_cores; @@ -1330,7 +1313,7 @@ namespace swrenderer if (count <= 0) return; - int pitch = _pitch; + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); dest = thread->dest_for_thread(_dest_y, pitch, dest); frac += fracstep * thread->skipped_by_thread(_dest_y); fracstep *= thread->num_cores; @@ -1391,7 +1374,7 @@ namespace swrenderer if (count <= 0) return; - int pitch = _pitch; + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); dest = thread->dest_for_thread(_dest_y, pitch, dest); frac += fracstep * thread->skipped_by_thread(_dest_y); fracstep *= thread->num_cores; @@ -1449,7 +1432,7 @@ namespace swrenderer if (count <= 0) return; - int pitch = _pitch; + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); dest = thread->dest_for_thread(_dest_y, pitch, dest); frac += fracstep * thread->skipped_by_thread(_dest_y); fracstep *= thread->num_cores; @@ -1511,7 +1494,7 @@ namespace swrenderer if (count <= 0) return; - int pitch = _pitch; + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); dest = thread->dest_for_thread(_dest_y, pitch, dest); frac += fracstep * thread->skipped_by_thread(_dest_y); fracstep *= thread->num_cores; @@ -1574,7 +1557,7 @@ namespace swrenderer if (count <= 0) return; - int pitch = _pitch; + int 
pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); dest = thread->dest_for_thread(_dest_y, pitch, dest); frac += fracstep * thread->skipped_by_thread(_dest_y); fracstep *= thread->num_cores; @@ -1635,7 +1618,7 @@ namespace swrenderer if (count <= 0) return; - int pitch = _pitch; + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); dest = thread->dest_for_thread(_dest_y, pitch, dest); frac += fracstep * thread->skipped_by_thread(_dest_y); fracstep *= thread->num_cores; @@ -1697,7 +1680,7 @@ namespace swrenderer if (count <= 0) return; - int pitch = _pitch; + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); dest = thread->dest_for_thread(_dest_y, pitch, dest); frac += fracstep * thread->skipped_by_thread(_dest_y); fracstep *= thread->num_cores; @@ -1758,7 +1741,7 @@ namespace swrenderer if (count <= 0) return; - int pitch = _pitch; + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); dest = thread->dest_for_thread(_dest_y, pitch, dest); frac += fracstep * thread->skipped_by_thread(_dest_y); fracstep *= thread->num_cores; @@ -1811,7 +1794,6 @@ namespace swrenderer _yh = args.FuzzY2(); _x = args.FuzzX(); _destorg = RenderViewport::Instance()->GetDest(0, 0); - _pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); _fuzzpos = fuzzpos; _fuzzviewheight = fuzzviewheight; } @@ -1829,9 +1811,10 @@ namespace swrenderer uint8_t *map = &NormalLight.Maps[6 * 256]; - uint8_t *dest = thread->dest_for_thread(yl, _pitch, yl * _pitch + _x + _destorg); + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); + uint8_t *dest = thread->dest_for_thread(yl, pitch, yl * pitch + _x + _destorg); - int pitch = _pitch * thread->num_cores; + pitch = pitch * thread->num_cores; int fuzzstep = thread->num_cores; int fuzz = (_fuzzpos + thread->skipped_by_thread(yl)) % FUZZTABLE; @@ -1841,7 +1824,7 @@ namespace swrenderer if (yl < fuzzstep) { uint8_t *srcdest = dest + fuzzoffset[fuzz] * fuzzstep + pitch; - 
//assert(static_cast((srcdest - (uint8_t*)dc_destorg) / (_pitch)) < viewheight); + //assert(static_cast((srcdest - (uint8_t*)dc_destorg) / (pitch)) < viewheight); *dest = map[*srcdest]; dest += pitch; @@ -1870,7 +1853,7 @@ namespace swrenderer do { uint8_t *srcdest = dest + fuzzoffset[fuzz] * fuzzstep; - //assert(static_cast((srcdest - (uint8_t*)dc_destorg) / (_pitch)) < viewheight); + //assert(static_cast((srcdest - (uint8_t*)dc_destorg) / (pitch)) < viewheight); *dest = map[*srcdest]; dest += pitch; @@ -1884,7 +1867,7 @@ namespace swrenderer if (lowerbounds) { uint8_t *srcdest = dest + fuzzoffset[fuzz] * fuzzstep - pitch; - //assert(static_cast((srcdest - (uint8_t*)dc_destorg) / (_pitch)) < viewheight); + //assert(static_cast((srcdest - (uint8_t*)dc_destorg) / (pitch)) < viewheight); *dest = map[*srcdest]; } @@ -2901,8 +2884,9 @@ namespace swrenderer if (count <= 0) return; - uint8_t *dest = thread->dest_for_thread(_dest_y, _pitch, _dest); - int pitch = _pitch * thread->num_cores; + int pitch = _pitch; + uint8_t *dest = thread->dest_for_thread(_dest_y, pitch, _dest); + pitch = pitch * thread->num_cores; const uint32_t *source = &particle_texture[(_fracposx >> FRACBITS) * PARTICLE_TEXTURE_SIZE]; uint32_t particle_alpha = _alpha; diff --git a/src/swrenderer/drawers/r_draw_pal.h b/src/swrenderer/drawers/r_draw_pal.h index fabf26afa2..4acbc8dbe9 100644 --- a/src/swrenderer/drawers/r_draw_pal.h +++ b/src/swrenderer/drawers/r_draw_pal.h @@ -20,21 +20,7 @@ namespace swrenderer protected: inline static uint8_t AddLights(const DrawerLight *lights, int num_lights, float viewpos_z, uint8_t fg, uint8_t material); - uint32_t _iscale; - uint32_t _texturefrac; - uint8_t *_colormap; - int _count; - const uint8_t *_source; - uint8_t *_dest; - int _dest_y; - int _fracbits; - int _pitch; - uint32_t *_srcblend; - uint32_t *_destblend; - DrawerLight *_dynlights; - int _num_dynlights; - float _viewpos_z; - float _step_viewpos_z; + WallDrawerArgs args; }; class DrawWall1PalCommand : 
public PalWall1Command { public: using PalWall1Command::PalWall1Command; void Execute(DrawerThread *thread) override; }; @@ -58,7 +44,6 @@ namespace swrenderer uint8_t *_dest; int _dest_y; int _count; - int _pitch; const uint8_t *_source; const uint8_t *_source2; int _sourceheight[2]; @@ -81,7 +66,6 @@ namespace swrenderer int _count; uint8_t *_dest; int _dest_y; - int _pitch; fixed_t _iscale; fixed_t _texturefrac; const uint8_t *_colormap; @@ -126,7 +110,6 @@ namespace swrenderer int _yh; int _x; uint8_t *_destorg; - int _pitch; int _fuzzpos; int _fuzzviewheight; }; From 218708571e053cd98931c20738a58b385f96d7d0 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 24 Feb 2017 04:03:27 +0100 Subject: [PATCH 890/912] Don't copy the SkyDrawerArgs members now that drawer args are grouped by drawer family --- src/swrenderer/drawers/r_draw_pal.cpp | 64 ++++++++++++--------------- src/swrenderer/drawers/r_draw_pal.h | 13 +----- 2 files changed, 30 insertions(+), 47 deletions(-) diff --git a/src/swrenderer/drawers/r_draw_pal.cpp b/src/swrenderer/drawers/r_draw_pal.cpp index 18a87b5e66..369876a98b 100644 --- a/src/swrenderer/drawers/r_draw_pal.cpp +++ b/src/swrenderer/drawers/r_draw_pal.cpp @@ -547,32 +547,20 @@ namespace swrenderer ///////////////////////////////////////////////////////////////////////// - PalSkyCommand::PalSkyCommand(const SkyDrawerArgs &args) + PalSkyCommand::PalSkyCommand(const SkyDrawerArgs &args) : args(args) { - _dest = args.Dest(); - _dest_y = args.DestY(); - _count = args.Count(); - _source = args.FrontTexturePixels(); - _source2 = args.BackTexturePixels(); - _sourceheight[0] = args.FrontTextureHeight(); - _sourceheight[1] = args.BackTextureHeight(); - _iscale = args.TextureVStep(); - _texturefrac = args.TextureVPos(); - solid_top = args.SolidTopColor(); - solid_bottom = args.SolidBottomColor(); - fadeSky = args.FadeSky(); } void DrawSingleSky1PalCommand::Execute(DrawerThread *thread) { - uint8_t *dest = _dest; - int count = _count; + uint8_t 
*dest = args.Dest(); + int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - const uint8_t *source0 = _source; - int textureheight0 = _sourceheight[0]; + const uint8_t *source0 = args.FrontTexturePixels(); + int textureheight0 = args.FrontTextureHeight(); - int32_t frac = _texturefrac; - int32_t fracstep = _iscale; + int32_t frac = args.TextureVPos(); + int32_t fracstep = args.TextureVStep(); // Find bands for top solid color, top fade, center textured, bottom fade, bottom solid color: int start_fade = 2; // How fast it should fade out @@ -587,15 +575,15 @@ namespace swrenderer end_fadebottom_y = clamp(end_fadebottom_y, 0, count); int num_cores = thread->num_cores; - int skipped = thread->skipped_by_thread(_dest_y); - dest = thread->dest_for_thread(_dest_y, pitch, dest); + int skipped = thread->skipped_by_thread(args.DestY()); + dest = thread->dest_for_thread(args.DestY(), pitch, dest); frac += fracstep * skipped; fracstep *= num_cores; pitch *= num_cores; - if (!fadeSky) + if (!args.FadeSky()) { - count = thread->count_for_thread(_dest_y, count); + count = thread->count_for_thread(args.DestY(), count); for (int index = 0; index < count; index++) { @@ -608,6 +596,9 @@ namespace swrenderer return; } + uint32_t solid_top = args.SolidTopColor(); + uint32_t solid_bottom = args.SolidBottomColor(); + int solid_top_r = RPART(solid_top); int solid_top_g = GPART(solid_top); int solid_top_b = BPART(solid_top); @@ -696,16 +687,16 @@ namespace swrenderer void DrawDoubleSky1PalCommand::Execute(DrawerThread *thread) { - uint8_t *dest = _dest; - int count = _count; + uint8_t *dest = args.Dest(); + int count = args.Count(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - const uint8_t *source0 = _source; - const uint8_t *source1 = _source2; - int textureheight0 = _sourceheight[0]; - uint32_t maxtextureheight1 = _sourceheight[1] - 1; + const uint8_t *source0 = args.FrontTexturePixels(); + const uint8_t *source1 = 
args.BackTexturePixels(); + int textureheight0 = args.FrontTextureHeight(); + uint32_t maxtextureheight1 = args.BackTextureHeight() - 1; - int32_t frac = _texturefrac; - int32_t fracstep = _iscale; + int32_t frac = args.TextureVPos(); + int32_t fracstep = args.TextureVStep(); // Find bands for top solid color, top fade, center textured, bottom fade, bottom solid color: int start_fade = 2; // How fast it should fade out @@ -720,15 +711,15 @@ namespace swrenderer end_fadebottom_y = clamp(end_fadebottom_y, 0, count); int num_cores = thread->num_cores; - int skipped = thread->skipped_by_thread(_dest_y); - dest = thread->dest_for_thread(_dest_y, pitch, dest); + int skipped = thread->skipped_by_thread(args.DestY()); + dest = thread->dest_for_thread(args.DestY(), pitch, dest); frac += fracstep * skipped; fracstep *= num_cores; pitch *= num_cores; - if (!fadeSky) + if (!args.FadeSky()) { - count = thread->count_for_thread(_dest_y, count); + count = thread->count_for_thread(args.DestY(), count); for (int index = 0; index < count; index++) { @@ -748,6 +739,9 @@ namespace swrenderer return; } + uint32_t solid_top = args.SolidTopColor(); + uint32_t solid_bottom = args.SolidBottomColor(); + int solid_top_r = RPART(solid_top); int solid_top_g = GPART(solid_top); int solid_top_b = BPART(solid_top); diff --git a/src/swrenderer/drawers/r_draw_pal.h b/src/swrenderer/drawers/r_draw_pal.h index 4acbc8dbe9..ed5bbff90c 100644 --- a/src/swrenderer/drawers/r_draw_pal.h +++ b/src/swrenderer/drawers/r_draw_pal.h @@ -37,18 +37,7 @@ namespace swrenderer FString DebugInfo() override { return "PalSkyCommand"; } protected: - uint32_t solid_top; - uint32_t solid_bottom; - bool fadeSky; - - uint8_t *_dest; - int _dest_y; - int _count; - const uint8_t *_source; - const uint8_t *_source2; - int _sourceheight[2]; - uint32_t _iscale; - uint32_t _texturefrac; + SkyDrawerArgs args; }; class DrawSingleSky1PalCommand : public PalSkyCommand { public: using PalSkyCommand::PalSkyCommand; void 
Execute(DrawerThread *thread) override; }; From 616e800635fcecf40e622db18d2e70cc5bfb3f04 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 24 Feb 2017 04:32:40 +0100 Subject: [PATCH 891/912] Don't copy the SpriteDrawerArgs members now that drawer args are grouped by drawer family --- src/swrenderer/drawers/r_draw_pal.cpp | 461 +++++++++++++------------- src/swrenderer/drawers/r_draw_pal.h | 17 +- 2 files changed, 239 insertions(+), 239 deletions(-) diff --git a/src/swrenderer/drawers/r_draw_pal.cpp b/src/swrenderer/drawers/r_draw_pal.cpp index 369876a98b..0e08427cfd 100644 --- a/src/swrenderer/drawers/r_draw_pal.cpp +++ b/src/swrenderer/drawers/r_draw_pal.cpp @@ -846,24 +846,8 @@ namespace swrenderer ///////////////////////////////////////////////////////////////////////// - PalColumnCommand::PalColumnCommand(const SpriteDrawerArgs &args) + PalColumnCommand::PalColumnCommand(const SpriteDrawerArgs &args) : args(args) { - _count = args.Count(); - _dest = args.Dest(); - _dest_y = args.DestY(); - _iscale = args.TextureVStep(); - _texturefrac = args.TextureVPos(); - _colormap = args.Colormap(); - _source = args.TexturePixels(); - _translation = args.TranslationMap(); - _color = args.SolidColor(); - _srcblend = args.SrcBlend(); - _destblend = args.DestBlend(); - _srccolor = args.SrcColorIndex(); - _srcalpha = args.SrcAlpha(); - _destalpha = args.DestAlpha(); - _dynlight = args.DynamicLight(); - _light = args.Light(); } uint8_t PalColumnCommand::AddLights(uint8_t fg, uint8_t material, uint32_t lit_r, uint32_t lit_g, uint32_t lit_b) @@ -889,32 +873,33 @@ namespace swrenderer fixed_t frac; fixed_t fracstep; - count = _count; + count = args.Count(); // Framebuffer destination address. - dest = _dest; + dest = args.Dest(); // Determine scaling, // which is the only mapping to be done. 
- fracstep = _iscale; - frac = _texturefrac; + fracstep = args.TextureVStep(); + frac = args.TextureVPos(); - count = thread->count_for_thread(_dest_y, count); + count = thread->count_for_thread(args.DestY(), count); if (count <= 0) return; int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - dest = thread->dest_for_thread(_dest_y, pitch, dest); - frac += fracstep * thread->skipped_by_thread(_dest_y); + dest = thread->dest_for_thread(args.DestY(), pitch, dest); + frac += fracstep * thread->skipped_by_thread(args.DestY()); fracstep *= thread->num_cores; pitch *= thread->num_cores; // [RH] Get local copies of these variables so that the compiler // has a better chance of optimizing this well. - const uint8_t *colormap = _colormap; - const uint8_t *source = _source; + const uint8_t *colormap = args.Colormap(); + const uint8_t *source = args.TexturePixels(); - if (_dynlight == 0) + uint32_t dynlight = args.DynamicLight(); + if (dynlight == 0) { do { @@ -927,10 +912,10 @@ namespace swrenderer } else { - uint32_t lit_r = RPART(_dynlight); - uint32_t lit_g = GPART(_dynlight); - uint32_t lit_b = BPART(_dynlight); - uint32_t light = 256 - (_light >> (FRACBITS - 8)); + uint32_t lit_r = RPART(dynlight); + uint32_t lit_g = GPART(dynlight); + uint32_t lit_b = BPART(dynlight); + uint32_t light = 256 - (args.Light() >> (FRACBITS - 8)); lit_r = MIN(light + lit_r, 256); lit_g = MIN(light + lit_g, 256); lit_b = MIN(light + lit_b, 256); @@ -956,18 +941,18 @@ namespace swrenderer int count; uint8_t *dest; - count = _count; - dest = _dest; + count = args.Count(); + dest = args.Dest(); - count = thread->count_for_thread(_dest_y, count); + count = thread->count_for_thread(args.DestY(), count); if (count <= 0) return; int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - dest = thread->dest_for_thread(_dest_y, pitch, dest); + dest = thread->dest_for_thread(args.DestY(), pitch, dest); pitch *= thread->num_cores; - uint8_t color = _color; + uint8_t color = 
args.SolidColor(); do { *dest = color; @@ -980,20 +965,20 @@ namespace swrenderer int count; uint8_t *dest; - count = _count; - dest = _dest; + count = args.Count(); + dest = args.Dest(); uint32_t *bg2rgb; uint32_t fg; - bg2rgb = _destblend; - fg = _srccolor; + bg2rgb = args.DestBlend(); + fg = args.SrcColorIndex(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - count = thread->count_for_thread(_dest_y, count); + count = thread->count_for_thread(args.DestY(), count); if (count <= 0) return; - dest = thread->dest_for_thread(_dest_y, pitch, dest); + dest = thread->dest_for_thread(args.DestY(), pitch, dest); pitch *= thread->num_cores; const PalEntry* pal = GPalette.BaseColors; @@ -1010,14 +995,17 @@ namespace swrenderer } else { - int src_r = ((_srccolor >> 16) & 0xff) * _srcalpha; - int src_g = ((_srccolor >> 0) & 0xff) * _srcalpha; - int src_b = ((_srccolor >> 8) & 0xff) * _srcalpha; + uint32_t srccolor = args.SrcColorIndex(); + fixed_t srcalpha = args.SrcAlpha(); + fixed_t destalpha = args.DestAlpha(); + int src_r = ((srccolor >> 16) & 0xff) * srcalpha; + int src_g = ((srccolor >> 0) & 0xff) * srcalpha; + int src_b = ((srccolor >> 8) & 0xff) * srcalpha; do { - int r = clamp((src_r + pal[*dest].r * _destalpha)>>18, 0, 255); - int g = clamp((src_g + pal[*dest].g * _destalpha)>>18, 0, 255); - int b = clamp((src_b + pal[*dest].b * _destalpha)>>18, 0, 255); + int r = clamp((src_r + pal[*dest].r * destalpha)>>18, 0, 255); + int g = clamp((src_g + pal[*dest].g * destalpha)>>18, 0, 255); + int b = clamp((src_b + pal[*dest].b * destalpha)>>18, 0, 255); *dest = RGB256k.RGB[r][g][b]; dest += pitch; } while (--count); @@ -1029,21 +1017,21 @@ namespace swrenderer int count; uint8_t *dest; - count = _count; + count = args.Count(); - dest = _dest; + dest = args.Dest(); uint32_t *bg2rgb; uint32_t fg; - bg2rgb = _destblend; - fg = _srccolor; + bg2rgb = args.DestBlend(); + fg = args.SrcColorIndex(); int pitch = 
RenderViewport::Instance()->RenderTarget->GetPitch(); - count = thread->count_for_thread(_dest_y, count); + count = thread->count_for_thread(args.DestY(), count); if (count <= 0) return; - dest = thread->dest_for_thread(_dest_y, pitch, dest); + dest = thread->dest_for_thread(args.DestY(), pitch, dest); pitch *= thread->num_cores; const PalEntry* pal = GPalette.BaseColors; @@ -1066,14 +1054,17 @@ namespace swrenderer } else { - int src_r = ((_srccolor >> 16) & 0xff) * _srcalpha; - int src_g = ((_srccolor >> 0) & 0xff) * _srcalpha; - int src_b = ((_srccolor >> 8) & 0xff) * _srcalpha; + uint32_t srccolor = args.SrcColorIndex(); + fixed_t srcalpha = args.SrcAlpha(); + fixed_t destalpha = args.DestAlpha(); + int src_r = ((srccolor >> 16) & 0xff) * srcalpha; + int src_g = ((srccolor >> 0) & 0xff) * srcalpha; + int src_b = ((srccolor >> 8) & 0xff) * srcalpha; do { - int r = clamp((src_r + pal[*dest].r * _destalpha)>>18, 0, 255); - int g = clamp((src_g + pal[*dest].g * _destalpha)>>18, 0, 255); - int b = clamp((src_b + pal[*dest].b * _destalpha)>>18, 0, 255); + int r = clamp((src_r + pal[*dest].r * destalpha)>>18, 0, 255); + int g = clamp((src_g + pal[*dest].g * destalpha)>>18, 0, 255); + int b = clamp((src_b + pal[*dest].b * destalpha)>>18, 0, 255); *dest = RGB256k.RGB[r][g][b]; dest += pitch; } while (--count); @@ -1085,19 +1076,19 @@ namespace swrenderer int count; uint8_t *dest; - count = _count; + count = args.Count(); - dest = _dest; - uint32_t *bg2rgb = _destblend; - uint32_t fg = _srccolor; + dest = args.Dest(); + uint32_t *bg2rgb = args.DestBlend(); + uint32_t fg = args.SrcColorIndex(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - count = thread->count_for_thread(_dest_y, count); + count = thread->count_for_thread(args.DestY(), count); if (count <= 0) return; - dest = thread->dest_for_thread(_dest_y, pitch, dest); + dest = thread->dest_for_thread(args.DestY(), pitch, dest); pitch *= thread->num_cores; const PalEntry* palette = 
GPalette.BaseColors; @@ -1119,15 +1110,18 @@ namespace swrenderer } else { + uint32_t srccolor = args.SrcColorIndex(); + fixed_t srcalpha = args.SrcAlpha(); + fixed_t destalpha = args.DestAlpha(); do { - int src_r = ((_srccolor >> 16) & 0xff) * _srcalpha; - int src_g = ((_srccolor >> 0) & 0xff) * _srcalpha; - int src_b = ((_srccolor >> 8) & 0xff) * _srcalpha; + int src_r = ((srccolor >> 16) & 0xff) * srcalpha; + int src_g = ((srccolor >> 0) & 0xff) * srcalpha; + int src_b = ((srccolor >> 8) & 0xff) * srcalpha; int bg = *dest; - int r = MAX((-src_r + palette[bg].r * _destalpha)>>18, 0); - int g = MAX((-src_g + palette[bg].g * _destalpha)>>18, 0); - int b = MAX((-src_b + palette[bg].b * _destalpha)>>18, 0); + int r = MAX((-src_r + palette[bg].r * destalpha)>>18, 0); + int g = MAX((-src_g + palette[bg].g * destalpha)>>18, 0); + int b = MAX((-src_b + palette[bg].b * destalpha)>>18, 0); *dest = RGB256k.RGB[r][g][b]; dest += pitch; @@ -1140,21 +1134,21 @@ namespace swrenderer int count; uint8_t *dest; - count = _count; + count = args.Count(); if (count <= 0) return; - dest = _dest; - uint32_t *bg2rgb = _destblend; - uint32_t fg = _srccolor; + dest = args.Dest(); + uint32_t *bg2rgb = args.DestBlend(); + uint32_t fg = args.SrcColorIndex(); int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - count = thread->count_for_thread(_dest_y, count); + count = thread->count_for_thread(args.DestY(), count); if (count <= 0) return; - dest = thread->dest_for_thread(_dest_y, pitch, dest); + dest = thread->dest_for_thread(args.DestY(), pitch, dest); pitch *= thread->num_cores; const PalEntry *palette = GPalette.BaseColors; @@ -1176,15 +1170,18 @@ namespace swrenderer } else { + uint32_t srccolor = args.SrcColorIndex(); + fixed_t srcalpha = args.SrcAlpha(); + fixed_t destalpha = args.DestAlpha(); do { - int src_r = ((_srccolor >> 16) & 0xff) * _srcalpha; - int src_g = ((_srccolor >> 0) & 0xff) * _srcalpha; - int src_b = ((_srccolor >> 8) & 0xff) * _srcalpha; + int src_r = 
((srccolor >> 16) & 0xff) * srcalpha; + int src_g = ((srccolor >> 0) & 0xff) * srcalpha; + int src_b = ((srccolor >> 8) & 0xff) * srcalpha; int bg = *dest; - int r = MAX((src_r - palette[bg].r * _destalpha)>>18, 0); - int g = MAX((src_g - palette[bg].g * _destalpha)>>18, 0); - int b = MAX((src_b - palette[bg].b * _destalpha)>>18, 0); + int r = MAX((src_r - palette[bg].r * destalpha)>>18, 0); + int g = MAX((src_g - palette[bg].g * destalpha)>>18, 0); + int b = MAX((src_b - palette[bg].b * destalpha)>>18, 0); *dest = RGB256k.RGB[r][g][b]; dest += pitch; @@ -1199,26 +1196,26 @@ namespace swrenderer fixed_t frac; fixed_t fracstep; - count = _count; - dest = _dest; + count = args.Count(); + dest = args.Dest(); - fracstep = _iscale; - frac = _texturefrac; + fracstep = args.TextureVStep(); + frac = args.TextureVPos(); - count = thread->count_for_thread(_dest_y, count); + count = thread->count_for_thread(args.DestY(), count); if (count <= 0) return; int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - dest = thread->dest_for_thread(_dest_y, pitch, dest); - frac += fracstep * thread->skipped_by_thread(_dest_y); + dest = thread->dest_for_thread(args.DestY(), pitch, dest); + frac += fracstep * thread->skipped_by_thread(args.DestY()); fracstep *= thread->num_cores; pitch *= thread->num_cores; - uint32_t *fg2rgb = _srcblend; - uint32_t *bg2rgb = _destblend; - const uint8_t *colormap = _colormap; - const uint8_t *source = _source; + uint32_t *fg2rgb = args.SrcBlend(); + uint32_t *bg2rgb = args.DestBlend(); + const uint8_t *colormap = args.Colormap(); + const uint8_t *source = args.TexturePixels(); const PalEntry *palette = GPalette.BaseColors; if (!r_blendmethod) @@ -1238,13 +1235,16 @@ namespace swrenderer } else { + uint32_t srccolor = args.SrcColorIndex(); + fixed_t srcalpha = args.SrcAlpha(); + fixed_t destalpha = args.DestAlpha(); do { uint32_t fg = colormap[source[frac >> FRACBITS]]; uint32_t bg = *dest; - uint32_t r = MIN((palette[fg].r * _srcalpha + 
palette[bg].r * _destalpha)>>18, 63); - uint32_t g = MIN((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 63); - uint32_t b = MIN((palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 63); + uint32_t r = MIN((palette[fg].r * srcalpha + palette[bg].r * destalpha)>>18, 63); + uint32_t g = MIN((palette[fg].g * srcalpha + palette[bg].g * destalpha)>>18, 63); + uint32_t b = MIN((palette[fg].b * srcalpha + palette[bg].b * destalpha)>>18, 63); *dest = RGB256k.RGB[r][g][b]; dest += pitch; frac += fracstep; @@ -1259,27 +1259,27 @@ namespace swrenderer fixed_t frac; fixed_t fracstep; - count = _count; + count = args.Count(); - dest = _dest; + dest = args.Dest(); - fracstep = _iscale; - frac = _texturefrac; + fracstep = args.TextureVStep(); + frac = args.TextureVPos(); - count = thread->count_for_thread(_dest_y, count); + count = thread->count_for_thread(args.DestY(), count); if (count <= 0) return; int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - dest = thread->dest_for_thread(_dest_y, pitch, dest); - frac += fracstep * thread->skipped_by_thread(_dest_y); + dest = thread->dest_for_thread(args.DestY(), pitch, dest); + frac += fracstep * thread->skipped_by_thread(args.DestY()); fracstep *= thread->num_cores; pitch *= thread->num_cores; // [RH] Local copies of global vars to improve compiler optimizations - const uint8_t *colormap = _colormap; - const uint8_t *translation = _translation; - const uint8_t *source = _source; + const uint8_t *colormap = args.Colormap(); + const uint8_t *translation = args.TranslationMap(); + const uint8_t *source = args.TexturePixels(); do { @@ -1297,27 +1297,27 @@ namespace swrenderer fixed_t frac; fixed_t fracstep; - count = _count; - dest = _dest; + count = args.Count(); + dest = args.Dest(); - fracstep = _iscale; - frac = _texturefrac; + fracstep = args.TextureVStep(); + frac = args.TextureVPos(); - count = thread->count_for_thread(_dest_y, count); + count = thread->count_for_thread(args.DestY(), count); 
if (count <= 0) return; int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - dest = thread->dest_for_thread(_dest_y, pitch, dest); - frac += fracstep * thread->skipped_by_thread(_dest_y); + dest = thread->dest_for_thread(args.DestY(), pitch, dest); + frac += fracstep * thread->skipped_by_thread(args.DestY()); fracstep *= thread->num_cores; pitch *= thread->num_cores; - uint32_t *fg2rgb = _srcblend; - uint32_t *bg2rgb = _destblend; - const uint8_t *translation = _translation; - const uint8_t *colormap = _colormap; - const uint8_t *source = _source; + uint32_t *fg2rgb = args.SrcBlend(); + uint32_t *bg2rgb = args.DestBlend(); + const uint8_t *translation = args.TranslationMap(); + const uint8_t *colormap = args.Colormap(); + const uint8_t *source = args.TexturePixels(); const PalEntry *palette = GPalette.BaseColors; @@ -1338,13 +1338,15 @@ namespace swrenderer } else { + fixed_t srcalpha = args.SrcAlpha(); + fixed_t destalpha = args.DestAlpha(); do { uint32_t fg = colormap[translation[source[frac >> FRACBITS]]]; uint32_t bg = *dest; - uint32_t r = MIN((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 63); - uint32_t g = MIN((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 63); - uint32_t b = MIN((palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 63); + uint32_t r = MIN((palette[fg].r * srcalpha + palette[bg].r * destalpha)>>18, 63); + uint32_t g = MIN((palette[fg].g * srcalpha + palette[bg].g * destalpha)>>18, 63); + uint32_t b = MIN((palette[fg].b * srcalpha + palette[bg].b * destalpha)>>18, 63); *dest = RGB256k.RGB[r][g][b]; dest += pitch; frac += fracstep; @@ -1358,25 +1360,25 @@ namespace swrenderer uint8_t *dest; fixed_t frac, fracstep; - count = _count; - dest = _dest; + count = args.Count(); + dest = args.Dest(); - fracstep = _iscale; - frac = _texturefrac; + fracstep = args.TextureVStep(); + frac = args.TextureVPos(); - count = thread->count_for_thread(_dest_y, count); + count = 
thread->count_for_thread(args.DestY(), count); if (count <= 0) return; int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - dest = thread->dest_for_thread(_dest_y, pitch, dest); - frac += fracstep * thread->skipped_by_thread(_dest_y); + dest = thread->dest_for_thread(args.DestY(), pitch, dest); + frac += fracstep * thread->skipped_by_thread(args.DestY()); fracstep *= thread->num_cores; pitch *= thread->num_cores; - const uint8_t *source = _source; - const uint8_t *colormap = _colormap; - uint32_t *fgstart = &Col2RGB8[0][_color]; + const uint8_t *source = args.TexturePixels(); + const uint8_t *colormap = args.Colormap(); + uint32_t *fgstart = &Col2RGB8[0][args.SolidColor()]; const PalEntry *palette = GPalette.BaseColors; if (!r_blendmethod) @@ -1394,13 +1396,14 @@ namespace swrenderer } else { + int color = args.SolidColor(); do { uint32_t val = source[frac >> FRACBITS]; - int r = (palette[*dest].r * (255-val) + palette[_color].r * val) >> 10; - int g = (palette[*dest].g * (255-val) + palette[_color].g * val) >> 10; - int b = (palette[*dest].b * (255-val) + palette[_color].b * val) >> 10; + int r = (palette[*dest].r * (255-val) + palette[color].r * val) >> 10; + int g = (palette[*dest].g * (255-val) + palette[color].g * val) >> 10; + int b = (palette[*dest].b * (255-val) + palette[color].b * val) >> 10; *dest = RGB256k.RGB[clamp(r,0,63)][clamp(g,0,63)][clamp(b,0,63)]; dest += pitch; @@ -1416,26 +1419,26 @@ namespace swrenderer fixed_t frac; fixed_t fracstep; - count = _count; - dest = _dest; + count = args.Count(); + dest = args.Dest(); - fracstep = _iscale; - frac = _texturefrac; + fracstep = args.TextureVStep(); + frac = args.TextureVPos(); - count = thread->count_for_thread(_dest_y, count); + count = thread->count_for_thread(args.DestY(), count); if (count <= 0) return; int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - dest = thread->dest_for_thread(_dest_y, pitch, dest); - frac += fracstep * thread->skipped_by_thread(_dest_y); + 
dest = thread->dest_for_thread(args.DestY(), pitch, dest); + frac += fracstep * thread->skipped_by_thread(args.DestY()); fracstep *= thread->num_cores; pitch *= thread->num_cores; - const uint8_t *colormap = _colormap; - const uint8_t *source = _source; - uint32_t *fg2rgb = _srcblend; - uint32_t *bg2rgb = _destblend; + const uint8_t *colormap = args.Colormap(); + const uint8_t *source = args.TexturePixels(); + uint32_t *fg2rgb = args.SrcBlend(); + uint32_t *bg2rgb = args.DestBlend(); const PalEntry *palette = GPalette.BaseColors; if (!r_blendmethod) @@ -1457,13 +1460,15 @@ namespace swrenderer } else { + fixed_t srcalpha = args.SrcAlpha(); + fixed_t destalpha = args.DestAlpha(); do { int fg = colormap[source[frac >> FRACBITS]]; int bg = *dest; - int r = MIN((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 63); - int g = MIN((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 63); - int b = MIN((palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 63); + int r = MIN((palette[fg].r * srcalpha + palette[bg].r * destalpha)>>18, 63); + int g = MIN((palette[fg].g * srcalpha + palette[bg].g * destalpha)>>18, 63); + int b = MIN((palette[fg].b * srcalpha + palette[bg].b * destalpha)>>18, 63); *dest = RGB256k.RGB[r][g][b]; dest += pitch; frac += fracstep; @@ -1478,27 +1483,27 @@ namespace swrenderer fixed_t frac; fixed_t fracstep; - count = _count; - dest = _dest; + count = args.Count(); + dest = args.Dest(); - fracstep = _iscale; - frac = _texturefrac; + fracstep = args.TextureVStep(); + frac = args.TextureVPos(); - count = thread->count_for_thread(_dest_y, count); + count = thread->count_for_thread(args.DestY(), count); if (count <= 0) return; int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - dest = thread->dest_for_thread(_dest_y, pitch, dest); - frac += fracstep * thread->skipped_by_thread(_dest_y); + dest = thread->dest_for_thread(args.DestY(), pitch, dest); + frac += fracstep * thread->skipped_by_thread(args.DestY()); 
fracstep *= thread->num_cores; pitch *= thread->num_cores; - const uint8_t *translation = _translation; - const uint8_t *colormap = _colormap; - const uint8_t *source = _source; - uint32_t *fg2rgb = _srcblend; - uint32_t *bg2rgb = _destblend; + const uint8_t *translation = args.TranslationMap(); + const uint8_t *colormap = args.Colormap(); + const uint8_t *source = args.TexturePixels(); + uint32_t *fg2rgb = args.SrcBlend(); + uint32_t *bg2rgb = args.DestBlend(); const PalEntry *palette = GPalette.BaseColors; if (!r_blendmethod) @@ -1520,13 +1525,15 @@ namespace swrenderer } else { + fixed_t srcalpha = args.SrcAlpha(); + fixed_t destalpha = args.DestAlpha(); do { int fg = colormap[translation[source[frac >> FRACBITS]]]; int bg = *dest; - int r = MIN((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 63); - int g = MIN((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 63); - int b = MIN((palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 63); + int r = MIN((palette[fg].r * srcalpha + palette[bg].r * destalpha)>>18, 63); + int g = MIN((palette[fg].g * srcalpha + palette[bg].g * destalpha)>>18, 63); + int b = MIN((palette[fg].b * srcalpha + palette[bg].b * destalpha)>>18, 63); *dest = RGB256k.RGB[r][g][b]; dest += pitch; frac += fracstep; @@ -1541,26 +1548,26 @@ namespace swrenderer fixed_t frac; fixed_t fracstep; - count = _count; - dest = _dest; + count = args.Count(); + dest = args.Dest(); - fracstep = _iscale; - frac = _texturefrac; + fracstep = args.TextureVStep(); + frac = args.TextureVPos(); - count = thread->count_for_thread(_dest_y, count); + count = thread->count_for_thread(args.DestY(), count); if (count <= 0) return; int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - dest = thread->dest_for_thread(_dest_y, pitch, dest); - frac += fracstep * thread->skipped_by_thread(_dest_y); + dest = thread->dest_for_thread(args.DestY(), pitch, dest); + frac += fracstep * thread->skipped_by_thread(args.DestY()); fracstep *= 
thread->num_cores; pitch *= thread->num_cores; - const uint8_t *colormap = _colormap; - const uint8_t *source = _source; - uint32_t *fg2rgb = _srcblend; - uint32_t *bg2rgb = _destblend; + const uint8_t *colormap = args.Colormap(); + const uint8_t *source = args.TexturePixels(); + uint32_t *fg2rgb = args.SrcBlend(); + uint32_t *bg2rgb = args.DestBlend(); const PalEntry *palette = GPalette.BaseColors; if (!r_blendmethod) @@ -1581,13 +1588,15 @@ namespace swrenderer } else { + fixed_t srcalpha = args.SrcAlpha(); + fixed_t destalpha = args.DestAlpha(); do { int fg = colormap[source[frac >> FRACBITS]]; int bg = *dest; - int r = MAX((palette[fg].r * _srcalpha - palette[bg].r * _destalpha)>>18, 0); - int g = MAX((palette[fg].g * _srcalpha - palette[bg].g * _destalpha)>>18, 0); - int b = MAX((palette[fg].b * _srcalpha - palette[bg].b * _destalpha)>>18, 0); + int r = MAX((palette[fg].r * srcalpha - palette[bg].r * destalpha)>>18, 0); + int g = MAX((palette[fg].g * srcalpha - palette[bg].g * destalpha)>>18, 0); + int b = MAX((palette[fg].b * srcalpha - palette[bg].b * destalpha)>>18, 0); *dest = RGB256k.RGB[r][g][b]; dest += pitch; frac += fracstep; @@ -1602,27 +1611,27 @@ namespace swrenderer fixed_t frac; fixed_t fracstep; - count = _count; - dest = _dest; + count = args.Count(); + dest = args.Dest(); - fracstep = _iscale; - frac = _texturefrac; + fracstep = args.TextureVStep(); + frac = args.TextureVPos(); - count = thread->count_for_thread(_dest_y, count); + count = thread->count_for_thread(args.DestY(), count); if (count <= 0) return; int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - dest = thread->dest_for_thread(_dest_y, pitch, dest); - frac += fracstep * thread->skipped_by_thread(_dest_y); + dest = thread->dest_for_thread(args.DestY(), pitch, dest); + frac += fracstep * thread->skipped_by_thread(args.DestY()); fracstep *= thread->num_cores; pitch *= thread->num_cores; - const uint8_t *translation = _translation; - const uint8_t *colormap = 
_colormap; - const uint8_t *source = _source; - uint32_t *fg2rgb = _srcblend; - uint32_t *bg2rgb = _destblend; + const uint8_t *translation = args.TranslationMap(); + const uint8_t *colormap = args.Colormap(); + const uint8_t *source = args.TexturePixels(); + uint32_t *fg2rgb = args.SrcBlend(); + uint32_t *bg2rgb = args.DestBlend(); const PalEntry *palette = GPalette.BaseColors; if (!r_blendmethod) @@ -1643,13 +1652,15 @@ namespace swrenderer } else { + fixed_t srcalpha = args.SrcAlpha(); + fixed_t destalpha = args.DestAlpha(); do { int fg = colormap[translation[source[frac >> FRACBITS]]]; int bg = *dest; - int r = MAX((palette[fg].r * _srcalpha - palette[bg].r * _destalpha)>>18, 0); - int g = MAX((palette[fg].g * _srcalpha - palette[bg].g * _destalpha)>>18, 0); - int b = MAX((palette[fg].b * _srcalpha - palette[bg].b * _destalpha)>>18, 0); + int r = MAX((palette[fg].r * srcalpha - palette[bg].r * destalpha)>>18, 0); + int g = MAX((palette[fg].g * srcalpha - palette[bg].g * destalpha)>>18, 0); + int b = MAX((palette[fg].b * srcalpha - palette[bg].b * destalpha)>>18, 0); *dest = RGB256k.RGB[r][g][b]; dest += pitch; frac += fracstep; @@ -1664,26 +1675,26 @@ namespace swrenderer fixed_t frac; fixed_t fracstep; - count = _count; - dest = _dest; + count = args.Count(); + dest = args.Dest(); - fracstep = _iscale; - frac = _texturefrac; + fracstep = args.TextureVStep(); + frac = args.TextureVPos(); - count = thread->count_for_thread(_dest_y, count); + count = thread->count_for_thread(args.DestY(), count); if (count <= 0) return; int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - dest = thread->dest_for_thread(_dest_y, pitch, dest); - frac += fracstep * thread->skipped_by_thread(_dest_y); + dest = thread->dest_for_thread(args.DestY(), pitch, dest); + frac += fracstep * thread->skipped_by_thread(args.DestY()); fracstep *= thread->num_cores; pitch *= thread->num_cores; - const uint8_t *colormap = _colormap; - const uint8_t *source = _source; - uint32_t 
*fg2rgb = _srcblend; - uint32_t *bg2rgb = _destblend; + const uint8_t *colormap = args.Colormap(); + const uint8_t *source = args.TexturePixels(); + uint32_t *fg2rgb = args.SrcBlend(); + uint32_t *bg2rgb = args.DestBlend(); const PalEntry *palette = GPalette.BaseColors; if (!r_blendmethod) @@ -1704,13 +1715,15 @@ namespace swrenderer } else { + fixed_t srcalpha = args.SrcAlpha(); + fixed_t destalpha = args.DestAlpha(); do { int fg = colormap[source[frac >> FRACBITS]]; int bg = *dest; - int r = MAX((-palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 0); - int g = MAX((-palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 0); - int b = MAX((-palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 0); + int r = MAX((-palette[fg].r * srcalpha + palette[bg].r * destalpha)>>18, 0); + int g = MAX((-palette[fg].g * srcalpha + palette[bg].g * destalpha)>>18, 0); + int b = MAX((-palette[fg].b * srcalpha + palette[bg].b * destalpha)>>18, 0); *dest = RGB256k.RGB[r][g][b]; dest += pitch; frac += fracstep; @@ -1725,27 +1738,27 @@ namespace swrenderer fixed_t frac; fixed_t fracstep; - count = _count; - dest = _dest; + count = args.Count(); + dest = args.Dest(); - fracstep = _iscale; - frac = _texturefrac; + fracstep = args.TextureVStep(); + frac = args.TextureVPos(); - count = thread->count_for_thread(_dest_y, count); + count = thread->count_for_thread(args.DestY(), count); if (count <= 0) return; int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - dest = thread->dest_for_thread(_dest_y, pitch, dest); - frac += fracstep * thread->skipped_by_thread(_dest_y); + dest = thread->dest_for_thread(args.DestY(), pitch, dest); + frac += fracstep * thread->skipped_by_thread(args.DestY()); fracstep *= thread->num_cores; pitch *= thread->num_cores; - const uint8_t *translation = _translation; - const uint8_t *colormap = _colormap; - const uint8_t *source = _source; - uint32_t *fg2rgb = _srcblend; - uint32_t *bg2rgb = _destblend; + const uint8_t 
*translation = args.TranslationMap(); + const uint8_t *colormap = args.Colormap(); + const uint8_t *source = args.TexturePixels(); + uint32_t *fg2rgb = args.SrcBlend(); + uint32_t *bg2rgb = args.DestBlend(); const PalEntry *palette = GPalette.BaseColors; if (!r_blendmethod) @@ -1766,13 +1779,15 @@ namespace swrenderer } else { + fixed_t srcalpha = args.SrcAlpha(); + fixed_t destalpha = args.DestAlpha(); do { int fg = colormap[translation[source[frac >> FRACBITS]]]; int bg = *dest; - int r = MAX((-palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 0); - int g = MAX((-palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 0); - int b = MAX((-palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 0); + int r = MAX((-palette[fg].r * srcalpha + palette[bg].r * destalpha)>>18, 0); + int g = MAX((-palette[fg].g * srcalpha + palette[bg].g * destalpha)>>18, 0); + int b = MAX((-palette[fg].b * srcalpha + palette[bg].b * destalpha)>>18, 0); *dest = RGB256k.RGB[r][g][b]; dest += pitch; frac += fracstep; diff --git a/src/swrenderer/drawers/r_draw_pal.h b/src/swrenderer/drawers/r_draw_pal.h index ed5bbff90c..b57bfdf30b 100644 --- a/src/swrenderer/drawers/r_draw_pal.h +++ b/src/swrenderer/drawers/r_draw_pal.h @@ -52,22 +52,7 @@ namespace swrenderer protected: uint8_t AddLights(uint8_t fg, uint8_t material, uint32_t lit_r, uint32_t lit_g, uint32_t lit_b); - int _count; - uint8_t *_dest; - int _dest_y; - fixed_t _iscale; - fixed_t _texturefrac; - const uint8_t *_colormap; - const uint8_t *_source; - const uint8_t *_translation; - int _color; - uint32_t *_srcblend; - uint32_t *_destblend; - uint32_t _srccolor; - fixed_t _srcalpha; - fixed_t _destalpha; - uint32_t _dynlight; - fixed_t _light; + SpriteDrawerArgs args; }; class DrawColumnPalCommand : public PalColumnCommand { public: using PalColumnCommand::PalColumnCommand; void Execute(DrawerThread *thread) override; }; From 2a7bdaad797dd2e2b4213c62c6e795f62e916de5 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl 
Date: Fri, 24 Feb 2017 13:56:20 +0100 Subject: [PATCH 892/912] Switched to a template version of the span32 drawers --- src/swrenderer/drawers/r_draw_rgba.cpp | 2 +- src/swrenderer/drawers/r_draw_span32_sse2.h | 509 ++++++++++++++++++++ 2 files changed, 510 insertions(+), 1 deletion(-) create mode 100644 src/swrenderer/drawers/r_draw_span32_sse2.h diff --git a/src/swrenderer/drawers/r_draw_rgba.cpp b/src/swrenderer/drawers/r_draw_rgba.cpp index 36f85f6e49..8d7a807f1f 100644 --- a/src/swrenderer/drawers/r_draw_rgba.cpp +++ b/src/swrenderer/drawers/r_draw_rgba.cpp @@ -41,7 +41,7 @@ #include "swrenderer/scene/r_light.h" #include "r_draw_wall32.h" #include "r_draw_sprite32.h" -#include "r_draw_span32.h" +#include "r_draw_span32_sse2.h" #include "r_draw_sky32.h" #include "gi.h" diff --git a/src/swrenderer/drawers/r_draw_span32_sse2.h b/src/swrenderer/drawers/r_draw_span32_sse2.h new file mode 100644 index 0000000000..35fda5f922 --- /dev/null +++ b/src/swrenderer/drawers/r_draw_span32_sse2.h @@ -0,0 +1,509 @@ +/* +** Drawer commands for spans +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. 
+** +*/ + +#pragma once + +#include "swrenderer/drawers/r_draw_rgba.h" +#include "swrenderer/viewport/r_spandrawer.h" + +namespace swrenderer +{ + namespace DrawSpan32TModes + { + enum class SpanBlendModes { Opaque, Masked, Translucent, AddClamp, SubClamp, RevSubClamp }; + struct OpaqueSpan { static const int Mode = (int)SpanBlendModes::Opaque; }; + struct MaskedSpan { static const int Mode = (int)SpanBlendModes::Masked; }; + struct TranslucentSpan { static const int Mode = (int)SpanBlendModes::Translucent; }; + struct AddClampSpan { static const int Mode = (int)SpanBlendModes::AddClamp; }; + struct SubClampSpan { static const int Mode = (int)SpanBlendModes::SubClamp; }; + struct RevSubClampSpan { static const int Mode = (int)SpanBlendModes::RevSubClamp; }; + + enum class FilterModes { Nearest, Linear }; + struct NearestFilter { static const int Mode = (int)FilterModes::Nearest; }; + struct LinearFilter { static const int Mode = (int)FilterModes::Linear; }; + + enum class ShadeMode { Simple, Advanced }; + struct SimpleShade { static const int Mode = (int)ShadeMode::Simple; }; + struct AdvancedShade { static const int Mode = (int)ShadeMode::Advanced; }; + + enum class SpanTextureSize { SizeAny, Size64x64 }; + struct TextureSizeAny { static const int Mode = (int)SpanTextureSize::SizeAny; }; + struct TextureSize64x64 { static const int Mode = (int)SpanTextureSize::Size64x64; }; + } + + template + class DrawSpan32T : public DrawerCommand + { + protected: + SpanDrawerArgs args; + + public: + DrawSpan32T(const SpanDrawerArgs &drawerargs) : args(drawerargs) { } + + struct TextureData + { + uint32_t xbits; + uint32_t ybits; + uint32_t xstep; + uint32_t ystep; + uint32_t xfrac; + uint32_t yfrac; + uint32_t yshift; + uint32_t xshift; + uint32_t xmask; + const uint32_t *source; + }; + + void Execute(DrawerThread *thread) override + { + using namespace DrawSpan32TModes; + + if (thread->line_skipped_by_thread(args.DestY())) return; + + TextureData texdata; + texdata.xbits = 
args.TextureWidthBits(); + texdata.ybits = args.TextureHeightBits(); + texdata.xstep = args.TextureUStep(); + texdata.ystep = args.TextureVStep(); + texdata.xfrac = args.TextureUPos(); + texdata.yfrac = args.TextureVPos(); + texdata.yshift = 32 - texdata.ybits; + texdata.xshift = texdata.yshift - texdata.xbits; + texdata.xmask = ((1 << texdata.xbits) - 1) << texdata.ybits; + + texdata.source = (const uint32_t*)args.TexturePixels(); + + double lod = args.TextureLOD(); + bool mipmapped = args.MipmappedTexture(); + + bool magnifying = lod < 0.0; + if (r_mipmap && mipmapped) + { + int level = (int)lod; + while (level > 0) + { + if (texdata.xbits <= 2 || texdata.ybits <= 2) + break; + + texdata.source += (1 << (texdata.xbits)) * (1 << (texdata.ybits)); + texdata.xbits -= 1; + texdata.ybits -= 1; + level--; + } + } + + bool is_nearest_filter = !((magnifying && r_magfilter) || (!magnifying && r_minfilter)); + bool is_64x64 = texdata.xbits == 6 && texdata.ybits == 6; + + auto shade_constants = args.ColormapConstants(); + if (shade_constants.simple_shade) + { + if (is_nearest_filter) + { + if (is_64x64) + Loop(thread, texdata, shade_constants); + else + Loop(thread, texdata, shade_constants); + } + else + { + if (is_64x64) + Loop(thread, texdata, shade_constants); + else + Loop(thread, texdata, shade_constants); + } + } + else + { + if (is_nearest_filter) + { + if (is_64x64) + Loop(thread, texdata, shade_constants); + else + Loop(thread, texdata, shade_constants); + } + else + { + if (is_64x64) + Loop(thread, texdata, shade_constants); + else + Loop(thread, texdata, shade_constants); + } + } + } + + template + void Loop(DrawerThread *thread, TextureData texdata, ShadeConstants shade_constants) + { + using namespace DrawSpan32TModes; + + // Shade constants + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 
256 - light, 256 - light, 256 - light); + + __m128i inv_desaturate, shade_fade, shade_light; + int desaturate; + if (ShadeModeT::Mode == (int)ShadeMode::Advanced) + { + inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); + shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); + shade_fade = _mm_mullo_epi16(shade_fade, inv_light); + shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); + desaturate = shade_constants.desaturate; + } + else + { + inv_desaturate = _mm_setzero_si128(); + shade_fade = _mm_setzero_si128(); + shade_fade = _mm_setzero_si128(); + shade_light = _mm_setzero_si128(); + desaturate = 0; + } + + auto lights = args.dc_lights; + auto num_lights = args.dc_num_lights; + float vpx = args.dc_viewpos.X; + float stepvpx = args.dc_viewpos_step.X; + __m128 viewpos_x = _mm_setr_ps(vpx, vpx + stepvpx, 0.0f, 0.0f); + __m128 step_viewpos_x = _mm_set1_ps(stepvpx * 2.0f); + + int count = args.DestX2() - args.DestX1() + 1; + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); + uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); + + if (FilterModeT::Mode == (int)FilterModes::Linear) + { + texdata.xfrac -= 1 << (31 - texdata.xbits); + texdata.yfrac -= 1 << (31 - texdata.ybits); + } + + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); + + int ssecount = count / 2; + for 
(int index = 0; index < ssecount; index++) + { + int offset = index * 2; + + __m128i bgcolor; + if (BlendT::Mode != (int)SpanBlendModes::Opaque) + { + bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(dest + offset)), _mm_setzero_si128()); + } + else + { + bgcolor = _mm_setzero_si128(); + } + + // Sample + unsigned int ifgcolor[2]; + ifgcolor[0] = Sample(texdata.xbits, texdata.ybits, texdata.xstep, texdata.ystep, texdata.xfrac, texdata.yfrac, texdata.yshift, texdata.xshift, texdata.xmask, texdata.source); + texdata.xfrac += texdata.xstep; + texdata.yfrac += texdata.ystep; + + ifgcolor[1] = Sample(texdata.xbits, texdata.ybits, texdata.xstep, texdata.ystep, texdata.xfrac, texdata.yfrac, texdata.yshift, texdata.xshift, texdata.xmask, texdata.source); + texdata.xfrac += texdata.xstep; + texdata.yfrac += texdata.ystep; + + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + fgcolor = Shade(fgcolor, mlight, ifgcolor[0], ifgcolor[1], desaturate, inv_desaturate, shade_fade, shade_light, lights, num_lights, viewpos_x); + __m128i outcolor = Blend(fgcolor, bgcolor, srcalpha, destalpha, ifgcolor[0], ifgcolor[1]); + + _mm_storel_epi64((__m128i*)(dest + offset), outcolor); + viewpos_x = _mm_add_ps(viewpos_x, step_viewpos_x); + } + + if (ssecount * 2 != count) + { + int index = ssecount * 2; + int offset = index; + + __m128i bgcolor; + if (BlendT::Mode != (int)SpanBlendModes::Opaque) + { + bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); + } + else + { + bgcolor = _mm_setzero_si128(); + } + + // Sample + unsigned int ifgcolor[2]; + ifgcolor[0] = Sample(texdata.xbits, texdata.ybits, texdata.xstep, texdata.ystep, texdata.xfrac, texdata.yfrac, texdata.yshift, texdata.xshift, texdata.xmask, texdata.source); + ifgcolor[1] = 0; + + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + fgcolor = Shade(fgcolor, mlight, ifgcolor[0], ifgcolor[1], desaturate, 
inv_desaturate, shade_fade, shade_light, lights, num_lights, viewpos_x); + __m128i outcolor = Blend(fgcolor, bgcolor, srcalpha, destalpha, ifgcolor[0], ifgcolor[1]); + + dest[offset] = _mm_cvtsi128_si32(outcolor); + } + + } + + template + unsigned int Sample(uint32_t xbits, uint32_t ybits, uint32_t xstep, uint32_t ystep, uint32_t xfrac, uint32_t yfrac, uint32_t yshift, uint32_t xshift, uint32_t xmask, const uint32_t *source) + { + using namespace DrawSpan32TModes; + + if (FilterModeT::Mode == (int)FilterModes::Nearest && TextureSizeT::Mode == (int)SpanTextureSize::Size64x64) + { + int sample_index = ((xfrac >> (32 - 6 - 6)) & (63 * 64)) + (yfrac >> (32 - 6)); + return source[sample_index]; + } + else if (FilterModeT::Mode == (int)FilterModes::Nearest) + { + int sample_index = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + return source[sample_index]; + } + else + { + uint32_t xxbits, yybits; + if (TextureSizeT::Mode == (int)SpanTextureSize::Size64x64) + { + xxbits = 26; + yybits = 26; + } + else + { + xxbits = 32 - xbits; + yybits = 32 - ybits; + } + + uint32_t xxshift = (32 - xxbits); + uint32_t yyshift = (32 - yybits); + uint32_t xxmask = (1 << xxshift) - 1; + uint32_t yymask = (1 << yyshift) - 1; + uint32_t x = xfrac >> xxbits; + uint32_t y = yfrac >> yybits; + + uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; + uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; + uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; + + uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; + uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; + uint32_t a = 16 - inv_a; + uint32_t b = 16 - inv_b; + + uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * 
(inv_a * inv_b) + 127) >> 8; + uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + return (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + } + } + + template + __m128i Shade(__m128i fgcolor, __m128i mlight, unsigned int ifgcolor0, unsigned int ifgcolor1, int desaturate, __m128i inv_desaturate, __m128i shade_fade, __m128i shade_light, const DrawerLight *lights, int num_lights, __m128 viewpos_x) + { + using namespace DrawSpan32TModes; + + __m128i material = fgcolor; + if (ShadeModeT::Mode == (int)ShadeMode::Simple) + { + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + } + else + { + int blue0 = BPART(ifgcolor0); + int green0 = GPART(ifgcolor0); + int red0 = RPART(ifgcolor0); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + + int blue1 = BPART(ifgcolor1); + int green1 = GPART(ifgcolor1); + int red1 = RPART(ifgcolor1); + int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); + + fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + } + + return AddLights(material, fgcolor, lights, num_lights, viewpos_x); + } + + __m128i AddLights(__m128i material, __m128i fgcolor, const DrawerLight *lights, int num_lights, __m128 viewpos_x) + { + using namespace DrawSpan32TModes; + + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = 
_mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z + __m128 Lx = _mm_sub_ps(light_x, viewpos_x); + __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0, 0, 0, 0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1, 1, 1, 1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1, 0, 1, 0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); + return fgcolor; + } + + __m128i Blend(__m128i fgcolor, __m128i bgcolor, uint32_t srcalpha, uint32_t destalpha, unsigned int ifgcolor0, unsigned int ifgcolor1) + { + using namespace DrawSpan32TModes; + + if (BlendT::Mode == (int)SpanBlendModes::Opaque) + { + __m128i outcolor = fgcolor; + outcolor = _mm_packus_epi16(outcolor, 
_mm_setzero_si128()); + return outcolor; + } + else if (BlendT::Mode == (int)SpanBlendModes::Masked) + { + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3, 3, 3, 3)); + alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3, 3, 3, 3)); + alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + + fgcolor = _mm_mullo_epi16(fgcolor, alpha); + bgcolor = _mm_mullo_epi16(bgcolor, inv_alpha); + __m128i outcolor = _mm_srli_epi16(_mm_add_epi16(fgcolor, bgcolor), 8); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + return outcolor; + } + else if (BlendT::Mode == (int)SpanBlendModes::Translucent) + { + __m128i fgalpha = _mm_set1_epi16(srcalpha); + __m128i bgalpha = _mm_set1_epi16(destalpha); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); + __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); + + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + return outcolor; + } + else + { + uint32_t alpha0 = APART(ifgcolor0); + uint32_t alpha1 = APART(ifgcolor1); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; + + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + 
uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo, out_hi; + if (BlendT::Mode == (int)SpanBlendModes::AddClamp) + { + out_lo = _mm_add_epi32(fg_lo, bg_lo); + out_hi = _mm_add_epi32(fg_hi, bg_hi); + } + else if (BlendT::Mode == (int)SpanBlendModes::SubClamp) + { + out_lo = _mm_sub_epi32(fg_lo, bg_lo); + out_hi = _mm_sub_epi32(fg_hi, bg_hi); + } + else if (BlendT::Mode == (int)SpanBlendModes::RevSubClamp) + { + out_lo = _mm_sub_epi32(bg_lo, fg_lo); + out_hi = _mm_sub_epi32(bg_hi, fg_hi); + } + + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + return outcolor; + } + } + + FString DebugInfo() override { return "DrawSpan32T"; } + }; + + typedef DrawSpan32T DrawSpan32Command; + typedef DrawSpan32T DrawSpanMasked32Command; + typedef DrawSpan32T DrawSpanTranslucent32Command; + typedef DrawSpan32T DrawSpanAddClamp32Command; + typedef DrawSpan32T DrawSpanSubClamp32Command; + typedef DrawSpan32T DrawSpanRevSubClamp32Command; +} From d3812e32f89c9d4a3b6bb90d38e4b13a0ca684b0 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 24 Feb 2017 15:31:47 +0100 Subject: [PATCH 893/912] Created templated version 
of wall drawers --- src/swrenderer/drawers/r_draw_rgba.cpp | 2 +- src/swrenderer/drawers/r_draw_span32_sse2.h | 1 - src/swrenderer/drawers/r_draw_wall32_sse2.h | 406 ++++++++++++++++++++ 3 files changed, 407 insertions(+), 2 deletions(-) create mode 100644 src/swrenderer/drawers/r_draw_wall32_sse2.h diff --git a/src/swrenderer/drawers/r_draw_rgba.cpp b/src/swrenderer/drawers/r_draw_rgba.cpp index 8d7a807f1f..01e517ebd2 100644 --- a/src/swrenderer/drawers/r_draw_rgba.cpp +++ b/src/swrenderer/drawers/r_draw_rgba.cpp @@ -39,7 +39,7 @@ #include "gl/data/gl_matrix.h" #include "swrenderer/viewport/r_viewport.h" #include "swrenderer/scene/r_light.h" -#include "r_draw_wall32.h" +#include "r_draw_wall32_sse2.h" #include "r_draw_sprite32.h" #include "r_draw_span32_sse2.h" #include "r_draw_sky32.h" diff --git a/src/swrenderer/drawers/r_draw_span32_sse2.h b/src/swrenderer/drawers/r_draw_span32_sse2.h index 35fda5f922..34daaeb974 100644 --- a/src/swrenderer/drawers/r_draw_span32_sse2.h +++ b/src/swrenderer/drawers/r_draw_span32_sse2.h @@ -215,7 +215,6 @@ namespace swrenderer bgcolor = _mm_setzero_si128(); } - // Sample unsigned int ifgcolor[2]; ifgcolor[0] = Sample(texdata.xbits, texdata.ybits, texdata.xstep, texdata.ystep, texdata.xfrac, texdata.yfrac, texdata.yshift, texdata.xshift, texdata.xmask, texdata.source); texdata.xfrac += texdata.xstep; diff --git a/src/swrenderer/drawers/r_draw_wall32_sse2.h b/src/swrenderer/drawers/r_draw_wall32_sse2.h new file mode 100644 index 0000000000..03be70e75f --- /dev/null +++ b/src/swrenderer/drawers/r_draw_wall32_sse2.h @@ -0,0 +1,406 @@ +/* +** Drawer commands for walls +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. 
+** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. +** +*/ + +#pragma once + +#include "swrenderer/drawers/r_draw_rgba.h" +#include "swrenderer/viewport/r_walldrawer.h" + +namespace swrenderer +{ + namespace DrawWall32TModes + { + enum class WallBlendModes { Opaque, Masked, AddClamp, SubClamp, RevSubClamp }; + struct OpaqueWall { static const int Mode = (int)WallBlendModes::Opaque; }; + struct MaskedWall { static const int Mode = (int)WallBlendModes::Masked; }; + struct AddClampWall { static const int Mode = (int)WallBlendModes::AddClamp; }; + struct SubClampWall { static const int Mode = (int)WallBlendModes::SubClamp; }; + struct RevSubClampWall { static const int Mode = (int)WallBlendModes::RevSubClamp; }; + + enum class FilterModes { Nearest, Linear }; + struct NearestFilter { static const int Mode = (int)FilterModes::Nearest; }; + struct LinearFilter { static const int Mode = (int)FilterModes::Linear; }; + + enum class ShadeMode { Simple, Advanced }; + struct SimpleShade { static const int Mode = (int)ShadeMode::Simple; }; + struct AdvancedShade { static const int Mode = (int)ShadeMode::Advanced; }; + } + + template + class DrawWall32T : public DrawerCommand + { + protected: + WallDrawerArgs args; + + public: + DrawWall32T(const WallDrawerArgs &drawerargs) : args(drawerargs) { } + + void Execute(DrawerThread *thread) override + { + using namespace 
DrawWall32TModes; + + const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); + bool is_nearest_filter = (source2 == nullptr); + auto shade_constants = args.ColormapConstants(); + if (shade_constants.simple_shade) + { + if (is_nearest_filter) + Loop(thread, shade_constants); + else + Loop(thread, shade_constants); + } + else + { + if (is_nearest_filter) + Loop(thread, shade_constants); + else + Loop(thread, shade_constants); + } + } + + template + void Loop(DrawerThread *thread, ShadeConstants shade_constants) + { + using namespace DrawWall32TModes; + + const uint32_t *source = (const uint32_t*)args.TexturePixels(); + const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); + int textureheight = args.TextureHeight(); + uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; + + // Shade constants + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + + __m128i inv_desaturate, shade_fade, shade_light; + int desaturate; + if (ShadeModeT::Mode == (int)ShadeMode::Advanced) + { + inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); + shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); + shade_fade = _mm_mullo_epi16(shade_fade, inv_light); + shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, 
shade_constants.light_green, shade_constants.light_blue); + desaturate = shade_constants.desaturate; + } + else + { + inv_desaturate = _mm_setzero_si128(); + shade_fade = _mm_setzero_si128(); + shade_fade = _mm_setzero_si128(); + shade_light = _mm_setzero_si128(); + desaturate = 0; + } + + int count = args.Count(); + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); + uint32_t fracstep = args.TextureVStep(); + uint32_t frac = args.TextureVPos(); + uint32_t texturefracx = args.TextureUPos(); + uint32_t *dest = (uint32_t*)args.Dest(); + int dest_y = args.DestY(); + + auto lights = args.dc_lights; + auto num_lights = args.dc_num_lights; + float vpz = args.dc_viewpos.Z + args.dc_viewpos_step.Z * thread->skipped_by_thread(dest_y); + float stepvpz = args.dc_viewpos_step.Z * thread->num_cores; + __m128 viewpos_z = _mm_setr_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); + __m128 step_viewpos_z = _mm_set1_ps(stepvpz * 2.0f); + + count = thread->count_for_thread(dest_y, count); + if (count <= 0) return; + frac += thread->skipped_by_thread(dest_y) * fracstep; + dest = thread->dest_for_thread(dest_y, pitch, dest); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + + if (FilterModeT::Mode == (int)FilterModes::Linear) + { + frac -= one / 2; + } + + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); + + int ssecount = count / 2; + for (int index = 0; index < ssecount; index++) + { + int offset = index * pitch * 2; + uint32_t desttmp[2]; + desttmp[0] = dest[offset]; + desttmp[1] = dest[offset + pitch]; + + __m128i bgcolor; + if (BlendT::Mode != (int)WallBlendModes::Opaque) + { + bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); + } + else + { + bgcolor = _mm_setzero_si128(); + } + + unsigned int ifgcolor[2]; + ifgcolor[0] = Sample(frac, source, source2, textureheight, one, texturefracx); + frac += fracstep; + + ifgcolor[1] = Sample(frac, source, source2, 
textureheight, one, texturefracx); + frac += fracstep; + + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + fgcolor = Shade(fgcolor, mlight, ifgcolor[0], ifgcolor[1], desaturate, inv_desaturate, shade_fade, shade_light, lights, num_lights, viewpos_z); + __m128i outcolor = Blend(fgcolor, bgcolor, ifgcolor[0], ifgcolor[1], srcalpha, destalpha); + + _mm_storel_epi64((__m128i*)desttmp, outcolor); + dest[offset] = desttmp[0]; + dest[offset + pitch] = desttmp[1]; + viewpos_z = _mm_add_ps(viewpos_z, step_viewpos_z); + } + + if (ssecount * 2 != count) + { + int index = ssecount * 2; + int offset = index * pitch; + + __m128i bgcolor; + if (BlendT::Mode != (int)WallBlendModes::Opaque) + { + bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); + } + else + { + bgcolor = _mm_setzero_si128(); + } + + unsigned int ifgcolor[2]; + ifgcolor[0] = Sample(frac, source, source2, textureheight, one, texturefracx); + ifgcolor[1] = 0; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + fgcolor = Shade(fgcolor, mlight, ifgcolor[0], ifgcolor[1], desaturate, inv_desaturate, shade_fade, shade_light, lights, num_lights, viewpos_z); + __m128i outcolor = Blend(fgcolor, bgcolor, ifgcolor[0], ifgcolor[1], srcalpha, destalpha); + + dest[offset] = _mm_cvtsi128_si32(outcolor); + } + } + + template + unsigned int Sample(uint32_t frac, const uint32_t *source, const uint32_t *source2, int textureheight, uint32_t one, uint32_t texturefracx) + { + using namespace DrawWall32TModes; + + if (FilterModeT::Mode == (int)FilterModes::Nearest) + { + int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; + return source[sample_index]; + } + else + { + unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; + unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; + unsigned int y0 = frac_y0 >> FRACBITS; + unsigned int y1 = frac_y1 >> FRACBITS; + + unsigned int 
p00 = source[y0]; + unsigned int p01 = source[y1]; + unsigned int p10 = source2[y0]; + unsigned int p11 = source2[y1]; + + unsigned int inv_b = texturefracx; + unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; + unsigned int a = 16 - inv_a; + unsigned int b = 16 - inv_b; + + unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + return (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + } + } + + template + __m128i Shade(__m128i fgcolor, __m128i mlight, unsigned int ifgcolor0, unsigned int ifgcolor1, int desaturate, __m128i inv_desaturate, __m128i shade_fade, __m128i shade_light, const DrawerLight *lights, int num_lights, __m128 viewpos_z) + { + using namespace DrawWall32TModes; + + __m128i material = fgcolor; + if (ShadeModeT::Mode == (int)ShadeMode::Simple) + { + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + } + else + { + int blue0 = BPART(ifgcolor0); + int green0 = GPART(ifgcolor0); + int red0 = RPART(ifgcolor0); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + + int blue1 = BPART(ifgcolor1); + int green1 = GPART(ifgcolor1); + int red1 = RPART(ifgcolor1); + int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); + + fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); + fgcolor = _mm_mullo_epi16(fgcolor, 
mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + } + + return AddLights(material, fgcolor, lights, num_lights, viewpos_z); + } + + __m128i AddLights(__m128i material, __m128i fgcolor, const DrawerLight *lights, int num_lights, __m128 viewpos_z) + { + using namespace DrawWall32TModes; + + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lxy2 = light_x; // L.x*L.x + L.y*L.y + __m128 Lz = _mm_sub_ps(light_z, viewpos_z); + __m128 dist2 = _mm_add_ps(Lxy2, _mm_mul_ps(Lz, Lz)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_y, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_y, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0, 0, 0, 0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1, 1, 1, 1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1, 0, 1, 0)); + + lit = _mm_add_epi16(lit, 
_mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); + return fgcolor; + } + + __m128i Blend(__m128i fgcolor, __m128i bgcolor, unsigned int ifgcolor0, unsigned int ifgcolor1, uint32_t srcalpha, uint32_t destalpha) + { + using namespace DrawWall32TModes; + + if (BlendT::Mode == (int)WallBlendModes::Opaque) + { + __m128i outcolor = fgcolor; + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + return outcolor; + } + else if (BlendT::Mode == (int)WallBlendModes::Masked) + { + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3, 3, 3, 3)); + alpha = _mm_shufflehi_epi16(alpha, _MM_SHUFFLE(3, 3, 3, 3)); // chain from the lo-shuffled value so both pixels get their own alpha broadcast + alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + + fgcolor = _mm_mullo_epi16(fgcolor, alpha); + bgcolor = _mm_mullo_epi16(bgcolor, inv_alpha); + __m128i outcolor = _mm_srli_epi16(_mm_add_epi16(fgcolor, bgcolor), 8); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + return outcolor; + } + else + { + uint32_t alpha0 = APART(ifgcolor0); + uint32_t alpha1 = APART(ifgcolor1); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; + + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); + + fgcolor = 
_mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo, out_hi; + if (BlendT::Mode == (int)WallBlendModes::AddClamp) + { + out_lo = _mm_add_epi32(fg_lo, bg_lo); + out_hi = _mm_add_epi32(fg_hi, bg_hi); + } + else if (BlendT::Mode == (int)WallBlendModes::SubClamp) + { + out_lo = _mm_sub_epi32(fg_lo, bg_lo); + out_hi = _mm_sub_epi32(fg_hi, bg_hi); + } + else if (BlendT::Mode == (int)WallBlendModes::RevSubClamp) + { + out_lo = _mm_sub_epi32(bg_lo, fg_lo); + out_hi = _mm_sub_epi32(bg_hi, fg_hi); + } + + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + return outcolor; + } + } + + FString DebugInfo() override { return "DrawWall32T"; } + }; + + typedef DrawWall32T DrawWall32Command; + typedef DrawWall32T DrawWallMasked32Command; + typedef DrawWall32T DrawWallAddClamp32Command; + typedef DrawWall32T DrawWallSubClamp32Command; + typedef DrawWall32T DrawWallRevSubClamp32Command; +} From 51b872b30b8be570653547978ca89626401030a4 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 24 Feb 2017 16:52:13 +0100 Subject: [PATCH 894/912] Created template version of sprite drawers --- src/swrenderer/drawers/r_draw_rgba.cpp | 2 +- src/swrenderer/drawers/r_draw_sprite32_sse2.h | 456 ++++++++++++++++++ 2 files changed, 457 insertions(+), 1 deletion(-) create mode 100644 src/swrenderer/drawers/r_draw_sprite32_sse2.h diff --git a/src/swrenderer/drawers/r_draw_rgba.cpp b/src/swrenderer/drawers/r_draw_rgba.cpp index 01e517ebd2..aa0855a102 100644 --- 
a/src/swrenderer/drawers/r_draw_rgba.cpp +++ b/src/swrenderer/drawers/r_draw_rgba.cpp @@ -40,7 +40,7 @@ #include "swrenderer/viewport/r_viewport.h" #include "swrenderer/scene/r_light.h" #include "r_draw_wall32_sse2.h" -#include "r_draw_sprite32.h" +#include "r_draw_sprite32_sse2.h" #include "r_draw_span32_sse2.h" #include "r_draw_sky32.h" diff --git a/src/swrenderer/drawers/r_draw_sprite32_sse2.h b/src/swrenderer/drawers/r_draw_sprite32_sse2.h new file mode 100644 index 0000000000..0ec3e63d87 --- /dev/null +++ b/src/swrenderer/drawers/r_draw_sprite32_sse2.h @@ -0,0 +1,456 @@ +/* +** Drawer commands for sprites +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. 
+** +*/ + +#pragma once + +#include "swrenderer/drawers/r_draw_rgba.h" +#include "swrenderer/viewport/r_walldrawer.h" + +namespace swrenderer +{ + namespace DrawSprite32TModes + { + enum class SpriteBlendModes { Copy, Opaque, Shaded, AddClamp, SubClamp, RevSubClamp }; + struct CopySprite { static const int Mode = (int)SpriteBlendModes::Copy; }; + struct OpaqueSprite { static const int Mode = (int)SpriteBlendModes::Opaque; }; + struct ShadedSprite { static const int Mode = (int)SpriteBlendModes::Shaded; }; + struct AddClampSprite { static const int Mode = (int)SpriteBlendModes::AddClamp; }; + struct SubClampSprite { static const int Mode = (int)SpriteBlendModes::SubClamp; }; + struct RevSubClampSprite { static const int Mode = (int)SpriteBlendModes::RevSubClamp; }; + + enum class FilterModes { Nearest, Linear }; + struct NearestFilter { static const int Mode = (int)FilterModes::Nearest; }; + struct LinearFilter { static const int Mode = (int)FilterModes::Linear; }; + + enum class ShadeMode { Simple, Advanced }; + struct SimpleShade { static const int Mode = (int)ShadeMode::Simple; }; + struct AdvancedShade { static const int Mode = (int)ShadeMode::Advanced; }; + + enum class SpriteSamplers { Texture, Fill, Shaded, Translated }; + struct TextureSampler { static const int Mode = (int)SpriteSamplers::Texture; }; + struct FillSampler { static const int Mode = (int)SpriteSamplers::Fill; }; + struct ShadedSampler { static const int Mode = (int)SpriteSamplers::Shaded; }; + struct TranslatedSampler { static const int Mode = (int)SpriteSamplers::Translated; }; + } + + template + class DrawSprite32T : public DrawerCommand + { + protected: + SpriteDrawerArgs args; + + public: + DrawSprite32T(const SpriteDrawerArgs &drawerargs) : args(drawerargs) { } + + void Execute(DrawerThread *thread) override + { + using namespace DrawSprite32TModes; + + auto shade_constants = args.ColormapConstants(); + if (SamplerT::Mode == (int)SpriteSamplers::Texture) + { + const uint32_t *source2 = 
(const uint32_t*)args.TexturePixels2(); + bool is_nearest_filter = (source2 == nullptr); + + if (shade_constants.simple_shade) + { + if (is_nearest_filter) + Loop(thread, shade_constants); + else + Loop(thread, shade_constants); + } + else + { + if (is_nearest_filter) + Loop(thread, shade_constants); + else + Loop(thread, shade_constants); + } + } + else // no linear filtering for translated, shaded or fill + { + if (shade_constants.simple_shade) + { + Loop(thread, shade_constants); + } + else + { + Loop(thread, shade_constants); + } + } + } + + template + void Loop(DrawerThread *thread, ShadeConstants shade_constants) + { + using namespace DrawSprite32TModes; + + const uint32_t *source; + const uint32_t *source2; + const uint8_t *colormap; + const uint32_t *translation; + + if (SamplerT::Mode == (int)SpriteSamplers::Shaded || SamplerT::Mode == (int)SpriteSamplers::Translated) + { + source = (const uint32_t*)args.TexturePixels(); + source2 = nullptr; + colormap = args.Colormap(); + translation = (const uint32_t*)args.TranslationMap(); + } + else + { + source = (const uint32_t*)args.TexturePixels(); + source2 = (const uint32_t*)args.TexturePixels2(); + colormap = nullptr; + translation = nullptr; + } + + int textureheight = args.TextureHeight(); + uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; + + // Shade constants + __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); + dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); + dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1, 0, 1, 0)); + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + + __m128i inv_desaturate, shade_fade, shade_light; + int desaturate; + __m128i lightcontrib; + if (ShadeModeT::Mode == (int)ShadeMode::Advanced) + { + __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + inv_desaturate = 
_mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); + shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); + shade_fade = _mm_mullo_epi16(shade_fade, inv_light); + shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); + desaturate = shade_constants.desaturate; + + lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + lightcontrib = _mm_sub_epi16(lightcontrib, mlight); + } + else + { + inv_desaturate = _mm_setzero_si128(); + shade_fade = _mm_setzero_si128(); + shade_fade = _mm_setzero_si128(); + shade_light = _mm_setzero_si128(); + desaturate = 0; + lightcontrib = _mm_setzero_si128(); + + mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + } + + int count = args.Count(); + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); + uint32_t fracstep = args.TextureVStep(); + uint32_t frac = args.TextureVPos(); + uint32_t texturefracx = args.TextureUPos(); + uint32_t *dest = (uint32_t*)args.Dest(); + int dest_y = args.DestY(); + + count = thread->count_for_thread(dest_y, count); + if (count <= 0) return; + frac += thread->skipped_by_thread(dest_y) * fracstep; + dest = thread->dest_for_thread(dest_y, pitch, dest); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + + if (FilterModeT::Mode == (int)FilterModes::Linear) + { + frac -= one / 2; + } + + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t destalpha = 
args.DestAlpha() >> (FRACBITS - 8); + uint32_t srccolor = args.SrcColorBgra(); + uint32_t color = LightBgra::shade_pal_index_simple(args.SolidColor(), light); + + int ssecount = count / 2; + for (int index = 0; index < ssecount; index++) + { + int offset = index * pitch * 2; + uint32_t desttmp[2]; + desttmp[0] = dest[offset]; + desttmp[1] = dest[offset + pitch]; + + __m128i bgcolor; + if (BlendT::Mode != (int)SpriteBlendModes::Opaque && BlendT::Mode != (int)SpriteBlendModes::Copy) + { + bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); + } + else + { + bgcolor = _mm_setzero_si128(); + } + + unsigned int ifgcolor[2], ifgshade[2]; + ifgcolor[0] = Sample(frac, source, source2, translation, textureheight, one, texturefracx, color, srccolor); + ifgshade[0] = SampleShade(frac, source, colormap); + frac += fracstep; + + ifgcolor[1] = Sample(frac, source, source2, translation, textureheight, one, texturefracx, color, srccolor); + ifgshade[1] = SampleShade(frac, source, colormap); + frac += fracstep; + + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + fgcolor = Shade(fgcolor, mlight, ifgcolor[0], ifgcolor[1], desaturate, inv_desaturate, shade_fade, shade_light, lightcontrib); + __m128i outcolor = Blend(fgcolor, bgcolor, ifgcolor[0], ifgcolor[1], ifgshade[0], ifgshade[1], srcalpha, destalpha); + + _mm_storel_epi64((__m128i*)desttmp, outcolor); + dest[offset] = desttmp[0]; + dest[offset + pitch] = desttmp[1]; + } + + if (ssecount * 2 != count) + { + int index = ssecount * 2; + int offset = index * pitch; + + __m128i bgcolor; + if (BlendT::Mode != (int)SpriteBlendModes::Opaque && BlendT::Mode != (int)SpriteBlendModes::Copy) + { + bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); + } + else + { + bgcolor = _mm_setzero_si128(); + } + + // Sample + unsigned int ifgcolor[2], ifgshade[2]; + ifgcolor[0] = Sample(frac, source, source2, translation, textureheight, 
one, texturefracx, color, srccolor); + ifgcolor[1] = 0; + ifgshade[0] = SampleShade(frac, source, colormap); + ifgshade[1] = 0; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + fgcolor = Shade(fgcolor, mlight, ifgcolor[0], ifgcolor[1], desaturate, inv_desaturate, shade_fade, shade_light, lightcontrib); + __m128i outcolor = Blend(fgcolor, bgcolor, ifgcolor[0], ifgcolor[1], ifgshade[0], ifgshade[1], srcalpha, destalpha); + + dest[offset] = _mm_cvtsi128_si32(outcolor); + } + } + + template + unsigned int Sample(uint32_t frac, const uint32_t *source, const uint32_t *source2, const uint32_t *translation, int textureheight, uint32_t one, uint32_t texturefracx, uint32_t color, uint32_t srccolor) + { + using namespace DrawSprite32TModes; + + if (SamplerT::Mode == (int)SpriteSamplers::Shaded) + { + return color; + } + else if (SamplerT::Mode == (int)SpriteSamplers::Translated) + { + const uint8_t *sourcepal = (const uint8_t *)source; + return translation[sourcepal[frac >> FRACBITS]]; + } + else if (SamplerT::Mode == (int)SpriteSamplers::Fill) + { + return srccolor; + } + else if (FilterModeT::Mode == (int)FilterModes::Nearest) + { + int sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; + return source[sample_index]; + } + else + { + // Clamp to edge + unsigned int frac_y0 = (clamp(frac, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; + unsigned int frac_y1 = (clamp(frac + one, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; + unsigned int y0 = frac_y0 >> FRACBITS; + unsigned int y1 = frac_y1 >> FRACBITS; + + unsigned int p00 = source[y0]; + unsigned int p01 = source[y1]; + unsigned int p10 = source2[y0]; + unsigned int p11 = source2[y1]; + + unsigned int inv_b = texturefracx; + unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; + unsigned int a = 16 - inv_a; + unsigned int b = 16 - inv_b; + + unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + 
RPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + return (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + } + } + + unsigned int SampleShade(uint32_t frac, const uint32_t *source, const uint8_t *colormap) + { + using namespace DrawSprite32TModes; + + if (SamplerT::Mode == (int)SpriteSamplers::Shaded) + { + const uint8_t *sourcepal = (const uint8_t *)source; + unsigned int sampleshadeout = colormap[sourcepal[frac >> FRACBITS]]; + return clamp(sampleshadeout, 0, 64) * 4; + } + else + { + return 0; + } + } + + template + __m128i Shade(__m128i fgcolor, __m128i mlight, unsigned int ifgcolor0, unsigned int ifgcolor1, int desaturate, __m128i inv_desaturate, __m128i shade_fade, __m128i shade_light, __m128i lightcontrib) + { + using namespace DrawSprite32TModes; + + if (BlendT::Mode == (int)SpriteBlendModes::Copy || BlendT::Mode == (int)SpriteBlendModes::Shaded) + return fgcolor; + + if (ShadeModeT::Mode == (int)ShadeMode::Simple) + { + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + return fgcolor; + } + else + { + __m128i lit_dynlight = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + + int blue0 = BPART(ifgcolor0); + int green0 = GPART(ifgcolor0); + int red0 = RPART(ifgcolor0); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + + int blue1 = BPART(ifgcolor1); + int green1 = GPART(ifgcolor1); + int red1 = RPART(ifgcolor1); + int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, 
intensity0, intensity0, intensity0); + + fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + fgcolor = _mm_add_epi16(fgcolor, lit_dynlight); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); + return fgcolor; + } + } + + __m128i Blend(__m128i fgcolor, __m128i bgcolor, unsigned int ifgcolor0, unsigned int ifgcolor1, unsigned int ifgshade0, unsigned int ifgshade1, uint32_t srcalpha, uint32_t destalpha) + { + using namespace DrawSprite32TModes; + + if (BlendT::Mode == (int)SpriteBlendModes::Opaque) + { + __m128i outcolor = fgcolor; + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + return outcolor; + } + else if (BlendT::Mode == (int)SpriteBlendModes::Shaded) + { + __m128i alpha = _mm_set_epi16(ifgshade1, ifgshade1, ifgshade1, ifgshade1, ifgshade0, ifgshade0, ifgshade0, ifgshade0); + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + + fgcolor = _mm_mullo_epi16(fgcolor, alpha); + bgcolor = _mm_mullo_epi16(bgcolor, inv_alpha); + __m128i outcolor = _mm_srli_epi16(_mm_add_epi16(fgcolor, bgcolor), 8); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + return outcolor; + } + else + { + uint32_t alpha0 = APART(ifgcolor0); + uint32_t alpha1 = APART(ifgcolor1); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; + + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, 
bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo, out_hi; + if (BlendT::Mode == (int)SpriteBlendModes::AddClamp) + { + out_lo = _mm_add_epi32(fg_lo, bg_lo); + out_hi = _mm_add_epi32(fg_hi, bg_hi); + } + else if (BlendT::Mode == (int)SpriteBlendModes::SubClamp) + { + out_lo = _mm_sub_epi32(fg_lo, bg_lo); + out_hi = _mm_sub_epi32(fg_hi, bg_hi); + } + else if (BlendT::Mode == (int)SpriteBlendModes::RevSubClamp) + { + out_lo = _mm_sub_epi32(bg_lo, fg_lo); + out_hi = _mm_sub_epi32(bg_hi, fg_hi); + } + + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + return outcolor; + } + } + + FString DebugInfo() override { return "DrawSprite32T"; } + }; + + typedef DrawSprite32T DrawSpriteCopy32Command; + + typedef DrawSprite32T DrawSprite32Command; + typedef DrawSprite32T DrawSpriteAddClamp32Command; + typedef DrawSprite32T DrawSpriteSubClamp32Command; + typedef DrawSprite32T DrawSpriteRevSubClamp32Command; + + typedef DrawSprite32T FillSprite32Command; + typedef DrawSprite32T FillSpriteAddClamp32Command; + typedef DrawSprite32T FillSpriteSubClamp32Command; + typedef DrawSprite32T FillSpriteRevSubClamp32Command; + + typedef DrawSprite32T DrawSpriteShaded32Command; + + typedef DrawSprite32T DrawSpriteTranslated32Command; + typedef DrawSprite32T DrawSpriteTranslatedAddClamp32Command; + typedef 
DrawSprite32T DrawSpriteTranslatedSubClamp32Command; + typedef DrawSprite32T DrawSpriteTranslatedRevSubClamp32Command; +} From e697746e7d526f8ab4164872f9414b561b5ace4b Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 24 Feb 2017 16:59:45 +0100 Subject: [PATCH 895/912] Remove the php drawers and their generated output --- src/swrenderer/drawers/r_draw_rgba.cpp | 2 +- .../{r_draw_sky32.h => r_draw_sky32_sse2.h} | 0 src/swrenderer/drawers/r_draw_span32.h | 8054 ----------------- src/swrenderer/drawers/r_draw_span32.php | 443 - src/swrenderer/drawers/r_draw_sprite32.h | 6898 -------------- src/swrenderer/drawers/r_draw_sprite32.php | 385 - src/swrenderer/drawers/r_draw_wall32.h | 5416 ----------- src/swrenderer/drawers/r_draw_wall32.php | 370 - 8 files changed, 1 insertion(+), 21567 deletions(-) rename src/swrenderer/drawers/{r_draw_sky32.h => r_draw_sky32_sse2.h} (100%) delete mode 100644 src/swrenderer/drawers/r_draw_span32.h delete mode 100644 src/swrenderer/drawers/r_draw_span32.php delete mode 100644 src/swrenderer/drawers/r_draw_sprite32.h delete mode 100644 src/swrenderer/drawers/r_draw_sprite32.php delete mode 100644 src/swrenderer/drawers/r_draw_wall32.h delete mode 100644 src/swrenderer/drawers/r_draw_wall32.php diff --git a/src/swrenderer/drawers/r_draw_rgba.cpp b/src/swrenderer/drawers/r_draw_rgba.cpp index aa0855a102..48bbc2c38b 100644 --- a/src/swrenderer/drawers/r_draw_rgba.cpp +++ b/src/swrenderer/drawers/r_draw_rgba.cpp @@ -42,7 +42,7 @@ #include "r_draw_wall32_sse2.h" #include "r_draw_sprite32_sse2.h" #include "r_draw_span32_sse2.h" -#include "r_draw_sky32.h" +#include "r_draw_sky32_sse2.h" #include "gi.h" #include "stats.h" diff --git a/src/swrenderer/drawers/r_draw_sky32.h b/src/swrenderer/drawers/r_draw_sky32_sse2.h similarity index 100% rename from src/swrenderer/drawers/r_draw_sky32.h rename to src/swrenderer/drawers/r_draw_sky32_sse2.h diff --git a/src/swrenderer/drawers/r_draw_span32.h b/src/swrenderer/drawers/r_draw_span32.h deleted 
file mode 100644 index af2970f986..0000000000 --- a/src/swrenderer/drawers/r_draw_span32.h +++ /dev/null @@ -1,8054 +0,0 @@ -/* -** Drawer commands for spans -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 'as-is', without any express or implied -** warranty. In no event will the authors be held liable for any damages -** arising from the use of this software. -** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. -** 3. This notice may not be removed or altered from any source distribution. -** -*/ - -/* - Warning: this C++ source file has been auto-generated. Please modify the original php script that generated it. 
-*/ - -#pragma once - -#include "swrenderer/drawers/r_draw_rgba.h" -#include "swrenderer/viewport/r_spandrawer.h" - -namespace swrenderer -{ - class DrawSpan32Command : public DrawerCommand - { - protected: - SpanDrawerArgs args; - - public: - DrawSpan32Command(const SpanDrawerArgs &drawerargs) : args(drawerargs) { } - - void Execute(DrawerThread *thread) override - { - if (thread->line_skipped_by_thread(args.DestY())) return; - - uint32_t xbits = args.TextureWidthBits(); - uint32_t ybits = args.TextureHeightBits(); - uint32_t xstep = args.TextureUStep(); - uint32_t ystep = args.TextureVStep(); - uint32_t xfrac = args.TextureUPos(); - uint32_t yfrac = args.TextureVPos(); - uint32_t yshift = 32 - ybits; - uint32_t xshift = yshift - xbits; - uint32_t xmask = ((1 << xbits) - 1) << ybits; - - const uint32_t *source = (const uint32_t*)args.TexturePixels(); - - double lod = args.TextureLOD(); - bool mipmapped = args.MipmappedTexture(); - - bool magnifying = lod < 0.0; - if (r_mipmap && mipmapped) - { - int level = (int)lod; - while (level > 0) - { - if (xbits <= 2 || ybits <= 2) - break; - - source += (1 << (xbits)) * (1 << (ybits)); - xbits -= 1; - ybits -= 1; - level--; - } - } - - bool is_nearest_filter = !((magnifying && r_magfilter) || (!magnifying && r_minfilter)); - - auto shade_constants = args.ColormapConstants(); - if (shade_constants.simple_shade) - { - if (is_nearest_filter) - { - bool is_64x64 = xbits == 6 && ybits == 6; - if (is_64x64) - { - // Shade constants - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - - auto lights = args.dc_lights; - auto num_lights = args.dc_num_lights; - float vpx = args.dc_viewpos.X; - float stepvpx = args.dc_viewpos_step.X; - __m128 viewpos_x = _mm_setr_ps(vpx, vpx + stepvpx, 0.0f, 0.0f); - __m128 step_viewpos_x = 
_mm_set1_ps(stepvpx * 2.0f); - - int count = args.DestX2() - args.DestX1() + 1; - int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); - - uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); - - int ssecount = count / 2; - for (int index = 0; index < ssecount; index++) - { - int offset = index * 2; - - // Sample - unsigned int ifgcolor[2]; - { - int sample_index = ((xfrac >> (32 - 6 - 6)) & (63 * 64)) + (yfrac >> (32 - 6)); - unsigned int sampleout = source[sample_index]; - ifgcolor[0] = sampleout; - xfrac += xstep; - yfrac += ystep; - } - { - int sample_index = ((xfrac >> (32 - 6 - 6)) & (63 * 64)) + (yfrac >> (32 - 6)); - unsigned int sampleout = source[sample_index]; - ifgcolor[1] = sampleout; - xfrac += xstep; - yfrac += ystep; - } - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z - __m128 Lx = _mm_sub_ps(light_x, viewpos_x); - __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = 
dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - __m128i outcolor = fgcolor; - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - - _mm_storel_epi64((__m128i*)(dest + offset), outcolor); - viewpos_x = _mm_add_ps(viewpos_x, step_viewpos_x); - } - - if (ssecount * 2 != count) - { - int index = ssecount * 2; - int offset = index; - - // Sample - unsigned int ifgcolor[2]; - int sample_index = ((xfrac >> (32 - 6 - 6)) & (63 * 64)) + (yfrac >> (32 - 6)); - unsigned int sampleout = source[sample_index]; - ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = 
sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z - __m128 Lx = _mm_sub_ps(light_x, viewpos_x); - __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - __m128i outcolor = fgcolor; - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - - dest[offset] = _mm_cvtsi128_si32(outcolor); - } - } - else - { - // Shade constants - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - - auto lights = args.dc_lights; - auto num_lights = args.dc_num_lights; - float vpx = 
args.dc_viewpos.X; - float stepvpx = args.dc_viewpos_step.X; - __m128 viewpos_x = _mm_setr_ps(vpx, vpx + stepvpx, 0.0f, 0.0f); - __m128 step_viewpos_x = _mm_set1_ps(stepvpx * 2.0f); - - int count = args.DestX2() - args.DestX1() + 1; - int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); - - uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); - - int ssecount = count / 2; - for (int index = 0; index < ssecount; index++) - { - int offset = index * 2; - - // Sample - unsigned int ifgcolor[2]; - { - int sample_index = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - unsigned int sampleout = source[sample_index]; - ifgcolor[0] = sampleout; - xfrac += xstep; - yfrac += ystep; - } - { - int sample_index = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - unsigned int sampleout = source[sample_index]; - ifgcolor[1] = sampleout; - xfrac += xstep; - yfrac += ystep; - } - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z - __m128 Lx = _mm_sub_ps(light_x, viewpos_x); - __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), 
m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - __m128i outcolor = fgcolor; - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - - _mm_storel_epi64((__m128i*)(dest + offset), outcolor); - viewpos_x = _mm_add_ps(viewpos_x, step_viewpos_x); - } - - if (ssecount * 2 != count) - { - int index = ssecount * 2; - int offset = index; - - // Sample - unsigned int ifgcolor[2]; - int sample_index = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - unsigned int sampleout = source[sample_index]; - ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 
light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z - __m128 Lx = _mm_sub_ps(light_x, viewpos_x); - __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - __m128i outcolor = fgcolor; - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - - dest[offset] = _mm_cvtsi128_si32(outcolor); - } - } - } - else - { - bool is_64x64 = xbits == 6 && ybits == 6; - if (is_64x64) - { - // Shade constants - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i inv_light = 
_mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - - auto lights = args.dc_lights; - auto num_lights = args.dc_num_lights; - float vpx = args.dc_viewpos.X; - float stepvpx = args.dc_viewpos_step.X; - __m128 viewpos_x = _mm_setr_ps(vpx, vpx + stepvpx, 0.0f, 0.0f); - __m128 step_viewpos_x = _mm_set1_ps(stepvpx * 2.0f); - - int count = args.DestX2() - args.DestX1() + 1; - int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); - - xfrac -= 1 << (31 - xbits); - yfrac -= 1 << (31 - ybits); - uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); - - int ssecount = count / 2; - for (int index = 0; index < ssecount; index++) - { - int offset = index * 2; - - // Sample - unsigned int ifgcolor[2]; - { - uint32_t xxbits = 26; - uint32_t yybits = 26; - uint32_t xxshift = (32 - xxbits); - uint32_t yyshift = (32 - yybits); - uint32_t xxmask = (1 << xxshift) - 1; - uint32_t yymask = (1 << yyshift) - 1; - uint32_t x = xfrac >> xxbits; - uint32_t y = yfrac >> yybits; - - uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; - uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; - - uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; - uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; - uint32_t a = 16 - inv_a; - uint32_t b = 16 - inv_b; - - uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * 
(a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - ifgcolor[0] = sampleout; - xfrac += xstep; - yfrac += ystep; - } - { - uint32_t xxbits = 26; - uint32_t yybits = 26; - uint32_t xxshift = (32 - xxbits); - uint32_t yyshift = (32 - yybits); - uint32_t xxmask = (1 << xxshift) - 1; - uint32_t yymask = (1 << yyshift) - 1; - uint32_t x = xfrac >> xxbits; - uint32_t y = yfrac >> yybits; - - uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; - uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; - - uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; - uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; - uint32_t a = 16 - inv_a; - uint32_t b = 16 - inv_b; - - uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - ifgcolor[1] = sampleout; - xfrac += xstep; - yfrac += ystep; - } - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - __m128i lit 
= _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z - __m128 Lx = _mm_sub_ps(light_x, viewpos_x); - __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - __m128i outcolor = fgcolor; - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - - _mm_storel_epi64((__m128i*)(dest + offset), outcolor); - viewpos_x = _mm_add_ps(viewpos_x, step_viewpos_x); - } - 
- if (ssecount * 2 != count) - { - int index = ssecount * 2; - int offset = index; - - // Sample - unsigned int ifgcolor[2]; - uint32_t xxbits = 26; - uint32_t yybits = 26; - uint32_t xxshift = (32 - xxbits); - uint32_t yyshift = (32 - yybits); - uint32_t xxmask = (1 << xxshift) - 1; - uint32_t yymask = (1 << yyshift) - 1; - uint32_t x = xfrac >> xxbits; - uint32_t y = yfrac >> yybits; - - uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; - uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; - - uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; - uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; - uint32_t a = 16 - inv_a; - uint32_t b = 16 - inv_b; - - uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = 
_mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z - __m128 Lx = _mm_sub_ps(light_x, viewpos_x); - __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - __m128i outcolor = fgcolor; - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - - dest[offset] = _mm_cvtsi128_si32(outcolor); - } - } - else - { - // Shade constants - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - 
light); - - auto lights = args.dc_lights; - auto num_lights = args.dc_num_lights; - float vpx = args.dc_viewpos.X; - float stepvpx = args.dc_viewpos_step.X; - __m128 viewpos_x = _mm_setr_ps(vpx, vpx + stepvpx, 0.0f, 0.0f); - __m128 step_viewpos_x = _mm_set1_ps(stepvpx * 2.0f); - - int count = args.DestX2() - args.DestX1() + 1; - int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); - - xfrac -= 1 << (31 - xbits); - yfrac -= 1 << (31 - ybits); - uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); - - int ssecount = count / 2; - for (int index = 0; index < ssecount; index++) - { - int offset = index * 2; - - // Sample - unsigned int ifgcolor[2]; - { - uint32_t xxbits = 32 - xbits; - uint32_t yybits = 32 - ybits; - uint32_t xxshift = (32 - xxbits); - uint32_t yyshift = (32 - yybits); - uint32_t xxmask = (1 << xxshift) - 1; - uint32_t yymask = (1 << yyshift) - 1; - uint32_t x = xfrac >> xxbits; - uint32_t y = yfrac >> yybits; - - uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; - uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; - - uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; - uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; - uint32_t a = 16 - inv_a; - uint32_t b = 16 - inv_b; - - uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t salpha 
= (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - ifgcolor[0] = sampleout; - xfrac += xstep; - yfrac += ystep; - } - { - uint32_t xxbits = 32 - xbits; - uint32_t yybits = 32 - ybits; - uint32_t xxshift = (32 - xxbits); - uint32_t yyshift = (32 - yybits); - uint32_t xxmask = (1 << xxshift) - 1; - uint32_t yymask = (1 << yyshift) - 1; - uint32_t x = xfrac >> xxbits; - uint32_t y = yfrac >> yybits; - - uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; - uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; - - uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; - uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; - uint32_t a = 16 - inv_a; - uint32_t b = 16 - inv_b; - - uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - ifgcolor[1] = sampleout; - xfrac += xstep; - yfrac += ystep; - } - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != 
num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z - __m128 Lx = _mm_sub_ps(light_x, viewpos_x); - __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - __m128i outcolor = fgcolor; - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - - _mm_storel_epi64((__m128i*)(dest + offset), outcolor); - viewpos_x = _mm_add_ps(viewpos_x, step_viewpos_x); - } - - if (ssecount * 2 != count) - { - int index = 
ssecount * 2; - int offset = index; - - // Sample - unsigned int ifgcolor[2]; - uint32_t xxbits = 32 - xbits; - uint32_t yybits = 32 - ybits; - uint32_t xxshift = (32 - xxbits); - uint32_t yyshift = (32 - yybits); - uint32_t xxmask = (1 << xxshift) - 1; - uint32_t yymask = (1 << yyshift) - 1; - uint32_t x = xfrac >> xxbits; - uint32_t y = yfrac >> yybits; - - uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; - uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; - - uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; - uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; - uint32_t a = 16 - inv_a; - uint32_t b = 16 - inv_b; - - uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = 
_mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z - __m128 Lx = _mm_sub_ps(light_x, viewpos_x); - __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - __m128i outcolor = fgcolor; - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - - dest[offset] = _mm_cvtsi128_si32(outcolor); - } - } - } - } - else - { - if (is_nearest_filter) - { - bool is_64x64 = xbits == 6 && ybits == 6; - if (is_64x64) - { - // Shade constants - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - 
light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); - __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); - shade_fade = _mm_mullo_epi16(shade_fade, inv_light); - __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); - int desaturate = shade_constants.desaturate; - - auto lights = args.dc_lights; - auto num_lights = args.dc_num_lights; - float vpx = args.dc_viewpos.X; - float stepvpx = args.dc_viewpos_step.X; - __m128 viewpos_x = _mm_setr_ps(vpx, vpx + stepvpx, 0.0f, 0.0f); - __m128 step_viewpos_x = _mm_set1_ps(stepvpx * 2.0f); - - int count = args.DestX2() - args.DestX1() + 1; - int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); - - uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); - - int ssecount = count / 2; - for (int index = 0; index < ssecount; index++) - { - int offset = index * 2; - - // Sample - unsigned int ifgcolor[2]; - { - int sample_index = ((xfrac >> (32 - 6 - 6)) & (63 * 64)) + (yfrac >> (32 - 6)); - unsigned int sampleout = source[sample_index]; - ifgcolor[0] = sampleout; - xfrac += xstep; - yfrac += ystep; - } - { - int sample_index = ((xfrac >> (32 - 6 - 6)) & (63 * 64)) + (yfrac >> (32 - 6)); - unsigned int 
sampleout = source[sample_index]; - ifgcolor[1] = sampleout; - xfrac += xstep; - yfrac += ystep; - } - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - int blue0 = BPART(ifgcolor[0]); - int green0 = GPART(ifgcolor[0]); - int red0 = RPART(ifgcolor[0]); - int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - - int blue1 = BPART(ifgcolor[1]); - int green1 = GPART(ifgcolor[1]); - int red1 = RPART(ifgcolor[1]); - int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; - - __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); - - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, mlight); - fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z - __m128 Lx = _mm_sub_ps(light_x, viewpos_x); - __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); 
- - __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - __m128i outcolor = fgcolor; - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - - _mm_storel_epi64((__m128i*)(dest + offset), outcolor); - viewpos_x = _mm_add_ps(viewpos_x, step_viewpos_x); - } - - if (ssecount * 2 != count) - { - int index = ssecount * 2; - int offset = index; - - // Sample - unsigned int ifgcolor[2]; - int sample_index = ((xfrac >> (32 - 6 - 6)) & (63 * 64)) + (yfrac >> (32 - 6)); - unsigned int sampleout = source[sample_index]; - ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - int blue0 = BPART(ifgcolor[0]); - int green0 = GPART(ifgcolor[0]); - int red0 = RPART(ifgcolor[0]); - int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - - int blue1 = BPART(ifgcolor[1]); - int green1 = GPART(ifgcolor[1]); - int red1 = RPART(ifgcolor[1]); - int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; - - __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); - - fgcolor = 
_mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, mlight); - fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z - __m128 Lx = _mm_sub_ps(light_x, viewpos_x); - __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 
8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - __m128i outcolor = fgcolor; - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - - dest[offset] = _mm_cvtsi128_si32(outcolor); - } - } - else - { - // Shade constants - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); - __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); - shade_fade = _mm_mullo_epi16(shade_fade, inv_light); - __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); - int desaturate = shade_constants.desaturate; - - auto lights = args.dc_lights; - auto num_lights = args.dc_num_lights; - float vpx = args.dc_viewpos.X; - float stepvpx = args.dc_viewpos_step.X; - __m128 viewpos_x = _mm_setr_ps(vpx, vpx + stepvpx, 0.0f, 0.0f); - __m128 step_viewpos_x = _mm_set1_ps(stepvpx * 2.0f); - - int count = args.DestX2() - args.DestX1() + 1; - int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); - - uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); 
- - int ssecount = count / 2; - for (int index = 0; index < ssecount; index++) - { - int offset = index * 2; - - // Sample - unsigned int ifgcolor[2]; - { - int sample_index = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - unsigned int sampleout = source[sample_index]; - ifgcolor[0] = sampleout; - xfrac += xstep; - yfrac += ystep; - } - { - int sample_index = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - unsigned int sampleout = source[sample_index]; - ifgcolor[1] = sampleout; - xfrac += xstep; - yfrac += ystep; - } - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - int blue0 = BPART(ifgcolor[0]); - int green0 = GPART(ifgcolor[0]); - int red0 = RPART(ifgcolor[0]); - int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - - int blue1 = BPART(ifgcolor[1]); - int green1 = GPART(ifgcolor[1]); - int red1 = RPART(ifgcolor[1]); - int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; - - __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); - - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, mlight); - fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z - __m128 Lx = _mm_sub_ps(light_x, viewpos_x); - __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, 
Lx)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - __m128i outcolor = fgcolor; - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - - _mm_storel_epi64((__m128i*)(dest + offset), outcolor); - viewpos_x = _mm_add_ps(viewpos_x, step_viewpos_x); - } - - if (ssecount * 2 != count) - { - int index = ssecount * 2; - int offset = index; - - // Sample - unsigned int ifgcolor[2]; - int sample_index = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - unsigned int sampleout = source[sample_index]; - ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - int blue0 = BPART(ifgcolor[0]); - int green0 = GPART(ifgcolor[0]); - int red0 = RPART(ifgcolor[0]); - int 
intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - - int blue1 = BPART(ifgcolor[1]); - int green1 = GPART(ifgcolor[1]); - int red1 = RPART(ifgcolor[1]); - int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; - - __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); - - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, mlight); - fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z - __m128 Lx = _mm_sub_ps(light_x, viewpos_x); - __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, 
_MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - __m128i outcolor = fgcolor; - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - - dest[offset] = _mm_cvtsi128_si32(outcolor); - } - } - } - else - { - bool is_64x64 = xbits == 6 && ybits == 6; - if (is_64x64) - { - // Shade constants - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); - __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); - shade_fade = _mm_mullo_epi16(shade_fade, inv_light); - __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); - int desaturate = shade_constants.desaturate; - - auto lights = args.dc_lights; - auto num_lights = args.dc_num_lights; - float vpx = args.dc_viewpos.X; - float stepvpx = 
args.dc_viewpos_step.X; - __m128 viewpos_x = _mm_setr_ps(vpx, vpx + stepvpx, 0.0f, 0.0f); - __m128 step_viewpos_x = _mm_set1_ps(stepvpx * 2.0f); - - int count = args.DestX2() - args.DestX1() + 1; - int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); - - xfrac -= 1 << (31 - xbits); - yfrac -= 1 << (31 - ybits); - uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); - - int ssecount = count / 2; - for (int index = 0; index < ssecount; index++) - { - int offset = index * 2; - - // Sample - unsigned int ifgcolor[2]; - { - uint32_t xxbits = 26; - uint32_t yybits = 26; - uint32_t xxshift = (32 - xxbits); - uint32_t yyshift = (32 - yybits); - uint32_t xxmask = (1 << xxshift) - 1; - uint32_t yymask = (1 << yyshift) - 1; - uint32_t x = xfrac >> xxbits; - uint32_t y = yfrac >> yybits; - - uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; - uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; - - uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; - uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; - uint32_t a = 16 - inv_a; - uint32_t b = 16 - inv_b; - - uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout 
= (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - ifgcolor[0] = sampleout; - xfrac += xstep; - yfrac += ystep; - } - { - uint32_t xxbits = 26; - uint32_t yybits = 26; - uint32_t xxshift = (32 - xxbits); - uint32_t yyshift = (32 - yybits); - uint32_t xxmask = (1 << xxshift) - 1; - uint32_t yymask = (1 << yyshift) - 1; - uint32_t x = xfrac >> xxbits; - uint32_t y = yfrac >> yybits; - - uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; - uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; - - uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; - uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; - uint32_t a = 16 - inv_a; - uint32_t b = 16 - inv_b; - - uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - ifgcolor[1] = sampleout; - xfrac += xstep; - yfrac += ystep; - } - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - int blue0 = BPART(ifgcolor[0]); - int green0 = GPART(ifgcolor[0]); - int red0 = RPART(ifgcolor[0]); - int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - - int blue1 = BPART(ifgcolor[1]); - int green1 = GPART(ifgcolor[1]); - int red1 = RPART(ifgcolor[1]); - int 
intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; - - __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); - - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, mlight); - fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z - __m128 Lx = _mm_sub_ps(light_x, viewpos_x); - __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = 
_mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - __m128i outcolor = fgcolor; - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - - _mm_storel_epi64((__m128i*)(dest + offset), outcolor); - viewpos_x = _mm_add_ps(viewpos_x, step_viewpos_x); - } - - if (ssecount * 2 != count) - { - int index = ssecount * 2; - int offset = index; - - // Sample - unsigned int ifgcolor[2]; - uint32_t xxbits = 26; - uint32_t yybits = 26; - uint32_t xxshift = (32 - xxbits); - uint32_t yyshift = (32 - yybits); - uint32_t xxmask = (1 << xxshift) - 1; - uint32_t yymask = (1 << yyshift) - 1; - uint32_t x = xfrac >> xxbits; - uint32_t y = yfrac >> yybits; - - uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; - uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; - - uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; - uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; - uint32_t a = 16 - inv_a; - uint32_t b = 16 - inv_b; - - uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | 
sblue; - ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - int blue0 = BPART(ifgcolor[0]); - int green0 = GPART(ifgcolor[0]); - int red0 = RPART(ifgcolor[0]); - int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - - int blue1 = BPART(ifgcolor[1]); - int green1 = GPART(ifgcolor[1]); - int red1 = RPART(ifgcolor[1]); - int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; - - __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); - - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, mlight); - fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z - __m128 Lx = _mm_sub_ps(light_x, viewpos_x); - __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_z, 
_mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - __m128i outcolor = fgcolor; - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - - dest[offset] = _mm_cvtsi128_si32(outcolor); - } - } - else - { - // Shade constants - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); - __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); - shade_fade = _mm_mullo_epi16(shade_fade, inv_light); - __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, 
shade_constants.light_green, shade_constants.light_blue); - int desaturate = shade_constants.desaturate; - - auto lights = args.dc_lights; - auto num_lights = args.dc_num_lights; - float vpx = args.dc_viewpos.X; - float stepvpx = args.dc_viewpos_step.X; - __m128 viewpos_x = _mm_setr_ps(vpx, vpx + stepvpx, 0.0f, 0.0f); - __m128 step_viewpos_x = _mm_set1_ps(stepvpx * 2.0f); - - int count = args.DestX2() - args.DestX1() + 1; - int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); - - xfrac -= 1 << (31 - xbits); - yfrac -= 1 << (31 - ybits); - uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); - - int ssecount = count / 2; - for (int index = 0; index < ssecount; index++) - { - int offset = index * 2; - - // Sample - unsigned int ifgcolor[2]; - { - uint32_t xxbits = 32 - xbits; - uint32_t yybits = 32 - ybits; - uint32_t xxshift = (32 - xxbits); - uint32_t yyshift = (32 - yybits); - uint32_t xxmask = (1 << xxshift) - 1; - uint32_t yymask = (1 << yyshift) - 1; - uint32_t x = xfrac >> xxbits; - uint32_t y = yfrac >> yybits; - - uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; - uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; - - uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; - uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; - uint32_t a = 16 - inv_a; - uint32_t b = 16 - inv_b; - - uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * 
(inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - ifgcolor[0] = sampleout; - xfrac += xstep; - yfrac += ystep; - } - { - uint32_t xxbits = 32 - xbits; - uint32_t yybits = 32 - ybits; - uint32_t xxshift = (32 - xxbits); - uint32_t yyshift = (32 - yybits); - uint32_t xxmask = (1 << xxshift) - 1; - uint32_t yymask = (1 << yyshift) - 1; - uint32_t x = xfrac >> xxbits; - uint32_t y = yfrac >> yybits; - - uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; - uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; - - uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; - uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; - uint32_t a = 16 - inv_a; - uint32_t b = 16 - inv_b; - - uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - ifgcolor[1] = sampleout; - xfrac += xstep; - yfrac += ystep; - } - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - int blue0 = BPART(ifgcolor[0]); - 
int green0 = GPART(ifgcolor[0]); - int red0 = RPART(ifgcolor[0]); - int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - - int blue1 = BPART(ifgcolor[1]); - int green1 = GPART(ifgcolor[1]); - int red1 = RPART(ifgcolor[1]); - int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; - - __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); - - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, mlight); - fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z - __m128 Lx = _mm_sub_ps(light_x, viewpos_x); - __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = 
_mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - __m128i outcolor = fgcolor; - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - - _mm_storel_epi64((__m128i*)(dest + offset), outcolor); - viewpos_x = _mm_add_ps(viewpos_x, step_viewpos_x); - } - - if (ssecount * 2 != count) - { - int index = ssecount * 2; - int offset = index; - - // Sample - unsigned int ifgcolor[2]; - uint32_t xxbits = 32 - xbits; - uint32_t yybits = 32 - ybits; - uint32_t xxshift = (32 - xxbits); - uint32_t yyshift = (32 - yybits); - uint32_t xxmask = (1 << xxshift) - 1; - uint32_t yymask = (1 << yyshift) - 1; - uint32_t x = xfrac >> xxbits; - uint32_t y = yfrac >> yybits; - - uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; - uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; - - uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; - uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; - uint32_t a = 16 - inv_a; - uint32_t b = 16 - inv_b; - - uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) 
+ BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - int blue0 = BPART(ifgcolor[0]); - int green0 = GPART(ifgcolor[0]); - int red0 = RPART(ifgcolor[0]); - int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - - int blue1 = BPART(ifgcolor[1]); - int green1 = GPART(ifgcolor[1]); - int red1 = RPART(ifgcolor[1]); - int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; - - __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); - - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, mlight); - fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z - __m128 Lx = _mm_sub_ps(light_x, viewpos_x); - __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - 
- // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - __m128i outcolor = fgcolor; - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - - dest[offset] = _mm_cvtsi128_si32(outcolor); - } - } - } - } - } - - FString DebugInfo() override { return "DrawSpan32Command"; } - }; - - class DrawSpanMasked32Command : public DrawerCommand - { - protected: - SpanDrawerArgs args; - - public: - DrawSpanMasked32Command(const SpanDrawerArgs &drawerargs) : args(drawerargs) { } - - void Execute(DrawerThread *thread) override - { - if (thread->line_skipped_by_thread(args.DestY())) return; - - uint32_t xbits = args.TextureWidthBits(); - uint32_t ybits = args.TextureHeightBits(); - uint32_t xstep = args.TextureUStep(); - uint32_t ystep = args.TextureVStep(); - uint32_t xfrac = args.TextureUPos(); - uint32_t yfrac = args.TextureVPos(); - uint32_t yshift = 32 - ybits; - uint32_t xshift = yshift - xbits; - uint32_t xmask = ((1 << xbits) - 1) << ybits; - - const uint32_t *source = (const 
uint32_t*)args.TexturePixels(); - - double lod = args.TextureLOD(); - bool mipmapped = args.MipmappedTexture(); - - bool magnifying = lod < 0.0; - if (r_mipmap && mipmapped) - { - int level = (int)lod; - while (level > 0) - { - if (xbits <= 2 || ybits <= 2) - break; - - source += (1 << (xbits)) * (1 << (ybits)); - xbits -= 1; - ybits -= 1; - level--; - } - } - - bool is_nearest_filter = !((magnifying && r_magfilter) || (!magnifying && r_minfilter)); - - auto shade_constants = args.ColormapConstants(); - if (shade_constants.simple_shade) - { - if (is_nearest_filter) - { - bool is_64x64 = xbits == 6 && ybits == 6; - if (is_64x64) - { - // Shade constants - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - - auto lights = args.dc_lights; - auto num_lights = args.dc_num_lights; - float vpx = args.dc_viewpos.X; - float stepvpx = args.dc_viewpos_step.X; - __m128 viewpos_x = _mm_setr_ps(vpx, vpx + stepvpx, 0.0f, 0.0f); - __m128 step_viewpos_x = _mm_set1_ps(stepvpx * 2.0f); - - int count = args.DestX2() - args.DestX1() + 1; - int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); - - uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); - - int ssecount = count / 2; - for (int index = 0; index < ssecount; index++) - { - int offset = index * 2; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(dest + offset)), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2]; - { - int sample_index = ((xfrac >> (32 - 6 - 6)) & (63 * 64)) + (yfrac >> (32 - 6)); - unsigned int sampleout = source[sample_index]; - ifgcolor[0] = sampleout; - xfrac += xstep; - yfrac += ystep; - } - { - int 
sample_index = ((xfrac >> (32 - 6 - 6)) & (63 * 64)) + (yfrac >> (32 - 6)); - unsigned int sampleout = source[sample_index]; - ifgcolor[1] = sampleout; - xfrac += xstep; - yfrac += ystep; - } - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z - __m128 Lx = _mm_sub_ps(light_x, viewpos_x); - __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, 
_mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - fgcolor = _mm_mullo_epi16(fgcolor, alpha); - bgcolor = _mm_mullo_epi16(bgcolor, inv_alpha); - __m128i outcolor = _mm_srli_epi16(_mm_add_epi16(fgcolor, bgcolor), 8); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - _mm_storel_epi64((__m128i*)(dest + offset), outcolor); - viewpos_x = _mm_add_ps(viewpos_x, step_viewpos_x); - } - - if (ssecount * 2 != count) - { - int index = ssecount * 2; - int offset = index; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2]; - int sample_index = ((xfrac >> (32 - 6 - 6)) & (63 * 64)) + (yfrac >> (32 - 6)); - unsigned int sampleout = source[sample_index]; - ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z - __m128 Lx = 
_mm_sub_ps(light_x, viewpos_x); - __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - fgcolor = _mm_mullo_epi16(fgcolor, alpha); - bgcolor = _mm_mullo_epi16(bgcolor, inv_alpha); - __m128i outcolor = _mm_srli_epi16(_mm_add_epi16(fgcolor, bgcolor), 8); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - dest[offset] = _mm_cvtsi128_si32(outcolor); - } - } - else - { - // Shade constants - int light = 256 - 
(args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - - auto lights = args.dc_lights; - auto num_lights = args.dc_num_lights; - float vpx = args.dc_viewpos.X; - float stepvpx = args.dc_viewpos_step.X; - __m128 viewpos_x = _mm_setr_ps(vpx, vpx + stepvpx, 0.0f, 0.0f); - __m128 step_viewpos_x = _mm_set1_ps(stepvpx * 2.0f); - - int count = args.DestX2() - args.DestX1() + 1; - int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); - - uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); - - int ssecount = count / 2; - for (int index = 0; index < ssecount; index++) - { - int offset = index * 2; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(dest + offset)), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2]; - { - int sample_index = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - unsigned int sampleout = source[sample_index]; - ifgcolor[0] = sampleout; - xfrac += xstep; - yfrac += ystep; - } - { - int sample_index = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - unsigned int sampleout = source[sample_index]; - ifgcolor[1] = sampleout; - xfrac += xstep; - yfrac += ystep; - } - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - 
// L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z - __m128 Lx = _mm_sub_ps(light_x, viewpos_x); - __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - fgcolor = _mm_mullo_epi16(fgcolor, alpha); - bgcolor = _mm_mullo_epi16(bgcolor, inv_alpha); - __m128i outcolor = _mm_srli_epi16(_mm_add_epi16(fgcolor, bgcolor), 8); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - 
outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - _mm_storel_epi64((__m128i*)(dest + offset), outcolor); - viewpos_x = _mm_add_ps(viewpos_x, step_viewpos_x); - } - - if (ssecount * 2 != count) - { - int index = ssecount * 2; - int offset = index; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2]; - int sample_index = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - unsigned int sampleout = source[sample_index]; - ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z - __m128 Lx = _mm_sub_ps(light_x, viewpos_x); - __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = 
_mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - fgcolor = _mm_mullo_epi16(fgcolor, alpha); - bgcolor = _mm_mullo_epi16(bgcolor, inv_alpha); - __m128i outcolor = _mm_srli_epi16(_mm_add_epi16(fgcolor, bgcolor), 8); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - dest[offset] = _mm_cvtsi128_si32(outcolor); - } - } - } - else - { - bool is_64x64 = xbits == 6 && ybits == 6; - if (is_64x64) - { - // Shade constants - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - - auto lights = args.dc_lights; - auto num_lights = args.dc_num_lights; - float vpx = args.dc_viewpos.X; - float stepvpx = args.dc_viewpos_step.X; - __m128 viewpos_x = _mm_setr_ps(vpx, vpx + stepvpx, 0.0f, 0.0f); - __m128 step_viewpos_x = _mm_set1_ps(stepvpx * 2.0f); - - int count = args.DestX2() - args.DestX1() + 1; - int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t *dest = 
(uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); - - xfrac -= 1 << (31 - xbits); - yfrac -= 1 << (31 - ybits); - uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); - - int ssecount = count / 2; - for (int index = 0; index < ssecount; index++) - { - int offset = index * 2; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(dest + offset)), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2]; - { - uint32_t xxbits = 26; - uint32_t yybits = 26; - uint32_t xxshift = (32 - xxbits); - uint32_t yyshift = (32 - yybits); - uint32_t xxmask = (1 << xxshift) - 1; - uint32_t yymask = (1 << yyshift) - 1; - uint32_t x = xfrac >> xxbits; - uint32_t y = yfrac >> yybits; - - uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; - uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; - - uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; - uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; - uint32_t a = 16 - inv_a; - uint32_t b = 16 - inv_b; - - uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - ifgcolor[0] = sampleout; - xfrac += xstep; - yfrac += ystep; - } - { - uint32_t xxbits = 26; - uint32_t yybits = 26; - 
uint32_t xxshift = (32 - xxbits); - uint32_t yyshift = (32 - yybits); - uint32_t xxmask = (1 << xxshift) - 1; - uint32_t yymask = (1 << yyshift) - 1; - uint32_t x = xfrac >> xxbits; - uint32_t y = yfrac >> yybits; - - uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; - uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; - - uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; - uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; - uint32_t a = 16 - inv_a; - uint32_t b = 16 - inv_b; - - uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - ifgcolor[1] = sampleout; - xfrac += xstep; - yfrac += ystep; - } - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 
1) - __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z - __m128 Lx = _mm_sub_ps(light_x, viewpos_x); - __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - fgcolor = _mm_mullo_epi16(fgcolor, alpha); - bgcolor = _mm_mullo_epi16(bgcolor, inv_alpha); - __m128i outcolor = _mm_srli_epi16(_mm_add_epi16(fgcolor, bgcolor), 8); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - _mm_storel_epi64((__m128i*)(dest + 
offset), outcolor); - viewpos_x = _mm_add_ps(viewpos_x, step_viewpos_x); - } - - if (ssecount * 2 != count) - { - int index = ssecount * 2; - int offset = index; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2]; - uint32_t xxbits = 26; - uint32_t yybits = 26; - uint32_t xxshift = (32 - xxbits); - uint32_t yyshift = (32 - yybits); - uint32_t xxmask = (1 << xxshift) - 1; - uint32_t yymask = (1 << yyshift) - 1; - uint32_t x = xfrac >> xxbits; - uint32_t y = yfrac >> yybits; - - uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; - uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; - - uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; - uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; - uint32_t a = 16 - inv_a; - uint32_t b = 16 - inv_b; - - uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - 
__m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z - __m128 Lx = _mm_sub_ps(light_x, viewpos_x); - __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - fgcolor 
= _mm_mullo_epi16(fgcolor, alpha); - bgcolor = _mm_mullo_epi16(bgcolor, inv_alpha); - __m128i outcolor = _mm_srli_epi16(_mm_add_epi16(fgcolor, bgcolor), 8); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - dest[offset] = _mm_cvtsi128_si32(outcolor); - } - } - else - { - // Shade constants - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - - auto lights = args.dc_lights; - auto num_lights = args.dc_num_lights; - float vpx = args.dc_viewpos.X; - float stepvpx = args.dc_viewpos_step.X; - __m128 viewpos_x = _mm_setr_ps(vpx, vpx + stepvpx, 0.0f, 0.0f); - __m128 step_viewpos_x = _mm_set1_ps(stepvpx * 2.0f); - - int count = args.DestX2() - args.DestX1() + 1; - int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); - - xfrac -= 1 << (31 - xbits); - yfrac -= 1 << (31 - ybits); - uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); - - int ssecount = count / 2; - for (int index = 0; index < ssecount; index++) - { - int offset = index * 2; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(dest + offset)), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2]; - { - uint32_t xxbits = 32 - xbits; - uint32_t yybits = 32 - ybits; - uint32_t xxshift = (32 - xxbits); - uint32_t yyshift = (32 - yybits); - uint32_t xxmask = (1 << xxshift) - 1; - uint32_t yymask = (1 << yyshift) - 1; - uint32_t x = xfrac >> xxbits; - uint32_t y = yfrac >> yybits; - - uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p10 = 
source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; - uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; - - uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; - uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; - uint32_t a = 16 - inv_a; - uint32_t b = 16 - inv_b; - - uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - ifgcolor[0] = sampleout; - xfrac += xstep; - yfrac += ystep; - } - { - uint32_t xxbits = 32 - xbits; - uint32_t yybits = 32 - ybits; - uint32_t xxshift = (32 - xxbits); - uint32_t yyshift = (32 - yybits); - uint32_t xxmask = (1 << xxshift) - 1; - uint32_t yymask = (1 << yyshift) - 1; - uint32_t x = xfrac >> xxbits; - uint32_t y = yfrac >> yybits; - - uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; - uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; - - uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; - uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; - uint32_t a = 16 - inv_a; - uint32_t b = 16 - inv_b; - - uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * 
(inv_a * inv_b) + 127) >> 8; - uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - ifgcolor[1] = sampleout; - xfrac += xstep; - yfrac += ystep; - } - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z - __m128 Lx = _mm_sub_ps(light_x, viewpos_x); - __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, 
_MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - fgcolor = _mm_mullo_epi16(fgcolor, alpha); - bgcolor = _mm_mullo_epi16(bgcolor, inv_alpha); - __m128i outcolor = _mm_srli_epi16(_mm_add_epi16(fgcolor, bgcolor), 8); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - _mm_storel_epi64((__m128i*)(dest + offset), outcolor); - viewpos_x = _mm_add_ps(viewpos_x, step_viewpos_x); - } - - if (ssecount * 2 != count) - { - int index = ssecount * 2; - int offset = index; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2]; - uint32_t xxbits = 32 - xbits; - uint32_t yybits = 32 - ybits; - uint32_t xxshift = (32 - xxbits); - uint32_t yyshift = (32 - yybits); - uint32_t xxmask = (1 << xxshift) - 1; - uint32_t yymask = (1 << yyshift) - 1; - uint32_t x = xfrac >> xxbits; - uint32_t y = yfrac >> yybits; - - uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; - uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; - - uint32_t inv_b = (xfrac >> 
(xxbits - 4)) & 15; - uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; - uint32_t a = 16 - inv_a; - uint32_t b = 16 - inv_b; - - uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z - __m128 Lx = _mm_sub_ps(light_x, viewpos_x); - __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), 
distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - fgcolor = _mm_mullo_epi16(fgcolor, alpha); - bgcolor = _mm_mullo_epi16(bgcolor, inv_alpha); - __m128i outcolor = _mm_srli_epi16(_mm_add_epi16(fgcolor, bgcolor), 8); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - dest[offset] = _mm_cvtsi128_si32(outcolor); - } - } - } - } - else - { - if (is_nearest_filter) - { - bool is_64x64 = xbits == 6 && ybits == 6; - if (is_64x64) - { - // Shade constants - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 
256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); - __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); - shade_fade = _mm_mullo_epi16(shade_fade, inv_light); - __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); - int desaturate = shade_constants.desaturate; - - auto lights = args.dc_lights; - auto num_lights = args.dc_num_lights; - float vpx = args.dc_viewpos.X; - float stepvpx = args.dc_viewpos_step.X; - __m128 viewpos_x = _mm_setr_ps(vpx, vpx + stepvpx, 0.0f, 0.0f); - __m128 step_viewpos_x = _mm_set1_ps(stepvpx * 2.0f); - - int count = args.DestX2() - args.DestX1() + 1; - int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); - - uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); - - int ssecount = count / 2; - for (int index = 0; index < ssecount; index++) - { - int offset = index * 2; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(dest + offset)), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2]; - { - int sample_index = ((xfrac >> (32 - 6 - 6)) & (63 * 64)) + (yfrac >> (32 - 6)); - unsigned int sampleout = source[sample_index]; - ifgcolor[0] = sampleout; - xfrac += xstep; - yfrac += ystep; - } - { - int sample_index = ((xfrac >> (32 - 6 - 6)) & (63 * 64)) + (yfrac >> (32 - 6)); - unsigned int sampleout = source[sample_index]; - ifgcolor[1] = sampleout; - xfrac += xstep; - yfrac += ystep; - } - 
__m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - int blue0 = BPART(ifgcolor[0]); - int green0 = GPART(ifgcolor[0]); - int red0 = RPART(ifgcolor[0]); - int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - - int blue1 = BPART(ifgcolor[1]); - int green1 = GPART(ifgcolor[1]); - int red1 = RPART(ifgcolor[1]); - int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; - - __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); - - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, mlight); - fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z - __m128 Lx = _mm_sub_ps(light_x, viewpos_x); - __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); - __m128i attenuation = 
_mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - fgcolor = _mm_mullo_epi16(fgcolor, alpha); - bgcolor = _mm_mullo_epi16(bgcolor, inv_alpha); - __m128i outcolor = _mm_srli_epi16(_mm_add_epi16(fgcolor, bgcolor), 8); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - _mm_storel_epi64((__m128i*)(dest + offset), outcolor); - viewpos_x = _mm_add_ps(viewpos_x, step_viewpos_x); - } - - if (ssecount * 2 != count) - { - int index = ssecount * 2; - int offset = index; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2]; - int sample_index = ((xfrac >> (32 - 6 - 6)) & (63 * 64)) + (yfrac >> (32 - 6)); - unsigned int sampleout = source[sample_index]; - ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - int blue0 = BPART(ifgcolor[0]); - int green0 = 
GPART(ifgcolor[0]); - int red0 = RPART(ifgcolor[0]); - int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - - int blue1 = BPART(ifgcolor[1]); - int green1 = GPART(ifgcolor[1]); - int red1 = RPART(ifgcolor[1]); - int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; - - __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); - - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, mlight); - fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z - __m128 Lx = _mm_sub_ps(light_x, viewpos_x); - __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, 
_MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - fgcolor = _mm_mullo_epi16(fgcolor, alpha); - bgcolor = _mm_mullo_epi16(bgcolor, inv_alpha); - __m128i outcolor = _mm_srli_epi16(_mm_add_epi16(fgcolor, bgcolor), 8); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - dest[offset] = _mm_cvtsi128_si32(outcolor); - } - } - else - { - // Shade constants - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); - __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); - shade_fade = 
_mm_mullo_epi16(shade_fade, inv_light); - __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); - int desaturate = shade_constants.desaturate; - - auto lights = args.dc_lights; - auto num_lights = args.dc_num_lights; - float vpx = args.dc_viewpos.X; - float stepvpx = args.dc_viewpos_step.X; - __m128 viewpos_x = _mm_setr_ps(vpx, vpx + stepvpx, 0.0f, 0.0f); - __m128 step_viewpos_x = _mm_set1_ps(stepvpx * 2.0f); - - int count = args.DestX2() - args.DestX1() + 1; - int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); - - uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); - - int ssecount = count / 2; - for (int index = 0; index < ssecount; index++) - { - int offset = index * 2; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(dest + offset)), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2]; - { - int sample_index = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - unsigned int sampleout = source[sample_index]; - ifgcolor[0] = sampleout; - xfrac += xstep; - yfrac += ystep; - } - { - int sample_index = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - unsigned int sampleout = source[sample_index]; - ifgcolor[1] = sampleout; - xfrac += xstep; - yfrac += ystep; - } - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - int blue0 = BPART(ifgcolor[0]); - int green0 = GPART(ifgcolor[0]); - int red0 = RPART(ifgcolor[0]); - int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - - int blue1 = BPART(ifgcolor[1]); - int green1 = GPART(ifgcolor[1]); - int red1 = 
RPART(ifgcolor[1]); - int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; - - __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); - - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, mlight); - fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z - __m128 Lx = _mm_sub_ps(light_x, viewpos_x); - __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = 
_mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - fgcolor = _mm_mullo_epi16(fgcolor, alpha); - bgcolor = _mm_mullo_epi16(bgcolor, inv_alpha); - __m128i outcolor = _mm_srli_epi16(_mm_add_epi16(fgcolor, bgcolor), 8); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - _mm_storel_epi64((__m128i*)(dest + offset), outcolor); - viewpos_x = _mm_add_ps(viewpos_x, step_viewpos_x); - } - - if (ssecount * 2 != count) - { - int index = ssecount * 2; - int offset = index; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2]; - int sample_index = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - unsigned int sampleout = source[sample_index]; - ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - int blue0 = BPART(ifgcolor[0]); - int green0 = GPART(ifgcolor[0]); - int red0 = RPART(ifgcolor[0]); - int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - - int blue1 = BPART(ifgcolor[1]); - int green1 = GPART(ifgcolor[1]); - int red1 = RPART(ifgcolor[1]); - int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; - - __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); - 
- fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, mlight); - fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z - __m128 Lx = _mm_sub_ps(light_x, viewpos_x); - __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, 
_mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - fgcolor = _mm_mullo_epi16(fgcolor, alpha); - bgcolor = _mm_mullo_epi16(bgcolor, inv_alpha); - __m128i outcolor = _mm_srli_epi16(_mm_add_epi16(fgcolor, bgcolor), 8); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - dest[offset] = _mm_cvtsi128_si32(outcolor); - } - } - } - else - { - bool is_64x64 = xbits == 6 && ybits == 6; - if (is_64x64) - { - // Shade constants - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); - __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); - shade_fade = _mm_mullo_epi16(shade_fade, inv_light); - __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); - int desaturate = shade_constants.desaturate; - - auto lights 
= args.dc_lights; - auto num_lights = args.dc_num_lights; - float vpx = args.dc_viewpos.X; - float stepvpx = args.dc_viewpos_step.X; - __m128 viewpos_x = _mm_setr_ps(vpx, vpx + stepvpx, 0.0f, 0.0f); - __m128 step_viewpos_x = _mm_set1_ps(stepvpx * 2.0f); - - int count = args.DestX2() - args.DestX1() + 1; - int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); - - xfrac -= 1 << (31 - xbits); - yfrac -= 1 << (31 - ybits); - uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); - - int ssecount = count / 2; - for (int index = 0; index < ssecount; index++) - { - int offset = index * 2; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(dest + offset)), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2]; - { - uint32_t xxbits = 26; - uint32_t yybits = 26; - uint32_t xxshift = (32 - xxbits); - uint32_t yyshift = (32 - yybits); - uint32_t xxmask = (1 << xxshift) - 1; - uint32_t yymask = (1 << yyshift) - 1; - uint32_t x = xfrac >> xxbits; - uint32_t y = yfrac >> yybits; - - uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; - uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; - - uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; - uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; - uint32_t a = 16 - inv_a; - uint32_t b = 16 - inv_b; - - uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * 
inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - ifgcolor[0] = sampleout; - xfrac += xstep; - yfrac += ystep; - } - { - uint32_t xxbits = 26; - uint32_t yybits = 26; - uint32_t xxshift = (32 - xxbits); - uint32_t yyshift = (32 - yybits); - uint32_t xxmask = (1 << xxshift) - 1; - uint32_t yymask = (1 << yyshift) - 1; - uint32_t x = xfrac >> xxbits; - uint32_t y = yfrac >> yybits; - - uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; - uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; - - uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; - uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; - uint32_t a = 16 - inv_a; - uint32_t b = 16 - inv_b; - - uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - ifgcolor[1] = sampleout; - xfrac += xstep; - yfrac += ystep; - } - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - int blue0 = BPART(ifgcolor[0]); - int green0 = GPART(ifgcolor[0]); - int red0 = 
RPART(ifgcolor[0]); - int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - - int blue1 = BPART(ifgcolor[1]); - int green1 = GPART(ifgcolor[1]); - int red1 = RPART(ifgcolor[1]); - int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; - - __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); - - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, mlight); - fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z - __m128 Lx = _mm_sub_ps(light_x, viewpos_x); - __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), 
_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - fgcolor = _mm_mullo_epi16(fgcolor, alpha); - bgcolor = _mm_mullo_epi16(bgcolor, inv_alpha); - __m128i outcolor = _mm_srli_epi16(_mm_add_epi16(fgcolor, bgcolor), 8); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - _mm_storel_epi64((__m128i*)(dest + offset), outcolor); - viewpos_x = _mm_add_ps(viewpos_x, step_viewpos_x); - } - - if (ssecount * 2 != count) - { - int index = ssecount * 2; - int offset = index; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2]; - uint32_t xxbits = 26; - uint32_t yybits = 26; - uint32_t xxshift = (32 - xxbits); - uint32_t yyshift = (32 - yybits); - uint32_t xxmask = (1 << xxshift) - 1; - uint32_t yymask = (1 << yyshift) - 1; - uint32_t x = xfrac >> xxbits; - uint32_t y = yfrac >> yybits; - - uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; - uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; - - uint32_t inv_b = 
(xfrac >> (xxbits - 4)) & 15; - uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; - uint32_t a = 16 - inv_a; - uint32_t b = 16 - inv_b; - - uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - int blue0 = BPART(ifgcolor[0]); - int green0 = GPART(ifgcolor[0]); - int red0 = RPART(ifgcolor[0]); - int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - - int blue1 = BPART(ifgcolor[1]); - int green1 = GPART(ifgcolor[1]); - int red1 = RPART(ifgcolor[1]); - int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; - - __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); - - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, mlight); - fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = 
_mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z - __m128 Lx = _mm_sub_ps(light_x, viewpos_x); - __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - fgcolor = _mm_mullo_epi16(fgcolor, alpha); - bgcolor = _mm_mullo_epi16(bgcolor, inv_alpha); - __m128i outcolor = _mm_srli_epi16(_mm_add_epi16(fgcolor, bgcolor), 8); 
- outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - dest[offset] = _mm_cvtsi128_si32(outcolor); - } - } - else - { - // Shade constants - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); - __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); - shade_fade = _mm_mullo_epi16(shade_fade, inv_light); - __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); - int desaturate = shade_constants.desaturate; - - auto lights = args.dc_lights; - auto num_lights = args.dc_num_lights; - float vpx = args.dc_viewpos.X; - float stepvpx = args.dc_viewpos_step.X; - __m128 viewpos_x = _mm_setr_ps(vpx, vpx + stepvpx, 0.0f, 0.0f); - __m128 step_viewpos_x = _mm_set1_ps(stepvpx * 2.0f); - - int count = args.DestX2() - args.DestX1() + 1; - int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); - - xfrac -= 1 << (31 - xbits); - yfrac -= 1 << (31 - ybits); - uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() 
>> (FRACBITS - 8); - - int ssecount = count / 2; - for (int index = 0; index < ssecount; index++) - { - int offset = index * 2; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(dest + offset)), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2]; - { - uint32_t xxbits = 32 - xbits; - uint32_t yybits = 32 - ybits; - uint32_t xxshift = (32 - xxbits); - uint32_t yyshift = (32 - yybits); - uint32_t xxmask = (1 << xxshift) - 1; - uint32_t yymask = (1 << yyshift) - 1; - uint32_t x = xfrac >> xxbits; - uint32_t y = yfrac >> yybits; - - uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; - uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; - - uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; - uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; - uint32_t a = 16 - inv_a; - uint32_t b = 16 - inv_b; - - uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - ifgcolor[0] = sampleout; - xfrac += xstep; - yfrac += ystep; - } - { - uint32_t xxbits = 32 - xbits; - uint32_t yybits = 32 - ybits; - uint32_t xxshift = (32 - xxbits); - uint32_t yyshift = (32 - yybits); - uint32_t xxmask = (1 << xxshift) - 1; - uint32_t yymask = (1 << yyshift) - 1; - uint32_t x = xfrac >> xxbits; - uint32_t y = yfrac >> 
yybits; - - uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; - uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; - - uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; - uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; - uint32_t a = 16 - inv_a; - uint32_t b = 16 - inv_b; - - uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - ifgcolor[1] = sampleout; - xfrac += xstep; - yfrac += ystep; - } - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - int blue0 = BPART(ifgcolor[0]); - int green0 = GPART(ifgcolor[0]); - int red0 = RPART(ifgcolor[0]); - int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - - int blue1 = BPART(ifgcolor[1]); - int green1 = GPART(ifgcolor[1]); - int red1 = RPART(ifgcolor[1]); - int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; - - __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); - - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, mlight); - fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, 
fgcolor), 8); - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z - __m128 Lx = _mm_sub_ps(light_x, viewpos_x); - __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, 
_MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - fgcolor = _mm_mullo_epi16(fgcolor, alpha); - bgcolor = _mm_mullo_epi16(bgcolor, inv_alpha); - __m128i outcolor = _mm_srli_epi16(_mm_add_epi16(fgcolor, bgcolor), 8); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - _mm_storel_epi64((__m128i*)(dest + offset), outcolor); - viewpos_x = _mm_add_ps(viewpos_x, step_viewpos_x); - } - - if (ssecount * 2 != count) - { - int index = ssecount * 2; - int offset = index; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2]; - uint32_t xxbits = 32 - xbits; - uint32_t yybits = 32 - ybits; - uint32_t xxshift = (32 - xxbits); - uint32_t yyshift = (32 - yybits); - uint32_t xxmask = (1 << xxshift) - 1; - uint32_t yymask = (1 << yyshift) - 1; - uint32_t x = xfrac >> xxbits; - uint32_t y = yfrac >> yybits; - - uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; - uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; - - uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; - uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; - uint32_t a = 16 - inv_a; - uint32_t b = 16 - inv_b; - - uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t salpha = (APART(p00) * (a * b) + 
APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - int blue0 = BPART(ifgcolor[0]); - int green0 = GPART(ifgcolor[0]); - int red0 = RPART(ifgcolor[0]); - int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - - int blue1 = BPART(ifgcolor[1]); - int green1 = GPART(ifgcolor[1]); - int red1 = RPART(ifgcolor[1]); - int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; - - __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); - - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, mlight); - fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z - __m128 Lx = _mm_sub_ps(light_x, viewpos_x); - __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // 
diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - fgcolor = _mm_mullo_epi16(fgcolor, alpha); - bgcolor = _mm_mullo_epi16(bgcolor, inv_alpha); - __m128i outcolor = _mm_srli_epi16(_mm_add_epi16(fgcolor, bgcolor), 8); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - dest[offset] = _mm_cvtsi128_si32(outcolor); - } - } - } - } - } - - FString DebugInfo() override { return "DrawSpanMasked32Command"; } - }; - - class DrawSpanTranslucent32Command : public DrawerCommand - { - protected: - SpanDrawerArgs args; - - public: - DrawSpanTranslucent32Command(const SpanDrawerArgs &drawerargs) : args(drawerargs) { } - - void Execute(DrawerThread *thread) override - { - if (thread->line_skipped_by_thread(args.DestY())) return; - - uint32_t xbits = 
args.TextureWidthBits(); - uint32_t ybits = args.TextureHeightBits(); - uint32_t xstep = args.TextureUStep(); - uint32_t ystep = args.TextureVStep(); - uint32_t xfrac = args.TextureUPos(); - uint32_t yfrac = args.TextureVPos(); - uint32_t yshift = 32 - ybits; - uint32_t xshift = yshift - xbits; - uint32_t xmask = ((1 << xbits) - 1) << ybits; - - const uint32_t *source = (const uint32_t*)args.TexturePixels(); - - double lod = args.TextureLOD(); - bool mipmapped = args.MipmappedTexture(); - - bool magnifying = lod < 0.0; - if (r_mipmap && mipmapped) - { - int level = (int)lod; - while (level > 0) - { - if (xbits <= 2 || ybits <= 2) - break; - - source += (1 << (xbits)) * (1 << (ybits)); - xbits -= 1; - ybits -= 1; - level--; - } - } - - bool is_nearest_filter = !((magnifying && r_magfilter) || (!magnifying && r_minfilter)); - - auto shade_constants = args.ColormapConstants(); - if (shade_constants.simple_shade) - { - if (is_nearest_filter) - { - bool is_64x64 = xbits == 6 && ybits == 6; - if (is_64x64) - { - // Shade constants - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - - auto lights = args.dc_lights; - auto num_lights = args.dc_num_lights; - float vpx = args.dc_viewpos.X; - float stepvpx = args.dc_viewpos_step.X; - __m128 viewpos_x = _mm_setr_ps(vpx, vpx + stepvpx, 0.0f, 0.0f); - __m128 step_viewpos_x = _mm_set1_ps(stepvpx * 2.0f); - - int count = args.DestX2() - args.DestX1() + 1; - int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); - - uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); - - int ssecount = count / 2; - for (int index = 0; index < ssecount; index++) - { - int 
offset = index * 2; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(dest + offset)), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2]; - { - int sample_index = ((xfrac >> (32 - 6 - 6)) & (63 * 64)) + (yfrac >> (32 - 6)); - unsigned int sampleout = source[sample_index]; - ifgcolor[0] = sampleout; - xfrac += xstep; - yfrac += ystep; - } - { - int sample_index = ((xfrac >> (32 - 6 - 6)) & (63 * 64)) + (yfrac >> (32 - 6)); - unsigned int sampleout = source[sample_index]; - ifgcolor[1] = sampleout; - xfrac += xstep; - yfrac += ystep; - } - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z - __m128 Lx = _mm_sub_ps(light_x, viewpos_x); - __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = 
_mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - __m128i fgalpha = _mm_set1_epi16(srcalpha); - __m128i bgalpha = _mm_set1_epi16(destalpha); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); - __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - _mm_storel_epi64((__m128i*)(dest + offset), outcolor); - viewpos_x = _mm_add_ps(viewpos_x, step_viewpos_x); - } - - if (ssecount * 2 != count) - { - int index = ssecount * 2; - int offset = index; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2]; - int sample_index = ((xfrac >> (32 - 6 - 6)) & (63 * 64)) + (yfrac >> (32 - 6)); - unsigned int sampleout = source[sample_index]; - ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), 
_mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z - __m128 Lx = _mm_sub_ps(light_x, viewpos_x); - __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - __m128i fgalpha = _mm_set1_epi16(srcalpha); - __m128i bgalpha = 
_mm_set1_epi16(destalpha); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); - __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - dest[offset] = _mm_cvtsi128_si32(outcolor); - } - } - else - { - // Shade constants - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - - auto lights = args.dc_lights; - auto num_lights = args.dc_num_lights; - float vpx = args.dc_viewpos.X; - float stepvpx = args.dc_viewpos_step.X; - __m128 viewpos_x = _mm_setr_ps(vpx, vpx + stepvpx, 0.0f, 0.0f); - __m128 step_viewpos_x = _mm_set1_ps(stepvpx * 2.0f); - - int count = args.DestX2() - args.DestX1() + 1; - int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); - - uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); - - int ssecount = count / 2; - for (int index = 0; index < ssecount; index++) - { - int offset = index * 2; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(dest + offset)), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2]; - { - int sample_index = ((xfrac >> 
xshift) & xmask) + (yfrac >> yshift); - unsigned int sampleout = source[sample_index]; - ifgcolor[0] = sampleout; - xfrac += xstep; - yfrac += ystep; - } - { - int sample_index = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - unsigned int sampleout = source[sample_index]; - ifgcolor[1] = sampleout; - xfrac += xstep; - yfrac += ystep; - } - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z - __m128 Lx = _mm_sub_ps(light_x, viewpos_x); - __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = 
_mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - __m128i fgalpha = _mm_set1_epi16(srcalpha); - __m128i bgalpha = _mm_set1_epi16(destalpha); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); - __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - _mm_storel_epi64((__m128i*)(dest + offset), outcolor); - viewpos_x = _mm_add_ps(viewpos_x, step_viewpos_x); - } - - if (ssecount * 2 != count) - { - int index = ssecount * 2; - int offset = index; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2]; - int sample_index = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - unsigned int sampleout = source[sample_index]; - ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 
light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z - __m128 Lx = _mm_sub_ps(light_x, viewpos_x); - __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - __m128i fgalpha = _mm_set1_epi16(srcalpha); - __m128i bgalpha = _mm_set1_epi16(destalpha); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = 
_mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); - __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - dest[offset] = _mm_cvtsi128_si32(outcolor); - } - } - } - else - { - bool is_64x64 = xbits == 6 && ybits == 6; - if (is_64x64) - { - // Shade constants - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - - auto lights = args.dc_lights; - auto num_lights = args.dc_num_lights; - float vpx = args.dc_viewpos.X; - float stepvpx = args.dc_viewpos_step.X; - __m128 viewpos_x = _mm_setr_ps(vpx, vpx + stepvpx, 0.0f, 0.0f); - __m128 step_viewpos_x = _mm_set1_ps(stepvpx * 2.0f); - - int count = args.DestX2() - args.DestX1() + 1; - int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); - - xfrac -= 1 << (31 - xbits); - yfrac -= 1 << (31 - ybits); - uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); - - int ssecount = count / 2; - for (int index = 0; index < ssecount; index++) - { - int offset = index * 2; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(dest + offset)), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2]; - { - uint32_t xxbits = 26; - uint32_t yybits = 26; - uint32_t xxshift = (32 - xxbits); - uint32_t yyshift = (32 - 
yybits); - uint32_t xxmask = (1 << xxshift) - 1; - uint32_t yymask = (1 << yyshift) - 1; - uint32_t x = xfrac >> xxbits; - uint32_t y = yfrac >> yybits; - - uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; - uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; - - uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; - uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; - uint32_t a = 16 - inv_a; - uint32_t b = 16 - inv_b; - - uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - ifgcolor[0] = sampleout; - xfrac += xstep; - yfrac += ystep; - } - { - uint32_t xxbits = 26; - uint32_t yybits = 26; - uint32_t xxshift = (32 - xxbits); - uint32_t yyshift = (32 - yybits); - uint32_t xxmask = (1 << xxshift) - 1; - uint32_t yymask = (1 << yyshift) - 1; - uint32_t x = xfrac >> xxbits; - uint32_t y = yfrac >> yybits; - - uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; - uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; - - uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; - uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; - uint32_t 
a = 16 - inv_a; - uint32_t b = 16 - inv_b; - - uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - ifgcolor[1] = sampleout; - xfrac += xstep; - yfrac += ystep; - } - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z - __m128 Lx = _mm_sub_ps(light_x, viewpos_x); - __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_z, 
_mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - __m128i fgalpha = _mm_set1_epi16(srcalpha); - __m128i bgalpha = _mm_set1_epi16(destalpha); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); - __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - _mm_storel_epi64((__m128i*)(dest + offset), outcolor); - viewpos_x = _mm_add_ps(viewpos_x, step_viewpos_x); - } - - if (ssecount * 2 != count) - { - int index = ssecount * 2; - int offset = index; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2]; - uint32_t xxbits = 26; - uint32_t yybits = 26; - uint32_t xxshift = (32 - 
xxbits); - uint32_t yyshift = (32 - yybits); - uint32_t xxmask = (1 << xxshift) - 1; - uint32_t yymask = (1 << yyshift) - 1; - uint32_t x = xfrac >> xxbits; - uint32_t y = yfrac >> yybits; - - uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; - uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; - - uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; - uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; - uint32_t a = 16 - inv_a; - uint32_t b = 16 - inv_b; - - uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lyz2 = light_y; // L.y*L.y + 
L.z*L.z - __m128 Lx = _mm_sub_ps(light_x, viewpos_x); - __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - __m128i fgalpha = _mm_set1_epi16(srcalpha); - __m128i bgalpha = _mm_set1_epi16(destalpha); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); - __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i 
outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - dest[offset] = _mm_cvtsi128_si32(outcolor); - } - } - else - { - // Shade constants - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - - auto lights = args.dc_lights; - auto num_lights = args.dc_num_lights; - float vpx = args.dc_viewpos.X; - float stepvpx = args.dc_viewpos_step.X; - __m128 viewpos_x = _mm_setr_ps(vpx, vpx + stepvpx, 0.0f, 0.0f); - __m128 step_viewpos_x = _mm_set1_ps(stepvpx * 2.0f); - - int count = args.DestX2() - args.DestX1() + 1; - int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); - - xfrac -= 1 << (31 - xbits); - yfrac -= 1 << (31 - ybits); - uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); - - int ssecount = count / 2; - for (int index = 0; index < ssecount; index++) - { - int offset = index * 2; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(dest + offset)), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2]; - { - uint32_t xxbits = 32 - xbits; - uint32_t yybits = 32 - ybits; - uint32_t xxshift = (32 - xxbits); - uint32_t yyshift = (32 - yybits); - uint32_t xxmask = (1 << xxshift) - 1; - uint32_t yymask = (1 << yyshift) - 1; - uint32_t x = xfrac >> xxbits; - uint32_t y = yfrac >> yybits; - - uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; - uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) 
& xxmask) << yyshift))]; - - uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; - uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; - uint32_t a = 16 - inv_a; - uint32_t b = 16 - inv_b; - - uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - ifgcolor[0] = sampleout; - xfrac += xstep; - yfrac += ystep; - } - { - uint32_t xxbits = 32 - xbits; - uint32_t yybits = 32 - ybits; - uint32_t xxshift = (32 - xxbits); - uint32_t yyshift = (32 - yybits); - uint32_t xxmask = (1 << xxshift) - 1; - uint32_t yymask = (1 << yyshift) - 1; - uint32_t x = xfrac >> xxbits; - uint32_t y = yfrac >> yybits; - - uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; - uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; - - uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; - uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; - uint32_t a = 16 - inv_a; - uint32_t b = 16 - inv_b; - - uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * 
inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - ifgcolor[1] = sampleout; - xfrac += xstep; - yfrac += ystep; - } - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z - __m128 Lx = _mm_sub_ps(light_x, viewpos_x); - __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = 
_mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - __m128i fgalpha = _mm_set1_epi16(srcalpha); - __m128i bgalpha = _mm_set1_epi16(destalpha); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); - __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - _mm_storel_epi64((__m128i*)(dest + offset), outcolor); - viewpos_x = _mm_add_ps(viewpos_x, step_viewpos_x); - } - - if (ssecount * 2 != count) - { - int index = ssecount * 2; - int offset = index; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2]; - uint32_t xxbits = 32 - xbits; - uint32_t yybits = 32 - ybits; - uint32_t xxshift = (32 - xxbits); - uint32_t yyshift = (32 - yybits); - uint32_t xxmask = (1 << xxshift) - 1; - uint32_t yymask = (1 << yyshift) - 1; - uint32_t x = xfrac >> xxbits; - uint32_t y = yfrac >> yybits; - - uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p10 = source[((y & yymask) + 
(((x + 1) & xxmask) << yyshift))]; - uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; - - uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; - uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; - uint32_t a = 16 - inv_a; - uint32_t b = 16 - inv_b; - - uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z - __m128 Lx = _mm_sub_ps(light_x, viewpos_x); - __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - 
// The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - __m128i fgalpha = _mm_set1_epi16(srcalpha); - __m128i bgalpha = _mm_set1_epi16(destalpha); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); - __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - dest[offset] = _mm_cvtsi128_si32(outcolor); - } - } - } - } - else - { - if (is_nearest_filter) - { - bool is_64x64 = xbits == 6 && ybits == 6; - if (is_64x64) - { - // Shade constants - int light = 256 - 
(args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); - __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); - shade_fade = _mm_mullo_epi16(shade_fade, inv_light); - __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); - int desaturate = shade_constants.desaturate; - - auto lights = args.dc_lights; - auto num_lights = args.dc_num_lights; - float vpx = args.dc_viewpos.X; - float stepvpx = args.dc_viewpos_step.X; - __m128 viewpos_x = _mm_setr_ps(vpx, vpx + stepvpx, 0.0f, 0.0f); - __m128 step_viewpos_x = _mm_set1_ps(stepvpx * 2.0f); - - int count = args.DestX2() - args.DestX1() + 1; - int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); - - uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); - - int ssecount = count / 2; - for (int index = 0; index < ssecount; index++) - { - int offset = index * 2; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(dest + offset)), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2]; - { - int 
sample_index = ((xfrac >> (32 - 6 - 6)) & (63 * 64)) + (yfrac >> (32 - 6)); - unsigned int sampleout = source[sample_index]; - ifgcolor[0] = sampleout; - xfrac += xstep; - yfrac += ystep; - } - { - int sample_index = ((xfrac >> (32 - 6 - 6)) & (63 * 64)) + (yfrac >> (32 - 6)); - unsigned int sampleout = source[sample_index]; - ifgcolor[1] = sampleout; - xfrac += xstep; - yfrac += ystep; - } - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - int blue0 = BPART(ifgcolor[0]); - int green0 = GPART(ifgcolor[0]); - int red0 = RPART(ifgcolor[0]); - int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - - int blue1 = BPART(ifgcolor[1]); - int green1 = GPART(ifgcolor[1]); - int red1 = RPART(ifgcolor[1]); - int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; - - __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); - - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, mlight); - fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z - __m128 Lx = _mm_sub_ps(light_x, viewpos_x); - __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, 
_mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - __m128i fgalpha = _mm_set1_epi16(srcalpha); - __m128i bgalpha = _mm_set1_epi16(destalpha); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); - __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - _mm_storel_epi64((__m128i*)(dest + offset), outcolor); - viewpos_x = 
_mm_add_ps(viewpos_x, step_viewpos_x); - } - - if (ssecount * 2 != count) - { - int index = ssecount * 2; - int offset = index; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2]; - int sample_index = ((xfrac >> (32 - 6 - 6)) & (63 * 64)) + (yfrac >> (32 - 6)); - unsigned int sampleout = source[sample_index]; - ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - int blue0 = BPART(ifgcolor[0]); - int green0 = GPART(ifgcolor[0]); - int red0 = RPART(ifgcolor[0]); - int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - - int blue1 = BPART(ifgcolor[1]); - int green1 = GPART(ifgcolor[1]); - int red1 = RPART(ifgcolor[1]); - int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; - - __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); - - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, mlight); - fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z - __m128 Lx = _mm_sub_ps(light_x, viewpos_x); - __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - 
__m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - __m128i fgalpha = _mm_set1_epi16(srcalpha); - __m128i bgalpha = _mm_set1_epi16(destalpha); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); - __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - dest[offset] = 
_mm_cvtsi128_si32(outcolor); - } - } - else - { - // Shade constants - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); - __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); - shade_fade = _mm_mullo_epi16(shade_fade, inv_light); - __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); - int desaturate = shade_constants.desaturate; - - auto lights = args.dc_lights; - auto num_lights = args.dc_num_lights; - float vpx = args.dc_viewpos.X; - float stepvpx = args.dc_viewpos_step.X; - __m128 viewpos_x = _mm_setr_ps(vpx, vpx + stepvpx, 0.0f, 0.0f); - __m128 step_viewpos_x = _mm_set1_ps(stepvpx * 2.0f); - - int count = args.DestX2() - args.DestX1() + 1; - int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); - - uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); - - int ssecount = count / 2; - for (int index = 0; index < ssecount; index++) - { - int offset = index * 2; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(dest + 
offset)), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2]; - { - int sample_index = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - unsigned int sampleout = source[sample_index]; - ifgcolor[0] = sampleout; - xfrac += xstep; - yfrac += ystep; - } - { - int sample_index = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - unsigned int sampleout = source[sample_index]; - ifgcolor[1] = sampleout; - xfrac += xstep; - yfrac += ystep; - } - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - int blue0 = BPART(ifgcolor[0]); - int green0 = GPART(ifgcolor[0]); - int red0 = RPART(ifgcolor[0]); - int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - - int blue1 = BPART(ifgcolor[1]); - int green1 = GPART(ifgcolor[1]); - int red1 = RPART(ifgcolor[1]); - int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; - - __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); - - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, mlight); - fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z - __m128 Lx = _mm_sub_ps(light_x, viewpos_x); - __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, 
rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - __m128i fgalpha = _mm_set1_epi16(srcalpha); - __m128i bgalpha = _mm_set1_epi16(destalpha); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); - __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - 
_mm_storel_epi64((__m128i*)(dest + offset), outcolor); - viewpos_x = _mm_add_ps(viewpos_x, step_viewpos_x); - } - - if (ssecount * 2 != count) - { - int index = ssecount * 2; - int offset = index; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2]; - int sample_index = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - unsigned int sampleout = source[sample_index]; - ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - int blue0 = BPART(ifgcolor[0]); - int green0 = GPART(ifgcolor[0]); - int red0 = RPART(ifgcolor[0]); - int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - - int blue1 = BPART(ifgcolor[1]); - int green1 = GPART(ifgcolor[1]); - int red1 = RPART(ifgcolor[1]); - int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; - - __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); - - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, mlight); - fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z - __m128 Lx = _mm_sub_ps(light_x, viewpos_x); - __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); - __m128 rcp_dist = 
_mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - __m128i fgalpha = _mm_set1_epi16(srcalpha); - __m128i bgalpha = _mm_set1_epi16(destalpha); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); - __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = 
_mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - dest[offset] = _mm_cvtsi128_si32(outcolor); - } - } - } - else - { - bool is_64x64 = xbits == 6 && ybits == 6; - if (is_64x64) - { - // Shade constants - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); - __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); - shade_fade = _mm_mullo_epi16(shade_fade, inv_light); - __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); - int desaturate = shade_constants.desaturate; - - auto lights = args.dc_lights; - auto num_lights = args.dc_num_lights; - float vpx = args.dc_viewpos.X; - float stepvpx = args.dc_viewpos_step.X; - __m128 viewpos_x = _mm_setr_ps(vpx, vpx + stepvpx, 0.0f, 0.0f); - __m128 step_viewpos_x = _mm_set1_ps(stepvpx * 2.0f); - - int count = args.DestX2() - args.DestX1() + 1; - int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); - - xfrac -= 1 << (31 - xbits); - yfrac -= 1 << (31 - ybits); - uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() >> 
(FRACBITS - 8); - - int ssecount = count / 2; - for (int index = 0; index < ssecount; index++) - { - int offset = index * 2; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(dest + offset)), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2]; - { - uint32_t xxbits = 26; - uint32_t yybits = 26; - uint32_t xxshift = (32 - xxbits); - uint32_t yyshift = (32 - yybits); - uint32_t xxmask = (1 << xxshift) - 1; - uint32_t yymask = (1 << yyshift) - 1; - uint32_t x = xfrac >> xxbits; - uint32_t y = yfrac >> yybits; - - uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; - uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; - - uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; - uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; - uint32_t a = 16 - inv_a; - uint32_t b = 16 - inv_b; - - uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - ifgcolor[0] = sampleout; - xfrac += xstep; - yfrac += ystep; - } - { - uint32_t xxbits = 26; - uint32_t yybits = 26; - uint32_t xxshift = (32 - xxbits); - uint32_t yyshift = (32 - yybits); - uint32_t xxmask = (1 << xxshift) - 1; - uint32_t yymask = (1 << yyshift) - 1; - uint32_t x = xfrac >> xxbits; - uint32_t y = yfrac >> yybits; - - uint32_t p00 = 
source[((y & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; - uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; - - uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; - uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; - uint32_t a = 16 - inv_a; - uint32_t b = 16 - inv_b; - - uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - ifgcolor[1] = sampleout; - xfrac += xstep; - yfrac += ystep; - } - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - int blue0 = BPART(ifgcolor[0]); - int green0 = GPART(ifgcolor[0]); - int red0 = RPART(ifgcolor[0]); - int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - - int blue1 = BPART(ifgcolor[1]); - int green1 = GPART(ifgcolor[1]); - int red1 = RPART(ifgcolor[1]); - int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; - - __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); - - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, mlight); - fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); - fgcolor = 
_mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z - __m128 Lx = _mm_sub_ps(light_x, viewpos_x); - __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - __m128i fgalpha = _mm_set1_epi16(srcalpha); - __m128i bgalpha = _mm_set1_epi16(destalpha); - - fgcolor = _mm_mullo_epi16(fgcolor, 
fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); - __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - _mm_storel_epi64((__m128i*)(dest + offset), outcolor); - viewpos_x = _mm_add_ps(viewpos_x, step_viewpos_x); - } - - if (ssecount * 2 != count) - { - int index = ssecount * 2; - int offset = index; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2]; - uint32_t xxbits = 26; - uint32_t yybits = 26; - uint32_t xxshift = (32 - xxbits); - uint32_t yyshift = (32 - yybits); - uint32_t xxmask = (1 << xxshift) - 1; - uint32_t yymask = (1 << yyshift) - 1; - uint32_t x = xfrac >> xxbits; - uint32_t y = yfrac >> yybits; - - uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; - uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; - - uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; - uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; - uint32_t a = 16 - inv_a; - uint32_t b = 16 - inv_b; - - uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * 
(inv_a * inv_b) + 127) >> 8; - uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - int blue0 = BPART(ifgcolor[0]); - int green0 = GPART(ifgcolor[0]); - int red0 = RPART(ifgcolor[0]); - int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - - int blue1 = BPART(ifgcolor[1]); - int green1 = GPART(ifgcolor[1]); - int red1 = RPART(ifgcolor[1]); - int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; - - __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); - - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, mlight); - fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z - __m128 Lx = _mm_sub_ps(light_x, viewpos_x); - __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - 
__m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - __m128i fgalpha = _mm_set1_epi16(srcalpha); - __m128i bgalpha = _mm_set1_epi16(destalpha); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); - __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - dest[offset] = 
_mm_cvtsi128_si32(outcolor); - } - } - else - { - // Shade constants - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); - __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); - shade_fade = _mm_mullo_epi16(shade_fade, inv_light); - __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); - int desaturate = shade_constants.desaturate; - - auto lights = args.dc_lights; - auto num_lights = args.dc_num_lights; - float vpx = args.dc_viewpos.X; - float stepvpx = args.dc_viewpos_step.X; - __m128 viewpos_x = _mm_setr_ps(vpx, vpx + stepvpx, 0.0f, 0.0f); - __m128 step_viewpos_x = _mm_set1_ps(stepvpx * 2.0f); - - int count = args.DestX2() - args.DestX1() + 1; - int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); - - xfrac -= 1 << (31 - xbits); - yfrac -= 1 << (31 - ybits); - uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); - - int ssecount = count / 2; - for (int index = 0; index < ssecount; index++) - { - int offset = index * 2; - __m128i bgcolor 
= _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(dest + offset)), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2]; - { - uint32_t xxbits = 32 - xbits; - uint32_t yybits = 32 - ybits; - uint32_t xxshift = (32 - xxbits); - uint32_t yyshift = (32 - yybits); - uint32_t xxmask = (1 << xxshift) - 1; - uint32_t yymask = (1 << yyshift) - 1; - uint32_t x = xfrac >> xxbits; - uint32_t y = yfrac >> yybits; - - uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; - uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; - - uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; - uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; - uint32_t a = 16 - inv_a; - uint32_t b = 16 - inv_b; - - uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - ifgcolor[0] = sampleout; - xfrac += xstep; - yfrac += ystep; - } - { - uint32_t xxbits = 32 - xbits; - uint32_t yybits = 32 - ybits; - uint32_t xxshift = (32 - xxbits); - uint32_t yyshift = (32 - yybits); - uint32_t xxmask = (1 << xxshift) - 1; - uint32_t yymask = (1 << yyshift) - 1; - uint32_t x = xfrac >> xxbits; - uint32_t y = yfrac >> yybits; - - uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << 
yyshift))]; - uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; - uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; - - uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; - uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; - uint32_t a = 16 - inv_a; - uint32_t b = 16 - inv_b; - - uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - ifgcolor[1] = sampleout; - xfrac += xstep; - yfrac += ystep; - } - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - int blue0 = BPART(ifgcolor[0]); - int green0 = GPART(ifgcolor[0]); - int red0 = RPART(ifgcolor[0]); - int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - - int blue1 = BPART(ifgcolor[1]); - int green1 = GPART(ifgcolor[1]); - int red1 = RPART(ifgcolor[1]); - int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; - - __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); - - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, mlight); - fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i 
!= num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z - __m128 Lx = _mm_sub_ps(light_x, viewpos_x); - __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - __m128i fgalpha = _mm_set1_epi16(srcalpha); - __m128i bgalpha = _mm_set1_epi16(destalpha); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, 
_mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); - __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - _mm_storel_epi64((__m128i*)(dest + offset), outcolor); - viewpos_x = _mm_add_ps(viewpos_x, step_viewpos_x); - } - - if (ssecount * 2 != count) - { - int index = ssecount * 2; - int offset = index; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2]; - uint32_t xxbits = 32 - xbits; - uint32_t yybits = 32 - ybits; - uint32_t xxshift = (32 - xxbits); - uint32_t yyshift = (32 - yybits); - uint32_t xxmask = (1 << xxshift) - 1; - uint32_t yymask = (1 << yyshift) - 1; - uint32_t x = xfrac >> xxbits; - uint32_t y = yfrac >> yybits; - - uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; - uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; - - uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; - uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; - uint32_t a = 16 - inv_a; - uint32_t b = 16 - inv_b; - - uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * 
b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - int blue0 = BPART(ifgcolor[0]); - int green0 = GPART(ifgcolor[0]); - int red0 = RPART(ifgcolor[0]); - int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - - int blue1 = BPART(ifgcolor[1]); - int green1 = GPART(ifgcolor[1]); - int red1 = RPART(ifgcolor[1]); - int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; - - __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); - - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, mlight); - fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z - __m128 Lx = _mm_sub_ps(light_x, viewpos_x); - __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), 
m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - __m128i fgalpha = _mm_set1_epi16(srcalpha); - __m128i bgalpha = _mm_set1_epi16(destalpha); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); - __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - dest[offset] = _mm_cvtsi128_si32(outcolor); - } - } - } - } - } - - FString DebugInfo() override { return 
"DrawSpanTranslucent32Command"; } - }; - - class DrawSpanAddClamp32Command : public DrawerCommand - { - protected: - SpanDrawerArgs args; - - public: - DrawSpanAddClamp32Command(const SpanDrawerArgs &drawerargs) : args(drawerargs) { } - - void Execute(DrawerThread *thread) override - { - if (thread->line_skipped_by_thread(args.DestY())) return; - - uint32_t xbits = args.TextureWidthBits(); - uint32_t ybits = args.TextureHeightBits(); - uint32_t xstep = args.TextureUStep(); - uint32_t ystep = args.TextureVStep(); - uint32_t xfrac = args.TextureUPos(); - uint32_t yfrac = args.TextureVPos(); - uint32_t yshift = 32 - ybits; - uint32_t xshift = yshift - xbits; - uint32_t xmask = ((1 << xbits) - 1) << ybits; - - const uint32_t *source = (const uint32_t*)args.TexturePixels(); - - double lod = args.TextureLOD(); - bool mipmapped = args.MipmappedTexture(); - - bool magnifying = lod < 0.0; - if (r_mipmap && mipmapped) - { - int level = (int)lod; - while (level > 0) - { - if (xbits <= 2 || ybits <= 2) - break; - - source += (1 << (xbits)) * (1 << (ybits)); - xbits -= 1; - ybits -= 1; - level--; - } - } - - bool is_nearest_filter = !((magnifying && r_magfilter) || (!magnifying && r_minfilter)); - - auto shade_constants = args.ColormapConstants(); - if (shade_constants.simple_shade) - { - if (is_nearest_filter) - { - bool is_64x64 = xbits == 6 && ybits == 6; - if (is_64x64) - { - // Shade constants - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - - auto lights = args.dc_lights; - auto num_lights = args.dc_num_lights; - float vpx = args.dc_viewpos.X; - float stepvpx = args.dc_viewpos_step.X; - __m128 viewpos_x = _mm_setr_ps(vpx, vpx + stepvpx, 0.0f, 0.0f); - __m128 step_viewpos_x = _mm_set1_ps(stepvpx * 2.0f); - - int count = args.DestX2() - args.DestX1() + 1; - int 
pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); - - uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); - - int ssecount = count / 2; - for (int index = 0; index < ssecount; index++) - { - int offset = index * 2; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(dest + offset)), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2]; - { - int sample_index = ((xfrac >> (32 - 6 - 6)) & (63 * 64)) + (yfrac >> (32 - 6)); - unsigned int sampleout = source[sample_index]; - ifgcolor[0] = sampleout; - xfrac += xstep; - yfrac += ystep; - } - { - int sample_index = ((xfrac >> (32 - 6 - 6)) & (63 * 64)) + (yfrac >> (32 - 6)); - unsigned int sampleout = source[sample_index]; - ifgcolor[1] = sampleout; - xfrac += xstep; - yfrac += ystep; - } - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z - __m128 Lx = _mm_sub_ps(light_x, viewpos_x); - __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type 
- // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - uint32_t alpha0 = APART(ifgcolor[0]); - uint32_t alpha1 = APART(ifgcolor[1]); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 >> 7; // 255->256 - uint32_t inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; - uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = 
_mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); - __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - _mm_storel_epi64((__m128i*)(dest + offset), outcolor); - viewpos_x = _mm_add_ps(viewpos_x, step_viewpos_x); - } - - if (ssecount * 2 != count) - { - int index = ssecount * 2; - int offset = index; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2]; - int sample_index = ((xfrac >> (32 - 6 - 6)) & (63 * 64)) + (yfrac >> (32 - 6)); - unsigned int sampleout = source[sample_index]; - ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z - __m128 Lx = _mm_sub_ps(light_x, viewpos_x); - __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = 
dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - uint32_t alpha0 = APART(ifgcolor[0]); - uint32_t alpha1 = APART(ifgcolor[1]); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 >> 7; // 255->256 - uint32_t inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; - uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, 
_mm_setzero_si128()); - - __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); - __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - dest[offset] = _mm_cvtsi128_si32(outcolor); - } - } - else - { - // Shade constants - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - - auto lights = args.dc_lights; - auto num_lights = args.dc_num_lights; - float vpx = args.dc_viewpos.X; - float stepvpx = args.dc_viewpos_step.X; - __m128 viewpos_x = _mm_setr_ps(vpx, vpx + stepvpx, 0.0f, 0.0f); - __m128 step_viewpos_x = _mm_set1_ps(stepvpx * 2.0f); - - int count = args.DestX2() - args.DestX1() + 1; - int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); - - uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); - - int ssecount = count / 2; - for (int index = 0; index < ssecount; index++) - { - int offset = index * 2; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(dest + offset)), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2]; - { - int sample_index = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - unsigned int sampleout = source[sample_index]; - ifgcolor[0] = sampleout; - xfrac += xstep; - yfrac += ystep; - } - { - int sample_index = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - unsigned int sampleout = source[sample_index]; - ifgcolor[1] = sampleout; - xfrac += xstep; - yfrac += ystep; - } - __m128i fgcolor = 
_mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z - __m128 Lx = _mm_sub_ps(light_x, viewpos_x); - __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - uint32_t 
alpha0 = APART(ifgcolor[0]); - uint32_t alpha1 = APART(ifgcolor[1]); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 >> 7; // 255->256 - uint32_t inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; - uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); - __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - _mm_storel_epi64((__m128i*)(dest + offset), outcolor); - viewpos_x = _mm_add_ps(viewpos_x, step_viewpos_x); - } - - if (ssecount * 2 != count) - { - int index = ssecount * 2; - int offset = index; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2]; - int sample_index = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - unsigned int sampleout = source[sample_index]; - ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - __m128i fgcolor = 
_mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z - __m128 Lx = _mm_sub_ps(light_x, viewpos_x); - __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - uint32_t 
alpha0 = APART(ifgcolor[0]); - uint32_t alpha1 = APART(ifgcolor[1]); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 >> 7; // 255->256 - uint32_t inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; - uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); - __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - dest[offset] = _mm_cvtsi128_si32(outcolor); - } - } - } - else - { - bool is_64x64 = xbits == 6 && ybits == 6; - if (is_64x64) - { - // Shade constants - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - - auto lights = args.dc_lights; - auto num_lights = args.dc_num_lights; - float vpx = args.dc_viewpos.X; - float stepvpx = 
args.dc_viewpos_step.X; - __m128 viewpos_x = _mm_setr_ps(vpx, vpx + stepvpx, 0.0f, 0.0f); - __m128 step_viewpos_x = _mm_set1_ps(stepvpx * 2.0f); - - int count = args.DestX2() - args.DestX1() + 1; - int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); - - xfrac -= 1 << (31 - xbits); - yfrac -= 1 << (31 - ybits); - uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); - - int ssecount = count / 2; - for (int index = 0; index < ssecount; index++) - { - int offset = index * 2; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(dest + offset)), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2]; - { - uint32_t xxbits = 26; - uint32_t yybits = 26; - uint32_t xxshift = (32 - xxbits); - uint32_t yyshift = (32 - yybits); - uint32_t xxmask = (1 << xxshift) - 1; - uint32_t yymask = (1 << yyshift) - 1; - uint32_t x = xfrac >> xxbits; - uint32_t y = yfrac >> yybits; - - uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; - uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; - - uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; - uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; - uint32_t a = 16 - inv_a; - uint32_t b = 16 - inv_b; - - uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a 
* b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - ifgcolor[0] = sampleout; - xfrac += xstep; - yfrac += ystep; - } - { - uint32_t xxbits = 26; - uint32_t yybits = 26; - uint32_t xxshift = (32 - xxbits); - uint32_t yyshift = (32 - yybits); - uint32_t xxmask = (1 << xxshift) - 1; - uint32_t yymask = (1 << yyshift) - 1; - uint32_t x = xfrac >> xxbits; - uint32_t y = yfrac >> yybits; - - uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; - uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; - - uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; - uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; - uint32_t a = 16 - inv_a; - uint32_t b = 16 - inv_b; - - uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - ifgcolor[1] = sampleout; - xfrac += xstep; - yfrac += ystep; - } - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - 
__m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z - __m128 Lx = _mm_sub_ps(light_x, viewpos_x); - __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - uint32_t alpha0 = APART(ifgcolor[0]); - uint32_t alpha1 = APART(ifgcolor[1]); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 >> 7; // 255->256 - uint32_t inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; - uint32_t 
bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); - __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - _mm_storel_epi64((__m128i*)(dest + offset), outcolor); - viewpos_x = _mm_add_ps(viewpos_x, step_viewpos_x); - } - - if (ssecount * 2 != count) - { - int index = ssecount * 2; - int offset = index; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2]; - uint32_t xxbits = 26; - uint32_t yybits = 26; - uint32_t xxshift = (32 - xxbits); - uint32_t yyshift = (32 - yybits); - uint32_t xxmask = (1 << xxshift) - 1; - uint32_t yymask = (1 << yyshift) - 1; - uint32_t x = xfrac >> xxbits; - uint32_t y = yfrac >> yybits; - - uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; - uint32_t p11 = source[(((y + 1) & yymask) + 
(((x + 1) & xxmask) << yyshift))]; - - uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; - uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; - uint32_t a = 16 - inv_a; - uint32_t b = 16 - inv_b; - - uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z - __m128 Lx = _mm_sub_ps(light_x, viewpos_x); - __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 
point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - uint32_t alpha0 = APART(ifgcolor[0]); - uint32_t alpha1 = APART(ifgcolor[1]); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 >> 7; // 255->256 - uint32_t inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; - uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - 
__m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); - __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - dest[offset] = _mm_cvtsi128_si32(outcolor); - } - } - else - { - // Shade constants - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - - auto lights = args.dc_lights; - auto num_lights = args.dc_num_lights; - float vpx = args.dc_viewpos.X; - float stepvpx = args.dc_viewpos_step.X; - __m128 viewpos_x = _mm_setr_ps(vpx, vpx + stepvpx, 0.0f, 0.0f); - __m128 step_viewpos_x = _mm_set1_ps(stepvpx * 2.0f); - - int count = args.DestX2() - args.DestX1() + 1; - int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); - - xfrac -= 1 << (31 - xbits); - yfrac -= 1 << (31 - ybits); - uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); - - int ssecount = count / 2; - for (int index = 0; index < ssecount; index++) - { - int offset = index * 2; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(dest + offset)), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2]; - { - uint32_t xxbits = 32 - xbits; - uint32_t yybits = 32 - ybits; - uint32_t xxshift = (32 - xxbits); - uint32_t yyshift = (32 - yybits); - uint32_t xxmask = (1 << xxshift) - 1; - uint32_t yymask = (1 << yyshift) - 1; - uint32_t x = xfrac >> xxbits; - uint32_t y = yfrac >> yybits; - - uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p01 = 
source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; - uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; - - uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; - uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; - uint32_t a = 16 - inv_a; - uint32_t b = 16 - inv_b; - - uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - ifgcolor[0] = sampleout; - xfrac += xstep; - yfrac += ystep; - } - { - uint32_t xxbits = 32 - xbits; - uint32_t yybits = 32 - ybits; - uint32_t xxshift = (32 - xxbits); - uint32_t yyshift = (32 - yybits); - uint32_t xxmask = (1 << xxshift) - 1; - uint32_t yymask = (1 << yyshift) - 1; - uint32_t x = xfrac >> xxbits; - uint32_t y = yfrac >> yybits; - - uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; - uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; - - uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; - uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; - uint32_t a = 16 - inv_a; - uint32_t b = 16 - inv_b; - - uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sgreen = (GPART(p00) * (a * b) 
+ GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - ifgcolor[1] = sampleout; - xfrac += xstep; - yfrac += ystep; - } - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z - __m128 Lx = _mm_sub_ps(light_x, viewpos_x); - __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = 
_mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - uint32_t alpha0 = APART(ifgcolor[0]); - uint32_t alpha1 = APART(ifgcolor[1]); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 >> 7; // 255->256 - uint32_t inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; - uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); - __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = 
_mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - _mm_storel_epi64((__m128i*)(dest + offset), outcolor); - viewpos_x = _mm_add_ps(viewpos_x, step_viewpos_x); - } - - if (ssecount * 2 != count) - { - int index = ssecount * 2; - int offset = index; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2]; - uint32_t xxbits = 32 - xbits; - uint32_t yybits = 32 - ybits; - uint32_t xxshift = (32 - xxbits); - uint32_t yyshift = (32 - yybits); - uint32_t xxmask = (1 << xxshift) - 1; - uint32_t yymask = (1 << yyshift) - 1; - uint32_t x = xfrac >> xxbits; - uint32_t y = yfrac >> yybits; - - uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; - uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; - - uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; - uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; - uint32_t a = 16 - inv_a; - uint32_t b = 16 - inv_b; - - uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - fgcolor = 
_mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z - __m128 Lx = _mm_sub_ps(light_x, viewpos_x); - __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - uint32_t alpha0 = APART(ifgcolor[0]); - uint32_t alpha1 = APART(ifgcolor[1]); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 >> 7; 
// 255->256 - uint32_t inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; - uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); - __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - dest[offset] = _mm_cvtsi128_si32(outcolor); - } - } - } - } - else - { - if (is_nearest_filter) - { - bool is_64x64 = xbits == 6 && ybits == 6; - if (is_64x64) - { - // Shade constants - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - 
shade_constants.desaturate); - __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); - shade_fade = _mm_mullo_epi16(shade_fade, inv_light); - __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); - int desaturate = shade_constants.desaturate; - - auto lights = args.dc_lights; - auto num_lights = args.dc_num_lights; - float vpx = args.dc_viewpos.X; - float stepvpx = args.dc_viewpos_step.X; - __m128 viewpos_x = _mm_setr_ps(vpx, vpx + stepvpx, 0.0f, 0.0f); - __m128 step_viewpos_x = _mm_set1_ps(stepvpx * 2.0f); - - int count = args.DestX2() - args.DestX1() + 1; - int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); - - uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); - - int ssecount = count / 2; - for (int index = 0; index < ssecount; index++) - { - int offset = index * 2; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(dest + offset)), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2]; - { - int sample_index = ((xfrac >> (32 - 6 - 6)) & (63 * 64)) + (yfrac >> (32 - 6)); - unsigned int sampleout = source[sample_index]; - ifgcolor[0] = sampleout; - xfrac += xstep; - yfrac += ystep; - } - { - int sample_index = ((xfrac >> (32 - 6 - 6)) & (63 * 64)) + (yfrac >> (32 - 6)); - unsigned int sampleout = source[sample_index]; - ifgcolor[1] = sampleout; - xfrac += xstep; - yfrac += ystep; - } - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), 
_mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - int blue0 = BPART(ifgcolor[0]); - int green0 = GPART(ifgcolor[0]); - int red0 = RPART(ifgcolor[0]); - int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - - int blue1 = BPART(ifgcolor[1]); - int green1 = GPART(ifgcolor[1]); - int red1 = RPART(ifgcolor[1]); - int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; - - __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); - - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, mlight); - fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z - __m128 Lx = _mm_sub_ps(light_x, viewpos_x); - __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), 
_mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - uint32_t alpha0 = APART(ifgcolor[0]); - uint32_t alpha1 = APART(ifgcolor[1]); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 >> 7; // 255->256 - uint32_t inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; - uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); - __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = 
_mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - _mm_storel_epi64((__m128i*)(dest + offset), outcolor); - viewpos_x = _mm_add_ps(viewpos_x, step_viewpos_x); - } - - if (ssecount * 2 != count) - { - int index = ssecount * 2; - int offset = index; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2]; - int sample_index = ((xfrac >> (32 - 6 - 6)) & (63 * 64)) + (yfrac >> (32 - 6)); - unsigned int sampleout = source[sample_index]; - ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - int blue0 = BPART(ifgcolor[0]); - int green0 = GPART(ifgcolor[0]); - int red0 = RPART(ifgcolor[0]); - int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - - int blue1 = BPART(ifgcolor[1]); - int green1 = GPART(ifgcolor[1]); - int red1 = RPART(ifgcolor[1]); - int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; - - __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); - - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, mlight); - fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z - 
__m128 Lx = _mm_sub_ps(light_x, viewpos_x); - __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - uint32_t alpha0 = APART(ifgcolor[0]); - uint32_t alpha1 = APART(ifgcolor[1]); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 >> 7; // 255->256 - uint32_t inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; - uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, 
fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); - __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - dest[offset] = _mm_cvtsi128_si32(outcolor); - } - } - else - { - // Shade constants - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); - __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); - shade_fade = _mm_mullo_epi16(shade_fade, inv_light); - __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); - int desaturate = 
shade_constants.desaturate; - - auto lights = args.dc_lights; - auto num_lights = args.dc_num_lights; - float vpx = args.dc_viewpos.X; - float stepvpx = args.dc_viewpos_step.X; - __m128 viewpos_x = _mm_setr_ps(vpx, vpx + stepvpx, 0.0f, 0.0f); - __m128 step_viewpos_x = _mm_set1_ps(stepvpx * 2.0f); - - int count = args.DestX2() - args.DestX1() + 1; - int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); - - uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); - - int ssecount = count / 2; - for (int index = 0; index < ssecount; index++) - { - int offset = index * 2; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(dest + offset)), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2]; - { - int sample_index = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - unsigned int sampleout = source[sample_index]; - ifgcolor[0] = sampleout; - xfrac += xstep; - yfrac += ystep; - } - { - int sample_index = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - unsigned int sampleout = source[sample_index]; - ifgcolor[1] = sampleout; - xfrac += xstep; - yfrac += ystep; - } - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - int blue0 = BPART(ifgcolor[0]); - int green0 = GPART(ifgcolor[0]); - int red0 = RPART(ifgcolor[0]); - int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - - int blue1 = BPART(ifgcolor[1]); - int green1 = GPART(ifgcolor[1]); - int red1 = RPART(ifgcolor[1]); - int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; - - __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); - - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor 
= _mm_mullo_epi16(fgcolor, mlight); - fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z - __m128 Lx = _mm_sub_ps(light_x, viewpos_x); - __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - uint32_t alpha0 = 
APART(ifgcolor[0]); - uint32_t alpha1 = APART(ifgcolor[1]); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 >> 7; // 255->256 - uint32_t inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; - uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); - __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - _mm_storel_epi64((__m128i*)(dest + offset), outcolor); - viewpos_x = _mm_add_ps(viewpos_x, step_viewpos_x); - } - - if (ssecount * 2 != count) - { - int index = ssecount * 2; - int offset = index; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2]; - int sample_index = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - unsigned int sampleout = source[sample_index]; - ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - __m128i fgcolor = 
_mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - int blue0 = BPART(ifgcolor[0]); - int green0 = GPART(ifgcolor[0]); - int red0 = RPART(ifgcolor[0]); - int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - - int blue1 = BPART(ifgcolor[1]); - int green1 = GPART(ifgcolor[1]); - int red1 = RPART(ifgcolor[1]); - int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; - - __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); - - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, mlight); - fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z - __m128 Lx = _mm_sub_ps(light_x, viewpos_x); - __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); - __m128i attenuation = 
_mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - uint32_t alpha0 = APART(ifgcolor[0]); - uint32_t alpha1 = APART(ifgcolor[1]); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 >> 7; // 255->256 - uint32_t inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; - uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); - __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i 
outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - dest[offset] = _mm_cvtsi128_si32(outcolor); - } - } - } - else - { - bool is_64x64 = xbits == 6 && ybits == 6; - if (is_64x64) - { - // Shade constants - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); - __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); - shade_fade = _mm_mullo_epi16(shade_fade, inv_light); - __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); - int desaturate = shade_constants.desaturate; - - auto lights = args.dc_lights; - auto num_lights = args.dc_num_lights; - float vpx = args.dc_viewpos.X; - float stepvpx = args.dc_viewpos_step.X; - __m128 viewpos_x = _mm_setr_ps(vpx, vpx + stepvpx, 0.0f, 0.0f); - __m128 step_viewpos_x = _mm_set1_ps(stepvpx * 2.0f); - - int count = args.DestX2() - args.DestX1() + 1; - int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); - - xfrac -= 1 << (31 - xbits); - yfrac -= 1 
<< (31 - ybits); - uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); - - int ssecount = count / 2; - for (int index = 0; index < ssecount; index++) - { - int offset = index * 2; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(dest + offset)), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2]; - { - uint32_t xxbits = 26; - uint32_t yybits = 26; - uint32_t xxshift = (32 - xxbits); - uint32_t yyshift = (32 - yybits); - uint32_t xxmask = (1 << xxshift) - 1; - uint32_t yymask = (1 << yyshift) - 1; - uint32_t x = xfrac >> xxbits; - uint32_t y = yfrac >> yybits; - - uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; - uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; - - uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; - uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; - uint32_t a = 16 - inv_a; - uint32_t b = 16 - inv_b; - - uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - ifgcolor[0] = sampleout; - xfrac += xstep; - yfrac += ystep; - } - { - uint32_t xxbits = 26; - uint32_t yybits = 26; - uint32_t xxshift = (32 - xxbits); - uint32_t yyshift = (32 - yybits); - uint32_t xxmask = (1 << xxshift) - 1; - uint32_t 
yymask = (1 << yyshift) - 1; - uint32_t x = xfrac >> xxbits; - uint32_t y = yfrac >> yybits; - - uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; - uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; - - uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; - uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; - uint32_t a = 16 - inv_a; - uint32_t b = 16 - inv_b; - - uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - ifgcolor[1] = sampleout; - xfrac += xstep; - yfrac += ystep; - } - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - int blue0 = BPART(ifgcolor[0]); - int green0 = GPART(ifgcolor[0]); - int red0 = RPART(ifgcolor[0]); - int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - - int blue1 = BPART(ifgcolor[1]); - int green1 = GPART(ifgcolor[1]); - int red1 = RPART(ifgcolor[1]); - int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; - - __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); - - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = 
_mm_mullo_epi16(fgcolor, mlight); - fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z - __m128 Lx = _mm_sub_ps(light_x, viewpos_x); - __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - uint32_t alpha0 = 
APART(ifgcolor[0]); - uint32_t alpha1 = APART(ifgcolor[1]); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 >> 7; // 255->256 - uint32_t inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; - uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); - __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - _mm_storel_epi64((__m128i*)(dest + offset), outcolor); - viewpos_x = _mm_add_ps(viewpos_x, step_viewpos_x); - } - - if (ssecount * 2 != count) - { - int index = ssecount * 2; - int offset = index; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2]; - uint32_t xxbits = 26; - uint32_t yybits = 26; - uint32_t xxshift = (32 - xxbits); - uint32_t yyshift = (32 - yybits); - uint32_t xxmask = (1 << xxshift) - 1; - uint32_t yymask = (1 << yyshift) - 1; - uint32_t x = xfrac >> xxbits; - 
uint32_t y = yfrac >> yybits; - - uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; - uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; - - uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; - uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; - uint32_t a = 16 - inv_a; - uint32_t b = 16 - inv_b; - - uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - int blue0 = BPART(ifgcolor[0]); - int green0 = GPART(ifgcolor[0]); - int red0 = RPART(ifgcolor[0]); - int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - - int blue1 = BPART(ifgcolor[1]); - int green1 = GPART(ifgcolor[1]); - int red1 = RPART(ifgcolor[1]); - int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; - - __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); - - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, mlight); - fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, 
fgcolor), 8); - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z - __m128 Lx = _mm_sub_ps(light_x, viewpos_x); - __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - uint32_t alpha0 = APART(ifgcolor[0]); - uint32_t alpha1 = APART(ifgcolor[1]); - alpha0 += alpha0 >> 7; // 
255->256 - alpha1 += alpha1 >> 7; // 255->256 - uint32_t inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; - uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); - __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - dest[offset] = _mm_cvtsi128_si32(outcolor); - } - } - else - { - // Shade constants - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); - __m128i shade_fade = 
_mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); - shade_fade = _mm_mullo_epi16(shade_fade, inv_light); - __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); - int desaturate = shade_constants.desaturate; - - auto lights = args.dc_lights; - auto num_lights = args.dc_num_lights; - float vpx = args.dc_viewpos.X; - float stepvpx = args.dc_viewpos_step.X; - __m128 viewpos_x = _mm_setr_ps(vpx, vpx + stepvpx, 0.0f, 0.0f); - __m128 step_viewpos_x = _mm_set1_ps(stepvpx * 2.0f); - - int count = args.DestX2() - args.DestX1() + 1; - int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); - - xfrac -= 1 << (31 - xbits); - yfrac -= 1 << (31 - ybits); - uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); - - int ssecount = count / 2; - for (int index = 0; index < ssecount; index++) - { - int offset = index * 2; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(dest + offset)), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2]; - { - uint32_t xxbits = 32 - xbits; - uint32_t yybits = 32 - ybits; - uint32_t xxshift = (32 - xxbits); - uint32_t yyshift = (32 - yybits); - uint32_t xxmask = (1 << xxshift) - 1; - uint32_t yymask = (1 << yyshift) - 1; - uint32_t x = xfrac >> xxbits; - uint32_t y = yfrac >> yybits; - - uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p10 = source[((y & yymask) + (((x + 1) & 
xxmask) << yyshift))]; - uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; - - uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; - uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; - uint32_t a = 16 - inv_a; - uint32_t b = 16 - inv_b; - - uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - ifgcolor[0] = sampleout; - xfrac += xstep; - yfrac += ystep; - } - { - uint32_t xxbits = 32 - xbits; - uint32_t yybits = 32 - ybits; - uint32_t xxshift = (32 - xxbits); - uint32_t yyshift = (32 - yybits); - uint32_t xxmask = (1 << xxshift) - 1; - uint32_t yymask = (1 << yyshift) - 1; - uint32_t x = xfrac >> xxbits; - uint32_t y = yfrac >> yybits; - - uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; - uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; - - uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; - uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; - uint32_t a = 16 - inv_a; - uint32_t b = 16 - inv_b; - - uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t 
sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - ifgcolor[1] = sampleout; - xfrac += xstep; - yfrac += ystep; - } - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - int blue0 = BPART(ifgcolor[0]); - int green0 = GPART(ifgcolor[0]); - int red0 = RPART(ifgcolor[0]); - int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - - int blue1 = BPART(ifgcolor[1]); - int green1 = GPART(ifgcolor[1]); - int red1 = RPART(ifgcolor[1]); - int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; - - __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); - - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, mlight); - fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z - __m128 Lx = _mm_sub_ps(light_x, viewpos_x); - __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 
distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - uint32_t alpha0 = APART(ifgcolor[0]); - uint32_t alpha1 = APART(ifgcolor[1]); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 >> 7; // 255->256 - uint32_t inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; - uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = 
_mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); - __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - _mm_storel_epi64((__m128i*)(dest + offset), outcolor); - viewpos_x = _mm_add_ps(viewpos_x, step_viewpos_x); - } - - if (ssecount * 2 != count) - { - int index = ssecount * 2; - int offset = index; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2]; - uint32_t xxbits = 32 - xbits; - uint32_t yybits = 32 - ybits; - uint32_t xxshift = (32 - xxbits); - uint32_t yyshift = (32 - yybits); - uint32_t xxmask = (1 << xxshift) - 1; - uint32_t yymask = (1 << yyshift) - 1; - uint32_t x = xfrac >> xxbits; - uint32_t y = yfrac >> yybits; - - uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; - uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; - - uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; - uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; - uint32_t a = 16 - inv_a; - uint32_t b = 16 - inv_b; - - uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sblue = (BPART(p00) * (a 
* b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - int blue0 = BPART(ifgcolor[0]); - int green0 = GPART(ifgcolor[0]); - int red0 = RPART(ifgcolor[0]); - int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - - int blue1 = BPART(ifgcolor[1]); - int green1 = GPART(ifgcolor[1]); - int red1 = RPART(ifgcolor[1]); - int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; - - __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); - - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, mlight); - fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z - __m128 Lx = _mm_sub_ps(light_x, viewpos_x); - __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, 
_mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - uint32_t alpha0 = APART(ifgcolor[0]); - uint32_t alpha1 = APART(ifgcolor[1]); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 >> 7; // 255->256 - uint32_t inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; - uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - 
__m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); - __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - dest[offset] = _mm_cvtsi128_si32(outcolor); - } - } - } - } - } - - FString DebugInfo() override { return "DrawSpanAddClamp32Command"; } - }; - -} diff --git a/src/swrenderer/drawers/r_draw_span32.php b/src/swrenderer/drawers/r_draw_span32.php deleted file mode 100644 index 400c981dc7..0000000000 --- a/src/swrenderer/drawers/r_draw_span32.php +++ /dev/null @@ -1,443 +0,0 @@ -#!/usr/bin/php -/* -** Drawer commands for spans -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 'as-is', without any express or implied -** warranty. In no event will the authors be held liable for any damages -** arising from the use of this software. -** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. -** 3. This notice may not be removed or altered from any source distribution. -** -*/ - -/* - Warning: this C++ source file has been auto-generated. 
Please modify the original php script that generated it. -*/ - -#pragma once - -#include "swrenderer/drawers/r_draw_rgba.h" -#include "swrenderer/viewport/r_spandrawer.h" - -namespace swrenderer -{ - - class : public DrawerCommand - { - protected: - SpanDrawerArgs args; - - public: - (const SpanDrawerArgs &drawerargs) : args(drawerargs) { } - - void Execute(DrawerThread *thread) override - { - if (thread->line_skipped_by_thread(args.DestY())) return; - - uint32_t xbits = args.TextureWidthBits(); - uint32_t ybits = args.TextureHeightBits(); - uint32_t xstep = args.TextureUStep(); - uint32_t ystep = args.TextureVStep(); - uint32_t xfrac = args.TextureUPos(); - uint32_t yfrac = args.TextureVPos(); - uint32_t yshift = 32 - ybits; - uint32_t xshift = yshift - xbits; - uint32_t xmask = ((1 << xbits) - 1) << ybits; - - const uint32_t *source = (const uint32_t*)args.TexturePixels(); - - double lod = args.TextureLOD(); - bool mipmapped = args.MipmappedTexture(); - - bool magnifying = lod < 0.0; - if (r_mipmap && mipmapped) - { - int level = (int)lod; - while (level > 0) - { - if (xbits <= 2 || ybits <= 2) - break; - - source += (1 << (xbits)) * (1 << (ybits)); - xbits -= 1; - ybits -= 1; - level--; - } - } - - bool is_nearest_filter = !((magnifying && r_magfilter) || (!magnifying && r_minfilter)); - - auto shade_constants = args.ColormapConstants(); - if (shade_constants.simple_shade) - { - - } - else - { - - } - } - - FString DebugInfo() override { return ""; } - }; - - - if (is_nearest_filter) - { - - } - else - { - - } - - bool is_64x64 = xbits == 6 && ybits == 6; - if (is_64x64) - { - - } - else - { - - } - - // Shade constants - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - - __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 
- shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); - __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); - shade_fade = _mm_mullo_epi16(shade_fade, inv_light); - __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); - int desaturate = shade_constants.desaturate; - - - auto lights = args.dc_lights; - auto num_lights = args.dc_num_lights; - float vpx = args.dc_viewpos.X; - float stepvpx = args.dc_viewpos_step.X; - __m128 viewpos_x = _mm_setr_ps(vpx, vpx + stepvpx, 0.0f, 0.0f); - __m128 step_viewpos_x = _mm_set1_ps(stepvpx * 2.0f); - - int count = args.DestX2() - args.DestX1() + 1; - int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY()); - - - xfrac -= 1 << (31 - xbits); - yfrac -= 1 << (31 - ybits); - - uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); - - int ssecount = count / 2; - for (int index = 0; index < ssecount; index++) - { - int offset = index * 2; - - __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(dest + offset)), _mm_setzero_si128()); - - - // Sample - unsigned int ifgcolor[2]; - { - - ifgcolor[0] = sampleout; - xfrac += xstep; - yfrac += ystep; - } - { - - ifgcolor[1] = sampleout; - xfrac += xstep; - yfrac += ystep; - } - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - - - // Blend - - - 
_mm_storel_epi64((__m128i*)(dest + offset), outcolor); - viewpos_x = _mm_add_ps(viewpos_x, step_viewpos_x); - } - - if (ssecount * 2 != count) - { - int index = ssecount * 2; - int offset = index; - - __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); - - - // Sample - unsigned int ifgcolor[2]; - - ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - - - // Blend - - - dest[offset] = _mm_cvtsi128_si32(outcolor); - } - - int sample_index = ((xfrac >> (32 - 6 - 6)) & (63 * 64)) + (yfrac >> (32 - 6)); - unsigned int sampleout = source[sample_index]; - - int sample_index = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - unsigned int sampleout = source[sample_index]; - - uint32_t xxbits = 26; - uint32_t yybits = 26; - - uint32_t xxbits = 32 - xbits; - uint32_t yybits = 32 - ybits; - - uint32_t xxshift = (32 - xxbits); - uint32_t yyshift = (32 - yybits); - uint32_t xxmask = (1 << xxshift) - 1; - uint32_t yymask = (1 << yyshift) - 1; - uint32_t x = xfrac >> xxbits; - uint32_t y = yfrac >> yybits; - - uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))]; - uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))]; - uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))]; - - uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15; - uint32_t inv_a = (yfrac >> (yybits - 4)) & 15; - uint32_t a = 16 - inv_a; - uint32_t b = 16 - inv_b; - - uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * 
(inv_a * inv_b) + 127) >> 8; - uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - - __m128i material = fgcolor; - - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - - int blue0 = BPART(ifgcolor[0]); - int green0 = GPART(ifgcolor[0]); - int red0 = RPART(ifgcolor[0]); - int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - - int blue1 = BPART(ifgcolor[1]); - int green1 = GPART(ifgcolor[1]); - int red1 = RPART(ifgcolor[1]); - int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; - - __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); - - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, mlight); - fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - - __m128i outcolor = fgcolor; - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - fgcolor = _mm_mullo_epi16(fgcolor, alpha); - bgcolor = _mm_mullo_epi16(bgcolor, inv_alpha); - __m128i outcolor = _mm_srli_epi16(_mm_add_epi16(fgcolor, bgcolor), 8); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - __m128i fgalpha = _mm_set1_epi16(srcalpha); - __m128i bgalpha = _mm_set1_epi16(destalpha); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = 
_mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); - __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - uint32_t alpha0 = APART(ifgcolor[0]); - uint32_t alpha1 = APART(ifgcolor[1]); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 >> 7; // 255->256 - uint32_t inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; - uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - - __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); - __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); - - __m128i out_lo = _mm_sub_epi32(fg_lo, bg_lo); - __m128i out_hi = _mm_sub_epi32(fg_hi, bg_hi); - - __m128i out_lo = _mm_sub_epi32(bg_lo, fg_lo); - __m128i out_hi = _mm_sub_epi32(bg_hi, 
fg_hi); - - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lyz2 = light_y; // L.y*L.y + L.z*L.z - __m128 Lx = _mm_sub_ps(light_x, viewpos_x); - __m128 dist2 = _mm_add_ps(Lyz2, _mm_mul_ps(Lx, Lx)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_z, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_z, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 
8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - -} diff --git a/src/swrenderer/drawers/r_draw_sprite32.h b/src/swrenderer/drawers/r_draw_sprite32.h deleted file mode 100644 index d54aff4e9e..0000000000 --- a/src/swrenderer/drawers/r_draw_sprite32.h +++ /dev/null @@ -1,6898 +0,0 @@ -/* -** Drawer commands for sprites -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 'as-is', without any express or implied -** warranty. In no event will the authors be held liable for any damages -** arising from the use of this software. -** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. -** 3. This notice may not be removed or altered from any source distribution. -** -*/ - -/* - Warning: this C++ source file has been auto-generated. Please modify the original php script that generated it. 
-*/ - -#pragma once - -#include "swrenderer/drawers/r_draw_rgba.h" -#include "swrenderer/viewport/r_walldrawer.h" - -namespace swrenderer -{ - class DrawSpriteCopy32Command : public DrawerCommand - { - protected: - SpriteDrawerArgs args; - - public: - DrawSpriteCopy32Command(const SpriteDrawerArgs &drawerargs) : args(drawerargs) { } - - void Execute(DrawerThread *thread) override - { - auto shade_constants = args.ColormapConstants(); - if (shade_constants.simple_shade) - { - const uint32_t *source = (const uint32_t*)args.TexturePixels(); - const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); - bool is_nearest_filter = (source2 == nullptr); - if (is_nearest_filter) - { - int textureheight = args.TextureHeight(); - uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; - - // Shade constants - __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); - dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); - dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); - - int count = args.Count(); - int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t fracstep = args.TextureVStep(); - uint32_t frac = args.TextureVPos(); - uint32_t texturefracx = args.TextureUPos(); - uint32_t *dest = (uint32_t*)args.Dest(); - int dest_y = args.DestY(); - - count = thread->count_for_thread(dest_y, count); - if (count <= 0) return; - frac += thread->skipped_by_thread(dest_y) * fracstep; - dest = thread->dest_for_thread(dest_y, pitch, dest); - fracstep *= thread->num_cores; - pitch *= thread->num_cores; - uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); - uint32_t srccolor = args.SrcColorBgra(); - uint32_t color = 
LightBgra::shade_pal_index_simple(args.SolidColor(), light); - - int ssecount = count / 2; - for (int index = 0; index < ssecount; index++) - { - int offset = index * pitch * 2; - uint32_t desttmp[2]; - desttmp[0] = dest[offset]; - desttmp[1] = dest[offset + pitch]; - - // Sample - unsigned int ifgcolor[2], ifgshade[2]; - { - int sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; - unsigned int sampleout = source[sample_index]; - unsigned int sampleshadeout = 0; - ifgcolor[0] = sampleout; - ifgshade[0] = sampleshadeout; - frac += fracstep; - } - { - int sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; - unsigned int sampleout = source[sample_index]; - unsigned int sampleshadeout = 0; - ifgcolor[1] = sampleout; - ifgshade[1] = sampleshadeout; - frac += fracstep; - } - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - - // Blend - __m128i outcolor = fgcolor; - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - - _mm_storel_epi64((__m128i*)desttmp, outcolor); - dest[offset] = desttmp[0]; - dest[offset + pitch] = desttmp[1]; - } - - if (ssecount * 2 != count) - { - int index = ssecount * 2; - int offset = index * pitch; - - // Sample - unsigned int ifgcolor[2], ifgshade[2]; - int sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; - unsigned int sampleout = source[sample_index]; - unsigned int sampleshadeout = 0; - ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - ifgshade[0] = sampleshadeout; - ifgshade[1] = 0; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - - // Blend - __m128i outcolor = fgcolor; - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - - dest[offset] = _mm_cvtsi128_si32(outcolor); - } - } - else - { - int textureheight = args.TextureHeight(); - uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; - - // Shade constants - __m128i 
dynlight = _mm_cvtsi32_si128(args.DynamicLight()); - dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); - dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); - - int count = args.Count(); - int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t fracstep = args.TextureVStep(); - uint32_t frac = args.TextureVPos(); - uint32_t texturefracx = args.TextureUPos(); - uint32_t *dest = (uint32_t*)args.Dest(); - int dest_y = args.DestY(); - - count = thread->count_for_thread(dest_y, count); - if (count <= 0) return; - frac += thread->skipped_by_thread(dest_y) * fracstep; - dest = thread->dest_for_thread(dest_y, pitch, dest); - fracstep *= thread->num_cores; - pitch *= thread->num_cores; - frac -= one / 2; - uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); - uint32_t srccolor = args.SrcColorBgra(); - uint32_t color = LightBgra::shade_pal_index_simple(args.SolidColor(), light); - - int ssecount = count / 2; - for (int index = 0; index < ssecount; index++) - { - int offset = index * pitch * 2; - uint32_t desttmp[2]; - desttmp[0] = dest[offset]; - desttmp[1] = dest[offset + pitch]; - - // Sample - unsigned int ifgcolor[2], ifgshade[2]; - { - // Clamp to edge - unsigned int frac_y0 = (clamp(frac, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; - unsigned int frac_y1 = (clamp(frac + one, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; - unsigned int y0 = frac_y0 >> FRACBITS; - unsigned int y1 = frac_y1 >> FRACBITS; - - unsigned int p00 = source[y0]; - unsigned int p01 = source[y1]; - unsigned int p10 = source2[y0]; - unsigned int p11 = source2[y1]; - - unsigned int inv_b = texturefracx; - unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; - unsigned int a 
= 16 - inv_a; - unsigned int b = 16 - inv_b; - - unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - unsigned int sampleshadeout = 0; - ifgcolor[0] = sampleout; - ifgshade[0] = sampleshadeout; - frac += fracstep; - } - { - // Clamp to edge - unsigned int frac_y0 = (clamp(frac, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; - unsigned int frac_y1 = (clamp(frac + one, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; - unsigned int y0 = frac_y0 >> FRACBITS; - unsigned int y1 = frac_y1 >> FRACBITS; - - unsigned int p00 = source[y0]; - unsigned int p01 = source[y1]; - unsigned int p10 = source2[y0]; - unsigned int p11 = source2[y1]; - - unsigned int inv_b = texturefracx; - unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; - unsigned int a = 16 - inv_a; - unsigned int b = 16 - inv_b; - - unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int 
sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - unsigned int sampleshadeout = 0; - ifgcolor[1] = sampleout; - ifgshade[1] = sampleshadeout; - frac += fracstep; - } - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - - // Blend - __m128i outcolor = fgcolor; - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - - _mm_storel_epi64((__m128i*)desttmp, outcolor); - dest[offset] = desttmp[0]; - dest[offset + pitch] = desttmp[1]; - } - - if (ssecount * 2 != count) - { - int index = ssecount * 2; - int offset = index * pitch; - - // Sample - unsigned int ifgcolor[2], ifgshade[2]; - // Clamp to edge - unsigned int frac_y0 = (clamp(frac, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; - unsigned int frac_y1 = (clamp(frac + one, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; - unsigned int y0 = frac_y0 >> FRACBITS; - unsigned int y1 = frac_y1 >> FRACBITS; - - unsigned int p00 = source[y0]; - unsigned int p01 = source[y1]; - unsigned int p10 = source2[y0]; - unsigned int p11 = source2[y1]; - - unsigned int inv_b = texturefracx; - unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; - unsigned int a = 16 - inv_a; - unsigned int b = 16 - inv_b; - - unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - unsigned int sampleshadeout = 0; - ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - ifgshade[0] = 
sampleshadeout; - ifgshade[1] = 0; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - - // Blend - __m128i outcolor = fgcolor; - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - - dest[offset] = _mm_cvtsi128_si32(outcolor); - } - } - } - else - { - const uint32_t *source = (const uint32_t*)args.TexturePixels(); - const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); - bool is_nearest_filter = (source2 == nullptr); - if (is_nearest_filter) - { - int textureheight = args.TextureHeight(); - uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; - - // Shade constants - __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); - dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); - dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); - __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); - shade_fade = _mm_mullo_epi16(shade_fade, inv_light); - __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); - int desaturate = 
shade_constants.desaturate; - - __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); - lightcontrib = _mm_sub_epi16(lightcontrib, mlight); - - int count = args.Count(); - int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t fracstep = args.TextureVStep(); - uint32_t frac = args.TextureVPos(); - uint32_t texturefracx = args.TextureUPos(); - uint32_t *dest = (uint32_t*)args.Dest(); - int dest_y = args.DestY(); - - count = thread->count_for_thread(dest_y, count); - if (count <= 0) return; - frac += thread->skipped_by_thread(dest_y) * fracstep; - dest = thread->dest_for_thread(dest_y, pitch, dest); - fracstep *= thread->num_cores; - pitch *= thread->num_cores; - uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); - uint32_t srccolor = args.SrcColorBgra(); - uint32_t color = LightBgra::shade_pal_index_simple(args.SolidColor(), light); - - int ssecount = count / 2; - for (int index = 0; index < ssecount; index++) - { - int offset = index * pitch * 2; - uint32_t desttmp[2]; - desttmp[0] = dest[offset]; - desttmp[1] = dest[offset + pitch]; - - // Sample - unsigned int ifgcolor[2], ifgshade[2]; - { - int sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; - unsigned int sampleout = source[sample_index]; - unsigned int sampleshadeout = 0; - ifgcolor[0] = sampleout; - ifgshade[0] = sampleshadeout; - frac += fracstep; - } - { - int sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; - unsigned int sampleout = source[sample_index]; - unsigned int sampleshadeout = 0; - ifgcolor[1] = sampleout; - ifgshade[1] = sampleshadeout; - frac += fracstep; - } - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - - // Blend - __m128i outcolor = fgcolor; - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - - _mm_storel_epi64((__m128i*)desttmp, outcolor); - 
dest[offset] = desttmp[0]; - dest[offset + pitch] = desttmp[1]; - } - - if (ssecount * 2 != count) - { - int index = ssecount * 2; - int offset = index * pitch; - - // Sample - unsigned int ifgcolor[2], ifgshade[2]; - int sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; - unsigned int sampleout = source[sample_index]; - unsigned int sampleshadeout = 0; - ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - ifgshade[0] = sampleshadeout; - ifgshade[1] = 0; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - - // Blend - __m128i outcolor = fgcolor; - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - - dest[offset] = _mm_cvtsi128_si32(outcolor); - } - } - else - { - int textureheight = args.TextureHeight(); - uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; - - // Shade constants - __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); - dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); - dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); - __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); - shade_fade = _mm_mullo_epi16(shade_fade, inv_light); - __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, 
shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); - int desaturate = shade_constants.desaturate; - - __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); - lightcontrib = _mm_sub_epi16(lightcontrib, mlight); - - int count = args.Count(); - int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t fracstep = args.TextureVStep(); - uint32_t frac = args.TextureVPos(); - uint32_t texturefracx = args.TextureUPos(); - uint32_t *dest = (uint32_t*)args.Dest(); - int dest_y = args.DestY(); - - count = thread->count_for_thread(dest_y, count); - if (count <= 0) return; - frac += thread->skipped_by_thread(dest_y) * fracstep; - dest = thread->dest_for_thread(dest_y, pitch, dest); - fracstep *= thread->num_cores; - pitch *= thread->num_cores; - frac -= one / 2; - uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); - uint32_t srccolor = args.SrcColorBgra(); - uint32_t color = LightBgra::shade_pal_index_simple(args.SolidColor(), light); - - int ssecount = count / 2; - for (int index = 0; index < ssecount; index++) - { - int offset = index * pitch * 2; - uint32_t desttmp[2]; - desttmp[0] = dest[offset]; - desttmp[1] = dest[offset + pitch]; - - // Sample - unsigned int ifgcolor[2], ifgshade[2]; - { - // Clamp to edge - unsigned int frac_y0 = (clamp(frac, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; - unsigned int frac_y1 = (clamp(frac + one, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; - unsigned int y0 = frac_y0 >> FRACBITS; - unsigned int y1 = frac_y1 >> FRACBITS; - - unsigned int p00 = source[y0]; - unsigned int p01 = source[y1]; - unsigned int p10 = source2[y0]; - unsigned int p11 = source2[y1]; - - unsigned int inv_b = texturefracx; - unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; - unsigned int a = 
16 - inv_a; - unsigned int b = 16 - inv_b; - - unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - unsigned int sampleshadeout = 0; - ifgcolor[0] = sampleout; - ifgshade[0] = sampleshadeout; - frac += fracstep; - } - { - // Clamp to edge - unsigned int frac_y0 = (clamp(frac, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; - unsigned int frac_y1 = (clamp(frac + one, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; - unsigned int y0 = frac_y0 >> FRACBITS; - unsigned int y1 = frac_y1 >> FRACBITS; - - unsigned int p00 = source[y0]; - unsigned int p01 = source[y1]; - unsigned int p10 = source2[y0]; - unsigned int p11 = source2[y1]; - - unsigned int inv_b = texturefracx; - unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; - unsigned int a = 16 - inv_a; - unsigned int b = 16 - inv_b; - - unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout 
= (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - unsigned int sampleshadeout = 0; - ifgcolor[1] = sampleout; - ifgshade[1] = sampleshadeout; - frac += fracstep; - } - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - - // Blend - __m128i outcolor = fgcolor; - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - - _mm_storel_epi64((__m128i*)desttmp, outcolor); - dest[offset] = desttmp[0]; - dest[offset + pitch] = desttmp[1]; - } - - if (ssecount * 2 != count) - { - int index = ssecount * 2; - int offset = index * pitch; - - // Sample - unsigned int ifgcolor[2], ifgshade[2]; - // Clamp to edge - unsigned int frac_y0 = (clamp(frac, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; - unsigned int frac_y1 = (clamp(frac + one, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; - unsigned int y0 = frac_y0 >> FRACBITS; - unsigned int y1 = frac_y1 >> FRACBITS; - - unsigned int p00 = source[y0]; - unsigned int p01 = source[y1]; - unsigned int p10 = source2[y0]; - unsigned int p11 = source2[y1]; - - unsigned int inv_b = texturefracx; - unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; - unsigned int a = 16 - inv_a; - unsigned int b = 16 - inv_b; - - unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - unsigned int sampleshadeout = 0; - ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - ifgshade[0] = 
sampleshadeout; - ifgshade[1] = 0; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - - // Blend - __m128i outcolor = fgcolor; - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - - dest[offset] = _mm_cvtsi128_si32(outcolor); - } - } - } - } - - FString DebugInfo() override { return "DrawSpriteCopy32Command"; } - }; - - class DrawSprite32Command : public DrawerCommand - { - protected: - SpriteDrawerArgs args; - - public: - DrawSprite32Command(const SpriteDrawerArgs &drawerargs) : args(drawerargs) { } - - void Execute(DrawerThread *thread) override - { - auto shade_constants = args.ColormapConstants(); - if (shade_constants.simple_shade) - { - const uint32_t *source = (const uint32_t*)args.TexturePixels(); - const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); - bool is_nearest_filter = (source2 == nullptr); - if (is_nearest_filter) - { - int textureheight = args.TextureHeight(); - uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; - - // Shade constants - __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); - dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); - dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); - - int count = args.Count(); - int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t fracstep = args.TextureVStep(); - uint32_t frac = args.TextureVPos(); - uint32_t texturefracx = args.TextureUPos(); - uint32_t *dest = (uint32_t*)args.Dest(); - int dest_y = args.DestY(); - - count = thread->count_for_thread(dest_y, count); - if (count <= 0) return; - frac += thread->skipped_by_thread(dest_y) * fracstep; - dest = thread->dest_for_thread(dest_y, pitch, dest); - fracstep *= 
thread->num_cores; - pitch *= thread->num_cores; - uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); - uint32_t srccolor = args.SrcColorBgra(); - uint32_t color = LightBgra::shade_pal_index_simple(args.SolidColor(), light); - - int ssecount = count / 2; - for (int index = 0; index < ssecount; index++) - { - int offset = index * pitch * 2; - uint32_t desttmp[2]; - desttmp[0] = dest[offset]; - desttmp[1] = dest[offset + pitch]; - - // Sample - unsigned int ifgcolor[2], ifgshade[2]; - { - int sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; - unsigned int sampleout = source[sample_index]; - unsigned int sampleshadeout = 0; - ifgcolor[0] = sampleout; - ifgshade[0] = sampleshadeout; - frac += fracstep; - } - { - int sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; - unsigned int sampleout = source[sample_index]; - unsigned int sampleshadeout = 0; - ifgcolor[1] = sampleout; - ifgshade[1] = sampleshadeout; - frac += fracstep; - } - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - - // Blend - __m128i outcolor = fgcolor; - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - - _mm_storel_epi64((__m128i*)desttmp, outcolor); - dest[offset] = desttmp[0]; - dest[offset + pitch] = desttmp[1]; - } - - if (ssecount * 2 != count) - { - int index = ssecount * 2; - int offset = index * pitch; - - // Sample - unsigned int ifgcolor[2], ifgshade[2]; - int sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; - unsigned int sampleout = source[sample_index]; - unsigned int sampleshadeout = 0; - ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - ifgshade[0] = sampleshadeout; - ifgshade[1] = 0; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - fgcolor = 
_mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - - // Blend - __m128i outcolor = fgcolor; - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - - dest[offset] = _mm_cvtsi128_si32(outcolor); - } - } - else - { - int textureheight = args.TextureHeight(); - uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; - - // Shade constants - __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); - dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); - dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); - - int count = args.Count(); - int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t fracstep = args.TextureVStep(); - uint32_t frac = args.TextureVPos(); - uint32_t texturefracx = args.TextureUPos(); - uint32_t *dest = (uint32_t*)args.Dest(); - int dest_y = args.DestY(); - - count = thread->count_for_thread(dest_y, count); - if (count <= 0) return; - frac += thread->skipped_by_thread(dest_y) * fracstep; - dest = thread->dest_for_thread(dest_y, pitch, dest); - fracstep *= thread->num_cores; - pitch *= thread->num_cores; - frac -= one / 2; - uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); - uint32_t srccolor = args.SrcColorBgra(); - uint32_t color = LightBgra::shade_pal_index_simple(args.SolidColor(), light); - - int ssecount = count / 2; - for (int index = 0; index < ssecount; index++) - { - int offset = index * pitch * 2; - uint32_t desttmp[2]; - desttmp[0] = dest[offset]; - desttmp[1] = dest[offset + pitch]; - - // Sample - unsigned int ifgcolor[2], ifgshade[2]; - { - // Clamp to edge - unsigned int frac_y0 = (clamp(frac, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; - unsigned int frac_y1 = (clamp(frac + one, 
0, 1 << 30) >> (FRACBITS - 2)) * textureheight; - unsigned int y0 = frac_y0 >> FRACBITS; - unsigned int y1 = frac_y1 >> FRACBITS; - - unsigned int p00 = source[y0]; - unsigned int p01 = source[y1]; - unsigned int p10 = source2[y0]; - unsigned int p11 = source2[y1]; - - unsigned int inv_b = texturefracx; - unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; - unsigned int a = 16 - inv_a; - unsigned int b = 16 - inv_b; - - unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - unsigned int sampleshadeout = 0; - ifgcolor[0] = sampleout; - ifgshade[0] = sampleshadeout; - frac += fracstep; - } - { - // Clamp to edge - unsigned int frac_y0 = (clamp(frac, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; - unsigned int frac_y1 = (clamp(frac + one, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; - unsigned int y0 = frac_y0 >> FRACBITS; - unsigned int y1 = frac_y1 >> FRACBITS; - - unsigned int p00 = source[y0]; - unsigned int p01 = source[y1]; - unsigned int p10 = source2[y0]; - unsigned int p11 = source2[y1]; - - unsigned int inv_b = texturefracx; - unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; - unsigned int a = 16 - inv_a; - unsigned int b = 16 - inv_b; - - unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + 
GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - unsigned int sampleshadeout = 0; - ifgcolor[1] = sampleout; - ifgshade[1] = sampleshadeout; - frac += fracstep; - } - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - - // Blend - __m128i outcolor = fgcolor; - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - - _mm_storel_epi64((__m128i*)desttmp, outcolor); - dest[offset] = desttmp[0]; - dest[offset + pitch] = desttmp[1]; - } - - if (ssecount * 2 != count) - { - int index = ssecount * 2; - int offset = index * pitch; - - // Sample - unsigned int ifgcolor[2], ifgshade[2]; - // Clamp to edge - unsigned int frac_y0 = (clamp(frac, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; - unsigned int frac_y1 = (clamp(frac + one, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; - unsigned int y0 = frac_y0 >> FRACBITS; - unsigned int y1 = frac_y1 >> FRACBITS; - - unsigned int p00 = source[y0]; - unsigned int p01 = source[y1]; - unsigned int p10 = source2[y0]; - unsigned int p11 = source2[y1]; - - unsigned int inv_b = texturefracx; - unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; - unsigned int a = 16 - inv_a; - unsigned int b = 16 - inv_b; - - unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sblue = 
(BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - unsigned int sampleshadeout = 0; - ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - ifgshade[0] = sampleshadeout; - ifgshade[1] = 0; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - - // Blend - __m128i outcolor = fgcolor; - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - - dest[offset] = _mm_cvtsi128_si32(outcolor); - } - } - } - else - { - const uint32_t *source = (const uint32_t*)args.TexturePixels(); - const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); - bool is_nearest_filter = (source2 == nullptr); - if (is_nearest_filter) - { - int textureheight = args.TextureHeight(); - uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; - - // Shade constants - __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); - dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); - dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); - __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, 
shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); - shade_fade = _mm_mullo_epi16(shade_fade, inv_light); - __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); - int desaturate = shade_constants.desaturate; - - __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); - lightcontrib = _mm_sub_epi16(lightcontrib, mlight); - - int count = args.Count(); - int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t fracstep = args.TextureVStep(); - uint32_t frac = args.TextureVPos(); - uint32_t texturefracx = args.TextureUPos(); - uint32_t *dest = (uint32_t*)args.Dest(); - int dest_y = args.DestY(); - - count = thread->count_for_thread(dest_y, count); - if (count <= 0) return; - frac += thread->skipped_by_thread(dest_y) * fracstep; - dest = thread->dest_for_thread(dest_y, pitch, dest); - fracstep *= thread->num_cores; - pitch *= thread->num_cores; - uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); - uint32_t srccolor = args.SrcColorBgra(); - uint32_t color = LightBgra::shade_pal_index_simple(args.SolidColor(), light); - - int ssecount = count / 2; - for (int index = 0; index < ssecount; index++) - { - int offset = index * pitch * 2; - uint32_t desttmp[2]; - desttmp[0] = dest[offset]; - desttmp[1] = dest[offset + pitch]; - - // Sample - unsigned int ifgcolor[2], ifgshade[2]; - { - int sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; - unsigned int sampleout = source[sample_index]; - unsigned int sampleshadeout = 0; - ifgcolor[0] = sampleout; - ifgshade[0] = sampleshadeout; - frac += fracstep; - } - { - int 
sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; - unsigned int sampleout = source[sample_index]; - unsigned int sampleshadeout = 0; - ifgcolor[1] = sampleout; - ifgshade[1] = sampleshadeout; - frac += fracstep; - } - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i lit_dynlight = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); - - int blue0 = BPART(ifgcolor[0]); - int green0 = GPART(ifgcolor[0]); - int red0 = RPART(ifgcolor[0]); - int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - - int blue1 = BPART(ifgcolor[1]); - int green1 = GPART(ifgcolor[1]); - int red1 = RPART(ifgcolor[1]); - int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; - - __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); - - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, mlight); - fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - - fgcolor = _mm_add_epi16(fgcolor, lit_dynlight); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); - - // Blend - __m128i outcolor = fgcolor; - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - - _mm_storel_epi64((__m128i*)desttmp, outcolor); - dest[offset] = desttmp[0]; - dest[offset + pitch] = desttmp[1]; - } - - if (ssecount * 2 != count) - { - int index = ssecount * 2; - int offset = index * pitch; - - // Sample - unsigned int ifgcolor[2], ifgshade[2]; - int sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; - unsigned int sampleout = source[sample_index]; - unsigned int sampleshadeout = 0; - ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - ifgshade[0] = sampleshadeout; - ifgshade[1] = 0; - __m128i fgcolor = 
_mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i lit_dynlight = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); - - int blue0 = BPART(ifgcolor[0]); - int green0 = GPART(ifgcolor[0]); - int red0 = RPART(ifgcolor[0]); - int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - - int blue1 = BPART(ifgcolor[1]); - int green1 = GPART(ifgcolor[1]); - int red1 = RPART(ifgcolor[1]); - int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; - - __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); - - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, mlight); - fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - - fgcolor = _mm_add_epi16(fgcolor, lit_dynlight); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); - - // Blend - __m128i outcolor = fgcolor; - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - - dest[offset] = _mm_cvtsi128_si32(outcolor); - } - } - else - { - int textureheight = args.TextureHeight(); - uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; - - // Shade constants - __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); - dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); - dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - 
shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); - __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); - shade_fade = _mm_mullo_epi16(shade_fade, inv_light); - __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); - int desaturate = shade_constants.desaturate; - - __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); - lightcontrib = _mm_sub_epi16(lightcontrib, mlight); - - int count = args.Count(); - int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t fracstep = args.TextureVStep(); - uint32_t frac = args.TextureVPos(); - uint32_t texturefracx = args.TextureUPos(); - uint32_t *dest = (uint32_t*)args.Dest(); - int dest_y = args.DestY(); - - count = thread->count_for_thread(dest_y, count); - if (count <= 0) return; - frac += thread->skipped_by_thread(dest_y) * fracstep; - dest = thread->dest_for_thread(dest_y, pitch, dest); - fracstep *= thread->num_cores; - pitch *= thread->num_cores; - frac -= one / 2; - uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); - uint32_t srccolor = args.SrcColorBgra(); - uint32_t color = LightBgra::shade_pal_index_simple(args.SolidColor(), light); - - int ssecount = count / 2; - for (int index = 0; index < ssecount; index++) - { - int offset = index * pitch * 2; - uint32_t desttmp[2]; - desttmp[0] = dest[offset]; - desttmp[1] = dest[offset + pitch]; - - // Sample - unsigned int ifgcolor[2], ifgshade[2]; - { - // Clamp to edge - unsigned int frac_y0 = (clamp(frac, 
0, 1 << 30) >> (FRACBITS - 2)) * textureheight; - unsigned int frac_y1 = (clamp(frac + one, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; - unsigned int y0 = frac_y0 >> FRACBITS; - unsigned int y1 = frac_y1 >> FRACBITS; - - unsigned int p00 = source[y0]; - unsigned int p01 = source[y1]; - unsigned int p10 = source2[y0]; - unsigned int p11 = source2[y1]; - - unsigned int inv_b = texturefracx; - unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; - unsigned int a = 16 - inv_a; - unsigned int b = 16 - inv_b; - - unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - unsigned int sampleshadeout = 0; - ifgcolor[0] = sampleout; - ifgshade[0] = sampleshadeout; - frac += fracstep; - } - { - // Clamp to edge - unsigned int frac_y0 = (clamp(frac, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; - unsigned int frac_y1 = (clamp(frac + one, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; - unsigned int y0 = frac_y0 >> FRACBITS; - unsigned int y1 = frac_y1 >> FRACBITS; - - unsigned int p00 = source[y0]; - unsigned int p01 = source[y1]; - unsigned int p10 = source2[y0]; - unsigned int p11 = source2[y1]; - - unsigned int inv_b = texturefracx; - unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; - unsigned int a = 16 - inv_a; - unsigned int b = 16 - inv_b; - - unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * 
inv_b) + 127) >> 8; - unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - unsigned int sampleshadeout = 0; - ifgcolor[1] = sampleout; - ifgshade[1] = sampleshadeout; - frac += fracstep; - } - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i lit_dynlight = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); - - int blue0 = BPART(ifgcolor[0]); - int green0 = GPART(ifgcolor[0]); - int red0 = RPART(ifgcolor[0]); - int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - - int blue1 = BPART(ifgcolor[1]); - int green1 = GPART(ifgcolor[1]); - int red1 = RPART(ifgcolor[1]); - int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; - - __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); - - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, mlight); - fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - - fgcolor = _mm_add_epi16(fgcolor, lit_dynlight); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); - - // Blend - __m128i outcolor = fgcolor; - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - - _mm_storel_epi64((__m128i*)desttmp, outcolor); - dest[offset] = desttmp[0]; - dest[offset + pitch] = desttmp[1]; - } - - if (ssecount * 2 != count) - { - int index = 
ssecount * 2; - int offset = index * pitch; - - // Sample - unsigned int ifgcolor[2], ifgshade[2]; - // Clamp to edge - unsigned int frac_y0 = (clamp(frac, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; - unsigned int frac_y1 = (clamp(frac + one, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; - unsigned int y0 = frac_y0 >> FRACBITS; - unsigned int y1 = frac_y1 >> FRACBITS; - - unsigned int p00 = source[y0]; - unsigned int p01 = source[y1]; - unsigned int p10 = source2[y0]; - unsigned int p11 = source2[y1]; - - unsigned int inv_b = texturefracx; - unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; - unsigned int a = 16 - inv_a; - unsigned int b = 16 - inv_b; - - unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - unsigned int sampleshadeout = 0; - ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - ifgshade[0] = sampleshadeout; - ifgshade[1] = 0; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i lit_dynlight = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); - - int blue0 = BPART(ifgcolor[0]); - int green0 = GPART(ifgcolor[0]); - int red0 = RPART(ifgcolor[0]); - int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - - int blue1 = BPART(ifgcolor[1]); - int green1 = GPART(ifgcolor[1]); - int red1 = RPART(ifgcolor[1]); - int intensity1 = ((red1 * 77 + green1 * 143 + blue1 
* 37) >> 8) * desaturate; - - __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); - - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, mlight); - fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - - fgcolor = _mm_add_epi16(fgcolor, lit_dynlight); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); - - // Blend - __m128i outcolor = fgcolor; - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - - dest[offset] = _mm_cvtsi128_si32(outcolor); - } - } - } - } - - FString DebugInfo() override { return "DrawSprite32Command"; } - }; - - class DrawSpriteAddClamp32Command : public DrawerCommand - { - protected: - SpriteDrawerArgs args; - - public: - DrawSpriteAddClamp32Command(const SpriteDrawerArgs &drawerargs) : args(drawerargs) { } - - void Execute(DrawerThread *thread) override - { - auto shade_constants = args.ColormapConstants(); - if (shade_constants.simple_shade) - { - const uint32_t *source = (const uint32_t*)args.TexturePixels(); - const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); - bool is_nearest_filter = (source2 == nullptr); - if (is_nearest_filter) - { - int textureheight = args.TextureHeight(); - uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; - - // Shade constants - __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); - dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); - dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); - - int count = args.Count(); - int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t 
fracstep = args.TextureVStep(); - uint32_t frac = args.TextureVPos(); - uint32_t texturefracx = args.TextureUPos(); - uint32_t *dest = (uint32_t*)args.Dest(); - int dest_y = args.DestY(); - - count = thread->count_for_thread(dest_y, count); - if (count <= 0) return; - frac += thread->skipped_by_thread(dest_y) * fracstep; - dest = thread->dest_for_thread(dest_y, pitch, dest); - fracstep *= thread->num_cores; - pitch *= thread->num_cores; - uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); - uint32_t srccolor = args.SrcColorBgra(); - uint32_t color = LightBgra::shade_pal_index_simple(args.SolidColor(), light); - - int ssecount = count / 2; - for (int index = 0; index < ssecount; index++) - { - int offset = index * pitch * 2; - uint32_t desttmp[2]; - desttmp[0] = dest[offset]; - desttmp[1] = dest[offset + pitch]; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2], ifgshade[2]; - { - int sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; - unsigned int sampleout = source[sample_index]; - unsigned int sampleshadeout = 0; - ifgcolor[0] = sampleout; - ifgshade[0] = sampleshadeout; - frac += fracstep; - } - { - int sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; - unsigned int sampleout = source[sample_index]; - unsigned int sampleshadeout = 0; - ifgcolor[1] = sampleout; - ifgshade[1] = sampleshadeout; - frac += fracstep; - } - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - - // Blend - uint32_t alpha0 = APART(ifgcolor[0]); - uint32_t alpha1 = APART(ifgcolor[1]); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 >> 7; // 255->256 - uint32_t inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + 
(inv_alpha0 << 8) + 128) >> 8; - uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); - __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - _mm_storel_epi64((__m128i*)desttmp, outcolor); - dest[offset] = desttmp[0]; - dest[offset + pitch] = desttmp[1]; - } - - if (ssecount * 2 != count) - { - int index = ssecount * 2; - int offset = index * pitch; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2], ifgshade[2]; - int sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; - unsigned int sampleout = source[sample_index]; - unsigned int sampleshadeout = 0; - ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - ifgshade[0] = sampleshadeout; - ifgshade[1] = 0; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - - // Blend - uint32_t alpha0 
= APART(ifgcolor[0]); - uint32_t alpha1 = APART(ifgcolor[1]); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 >> 7; // 255->256 - uint32_t inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; - uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); - __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - dest[offset] = _mm_cvtsi128_si32(outcolor); - } - } - else - { - int textureheight = args.TextureHeight(); - uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; - - // Shade constants - __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); - dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); - dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - mlight = 
_mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); - - int count = args.Count(); - int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t fracstep = args.TextureVStep(); - uint32_t frac = args.TextureVPos(); - uint32_t texturefracx = args.TextureUPos(); - uint32_t *dest = (uint32_t*)args.Dest(); - int dest_y = args.DestY(); - - count = thread->count_for_thread(dest_y, count); - if (count <= 0) return; - frac += thread->skipped_by_thread(dest_y) * fracstep; - dest = thread->dest_for_thread(dest_y, pitch, dest); - fracstep *= thread->num_cores; - pitch *= thread->num_cores; - frac -= one / 2; - uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); - uint32_t srccolor = args.SrcColorBgra(); - uint32_t color = LightBgra::shade_pal_index_simple(args.SolidColor(), light); - - int ssecount = count / 2; - for (int index = 0; index < ssecount; index++) - { - int offset = index * pitch * 2; - uint32_t desttmp[2]; - desttmp[0] = dest[offset]; - desttmp[1] = dest[offset + pitch]; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2], ifgshade[2]; - { - // Clamp to edge - unsigned int frac_y0 = (clamp(frac, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; - unsigned int frac_y1 = (clamp(frac + one, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; - unsigned int y0 = frac_y0 >> FRACBITS; - unsigned int y1 = frac_y1 >> FRACBITS; - - unsigned int p00 = source[y0]; - unsigned int p01 = source[y1]; - unsigned int p10 = source2[y0]; - unsigned int p11 = source2[y1]; - - unsigned int inv_b = texturefracx; - unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; - unsigned int a = 16 - inv_a; - unsigned int b = 16 - inv_b; - - unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sgreen = (GPART(p00) * (a * 
b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - unsigned int sampleshadeout = 0; - ifgcolor[0] = sampleout; - ifgshade[0] = sampleshadeout; - frac += fracstep; - } - { - // Clamp to edge - unsigned int frac_y0 = (clamp(frac, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; - unsigned int frac_y1 = (clamp(frac + one, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; - unsigned int y0 = frac_y0 >> FRACBITS; - unsigned int y1 = frac_y1 >> FRACBITS; - - unsigned int p00 = source[y0]; - unsigned int p01 = source[y1]; - unsigned int p10 = source2[y0]; - unsigned int p11 = source2[y1]; - - unsigned int inv_b = texturefracx; - unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; - unsigned int a = 16 - inv_a; - unsigned int b = 16 - inv_b; - - unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - unsigned int sampleshadeout = 0; - ifgcolor[1] = sampleout; - ifgshade[1] = sampleshadeout; - frac += fracstep; - } - __m128i fgcolor = 
_mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - - // Blend - uint32_t alpha0 = APART(ifgcolor[0]); - uint32_t alpha1 = APART(ifgcolor[1]); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 >> 7; // 255->256 - uint32_t inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; - uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); - __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - _mm_storel_epi64((__m128i*)desttmp, outcolor); - dest[offset] = desttmp[0]; - dest[offset + pitch] = desttmp[1]; - } - - if (ssecount * 2 != count) - { - int index = ssecount * 2; - int offset = index * pitch; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2], ifgshade[2]; - // Clamp to edge - 
unsigned int frac_y0 = (clamp(frac, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; - unsigned int frac_y1 = (clamp(frac + one, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; - unsigned int y0 = frac_y0 >> FRACBITS; - unsigned int y1 = frac_y1 >> FRACBITS; - - unsigned int p00 = source[y0]; - unsigned int p01 = source[y1]; - unsigned int p10 = source2[y0]; - unsigned int p11 = source2[y1]; - - unsigned int inv_b = texturefracx; - unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; - unsigned int a = 16 - inv_a; - unsigned int b = 16 - inv_b; - - unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - unsigned int sampleshadeout = 0; - ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - ifgshade[0] = sampleshadeout; - ifgshade[1] = 0; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - - // Blend - uint32_t alpha0 = APART(ifgcolor[0]); - uint32_t alpha1 = APART(ifgcolor[1]); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 >> 7; // 255->256 - uint32_t inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; - uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha 
* alpha1 + 128) >> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); - __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - dest[offset] = _mm_cvtsi128_si32(outcolor); - } - } - } - else - { - const uint32_t *source = (const uint32_t*)args.TexturePixels(); - const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); - bool is_nearest_filter = (source2 == nullptr); - if (is_nearest_filter) - { - int textureheight = args.TextureHeight(); - uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; - - // Shade constants - __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); - dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); - dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - 
shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); - __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); - shade_fade = _mm_mullo_epi16(shade_fade, inv_light); - __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); - int desaturate = shade_constants.desaturate; - - __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); - lightcontrib = _mm_sub_epi16(lightcontrib, mlight); - - int count = args.Count(); - int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t fracstep = args.TextureVStep(); - uint32_t frac = args.TextureVPos(); - uint32_t texturefracx = args.TextureUPos(); - uint32_t *dest = (uint32_t*)args.Dest(); - int dest_y = args.DestY(); - - count = thread->count_for_thread(dest_y, count); - if (count <= 0) return; - frac += thread->skipped_by_thread(dest_y) * fracstep; - dest = thread->dest_for_thread(dest_y, pitch, dest); - fracstep *= thread->num_cores; - pitch *= thread->num_cores; - uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); - uint32_t srccolor = args.SrcColorBgra(); - uint32_t color = LightBgra::shade_pal_index_simple(args.SolidColor(), light); - - int ssecount = count / 2; - for (int index = 0; index < ssecount; index++) - { - int offset = index * pitch * 2; - uint32_t desttmp[2]; - desttmp[0] = dest[offset]; - desttmp[1] = dest[offset + pitch]; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); 
- - // Sample - unsigned int ifgcolor[2], ifgshade[2]; - { - int sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; - unsigned int sampleout = source[sample_index]; - unsigned int sampleshadeout = 0; - ifgcolor[0] = sampleout; - ifgshade[0] = sampleshadeout; - frac += fracstep; - } - { - int sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; - unsigned int sampleout = source[sample_index]; - unsigned int sampleshadeout = 0; - ifgcolor[1] = sampleout; - ifgshade[1] = sampleshadeout; - frac += fracstep; - } - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i lit_dynlight = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); - - int blue0 = BPART(ifgcolor[0]); - int green0 = GPART(ifgcolor[0]); - int red0 = RPART(ifgcolor[0]); - int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - - int blue1 = BPART(ifgcolor[1]); - int green1 = GPART(ifgcolor[1]); - int red1 = RPART(ifgcolor[1]); - int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; - - __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); - - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, mlight); - fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - - fgcolor = _mm_add_epi16(fgcolor, lit_dynlight); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); - - // Blend - uint32_t alpha0 = APART(ifgcolor[0]); - uint32_t alpha1 = APART(ifgcolor[1]); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 >> 7; // 255->256 - uint32_t inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; - uint32_t bgalpha1 = (destalpha * alpha1 + 
(inv_alpha1 << 8) + 128) >> 8; - uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); - __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - _mm_storel_epi64((__m128i*)desttmp, outcolor); - dest[offset] = desttmp[0]; - dest[offset + pitch] = desttmp[1]; - } - - if (ssecount * 2 != count) - { - int index = ssecount * 2; - int offset = index * pitch; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2], ifgshade[2]; - int sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; - unsigned int sampleout = source[sample_index]; - unsigned int sampleshadeout = 0; - ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - ifgshade[0] = sampleshadeout; - ifgshade[1] = 0; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i lit_dynlight = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); - - int blue0 = BPART(ifgcolor[0]); - int green0 = GPART(ifgcolor[0]); - int red0 = 
RPART(ifgcolor[0]); - int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - - int blue1 = BPART(ifgcolor[1]); - int green1 = GPART(ifgcolor[1]); - int red1 = RPART(ifgcolor[1]); - int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; - - __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); - - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, mlight); - fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - - fgcolor = _mm_add_epi16(fgcolor, lit_dynlight); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); - - // Blend - uint32_t alpha0 = APART(ifgcolor[0]); - uint32_t alpha1 = APART(ifgcolor[1]); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 >> 7; // 255->256 - uint32_t inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; - uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); - __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); - 
- out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - dest[offset] = _mm_cvtsi128_si32(outcolor); - } - } - else - { - int textureheight = args.TextureHeight(); - uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; - - // Shade constants - __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); - dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); - dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); - __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); - shade_fade = _mm_mullo_epi16(shade_fade, inv_light); - __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); - int desaturate = shade_constants.desaturate; - - __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); - lightcontrib = _mm_sub_epi16(lightcontrib, mlight); - - int count = args.Count(); - int pitch = 
RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t fracstep = args.TextureVStep(); - uint32_t frac = args.TextureVPos(); - uint32_t texturefracx = args.TextureUPos(); - uint32_t *dest = (uint32_t*)args.Dest(); - int dest_y = args.DestY(); - - count = thread->count_for_thread(dest_y, count); - if (count <= 0) return; - frac += thread->skipped_by_thread(dest_y) * fracstep; - dest = thread->dest_for_thread(dest_y, pitch, dest); - fracstep *= thread->num_cores; - pitch *= thread->num_cores; - frac -= one / 2; - uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); - uint32_t srccolor = args.SrcColorBgra(); - uint32_t color = LightBgra::shade_pal_index_simple(args.SolidColor(), light); - - int ssecount = count / 2; - for (int index = 0; index < ssecount; index++) - { - int offset = index * pitch * 2; - uint32_t desttmp[2]; - desttmp[0] = dest[offset]; - desttmp[1] = dest[offset + pitch]; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2], ifgshade[2]; - { - // Clamp to edge - unsigned int frac_y0 = (clamp(frac, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; - unsigned int frac_y1 = (clamp(frac + one, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; - unsigned int y0 = frac_y0 >> FRACBITS; - unsigned int y1 = frac_y1 >> FRACBITS; - - unsigned int p00 = source[y0]; - unsigned int p01 = source[y1]; - unsigned int p10 = source2[y0]; - unsigned int p11 = source2[y1]; - - unsigned int inv_b = texturefracx; - unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; - unsigned int a = 16 - inv_a; - unsigned int b = 16 - inv_b; - - unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned 
int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - unsigned int sampleshadeout = 0; - ifgcolor[0] = sampleout; - ifgshade[0] = sampleshadeout; - frac += fracstep; - } - { - // Clamp to edge - unsigned int frac_y0 = (clamp(frac, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; - unsigned int frac_y1 = (clamp(frac + one, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; - unsigned int y0 = frac_y0 >> FRACBITS; - unsigned int y1 = frac_y1 >> FRACBITS; - - unsigned int p00 = source[y0]; - unsigned int p01 = source[y1]; - unsigned int p10 = source2[y0]; - unsigned int p11 = source2[y1]; - - unsigned int inv_b = texturefracx; - unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; - unsigned int a = 16 - inv_a; - unsigned int b = 16 - inv_b; - - unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - unsigned int sampleshadeout = 0; - ifgcolor[1] = sampleout; - ifgshade[1] = sampleshadeout; - frac += fracstep; - } - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i lit_dynlight = 
_mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); - - int blue0 = BPART(ifgcolor[0]); - int green0 = GPART(ifgcolor[0]); - int red0 = RPART(ifgcolor[0]); - int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - - int blue1 = BPART(ifgcolor[1]); - int green1 = GPART(ifgcolor[1]); - int red1 = RPART(ifgcolor[1]); - int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; - - __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); - - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, mlight); - fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - - fgcolor = _mm_add_epi16(fgcolor, lit_dynlight); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); - - // Blend - uint32_t alpha0 = APART(ifgcolor[0]); - uint32_t alpha1 = APART(ifgcolor[1]); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 >> 7; // 255->256 - uint32_t inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; - uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = 
_mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); - __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - _mm_storel_epi64((__m128i*)desttmp, outcolor); - dest[offset] = desttmp[0]; - dest[offset + pitch] = desttmp[1]; - } - - if (ssecount * 2 != count) - { - int index = ssecount * 2; - int offset = index * pitch; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2], ifgshade[2]; - // Clamp to edge - unsigned int frac_y0 = (clamp(frac, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; - unsigned int frac_y1 = (clamp(frac + one, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; - unsigned int y0 = frac_y0 >> FRACBITS; - unsigned int y1 = frac_y1 >> FRACBITS; - - unsigned int p00 = source[y0]; - unsigned int p01 = source[y1]; - unsigned int p10 = source2[y0]; - unsigned int p11 = source2[y1]; - - unsigned int inv_b = texturefracx; - unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; - unsigned int a = 16 - inv_a; - unsigned int b = 16 - inv_b; - - unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | 
sblue; - unsigned int sampleshadeout = 0; - ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - ifgshade[0] = sampleshadeout; - ifgshade[1] = 0; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i lit_dynlight = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); - - int blue0 = BPART(ifgcolor[0]); - int green0 = GPART(ifgcolor[0]); - int red0 = RPART(ifgcolor[0]); - int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - - int blue1 = BPART(ifgcolor[1]); - int green1 = GPART(ifgcolor[1]); - int red1 = RPART(ifgcolor[1]); - int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; - - __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); - - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, mlight); - fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - - fgcolor = _mm_add_epi16(fgcolor, lit_dynlight); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); - - // Blend - uint32_t alpha0 = APART(ifgcolor[0]); - uint32_t alpha1 = APART(ifgcolor[1]); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 >> 7; // 255->256 - uint32_t inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; - uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - 
bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); - __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - dest[offset] = _mm_cvtsi128_si32(outcolor); - } - } - } - } - - FString DebugInfo() override { return "DrawSpriteAddClamp32Command"; } - }; - - class DrawSpriteSubClamp32Command : public DrawerCommand - { - protected: - SpriteDrawerArgs args; - - public: - DrawSpriteSubClamp32Command(const SpriteDrawerArgs &drawerargs) : args(drawerargs) { } - - void Execute(DrawerThread *thread) override - { - auto shade_constants = args.ColormapConstants(); - if (shade_constants.simple_shade) - { - const uint32_t *source = (const uint32_t*)args.TexturePixels(); - const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); - bool is_nearest_filter = (source2 == nullptr); - if (is_nearest_filter) - { - int textureheight = args.TextureHeight(); - uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; - - // Shade constants - __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); - dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); - dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); - - int count = args.Count(); - int pitch = 
RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t fracstep = args.TextureVStep(); - uint32_t frac = args.TextureVPos(); - uint32_t texturefracx = args.TextureUPos(); - uint32_t *dest = (uint32_t*)args.Dest(); - int dest_y = args.DestY(); - - count = thread->count_for_thread(dest_y, count); - if (count <= 0) return; - frac += thread->skipped_by_thread(dest_y) * fracstep; - dest = thread->dest_for_thread(dest_y, pitch, dest); - fracstep *= thread->num_cores; - pitch *= thread->num_cores; - uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); - uint32_t srccolor = args.SrcColorBgra(); - uint32_t color = LightBgra::shade_pal_index_simple(args.SolidColor(), light); - - int ssecount = count / 2; - for (int index = 0; index < ssecount; index++) - { - int offset = index * pitch * 2; - uint32_t desttmp[2]; - desttmp[0] = dest[offset]; - desttmp[1] = dest[offset + pitch]; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2], ifgshade[2]; - { - int sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; - unsigned int sampleout = source[sample_index]; - unsigned int sampleshadeout = 0; - ifgcolor[0] = sampleout; - ifgshade[0] = sampleshadeout; - frac += fracstep; - } - { - int sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; - unsigned int sampleout = source[sample_index]; - unsigned int sampleshadeout = 0; - ifgcolor[1] = sampleout; - ifgshade[1] = sampleshadeout; - frac += fracstep; - } - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - - // Blend - uint32_t alpha0 = APART(ifgcolor[0]); - uint32_t alpha1 = APART(ifgcolor[1]); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 >> 7; // 255->256 - uint32_t inv_alpha0 = 256 - alpha0; - uint32_t 
inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; - uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_sub_epi32(fg_lo, bg_lo); - __m128i out_hi = _mm_sub_epi32(fg_hi, bg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - _mm_storel_epi64((__m128i*)desttmp, outcolor); - dest[offset] = desttmp[0]; - dest[offset + pitch] = desttmp[1]; - } - - if (ssecount * 2 != count) - { - int index = ssecount * 2; - int offset = index * pitch; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2], ifgshade[2]; - int sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; - unsigned int sampleout = source[sample_index]; - unsigned int sampleshadeout = 0; - ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - ifgshade[0] = sampleshadeout; - ifgshade[1] = 0; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - fgcolor = 
_mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - - // Blend - uint32_t alpha0 = APART(ifgcolor[0]); - uint32_t alpha1 = APART(ifgcolor[1]); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 >> 7; // 255->256 - uint32_t inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; - uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_sub_epi32(fg_lo, bg_lo); - __m128i out_hi = _mm_sub_epi32(fg_hi, bg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - dest[offset] = _mm_cvtsi128_si32(outcolor); - } - } - else - { - int textureheight = args.TextureHeight(); - uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; - - // Shade constants - __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); - dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); - dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = 
_mm_set_epi16(256, light, light, light, 256, light, light, light); - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); - - int count = args.Count(); - int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t fracstep = args.TextureVStep(); - uint32_t frac = args.TextureVPos(); - uint32_t texturefracx = args.TextureUPos(); - uint32_t *dest = (uint32_t*)args.Dest(); - int dest_y = args.DestY(); - - count = thread->count_for_thread(dest_y, count); - if (count <= 0) return; - frac += thread->skipped_by_thread(dest_y) * fracstep; - dest = thread->dest_for_thread(dest_y, pitch, dest); - fracstep *= thread->num_cores; - pitch *= thread->num_cores; - frac -= one / 2; - uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); - uint32_t srccolor = args.SrcColorBgra(); - uint32_t color = LightBgra::shade_pal_index_simple(args.SolidColor(), light); - - int ssecount = count / 2; - for (int index = 0; index < ssecount; index++) - { - int offset = index * pitch * 2; - uint32_t desttmp[2]; - desttmp[0] = dest[offset]; - desttmp[1] = dest[offset + pitch]; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2], ifgshade[2]; - { - // Clamp to edge - unsigned int frac_y0 = (clamp(frac, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; - unsigned int frac_y1 = (clamp(frac + one, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; - unsigned int y0 = frac_y0 >> FRACBITS; - unsigned int y1 = frac_y1 >> FRACBITS; - - unsigned int p00 = source[y0]; - unsigned int p01 = source[y1]; - unsigned int p10 = source2[y0]; - unsigned int p11 = source2[y1]; - - unsigned int inv_b = texturefracx; - unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; - unsigned int a = 16 - inv_a; - unsigned int b = 16 - inv_b; - - unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + 
RPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - unsigned int sampleshadeout = 0; - ifgcolor[0] = sampleout; - ifgshade[0] = sampleshadeout; - frac += fracstep; - } - { - // Clamp to edge - unsigned int frac_y0 = (clamp(frac, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; - unsigned int frac_y1 = (clamp(frac + one, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; - unsigned int y0 = frac_y0 >> FRACBITS; - unsigned int y1 = frac_y1 >> FRACBITS; - - unsigned int p00 = source[y0]; - unsigned int p01 = source[y1]; - unsigned int p10 = source2[y0]; - unsigned int p11 = source2[y1]; - - unsigned int inv_b = texturefracx; - unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; - unsigned int a = 16 - inv_a; - unsigned int b = 16 - inv_b; - - unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - unsigned int sampleshadeout = 0; - ifgcolor[1] = sampleout; - ifgshade[1] = 
sampleshadeout; - frac += fracstep; - } - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - - // Blend - uint32_t alpha0 = APART(ifgcolor[0]); - uint32_t alpha1 = APART(ifgcolor[1]); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 >> 7; // 255->256 - uint32_t inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; - uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_sub_epi32(fg_lo, bg_lo); - __m128i out_hi = _mm_sub_epi32(fg_hi, bg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - _mm_storel_epi64((__m128i*)desttmp, outcolor); - dest[offset] = desttmp[0]; - dest[offset + pitch] = desttmp[1]; - } - - if (ssecount * 2 != count) - { - int index = ssecount * 2; - int offset = index * pitch; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); - - // Sample - 
unsigned int ifgcolor[2], ifgshade[2]; - // Clamp to edge - unsigned int frac_y0 = (clamp(frac, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; - unsigned int frac_y1 = (clamp(frac + one, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; - unsigned int y0 = frac_y0 >> FRACBITS; - unsigned int y1 = frac_y1 >> FRACBITS; - - unsigned int p00 = source[y0]; - unsigned int p01 = source[y1]; - unsigned int p10 = source2[y0]; - unsigned int p11 = source2[y1]; - - unsigned int inv_b = texturefracx; - unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; - unsigned int a = 16 - inv_a; - unsigned int b = 16 - inv_b; - - unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - unsigned int sampleshadeout = 0; - ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - ifgshade[0] = sampleshadeout; - ifgshade[1] = 0; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - - // Blend - uint32_t alpha0 = APART(ifgcolor[0]); - uint32_t alpha1 = APART(ifgcolor[1]); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 >> 7; // 255->256 - uint32_t inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; - uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - uint32_t fgalpha0 = 
(srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_sub_epi32(fg_lo, bg_lo); - __m128i out_hi = _mm_sub_epi32(fg_hi, bg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - dest[offset] = _mm_cvtsi128_si32(outcolor); - } - } - } - else - { - const uint32_t *source = (const uint32_t*)args.TexturePixels(); - const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); - bool is_nearest_filter = (source2 == nullptr); - if (is_nearest_filter) - { - int textureheight = args.TextureHeight(); - uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; - - // Shade constants - __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); - dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); - dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - __m128i inv_desaturate = _mm_setr_epi16(256, 256 - 
shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); - __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); - shade_fade = _mm_mullo_epi16(shade_fade, inv_light); - __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); - int desaturate = shade_constants.desaturate; - - __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); - lightcontrib = _mm_sub_epi16(lightcontrib, mlight); - - int count = args.Count(); - int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t fracstep = args.TextureVStep(); - uint32_t frac = args.TextureVPos(); - uint32_t texturefracx = args.TextureUPos(); - uint32_t *dest = (uint32_t*)args.Dest(); - int dest_y = args.DestY(); - - count = thread->count_for_thread(dest_y, count); - if (count <= 0) return; - frac += thread->skipped_by_thread(dest_y) * fracstep; - dest = thread->dest_for_thread(dest_y, pitch, dest); - fracstep *= thread->num_cores; - pitch *= thread->num_cores; - uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); - uint32_t srccolor = args.SrcColorBgra(); - uint32_t color = LightBgra::shade_pal_index_simple(args.SolidColor(), light); - - int ssecount = count / 2; - for (int index = 0; index < ssecount; index++) - { - int offset = index * pitch * 2; - uint32_t desttmp[2]; - desttmp[0] = dest[offset]; - desttmp[1] = dest[offset + pitch]; - __m128i bgcolor = 
_mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2], ifgshade[2]; - { - int sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; - unsigned int sampleout = source[sample_index]; - unsigned int sampleshadeout = 0; - ifgcolor[0] = sampleout; - ifgshade[0] = sampleshadeout; - frac += fracstep; - } - { - int sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; - unsigned int sampleout = source[sample_index]; - unsigned int sampleshadeout = 0; - ifgcolor[1] = sampleout; - ifgshade[1] = sampleshadeout; - frac += fracstep; - } - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i lit_dynlight = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); - - int blue0 = BPART(ifgcolor[0]); - int green0 = GPART(ifgcolor[0]); - int red0 = RPART(ifgcolor[0]); - int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - - int blue1 = BPART(ifgcolor[1]); - int green1 = GPART(ifgcolor[1]); - int red1 = RPART(ifgcolor[1]); - int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; - - __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); - - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, mlight); - fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - - fgcolor = _mm_add_epi16(fgcolor, lit_dynlight); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); - - // Blend - uint32_t alpha0 = APART(ifgcolor[0]); - uint32_t alpha1 = APART(ifgcolor[1]); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 >> 7; // 255->256 - uint32_t inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + 
(inv_alpha0 << 8) + 128) >> 8; - uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_sub_epi32(fg_lo, bg_lo); - __m128i out_hi = _mm_sub_epi32(fg_hi, bg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - _mm_storel_epi64((__m128i*)desttmp, outcolor); - dest[offset] = desttmp[0]; - dest[offset + pitch] = desttmp[1]; - } - - if (ssecount * 2 != count) - { - int index = ssecount * 2; - int offset = index * pitch; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2], ifgshade[2]; - int sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; - unsigned int sampleout = source[sample_index]; - unsigned int sampleshadeout = 0; - ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - ifgshade[0] = sampleshadeout; - ifgshade[1] = 0; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i lit_dynlight = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); - - int 
blue0 = BPART(ifgcolor[0]); - int green0 = GPART(ifgcolor[0]); - int red0 = RPART(ifgcolor[0]); - int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - - int blue1 = BPART(ifgcolor[1]); - int green1 = GPART(ifgcolor[1]); - int red1 = RPART(ifgcolor[1]); - int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; - - __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); - - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, mlight); - fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - - fgcolor = _mm_add_epi16(fgcolor, lit_dynlight); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); - - // Blend - uint32_t alpha0 = APART(ifgcolor[0]); - uint32_t alpha1 = APART(ifgcolor[1]); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 >> 7; // 255->256 - uint32_t inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; - uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = 
_mm_sub_epi32(fg_lo, bg_lo); - __m128i out_hi = _mm_sub_epi32(fg_hi, bg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - dest[offset] = _mm_cvtsi128_si32(outcolor); - } - } - else - { - int textureheight = args.TextureHeight(); - uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; - - // Shade constants - __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); - dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); - dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); - __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); - shade_fade = _mm_mullo_epi16(shade_fade, inv_light); - __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); - int desaturate = shade_constants.desaturate; - - __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); - lightcontrib = 
_mm_sub_epi16(lightcontrib, mlight); - - int count = args.Count(); - int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t fracstep = args.TextureVStep(); - uint32_t frac = args.TextureVPos(); - uint32_t texturefracx = args.TextureUPos(); - uint32_t *dest = (uint32_t*)args.Dest(); - int dest_y = args.DestY(); - - count = thread->count_for_thread(dest_y, count); - if (count <= 0) return; - frac += thread->skipped_by_thread(dest_y) * fracstep; - dest = thread->dest_for_thread(dest_y, pitch, dest); - fracstep *= thread->num_cores; - pitch *= thread->num_cores; - frac -= one / 2; - uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); - uint32_t srccolor = args.SrcColorBgra(); - uint32_t color = LightBgra::shade_pal_index_simple(args.SolidColor(), light); - - int ssecount = count / 2; - for (int index = 0; index < ssecount; index++) - { - int offset = index * pitch * 2; - uint32_t desttmp[2]; - desttmp[0] = dest[offset]; - desttmp[1] = dest[offset + pitch]; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2], ifgshade[2]; - { - // Clamp to edge - unsigned int frac_y0 = (clamp(frac, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; - unsigned int frac_y1 = (clamp(frac + one, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; - unsigned int y0 = frac_y0 >> FRACBITS; - unsigned int y1 = frac_y1 >> FRACBITS; - - unsigned int p00 = source[y0]; - unsigned int p01 = source[y1]; - unsigned int p10 = source2[y0]; - unsigned int p11 = source2[y1]; - - unsigned int inv_b = texturefracx; - unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; - unsigned int a = 16 - inv_a; - unsigned int b = 16 - inv_b; - - unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + 
GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - unsigned int sampleshadeout = 0; - ifgcolor[0] = sampleout; - ifgshade[0] = sampleshadeout; - frac += fracstep; - } - { - // Clamp to edge - unsigned int frac_y0 = (clamp(frac, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; - unsigned int frac_y1 = (clamp(frac + one, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; - unsigned int y0 = frac_y0 >> FRACBITS; - unsigned int y1 = frac_y1 >> FRACBITS; - - unsigned int p00 = source[y0]; - unsigned int p01 = source[y1]; - unsigned int p10 = source2[y0]; - unsigned int p11 = source2[y1]; - - unsigned int inv_b = texturefracx; - unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; - unsigned int a = 16 - inv_a; - unsigned int b = 16 - inv_b; - - unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - unsigned int sampleshadeout = 0; - ifgcolor[1] = sampleout; - ifgshade[1] = sampleshadeout; - frac += fracstep; - } - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), 
_mm_setzero_si128()); - - // Shade - __m128i lit_dynlight = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); - - int blue0 = BPART(ifgcolor[0]); - int green0 = GPART(ifgcolor[0]); - int red0 = RPART(ifgcolor[0]); - int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - - int blue1 = BPART(ifgcolor[1]); - int green1 = GPART(ifgcolor[1]); - int red1 = RPART(ifgcolor[1]); - int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; - - __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); - - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, mlight); - fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - - fgcolor = _mm_add_epi16(fgcolor, lit_dynlight); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); - - // Blend - uint32_t alpha0 = APART(ifgcolor[0]); - uint32_t alpha1 = APART(ifgcolor[1]); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 >> 7; // 255->256 - uint32_t inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; - uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = 
_mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_sub_epi32(fg_lo, bg_lo); - __m128i out_hi = _mm_sub_epi32(fg_hi, bg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - _mm_storel_epi64((__m128i*)desttmp, outcolor); - dest[offset] = desttmp[0]; - dest[offset + pitch] = desttmp[1]; - } - - if (ssecount * 2 != count) - { - int index = ssecount * 2; - int offset = index * pitch; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2], ifgshade[2]; - // Clamp to edge - unsigned int frac_y0 = (clamp(frac, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; - unsigned int frac_y1 = (clamp(frac + one, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; - unsigned int y0 = frac_y0 >> FRACBITS; - unsigned int y1 = frac_y1 >> FRACBITS; - - unsigned int p00 = source[y0]; - unsigned int p01 = source[y1]; - unsigned int p10 = source2[y0]; - unsigned int p11 = source2[y1]; - - unsigned int inv_b = texturefracx; - unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; - unsigned int a = 16 - inv_a; - unsigned int b = 16 - inv_b; - - unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned 
int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - unsigned int sampleshadeout = 0; - ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - ifgshade[0] = sampleshadeout; - ifgshade[1] = 0; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i lit_dynlight = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); - - int blue0 = BPART(ifgcolor[0]); - int green0 = GPART(ifgcolor[0]); - int red0 = RPART(ifgcolor[0]); - int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - - int blue1 = BPART(ifgcolor[1]); - int green1 = GPART(ifgcolor[1]); - int red1 = RPART(ifgcolor[1]); - int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; - - __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); - - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, mlight); - fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - - fgcolor = _mm_add_epi16(fgcolor, lit_dynlight); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); - - // Blend - uint32_t alpha0 = APART(ifgcolor[0]); - uint32_t alpha1 = APART(ifgcolor[1]); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 >> 7; // 255->256 - uint32_t inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; - uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, 
fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_sub_epi32(fg_lo, bg_lo); - __m128i out_hi = _mm_sub_epi32(fg_hi, bg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - dest[offset] = _mm_cvtsi128_si32(outcolor); - } - } - } - } - - FString DebugInfo() override { return "DrawSpriteSubClamp32Command"; } - }; - - class DrawSpriteRevSubClamp32Command : public DrawerCommand - { - protected: - SpriteDrawerArgs args; - - public: - DrawSpriteRevSubClamp32Command(const SpriteDrawerArgs &drawerargs) : args(drawerargs) { } - - void Execute(DrawerThread *thread) override - { - auto shade_constants = args.ColormapConstants(); - if (shade_constants.simple_shade) - { - const uint32_t *source = (const uint32_t*)args.TexturePixels(); - const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); - bool is_nearest_filter = (source2 == nullptr); - if (is_nearest_filter) - { - int textureheight = args.TextureHeight(); - uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; - - // Shade constants - __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); - dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); - dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); 
- - int count = args.Count(); - int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t fracstep = args.TextureVStep(); - uint32_t frac = args.TextureVPos(); - uint32_t texturefracx = args.TextureUPos(); - uint32_t *dest = (uint32_t*)args.Dest(); - int dest_y = args.DestY(); - - count = thread->count_for_thread(dest_y, count); - if (count <= 0) return; - frac += thread->skipped_by_thread(dest_y) * fracstep; - dest = thread->dest_for_thread(dest_y, pitch, dest); - fracstep *= thread->num_cores; - pitch *= thread->num_cores; - uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); - uint32_t srccolor = args.SrcColorBgra(); - uint32_t color = LightBgra::shade_pal_index_simple(args.SolidColor(), light); - - int ssecount = count / 2; - for (int index = 0; index < ssecount; index++) - { - int offset = index * pitch * 2; - uint32_t desttmp[2]; - desttmp[0] = dest[offset]; - desttmp[1] = dest[offset + pitch]; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2], ifgshade[2]; - { - int sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; - unsigned int sampleout = source[sample_index]; - unsigned int sampleshadeout = 0; - ifgcolor[0] = sampleout; - ifgshade[0] = sampleshadeout; - frac += fracstep; - } - { - int sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; - unsigned int sampleout = source[sample_index]; - unsigned int sampleshadeout = 0; - ifgcolor[1] = sampleout; - ifgshade[1] = sampleshadeout; - frac += fracstep; - } - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - - // Blend - uint32_t alpha0 = APART(ifgcolor[0]); - uint32_t alpha1 = APART(ifgcolor[1]); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 >> 7; // 255->256 - uint32_t 
inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; - uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_sub_epi32(bg_lo, fg_lo); - __m128i out_hi = _mm_sub_epi32(bg_hi, fg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - _mm_storel_epi64((__m128i*)desttmp, outcolor); - dest[offset] = desttmp[0]; - dest[offset + pitch] = desttmp[1]; - } - - if (ssecount * 2 != count) - { - int index = ssecount * 2; - int offset = index * pitch; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2], ifgshade[2]; - int sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; - unsigned int sampleout = source[sample_index]; - unsigned int sampleshadeout = 0; - ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - ifgshade[0] = sampleshadeout; - ifgshade[1] = 0; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), 
_mm_setzero_si128()); - - // Shade - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - - // Blend - uint32_t alpha0 = APART(ifgcolor[0]); - uint32_t alpha1 = APART(ifgcolor[1]); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 >> 7; // 255->256 - uint32_t inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; - uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_sub_epi32(bg_lo, fg_lo); - __m128i out_hi = _mm_sub_epi32(bg_hi, fg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - dest[offset] = _mm_cvtsi128_si32(outcolor); - } - } - else - { - int textureheight = args.TextureHeight(); - uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; - - // Shade constants - __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); - dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); - dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); - int light = 256 - (args.Light() >> 
(FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); - - int count = args.Count(); - int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t fracstep = args.TextureVStep(); - uint32_t frac = args.TextureVPos(); - uint32_t texturefracx = args.TextureUPos(); - uint32_t *dest = (uint32_t*)args.Dest(); - int dest_y = args.DestY(); - - count = thread->count_for_thread(dest_y, count); - if (count <= 0) return; - frac += thread->skipped_by_thread(dest_y) * fracstep; - dest = thread->dest_for_thread(dest_y, pitch, dest); - fracstep *= thread->num_cores; - pitch *= thread->num_cores; - frac -= one / 2; - uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); - uint32_t srccolor = args.SrcColorBgra(); - uint32_t color = LightBgra::shade_pal_index_simple(args.SolidColor(), light); - - int ssecount = count / 2; - for (int index = 0; index < ssecount; index++) - { - int offset = index * pitch * 2; - uint32_t desttmp[2]; - desttmp[0] = dest[offset]; - desttmp[1] = dest[offset + pitch]; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2], ifgshade[2]; - { - // Clamp to edge - unsigned int frac_y0 = (clamp(frac, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; - unsigned int frac_y1 = (clamp(frac + one, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; - unsigned int y0 = frac_y0 >> FRACBITS; - unsigned int y1 = frac_y1 >> FRACBITS; - - unsigned int p00 = source[y0]; - unsigned int p01 = source[y1]; - unsigned int p10 = source2[y0]; - unsigned int p11 = source2[y1]; - - unsigned int inv_b = texturefracx; - unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; - unsigned int a = 16 - inv_a; - unsigned int b = 16 - inv_b; - - unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + 
RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - unsigned int sampleshadeout = 0; - ifgcolor[0] = sampleout; - ifgshade[0] = sampleshadeout; - frac += fracstep; - } - { - // Clamp to edge - unsigned int frac_y0 = (clamp(frac, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; - unsigned int frac_y1 = (clamp(frac + one, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; - unsigned int y0 = frac_y0 >> FRACBITS; - unsigned int y1 = frac_y1 >> FRACBITS; - - unsigned int p00 = source[y0]; - unsigned int p01 = source[y1]; - unsigned int p10 = source2[y0]; - unsigned int p11 = source2[y1]; - - unsigned int inv_b = texturefracx; - unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; - unsigned int a = 16 - inv_a; - unsigned int b = 16 - inv_b; - - unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - unsigned int sampleshadeout = 0; - ifgcolor[1] = sampleout; 
- ifgshade[1] = sampleshadeout; - frac += fracstep; - } - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - - // Blend - uint32_t alpha0 = APART(ifgcolor[0]); - uint32_t alpha1 = APART(ifgcolor[1]); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 >> 7; // 255->256 - uint32_t inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; - uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_sub_epi32(bg_lo, fg_lo); - __m128i out_hi = _mm_sub_epi32(bg_hi, fg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - _mm_storel_epi64((__m128i*)desttmp, outcolor); - dest[offset] = desttmp[0]; - dest[offset + pitch] = desttmp[1]; - } - - if (ssecount * 2 != count) - { - int index = ssecount * 2; - int offset = index * pitch; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); 
- - // Sample - unsigned int ifgcolor[2], ifgshade[2]; - // Clamp to edge - unsigned int frac_y0 = (clamp(frac, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; - unsigned int frac_y1 = (clamp(frac + one, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; - unsigned int y0 = frac_y0 >> FRACBITS; - unsigned int y1 = frac_y1 >> FRACBITS; - - unsigned int p00 = source[y0]; - unsigned int p01 = source[y1]; - unsigned int p10 = source2[y0]; - unsigned int p11 = source2[y1]; - - unsigned int inv_b = texturefracx; - unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; - unsigned int a = 16 - inv_a; - unsigned int b = 16 - inv_b; - - unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - unsigned int sampleshadeout = 0; - ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - ifgshade[0] = sampleshadeout; - ifgshade[1] = 0; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - - // Blend - uint32_t alpha0 = APART(ifgcolor[0]); - uint32_t alpha1 = APART(ifgcolor[1]); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 >> 7; // 255->256 - uint32_t inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; - uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - uint32_t 
fgalpha0 = (srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_sub_epi32(bg_lo, fg_lo); - __m128i out_hi = _mm_sub_epi32(bg_hi, fg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - dest[offset] = _mm_cvtsi128_si32(outcolor); - } - } - } - else - { - const uint32_t *source = (const uint32_t*)args.TexturePixels(); - const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); - bool is_nearest_filter = (source2 == nullptr); - if (is_nearest_filter) - { - int textureheight = args.TextureHeight(); - uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; - - // Shade constants - __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); - dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); - dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - __m128i inv_desaturate = _mm_setr_epi16(256, 256 - 
shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); - __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); - shade_fade = _mm_mullo_epi16(shade_fade, inv_light); - __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); - int desaturate = shade_constants.desaturate; - - __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); - lightcontrib = _mm_sub_epi16(lightcontrib, mlight); - - int count = args.Count(); - int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t fracstep = args.TextureVStep(); - uint32_t frac = args.TextureVPos(); - uint32_t texturefracx = args.TextureUPos(); - uint32_t *dest = (uint32_t*)args.Dest(); - int dest_y = args.DestY(); - - count = thread->count_for_thread(dest_y, count); - if (count <= 0) return; - frac += thread->skipped_by_thread(dest_y) * fracstep; - dest = thread->dest_for_thread(dest_y, pitch, dest); - fracstep *= thread->num_cores; - pitch *= thread->num_cores; - uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); - uint32_t srccolor = args.SrcColorBgra(); - uint32_t color = LightBgra::shade_pal_index_simple(args.SolidColor(), light); - - int ssecount = count / 2; - for (int index = 0; index < ssecount; index++) - { - int offset = index * pitch * 2; - uint32_t desttmp[2]; - desttmp[0] = dest[offset]; - desttmp[1] = dest[offset + pitch]; - __m128i bgcolor = 
_mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2], ifgshade[2]; - { - int sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; - unsigned int sampleout = source[sample_index]; - unsigned int sampleshadeout = 0; - ifgcolor[0] = sampleout; - ifgshade[0] = sampleshadeout; - frac += fracstep; - } - { - int sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; - unsigned int sampleout = source[sample_index]; - unsigned int sampleshadeout = 0; - ifgcolor[1] = sampleout; - ifgshade[1] = sampleshadeout; - frac += fracstep; - } - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i lit_dynlight = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); - - int blue0 = BPART(ifgcolor[0]); - int green0 = GPART(ifgcolor[0]); - int red0 = RPART(ifgcolor[0]); - int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - - int blue1 = BPART(ifgcolor[1]); - int green1 = GPART(ifgcolor[1]); - int red1 = RPART(ifgcolor[1]); - int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; - - __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); - - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, mlight); - fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - - fgcolor = _mm_add_epi16(fgcolor, lit_dynlight); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); - - // Blend - uint32_t alpha0 = APART(ifgcolor[0]); - uint32_t alpha1 = APART(ifgcolor[1]); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 >> 7; // 255->256 - uint32_t inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + 
(inv_alpha0 << 8) + 128) >> 8; - uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_sub_epi32(bg_lo, fg_lo); - __m128i out_hi = _mm_sub_epi32(bg_hi, fg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - _mm_storel_epi64((__m128i*)desttmp, outcolor); - dest[offset] = desttmp[0]; - dest[offset + pitch] = desttmp[1]; - } - - if (ssecount * 2 != count) - { - int index = ssecount * 2; - int offset = index * pitch; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2], ifgshade[2]; - int sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; - unsigned int sampleout = source[sample_index]; - unsigned int sampleshadeout = 0; - ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - ifgshade[0] = sampleshadeout; - ifgshade[1] = 0; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i lit_dynlight = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); - - int 
blue0 = BPART(ifgcolor[0]); - int green0 = GPART(ifgcolor[0]); - int red0 = RPART(ifgcolor[0]); - int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - - int blue1 = BPART(ifgcolor[1]); - int green1 = GPART(ifgcolor[1]); - int red1 = RPART(ifgcolor[1]); - int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; - - __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); - - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, mlight); - fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - - fgcolor = _mm_add_epi16(fgcolor, lit_dynlight); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); - - // Blend - uint32_t alpha0 = APART(ifgcolor[0]); - uint32_t alpha1 = APART(ifgcolor[1]); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 >> 7; // 255->256 - uint32_t inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; - uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = 
_mm_sub_epi32(bg_lo, fg_lo); - __m128i out_hi = _mm_sub_epi32(bg_hi, fg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - dest[offset] = _mm_cvtsi128_si32(outcolor); - } - } - else - { - int textureheight = args.TextureHeight(); - uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; - - // Shade constants - __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); - dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); - dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); - __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); - shade_fade = _mm_mullo_epi16(shade_fade, inv_light); - __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); - int desaturate = shade_constants.desaturate; - - __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); - lightcontrib = 
_mm_sub_epi16(lightcontrib, mlight); - - int count = args.Count(); - int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t fracstep = args.TextureVStep(); - uint32_t frac = args.TextureVPos(); - uint32_t texturefracx = args.TextureUPos(); - uint32_t *dest = (uint32_t*)args.Dest(); - int dest_y = args.DestY(); - - count = thread->count_for_thread(dest_y, count); - if (count <= 0) return; - frac += thread->skipped_by_thread(dest_y) * fracstep; - dest = thread->dest_for_thread(dest_y, pitch, dest); - fracstep *= thread->num_cores; - pitch *= thread->num_cores; - frac -= one / 2; - uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); - uint32_t srccolor = args.SrcColorBgra(); - uint32_t color = LightBgra::shade_pal_index_simple(args.SolidColor(), light); - - int ssecount = count / 2; - for (int index = 0; index < ssecount; index++) - { - int offset = index * pitch * 2; - uint32_t desttmp[2]; - desttmp[0] = dest[offset]; - desttmp[1] = dest[offset + pitch]; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2], ifgshade[2]; - { - // Clamp to edge - unsigned int frac_y0 = (clamp(frac, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; - unsigned int frac_y1 = (clamp(frac + one, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; - unsigned int y0 = frac_y0 >> FRACBITS; - unsigned int y1 = frac_y1 >> FRACBITS; - - unsigned int p00 = source[y0]; - unsigned int p01 = source[y1]; - unsigned int p10 = source2[y0]; - unsigned int p11 = source2[y1]; - - unsigned int inv_b = texturefracx; - unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; - unsigned int a = 16 - inv_a; - unsigned int b = 16 - inv_b; - - unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + 
GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - unsigned int sampleshadeout = 0; - ifgcolor[0] = sampleout; - ifgshade[0] = sampleshadeout; - frac += fracstep; - } - { - // Clamp to edge - unsigned int frac_y0 = (clamp(frac, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; - unsigned int frac_y1 = (clamp(frac + one, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; - unsigned int y0 = frac_y0 >> FRACBITS; - unsigned int y1 = frac_y1 >> FRACBITS; - - unsigned int p00 = source[y0]; - unsigned int p01 = source[y1]; - unsigned int p10 = source2[y0]; - unsigned int p11 = source2[y1]; - - unsigned int inv_b = texturefracx; - unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; - unsigned int a = 16 - inv_a; - unsigned int b = 16 - inv_b; - - unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - unsigned int sampleshadeout = 0; - ifgcolor[1] = sampleout; - ifgshade[1] = sampleshadeout; - frac += fracstep; - } - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), 
_mm_setzero_si128()); - - // Shade - __m128i lit_dynlight = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); - - int blue0 = BPART(ifgcolor[0]); - int green0 = GPART(ifgcolor[0]); - int red0 = RPART(ifgcolor[0]); - int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - - int blue1 = BPART(ifgcolor[1]); - int green1 = GPART(ifgcolor[1]); - int red1 = RPART(ifgcolor[1]); - int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; - - __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); - - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, mlight); - fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - - fgcolor = _mm_add_epi16(fgcolor, lit_dynlight); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); - - // Blend - uint32_t alpha0 = APART(ifgcolor[0]); - uint32_t alpha1 = APART(ifgcolor[1]); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 >> 7; // 255->256 - uint32_t inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; - uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = 
_mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_sub_epi32(bg_lo, fg_lo); - __m128i out_hi = _mm_sub_epi32(bg_hi, fg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - _mm_storel_epi64((__m128i*)desttmp, outcolor); - dest[offset] = desttmp[0]; - dest[offset + pitch] = desttmp[1]; - } - - if (ssecount * 2 != count) - { - int index = ssecount * 2; - int offset = index * pitch; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2], ifgshade[2]; - // Clamp to edge - unsigned int frac_y0 = (clamp(frac, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; - unsigned int frac_y1 = (clamp(frac + one, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; - unsigned int y0 = frac_y0 >> FRACBITS; - unsigned int y1 = frac_y1 >> FRACBITS; - - unsigned int p00 = source[y0]; - unsigned int p01 = source[y1]; - unsigned int p10 = source2[y0]; - unsigned int p11 = source2[y1]; - - unsigned int inv_b = texturefracx; - unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; - unsigned int a = 16 - inv_a; - unsigned int b = 16 - inv_b; - - unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned 
int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - unsigned int sampleshadeout = 0; - ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - ifgshade[0] = sampleshadeout; - ifgshade[1] = 0; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i lit_dynlight = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); - - int blue0 = BPART(ifgcolor[0]); - int green0 = GPART(ifgcolor[0]); - int red0 = RPART(ifgcolor[0]); - int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - - int blue1 = BPART(ifgcolor[1]); - int green1 = GPART(ifgcolor[1]); - int red1 = RPART(ifgcolor[1]); - int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; - - __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); - - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, mlight); - fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - - fgcolor = _mm_add_epi16(fgcolor, lit_dynlight); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); - - // Blend - uint32_t alpha0 = APART(ifgcolor[0]); - uint32_t alpha1 = APART(ifgcolor[1]); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 >> 7; // 255->256 - uint32_t inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; - uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, 
fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_sub_epi32(bg_lo, fg_lo); - __m128i out_hi = _mm_sub_epi32(bg_hi, fg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - dest[offset] = _mm_cvtsi128_si32(outcolor); - } - } - } - } - - FString DebugInfo() override { return "DrawSpriteRevSubClamp32Command"; } - }; - - class FillSprite32Command : public DrawerCommand - { - protected: - SpriteDrawerArgs args; - - public: - FillSprite32Command(const SpriteDrawerArgs &drawerargs) : args(drawerargs) { } - - void Execute(DrawerThread *thread) override - { - auto shade_constants = args.ColormapConstants(); - if (shade_constants.simple_shade) - { - const uint32_t *source = (const uint32_t*)args.TexturePixels(); - const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); - int textureheight = args.TextureHeight(); - uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; - - // Shade constants - __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); - dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); - dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); - - int count = args.Count(); - int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); 
- uint32_t fracstep = args.TextureVStep(); - uint32_t frac = args.TextureVPos(); - uint32_t texturefracx = args.TextureUPos(); - uint32_t *dest = (uint32_t*)args.Dest(); - int dest_y = args.DestY(); - - count = thread->count_for_thread(dest_y, count); - if (count <= 0) return; - frac += thread->skipped_by_thread(dest_y) * fracstep; - dest = thread->dest_for_thread(dest_y, pitch, dest); - fracstep *= thread->num_cores; - pitch *= thread->num_cores; - uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); - uint32_t srccolor = args.SrcColorBgra(); - uint32_t color = LightBgra::shade_pal_index_simple(args.SolidColor(), light); - - int ssecount = count / 2; - for (int index = 0; index < ssecount; index++) - { - int offset = index * pitch * 2; - uint32_t desttmp[2]; - desttmp[0] = dest[offset]; - desttmp[1] = dest[offset + pitch]; - - // Sample - unsigned int ifgcolor[2], ifgshade[2]; - { - unsigned int sampleout = srccolor; - unsigned int sampleshadeout = 0; - ifgcolor[0] = sampleout; - ifgshade[0] = sampleshadeout; - frac += fracstep; - } - { - unsigned int sampleout = srccolor; - unsigned int sampleshadeout = 0; - ifgcolor[1] = sampleout; - ifgshade[1] = sampleshadeout; - frac += fracstep; - } - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - - // Blend - __m128i outcolor = fgcolor; - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - - _mm_storel_epi64((__m128i*)desttmp, outcolor); - dest[offset] = desttmp[0]; - dest[offset + pitch] = desttmp[1]; - } - - if (ssecount * 2 != count) - { - int index = ssecount * 2; - int offset = index * pitch; - - // Sample - unsigned int ifgcolor[2], ifgshade[2]; - unsigned int sampleout = srccolor; - unsigned int sampleshadeout = 0; - ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - ifgshade[0] = sampleshadeout; - ifgshade[1] = 0; - __m128i 
fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - - // Blend - __m128i outcolor = fgcolor; - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - - dest[offset] = _mm_cvtsi128_si32(outcolor); - } - } - else - { - const uint32_t *source = (const uint32_t*)args.TexturePixels(); - const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); - int textureheight = args.TextureHeight(); - uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; - - // Shade constants - __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); - dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); - dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); - __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); - shade_fade = _mm_mullo_epi16(shade_fade, inv_light); - __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); - int desaturate = shade_constants.desaturate; - - __m128i lightcontrib = 
_mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); - lightcontrib = _mm_sub_epi16(lightcontrib, mlight); - - int count = args.Count(); - int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t fracstep = args.TextureVStep(); - uint32_t frac = args.TextureVPos(); - uint32_t texturefracx = args.TextureUPos(); - uint32_t *dest = (uint32_t*)args.Dest(); - int dest_y = args.DestY(); - - count = thread->count_for_thread(dest_y, count); - if (count <= 0) return; - frac += thread->skipped_by_thread(dest_y) * fracstep; - dest = thread->dest_for_thread(dest_y, pitch, dest); - fracstep *= thread->num_cores; - pitch *= thread->num_cores; - uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); - uint32_t srccolor = args.SrcColorBgra(); - uint32_t color = LightBgra::shade_pal_index_simple(args.SolidColor(), light); - - int ssecount = count / 2; - for (int index = 0; index < ssecount; index++) - { - int offset = index * pitch * 2; - uint32_t desttmp[2]; - desttmp[0] = dest[offset]; - desttmp[1] = dest[offset + pitch]; - - // Sample - unsigned int ifgcolor[2], ifgshade[2]; - { - unsigned int sampleout = srccolor; - unsigned int sampleshadeout = 0; - ifgcolor[0] = sampleout; - ifgshade[0] = sampleshadeout; - frac += fracstep; - } - { - unsigned int sampleout = srccolor; - unsigned int sampleshadeout = 0; - ifgcolor[1] = sampleout; - ifgshade[1] = sampleshadeout; - frac += fracstep; - } - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i lit_dynlight = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); - - int blue0 = BPART(ifgcolor[0]); - int green0 = GPART(ifgcolor[0]); - int red0 = RPART(ifgcolor[0]); - int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - - int blue1 = BPART(ifgcolor[1]); - int green1 = GPART(ifgcolor[1]); - int red1 = RPART(ifgcolor[1]); - int intensity1 = ((red1 
* 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; - - __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); - - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, mlight); - fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - - fgcolor = _mm_add_epi16(fgcolor, lit_dynlight); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); - - // Blend - __m128i outcolor = fgcolor; - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - - _mm_storel_epi64((__m128i*)desttmp, outcolor); - dest[offset] = desttmp[0]; - dest[offset + pitch] = desttmp[1]; - } - - if (ssecount * 2 != count) - { - int index = ssecount * 2; - int offset = index * pitch; - - // Sample - unsigned int ifgcolor[2], ifgshade[2]; - unsigned int sampleout = srccolor; - unsigned int sampleshadeout = 0; - ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - ifgshade[0] = sampleshadeout; - ifgshade[1] = 0; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i lit_dynlight = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); - - int blue0 = BPART(ifgcolor[0]); - int green0 = GPART(ifgcolor[0]); - int red0 = RPART(ifgcolor[0]); - int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - - int blue1 = BPART(ifgcolor[1]); - int green1 = GPART(ifgcolor[1]); - int red1 = RPART(ifgcolor[1]); - int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; - - __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); - - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, mlight); - fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); 
- fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - - fgcolor = _mm_add_epi16(fgcolor, lit_dynlight); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); - - // Blend - __m128i outcolor = fgcolor; - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - - dest[offset] = _mm_cvtsi128_si32(outcolor); - } - } - } - - FString DebugInfo() override { return "FillSprite32Command"; } - }; - - class FillSpriteAddClamp32Command : public DrawerCommand - { - protected: - SpriteDrawerArgs args; - - public: - FillSpriteAddClamp32Command(const SpriteDrawerArgs &drawerargs) : args(drawerargs) { } - - void Execute(DrawerThread *thread) override - { - auto shade_constants = args.ColormapConstants(); - if (shade_constants.simple_shade) - { - const uint32_t *source = (const uint32_t*)args.TexturePixels(); - const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); - int textureheight = args.TextureHeight(); - uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; - - // Shade constants - __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); - dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); - dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); - - int count = args.Count(); - int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t fracstep = args.TextureVStep(); - uint32_t frac = args.TextureVPos(); - uint32_t texturefracx = args.TextureUPos(); - uint32_t *dest = (uint32_t*)args.Dest(); - int dest_y = args.DestY(); - - count = thread->count_for_thread(dest_y, count); - if (count <= 0) return; - frac += thread->skipped_by_thread(dest_y) * fracstep; - dest = thread->dest_for_thread(dest_y, pitch, dest); - fracstep *= thread->num_cores; - pitch *= thread->num_cores; 
- uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); - uint32_t srccolor = args.SrcColorBgra(); - uint32_t color = LightBgra::shade_pal_index_simple(args.SolidColor(), light); - - int ssecount = count / 2; - for (int index = 0; index < ssecount; index++) - { - int offset = index * pitch * 2; - uint32_t desttmp[2]; - desttmp[0] = dest[offset]; - desttmp[1] = dest[offset + pitch]; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2], ifgshade[2]; - { - unsigned int sampleout = srccolor; - unsigned int sampleshadeout = 0; - ifgcolor[0] = sampleout; - ifgshade[0] = sampleshadeout; - frac += fracstep; - } - { - unsigned int sampleout = srccolor; - unsigned int sampleshadeout = 0; - ifgcolor[1] = sampleout; - ifgshade[1] = sampleshadeout; - frac += fracstep; - } - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - - // Blend - uint32_t alpha0 = APART(ifgcolor[0]); - uint32_t alpha1 = APART(ifgcolor[1]); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 >> 7; // 255->256 - uint32_t inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; - uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i 
bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); - __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - _mm_storel_epi64((__m128i*)desttmp, outcolor); - dest[offset] = desttmp[0]; - dest[offset + pitch] = desttmp[1]; - } - - if (ssecount * 2 != count) - { - int index = ssecount * 2; - int offset = index * pitch; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2], ifgshade[2]; - unsigned int sampleout = srccolor; - unsigned int sampleshadeout = 0; - ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - ifgshade[0] = sampleshadeout; - ifgshade[1] = 0; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - - // Blend - uint32_t alpha0 = APART(ifgcolor[0]); - uint32_t alpha1 = APART(ifgcolor[1]); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 >> 7; // 255->256 - uint32_t inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; - uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); - - 
fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); - __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - dest[offset] = _mm_cvtsi128_si32(outcolor); - } - } - else - { - const uint32_t *source = (const uint32_t*)args.TexturePixels(); - const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); - int textureheight = args.TextureHeight(); - uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; - - // Shade constants - __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); - dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); - dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); - __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, 
shade_constants.fade_green, shade_constants.fade_blue); - shade_fade = _mm_mullo_epi16(shade_fade, inv_light); - __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); - int desaturate = shade_constants.desaturate; - - __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); - lightcontrib = _mm_sub_epi16(lightcontrib, mlight); - - int count = args.Count(); - int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t fracstep = args.TextureVStep(); - uint32_t frac = args.TextureVPos(); - uint32_t texturefracx = args.TextureUPos(); - uint32_t *dest = (uint32_t*)args.Dest(); - int dest_y = args.DestY(); - - count = thread->count_for_thread(dest_y, count); - if (count <= 0) return; - frac += thread->skipped_by_thread(dest_y) * fracstep; - dest = thread->dest_for_thread(dest_y, pitch, dest); - fracstep *= thread->num_cores; - pitch *= thread->num_cores; - uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); - uint32_t srccolor = args.SrcColorBgra(); - uint32_t color = LightBgra::shade_pal_index_simple(args.SolidColor(), light); - - int ssecount = count / 2; - for (int index = 0; index < ssecount; index++) - { - int offset = index * pitch * 2; - uint32_t desttmp[2]; - desttmp[0] = dest[offset]; - desttmp[1] = dest[offset + pitch]; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2], ifgshade[2]; - { - unsigned int sampleout = srccolor; - unsigned int sampleshadeout = 0; - ifgcolor[0] = sampleout; - ifgshade[0] = sampleshadeout; - frac += fracstep; - } - { - unsigned int sampleout = srccolor; - unsigned int sampleshadeout = 0; - ifgcolor[1] = sampleout; - ifgshade[1] = 
sampleshadeout; - frac += fracstep; - } - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i lit_dynlight = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); - - int blue0 = BPART(ifgcolor[0]); - int green0 = GPART(ifgcolor[0]); - int red0 = RPART(ifgcolor[0]); - int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - - int blue1 = BPART(ifgcolor[1]); - int green1 = GPART(ifgcolor[1]); - int red1 = RPART(ifgcolor[1]); - int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; - - __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); - - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, mlight); - fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - - fgcolor = _mm_add_epi16(fgcolor, lit_dynlight); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); - - // Blend - uint32_t alpha0 = APART(ifgcolor[0]); - uint32_t alpha1 = APART(ifgcolor[1]); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 >> 7; // 255->256 - uint32_t inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; - uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, 
_mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); - __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - _mm_storel_epi64((__m128i*)desttmp, outcolor); - dest[offset] = desttmp[0]; - dest[offset + pitch] = desttmp[1]; - } - - if (ssecount * 2 != count) - { - int index = ssecount * 2; - int offset = index * pitch; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2], ifgshade[2]; - unsigned int sampleout = srccolor; - unsigned int sampleshadeout = 0; - ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - ifgshade[0] = sampleshadeout; - ifgshade[1] = 0; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i lit_dynlight = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); - - int blue0 = BPART(ifgcolor[0]); - int green0 = GPART(ifgcolor[0]); - int red0 = RPART(ifgcolor[0]); - int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - - int blue1 = BPART(ifgcolor[1]); - int green1 = GPART(ifgcolor[1]); - int red1 = RPART(ifgcolor[1]); - int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; - - __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); - - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, mlight); - fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, 
fgcolor), 8); - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - - fgcolor = _mm_add_epi16(fgcolor, lit_dynlight); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); - - // Blend - uint32_t alpha0 = APART(ifgcolor[0]); - uint32_t alpha1 = APART(ifgcolor[1]); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 >> 7; // 255->256 - uint32_t inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; - uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); - __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - dest[offset] = _mm_cvtsi128_si32(outcolor); - } - } - } - - FString DebugInfo() override { return "FillSpriteAddClamp32Command"; } - }; - - class FillSpriteSubClamp32Command : public DrawerCommand - { - protected: - SpriteDrawerArgs args; - - public: - FillSpriteSubClamp32Command(const SpriteDrawerArgs &drawerargs) : args(drawerargs) { } 
- - void Execute(DrawerThread *thread) override - { - auto shade_constants = args.ColormapConstants(); - if (shade_constants.simple_shade) - { - const uint32_t *source = (const uint32_t*)args.TexturePixels(); - const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); - int textureheight = args.TextureHeight(); - uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; - - // Shade constants - __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); - dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); - dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); - - int count = args.Count(); - int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t fracstep = args.TextureVStep(); - uint32_t frac = args.TextureVPos(); - uint32_t texturefracx = args.TextureUPos(); - uint32_t *dest = (uint32_t*)args.Dest(); - int dest_y = args.DestY(); - - count = thread->count_for_thread(dest_y, count); - if (count <= 0) return; - frac += thread->skipped_by_thread(dest_y) * fracstep; - dest = thread->dest_for_thread(dest_y, pitch, dest); - fracstep *= thread->num_cores; - pitch *= thread->num_cores; - uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); - uint32_t srccolor = args.SrcColorBgra(); - uint32_t color = LightBgra::shade_pal_index_simple(args.SolidColor(), light); - - int ssecount = count / 2; - for (int index = 0; index < ssecount; index++) - { - int offset = index * pitch * 2; - uint32_t desttmp[2]; - desttmp[0] = dest[offset]; - desttmp[1] = dest[offset + pitch]; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2], ifgshade[2]; - { - unsigned int 
sampleout = srccolor; - unsigned int sampleshadeout = 0; - ifgcolor[0] = sampleout; - ifgshade[0] = sampleshadeout; - frac += fracstep; - } - { - unsigned int sampleout = srccolor; - unsigned int sampleshadeout = 0; - ifgcolor[1] = sampleout; - ifgshade[1] = sampleshadeout; - frac += fracstep; - } - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - - // Blend - uint32_t alpha0 = APART(ifgcolor[0]); - uint32_t alpha1 = APART(ifgcolor[1]); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 >> 7; // 255->256 - uint32_t inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; - uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_sub_epi32(fg_lo, bg_lo); - __m128i out_hi = _mm_sub_epi32(fg_hi, bg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - _mm_storel_epi64((__m128i*)desttmp, outcolor); - 
dest[offset] = desttmp[0]; - dest[offset + pitch] = desttmp[1]; - } - - if (ssecount * 2 != count) - { - int index = ssecount * 2; - int offset = index * pitch; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2], ifgshade[2]; - unsigned int sampleout = srccolor; - unsigned int sampleshadeout = 0; - ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - ifgshade[0] = sampleshadeout; - ifgshade[1] = 0; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - - // Blend - uint32_t alpha0 = APART(ifgcolor[0]); - uint32_t alpha1 = APART(ifgcolor[1]); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 >> 7; // 255->256 - uint32_t inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; - uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_sub_epi32(fg_lo, bg_lo); - __m128i out_hi = _mm_sub_epi32(fg_hi, bg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor 
= _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - dest[offset] = _mm_cvtsi128_si32(outcolor); - } - } - else - { - const uint32_t *source = (const uint32_t*)args.TexturePixels(); - const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); - int textureheight = args.TextureHeight(); - uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; - - // Shade constants - __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); - dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); - dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); - __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); - shade_fade = _mm_mullo_epi16(shade_fade, inv_light); - __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); - int desaturate = shade_constants.desaturate; - - __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); - lightcontrib = _mm_sub_epi16(lightcontrib, mlight); - - int count = args.Count(); - int pitch = 
RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t fracstep = args.TextureVStep(); - uint32_t frac = args.TextureVPos(); - uint32_t texturefracx = args.TextureUPos(); - uint32_t *dest = (uint32_t*)args.Dest(); - int dest_y = args.DestY(); - - count = thread->count_for_thread(dest_y, count); - if (count <= 0) return; - frac += thread->skipped_by_thread(dest_y) * fracstep; - dest = thread->dest_for_thread(dest_y, pitch, dest); - fracstep *= thread->num_cores; - pitch *= thread->num_cores; - uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); - uint32_t srccolor = args.SrcColorBgra(); - uint32_t color = LightBgra::shade_pal_index_simple(args.SolidColor(), light); - - int ssecount = count / 2; - for (int index = 0; index < ssecount; index++) - { - int offset = index * pitch * 2; - uint32_t desttmp[2]; - desttmp[0] = dest[offset]; - desttmp[1] = dest[offset + pitch]; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2], ifgshade[2]; - { - unsigned int sampleout = srccolor; - unsigned int sampleshadeout = 0; - ifgcolor[0] = sampleout; - ifgshade[0] = sampleshadeout; - frac += fracstep; - } - { - unsigned int sampleout = srccolor; - unsigned int sampleshadeout = 0; - ifgcolor[1] = sampleout; - ifgshade[1] = sampleshadeout; - frac += fracstep; - } - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i lit_dynlight = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); - - int blue0 = BPART(ifgcolor[0]); - int green0 = GPART(ifgcolor[0]); - int red0 = RPART(ifgcolor[0]); - int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - - int blue1 = BPART(ifgcolor[1]); - int green1 = GPART(ifgcolor[1]); - int red1 = RPART(ifgcolor[1]); - int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; - - __m128i 
intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); - - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, mlight); - fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - - fgcolor = _mm_add_epi16(fgcolor, lit_dynlight); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); - - // Blend - uint32_t alpha0 = APART(ifgcolor[0]); - uint32_t alpha1 = APART(ifgcolor[1]); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 >> 7; // 255->256 - uint32_t inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; - uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_sub_epi32(fg_lo, bg_lo); - __m128i out_hi = _mm_sub_epi32(fg_hi, bg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - _mm_storel_epi64((__m128i*)desttmp, 
outcolor); - dest[offset] = desttmp[0]; - dest[offset + pitch] = desttmp[1]; - } - - if (ssecount * 2 != count) - { - int index = ssecount * 2; - int offset = index * pitch; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2], ifgshade[2]; - unsigned int sampleout = srccolor; - unsigned int sampleshadeout = 0; - ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - ifgshade[0] = sampleshadeout; - ifgshade[1] = 0; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i lit_dynlight = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); - - int blue0 = BPART(ifgcolor[0]); - int green0 = GPART(ifgcolor[0]); - int red0 = RPART(ifgcolor[0]); - int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - - int blue1 = BPART(ifgcolor[1]); - int green1 = GPART(ifgcolor[1]); - int red1 = RPART(ifgcolor[1]); - int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; - - __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); - - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, mlight); - fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - - fgcolor = _mm_add_epi16(fgcolor, lit_dynlight); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); - - // Blend - uint32_t alpha0 = APART(ifgcolor[0]); - uint32_t alpha1 = APART(ifgcolor[1]); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 >> 7; // 255->256 - uint32_t inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; - uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - uint32_t fgalpha0 = (srcalpha * alpha0 
+ 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_sub_epi32(fg_lo, bg_lo); - __m128i out_hi = _mm_sub_epi32(fg_hi, bg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - dest[offset] = _mm_cvtsi128_si32(outcolor); - } - } - } - - FString DebugInfo() override { return "FillSpriteSubClamp32Command"; } - }; - - class FillSpriteRevSubClamp32Command : public DrawerCommand - { - protected: - SpriteDrawerArgs args; - - public: - FillSpriteRevSubClamp32Command(const SpriteDrawerArgs &drawerargs) : args(drawerargs) { } - - void Execute(DrawerThread *thread) override - { - auto shade_constants = args.ColormapConstants(); - if (shade_constants.simple_shade) - { - const uint32_t *source = (const uint32_t*)args.TexturePixels(); - const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); - int textureheight = args.TextureHeight(); - uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; - - // Shade constants - __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); - dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); - dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); - int light = 
256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); - - int count = args.Count(); - int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t fracstep = args.TextureVStep(); - uint32_t frac = args.TextureVPos(); - uint32_t texturefracx = args.TextureUPos(); - uint32_t *dest = (uint32_t*)args.Dest(); - int dest_y = args.DestY(); - - count = thread->count_for_thread(dest_y, count); - if (count <= 0) return; - frac += thread->skipped_by_thread(dest_y) * fracstep; - dest = thread->dest_for_thread(dest_y, pitch, dest); - fracstep *= thread->num_cores; - pitch *= thread->num_cores; - uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); - uint32_t srccolor = args.SrcColorBgra(); - uint32_t color = LightBgra::shade_pal_index_simple(args.SolidColor(), light); - - int ssecount = count / 2; - for (int index = 0; index < ssecount; index++) - { - int offset = index * pitch * 2; - uint32_t desttmp[2]; - desttmp[0] = dest[offset]; - desttmp[1] = dest[offset + pitch]; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2], ifgshade[2]; - { - unsigned int sampleout = srccolor; - unsigned int sampleshadeout = 0; - ifgcolor[0] = sampleout; - ifgshade[0] = sampleshadeout; - frac += fracstep; - } - { - unsigned int sampleout = srccolor; - unsigned int sampleshadeout = 0; - ifgcolor[1] = sampleout; - ifgshade[1] = sampleshadeout; - frac += fracstep; - } - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - - // Blend - uint32_t alpha0 = APART(ifgcolor[0]); - uint32_t alpha1 = APART(ifgcolor[1]); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 >> 7; 
// 255->256 - uint32_t inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; - uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_sub_epi32(bg_lo, fg_lo); - __m128i out_hi = _mm_sub_epi32(bg_hi, fg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - _mm_storel_epi64((__m128i*)desttmp, outcolor); - dest[offset] = desttmp[0]; - dest[offset + pitch] = desttmp[1]; - } - - if (ssecount * 2 != count) - { - int index = ssecount * 2; - int offset = index * pitch; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2], ifgshade[2]; - unsigned int sampleout = srccolor; - unsigned int sampleshadeout = 0; - ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - ifgshade[0] = sampleshadeout; - ifgshade[1] = 0; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, 
mlight), 8); - - // Blend - uint32_t alpha0 = APART(ifgcolor[0]); - uint32_t alpha1 = APART(ifgcolor[1]); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 >> 7; // 255->256 - uint32_t inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; - uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_sub_epi32(bg_lo, fg_lo); - __m128i out_hi = _mm_sub_epi32(bg_hi, fg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - dest[offset] = _mm_cvtsi128_si32(outcolor); - } - } - else - { - const uint32_t *source = (const uint32_t*)args.TexturePixels(); - const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); - int textureheight = args.TextureHeight(); - uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; - - // Shade constants - __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); - dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); - dynlight = _mm_shuffle_epi32(dynlight, 
_MM_SHUFFLE(1,0,1,0)); - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); - __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); - shade_fade = _mm_mullo_epi16(shade_fade, inv_light); - __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); - int desaturate = shade_constants.desaturate; - - __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); - lightcontrib = _mm_sub_epi16(lightcontrib, mlight); - - int count = args.Count(); - int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t fracstep = args.TextureVStep(); - uint32_t frac = args.TextureVPos(); - uint32_t texturefracx = args.TextureUPos(); - uint32_t *dest = (uint32_t*)args.Dest(); - int dest_y = args.DestY(); - - count = thread->count_for_thread(dest_y, count); - if (count <= 0) return; - frac += thread->skipped_by_thread(dest_y) * fracstep; - dest = thread->dest_for_thread(dest_y, pitch, dest); - fracstep *= thread->num_cores; - pitch *= thread->num_cores; - uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); - uint32_t srccolor 
= args.SrcColorBgra(); - uint32_t color = LightBgra::shade_pal_index_simple(args.SolidColor(), light); - - int ssecount = count / 2; - for (int index = 0; index < ssecount; index++) - { - int offset = index * pitch * 2; - uint32_t desttmp[2]; - desttmp[0] = dest[offset]; - desttmp[1] = dest[offset + pitch]; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2], ifgshade[2]; - { - unsigned int sampleout = srccolor; - unsigned int sampleshadeout = 0; - ifgcolor[0] = sampleout; - ifgshade[0] = sampleshadeout; - frac += fracstep; - } - { - unsigned int sampleout = srccolor; - unsigned int sampleshadeout = 0; - ifgcolor[1] = sampleout; - ifgshade[1] = sampleshadeout; - frac += fracstep; - } - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i lit_dynlight = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); - - int blue0 = BPART(ifgcolor[0]); - int green0 = GPART(ifgcolor[0]); - int red0 = RPART(ifgcolor[0]); - int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - - int blue1 = BPART(ifgcolor[1]); - int green1 = GPART(ifgcolor[1]); - int red1 = RPART(ifgcolor[1]); - int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; - - __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); - - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, mlight); - fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - - fgcolor = _mm_add_epi16(fgcolor, lit_dynlight); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); - - // Blend - uint32_t alpha0 = APART(ifgcolor[0]); - uint32_t alpha1 = APART(ifgcolor[1]); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 
>> 7; // 255->256 - uint32_t inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; - uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_sub_epi32(bg_lo, fg_lo); - __m128i out_hi = _mm_sub_epi32(bg_hi, fg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - _mm_storel_epi64((__m128i*)desttmp, outcolor); - dest[offset] = desttmp[0]; - dest[offset + pitch] = desttmp[1]; - } - - if (ssecount * 2 != count) - { - int index = ssecount * 2; - int offset = index * pitch; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2], ifgshade[2]; - unsigned int sampleout = srccolor; - unsigned int sampleshadeout = 0; - ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - ifgshade[0] = sampleshadeout; - ifgshade[1] = 0; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i lit_dynlight = 
_mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); - - int blue0 = BPART(ifgcolor[0]); - int green0 = GPART(ifgcolor[0]); - int red0 = RPART(ifgcolor[0]); - int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - - int blue1 = BPART(ifgcolor[1]); - int green1 = GPART(ifgcolor[1]); - int red1 = RPART(ifgcolor[1]); - int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; - - __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); - - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, mlight); - fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - - fgcolor = _mm_add_epi16(fgcolor, lit_dynlight); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); - - // Blend - uint32_t alpha0 = APART(ifgcolor[0]); - uint32_t alpha1 = APART(ifgcolor[1]); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 >> 7; // 255->256 - uint32_t inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; - uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = 
_mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_sub_epi32(bg_lo, fg_lo); - __m128i out_hi = _mm_sub_epi32(bg_hi, fg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - dest[offset] = _mm_cvtsi128_si32(outcolor); - } - } - } - - FString DebugInfo() override { return "FillSpriteRevSubClamp32Command"; } - }; - - class DrawSpriteShaded32Command : public DrawerCommand - { - protected: - SpriteDrawerArgs args; - - public: - DrawSpriteShaded32Command(const SpriteDrawerArgs &drawerargs) : args(drawerargs) { } - - void Execute(DrawerThread *thread) override - { - auto shade_constants = args.ColormapConstants(); - if (shade_constants.simple_shade) - { - const uint8_t *source = args.TexturePixels(); - const uint8_t *colormap = args.Colormap(); - const uint32_t *translation = (const uint32_t*)args.TranslationMap(); - int textureheight = args.TextureHeight(); - uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; - - // Shade constants - __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); - dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); - dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); - - int count = args.Count(); - int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t fracstep = args.TextureVStep(); - uint32_t frac = args.TextureVPos(); - uint32_t texturefracx = args.TextureUPos(); - uint32_t *dest = (uint32_t*)args.Dest(); - int dest_y = args.DestY(); - - count = thread->count_for_thread(dest_y, count); - if (count <= 0) return; - frac += 
thread->skipped_by_thread(dest_y) * fracstep; - dest = thread->dest_for_thread(dest_y, pitch, dest); - fracstep *= thread->num_cores; - pitch *= thread->num_cores; - uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); - uint32_t srccolor = args.SrcColorBgra(); - uint32_t color = LightBgra::shade_pal_index_simple(args.SolidColor(), light); - - int ssecount = count / 2; - for (int index = 0; index < ssecount; index++) - { - int offset = index * pitch * 2; - uint32_t desttmp[2]; - desttmp[0] = dest[offset]; - desttmp[1] = dest[offset + pitch]; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2], ifgshade[2]; - { - unsigned int sampleout = color; - unsigned int sampleshadeout = colormap[source[frac >> FRACBITS]]; - sampleshadeout = clamp(sampleshadeout, 0, 64) * 4; - ifgcolor[0] = sampleout; - ifgshade[0] = sampleshadeout; - frac += fracstep; - } - { - unsigned int sampleout = color; - unsigned int sampleshadeout = colormap[source[frac >> FRACBITS]]; - sampleshadeout = clamp(sampleshadeout, 0, 64) * 4; - ifgcolor[1] = sampleout; - ifgshade[1] = sampleshadeout; - frac += fracstep; - } - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - - // Blend - __m128i alpha = _mm_set_epi16(ifgshade[1], ifgshade[1], ifgshade[1], ifgshade[1], ifgshade[0], ifgshade[0], ifgshade[0], ifgshade[0]); - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - fgcolor = _mm_mullo_epi16(fgcolor, alpha); - bgcolor = _mm_mullo_epi16(bgcolor, inv_alpha); - __m128i outcolor = _mm_srli_epi16(_mm_add_epi16(fgcolor, bgcolor), 8); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - _mm_storel_epi64((__m128i*)desttmp, outcolor); - dest[offset] = desttmp[0]; - dest[offset + pitch] = desttmp[1]; - } - - if 
(ssecount * 2 != count) - { - int index = ssecount * 2; - int offset = index * pitch; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2], ifgshade[2]; - unsigned int sampleout = color; - unsigned int sampleshadeout = colormap[source[frac >> FRACBITS]]; - sampleshadeout = clamp(sampleshadeout, 0, 64) * 4; - ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - ifgshade[0] = sampleshadeout; - ifgshade[1] = 0; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - - // Blend - __m128i alpha = _mm_set_epi16(ifgshade[1], ifgshade[1], ifgshade[1], ifgshade[1], ifgshade[0], ifgshade[0], ifgshade[0], ifgshade[0]); - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - fgcolor = _mm_mullo_epi16(fgcolor, alpha); - bgcolor = _mm_mullo_epi16(bgcolor, inv_alpha); - __m128i outcolor = _mm_srli_epi16(_mm_add_epi16(fgcolor, bgcolor), 8); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - dest[offset] = _mm_cvtsi128_si32(outcolor); - } - } - else - { - const uint8_t *source = args.TexturePixels(); - const uint8_t *colormap = args.Colormap(); - const uint32_t *translation = (const uint32_t*)args.TranslationMap(); - int textureheight = args.TextureHeight(); - uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; - - // Shade constants - __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); - dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); - dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - __m128i inv_desaturate = _mm_setr_epi16(256, 256 - 
shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); - __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); - shade_fade = _mm_mullo_epi16(shade_fade, inv_light); - __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); - int desaturate = shade_constants.desaturate; - - __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); - lightcontrib = _mm_sub_epi16(lightcontrib, mlight); - - int count = args.Count(); - int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t fracstep = args.TextureVStep(); - uint32_t frac = args.TextureVPos(); - uint32_t texturefracx = args.TextureUPos(); - uint32_t *dest = (uint32_t*)args.Dest(); - int dest_y = args.DestY(); - - count = thread->count_for_thread(dest_y, count); - if (count <= 0) return; - frac += thread->skipped_by_thread(dest_y) * fracstep; - dest = thread->dest_for_thread(dest_y, pitch, dest); - fracstep *= thread->num_cores; - pitch *= thread->num_cores; - uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); - uint32_t srccolor = args.SrcColorBgra(); - uint32_t color = LightBgra::shade_pal_index_simple(args.SolidColor(), light); - - int ssecount = count / 2; - for (int index = 0; index < ssecount; index++) - { - int offset = index * pitch * 2; - uint32_t desttmp[2]; - desttmp[0] = dest[offset]; - desttmp[1] = dest[offset + pitch]; - __m128i bgcolor = 
_mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2], ifgshade[2]; - { - unsigned int sampleout = color; - unsigned int sampleshadeout = colormap[source[frac >> FRACBITS]]; - sampleshadeout = clamp(sampleshadeout, 0, 64) * 4; - ifgcolor[0] = sampleout; - ifgshade[0] = sampleshadeout; - frac += fracstep; - } - { - unsigned int sampleout = color; - unsigned int sampleshadeout = colormap[source[frac >> FRACBITS]]; - sampleshadeout = clamp(sampleshadeout, 0, 64) * 4; - ifgcolor[1] = sampleout; - ifgshade[1] = sampleshadeout; - frac += fracstep; - } - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - - // Blend - __m128i alpha = _mm_set_epi16(ifgshade[1], ifgshade[1], ifgshade[1], ifgshade[1], ifgshade[0], ifgshade[0], ifgshade[0], ifgshade[0]); - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - fgcolor = _mm_mullo_epi16(fgcolor, alpha); - bgcolor = _mm_mullo_epi16(bgcolor, inv_alpha); - __m128i outcolor = _mm_srli_epi16(_mm_add_epi16(fgcolor, bgcolor), 8); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - _mm_storel_epi64((__m128i*)desttmp, outcolor); - dest[offset] = desttmp[0]; - dest[offset + pitch] = desttmp[1]; - } - - if (ssecount * 2 != count) - { - int index = ssecount * 2; - int offset = index * pitch; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2], ifgshade[2]; - unsigned int sampleout = color; - unsigned int sampleshadeout = colormap[source[frac >> FRACBITS]]; - sampleshadeout = clamp(sampleshadeout, 0, 64) * 4; - ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - ifgshade[0] = sampleshadeout; - ifgshade[1] = 0; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - - // Blend - __m128i alpha = 
_mm_set_epi16(ifgshade[1], ifgshade[1], ifgshade[1], ifgshade[1], ifgshade[0], ifgshade[0], ifgshade[0], ifgshade[0]); - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - fgcolor = _mm_mullo_epi16(fgcolor, alpha); - bgcolor = _mm_mullo_epi16(bgcolor, inv_alpha); - __m128i outcolor = _mm_srli_epi16(_mm_add_epi16(fgcolor, bgcolor), 8); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - dest[offset] = _mm_cvtsi128_si32(outcolor); - } - } - } - - FString DebugInfo() override { return "DrawSpriteShaded32Command"; } - }; - - class DrawSpriteTranslated32Command : public DrawerCommand - { - protected: - SpriteDrawerArgs args; - - public: - DrawSpriteTranslated32Command(const SpriteDrawerArgs &drawerargs) : args(drawerargs) { } - - void Execute(DrawerThread *thread) override - { - auto shade_constants = args.ColormapConstants(); - if (shade_constants.simple_shade) - { - const uint8_t *source = args.TexturePixels(); - const uint8_t *colormap = args.Colormap(); - const uint32_t *translation = (const uint32_t*)args.TranslationMap(); - int textureheight = args.TextureHeight(); - uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; - - // Shade constants - __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); - dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); - dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); - - int count = args.Count(); - int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t fracstep = args.TextureVStep(); - uint32_t frac = args.TextureVPos(); - uint32_t texturefracx = args.TextureUPos(); - uint32_t *dest = (uint32_t*)args.Dest(); - int dest_y = args.DestY(); - - count = 
thread->count_for_thread(dest_y, count); - if (count <= 0) return; - frac += thread->skipped_by_thread(dest_y) * fracstep; - dest = thread->dest_for_thread(dest_y, pitch, dest); - fracstep *= thread->num_cores; - pitch *= thread->num_cores; - uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); - uint32_t srccolor = args.SrcColorBgra(); - uint32_t color = LightBgra::shade_pal_index_simple(args.SolidColor(), light); - - int ssecount = count / 2; - for (int index = 0; index < ssecount; index++) - { - int offset = index * pitch * 2; - uint32_t desttmp[2]; - desttmp[0] = dest[offset]; - desttmp[1] = dest[offset + pitch]; - - // Sample - unsigned int ifgcolor[2], ifgshade[2]; - { - unsigned int sampleout = translation[source[frac >> FRACBITS]]; - unsigned int sampleshadeout = 0; - ifgcolor[0] = sampleout; - ifgshade[0] = sampleshadeout; - frac += fracstep; - } - { - unsigned int sampleout = translation[source[frac >> FRACBITS]]; - unsigned int sampleshadeout = 0; - ifgcolor[1] = sampleout; - ifgshade[1] = sampleshadeout; - frac += fracstep; - } - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - - // Blend - __m128i outcolor = fgcolor; - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - - _mm_storel_epi64((__m128i*)desttmp, outcolor); - dest[offset] = desttmp[0]; - dest[offset + pitch] = desttmp[1]; - } - - if (ssecount * 2 != count) - { - int index = ssecount * 2; - int offset = index * pitch; - - // Sample - unsigned int ifgcolor[2], ifgshade[2]; - unsigned int sampleout = translation[source[frac >> FRACBITS]]; - unsigned int sampleshadeout = 0; - ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - ifgshade[0] = sampleshadeout; - ifgshade[1] = 0; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - fgcolor = 
_mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - - // Blend - __m128i outcolor = fgcolor; - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - - dest[offset] = _mm_cvtsi128_si32(outcolor); - } - } - else - { - const uint8_t *source = args.TexturePixels(); - const uint8_t *colormap = args.Colormap(); - const uint32_t *translation = (const uint32_t*)args.TranslationMap(); - int textureheight = args.TextureHeight(); - uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; - - // Shade constants - __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); - dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); - dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); - __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); - shade_fade = _mm_mullo_epi16(shade_fade, inv_light); - __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); - int desaturate = shade_constants.desaturate; - - __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); - lightcontrib = 
_mm_sub_epi16(lightcontrib, mlight); - - int count = args.Count(); - int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t fracstep = args.TextureVStep(); - uint32_t frac = args.TextureVPos(); - uint32_t texturefracx = args.TextureUPos(); - uint32_t *dest = (uint32_t*)args.Dest(); - int dest_y = args.DestY(); - - count = thread->count_for_thread(dest_y, count); - if (count <= 0) return; - frac += thread->skipped_by_thread(dest_y) * fracstep; - dest = thread->dest_for_thread(dest_y, pitch, dest); - fracstep *= thread->num_cores; - pitch *= thread->num_cores; - uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); - uint32_t srccolor = args.SrcColorBgra(); - uint32_t color = LightBgra::shade_pal_index_simple(args.SolidColor(), light); - - int ssecount = count / 2; - for (int index = 0; index < ssecount; index++) - { - int offset = index * pitch * 2; - uint32_t desttmp[2]; - desttmp[0] = dest[offset]; - desttmp[1] = dest[offset + pitch]; - - // Sample - unsigned int ifgcolor[2], ifgshade[2]; - { - unsigned int sampleout = translation[source[frac >> FRACBITS]]; - unsigned int sampleshadeout = 0; - ifgcolor[0] = sampleout; - ifgshade[0] = sampleshadeout; - frac += fracstep; - } - { - unsigned int sampleout = translation[source[frac >> FRACBITS]]; - unsigned int sampleshadeout = 0; - ifgcolor[1] = sampleout; - ifgshade[1] = sampleshadeout; - frac += fracstep; - } - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i lit_dynlight = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); - - int blue0 = BPART(ifgcolor[0]); - int green0 = GPART(ifgcolor[0]); - int red0 = RPART(ifgcolor[0]); - int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - - int blue1 = BPART(ifgcolor[1]); - int green1 = GPART(ifgcolor[1]); - int red1 = RPART(ifgcolor[1]); - int intensity1 = ((red1 * 77 + green1 * 143 + blue1 
* 37) >> 8) * desaturate; - - __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); - - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, mlight); - fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - - fgcolor = _mm_add_epi16(fgcolor, lit_dynlight); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); - - // Blend - __m128i outcolor = fgcolor; - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - - _mm_storel_epi64((__m128i*)desttmp, outcolor); - dest[offset] = desttmp[0]; - dest[offset + pitch] = desttmp[1]; - } - - if (ssecount * 2 != count) - { - int index = ssecount * 2; - int offset = index * pitch; - - // Sample - unsigned int ifgcolor[2], ifgshade[2]; - unsigned int sampleout = translation[source[frac >> FRACBITS]]; - unsigned int sampleshadeout = 0; - ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - ifgshade[0] = sampleshadeout; - ifgshade[1] = 0; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i lit_dynlight = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); - - int blue0 = BPART(ifgcolor[0]); - int green0 = GPART(ifgcolor[0]); - int red0 = RPART(ifgcolor[0]); - int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - - int blue1 = BPART(ifgcolor[1]); - int green1 = GPART(ifgcolor[1]); - int red1 = RPART(ifgcolor[1]); - int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; - - __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); - - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, mlight); - fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 
8); - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - - fgcolor = _mm_add_epi16(fgcolor, lit_dynlight); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); - - // Blend - __m128i outcolor = fgcolor; - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - - dest[offset] = _mm_cvtsi128_si32(outcolor); - } - } - } - - FString DebugInfo() override { return "DrawSpriteTranslated32Command"; } - }; - - class DrawSpriteTranslatedAddClamp32Command : public DrawerCommand - { - protected: - SpriteDrawerArgs args; - - public: - DrawSpriteTranslatedAddClamp32Command(const SpriteDrawerArgs &drawerargs) : args(drawerargs) { } - - void Execute(DrawerThread *thread) override - { - auto shade_constants = args.ColormapConstants(); - if (shade_constants.simple_shade) - { - const uint8_t *source = args.TexturePixels(); - const uint8_t *colormap = args.Colormap(); - const uint32_t *translation = (const uint32_t*)args.TranslationMap(); - int textureheight = args.TextureHeight(); - uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; - - // Shade constants - __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); - dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); - dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); - - int count = args.Count(); - int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t fracstep = args.TextureVStep(); - uint32_t frac = args.TextureVPos(); - uint32_t texturefracx = args.TextureUPos(); - uint32_t *dest = (uint32_t*)args.Dest(); - int dest_y = args.DestY(); - - count = thread->count_for_thread(dest_y, count); - if (count <= 0) return; - frac += thread->skipped_by_thread(dest_y) * fracstep; - dest = thread->dest_for_thread(dest_y, pitch, 
dest); - fracstep *= thread->num_cores; - pitch *= thread->num_cores; - uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); - uint32_t srccolor = args.SrcColorBgra(); - uint32_t color = LightBgra::shade_pal_index_simple(args.SolidColor(), light); - - int ssecount = count / 2; - for (int index = 0; index < ssecount; index++) - { - int offset = index * pitch * 2; - uint32_t desttmp[2]; - desttmp[0] = dest[offset]; - desttmp[1] = dest[offset + pitch]; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2], ifgshade[2]; - { - unsigned int sampleout = translation[source[frac >> FRACBITS]]; - unsigned int sampleshadeout = 0; - ifgcolor[0] = sampleout; - ifgshade[0] = sampleshadeout; - frac += fracstep; - } - { - unsigned int sampleout = translation[source[frac >> FRACBITS]]; - unsigned int sampleshadeout = 0; - ifgcolor[1] = sampleout; - ifgshade[1] = sampleshadeout; - frac += fracstep; - } - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - - // Blend - uint32_t alpha0 = APART(ifgcolor[0]); - uint32_t alpha1 = APART(ifgcolor[1]); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 >> 7; // 255->256 - uint32_t inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; - uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, 
fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); - __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - _mm_storel_epi64((__m128i*)desttmp, outcolor); - dest[offset] = desttmp[0]; - dest[offset + pitch] = desttmp[1]; - } - - if (ssecount * 2 != count) - { - int index = ssecount * 2; - int offset = index * pitch; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2], ifgshade[2]; - unsigned int sampleout = translation[source[frac >> FRACBITS]]; - unsigned int sampleshadeout = 0; - ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - ifgshade[0] = sampleshadeout; - ifgshade[1] = 0; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - - // Blend - uint32_t alpha0 = APART(ifgcolor[0]); - uint32_t alpha1 = APART(ifgcolor[1]); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 >> 7; // 255->256 - uint32_t inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; - uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, 
bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); - __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - dest[offset] = _mm_cvtsi128_si32(outcolor); - } - } - else - { - const uint8_t *source = args.TexturePixels(); - const uint8_t *colormap = args.Colormap(); - const uint32_t *translation = (const uint32_t*)args.TranslationMap(); - int textureheight = args.TextureHeight(); - uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; - - // Shade constants - __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); - dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); - dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); - __m128i shade_fade = 
_mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); - shade_fade = _mm_mullo_epi16(shade_fade, inv_light); - __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); - int desaturate = shade_constants.desaturate; - - __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); - lightcontrib = _mm_sub_epi16(lightcontrib, mlight); - - int count = args.Count(); - int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t fracstep = args.TextureVStep(); - uint32_t frac = args.TextureVPos(); - uint32_t texturefracx = args.TextureUPos(); - uint32_t *dest = (uint32_t*)args.Dest(); - int dest_y = args.DestY(); - - count = thread->count_for_thread(dest_y, count); - if (count <= 0) return; - frac += thread->skipped_by_thread(dest_y) * fracstep; - dest = thread->dest_for_thread(dest_y, pitch, dest); - fracstep *= thread->num_cores; - pitch *= thread->num_cores; - uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); - uint32_t srccolor = args.SrcColorBgra(); - uint32_t color = LightBgra::shade_pal_index_simple(args.SolidColor(), light); - - int ssecount = count / 2; - for (int index = 0; index < ssecount; index++) - { - int offset = index * pitch * 2; - uint32_t desttmp[2]; - desttmp[0] = dest[offset]; - desttmp[1] = dest[offset + pitch]; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2], ifgshade[2]; - { - unsigned int sampleout = translation[source[frac >> FRACBITS]]; - unsigned int sampleshadeout = 
0; - ifgcolor[0] = sampleout; - ifgshade[0] = sampleshadeout; - frac += fracstep; - } - { - unsigned int sampleout = translation[source[frac >> FRACBITS]]; - unsigned int sampleshadeout = 0; - ifgcolor[1] = sampleout; - ifgshade[1] = sampleshadeout; - frac += fracstep; - } - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i lit_dynlight = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); - - int blue0 = BPART(ifgcolor[0]); - int green0 = GPART(ifgcolor[0]); - int red0 = RPART(ifgcolor[0]); - int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - - int blue1 = BPART(ifgcolor[1]); - int green1 = GPART(ifgcolor[1]); - int red1 = RPART(ifgcolor[1]); - int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; - - __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); - - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, mlight); - fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - - fgcolor = _mm_add_epi16(fgcolor, lit_dynlight); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); - - // Blend - uint32_t alpha0 = APART(ifgcolor[0]); - uint32_t alpha1 = APART(ifgcolor[1]); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 >> 7; // 255->256 - uint32_t inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; - uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = 
_mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); - __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - _mm_storel_epi64((__m128i*)desttmp, outcolor); - dest[offset] = desttmp[0]; - dest[offset + pitch] = desttmp[1]; - } - - if (ssecount * 2 != count) - { - int index = ssecount * 2; - int offset = index * pitch; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2], ifgshade[2]; - unsigned int sampleout = translation[source[frac >> FRACBITS]]; - unsigned int sampleshadeout = 0; - ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - ifgshade[0] = sampleshadeout; - ifgshade[1] = 0; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i lit_dynlight = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); - - int blue0 = BPART(ifgcolor[0]); - int green0 = GPART(ifgcolor[0]); - int red0 = RPART(ifgcolor[0]); - int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - - int blue1 = BPART(ifgcolor[1]); - int green1 = GPART(ifgcolor[1]); - int red1 = RPART(ifgcolor[1]); - int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; - - __m128i intensity = _mm_set_epi16(0, intensity1, 
intensity1, intensity1, 0, intensity0, intensity0, intensity0); - - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, mlight); - fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - - fgcolor = _mm_add_epi16(fgcolor, lit_dynlight); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); - - // Blend - uint32_t alpha0 = APART(ifgcolor[0]); - uint32_t alpha1 = APART(ifgcolor[1]); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 >> 7; // 255->256 - uint32_t inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; - uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); - __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - dest[offset] = _mm_cvtsi128_si32(outcolor); - } - } - } - - FString 
DebugInfo() override { return "DrawSpriteTranslatedAddClamp32Command"; } - }; - - class DrawSpriteTranslatedSubClamp32Command : public DrawerCommand - { - protected: - SpriteDrawerArgs args; - - public: - DrawSpriteTranslatedSubClamp32Command(const SpriteDrawerArgs &drawerargs) : args(drawerargs) { } - - void Execute(DrawerThread *thread) override - { - auto shade_constants = args.ColormapConstants(); - if (shade_constants.simple_shade) - { - const uint8_t *source = args.TexturePixels(); - const uint8_t *colormap = args.Colormap(); - const uint32_t *translation = (const uint32_t*)args.TranslationMap(); - int textureheight = args.TextureHeight(); - uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; - - // Shade constants - __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); - dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); - dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); - - int count = args.Count(); - int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t fracstep = args.TextureVStep(); - uint32_t frac = args.TextureVPos(); - uint32_t texturefracx = args.TextureUPos(); - uint32_t *dest = (uint32_t*)args.Dest(); - int dest_y = args.DestY(); - - count = thread->count_for_thread(dest_y, count); - if (count <= 0) return; - frac += thread->skipped_by_thread(dest_y) * fracstep; - dest = thread->dest_for_thread(dest_y, pitch, dest); - fracstep *= thread->num_cores; - pitch *= thread->num_cores; - uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); - uint32_t srccolor = args.SrcColorBgra(); - uint32_t color = LightBgra::shade_pal_index_simple(args.SolidColor(), light); - - int ssecount = count / 2; - for (int index = 
0; index < ssecount; index++) - { - int offset = index * pitch * 2; - uint32_t desttmp[2]; - desttmp[0] = dest[offset]; - desttmp[1] = dest[offset + pitch]; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2], ifgshade[2]; - { - unsigned int sampleout = translation[source[frac >> FRACBITS]]; - unsigned int sampleshadeout = 0; - ifgcolor[0] = sampleout; - ifgshade[0] = sampleshadeout; - frac += fracstep; - } - { - unsigned int sampleout = translation[source[frac >> FRACBITS]]; - unsigned int sampleshadeout = 0; - ifgcolor[1] = sampleout; - ifgshade[1] = sampleshadeout; - frac += fracstep; - } - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - - // Blend - uint32_t alpha0 = APART(ifgcolor[0]); - uint32_t alpha1 = APART(ifgcolor[1]); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 >> 7; // 255->256 - uint32_t inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; - uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = 
_mm_sub_epi32(fg_lo, bg_lo); - __m128i out_hi = _mm_sub_epi32(fg_hi, bg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - _mm_storel_epi64((__m128i*)desttmp, outcolor); - dest[offset] = desttmp[0]; - dest[offset + pitch] = desttmp[1]; - } - - if (ssecount * 2 != count) - { - int index = ssecount * 2; - int offset = index * pitch; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2], ifgshade[2]; - unsigned int sampleout = translation[source[frac >> FRACBITS]]; - unsigned int sampleshadeout = 0; - ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - ifgshade[0] = sampleshadeout; - ifgshade[1] = 0; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - - // Blend - uint32_t alpha0 = APART(ifgcolor[0]); - uint32_t alpha1 = APART(ifgcolor[1]); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 >> 7; // 255->256 - uint32_t inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; - uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = 
_mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_sub_epi32(fg_lo, bg_lo); - __m128i out_hi = _mm_sub_epi32(fg_hi, bg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - dest[offset] = _mm_cvtsi128_si32(outcolor); - } - } - else - { - const uint8_t *source = args.TexturePixels(); - const uint8_t *colormap = args.Colormap(); - const uint32_t *translation = (const uint32_t*)args.TranslationMap(); - int textureheight = args.TextureHeight(); - uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; - - // Shade constants - __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); - dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); - dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); - __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); - shade_fade = _mm_mullo_epi16(shade_fade, inv_light); - __m128i shade_light = 
_mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); - int desaturate = shade_constants.desaturate; - - __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); - lightcontrib = _mm_sub_epi16(lightcontrib, mlight); - - int count = args.Count(); - int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t fracstep = args.TextureVStep(); - uint32_t frac = args.TextureVPos(); - uint32_t texturefracx = args.TextureUPos(); - uint32_t *dest = (uint32_t*)args.Dest(); - int dest_y = args.DestY(); - - count = thread->count_for_thread(dest_y, count); - if (count <= 0) return; - frac += thread->skipped_by_thread(dest_y) * fracstep; - dest = thread->dest_for_thread(dest_y, pitch, dest); - fracstep *= thread->num_cores; - pitch *= thread->num_cores; - uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); - uint32_t srccolor = args.SrcColorBgra(); - uint32_t color = LightBgra::shade_pal_index_simple(args.SolidColor(), light); - - int ssecount = count / 2; - for (int index = 0; index < ssecount; index++) - { - int offset = index * pitch * 2; - uint32_t desttmp[2]; - desttmp[0] = dest[offset]; - desttmp[1] = dest[offset + pitch]; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2], ifgshade[2]; - { - unsigned int sampleout = translation[source[frac >> FRACBITS]]; - unsigned int sampleshadeout = 0; - ifgcolor[0] = sampleout; - ifgshade[0] = sampleshadeout; - frac += fracstep; - } - { - unsigned int sampleout = translation[source[frac >> FRACBITS]]; - unsigned int sampleshadeout = 0; - ifgcolor[1] = sampleout; - ifgshade[1] = sampleshadeout; - frac += fracstep; - } - __m128i fgcolor = 
_mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i lit_dynlight = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); - - int blue0 = BPART(ifgcolor[0]); - int green0 = GPART(ifgcolor[0]); - int red0 = RPART(ifgcolor[0]); - int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - - int blue1 = BPART(ifgcolor[1]); - int green1 = GPART(ifgcolor[1]); - int red1 = RPART(ifgcolor[1]); - int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; - - __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); - - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, mlight); - fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - - fgcolor = _mm_add_epi16(fgcolor, lit_dynlight); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); - - // Blend - uint32_t alpha0 = APART(ifgcolor[0]); - uint32_t alpha1 = APART(ifgcolor[1]); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 >> 7; // 255->256 - uint32_t inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; - uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, 
_mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_sub_epi32(fg_lo, bg_lo); - __m128i out_hi = _mm_sub_epi32(fg_hi, bg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - _mm_storel_epi64((__m128i*)desttmp, outcolor); - dest[offset] = desttmp[0]; - dest[offset + pitch] = desttmp[1]; - } - - if (ssecount * 2 != count) - { - int index = ssecount * 2; - int offset = index * pitch; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2], ifgshade[2]; - unsigned int sampleout = translation[source[frac >> FRACBITS]]; - unsigned int sampleshadeout = 0; - ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - ifgshade[0] = sampleshadeout; - ifgshade[1] = 0; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i lit_dynlight = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); - - int blue0 = BPART(ifgcolor[0]); - int green0 = GPART(ifgcolor[0]); - int red0 = RPART(ifgcolor[0]); - int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - - int blue1 = BPART(ifgcolor[1]); - int green1 = GPART(ifgcolor[1]); - int red1 = RPART(ifgcolor[1]); - int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; - - __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); - - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, mlight); - fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); - fgcolor = 
_mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - - fgcolor = _mm_add_epi16(fgcolor, lit_dynlight); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); - - // Blend - uint32_t alpha0 = APART(ifgcolor[0]); - uint32_t alpha1 = APART(ifgcolor[1]); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 >> 7; // 255->256 - uint32_t inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; - uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_sub_epi32(fg_lo, bg_lo); - __m128i out_hi = _mm_sub_epi32(fg_hi, bg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - dest[offset] = _mm_cvtsi128_si32(outcolor); - } - } - } - - FString DebugInfo() override { return "DrawSpriteTranslatedSubClamp32Command"; } - }; - - class DrawSpriteTranslatedRevSubClamp32Command : public DrawerCommand - { - protected: - SpriteDrawerArgs args; - - public: - DrawSpriteTranslatedRevSubClamp32Command(const SpriteDrawerArgs &drawerargs) : 
args(drawerargs) { } - - void Execute(DrawerThread *thread) override - { - auto shade_constants = args.ColormapConstants(); - if (shade_constants.simple_shade) - { - const uint8_t *source = args.TexturePixels(); - const uint8_t *colormap = args.Colormap(); - const uint32_t *translation = (const uint32_t*)args.TranslationMap(); - int textureheight = args.TextureHeight(); - uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; - - // Shade constants - __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); - dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); - dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); - - int count = args.Count(); - int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t fracstep = args.TextureVStep(); - uint32_t frac = args.TextureVPos(); - uint32_t texturefracx = args.TextureUPos(); - uint32_t *dest = (uint32_t*)args.Dest(); - int dest_y = args.DestY(); - - count = thread->count_for_thread(dest_y, count); - if (count <= 0) return; - frac += thread->skipped_by_thread(dest_y) * fracstep; - dest = thread->dest_for_thread(dest_y, pitch, dest); - fracstep *= thread->num_cores; - pitch *= thread->num_cores; - uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); - uint32_t srccolor = args.SrcColorBgra(); - uint32_t color = LightBgra::shade_pal_index_simple(args.SolidColor(), light); - - int ssecount = count / 2; - for (int index = 0; index < ssecount; index++) - { - int offset = index * pitch * 2; - uint32_t desttmp[2]; - desttmp[0] = dest[offset]; - desttmp[1] = dest[offset + pitch]; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); - - // Sample - unsigned 
int ifgcolor[2], ifgshade[2]; - { - unsigned int sampleout = translation[source[frac >> FRACBITS]]; - unsigned int sampleshadeout = 0; - ifgcolor[0] = sampleout; - ifgshade[0] = sampleshadeout; - frac += fracstep; - } - { - unsigned int sampleout = translation[source[frac >> FRACBITS]]; - unsigned int sampleshadeout = 0; - ifgcolor[1] = sampleout; - ifgshade[1] = sampleshadeout; - frac += fracstep; - } - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - - // Blend - uint32_t alpha0 = APART(ifgcolor[0]); - uint32_t alpha1 = APART(ifgcolor[1]); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 >> 7; // 255->256 - uint32_t inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; - uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_sub_epi32(bg_lo, fg_lo); - __m128i out_hi = _mm_sub_epi32(bg_hi, fg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = 
_mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - _mm_storel_epi64((__m128i*)desttmp, outcolor); - dest[offset] = desttmp[0]; - dest[offset + pitch] = desttmp[1]; - } - - if (ssecount * 2 != count) - { - int index = ssecount * 2; - int offset = index * pitch; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2], ifgshade[2]; - unsigned int sampleout = translation[source[frac >> FRACBITS]]; - unsigned int sampleshadeout = 0; - ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - ifgshade[0] = sampleshadeout; - ifgshade[1] = 0; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - - // Blend - uint32_t alpha0 = APART(ifgcolor[0]); - uint32_t alpha1 = APART(ifgcolor[1]); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 >> 7; // 255->256 - uint32_t inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; - uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_sub_epi32(bg_lo, fg_lo); - __m128i out_hi = _mm_sub_epi32(bg_hi, fg_hi); - - 
out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - dest[offset] = _mm_cvtsi128_si32(outcolor); - } - } - else - { - const uint8_t *source = args.TexturePixels(); - const uint8_t *colormap = args.Colormap(); - const uint32_t *translation = (const uint32_t*)args.TranslationMap(); - int textureheight = args.TextureHeight(); - uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; - - // Shade constants - __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); - dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); - dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); - __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); - shade_fade = _mm_mullo_epi16(shade_fade, inv_light); - __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); - int desaturate = shade_constants.desaturate; - - __m128i lightcontrib = 
_mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); - lightcontrib = _mm_sub_epi16(lightcontrib, mlight); - - int count = args.Count(); - int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t fracstep = args.TextureVStep(); - uint32_t frac = args.TextureVPos(); - uint32_t texturefracx = args.TextureUPos(); - uint32_t *dest = (uint32_t*)args.Dest(); - int dest_y = args.DestY(); - - count = thread->count_for_thread(dest_y, count); - if (count <= 0) return; - frac += thread->skipped_by_thread(dest_y) * fracstep; - dest = thread->dest_for_thread(dest_y, pitch, dest); - fracstep *= thread->num_cores; - pitch *= thread->num_cores; - uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); - uint32_t srccolor = args.SrcColorBgra(); - uint32_t color = LightBgra::shade_pal_index_simple(args.SolidColor(), light); - - int ssecount = count / 2; - for (int index = 0; index < ssecount; index++) - { - int offset = index * pitch * 2; - uint32_t desttmp[2]; - desttmp[0] = dest[offset]; - desttmp[1] = dest[offset + pitch]; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2], ifgshade[2]; - { - unsigned int sampleout = translation[source[frac >> FRACBITS]]; - unsigned int sampleshadeout = 0; - ifgcolor[0] = sampleout; - ifgshade[0] = sampleshadeout; - frac += fracstep; - } - { - unsigned int sampleout = translation[source[frac >> FRACBITS]]; - unsigned int sampleshadeout = 0; - ifgcolor[1] = sampleout; - ifgshade[1] = sampleshadeout; - frac += fracstep; - } - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i lit_dynlight = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); - - int blue0 = BPART(ifgcolor[0]); - int green0 = GPART(ifgcolor[0]); - int red0 = RPART(ifgcolor[0]); - int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 
37) >> 8) * desaturate; - - int blue1 = BPART(ifgcolor[1]); - int green1 = GPART(ifgcolor[1]); - int red1 = RPART(ifgcolor[1]); - int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; - - __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); - - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, mlight); - fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - - fgcolor = _mm_add_epi16(fgcolor, lit_dynlight); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); - - // Blend - uint32_t alpha0 = APART(ifgcolor[0]); - uint32_t alpha1 = APART(ifgcolor[1]); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 >> 7; // 255->256 - uint32_t inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; - uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_sub_epi32(bg_lo, fg_lo); - __m128i out_hi = _mm_sub_epi32(bg_hi, fg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); 
- __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - _mm_storel_epi64((__m128i*)desttmp, outcolor); - dest[offset] = desttmp[0]; - dest[offset + pitch] = desttmp[1]; - } - - if (ssecount * 2 != count) - { - int index = ssecount * 2; - int offset = index * pitch; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2], ifgshade[2]; - unsigned int sampleout = translation[source[frac >> FRACBITS]]; - unsigned int sampleshadeout = 0; - ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - ifgshade[0] = sampleshadeout; - ifgshade[1] = 0; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i lit_dynlight = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); - - int blue0 = BPART(ifgcolor[0]); - int green0 = GPART(ifgcolor[0]); - int red0 = RPART(ifgcolor[0]); - int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - - int blue1 = BPART(ifgcolor[1]); - int green1 = GPART(ifgcolor[1]); - int red1 = RPART(ifgcolor[1]); - int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; - - __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); - - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, mlight); - fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - - fgcolor = _mm_add_epi16(fgcolor, lit_dynlight); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); - - // Blend - uint32_t alpha0 = APART(ifgcolor[0]); - uint32_t alpha1 = APART(ifgcolor[1]); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 >> 7; // 255->256 - uint32_t 
inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; - uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_sub_epi32(bg_lo, fg_lo); - __m128i out_hi = _mm_sub_epi32(bg_hi, fg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - dest[offset] = _mm_cvtsi128_si32(outcolor); - } - } - } - - FString DebugInfo() override { return "DrawSpriteTranslatedRevSubClamp32Command"; } - }; - -} diff --git a/src/swrenderer/drawers/r_draw_sprite32.php b/src/swrenderer/drawers/r_draw_sprite32.php deleted file mode 100644 index e08ba551fe..0000000000 --- a/src/swrenderer/drawers/r_draw_sprite32.php +++ /dev/null @@ -1,385 +0,0 @@ -#!/usr/bin/php -/* -** Drawer commands for sprites -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 'as-is', without any express or implied -** warranty. In no event will the authors be held liable for any damages -** arising from the use of this software. 
-** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. -** 3. This notice may not be removed or altered from any source distribution. -** -*/ - -/* - Warning: this C++ source file has been auto-generated. Please modify the original php script that generated it. -*/ - -#pragma once - -#include "swrenderer/drawers/r_draw_rgba.h" -#include "swrenderer/viewport/r_walldrawer.h" - -namespace swrenderer -{ - - class : public DrawerCommand - { - protected: - SpriteDrawerArgs args; - - public: - (const SpriteDrawerArgs &drawerargs) : args(drawerargs) { } - - void Execute(DrawerThread *thread) override - { - auto shade_constants = args.ColormapConstants(); - if (shade_constants.simple_shade) - { - - } - else - { - - } - } - - FString DebugInfo() override { return ""; } - }; - - - const uint8_t *source = args.TexturePixels(); - const uint8_t *colormap = args.Colormap(); - const uint32_t *translation = (const uint32_t*)args.TranslationMap(); - - const uint32_t *source = (const uint32_t*)args.TexturePixels(); - const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); - - bool is_nearest_filter = (source2 == nullptr); - if (is_nearest_filter) - { - - } - else - { - - } - - int textureheight = args.TextureHeight(); - uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; - - // Shade constants - __m128i dynlight = _mm_cvtsi32_si128(args.DynamicLight()); - dynlight = _mm_unpacklo_epi8(dynlight, _mm_setzero_si128()); - 
dynlight = _mm_shuffle_epi32(dynlight, _MM_SHUFFLE(1,0,1,0)); - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); - - __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); - __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); - shade_fade = _mm_mullo_epi16(shade_fade, inv_light); - __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); - int desaturate = shade_constants.desaturate; - - __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); - lightcontrib = _mm_sub_epi16(lightcontrib, mlight); - - - int count = args.Count(); - int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t fracstep = args.TextureVStep(); - uint32_t frac = args.TextureVPos(); - uint32_t texturefracx = args.TextureUPos(); - uint32_t *dest = (uint32_t*)args.Dest(); - int dest_y = args.DestY(); - - count = thread->count_for_thread(dest_y, count); - if (count <= 0) return; - frac += thread->skipped_by_thread(dest_y) * fracstep; - dest = thread->dest_for_thread(dest_y, pitch, dest); - fracstep *= thread->num_cores; - pitch *= thread->num_cores; - - frac 
-= one / 2; - - uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); - uint32_t srccolor = args.SrcColorBgra(); - uint32_t color = LightBgra::shade_pal_index_simple(args.SolidColor(), light); - - int ssecount = count / 2; - for (int index = 0; index < ssecount; index++) - { - int offset = index * pitch * 2; - uint32_t desttmp[2]; - desttmp[0] = dest[offset]; - desttmp[1] = dest[offset + pitch]; - - __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); - - - // Sample - unsigned int ifgcolor[2], ifgshade[2]; - { - - ifgcolor[0] = sampleout; - ifgshade[0] = sampleshadeout; - frac += fracstep; - } - { - - ifgcolor[1] = sampleout; - ifgshade[1] = sampleshadeout; - frac += fracstep; - } - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - - - // Blend - - - _mm_storel_epi64((__m128i*)desttmp, outcolor); - dest[offset] = desttmp[0]; - dest[offset + pitch] = desttmp[1]; - } - - if (ssecount * 2 != count) - { - int index = ssecount * 2; - int offset = index * pitch; - - __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); - - - // Sample - unsigned int ifgcolor[2], ifgshade[2]; - - ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - ifgshade[0] = sampleshadeout; - ifgshade[1] = 0; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - - - // Blend - - - dest[offset] = _mm_cvtsi128_si32(outcolor); - } - - unsigned int sampleout = color; - unsigned int sampleshadeout = colormap[source[frac >> FRACBITS]]; - sampleshadeout = clamp(sampleshadeout, 0, 64) * 4; - - unsigned int sampleout = translation[source[frac >> FRACBITS]]; - unsigned int sampleshadeout = 0; - - unsigned int sampleout = srccolor; - unsigned int sampleshadeout = 0; - - int sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; - unsigned int 
sampleout = source[sample_index]; - unsigned int sampleshadeout = 0; - - // Clamp to edge - unsigned int frac_y0 = (clamp(frac, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; - unsigned int frac_y1 = (clamp(frac + one, 0, 1 << 30) >> (FRACBITS - 2)) * textureheight; - unsigned int y0 = frac_y0 >> FRACBITS; - unsigned int y1 = frac_y1 >> FRACBITS; - - unsigned int p00 = source[y0]; - unsigned int p01 = source[y1]; - unsigned int p10 = source2[y0]; - unsigned int p11 = source2[y1]; - - unsigned int inv_b = texturefracx; - unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; - unsigned int a = 16 - inv_a; - unsigned int b = 16 - inv_b; - - unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - unsigned int sampleshadeout = 0; - - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - - __m128i lit_dynlight = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); - - int blue0 = BPART(ifgcolor[0]); - int green0 = GPART(ifgcolor[0]); - int red0 = RPART(ifgcolor[0]); - int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - - int blue1 = BPART(ifgcolor[1]); - int green1 = GPART(ifgcolor[1]); - int red1 = RPART(ifgcolor[1]); - int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; - - __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); - - fgcolor = 
_mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, mlight); - fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - - fgcolor = _mm_add_epi16(fgcolor, lit_dynlight); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); - - __m128i outcolor = fgcolor; - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - - __m128i alpha = _mm_set_epi16(ifgshade[1], ifgshade[1], ifgshade[1], ifgshade[1], ifgshade[0], ifgshade[0], ifgshade[0], ifgshade[0]); - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - fgcolor = _mm_mullo_epi16(fgcolor, alpha); - bgcolor = _mm_mullo_epi16(bgcolor, inv_alpha); - __m128i outcolor = _mm_srli_epi16(_mm_add_epi16(fgcolor, bgcolor), 8); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - uint32_t alpha0 = APART(ifgcolor[0]); - uint32_t alpha1 = APART(ifgcolor[1]); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 >> 7; // 255->256 - uint32_t inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; - uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); 
- __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - - __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); - __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); - - __m128i out_lo = _mm_sub_epi32(fg_lo, bg_lo); - __m128i out_hi = _mm_sub_epi32(fg_hi, bg_hi); - - __m128i out_lo = _mm_sub_epi32(bg_lo, fg_lo); - __m128i out_hi = _mm_sub_epi32(bg_hi, fg_hi); - - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - -} diff --git a/src/swrenderer/drawers/r_draw_wall32.h b/src/swrenderer/drawers/r_draw_wall32.h deleted file mode 100644 index 7e6dd931dd..0000000000 --- a/src/swrenderer/drawers/r_draw_wall32.h +++ /dev/null @@ -1,5416 +0,0 @@ -/* -** Drawer commands for walls -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 'as-is', without any express or implied -** warranty. In no event will the authors be held liable for any damages -** arising from the use of this software. -** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. -** 3. This notice may not be removed or altered from any source distribution. -** -*/ - -/* - Warning: this C++ source file has been auto-generated. Please modify the original php script that generated it. 
-*/ - -#pragma once - -#include "swrenderer/drawers/r_draw_rgba.h" -#include "swrenderer/viewport/r_walldrawer.h" - -namespace swrenderer -{ - class DrawWall32Command : public DrawerCommand - { - protected: - WallDrawerArgs args; - - public: - DrawWall32Command(const WallDrawerArgs &drawerargs) : args(drawerargs) { } - - void Execute(DrawerThread *thread) override - { - auto shade_constants = args.ColormapConstants(); - if (shade_constants.simple_shade) - { - const uint32_t *source = (const uint32_t*)args.TexturePixels(); - const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); - bool is_nearest_filter = (source2 == nullptr); - if (is_nearest_filter) - { - int textureheight = args.TextureHeight(); - uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; - - // Shade constants - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - - int count = args.Count(); - int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t fracstep = args.TextureVStep(); - uint32_t frac = args.TextureVPos(); - uint32_t texturefracx = args.TextureUPos(); - uint32_t *dest = (uint32_t*)args.Dest(); - int dest_y = args.DestY(); - - auto lights = args.dc_lights; - auto num_lights = args.dc_num_lights; - float vpz = args.dc_viewpos.Z + args.dc_viewpos_step.Z * thread->skipped_by_thread(dest_y); - float stepvpz = args.dc_viewpos_step.Z * thread->num_cores; - __m128 viewpos_z = _mm_setr_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); - __m128 step_viewpos_z = _mm_set1_ps(stepvpz * 2.0f); - - count = thread->count_for_thread(dest_y, count); - if (count <= 0) return; - frac += thread->skipped_by_thread(dest_y) * fracstep; - dest = thread->dest_for_thread(dest_y, pitch, dest); - fracstep *= thread->num_cores; - pitch *= thread->num_cores; - uint32_t srcalpha 
= args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); - - int ssecount = count / 2; - for (int index = 0; index < ssecount; index++) - { - int offset = index * pitch * 2; - uint32_t desttmp[2]; - desttmp[0] = dest[offset]; - desttmp[1] = dest[offset + pitch]; - - // Sample - unsigned int ifgcolor[2]; - { - int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; - unsigned int sampleout = source[sample_index]; - ifgcolor[0] = sampleout; - frac += fracstep; - } - { - int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; - unsigned int sampleout = source[sample_index]; - ifgcolor[1] = sampleout; - frac += fracstep; - } - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lxy2 = light_x; // L.x*L.x + L.y*L.y - __m128 Lz = _mm_sub_ps(light_z, viewpos_z); - __m128 dist2 = _mm_add_ps(Lxy2, _mm_mul_ps(Lz, Lz)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_y, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_y, _mm_setzero_ps()); - __m128i attenuation = 
_mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - __m128i outcolor = fgcolor; - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - - _mm_storel_epi64((__m128i*)desttmp, outcolor); - dest[offset] = desttmp[0]; - dest[offset + pitch] = desttmp[1]; - viewpos_z = _mm_add_ps(viewpos_z, step_viewpos_z); - } - - if (ssecount * 2 != count) - { - int index = ssecount * 2; - int offset = index * pitch; - - // Sample - unsigned int ifgcolor[2]; - int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; - unsigned int sampleout = source[sample_index]; - ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lxy2 = light_x; // L.x*L.x + L.y*L.y - __m128 Lz = _mm_sub_ps(light_z, 
viewpos_z); - __m128 dist2 = _mm_add_ps(Lxy2, _mm_mul_ps(Lz, Lz)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_y, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_y, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - __m128i outcolor = fgcolor; - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - - dest[offset] = _mm_cvtsi128_si32(outcolor); - } - } - else - { - int textureheight = args.TextureHeight(); - uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; - - // Shade constants - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - - int count = args.Count(); - int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t fracstep = 
args.TextureVStep(); - uint32_t frac = args.TextureVPos(); - uint32_t texturefracx = args.TextureUPos(); - uint32_t *dest = (uint32_t*)args.Dest(); - int dest_y = args.DestY(); - - auto lights = args.dc_lights; - auto num_lights = args.dc_num_lights; - float vpz = args.dc_viewpos.Z + args.dc_viewpos_step.Z * thread->skipped_by_thread(dest_y); - float stepvpz = args.dc_viewpos_step.Z * thread->num_cores; - __m128 viewpos_z = _mm_setr_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); - __m128 step_viewpos_z = _mm_set1_ps(stepvpz * 2.0f); - - count = thread->count_for_thread(dest_y, count); - if (count <= 0) return; - frac += thread->skipped_by_thread(dest_y) * fracstep; - dest = thread->dest_for_thread(dest_y, pitch, dest); - fracstep *= thread->num_cores; - pitch *= thread->num_cores; - frac -= one / 2; - uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); - - int ssecount = count / 2; - for (int index = 0; index < ssecount; index++) - { - int offset = index * pitch * 2; - uint32_t desttmp[2]; - desttmp[0] = dest[offset]; - desttmp[1] = dest[offset + pitch]; - - // Sample - unsigned int ifgcolor[2]; - { - unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; - unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; - unsigned int y0 = frac_y0 >> FRACBITS; - unsigned int y1 = frac_y1 >> FRACBITS; - - unsigned int p00 = source[y0]; - unsigned int p01 = source[y1]; - unsigned int p10 = source2[y0]; - unsigned int p11 = source2[y1]; - - unsigned int inv_b = texturefracx; - unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; - unsigned int a = 16 - inv_a; - unsigned int b = 16 - inv_b; - - unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sblue = (BPART(p00) * 
(a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - ifgcolor[0] = sampleout; - frac += fracstep; - } - { - unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; - unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; - unsigned int y0 = frac_y0 >> FRACBITS; - unsigned int y1 = frac_y1 >> FRACBITS; - - unsigned int p00 = source[y0]; - unsigned int p01 = source[y1]; - unsigned int p10 = source2[y0]; - unsigned int p11 = source2[y1]; - - unsigned int inv_b = texturefracx; - unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; - unsigned int a = 16 - inv_a; - unsigned int b = 16 - inv_b; - - unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - ifgcolor[1] = sampleout; - frac += fracstep; - } - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z 
= _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lxy2 = light_x; // L.x*L.x + L.y*L.y - __m128 Lz = _mm_sub_ps(light_z, viewpos_z); - __m128 dist2 = _mm_add_ps(Lxy2, _mm_mul_ps(Lz, Lz)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_y, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_y, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - __m128i outcolor = fgcolor; - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - - _mm_storel_epi64((__m128i*)desttmp, outcolor); - dest[offset] = desttmp[0]; - dest[offset + pitch] = desttmp[1]; - viewpos_z = _mm_add_ps(viewpos_z, step_viewpos_z); - } - - if (ssecount * 2 != count) - { - int index = ssecount * 2; - int offset = index * pitch; - - // Sample - unsigned 
int ifgcolor[2]; - unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; - unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; - unsigned int y0 = frac_y0 >> FRACBITS; - unsigned int y1 = frac_y1 >> FRACBITS; - - unsigned int p00 = source[y0]; - unsigned int p01 = source[y1]; - unsigned int p10 = source2[y0]; - unsigned int p11 = source2[y1]; - - unsigned int inv_b = texturefracx; - unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; - unsigned int a = 16 - inv_a; - unsigned int b = 16 - inv_b; - - unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lxy2 = light_x; // L.x*L.x + L.y*L.y - __m128 Lz = _mm_sub_ps(light_z, viewpos_z); - __m128 dist2 = _mm_add_ps(Lxy2, _mm_mul_ps(Lz, Lz)); - 
__m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_y, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_y, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - __m128i outcolor = fgcolor; - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - - dest[offset] = _mm_cvtsi128_si32(outcolor); - } - } - } - else - { - const uint32_t *source = (const uint32_t*)args.TexturePixels(); - const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); - bool is_nearest_filter = (source2 == nullptr); - if (is_nearest_filter) - { - int textureheight = args.TextureHeight(); - uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; - - // Shade constants - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - 
light, 256 - light); - __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); - __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); - shade_fade = _mm_mullo_epi16(shade_fade, inv_light); - __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); - int desaturate = shade_constants.desaturate; - - int count = args.Count(); - int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t fracstep = args.TextureVStep(); - uint32_t frac = args.TextureVPos(); - uint32_t texturefracx = args.TextureUPos(); - uint32_t *dest = (uint32_t*)args.Dest(); - int dest_y = args.DestY(); - - auto lights = args.dc_lights; - auto num_lights = args.dc_num_lights; - float vpz = args.dc_viewpos.Z + args.dc_viewpos_step.Z * thread->skipped_by_thread(dest_y); - float stepvpz = args.dc_viewpos_step.Z * thread->num_cores; - __m128 viewpos_z = _mm_setr_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); - __m128 step_viewpos_z = _mm_set1_ps(stepvpz * 2.0f); - - count = thread->count_for_thread(dest_y, count); - if (count <= 0) return; - frac += thread->skipped_by_thread(dest_y) * fracstep; - dest = thread->dest_for_thread(dest_y, pitch, dest); - fracstep *= thread->num_cores; - pitch *= thread->num_cores; - uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); - - int ssecount = count / 2; - for (int index = 0; index < ssecount; index++) - 
{ - int offset = index * pitch * 2; - uint32_t desttmp[2]; - desttmp[0] = dest[offset]; - desttmp[1] = dest[offset + pitch]; - - // Sample - unsigned int ifgcolor[2]; - { - int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; - unsigned int sampleout = source[sample_index]; - ifgcolor[0] = sampleout; - frac += fracstep; - } - { - int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; - unsigned int sampleout = source[sample_index]; - ifgcolor[1] = sampleout; - frac += fracstep; - } - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - int blue0 = BPART(ifgcolor[0]); - int green0 = GPART(ifgcolor[0]); - int red0 = RPART(ifgcolor[0]); - int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - - int blue1 = BPART(ifgcolor[1]); - int green1 = GPART(ifgcolor[1]); - int red1 = RPART(ifgcolor[1]); - int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; - - __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); - - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, mlight); - fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lxy2 = light_x; // L.x*L.x + L.y*L.y - __m128 Lz = _mm_sub_ps(light_z, viewpos_z); - __m128 dist2 = _mm_add_ps(Lxy2, _mm_mul_ps(Lz, Lz)); - __m128 
rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_y, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_y, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - __m128i outcolor = fgcolor; - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - - _mm_storel_epi64((__m128i*)desttmp, outcolor); - dest[offset] = desttmp[0]; - dest[offset + pitch] = desttmp[1]; - viewpos_z = _mm_add_ps(viewpos_z, step_viewpos_z); - } - - if (ssecount * 2 != count) - { - int index = ssecount * 2; - int offset = index * pitch; - - // Sample - unsigned int ifgcolor[2]; - int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; - unsigned int sampleout = source[sample_index]; - ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - int blue0 = BPART(ifgcolor[0]); - int green0 = 
GPART(ifgcolor[0]); - int red0 = RPART(ifgcolor[0]); - int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - - int blue1 = BPART(ifgcolor[1]); - int green1 = GPART(ifgcolor[1]); - int red1 = RPART(ifgcolor[1]); - int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; - - __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); - - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, mlight); - fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lxy2 = light_x; // L.x*L.x + L.y*L.y - __m128 Lz = _mm_sub_ps(light_z, viewpos_z); - __m128 dist2 = _mm_add_ps(Lxy2, _mm_mul_ps(Lz, Lz)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_y, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_y, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, 
_MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - __m128i outcolor = fgcolor; - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - - dest[offset] = _mm_cvtsi128_si32(outcolor); - } - } - else - { - int textureheight = args.TextureHeight(); - uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; - - // Shade constants - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); - __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); - shade_fade = _mm_mullo_epi16(shade_fade, inv_light); - __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); - int desaturate = shade_constants.desaturate; - - int count = 
args.Count(); - int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t fracstep = args.TextureVStep(); - uint32_t frac = args.TextureVPos(); - uint32_t texturefracx = args.TextureUPos(); - uint32_t *dest = (uint32_t*)args.Dest(); - int dest_y = args.DestY(); - - auto lights = args.dc_lights; - auto num_lights = args.dc_num_lights; - float vpz = args.dc_viewpos.Z + args.dc_viewpos_step.Z * thread->skipped_by_thread(dest_y); - float stepvpz = args.dc_viewpos_step.Z * thread->num_cores; - __m128 viewpos_z = _mm_setr_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); - __m128 step_viewpos_z = _mm_set1_ps(stepvpz * 2.0f); - - count = thread->count_for_thread(dest_y, count); - if (count <= 0) return; - frac += thread->skipped_by_thread(dest_y) * fracstep; - dest = thread->dest_for_thread(dest_y, pitch, dest); - fracstep *= thread->num_cores; - pitch *= thread->num_cores; - frac -= one / 2; - uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); - - int ssecount = count / 2; - for (int index = 0; index < ssecount; index++) - { - int offset = index * pitch * 2; - uint32_t desttmp[2]; - desttmp[0] = dest[offset]; - desttmp[1] = dest[offset + pitch]; - - // Sample - unsigned int ifgcolor[2]; - { - unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; - unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; - unsigned int y0 = frac_y0 >> FRACBITS; - unsigned int y1 = frac_y1 >> FRACBITS; - - unsigned int p00 = source[y0]; - unsigned int p01 = source[y1]; - unsigned int p10 = source2[y0]; - unsigned int p11 = source2[y1]; - - unsigned int inv_b = texturefracx; - unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; - unsigned int a = 16 - inv_a; - unsigned int b = 16 - inv_b; - - unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + 
GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - ifgcolor[0] = sampleout; - frac += fracstep; - } - { - unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; - unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; - unsigned int y0 = frac_y0 >> FRACBITS; - unsigned int y1 = frac_y1 >> FRACBITS; - - unsigned int p00 = source[y0]; - unsigned int p01 = source[y1]; - unsigned int p10 = source2[y0]; - unsigned int p11 = source2[y1]; - - unsigned int inv_b = texturefracx; - unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; - unsigned int a = 16 - inv_a; - unsigned int b = 16 - inv_b; - - unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - ifgcolor[1] = sampleout; - frac += fracstep; - } - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - int blue0 = BPART(ifgcolor[0]); - int green0 = GPART(ifgcolor[0]); - int red0 = RPART(ifgcolor[0]); - int intensity0 = ((red0 * 77 + green0 * 143 
+ blue0 * 37) >> 8) * desaturate; - - int blue1 = BPART(ifgcolor[1]); - int green1 = GPART(ifgcolor[1]); - int red1 = RPART(ifgcolor[1]); - int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; - - __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); - - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, mlight); - fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lxy2 = light_x; // L.x*L.x + L.y*L.y - __m128 Lz = _mm_sub_ps(light_z, viewpos_z); - __m128 dist2 = _mm_add_ps(Lxy2, _mm_mul_ps(Lz, Lz)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_y, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_y, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = 
_mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - __m128i outcolor = fgcolor; - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - - _mm_storel_epi64((__m128i*)desttmp, outcolor); - dest[offset] = desttmp[0]; - dest[offset + pitch] = desttmp[1]; - viewpos_z = _mm_add_ps(viewpos_z, step_viewpos_z); - } - - if (ssecount * 2 != count) - { - int index = ssecount * 2; - int offset = index * pitch; - - // Sample - unsigned int ifgcolor[2]; - unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; - unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; - unsigned int y0 = frac_y0 >> FRACBITS; - unsigned int y1 = frac_y1 >> FRACBITS; - - unsigned int p00 = source[y0]; - unsigned int p01 = source[y1]; - unsigned int p10 = source2[y0]; - unsigned int p11 = source2[y1]; - - unsigned int inv_b = texturefracx; - unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; - unsigned int a = 16 - inv_a; - unsigned int b = 16 - inv_b; - - unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - 
ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - int blue0 = BPART(ifgcolor[0]); - int green0 = GPART(ifgcolor[0]); - int red0 = RPART(ifgcolor[0]); - int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - - int blue1 = BPART(ifgcolor[1]); - int green1 = GPART(ifgcolor[1]); - int red1 = RPART(ifgcolor[1]); - int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; - - __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); - - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, mlight); - fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lxy2 = light_x; // L.x*L.x + L.y*L.y - __m128 Lz = _mm_sub_ps(light_z, viewpos_z); - __m128 dist2 = _mm_add_ps(Lxy2, _mm_mul_ps(Lz, Lz)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_y, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_y, 
_mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - __m128i outcolor = fgcolor; - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - - dest[offset] = _mm_cvtsi128_si32(outcolor); - } - } - } - } - - FString DebugInfo() override { return "DrawWall32Command"; } - }; - - class DrawWallMasked32Command : public DrawerCommand - { - protected: - WallDrawerArgs args; - - public: - DrawWallMasked32Command(const WallDrawerArgs &drawerargs) : args(drawerargs) { } - - void Execute(DrawerThread *thread) override - { - auto shade_constants = args.ColormapConstants(); - if (shade_constants.simple_shade) - { - const uint32_t *source = (const uint32_t*)args.TexturePixels(); - const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); - bool is_nearest_filter = (source2 == nullptr); - if (is_nearest_filter) - { - int textureheight = args.TextureHeight(); - uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; - - // Shade constants - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - - int count = args.Count(); - int pitch = 
RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t fracstep = args.TextureVStep(); - uint32_t frac = args.TextureVPos(); - uint32_t texturefracx = args.TextureUPos(); - uint32_t *dest = (uint32_t*)args.Dest(); - int dest_y = args.DestY(); - - auto lights = args.dc_lights; - auto num_lights = args.dc_num_lights; - float vpz = args.dc_viewpos.Z + args.dc_viewpos_step.Z * thread->skipped_by_thread(dest_y); - float stepvpz = args.dc_viewpos_step.Z * thread->num_cores; - __m128 viewpos_z = _mm_setr_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); - __m128 step_viewpos_z = _mm_set1_ps(stepvpz * 2.0f); - - count = thread->count_for_thread(dest_y, count); - if (count <= 0) return; - frac += thread->skipped_by_thread(dest_y) * fracstep; - dest = thread->dest_for_thread(dest_y, pitch, dest); - fracstep *= thread->num_cores; - pitch *= thread->num_cores; - uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); - - int ssecount = count / 2; - for (int index = 0; index < ssecount; index++) - { - int offset = index * pitch * 2; - uint32_t desttmp[2]; - desttmp[0] = dest[offset]; - desttmp[1] = dest[offset + pitch]; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2]; - { - int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; - unsigned int sampleout = source[sample_index]; - ifgcolor[0] = sampleout; - frac += fracstep; - } - { - int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; - unsigned int sampleout = source[sample_index]; - ifgcolor[1] = sampleout; - frac += fracstep; - } - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); 
- __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lxy2 = light_x; // L.x*L.x + L.y*L.y - __m128 Lz = _mm_sub_ps(light_z, viewpos_z); - __m128 dist2 = _mm_add_ps(Lxy2, _mm_mul_ps(Lz, Lz)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_y, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_y, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - fgcolor = _mm_mullo_epi16(fgcolor, alpha); - 
bgcolor = _mm_mullo_epi16(bgcolor, inv_alpha); - __m128i outcolor = _mm_srli_epi16(_mm_add_epi16(fgcolor, bgcolor), 8); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - _mm_storel_epi64((__m128i*)desttmp, outcolor); - dest[offset] = desttmp[0]; - dest[offset + pitch] = desttmp[1]; - viewpos_z = _mm_add_ps(viewpos_z, step_viewpos_z); - } - - if (ssecount * 2 != count) - { - int index = ssecount * 2; - int offset = index * pitch; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2]; - int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; - unsigned int sampleout = source[sample_index]; - ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lxy2 = light_x; // L.x*L.x + L.y*L.y - __m128 Lz = _mm_sub_ps(light_z, viewpos_z); - __m128 dist2 = _mm_add_ps(Lxy2, _mm_mul_ps(Lz, Lz)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_y, rcp_dist), 
distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_y, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - fgcolor = _mm_mullo_epi16(fgcolor, alpha); - bgcolor = _mm_mullo_epi16(bgcolor, inv_alpha); - __m128i outcolor = _mm_srli_epi16(_mm_add_epi16(fgcolor, bgcolor), 8); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - dest[offset] = _mm_cvtsi128_si32(outcolor); - } - } - else - { - int textureheight = args.TextureHeight(); - uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; - - // Shade constants - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - - int count = args.Count(); - int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t fracstep = 
args.TextureVStep(); - uint32_t frac = args.TextureVPos(); - uint32_t texturefracx = args.TextureUPos(); - uint32_t *dest = (uint32_t*)args.Dest(); - int dest_y = args.DestY(); - - auto lights = args.dc_lights; - auto num_lights = args.dc_num_lights; - float vpz = args.dc_viewpos.Z + args.dc_viewpos_step.Z * thread->skipped_by_thread(dest_y); - float stepvpz = args.dc_viewpos_step.Z * thread->num_cores; - __m128 viewpos_z = _mm_setr_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); - __m128 step_viewpos_z = _mm_set1_ps(stepvpz * 2.0f); - - count = thread->count_for_thread(dest_y, count); - if (count <= 0) return; - frac += thread->skipped_by_thread(dest_y) * fracstep; - dest = thread->dest_for_thread(dest_y, pitch, dest); - fracstep *= thread->num_cores; - pitch *= thread->num_cores; - frac -= one / 2; - uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); - - int ssecount = count / 2; - for (int index = 0; index < ssecount; index++) - { - int offset = index * pitch * 2; - uint32_t desttmp[2]; - desttmp[0] = dest[offset]; - desttmp[1] = dest[offset + pitch]; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2]; - { - unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; - unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; - unsigned int y0 = frac_y0 >> FRACBITS; - unsigned int y1 = frac_y1 >> FRACBITS; - - unsigned int p00 = source[y0]; - unsigned int p01 = source[y1]; - unsigned int p10 = source2[y0]; - unsigned int p11 = source2[y1]; - - unsigned int inv_b = texturefracx; - unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; - unsigned int a = 16 - inv_a; - unsigned int b = 16 - inv_b; - - unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) 
* (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - ifgcolor[0] = sampleout; - frac += fracstep; - } - { - unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; - unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; - unsigned int y0 = frac_y0 >> FRACBITS; - unsigned int y1 = frac_y1 >> FRACBITS; - - unsigned int p00 = source[y0]; - unsigned int p01 = source[y1]; - unsigned int p10 = source2[y0]; - unsigned int p11 = source2[y1]; - - unsigned int inv_b = texturefracx; - unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; - unsigned int a = 16 - inv_a; - unsigned int b = 16 - inv_b; - - unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - ifgcolor[1] = sampleout; - frac += fracstep; - } - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 
light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lxy2 = light_x; // L.x*L.x + L.y*L.y - __m128 Lz = _mm_sub_ps(light_z, viewpos_z); - __m128 dist2 = _mm_add_ps(Lxy2, _mm_mul_ps(Lz, Lz)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_y, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_y, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - fgcolor = 
_mm_mullo_epi16(fgcolor, alpha); - bgcolor = _mm_mullo_epi16(bgcolor, inv_alpha); - __m128i outcolor = _mm_srli_epi16(_mm_add_epi16(fgcolor, bgcolor), 8); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - _mm_storel_epi64((__m128i*)desttmp, outcolor); - dest[offset] = desttmp[0]; - dest[offset + pitch] = desttmp[1]; - viewpos_z = _mm_add_ps(viewpos_z, step_viewpos_z); - } - - if (ssecount * 2 != count) - { - int index = ssecount * 2; - int offset = index * pitch; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2]; - unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; - unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; - unsigned int y0 = frac_y0 >> FRACBITS; - unsigned int y1 = frac_y1 >> FRACBITS; - - unsigned int p00 = source[y0]; - unsigned int p01 = source[y1]; - unsigned int p10 = source2[y0]; - unsigned int p11 = source2[y1]; - - unsigned int inv_b = texturefracx; - unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; - unsigned int a = 16 - inv_a; - unsigned int b = 16 - inv_b; - - unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // 
Shade - __m128i material = fgcolor; - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lxy2 = light_x; // L.x*L.x + L.y*L.y - __m128 Lz = _mm_sub_ps(light_z, viewpos_z); - __m128 dist2 = _mm_add_ps(Lxy2, _mm_mul_ps(Lz, Lz)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_y, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_y, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = 
_mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - fgcolor = _mm_mullo_epi16(fgcolor, alpha); - bgcolor = _mm_mullo_epi16(bgcolor, inv_alpha); - __m128i outcolor = _mm_srli_epi16(_mm_add_epi16(fgcolor, bgcolor), 8); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - dest[offset] = _mm_cvtsi128_si32(outcolor); - } - } - } - else - { - const uint32_t *source = (const uint32_t*)args.TexturePixels(); - const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); - bool is_nearest_filter = (source2 == nullptr); - if (is_nearest_filter) - { - int textureheight = args.TextureHeight(); - uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; - - // Shade constants - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); - __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); - shade_fade = _mm_mullo_epi16(shade_fade, inv_light); - __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, 
shade_constants.light_blue); - int desaturate = shade_constants.desaturate; - - int count = args.Count(); - int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t fracstep = args.TextureVStep(); - uint32_t frac = args.TextureVPos(); - uint32_t texturefracx = args.TextureUPos(); - uint32_t *dest = (uint32_t*)args.Dest(); - int dest_y = args.DestY(); - - auto lights = args.dc_lights; - auto num_lights = args.dc_num_lights; - float vpz = args.dc_viewpos.Z + args.dc_viewpos_step.Z * thread->skipped_by_thread(dest_y); - float stepvpz = args.dc_viewpos_step.Z * thread->num_cores; - __m128 viewpos_z = _mm_setr_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); - __m128 step_viewpos_z = _mm_set1_ps(stepvpz * 2.0f); - - count = thread->count_for_thread(dest_y, count); - if (count <= 0) return; - frac += thread->skipped_by_thread(dest_y) * fracstep; - dest = thread->dest_for_thread(dest_y, pitch, dest); - fracstep *= thread->num_cores; - pitch *= thread->num_cores; - uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); - - int ssecount = count / 2; - for (int index = 0; index < ssecount; index++) - { - int offset = index * pitch * 2; - uint32_t desttmp[2]; - desttmp[0] = dest[offset]; - desttmp[1] = dest[offset + pitch]; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2]; - { - int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; - unsigned int sampleout = source[sample_index]; - ifgcolor[0] = sampleout; - frac += fracstep; - } - { - int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; - unsigned int sampleout = source[sample_index]; - ifgcolor[1] = sampleout; - frac += fracstep; - } - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - int blue0 = BPART(ifgcolor[0]); - int green0 = GPART(ifgcolor[0]); - 
int red0 = RPART(ifgcolor[0]); - int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - - int blue1 = BPART(ifgcolor[1]); - int green1 = GPART(ifgcolor[1]); - int red1 = RPART(ifgcolor[1]); - int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; - - __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); - - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, mlight); - fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lxy2 = light_x; // L.x*L.x + L.y*L.y - __m128 Lz = _mm_sub_ps(light_z, viewpos_z); - __m128 dist2 = _mm_add_ps(Lxy2, _mm_mul_ps(Lz, Lz)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_y, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_y, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), 
_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - fgcolor = _mm_mullo_epi16(fgcolor, alpha); - bgcolor = _mm_mullo_epi16(bgcolor, inv_alpha); - __m128i outcolor = _mm_srli_epi16(_mm_add_epi16(fgcolor, bgcolor), 8); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - _mm_storel_epi64((__m128i*)desttmp, outcolor); - dest[offset] = desttmp[0]; - dest[offset + pitch] = desttmp[1]; - viewpos_z = _mm_add_ps(viewpos_z, step_viewpos_z); - } - - if (ssecount * 2 != count) - { - int index = ssecount * 2; - int offset = index * pitch; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2]; - int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; - unsigned int sampleout = source[sample_index]; - ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - int blue0 = BPART(ifgcolor[0]); - int green0 = GPART(ifgcolor[0]); - int red0 = RPART(ifgcolor[0]); - int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - - int blue1 = 
BPART(ifgcolor[1]); - int green1 = GPART(ifgcolor[1]); - int red1 = RPART(ifgcolor[1]); - int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; - - __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); - - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, mlight); - fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lxy2 = light_x; // L.x*L.x + L.y*L.y - __m128 Lz = _mm_sub_ps(light_z, viewpos_z); - __m128 dist2 = _mm_add_ps(Lxy2, _mm_mul_ps(Lz, Lz)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_y, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_y, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = 
_mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - fgcolor = _mm_mullo_epi16(fgcolor, alpha); - bgcolor = _mm_mullo_epi16(bgcolor, inv_alpha); - __m128i outcolor = _mm_srli_epi16(_mm_add_epi16(fgcolor, bgcolor), 8); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - dest[offset] = _mm_cvtsi128_si32(outcolor); - } - } - else - { - int textureheight = args.TextureHeight(); - uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; - - // Shade constants - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); - __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); - shade_fade = _mm_mullo_epi16(shade_fade, inv_light); - __m128i 
shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); - int desaturate = shade_constants.desaturate; - - int count = args.Count(); - int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t fracstep = args.TextureVStep(); - uint32_t frac = args.TextureVPos(); - uint32_t texturefracx = args.TextureUPos(); - uint32_t *dest = (uint32_t*)args.Dest(); - int dest_y = args.DestY(); - - auto lights = args.dc_lights; - auto num_lights = args.dc_num_lights; - float vpz = args.dc_viewpos.Z + args.dc_viewpos_step.Z * thread->skipped_by_thread(dest_y); - float stepvpz = args.dc_viewpos_step.Z * thread->num_cores; - __m128 viewpos_z = _mm_setr_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); - __m128 step_viewpos_z = _mm_set1_ps(stepvpz * 2.0f); - - count = thread->count_for_thread(dest_y, count); - if (count <= 0) return; - frac += thread->skipped_by_thread(dest_y) * fracstep; - dest = thread->dest_for_thread(dest_y, pitch, dest); - fracstep *= thread->num_cores; - pitch *= thread->num_cores; - frac -= one / 2; - uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); - - int ssecount = count / 2; - for (int index = 0; index < ssecount; index++) - { - int offset = index * pitch * 2; - uint32_t desttmp[2]; - desttmp[0] = dest[offset]; - desttmp[1] = dest[offset + pitch]; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2]; - { - unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; - unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; - unsigned int y0 = frac_y0 >> FRACBITS; - unsigned int y1 = frac_y1 >> FRACBITS; - - unsigned int p00 = source[y0]; - unsigned int p01 = source[y1]; - unsigned int p10 = source2[y0]; 
- unsigned int p11 = source2[y1]; - - unsigned int inv_b = texturefracx; - unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; - unsigned int a = 16 - inv_a; - unsigned int b = 16 - inv_b; - - unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - ifgcolor[0] = sampleout; - frac += fracstep; - } - { - unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; - unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; - unsigned int y0 = frac_y0 >> FRACBITS; - unsigned int y1 = frac_y1 >> FRACBITS; - - unsigned int p00 = source[y0]; - unsigned int p01 = source[y1]; - unsigned int p10 = source2[y0]; - unsigned int p11 = source2[y1]; - - unsigned int inv_b = texturefracx; - unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; - unsigned int a = 16 - inv_a; - unsigned int b = 16 - inv_b; - - unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned 
int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - ifgcolor[1] = sampleout; - frac += fracstep; - } - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - int blue0 = BPART(ifgcolor[0]); - int green0 = GPART(ifgcolor[0]); - int red0 = RPART(ifgcolor[0]); - int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - - int blue1 = BPART(ifgcolor[1]); - int green1 = GPART(ifgcolor[1]); - int red1 = RPART(ifgcolor[1]); - int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; - - __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); - - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, mlight); - fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lxy2 = light_x; // L.x*L.x + L.y*L.y - __m128 Lz = _mm_sub_ps(light_z, viewpos_z); - __m128 dist2 = _mm_add_ps(Lxy2, _mm_mul_ps(Lz, Lz)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_y, rcp_dist), 
distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_y, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - fgcolor = _mm_mullo_epi16(fgcolor, alpha); - bgcolor = _mm_mullo_epi16(bgcolor, inv_alpha); - __m128i outcolor = _mm_srli_epi16(_mm_add_epi16(fgcolor, bgcolor), 8); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - _mm_storel_epi64((__m128i*)desttmp, outcolor); - dest[offset] = desttmp[0]; - dest[offset + pitch] = desttmp[1]; - viewpos_z = _mm_add_ps(viewpos_z, step_viewpos_z); - } - - if (ssecount * 2 != count) - { - int index = ssecount * 2; - int offset = index * pitch; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2]; - unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; - unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; - unsigned int y0 = frac_y0 >> FRACBITS; - unsigned int y1 = 
frac_y1 >> FRACBITS; - - unsigned int p00 = source[y0]; - unsigned int p01 = source[y1]; - unsigned int p10 = source2[y0]; - unsigned int p11 = source2[y1]; - - unsigned int inv_b = texturefracx; - unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; - unsigned int a = 16 - inv_a; - unsigned int b = 16 - inv_b; - - unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - int blue0 = BPART(ifgcolor[0]); - int green0 = GPART(ifgcolor[0]); - int red0 = RPART(ifgcolor[0]); - int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - - int blue1 = BPART(ifgcolor[1]); - int green1 = GPART(ifgcolor[1]); - int red1 = RPART(ifgcolor[1]); - int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; - - __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); - - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, mlight); - fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != 
num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lxy2 = light_x; // L.x*L.x + L.y*L.y - __m128 Lz = _mm_sub_ps(light_z, viewpos_z); - __m128 dist2 = _mm_add_ps(Lxy2, _mm_mul_ps(Lz, Lz)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_y, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_y, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = 
_mm_sub_epi16(_mm_set1_epi16(256), alpha); - - fgcolor = _mm_mullo_epi16(fgcolor, alpha); - bgcolor = _mm_mullo_epi16(bgcolor, inv_alpha); - __m128i outcolor = _mm_srli_epi16(_mm_add_epi16(fgcolor, bgcolor), 8); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - dest[offset] = _mm_cvtsi128_si32(outcolor); - } - } - } - } - - FString DebugInfo() override { return "DrawWallMasked32Command"; } - }; - - class DrawWallAddClamp32Command : public DrawerCommand - { - protected: - WallDrawerArgs args; - - public: - DrawWallAddClamp32Command(const WallDrawerArgs &drawerargs) : args(drawerargs) { } - - void Execute(DrawerThread *thread) override - { - auto shade_constants = args.ColormapConstants(); - if (shade_constants.simple_shade) - { - const uint32_t *source = (const uint32_t*)args.TexturePixels(); - const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); - bool is_nearest_filter = (source2 == nullptr); - if (is_nearest_filter) - { - int textureheight = args.TextureHeight(); - uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; - - // Shade constants - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - - int count = args.Count(); - int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t fracstep = args.TextureVStep(); - uint32_t frac = args.TextureVPos(); - uint32_t texturefracx = args.TextureUPos(); - uint32_t *dest = (uint32_t*)args.Dest(); - int dest_y = args.DestY(); - - auto lights = args.dc_lights; - auto num_lights = args.dc_num_lights; - float vpz = args.dc_viewpos.Z + args.dc_viewpos_step.Z * thread->skipped_by_thread(dest_y); - float stepvpz = args.dc_viewpos_step.Z * thread->num_cores; - __m128 viewpos_z = _mm_setr_ps(vpz, 
vpz + stepvpz, 0.0f, 0.0f); - __m128 step_viewpos_z = _mm_set1_ps(stepvpz * 2.0f); - - count = thread->count_for_thread(dest_y, count); - if (count <= 0) return; - frac += thread->skipped_by_thread(dest_y) * fracstep; - dest = thread->dest_for_thread(dest_y, pitch, dest); - fracstep *= thread->num_cores; - pitch *= thread->num_cores; - uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); - - int ssecount = count / 2; - for (int index = 0; index < ssecount; index++) - { - int offset = index * pitch * 2; - uint32_t desttmp[2]; - desttmp[0] = dest[offset]; - desttmp[1] = dest[offset + pitch]; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2]; - { - int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; - unsigned int sampleout = source[sample_index]; - ifgcolor[0] = sampleout; - frac += fracstep; - } - { - int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; - unsigned int sampleout = source[sample_index]; - ifgcolor[1] = sampleout; - frac += fracstep; - } - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lxy2 = light_x; // L.x*L.x + L.y*L.y - __m128 Lz = _mm_sub_ps(light_z, viewpos_z); - __m128 dist2 = _mm_add_ps(Lxy2, _mm_mul_ps(Lz, Lz)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, 
rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_y, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_y, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - uint32_t alpha0 = APART(ifgcolor[0]); - uint32_t alpha1 = APART(ifgcolor[1]); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 >> 7; // 255->256 - uint32_t inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; - uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i 
fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); - __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - _mm_storel_epi64((__m128i*)desttmp, outcolor); - dest[offset] = desttmp[0]; - dest[offset + pitch] = desttmp[1]; - viewpos_z = _mm_add_ps(viewpos_z, step_viewpos_z); - } - - if (ssecount * 2 != count) - { - int index = ssecount * 2; - int offset = index * pitch; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2]; - int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; - unsigned int sampleout = source[sample_index]; - ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lxy2 = light_x; // L.x*L.x + L.y*L.y - __m128 Lz = _mm_sub_ps(light_z, viewpos_z); - __m128 dist2 = _mm_add_ps(Lxy2, _mm_mul_ps(Lz, Lz)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - 
__m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_y, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_y, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - uint32_t alpha0 = APART(ifgcolor[0]); - uint32_t alpha1 = APART(ifgcolor[1]); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 >> 7; // 255->256 - uint32_t inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; - uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = 
_mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); - __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - dest[offset] = _mm_cvtsi128_si32(outcolor); - } - } - else - { - int textureheight = args.TextureHeight(); - uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; - - // Shade constants - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - - int count = args.Count(); - int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t fracstep = args.TextureVStep(); - uint32_t frac = args.TextureVPos(); - uint32_t texturefracx = args.TextureUPos(); - uint32_t *dest = (uint32_t*)args.Dest(); - int dest_y = args.DestY(); - - auto lights = args.dc_lights; - auto num_lights = args.dc_num_lights; - float vpz = args.dc_viewpos.Z + args.dc_viewpos_step.Z * thread->skipped_by_thread(dest_y); - float stepvpz = args.dc_viewpos_step.Z * thread->num_cores; - __m128 viewpos_z = _mm_setr_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); - __m128 step_viewpos_z = _mm_set1_ps(stepvpz * 2.0f); - - count = thread->count_for_thread(dest_y, count); - if (count <= 0) return; - frac += thread->skipped_by_thread(dest_y) * fracstep; - dest = thread->dest_for_thread(dest_y, pitch, dest); - fracstep *= 
thread->num_cores; - pitch *= thread->num_cores; - frac -= one / 2; - uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); - - int ssecount = count / 2; - for (int index = 0; index < ssecount; index++) - { - int offset = index * pitch * 2; - uint32_t desttmp[2]; - desttmp[0] = dest[offset]; - desttmp[1] = dest[offset + pitch]; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2]; - { - unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; - unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; - unsigned int y0 = frac_y0 >> FRACBITS; - unsigned int y1 = frac_y1 >> FRACBITS; - - unsigned int p00 = source[y0]; - unsigned int p01 = source[y1]; - unsigned int p10 = source2[y0]; - unsigned int p11 = source2[y1]; - - unsigned int inv_b = texturefracx; - unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; - unsigned int a = 16 - inv_a; - unsigned int b = 16 - inv_b; - - unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - ifgcolor[0] = sampleout; - frac += fracstep; - } - { - unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; - unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; - unsigned int y0 = frac_y0 >> FRACBITS; - unsigned int y1 = frac_y1 >> FRACBITS; - - unsigned int p00 = source[y0]; 
- unsigned int p01 = source[y1]; - unsigned int p10 = source2[y0]; - unsigned int p11 = source2[y1]; - - unsigned int inv_b = texturefracx; - unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; - unsigned int a = 16 - inv_a; - unsigned int b = 16 - inv_b; - - unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - ifgcolor[1] = sampleout; - frac += fracstep; - } - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lxy2 = light_x; // L.x*L.x + L.y*L.y - __m128 Lz = _mm_sub_ps(light_z, viewpos_z); - __m128 dist2 = _mm_add_ps(Lxy2, _mm_mul_ps(Lz, Lz)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = 
distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_y, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_y, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - uint32_t alpha0 = APART(ifgcolor[0]); - uint32_t alpha1 = APART(ifgcolor[1]); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 >> 7; // 255->256 - uint32_t inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; - uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, 
_mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); - __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - _mm_storel_epi64((__m128i*)desttmp, outcolor); - dest[offset] = desttmp[0]; - dest[offset + pitch] = desttmp[1]; - viewpos_z = _mm_add_ps(viewpos_z, step_viewpos_z); - } - - if (ssecount * 2 != count) - { - int index = ssecount * 2; - int offset = index * pitch; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2]; - unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; - unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; - unsigned int y0 = frac_y0 >> FRACBITS; - unsigned int y1 = frac_y1 >> FRACBITS; - - unsigned int p00 = source[y0]; - unsigned int p01 = source[y1]; - unsigned int p10 = source2[y0]; - unsigned int p11 = source2[y1]; - - unsigned int inv_b = texturefracx; - unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; - unsigned int a = 16 - inv_a; - unsigned int b = 16 - inv_b; - - unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen 
<< 8) | sblue; - ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lxy2 = light_x; // L.x*L.x + L.y*L.y - __m128 Lz = _mm_sub_ps(light_z, viewpos_z); - __m128 dist2 = _mm_add_ps(Lxy2, _mm_mul_ps(Lz, Lz)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_y, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_y, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); 
- fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - uint32_t alpha0 = APART(ifgcolor[0]); - uint32_t alpha1 = APART(ifgcolor[1]); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 >> 7; // 255->256 - uint32_t inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; - uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); - __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - dest[offset] = _mm_cvtsi128_si32(outcolor); - } - } - } - else - { - const uint32_t *source = (const uint32_t*)args.TexturePixels(); - const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); - bool is_nearest_filter = (source2 == nullptr); - if (is_nearest_filter) - { - int textureheight = args.TextureHeight(); - uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; - - // Shade constants - int light = 256 - (args.Light() >> (FRACBITS - 8)); 
- __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); - __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); - shade_fade = _mm_mullo_epi16(shade_fade, inv_light); - __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); - int desaturate = shade_constants.desaturate; - - int count = args.Count(); - int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t fracstep = args.TextureVStep(); - uint32_t frac = args.TextureVPos(); - uint32_t texturefracx = args.TextureUPos(); - uint32_t *dest = (uint32_t*)args.Dest(); - int dest_y = args.DestY(); - - auto lights = args.dc_lights; - auto num_lights = args.dc_num_lights; - float vpz = args.dc_viewpos.Z + args.dc_viewpos_step.Z * thread->skipped_by_thread(dest_y); - float stepvpz = args.dc_viewpos_step.Z * thread->num_cores; - __m128 viewpos_z = _mm_setr_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); - __m128 step_viewpos_z = _mm_set1_ps(stepvpz * 2.0f); - - count = thread->count_for_thread(dest_y, count); - if (count <= 0) return; - frac += thread->skipped_by_thread(dest_y) * fracstep; - dest = thread->dest_for_thread(dest_y, pitch, dest); - fracstep *= thread->num_cores; - pitch *= thread->num_cores; - uint32_t 
srcalpha = args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); - - int ssecount = count / 2; - for (int index = 0; index < ssecount; index++) - { - int offset = index * pitch * 2; - uint32_t desttmp[2]; - desttmp[0] = dest[offset]; - desttmp[1] = dest[offset + pitch]; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2]; - { - int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; - unsigned int sampleout = source[sample_index]; - ifgcolor[0] = sampleout; - frac += fracstep; - } - { - int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; - unsigned int sampleout = source[sample_index]; - ifgcolor[1] = sampleout; - frac += fracstep; - } - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - int blue0 = BPART(ifgcolor[0]); - int green0 = GPART(ifgcolor[0]); - int red0 = RPART(ifgcolor[0]); - int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - - int blue1 = BPART(ifgcolor[1]); - int green1 = GPART(ifgcolor[1]); - int red1 = RPART(ifgcolor[1]); - int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; - - __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); - - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, mlight); - fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = 
_mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lxy2 = light_x; // L.x*L.x + L.y*L.y - __m128 Lz = _mm_sub_ps(light_z, viewpos_z); - __m128 dist2 = _mm_add_ps(Lxy2, _mm_mul_ps(Lz, Lz)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_y, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_y, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - uint32_t alpha0 = APART(ifgcolor[0]); - uint32_t alpha1 = APART(ifgcolor[1]); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 >> 7; // 255->256 - uint32_t inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; - uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) 
>> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); - __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - _mm_storel_epi64((__m128i*)desttmp, outcolor); - dest[offset] = desttmp[0]; - dest[offset + pitch] = desttmp[1]; - viewpos_z = _mm_add_ps(viewpos_z, step_viewpos_z); - } - - if (ssecount * 2 != count) - { - int index = ssecount * 2; - int offset = index * pitch; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2]; - int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; - unsigned int sampleout = source[sample_index]; - ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - int blue0 = BPART(ifgcolor[0]); - int green0 = GPART(ifgcolor[0]); - int red0 = RPART(ifgcolor[0]); - int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - - int blue1 = BPART(ifgcolor[1]); - int green1 = GPART(ifgcolor[1]); - int red1 = RPART(ifgcolor[1]); - int intensity1 = ((red1 * 77 + green1 
* 143 + blue1 * 37) >> 8) * desaturate; - - __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); - - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, mlight); - fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lxy2 = light_x; // L.x*L.x + L.y*L.y - __m128 Lz = _mm_sub_ps(light_z, viewpos_z); - __m128 dist2 = _mm_add_ps(Lxy2, _mm_mul_ps(Lz, Lz)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_y, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_y, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = 
_mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - uint32_t alpha0 = APART(ifgcolor[0]); - uint32_t alpha1 = APART(ifgcolor[1]); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 >> 7; // 255->256 - uint32_t inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; - uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); - __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - dest[offset] = _mm_cvtsi128_si32(outcolor); - } - } - else - { - int textureheight = args.TextureHeight(); - uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; - - // Shade constants - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, 
light, light, light, 256, light, light, light); - __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); - __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); - shade_fade = _mm_mullo_epi16(shade_fade, inv_light); - __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); - int desaturate = shade_constants.desaturate; - - int count = args.Count(); - int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t fracstep = args.TextureVStep(); - uint32_t frac = args.TextureVPos(); - uint32_t texturefracx = args.TextureUPos(); - uint32_t *dest = (uint32_t*)args.Dest(); - int dest_y = args.DestY(); - - auto lights = args.dc_lights; - auto num_lights = args.dc_num_lights; - float vpz = args.dc_viewpos.Z + args.dc_viewpos_step.Z * thread->skipped_by_thread(dest_y); - float stepvpz = args.dc_viewpos_step.Z * thread->num_cores; - __m128 viewpos_z = _mm_setr_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); - __m128 step_viewpos_z = _mm_set1_ps(stepvpz * 2.0f); - - count = thread->count_for_thread(dest_y, count); - if (count <= 0) return; - frac += thread->skipped_by_thread(dest_y) * fracstep; - dest = thread->dest_for_thread(dest_y, pitch, dest); - fracstep *= thread->num_cores; - pitch *= thread->num_cores; - frac -= one / 2; - uint32_t srcalpha = 
args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); - - int ssecount = count / 2; - for (int index = 0; index < ssecount; index++) - { - int offset = index * pitch * 2; - uint32_t desttmp[2]; - desttmp[0] = dest[offset]; - desttmp[1] = dest[offset + pitch]; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2]; - { - unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; - unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; - unsigned int y0 = frac_y0 >> FRACBITS; - unsigned int y1 = frac_y1 >> FRACBITS; - - unsigned int p00 = source[y0]; - unsigned int p01 = source[y1]; - unsigned int p10 = source2[y0]; - unsigned int p11 = source2[y1]; - - unsigned int inv_b = texturefracx; - unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; - unsigned int a = 16 - inv_a; - unsigned int b = 16 - inv_b; - - unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - ifgcolor[0] = sampleout; - frac += fracstep; - } - { - unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; - unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; - unsigned int y0 = frac_y0 >> FRACBITS; - unsigned int y1 = frac_y1 >> FRACBITS; - - unsigned int p00 = source[y0]; - unsigned int p01 = source[y1]; - unsigned int p10 = source2[y0]; - unsigned int p11 = 
source2[y1]; - - unsigned int inv_b = texturefracx; - unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; - unsigned int a = 16 - inv_a; - unsigned int b = 16 - inv_b; - - unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - ifgcolor[1] = sampleout; - frac += fracstep; - } - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - int blue0 = BPART(ifgcolor[0]); - int green0 = GPART(ifgcolor[0]); - int red0 = RPART(ifgcolor[0]); - int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - - int blue1 = BPART(ifgcolor[1]); - int green1 = GPART(ifgcolor[1]); - int red1 = RPART(ifgcolor[1]); - int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; - - __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); - - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, mlight); - fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = 
_mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lxy2 = light_x; // L.x*L.x + L.y*L.y - __m128 Lz = _mm_sub_ps(light_z, viewpos_z); - __m128 dist2 = _mm_add_ps(Lxy2, _mm_mul_ps(Lz, Lz)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_y, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_y, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - uint32_t alpha0 = APART(ifgcolor[0]); - uint32_t alpha1 = APART(ifgcolor[1]); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 >> 7; // 255->256 - uint32_t inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; - uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - 
uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); - __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - _mm_storel_epi64((__m128i*)desttmp, outcolor); - dest[offset] = desttmp[0]; - dest[offset + pitch] = desttmp[1]; - viewpos_z = _mm_add_ps(viewpos_z, step_viewpos_z); - } - - if (ssecount * 2 != count) - { - int index = ssecount * 2; - int offset = index * pitch; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2]; - unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; - unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; - unsigned int y0 = frac_y0 >> FRACBITS; - unsigned int y1 = frac_y1 >> FRACBITS; - - unsigned int p00 = source[y0]; - unsigned int p01 = source[y1]; - unsigned int p10 = source2[y0]; - unsigned int p11 = source2[y1]; - - unsigned int inv_b = texturefracx; - unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; - unsigned int a = 16 - inv_a; - unsigned int b = 16 - inv_b; - - unsigned int sred = 
(RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - int blue0 = BPART(ifgcolor[0]); - int green0 = GPART(ifgcolor[0]); - int red0 = RPART(ifgcolor[0]); - int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - - int blue1 = BPART(ifgcolor[1]); - int green1 = GPART(ifgcolor[1]); - int red1 = RPART(ifgcolor[1]); - int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; - - __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); - - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, mlight); - fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * 
(1/radius), 1) - __m128 Lxy2 = light_x; // L.x*L.x + L.y*L.y - __m128 Lz = _mm_sub_ps(light_z, viewpos_z); - __m128 dist2 = _mm_add_ps(Lxy2, _mm_mul_ps(Lz, Lz)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_y, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_y, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - uint32_t alpha0 = APART(ifgcolor[0]); - uint32_t alpha1 = APART(ifgcolor[1]); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 >> 7; // 255->256 - uint32_t inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; - uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, 
bgalpha0); - __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); - __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - dest[offset] = _mm_cvtsi128_si32(outcolor); - } - } - } - } - - FString DebugInfo() override { return "DrawWallAddClamp32Command"; } - }; - - class DrawWallSubClamp32Command : public DrawerCommand - { - protected: - WallDrawerArgs args; - - public: - DrawWallSubClamp32Command(const WallDrawerArgs &drawerargs) : args(drawerargs) { } - - void Execute(DrawerThread *thread) override - { - auto shade_constants = args.ColormapConstants(); - if (shade_constants.simple_shade) - { - const uint32_t *source = (const uint32_t*)args.TexturePixels(); - const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); - bool is_nearest_filter = (source2 == nullptr); - if (is_nearest_filter) - { - int textureheight = args.TextureHeight(); - uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; - - // Shade constants - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - - int count = args.Count(); - int pitch = 
RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t fracstep = args.TextureVStep(); - uint32_t frac = args.TextureVPos(); - uint32_t texturefracx = args.TextureUPos(); - uint32_t *dest = (uint32_t*)args.Dest(); - int dest_y = args.DestY(); - - auto lights = args.dc_lights; - auto num_lights = args.dc_num_lights; - float vpz = args.dc_viewpos.Z + args.dc_viewpos_step.Z * thread->skipped_by_thread(dest_y); - float stepvpz = args.dc_viewpos_step.Z * thread->num_cores; - __m128 viewpos_z = _mm_setr_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); - __m128 step_viewpos_z = _mm_set1_ps(stepvpz * 2.0f); - - count = thread->count_for_thread(dest_y, count); - if (count <= 0) return; - frac += thread->skipped_by_thread(dest_y) * fracstep; - dest = thread->dest_for_thread(dest_y, pitch, dest); - fracstep *= thread->num_cores; - pitch *= thread->num_cores; - uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); - - int ssecount = count / 2; - for (int index = 0; index < ssecount; index++) - { - int offset = index * pitch * 2; - uint32_t desttmp[2]; - desttmp[0] = dest[offset]; - desttmp[1] = dest[offset + pitch]; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2]; - { - int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; - unsigned int sampleout = source[sample_index]; - ifgcolor[0] = sampleout; - frac += fracstep; - } - { - int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; - unsigned int sampleout = source[sample_index]; - ifgcolor[1] = sampleout; - frac += fracstep; - } - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); 
- __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lxy2 = light_x; // L.x*L.x + L.y*L.y - __m128 Lz = _mm_sub_ps(light_z, viewpos_z); - __m128 dist2 = _mm_add_ps(Lxy2, _mm_mul_ps(Lz, Lz)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_y, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_y, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - uint32_t alpha0 = APART(ifgcolor[0]); - uint32_t alpha1 = APART(ifgcolor[1]); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 >> 7; // 255->256 - uint32_t inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; - uint32_t 
bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_sub_epi32(fg_lo, bg_lo); - __m128i out_hi = _mm_sub_epi32(fg_hi, bg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - _mm_storel_epi64((__m128i*)desttmp, outcolor); - dest[offset] = desttmp[0]; - dest[offset + pitch] = desttmp[1]; - viewpos_z = _mm_add_ps(viewpos_z, step_viewpos_z); - } - - if (ssecount * 2 != count) - { - int index = ssecount * 2; - int offset = index * pitch; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2]; - int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; - unsigned int sampleout = source[sample_index]; - ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 
light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lxy2 = light_x; // L.x*L.x + L.y*L.y - __m128 Lz = _mm_sub_ps(light_z, viewpos_z); - __m128 dist2 = _mm_add_ps(Lxy2, _mm_mul_ps(Lz, Lz)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_y, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_y, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - uint32_t alpha0 = APART(ifgcolor[0]); - uint32_t alpha1 = APART(ifgcolor[1]); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 >> 7; // 255->256 - uint32_t inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 
<< 8) + 128) >> 8; - uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_sub_epi32(fg_lo, bg_lo); - __m128i out_hi = _mm_sub_epi32(fg_hi, bg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - dest[offset] = _mm_cvtsi128_si32(outcolor); - } - } - else - { - int textureheight = args.TextureHeight(); - uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; - - // Shade constants - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - - int count = args.Count(); - int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t fracstep = args.TextureVStep(); - uint32_t frac = args.TextureVPos(); - uint32_t texturefracx = args.TextureUPos(); - uint32_t *dest = (uint32_t*)args.Dest(); - int dest_y = args.DestY(); - - auto lights = args.dc_lights; - auto num_lights = 
args.dc_num_lights; - float vpz = args.dc_viewpos.Z + args.dc_viewpos_step.Z * thread->skipped_by_thread(dest_y); - float stepvpz = args.dc_viewpos_step.Z * thread->num_cores; - __m128 viewpos_z = _mm_setr_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); - __m128 step_viewpos_z = _mm_set1_ps(stepvpz * 2.0f); - - count = thread->count_for_thread(dest_y, count); - if (count <= 0) return; - frac += thread->skipped_by_thread(dest_y) * fracstep; - dest = thread->dest_for_thread(dest_y, pitch, dest); - fracstep *= thread->num_cores; - pitch *= thread->num_cores; - frac -= one / 2; - uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); - - int ssecount = count / 2; - for (int index = 0; index < ssecount; index++) - { - int offset = index * pitch * 2; - uint32_t desttmp[2]; - desttmp[0] = dest[offset]; - desttmp[1] = dest[offset + pitch]; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2]; - { - unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; - unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; - unsigned int y0 = frac_y0 >> FRACBITS; - unsigned int y1 = frac_y1 >> FRACBITS; - - unsigned int p00 = source[y0]; - unsigned int p01 = source[y1]; - unsigned int p10 = source2[y0]; - unsigned int p11 = source2[y1]; - - unsigned int inv_b = texturefracx; - unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; - unsigned int a = 16 - inv_a; - unsigned int b = 16 - inv_b; - - unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int salpha = 
(APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - ifgcolor[0] = sampleout; - frac += fracstep; - } - { - unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; - unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; - unsigned int y0 = frac_y0 >> FRACBITS; - unsigned int y1 = frac_y1 >> FRACBITS; - - unsigned int p00 = source[y0]; - unsigned int p01 = source[y1]; - unsigned int p10 = source2[y0]; - unsigned int p11 = source2[y1]; - - unsigned int inv_b = texturefracx; - unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; - unsigned int a = 16 - inv_a; - unsigned int b = 16 - inv_b; - - unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - ifgcolor[1] = sampleout; - frac += fracstep; - } - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L 
= light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lxy2 = light_x; // L.x*L.x + L.y*L.y - __m128 Lz = _mm_sub_ps(light_z, viewpos_z); - __m128 dist2 = _mm_add_ps(Lxy2, _mm_mul_ps(Lz, Lz)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_y, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_y, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - uint32_t alpha0 = APART(ifgcolor[0]); - uint32_t alpha1 = APART(ifgcolor[1]); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 >> 7; // 255->256 - uint32_t inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; - uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; - - __m128i bgalpha = 
_mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_sub_epi32(fg_lo, bg_lo); - __m128i out_hi = _mm_sub_epi32(fg_hi, bg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - _mm_storel_epi64((__m128i*)desttmp, outcolor); - dest[offset] = desttmp[0]; - dest[offset + pitch] = desttmp[1]; - viewpos_z = _mm_add_ps(viewpos_z, step_viewpos_z); - } - - if (ssecount * 2 != count) - { - int index = ssecount * 2; - int offset = index * pitch; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2]; - unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; - unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; - unsigned int y0 = frac_y0 >> FRACBITS; - unsigned int y1 = frac_y1 >> FRACBITS; - - unsigned int p00 = source[y0]; - unsigned int p01 = source[y1]; - unsigned int p10 = source2[y0]; - unsigned int p11 = source2[y1]; - - unsigned int inv_b = texturefracx; - unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; - unsigned int a = 16 - inv_a; - unsigned int b = 16 - inv_b; - - unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned 
int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lxy2 = light_x; // L.x*L.x + L.y*L.y - __m128 Lz = _mm_sub_ps(light_z, viewpos_z); - __m128 dist2 = _mm_add_ps(Lxy2, _mm_mul_ps(Lz, Lz)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_y, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_y, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = 
_mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - uint32_t alpha0 = APART(ifgcolor[0]); - uint32_t alpha1 = APART(ifgcolor[1]); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 >> 7; // 255->256 - uint32_t inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; - uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_sub_epi32(fg_lo, bg_lo); - __m128i out_hi = _mm_sub_epi32(fg_hi, bg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = 
_mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - dest[offset] = _mm_cvtsi128_si32(outcolor); - } - } - } - else - { - const uint32_t *source = (const uint32_t*)args.TexturePixels(); - const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); - bool is_nearest_filter = (source2 == nullptr); - if (is_nearest_filter) - { - int textureheight = args.TextureHeight(); - uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; - - // Shade constants - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); - __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); - shade_fade = _mm_mullo_epi16(shade_fade, inv_light); - __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); - int desaturate = shade_constants.desaturate; - - int count = args.Count(); - int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t fracstep = args.TextureVStep(); - uint32_t frac = args.TextureVPos(); - uint32_t texturefracx = args.TextureUPos(); - uint32_t *dest = (uint32_t*)args.Dest(); - int dest_y = args.DestY(); - - auto lights = args.dc_lights; - auto num_lights = args.dc_num_lights; - float 
vpz = args.dc_viewpos.Z + args.dc_viewpos_step.Z * thread->skipped_by_thread(dest_y); - float stepvpz = args.dc_viewpos_step.Z * thread->num_cores; - __m128 viewpos_z = _mm_setr_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); - __m128 step_viewpos_z = _mm_set1_ps(stepvpz * 2.0f); - - count = thread->count_for_thread(dest_y, count); - if (count <= 0) return; - frac += thread->skipped_by_thread(dest_y) * fracstep; - dest = thread->dest_for_thread(dest_y, pitch, dest); - fracstep *= thread->num_cores; - pitch *= thread->num_cores; - uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); - - int ssecount = count / 2; - for (int index = 0; index < ssecount; index++) - { - int offset = index * pitch * 2; - uint32_t desttmp[2]; - desttmp[0] = dest[offset]; - desttmp[1] = dest[offset + pitch]; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2]; - { - int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; - unsigned int sampleout = source[sample_index]; - ifgcolor[0] = sampleout; - frac += fracstep; - } - { - int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; - unsigned int sampleout = source[sample_index]; - ifgcolor[1] = sampleout; - frac += fracstep; - } - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - int blue0 = BPART(ifgcolor[0]); - int green0 = GPART(ifgcolor[0]); - int red0 = RPART(ifgcolor[0]); - int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - - int blue1 = BPART(ifgcolor[1]); - int green1 = GPART(ifgcolor[1]); - int red1 = RPART(ifgcolor[1]); - int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; - - __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); - - fgcolor = 
_mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, mlight); - fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lxy2 = light_x; // L.x*L.x + L.y*L.y - __m128 Lz = _mm_sub_ps(light_z, viewpos_z); - __m128 dist2 = _mm_add_ps(Lxy2, _mm_mul_ps(Lz, Lz)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_y, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_y, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 
8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - uint32_t alpha0 = APART(ifgcolor[0]); - uint32_t alpha1 = APART(ifgcolor[1]); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 >> 7; // 255->256 - uint32_t inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; - uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_sub_epi32(fg_lo, bg_lo); - __m128i out_hi = _mm_sub_epi32(fg_hi, bg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - _mm_storel_epi64((__m128i*)desttmp, outcolor); - dest[offset] = desttmp[0]; - dest[offset + pitch] = desttmp[1]; - viewpos_z = _mm_add_ps(viewpos_z, step_viewpos_z); - } - - if (ssecount * 2 != count) - { - int index = ssecount * 2; - int offset = index * pitch; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2]; - int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; - 
unsigned int sampleout = source[sample_index]; - ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - int blue0 = BPART(ifgcolor[0]); - int green0 = GPART(ifgcolor[0]); - int red0 = RPART(ifgcolor[0]); - int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - - int blue1 = BPART(ifgcolor[1]); - int green1 = GPART(ifgcolor[1]); - int red1 = RPART(ifgcolor[1]); - int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; - - __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); - - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, mlight); - fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lxy2 = light_x; // L.x*L.x + L.y*L.y - __m128 Lz = _mm_sub_ps(light_z, viewpos_z); - __m128 dist2 = _mm_add_ps(Lxy2, _mm_mul_ps(Lz, Lz)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_y, rcp_dist), distance_attenuation); - - 
__m128 is_attenuated = _mm_cmpeq_ps(light_y, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - uint32_t alpha0 = APART(ifgcolor[0]); - uint32_t alpha1 = APART(ifgcolor[1]); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 >> 7; // 255->256 - uint32_t inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; - uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_sub_epi32(fg_lo, bg_lo); - __m128i out_hi = _mm_sub_epi32(fg_hi, bg_hi); 
- - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - dest[offset] = _mm_cvtsi128_si32(outcolor); - } - } - else - { - int textureheight = args.TextureHeight(); - uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; - - // Shade constants - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); - __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); - shade_fade = _mm_mullo_epi16(shade_fade, inv_light); - __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); - int desaturate = shade_constants.desaturate; - - int count = args.Count(); - int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t fracstep = args.TextureVStep(); - uint32_t frac = args.TextureVPos(); - uint32_t texturefracx = args.TextureUPos(); - uint32_t *dest = (uint32_t*)args.Dest(); - int dest_y = args.DestY(); - - auto lights = args.dc_lights; - auto num_lights = args.dc_num_lights; - float vpz = 
args.dc_viewpos.Z + args.dc_viewpos_step.Z * thread->skipped_by_thread(dest_y); - float stepvpz = args.dc_viewpos_step.Z * thread->num_cores; - __m128 viewpos_z = _mm_setr_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); - __m128 step_viewpos_z = _mm_set1_ps(stepvpz * 2.0f); - - count = thread->count_for_thread(dest_y, count); - if (count <= 0) return; - frac += thread->skipped_by_thread(dest_y) * fracstep; - dest = thread->dest_for_thread(dest_y, pitch, dest); - fracstep *= thread->num_cores; - pitch *= thread->num_cores; - frac -= one / 2; - uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); - - int ssecount = count / 2; - for (int index = 0; index < ssecount; index++) - { - int offset = index * pitch * 2; - uint32_t desttmp[2]; - desttmp[0] = dest[offset]; - desttmp[1] = dest[offset + pitch]; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2]; - { - unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; - unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; - unsigned int y0 = frac_y0 >> FRACBITS; - unsigned int y1 = frac_y1 >> FRACBITS; - - unsigned int p00 = source[y0]; - unsigned int p01 = source[y1]; - unsigned int p10 = source2[y0]; - unsigned int p11 = source2[y1]; - - unsigned int inv_b = texturefracx; - unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; - unsigned int a = 16 - inv_a; - unsigned int b = 16 - inv_b; - - unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * 
(inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - ifgcolor[0] = sampleout; - frac += fracstep; - } - { - unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; - unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; - unsigned int y0 = frac_y0 >> FRACBITS; - unsigned int y1 = frac_y1 >> FRACBITS; - - unsigned int p00 = source[y0]; - unsigned int p01 = source[y1]; - unsigned int p10 = source2[y0]; - unsigned int p11 = source2[y1]; - - unsigned int inv_b = texturefracx; - unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; - unsigned int a = 16 - inv_a; - unsigned int b = 16 - inv_b; - - unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - ifgcolor[1] = sampleout; - frac += fracstep; - } - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - int blue0 = BPART(ifgcolor[0]); - int green0 = GPART(ifgcolor[0]); - int red0 = RPART(ifgcolor[0]); - int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - - int blue1 = BPART(ifgcolor[1]); - int green1 = GPART(ifgcolor[1]); - int red1 = RPART(ifgcolor[1]); - int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; - - __m128i intensity = _mm_set_epi16(0, intensity1, 
intensity1, intensity1, 0, intensity0, intensity0, intensity0); - - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, mlight); - fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lxy2 = light_x; // L.x*L.x + L.y*L.y - __m128 Lz = _mm_sub_ps(light_z, viewpos_z); - __m128 dist2 = _mm_add_ps(Lxy2, _mm_mul_ps(Lz, Lz)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_y, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_y, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - 
fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - uint32_t alpha0 = APART(ifgcolor[0]); - uint32_t alpha1 = APART(ifgcolor[1]); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 >> 7; // 255->256 - uint32_t inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; - uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_sub_epi32(fg_lo, bg_lo); - __m128i out_hi = _mm_sub_epi32(fg_hi, bg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - _mm_storel_epi64((__m128i*)desttmp, outcolor); - dest[offset] = desttmp[0]; - dest[offset + pitch] = desttmp[1]; - viewpos_z = _mm_add_ps(viewpos_z, step_viewpos_z); - } - - if (ssecount * 2 != count) - { - int index = ssecount * 2; - int offset = index * pitch; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); - - // Sample - unsigned int 
ifgcolor[2]; - unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; - unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; - unsigned int y0 = frac_y0 >> FRACBITS; - unsigned int y1 = frac_y1 >> FRACBITS; - - unsigned int p00 = source[y0]; - unsigned int p01 = source[y1]; - unsigned int p10 = source2[y0]; - unsigned int p11 = source2[y1]; - - unsigned int inv_b = texturefracx; - unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; - unsigned int a = 16 - inv_a; - unsigned int b = 16 - inv_b; - - unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - int blue0 = BPART(ifgcolor[0]); - int green0 = GPART(ifgcolor[0]); - int red0 = RPART(ifgcolor[0]); - int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - - int blue1 = BPART(ifgcolor[1]); - int green1 = GPART(ifgcolor[1]); - int red1 = RPART(ifgcolor[1]); - int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; - - __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); - - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, mlight); - 
fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lxy2 = light_x; // L.x*L.x + L.y*L.y - __m128 Lz = _mm_sub_ps(light_z, viewpos_z); - __m128 dist2 = _mm_add_ps(Lxy2, _mm_mul_ps(Lz, Lz)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_y, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_y, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - uint32_t alpha0 = APART(ifgcolor[0]); - uint32_t alpha1 = 
APART(ifgcolor[1]); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 >> 7; // 255->256 - uint32_t inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; - uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_sub_epi32(fg_lo, bg_lo); - __m128i out_hi = _mm_sub_epi32(fg_hi, bg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - dest[offset] = _mm_cvtsi128_si32(outcolor); - } - } - } - } - - FString DebugInfo() override { return "DrawWallSubClamp32Command"; } - }; - - class DrawWallRevSubClamp32Command : public DrawerCommand - { - protected: - WallDrawerArgs args; - - public: - DrawWallRevSubClamp32Command(const WallDrawerArgs &drawerargs) : args(drawerargs) { } - - void Execute(DrawerThread *thread) override - { - auto shade_constants = args.ColormapConstants(); - if (shade_constants.simple_shade) - { - const uint32_t *source = (const uint32_t*)args.TexturePixels(); - const uint32_t *source2 = (const 
uint32_t*)args.TexturePixels2(); - bool is_nearest_filter = (source2 == nullptr); - if (is_nearest_filter) - { - int textureheight = args.TextureHeight(); - uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; - - // Shade constants - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - - int count = args.Count(); - int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t fracstep = args.TextureVStep(); - uint32_t frac = args.TextureVPos(); - uint32_t texturefracx = args.TextureUPos(); - uint32_t *dest = (uint32_t*)args.Dest(); - int dest_y = args.DestY(); - - auto lights = args.dc_lights; - auto num_lights = args.dc_num_lights; - float vpz = args.dc_viewpos.Z + args.dc_viewpos_step.Z * thread->skipped_by_thread(dest_y); - float stepvpz = args.dc_viewpos_step.Z * thread->num_cores; - __m128 viewpos_z = _mm_setr_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); - __m128 step_viewpos_z = _mm_set1_ps(stepvpz * 2.0f); - - count = thread->count_for_thread(dest_y, count); - if (count <= 0) return; - frac += thread->skipped_by_thread(dest_y) * fracstep; - dest = thread->dest_for_thread(dest_y, pitch, dest); - fracstep *= thread->num_cores; - pitch *= thread->num_cores; - uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); - - int ssecount = count / 2; - for (int index = 0; index < ssecount; index++) - { - int offset = index * pitch * 2; - uint32_t desttmp[2]; - desttmp[0] = dest[offset]; - desttmp[1] = dest[offset + pitch]; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2]; - { - int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; - unsigned int sampleout = 
source[sample_index]; - ifgcolor[0] = sampleout; - frac += fracstep; - } - { - int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; - unsigned int sampleout = source[sample_index]; - ifgcolor[1] = sampleout; - frac += fracstep; - } - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lxy2 = light_x; // L.x*L.x + L.y*L.y - __m128 Lz = _mm_sub_ps(light_z, viewpos_z); - __m128 dist2 = _mm_add_ps(Lxy2, _mm_mul_ps(Lz, Lz)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_y, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_y, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, 
_MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - uint32_t alpha0 = APART(ifgcolor[0]); - uint32_t alpha1 = APART(ifgcolor[1]); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 >> 7; // 255->256 - uint32_t inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; - uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_sub_epi32(bg_lo, fg_lo); - __m128i out_hi = _mm_sub_epi32(bg_hi, fg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - _mm_storel_epi64((__m128i*)desttmp, outcolor); - dest[offset] = desttmp[0]; - dest[offset + pitch] = desttmp[1]; - viewpos_z = _mm_add_ps(viewpos_z, step_viewpos_z); - } - - if (ssecount * 2 != count) - { - int index = ssecount * 2; - int offset = index * pitch; - 
__m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2]; - int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; - unsigned int sampleout = source[sample_index]; - ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lxy2 = light_x; // L.x*L.x + L.y*L.y - __m128 Lz = _mm_sub_ps(light_z, viewpos_z); - __m128 dist2 = _mm_add_ps(Lxy2, _mm_mul_ps(Lz, Lz)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_y, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_y, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = 
_mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - uint32_t alpha0 = APART(ifgcolor[0]); - uint32_t alpha1 = APART(ifgcolor[1]); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 >> 7; // 255->256 - uint32_t inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; - uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_sub_epi32(bg_lo, fg_lo); - __m128i out_hi = _mm_sub_epi32(bg_hi, fg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - dest[offset] = _mm_cvtsi128_si32(outcolor); - } - } - else - { - int textureheight = args.TextureHeight(); - uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; - - // Shade constants - int light = 256 - 
(args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - - int count = args.Count(); - int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t fracstep = args.TextureVStep(); - uint32_t frac = args.TextureVPos(); - uint32_t texturefracx = args.TextureUPos(); - uint32_t *dest = (uint32_t*)args.Dest(); - int dest_y = args.DestY(); - - auto lights = args.dc_lights; - auto num_lights = args.dc_num_lights; - float vpz = args.dc_viewpos.Z + args.dc_viewpos_step.Z * thread->skipped_by_thread(dest_y); - float stepvpz = args.dc_viewpos_step.Z * thread->num_cores; - __m128 viewpos_z = _mm_setr_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); - __m128 step_viewpos_z = _mm_set1_ps(stepvpz * 2.0f); - - count = thread->count_for_thread(dest_y, count); - if (count <= 0) return; - frac += thread->skipped_by_thread(dest_y) * fracstep; - dest = thread->dest_for_thread(dest_y, pitch, dest); - fracstep *= thread->num_cores; - pitch *= thread->num_cores; - frac -= one / 2; - uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); - - int ssecount = count / 2; - for (int index = 0; index < ssecount; index++) - { - int offset = index * pitch * 2; - uint32_t desttmp[2]; - desttmp[0] = dest[offset]; - desttmp[1] = dest[offset + pitch]; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2]; - { - unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; - unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; - unsigned int y0 = frac_y0 >> FRACBITS; - unsigned int y1 = frac_y1 >> FRACBITS; - - unsigned int p00 = source[y0]; - unsigned int p01 = source[y1]; - unsigned int p10 = source2[y0]; - unsigned int p11 = source2[y1]; - - unsigned int inv_b = 
texturefracx; - unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; - unsigned int a = 16 - inv_a; - unsigned int b = 16 - inv_b; - - unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - ifgcolor[0] = sampleout; - frac += fracstep; - } - { - unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; - unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; - unsigned int y0 = frac_y0 >> FRACBITS; - unsigned int y1 = frac_y1 >> FRACBITS; - - unsigned int p00 = source[y0]; - unsigned int p01 = source[y1]; - unsigned int p10 = source2[y0]; - unsigned int p11 = source2[y1]; - - unsigned int inv_b = texturefracx; - unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; - unsigned int a = 16 - inv_a; - unsigned int b = 16 - inv_b; - - unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 
8) | sblue; - ifgcolor[1] = sampleout; - frac += fracstep; - } - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lxy2 = light_x; // L.x*L.x + L.y*L.y - __m128 Lz = _mm_sub_ps(light_z, viewpos_z); - __m128 dist2 = _mm_add_ps(Lxy2, _mm_mul_ps(Lz, Lz)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_y, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_y, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 
8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - uint32_t alpha0 = APART(ifgcolor[0]); - uint32_t alpha1 = APART(ifgcolor[1]); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 >> 7; // 255->256 - uint32_t inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; - uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_sub_epi32(bg_lo, fg_lo); - __m128i out_hi = _mm_sub_epi32(bg_hi, fg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - _mm_storel_epi64((__m128i*)desttmp, outcolor); - dest[offset] = desttmp[0]; - dest[offset + pitch] = desttmp[1]; - viewpos_z = _mm_add_ps(viewpos_z, step_viewpos_z); - } - - if (ssecount * 2 != count) - { - int index = ssecount * 2; - int offset = index * pitch; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2]; - unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; - unsigned 
int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; - unsigned int y0 = frac_y0 >> FRACBITS; - unsigned int y1 = frac_y1 >> FRACBITS; - - unsigned int p00 = source[y0]; - unsigned int p01 = source[y1]; - unsigned int p10 = source2[y0]; - unsigned int p11 = source2[y1]; - - unsigned int inv_b = texturefracx; - unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; - unsigned int a = 16 - inv_a; - unsigned int b = 16 - inv_b; - - unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lxy2 = light_x; // L.x*L.x + L.y*L.y - __m128 Lz = _mm_sub_ps(light_z, viewpos_z); - __m128 dist2 = _mm_add_ps(Lxy2, _mm_mul_ps(Lz, Lz)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - 
__m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_y, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_y, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - uint32_t alpha0 = APART(ifgcolor[0]); - uint32_t alpha1 = APART(ifgcolor[1]); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 >> 7; // 255->256 - uint32_t inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; - uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = 
_mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_sub_epi32(bg_lo, fg_lo); - __m128i out_hi = _mm_sub_epi32(bg_hi, fg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - dest[offset] = _mm_cvtsi128_si32(outcolor); - } - } - } - else - { - const uint32_t *source = (const uint32_t*)args.TexturePixels(); - const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); - bool is_nearest_filter = (source2 == nullptr); - if (is_nearest_filter) - { - int textureheight = args.TextureHeight(); - uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; - - // Shade constants - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); - __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); - shade_fade = _mm_mullo_epi16(shade_fade, inv_light); - __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, 
shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); - int desaturate = shade_constants.desaturate; - - int count = args.Count(); - int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t fracstep = args.TextureVStep(); - uint32_t frac = args.TextureVPos(); - uint32_t texturefracx = args.TextureUPos(); - uint32_t *dest = (uint32_t*)args.Dest(); - int dest_y = args.DestY(); - - auto lights = args.dc_lights; - auto num_lights = args.dc_num_lights; - float vpz = args.dc_viewpos.Z + args.dc_viewpos_step.Z * thread->skipped_by_thread(dest_y); - float stepvpz = args.dc_viewpos_step.Z * thread->num_cores; - __m128 viewpos_z = _mm_setr_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); - __m128 step_viewpos_z = _mm_set1_ps(stepvpz * 2.0f); - - count = thread->count_for_thread(dest_y, count); - if (count <= 0) return; - frac += thread->skipped_by_thread(dest_y) * fracstep; - dest = thread->dest_for_thread(dest_y, pitch, dest); - fracstep *= thread->num_cores; - pitch *= thread->num_cores; - uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); - - int ssecount = count / 2; - for (int index = 0; index < ssecount; index++) - { - int offset = index * pitch * 2; - uint32_t desttmp[2]; - desttmp[0] = dest[offset]; - desttmp[1] = dest[offset + pitch]; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2]; - { - int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; - unsigned int sampleout = source[sample_index]; - ifgcolor[0] = sampleout; - frac += fracstep; - } - { - int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; - unsigned int sampleout = source[sample_index]; - ifgcolor[1] = sampleout; - frac += fracstep; - } - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - 
- // Shade - __m128i material = fgcolor; - int blue0 = BPART(ifgcolor[0]); - int green0 = GPART(ifgcolor[0]); - int red0 = RPART(ifgcolor[0]); - int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - - int blue1 = BPART(ifgcolor[1]); - int green1 = GPART(ifgcolor[1]); - int red1 = RPART(ifgcolor[1]); - int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; - - __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); - - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, mlight); - fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lxy2 = light_x; // L.x*L.x + L.y*L.y - __m128 Lz = _mm_sub_ps(light_z, viewpos_z); - __m128 dist2 = _mm_add_ps(Lxy2, _mm_mul_ps(Lz, Lz)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_y, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_y, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, 
point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - uint32_t alpha0 = APART(ifgcolor[0]); - uint32_t alpha1 = APART(ifgcolor[1]); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 >> 7; // 255->256 - uint32_t inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; - uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_sub_epi32(bg_lo, fg_lo); - __m128i out_hi = _mm_sub_epi32(bg_hi, fg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, 
_mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - _mm_storel_epi64((__m128i*)desttmp, outcolor); - dest[offset] = desttmp[0]; - dest[offset + pitch] = desttmp[1]; - viewpos_z = _mm_add_ps(viewpos_z, step_viewpos_z); - } - - if (ssecount * 2 != count) - { - int index = ssecount * 2; - int offset = index * pitch; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2]; - int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; - unsigned int sampleout = source[sample_index]; - ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - int blue0 = BPART(ifgcolor[0]); - int green0 = GPART(ifgcolor[0]); - int red0 = RPART(ifgcolor[0]); - int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - - int blue1 = BPART(ifgcolor[1]); - int green1 = GPART(ifgcolor[1]); - int red1 = RPART(ifgcolor[1]); - int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; - - __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); - - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, mlight); - fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lxy2 = 
light_x; // L.x*L.x + L.y*L.y - __m128 Lz = _mm_sub_ps(light_z, viewpos_z); - __m128 dist2 = _mm_add_ps(Lxy2, _mm_mul_ps(Lz, Lz)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_y, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_y, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - uint32_t alpha0 = APART(ifgcolor[0]); - uint32_t alpha1 = APART(ifgcolor[1]); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 >> 7; // 255->256 - uint32_t inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; - uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = 
_mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_sub_epi32(bg_lo, fg_lo); - __m128i out_hi = _mm_sub_epi32(bg_hi, fg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - dest[offset] = _mm_cvtsi128_si32(outcolor); - } - } - else - { - int textureheight = args.TextureHeight(); - uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; - - // Shade constants - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); - __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); - shade_fade = _mm_mullo_epi16(shade_fade, inv_light); - __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, 
shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); - int desaturate = shade_constants.desaturate; - - int count = args.Count(); - int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t fracstep = args.TextureVStep(); - uint32_t frac = args.TextureVPos(); - uint32_t texturefracx = args.TextureUPos(); - uint32_t *dest = (uint32_t*)args.Dest(); - int dest_y = args.DestY(); - - auto lights = args.dc_lights; - auto num_lights = args.dc_num_lights; - float vpz = args.dc_viewpos.Z + args.dc_viewpos_step.Z * thread->skipped_by_thread(dest_y); - float stepvpz = args.dc_viewpos_step.Z * thread->num_cores; - __m128 viewpos_z = _mm_setr_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); - __m128 step_viewpos_z = _mm_set1_ps(stepvpz * 2.0f); - - count = thread->count_for_thread(dest_y, count); - if (count <= 0) return; - frac += thread->skipped_by_thread(dest_y) * fracstep; - dest = thread->dest_for_thread(dest_y, pitch, dest); - fracstep *= thread->num_cores; - pitch *= thread->num_cores; - frac -= one / 2; - uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); - - int ssecount = count / 2; - for (int index = 0; index < ssecount; index++) - { - int offset = index * pitch * 2; - uint32_t desttmp[2]; - desttmp[0] = dest[offset]; - desttmp[1] = dest[offset + pitch]; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2]; - { - unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; - unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; - unsigned int y0 = frac_y0 >> FRACBITS; - unsigned int y1 = frac_y1 >> FRACBITS; - - unsigned int p00 = source[y0]; - unsigned int p01 = source[y1]; - unsigned int p10 = source2[y0]; - unsigned int p11 = source2[y1]; - - unsigned int inv_b = texturefracx; - unsigned int inv_a = (frac_y1 >> 
(FRACBITS - 4)) & 15; - unsigned int a = 16 - inv_a; - unsigned int b = 16 - inv_b; - - unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - ifgcolor[0] = sampleout; - frac += fracstep; - } - { - unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; - unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; - unsigned int y0 = frac_y0 >> FRACBITS; - unsigned int y1 = frac_y1 >> FRACBITS; - - unsigned int p00 = source[y0]; - unsigned int p01 = source[y1]; - unsigned int p10 = source2[y0]; - unsigned int p11 = source2[y1]; - - unsigned int inv_b = texturefracx; - unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; - unsigned int a = 16 - inv_a; - unsigned int b = 16 - inv_b; - - unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - ifgcolor[1] = sampleout; - frac += 
fracstep; - } - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - int blue0 = BPART(ifgcolor[0]); - int green0 = GPART(ifgcolor[0]); - int red0 = RPART(ifgcolor[0]); - int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - - int blue1 = BPART(ifgcolor[1]); - int green1 = GPART(ifgcolor[1]); - int red1 = RPART(ifgcolor[1]); - int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; - - __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); - - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, mlight); - fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lxy2 = light_x; // L.x*L.x + L.y*L.y - __m128 Lz = _mm_sub_ps(light_z, viewpos_z); - __m128 dist2 = _mm_add_ps(Lxy2, _mm_mul_ps(Lz, Lz)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_y, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_y, _mm_setzero_ps()); - __m128i 
attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - uint32_t alpha0 = APART(ifgcolor[0]); - uint32_t alpha1 = APART(ifgcolor[1]); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 >> 7; // 255->256 - uint32_t inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; - uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_sub_epi32(bg_lo, fg_lo); - __m128i out_hi = _mm_sub_epi32(bg_hi, fg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 
8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - _mm_storel_epi64((__m128i*)desttmp, outcolor); - dest[offset] = desttmp[0]; - dest[offset + pitch] = desttmp[1]; - viewpos_z = _mm_add_ps(viewpos_z, step_viewpos_z); - } - - if (ssecount * 2 != count) - { - int index = ssecount * 2; - int offset = index * pitch; - __m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); - - // Sample - unsigned int ifgcolor[2]; - unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; - unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; - unsigned int y0 = frac_y0 >> FRACBITS; - unsigned int y1 = frac_y1 >> FRACBITS; - - unsigned int p00 = source[y0]; - unsigned int p01 = source[y1]; - unsigned int p10 = source2[y0]; - unsigned int p11 = source2[y1]; - - unsigned int inv_b = texturefracx; - unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; - unsigned int a = 16 - inv_a; - unsigned int b = 16 - inv_b; - - unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - __m128i material = fgcolor; - int blue0 = BPART(ifgcolor[0]); - int green0 = 
GPART(ifgcolor[0]); - int red0 = RPART(ifgcolor[0]); - int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - - int blue1 = BPART(ifgcolor[1]); - int green1 = GPART(ifgcolor[1]); - int red1 = RPART(ifgcolor[1]); - int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; - - __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); - - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, mlight); - fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lxy2 = light_x; // L.x*L.x + L.y*L.y - __m128 Lz = _mm_sub_ps(light_z, viewpos_z); - __m128 dist2 = _mm_add_ps(Lxy2, _mm_mul_ps(Lz, Lz)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_y, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_y, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, 
_MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - - // Blend - uint32_t alpha0 = APART(ifgcolor[0]); - uint32_t alpha1 = APART(ifgcolor[1]); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 >> 7; // 255->256 - uint32_t inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; - uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo = _mm_sub_epi32(bg_lo, fg_lo); - __m128i out_hi = _mm_sub_epi32(bg_hi, fg_hi); - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - dest[offset] 
= _mm_cvtsi128_si32(outcolor); - } - } - } - } - - FString DebugInfo() override { return "DrawWallRevSubClamp32Command"; } - }; - -} diff --git a/src/swrenderer/drawers/r_draw_wall32.php b/src/swrenderer/drawers/r_draw_wall32.php deleted file mode 100644 index bc003c9a5f..0000000000 --- a/src/swrenderer/drawers/r_draw_wall32.php +++ /dev/null @@ -1,370 +0,0 @@ -#!/usr/bin/php -/* -** Drawer commands for walls -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 'as-is', without any express or implied -** warranty. In no event will the authors be held liable for any damages -** arising from the use of this software. -** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. -** 3. This notice may not be removed or altered from any source distribution. -** -*/ - -/* - Warning: this C++ source file has been auto-generated. Please modify the original php script that generated it. 
-*/ - -#pragma once - -#include "swrenderer/drawers/r_draw_rgba.h" -#include "swrenderer/viewport/r_walldrawer.h" - -namespace swrenderer -{ - - class : public DrawerCommand - { - protected: - WallDrawerArgs args; - - public: - (const WallDrawerArgs &drawerargs) : args(drawerargs) { } - - void Execute(DrawerThread *thread) override - { - auto shade_constants = args.ColormapConstants(); - if (shade_constants.simple_shade) - { - - } - else - { - - } - } - - FString DebugInfo() override { return ""; } - }; - - - const uint32_t *source = (const uint32_t*)args.TexturePixels(); - const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); - bool is_nearest_filter = (source2 == nullptr); - if (is_nearest_filter) - { - - } - else - { - - } - - int textureheight = args.TextureHeight(); - uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; - - // Shade constants - int light = 256 - (args.Light() >> (FRACBITS - 8)); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - - __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); - __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); - shade_fade = _mm_mullo_epi16(shade_fade, inv_light); - __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); - int 
desaturate = shade_constants.desaturate; - - - int count = args.Count(); - int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); - uint32_t fracstep = args.TextureVStep(); - uint32_t frac = args.TextureVPos(); - uint32_t texturefracx = args.TextureUPos(); - uint32_t *dest = (uint32_t*)args.Dest(); - int dest_y = args.DestY(); - - auto lights = args.dc_lights; - auto num_lights = args.dc_num_lights; - float vpz = args.dc_viewpos.Z + args.dc_viewpos_step.Z * thread->skipped_by_thread(dest_y); - float stepvpz = args.dc_viewpos_step.Z * thread->num_cores; - __m128 viewpos_z = _mm_setr_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); - __m128 step_viewpos_z = _mm_set1_ps(stepvpz * 2.0f); - - count = thread->count_for_thread(dest_y, count); - if (count <= 0) return; - frac += thread->skipped_by_thread(dest_y) * fracstep; - dest = thread->dest_for_thread(dest_y, pitch, dest); - fracstep *= thread->num_cores; - pitch *= thread->num_cores; - - frac -= one / 2; - - uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); - uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); - - int ssecount = count / 2; - for (int index = 0; index < ssecount; index++) - { - int offset = index * pitch * 2; - uint32_t desttmp[2]; - desttmp[0] = dest[offset]; - desttmp[1] = dest[offset + pitch]; - - __m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); - - - // Sample - unsigned int ifgcolor[2]; - { - - ifgcolor[0] = sampleout; - frac += fracstep; - } - { - - ifgcolor[1] = sampleout; - frac += fracstep; - } - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - - - // Blend - - - _mm_storel_epi64((__m128i*)desttmp, outcolor); - dest[offset] = desttmp[0]; - dest[offset + pitch] = desttmp[1]; - viewpos_z = _mm_add_ps(viewpos_z, step_viewpos_z); - } - - if (ssecount * 2 != count) - { - int index = ssecount * 2; - int offset = index * pitch; - - __m128i bgcolor = 
_mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); - - - // Sample - unsigned int ifgcolor[2]; - - ifgcolor[0] = sampleout; - ifgcolor[1] = 0; - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - - // Shade - - - // Blend - - - dest[offset] = _mm_cvtsi128_si32(outcolor); - } - - int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; - unsigned int sampleout = source[sample_index]; - - unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; - unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; - unsigned int y0 = frac_y0 >> FRACBITS; - unsigned int y1 = frac_y1 >> FRACBITS; - - unsigned int p00 = source[y0]; - unsigned int p01 = source[y1]; - unsigned int p10 = source2[y0]; - unsigned int p11 = source2[y1]; - - unsigned int inv_b = texturefracx; - unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; - unsigned int a = 16 - inv_a; - unsigned int b = 16 - inv_b; - - unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - - __m128i material = fgcolor; - - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - - int blue0 = BPART(ifgcolor[0]); - int green0 = GPART(ifgcolor[0]); - int red0 = RPART(ifgcolor[0]); - int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - - int blue1 = BPART(ifgcolor[1]); - int green1 = GPART(ifgcolor[1]); - int red1 = 
RPART(ifgcolor[1]); - int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; - - __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); - - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, mlight); - fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - - __m128i outcolor = fgcolor; - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - - __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3)); - alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - fgcolor = _mm_mullo_epi16(fgcolor, alpha); - bgcolor = _mm_mullo_epi16(bgcolor, inv_alpha); - __m128i outcolor = _mm_srli_epi16(_mm_add_epi16(fgcolor, bgcolor), 8); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - uint32_t alpha0 = APART(ifgcolor[0]); - uint32_t alpha1 = APART(ifgcolor[1]); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 >> 7; // 255->256 - uint32_t inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; - uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo 
= _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - - __m128i out_lo = _mm_add_epi32(fg_lo, bg_lo); - __m128i out_hi = _mm_add_epi32(fg_hi, bg_hi); - - __m128i out_lo = _mm_sub_epi32(fg_lo, bg_lo); - __m128i out_hi = _mm_sub_epi32(fg_hi, bg_hi); - - __m128i out_lo = _mm_sub_epi32(bg_lo, fg_lo); - __m128i out_hi = _mm_sub_epi32(bg_hi, fg_hi); - - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - - __m128i lit = _mm_setzero_si128(); - - for (int i = 0; i != num_lights; i++) - { - __m128 light_x = _mm_set1_ps(lights[i].x); - __m128 light_y = _mm_set1_ps(lights[i].y); - __m128 light_z = _mm_set1_ps(lights[i].z); - __m128 light_radius = _mm_set1_ps(lights[i].radius); - __m128 m256 = _mm_set1_ps(256.0f); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 Lxy2 = light_x; // L.x*L.x + L.y*L.y - __m128 Lz = _mm_sub_ps(light_z, viewpos_z); - __m128 dist2 = _mm_add_ps(Lxy2, _mm_mul_ps(Lz, Lz)); - __m128 rcp_dist = _mm_rsqrt_ps(dist2); - __m128 dist = _mm_mul_ps(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = dot(N,L) * attenuation - __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_y, rcp_dist), distance_attenuation); - - __m128 is_attenuated = _mm_cmpeq_ps(light_y, _mm_setzero_ps()); - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, 
point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0,0,0,0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1,1,1,1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1,0,1,0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - -} From 735157aea47e25d8d035d7ea27ac17bf61e7b631 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 25 Feb 2017 01:22:54 +0100 Subject: [PATCH 896/912] Bump minimum architecture to SSE 2 on the x86 platform (a Pentium 4 from 2001!) --- CMakeLists.txt | 31 ++++++++++--------- src/CMakeLists.txt | 1 - src/swrenderer/drawers/r_draw_rgba.h | 7 +++++ src/swrenderer/drawers/r_draw_span32_sse2.h | 10 +++--- src/swrenderer/drawers/r_draw_sprite32_sse2.h | 10 +++--- src/swrenderer/drawers/r_draw_wall32_sse2.h | 10 +++--- 6 files changed, 39 insertions(+), 30 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index b52e680dd6..9df5498e4a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -174,20 +174,23 @@ if( MSVC ) # Disable run-time type information set( ALL_C_FLAGS "/GF /Gy /GR-" ) - if( CMAKE_SIZEOF_VOID_P MATCHES "4") - # SSE2 option (to allow x87 in 32 bit and disallow extended feature sets which have not yet been checked for precision) - option (ZDOOM_USE_SSE2 "Use SSE2 instruction set") - if (ZDOOM_USE_SSE2) - set( ALL_C_FLAGS "${ALL_C_FLAGS} /arch:SSE2") - else () - if (MSVC_VERSION GREATER 1699) - # On Visual C++ 2012 and later SSE2 is the default, so we need to switch it off explicitly - set( ALL_C_FLAGS "${ALL_C_FLAGS} /arch:IA32") - endif () - endif () - else() - set( ALL_C_FLAGS "${ALL_C_FLAGS} /arch:SSE2") - endif() + # Use SSE 2 as minimum always 
as the true color drawers needs it for __vectorcall + set( ALL_C_FLAGS "${ALL_C_FLAGS} /arch:SSE2") + +# if( CMAKE_SIZEOF_VOID_P MATCHES "4") +# # SSE2 option (to allow x87 in 32 bit and disallow extended feature sets which have not yet been checked for precision) +# option (ZDOOM_USE_SSE2 "Use SSE2 instruction set") +# if (ZDOOM_USE_SSE2) +# set( ALL_C_FLAGS "${ALL_C_FLAGS} /arch:SSE2") +# else () +# if (MSVC_VERSION GREATER 1699) +# # On Visual C++ 2012 and later SSE2 is the default, so we need to switch it off explicitly +# set( ALL_C_FLAGS "${ALL_C_FLAGS} /arch:IA32") +# endif () +# endif () +# else() +# set( ALL_C_FLAGS "${ALL_C_FLAGS} /arch:SSE2") +# endif() # Avoid CRT DLL dependancies in release builds, optionally generate assembly output for checking crash locations. option( ZDOOM_GENERATE_ASM "Generate assembly output." OFF ) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 897d78721c..a2e2bc0535 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -791,7 +791,6 @@ file( GLOB HEADER_FILES xlat/*.h swrenderer/*.h swrenderer/drawers/*.h - swrenderer/drawers/*.php swrenderer/scene/*.h swrenderer/segments/*.h swrenderer/line/*.h diff --git a/src/swrenderer/drawers/r_draw_rgba.h b/src/swrenderer/drawers/r_draw_rgba.h index 2eab9a1854..3e95c8cb67 100644 --- a/src/swrenderer/drawers/r_draw_rgba.h +++ b/src/swrenderer/drawers/r_draw_rgba.h @@ -67,6 +67,13 @@ namespace swrenderer #endif #endif + // Force the compiler to use a calling convention that works for vector types + #if defined(_MSC_VER) + #define VECTORCALL __vectorcall + #else + #define VECTORCALL + #endif + class DrawFuzzColumnRGBACommand : public DrawerCommand { int _x; diff --git a/src/swrenderer/drawers/r_draw_span32_sse2.h b/src/swrenderer/drawers/r_draw_span32_sse2.h index 34daaeb974..e8ee704db4 100644 --- a/src/swrenderer/drawers/r_draw_span32_sse2.h +++ b/src/swrenderer/drawers/r_draw_span32_sse2.h @@ -152,7 +152,7 @@ namespace swrenderer } template - void Loop(DrawerThread 
*thread, TextureData texdata, ShadeConstants shade_constants) + FORCEINLINE void VECTORCALL Loop(DrawerThread *thread, TextureData texdata, ShadeConstants shade_constants) { using namespace DrawSpan32TModes; @@ -264,7 +264,7 @@ namespace swrenderer } template - unsigned int Sample(uint32_t xbits, uint32_t ybits, uint32_t xstep, uint32_t ystep, uint32_t xfrac, uint32_t yfrac, uint32_t yshift, uint32_t xshift, uint32_t xmask, const uint32_t *source) + FORCEINLINE unsigned int VECTORCALL Sample(uint32_t xbits, uint32_t ybits, uint32_t xstep, uint32_t ystep, uint32_t xfrac, uint32_t yfrac, uint32_t yshift, uint32_t xshift, uint32_t xmask, const uint32_t *source) { using namespace DrawSpan32TModes; @@ -319,7 +319,7 @@ namespace swrenderer } template - __m128i Shade(__m128i fgcolor, __m128i mlight, unsigned int ifgcolor0, unsigned int ifgcolor1, int desaturate, __m128i inv_desaturate, __m128i shade_fade, __m128i shade_light, const DrawerLight *lights, int num_lights, __m128 viewpos_x) + FORCEINLINE __m128i VECTORCALL Shade(__m128i fgcolor, __m128i mlight, unsigned int ifgcolor0, unsigned int ifgcolor1, int desaturate, __m128i inv_desaturate, __m128i shade_fade, __m128i shade_light, const DrawerLight *lights, int num_lights, __m128 viewpos_x) { using namespace DrawSpan32TModes; @@ -351,7 +351,7 @@ namespace swrenderer return AddLights(material, fgcolor, lights, num_lights, viewpos_x); } - __m128i AddLights(__m128i material, __m128i fgcolor, const DrawerLight *lights, int num_lights, __m128 viewpos_x) + FORCEINLINE __m128i VECTORCALL AddLights(__m128i material, __m128i fgcolor, const DrawerLight *lights, int num_lights, __m128 viewpos_x) { using namespace DrawSpan32TModes; @@ -398,7 +398,7 @@ namespace swrenderer return fgcolor; } - __m128i Blend(__m128i fgcolor, __m128i bgcolor, uint32_t srcalpha, uint32_t destalpha, unsigned int ifgcolor0, unsigned int ifgcolor1) + FORCEINLINE __m128i VECTORCALL Blend(__m128i fgcolor, __m128i bgcolor, uint32_t srcalpha, uint32_t 
destalpha, unsigned int ifgcolor0, unsigned int ifgcolor1) { using namespace DrawSpan32TModes; diff --git a/src/swrenderer/drawers/r_draw_sprite32_sse2.h b/src/swrenderer/drawers/r_draw_sprite32_sse2.h index 0ec3e63d87..dc4a421721 100644 --- a/src/swrenderer/drawers/r_draw_sprite32_sse2.h +++ b/src/swrenderer/drawers/r_draw_sprite32_sse2.h @@ -100,7 +100,7 @@ namespace swrenderer } template - void Loop(DrawerThread *thread, ShadeConstants shade_constants) + FORCEINLINE void VECTORCALL Loop(DrawerThread *thread, ShadeConstants shade_constants) { using namespace DrawSprite32TModes; @@ -254,7 +254,7 @@ namespace swrenderer } template - unsigned int Sample(uint32_t frac, const uint32_t *source, const uint32_t *source2, const uint32_t *translation, int textureheight, uint32_t one, uint32_t texturefracx, uint32_t color, uint32_t srccolor) + FORCEINLINE unsigned int VECTORCALL Sample(uint32_t frac, const uint32_t *source, const uint32_t *source2, const uint32_t *translation, int textureheight, uint32_t one, uint32_t texturefracx, uint32_t color, uint32_t srccolor) { using namespace DrawSprite32TModes; @@ -303,7 +303,7 @@ namespace swrenderer } } - unsigned int SampleShade(uint32_t frac, const uint32_t *source, const uint8_t *colormap) + FORCEINLINE unsigned int VECTORCALL SampleShade(uint32_t frac, const uint32_t *source, const uint8_t *colormap) { using namespace DrawSprite32TModes; @@ -320,7 +320,7 @@ namespace swrenderer } template - __m128i Shade(__m128i fgcolor, __m128i mlight, unsigned int ifgcolor0, unsigned int ifgcolor1, int desaturate, __m128i inv_desaturate, __m128i shade_fade, __m128i shade_light, __m128i lightcontrib) + FORCEINLINE __m128i VECTORCALL Shade(__m128i fgcolor, __m128i mlight, unsigned int ifgcolor0, unsigned int ifgcolor1, int desaturate, __m128i inv_desaturate, __m128i shade_fade, __m128i shade_light, __m128i lightcontrib) { using namespace DrawSprite32TModes; @@ -359,7 +359,7 @@ namespace swrenderer } } - __m128i Blend(__m128i fgcolor, __m128i 
bgcolor, unsigned int ifgcolor0, unsigned int ifgcolor1, unsigned int ifgshade0, unsigned int ifgshade1, uint32_t srcalpha, uint32_t destalpha) + FORCEINLINE __m128i VECTORCALL Blend(__m128i fgcolor, __m128i bgcolor, unsigned int ifgcolor0, unsigned int ifgcolor1, unsigned int ifgshade0, unsigned int ifgshade1, uint32_t srcalpha, uint32_t destalpha) { using namespace DrawSprite32TModes; diff --git a/src/swrenderer/drawers/r_draw_wall32_sse2.h b/src/swrenderer/drawers/r_draw_wall32_sse2.h index 03be70e75f..7c8057f1c1 100644 --- a/src/swrenderer/drawers/r_draw_wall32_sse2.h +++ b/src/swrenderer/drawers/r_draw_wall32_sse2.h @@ -78,7 +78,7 @@ namespace swrenderer } template - void Loop(DrawerThread *thread, ShadeConstants shade_constants) + FORCEINLINE void VECTORCALL Loop(DrawerThread *thread, ShadeConstants shade_constants) { using namespace DrawWall32TModes; @@ -205,7 +205,7 @@ namespace swrenderer } template - unsigned int Sample(uint32_t frac, const uint32_t *source, const uint32_t *source2, int textureheight, uint32_t one, uint32_t texturefracx) + FORCEINLINE unsigned int VECTORCALL Sample(uint32_t frac, const uint32_t *source, const uint32_t *source2, int textureheight, uint32_t one, uint32_t texturefracx) { using namespace DrawWall32TModes; @@ -241,7 +241,7 @@ namespace swrenderer } template - __m128i Shade(__m128i fgcolor, __m128i mlight, unsigned int ifgcolor0, unsigned int ifgcolor1, int desaturate, __m128i inv_desaturate, __m128i shade_fade, __m128i shade_light, const DrawerLight *lights, int num_lights, __m128 viewpos_z) + FORCEINLINE __m128i VECTORCALL Shade(__m128i fgcolor, __m128i mlight, unsigned int ifgcolor0, unsigned int ifgcolor1, int desaturate, __m128i inv_desaturate, __m128i shade_fade, __m128i shade_light, const DrawerLight *lights, int num_lights, __m128 viewpos_z) { using namespace DrawWall32TModes; @@ -273,7 +273,7 @@ namespace swrenderer return AddLights(material, fgcolor, lights, num_lights, viewpos_z); } - __m128i AddLights(__m128i 
material, __m128i fgcolor, const DrawerLight *lights, int num_lights, __m128 viewpos_z) + FORCEINLINE __m128i VECTORCALL AddLights(__m128i material, __m128i fgcolor, const DrawerLight *lights, int num_lights, __m128 viewpos_z) { using namespace DrawWall32TModes; @@ -320,7 +320,7 @@ namespace swrenderer return fgcolor; } - __m128i Blend(__m128i fgcolor, __m128i bgcolor, unsigned int ifgcolor0, unsigned int ifgcolor1, uint32_t srcalpha, uint32_t destalpha) + FORCEINLINE __m128i VECTORCALL Blend(__m128i fgcolor, __m128i bgcolor, unsigned int ifgcolor0, unsigned int ifgcolor1, uint32_t srcalpha, uint32_t destalpha) { using namespace DrawWall32TModes; From 9b40aa9605d2a844644a96cbac1f71de787d8ca3 Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Sat, 25 Feb 2017 15:32:50 -0500 Subject: [PATCH 897/912] - Fixed two typos as identified by dpJudas. --- src/swrenderer/drawers/r_draw_span32_sse2.h | 2 +- src/swrenderer/drawers/r_draw_wall32_sse2.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/swrenderer/drawers/r_draw_span32_sse2.h b/src/swrenderer/drawers/r_draw_span32_sse2.h index e8ee704db4..abf65feb0d 100644 --- a/src/swrenderer/drawers/r_draw_span32_sse2.h +++ b/src/swrenderer/drawers/r_draw_span32_sse2.h @@ -411,7 +411,7 @@ namespace swrenderer else if (BlendT::Mode == (int)SpanBlendModes::Masked) { __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3, 3, 3, 3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3, 3, 3, 3)); + alpha = _mm_shufflehi_epi16(alpha, _MM_SHUFFLE(3, 3, 3, 3)); alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); diff --git a/src/swrenderer/drawers/r_draw_wall32_sse2.h b/src/swrenderer/drawers/r_draw_wall32_sse2.h index 7c8057f1c1..4ba9060d49 100644 --- a/src/swrenderer/drawers/r_draw_wall32_sse2.h +++ b/src/swrenderer/drawers/r_draw_wall32_sse2.h @@ -333,7 +333,7 @@ namespace swrenderer else if (BlendT::Mode == 
(int)WallBlendModes::Masked) { __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3, 3, 3, 3)); - alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3, 3, 3, 3)); + alpha = _mm_shufflehi_epi16(alpha, _MM_SHUFFLE(3, 3, 3, 3)); alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); From e7a1a59053a3288880a6b4731577afaeb9421565 Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Sat, 25 Feb 2017 16:36:57 -0500 Subject: [PATCH 898/912] - Disabled alpha-masking textures for now, since they do not yet work. (needs texman fix) --- src/swrenderer/drawers/r_draw_span32_sse2.h | 9 +++++++++ src/swrenderer/drawers/r_draw_wall32_sse2.h | 9 +++++++++ 2 files changed, 18 insertions(+) diff --git a/src/swrenderer/drawers/r_draw_span32_sse2.h b/src/swrenderer/drawers/r_draw_span32_sse2.h index abf65feb0d..94d5b278c4 100644 --- a/src/swrenderer/drawers/r_draw_span32_sse2.h +++ b/src/swrenderer/drawers/r_draw_span32_sse2.h @@ -410,9 +410,11 @@ namespace swrenderer } else if (BlendT::Mode == (int)SpanBlendModes::Masked) { +#if 0 // leaving this in for alpha texture support (todo: fix in texture manager later?) 
__m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3, 3, 3, 3)); alpha = _mm_shufflehi_epi16(alpha, _MM_SHUFFLE(3, 3, 3, 3)); alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); fgcolor = _mm_mullo_epi16(fgcolor, alpha); @@ -421,6 +423,13 @@ namespace swrenderer outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); return outcolor; +#endif + __m128i mask = _mm_cmpeq_epi32(_mm_packus_epi16(fgcolor, _mm_setzero_si128()), _mm_setzero_si128()); + mask = _mm_unpacklo_epi8(mask, _mm_setzero_si128()); + __m128i outcolor = _mm_or_si128(_mm_and_si128(mask, bgcolor), _mm_andnot_si128(mask, fgcolor)); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + return outcolor; } else if (BlendT::Mode == (int)SpanBlendModes::Translucent) { diff --git a/src/swrenderer/drawers/r_draw_wall32_sse2.h b/src/swrenderer/drawers/r_draw_wall32_sse2.h index 4ba9060d49..31ab06e86b 100644 --- a/src/swrenderer/drawers/r_draw_wall32_sse2.h +++ b/src/swrenderer/drawers/r_draw_wall32_sse2.h @@ -332,9 +332,11 @@ namespace swrenderer } else if (BlendT::Mode == (int)WallBlendModes::Masked) { +#if 0 // leaving this in for alpha texture support (todo: fix in texture manager later?) 
__m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3, 3, 3, 3)); alpha = _mm_shufflehi_epi16(alpha, _MM_SHUFFLE(3, 3, 3, 3)); alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); fgcolor = _mm_mullo_epi16(fgcolor, alpha); @@ -343,6 +345,13 @@ namespace swrenderer outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); return outcolor; +#endif + __m128i mask = _mm_cmpeq_epi32(_mm_packus_epi16(fgcolor, _mm_setzero_si128()), _mm_setzero_si128()); + mask = _mm_unpacklo_epi8(mask, _mm_setzero_si128()); + __m128i outcolor = _mm_or_si128(_mm_and_si128(mask, bgcolor), _mm_andnot_si128(mask, fgcolor)); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + return outcolor; } else { From bd61e9f3f4225b5e3e73a9e3b6d1a83bc7bd4be3 Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Mon, 27 Feb 2017 08:20:26 -0500 Subject: [PATCH 899/912] - Disabled dynlights during invulnerability/lightamp (or any FixedColormap or FixedLightLevel) --- src/swrenderer/line/r_line.cpp | 9 +++++++++ src/swrenderer/plane/r_visibleplane.cpp | 4 ++++ 2 files changed, 13 insertions(+) diff --git a/src/swrenderer/line/r_line.cpp b/src/swrenderer/line/r_line.cpp index 2b2417ec22..94b1cfc4be 100644 --- a/src/swrenderer/line/r_line.cpp +++ b/src/swrenderer/line/r_line.cpp @@ -1164,6 +1164,9 @@ namespace swrenderer FLightNode *light_list = (mLineSegment && mLineSegment->sidedef) ? 
mLineSegment->sidedef->lighthead : nullptr; + if ((cameraLight->FixedLightLevel() >= 0) || (cameraLight->FixedColormap() != nullptr)) + light_list = nullptr; // [SP] Don't draw dynlights if invul/lightamp active + RenderWallPart renderWallpart(Thread); renderWallpart.Render(drawerargs, mFrontSector, mLineSegment, WallC, rw_pic, x1, x2, walltop.ScreenY, wallupper.ScreenY, mTopPart.TextureMid, walltexcoords.VStep, walltexcoords.UPos, yscale, MAX(mFrontCeilingZ1, mFrontCeilingZ2), MIN(mBackCeilingZ1, mBackCeilingZ2), false, wallshade, offset, rw_light, rw_lightstep, light_list, foggy, basecolormap); } @@ -1208,6 +1211,9 @@ namespace swrenderer FLightNode *light_list = (mLineSegment && mLineSegment->sidedef) ? mLineSegment->sidedef->lighthead : nullptr; + if ((cameraLight->FixedLightLevel() >= 0) || (cameraLight->FixedColormap() != nullptr)) + light_list = nullptr; // [SP] Don't draw dynlights if invul/lightamp active + RenderWallPart renderWallpart(Thread); renderWallpart.Render(drawerargs, mFrontSector, mLineSegment, WallC, rw_pic, x1, x2, walltop.ScreenY, wallbottom.ScreenY, mMiddlePart.TextureMid, walltexcoords.VStep, walltexcoords.UPos, yscale, MAX(mFrontCeilingZ1, mFrontCeilingZ2), MIN(mFrontFloorZ1, mFrontFloorZ2), false, wallshade, offset, rw_light, rw_lightstep, light_list, foggy, basecolormap); } @@ -1253,6 +1259,9 @@ namespace swrenderer FLightNode *light_list = (mLineSegment && mLineSegment->sidedef) ? 
mLineSegment->sidedef->lighthead : nullptr; + if ((cameraLight->FixedLightLevel() >= 0) || (cameraLight->FixedColormap() != nullptr)) + light_list = nullptr; // [SP] Don't draw dynlights if invul/lightamp active + RenderWallPart renderWallpart(Thread); renderWallpart.Render(drawerargs, mFrontSector, mLineSegment, WallC, rw_pic, x1, x2, walllower.ScreenY, wallbottom.ScreenY, mBottomPart.TextureMid, walltexcoords.VStep, walltexcoords.UPos, yscale, MAX(mBackFloorZ1, mBackFloorZ2), MIN(mFrontFloorZ1, mFrontFloorZ2), false, wallshade, offset, rw_light, rw_lightstep, light_list, foggy, basecolormap); } diff --git a/src/swrenderer/plane/r_visibleplane.cpp b/src/swrenderer/plane/r_visibleplane.cpp index 70cd4164f4..b32e092b48 100644 --- a/src/swrenderer/plane/r_visibleplane.cpp +++ b/src/swrenderer/plane/r_visibleplane.cpp @@ -61,6 +61,10 @@ namespace swrenderer if (!r_dynlights) return; + CameraLight *cameraLight = CameraLight::Instance(); + if (cameraLight->FixedColormap() != NULL || cameraLight->FixedLightLevel() >= 0) + return; // [SP] no dynlights if invul or lightamp + while (node) { if (!(node->lightsource->flags2&MF2_DORMANT)) From 4a9845a6c09203766c4da013db412f5efa5b40f7 Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Mon, 27 Feb 2017 12:02:46 -0500 Subject: [PATCH 900/912] - Fixed: voxels were not properly remapped after "restart" ccmd --- src/d_main.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/d_main.cpp b/src/d_main.cpp index 62558064a4..e5cf881baf 100644 --- a/src/d_main.cpp +++ b/src/d_main.cpp @@ -2689,6 +2689,7 @@ void D_DoomMain (void) // These calls from inside V_Init2 are still necessary C_NewModeAdjust(); M_InitVideoModesMenu(); + Renderer->RemapVoxels(); D_StartTitle (); // start up intro loop setmodeneeded = false; // This may be set to true here, but isn't needed for a restart } From 869de7554f619070a6ea85f3060dfce01f34ea0f Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Mon, 27 Feb 2017 12:15:17 -0500 Subject: [PATCH 
901/912] - fixed: Opening automap without 2D acceleration and rgb666 blending crashed due to bad color calculations. (Attempted to pass rgb888 values into rgb666 array) --- src/swrenderer/r_swcanvas.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/swrenderer/r_swcanvas.cpp b/src/swrenderer/r_swcanvas.cpp index 73d17ca3cf..c728dd3153 100644 --- a/src/swrenderer/r_swcanvas.cpp +++ b/src/swrenderer/r_swcanvas.cpp @@ -645,9 +645,9 @@ void SWCanvas::PUTTRANSDOT(DCanvas *canvas, int xx, int yy, int basecolor, int l { BYTE *spot = canvas->GetBuffer() + oldyyshifted + xx; - uint32_t r = (GPalette.BaseColors[*spot].r * (64 - level) + GPalette.BaseColors[basecolor].r * level) / 64; - uint32_t g = (GPalette.BaseColors[*spot].g * (64 - level) + GPalette.BaseColors[basecolor].g * level) / 64; - uint32_t b = (GPalette.BaseColors[*spot].b * (64 - level) + GPalette.BaseColors[basecolor].b * level) / 64; + uint32_t r = (GPalette.BaseColors[*spot].r * (64 - level) + GPalette.BaseColors[basecolor].r * level) / 256; + uint32_t g = (GPalette.BaseColors[*spot].g * (64 - level) + GPalette.BaseColors[basecolor].g * level) / 256; + uint32_t b = (GPalette.BaseColors[*spot].b * (64 - level) + GPalette.BaseColors[basecolor].b * level) / 256; *spot = (BYTE)RGB256k.RGB[r][g][b]; } From 4ebd25171c91415e550bae11b13e1402dc1424eb Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Wed, 1 Mar 2017 09:46:31 -0500 Subject: [PATCH 902/912] - Implemented a simple graphics switch. This currently affects NVidia Optimus-enabled notebooks only. 
--- src/win32/hardware.cpp | 33 +++++++++++++++++++++++++++++++++ wadsrc/static/language.enu | 3 +++ wadsrc/static/menudef.txt | 13 ++++++++++++- 3 files changed, 48 insertions(+), 1 deletion(-) diff --git a/src/win32/hardware.cpp b/src/win32/hardware.cpp index 70898ebe5a..5a17e90ece 100644 --- a/src/win32/hardware.cpp +++ b/src/win32/hardware.cpp @@ -71,8 +71,34 @@ FRenderer *gl_CreateInterface(); void I_RestartRenderer(); int currentrenderer = -1; int currentcanvas = -1; +int currentgpuswitch = -1; bool changerenderer; +// Optimus/Hybrid switcher +CUSTOM_CVAR(Int, vid_gpuswitch, 0, CVAR_ARCHIVE | CVAR_GLOBALCONFIG | CVAR_NOINITCALL) +{ + if (self != currentgpuswitch) + { + switch (self) + { + case 0: + Printf("Selecting default GPU...\n"); + break; + case 1: + Printf("Selecting high-performance dedicated GPU...\n"); + break; + case 2: + Printf("Selecting power-saving integrated GPU...\n"); + break; + default: + Printf("Unknown option (%d) - falling back to 'default'\n", *vid_gpuswitch); + self = 0; + break; + } + Printf("You must restart " GAMENAME " for this change to take effect.\n"); + } +} + // Software OpenGL canvas CUSTOM_CVAR(Bool, vid_used3d, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG | CVAR_NOINITCALL) { @@ -128,6 +154,13 @@ void I_InitGraphics () { UCVarValue val; + // todo: implement ATI version of this. this only works for nvidia notebooks, for now. + currentgpuswitch = vid_gpuswitch; + if (currentgpuswitch == 1) + putenv("SHIM_MCCOMPAT=0x800000001"); // discrete + else if (currentgpuswitch == 2) + putenv("SHIM_MCCOMPAT=0x800000000"); // integrated + // If the focus window is destroyed, it doesn't go back to the active window. // (e.g. 
because the net pane was up, and a button on it had focus) if (GetFocus() == NULL && GetActiveWindow() == Window) diff --git a/wadsrc/static/language.enu b/wadsrc/static/language.enu index 75ef5eb512..85f7ee830d 100644 --- a/wadsrc/static/language.enu +++ b/wadsrc/static/language.enu @@ -1832,6 +1832,7 @@ DSPLYMNU_DIMCOLOR = "Dim color"; DSPLYMNU_MOVEBOB = "View bob amount while moving"; DSPLYMNU_STILLBOB = "View bob amount while not moving"; DSPLYMNU_BOBSPEED = "Weapon bob speed"; +DSPLYMNU_GPUSWITCH = "Notebook Switchable GPU"; // HUD Options HUDMNU_TITLE = "HUD Options"; @@ -2359,6 +2360,8 @@ OPTVAL_GL = "OpenGL"; OPTVAL_D3D = "Direct3D"; OPTVAL_HWPOLY = "OpenGL-Accelerated"; OPTVAL_SWDOOM = "Doom Software Renderer"; +OPTVAL_DEDICATED = "High-Performance"; +OPTVAL_INTEGRATED = "Power-Saving"; // Colors C_BRICK = "\cabrick"; diff --git a/wadsrc/static/menudef.txt b/wadsrc/static/menudef.txt index ef20322417..2f5bd0d71c 100644 --- a/wadsrc/static/menudef.txt +++ b/wadsrc/static/menudef.txt @@ -683,6 +683,13 @@ OptionValue Fuzziness 2.0, "$OPTVAL_SHADOW" } +OptionValue GPUSwitch +{ + 0.0, "$OPTVAL_DEFAULT" + 1.0, "$OPTVAL_DEDICATED" + 2.0, "$OPTVAL_INTEGRATED" +} + OptionMenu "OpenGLOptions" { Title "$GLMNU_TITLE" @@ -755,7 +762,11 @@ OptionMenu "VideoOptions" Slider "$DSPLYMNU_MOVEBOB", "movebob", 0, 1.0, 0.05, 2 Slider "$DSPLYMNU_STILLBOB", "stillbob", 0, 1.0, 0.05, 2 Slider "$DSPLYMNU_BOBSPEED", "wbobspeed", 0, 2.0, 0.1 - + IfOption(Windows) + { + StaticText " " + Option "$DSPLYMNU_GPUSWITCH", vid_gpuswitch, "GPUSwitch" + } } //------------------------------------------------------------------------------------------- From 09530e9496603d2c65c16d75559761b840c34d3d Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Sat, 4 Mar 2017 08:03:42 -0500 Subject: [PATCH 903/912] - added unloved2 to compatibility list for clipmidtex --- wadsrc/static/compatibility.txt | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git 
a/wadsrc/static/compatibility.txt b/wadsrc/static/compatibility.txt index 7e244b7b35..93807661eb 100644 --- a/wadsrc/static/compatibility.txt +++ b/wadsrc/static/compatibility.txt @@ -541,3 +541,28 @@ DC96228097DD004C40CCB1DB14A91EAA // unloved.pk3:unlovedmaps.wad map05 clipmidtex } +261E64897A572C8DB8DC041E64BE27AD // unloved2beta1.pk3:u2_new2maps2.wad map06 +{ + clipmidtex +} + +04800B1F35E8C036EBABC8C616402927 // unloved2beta1.pk3:u2_new2maps2.wad map07 +{ + clipmidtex +} + +9E54F70648A77BBD090FF78A3AA05367 // unloved2beta1.pk3:u2_new2maps2.wad map08 +{ + clipmidtex +} + +72E9E0F41F691B7F956E62F35B4A617F // unloved2beta1.pk3:u2_new2maps2.wad map09 +{ + clipmidtex +} + +3D3FE412E87AD8B2316DAEC9E25F2E5D // unloved2beta1.pk3:u2_new2maps2.wad map10 +{ + clipmidtex +} + From be8abba344f036ff1b054f310fcfdfda81ce0df2 Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Sat, 4 Mar 2017 16:50:42 -0500 Subject: [PATCH 904/912] - started adding ARM support. incomplete. won't compile. don't try. --- src/CMakeLists.txt | 5 + src/ila/SSE2NEON.h | 1198 ++++++++++++++++++++++++++ src/ila/ila.h | 56 ++ src/ila/sse_to_neon.hpp | 187 ++++ src/swrenderer/drawers/r_draw_rgba.h | 2 + 5 files changed, 1448 insertions(+) create mode 100644 src/ila/SSE2NEON.h create mode 100644 src/ila/ila.h create mode 100644 src/ila/sse_to_neon.hpp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 950cfe9c02..0e1415e5cc 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -502,6 +502,11 @@ if( ZD_CMAKE_COMPILER_IS_GNUCXX_COMPATIBLE ) set( CMAKE_EXE_LINKER_FLAGS "-stdlib=libc++ ${CMAKE_EXE_LINKER_FLAGS}" ) endif () + # ARM processors (Raspberry Pi) - enable ARM NEON support. + if(${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm") + set( CMAKE_CXX_FLAGS "-mfpu=neon ${CMAKE_CXX_FLAGS}" ) + endif () + # Remove extra warnings when using the official DirectX headers. # Also, TDM-GCC 4.4.0 no longer accepts glibc-style printf formats as valid, # which is a royal pain. 
The previous version I had been using was fine with them. diff --git a/src/ila/SSE2NEON.h b/src/ila/SSE2NEON.h new file mode 100644 index 0000000000..4e94e9607d --- /dev/null +++ b/src/ila/SSE2NEON.h @@ -0,0 +1,1198 @@ +#ifndef SSE2NEON_H +#define SSE2NEON_H + +// This header file provides a simple API translation layer +// between SSE intrinsics to their corresponding ARM NEON versions +// +// This header file does not (yet) translate *all* of the SSE intrinsics. +// Since this is in support of a specific porting effort, I have only +// included the intrinsics I needed to get my port to work. +// +// Questions/Comments/Feedback send to: jratcliffscarab@gmail.com +// +// If you want to improve or add to this project, send me an +// email and I will probably approve your access to the depot. +// +// Project is located here: +// +// https://github.com/jratcliff63367/sse2neon +// +// Show your appreciation for open source by sending me a bitcoin tip to the following +// address. +// +// TipJar: 1PzgWDSyq4pmdAXRH8SPUtta4SWGrt4B1p : +// https://blockchain.info/address/1PzgWDSyq4pmdAXRH8SPUtta4SWGrt4B1p +// +// +// Contributors to this project are: +// +// John W. Ratcliff : jratcliffscarab@gmail.com +// Brandon Rowlett : browlett@nvidia.com +// Ken Fast : kfast@gdeb.com +// Eric van Beurden : evanbeurden@nvidia.com +// +// +// ********************************************************************************************************************* +// Release notes for January 20, 2017 version: +// +// The unit tests have been refactored. They no longer assert on an error, instead they return a pass/fail condition +// The unit-tests now test 10,000 random float and int values against each intrinsic. +// +// SSE2NEON now supports 95 SSE intrinsics. 39 of them have formal unit tests which have been implemented and +// fully tested on NEON/ARM. The remaining 56 still need unit tests implemented. 
+// +// A struct is now defined in this header file called 'SIMDVec' which can be used by applications which +// attempt to access the contents of an __m128 struct directly. It is important to note that accessing the __m128 +// struct directly is considered bad coding practice by Microsoft: @see: https://msdn.microsoft.com/en-us/library/ayeb3ayc.aspx +// +// However, some legacy source code may try to access the contents of an __m128 struct directly so the developer +// can use the SIMDVec as an alias for it. Any casting must be done manually by the developer, as you cannot +// cast or otherwise alias the base NEON data type for intrinsic operations. +// +// A bug was found with the _mm_shuffle_ps intrinsic. If the shuffle permutation was not one of the ones with +// a custom/unique implementation, causing it to fall through to the default shuffle implementation, it was failing +// to return the correct value. This is now fixed. +// +// A bug was found with the _mm_cvtps_epi32 intrinsic. This converts floating point values to integers. +// It was not honoring the correct rounding mode. In SSE the default rounding mode when converting from float to int +// is to use 'round to even' otherwise known as 'bankers rounding'. ARMv7 did not support this feature but ARMv8 does. +// As it stands today, this header file assumes ARMv8. If you are trying to target really old ARM devices, you may get +// a build error. +// +// Support for a number of new intrinsics was added; however, none of them yet have unit-tests to 100% confirm they are +// producing the correct results on NEON. These unit tests will be added as soon as possible. +// +// Here is the list of new intrinsics which have been added: +// +// _mm_cvtss_f32 : extracts the lower order floating point value from the parameter +// _mm_add_ss : adds the scalar single-precision floating point values of a and b +// _mm_div_ps : Divides the four single-precision, floating-point values of a and b.
+// _mm_div_ss : Divides the scalar single - precision floating point value of a by b. +// _mm_sqrt_ss : Computes the approximation of the square root of the scalar single - precision floating point value of in. +// _mm_rsqrt_ps : Computes the approximations of the reciprocal square roots of the four single - precision floating point values of in. +// _mm_comilt_ss : Compares the lower single - precision floating point scalar values of a and b using a less than operation +// _mm_comigt_ss : Compares the lower single - precision floating point scalar values of a and b using a greater than operation. +// _mm_comile_ss : Compares the lower single - precision floating point scalar values of a and b using a less than or equal operation. +// _mm_comige_ss : Compares the lower single - precision floating point scalar values of a and b using a greater than or equal operation. +// _mm_comieq_ss : Compares the lower single - precision floating point scalar values of a and b using an equality operation. +// _mm_comineq_ss : Compares the lower single - precision floating point scalar values of a and b using an inequality operation +// _mm_unpackhi_epi8 : Interleaves the upper 8 signed or unsigned 8 - bit integers in a with the upper 8 signed or unsigned 8 - bit integers in b. +// _mm_unpackhi_epi16: Interleaves the upper 4 signed or unsigned 16 - bit integers in a with the upper 4 signed or unsigned 16 - bit integers in b. 
+// +// ********************************************************************************************************************* +/* +** The MIT license: +** +** Permission is hereby granted, free of charge, to any person obtaining a copy +** of this software and associated documentation files (the "Software"), to deal +** in the Software without restriction, including without limitation the rights +** to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +** copies of the Software, and to permit persons to whom the Software is furnished +** to do so, subject to the following conditions: +** +** The above copyright notice and this permission notice shall be included in all +** copies or substantial portions of the Software. + +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +** AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +** WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +** CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +#define GCC 1 +#define ENABLE_CPP_VERSION 0 + +#if GCC +#define FORCE_INLINE inline __attribute__((always_inline)) +#define ALIGN_STRUCT(x) __attribute__((aligned(x))) +#else +#define FORCE_INLINE inline +#define ALIGN_STRUCT(x) __declspec(align(x)) +#endif + +#include +#include "arm_neon.h" + +/*******************************************************/ +/* MACRO for shuffle parameter for _mm_shuffle_ps(). */ +/* Argument fp3 is a digit[0123] that represents the fp*/ +/* from argument "b" of mm_shuffle_ps that will be */ +/* placed in fp3 of result. fp2 is the same for fp2 in */ +/* result. fp1 is a digit[0123] that represents the fp */ +/* from argument "a" of mm_shuffle_ps that will be */ +/* places in fp1 of result. 
fp0 is the same for fp0 of */ +/* result */ +/*******************************************************/ +#define _MM_SHUFFLE(fp3,fp2,fp1,fp0) (((fp3) << 6) | ((fp2) << 4) | \ + ((fp1) << 2) | ((fp0))) + +typedef float32x4_t __m128; +typedef int32x4_t __m128i; + +// union intended to allow direct access to an __m128 variable using the names that the MSVC +// compiler provides. This union should really only be used when trying to access the members +// of the vector as integer values. GCC/clang allow native access to the float members through +// a simple array access operator (in C since 4.6, in C++ since 4.8). +// +// Ideally direct accesses to SIMD vectors should not be used since it can cause a performance +// hit. If it really is needed however, the original __m128 variable can be aliased with a +// pointer to this union and used to access individual components. The use of this union should +// be hidden behind a macro that is used throughout the codebase to access the members instead +// of always declaring this type of variable. +typedef union ALIGN_STRUCT(16) SIMDVec +{ + float m128_f32[4]; // as floats - do not to use this. Added for convenience. + int8_t m128_i8[16]; // as signed 8-bit integers. + int16_t m128_i16[8]; // as signed 16-bit integers. + int32_t m128_i32[4]; // as signed 32-bit integers. + int64_t m128_i64[2]; // as signed 64-bit integers. + uint8_t m128_u8[16]; // as unsigned 8-bit integers. + uint16_t m128_u16[8]; // as unsigned 16-bit integers. + uint32_t m128_u32[4]; // as unsigned 32-bit integers. + uint64_t m128_u64[2]; // as unsigned 64-bit integers. 
+} SIMDVec; + +// ****************************************** +// Set/get methods +// ****************************************** + +// extracts the lower order floating point value from the parameter : https://msdn.microsoft.com/en-us/library/bb514059%28v=vs.120%29.aspx?f=255&MSPPError=-2147217396 +FORCE_INLINE float _mm_cvtss_f32(__m128 a) +{ + return vgetq_lane_f32(a, 0); +} + +// Sets the 128-bit value to zero https://msdn.microsoft.com/en-us/library/vstudio/ys7dw0kh(v=vs.100).aspx +FORCE_INLINE __m128i _mm_setzero_si128() +{ + return vdupq_n_s32(0); +} + +// Clears the four single-precision, floating-point values. https://msdn.microsoft.com/en-us/library/vstudio/tk1t2tbz(v=vs.100).aspx +FORCE_INLINE __m128 _mm_setzero_ps(void) +{ + return vdupq_n_f32(0); +} + +// Sets the four single-precision, floating-point values to w. https://msdn.microsoft.com/en-us/library/vstudio/2x1se8ha(v=vs.100).aspx +FORCE_INLINE __m128 _mm_set1_ps(float _w) +{ + return vdupq_n_f32(_w); +} + +// Sets the four single-precision, floating-point values to w. https://msdn.microsoft.com/en-us/library/vstudio/2x1se8ha(v=vs.100).aspx +FORCE_INLINE __m128 _mm_set_ps1(float _w) +{ + return vdupq_n_f32(_w); +} + +// Sets the four single-precision, floating-point values to the four inputs. https://msdn.microsoft.com/en-us/library/vstudio/afh0zf75(v=vs.100).aspx +FORCE_INLINE __m128 _mm_set_ps(float w, float z, float y, float x) +{ + float __attribute__((aligned(16))) data[4] = { x, y, z, w }; + return vld1q_f32(data); +} + +// Sets the four single-precision, floating-point values to the four inputs in reverse order. https://msdn.microsoft.com/en-us/library/vstudio/d2172ct3(v=vs.100).aspx +FORCE_INLINE __m128 _mm_setr_ps(float w, float z , float y , float x ) +{ + float __attribute__ ((aligned (16))) data[4] = { w, z, y, x }; + return vld1q_f32(data); +} + +// Sets the 4 signed 32-bit integer values to i. 
https://msdn.microsoft.com/en-us/library/vstudio/h4xscxat(v=vs.100).aspx
+FORCE_INLINE __m128i _mm_set1_epi32(int _i)
+{
+    return vdupq_n_s32(_i);
+}
+
+// Sets the 4 signed 32-bit integer values. https://msdn.microsoft.com/en-us/library/vstudio/019beekt(v=vs.100).aspx
+FORCE_INLINE __m128i _mm_set_epi32(int i3, int i2, int i1, int i0)
+{
+    int32_t __attribute__((aligned(16))) data[4] = { i0, i1, i2, i3 };
+    return vld1q_s32(data);
+}
+
+// Stores four single-precision, floating-point values. https://msdn.microsoft.com/en-us/library/vstudio/s3h4ay6y(v=vs.100).aspx
+FORCE_INLINE void _mm_store_ps(float *p, __m128 a)
+{
+    vst1q_f32(p, a);
+}
+
+// Stores four single-precision, floating-point values. https://msdn.microsoft.com/en-us/library/44e30x22(v=vs.100).aspx
+FORCE_INLINE void _mm_storeu_ps(float *p, __m128 a)
+{
+    vst1q_f32(p, a);
+}
+
+// Stores four 32-bit integer values (as a __m128i value) at the address p. https://msdn.microsoft.com/en-us/library/vstudio/edk11s13(v=vs.100).aspx
+FORCE_INLINE void _mm_store_si128(__m128i *p, __m128i a )
+{
+    vst1q_s32((int32_t*) p,a);
+}
+
+// Stores the lower single - precision, floating - point value. https://msdn.microsoft.com/en-us/library/tzz10fbx(v=vs.100).aspx
+FORCE_INLINE void _mm_store_ss(float *p, __m128 a)
+{
+    vst1q_lane_f32(p, a, 0);
+}
+
+// Stores the lower 64 bits of b at the address a. Only those 8 bytes are written; the 8 bytes that follow them are neither read nor modified, so a may point at a 64-bit destination. https://msdn.microsoft.com/en-us/library/hhwf428f%28v=vs.90%29.aspx
+FORCE_INLINE void _mm_storel_epi64(__m128i* a, __m128i b)
+{
+    vst1_s32((int32_t*)a, vget_low_s32(b));
+}
+
+// Loads a single single-precision, floating-point value, copying it into all four words https://msdn.microsoft.com/en-us/library/vstudio/5cdkf716(v=vs.100).aspx
+FORCE_INLINE __m128 _mm_load1_ps(const float * p)
+{
+    return vld1q_dup_f32(p);
+}
+
+// Loads four single-precision, floating-point values. 
https://msdn.microsoft.com/en-us/library/vstudio/zzd50xxt(v=vs.100).aspx +FORCE_INLINE __m128 _mm_load_ps(const float * p) +{ + return vld1q_f32(p); +} + +// Loads four single-precision, floating-point values. https://msdn.microsoft.com/en-us/library/x1b16s7z%28v=vs.90%29.aspx +FORCE_INLINE __m128 _mm_loadu_ps(const float * p) +{ + // for neon, alignment doesn't matter, so _mm_load_ps and _mm_loadu_ps are equivalent for neon + return vld1q_f32(p); +} + +// Loads an single - precision, floating - point value into the low word and clears the upper three words. https://msdn.microsoft.com/en-us/library/548bb9h4%28v=vs.90%29.aspx +FORCE_INLINE __m128 _mm_load_ss(const float * p) +{ + __m128 result = vdupq_n_f32(0); + return vsetq_lane_f32(*p, result, 0); +} + + +// ****************************************** +// Logic/Binary operations +// ****************************************** + +// Compares for inequality. https://msdn.microsoft.com/en-us/library/sf44thbx(v=vs.100).aspx +FORCE_INLINE __m128 _mm_cmpneq_ps(__m128 a, __m128 b) +{ + return (__m128)vmvnq_s32((__m128i)vceqq_f32(a, b)); +} + +// Computes the bitwise AND-NOT of the four single-precision, floating-point values of a and b. https://msdn.microsoft.com/en-us/library/vstudio/68h7wd02(v=vs.100).aspx +FORCE_INLINE __m128 _mm_andnot_ps(__m128 a, __m128 b) +{ + return (__m128)vbicq_s32((__m128i)b, (__m128i)a); // *NOTE* argument swap +} + +// Computes the bitwise AND of the 128-bit value in b and the bitwise NOT of the 128-bit value in a. https://msdn.microsoft.com/en-us/library/vstudio/1beaceh8(v=vs.100).aspx +FORCE_INLINE __m128i _mm_andnot_si128(__m128i a, __m128i b) +{ + return (__m128i)vbicq_s32(b, a); // *NOTE* argument swap +} + +// Computes the bitwise AND of the 128-bit value in a and the 128-bit value in b. 
https://msdn.microsoft.com/en-us/library/vstudio/6d1txsa8(v=vs.100).aspx +FORCE_INLINE __m128i _mm_and_si128(__m128i a, __m128i b) +{ + return (__m128i)vandq_s32(a, b); +} + +// Computes the bitwise AND of the four single-precision, floating-point values of a and b. https://msdn.microsoft.com/en-us/library/vstudio/73ck1xc5(v=vs.100).aspx +FORCE_INLINE __m128 _mm_and_ps(__m128 a, __m128 b) +{ + return (__m128)vandq_s32((__m128i)a, (__m128i)b); +} + +// Computes the bitwise OR of the four single-precision, floating-point values of a and b. https://msdn.microsoft.com/en-us/library/vstudio/7ctdsyy0(v=vs.100).aspx +FORCE_INLINE __m128 _mm_or_ps(__m128 a, __m128 b) +{ + return (__m128)vorrq_s32((__m128i)a, (__m128i)b); +} + +// Computes bitwise EXOR (exclusive-or) of the four single-precision, floating-point values of a and b. https://msdn.microsoft.com/en-us/library/ss6k3wk8(v=vs.100).aspx +FORCE_INLINE __m128 _mm_xor_ps(__m128 a, __m128 b) +{ + return (__m128)veorq_s32((__m128i)a, (__m128i)b); +} + +// Computes the bitwise OR of the 128-bit value in a and the 128-bit value in b. https://msdn.microsoft.com/en-us/library/vstudio/ew8ty0db(v=vs.100).aspx +FORCE_INLINE __m128i _mm_or_si128(__m128i a, __m128i b) +{ + return (__m128i)vorrq_s32(a, b); +} + +// Computes the bitwise XOR of the 128-bit value in a and the 128-bit value in b. https://msdn.microsoft.com/en-us/library/fzt08www(v=vs.100).aspx +FORCE_INLINE __m128i _mm_xor_si128(__m128i a, __m128i b) +{ + return veorq_s32(a, b); +} + +// NEON does not provide this method +// Creates a 4-bit mask from the most significant bits of the four single-precision, floating-point values. 
https://msdn.microsoft.com/en-us/library/vstudio/4490ys29(v=vs.100).aspx +FORCE_INLINE int _mm_movemask_ps(__m128 a) +{ +#if ENABLE_CPP_VERSION // I am not yet convinced that the NEON version is faster than the C version of this + uint32x4_t &ia = *(uint32x4_t *)&a; + return (ia[0] >> 31) | ((ia[1] >> 30) & 2) | ((ia[2] >> 29) & 4) | ((ia[3] >> 28) & 8); +#else + static const uint32x4_t movemask = { 1, 2, 4, 8 }; + static const uint32x4_t highbit = { 0x80000000, 0x80000000, 0x80000000, 0x80000000 }; + uint32x4_t t0 = vreinterpretq_u32_f32(a); + uint32x4_t t1 = vtstq_u32(t0, highbit); + uint32x4_t t2 = vandq_u32(t1, movemask); + uint32x2_t t3 = vorr_u32(vget_low_u32(t2), vget_high_u32(t2)); + return vget_lane_u32(t3, 0) | vget_lane_u32(t3, 1); +#endif +} + +// Takes the upper 64 bits of a and places it in the low end of the result +// Takes the lower 64 bits of b and places it into the high end of the result. +FORCE_INLINE __m128 _mm_shuffle_ps_1032(__m128 a, __m128 b) +{ + return vcombine_f32(vget_high_f32(a), vget_low_f32(b)); +} + +// takes the lower two 32-bit values from a and swaps them and places in high end of result +// takes the higher two 32 bit values from b and swaps them and places in low end of result. 
+FORCE_INLINE __m128 _mm_shuffle_ps_2301(__m128 a, __m128 b) +{ + return vcombine_f32(vrev64_f32(vget_low_f32(a)), vrev64_f32(vget_high_f32(b))); +} + +// keeps the low 64 bits of b in the low and puts the high 64 bits of a in the high +FORCE_INLINE __m128 _mm_shuffle_ps_3210(__m128 a, __m128 b) +{ + return vcombine_f32(vget_low_f32(a), vget_high_f32(b)); +} + +FORCE_INLINE __m128 _mm_shuffle_ps_0011(__m128 a, __m128 b) +{ + return vcombine_f32(vdup_n_f32(vgetq_lane_f32(a, 1)), vdup_n_f32(vgetq_lane_f32(b, 0))); +} + +FORCE_INLINE __m128 _mm_shuffle_ps_0022(__m128 a, __m128 b) +{ + return vcombine_f32(vdup_n_f32(vgetq_lane_f32(a, 2)), vdup_n_f32(vgetq_lane_f32(b, 0))); +} + +FORCE_INLINE __m128 _mm_shuffle_ps_2200(__m128 a, __m128 b) +{ + return vcombine_f32(vdup_n_f32(vgetq_lane_f32(a, 0)), vdup_n_f32(vgetq_lane_f32(b, 2))); +} + +FORCE_INLINE __m128 _mm_shuffle_ps_3202(__m128 a, __m128 b) +{ + float32_t a0 = vgetq_lane_f32(a, 0); + float32_t a2 = vgetq_lane_f32(a, 2); + float32x2_t aVal = vdup_n_f32(a2); + aVal = vset_lane_f32(a0, aVal, 1); + return vcombine_f32(aVal, vget_high_f32(b)); +} + +FORCE_INLINE __m128 _mm_shuffle_ps_1133(__m128 a, __m128 b) +{ + return vcombine_f32(vdup_n_f32(vgetq_lane_f32(a, 3)), vdup_n_f32(vgetq_lane_f32(b, 1))); +} + +FORCE_INLINE __m128 _mm_shuffle_ps_2010(__m128 a, __m128 b) +{ + float32_t b0 = vgetq_lane_f32(b, 0); + float32_t b2 = vgetq_lane_f32(b, 2); + float32x2_t bVal = vdup_n_f32(b0); + bVal = vset_lane_f32(b2, bVal, 1); + return vcombine_f32(vget_low_f32(a), bVal); +} + +FORCE_INLINE __m128 _mm_shuffle_ps_2001(__m128 a, __m128 b) +{ + float32_t b0 = vgetq_lane_f32(b, 0); + float32_t b2 = vgetq_lane_f32(b, 2); + float32x2_t bVal = vdup_n_f32(b0); + bVal = vset_lane_f32(b2, bVal, 1); + return vcombine_f32(vrev64_f32(vget_low_f32(a)), bVal); +} + +FORCE_INLINE __m128 _mm_shuffle_ps_2032(__m128 a, __m128 b) +{ + float32_t b0 = vgetq_lane_f32(b, 0); + float32_t b2 = vgetq_lane_f32(b, 2); + float32x2_t bVal = vdup_n_f32(b0); + 
bVal = vset_lane_f32(b2, bVal, 1); + return vcombine_f32(vget_high_f32(a), bVal); +} + + +// NEON does not support a general purpose permute intrinsic +// Currently I am not sure whether the C implementation is faster or slower than the NEON version. +// Note, this has to be expanded as a template because the shuffle value must be an immediate value. +// The same is true on SSE as well. +// Selects four specific single-precision, floating-point values from a and b, based on the mask i. https://msdn.microsoft.com/en-us/library/vstudio/5f0858x0(v=vs.100).aspx +template +FORCE_INLINE __m128 _mm_shuffle_ps_default(__m128 a, __m128 b) +{ +#if ENABLE_CPP_VERSION // I am not convinced that the NEON version is faster than the C version yet. + __m128 ret; + ret[0] = a[i & 0x3]; + ret[1] = a[(i >> 2) & 0x3]; + ret[2] = b[(i >> 4) & 0x03]; + ret[3] = b[(i >> 6) & 0x03]; + return ret; +#else + __m128 ret = vmovq_n_f32(vgetq_lane_f32(a, i & 0x3)); + ret = vsetq_lane_f32(vgetq_lane_f32(a, (i >> 2) & 0x3), ret, 1); + ret = vsetq_lane_f32(vgetq_lane_f32(b, (i >> 4) & 0x3), ret, 2); + ret = vsetq_lane_f32(vgetq_lane_f32(b, (i >> 6) & 0x3), ret, 3); + return ret; +#endif +} + +template +FORCE_INLINE __m128 _mm_shuffle_ps_function(__m128 a, __m128 b) +{ + switch (i) + { + case _MM_SHUFFLE(1, 0, 3, 2): + return _mm_shuffle_ps_1032(a, b); + break; + case _MM_SHUFFLE(2, 3, 0, 1): + return _mm_shuffle_ps_2301(a, b); + break; + case _MM_SHUFFLE(3, 2, 1, 0): + return _mm_shuffle_ps_3210(a, b); + break; + case _MM_SHUFFLE(0, 0, 1, 1): + return _mm_shuffle_ps_0011(a, b); + break; + case _MM_SHUFFLE(0, 0, 2, 2): + return _mm_shuffle_ps_0022(a, b); + break; + case _MM_SHUFFLE(2, 2, 0, 0): + return _mm_shuffle_ps_2200(a, b); + break; + case _MM_SHUFFLE(3, 2, 0, 2): + return _mm_shuffle_ps_3202(a, b); + break; + case _MM_SHUFFLE(1, 1, 3, 3): + return _mm_shuffle_ps_1133(a, b); + break; + case _MM_SHUFFLE(2, 0, 1, 0): + return _mm_shuffle_ps_2010(a, b); + break; + case _MM_SHUFFLE(2, 0, 0, 1): + 
return _mm_shuffle_ps_2001(a, b);
+        break;
+    case _MM_SHUFFLE(2, 0, 3, 2):
+        return _mm_shuffle_ps_2032(a, b);
+        break;
+    }
+    return _mm_shuffle_ps_default<i>(a, b);
+}
+
+#define _mm_shuffle_ps(a,b,i) _mm_shuffle_ps_function<i>(a,b)
+
+// Takes the upper 64 bits of a and places it in the low end of the result
+// Takes the lower 64 bits of b and places it into the high end of the result.
+FORCE_INLINE __m128i _mm_shuffle_epi_1032(__m128i a, __m128i b)
+{
+    return vcombine_s32(vget_high_s32(a), vget_low_s32(b));
+}
+
+// takes the lower two 32-bit values from a and swaps them and places in low end of result
+// takes the higher two 32 bit values from b and swaps them and places in high end of result.
+FORCE_INLINE __m128i _mm_shuffle_epi_2301(__m128i a, __m128i b)
+{
+    return vcombine_s32(vrev64_s32(vget_low_s32(a)), vrev64_s32(vget_high_s32(b)));
+}
+
+// shift a right by 32 bits, and put the lower 32 bits of a into the upper 32 bits of b
+// when a and b are the same, rotates the least significant 32 bits into the most significant 32 bits, and shifts the rest down
+FORCE_INLINE __m128i _mm_shuffle_epi_0321(__m128i a, __m128i b)
+{
+    return vextq_s32(a, b, 1);
+}
+
+// shift a left by 32 bits, and put the upper 32 bits of b into the lower 32 bits of a
+// when a and b are the same, rotates the most significant 32 bits into the least significant 32 bits, and shifts the rest up
+FORCE_INLINE __m128i _mm_shuffle_epi_2103(__m128i a, __m128i b)
+{
+    return vextq_s32(a, b, 3);
+}
+
+// takes the lower 64 bits of a and places them in the lower 64 bits of the result
+// takes the lower 64 bits of b and places them in the upper 64 bits of the result (callers in this header pass the same vector for a and b)
+FORCE_INLINE __m128i _mm_shuffle_epi_1010(__m128i a, __m128i b)
+{
+    return vcombine_s32(vget_low_s32(a), vget_low_s32(b));
+}
+
+// takes the lower 64 bits of a, swaps the 0 and 1 elements, and places them in the lower 64 bits of the result
+// takes the lower 64 bits of b and places them in the upper 64 bits of the result
+FORCE_INLINE __m128i _mm_shuffle_epi_1001(__m128i a, __m128i b)
+{
+    return 
vcombine_s32(vrev64_s32(vget_low_s32(a)), vget_low_s32(b)); +} + +// gets the lower 64 bits of a, swaps the 0 and 1 elements and places it in the upper 64 bits +// gets the lower 64 bits of b, swaps the 0 and 1 elements, and places it in the lower 64 bits +FORCE_INLINE __m128i _mm_shuffle_epi_0101(__m128i a, __m128i b) +{ + return vcombine_s32(vrev64_s32(vget_low_s32(a)), vrev64_s32(vget_low_s32(b))); +} + +FORCE_INLINE __m128i _mm_shuffle_epi_2211(__m128i a, __m128i b) +{ + return vcombine_s32(vdup_n_s32(vgetq_lane_s32(a, 1)), vdup_n_s32(vgetq_lane_s32(b, 2))); +} + +FORCE_INLINE __m128i _mm_shuffle_epi_0122(__m128i a, __m128i b) +{ + return vcombine_s32(vdup_n_s32(vgetq_lane_s32(a, 2)), vrev64_s32(vget_low_s32(b))); +} + +FORCE_INLINE __m128i _mm_shuffle_epi_3332(__m128i a, __m128i b) +{ + return vcombine_s32(vget_high_s32(a), vdup_n_s32(vgetq_lane_s32(b, 3))); +} + +template +FORCE_INLINE __m128i _mm_shuffle_epi32_default(__m128i a, __m128i b) +{ +#if ENABLE_CPP_VERSION + __m128i ret; + ret[0] = a[i & 0x3]; + ret[1] = a[(i >> 2) & 0x3]; + ret[2] = b[(i >> 4) & 0x03]; + ret[3] = b[(i >> 6) & 0x03]; + return ret; +#else + __m128i ret = vmovq_n_s32(vgetq_lane_s32(a, i & 0x3)); + ret = vsetq_lane_s32(vgetq_lane_s32(a, (i >> 2) & 0x3), ret, 1); + ret = vsetq_lane_s32(vgetq_lane_s32(b, (i >> 4) & 0x3), ret, 2); + ret = vsetq_lane_s32(vgetq_lane_s32(b, (i >> 6) & 0x3), ret, 3); + return ret; +#endif +} + +template +FORCE_INLINE __m128i _mm_shuffle_epi32_function(__m128i a, __m128i b) +{ + switch (i) + { + case _MM_SHUFFLE(1, 0, 3, 2): return _mm_shuffle_epi_1032(a, b); break; + case _MM_SHUFFLE(2, 3, 0, 1): return _mm_shuffle_epi_2301(a, b); break; + case _MM_SHUFFLE(0, 3, 2, 1): return _mm_shuffle_epi_0321(a, b); break; + case _MM_SHUFFLE(2, 1, 0, 3): return _mm_shuffle_epi_2103(a, b); break; + case _MM_SHUFFLE(1, 0, 1, 0): return _mm_shuffle_epi_1010(a, b); break; + case _MM_SHUFFLE(1, 0, 0, 1): return _mm_shuffle_epi_1001(a, b); break; + case _MM_SHUFFLE(0, 1, 0, 
1): return _mm_shuffle_epi_0101(a, b); break; + case _MM_SHUFFLE(2, 2, 1, 1): return _mm_shuffle_epi_2211(a, b); break; + case _MM_SHUFFLE(0, 1, 2, 2): return _mm_shuffle_epi_0122(a, b); break; + case _MM_SHUFFLE(3, 3, 3, 2): return _mm_shuffle_epi_3332(a, b); break; + default: return _mm_shuffle_epi32_default(a, b); + } +} + +template +FORCE_INLINE __m128i _mm_shuffle_epi32_splat(__m128i a) +{ + return vdupq_n_s32(vgetq_lane_s32(a, i)); +} + +template +FORCE_INLINE __m128i _mm_shuffle_epi32_single(__m128i a) +{ + switch (i) + { + case _MM_SHUFFLE(0, 0, 0, 0): return _mm_shuffle_epi32_splat<0>(a); break; + case _MM_SHUFFLE(1, 1, 1, 1): return _mm_shuffle_epi32_splat<1>(a); break; + case _MM_SHUFFLE(2, 2, 2, 2): return _mm_shuffle_epi32_splat<2>(a); break; + case _MM_SHUFFLE(3, 3, 3, 3): return _mm_shuffle_epi32_splat<3>(a); break; + default: return _mm_shuffle_epi32_function(a, a); + } +} + +// Shuffles the 4 signed or unsigned 32-bit integers in a as specified by imm. https://msdn.microsoft.com/en-us/library/56f67xbk%28v=vs.90%29.aspx +#define _mm_shuffle_epi32(a,i) _mm_shuffle_epi32_single(a) + +template +FORCE_INLINE __m128i _mm_shufflehi_epi16_function(__m128i a) +{ + int16x8_t ret = (int16x8_t)a; + int16x4_t highBits = vget_high_s16(ret); + ret = vsetq_lane_s16(vget_lane_s16(highBits, i & 0x3), ret, 4); + ret = vsetq_lane_s16(vget_lane_s16(highBits, (i >> 2) & 0x3), ret, 5); + ret = vsetq_lane_s16(vget_lane_s16(highBits, (i >> 4) & 0x3), ret, 6); + ret = vsetq_lane_s16(vget_lane_s16(highBits, (i >> 6) & 0x3), ret, 7); + return (__m128i)ret; +} + +// Shuffles the upper 4 signed or unsigned 16 - bit integers in a as specified by imm. https://msdn.microsoft.com/en-us/library/13ywktbs(v=vs.100).aspx +#define _mm_shufflehi_epi16(a,i) _mm_shufflehi_epi16_function(a) + +// Shifts the 4 signed or unsigned 32-bit integers in a left by count bits while shifting in zeros. 
: https://msdn.microsoft.com/en-us/library/z2k3bbtb%28v=vs.90%29.aspx +#define _mm_slli_epi32(a, imm) (__m128i)vshlq_n_s32(a,imm) + +//Shifts the 4 signed or unsigned 32-bit integers in a right by count bits while shifting in zeros. https://msdn.microsoft.com/en-us/library/w486zcfa(v=vs.100).aspx +#define _mm_srli_epi32( a, imm ) (__m128i)vshrq_n_u32((uint32x4_t)a, imm) + +// Shifts the 4 signed 32 - bit integers in a right by count bits while shifting in the sign bit. https://msdn.microsoft.com/en-us/library/z1939387(v=vs.100).aspx +#define _mm_srai_epi32( a, imm ) vshrq_n_s32(a, imm) + +// Shifts the 128 - bit value in a right by imm bytes while shifting in zeros.imm must be an immediate. https://msdn.microsoft.com/en-us/library/305w28yz(v=vs.100).aspx +//#define _mm_srli_si128( a, imm ) (__m128i)vmaxq_s8((int8x16_t)a, vextq_s8((int8x16_t)a, vdupq_n_s8(0), imm)) +#define _mm_srli_si128( a, imm ) (__m128i)vextq_s8((int8x16_t)a, vdupq_n_s8(0), (imm)) + +// Shifts the 128-bit value in a left by imm bytes while shifting in zeros. imm must be an immediate. https://msdn.microsoft.com/en-us/library/34d3k2kt(v=vs.100).aspx +#define _mm_slli_si128( a, imm ) (__m128i)vextq_s8(vdupq_n_s8(0), (int8x16_t)a, 16 - (imm)) + +// NEON does not provide a version of this function, here is an article about some ways to repro the results. +// http://stackoverflow.com/questions/11870910/sse-mm-movemask-epi8-equivalent-method-for-arm-neon +// Creates a 16-bit mask from the most significant bits of the 16 signed or unsigned 8-bit integers in a and zero extends the upper bits. 
https://msdn.microsoft.com/en-us/library/vstudio/s090c8fk(v=vs.100).aspx +FORCE_INLINE int _mm_movemask_epi8(__m128i _a) +{ + uint8x16_t input = (uint8x16_t)_a; + const int8_t __attribute__((aligned(16))) xr[8] = { -7, -6, -5, -4, -3, -2, -1, 0 }; + uint8x8_t mask_and = vdup_n_u8(0x80); + int8x8_t mask_shift = vld1_s8(xr); + + uint8x8_t lo = vget_low_u8(input); + uint8x8_t hi = vget_high_u8(input); + + lo = vand_u8(lo, mask_and); + lo = vshl_u8(lo, mask_shift); + + hi = vand_u8(hi, mask_and); + hi = vshl_u8(hi, mask_shift); + + lo = vpadd_u8(lo, lo); + lo = vpadd_u8(lo, lo); + lo = vpadd_u8(lo, lo); + + hi = vpadd_u8(hi, hi); + hi = vpadd_u8(hi, hi); + hi = vpadd_u8(hi, hi); + + return ((hi[0] << 8) | (lo[0] & 0xFF)); +} + + +// ****************************************** +// Math operations +// ****************************************** + +// Subtracts the four single-precision, floating-point values of a and b. https://msdn.microsoft.com/en-us/library/vstudio/1zad2k61(v=vs.100).aspx +FORCE_INLINE __m128 _mm_sub_ps(__m128 a, __m128 b) +{ + return vsubq_f32(a, b); +} + +// Subtracts the 4 signed or unsigned 32-bit integers of b from the 4 signed or unsigned 32-bit integers of a. https://msdn.microsoft.com/en-us/library/vstudio/fhh866h0(v=vs.100).aspx +FORCE_INLINE __m128i _mm_sub_epi32(__m128i a, __m128i b) +{ + return vsubq_s32(a, b); +} + +// Adds the four single-precision, floating-point values of a and b. https://msdn.microsoft.com/en-us/library/vstudio/c9848chc(v=vs.100).aspx +FORCE_INLINE __m128 _mm_add_ps(__m128 a, __m128 b) +{ + return vaddq_f32(a, b); +} + +// adds the scalar single-precision floating point values of a and b. https://msdn.microsoft.com/en-us/library/be94x2y6(v=vs.100).aspx +FORCE_INLINE __m128 _mm_add_ss(__m128 a, __m128 b) +{ + const float32_t b0 = vgetq_lane_f32(b, 0); + float32x4_t value = vdupq_n_f32(0); + + //the upper values in the result must be the remnants of . 
+ value = vsetq_lane_f32(b0, value, 0); + return vaddq_f32(a, value); +} + +// Adds the 4 signed or unsigned 32-bit integers in a to the 4 signed or unsigned 32-bit integers in b. https://msdn.microsoft.com/en-us/library/vstudio/09xs4fkk(v=vs.100).aspx +FORCE_INLINE __m128i _mm_add_epi32(__m128i a, __m128i b) +{ + return vaddq_s32(a, b); +} + +// Adds the 8 signed or unsigned 16-bit integers in a to the 8 signed or unsigned 16-bit integers in b. https://msdn.microsoft.com/en-us/library/fceha5k4(v=vs.100).aspx +FORCE_INLINE __m128i _mm_add_epi16(__m128i a, __m128i b) +{ + return (__m128i)vaddq_s16((int16x8_t)a, (int16x8_t)b); +} + +// Multiplies the 8 signed or unsigned 16-bit integers from a by the 8 signed or unsigned 16-bit integers from b. https://msdn.microsoft.com/en-us/library/vstudio/9ks1472s(v=vs.100).aspx +FORCE_INLINE __m128i _mm_mullo_epi16(__m128i a, __m128i b) +{ + return (__m128i)vmulq_s16((int16x8_t)a, (int16x8_t)b); +} + +// Multiplies the 4 signed or unsigned 32-bit integers from a by the 4 signed or unsigned 32-bit integers from b. https://msdn.microsoft.com/en-us/library/vstudio/bb531409(v=vs.100).aspx +FORCE_INLINE __m128i _mm_mullo_epi32 (__m128i a, __m128i b) +{ + return (__m128i)vmulq_s32((int32x4_t)a,(int32x4_t)b); +} + +// Multiplies the four single-precision, floating-point values of a and b. https://msdn.microsoft.com/en-us/library/vstudio/22kbk6t9(v=vs.100).aspx +FORCE_INLINE __m128 _mm_mul_ps(__m128 a, __m128 b) +{ + return vmulq_f32(a, b); +} + +// Divides the four single-precision, floating-point values of a and b. https://msdn.microsoft.com/en-us/library/edaw8147(v=vs.100).aspx +FORCE_INLINE __m128 _mm_div_ps(__m128 a, __m128 b) +{ + __m128 recip = vrecpeq_f32(b); + recip = vmulq_f32(recip, vrecpsq_f32(recip, b)); + return vmulq_f32(a, recip); +} + +// Divides the scalar single-precision floating point value of a by b. 
https://msdn.microsoft.com/en-us/library/4y73xa49(v=vs.100).aspx +FORCE_INLINE __m128 _mm_div_ss(__m128 a, __m128 b) +{ + float32x4_t value; + float32x4_t result = a; + value = _mm_div_ps(a, b); + return vsetq_lane_f32(vgetq_lane_f32(value, 0), result, 0); +} + +// This version does additional iterations to improve accuracy. Between 1 and 4 recommended. +// Computes the approximations of reciprocals of the four single-precision, floating-point values of a. https://msdn.microsoft.com/en-us/library/vstudio/796k1tty(v=vs.100).aspx +FORCE_INLINE __m128 recipq_newton(__m128 in, int n) +{ + __m128 recip = vrecpeq_f32(in); + for (int i = 0; i(a),reinterpret_cast(b)); +} +#else +// Just standard old Intel! +#include +#endif + diff --git a/src/ila/sse_to_neon.hpp b/src/ila/sse_to_neon.hpp new file mode 100644 index 0000000000..c910491bf3 --- /dev/null +++ b/src/ila/sse_to_neon.hpp @@ -0,0 +1,187 @@ +// +// sse_to_neon.hpp +// neon_test +// +// Created by Tim Oberhauser on 11/16/13. +// Copyright (c) 2013 Tim Oberhauser. All rights reserved. +// + +#ifndef neon_test_sse_to_neon_hpp +#define neon_test_sse_to_neon_hpp + +#include + +#if defined(__MM_MALLOC_H) +// copied from mm_malloc.h { +#include + +/* We can't depend on since the prototype of posix_memalign + may not be visible. 
*/ +#ifndef __cplusplus +extern int posix_memalign (void **, size_t, size_t); +#else +extern "C" int posix_memalign (void **, size_t, size_t) throw (); +#endif + +static __inline void * +_mm_malloc (size_t size, size_t alignment) +{ + void *ptr; + if (alignment == 1) + return malloc (size); + if (alignment == 2 || (sizeof (void *) == 8 && alignment == 4)) + alignment = sizeof (void *); + if (posix_memalign (&ptr, alignment, size) == 0) + return ptr; + else + return NULL; +} + +static __inline void +_mm_free (void * ptr) +{ + free (ptr); +} +// } copied from mm_malloc.h +#endif + + +typedef int16x8_t __m128i; +typedef float32x4_t __m128; + + +// ADDITION +inline __m128i _mm_add_epi16(const __m128i& a, const __m128i& b){ + return vaddq_s16(reinterpret_cast(a),reinterpret_cast(b)); +} + +inline __m128 _mm_add_ps(const __m128& a, const __m128& b){ + return vaddq_f32(a,b); +} + + +// SUBTRACTION +inline __m128i _mm_sub_epi16(const __m128i& a, const __m128i& b){ + return vsubq_s16(reinterpret_cast(a),reinterpret_cast(b)); +} + +inline __m128 _mm_sub_ps(const __m128& a, const __m128& b){ + return vsubq_f32(a,b); +} + + +// MULTIPLICATION +#if 0 +inline __m128i _mm_mullo_epi16(const __m128i& a, const __m128i& b){ + return vqrdmulhq_s16(reinterpret_cast(a),reinterpret_cast(b)); +} +#endif + +inline __m128 _mm_mul_ps(const __m128& a, const __m128& b){ + return vmulq_f32(a,b); +} + + +// SET VALUE +inline __m128i _mm_set1_epi16(const int16_t w){ + return vmovq_n_s16(w); +} + +inline __m128i _mm_setzero_si128(){ + return vmovq_n_s16(0); +} + +inline __m128 _mm_set1_ps(const float32_t& w){ + return vmovq_n_f32(w); +} + + +// STORE +inline void _mm_storeu_si128(__m128i* p, __m128i& a){ + vst1q_s16(reinterpret_cast(p),reinterpret_cast(a)); +} + +inline void _mm_store_ps(float32_t* p, __m128&a){ + vst1q_f32(p,a); +} + + +// LOAD +inline __m128i _mm_loadu_si128(__m128i* p){//For SSE address p does not need be 16-byte aligned + return 
reinterpret_cast<__m128i>(vld1q_s16(reinterpret_cast(p))); +} + +inline __m128i _mm_load_si128(__m128i* p){//For SSE address p must be 16-byte aligned + return reinterpret_cast<__m128i>(vld1q_s16(reinterpret_cast(p))); +} + +inline __m128 _mm_load_ps(const float32_t* p){ + return reinterpret_cast<__m128>(vld1q_f32(p)); +} + + +// SHIFT OPERATIONS +inline __m128i _mm_srai_epi16(const __m128i& a, const int count){ + int16x8_t b = vmovq_n_s16(-count); + return reinterpret_cast<__m128i>(vshlq_s16(a,b)); + // return vrshrq_n_s16(a, count);// TODO Argument to '__builtin_neon_vrshrq_n_v' must be a constant integer +} + + +// MIN/MAX OPERATIONS +inline __m128 _mm_max_ps(const __m128& a, const __m128& b){ + return reinterpret_cast<__m128>(vmaxq_f32(reinterpret_cast(a),reinterpret_cast(b))); +} + + +// SINGLE ELEMENT ACCESS +inline int16_t _mm_extract_epi16(__m128i& a, int index){ + return (reinterpret_cast(&a))[index]; + // return vgetq_lane_s16(a,index);// TODO Argument to '__builtin_neon_vgetq_lane_i16' must be a constant integer +} + + +// MISCELLANOUS +inline __m128i _mm_sad_epu8 (__m128i a, __m128i b){ + uint64x2_t sad = reinterpret_cast(vabdq_u8(reinterpret_cast(a),reinterpret_cast(b))); + sad = reinterpret_cast(vpaddlq_u8(reinterpret_cast(sad))); + sad = reinterpret_cast(vpaddlq_u16(reinterpret_cast(sad))); + sad = vpaddlq_u32(reinterpret_cast(sad)); + return reinterpret_cast<__m128i>(sad); +} + + +// LOGICAL OPERATIONS +inline __m128 _mm_and_ps(__m128& a, __m128& b){ + return reinterpret_cast<__m128>(vandq_u32(reinterpret_cast(a),reinterpret_cast(b))); +} + + +// CONVERSIONS +inline __m128i _mm_packus_epi16 (const __m128i a, const __m128i b){ + __m128i result = _mm_setzero_si128(); + int8x8_t* a_narrow = reinterpret_cast(&result); + int8x8_t* b_narrow = &a_narrow[1]; + *a_narrow = reinterpret_cast(vqmovun_s16(a)); + *b_narrow = reinterpret_cast(vqmovun_s16(b)); + return result; +} + +// In my case this function was only needed to convert 8 bit to 16 bit integers by 
extending with zeros, the general case is not implemented!!! +inline __m128i _mm_unpacklo_epi8(__m128i a, const __m128i dummy_zero){ + // dummy_zero is a dummy variable + uint8x8_t* a_low = reinterpret_cast(&a); + return reinterpret_cast<__m128i>(vmovl_u8(*a_low)); +} + +// In my case this function was only needed to convert 8 bit to 16 bit integers by extending with zeros, the general case is not implemented!!! +inline __m128i _mm_unpackhi_epi8(__m128i a, const __m128i dummy_zero){ + // dummy_zero is a dummy variable + uint8x8_t* a_low = reinterpret_cast(&a); + return reinterpret_cast<__m128i>(vmovl_u8(a_low[1])); +} + + + + +#endif diff --git a/src/swrenderer/drawers/r_draw_rgba.h b/src/swrenderer/drawers/r_draw_rgba.h index 3e95c8cb67..cdb3eda4ab 100644 --- a/src/swrenderer/drawers/r_draw_rgba.h +++ b/src/swrenderer/drawers/r_draw_rgba.h @@ -36,6 +36,8 @@ #ifndef NO_SSE #include +#else +#include "ila/ila.h" #endif struct FSpecialColormap; From eb7955694b2699ef4fbdb5bb4298862d29b9156f Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 5 Mar 2017 16:57:58 +0100 Subject: [PATCH 905/912] Disable diminishing light in software renderer --- src/swrenderer/scene/r_light.cpp | 5 +++++ src/swrenderer/scene/r_light.h | 5 ++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src/swrenderer/scene/r_light.cpp b/src/swrenderer/scene/r_light.cpp index 5be847bf8d..fa9ae27f93 100644 --- a/src/swrenderer/scene/r_light.cpp +++ b/src/swrenderer/scene/r_light.cpp @@ -145,6 +145,11 @@ namespace swrenderer FloorVisibility = 160.0 * FloorVisibility / viewport->FocalLengthY; TiltVisibility = float(vis * FocalTangent * (16.f * 320.f) / viewwidth); + + // Disable diminishing light (To do: make a cvar control this) + WallVisibility = 0.0; + FloorVisibility = 0.0; + TiltVisibility = 0.0f; } // Controls how quickly light ramps across a 1/z range. 
Set this, and it diff --git a/src/swrenderer/scene/r_light.h b/src/swrenderer/scene/r_light.h index fd919fba15..7b127acb2e 100644 --- a/src/swrenderer/scene/r_light.h +++ b/src/swrenderer/scene/r_light.h @@ -32,7 +32,10 @@ // Convert a light level into an unbounded colormap index (shade). Result is // fixed point. Why the +12? I wish I knew, but experimentation indicates it // is necessary in order to best reproduce Doom's original lighting. -#define LIGHT2SHADE(l) ((NUMCOLORMAPS*2*FRACUNIT)-(((l)+12)*(FRACUNIT*NUMCOLORMAPS/128))) +//#define LIGHT2SHADE(l) ((NUMCOLORMAPS*2*FRACUNIT)-(((l)+12)*(FRACUNIT*NUMCOLORMAPS/128))) + +// Disable diminishing light (To do: merge with LIGHT2SHADE and let a cvar control it, maybe by converting this to a function, like R_ActualExtraLight) +#define LIGHT2SHADE(lightlev) ((MAX(255 - lightlev, 0) * NUMCOLORMAPS) << (FRACBITS - 8)) // MAXLIGHTSCALE from original DOOM, divided by 2. #define MAXLIGHTVIS (24.0) From effe8a1e803222516313941828ca093636a75b21 Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Sun, 5 Mar 2017 13:07:25 -0500 Subject: [PATCH 906/912] Revert "- started adding ARM support. incomplete. won't compile. don't try." This reverts commit be8abba344f036ff1b054f310fcfdfda81ce0df2. --- src/CMakeLists.txt | 5 - src/ila/SSE2NEON.h | 1198 -------------------------- src/ila/ila.h | 56 -- src/ila/sse_to_neon.hpp | 187 ---- src/swrenderer/drawers/r_draw_rgba.h | 2 - 5 files changed, 1448 deletions(-) delete mode 100644 src/ila/SSE2NEON.h delete mode 100644 src/ila/ila.h delete mode 100644 src/ila/sse_to_neon.hpp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 0e1415e5cc..950cfe9c02 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -502,11 +502,6 @@ if( ZD_CMAKE_COMPILER_IS_GNUCXX_COMPATIBLE ) set( CMAKE_EXE_LINKER_FLAGS "-stdlib=libc++ ${CMAKE_EXE_LINKER_FLAGS}" ) endif () - # ARM processors (Raspberry Pi) - enable ARM NEON support. 
- if(${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm") - set( CMAKE_CXX_FLAGS "-mfpu=neon ${CMAKE_CXX_FLAGS}" ) - endif () - # Remove extra warnings when using the official DirectX headers. # Also, TDM-GCC 4.4.0 no longer accepts glibc-style printf formats as valid, # which is a royal pain. The previous version I had been using was fine with them. diff --git a/src/ila/SSE2NEON.h b/src/ila/SSE2NEON.h deleted file mode 100644 index 4e94e9607d..0000000000 --- a/src/ila/SSE2NEON.h +++ /dev/null @@ -1,1198 +0,0 @@ -#ifndef SSE2NEON_H -#define SSE2NEON_H - -// This header file provides a simple API translation layer -// between SSE intrinsics to their corresponding ARM NEON versions -// -// This header file does not (yet) translate *all* of the SSE intrinsics. -// Since this is in support of a specific porting effort, I have only -// included the intrinsics I needed to get my port to work. -// -// Questions/Comments/Feedback send to: jratcliffscarab@gmail.com -// -// If you want to improve or add to this project, send me an -// email and I will probably approve your access to the depot. -// -// Project is located here: -// -// https://github.com/jratcliff63367/sse2neon -// -// Show your appreciation for open source by sending me a bitcoin tip to the following -// address. -// -// TipJar: 1PzgWDSyq4pmdAXRH8SPUtta4SWGrt4B1p : -// https://blockchain.info/address/1PzgWDSyq4pmdAXRH8SPUtta4SWGrt4B1p -// -// -// Contributors to this project are: -// -// John W. Ratcliff : jratcliffscarab@gmail.com -// Brandon Rowlett : browlett@nvidia.com -// Ken Fast : kfast@gdeb.com -// Eric van Beurden : evanbeurden@nvidia.com -// -// -// ********************************************************************************************************************* -// Release notes for January 20, 2017 version: -// -// The unit tests have been refactored. 
They no longer assert on an error, instead they return a pass/fail condition -// The unit-tests now test 10,000 random float and int values against each intrinsic. -// -// SSE2NEON now supports 95 SSE intrinsics. 39 of them have formal unit tests which have been implemented and -// fully tested on NEON/ARM. The remaining 56 still need unit tests implemented. -// -// A struct is now defined in this header file called 'SIMDVec' which can be used by applications which -// attempt to access the contents of an _m128 struct directly. It is important to note that accessing the __m128 -// struct directly is bad coding practice by Microsoft: @see: https://msdn.microsoft.com/en-us/library/ayeb3ayc.aspx -// -// However, some legacy source code may try to access the contents of an __m128 struct directly so the developer -// can use the SIMDVec as an alias for it. Any casting must be done manually by the developer, as you cannot -// cast or otherwise alias the base NEON data type for intrinsic operations. -// -// A bug was found with the _mm_shuffle_ps intrinsic. If the shuffle permutation was not one of the ones with -// a custom/unique implementation causing it to fall through to the default shuffle implementation it was failing -// to return the correct value. This is now fixed. -// -// A bug was found with the _mm_cvtps_epi32 intrinsic. This converts floating point values to integers. -// It was not honoring the correct rounding mode. In SSE the default rounding mode when converting from float to int -// is to use 'round to even' otherwise known as 'bankers rounding'. ARMv7 did not support this feature but ARMv8 does. -// As it stands today, this header file assumes ARMv8. If you are trying to target really old ARM devices, you may get -// a build error. -// -// Support for a number of new intrinsics was added, however, none of them yet have unit-tests to 100% confirm they are -// producing the correct results on NEON. These unit tests will be added as soon as possible. 
-// -// Here is the list of new instrinsics which have been added: -// -// _mm_cvtss_f32 : extracts the lower order floating point value from the parameter -// _mm_add_ss : adds the scalar single - precision floating point values of a and b -// _mm_div_ps : Divides the four single - precision, floating - point values of a and b. -// _mm_div_ss : Divides the scalar single - precision floating point value of a by b. -// _mm_sqrt_ss : Computes the approximation of the square root of the scalar single - precision floating point value of in. -// _mm_rsqrt_ps : Computes the approximations of the reciprocal square roots of the four single - precision floating point values of in. -// _mm_comilt_ss : Compares the lower single - precision floating point scalar values of a and b using a less than operation -// _mm_comigt_ss : Compares the lower single - precision floating point scalar values of a and b using a greater than operation. -// _mm_comile_ss : Compares the lower single - precision floating point scalar values of a and b using a less than or equal operation. -// _mm_comige_ss : Compares the lower single - precision floating point scalar values of a and b using a greater than or equal operation. -// _mm_comieq_ss : Compares the lower single - precision floating point scalar values of a and b using an equality operation. -// _mm_comineq_s : Compares the lower single - precision floating point scalar values of a and b using an inequality operation -// _mm_unpackhi_epi8 : Interleaves the upper 8 signed or unsigned 8 - bit integers in a with the upper 8 signed or unsigned 8 - bit integers in b. -// _mm_unpackhi_epi16: Interleaves the upper 4 signed or unsigned 16 - bit integers in a with the upper 4 signed or unsigned 16 - bit integers in b. 
-// -// ********************************************************************************************************************* -/* -** The MIT license: -** -** Permission is hereby granted, free of charge, to any person obtaining a copy -** of this software and associated documentation files (the "Software"), to deal -** in the Software without restriction, including without limitation the rights -** to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -** copies of the Software, and to permit persons to whom the Software is furnished -** to do so, subject to the following conditions: -** -** The above copyright notice and this permission notice shall be included in all -** copies or substantial portions of the Software. - -** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -** AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -** WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -** CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - -#define GCC 1 -#define ENABLE_CPP_VERSION 0 - -#if GCC -#define FORCE_INLINE inline __attribute__((always_inline)) -#define ALIGN_STRUCT(x) __attribute__((aligned(x))) -#else -#define FORCE_INLINE inline -#define ALIGN_STRUCT(x) __declspec(align(x)) -#endif - -#include -#include "arm_neon.h" - -/*******************************************************/ -/* MACRO for shuffle parameter for _mm_shuffle_ps(). */ -/* Argument fp3 is a digit[0123] that represents the fp*/ -/* from argument "b" of mm_shuffle_ps that will be */ -/* placed in fp3 of result. fp2 is the same for fp2 in */ -/* result. fp1 is a digit[0123] that represents the fp */ -/* from argument "a" of mm_shuffle_ps that will be */ -/* places in fp1 of result. 
fp0 is the same for fp0 of */ -/* result */ -/*******************************************************/ -#define _MM_SHUFFLE(fp3,fp2,fp1,fp0) (((fp3) << 6) | ((fp2) << 4) | \ - ((fp1) << 2) | ((fp0))) - -typedef float32x4_t __m128; -typedef int32x4_t __m128i; - -// union intended to allow direct access to an __m128 variable using the names that the MSVC -// compiler provides. This union should really only be used when trying to access the members -// of the vector as integer values. GCC/clang allow native access to the float members through -// a simple array access operator (in C since 4.6, in C++ since 4.8). -// -// Ideally direct accesses to SIMD vectors should not be used since it can cause a performance -// hit. If it really is needed however, the original __m128 variable can be aliased with a -// pointer to this union and used to access individual components. The use of this union should -// be hidden behind a macro that is used throughout the codebase to access the members instead -// of always declaring this type of variable. -typedef union ALIGN_STRUCT(16) SIMDVec -{ - float m128_f32[4]; // as floats - do not to use this. Added for convenience. - int8_t m128_i8[16]; // as signed 8-bit integers. - int16_t m128_i16[8]; // as signed 16-bit integers. - int32_t m128_i32[4]; // as signed 32-bit integers. - int64_t m128_i64[2]; // as signed 64-bit integers. - uint8_t m128_u8[16]; // as unsigned 8-bit integers. - uint16_t m128_u16[8]; // as unsigned 16-bit integers. - uint32_t m128_u32[4]; // as unsigned 32-bit integers. - uint64_t m128_u64[2]; // as unsigned 64-bit integers. 
-} SIMDVec; - -// ****************************************** -// Set/get methods -// ****************************************** - -// extracts the lower order floating point value from the parameter : https://msdn.microsoft.com/en-us/library/bb514059%28v=vs.120%29.aspx?f=255&MSPPError=-2147217396 -FORCE_INLINE float _mm_cvtss_f32(__m128 a) -{ - return vgetq_lane_f32(a, 0); -} - -// Sets the 128-bit value to zero https://msdn.microsoft.com/en-us/library/vstudio/ys7dw0kh(v=vs.100).aspx -FORCE_INLINE __m128i _mm_setzero_si128() -{ - return vdupq_n_s32(0); -} - -// Clears the four single-precision, floating-point values. https://msdn.microsoft.com/en-us/library/vstudio/tk1t2tbz(v=vs.100).aspx -FORCE_INLINE __m128 _mm_setzero_ps(void) -{ - return vdupq_n_f32(0); -} - -// Sets the four single-precision, floating-point values to w. https://msdn.microsoft.com/en-us/library/vstudio/2x1se8ha(v=vs.100).aspx -FORCE_INLINE __m128 _mm_set1_ps(float _w) -{ - return vdupq_n_f32(_w); -} - -// Sets the four single-precision, floating-point values to w. https://msdn.microsoft.com/en-us/library/vstudio/2x1se8ha(v=vs.100).aspx -FORCE_INLINE __m128 _mm_set_ps1(float _w) -{ - return vdupq_n_f32(_w); -} - -// Sets the four single-precision, floating-point values to the four inputs. https://msdn.microsoft.com/en-us/library/vstudio/afh0zf75(v=vs.100).aspx -FORCE_INLINE __m128 _mm_set_ps(float w, float z, float y, float x) -{ - float __attribute__((aligned(16))) data[4] = { x, y, z, w }; - return vld1q_f32(data); -} - -// Sets the four single-precision, floating-point values to the four inputs in reverse order. https://msdn.microsoft.com/en-us/library/vstudio/d2172ct3(v=vs.100).aspx -FORCE_INLINE __m128 _mm_setr_ps(float w, float z , float y , float x ) -{ - float __attribute__ ((aligned (16))) data[4] = { w, z, y, x }; - return vld1q_f32(data); -} - -// Sets the 4 signed 32-bit integer values to i. 
https://msdn.microsoft.com/en-us/library/vstudio/h4xscxat(v=vs.100).aspx -FORCE_INLINE __m128i _mm_set1_epi32(int _i) -{ - return vdupq_n_s32(_i); -} - -// Sets the 4 signed 32-bit integer values. https://msdn.microsoft.com/en-us/library/vstudio/019beekt(v=vs.100).aspx -FORCE_INLINE __m128i _mm_set_epi32(int i3, int i2, int i1, int i0) -{ - int32_t __attribute__((aligned(16))) data[4] = { i0, i1, i2, i3 }; - return vld1q_s32(data); -} - -// Stores four single-precision, floating-point values. https://msdn.microsoft.com/en-us/library/vstudio/s3h4ay6y(v=vs.100).aspx -FORCE_INLINE void _mm_store_ps(float *p, __m128 a) -{ - vst1q_f32(p, a); -} - -// Stores four single-precision, floating-point values. https://msdn.microsoft.com/en-us/library/44e30x22(v=vs.100).aspx -FORCE_INLINE void _mm_storeu_ps(float *p, __m128 a) -{ - vst1q_f32(p, a); -} - -// Stores four 32-bit integer values as (as a __m128i value) at the address p. https://msdn.microsoft.com/en-us/library/vstudio/edk11s13(v=vs.100).aspx -FORCE_INLINE void _mm_store_si128(__m128i *p, __m128i a ) -{ - vst1q_s32((int32_t*) p,a); -} - -// Stores the lower single - precision, floating - point value. https://msdn.microsoft.com/en-us/library/tzz10fbx(v=vs.100).aspx -FORCE_INLINE void _mm_store_ss(float *p, __m128 a) -{ - vst1q_lane_f32(p, a, 0); -} - -// Reads the lower 64 bits of b and stores them into the lower 64 bits of a. https://msdn.microsoft.com/en-us/library/hhwf428f%28v=vs.90%29.aspx -FORCE_INLINE void _mm_storel_epi64(__m128i* a, __m128i b) -{ - *a = (__m128i)vsetq_lane_s64((int64_t)vget_low_s32(b), *(int64x2_t*)a, 0); -} - -// Loads a single single-precision, floating-point value, copying it into all four words https://msdn.microsoft.com/en-us/library/vstudio/5cdkf716(v=vs.100).aspx -FORCE_INLINE __m128 _mm_load1_ps(const float * p) -{ - return vld1q_dup_f32(p); -} - -// Loads four single-precision, floating-point values. 
https://msdn.microsoft.com/en-us/library/vstudio/zzd50xxt(v=vs.100).aspx -FORCE_INLINE __m128 _mm_load_ps(const float * p) -{ - return vld1q_f32(p); -} - -// Loads four single-precision, floating-point values. https://msdn.microsoft.com/en-us/library/x1b16s7z%28v=vs.90%29.aspx -FORCE_INLINE __m128 _mm_loadu_ps(const float * p) -{ - // for neon, alignment doesn't matter, so _mm_load_ps and _mm_loadu_ps are equivalent for neon - return vld1q_f32(p); -} - -// Loads an single - precision, floating - point value into the low word and clears the upper three words. https://msdn.microsoft.com/en-us/library/548bb9h4%28v=vs.90%29.aspx -FORCE_INLINE __m128 _mm_load_ss(const float * p) -{ - __m128 result = vdupq_n_f32(0); - return vsetq_lane_f32(*p, result, 0); -} - - -// ****************************************** -// Logic/Binary operations -// ****************************************** - -// Compares for inequality. https://msdn.microsoft.com/en-us/library/sf44thbx(v=vs.100).aspx -FORCE_INLINE __m128 _mm_cmpneq_ps(__m128 a, __m128 b) -{ - return (__m128)vmvnq_s32((__m128i)vceqq_f32(a, b)); -} - -// Computes the bitwise AND-NOT of the four single-precision, floating-point values of a and b. https://msdn.microsoft.com/en-us/library/vstudio/68h7wd02(v=vs.100).aspx -FORCE_INLINE __m128 _mm_andnot_ps(__m128 a, __m128 b) -{ - return (__m128)vbicq_s32((__m128i)b, (__m128i)a); // *NOTE* argument swap -} - -// Computes the bitwise AND of the 128-bit value in b and the bitwise NOT of the 128-bit value in a. https://msdn.microsoft.com/en-us/library/vstudio/1beaceh8(v=vs.100).aspx -FORCE_INLINE __m128i _mm_andnot_si128(__m128i a, __m128i b) -{ - return (__m128i)vbicq_s32(b, a); // *NOTE* argument swap -} - -// Computes the bitwise AND of the 128-bit value in a and the 128-bit value in b. 
https://msdn.microsoft.com/en-us/library/vstudio/6d1txsa8(v=vs.100).aspx -FORCE_INLINE __m128i _mm_and_si128(__m128i a, __m128i b) -{ - return (__m128i)vandq_s32(a, b); -} - -// Computes the bitwise AND of the four single-precision, floating-point values of a and b. https://msdn.microsoft.com/en-us/library/vstudio/73ck1xc5(v=vs.100).aspx -FORCE_INLINE __m128 _mm_and_ps(__m128 a, __m128 b) -{ - return (__m128)vandq_s32((__m128i)a, (__m128i)b); -} - -// Computes the bitwise OR of the four single-precision, floating-point values of a and b. https://msdn.microsoft.com/en-us/library/vstudio/7ctdsyy0(v=vs.100).aspx -FORCE_INLINE __m128 _mm_or_ps(__m128 a, __m128 b) -{ - return (__m128)vorrq_s32((__m128i)a, (__m128i)b); -} - -// Computes bitwise EXOR (exclusive-or) of the four single-precision, floating-point values of a and b. https://msdn.microsoft.com/en-us/library/ss6k3wk8(v=vs.100).aspx -FORCE_INLINE __m128 _mm_xor_ps(__m128 a, __m128 b) -{ - return (__m128)veorq_s32((__m128i)a, (__m128i)b); -} - -// Computes the bitwise OR of the 128-bit value in a and the 128-bit value in b. https://msdn.microsoft.com/en-us/library/vstudio/ew8ty0db(v=vs.100).aspx -FORCE_INLINE __m128i _mm_or_si128(__m128i a, __m128i b) -{ - return (__m128i)vorrq_s32(a, b); -} - -// Computes the bitwise XOR of the 128-bit value in a and the 128-bit value in b. https://msdn.microsoft.com/en-us/library/fzt08www(v=vs.100).aspx -FORCE_INLINE __m128i _mm_xor_si128(__m128i a, __m128i b) -{ - return veorq_s32(a, b); -} - -// NEON does not provide this method -// Creates a 4-bit mask from the most significant bits of the four single-precision, floating-point values. 
https://msdn.microsoft.com/en-us/library/vstudio/4490ys29(v=vs.100).aspx -FORCE_INLINE int _mm_movemask_ps(__m128 a) -{ -#if ENABLE_CPP_VERSION // I am not yet convinced that the NEON version is faster than the C version of this - uint32x4_t &ia = *(uint32x4_t *)&a; - return (ia[0] >> 31) | ((ia[1] >> 30) & 2) | ((ia[2] >> 29) & 4) | ((ia[3] >> 28) & 8); -#else - static const uint32x4_t movemask = { 1, 2, 4, 8 }; - static const uint32x4_t highbit = { 0x80000000, 0x80000000, 0x80000000, 0x80000000 }; - uint32x4_t t0 = vreinterpretq_u32_f32(a); - uint32x4_t t1 = vtstq_u32(t0, highbit); - uint32x4_t t2 = vandq_u32(t1, movemask); - uint32x2_t t3 = vorr_u32(vget_low_u32(t2), vget_high_u32(t2)); - return vget_lane_u32(t3, 0) | vget_lane_u32(t3, 1); -#endif -} - -// Takes the upper 64 bits of a and places it in the low end of the result -// Takes the lower 64 bits of b and places it into the high end of the result. -FORCE_INLINE __m128 _mm_shuffle_ps_1032(__m128 a, __m128 b) -{ - return vcombine_f32(vget_high_f32(a), vget_low_f32(b)); -} - -// takes the lower two 32-bit values from a and swaps them and places in high end of result -// takes the higher two 32 bit values from b and swaps them and places in low end of result. 
-FORCE_INLINE __m128 _mm_shuffle_ps_2301(__m128 a, __m128 b) -{ - return vcombine_f32(vrev64_f32(vget_low_f32(a)), vrev64_f32(vget_high_f32(b))); -} - -// keeps the low 64 bits of b in the low and puts the high 64 bits of a in the high -FORCE_INLINE __m128 _mm_shuffle_ps_3210(__m128 a, __m128 b) -{ - return vcombine_f32(vget_low_f32(a), vget_high_f32(b)); -} - -FORCE_INLINE __m128 _mm_shuffle_ps_0011(__m128 a, __m128 b) -{ - return vcombine_f32(vdup_n_f32(vgetq_lane_f32(a, 1)), vdup_n_f32(vgetq_lane_f32(b, 0))); -} - -FORCE_INLINE __m128 _mm_shuffle_ps_0022(__m128 a, __m128 b) -{ - return vcombine_f32(vdup_n_f32(vgetq_lane_f32(a, 2)), vdup_n_f32(vgetq_lane_f32(b, 0))); -} - -FORCE_INLINE __m128 _mm_shuffle_ps_2200(__m128 a, __m128 b) -{ - return vcombine_f32(vdup_n_f32(vgetq_lane_f32(a, 0)), vdup_n_f32(vgetq_lane_f32(b, 2))); -} - -FORCE_INLINE __m128 _mm_shuffle_ps_3202(__m128 a, __m128 b) -{ - float32_t a0 = vgetq_lane_f32(a, 0); - float32_t a2 = vgetq_lane_f32(a, 2); - float32x2_t aVal = vdup_n_f32(a2); - aVal = vset_lane_f32(a0, aVal, 1); - return vcombine_f32(aVal, vget_high_f32(b)); -} - -FORCE_INLINE __m128 _mm_shuffle_ps_1133(__m128 a, __m128 b) -{ - return vcombine_f32(vdup_n_f32(vgetq_lane_f32(a, 3)), vdup_n_f32(vgetq_lane_f32(b, 1))); -} - -FORCE_INLINE __m128 _mm_shuffle_ps_2010(__m128 a, __m128 b) -{ - float32_t b0 = vgetq_lane_f32(b, 0); - float32_t b2 = vgetq_lane_f32(b, 2); - float32x2_t bVal = vdup_n_f32(b0); - bVal = vset_lane_f32(b2, bVal, 1); - return vcombine_f32(vget_low_f32(a), bVal); -} - -FORCE_INLINE __m128 _mm_shuffle_ps_2001(__m128 a, __m128 b) -{ - float32_t b0 = vgetq_lane_f32(b, 0); - float32_t b2 = vgetq_lane_f32(b, 2); - float32x2_t bVal = vdup_n_f32(b0); - bVal = vset_lane_f32(b2, bVal, 1); - return vcombine_f32(vrev64_f32(vget_low_f32(a)), bVal); -} - -FORCE_INLINE __m128 _mm_shuffle_ps_2032(__m128 a, __m128 b) -{ - float32_t b0 = vgetq_lane_f32(b, 0); - float32_t b2 = vgetq_lane_f32(b, 2); - float32x2_t bVal = vdup_n_f32(b0); - 
bVal = vset_lane_f32(b2, bVal, 1); - return vcombine_f32(vget_high_f32(a), bVal); -} - - -// NEON does not support a general purpose permute intrinsic -// Currently I am not sure whether the C implementation is faster or slower than the NEON version. -// Note, this has to be expanded as a template because the shuffle value must be an immediate value. -// The same is true on SSE as well. -// Selects four specific single-precision, floating-point values from a and b, based on the mask i. https://msdn.microsoft.com/en-us/library/vstudio/5f0858x0(v=vs.100).aspx -template -FORCE_INLINE __m128 _mm_shuffle_ps_default(__m128 a, __m128 b) -{ -#if ENABLE_CPP_VERSION // I am not convinced that the NEON version is faster than the C version yet. - __m128 ret; - ret[0] = a[i & 0x3]; - ret[1] = a[(i >> 2) & 0x3]; - ret[2] = b[(i >> 4) & 0x03]; - ret[3] = b[(i >> 6) & 0x03]; - return ret; -#else - __m128 ret = vmovq_n_f32(vgetq_lane_f32(a, i & 0x3)); - ret = vsetq_lane_f32(vgetq_lane_f32(a, (i >> 2) & 0x3), ret, 1); - ret = vsetq_lane_f32(vgetq_lane_f32(b, (i >> 4) & 0x3), ret, 2); - ret = vsetq_lane_f32(vgetq_lane_f32(b, (i >> 6) & 0x3), ret, 3); - return ret; -#endif -} - -template -FORCE_INLINE __m128 _mm_shuffle_ps_function(__m128 a, __m128 b) -{ - switch (i) - { - case _MM_SHUFFLE(1, 0, 3, 2): - return _mm_shuffle_ps_1032(a, b); - break; - case _MM_SHUFFLE(2, 3, 0, 1): - return _mm_shuffle_ps_2301(a, b); - break; - case _MM_SHUFFLE(3, 2, 1, 0): - return _mm_shuffle_ps_3210(a, b); - break; - case _MM_SHUFFLE(0, 0, 1, 1): - return _mm_shuffle_ps_0011(a, b); - break; - case _MM_SHUFFLE(0, 0, 2, 2): - return _mm_shuffle_ps_0022(a, b); - break; - case _MM_SHUFFLE(2, 2, 0, 0): - return _mm_shuffle_ps_2200(a, b); - break; - case _MM_SHUFFLE(3, 2, 0, 2): - return _mm_shuffle_ps_3202(a, b); - break; - case _MM_SHUFFLE(1, 1, 3, 3): - return _mm_shuffle_ps_1133(a, b); - break; - case _MM_SHUFFLE(2, 0, 1, 0): - return _mm_shuffle_ps_2010(a, b); - break; - case _MM_SHUFFLE(2, 0, 0, 1): - 
return _mm_shuffle_ps_2001(a, b); - break; - case _MM_SHUFFLE(2, 0, 3, 2): - return _mm_shuffle_ps_2032(a, b); - break; - } - return _mm_shuffle_ps_default(a, b); -} - -#define _mm_shuffle_ps(a,b,i) _mm_shuffle_ps_function(a,b) - -// Takes the upper 64 bits of a and places it in the low end of the result -// Takes the lower 64 bits of b and places it into the high end of the result. -FORCE_INLINE __m128i _mm_shuffle_epi_1032(__m128i a, __m128i b) -{ - return vcombine_s32(vget_high_s32(a), vget_low_s32(b)); -} - -// takes the lower two 32-bit values from a and swaps them and places in low end of result -// takes the higher two 32 bit values from b and swaps them and places in high end of result. -FORCE_INLINE __m128i _mm_shuffle_epi_2301(__m128i a, __m128i b) -{ - return vcombine_s32(vrev64_s32(vget_low_s32(a)), vrev64_s32(vget_high_s32(b))); -} - -// shift a right by 32 bits, and put the lower 32 bits of a into the upper 32 bits of b -// when a and b are the same, rotates the least significant 32 bits into the most signficant 32 bits, and shifts the rest down -FORCE_INLINE __m128i _mm_shuffle_epi_0321(__m128i a, __m128i b) -{ - return vextq_s32(a, b, 1); -} - -// shift a left by 32 bits, and put the upper 32 bits of b into the lower 32 bits of a -// when a and b are the same, rotates the most significant 32 bits into the least signficant 32 bits, and shifts the rest up -FORCE_INLINE __m128i _mm_shuffle_epi_2103(__m128i a, __m128i b) -{ - return vextq_s32(a, b, 3); -} - -// gets the lower 64 bits of a, and places it in the upper 64 bits -// gets the lower 64 bits of b and places it in the lower 64 bits -FORCE_INLINE __m128i _mm_shuffle_epi_1010(__m128i a, __m128i b) -{ - return vcombine_s32(vget_low_s32(a), vget_low_s32(a)); -} - -// gets the lower 64 bits of a, and places it in the upper 64 bits -// gets the lower 64 bits of b, swaps the 0 and 1 elements, and places it in the lower 64 bits -FORCE_INLINE __m128i _mm_shuffle_epi_1001(__m128i a, __m128i b) -{ - return 
vcombine_s32(vrev64_s32(vget_low_s32(a)), vget_low_s32(b)); -} - -// gets the lower 64 bits of a, swaps the 0 and 1 elements and places it in the upper 64 bits -// gets the lower 64 bits of b, swaps the 0 and 1 elements, and places it in the lower 64 bits -FORCE_INLINE __m128i _mm_shuffle_epi_0101(__m128i a, __m128i b) -{ - return vcombine_s32(vrev64_s32(vget_low_s32(a)), vrev64_s32(vget_low_s32(b))); -} - -FORCE_INLINE __m128i _mm_shuffle_epi_2211(__m128i a, __m128i b) -{ - return vcombine_s32(vdup_n_s32(vgetq_lane_s32(a, 1)), vdup_n_s32(vgetq_lane_s32(b, 2))); -} - -FORCE_INLINE __m128i _mm_shuffle_epi_0122(__m128i a, __m128i b) -{ - return vcombine_s32(vdup_n_s32(vgetq_lane_s32(a, 2)), vrev64_s32(vget_low_s32(b))); -} - -FORCE_INLINE __m128i _mm_shuffle_epi_3332(__m128i a, __m128i b) -{ - return vcombine_s32(vget_high_s32(a), vdup_n_s32(vgetq_lane_s32(b, 3))); -} - -template -FORCE_INLINE __m128i _mm_shuffle_epi32_default(__m128i a, __m128i b) -{ -#if ENABLE_CPP_VERSION - __m128i ret; - ret[0] = a[i & 0x3]; - ret[1] = a[(i >> 2) & 0x3]; - ret[2] = b[(i >> 4) & 0x03]; - ret[3] = b[(i >> 6) & 0x03]; - return ret; -#else - __m128i ret = vmovq_n_s32(vgetq_lane_s32(a, i & 0x3)); - ret = vsetq_lane_s32(vgetq_lane_s32(a, (i >> 2) & 0x3), ret, 1); - ret = vsetq_lane_s32(vgetq_lane_s32(b, (i >> 4) & 0x3), ret, 2); - ret = vsetq_lane_s32(vgetq_lane_s32(b, (i >> 6) & 0x3), ret, 3); - return ret; -#endif -} - -template -FORCE_INLINE __m128i _mm_shuffle_epi32_function(__m128i a, __m128i b) -{ - switch (i) - { - case _MM_SHUFFLE(1, 0, 3, 2): return _mm_shuffle_epi_1032(a, b); break; - case _MM_SHUFFLE(2, 3, 0, 1): return _mm_shuffle_epi_2301(a, b); break; - case _MM_SHUFFLE(0, 3, 2, 1): return _mm_shuffle_epi_0321(a, b); break; - case _MM_SHUFFLE(2, 1, 0, 3): return _mm_shuffle_epi_2103(a, b); break; - case _MM_SHUFFLE(1, 0, 1, 0): return _mm_shuffle_epi_1010(a, b); break; - case _MM_SHUFFLE(1, 0, 0, 1): return _mm_shuffle_epi_1001(a, b); break; - case _MM_SHUFFLE(0, 1, 0, 
1): return _mm_shuffle_epi_0101(a, b); break; - case _MM_SHUFFLE(2, 2, 1, 1): return _mm_shuffle_epi_2211(a, b); break; - case _MM_SHUFFLE(0, 1, 2, 2): return _mm_shuffle_epi_0122(a, b); break; - case _MM_SHUFFLE(3, 3, 3, 2): return _mm_shuffle_epi_3332(a, b); break; - default: return _mm_shuffle_epi32_default(a, b); - } -} - -template -FORCE_INLINE __m128i _mm_shuffle_epi32_splat(__m128i a) -{ - return vdupq_n_s32(vgetq_lane_s32(a, i)); -} - -template -FORCE_INLINE __m128i _mm_shuffle_epi32_single(__m128i a) -{ - switch (i) - { - case _MM_SHUFFLE(0, 0, 0, 0): return _mm_shuffle_epi32_splat<0>(a); break; - case _MM_SHUFFLE(1, 1, 1, 1): return _mm_shuffle_epi32_splat<1>(a); break; - case _MM_SHUFFLE(2, 2, 2, 2): return _mm_shuffle_epi32_splat<2>(a); break; - case _MM_SHUFFLE(3, 3, 3, 3): return _mm_shuffle_epi32_splat<3>(a); break; - default: return _mm_shuffle_epi32_function(a, a); - } -} - -// Shuffles the 4 signed or unsigned 32-bit integers in a as specified by imm. https://msdn.microsoft.com/en-us/library/56f67xbk%28v=vs.90%29.aspx -#define _mm_shuffle_epi32(a,i) _mm_shuffle_epi32_single(a) - -template -FORCE_INLINE __m128i _mm_shufflehi_epi16_function(__m128i a) -{ - int16x8_t ret = (int16x8_t)a; - int16x4_t highBits = vget_high_s16(ret); - ret = vsetq_lane_s16(vget_lane_s16(highBits, i & 0x3), ret, 4); - ret = vsetq_lane_s16(vget_lane_s16(highBits, (i >> 2) & 0x3), ret, 5); - ret = vsetq_lane_s16(vget_lane_s16(highBits, (i >> 4) & 0x3), ret, 6); - ret = vsetq_lane_s16(vget_lane_s16(highBits, (i >> 6) & 0x3), ret, 7); - return (__m128i)ret; -} - -// Shuffles the upper 4 signed or unsigned 16 - bit integers in a as specified by imm. https://msdn.microsoft.com/en-us/library/13ywktbs(v=vs.100).aspx -#define _mm_shufflehi_epi16(a,i) _mm_shufflehi_epi16_function(a) - -// Shifts the 4 signed or unsigned 32-bit integers in a left by count bits while shifting in zeros. 
: https://msdn.microsoft.com/en-us/library/z2k3bbtb%28v=vs.90%29.aspx -#define _mm_slli_epi32(a, imm) (__m128i)vshlq_n_s32(a,imm) - -//Shifts the 4 signed or unsigned 32-bit integers in a right by count bits while shifting in zeros. https://msdn.microsoft.com/en-us/library/w486zcfa(v=vs.100).aspx -#define _mm_srli_epi32( a, imm ) (__m128i)vshrq_n_u32((uint32x4_t)a, imm) - -// Shifts the 4 signed 32 - bit integers in a right by count bits while shifting in the sign bit. https://msdn.microsoft.com/en-us/library/z1939387(v=vs.100).aspx -#define _mm_srai_epi32( a, imm ) vshrq_n_s32(a, imm) - -// Shifts the 128 - bit value in a right by imm bytes while shifting in zeros.imm must be an immediate. https://msdn.microsoft.com/en-us/library/305w28yz(v=vs.100).aspx -//#define _mm_srli_si128( a, imm ) (__m128i)vmaxq_s8((int8x16_t)a, vextq_s8((int8x16_t)a, vdupq_n_s8(0), imm)) -#define _mm_srli_si128( a, imm ) (__m128i)vextq_s8((int8x16_t)a, vdupq_n_s8(0), (imm)) - -// Shifts the 128-bit value in a left by imm bytes while shifting in zeros. imm must be an immediate. https://msdn.microsoft.com/en-us/library/34d3k2kt(v=vs.100).aspx -#define _mm_slli_si128( a, imm ) (__m128i)vextq_s8(vdupq_n_s8(0), (int8x16_t)a, 16 - (imm)) - -// NEON does not provide a version of this function, here is an article about some ways to repro the results. -// http://stackoverflow.com/questions/11870910/sse-mm-movemask-epi8-equivalent-method-for-arm-neon -// Creates a 16-bit mask from the most significant bits of the 16 signed or unsigned 8-bit integers in a and zero extends the upper bits. 
https://msdn.microsoft.com/en-us/library/vstudio/s090c8fk(v=vs.100).aspx -FORCE_INLINE int _mm_movemask_epi8(__m128i _a) -{ - uint8x16_t input = (uint8x16_t)_a; - const int8_t __attribute__((aligned(16))) xr[8] = { -7, -6, -5, -4, -3, -2, -1, 0 }; - uint8x8_t mask_and = vdup_n_u8(0x80); - int8x8_t mask_shift = vld1_s8(xr); - - uint8x8_t lo = vget_low_u8(input); - uint8x8_t hi = vget_high_u8(input); - - lo = vand_u8(lo, mask_and); - lo = vshl_u8(lo, mask_shift); - - hi = vand_u8(hi, mask_and); - hi = vshl_u8(hi, mask_shift); - - lo = vpadd_u8(lo, lo); - lo = vpadd_u8(lo, lo); - lo = vpadd_u8(lo, lo); - - hi = vpadd_u8(hi, hi); - hi = vpadd_u8(hi, hi); - hi = vpadd_u8(hi, hi); - - return ((hi[0] << 8) | (lo[0] & 0xFF)); -} - - -// ****************************************** -// Math operations -// ****************************************** - -// Subtracts the four single-precision, floating-point values of a and b. https://msdn.microsoft.com/en-us/library/vstudio/1zad2k61(v=vs.100).aspx -FORCE_INLINE __m128 _mm_sub_ps(__m128 a, __m128 b) -{ - return vsubq_f32(a, b); -} - -// Subtracts the 4 signed or unsigned 32-bit integers of b from the 4 signed or unsigned 32-bit integers of a. https://msdn.microsoft.com/en-us/library/vstudio/fhh866h0(v=vs.100).aspx -FORCE_INLINE __m128i _mm_sub_epi32(__m128i a, __m128i b) -{ - return vsubq_s32(a, b); -} - -// Adds the four single-precision, floating-point values of a and b. https://msdn.microsoft.com/en-us/library/vstudio/c9848chc(v=vs.100).aspx -FORCE_INLINE __m128 _mm_add_ps(__m128 a, __m128 b) -{ - return vaddq_f32(a, b); -} - -// adds the scalar single-precision floating point values of a and b. https://msdn.microsoft.com/en-us/library/be94x2y6(v=vs.100).aspx -FORCE_INLINE __m128 _mm_add_ss(__m128 a, __m128 b) -{ - const float32_t b0 = vgetq_lane_f32(b, 0); - float32x4_t value = vdupq_n_f32(0); - - //the upper values in the result must be the remnants of . 
- value = vsetq_lane_f32(b0, value, 0); - return vaddq_f32(a, value); -} - -// Adds the 4 signed or unsigned 32-bit integers in a to the 4 signed or unsigned 32-bit integers in b. https://msdn.microsoft.com/en-us/library/vstudio/09xs4fkk(v=vs.100).aspx -FORCE_INLINE __m128i _mm_add_epi32(__m128i a, __m128i b) -{ - return vaddq_s32(a, b); -} - -// Adds the 8 signed or unsigned 16-bit integers in a to the 8 signed or unsigned 16-bit integers in b. https://msdn.microsoft.com/en-us/library/fceha5k4(v=vs.100).aspx -FORCE_INLINE __m128i _mm_add_epi16(__m128i a, __m128i b) -{ - return (__m128i)vaddq_s16((int16x8_t)a, (int16x8_t)b); -} - -// Multiplies the 8 signed or unsigned 16-bit integers from a by the 8 signed or unsigned 16-bit integers from b. https://msdn.microsoft.com/en-us/library/vstudio/9ks1472s(v=vs.100).aspx -FORCE_INLINE __m128i _mm_mullo_epi16(__m128i a, __m128i b) -{ - return (__m128i)vmulq_s16((int16x8_t)a, (int16x8_t)b); -} - -// Multiplies the 4 signed or unsigned 32-bit integers from a by the 4 signed or unsigned 32-bit integers from b. https://msdn.microsoft.com/en-us/library/vstudio/bb531409(v=vs.100).aspx -FORCE_INLINE __m128i _mm_mullo_epi32 (__m128i a, __m128i b) -{ - return (__m128i)vmulq_s32((int32x4_t)a,(int32x4_t)b); -} - -// Multiplies the four single-precision, floating-point values of a and b. https://msdn.microsoft.com/en-us/library/vstudio/22kbk6t9(v=vs.100).aspx -FORCE_INLINE __m128 _mm_mul_ps(__m128 a, __m128 b) -{ - return vmulq_f32(a, b); -} - -// Divides the four single-precision, floating-point values of a and b. https://msdn.microsoft.com/en-us/library/edaw8147(v=vs.100).aspx -FORCE_INLINE __m128 _mm_div_ps(__m128 a, __m128 b) -{ - __m128 recip = vrecpeq_f32(b); - recip = vmulq_f32(recip, vrecpsq_f32(recip, b)); - return vmulq_f32(a, recip); -} - -// Divides the scalar single-precision floating point value of a by b. 
https://msdn.microsoft.com/en-us/library/4y73xa49(v=vs.100).aspx -FORCE_INLINE __m128 _mm_div_ss(__m128 a, __m128 b) -{ - float32x4_t value; - float32x4_t result = a; - value = _mm_div_ps(a, b); - return vsetq_lane_f32(vgetq_lane_f32(value, 0), result, 0); -} - -// This version does additional iterations to improve accuracy. Between 1 and 4 recommended. -// Computes the approximations of reciprocals of the four single-precision, floating-point values of a. https://msdn.microsoft.com/en-us/library/vstudio/796k1tty(v=vs.100).aspx -FORCE_INLINE __m128 recipq_newton(__m128 in, int n) -{ - __m128 recip = vrecpeq_f32(in); - for (int i = 0; i(a),reinterpret_cast(b)); -} -#else -// Just standard old Intel! -#include -#endif - diff --git a/src/ila/sse_to_neon.hpp b/src/ila/sse_to_neon.hpp deleted file mode 100644 index c910491bf3..0000000000 --- a/src/ila/sse_to_neon.hpp +++ /dev/null @@ -1,187 +0,0 @@ -// -// sse_to_neon.hpp -// neon_test -// -// Created by Tim Oberhauser on 11/16/13. -// Copyright (c) 2013 Tim Oberhauser. All rights reserved. -// - -#ifndef neon_test_sse_to_neon_hpp -#define neon_test_sse_to_neon_hpp - -#include - -#if defined(__MM_MALLOC_H) -// copied from mm_malloc.h { -#include - -/* We can't depend on since the prototype of posix_memalign - may not be visible. 
*/ -#ifndef __cplusplus -extern int posix_memalign (void **, size_t, size_t); -#else -extern "C" int posix_memalign (void **, size_t, size_t) throw (); -#endif - -static __inline void * -_mm_malloc (size_t size, size_t alignment) -{ - void *ptr; - if (alignment == 1) - return malloc (size); - if (alignment == 2 || (sizeof (void *) == 8 && alignment == 4)) - alignment = sizeof (void *); - if (posix_memalign (&ptr, alignment, size) == 0) - return ptr; - else - return NULL; -} - -static __inline void -_mm_free (void * ptr) -{ - free (ptr); -} -// } copied from mm_malloc.h -#endif - - -typedef int16x8_t __m128i; -typedef float32x4_t __m128; - - -// ADDITION -inline __m128i _mm_add_epi16(const __m128i& a, const __m128i& b){ - return vaddq_s16(reinterpret_cast(a),reinterpret_cast(b)); -} - -inline __m128 _mm_add_ps(const __m128& a, const __m128& b){ - return vaddq_f32(a,b); -} - - -// SUBTRACTION -inline __m128i _mm_sub_epi16(const __m128i& a, const __m128i& b){ - return vsubq_s16(reinterpret_cast(a),reinterpret_cast(b)); -} - -inline __m128 _mm_sub_ps(const __m128& a, const __m128& b){ - return vsubq_f32(a,b); -} - - -// MULTIPLICATION -#if 0 -inline __m128i _mm_mullo_epi16(const __m128i& a, const __m128i& b){ - return vqrdmulhq_s16(reinterpret_cast(a),reinterpret_cast(b)); -} -#endif - -inline __m128 _mm_mul_ps(const __m128& a, const __m128& b){ - return vmulq_f32(a,b); -} - - -// SET VALUE -inline __m128i _mm_set1_epi16(const int16_t w){ - return vmovq_n_s16(w); -} - -inline __m128i _mm_setzero_si128(){ - return vmovq_n_s16(0); -} - -inline __m128 _mm_set1_ps(const float32_t& w){ - return vmovq_n_f32(w); -} - - -// STORE -inline void _mm_storeu_si128(__m128i* p, __m128i& a){ - vst1q_s16(reinterpret_cast(p),reinterpret_cast(a)); -} - -inline void _mm_store_ps(float32_t* p, __m128&a){ - vst1q_f32(p,a); -} - - -// LOAD -inline __m128i _mm_loadu_si128(__m128i* p){//For SSE address p does not need be 16-byte aligned - return 
reinterpret_cast<__m128i>(vld1q_s16(reinterpret_cast(p))); -} - -inline __m128i _mm_load_si128(__m128i* p){//For SSE address p must be 16-byte aligned - return reinterpret_cast<__m128i>(vld1q_s16(reinterpret_cast(p))); -} - -inline __m128 _mm_load_ps(const float32_t* p){ - return reinterpret_cast<__m128>(vld1q_f32(p)); -} - - -// SHIFT OPERATIONS -inline __m128i _mm_srai_epi16(const __m128i& a, const int count){ - int16x8_t b = vmovq_n_s16(-count); - return reinterpret_cast<__m128i>(vshlq_s16(a,b)); - // return vrshrq_n_s16(a, count);// TODO Argument to '__builtin_neon_vrshrq_n_v' must be a constant integer -} - - -// MIN/MAX OPERATIONS -inline __m128 _mm_max_ps(const __m128& a, const __m128& b){ - return reinterpret_cast<__m128>(vmaxq_f32(reinterpret_cast(a),reinterpret_cast(b))); -} - - -// SINGLE ELEMENT ACCESS -inline int16_t _mm_extract_epi16(__m128i& a, int index){ - return (reinterpret_cast(&a))[index]; - // return vgetq_lane_s16(a,index);// TODO Argument to '__builtin_neon_vgetq_lane_i16' must be a constant integer -} - - -// MISCELLANOUS -inline __m128i _mm_sad_epu8 (__m128i a, __m128i b){ - uint64x2_t sad = reinterpret_cast(vabdq_u8(reinterpret_cast(a),reinterpret_cast(b))); - sad = reinterpret_cast(vpaddlq_u8(reinterpret_cast(sad))); - sad = reinterpret_cast(vpaddlq_u16(reinterpret_cast(sad))); - sad = vpaddlq_u32(reinterpret_cast(sad)); - return reinterpret_cast<__m128i>(sad); -} - - -// LOGICAL OPERATIONS -inline __m128 _mm_and_ps(__m128& a, __m128& b){ - return reinterpret_cast<__m128>(vandq_u32(reinterpret_cast(a),reinterpret_cast(b))); -} - - -// CONVERSIONS -inline __m128i _mm_packus_epi16 (const __m128i a, const __m128i b){ - __m128i result = _mm_setzero_si128(); - int8x8_t* a_narrow = reinterpret_cast(&result); - int8x8_t* b_narrow = &a_narrow[1]; - *a_narrow = reinterpret_cast(vqmovun_s16(a)); - *b_narrow = reinterpret_cast(vqmovun_s16(b)); - return result; -} - -// In my case this function was only needed to convert 8 bit to 16 bit integers by 
extending with zeros, the general case is not implemented!!! -inline __m128i _mm_unpacklo_epi8(__m128i a, const __m128i dummy_zero){ - // dummy_zero is a dummy variable - uint8x8_t* a_low = reinterpret_cast(&a); - return reinterpret_cast<__m128i>(vmovl_u8(*a_low)); -} - -// In my case this function was only needed to convert 8 bit to 16 bit integers by extending with zeros, the general case is not implemented!!! -inline __m128i _mm_unpackhi_epi8(__m128i a, const __m128i dummy_zero){ - // dummy_zero is a dummy variable - uint8x8_t* a_low = reinterpret_cast(&a); - return reinterpret_cast<__m128i>(vmovl_u8(a_low[1])); -} - - - - -#endif diff --git a/src/swrenderer/drawers/r_draw_rgba.h b/src/swrenderer/drawers/r_draw_rgba.h index cdb3eda4ab..3e95c8cb67 100644 --- a/src/swrenderer/drawers/r_draw_rgba.h +++ b/src/swrenderer/drawers/r_draw_rgba.h @@ -36,8 +36,6 @@ #ifndef NO_SSE #include -#else -#include "ila/ila.h" #endif struct FSpecialColormap; From 28abc96aef14dd39af83b6c050c3ca283b8fe350 Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Mon, 6 Mar 2017 16:14:54 -0500 Subject: [PATCH 907/912] - fully implemented "nolightfade" to turn off diminishing lights in the software renderer. - unfortunately, LIGHT2SHADE had to be transformed into a namespaced function. 
--- src/gl/system/gl_swframebuffer.cpp | 2 +- src/polyrenderer/scene/poly_decal.cpp | 2 +- src/polyrenderer/scene/poly_particle.cpp | 2 +- src/polyrenderer/scene/poly_plane.cpp | 6 ++-- src/polyrenderer/scene/poly_playersprite.cpp | 4 +-- src/polyrenderer/scene/poly_scene.cpp | 3 +- src/polyrenderer/scene/poly_sprite.cpp | 2 +- src/polyrenderer/scene/poly_wall.cpp | 3 +- src/polyrenderer/scene/poly_wallsprite.cpp | 2 +- src/swrenderer/line/r_line.cpp | 6 ++-- src/swrenderer/line/r_walldraw.cpp | 2 +- src/swrenderer/plane/r_flatplane.cpp | 9 +++-- src/swrenderer/plane/r_slopeplane.cpp | 10 ++++-- src/swrenderer/r_swcanvas.cpp | 2 +- src/swrenderer/scene/r_light.cpp | 18 +++++++--- src/swrenderer/scene/r_light.h | 37 ++++++++++++-------- src/swrenderer/scene/r_opaque_pass.cpp | 6 ++-- src/swrenderer/things/r_particle.cpp | 2 +- src/swrenderer/things/r_playersprite.cpp | 2 +- src/swrenderer/things/r_sprite.cpp | 2 +- src/swrenderer/things/r_visiblesprite.cpp | 4 +-- src/swrenderer/things/r_voxel.cpp | 2 +- src/swrenderer/things/r_wallsprite.cpp | 6 ++-- src/win32/fb_d3d9.cpp | 2 +- 24 files changed, 84 insertions(+), 52 deletions(-) diff --git a/src/gl/system/gl_swframebuffer.cpp b/src/gl/system/gl_swframebuffer.cpp index 04c2b7eb7b..c883c662f7 100644 --- a/src/gl/system/gl_swframebuffer.cpp +++ b/src/gl/system/gl_swframebuffer.cpp @@ -2951,7 +2951,7 @@ void OpenGLSWFrameBuffer::FillSimplePoly(FTexture *texture, FVector2 *points, in DAngle rotation, FDynamicColormap *colormap, PalEntry flatcolor, int lightlevel, int bottomclip) { // Use an equation similar to player sprites to determine shade - double fadelevel = clamp((LIGHT2SHADE(lightlevel) / 65536. - 12) / NUMCOLORMAPS, 0.0, 1.0); + double fadelevel = clamp((swrenderer::LIGHT2SHADE(lightlevel, true) / 65536. 
- 12) / NUMCOLORMAPS, 0.0, 1.0); BufferedTris *quad; FBVERTEX *verts; diff --git a/src/polyrenderer/scene/poly_decal.cpp b/src/polyrenderer/scene/poly_decal.cpp index fad44862f6..0812f3a79f 100644 --- a/src/polyrenderer/scene/poly_decal.cpp +++ b/src/polyrenderer/scene/poly_decal.cpp @@ -139,7 +139,7 @@ void RenderPolyDecal::Render(const TriMatrix &worldToClip, const Vec4f &clipPlan args.uniforms.flags = 0; args.SetColormap(front->ColorMap); args.SetTexture(tex, decal->Translation, true); - args.uniforms.globvis = (float)swrenderer::LightVisibility::Instance()->WallGlobVis(); + args.uniforms.globvis = (float)swrenderer::LightVisibility::Instance()->WallGlobVis(foggy); if (fullbrightSprite || cameraLight->FixedLightLevel() >= 0 || cameraLight->FixedColormap()) { args.uniforms.light = 256; diff --git a/src/polyrenderer/scene/poly_particle.cpp b/src/polyrenderer/scene/poly_particle.cpp index 39f5514f45..fd9ee5c88f 100644 --- a/src/polyrenderer/scene/poly_particle.cpp +++ b/src/polyrenderer/scene/poly_particle.cpp @@ -74,7 +74,7 @@ void RenderPolyParticle::Render(const TriMatrix &worldToClip, const Vec4f &clipP PolyDrawArgs args; - args.uniforms.globvis = (float)swrenderer::LightVisibility::Instance()->ParticleGlobVis(); + args.uniforms.globvis = (float)swrenderer::LightVisibility::Instance()->ParticleGlobVis(foggy); if (fullbrightSprite || cameraLight->FixedLightLevel() >= 0 || cameraLight->FixedColormap()) { diff --git a/src/polyrenderer/scene/poly_plane.cpp b/src/polyrenderer/scene/poly_plane.cpp index 2aec6b08df..00a283b0ab 100644 --- a/src/polyrenderer/scene/poly_plane.cpp +++ b/src/polyrenderer/scene/poly_plane.cpp @@ -99,6 +99,7 @@ void RenderPolyPlane::Render3DFloor(const TriMatrix &worldToClip, const Vec4f &c swrenderer::CameraLight *cameraLight = swrenderer::CameraLight::Instance(); int lightlevel = 255; + bool foggy = false; if (cameraLight->FixedLightLevel() < 0 && sub->sector->e->XFloor.lightlist.Size()) { lightlist_t *light = P_GetPlaneLight(sub->sector, 
&sub->sector->ceilingplane, false); @@ -109,7 +110,7 @@ void RenderPolyPlane::Render3DFloor(const TriMatrix &worldToClip, const Vec4f &c UVTransform xform(ceiling ? fakeFloor->top.model->planes[sector_t::ceiling].xform : fakeFloor->top.model->planes[sector_t::floor].xform, tex); PolyDrawArgs args; - args.uniforms.globvis = (float)swrenderer::LightVisibility::Instance()->SlopePlaneGlobVis() * 48.0f; + args.uniforms.globvis = (float)swrenderer::LightVisibility::Instance()->SlopePlaneGlobVis(foggy) * 48.0f; args.uniforms.light = (uint32_t)(lightlevel / 255.0f * 256.0f); if (cameraLight->FixedLightLevel() >= 0 || cameraLight->FixedColormap()) args.uniforms.light = 256; @@ -155,6 +156,7 @@ void RenderPolyPlane::Render(const TriMatrix &worldToClip, const Vec4f &clipPlan { std::vector portalSegments; FSectorPortal *portal = nullptr;// sub->sector->ValidatePortal(ceiling ? sector_t::ceiling : sector_t::floor); + bool foggy = false; PolyDrawSectorPortal *polyportal = nullptr; if (portal && (portal->mFlags & PORTSF_INSKYBOX) == PORTSF_INSKYBOX) // Do not recurse into portals we already recursed into portal = nullptr; @@ -305,7 +307,7 @@ void RenderPolyPlane::Render(const TriMatrix &worldToClip, const Vec4f &clipPlan swrenderer::CameraLight *cameraLight = swrenderer::CameraLight::Instance(); PolyDrawArgs args; - args.uniforms.globvis = (float)swrenderer::LightVisibility::Instance()->SlopePlaneGlobVis() * 48.0f; + args.uniforms.globvis = (float)swrenderer::LightVisibility::Instance()->SlopePlaneGlobVis(foggy) * 48.0f; args.uniforms.light = (uint32_t)(frontsector->lightlevel / 255.0f * 256.0f); if (cameraLight->FixedLightLevel() >= 0 || cameraLight->FixedColormap()) args.uniforms.light = 256; diff --git a/src/polyrenderer/scene/poly_playersprite.cpp b/src/polyrenderer/scene/poly_playersprite.cpp index 339d3051a4..d55ecb9e75 100644 --- a/src/polyrenderer/scene/poly_playersprite.cpp +++ b/src/polyrenderer/scene/poly_playersprite.cpp @@ -223,9 +223,9 @@ void 
RenderPolyPlayerSprites::RenderSprite(DPSprite *sprite, AActor *owner, floa bool foggy = false; int actualextralight = foggy ? 0 : extralight << 4; - int spriteshade = LIGHT2SHADE(owner->Sector->lightlevel + actualextralight); + int spriteshade = swrenderer::LIGHT2SHADE(owner->Sector->lightlevel + actualextralight, foggy); double minz = double((2048 * 4) / double(1 << 20)); - ColormapNum = GETPALOOKUP(swrenderer::LightVisibility::Instance()->SpriteGlobVis() / minz, spriteshade); + ColormapNum = GETPALOOKUP(swrenderer::LightVisibility::Instance()->SpriteGlobVis(foggy) / minz, spriteshade); if (sprite->GetID() < PSP_TARGETCENTER) { diff --git a/src/polyrenderer/scene/poly_scene.cpp b/src/polyrenderer/scene/poly_scene.cpp index 9685c4241f..1255376786 100644 --- a/src/polyrenderer/scene/poly_scene.cpp +++ b/src/polyrenderer/scene/poly_scene.cpp @@ -228,6 +228,7 @@ void RenderPolyScene::RenderLine(subsector_t *sub, seg_t *line, sector_t *fronts void RenderPolyScene::RenderPortals(int portalDepth) { + bool foggy = false; if (portalDepth < r_portal_recursions) { for (auto &portal : SectorPortals) @@ -241,7 +242,7 @@ void RenderPolyScene::RenderPortals(int portalDepth) PolyDrawArgs args; args.objectToClip = &WorldToClip; args.mode = TriangleDrawMode::Fan; - args.uniforms.globvis = (float)swrenderer::LightVisibility::Instance()->WallGlobVis(); + args.uniforms.globvis = (float)swrenderer::LightVisibility::Instance()->WallGlobVis(foggy); args.uniforms.color = 0; args.uniforms.light = 256; args.uniforms.flags = TriUniforms::fixed_light; diff --git a/src/polyrenderer/scene/poly_sprite.cpp b/src/polyrenderer/scene/poly_sprite.cpp index 0599a4b202..e2ca59b240 100644 --- a/src/polyrenderer/scene/poly_sprite.cpp +++ b/src/polyrenderer/scene/poly_sprite.cpp @@ -139,7 +139,7 @@ void RenderPolySprite::Render(const TriMatrix &worldToClip, const Vec4f &clipPla swrenderer::CameraLight *cameraLight = swrenderer::CameraLight::Instance(); PolyDrawArgs args; - args.uniforms.globvis = 
(float)swrenderer::LightVisibility::Instance()->SpriteGlobVis(); + args.uniforms.globvis = (float)swrenderer::LightVisibility::Instance()->SpriteGlobVis(foggy); args.uniforms.flags = 0; if (fullbrightSprite || cameraLight->FixedLightLevel() >= 0 || cameraLight->FixedColormap()) { diff --git a/src/polyrenderer/scene/poly_wall.cpp b/src/polyrenderer/scene/poly_wall.cpp index d8fb2a7ae6..e64d1f6c6c 100644 --- a/src/polyrenderer/scene/poly_wall.cpp +++ b/src/polyrenderer/scene/poly_wall.cpp @@ -198,6 +198,7 @@ void RenderPolyWall::SetCoords(const DVector2 &v1, const DVector2 &v2, double ce void RenderPolyWall::Render(const TriMatrix &worldToClip, const Vec4f &clipPlane, PolyCull &cull) { + bool foggy = false; FTexture *tex = GetTexture(); if (!tex && !Polyportal) return; @@ -247,7 +248,7 @@ void RenderPolyWall::Render(const TriMatrix &worldToClip, const Vec4f &clipPlane } PolyDrawArgs args; - args.uniforms.globvis = (float)swrenderer::LightVisibility::Instance()->WallGlobVis(); + args.uniforms.globvis = (float)swrenderer::LightVisibility::Instance()->WallGlobVis(foggy); args.uniforms.light = (uint32_t)(GetLightLevel() / 255.0f * 256.0f); args.uniforms.flags = 0; args.uniforms.subsectorDepth = SubsectorDepth; diff --git a/src/polyrenderer/scene/poly_wallsprite.cpp b/src/polyrenderer/scene/poly_wallsprite.cpp index 31aded9ac3..a563f026df 100644 --- a/src/polyrenderer/scene/poly_wallsprite.cpp +++ b/src/polyrenderer/scene/poly_wallsprite.cpp @@ -101,7 +101,7 @@ void RenderPolyWallSprite::Render(const TriMatrix &worldToClip, const Vec4f &cli swrenderer::CameraLight *cameraLight = swrenderer::CameraLight::Instance(); PolyDrawArgs args; - args.uniforms.globvis = (float)swrenderer::LightVisibility::Instance()->WallGlobVis(); + args.uniforms.globvis = (float)swrenderer::LightVisibility::Instance()->WallGlobVis(foggy); if (fullbrightSprite || cameraLight->FixedLightLevel() >= 0 || cameraLight->FixedColormap()) { args.uniforms.light = 256; diff --git 
a/src/swrenderer/line/r_line.cpp b/src/swrenderer/line/r_line.cpp index 94b1cfc4be..33006c8c6a 100644 --- a/src/swrenderer/line/r_line.cpp +++ b/src/swrenderer/line/r_line.cpp @@ -496,7 +496,7 @@ namespace swrenderer } else { - draw_segment->shade = LIGHT2SHADE(mLineSegment->sidedef->GetLightLevel(foggy, mLineSegment->frontsector->lightlevel) + R_ActualExtraLight(foggy)); + draw_segment->shade = LIGHT2SHADE(mLineSegment->sidedef->GetLightLevel(foggy, mLineSegment->frontsector->lightlevel) + R_ActualExtraLight(foggy), foggy); } if (draw_segment->bFogBoundary || draw_segment->maskedtexturecol != nullptr) @@ -781,8 +781,8 @@ namespace swrenderer CameraLight *cameraLight = CameraLight::Instance(); if (cameraLight->FixedColormap() == nullptr && cameraLight->FixedLightLevel() < 0) { - wallshade = LIGHT2SHADE(mLineSegment->sidedef->GetLightLevel(foggy, mFrontSector->lightlevel) + R_ActualExtraLight(foggy)); - double GlobVis = LightVisibility::Instance()->WallGlobVis(); + wallshade = LIGHT2SHADE(mLineSegment->sidedef->GetLightLevel(foggy, mFrontSector->lightlevel) + R_ActualExtraLight(foggy), foggy); + double GlobVis = LightVisibility::Instance()->WallGlobVis(foggy); rw_lightleft = float(GlobVis / WallC.sz1); rw_lightstep = float((GlobVis / WallC.sz2 - rw_lightleft) / (WallC.sx2 - WallC.sx1)); } diff --git a/src/swrenderer/line/r_walldraw.cpp b/src/swrenderer/line/r_walldraw.cpp index 5f4cd8485a..9b2a4f7ce1 100644 --- a/src/swrenderer/line/r_walldraw.cpp +++ b/src/swrenderer/line/r_walldraw.cpp @@ -401,7 +401,7 @@ namespace swrenderer lightlist_t *lit = &frontsector->e->XFloor.lightlist[i]; basecolormap = lit->extra_colormap; - wallshade = LIGHT2SHADE(curline->sidedef->GetLightLevel(foggy, *lit->p_lightlevel, lit->lightsource != NULL) + R_ActualExtraLight(foggy)); + wallshade = LIGHT2SHADE(curline->sidedef->GetLightLevel(foggy, *lit->p_lightlevel, lit->lightsource != NULL) + R_ActualExtraLight(foggy), foggy); } ProcessNormalWall(up, dwal, texturemid, swal, lwal); diff 
--git a/src/swrenderer/plane/r_flatplane.cpp b/src/swrenderer/plane/r_flatplane.cpp index 28961baad2..725dd46162 100644 --- a/src/swrenderer/plane/r_flatplane.cpp +++ b/src/swrenderer/plane/r_flatplane.cpp @@ -26,6 +26,7 @@ #include "cmdlib.h" #include "d_net.h" #include "g_level.h" +#include "g_levellocals.h" #include "swrenderer/scene/r_opaque_pass.h" #include "r_flatplane.h" #include "swrenderer/scene/r_3dfloors.h" @@ -115,7 +116,11 @@ namespace swrenderer planeheight = fabs(pl->height.Zat0() - ViewPos.Z); basecolormap = colormap; - GlobVis = LightVisibility::Instance()->FlatPlaneGlobVis() / planeheight; + + // [RH] set foggy flag + bool foggy = (level.fadeto || basecolormap->Fade || (level.flags & LEVEL_HASFADETABLE)); + + GlobVis = LightVisibility::Instance()->FlatPlaneGlobVis(foggy) / planeheight; CameraLight *cameraLight = CameraLight::Instance(); if (cameraLight->FixedLightLevel() >= 0) @@ -131,7 +136,7 @@ namespace swrenderer else { plane_shade = true; - planeshade = LIGHT2SHADE(pl->lightlevel); + planeshade = LIGHT2SHADE(pl->lightlevel, foggy); } drawerargs.SetStyle(masked, additive, alpha); diff --git a/src/swrenderer/plane/r_slopeplane.cpp b/src/swrenderer/plane/r_slopeplane.cpp index 6be3b1a7b0..033c5257df 100644 --- a/src/swrenderer/plane/r_slopeplane.cpp +++ b/src/swrenderer/plane/r_slopeplane.cpp @@ -26,6 +26,7 @@ #include "cmdlib.h" #include "d_net.h" #include "g_level.h" +#include "g_levellocals.h" #include "swrenderer/scene/r_opaque_pass.h" #include "r_slopeplane.h" #include "swrenderer/scene/r_3dfloors.h" @@ -152,12 +153,15 @@ namespace swrenderer plane_sz[0] = -plane_sz[0]; } - planelightfloat = (LightVisibility::Instance()->SlopePlaneGlobVis() * lxscale * lyscale) / (fabs(pl->height.ZatPoint(ViewPos) - ViewPos.Z)) / 65536.f; + // [RH] set foggy flag + basecolormap = colormap; + bool foggy = level.fadeto || basecolormap->Fade || (level.flags & LEVEL_HASFADETABLE);; + + planelightfloat = (LightVisibility::Instance()->SlopePlaneGlobVis(foggy) * 
lxscale * lyscale) / (fabs(pl->height.ZatPoint(ViewPos) - ViewPos.Z)) / 65536.f; if (pl->height.fC() > 0) planelightfloat = -planelightfloat; - basecolormap = colormap; CameraLight *cameraLight = CameraLight::Instance(); if (cameraLight->FixedLightLevel() >= 0) @@ -174,7 +178,7 @@ namespace swrenderer { drawerargs.SetLight(basecolormap, 0, 0); plane_shade = true; - planeshade = LIGHT2SHADE(pl->lightlevel); + planeshade = LIGHT2SHADE(pl->lightlevel, foggy); } // Hack in support for 1 x Z and Z x 1 texture sizes diff --git a/src/swrenderer/r_swcanvas.cpp b/src/swrenderer/r_swcanvas.cpp index c728dd3153..4866ac0cd8 100644 --- a/src/swrenderer/r_swcanvas.cpp +++ b/src/swrenderer/r_swcanvas.cpp @@ -198,7 +198,7 @@ void SWCanvas::FillSimplePoly(DCanvas *canvas, FTexture *tex, FVector2 *points, FDynamicColormap *colormap, PalEntry flatcolor, int lightlevel, int bottomclip) { // Use an equation similar to player sprites to determine shade - fixed_t shade = LIGHT2SHADE(lightlevel) - 12 * FRACUNIT; + fixed_t shade = LIGHT2SHADE(lightlevel, true) - 12 * FRACUNIT; float topy, boty, leftx, rightx; int toppt, botpt, pt1, pt2; int i; diff --git a/src/swrenderer/scene/r_light.cpp b/src/swrenderer/scene/r_light.cpp index fa9ae27f93..686cfc524c 100644 --- a/src/swrenderer/scene/r_light.cpp +++ b/src/swrenderer/scene/r_light.cpp @@ -32,12 +32,20 @@ #include "d_player.h" #include "swrenderer/scene/r_light.h" #include "swrenderer/viewport/r_viewport.h" +#include "gl/data/gl_data.h" CVAR(Bool, r_shadercolormaps, true, CVAR_ARCHIVE) EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor) namespace swrenderer { + fixed_t LIGHT2SHADE(int l, bool foggy) + { + return (!foggy && (glset.nolightfade) ? 
+ ((MAX(255 - l, 0) * NUMCOLORMAPS) << (FRACBITS - 8)) : + ((NUMCOLORMAPS*2*FRACUNIT)-(((l)+12)*(FRACUNIT*NUMCOLORMAPS/128)))); + } + CameraLight *CameraLight::Instance() { static CameraLight instance; @@ -146,10 +154,12 @@ namespace swrenderer TiltVisibility = float(vis * FocalTangent * (16.f * 320.f) / viewwidth); - // Disable diminishing light (To do: make a cvar control this) - WallVisibility = 0.0; - FloorVisibility = 0.0; - TiltVisibility = 0.0f; + NoLightFade = glset.nolightfade; + + // Disable diminishing light + // WallVisibility = 0.0; + // FloorVisibility = 0.0; + // TiltVisibility = 0.0f; } // Controls how quickly light ramps across a 1/z range. Set this, and it diff --git a/src/swrenderer/scene/r_light.h b/src/swrenderer/scene/r_light.h index 7b127acb2e..1219a06c17 100644 --- a/src/swrenderer/scene/r_light.h +++ b/src/swrenderer/scene/r_light.h @@ -29,13 +29,18 @@ // The size of a single colormap, in bits #define COLORMAPSHIFT 8 -// Convert a light level into an unbounded colormap index (shade). Result is -// fixed point. Why the +12? I wish I knew, but experimentation indicates it -// is necessary in order to best reproduce Doom's original lighting. +// Convert a light level into an unbounded colormap index (shade). Result is +// fixed point. Why the +12? I wish I knew, but experimentation indicates it +// is necessary in order to best reproduce Doom's original lighting. 
//#define LIGHT2SHADE(l) ((NUMCOLORMAPS*2*FRACUNIT)-(((l)+12)*(FRACUNIT*NUMCOLORMAPS/128))) -// Disable diminishing light (To do: merge with LIGHT2SHADE and let a cvar control it, maybe by converting this to a function, like R_ActualExtraLight) -#define LIGHT2SHADE(lightlev) ((MAX(255 - lightlev, 0) * NUMCOLORMAPS) << (FRACBITS - 8)) +// Disable diminishing light (To do: merge with LIGHT2SHADE and let a cvar control it, maybe by converting this to a function, like R_ActualExtraLight) +//#define LIGHT2SHADE(lightlev) ((MAX(255 - lightlev, 0) * NUMCOLORMAPS) << (FRACBITS - 8)) + +// combined! +//#define LIGHT2SHADE(l) ((glset.nolightfade)? \ +// ((MAX(255 - l, 0) * NUMCOLORMAPS) << (FRACBITS - 8)) : \ +// ((NUMCOLORMAPS*2*FRACUNIT)-(((l)+12)*(FRACUNIT*NUMCOLORMAPS/128)))) // MAXLIGHTSCALE from original DOOM, divided by 2. #define MAXLIGHTVIS (24.0) @@ -57,6 +62,8 @@ struct FSWColormap; namespace swrenderer { + fixed_t LIGHT2SHADE(int lightlevel, bool foggy); + class CameraLight { public: @@ -83,17 +90,17 @@ namespace swrenderer void SetVisibility(double visibility); double GetVisibility() const { return CurrentVisibility; } - double WallGlobVis() const { return WallVisibility; } - double SpriteGlobVis() const { return WallVisibility; } - double ParticleGlobVis() const { return WallVisibility * 0.5; } - double FlatPlaneGlobVis() const { return FloorVisibility; } - double SlopePlaneGlobVis() const { return TiltVisibility; } + double WallGlobVis(bool foggy) const { return (NoLightFade && !foggy) ? 0.0f : WallVisibility; } + double SpriteGlobVis(bool foggy) const { return (NoLightFade && !foggy) ? 0.0f : WallVisibility; } + double ParticleGlobVis(bool foggy) const { return (NoLightFade && !foggy) ? 0.0f : (WallVisibility * 0.5); } + double FlatPlaneGlobVis(bool foggy) const { return (NoLightFade && !foggy) ? 0.0f : FloorVisibility; } + double SlopePlaneGlobVis(bool foggy) const { return (NoLightFade && !foggy) ? 
0.0f : TiltVisibility; } // The vis value to pass into the GETPALOOKUP or LIGHTSCALE macros - double WallVis(double screenZ) const { return WallGlobVis() / screenZ; } - double SpriteVis(double screenZ) const { return SpriteGlobVis() / screenZ; } - double ParticleVis(double screenZ) const { return ParticleGlobVis() / screenZ; } - double FlatPlaneVis(int screenY, double planeZ) const { return FlatPlaneGlobVis() / fabs(planeZ - ViewPos.Z) * fabs(RenderViewport::Instance()->CenterY - screenY); } + double WallVis(double screenZ, bool foggy) const { return WallGlobVis(foggy) / screenZ; } + double SpriteVis(double screenZ, bool foggy) const { return SpriteGlobVis(foggy) / screenZ; } + double ParticleVis(double screenZ, bool foggy) const { return ParticleGlobVis(foggy) / screenZ; } + double FlatPlaneVis(int screenY, double planeZ, bool foggy) const { return FlatPlaneGlobVis(foggy) / fabs(planeZ - ViewPos.Z) * fabs(RenderViewport::Instance()->CenterY - screenY); } private: double BaseVisibility = 0.0; @@ -101,6 +108,8 @@ namespace swrenderer double FloorVisibility = 0.0; float TiltVisibility = 0.0f; + bool NoLightFade = false; + double CurrentVisibility = 8.f; double MaxVisForWall = 0.0; double MaxVisForFloor = 0.0; diff --git a/src/swrenderer/scene/r_opaque_pass.cpp b/src/swrenderer/scene/r_opaque_pass.cpp index ac8505dd26..29063ed6f2 100644 --- a/src/swrenderer/scene/r_opaque_pass.cpp +++ b/src/swrenderer/scene/r_opaque_pass.cpp @@ -717,7 +717,7 @@ namespace swrenderer // [RH] Add particles if ((unsigned int)(sub - subsectors) < (unsigned int)numsubsectors) { // Only do it for the main BSP. 
- int shade = LIGHT2SHADE((floorlightlevel + ceilinglightlevel) / 2 + R_ActualExtraLight(foggy)); + int shade = LIGHT2SHADE((floorlightlevel + ceilinglightlevel) / 2 + R_ActualExtraLight(foggy), foggy); for (WORD i = ParticlesInSubsec[(unsigned int)(sub - subsectors)]; i != NO_PARTICLE; i = Particles[i].snext) { RenderParticle::Project(Thread, Particles + i, subsectors[sub - subsectors].sector, shade, FakeSide, foggy); @@ -834,7 +834,7 @@ namespace swrenderer //sec->validcount = validcount; SeenSpriteSectors.insert(sec); - int spriteshade = LIGHT2SHADE(lightlevel + R_ActualExtraLight(foggy)); + int spriteshade = LIGHT2SHADE(lightlevel + R_ActualExtraLight(foggy), foggy); // Handle all things in sector. for (auto p = sec->touching_renderthings; p != nullptr; p = p->m_snext) @@ -886,7 +886,7 @@ namespace swrenderer if (sec->sectornum != thing->Sector->sectornum) // compare sectornums to account for R_FakeFlat copies. { int lightlevel = thing->Sector->GetTexture(sector_t::ceiling) == skyflatnum ? 
thing->Sector->GetCeilingLight() : thing->Sector->GetFloorLight(); - thingShade = LIGHT2SHADE(lightlevel + R_ActualExtraLight(foggy)); + thingShade = LIGHT2SHADE(lightlevel + R_ActualExtraLight(foggy), foggy); thingColormap = thing->Sector->ColorMap; } diff --git a/src/swrenderer/things/r_particle.cpp b/src/swrenderer/things/r_particle.cpp index 9212004e6a..17656d69cd 100644 --- a/src/swrenderer/things/r_particle.cpp +++ b/src/swrenderer/things/r_particle.cpp @@ -204,7 +204,7 @@ namespace swrenderer vis->floorclip = 0; vis->foggy = foggy; - vis->Light.SetColormap(tiz * LightVisibility::Instance()->ParticleGlobVis(), shade, map, particle->bright != 0, false, false); + vis->Light.SetColormap(tiz * LightVisibility::Instance()->ParticleGlobVis(foggy), shade, map, particle->bright != 0, false, false); thread->SpriteList->Push(vis); } diff --git a/src/swrenderer/things/r_playersprite.cpp b/src/swrenderer/things/r_playersprite.cpp index 929acab532..98243def9c 100644 --- a/src/swrenderer/things/r_playersprite.cpp +++ b/src/swrenderer/things/r_playersprite.cpp @@ -131,7 +131,7 @@ namespace swrenderer // get light level lightnum = ((floorlight + ceilinglight) >> 1) + R_ActualExtraLight(foggy); - int spriteshade = LIGHT2SHADE(lightnum) - 24 * FRACUNIT; + int spriteshade = LIGHT2SHADE(lightnum, foggy) - 24 * FRACUNIT; if (camera->player != NULL) { diff --git a/src/swrenderer/things/r_sprite.cpp b/src/swrenderer/things/r_sprite.cpp index 2ebf1fd579..0979291972 100644 --- a/src/swrenderer/things/r_sprite.cpp +++ b/src/swrenderer/things/r_sprite.cpp @@ -280,7 +280,7 @@ namespace swrenderer vis->dynlightcolor = 0; } - vis->Light.SetColormap(LightVisibility::Instance()->SpriteGlobVis() / MAX(tz, MINZ), spriteshade, basecolormap, fullbright, invertcolormap, fadeToBlack); + vis->Light.SetColormap(LightVisibility::Instance()->SpriteGlobVis(foggy) / MAX(tz, MINZ), spriteshade, basecolormap, fullbright, invertcolormap, fadeToBlack); thread->SpriteList->Push(vis); } diff --git 
a/src/swrenderer/things/r_visiblesprite.cpp b/src/swrenderer/things/r_visiblesprite.cpp index 1f402de818..6a0d9bc3f7 100644 --- a/src/swrenderer/things/r_visiblesprite.cpp +++ b/src/swrenderer/things/r_visiblesprite.cpp @@ -138,9 +138,9 @@ namespace swrenderer bool isFullBright = !foggy && (renderflags & RF_FULLBRIGHT); bool fadeToBlack = spr->RenderStyle == LegacyRenderStyles[STYLE_Add] && mybasecolormap->Fade != 0; - int spriteshade = LIGHT2SHADE(sec->lightlevel + R_ActualExtraLight(spr->foggy)); + int spriteshade = LIGHT2SHADE(sec->lightlevel + R_ActualExtraLight(spr->foggy), foggy); - Light.SetColormap(LightVisibility::Instance()->SpriteGlobVis() / MAX(MINZ, (double)spr->depth), spriteshade, mybasecolormap, isFullBright, invertcolormap, fadeToBlack); + Light.SetColormap(LightVisibility::Instance()->SpriteGlobVis(foggy) / MAX(MINZ, (double)spr->depth), spriteshade, mybasecolormap, isFullBright, invertcolormap, fadeToBlack); } } diff --git a/src/swrenderer/things/r_voxel.cpp b/src/swrenderer/things/r_voxel.cpp index 33e889c3f6..b337cd8578 100644 --- a/src/swrenderer/things/r_voxel.cpp +++ b/src/swrenderer/things/r_voxel.cpp @@ -178,7 +178,7 @@ namespace swrenderer bool fullbright = !vis->foggy && ((renderflags & RF_FULLBRIGHT) || (thing->flags5 & MF5_BRIGHT)); bool fadeToBlack = (vis->RenderStyle.Flags & STYLEF_FadeToBlack) != 0; - vis->Light.SetColormap(LightVisibility::Instance()->SpriteGlobVis() / MAX(tz, MINZ), spriteshade, basecolormap, fullbright, invertcolormap, fadeToBlack); + vis->Light.SetColormap(LightVisibility::Instance()->SpriteGlobVis(foggy) / MAX(tz, MINZ), spriteshade, basecolormap, fullbright, invertcolormap, fadeToBlack); thread->SpriteList->Push(vis, true); } diff --git a/src/swrenderer/things/r_wallsprite.cpp b/src/swrenderer/things/r_wallsprite.cpp index 38a26aab19..6db6c746d0 100644 --- a/src/swrenderer/things/r_wallsprite.cpp +++ b/src/swrenderer/things/r_wallsprite.cpp @@ -133,7 +133,7 @@ namespace swrenderer vis->wallc = wallc; 
vis->foggy = foggy; - vis->Light.SetColormap(LightVisibility::Instance()->SpriteGlobVis() / MAX(tz, MINZ), spriteshade, basecolormap, false, false, false); + vis->Light.SetColormap(LightVisibility::Instance()->SpriteGlobVis(foggy) / MAX(tz, MINZ), spriteshade, basecolormap, false, false, false); thread->SpriteList->Push(vis); } @@ -182,8 +182,8 @@ namespace swrenderer SpriteDrawerArgs drawerargs; - int shade = LIGHT2SHADE(spr->sector->lightlevel + R_ActualExtraLight(spr->foggy)); - double GlobVis = LightVisibility::Instance()->WallGlobVis(); + int shade = LIGHT2SHADE(spr->sector->lightlevel + R_ActualExtraLight(spr->foggy), spr->foggy); + double GlobVis = LightVisibility::Instance()->WallGlobVis(foggy); float lightleft = float(GlobVis / spr->wallc.sz1); float lightstep = float((GlobVis / spr->wallc.sz2 - lightleft) / (spr->wallc.sx2 - spr->wallc.sx1)); float light = lightleft + (x1 - spr->wallc.sx1) * lightstep; diff --git a/src/win32/fb_d3d9.cpp b/src/win32/fb_d3d9.cpp index e0f79f51d9..254725b266 100644 --- a/src/win32/fb_d3d9.cpp +++ b/src/win32/fb_d3d9.cpp @@ -3130,7 +3130,7 @@ void D3DFB::FillSimplePoly(FTexture *texture, FVector2 *points, int npoints, DAngle rotation, FDynamicColormap *colormap, PalEntry flatcolor, int lightlevel, int bottomclip) { // Use an equation similar to player sprites to determine shade - double fadelevel = clamp((LIGHT2SHADE(lightlevel)/65536. - 12) / NUMCOLORMAPS, 0.0, 1.0); + double fadelevel = clamp((swrenderer::LIGHT2SHADE(lightlevel, true)/65536. - 12) / NUMCOLORMAPS, 0.0, 1.0); BufferedTris *quad; FBVERTEX *verts; From cb8b2de2878ff78a18cb33696ba71599c3f9b183 Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Mon, 6 Mar 2017 16:58:48 -0500 Subject: [PATCH 908/912] - somehow, this missed a few. 
--- src/polyrenderer/scene/poly_sky.cpp | 3 ++- src/swrenderer/line/r_renderdrawsegment.cpp | 16 +++++++++++----- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/src/polyrenderer/scene/poly_sky.cpp b/src/polyrenderer/scene/poly_sky.cpp index af24364fe9..507319e26c 100644 --- a/src/polyrenderer/scene/poly_sky.cpp +++ b/src/polyrenderer/scene/poly_sky.cpp @@ -38,6 +38,7 @@ PolySkyDome::PolySkyDome() void PolySkyDome::Render(const TriMatrix &worldToClip) { FTextureID sky1tex, sky2tex; + bool foggy = false; if ((level.flags & LEVEL_SWAPSKIES) && !(level.flags & LEVEL_DOUBLESKY)) sky1tex = sky2texture; else @@ -55,7 +56,7 @@ void PolySkyDome::Render(const TriMatrix &worldToClip) int rc = mRows + 1; PolyDrawArgs args; - args.uniforms.globvis = (float)swrenderer::LightVisibility::Instance()->WallGlobVis(); + args.uniforms.globvis = (float)swrenderer::LightVisibility::Instance()->WallGlobVis(foggy); args.uniforms.light = 256; args.uniforms.flags = 0; args.uniforms.subsectorDepth = RenderPolyScene::SkySubsectorDepth; diff --git a/src/swrenderer/line/r_renderdrawsegment.cpp b/src/swrenderer/line/r_renderdrawsegment.cpp index bb7823bebf..e4ac3e2318 100644 --- a/src/swrenderer/line/r_renderdrawsegment.cpp +++ b/src/swrenderer/line/r_renderdrawsegment.cpp @@ -20,6 +20,7 @@ #include "p_setup.h" #include "a_sharedglobal.h" #include "g_level.h" +#include "g_levellocals.h" #include "p_effect.h" #include "doomstat.h" #include "r_state.h" @@ -125,7 +126,8 @@ namespace swrenderer { lightlist_t *lit = &frontsector->e->XFloor.lightlist[i]; basecolormap = lit->extra_colormap; - wallshade = LIGHT2SHADE(curline->sidedef->GetLightLevel(ds->foggy, *lit->p_lightlevel, lit->lightsource != nullptr) + R_ActualExtraLight(ds->foggy)); + bool foggy = (level.fadeto || basecolormap->Fade || (level.flags & LEVEL_HASFADETABLE)); // [RH] set foggy flag + wallshade = LIGHT2SHADE(curline->sidedef->GetLightLevel(ds->foggy, *lit->p_lightlevel, lit->lightsource != nullptr) + 
R_ActualExtraLight(ds->foggy), foggy); break; } } @@ -701,7 +703,8 @@ namespace swrenderer { lightlist_t *lit = &backsector->e->XFloor.lightlist[j]; basecolormap = lit->extra_colormap; - wallshade = LIGHT2SHADE(curline->sidedef->GetLightLevel(ds->foggy, *lit->p_lightlevel, lit->lightsource != nullptr) + R_ActualExtraLight(ds->foggy)); + bool foggy = (level.fadeto || basecolormap->Fade || (level.flags & LEVEL_HASFADETABLE)); // [RH] set foggy flag + wallshade = LIGHT2SHADE(curline->sidedef->GetLightLevel(ds->foggy, *lit->p_lightlevel, lit->lightsource != nullptr) + R_ActualExtraLight(ds->foggy), foggy); break; } } @@ -714,7 +717,8 @@ namespace swrenderer { lightlist_t *lit = &frontsector->e->XFloor.lightlist[j]; basecolormap = lit->extra_colormap; - wallshade = LIGHT2SHADE(curline->sidedef->GetLightLevel(ds->foggy, *lit->p_lightlevel, lit->lightsource != nullptr) + R_ActualExtraLight(ds->foggy)); + bool foggy = (level.fadeto || basecolormap->Fade || (level.flags & LEVEL_HASFADETABLE)); // [RH] set foggy flag + wallshade = LIGHT2SHADE(curline->sidedef->GetLightLevel(ds->foggy, *lit->p_lightlevel, lit->lightsource != nullptr) + R_ActualExtraLight(ds->foggy), foggy); break; } } @@ -876,7 +880,8 @@ namespace swrenderer { lightlist_t *lit = &backsector->e->XFloor.lightlist[j]; basecolormap = lit->extra_colormap; - wallshade = LIGHT2SHADE(curline->sidedef->GetLightLevel(ds->foggy, *lit->p_lightlevel, lit->lightsource != nullptr) + R_ActualExtraLight(ds->foggy)); + bool foggy = (level.fadeto || basecolormap->Fade || (level.flags & LEVEL_HASFADETABLE)); // [RH] set foggy flag + wallshade = LIGHT2SHADE(curline->sidedef->GetLightLevel(ds->foggy, *lit->p_lightlevel, lit->lightsource != nullptr) + R_ActualExtraLight(ds->foggy), foggy); break; } } @@ -889,7 +894,8 @@ namespace swrenderer { lightlist_t *lit = &frontsector->e->XFloor.lightlist[j]; basecolormap = lit->extra_colormap; - wallshade = LIGHT2SHADE(curline->sidedef->GetLightLevel(ds->foggy, *lit->p_lightlevel, 
lit->lightsource != nullptr) + R_ActualExtraLight(ds->foggy)); + bool foggy = (level.fadeto || basecolormap->Fade || (level.flags & LEVEL_HASFADETABLE)); // [RH] set foggy flag + wallshade = LIGHT2SHADE(curline->sidedef->GetLightLevel(ds->foggy, *lit->p_lightlevel, lit->lightsource != nullptr) + R_ActualExtraLight(ds->foggy), foggy); break; } } From 65f8d65858f94fe89a0a689c53db1b2775582e0e Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 6 Mar 2017 23:27:02 +0100 Subject: [PATCH 909/912] Minor code clean up --- src/gl/system/gl_swframebuffer.cpp | 2 +- src/polyrenderer/scene/poly_playersprite.cpp | 2 +- src/swrenderer/line/r_line.cpp | 10 ++++---- src/swrenderer/line/r_renderdrawsegment.cpp | 16 ++++++------ src/swrenderer/line/r_walldraw.cpp | 2 +- src/swrenderer/plane/r_flatplane.cpp | 4 +-- src/swrenderer/plane/r_slopeplane.cpp | 4 +-- src/swrenderer/r_swcanvas.cpp | 2 +- src/swrenderer/scene/r_light.cpp | 26 +++++++++++--------- src/swrenderer/scene/r_light.h | 25 ++++--------------- src/swrenderer/scene/r_opaque_pass.cpp | 14 +++++------ src/swrenderer/things/r_decal.cpp | 2 +- src/swrenderer/things/r_playersprite.cpp | 4 +-- src/swrenderer/things/r_visiblesprite.cpp | 2 +- src/swrenderer/things/r_wallsprite.cpp | 4 +-- src/swrenderer/viewport/r_spritedrawer.cpp | 2 +- src/win32/fb_d3d9.cpp | 2 +- 17 files changed, 56 insertions(+), 67 deletions(-) diff --git a/src/gl/system/gl_swframebuffer.cpp b/src/gl/system/gl_swframebuffer.cpp index c883c662f7..a0fc6e716d 100644 --- a/src/gl/system/gl_swframebuffer.cpp +++ b/src/gl/system/gl_swframebuffer.cpp @@ -2951,7 +2951,7 @@ void OpenGLSWFrameBuffer::FillSimplePoly(FTexture *texture, FVector2 *points, in DAngle rotation, FDynamicColormap *colormap, PalEntry flatcolor, int lightlevel, int bottomclip) { // Use an equation similar to player sprites to determine shade - double fadelevel = clamp((swrenderer::LIGHT2SHADE(lightlevel, true) / 65536. 
- 12) / NUMCOLORMAPS, 0.0, 1.0); + double fadelevel = clamp((swrenderer::LightVisibility::LightLevelToShade(lightlevel, true) / 65536. - 12) / NUMCOLORMAPS, 0.0, 1.0); BufferedTris *quad; FBVERTEX *verts; diff --git a/src/polyrenderer/scene/poly_playersprite.cpp b/src/polyrenderer/scene/poly_playersprite.cpp index d55ecb9e75..6ec53f325f 100644 --- a/src/polyrenderer/scene/poly_playersprite.cpp +++ b/src/polyrenderer/scene/poly_playersprite.cpp @@ -223,7 +223,7 @@ void RenderPolyPlayerSprites::RenderSprite(DPSprite *sprite, AActor *owner, floa bool foggy = false; int actualextralight = foggy ? 0 : extralight << 4; - int spriteshade = swrenderer::LIGHT2SHADE(owner->Sector->lightlevel + actualextralight, foggy); + int spriteshade = swrenderer::LightVisibility::LightLevelToShade(owner->Sector->lightlevel + actualextralight, foggy); double minz = double((2048 * 4) / double(1 << 20)); ColormapNum = GETPALOOKUP(swrenderer::LightVisibility::Instance()->SpriteGlobVis(foggy) / minz, spriteshade); diff --git a/src/swrenderer/line/r_line.cpp b/src/swrenderer/line/r_line.cpp index 33006c8c6a..963e3d17b2 100644 --- a/src/swrenderer/line/r_line.cpp +++ b/src/swrenderer/line/r_line.cpp @@ -496,7 +496,7 @@ namespace swrenderer } else { - draw_segment->shade = LIGHT2SHADE(mLineSegment->sidedef->GetLightLevel(foggy, mLineSegment->frontsector->lightlevel) + R_ActualExtraLight(foggy), foggy); + draw_segment->shade = LightVisibility::LightLevelToShade(mLineSegment->sidedef->GetLightLevel(foggy, mLineSegment->frontsector->lightlevel) + LightVisibility::ActualExtraLight(foggy), foggy); } if (draw_segment->bFogBoundary || draw_segment->maskedtexturecol != nullptr) @@ -781,7 +781,7 @@ namespace swrenderer CameraLight *cameraLight = CameraLight::Instance(); if (cameraLight->FixedColormap() == nullptr && cameraLight->FixedLightLevel() < 0) { - wallshade = LIGHT2SHADE(mLineSegment->sidedef->GetLightLevel(foggy, mFrontSector->lightlevel) + R_ActualExtraLight(foggy), foggy); + wallshade = 
LightVisibility::LightLevelToShade(mLineSegment->sidedef->GetLightLevel(foggy, mFrontSector->lightlevel) + LightVisibility::ActualExtraLight(foggy), foggy); double GlobVis = LightVisibility::Instance()->WallGlobVis(foggy); rw_lightleft = float(GlobVis / WallC.sz1); rw_lightstep = float((GlobVis / WallC.sz2 - rw_lightleft) / (WallC.sx2 - WallC.sx1)); @@ -1156,7 +1156,7 @@ namespace swrenderer CameraLight *cameraLight = CameraLight::Instance(); if (cameraLight->FixedLightLevel() >= 0) - drawerargs.SetLight((r_fullbrightignoresectorcolor) ? &FullNormalLight : basecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->FixedLightLevel())); + drawerargs.SetLight((r_fullbrightignoresectorcolor) ? &FullNormalLight : basecolormap, 0, cameraLight->FixedLightLevelShade()); else if (cameraLight->FixedColormap() != nullptr) drawerargs.SetLight(cameraLight->FixedColormap(), 0, 0); @@ -1203,7 +1203,7 @@ namespace swrenderer CameraLight *cameraLight = CameraLight::Instance(); if (cameraLight->FixedLightLevel() >= 0) - drawerargs.SetLight((r_fullbrightignoresectorcolor) ? &FullNormalLight : basecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->FixedLightLevel())); + drawerargs.SetLight((r_fullbrightignoresectorcolor) ? &FullNormalLight : basecolormap, 0, cameraLight->FixedLightLevelShade()); else if (cameraLight->FixedColormap() != nullptr) drawerargs.SetLight(cameraLight->FixedColormap(), 0, 0); @@ -1251,7 +1251,7 @@ namespace swrenderer CameraLight *cameraLight = CameraLight::Instance(); if (cameraLight->FixedLightLevel() >= 0) - drawerargs.SetLight((r_fullbrightignoresectorcolor) ? &FullNormalLight : basecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->FixedLightLevel())); + drawerargs.SetLight((r_fullbrightignoresectorcolor) ? 
&FullNormalLight : basecolormap, 0, cameraLight->FixedLightLevelShade()); else if (cameraLight->FixedColormap() != nullptr) drawerargs.SetLight(cameraLight->FixedColormap(), 0, 0); diff --git a/src/swrenderer/line/r_renderdrawsegment.cpp b/src/swrenderer/line/r_renderdrawsegment.cpp index e4ac3e2318..f157a89b68 100644 --- a/src/swrenderer/line/r_renderdrawsegment.cpp +++ b/src/swrenderer/line/r_renderdrawsegment.cpp @@ -127,7 +127,7 @@ namespace swrenderer lightlist_t *lit = &frontsector->e->XFloor.lightlist[i]; basecolormap = lit->extra_colormap; bool foggy = (level.fadeto || basecolormap->Fade || (level.flags & LEVEL_HASFADETABLE)); // [RH] set foggy flag - wallshade = LIGHT2SHADE(curline->sidedef->GetLightLevel(ds->foggy, *lit->p_lightlevel, lit->lightsource != nullptr) + R_ActualExtraLight(ds->foggy), foggy); + wallshade = LightVisibility::LightLevelToShade(curline->sidedef->GetLightLevel(ds->foggy, *lit->p_lightlevel, lit->lightsource != nullptr) + LightVisibility::ActualExtraLight(ds->foggy), foggy); break; } } @@ -159,8 +159,8 @@ namespace swrenderer if (cameraLight->FixedLightLevel() >= 0) { - walldrawerargs.SetLight((r_fullbrightignoresectorcolor) ? &FullNormalLight : basecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->FixedLightLevel())); - columndrawerargs.SetLight((r_fullbrightignoresectorcolor) ? &FullNormalLight : basecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->FixedLightLevel())); + walldrawerargs.SetLight((r_fullbrightignoresectorcolor) ? &FullNormalLight : basecolormap, 0, cameraLight->FixedLightLevelShade()); + columndrawerargs.SetLight((r_fullbrightignoresectorcolor) ? &FullNormalLight : basecolormap, 0, cameraLight->FixedLightLevelShade()); } else if (cameraLight->FixedColormap() != nullptr) { @@ -468,7 +468,7 @@ namespace swrenderer CameraLight *cameraLight = CameraLight::Instance(); if (cameraLight->FixedLightLevel() >= 0) - drawerargs.SetLight((r_fullbrightignoresectorcolor) ? 
&FullNormalLight : basecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->FixedLightLevel())); + drawerargs.SetLight((r_fullbrightignoresectorcolor) ? &FullNormalLight : basecolormap, 0, cameraLight->FixedLightLevelShade()); else if (cameraLight->FixedColormap() != nullptr) drawerargs.SetLight(cameraLight->FixedColormap(), 0, 0); @@ -704,7 +704,7 @@ namespace swrenderer lightlist_t *lit = &backsector->e->XFloor.lightlist[j]; basecolormap = lit->extra_colormap; bool foggy = (level.fadeto || basecolormap->Fade || (level.flags & LEVEL_HASFADETABLE)); // [RH] set foggy flag - wallshade = LIGHT2SHADE(curline->sidedef->GetLightLevel(ds->foggy, *lit->p_lightlevel, lit->lightsource != nullptr) + R_ActualExtraLight(ds->foggy), foggy); + wallshade = LightVisibility::LightLevelToShade(curline->sidedef->GetLightLevel(ds->foggy, *lit->p_lightlevel, lit->lightsource != nullptr) + LightVisibility::ActualExtraLight(ds->foggy), foggy); break; } } @@ -718,7 +718,7 @@ namespace swrenderer lightlist_t *lit = &frontsector->e->XFloor.lightlist[j]; basecolormap = lit->extra_colormap; bool foggy = (level.fadeto || basecolormap->Fade || (level.flags & LEVEL_HASFADETABLE)); // [RH] set foggy flag - wallshade = LIGHT2SHADE(curline->sidedef->GetLightLevel(ds->foggy, *lit->p_lightlevel, lit->lightsource != nullptr) + R_ActualExtraLight(ds->foggy), foggy); + wallshade = LightVisibility::LightLevelToShade(curline->sidedef->GetLightLevel(ds->foggy, *lit->p_lightlevel, lit->lightsource != nullptr) + LightVisibility::ActualExtraLight(ds->foggy), foggy); break; } } @@ -881,7 +881,7 @@ namespace swrenderer lightlist_t *lit = &backsector->e->XFloor.lightlist[j]; basecolormap = lit->extra_colormap; bool foggy = (level.fadeto || basecolormap->Fade || (level.flags & LEVEL_HASFADETABLE)); // [RH] set foggy flag - wallshade = LIGHT2SHADE(curline->sidedef->GetLightLevel(ds->foggy, *lit->p_lightlevel, lit->lightsource != nullptr) + R_ActualExtraLight(ds->foggy), foggy); + wallshade = 
LightVisibility::LightLevelToShade(curline->sidedef->GetLightLevel(ds->foggy, *lit->p_lightlevel, lit->lightsource != nullptr) + LightVisibility::ActualExtraLight(ds->foggy), foggy); break; } } @@ -895,7 +895,7 @@ namespace swrenderer lightlist_t *lit = &frontsector->e->XFloor.lightlist[j]; basecolormap = lit->extra_colormap; bool foggy = (level.fadeto || basecolormap->Fade || (level.flags & LEVEL_HASFADETABLE)); // [RH] set foggy flag - wallshade = LIGHT2SHADE(curline->sidedef->GetLightLevel(ds->foggy, *lit->p_lightlevel, lit->lightsource != nullptr) + R_ActualExtraLight(ds->foggy), foggy); + wallshade = LightVisibility::LightLevelToShade(curline->sidedef->GetLightLevel(ds->foggy, *lit->p_lightlevel, lit->lightsource != nullptr) + LightVisibility::ActualExtraLight(ds->foggy), foggy); break; } } diff --git a/src/swrenderer/line/r_walldraw.cpp b/src/swrenderer/line/r_walldraw.cpp index 9b2a4f7ce1..40c9f3551a 100644 --- a/src/swrenderer/line/r_walldraw.cpp +++ b/src/swrenderer/line/r_walldraw.cpp @@ -401,7 +401,7 @@ namespace swrenderer lightlist_t *lit = &frontsector->e->XFloor.lightlist[i]; basecolormap = lit->extra_colormap; - wallshade = LIGHT2SHADE(curline->sidedef->GetLightLevel(foggy, *lit->p_lightlevel, lit->lightsource != NULL) + R_ActualExtraLight(foggy), foggy); + wallshade = LightVisibility::LightLevelToShade(curline->sidedef->GetLightLevel(foggy, *lit->p_lightlevel, lit->lightsource != NULL) + LightVisibility::ActualExtraLight(foggy), foggy); } ProcessNormalWall(up, dwal, texturemid, swal, lwal); diff --git a/src/swrenderer/plane/r_flatplane.cpp b/src/swrenderer/plane/r_flatplane.cpp index 725dd46162..b3c695c78a 100644 --- a/src/swrenderer/plane/r_flatplane.cpp +++ b/src/swrenderer/plane/r_flatplane.cpp @@ -125,7 +125,7 @@ namespace swrenderer CameraLight *cameraLight = CameraLight::Instance(); if (cameraLight->FixedLightLevel() >= 0) { - drawerargs.SetLight(basecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->FixedLightLevel())); + 
drawerargs.SetLight(basecolormap, 0, cameraLight->FixedLightLevelShade()); plane_shade = false; } else if (cameraLight->FixedColormap()) @@ -136,7 +136,7 @@ namespace swrenderer else { plane_shade = true; - planeshade = LIGHT2SHADE(pl->lightlevel, foggy); + planeshade = LightVisibility::LightLevelToShade(pl->lightlevel, foggy); } drawerargs.SetStyle(masked, additive, alpha); diff --git a/src/swrenderer/plane/r_slopeplane.cpp b/src/swrenderer/plane/r_slopeplane.cpp index 033c5257df..833fefe86e 100644 --- a/src/swrenderer/plane/r_slopeplane.cpp +++ b/src/swrenderer/plane/r_slopeplane.cpp @@ -166,7 +166,7 @@ namespace swrenderer CameraLight *cameraLight = CameraLight::Instance(); if (cameraLight->FixedLightLevel() >= 0) { - drawerargs.SetLight(basecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->FixedLightLevel())); + drawerargs.SetLight(basecolormap, 0, cameraLight->FixedLightLevelShade()); plane_shade = false; } else if (cameraLight->FixedColormap()) @@ -178,7 +178,7 @@ namespace swrenderer { drawerargs.SetLight(basecolormap, 0, 0); plane_shade = true; - planeshade = LIGHT2SHADE(pl->lightlevel, foggy); + planeshade = LightVisibility::LightLevelToShade(pl->lightlevel, foggy); } // Hack in support for 1 x Z and Z x 1 texture sizes diff --git a/src/swrenderer/r_swcanvas.cpp b/src/swrenderer/r_swcanvas.cpp index 4866ac0cd8..aa817e707d 100644 --- a/src/swrenderer/r_swcanvas.cpp +++ b/src/swrenderer/r_swcanvas.cpp @@ -198,7 +198,7 @@ void SWCanvas::FillSimplePoly(DCanvas *canvas, FTexture *tex, FVector2 *points, FDynamicColormap *colormap, PalEntry flatcolor, int lightlevel, int bottomclip) { // Use an equation similar to player sprites to determine shade - fixed_t shade = LIGHT2SHADE(lightlevel, true) - 12 * FRACUNIT; + fixed_t shade = LightVisibility::LightLevelToShade(lightlevel, true) - 12 * FRACUNIT; float topy, boty, leftx, rightx; int toppt, botpt, pt1, pt2; int i; diff --git a/src/swrenderer/scene/r_light.cpp b/src/swrenderer/scene/r_light.cpp index 
686cfc524c..3407fb4bb7 100644 --- a/src/swrenderer/scene/r_light.cpp +++ b/src/swrenderer/scene/r_light.cpp @@ -39,13 +39,6 @@ EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor) namespace swrenderer { - fixed_t LIGHT2SHADE(int l, bool foggy) - { - return (!foggy && (glset.nolightfade) ? - ((MAX(255 - l, 0) * NUMCOLORMAPS) << (FRACBITS - 8)) : - ((NUMCOLORMAPS*2*FRACUNIT)-(((l)+12)*(FRACUNIT*NUMCOLORMAPS/128)))); - } - CameraLight *CameraLight::Instance() { static CameraLight instance; @@ -155,11 +148,22 @@ namespace swrenderer TiltVisibility = float(vis * FocalTangent * (16.f * 320.f) / viewwidth); NoLightFade = glset.nolightfade; + } - // Disable diminishing light - // WallVisibility = 0.0; - // FloorVisibility = 0.0; - // TiltVisibility = 0.0f; + fixed_t LightVisibility::LightLevelToShade(int lightlevel, bool foggy) + { + bool nolightfade = !foggy && (glset.nolightfade); + if (nolightfade) + { + return (MAX(255 - lightlevel, 0) * NUMCOLORMAPS) << (FRACBITS - 8); + } + else + { + // Convert a light level into an unbounded colormap index (shade). Result is + // fixed point. Why the +12? I wish I knew, but experimentation indicates it + // is necessary in order to best reproduce Doom's original lighting. + return (NUMCOLORMAPS * 2 * FRACUNIT) - ((lightlevel + 12) * (FRACUNIT*NUMCOLORMAPS / 128)); + } } // Controls how quickly light ramps across a 1/z range. Set this, and it diff --git a/src/swrenderer/scene/r_light.h b/src/swrenderer/scene/r_light.h index 1219a06c17..843faedd16 100644 --- a/src/swrenderer/scene/r_light.h +++ b/src/swrenderer/scene/r_light.h @@ -29,19 +29,6 @@ // The size of a single colormap, in bits #define COLORMAPSHIFT 8 -// Convert a light level into an unbounded colormap index (shade). Result is -// fixed point. Why the +12? I wish I knew, but experimentation indicates it -// is necessary in order to best reproduce Doom's original lighting. 
-//#define LIGHT2SHADE(l) ((NUMCOLORMAPS*2*FRACUNIT)-(((l)+12)*(FRACUNIT*NUMCOLORMAPS/128))) - -// Disable diminishing light (To do: merge with LIGHT2SHADE and let a cvar control it, maybe by converting this to a function, like R_ActualExtraLight) -//#define LIGHT2SHADE(lightlev) ((MAX(255 - lightlev, 0) * NUMCOLORMAPS) << (FRACBITS - 8)) - -// combined! -//#define LIGHT2SHADE(l) ((glset.nolightfade)? \ -// ((MAX(255 - l, 0) * NUMCOLORMAPS) << (FRACBITS - 8)) : \ -// ((NUMCOLORMAPS*2*FRACUNIT)-(((l)+12)*(FRACUNIT*NUMCOLORMAPS/128)))) - // MAXLIGHTSCALE from original DOOM, divided by 2. #define MAXLIGHTVIS (24.0) @@ -55,15 +42,10 @@ // Returns a value between 0 and 1 in fixed point #define LIGHTSCALE(vis,shade) FLOAT2FIXED(clamp((FIXED2DBL(shade) - (MIN(MAXLIGHTVIS,double(vis)))) / NUMCOLORMAPS, 0.0, (NUMCOLORMAPS-1)/(double)NUMCOLORMAPS)) -// Converts fixedlightlev into a shade value -#define FIXEDLIGHT2SHADE(lightlev) (((lightlev) >> COLORMAPSHIFT) << FRACBITS) - struct FSWColormap; namespace swrenderer { - fixed_t LIGHT2SHADE(int lightlevel, bool foggy); - class CameraLight { public: @@ -73,6 +55,8 @@ namespace swrenderer FSWColormap *FixedColormap() const { return fixedcolormap; } FSpecialColormap *ShaderColormap() const { return realfixedcolormap; } + fixed_t FixedLightLevelShade() const { return (FixedLightLevel() >> COLORMAPSHIFT) << FRACBITS; } + void SetCamera(AActor *actor); void ClearShaderColormap() { realfixedcolormap = nullptr; } @@ -102,6 +86,9 @@ namespace swrenderer double ParticleVis(double screenZ, bool foggy) const { return ParticleGlobVis(foggy) / screenZ; } double FlatPlaneVis(int screenY, double planeZ, bool foggy) const { return FlatPlaneGlobVis(foggy) / fabs(planeZ - ViewPos.Z) * fabs(RenderViewport::Instance()->CenterY - screenY); } + static fixed_t LightLevelToShade(int lightlevel, bool foggy); + static int ActualExtraLight(bool fog) { return fog ? 
0 : extralight << 4; } + private: double BaseVisibility = 0.0; double WallVisibility = 0.0; @@ -123,6 +110,4 @@ namespace swrenderer void SetColormap(double visibility, int shade, FDynamicColormap *basecolormap, bool fullbright, bool invertColormap, bool fadeToBlack); }; - - inline int R_ActualExtraLight(bool fog) { return fog ? 0 : extralight << 4; } } diff --git a/src/swrenderer/scene/r_opaque_pass.cpp b/src/swrenderer/scene/r_opaque_pass.cpp index 29063ed6f2..7ad2c3a016 100644 --- a/src/swrenderer/scene/r_opaque_pass.cpp +++ b/src/swrenderer/scene/r_opaque_pass.cpp @@ -509,7 +509,7 @@ namespace swrenderer frontsector->heightsec->GetTexture(sector_t::floor) == skyflatnum) ? Thread->PlaneList->FindPlane(frontsector->ceilingplane, // killough 3/8/98 frontsector->GetTexture(sector_t::ceiling), - ceilinglightlevel + R_ActualExtraLight(foggy), // killough 4/11/98 + ceilinglightlevel + LightVisibility::ActualExtraLight(foggy), // killough 4/11/98 frontsector->GetAlpha(sector_t::ceiling), !!(frontsector->GetFlags(sector_t::ceiling) & PLANEF_ADDITIVE), frontsector->planes[sector_t::ceiling].xform, @@ -550,7 +550,7 @@ namespace swrenderer frontsector->heightsec->GetTexture(sector_t::ceiling) == skyflatnum) ? 
Thread->PlaneList->FindPlane(frontsector->floorplane, frontsector->GetTexture(sector_t::floor), - floorlightlevel + R_ActualExtraLight(foggy), // killough 3/16/98 + floorlightlevel + LightVisibility::ActualExtraLight(foggy), // killough 3/16/98 frontsector->GetAlpha(sector_t::floor), !!(frontsector->GetFlags(sector_t::floor) & PLANEF_ADDITIVE), frontsector->planes[sector_t::floor].xform, @@ -616,7 +616,7 @@ namespace swrenderer ceilingplane = nullptr; floorplane = Thread->PlaneList->FindPlane(frontsector->floorplane, frontsector->GetTexture(sector_t::floor), - floorlightlevel + R_ActualExtraLight(foggy), // killough 3/16/98 + floorlightlevel + LightVisibility::ActualExtraLight(foggy), // killough 3/16/98 frontsector->GetAlpha(sector_t::floor), !!(clip3d->fakeFloor->fakeFloor->flags & FF_ADDITIVETRANS), frontsector->planes[position].xform, @@ -682,7 +682,7 @@ namespace swrenderer floorplane = nullptr; ceilingplane = Thread->PlaneList->FindPlane(frontsector->ceilingplane, // killough 3/8/98 frontsector->GetTexture(sector_t::ceiling), - ceilinglightlevel + R_ActualExtraLight(foggy), // killough 4/11/98 + ceilinglightlevel + LightVisibility::ActualExtraLight(foggy), // killough 4/11/98 frontsector->GetAlpha(sector_t::ceiling), !!(clip3d->fakeFloor->fakeFloor->flags & FF_ADDITIVETRANS), frontsector->planes[position].xform, @@ -717,7 +717,7 @@ namespace swrenderer // [RH] Add particles if ((unsigned int)(sub - subsectors) < (unsigned int)numsubsectors) { // Only do it for the main BSP. 
- int shade = LIGHT2SHADE((floorlightlevel + ceilinglightlevel) / 2 + R_ActualExtraLight(foggy), foggy); + int shade = LightVisibility::LightLevelToShade((floorlightlevel + ceilinglightlevel) / 2 + LightVisibility::ActualExtraLight(foggy), foggy); for (WORD i = ParticlesInSubsec[(unsigned int)(sub - subsectors)]; i != NO_PARTICLE; i = Particles[i].snext) { RenderParticle::Project(Thread, Particles + i, subsectors[sub - subsectors].sector, shade, FakeSide, foggy); @@ -834,7 +834,7 @@ namespace swrenderer //sec->validcount = validcount; SeenSpriteSectors.insert(sec); - int spriteshade = LIGHT2SHADE(lightlevel + R_ActualExtraLight(foggy), foggy); + int spriteshade = LightVisibility::LightLevelToShade(lightlevel + LightVisibility::ActualExtraLight(foggy), foggy); // Handle all things in sector. for (auto p = sec->touching_renderthings; p != nullptr; p = p->m_snext) @@ -886,7 +886,7 @@ namespace swrenderer if (sec->sectornum != thing->Sector->sectornum) // compare sectornums to account for R_FakeFlat copies. { int lightlevel = thing->Sector->GetTexture(sector_t::ceiling) == skyflatnum ? thing->Sector->GetCeilingLight() : thing->Sector->GetFloorLight(); - thingShade = LIGHT2SHADE(lightlevel + R_ActualExtraLight(foggy), foggy); + thingShade = LightVisibility::LightLevelToShade(lightlevel + LightVisibility::ActualExtraLight(foggy), foggy); thingColormap = thing->Sector->ColorMap; } diff --git a/src/swrenderer/things/r_decal.cpp b/src/swrenderer/things/r_decal.cpp index 47a89727be..52e4337049 100644 --- a/src/swrenderer/things/r_decal.cpp +++ b/src/swrenderer/things/r_decal.cpp @@ -279,7 +279,7 @@ namespace swrenderer SpriteDrawerArgs drawerargs; if (cameraLight->FixedLightLevel() >= 0) - drawerargs.SetLight((r_fullbrightignoresectorcolor) ? &FullNormalLight : usecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->FixedLightLevel())); + drawerargs.SetLight((r_fullbrightignoresectorcolor) ? 
&FullNormalLight : usecolormap, 0, cameraLight->FixedLightLevelShade()); else if (cameraLight->FixedColormap() != NULL) drawerargs.SetLight(cameraLight->FixedColormap(), 0, 0); else if (!foggy && (decal->RenderFlags & RF_FULLBRIGHT)) diff --git a/src/swrenderer/things/r_playersprite.cpp b/src/swrenderer/things/r_playersprite.cpp index 98243def9c..4b8bb074bc 100644 --- a/src/swrenderer/things/r_playersprite.cpp +++ b/src/swrenderer/things/r_playersprite.cpp @@ -130,8 +130,8 @@ namespace swrenderer bool foggy = (level.fadeto || basecolormap->Fade || (level.flags & LEVEL_HASFADETABLE)); // get light level - lightnum = ((floorlight + ceilinglight) >> 1) + R_ActualExtraLight(foggy); - int spriteshade = LIGHT2SHADE(lightnum, foggy) - 24 * FRACUNIT; + lightnum = ((floorlight + ceilinglight) >> 1) + LightVisibility::ActualExtraLight(foggy); + int spriteshade = LightVisibility::LightLevelToShade(lightnum, foggy) - 24 * FRACUNIT; if (camera->player != NULL) { diff --git a/src/swrenderer/things/r_visiblesprite.cpp b/src/swrenderer/things/r_visiblesprite.cpp index 6a0d9bc3f7..b2e3da8005 100644 --- a/src/swrenderer/things/r_visiblesprite.cpp +++ b/src/swrenderer/things/r_visiblesprite.cpp @@ -138,7 +138,7 @@ namespace swrenderer bool isFullBright = !foggy && (renderflags & RF_FULLBRIGHT); bool fadeToBlack = spr->RenderStyle == LegacyRenderStyles[STYLE_Add] && mybasecolormap->Fade != 0; - int spriteshade = LIGHT2SHADE(sec->lightlevel + R_ActualExtraLight(spr->foggy), foggy); + int spriteshade = LightVisibility::LightLevelToShade(sec->lightlevel + LightVisibility::ActualExtraLight(spr->foggy), foggy); Light.SetColormap(LightVisibility::Instance()->SpriteGlobVis(foggy) / MAX(MINZ, (double)spr->depth), spriteshade, mybasecolormap, isFullBright, invertcolormap, fadeToBlack); } diff --git a/src/swrenderer/things/r_wallsprite.cpp b/src/swrenderer/things/r_wallsprite.cpp index 6db6c746d0..d4d8f9b58e 100644 --- a/src/swrenderer/things/r_wallsprite.cpp +++ 
b/src/swrenderer/things/r_wallsprite.cpp @@ -182,14 +182,14 @@ namespace swrenderer SpriteDrawerArgs drawerargs; - int shade = LIGHT2SHADE(spr->sector->lightlevel + R_ActualExtraLight(spr->foggy), spr->foggy); + int shade = LightVisibility::LightLevelToShade(spr->sector->lightlevel + LightVisibility::ActualExtraLight(spr->foggy), spr->foggy); double GlobVis = LightVisibility::Instance()->WallGlobVis(foggy); float lightleft = float(GlobVis / spr->wallc.sz1); float lightstep = float((GlobVis / spr->wallc.sz2 - lightleft) / (spr->wallc.sx2 - spr->wallc.sx1)); float light = lightleft + (x1 - spr->wallc.sx1) * lightstep; CameraLight *cameraLight = CameraLight::Instance(); if (cameraLight->FixedLightLevel() >= 0) - drawerargs.SetLight(usecolormap, 0, FIXEDLIGHT2SHADE(cameraLight->FixedLightLevel())); + drawerargs.SetLight(usecolormap, 0, cameraLight->FixedLightLevelShade()); else if (cameraLight->FixedColormap() != NULL) drawerargs.SetLight(cameraLight->FixedColormap(), 0, 0); else if (!spr->foggy && (spr->renderflags & RF_FULLBRIGHT)) diff --git a/src/swrenderer/viewport/r_spritedrawer.cpp b/src/swrenderer/viewport/r_spritedrawer.cpp index 118fd18b09..85f187259b 100644 --- a/src/swrenderer/viewport/r_spritedrawer.cpp +++ b/src/swrenderer/viewport/r_spritedrawer.cpp @@ -445,7 +445,7 @@ namespace swrenderer if (cameraLight->FixedLightLevel() >= 0 && !cameraLight->FixedColormap()) { fixed_t shade = shadedlightshade; - if (shade == 0) FIXEDLIGHT2SHADE(cameraLight->FixedLightLevel()); + if (shade == 0) shade = cameraLight->FixedLightLevelShade(); SetLight(basecolormap, 0, shade); } else diff --git a/src/win32/fb_d3d9.cpp b/src/win32/fb_d3d9.cpp index 254725b266..2689e549a9 100644 --- a/src/win32/fb_d3d9.cpp +++ b/src/win32/fb_d3d9.cpp @@ -3130,7 +3130,7 @@ void D3DFB::FillSimplePoly(FTexture *texture, FVector2 *points, int npoints, DAngle rotation, FDynamicColormap *colormap, PalEntry flatcolor, int lightlevel, int bottomclip) { // Use an equation similar to player sprites 
to determine shade - double fadelevel = clamp((swrenderer::LIGHT2SHADE(lightlevel, true)/65536. - 12) / NUMCOLORMAPS, 0.0, 1.0); + double fadelevel = clamp((swrenderer::LightVisibility::LightLevelToShade(lightlevel, true)/65536. - 12) / NUMCOLORMAPS, 0.0, 1.0); BufferedTris *quad; FBVERTEX *verts; From 279fa7e886331828f4a79d01f5e1e2a7317b7e98 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 8 Mar 2017 23:35:13 +0100 Subject: [PATCH 910/912] Non-SSE version of the sky drawers --- src/swrenderer/drawers/r_draw_rgba.cpp | 7 + src/swrenderer/drawers/r_draw_rgba.h | 64 +++++ src/swrenderer/drawers/r_draw_sky32.h | 319 +++++++++++++++++++++++++ 3 files changed, 390 insertions(+) create mode 100644 src/swrenderer/drawers/r_draw_sky32.h diff --git a/src/swrenderer/drawers/r_draw_rgba.cpp b/src/swrenderer/drawers/r_draw_rgba.cpp index 48bbc2c38b..d90c057a3e 100644 --- a/src/swrenderer/drawers/r_draw_rgba.cpp +++ b/src/swrenderer/drawers/r_draw_rgba.cpp @@ -39,10 +39,17 @@ #include "gl/data/gl_matrix.h" #include "swrenderer/viewport/r_viewport.h" #include "swrenderer/scene/r_light.h" +#ifdef NO_SSE +#include "r_draw_wall32.h" +#include "r_draw_sprite32.h" +#include "r_draw_span32.h" +#include "r_draw_sky32.h" +#else #include "r_draw_wall32_sse2.h" #include "r_draw_sprite32_sse2.h" #include "r_draw_span32_sse2.h" #include "r_draw_sky32_sse2.h" +#endif #include "gi.h" #include "stats.h" diff --git a/src/swrenderer/drawers/r_draw_rgba.h b/src/swrenderer/drawers/r_draw_rgba.h index 3e95c8cb67..263a5a643f 100644 --- a/src/swrenderer/drawers/r_draw_rgba.h +++ b/src/swrenderer/drawers/r_draw_rgba.h @@ -387,4 +387,68 @@ namespace swrenderer return alpha | (red << 16) | (green << 8) | blue; } }; + + ///////////////////////////////////////////////////////////////////////////// + // Vector classes for non-SSE drawers that behave like their SSE counterparts + + namespace drawervectors + { + struct vec4ui + { + vec4ui() {} + vec4ui(uint32_t v) : a(v), r(v), g(v), b(v) { } + 
vec4ui(uint32_t a, uint32_t r, uint32_t g, uint32_t b) : a(a), r(r), g(g), b(b) { } + uint32_t a, r, g, b; + }; + + struct vec8us + { + vec8us() {} + vec8us(uint16_t v) : a0(v), r0(v), g0(v), b0(v) { } + vec8us(uint16_t a0, uint16_t r0, uint16_t g0, uint16_t b0, uint16_t a1, uint16_t r1, uint16_t g1, uint16_t b1) : a0(a0), r0(r0), g0(g0), b0(b0), a1(a1), r1(r1), g1(g1), b1(b1) { } + uint16_t a0, r0, g0, b0, a1, r1, g1, b1; + }; + + inline vec8us unpack(uint32_t lo, uint32_t hi) { return vec8us(APART(lo), RPART(lo), GPART(lo), BPART(lo), APART(hi), RPART(hi), GPART(hi), BPART(hi)); } + inline vec4ui unpacklo(vec8us v) { return vec4ui(v.a0, v.r0, v.g0, v.b0); } + inline vec4ui unpackhi(vec8us v) { return vec4ui(v.a1, v.r1, v.g1, v.b1); } + + inline vec8us pack(vec4ui lo, vec4ui hi) + { + return vec8us(lo.a, lo.r, lo.g, lo.b, hi.a, hi.r, hi.g, hi.b); + } + inline uint32_t packlo(vec8us v) + { + return MAKEARGB((uint32_t)clamp(v.a0, 0, 255), (uint32_t)clamp(v.r0, 0, 255), (uint32_t)clamp(v.g0, 0, 255), (uint32_t)clamp(v.b0, 0, 255)); + } + inline uint32_t packhi(vec8us v) + { + return MAKEARGB((uint32_t)clamp(v.a1, 0, 255), (uint32_t)clamp(v.r1, 0, 255), (uint32_t)clamp(v.g1, 0, 255), (uint32_t)clamp(v.b1, 0, 255)); + } + + inline vec8us operator+(vec8us a, vec8us b) + { + return vec8us(a.a0 + b.a0, a.r0 + b.r0, a.g0 + b.g0, a.b0 + b.b0, a.a1 + b.a1, a.r1 + b.r1, a.g1 + b.g1, a.b1 + b.b1); + } + + inline vec8us operator-(vec8us a, vec8us b) + { + return vec8us(a.a0 - b.a0, a.r0 - b.r0, a.g0 - b.g0, a.b0 - b.b0, a.a1 - b.a1, a.r1 - b.r1, a.g1 - b.g1, a.b1 - b.b1); + } + + inline vec8us operator*(vec8us a, vec8us b) + { + return vec8us(a.a0 * b.a0, a.r0 * b.r0, a.g0 * b.g0, a.b0 * b.b0, a.a1 * b.a1, a.r1 * b.r1, a.g1 * b.g1, a.b1 * b.b1); + } + + inline vec8us operator<<(vec8us a, int bits) + { + return vec8us(a.a0 << bits, a.r0 << bits, a.g0 << bits, a.b0 << bits, a.a1 << bits, a.r1 << bits, a.g1 << bits, a.b1 << bits); + } + + inline vec8us operator>>(vec8us a, int 
bits) + { + return vec8us(a.a0 >> bits, a.r0 >> bits, a.g0 >> bits, a.b0 >> bits, a.a1 >> bits, a.r1 >> bits, a.g1 >> bits, a.b1 >> bits); + } + } } diff --git a/src/swrenderer/drawers/r_draw_sky32.h b/src/swrenderer/drawers/r_draw_sky32.h new file mode 100644 index 0000000000..ed34c340fa --- /dev/null +++ b/src/swrenderer/drawers/r_draw_sky32.h @@ -0,0 +1,319 @@ +/* +** Drawer commands for spans +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. 
+** +*/ + +#pragma once + +#include "swrenderer/drawers/r_draw_rgba.h" +#include "swrenderer/viewport/r_skydrawer.h" + +namespace swrenderer +{ + + class DrawSkySingle32Command : public DrawerCommand + { + protected: + SkyDrawerArgs args; + + public: + DrawSkySingle32Command(const SkyDrawerArgs &args) : args(args) { } + + void Execute(DrawerThread *thread) override + { + using namespace drawervectors; + + uint32_t *dest = (uint32_t *)args.Dest(); + int count = args.Count(); + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); + const uint32_t *source0 = (const uint32_t *)args.FrontTexturePixels(); + int textureheight0 = args.FrontTextureHeight(); + + int32_t frac = args.TextureVPos(); + int32_t fracstep = args.TextureVStep(); + + uint32_t solid_top = args.SolidTopColor(); + uint32_t solid_bottom = args.SolidBottomColor(); + bool fadeSky = args.FadeSky(); + + // Find bands for top solid color, top fade, center textured, bottom fade, bottom solid color: + int start_fade = 2; // How fast it should fade out + int fade_length = (1 << (24 - start_fade)); + int start_fadetop_y = (-frac) / fracstep; + int end_fadetop_y = (fade_length - frac) / fracstep; + int start_fadebottom_y = ((2 << 24) - fade_length - frac) / fracstep; + int end_fadebottom_y = ((2 << 24) - frac) / fracstep; + start_fadetop_y = clamp(start_fadetop_y, 0, count); + end_fadetop_y = clamp(end_fadetop_y, 0, count); + start_fadebottom_y = clamp(start_fadebottom_y, 0, count); + end_fadebottom_y = clamp(end_fadebottom_y, 0, count); + + int num_cores = thread->num_cores; + int skipped = thread->skipped_by_thread(args.DestY()); + dest = thread->dest_for_thread(args.DestY(), pitch, dest); + frac += fracstep * skipped; + fracstep *= num_cores; + pitch *= num_cores; + + if (!fadeSky) + { + count = thread->count_for_thread(args.DestY(), count); + + for (int index = 0; index < count; index++) + { + uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; + *dest = 
source0[sample_index]; + dest += pitch; + frac += fracstep; + } + + return; + } + + vec8us solid_top_fill = unpack(solid_top, 0); + vec8us solid_bottom_fill = unpack(solid_bottom, 0); + + int index = skipped; + + // Top solid color: + while (index < start_fadetop_y) + { + *dest = solid_top; + dest += pitch; + frac += fracstep; + index += num_cores; + } + + // Top fade: + while (index < end_fadetop_y) + { + uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; + uint32_t fg = source0[sample_index]; + + vec8us alpha = MAX(MIN(frac >> (16 - start_fade), 256), 0); + vec8us inv_alpha = vec8us(256) - alpha; + + vec8us c = unpack(fg, 0); + c = (c * alpha + solid_top_fill * inv_alpha) >> 8; + *dest = packlo(c); + + frac += fracstep; + dest += pitch; + index += num_cores; + } + + // Textured center: + while (index < start_fadebottom_y) + { + uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; + *dest = source0[sample_index]; + + frac += fracstep; + dest += pitch; + index += num_cores; + } + + // Fade bottom: + while (index < end_fadebottom_y) + { + uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; + uint32_t fg = source0[sample_index]; + + vec8us alpha = MAX(MIN(((2 << 24) - frac) >> (16 - start_fade), 256), 0); + vec8us inv_alpha = vec8us(256) - alpha; + + vec8us c = unpack(fg, 0); + c = (c * alpha + solid_top_fill * inv_alpha) >> 8; + *dest = packlo(c); + + frac += fracstep; + dest += pitch; + index += num_cores; + } + + // Bottom solid color: + while (index < count) + { + *dest = solid_bottom; + dest += pitch; + index += num_cores; + } + } + + FString DebugInfo() override { return "DrawSkySingle32Command"; } + }; + + class DrawSkyDouble32Command : public DrawerCommand + { + protected: + SkyDrawerArgs args; + + public: + DrawSkyDouble32Command(const SkyDrawerArgs &args) : args(args) { } + + void Execute(DrawerThread *thread) override + { + using 
namespace drawervectors; + + uint32_t *dest = (uint32_t *)args.Dest(); + int count = args.Count(); + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); + const uint32_t *source0 = (const uint32_t *)args.FrontTexturePixels(); + const uint32_t *source1 = (const uint32_t *)args.BackTexturePixels(); + int textureheight0 = args.FrontTextureHeight(); + uint32_t maxtextureheight1 = args.BackTextureHeight() - 1; + + int32_t frac = args.TextureVPos(); + int32_t fracstep = args.TextureVStep(); + + uint32_t solid_top = args.SolidTopColor(); + uint32_t solid_bottom = args.SolidBottomColor(); + bool fadeSky = args.FadeSky(); + + // Find bands for top solid color, top fade, center textured, bottom fade, bottom solid color: + int start_fade = 2; // How fast it should fade out + int fade_length = (1 << (24 - start_fade)); + int start_fadetop_y = (-frac) / fracstep; + int end_fadetop_y = (fade_length - frac) / fracstep; + int start_fadebottom_y = ((2 << 24) - fade_length - frac) / fracstep; + int end_fadebottom_y = ((2 << 24) - frac) / fracstep; + start_fadetop_y = clamp(start_fadetop_y, 0, count); + end_fadetop_y = clamp(end_fadetop_y, 0, count); + start_fadebottom_y = clamp(start_fadebottom_y, 0, count); + end_fadebottom_y = clamp(end_fadebottom_y, 0, count); + + int num_cores = thread->num_cores; + int skipped = thread->skipped_by_thread(args.DestY()); + dest = thread->dest_for_thread(args.DestY(), pitch, dest); + frac += fracstep * skipped; + fracstep *= num_cores; + pitch *= num_cores; + + if (!fadeSky) + { + count = thread->count_for_thread(args.DestY(), count); + + for (int index = 0; index < count; index++) + { + uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; + uint32_t fg = source0[sample_index]; + if (fg == 0) + { + uint32_t sample_index2 = MIN(sample_index, maxtextureheight1); + fg = source1[sample_index2]; + } + + *dest = fg; + dest += pitch; + frac += fracstep; + } + + return; + } + + vec8us solid_top_fill = 
unpack(solid_top, 0); + vec8us solid_bottom_fill = unpack(solid_bottom, 0); + + int index = skipped; + + // Top solid color: + while (index < start_fadetop_y) + { + *dest = solid_top; + dest += pitch; + frac += fracstep; + index += num_cores; + } + + // Top fade: + while (index < end_fadetop_y) + { + uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; + uint32_t fg = source0[sample_index]; + if (fg == 0) + { + uint32_t sample_index2 = MIN(sample_index, maxtextureheight1); + fg = source1[sample_index2]; + } + + vec8us alpha = MAX(MIN(frac >> (16 - start_fade), 256), 0); + vec8us inv_alpha = vec8us(256) - alpha; + + vec8us c = unpack(fg, 0); + c = (c * alpha + solid_top_fill * inv_alpha) >> 8; + *dest = packlo(c); + + frac += fracstep; + dest += pitch; + index += num_cores; + } + + // Textured center: + while (index < start_fadebottom_y) + { + uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; + uint32_t fg = source0[sample_index]; + if (fg == 0) + { + uint32_t sample_index2 = MIN(sample_index, maxtextureheight1); + fg = source1[sample_index2]; + } + *dest = fg; + + frac += fracstep; + dest += pitch; + index += num_cores; + } + + // Fade bottom: + while (index < end_fadebottom_y) + { + uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; + uint32_t fg = source0[sample_index]; + if (fg == 0) + { + uint32_t sample_index2 = MIN(sample_index, maxtextureheight1); + fg = source1[sample_index2]; + } + + vec8us alpha = MAX(MIN(((2 << 24) - frac) >> (16 - start_fade), 256), 0); + vec8us inv_alpha = vec8us(256) - alpha; + + vec8us c = unpack(fg, 0); + c = (c * alpha + solid_top_fill * inv_alpha) >> 8; + *dest = packlo(c); + + frac += fracstep; + dest += pitch; + index += num_cores; + } + + // Bottom solid color: + while (index < count) + { + *dest = solid_bottom; + dest += pitch; + index += num_cores; + } + } + + FString DebugInfo() override { return 
"DrawSkyDouble32Command"; } + }; +} From 4d70b01da5438e53bf7bc8c81198b3b79472bc80 Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Wed, 8 Mar 2017 21:34:57 -0500 Subject: [PATCH 911/912] - followed through with GZDoom's SQWORD removals. --- src/swrenderer/drawers/r_draw_rgba.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/swrenderer/drawers/r_draw_rgba.cpp b/src/swrenderer/drawers/r_draw_rgba.cpp index d90c057a3e..b4e3628c38 100644 --- a/src/swrenderer/drawers/r_draw_rgba.cpp +++ b/src/swrenderer/drawers/r_draw_rgba.cpp @@ -510,10 +510,10 @@ namespace swrenderer double endz = 1.f / iz; double endu = uz*endz; double endv = vz*endz; - uint32_t stepu = (uint32_t)(SQWORD((endu - startu) * INVSPAN)); - uint32_t stepv = (uint32_t)(SQWORD((endv - startv) * INVSPAN)); - uint32_t u = (uint32_t)(SQWORD(startu) + _pviewx); - uint32_t v = (uint32_t)(SQWORD(startv) + _pviewy); + uint32_t stepu = (uint32_t)(int64_t((endu - startu) * INVSPAN)); + uint32_t stepv = (uint32_t)(int64_t((endv - startv) * INVSPAN)); + uint32_t u = (uint32_t)(int64_t(startu) + _pviewx); + uint32_t v = (uint32_t)(int64_t(startv) + _pviewy); for (int i = 0; i < SPANSIZE; i++) { @@ -541,8 +541,8 @@ namespace swrenderer double endz = 1.f / iz; startu = uz*endz; startv = vz*endz; - uint32_t u = (uint32_t)(SQWORD(startu) + _pviewx); - uint32_t v = (uint32_t)(SQWORD(startv) + _pviewy); + uint32_t u = (uint32_t)(int64_t(startu) + _pviewx); + uint32_t v = (uint32_t)(int64_t(startv) + _pviewy); uint32_t sx = ((u >> 16) * source_width) >> 16; uint32_t sy = ((v >> 16) * source_height) >> 16; From 536e8fad19988ca9fc0db01401b5b3b2f418f71d Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Thu, 2 Mar 2017 08:05:23 -0500 Subject: [PATCH 912/912] - Make QZDoom GZDoom again! 
--- CMakeLists.txt | 4 ++-- src/posix/cocoa/i_video.mm | 2 +- src/posix/sdl/hardware.cpp | 2 +- src/version.h | 18 +++++++++--------- src/win32/hardware.cpp | 2 +- src/win32/zdoom.rc | 20 ++++++++++---------- wadsrc/CMakeLists.txt | 2 +- 7 files changed, 25 insertions(+), 25 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9df5498e4a..9d7606c192 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,5 @@ cmake_minimum_required( VERSION 2.8.7 ) -project(QZDoom) +project(GZDoom) if( COMMAND cmake_policy ) if( POLICY CMP0011 ) @@ -122,7 +122,7 @@ IF( NOT CMAKE_BUILD_TYPE ) ENDIF() set( ZDOOM_OUTPUT_DIR ${CMAKE_BINARY_DIR} CACHE PATH "Directory where zdoom.pk3 and the executable will be created." ) -set( ZDOOM_EXE_NAME "qzdoom" CACHE FILEPATH "Name of the executable to create" ) +set( ZDOOM_EXE_NAME "gzdoom" CACHE FILEPATH "Name of the executable to create" ) if( MSVC ) # Allow the user to use ZDOOM_OUTPUT_DIR as a single release point. # Use zdoom, zdoomd, zdoom64, and zdoomd64 for the binary names diff --git a/src/posix/cocoa/i_video.mm b/src/posix/cocoa/i_video.mm index 72f579a051..1e894c8715 100644 --- a/src/posix/cocoa/i_video.mm +++ b/src/posix/cocoa/i_video.mm @@ -148,7 +148,7 @@ CUSTOM_CVAR(Bool, vid_autoswitch, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG | CVAR_ static int s_currentRenderer; -CUSTOM_CVAR(Int, vid_renderer, 0, CVAR_ARCHIVE | CVAR_GLOBALCONFIG | CVAR_NOINITCALL) +CUSTOM_CVAR(Int, vid_renderer, 1, CVAR_ARCHIVE | CVAR_GLOBALCONFIG | CVAR_NOINITCALL) { // 0: Software renderer // 1: OpenGL renderer diff --git a/src/posix/sdl/hardware.cpp b/src/posix/sdl/hardware.cpp index 84508b728e..16c1ff1c8c 100644 --- a/src/posix/sdl/hardware.cpp +++ b/src/posix/sdl/hardware.cpp @@ -65,7 +65,7 @@ void I_RestartRenderer(); int currentrenderer; // [ZDoomGL] -CUSTOM_CVAR (Int, vid_renderer, 0, CVAR_ARCHIVE | CVAR_GLOBALCONFIG | CVAR_NOINITCALL) +CUSTOM_CVAR (Int, vid_renderer, 1, CVAR_ARCHIVE | CVAR_GLOBALCONFIG | CVAR_NOINITCALL) { // 0: Software 
renderer // 1: OpenGL renderer diff --git a/src/version.h b/src/version.h index ad6a47390d..4f8f76fabc 100644 --- a/src/version.h +++ b/src/version.h @@ -46,12 +46,12 @@ const char *GetVersionString(); #ifdef GIT_DESCRIPTION #define VERSIONSTR GIT_DESCRIPTION #else -#define VERSIONSTR "1.3pre" +#define VERSIONSTR "2.3pre" #endif // The version as seen in the Windows resource -#define RC_FILEVERSION 1,3,9999,0 -#define RC_PRODUCTVERSION 1,3,9999,0 +#define RC_FILEVERSION 2,3,9999,0 +#define RC_PRODUCTVERSION 2,3,9999,0 #define RC_PRODUCTVERSION2 VERSIONSTR // These are for content versioning. The current state is '2.4'. #define VER_MAJOR 2 @@ -92,14 +92,14 @@ const char *GetVersionString(); #define SAVEVER 4550 // This is so that derivates can use the same savegame versions without worrying about engine compatibility -#define GAMESIG "QZDOOM" -#define BASEWAD "qzdoom.pk3" +#define GAMESIG "GZDOOM" +#define BASEWAD "gzdoom.pk3" // More stuff that needs to be different for derivatives. -#define GAMENAME "QZDoom" -#define GAMENAMELOWERCASE "qzdoom" -#define FORUM_URL "http://forum.drdteam.org/viewforum.php?f=196" -#define BUGS_FORUM_URL "http://forum.drdteam.org/viewforum.php?f=197" +#define GAMENAME "GZDoom" +#define GAMENAMELOWERCASE "gzdoom" +#define FORUM_URL "http://forum.drdteam.org" +#define BUGS_FORUM_URL "http://forum.drdteam.org/viewforum.php?f=24" #if defined(__APPLE__) || defined(_WIN32) #define GAME_DIR GAMENAME diff --git a/src/win32/hardware.cpp b/src/win32/hardware.cpp index 5a17e90ece..437ec81aa5 100644 --- a/src/win32/hardware.cpp +++ b/src/win32/hardware.cpp @@ -107,7 +107,7 @@ CUSTOM_CVAR(Bool, vid_used3d, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG | CVAR_NOIN } // [ZDoomGL] -CUSTOM_CVAR (Int, vid_renderer, 0, CVAR_ARCHIVE | CVAR_GLOBALCONFIG | CVAR_NOINITCALL) +CUSTOM_CVAR (Int, vid_renderer, 1, CVAR_ARCHIVE | CVAR_GLOBALCONFIG | CVAR_NOINITCALL) { // 0: Software renderer // 1: OpenGL renderer diff --git a/src/win32/zdoom.rc b/src/win32/zdoom.rc index 
cede1e894d..0d21faae5d 100644 --- a/src/win32/zdoom.rc +++ b/src/win32/zdoom.rc @@ -72,13 +72,13 @@ BEGIN " BEGIN\r\n" " VALUE ""Comments"", ""Thanks to id Software for creating DOOM and then releasing the source code. Thanks also to TeamTNT for creating BOOM, which ZDoom is partially based on. Includes code based on the Cajun Bot 0.97 by Martin Collberg.""\r\n" " VALUE ""CompanyName"", "" ""\r\n" - " VALUE ""FileDescription"", ""QZDoom""\r\n" + " VALUE ""FileDescription"", ""GZDoom""\r\n" " VALUE ""FileVersion"", RC_FILEVERSION2\r\n" - " VALUE ""InternalName"", ""QZDoom""\r\n" + " VALUE ""InternalName"", ""GZDoom""\r\n" " VALUE ""LegalCopyright"", ""Copyright \\u00A9 1993-1996 id Software, 1998-2010 Randy Heit, 2002-2010 Christoph Oelckers, et al.""\r\n" " VALUE ""LegalTrademarks"", ""DoomR is a Registered Trademark of id Software, Inc.""\r\n" - " VALUE ""OriginalFilename"", ""qzdoom.exe""\r\n" - " VALUE ""ProductName"", ""QZDoom""\r\n" + " VALUE ""OriginalFilename"", ""gzdoom.exe""\r\n" + " VALUE ""ProductName"", ""GZDoom""\r\n" " VALUE ""ProductVersion"", RC_PRODUCTVERSION2\r\n" " END\r\n" " END\r\n" @@ -228,7 +228,7 @@ LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_US FONT 8, "MS Shell Dlg" { CONTROL 101, IDC_STATIC, STATIC, SS_ICON | WS_CHILD | WS_VISIBLE, 7, 7, 21, 20 - CONTROL "Welcome to QZDoom!", IDC_STATIC, STATIC, SS_LEFT | WS_CHILD | WS_VISIBLE | WS_GROUP, 42, 8, 180, 8 + CONTROL "Welcome to GZDoom!", IDC_STATIC, STATIC, SS_LEFT | WS_CHILD | WS_VISIBLE | WS_GROUP, 42, 8, 180, 8 CONTROL "", IDC_WELCOME_VERSION, STATIC, SS_LEFT | WS_CHILD | WS_VISIBLE | WS_GROUP, 42, 18, 180, 8 CONTROL "IWAD selection", IDC_STATIC, BUTTON, BS_GROUPBOX | WS_CHILD | WS_VISIBLE, 8, 32, 208, 117 CONTROL "Select which game file (IWAD) to run.", IDC_STATIC, STATIC, SS_LEFT | WS_CHILD | WS_VISIBLE | WS_GROUP, 12, 44, 190, 8 @@ -242,7 +242,7 @@ FONT 8, "MS Shell Dlg" CONTROL "Load lights", IDC_WELCOME_LIGHTS, BUTTON, BS_AUTOCHECKBOX | WS_CHILD | WS_VISIBLE | WS_TABSTOP, 130, 180, 51, 10 
CONTROL "Load brightmaps", IDC_WELCOME_BRIGHTMAPS, BUTTON, BS_AUTOCHECKBOX | WS_CHILD | WS_VISIBLE | WS_TABSTOP, 130, 190, 65, 10 CONTROL "Don't ask me this again", IDC_DONTASKIWAD, BUTTON, BS_AUTOCHECKBOX | WS_CHILD | WS_VISIBLE | WS_TABSTOP, 73, 211, 87, 10 - CONTROL "Play QZDoom", IDOK, BUTTON, BS_DEFPUSHBUTTON | WS_CHILD | WS_VISIBLE | WS_TABSTOP, 8, 228, 90, 14 + CONTROL "Play GZDoom", IDOK, BUTTON, BS_DEFPUSHBUTTON | WS_CHILD | WS_VISIBLE | WS_TABSTOP, 8, 228, 90, 14 CONTROL "Exit", IDCANCEL, BUTTON, BS_PUSHBUTTON | WS_CHILD | WS_VISIBLE | WS_TABSTOP, 166, 228, 50, 14 } @@ -504,13 +504,13 @@ BEGIN BEGIN VALUE "Comments", "Thanks to id Software for creating DOOM and then releasing the source code. Thanks also to TeamTNT for creating BOOM, which ZDoom is partially based on. Includes code based on the Cajun Bot 0.97 by Martin Collberg." VALUE "CompanyName", " " - VALUE "FileDescription", "QZDoom" + VALUE "FileDescription", "GZDoom" VALUE "FileVersion", RC_FILEVERSION2 - VALUE "InternalName", "QZDoom" + VALUE "InternalName", "GZDoom" VALUE "LegalCopyright", "Copyright \u00A9 1993-1996 id Software, 1998-2010 Randy Heit, 2002-2010 Christoph Oelckers, et al." VALUE "LegalTrademarks", "DoomR is a Registered Trademark of id Software, Inc." - VALUE "OriginalFilename", "qzdoom.exe" - VALUE "ProductName", "QZDoom" + VALUE "OriginalFilename", "gzdoom.exe" + VALUE "ProductName", "GZDoom" VALUE "ProductVersion", RC_PRODUCTVERSION2 END END diff --git a/wadsrc/CMakeLists.txt b/wadsrc/CMakeLists.txt index 5a85840e01..80189a328c 100644 --- a/wadsrc/CMakeLists.txt +++ b/wadsrc/CMakeLists.txt @@ -1,3 +1,3 @@ cmake_minimum_required( VERSION 2.8.7 ) -add_pk3(qzdoom.pk3 ${CMAKE_CURRENT_SOURCE_DIR}/static) +add_pk3(gzdoom.pk3 ${CMAKE_CURRENT_SOURCE_DIR}/static)